From 39236c6e673c41db228275375ab7fdb0f837b292 Mon Sep 17 00:00:00 2001
From: Apple
Date: Tue, 29 Oct 2013 00:03:36 +0000
Subject: [PATCH] xnu-2422.1.72.tar.gz

---
 EXTERNAL_HEADERS/AssertMacros.h | 1396 +-
 EXTERNAL_HEADERS/Availability.h | 25 +-
 EXTERNAL_HEADERS/AvailabilityInternal.h | 4689 +++++-
 EXTERNAL_HEADERS/AvailabilityMacros.h | 447 +-
 EXTERNAL_HEADERS/Makefile | 10 +-
 EXTERNAL_HEADERS/ar.h | 96 -
 EXTERNAL_HEADERS/architecture/Makefile | 3 -
 EXTERNAL_HEADERS/architecture/i386/Makefile | 4 -
 EXTERNAL_HEADERS/architecture/i386/asm_help.h | 5 +
 EXTERNAL_HEADERS/mach-o/Makefile | 2 -
 EXTERNAL_HEADERS/mach-o/loader.h | 29 +-
 EXTERNAL_HEADERS/stdarg.h | 5 +-
 EXTERNAL_HEADERS/stddef.h | 64 +
 EXTERNAL_HEADERS/stdint.h | 145 +-
 Makefile | 175 +-
 README | 55 +-
 SETUP/Makefile | 3 +-
 SETUP/config/Makefile | 18 +-
 SETUP/config/mkmakefile.c | 78 +-
 SETUP/decomment/Makefile | 14 +-
 SETUP/{md => installfile}/Makefile | 19 +-
 SETUP/installfile/installfile.c | 137 +
 SETUP/kextsymboltool/Makefile | 21 +-
 SETUP/kextsymboltool/kextsymboltool.c | 94 +-
 SETUP/md/md.1 | 121 -
 SETUP/md/md.c | 654 -
 SETUP/setsegname/Makefile | 14 +-
 SETUP/setsegname/setsegname.c | 49 +-
 SETUP/setup.csh | 4 -
 SETUP/setup.sh | 10 +-
 bsd/Makefile | 14 +-
 bsd/bsm/Makefile | 13 -
 bsd/bsm/audit.h | 4 +-
 bsd/conf/MASTER | 78 +-
 bsd/conf/MASTER.i386 | 120 -
 bsd/conf/MASTER.x86_64 | 12 +-
 bsd/conf/Makefile | 68 +-
 bsd/conf/Makefile.i386 | 20 -
 bsd/conf/Makefile.template | 337 +-
 bsd/conf/Makefile.x86_64 | 3 -
 bsd/conf/files | 226 +-
 bsd/conf/files.i386 | 35 -
 bsd/conf/files.x86_64 | 2 +-
 bsd/conf/param.c | 3 +-
 bsd/crypto/Makefile | 16 +-
 bsd/crypto/blowfish/Makefile | 12 -
 bsd/crypto/cast128/Makefile | 12 -
 bsd/crypto/rc4/Makefile | 16 -
 bsd/dev/Makefile | 21 +-
 bsd/dev/chud/chud_bsd_callback.c | 6 +
 bsd/dev/{vn => dtrace}/Makefile | 24 +-
 bsd/dev/dtrace/dtrace.c | 253 +-
 bsd/dev/dtrace/dtrace_glue.c | 56 +-
 bsd/dev/dtrace/dtrace_ptss.c | 30 +-
 bsd/dev/dtrace/fasttrap.c | 4 +-
 bsd/dev/dtrace/fbt.c | 8 +-
 bsd/dev/dtrace/lockstat.c | 16 +-
 bsd/dev/dtrace/profile_prvd.c | 39 +-
 bsd/dev/dtrace/scripts/Makefile | 38 +
 bsd/dev/dtrace/scripts/darwin.d | 330 +
 bsd/dev/dtrace/scripts/errno.d | 230 +
 bsd/dev/dtrace/scripts/io.d | 265 +
 bsd/dev/dtrace/scripts/ip.d | 225 +
 bsd/dev/dtrace/scripts/mptcp.d | 231 +
 bsd/dev/dtrace/scripts/regs_x86_64.d | 96 +
 bsd/dev/dtrace/scripts/sched.d | 77 +
 bsd/dev/dtrace/scripts/signal.d | 86 +
 bsd/dev/dtrace/scripts/socket.d | 77 +
 bsd/dev/dtrace/scripts/tcp.d | 254 +
 bsd/dev/dtrace/scripts/unistd.d | 56 +
 bsd/dev/dtrace/sdt.c | 15 +-
 bsd/dev/dtrace/sdt_subr.c | 195 +-
 bsd/dev/dtrace/systrace.c | 30 +-
 bsd/dev/i386/conf.c | 7 +-
 bsd/dev/i386/dtrace_isa.c | 29 +-
 bsd/dev/i386/fbt_x86.c | 565 +-
 bsd/dev/i386/munge.s | 393 -
 bsd/dev/i386/sdt_x86.c | 59 -
 bsd/dev/i386/stubs.c | 12 +-
 bsd/dev/i386/sysctl.c | 179 +-
 bsd/dev/i386/systemcalls.c | 83 +-
 bsd/dev/memdev.c | 19 +-
 bsd/dev/munge.c | 528 +
 bsd/dev/random/Makefile | 23 +-
 bsd/dev/random/randomdev.c | 93 +-
 bsd/dev/unix_startup.c | 13 +-
 bsd/dev/vn/vn.c | 2 +-
 bsd/dev/x86_64/munge.s | 373 -
 bsd/hfs/Makefile | 14 +-
 bsd/hfs/hfs.h | 105 +-
 bsd/hfs/hfs_attrlist.c | 17 +-
 bsd/hfs/hfs_btreeio.c | 117 +-
 bsd/hfs/hfs_catalog.c | 950 +-
 bsd/hfs/hfs_catalog.h | 52 +-
 bsd/hfs/hfs_chash.c | 12 +-
 bsd/hfs/hfs_cnode.c | 273 +-
 bsd/hfs/hfs_cnode.h | 43 +-
 bsd/hfs/hfs_cprotect.c | 1626 +-
 bsd/hfs/hfs_dbg.h | 1 -
 bsd/hfs/hfs_encodinghint.c | 16 +-
 bsd/hfs/hfs_encodings.c | 80 +-
 bsd/hfs/hfs_encodings.h | 1 -
 bsd/hfs/hfs_endian.c | 15 +-
 bsd/hfs/hfs_format.h | 75 +-
 bsd/hfs/hfs_fsctl.h | 9 +
 bsd/hfs/hfs_hotfiles.c | 41 +-
 bsd/hfs/hfs_kdebug.h | 7 +
 bsd/hfs/hfs_link.c | 69 +-
 bsd/hfs/hfs_lookup.c | 73 +-
 bsd/hfs/hfs_notification.c | 44 +-
 bsd/hfs/hfs_quota.c | 8 +-
 bsd/hfs/hfs_readwrite.c | 481 +-
 bsd/hfs/hfs_search.c | 318 +-
 bsd/hfs/hfs_unistr.h | 64 +
 bsd/hfs/hfs_vfsops.c | 1177 +-
 bsd/hfs/hfs_vfsutils.c | 398 +-
 bsd/hfs/hfs_vnops.c | 552 +-
 bsd/hfs/hfs_xattr.c | 303 +-
 bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c | 20 +-
 bsd/hfs/hfscommon/BTree/BTreeTreeOps.c | 2 +-
 bsd/hfs/hfscommon/Catalog/CatalogUtilities.c | 25 +-
 bsd/hfs/hfscommon/Catalog/FileIDsServices.c | 76 +-
 bsd/hfs/hfscommon/Misc/BTreeWrapper.c | 89 +-
 bsd/hfs/hfscommon/Misc/FileExtentMapping.c | 280 +-
 bsd/hfs/hfscommon/Misc/HybridAllocator.c | 533 -
 bsd/hfs/hfscommon/Misc/VolumeAllocation.c | 4096 ++---
 bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c | 64 +-
 bsd/hfs/hfscommon/headers/BTreesInternal.h | 4 +-
 bsd/hfs/hfscommon/headers/FileMgrInternal.h | 31 +-
 .../hfscommon/headers/HFSUnicodeWrappers.h | 4 +-
 bsd/hfs/hfscommon/headers/HybridAllocator.h | 101 -
 bsd/hfs/hfscommon/headers/RedBlackTree.h | 969 --
 bsd/i386/Makefile | 6 +-
 bsd/i386/_mcontext.h | 130 +
 bsd/i386/_structs.h | 114 +-
 bsd/i386/_types.h | 10 +-
 bsd/i386/param.h | 4 -
 bsd/i386/setjmp.h | 9 +-
 bsd/i386/signal.h | 18 -
 bsd/i386/types.h | 35 +-
 bsd/kern/Makefile | 21 +-
 bsd/kern/bsd_init.c | 79 +-
 bsd/kern/decmpfs.c | 10 +-
 bsd/kern/kdebug.c | 1242 +-
 bsd/kern/kern_aio.c | 39 +-
 bsd/kern/kern_asl.c | 257 +
 bsd/kern/kern_authorization.c | 2 +-
 bsd/kern/kern_clock.c | 25 +
 bsd/kern/kern_control.c | 166 +-
 bsd/kern/kern_core.c | 24 +-
 bsd/kern/kern_credential.c | 357 +-
 bsd/kern/kern_cs.c | 474 +
 bsd/kern/kern_descrip.c | 750 +-
 bsd/kern/kern_event.c | 1578 +-
 bsd/kern/kern_exec.c | 886 +-
 bsd/kern/kern_exit.c | 216 +-
 bsd/kern/kern_fork.c | 106 +-
 bsd/kern/kern_guarded.c | 683 +
 bsd/kern/kern_kpc.c | 533 +
 bsd/kern/kern_lockf.c | 142 +-
 bsd/kern/kern_malloc.c | 43 +-
 bsd/kern/kern_memorystatus.c | 3520 ++--
 bsd/kern/kern_mib.c | 42 +-
 bsd/kern/kern_mman.c | 117 +-
 bsd/kern/kern_overrides.c | 190 +
 bsd/kern/kern_pcsamples.c | 6 +-
 bsd/kern/kern_physio.c | 4 -
 bsd/kern/kern_priv.c | 11 +
 bsd/kern/kern_proc.c | 466 +-
 bsd/kern/kern_prot.c | 45 +-
 bsd/kern/kern_resource.c | 703 +-
 bsd/kern/kern_sig.c | 21 +-
 bsd/kern/kern_subr.c | 29 +-
 bsd/kern/kern_symfile.c | 37 +-
 bsd/kern/kern_sysctl.c | 169 +-
 bsd/kern/kern_tests.c | 48 +
 bsd/kern/kern_time.c | 101 +-
 bsd/kern/kpi_mbuf.c | 300 +-
 bsd/kern/kpi_socket.c | 858 +-
 bsd/kern/kpi_socketfilter.c | 1375 +-
 bsd/kern/mach_fat.c | 65 +-
 bsd/kern/mach_fat.h | 2 -
 bsd/kern/mach_loader.c | 231 +-
 bsd/kern/mach_loader.h | 1 +
 bsd/kern/mach_process.c | 23 +-
 bsd/kern/makesyscalls.sh | 50 +-
 bsd/kern/mcache.c | 47 +-
 bsd/kern/netboot.c | 8 +-
 bsd/kern/policy_check.c | 742 +-
 bsd/kern/posix_sem.c | 24 +-
 bsd/kern/posix_shm.c | 31 +-
 bsd/kern/proc_info.c | 529 +-
 bsd/kern/proc_uuid_policy.c | 361 +
 bsd/kern/process_policy.c | 533 +-
 bsd/kern/pthread_shims.c | 449 +
 bsd/kern/pthread_support.c | 4510 ----
 bsd/kern/pthread_synch.c | 2416 ---
 bsd/kern/socket_info.c | 293 +-
 bsd/kern/subr_log.c | 28 +-
 bsd/kern/subr_prf.c | 2 -
 bsd/kern/sys_domain.c | 30 +-
 bsd/kern/sys_generic.c | 294 +-
 bsd/kern/sys_pipe.c | 38 +-
 bsd/kern/sys_socket.c | 194 +-
 bsd/kern/syscalls.master | 104 +-
 bsd/kern/sysv_sem.c | 24 +-
 bsd/kern/trace.codes | 375 +-
 bsd/kern/tty.c | 8 +-
 bsd/kern/tty_ptmx.c | 68 +-
 bsd/kern/tty_pty.c | 28 +-
 bsd/kern/tty_tty.c | 114 +-
 bsd/kern/ubc_subr.c | 759 +-
 bsd/kern/uipc_domain.c | 1085 +-
 bsd/kern/uipc_mbuf.c | 422 +-
 bsd/kern/uipc_mbuf2.c | 105 +-
 bsd/kern/uipc_proto.c | 93 +-
 bsd/kern/uipc_socket.c | 2320 ++-
 bsd/kern/uipc_socket2.c | 1243 +-
 bsd/kern/uipc_syscalls.c | 644 +-
 bsd/kern/uipc_usrreq.c | 89 +-
 bsd/kern/vm_pressure.c | 473 +-
 bsd/kern/vm_pressure.h | 9 +-
 bsd/libkern/bcmp.c | 92 -
 bsd/libkern/inet_ntop.c | 214 -
 bsd/libkern/libkern.h | 19 +-
 bsd/libkern/memchr.c | 42 +
 bsd/libkern/rindex.c | 85 -
 bsd/libkern/strtol.c | 261 -
 bsd/machine/Makefile | 6 +-
 .../timer.h => bsd/machine/_mcontext.h | 9 +-
 bsd/man/man2/connectx.2 | 288 +
 bsd/man/man2/disconnectx.2 | 133 +
 bsd/man/man2/fcntl.2 | 5 +-
 bsd/man/man2/getdtablesize.2 | 12 +
 bsd/man/man2/getpriority.2 | 55 +-
 bsd/man/man2/kqueue.2 | 8 +-
 bsd/man/man2/peeloff.2 | 99 +
 bsd/man/man2/searchfs.2 | 13 +-
 bsd/man/man2/setxattr.2 | 27 +-
 bsd/man/man2/shm_open.2 | 3 +-
 bsd/man/man2/sigaltstack.2 | 4 +-
 bsd/man/man2/socket.2 | 3 -
 bsd/man/man3/Makefile | 6 +-
 bsd/man/man3/getiopolicy_np.3 | 179 +
 bsd/man/man4/bpf.4 | 140 +-
 bsd/man/man4/netintro.4 | 2 +-
 bsd/man/man4/tcp.4 | 4 +-
 bsd/man/man4/tun.4 | 361 -
 bsd/miscfs/Makefile | 14 +-
 bsd/miscfs/devfs/Makefile | 8 -
 bsd/miscfs/devfs/devfs_fdesc_support.c | 18 +-
 bsd/miscfs/devfs/devfs_vfsops.c | 4 +-
 bsd/miscfs/devfs/reproto.sh | 0
 bsd/miscfs/fifofs/Makefile | 8 -
 bsd/miscfs/fifofs/fifo_vnops.c | 3 -
 bsd/miscfs/mockfs/mockfs.h | 75 +
 bsd/miscfs/mockfs/mockfs_fsnode.c | 394 +
 bsd/miscfs/mockfs/mockfs_fsnode.h | 87 +
 bsd/miscfs/mockfs/mockfs_vfsops.c | 259 +
 bsd/miscfs/mockfs/mockfs_vnops.c | 446 +
 .../miscfs/mockfs/mockfs_vnops.h | 15 +-
 bsd/miscfs/specfs/Makefile | 9 -
 bsd/miscfs/specfs/spec_vnops.c | 896 +-
 bsd/miscfs/union/Makefile | 8 -
 bsd/net/Makefile | 19 +-
 bsd/net/altq/Makefile | 12 -
 bsd/net/altq/altq_cbq.c | 6 +-
 bsd/net/altq/altq_fairq.c | 6 +-
 bsd/net/altq/altq_hfsc.c | 6 +-
 bsd/net/altq/altq_qfq.c | 6 +-
 bsd/net/bpf.c | 327 +-
 bsd/net/bpf.h | 846 +-
 bsd/net/bpf_filter.c | 4 +
 bsd/net/bpfdesc.h | 1 +
 bsd/net/bridgestp.c | 12 +-
 bsd/net/bsd_comp.c | 1153 --
 bsd/net/classq/Makefile | 12 -
 bsd/net/classq/classq.c | 4 +-
 bsd/net/classq/classq.h | 2 +
 bsd/net/classq/classq_blue.c | 24 +-
 bsd/net/classq/classq_red.c | 25 +-
 bsd/net/classq/classq_rio.c | 44 +-
 bsd/net/classq/classq_sfb.c | 255 +-
 bsd/net/classq/classq_sfb.h | 19 +-
 bsd/net/classq/classq_subr.c | 38 +-
 bsd/net/classq/classq_util.c | 2 +
 bsd/net/classq/if_classq.h | 31 +-
 bsd/net/dlil.c | 2305 ++-
 bsd/net/dlil.h | 111 +-
 bsd/net/ether_at_pr_module.c | 262 -
 bsd/net/ether_if_module.c | 557 +-
 bsd/net/ether_inet6_pr_module.c | 38 +-
 bsd/net/ether_inet_pr_module.c | 43 +-
 bsd/net/flowadv.c | 243 +
 bsd/net/flowadv.h | 28 +-
 bsd/net/flowhash.c | 6 +-
 bsd/net/if.c | 1182 +-
 bsd/net/if.h | 307 +-
 bsd/net/if_arp.h | 31 +-
 bsd/net/if_bond.c | 39 +-
 bsd/net/if_bond_internal.h | 2 +-
 bsd/net/if_bridge.c | 2239 ++-
 bsd/net/if_ether.h | 29 +-
 bsd/net/if_gif.c | 574 +-
 bsd/net/if_gif.h | 31 +-
 bsd/net/if_ipsec.c | 926 ++
 osfmk/kern/etimer.h => bsd/net/if_ipsec.h | 85 +-
 bsd/net/if_llreach.c | 10 +-
 bsd/net/if_loop.c | 74 +-
 bsd/net/if_media.c | 538 -
 bsd/net/if_media.h | 52 +-
 bsd/net/if_mib.c | 13 +-
 bsd/net/if_pflog.c | 8 +-
 bsd/net/if_pppvar.h | 146 -
 bsd/net/if_stf.c | 36 +-
 bsd/net/if_types.h | 17 +-
 bsd/net/if_utun.c | 113 +-
 bsd/net/if_utun.h | 22 +-
 bsd/net/if_utun_crypto.c | 208 +-
 bsd/net/if_utun_crypto.h | 184 +-
 bsd/net/if_utun_crypto_dtls.c | 1045 ++
 bsd/net/if_utun_crypto_dtls.h | 127 +
 bsd/net/if_utun_crypto_ipsec.c | 65 +-
 bsd/net/if_var.h | 186 +-
 bsd/net/if_vlan.c | 40 +-
 bsd/net/if_vlan_var.h | 2 +-
 bsd/net/init.h | 2 +-
 bsd/net/iptap.c | 819 +-
 bsd/net/iptap.h | 42 +-
 bsd/net/iso88025.h | 117 -
 bsd/net/kext_net.c | 349 -
 bsd/net/kext_net.h | 49 +-
 bsd/net/kpi_interface.c | 525 +-
 bsd/net/kpi_interface.h | 698 +-
 bsd/net/kpi_interfacefilter.c | 4 +-
 bsd/net/kpi_protocol.c | 46 +-
 bsd/net/kpi_protocol.h | 21 +-
 bsd/net/ndrv.c | 116 +-
 bsd/net/net_osdep.c | 89 -
 bsd/net/net_osdep.h | 1 -
 bsd/net/net_stubs.c | 2692 +++
 bsd/net/netsrc.c | 7 +-
 bsd/net/netsrc.h | 11 +-
 bsd/net/ntstat.c | 677 +-
 bsd/net/ntstat.h | 66 +-
 bsd/net/pf.c | 334 +-
 bsd/net/pf_if.c | 7 +-
 bsd/net/pf_ioctl.c | 325 +-
 bsd/net/pf_norm.c | 58 +-
 bsd/net/pfkeyv2.h | 36 +-
 bsd/net/pfvar.h | 84 +-
 bsd/net/pktap.c | 1133 ++
 bsd/net/pktap.h | 141 +
 bsd/net/pktsched/Makefile | 12 -
 bsd/net/pktsched/pktsched_cbq.c | 4 +
 bsd/net/pktsched/pktsched_fairq.c | 15 +-
 bsd/net/pktsched/pktsched_hfsc.c | 10 +-
 bsd/net/pktsched/pktsched_priq.c | 36 +-
 bsd/net/pktsched/pktsched_qfq.c | 51 +-
 bsd/net/pktsched/pktsched_rmclass.c | 5 +-
 bsd/net/pktsched/pktsched_tcq.c | 34 +-
 bsd/net/ppp_deflate.c | 717 -
 bsd/net/radix.c | 26 +-
 bsd/net/radix.h | 2 +-
 bsd/net/raw_cb.c | 13 +-
 bsd/net/raw_cb.h | 29 +-
 bsd/net/raw_usrreq.c | 37 +-
 bsd/net/route.c | 944 +-
 bsd/net/route.h | 502 +-
 bsd/net/rtsock.c | 1442 +-
 bsd/net/slip.h | 90 -
 bsd/netat/Makefile | 51 -
 bsd/netat/adsp.c | 395 -
 bsd/netat/adsp.h | 722 -
 bsd/netat/adsp_CLDeny.c | 98 -
 bsd/netat/adsp_CLListen.c | 103 -
 bsd/netat/adsp_Close.c | 513 -
 bsd/netat/adsp_Control.c | 552 -
 bsd/netat/adsp_Init.c | 180 -
 bsd/netat/adsp_NewCID.c | 89 -
 bsd/netat/adsp_Open.c | 299 -
 bsd/netat/adsp_Options.c | 103 -
 bsd/netat/adsp_Packet.c | 837 -
 bsd/netat/adsp_Read.c | 414 -
 bsd/netat/adsp_RxAttn.c | 218 -
 bsd/netat/adsp_RxData.c | 392 -
 bsd/netat/adsp_Status.c | 155 -
 bsd/netat/adsp_Timer.c | 218 -
 bsd/netat/adsp_TimerElem.c | 178 -
 bsd/netat/adsp_Write.c | 242 -
 bsd/netat/adsp_attention.c | 133 -
 bsd/netat/adsp_internal.h | 411 -
 bsd/netat/adsp_misc.c | 138 -
 bsd/netat/adsp_reset.c | 240 -
 bsd/netat/adsp_stream.c | 624 -
 bsd/netat/appletalk.h | 317 -
 bsd/netat/asp.h | 242 -
 bsd/netat/asp_proto.c | 2313 ---
 bsd/netat/at.c | 781 -
 bsd/netat/at_aarp.h | 202 -
 bsd/netat/at_config.h | 85 -
 bsd/netat/at_ddp_brt.h | 93 -
 bsd/netat/at_pat.h | 75 -
 bsd/netat/at_pcb.c | 253 -
 bsd/netat/at_pcb.h | 146 -
 bsd/netat/at_proto.c | 150 -
 bsd/netat/at_snmp.h | 230 -
 bsd/netat/at_var.h | 358 -
 bsd/netat/atalk.exp | 9 -
 bsd/netat/atalk.imp | 9 -
 bsd/netat/atp.h | 484 -
 bsd/netat/atp_alloc.c | 188 -
 bsd/netat/atp_misc.c | 341 -
 bsd/netat/atp_open.c | 280 -
 bsd/netat/atp_read.c | 562 -
 bsd/netat/atp_write.c | 1915 ---
 bsd/netat/aurp.h | 296 -
 bsd/netat/aurp_aurpd.c | 446 -
 bsd/netat/aurp_cfg.c | 109 -
 bsd/netat/aurp_gdata.c | 72 -
 bsd/netat/aurp_misc.c | 221 -
 bsd/netat/aurp_open.c | 259 -
 bsd/netat/aurp_rd.c | 128 -
 bsd/netat/aurp_ri.c | 850 -
 bsd/netat/aurp_rx.c | 219 -
 bsd/netat/aurp_tickle.c | 169 -
 bsd/netat/aurp_tx.c | 152 -
 bsd/netat/aurp_zi.c | 624 -
 bsd/netat/ddp.c | 1420 --
 bsd/netat/ddp.h | 267 -
 bsd/netat/ddp.save | 903 -
 bsd/netat/ddp_aarp.c | 991 --
 bsd/netat/ddp_aep.c | 118 -
 bsd/netat/ddp_brt.c | 197 -
 bsd/netat/ddp_lap.c | 1734 --
 bsd/netat/ddp_nbp.c | 1538 --
 bsd/netat/ddp_proto.c | 172 -
 bsd/netat/ddp_r_rtmp.c | 1659 --
 bsd/netat/ddp_r_zip.c | 2003 ---
 bsd/netat/ddp_rtmp.c | 386 -
 bsd/netat/ddp_rtmptable.c | 1144 --
 bsd/netat/ddp_sip.c | 186 -
 bsd/netat/ddp_usrreq.c | 344 -
 bsd/netat/debug.h | 270 -
 bsd/netat/drv_dep.c | 315 -
 bsd/netat/lap.h | 101 -
 bsd/netat/nbp.h | 176 -
 bsd/netat/pap.h | 137 -
 bsd/netat/routing_tables.h | 237 -
 bsd/netat/rtmp.h | 91 -
 bsd/netat/sys_dep.c | 357 -
 bsd/netat/sys_glue.c | 1332 --
 bsd/netat/sysglue.h | 222 -
 bsd/netat/zip.h | 102 -
 bsd/netinet/Makefile | 17 +-
 bsd/netinet/cpu_in_cksum.c | 396 +
 bsd/netinet/dhcp_options.h | 4 +-
 bsd/netinet/flow_divert.c | 3266 ++++
 bsd/netinet/flow_divert.h | 80 +
 bsd/netinet/flow_divert_proto.h | 80 +
 bsd/netinet/icmp6.h | 124 +-
 bsd/netinet/icmp_var.h | 6 +-
 bsd/netinet/if_ether.h | 4 +-
 bsd/netinet/igmp.c | 797 +-
 bsd/netinet/igmp_var.h | 37 +-
 bsd/netinet/in.c | 2294 ++-
 bsd/netinet/in.h | 123 +-
 bsd/netinet/in_arp.c | 925 +-
 bsd/netinet/in_arp.h | 170 +-
 bsd/netinet/in_cksum.c | 522 +-
 bsd/netinet/in_dhcp.c | 15 +-
 bsd/netinet/in_gif.c | 138 +-
 bsd/netinet/in_gif.h | 22 +-
 bsd/netinet/in_mcast.c | 218 +-
 bsd/netinet/in_pcb.c | 2434 ++-
 bsd/netinet/in_pcb.h | 799 +-
 bsd/netinet/in_pcblist.c | 294 +-
 bsd/netinet/in_proto.c | 409 +-
 bsd/netinet/in_rmx.c | 473 +-
 bsd/netinet/in_systm.h | 7 +-
 bsd/netinet/in_tclass.c | 142 +-
 bsd/netinet/in_var.h | 201 +-
 bsd/netinet/ip.h | 4 +-
 bsd/netinet/ip6.h | 11 +-
 bsd/netinet/ip_divert.c | 139 +-
 bsd/netinet/ip_divert.h | 6 +-
 bsd/netinet/ip_dummynet.c | 85 +-
 bsd/netinet/ip_dummynet.h | 7 +-
 bsd/netinet/ip_ecn.h | 6 +-
 bsd/netinet/ip_encap.c | 30 +-
 bsd/netinet/ip_encap.h | 17 +-
 bsd/netinet/ip_flowid.h | 4 +-
 bsd/netinet/ip_fw.h | 6 +-
 bsd/netinet/ip_fw2.c | 123 +-
 bsd/netinet/ip_fw2.h | 10 +-
 bsd/netinet/ip_fw2_compat.h | 4 +-
 bsd/netinet/ip_icmp.c | 113 +-
 bsd/netinet/ip_icmp.h | 9 +-
 bsd/netinet/ip_id.c | 338 +-
 bsd/netinet/ip_input.c | 2265 ++-
 bsd/netinet/ip_mroute.c | 21 +-
 bsd/netinet/ip_mroute.h | 16 +-
 bsd/netinet/ip_output.c | 1779 +-
 bsd/netinet/ip_var.h | 260 +-
 bsd/netinet/kpi_ipfilter.c | 18 +-
 bsd/netinet/lro_ext.h | 1 +
 bsd/netinet/mp_pcb.c | 273 +
 bsd/netinet/mp_pcb.h | 91 +
 bsd/netinet/mp_proto.c | 91 +
 bsd/netinet/mptcp.c | 665 +
 bsd/netinet/mptcp.h | 382 +
 bsd/netinet/mptcp_opt.c | 1887 +++
 bsd/netinet/mptcp_opt.h | 66 +
 bsd/netinet/mptcp_seq.h | 42 +
 bsd/netinet/mptcp_subr.c | 4648 ++++++
 bsd/netinet/mptcp_timer.c | 233 +
 bsd/netinet/mptcp_timer.h | 45 +
 bsd/netinet/mptcp_usrreq.c | 1954 +++
 bsd/netinet/mptcp_var.h | 604 +
 bsd/netinet/raw_ip.c | 253 +-
 bsd/netinet/tcp.h | 68 +-
 bsd/netinet/tcp_input.c | 1885 ++-
 bsd/netinet/tcp_ledbat.c | 22 +-
 bsd/netinet/tcp_lro.c | 162 +-
 bsd/netinet/tcp_newreno.c | 20 +-
 bsd/netinet/tcp_output.c | 728 +-
 bsd/netinet/tcp_sack.c | 50 +-
 bsd/netinet/tcp_subr.c | 479 +-
 bsd/netinet/tcp_timer.c | 536 +-
 bsd/netinet/tcp_timer.h | 26 +-
 bsd/netinet/tcp_usrreq.c | 861 +-
 bsd/netinet/tcp_var.h | 240 +-
 bsd/netinet/udp_usrreq.c | 1507 +-
 bsd/netinet/udp_var.h | 127 +-
 bsd/netinet6/Makefile | 13 +-
 bsd/netinet6/ah.h | 4 +-
 bsd/netinet6/ah6.h | 5 +-
 bsd/netinet6/ah_input.c | 22 +-
 bsd/netinet6/esp.h | 6 +-
 bsd/netinet6/esp6.h | 6 +-
 bsd/netinet6/esp_input.c | 22 +-
 bsd/netinet6/esp_rijndael.h | 4 +-
 bsd/netinet6/frag6.c | 771 +-
 bsd/netinet6/icmp6.c | 311 +-
 bsd/netinet6/in6.c | 3221 ++--
 bsd/netinet6/in6.h | 694 +-
 bsd/netinet6/in6_cga.c | 324 +
 bsd/netinet6/in6_cksum.c | 288 +-
 bsd/netinet6/in6_gif.c | 133 +-
 bsd/netinet6/in6_gif.h | 12 +-
 bsd/netinet6/in6_ifattach.c | 725 +-
 bsd/netinet6/in6_ifattach.h | 25 +-
 bsd/netinet6/in6_mcast.c | 161 +-
 bsd/netinet6/in6_pcb.c | 930 +-
 bsd/netinet6/in6_pcb.h | 24 +-
 bsd/netinet6/in6_prefix.c | 1382 --
 bsd/netinet6/in6_prefix.h | 90 -
 bsd/netinet6/in6_proto.c | 477 +-
 bsd/netinet6/in6_rmx.c | 453 +-
 bsd/netinet6/in6_src.c | 592 +-
 bsd/netinet6/in6_var.h | 716 +-
 bsd/netinet6/ip6_ecn.h | 6 +-
 bsd/netinet6/ip6_forward.c | 99 +-
 bsd/netinet6/ip6_fw.h | 10 +-
 bsd/netinet6/ip6_id.c | 56 +-
 bsd/netinet6/ip6_input.c | 1005 +-
 bsd/netinet6/ip6_mroute.c | 21 +-
 bsd/netinet6/ip6_mroute.h | 23 +-
 bsd/netinet6/ip6_output.c | 1975 ++-
 bsd/netinet6/ip6_var.h | 346 +-
 bsd/netinet6/ip6protosw.h | 118 +-
 bsd/netinet6/ipcomp.h | 6 +-
 bsd/netinet6/ipcomp6.h | 4 +-
 bsd/netinet6/ipsec.c | 401 +-
 bsd/netinet6/ipsec.h | 28 +-
 bsd/netinet6/ipsec6.h | 9 +-
 bsd/netinet6/mld6.c | 720 +-
 bsd/netinet6/mld6_var.h | 26 +-
 bsd/netinet6/nd6.c | 1695 +-
 bsd/netinet6/nd6.h | 433 +-
 bsd/netinet6/nd6_nbr.c | 588 +-
 bsd/netinet6/nd6_prproxy.c | 214 +-
 bsd/netinet6/nd6_rtr.c | 876 +-
 bsd/netinet6/nd6_send.c | 227 +
 bsd/netinet6/pim6_var.h | 4 +-
 bsd/netinet6/raw_ip6.c | 230 +-
 bsd/netinet6/raw_ip6.h | 8 +-
 bsd/netinet6/route6.c | 182 +-
 bsd/netinet6/scope6.c | 314 +-
 bsd/netinet6/scope6_var.h | 56 +-
 bsd/netinet6/tcp6_var.h | 7 +-
 bsd/netinet6/udp6_output.c | 174 +-
 bsd/netinet6/udp6_usrreq.c | 602 +-
 bsd/netinet6/udp6_var.h | 44 +-
 bsd/netkey/Makefile | 8 -
 bsd/netkey/key.c | 6483 ++++----
 bsd/netkey/key.h | 7 +-
 bsd/netkey/key_debug.c | 31 +-
 bsd/netkey/key_debug.h | 4 +-
 bsd/netkey/key_var.h | 6 +-
 bsd/netkey/keydb.h | 8 +-
 bsd/netkey/keysock.c | 75 +-
 bsd/netkey/keysock.h | 7 +-
 bsd/nfs/Makefile | 12 +-
 bsd/nfs/nfs.h | 123 +-
 bsd/nfs/nfs4_subs.c | 8 +-
 bsd/nfs/nfs4_vnops.c | 2 +-
 bsd/nfs/nfs_gss.c | 311 +-
 bsd/nfs/nfs_gss.h | 56 +-
 .../IOAsmSupport.s => bsd/nfs/nfs_ioctl.h | 26 +-
 bsd/nfs/nfs_node.c | 79 +-
 bsd/nfs/nfs_socket.c | 600 +-
 bsd/nfs/nfs_subs.c | 121 +-
 bsd/nfs/nfs_syscalls.c | 7 +-
 bsd/nfs/nfs_vfsops.c | 73 +-
 bsd/nfs/nfs_vnops.c | 103 +-
 bsd/nfs/nfsmount.h | 10 +-
 bsd/nfs/nfsnode.h | 22 +-
 bsd/nfs/nfsproto.h | 6 +-
 bsd/nfs/nfsrvcache.h | 4 +-
 bsd/security/Makefile | 20 -
 bsd/security/audit/Makefile | 21 -
 bsd/security/audit/audit_arg.c | 20 +-
 bsd/security/audit/audit_pipe.c | 3 +-
 bsd/security/audit/audit_session.c | 3 +-
 bsd/security/audit/audit_syscalls.c | 5 +
 bsd/sys/Makefile | 14 +-
 bsd/sys/_structs.h | 211 +-
 bsd/sys/_types/Makefile | 151 +
 bsd/sys/_types/___offsetof.h | 30 +
 bsd/sys/_types/_blkcnt_t.h | 31 +
 bsd/sys/_types/_blksize_t.h | 31 +
 bsd/sys/_types/_clock_t.h | 31 +
 bsd/sys/_types/_ct_rune_t.h | 32 +
 bsd/sys/_types/_dev_t.h | 31 +
 bsd/sys/_types/_errno_t.h | 31 +
 .../sys/_types/_fd_clr.h | 10 +-
 bsd/sys/_types/_fd_copy.h | 30 +
 bsd/sys/_types/_fd_def.h | 73 +
 bsd/sys/_types/_fd_isset.h | 30 +
 .../sys/_types/_fd_set.h | 10 +-
 bsd/sys/_types/_fd_setsize.h | 30 +
 bsd/sys/_types/_fd_zero.h | 30 +
 bsd/sys/_types/_filesec_t.h | 32 +
 bsd/sys/_types/_fsblkcnt_t.h | 31 +
 bsd/sys/_types/_fsfilcnt_t.h | 31 +
 .../mach/task.h => bsd/sys/_types/_gid_t.h | 11 +-
 bsd/sys/_types/_guid_t.h | 36 +
 bsd/sys/_types/_id_t.h | 31 +
 bsd/sys/_types/_in_addr_t.h | 31 +
 bsd/sys/_types/_in_port_t.h | 31 +
 bsd/sys/_types/_ino64_t.h | 31 +
 bsd/sys/_types/_ino_t.h | 31 +
 bsd/sys/_types/_int16_t.h | 31 +
 bsd/sys/_types/_int32_t.h | 31 +
 bsd/sys/_types/_int64_t.h | 31 +
 bsd/sys/_types/_int8_t.h | 31 +
 bsd/sys/_types/_intptr_t.h | 31 +
 bsd/sys/_types/_iovec_t.h | 34 +
 bsd/sys/_types/_key_t.h | 31 +
 .../sys/_types/_mach_port_t.h | 47 +-
 bsd/sys/_types/_mbstate_t.h | 32 +
 bsd/sys/_types/_mode_t.h | 31 +
 bsd/sys/_types/_nlink_t.h | 31 +
 bsd/sys/_types/_null.h | 30 +
 bsd/sys/_types/_o_dsync.h | 30 +
 bsd/sys/_types/_o_sync.h | 30 +
 bsd/sys/_types/_off_t.h | 31 +
 bsd/sys/_types/_os_inline.h | 34 +
 bsd/sys/_types/_pid_t.h | 31 +
 bsd/sys/_types/_posix_vdisable.h | 30 +
 bsd/sys/_types/_pthread_attr_t.h | 31 +
 bsd/sys/_types/_pthread_cond_t.h | 31 +
 bsd/sys/_types/_pthread_condattr_t.h | 31 +
 bsd/sys/_types/_pthread_key_t.h | 31 +
 bsd/sys/_types/_pthread_mutex_t.h | 31 +
 bsd/sys/_types/_pthread_mutexattr_t.h | 31 +
 bsd/sys/_types/_pthread_once_t.h | 31 +
 bsd/sys/_types/_pthread_rwlock_t.h | 31 +
 bsd/sys/_types/_pthread_rwlockattr_t.h | 31 +
 bsd/sys/_types/_pthread_t.h | 31 +
 bsd/sys/_types/_ptrdiff_t.h | 32 +
 bsd/sys/_types/_rsize_t.h | 31 +
 bsd/sys/_types/_rune_t.h | 31 +
 bsd/sys/_types/_s_ifmt.h | 74 +
 bsd/sys/_types/_sa_family_t.h | 31 +
 bsd/sys/_types/_seek_set.h | 34 +
 .../sys/_types/_sigaltstack.h | 41 +-
 bsd/sys/_types/_sigset_t.h | 31 +
 bsd/sys/_types/_size_t.h | 31 +
 bsd/sys/_types/_socklen_t.h | 32 +
 bsd/sys/_types/_ssize_t.h | 31 +
 bsd/sys/_types/_suseconds_t.h | 31 +
 bsd/sys/_types/_time_t.h | 31 +
 .../sys/_types/_timespec.h | 17 +-
 bsd/sys/_types/_timeval.h | 35 +
 bsd/sys/_types/_timeval32.h | 35 +
 bsd/sys/_types/_ucontext.h | 50 +
 bsd/sys/_types/_ucontext64.h | 45 +
 bsd/sys/_types/_uid_t.h | 31 +
 bsd/sys/_types/_uintptr_t.h | 31 +
 bsd/sys/_types/_useconds_t.h | 31 +
 bsd/sys/_types/_user32_itimerval.h | 37 +
 bsd/sys/_types/_user32_timespec.h | 37 +
 bsd/sys/_types/_user32_timeval.h | 37 +
 bsd/sys/_types/_user64_itimerval.h | 37 +
 bsd/sys/_types/_user64_timespec.h | 37 +
 bsd/sys/_types/_user64_timeval.h | 37 +
 bsd/sys/_types/_user_timespec.h | 42 +
 bsd/sys/_types/_user_timeval.h | 37 +
 bsd/sys/_types/_uuid_t.h | 31 +
 bsd/sys/_types/_va_list.h | 32 +
 bsd/sys/_types/_wchar_t.h | 35 +
 bsd/sys/_types/_wint_t.h | 32 +
 bsd/sys/aio.h | 28 +-
 bsd/sys/aio_kern.h | 3 +-
 bsd/sys/attr.h | 8 +-
 bsd/sys/bitstring.h | 180 +
 bsd/sys/buf.h | 48 +-
 bsd/sys/buf_internal.h | 33 +-
 bsd/sys/cdefs.h | 105 +-
 bsd/sys/codesign.h | 173 +-
 bsd/sys/conf.h | 6 -
 bsd/sys/content_protection.h | 7 +
 bsd/sys/cprotect.h | 188 +-
 bsd/sys/dirent.h | 5 +-
 bsd/sys/disk.h | 26 +
 bsd/sys/disklabel.h | 4 +-
 bsd/sys/domain.h | 139 +-
 bsd/sys/dtrace.h | 30 +
 bsd/sys/dtrace_glue.h | 11 +-
 bsd/sys/dtrace_impl.h | 2 +
 bsd/sys/errno.h | 6 +
 bsd/sys/event.h | 97 +-
 bsd/sys/fasttrap_impl.h | 2 +-
 bsd/sys/fbt.h | 2 +-
 bsd/sys/fcntl.h | 133 +-
 bsd/sys/file_internal.h | 47 +-
 bsd/sys/filedesc.h | 17 +-
 bsd/sys/fsctl.h | 69 +-
 bsd/sys/fslog.h | 38 +-
 bsd/sys/guarded.h | 107 +
 bsd/sys/imgact.h | 2 +
 bsd/sys/ipc.h | 24 +-
 bsd/sys/kas_info.h | 2 +-
 bsd/sys/kasl.h | 48 +
 bsd/sys/kauth.h | 27 +-
 bsd/sys/kdebug.h | 232 +-
 bsd/sys/kern_control.h | 42 +-
 bsd/sys/kern_event.h | 254 +-
 bsd/sys/kern_memorystatus.h | 274 +-
 bsd/sys/kern_overrides.h | 55 +
 bsd/sys/kern_tests.h | 4 +
 bsd/sys/kernel.h | 2 -
 bsd/sys/kernel_types.h | 11 +-
 bsd/sys/kpi_mbuf.h | 130 +-
 bsd/sys/kpi_socket.h | 78 +-
 bsd/sys/kpi_socketfilter.h | 18 +
 bsd/sys/linker_set.h | 4 +-
 bsd/sys/lockf.h | 8 +
 bsd/sys/malloc.h | 17 +-
 bsd/sys/mbuf.h | 440 +-
 bsd/sys/mcache.h | 87 +-
 bsd/sys/mman.h | 20 +-
 bsd/sys/mount.h | 17 +-
 bsd/sys/mount_internal.h | 28 +-
 bsd/sys/msg.h | 23 +-
 bsd/sys/munge.h | 3 +
 bsd/sys/namei.h | 8 +-
 bsd/sys/param.h | 8 +-
 bsd/sys/posix_sem.h | 3 +-
 bsd/sys/priv.h | 7 +
 bsd/sys/proc.h | 43 +-
 bsd/sys/proc_info.h | 69 +-
 bsd/sys/proc_internal.h | 66 +-
 bsd/sys/proc_uuid_policy.h | 111 +
 bsd/sys/process_policy.h | 58 +-
 bsd/sys/protosw.h | 627 +-
 bsd/sys/pthread_internal.h | 120 +-
 bsd/sys/pthread_shims.h | 224 +
 bsd/sys/queue.h | 115 +-
 bsd/sys/resource.h | 189 +-
 bsd/sys/resourcevar.h | 13 +-
 bsd/sys/sdt_impl.h | 10 +-
 bsd/sys/select.h | 49 +-
 bsd/sys/sem.h | 17 +-
 bsd/sys/semaphore.h | 2 +-
 bsd/sys/shm.h | 17 +-
 bsd/sys/signal.h | 40 +-
 bsd/sys/signalvar.h | 4 +-
 bsd/sys/socket.h | 290 +-
 bsd/sys/socketvar.h | 682 +-
 bsd/sys/sockio.h | 36 +-
 bsd/sys/spawn_internal.h | 134 +-
 bsd/sys/stat.h | 122 +-
 bsd/sys/sys_domain.h | 10 +-
 bsd/sys/sysctl.h | 19 +-
 bsd/sys/sysent.h | 12 +-
 bsd/sys/systm.h | 16 +-
 bsd/sys/termios.h | 4 +-
 bsd/sys/time.h | 64 +-
 bsd/sys/timeb.h | 5 +-
 bsd/sys/times.h | 5 +-
 bsd/sys/types.h | 216 +-
 bsd/sys/ubc.h | 6 +-
 bsd/sys/ubc_internal.h | 9 +-
 bsd/sys/ucontext.h | 12 +-
 bsd/sys/ucred.h | 2 -
 bsd/sys/uio.h | 19 +-
 bsd/sys/uio_internal.h | 2 +-
 bsd/sys/un.h | 14 +-
 bsd/sys/unistd.h | 10 +-
 bsd/sys/unpcb.h | 2 -
 bsd/sys/user.h | 53 +-
 bsd/sys/vnode.h | 27 +-
 bsd/sys/vnode_if.h | 4 +-
 bsd/sys/vnode_internal.h | 46 +-
 bsd/sys/wait.h | 11 +-
 bsd/sys/xattr.h | 14 +-
 bsd/uuid/Makefile | 21 +-
 bsd/uuid/uuid.h | 6 +-
 bsd/vfs/Makefile | 9 -
 bsd/vfs/kpi_vfs.c | 1515 +-
 bsd/vfs/vfs_attrlist.c | 216 +-
 bsd/vfs/vfs_bio.c | 128 +-
 bsd/vfs/vfs_cache.c | 103 +-
 bsd/vfs/vfs_cluster.c | 125 +-
 bsd/vfs/vfs_conf.c | 36 +-
 bsd/vfs/vfs_fsevents.c | 30 +-
 bsd/vfs/vfs_fslog.c | 323 +-
 bsd/vfs/vfs_init.c | 2 +-
 bsd/vfs/vfs_journal.c | 298 +-
 bsd/vfs/vfs_journal.h | 17 +-
 bsd/vfs/vfs_lookup.c | 298 +-
 bsd/vfs/vfs_subr.c | 90 +-
 bsd/vfs/vfs_syscalls.c | 647 +-
 bsd/vfs/vfs_vnops.c | 60 +-
 bsd/vfs/vfs_xattr.c | 369 +-
 bsd/vfs/vnode_if.sh | 0
 bsd/vm/Makefile | 14 +-
 bsd/vm/dp_backing_file.c | 169 +-
 bsd/vm/vm_compressor_backing_file.c | 332 +
 bsd/vm/vm_unix.c | 243 +-
 bsd/vm/vnode_pager.c | 31 +-
 config/BSDKernel.exports | 2 +
 config/BSDKernel.i386.exports | 38 -
 config/Dummy.exports | 1 -
 config/IOKit.exports | 1 +
 config/IOKit.i386.exports | 571 -
 config/IOKit.x86_64.exports | 15 +-
 config/Libkern.exports | 1 +
 config/Libkern.i386.exports | 142 -
 config/MACFramework.i386.exports | 11 -
 config/Mach.exports | 1 +
 config/Mach.i386.exports | 4 -
 config/Makefile | 317 +-
 config/MasterVersion | 2 +-
 config/Private.exports | 76 +-
 config/Private.i386.exports | 38 -
 .../PlugIns/BSDKernel6.0.kext/Info.plist | 34 -
 .../PlugIns/IOKit6.0.kext/Info.plist | 34 -
 .../PlugIns/Libkern6.0.kext/Info.plist | 34 -
 .../PlugIns/Mach6.0.kext/Info.plist | 34 -
 .../PlugIns/System6.0.kext/Info.plist | 34 -
 config/System6.0.exports | 3190 ----
 config/System6.0.i386.exports | 34 -
 config/System6.0.x86_64.exports | 8 -
 config/Unsupported.exports | 4 +
 config/Unsupported.i386.exports | 136 -
 config/Unused.exports | 3 +
 config/compress-man-pages.pl | 95 -
 config/list_supported.sh | 24 +-
 config/newvers.pl | 4 +-
 .../platform/drvAppleMacIO/AppleMacIO.cpp | 305 -
 .../Drivers/platform/drvAppleNMI/AppleNMI.cpp | 153 -
 .../platform/drvAppleNVRAM/AppleNVRAM.cpp | 151 -
 .../drvApplePlatformExpert/AppleCPU.cpp | 114 -
 .../ApplePlatformExpert.cpp | 163 -
 iokit/IOKit/AppleKeyStoreInterface.h | 2 +-
 iokit/IOKit/IOCatalogue.h | 38 +-
 iokit/IOKit/IODMAController.h | 8 +-
 iokit/IOKit/IODMAEventSource.h | 15 +-
 iokit/IOKit/IODeviceTreeSupport.h | 6 +-
 iokit/IOKit/IOHibernatePrivate.h | 67 +-
 iokit/IOKit/IOKitDebug.h | 4 +
 iokit/IOKit/IOKitKeys.h | 5 +-
 iokit/IOKit/IOKitKeysPrivate.h | 9 +-
 iokit/IOKit/IOLocks.h | 12 +-
 iokit/IOKit/IOMapper.h | 1 +
 iokit/IOKit/IOMemoryDescriptor.h | 37 +-
 iokit/IOKit/IONVRAM.h | 7 +-
 iokit/IOKit/IOPlatformExpert.h | 2 +
 iokit/IOKit/IOPolledInterface.h | 1 +
 iokit/IOKit/IOReportMacros.h | 356 +
 iokit/IOKit/IOReportTypes.h | 186 +
 iokit/IOKit/IOReturn.h | 7 +-
 iokit/IOKit/IOService.h | 499 +-
 iokit/IOKit/IOServicePM.h | 23 +
 iokit/IOKit/IOTypes.h | 20 +-
 iokit/IOKit/IOUserClient.h | 26 +
 iokit/IOKit/Makefile | 20 +-
 iokit/IOKit/i386/Makefile | 33 -
 iokit/IOKit/machine/Makefile | 14 +-
 iokit/IOKit/nvram/Makefile | 16 +-
 iokit/IOKit/pci/IOPCIDevice.h | 161 -
 iokit/IOKit/platform/Makefile | 14 +-
 iokit/IOKit/power/Makefile | 16 +-
 iokit/IOKit/pwr_mgt/IOPM.h | 18 +-
 iokit/IOKit/pwr_mgt/IOPMLibDefs.h | 5 +-
 iokit/IOKit/pwr_mgt/IOPMPowerSource.h | 18 +-
 iokit/IOKit/pwr_mgt/IOPMPrivate.h | 108 +-
 iokit/IOKit/pwr_mgt/IOPMlog.h | 4 +-
 iokit/IOKit/pwr_mgt/IOPMpowerState.h | 7 +-
 iokit/IOKit/pwr_mgt/IOPowerConnection.h | 7 +
 iokit/IOKit/pwr_mgt/Makefile | 14 +-
 iokit/IOKit/pwr_mgt/RootDomain.h | 175 +-
 iokit/IOKit/rtc/Makefile | 16 +-
 iokit/IOKit/system_management/Makefile | 16 +-
 iokit/IOKit/x86_64/Makefile | 33 -
 iokit/Kernel/IOBufferMemoryDescriptor.cpp | 227 +-
 iokit/Kernel/IOCPU.cpp | 11 +-
 iokit/Kernel/IOCatalogue.cpp | 48 +-
 iokit/Kernel/IODMACommand.cpp | 8 +-
 iokit/Kernel/IODMAController.cpp | 4 +-
 iokit/Kernel/IODMAEventSource.cpp | 45 +-
 iokit/Kernel/IODataQueue.cpp | 3 +-
 iokit/Kernel/IODeviceTreeSupport.cpp | 232 +-
 iokit/Kernel/IOHibernateIO.cpp | 350 +-
 iokit/Kernel/IOHibernateRestoreKernel.c | 59 +-
 iokit/Kernel/IOInterruptController.cpp | 21 +-
 iokit/Kernel/IOKitKernelInternal.h | 31 +
 iokit/Kernel/IOLib.cpp | 208 +-
 iokit/Kernel/IOMapper.cpp | 41 +-
 iokit/Kernel/IOMemoryDescriptor.cpp | 181 +-
 iokit/Kernel/IOMultiMemoryDescriptor.cpp | 4 +-
 iokit/Kernel/IONVRAM.cpp | 58 +-
 iokit/Kernel/IOPMrootDomain.cpp | 2273 ++-
 iokit/Kernel/IOPlatformExpert.cpp | 73 +-
 iokit/Kernel/IORegistryEntry.cpp | 7 +-
 iokit/Kernel/IOService.cpp | 149 +-
 iokit/Kernel/IOServicePM.cpp | 1687 +-
 iokit/Kernel/IOServicePMPrivate.h | 174 +-
 iokit/Kernel/IOServicePrivate.h | 7 +-
 iokit/Kernel/IOStartIOKit.cpp | 2 +-
 iokit/Kernel/IOStatistics.cpp | 19 +-
 iokit/Kernel/IOTimerEventSource.cpp | 11 +-
 iokit/Kernel/IOUserClient.cpp | 125 +-
 iokit/Kernel/RootDomainUserClient.cpp | 38 +-
 iokit/Kernel/i386/IOKeyStoreHelper.cpp | 2 +-
 iokit/KernelConfigTables.cpp | 5 +-
 iokit/Makefile | 12 -
 iokit/User/Makefile | 42 -
 iokit/User/Makefile.user | 41 -
 iokit/bsddev/IOKitBSDInit.cpp | 76 +-
 iokit/conf/MASTER | 15 +-
 iokit/conf/MASTER.i386 | 20 -
 iokit/conf/MASTER.x86_64 | 8 +-
 iokit/conf/Makefile | 67 +-
 iokit/conf/Makefile.i386 | 16 -
 iokit/conf/Makefile.template | 54 +-
 iokit/conf/Makefile.x86_64 | 3 +-
 iokit/conf/files.i386 | 15 -
 kgmacros | 13670 ----------------
 libkern/Makefile | 25 +-
 libkern/OSKextLib.cpp | 17 +-
 libkern/c++/OSData.cpp | 4 +-
 libkern/c++/OSKext.cpp | 525 +-
 libkern/c++/OSSymbol.cpp | 17 +-
 libkern/c++/OSUnserializeXML.cpp | 185 +-
 libkern/c++/OSUnserializeXML.y | 125 +-
 libkern/conf/MASTER | 5 -
 libkern/conf/MASTER.i386 | 15 -
 libkern/conf/MASTER.x86_64 | 2 +
 libkern/conf/Makefile | 67 +-
 libkern/conf/Makefile.i386 | 17 -
 libkern/conf/Makefile.template | 60 +-
 libkern/conf/Makefile.x86_64 | 9 -
 libkern/conf/files | 7 +-
 libkern/conf/files.i386 | 7 -
 libkern/conf/files.x86_64 | 5 -
 libkern/crypto/corecrypto_aes.c | 4 +
 libkern/crypto/intel/sha1edp.h | 51 -
 libkern/crypto/intel/sha1edp.s | 1495 --
 libkern/gen/OSAtomicOperations.c | 8 +
 libkern/gen/OSDebug.cpp | 85 +-
 libkern/i386/OSAtomic.s | 108 -
 libkern/kmod/cplus_start.c | 58 +-
 libkern/kmod/cplus_stop.c | 30 +-
 libkern/kxld/Makefile | 13 +-
 libkern/kxld/i386/WKdmCompress.s | 597 -
 libkern/kxld/i386/WKdmDecompress.s | 675 -
 libkern/kxld/kxld_kext.c | 4 +-
 libkern/kxld/kxld_object.c | 18 +-
 libkern/kxld/kxld_reloc.c | 1 +
 libkern/kxld/kxld_seg.c | 6 +-
 libkern/libkern/Makefile | 11 +-
 libkern/libkern/OSAtomic.h | 39 +-
 libkern/libkern/OSKextLibPrivate.h | 7 +
 libkern/libkern/OSTypes.h | 51 +-
 libkern/libkern/c++/Makefile | 23 +-
 libkern/libkern/c++/OSBoolean.h | 14 +-
 libkern/libkern/c++/OSKext.h | 25 +-
 libkern/libkern/c++/OSMetaClass.h | 4 -
 libkern/libkern/c++/OSUnserialize.h | 32 +-
 libkern/libkern/crypto/Makefile | 9 +-
 libkern/libkern/crypto/sha2.h | 6 +-
 libkern/libkern/i386/Makefile | 13 +-
 libkern/libkern/i386/OSByteOrder.h | 5 +-
 libkern/libkern/i386/_OSByteOrder.h | 4 +-
 libkern/libkern/kxld_types.h | 4 +-
 libkern/libkern/machine/Makefile | 23 +-
 libkern/mkext.c | 16 -
 libkern/net/inet_aton.c | 138 +
 libkern/net/inet_ntoa.c | 59 +
 libkern/net/inet_ntop.c | 182 +
 libkern/net/inet_pton.c | 212 +
 libkern/uuid/Makefile | 40 -
 libkern/zlib/adler32.c | 20 -
 libkern/zlib/crc32.c | 1 +
 libkern/zlib/inffast.c | 9 -
 libkern/zlib/intel/adler32vec.s | 1050 --
 libkern/zlib/intel/inffastS.s | 1179 --
 libsa/Makefile | 17 -
 libsa/bootstrap.cpp | 147 +-
 libsa/conf/MASTER | 5 -
 libsa/conf/MASTER.i386 | 15 -
 libsa/conf/Makefile | 68 +-
 libsa/conf/Makefile.i386 | 8 -
 libsa/conf/Makefile.template | 40 +-
 libsa/conf/Makefile.x86_64 | 3 +
 libsa/lastkernelconstructor.c | 4 +-
 .../Libsyscall.aliases | 0
 libsyscall/Libsyscall.xcconfig | 15 +-
 .../Libsyscall.xcodeproj/project.pbxproj | 403 +-
 libsyscall/Platforms/MacOSX/i386/syscall.map | 5 +-
 .../Platforms/MacOSX/x86_64/syscall.map | 7 +-
 libsyscall/custom/SYS.h | 53 +-
 libsyscall/custom/__fork.s | 5 +-
 libsyscall/custom/__syscall.s | 2 +-
 libsyscall/custom/__vfork.s | 8 +-
 libsyscall/custom/custom.s | 94 +-
 libsyscall/custom/dummy.c | 1 +
 libsyscall/custom/errno.c | 69 +
 libsyscall/mach/abort.h | 11 +-
 libsyscall/mach/clock_sleep.c | 1 +
 libsyscall/mach/err_iokit.sub | 0
 libsyscall/mach/exc_catcher.h | 42 +-
 libsyscall/mach/mach/mach.h | 6 +
 .../mach/mach/vm_page_size.h | 40 +-
 libsyscall/mach/mach_init.c | 89 +-
 libsyscall/mach/mach_msg.c | 143 +
 libsyscall/mach/mach_port.c | 91 +
 libsyscall/mach/mach_vm.c | 185 +-
 libsyscall/mach/mig_deallocate.c | 2 +-
 libsyscall/mach/mig_reply_port.c | 32 +-
 libsyscall/mach/panic.c | 2 +-
 libsyscall/mach/port_obj.c | 4 +-
 libsyscall/mach/semaphore.c | 35 +-
 libsyscall/mach/slot_name.c | 15 +-
 .../mach/stack_logging_internal.h | 66 +-
 libsyscall/mach/string.h | 17 +-
 libsyscall/os/alloc_once.c | 39 +
 libsyscall/os/tsd.h | 80 +
 libsyscall/wrappers/__commpage_gettimeofday.c | 23 +
 libsyscall/wrappers/__commpage_gettimeofday.s | 131 +
 libsyscall/wrappers/_errno.h | 4 -
 libsyscall/wrappers/_libc_funcptr.c | 54 +-
 libsyscall/wrappers/_libkernel_init.c | 26 +-
 libsyscall/wrappers/_libkernel_init.h | 29 +-
 libsyscall/wrappers/cancelable/fcntl-base.c | 2 +
 libsyscall/wrappers/carbon_delete.c | 34 +
 libsyscall/wrappers/gethostuuid.c | 70 +
 libsyscall/wrappers/gethostuuid.h | 42 +
 libsyscall/wrappers/gethostuuid_private.h | 45 +
 libsyscall/wrappers/getiopolicy_np.c | 67 +
 libsyscall/wrappers/guarded_open_np.c | 45 +
 libsyscall/wrappers/legacy/munmap.c | 12 +-
 libsyscall/wrappers/libproc/libproc.c | 814 +
 libsyscall/wrappers/libproc/libproc.h | 130 +
 .../wrappers/libproc/libproc_internal.h | 127 +
 .../wrappers/libproc/proc_listpidspath.c | 611 +
 libsyscall/wrappers/mach_absolute_time.s | 155 +
 libsyscall/wrappers/spawn/posix_spawn.c | 1520 ++
 libsyscall/wrappers/spawn/spawn.h | 135 +
 libsyscall/wrappers/spawn/spawn_private.h | 55 +
 libsyscall/wrappers/string/index.c | 47 +
 libsyscall/wrappers/{ => string}/memcpy.c | 8 +-
 libsyscall/wrappers/string/memset.c | 112 +
 libsyscall/wrappers/string/strcmp.c | 46 +
 libsyscall/wrappers/string/strcpy.c | 35 +
 .../wrappers/string/strings.h | 54 +-
 libsyscall/wrappers/string/strlcpy.c | 37 +
 libsyscall/wrappers/string/strlen.c | 107 +
 libsyscall/wrappers/string/strsep.c | 69 +
 libsyscall/wrappers/unix03/mmap.c | 20 +-
 .../wrappers/unix03/munmap.c | 57 +-
 libsyscall/xcodescripts/compile-syscalls.pl | 1 +
 libsyscall/xcodescripts/create-syscalls.pl | 17 +-
 libsyscall/xcodescripts/mach_install_mig.sh | 42 +-
 lldbmacros.py | 184 -
 makedefs/MakeInc.cmd | 186 +-
 makedefs/MakeInc.def | 620 +-
 makedefs/MakeInc.dir | 850 +-
 makedefs/MakeInc.kernel | 390 +
 makedefs/MakeInc.rule | 896 +-
 makedefs/MakeInc.top | 470 +
 osfmk/Makefile | 12 -
 .../UserNotification/KUNCUserNotifications.c | 4 +-
 osfmk/UserNotification/Makefile | 17 +-
 osfmk/chud/chud_thread.c | 18 +
 osfmk/chud/chud_xnu.h | 11 +
 osfmk/chud/i386/chud_osfmk_callback_i386.c | 2 +-
 osfmk/chud/i386/chud_thread_i386.c | 43 +-
 osfmk/conf/MASTER | 39 +-
 osfmk/conf/MASTER.i386 | 79 -
 osfmk/conf/MASTER.x86_64 | 5 +-
 osfmk/conf/Makefile | 75 +-
 osfmk/conf/Makefile.i386 | 17 -
 osfmk/conf/Makefile.template | 111 +-
 osfmk/conf/Makefile.x86_64 | 19 +-
 osfmk/conf/files | 18 +-
 osfmk/conf/files.i386 | 134 -
 osfmk/conf/files.x86_64 | 10 +-
 osfmk/console/video_console.c | 10 +-
 osfmk/console/video_console.h | 2 -
 osfmk/default_pager/Makefile | 16 +-
 osfmk/default_pager/Makefile.template | 43 -
 osfmk/default_pager/default_pager.c | 25 +-
 osfmk/default_pager/default_pager_internal.h | 8 +-
 osfmk/default_pager/dp_backing_store.c | 97 +-
 osfmk/default_pager/dp_memory_object.c | 30 +-
 osfmk/device/Makefile | 7 +-
 osfmk/device/device.defs | 26 +-
 osfmk/device/iokit_rpc.c | 26 +-
 osfmk/device/subrs.c | 32 +-
 osfmk/gssd/Makefile | 14 +-
 osfmk/i386/AT386/model_dep.c | 43 +-
 osfmk/i386/Diagnostics.c | 55 +-
 osfmk/i386/Makefile | 1 -
 osfmk/i386/_setjmp.s | 96 -
 osfmk/i386/acpi.c | 63 +-
 osfmk/i386/acpi_wakeup.s | 315 -
 osfmk/i386/asm.h | 1 +
 osfmk/i386/asm64.h | 37 +-
 osfmk/i386/bcopy.s | 168 -
 osfmk/i386/bsd_i386.c | 56 +-
 osfmk/i386/bsd_i386_native.c | 13 +-
 osfmk/i386/commpage/commpage.c | 2 +-
 osfmk/i386/commpage/commpage.h | 14 +-
 osfmk/i386/commpage/commpage_asm.s | 41 +-
 osfmk/i386/commpage/pthreads.s | 228 -
 osfmk/i386/copyio.c | 621 -
 osfmk/i386/cpu.c | 16 +-
 osfmk/i386/cpu_capabilities.h | 3 -
 osfmk/i386/cpu_data.h | 122 +-
 osfmk/i386/cpu_threads.c | 45 +-
 osfmk/i386/cpu_threads.h | 8 +-
 osfmk/i386/cpu_topology.c | 20 +-
 osfmk/i386/cpuid.c | 62 +-
 osfmk/i386/cpuid.h | 43 +-
 osfmk/i386/cswitch.s | 165 -
 osfmk/i386/endian.h | 8 +-
 osfmk/i386/etimer.c | 309 -
 osfmk/i386/fpu.c | 49 +-
 osfmk/i386/fpu.h | 6 +-
 osfmk/i386/gdt.c | 10 +-
 osfmk/i386/genassym.c | 78 +-
 osfmk/i386/hibernate_i386.c | 19 +-
 osfmk/i386/hibernate_restore.c | 12 +-
 osfmk/i386/i386_init.c | 100 +-
 osfmk/i386/i386_lock.s | 400 +-
 osfmk/i386/i386_lowmem.h | 8 -
 osfmk/i386/i386_timer.c | 520 +
 osfmk/i386/i386_vm_init.c | 103 +-
 osfmk/i386/idle_pt.c | 71 -
 osfmk/i386/idt.s | 1221 --
 osfmk/i386/idt64.s | 1701 --
 osfmk/i386/ktss.c | 116 +-
 osfmk/i386/lapic.h | 6 +
 osfmk/i386/lapic_native.c | 5 +
 osfmk/i386/ldt.c | 34 +-
 osfmk/i386/locks.h | 17 +-
 osfmk/i386/locks_i386.c | 83 +-
 osfmk/i386/locore.s | 464 -
 osfmk/i386/loose_ends.c | 794 -
 osfmk/i386/lowglobals.h | 81 -
 osfmk/i386/lowmem_vectors.s | 103 -
 osfmk/i386/machine_check.c | 34 +-
 osfmk/i386/machine_routines.c | 107 +-
 osfmk/i386/machine_routines.h | 37 +
 osfmk/i386/machine_routines_asm.s | 314 -
 osfmk/i386/mcount.s | 75 -
 osfmk/i386/misc_protos.h | 24 +-
 osfmk/i386/mp.c | 128 +-
 osfmk/i386/mp.h | 75 +-
 osfmk/i386/mp_desc.c | 366 +-
 osfmk/i386/mp_desc.h | 12 +-
 osfmk/i386/pal_routines.h | 16 +-
 osfmk/i386/pal_routines_asm.s | 192 -
 osfmk/i386/pcb.c | 233 +-
 osfmk/i386/pcb_native.c | 362 +-
 osfmk/i386/phys.c | 61 +-
 osfmk/i386/pmCPU.c | 259 +-
 osfmk/i386/pmap.c | 2817 ----
 osfmk/i386/pmap.h | 215 +-
 osfmk/i386/pmap_common.c | 33 +-
 osfmk/i386/pmap_internal.h | 59 +-
 osfmk/i386/pmap_x86_common.c | 502 +-
 osfmk/i386/proc_reg.h | 88 +-
 osfmk/i386/rtclock.c | 45 +-
 osfmk/i386/rtclock_asm.h | 131 +-
 osfmk/i386/rtclock_asm_native.h | 36 +-
 osfmk/i386/rtclock_native.c | 2 +-
 osfmk/i386/seg.h | 57 +-
 osfmk/i386/start.s | 327 -
 osfmk/i386/start64.s | 256 -
 osfmk/i386/startup64.c | 182 +-
 osfmk/i386/thread.h | 24 +-
 osfmk/i386/trap.c | 210 +-
 osfmk/i386/trap.h | 10 +-
 osfmk/i386/trap_native.c | 115 +-
 osfmk/i386/ucode.c | 15 +-
 osfmk/i386/vmx/vmx_cpu.c | 14 +-
 osfmk/ipc/ipc_entry.c | 2 +-
 osfmk/ipc/ipc_hash.c | 4 +-
 osfmk/ipc/ipc_init.c | 1 -
 osfmk/ipc/ipc_init.h | 4 +-
 osfmk/ipc/ipc_kmsg.c | 269 +-
 osfmk/ipc/ipc_kmsg.h | 12 +-
 osfmk/ipc/ipc_mqueue.c | 156 +-
 osfmk/ipc/ipc_mqueue.h | 24 +
 osfmk/ipc/ipc_object.c | 75 +-
 osfmk/ipc/ipc_object.h | 2 +-
 osfmk/ipc/ipc_port.c | 405 +-
 osfmk/ipc/ipc_port.h | 76 +-
 osfmk/ipc/ipc_pset.c | 20 +-
 osfmk/ipc/ipc_right.c | 321 +-
 osfmk/ipc/ipc_right.h | 15 +-
 osfmk/ipc/ipc_space.c | 11 +-
 osfmk/ipc/ipc_space.h | 1 +
 osfmk/ipc/ipc_table.h | 2 +-
 osfmk/ipc/mach_debug.c | 16 +-
 osfmk/ipc/mach_kernelrpc.c | 103 +
 osfmk/ipc/mach_msg.c | 106 +-
 osfmk/ipc/mach_port.c | 794 +-
 osfmk/kdp/Makefile | 5 -
 osfmk/kdp/kdp.c | 405 +-
 osfmk/kdp/kdp_dyld.h | 7 +
 osfmk/kdp/kdp_protocol.h | 9 +
 osfmk/kdp/kdp_udp.c | 361 +-
 osfmk/kdp/kdp_udp.h | 138 +-
 osfmk/kdp/ml/i386/kdp_machdep.c | 699 -
 osfmk/kdp/ml/i386/kdp_vm.c | 146 -
 osfmk/kdp/ml/i386/kdp_x86_common.c | 27 +-
 osfmk/kern/Makefile | 10 +-
 osfmk/kern/affinity.c | 1 +
 osfmk/kern/ast.c | 26 +-
 osfmk/kern/ast.h | 29 +-
 osfmk/kern/bsd_kern.c | 111 +-
 osfmk/kern/btlog.c | 327 +
 osfmk/kern/btlog.h | 89 +
 osfmk/kern/call_entry.h | 14 +-
 osfmk/kern/clock.c | 36 +-
 osfmk/kern/clock.h | 45 +-
 osfmk/kern/clock_oldops.c | 2 +-
 osfmk/kern/debug.c | 59 +-
 osfmk/kern/debug.h | 123 +-
 osfmk/kern/exc_resource.h | 191 +
 osfmk/kern/exception.c | 40 +-
 osfmk/kern/hibernate.c | 100 +-
 osfmk/kern/host.c | 139 +-
 osfmk/kern/host_notify.h | 2 +-
 osfmk/kern/host_statistics.h | 5 +
 osfmk/kern/ipc_host.c | 13 +-
 osfmk/kern/ipc_host.h | 2 +-
 osfmk/kern/ipc_kobject.c | 9 +-
 osfmk/kern/ipc_kobject.h | 3 +-
 osfmk/kern/ipc_mig.c | 105 +-
 osfmk/kern/ipc_mig.h | 10 +-
 osfmk/kern/ipc_sync.c | 27 +-
 osfmk/kern/ipc_tt.c | 213 +-
 osfmk/kern/ipc_tt.h | 6 +
 osfmk/kern/kalloc.h | 2 +-
 osfmk/kern/kern_types.h | 81 +
 osfmk/kern/kext_alloc.c | 9 +-
 osfmk/kern/kmod.c | 18 +-
 osfmk/kern/kpc.h | 162 +
 osfmk/kern/kpc_common.c | 364 +
 osfmk/kern/kpc_thread.c | 228 +
 osfmk/kern/ledger.c | 652 +-
 osfmk/kern/ledger.h | 37 +-
 osfmk/kern/locks.c | 100 +-
 osfmk/kern/locks.h | 18 +-
 osfmk/kern/machine.h | 2 +-
 osfmk/kern/mk_sp.c | 25 +-
 osfmk/kern/mk_timer.c | 16 +-
 osfmk/kern/mk_timer.h | 2 +-
 osfmk/kern/page_decrypt.h | 9 +
 osfmk/kern/printf.c | 2 -
 osfmk/kern/priority.c | 108 +-
 osfmk/kern/processor.c | 57 +-
 osfmk/kern/processor.h | 13 +-
 osfmk/kern/processor_data.h | 9 +-
 osfmk/kern/queue.h | 50 +-
 osfmk/kern/sched.h | 30 +-
 osfmk/kern/sched_average.c | 95 +-
 osfmk/kern/sched_fixedpriority.c | 60 +-
 osfmk/kern/sched_grrr.c | 12 +-
 osfmk/kern/sched_prim.c | 958 +-
 osfmk/kern/sched_prim.h | 67 +-
 osfmk/kern/sched_proto.c | 2 +-
 osfmk/kern/stack.c | 8 +-
 osfmk/kern/startup.c | 59 +-
 osfmk/kern/startup.h | 7 +-
 osfmk/kern/sync_lock.c | 811 +-
 osfmk/kern/sync_lock.h | 67 -
 osfmk/kern/sync_sema.c | 4 +-
 osfmk/kern/sync_sema.h | 2 +-
 osfmk/kern/syscall_subr.c | 123 +-
 osfmk/kern/syscall_sw.c | 287 +-
 osfmk/kern/syscall_sw.h | 37 +-
 osfmk/kern/task.c | 1388 +-
 osfmk/kern/task.h | 425 +-
 osfmk/kern/task_policy.c | 3800 +++--
 osfmk/kern/telemetry.c | 1023 ++
 osfmk/kern/telemetry.h | 59 +
 osfmk/kern/template.mk | 68 -
 osfmk/kern/thread.c | 431 +-
 osfmk/kern/thread.h | 156 +-
 osfmk/kern/thread_act.c | 10 +-
 osfmk/kern/thread_call.c | 374 +-
 osfmk/kern/thread_call.h | 135 +-
 osfmk/kern/thread_policy.c | 131 +-
 osfmk/kern/timer.c | 4 -
 osfmk/kern/timer_call.c | 1018 +-
 osfmk/kern/timer_call.h | 96 +-
 osfmk/kern/timer_queue.h | 76 +-
 osfmk/kern/wait_queue.c | 212 +-
 osfmk/kern/wait_queue.h | 47 +-
 osfmk/kern/zalloc.c | 1628 +-
 osfmk/kern/zalloc.h | 66 +-
 osfmk/kextd/Makefile | 15 +-
 osfmk/kperf/Makefile | 12 +-
 osfmk/kperf/action.c | 320 +-
 osfmk/kperf/action.h | 30 +-
 osfmk/kperf/buffer.h | 39 +-
 osfmk/kperf/callstack.c | 22 +-
 osfmk/kperf/filter.c | 117 -
 osfmk/kperf/kperf.c | 144 +-
 osfmk/kperf/kperf.h | 45 +-
 osfmk/kperf/kperf_arch.h | 1 +
 osfmk/kperf/kperf_kpc.c | 118 +
 osfmk/kperf/{filter.h => kperf_kpc.h} | 28 +-
 osfmk/kperf/kperfbsd.c | 527 +-
 osfmk/kperf/kperfbsd.h | 8 +
 osfmk/kperf/pet.c | 73 +-
 osfmk/kperf/pet.h | 6 +
 osfmk/kperf/sample.h | 15 +-
 osfmk/kperf/threadinfo.c | 2 +
 osfmk/kperf/timetrigger.c | 189 +-
 osfmk/kperf/timetrigger.h | 7 +-
 osfmk/libsa/Makefile | 1 -
 osfmk/libsa/string.h | 1 +
 osfmk/lockd/Makefile | 14 +-
 osfmk/mach/Makefile | 21 +-
 osfmk/mach/Makefile.template | 173 -
 osfmk/mach/branch_predicates.h | 4 +-
 osfmk/mach/clock_types.h | 18 +-
 osfmk/mach/exception_types.h | 5 +
 osfmk/mach/host_info.h | 14 +-
 osfmk/mach/host_special_ports.h | 9 +-
 osfmk/mach/i386/Makefile | 2 +-
 osfmk/mach/i386/exception.h | 2 +-
 osfmk/mach/i386/thread_status.h | 120 +-
 osfmk/mach/i386/vm_param.h | 45 +-
 osfmk/mach/lock_set.defs | 4 +
 osfmk/mach/mach_norma.defs | 261 -
 osfmk/mach/mach_port.defs | 87 +-
 osfmk/mach/mach_traps.h | 86 +-
 osfmk/mach/mach_types.defs | 40 +-
 osfmk/mach/mach_types.h | 13 +-
 osfmk/mach/mach_vm.defs | 12 +-
 osfmk/mach/machine.h | 6 +
 osfmk/mach/machine/sdt.h | 85 +-
 osfmk/mach/memory_object_types.h | 16 +-
 osfmk/mach/message.h | 60 +-
 osfmk/mach/norma_special_ports.h | 93 -
 osfmk/mach/port.h | 69 +-
 osfmk/mach/shared_region.h | 9 +-
 osfmk/mach/syscall_sw.h | 5 +
 osfmk/mach/task.defs | 30 +-
 osfmk/mach/task_info.h | 68 +-
 osfmk/mach/task_policy.h | 280 +-
 osfmk/mach/telemetry_notification.defs | 20 +
 osfmk/mach/thread_policy.h | 24 +
 osfmk/mach/thread_switch.h | 9 +-
 osfmk/mach/vm_map.defs | 6 +-
 osfmk/mach/vm_param.h | 13 +-
 osfmk/mach/vm_purgable.h | 13 +-
 osfmk/mach/vm_region.h | 81 +-
 osfmk/mach/vm_statistics.h | 68 +-
 osfmk/mach_debug/Makefile | 3 -
 osfmk/mach_debug/template.mk | 59 -
 osfmk/machine/Makefile | 3 +-
 osfmk/machine/machine_kpc.h | 37 +
 osfmk/man/DMN_port_deleted.html | 0
 osfmk/man/DMN_port_destroyed.html | 0
 osfmk/man/DP_backing_store_create.html | 0
 osfmk/man/DP_backing_store_delete.html | 0
 osfmk/man/DP_backing_store_info.html | 0
 osfmk/man/DP_object_create.html | 0
 osfmk/man/DR_overwrite_async.html | 0
 osfmk/man/HD_memory_manager.html | 0
 osfmk/man/MO_SY_completed.html | 0
 osfmk/man/MO_change_attributes.html | 0
 osfmk/man/MO_change_completed.html | 0
 osfmk/man/MO_data_initialize.html | 0
 osfmk/man/MO_data_unavailable.html | 0
 osfmk/man/MO_default_server.html | 0
 osfmk/man/MO_get_attributes.html | 0
 osfmk/man/MO_lock_completed.html | 0
 osfmk/man/MO_supply_completed.html | 0
 osfmk/man/MP_allocate_subsystem.html | 0
 osfmk/man/MP_request_notification.html | 0
 osfmk/man/P_set_policy_control.html | 0
 osfmk/man/P_set_policy_disable.html | 0
 osfmk/man/P_set_policy_enable.html | 0
 osfmk/man/SMO_default_server.html | 0
 osfmk/man/SMO_server.html | 0
 osfmk/man/TS_exception_ports.html | 0
 osfmk/man/VSD_memory_manager.html | 0
 osfmk/man/bootstrap_arguments.html | 0
 osfmk/man/bootstrap_completed.html | 0
 osfmk/man/bootstrap_environment.html | 0
 osfmk/man/bootstrap_ports.html | 0
 osfmk/man/catch_exception_raise.html | 0
 osfmk/man/clock_alarm.html | 0
 osfmk/man/clock_alarm_reply.html | 0
 osfmk/man/clock_get_attributes.html | 0
 osfmk/man/clock_get_time.html | 0
 osfmk/man/clock_map_time.html | 0
 osfmk/man/clock_reply_server.html | 0
 osfmk/man/clock_set_attributes.html | 0
 osfmk/man/clock_set_time.html | 0
 osfmk/man/clock_sleep.html | 0
 osfmk/man/default_pager_add_segment.html | 0
 osfmk/man/default_pager_info.html | 0
 osfmk/man/device_close.html | 0
 osfmk/man/device_get_status.html | 0
 osfmk/man/device_map.html | 0
 osfmk/man/device_open.html | 0
 osfmk/man/device_read.html | 0
 osfmk/man/device_read_async.html | 0
 osfmk/man/device_read_async_inband.html | 0
 osfmk/man/device_read_inband.html | 0
 osfmk/man/device_read_overwrite.html | 0
 osfmk/man/device_reply_server.html | 0
 osfmk/man/device_set_filter.html | 0
 osfmk/man/device_set_status.html | 0
 osfmk/man/device_write.html | 0
 osfmk/man/device_write_async.html | 0
 osfmk/man/device_write_async_inband.html | 0
 osfmk/man/device_write_inband.html | 0
 osfmk/man/do_mach_notify_dead_name.html | 0
 osfmk/man/do_mach_notify_no_senders.html | 0
 osfmk/man/do_mach_notify_send_once.html | 0
 osfmk/man/etap_get_info.html | 0
 osfmk/man/etap_probe.html | 0
 osfmk/man/etap_trace_event.html | 0
 osfmk/man/etap_trace_thread.html | 0
 osfmk/man/evc_wait.html | 0
 osfmk/man/exc_server.html | 0
 osfmk/man/host_adjust_time.html | 0
 osfmk/man/host_basic_info.html | 0
 osfmk/man/host_get_boot_info.html | 0
 osfmk/man/host_get_clock_control.html | 0
 osfmk/man/host_get_clock_service.html | 0
 osfmk/man/host_get_time.html | 0
 osfmk/man/host_info.html | 0
 osfmk/man/host_kernel_version.html | 0
 osfmk/man/host_load_info.html | 0
 osfmk/man/host_page_size.html | 0
 osfmk/man/host_processor_set_priv.html | 0
 osfmk/man/host_processor_sets.html | 0
 osfmk/man/host_processor_slots.html | 0
 osfmk/man/host_processors.html | 0
 osfmk/man/host_reboot.html | 0
 osfmk/man/host_sched_info.html | 0
 .../man/host_security_create_task_token.html | 0
 osfmk/man/host_security_set_task_token.html | 0
 osfmk/man/host_set_time.html | 0
 osfmk/man/host_statistics.html | 0
 osfmk/man/i386_get_ldt.html | 0
 osfmk/man/i386_io_port_add.html | 0
 osfmk/man/i386_io_port_list.html | 0
 osfmk/man/i386_io_port_remove.html | 0
 osfmk/man/i386_set_ldt.html | 0
 osfmk/man/index.html | 7 -
 osfmk/man/io_done_queue_create.html | 0
 osfmk/man/io_done_queue_terminate.html | 0
 osfmk/man/io_done_queue_wait.html | 0
 osfmk/man/kernel_resource_sizes.html | 0
 osfmk/man/ledger_create.html | 0
 osfmk/man/ledger_get_remote.html | 0
 osfmk/man/ledger_read.html | 0
 osfmk/man/ledger_set_remote.html | 0
 osfmk/man/ledger_terminate.html | 0
 osfmk/man/ledger_transfer.html | 0
 osfmk/man/lock_acquire.html | 0
 osfmk/man/lock_handoff.html | 0
 osfmk/man/lock_handoff_accept.html | 0
 osfmk/man/lock_make_stable.html | 0
 osfmk/man/lock_release.html | 0
 osfmk/man/lock_set_create.html | 0
 osfmk/man/lock_set_destroy.html | 0
 osfmk/man/lock_try.html | 0
 osfmk/man/mach_host_self.html | 0
 osfmk/man/mach_msg.html | 0
 osfmk/man/mach_msg_descriptor.html | 0
 osfmk/man/mach_msg_header.html | 0
 osfmk/man/mach_port_allocate.html | 0
 osfmk/man/mach_port_allocate_full.html | 0
 osfmk/man/mach_port_allocate_name.html | 0
 osfmk/man/mach_port_allocate_qos.html | 0
 osfmk/man/mach_port_deallocate.html | 0
 osfmk/man/mach_port_destroy.html | 0
 osfmk/man/mach_port_extract_member.html | 0
 osfmk/man/mach_port_extract_right.html | 0
 osfmk/man/mach_port_get_attributes.html | 0
 osfmk/man/mach_port_get_refs.html | 0
 osfmk/man/mach_port_get_set_status.html | 0
 osfmk/man/mach_port_insert_member.html | 0
 osfmk/man/mach_port_insert_right.html | 0
 osfmk/man/mach_port_limits.html | 0
 osfmk/man/mach_port_mod_refs.html | 0
 osfmk/man/mach_port_move_member.html | 0
 osfmk/man/mach_port_names.html | 0
 osfmk/man/mach_port_qos.html | 0
 osfmk/man/mach_port_set_attributes.html | 0
 osfmk/man/mach_port_set_mscount.html | 0
 osfmk/man/mach_port_set_seqno.html | 0
 osfmk/man/mach_port_status.html | 0
 osfmk/man/mach_port_type.html | 0
 osfmk/man/mach_ports_lookup.html | 0
 osfmk/man/mach_ports_register.html | 0
 osfmk/man/mach_reply_port.html | 0
 osfmk/man/mach_rpc_return_trap.html | 0
 osfmk/man/mach_rpc_trap.html | 0
 osfmk/man/mach_subsystem_create.html | 0
 osfmk/man/mach_task_self.html | 0
 osfmk/man/mach_thread_self.html | 0
 osfmk/man/mapped_tvalspec.html | 0
 osfmk/man/memory_object_attr_info.html | 0
 osfmk/man/memory_object_create.html | 0
 osfmk/man/memory_object_data_error.html | 0
 osfmk/man/memory_object_data_request.html | 0
 osfmk/man/memory_object_data_return.html | 0
 osfmk/man/memory_object_data_supply.html | 0
 osfmk/man/memory_object_data_unlock.html | 0
 osfmk/man/memory_object_destroy.html | 0
 osfmk/man/memory_object_init.html | 0
 osfmk/man/memory_object_lock_request.html | 0
 osfmk/man/memory_object_perf_info.html | 0
 osfmk/man/memory_object_server.html | 0
 osfmk/man/memory_object_synchronize.html | 0
 osfmk/man/memory_object_terminate.html | 0
 osfmk/man/norma_get_special_port.html | 102 -
 osfmk/man/norma_node_self.html | 32 -
 osfmk/man/norma_port_location_hint.html | 44 -
 osfmk/man/norma_set_special_port.html | 97 -
 osfmk/man/norma_task_clone.html | 78 -
 osfmk/man/norma_task_create.html | 59 -
 osfmk/man/norma_task_teleport.html | 71 -
 osfmk/man/notify_server.html | 0
 osfmk/man/policy_fifo_info.html | 0
 osfmk/man/policy_rr_info.html | 0
 osfmk/man/policy_timeshare_info.html | 0
 osfmk/man/processor_assign.html | 0
 osfmk/man/processor_basic_info.html | 0
 osfmk/man/processor_control.html | 0
 osfmk/man/processor_exit.html | 0
 osfmk/man/processor_get_assignment.html | 0
 osfmk/man/processor_info.html | 0
 osfmk/man/processor_set_basic_info.html | 0
 osfmk/man/processor_set_create.html | 0
 osfmk/man/processor_set_default.html | 0
 osfmk/man/processor_set_destroy.html | 0
 osfmk/man/processor_set_info.html | 0
 osfmk/man/processor_set_load_info.html | 0
 osfmk/man/processor_set_max_priority.html | 0
 osfmk/man/processor_set_statistics.html | 0
 osfmk/man/processor_set_tasks.html | 0
 osfmk/man/processor_set_threads.html | 0
 osfmk/man/processor_start.html | 0
 osfmk/man/prof_server.html | 0
 osfmk/man/receive_samples.html | 0
 osfmk/man/semaphore_create.html | 0
 osfmk/man/semaphore_destroy.html | 0
 osfmk/man/semaphore_signal.html | 0
 osfmk/man/semaphore_signal_all.html | 0
 osfmk/man/semaphore_wait.html | 0
 osfmk/man/seqnos_notify_server.html | 0
 osfmk/man/task_assign.html | 0
 osfmk/man/task_assign_default.html | 0
 osfmk/man/task_basic_info.html | 0
 osfmk/man/task_create.html | 1 -
 osfmk/man/task_get_assignment.html | 0
 osfmk/man/task_get_emulation_vector.html | 0
 osfmk/man/task_get_exception_ports.html | 0
 osfmk/man/task_get_special_port.html | 0
 osfmk/man/task_info.html | 0
 osfmk/man/task_policy.html | 0
 osfmk/man/task_resume.html | 0
 osfmk/man/task_sample.html | 0
 osfmk/man/task_set_emulation.html | 0
 osfmk/man/task_set_emulation_vector.html | 0
 osfmk/man/task_set_exception_ports.html | 0
 osfmk/man/task_set_info.html | 0
 osfmk/man/task_set_policy.html | 0
 osfmk/man/task_set_port_space.html | 0
 osfmk/man/task_set_special_port.html | 0
 osfmk/man/task_suspend.html | 0
 osfmk/man/task_swap_exception_ports.html | 0
 osfmk/man/task_terminate.html | 0
 osfmk/man/task_thread_times_info.html | 0
 osfmk/man/task_threads.html | 0
 osfmk/man/thread_abort.html | 0
 osfmk/man/thread_abort_safely.html | 0
 osfmk/man/thread_activation_create.html | 0
 osfmk/man/thread_assign.html | 0
 osfmk/man/thread_assign_default.html | 0
 osfmk/man/thread_basic_info.html | 0
 osfmk/man/thread_create.html | 0
 osfmk/man/thread_create_running.html | 0
 osfmk/man/thread_depress_abort.html | 0
 osfmk/man/thread_get_assignment.html | 0
 osfmk/man/thread_get_exception_ports.html | 0
 osfmk/man/thread_get_special_port.html | 0
 osfmk/man/thread_get_state.html | 0
 osfmk/man/thread_info.html | 0
 osfmk/man/thread_policy.html | 0
 osfmk/man/thread_resume.html | 0
 osfmk/man/thread_sample.html | 0
 osfmk/man/thread_set_exception_ports.html | 0
 osfmk/man/thread_set_policy.html | 0
 osfmk/man/thread_set_special_port.html | 0
 osfmk/man/thread_set_state.html | 0
 osfmk/man/thread_suspend.html | 0
 osfmk/man/thread_switch.html | 0
 osfmk/man/thread_terminate.html | 0
 osfmk/man/thread_wire.html | 0
 osfmk/man/tvalspec.html | 0
 osfmk/man/vm_allocate.html | 0
 osfmk/man/vm_behavior_set.html | 0
 osfmk/man/vm_copy.html | 0
 osfmk/man/vm_deallocate.html | 0
 osfmk/man/vm_inherit.html | 1 -
 osfmk/man/vm_machine_attribute.html | 0
 osfmk/man/vm_map.html | 0
 osfmk/man/vm_msync.html | 0
 osfmk/man/vm_protect.html | 0
 osfmk/man/vm_read.html | 0
 osfmk/man/vm_region.html | 0
 osfmk/man/vm_region_basic_info.html | 0
 osfmk/man/vm_remap.html | 0
 osfmk/man/vm_statistics.html | 0
 osfmk/man/vm_wire.html | 0
 osfmk/man/vm_write.html | 0
 osfmk/profiling/Makefile | 13 +-
 osfmk/profiling/i386/profile-asm.s | 1451 --
 osfmk/profiling/i386/profile-md.c | 1244 --
 osfmk/profiling/i386/profile-md.h | 51 +-
 osfmk/vm/Makefile | 4 +-
 libkern/libkern/WKdm.h => osfmk/vm/WKdm_new.h | 73 +-
 osfmk/vm/bsd_vm.c | 13 +-
 osfmk/vm/default_freezer.c | 117 +-
 osfmk/vm/memory_object.c | 54 +-
 osfmk/vm/memory_object.h | 7 +-
 osfmk/vm/pmap.h | 114 +-
 osfmk/vm/vm32_user.c | 10 +-
 osfmk/vm/vm_apple_protect.c | 2 +
 osfmk/vm/vm_compressor.c | 2602 +++
 osfmk/vm/vm_compressor.h | 256 +
 osfmk/vm/vm_compressor_backing_store.c | 1597 ++
 osfmk/vm/vm_compressor_backing_store.h | 107 +
 osfmk/vm/vm_compressor_pager.c | 781 +
 osfmk/vm/vm_compressor_pager.h | 91 +
 osfmk/vm/vm_debug.c | 91 +-
 osfmk/vm/vm_external.h | 1 +
 osfmk/vm/vm_fault.c | 869 +-
 osfmk/vm/vm_fault.h | 3 +-
 osfmk/vm/vm_init.c | 3 +-
 osfmk/vm/vm_init.h | 6 +-
 osfmk/vm/vm_kern.c | 354 +-
 osfmk/vm/vm_kern.h | 15 +-
 osfmk/vm/vm_map.c | 1550 +-
 osfmk/vm/vm_map.h | 118 +-
 osfmk/vm/vm_map_store.c | 4 +
 osfmk/vm/vm_map_store_ll.c | 62 +-
 osfmk/vm/vm_map_store_rb.c | 4 +
 osfmk/vm/vm_object.c | 747 +-
 osfmk/vm/vm_object.h | 72 +-
 osfmk/vm/vm_page.h | 93 +-
 osfmk/vm/vm_pageout.c | 1695 +-
 osfmk/vm/vm_pageout.h | 85 +-
 osfmk/vm/vm_protos.h | 52 +-
 osfmk/vm/vm_purgeable.c | 433 +-
 osfmk/vm/vm_purgeable_internal.h | 7 +-
 osfmk/vm/vm_resident.c | 974 +-
 osfmk/vm/vm_shared_region.c | 379 +-
 osfmk/vm/vm_shared_region.h | 66 +-
 osfmk/vm/vm_user.c | 461 +-
 osfmk/x86_64/Makefile | 4 +
 osfmk/x86_64/WKdmCompress_new.s | 439 +
 osfmk/x86_64/WKdmData_new.s | 288 +
 osfmk/x86_64/WKdmDecompress_new.s | 280 +
 osfmk/x86_64/idt64.s | 720 +-
 osfmk/x86_64/idt_table.h | 17 +-
 osfmk/x86_64/kpc_x86.c | 554 +
 osfmk/x86_64/locore.s | 49 +-
 osfmk/x86_64/loose_ends.c | 50 +-
 osfmk/x86_64/machine_kpc.h | 51 +
 osfmk/x86_64/machine_routines_asm.s | 11 +
 osfmk/x86_64/pmap.c | 350 +-
 osfmk/x86_64/start.s | 5 +-
 pexpert/Makefile | 8 -
 pexpert/conf/MASTER | 6 +-
 pexpert/conf/MASTER.i386 | 18 -
 pexpert/conf/Makefile | 67 +-
 pexpert/conf/Makefile.i386 | 8 -
 pexpert/conf/Makefile.template | 32 +-
 pexpert/conf/files.i386 | 8 -
 pexpert/gen/bootargs.c | 13 -
 pexpert/i386/pe_kprintf.c | 10 +-
 pexpert/pexpert/Makefile | 3 -
 pexpert/pexpert/i386/boot.h | 1 +
 pexpert/pexpert/pexpert.h | 4 +
 security/Makefile | 16 +-
 security/_label.h | 1 -
 security/conf/MASTER | 4 +
 security/conf/MASTER.i386 | 32 -
 security/conf/MASTER.x86_64 | 2 +-
 security/conf/Makefile | 67 +-
 security/conf/Makefile.i386 | 7 -
 security/conf/Makefile.template | 56 +-
 security/conf/files | 3 +-
 security/conf/files.i386 | 0
 security/mac.h | 5 +
 security/mac_alloc.c | 9 +
 security/mac_base.c | 124 +-
 security/mac_data.h | 4 +
 security/mac_framework.h | 29 +-
 security/mac_internal.h | 11 +-
 security/mac_kext.c | 22 +
 security/mac_mach_internal.h | 4 +
 security/mac_policy.h | 259 +-
 security/mac_process.c | 34 +
 security/mac_pty.c | 16 +
 security/mac_socket.c | 2 +
 security/mac_stub.c | 708 -
 security/mac_system.c | 13 +
 security/mac_vfs.c | 193 +-
 {libsa/libsa => tools}/Makefile | 18 +-
 tools/lldbmacros/.lldbinit | 4 +
 tools/lldbmacros/Makefile | 65 +
 tools/lldbmacros/README | 338 +
 tools/lldbmacros/apic.py | 354 +
 tools/lldbmacros/core/__init__.py | 4 +
 tools/lldbmacros/core/caching.py | 165 +
 tools/lldbmacros/core/configuration.py | 10 +
 tools/lldbmacros/core/cvalue.py | 463 +
 tools/lldbmacros/core/kernelcore.py | 376 +
 tools/lldbmacros/core/lazytarget.py | 57 +
 tools/lldbmacros/core/operating_system.py | 600 +
 tools/lldbmacros/core/standard.py | 126 +
 tools/lldbmacros/core/syntax_checker.py | 51 +
 tools/lldbmacros/core/xnu_lldb_init.py | 43 +
 tools/lldbmacros/ioreg.py | 773 +
 tools/lldbmacros/ipc.py | 589 +
 tools/lldbmacros/kdp.py | 285 +
 tools/lldbmacros/mbufdefines.py | 30 +
 tools/lldbmacros/mbufs.py | 778 +
 tools/lldbmacros/memory.py | 1911 +++
 tools/lldbmacros/misc.py | 341 +
 tools/lldbmacros/net.py | 1890 +++
 tools/lldbmacros/netdefines.py | 125 +
 tools/lldbmacros/pci.py | 265 +
 tools/lldbmacros/plugins/__init__.py | 3 +
 tools/lldbmacros/plugins/speedtracer.py | 47 +
 tools/lldbmacros/plugins/zprint_perf_log.py | 31 +
 tools/lldbmacros/pmap.py | 560 +
 tools/lldbmacros/process.py | 1320 ++
 tools/lldbmacros/routedefines.py | 27 +
 tools/lldbmacros/scheduler.py | 63 +
 tools/lldbmacros/userspace.py | 525 +
 tools/lldbmacros/utils.py | 391 +
 tools/lldbmacros/xnu.py | 626 +
 tools/lldbmacros/xnudefines.py | 79 +
 tools/remote_build.sh | 221 +
 tools/symbolify.py | 15 +-
 tools/tests/MPMMTest/Makefile | 69 +-
 tools/tests/Makefile | 40 +
 tools/tests/affinity/Makefile | 58 +-
 tools/tests/affinity/sets.c | 3 +-
 tools/tests/jitter/Makefile | 40 +-
 tools/tests/jitter/cpu_number.s | 2 +-
 tools/tests/kqueue_tests/Makefile | 39 +-
 tools/tests/kqueue_tests/kqueue_file_tests.c | 1 +
 tools/tests/kqueue_tests/kqueue_timer_tests.c | 2 +
 tools/tests/libMicro/bench.sh | 0
 tools/tests/libMicro/benchDS.sh | 0
 tools/tests/libMicro/coreos_bench.sh | 0
 tools/tests/libMicro/create_stuff.sh | 0
 tools/tests/libMicro/embd_bench.sh | 0
tools/tests/libMicro/multiview.sh | 0 tools/tests/libMicro/od_account_create.sh | 0 tools/tests/libMicro/od_account_delete.sh | 0 tools/tests/libMicro/wrapper.sh | 0 tools/tests/memorystatus/Makefile | 38 + tools/tests/memorystatus/memorystatus.c | 1808 ++ tools/tests/perf_index/Makefile | 42 + tools/tests/perf_index/compile.c | 34 + tools/tests/perf_index/iperf.c | 16 + tools/tests/perf_index/main.c | 259 + tools/tests/perf_index/md5.c | 154 + tools/tests/perf_index/perf_index.h | 96 + tools/tests/perf_index/stress_cpu.c | 11 + tools/tests/perf_index/stress_fault.c | 85 + tools/tests/perf_index/stress_file_create.c | 19 + tools/tests/perf_index/stress_file_local.c | 50 + tools/tests/perf_index/stress_file_ram.c | 65 + tools/tests/perf_index/stress_file_read.c | 66 + tools/tests/perf_index/stress_file_write.c | 46 + tools/tests/perf_index/stress_general.c | 10 + tools/tests/perf_index/stress_memory.c | 62 + tools/tests/perf_index/stress_syscall.c | 13 + tools/tests/perf_index/test_controller.py | 65 + tools/tests/superpages/Makefile | 32 + tools/tests/testkext/testthreadcall.cpp | 2 - tools/tests/unit_tests/Makefile | 237 + tools/tests/unit_tests/build_tests.sh | 134 + tools/tests/unit_tests/clock_types_6368156.c | 20 + .../codesigntests-entitlements.plist | 8 + tools/tests/unit_tests/codesigntests.c | 130 + .../cpu_hog/cpu_hog-Entitlements.plist | 8 + .../cpu_hog/cpu_hog.m | 470 + .../cpu_hog/cpu_hog.xcodeproj/project.pbxproj | 356 + .../contents.xcworkspacedata | 15 + .../cpumon_test_framework.c | 529 + .../mach_exc.defs | 21 +- .../mem_hog/mem_hog.c | 221 + .../fcntlrangecheck_tests_11202484.c | 210 + .../guarded_test.c | 532 + .../guarded_test_common.h | 11 + .../guarded_test_framework.c | 276 + .../mach_exc.defs | 29 +- .../guarded_test.c | 536 + .../guarded_test_framework.c | 255 + .../mach_exc.defs | 61 +- .../libproc_privilege_test_13203438.c | 127 + .../monitor_stress-Entitlements.plist | 8 + .../monitor_stress.xcodeproj/project.pbxproj | 324 + .../contents.xcworkspacedata | 7 + .../UserInterfaceState.xcuserstate | Bin 0 -> 87338 bytes .../xcschemes/monitor_stress 2.xcscheme | 59 + .../xcschemes/monitor_stress copy.xcscheme | 86 + .../xcschemes/monitor_stress.xcscheme | 86 + .../xcschemes/xcschememanagement.plist | 42 + .../monitor_stress/monitor_stress.m | 178 + .../unit_tests/pipe_test_10807398_src/child.c | 27 + .../pipe_test_10807398_src/parent.c | 50 + .../unit_tests/pipes_fill_procinfo_11179336.c | 38 + .../ptcwd_test_11269991.c | 219 + .../ptrace_test_12507045_src/ptrace_test.c | 749 + .../ptrace_tests_10767133.c | 281 + tools/tests/unit_tests/sampletest.c | 28 + .../semctl_test_8534495.c | 38 + .../sprace_test_11891562.c | 265 + .../unit_tests/test_waitqlocktry_12053360.c | 79 + .../test_wq_exit_race_panic_10970548.c | 32 + .../excserver.defs | 1 + .../thread_get_state.c | 190 + tools/tests/unit_tests/xnu_raft_tests.py | 169 + tools/tests/xnu_quick_test/README | 2 +- .../xnu_quick_test/content_protection_test.c | 69 +- tools/tests/xnu_quick_test/helpers/launch.c | 2 +- tools/tests/xnu_quick_test/main.c | 126 +- tools/tests/xnu_quick_test/makefile | 257 +- tools/tests/xnu_quick_test/memory_tests.c | 128 +- tools/tests/xnu_quick_test/misc.c | 23 +- tools/tests/xnu_quick_test/pipes_tests.c | 1 + .../xnu_quick_test/shared_memory_tests.c | 8 + tools/tests/xnu_quick_test/tests.c | 86 +- tools/tests/zero-to-n/Makefile | 35 +- tools/tests/zero-to-n/zero-to-n.c | 22 +- tools/xcrun_cache.sh | 276 + 1896 files changed, 186425 insertions(+), 161294 deletions(-) delete mode 100644 
delete mode 100644 EXTERNAL_HEADERS/ar.h
create mode 100644 EXTERNAL_HEADERS/stddef.h
rename SETUP/{md => installfile}/Makefile (58%)
create mode 100644 SETUP/installfile/installfile.c
delete mode 100644 SETUP/md/md.1
delete mode 100644 SETUP/md/md.c
delete mode 100644 SETUP/setup.csh
mode change 100644 => 100755 SETUP/setup.sh
delete mode 100644 bsd/conf/MASTER.i386
delete mode 100644 bsd/conf/Makefile.i386
delete mode 100644 bsd/conf/files.i386
rename bsd/dev/{vn => dtrace}/Makefile (52%)
create mode 100644 bsd/dev/dtrace/scripts/Makefile
create mode 100644 bsd/dev/dtrace/scripts/darwin.d
create mode 100644 bsd/dev/dtrace/scripts/errno.d
create mode 100644 bsd/dev/dtrace/scripts/io.d
create mode 100644 bsd/dev/dtrace/scripts/ip.d
create mode 100644 bsd/dev/dtrace/scripts/mptcp.d
create mode 100644 bsd/dev/dtrace/scripts/regs_x86_64.d
create mode 100644 bsd/dev/dtrace/scripts/sched.d
create mode 100644 bsd/dev/dtrace/scripts/signal.d
create mode 100644 bsd/dev/dtrace/scripts/socket.d
create mode 100644 bsd/dev/dtrace/scripts/tcp.d
create mode 100644 bsd/dev/dtrace/scripts/unistd.d
delete mode 100644 bsd/dev/i386/munge.s
create mode 100644 bsd/dev/munge.c
create mode 100644 bsd/hfs/hfs_unistr.h
delete mode 100644 bsd/hfs/hfscommon/Misc/HybridAllocator.c
delete mode 100644 bsd/hfs/hfscommon/headers/HybridAllocator.h
delete mode 100644 bsd/hfs/hfscommon/headers/RedBlackTree.h
create mode 100644 bsd/i386/_mcontext.h
create mode 100644 bsd/kern/kern_asl.c
create mode 100644 bsd/kern/kern_cs.c
create mode 100644 bsd/kern/kern_guarded.c
create mode 100644 bsd/kern/kern_kpc.c
create mode 100644 bsd/kern/kern_overrides.c
create mode 100644 bsd/kern/kern_tests.c
create mode 100644 bsd/kern/proc_uuid_policy.c
create mode 100644 bsd/kern/pthread_shims.c
delete mode 100644 bsd/kern/pthread_support.c
delete mode 100644 bsd/kern/pthread_synch.c
delete mode 100644 bsd/libkern/bcmp.c
delete mode 100644 bsd/libkern/inet_ntop.c
create mode 100644 bsd/libkern/memchr.c
delete mode 100644 bsd/libkern/rindex.c
delete mode 100644 bsd/libkern/strtol.c
rename osfmk/machine/timer.h => bsd/machine/_mcontext.h (89%)
create mode 100644 bsd/man/man2/connectx.2
create mode 100644 bsd/man/man2/disconnectx.2
create mode 100644 bsd/man/man2/peeloff.2
create mode 100644 bsd/man/man3/getiopolicy_np.3
delete mode 100644 bsd/man/man4/tun.4
mode change 100644 => 100755 bsd/miscfs/devfs/reproto.sh
create mode 100644 bsd/miscfs/mockfs/mockfs.h
create mode 100644 bsd/miscfs/mockfs/mockfs_fsnode.c
create mode 100644 bsd/miscfs/mockfs/mockfs_fsnode.h
create mode 100644 bsd/miscfs/mockfs/mockfs_vfsops.c
create mode 100644 bsd/miscfs/mockfs/mockfs_vnops.c
rename libsyscall/mach/mig_reply_port.h => bsd/miscfs/mockfs/mockfs_vnops.h (85%)
delete mode 100644 bsd/net/bsd_comp.c
delete mode 100644 bsd/net/ether_at_pr_module.c
create mode 100644 bsd/net/flowadv.c
create mode 100644 bsd/net/if_ipsec.c
rename osfmk/kern/etimer.h => bsd/net/if_ipsec.h (50%)
delete mode 100644 bsd/net/if_media.c
delete mode 100644 bsd/net/if_pppvar.h
create mode 100644 bsd/net/if_utun_crypto_dtls.c
create mode 100644 bsd/net/if_utun_crypto_dtls.h
delete mode 100644 bsd/net/iso88025.h
delete mode 100644 bsd/net/kext_net.c
delete mode 100644 bsd/net/net_osdep.c
create mode 100644 bsd/net/net_stubs.c
create mode 100644 bsd/net/pktap.c
create mode 100644 bsd/net/pktap.h
delete mode 100644 bsd/net/ppp_deflate.c
delete mode 100644 bsd/net/slip.h
delete mode 100644 bsd/netat/Makefile
delete mode 100644 bsd/netat/adsp.c
delete mode 100644 bsd/netat/adsp.h
delete mode 100644 bsd/netat/adsp_CLDeny.c
delete mode 100644 bsd/netat/adsp_CLListen.c
delete mode 100644 bsd/netat/adsp_Close.c
delete mode 100644 bsd/netat/adsp_Control.c
delete mode 100644 bsd/netat/adsp_Init.c
delete mode 100644 bsd/netat/adsp_NewCID.c
delete mode 100644 bsd/netat/adsp_Open.c
delete mode 100644 bsd/netat/adsp_Options.c
delete mode 100644 bsd/netat/adsp_Packet.c
delete mode 100644 bsd/netat/adsp_Read.c
delete mode 100644 bsd/netat/adsp_RxAttn.c
delete mode 100644 bsd/netat/adsp_RxData.c
delete mode 100644 bsd/netat/adsp_Status.c
delete mode 100644 bsd/netat/adsp_Timer.c
delete mode 100644 bsd/netat/adsp_TimerElem.c
delete mode 100644 bsd/netat/adsp_Write.c
delete mode 100644 bsd/netat/adsp_attention.c
delete mode 100644 bsd/netat/adsp_internal.h
delete mode 100644 bsd/netat/adsp_misc.c
delete mode 100644 bsd/netat/adsp_reset.c
delete mode 100644 bsd/netat/adsp_stream.c
delete mode 100644 bsd/netat/appletalk.h
delete mode 100644 bsd/netat/asp.h
delete mode 100644 bsd/netat/asp_proto.c
delete mode 100644 bsd/netat/at.c
delete mode 100644 bsd/netat/at_aarp.h
delete mode 100644 bsd/netat/at_config.h
delete mode 100644 bsd/netat/at_ddp_brt.h
delete mode 100644 bsd/netat/at_pat.h
delete mode 100644 bsd/netat/at_pcb.c
delete mode 100644 bsd/netat/at_pcb.h
delete mode 100644 bsd/netat/at_proto.c
delete mode 100644 bsd/netat/at_snmp.h
delete mode 100644 bsd/netat/at_var.h
delete mode 100644 bsd/netat/atalk.exp
delete mode 100644 bsd/netat/atalk.imp
delete mode 100644 bsd/netat/atp.h
delete mode 100644 bsd/netat/atp_alloc.c
delete mode 100644 bsd/netat/atp_misc.c
delete mode 100644 bsd/netat/atp_open.c
delete mode 100644 bsd/netat/atp_read.c
delete mode 100644 bsd/netat/atp_write.c
delete mode 100644 bsd/netat/aurp.h
delete mode 100644 bsd/netat/aurp_aurpd.c
delete mode 100644 bsd/netat/aurp_cfg.c
delete mode 100644 bsd/netat/aurp_gdata.c
delete mode 100644 bsd/netat/aurp_misc.c
delete mode 100644 bsd/netat/aurp_open.c
delete mode 100644 bsd/netat/aurp_rd.c
delete mode 100644 bsd/netat/aurp_ri.c
delete mode 100644 bsd/netat/aurp_rx.c
delete mode 100644 bsd/netat/aurp_tickle.c
delete mode 100644 bsd/netat/aurp_tx.c
delete mode 100644 bsd/netat/aurp_zi.c
delete mode 100644 bsd/netat/ddp.c
delete mode 100644 bsd/netat/ddp.h
delete mode 100644 bsd/netat/ddp.save
delete mode 100644 bsd/netat/ddp_aarp.c
delete mode 100644 bsd/netat/ddp_aep.c
delete mode 100644 bsd/netat/ddp_brt.c
delete mode 100644 bsd/netat/ddp_lap.c
delete mode 100644 bsd/netat/ddp_nbp.c
delete mode 100644 bsd/netat/ddp_proto.c
delete mode 100644 bsd/netat/ddp_r_rtmp.c
delete mode 100644 bsd/netat/ddp_r_zip.c
delete mode 100644 bsd/netat/ddp_rtmp.c
delete mode 100644 bsd/netat/ddp_rtmptable.c
delete mode 100644 bsd/netat/ddp_sip.c
delete mode 100644 bsd/netat/ddp_usrreq.c
delete mode 100644 bsd/netat/debug.h
delete mode 100644 bsd/netat/drv_dep.c
delete mode 100644 bsd/netat/lap.h
delete mode 100644 bsd/netat/nbp.h
delete mode 100644 bsd/netat/pap.h
delete mode 100644 bsd/netat/routing_tables.h
delete mode 100644 bsd/netat/rtmp.h
delete mode 100644 bsd/netat/sys_dep.c
delete mode 100644 bsd/netat/sys_glue.c
delete mode 100644 bsd/netat/sysglue.h
delete mode 100644 bsd/netat/zip.h
create mode 100644 bsd/netinet/cpu_in_cksum.c
create mode 100644 bsd/netinet/flow_divert.c
create mode 100644 bsd/netinet/flow_divert.h
create mode 100644 bsd/netinet/flow_divert_proto.h
create mode 100644 bsd/netinet/mp_pcb.c
create mode 100644 bsd/netinet/mp_pcb.h
create mode 100644 bsd/netinet/mp_proto.c
create mode 100644 bsd/netinet/mptcp.c
create mode 100644 bsd/netinet/mptcp.h
create mode 100644 bsd/netinet/mptcp_opt.c
create mode 100644 bsd/netinet/mptcp_opt.h
create mode 100644 bsd/netinet/mptcp_seq.h
create mode 100644 bsd/netinet/mptcp_subr.c
create mode 100644 bsd/netinet/mptcp_timer.c
create mode 100644 bsd/netinet/mptcp_timer.h
create mode 100644 bsd/netinet/mptcp_usrreq.c
create mode 100644 bsd/netinet/mptcp_var.h
create mode 100644 bsd/netinet6/in6_cga.c
delete mode 100644 bsd/netinet6/in6_prefix.c
delete mode 100644 bsd/netinet6/in6_prefix.h
create mode 100644 bsd/netinet6/nd6_send.c
rename iokit/Kernel/i386/IOAsmSupport.s => bsd/nfs/nfs_ioctl.h (75%)
create mode 100644 bsd/sys/_types/Makefile
create mode 100644 bsd/sys/_types/___offsetof.h
create mode 100644 bsd/sys/_types/_blkcnt_t.h
create mode 100644 bsd/sys/_types/_blksize_t.h
create mode 100644 bsd/sys/_types/_clock_t.h
create mode 100644 bsd/sys/_types/_ct_rune_t.h
create mode 100644 bsd/sys/_types/_dev_t.h
create mode 100644 bsd/sys/_types/_errno_t.h
rename osfmk/chud/i386/chud_cpu_asm.s => bsd/sys/_types/_fd_clr.h (90%)
create mode 100644 bsd/sys/_types/_fd_copy.h
create mode 100644 bsd/sys/_types/_fd_def.h
create mode 100644 bsd/sys/_types/_fd_isset.h
rename osfmk/chud/i386/chud_cpu_asm.h => bsd/sys/_types/_fd_set.h (90%)
create mode 100644 bsd/sys/_types/_fd_setsize.h
create mode 100644 bsd/sys/_types/_fd_zero.h
create mode 100644 bsd/sys/_types/_filesec_t.h
create mode 100644 bsd/sys/_types/_fsblkcnt_t.h
create mode 100644 bsd/sys/_types/_fsfilcnt_t.h
rename libsyscall/mach/mach/task.h => bsd/sys/_types/_gid_t.h (86%)
create mode 100644 bsd/sys/_types/_guid_t.h
create mode 100644 bsd/sys/_types/_id_t.h
create mode 100644 bsd/sys/_types/_in_addr_t.h
create mode 100644 bsd/sys/_types/_in_port_t.h
create mode 100644 bsd/sys/_types/_ino64_t.h
create mode 100644 bsd/sys/_types/_ino_t.h
create mode 100644 bsd/sys/_types/_int16_t.h
create mode 100644 bsd/sys/_types/_int32_t.h
create mode 100644 bsd/sys/_types/_int64_t.h
create mode 100644 bsd/sys/_types/_int8_t.h
create mode 100644 bsd/sys/_types/_intptr_t.h
create mode 100644 bsd/sys/_types/_iovec_t.h
create mode 100644 bsd/sys/_types/_key_t.h
rename iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.h => bsd/sys/_types/_mach_port_t.h (62%)
create mode 100644 bsd/sys/_types/_mbstate_t.h
create mode 100644 bsd/sys/_types/_mode_t.h
create mode 100644 bsd/sys/_types/_nlink_t.h
create mode 100644 bsd/sys/_types/_null.h
create mode 100644 bsd/sys/_types/_o_dsync.h
create mode 100644 bsd/sys/_types/_o_sync.h
create mode 100644 bsd/sys/_types/_off_t.h
create mode 100644 bsd/sys/_types/_os_inline.h
create mode 100644 bsd/sys/_types/_pid_t.h
create mode 100644 bsd/sys/_types/_posix_vdisable.h
create mode 100644 bsd/sys/_types/_pthread_attr_t.h
create mode 100644 bsd/sys/_types/_pthread_cond_t.h
create mode 100644 bsd/sys/_types/_pthread_condattr_t.h
create mode 100644 bsd/sys/_types/_pthread_key_t.h
create mode 100644 bsd/sys/_types/_pthread_mutex_t.h
create mode 100644 bsd/sys/_types/_pthread_mutexattr_t.h
create mode 100644 bsd/sys/_types/_pthread_once_t.h
create mode 100644 bsd/sys/_types/_pthread_rwlock_t.h
create mode 100644 bsd/sys/_types/_pthread_rwlockattr_t.h
create mode 100644 bsd/sys/_types/_pthread_t.h
create mode 100644 bsd/sys/_types/_ptrdiff_t.h
create mode 100644 bsd/sys/_types/_rsize_t.h
create mode 100644 bsd/sys/_types/_rune_t.h
create mode 100644 bsd/sys/_types/_s_ifmt.h
create mode 100644 bsd/sys/_types/_sa_family_t.h
create mode 100644 bsd/sys/_types/_seek_set.h
rename iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.h => bsd/sys/_types/_sigaltstack.h (67%)
create mode 100644 bsd/sys/_types/_sigset_t.h
create mode 100644 bsd/sys/_types/_size_t.h
create mode 100644 bsd/sys/_types/_socklen_t.h
create mode 100644 bsd/sys/_types/_ssize_t.h
create mode 100644 bsd/sys/_types/_suseconds_t.h
create mode 100644 bsd/sys/_types/_time_t.h
rename libsyscall/mach/mach/thread_act.h => bsd/sys/_types/_timespec.h (84%)
create mode 100644 bsd/sys/_types/_timeval.h
create mode 100644 bsd/sys/_types/_timeval32.h
create mode 100644 bsd/sys/_types/_ucontext.h
create mode 100644 bsd/sys/_types/_ucontext64.h
create mode 100644 bsd/sys/_types/_uid_t.h
create mode 100644 bsd/sys/_types/_uintptr_t.h
create mode 100644 bsd/sys/_types/_useconds_t.h
create mode 100644 bsd/sys/_types/_user32_itimerval.h
create mode 100644 bsd/sys/_types/_user32_timespec.h
create mode 100644 bsd/sys/_types/_user32_timeval.h
create mode 100644 bsd/sys/_types/_user64_itimerval.h
create mode 100644 bsd/sys/_types/_user64_timespec.h
create mode 100644 bsd/sys/_types/_user64_timeval.h
create mode 100644 bsd/sys/_types/_user_timespec.h
create mode 100644 bsd/sys/_types/_user_timeval.h
create mode 100644 bsd/sys/_types/_uuid_t.h
create mode 100644 bsd/sys/_types/_va_list.h
create mode 100644 bsd/sys/_types/_wchar_t.h
create mode 100644 bsd/sys/_types/_wint_t.h
create mode 100644 bsd/sys/bitstring.h
create mode 100644 bsd/sys/guarded.h
create mode 100644 bsd/sys/kasl.h
create mode 100644 bsd/sys/kern_overrides.h
create mode 100644 bsd/sys/kern_tests.h
create mode 100644 bsd/sys/proc_uuid_policy.h
create mode 100644 bsd/sys/pthread_shims.h
mode change 100644 => 100755 bsd/vfs/vnode_if.sh
create mode 100644 bsd/vm/vm_compressor_backing_file.c
delete mode 100644 config/BSDKernel.i386.exports
delete mode 100644 config/Dummy.exports
delete mode 100644 config/IOKit.i386.exports
delete mode 100644 config/Libkern.i386.exports
delete mode 100644 config/MACFramework.i386.exports
delete mode 100644 config/Mach.i386.exports
delete mode 100644 config/Private.i386.exports
delete mode 100644 config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist
delete mode 100644 config/System.kext/PlugIns/IOKit6.0.kext/Info.plist
delete mode 100644 config/System.kext/PlugIns/Libkern6.0.kext/Info.plist
delete mode 100644 config/System.kext/PlugIns/Mach6.0.kext/Info.plist
delete mode 100644 config/System.kext/PlugIns/System6.0.kext/Info.plist
delete mode 100644 config/System6.0.exports
delete mode 100644 config/System6.0.i386.exports
delete mode 100644 config/System6.0.x86_64.exports
delete mode 100644 config/Unsupported.i386.exports
create mode 100644 config/Unused.exports
delete mode 100755 config/compress-man-pages.pl
delete mode 100644 iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp
delete mode 100644 iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp
delete mode 100644 iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp
delete mode 100644 iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp
delete mode 100644 iokit/Drivers/platform/drvApplePlatformExpert/ApplePlatformExpert.cpp
create mode 100644 iokit/IOKit/IOReportMacros.h
create mode 100644 iokit/IOKit/IOReportTypes.h
delete mode 100644 iokit/IOKit/i386/Makefile
delete mode 100644 iokit/IOKit/pci/IOPCIDevice.h
delete mode 100644 iokit/IOKit/x86_64/Makefile
delete mode 100644 iokit/User/Makefile
delete mode 100644 iokit/User/Makefile.user
delete mode 100644 iokit/conf/MASTER.i386
delete mode 100644 iokit/conf/Makefile.i386
delete mode 100644 iokit/conf/files.i386
delete mode 100644 kgmacros
delete mode 100644 libkern/conf/MASTER.i386
delete mode 100644 libkern/conf/Makefile.i386
delete mode 100644 libkern/conf/files.i386
delete mode 100644 libkern/crypto/intel/sha1edp.h
delete mode 100644 libkern/crypto/intel/sha1edp.s
delete mode 100644 libkern/i386/OSAtomic.s
delete mode 100644 libkern/kxld/i386/WKdmCompress.s
delete mode 100644 libkern/kxld/i386/WKdmDecompress.s
create mode 100644 libkern/net/inet_aton.c
create mode 100644 libkern/net/inet_ntoa.c
create mode 100644 libkern/net/inet_ntop.c
create mode 100644 libkern/net/inet_pton.c
delete mode 100644 libkern/uuid/Makefile
delete mode 100644 libkern/zlib/intel/adler32vec.s
delete mode 100644 libkern/zlib/intel/inffastS.s
delete mode 100644 libsa/conf/MASTER.i386
delete mode 100644 libsa/conf/Makefile.i386
rename libsa/conf/files.i386 => libsyscall/Libsyscall.aliases (100%)
create mode 100644 libsyscall/custom/dummy.c
mode change 100755 => 100644 libsyscall/mach/err_iokit.sub
rename bsd/netat/ep.h => libsyscall/mach/mach/vm_page_size.h (67%)
rename bsd/netat/adsp_InitGlobals.c => libsyscall/mach/stack_logging_internal.h (56%)
create mode 100644 libsyscall/os/alloc_once.c
create mode 100644 libsyscall/os/tsd.h
create mode 100644 libsyscall/wrappers/__commpage_gettimeofday.c
create mode 100644 libsyscall/wrappers/__commpage_gettimeofday.s
create mode 100644 libsyscall/wrappers/carbon_delete.c
create mode 100644 libsyscall/wrappers/gethostuuid.c
create mode 100644 libsyscall/wrappers/gethostuuid.h
create mode 100644 libsyscall/wrappers/gethostuuid_private.h
create mode 100644 libsyscall/wrappers/getiopolicy_np.c
create mode 100644 libsyscall/wrappers/guarded_open_np.c
create mode 100644 libsyscall/wrappers/libproc/libproc.c
create mode 100644 libsyscall/wrappers/libproc/libproc.h
create mode 100644 libsyscall/wrappers/libproc/libproc_internal.h
create mode 100644 libsyscall/wrappers/libproc/proc_listpidspath.c
create mode 100644 libsyscall/wrappers/mach_absolute_time.s
create mode 100644 libsyscall/wrappers/spawn/posix_spawn.c
create mode 100644 libsyscall/wrappers/spawn/spawn.h
create mode 100644 libsyscall/wrappers/spawn/spawn_private.h
create mode 100644 libsyscall/wrappers/string/index.c
rename libsyscall/wrappers/{ => string}/memcpy.c (96%)
create mode 100644 libsyscall/wrappers/string/memset.c
create mode 100644 libsyscall/wrappers/string/strcmp.c
create mode 100644 libsyscall/wrappers/string/strcpy.c
rename bsd/libkern/locc.c => libsyscall/wrappers/string/strings.h (74%)
create mode 100644 libsyscall/wrappers/string/strlcpy.c
create mode 100644 libsyscall/wrappers/string/strlen.c
create mode 100644 libsyscall/wrappers/string/strsep.c
rename iokit/Kernel/i386/IOSharedLock.s => libsyscall/wrappers/unix03/munmap.c (57%)
delete mode 100644 lldbmacros.py
create mode 100644 makedefs/MakeInc.kernel
create mode 100644 makedefs/MakeInc.top
delete mode 100644 osfmk/conf/MASTER.i386
delete mode 100644 osfmk/conf/Makefile.i386
delete mode 100644 osfmk/conf/files.i386
delete mode 100644 osfmk/default_pager/Makefile.template
delete mode 100644 osfmk/i386/_setjmp.s
delete mode 100644 osfmk/i386/acpi_wakeup.s
delete mode 100644 osfmk/i386/bcopy.s
delete mode 100644 osfmk/i386/commpage/pthreads.s
delete mode 100644 osfmk/i386/copyio.c
delete mode 100644 osfmk/i386/cswitch.s
delete mode 100644 osfmk/i386/etimer.c
create mode 100644 osfmk/i386/i386_timer.c
delete mode 100644 osfmk/i386/idle_pt.c
delete mode 100644 osfmk/i386/idt.s
delete mode 100644 osfmk/i386/idt64.s
delete mode 100644 osfmk/i386/locore.s
delete mode 100644 osfmk/i386/loose_ends.c
delete mode 100644 osfmk/i386/lowglobals.h
delete mode 100644 osfmk/i386/lowmem_vectors.s
delete mode 100644 osfmk/i386/machine_routines_asm.s
delete mode 100644 osfmk/i386/mcount.s
delete mode 100644 osfmk/i386/pal_routines_asm.s
delete mode 100644 osfmk/i386/pmap.c
delete mode 100644 osfmk/i386/start.s
delete mode 100644 osfmk/i386/start64.s
delete mode 100644 osfmk/kdp/ml/i386/kdp_machdep.c
delete mode 100644 osfmk/kdp/ml/i386/kdp_vm.c
create mode 100644 osfmk/kern/btlog.c
create mode 100644 osfmk/kern/btlog.h
create mode 100644 osfmk/kern/exc_resource.h
create mode 100644 osfmk/kern/kpc.h
create mode 100644 osfmk/kern/kpc_common.c
create mode 100644 osfmk/kern/kpc_thread.c
create mode 100644 osfmk/kern/telemetry.c
create mode 100644 osfmk/kern/telemetry.h
delete mode 100644 osfmk/kern/template.mk
delete mode 100644 osfmk/kperf/filter.c
create mode 100644 osfmk/kperf/kperf_kpc.c
rename osfmk/kperf/{filter.h => kperf_kpc.h} (75%)
delete mode 100644 osfmk/mach/Makefile.template
delete mode 100644 osfmk/mach/mach_norma.defs
delete mode 100644 osfmk/mach/norma_special_ports.h
create mode 100644 osfmk/mach/telemetry_notification.defs
delete mode 100644 osfmk/mach_debug/template.mk
create mode 100644 osfmk/machine/machine_kpc.h
mode change 100755 => 100644 osfmk/man/DMN_port_deleted.html
mode change 100755 => 100644 osfmk/man/DMN_port_destroyed.html
mode change 100755 => 100644 osfmk/man/DP_backing_store_create.html
mode change 100755 => 100644 osfmk/man/DP_backing_store_delete.html
mode change 100755 => 100644 osfmk/man/DP_backing_store_info.html
mode change 100755 => 100644 osfmk/man/DP_object_create.html
mode change 100755 => 100644 osfmk/man/DR_overwrite_async.html
mode change 100755 => 100644 osfmk/man/HD_memory_manager.html
mode change 100755 => 100644 osfmk/man/MO_SY_completed.html
mode change 100755 => 100644 osfmk/man/MO_change_attributes.html
mode change 100755 => 100644 osfmk/man/MO_change_completed.html
mode change 100755 => 100644 osfmk/man/MO_data_initialize.html
mode change 100755 => 100644 osfmk/man/MO_data_unavailable.html
mode change 100755 => 100644 osfmk/man/MO_default_server.html
mode change 100755 => 100644 osfmk/man/MO_get_attributes.html
mode change 100755 => 100644 osfmk/man/MO_lock_completed.html
mode change 100755 => 100644 osfmk/man/MO_supply_completed.html
mode change 100755 => 100644 osfmk/man/MP_allocate_subsystem.html
mode change 100755 => 100644 osfmk/man/MP_request_notification.html
mode change 100755 => 100644 osfmk/man/P_set_policy_control.html
mode change 100755 => 100644 osfmk/man/P_set_policy_disable.html
mode change 100755 => 100644 osfmk/man/P_set_policy_enable.html
mode change 100755 => 100644 osfmk/man/SMO_default_server.html
mode change 100755 => 100644 osfmk/man/SMO_server.html
mode change 100755 => 100644 osfmk/man/TS_exception_ports.html
mode change 100755 => 100644 osfmk/man/VSD_memory_manager.html
mode change 100755 => 100644 osfmk/man/bootstrap_arguments.html
mode change 100755 => 100644 osfmk/man/bootstrap_completed.html
mode change 100755 => 100644 osfmk/man/bootstrap_environment.html
mode change 100755 => 100644 osfmk/man/bootstrap_ports.html
mode change 100755 => 100644 osfmk/man/catch_exception_raise.html
mode change 100755 => 100644 osfmk/man/clock_alarm.html
mode change 100755 => 100644 osfmk/man/clock_alarm_reply.html
mode change 100755 => 100644 osfmk/man/clock_get_attributes.html
mode change 100755 => 100644 osfmk/man/clock_get_time.html
mode change 100755 => 100644 osfmk/man/clock_map_time.html
mode change 100755 => 100644 osfmk/man/clock_reply_server.html
mode change 100755 => 100644 osfmk/man/clock_set_attributes.html
mode change 100755 => 100644 osfmk/man/clock_set_time.html
mode change 100755 => 100644 osfmk/man/clock_sleep.html
mode change 100755 => 100644 osfmk/man/default_pager_add_segment.html
mode change 100755 => 100644 osfmk/man/default_pager_info.html
mode change 100755 => 100644 osfmk/man/device_close.html
mode change 100755 => 100644 osfmk/man/device_get_status.html
mode change 100755 => 100644 osfmk/man/device_map.html
mode change 100755 => 100644 osfmk/man/device_open.html
mode change 100755 => 100644 osfmk/man/device_read.html
mode change 100755 => 100644 osfmk/man/device_read_async.html
mode change 100755 => 100644 osfmk/man/device_read_async_inband.html
mode change 100755 => 100644 osfmk/man/device_read_inband.html
mode change 100755 => 100644 osfmk/man/device_read_overwrite.html
mode change 100755 => 100644 osfmk/man/device_reply_server.html
mode change 100755 => 100644 osfmk/man/device_set_filter.html
mode change 100755 => 100644 osfmk/man/device_set_status.html
mode change 100755 => 100644 osfmk/man/device_write.html
mode change 100755 => 100644 osfmk/man/device_write_async.html
mode change 100755 => 100644 osfmk/man/device_write_async_inband.html
mode change 100755 => 100644 osfmk/man/device_write_inband.html
mode change 100755 => 100644 osfmk/man/do_mach_notify_dead_name.html
mode change 100755 => 100644 osfmk/man/do_mach_notify_no_senders.html
mode change 100755 => 100644 osfmk/man/do_mach_notify_send_once.html
mode change 100755 => 100644 osfmk/man/etap_get_info.html
mode change 100755 => 100644 osfmk/man/etap_probe.html
mode change 100755 => 100644 osfmk/man/etap_trace_event.html
mode change 100755 => 100644 osfmk/man/etap_trace_thread.html
mode change 100755 => 100644 osfmk/man/evc_wait.html
mode change 100755 => 100644 osfmk/man/exc_server.html
mode change 100755 => 100644 osfmk/man/host_adjust_time.html
mode change 100755 => 100644 osfmk/man/host_basic_info.html
mode change 100755 => 100644 osfmk/man/host_get_boot_info.html
mode change 100755 => 100644 osfmk/man/host_get_clock_control.html
mode change 100755 => 100644 osfmk/man/host_get_clock_service.html
mode change 100755 => 100644 osfmk/man/host_get_time.html
mode change 100755 => 100644 osfmk/man/host_info.html
mode change 100755 => 100644 osfmk/man/host_kernel_version.html
mode change 100755 => 100644 osfmk/man/host_load_info.html
mode change 100755 => 100644 osfmk/man/host_page_size.html
mode change 100755 => 100644 osfmk/man/host_processor_set_priv.html
mode change 100755 => 100644 osfmk/man/host_processor_sets.html
mode change 100755 => 100644 osfmk/man/host_processor_slots.html
mode change 100755 => 100644 osfmk/man/host_processors.html
mode change 100755 => 100644 osfmk/man/host_reboot.html
mode change 100755 => 100644 osfmk/man/host_sched_info.html
mode change 100755 => 100644 osfmk/man/host_security_create_task_token.html
mode change 100755 => 100644 osfmk/man/host_security_set_task_token.html
mode change 100755 => 100644 osfmk/man/host_set_time.html
mode change 100755 => 100644 osfmk/man/host_statistics.html
mode change 100755 => 100644 osfmk/man/i386_get_ldt.html
mode change 100755 => 100644 osfmk/man/i386_io_port_add.html
mode change 100755 => 100644 osfmk/man/i386_io_port_list.html
mode change 100755 => 100644 osfmk/man/i386_io_port_remove.html
mode change 100755 => 100644 osfmk/man/i386_set_ldt.html
mode change 100755 => 100644 osfmk/man/index.html
mode change 100755 => 100644 osfmk/man/io_done_queue_create.html
mode change 100755 => 100644 osfmk/man/io_done_queue_terminate.html
mode change 100755 => 100644 osfmk/man/io_done_queue_wait.html
mode change 100755 => 100644 osfmk/man/kernel_resource_sizes.html
mode change 100755 => 100644 osfmk/man/ledger_create.html
mode change 100755 => 100644 osfmk/man/ledger_get_remote.html
mode change 100755 => 100644 osfmk/man/ledger_read.html
mode change 100755 => 100644 osfmk/man/ledger_set_remote.html
mode change 100755 => 100644 osfmk/man/ledger_terminate.html
mode change 100755 => 100644 osfmk/man/ledger_transfer.html
mode change 100755 => 100644 osfmk/man/lock_acquire.html
mode change 100755 => 100644 osfmk/man/lock_handoff.html
mode change 100755 => 100644 osfmk/man/lock_handoff_accept.html
mode change 100755 => 100644 osfmk/man/lock_make_stable.html
mode change 100755 => 100644 osfmk/man/lock_release.html
mode change 100755 => 100644 osfmk/man/lock_set_create.html
mode change 100755 => 100644 osfmk/man/lock_set_destroy.html
mode change 100755 => 100644 osfmk/man/lock_try.html
mode change 100755 => 100644 osfmk/man/mach_host_self.html
mode change 100755 => 100644 osfmk/man/mach_msg.html
mode change 100755 => 100644 osfmk/man/mach_msg_descriptor.html
mode change 100755 => 100644 osfmk/man/mach_msg_header.html
mode change 100755 => 100644 osfmk/man/mach_port_allocate.html
mode change 100755 => 100644 osfmk/man/mach_port_allocate_full.html
mode change 100755 => 100644 osfmk/man/mach_port_allocate_name.html
mode change 100755 => 100644 osfmk/man/mach_port_allocate_qos.html
mode change 100755 => 100644 osfmk/man/mach_port_deallocate.html
mode change 100755 => 100644 osfmk/man/mach_port_destroy.html
mode change 100755 => 100644 osfmk/man/mach_port_extract_member.html
mode change 100755 => 100644 osfmk/man/mach_port_extract_right.html
mode change 100755 => 100644 osfmk/man/mach_port_get_attributes.html
mode change 100755 => 100644 osfmk/man/mach_port_get_refs.html
mode change 100755 => 100644 osfmk/man/mach_port_get_set_status.html
mode change 100755 => 100644 osfmk/man/mach_port_insert_member.html
mode change 100755 => 100644 osfmk/man/mach_port_insert_right.html
mode change 100755 => 100644 osfmk/man/mach_port_limits.html
mode change 100755 => 100644 osfmk/man/mach_port_mod_refs.html
mode change 100755 => 100644 osfmk/man/mach_port_move_member.html
mode change 100755 => 100644 osfmk/man/mach_port_names.html
mode change 100755 => 100644 osfmk/man/mach_port_qos.html
mode change 100755 => 100644 osfmk/man/mach_port_set_attributes.html
mode change 100755 => 100644 osfmk/man/mach_port_set_mscount.html
mode change 100755 => 100644 osfmk/man/mach_port_set_seqno.html
mode change 100755 => 100644 osfmk/man/mach_port_status.html
mode change 100755 => 100644 osfmk/man/mach_port_type.html
mode change 100755 => 100644 osfmk/man/mach_ports_lookup.html
mode change 100755 => 100644 osfmk/man/mach_ports_register.html
mode change 100755 => 100644 osfmk/man/mach_reply_port.html
mode change 100755 => 100644 osfmk/man/mach_rpc_return_trap.html
mode change 100755 => 100644 osfmk/man/mach_rpc_trap.html
mode change 100755 => 100644 osfmk/man/mach_subsystem_create.html
mode change 100755 => 100644 osfmk/man/mach_task_self.html
mode change 100755 => 100644 osfmk/man/mach_thread_self.html
mode change 100755 => 100644 osfmk/man/mapped_tvalspec.html
mode change 100755 => 100644 osfmk/man/memory_object_attr_info.html
mode change 100755 => 100644 osfmk/man/memory_object_create.html
mode change 100755 => 100644 osfmk/man/memory_object_data_error.html
mode change 100755 => 100644 osfmk/man/memory_object_data_request.html
mode change 100755 => 100644 osfmk/man/memory_object_data_return.html
mode change 100755 => 100644 osfmk/man/memory_object_data_supply.html
mode change 100755 => 100644 osfmk/man/memory_object_data_unlock.html
mode change 100755 => 100644 osfmk/man/memory_object_destroy.html
mode change 100755 => 100644 osfmk/man/memory_object_init.html
mode change 100755 => 100644 osfmk/man/memory_object_lock_request.html
mode change 100755 => 100644 osfmk/man/memory_object_perf_info.html
mode change 100755 => 100644 osfmk/man/memory_object_server.html
mode change 100755 => 100644 osfmk/man/memory_object_synchronize.html
mode change 100755 => 100644 osfmk/man/memory_object_terminate.html
delete mode 100755 osfmk/man/norma_get_special_port.html
delete mode 100755 osfmk/man/norma_node_self.html
delete mode 100755 osfmk/man/norma_port_location_hint.html
delete mode 100755 osfmk/man/norma_set_special_port.html
delete mode 100755 osfmk/man/norma_task_clone.html
delete mode 100755 osfmk/man/norma_task_create.html
delete mode 100755 osfmk/man/norma_task_teleport.html
mode change 100755 => 100644 osfmk/man/notify_server.html
mode change 100755 => 100644 osfmk/man/policy_fifo_info.html
mode change 100755 => 100644 osfmk/man/policy_rr_info.html
mode change 100755 => 100644 osfmk/man/policy_timeshare_info.html
mode change 100755 => 100644 osfmk/man/processor_assign.html
mode change 100755 => 100644 osfmk/man/processor_basic_info.html
mode change 100755 => 100644 osfmk/man/processor_control.html
mode change 100755 => 100644 osfmk/man/processor_exit.html
mode change 100755 => 100644 osfmk/man/processor_get_assignment.html
mode change 100755 => 100644 osfmk/man/processor_info.html
mode change 100755 => 100644 osfmk/man/processor_set_basic_info.html
mode change 100755 => 100644 osfmk/man/processor_set_create.html
mode change 100755 => 100644 osfmk/man/processor_set_default.html
mode change 100755 => 100644 osfmk/man/processor_set_destroy.html
mode change 100755 => 100644 osfmk/man/processor_set_info.html
mode change 100755 => 100644 osfmk/man/processor_set_load_info.html
mode change 100755 => 100644 osfmk/man/processor_set_max_priority.html
mode change 100755 => 100644 osfmk/man/processor_set_statistics.html
mode change 100755 => 100644 osfmk/man/processor_set_tasks.html
mode change 100755 => 100644 osfmk/man/processor_set_threads.html
mode change 100755 => 100644 osfmk/man/processor_start.html
mode change 100755 => 100644 osfmk/man/prof_server.html
mode change 100755 => 100644 osfmk/man/receive_samples.html
mode change 100755 => 100644 osfmk/man/semaphore_create.html
mode change 100755 => 100644 osfmk/man/semaphore_destroy.html
mode change 100755 => 100644 osfmk/man/semaphore_signal.html
mode change 100755 => 100644 osfmk/man/semaphore_signal_all.html
mode change 100755 => 100644 osfmk/man/semaphore_wait.html
mode change 100755 => 100644 osfmk/man/seqnos_notify_server.html
mode change 100755 => 100644 osfmk/man/task_assign.html
mode change 100755 => 100644 osfmk/man/task_assign_default.html
mode change 100755 => 100644 osfmk/man/task_basic_info.html
mode change 100755 => 100644 osfmk/man/task_create.html
mode change 100755 => 100644 osfmk/man/task_get_assignment.html
mode change 100755 => 100644 osfmk/man/task_get_emulation_vector.html
mode change 100755 => 100644 osfmk/man/task_get_exception_ports.html
mode change 100755 => 100644 osfmk/man/task_get_special_port.html
mode change 100755 => 100644 osfmk/man/task_info.html
mode change 100755 => 100644 osfmk/man/task_policy.html
mode change 100755 => 100644 osfmk/man/task_resume.html
mode change 100755 => 100644 osfmk/man/task_sample.html
mode change 100755 => 100644 osfmk/man/task_set_emulation.html
mode change 100755 => 100644 osfmk/man/task_set_emulation_vector.html
mode change 100755 => 100644 osfmk/man/task_set_exception_ports.html
mode change 100755 => 100644 osfmk/man/task_set_info.html
mode change 100755 => 100644 osfmk/man/task_set_policy.html
mode change 100755 => 100644 osfmk/man/task_set_port_space.html
mode change 100755 => 100644 osfmk/man/task_set_special_port.html
mode change 100755 => 100644 osfmk/man/task_suspend.html
mode change 100755 => 100644 osfmk/man/task_swap_exception_ports.html
mode change 100755 => 100644 osfmk/man/task_terminate.html
mode change 100755 => 100644 osfmk/man/task_thread_times_info.html
mode change 100755 => 100644 osfmk/man/task_threads.html
mode change 100755 => 100644 osfmk/man/thread_abort.html
mode change 100755 => 100644 osfmk/man/thread_abort_safely.html
mode change 100755 => 100644 osfmk/man/thread_activation_create.html
mode change 100755 => 100644 osfmk/man/thread_assign.html
mode change 100755 => 100644 osfmk/man/thread_assign_default.html
mode change 100755 => 100644 osfmk/man/thread_basic_info.html
mode change 100755 => 100644 osfmk/man/thread_create.html
mode change 100755 => 100644 osfmk/man/thread_create_running.html
mode change 100755 => 100644 osfmk/man/thread_depress_abort.html
mode change 100755 => 100644 osfmk/man/thread_get_assignment.html
mode change 100755 => 100644 osfmk/man/thread_get_exception_ports.html
mode change 100755 => 100644 osfmk/man/thread_get_special_port.html
mode change 100755 => 100644 osfmk/man/thread_get_state.html
mode change 100755 => 100644 osfmk/man/thread_info.html
mode change 100755 => 100644 osfmk/man/thread_policy.html
mode change 100755 => 100644 osfmk/man/thread_resume.html
mode change 100755 => 100644 osfmk/man/thread_sample.html
mode change 100755 => 100644 osfmk/man/thread_set_exception_ports.html
mode change 100755 => 100644 osfmk/man/thread_set_policy.html
mode change 100755 => 100644 osfmk/man/thread_set_special_port.html
mode change 100755 => 100644 osfmk/man/thread_set_state.html
mode change 100755 => 100644 osfmk/man/thread_suspend.html
mode change 100755 => 100644 osfmk/man/thread_switch.html
mode change 100755 => 100644 osfmk/man/thread_terminate.html
mode change 100755 => 100644 osfmk/man/thread_wire.html
mode change 100755 => 100644 osfmk/man/tvalspec.html
mode change 100755 => 100644 osfmk/man/vm_allocate.html
mode change 100755 => 100644 osfmk/man/vm_behavior_set.html
mode change 100755 => 100644 osfmk/man/vm_copy.html
mode change 100755 => 100644 osfmk/man/vm_deallocate.html
mode change 100755 => 100644 osfmk/man/vm_inherit.html
mode change 100755 => 100644 osfmk/man/vm_machine_attribute.html
mode change 100755 => 100644 osfmk/man/vm_map.html
mode change 100755 => 100644 osfmk/man/vm_msync.html
mode change 100755 => 100644 osfmk/man/vm_protect.html
mode change 100755 => 100644 osfmk/man/vm_read.html
mode change 100755 => 100644 osfmk/man/vm_region.html
mode change 100755 => 100644 osfmk/man/vm_region_basic_info.html
mode change 100755 => 100644 osfmk/man/vm_remap.html
mode change 100755 => 100644 osfmk/man/vm_statistics.html
mode change 100755 => 100644 osfmk/man/vm_wire.html
mode change 100755 => 100644 osfmk/man/vm_write.html
delete mode 100644 osfmk/profiling/i386/profile-asm.s
delete mode 100644 osfmk/profiling/i386/profile-md.c
rename libkern/libkern/WKdm.h => osfmk/vm/WKdm_new.h (77%)
create mode 100644 osfmk/vm/vm_compressor.c
create mode 100644 osfmk/vm/vm_compressor.h
create mode 100644 osfmk/vm/vm_compressor_backing_store.c
create mode 100644 osfmk/vm/vm_compressor_backing_store.h
create mode 100644 osfmk/vm/vm_compressor_pager.c
create mode 100644 osfmk/vm/vm_compressor_pager.h
create mode 100644 osfmk/x86_64/WKdmCompress_new.s
create mode 100644 osfmk/x86_64/WKdmData_new.s
create mode 100644 osfmk/x86_64/WKdmDecompress_new.s
create mode 100644 osfmk/x86_64/kpc_x86.c
create mode 100644 osfmk/x86_64/machine_kpc.h
delete mode 100644 pexpert/conf/MASTER.i386
delete mode 100644 pexpert/conf/Makefile.i386
delete mode 100644 pexpert/conf/files.i386
delete mode 100644 security/conf/MASTER.i386
delete mode 100644 security/conf/Makefile.i386
delete mode 100644 security/conf/files.i386
create mode 100644 security/mac_kext.c
create mode 100644 security/mac_pty.c
delete mode 100644 security/mac_stub.c
rename {libsa/libsa => tools}/Makefile (50%)
create mode 100644 tools/lldbmacros/.lldbinit
create mode 100644 tools/lldbmacros/Makefile
create mode 100644 tools/lldbmacros/README
create mode 100644 tools/lldbmacros/apic.py
create mode 100644 tools/lldbmacros/core/__init__.py
create mode 100644 tools/lldbmacros/core/caching.py
create mode 100644 tools/lldbmacros/core/configuration.py
create mode 100644 tools/lldbmacros/core/cvalue.py
create mode 100644 tools/lldbmacros/core/kernelcore.py
create mode 100644 tools/lldbmacros/core/lazytarget.py
create mode 100644 tools/lldbmacros/core/operating_system.py
create mode 100644 tools/lldbmacros/core/standard.py
create mode 100755 tools/lldbmacros/core/syntax_checker.py
create mode 100644 tools/lldbmacros/core/xnu_lldb_init.py
create mode 100644 tools/lldbmacros/ioreg.py
create mode 100644 tools/lldbmacros/ipc.py
create mode 100644 tools/lldbmacros/kdp.py
create mode 100644 tools/lldbmacros/mbufdefines.py
create mode 100644 tools/lldbmacros/mbufs.py
create mode 100644 tools/lldbmacros/memory.py
create mode 100644 tools/lldbmacros/misc.py
create mode 100644 tools/lldbmacros/net.py
create mode 100644 tools/lldbmacros/netdefines.py
create mode 100644 tools/lldbmacros/pci.py
create mode 100644 tools/lldbmacros/plugins/__init__.py
create mode 100644 tools/lldbmacros/plugins/speedtracer.py
create mode 100644 tools/lldbmacros/plugins/zprint_perf_log.py
create mode 100644 tools/lldbmacros/pmap.py
create mode 100644 tools/lldbmacros/process.py
create mode 100644 tools/lldbmacros/routedefines.py
create mode 100644 tools/lldbmacros/scheduler.py
create mode 100644 tools/lldbmacros/userspace.py
create mode 100644 tools/lldbmacros/utils.py
create mode 100644 tools/lldbmacros/xnu.py
create mode 100644 tools/lldbmacros/xnudefines.py
create mode 100755 tools/remote_build.sh
create mode 100644 tools/tests/Makefile
mode change 100644 => 100755 tools/tests/libMicro/bench.sh
mode change 100644 => 100755 tools/tests/libMicro/benchDS.sh
mode change 100644 => 100755 tools/tests/libMicro/coreos_bench.sh
mode change 100644 => 100755 tools/tests/libMicro/create_stuff.sh
mode change 100644 => 100755 tools/tests/libMicro/embd_bench.sh
mode change 100644 => 100755 tools/tests/libMicro/multiview.sh
mode change 100644 => 100755 tools/tests/libMicro/od_account_create.sh
mode change 100644 => 100755 tools/tests/libMicro/od_account_delete.sh
mode change 100644 => 100755 tools/tests/libMicro/wrapper.sh
create mode 100644 tools/tests/memorystatus/Makefile
create mode 100644 tools/tests/memorystatus/memorystatus.c
create mode 100644 tools/tests/perf_index/Makefile
create mode 100644 tools/tests/perf_index/compile.c
create mode 100644 tools/tests/perf_index/iperf.c
create mode 100644 tools/tests/perf_index/main.c
create mode 100644 tools/tests/perf_index/md5.c
create mode 100644 tools/tests/perf_index/perf_index.h
create mode 100644 tools/tests/perf_index/stress_cpu.c
create mode 100644 tools/tests/perf_index/stress_fault.c
create mode 100644 tools/tests/perf_index/stress_file_create.c
create mode 100644 tools/tests/perf_index/stress_file_local.c
create mode 100644 tools/tests/perf_index/stress_file_ram.c
create mode 100644 tools/tests/perf_index/stress_file_read.c
create mode 100644 tools/tests/perf_index/stress_file_write.c
create mode 100644 tools/tests/perf_index/stress_general.c
create mode 100644 tools/tests/perf_index/stress_memory.c
create mode 100644 tools/tests/perf_index/stress_syscall.c
create mode 100644 tools/tests/perf_index/test_controller.py
create mode 100644 tools/tests/superpages/Makefile
create mode 100644 tools/tests/unit_tests/Makefile
create mode 100755 tools/tests/unit_tests/build_tests.sh
create mode 100644 tools/tests/unit_tests/clock_types_6368156.c
create mode 100644 tools/tests/unit_tests/codesigntests-entitlements.plist
create mode 100644 tools/tests/unit_tests/codesigntests.c
create mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog-Entitlements.plist
create mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.m
create mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.pbxproj
create mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.xcworkspace/contents.xcworkspacedata
create mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpumon_test_framework.c
rename osfmk/i386/timer.h => tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mach_exc.defs (88%)
create mode 100644 tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c
create mode 100644 tools/tests/unit_tests/fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c
create mode 100644 tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test.c
create mode 100644 tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_common.h
create mode 100644 tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_framework.c
rename osfmk/kern/norma_protos.h => tools/tests/unit_tests/guarded_fd_tests_11746236_src/mach_exc.defs (76%)
create mode 100644 tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test.c
create mode 100644 tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test_framework.c
rename osfmk/i386/bzero.s => tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/mach_exc.defs (71%)
create mode 100644 tools/tests/unit_tests/libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress-Entitlements.plist
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.pbxproj
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/contents.xcworkspacedata
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/xcuserdata/rab.xcuserdatad/UserInterfaceState.xcuserstate
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress 2.xcscheme
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress copy.xcscheme
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress.xcscheme
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/xcschememanagement.plist
create mode 100644 tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress/monitor_stress.m
create mode 100644 tools/tests/unit_tests/pipe_test_10807398_src/child.c
create mode 100644 tools/tests/unit_tests/pipe_test_10807398_src/parent.c
create mode 100644 tools/tests/unit_tests/pipes_fill_procinfo_11179336.c
create mode 100644 tools/tests/unit_tests/ptcwd_test_11269991_src/ptcwd_test_11269991.c
create mode 100644 tools/tests/unit_tests/ptrace_test_12507045_src/ptrace_test.c
create mode 100644 tools/tests/unit_tests/ptrace_tests_10767133_src/ptrace_tests_10767133.c
create mode 100644 tools/tests/unit_tests/sampletest.c
create mode 100644 tools/tests/unit_tests/semctl_test_8534495_src/semctl_test_8534495.c
create mode 100644 tools/tests/unit_tests/sprace_test_11891562_src/sprace_test_11891562.c
create mode 100644 tools/tests/unit_tests/test_waitqlocktry_12053360.c
create mode 100644 tools/tests/unit_tests/test_wq_exit_race_panic_10970548.c
create mode 100644 tools/tests/unit_tests/thread_get_state_11918811_src/excserver.defs
create mode 100644 tools/tests/unit_tests/thread_get_state_11918811_src/thread_get_state.c
create mode 100755 tools/tests/unit_tests/xnu_raft_tests.py
create mode 100755 tools/xcrun_cache.sh

diff --git a/EXTERNAL_HEADERS/AssertMacros.h b/EXTERNAL_HEADERS/AssertMacros.h
index 2deea1201..99214497b 100644
--- a/EXTERNAL_HEADERS/AssertMacros.h
+++ b/EXTERNAL_HEADERS/AssertMacros.h
@@ -1,26 +1,51 @@
 /*
-     File:       AssertMacros.h
-
-     Contains:   This file defines structured error handling and assertion macros for
-                 programming in C.  Originally used in QuickDraw GX and later enhanced.
-                 These macros are used throughout Apple's software.
-
-     See "Living In an Exceptional World" by Sean Parent
-     (develop, The Apple Technical Journal, Issue 11, August/September 1992)
-
-     for the methodology behind these error handling and assertion macros.
-
-     Copyright:  © 2002-2007 by Apple Inc., all rights reserved.
-
-     Bugs?:      For bug reports, consult the following page on
-                 the World Wide Web:
+ * Copyright (c) 2002-2008 by Apple Inc.. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +/* + File: AssertMacros.h - http://developer.apple.com/bugreporter/ + Contains: This file defines structured error handling and assertion macros for + programming in C. Originally used in QuickDraw GX and later enhanced. + These macros are used throughout Apple's software. + + New code may not want to begin adopting these macros and instead use + existing language functionality. + + See "Living In an Exceptional World" by Sean Parent + (develop, The Apple Technical Journal, Issue 11, August/September 1992) + or + + for the methodology behind these error handling and assertion macros. + + Bugs?: For bug reports, consult the following page on + the World Wide Web: + + http://developer.apple.com/bugreporter/ */ #ifndef __ASSERTMACROS__ #define __ASSERTMACROS__ - /* * Macro overview: * @@ -60,6 +85,27 @@ * By default, all messages write to stderr. If you would like to write a custom * error message formater, defined DEBUG_ASSERT_MESSAGE to your function name. * + * Each individual macro will only be defined if it is not already defined, so + * you can redefine their behavior singly by providing your own definition before + * this file is included. + * + * If you define __ASSERTMACROS__ before this file is included, then nothing in + * this file will take effect. + * + * Prior to Mac OS X 10.6 the macro names used in this file conflicted with some + * user code, including libraries in boost and the proposed C++ standards efforts, + * and there was no way for a client of this header to resolve this conflict. Because + * of this, most of the macros have been changed so that they are prefixed with + * __ and contain at least one capital letter, which should alleviate the current + * and future conflicts. However, to allow current sources to continue to compile, + * compatibility macros are defined at the end with the old names. A tops script + * at the end of this file will convert all of the old macro names used in a directory + * to the new names. Clients are recommended to migrate over to these new macros as + * they update their sources because a future release of Mac OS X will remove the + * old macro definitions ( without the double-underscore prefix ). Clients who + * want to compile without the old macro definitions can define the macro + * __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES to 0 before this file is + * included. */ @@ -180,7 +226,7 @@ /* - * debug_string(message) + * __Debug_String(message) * * Summary: * Production builds: does nothing and produces no code. @@ -193,26 +239,27 @@ * The C string to display. * */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define debug_string(message) -#else - #define debug_string(message) \ - do \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - "", \ - 0, \ - message, \ - __FILE__, \ - __LINE__, \ - 0); \ - } while ( 0 ) +#ifndef __Debug_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Debug_String(message) + #else + #define __Debug_String(message) \ + do \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + "", \ + 0, \ + message, \ + __FILE__, \ + __LINE__, \ + 0); \ + } while ( 0 ) + #endif #endif - /* - * check(assertion) + * __Check(assertion) * * Summary: * Production builds: does nothing and produces no code. @@ -225,32 +272,29 @@ * assertion: * The assertion expression. 
*/ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define check(assertion) -#else - #define check(assertion) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - 0, \ - 0, \ - __FILE__, \ - __LINE__, \ - 0); \ - } \ - } while ( 0 ) +#ifndef __Check + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Check(assertion) + #else + #define __Check(assertion) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, 0, 0, __FILE__, __LINE__, 0 ); \ + } \ + } while ( 0 ) + #endif #endif -#define ncheck(assertion) \ - check(!(assertion)) - +#ifndef __nCheck + #define __nCheck(assertion) __Check(!(assertion)) +#endif /* - * check_string(assertion, message) + * __Check_String(assertion, message) * * Summary: * Production builds: does nothing and produces no code. @@ -266,32 +310,29 @@ * message: * The C string to display. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define check_string(assertion, message) -#else - #define check_string(assertion, message) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - 0, \ - message, \ - __FILE__, \ - __LINE__, \ - 0); \ - } \ - } while ( 0 ) +#ifndef __Check_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Check_String(assertion, message) + #else + #define __Check_String(assertion, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, 0, message, __FILE__, __LINE__, 0 ); \ + } \ + } while ( 0 ) + #endif #endif -#define ncheck_string(assertion, message) \ - check_string(!(assertion), message) - +#ifndef __nCheck_String + #define __nCheck_String(assertion, message) __Check_String(!(assertion), message) +#endif /* - * check_noerr(errorCode) + * __Check_noErr(errorCode) * * Summary: * Production builds: does nothing and produces no code. @@ -304,30 +345,26 @@ * errorCode: * The errorCode expression to compare with 0. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define check_noerr(errorCode) -#else - #define check_noerr(errorCode) \ - do \ - { \ - long evalOnceErrorCode = (errorCode); \ - if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #errorCode " == 0 ", \ - 0, \ - 0, \ - __FILE__, \ - __LINE__, \ - evalOnceErrorCode); \ - } \ - } while ( 0 ) +#ifndef __Check_noErr + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Check_noErr(errorCode) + #else + #define __Check_noErr(errorCode) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", 0, 0, __FILE__, __LINE__, evalOnceErrorCode ); \ + } \ + } while ( 0 ) + #endif #endif - /* - * check_noerr_string(errorCode, message) + * __Check_noErr_String(errorCode, message) * * Summary: * Production builds: check_noerr_string() does nothing and produces @@ -344,30 +381,26 @@ * message: * The C string to display. 
*/ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define check_noerr_string(errorCode, message) -#else - #define check_noerr_string(errorCode, message) \ - do \ - { \ - long evalOnceErrorCode = (errorCode); \ - if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #errorCode " == 0 ", \ - 0, \ - message, \ - __FILE__, \ - __LINE__, \ - evalOnceErrorCode); \ - } \ - } while ( 0 ) +#ifndef __Check_noErr_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Check_noErr_String(errorCode, message) + #else + #define __Check_noErr_String(errorCode, message) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", 0, message, __FILE__, __LINE__, evalOnceErrorCode ); \ + } \ + } while ( 0 ) + #endif #endif - /* - * verify(assertion) + * __Verify(assertion) * * Summary: * Production builds: evaluate the assertion expression, but ignore @@ -381,38 +414,35 @@ * assertion: * The assertion expression. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define verify(assertion) \ - do \ - { \ - if ( !(assertion) ) \ - { \ - } \ - } while ( 0 ) -#else - #define verify(assertion) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - 0, \ - 0, \ - __FILE__, \ - __LINE__, \ - 0); \ - } \ - } while ( 0 ) +#ifndef __Verify + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Verify(assertion) \ + do \ + { \ + if ( !(assertion) ) \ + { \ + } \ + } while ( 0 ) + #else + #define __Verify(assertion) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, 0, 0, __FILE__, __LINE__, 0 ); \ + } \ + } while ( 0 ) + #endif #endif -#define nverify(assertion) \ - verify(!(assertion)) - +#ifndef __nVerify + #define __nVerify(assertion) __Verify(!(assertion)) +#endif /* - * verify_string(assertion, message) + * __Verify_String(assertion, message) * * Summary: * Production builds: evaluate the assertion expression, but ignore @@ -429,38 +459,35 @@ * message: * The C string to display. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define verify_string(assertion, message) \ - do \ - { \ - if ( !(assertion) ) \ - { \ - } \ - } while ( 0 ) -#else - #define verify_string(assertion, message) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - 0, \ - message, \ - __FILE__, \ - __LINE__, \ - 0); \ - } \ - } while ( 0 ) +#ifndef __Verify_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Verify_String(assertion, message) \ + do \ + { \ + if ( !(assertion) ) \ + { \ + } \ + } while ( 0 ) + #else + #define __Verify_String(assertion, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, 0, message, __FILE__, __LINE__, 0 ); \ + } \ + } while ( 0 ) + #endif #endif -#define nverify_string(assertion, message) \ - verify_string(!(assertion), message) - +#ifndef __nVerify_String + #define __nVerify_String(assertion, message) __Verify_String(!(assertion), message) +#endif /* - * verify_noerr(errorCode) + * __Verify_noErr(errorCode) * * Summary: * Production builds: evaluate the errorCode expression, but ignore @@ -474,36 +501,32 @@ * errorCode: * The expression to compare to 0. 
*/ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define verify_noerr(errorCode) \ - do \ - { \ - if ( 0 != (errorCode) ) \ - { \ - } \ - } while ( 0 ) -#else - #define verify_noerr(errorCode) \ - do \ - { \ - long evalOnceErrorCode = (errorCode); \ - if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #errorCode " == 0 ", \ - 0, \ - 0, \ - __FILE__, \ - __LINE__, \ - evalOnceErrorCode); \ - } \ - } while ( 0 ) +#ifndef __Verify_noErr + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Verify_noErr(errorCode) \ + do \ + { \ + if ( 0 != (errorCode) ) \ + { \ + } \ + } while ( 0 ) + #else + #define __Verify_noErr(errorCode) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", 0, 0, __FILE__, __LINE__, evalOnceErrorCode ); \ + } \ + } while ( 0 ) + #endif #endif - /* - * verify_noerr_string(errorCode, message) + * __Verify_noErr_String(errorCode, message) * * Summary: * Production builds: evaluate the errorCode expression, but ignore @@ -520,36 +543,72 @@ * message: * The C string to display. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define verify_noerr_string(errorCode, message) \ - do \ - { \ - if ( 0 != (errorCode) ) \ - { \ - } \ - } while ( 0 ) -#else - #define verify_noerr_string(errorCode, message) \ - do \ - { \ - long evalOnceErrorCode = (errorCode); \ - if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #errorCode " == 0 ", \ - 0, \ - message, \ - __FILE__, \ - __LINE__, \ - evalOnceErrorCode); \ - } \ - } while ( 0 ) +#ifndef __Verify_noErr_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Verify_noErr_String(errorCode, message) \ + do \ + { \ + if ( 0 != (errorCode) ) \ + { \ + } \ + } while ( 0 ) + #else + #define __Verify_noErr_String(errorCode, message) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", 0, message, __FILE__, __LINE__, evalOnceErrorCode ); \ + } \ + } while ( 0 ) + #endif #endif +/* + * __Verify_noErr_Action(errorCode, action) + * + * Summary: + * Production builds: if the errorCode expression does not equal 0 (noErr), + * execute the action statement or compound statement (block). + * + * Non-production builds: if the errorCode expression does not equal 0 (noErr), + * call DEBUG_ASSERT_MESSAGE and then execute the action statement or compound + * statement (block). + * + * Parameters: + * + * errorCode: + * The expression to compare to 0. + * + * action: + * The statement or compound statement (block). 
+ */ +#ifndef __Verify_noErr_Action + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Verify_noErr_Action(errorCode, action) \ + if ( 0 != (errorCode) ) { \ + action; \ + } \ + else do {} while (0) + #else + #define __Verify_noErr_Action(errorCode, action) \ + do { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", 0, 0, __FILE__, __LINE__, 0 ); \ + action; \ + } \ + } while (0) + #endif +#endif /* - * verify_action(assertion, action) + * __Verify_Action(assertion, action) * * Summary: * Production builds: if the assertion expression evaluates to false, @@ -567,37 +626,27 @@ * action: * The statement or compound statement (block). */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define verify_action(assertion, action) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - action; \ - } \ - } while ( 0 ) -#else - #define verify_action(assertion, action) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - 0, \ - 0, \ - __FILE__, \ - __LINE__, \ - 0); \ - { action; } \ - } \ - } while ( 0 ) +#ifndef __Verify_Action + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Verify_Action(assertion, action) \ + if ( __builtin_expect(!(assertion), 0) ) { \ + action; \ + } \ + else do {} while (0) + #else + #define __Verify_Action(assertion, action) \ + if ( __builtin_expect(!(assertion), 0) ) { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, 0, 0, __FILE__, __LINE__, 0 ); \ + action; \ + } \ + else do {} while (0) + #endif #endif - /* - * require(assertion, exceptionLabel) + * __Require(assertion, exceptionLabel) * * Summary: * Production builds: if the assertion expression evaluates to false, @@ -614,40 +663,36 @@ * exceptionLabel: * The label. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define require(assertion, exceptionLabel) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - goto exceptionLabel; \ - } \ - } while ( 0 ) -#else - #define require(assertion, exceptionLabel) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - #exceptionLabel, \ - 0, \ - __FILE__, \ - __LINE__, \ - 0); \ - goto exceptionLabel; \ - } \ - } while ( 0 ) +#ifndef __Require + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Require(assertion, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #else + #define __Require(assertion, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, #exceptionLabel, 0, __FILE__, __LINE__, 0); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #endif #endif -#define nrequire(assertion, exceptionLabel) \ - require(!(assertion), exceptionLabel) - +#ifndef __nRequire + #define __nRequire(assertion, exceptionLabel) __Require(!(assertion), exceptionLabel) +#endif /* - * require_action(assertion, exceptionLabel, action) + * __Require_Action(assertion, exceptionLabel, action) * * Summary: * Production builds: if the assertion expression evaluates to false, @@ -669,46 +714,44 @@ * action: * The statement or compound statement (block). 
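/*
 * Design note, with a sketch of hypothetical caller code (not part of this
 * patch): the _Action variants above expand to
 * "if (...) { action; } else do {} while (0)" instead of the usual
 * do/while(0) wrapper. The trailing else consumes the semicolon and avoids
 * dangling-else ambiguity, while leaving a break or continue written in the
 * action bound to the caller's loop rather than to a macro-internal loop.
 */
#include <stddef.h>
#include <AssertMacros.h>

static void call_all(void (*handlers[])(void), size_t count)
{
    size_t i;
    for (i = 0; i < count; i++) {
        /* continue advances this for loop, as intended */
        __Verify_Action(handlers[i] != NULL, continue);
        handlers[i]();
    }
}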
*/ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define require_action(assertion, exceptionLabel, action) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) -#else - #define require_action(assertion, exceptionLabel, action) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - #exceptionLabel, \ - 0, \ - __FILE__, \ - __LINE__, \ - 0); \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) +#ifndef __Require_Action + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Require_Action(assertion, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #else + #define __Require_Action(assertion, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, #exceptionLabel, 0, __FILE__, __LINE__, 0); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #endif #endif -#define nrequire_action(assertion, exceptionLabel, action) \ - require_action(!(assertion), exceptionLabel, action) - +#ifndef __nRequire_Action + #define __nRequire_Action(assertion, exceptionLabel, action) \ + __Require_Action(!(assertion), exceptionLabel, action) +#endif /* - * require_quiet(assertion, exceptionLabel) + * __Require_Quiet(assertion, exceptionLabel) * * Summary: * If the assertion expression evaluates to false, goto exceptionLabel. @@ -721,21 +764,23 @@ * exceptionLabel: * The label. */ -#define require_quiet(assertion, exceptionLabel) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - goto exceptionLabel; \ - } \ - } while ( 0 ) - -#define nrequire_quiet(assertion, exceptionLabel) \ - require_quiet(!(assertion), exceptionLabel) +#ifndef __Require_Quiet + #define __Require_Quiet(assertion, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif +#ifndef __nRequire_Quiet + #define __nRequire_Quiet(assertion, exceptionLabel) __Require_Quiet(!(assertion), exceptionLabel) +#endif /* - * require_action_quiet(assertion, exceptionLabel, action) + * __Require_Action_Quiet(assertion, exceptionLabel, action) * * Summary: * If the assertion expression evaluates to false, execute the action @@ -752,24 +797,27 @@ * action: * The statement or compound statement (block). 
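/*
 * An illustrative sketch (hypothetical caller, not part of this patch):
 * the __Require family is built around label-based cleanup, funneling
 * every failure through a single exit path.
 */
#include <stdio.h>
#include <stdlib.h>
#include <AssertMacros.h>

static int load_file(const char *path, char **out, long *size)
{
    int err = -1;
    char *buf = NULL;
    FILE *f = fopen(path, "rb");
    __Require(f != NULL, bail);                          /* nothing to clean up yet */
    __Require(fseek(f, 0, SEEK_END) == 0, close_file);
    *size = ftell(f);
    __Require_Action(*size >= 0, close_file, err = -2);  /* remap the error, then jump */
    rewind(f);
    buf = malloc((size_t)*size + 1);
    __Require(buf != NULL, close_file);
    __Require(fread(buf, 1, (size_t)*size, f) == (size_t)*size, close_file);
    buf[*size] = '\0';
    *out = buf;
    buf = NULL;
    err = 0;
close_file:
    fclose(f);
    free(buf);                                           /* NULL on success */
bail:
    return err;
}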
*/ -#define require_action_quiet(assertion, exceptionLabel, action) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) - -#define nrequire_action_quiet(assertion, exceptionLabel, action) \ - require_action_quiet(!(assertion), exceptionLabel, action) +#ifndef __Require_Action_Quiet + #define __Require_Action_Quiet(assertion, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif +#ifndef __nRequire_Action_Quiet + #define __nRequire_Action_Quiet(assertion, exceptionLabel, action) \ + __Require_Action_Quiet(!(assertion), exceptionLabel, action) +#endif /* - * require_string(assertion, exceptionLabel, message) + * __Require_String(assertion, exceptionLabel, message) * * Summary: * Production builds: if the assertion expression evaluates to false, @@ -789,40 +837,38 @@ * message: * The C string to display. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define require_string(assertion, exceptionLabel, message) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - goto exceptionLabel; \ - } \ - } while ( 0 ) -#else - #define require_string(assertion, exceptionLabel, message) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - #exceptionLabel, \ - message, \ - __FILE__, \ - __LINE__, \ - 0); \ - goto exceptionLabel; \ - } \ - } while ( 0 ) +#ifndef __Require_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Require_String(assertion, exceptionLabel, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #else + #define __Require_String(assertion, exceptionLabel, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, #exceptionLabel, message, __FILE__, __LINE__, 0); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #endif #endif -#define nrequire_string(assertion, exceptionLabel, string) \ - require_string(!(assertion), exceptionLabel, string) - +#ifndef __nRequire_String + #define __nRequire_String(assertion, exceptionLabel, string) \ + __Require_String(!(assertion), exceptionLabel, string) +#endif /* - * require_action_string(assertion, exceptionLabel, action, message) + * __Require_Action_String(assertion, exceptionLabel, action, message) * * Summary: * Production builds: if the assertion expression evaluates to false, @@ -847,46 +893,44 @@ * message: * The C string to display. 
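/*
 * An illustrative sketch (hypothetical caller, not part of this patch):
 * the _Quiet variants never call DEBUG_ASSERT_MESSAGE, so they fit
 * failures that are expected in normal operation; the _String variants
 * attach a human-readable hint to the debug output.
 */
#include <stdlib.h>
#include <AssertMacros.h>

static int parse_port(const char *text, int *port)
{
    int value;
    /* a missing argument is an ordinary, expected case: fail silently */
    __Require_Quiet(text != NULL && text[0] != '\0', bail);
    value = atoi(text);
    /* an out-of-range value deserves a debug-build diagnostic */
    __Require_String(value > 0 && value < 65536, bail, "port out of range");
    *port = value;
    return 0;
bail:
    return -1;
}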
*/ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define require_action_string(assertion, exceptionLabel, action, message) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) -#else - #define require_action_string(assertion, exceptionLabel, action, message) \ - do \ - { \ - if ( __builtin_expect(!(assertion), 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #assertion, \ - #exceptionLabel, \ - message, \ - __FILE__, \ - __LINE__, \ - 0); \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) +#ifndef __Require_Action_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Require_Action_String(assertion, exceptionLabel, action, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #else + #define __Require_Action_String(assertion, exceptionLabel, action, message) \ + do \ + { \ + if ( __builtin_expect(!(assertion), 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #assertion, #exceptionLabel, message, __FILE__, __LINE__, 0); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #endif #endif -#define nrequire_action_string(assertion, exceptionLabel, action, message) \ - require_action_string(!(assertion), exceptionLabel, action, message) - +#ifndef __nRequire_Action_String + #define __nRequire_Action_String(assertion, exceptionLabel, action, message) \ + __Require_Action_String(!(assertion), exceptionLabel, action, message) +#endif /* - * require_noerr(errorCode, exceptionLabel) + * __Require_noErr(errorCode, exceptionLabel) * * Summary: * Production builds: if the errorCode expression does not equal 0 (noErr), @@ -903,37 +947,34 @@ * exceptionLabel: * The label. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define require_noerr(errorCode, exceptionLabel) \ - do \ - { \ - if ( __builtin_expect(0 != (errorCode), 0) ) \ - { \ - goto exceptionLabel; \ - } \ - } while ( 0 ) -#else - #define require_noerr(errorCode, exceptionLabel) \ - do \ - { \ - long evalOnceErrorCode = (errorCode); \ - if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #errorCode " == 0 ", \ - #exceptionLabel, \ - 0, \ - __FILE__, \ - __LINE__, \ - evalOnceErrorCode); \ - goto exceptionLabel; \ - } \ - } while ( 0 ) +#ifndef __Require_noErr + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Require_noErr(errorCode, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #else + #define __Require_noErr(errorCode, exceptionLabel) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", #exceptionLabel, 0, __FILE__, __LINE__, evalOnceErrorCode); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #endif #endif /* - * require_noerr_action(errorCode, exceptionLabel, action) + * __Require_noErr_Action(errorCode, exceptionLabel, action) * * Summary: * Production builds: if the errorCode expression does not equal 0 (noErr), @@ -955,44 +996,40 @@ * action: * The statement or compound statement (block). 
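/*
 * An illustrative sketch (hypothetical caller and helpers, not part of
 * this patch): the noErr variants compare an error expression against 0,
 * so a call can be checked inline. The debug expansion captures the result
 * in evalOnceErrorCode, so the expression is evaluated exactly once in
 * both build flavors, and its value is passed to DEBUG_ASSERT_MESSAGE.
 */
#include <AssertMacros.h>

extern int open_device(void);        /* hypothetical: returns 0 on success */
extern int configure_device(void);   /* hypothetical: returns 0 on success */

static int setup_device(void)
{
    int err = open_device();
    __Require_noErr(err, bail);
    __Require_noErr(err = configure_device(), bail);   /* checked inline */
    return 0;
bail:
    return err;
}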
*/ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define require_noerr_action(errorCode, exceptionLabel, action) \ - do \ - { \ - if ( __builtin_expect(0 != (errorCode), 0) ) \ - { \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) -#else - #define require_noerr_action(errorCode, exceptionLabel, action) \ - do \ - { \ - long evalOnceErrorCode = (errorCode); \ - if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #errorCode " == 0 ", \ - #exceptionLabel, \ - 0, \ - __FILE__, \ - __LINE__, \ - evalOnceErrorCode); \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) +#ifndef __Require_noErr_Action + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Require_noErr_Action(errorCode, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #else + #define __Require_noErr_Action(errorCode, exceptionLabel, action) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", #exceptionLabel, 0, __FILE__, __LINE__, evalOnceErrorCode); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #endif #endif - /* - * require_noerr_quiet(errorCode, exceptionLabel) + * __Require_noErr_Quiet(errorCode, exceptionLabel) * * Summary: * If the errorCode expression does not equal 0 (noErr), @@ -1006,18 +1043,19 @@ * exceptionLabel: * The label. */ -#define require_noerr_quiet(errorCode, exceptionLabel) \ - do \ - { \ - if ( __builtin_expect(0 != (errorCode), 0) ) \ - { \ - goto exceptionLabel; \ - } \ - } while ( 0 ) - +#ifndef __Require_noErr_Quiet + #define __Require_noErr_Quiet(errorCode, exceptionLabel) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif /* - * require_noerr_action_quiet(errorCode, exceptionLabel, action) + * __Require_noErr_Action_Quiet(errorCode, exceptionLabel, action) * * Summary: * If the errorCode expression does not equal 0 (noErr), @@ -1035,21 +1073,22 @@ * action: * The statement or compound statement (block). */ -#define require_noerr_action_quiet(errorCode, exceptionLabel, action) \ - do \ - { \ - if ( __builtin_expect(0 != (errorCode), 0) ) \ - { \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) - +#ifndef __Require_noErr_Action_Quiet + #define __Require_noErr_Action_Quiet(errorCode, exceptionLabel, action) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) +#endif /* - * require_noerr_string(errorCode, exceptionLabel, message) + * __Require_noErr_String(errorCode, exceptionLabel, message) * * Summary: * Production builds: if the errorCode expression does not equal 0 (noErr), @@ -1069,38 +1108,34 @@ * message: * The C string to display. 
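/*
 * An illustrative sketch (hypothetical caller and helpers, not part of
 * this patch): __Require_noErr_Quiet skips the debug message for routine
 * failures, and __Require_noErr_Action runs a cleanup or remapping
 * statement before jumping to the label.
 */
#include <AssertMacros.h>

extern int open_session(void);    /* hypothetical: returns 0 on success */
extern int handshake(void);       /* hypothetical: returns 0 on success */
extern void close_session(void);

static int start_session(void)
{
    int err = open_session();
    /* expected on hosts without the service: fail without logging */
    __Require_noErr_Quiet(err, bail);
    /* unexpected: log in debug builds, release the session, then bail */
    __Require_noErr_Action(err = handshake(), bail, close_session());
    return 0;
bail:
    return err;
}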
*/ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define require_noerr_string(errorCode, exceptionLabel, message) \ - do \ - { \ - if ( __builtin_expect(0 != (errorCode), 0) ) \ - { \ - goto exceptionLabel; \ - } \ - } while ( 0 ) -#else - #define require_noerr_string(errorCode, exceptionLabel, message) \ - do \ - { \ - long evalOnceErrorCode = (errorCode); \ - if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #errorCode " == 0 ", \ - #exceptionLabel, \ - message, \ - __FILE__, \ - __LINE__, \ - evalOnceErrorCode); \ - goto exceptionLabel; \ - } \ - } while ( 0 ) +#ifndef __Require_noErr_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Require_noErr_String(errorCode, exceptionLabel, message) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #else + #define __Require_noErr_String(errorCode, exceptionLabel, message) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", #exceptionLabel, message, __FILE__, __LINE__, evalOnceErrorCode); \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #endif #endif - /* - * require_noerr_action_string(errorCode, exceptionLabel, action, message) + * __Require_noErr_Action_String(errorCode, exceptionLabel, action, message) * * Summary: * Production builds: if the errorCode expression does not equal 0 (noErr), @@ -1125,41 +1160,270 @@ * message: * The C string to display. */ -#if DEBUG_ASSERT_PRODUCTION_CODE - #define require_noerr_action_string(errorCode, exceptionLabel, action, message)\ - do \ - { \ - if ( __builtin_expect(0 != (errorCode), 0) ) \ - { \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) -#else - #define require_noerr_action_string(errorCode, exceptionLabel, action, message) \ - do \ - { \ - long evalOnceErrorCode = (errorCode); \ - if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ - { \ - DEBUG_ASSERT_MESSAGE( \ - DEBUG_ASSERT_COMPONENT_NAME_STRING, \ - #errorCode " == 0 ", \ - #exceptionLabel, \ - message, \ - __FILE__, \ - __LINE__, \ - evalOnceErrorCode); \ - { \ - action; \ - } \ - goto exceptionLabel; \ - } \ - } while ( 0 ) +#ifndef __Require_noErr_Action_String + #if DEBUG_ASSERT_PRODUCTION_CODE + #define __Require_noErr_Action_String(errorCode, exceptionLabel, action, message) \ + do \ + { \ + if ( __builtin_expect(0 != (errorCode), 0) ) \ + { \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #else + #define __Require_noErr_Action_String(errorCode, exceptionLabel, action, message) \ + do \ + { \ + long evalOnceErrorCode = (errorCode); \ + if ( __builtin_expect(0 != evalOnceErrorCode, 0) ) \ + { \ + DEBUG_ASSERT_MESSAGE( \ + DEBUG_ASSERT_COMPONENT_NAME_STRING, \ + #errorCode " == 0 ", #exceptionLabel, message, __FILE__, __LINE__, evalOnceErrorCode); \ + { \ + action; \ + } \ + goto exceptionLabel; \ + } \ + } while ( 0 ) + #endif #endif +/* + * __Check_Compile_Time(expr) + * + * Summary: + * any build: if the expression is not true, generate a compile time error. + * + * Parameters: + * + * expr: + * The compile time expression that should evaluate to non-zero. + * + * Discussion: + * This declares an array with a size that is determined by a compile-time expression. + * If false, it declares a negatively sized array, which generates a compile-time error.
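/*
 * An illustrative note (not part of this patch): a failing check expands
 * to an extern array with a negative size, which no compiler accepts:
 *
 *     __Check_Compile_Time( sizeof( long ) == 2 );
 *     // expands to: extern int compile_time_assert_failed[ -1 ] ...;  // error
 *
 * On compilers with C11 support, _Static_assert(sizeof(long) == 2, "...")
 * performs the same check with a clearer diagnostic.
 */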
+ * + * Examples: + * __Check_Compile_Time( sizeof( int ) == 4 ); + * __Check_Compile_Time( offsetof( MyStruct, myField ) == 4 ); + * __Check_Compile_Time( ( kMyBufferSize % 512 ) == 0 ); + * + * Note: This only works with compile-time expressions. + * Note: This only works in places where extern declarations are allowed (e.g. global scope). + */ +#ifndef __Check_Compile_Time + #ifdef __GNUC__ + #define __Check_Compile_Time( expr ) \ + extern int compile_time_assert_failed[ ( expr ) ? 1 : -1 ] __attribute__( ( unused ) ) + #else + #define __Check_Compile_Time( expr ) \ + extern int compile_time_assert_failed[ ( expr ) ? 1 : -1 ] + #endif +#endif -#endif /* __ASSERTMACROS__ */ +/* + * Since time immemorial, Mac OS X has defined versions of most of these macros without the __ prefix, which + * could collide with similarly named functions or macros in user code, including new functionality in + * Boost and the C++ standard library. + * + * A future release of Mac OS X will no longer do this, and will require that clients move to the + * new macros as defined above. However, in the interim both the new and old macros will work, unless + * clients define the macro __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES to 0 before this file is included + * in their compilations. Clients who do not want the older macros defined can accomplish this by adding + * #define __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES 0 + * at the top of their sources, or by adding -D__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES=0 to the + * gcc compilation options. + * + * To aid users of these macros in converting their sources, the following tops script will convert usages + * of the old macros into the new equivalents. To do so, in Terminal go into the directory containing the + * sources to be converted and run this command. + * + find .
\( -name '*.[chm]' -o -name '*.cc' -o -name '*.cp' -o -name '*.cpp' -o -name '*.mm' \) -print0 | xargs -0 tops -verbose \ + replace "check()" with "__Check()" \ + replace "check_noerr()" with "__Check_noErr()" \ + replace "check_noerr_string()" with "__Check_noErr_String()" \ + replace "check_string()" with "__Check_String()" \ + replace "require()" with "__Require()" \ + replace "require_action()" with "__Require_Action()" \ + replace "require_action_string()" with "__Require_Action_String()" \ + replace "require_noerr()" with "__Require_noErr()" \ + replace "require_noerr_action()" with "__Require_noErr_Action()" \ + replace "require_noerr_action_string()" with "__Require_noErr_Action_String()" \ + replace "require_noerr_string()" with "__Require_noErr_String()" \ + replace "require_string()" with "__Require_String()" \ + replace "verify()" with "__Verify()" \ + replace "verify_action()" with "__Verify_Action()" \ + replace "verify_noerr()" with "__Verify_noErr()" \ + replace "verify_noerr_action()" with "__Verify_noErr_Action()" \ + replace "verify_noerr_string()" with "__Verify_noErr_String()" \ + replace "verify_string()" with "__Verify_String()" \ + replace "ncheck()" with "__nCheck()" \ + replace "ncheck_string()" with "__nCheck_String()" \ + replace "nrequire()" with "__nRequire()" \ + replace "nrequire_action()" with "__nRequire_Action()" \ + replace "nrequire_action_quiet()" with "__nRequire_Action_Quiet()" \ + replace "nrequire_action_string()" with "__nRequire_Action_String()" \ + replace "nrequire_quiet()" with "__nRequire_Quiet()" \ + replace "nrequire_string()" with "__nRequire_String()" \ + replace "nverify()" with "__nVerify()" \ + replace "nverify_string()" with "__nVerify_String()" \ + replace "require_action_quiet()" with "__Require_Action_Quiet()" \ + replace "require_noerr_action_quiet()" with "__Require_noErr_Action_Quiet()" \ + replace "require_noerr_quiet()" with "__Require_noErr_Quiet()" \ + replace "require_quiet()" with "__Require_Quiet()" \ + replace "check_compile_time()" with "__Check_Compile_Time()" \ + replace "debug_string()" with "__Debug_String()" + * + */ + +#ifndef __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES + /* If we haven't set this yet, it defaults to on. In the next release, this will default to off.
*/ + #define __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES 1 +#endif + +#if __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES + + #ifndef check + #define check(assertion) __Check(assertion) + #endif + + #ifndef check_noerr + #define check_noerr(errorCode) __Check_noErr(errorCode) + #endif + + #ifndef check_noerr_string + #define check_noerr_string(errorCode, message) __Check_noErr_String(errorCode, message) + #endif + + #ifndef check_string + #define check_string(assertion, message) __Check_String(assertion, message) + #endif + + #ifndef require + #define require(assertion, exceptionLabel) __Require(assertion, exceptionLabel) + #endif + + #ifndef require_action + #define require_action(assertion, exceptionLabel, action) __Require_Action(assertion, exceptionLabel, action) + #endif + + #ifndef require_action_string + #define require_action_string(assertion, exceptionLabel, action, message) __Require_Action_String(assertion, exceptionLabel, action, message) + #endif + + #ifndef require_noerr + #define require_noerr(errorCode, exceptionLabel) __Require_noErr(errorCode, exceptionLabel) + #endif + #ifndef require_noerr_action + #define require_noerr_action(errorCode, exceptionLabel, action) __Require_noErr_Action(errorCode, exceptionLabel, action) + #endif + + #ifndef require_noerr_action_string + #define require_noerr_action_string(errorCode, exceptionLabel, action, message) __Require_noErr_Action_String(errorCode, exceptionLabel, action, message) + #endif + + #ifndef require_noerr_string + #define require_noerr_string(errorCode, exceptionLabel, message) __Require_noErr_String(errorCode, exceptionLabel, message) + #endif + + #ifndef require_string + #define require_string(assertion, exceptionLabel, message) __Require_String(assertion, exceptionLabel, message) + #endif + + #ifndef verify + #define verify(assertion) __Verify(assertion) + #endif + + #ifndef verify_action + #define verify_action(assertion, action) __Verify_Action(assertion, action) + #endif + + #ifndef verify_noerr + #define verify_noerr(errorCode) __Verify_noErr(errorCode) + #endif + + #ifndef verify_noerr_action + #define verify_noerr_action(errorCode, action) __Verify_noErr_Action(errorCode, action) + #endif + + #ifndef verify_noerr_string + #define verify_noerr_string(errorCode, message) __Verify_noErr_String(errorCode, message) + #endif + + #ifndef verify_string + #define verify_string(assertion, message) __Verify_String(assertion, message) + #endif + + #ifndef ncheck + #define ncheck(assertion) __nCheck(assertion) + #endif + + #ifndef ncheck_string + #define ncheck_string(assertion, message) __nCheck_String(assertion, message) + #endif + + #ifndef nrequire + #define nrequire(assertion, exceptionLabel) __nRequire(assertion, exceptionLabel) + #endif + + #ifndef nrequire_action + #define nrequire_action(assertion, exceptionLabel, action) __nRequire_Action(assertion, exceptionLabel, action) + #endif + + #ifndef nrequire_action_quiet + #define nrequire_action_quiet(assertion, exceptionLabel, action) __nRequire_Action_Quiet(assertion, exceptionLabel, action) + #endif + + #ifndef nrequire_action_string + #define nrequire_action_string(assertion, exceptionLabel, action, message) __nRequire_Action_String(assertion, exceptionLabel, action, message) + #endif + + #ifndef nrequire_quiet + #define nrequire_quiet(assertion, exceptionLabel) __nRequire_Quiet(assertion, exceptionLabel) + #endif + + #ifndef nrequire_string + #define nrequire_string(assertion, exceptionLabel, string) __nRequire_String(assertion, exceptionLabel, string) + 
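/*
 * An illustrative sketch (hypothetical client source, not part of this
 * patch): a client that wants only the new names, for example because its
 * own code or a library such as Boost defines check() or require(), can
 * opt out before including the header.
 */
#define __ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES 0
#include <AssertMacros.h>

static int positive_or_zero(int v)
{
    __Require(v > 0, bail);   /* only the __-prefixed names are defined now */
    return v;
bail:
    return 0;
}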
#endif + + #ifndef nverify + #define nverify(assertion) __nVerify(assertion) + #endif + + #ifndef nverify_string + #define nverify_string(assertion, message) __nVerify_String(assertion, message) + #endif + + #ifndef require_action_quiet + #define require_action_quiet(assertion, exceptionLabel, action) __Require_Action_Quiet(assertion, exceptionLabel, action) + #endif + + #ifndef require_noerr_action_quiet + #define require_noerr_action_quiet(errorCode, exceptionLabel, action) __Require_noErr_Action_Quiet(errorCode, exceptionLabel, action) + #endif + + #ifndef require_noerr_quiet + #define require_noerr_quiet(errorCode, exceptionLabel) __Require_noErr_Quiet(errorCode, exceptionLabel) + #endif + + #ifndef require_quiet + #define require_quiet(assertion, exceptionLabel) __Require_Quiet(assertion, exceptionLabel) + #endif + + #ifndef check_compile_time + #define check_compile_time( expr ) __Check_Compile_Time( expr ) + #endif + + #ifndef debug_string + #define debug_string(message) __Debug_String(message) + #endif + +#endif /* ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES */ + + +#endif /* __ASSERTMACROS__ */ diff --git a/EXTERNAL_HEADERS/Availability.h b/EXTERNAL_HEADERS/Availability.h index 5c6ccf781..8fb99382d 100644 --- a/EXTERNAL_HEADERS/Availability.h +++ b/EXTERNAL_HEADERS/Availability.h @@ -29,12 +29,12 @@ were first available; and, if applicable, the OS version in which they became deprecated. - The desktop Mac OS X and the iPhone OS X each have different version numbers. + The desktop Mac OS X and iOS each have different version numbers. The __OSX_AVAILABLE_STARTING() macro allows you to specify both the desktop - and phone OS version numbers. For instance: + and iOS version numbers. For instance: __OSX_AVAILABLE_STARTING(__MAC_10_2,__IPHONE_2_0) means the function/method was first available on Mac OS X 10.2 on the desktop - and first available in OS X 2.0 on the iPhone. + and first available in iOS 2.0 on the iPhone. If a function is available on one platform, but not the other a _NA (not applicable) parameter is used. For instance: @@ -50,15 +50,15 @@ as well as the OS version in which it became deprecated. For instance: __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0,__MAC_10_5,__IPHONE_NA,__IPHONE_NA) means that the function/method was introduced in Mac OS X 10.0, then - became deprecated beginning in Mac OS X 10.5. On the iPhone the function + became deprecated beginning in Mac OS X 10.5. On iOS the function has never been available. For these macros to function properly, a program must specify the OS version range it is targeting. The min OS version is specified as an option to the compiler: - -mmacosx-version-min=10.x when building for Mac OS X, and -miphoneos-version-min=x.x + -mmacosx-version-min=10.x when building for Mac OS X, and -miphoneos-version-min=y.z when building for the iPhone. The upper bound for the OS version is rarely needed, - but it can be set on the command line via: -D__MAC_OS_X_VERSION_MAX_ALLOWED=10xx for - Mac OS X and __IPHONE_OS_VERSION_MAX_ALLOWED = 1xxx for iPhone. + but it can be set on the command line via: -D__MAC_OS_X_VERSION_MAX_ALLOWED=10x0 for + Mac OS X and __IPHONE_OS_VERSION_MAX_ALLOWED = y0z00 for iOS. 
Examples: @@ -125,6 +125,7 @@ #define __MAC_10_6 1060 #define __MAC_10_7 1070 #define __MAC_10_8 1080 +#define __MAC_10_9 1090 #define __MAC_NA 9999 /* not available */ #define __IPHONE_2_0 20000 @@ -139,6 +140,9 @@ #define __IPHONE_4_3 40300 #define __IPHONE_5_0 50000 #define __IPHONE_5_1 50100 +#define __IPHONE_6_0 60000 +#define __IPHONE_6_1 60100 +#define __IPHONE_7_0 70000 #define __IPHONE_NA 99999 /* not available */ #include @@ -148,15 +152,20 @@ #define __OSX_AVAILABLE_STARTING(_osx, _ios) __AVAILABILITY_INTERNAL##_ios #define __OSX_AVAILABLE_BUT_DEPRECATED(_osxIntro, _osxDep, _iosIntro, _iosDep) \ __AVAILABILITY_INTERNAL##_iosIntro##_DEP##_iosDep + #define __OSX_AVAILABLE_BUT_DEPRECATED_MSG(_osxIntro, _osxDep, _iosIntro, _iosDep, _msg) \ + __AVAILABILITY_INTERNAL##_iosIntro##_DEP##_iosDep##_MSG(_msg) #elif defined(__MAC_OS_X_VERSION_MIN_REQUIRED) #define __OSX_AVAILABLE_STARTING(_osx, _ios) __AVAILABILITY_INTERNAL##_osx #define __OSX_AVAILABLE_BUT_DEPRECATED(_osxIntro, _osxDep, _iosIntro, _iosDep) \ __AVAILABILITY_INTERNAL##_osxIntro##_DEP##_osxDep + #define __OSX_AVAILABLE_BUT_DEPRECATED_MSG(_osxIntro, _osxDep, _iosIntro, _iosDep, _msg) \ + __AVAILABILITY_INTERNAL##_osxIntro##_DEP##_osxDep##_MSG(_msg) #else #define __OSX_AVAILABLE_STARTING(_osx, _ios) - #define __OSX_AVAILABLE_BUT_DEPRECATED(_osxIntro, _osxDep, _iosIntro, _iosDep) + #define __OSX_AVAILABLE_BUT_DEPRECATED(_osxIntro, _osxDep, _iosIntro, _iosDep) + #define __OSX_AVAILABLE_BUT_DEPRECATED_MSG(_osxIntro, _osxDep, _iosIntro, _iosDep, _msg) #endif diff --git a/EXTERNAL_HEADERS/AvailabilityInternal.h b/EXTERNAL_HEADERS/AvailabilityInternal.h index d94e55d08..dc8a30747 100644 --- a/EXTERNAL_HEADERS/AvailabilityInternal.h +++ b/EXTERNAL_HEADERS/AvailabilityInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 by Apple Inc.. All rights reserved. + * Copyright (c) 2007-2012 by Apple Inc.. All rights reserved. 
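/*
 * An illustrative sketch (hypothetical declaration and macro name, not
 * part of this patch): the new _MSG variants let a header attach a
 * migration hint to a deprecation:
 *
 *     #include <Availability.h>
 *     extern int OldCall(void)
 *         __OSX_AVAILABLE_BUT_DEPRECATED_MSG(__MAC_10_0, __MAC_10_8,
 *                                            __IPHONE_NA, __IPHONE_NA,
 *                                            "use NewCall() instead");
 *
 * The long run of _MSG definitions below all reduces to one feature-tested
 * pattern, sketched here under a hypothetical name:
 *
 *     #ifdef __has_feature
 *       #if __has_feature(attribute_availability_with_message)
 *         #define MY_DEP_MSG(_msg) \
 *             __attribute__((availability(ios,introduced=2.0,deprecated=7.0,message=_msg)))
 *       #else
 *         #define MY_DEP_MSG(_msg) \
 *             __attribute__((availability(ios,introduced=2.0,deprecated=7.0)))
 *       #endif
 *     #endif
 *
 * so compilers without message support still get the plain availability
 * attribute.
 */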
* * @APPLE_LICENSE_HEADER_START@ * @@ -39,23 +39,26 @@ #endif #endif -#ifdef __IPHONE_OS_VERSION_MIN_REQUIRED - /* don't use visibility attribute for iPhoneOS */ - #define __AVAILABILITY_INTERNAL_DEPRECATED __attribute__((deprecated)) - #define __AVAILABILITY_INTERNAL_UNAVAILABLE __attribute__((unavailable)) - #define __AVAILABILITY_INTERNAL_WEAK_IMPORT __attribute__((weak_import)) - #define __AVAILABILITY_INTERNAL_REGULAR +#define __AVAILABILITY_INTERNAL_DEPRECATED __attribute__((deprecated)) +#ifdef __has_feature + #if __has_feature(attribute_deprecated_with_message) + #define __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) __attribute__((deprecated(_msg))) + #else + #define __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) __attribute__((deprecated)) + #endif +#elif defined(__GNUC__) && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))) + #define __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) __attribute__((deprecated(_msg))) #else - #define __AVAILABILITY_INTERNAL_DEPRECATED __attribute__((deprecated,visibility("default"))) - #define __AVAILABILITY_INTERNAL_UNAVAILABLE __attribute__((unavailable,visibility("default"))) - #define __AVAILABILITY_INTERNAL_WEAK_IMPORT __attribute__((weak_import,visibility("default"))) - #define __AVAILABILITY_INTERNAL_REGULAR __attribute__((visibility("default"))) + #define __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) __attribute__((deprecated)) #endif +#define __AVAILABILITY_INTERNAL_UNAVAILABLE __attribute__((unavailable)) +#define __AVAILABILITY_INTERNAL_WEAK_IMPORT __attribute__((weak_import)) +#define __AVAILABILITY_INTERNAL_REGULAR #ifdef __IPHONE_OS_VERSION_MIN_REQUIRED /* make sure a default max version is set */ #ifndef __IPHONE_OS_VERSION_MAX_ALLOWED - #define __IPHONE_OS_VERSION_MAX_ALLOWED __IPHONE_5_1 + #define __IPHONE_OS_VERSION_MAX_ALLOWED __IPHONE_7_0 #endif /* make sure a valid min is set */ #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0 @@ -63,966 +66,3548 @@ #define __IPHONE_OS_VERSION_MIN_REQUIRED __IPHONE_2_0 #endif - #ifdef __has_attribute + #if defined(__has_attribute) && defined(__has_feature) #if __has_attribute(availability) /* use better attributes if possible */ #define __AVAILABILITY_INTERNAL__IPHONE_2_0 __attribute__((availability(ios,introduced=2.0))) #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0 __attribute__((availability(ios,introduced=2.0,deprecated=2.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=2.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=2.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1 __attribute__((availability(ios,introduced=2.0,deprecated=2.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=2.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=2.1))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2 __attribute__((availability(ios,introduced=2.0,deprecated=2.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2_MSG(_msg) 
__attribute__((availability(ios,introduced=2.0,deprecated=2.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=2.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __attribute__((availability(ios,introduced=2.0,deprecated=3.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=3.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=3.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __attribute__((availability(ios,introduced=2.0,deprecated=3.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=3.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=3.1))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __attribute__((availability(ios,introduced=2.0,deprecated=3.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=3.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=3.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __attribute__((availability(ios,introduced=2.0,deprecated=4.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=4.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=4.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __attribute__((availability(ios,introduced=2.0,deprecated=4.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=4.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=4.1))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=2.0,deprecated=4.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=4.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=4.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=2.0,deprecated=4.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=4.3,message=_msg))) + #else + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=4.3))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=2.0,deprecated=5.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=5.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=5.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=2.0,deprecated=5.1))) - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=5.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=5.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=2.0,deprecated=6.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=6.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=6.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=2.0,deprecated=6.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=6.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=6.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=2.0,deprecated=7.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=7.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.0,deprecated=7.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.0))) + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.0))) #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __attribute__((availability(ios,introduced=2.1))) #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1 __attribute__((availability(ios,introduced=2.1,deprecated=2.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=2.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=2.1))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2 
__attribute__((availability(ios,introduced=2.1,deprecated=2.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=2.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=2.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __attribute__((availability(ios,introduced=2.1,deprecated=3.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=3.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=3.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __attribute__((availability(ios,introduced=2.1,deprecated=3.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=3.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=3.1))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __attribute__((availability(ios,introduced=2.1,deprecated=3.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=3.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=3.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __attribute__((availability(ios,introduced=2.1,deprecated=4.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=4.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=4.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __attribute__((availability(ios,introduced=2.1,deprecated=4.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=4.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=4.1))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=2.1,deprecated=4.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=4.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=4.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=2.1,deprecated=4.3))) + #if __has_feature(attribute_availability_with_message) + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=4.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=4.3))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=2.1,deprecated=5.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=5.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=5.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=2.1,deprecated=5.1))) - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=5.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=5.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=2.1,deprecated=6.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=6.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=6.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=2.1,deprecated=6.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=6.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=6.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=2.1,deprecated=7.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=7.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.1,deprecated=7.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.1))) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.1))) #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __attribute__((availability(ios,introduced=2.2))) #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2 __attribute__((availability(ios,introduced=2.2,deprecated=2.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=2.2,message=_msg))) + #else + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=2.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __attribute__((availability(ios,introduced=2.2,deprecated=3.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=3.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=3.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __attribute__((availability(ios,introduced=2.2,deprecated=3.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=3.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=3.1))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __attribute__((availability(ios,introduced=2.2,deprecated=3.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=3.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=3.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __attribute__((availability(ios,introduced=2.2,deprecated=4.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=4.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=4.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __attribute__((availability(ios,introduced=2.2,deprecated=4.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=4.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=4.1))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=2.2,deprecated=4.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=4.2,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=4.2))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=2.2,deprecated=4.3))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=4.3,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=4.3))) + #endif #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=2.2,deprecated=5.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=5.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=5.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=2.2,deprecated=5.1))) - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.2))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=5.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=5.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=2.2,deprecated=6.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=6.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=6.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=2.2,deprecated=6.1))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=6.1,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=6.1))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=2.2,deprecated=7.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=7.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=2.2,deprecated=7.0))) + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=2.2))) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=2.2))) #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __attribute__((availability(ios,introduced=3.0))) #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __attribute__((availability(ios,introduced=3.0,deprecated=3.0))) + #if __has_feature(attribute_availability_with_message) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=3.0,message=_msg))) + #else + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=3.0))) + #endif #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __attribute__((availability(ios,introduced=3.0,deprecated=3.1))) + #if __has_feature(attribute_availability_with_message) + #define 
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=3.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=3.1)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __attribute__((availability(ios,introduced=3.0,deprecated=3.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=3.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=3.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __attribute__((availability(ios,introduced=3.0,deprecated=4.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=4.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=4.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __attribute__((availability(ios,introduced=3.0,deprecated=4.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=4.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=4.1)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=3.0,deprecated=4.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=4.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=4.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=3.0,deprecated=4.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=4.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=4.3)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=3.0,deprecated=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=5.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=5.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=3.0,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=3.0,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=3.0,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=3.0,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.0,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.0)))
#define __AVAILABILITY_INTERNAL__IPHONE_3_1 __attribute__((availability(ios,introduced=3.1)))
#define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __attribute__((availability(ios,introduced=3.1,deprecated=3.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=3.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=3.1)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __attribute__((availability(ios,introduced=3.1,deprecated=3.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=3.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=3.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __attribute__((availability(ios,introduced=3.1,deprecated=4.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=4.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=4.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __attribute__((availability(ios,introduced=3.1,deprecated=4.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=4.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=4.1)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=3.1,deprecated=4.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=4.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=4.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=3.1,deprecated=4.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=4.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=4.3)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=3.1,deprecated=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=5.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=5.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=3.1,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=3.1,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=3.1,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=3.1,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.1,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.1)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.1)))
#define __AVAILABILITY_INTERNAL__IPHONE_3_2 __attribute__((availability(ios,introduced=3.2)))
#define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __attribute__((availability(ios,introduced=3.2,deprecated=3.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=3.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=3.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __attribute__((availability(ios,introduced=3.2,deprecated=4.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=4.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=4.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __attribute__((availability(ios,introduced=3.2,deprecated=4.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=4.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=4.1)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=3.2,deprecated=4.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=4.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=4.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=3.2,deprecated=4.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=4.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=4.3)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=3.2,deprecated=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=5.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=5.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=3.2,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=3.2,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=3.2,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=3.2,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=3.2,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=3.2)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=3.2)))
#define __AVAILABILITY_INTERNAL__IPHONE_4_0 __attribute__((availability(ios,introduced=4.0)))
#define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __attribute__((availability(ios,introduced=4.0,deprecated=4.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=4.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=4.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __attribute__((availability(ios,introduced=4.0,deprecated=4.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=4.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=4.1)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=4.0,deprecated=4.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=4.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=4.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=4.0,deprecated=4.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=4.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=4.3)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=4.0,deprecated=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=5.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=5.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=4.0,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=4.0,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=4.0,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=4.0,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.0,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.0)))
#define __AVAILABILITY_INTERNAL__IPHONE_4_1 __attribute__((availability(ios,introduced=4.1)))
#define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __attribute__((availability(ios,introduced=4.1,deprecated=4.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=4.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=4.1)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=4.1,deprecated=4.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=4.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=4.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=4.1,deprecated=4.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=4.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=4.3)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=4.1,deprecated=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=5.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=5.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=4.1,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=4.1,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=4.1,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=4.1,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.1,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.1)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.1)))
#define __AVAILABILITY_INTERNAL__IPHONE_4_2 __attribute__((availability(ios,introduced=4.2)))
#define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __attribute__((availability(ios,introduced=4.2,deprecated=4.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=4.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=4.2)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=4.2,deprecated=4.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=4.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=4.3)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=4.2,deprecated=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=5.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=5.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=4.2,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=4.2,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=4.2,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=4.2,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.2,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.2)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.2)))
#define __AVAILABILITY_INTERNAL__IPHONE_4_3 __attribute__((availability(ios,introduced=4.3)))
#define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __attribute__((availability(ios,introduced=4.3,deprecated=4.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=4.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=4.3)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=4.3,deprecated=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=5.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=5.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=4.3,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=4.3,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=4.3,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=4.3,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=4.3,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA __attribute__((availability(ios,introduced=4.3)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=4.3)))
#define __AVAILABILITY_INTERNAL__IPHONE_5_0 __attribute__((availability(ios,introduced=5.0)))
#define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __attribute__((availability(ios,introduced=5.0,deprecated=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=5.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=5.0)))
+ #endif
#define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=5.0,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=5.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=5.0,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=5.0,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=5.0,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=5.0,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=5.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=5.0)))
#define __AVAILABILITY_INTERNAL__IPHONE_5_1 __attribute__((availability(ios,introduced=5.1)))
#define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __attribute__((availability(ios,introduced=5.1,deprecated=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=5.1)))
- #define __AVAILABILITY_INTERNAL__IPHONE_NA __attribute__((availability(ios,unavailable)))
- #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA __attribute__((availability(ios,unavailable)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=5.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=5.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=5.1,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=5.1,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=5.1,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=5.1,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=5.1)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=5.1)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0 __attribute__((availability(ios,introduced=6.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __attribute__((availability(ios,introduced=6.0,deprecated=6.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=6.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=6.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=6.0,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=6.0,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=6.0,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=6.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=6.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1 __attribute__((availability(ios,introduced=6.1)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __attribute__((availability(ios,introduced=6.1,deprecated=6.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=6.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=6.1)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=6.1,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=6.1,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA __attribute__((availability(ios,introduced=6.1)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=6.1)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0 __attribute__((availability(ios,introduced=7.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __attribute__((availability(ios,introduced=7.0,deprecated=7.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __attribute__((availability(ios,introduced=7.0,deprecated=7.0)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA __attribute__((availability(ios,introduced=7.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,introduced=7.0)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_NA __attribute__((availability(ios,unavailable)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA __attribute__((availability(ios,unavailable)))
+ #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA_MSG(_msg) __attribute__((availability(ios,unavailable)))
#endif
#endif
#ifndef __AVAILABILITY_INTERNAL__IPHONE_2_0
/* set up old style internal macros (up to 2.0) */
#if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_2_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_2_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
/* set up old style internal macros (up to 2.1) */
#if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
#endif
/* set up old style internal macros (up to 2.2) */
#if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
#if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_2_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
#endif
/* set up old style internal macros (up to 3.0) */
#if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
#if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
#endif
/* set up old style internal macros (up to 3.1) */
#if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
#if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 #else - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 
__AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) #endif /* set up old style internal macros (up to 3.2) */ #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_3_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2 __AVAILABILITY_INTERNAL_UNAVAILABLE + #define __AVAILABILITY_INTERNAL__IPHONE_3_2 __AVAILABILITY_INTERNAL_UNAVAILABLE #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #define __AVAILABILITY_INTERNAL__IPHONE_3_2 __AVAILABILITY_INTERNAL_WEAK_IMPORT #else - #define __AVAILABILITY_INTERNAL__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR #endif - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 #else - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_3_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) #endif /* set up old style internal macros (up to 4.0) */ #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0 __AVAILABILITY_INTERNAL_UNAVAILABLE + #define __AVAILABILITY_INTERNAL__IPHONE_4_0 __AVAILABILITY_INTERNAL_UNAVAILABLE #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #define __AVAILABILITY_INTERNAL__IPHONE_4_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT #else - #define __AVAILABILITY_INTERNAL__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR #endif - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA 
__AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define 
__AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 #else - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) #endif /* set up old style internal macros (up to 4.1) */ #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_4_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1 __AVAILABILITY_INTERNAL_UNAVAILABLE + #define __AVAILABILITY_INTERNAL__IPHONE_4_1 __AVAILABILITY_INTERNAL_UNAVAILABLE #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #define __AVAILABILITY_INTERNAL__IPHONE_4_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT #else - #define __AVAILABILITY_INTERNAL__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR #endif - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 
__AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 
__AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + 
#define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 #else - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED + 
#define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
#endif
/* set up old style internal macros (up to 4.2) */
#if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
#if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
#endif
/* set up old style internal macros (up to 4.3) */
#if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_4_3
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
#if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_4_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
#endif
/* set up old style internal macros (up to 5.0) */
#if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_5_0
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0 __AVAILABILITY_INTERNAL_UNAVAILABLE
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
#if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 - #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 - #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 - #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 - #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 - #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 - #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0 - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR - #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 #else - #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 
__AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) #endif /* set up old style internal macros (up to 5.1) */ #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_5_1 - #define __AVAILABILITY_INTERNAL__IPHONE_5_1 __AVAILABILITY_INTERNAL_UNAVAILABLE + #define __AVAILABILITY_INTERNAL__IPHONE_5_1 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif 
__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #endif
+ /* set up old style internal macros (up to 6.0) */
+ #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
#endif
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
#if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
#elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_REGULAR
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
#else
- #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_5_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #endif
+ /* set up old style internal macros (up to 6.1) */
+ #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #endif
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR
+ #define
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_4_3 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 
__AVAILABILITY_INTERNAL__IPHONE_5_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_5_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_6_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL__IPHONE_6_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1 + #else + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_6_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #endif + /* set up old style internal macros (up to 7.0) */ + #if __IPHONE_OS_VERSION_MAX_ALLOWED < __IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0 __AVAILABILITY_INTERNAL_UNAVAILABLE + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0 __AVAILABILITY_INTERNAL_WEAK_IMPORT + #else + #define __AVAILABILITY_INTERNAL__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #endif + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA __AVAILABILITY_INTERNAL__IPHONE_7_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0 + #if __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define 
__AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_2_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_2_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define 
__AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_1 + #define 
__AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_3_2 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_3_2 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + 
#define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_0 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_0 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define 
__AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_1 + #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_1 + #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_2 + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR + #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) 
__AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_2
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_4_3
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_5_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_0
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_6_1
+ #elif __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_REGULAR
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL__IPHONE_7_0
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL__IPHONE_7_0
+ #else
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_2_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_3_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_2_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_4_3_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_5_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_6_1_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__IPHONE_7_0_DEP__IPHONE_7_0_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
 #endif
 /* set up internal macros (n/a) */
- #define __AVAILABILITY_INTERNAL__IPHONE_NA __AVAILABILITY_INTERNAL_UNAVAILABLE
- #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_NA __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #define __AVAILABILITY_INTERNAL__IPHONE_NA_DEP__IPHONE_NA_MSG(_msg) __AVAILABILITY_INTERNAL_UNAVAILABLE
 #endif
 #elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
@@ -1030,82 +3615,387 @@
 #define __MAC_OS_X_VERSION_MIN_REQUIRED __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__
 /* make sure a default max version is set */
 #ifndef __MAC_OS_X_VERSION_MAX_ALLOWED
- #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_8
+ #define __MAC_OS_X_VERSION_MAX_ALLOWED __MAC_10_9
 #endif
- #ifdef __has_attribute
+ #if defined(__has_attribute) && defined(__has_feature)
 #if __has_attribute(availability)
 /* use better attributes if possible */
 #define __AVAILABILITY_INTERNAL__MAC_10_0 __attribute__((availability(macosx,introduced=10.0)))
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_0 __attribute__((availability(macosx,introduced=10.0,deprecated=10.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_0_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.0,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_0_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.0)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1 __attribute__((availability(macosx,introduced=10.0,deprecated=10.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.1)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2 __attribute__((availability(macosx,introduced=10.0,deprecated=10.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.2)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3 __attribute__((availability(macosx,introduced=10.0,deprecated=10.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.3)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4 __attribute__((availability(macosx,introduced=10.0,deprecated=10.4)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.4,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.4)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5 __attribute__((availability(macosx,introduced=10.0,deprecated=10.5)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.5,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.5)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6 __attribute__((availability(macosx,introduced=10.0,deprecated=10.6)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.6,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.6)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7 __attribute__((availability(macosx,introduced=10.0,deprecated=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.7,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.7)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.0,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.0)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.0,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.0,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.0)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.0)))
 #define __AVAILABILITY_INTERNAL__MAC_10_1 __attribute__((availability(macosx,introduced=10.1)))
 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1 __attribute__((availability(macosx,introduced=10.1,deprecated=10.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.1,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.1)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2 __attribute__((availability(macosx,introduced=10.1,deprecated=10.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.2)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3 __attribute__((availability(macosx,introduced=10.1,deprecated=10.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.3)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4 __attribute__((availability(macosx,introduced=10.1,deprecated=10.4)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.4,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.4)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5 __attribute__((availability(macosx,introduced=10.1,deprecated=10.5)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.5,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.5)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6 __attribute__((availability(macosx,introduced=10.1,deprecated=10.6)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.6,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.6)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7 __attribute__((availability(macosx,introduced=10.1,deprecated=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.7,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.7)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.1,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.1)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.1,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.1,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.1)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.1)))
 #define __AVAILABILITY_INTERNAL__MAC_10_2 __attribute__((availability(macosx,introduced=10.2)))
 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2 __attribute__((availability(macosx,introduced=10.2,deprecated=10.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.2,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.2)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3 __attribute__((availability(macosx,introduced=10.2,deprecated=10.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.3)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4 __attribute__((availability(macosx,introduced=10.2,deprecated=10.4)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.4,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.4)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5 __attribute__((availability(macosx,introduced=10.2,deprecated=10.5)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.5,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.5)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6 __attribute__((availability(macosx,introduced=10.2,deprecated=10.6)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.6,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.6)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7 __attribute__((availability(macosx,introduced=10.2,deprecated=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.7,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.7)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.2,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.2)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.2,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.2,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.2)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.2)))
 #define __AVAILABILITY_INTERNAL__MAC_10_3 __attribute__((availability(macosx,introduced=10.3)))
 #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3 __attribute__((availability(macosx,introduced=10.3,deprecated=10.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.3,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.3)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4 __attribute__((availability(macosx,introduced=10.3,deprecated=10.4)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.4,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.4)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5 __attribute__((availability(macosx,introduced=10.3,deprecated=10.5)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.5,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.5)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6 __attribute__((availability(macosx,introduced=10.3,deprecated=10.6)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.6,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.6)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7 __attribute__((availability(macosx,introduced=10.3,deprecated=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.7,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.7)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.3,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.3)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.3,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.3,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.3)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.3)))
 #define __AVAILABILITY_INTERNAL__MAC_10_4 __attribute__((availability(macosx,introduced=10.4)))
 #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4 __attribute__((availability(macosx,introduced=10.4,deprecated=10.4)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.4,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.4)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5 __attribute__((availability(macosx,introduced=10.4,deprecated=10.5)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.5,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.5)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6 __attribute__((availability(macosx,introduced=10.4,deprecated=10.6)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.6,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.6)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7 __attribute__((availability(macosx,introduced=10.4,deprecated=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.7,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.7)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.4,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.4)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.4,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.4,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.4)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.4)))
 #define __AVAILABILITY_INTERNAL__MAC_10_5 __attribute__((availability(macosx,introduced=10.5)))
 #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5 __attribute__((availability(macosx,introduced=10.5,deprecated=10.5)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.5,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.5)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6 __attribute__((availability(macosx,introduced=10.5,deprecated=10.6)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.6,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.6)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7 __attribute__((availability(macosx,introduced=10.5,deprecated=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.7,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.7)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.5,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.5)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.5,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.5,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.5)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.5)))
 #define __AVAILABILITY_INTERNAL__MAC_10_6 __attribute__((availability(macosx,introduced=10.6)))
 #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6 __attribute__((availability(macosx,introduced=10.6,deprecated=10.6)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.6,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.6)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7 __attribute__((availability(macosx,introduced=10.6,deprecated=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.7,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.7)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.6,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.6)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.6,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.6,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.6)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.6)))
 #define __AVAILABILITY_INTERNAL__MAC_10_7 __attribute__((availability(macosx,introduced=10.7)))
 #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7 __attribute__((availability(macosx,introduced=10.7,deprecated=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.7,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.7)))
+ #endif
 #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.7,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.7)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.7,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.7,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.7)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.7)))
 #define __AVAILABILITY_INTERNAL__MAC_10_8 __attribute__((availability(macosx,introduced=10.8)))
 #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8 __attribute__((availability(macosx,introduced=10.8,deprecated=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.8)))
- #define __AVAILABILITY_INTERNAL__MAC_NA __attribute__((availability(macosx,unavailable)))
- #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA __attribute__((availability(macosx,unavailable)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.8,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.8)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.8,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.8,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.8)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.8)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_9 __attribute__((availability(macosx,introduced=10.9)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9 __attribute__((availability(macosx,introduced=10.9,deprecated=10.9)))
+ #if __has_feature(attribute_availability_with_message)
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.9,message=_msg)))
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9_MSG(_msg) __attribute__((availability(macosx,introduced=10.9,deprecated=10.9)))
+ #endif
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,introduced=10.9)))
+ #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA __attribute__((availability(macosx,introduced=10.9)))
+ #define __AVAILABILITY_INTERNAL__MAC_NA __attribute__((availability(macosx,unavailable)))
+ #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA __attribute__((availability(macosx,unavailable)))
+ #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA_MSG(_msg) __attribute__((availability(macosx,unavailable)))
 #endif
 #endif
 #ifndef __AVAILABILITY_INTERNAL__MAC_10_0
 /* use old style attributes */
+ #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_9
+ #define __AVAILABILITY_INTERNAL__MAC_10_9 __AVAILABILITY_INTERNAL_UNAVAILABLE
+ #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_9
+ #define __AVAILABILITY_INTERNAL__MAC_10_9 __AVAILABILITY_INTERNAL_WEAK_IMPORT
+ #else
+ #define __AVAILABILITY_INTERNAL__MAC_10_9 __AVAILABILITY_INTERNAL_REGULAR
+ #endif
 #if __MAC_OS_X_VERSION_MAX_ALLOWED < __MAC_10_8
 #define __AVAILABILITY_INTERNAL__MAC_10_8 __AVAILABILITY_INTERNAL_UNAVAILABLE
 #elif __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_8
@@ -1171,127 +4061,270 @@
 #endif
 #define __AVAILABILITY_INTERNAL__MAC_NA __AVAILABILITY_INTERNAL_UNAVAILABLE
 #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_1
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
 #else
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1 __AVAILABILITY_INTERNAL__MAC_10_0
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1 __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1 __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_1_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1 __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_1_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1
 #endif
 #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_2
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
 #else
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2 __AVAILABILITY_INTERNAL__MAC_10_0
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2 __AVAILABILITY_INTERNAL__MAC_10_1
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2 __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2 __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2 __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2 __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_2_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2
 #endif
 #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_3
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
 #else
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3 __AVAILABILITY_INTERNAL__MAC_10_0
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3 __AVAILABILITY_INTERNAL__MAC_10_1
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3 __AVAILABILITY_INTERNAL__MAC_10_2
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3 __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3 __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3 __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3 __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3 __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_3_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3
 #endif
 #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_4
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
 #else
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_0
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_1
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_2
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_3
- #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_4
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4 __AVAILABILITY_INTERNAL__MAC_10_4
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_4_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4
 #endif
 #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_5
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
 #else
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_0
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_1
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_2
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_3
- #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_4
- #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_5
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_4
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5 __AVAILABILITY_INTERNAL__MAC_10_5
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_5_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5
 #endif
 #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_6
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
 #else
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_0
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_1
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_2
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_3
- #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_4
- #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_5
- #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_6
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_4
+ #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_5
+ #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6 __AVAILABILITY_INTERNAL__MAC_10_6
+ #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_6_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6
 #endif
 #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_7
- #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
- #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED
+ #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg)
+ #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7
__AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) #else - #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_0 - #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_1 - #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_2 - #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_3 - #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_4 - #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_5 - #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_6 - #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7 __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7 
__AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_7_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_7 #endif #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_8 - #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED - #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #else + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8 
__AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_8 + #endif + #if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9 __AVAILABILITY_INTERNAL_DEPRECATED + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL_DEPRECATED_MSG(_msg) #else - #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_0 - #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_1 - #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_2 - #define 
__AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_3 - #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_4 - #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_5 - #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_6 - #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_7 - #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_8 __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9 __AVAILABILITY_INTERNAL__MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_10_9_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9 #endif #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_0 + #define __AVAILABILITY_INTERNAL__MAC_10_0_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_0 #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_1 + #define __AVAILABILITY_INTERNAL__MAC_10_1_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_1 #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_2 + #define __AVAILABILITY_INTERNAL__MAC_10_2_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_2 #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_3 + #define __AVAILABILITY_INTERNAL__MAC_10_3_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_3 #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_4 + #define __AVAILABILITY_INTERNAL__MAC_10_4_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_4 #define __AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_5 + #define 
__AVAILABILITY_INTERNAL__MAC_10_5_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_5 #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_6 + #define __AVAILABILITY_INTERNAL__MAC_10_6_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_6 #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_7 + #define __AVAILABILITY_INTERNAL__MAC_10_7_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_7 #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_8_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_8 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA __AVAILABILITY_INTERNAL__MAC_10_9 + #define __AVAILABILITY_INTERNAL__MAC_10_9_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL__MAC_10_9 #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA __AVAILABILITY_INTERNAL_UNAVAILABLE + #define __AVAILABILITY_INTERNAL__MAC_NA_DEP__MAC_NA_MSG(_msg) __AVAILABILITY_INTERNAL_UNAVAILABLE #endif #endif diff --git a/EXTERNAL_HEADERS/AvailabilityMacros.h b/EXTERNAL_HEADERS/AvailabilityMacros.h index 25587d849..d569c3cd8 100644 --- a/EXTERNAL_HEADERS/AvailabilityMacros.h +++ b/EXTERNAL_HEADERS/AvailabilityMacros.h @@ -98,6 +98,7 @@ #define MAC_OS_X_VERSION_10_6 1060 #define MAC_OS_X_VERSION_10_7 1070 #define MAC_OS_X_VERSION_10_8 1080 +#define MAC_OS_X_VERSION_10_9 1090 /* * If min OS not specified, assume 10.1 for ppc and 10.4 for all others @@ -121,13 +122,13 @@ #endif /* - * if max OS not specified, assume larger of (10.8, min) + * if max OS not specified, assume larger of (10.9, min) */ #ifndef MAC_OS_X_VERSION_MAX_ALLOWED - #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_8 + #if MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_9 #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_MIN_REQUIRED #else - #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_8 + #define MAC_OS_X_VERSION_MAX_ALLOWED MAC_OS_X_VERSION_10_9 #endif #endif @@ -155,10 +156,28 @@ /* * only certain compilers support __attribute__((deprecated)) */ -#if defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))) - #define DEPRECATED_ATTRIBUTE __attribute__((deprecated)) +#if defined(__has_feature) && defined(__has_attribute) + #if __has_attribute(deprecated) + #define DEPRECATED_ATTRIBUTE __attribute__((deprecated)) + #if __has_feature(attribute_deprecated_with_message) + #define DEPRECATED_MSG_ATTRIBUTE(s) __attribute__((deprecated(s))) + #else + #define DEPRECATED_MSG_ATTRIBUTE(s) __attribute__((deprecated)) + #endif + #else + #define DEPRECATED_ATTRIBUTE + #define DEPRECATED_MSG_ATTRIBUTE(s) + #endif +#elif defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))) + #define DEPRECATED_ATTRIBUTE __attribute__((deprecated)) + #if (__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)) + #define DEPRECATED_MSG_ATTRIBUTE(s) __attribute__((deprecated(s))) + #else + #define DEPRECATED_MSG_ATTRIBUTE(s) __attribute__((deprecated)) + #endif #else #define DEPRECATED_ATTRIBUTE + #define DEPRECATED_MSG_ATTRIBUTE(s) #endif /* @@ -193,12 +212,23 @@ */ #define DEPRECATED_IN_MAC_OS_X_VERSION_10_0_AND_LATER DEPRECATED_ATTRIBUTE +#ifndef __AVAILABILITY_MACROS_USES_AVAILABILITY + #ifdef __has_attribute + #if __has_attribute(availability) + #include + #define __AVAILABILITY_MACROS_USES_AVAILABILITY 1 + #endif + #endif +#endif + /* * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER * * Used on declarations introduced in Mac OS X 10.1 */ -#if 
MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_1 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_1, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_1 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_1 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER WEAK_IMPORT_ATTRIBUTE @@ -212,7 +242,9 @@ * Used on declarations introduced in Mac OS X 10.1, * and deprecated in Mac OS X 10.1 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_1, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER @@ -224,7 +256,9 @@ * Used on declarations introduced in Mac OS X 10.0, * but later deprecated in Mac OS X 10.1 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_1, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_1 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER @@ -235,7 +269,9 @@ * * Used on types deprecated in Mac OS X 10.1 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_1, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_1 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER DEPRECATED_ATTRIBUTE #else #define DEPRECATED_IN_MAC_OS_X_VERSION_10_1_AND_LATER @@ -247,7 +283,9 @@ * * Used on declarations introduced in Mac OS X 10.2 */ -#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_2 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_2, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_2 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_2 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER WEAK_IMPORT_ATTRIBUTE @@ -261,7 +299,9 @@ * Used on declarations introduced in Mac OS X 10.2, * and deprecated in Mac OS X 10.2 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_2, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER @@ -273,7 +313,9 @@ * Used on declarations introduced in Mac OS X 10.0, * but 
later deprecated in Mac OS X 10.2 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_2, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER @@ -285,7 +327,9 @@ * Used on declarations introduced in Mac OS X 10.1, * but later deprecated in Mac OS X 10.2 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_2, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_2 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER @@ -296,7 +340,9 @@ * * Used on types deprecated in Mac OS X 10.2 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_2, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_2 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER DEPRECATED_ATTRIBUTE #else #define DEPRECATED_IN_MAC_OS_X_VERSION_10_2_AND_LATER @@ -308,7 +354,9 @@ * * Used on declarations introduced in Mac OS X 10.3 */ -#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_3 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_3, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_3 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_3 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER WEAK_IMPORT_ATTRIBUTE @@ -322,7 +370,9 @@ * Used on declarations introduced in Mac OS X 10.3, * and deprecated in Mac OS X 10.3 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER @@ -334,7 +384,9 @@ * Used on declarations introduced in Mac OS X 10.0, * but later deprecated in Mac OS X 10.3 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 #define 
AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER @@ -346,7 +398,9 @@ * Used on declarations introduced in Mac OS X 10.1, * but later deprecated in Mac OS X 10.3 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER @@ -358,7 +412,9 @@ * Used on declarations introduced in Mac OS X 10.2, * but later deprecated in Mac OS X 10.3 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_3 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER @@ -369,7 +425,9 @@ * * Used on types deprecated in Mac OS X 10.3 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_3, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_3 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER DEPRECATED_ATTRIBUTE #else #define DEPRECATED_IN_MAC_OS_X_VERSION_10_3_AND_LATER @@ -381,7 +439,9 @@ * * Used on declarations introduced in Mac OS X 10.4 */ -#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_4 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_4, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_4 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_4 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER WEAK_IMPORT_ATTRIBUTE @@ -395,7 +455,9 @@ * Used on declarations introduced in Mac OS X 10.4, * and deprecated in Mac OS X 10.4 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER @@ -407,7 +469,9 @@ * Used on declarations introduced in Mac OS X 10.0, * but later deprecated in Mac OS X 10.4 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY 
+ #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER @@ -419,7 +483,9 @@ * Used on declarations introduced in Mac OS X 10.1, * but later deprecated in Mac OS X 10.4 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER @@ -431,7 +497,9 @@ * Used on declarations introduced in Mac OS X 10.2, * but later deprecated in Mac OS X 10.4 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER @@ -443,7 +511,9 @@ * Used on declarations introduced in Mac OS X 10.3, * but later deprecated in Mac OS X 10.4 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_4 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER @@ -454,7 +524,9 @@ * * Used on types deprecated in Mac OS X 10.4 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_4, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_4 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER DEPRECATED_ATTRIBUTE #else #define DEPRECATED_IN_MAC_OS_X_VERSION_10_4_AND_LATER @@ -466,7 +538,9 @@ * * Used on declarations introduced in Mac OS X 10.5 */ -#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_5 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_5 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER UNAVAILABLE_ATTRIBUTE #elif 
MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_5 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER WEAK_IMPORT_ATTRIBUTE @@ -480,7 +554,9 @@ * Used on declarations introduced in Mac OS X 10.5, * and deprecated in Mac OS X 10.5 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER @@ -492,7 +568,9 @@ * Used on declarations introduced in Mac OS X 10.0, * but later deprecated in Mac OS X 10.5 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER @@ -504,7 +582,9 @@ * Used on declarations introduced in Mac OS X 10.1, * but later deprecated in Mac OS X 10.5 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER @@ -516,7 +596,9 @@ * Used on declarations introduced in Mac OS X 10.2, * but later deprecated in Mac OS X 10.5 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER @@ -528,7 +610,9 @@ * Used on declarations introduced in Mac OS X 10.3, * but later deprecated in Mac OS X 10.5 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 
AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER @@ -540,7 +624,9 @@ * Used on declarations introduced in Mac OS X 10.4, * but later deprecated in Mac OS X 10.5 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_5 AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER @@ -551,7 +637,9 @@ * * Used on types deprecated in Mac OS X 10.5 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_5, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER DEPRECATED_ATTRIBUTE #else #define DEPRECATED_IN_MAC_OS_X_VERSION_10_5_AND_LATER @@ -563,7 +651,9 @@ * * Used on declarations introduced in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER WEAK_IMPORT_ATTRIBUTE @@ -577,7 +667,9 @@ * Used on declarations introduced in Mac OS X 10.6, * and deprecated in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER @@ -589,7 +681,9 @@ * Used on declarations introduced in Mac OS X 10.0, * but later deprecated in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER @@ -601,7 +695,9 @@ * Used on declarations introduced in Mac OS X 10.1, * but later deprecated in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0) +#elif 
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER @@ -613,7 +709,9 @@ * Used on declarations introduced in Mac OS X 10.2, * but later deprecated in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER @@ -625,7 +723,9 @@ * Used on declarations introduced in Mac OS X 10.3, * but later deprecated in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER @@ -637,7 +737,9 @@ * Used on declarations introduced in Mac OS X 10.4, * but later deprecated in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER @@ -649,7 +751,9 @@ * Used on declarations introduced in Mac OS X 10.5, * but later deprecated in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_6 AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER @@ -660,7 +764,9 @@ * * Used on types deprecated in Mac OS X 10.6 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_6, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_6 #define 
DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER DEPRECATED_ATTRIBUTE #else #define DEPRECATED_IN_MAC_OS_X_VERSION_10_6_AND_LATER @@ -672,7 +778,9 @@ * * Used on declarations introduced in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER WEAK_IMPORT_ATTRIBUTE @@ -686,7 +794,9 @@ * Used on declarations introduced in Mac OS X 10.7, * and deprecated in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER @@ -698,7 +808,9 @@ * Used on declarations introduced in Mac OS X 10.0, * but later deprecated in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER @@ -710,7 +822,9 @@ * Used on declarations introduced in Mac OS X 10.1, * but later deprecated in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER @@ -722,7 +836,9 @@ * Used on declarations introduced in Mac OS X 10.2, * but later deprecated in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER @@ -734,7 +850,9 @@ * Used on declarations introduced in Mac OS X 10.3, * but later deprecated in Mac OS X 10.7 */ -#if 
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER @@ -746,7 +864,9 @@ * Used on declarations introduced in Mac OS X 10.4, * but later deprecated in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER @@ -758,7 +878,9 @@ * Used on declarations introduced in Mac OS X 10.5, * but later deprecated in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER @@ -770,7 +892,9 @@ * Used on declarations introduced in Mac OS X 10.6, * but later deprecated in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER @@ -781,7 +905,9 @@ * * Used on types deprecated in Mac OS X 10.7 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_7, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER DEPRECATED_ATTRIBUTE #else #define DEPRECATED_IN_MAC_OS_X_VERSION_10_7_AND_LATER @@ -793,7 +919,9 @@ * * Used on declarations introduced in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < 
MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER UNAVAILABLE_ATTRIBUTE #elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER WEAK_IMPORT_ATTRIBUTE @@ -807,7 +935,9 @@ * Used on declarations introduced in Mac OS X 10.8, * and deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER @@ -819,7 +949,9 @@ * Used on declarations introduced in Mac OS X 10.0, * but later deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER @@ -831,7 +963,9 @@ * Used on declarations introduced in Mac OS X 10.1, * but later deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER @@ -843,7 +977,9 @@ * Used on declarations introduced in Mac OS X 10.2, * but later deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER @@ -855,7 +991,9 @@ * Used on declarations introduced in Mac OS X 10.3, * but later deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else #define 
AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER @@ -867,7 +1005,9 @@ * Used on declarations introduced in Mac OS X 10.4, * but later deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER @@ -879,7 +1019,9 @@ * Used on declarations introduced in Mac OS X 10.5, * but later deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER @@ -891,7 +1033,9 @@ * Used on declarations introduced in Mac OS X 10.6, * but later deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER @@ -903,7 +1047,9 @@ * Used on declarations introduced in Mac OS X 10.7, * but later deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 DEPRECATED_ATTRIBUTE #else #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER @@ -914,13 +1060,184 @@ * * Used on types deprecated in Mac OS X 10.8 */ -#if MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_8 #define DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER DEPRECATED_ATTRIBUTE #else #define DEPRECATED_IN_MAC_OS_X_VERSION_10_8_AND_LATER #endif +/* + * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER + * + * Used on declarations 
introduced in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MAX_ALLOWED < MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER UNAVAILABLE_ATTRIBUTE +#elif MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER WEAK_IMPORT_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED + * + * Used on declarations introduced in Mac OS X 10.9, + * and deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_9, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER_BUT_DEPRECATED AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.0, + * but later deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.1, + * but later deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_1, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_1_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.2, + * but later deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_2, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_2_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.3, + * but later deprecated in 
Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_3, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.4, + * but later deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_4, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_4_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.5, + * but later deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.6, + * but later deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_6, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER +#endif + +/* + * AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.7, + * but later deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_7, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER +#endif + +/* + * 
AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 + * + * Used on declarations introduced in Mac OS X 10.8, + * but later deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_8, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 DEPRECATED_ATTRIBUTE +#else + #define AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER +#endif + +/* + * DEPRECATED_IN_MAC_OS_X_VERSION_10_9_AND_LATER + * + * Used on types deprecated in Mac OS X 10.9 + */ +#if __AVAILABILITY_MACROS_USES_AVAILABILITY + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_9_AND_LATER __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_9, __IPHONE_4_0, __IPHONE_4_0) +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9 + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_9_AND_LATER DEPRECATED_ATTRIBUTE +#else + #define DEPRECATED_IN_MAC_OS_X_VERSION_10_9_AND_LATER +#endif + + #endif /* __AVAILABILITYMACROS__ */ diff --git a/EXTERNAL_HEADERS/Makefile b/EXTERNAL_HEADERS/Makefile index 61680a394..0eef296a7 100644 --- a/EXTERNAL_HEADERS/Makefile +++ b/EXTERNAL_HEADERS/Makefile @@ -11,9 +11,6 @@ INSTINC_SUBDIRS = \ architecture \ mach-o -INSTINC_SUBDIRS_I386 = \ - architecture - INSTINC_SUBDIRS_X86_64 = \ architecture @@ -25,7 +22,6 @@ EXPORT_FILES = \ Availability.h \ AvailabilityInternal.h \ AvailabilityMacros.h \ - ar.h \ stdarg.h \ stdbool.h \ stdint.h @@ -34,7 +30,11 @@ INSTALL_MI_LIST = INSTALL_MI_DIR = . -EXPORT_MI_LIST = ${EXPORT_FILES} +INSTALL_KF_MI_LIST = ${EXPORT_FILES} + +INSTALL_KF_MI_LCL_LIST = ${EXPORT_FILES} + +EXPORT_MI_LIST = ${EXPORT_FILES} stddef.h EXPORT_MI_DIR = . diff --git a/EXTERNAL_HEADERS/ar.h b/EXTERNAL_HEADERS/ar.h deleted file mode 100644 index c80b50183..000000000 --- a/EXTERNAL_HEADERS/ar.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
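Stepping back to the AvailabilityMacros.h hunks for a moment: they all follow one three-way dispatch — prefer the compiler-checked __OSX_AVAILABLE_STARTING/__OSX_AVAILABLE_BUT_DEPRECATED forms when __AVAILABILITY_MACROS_USES_AVAILABILITY is set, otherwise fall back to DEPRECATED_ATTRIBUTE, UNAVAILABLE_ATTRIBUTE, or WEAK_IMPORT_ATTRIBUTE keyed off the deployment target. A minimal sketch of how a client header would adopt the new 10.9 macros (the function names here are hypothetical, not from this patch):

#include <AvailabilityMacros.h>

/* Introduced in 10.9: with a 10.9 SDK this expands to
 * __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_4_0); against an older
 * MAC_OS_X_VERSION_MAX_ALLOWED it degrades to UNAVAILABLE_ATTRIBUTE or
 * WEAK_IMPORT_ATTRIBUTE, per the branches above. */
extern int new_kernel_call(void) AVAILABLE_MAC_OS_X_VERSION_10_9_AND_LATER;

/* Present since 10.0 but deprecated in 10.9: deployment targets of 10.9
 * or later see DEPRECATED_ATTRIBUTE. */
extern int old_kernel_call(void) AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_9;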
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * This code is derived from software contributed to Berkeley by - * Hugh Smith at The University of Guelph. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)ar.h 8.2 (Berkeley) 1/21/94 - */ -#ifdef KERNEL_PRIVATE - -#ifndef _AR_H_ -#define _AR_H_ - -/* Pre-4BSD archives had these magic numbers in them. 
*/ -#define OARMAG1 0177555 -#define OARMAG2 0177545 - -#define ARMAG "!<arch>\n" /* ar "magic number" */ -#define SARMAG 8 /* strlen(ARMAG); */ - -#define AR_EFMT1 "#1/" /* extended format #1 */ - -struct ar_hdr { - char ar_name[16]; /* name */ - char ar_date[12]; /* modification time */ - char ar_uid[6]; /* user id */ - char ar_gid[6]; /* group id */ - char ar_mode[8]; /* octal file permissions */ - char ar_size[10]; /* size in bytes */ -#define ARFMAG "`\n" - char ar_fmag[2]; /* consistency check */ -}; - -#endif /* !_AR_H_ */ -#endif /* KERNEL_PRIVATE */ diff --git a/EXTERNAL_HEADERS/architecture/Makefile b/EXTERNAL_HEADERS/architecture/Makefile index a322a080f..034d13692 100644 --- a/EXTERNAL_HEADERS/architecture/Makefile +++ b/EXTERNAL_HEADERS/architecture/Makefile @@ -9,9 +9,6 @@ include $(MakeInc_def) INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = \ - i386 - INSTINC_SUBDIRS_X86_64 = \ i386 diff --git a/EXTERNAL_HEADERS/architecture/i386/Makefile b/EXTERNAL_HEADERS/architecture/i386/Makefile index 5e5d47fa8..e4c02e150 100644 --- a/EXTERNAL_HEADERS/architecture/i386/Makefile +++ b/EXTERNAL_HEADERS/architecture/i386/Makefile @@ -7,10 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS_I386 = - -INSTINC_SUBDIRS_X86_64 = - EXPORT_FILES = \ asm_help.h \ cpu.h \ diff --git a/EXTERNAL_HEADERS/architecture/i386/asm_help.h b/EXTERNAL_HEADERS/architecture/i386/asm_help.h index 5d9ec0e0c..ec278d556 100644 --- a/EXTERNAL_HEADERS/architecture/i386/asm_help.h +++ b/EXTERNAL_HEADERS/architecture/i386/asm_help.h @@ -52,6 +52,11 @@ #define ALIGN \ .align 2, 0x90 +/* Note that ROUND_TO_STACK rounds to Intel's stack alignment requirement, + * but it is not sufficient for the Apple ABI which requires a 16-byte + * aligned stack. Various parts of the OS depend on this requirement, + * including dyld. + */ #define ROUND_TO_STACK(len) \ (((len) + STACK_INCR - 1) / STACK_INCR * STACK_INCR) diff --git a/EXTERNAL_HEADERS/mach-o/Makefile b/EXTERNAL_HEADERS/mach-o/Makefile index e54b58c6d..55ac56a4b 100644 --- a/EXTERNAL_HEADERS/mach-o/Makefile +++ b/EXTERNAL_HEADERS/mach-o/Makefile @@ -7,8 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - EXPORT_FILES = \ fat.h \ loader.h \ diff --git a/EXTERNAL_HEADERS/mach-o/loader.h b/EXTERNAL_HEADERS/mach-o/loader.h index f41664e54..d26ad14e3 100644 --- a/EXTERNAL_HEADERS/mach-o/loader.h +++ b/EXTERNAL_HEADERS/mach-o/loader.h @@ -294,6 +294,7 @@ struct load_command { #define LC_DATA_IN_CODE 0x29 /* table of non-instructions in __text */ #define LC_SOURCE_VERSION 0x2A /* source version used to build binary */ #define LC_DYLIB_CODE_SIGN_DRS 0x2B /* Code signing DRs copied from linked dylibs */ +#define LC_ENCRYPTION_INFO_64 0x2C /* 64-bit encrypted segment information */ /* @@ -1174,6 +1175,21 @@ struct encryption_info_command { 0 means not-encrypted yet */ }; +/* + * The encryption_info_command_64 contains the file offset and size of + * an encrypted segment (for use in 64-bit targets). + */ +struct encryption_info_command_64 { + uint32_t cmd; /* LC_ENCRYPTION_INFO_64 */ + uint32_t cmdsize; /* sizeof(struct encryption_info_command_64) */ + uint32_t cryptoff; /* file offset of encrypted range */ + uint32_t cryptsize; /* file size of encrypted range */ + uint32_t cryptid; /* which encryption system, + 0 means not-encrypted yet */ + uint32_t pad; /* padding to make this struct's size a multiple + of 8 bytes */ +}; +
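A hedged sketch of how a consumer might locate the new load command; the struct layout is exactly the one above, while the walking code and function name are illustrative only:

#include <mach-o/loader.h>
#include <stdint.h>
#include <stddef.h>

/* Illustrative: find LC_ENCRYPTION_INFO_64 in a validated, in-memory
 * 64-bit Mach-O image. Bounds checks omitted for brevity. */
static const struct encryption_info_command_64 *
find_encryption_info_64(const struct mach_header_64 *mh)
{
	const struct load_command *lc = (const struct load_command *)(mh + 1);
	for (uint32_t i = 0; i < mh->ncmds; i++) {
		if (lc->cmd == LC_ENCRYPTION_INFO_64)
			return (const struct encryption_info_command_64 *)lc;
		lc = (const struct load_command *)((const char *)lc + lc->cmdsize);
	}
	return NULL;	/* absent; cryptid == 0 likewise means "not encrypted yet" */
}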
/* * The version_min_command contains the min OS version on which this * binary was built to run. @@ -1428,19 +1444,18 @@ struct source_version_command { /* * The LC_DATA_IN_CODE load command uses a linkedit_data_command * to point to an array of data_in_code_entry entries. Each entry - * describes a range of data in a code section. This load command - * is only used in final linked images. + * describes a range of data in a code section. */ struct data_in_code_entry { uint32_t offset; /* from mach_header to start of data range*/ uint16_t length; /* number of bytes in data range */ uint16_t kind; /* a DICE_KIND_* value */ }; -#define DICE_KIND_DATA 0x0001 /* L$start$data$... label */ -#define DICE_KIND_JUMP_TABLE8 0x0002 /* L$start$jt8$... label */ -#define DICE_KIND_JUMP_TABLE16 0x0003 /* L$start$jt16$... label */ -#define DICE_KIND_JUMP_TABLE32 0x0004 /* L$start$jt32$... label */ -#define DICE_KIND_ABS_JUMP_TABLE32 0x0005 /* L$start$jta32$... label */ +#define DICE_KIND_DATA 0x0001 +#define DICE_KIND_JUMP_TABLE8 0x0002 +#define DICE_KIND_JUMP_TABLE16 0x0003 +#define DICE_KIND_JUMP_TABLE32 0x0004 +#define DICE_KIND_ABS_JUMP_TABLE32 0x0005
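Since each data_in_code_entry is reached through the LC_DATA_IN_CODE linkedit_data_command (its dataoff/datasize point into __LINKEDIT), dumping the table is a short loop. A sketch under the assumption that the file is already mapped and validated; the names are illustrative:

#include <mach-o/loader.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative: print every data-in-code range recorded for a mapped image.
 * 'file' is the base of the mapped Mach-O file. */
static void
dump_data_in_code(const uint8_t *file, const struct linkedit_data_command *dic)
{
	const struct data_in_code_entry *dice =
	    (const struct data_in_code_entry *)(file + dic->dataoff);
	uint32_t n = dic->datasize / sizeof(*dice);
	for (uint32_t i = 0; i < n; i++)
		printf("offset 0x%x length %u kind %u\n",
		    dice[i].offset, dice[i].length, dice[i].kind);
}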
diff --git a/EXTERNAL_HEADERS/stdarg.h b/EXTERNAL_HEADERS/stdarg.h index bbbaff93e..2957bf058 100644 --- a/EXTERNAL_HEADERS/stdarg.h +++ b/EXTERNAL_HEADERS/stdarg.h @@ -26,7 +26,10 @@ #ifndef __STDARG_H #define __STDARG_H +#ifndef _VA_LIST typedef __builtin_va_list va_list; +#define _VA_LIST +#endif #define va_start(ap, param) __builtin_va_start(ap, param) #define va_end(ap) __builtin_va_end(ap) #define va_arg(ap, type) __builtin_va_arg(ap, type) @@ -36,7 +39,7 @@ typedef __builtin_va_list va_list; */ #define __va_copy(d,s) __builtin_va_copy(d,s) -#if __STDC_VERSION__ >= 199900L || !defined(__STRICT_ANSI__) +#if __STDC_VERSION__ >= 199900L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__) #define va_copy(dest, src) __builtin_va_copy(dest, src) #endif diff --git a/EXTERNAL_HEADERS/stddef.h b/EXTERNAL_HEADERS/stddef.h new file mode 100644 index 000000000..9e87ee89b --- /dev/null +++ b/EXTERNAL_HEADERS/stddef.h @@ -0,0 +1,64 @@ +/*===---- stddef.h - Basic type definitions --------------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDDEF_H +#define __STDDEF_H + +#ifndef _PTRDIFF_T +#define _PTRDIFF_T +typedef __typeof__(((int*)0)-((int*)0)) ptrdiff_t; +#endif +#ifndef _SIZE_T +#define _SIZE_T +typedef __typeof__(sizeof(int)) size_t; +#endif +#ifndef __cplusplus +#ifndef _WCHAR_T +#define _WCHAR_T +typedef __WCHAR_TYPE__ wchar_t; +#endif +#endif + +#undef NULL +#ifdef __cplusplus +#undef __null // VC++ hack. +#define NULL __null +#else +#define NULL ((void*)0) +#endif + +#define offsetof(t, d) __builtin_offsetof(t, d) + +#endif /* __STDDEF_H */ + +/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use +__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ +#if defined(__need_wint_t) +#if !defined(_WINT_T) +#define _WINT_T +typedef __WINT_TYPE__ wint_t; +#endif /* _WINT_T */ +#undef __need_wint_t +#endif /* __need_wint_t */
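The vendored stddef.h above leans entirely on compiler builtins, so offsetof is usable in constant expressions. A small sketch of the kind of compile-time layout check that enables (the struct here is hypothetical):

#include <stddef.h>
#include <stdint.h>

struct sample_header {
	uint64_t magic;
	uint32_t version;
};

/* Compiles only while 'version' stays at byte offset 8; a negative
 * array size rejects the build if the layout drifts. */
typedef char sample_header_layout_check[
    (offsetof(struct sample_header, version) == 8) ? 1 : -1];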
diff --git a/EXTERNAL_HEADERS/stdint.h b/EXTERNAL_HEADERS/stdint.h index ca048597a..9d86e8a62 100644 --- a/EXTERNAL_HEADERS/stdint.h +++ b/EXTERNAL_HEADERS/stdint.h @@ -1,28 +1,24 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. - * - * We build on <machine/types.h> rather than <sys/types.h> in order to - * minimize the global namespace pollution (i.e., we'd like to define - * *only* those identifiers that the C standard mandates should be - * defined by <stdint.h>). Using <machine/types.h> means that (at - * least as of January 2001) all of the extra macros that do get - * #defined by #include'ing <machine/types.h> are in the implementor's - * namespace ("_[A-Z].*" or "__.*"). - * - * The reason that we do #include the relevant ...types.h instead of - * creating several "competing" typedefs is to make header collisions - * less likely during the transition to C99. - * - * Caveat: There are still five extra typedef's defined by doing it - * this way: "u_int{8,16,32,64}_t" and "register_t". Might be - * fixable via pre- and post- #defines, but probably not worth it. + * Copyright (c) 2000-2010 Apple Inc. + * All rights reserved. */ -#ifndef _STDINT_H_ -#define _STDINT_H_ +#ifndef _KERNEL_STDINT_H_ +#define _KERNEL_STDINT_H_ + +#ifndef KERNEL +/* For user-space code that may include this header */ +#include_next <stdint.h> +#else /* KERNEL */ #include <machine/types.h> +#if __LP64__ +#define __WORDSIZE 64 +#else +#define __WORDSIZE 32 +#endif + /* from ISO/IEC 9899:1999 spec */ /* 7.18.1.1 Exact-width integer types */ @@ -36,7 +32,7 @@ typedef u_int32_t uint32_t; /* u_int32_t is defined in <machine/types.h> */ -/* 7.18.1.2 Minumun-width integer types */ +/* 7.18.1.2 Minimum-width integer types */ typedef int8_t int_least8_t; typedef int16_t int_least16_t; typedef int32_t int_least32_t; @@ -58,7 +54,7 @@ typedef uint32_t uint_fast32_t; typedef uint64_t uint_fast64_t; -/* 7.18.1.4 Integer types capable of hgolding object pointers */ +/* 7.18.1.4 Integer types capable of holding object pointers */ /* intptr_t is defined in <machine/types.h> */ /* uintptr_t is defined in <machine/types.h> */ @@ -67,16 +63,6 @@ typedef uint64_t uint_fast64_t; typedef long long intmax_t; typedef unsigned long long uintmax_t; - -/* "C++ implementations should define these macros only when - * __STDC_LIMIT_MACROS is defined before <stdint.h> is included." - * In other words, if C++, then __STDC_LIMIT_MACROS enables the - * macros below. (Note that there also exists a different enabling - * macro (__STDC_CONSTANT_MACROS) for the last few, below.) - */ -#if (! defined(__cplusplus)) || defined(__STDC_LIMIT_MACROS) - - /* 7.18.2 Limits of specified-width integer types: * These #defines specify the minimum and maximum limits * of each of the types declared above. */ /* 7.18.2.1 Limits of exact-width integer types */ -#define INT8_MIN (-127-1) -#define INT16_MIN (-32767-1) -#define INT32_MIN (-2147483647-1) -#define INT64_MIN (-9223372036854775807LL-1LL) - -#define INT8_MAX +127 -#define INT16_MAX +32767 -#define INT32_MAX +2147483647 -#define INT64_MAX +9223372036854775807LL +#define INT8_MAX 127 +#define INT16_MAX 32767 +#define INT32_MAX 2147483647 +#define INT64_MAX 9223372036854775807LL + +#define INT8_MIN -128 +#define INT16_MIN -32768 + /* + Note: the literal "most negative int" cannot be written in C -- + the rules in the standard (section 6.4.4.1 in C99) will give it + an unsigned type, so INT32_MIN (and the most negative member of + any larger signed type) must be written via a constant expression. + */ +#define INT32_MIN (-INT32_MAX-1) +#define INT64_MIN (-INT64_MAX-1) #define UINT8_MAX 255 #define UINT16_MAX 65535 @@ -132,13 +124,18 @@ typedef unsigned long long uintmax_t; #define UINT_FAST64_MAX UINT64_MAX /* 7.18.2.4 Limits of integer types capable of holding object pointers */ -#if defined(__LP64__) -#define INTPTR_MIN INT64_MIN -#define INTPTR_MAX INT64_MAX -#define UINTPTR_MAX UINT64_MAX + +#if __WORDSIZE == 64 +#define INTPTR_MIN INT64_MIN +#define INTPTR_MAX INT64_MAX #else #define INTPTR_MIN INT32_MIN #define INTPTR_MAX INT32_MAX +#endif + +#if __WORDSIZE == 64 +#define UINTPTR_MAX UINT64_MAX +#else #define UINTPTR_MAX UINT32_MAX #endif @@ -149,48 +146,68 @@ typedef unsigned long long uintmax_t; #define UINTMAX_MAX UINT64_MAX /* 7.18.3 "Other" */ -#if defined(__LP64__) -#define PTRDIFF_MIN INT64_MIN -#define PTRDIFF_MAX INT64_MAX +#if __WORDSIZE == 64 +#define PTRDIFF_MIN INT64_MIN +#define PTRDIFF_MAX INT64_MAX #else #define PTRDIFF_MIN INT32_MIN #define PTRDIFF_MAX INT32_MAX #endif + /* We have no sig_atomic_t yet, so no SIG_ATOMIC_{MIN,MAX}. Should end up being {-127,127} or {0,255} ... or bigger. My bet would be on one of {U}INT32_{MIN,MAX}. */ +#if __WORDSIZE == 64 +#define SIZE_MAX UINT64_MAX +#else #define SIZE_MAX UINT32_MAX +#endif -#ifndef WCHAR_MAX -#define WCHAR_MAX INT32_MAX +#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 +#define RSIZE_MAX (SIZE_MAX >> 1) #endif -/* We have no wint_t yet, so no WINT_{MIN,MAX}. - Should end up being {U}INT32_{MIN,MAX}, depending. */ +#ifndef WCHAR_MAX +# ifdef __WCHAR_MAX__ +# define WCHAR_MAX __WCHAR_MAX__ +# else +# define WCHAR_MAX 0x7fffffff +# endif +#endif +/* WCHAR_MIN should be 0 if wchar_t is an unsigned type and + (-WCHAR_MAX-1) if wchar_t is a signed type. Unfortunately, + it turns out that -fshort-wchar changes the signedness of + the type. */ +#ifndef WCHAR_MIN +# if WCHAR_MAX == 0xffff +# define WCHAR_MIN 0 +# else +# define WCHAR_MIN (-WCHAR_MAX-1) +# endif +#endif -#endif /* if C++, then __STDC_LIMIT_MACROS enables the above macros */ +#define WINT_MIN INT32_MIN +#define WINT_MAX INT32_MAX -/* "C++ implementations should define these macros only when - * __STDC_CONSTANT_MACROS is defined before <stdint.h> is included." - */ -#if (! defined(__cplusplus)) || defined(__STDC_CONSTANT_MACROS) +#define SIG_ATOMIC_MIN INT32_MIN +#define SIG_ATOMIC_MAX INT32_MAX /* 7.18.4 Macros for integer constants */ -#define INT8_C(v) ((int8_t)v) -#define INT16_C(v) ((int16_t)v) -#define INT32_C(v) (v ## L) +#define INT8_C(v) (v) +#define INT16_C(v) (v) +#define INT32_C(v) (v) #define INT64_C(v) (v ## LL) -#define UINT8_C(v) ((uint8_t)v) -#define UINT16_C(v) ((uint16_t)v) -#define UINT32_C(v) (v ## UL) +#define UINT8_C(v) (v ## U) +#define UINT16_C(v) (v ## U) +#define UINT32_C(v) (v ## U) #define UINT64_C(v) (v ## ULL) #define INTMAX_C(v) (v ## LL) #define UINTMAX_C(v) (v ## ULL) -#endif /* if C++, then __STDC_CONSTANT_MACROS enables the above macros */ +#endif /* KERNEL */ -#endif /* _STDINT_H_ */ +#endif /* _KERNEL_STDINT_H_ */
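The rewritten limits above spell the minimums as (-INT32_MAX-1) and (-INT64_MAX-1) for exactly the reason the new comment gives: a decimal literal like 2147483648 is too big for int, so -2147483648 is unary minus applied to a wider (or, under C90 rules, unsigned) constant. A short illustration, assuming a typical two's-complement compiler:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Writing -2147483648 directly would first type 2147483648 as
	 * long/long long (C99) or unsigned (C90) and then negate it; the
	 * expression below stays within int at every step. */
	printf("%d\n", (-INT32_MAX - 1));              /* -2147483648 */
	printf("%d\n", INT32_MIN == (-INT32_MAX - 1)); /* 1 */
	return 0;
}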
diff --git a/Makefile b/Makefile index 3e9a72e7e..2f4d164e6 100644 --- a/Makefile +++ b/Makefile @@ -1,80 +1,199 @@ +# +# Copyright (C) 1999-2010 Apple Inc. All rights reserved. +# + ifndef VERSDIR -export VERSDIR=$(shell /bin/pwd) +export VERSDIR := $(shell /bin/pwd) endif + ifndef SRCROOT -export SRCROOT=$(shell /bin/pwd) +export SRCROOT := $(shell /bin/pwd) endif ifndef OBJROOT -export OBJROOT=$(SRCROOT)/BUILD/obj/ +export OBJROOT = $(SRCROOT)/BUILD/obj endif ifndef DSTROOT -export DSTROOT=$(SRCROOT)/BUILD/dst/ +export DSTROOT = $(SRCROOT)/BUILD/dst endif ifndef SYMROOT -export SYMROOT=$(SRCROOT)/BUILD/sym/ +export SYMROOT = $(SRCROOT)/BUILD/sym endif +export MakeInc_top=${VERSDIR}/makedefs/MakeInc.top +export MakeInc_kernel=${VERSDIR}/makedefs/MakeInc.kernel export MakeInc_cmd=${VERSDIR}/makedefs/MakeInc.cmd export MakeInc_def=${VERSDIR}/makedefs/MakeInc.def export MakeInc_rule=${VERSDIR}/makedefs/MakeInc.rule export MakeInc_dir=${VERSDIR}/makedefs/MakeInc.dir +# +# Dispatch non-xnu build aliases to their own build +# systems. All xnu variants start with MakeInc_top. +# + +ifeq ($(findstring Libsyscall,$(RC_ProjectName)),Libsyscall) + +ifeq ($(RC_ProjectName),Libsyscall_headers_Sim) +TARGET=-target Libsyscall_headers_Sim +endif + +default: install + +installhdrs install: + cd libsyscall ; \ + xcodebuild $@ $(TARGET) \ + "SRCROOT=$(SRCROOT)/libsyscall" \ + "OBJROOT=$(OBJROOT)" \ + "SYMROOT=$(SYMROOT)" \ + "DSTROOT=$(DSTROOT)" \ + "SDKROOT=$(SDKROOT)" + +clean: + +installsrc: + pax -rw . $(SRCROOT) + +else ifeq ($(RC_ProjectName),libkxld) + +default: install + +installhdrs install clean: + $(MAKE) -C libkern/kxld $@ USE_APPLE_PB_SUPPORT=all + +installsrc: + pax -rw . $(SRCROOT) + +else ifeq ($(RC_ProjectName),libkxld_host) + +default: install + +installhdrs install clean: + $(MAKE) -C libkern/kxld $@ USE_APPLE_PB_SUPPORT=all PRODUCT_TYPE=ARCHIVE + +installsrc: + pax -rw . $(SRCROOT) + +else ifeq ($(RC_ProjectName),libkmod) + +default: install + +installhdrs install: + cd libkern/kmod ; \ + xcodebuild $@ \ + "SRCROOT=$(SRCROOT)/libkern/kmod" \ + "OBJROOT=$(OBJROOT)" \ + "SYMROOT=$(SYMROOT)" \ + "DSTROOT=$(DSTROOT)" \ + "SDKROOT=$(SDKROOT)" + +clean: + +installsrc: + pax -rw . $(SRCROOT) + +else ifeq ($(RC_ProjectName),xnu_quick_test) + +default: install + +installhdrs: + +install: xnu_quick_test + +clean: + +installsrc: + pax -rw . $(SRCROOT) + +else # all other RC_ProjectName + +ifndef CURRENT_BUILD_CONFIG + +# avoid having to include MakeInc.cmd +ifeq ($(RC_XBS),YES) +_v = +else ifeq ($(VERBOSE),YES) +_v = +else +_v = @ +endif + +# +# Setup for parallel sub-makes based on 2 times the number of physical CPUs. +# If MAKEJOBS or -jN is passed on the make line, that takes precedence.
+# +MAKEJOBS := --jobs=$(shell expr `/usr/sbin/sysctl -n hw.physicalcpu` \* 2) + +TOP_TARGETS = clean installsrc installhdrs installhdrs_embedded installman exporthdrs setup build all all_embedded install install_embedded installopensource cscope tags help print_exports print_exports_first_build_config + +.PHONY: $(TOP_TARGETS) + +default: all + +ifneq ($(REMOTEBUILD),) +$(TOP_TARGETS): + $(_v)$(VERSDIR)/tools/remote_build.sh _REMOTEBUILD_TARGET=$@ _REMOTEBUILD_MAKE=$(MAKE) $(MAKEFLAGS) +else +$(TOP_TARGETS): + $(_v)$(MAKE) -r $(if $(filter -j,$(MAKEFLAGS)),,$(MAKEJOBS)) -f $(MakeInc_top) $@ +endif + +else # CURRENT_BUILD_CONFIG include $(MakeInc_cmd) include $(MakeInc_def) ALL_SUBDIRS = \ + bsd \ iokit \ osfmk \ - bsd \ pexpert \ libkern \ libsa \ - security + security \ + config -CONFIG_SUBDIRS_I386 = config -CONFIG_SUBDIRS_X86_64 = config -CONFIG_SUBDIRS_ARM = config +CONFIG_SUBDIRS = config tools INSTINC_SUBDIRS = $(ALL_SUBDIRS) EXTERNAL_HEADERS -INSTINC_SUBDIRS_I386 = $(INSTINC_SUBDIRS) INSTINC_SUBDIRS_X86_64 = $(INSTINC_SUBDIRS) INSTINC_SUBDIRS_ARM = $(INSTINC_SUBDIRS) EXPINC_SUBDIRS = $(ALL_SUBDIRS) -EXPINC_SUBDIRS_I386 = $(EXPINC_SUBDIRS) EXPINC_SUBDIRS_X86_64 = $(EXPINC_SUBDIRS) EXPINC_SUBDIRS_ARM = $(EXPINC_SUBDIRS) SETUP_SUBDIRS = SETUP -COMP_SUBDIRS_I386 = $(ALL_SUBDIRS) COMP_SUBDIRS_X86_64 = $(ALL_SUBDIRS) COMP_SUBDIRS_ARM = $(ALL_SUBDIRS) INST_SUBDIRS = \ - libkern \ - libsa \ - iokit \ - osfmk \ bsd \ - config \ - security - -INSTALL_KERNEL_FILE = mach_kernel - -INSTALL_KERNEL_DIR = / - -INSTALL_KERNEL_SYM_DIR = $(INSTALL_KERNEL_DIR)/System/Library/Extensions/KDK/ - + config INSTMAN_SUBDIRS = \ bsd +include $(MakeInc_kernel) include $(MakeInc_rule) include $(MakeInc_dir) +endif # CURRENT_BUILD_CONFIG + +endif # all other RC_ProjectName + +# "xnu_quick_test" and "testbots" are targets that can be invoked via a standalone +# "make xnu_quick_test" or via buildit/XBS with the RC_ProjectName=xnu_quick_test. +# Define the target here in the outermost scope of the initial Makefile + +xnu_quick_test: + $(MAKE) -C $(SRCROOT)/tools/tests \ + SRCROOT=$(SRCROOT)/tools/tests + # This target is defined to compile and run xnu_quick_test under testbots testbots: - /usr/bin/make MORECFLAGS="-D RUN_UNDER_TESTBOTS=1" testbots -C ./tools/tests/xnu_quick_test/ - + $(MAKE) -C $(SRCROOT)/tools/tests/xnu_quick_test \ + SRCROOT=$(SRCROOT)/tools/tests/xnu_quick_test \ + MORECFLAGS="-DRUN_UNDER_TESTBOTS=1" \ + MAKE=$(MAKE) \ + testbots diff --git a/README b/README index b71d70f72..0ac97d202 100644 --- a/README +++ b/README @@ -27,41 +27,7 @@ A. How to build XNU: make ARCH_CONFIGS=X86_64 -2) Building a Component - - Go to the top directory in your XNU project. - - If you are using a sh-style shell, run the following command: - $ . SETUP/setup.sh - - If you are using a csh-style shell, run the following command: - % source SETUP/setup.csh - - This will define the following environmental variables: - SRCROOT, OBJROOT, DSTROOT, SYMROOT - - From a component top directory: - - $ make all - - This builds a component for all architectures, kernel configurations, and - machine configurations defined in TARGET_CONFIGS (or alternately ARCH_CONFIGS - and KERNEL_CONFIGS). - - Example: - $(OBJROOT)/RELEASE_X86_64/osfmk/RELEASE/osfmk.filelist: list of objects in osfmk component - - From the component top directory: - - $ make mach_kernel - - This includes your component in the bootable image, mach_kernel, and - in the kernel binary with symbols, mach_kernel.sys. 
- - WARNING: If a component header file has been modified, you will have to do - the above procedure 1. - -3) Building DEBUG +2) Building DEBUG Define kernel configuration to DEBUG in your environment or when running a make command. Then, apply procedures 4, 5 @@ -82,7 +48,7 @@ A. How to build XNU: $(OBJROOT)/DEBUG_X86_64/osfmk/DEBUG/osfmk.filelist: list of objects in osfmk component $(OBJROOT)/DEBUG_X86_64/mach_kernel: bootable image -4) Building fat +3) Building fat Define architectures in your environment or when running a make command. Apply procedures 3, 4, 5 @@ -98,17 +64,17 @@ A. How to build XNU: $ export ARCH_CONFIGS="I386 X86_64" $ make exporthdrs all -5) Verbose make +4) Verbose make To display complete tool invocations rather than an abbreviated version, $ make VERBOSE=YES -6) Debug information formats +5) Debug information formats By default, a DWARF debug information repository is created during the install phase; this is a "bundle" named mach_kernel.dSYM. To select the older STABS debug information format (where debug information is embedded in the mach_kernel.sys image), set the BUILD_STABS environment variable. $ export BUILD_STABS=1 $ make -7) Build check before integration +6) Build check before integration From the top directory, run: @@ -135,9 +101,11 @@ A. How to build XNU: -project Libsyscall # automatically generate BSD syscall stubs + -project xnu_quick_test # install xnu unit tests + -8) Creating tags and cscope +7) Creating tags and cscope Set up your build environment as per instructions in 2a @@ -150,15 +118,16 @@ A. How to build XNU: $ make cscope # this will build cscope database -9) Other makefile options +8) Other makefile options - $ make MAKEJOBS=-j8 # this will use 8 processes during the build. The default is 2x the number of active CPUs + $ make MAKEJOBS=-j8 # this will use 8 processes during the build. The default is 2x the number of active CPUs. + $ make -j8 # the standard command-line option is also accepted $ make -w # trace recursive make invocations. Useful in combination with VERBOSE=YES $ make BUILD_LTO=1 # build with LLVM Link Time Optimization (experimental) - $ make BUILD_INTEGRATED_ASSEMBLER=1 # build with LLVM integrated assembler (experimental) + $ make REMOTEBUILD=user@remotehost # perform build on remote host ============================================= B. How to install a new header file from XNU diff --git a/SETUP/Makefile b/SETUP/Makefile index 97c11fe61..74291170c 100644 --- a/SETUP/Makefile +++ b/SETUP/Makefile @@ -3,7 +3,6 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) @@ -12,7 +11,7 @@ SETUP_SUBDIRS = \ kextsymboltool \ setsegname \ decomment \ - md + installfile include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/SETUP/config/Makefile b/SETUP/config/Makefile index 8889afef3..567f2966d 100644 --- a/SETUP/config/Makefile +++ b/SETUP/config/Makefile @@ -10,33 +10,33 @@ include $(MakeInc_def) OBJS = externs.o main.o mkglue.o mkheaders.o mkioconf.o mkmakefile.o \ mkswapconf.o openp.o searchp.o lexer.yy.o parser.o -CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I. +CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I.
WARNFLAGS = -Wall -LDFLAGS = -isysroot $(HOST_SDKROOT) +LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) config: $(OBJS) - $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ @echo HOST_LD $@ - $(_v)$(HOST_CODESIGN) -s - $@ + $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ @echo HOST_CODESIGN $@ + $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@ -.c.o: - $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< +%.o: %.c @echo HOST_CC $@ + $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< parser.c: parser.y - $(_v)$(HOST_BISON) -y -d -d -o $@ $< @echo HOST_BISON $@ + $(_v)$(HOST_BISON) -y -d -d -o $@ $< lexer.yy.c: lexer.l - $(_v)$(HOST_FLEX) --header-file=lexer.yy.h -o $@ $< @echo HOST_FLEX $@ + $(_v)env M4=$(HOST_GM4) $(HOST_FLEX) --header-file=lexer.yy.h -o $@ $< main.o mkglue.o mkheaders.o mkioconf.o mkmakefile.o lexer.yy.c: parser.c -do_build_setup: config +do_build_setup:: config include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/SETUP/config/mkmakefile.c b/SETUP/config/mkmakefile.c index 4bf5602fd..f52ff8e18 100644 --- a/SETUP/config/mkmakefile.c +++ b/SETUP/config/mkmakefile.c @@ -238,9 +238,9 @@ makefile(void) if (machine == MACHINE_SUN || machine == MACHINE_SUN2 || machine == MACHINE_SUN3 || machine == MACHINE_SUN4) - fprintf(ofp, "IDENT=-D%s -D%s", machinename, allCaps(ident)); + fprintf(ofp, "export IDENT=-D%s -D%s", machinename, allCaps(ident)); else - fprintf(ofp, "IDENT=-D%s", allCaps(ident)); + fprintf(ofp, "export IDENT=-D%s", allCaps(ident)); if (profiling) fprintf(ofp, " -DGPROF"); if (cputype == 0) { @@ -248,6 +248,7 @@ makefile(void) exit(1); } do_build("cputypes.h", build_cputypes); + do_build("platforms.h", build_cputypes); for (op = opt; op; op = op->op_next) if (op->op_value) @@ -310,24 +311,17 @@ makefile(void) } else if (eq(line, "%CFILES\n")) { do_files(ofp, "CFILES=", 'c'); do_objs(ofp, "COBJS=", 'c'); - } else if (eq(line, "%MFILES\n")) { - do_files(ofp, "MFILES=", 'm'); - do_objs(ofp, "MOBJS=", 'm'); } else if (eq(line, "%SFILES\n")) { do_files(ofp, "SFILES=", 's'); do_objs(ofp, "SOBJS=", 's'); - } else if (eq(line, "%BFILES\n")) - do_files(ofp, "BFILES=", 'b'); - else if (eq(line, "%MACHDEP\n")) { + } else if (eq(line, "%MACHDEP\n")) { /* * Move do_machdep() after the mkopt stuff. 
*/ for (op = mkopt; op; op = op->op_next) fprintf(ofp, "%s=%s\n", op->op_name, op->op_value); do_machdep(ofp); - } else if (eq(line, "%ORDERED\n")) - do_ordered(ofp); - else if (eq(line, "%RULES\n")) + } else if (eq(line, "%RULES\n")) do_rules(ofp); else if (eq(line, "%LOAD\n")) do_load(ofp); @@ -923,51 +917,22 @@ do_rules(FILE *f) source_dir = "$(SOURCE_DIR)/"; *cp = '\0'; tp = tail(np); /* dvw: init tp before 'if' */ - if (och == 'o') { - fprintf(f, "%so: %so\n\t${O_RULE_1A}%s%.*s${O_RULE_1B}\n\n", - tp, np, source_dir, (int)(tp-np), np); - continue; - } + fprintf(f, "-include %sd\n", tp); fprintf(f, "%so: %s%s%c\n", tp, source_dir, np, och); if (och == 's') { switch (machine) { case MACHINE_MIPSY: case MACHINE_MIPS: - switch (ftp->f_type) { - case NORMAL: - case DRIVER: - fprintf(f, "\t@${RM} %so\n", tp); - fprintf(f, "\t${CC} ${CCASFLAGS}%s %s%s%ss\n\n", - (ftp->f_extra?ftp->f_extra:""), extras, source_dir, np); - break; - - case PROFILING: - if (!profiling) - continue; - fprintf(f, "\t@${RM} %so\n", tp); - fprintf(f, "\t${CC} ${CCPASFLAGS}%s %s%s%ss\n\n", - (ftp->f_extra?ftp->f_extra:""), extras, source_dir, np); - break; - - default: - printf("Don't know rules for %s.s\n", np); - break; - } break; default: + fprintf(f, "\t${S_RULE_0}\n"); fprintf(f, "\t${S_RULE_1A}%s%.*s${S_RULE_1B}%s\n", source_dir, (int)(tp-np), np, nl); fprintf(f, "\t${S_RULE_2}%s\n", nl); - fprintf(f, "\t${S_RULE_3}\n\n"); break; } continue; } - if (och == 'b') { - fprintf(f, "\t${B_RULE_1A}%s%.*s${B_RULE_1B}\n\n", - source_dir, (int)(tp-np), np); - continue; - } extras = ""; switch (ftp->f_type) { @@ -976,21 +941,7 @@ do_rules(FILE *f) case MACHINE_MIPSY: case MACHINE_MIPS: - fprintf(f, "\t@${RM} %so\n", tp); - fprintf(f, "\t${CC} ${CCNFLAGS}%s %s%s%sc\n\n", - (ftp->f_extra?ftp->f_extra:""), extras, source_dir, np); - continue; - #if 0 - case MACHINE_SQT: - if (ftp->f_flags & SEDIT) { - fprintf(f, "\t${CC} -SO ${COPTS} %s%s%sc | \\\n", extras, source_dir, np); - fprintf(f, "\t${SEDCMD} | ${C2} | ${AS} ${CAFLAGS} -o %so\n\n", tp); - } else { - fprintf(f, "\t${CC} -c -O ${COPTS} %s%s%sc\n\n", - source_dir, extras, np); - } break; - #endif /* 0 */ default: goto common; } @@ -1049,14 +1000,25 @@ do_rules(FILE *f) common: och_upper = och + 'A' - 'a'; + fprintf(f, "\t${%c_RULE_0%s}\n", och_upper, extras); fprintf(f, "\t${%c_RULE_1A%s}", och_upper, extras); if (ftp->f_extra) fprintf(f, "%s", ftp->f_extra); fprintf(f, "%s%.*s${%c_RULE_1B%s}%s\n", source_dir, (int)(tp-np), np, och_upper, extras, nl); + + /* While we are still using CTF, any build that normally does not support CTF will + * have a "standard" compile done as well that we can harvest CTF information from; do + * that here. + */ + fprintf(f, "\t${%c_CTFRULE_1A%s}", och_upper, extras); + if (ftp->f_extra) + fprintf(f, "%s", ftp->f_extra); + fprintf(f, "%s%.*s${%c_CTFRULE_1B%s}%s\n", + source_dir, (int)(tp-np), np, och_upper, extras, nl); + fprintf(f, "\t${%c_RULE_2%s}%s\n", och_upper, extras, nl); - fprintf(f, "\t${%c_RULE_3%s}%s\n", och_upper, extras, nl); - fprintf(f, "\t${%c_RULE_4%s}\n\n", och_upper, extras); + fprintf(f, "\t${%c_CTFRULE_2%s}%s\n", och_upper, extras, nl); break; default: diff --git a/SETUP/decomment/Makefile b/SETUP/decomment/Makefile index 05cf5b833..5de5e0d57 100644 --- a/SETUP/decomment/Makefile +++ b/SETUP/decomment/Makefile @@ -9,23 +9,23 @@ OBJS = decomment.o -CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I. +CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I.
WARNFLAGS = -Wall -LDFLAGS = -isysroot $(HOST_SDKROOT) +LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) decomment: $(OBJS) - $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ @echo HOST_LD $@ - $(_v)$(HOST_CODESIGN) -s - $@ + $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ @echo HOST_CODESIGN $@ + $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@ -.c.o: - $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< +%.o: %.c @echo HOST_CC $@ + $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< -do_build_setup: decomment +do_build_setup:: decomment include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/SETUP/md/Makefile b/SETUP/installfile/Makefile similarity index 58% rename from SETUP/md/Makefile rename to SETUP/installfile/Makefile index b2741a7b2..060d923fe 100644 --- a/SETUP/md/Makefile +++ b/SETUP/installfile/Makefile @@ -3,29 +3,28 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) -OBJS = md.o +OBJS = installfile.o -CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I. +CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I. WARNFLAGS = -Wall -LDFLAGS = -isysroot $(HOST_SDKROOT) +LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -md: $(OBJS) - $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ +installfile: $(OBJS) @echo HOST_LD $@ - $(_v)$(HOST_CODESIGN) -s - $@ + $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ @echo HOST_CODESIGN $@ + $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@ -.c.o: - $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< +%.o: %.c @echo HOST_CC $@ + $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< -do_build_setup: md +do_build_setup:: installfile include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/SETUP/installfile/installfile.c b/SETUP/installfile/installfile.c new file mode 100644 index 000000000..d7fc765c2 --- /dev/null +++ b/SETUP/installfile/installfile.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2012 Apple, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include <err.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sysexits.h> + +#include <copyfile.h> + +void usage(void); + +int main(int argc, char * argv[]) +{ + struct stat sb; + void *mset; + mode_t mode; + bool gotmode = false; + int ch; + int ret; + int srcfd, dstfd; + const char *src = NULL; + const char *dst = NULL; + char dsttmpname[MAXPATHLEN]; + + while ((ch = getopt(argc, argv, "cSm:")) != -1) { + switch(ch) { + case 'c': + case 'S': + /* ignored for compatibility */ + break; + case 'm': + gotmode = true; + mset = setmode(optarg); + if (!mset) + errx(EX_USAGE, "Unrecognized mode %s", optarg); + + mode = getmode(mset, 0); + free(mset); + break; + case '?': + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + if (argc < 2) { + usage(); + } + + src = argv[0]; + dst = argv[1]; + + srcfd = open(src, O_RDONLY | O_SYMLINK, 0); + if (srcfd < 0) + err(EX_NOINPUT, "open(%s)", src); + + ret = fstat(srcfd, &sb); + if (ret < 0) + err(EX_NOINPUT, "fstat(%s)", src); + + if (!S_ISREG(sb.st_mode)) + err(EX_USAGE, "%s is not a regular file", src); + + snprintf(dsttmpname, sizeof(dsttmpname), "%s.XXXXXX", dst); + + dstfd = mkstemp(dsttmpname); + if (dstfd < 0) + err(EX_UNAVAILABLE, "mkstemp(%s)", dsttmpname); + + ret = fcopyfile(srcfd, dstfd, NULL, + COPYFILE_DATA); + if (ret < 0) + err(EX_UNAVAILABLE, "fcopyfile(%s, %s)", src, dsttmpname); + + ret = futimes(dstfd, NULL); + if (ret < 0) + err(EX_UNAVAILABLE, "futimes(%s)", dsttmpname); + + if (gotmode) { + ret = fchmod(dstfd, mode); + if (ret < 0) + err(EX_NOINPUT, "fchmod(%s, %ho)", dsttmpname, mode); + } + + ret = rename(dsttmpname, dst); + if (ret < 0) + err(EX_NOINPUT, "rename(%s, %s)", dsttmpname, dst); + + ret = close(dstfd); + if (ret < 0) + err(EX_NOINPUT, "close(dst)"); + + ret = close(srcfd); + if (ret < 0) + err(EX_NOINPUT, "close(src)"); + + return 0; +} + +void usage(void) +{ + fprintf(stderr, "Usage: %s [-c] [-S] [-m <mode>] <src> <dst>\n", + getprogname()); + exit(EX_USAGE); +} diff --git a/SETUP/kextsymboltool/Makefile b/SETUP/kextsymboltool/Makefile index 607ef0839..17cf1108f 100644 --- a/SETUP/kextsymboltool/Makefile +++ b/SETUP/kextsymboltool/Makefile @@ -9,26 +9,31 @@ OBJS = kextsymboltool.o -CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I. +CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I.
WARNFLAGS = -Wall -LDFLAGS = -isysroot $(HOST_SDKROOT) -L$(HOST_SPARSE_SDKROOT)/usr/local/lib/system -lstdc++ +LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -L$(HOST_SPARSE_SDKROOT)/usr/local/lib/system -lstdc++ ifneq ($(HOST_SPARSE_SDKROOT),/) LDFLAGS += -lmacho endif -kextsymboltool: $(OBJS) - $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ +.PHONY: force + +.SparseSDK: force + $(_v)echo '$(HOST_SPARSE_SDKROOT)' | cmp -s - $@ || echo '$(HOST_SPARSE_SDKROOT)' > $@ + +kextsymboltool: $(OBJS) .SparseSDK @echo HOST_LD $@ - $(_v)$(HOST_CODESIGN) -s - $@ + $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $(OBJS) @echo HOST_CODESIGN $@ + $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@ -.c.o: - $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< +%.o: %.c @echo HOST_CC $@ + $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< -do_build_setup: kextsymboltool +do_build_setup:: kextsymboltool include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/SETUP/kextsymboltool/kextsymboltool.c b/SETUP/kextsymboltool/kextsymboltool.c index ee46713e4..9b9125bae 100644 --- a/SETUP/kextsymboltool/kextsymboltool.c +++ b/SETUP/kextsymboltool/kextsymboltool.c @@ -24,6 +24,8 @@ #include #include +#include + #include #include #include @@ -35,8 +37,7 @@ #include #include - -#include +#include #pragma mark Typedefs, Enums, Constants /********************************************************************* @@ -60,6 +61,9 @@ readFile(const char *path, vm_offset_t * objAddr, vm_size_t * objSize); __private_extern__ ToolError writeFile(int fd, const void * data, size_t length); +__private_extern__ ToolError +seekFile(int fd, off_t offset); + extern char* __cxa_demangle (const char* mangled_name, char* buf, size_t* n, @@ -84,6 +88,24 @@ writeFile(int fd, const void * data, size_t length) return( err ); } + /********************************************************************* + *********************************************************************/ +__private_extern__ ToolError +seekFile(int fd, off_t offset) +{ + ToolError err; + + if (offset != lseek(fd, offset, SEEK_SET)) + err = kErrorDiskFull; + else + err = kErrorNone; + + if (kErrorNone != err) + perror("couldn't write output"); + + return( err ); +} + /********************************************************************* *********************************************************************/ __private_extern__ ToolError @@ -699,10 +721,10 @@ int main(int argc, char * argv[]) struct symtab_command symcmd; struct uuid_command uuidcmd; + off_t symsoffset; symcmd.cmd = LC_SYMTAB; symcmd.cmdsize = sizeof(symcmd); - symcmd.symoff = sizeof(symcmd) + sizeof(uuidcmd); symcmd.nsyms = result_count; symcmd.strsize = strtabpad; @@ -712,41 +734,83 @@ int main(int argc, char * argv[]) if (CPU_ARCH_ABI64 & target_arch->cputype) { - struct mach_header_64 hdr; + struct mach_header_64 hdr; + struct segment_command_64 segcmd; + hdr.magic = MH_MAGIC_64; hdr.cputype = target_arch->cputype; hdr.cpusubtype = target_arch->cpusubtype; hdr.filetype = MH_KEXT_BUNDLE; - hdr.ncmds = 2; - hdr.sizeofcmds = sizeof(symcmd) + sizeof(uuidcmd); + hdr.ncmds = 3; + hdr.sizeofcmds = sizeof(segcmd) + sizeof(symcmd) + sizeof(uuidcmd); hdr.flags = MH_INCRLINK; - - symcmd.symoff += sizeof(hdr); + symsoffset = mach_vm_round_page(hdr.sizeofcmds); + + segcmd.cmd = LC_SEGMENT_64; + segcmd.cmdsize = sizeof(segcmd); + strncpy(segcmd.segname, SEG_LINKEDIT, sizeof(segcmd.segname)); + segcmd.vmaddr = 0; + segcmd.vmsize = result_count * sizeof(struct nlist_64) + strtabpad; + segcmd.fileoff 
= symsoffset; + segcmd.filesize = segcmd.vmsize; + segcmd.maxprot = PROT_READ; + segcmd.initprot = PROT_READ; + segcmd.nsects = 0; + segcmd.flags = SG_NORELOC; + + symcmd.symoff = symsoffset; symcmd.stroff = result_count * sizeof(struct nlist_64) + symcmd.symoff; if (target_arch->byteorder != host_arch->byteorder) + { swap_mach_header_64(&hdr, target_arch->byteorder); + swap_segment_command_64(&segcmd, target_arch->byteorder); + } err = writeFile(fd, &hdr, sizeof(hdr)); + if (kErrorNone != err) + goto finish; + err = writeFile(fd, &segcmd, sizeof(segcmd)); } else { - struct mach_header hdr; + struct mach_header hdr; + struct segment_command segcmd; + hdr.magic = MH_MAGIC; hdr.cputype = target_arch->cputype; hdr.cpusubtype = target_arch->cpusubtype; hdr.filetype = (target_arch->cputype == CPU_TYPE_I386) ? MH_OBJECT : MH_KEXT_BUNDLE; - hdr.ncmds = 2; - hdr.sizeofcmds = sizeof(symcmd) + sizeof(uuidcmd); + hdr.ncmds = 3; + hdr.sizeofcmds = sizeof(segcmd) + sizeof(symcmd) + sizeof(uuidcmd); hdr.flags = MH_INCRLINK; - - symcmd.symoff += sizeof(hdr); + symsoffset = mach_vm_round_page(hdr.sizeofcmds); + + segcmd.cmd = LC_SEGMENT; + segcmd.cmdsize = sizeof(segcmd); + strncpy(segcmd.segname, SEG_LINKEDIT, sizeof(segcmd.segname)); + segcmd.vmaddr = 0; + segcmd.vmsize = result_count * sizeof(struct nlist) + strtabpad; + segcmd.fileoff = symsoffset; + segcmd.filesize = segcmd.vmsize; + segcmd.maxprot = PROT_READ; + segcmd.initprot = PROT_READ; + segcmd.nsects = 0; + segcmd.flags = SG_NORELOC; + + symcmd.symoff = symsoffset; symcmd.stroff = result_count * sizeof(struct nlist) + symcmd.symoff; if (target_arch->byteorder != host_arch->byteorder) + { swap_mach_header(&hdr, target_arch->byteorder); + swap_segment_command(&segcmd, target_arch->byteorder); + } err = writeFile(fd, &hdr, sizeof(hdr)); + if (kErrorNone != err) + goto finish; + err = writeFile(fd, &segcmd, sizeof(segcmd)); } if (kErrorNone != err) @@ -763,6 +827,10 @@ int main(int argc, char * argv[]) if (kErrorNone != err) goto finish; + err = seekFile(fd, symsoffset); + if (kErrorNone != err) + goto finish; + strx = 4; for (export_idx = 0; export_idx < num_export_syms; export_idx++) { diff --git a/SETUP/md/md.1 b/SETUP/md/md.1 deleted file mode 100644 index 9505f886a..000000000 --- a/SETUP/md/md.1 +++ /dev/null @@ -1,121 +0,0 @@ -.\" Man page Copyright (c) 2002 -.\" Apple Computer -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS DOCUMENTATION IS PROVIDED BY THE APPLE ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. 
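Returning briefly to the kextsymboltool.c changes above: the net effect is a fixed file shape — mach header, then LC_SEGMENT(_64) for __LINKEDIT, LC_SYMTAB, and LC_UUID, with both the segment's fileoff and the symtab's symoff pointing at a page-rounded symsoffset that seekFile() jumps to before the nlist records are written. A stand-in for the rounding step, assuming 4 KiB pages (the tool itself calls mach_vm_round_page()):

#include <stdint.h>

/* Illustrative: round the load-command area up to the next page so the
 * __LINKEDIT payload (nlist entries plus string table) starts page-aligned. */
static uint64_t
round_to_page(uint64_t sizeofcmds)
{
	return (sizeofcmds + 4095u) & ~(uint64_t)4095u;
}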
IN NO EVENT SHALL APPLE BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" -.Dd "December 25, 2002" -.Dt md 1 -.Os -.Sh NAME -.Nm md -.Nd process raw dependency files produced by cpp -MD -.Sh SYNOPSIS -.Nm -.Op Fl d -.Op Fl f -.Op Fl m Ar makefile -.Op Fl u Ar makefile -.Op Fl o Ar outputfile -.Op Fl v -.Op Fl x -.Op Fl D Ar c|d|m|o|t|D -.Sh DESCRIPTION -The -.Nm -command basically does two things: -.Pp -Process the raw dependency files produced by the cpp -MD option. -There is one line in the file for every #include encountered, but -there are repeats and patterns like .../dir1/../dir2 that appear which -should reduce to .../dir2. -.Nm -canonicalizes and flushes repeats from the dependency list. -It also sorts the file names and "fills" them to a 78 character line. -.Pp -.Nm -also updates the makefile directly with the dependency information, -so the .d file can be thrown away (see -.Ar d -option). This is done to save space. -.Nm -assumes that dependency information in the makefile is sorted by .o -file name and it procedes to merge in (add/or replace [as appropriate]) -the new dependency lines that it has generated. -For time efficiency, -.Nm -assumes that any .d files it is given that were created -before the creation date of the "makefile" were processed -already. It ignores them unless the force flag -.Op f -is given. -.Pp -.Sh FLAG SUMMARY -.Bl -tag -width indent -.It Fl D Ar c|D|d|m|o|t -Specify debugging option(s): -.Bl -tag -width indent -.It c -show file contents -.It D -show very low level debugging -.It d -show new dependency crunching -.It m -show generation of makefile -.It o -show files being opened -.It t -show time comparisons -.El -.It Fl d -Delete the .d file after it is processed -.It Fl f -Force an update of the dependencies in the makefile, -even if the makefile is more recent than the .n file. -(This implies that -.Nm -has been run already.) -.It Fl m Ar makefile -Specify the makefile to be upgraded. The defaults are -.Ar makefile -and then -.Ar Makefile . -.It Fl o Ar outputfile -Specify an output file (other than a makefile) for the dependencies. -.It Fl u Ar makefile -Like -.Fl m , -but the file will be created if necessary. -.It Fl v -Set the verbose flag. -.It Fl x -Expunge old dependency information from the makefile. -.El -.Sh SEE ALSO -.Xr make 1 -.Sh BUGS -Old, possibly not used by anyone. -.Sh HISTORY -The -.Nm -utility was written by Robert V. Baron at Carnegie-Mellon University. diff --git a/SETUP/md/md.c b/SETUP/md/md.c deleted file mode 100644 index f253bf5fe..000000000 --- a/SETUP/md/md.c +++ /dev/null @@ -1,654 +0,0 @@ -/* ************************************************************************ *\ - * * - * File: md.c * - * * - * Updates makefiles from the .n dependency files generated by the * - * -MD option to "cc" (and "cpp"). * - * * - * Abstract: * - * * - * Basically, "md" does two things: * - * 1) It processes the raw dependency files produced by the cpp -MD * - * option. 
There is one line in the file for every #include * - * encountered, but there are repeats and patterns like * - * .../dir1/../dir2 appear which should reduce to .../dir2 * - * Md canonicalizes and flushes repeats from the dependency * - * list. It also sorts the file names and "fills" them to a 78 * - * character line. * - * 2) Md also updates the makefile directly with the dependency * - * information, so the .d file can be thrown away (-- -d option) * - * This is done to save space. Md assumes that dependency * - * information in the makefile is sorted by .o file name and it * - * procedes to merge in (add/or replace [as appropriate]) the new * - * dependency lines that it has generated. For time effeciency, * - * Md assumes that any .d files it is given that were created * - * before the creation date of the "makefile" were processed * - * already. It ignores them unless the force flag (-f) is given. * - * * - * Arguments: * - * * - * -d delete the .d file after it is processed * - * -f force an update of the dependencies in the makefile * - * even though the makefile is more recent than the .n file * - * (This implies that md has been run already.) * - * -m specify the makefile to be upgraded. The defaults are * - * "makefile" and then "Makefile". * - * -u like -m above, but the file will be created if necessary * - * -o specify an output file for the dependencies other than a * - * makefile * - * -v set the verbose flag * - * -x expunge old dependency info from makefile * - * -D subswitch for debugging. can be followed by any of * - * "c", "d", "m", "o", "t", "D" meaning: * - * c show file contents * - * d show new dependency crunching * - * m show generation of makefile * - * o show files being opened * - * t show time comparisons * - * D show very low level debugging * - * * - * Author: Robert V. Baron * - * Copyright (c) 1986 by Robert V. Baron * - * * - * HISTORY * - * 29-Apr-87 Robert Baron (rvb) at Carnegie-Mellon University - * If specified -u file does not exist, assume it is empty and - * generate one. As a sanity check, it must be possible to create - * the output file. - * Also, generalized fix below to handle any case of . as a - * file name. - * - * 25-Mar-87 Mary Thompson (mrt) at Carnegie Mellon - * Fixed up pathnamecanonicalization to recognize .// and - * drop the second / as well. mmax cpp generates this form. - * - * 6-Jan-87 Robert Baron (rvb) at Carnegie-Mellon University - * Fixed up pathname canonicalization to that ../../, etc would be - * handled correctly. - * Also made "force" on by default. 
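
The canonicalization the comment above describes is purely lexical: "//", ".//", and "." components are dropped and repeats flushed, but "dir/.." pairs are deliberately kept because dir may be a symbolic link, making the reduction unsound. A rough standalone sketch of those rules (the original worked in place on the raw -MD buffer and sorted and de-duplicated afterwards):

#include <stdio.h>
#include <string.h>

/* Lexical cleanup only: drop "//" and "." components, keep ".."
 * (b in /a/b/.. may be a symlink, so reducing it is unsound).
 * Assumes out[] is at least as large as path. */
static void
canonicalize(const char *path, char *out)
{
	char tmp[1024];
	char *comp, *state;
	int first = 1;

	out[0] = '\0';
	if (path[0] == '/')
		strcat(out, "/");
	strncpy(tmp, path, sizeof(tmp) - 1);
	tmp[sizeof(tmp) - 1] = '\0';
	for (comp = strtok_r(tmp, "/", &state); comp != NULL;
	    comp = strtok_r(NULL, "/", &state)) {
		if (strcmp(comp, ".") == 0)
			continue;	/* "." and "//" add nothing */
		if (!first)
			strcat(out, "/");
		strcat(out, comp);
		first = 0;
	}
}

int
main(void)
{
	char out[1024];

	canonicalize("/usr/include/.//machine/../machine/types.h", out);
	printf("%s\n", out);	/* /usr/include/machine/../machine/types.h */
	return 0;
}
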
- * - * 16-Mar-86 Robert Baron (rvb) at Carnegie-Mellon University - * Created 4/16/86 * - * * -\* ************************************************************************ */ - - -#include -#include -#include -#include -#include - -#define LINESIZE 65536 // NeXT_MOD - -#define OUTLINELEN 79 -#define IObuffer 50000 -#define SALUTATION "# Dependencies for File:" -#define SALUTATIONLEN (sizeof SALUTATION - 1) -#define OLDSALUTATION "# DO NOT DELETE THIS LINE" -#define OLDSALUTATIONLEN (sizeof OLDSALUTATION - 1) - -char file_array[IObuffer]; /* read file and store crunched names */ -char dep_line[LINESIZE]; /* line being processed */ -char dot_o[LINESIZE]; /* : prefix */ -char *path_component[100]; /* stores components for a path while being - crunched */ - -struct dep { /* stores paths that a file depends on */ - int len; - char *str; -} dep_files[1000]; -int dep_file_index; - -qsort_strcmp(a, b) -struct dep *a, *b; -{ -extern int strcmp(); - return strcmp(a->str, b->str); -} - -char *outfile = (char *) 0; /* generate dependency file */ -FILE *out; - -char *makefile = (char *) 0; /* user supplied makefile name */ -char *real_mak_name; /* actual makefile name (if not supplied) */ -char shadow_mak_name[LINESIZE]; /* changes done here then renamed */ -FILE *mak; /* for reading makefile */ -FILE *makout; /* for writing shadow */ -char makbuf[LINESIZE]; /* one line buffer for makefile */ -struct stat makstat; /* stat of makefile for time comparisons */ -int mak_eof = 0; /* eof seen on makefile */ -FILE *find_mak(), *temp_mak(); - -int delete = 0; /* -d delete dependency file */ -int debug = 0; -int D_contents = 0; /* print file contents */ -int D_depend = 0; /* print dependency processing info */ -int D_make = 0; /* print makefile processing info */ -int D_open = 0; /* print after succesful open */ -int D_time = 0; /* print time comparison info */ -int force = 1; /* always update dependency info */ -int update = 0; /* it's ok if the -m file does not exist */ -int verbose = 0; /* tell me something */ -int expunge = 0; /* first flush dependency stuff from makefile */ - - -char *name; - -static void scan_mak(FILE *, FILE *, char *); -static void finish_mak(FILE *, FILE *); - -main(argc,argv) -register char **argv; -{ -int size; - - name = *argv; - {register char *cp =name; - while (*cp) if (*cp++ == '/') name = cp; - } - - for ( argv++ ; --argc ; argv++ ) { register char *token = *argv; - if (*token++ != '-' || !*token) - break; - else { register int flag; - for ( ; flag = *token++ ; ) { - switch (flag) { - case 'd': - delete++; - break; - case 'f': - force++; - break; - case 'u': - update++; - case 'm': - makefile = *++argv; - if (--argc < 0) goto usage; - break; - case 'o': - outfile = *++argv; - if (--argc < 0) goto usage; - break; - case 'v': - verbose++; - break; - case 'x': - expunge++; - break; - case 'D': - for ( ; flag = *token++ ; ) - switch (flag) { - case 'c': - D_contents++; - break; - case 'd': - D_depend++; - break; - case 'm': - D_make++; - break; - case 'o': - D_open++; - break; - case 't': - D_time++; - break; - case 'D': - debug++; - break; - default: - goto letters; - } - goto newtoken; - default: - goto usage; - } -letters: ; - } - } -newtoken: ; - } - - if (!expunge && argc < 1) goto usage; - if ((int) outfile && (int) makefile) /* not both */ - goto usage; - - if ((int) outfile) { - /* - * NeXT_MOD, For SGS stuff, in case still linked to master version - */ - unlink(outfile); - - if ((out = fopen(outfile, "w")) == NULL) { - fprintf(stderr, "%s: outfile = \"%s\" ", name, 
outfile); - perror("fopen"); - fflush(stdout), fflush(stderr); - exit(1); - } else if (D_open) - printf("%s: opened outfile \"%s\"\n", name, outfile); - } else if (mak = find_mak(makefile)) { - makout = temp_mak(); - out = makout; - if (expunge) - expunge_mak(mak, makout); - else - skip_mak(mak, makout); - } else if (mak_eof && /* non existent file == mt file */ - (int)(makout = temp_mak())) { /* but we need to be able */ - out = makout; /* to write here */ - } else if (makefile) { - fprintf(stderr, "%s: makefile \"%s\" can not be opened or stat'ed\n", - name, makefile); - exit(2); - } - - for (; argc--; argv++) { - dep_file_index = 0; - - if (size = read_dep(*argv)) { - - save_dot_o(); - if (D_depend) printf("%s: dot_o = \"%s\"\n", name, dot_o); - - parse_dep(); - if (mak) scan_mak(mak, makout, dot_o); - if (out) output_dep(out); - - if (delete) - unlink(*argv); - } - } - - if (mak) finish_mak(mak, makout); - rename(shadow_mak_name, real_mak_name); - exit(0); -usage: - fprintf(stderr, "usage: md -f -Dcdmot -m makefile -o outputfile -v ... \n"); - exit(1); -} - - -read_dep(file) -register char *file; -{ -register int fd; -register int size; -struct stat statbuf; - - if ((fd = open(file, 0)) < 0) { - fprintf(stderr, "%s: file = \"%s\" ", name, file); - perror("open"); - fflush(stdout), fflush(stderr); - return 0; - } - if (D_open) - printf("%s: opened dependency file \"%s\"\n", name, file); - - if (fstat(fd, &statbuf) < 0) { - fprintf(stderr, "%s: file = \"%s\" ", name, file); - perror("stat"); - fflush(stdout), fflush(stderr); - goto out; - } - switch(statbuf.st_mode & S_IFMT) { - case S_IFREG: - if (D_time) - printf("%s: file time = %d\n", name, statbuf.st_mtime); - - if (statbuf.st_size > IObuffer) { - fprintf(stderr, "%s: file \"%s\" tooo big for IObuffer\n", - name, file); - goto out; - } else if (force) - break; - else if ((int) mak && statbuf.st_mtime < makstat.st_mtime) { - if (verbose || D_time) - fprintf(stderr, "%s: skipping \"%s\" %d < %d \"%s\"\n", - name, file, statbuf.st_mtime, makstat.st_mtime, - real_mak_name); - goto out; - } else /* >= =>ok */ - break; - case S_IFDIR: - case S_IFLNK: - case S_IFCHR: - case S_IFBLK: - case S_IFSOCK: - default: - fprintf(stderr, "%s: bad mode: 0%o on \"%s\"\n", - name, statbuf.st_mode, file); - fflush(stdout), fflush(stderr); - goto out; - } - - if ((size = read(fd, file_array, sizeof (file_array))) < 0) { - fprintf(stderr, "%s: file = \"%s\" ", name, file); - perror("read"); - fflush(stdout), fflush(stderr); - goto out; - } - file_array[size] = 0; - - if (close(fd) < 0) { - fprintf(stderr, "%s: file = \"%s\" ", name, file); - perror("close"); - fflush(stdout), fflush(stderr); - return 0; - } - - if (D_depend && D_contents) - printf("file_array: \"%s\"\n", file_array); - return size; -out: ; - close(fd); - return 0; -} - -save_dot_o() -{ -register char *cp = file_array; -register char *svp = dot_o; -register int c; - - while ((*svp++ = (c = *cp++)) && c != ':'); - *svp = 0; -} - -parse_dep() -{ -register char *lp = file_array; -register int c; - - while (*lp) {register char *tlp = lp; - register char *cp = dep_line; - register int i = 0; - int abspath = 0; - char oldc; - char *oldcp; - - /* get a line to process */ - while ((c = *lp++) && c != '\n') - { - if (c == '\\') - lp++; /* skip backslash newline */ - else - *cp++ = c; - } - if (!c) - break; - *cp = 0; - cp = dep_line; - lp[-1] = 0; - /* skip .o file name */ - while ((c = *cp++) && c != ':'); if (!c) continue; -next_filename: - i = 0; - abspath = 0; - while ((c = *cp) && (c == ' ' || 
c == '\t')) cp++; if (!c) continue; - - /* canonicalization processing */ - - /* initial / is remembered */ - if (c == '/') - abspath++; - - while (c && c != ' ' && c != '\t') { - if (D_depend) printf("i = %d going \"%s\"\n", i, cp); - /* kill \'s */ - while ((c = *cp) && c == '/') cp++; if (!c) break; - path_component[i] = cp; - /* swallow chars till next / or null */ - while ((c = *cp++) && c != '/' && c != ' ' && c != '\t'); - if (c) cp[-1]=0;/* end component C style */ - - /* ignore . */; - if (!strcmp(path_component[i], ".")) - ; /* if "component" != .. */ - else /* don't reduce /component/.. to nothing */ - i++; /* there could be symbolic links! */ - } - /* reassemble components */ - oldc = c; /* save c */ - oldcp = cp; /* save cp */ - cp = tlp; /* overwrite line in buffer */ - if (abspath) - *cp++ = '/'; - for (c=0; clen; - register char *str = dp->str; - if (j && len == (dp-1)->len && !strcmp(str, (dp-1)->str)) - continue; - written++; - if (size + len + 1 > OUTLINELEN) { - fprintf(out, "\n%s %s", dot_o, str); - size = dot_o_len + len + 1; - } else { - fprintf(out, " %s", str); - size += len + 1; - } - } - fprintf(out, "\n"); - if (verbose) - fprintf(stdout, "%s: \"%s\" %d => %d\n", name, dot_o, dep_file_index, written); -} - - /* process makefile */ -FILE * -find_mak(file) -char *file; -{ -FILE *mak; - - if ((int) file) { - if ((mak = fopen(file, "r")) != NULL) { - real_mak_name = file; - } else if (update) { - mak_eof = 1; - real_mak_name = file; - return NULL; - } else { - fprintf(stderr, "%s: file = \"%s\" ", name, file); - perror("fopen"); - fflush(stdout), fflush(stderr); - return NULL; - } - } else { - if ((mak = fopen("makefile", "r")) != NULL) { - real_mak_name = "makefile"; - } else if ((mak = fopen("Makefile", "r")) != NULL) { - real_mak_name = "Makefile"; - } else return NULL; - } - - if (fstat(fileno(mak), &makstat) < 0) { - fprintf(stderr, "%s: file = \"%s\" ", name, real_mak_name); - perror("stat"); - fflush(stdout), fflush(stderr); - return NULL; - } - if (D_open) - printf("%s: opened makefile \"%s\"\n", name, real_mak_name); - if (D_time) - printf("%s: makefile time = %d\n", name, makstat.st_mtime); - - return mak; -} - -FILE * -temp_mak() -{ -FILE *mak; - - strcpy(shadow_mak_name, real_mak_name); - strcat(shadow_mak_name, ".md"); - - /* - * For SGS stuff, in case still linked to master version - */ - unlink(shadow_mak_name); - if ((mak = fopen(shadow_mak_name, "w")) == NULL) { - fprintf(stderr, "%s: file = \"%s\" ", name, shadow_mak_name); - perror("fopen"); - fflush(stdout), fflush(stderr); - return NULL; - } - if (D_open) - printf("%s: opened makefile.md \"%s\"\n", name, shadow_mak_name); - - return mak; -} - -skip_mak(makin, makout) -register FILE *makin, *makout; -{ -register int len = SALUTATIONLEN; - - if (D_make) - printf("skipping in \"%s\" ", real_mak_name); - - while (fgets(makbuf, LINESIZE, makin) != NULL) { - if (D_make && D_contents) - printf("%s: \"%s\"\n", real_mak_name, makbuf); - if (strncmp(makbuf, SALUTATION, len)) { - fputs(makbuf, makout); - } else - break; - } - mak_eof = feof(makin); - if (mak_eof) - fclose(makin); - if (D_make) - printf("eof = %d str = \"%s\"", mak_eof, makbuf); -} - -expunge_mak(makin, makout) -register FILE *makin, *makout; -{ -register int len = SALUTATIONLEN; -register int oldlen = OLDSALUTATIONLEN; - - if (D_make) - printf("expunging in \"%s\" ", real_mak_name); - - while (fgets(makbuf, LINESIZE, makin) != NULL) { - if (D_make && D_contents) - printf("%s: \"%s\"\n", real_mak_name, makbuf); - if (! 
strncmp(makbuf, SALUTATION, len) || - ! strncmp(makbuf, OLDSALUTATION, oldlen)) - break; - else - fputs(makbuf, makout); - } - mak_eof = 1; - if (mak_eof) - fclose(makin); - if (D_make) - printf("eof = %d str = \"%s\"", mak_eof, makbuf); -} - -static void -scan_mak(FILE *makin, FILE *makout, char *file) -{ -register char *cp = &makbuf[SALUTATIONLEN+1]; -register int len = strlen(file); -register int ret; - - if (D_make) - printf("scanning in \"%s\" for \"%s\"\n", real_mak_name, file); - - do { - if (mak_eof) /* don't scan any more */ - return; - - ret = strncmp(cp, file, len); - if (D_make) - printf("saw \"%s\" ret = %d\n", cp, ret); - - if (ret < 0) { /* skip forward till match or greater */ - fputs(makbuf, makout); /* line we're looking at */ - while (fgets(makbuf, LINESIZE, makin) != NULL) { - if (strncmp(makbuf, SALUTATION, SALUTATIONLEN)) { - fputs(makbuf, makout); - } else - break; - } - mak_eof = feof(makin); - if (mak_eof) - fclose(makin); - continue; - } else if (ret == 0) { /* flush match */ - while (fgets(makbuf, LINESIZE, makin) != NULL) { - if (strncmp(makbuf, SALUTATION, SALUTATIONLEN)) { - ; /* flush old stuff */ - } else - break; - } - mak_eof = feof(makin); - if (mak_eof) - fclose(makin); - break; - } else { /* no luck this time */ - break; - } - } while (1); -} - -static void -finish_mak(FILE *makin, FILE *makout) -{ - if (mak_eof) /* don't scan any more */ - return; - - if (D_make) - printf("finishing in \"%s\"\n", real_mak_name); - - fputs(makbuf, makout); /* line we're looking at */ - while (fgets(makbuf, LINESIZE, makin) != NULL) { - fputs(makbuf, makout); - } -} diff --git a/SETUP/setsegname/Makefile b/SETUP/setsegname/Makefile index 70e5e2641..ece876930 100644 --- a/SETUP/setsegname/Makefile +++ b/SETUP/setsegname/Makefile @@ -9,23 +9,23 @@ include $(MakeInc_def) OBJS = setsegname.o -CFLAGS = -isysroot $(HOST_SDKROOT) -g -O0 -I$(SOURCE) -I. +CFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) -g -O0 -I$(SOURCE) -I. 
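
The scan_mak() merge above works only because the "# Dependencies for File:" blocks are kept sorted by object name: blocks that sort low are copied through, an exact match is flushed so the caller can rewrite it, and the first block that sorts high marks the insertion point. Condensed to its comparison loop (EOF bookkeeping and the OLDSALUTATION variant omitted; buf is primed with the first marker line):

#include <stdio.h>
#include <string.h>

#define LINESIZE   65536
#define SALUTATION "# Dependencies for File:"
#define SALLEN     ((int)sizeof(SALUTATION) - 1)

/* Merge scan over dependency blocks sorted by object name: copy
 * blocks that sort before obj, flush a block that matches it (the
 * caller rewrites it), stop at the first block that sorts after. */
static void
scan_for(FILE *in, FILE *out, const char *obj, char *buf)
{
	for (;;) {
		int cmp = strncmp(buf + SALLEN + 1, obj, strlen(obj));

		if (cmp > 0)
			break;			/* insertion point found */
		if (cmp < 0)
			fputs(buf, out);	/* keep this block's header */
		while (fgets(buf, LINESIZE, in) != NULL) {
			if (strncmp(buf, SALUTATION, SALLEN) == 0)
				break;		/* reached the next block */
			if (cmp < 0)
				fputs(buf, out);	/* keep body line */
		}
		if (feof(in)) {
			buf[0] = '\0';		/* no pending header left */
			break;
		}
		if (cmp == 0)
			break;	/* old block flushed; new one goes here */
	}
}
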
WARNFLAGS = -Wall -LDFLAGS = -isysroot $(HOST_SDKROOT) +LDFLAGS = -isysroot $(HOST_SDKROOT) -mmacosx-version-min=$(HOST_OS_VERSION) setsegname: $(OBJS) - $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ @echo HOST_LD $@ - $(_v)$(HOST_CODESIGN) -s - $@ + $(_v)$(HOST_CC) $(LDFLAGS) -o $@ $^ @echo HOST_CODESIGN $@ + $(_v)env CODESIGN_ALLOCATE=$(HOST_CODESIGN_ALLOCATE) $(HOST_CODESIGN) -s - $@ -.c.o: - $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< +%.o: %.c @echo HOST_CC $@ + $(_v)$(HOST_CC) $(WARNFLAGS) $(CFLAGS) -c -o $@ $< -do_build_setup: setsegname +do_build_setup:: setsegname include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/SETUP/setsegname/setsegname.c b/SETUP/setsegname/setsegname.c index 9afd6bc5d..ab3dd410f 100644 --- a/SETUP/setsegname/setsegname.c +++ b/SETUP/setsegname/setsegname.c @@ -29,7 +29,7 @@ #include -#include +#include /********************************************************************* *********************************************************************/ @@ -105,12 +105,21 @@ readFile(const char *path, vm_offset_t * objAddr, vm_size_t * objSize) return error; } +static void +usage(void) +{ + fprintf(stderr, "Usage: %s [-s OLDSEGNAME] -n NEWSEGNAME input -o output\n", getprogname()); + exit(1); +} + /********************************************************************* *********************************************************************/ int main(int argc, char * argv[]) { int error; const char * output_name = NULL; + const char * input_name = NULL; + const char * oldseg_name = NULL; const char * newseg_name = NULL; struct mach_header * hdr; struct mach_header_64 * hdr64; @@ -125,16 +134,36 @@ int main(int argc, char * argv[]) uint32_t attr; typedef char segname_t[16]; segname_t * names = NULL; + int ch; + + + while ((ch = getopt(argc, argv, "s:n:o:")) != -1) { + switch (ch) { + case 's': + oldseg_name = optarg; + break; + case 'n': + newseg_name = optarg; + break; + case 'o': + output_name = optarg; + break; + case '?': + default: + usage(); + } + } - if ((argc != 5) || strcmp("-o", argv[3])) { - fprintf(stderr, "Usage: %s NEWSEGNAME input -o output\n", argv[0]); - exit(1); + argc -= optind; + argv += optind; + + if ((argc != 1) || !newseg_name || !output_name) { + usage(); } - output_name = argv[4]; - newseg_name = argv[1]; + input_name = argv[0]; - error = readFile(argv[2], &input, &input_size); + error = readFile(input_name, &input, &input_size); if (error) { exit(1); } @@ -205,7 +234,11 @@ int main(int argc, char * argv[]) } if (!(S_ATTR_DEBUG & attr)) { - strncpy((char *)names, newseg_name, sizeof(*names)); + if (!oldseg_name || + 0 == strncmp(oldseg_name, (char *)names, sizeof(*names))) { + memset(names, 0x0, sizeof(*names)); + strncpy((char *)names, newseg_name, sizeof(*names)); + } } names = (typeof(names))(((uintptr_t) names) + len); diff --git a/SETUP/setup.csh b/SETUP/setup.csh deleted file mode 100644 index 3d489f4ca..000000000 --- a/SETUP/setup.csh +++ /dev/null @@ -1,4 +0,0 @@ -setenv SRCROOT `pwd` -setenv OBJROOT $SRCROOT/BUILD/obj -setenv DSTROOT $SRCROOT/BUILD/dst -setenv SYMROOT $SRCROOT/BUILD/sym diff --git a/SETUP/setup.sh b/SETUP/setup.sh old mode 100644 new mode 100755 index 7924785c7..7cb532f94 --- a/SETUP/setup.sh +++ b/SETUP/setup.sh @@ -1,4 +1,6 @@ -export SRCROOT=$(pwd) -export OBJROOT=$SRCROOT/BUILD/obj -export DSTROOT=$SRCROOT/BUILD/dst -export SYMROOT=$SRCROOT/BUILD/sym +setup_tmp=`mktemp -d -t setup` +printenv | sort > "${setup_tmp}/orig" +make print_exports | grep -E -v '^(MAKE|MFLAGS|SHLVL)' > "${setup_tmp}/exports" +eval `comm -13 
"${setup_tmp}/orig" "${setup_tmp}/exports" | sed 's,^\(.*\)$,export "\1",'` + + diff --git a/bsd/Makefile b/bsd/Makefile index 8beb22975..03281456d 100644 --- a/bsd/Makefile +++ b/bsd/Makefile @@ -16,7 +16,6 @@ INSTINC_SUBDIRS = \ machine \ miscfs \ net \ - netat \ netinet \ netinet6 \ netkey \ @@ -26,10 +25,6 @@ INSTINC_SUBDIRS = \ uuid \ vfs -INSTINC_SUBDIRS_I386 = \ - i386 \ - crypto - INSTINC_SUBDIRS_X86_64 = \ i386 \ crypto @@ -39,39 +34,32 @@ INSTINC_SUBDIRS_ARM = \ EXPINC_SUBDIRS = \ bsm \ - crypto \ dev \ hfs \ libkern \ machine \ miscfs \ net \ - netat \ netinet \ netinet6 \ netkey \ - nfs \ security \ sys \ uuid \ vfs \ vm -EXPINC_SUBDIRS_I386 = \ - i386 - EXPINC_SUBDIRS_X86_64 = \ i386 EXPINC_SUBDIRS_ARM = \ arm -SETUP_SUBDIRS = - COMP_SUBDIRS = \ conf INST_SUBDIRS = \ + dev \ kern INSTMAN_SUBDIRS = \ diff --git a/bsd/bsm/Makefile b/bsd/bsm/Makefile index f660aafb5..b2ff3b57d 100644 --- a/bsd/bsm/Makefile +++ b/bsd/bsm/Makefile @@ -7,25 +7,12 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - DATAFILES = \ audit.h audit_domain.h audit_errno.h audit_fcntl.h audit_internal.h \ audit_kevents.h audit_record.h audit_socket_type.h KERNFILES = \ audit.h - INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = bsm diff --git a/bsd/bsm/audit.h b/bsd/bsm/audit.h index d3bc41fa6..525363788 100644 --- a/bsd/bsm/audit.h +++ b/bsd/bsm/audit.h @@ -324,10 +324,10 @@ int setaudit_addr(const struct auditinfo_addr *, int); int getaudit(struct auditinfo *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, - __IPHONE_2_0, __IPHONE_NA); + __IPHONE_2_0, __IPHONE_6_0); int setaudit(const struct auditinfo *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, - __IPHONE_2_0, __IPHONE_NA); + __IPHONE_2_0, __IPHONE_6_0); #else int getaudit(struct auditinfo *); diff --git a/bsd/conf/MASTER b/bsd/conf/MASTER index 17c0acc52..da9dcbea2 100644 --- a/bsd/conf/MASTER +++ b/bsd/conf/MASTER @@ -82,7 +82,6 @@ options TIMEZONE=0, PST=0 options QUOTA # # options INET # # -options ABSOLUTETIME_SCALAR_TYPE options NEW_VM_CODE # # options OLD_VM_CODE # # options HW_AST # Hardware ast support # @@ -106,11 +105,6 @@ options MACH_NP # Mach IPC support # options MACH_NBC # No buffer cache # options MACH_NET # Fast network access # options MACH_XP # external pager support # -options NORMA_IPC # NORMA IPC support # -options NORMA_DEVICE # NORMA unified device names # -options NORMA_VM # NORMA virtual memory support # -options NORMA_TASK # NORMA task support # -options NORMA_ETHER # NORMA across ethernet # options SIMPLE_CLOCK # don't assume fixed tick # options XPR_DEBUG # kernel tracing # options KDEBUG # kernel tracing # @@ -119,13 +113,8 @@ options NO_KDEBUG # no kernel tracing # options DDM_DEBUG # driverkit-style tracing # options MACH_OLD_VM_COPY # Old vm_copy technology # options NO_DIRECT_RPC # for untyped mig servers # -options IPX # ipx stack # -options EON # # -options ISO # ISO stack # -options LLC # 802.2 support # options LOOP # loopback support # options MROUTING # multicast routing # -options ROUTING # routing # options VLAN # # options BOND # # options PF # Packet Filter # @@ -135,6 +124,7 @@ options PKTSCHED_CBQ # CBQ packet scheduler # options PKTSCHED_HFSC # H-FSC packet scheduler # options PKTSCHED_PRIQ # PRIQ packet scheduler # options PKTSCHED_FAIRQ # FAIRQ packet scheduler # +options MEASURE_BW # 
interface bandwidth measurement # options CLASSQ_BLUE # BLUE queueing algorithm # options CLASSQ_RED # RED queueing algorithm # options CLASSQ_RIO # RIO queueing algorithm # @@ -147,15 +137,17 @@ options TRAFFIC_MGT # traffic management support # options IPFW2 # IP firewall (new version) # options MULTICAST # Internet Protocol Class-D $ options TCPDEBUG # TCP debug # -options RANDOM_IP_ID # random (not sequential) ip ids # options TCP_DROP_SYNFIN # Drop TCP packets with SYN+FIN set # options ICMP_BANDLIM # ICMP bandwidth limiting sysctl options IFNET_INPUT_SANITY_CHK # allow dlil/ifnet input sanity check # +options MULTIPATH # Multipath domain # +options MPTCP # Multipath TCP # options SYSV_SEM # SVID semaphores # options SYSV_MSG # SVID messages # options SYSV_SHM # SVID shared mem # options PSYNCH # pthread synch # options DEVELOPMENT # dev kernel # +options FLOW_DIVERT # # secure_kernel - secure kernel from user programs options SECURE_KERNEL # @@ -165,15 +157,14 @@ options OLD_SEMWAIT_SIGNAL # old semwait_signal handler # # 4.4 general kernel # -options SOCKETS # socket support # +options SOCKETS # socket support # options DIAGNOSTIC # diagnostics # options CONFIG_DTRACE # dtrace support # options GPROF # build profiling # options SENDFILE # sendfile # -options NETWORKING # networking layer # +options NETWORKING # networking layer # options CONFIG_FSE # file system events # options CONFIG_IMAGEBOOT # local image boot # -options CONFIG_SOWUPCALL # SB_UPCALL on sowwakeup # options CONFIG_MBUF_JUMBO # jumbo cluster pool # options CONFIG_FORCE_OUT_IFP # Enable IP_FORCE_OUT_IFP # options CONFIG_IFEF_NOWINDOWSCALE # Scale TCP window per driver # @@ -185,6 +176,7 @@ options CONFIG_WORKQUEUE # # options FFS # Fast Filesystem Support # options HFS # HFS/HFS+ support # +options MOCKFS # Boot from an executable # options FIFO # fifo support # options FDESC # fdesc_fs support # options DEVFS # devfs support # @@ -193,7 +185,7 @@ options HFS_COMPRESSION # hfs compression # options CONFIG_HFS_STD # hfs standard support # options CONFIG_HFS_TRIM # hfs trims unused blocks # options CONFIG_HFS_MOUNT_UNMAP #hfs trims blocks at mount # - +options CONFIG_HFS_DIRLINK #allow directory hardlink creation # # # file system features @@ -201,10 +193,10 @@ options CONFIG_HFS_MOUNT_UNMAP #hfs trims blocks at mount # options REV_ENDIAN_FS # Reverse Endian FS # options NAMEDSTREAMS # named stream vnop support # +options CONFIG_APPLEDOUBLE # apple double support # options CONFIG_VOLFS # volfs path support (legacy) # options CONFIG_IMGSRC_ACCESS # source of imageboot dmg # options CONFIG_TRIGGERS # trigger vnodes # -options CONFIG_VFS_FUNNEL # thread unsafe vfs's # options CONFIG_EXT_RESOLVER # e.g. 
memberd # options CONFIG_SEARCHFS # searchfs syscall support # @@ -214,12 +206,6 @@ options CONFIG_SEARCHFS # searchfs syscall support # options NFSCLIENT # Be an NFS client # options NFSSERVER # Be an NFS server # -# -# AppleTalk Support -# -options NETAT # AppleTalk support # -#options AURP_SUPPORT # AppleTalk Update Routing # - # # Machine Independent Apple Features # @@ -231,15 +217,11 @@ options OBJCTEST # Objc internal test # options KERNEL_STACK # MI kernel stack support # profile # build a profiling kernel # -# -# Point-to-Point Protocol support -# -pseudo-device ppp 2 # - # # IPv6 Support # options "INET6" # kernel IPv6 Support # +options IPV6SEND # Secure Neighbor Discovery # options IPSEC # IP security # options IPSEC_ESP # IP security # options "IPV6FIREWALL" # IPv6 Firewall Feature # @@ -252,7 +234,6 @@ pseudo-device stf 1 # options crypto # options ALLCRYPTO # -options randomipid # options ZLIB # inflate/deflate support # @@ -424,27 +405,10 @@ options CONFIG_NO_KPRINTF_STRINGS # # options CONFIG_FINE_LOCK_GROUPS # -# -# configurable kernel - general switch to say we are building for an -# embedded device -# -options CONFIG_EMBEDDED # - -# only execute signed code. Hang this off config_embedded since there's -# nothing more appropriate right now -# -options CONFIG_ENFORCE_SIGNED_CODE # - # support dynamic signing of code # options CONFIG_DYNAMIC_CODE_SIGNING # -# -# code decryption... used on embedded for app protection -# must be set in all the bsd/conf and osfmk/conf MASTER files -# -options CONFIG_CODE_DECRYPTION # - # # User Content Protection, used on embedded # @@ -473,14 +437,30 @@ options CHECK_CS_VALIDATION_BITMAP # # options VM_PRESSURE_EVENTS # +# +# Enable inheritance of importance through specially marked mach ports and for file locks +# For now debug is enabled wherever inheritance is +# +options IMPORTANCE_INHERITANCE # +options IMPORTANCE_DEBUG # + +options CONFIG_TELEMETRY # + +options CONFIG_PROC_UUID_POLICY # + +# +# In-kernel tests +# +options CONFIG_IN_KERNEL_TESTS # + # # Ethernet (ARP) # -pseudo-device ether # +pseudo-device ether # # # Network loopback device # -pseudo-device loop # +pseudo-device loop # # # UCB pseudo terminal service # @@ -513,7 +493,7 @@ pseudo-device mdevdevice 1 init mdevinit # # packet filter device # -pseudo-device bpfilter 4 init bpf_init # +pseudo-device bpfilter 4 init bpf_init # # # fsevents device diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386 deleted file mode 100644 index c2cae3eba..000000000 --- a/bsd/conf/MASTER.i386 +++ /dev/null @@ -1,120 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. -# -###################################################################### -# -# Master Apple configuration file (see the master machine independent -# configuration file for a description of the file format). 
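
Each `options NAME` line in these MASTER files is consumed by SETUP/config via doconf: an enabled option becomes a macro definition in the generated configuration, and kernel sources gate code on it with the preprocessor, which is how newly added switches such as MULTIPATH, MPTCP, or IMPORTANCE_INHERITANCE above take effect without unconditional code. A toy illustration of the mechanism, with TELEMETRY_DEMO as a hypothetical stand-in for a generated option macro:

#include <stdio.h>

/* TELEMETRY_DEMO stands in for a config-generated option macro
 * (hypothetical name); the real ones are emitted from the MASTER
 * "options" lines by the config tooling. */
#ifndef TELEMETRY_DEMO
#define TELEMETRY_DEMO 0
#endif

int
main(void)
{
#if TELEMETRY_DEMO
	printf("feature compiled in\n");
#else
	printf("feature compiled out\n");
#endif
	return 0;
}

Building with cc -DTELEMETRY_DEMO=1 flips the branch, just as enabling the option in a kernel configuration does for the real macros.
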
-# -###################################################################### -# -# Apple (PSEUDO-)DEVICES (select any combination) -# ex = Excelan EXOS 202 Ethernet interface -# ip = Interphase V/SMD 3200 disk controller -# od = Canon OMD-1 Optical Disk -# rd = RAM disk -# sd = SCSI disk -# sg = Generic SCSI Device -# st = SCSI tape -# fd = Floppy Disk -# en = Integrated Ethernet controller -# dsp = DSP560001 digital signal processor -# iplmeas = ipl time measurement -# nextp = NeXT Laser Printer -# sound = sound I/O -# vol = removable volume support device -# venip = virtual Ethernet/IP network interface -# zs = Serial device -# -# MULTIPROCESSOR SUPPORT (select exactly one) -# multi = support 4 processors -# uni = supports single processor -# -# SPECIAL CHARACTERISTICS (select any combination) -# gdb = GNU kernel debugger -# posix_kern = POSIX support -# -# CPU TYPE (select exactly one) -# NeXT = FIXME -# -###################################################################### -# -# Standard Apple Research Configurations: -# -------- ----- -------- --------------- -# BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch zleaks memorystatus vm_pressure_events ] -# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_vfs_funnel config_ext_resolver config_searchfs] -# NETWORKING = [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo if_bridge PF ] -# NFS = [ nfsclient nfsserver ] -# VPN = [ ipsec ] -# PF = [ pf pflog ] -# PKTSCHED = [ pktsched_cbq pktsched_fairq pktsched_hfsc pktsched_priq ] -# CLASSQ = [ classq_blue classq_red classq_rio ] -# RELEASE = [ BASE NETWORKING NFS VPN FILESYS libdriver ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ] -# -# EMBEDDED_BASE = [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue psynch ] -# EMBEDDED_FILESYS = [ devfs hfs journaling fdesc fifo ] -# EMBEDDED_NET = [ inet compat_oldsock tcpdrop_synfin bpfilter ] -# EMBEDDED = [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ] -# DEVELOPMENT = [ EMBEDDED_BASE EMBEDDED_NET NFS VPN EMBEDDED_FILESYS libdriver development mach_assert config_dtrace ] -# -###################################################################### -# -machine "i386" # -cpu "i386" # - -makeoptions CCONFIGFLAGS = "-g -O3 -fno-omit-frame-pointer" # -makeoptions CCONFIGFLAGS = "-O3" # -makeoptions RELOC = "00100000" # -makeoptions SYMADDR = "00780000" # - -options GDB # GNU kernel debugger # -options DEBUG # general debugging code # -options SHOW_SPACE # print size of structures # -options EVENTMETER # event meter support # -options FP_EMUL # floating point emulation # -options UXPR # user-level XPR package # -config mach_kernel swap generic # -options EVENT # - -options NO_NESTED_PMAP # - -# -# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and -# security/conf MASTER files. 
-# -options CONFIG_MACF # Mandatory Access Control Framework -options CONFIG_MACF_SOCKET_SUBSET # MAC socket subest (no labels) -#options CONFIG_MACF_SOCKET # MAC socket labels -#options CONFIG_MACF_NET # mbuf -#options CONFIG_MACF_DEBUG -#options CONFIG_MACF_MACH -options CONFIG_AUDIT # Kernel auditing - -# app-profiling i.e. pre-heating - off? -options CONFIG_APP_PROFILE=0 - -# -# code decryption... used on i386 for DSMOS -# must be set in all the bsd/conf and osfmk/conf MASTER files -# -options CONFIG_CODE_DECRYPTION - -# -# Ipl measurement system -# -pseudo-device iplmeas # - -# -# NFS measurement system -# -pseudo-device nfsmeas # - -# -# Removable Volume support -# -pseudo-device vol # diff --git a/bsd/conf/MASTER.x86_64 b/bsd/conf/MASTER.x86_64 index 64b17727c..ff85c8806 100644 --- a/bsd/conf/MASTER.x86_64 +++ b/bsd/conf/MASTER.x86_64 @@ -44,14 +44,15 @@ # # Standard Apple Research Configurations: # -------- ----- -------- --------------- -# BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit config_imageboot config_workqueue psynch zleaks memorystatus vm_pressure_events ] -# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_ext_resolver config_searchfs ] -# NETWORKING = [ inet inet6 compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo if_bridge PF ] +# BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug sysv_sem sysv_msg sysv_shm audit config_imageboot config_workqueue psynch zleaks memorystatus vm_pressure_events kperf kpc importance_inheritance dynamic_codesigning config_telemetry config_proc_uuid_policy ] +# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo config_volfs hfs_compression config_hfs_std config_hfs_alloc_rbtree config_hfs_trim config_imgsrc_access config_triggers config_ext_resolver config_searchfs config_hfs_dirlink config_appledouble ] +# NETWORKING = [ inet inet6 ipv6send compat_oldsock tcpdrop_synfin bpfilter ipdivert ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile bond vlan gif stf zlib ifnet_input_chk config_mbuf_jumbo if_bridge PF ] # NFS = [ nfsclient nfsserver ] -# VPN = [ ipsec ] +# VPN = [ ipsec flow_divert ] # PF = [ pf pflog ] # PKTSCHED = [ pktsched_cbq pktsched_fairq pktsched_hfsc pktsched_priq ] # CLASSQ = [ classq_blue classq_red classq_rio ] +# MULTIPATH = [ multipath mptcp ] # RELEASE = [ BASE NETWORKING NFS VPN FILESYS libdriver ] # PROFILE = [ RELEASE profile ] # DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ] @@ -99,7 +100,8 @@ options CONFIG_AUDIT # Kernel auditing options CONFIG_APP_PROFILE=0 # kernel performance tracing -#options KPERF # +options KPERF # +options KPC # # # code decryption... 
used on i386 for DSMOS diff --git a/bsd/conf/Makefile b/bsd/conf/Makefile index 610e6d6c5..25a42ef5e 100644 --- a/bsd/conf/Makefile +++ b/bsd/conf/Makefile @@ -6,52 +6,34 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = - -COMP_SUBDIRS = - -INST_SUBDIRS = - -ifndef BSD_KERNEL_CONFIG -export BSD_KERNEL_CONFIG = $(KERNEL_CONFIG) -endif - -ifneq ($(MACHINE_CONFIG), DEFAULT) -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT) -else -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT) -endif - -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC) - -$(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(ARCH_CONFIG_LC) - $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - $(MKDIR) $${doconf_target}; \ - cd $${doconf_target}; \ +MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ + $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/Makefile.template \ + $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/files \ + $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) + $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ + cd $(addsuffix /conf, $(TARGET)); \ rm -f $(notdir $?); \ - cp $? $${doconf_target}; \ - if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d $(TARGET)/$(BSD_KERNEL_CONFIG) $(BSD_KERNEL_CONFIG); \ - ); - -do_all: $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG)/Makefile - $(_v)next_source=$(subst conf/,,$(SOURCE)); \ - next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH)); \ - ${MAKE} -C $(COMPOBJROOT)/$(BSD_KERNEL_CONFIG) \ - MAKEFILES=$(TARGET)/$(BSD_KERNEL_CONFIG)/Makefile \ - SOURCE=$${next_source} \ - RELATIVE_SOURCE_PATH=$${next_relsource} \ - TARGET=$(TARGET) \ - INCL_MAKEDEP=FALSE \ - KERNEL_CONFIG=$(BSD_KERNEL_CONFIG) \ + cp $? 
.; \ + if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ + $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); + +do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile + $(_v)${MAKE} \ + -C $(TARGET)/$(CURRENT_KERNEL_CONFIG) \ + -f $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile \ + CURRENT_KERNEL_CONFIG=${CURRENT_KERNEL_CONFIG} \ + CURRENT_ARCH_CONFIG=${CURRENT_ARCH_CONFIG} \ + CURRENT_MACHINE_CONFIG=${CURRENT_MACHINE_CONFIG} \ + SOURCE=$(subst conf/,,$(SOURCE)) \ + TARGET=${TARGET} \ + OBJPATH=${OBJPATH} \ build_all; -do_build_all: do_all +do_build_all:: do_all include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/conf/Makefile.i386 b/bsd/conf/Makefile.i386 deleted file mode 100644 index 59554731c..000000000 --- a/bsd/conf/Makefile.i386 +++ /dev/null @@ -1,20 +0,0 @@ -###################################################################### -#BEGIN Machine dependent Makefile fragment for i386 -###################################################################### - -# Files to build with certain warnings turned off -dis_tables.o_CFLAGS_ADD += -Wno-cast-qual -fbt_x86.o_CFLAGS_ADD += -Wno-cast-qual - -# sha256 Files to build with -DSHA256_USE_ASSEMBLY=1 -sha2.o_CFLAGS_ADD += -DSHA256_USE_ASSEMBLY=1 - -# Inline assembly doesn't interact well with LTO -fbt_x86.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG) -# Taking the address of labels doesn't work with LTO (9524055) -dtrace.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG) - -###################################################################### -#END Machine dependent Makefile fragment for i386 -###################################################################### - diff --git a/bsd/conf/Makefile.template b/bsd/conf/Makefile.template index 61a088bd8..5c10a5657 100644 --- a/bsd/conf/Makefile.template +++ b/bsd/conf/Makefile.template @@ -27,11 +27,6 @@ # the terms and conditions for use and redistribution. # -# -# Export IDENT for sub-makefiles -# -export IDENT - export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule @@ -43,14 +38,47 @@ include $(MakeInc_def) # # XXX: CFLAGS # -CFLAGS+= -include meta_features.h -DARCH_PRIVATE -DDRIVER_PRIVATE \ +CFLAGS+= -include meta_features.h -DDRIVER_PRIVATE \ -D_KERNEL_BUILD -DKERNEL_BUILD -DMACH_KERNEL -DBSD_BUILD \ - -DBSD_KERNEL_PRIVATE -DLP64KERN=1 -DLP64_DEBUG=0 -I. $(CFLAGS_INLINE_CONFIG) + -DBSD_KERNEL_PRIVATE -DLP64_DEBUG=0 + +# +# Directories for mig generated files +# +COMP_SUBDIRS = + +# +# Make sure we don't remove this by accident if interrupted at the wrong +# time. +# +.PRECIOUS: Makefile + +# +# Theses macros are filled in by the config program depending on the +# current configuration. The MACHDEP macro is replaced by the +# contents of the machine dependent makefile template and the others +# are replaced by the corresponding symbol definitions for the +# configuration. 
+# + +%OBJS + +%CFILES + +%SFILES + +%MACHDEP + +# +# Machine-independent per-file flags +# dp_backing_file.o_CFLAGS_ADD += -Wshorten-64-to-32 ubc_subr.o_CFLAGS_ADD += -Wshorten-64-to-32 vnode_pager.o_CFLAGS_ADD += -Wshorten-64-to-32 vm_unix.o_CFLAGS_ADD += -Wshorten-64-to-32 +pthread_synch.o_CFLAGS_ADD += -Wno-unused-parameter -Wno-missing-prototypes +pthread_support.o_CFLAGS_ADD += -Wno-unused-parameter -Wno-missing-prototypes # Objects that don't want -Wsign-compare OBJS_NO_SIGN_COMPARE = \ @@ -119,194 +147,101 @@ OBJS_NO_SIGN_COMPARE = \ $(foreach file,$(OBJS_NO_SIGN_COMPARE),$(eval $(call add_perfile_cflags,$(file),-Wno-sign-compare))) -# Objects that want -Wcast-align warning treated as error -OBJS_ERROR_CAST_ALIGN = \ - kern_control.o \ - mcache.o \ - sys_socket.o \ - uipc_mbuf.o \ - uipc_mbuf2.o \ - uipc_socket.o \ - uipc_socket2.o \ - uipc_syscalls.o \ - bpf.o \ - bpf_filter.o \ - bridgestp.o \ - bsd_comp.o \ - devtimer.o \ - dlil.o \ - ether_if_module.o \ - ether_inet_pr_module.o \ - ether_inet6_pr_module.o \ - flowhash.o \ - if.o \ - if_bridge.o \ - if_gif.o \ - if_llreach.o \ - if_loop.o \ - if_media.o \ - if_mib.o \ - if_pflog.o \ - if_stf.o \ - if_utun.o \ - if_utun_crypto.o \ - if_utun_crypto_ipsec.o \ - if_vlan.o \ - init.o \ - iptap.o \ - kext_net.o \ - kpi_interface.o \ - kpi_interfacefilter.o \ - kpi_protocol.o \ - kpi_protocol.o \ - ndrv.o \ - net_osdep.o \ - net_str_id.o \ - netsrc.o \ - ntstat.o \ - pf.o \ - pf_if.o \ - pf_ioctl.o \ - pf_norm.o \ - pf_osfp.o \ - pf_ruleset.o \ - pf_table.o \ - ppp_deflate.o \ - radix.o \ - raw_cb.o \ - raw_usrreq.o \ - route.o \ - rtsock.o \ - dhcp_options.o \ - altq_cbq.o \ - altq_fairq.o \ - altq_hfsc.o \ - altq_priq.o \ - altq_qfq.o \ - altq_subr.o \ - pktsched.o \ - pktsched_cbq.o \ - pktsched_fairq.o \ - pktsched_hfsc.o \ - pktsched_priq.o \ - pktsched_qfq.o \ - pktsched_rmclass.o \ - pktsched_tcq.o \ - classq.o \ - classq_blue.o \ - classq_red.o \ - classq_rio.o \ - classq_sfb.o \ - classq_subr.o \ - classq_util.o \ - igmp.o \ - in.o \ - in_arp.o \ - in_cksum.o \ - in_dhcp.o \ - in_gif.o \ - in_mcast.o \ - in_pcb.o \ - in_pcblist.o \ - in_proto.o \ - in_rmx.o \ - in_tclass.o \ - ip_divert.o \ - ip_ecn.o \ - ip_encap.o \ - ip_icmp.o \ - ip_id.o \ - ip_input.o \ - ip_mroute.o \ - ip_output.o \ - kpi_ipfilter.o \ - raw_ip.o \ - tcp_debug.o \ - tcp_input.o \ - tcp_ledbat.o \ - tcp_newreno.o \ - tcp_output.o \ - tcp_sack.o \ - tcp_subr.o \ - tcp_timer.o \ - tcp_usrreq.o \ - udp_usrreq.o \ - ah_core.o \ - ah_input.o \ - ah_output.o \ - dest6.o \ - esp_core.o \ - esp_input.o \ - esp_output.o \ - esp_rijndael.o \ - frag6.o \ - icmp6.o \ - in6.o \ - in6_cksum.o \ - in6_gif.o \ - in6_ifattach.o \ - in6_mcast.o \ - in6_pcb.o \ - in6_prefix.o \ - in6_proto.o \ - in6_rmx.o \ - in6_src.o \ - ip6_forward.o \ - ip6_id.o \ - ip6_input.o \ - ip6_mroute.o \ - ip6_output.o \ - ipcomp_core.o \ - ipcomp_input.o \ - ipcomp_output.o \ - ipsec.o \ - mld6.o \ - nd6.o \ - nd6_nbr.o \ - nd6_rtr.o \ - raw_ip6.o \ - route6.o \ - scope6.o \ - udp6_output.o \ - udp6_usrreq.o \ - key.o \ - key_debug.o \ - keydb.o \ - keysock.o - -$(foreach file,$(OBJS_ERROR_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-Werror=cast-align))) - -# -# Directories for mig generated files -# -COMP_SUBDIRS = - -# -# Make sure we don't remove this by accident if interrupted at the wrong -# time. -# -.PRECIOUS: Makefile - -# -# Theses macros are filled in by the config program depending on the -# current configuration. 
The MACHDEP macro is replaced by the -# contents of the machine dependent makefile template and the others -# are replaced by the corresponding symbol definitions for the -# configuration. -# - -%OBJS - -%CFILES - -%MFILES - -%SFILES - -%BFILES - -%ORDERED -%MACHDEP +# Objects that don't want -Wcast-align warning (8474835) +OBJS_NO_CAST_ALIGN = \ + BTree.o \ + BTreeAllocate.o \ + BTreeMiscOps.o \ + BTreeNodeOps.o \ + BTreeScanner.o \ + BTreeTreeOps.o \ + CatalogUtilities.o \ + audit_bsm_token.o \ + audit_pipe.o \ + audit_session.o \ + bsd_i386.o \ + decmpfs.o \ + dtrace.o \ + fasttrap.o \ + fasttrap_isa.o \ + fbt_arm.o \ + fbt_x86.o \ + fips_sha1.o \ + hfs_attrlist.o \ + hfs_btreeio.o \ + hfs_catalog.o \ + hfs_cnode.o \ + hfs_endian.o \ + hfs_hotfiles.o \ + hfs_link.o \ + hfs_quota.o \ + hfs_readwrite.o \ + hfs_search.o \ + hfs_vfsops.o \ + hfs_vnops.o \ + hfs_xattr.o \ + if_bond.o \ + ip6_fw.o \ + ip_dummynet.o \ + ip_fw2.o \ + kern_credential.o \ + kern_descrip.o \ + kern_event.o \ + kern_exec.o \ + kern_lockf.o \ + kern_subr.o \ + km.o \ + lockstat.o \ + mach_loader.o \ + memdev.o \ + nfs4_subs.o \ + nfs4_vnops.o \ + nfs_boot.o \ + nfs_gss.o \ + nfs_serv.o \ + nfs_socket.o \ + nfs_srvcache.o \ + nfs_subs.o \ + nfs_syscalls.o \ + nfs_vfsops.o \ + nfs_vnops.o \ + proc_info.o \ + pthread_synch.o \ + qsort.o \ + randomdev.o \ + sdt.o \ + sha1mod.o \ + shadow.o \ + spec_vnops.o \ + subr_log.o \ + subr_prof.o \ + sys_generic.o \ + sys_pipe.o \ + systemcalls.o \ + systrace.o \ + tcp_lro.o \ + tty.o \ + tty_compat.o \ + tty_ptmx.o \ + tty_pty.o \ + ubc_subr.o \ + uipc_usrreq.o \ + vfs_attrlist.o \ + vfs_fsevents.o \ + vfs_journal.o \ + vfs_lookup.o \ + vfs_subr.o \ + vfs_syscalls.o \ + vfs_utfconv.o \ + vfs_vnops.o \ + vfs_xattr.o \ + vn.o \ + munge.o \ + aes.o \ + aeskey.o \ + sdt_arm.o + +$(foreach file,$(OBJS_NO_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-Wno-cast-align))) # # This rule insures that the subr_prof.c does NOT get compiled with @@ -322,15 +257,12 @@ subr_prof.o_CFLAGS_RM = -pg # ${OBJS}: ${OBJSDEPS} - -%LOAD - LDOBJS = $(OBJS) -$(COMPONENT).filelist: $(LDOBJS) +$(COMPONENT).filelist: $(LDOBJS) @echo LDFILELIST $(COMPONENT) $(_v)( for obj in ${LDOBJS}; do \ - echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist MAKESYSCALLS = $(SRCROOT)/bsd/kern/makesyscalls.sh @@ -347,12 +279,9 @@ audit_kevents.c: $(SRCROOT)/bsd/kern/syscalls.master $(MAKESYSCALLS) @echo "Generating $@ from $<"; $(_v)$(MAKESYSCALLS) $< audit > /dev/null -do_depend: do_all - $(_v)${MD} -u Makedep -f -d `ls *.d`; - do_all: $(COMPONENT).filelist -do_build_all: do_depend +do_build_all:: do_all %RULES diff --git a/bsd/conf/Makefile.x86_64 b/bsd/conf/Makefile.x86_64 index e45baf159..30072bf7b 100644 --- a/bsd/conf/Makefile.x86_64 +++ b/bsd/conf/Makefile.x86_64 @@ -6,9 +6,6 @@ dis_tables.o_CFLAGS_ADD += -Wno-cast-qual fbt_x86.o_CFLAGS_ADD += -Wno-cast-qual -# sha256 Files to build with -DSHA256_USE_ASSEMBLY=1 -sha2.o_CFLAGS_ADD += -DSHA256_USE_ASSEMBLY=1 - # Inline assembly doesn't interact well with LTO fbt_x86.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG) # Taking the address of labels doesn't work with LTO (9524055) diff --git a/bsd/conf/files b/bsd/conf/files index 118244ccc..8fa8a831a 100644 --- a/bsd/conf/files +++ b/bsd/conf/files @@ -50,11 +50,6 @@ OPTIONS/kernobjc optional kernobjc OPTIONS/kernremote optional kernremote OPTIONS/uxpr optional uxpr OPTIONS/kernel_stack optional kernel_stack -OPTIONS/norma_ipc optional 
norma_ipc -OPTIONS/norma_device optional norma_device -OPTIONS/norma_vm optional norma_vm -OPTIONS/norma_task optional norma_task -OPTIONS/norma_ether optional norma_ether OPTIONS/new_vm_code optional new_vm_code OPTIONS/old_vm_code optional old_vm_code OPTIONS/compat_43 optional compat_43 @@ -70,6 +65,9 @@ OPTIONS/sysv_sem optional sysv_sem OPTIONS/sysv_msg optional sysv_msg OPTIONS/sysv_shm optional sysv_shm OPTIONS/no_bsd_inlines optional no_bsd_inlines +OPTIONS/importance_inheritance optional importance_inheritance +OPTIONS/importance_debug optional importance_debug +OPTIONS/in_kernel_tests optional config_in_kernel_tests # # Network options @@ -77,28 +75,14 @@ OPTIONS/no_bsd_inlines optional no_bsd_inlines OPTIONS/networking optional networking OPTIONS/inet optional inet OPTIONS/inet6 optional inet6 +OPTIONS/ipv6send optional ipv6send OPTIONS/ether optional ether -OPTIONS/multicast optional multicast OPTIONS/mrouting optional mrouting -OPTIONS/routing optional routing -OPTIONS/eon optional eon -OPTIONS/tpip optional tpip -OPTIONS/ns optional ns -OPTIONS/iso optional iso -OPTIONS/tuba optional tuba -OPTIONS/gateway optional gateway -OPTIONS/ipx optional ipx -OPTIONS/tun optional tun OPTIONS/vlan optional vlan OPTIONS/bond optional bond OPTIONS/bpfilter optional bpfilter -OPTIONS/sl optional sl -OPTIONS/ppp optional ppp -OPTIONS/sppp optional sppp -OPTIONS/ppp_deflate optional ppp_deflate zlib -OPTIONS/disc optional disc -OPTIONS/fddi optional fddi - +OPTIONS/multipath optional multipath +OPTIONS/mptcp optional mptcp OPTIONS/ipdivert optional ipdivert OPTIONS/dummynet optional dummynet OPTIONS/ipfw2 optional ipfw2 @@ -108,9 +92,7 @@ OPTIONS/tcpdebug optional tcpdebug OPTIONS/if_bridge optional if_bridge OPTIONS/bridgestp optional bridgestp if_bridge OPTIONS/gif optional gif -OPTIONS/netat optional netat OPTIONS/sendfile optional sendfile -OPTIONS/randomipid optional randomipid OPTIONS/pf optional pf OPTIONS/pflog optional pflog pf OPTIONS/pf_altq optional pf_altq pf @@ -142,6 +124,45 @@ OPTIONS/hfs_compression optional hfs_compression OPTIONS/config_imageboot optional config_imageboot +bsd/nfs/nfs4_subs.c optional nfsclient +bsd/nfs/nfs4_vnops.c optional nfsclient +bsd/nfs/krpc_subr.c optional nfsclient +bsd/nfs/nfs_bio.c optional nfsclient +bsd/nfs/nfs_boot.c optional nfsclient +bsd/nfs/nfs_gss.c optional nfsclient +bsd/nfs/nfs_gss.c optional nfsserver +bsd/nfs/nfs_gss_crypto.c optional nfsclient +bsd/nfs/nfs_gss_crypto.c optional nfsserver +bsd/nfs/nfs_lock.c optional nfsclient +bsd/nfs/nfs_node.c optional nfsclient +bsd/nfs/nfs_serv.c optional nfsserver +bsd/nfs/nfs_socket.c optional nfsclient +bsd/nfs/nfs_socket.c optional nfsserver +bsd/nfs/nfs_srvcache.c optional nfsserver +bsd/nfs/nfs_subs.c optional nfsclient +bsd/nfs/nfs_subs.c optional nfsserver +bsd/nfs/nfs_syscalls.c optional nfsclient +bsd/nfs/nfs_syscalls.c optional nfsserver +bsd/nfs/nfs_vfsops.c optional nfsclient +bsd/nfs/nfs_vnops.c optional nfsclient +bsd/nfs/nfs_upcall.c optional nfsserver + +bsd/kern/netboot.c optional nfsclient + +bsd/dev/dtrace/dtrace.c optional config_dtrace +bsd/dev/dtrace/lockstat.c optional config_dtrace +bsd/dev/dtrace/dtrace_ptss.c optional config_dtrace +bsd/dev/dtrace/dtrace_subr.c optional config_dtrace +bsd/dev/dtrace/dtrace_glue.c standard +bsd/dev/dtrace/dtrace_alloc.c optional config_dtrace +bsd/dev/dtrace/blist.c optional config_dtrace +bsd/dev/dtrace/fbt.c optional config_dtrace +bsd/dev/dtrace/sdt.c optional config_dtrace +bsd/dev/dtrace/sdt_subr.c optional config_dtrace 
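
The OBJS_NO_CAST_ALIGN opt-out in the Makefile.template hunk above (8474835) exists because those objects parse packed on-disk or wire formats by casting byte pointers up to structure pointers, which -Wcast-align flags as an increase in alignment requirement. In miniature, the pattern the warning fires on (wire_hdr is an illustrative type, not taken from any of the listed files):

#include <stdint.h>

/* Casting a byte pointer to a type with stricter alignment is what
 * -Wcast-align objects to, even when the surrounding code can
 * prove the offset is aligned; the per-file -Wno-cast-align keeps
 * such parsers building under -Werror. */
struct wire_hdr {
	uint32_t magic;
	uint32_t length;
};

uint32_t
read_magic(const unsigned char *pkt)
{
	const struct wire_hdr *h = (const struct wire_hdr *)pkt;	/* warns here */
	return h->magic;
}
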
+bsd/dev/dtrace/systrace.c optional config_dtrace +bsd/dev/dtrace/profile_prvd.c optional config_dtrace +bsd/dev/dtrace/fasttrap.c optional config_dtrace + bsd/dev/random/randomdev.c standard bsd/dev/random/fips_sha1.c standard bsd/dev/random/YarrowCoreLib/port/smf.c standard @@ -151,7 +172,7 @@ bsd/dev/random/YarrowCoreLib/src/sha1mod.c standard bsd/dev/random/YarrowCoreLib/src/yarrowUtils.c standard bsd/dev/memdev.c standard - +bsd/dev/munge.c standard bsd/dev/unix_startup.c standard bsd/dev/vn/vn.c optional vndevice @@ -162,8 +183,8 @@ bsd/libkern/random.c standard bsd/libkern/scanc.c standard bsd/libkern/skpc.c standard bsd/libkern/strsep.c standard -bsd/libkern/inet_ntop.c standard bsd/libkern/bcd.c standard +bsd/libkern/memchr.c standard bsd/vfs/vfs_attrlist.c standard bsd/vfs/vfs_bio.c standard @@ -194,30 +215,30 @@ bsd/miscfs/devfs/devfs_tree.c optional devfs bsd/miscfs/devfs/devfs_vnops.c optional devfs bsd/miscfs/devfs/devfs_vfsops.c optional devfs +bsd/miscfs/mockfs/mockfs_fsnode.c optional mockfs +bsd/miscfs/mockfs/mockfs_vfsops.c optional mockfs +bsd/miscfs/mockfs/mockfs_vnops.c optional mockfs + bsd/kern/decmpfs.c standard +bsd/net/net_stubs.c standard bsd/net/bpf.c optional bpfilter bsd/net/bpf_filter.c optional bpfilter bsd/net/if_bridge.c optional if_bridge bsd/net/bridgestp.c optional bridgestp -bsd/net/bsd_comp.c optional ppp_bsdcomp bsd/net/if.c optional networking bsd/net/init.c optional sockets bsd/net/dlil.c optional networking bsd/net/ether_if_module.c optional ether -bsd/net/ether_at_pr_module.c optional ether netat bsd/net/ether_inet_pr_module.c optional ether inet bsd/net/ether_inet6_pr_module.c optional ether inet6 bsd/net/if_loop.c optional loop bsd/net/if_mib.c optional networking -bsd/net/if_sl.c optional sl -bsd/net/if_tun.c optional tun bsd/net/if_vlan.c optional vlan bsd/net/multicast_list.c optional networking bsd/net/if_bond.c optional bond bsd/net/devtimer.c optional bond bsd/net/ndrv.c optional networking -bsd/net/ppp_deflate.c optional ppp_deflate bsd/net/radix.c optional networking bsd/net/raw_cb.c optional networking bsd/net/raw_usrreq.c optional networking @@ -225,18 +246,17 @@ bsd/net/route.c optional networking bsd/net/rtsock.c optional networking bsd/net/netsrc.c optional networking bsd/net/ntstat.c optional networking -bsd/net/slcompress.c optional ppp -bsd/net/slcompress.c optional sl bsd/net/if_gif.c optional gif bsd/net/if_stf.c optional stf -bsd/net/net_osdep.c optional sockets bsd/net/kpi_interface.c optional networking bsd/net/kpi_protocol.c optional networking bsd/net/kpi_interfacefilter.c optional networking bsd/net/net_str_id.c optional networking bsd/net/if_utun.c optional networking bsd/net/if_utun_crypto.c optional networking +bsd/net/if_utun_crypto_dtls.c optional networking bsd/net/if_utun_crypto_ipsec.c optional networking +bsd/net/if_ipsec.c optional ipsec bsd/net/if_pflog.c optional pflog bsd/net/pf.c optional pf bsd/net/pf_if.c optional pf @@ -246,8 +266,10 @@ bsd/net/pf_osfp.c optional pf bsd/net/pf_ruleset.c optional pf bsd/net/pf_table.c optional pf bsd/net/iptap.c optional networking +bsd/net/pktap.c optional networking bsd/net/if_llreach.c optional networking bsd/net/flowhash.c optional networking +bsd/net/flowadv.c optional networking bsd/net/classq/classq.c optional networking bsd/net/classq/classq_blue.c optional classq_blue @@ -289,7 +311,7 @@ bsd/netinet/ip_dummynet.c optional dummynet bsd/netinet/ip_fw2.c optional ipfw2 bsd/netinet/ip_fw2_compat.c optional ipfw2 bsd/netinet/ip_icmp.c optional inet 
-bsd/netinet/ip_id.c optional randomipid inet
+bsd/netinet/ip_id.c optional inet
 bsd/netinet/ip_input.c optional inet
 bsd/netinet/ip_mroute.c optional mrouting
 bsd/netinet/ip_output.c optional inet
@@ -309,6 +331,14 @@ bsd/netinet/in_gif.c optional gif inet
 bsd/netinet/ip_ecn.c optional inet
 bsd/netinet/ip_encap.c optional inet
 bsd/netinet/kpi_ipfilter.c optional inet
+bsd/netinet/flow_divert.c optional flow_divert
+bsd/netinet/mp_proto.c optional multipath
+bsd/netinet/mp_pcb.c optional multipath
+bsd/netinet/mptcp.c optional mptcp
+bsd/netinet/mptcp_subr.c optional mptcp
+bsd/netinet/mptcp_usrreq.c optional mptcp
+bsd/netinet/mptcp_opt.c optional mptcp
+bsd/netinet/mptcp_timer.c optional mptcp
 bsd/netinet6/ah_core.c optional ipsec
 bsd/netinet6/ah_input.c optional ipsec
 bsd/netinet6/ah_output.c optional ipsec
@@ -321,6 +351,7 @@ bsd/netinet6/dest6.c optional inet6
 bsd/netinet6/frag6.c optional inet6
 bsd/netinet6/icmp6.c optional inet6
 bsd/netinet6/in6.c optional inet6
+bsd/netinet6/in6_cga.c optional inet6 ipv6send
 bsd/netinet6/in6_cksum.c optional inet6
 bsd/netinet6/in6_gif.c optional gif inet6
 bsd/netinet6/ip6_fw.c optional inet6 ipfw2
@@ -342,6 +373,7 @@ bsd/netinet6/nd6.c optional inet6
 bsd/netinet6/nd6_nbr.c optional inet6
 bsd/netinet6/nd6_prproxy.c optional inet6
 bsd/netinet6/nd6_rtr.c optional inet6
+bsd/netinet6/nd6_send.c optional inet6 ipv6send
 bsd/netinet6/raw_ip6.c optional inet6
 bsd/netinet6/route6.c optional inet6
 bsd/netinet6/scope6.c optional inet6
@@ -370,98 +402,14 @@ bsd/crypto/rc4/rc4.c optional crypto
 #bsd/netpm/pm_route.c optional pm
 #bsd/netpm/pm_usrreq.c optional pm
-
-bsd/netat/at.c optional netat
-bsd/netat/at_proto.c optional netat
-bsd/netat/at_pcb.c optional netat
-bsd/netat/ddp_usrreq.c optional netat
-bsd/netat/atp_alloc.c optional netat
-bsd/netat/atp_misc.c optional netat
-bsd/netat/atp_open.c optional netat
-bsd/netat/atp_read.c optional netat
-bsd/netat/atp_write.c optional netat
-bsd/netat/asp_proto.c optional netat
-bsd/netat/ddp_aep.c optional netat
-bsd/netat/ddp.c optional netat
-bsd/netat/ddp_brt.c optional netat
-bsd/netat/ddp_proto.c optional netat
-bsd/netat/ddp_nbp.c optional netat
-bsd/netat/ddp_rtmp.c optional netat
-bsd/netat/ddp_sip.c optional netat
-bsd/netat/ddp_rtmptable.c optional netat
-bsd/netat/ddp_r_rtmp.c optional netat
-bsd/netat/ddp_r_zip.c optional netat
-bsd/netat/ddp_aarp.c optional netat
-bsd/netat/ddp_lap.c optional netat
-bsd/netat/adsp_CLDeny.c optional netat
-bsd/netat/adsp_Read.c optional netat
-bsd/netat/adsp_Timer.c optional netat
-bsd/netat/adsp_attention.c optional netat
-bsd/netat/adsp_CLListen.c optional netat
-bsd/netat/adsp_NewCID.c optional netat
-bsd/netat/adsp_TimerElem.c optional netat
-bsd/netat/adsp_reset.c optional netat
-bsd/netat/adsp_Close.c optional netat
-bsd/netat/adsp_Open.c optional netat
-bsd/netat/adsp_RxAttn.c optional netat
-bsd/netat/adsp_Write.c optional netat
-bsd/netat/adsp_Control.c optional netat
-bsd/netat/adsp_RxData.c optional netat
-bsd/netat/adsp.c optional netat
-bsd/netat/adsp_Init.c optional netat
-bsd/netat/adsp_Options.c optional netat
-bsd/netat/adsp_stream.c optional netat
-bsd/netat/adsp_InitGlobals.c optional netat
-bsd/netat/adsp_Packet.c optional netat
-bsd/netat/adsp_Status.c optional netat
-bsd/netat/adsp_misc.c optional netat
-bsd/netat/sys_glue.c optional netat
-bsd/netat/sys_dep.c optional netat
-bsd/netat/drv_dep.c optional netat
-bsd/netat/aurp_aurpd.c optional netat
-bsd/netat/aurp_cfg.c optional netat
-bsd/netat/aurp_gdata.c optional netat
-bsd/netat/aurp_misc.c optional netat
-bsd/netat/aurp_open.c optional netat
-bsd/netat/aurp_rd.c optional netat
-bsd/netat/aurp_ri.c optional netat
-bsd/netat/aurp_rx.c optional netat
-bsd/netat/aurp_tickle.c optional netat
-bsd/netat/aurp_tx.c optional netat
-bsd/netat/aurp_zi.c optional netat
-
-bsd/nfs/krpc_subr.c optional nfsclient
-bsd/nfs/nfs_bio.c optional nfsclient
-bsd/nfs/nfs_boot.c optional nfsclient
-bsd/nfs/nfs_gss.c optional nfsclient
-bsd/nfs/nfs_gss.c optional nfsserver
-bsd/nfs/nfs_gss_crypto.c optional nfsclient
-bsd/nfs/nfs_gss_crypto.c optional nfsserver
-bsd/nfs/nfs_lock.c optional nfsclient
-bsd/nfs/nfs_node.c optional nfsclient
-bsd/nfs/nfs_serv.c optional nfsserver
-bsd/nfs/nfs_socket.c optional nfsclient
-bsd/nfs/nfs_socket.c optional nfsserver
-bsd/nfs/nfs_srvcache.c optional nfsserver
-bsd/nfs/nfs_subs.c optional nfsclient
-bsd/nfs/nfs_subs.c optional nfsserver
-bsd/nfs/nfs_syscalls.c optional nfsclient
-bsd/nfs/nfs_syscalls.c optional nfsserver
-bsd/nfs/nfs_vfsops.c optional nfsclient
-bsd/nfs/nfs_vnops.c optional nfsclient
-bsd/nfs/nfs4_subs.c optional nfsclient
-bsd/nfs/nfs4_vnops.c optional nfsclient
-bsd/nfs/nfs_upcall.c optional nfsserver
-
-bsd/kern/netboot.c optional nfsclient
-
+# Some hfs files are standard due to exported KPI even if HFS is not enabled
 bsd/hfs/hfs_attrlist.c optional hfs
 bsd/hfs/hfs_btreeio.c optional hfs
 bsd/hfs/hfs_catalog.c optional hfs
 bsd/hfs/hfs_chash.c optional hfs
 bsd/hfs/hfs_cnode.c optional hfs
-bsd/hfs/hfs_encodinghint.c optional hfs
-bsd/hfs/hfs_encodings.c optional hfs
+bsd/hfs/hfs_encodinghint.c standard
+bsd/hfs/hfs_encodings.c standard
 bsd/hfs/hfs_endian.c optional hfs
 bsd/hfs/hfs_hotfiles.c optional hfs
 bsd/hfs/hfs_link.c optional hfs
@@ -475,7 +423,7 @@ bsd/hfs/hfs_vfsutils.c optional hfs
 bsd/hfs/hfs_vnops.c optional hfs
 bsd/hfs/hfs_xattr.c optional hfs
 bsd/hfs/MacOSStubs.c optional hfs
-bsd/hfs/hfs_cprotect.c optional hfs
+bsd/hfs/hfs_cprotect.c standard
 bsd/hfs/rangelist.c optional hfs
 bsd/hfs/hfscommon/BTree/BTree.c optional hfs
 bsd/hfs/hfscommon/BTree/BTreeAllocate.c optional hfs
@@ -489,8 +437,7 @@ bsd/hfs/hfscommon/Catalog/FileIDsServices.c optional hfs
 bsd/hfs/hfscommon/Misc/BTreeWrapper.c optional hfs
 bsd/hfs/hfscommon/Misc/FileExtentMapping.c optional hfs
 bsd/hfs/hfscommon/Misc/VolumeAllocation.c optional hfs
-bsd/hfs/hfscommon/Misc/HybridAllocator.c optional hfs
-bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c optional hfs
+bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c standard
 
 bsd/security/audit/audit.c optional config_audit
 bsd/security/audit/audit_arg.c optional config_audit
@@ -518,14 +465,17 @@ bsd/kern/kern_authorization.c standard
 bsd/kern/kern_clock.c standard
 bsd/kern/kern_core.c standard
 bsd/kern/kern_credential.c standard
+bsd/kern/kern_cs.c standard
 bsd/kern/kern_symfile.c standard
 bsd/kern/kern_descrip.c standard
+bsd/kern/kern_guarded.c standard
 bsd/kern/kern_event.c standard
 bsd/kern/kern_control.c optional networking
 bsd/kern/kern_exec.c standard
 bsd/kern/kern_exit.c standard
 bsd/kern/kern_lockf.c standard
 bsd/kern/kern_fork.c standard
+bsd/kern/kern_asl.c standard
 bsd/kern/kern_malloc.c standard
 bsd/kern/kern_mman.c standard
 bsd/kern/kern_physio.c standard
@@ -585,16 +535,17 @@ bsd/kern/posix_shm.c standard
 bsd/kern/qsort.c standard
 bsd/kern/kpi_socket.c optional sockets
 bsd/kern/kpi_socketfilter.c optional sockets
-bsd/kern/pthread_support.c optional psynch
-bsd/kern/pthread_synch.c standard
+bsd/kern/pthread_shims.c standard
 bsd/kern/proc_info.c standard
 bsd/kern/process_policy.c standard
+bsd/kern/kern_overrides.c standard
bsd/kern/vm_pressure.c optional vm_pressure_events bsd/kern/socket_info.c optional sockets bsd/vm/vnode_pager.c standard bsd/vm/vm_unix.c standard bsd/vm/dp_backing_file.c standard +bsd/vm/vm_compressor_backing_file.c standard bsd/uxkern/ux_exception.c standard @@ -604,23 +555,12 @@ bsd/conf/param.c standard bsd/dev/chud/chud_bsd_callback.c standard bsd/dev/chud/chud_process.c standard +bsd/kern/imageboot.c optional config_imageboot +osfmk/kperf/kperfbsd.c optional kperf +bsd/kern/kern_kpc.c optional kpc -bsd/dev/dtrace/dtrace.c optional config_dtrace -bsd/dev/dtrace/lockstat.c optional config_dtrace -bsd/dev/dtrace/dtrace_ptss.c optional config_dtrace -bsd/dev/dtrace/dtrace_subr.c optional config_dtrace -bsd/dev/dtrace/dtrace_glue.c standard -bsd/dev/dtrace/dtrace_alloc.c optional config_dtrace -bsd/dev/dtrace/blist.c optional config_dtrace -bsd/dev/dtrace/fbt.c optional config_dtrace -bsd/dev/dtrace/sdt.c optional config_dtrace -bsd/dev/dtrace/sdt_subr.c optional config_dtrace -bsd/dev/dtrace/systrace.c optional config_dtrace -bsd/dev/dtrace/profile_prvd.c optional config_dtrace -bsd/dev/dtrace/fasttrap.c optional config_dtrace - -bsd/kern/imageboot.c optional config_imageboot +bsd/kern/kern_tests.c optional config_in_kernel_tests -osfmk/kperf/kperfbsd.c optional kperf +bsd/kern/proc_uuid_policy.c optional config_proc_uuid_policy diff --git a/bsd/conf/files.i386 b/bsd/conf/files.i386 deleted file mode 100644 index 5c8852f6c..000000000 --- a/bsd/conf/files.i386 +++ /dev/null @@ -1,35 +0,0 @@ -OPTIONS/show_space optional show_space -OPTIONS/gdb optional gdb -OPTIONS/iplmeas optional iplmeas - - -bsd/dev/i386/conf.c standard -bsd/dev/i386/cons.c standard -bsd/dev/i386/mem.c standard -bsd/dev/i386/km.c standard -bsd/dev/i386/kern_machdep.c standard -bsd/dev/i386/memmove.c standard -bsd/dev/i386/stubs.c standard -bsd/dev/i386/systemcalls.c standard -bsd/dev/i386/sysctl.c standard -bsd/dev/i386/unix_signal.c standard -bsd/dev/i386/munge.s standard - - -# Lightly ifdef'd to support K64 DTrace -bsd/dev/i386/dtrace_isa.c optional config_dtrace -bsd/dev/i386/dtrace_subr_x86.c optional config_dtrace -bsd/dev/i386/fbt_x86.c optional config_dtrace -bsd/dev/i386/sdt_x86.c optional config_dtrace -bsd/dev/i386/fasttrap_isa.c optional config_dtrace -bsd/dev/i386/instr_size.c optional config_dtrace -bsd/dev/i386/dis_tables.c optional config_dtrace - -# Support for identifying MACF calouts with locks held -bsd/kern/policy_check.c optional config_macf - -bsd/kern/bsd_stubs.c standard -bsd/netinet/in_cksum.c optional inet - - - diff --git a/bsd/conf/files.x86_64 b/bsd/conf/files.x86_64 index ed63a4a2f..d9bbd2736 100644 --- a/bsd/conf/files.x86_64 +++ b/bsd/conf/files.x86_64 @@ -13,7 +13,6 @@ bsd/dev/i386/stubs.c standard bsd/dev/i386/systemcalls.c standard bsd/dev/i386/sysctl.c standard bsd/dev/i386/unix_signal.c standard -bsd/dev/x86_64/munge.s standard # Lightly ifdef'd to support K64 DTrace @@ -29,5 +28,6 @@ bsd/dev/i386/dis_tables.c optional config_dtrace bsd/kern/policy_check.c optional config_macf bsd/kern/bsd_stubs.c standard +bsd/netinet/cpu_in_cksum.c standard bsd/netinet/in_cksum.c optional inet diff --git a/bsd/conf/param.c b/bsd/conf/param.c index 95c01ffb5..351fc8cc0 100644 --- a/bsd/conf/param.c +++ b/bsd/conf/param.c @@ -121,5 +121,4 @@ struct cblock *cfree; struct cblock *cfreelist = NULL; int cfreecount = 0; struct buf *buf_headers; -struct domain *domains; - +struct domains_head domains = TAILQ_HEAD_INITIALIZER(domains); diff --git a/bsd/crypto/Makefile b/bsd/crypto/Makefile index 
109c4c4cb..01b1da8b2 100644 --- a/bsd/crypto/Makefile +++ b/bsd/crypto/Makefile @@ -10,21 +10,7 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ blowfish \ cast128 \ - rc4 \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -INSTINC_SUBDIRS_ARM = \ - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS_ARM = \ + rc4 \ PRIVATE_DATAFILES = \ sha1.h \ diff --git a/bsd/crypto/blowfish/Makefile b/bsd/crypto/blowfish/Makefile index 26126163a..6a7a74803 100644 --- a/bsd/crypto/blowfish/Makefile +++ b/bsd/crypto/blowfish/Makefile @@ -7,18 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - PRIVATE_DATAFILES = \ blowfish.h diff --git a/bsd/crypto/cast128/Makefile b/bsd/crypto/cast128/Makefile index 100921729..51439fb0b 100644 --- a/bsd/crypto/cast128/Makefile +++ b/bsd/crypto/cast128/Makefile @@ -7,18 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - PRIVATE_DATAFILES = \ cast128.h diff --git a/bsd/crypto/rc4/Makefile b/bsd/crypto/rc4/Makefile index 9aad66e3a..dc49732bc 100644 --- a/bsd/crypto/rc4/Makefile +++ b/bsd/crypto/rc4/Makefile @@ -7,22 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -INSTINC_SUBDIRS_ARM = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS_ARM = \ - PRIVATE_DATAFILES = \ rc4.h diff --git a/bsd/dev/Makefile b/bsd/dev/Makefile index 01f00592f..2f15fce2a 100644 --- a/bsd/dev/Makefile +++ b/bsd/dev/Makefile @@ -7,26 +7,9 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - -INSTALL_MI_LIST = - -INSTALL_MI_DIR = dev - -EXPORT_MI_LIST = - -EXPORT_MI_DIR = dev +EXPINC_SUBDIRS = random +INST_SUBDIRS = dtrace include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/dev/chud/chud_bsd_callback.c b/bsd/dev/chud/chud_bsd_callback.c index a28bebf46..6519b06a5 100644 --- a/bsd/dev/chud/chud_bsd_callback.c +++ b/bsd/dev/chud/chud_bsd_callback.c @@ -51,6 +51,7 @@ kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t); kern_return_t chudxnu_kdebug_callback_cancel(void); extern void kdbg_control_chud(int val, void *fn); +extern void kperf_kdebug_callback(uint32_t debugid); static void chud_null_kdebug(uint32_t debugid __unused, uintptr_t arg0 __unused, uintptr_t arg1 __unused, uintptr_t arg2 __unused, uintptr_t arg3 __unused, @@ -68,6 +69,11 @@ chudxnu_private_kdebug_callback( uintptr_t arg4) { chudxnu_kdebug_callback_func_t fn = kdebug_callback_fn; + +#if KPERF + /* call out to kperf first */ + kperf_kdebug_callback(debugid); +#endif if(fn) { (fn)(debugid, arg0, arg1, arg2, arg3, arg4); diff --git a/bsd/dev/vn/Makefile b/bsd/dev/dtrace/Makefile similarity index 52% rename from bsd/dev/vn/Makefile rename to bsd/dev/dtrace/Makefile index b4e415a16..f0d9c4f10 100644 --- a/bsd/dev/vn/Makefile +++ 
b/bsd/dev/dtrace/Makefile @@ -7,29 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - -DATAFILES = \ - vnioctl.h - -INSTALL_MI_LIST = ${DATAFILES} - -INSTALL_MI_DIR = sys - -EXPORT_MI_LIST = ${DATAFILES} - -EXPORT_MI_DIR = sys - +INST_SUBDIRS = scripts include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index 1cf6fff33..1a3196990 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -19,6 +19,10 @@ * CDDL HEADER END */ +/* + * Portions copyright (c) 2011, Joyent, Inc. All rights reserved. + */ + /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -179,8 +183,8 @@ dtrace_optval_t dtrace_helper_actions_max = 32; dtrace_optval_t dtrace_helper_providers_max = 64; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; -dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ -dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ +dtrace_optval_t dtrace_cleanrate_default = 990099000; /* 1.1 hz */ +dtrace_optval_t dtrace_cleanrate_min = 20000000; /* 50 hz */ dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ @@ -211,7 +215,7 @@ hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; * it is used by some translators as an implementation detail. */ const char dtrace_zero[256] = { 0 }; /* zero-filled memory */ - +unsigned int dtrace_max_cpus = 0; /* number of enabled cpus */ /* * DTrace Internal Variables */ @@ -450,7 +454,7 @@ static lck_mtx_t dtrace_errlock; (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #else -#if defined(__x86_64__) +#if defined (__x86_64__) /* FIXME: two function calls!! */ #define DTRACE_TLS_THRKEY(where) { \ uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ @@ -460,15 +464,7 @@ static lck_mtx_t dtrace_errlock; (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #else -/* FIXME: three function calls!!! 
*/ -#define DTRACE_TLS_THRKEY(where) { \ - uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ - uint64_t thr = (uintptr_t)current_thread(); \ - uint_t pid = (uint_t)proc_selfpid(); \ - ASSERT(intr < (1 << 3)); \ - (where) = (((thr << 32 | pid) + DIF_VARIABLE_MAX) & \ - (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ -} +#error Unknown architecture #endif #endif /* __APPLE__ */ @@ -482,25 +478,13 @@ static lck_mtx_t dtrace_errlock; #define DTRACE_STORE(type, tomax, offset, what) \ *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); -#if !defined(__APPLE__) -#ifndef __i386 -#define DTRACE_ALIGNCHECK(addr, size, flags) \ - if (addr & (size - 1)) { \ - *flags |= CPU_DTRACE_BADALIGN; \ - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ - return (0); \ - } -#else -#define DTRACE_ALIGNCHECK(addr, size, flags) -#endif -#else /* __APPLE__ */ + #define DTRACE_ALIGNCHECK(addr, size, flags) \ if (addr & (MIN(size,4) - 1)) { \ *flags |= CPU_DTRACE_BADALIGN; \ cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ return (0); \ } -#endif /* __APPLE__ */ /* * Test whether a range of memory starting at testaddr of size testsz falls @@ -564,7 +548,7 @@ dtrace_load##bits(uintptr_t addr) \ #else /* __APPLE__ */ #define RECOVER_LABEL(bits) dtraceLoadRecover##bits: -#if (defined(__i386__) || defined (__x86_64__)) +#if defined (__x86_64__) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ uint##bits##_t dtrace_load##bits(uintptr_t addr); \ @@ -615,50 +599,7 @@ dtrace_load##bits(uintptr_t addr) \ return (rval); \ } #else /* all other architectures */ -#define DTRACE_LOADFUNC(bits) \ -/*CSTYLED*/ \ -uint##bits##_t dtrace_load##bits(uintptr_t addr); \ - \ -uint##bits##_t \ -dtrace_load##bits(uintptr_t addr) \ -{ \ - size_t size = bits / NBBY; \ - /*CSTYLED*/ \ - uint##bits##_t rval = 0; \ - int i; \ - volatile uint16_t *flags = (volatile uint16_t *) \ - &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ - \ - DTRACE_ALIGNCHECK(addr, size, flags); \ - \ - for (i = 0; i < dtrace_toxranges; i++) { \ - if (addr >= dtrace_toxrange[i].dtt_limit) \ - continue; \ - \ - if (addr + size <= dtrace_toxrange[i].dtt_base) \ - continue; \ - \ - /* \ - * This address falls within a toxic region; return 0. \ - */ \ - *flags |= CPU_DTRACE_BADADDR; \ - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ - return (0); \ - } \ - \ - { \ - volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \ - *flags |= CPU_DTRACE_NOFAULT; \ - recover = dtrace_set_thread_recover(current_thread(), recover); \ - /*CSTYLED*/ \ - rval = *((volatile uint##bits##_t *)addr); \ - RECOVER_LABEL(bits); \ - (void)dtrace_set_thread_recover(current_thread(), recover); \ - *flags &= ~CPU_DTRACE_NOFAULT; \ - } \ - \ - return (rval); \ -} +#error Unknown Architecture #endif #endif /* __APPLE__ */ @@ -2123,6 +2064,74 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) lquanta[levels + 1] += incr; } +static int +dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high, + int16_t nsteps, int64_t value) +{ + int64_t this = 1, last, next; + int base = 1, order; + + for (order = 0; order < low; ++order) + this *= factor; + + /* + * If our value is less than our factor taken to the power of the + * low order of magnitude, it goes into the zeroth bucket. + */ + if (value < this) + return 0; + else + last = this; + + for (this *= factor; order <= high; ++order) { + int nbuckets = this > nsteps ? 
nsteps : this; + + /* + * We should not generally get log/linear quantizations + * with a high magnitude that allows 64-bits to + * overflow, but we nonetheless protect against this + * by explicitly checking for overflow, and clamping + * our value accordingly. + */ + next = this * factor; + if (next < this) { + value = this - 1; + } + + /* + * If our value lies within this order of magnitude, + * determine its position by taking the offset within + * the order of magnitude, dividing by the bucket + * width, and adding to our (accumulated) base. + */ + if (value < this) { + return (base + (value - last) / (this / nbuckets)); + } + + base += nbuckets - (nbuckets / factor); + last = this; + this = next; + } + + /* + * Our value is greater than or equal to our factor taken to the + * power of one plus the high magnitude -- return the top bucket. + */ + return base; +} + +static void +dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) +{ + uint64_t arg = *llquanta++; + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEPS(arg); + + llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr; +} + /*ARGSUSED*/ static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) @@ -3254,7 +3263,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, /* Anchored probe that fires while on an interrupt accrues to process 0 */ return 0; - return ((uint64_t)proc_selfpid()); + return ((uint64_t)dtrace_proc_selfpid()); #endif /* __APPLE__ */ #if !defined(__APPLE__) @@ -3286,7 +3295,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); - return ((uint64_t)proc_selfppid()); + return ((uint64_t)dtrace_proc_selfppid()); #endif /* __APPLE__ */ #if !defined(__APPLE__) @@ -3382,11 +3391,27 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, #else case DIF_VAR_ZONENAME: + { + /* scratch_size is equal to length('global') + 1 for the null-terminator. */ + char *zname = (char *)mstate->dtms_scratch_ptr; + size_t scratch_size = 6 + 1; + if (!dtrace_priv_proc(state)) return (0); - - /* FIXME: return e.g. "global" allocated from scratch a la execname. */ - return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */ + + /* The scratch allocation's lifetime is that of the clause. */ + if (!DTRACE_INSCRATCH(mstate, scratch_size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + return 0; + } + + mstate->dtms_scratch_ptr += scratch_size; + + /* The kernel does not provide zonename, it will always return 'global'. */ + strlcpy(zname, "global", scratch_size); + + return ((uint64_t)(uintptr_t)zname); + } #endif /* __APPLE__ */ #if !defined(__APPLE__) @@ -3412,7 +3437,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return ((uint64_t)curthread->t_procp->p_cred->cr_uid); #else case DIF_VAR_UID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc_relaxed(state)) return (0); /* @@ -3421,14 +3446,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); - if (dtrace_CRED() != NULL) - /* Credential does not require lazy initialization. 
*/ - return ((uint64_t)kauth_getuid()); - else { - /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */ - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return -1ULL; - } + return ((uint64_t) dtrace_proc_selfruid()); #endif /* __APPLE__ */ #if !defined(__APPLE__) @@ -6984,7 +7002,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; DTRACE_STORE(uint64_t, tomax, - valoffs, (uint64_t)proc_selfpid()); + valoffs, (uint64_t)dtrace_proc_selfpid()); DTRACE_STORE(uint64_t, tomax, valoffs + sizeof (uint64_t), val); @@ -8160,16 +8178,7 @@ dtrace_unregister(dtrace_provider_id_t id) /* * Attempt to destroy the probes associated with this provider. */ - for (i = 0; i < dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; - - if (probe->dtpr_provider != old) - continue; - - if (probe->dtpr_ecb == NULL) - continue; - + if (old->ecb_count!=0) { /* * We have at least one ECB; we can't remove this provider. */ @@ -8185,7 +8194,7 @@ dtrace_unregister(dtrace_provider_id_t id) * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. */ - for (i = 0; i < dtrace_nprobes; i++) { + for (i = 0; i < dtrace_nprobes && old->probe_count!=0; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; @@ -8193,6 +8202,7 @@ dtrace_unregister(dtrace_provider_id_t id) continue; dtrace_probes[i] = NULL; + old->probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); @@ -8328,6 +8338,7 @@ dtrace_condense(dtrace_provider_id_t id) continue; dtrace_probes[i] = NULL; + prov->probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); @@ -8447,6 +8458,7 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, ASSERT(dtrace_probes[id - 1] == NULL); dtrace_probes[id - 1] = probe; + provider->probe_count++; if (provider != dtrace_provider) lck_mtx_unlock(&dtrace_lock); @@ -10424,6 +10436,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) return(0); } + probe->dtpr_provider->ecb_count++; if (probe->dtpr_ecb == NULL) { dtrace_provider_t *prov = probe->dtpr_provider; @@ -10625,6 +10638,34 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) break; } + case DTRACEAGG_LLQUANTIZE: { + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEPS(desc->dtad_arg); + int64_t v; + + agg->dtag_initial = desc->dtad_arg; + agg->dtag_aggregate = dtrace_aggregate_llquantize; + + if (factor < 2 || low >= high || nsteps < factor) + goto err; + + /* + * Now check that the number of steps evenly divides a power + * of the factor. (This assures both integer bucket size and + * linearity within each magnitude.) + */ + for (v = factor; v < nsteps; v *= factor) + continue; + + if ((v % nsteps) || (nsteps % factor)) + goto err; + + size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); + break; + } + case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; @@ -11081,6 +11122,7 @@ dtrace_ecb_disable(dtrace_ecb_t *ecb) probe->dtpr_ecb_last = prev; } + probe->dtpr_provider->ecb_count--; /* * The ECB has been disconnected from the probe; now sync to assure * that all CPUs have seen the change before returning. 
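The DTRACEAGG_LLQUANTIZE hunk above wires dtrace_aggregate_llquantize() into ECB aggregation creation, and dtrace_aggregate_llquantize_bucket() does the real work: one underflow bucket below factor^low, up to nsteps linear buckets per order of magnitude from factor^low through factor^high, and one overflow bucket at the top. Below is a standalone C rendering of that bucket computation so it can be compiled and inspected in user space; the main() driver and its llquantize(10, 0, 6, 20)-style parameters are illustrative, not taken from the patch.

#include <stdint.h>
#include <stdio.h>

/* Same computation as dtrace_aggregate_llquantize_bucket() in the hunk
 * above, lifted into user space so the bucket layout can be inspected. */
static int
llquantize_bucket(int16_t factor, int16_t low, int16_t high,
    int16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	for (order = 0; order < low; ++order)
		this *= factor;

	/* Values below factor^low fall into the zeroth bucket. */
	if (value < this)
		return (0);
	last = this;

	for (this *= factor; order <= high; ++order) {
		int nbuckets = this > nsteps ? nsteps : (int)this;

		/* Clamp rather than overflow 64 bits at the very top. */
		next = this * factor;
		if (next < this)
			value = this - 1;

		/* Offset within this magnitude divided by the bucket width. */
		if (value < this)
			return (base + (int)((value - last) / (this / nbuckets)));

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/* At or above factor^(high + 1): the top bucket. */
	return (base);
}

int
main(void)
{
	int64_t samples[] = { 0, 5, 42, 999, 123456, 9999999 };
	size_t i;

	for (i = 0; i < sizeof (samples) / sizeof (samples[0]); i++)
		printf("%lld -> bucket %d\n", (long long)samples[i],
		    llquantize_bucket(10, 0, 6, 20, samples[i]));
	return (0);
}

The validation in the DTRACEAGG_LLQUANTIZE case above (factor >= 2, low < high, nsteps >= factor, nsteps evenly dividing a power of factor) is exactly what guarantees the integer bucket widths this function divides by.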
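Several hunks above (dtrace_unregister, dtrace_condense, dtrace_probe_create, dtrace_ecb_enable/disable) replace a full sweep of dtrace_probes[] with per-provider probe_count and ecb_count bookkeeping: the counters are maintained where probes and enablings come and go, so unregistering can refuse early on a nonzero ecb_count and stop scanning once probe_count drains to zero. A minimal sketch of that pattern follows; every name in it is a stand-in, not the kernel's actual structures.

#include <stddef.h>

struct provider {
	unsigned probe_count;	/* probes registered to this provider */
	unsigned ecb_count;	/* enablings attached to those probes */
};

struct probe {
	struct provider *prov;
	int enabled;
};

static void
probe_create(struct provider *prov, struct probe *p)
{
	p->prov = prov;
	p->enabled = 0;
	prov->probe_count++;		/* mirrors dtrace_probe_create() */
}

static void
probe_enable(struct probe *p)
{
	if (!p->enabled) {
		p->enabled = 1;
		p->prov->ecb_count++;	/* mirrors dtrace_ecb_enable() */
	}
}

static void
probe_disable(struct probe *p)
{
	if (p->enabled) {
		p->enabled = 0;
		p->prov->ecb_count--;	/* mirrors dtrace_ecb_disable() */
	}
}

/*
 * Unregistering no longer scans every probe to find a live enabling: a
 * nonzero ecb_count is reason enough to refuse, and the removal loop can
 * stop as soon as the provider's probe_count drains to zero.
 */
static int
provider_unregister(struct provider *prov, struct probe **probes,
    size_t nprobes)
{
	size_t i;

	if (prov->ecb_count != 0)
		return (-1);		/* busy: at least one ECB attached */

	for (i = 0; i < nprobes && prov->probe_count != 0; i++) {
		if (probes[i] == NULL || probes[i]->prov != prov)
			continue;
		probes[i] = NULL;
		prov->probe_count--;
	}
	return (0);
}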
@@ -16529,6 +16571,7 @@ dtrace_module_unloaded(struct modctl *ctl) ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); dtrace_probes[probe->dtpr_id - 1] = NULL; + probe->dtpr_provider->probe_count--; next = probe->dtpr_nextmod; dtrace_hash_remove(dtrace_bymod, probe); @@ -16684,6 +16727,7 @@ syncloop: ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); dtrace_probes[probe->dtpr_id - 1] = NULL; + probe->dtpr_provider->probe_count--; next = probe->dtpr_nextmod; dtrace_hash_remove(dtrace_bymod, probe); @@ -16961,7 +17005,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_provider, NULL, NULL, "END", 0, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "ERROR", 1, NULL); -#elif (defined(__i386__) || defined (__x86_64__)) +#elif defined (__x86_64__) dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "BEGIN", 1, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) @@ -18089,7 +18133,8 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) return KERN_SUCCESS; switch (cmd) { - case DTRACEHIOC_ADDDOF: { + case DTRACEHIOC_ADDDOF: + { dof_helper_t *dhp = NULL; size_t dof_ioctl_data_size; dof_ioctl_data_t* multi_dof; @@ -19640,8 +19685,16 @@ void dtrace_init( void ) { if (0 == gDTraceInited) { - int i, ncpu = NCPU; + int i, ncpu; + /* + * DTrace allocates buffers based on the maximum number + * of enabled cpus. This call avoids any race when finding + * that count. + */ + ASSERT(dtrace_max_cpus == 0); + ncpu = dtrace_max_cpus = ml_get_max_cpus(); + gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw); if (gMajDevNo < 0) { diff --git a/bsd/dev/dtrace/dtrace_glue.c b/bsd/dev/dtrace/dtrace_glue.c index db3c5766c..3e34344a7 100644 --- a/bsd/dev/dtrace/dtrace_glue.c +++ b/bsd/dev/dtrace/dtrace_glue.c @@ -51,6 +51,8 @@ #include #include #include +#include +#include #include #include #include @@ -64,6 +66,7 @@ #include #include /* All the bits we care about are guarded by MACH_KERNEL_PRIVATE :-( */ + /* * pid/proc */ @@ -278,49 +281,6 @@ crgetuid(const cred_t *cr) { cred_t copy_cr = *cr; return kauth_cred_getuid(&cop * "cyclic" */ -/* osfmk/kern/timer_call.h */ -typedef void *timer_call_param_t; -typedef void (*timer_call_func_t)( - timer_call_param_t param0, - timer_call_param_t param1); - -typedef struct timer_call { - queue_chain_t q_link; - queue_t queue; - timer_call_func_t func; - timer_call_param_t param0; - timer_call_param_t param1; - decl_simple_lock_data(,lock); - uint64_t deadline; - uint64_t soft_deadline; - uint32_t flags; - boolean_t async_dequeue; -} timer_call_data_t; - -typedef struct timer_call *timer_call_t; - -extern void -timer_call_setup( - timer_call_t call, - timer_call_func_t func, - timer_call_param_t param0); - -extern boolean_t -timer_call_enter1( - timer_call_t call, - timer_call_param_t param1, - uint64_t deadline, - uint32_t flags); - -#ifndef TIMER_CALL_CRITICAL -#define TIMER_CALL_CRITICAL 0x1 -#define TIMER_CALL_LOCAL 0x2 -#endif /* TIMER_CALL_CRITICAL */ - -extern boolean_t -timer_call_cancel( - timer_call_t call); - typedef struct wrap_timer_call { cyc_handler_t hdlr; cyc_time_t when; @@ -340,7 +300,7 @@ _timer_call_apply_cyclic( void *ignore, void *vTChdl ) (*(wrapTC->hdlr.cyh_func))( wrapTC->hdlr.cyh_arg ); clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, mach_absolute_time(), &(wrapTC->deadline) ); - timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL ); + 
timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL ); /* Did timer_call_remove_cyclic request a wakeup call when this timer call was re-armed? */ if (wrapTC->when.cyt_interval == WAKEUP_REAPER) @@ -362,7 +322,7 @@ timer_call_add_cyclic(wrap_timer_call_t *wrapTC, cyc_handler_t *handler, cyc_tim wrapTC->deadline = now; clock_deadline_for_periodic_event( wrapTC->when.cyt_interval, now, &(wrapTC->deadline) ); - timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL ); + timer_call_enter1( &(wrapTC->call), (void *)wrapTC, wrapTC->deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL ); return (cyclic_id_t)wrapTC; } @@ -579,7 +539,7 @@ dtrace_timeout(void (*func)(void *, void *), void* arg, uint64_t nanos) * and clock drift on later invocations is not a worry. */ uint64_t deadline = mach_absolute_time() + nanos; - + /* DRK: consider using a lower priority callout here */ thread_call_enter_delayed(call, deadline); return call; @@ -1189,9 +1149,7 @@ dtrace_copycheck(user_addr_t uaddr, uintptr_t kaddr, size_t size) ASSERT(kaddr + size >= kaddr); - if (ml_at_interrupt_context() || /* Avoid possible copyio page fault on int stack, which panics! */ - 0 != recover || /* Avoid reentrancy into copyio facility. */ - uaddr + size < uaddr || /* Avoid address wrap. */ + if ( uaddr + size < uaddr || /* Avoid address wrap. */ KERN_FAILURE == dtrace_copyio_preflight(uaddr)) /* Machine specific setup/constraints. */ { DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); diff --git a/bsd/dev/dtrace/dtrace_ptss.c b/bsd/dev/dtrace/dtrace_ptss.c index 1027f0d01..b43d4b17f 100644 --- a/bsd/dev/dtrace/dtrace_ptss.c +++ b/bsd/dev/dtrace/dtrace_ptss.c @@ -161,32 +161,19 @@ dtrace_ptss_allocate_page(struct proc* p) // Now allocate a page in user space and set its protections to allow execute. task_t task = p->task; vm_map_t map = get_task_map_reference(task); + if (map == NULL) + goto err; - mach_vm_address_t addr = 0LL; - mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!! + vm_prot_t cur_protection = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + vm_prot_t max_protection = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; -#if CONFIG_EMBEDDED - /* The embedded OS has extra permissions for writable and executable pages. We can't pass in the flags - * we need for the correct permissions from mach_vm_allocate, so need to call mach_vm_map directly. */ - mach_vm_offset_t map_addr = 0; - kern_return_t kr = mach_vm_map(map, &map_addr, size, 0, VM_FLAGS_ANYWHERE, IPC_PORT_NULL, 0, FALSE, VM_PROT_READ|VM_PROT_EXECUTE, VM_PROT_READ|VM_PROT_EXECUTE, VM_INHERIT_DEFAULT); - if (kr != KERN_SUCCESS) { - goto err; - } - addr = map_addr; -#else - kern_return_t kr = mach_vm_allocate(map, &addr, size, VM_FLAGS_ANYWHERE); + mach_vm_offset_t addr = 0; + mach_vm_size_t size = PAGE_SIZE; // We need some way to assert that this matches vm_map_round_page() !!! + kern_return_t kr = mach_vm_map(map, &addr, size, 0, VM_FLAGS_ANYWHERE, IPC_PORT_NULL, 0, FALSE, cur_protection, max_protection, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) { goto err; } - kr = mach_vm_protect(map, addr, size, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); - if (kr != KERN_SUCCESS) { - mach_vm_deallocate(map, addr, size); - goto err; - } -#endif - // Chain the page entries. 
int i; for (i=0; i */ struct savearea_t; /* Used anonymously */ -typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, int); +typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, __unused int); extern perfCallback tempDTraceTrapHook; -extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *); +extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *, __unused int); #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask) #define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */ @@ -207,6 +207,8 @@ fbt_suspend(void *arg, dtrace_id_t id, void *parg) (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_savedval, (vm_offset_t)fbt->fbtp_patchpoint, sizeof(fbt->fbtp_savedval)); + + fbt->fbtp_currentval = fbt->fbtp_savedval; } @@ -240,6 +242,8 @@ fbt_resume(void *arg, dtrace_id_t id, void *parg) (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint, sizeof(fbt->fbtp_patchval)); + + fbt->fbtp_currentval = fbt->fbtp_patchval; } diff --git a/bsd/dev/dtrace/lockstat.c b/bsd/dev/dtrace/lockstat.c index 68a3a91d0..508f2ea19 100644 --- a/bsd/dev/dtrace/lockstat.c +++ b/bsd/dev/dtrace/lockstat.c @@ -55,7 +55,7 @@ /* * Hot patch values, x86 */ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) #define NOP 0x90 #define RET 0xc3 #define LOCKSTAT_AFRAMES 1 @@ -72,7 +72,7 @@ typedef struct lockstat_probe { lockstat_probe_t lockstat_probes[] = { -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) /* Only provide implemented probes for each architecture */ { LS_LCK_MTX_LOCK, LSA_ACQUIRE, LS_LCK_MTX_LOCK_ACQUIRE, DTRACE_IDNONE }, { LS_LCK_MTX_LOCK, LSA_SPIN, LS_LCK_MTX_LOCK_SPIN, DTRACE_IDNONE }, @@ -119,21 +119,21 @@ lockstat_probe_t lockstat_probes[] = dtrace_id_t lockstat_probemap[LS_NPROBES]; #if CONFIG_DTRACE +#if defined(__x86_64__) extern void lck_mtx_lock_lockstat_patch_point(void); extern void lck_mtx_try_lock_lockstat_patch_point(void); extern void lck_mtx_try_lock_spin_lockstat_patch_point(void); extern void lck_mtx_unlock_lockstat_patch_point(void); extern void lck_mtx_lock_ext_lockstat_patch_point(void); extern void lck_mtx_ext_unlock_lockstat_patch_point(void); - -extern void lck_rw_done_release1_lockstat_patch_point(void); -extern void lck_rw_done_release2_lockstat_patch_point(void); extern void lck_rw_lock_shared_lockstat_patch_point(void); extern void lck_rw_lock_exclusive_lockstat_patch_point(void); extern void lck_rw_lock_shared_to_exclusive_lockstat_patch_point(void); extern void lck_rw_try_lock_shared_lockstat_patch_point(void); extern void lck_rw_try_lock_exclusive_lockstat_patch_point(void); extern void lck_mtx_lock_spin_lockstat_patch_point(void); +#endif + #endif /* CONFIG_DTRACE */ typedef struct lockstat_assembly_probe { @@ -145,7 +145,7 @@ typedef struct lockstat_assembly_probe { lockstat_assembly_probe_t assembly_probes[] = { #if CONFIG_DTRACE -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) /* * On x86 these points are better done via hot patches, which ensure * there is zero overhead when not in use. On x86 these patch points @@ -168,6 +168,8 @@ typedef struct lockstat_assembly_probe { #endif /* CONFIG_DTRACE */ { LS_LCK_INVALID, NULL } }; + + /* * Hot patch switches back and forth the probe points between NOP and RET. * The active argument indicates whether the probe point will turn on or off. 
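As the comment above describes, each lockstat probe site boils down to one patchable byte in the lock routine: RET while the probe is disabled, so the function returns before ever reaching the probe-firing sequence, and NOP once enabled, so execution falls through into it. The sketch below models that toggle in ordinary writable memory; everything but the two byte values is a stand-in, since in the kernel the byte lives in instruction text (written via a nofault copy) and the site/probe pairing comes from the assembly_probes[] table.

#include <stdint.h>

#define NOP 0x90	/* x86 one-byte no-op: fall through into the probe */
#define RET 0xc3	/* x86 near return: skip the probe entirely */

/* Stand-in for assembly_probes[]: a probe id plus its patchable byte. */
struct patch_site {
	int	probe;
	uint8_t	*patch_point;
};

static void
hot_patch(struct patch_site *sites, int nsites, int ls_probe, int active)
{
	int i;
	uint8_t instr = active ? NOP : RET;

	for (i = 0; i < nsites; i++) {
		if (sites[i].probe == ls_probe)
			*sites[i].patch_point = instr;	/* kernel: nofault copy into text */
	}
}

This is why the comment can claim zero overhead when the probes are off: the disabled state is a single RET executed in the lock routine's tail, with no test or branch left behind.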
@@ -188,7 +190,7 @@ void lockstat_hot_patch(boolean_t active, int ls_probe) */ for (i = 0; assembly_probes[i].lsap_patch_point; i++) { if (ls_probe == assembly_probes[i].lsap_probe) -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) { uint8_t instr; instr = (active ? NOP : RET ); diff --git a/bsd/dev/dtrace/profile_prvd.c b/bsd/dev/dtrace/profile_prvd.c index 36e213ce1..60ebf9bd7 100644 --- a/bsd/dev/dtrace/profile_prvd.c +++ b/bsd/dev/dtrace/profile_prvd.c @@ -46,7 +46,6 @@ #endif #endif -#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */ #include #include #include @@ -68,7 +67,7 @@ #include -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) extern x86_saved_state_t *find_kern_regs(thread_t); #else #error Unknown architecture @@ -112,30 +111,13 @@ static dtrace_provider_id_t profile_id; * and the static definition doesn't seem to be overly brittle. Still, we * allow for a manual override in case we get it completely wrong. */ -#if !defined(__APPLE__) - -#ifdef __x86 -#define PROF_ARTIFICIAL_FRAMES 10 -#else -#ifdef __sparc -#if DEBUG -#define PROF_ARTIFICIAL_FRAMES 4 -#else -#define PROF_ARTIFICIAL_FRAMES 3 -#endif -#endif -#endif - -#else /* is Mac OS X */ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) #define PROF_ARTIFICIAL_FRAMES 9 #else #error Unknown architecture #endif -#endif /* __APPLE__ */ - #define PROF_NAMELEN 15 #define PROF_PROFILE 0 @@ -198,18 +180,13 @@ profile_fire(void *arg) dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, CPU->cpu_profile_upc, late, 0, 0); #else -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) x86_saved_state_t *kern_regs = find_kern_regs(current_thread()); if (NULL != kern_regs) { /* Kernel was interrupted. */ -#if defined(__i386__) - dtrace_probe(prof->prof_id, saved_state32(kern_regs)->eip, 0x0, 0, 0, 0); -#elif defined(__x86_64__) dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, 0, 0, 0); -#else -#error Unknown arch -#endif + } else { pal_register_cache_state(current_thread(), VALID); /* Possibly a user interrupt */ @@ -244,18 +221,12 @@ profile_tick(void *arg) dtrace_probe(prof->prof_id, CPU->cpu_profile_pc, CPU->cpu_profile_upc, 0, 0, 0); #else -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) x86_saved_state_t *kern_regs = find_kern_regs(current_thread()); if (NULL != kern_regs) { /* Kernel was interrupted. 
*/ -#if defined(__i386__) - dtrace_probe(prof->prof_id, saved_state32(kern_regs)->eip, 0x0, 0, 0, 0); -#elif defined(__x86_64__) dtrace_probe(prof->prof_id, saved_state64(kern_regs)->isf.rip, 0x0, 0, 0, 0); -#else -#error Unknown arch -#endif } else { pal_register_cache_state(current_thread(), VALID); /* Possibly a user interrupt */ diff --git a/bsd/dev/dtrace/scripts/Makefile b/bsd/dev/dtrace/scripts/Makefile new file mode 100644 index 000000000..532a8699f --- /dev/null +++ b/bsd/dev/dtrace/scripts/Makefile @@ -0,0 +1,38 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTALL_DTRACE_SCRIPTS_LIST = \ + darwin.d \ + errno.d \ + io.d \ + ip.d \ + regs_x86_64.d \ + sched.d \ + signal.d \ + socket.d \ + tcp.d \ + unistd.d + +ifeq ($(PLATFORM),iPhoneOS) +INSTALL_DTRACE_SCRIPTS_LIST += mptcp.d +endif + +INSTALL_DTRACE_SCRIPTS_FILES = \ + $(addprefix $(DSTROOT)/$(INSTALL_DTRACE_SCRIPTS_DIR)/, $(INSTALL_DTRACE_SCRIPTS_LIST)) + +$(INSTALL_DTRACE_SCRIPTS_FILES): $(DSTROOT)/$(INSTALL_DTRACE_SCRIPTS_DIR)/% : % + $(_v)$(MKDIR) $(DSTROOT)/$(INSTALL_DTRACE_SCRIPTS_DIR) + @echo INSTALL $(@F) + $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ + +do_build_install_primary:: $(INSTALL_DTRACE_SCRIPTS_FILES) + +include $(MakeInc_rule) +include $(MakeInc_dir) + + diff --git a/bsd/dev/dtrace/scripts/darwin.d b/bsd/dev/dtrace/scripts/darwin.d new file mode 100644 index 000000000..2a2cf933e --- /dev/null +++ b/bsd/dev/dtrace/scripts/darwin.d @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +inline int UT_VFORK = 0x02000000; /* thread has vfork children */ +#pragma D binding "1.0" UT_VFORK + +inline uthread_t uthread = (mach_kernel`uthread_t)(curthread->uthread); /* ` */ +#pragma D binding "1.0" uthread + +inline struct proc * curproc = + (uthread && (uthread->uu_flag & UT_VFORK) && uthread->uu_proc) ? (struct proc *)uthread->uu_proc : + ((struct proc *)(curthread->task->bsd_info)) != NULL ? ((struct proc *)(curthread->task->bsd_info)) : + mach_kernel`kernproc; /* ` */ +#pragma D binding "1.0" curproc + +/* + * curthread->thread_tag will have this set if the thread is the main thread + */ +inline uint32_t THREAD_TAG_MAINTHREAD = 0x1; +inline uint32_t THREAD_TAG_CALLOUT = 0x2; +inline uint32_t THREAD_TAG_IOWORKLOOP = 0x4; + +/* + * The following miscellaneous constants are used by the proc(4) translators + * defined below. 
+ */
+inline char SIDL = 1;
+#pragma D binding "1.0" SIDL
+inline char SRUN = 2;
+#pragma D binding "1.0" SRUN
+inline char SSLEEP = 3;
+#pragma D binding "1.0" SSLEEP
+inline char SSTOP = 4;
+#pragma D binding "1.0" SSTOP
+inline char SZOMB = 5;
+#pragma D binding "1.0" SZOMB
+/*
+ * SONPROC defined here for compatibility with ported scripts
+ */
+inline char SONPROC = 6;
+#pragma D binding "1.0" SONPROC
+
+inline char SOBJ_NONE = 0;
+#pragma D binding "1.0" SOBJ_NONE
+/*
+ * SOBJ_* defined here for compatibility with ported scripts
+ */
+inline char SOBJ_MUTEX = 1;
+#pragma D binding "1.0" SOBJ_MUTEX
+inline char SOBJ_RWLOCK = 2;
+#pragma D binding "1.0" SOBJ_RWLOCK
+inline char SOBJ_CV = 3;
+#pragma D binding "1.0" SOBJ_CV
+inline char SOBJ_SEMA = 4;
+#pragma D binding "1.0" SOBJ_SEMA
+inline char SOBJ_USER = 5;
+#pragma D binding "1.0" SOBJ_USER
+inline char SOBJ_USER_PI = 6;
+#pragma D binding "1.0" SOBJ_USER_PI
+inline char SOBJ_SHUTTLE = 7;
+#pragma D binding "1.0" SOBJ_SHUTTLE
+
+inline char PR_MODEL_ILP32 = 1;
+#pragma D binding "1.0" PR_MODEL_ILP32
+inline char PR_MODEL_LP64 = 2;
+#pragma D binding "1.0" PR_MODEL_LP64
+
+/*
+ * PR_* defined here for compatibility with ported scripts
+ */
+inline int PR_STOPPED = 0x00000001;
+#pragma D binding "1.0" PR_STOPPED
+inline int PR_ISTOP = 0x00000002;
+#pragma D binding "1.0" PR_ISTOP
+inline int PR_DSTOP = 0x00000004;
+#pragma D binding "1.0" PR_DSTOP
+inline int PR_STEP = 0x00000008;
+#pragma D binding "1.0" PR_STEP
+inline int PR_ASLEEP = 0x00000010;
+#pragma D binding "1.0" PR_ASLEEP
+inline int PR_PCINVAL = 0x00000020;
+#pragma D binding "1.0" PR_PCINVAL
+inline int PR_ASLWP = 0x00000040;
+#pragma D binding "1.0" PR_ASLWP
+inline int PR_AGENT = 0x00000080;
+#pragma D binding "1.0" PR_AGENT
+inline int PR_DETACH = 0x00000100;
+#pragma D binding "1.0" PR_DETACH
+inline int PR_DAEMON = 0x00000200;
+#pragma D binding "1.0" PR_DAEMON
+inline int PR_ISSYS = 0x00001000;
+#pragma D binding "1.0" PR_ISSYS
+inline int PR_VFORKP = 0x00002000;
+#pragma D binding "1.0" PR_VFORKP
+inline int PR_ORPHAN = 0x00004000;
+#pragma D binding "1.0" PR_ORPHAN
+inline int PR_FORK = 0x00100000;
+#pragma D binding "1.0" PR_FORK
+inline int PR_RLC = 0x00200000;
+#pragma D binding "1.0" PR_RLC
+inline int PR_KLC = 0x00400000;
+#pragma D binding "1.0" PR_KLC
+inline int PR_ASYNC = 0x00800000;
+#pragma D binding "1.0" PR_ASYNC
+inline int PR_MSACCT = 0x01000000;
+#pragma D binding "1.0" PR_MSACCT
+inline int PR_BPTADJ = 0x02000000;
+#pragma D binding "1.0" PR_BPTADJ
+inline int PR_PTRACE = 0x04000000;
+#pragma D binding "1.0" PR_PTRACE
+inline int PR_MSFORK = 0x08000000;
+#pragma D binding "1.0" PR_MSFORK
+inline int PR_IDLE = 0x10000000;
+#pragma D binding "1.0" PR_IDLE
+
+/*
+ * Translate from the kernel's proc_t structure to a proc(4) psinfo_t struct.
+ * We do not provide support for pr_size, pr_rssize, pr_pctcpu, and pr_pctmem.
+ * We also do not fill in pr_lwp (the lwpsinfo_t for the representative LWP)
+ * because we do not have the ability to select and stop any representative.
+ * Also, for the moment, pr_wstat, pr_time, and pr_ctime are not supported,
+ * but these could be supported by DTrace in the future using subroutines.
+ * Note that any member added to this translator should also be added to the
+ * kthread_t-to-psinfo_t translator, below.
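+ *
+ * In practice a script consumes this translator through the curpsinfo
+ * shorthand defined at the bottom of this file; an illustrative one-liner
+ * (not part of the shipped file):
+ *
+ *     proc:::exec-success { trace(curpsinfo->pr_psargs); }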
+ */ +typedef int taskid_t; +typedef int projid_t; +typedef int poolid_t; +typedef struct timespec timestruc_t; /* (SECONDS, NANOSECONDS) */ + +typedef struct psinfo { + int pr_nlwp; /* number of active lwps in the process */ + pid_t pr_pid; /* unique process id */ + pid_t pr_ppid; /* process id of parent */ + pid_t pr_pgid; /* pid of process group leader */ + pid_t pr_sid; /* session id */ + uid_t pr_uid; /* real user id */ + uid_t pr_euid; /* effective user id */ + gid_t pr_gid; /* real group id */ + gid_t pr_egid; /* effective group id */ + uintptr_t pr_addr; /* address of process */ + dev_t pr_ttydev; /* controlling tty device (or PRNODEV) */ + timestruc_t pr_start;/* process start time, DEPRECATED, see pr_start_tv below */ + char pr_fname[16]; /* name of execed file */ + char pr_psargs[80]; /* initial characters of arg list */ + int pr_argc; /* initial argument count */ + user_addr_t pr_argv; /* address of initial argument vector */ + user_addr_t pr_envp; /* address of initial environment vector */ + char pr_dmodel; /* data model of the process */ + taskid_t pr_taskid; /* task id */ + projid_t pr_projid; /* project id */ + poolid_t pr_poolid; /* pool id */ + zoneid_t pr_zoneid; /* zone id */ + + struct timeval pr_start_tv; /* process start time, from the epoch (SECONDS, MICROSECONDS) */ +} psinfo_t; + +inline int P_LP64 = 0x00000004; /* Process is LP64 */ +#pragma D binding "1.0" P_LP64 + +#pragma D binding "1.0" translator +translator psinfo_t < struct proc * P > { + pr_nlwp = ((struct task *)(P->task))->thread_count; + pr_pid = P->p_pid; + pr_ppid = P->p_ppid; + pr_pgid = P->p_pgrp->pg_id; + pr_sid = P->p_pgrp->pg_session->s_sid; + pr_uid = P->p_ucred->cr_posix.cr_ruid; + pr_euid = P->p_ucred->cr_posix.cr_uid; + pr_gid = P->p_ucred->cr_posix.cr_rgid; + pr_egid = P->p_ucred->cr_posix.cr_groups[0]; + pr_addr = (uintptr_t)P; + + pr_ttydev = (P->p_pgrp->pg_session->s_ttyvp == NULL) ? (dev_t)-1 : + P->p_pgrp->pg_session->s_ttyp->t_dev; + + /* + * timestruct_t (SECONDS, NANOSECONDS) is not available directly nor can a further translation + * be specified here. Zero the structure. Use pr_start_tv instead. + */ + pr_start = *((timestruc_t *)`dtrace_zero); /* ` */ + + pr_fname = P->p_comm; + pr_psargs = P->p_comm; /* XXX omits command line arguments XXX */ + pr_argc = P->p_argc; + pr_argv = P->p_dtrace_argv; + pr_envp = P->p_dtrace_envp; + + pr_dmodel = (P->p_flag & P_LP64) ? PR_MODEL_LP64 : PR_MODEL_ILP32; + + pr_taskid = 0; + pr_projid = 0; + pr_poolid = 0; + pr_zoneid = 0; + + /* + * pstats->pstart is a struct timeval: (SECONDS, MICROSECONDS). + */ + pr_start_tv = P->p_start; +}; + +/* + * Translate from the kernel's kthread_t structure to a proc(4) psinfo_t + * struct. Lacking a facility to define one translator only in terms of + * another, we explicitly define each member by using the proc_t-to-psinfo_t + * translator, above; any members added to that translator should also be + * added here. (The only exception to this is pr_start, which -- due to it + * being a structure -- cannot be defined in terms of a translator at all.) 
+ */ +#pragma D binding "1.0" translator +translator psinfo_t < thread_t T > { + pr_nlwp = xlate ((struct proc *)(T->task->bsd_info)).pr_nlwp; + pr_pid = xlate ((struct proc *)(T->task->bsd_info)).pr_pid; + pr_ppid = xlate ((struct proc *)(T->task->bsd_info)).pr_ppid; + pr_pgid = xlate ((struct proc *)(T->task->bsd_info)).pr_pgid; + pr_sid = xlate ((struct proc *)(T->task->bsd_info)).pr_sid; + pr_uid = xlate ((struct proc *)(T->task->bsd_info)).pr_uid; + pr_euid = xlate ((struct proc *)(T->task->bsd_info)).pr_euid; + pr_gid = xlate ((struct proc *)(T->task->bsd_info)).pr_gid; + pr_egid = xlate ((struct proc *)(T->task->bsd_info)).pr_egid; + pr_addr = xlate ((struct proc *)(T->task->bsd_info)).pr_addr; + pr_ttydev = xlate ((struct proc *)(T->task->bsd_info)).pr_ttydev; + pr_start = xlate ((struct proc *)(T->task->bsd_info)).pr_start; + pr_fname = xlate ((struct proc *)(T->task->bsd_info)).pr_fname; + pr_psargs = xlate ((struct proc *)(T->task->bsd_info)).pr_psargs; /* XXX omits command line arguments XXX */ + pr_argc = xlate ((struct proc *)(T->task->bsd_info)).pr_argc; + pr_argv = xlate ((struct proc *)(T->task->bsd_info)).pr_argv; + pr_envp = xlate ((struct proc *)(T->task->bsd_info)).pr_envp; + pr_dmodel = xlate ((struct proc *)(T->task->bsd_info)).pr_dmodel; + pr_taskid = xlate ((struct proc *)(T->task->bsd_info)).pr_taskid; + pr_projid = xlate ((struct proc *)(T->task->bsd_info)).pr_projid; + pr_poolid = xlate ((struct proc *)(T->task->bsd_info)).pr_poolid; + pr_zoneid = xlate ((struct proc *)(T->task->bsd_info)).pr_zoneid; + + pr_start_tv = xlate ((struct proc *)(T->task->bsd_info)).pr_start_tv; +}; + +/* + * Translate from the kernel's kthread_t structure to a proc(4) lwpsinfo_t. + * We do not provide support for pr_nice, pr_oldpri, pr_cpu, or pr_pctcpu. + * Also, for the moment, pr_start and pr_time are not supported, but these + * could be supported by DTrace in the future using subroutines. + */ + +inline processor_t PROCESSOR_NULL = ((processor_t) 0); +#pragma D binding "1.0" PROCESSOR_NULL + +typedef int psetid_t; + +typedef struct lwpsinfo { + int pr_flag; /* lwp flags (DEPRECATED; do not use) */ + id_t pr_lwpid; /* lwp id */ + uintptr_t pr_addr; /* internal address of lwp */ + uintptr_t pr_wchan; /* wait addr for sleeping lwp */ + char pr_stype; /* synchronization event type */ + char pr_state; /* numeric lwp state */ + char pr_sname; /* printable character for pr_state */ + short pr_syscall; /* system call number (if in syscall) */ + int pr_pri; /* priority, high value is high priority */ + char pr_clname[8]; /* scheduling class name */ + processorid_t pr_onpro; /* processor which last ran this lwp */ + processorid_t pr_bindpro; /* processor to which lwp is bound */ + psetid_t pr_bindpset; /* processor set to which lwp is bound */ +} lwpsinfo_t; + +#pragma D binding "1.0" translator +translator lwpsinfo_t < thread_t T > { + pr_flag = 0; /* lwp flags (DEPRECATED; do not use) */ + pr_lwpid = (id_t)T->thread_id; + pr_addr = (uintptr_t)T; + pr_wchan = (uintptr_t)(((uthread_t)(T->uthread))->uu_wchan); + + pr_stype = SOBJ_NONE; /* XXX Undefined synch object (or none) XXX */ + pr_state = curproc->p_stat; + pr_sname = (curproc->p_stat == SIDL) ? 'I' : + (curproc->p_stat == SRUN) ? 'R' : + (curproc->p_stat == SSLEEP) ? 'S' : + (curproc->p_stat == SSTOP) ? 'T' : + (curproc->p_stat == SZOMB) ? 'Z' : '?'; + + pr_syscall = ((uthread_t)(T->uthread))->uu_code; + pr_pri = T->sched_pri; + + pr_clname = (T->sched_mode & 0x0001) ? "RT" : + (T->sched_mode & 0x0002) ? 
"TS" : "SYS"; + + pr_onpro = (T->last_processor == PROCESSOR_NULL) ? -1 : T->last_processor->cpu_id; + pr_bindpro = -1; /* Darwin does not bind threads to processors. */ + pr_bindpset = -1; /* Darwin does not partition processors. */ +}; + +inline psinfo_t *curpsinfo = xlate (curproc); +#pragma D attributes Stable/Stable/Common curpsinfo +#pragma D binding "1.0" curpsinfo + +inline lwpsinfo_t *curlwpsinfo = xlate (curthread); +#pragma D attributes Stable/Stable/Common curlwpsinfo +#pragma D binding "1.0" curlwpsinfo + +/* XXX Really want vn_getpath(curproc->p_fd->fd_cdir, , ) but that takes namecache_rw_lock XXX */ +inline string cwd = curproc->p_fd->fd_cdir->v_name == NULL ? + "" : stringof(curproc->p_fd->fd_cdir->v_name); +#pragma D attributes Stable/Stable/Common cwd +#pragma D binding "1.0" cwd + +/* XXX Really want vn_getpath(curproc->p_fd->fd_rdir, , ) but that takes namecache_rw_lock XXX */ +inline string root = curproc->p_fd->fd_rdir == NULL ? "/" : + curproc->p_fd->fd_rdir->v_name == NULL ? "" : + stringof(curproc->p_fd->fd_rdir->v_name); +#pragma D attributes Stable/Stable/Common root +#pragma D binding "1.0" root diff --git a/bsd/dev/dtrace/scripts/errno.d b/bsd/dev/dtrace/scripts/errno.d new file mode 100644 index 000000000..f29d9a1dc --- /dev/null +++ b/bsd/dev/dtrace/scripts/errno.d @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +inline int EPERM = 1; +#pragma D binding "1.0" EPERM +inline int ENOENT = 2; +#pragma D binding "1.0" ENOENT +inline int ESRCH = 3; +#pragma D binding "1.0" ESRCH +inline int EINTR = 4; +#pragma D binding "1.0" EINTR +inline int EIO = 5; +#pragma D binding "1.0" EIO +inline int ENXIO = 6; +#pragma D binding "1.0" ENXIO +inline int E2BIG = 7; +#pragma D binding "1.0" E2BIG +inline int ENOEXEC = 8; +#pragma D binding "1.0" ENOEXEC +inline int EBADF = 9; +#pragma D binding "1.0" EBADF +inline int ECHILD = 10; +#pragma D binding "1.0" ECHILD +inline int EDEADLK = 11; +#pragma D binding "1.0" EDEADLK +inline int ENOMEM = 12; +#pragma D binding "1.0" ENOMEM +inline int EACCES = 13; +#pragma D binding "1.0" EACCES +inline int EFAULT = 14; +#pragma D binding "1.0" EFAULT +inline int ENOTBLK = 15; +#pragma D binding "1.0" ENOTBLK +inline int EBUSY = 16; +#pragma D binding "1.0" EBUSY +inline int EEXIST = 17; +#pragma D binding "1.0" EEXIST +inline int EXDEV = 18; +#pragma D binding "1.0" EXDEV +inline int ENODEV = 19; +#pragma D binding "1.0" ENODEV +inline int ENOTDIR = 20; +#pragma D binding "1.0" ENOTDIR +inline int EISDIR = 21; +#pragma D binding "1.0" EISDIR +inline int EINVAL = 22; +#pragma D binding "1.0" EINVAL +inline int ENFILE = 23; +#pragma D binding "1.0" ENFILE +inline int EMFILE = 24; +#pragma D binding "1.0" EMFILE +inline int ENOTTY = 25; +#pragma D binding "1.0" ENOTTY +inline int ETXTBSY = 26; +#pragma D binding "1.0" ETXTBSY +inline int EFBIG = 27; +#pragma D binding "1.0" EFBIG +inline int ENOSPC = 28; +#pragma D binding "1.0" ENOSPC +inline int ESPIPE = 29; +#pragma D binding "1.0" ESPIPE +inline int EROFS = 30; +#pragma D binding "1.0" EROFS +inline int EMLINK = 31; +#pragma D binding "1.0" EMLINK +inline int EPIPE = 32; +#pragma D binding "1.0" EPIPE +inline int EDOM = 33; +#pragma D binding "1.0" EDOM +inline int ERANGE = 34; +#pragma D binding "1.0" ERANGE +inline int EAGAIN = 35; +inline int EWOULDBLOCK = 35; +#pragma D binding "1.0" EAGAIN +#pragma D binding "1.0" EWOULDBLOCK +inline int EINPROGRESS = 36; +#pragma D binding "1.0" EINPROGRESS +inline int EALREADY = 37; +#pragma D binding "1.0" EALREADY +inline int ENOTSOCK = 38; +#pragma D binding "1.0" ENOTSOCK +inline int EDESTADDRREQ = 39; +#pragma D binding "1.0" EDESTADDRREQ +inline int EMSGSIZE = 40; +#pragma D binding "1.0" EMSGSIZE +inline int EPROTOTYPE = 41; +#pragma D binding "1.0" EPROTOTYPE +inline int ENOPROTOOPT = 42; +#pragma D binding "1.0" ENOPROTOOPT +inline int EPROTONOSUPPORT = 43; +#pragma D binding "1.0" EPROTONOSUPPORT +inline int ESOCKTNOSUPPORT = 44; +#pragma D binding "1.0" ESOCKTNOSUPPORT +inline int ENOTSUP = 45; +#pragma D binding "1.0" ENOTSUP +inline int EPFNOSUPPORT = 46; +#pragma D binding "1.0" EPFNOSUPPORT +inline int EAFNOSUPPORT = 47; +#pragma D binding "1.0" EAFNOSUPPORT +inline int EADDRINUSE = 48; +#pragma D binding "1.0" EADDRINUSE +inline int EADDRNOTAVAIL = 49; +#pragma D binding "1.0" EADDRNOTAVAIL +inline int ENETDOWN = 50; +#pragma D binding "1.0" ENETDOWN +inline int ENETUNREACH = 51; +#pragma D binding "1.0" ENETUNREACH +inline int ENETRESET = 52; +#pragma D binding "1.0" ENETRESET +inline int ECONNABORTED = 53; +#pragma D binding "1.0" ECONNABORTED +inline int ECONNRESET = 54; +#pragma D binding "1.0" ECONNRESET +inline int ENOBUFS = 55; +#pragma D binding "1.0" ENOBUFS +inline int EISCONN = 56; +#pragma D binding "1.0" EISCONN +inline int ENOTCONN = 57; +#pragma D binding "1.0" ENOTCONN +inline int ESHUTDOWN = 58; +#pragma D 
binding "1.0" ESHUTDOWN +inline int ETOOMANYREFS = 59; +#pragma D binding "1.0" ETOOMANYREFS +inline int ETIMEDOUT = 60; +#pragma D binding "1.0" ETIMEDOUT +inline int ECONNREFUSED = 61; +#pragma D binding "1.0" ECONNREFUSED +inline int ELOOP = 62; +#pragma D binding "1.0" ELOOP +inline int ENAMETOOLONG = 63; +#pragma D binding "1.0" ENAMETOOLONG +inline int EHOSTDOWN = 64; +#pragma D binding "1.0" EHOSTDOWN +inline int EHOSTUNREACH = 65; +#pragma D binding "1.0" EHOSTUNREACH +inline int ENOTEMPTY = 66; +#pragma D binding "1.0" ENOTEMPTY +inline int EPROCLIM = 67; +#pragma D binding "1.0" EPROCLIM +inline int EUSERS = 68; +#pragma D binding "1.0" EUSERS +inline int EDQUOT = 69; +#pragma D binding "1.0" EDQUOT +inline int ESTALE = 70; +#pragma D binding "1.0" ESTALE +inline int EREMOTE = 71; +#pragma D binding "1.0" EREMOTE +inline int EBADRPC = 72; +#pragma D binding "1.0" EBADRPC +inline int ERPCMISMATCH = 73; +#pragma D binding "1.0" ERPCMISMATCH +inline int EPROGUNAVAIL = 74; +#pragma D binding "1.0" EPROGUNAVAIL +inline int EPROGMISMATCH = 75; +#pragma D binding "1.0" EPROGMISMATCH +inline int EPROCUNAVAIL = 76; +#pragma D binding "1.0" EPROCUNAVAIL +inline int ENOLCK = 77; +#pragma D binding "1.0" ENOLCK +inline int ENOSYS = 78; +#pragma D binding "1.0" ENOSYS +inline int EFTYPE = 79; +#pragma D binding "1.0" EFTYPE +inline int EAUTH = 80; +#pragma D binding "1.0" EAUTH +inline int ENEEDAUTH = 81; +#pragma D binding "1.0" ENEEDAUTH +inline int EPWROFF = 82; +#pragma D binding "1.0" EPWROFF +inline int EDEVERR = 83; +#pragma D binding "1.0" EDEVERR +inline int EOVERFLOW = 84; +#pragma D binding "1.0" EOVERFLOW +inline int EBADEXEC = 85; +#pragma D binding "1.0" EBADEXEC +inline int EBADARCH = 86; +#pragma D binding "1.0" EBADARCH +inline int ESHLIBVERS = 87; +#pragma D binding "1.0" ESHLIBVERS +inline int EBADMACHO = 88; +#pragma D binding "1.0" EBADMACHO +inline int ECANCELED = 89; +#pragma D binding "1.0" ECANCELED +inline int EIDRM = 90; +#pragma D binding "1.0" EIDRM +inline int ENOMSG = 91; +#pragma D binding "1.0" ENOMSG +inline int EILSEQ = 92; +#pragma D binding "1.0" EILSEQ +inline int ENOATTR = 93; +#pragma D binding "1.0" ENOATTR +inline int EBADMSG = 94; +#pragma D binding "1.0" EBADMSG +inline int EMULTIHOP = 95; +#pragma D binding "1.0" EMULTIHOP +inline int ENODATA = 96; +#pragma D binding "1.0" ENODATA +inline int ENOLINK = 97; +#pragma D binding "1.0" ENOLINK +inline int ENOSR = 98; +#pragma D binding "1.0" ENOSR +inline int ENOSTR = 99; +#pragma D binding "1.0" ENOSTR +inline int EPROTO = 100; +#pragma D binding "1.0" EPROTO +inline int ETIME = 101; +#pragma D binding "1.0" ETIME +inline int EOPNOTSUPP = 102; +#pragma D binding "1.0" EOPNOTSUPP +inline int ELAST = 102; +#pragma D binding "1.0" ELAST diff --git a/bsd/dev/dtrace/scripts/io.d b/bsd/dev/dtrace/scripts/io.d new file mode 100644 index 000000000..f295f1026 --- /dev/null +++ b/bsd/dev/dtrace/scripts/io.d @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2007 Apple, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#pragma D depends_on library darwin.d +#pragma D depends_on module mach_kernel +#pragma D depends_on provider io + +inline int B_WRITE = 0x0000; +#pragma D binding "1.0" B_WRITE +inline int B_READ = 0x0001; +#pragma D binding "1.0" B_READ +inline int B_ASYNC = 0x0002; +#pragma D binding "1.0" B_ASYNC +inline int B_NOCACHE = 0x0004; +#pragma D binding "1.0" B_NOCACHE +inline int B_DELWRI = 0x0008; +#pragma D binding "1.0" B_DELWRI +inline int B_LOCKED = 0x0010; +#pragma D binding "1.0" B_LOCKED +inline int B_PHYS = 0x0020; +#pragma D binding "1.0" B_PHYS +inline int B_CLUSTER = 0x0040; +#pragma D binding "1.0" B_CLUSTER +inline int B_PAGEIO = 0x0080; +#pragma D binding "1.0" B_PAGEIO +inline int B_META = 0x0100; +#pragma D binding "1.0" B_META +inline int B_RAW = 0x0200; +#pragma D binding "1.0" B_RAW +inline int B_FUA = 0x0400; +#pragma D binding "1.0" B_FUA +inline int B_PASSIVE = 0x0800; +#pragma D binding "1.0" B_PASSIVE + +typedef struct bufinfo { + int b_flags; /* buffer status */ + size_t b_bcount; /* number of bytes */ + caddr_t b_addr; /* buffer address */ + uint64_t b_lblkno; /* block # on device */ + uint64_t b_blkno; /* expanded block # on device */ + size_t b_resid; /* # of bytes not transferred */ + size_t b_bufsize; /* size of allocated buffer */ + caddr_t b_iodone; /* I/O completion routine */ + int b_error; /* expanded error field */ + dev_t b_edev; /* extended device */ +} bufinfo_t; + +#pragma D binding "1.0" translator +translator bufinfo_t < struct buf *B > { + b_flags = B->b_flags; + b_addr = (caddr_t)B->b_datap; + b_bcount = B->b_bcount; + b_lblkno = B->b_lblkno; + b_blkno = B->b_blkno; + b_resid = B->b_resid; + b_bufsize = B->b_bufsize; + b_iodone = (caddr_t)B->b_iodone; + b_error = B->b_error; + b_edev = B->b_dev; +}; + +typedef struct devinfo { + int dev_major; /* major number */ + int dev_minor; /* minor number */ + int dev_instance; /* instance number */ + string dev_name; /* name of device */ + string dev_statname; /* name of device + instance/minor */ + string dev_pathname; /* pathname of device */ +} devinfo_t; + +#pragma D binding "1.0" translator +translator devinfo_t < struct buf *B > { + dev_major = getmajor(B->b_dev); + dev_minor = getminor(B->b_dev); + dev_instance = getminor(B->b_dev); + dev_name = "??"; /* XXX */ + dev_statname = "??"; /* XXX */ + dev_pathname = "??"; /* XXX */ +}; + +typedef off_t offset_t; + +typedef struct fileinfo { + string fi_name; /* name (basename of fi_pathname) */ + string fi_dirname; /* directory (dirname of fi_pathname) */ + string fi_pathname; /* full pathname */ + offset_t fi_offset; /* offset within file */ + string fi_fs; /* filesystem */ + string fi_mount; /* mount point of file system */ + int fi_oflags; /* open(2) flags for file descriptor */ +} fileinfo_t; + +#pragma D binding "1.0" translator +translator fileinfo_t < struct buf *B > { + fi_name = B->b_vp->v_name == NULL ? "" : B->b_vp->v_name; + + fi_dirname = B->b_vp->v_parent == NULL ? "" : + (B->b_vp->v_parent->v_name == NULL ? 
"" : B->b_vp->v_parent->v_name); + + fi_pathname = strjoin("??/", + strjoin(B->b_vp->v_parent == NULL ? "" : + (B->b_vp->v_parent->v_name == NULL ? "" : B->b_vp->v_parent->v_name), + strjoin("/", + B->b_vp->v_name == NULL ? "" : B->b_vp->v_name))); + + fi_offset = B->b_upl == NULL ? -1 : ((upl_t)B->b_upl)->offset; + + fi_fs = B->b_vp->v_mount->mnt_vtable->vfc_name; + + fi_mount = B->b_vp->v_mount->mnt_vnodecovered == NULL ? "/" : B->b_vp->v_mount->mnt_vnodecovered->v_name; + + fi_oflags = 0; +}; + +/* + * The following inline constants can be used to examine fi_oflags when using + * the fds[] array or a translated fileglob *. Note that the various open + * flags behave as a bit-field *except* for O_RDONLY, O_WRONLY, and O_RDWR. + * To test the open mode, you write code similar to that used with the fcntl(2) + * F_GET[X]FL command, such as: if ((fi_oflags & O_ACCMODE) == O_WRONLY). + */ +inline int O_ACCMODE = 0x0003; +#pragma D binding "1.1" O_ACCMODE + +inline int O_RDONLY = 0x0000; +#pragma D binding "1.1" O_RDONLY +inline int O_WRONLY = 0x0001; +#pragma D binding "1.1" O_WRONLY +inline int O_RDWR = 0x0002; +#pragma D binding "1.1" O_RDWR + +inline int O_NONBLOCK = 0x0004; +#pragma D binding "1.1" O_NONBLOCK +inline int O_APPEND = 0x0008; +#pragma D binding "1.1" O_APPEND +inline int O_SHLOCK = 0x0010; +#pragma D binding "1.1" O_SHLOCK +inline int O_EXLOCK = 0x0020; +#pragma D binding "1.1" O_EXLOCK +inline int O_ASYNC = 0x0040; +#pragma D binding "1.1" O_ASYNC +inline int O_SYNC = 0x0080; +#pragma D binding "1.1" O_SYNC +inline int O_NOFOLLOW = 0x0100; +#pragma D binding "1.1" O_NOFOLLOW +inline int O_CREAT = 0x0200; +#pragma D binding "1.1" O_CREAT +inline int O_TRUNC = 0x0400; +#pragma D binding "1.1" O_TRUNC +inline int O_EXCL = 0x0800; +#pragma D binding "1.1" O_EXCL +inline int O_EVTONLY = 0x8000; +#pragma D binding "1.1" O_EVTONLY +inline int O_NOCTTY = 0x20000; +#pragma D binding "1.1" O_NOCTTY +inline int O_DIRECTORY = 0x100000; +#pragma D binding "1.1" O_DIRECTORY +inline int O_SYMLINK = 0x200000; +#pragma D binding "1.1" O_SYMLINK + +/* From bsd/sys/file_internal.h */ +inline int DTYPE_VNODE = 1; +#pragma D binding "1.1" DTYPE_VNODE +inline int DTYPE_SOCKET = 2; +#pragma D binding "1.1" DTYPE_SOCKET +inline int DTYPE_PSXSHM = 3; +#pragma D binding "1.1" DTYPE_PSXSHM +inline int DTYPE_PSXSEM = 4; +#pragma D binding "1.1" DTYPE_PSXSEM +inline int DTYPE_KQUEUE = 5; +#pragma D binding "1.1" DTYPE_KQUEUE +inline int DTYPE_PIPE = 6; +#pragma D binding "1.1" DTYPE_PIPE +inline int DTYPE_FSEVENTS = 7; +#pragma D binding "1.1" DTYPE_FSEVENTS + +#pragma D binding "1.1" translator +translator fileinfo_t < struct fileglob *F > { + fi_name = (F == NULL) ? "" : + F->fg_ops->fo_type == DTYPE_VNODE ? + ((struct vnode *)F->fg_data)->v_name == NULL ? "" : ((struct vnode *)F->fg_data)->v_name : + F->fg_ops->fo_type == DTYPE_SOCKET ? "" : + F->fg_ops->fo_type == DTYPE_PSXSHM ? "" : + F->fg_ops->fo_type == DTYPE_PSXSEM ? "" : + F->fg_ops->fo_type == DTYPE_KQUEUE ? "" : + F->fg_ops->fo_type == DTYPE_PIPE ? "" : + F->fg_ops->fo_type == DTYPE_FSEVENTS ? "" : ""; + + fi_dirname = (F == NULL) ? "" : + F->fg_ops->fo_type != DTYPE_VNODE ? "" : + ((struct vnode *)F->fg_data)->v_parent == NULL ? "" : + (((struct vnode *)F->fg_data)->v_parent->v_name == NULL ? "" : + ((struct vnode *)F->fg_data)->v_parent->v_name); + + fi_pathname = (F == NULL) ? "" : + F->fg_ops->fo_type != DTYPE_VNODE ? "" : + strjoin("??/", + strjoin(((struct vnode *)F->fg_data)->v_parent == NULL ? 
"" : + (((struct vnode *)F->fg_data)->v_parent->v_name == NULL ? "" : + ((struct vnode *)F->fg_data)->v_parent->v_name), + strjoin("/", + ((struct vnode *)F->fg_data)->v_name == NULL ? "" : + ((struct vnode *)F->fg_data)->v_name))); + + fi_offset = (F == NULL) ? 0 : + F->fg_offset; + + fi_fs = (F == NULL) ? "" : + F->fg_ops->fo_type != DTYPE_VNODE ? "" : + ((struct vnode *)F->fg_data)->v_mount->mnt_vtable->vfc_name; + + fi_mount = (F == NULL) ? "" : + F->fg_ops->fo_type != DTYPE_VNODE ? "" : + ((struct vnode *)F->fg_data)->v_mount->mnt_vnodecovered == NULL ? "/" : + ((struct vnode *)F->fg_data)->v_mount->mnt_vnodecovered->v_name; + + fi_oflags = (F == NULL) ? 0 : + F->fg_flag - 1; /* Subtract one to map FREAD/FWRITE bitfield to O_RD/WR open() flags. */ +}; + +inline fileinfo_t fds[int fd] = xlate ( + (fd >= 0 && fd <= curproc->p_fd->fd_lastfile) ? + (struct fileglob *)(curproc->p_fd->fd_ofiles[fd]->f_fglob) : + (struct fileglob *)NULL); + +#pragma D attributes Stable/Stable/Common fds +#pragma D binding "1.1" fds + +#pragma D binding "1.2" translator +translator fileinfo_t < struct vnode *V > { + fi_name = V->v_name == NULL ? "" : V->v_name; + + fi_dirname = V->v_parent == NULL ? "" : + (V->v_parent->v_name == NULL ? "" : V->v_parent->v_name); + + fi_pathname = strjoin("??/", + strjoin(V->v_parent == NULL ? "" : + (V->v_parent->v_name == NULL ? "" : V->v_parent->v_name), + strjoin("/", + V->v_name == NULL ? "" : V->v_name))); + + fi_fs = V->v_mount->mnt_vtable->vfc_name; + + fi_mount = V->v_mount->mnt_vnodecovered == NULL ? "/" : V->v_mount->mnt_vnodecovered->v_name; +}; + diff --git a/bsd/dev/dtrace/scripts/ip.d b/bsd/dev/dtrace/scripts/ip.d new file mode 100644 index 000000000..c3658adc4 --- /dev/null +++ b/bsd/dev/dtrace/scripts/ip.d @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2006-2012 Apple Inc. All Rights Reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#pragma D depends_on library darwin.d +#pragma D depends_on module mach_kernel +#pragma D depends_on provider ip + +/* Translators for IP dtrace provider */ + +typedef struct pktinfo { + struct mbuf *pkt_addr; /* Pointer to the packet (struct mbuf) */ +} pktinfo_t; + +#pragma D binding "1.0" translator +translator pktinfo_t < struct mbuf *m > { + pkt_addr = m; +}; + +typedef struct csinfo { + uint8_t ip_ver; + uint16_t dport; + uint16_t sport; + string ip_daddr; + string ip_saddr; + uint8_t protocol; + struct inpcb *cs_addr; /* Pointer to inpcb (struct inpcb) */ +} csinfo_t; + +#pragma D binding "1.0" translator +translator csinfo_t < struct inpcb *P > { + cs_addr = P; + ip_ver = (P != NULL) ? (((P->inp_vflag & 0x2) != 0) ? 6 : 4) : 0; + dport = (P != NULL) ? 
diff --git a/bsd/dev/dtrace/scripts/ip.d b/bsd/dev/dtrace/scripts/ip.d
new file mode 100644
index 000000000..c3658adc4
--- /dev/null
+++ b/bsd/dev/dtrace/scripts/ip.d
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2006-2012 Apple Inc. All Rights Reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#pragma D depends_on library darwin.d
+#pragma D depends_on module mach_kernel
+#pragma D depends_on provider ip
+
+/* Translators for IP dtrace provider */
+
+typedef struct pktinfo {
+	struct mbuf *pkt_addr;		/* Pointer to the packet (struct mbuf) */
+} pktinfo_t;
+
+#pragma D binding "1.0" translator
+translator pktinfo_t < struct mbuf *m > {
+	pkt_addr = m;
+};
+
+typedef struct csinfo {
+	uint8_t ip_ver;
+	uint16_t dport;
+	uint16_t sport;
+	string ip_daddr;
+	string ip_saddr;
+	uint8_t protocol;
+	struct inpcb *cs_addr;		/* Pointer to inpcb (struct inpcb) */
+} csinfo_t;
+
+#pragma D binding "1.0" translator
+translator csinfo_t < struct inpcb *P > {
+	cs_addr = P;
+	ip_ver = (P != NULL) ? (((P->inp_vflag & 0x2) != 0) ? 6 : 4) : 0;
+	dport = (P != NULL) ? ntohs(P->inp_fport) : 0;
+	sport = (P != NULL) ? ntohs(P->inp_lport) : 0;
+	ip_saddr = (P != NULL) ? (((P->inp_vflag & 0x2) != 0) ?
+	    inet_ntoa6(&P->inp_dependladdr.inp6_local) :
+	    inet_ntoa((uint32_t *)&P->inp_dependladdr.inp46_local.ia46_addr4.s_addr)) : "";
+	ip_daddr = (P != NULL) ? (((P->inp_vflag & 0x2) != 0) ?
+	    inet_ntoa6(&P->inp_dependfaddr.inp6_foreign) :
+	    inet_ntoa((uint32_t *)&P->inp_dependfaddr.inp46_foreign.ia46_addr4.s_addr)) : "";
+	protocol = (P != NULL) ? P->inp_ip_p : 0;
+};
+
+typedef struct ipinfo {
+	uint8_t ip_ver;			/* IP version (4, 6) */
+	uint16_t ip_plength;		/* payload length */
+	string ip_saddr;		/* source address */
+	string ip_daddr;		/* destination address */
+} ipinfo_t;
+
+/*
+ * The ip vhl byte is the first byte in struct ip. The type names are
+ * different depending on whether _IP_VHL is defined or not and that will
+ * confuse dtrace. So instead of using type names, just cast and extract
+ * version and header length info from the ip structure.
+ */
+#pragma D binding "1.0" translator
+translator ipinfo_t < struct ip * ip > {
+	ip_ver = (ip != NULL) ? ((*(uint8_t *) ip) & 0xf0) >> 4 : 0;
+	ip_plength = (ip != NULL) ?
+	    (ntohs(ip->ip_len) - (((*(uint8_t *) ip) & 0x0f) << 2)) : 0;
+	ip_saddr = (ip != NULL) ? inet_ntoa((uint32_t *)&ip->ip_src.s_addr) : "";
+	ip_daddr = (ip != NULL) ? inet_ntoa((uint32_t *)&ip->ip_dst.s_addr) : "";
+};
+
+#pragma D binding "1.0" translator
+translator ipinfo_t < struct ip6_hdr *ip6 > {
+	ip_ver = (ip6 != NULL) ? (ip6->ip6_ctlun.ip6_un2_vfc & 0xf0) >> 4 : 0;
+	ip_plength = (ip6 != NULL) ? (ntohs(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen)) : 0;
+	ip_saddr = (ip6 != NULL) ? inet_ntoa6(&ip6->ip6_src) : "";
+	ip_daddr = (ip6 != NULL) ? inet_ntoa6(&ip6->ip6_dst) : "";
+};
+
+/*
+ * void_ip_t is a void pointer to either an IPv4 or IPv6 header. It has
+ * its own type name so that a translator can be determined.
+ */
+typedef uintptr_t void_ip_t;
+#pragma D binding "1.0" translator
+translator ipinfo_t < void_ip_t *i > {
+	ip_ver = (i != NULL) ? (*(uint8_t *)i >> 4) : 0;
+	ip_plength = (i != NULL) ? (((*(uint8_t *)i) >> 4 == 4) ?
+	    ntohs(((struct ip *)i)->ip_len) -
+	    (((*(uint8_t *)i) & 0x0f) << 2) :
+	    (((*(uint8_t *)i) >> 4 == 6) ?
+	    ntohs(((struct ip6_hdr *)i)->ip6_ctlun.ip6_un1.ip6_un1_plen) : 0)) : 0;
+	ip_saddr = (i != NULL) ? ((((*(uint8_t *)i)) >> 4 == 4) ?
+	    inet_ntoa((uint32_t *)&(((struct ip *)i)->ip_src.s_addr)) :
+	    ((((*(uint8_t *)i) >> 4) == 6) ?
+	    inet_ntoa6(&((struct ip6_hdr *)i)->ip6_src) : "")) : "";
+	ip_daddr = (i != NULL) ? (((*(uint8_t *)i) >> 4 == 4) ?
+	    inet_ntoa((uint32_t *)&((struct ip *)i)->ip_dst.s_addr) : ((((*(uint8_t *)i) >> 4) == 6) ?
+	    inet_ntoa6(&((struct ip6_hdr *)i)->ip6_dst) : "")) : "";
+};
+
+typedef struct ifinfo {
+	string if_name;			/* interface name */
+	int8_t if_local;		/* is delivered locally */
+	int8_t if_ipstack;		/* ipstack id */
+	struct ifnet *if_addr;		/* pointer to raw ill_t */
+	uint16_t if_flags;		/* flags: up/down, broadcast etc. */
+	uint32_t if_eflags;		/* extended flags */
+	uint16_t if_unit;
+} ifinfo_t;
+
+#pragma D binding "1.0" translator
+translator ifinfo_t < struct ifnet *ifp > {
+	if_name = (ifp != NULL) ? ifp->if_name : "";
+	if_unit = (ifp != NULL) ? ifp->if_unit : 0;
+	if_local = 0;
+	if_ipstack = 0;
+	if_addr = ifp;
+	if_flags = (ifp != NULL) ? ifp->if_flags : 0;
+	if_eflags = (ifp != NULL) ? ifp->if_eflags : 0;
+};
+
+typedef struct ipv4info {
+	uint8_t ipv4_ver;		/* IP version (4) */
+	uint8_t ipv4_ihl;		/* header length, bytes */
+	uint8_t ipv4_tos;		/* type of service field */
+	uint16_t ipv4_length;		/* length (header + payload) */
+	uint16_t ipv4_ident;		/* identification */
+	uint8_t ipv4_flags;		/* IP flags */
+	uint16_t ipv4_offset;		/* fragment offset */
+	uint8_t ipv4_ttl;		/* time to live */
+	uint8_t ipv4_protocol;		/* next level protocol */
+	string ipv4_protostr;		/* next level protocol, as a string */
+	uint16_t ipv4_checksum;		/* header checksum */
+	in_addr_t ipv4_src;		/* source address */
+	in_addr_t ipv4_dst;		/* destination address */
+	string ipv4_saddr;		/* source address, string */
+	string ipv4_daddr;		/* destination address, string */
+	struct ip *ipv4_hdr;		/* pointer to raw header */
+} ipv4info_t;
+
+#pragma D binding "1.0" translator
+translator ipv4info_t < struct ip *ip > {
+	ipv4_ver = (ip != NULL) ? (*(uint8_t *)ip & 0xf0) >> 4 : 0;
+	ipv4_ihl = (ip != NULL) ? ((*(uint8_t *)ip & 0x0f) << 2) : 0;
+	ipv4_tos = (ip != NULL) ? ip->ip_tos : 0;
+	ipv4_length = (ip != NULL) ? ntohs(ip->ip_len) : 0;
+	ipv4_ident = (ip != NULL) ? ip->ip_id : 0;
+	ipv4_flags = (ip != NULL) ? (ntohs(ip->ip_off) & 0xe000) : 0;
+	ipv4_offset = (ip != NULL) ? (ntohs(ip->ip_off) & 0x1fff) : 0;
+	ipv4_ttl = (ip != NULL) ? ip->ip_ttl : 0;
+	ipv4_protocol = (ip != NULL) ? ip->ip_p : 0;
+	ipv4_protostr = (ip == NULL) ? "" :
+	    (ip->ip_p == 1) ? "ICMP" :
+	    (ip->ip_p == 2) ? "IGMP" :
+	    (ip->ip_p == 4) ? "IP" :
+	    (ip->ip_p == 6) ? "TCP" :
+	    (ip->ip_p == 17) ? "UDP" :
+	    (ip->ip_p == 50) ? "ESP" :
+	    (ip->ip_p == 51) ? "AH" :
+	    (ip->ip_p == 58) ? "ICMPV6" :
+	    (ip->ip_p == 255) ? "RAW" : stringof(ip->ip_p);
+	ipv4_checksum = (ip != NULL) ? ntohs(ip->ip_sum) : 0;
+	ipv4_src = (ip != NULL) ? ip->ip_src.s_addr : 0;
+	ipv4_dst = (ip != NULL) ? ip->ip_dst.s_addr : 0;
+	ipv4_saddr = (ip != NULL) ? inet_ntoa((uint32_t *)&ip->ip_src.s_addr) : "";
+	ipv4_daddr = (ip != NULL) ? inet_ntoa((uint32_t *)&ip->ip_dst.s_addr) : "";
+	ipv4_hdr = ip;
+};
+
+typedef struct ipv6info {
+	uint8_t ipv6_ver;		/* IP version (6) */
+	uint8_t ipv6_tclass;		/* traffic class */
+	uint32_t ipv6_flow;		/* flow label */
+	uint16_t ipv6_plen;		/* payload length */
+	uint8_t ipv6_nexthdr;		/* next header protocol */
+	string ipv6_nextstr;		/* next header protocol, as a string */
+	uint8_t ipv6_hlim;		/* hop limit */
+	struct in6_addr *ipv6_src;	/* source address, pointer to struct in6_addr */
+	struct in6_addr *ipv6_dst;	/* destination address, pointer to struct in6_addr */
+	string ipv6_saddr;		/* source address, string */
+	string ipv6_daddr;		/* destination address, string */
+	struct ip6_hdr *ipv6_hdr;	/* pointer to raw header */
+} ipv6info_t;
+
+#pragma D binding "1.0" translator
+translator ipv6info_t < struct ip6_hdr *ip6 > {
+	ipv6_ver = (ip6 != NULL) ? ((ip6->ip6_ctlun.ip6_un2_vfc & 0xf0) >> 4) : 0;
+	ipv6_tclass = (ip6 != NULL) ? (ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0x0ff00000) >> 20 : 0;
+	ipv6_flow = (ip6 != NULL) ? (ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0x000fffff) : 0;
+	ipv6_plen = (ip6 != NULL) ? ntohs(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen) : 0;
+	ipv6_nexthdr = (ip6 != NULL) ? ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt : 0;
+	ipv6_nextstr = (ip6 == NULL) ? "" :
+	    (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 1) ? "ICMP" :
+	    (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 2) ? "IGMP" :
+	    (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 4) ? "IP" :
+	    (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 6) ? "TCP" :
+	    (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 17) ?
"UDP" : + (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 50) ? "ESP" : + (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 51) ? "AH" : + (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 58) ? "ICMPV6" : + (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt == 255) ? "RAW" : + stringof(ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt); + ipv6_hlim = (ip6 != NULL) ? ip6->ip6_ctlun.ip6_un1.ip6_un1_hlim : 0; + ipv6_src = (ip6 != NULL) ? (&ip6->ip6_src) : 0; + ipv6_dst = (ip6 != NULL) ? (&ip6->ip6_dst) : 0; + ipv6_saddr = (ip6 != NULL) ? inet_ntoa6(&ip6->ip6_src) : ""; + ipv6_daddr = (ip6 != NULL) ? inet_ntoa6(&ip6->ip6_dst) : ""; + ipv6_hdr = ip6; +}; diff --git a/bsd/dev/dtrace/scripts/mptcp.d b/bsd/dev/dtrace/scripts/mptcp.d new file mode 100644 index 000000000..cc7b9d365 --- /dev/null +++ b/bsd/dev/dtrace/scripts/mptcp.d @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#pragma D depends_on library darwin.d +#pragma D depends_on library socket.d +#pragma D depends_on module mach_kernel +#pragma D depends_on provider mptcp +#pragma D depends_on provider ip + +/* + * MPTCP Protocol Control Block. + */ +inline int MPTCPS_CLOSED = 0; +#pragma D binding "1.0" MPTCPS_CLOSED +inline int MPTCPS_LISTEN = 1; +#pragma D binding "1.0" MPTCPS_LISTEN +inline int MPTCPS_ESTABLISHED = 2; +#pragma D binding "1.0" MPTCPS_ESTABLISHED +inline int MPTCPS_CLOSE_WAIT = 3; +#pragma D binding "1.0" MPTCPS_CLOSE_WAIT +inline int MPTCPS_FIN_WAIT_1 = 4; +#pragma D binding "1.0" MPTCPS_FIN_WAIT_1 +inline int MPTCPS_CLOSING = 5; +#pragma D binding "1.0" MPTCPS_CLOSING +inline int MPTCPS_LAST_ACK = 6; +#pragma D binding "1.0" MPTCPS_LAST_ACK +inline int MPTCPS_FIN_WAIT_2 = 7; +#pragma D binding "1.0" MPTCPS_FIN_WAIT_2 +inline int MPTCPS_TIME_WAIT = 8; +#pragma D binding "1.0" MPTCPS_TIME_WAIT +inline int MPTCPS_FASTCLOSE_WAIT = 9; +#pragma D binding "1.0" MPTCPS_FASTCLOSE_WAIT + +typedef uint64_t mptcp_key_t; +typedef uint32_t mptcp_token_t; + +typedef struct mptsinfo { + string state; + uint32_t flags; + uint32_t vers; + uint32_t error; + mptcp_key_t localkey; + mptcp_key_t remotekey; + mptcp_token_t localtoken; + mptcp_token_t remotetoken; + int rxtshift; + uint32_t rxtstart; + uint64_t rtseq; + uint32_t timervals; + uint32_t timewait; + uint64_t snduna; + uint64_t sndnxt; + uint64_t sndmax; + uint64_t local_idsn; + uint32_t sndwnd; + uint64_t rcvnxt; + uint64_t rcvatmark; + uint64_t remote_idsn; + uint32_t rcvwnd; + struct mptcb *mptcb; +} mptsinfo_t; + +#pragma D binding "1.0" translator +translator mptsinfo_t < struct mptcb *T > { + state = T->mpt_state == MPTCPS_CLOSED ? "state-closed" : + T->mpt_state == MPTCPS_LISTEN ? 
"state-listen" : + T->mpt_state == MPTCPS_ESTABLISHED ? + "state-established" : + T->mpt_state == MPTCPS_CLOSE_WAIT ? "state-close-wait" : + T->mpt_state == MPTCPS_FIN_WAIT_1 ? "state-fin-wait-1" : + T->mpt_state == MPTCPS_CLOSING ? "state-closing" : + T->mpt_state == MPTCPS_LAST_ACK ? "state-last-ack" : + T->mpt_state == MPTCPS_FIN_WAIT_2 ? "state-fin-wait-2" : + T->mpt_state == MPTCPS_TIME_WAIT ? "state-time-wait" : + T->mpt_state == MPTCPS_FASTCLOSE_WAIT ? + "state-fastclose-wait" : + ""; + flags = T->mpt_flags; + vers = T->mpt_version; + error = T->mpt_softerror; + localkey = T->mpt_localkey ? *T->mpt_localkey : 0; + remotekey = T->mpt_remotekey; + localtoken = T->mpt_localtoken; + remotetoken = T->mpt_remotetoken; + rxtshift = T->mpt_rxtshift; + rxtstart = T->mpt_rxtstart; + rtseq = T->mpt_rtseq; + timervals = T->mpt_timer_vals; + timewait = T->mpt_timewait; + snduna = T->mpt_snduna; + sndnxt = T->mpt_sndnxt; + sndmax = T->mpt_sndmax; + local_idsn = T->mpt_local_idsn; + sndwnd = T->mpt_sndwnd; + rcvnxt = T->mpt_rcvnxt; + rcvatmark = T->mpt_rcvatmark; + remote_idsn = T->mpt_remote_idsn; + rcvwnd = T->mpt_rcvwnd; + mptcb = T; +}; + +/* + * Multipath Control Block. + */ +inline int MPPCB_STATE_INUSE = 1; +#pragma D binding "1.0" MPPCB_STATE_INUSE +inline int MPPCB_STATE_DEAD = 2; +#pragma D binding "1.0" MPPCB_STATE_DEAD + +typedef struct mppsinfo { + string state; + uint32_t flags; + struct mppcb *mppcb; +} mppsinfo_t; + +#pragma D binding "1.0" translator +translator mppsinfo_t < struct mppcb *T> { + state = T ? + T->mpp_state == MPPCB_STATE_INUSE ? "state-inuse" : + T->mpp_state == MPPCB_STATE_DEAD ? "state-dead" : + "" : ""; + flags = T->mpp_flags; + mppcb = T; +}; + +/* + * MPTCP Session. + */ +typedef struct mptsesinfo { + uint16_t numflows; + uint16_t nummpcapflows; + connid_t connid_last; + uint8_t flags; + struct mptses *mptses; +} mptsesinfo_t; + +#pragma D binding "1.0" translator +translator mptsesinfo_t < struct mptses *T > { + numflows = T->mpte_numflows; + nummpcapflows = T->mpte_nummpcapflows; + connid_last = T->mpte_connid_last; + flags = T->mpte_flags; + mptses = T; +}; + +/* + * MPTCP Subflow. 
+/*
+ * MPTCP Subflow.
+ */
+inline int MPTSF_ATTACHED = 0x00001;
+#pragma D binding "1.0" MPTSF_ATTACHED
+inline int MPTSF_CONNECTING = 0x00002;
+#pragma D binding "1.0" MPTSF_CONNECTING
+inline int MPTSF_CONNECT_PENDING = 0x00004;
+#pragma D binding "1.0" MPTSF_CONNECT_PENDING
+inline int MPTSF_CONNECTED = 0x00008;
+#pragma D binding "1.0" MPTSF_CONNECTED
+inline int MPTSF_DISCONNECTING = 0x00010;
+#pragma D binding "1.0" MPTSF_DISCONNECTING
+inline int MPTSF_DISCONNECTED = 0x00020;
+#pragma D binding "1.0" MPTSF_DISCONNECTED
+inline int MPTSF_MP_CAPABLE = 0x00040;
+#pragma D binding "1.0" MPTSF_MP_CAPABLE
+inline int MPTSF_MP_READY = 0x00080;
+#pragma D binding "1.0" MPTSF_MP_READY
+inline int MPTSF_MP_DEGRADED = 0x00100;
+#pragma D binding "1.0" MPTSF_MP_DEGRADED
+inline int MPTSF_SUSPENDED = 0x00200;
+#pragma D binding "1.0" MPTSF_SUSPENDED
+inline int MPTSF_BOUND_IF = 0x00400;
+#pragma D binding "1.0" MPTSF_BOUND_IF
+inline int MPTSF_BOUND_IP = 0x00800;
+#pragma D binding "1.0" MPTSF_BOUND_IP
+inline int MPTSF_BOUND_PORT = 0x01000;
+#pragma D binding "1.0" MPTSF_BOUND_PORT
+inline int MPTSF_PREFERRED = 0x02000;
+#pragma D binding "1.0" MPTSF_PREFERRED
+inline int MPTSF_SOPT_OLDVAL = 0x04000;
+#pragma D binding "1.0" MPTSF_SOPT_OLDVAL
+inline int MPTSF_SOPT_INPROG = 0x08000;
+#pragma D binding "1.0" MPTSF_SOPT_INPROG
+inline int MPTSF_DELETEOK = 0x10000;
+#pragma D binding "1.0" MPTSF_DELETEOK
+inline int MPTSF_FAILINGOVER = 0x20000;
+#pragma D binding "1.0" MPTSF_FAILINGOVER
+inline int MPTSF_ACTIVE = 0x40000;
+#pragma D binding "1.0" MPTSF_ACTIVE
+inline int MPTSF_MPCAP_CTRSET = 0x80000;
+#pragma D binding "1.0" MPTSF_MPCAP_CTRSET
+
+typedef struct mptsubinfo {
+	uint32_t flags;
+	uint32_t evctl;
+	uint32_t family;
+	connid_t connid;
+	uint32_t rank;
+	int32_t error;
+	uint64_t sndnxt;
+	struct mptsub *mptsub;
+} mptsubinfo_t;
+
+#pragma D binding "1.0" translator
+translator mptsubinfo_t < struct mptsub *T > {
+	flags = T->mpts_flags;
+	evctl = T->mpts_evctl;
+	family = T->mpts_family;
+	connid = T->mpts_connid;
+	rank = T->mpts_rank;
+	error = T->mpts_soerror;
+	sndnxt = T->mpts_sndnxt;
+	mptsub = T;
+};
diff --git a/bsd/dev/dtrace/scripts/regs_x86_64.d b/bsd/dev/dtrace/scripts/regs_x86_64.d
new file mode 100644
index 000000000..8a5acc699
--- /dev/null
+++ b/bsd/dev/dtrace/scripts/regs_x86_64.d
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"@(#)regs.d.in	1.1	04/09/28 SMI"
+
+inline int R_GS = 0;
+#pragma D binding "1.0" R_GS
+inline int R_FS = 1;
+#pragma D binding "1.0" R_FS
+inline int R_ES = 2;
+#pragma D binding "1.0" R_ES
+inline int R_DS = 3;
+#pragma D binding "1.0" R_DS
+
+inline int R_EDI = 4;
+#pragma D binding "1.0" R_EDI
+inline int R_ESI = 5;
+#pragma D binding "1.0" R_ESI
+inline int R_EBP = 6;
+#pragma D binding "1.0" R_EBP
+inline int R_ESP = 7;
+#pragma D binding "1.0" R_ESP
+inline int R_EBX = 8;
+#pragma D binding "1.0" R_EBX
+inline int R_EDX = 9;
+#pragma D binding "1.0" R_EDX
+inline int R_ECX = 10;
+#pragma D binding "1.0" R_ECX
+inline int R_EAX = 11;
+#pragma D binding "1.0" R_EAX
+
+inline int R_TRAPNO = 12;
+#pragma D binding "1.0" R_TRAPNO
+inline int R_ERR = 13;
+#pragma D binding "1.0" R_ERR
+inline int R_EIP = 14;
+#pragma D binding "1.0" R_EIP
+inline int R_CS = 15;
+#pragma D binding "1.0" R_CS
+inline int R_EFL = 16;
+#pragma D binding "1.0" R_EFL
+inline int R_UESP = 17;
+#pragma D binding "1.0" R_UESP
+inline int R_SS = 18;
+#pragma D binding "1.0" R_SS
+
+inline int R_PC = R_EIP;
+#pragma D binding "1.0" R_PC
+inline int R_SP = R_UESP;
+#pragma D binding "1.0" R_SP
+inline int R_PS = R_EFL;
+#pragma D binding "1.0" R_PS
+inline int R_R0 = R_EAX;
+#pragma D binding "1.0" R_R0
+inline int R_R1 = R_EBX;
+#pragma D binding "1.0" R_R1
+
+inline int R_RSP = 18 + 1 + 20;
+#pragma D binding "1.0" R_RSP
+inline int R_RFL = 18 + 1 + 19;
+#pragma D binding "1.0" R_RFL
+inline int R_RIP = 18 + 1 + 17;
+#pragma D binding "1.0" R_RIP
+inline int R_RAX = 18 + 1 + 14;
+#pragma D binding "1.0" R_RAX
+inline int R_RCX = 18 + 1 + 13;
+#pragma D binding "1.0" R_RCX
+inline int R_RDX = 18 + 1 + 12;
+#pragma D binding "1.0" R_RDX
+inline int R_RBX = 18 + 1 + 11;
+#pragma D binding "1.0" R_RBX
+inline int R_RBP = 18 + 1 + 10;
+#pragma D binding "1.0" R_RBP
+inline int R_RSI = 18 + 1 + 9;
+#pragma D binding "1.0" R_RSI
+inline int R_RDI = 18 + 1 + 8;
+#pragma D binding "1.0" R_RDI
+inline int R_R8 = 18 + 1 + 7;
+#pragma D binding "1.0" R_R8
+inline int R_R9 = 18 + 1 + 6;
+#pragma D binding "1.0" R_R9
+inline int R_R10 = 18 + 1 + 5;
+#pragma D binding "1.0" R_R10
+inline int R_R11 = 18 + 1 + 4;
+#pragma D binding "1.0" R_R11
+inline int R_R12 = 18 + 1 + 3;
+#pragma D binding "1.0" R_R12
+inline int R_R13 = 18 + 1 + 2;
+#pragma D binding "1.0" R_R13
+inline int R_R14 = 18 + 1 + 1;
+#pragma D binding "1.0" R_R14
+inline int R_R15 = 18 + 1 + 0;
+#pragma D binding "1.0" R_R15
+
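Illustration only, not part of this patch: these R_* constants exist to index DTrace's built-in uregs[] register array. A minimal sketch, assuming an x86_64 target process traced with the pid provider; malloc is an arbitrary probe point:

	pid$target::malloc:entry
	{
		printf("rip 0x%x rsp 0x%x rdi 0x%x",
		    uregs[R_RIP], uregs[R_RSP], uregs[R_RDI]);
	}
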
diff --git a/bsd/dev/dtrace/scripts/sched.d b/bsd/dev/dtrace/scripts/sched.d
new file mode 100644
index 000000000..d86772554
--- /dev/null
+++ b/bsd/dev/dtrace/scripts/sched.d
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2006-2008 Apple Computer, Inc. All Rights Reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#pragma D depends_on library darwin.d
+#pragma D depends_on module mach_kernel
+#pragma D depends_on provider sched
+
+struct _processor_info {
+	int	pi_state;		/* processor state */
+	char	pi_processor_type[32];	/* ASCII CPU type */
+	char	pi_fputypes[32];	/* ASCII FPU types */
+	int	pi_clock;		/* CPU clock freq in MHz */
+};
+
+typedef struct _processor_info _processor_info_t;
+
+typedef int chipid_t;
+typedef int lgrp_id_t;
+
+struct cpuinfo {
+	processorid_t cpu_id;		/* CPU identifier */
+	psetid_t cpu_pset;		/* processor set identifier */
+	chipid_t cpu_chip;		/* chip identifier */
+	lgrp_id_t cpu_lgrp;		/* locality group identifier */
+	_processor_info_t cpu_info;	/* CPU information */
+};
+
+typedef struct cpuinfo cpuinfo_t;
+
+translator cpuinfo_t < processor_t P > {
+	cpu_id = P->cpu_id;
+	cpu_pset = -1;		/* Darwin does not partition processors. */
+	cpu_chip = P->cpu_id;	/* XXX */
+	cpu_lgrp = 0;		/* XXX */
+	cpu_info = *((_processor_info_t *)`dtrace_zero);	/* ` */ /* XXX */
+};
+
+inline cpuinfo_t *curcpu = xlate <cpuinfo_t *> (curthread->last_processor);
+#pragma D attributes Stable/Stable/Common curcpu
+#pragma D binding "1.0" curcpu
+
+inline processorid_t cpu = curcpu->cpu_id;
+#pragma D attributes Stable/Stable/Common cpu
+#pragma D binding "1.0" cpu
+
+inline psetid_t pset = curcpu->cpu_pset;
+#pragma D attributes Stable/Stable/Common pset
+#pragma D binding "1.0" pset
+
+inline chipid_t chip = curcpu->cpu_chip;
+#pragma D attributes Stable/Stable/Common chip
+#pragma D binding "1.0" chip
+
+inline lgrp_id_t lgrp = curcpu->cpu_lgrp;
+#pragma D attributes Stable/Stable/Common lgrp
+#pragma D binding "1.0" lgrp
+
diff --git a/bsd/dev/dtrace/scripts/signal.d b/bsd/dev/dtrace/scripts/signal.d
new file mode 100644
index 000000000..76cee7624
--- /dev/null
+++ b/bsd/dev/dtrace/scripts/signal.d
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +inline int SIGHUP = 1; +#pragma D binding "1.0" SIGHUP +inline int SIGINT = 2; +#pragma D binding "1.0" SIGINT +inline int SIGQUIT = 3; +#pragma D binding "1.0" SIGQUIT +inline int SIGILL = 4; +#pragma D binding "1.0" SIGILL +inline int SIGTRAP = 5; +#pragma D binding "1.0" SIGTRAP +inline int SIGABRT = 6; +#pragma D binding "1.0" SIGABRT +inline int SIGIOT = 6; +#pragma D binding "1.0" SIGIOT +inline int SIGEMT = 7; +#pragma D binding "1.0" SIGEMT +inline int SIGFPE = 8; +#pragma D binding "1.0" SIGFPE +inline int SIGKILL = 9; +#pragma D binding "1.0" SIGKILL +inline int SIGBUS = 10; +#pragma D binding "1.0" SIGBUS +inline int SIGSEGV = 11; +#pragma D binding "1.0" SIGSEGV +inline int SIGSYS = 12; +#pragma D binding "1.0" SIGSYS +inline int SIGPIPE = 13; +#pragma D binding "1.0" SIGPIPE +inline int SIGALRM = 14; +#pragma D binding "1.0" SIGALRM +inline int SIGTERM = 15; +#pragma D binding "1.0" SIGTERM +inline int SIGURG = 16; +#pragma D binding "1.0" SIGURG +inline int SIGSTOP = 17; +#pragma D binding "1.0" SIGSTOP +inline int SIGTSTP = 18; +#pragma D binding "1.0" SIGTSTP +inline int SIGCONT = 19; +#pragma D binding "1.0" SIGCONT +inline int SIGCHLD = 20; +#pragma D binding "1.0" SIGCHLD +inline int SIGTTIN = 21; +#pragma D binding "1.0" SIGTTIN +inline int SIGTTOU = 22; +#pragma D binding "1.0" SIGTTOU +inline int SIGIO = 23; +#pragma D binding "1.0" SIGIO +inline int SIGXCPU = 24; +#pragma D binding "1.0" SIGXCPU +inline int SIGXFSZ = 25; +#pragma D binding "1.0" SIGXFSZ +inline int SIGVTALRM = 26; +#pragma D binding "1.0" SIGVTALRM +inline int SIGPROF = 27; +#pragma D binding "1.0" SIGPROF +inline int SIGWINCH = 28; +#pragma D binding "1.0" SIGWINCH +inline int SIGINFO = 29; +#pragma D binding "1.0" SIGINFO +inline int SIGUSR1 = 30; +#pragma D binding "1.0" SIGUSR1 +inline int SIGUSR2 = 31; +#pragma D binding "1.0" SIGUSR2 diff --git a/bsd/dev/dtrace/scripts/socket.d b/bsd/dev/dtrace/scripts/socket.d new file mode 100644 index 000000000..d122d9fac --- /dev/null +++ b/bsd/dev/dtrace/scripts/socket.d @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#pragma D depends_on library darwin.d
+#pragma D depends_on module mach_kernel
+
+typedef struct socketbuf {
+	uint32_t cc;
+	uint32_t hiwat;
+	uint32_t lowat;
+	uint32_t mbcnt;
+	uint32_t mbmax;
+	uint32_t flags;
+	struct sockbuf *sockbuf;
+} socketbuf_t;
+
+translator socketbuf_t < struct sockbuf *T > {
+	cc = T->sb_cc;
+	hiwat = T->sb_hiwat;
+	lowat = T->sb_lowat;
+	mbcnt = T->sb_mbcnt;
+	mbmax = T->sb_mbmax;
+	flags = T->sb_flags;
+	sockbuf = T;
+};
+
+typedef struct socketinfo {
+	int zone;
+	short type;
+	uint32_t options;
+	short linger;
+	short state;
+	short qlen;
+	short incqlen;
+	short qlimit;
+	short error;
+	uint32_t flags;
+	int traffic_class;
+	struct socket *socket;
+} socketinfo_t;
+
+translator socketinfo_t < struct socket *T > {
+	zone = T->so_zone;
+	type = T->so_type;
+	options = T->so_options;
+	linger = T->so_linger;
+	state = T->so_state;
+	qlen = T->so_qlen;
+	incqlen = T->so_incqlen;
+	qlimit = T->so_qlimit;
+	error = T->so_error;
+	flags = T->so_flags;
+	traffic_class = T->so_traffic_class;
+	socket = T;
+};
+
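Illustration only, not part of this patch: these translators are consumed through the probe argument mappings registered in sdt_subr.c later in this patch; mptcp:::session-create, for instance, presents its struct socket * argument as args[0], a socketinfo_t *:

	mptcp:::session-create
	{
		printf("socket %p type %d error %d",
		    args[0]->socket, args[0]->type, args[0]->error);
	}
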
diff --git a/bsd/dev/dtrace/scripts/tcp.d b/bsd/dev/dtrace/scripts/tcp.d
new file mode 100644
index 000000000..3aca8b7bb
--- /dev/null
+++ b/bsd/dev/dtrace/scripts/tcp.d
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2006-2008 Apple Computer, Inc. All Rights Reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#pragma D depends_on library darwin.d
+#pragma D depends_on module mach_kernel
+#pragma D depends_on provider tcp
+
+/*
+ * TCP flags
+ */
+inline int TH_FIN = 0x01;
+#pragma D binding "1.0" TH_FIN
+inline int TH_SYN = 0x02;
+#pragma D binding "1.0" TH_SYN
+inline int TH_RST = 0x04;
+#pragma D binding "1.0" TH_RST
+inline int TH_PUSH = 0x08;
+#pragma D binding "1.0" TH_PUSH
+inline int TH_ACK = 0x10;
+#pragma D binding "1.0" TH_ACK
+inline int TH_URG = 0x20;
+#pragma D binding "1.0" TH_URG
+inline int TH_ECE = 0x40;
+#pragma D binding "1.0" TH_ECE
+inline int TH_CWR = 0x80;
+#pragma D binding "1.0" TH_CWR
+
+/*
+ * TCP states
+ */
+inline int TCPS_CLOSED = 0;
+#pragma D binding "1.0" TCPS_CLOSED
+inline int TCPS_LISTEN = 1;
+#pragma D binding "1.0" TCPS_LISTEN
+inline int TCPS_SYN_SENT = 2;
+#pragma D binding "1.0" TCPS_SYN_SENT
+inline int TCPS_SYN_RECEIVED = 3;
+#pragma D binding "1.0" TCPS_SYN_RECEIVED
+inline int TCPS_ESTABLISHED = 4;
+#pragma D binding "1.0" TCPS_ESTABLISHED
+inline int TCPS_CLOSE_WAIT = 5;
+#pragma D binding "1.0" TCPS_CLOSE_WAIT
+inline int TCPS_FIN_WAIT_1 = 6;
+#pragma D binding "1.0" TCPS_FIN_WAIT_1
+inline int TCPS_CLOSING = 7;
+#pragma D binding "1.0" TCPS_CLOSING
+inline int TCPS_LAST_ACK = 8;
+#pragma D binding "1.0" TCPS_LAST_ACK
+inline int TCPS_FIN_WAIT_2 = 9;
+#pragma D binding "1.0" TCPS_FIN_WAIT_2
+inline int TCPS_TIME_WAIT = 10;
+#pragma D binding "1.0" TCPS_TIME_WAIT
+
+/*
+ * TCP congestion control events
+ */
+inline int TCP_CC_CWND_INIT = 0;
+#pragma D binding "1.0" TCP_CC_CWND_INIT
+inline int TCP_CC_INSEQ_ACK_RCVD = 1;
+#pragma D binding "1.0" TCP_CC_INSEQ_ACK_RCVD
+inline int TCP_CC_ACK_RCVD = 2;
+#pragma D binding "1.0" TCP_CC_ACK_RCVD
+inline int TCP_CC_ENTER_FASTRECOVERY = 3;
+#pragma D binding "1.0" TCP_CC_ENTER_FASTRECOVERY
+inline int TCP_CC_IN_FASTRECOVERY = 4;
+#pragma D binding "1.0" TCP_CC_IN_FASTRECOVERY
+inline int TCP_CC_EXIT_FASTRECOVERY = 5;
+#pragma D binding "1.0" TCP_CC_EXIT_FASTRECOVERY
+inline int TCP_CC_PARTIAL_ACK = 6;
+#pragma D binding "1.0" TCP_CC_PARTIAL_ACK
+inline int TCP_CC_IDLE_TIMEOUT = 7;
+#pragma D binding "1.0" TCP_CC_IDLE_TIMEOUT
+inline int TCP_CC_REXMT_TIMEOUT = 8;
+#pragma D binding "1.0" TCP_CC_REXMT_TIMEOUT
+inline int TCP_CC_ECN_RCVD = 9;
+#pragma D binding "1.0" TCP_CC_ECN_RCVD
+inline int TCP_CC_BAD_REXMT_RECOVERY = 10;
+#pragma D binding "1.0" TCP_CC_BAD_REXMT_RECOVERY
+inline int TCP_CC_OUTPUT_ERROR = 11;
+#pragma D binding "1.0" TCP_CC_OUTPUT_ERROR
+inline int TCP_CC_CHANGE_ALGO = 12;
+#pragma D binding "1.0" TCP_CC_CHANGE_ALGO
+inline int TCP_CC_FLOW_CONTROL = 13;
+#pragma D binding "1.0" TCP_CC_FLOW_CONTROL
+inline int TCP_CC_SUSPEND = 14;
+#pragma D binding "1.0" TCP_CC_SUSPEND
+inline int TCP_CC_LIMITED_TRANSMIT = 15;
+#pragma D binding "1.0" TCP_CC_LIMITED_TRANSMIT
+inline int TCP_CC_EARLY_RETRANSMIT = 16;
+#pragma D binding "1.0" TCP_CC_EARLY_RETRANSMIT
+
+
+/*
+ * tcpinfo contains the TCP header fields.
+ */
+typedef struct tcpinfo {
+	uint16_t tcp_sport;		/* source port */
+	uint16_t tcp_dport;		/* destination port */
+	uint32_t tcp_seq;		/* sequence number */
+	uint32_t tcp_ack;		/* acknowledgement number */
+	uint8_t tcp_offset;		/* data offset, in bytes */
+	uint8_t tcp_flags;		/* flags */
+	uint16_t tcp_window;		/* window size */
+	uint16_t tcp_checksum;		/* checksum */
+	uint16_t tcp_urgent;		/* urgent data pointer */
+	struct tcphdr *tcp_hdr;		/* raw TCP header */
+} tcpinfo_t;
+
+#pragma D binding "1.0" translator
+translator tcpinfo_t < struct tcphdr *T > {
tcp_sport = ntohs(T->th_sport); + tcp_dport = ntohs(T->th_dport); + tcp_seq = ntohl(T->th_seq); + tcp_ack = ntohl(T->th_ack); + tcp_offset = T->th_off << 2; + tcp_flags = T->th_flags; + tcp_window = ntohs(T->th_win); + tcp_checksum = ntohs(T->th_sum); + tcp_urgent = ntohs(T->th_urp); + tcp_hdr = T; +}; + +/* + * tcpsinfo contains stable TCP details from TCP control block + */ +typedef struct tcpsinfo { + int tcps_local; /* is delivered locally, boolean */ + int tcps_active; /* active open, boolean */ + string tcps_state; /* TCP state, as a string */ + u_int t_flags; /* flags */ + uint32_t rcv_nxt; /* receive next */ + uint32_t rcv_adv; /* advertised window */ + uint32_t rcv_wnd; /* receive window */ + uint32_t snd_wnd; /* send window */ + uint32_t snd_cwnd; /* congestion controlled window */ + uint32_t snd_ssthresh; /* slow-start threshold */ + uint32_t snd_una; /* send unacknowledged */ + uint32_t snd_nxt; /* send next */ + uint32_t snd_max; /* send max */ + uint32_t snd_recover; /* send recover for NewReno */ + int t_rxtcur; /* retransmit timeout in ms */ + u_int t_maxseg; /* maximum segment size */ + u_int t_rttbest; /* best rtt we have seen */ + int rcv_numsacks; /* number of sack blocks present */ + int snd_numholes; /* number of holes seen by sender */ + struct tcpcb* tcpcb; /* Pointer to tcp control block */ +} tcpsinfo_t; + +#pragma D binding "1.0" translator +translator tcpsinfo_t < struct tcpcb *T> { + tcps_local = 0; /* Not used */ + tcps_active = 0; + tcps_state = T ? + T->t_state == TCPS_CLOSED ? "state-closed" : + T->t_state == TCPS_LISTEN ? "state-listen" : + T->t_state == TCPS_SYN_SENT ? "state-syn-sent" : + T->t_state == TCPS_SYN_RECEIVED ? "state-syn-received" : + T->t_state == TCPS_ESTABLISHED ? "state-established" : + T->t_state == TCPS_CLOSE_WAIT ? "state-close-wait" : + T->t_state == TCPS_FIN_WAIT_1 ? "state-fin-wait1" : + T->t_state == TCPS_CLOSING ? "state-closing" : + T->t_state == TCPS_LAST_ACK ? "state-last-ack" : + T->t_state == TCPS_FIN_WAIT_2 ? "state-fin-wait2" : + T->t_state == TCPS_TIME_WAIT ? "state-time-wait" : + "" : ""; + t_flags = T->t_flags; + rcv_nxt = T->rcv_nxt; + rcv_adv = T->rcv_adv; + rcv_wnd = T->rcv_wnd; + snd_wnd = T->snd_wnd; + snd_cwnd = T->snd_cwnd; + snd_ssthresh = T->snd_ssthresh; + snd_una = T->snd_una; + snd_nxt = T->snd_nxt; + snd_max = T->snd_max; + snd_recover = T->snd_recover; + t_rxtcur = T->t_rxtcur; + t_maxseg = T->t_maxseg; + t_rttbest = T->t_rttbest; + rcv_numsacks = T->rcv_numsacks; + snd_numholes = T->snd_numholes; + tcpcb = T; +}; + +/* + * tcpnsinfo provides the new tcp state for state changes. + */ +typedef struct tcpnsinfo { + string tcps_state; /* TCP state, as a string */ +} tcpnsinfo_t; + +#pragma D binding "1.0" translator +translator tcpnsinfo_t < int32_t I > { + tcps_state = I ? + I == TCPS_LISTEN ? "state-listen" : + I == TCPS_SYN_SENT ? "state-syn-sent" : + I == TCPS_SYN_RECEIVED ? "state-syn-received" : + I == TCPS_ESTABLISHED ? "state-established" : + I == TCPS_CLOSE_WAIT ? "state-close-wait" : + I == TCPS_FIN_WAIT_1 ? "state-fin-wait1" : + I == TCPS_CLOSING ? "state-closing" : + I == TCPS_LAST_ACK ? "state-last-ack" : + I == TCPS_FIN_WAIT_2 ? "state-fin-wait2" : + I == TCPS_TIME_WAIT ? 
"state-time-wait" : + "" : "state-closed"; +}; + +/* + * tcpccevent provides the congestion control event for TCP cc probes + */ +typedef struct tcpccevent { + string tcp_cc; /* TCP congestion control event, as a string */ +} tcpccevent_t; + +#pragma D binding "1.0" translator +translator tcpccevent_t < int32_t I > { + tcp_cc = I ? + I == TCP_CC_INSEQ_ACK_RCVD ? "inseq-ack-rcvd" : + I == TCP_CC_ACK_RCVD ? "ack-rcvd" : + I == TCP_CC_ENTER_FASTRECOVERY ? "enter-fastrecovery" : + I == TCP_CC_EXIT_FASTRECOVERY ? "exit-fastrecovery" : + I == TCP_CC_PARTIAL_ACK ? "partial-ack" : + I == TCP_CC_IDLE_TIMEOUT ? "idle-timeout" : + I == TCP_CC_REXMT_TIMEOUT ? "rexmt-timeout" : + I == TCP_CC_ECN_RCVD ? "ecn-rcvd" : + I == TCP_CC_BAD_REXMT_RECOVERY ? "bad-rexmt" : + I == TCP_CC_OUTPUT_ERROR ? "output-error" : + I == TCP_CC_CHANGE_ALGO ? "change-algo" : + I == TCP_CC_FLOW_CONTROL ? "flow-control" : + I == TCP_CC_SUSPEND ? "suspend" : + I == TCP_CC_LIMITED_TRANSMIT ? "limited-transmit" : + I == TCP_CC_EARLY_RETRANSMIT ? "early-rexmt" : + "" : "cwnd-init"; +}; diff --git a/bsd/dev/dtrace/scripts/unistd.d b/bsd/dev/dtrace/scripts/unistd.d new file mode 100644 index 000000000..7279b3118 --- /dev/null +++ b/bsd/dev/dtrace/scripts/unistd.d @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */
+
+#pragma ident	"@(#)unistd.d	1.4	07/02/20 SMI"
+
+inline int DTRACEFLT_UNKNOWN = 0;	/* Unknown fault */
+#pragma D binding "1.0" DTRACEFLT_UNKNOWN
+
+inline int DTRACEFLT_BADADDR = 1;	/* Bad address */
+#pragma D binding "1.0" DTRACEFLT_BADADDR
+
+inline int DTRACEFLT_BADALIGN = 2;	/* Bad alignment */
+#pragma D binding "1.0" DTRACEFLT_BADALIGN
+
+inline int DTRACEFLT_ILLOP = 3;	/* Illegal operation */
+#pragma D binding "1.0" DTRACEFLT_ILLOP
+
+inline int DTRACEFLT_DIVZERO = 4;	/* Divide-by-zero */
+#pragma D binding "1.0" DTRACEFLT_DIVZERO
+
+inline int DTRACEFLT_NOSCRATCH = 5;	/* Out of scratch space */
+#pragma D binding "1.0" DTRACEFLT_NOSCRATCH
+
+inline int DTRACEFLT_KPRIV = 6;	/* Illegal kernel access */
+#pragma D binding "1.0" DTRACEFLT_KPRIV
+
+inline int DTRACEFLT_UPRIV = 7;	/* Illegal user access */
+#pragma D binding "1.0" DTRACEFLT_UPRIV
+
+inline int DTRACEFLT_TUPOFLOW = 8;	/* Tuple stack overflow */
+#pragma D binding "1.0" DTRACEFLT_TUPOFLOW
+
+inline int DTRACEFLT_BADSTACK = 9;	/* Bad stack */
+#pragma D binding "1.4.1" DTRACEFLT_BADSTACK
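Illustration only, not part of this patch: these fault codes arrive as arg4 of the dtrace:::ERROR probe, with arg5 carrying the fault-specific detail (for DTRACEFLT_BADADDR, the offending address), per the documented ERROR probe convention:

	dtrace:::ERROR
	/arg4 == DTRACEFLT_BADADDR/
	{
		printf("bad-address fault at 0x%x", arg5);
	}
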
diff --git a/bsd/dev/dtrace/sdt.c b/bsd/dev/dtrace/sdt.c
index 89ac8ef2b..ad03df995 100644
--- a/bsd/dev/dtrace/sdt.c
+++ b/bsd/dev/dtrace/sdt.c
@@ -40,6 +40,7 @@
 #include
 #include
+
 #include
 #include
@@ -51,10 +52,9 @@ extern int dtrace_kernel_symbol_mode;
 struct savearea_t; /* Used anonymously */
 typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, int);
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__x86_64__)
 extern perfCallback tempDTraceTrapHook;
 extern kern_return_t fbt_perfCallback(int, struct savearea_t *, int, int);
-
 #define SDT_PATCHVAL 0xf0
 #define SDT_AFRAMES 6
 #else
@@ -153,6 +153,10 @@ __sdt_provide_module(void *arg, struct modctl *ctl)
 			mp->sdt_nprobes++;
 		}
+#if 0
+		printf ("__sdt_provide_module: sdpd=0x%p sdp=0x%p name=%s, id=%d\n", sdpd, sdp, nname, sdp->sdp_id);
+#endif
+
 		sdp->sdp_hashnext = sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)];
 		sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp;
@@ -626,7 +630,12 @@ void sdt_init( void )
 			strncpy(sdpd->sdpd_func, prev_name, len); /* NUL termination is ensured. */
 			sdpd->sdpd_offset = *(unsigned long *)sym[i].n_value;
-
+
+#if 0
+			printf("sdt_init: sdpd_offset=0x%lx, n_value=0x%lx, name=%s\n",
+			    sdpd->sdpd_offset, *(unsigned long *)sym[i].n_value, name);
+#endif
+
 			sdpd->sdpd_next = g_sdt_mach_module.sdt_probes;
 			g_sdt_mach_module.sdt_probes = sdpd;
 		} else {
diff --git a/bsd/dev/dtrace/sdt_subr.c b/bsd/dev/dtrace/sdt_subr.c
index 891207713..82ab01989 100644
--- a/bsd/dev/dtrace/sdt_subr.c
+++ b/bsd/dev/dtrace/sdt_subr.c
@@ -75,14 +75,6 @@ static dtrace_pattr_t sdt_attr = {
 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
 };
-static dtrace_pattr_t xpv_attr = {
-{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_PLATFORM },
-{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
-{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
-{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM },
-{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_PLATFORM },
-};
-
 sdt_provider_t sdt_providers[] = {
 	{ "vtrace", "__vtrace____", &vtrace_attr, 0 },
 	{ "sysinfo", "__cpu_sysinfo____", &info_attr, 0 },
@@ -93,22 +85,20 @@ sdt_provider_t sdt_providers[] = {
 	{ "io", "__io____", &stab_attr, 0 },
 	{ "ip", "__ip____", &stab_attr, 0 },
 	{ "tcp", "__tcp____", &stab_attr, 0 },
+	{ "mptcp", "__mptcp____", &stab_attr, 0 },
 	{ "mib", "__mib____", &stab_attr, 0 },
 	{ "fsinfo", "__fsinfo____", &fsinfo_attr, 0 },
 	{ "nfsv3", "__nfsv3____", &stab_attr, 0 },
 	{ "nfsv4", "__nfsv4____", &stab_attr, 0 },
-	{ "xpv", "__xpv____", &xpv_attr, 0 },
 	{ "sysevent", "__sysevent____", &stab_attr, 0 },
 	{ "sdt", "__sdt____", &sdt_attr, 0 },
-#if !defined(__APPLE__)
-	{ NULL }
-#else
+	{ "boost", "__boost____", &stab_attr, 0},
 	{ NULL, NULL, NULL, 0 }
-#endif /* __APPLE__ */
 };
 
 /* Warning: Need xnu cognate for disp_t.
*/ sdt_argdesc_t sdt_args[] = { + /* provider probename arg# arg-mapping native-type translated-type */ { "sched", "wakeup", 0, 0, "struct thread *", "lwpsinfo_t *" }, { "sched", "wakeup", 1, 1, "struct proc *", "psinfo_t *" }, { "sched", "dequeue", 0, 0, "struct thread *", "lwpsinfo_t *" }, @@ -163,6 +153,11 @@ sdt_argdesc_t sdt_args[] = { { "proc", "signal-send", 0, 0, "struct thread *", "lwpsinfo_t *" }, { "proc", "signal-send", 1, 1, "struct proc *", "psinfo_t *" }, { "proc", "signal-send", 2, 2, "int", NULL }, + /* proc:::spawn-success has no arguments */ + { "proc", "spawn-failure", 0, 0, "int", NULL }, + { "proc", "spawn-fd-failure", 0, 0, "int", NULL }, + { "proc", "spawn-open-failure", 0, 0, "string", NULL }, + { "proc", "spawn-port-failure", 0, 0, "int", NULL }, /* proc:::start has no arguments */ { "io", "start", 0, 0, "struct buf *", "bufinfo_t *" }, @@ -187,8 +182,9 @@ sdt_argdesc_t sdt_args[] = { #endif /* __APPLE__ */ { "mib", NULL, 0, 0, "int", NULL }, + { "fsinfo", NULL, 0, 0, "struct vnode *", "fileinfo_t *" }, - { "fsinfo", NULL, 1, 1, "int", "int" }, + { "fsinfo", NULL, 1, 1, "int", NULL }, { "nfsv3", "op-getattr-start", 0, 0, "struct svc_req *", "conninfo_t *" }, @@ -871,69 +867,98 @@ sdt_argdesc_t sdt_args[] = { { "tcp", "iaj", 2, 2, "uint32_t", NULL}, { "sysevent", "post", 0, 0, "evch_bind_t *", "syseventchaninfo_t *" }, { "sysevent", "post", 1, 1, "sysevent_impl_t *", "syseventinfo_t *" }, - - { "xpv", "add-to-physmap-end", 0, 0, "int", NULL }, - { "xpv", "add-to-physmap-start", 0, 0, "domid_t", NULL }, - { "xpv", "add-to-physmap-start", 1, 1, "uint_t", NULL }, - { "xpv", "add-to-physmap-start", 2, 2, "ulong_t", NULL }, - { "xpv", "add-to-physmap-start", 3, 3, "ulong_t", NULL }, - { "xpv", "decrease-reservation-end", 0, 0, "int", NULL }, - { "xpv", "decrease-reservation-start", 0, 0, "domid_t", NULL }, - { "xpv", "decrease-reservation-start", 1, 1, "ulong_t", NULL }, - { "xpv", "decrease-reservation-start", 2, 2, "uint_t", NULL }, - { "xpv", "decrease-reservation-start", 3, 3, "ulong_t *", NULL }, - { "xpv", "dom-create-start", 0, 0, "xen_domctl_t *", NULL }, - { "xpv", "dom-destroy-start", 0, 0, "domid_t", NULL }, - { "xpv", "dom-pause-start", 0, 0, "domid_t", NULL }, - { "xpv", "dom-unpause-start", 0, 0, "domid_t", NULL }, - { "xpv", "dom-create-end", 0, 0, "int", NULL }, - { "xpv", "dom-destroy-end", 0, 0, "int", NULL }, - { "xpv", "dom-pause-end", 0, 0, "int", NULL }, - { "xpv", "dom-unpause-end", 0, 0, "int", NULL }, - { "xpv", "evtchn-op-end", 0, 0, "int", NULL }, - { "xpv", "evtchn-op-start", 0, 0, "int", NULL }, - { "xpv", "evtchn-op-start", 1, 1, "void *", NULL }, - { "xpv", "increase-reservation-end", 0, 0, "int", NULL }, - { "xpv", "increase-reservation-start", 0, 0, "domid_t", NULL }, - { "xpv", "increase-reservation-start", 1, 1, "ulong_t", NULL }, - { "xpv", "increase-reservation-start", 2, 2, "uint_t", NULL }, - { "xpv", "increase-reservation-start", 3, 3, "ulong_t *", NULL }, - { "xpv", "mmap-end", 0, 0, "int", NULL }, - { "xpv", "mmap-entry", 0, 0, "ulong_t", NULL }, - { "xpv", "mmap-entry", 1, 1, "ulong_t", NULL }, - { "xpv", "mmap-entry", 2, 2, "ulong_t", NULL }, - { "xpv", "mmap-start", 0, 0, "domid_t", NULL }, - { "xpv", "mmap-start", 1, 1, "int", NULL }, - { "xpv", "mmap-start", 2, 2, "privcmd_mmap_entry_t *", NULL }, - { "xpv", "mmapbatch-end", 0, 0, "int", NULL }, - { "xpv", "mmapbatch-end", 1, 1, "struct seg *", NULL }, - { "xpv", "mmapbatch-end", 2, 2, "caddr_t", NULL }, - { "xpv", "mmapbatch-start", 0, 0, "domid_t", NULL }, - { "xpv", 
"mmapbatch-start", 1, 1, "int", NULL }, - { "xpv", "mmapbatch-start", 2, 2, "caddr_t", NULL }, - { "xpv", "mmu-ext-op-end", 0, 0, "int", NULL }, - { "xpv", "mmu-ext-op-start", 0, 0, "int", NULL }, - { "xpv", "mmu-ext-op-start", 1, 1, "struct mmuext_op *" , NULL}, - { "xpv", "mmu-update-start", 0, 0, "int", NULL }, - { "xpv", "mmu-update-start", 1, 1, "int", NULL }, - { "xpv", "mmu-update-start", 2, 2, "mmu_update_t *", NULL }, - { "xpv", "mmu-update-end", 0, 0, "int", NULL }, - { "xpv", "populate-physmap-end", 0, 0, "int" , NULL}, - { "xpv", "populate-physmap-start", 0, 0, "domid_t" , NULL}, - { "xpv", "populate-physmap-start", 1, 1, "ulong_t" , NULL}, - { "xpv", "populate-physmap-start", 2, 2, "ulong_t *" , NULL}, - { "xpv", "set-memory-map-end", 0, 0, "int" , NULL}, - { "xpv", "set-memory-map-start", 0, 0, "domid_t" , NULL}, - { "xpv", "set-memory-map-start", 1, 1, "int", NULL }, - { "xpv", "set-memory-map-start", 2, 2, "struct xen_memory_map *", NULL }, - { "xpv", "setvcpucontext-end", 0, 0, "int", NULL }, - { "xpv", "setvcpucontext-start", 0, 0, "domid_t", NULL }, - { "xpv", "setvcpucontext-start", 1, 1, "vcpu_guest_context_t *", NULL }, -#if !defined(__APPLE__) - { NULL } -#else + /* mptcp::input has no arguments */ + { "mptcp", "receive-degraded", 0, 0, "struct mbuf *", "pktinfo_t *" }, + { "mptcp", "receive-degraded", 1, 1, "struct socket *", "socketinfo_t *" }, + { "mptcp", "receive-degraded", 2, 2, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "receive-degraded", 3, 3, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "receive-degraded", 4, 4, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "receive", 0, 0, "struct mbuf *", "pktinfo_t *" }, + { "mptcp", "receive", 1, 1, "struct socket *", "socketinfo_t *" }, + { "mptcp", "receive", 2, 2, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "receive", 3, 3, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "receive", 4, 4, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "receive", 5, 5, "struct mptcb *", "mptsinfo_t *" }, + { "mptcp", "output", 0, 0, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "output", 1, 1, "struct mptsub *", "mptsubinfo_t *" }, + { "mptcp", "output", 2, 2, "struct socket *", "socketinfo_t *" }, + { "mptcp", "state-change", 0, 0, "struct mptcb *", "mptsinfo_t *" }, + { "mptcp", "state-change", 1, 1, "uint32_t", "uint32_t" }, + { "mptcp", "checksum-result", 0, 0, "struct tcpcb *", "tcpsinfo_t *" }, + { "mptcp", "checksum-result", 1, 1, "struct mbuf *", "pktinfo_t *" }, + { "mptcp", "checksum-result", 2, 2, "uint32_t", "uint32_t" }, + { "mptcp", "session-create", 0, 0, "struct socket *", "socketinfo_t *"}, + { "mptcp", "session-create", 1, 1, "struct sockbuf *", "socketbuf_t *"}, + { "mptcp", "session-create", 2, 2, "struct sockbuf *", "socketbuf_t *"}, + { "mptcp", "session-create", 3, 3, "struct mppcb *", "mppsinfo_t *" }, + { "mptcp", "session-create", 4, 4, "int", "int" }, + { "mptcp", "session-destroy", 0, 0, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "session-destroy", 1, 1, "struct mptcb *", "mptsinfo_t *" }, + { "mptcp", "subflow-create", 0, 0, "struct mptses *", "mptsesinfo_t *"}, + { "mptcp", "subflow-create", 1, 1, "struct mptsub *", "mptsubinfo_t *"}, + { "mptcp", "subflow-create", 2, 2, "int", "int" }, + { "mptcp", "subflow-create", 3, 3, "int", "int" }, + { "mptcp", "subflow-close", 0, 0, "struct mptsub *", "mptsubinfo_t *" }, + { "mptcp", "subflow-close", 1, 1, "struct socket *", "socketinfo_t *" }, + { "mptcp", "subflow-close", 2, 2, "struct sockbuf *", "socketbuf_t 
*" }, + { "mptcp", "subflow-close", 3, 3, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "subflow-close", 4, 4, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "subflow-connect", 0, 0, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "subflow-connect", 1, 1, "struct mptsub *", "mptsubinfo_t *" }, + { "mptcp", "subflow-connect", 2, 2, "int", "int" }, + { "mptcp", "subflow-receive", 0, 0, "struct socket *", "socketinfo_t *" }, + { "mptcp", "subflow-receive", 1, 1, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "subflow-receive", 2, 2, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "subflow-peeloff", 0, 0, "struct mptses *", "mptsesinfo_t *",}, + { "mptcp", "subflow-peeloff", 1, 1, "struct mptsub *", "mptsubinfo_t *",}, + { "mptcp", "subflow-peeloff", 2, 2, "struct socket *", "socketinfo_t *",}, + { "mptcp", "subflow-peeloff", 3, 3, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "subflow-peeloff", 4, 4, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "subflow-input", 0, 0, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "subflow-input", 1, 1, "struct mptsub *", "mptsubinfo_t *" }, + { "mptcp", "subflow-output", 0, 0, "struct mptses *", "mptsesinfo_t *"}, + { "mptcp", "subflow-output", 1, 1, "struct mptsub *", "mptsubinfo_t *"}, + { "mptcp", "subflow-events", 0, 0, "struct mptses *", "mptsesinfo_t *"}, + { "mptcp", "subflow-events", 1, 1, "struct mptsub *", "mptsubinfo_t *"}, + { "mptcp", "subflow-events", 2, 2, "uint32_t", "uint32_t"}, + { "mptcp", "send", 0, 0, "struct mbuf *", "pktinfo_t *" }, + { "mptcp", "send", 1, 1, "struct socket *", "socketinfo_t *" }, + { "mptcp", "send", 2, 2, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "send", 3, 3, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "send", 4, 4, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "send", 5, 5, "struct mptsub *", "mptsubinfo_t *" }, + { "mptcp", "send", 6, 6, "size_t", "size_t" }, + { "mptcp", "dispose", 0, 0, "struct socket *", "socketinfo_t *" }, + { "mptcp", "dispose", 1, 1, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "dispose", 2, 2, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "dispose", 3, 3, "struct mppcb *", "mppsinfo_t *" }, + { "mptcp", "multipath-ready", 0, 0, "struct socket *", "socketinfo_t *" }, + { "mptcp", "multipath-ready", 1, 1, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "multipath-ready", 2, 2, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "multipath-ready", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "mptcp", "multipath-failed", 0, 0, "struct socket *", "socketinfo_t *" }, + { "mptcp", "multipath-failed", 1, 1, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "multipath-failed", 2, 2, "struct sockbuf *", "socketbuf_t *" }, + { "mptcp", "multipath-failed", 3, 3, "struct tcpcb *", "tcpsinfo_t *" }, + { "mptcp", "start-timer", 0, 0, "struct mptcb *", "mptsinfo_t *" }, + { "mptcp", "start-timer", 1, 1, "int", "int" }, + { "mptcp", "cancel-timer", 0, 0, "struct mptcb *", "mptsinfo_t *" }, + { "mptcp", "cancel-timer", 1, 1, "int", "int" }, + { "mptcp", "timer", 0, 0, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "timer", 1, 1, "struct mptcb *", "mptsinfo_t *" }, + { "mptcp", "error", 0, 0, "struct mptcb *", "mptsinfo_t *" }, + { "mptcp", "connectx", 0, 0, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "connectx", 1, 1, "associd_t", "associd_t" }, + { "mptcp", "connectx", 2, 2, "struct socket *", "socketinfo_t *" }, + { "mptcp", "disconnectx", 0, 0, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", 
"disconnectx", 1, 1, "associd_t", "associd_t" }, + { "mptcp", "disconnectx", 2, 2, "connid_t", "connid_t" }, + { "mptcp", "disconnectx", 3, 3, "struct socket *", "sockinfo_t *" }, + { "mptcp", "disconnectx", 4, 4, "struct mptcb *", "mptsinfo_t *" }, + { "mptcp", "peeloff", 0, 0, "struct mptses *", "mptsesinfo_t *" }, + { "mptcp", "peeloff", 1, 1, "associd_t", "associd_t" }, + { "mptcp", "peeloff", 2, 2, "struct socket *", "sockinfo_t *" }, { NULL, NULL, 0, 0, NULL, NULL } -#endif /* __APPLE__ */ }; /*ARGSUSED*/ @@ -950,23 +975,6 @@ sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) for (i = 0; sdt_args[i].sda_provider != NULL; i++) { sdt_argdesc_t *a = &sdt_args[i]; -#if !defined(__APPLE__) - if (strcmp(sdp->sdp_provider->sdtp_name, a->sda_provider) != 0) - continue; - - if (a->sda_name != NULL && - strcmp(sdp->sdp_name, a->sda_name) != 0) - continue; - - if (desc->dtargd_ndx != a->sda_ndx) - continue; - - if (a->sda_native != NULL) - (void) strcpy(desc->dtargd_native, a->sda_native); - - if (a->sda_xlate != NULL) - (void) strcpy(desc->dtargd_xlate, a->sda_xlate); -#else if (strncmp(sdp->sdp_provider->sdtp_name, a->sda_provider, strlen(a->sda_provider) + 1) != 0) continue; @@ -982,7 +990,6 @@ sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) if (a->sda_xlate != NULL) (void) strlcpy(desc->dtargd_xlate, a->sda_xlate, DTRACE_ARGTYPELEN); -#endif /* __APPLE__ */ desc->dtargd_mapping = a->sda_mapping; return; diff --git a/bsd/dev/dtrace/systrace.c b/bsd/dev/dtrace/systrace.c index 6761beec9..cdd5a3040 100644 --- a/bsd/dev/dtrace/systrace.c +++ b/bsd/dev/dtrace/systrace.c @@ -43,17 +43,15 @@ #endif #endif -#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */ #include #include + /* XXX All of these should really be derived from syscall_sw.h */ -#if defined(__i386__) || defined (__x86_64__) +#if defined (__x86_64__) #define SYSCALL_CLASS_SHIFT 24 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT) #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK) #define I386_SYSCALL_NUMBER_MASK (0xFFFF) - -typedef x86_saved_state_t savearea_t; #endif #include @@ -75,7 +73,7 @@ typedef x86_saved_state_t savearea_t; #include -#if defined(__i386__) || defined (__x86_64__) +#if defined (__x86_64__) #define SYSTRACE_ARTIFICIAL_FRAMES 2 #define MACHTRACE_ARTIFICIAL_FRAMES 3 #else @@ -114,8 +112,7 @@ systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1, int32_t dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) { - boolean_t flavor; - unsigned short code; + unsigned short code; /* The system call number */ systrace_sysent_t *sy; dtrace_id_t id; @@ -125,8 +122,7 @@ dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv) #endif syscall_arg_t *ip = (syscall_arg_t *)uap; -#if defined(__i386__) || defined (__x86_64__) -#pragma unused(flavor) +#if defined (__x86_64__) { pal_register_cache_state(current_thread(), VALID); x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); @@ -742,16 +738,16 @@ typedef void mach_munge_t(const void *, void *); typedef struct { int mach_trap_arg_count; kern_return_t (*mach_trap_function)(void *); -#if 0 /* no active architectures use mungers for mach traps */ +#if defined(__x86_64__) mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */ - mach_munge_t *mach_trap_arg_munge64; /* system call arguments for 64-bit */ #endif + int mach_trap_u32_words; #if MACH_ASSERT const char* mach_trap_name; #endif /* MACH_ASSERT 
*/ } mach_trap_t; -extern mach_trap_t mach_trap_table[]; +extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */ extern int mach_trap_count; extern const char *mach_syscall_name_table[]; @@ -796,8 +792,7 @@ static dtrace_provider_id_t machtrace_id; static kern_return_t dtrace_machtrace_syscall(struct mach_call_args *args) { - boolean_t flavor; - unsigned short code; + int code; /* The mach call number */ machtrace_sysent_t *sy; dtrace_id_t id; @@ -808,8 +803,7 @@ dtrace_machtrace_syscall(struct mach_call_args *args) syscall_arg_t *ip = (syscall_arg_t *)args; mach_call_t mach_call; -#if defined(__i386__) || defined (__x86_64__) -#pragma unused(flavor) +#if defined (__x86_64__) { pal_register_cache_state(current_thread(), VALID); x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread()); @@ -862,7 +856,7 @@ dtrace_machtrace_syscall(struct mach_call_args *args) } static void -machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed) +machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed) { machtrace_sysent_t *msysent = *interposed; int i; @@ -873,7 +867,7 @@ machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed) } for (i = 0; i < NSYSCALL; i++) { - mach_trap_t *a = &actual[i]; + const mach_trap_t *a = &actual[i]; machtrace_sysent_t *s = &msysent[i]; if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a)) diff --git a/bsd/dev/i386/conf.c b/bsd/dev/i386/conf.c index bff6f7232..cfaae450b 100644 --- a/bsd/dev/i386/conf.c +++ b/bsd/dev/i386/conf.c @@ -121,7 +121,6 @@ extern d_ioctl_t volioctl; #endif extern d_open_t cttyopen; -extern d_close_t cttyclose; extern d_read_t cttyread; extern d_write_t cttywrite; extern d_ioctl_t cttyioctl; @@ -202,9 +201,9 @@ struct cdevsw cdevsw[] = }, NO_CDEVICE, /* 1*/ { - cttyopen, cttyclose, cttyread, cttywrite, /* 2*/ + cttyopen, nullclose, cttyread, cttywrite, /* 2*/ cttyioctl, nullstop, nullreset, 0, cttyselect, - eno_mmap, eno_strat, eno_getc, eno_putc, D_TTY | D_TRACKCLOSE + eno_mmap, eno_strat, eno_getc, eno_putc, D_TTY }, { nullopen, nullclose, mmread, mmwrite, /* 3*/ @@ -308,7 +307,7 @@ isdisk(dev_t dev, int type) } /* FALL THROUGH */ case VBLK: - if ((D_TYPEMASK & bdevsw[maj].d_type) == D_DISK) { + if (bdevsw[maj].d_type == D_DISK) { return (1); } break; diff --git a/bsd/dev/i386/dtrace_isa.c b/bsd/dev/i386/dtrace_isa.c index bdb177028..05f366291 100644 --- a/bsd/dev/i386/dtrace_isa.c +++ b/bsd/dev/i386/dtrace_isa.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -436,7 +436,7 @@ dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) if (regs == NULL) goto zero; - *pcstack++ = (uint64_t)proc_selfpid(); + *pcstack++ = (uint64_t)dtrace_proc_selfpid(); pcstack_limit--; if (pcstack_limit <= 0) @@ -563,7 +563,7 @@ dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) if (regs == NULL) goto zero; - *pcstack++ = (uint64_t)proc_selfpid(); + *pcstack++ = (uint64_t)dtrace_proc_selfpid(); pcstack_limit--; if (pcstack_limit <= 0) @@ -679,11 +679,7 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, while (depth < pcstack_limit) { nextfp = *(struct frame **)fp; -#if defined(__x86_64__) pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64); -#else - pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET); -#endif if (nextfp <= minfp || nextfp >= stacktop) { if (on_intr) { @@ -747,13 +743,11 @@ dtrace_getarg(int arg, int aframes) int i; -#if defined(__x86_64__) /* * A total of 6 arguments are passed via registers; any argument with * index of 5 or lower is therefore in a register. */ int inreg = 5; -#endif for (i = 1; i <= aframes; i++) { fp = fp->backchain; @@ -762,18 +756,6 @@ dtrace_getarg(int arg, int aframes) if (dtrace_invop_callsite_pre != NULL && pc > (uintptr_t)dtrace_invop_callsite_pre && pc <= (uintptr_t)dtrace_invop_callsite_post) { -#if defined(__i386__) - /* - * If we pass through the invalid op handler, we will - * use the pointer that it passed to the stack as the - * second argument to dtrace_invop() as the pointer to - * the frame we're hunting for. - */ - - stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */ - fp = (struct frame *)stack[1]; /* Grab *second* argument */ - stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */ -#elif defined(__x86_64__) /* * In the case of x86_64, we will use the pointer to the * save area structure that was pushed when we took the @@ -803,9 +785,6 @@ dtrace_getarg(int arg, int aframes) arguments */ arg -= inreg + 1; } -#else -#error Unknown arch -#endif goto load; } } @@ -819,7 +798,6 @@ dtrace_getarg(int arg, int aframes) */ arg++; /* Advance past probeID */ -#if defined(__x86_64__) if (arg <= inreg) { /* * This shouldn't happen. If the argument is passed in a @@ -831,7 +809,6 @@ dtrace_getarg(int arg, int aframes) } arg -= (inreg + 1); -#endif stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */ load: diff --git a/bsd/dev/i386/fbt_x86.c b/bsd/dev/i386/fbt_x86.c index ef45ffacd..750a024cf 100644 --- a/bsd/dev/i386/fbt_x86.c +++ b/bsd/dev/i386/fbt_x86.c @@ -217,8 +217,6 @@ static const char * probe_ctx_closure[] = "prf", "proc_is64bit", "proc_selfname", - "proc_selfpid", - "proc_selfppid", "psignal_lock", "rtc_nanotime_load", "rtc_nanotime_read", @@ -289,7 +287,8 @@ is_module_valid(struct modctl* ctl) /* * These drivers control low level functions that when traced - * cause problems, especially in the sleep/wake paths. + * cause problems often in the sleep/wake paths as well as + * critical debug and panic paths. * If somebody really wants to drill in on one of these kexts, then * they can override blacklisting using the boot-arg above. 
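For readers skimming the blacklist changes in this hunk, the test is_module_valid() applies to ctl->mod_modname is a plain substring match. A minimal C sketch follows (the table form and function name are illustrative, not the patch's code; the entries are kext names this function checks, including the AppleEFI entry added here):

#include <stdbool.h>
#include <string.h>

/* Kexts whose low-level code must not be traced; tracing them has
 * caused problems in sleep/wake and debug/panic paths. The boot-arg
 * mentioned above bypasses this test entirely. */
static const char *fbt_module_blacklist[] = {
	"AppleIntelProfile",
	"AppleEFI",		/* newly blacklisted by this patch */
};

static bool
module_is_blacklisted(const char *modname)
{
	unsigned int i;

	for (i = 0; i < sizeof(fbt_module_blacklist) / sizeof(fbt_module_blacklist[0]); i++) {
		/* substring match, mirroring the strstr() calls above */
		if (strstr(modname, fbt_module_blacklist[i]) != NULL)
			return true;
	}
	return false;
}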
*/ @@ -313,9 +312,10 @@ is_module_valid(struct modctl* ctl) return FALSE; if (strstr(ctl->mod_modname, "AppleIntelProfile") != NULL) - return FALSE; - - + return FALSE; + + if (strstr(ctl->mod_modname, "AppleEFI") != NULL) + return FALSE; return TRUE; } @@ -472,10 +472,13 @@ is_symbol_valid(const char* name) LIT_STRNSTART(name, "kdbg_") || LIT_STRNSTART(name, "kdebug_") || LIT_STRNSTART(name, "kernel_debug") || + LIT_STRNSTART(name, "debug_") || LIT_STRNEQL(name, "Debugger") || LIT_STRNEQL(name, "Call_DebuggerC") || LIT_STRNEQL(name, "lock_debugger") || LIT_STRNEQL(name, "unlock_debugger") || + LIT_STRNEQL(name, "packA") || + LIT_STRNEQL(name, "unpackA") || LIT_STRNEQL(name, "SysChoked")) { return FALSE; } @@ -493,547 +496,6 @@ is_symbol_valid(const char* name) return TRUE; } -#if defined(__i386__) -int -fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) -{ - uintptr_t stack0 = 0, stack1 = 0, stack2 = 0, stack3 = 0, stack4 = 0; - fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; - - for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { - if ((uintptr_t)fbt->fbtp_patchpoint == addr) { - - if (fbt->fbtp_roffset == 0) { - uintptr_t *stacktop; - if (CPU_ON_INTR(CPU)) - stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top(); - else - stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); - - stack += 1; /* skip over the target's pushl'd %ebp */ - - if (stack <= stacktop) - CPU->cpu_dtrace_caller = *stack++; - if (stack <= stacktop) - stack0 = *stack++; - if (stack <= stacktop) - stack1 = *stack++; - if (stack <= stacktop) - stack2 = *stack++; - if (stack <= stacktop) - stack3 = *stack++; - if (stack <= stacktop) - stack4 = *stack++; - - /* 32-bit ABI, arguments passed on stack. */ - dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4); - CPU->cpu_dtrace_caller = 0; - } else { - dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); - CPU->cpu_dtrace_caller = 0; - } - - return (fbt->fbtp_rval); - } - } - - return (0); -} - -#define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0)) -#define T_INVALID_OPCODE 6 -#define FBT_EXCEPTION_CODE T_INVALID_OPCODE -#define T_PREEMPT 255 - -kern_return_t -fbt_perfCallback( - int trapno, - x86_saved_state_t *tagged_regs, - uintptr_t *lo_spp, - __unused int unused ) -{ - kern_return_t retval = KERN_FAILURE; - x86_saved_state32_t *saved_state = saved_state32(tagged_regs); - struct x86_saved_state32_from_kernel *regs = (struct x86_saved_state32_from_kernel *)saved_state; - - if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) { - boolean_t oldlevel, cpu_64bit; - uint32_t esp_probe, fp, *pDst, delta = 0; - uintptr_t old_sp; - int emul; - - cpu_64bit = ml_is64bit(); - oldlevel = ml_set_interrupts_enabled(FALSE); - - /* Calculate where the stack pointer was when the probe instruction "fired." 
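The symbol blacklist additions just above (debug_*, packA, unpackA) lean on two literal-string helpers whose definitions live elsewhere in xnu. The following is an assumed-equivalent sketch showing why one is a prefix test and the other an exact match (sizeof on a string literal counts the trailing NUL):

#include <string.h>

/* Assumed shape of the helpers used by is_symbol_valid(); the real
 * macros are defined elsewhere in the tree. Excluding the NUL from
 * the length gives a prefix test; including it demands equality. */
#define LIT_STRNSTART(s, lit)	(strncmp((s), (lit), sizeof(lit) - 1) == 0)
#define LIT_STRNEQL(s, lit)	(strncmp((s), (lit), sizeof(lit)) == 0)

/* So LIT_STRNSTART(name, "debug_") rejects every debug_* symbol,
 * while LIT_STRNEQL(name, "packA") rejects only that one symbol. */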
*/ - if (cpu_64bit) { - esp_probe = saved_state->uesp; /* Easy, x86_64 establishes this value in idt64.s */ - } else { - esp_probe = (uint32_t)&(regs[1]); /* Nasty, infer the location above the save area */ - } - - __asm__ volatile( - "Ldtrace_invop_callsite_pre_label:\n" - ".data\n" - ".private_extern _dtrace_invop_callsite_pre\n" - "_dtrace_invop_callsite_pre:\n" - " .long Ldtrace_invop_callsite_pre_label\n" - ".text\n" - ); - - emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax ); - - __asm__ volatile( - "Ldtrace_invop_callsite_post_label:\n" - ".data\n" - ".private_extern _dtrace_invop_callsite_post\n" - "_dtrace_invop_callsite_post:\n" - " .long Ldtrace_invop_callsite_post_label\n" - ".text\n" - ); - - switch (emul) { - case DTRACE_INVOP_NOP: - saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt.) */ - retval = KERN_SUCCESS; - break; - - case DTRACE_INVOP_MOVL_ESP_EBP: - saved_state->ebp = esp_probe; /* Emulate patched movl %esp,%ebp */ - saved_state->eip += DTRACE_INVOP_MOVL_ESP_EBP_SKIP; /* Skip over the bytes of the patched movl %esp,%ebp */ - retval = KERN_SUCCESS; - break; - - case DTRACE_INVOP_POPL_EBP: - case DTRACE_INVOP_LEAVE: -/* - * Emulate first micro-op of patched leave: movl %ebp,%esp - * fp points just below the return address slot for target's ret - * and at the slot holding the frame pointer saved by the target's prologue. - */ - fp = saved_state->ebp; -/* Emulate second micro-op of patched leave: patched popl %ebp - * savearea ebp is set for the frame of the caller to target - * The *live* %esp will be adjusted below for pop increment(s) - */ - saved_state->ebp = *(uint32_t *)fp; -/* Skip over the patched leave */ - saved_state->eip += DTRACE_INVOP_LEAVE_SKIP; -/* - * Lift the stack to account for the emulated leave - * Account for words local in this frame - * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.) - */ - delta = ((uint32_t *)fp) - ((uint32_t *)esp_probe); -/* Account for popping off the ebp (just accomplished by the emulation - * above...) - */ - delta += 1; - - if (cpu_64bit) - saved_state->uesp += (delta << 2); -/* Obtain the stack pointer recorded by the trampolines */ - old_sp = *lo_spp; -/* Shift contents of stack */ - for (pDst = (uint32_t *)fp; - pDst > (((uint32_t *)old_sp)); - pDst--) - *pDst = pDst[-delta]; - -/* Track the stack lift in "saved_state". */ - saved_state = (x86_saved_state32_t *) (((uintptr_t)saved_state) + (delta << 2)); -/* Adjust the stack pointer utilized by the trampolines */ - *lo_spp = old_sp + (delta << 2); - - retval = KERN_SUCCESS; - break; - - default: - retval = KERN_FAILURE; - break; - } - saved_state->trapno = T_PREEMPT; /* Avoid call to i386_astintr()! 
*/ - - ml_set_interrupts_enabled(oldlevel); - } - - return retval; -} - -/*ARGSUSED*/ -static void -__provide_probe_32(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart) -{ - unsigned int j; - unsigned int doenable = 0; - dtrace_id_t thisid; - - fbt_probe_t *newfbt, *retfbt, *entryfbt; - machine_inst_t *instr, *limit, theInstr, i1, i2; - int size; - - for (j = 0, instr = symbolStart, theInstr = 0; - (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); - j++) { - theInstr = instr[0]; - if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16) - break; - - if ((size = dtrace_instr_size(instr)) <= 0) - break; - - instr += size; - } - - if (theInstr != FBT_PUSHL_EBP) - return; - - i1 = instr[1]; - i2 = instr[2]; - - limit = (machine_inst_t *)instrHigh; - - if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) || - (i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) { - instr += 1; /* Advance to the movl %esp,%ebp */ - theInstr = i1; - } else { - /* - * Sometimes, the compiler will schedule an intervening instruction - * in the function prologue. Example: - * - * _mach_vm_read: - * 000006d8 pushl %ebp - * 000006d9 movl $0x00000004,%edx - * 000006de movl %esp,%ebp - * - * Try the next instruction, to see if it is a movl %esp,%ebp - */ - - instr += 1; /* Advance past the pushl %ebp */ - if ((size = dtrace_instr_size(instr)) <= 0) - return; - - instr += size; - - if ((instr + 1) >= limit) - return; - - i1 = instr[0]; - i2 = instr[1]; - - if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) && - !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) - return; - - /* instr already points at the movl %esp,%ebp */ - theInstr = i1; - } - - thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY); - newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); - strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS ); - - if (thisid != 0) { - /* - * The dtrace_probe previously existed, so we have to hook - * the newfbt entry onto the end of the existing fbt's chain. - * If we find an fbt entry that was previously patched to - * fire, (as indicated by the current patched value), then - * we want to enable this newfbt on the spot. - */ - entryfbt = dtrace_probe_arg (fbt_id, thisid); - ASSERT (entryfbt != NULL); - for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) { - if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval) - doenable++; - - if (entryfbt->fbtp_next == NULL) { - entryfbt->fbtp_next = newfbt; - newfbt->fbtp_id = entryfbt->fbtp_id; - break; - } - } - } - else { - /* - * The dtrace_probe did not previously exist, so we - * create it and hook in the newfbt. Since the probe is - * new, we obviously do not need to enable it on the spot. - */ - newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt); - doenable = 0; - } - - - newfbt->fbtp_patchpoint = instr; - newfbt->fbtp_ctl = ctl; - newfbt->fbtp_loadcnt = ctl->mod_loadcnt; - newfbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP; - newfbt->fbtp_savedval = theInstr; - newfbt->fbtp_patchval = FBT_PATCHVAL; - newfbt->fbtp_currentval = 0; - newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; - fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt; - - if (doenable) - fbt_enable(NULL, newfbt->fbtp_id, newfbt); - - /* - * The fbt entry chain is in place, one entry point per symbol. 
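The entry-chain bookkeeping described above is not unique to the 32-bit code being removed; the x86_64 provider keeps the same walk. As a reading aid, a simplified C sketch of the append-and-maybe-enable logic (field set reduced to what the walk touches; not the patch's code):

/* One node per probe site; fbtp_currentval equal to fbtp_patchval
 * means the site is currently patched to fire. */
typedef struct fbt_probe_sketch {
	struct fbt_probe_sketch	*fbtp_next;
	unsigned int		fbtp_id;
	unsigned char		fbtp_currentval;
	unsigned char		fbtp_patchval;
} fbt_probe_sketch_t;

/* Append newfbt to an existing chain. Returns nonzero if any member
 * is already live, in which case the caller enables newfbt on the
 * spot. (When no chain exists, the caller creates a fresh probe ID
 * instead, as the surrounding code does.) */
static int
fbt_chain_append(fbt_probe_sketch_t *entryfbt, fbt_probe_sketch_t *newfbt)
{
	int doenable = 0;

	for (; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
		if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
			doenable++;
		if (entryfbt->fbtp_next == NULL) {
			entryfbt->fbtp_next = newfbt;
			newfbt->fbtp_id = entryfbt->fbtp_id;
			break;
		}
	}
	return doenable;
}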
- * The fbt return chain can have multiple return points per symbol. - * Here we find the end of the fbt return chain. - */ - - doenable=0; - - thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN); - if (thisid != 0) { - /* The dtrace_probe previously existed, so we have to - * find the end of the existing fbt chain. If we find - * an fbt return that was previously patched to fire, - * (as indicated by the currrent patched value), then - * we want to enable any new fbts on the spot. - */ - retfbt = dtrace_probe_arg (fbt_id, thisid); - ASSERT(retfbt != NULL); - for (; retfbt != NULL; retfbt = retfbt->fbtp_next) { - if (retfbt->fbtp_currentval == retfbt->fbtp_patchval) - doenable++; - if(retfbt->fbtp_next == NULL) - break; - } - } - else { - doenable = 0; - retfbt = NULL; - } - -again: - if (instr >= limit) - return; - - /* - * If this disassembly fails, then we've likely walked off into - * a jump table or some other unsuitable area. Bail out of the - * disassembly now. - */ - if ((size = dtrace_instr_size(instr)) <= 0) - return; - - /* - * We (desperately) want to avoid erroneously instrumenting a - * jump table, especially given that our markers are pretty - * short: two bytes on x86, and just one byte on amd64. To - * determine if we're looking at a true instruction sequence - * or an inline jump table that happens to contain the same - * byte sequences, we resort to some heuristic sleeze: we - * treat this instruction as being contained within a pointer, - * and see if that pointer points to within the body of the - * function. If it does, we refuse to instrument it. - */ - for (j = 0; j < sizeof (uintptr_t); j++) { - uintptr_t check = (uintptr_t)instr - j; - uint8_t *ptr; - - if (check < (uintptr_t)symbolStart) - break; - - if (check + sizeof (uintptr_t) > (uintptr_t)limit) - continue; - - ptr = *(uint8_t **)check; - - if (ptr >= (uint8_t *)symbolStart && ptr < limit) { - instr += size; - goto again; - } - } - - /* - * OK, it's an instruction. - */ - theInstr = instr[0]; - - /* Walked onto the start of the next routine? If so, bail out of this function. */ - if (theInstr == FBT_PUSHL_EBP) - return; - - if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) { - instr += size; - goto again; - } - - /* - * Found the popl %ebp; or leave. - */ - machine_inst_t *patch_instr = instr; - - /* - * Scan forward for a "ret", or "jmp". - */ - instr += size; - if (instr >= limit) - return; - - size = dtrace_instr_size(instr); - if (size <= 0) /* Failed instruction decode? */ - return; - - theInstr = instr[0]; - - if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) && - !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) && - !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) && - !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) && - !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS))) - return; - - /* - * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner! 
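The jump-table guard spelled out a few lines up survives, in the same form, in the 64-bit scanner that remains after this deletion. Restated as self-contained C (simplified from the code above; semantics unchanged):

#include <stdbool.h>
#include <stdint.h>

/* Slide a pointer-sized window backwards over the candidate
 * instruction. If any such window, read as a pointer, lands inside
 * [start, limit), assume an inline jump table and refuse to
 * instrument this address. */
static bool
looks_like_jump_table(const uint8_t *instr, const uint8_t *start, const uint8_t *limit)
{
	unsigned int j;

	for (j = 0; j < sizeof(uintptr_t); j++) {
		uintptr_t check = (uintptr_t)instr - j;

		if (check < (uintptr_t)start)
			break;
		if (check + sizeof(uintptr_t) > (uintptr_t)limit)
			continue;
		const uint8_t *ptr = *(const uint8_t *const *)check;
		if (ptr >= start && ptr < limit)
			return true;
	}
	return false;
}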
- */ - newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); - strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS ); - - if (retfbt == NULL) { - newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, - symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt); - } else { - retfbt->fbtp_next = newfbt; - newfbt->fbtp_id = retfbt->fbtp_id; - } - - retfbt = newfbt; - newfbt->fbtp_patchpoint = patch_instr; - newfbt->fbtp_ctl = ctl; - newfbt->fbtp_loadcnt = ctl->mod_loadcnt; - - if (*patch_instr == FBT_POPL_EBP) { - newfbt->fbtp_rval = DTRACE_INVOP_POPL_EBP; - } else { - ASSERT(*patch_instr == FBT_LEAVE); - newfbt->fbtp_rval = DTRACE_INVOP_LEAVE; - } - newfbt->fbtp_roffset = - (uintptr_t)(patch_instr - (uint8_t *)symbolStart); - - newfbt->fbtp_savedval = *patch_instr; - newfbt->fbtp_patchval = FBT_PATCHVAL; - newfbt->fbtp_currentval = 0; - newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)]; - fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt; - - if (doenable) - fbt_enable(NULL, newfbt->fbtp_id, newfbt); - - instr += size; - goto again; -} - -static void -__kernel_syms_provide_module(void *arg, struct modctl *ctl) -{ -#pragma unused(arg) - kernel_mach_header_t *mh; - struct load_command *cmd; - kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; - struct symtab_command *orig_st = NULL; - struct nlist *sym = NULL; - char *strings; - uintptr_t instrLow, instrHigh; - char *modname; - unsigned int i; - - mh = (kernel_mach_header_t *)(ctl->mod_address); - modname = ctl->mod_modname; - - if (mh->magic != MH_MAGIC) - return; - - cmd = (struct load_command *) &mh[1]; - for (i = 0; i < mh->ncmds; i++) { - if (cmd->cmd == LC_SEGMENT_KERNEL) { - kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd; - - if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT)) - orig_ts = orig_sg; - else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT)) - orig_le = orig_sg; - else if (LIT_STRNEQL(orig_sg->segname, "")) - orig_ts = orig_sg; /* kexts have a single unnamed segment */ - } - else if (cmd->cmd == LC_SYMTAB) - orig_st = (struct symtab_command *) cmd; - - cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); - } - - if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) - return; - - sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); - strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); - - /* Find extent of the TEXT section */ - instrLow = (uintptr_t)orig_ts->vmaddr; - instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize); - - for (i = 0; i < orig_st->nsyms; i++) { - uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT); - char *name = strings + sym[i].n_un.n_strx; - - /* Check that the symbol is a global and that it has a name. */ - if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type)) - continue; - - if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */ - continue; - - /* Lop off omnipresent leading underscore. */ - if (*name == '_') - name += 1; - - /* - * We're only blacklisting functions in the kernel for now. 
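Both this 32-bit walker and the 64-bit one that survives apply the same nlist filter before providing probes. A hedged restatement in C (the constants mirror <mach-o/nlist.h>; the function itself is illustrative):

#include <stdbool.h>

#define N_EXT	0x01	/* external (global) symbol */
#define N_TYPE	0x0e	/* mask for the type bits */
#define N_SECT	0x0e	/* defined in some section */
#define N_ABS	0x02	/* absolute symbol */

/* Keep only named, global, defined symbols, then lop off the
 * omnipresent leading underscore, as the walkers above do. */
static bool
symbol_is_candidate(unsigned char n_type_raw, const char **namep)
{
	unsigned char n_type = n_type_raw & (N_TYPE | N_EXT);

	if (n_type != (N_SECT | N_EXT) && n_type != (N_ABS | N_EXT))
		return false;
	if ((*namep)[0] == '\0')
		return false;		/* the null, "", name */
	if ((*namep)[0] == '_')
		(*namep)++;
	return true;
}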
- */ - if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name)) - continue; - - __provide_probe_32(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value); - } -} - -static void -__user_syms_provide_module(void *arg, struct modctl *ctl) -{ -#pragma unused(arg) - char *modname; - unsigned int i; - - modname = ctl->mod_modname; - - dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols; - if (module_symbols) { - for (i=0; i<module_symbols->dtmodsyms_count; i++) { - dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i]; - char* name = symbol->dtsym_name; - - /* Lop off omnipresent leading underscore. */ - if (*name == '_') - name += 1; - - /* - * We're only blacklisting functions in the kernel for now. - */ - if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name)) - continue; - - __provide_probe_32(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr); - } - } -} - -#elif defined(__x86_64__) int fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval) { @@ -1456,7 +918,7 @@ __kernel_syms_provide_module(void *arg, struct modctl *ctl) struct load_command *cmd; kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; struct symtab_command *orig_st = NULL; - struct nlist_64 *sym = NULL; + kernel_nlist_t *sym = NULL; char *strings; uintptr_t instrLow, instrHigh; char *modname; @@ -1465,7 +927,7 @@ __kernel_syms_provide_module(void *arg, struct modctl *ctl) mh = (kernel_mach_header_t *)(ctl->mod_address); modname = ctl->mod_modname; - if (mh->magic != MH_MAGIC_64) + if (mh->magic != MH_MAGIC_KERNEL) return; cmd = (struct load_command *) &mh[1]; @@ -1489,7 +951,7 @@ __kernel_syms_provide_module(void *arg, struct modctl *ctl) if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) return; - sym = (struct nlist_64 *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); + sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); /* Find extent of the TEXT section */ @@ -1556,9 +1018,6 @@ __user_syms_provide_module(void *arg, struct modctl *ctl) } } } -#else -#error Unknown arch -#endif extern int dtrace_kernel_symbol_mode; diff --git a/bsd/dev/i386/munge.s b/bsd/dev/i386/munge.s deleted file mode 100644 index 9df397097..000000000 --- a/bsd/dev/i386/munge.s +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Coyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Syscall argument mungers. - * - * The data to be munged has been explicitly copied in to the argument area, - * and will be munged in place in the uu_arg[] array. Because of this, the - * functions all take the same arguments as their PPC equivalents, but the - * first argument is ignored. These mungers are for 32-bit app's syscalls, - * since 64-bit args are stored into the save area (which overlays the - * uu_args) in the order the syscall ABI calls for. - * - * The issue is that the incoming args are 32-bit, but we must expand - * them in place into 64-bit args, as if they were from a 64-bit process. - * - * There are several functions in this file. Each takes two parameters: - * - * void munge_XXXX( const void *regs, void *uu_args); - * - * The name of the function encodes the number and type of the parameters, - * as follows: - * - * w = a 32-bit value such as an int or a 32-bit ptr, that does not - * require sign extension. These are handled by zeroing a word - * of output, and copying a word from input to output. - * - * s = a 32-bit value such as a long, which must be sign-extended to - * a 64-bit long-long in the uu_args. These are handled by - * loading a word of input and sign extending it to a double, - * and storing two words of output. - * - * l = a 64-bit long-long. These are handled by copying two words - * of input to the output. - * - * For example, "munge_wls" takes a word, a long-long, and a word. This - * takes four words in the uu_arg[] area: the first word is in one, the - * long-long takes two, and the final word is in the fourth. We store six - * words: the low word is left in place, followed by a 0, followed by the - * two words of the long-long, followed by the low word and the sign extended - * high word of the preceeding low word. - * - * Because this is an in-place modification, we actually start at the end - * of uu_arg[] and work our way back to the beginning of the array. - * - * As you can see, we save a lot of code by collapsing mungers that are - * prefixes or suffixes of each other. 
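The assembly below and the C replacement this patch adds as bsd/dev/munge.c both implement the scheme this comment describes. To make the back-to-front, in-place expansion concrete, here is a C rendering of the comment's own "wls" example (an illustrative sketch in the style of the new munge.c, not code from the patch):

#include <stdint.h>

/* Expand a 32-bit caller's "wls" argument block in place.
 * in  (32-bit words): [w][l lo][l hi][s]
 * out (64-bit slots): [w zero-extended][l][s sign-extended]
 * Working from the last argument backwards guarantees each store
 * only overwrites input words that have already been consumed. */
static void
munge_wls_sketch(const void *regs, void *args)
{
	volatile uint64_t *out_args = (volatile uint64_t *)args;
	volatile uint32_t *in_args = (volatile uint32_t *)args;

	(void)regs;	/* ignored, as in the mungers above */
	out_args[2] = (int64_t)(int32_t)in_args[3];	/* s: sign-extend */
	out_args[1] = *(uint64_t *)&in_args[1];		/* l: copy through */
	out_args[0] = in_args[0];			/* w: zero-extend */
}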
- */ -#include - -ENTRY(munge_w) - movl 8(%esp),%ecx // get &uu_args - movl $0,4(%ecx) - ret - -ENTRY(munge_ww) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - jmp Lw2 -ENTRY(munge_www) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - jmp Lw3 -ENTRY(munge_wwww) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - jmp Lw4 -ENTRY(munge_wwwww) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - jmp Lw5 -ENTRY(munge_wwwwww) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - jmp Lw6 -ENTRY(munge_wwwwwww) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - jmp Lw7 -ENTRY(munge_wwwwwwww) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 28(%ecx),%eax - movl %eax,56(%ecx) - movl %edx,60(%ecx) -Lw7: - movl 24(%ecx),%eax - movl %eax,48(%ecx) - movl %edx,52(%ecx) -Lw6: - movl 20(%ecx),%eax - movl %eax,40(%ecx) - movl %edx,44(%ecx) -Lw5: - movl 16(%ecx),%eax - movl %eax,32(%ecx) - movl %edx,36(%ecx) -Lw4: - movl 12(%ecx),%eax - movl %eax,24(%ecx) - movl %edx,28(%ecx) -Lw3: - movl 8(%ecx),%eax - movl %eax,16(%ecx) - movl %edx,20(%ecx) -Lw2: - movl 4(%ecx),%eax - movl %eax,8(%ecx) - movl %edx,12(%ecx) - movl %edx,4(%ecx) - ret - - -Entry(munge_wl) /* Costs an extra w move to do this */ -ENTRY(munge_wlw) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx -Lwlw: - movl 12(%ecx),%eax //l - movl %eax,16(%ecx) - movl %edx,20(%ecx) -Lwl: - movl 8(%ecx),%eax //l - movl %eax,12(%ecx) - movl 4(%ecx),%eax - movl %eax,8(%ecx) - movl %edx,4(%ecx) //w - ret - -ENTRY(munge_wlwwwll) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx -Lwlwwwll: - movl 36(%ecx),%eax - movl %eax,52(%ecx) - movl 32(%ecx),%eax - movl %eax,48(%ecx) - movl 28(%ecx),%eax - movl %eax,44(%ecx) - movl 24(%ecx),%eax - movl %eax,40(%ecx) - movl 20(%ecx),%eax - movl %eax,32(%ecx) - movl %edx,36(%ecx) -Lwlww: - movl 16(%ecx),%eax - movl %eax,24(%ecx) - movl %edx,28(%ecx) - jmp Lwlw - -ENTRY(munge_wlwwwllw) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 40(%ecx),%eax - movl %eax,56(%ecx) - movl %edx,60(%ecx) - jmp Lwlwwwll - -ENTRY(munge_wlwwlwlw) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 40(%ecx),%eax - movl %eax,56(%ecx) - movl %edx,60(%ecx) - movl 36(%ecx),%eax - movl %eax,52(%ecx) - movl 32(%ecx),%eax - movl %eax,48(%ecx) - movl 28(%ecx),%eax - movl %eax,40(%ecx) - movl %edx,44(%ecx) - movl 24(%ecx),%eax - movl %eax,36(%ecx) - movl 20(%ecx),%eax - movl %eax,32(%ecx) - jmp Lwlww - -ENTRY(munge_wllwwll) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - - movl 40(%ecx),%eax // l - movl %eax,52(%ecx) - movl 36(%ecx),%eax - movl %eax,48(%ecx) - movl 32(%ecx),%eax // l - movl %eax,44(%ecx) - movl 28(%ecx),%eax - movl %eax,40(%ecx) - - movl 24(%ecx),%eax //w - movl %eax,32(%ecx) - movl %edx,36(%ecx) - movl 20(%ecx),%eax //w - movl %eax,24(%ecx) - movl %edx,28(%ecx) - - movl 16(%ecx),%eax //l - movl %eax,20(%ecx) - movl 12(%ecx),%eax - movl %eax,16(%ecx) - jmp Lwl - -Entry(munge_wwwlw) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 20(%ecx),%eax - movl %eax,32(%ecx) - movl %edx,36(%ecx) - jmp Lwwwl - -ENTRY(munge_wwwl) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx -Lwwwl: - movl 12(%ecx),%eax - movl %eax,24(%ecx) - movl 16(%ecx),%eax - movl %eax,28(%ecx) - jmp Lw3 - -ENTRY(munge_wwwwlw) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 24(%ecx),%eax - movl %eax,40(%ecx) - movl %edx,44(%ecx) - jmp Lwwwwl - -ENTRY(munge_wwwwl) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx -Lwwwwl: - movl 16(%ecx),%eax - movl %eax,32(%ecx) - movl 20(%ecx),%eax - 
movl %eax,36(%ecx) - jmp Lw4 - -ENTRY(munge_wwwwwl) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 20(%ecx),%eax - movl %eax,40(%ecx) - movl 24(%ecx),%eax - movl %eax,44(%ecx) - jmp Lw5 - -ENTRY(munge_wwwwwlww) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 32(%ecx),%eax - movl %eax,56(%ecx) - movl %edx,60(%ecx) - movl 28(%ecx),%eax - movl %eax,48(%ecx) - movl %edx,52(%ecx) - movl 20(%ecx),%eax - movl %eax,40(%ecx) - movl 24(%ecx),%eax - movl %eax,44(%ecx) - jmp Lw5 - -ENTRY(munge_wwwwwllw) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 36(%ecx),%eax - movl %eax,56(%ecx) - movl %edx,60(%ecx) - movl 28(%ecx),%eax - movl %eax,48(%ecx) - movl 32(%ecx),%eax - movl %eax,52(%ecx) - movl 20(%ecx),%eax - movl %eax,40(%ecx) - movl 24(%ecx),%eax - movl %eax,44(%ecx) - jmp Lw5 - -ENTRY(munge_wwwwwlll) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 36(%ecx),%eax - movl %eax,56(%ecx) - movl 40(%ecx),%eax - movl %eax,60(%ecx) - movl 28(%ecx),%eax - movl %eax,48(%ecx) - movl 32(%ecx),%eax - movl %eax,52(%ecx) - movl 20(%ecx),%eax - movl %eax,40(%ecx) - movl 24(%ecx),%eax - movl %eax,44(%ecx) - jmp Lw5 - -ENTRY(munge_wwwwwwl) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 24(%ecx),%eax - movl %eax,48(%ecx) - movl 28(%ecx),%eax - movl %eax,52(%ecx) - jmp Lw6 - -ENTRY(munge_wwwwwwlw) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 32(%ecx),%eax - movl %eax,56(%ecx) - movl %edx,60(%ecx) - movl 24(%ecx),%eax - movl %eax,48(%ecx) - movl 28(%ecx),%eax - movl %eax,52(%ecx) - jmp Lw6 - - -ENTRY(munge_wwwwwwll) - movl 8(%esp),%ecx // get &uu_args - xorl %edx,%edx - movl 32(%ecx),%eax - movl %eax,56(%ecx) - movl 36(%ecx),%eax - movl %eax,60(%ecx) - movl 24(%ecx),%eax - movl %eax,48(%ecx) - movl 28(%ecx),%eax - movl %eax,52(%ecx) - jmp Lw6 - -ENTRY(munge_wsw) - movl 8(%esp),%ecx // get &uu_args - movl 8(%ecx),%eax - movl %eax,16(%ecx) - movl $0,20(%ecx) - movl 4(%ecx),%eax - cltd - movl %eax,8(%ecx) - movl %edx,12(%ecx) - movl $0,4(%ecx) - ret - -ENTRY(munge_wws) - movl 8(%esp),%ecx // get &uu_args - movl 8(%ecx),%eax - cltd - movl %eax,16(%ecx) - movl %edx,20(%ecx) - xorl %edx,%edx - jmp Lw2 - -ENTRY(munge_wwwsw) - movl 8(%esp),%ecx // get &uu_args - movl 16(%ecx),%eax - movl %eax,32(%ecx) - movl $0,36(%ecx) - movl 12(%ecx),%eax - cltd - movl %eax,24(%ecx) - movl %edx,28(%ecx) - xorl %edx,%edx - jmp Lw3 - -ENTRY(munge_llllll) - ret // actually, this method has nothing to do - all - // arguments are already 64-bits, with no mixing of - // args that need sign/zero extension diff --git a/bsd/dev/i386/sdt_x86.c b/bsd/dev/i386/sdt_x86.c index 680ed779b..aeb7b3410 100644 --- a/bsd/dev/i386/sdt_x86.c +++ b/bsd/dev/i386/sdt_x86.c @@ -46,43 +46,6 @@ extern sdt_probe_t **sdt_probetab; -#if defined(__i386__) -/*ARGSUSED*/ -int -sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) -{ -#pragma unused(eax) - uintptr_t stack0 = 0, stack1 = 0, stack2 = 0, stack3 = 0, stack4 = 0; - sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)]; - - for (; sdt != NULL; sdt = sdt->sdp_hashnext) { - if ((uintptr_t)sdt->sdp_patchpoint == addr) { - uintptr_t *stacktop; - if (CPU_ON_INTR(CPU)) - stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top(); - else - stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); - - if (stack <= stacktop) - stack0 = *stack++; - if (stack <= stacktop) - stack1 = *stack++; - if (stack <= stacktop) - stack2 = *stack++; - if (stack <= stacktop) - stack3 = *stack++; - if (stack <= stacktop) - stack4 = 
*stack++; - - dtrace_probe(sdt->sdp_id, stack0, stack1, stack2, stack3, stack4); - - return (DTRACE_INVOP_NOP); - } - } - - return (0); -} -#elif defined(__x86_64__) /*ARGSUSED*/ int sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) @@ -102,9 +65,6 @@ sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) return (0); } -#else -#error Unknown arch -#endif struct frame { @@ -123,13 +83,11 @@ sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) uintptr_t pc; int i; -#if defined(__x86_64__) /* * A total of 6 arguments are passed via registers; any argument with * index of 5 or lower is therefore in a register. */ int inreg = 5; -#endif for (i = 1; i <= aframes; i++) { fp = fp->backchain; @@ -138,18 +96,6 @@ sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) if (dtrace_invop_callsite_pre != NULL && pc > (uintptr_t)dtrace_invop_callsite_pre && pc <= (uintptr_t)dtrace_invop_callsite_post) { -#if defined(__i386__) - /* - * If we pass through the invalid op handler, we will - * use the pointer that it passed to the stack as the - * second argument to dtrace_invop() as the pointer to - * the frame we're hunting for. - */ - - stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */ - fp = (struct frame *)stack[1]; /* Grab *second* argument */ - stack = (uintptr_t *)&fp[0]; /* Find marshalled arguments */ -#elif defined(__x86_64__) /* * In the case of x86_64, we will use the pointer to the * save area structure that was pushed when we took the @@ -179,9 +125,6 @@ sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) arguments */ argno -= (inreg +1); } -#else -#error Unknown arch -#endif goto load; } } @@ -195,7 +138,6 @@ sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) */ argno++; /* Advance past probeID */ -#if defined(__x86_64__) if (argno <= inreg) { /* * This shouldn't happen. 
If the argument is passed in a @@ -207,7 +149,6 @@ sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) } argno -= (inreg + 1); -#endif stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */ load: diff --git a/bsd/dev/i386/stubs.c b/bsd/dev/i386/stubs.c index ddc759c29..bf69ac8ac 100644 --- a/bsd/dev/i386/stubs.c +++ b/bsd/dev/i386/stubs.c @@ -120,17 +120,9 @@ copywithin(void *src, void *dst, size_t count) void * get_bsduthreadarg(thread_t th) { - void *arg_ptr; -struct uthread *ut; - + struct uthread *ut; ut = get_bsdthread_info(th); - - if (ml_thread_is64bit(th) == TRUE) - arg_ptr = (void *)saved_state64(find_user_regs(th)); - else - arg_ptr = (void *)(ut->uu_arg); - - return(arg_ptr); + return ut->uu_ap; } int * diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c index f97173075..d96ceb1c0 100644 --- a/bsd/dev/i386/sysctl.c +++ b/bsd/dev/i386/sysctl.c @@ -38,7 +38,6 @@ #include #include #include -#include static int @@ -776,3 +775,181 @@ SYSCTL_PROC(_machdep_misc, OID_AUTO, machine_check_panic, 0, 0, misc_machine_check_panic, "A", "Machine-check exception test"); + + +extern void timer_queue_trace_cpu(int); +static int +misc_timer_queue_trace(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int changed = 0, error; + char buf[128]; + buf[0] = '\0'; + + error = sysctl_io_string(req, buf, sizeof(buf), 0, &changed); + + if (error == 0 && changed) { + timer_queue_trace_cpu(0); + } + return error; +} + +SYSCTL_PROC(_machdep_misc, OID_AUTO, timer_queue_trace, + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, + misc_timer_queue_trace, "A", "Cut timer queue tracepoint"); + +extern long NMI_count; +extern void NMI_cpus(void); +static int +misc_nmis(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int new = 0, old = 0, changed = 0, error; + + old = NMI_count; + + error = sysctl_io_number(req, old, sizeof(int), &new, &changed); + if (error == 0 && changed) { + NMI_cpus(); + } + + return error; +} + +SYSCTL_PROC(_machdep_misc, OID_AUTO, nmis, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, + misc_nmis, "I", "Report/increment NMI count"); + +/* Parameters related to timer coalescing tuning, to be replaced + * with a dedicated systemcall in the future. + */ +/* Enable processing pending timers in the context of any other interrupt */ +SYSCTL_INT(_kern, OID_AUTO, interrupt_timer_coalescing_enabled, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &interrupt_timer_coalescing_enabled, 0, ""); +/* Upon entering idle, process pending timers with HW deadlines + * this far in the future. 
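Pending the dedicated syscall the comment above anticipates, these knobs are ordinary sysctls and can be inspected from userspace. A hypothetical probe of the first one (the name comes from the hunk above; whether it exists depends on the running kernel build):

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	int enabled = 0;
	size_t len = sizeof(enabled);

	if (sysctlbyname("kern.interrupt_timer_coalescing_enabled",
	    &enabled, &len, NULL, 0) != 0) {
		perror("sysctlbyname");
		return 1;
	}
	printf("timer processing in interrupt context: %s\n",
	    enabled ? "enabled" : "disabled");
	return 0;
}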
+ */ +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_idle_entry_hard_deadline_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &idle_entry_timer_processing_hdeadline_threshold, 0, ""); +/* Coalescing tuning parameters for various thread/task attributes */ +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_bg_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_bg_shift, 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_bg_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_bg_ns_max, ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_kt_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_kt_shift, 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_kt_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_kt_ns_max, ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_fp_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_fp_shift, 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_fp_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_fp_ns_max, ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_ts_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_ts_shift, 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_ts_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.timer_coalesce_ts_ns_max, ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier0_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[0], 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier0_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_ns_max[0], ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier1_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[1], 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier1_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_ns_max[1], ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier2_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[2], 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier2_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_ns_max[2], ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier3_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[3], 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier3_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_ns_max[3], ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier4_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[4], 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier4_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_ns_max[4], ""); + +SYSCTL_INT(_kern, OID_AUTO, timer_coalesce_tier5_scale, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_scale[5], 0, ""); + +SYSCTL_QUAD(_kern, OID_AUTO, timer_coalesce_tier5_ns_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcoal_prio_params.latency_qos_ns_max[5], ""); + +/* Track potentially expensive eager timer evaluations on QoS tier + * switches. 
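Each tier above pairs an integer shift ("scale") with a 64-bit nanosecond ceiling; the ceilings are CTLFLAG_RW quads and are written the same way with a quad-sized buffer. A hypothetical adjustment of the tier-0 ceiling (the value is purely illustrative):

#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	uint64_t ns_max = 5000000;	/* 5 ms cap; illustrative only */

	/* NULL old-value pointers make this a pure set operation */
	if (sysctlbyname("kern.timer_coalesce_tier0_ns_max",
	    NULL, NULL, &ns_max, sizeof(ns_max)) != 0) {
		perror("sysctlbyname");
		return 1;
	}
	return 0;
}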
+ */ +extern uint32_t ml_timer_eager_evaluations; + +SYSCTL_INT(_machdep, OID_AUTO, eager_timer_evaluations, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &ml_timer_eager_evaluations, 0, ""); + +extern uint64_t ml_timer_eager_evaluation_max; + +SYSCTL_QUAD(_machdep, OID_AUTO, eager_timer_evaluation_max, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, + &ml_timer_eager_evaluation_max, ""); + +/* Communicate the "user idle level" heuristic to the timer layer, and + * potentially other layers in the future. + */ + +static int +timer_set_user_idle_level(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { + int new_value = 0, old_value = 0, changed = 0, error; + + old_value = ml_timer_get_user_idle_level(); + + error = sysctl_io_number(req, old_value, sizeof(int), &new_value, &changed); + + if (error == 0 && changed) { + if (ml_timer_set_user_idle_level(new_value) != KERN_SUCCESS) + error = ERANGE; + } + + return error; +} + +SYSCTL_PROC(_machdep, OID_AUTO, user_idle_level, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + 0, 0, + timer_set_user_idle_level, "I", "User idle level heuristic, 0-128"); diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c index e8494ca4e..77ecfba3a 100644 --- a/bsd/dev/i386/systemcalls.c +++ b/bsd/dev/i386/systemcalls.c @@ -66,9 +66,6 @@ extern void unix_syscall(x86_saved_state_t *); extern void unix_syscall64(x86_saved_state_t *); extern void *find_user_regs(thread_t); -extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid); -extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread); - /* dynamically generated at build time based on syscalls.master */ extern const char *syscallnames[]; @@ -101,7 +98,6 @@ unix_syscall(x86_saved_state_t *state) struct proc *p; struct uthread *uthread; x86_saved_state32_t *regs; - boolean_t args_in_uthread; boolean_t is_vfork; assert(is_saved_state32(state)); @@ -132,7 +128,6 @@ unix_syscall(x86_saved_state_t *state) code = regs->eax & I386_SYSCALL_NUMBER_MASK; DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n", code, syscallnames[code >= NUM_SYSENT ? 
63 : code], (uint32_t)regs->eip); - args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread); params = (vm_offset_t) (regs->uesp + sizeof (int)); regs->efl &= ~(EFL_CF); @@ -146,22 +141,20 @@ unix_syscall(x86_saved_state_t *state) } vt = (void *)uthread->uu_arg; + uthread->uu_ap = vt; if (callp->sy_arg_bytes != 0) { sy_munge_t *mungerp; + uint32_t nargs; assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg)); - if (!args_in_uthread) - { - uint32_t nargs; - nargs = callp->sy_arg_bytes; - error = copyin((user_addr_t) params, (char *) vt, nargs); - if (error) { - regs->eax = error; - regs->efl |= EFL_CF; - thread_exception_return(); - /* NOTREACHED */ - } + nargs = callp->sy_arg_bytes; + error = copyin((user_addr_t) params, (char *) vt, nargs); + if (error) { + regs->eax = error; + regs->efl |= EFL_CF; + thread_exception_return(); + /* NOTREACHED */ } if (__probable(code != 180)) { @@ -255,7 +248,7 @@ unix_syscall(x86_saved_state_t *state) * delay in order to mitigate the impact of this * task on the normal operation of the system */ - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); } if (__probable(code != 180)) KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, @@ -326,6 +319,7 @@ unix_syscall64(x86_saved_state_t *state) uargp = (void *)(®s->rsi); args_in_regs = 5; } + uthread->uu_ap = uargp; if (callp->sy_narg != 0) { if (code != 180) { @@ -350,18 +344,10 @@ unix_syscall64(x86_saved_state_t *state) /* NOTREACHED */ } } - /* - * XXX Turn 64 bit unsafe calls into nosys() - */ - if (__improbable(callp->sy_flags & UNSAFE_64BIT)) { - callp = &sysent[63]; - goto unsafe; - } } else KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, 0, 0, 0, 0, 0); -unsafe: /* * Delayed binding of thread credential to process credential, if we @@ -455,7 +441,7 @@ unsafe: * delay in order to mitigate the impact of this * task on the normal operation of the system */ - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); } if (__probable(code != 180)) KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, @@ -602,7 +588,7 @@ unix_syscall_return(int error) * delay in order to mitigate the impact of this * task on the normal operation of the system */ - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); } if (code != 180) KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, @@ -613,46 +599,3 @@ unix_syscall_return(int error) /* NOTREACHED */ } -void -munge_wwwlww( - __unused const void *in32, - void *out64) -{ - uint32_t *arg32; - uint64_t *arg64; - - /* we convert in place in out64 */ - arg32 = (uint32_t *) out64; - arg64 = (uint64_t *) out64; - - arg64[5] = arg32[6]; /* wwwlwW */ - arg64[4] = arg32[5]; /* wwwlWw */ - arg32[7] = arg32[4]; /* wwwLww (hi) */ - arg32[6] = arg32[3]; /* wwwLww (lo) */ - arg64[2] = arg32[2]; /* wwWlww */ - arg64[1] = arg32[1]; /* wWwlww */ - arg64[0] = arg32[0]; /* Wwwlww */ -} - - -void -munge_wwlwww( - __unused const void *in32, - void *out64) -{ - uint32_t *arg32; - uint64_t *arg64; - - /* we convert in place in out64 */ - arg32 = (uint32_t *) out64; - arg64 = (uint64_t *) out64; - - arg64[5] = arg32[6]; /* wwlwwW */ - arg64[4] = arg32[5]; /* wwlwWw */ - arg64[3] = arg32[4]; /* wwlWww */ - arg32[5] = arg32[3]; /* wwLwww (hi) */ - arg32[4] = arg32[2]; /* wwLwww (lo) */ - arg64[1] = arg32[1]; /* wWlwww */ - arg64[0] = arg32[0]; /* Wwlwww */ -} - diff --git a/bsd/dev/memdev.c b/bsd/dev/memdev.c index 58fecce01..5a659bebd 100644 --- a/bsd/dev/memdev.c +++ b/bsd/dev/memdev.c @@ -354,6 +354,7 @@ static int mdevioctl(dev_t 
dev, u_long cmd, caddr_t data, __unused int flag, u_int32_t *f; u_int64_t *o; int devid; + dk_memdev_info_t * memdev_info; devid = minor(dev); /* Get minor device number */ @@ -364,6 +365,7 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag, f = (u_int32_t*)data; o = (u_int64_t *)data; + memdev_info = (dk_memdev_info_t *) data; switch (cmd) { @@ -408,7 +410,22 @@ static int mdevioctl(dev_t dev, u_long cmd, caddr_t data, __unused int flag, if(!(mdev[devid].mdFlags & mdInited)) return (ENXIO); *o = ((mdev[devid].mdSize << 12) + mdev[devid].mdSecsize - 1) / mdev[devid].mdSecsize; break; - + + /* + * We're interested in the following bits of information: + * Are you a memory-backed device (always yes, in this case)? + * Physical memory (mdPhys)? + * What is your base page? + * What is your size? + */ + case DKIOCGETMEMDEVINFO: + if (!(mdev[devid].mdFlags & mdInited)) return (ENXIO); + memdev_info->mi_mdev = TRUE; + memdev_info->mi_phys = (mdev[devid].mdFlags & mdPhys) ? TRUE : FALSE; + memdev_info->mi_base = mdev[devid].mdBase; + memdev_info->mi_size = mdev[devid].mdSize; + break; + default: error = ENOTTY; break; diff --git a/bsd/dev/munge.c b/bsd/dev/munge.c new file mode 100644 index 000000000..168fad929 --- /dev/null +++ b/bsd/dev/munge.c @@ -0,0 +1,528 @@ +/* + * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License.
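The DKIOCGETMEMDEVINFO case added to mdevioctl() above reports whether a minor is memory-backed, whether the backing is physical memory (mdPhys), and its base page and size in pages. A minimal user-space sketch of querying it follows; the device path is hypothetical, and it assumes the dk_memdev_info_t declarations in <sys/disk.h> are visible to the caller (on some SDKs they sit in the private section of that header).

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/disk.h>          /* dk_memdev_info_t, DKIOCGETMEMDEVINFO */

int main(void)
{
	dk_memdev_info_t info = { 0 };
	int fd = open("/dev/rdisk3", O_RDONLY);   /* hypothetical ramdisk node */

	if (fd < 0)
		return 1;
	/* Ask the memdev driver to fill in the same four fields the
	 * ioctl case above populates from mdev[devid]. */
	if (ioctl(fd, DKIOCGETMEMDEVINFO, &info) == 0 && info.mi_mdev) {
		printf("memdev: phys=%u base page=0x%x size=%llu pages\n",
		    info.mi_phys, info.mi_base,
		    (unsigned long long)info.mi_size);
	}
	close(fd);
	return 0;
}

The driver returns ENXIO for an uninitialized minor, so a clean failure here simply means no memory device is attached at that unit.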
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include + +static inline __attribute__((always_inline)) void +munge_32_to_64_unsigned(volatile uint64_t *dest, volatile uint32_t *src, int count); + +/* + * Refer to comments in bsd/sys/munge.h + */ +void +munge_w(const void *arg0 __unused, void *args) +{ + munge_32_to_64_unsigned(args, args, 1); +} + +void +munge_ww(const void *arg0 __unused, void *args) +{ + munge_32_to_64_unsigned(args, args, 2); +} + +void +munge_www(const void *arg0 __unused, void *args) +{ + munge_32_to_64_unsigned(args, args, 3); +} + +void +munge_wwww(const void *arg0 __unused, void *args) +{ + munge_32_to_64_unsigned(args, args, 4); +} + +void +munge_wwwww(const void *arg0 __unused, void *args) +{ + munge_32_to_64_unsigned(args, args, 5); +} + +void +munge_wwwwww(const void *arg0 __unused, void *args) +{ + munge_32_to_64_unsigned(args, args, 6); +} + +void +munge_wwwwwww(const void *arg0 __unused, void *args) +{ + munge_32_to_64_unsigned(args, args, 7); +} + +void +munge_wwwwwwww(const void *arg0 __unused, void *args) +{ + munge_32_to_64_unsigned(args, args, 8); +} + +void +munge_wl(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[1] = *(uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwl(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[2] = *(uint64_t*)&in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwlw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[3] = in_args[4]; + out_args[2] = *(uint64_t*)&in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} +void +munge_wwlll(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[4] = *(uint64_t*)&in_args[6]; + out_args[3] = *(uint64_t*)&in_args[4]; + out_args[2] = *(uint64_t*)&in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwllww(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[5] = in_args[7]; + out_args[4] = in_args[6]; + out_args[3] = *(uint64_t*)&in_args[4]; + out_args[2] = *(uint64_t*)&in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wlw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[2] = in_args[3]; + out_args[1] = *(uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wlwwwll(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[6] = *(uint64_t*)&in_args[8]; + out_args[5] = *(uint64_t*)&in_args[6]; + out_args[4] = in_args[5]; + out_args[3] = in_args[4]; + out_args[2] = in_args[3]; + out_args[1] = *(uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wlwwwllw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile 
uint32_t *in_args = (volatile uint32_t*)args; + + out_args[7] = in_args[10]; + munge_wlwwwll(args, args); +} + +void +munge_wlwwlwlw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[7] = in_args[10]; + out_args[6] = *(uint64_t*)&in_args[8]; + out_args[5] = in_args[7]; + out_args[4] = *(uint64_t*)&in_args[5]; + out_args[3] = in_args[4]; + out_args[2] = in_args[3]; + out_args[1] = *(uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wll(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[2] = *(uint64_t*)&in_args[3]; + out_args[1] = *(uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wlll(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[3] = *(uint64_t*)&in_args[5]; + out_args[2] = *(uint64_t*)&in_args[3]; + out_args[1] = *(uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wllww(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[4] = in_args[6]; + out_args[3] = in_args[5]; + out_args[2] = *(uint64_t*)&in_args[3]; + out_args[1] = *(uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wllwwll(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[6] = *(uint64_t*)&in_args[9]; + out_args[5] = *(uint64_t*)&in_args[7]; + out_args[4] = in_args[6]; + out_args[3] = in_args[5]; + out_args[2] = *(uint64_t*)&in_args[3]; + out_args[1] = *(uint64_t*)&in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwlw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[4] = in_args[5]; + out_args[3] = *(uint64_t*)&in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwlww(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[5] = in_args[6]; + out_args[4] = in_args[5]; + out_args[3] = *(uint64_t*)&in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwl(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[3] = *(uint64_t*)&in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwlw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[5] = in_args[6]; + out_args[4] = *(uint64_t*)&in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwl(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[4] = 
*(uint64_t*)&in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwwl(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[5] = *(uint64_t*)&in_args[5]; + out_args[4] = in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwwlww(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[7] = in_args[8]; + out_args[6] = in_args[7]; + out_args[5] = *(uint64_t*)&in_args[5]; + out_args[4] = in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwwllw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[7] = in_args[9]; + out_args[6] = *(uint64_t*)&in_args[7]; + out_args[5] = *(uint64_t*)&in_args[5]; + out_args[4] = in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwwlll(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[7] = *(uint64_t*)&in_args[9]; + out_args[6] = *(uint64_t*)&in_args[7]; + out_args[5] = *(uint64_t*)&in_args[5]; + out_args[4] = in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwwwl(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[6] = *(uint64_t*)&in_args[6]; + out_args[5] = in_args[5]; + out_args[4] = in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwwwlw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[7] = in_args[8]; + out_args[6] = *(uint64_t*)&in_args[6]; + out_args[5] = in_args[5]; + out_args[4] = in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwwwwll(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[7] = *(uint64_t*)&in_args[8]; + out_args[6] = *(uint64_t*)&in_args[6]; + out_args[5] = in_args[5]; + out_args[4] = in_args[4]; + out_args[3] = in_args[3]; + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wsw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[2] = in_args[2]; + out_args[1] = (int64_t)(int)in_args[1]; /* Sign-extend */ + out_args[0] = in_args[0]; +} + +void +munge_wws(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[2] 
= (int64_t)(int)in_args[2]; /* Sign-extend */ + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_wwwsw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[4] = in_args[4]; + out_args[3] = (int64_t)(int)in_args[3]; /* Sign-extend */ + out_args[2] = in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +void +munge_llllll(const void *arg0 __unused, void *args __unused) +{ + /* Nothing to do, already all 64-bit */ +} + +void +munge_ll(const void *arg0 __unused, void *args __unused) +{ + /* Nothing to do, already all 64-bit */ +} + +void +munge_l(const void *arg0 __unused, void *args __unused) +{ + /* Nothing to do, already all 64-bit */ +} + +void +munge_lw(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[1] = in_args[2]; + out_args[0] = *(uint64_t*)&in_args[0]; +} + +void +munge_lwww(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[3] = in_args[4]; + out_args[2] = in_args[3]; + out_args[1] = in_args[2]; + out_args[0] = *(uint64_t*)&in_args[0]; +} + +void +munge_wwlwww(const void *arg0 __unused, void *args) +{ + volatile uint64_t *out_args = (volatile uint64_t*)args; + volatile uint32_t *in_args = (volatile uint32_t*)args; + + out_args[5] = in_args[6]; + out_args[4] = in_args[5]; + out_args[3] = in_args[4]; + out_args[2] = *(uint64_t*)&in_args[2]; + out_args[1] = in_args[1]; + out_args[0] = in_args[0]; +} + +/* + * Munge array of 32-bit values into an array of 64-bit values, + * without sign extension. Note, src and dest can be the same + * (copies from end of array) + */ +static inline __attribute__((always_inline)) void +munge_32_to_64_unsigned(volatile uint64_t *dest, volatile uint32_t *src, int count) +{ + int i; + + for (i = count - 1; i >= 0; i--) { + dest[i] = src[i]; + } +} + diff --git a/bsd/dev/random/Makefile b/bsd/dev/random/Makefile index 7a07200d9..412ca1e5f 100644 --- a/bsd/dev/random/Makefile +++ b/bsd/dev/random/Makefile @@ -3,32 +3,17 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - DATAFILES = \ + randomdev.h -INSTALL_MI_LIST = ${DATAFILES} - -INSTALL_MI_DIR = - -EXPORT_MI_LIST = ${DATAFILES} +INSTALL_MI_LIST = -EXPORT_MI_DIR = +EXPORT_MI_LIST = ${DATAFILES} +EXPORT_MI_DIR = dev/random include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/dev/random/randomdev.c b/bsd/dev/random/randomdev.c index c29e9f877..c73553994 100644 --- a/bsd/dev/random/randomdev.c +++ b/bsd/dev/random/randomdev.c @@ -67,6 +67,9 @@ d_ioctl_t random_ioctl; +/* To generate the seed for the RNG */ +extern uint64_t early_random(); + /* * A struct describing which functions will get invoked for certain * actions. @@ -340,21 +343,14 @@ PreliminarySetup(void) /* clear the error flag, reads and write should then work */ gRandomError = 0; - struct timeval tt; + uint64_t tt; char buffer [16]; /* get a little non-deterministic data as an initial seed. 
*/ /* On OSX, securityd will add much more entropy as soon as it */ /* comes up. On iOS, entropy is added with each system interrupt. */ - microtime(&tt); - - /* - * So how much of the system clock is entropic? - * It's hard to say, but assume that at least the - * least significant byte of a 64 bit structure - * is entropic. It's probably more, how can you figure - * the exact time the user turned the computer on, for example. - */ + tt = early_random(); + perr = prngInput(gPrngRef, (BYTE*) &tt, sizeof (tt), SYSTEM_SOURCE, 8); if (perr != 0) { /* an error, complain */ @@ -544,51 +540,6 @@ error_exit: /* do this to make sure the mutex unlocks. */ return (retCode); } -/* - * return data to the caller. Results unpredictable. - */ -int -random_read(__unused dev_t dev, struct uio *uio, __unused int ioflag) -{ - int retCode = 0; - - if (gRandomError != 0) - return (ENOTSUP); - - /* lock down the mutex */ - lck_mtx_lock(gYarrowMutex); - - - int bytes_remaining = uio_resid(uio); - while (bytes_remaining > 0 && retCode == 0) { - /* get the user's data */ - int bytes_to_read = 0; - - int bytes_available = kBlockSize - g_bytes_used; - if (bytes_available == 0) - { - random_block(g_random_data, TRUE); - g_bytes_used = 0; - bytes_available = kBlockSize; - } - - bytes_to_read = min (bytes_remaining, bytes_available); - - retCode = uiomove(((caddr_t)g_random_data)+ g_bytes_used, bytes_to_read, uio); - g_bytes_used += bytes_to_read; - - if (retCode != 0) - goto error_exit; - - bytes_remaining = uio_resid(uio); - } - - retCode = 0; - -error_exit: - lck_mtx_unlock(gYarrowMutex); - return retCode; -} /* export good random numbers to the rest of the kernel */ void @@ -599,6 +550,8 @@ read_random(void* buffer, u_int numbytes) } lck_mtx_lock(gYarrowMutex); + + int bytes_read = 0; int bytes_remaining = numbytes; @@ -620,6 +573,36 @@ read_random(void* buffer, u_int numbytes) lck_mtx_unlock(gYarrowMutex); } +/* + * return data to the caller. Results unpredictable. + */ +int +random_read(__unused dev_t dev, struct uio *uio, __unused int ioflag) +{ + int retCode = 0; + + if (gRandomError != 0) + return (ENOTSUP); + + char buffer[64]; + + user_ssize_t bytes_remaining = uio_resid(uio); + while (bytes_remaining > 0 && retCode == 0) { + user_ssize_t bytesToRead = min(sizeof(buffer), bytes_remaining); + read_random(buffer, bytesToRead); + retCode = uiomove(buffer, bytesToRead, uio); + + if (retCode != 0) + goto error_exit; + + bytes_remaining = uio_resid(uio); + } + + retCode = 0; + +error_exit: + return retCode; +} /* * Return an u_int32_t pseudo-random number. 
*/ diff --git a/bsd/dev/unix_startup.c b/bsd/dev/unix_startup.c index a1d8f5200..0f36920f0 100644 --- a/bsd/dev/unix_startup.c +++ b/bsd/dev/unix_startup.c @@ -61,7 +61,7 @@ extern uint32_t tcp_sendspace; extern uint32_t tcp_recvspace; #endif -void bsd_bufferinit(void) __attribute__((section("__TEXT, initcode"))); +void bsd_bufferinit(void); extern void md_prepare_for_shutdown(int, int, char *); unsigned int bsd_mbuf_cluster_reserve(boolean_t *); @@ -90,13 +90,16 @@ SYSCTL_INT (_kern, OID_AUTO, maxnbuf, CTLFLAG_RW | CTLFLAG_LOCKED, &max_nbuf_hea __private_extern__ int customnbuf = 0; int serverperfmode = 0; /* Flag indicates a server boot when set */ int ncl = 0; + +#if SOCKETS static unsigned int mbuf_poolsz; +#endif vm_map_t buffer_map; vm_map_t bufferhdr_map; static int vnodes_sized = 0; -extern void bsd_startupearly(void) __attribute__((section("__TEXT, initcode"))); +extern void bsd_startupearly(void); void bsd_startupearly(void) @@ -200,8 +203,9 @@ bsd_startupearly(void) void bsd_bufferinit(void) { +#if SOCKETS kern_return_t ret; - +#endif /* * Note: Console device initialized in kminit() from bsd_autoconf() * prior to call to us in bsd_init(). @@ -235,6 +239,7 @@ bsd_bufferinit(void) #endif /* !__LP64__ */ #define MAX_NCL (MAX_MBUF_POOL >> MCLSHIFT) +#if SOCKETS /* * this has been broken out into a separate routine that * can be called from the x86 early vm initialization to @@ -296,6 +301,8 @@ done: return (mbuf_poolsz); } +#endif + #if defined(__LP64__) extern int tcp_tcbhashsize; extern int max_cached_sock_count; diff --git a/bsd/dev/vn/vn.c b/bsd/dev/vn/vn.c index 2a0001d48..c5f9eae81 100644 --- a/bsd/dev/vn/vn.c +++ b/bsd/dev/vn/vn.c @@ -121,7 +121,7 @@ #include "shadow.h" static void -vndevice_do_init(void) __attribute__((section("__TEXT, initcode"))); +vndevice_do_init(void); static ioctl_fcn_t vnioctl_chr; static ioctl_fcn_t vnioctl_blk; diff --git a/bsd/dev/x86_64/munge.s b/bsd/dev/x86_64/munge.s index cb2f6dfc0..e69de29bb 100644 --- a/bsd/dev/x86_64/munge.s +++ b/bsd/dev/x86_64/munge.s @@ -1,373 +0,0 @@ -/* - * Coyright (c) 2005-2008 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Syscall argument mungers. 
- * - * The data to be munged has been explicitly copied in to the argument area, - * and will be munged in place in the uu_arg[] array. Because of this, the - * functions all take the same arguments as their PPC equivalents, but the - * first argument is ignored. These mungers are for 32-bit app's syscalls, - * since 64-bit args are stored into the save area (which overlays the - * uu_args) in the order the syscall ABI calls for. - * - * The issue is that the incoming args are 32-bit, but we must expand - * them in place into 64-bit args, as if they were from a 64-bit process. - * - * There are several functions in this file. Each takes two parameters: - * - * void munge_XXXX(const void *regs, // %rdi - * void *uu_args); // %rsi - * - * The name of the function encodes the number and type of the parameters, - * as follows: - * - * w = a 32-bit value such as an int or a 32-bit ptr, that does not - * require sign extension. These are handled by zeroing a word - * of output, and copying a word from input to output. - * - * s = a 32-bit value such as a long, which must be sign-extended to - * a 64-bit long-long in the uu_args. These are handled by - * loading a word of input and sign extending it to a double, - * and storing two words of output. - * - * l = a 64-bit long-long. These are handled by copying two words - * of input to the output. - * - * For example, "munge_wls" takes a word, a long-long, and a word. This - * takes four words in the uu_arg[] area: the first word is in one, the - * long-long takes two, and the final word is in the fourth. We store six - * words: the low word is left in place, followed by a 0, followed by the - * two words of the long-long, followed by the low word and the sign extended - * high word of the preceeding low word. - * - * Because this is an in-place modification, we actually start at the end - * of uu_arg[] and work our way back to the beginning of the array. - * - * As you can see, we save a lot of code by collapsing mungers that are - * prefixes or suffixes of each other. 
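This naming scheme survives the move from assembly to C: the new bsd/dev/munge.c shown earlier implements the same per-signature contracts through paired 32- and 64-bit views of the argument area. A user-space sketch of the "wlw" case follows; demo_munge_wlw and the sample values are illustrative only, and the snapshot copy stands in for the strict tail-first write ordering that the kernel versions get from their volatile pointers.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * In-place expansion of the 32-bit layout "w l w" (four 32-bit slots:
 * w, the two halves of l, then w) into three 64-bit slots, mirroring
 * munge_wlw() in bsd/dev/munge.c.
 */
static void demo_munge_wlw(uint64_t *args)
{
	uint32_t in[4];
	uint64_t l;

	memcpy(in, args, sizeof(in));      /* snapshot the packed 32-bit view */
	memcpy(&l, &in[1], sizeof(l));     /* gather the two halves of l */
	args[2] = in[3];                   /* trailing w: zero-extended */
	args[1] = l;                       /* l: copied as-is */
	args[0] = in[0];                   /* leading w: zero-extended */
}

int main(void)
{
	uint64_t args[3];
	uint32_t packed[4] = { 7, 0x44332211, 0x88776655, 9 };

	memcpy(args, packed, sizeof(packed));  /* 32-bit args as copyin() left them */
	demo_munge_wlw(args);
	/* little-endian: prints 7 8877665544332211 9 */
	printf("%llx %llx %llx\n",
	    (unsigned long long)args[0],
	    (unsigned long long)args[1],
	    (unsigned long long)args[2]);
	return 0;
}

The kernel versions skip the snapshot entirely: filling slots strictly from the last to the first guarantees every 32-bit input is read before a widened 64-bit store can clobber it, which is exactly the point the deleted comment above is making.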
- */ -#include - -ENTRY(munge_w) - movl $0,4(%rsi) - ret - -ENTRY(munge_ww) - xorl %edx,%edx - jmp Lw2 -ENTRY(munge_www) - xorl %edx,%edx - jmp Lw3 -ENTRY(munge_wwww) - xorl %edx,%edx - jmp Lw4 -ENTRY(munge_wwwww) - xorl %edx,%edx - jmp Lw5 -ENTRY(munge_wwwwww) - xorl %edx,%edx - jmp Lw6 -ENTRY(munge_wwwwwww) - xorl %edx,%edx - jmp Lw7 -ENTRY(munge_wwwwwwww) - xorl %edx,%edx - movl 28(%rsi),%eax - movl %eax,56(%rsi) - movl %edx,60(%rsi) -Lw7: - movl 24(%rsi),%eax - movl %eax,48(%rsi) - movl %edx,52(%rsi) -Lw6: - movl 20(%rsi),%eax - movl %eax,40(%rsi) - movl %edx,44(%rsi) -Lw5: - movl 16(%rsi),%eax - movl %eax,32(%rsi) - movl %edx,36(%rsi) -Lw4: - movl 12(%rsi),%eax - movl %eax,24(%rsi) - movl %edx,28(%rsi) -Lw3: - movl 8(%rsi),%eax - movl %eax,16(%rsi) - movl %edx,20(%rsi) -Lw2: - movl 4(%rsi),%eax - movl %eax,8(%rsi) - movl %edx,12(%rsi) - movl %edx,4(%rsi) - ret - - -Entry(munge_wl) /* Costs an extra w move to do this */ -ENTRY(munge_wlw) - xorl %edx,%edx -Lwlw: - movl 12(%rsi),%eax - movl %eax,16(%rsi) - movl %edx,20(%rsi) -Lwl: - movl 8(%rsi),%eax - movl %eax,12(%rsi) - movl 4(%rsi),%eax - movl %eax,8(%rsi) - - movl %edx,4(%rsi) - ret - -ENTRY(munge_wlwwwll) - xorl %edx,%edx -Lwlwwwll: - movl 36(%rsi),%eax - movl %eax,52(%rsi) - movl 32(%rsi),%eax - movl %eax,48(%rsi) - - movl 28(%rsi),%eax - movl %eax,44(%rsi) - movl 24(%rsi),%eax - movl %eax,40(%rsi) - - movl 20(%rsi),%eax - movl %eax,32(%rsi) - movl %edx,36(%rsi) -Lwlww: - movl 16(%rsi),%eax - movl %eax,24(%rsi) - movl %edx,28(%rsi) - jmp Lwlw - -ENTRY(munge_wlwwwllw) - xorl %edx,%edx - movl 40(%rsi),%eax - movl %eax,56(%rsi) - movl %edx,60(%rsi) - jmp Lwlwwwll - -ENTRY(munge_wlwwlwlw) - xorl %edx,%edx - movl 40(%rsi),%eax - movl %eax,56(%rsi) - movl %edx,60(%rsi) - movl 36(%rsi),%eax - movl %eax,52(%rsi) - movl 32(%rsi),%eax - movl %eax,48(%rsi) - movl 28(%rsi),%eax - movl %eax,40(%rsi) - movl %edx,44(%rsi) - movl 24(%rsi),%eax - movl %eax,36(%rsi) - movl 20(%rsi),%eax - movl %eax,32(%rsi) - jmp Lwlww - - -ENTRY(munge_wllwwll) - xorl %edx,%edx - - movl 40(%rsi),%eax //l - movl %eax,52(%rsi) - movl 36(%rsi),%eax - movl %eax,48(%rsi) - movl 32(%rsi),%eax //l - movl %eax,44(%rsi) - movl 28(%rsi),%eax - movl %eax,40(%rsi) - movl 24(%rsi),%eax //w - movl %eax,32(%rsi) - movl %edx,36(%rsi) - movl 20(%rsi),%eax //w - movl %eax,24(%rsi) - movl %edx,28(%rsi) - movl 16(%rsi),%eax //l - movl %eax,20(%rsi) - movl 12(%rsi),%eax - movl %eax,16(%rsi) - - jmp Lwl - -Entry(munge_wwwlw) - xorl %edx,%edx - movl 20(%rsi),%eax - movl %eax,32(%rsi) - movl %edx,36(%rsi) - jmp Lwwwl - - -ENTRY(munge_wwwl) - xorl %edx,%edx -Lwwwl: - movl 12(%rsi),%eax - movl %eax,24(%rsi) - movl 16(%rsi),%eax - movl %eax,28(%rsi) - jmp Lw3 - -ENTRY(munge_wwwwlw) - xorl %edx,%edx - movl 24(%rsi),%eax - movl %eax,40(%rsi) - movl %edx,44(%rsi) - jmp Lwwwwl - -ENTRY(munge_wwwwl) - xorl %edx,%edx -Lwwwwl: - movl 16(%rsi),%eax - movl %eax,32(%rsi) - movl 20(%rsi),%eax - movl %eax,36(%rsi) - jmp Lw4 - -ENTRY(munge_wwwwwl) - xorl %edx,%edx - movl 20(%rsi),%eax - movl %eax,40(%rsi) - movl 24(%rsi),%eax - movl %eax,44(%rsi) - jmp Lw5 - - -ENTRY(munge_wwwwwlww) - xorl %edx,%edx - movl 32(%rsi),%eax - movl %eax,56(%rsi) - movl %edx,60(%rsi) - movl 28(%rsi),%eax - movl %eax,48(%rsi) - movl %edx,52(%rsi) - movl 20(%rsi),%eax - movl %eax,40(%rsi) - movl 24(%rsi),%eax - movl %eax,44(%rsi) - - jmp Lw5 - -ENTRY(munge_wwwwwllw) - xorl %edx,%edx - movl 36(%rsi),%eax - movl %eax,56(%rsi) - movl %edx,60(%rsi) - movl 28(%rsi),%eax - movl %eax,48(%rsi) - movl 32(%rsi),%eax - movl %eax,52(%rsi) - 
movl 20(%rsi),%eax - movl %eax,40(%rsi) - movl 24(%rsi),%eax - movl %eax,44(%rsi) - jmp Lw5 - -ENTRY(munge_wwwwwlll) - xorl %edx,%edx - movl 36(%rsi),%eax - movl %eax,56(%rsi) - movl 40(%rsi),%eax - movl %eax,60(%rsi) - movl 28(%rsi),%eax - movl %eax,48(%rsi) - movl 32(%rsi),%eax - movl %eax,52(%rsi) - movl 20(%rsi),%eax - movl %eax,40(%rsi) - movl 24(%rsi),%eax - movl %eax,44(%rsi) - jmp Lw5 - -ENTRY(munge_wwwwwwl) - xorl %edx,%edx - movl 24(%rsi),%eax - movl %eax,48(%rsi) - movl 28(%rsi),%eax - movl %eax,52(%rsi) - jmp Lw6 - -ENTRY(munge_wwwwwwlw) - xorl %edx,%edx - movl 32(%rsi),%eax - movl %eax,56(%rsi) - movl %edx,60(%rsi) - movl 24(%rsi),%eax - movl %eax,48(%rsi) - movl 28(%rsi),%eax - movl %eax,52(%rsi) - jmp Lw6 - -ENTRY(munge_wwwwwwll) - xorl %edx,%edx - movl 32(%rsi),%eax - movl %eax,56(%rsi) - movl 36(%rsi),%eax - movl %eax,60(%rsi) - movl 24(%rsi),%eax - movl %eax,48(%rsi) - movl 28(%rsi),%eax - movl %eax,52(%rsi) - jmp Lw6 - -ENTRY(munge_wsw) - movl 8(%rsi),%eax - movl %eax,16(%rsi) - movl $0,20(%rsi) - movl 4(%rsi),%eax - cltd - movl %eax,8(%rsi) - movl %edx,12(%rsi) - movl $0,4(%rsi) - ret - -ENTRY(munge_wws) - movl 8(%rsi),%eax - cltd - movl %eax,16(%rsi) - movl %edx,20(%rsi) - xorl %edx,%edx - jmp Lw2 - -ENTRY(munge_wwwsw) - movl 16(%rsi),%eax - movl %eax,32(%rsi) - movl $0,36(%rsi) - movl 12(%rsi),%eax - cltd - movl %eax,24(%rsi) - movl %edx,28(%rsi) - xorl %edx,%edx - jmp Lw3 - -ENTRY(munge_llllll) - ret // nothing to do here, either - all args are already - // 64-bit and do not require sign/zero extension - // also, there is no mixing in of shorter args that - // do need extension diff --git a/bsd/hfs/Makefile b/bsd/hfs/Makefile index 27705308f..0f11a9737 100644 --- a/bsd/hfs/Makefile +++ b/bsd/hfs/Makefile @@ -7,20 +7,8 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - DATAFILES = \ - hfs_encodings.h hfs_format.h hfs_mount.h + hfs_encodings.h hfs_format.h hfs_mount.h hfs_unistr.h PRIVATE_DATAFILES = \ hfs.h hfs_attrlist.h hfs_catalog.h hfs_cnode.h hfs_endian.h \ diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index ec8db3864..584b7f5a4 100644 --- a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -68,10 +68,6 @@ #include #include -#if CONFIG_HFS_ALLOC_RBTREE -#include -#endif - #if CONFIG_PROTECT /* Forward declare the cprotect struct */ struct cprotect; @@ -118,6 +114,7 @@ extern struct timezone gTimeZone; */ #define HFS_MINFREE 1 #define HFS_MAXRESERVE ((u_int64_t)(250*1024*1024)) +#define HFS_BT_MAXRESERVE ((u_int64_t)(10*1024*1024)) /* * The system distinguishes between the desirable low-disk @@ -225,26 +222,12 @@ typedef struct hfsmount { HFSPlusExtentDescriptor vcbFreeExt[kMaxFreeExtents]; lck_spin_t vcbFreeExtLock; -#if CONFIG_HFS_ALLOC_RBTREE - /* - * Access to these fields should only be done - * after acquiring the bitmap lock. Note that the - * "offset_block_end" field indicates the portion of - * the bitmap that is currently managed by the red-black tree. 
- */ - - /* Normal Allocation Tree */ - extent_tree_offset_t offset_tree; - u_int32_t offset_free_extents; /* number of free extents managed by tree */ - u_int32_t offset_block_end; -#endif + /* Summary Table */ + u_int8_t *hfs_summary_table; /* Each bit is 1 vcbVBMIOSize of bitmap, byte indexed */ + u_int32_t hfs_summary_size; /* number of BITS in summary table defined above (not bytes!) */ + u_int32_t hfs_summary_bytes; /* number of BYTES in summary table */ - /* - * For setting persistent in-mount fields that relate - * to the use of the extent trees. See HFS Red-Black - * Tree Allocator Flags below. - */ - u_int32_t extent_tree_flags; + u_int32_t scan_var; /* For initializing the summary table */ u_int32_t reserveBlocks; /* free block reserve */ @@ -329,6 +312,7 @@ typedef struct hfsmount { #if CONFIG_PROTECT struct cprotect *hfs_resize_cpentry; u_int16_t hfs_running_cp_major_vers; + uint32_t default_cp_class; #endif @@ -337,6 +321,9 @@ typedef struct hfsmount { u_long hfs_cnodehash; /* size of cnode hash table - 1 */ LIST_HEAD(cnodehashhead, cnode) *hfs_cnodehashtbl; /* base of cnode hash */ + /* Per mount fileid hash variables (protected by catalog lock!) */ + u_long hfs_idhash; /* size of cnid/fileid hash table -1 */ + LIST_HEAD(idhashhead, cat_preflightid) *hfs_idhashtbl; /* base of ID hash */ /* * About the sync counters: @@ -356,7 +343,6 @@ typedef struct hfsmount { int32_t hfs_sync_scheduled; int32_t hfs_sync_incomplete; u_int64_t hfs_last_sync_request_time; - u_int64_t hfs_last_sync_time; u_int32_t hfs_active_threads; u_int64_t hfs_max_pending_io; @@ -364,8 +350,14 @@ typedef struct hfsmount { } hfsmount_t; -#define HFS_META_DELAY (100) -#define HFS_MILLISEC_SCALE (1000*1000) +/* + * HFS_META_DELAY is a duration (0.1 seconds, expressed in microseconds) + * used for triggering the hfs_syncer() routine. It is used in two ways: + * as the delay between ending a transaction and firing hfs_syncer(), and + * the delay in re-firing hfs_syncer() when it decides to back off (for + * example, due to in-progress writes). + */ +enum { HFS_META_DELAY = 100 * 1000ULL }; typedef hfsmount_t ExtendedVCB; @@ -410,15 +402,7 @@ static __inline__ Boolean IsVCBDirty(ExtendedVCB *vcb) */ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; -/* HFS Red-Black Tree Allocator Flags */ -#define HFS_ALLOC_RB_ENABLED 0x000001 /* trees in use */ -#define HFS_ALLOC_RB_ERRORED 0x000002 /* tree hit error; disabled for the mount */ -#define HFS_ALLOC_RB_MZACTIVE 0x000004 /* metazone tree has finished building */ -#define HFS_ALLOC_RB_ACTIVE 0x000008 /* normalzone tree finished building */ - -/* HFS Red-Black Unmount Synch. Flags */ -#define HFS_ALLOC_TREEBUILD_INFLIGHT 0x000010 -#define HFS_ALLOC_TEARDOWN_INFLIGHT 0x000020 +#define HFS_ALLOCATOR_SCAN_INFLIGHT 0x0001 /* scan started */ /* HFS mount point flags */ #define HFS_READ_ONLY 0x00001 @@ -451,7 +435,8 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; #define HFS_DID_CONTIG_SCAN 0x100000 #define HFS_UNMAP 0x200000 #define HFS_SSD 0x400000 - +#define HFS_SUMMARY_TABLE 0x800000 +#define HFS_CS 0x1000000 /* Macro to update next allocation block in the HFS mount structure. 
If @@ -464,18 +449,6 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS}; hfsmp->nextAllocation = new_nextAllocation; \ } \ -#define HFS_MOUNT_LOCK(hfsmp, metadata) \ - { \ - if ((metadata) && 1) \ - lck_mtx_lock(&(hfsmp)->hfs_mutex); \ - } \ - -#define HFS_MOUNT_UNLOCK(hfsmp, metadata) \ - { \ - if ((metadata) && 1) \ - lck_mtx_unlock(&(hfsmp)->hfs_mutex); \ - } \ - /* Macro for incrementing and decrementing the folder count in a cnode * attribute only if the HFS_FOLDERCOUNT bit is set in the mount flags * and kHFSHasFolderCount bit is set in the cnode flags. Currently these @@ -519,7 +492,7 @@ typedef struct filefork FCB; ((sizeof(struct dirent) - (NAME_MAX+1)) + (((namlen)+1 + 3) &~ 3)) #define EXT_DIRENT_LEN(namlen) \ - ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 3) & ~3) + ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7) enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 }; @@ -656,11 +629,12 @@ void hfs_setencodingbias(u_int32_t bias); ******************************************************************************/ void hfs_converterinit(void); +int hfs_relconverter (u_int32_t encoding); + int hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname); -int hfs_relconverter(u_int32_t encoding); - +#if CONFIG_HFS_STD int hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr); @@ -675,7 +649,7 @@ int utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstSt int mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str, u_int32_t maxCharLen, u_int32_t *usedCharLen); int unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry); - +#endif /***************************************************************************** Functions from hfs_notifications.c @@ -688,6 +662,10 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp); ******************************************************************************/ extern int hfs_relocate(struct vnode *, u_int32_t, kauth_cred_t, struct proc *); +/* Flags for HFS truncate */ +#define HFS_TRUNCATE_SKIPUPDATE 0x00000001 +#define HFS_TRUNCATE_SKIPTIMES 0x00000002 /* implied by skipupdate; it is a subset */ + extern int hfs_truncate(struct vnode *, off_t, int, int, int, vfs_context_t); extern int hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, @@ -740,6 +718,8 @@ extern int hfs_resize_progress(struct hfsmount *, u_int32_t *); */ void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp); +void hfs_scan_blocks (struct hfsmount *hfsmp); + /***************************************************************************** Functions from hfs_vfsutils.c ******************************************************************************/ @@ -747,8 +727,10 @@ u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize, u_int32_t blockSizeLimit, u_int32_t baseMultiple); +#if CONFIG_HFS_STD OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, struct proc *p); +#endif OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, off_t embeddedOffset, u_int64_t disksize, struct proc *p, void *args, kauth_cred_t cred); @@ -765,9 +747,13 @@ extern int check_for_dataless_file(struct vnode *vp, uint64_t op_type); /* * Journal lock function prototypes */ -int hfs_lock_global (struct hfsmount *hfsmp, enum hfslocktype locktype); +int hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype); 
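The HFS_MOUNT_LOCK/HFS_MOUNT_UNLOCK macros deleted above, with their vestigial "(metadata) && 1" test, give way to the hfs_lock_mount()/hfs_unlock_mount() prototypes declared just below; hunks later in this patch (in hfs_btreeio.c, for example) swap lck_mtx_lock(&hfsmp->hfs_mutex) for hfs_lock_mount(hfsmp) one-for-one. A minimal sketch of the observable contract follows; the real bodies live in hfs_vfsutils.c and may differ in detail, for instance by using spin-mutex variants of the lck_mtx calls.

/* Sketch only: the actual definitions are elsewhere in this patch. */
void
hfs_lock_mount(struct hfsmount *hfsmp)
{
	lck_mtx_lock(&hfsmp->hfs_mutex);    /* same mutex the old macro took */
}

void
hfs_unlock_mount(struct hfsmount *hfsmp)
{
	lck_mtx_unlock(&hfsmp->hfs_mutex);
}

Centralizing the lock in a function rather than a macro means the locking discipline can change in one place without touching every call site again.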
void hfs_unlock_global (struct hfsmount *hfsmp); +/* HFS mount lock/unlock prototypes */ +void hfs_lock_mount (struct hfsmount *hfsmp); +void hfs_unlock_mount (struct hfsmount *hfsmp); + /* HFS System file locking */ #define SFL_CATALOG 0x0001 @@ -777,7 +763,7 @@ void hfs_unlock_global (struct hfsmount *hfsmp); #define SFL_STARTUP 0x0010 #define SFL_VALIDMASK (SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE | SFL_STARTUP) -extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfslocktype); +extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfs_locktype); extern void hfs_systemfile_unlock(struct hfsmount *, int); extern u_int32_t GetFileInfo(ExtendedVCB *vcb, u_int32_t dirid, const char *name, @@ -843,13 +829,14 @@ extern int hfs_update(struct vnode *, int); Functions from hfs_xattr.c ******************************************************************************/ -/* Maximum extended attribute size supported for all extended attributes except +/* + * Maximum extended attribute size supported for all extended attributes except * resource fork and finder info. */ -#define HFS_XATTR_MAXSIZE (128 * 1024) +#define HFS_XATTR_MAXSIZE INT32_MAX /* Number of bits used to represent maximum extended attribute size */ -#define HFS_XATTR_SIZE_BITS 18 +#define HFS_XATTR_SIZE_BITS 31 int hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey); int hfs_buildattrkey(u_int32_t fileID, const char *attrname, HFSPlusAttrKey *key); @@ -871,6 +858,8 @@ extern int hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode * struct componentname *cnp, int skip_reserve); extern int hfs_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid, cnid_t *nextlinkid); +extern int hfs_lookup_lastlink(struct hfsmount *hfsmp, cnid_t linkfileid, + cnid_t *nextlinkid, struct cat_desc *cdesc); extern void hfs_privatedir_init(struct hfsmount *, enum privdirtype); extern void hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid); diff --git a/bsd/hfs/hfs_attrlist.c b/bsd/hfs/hfs_attrlist.c index 2cb73b6b5..0d230c199 100644 --- a/bsd/hfs/hfs_attrlist.c +++ b/bsd/hfs/hfs_attrlist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -154,7 +154,7 @@ hfs_vnop_readdirattr(ap) /* * Take an exclusive directory lock since we manipulate the directory hints */ - if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK))) { + if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (error); } dcp = VTOC(dvp); @@ -384,7 +384,7 @@ hfs_vnop_readdirattr(ap) cat_releasedesc(&ce_list->entry[i].ce_desc); ce_list->realentries = 0; - (void) hfs_lock(VTOC(dvp), HFS_FORCE_LOCK); + (void) hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); dcp = VTOC(dvp); exit1: @@ -562,8 +562,8 @@ packcommonattr( if (ATTR_CMN_FSID & attr) { fsid_t fsid; - fsid.val[0] = (long)hfsmp->hfs_raw_dev; - fsid.val[1] = (long)vfs_typenum(mp); + fsid.val[0] = hfsmp->hfs_raw_dev; + fsid.val[1] = vfs_typenum(mp); *((fsid_t *)attrbufptr) = fsid; attrbufptr = ((fsid_t *)attrbufptr) + 1; } @@ -682,9 +682,12 @@ packcommonattr( /* advance 16 bytes into the attrbuf */ finfo = finfo + 16; - if (S_ISREG(cap->ca_mode)) { + + /* also don't expose the date_added or write_gen_counter fields */ + if (S_ISREG(cap->ca_mode) || S_ISLNK(cap->ca_mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; extinfo->date_added = 0; + extinfo->write_gen_counter = 0; } else if (S_ISDIR(cap->ca_mode)) { struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; @@ -744,7 +747,7 @@ packcommonattr( user_access = hfs_real_user_access(vp, abp->ab_context); } else { user_access = DerivePermissionSummary(cap->ca_uid, cap->ca_gid, - cap->ca_mode, mp, proc_ucred(current_proc()), 0); + cap->ca_mode, mp, vfs_context_ucred(ctx), 0); } /* Also consider READ-ONLY file system. */ if (vfs_flags(mp) & MNT_RDONLY) { diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 7e5182cd9..5fc36da95 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -50,7 +50,7 @@ /* From bsd/vfs/vfs_bio.c */ extern int bdwrite_internal(struct buf *, int); -static int ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount); +static int ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount); static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp); void btree_swap_node(struct buf *bp, __unused void *arg); @@ -133,47 +133,72 @@ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions opt // XXXdbg block->isModified = 0; - /* Check and endian swap B-Tree node (only if it's a valid block) */ - if (!(options & kGetEmptyBlock)) { - /* This happens when we first open the b-tree, we might not have all the node data on hand */ - if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && - (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && - (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { + /* Check and endian swap B-Tree node (only if it's a valid block) */ + if (!(options & kGetEmptyBlock)) { + + /* This happens when we first open the b-tree, we might not have all the node data on hand */ + if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && + (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && + (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { - /* - * Don't swap the node descriptor, record offsets, or other records. 
- * This record will be invalidated and re-read with the correct node - * size once the B-tree control block is set up with the node size - * from the header record. - */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node); - - } else if (block->blockReadFromDisk) { - /* - * The node was just read from disk, so always swap/check it. - * This is necessary on big endian since the test below won't trigger. - */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); - } else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) { /* - * The node was left in the cache in non-native order, so swap it. - * This only happens on little endian, after the node is written - * back to disk. + * Don't swap the node descriptor, record offsets, or other records. + * This record will be invalidated and re-read with the correct node + * size once the B-tree control block is set up with the node size + * from the header record. */ - retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); - } - - /* - * If we got an error, then the node is only partially swapped. - * We mark the buffer invalid so that the next attempt to get the - * node will read it and attempt to swap again, and will notice - * the error again. If we didn't do this, the next attempt to get - * the node might use the partially swapped node as-is. - */ - if (retval) + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node); + + } else { + /* + * In this case, we have enough data in-hand to do basic validation + * on the B-Tree node. + */ + if (block->blockReadFromDisk) { + /* + * The node was just read from disk, so always swap/check it. + * This is necessary on big endian since the test below won't trigger. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); + } + else { + /* + * Block wasn't read from disk; it was found in the cache. + */ + if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) { + /* + * The node was left in the cache in non-native order, so swap it. + * This only happens on little endian, after the node is written + * back to disk. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node); + } + else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x000e) { + /* + * The node was in-cache in native-endianness. We don't need to do + * anything here, because the node is ready to use. Set retval == 0. + */ + retval = 0; + } + /* + * If the node doesn't have hex 14 (0xe) in the last two bytes of the buffer, + * it doesn't necessarily mean that this is a bad node. Zeroed nodes that are + * marked as unused in the b-tree map node would be OK and not have valid content. + */ + } + } + + /* + * If we got an error, then the node is only partially swapped. + * We mark the buffer invalid so that the next attempt to get the + * node will read it and attempt to swap again, and will notice + * the error again. If we didn't do this, the next attempt to get + * the node might use the partially swapped node as-is. + */ + if (retval) buf_markinvalid(bp); - } - } + } + } if (retval) { if (bp) @@ -549,7 +574,7 @@ out: * Clear out (zero) new b-tree nodes on disk. 
*/ static int -ClearBTNodes(struct vnode *vp, long blksize, off_t offset, off_t amount) +ClearBTNodes(struct vnode *vp, int blksize, off_t offset, off_t amount) { struct hfsmount *hfsmp = VTOHFS(vp); struct buf *bp = NULL; @@ -635,7 +660,7 @@ again: /* * Serialize creation using HFS_CREATING_BTREE flag. */ - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); if (hfsmp->hfs_flags & HFS_CREATING_BTREE) { /* Someone else beat us, wait for them to finish. */ (void) msleep(hfsmp->hfs_attribute_cp, &hfsmp->hfs_mutex, @@ -646,7 +671,7 @@ again: goto again; } hfsmp->hfs_flags |= HFS_CREATING_BTREE; - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); /* Check if were out of usable disk space. */ if ((hfs_freeblks(hfsmp, 1) == 0)) { @@ -859,10 +884,10 @@ again: } /* Update vp/cp for attribute btree */ - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); hfsmp->hfs_attribute_cp = VTOC(vp); hfsmp->hfs_attribute_vp = vp; - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); @@ -874,7 +899,7 @@ again: /* Initialize the vnode for virtual attribute data file */ result = init_attrdata_vnode(hfsmp); if (result) { - printf("hfs_create_attr_btree: init_attrdata_vnode() error=%d\n", result); + printf("hfs_create_attr_btree: vol=%s init_attrdata_vnode() error=%d\n", hfsmp->vcbVN, result); } exit: @@ -897,10 +922,10 @@ exit: /* * All done, clear HFS_CREATING_BTREE, and wake up any sleepers. */ - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); hfsmp->hfs_flags &= ~HFS_CREATING_BTREE; wakeup((caddr_t)hfsmp->hfs_attribute_cp); - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); return (result); } diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index 0d310bc82..c7aa7b38e 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -92,6 +92,7 @@ u_char modetodirtype[16] = { #define HFS_LOOKUP_SYSFILE 0x1 /* If set, allow lookup of system files */ #define HFS_LOOKUP_HARDLINK 0x2 /* If set, allow lookup of hard link records and not resolve the hard links */ +#define HFS_LOOKUP_CASESENSITIVE 0x4 /* If set, verify results of a file/directory record match input case */ static int cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t hint, int wantrsrc, struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid); @@ -121,9 +122,11 @@ static int builddesc(const HFSPlusCatalogKey *key, cnid_t cnid, u_int32_t hint, static void getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct cat_attr * attrp); +#if CONFIG_HFS_STD static void promotekey(struct hfsmount *hfsmp, const HFSCatalogKey *hfskey, HFSPlusCatalogKey *keyp, u_int32_t *encoding); static void promotefork(struct hfsmount *hfsmp, const struct HFSCatalogFile *file, int resource, struct cat_fork * forkp); static void promoteattr(struct hfsmount *hfsmp, const CatalogRecord *dataPtr, struct HFSPlusCatalogFile *crp); +#endif static cnid_t getcnid(const CatalogRecord *crp); static u_int32_t getencoding(const CatalogRecord *crp); @@ -138,6 +141,199 @@ static int cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFS static int cat_update_internal(struct hfsmount *hfsmp, int update_hardlink, struct cat_desc *descp, struct cat_attr *attrp, struct cat_fork *dataforkp, struct cat_fork *rsrcforkp); + + +/* HFS ID Hashtable Functions */ +#define IDHASH(hfsmp, inum) (&hfsmp->hfs_idhashtbl[(inum) & hfsmp->hfs_idhash]) + +/* Initialize the HFS ID hash 
table */ +void +hfs_idhash_init (struct hfsmount *hfsmp) { + /* secured by catalog lock so no lock init needed */ + hfsmp->hfs_idhashtbl = hashinit(HFS_IDHASH_DEFAULT, M_HFSMNT, &hfsmp->hfs_idhash); +} + +/* Free the HFS ID hash table */ +void +hfs_idhash_destroy (struct hfsmount *hfsmp) { + /* during failed mounts & unmounts */ + FREE(hfsmp->hfs_idhashtbl, M_HFSMNT); +} + +/* +from hfs_catalog.h: +typedef struct cat_preflightid { + cnid_t fileid; + LIST_ENTRY(cat_preflightid) id_hash; +} cat_preflightid_t; + +from hfs.h: + u_long hfs_idhash; / size of cnid/fileid hash table -1 / + LIST_HEAD(idhashhead, cat_preflightid) *hfs_idhashtbl; / base of ID hash / +*/ + +/* + * Check the run-time ID hashtable. + * + * The catalog lock must be held (like other functions in this file). + * + * Returns: + * 1 if the ID is in the hash table. + * 0 if the ID is not in the hash table + */ +int cat_check_idhash (struct hfsmount *hfsmp, cnid_t test_fileid) { + + cat_preflightid_t *preflight; + int found = 0; + + for (preflight = IDHASH(hfsmp, test_fileid)->lh_first; preflight ; preflight = preflight->id_hash.le_next) { + if (preflight->fileid == test_fileid) { + found = 1; + break; + } + } + + return found; +} + +/* Insert the supplied preflight into the ID hash table */ +int cat_insert_idhash (struct hfsmount *hfsmp, cat_preflightid_t *preflight) { + + if (preflight) { + LIST_INSERT_HEAD(IDHASH(hfsmp, (preflight->fileid)), preflight, id_hash); + return 0; + } + return -1; +} + + +/* Remove the data structure with the specified ID from the hashtable */ +int cat_remove_idhash (cat_preflightid_t *preflight) { + + if ((preflight) && ((preflight->id_hash.le_next || preflight->id_hash.le_prev))) { + LIST_REMOVE (preflight, id_hash); + preflight->id_hash.le_next = NULL; + preflight->id_hash.le_prev = NULL; + + return 0; + } + + return -1; +} + +/* + * Acquire a new CNID for use. + * + * This is slightly more complicated than just pulling the value from the + * hfsmount data structure. We need to validate that the ID is not in-use + * even if we've not wrapped around and that there are not any lingering + * or orphaned fileIDs for this ID. + * + * Also validate that there are not any pending insertions into the + * catalog by checking the ID hash table. + */ +int +cat_acquire_cnid (struct hfsmount *hfsmp, cnid_t *new_cnid) { + + uint32_t nextCNID; + struct BTreeIterator *iterator; + FSBufferDescriptor btdata; + uint16_t datasize; + CatalogRecord *recp; + int result = 0; + int std_hfs; + int wrapped = 0; + + std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); + /* + * Get the next CNID. We can change it since we hold the catalog lock. + */ +nextid: + nextCNID = hfsmp->vcbNxtCNID; + if (nextCNID == 0xFFFFFFFF) { + if (std_hfs) { + return (ENOSPC); + } else { + wrapped++; + if (wrapped > 1) { + /* don't allow more than one wrap-around */ + return ENOSPC; + } + hfs_lock_mount (hfsmp); + hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; + hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; + hfs_unlock_mount (hfsmp); + } + } else { + hfsmp->vcbNxtCNID++; + } + MarkVCBDirty(hfsmp); + + /* First check that there are not any entries pending in the hash table with this ID */ + if (cat_check_idhash (hfsmp, nextCNID)) { + /* Someone wants to insert this into the catalog but hasn't done so yet. 
Skip it */ + goto nextid; + } + + /* Check to see if a thread record exists for the target ID we just got */ + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + bzero(iterator, sizeof(*iterator)); + buildthreadkey(nextCNID, std_hfs, (CatalogKey *)&iterator->key); + + MALLOC(recp, CatalogRecord *, sizeof(CatalogRecord), M_TEMP, M_WAITOK); + BDINIT(btdata, recp); + + result = BTSearchRecord(hfsmp->hfs_catalog_cp->c_datafork, iterator, &btdata, &datasize, iterator); + FREE (recp, M_TEMP); + FREE (iterator, M_TEMP); + + if (result == btNotFound) { + /* Good. File ID was not in use. Move on to checking EA B-Tree */ + result = file_attribute_exist (hfsmp, nextCNID); + if (result == EEXIST) { + /* This CNID has orphaned EAs. Skip it and move on to the next one */ + result = 0; + goto nextid; + } + if (result) { + /* For any other error, return the result */ + return result; + } + + /* + * Now validate that there are no lingering cnodes with this ID. If a cnode + * has been removed on-disk (marked C_NOEXISTS), but has not yet been reclaimed, + * then it will still have an entry in the cnode hash table. This means that + * a subsequent lookup will find THAT entry and believe this one has been deleted + * prematurely. If there is a lingering cnode, then just skip this entry and move on. + * + * Note that we pass (existence_only == 1) argument to hfs_chash_snoop. + */ + if (!std_hfs && (hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask)) { + if (hfs_chash_snoop (hfsmp, nextCNID, 1, NULL, NULL) == 0) { + goto nextid; + } + } + + /* + * If we get here, then we didn't see any thread records, orphaned EAs, + * or stale cnodes. This ID is safe to vend out. + */ + *new_cnid = nextCNID; + } + else if (result == noErr) { + /* move on to the next ID */ + goto nextid; + } + else { + /* For any other situation, just bail out */ + return EIO; + } + + return 0; + +} + int cat_preflight(struct hfsmount *hfsmp, catops_t ops, cat_cookie_t *cookie, __unused proc_t p) { @@ -180,21 +376,28 @@ cat_convertattr( { int std_hfs = HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord; - if (std_hfs) { + if (std_hfs == 0) { + getbsdattr(hfsmp, (struct HFSPlusCatalogFile *)recp, attrp); + } +#if CONFIG_HFS_STD + else { struct HFSPlusCatalogFile cnoderec; promoteattr(hfsmp, recp, &cnoderec); getbsdattr(hfsmp, &cnoderec, attrp); - } else { - getbsdattr(hfsmp, (struct HFSPlusCatalogFile *)recp, attrp); - } + } +#endif - if (isadir(recp)) + if (isadir(recp)) { bzero(datafp, sizeof(*datafp)); + } +#if CONFIG_HFS_STD else if (std_hfs) { promotefork(hfsmp, (HFSCatalogFile *)&recp->hfsFile, 0, datafp); promotefork(hfsmp, (HFSCatalogFile *)&recp->hfsFile, 1, rsrcfp); - } else { + } +#endif + else { /* Convert the data fork. */ datafp->cf_size = recp->hfsPlusFile.dataFork.logicalSize; datafp->cf_new_size = 0; @@ -245,21 +448,38 @@ cat_convertkey( int std_hfs = HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord; HFSPlusCatalogKey * pluskey = NULL; u_int32_t encoding; + cnid_t cnid = 0; + int err = 0; - if (std_hfs) { - MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); - promotekey(hfsmp, (HFSCatalogKey *)key, pluskey, &encoding); - - } else { + if (std_hfs == 0) { pluskey = (HFSPlusCatalogKey *)key; encoding = getencoding(recp); } +#if CONFIG_HFS_STD + else { + MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); + promotekey(hfsmp, (HFSCatalogKey *)key, pluskey, &encoding); + } +#endif + + /* Get the CNID before calling builddesc. Need to error check it. 
*/ + cnid = getcnid(recp); + if (cnid == 0) { + /* If this CNID == 0, it's invalid. Mark as corrupt */ + hfs_mark_volume_inconsistent (hfsmp); + err = EINVAL; + } + else { + builddesc(pluskey, cnid, 0, encoding, isadir(recp), descp); + } - builddesc(pluskey, getcnid(recp), 0, encoding, isadir(recp), descp); +#if CONFIG_HFS_STD if (std_hfs) { FREE(pluskey, M_TEMP); } - return (0); +#endif + + return err; } @@ -299,15 +519,17 @@ cat_releasedesc(struct cat_desc *descp) * catalog descriptor (when supplied outdescp is non-null). */ int -cat_lookup(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, +cat_lookup(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, int force_casesensitive_lookup, struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp, cnid_t *desc_cnid) { CatalogKey * keyp; int std_hfs; int result; + int flags; std_hfs = (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord); + flags = force_casesensitive_lookup ? HFS_LOOKUP_CASESENSITIVE : 0; MALLOC(keyp, CatalogKey *, sizeof(CatalogKey), M_TEMP, M_WAITOK); @@ -315,7 +537,7 @@ cat_lookup(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, if (result) goto exit; - result = cat_lookupbykey(hfsmp, keyp, 0, descp->cd_hint, wantrsrc, outdescp, attrp, forkp, desc_cnid); + result = cat_lookupbykey(hfsmp, keyp, flags, descp->cd_hint, wantrsrc, outdescp, attrp, forkp, desc_cnid); if (result == ENOENT) { if (!std_hfs) { @@ -435,27 +657,35 @@ cat_findname(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *outdescp) /* Turn thread record into a cnode key (in place). */ switch (recp->recordType) { - case kHFSFolderThreadRecord: - isdir = 1; - /* fall through */ - case kHFSFileThreadRecord: - keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); - keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; - break; - case kHFSPlusFolderThreadRecord: - isdir = 1; - /* fall through */ - case kHFSPlusFileThreadRecord: - keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; - keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + - (keyp->hfsPlus.nodeName.length * 2); - break; - default: - result = ENOENT; - goto exit; +#if CONFIG_HFS_STD + case kHFSFolderThreadRecord: + isdir = 1; + /* fall through */ + case kHFSFileThreadRecord: + keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); + keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; + break; +#endif + + case kHFSPlusFolderThreadRecord: + isdir = 1; + /* fall through */ + case kHFSPlusFileThreadRecord: + keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; + keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + + (keyp->hfsPlus.nodeName.length * 2); + break; + default: + result = ENOENT; + goto exit; } - if (std_hfs) { + + if (std_hfs == 0) { + builddesc((HFSPlusCatalogKey *)keyp, cnid, 0, 0, isdir, outdescp); + } +#if CONFIG_HFS_STD + else { HFSPlusCatalogKey * pluskey = NULL; u_int32_t encoding; @@ -463,10 +693,9 @@ cat_findname(struct hfsmount *hfsmp, cnid_t cnid, struct cat_desc *outdescp) promotekey(hfsmp, &keyp->hfs, pluskey, &encoding); builddesc(pluskey, cnid, 0, encoding, isdir, outdescp); FREE(pluskey, M_TEMP); - - } else { - builddesc((HFSPlusCatalogKey *)keyp, cnid, 0, 0, isdir, outdescp); } +#endif + exit: FREE(recp, M_TEMP); FREE(iterator, M_TEMP); @@ -508,36 +737,39 @@ cat_idlookup(struct hfsmount *hfsmp, cnid_t cnid, int allow_system_files, int wa /* Turn thread record into a cnode key (in place) */ switch (recp->recordType) { - case kHFSFileThreadRecord: - case
kHFSFolderThreadRecord: - keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); - /* check for NULL name */ - if (keyp->hfs.nodeName[0] == 0) { - result = ENOENT; - goto exit; - } +#if CONFIG_HFS_STD + case kHFSFileThreadRecord: + case kHFSFolderThreadRecord: + keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); - keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; - break; + /* check for NULL name */ + if (keyp->hfs.nodeName[0] == 0) { + result = ENOENT; + goto exit; + } - case kHFSPlusFileThreadRecord: - case kHFSPlusFolderThreadRecord: - keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; + keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; + break; +#endif - /* check for NULL name */ - if (keyp->hfsPlus.nodeName.length == 0) { - result = ENOENT; - goto exit; - } + case kHFSPlusFileThreadRecord: + case kHFSPlusFolderThreadRecord: + keyp = (CatalogKey *)&recp->hfsPlusThread.reserved; - keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + - (keyp->hfsPlus.nodeName.length * 2); - break; + /* check for NULL name */ + if (keyp->hfsPlus.nodeName.length == 0) { + result = ENOENT; + goto exit; + } - default: - result = ENOENT; - goto exit; + keyp->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength + + (keyp->hfsPlus.nodeName.length * 2); + break; + + default: + result = ENOENT; + goto exit; } result = cat_lookupbykey(hfsmp, keyp, @@ -594,7 +826,7 @@ cat_lookupmangled(struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, return (ENOENT); } - result = cat_idlookup(hfsmp, fileID, 0,0, outdescp, attrp, forkp); + result = cat_idlookup(hfsmp, fileID, 0, 0, outdescp, attrp, forkp); if (result) return (ENOENT); /* It must be in the correct directory */ @@ -664,7 +896,14 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h /* Save the cnid, parentid, and encoding now in case there's a hard link or inode */ cnid = getcnid(recp); - if (!std_hfs) { + if (cnid == 0) { + /* CNID of 0 is invalid. Mark as corrupt */ + hfs_mark_volume_inconsistent (hfsmp); + result = EINVAL; + goto exit; + } + + if (std_hfs == 0) { parentid = keyp->hfsPlus.parentID; } @@ -675,9 +914,19 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h if ((hfsmp->jnl || ((HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) && ((cnid == hfsmp->hfs_jnlfileid) || (cnid == hfsmp->hfs_jnlinfoblkid)) && !(flags & HFS_LOOKUP_SYSFILE)) { - result = ENOENT; + result = ERESERVEDNAME; goto exit; } + + if (!std_hfs && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) { + /* Make sure the case of the file was correct if requested */ + if (flags & HFS_LOOKUP_CASESENSITIVE) { + if (0 != cat_binarykeycompare(&keyp->hfsPlus, (HFSPlusCatalogKey *)&iterator->key)) { + result = ERESERVEDNAME; + goto exit; + } + } + } /* * When a hardlink link is encountered, auto resolve it. 
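A minimal sketch (not part of the patch) of how the new preflight machinery introduced above, hfs_idhash_init(), cat_acquire_cnid(), cat_insert_idhash(), and cat_remove_idhash(), is meant to be used, following the contract stated in their comments and in the hfs_catalog.h hunk later in this patch. The caller name and error handling here are illustrative assumptions, not code from this commit:

/* Reserve a CNID, publish the reservation, then create the record. */
static int
example_create_with_reserved_cnid(struct hfsmount *hfsmp,
    struct cat_desc *descp, struct cat_attr *attrp,
    struct cat_desc *out_descp)
{
	cat_preflightid_t preflight;
	cnid_t new_id = 0;
	int error;

	/* The catalog lock must be held, as for the functions above. */
	error = cat_acquire_cnid(hfsmp, &new_id);
	if (error) {
		return error;
	}

	/*
	 * Park the ID in the hash table so that any other thread running
	 * cat_acquire_cnid() before our record lands on disk sees this ID
	 * as taken and skips it. The entry must stay valid while linked.
	 */
	preflight.fileid = new_id;
	cat_insert_idhash(hfsmp, &preflight);

	/* cat_create() no longer vends CNIDs; it consumes the reserved one. */
	error = cat_create(hfsmp, new_id, descp, attrp, out_descp);

	/* Success or failure, the reservation is no longer needed. */
	cat_remove_idhash(&preflight);
	return error;
}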
@@ -706,13 +955,8 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h } } - if (attrp != NULL) { - if (std_hfs) { - struct HFSPlusCatalogFile cnoderec; - - promoteattr(hfsmp, recp, &cnoderec); - getbsdattr(hfsmp, &cnoderec, attrp); - } else { + if (attrp != NULL) { + if (std_hfs == 0) { getbsdattr(hfsmp, (struct HFSPlusCatalogFile *)recp, attrp); if (ilink) { /* Update the inode number for this hard link */ @@ -757,13 +1001,25 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h } } } +#if CONFIG_HFS_STD + else { + struct HFSPlusCatalogFile cnoderec; + + promoteattr(hfsmp, recp, &cnoderec); + getbsdattr(hfsmp, &cnoderec, attrp); + } +#endif } if (forkp != NULL) { if (isadir(recp)) { bzero(forkp, sizeof(*forkp)); - } else if (std_hfs) { + } +#if CONFIG_HFS_STD + else if (std_hfs) { promotefork(hfsmp, (HFSCatalogFile *)&recp->hfsFile, wantrsrc, forkp); - } else if (wantrsrc) { + } +#endif + else if (wantrsrc) { /* Convert the resource fork. */ forkp->cf_size = recp->hfsPlusFile.resourceFork.logicalSize; forkp->cf_new_size = 0; @@ -849,17 +1105,25 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int flags, u_int32_t h if (descp != NULL) { HFSPlusCatalogKey * pluskey = NULL; - if (std_hfs) { + if (std_hfs == 0) { + pluskey = (HFSPlusCatalogKey *)&iterator->key; + } +#if CONFIG_HFS_STD + else { MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); promotekey(hfsmp, (HFSCatalogKey *)&iterator->key, pluskey, &encoding); - } else { - pluskey = (HFSPlusCatalogKey *)&iterator->key; } +#endif + builddesc(pluskey, cnid, hint, encoding, isadir(recp), descp); + +#if CONFIG_HFS_STD if (std_hfs) { FREE(pluskey, M_TEMP); } +#endif + } if (desc_cnid != NULL) { @@ -883,13 +1147,12 @@ exit: * catalog descriptor (when supplied outdescp is non-null). */ int -cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attrp, +cat_create(struct hfsmount *hfsmp, cnid_t new_fileid, struct cat_desc *descp, struct cat_attr *attrp, struct cat_desc *out_descp) { FCB * fcb; struct btobj * bto; FSBufferDescriptor btdata; - u_int32_t nextCNID; u_int32_t datalen; int std_hfs; int result = 0; @@ -901,24 +1164,8 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr fcb = hfsmp->hfs_catalog_cp->c_datafork; std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); - /* - * Get the next CNID. We can change it since we hold the catalog lock. - */ - nextCNID = hfsmp->vcbNxtCNID; - if (nextCNID == 0xFFFFFFFF) { - if (std_hfs) { - return (ENOSPC); - } else { - HFS_MOUNT_LOCK(hfsmp, TRUE) - hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; - hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); - } - } else { - hfsmp->vcbNxtCNID++; - } - MarkVCBDirty(hfsmp); - + /* The caller is expected to reserve a CNID before calling this function! */ + /* Get space for iterator, key and data */ MALLOC(bto, struct btobj *, sizeof(struct btobj), M_TEMP, M_WAITOK); bto->iterator.hint.nodeNum = 0; @@ -943,65 +1190,22 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr btdata.itemSize = datalen; btdata.itemCount = 1; - for (;;) { - // this call requires the attribute file lock to be held - result = file_attribute_exist(hfsmp, nextCNID); - if (result == EEXIST) { - // that cnid has orphaned attributes so just skip it. 
- if (++nextCNID < kHFSFirstUserCatalogNodeID) { - nextCNID = kHFSFirstUserCatalogNodeID; - } - continue; - } - if (result) goto exit; - - buildthreadkey(nextCNID, std_hfs, (CatalogKey *) &bto->iterator.key); - - /* - * If the CNID wraparound bit is set, then we need to validate if there - * is a cnode in the hash already with this ID (even if it no longer exists - * on disk). If so, then just skip this ID and move on to the next one. - */ - if (!std_hfs && (hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask)) { - if (hfs_chash_snoop (hfsmp, nextCNID, 1, NULL, NULL) == 0) { - /* It was found in the cnode hash!*/ - result = btExists; - } - } - - if (result == 0) { - result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); - } - - if ((result == btExists) && !std_hfs && (hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask)) { - /* - * Allow CNIDs on HFS Plus volumes to wrap around - */ - if (++nextCNID < kHFSFirstUserCatalogNodeID) { - nextCNID = kHFSFirstUserCatalogNodeID; - } - continue; - } - break; - } - if (result) goto exit; - } - - /* - * CNID is now established. If we have wrapped then - * update the vcbNxtCNID. - */ - if ((hfsmp->vcbAtrb & kHFSCatalogNodeIDsReusedMask)) { - hfsmp->vcbNxtCNID = nextCNID + 1; - if (hfsmp->vcbNxtCNID < kHFSFirstUserCatalogNodeID) { - hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; + /* Caller asserts the following: + * 1) this CNID is not in use by any orphaned EAs + * 2) There are no lingering cnodes (removed on-disk but still in-core) with this CNID + * 3) There are no thread or catalog records for this ID + */ + buildthreadkey(new_fileid, std_hfs, (CatalogKey *) &bto->iterator.key); + result = BTInsertRecord(fcb, &bto->iterator, &btdata, datalen); + if (result) { + goto exit; } } /* * Now insert the file/directory record */ - buildrecord(attrp, nextCNID, std_hfs, encoding, &bto->data, &datalen); + buildrecord(attrp, new_fileid, std_hfs, encoding, &bto->data, &datalen); btdata.bufferAddress = &bto->data; btdata.itemSize = datalen; btdata.itemCount = 1; @@ -1015,12 +1219,12 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr /* Back out the thread record */ if (!std_hfs || S_ISDIR(attrp->ca_mode)) { - buildthreadkey(nextCNID, std_hfs, (CatalogKey *)&bto->iterator.key); + buildthreadkey(new_fileid, std_hfs, (CatalogKey *)&bto->iterator.key); if (BTDeleteRecord(fcb, &bto->iterator)) { /* Error on deleting extra thread record, mark * volume inconsistent */ - printf ("hfs: cat_create() failed to delete thread record on volume %s\n", hfsmp->vcbVN); + printf ("hfs: cat_create() failed to delete thread record id=%u on vol=%s\n", new_fileid, hfsmp->vcbVN); hfs_mark_volume_inconsistent(hfsmp); } } @@ -1033,20 +1237,26 @@ cat_create(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr if (out_descp != NULL) { HFSPlusCatalogKey * pluskey = NULL; - if (std_hfs) { + if (std_hfs == 0) { + pluskey = (HFSPlusCatalogKey *)&bto->iterator.key; + } +#if CONFIG_HFS_STD + else { MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); promotekey(hfsmp, (HFSCatalogKey *)&bto->iterator.key, pluskey, &encoding); - - } else - pluskey = (HFSPlusCatalogKey *)&bto->iterator.key; + } +#endif - builddesc(pluskey, nextCNID, bto->iterator.hint.nodeNum, + builddesc(pluskey, new_fileid, bto->iterator.hint.nodeNum, encoding, S_ISDIR(attrp->ca_mode), out_descp); +#if CONFIG_HFS_STD if (std_hfs) { FREE(pluskey, M_TEMP); } +#endif + } - attrp->ca_fileid = nextCNID; + attrp->ca_fileid = new_fileid; exit: (void) 
BTFlushPath(fcb); @@ -1219,9 +1429,13 @@ cat_rename ( out_cdp->cd_encoding = encoding; } +#if CONFIG_HFS_STD if (std_hfs && !directory && - !(recp->hfsFile.flags & kHFSThreadExistsMask)) + !(recp->hfsFile.flags & kHFSThreadExistsMask)) { skipthread = 1; + } +#endif + #if 0 /* * If the keys are identical then there's nothing left to do! @@ -1239,6 +1453,7 @@ cat_rename ( result = BTInsertRecord(fcb, to_iterator, &btdata, datasize); if (result == btExists) { int fromtype = recp->recordType; + cnid_t cnid = 0; if (from_cdp->cd_parentcnid != to_cdp->cd_parentcnid) goto exit; /* EEXIST */ @@ -1248,8 +1463,16 @@ cat_rename ( if (result) goto exit; + /* Get the CNID after calling searchrecord */ + cnid = getcnid (recp); + if (cnid == 0) { + hfs_mark_volume_inconsistent(hfsmp); + result = EINVAL; + goto exit; + } + if ((fromtype != recp->recordType) || - (from_cdp->cd_cnid != getcnid(recp))) { + (from_cdp->cd_cnid != cnid)) { result = EEXIST; goto exit; /* EEXIST */ } @@ -1268,7 +1491,7 @@ cat_rename ( int err; err = BTInsertRecord(fcb, from_iterator, &btdata, datasize); if (err) { - printf("hfs: cat_create: could not undo (BTInsert = %d)", err); + printf("hfs: cat_create: could not undo (BTInsert = %d)\n", err); hfs_mark_volume_inconsistent(hfsmp); result = err; goto exit; @@ -1295,7 +1518,7 @@ cat_rename ( int err; err = BTDeleteRecord(fcb, to_iterator); if (err) { - printf("hfs: cat_create: could not undo (BTDelete = %d)", err); + printf("hfs: cat_create: could not undo (BTDelete = %d)\n", err); hfs_mark_volume_inconsistent(hfsmp); result = err; goto exit; @@ -1337,7 +1560,11 @@ cat_rename ( if (out_cdp) { HFSPlusCatalogKey * pluskey = NULL; - if (std_hfs) { + if (std_hfs == 0) { + pluskey = (HFSPlusCatalogKey *)&to_iterator->key; + } +#if CONFIG_HFS_STD + else { MALLOC(pluskey, HFSPlusCatalogKey *, sizeof(HFSPlusCatalogKey), M_TEMP, M_WAITOK); promotekey(hfsmp, (HFSCatalogKey *)&to_iterator->key, pluskey, &encoding); @@ -1348,15 +1575,17 @@ cat_rename ( realhint = hfs_pickencoding(pluskey->nodeName.unicode, pluskey->nodeName.length); vcb->vcbFndrInfo[4] = SET_HFS_TEXT_ENCODING(realhint); } - - } else - pluskey = (HFSPlusCatalogKey *)&to_iterator->key; + } +#endif builddesc(pluskey, from_cdp->cd_cnid, to_iterator->hint.nodeNum, encoding, directory, out_cdp); +#if CONFIG_HFS_STD if (std_hfs) { FREE(pluskey, M_TEMP); } +#endif + } exit: (void) BTFlushPath(fcb); @@ -1455,7 +1684,7 @@ cat_delete(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr *attr buildthreadkey(cnid, std_hfs, (CatalogKey *)&iterator->key); if (BTDeleteRecord(fcb, iterator)) { if (!std_hfs) { - printf ("hfs: cat_delete() failed to delete thread record on volume %s\n", hfsmp->vcbVN); + printf ("hfs: cat_delete() failed to delete thread record id=%u on vol=%s\n", cnid, hfsmp->vcbVN); hfs_mark_volume_inconsistent(hfsmp); } } @@ -1480,11 +1709,9 @@ cat_update_internal(struct hfsmount *hfsmp, int update_hardlink, struct cat_desc FCB * fcb; BTreeIterator * iterator; struct update_state state; - int std_hfs; int result; fcb = hfsmp->hfs_catalog_cp->c_datafork; - std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); state.s_desc = descp; state.s_attr = attrp; @@ -1554,7 +1781,6 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st struct cat_fork *forkp; struct hfsmount *hfsmp; long blksize; - int i; descp = state->s_desc; attrp = state->s_attr; @@ -1562,6 +1788,8 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st blksize = HFSTOVCB(hfsmp)->blockSize; switch 
(crp->recordType) { + +#if CONFIG_HFS_STD case kHFSFolderRecord: { HFSCatalogFolder *dir; @@ -1580,7 +1808,8 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st } case kHFSFileRecord: { HFSCatalogFile *file; - + int i; + file = (struct HFSCatalogFile *)crp; /* Do a quick sanity check */ if ((ckp->hfs.parentID != descp->cd_parentcnid) || @@ -1621,13 +1850,15 @@ catrec_update(const CatalogKey *ckp, CatalogRecord *crp, struct update_state *st file->flags &= ~kHFSFileLockedMask; break; } +#endif + case kHFSPlusFolderRecord: { HFSPlusCatalogFolder *dir; dir = (struct HFSPlusCatalogFolder *)crp; /* Do a quick sanity check */ if (dir->folderID != attrp->ca_fileid) { - printf("hfs: catrec_update: id %d != %d\n", dir->folderID, attrp->ca_fileid); + printf("hfs: catrec_update: id %d != %d, vol=%s\n", dir->folderID, attrp->ca_fileid, hfsmp->vcbVN); return (btNotFound); } dir->flags = attrp->ca_recflags; @@ -1910,12 +2141,12 @@ cat_check_link_ancestry(struct hfsmount *hfsmp, cnid_t cnid, cnid_t pointed_at_c break; } if ((result = getkey(hfsmp, cnid, (CatalogKey *)keyp))) { - printf("hfs: cat_check_link_ancestry: getkey for %u failed\n", cnid); + printf("hfs: cat_check_link_ancestry: getkey failed id=%u, vol=%s\n", cnid, hfsmp->vcbVN); invalid = 1; /* On errors, assume an invalid parent */ break; } if ((result = BTSearchRecord(fcb, ip, &btdata, NULL, NULL))) { - printf("hfs: cat_check_link_ancestry: cannot find %u\n", cnid); + printf("hfs: cat_check_link_ancestry: cannot find id=%u, vol=%s\n", cnid, hfsmp->vcbVN); invalid = 1; /* On errors, assume an invalid parent */ break; } @@ -1990,7 +2221,7 @@ cat_update_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t prevli result = BTUpdateRecord(fcb, iterator, (IterateCallBackProcPtr)update_siblinglinks_callback, &state); (void) BTFlushPath(fcb); } else { - printf("hfs: cat_update_siblinglinks: couldn't resolve cnid %d\n", linkfileid); + printf("hfs: cat_update_siblinglinks: couldn't resolve cnid=%d, vol=%s\n", linkfileid, hfsmp->vcbVN); } FREE (iterator, M_TEMP); @@ -2094,6 +2325,109 @@ exit: } +/* + * cat_lookup_lastlink - find the last sibling link in the chain (no "next" ptr) + */ +int +cat_lookup_lastlink(struct hfsmount *hfsmp, cnid_t linkfileid, + cnid_t *lastlink, struct cat_desc *cdesc) +{ + FCB * fcb; + BTreeIterator * iterator; + struct FSBufferDescriptor btdata; + struct HFSPlusCatalogFile file; + int result; + int itercount = 0; + int foundlast = 0; + cnid_t currentlink = linkfileid; + + fcb = hfsmp->hfs_catalog_cp->c_datafork; + + /* Create an iterator for use by us temporarily */ + MALLOC(iterator, BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK); + + while ((foundlast == 0) && (itercount < HFS_LINK_MAX )) { + itercount++; + bzero(iterator, sizeof(*iterator)); + + if ((result = getkey(hfsmp, currentlink, (CatalogKey *)&iterator->key))) { + goto exit; + } + BDINIT(btdata, &file); + + if ((result = BTSearchRecord(fcb, iterator, &btdata, NULL, NULL))) { + goto exit; + } + + /* The prev/next chain is only valid when kHFSHasLinkChainMask is set. */ + if (file.flags & kHFSHasLinkChainMask) { + cnid_t parent; + + parent = ((HFSPlusCatalogKey *)&iterator->key)->parentID; + /* + * The raw inode for a directory hardlink doesn't have a chain. + * Its link information lives in an EA. + */ + if (parent == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + /* We don't iterate to find the oldest directory hardlink. 
*/ + result = ENOLINK; + goto exit; + } + else if (parent == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) { + /* Raw inode for file hardlink (the base inode) */ + currentlink = file.hl_firstLinkID; + + /* + * One minor special-casing here is necessary. + * If our ID brought us to the raw hardlink inode, and it does + * not have any siblings, then it's an open-unlinked file, and we + * should not proceed any further. + */ + if (currentlink == 0) { + result = ENOLINK; + goto exit; + } + } + else { + /* Otherwise, this item's parent is a legitimate directory in the namespace */ + if (file.hl_nextLinkID == 0) { + /* If nextLinkID is 0, then we found the end; no more hardlinks */ + foundlast = 1; + *lastlink = currentlink; + /* + * Since we had to construct a catalog key to do this lookup + * we still hold it in-hand. We might as well use it to build + * the descriptor that the caller asked for. + */ + builddesc ((HFSPlusCatalogKey*)&iterator->key, currentlink, 0, 0, 0, cdesc); + break; + } + + currentlink = file.hl_nextLinkID; + } + } + else { + /* Sorry, can't help you without a link chain */ + result = ENOLINK; + goto exit; + } + } +exit: + /* If we didn't find what we were looking for, zero out the args */ + if (foundlast == 0) { + if (cdesc) { + bzero (cdesc, sizeof(struct cat_desc)); + } + if (lastlink) { + *lastlink = 0; + } + } + + FREE(iterator, M_TEMP); + return MacToVFSError(result); +} + + /* * cat_createlink - create a link in the catalog * @@ -2130,10 +2464,10 @@ cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr * */ nextCNID = hfsmp->vcbNxtCNID; if (nextCNID == 0xFFFFFFFF) { - HFS_MOUNT_LOCK(hfsmp, TRUE) + hfs_lock_mount (hfsmp); hfsmp->vcbNxtCNID = kHFSFirstUserCatalogNodeID; hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount(hfsmp); } else { hfsmp->vcbNxtCNID++; } @@ -2241,7 +2575,7 @@ cat_createlink(struct hfsmount *hfsmp, struct cat_desc *descp, struct cat_attr * exit: if (result) { if (thread_inserted) { - printf("hfs: cat_createlink: err %d from BTInsertRecord\n", MacToVFSError(result)); + printf("hfs: cat_createlink: BTInsertRecord err=%d, vol=%s\n", MacToVFSError(result), hfsmp->vcbVN); buildthreadkey(nextCNID, 0, (CatalogKey *)&bto->iterator.key); if (BTDeleteRecord(fcb, &bto->iterator)) { @@ -2354,6 +2688,13 @@ cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalog HFS_ALLOC_FORCECONTIG | HFS_ALLOC_METAZONE, &rsrcforkp->extents[0].startBlock, &rsrcforkp->extents[0].blockCount); + /* Did it fail with an out of space error? If so, re-try and allow journal flushing. 
*/ + if (result == dskFulErr ) { + result = BlockAllocate(hfsmp, 0, blkcount, blkcount, + HFS_ALLOC_FORCECONTIG | HFS_ALLOC_METAZONE | HFS_ALLOC_FLUSHTXN, + &rsrcforkp->extents[0].startBlock, + &rsrcforkp->extents[0].blockCount); + } if (result) { rsrcforkp->extents[0].startBlock = 0; goto exit; @@ -2510,8 +2851,10 @@ getentriesattr_callback(const CatalogKey *key, const CatalogRecord *rec, switch(rec->recordType) { case kHFSPlusFolderRecord: case kHFSPlusFileRecord: +#if CONFIG_HFS_STD case kHFSFolderRecord: case kHFSFileRecord: +#endif if (parentcnid != state->dir_cnid) { state->error = ENOENT; return (0); /* stop */ @@ -2541,8 +2884,31 @@ getentriesattr_callback(const CatalogKey *key, const CatalogRecord *rec, } cep = &list->entry[list->realentries++]; + + if (state->stdhfs == 0) { + getbsdattr(hfsmp, (const struct HFSPlusCatalogFile *)rec, &cep->ce_attr); + builddesc((const HFSPlusCatalogKey *)key, getcnid(rec), 0, getencoding(rec), + isadir(rec), &cep->ce_desc); - if (state->stdhfs) { + if (rec->recordType == kHFSPlusFileRecord) { + cep->ce_datasize = rec->hfsPlusFile.dataFork.logicalSize; + cep->ce_datablks = rec->hfsPlusFile.dataFork.totalBlocks; + cep->ce_rsrcsize = rec->hfsPlusFile.resourceFork.logicalSize; + cep->ce_rsrcblks = rec->hfsPlusFile.resourceFork.totalBlocks; + + /* Save link reference for later processing. */ + if ((SWAP_BE32(rec->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && + (SWAP_BE32(rec->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator)) { + cep->ce_attr.ca_linkref = rec->hfsPlusFile.bsdInfo.special.iNodeNum; + } else if ((rec->hfsPlusFile.flags & kHFSHasLinkChainMask) && + (SWAP_BE32(rec->hfsPlusFile.userInfo.fdType) == kHFSAliasType) && + (SWAP_BE32(rec->hfsPlusFile.userInfo.fdCreator) == kHFSAliasCreator)) { + cep->ce_attr.ca_linkref = rec->hfsPlusFile.bsdInfo.special.iNodeNum; + } + } + } +#if CONFIG_HFS_STD + else { struct HFSPlusCatalogFile cnoderec; HFSPlusCatalogKey * pluskey; u_int32_t encoding; @@ -2563,28 +2929,8 @@ getentriesattr_callback(const CatalogKey *key, const CatalogRecord *rec, cep->ce_rsrcsize = rec->hfsFile.rsrcLogicalSize; cep->ce_rsrcblks = rec->hfsFile.rsrcPhysicalSize / blksize; } - } else { - getbsdattr(hfsmp, (const struct HFSPlusCatalogFile *)rec, &cep->ce_attr); - builddesc((const HFSPlusCatalogKey *)key, getcnid(rec), 0, getencoding(rec), - isadir(rec), &cep->ce_desc); - - if (rec->recordType == kHFSPlusFileRecord) { - cep->ce_datasize = rec->hfsPlusFile.dataFork.logicalSize; - cep->ce_datablks = rec->hfsPlusFile.dataFork.totalBlocks; - cep->ce_rsrcsize = rec->hfsPlusFile.resourceFork.logicalSize; - cep->ce_rsrcblks = rec->hfsPlusFile.resourceFork.totalBlocks; - - /* Save link reference for later processing. */ - if ((SWAP_BE32(rec->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && - (SWAP_BE32(rec->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator)) { - cep->ce_attr.ca_linkref = rec->hfsPlusFile.bsdInfo.special.iNodeNum; - } else if ((rec->hfsPlusFile.flags & kHFSHasLinkChainMask) && - (SWAP_BE32(rec->hfsPlusFile.userInfo.fdType) == kHFSAliasType) && - (SWAP_BE32(rec->hfsPlusFile.userInfo.fdCreator) == kHFSAliasCreator)) { - cep->ce_attr.ca_linkref = rec->hfsPlusFile.bsdInfo.special.iNodeNum; - } - } } +#endif return (list->realentries < list->maxentries); } @@ -2857,7 +3203,7 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp, * handle a longer name). In that case, NFS will ask us * to mangle the name to keep it short enough. 
*/ - maxnamelen = NAME_MAX; + maxnamelen = NAME_MAX + 1; } else { maxnamelen = sizeof(entry->d_name); } @@ -2951,7 +3297,7 @@ getdirentries_callback(const CatalogKey *ckp, const CatalogRecord *crp, } else { encodestr: result = utf8_encodestr(cnp->ustr.unicode, namelen * sizeof(UniChar), - nameptr, &namelen, maxnamelen + 1, ':', 0); + nameptr, &namelen, maxnamelen, ':', 0); } /* Check result returned from encoding the filename to utf8 */ @@ -2970,7 +3316,7 @@ encodestr: } result = ConvertUnicodeToUTF8Mangled(cnp->ustr.length * sizeof(UniChar), - cnp->ustr.unicode, maxnamelen + 1, + cnp->ustr.unicode, maxnamelen, (ByteCount*)&namelen, nameptr, linkid); is_mangled = 1; } @@ -3100,6 +3446,7 @@ encodestr: uio_resid(state->cbs_uio) >= SMALL_DIRENTRY_SIZE); } +#if CONFIG_HFS_STD /* * getdirentries callback for standard HFS (non HFS+) directories. */ @@ -3131,7 +3478,7 @@ getdirentries_std_callback(const CatalogKey *ckp, const CatalogRecord *crp, } nameptr = (u_int8_t *)&catent.d_name[0]; - maxnamelen = NAME_MAX; + maxnamelen = sizeof(catent.d_name); switch(crp->recordType) { case kHFSFolderRecord: @@ -3147,13 +3494,13 @@ getdirentries_std_callback(const CatalogKey *ckp, const CatalogRecord *crp, }; cnp = (const CatalogName*) ckp->hfs.nodeName; - result = hfs_to_utf8(hfsmp, cnp->pstr, maxnamelen + 1, (ByteCount *)&namelen, nameptr); + result = hfs_to_utf8(hfsmp, cnp->pstr, maxnamelen, (ByteCount *)&namelen, nameptr); /* * When an HFS name cannot be encoded with the current * volume encoding we use MacRoman as a fallback. */ if (result) { - result = mac_roman_to_utf8(cnp->pstr, maxnamelen + 1, (ByteCount *)&namelen, nameptr); + result = mac_roman_to_utf8(cnp->pstr, maxnamelen, (ByteCount *)&namelen, nameptr); } catent.d_type = type; catent.d_namlen = namelen; @@ -3187,6 +3534,7 @@ getdirentries_std_callback(const CatalogKey *ckp, const CatalogRecord *crp, /* Continue iteration if there's room */ return (state->cbs_result == 0 && uio_resid(state->cbs_uio) >= SMALL_DIRENTRY_SIZE); } +#endif /* * Pack a uio buffer with directory entries from the catalog @@ -3337,10 +3685,8 @@ cat_getdirentries(struct hfsmount *hfsmp, u_int32_t entrycnt, directoryhint_t *d /* * Process as many entries as possible starting at iterator->key. 
*/ - if (hfsmp->hfs_flags & HFS_STANDARD) - result = BTIterateRecords(fcb, op, iterator, - (IterateCallBackProcPtr)getdirentries_std_callback, &state); - else { + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + /* HFS+ */ result = BTIterateRecords(fcb, op, iterator, (IterateCallBackProcPtr)getdirentries_callback, &state); @@ -3361,6 +3707,13 @@ cat_getdirentries(struct hfsmount *hfsmp, u_int32_t entrycnt, directoryhint_t *d result = getdirentries_callback(&ckp, &crp, &state); } } +#if CONFIG_HFS_STD + else { + /* HFS (standard) */ + result = BTIterateRecords(fcb, op, iterator, + (IterateCallBackProcPtr)getdirentries_std_callback, &state); + } +#endif /* Note that state.cbs_index is still valid on errors */ *items = state.cbs_index - index; @@ -3448,12 +3801,16 @@ static int cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, struct position_state *state) { - cnid_t curID; + cnid_t curID = 0; - if (state->hfsmp->hfs_flags & HFS_STANDARD) - curID = ckp->hfs.parentID; - else + if ((state->hfsmp->hfs_flags & HFS_STANDARD) == 0) { curID = ckp->hfsPlus.parentID; + } +#if CONFIG_HFS_STD + else { + curID = ckp->hfs.parentID; + } +#endif /* Make sure parent directory didn't change */ if (state->parentID != curID) { @@ -3465,8 +3822,10 @@ cat_findposition(const CatalogKey *ckp, const CatalogRecord *crp, switch(crp->recordType) { case kHFSPlusFolderRecord: case kHFSPlusFileRecord: +#if CONFIG_HFS_STD case kHFSFolderRecord: case kHFSFileRecord: +#endif ++state->count; break; default: @@ -3505,38 +3864,15 @@ cat_binarykeycompare(HFSPlusCatalogKey *searchKey, HFSPlusCatalogKey *trialKey) u_int16_t * str2 = &trialKey->nodeName.unicode[0]; int length1 = searchKey->nodeName.length; int length2 = trialKey->nodeName.length; - u_int16_t c1, c2; - int length; - - if (length1 < length2) { - length = length1; - --result; - } else if (length1 > length2) { - length = length2; - ++result; - } else { - length = length1; - } - - while (length--) { - c1 = *(str1++); - c2 = *(str2++); - - if (c1 > c2) { - result = 1; - break; - } - if (c1 < c2) { - result = -1; - break; - } - } + + result = UnicodeBinaryCompare (str1, length1, str2, length2); } return result; } +#if CONFIG_HFS_STD /* * Compare two standard HFS catalog keys * @@ -3562,6 +3898,7 @@ CompareCatalogKeys(HFSCatalogKey *searchKey, HFSCatalogKey *trialKey) return result; } +#endif /* @@ -3607,9 +3944,14 @@ static int buildkey(struct hfsmount *hfsmp, struct cat_desc *descp, HFSPlusCatalogKey *key, int retry) { + int std_hfs = (hfsmp->hfs_flags & HFS_STANDARD); int utf8_flags = UTF_ESCAPE_ILLEGAL; int result = 0; size_t unicodeBytes = 0; + + if (std_hfs == 0) { + retry = 0; + } if (descp->cd_namelen == 0 || descp->cd_nameptr[0] == '\0') return (EINVAL); /* invalid name */ @@ -3633,12 +3975,13 @@ buildkey(struct hfsmount *hfsmp, struct cat_desc *descp, return (result); } +#if CONFIG_HFS_STD /* * For HFS volumes convert to an HFS compatible key * * XXX need to save the encoding that succeeded */ - if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) { + if (std_hfs) { HFSCatalogKey hfskey; bzero(&hfskey, sizeof(hfskey)); @@ -3660,6 +4003,8 @@ buildkey(struct hfsmount *hfsmp, struct cat_desc *descp, } bcopy(&hfskey, key, sizeof(hfskey)); } +#endif + return (0); } @@ -3708,7 +4053,7 @@ cat_resolvelink(struct hfsmount *hfsmp, u_int32_t linkref, int isdirlink, struct if (recp->hl_linkCount == 0) recp->hl_linkCount = 2; } else { - printf("hfs: cat_resolvelink: can't find %s\n", inodename); + printf("hfs: cat_resolvelink: can't find inode=%s on vol=%s\n", 
inodename, hfsmp->vcbVN); } FREE(iterator, M_TEMP); @@ -3771,12 +4116,15 @@ getkey(struct hfsmount *hfsmp, cnid_t cnid, CatalogKey * key) /* Turn thread record into a cnode key (in place) */ switch (recp->recordType) { + +#if CONFIG_HFS_STD case kHFSFileThreadRecord: case kHFSFolderThreadRecord: keyp = (CatalogKey *)((char *)&recp->hfsThread.reserved + 6); keyp->hfs.keyLength = kHFSCatalogKeyMinimumLength + keyp->hfs.nodeName[0]; bcopy(keyp, key, keyp->hfs.keyLength + 1); break; +#endif case kHFSPlusFileThreadRecord: case kHFSPlusFolderThreadRecord: @@ -3854,27 +4202,7 @@ buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int32_t encoding int type = attrp->ca_mode & S_IFMT; u_int32_t createtime = to_hfs_time(attrp->ca_itime); - if (std_hfs) { - createtime = UTCToLocal(createtime); - if (type == S_IFDIR) { - bzero(crp, sizeof(HFSCatalogFolder)); - crp->recordType = kHFSFolderRecord; - crp->hfsFolder.folderID = cnid; - crp->hfsFolder.createDate = createtime; - crp->hfsFolder.modifyDate = createtime; - bcopy(attrp->ca_finderinfo, &crp->hfsFolder.userInfo, 32); - *recordSize = sizeof(HFSCatalogFolder); - } else { - bzero(crp, sizeof(HFSCatalogFile)); - crp->recordType = kHFSFileRecord; - crp->hfsFile.fileID = cnid; - crp->hfsFile.createDate = createtime; - crp->hfsFile.modifyDate = createtime; - bcopy(attrp->ca_finderinfo, &crp->hfsFile.userInfo, 16); - bcopy(&attrp->ca_finderinfo[16], &crp->hfsFile.finderInfo, 16); - *recordSize = sizeof(HFSCatalogFile); - } - } else { + if (std_hfs == 0) { struct HFSPlusBSDInfo * bsdp = NULL; if (type == S_IFDIR) { @@ -3922,6 +4250,30 @@ buildrecord(struct cat_attr *attrp, cnid_t cnid, int std_hfs, u_int32_t encoding bsdp->adminFlags = attrp->ca_flags >> 16; bsdp->ownerFlags = attrp->ca_flags & 0x000000FF; } +#if CONFIG_HFS_STD + else { + createtime = UTCToLocal(createtime); + if (type == S_IFDIR) { + bzero(crp, sizeof(HFSCatalogFolder)); + crp->recordType = kHFSFolderRecord; + crp->hfsFolder.folderID = cnid; + crp->hfsFolder.createDate = createtime; + crp->hfsFolder.modifyDate = createtime; + bcopy(attrp->ca_finderinfo, &crp->hfsFolder.userInfo, 32); + *recordSize = sizeof(HFSCatalogFolder); + } else { + bzero(crp, sizeof(HFSCatalogFile)); + crp->recordType = kHFSFileRecord; + crp->hfsFile.fileID = cnid; + crp->hfsFile.createDate = createtime; + crp->hfsFile.modifyDate = createtime; + bcopy(attrp->ca_finderinfo, &crp->hfsFile.userInfo, 16); + bcopy(&attrp->ca_finderinfo[16], &crp->hfsFile.finderInfo, 16); + *recordSize = sizeof(HFSCatalogFile); + } + } +#endif + } @@ -4091,6 +4443,7 @@ getbsdattr(struct hfsmount *hfsmp, const struct HFSPlusCatalogFile *crp, struct bcopy(&crp->userInfo, attrp->ca_finderinfo, 32); } +#if CONFIG_HFS_STD /* * promotekey - promote hfs key to hfs plus key * @@ -4205,6 +4558,7 @@ promoteattr(struct hfsmount *hfsmp, const CatalogRecord *dataPtr, struct HFSPlus crp->accessDate = crp->contentModDate; bzero(&crp->bsdInfo, sizeof(HFSPlusBSDInfo)); } +#endif /* * Build a catalog node thread record from a catalog key @@ -4215,20 +4569,7 @@ buildthread(void *keyp, void *recp, int std_hfs, int directory) { int size = 0; - if (std_hfs) { - HFSCatalogKey *key = (HFSCatalogKey *)keyp; - HFSCatalogThread *rec = (HFSCatalogThread *)recp; - - size = sizeof(HFSCatalogThread); - bzero(rec, size); - if (directory) - rec->recordType = kHFSFolderThreadRecord; - else - rec->recordType = kHFSFileThreadRecord; - rec->parentID = key->parentID; - bcopy(key->nodeName, rec->nodeName, key->nodeName[0]+1); - - } else /* HFS+ */ { + if (std_hfs == 
0) { HFSPlusCatalogKey *key = (HFSPlusCatalogKey *)keyp; HFSPlusCatalogThread *rec = (HFSPlusCatalogThread *)recp; @@ -4242,11 +4583,28 @@ buildthread(void *keyp, void *recp, int std_hfs, int directory) bcopy(&key->nodeName, &rec->nodeName, sizeof(UniChar) * (key->nodeName.length + 1)); - /* HFS Plus has varaible sized thread records */ + /* HFS Plus has variable sized thread records */ size -= (sizeof(rec->nodeName.unicode) - (rec->nodeName.length * sizeof(UniChar))); + } - +#if CONFIG_HFS_STD + else { + HFSCatalogKey *key = (HFSCatalogKey *)keyp; + HFSCatalogThread *rec = (HFSCatalogThread *)recp; + + size = sizeof(HFSCatalogThread); + bzero(rec, size); + if (directory) + rec->recordType = kHFSFolderThreadRecord; + else + rec->recordType = kHFSFileThreadRecord; + rec->parentID = key->parentID; + bcopy(key->nodeName, rec->nodeName, key->nodeName[0]+1); + + } +#endif + return (size); } @@ -4256,16 +4614,20 @@ buildthread(void *keyp, void *recp, int std_hfs, int directory) static void buildthreadkey(HFSCatalogNodeID parentID, int std_hfs, CatalogKey *key) { - if (std_hfs) { + if (std_hfs == 0) { + key->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength; + key->hfsPlus.parentID = parentID; + key->hfsPlus.nodeName.length = 0; + } +#if CONFIG_HFS_STD + else { key->hfs.keyLength = kHFSCatalogKeyMinimumLength; key->hfs.reserved = 0; key->hfs.parentID = parentID; key->hfs.nodeName[0] = 0; - } else { - key->hfsPlus.keyLength = kHFSPlusCatalogKeyMinimumLength; - key->hfsPlus.parentID = parentID; - key->hfsPlus.nodeName.length = 0; } +#endif + } /* @@ -4295,12 +4657,16 @@ getcnid(const CatalogRecord *crp) cnid_t cnid = 0; switch (crp->recordType) { + +#if CONFIG_HFS_STD case kHFSFolderRecord: cnid = crp->hfsFolder.folderID; break; case kHFSFileRecord: cnid = crp->hfsFile.fileID; break; +#endif + case kHFSPlusFolderRecord: cnid = crp->hfsPlusFolder.folderID; break; @@ -4308,7 +4674,7 @@ getcnid(const CatalogRecord *crp) cnid = crp->hfsPlusFile.fileID; break; default: - panic("hfs: getcnid: unknown recordType (crp @ %p)\n", crp); + printf("hfs: getcnid: unknown recordType=%d\n", crp->recordType); break; } @@ -4324,10 +4690,13 @@ getparentcnid(const CatalogRecord *recp) cnid_t cnid = 0; switch (recp->recordType) { + +#if CONFIG_HFS_STD case kHFSFileThreadRecord: case kHFSFolderThreadRecord: cnid = recp->hfsThread.parentID; break; +#endif case kHFSPlusFileThreadRecord: case kHFSPlusFolderThreadRecord: @@ -4347,8 +4716,16 @@ getparentcnid(const CatalogRecord *recp) static int isadir(const CatalogRecord *crp) { - return (crp->recordType == kHFSFolderRecord || - crp->recordType == kHFSPlusFolderRecord); + if (crp->recordType == kHFSPlusFolderRecord) { + return 1; + } +#if CONFIG_HFS_STD + if (crp->recordType == kHFSFolderRecord) { + return 1; + } +#endif + + return 0; } /* @@ -4447,3 +4824,4 @@ cat_update_dirlink(struct hfsmount *hfsmp, u_int8_t forktype, return cat_update_internal(hfsmp, true, descp, attrp, forkp, NULL); } } + diff --git a/bsd/hfs/hfs_catalog.h b/bsd/hfs/hfs_catalog.h index 022fddabb..3eea08ac8 100644 --- a/bsd/hfs/hfs_catalog.h +++ b/bsd/hfs/hfs_catalog.h @@ -212,6 +212,50 @@ struct cat_entrylist { #define CE_LIST_SIZE(entries) \ sizeof (*ce_list) + (((entries) - 1) * sizeof (struct cat_entry)) +struct hfsmount; + +/* + * Catalog FileID/CNID Acquisition / Lookup + * + * Some use-cases require that we find a valid CNID + * before we may be ready to enter the item into the namespace. 
+ * In order to resolve this, we support a hashtable attached to + * the mount that is secured by the catalog lock. + * + * Finding the next valid CNID is easy if the wraparound bit is + * not set -- you just pull from the hfsmp next pointer. + * If it is set then you must find a free entry in the catalog + * and also query the hashtable to see if the item is free or not. + * + * If you want to request a CNID before there is a backing item + * in the catalog, you must find one that is valid, then insert + * it into the hash table until such time that the item is + * inserted into the catalog. After successful catalog insertion, + * you must remove the item from the hashtable. + */ + +typedef struct cat_preflightid { + cnid_t fileid; + LIST_ENTRY(cat_preflightid) id_hash; +} cat_preflightid_t; + +extern int cat_remove_idhash (cat_preflightid_t *preflight); +extern int cat_insert_idhash (struct hfsmount *hfsmp, cat_preflightid_t *preflight); +extern int cat_check_idhash (struct hfsmount *hfsmp, cnid_t test_fileid); + +/* initialize the id look up hashtable during mount */ +extern void hfs_idhash_init (struct hfsmount *hfsmp); + +/* release the id lookup hashtable during unmount */ +extern void hfs_idhash_destroy (struct hfsmount *hfsmp); + +/* Get a new CNID for use */ +extern int cat_acquire_cnid (struct hfsmount *hfsmp, cnid_t *new_cnid); + + +/* default size of ID hash is 64 entries */ +#define HFS_IDHASH_DEFAULT 64 + /* * Catalog Operations Hint @@ -272,11 +316,11 @@ enum { * (please don't go around it) */ -struct hfsmount; extern void cat_releasedesc(struct cat_desc *descp); extern int cat_create ( struct hfsmount *hfsmp, + cnid_t new_fileid, struct cat_desc *descp, struct cat_attr *attrp, struct cat_desc *out_descp); @@ -288,6 +332,7 @@ extern int cat_delete ( struct hfsmount *hfsmp, extern int cat_lookup ( struct hfsmount *hfsmp, struct cat_desc *descp, int wantrsrc, + int force_casesensitive_lookup, struct cat_desc *outdescp, struct cat_attr *attrp, struct cat_fork *forkp, @@ -426,6 +471,11 @@ extern int cat_lookup_siblinglinks( struct hfsmount *hfsmp, cnid_t *prevlinkid, cnid_t *nextlinkid); +extern int cat_lookup_lastlink( struct hfsmount *hfsmp, + cnid_t startid, + cnid_t *nextlinkid, + struct cat_desc *cdesc); + extern int cat_lookup_dirlink(struct hfsmount *hfsmp, cnid_t dirlink_id, u_int8_t forktype, diff --git a/bsd/hfs/hfs_chash.c b/bsd/hfs/hfs_chash.c index 2910c5612..b162a53b2 100644 --- a/bsd/hfs/hfs_chash.c +++ b/bsd/hfs/hfs_chash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2008 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2002-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -108,11 +108,7 @@ static void hfs_chash_lock_spin(struct hfsmount *hfsmp) lck_mtx_lock_spin(&hfsmp->hfs_chash_mutex); } -#ifdef i386 -static void hfs_chash_lock_convert (struct hfsmount *hfsmp) -#else static void hfs_chash_lock_convert (__unused struct hfsmount *hfsmp) -#endif { lck_mtx_convert_spin(&hfsmp->hfs_chash_mutex); } @@ -188,7 +184,7 @@ loop: */ return (NULL); } - if (!skiplock && hfs_lock(cp, HFS_EXCLUSIVE_LOCK) != 0) { + if (!skiplock && hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { vnode_put(vp); return (NULL); } @@ -352,7 +348,7 @@ loop_with_lock: } if (!skiplock) { - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } /* @@ -423,7 +419,7 @@ loop_with_lock: lck_rw_init(&ncp->c_rwlock, hfs_rwlock_group, hfs_lock_attr); if (!skiplock) - (void) hfs_lock(ncp, HFS_EXCLUSIVE_LOCK); + (void) hfs_lock(ncp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); /* Insert the new cnode with it's H_ALLOC flag set */ LIST_INSERT_HEAD(CNODEHASH(hfsmp, inum), ncp, c_hash); diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index 65f2825d0..439e6b270 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -62,6 +62,7 @@ static int hfs_isordered(struct cnode *, struct cnode *); extern int hfs_removefile_callback(struct buf *bp, void *hfsmp); + __inline__ int hfs_checkdeleted (struct cnode *cp) { return ((cp->c_flag & (C_DELETED | C_NOEXISTS)) ? ENOENT : 0); } @@ -84,7 +85,7 @@ int hfs_set_backingstore (struct vnode *vp, int val) { } /* lock the cnode */ - err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK); + err = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (err) { return err; } @@ -123,7 +124,7 @@ int hfs_is_backingstore (struct vnode *vp, int *val) { cp = VTOC(vp); /* lock the cnode */ - err = hfs_lock (cp, HFS_SHARED_LOCK); + err = hfs_lock (cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); if (err) { return err; } @@ -224,7 +225,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { hfs_unlock(cp); /* ubc_setsize just fails if we were to call this from VNOP_RECLAIM */ ubc_setsize(vp, 0); - (void) hfs_lock(cp, HFS_FORCE_LOCK); + (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } } @@ -256,7 +257,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { hfs_filedone(vp, ctx); } - /* + /* * We're holding the cnode lock now. Stall behind any shadow BPs that may * be involved with this vnode if it is a symlink. We don't want to allow * the blocks that we're about to release to be put back into the pool if there @@ -309,9 +310,9 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { */ if ((v_type == VREG || v_type == VLNK) && - (cp->c_flag & C_DELETED) && - ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { - + (cp->c_flag & C_DELETED) && + ((forkcount == 1) || (!VNODE_IS_RSRC(vp)))) { + /* Truncate away our own fork data. (Case A, B, C above) */ if (VTOF(vp)->ff_blocks != 0) { @@ -337,7 +338,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { } } - /* + /* * At this point, we have decided that this cnode is * suitable for full removal. We are about to deallocate * its blocks and remove its entry from the catalog. 
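The hunks above and below migrate every cnode-locking call site from the old single-argument lock types to a (locktype, lockflags) pair. A brief summary of the mapping, using only names that appear elsewhere in this patch; the exact enum definitions live in hfs_cnode.h and are assumed here:

/*
 * Old call                                        New call
 * hfs_lock(cp, HFS_EXCLUSIVE_LOCK)                hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)
 * hfs_lock(cp, HFS_SHARED_LOCK)                   hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT)
 * hfs_lock(cp, HFS_FORCE_LOCK)                    hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS)
 * hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK)    hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE)
 * hfs_unlock_truncate(cp, 0)                      hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT)
 * hfs_unlock_truncate(cp, 1)                      hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE)
 *
 * HFS_LOCK_ALLOW_NOEXISTS preserves the old HFS_FORCE_LOCK behavior of
 * locking a cnode even when it is marked C_NOEXISTS; HFS_LOCK_DEFAULT
 * keeps the old default of refusing such a lock.
 */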
@@ -359,7 +360,8 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { if (hfsmp->jnl && vnode_islnk(vp)) { buf_iterate(vp, hfs_removefile_callback, BUF_SKIP_NONLOCKED, (void *)hfsmp); } - + + /* * This truncate call (and the one below) is fine from VNOP_RECLAIM's * context because we're only removing blocks, not zero-filling new @@ -376,6 +378,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { hfs_end_transaction(hfsmp); started_tr = 0; } + } /* @@ -421,7 +424,7 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_lookup (hfsmp, desc_ptr, 1, (struct cat_desc *) NULL, + error = cat_lookup (hfsmp, desc_ptr, 1, 0, (struct cat_desc *) NULL, (struct cat_attr*) NULL, &lookup_rsrc->lookup_fork.ff_data, NULL); hfs_systemfile_unlock (hfsmp, lockflags); @@ -536,8 +539,9 @@ int hfs_cnode_teardown (struct vnode *vp, vfs_context_t ctx, int reclaim) { */ error = cat_delete(hfsmp, &cp->c_desc, &cp->c_attr); - if (error && truncated && (error != ENXIO)) + if (error && truncated && (error != ENXIO)) { printf("hfs_inactive: couldn't delete a truncated file!"); + } /* Update HFS Private Data dir */ if (error == 0) { @@ -701,11 +705,11 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) } if ((v_type == VREG || v_type == VLNK)) { - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); took_trunc_lock = 1; } - (void) hfs_lock(cp, HFS_FORCE_LOCK); + (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* * Call cnode_teardown to push out dirty blocks to disk, release open-unlinked @@ -720,7 +724,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) * truncate lock) */ if (took_trunc_lock) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } hfs_unlock(cp); @@ -787,7 +791,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) hfs_unlock(cp); (void) cluster_push(vp, cluster_flags); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* * Explicitly zero out the areas of file @@ -806,7 +810,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) hfs_unlock(cp); (void) cluster_write(vp, (struct uio *) 0, leof, end + 1, start, (off_t)0, cluster_zero_flags); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); cp->c_flag |= C_MODIFIED; } cp->c_flag &= ~C_ZFWANTSYNC; @@ -824,7 +828,7 @@ hfs_filedone(struct vnode *vp, vfs_context_t context) hfs_unlock(cp); (void) cluster_push(vp, cluster_flags); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* * If the hfs_truncate didn't happen to flush the vnode's @@ -862,7 +866,7 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) * all dirty pages have been synced and nobody should be competing * with us for this thread. */ - (void) hfs_lock (cp, HFS_FORCE_LOCK); + (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* * Sync to disk any remaining data in the cnode/vnode. 
This includes @@ -936,6 +940,7 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) */ if (reclaim_cnode) { hfs_chashwakeup(hfsmp, cp, H_ALLOC | H_TRANSIT); + hfs_unlock(cp); hfs_reclaim_cnode(cp); } else { @@ -952,12 +957,15 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap) extern int (**hfs_vnodeop_p) (void *); -extern int (**hfs_std_vnodeop_p) (void *); extern int (**hfs_specop_p) (void *); #if FIFO extern int (**hfs_fifoop_p) (void *); #endif +#if CONFIG_HFS_STD +extern int (**hfs_std_vnodeop_p) (void *); +#endif + /* * hfs_getnewvnode - get new default vnode * @@ -1013,7 +1021,7 @@ hfs_getnewvnode( if (vtype == VBAD) { /* Mark the FS as corrupt and bail out */ hfs_mark_volume_inconsistent(hfsmp); - return (EINVAL); + return EINVAL; } /* Zero out the out_flags */ @@ -1111,6 +1119,20 @@ hfs_getnewvnode( * item. We are not susceptible to the lookup fastpath issue at this point. */ replace_desc(cp, descp); + + /* + * This item was a hardlink, and its name needed to be updated. By replacing the + * descriptor above, we've now updated the cnode's internal representation of + * its link ID/CNID, parent ID, and its name. However, VFS must now be alerted + * to the fact that this vnode now has a new parent, since we cannot guarantee + * that the new link lived in the same directory as the alternative name for + * this item. + */ + if ((*vpp != NULL) && (cnp)) { + /* we could be requesting the rsrc of a hardlink file... */ + vnode_update_identity (*vpp, dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, + (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); + } } } } @@ -1134,6 +1156,9 @@ hfs_getnewvnode( int error = 0; if (!hfs_valid_cnode (hfsmp, dvp, (wantrsrc ? NULL : cnp), cp->c_fileid, attrp, &error)) { hfs_chash_abort(hfsmp, cp); + if ((flags & GNV_SKIPLOCK) == 0) { + hfs_unlock(cp); + } hfs_reclaim_cnode(cp); *vpp = NULL; /* @@ -1304,8 +1329,10 @@ hfs_getnewvnode( #endif if (vtype == VBLK || vtype == VCHR) vfsp.vnfs_vops = hfs_specop_p; +#if CONFIG_HFS_STD else if (hfs_standard) vfsp.vnfs_vops = hfs_std_vnodeop_p; +#endif else vfsp.vnfs_vops = hfs_vnodeop_p; @@ -1431,8 +1458,7 @@ hfs_reclaim_cnode(struct cnode *cp) * attempting to reclaim a cnode with only one live fork. Because the vnode * went through reclaim, any future attempts to use this item will have to * go through lookup again, which will need to create a new vnode. Thus, - * destroying the locks below (while they were still held during our parent - * function hfs_vnop_reclaim) is safe. + * destroying the locks below is safe. */ lck_rw_destroy(&cp->c_rwlock, hfs_rwlock_group); @@ -1444,7 +1470,8 @@ hfs_reclaim_cnode(struct cnode *cp) } #endif #if CONFIG_PROTECT - cp_entry_destroy(&cp->c_cpentry); + cp_entry_destroy(cp->c_cpentry); + cp->c_cpentry = NULL; #endif @@ -1505,7 +1532,7 @@ hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname * summing the number of blocks in the resident extents. */ - lookup = cat_lookup (hfsmp, &cndesc, 0, NULL, &attr, &fork, NULL); + lookup = cat_lookup (hfsmp, &cndesc, 0, 0, NULL, &attr, &fork, NULL); if ((lookup == 0) && (cnid == attr.ca_fileid)) { stillvalid = 1; @@ -1645,6 +1672,110 @@ u_int32_t hfs_get_dateadded (struct cnode *cp) { return dateadded; } +/* + * Per HI and Finder requirements, HFS maintains a "write/generation count" + * for each file that is incremented on any write & pageout. It should start + * at 1 to reserve "0" as a special value. If it should ever wrap around, + * it will skip using 0. 
+ * + * Note that this field is also set explicitly in the hfs_vnop_setxattr code. + * We must ignore user attempts to set this part of the finderinfo, and + * so we need to save a local copy of the date added, write in the user + * finderinfo, then stuff the value back in. + */ +void hfs_write_gencount (struct cat_attr *attrp, uint32_t gencount) { + u_int8_t *finfo = NULL; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = (u_int8_t*)attrp->ca_finderinfo; + finfo = finfo + 16; + + /* + * Make sure to write it out as big endian, since that's how + * finder info is defined. + * + * Generation count is only supported for files. + */ + if (S_ISREG(attrp->ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->write_gen_counter = OSSwapHostToBigInt32(gencount); + } + + /* If it were neither directory/file, then we'd bail out */ + return; +} + +/* Increase the gen count by 1; if it wraps around to 0, increment by two */ +uint32_t hfs_incr_gencount (struct cnode *cp) { + u_int8_t *finfo = NULL; + u_int32_t gcount = 0; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = (u_int8_t*)cp->c_finderinfo; + finfo = finfo + 16; + + /* + * FinderInfo is written out in big endian... make sure to convert it to host + * native before we use it. + */ + if (S_ISREG(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); + + /* Was it zero to begin with (file originated in 10.8 or earlier?) */ + if (gcount == 0) { + gcount++; + } + + /* now bump it */ + gcount++; + + /* Did it wrap around ? */ + if (gcount == 0) { + gcount++; + } + extinfo->write_gen_counter = OSSwapHostToBigInt32 (gcount); + } + else { + gcount = 0; + } + + return gcount; +} + +/* Getter for the gen count */ +u_int32_t hfs_get_gencount (struct cnode *cp) { + u_int8_t *finfo = NULL; + u_int32_t gcount = 0; + + /* overlay the FinderInfo to the correct pointer, and advance */ + finfo = (u_int8_t*)cp->c_finderinfo; + finfo = finfo + 16; + + /* + * FinderInfo is written out in big endian... make sure to convert it to host + * native before we use it. + */ + if (S_ISREG(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + gcount = OSSwapBigToHostInt32 (extinfo->write_gen_counter); + + /* + * Is it zero? File might originate in 10.8 or earlier. We lie and bump it to 1, + * since the incrementer code is able to handle this case and will double-increment + * for us. + */ + if (gcount == 0) { + gcount++; + } + } + else { + gcount = 0; + } + + return gcount; +} + /* * Touch cnode times based on c_touch_xxx flags * @@ -1663,10 +1794,12 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) cp->c_touch_modtime = FALSE; return; } +#if CONFIG_HFS_STD else if (hfsmp->hfs_flags & HFS_STANDARD) { /* HFS Standard doesn't support access times */ cp->c_touch_acctime = FALSE; } +#endif ctx = vfs_context_current(); /* @@ -1710,7 +1843,7 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) cp->c_touch_modtime = FALSE; cp->c_flag |= C_MODIFIED; touchvol = 1; -#if 1 +#if CONFIG_HFS_STD /* * HFS dates that WE set must be adjusted for DST */ @@ -1746,14 +1879,12 @@ hfs_touchtimes(struct hfsmount *hfsmp, struct cnode* cp) * Lock a cnode. 
*/ int -hfs_lock(struct cnode *cp, enum hfslocktype locktype) +hfs_lock(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) { void * thread = current_thread(); if (cp->c_lockowner == thread) { - /* - * Only the extents and bitmap file's support lock recursion. - */ + /* Only the extents and bitmap files support lock recursion. */ if ((cp->c_fileid == kHFSExtentsFileID) || (cp->c_fileid == kHFSAllocationFileID)) { cp->c_syslockcount++; @@ -1764,13 +1895,11 @@ hfs_lock(struct cnode *cp, enum hfslocktype locktype) lck_rw_lock_shared(&cp->c_rwlock); cp->c_lockowner = HFS_SHARED_OWNER; - } else /* HFS_EXCLUSIVE_LOCK */ { + } else { /* HFS_EXCLUSIVE_LOCK */ lck_rw_lock_exclusive(&cp->c_rwlock); cp->c_lockowner = thread; - /* - * Only the extents and bitmap file's support lock recursion. - */ + /* Only the extents and bitmap files support lock recursion. */ if ((cp->c_fileid == kHFSExtentsFileID) || (cp->c_fileid == kHFSAllocationFileID)) { cp->c_syslockcount = 1; @@ -1813,9 +1942,10 @@ hfs_lock(struct cnode *cp, enum hfslocktype locktype) #endif /* HFS_CHECK_LOCK_ORDER */ /* - * Skip cnodes that no longer exist (were deleted). + * Skip cnodes for regular files that no longer exist + * (marked deleted, catalog entry gone). */ - if ((locktype != HFS_FORCE_LOCK) && + if (((flags & HFS_LOCK_ALLOW_NOEXISTS) == 0) && ((cp->c_desc.cd_flags & CD_ISMETA) == 0) && (cp->c_flag & C_NOEXISTS)) { hfs_unlock(cp); @@ -1828,7 +1958,7 @@ hfs_lock(struct cnode *cp, enum hfslocktype locktype) * Lock a pair of cnodes. */ int -hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype) +hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfs_locktype locktype) { struct cnode *first, *last; int error; @@ -1837,7 +1967,7 @@ hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype) * If cnodes match then just lock one. */ if (cp1 == cp2) { - return hfs_lock(cp1, locktype); + return hfs_lock(cp1, locktype, HFS_LOCK_DEFAULT); } /* @@ -1851,10 +1981,10 @@ hfs_lockpair(struct cnode *cp1, struct cnode *cp2, enum hfslocktype locktype) last = cp1; } - if ( (error = hfs_lock(first, locktype))) { + if ( (error = hfs_lock(first, locktype, HFS_LOCK_DEFAULT))) { return (error); } - if ( (error = hfs_lock(last, locktype))) { + if ( (error = hfs_lock(last, locktype, HFS_LOCK_DEFAULT))) { hfs_unlock(first); return (error); } @@ -1888,7 +2018,7 @@ hfs_isordered(struct cnode *cp1, struct cnode *cp2) */ int hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, - struct cnode *cp4, enum hfslocktype locktype, struct cnode **error_cnode) + struct cnode *cp4, enum hfs_locktype locktype, struct cnode **error_cnode) { struct cnode * a[3]; struct cnode * b[3]; @@ -1928,7 +2058,7 @@ hfs_lockfour(struct cnode *cp1, struct cnode *cp2, struct cnode *cp3, */ for (i = 0; i < k; ++i) { if (list[i]) - if ((error = hfs_lock(list[i], locktype))) { + if ((error = hfs_lock(list[i], locktype, HFS_LOCK_DEFAULT))) { /* Only stuff error_cnode if requested */ if (error_cnode) { *error_cnode = list[i]; @@ -2062,13 +2192,14 @@ skip2: * hfs_lock. */ void -hfs_lock_truncate(struct cnode *cp, enum hfslocktype locktype) +hfs_lock_truncate(struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags) { void * thread = current_thread(); if (cp->c_truncatelockowner == thread) { /* - * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse. + * Ignore grabbing the lock if the current thread already + holds exclusive lock.
 		 *
 		 * This is needed on the hfs_vnop_pagein path where we need to ensure
 		 * the file does not change sizes while we are paging in. However,
 		 *
@@ -2077,16 +2208,13 @@ hfs_lock_truncate(struct cnode *cp, enum hfslocktype locktype)
 		 * the truncate lock exclusive, allow it to proceed, but ONLY if
 		 * it's in the recursive case.
 		 */
-		if (locktype != HFS_RECURSE_TRUNCLOCK) {
+		if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
 			panic("hfs_lock_truncate: cnode %p locked!", cp);
 		}
-	}
-	/* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
-	else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
+	} else if (locktype == HFS_SHARED_LOCK) {
 		lck_rw_lock_shared(&cp->c_truncatelock);
 		cp->c_truncatelockowner = HFS_SHARED_OWNER;
-	}
-	else { /* must be an HFS_EXCLUSIVE_LOCK */
+	} else { /* HFS_EXCLUSIVE_LOCK */
 		lck_rw_lock_exclusive(&cp->c_truncatelock);
 		cp->c_truncatelockowner = thread;
 	}
@@ -2099,13 +2227,15 @@
 * case. To prevent deadlocks while a VM copy object is moving pages, HFS vnop pagein will
 * temporarily need to disable V2 semantics.
 */
-int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) {
+int hfs_try_trunclock (struct cnode *cp, enum hfs_locktype locktype, enum hfs_lockflags flags)
+{
 	void * thread = current_thread();
 	boolean_t didlock = false;
 
 	if (cp->c_truncatelockowner == thread) {
 		/*
-		 * Only HFS_RECURSE_TRUNCLOCK is allowed to recurse.
+		 * Don't grab the lock again if the current thread already
+		 * holds it exclusive.
 		 *
 		 * This is needed on the hfs_vnop_pagein path where we need to ensure
 		 * the file does not change sizes while we are paging in. However,
 		 *
@@ -2114,18 +2244,15 @@ int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) {
 		 * the truncate lock exclusive, allow it to proceed, but ONLY if
 		 * it's in the recursive case.
 		 */
-		if (locktype != HFS_RECURSE_TRUNCLOCK) {
+		if ((flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) == 0) {
 			panic("hfs_lock_truncate: cnode %p locked!", cp);
 		}
-	}
-	/* HFS_RECURSE_TRUNCLOCK takes a shared lock if it is not already locked */
-	else if ((locktype == HFS_SHARED_LOCK) || (locktype == HFS_RECURSE_TRUNCLOCK)) {
+	} else if (locktype == HFS_SHARED_LOCK) {
 		didlock = lck_rw_try_lock(&cp->c_truncatelock, LCK_RW_TYPE_SHARED);
 		if (didlock) {
 			cp->c_truncatelockowner = HFS_SHARED_OWNER;
 		}
-	}
-	else { /* must be an HFS_EXCLUSIVE_LOCK */
+	} else { /* HFS_EXCLUSIVE_LOCK */
 		didlock = lck_rw_try_lock (&cp->c_truncatelock, LCK_RW_TYPE_EXCLUSIVE);
 		if (didlock) {
 			cp->c_truncatelockowner = thread;
 		}
@@ -2139,26 +2266,30 @@ int hfs_try_trunclock (struct cnode *cp, enum hfslocktype locktype) {
 /*
 * Unlock the truncate lock, which protects against size changes.
 *
- * The been_recursed argument is used when we may need to return
- * from this function without actually unlocking the truncate lock.
+ * If the HFS_LOCK_SKIP_IF_EXCLUSIVE flag was set, it means that a previous
+ * hfs_lock_truncate() might have skipped grabbing a lock because
+ * the current thread was already holding the lock exclusive and
+ * we may need to return from this function without actually unlocking
+ * the truncate lock.
 */
 void
-hfs_unlock_truncate(struct cnode *cp, int been_recursed)
+hfs_unlock_truncate(struct cnode *cp, enum hfs_lockflags flags)
 {
 	void *thread = current_thread();
 
 	/*
-	 * If been_recursed is nonzero AND the current lock owner of the
-	 * truncate lock is our current thread, then we must have recursively
-	 * taken the lock earlier on. If the lock were unlocked,
-	 * HFS_RECURSE_TRUNCLOCK took a shared lock and it would fall through
-	 * to the SHARED case below.
+	 * If HFS_LOCK_SKIP_IF_EXCLUSIVE is set in the flags AND the current
+	 * lock owner of the truncate lock is our current thread, then
+	 * we must have skipped taking the lock earlier in
+	 * hfs_lock_truncate() by setting HFS_LOCK_SKIP_IF_EXCLUSIVE in the
+	 * flags (as the current thread was already the lock owner).
 	 *
-	 * If been_recursed is zero (most of the time) then we check the
-	 * lockowner field to infer whether the lock was taken exclusively or
-	 * shared in order to know what underlying lock routine to call.
+	 * If HFS_LOCK_SKIP_IF_EXCLUSIVE is not set (most of the time) then
+	 * we check the lockowner field to infer whether the lock was taken
+	 * exclusively or shared in order to know what underlying lock
+	 * routine to call.
 	 */
-	if (been_recursed) {
+	if (flags & HFS_LOCK_SKIP_IF_EXCLUSIVE) {
 		if (cp->c_truncatelockowner == thread) {
 			return;
 		}
@@ -2168,9 +2299,7 @@ hfs_unlock_truncate(struct cnode *cp, int been_recursed)
 	if (thread == cp->c_truncatelockowner) {
 		cp->c_truncatelockowner = NULL;
 		lck_rw_unlock_exclusive(&cp->c_truncatelock);
-	}
-	/* HFS_LOCK_SHARED */
-	else {
+	} else { /* HFS_LOCK_SHARED */
 		lck_rw_unlock_shared(&cp->c_truncatelock);
 	}
 }
diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h
index 082fbb858..824aa2938 100644
--- a/bsd/hfs/hfs_cnode.h
+++ b/bsd/hfs/hfs_cnode.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2002-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -233,6 +239,12 @@ typedef struct cnode cnode_t;
 #define C_BACKINGSTORE	0x40000    /* cnode is a backing store for an existing or currently-mounting filesystem */
 #define C_SWAPINPROGRESS 0x80000   /* cnode's data is about to be swapped. Issue synchronous cluster io */
 
+/*
+ * For C_SSD_GREEDY_MODE: SSDs may want to write the file payload data using the greedy mode knowing
+ * that the content needs to be written out to the disk quicker than normal at the expense of storage efficiency.
+ * This is purely advisory at the HFS level, and is not maintained after the cnode goes out of core.
+ */
+#define C_SSD_GREEDY_MODE 0x100000 /* Assume future writes are recommended to be written in SLC mode */
 
 #define ZFTIMELIMIT	(5 * 60)
@@ -335,6 +341,11 @@ extern void hfs_touchtimes(struct hfsmount *, struct cnode *);
 extern void hfs_write_dateadded (struct cat_attr *cattrp, u_int32_t dateadded);
 extern u_int32_t hfs_get_dateadded (struct cnode *cp);
 
+/* Gen counter methods */
+extern void hfs_write_gencount(struct cat_attr *cattrp, uint32_t gencount);
+extern uint32_t hfs_get_gencount(struct cnode *cp);
+extern uint32_t hfs_incr_gencount (struct cnode *cp);
+
 /* Zero-fill file and push regions out to disk */
 extern int hfs_filedone(struct vnode *vp, vfs_context_t context);
 
@@ -367,6 +378,9 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid);
 * HFS Locking Order:
 *
 * 1. cnode truncate lock (if needed)
+ *    hfs_vnop_pagein/out can skip grabbing this lock by passing the
+ *    HFS_LOCK_SKIP_IF_EXCLUSIVE flag if the truncate lock is already held
+ *    exclusive by the current thread from an earlier vnop.
 * 2. cnode lock (in parent-child order if related, otherwise by address order)
 * 3. journal (if needed)
 * 4. system files (as needed)
@@ -380,22 +394,33 @@ extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid);
 *
 * I.
HFS cnode hash lock (must not acquire any new locks while holding this lock, always taken last) */ -enum hfslocktype {HFS_SHARED_LOCK = 1, HFS_EXCLUSIVE_LOCK = 2, HFS_FORCE_LOCK = 3, HFS_RECURSE_TRUNCLOCK = 4}; + + +enum hfs_locktype { + HFS_SHARED_LOCK = 1, + HFS_EXCLUSIVE_LOCK = 2 +}; + +/* Option flags for cnode and truncate lock functions */ +enum hfs_lockflags { + HFS_LOCK_DEFAULT = 0x0, /* Default flag, no options provided */ + HFS_LOCK_ALLOW_NOEXISTS = 0x1, /* Allow locking of all cnodes, including cnode marked deleted with no catalog entry */ + HFS_LOCK_SKIP_IF_EXCLUSIVE = 0x2 /* Skip locking if the current thread already holds the lock exclusive */ +}; #define HFS_SHARED_OWNER (void *)0xffffffff -int hfs_lock(struct cnode *, enum hfslocktype); -int hfs_lockpair(struct cnode *, struct cnode *, enum hfslocktype); +int hfs_lock(struct cnode *, enum hfs_locktype, enum hfs_lockflags); +int hfs_lockpair(struct cnode *, struct cnode *, enum hfs_locktype); int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *, - enum hfslocktype, struct cnode **); + enum hfs_locktype, struct cnode **); void hfs_unlock(struct cnode *); void hfs_unlockpair(struct cnode *, struct cnode *); void hfs_unlockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *); -void hfs_lock_truncate(struct cnode *, enum hfslocktype); -void hfs_unlock_truncate(struct cnode *, int been_recursed); - -int hfs_try_trunclock(struct cnode *, enum hfslocktype); +void hfs_lock_truncate(struct cnode *, enum hfs_locktype, enum hfs_lockflags); +void hfs_unlock_truncate(struct cnode *, enum hfs_lockflags); +int hfs_try_trunclock(struct cnode *, enum hfs_locktype, enum hfs_lockflags); #endif /* __APPLE_API_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/hfs/hfs_cprotect.c b/bsd/hfs/hfs_cprotect.c index 4a88d0c52..742b095eb 100644 --- a/bsd/hfs/hfs_cprotect.c +++ b/bsd/hfs/hfs_cprotect.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include @@ -36,33 +36,35 @@ #include #include #include +#include +#include #include "hfs.h" #include "hfs_cnode.h" #if CONFIG_PROTECT -static struct cp_wrap_func g_cp_wrap_func = {NULL, NULL}; -static struct cp_global_state g_cp_state = {0, 0, 0}; +static struct cp_wrap_func g_cp_wrap_func = {}; +static struct cp_global_state g_cp_state = {0, 0, 0}; extern int (**hfs_vnodeop_p) (void *); /* * CP private functions */ -static int cp_is_valid_class(int); static int cp_root_major_vers(mount_t mp); static int cp_getxattr(cnode_t *, struct hfsmount *hfsmp, struct cprotect **); static struct cprotect *cp_entry_alloc(size_t); static void cp_entry_dealloc(struct cprotect *entry); -static int cp_setup_aes_ctx(struct cprotect *); -static int cp_make_keys (struct cprotect **, struct hfsmount *hfsmp, cnid_t, int); -static int cp_restore_keys(struct cprotect *, struct hfsmount *hfsmp); +static int cp_restore_keys(struct cprotect *, struct hfsmount *hfsmp, struct cnode *); static int cp_lock_vfs_callback(mount_t, void *); static int cp_lock_vnode_callback(vnode_t, void *); static int cp_vnode_is_eligible (vnode_t); static int cp_check_access (cnode_t *, int); -static int cp_wrap(int, struct hfsmount *hfsmp, cnid_t, struct cprotect**); -static int cp_unwrap(int, struct cprotect *); +static int cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, struct cprotect **output_entry); +static int cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass); +static int cp_unwrap(struct hfsmount *, struct cprotect *, struct cnode *); +static int cp_setup_aes_ctx(struct cprotect *entry); +static void cp_init_access(cp_cred_t access, struct cnode *cp); @@ -75,232 +77,273 @@ static int cp_unwrap(int, struct cprotect *); #define CP_ASSERT(x) #endif -int +int cp_key_store_action(int action) { - g_cp_state.lock_state = action; + + if (action < 0 || action > CP_MAX_STATE) { + return -1; + } + + /* this truncates the upper 3 bytes */ + g_cp_state.lock_state = (uint8_t)action; + if (action == CP_LOCKED_STATE) { /* - * Note that because we are using the void* arg to pass the key store - * value into the vfs cp iteration, we need to pass around the int as an ptr. - * This may silence 32-64 truncation warnings. + * Upcast the value in 'action' to be a pointer-width unsigned integer. + * This avoids issues relating to pointer-width. */ - return vfs_iterate(0, cp_lock_vfs_callback, (void*)((uintptr_t)action)); + unsigned long action_arg = (unsigned long) action; + return vfs_iterate(0, cp_lock_vfs_callback, (void*)action_arg); } - + + /* Do nothing on unlock events */ return 0; - + } -int +int cp_register_wraps(cp_wrap_func_t key_store_func) { - g_cp_wrap_func.wrapper = key_store_func->wrapper; + g_cp_wrap_func.new_key = key_store_func->new_key; g_cp_wrap_func.unwrapper = key_store_func->unwrapper; - + g_cp_wrap_func.rewrapper = key_store_func->rewrapper; + /* do not use invalidater until rdar://12170050 goes in ! */ + g_cp_wrap_func.invalidater = key_store_func->invalidater; + g_cp_state.wrap_functions_set = 1; - - return 0; -} -#if 0 -/* - * If necessary, this function can be used to - * query the device's lock state. - */ -int -cp_isdevice_locked (void) { - if (g_cp_state.lock_state == CP_UNLOCKED_STATE) { - return 0; - } - return 1; + return 0; } -#endif /* * Allocate and initialize a cprotect blob for a new cnode. * Called from hfs_getnewvnode: cnode is locked exclusive. + * * Read xattr data off the cnode. 
Then, if conditions permit,
+ * unwrap the file key and cache it in the cprotect blob.
 */
-int
+int
 cp_entry_init(struct cnode *cp, struct mount *mp)
 {
 	struct cprotect *entry = NULL;
 	int error = 0;
 	struct hfsmount *hfsmp = VFSTOHFS(mp);
 
+	/*
+	 * The cnode should be locked at this point, regardless of whether or not
+	 * we are creating a new item in the namespace or vending a vnode on behalf
+	 * of lookup. The only time we tell getnewvnode to skip the lock is when
+	 * constructing a resource fork vnode. But a resource fork vnode must come
+	 * after the regular data fork cnode has already been constructed.
+	 */
 	if (!cp_fs_protected (mp)) {
 		cp->c_cpentry = NULL;
 		return 0;
 	}
-	
+
 	if (!S_ISREG(cp->c_mode) && !S_ISDIR(cp->c_mode)) {
 		cp->c_cpentry = NULL;
 		return 0;
 	}
-	
+
 	if (!g_cp_state.wrap_functions_set) {
 		printf("hfs: cp_update_entry: wrap functions not yet set\n");
 		return ENXIO;
 	}
-	
+
 	if (hfsmp->hfs_running_cp_major_vers == 0) {
-		cp_root_major_vers(mp);
+		panic ("hfs cp: no running mount point version!");
 	}
-	
+
 	CP_ASSERT (cp->c_cpentry == NULL);
 
 	error = cp_getxattr(cp, hfsmp, &entry);
-
-	/*
-	 * Normally, we should always have a CP EA for a file or directory that
-	 * we are initializing here. However, there are some extenuating circumstances,
-	 * such as the root directory immediately following a newfs_hfs.
-	 *
-	 * As a result, we leave code here to deal with an ENOATTR which will always
-	 * default to a 'D' key, though we don't expect to use it much.
-	 */
-	if (error == ENOATTR) {
-		int sub_error;
+	if (error == 0) {
+		/*
+		 * Success; attribute was found, though it may not have keys.
+		 * If the entry is returned without keys, we will delay generating
+		 * keys until the first I/O.
+		 */
+		if (S_ISREG(cp->c_mode)) {
+			if (entry->cp_flags & CP_NEEDS_KEYS) {
+				entry->cp_flags &= ~CP_KEY_FLUSHED;
+			}
+			else {
+				entry->cp_flags |= CP_KEY_FLUSHED;
+			}
+		}
+	}
+	else if (error == ENOATTR) {
+		/*
+		 * Normally, we should always have a CP EA for a file or directory that
+		 * we are initializing here. However, there are some extenuating circumstances,
+		 * such as the root directory immediately following a newfs_hfs.
+		 *
+		 * As a result, we leave code here to deal with an ENOATTR which will always
+		 * default to a 'D/NONE' key, though we don't expect to use it much.
+		 */
+		int target_class = PROTECTION_CLASS_D;
 
-		sub_error = cp_entry_create_keys (&entry, NULL, hfsmp, PROTECTION_CLASS_D, cp->c_fileid, cp->c_mode);
-
-		/* Now we have keys. Write them out. */
-		if (sub_error == 0) {
-			sub_error = cp_setxattr (cp, entry, hfsmp, cp->c_fileid, XATTR_CREATE);
+		if (S_ISDIR(cp->c_mode)) {
+			target_class = PROTECTION_CLASS_DIR_NONE;
+		}
+		error = cp_new (target_class, hfsmp, cp, cp->c_mode, &entry);
+		if (error == 0) {
+			error = cp_setxattr (cp, entry, hfsmp, cp->c_fileid, XATTR_CREATE);
 		}
-		error = sub_error;
 	}
-	else if (error == 0) {
-		if (S_ISREG(cp->c_mode)) {
-			entry->cp_flags |= CP_KEY_FLUSHED;
-		}
-	}
+
 	/*
-	 * For errors other than ENOATTR, we don't do anything.
-	 * cp_entry_destroy can deal with a NULL argument if cp_getxattr
-	 * failed malloc or there was a B-Tree error.
+	 * Bail out if:
+	 * a) error was not ENOATTR (we got something bad from the getxattr call)
+	 * b) we encountered an error setting the xattr above.
+	 * c) we failed to generate a new cprotect data structure.
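+	 *
+	 * (In each of these cases any partially constructed entry is torn
+	 * down in the error path below, and c_cpentry is left NULL.)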
*/ + if (error) { + goto out; + } cp->c_cpentry = entry; - if (error) { - cp_entry_destroy(&cp->c_cpentry); +out: + if (error == 0) { + entry->cp_backing_cnode = cp; } - + else { + if (entry) { + cp_entry_destroy(entry); + } + cp->c_cpentry = NULL; + } + return error; } /* - * Set up initial key/class pair on cnode. The cnode does not yet exist, - * so we must take a pointer to the cprotect struct. + * cp_setup_newentry * - * NOTE: - * We call this function in two places: - * 1) hfs_makenode *prior* to taking the journal/b-tree locks. - * A successful return value from this function is a pre-requisite for continuing on - * with file creation, as a wrap failure should immediately preclude the creation of - * the file. - * - * 2) cp_entry_init if we are trying to establish keys for a file/directory that did not - * have them already. (newfs_hfs may create entries in the namespace). - * - * At this point, we hold the directory cnode lock exclusive if it is available. - */ -int -cp_entry_create_keys(struct cprotect **entry_ptr, struct cnode *dcp, struct hfsmount *hfsmp, - uint32_t input_class, cnid_t fileid, mode_t cmode) + * Generate a keyless cprotect structure for use with the new AppleKeyStore kext. + * Since the kext is now responsible for vending us both wrapped/unwrapped keys + * we need to create a keyless xattr upon file / directory creation. When we have the inode value + * and the file/directory is established, then we can ask it to generate keys. Note that + * this introduces a potential race; If the device is locked and the wrapping + * keys are purged between the time we call this function and the time we ask it to generate + * keys for us, we could have to fail the open(2) call and back out the entry. + */ + +int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppliedclass, + mode_t cmode, struct cprotect **tmpentry) { - int error = 0; + int isdir = 0; struct cprotect *entry = NULL; - size_t keylen; + uint32_t target_class = hfsmp->default_cp_class; - /* Default to class D */ - uint32_t target_class = PROTECTION_CLASS_D; + if (hfsmp->hfs_running_cp_major_vers == 0) { + panic ("CP: major vers not set in mount!"); + } + + if (S_ISDIR (cmode)) { + isdir = 1; + } /* Decide the target class. Input argument takes priority. */ - if (cp_is_valid_class (input_class)) { - target_class = input_class; - /* - * One exception, F is never valid for a directory + if (cp_is_valid_class (isdir, suppliedclass)) { + /* caller supplies -1 if it was not specified so we will default to the mount point value */ + target_class = suppliedclass; + /* + * One exception, F is never valid for a directory * because its children may inherit and userland will be * unable to read/write to the files. */ - if (S_ISDIR(cmode)) { + if (isdir) { if (target_class == PROTECTION_CLASS_F) { + *tmpentry = NULL; return EINVAL; } } } else { - /* If no valid class was supplied, then inherit from parent if possible */ + /* + * If no valid class was supplied, behave differently depending on whether or not + * the item being created is a file or directory. + * + * for FILE: + * If parent directory has a non-zero class, use that. + * If parent directory has a zero class (not set), then attempt to + * apply the mount point default. + * + * for DIRECTORY: + * Directories always inherit from the parent; if the parent + * has a NONE class set, then we can continue to use that. 
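+	 *
+	 * Illustrative summary of the above (P = parent class, M = mount
+	 * point default; a sketch, not an exhaustive table):
+	 *
+	 *	file, P valid and P != DIR_NONE   -> inherits P
+	 *	file, P == DIR_NONE               -> falls back to M
+	 *	directory, P valid                -> inherits P (even DIR_NONE)
+	 *	no usable parent entry            -> M in all cases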
+ */ if ((dcp) && (dcp->c_cpentry)) { uint32_t parentclass = dcp->c_cpentry->cp_pclass; - /* If the parent class is not valid, default back to D */ - if (cp_is_valid_class(parentclass)) { - /* Parent class was good. use it. */ - target_class = parentclass; + /* If the parent class is not valid, default to the mount point value */ + if (cp_is_valid_class(1, parentclass)) { + if (isdir) { + target_class = parentclass; + } + else if (parentclass != PROTECTION_CLASS_DIR_NONE) { + /* files can inherit so long as it's not NONE */ + target_class = parentclass; + } } - /* Otherwise, we already defaulted to 'D' */ + /* Otherwise, we already defaulted to the mount point's default */ } } - keylen = S_ISDIR(cmode) ? 0 : CP_INITIAL_WRAPPEDKEYSIZE; - entry = cp_entry_alloc (keylen); - if (!entry) { - *entry_ptr = NULL; + /* Generate the cprotect to vend out */ + entry = cp_entry_alloc (0); + if (entry == NULL) { + *tmpentry = NULL; return ENOMEM; - } - - if (S_ISREG(cmode)) { - entry->cp_pclass = target_class; - entry->cp_flags |= CP_NEEDS_KEYS; - /* - * The 'fileid' argument to this function will either be - * a valid fileid for an existing file/dir, or it will be 0. - * If it is 0, then that is an indicator to the layer below - * that the file does not yet exist and we need to bypass the - * cp_wrap work to the keybag. - * - * If we are being invoked on behalf of a file/dir that does - * not yet have a key, then it will be a valid key and we - * need to behave like a setclass. - */ - error = cp_make_keys(&entry, hfsmp, fileid, entry->cp_pclass); - } - else if (S_ISDIR(cmode)) { - /* Directories just get their cp_pclass set */ - entry->cp_pclass = target_class; - } - else { - /* Unsupported for non-dir and non-file. */ - error = EINVAL; - } + } /* - * We only initialize and create the keys here; we cannot - * write out the EA until the journal lock and EA b-tree locks - * are acquired. + * We don't have keys yet, so fill in what we can. At this point + * this blob has no keys and it has no backing xattr. We just know the + * target class. */ + entry->cp_flags = (CP_NEEDS_KEYS | CP_NO_XATTR); + entry->cp_pclass = target_class; + *tmpentry = entry; - if (error) { - /* destroy the CP blob */ - cp_entry_destroy (&entry); - *entry_ptr = NULL; + return 0; +} + + +/* + * cp_needs_tempkeys + * + * Relay to caller whether or not the filesystem should generate temporary keys + * during resize operations. + */ + +int cp_needs_tempkeys (struct hfsmount *hfsmp, int *needs) +{ + + if (hfsmp->hfs_running_cp_major_vers < CP_PREV_MAJOR_VERS || + hfsmp->hfs_running_cp_major_vers > CP_NEW_MAJOR_VERS) { + return -1; + } + + /* CP_NEW_MAJOR_VERS implies CP_OFF_IV_ENABLED */ + if (hfsmp->hfs_running_cp_major_vers < CP_NEW_MAJOR_VERS) { + *needs = 0; } else { - /* otherwise, emit the cprotect entry */ - *entry_ptr = entry; + *needs = 1; } - return error; + return 0; } + /* * Set up an initial key/class pair for a disassociated cprotect entry. - * This function is used to generate transient keys that will never be + * This function is used to generate transient keys that will never be * written to disk. We use class F for this since it provides the exact * semantics that are needed here. Because we never attach this blob to * a cnode directly, we take a pointer to the cprotect struct. @@ -309,63 +352,58 @@ cp_entry_create_keys(struct cprotect **entry_ptr, struct cnode *dcp, struct hfsm * where we may rely on AES symmetry to relocate encrypted data from * one spot in the disk to another. 
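 *
 * Usage sketch (illustrative only; the volume resize path is the expected
 * caller and error handling is elided):
 *
 *	struct cprotect *tmpentry = NULL;
 *	if (cp_entry_gentempkeys(&tmpentry, hfsmp) == 0) {
 *		... perform relocation I/O with the transient class F key ...
 *		cp_entry_destroy(tmpentry);
 *	}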
*/ -int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp) { - int error = 0; +int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp) +{ + struct cprotect *entry = NULL; - size_t keylen; - /* Default to class F */ - uint32_t target_class = PROTECTION_CLASS_F; + if (hfsmp->hfs_running_cp_major_vers < CP_NEW_MAJOR_VERS) { + return EPERM; + } - /* - * This should only be used for files, so we default to the - * initial wrapped key size + /* + * This should only be used for files and won't be written out. + * We don't need a persistent key. */ - keylen = CP_INITIAL_WRAPPEDKEYSIZE; - entry = cp_entry_alloc (keylen); - if (!entry) { + entry = cp_entry_alloc (0); + if (entry == NULL) { *entry_ptr = NULL; return ENOMEM; } + entry->cp_cache_key_len = CP_MAX_KEYSIZE; + entry->cp_pclass = PROTECTION_CLASS_F; + entry->cp_persistent_key_len = 0; - error = cp_make_keys (&entry, hfsmp, 0, target_class); - - /* - * We only initialize the keys here; we don't write anything out - */ + /* Generate the class F key */ + read_random (&entry->cp_cache_key[0], entry->cp_cache_key_len); - if (error) { - /* destroy the CP blob */ - cp_entry_destroy (&entry); - *entry_ptr = NULL; - } - else { - /* otherwise, emit the cprotect entry */ - *entry_ptr = entry; - } + /* Generate the IV key */ + cp_setup_aes_ctx(entry); + entry->cp_flags |= CP_OFF_IV_ENABLED; - return error; + *entry_ptr = entry; + return 0; } /* * Tear down and clear a cprotect blob for a closing file. - * Called at hfs_reclaim_cnode: cnode is locked exclusive. + * Called at hfs_reclaim_cnode: cnode is locked exclusive. */ void -cp_entry_destroy(struct cprotect **entry_ptr) { - struct cprotect *entry = *entry_ptr; - if (!entry) { +cp_entry_destroy(struct cprotect *entry_ptr) +{ + if (entry_ptr == NULL) { /* nothing to clean up */ return; } - *entry_ptr = NULL; - cp_entry_dealloc(entry); + cp_entry_dealloc(entry_ptr); } -int -cp_fs_protected (mount_t mnt) { +int +cp_fs_protected (mount_t mnt) +{ return (vfs_flags(mnt) & MNT_CPROTECT); } @@ -380,12 +418,12 @@ cp_get_protected_cnode(struct vnode *vp) if (!cp_vnode_is_eligible(vp)) { return NULL; } - + if (!cp_fs_protected(VTOVFS(vp))) { /* mount point doesn't support it */ return NULL; } - + return (struct cnode*) vp->v_data; } @@ -394,7 +432,7 @@ cp_get_protected_cnode(struct vnode *vp) * Sets *class to persistent class associated with vnode, * or returns error. */ -int +int cp_vnode_getclass(struct vnode *vp, int *class) { struct cprotect *entry; @@ -410,54 +448,47 @@ cp_vnode_getclass(struct vnode *vp, int *class) /* Is the mount point formatted for content protection? */ if (!cp_fs_protected(VTOVFS(vp))) { - return EPERM; + return ENOTSUP; } - + cp = VTOC(vp); hfsmp = VTOHFS(vp); - + /* - * Take the truncate lock up-front in shared mode because we may need - * to manipulate the CP blob. Pend lock events until we're done here. + * Take the truncate lock up-front in shared mode because we may need + * to manipulate the CP blob. Pend lock events until we're done here. */ - hfs_lock_truncate (cp, HFS_SHARED_LOCK); + hfs_lock_truncate (cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); took_truncate_lock = 1; /* * We take only the shared cnode lock up-front. If it turns out that - * we need to manipulate the CP blob to write a key out, drop the - * shared cnode lock and acquire an exclusive lock. + * we need to manipulate the CP blob to write a key out, drop the + * shared cnode lock and acquire an exclusive lock. 
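+	 * (That upgrade, were it needed, would mirror cp_handle_vnop below:
+	 * lck_rw_lock_shared_to_exclusive() on the cnode rwlock, falling back
+	 * to a fresh exclusive hfs_lock() if the upgrade fails.)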
 */
-	error = hfs_lock(cp, HFS_SHARED_LOCK);
+	error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
 	if (error) {
-		hfs_unlock_truncate(cp, 0);
+		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 		return error;
 	}
-	
+
 	/* pull the class from the live entry */
 	entry = cp->c_cpentry;
-	
-	if (!entry) {
+
+	if (entry == NULL) {
 		panic("Content Protection: uninitialized cnode %p", cp);
 	}
 
-	/*
-	 * Any vnode on a content protected filesystem must have keys
-	 * created by the time the vnode is vended out. If we generate
-	 * a vnode that does not have keys, something bad happened.
-	 */
-	if ((entry->cp_flags & CP_NEEDS_KEYS)) {
-		panic ("cp_vnode_getclass: cp %p has no keys!", cp);
-	}
+	/* Note that we may not have keys yet, but we know the target class. */
 
 	if (error == 0) {
 		*class = entry->cp_pclass;
 	}
-	
+
 	if (took_truncate_lock) {
-		hfs_unlock_truncate(cp, 0);
+		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 	}
-	
+
 	hfs_unlock(cp);
 	return error;
 }
 
@@ -469,30 +500,23 @@ cp_vnode_getclass(struct vnode *vp, int *class)
 * If the new class can't be accessed now, EPERM.
 * Otherwise, record class and re-wrap key if the mount point is content-protected.
 */
-int
+int
 cp_vnode_setclass(struct vnode *vp, uint32_t newclass)
 {
 	struct cnode *cp;
 	struct cprotect *entry = 0;
 	int error = 0;
 	int took_truncate_lock = 0;
-	u_int32_t keylen = 0;
 	struct hfsmount *hfsmp = NULL;
-	
-	if (!cp_is_valid_class(newclass)) {
-		printf("hfs: CP: cp_setclass called with invalid class %d\n", newclass);
-		return EINVAL;
+	int isdir = 0;
+
+	if (vnode_isdir (vp)) {
+		isdir = 1;
 	}
 
-	if (vnode_isdir(vp)) {
-		if (newclass == PROTECTION_CLASS_F) {
-			/*
-			 * Directories are not allowed to set to class F, since the
-			 * children may inherit it and then userland will not be able
-			 * to read/write to the file.
-			 */
-			return EINVAL;
-		}
+	if (!cp_is_valid_class(isdir, newclass)) {
+		printf("hfs: CP: cp_setclass called with invalid class %d\n", newclass);
+		return EINVAL;
 	}
 
 	/* Is this an interesting vp? */
@@ -502,70 +526,117 @@ cp_vnode_setclass(struct vnode *vp, uint32_t newclass)
 
 	/* Is the mount point formatted for content protection? */
 	if (!cp_fs_protected(VTOVFS(vp))) {
-		return EPERM;
+		return ENOTSUP;
 	}
-	cp = VTOC(vp);
 	hfsmp = VTOHFS(vp);
+	if (hfsmp->hfs_flags & HFS_READ_ONLY) {
+		return EROFS;
+	}
 
-	/* 
-	 * Take the cnode truncate lock exclusive because we want to manipulate the 
+	/*
+	 * Take the cnode truncate lock exclusive because we want to manipulate the
 	 * CP blob. The lock-event handling code is doing the same. This also forces
 	 * all pending IOs to drain before we can re-write the persistent and cache keys.
 	 */
-	hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK);
+	cp = VTOC(vp);
+	hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 	took_truncate_lock = 1;
-	
-	if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) {
+
+	if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) {
 		return EINVAL;
 	}
-	
+
 	entry = cp->c_cpentry;
 	if (entry == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
-	if ((entry->cp_flags & CP_NEEDS_KEYS)) {
-		/*
-		 * We should have created this vnode and its keys atomically during
-		 * file/directory creation. If we get here and it doesn't have keys yet,
-		 * something bad happened.
+	/*
+	 * re-wrap per-file key with new class.
+	 * Generate an entirely new key if switching to F.
+	 */
+	if (vnode_isreg(vp)) {
+		/*
+		 * The vnode is a file. Before proceeding with the re-wrap, we
+		 * need to unwrap the existing keys first.
This is to ensure that + * the destination class's properties still work appropriately for the + * target class (since B allows I/O but an unwrap prior to the next unlock + * will not be allowed). */ - panic ("cp_vnode_setclass: cp %p has no keys!\n", cp); - } - - if (entry->cp_flags & CP_KEY_FLUSHED) { - error = cp_restore_keys(entry, hfsmp); - if (error) - goto out; - } + if (entry->cp_flags & CP_KEY_FLUSHED) { + error = cp_restore_keys (entry, hfsmp, cp); + if (error) { + goto out; + } + } + if (newclass == PROTECTION_CLASS_F) { + /* Verify that file is blockless if switching to class F */ + if (cp->c_datafork->ff_size > 0) { + error = EINVAL; + goto out; + } - /* re-wrap per-file key with new class */ - if (vnode_isreg(vp)) { - error = cp_wrap(newclass, hfsmp, cp->c_fileid, &cp->c_cpentry); + entry->cp_pclass = newclass; + entry->cp_cache_key_len = CP_MAX_KEYSIZE; + read_random (&entry->cp_cache_key[0], entry->cp_cache_key_len); + if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { + cp_setup_aes_ctx (entry); + entry->cp_flags |= CP_OFF_IV_ENABLED; + } + bzero(entry->cp_persistent_key, entry->cp_persistent_key_len); + entry->cp_persistent_key_len = 0; + } else { + /* Deny the setclass if file is to be moved from F to something else */ + if (entry->cp_pclass == PROTECTION_CLASS_F) { + error = EPERM; + goto out; + } + /* We cannot call cp_rewrap unless the keys were already in existence. */ + if (entry->cp_flags & CP_NEEDS_KEYS) { + struct cprotect *newentry = NULL; + error = cp_generate_keys (hfsmp, cp, newclass, &newentry); + if (error == 0) { + cp_replace_entry (cp, newentry); + } + /* Bypass the setxattr code below since generate_keys does it for us */ + goto out; + } + else { + error = cp_rewrap(cp, hfsmp, newclass); + } + } if (error) { /* we didn't have perms to set this class. leave file as-is and error out */ goto out; } } - - /* cp_wrap() potentially updates c_cpentry because we passed in its ptr */ - entry = cp->c_cpentry; + else if (vnode_isdir(vp)) { + /* For directories, just update the pclass */ + entry->cp_pclass = newclass; + error = 0; + } + else { + /* anything else, just error out */ + error = EINVAL; + goto out; + } - entry->cp_pclass = newclass; + /* + * We get here if the new class was F, or if we were re-wrapping a cprotect that already + * existed. If the keys were never generated, then they'll skip the setxattr calls. + */ + + error = cp_setxattr(cp, cp->c_cpentry, VTOHFS(vp), 0, XATTR_REPLACE); + if (error == ENOATTR) { + error = cp_setxattr(cp, cp->c_cpentry, VTOHFS(vp), 0, XATTR_CREATE); + } - /* prepare to write the xattr out */ - keylen = entry->cp_persistent_key_len; - - error = cp_setxattr(cp, entry, VTOHFS(vp), 0,XATTR_REPLACE); - if (error == ENOATTR) - error = cp_setxattr(cp, entry, VTOHFS(vp), 0, XATTR_CREATE); - out: - + if (took_truncate_lock) { - hfs_unlock_truncate (cp, 0); + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); } hfs_unlock(cp); return error; @@ -580,6 +651,10 @@ int cp_vnode_transcode(vnode_t vp) int took_truncate_lock = 0; struct hfsmount *hfsmp = NULL; + /* Structures passed between HFS and AKS */ + cp_cred_s access_in; + cp_wrapped_key_s wrapped_key_in; + /* Is this an interesting vp? */ if (!cp_vnode_is_eligible(vp)) { return EBADF; @@ -587,24 +662,24 @@ int cp_vnode_transcode(vnode_t vp) /* Is the mount point formatted for content protection? 
*/ if (!cp_fs_protected(VTOVFS(vp))) { - return EPERM; + return ENOTSUP; } cp = VTOC(vp); hfsmp = VTOHFS(vp); - /* - * Take the cnode truncate lock exclusive because we want to manipulate the + /* + * Take the cnode truncate lock exclusive because we want to manipulate the * CP blob. The lock-event handling code is doing the same. This also forces * all pending IOs to drain before we can re-write the persistent and cache keys. */ - hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); took_truncate_lock = 1; - - if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) { + + if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) { return EINVAL; } - + entry = cp->c_cpentry; if (entry == NULL) { error = EINVAL; @@ -614,21 +689,14 @@ int cp_vnode_transcode(vnode_t vp) if ((entry->cp_flags & CP_NEEDS_KEYS)) { /* * If we are transcoding keys for AKB, then we should have already established - * a set of keys for this vnode. IF we don't have keys yet, then something bad + * a set of keys for this vnode. IF we don't have keys yet, then something bad * happened. */ - panic ("cp_vnode_transcode: cp %p has no keys!", cp); - } - - if (entry->cp_flags & CP_KEY_FLUSHED) { - error = cp_restore_keys(entry, hfsmp); - - if (error) { - goto out; - } + error = EINVAL; + goto out; } - /* Send the per-file key for re-wrap with the current class information + /* Send the per-file key in wrapped form for re-wrap with the current class information * Send NULLs in the output parameters of the wrapper() and AKS will do the rest. * Don't need to process any outputs, so just clear the locks and pass along the error. */ if (vnode_isreg(vp)) { @@ -641,12 +709,17 @@ int cp_vnode_transcode(vnode_t vp) goto out; } - error = g_cp_wrap_func.wrapper(entry->cp_pclass, - cp->c_fileid, - entry->cp_cache_key, - entry->cp_cache_key_len, - NULL, - NULL); + cp_init_access(&access_in, cp); + + bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + wrapped_key_in.key = entry->cp_persistent_key; + wrapped_key_in.key_len = entry->cp_persistent_key_len; + wrapped_key_in.dp_class = entry->cp_pclass; + + error = g_cp_wrap_func.rewrapper(&access_in, + entry->cp_pclass, + &wrapped_key_in, + NULL); if(error) error = EPERM; @@ -654,7 +727,7 @@ int cp_vnode_transcode(vnode_t vp) out: if (took_truncate_lock) { - hfs_unlock_truncate (cp, 0); + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); } hfs_unlock(cp); return error; @@ -670,12 +743,12 @@ out: * * Takes cnode lock, and upgrades to exclusive if modifying cprotect. * - * Note that this function does *NOT* take the cnode truncate lock. This is because + * Note that this function does *NOT* take the cnode truncate lock. This is because * the thread calling us may already have the truncate lock. It is not necessary * because either we successfully finish this function before the keys are tossed - * and the IO will fail, or the keys are tossed and then this function will fail. + * and the IO will fail, or the keys are tossed and then this function will fail. * Either way, the cnode lock still ultimately guards the keys. We only rely on the - * truncate lock to protect us against tossing the keys as a cluster call is in-flight. + * truncate lock to protect us against tossing the keys as a cluster call is in-flight. 
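+ *
+ * Typical call pattern from an I/O path (a sketch; the vnop constant and
+ * ioflag value come from the originating read/write and are assumptions
+ * here):
+ *
+ *	error = cp_handle_vnop(vp, CP_READ_ACCESS, ioflag);
+ *	if (error)
+ *		return error;	(keys unavailable or access denied)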
*/ int cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) @@ -685,12 +758,12 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) struct hfsmount *hfsmp = NULL; struct cnode *cp = NULL; - /* + /* * First, do validation against the vnode before proceeding any further: * Is this vnode originating from a valid content-protected filesystem ? */ if (cp_vnode_is_eligible(vp) == 0) { - /* + /* * It is either not HFS or not a file/dir. Just return success. This is a valid * case if servicing i/o against another filesystem type from VFS */ @@ -699,31 +772,40 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) if (cp_fs_protected (VTOVFS(vp)) == 0) { /* - * The underlying filesystem does not support content protection. This is also + * The underlying filesystem does not support content protection. This is also * a valid case. Simply return success. */ return 0; } - - /* + + /* * At this point, we know we have a HFS vnode that backs a file or directory on a * filesystem that supports content protection */ cp = VTOC(vp); - if ((error = hfs_lock(cp, HFS_SHARED_LOCK))) { + if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { return error; } entry = cp->c_cpentry; - - if (!entry) { + + if (entry == NULL) { /* * If this cnode is not content protected, simply return success. - * Note that this function is called by all I/O-based call sites + * Note that this function is called by all I/O-based call sites * when CONFIG_PROTECT is enabled during XNU building. */ + /* + * All files should have cprotect structs. It's possible to encounter + * a directory from a V2.0 CP system but all files should have protection + * EAs + */ + if (vnode_isreg(vp)) { + error = EPERM; + } + goto out; } @@ -741,8 +823,8 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) if ((error = cp_check_access(cp, vnop))) { /* check for raw encrypted access before bailing out */ if ((vnop == CP_READ_ACCESS) && (ioflag & IO_ENCRYPTED)) { - /* - * read access only + asking for the raw encrypted bytes + /* + * read access only + asking for the raw encrypted bytes * is legitimate, so reset the error value to 0 */ error = 0; @@ -759,21 +841,24 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) /* upgrade to exclusive lock */ if (lck_rw_lock_shared_to_exclusive(&cp->c_rwlock) == FALSE) { - if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return error; } } else { cp->c_lockowner = current_thread(); } - + /* generate new keys if none have ever been saved */ if ((entry->cp_flags & CP_NEEDS_KEYS)) { - /* - * By the time we're trying to initiate I/O against a content - * protected vnode, we should have already created keys for this - * file/dir. If we don't have keys, something bad happened. 
- */ - panic ("cp_handle_vnop: cp %p has no keys!", cp); + struct cprotect *newentry = NULL; + error = cp_generate_keys (hfsmp, cp, cp->c_cpentry->cp_pclass, &newentry); + if (error == 0) { + cp_replace_entry (cp, newentry); + entry = newentry; + } + else { + goto out; + } } /* unwrap keys if needed */ @@ -783,8 +868,7 @@ cp_handle_vnop(struct vnode *vp, int vnop, int ioflag) error = 0; } else { - error = cp_restore_keys(entry, hfsmp); - + error = cp_restore_keys(entry, hfsmp, cp); if (error) { goto out; } @@ -807,13 +891,14 @@ cp_handle_open(struct vnode *vp, int mode) { struct cnode *cp = NULL ; struct cprotect *entry = NULL; + struct hfsmount *hfsmp; int error = 0; - + /* If vnode not eligible, just return success */ if (!cp_vnode_is_eligible(vp)) { return 0; } - + /* If mount point not properly set up, then also return success */ if (!cp_fs_protected(VTOVFS(vp))) { return 0; @@ -821,31 +906,113 @@ cp_handle_open(struct vnode *vp, int mode) /* We know the vnode is in a valid state. acquire cnode and validate */ cp = VTOC(vp); + hfsmp = VTOHFS(vp); - if ((error = hfs_lock(cp, HFS_SHARED_LOCK))) { + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return error; } entry = cp->c_cpentry; - if (!entry) + if (entry == NULL) { + /* + * If the mount is protected and we couldn't get a cprotect for this vnode, + * then it's not valid for opening. + */ + if (vnode_isreg(vp)) { + error = EPERM; + } goto out; + } if (!S_ISREG(cp->c_mode)) goto out; + /* + * Does the cnode have keys yet? If not, then generate them. + */ + if (entry->cp_flags & CP_NEEDS_KEYS) { + struct cprotect *newentry = NULL; + error = cp_generate_keys (hfsmp, cp, cp->c_cpentry->cp_pclass, &newentry); + if (error == 0) { + cp_replace_entry (cp, newentry); + entry = newentry; + } + else { + goto out; + } + } + + /* + * We want to minimize the number of unwraps that we'll have to do since + * the cost can vary, depending on the platform we're running. + */ switch (entry->cp_pclass) { case PROTECTION_CLASS_B: - /* Class B always allows creation */ - if (mode & O_CREAT) - goto out; + if (mode & O_CREAT) { + /* + * Class B always allows creation. Since O_CREAT was passed through + * we infer that this was a newly created vnode/cnode. Even though a potential + * race exists when multiple threads attempt to create/open a particular + * file, only one can "win" and actually create it. VFS will unset the + * O_CREAT bit on the loser. + * + * Note that skipping the unwrap check here is not a security issue -- + * we have to unwrap the key permanently upon the first I/O. + */ + break; + } + + if ((entry->cp_flags & CP_KEY_FLUSHED) == 0) { + /* + * For a class B file, attempt the unwrap if we have the key in + * core already. + * The device could have just transitioned into the lock state, and + * this vnode may not yet have been purged from the vnode cache (which would + * remove the keys). 
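+			 *
+			 * If the unwrap below fails, we map the failure to EPERM
+			 * rather than panicking: a just-locked device is an
+			 * expected, recoverable state at open(2) time.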
+ */ + cp_cred_s access_in; + cp_wrapped_key_s wrapped_key_in; + + cp_init_access(&access_in, cp); + bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + wrapped_key_in.key = entry->cp_persistent_key; + wrapped_key_in.key_len = entry->cp_persistent_key_len; + wrapped_key_in.dp_class = entry->cp_pclass; + error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, NULL); + if (error) { + error = EPERM; + } + break; + } + /* otherwise, fall through to attempt the unwrap/restore */ case PROTECTION_CLASS_A: - error = g_cp_wrap_func.unwrapper(entry->cp_pclass, - entry->cp_persistent_key, - entry->cp_persistent_key_len, - NULL, NULL); - if (error) + case PROTECTION_CLASS_C: + /* + * At this point, we know that we need to attempt an unwrap if needed; we want + * to makes sure that open(2) fails properly if the device is either just-locked + * or never made it past first unlock. Since the keybag serializes access to the + * unwrapping keys for us and only calls our VFS callback once they've been purged, + * we will get here in two cases: + * + * A) we're in a window before the wrapping keys are purged; this is OK since when they get + * purged, the vnode will get flushed if needed. + * + * B) The keys are already gone. In this case, the restore_keys call below will fail. + * + * Since this function is bypassed entirely if we're opening a raw encrypted file, + * we can always attempt the restore. + */ + if (entry->cp_flags & CP_KEY_FLUSHED) { + error = cp_restore_keys(entry, hfsmp, cp); + } + + if (error) { error = EPERM; + } + break; + + case PROTECTION_CLASS_D: default: break; } @@ -856,7 +1023,7 @@ out: } -/* +/* * During hfs resize operations, we have slightly different constraints than during * normal VNOPS that read/write data to files. Specifically, we already have the cnode * locked (so nobody else can modify it), and we are doing the IO with root privileges, since @@ -864,23 +1031,24 @@ out: * vs. lock), and don't worry about non-existing keys. If the file exists on-disk with valid * payload, then it must have keys set up already by definition. */ -int -cp_handle_relocate (struct cnode *cp, struct hfsmount *hfsmp) { +int +cp_handle_relocate (struct cnode *cp, struct hfsmount *hfsmp) +{ struct cprotect *entry; int error = -1; - /* cp is already locked */ + /* cp is already locked */ entry = cp->c_cpentry; if (!entry) goto out; - /* - * Still need to validate whether to permit access to the file or not - * based on lock status + /* + * Still need to validate whether to permit access to the file or not + * based on lock status */ if ((error = cp_check_access(cp, CP_READ_ACCESS | CP_WRITE_ACCESS))) { goto out; - } + } if (entry->cp_flags == 0) { /* no more work to do */ @@ -892,14 +1060,14 @@ cp_handle_relocate (struct cnode *cp, struct hfsmount *hfsmp) { /* unwrap keys if needed */ if (entry->cp_flags & CP_KEY_FLUSHED) { - error = cp_restore_keys(entry, hfsmp); + error = cp_restore_keys(entry, hfsmp, cp); } - /* + /* * Don't need to write out the EA since if the file has actual extents, * it must have an EA */ -out: +out: /* return the cp still locked */ return error; @@ -912,8 +1080,9 @@ out: * Note that all multi-byte fields are written to disk little endian so they must be * converted to native endian-ness as needed. 
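 *
 * For example (a sketch; these are the same swaps performed on the root
 * xattr fields below):
 *
 *	xattr.major_version = OSSwapLittleToHostInt16(xattr.major_version);
 *	xattr.flags = OSSwapLittleToHostInt64(xattr.flags);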
*/ -int -cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) { +int +cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) +{ uio_t auio; char uio_buf[UIO_SIZEOF(1)]; size_t attrsize = sizeof(struct cp_root_xattr); @@ -942,7 +1111,7 @@ cp_getrootxattr(struct hfsmount* hfsmp, struct cp_root_xattr *outxattr) { outxattr->minor_version = OSSwapLittleToHostInt16(outxattr->minor_version); outxattr->flags = OSSwapLittleToHostInt64(outxattr->flags); - if (error != 0) { + if (error != 0) { goto out; } @@ -971,7 +1140,7 @@ cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr) args.a_vp = NULL; args.a_name = CONTENT_PROTECTION_XATTR_NAME; args.a_uio = NULL; //pass data ptr instead - args.a_options = 0; + args.a_options = 0; args.a_context = NULL; //no context needed, only done from mount. /* Now convert the multi-byte fields to little endian before writing to disk. */ @@ -979,7 +1148,7 @@ cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr) newxattr->minor_version = OSSwapHostToLittleInt16(newxattr->minor_version); newxattr->flags = OSSwapHostToLittleInt64(newxattr->flags); - error = hfs_setxattr_internal(NULL, (caddr_t)newxattr, + error = hfs_setxattr_internal(NULL, (caddr_t)newxattr, sizeof(struct cp_root_xattr), &args, hfsmp, 1); return error; } @@ -994,20 +1163,25 @@ cp_setrootxattr(struct hfsmount *hfsmp, struct cp_root_xattr *newxattr) int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options) { int error = 0; - size_t attrsize; + size_t attrsize; struct vnop_setxattr_args args; uint32_t target_fileid; struct cnode *arg_cp = NULL; uint32_t tempflags = 0; args.a_desc = NULL; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + if (cp) { args.a_vp = cp->c_vp; target_fileid = 0; arg_cp = cp; } else { - /* + /* * When we set the EA in the same txn as the file creation, * we do not have a vnode/cnode yet. Use the specified fileid. */ @@ -1016,14 +1190,10 @@ int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp } args.a_name = CONTENT_PROTECTION_XATTR_NAME; args.a_uio = NULL; //pass data ptr instead - args.a_options = options; + args.a_options = options; args.a_context = vfs_context_current(); - - /* Add asserts for the CP flags in the CP blob. */ - if (entry->cp_flags & CP_NEEDS_KEYS) { - panic ("cp_setxattr: cp %p , cpentry %p still needs keys!", cp, entry); - } + /* Note that it's OK to write out an XATTR without keys. */ /* Disable flags that will be invalid as we're writing the EA out at this point. */ tempflags = entry->cp_flags; tempflags &= ~CP_NO_XATTR; @@ -1039,16 +1209,16 @@ int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp bzero (newxattr, sizeof(struct cp_xattr_v4)); attrsize = sizeof(*newxattr) - CP_MAX_WRAPPEDKEYSIZE + entry->cp_persistent_key_len; - + /* Endian swap the multi-byte fields into L.E from host. 
*/ newxattr->xattr_major_version = OSSwapHostToLittleInt16 (hfsmp->hfs_running_cp_major_vers); newxattr->xattr_minor_version = OSSwapHostToLittleInt16(CP_MINOR_VERS); newxattr->key_size = OSSwapHostToLittleInt32(entry->cp_persistent_key_len); newxattr->flags = OSSwapHostToLittleInt32(tempflags); - newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); + newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); bcopy(entry->cp_persistent_key, newxattr->persistent_key, entry->cp_persistent_key_len); - - error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid); + + error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid); FREE(newxattr, M_TEMP); break; @@ -1061,24 +1231,27 @@ int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp break; } bzero (newxattr, sizeof(struct cp_xattr_v2)); - + attrsize = sizeof(*newxattr); - + /* Endian swap the multi-byte fields into L.E from host. */ newxattr->xattr_major_version = OSSwapHostToLittleInt16(hfsmp->hfs_running_cp_major_vers); newxattr->xattr_minor_version = OSSwapHostToLittleInt16(CP_MINOR_VERS); newxattr->key_size = OSSwapHostToLittleInt32(entry->cp_persistent_key_len); newxattr->flags = OSSwapHostToLittleInt32(tempflags); - newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); + newxattr->persistent_class = OSSwapHostToLittleInt32(entry->cp_pclass); bcopy(entry->cp_persistent_key, newxattr->persistent_key, entry->cp_persistent_key_len); - + error = hfs_setxattr_internal(arg_cp, (caddr_t)newxattr, attrsize, &args, hfsmp, target_fileid); FREE (newxattr, M_TEMP); break; } + default: + printf("hfs: cp_setxattr: Unknown CP version running \n"); + break; } - + if (error == 0 ) { entry->cp_flags &= ~CP_NO_XATTR; } @@ -1088,36 +1261,13 @@ int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp } -/* - * This function takes a cprotect struct with the cache keys and re-wraps them for - * MKB's sake so that it can update its own data structures. It is useful when - * there may not be a cnode in existence yet (for example, after creating - * a file). 
- */ -int -cp_update_mkb (struct cprotect *entry, uint32_t fileid) { - - int error = 0; - - /* We already validated this pclass earlier */ - if (entry->cp_pclass != PROTECTION_CLASS_F ) { - error = g_cp_wrap_func.wrapper (entry->cp_pclass, fileid, entry->cp_cache_key, - entry->cp_cache_key_len, NULL, NULL); - } - - if (error) { - error = EPERM; - } - - return error; -} - /* * Used by an fcntl to query the underlying FS for its content protection version # */ -int -cp_get_root_major_vers(vnode_t vp, uint32_t *level) { +int +cp_get_root_major_vers(vnode_t vp, uint32_t *level) +{ int err = 0; struct hfsmount *hfsmp = NULL; struct mount *mp = NULL; @@ -1126,7 +1276,7 @@ cp_get_root_major_vers(vnode_t vp, uint32_t *level) { /* check if it supports content protection */ if (cp_fs_protected(mp) == 0) { - return EINVAL; + return ENOTSUP; } hfsmp = VFSTOHFS(mp); @@ -1142,6 +1292,26 @@ cp_get_root_major_vers(vnode_t vp, uint32_t *level) { return err; } +/* Used by fcntl to query default protection level of FS */ +int cp_get_default_level (struct vnode *vp, uint32_t *level) { + int err = 0; + struct hfsmount *hfsmp = NULL; + struct mount *mp = NULL; + + mp = VTOVFS(vp); + + /* check if it supports content protection */ + if (cp_fs_protected(mp) == 0) { + return ENOTSUP; + } + + hfsmp = VFSTOHFS(mp); + /* figure out the default */ + + *level = hfsmp->default_cp_class; + return err; +} + /******************** * Private Functions *******************/ @@ -1157,7 +1327,7 @@ cp_root_major_vers(mount_t mp) err = cp_getrootxattr (hfsmp, &xattr); if (err == 0) { - hfsmp->hfs_running_cp_major_vers = xattr.major_version; + hfsmp->hfs_running_cp_major_vers = xattr.major_version; } else { return EINVAL; @@ -1176,11 +1346,23 @@ cp_vnode_is_eligible(struct vnode *vp) -static int -cp_is_valid_class(int class) +int +cp_is_valid_class(int isdir, int32_t protectionclass) { - return ((class >= PROTECTION_CLASS_A) && - (class <= PROTECTION_CLASS_F)); + /* + * The valid protection classes are from 0 -> N + * We use a signed argument to detect unassigned values from + * directory entry creation time in HFS. + */ + if (isdir) { + /* Directories are not allowed to have F, but they can have "NONE" */ + return ((protectionclass >= PROTECTION_CLASS_DIR_NONE) && + (protectionclass <= PROTECTION_CLASS_D)); + } + else { + return ((protectionclass >= PROTECTION_CLASS_A) && + (protectionclass <= PROTECTION_CLASS_F)); + } } @@ -1191,8 +1373,8 @@ cp_entry_alloc(size_t keylen) if (keylen > CP_MAX_WRAPPEDKEYSIZE) return (NULL); - - MALLOC(cp_entry, struct cprotect *, sizeof(struct cprotect) + keylen, + + MALLOC(cp_entry, struct cprotect *, sizeof(struct cprotect) + keylen, M_TEMP, M_WAITOK); if (cp_entry == NULL) return (NULL); @@ -1207,7 +1389,7 @@ cp_entry_dealloc(struct cprotect *entry) { uint32_t keylen = entry->cp_persistent_key_len; bzero(entry, (sizeof(*entry) + keylen)); - FREE(entry, M_TEMP); + FREE(entry, M_TEMP); } @@ -1215,7 +1397,7 @@ cp_entry_dealloc(struct cprotect *entry) * Initializes a new cprotect entry with xattr data from the cnode. * cnode lock held shared */ -static int +static int cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry) { int error = 0; @@ -1252,16 +1434,16 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry FREE (xattr, M_TEMP); goto out; } - + /* Endian swap the multi-byte fields into host endianness from L.E. 
*/ xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version); xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version); xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size); xattr->flags = OSSwapLittleToHostInt32(xattr->flags); xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class); - + if (xattr->xattr_major_version != hfsmp->hfs_running_cp_major_vers ) { - printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n", + printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n", xattr->xattr_major_version, hfsmp->hfs_running_cp_major_vers); error = EINVAL; FREE (xattr, M_TEMP); @@ -1277,9 +1459,17 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry error = EINVAL; FREE (xattr, M_TEMP); - goto out; + goto out; } + /* + * Class F files have no backing key; their keylength should be 0, + * though they should have the proper flags set. + * + * A request to instantiate a CP for a class F file should result + * in a bzero'd cp that just says class F, with key_flushed set. + */ + /* set up entry with information from xattr */ entry = cp_entry_alloc(xattr->key_size); if (!entry) { @@ -1287,12 +1477,24 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry return ENOMEM; } - - entry->cp_pclass = xattr->persistent_class; + + entry->cp_pclass = xattr->persistent_class; + + /* + * Suppress invalid flags that should not be set. + * If we have gotten this far, then CP_NO_XATTR cannot possibly + * be valid; the EA exists. + */ + xattr->flags &= ~CP_NO_XATTR; + + entry->cp_flags = xattr->flags; if (xattr->xattr_major_version >= CP_NEW_MAJOR_VERS) { entry->cp_flags |= CP_OFF_IV_ENABLED; } - bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size); + + if (entry->cp_pclass != PROTECTION_CLASS_F ) { + bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size); + } FREE (xattr, M_TEMP); @@ -1310,27 +1512,27 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry uio_addiov(auio, CAST_USER_ADDR_T(xattr), attrsize); args.a_size = &attrsize; - + error = hfs_getxattr_internal(cp, &args, VTOHFS(cp->c_vp), 0); if (error != 0) { FREE (xattr, M_TEMP); goto out; } - + /* Endian swap the multi-byte fields into host endianness from L.E. 
*/ xattr->xattr_major_version = OSSwapLittleToHostInt16(xattr->xattr_major_version); xattr->xattr_minor_version = OSSwapLittleToHostInt16(xattr->xattr_minor_version); xattr->key_size = OSSwapLittleToHostInt32(xattr->key_size); xattr->flags = OSSwapLittleToHostInt32(xattr->flags); xattr->persistent_class = OSSwapLittleToHostInt32(xattr->persistent_class); - + if (xattr->xattr_major_version != hfsmp->hfs_running_cp_major_vers) { - printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n", + printf("hfs: cp_getxattr: bad xattr version %d expecting %d\n", xattr->xattr_major_version, hfsmp->hfs_running_cp_major_vers); error = EINVAL; FREE (xattr, M_TEMP); goto out; - } + } /* * Prevent a buffer overflow, and validate the key length obtained from the @@ -1340,7 +1542,7 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry if (xattr->key_size > CP_V2_WRAPPEDKEYSIZE) { error = EINVAL; FREE (xattr, M_TEMP); - goto out; + goto out; } /* set up entry with information from xattr */ entry = cp_entry_alloc(xattr->key_size); @@ -1348,9 +1550,22 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry FREE (xattr, M_TEMP); return ENOMEM; } - + entry->cp_pclass = xattr->persistent_class; - bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size); + + /* + * Suppress invalid flags that should not be set. + * If we have gotten this far, then CP_NO_XATTR cannot possibly + * be valid; the EA exists. + */ + xattr->flags &= ~CP_NO_XATTR; + + entry->cp_flags = xattr->flags; + + if (entry->cp_pclass != PROTECTION_CLASS_F ) { + bcopy(xattr->persistent_key, entry->cp_persistent_key, xattr->key_size); + } + FREE (xattr, M_TEMP); break; } @@ -1358,89 +1573,8 @@ cp_getxattr(struct cnode *cp, struct hfsmount *hfsmp, struct cprotect **outentry out: uio_free(auio); - - *outentry = entry; - return error; -} - - -/* Setup AES context */ -static int -cp_setup_aes_ctx(struct cprotect *entry) -{ - SHA1_CTX sha1ctxt; - uint8_t cp_cache_iv_key[CP_IV_KEYSIZE]; /* Kiv */ - - /* First init the cp_cache_iv_key[] */ - SHA1Init(&sha1ctxt); - SHA1Update(&sha1ctxt, &entry->cp_cache_key[0], CP_MAX_KEYSIZE); - SHA1Final(&cp_cache_iv_key[0], &sha1ctxt); - - aes_encrypt_key128(&cp_cache_iv_key[0], &entry->cp_cache_iv_ctx); - - return 0; -} - - -/* - * Make a new random per-file key and wrap it. - * Normally this will get default_pclass as PROTECTION_CLASS_D. - * - * But when the directory's class is set, we use that as the default. - */ -static int -cp_make_keys(struct cprotect **entry_arg, struct hfsmount *hfsmp, cnid_t fileid, int default_pclass) -{ - struct cprotect *entry = *entry_arg; - int target_pclass = 0; - int error = 0; - - if (g_cp_state.wrap_functions_set != 1) { - printf("hfs: CP: could not create keys: no wrappers set\n"); - return ENXIO; - } - - /* create new cp data: key and class */ - entry->cp_cache_key_len = CP_MAX_KEYSIZE; - read_random(&entry->cp_cache_key[0], entry->cp_cache_key_len); - - if (cp_is_valid_class(default_pclass) == 0) { - target_pclass = PROTECTION_CLASS_D; - } else { - target_pclass = default_pclass; - } - - /* - * Attempt to wrap the new key in the class key specified by target_pclass - * Note that because we may be inheriting a protection level specified - * by the containing directory, this can fail; we could be trying to - * wrap this cache key in the class 'A' key while the device is locked. - * As such, emit an error if we fail to wrap the key here, instead of - * panicking. 
- */ - - error = cp_wrap(target_pclass, hfsmp, fileid, entry_arg); - if (error) { - goto out; - } - /* cp_wrap() potentially updates c_cpentry */ - entry = *entry_arg; - - /* set the pclass to the target since the wrap was successful */ - entry->cp_pclass = target_pclass; - - /* No need to go here for older EAs */ - if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { - cp_setup_aes_ctx(entry); - entry->cp_flags |= CP_OFF_IV_ENABLED; - } - - /* ready for business */ - entry->cp_flags &= ~CP_NEEDS_KEYS; - entry->cp_flags |= CP_NO_XATTR; - -out: + *outentry = entry; return error; } @@ -1450,43 +1584,52 @@ out: * cnode lock held exclusive */ static int -cp_restore_keys(struct cprotect *entry, struct hfsmount *hfsmp) +cp_restore_keys(struct cprotect *entry, struct hfsmount *hfsmp, struct cnode *cp) { int error = 0; - error = cp_unwrap(entry->cp_pclass, entry); + error = cp_unwrap(hfsmp, entry, cp); if (error) { entry->cp_flags |= CP_KEY_FLUSHED; bzero(entry->cp_cache_key, entry->cp_cache_key_len); error = EPERM; } else { - /* No need to go here for older EAs */ - if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { - cp_setup_aes_ctx(entry); - entry->cp_flags |= CP_OFF_IV_ENABLED; - } - /* ready for business */ entry->cp_flags &= ~CP_KEY_FLUSHED; - + } return error; } static int -cp_lock_vfs_callback(mount_t mp, void *arg) { - - /* - * When iterating the various mount points that may - * be present on a content-protected device, we need to skip - * those that do not have it enabled. - */ - if (!cp_fs_protected(mp)) { - return 0; - } - - return vnode_iterate(mp, 0, cp_lock_vnode_callback, arg); +cp_lock_vfs_callback(mount_t mp, void *arg) +{ + + /* Use a pointer-width integer field for casting */ + unsigned long new_state; + + /* + * When iterating the various mount points that may + * be present on a content-protected device, we need to skip + * those that do not have it enabled. + */ + if (!cp_fs_protected(mp)) { + return 0; + } + + new_state = (unsigned long) arg; + if (new_state == CP_LOCKED_STATE) { + /* + * We respond only to lock events. Since cprotect structs + * decrypt/restore keys lazily, the unlock events don't + * actually cause anything to happen. + */ + return vnode_iterate(mp, 0, cp_lock_vnode_callback, arg); + } + /* Otherwise just return 0. */ + return 0; + } @@ -1537,7 +1680,7 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg) struct cprotect *entry = NULL; int error = 0; int locked = 1; - int action = 0; + unsigned long action = 0; int took_truncate_lock = 0; error = vnode_getwithref (vp); @@ -1546,49 +1689,49 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg) } cp = VTOC(vp); - + /* * When cleaning cnodes due to a lock event, we must * take the truncate lock AND the cnode lock. By taking - * the truncate lock here, we force (nearly) all pending IOs + * the truncate lock here, we force (nearly) all pending IOs * to drain before we can acquire the truncate lock. All HFS cluster * io calls except for swapfile IO need to acquire the truncate lock * prior to calling into the cluster layer. 
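 *
 * The resulting lock/flush sequence for a lock event is, in outline
 * (the cnode lock is actually dropped around each ubc_msync() call):
 *
 *     hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 *     hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
 *     ubc_msync(vp, 0, ubc_getsize(vp), NULL,
 *               UBC_PUSHALL | UBC_INVALIDATE | UBC_SYNC); // drain dirty pages
 *     // zero cp_cache_key / cp_cache_iv_ctx, set CP_KEY_FLUSHED
 *     hfs_unlock(cp);
 *     hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
 *
 * HFS_LOCK_ALLOW_NOEXISTS is used presumably because the iterator can
 * encounter cnodes that are already being reclaimed.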
*/ - hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); took_truncate_lock = 1; - - hfs_lock(cp, HFS_FORCE_LOCK); - + + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + entry = cp->c_cpentry; if (!entry) { /* unprotected vnode: not a regular file */ goto out; } - - action = (int)((uintptr_t) arg); + + action = (unsigned long) arg; switch (action) { case CP_LOCKED_STATE: { vfs_context_t ctx; if (entry->cp_pclass != PROTECTION_CLASS_A || vnode_isdir(vp)) { - /* + /* * There is no change at lock for other classes than A. * B is kept in memory for writing, and class F (for VM) does - * not have a wrapped key, so there is no work needed for - * wrapping/unwrapping. - * - * Note that 'class F' is relevant here because if + * not have a wrapped key, so there is no work needed for + * wrapping/unwrapping. + * + * Note that 'class F' is relevant here because if * hfs_vnop_strategy does not take the cnode lock - * to protect the cp blob across IO operations, we rely - * implicitly on the truncate lock to be held when doing IO. - * The only case where the truncate lock is not held is during - * swapfile IO because HFS just funnels the VNOP_PAGEOUT - * directly to cluster_pageout. + * to protect the cp blob across IO operations, we rely + * implicitly on the truncate lock to be held when doing IO. + * The only case where the truncate lock is not held is during + * swapfile IO because HFS just funnels the VNOP_PAGEOUT + * directly to cluster_pageout. */ goto out; } - + /* Before doing anything else, zero-fill sparse ranges as needed */ ctx = vfs_context_current(); (void) hfs_filedone (vp, ctx); @@ -1596,26 +1739,26 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg) /* first, sync back dirty pages */ hfs_unlock (cp); ubc_msync (vp, 0, ubc_getsize(vp), NULL, UBC_PUSHALL | UBC_INVALIDATE | UBC_SYNC); - hfs_lock (cp, HFS_FORCE_LOCK); + hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* flush keys: * There was a concern here(9206856) about flushing keys before nand layer is done using them. * But since we are using ubc_msync with UBC_SYNC, it blocks until all IO is completed. * Once IOFS caches or is done with these keys, it calls the completion routine in IOSF. * Which in turn calls buf_biodone() and eventually unblocks ubc_msync() - * Also verified that the cached data in IOFS is overwritten by other data, and there + * Also verified that the cached data in IOFS is overwritten by other data, and there * is no key leakage in that layer. */ entry->cp_flags |= CP_KEY_FLUSHED; bzero(&entry->cp_cache_key, entry->cp_cache_key_len); bzero(&entry->cp_cache_iv_ctx, sizeof(aes_encrypt_ctx)); - + /* some write may have arrived in the mean time. dump those pages */ hfs_unlock(cp); locked = 0; - - ubc_msync (vp, 0, ubc_getsize(vp), NULL, UBC_INVALIDATE | UBC_SYNC); + + ubc_msync (vp, 0, ubc_getsize(vp), NULL, UBC_INVALIDATE | UBC_SYNC); break; } case CP_UNLOCKED_STATE: { @@ -1623,85 +1766,103 @@ cp_lock_vnode_callback(struct vnode *vp, void *arg) break; } default: - panic("Content Protection: unknown lock action %d\n", action); + panic("Content Protection: unknown lock action %lu\n", action); } - + out: if (locked) { hfs_unlock(cp); } - + if (took_truncate_lock) { - hfs_unlock_truncate (cp, 0); + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); } - + vnode_put (vp); return error; } + +/* + * cp_rewrap: + * + * Generate a new wrapped key based on the existing cache key. 
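 *
 * The actual cryptography is delegated to the keystore through
 * g_cp_wrap_func.rewrapper(); this routine only marshals the existing
 * wrapped blob in and the newly wrapped blob out.  In rough outline
 * (scratch is a caller-local CP_MAX_WRAPPEDKEYSIZE buffer, named only
 * for this sketch):
 *
 *     cp_cred_s        access;
 *     cp_wrapped_key_s in, out;
 *
 *     cp_init_access(&access, cp);
 *     in.key      = entry->cp_persistent_key;
 *     in.key_len  = entry->cp_persistent_key_len;
 *     in.dp_class = entry->cp_pclass;
 *     out.key     = scratch;
 *     out.key_len = CP_MAX_WRAPPEDKEYSIZE;
 *     error = g_cp_wrap_func.rewrapper(&access, newclass, &in, &out);
 *
 * On success a fresh cprotect is allocated for the new blob and swapped
 * into the cnode rather than mutating the old entry in place.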
+ */ + static int -cp_wrap(int class, struct hfsmount *hfsmp, cnid_t fileid, struct cprotect **entry_ptr) +cp_rewrap(struct cnode *cp, struct hfsmount *hfsmp, int newclass) { - - struct cprotect *entry = *entry_ptr; - uint8_t newkey[CP_MAX_WRAPPEDKEYSIZE]; + + struct cprotect *entry = cp->c_cpentry; + uint8_t new_persistent_key[CP_MAX_WRAPPEDKEYSIZE]; size_t keylen = CP_MAX_WRAPPEDKEYSIZE; int error = 0; + /* Structures passed between HFS and AKS */ + cp_cred_s access_in; + cp_wrapped_key_s wrapped_key_in; + cp_wrapped_key_s wrapped_key_out; + /* - * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient + * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient * key that is only good as long as the file is open. There is no - * wrapped key, so there isn't anything to wrap. + * wrapped key, so there isn't anything to wrap. */ - if (class == PROTECTION_CLASS_F) { - bzero(entry->cp_persistent_key, entry->cp_persistent_key_len); - entry->cp_persistent_key_len = 0; - return 0; + if (newclass == PROTECTION_CLASS_F) { + return EINVAL; } + cp_init_access(&access_in, cp); + + bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + wrapped_key_in.key = entry->cp_persistent_key; + wrapped_key_in.key_len = entry->cp_persistent_key_len; + wrapped_key_in.dp_class = entry->cp_pclass; + + bzero(&wrapped_key_out, sizeof(wrapped_key_out)); + wrapped_key_out.key = new_persistent_key; + wrapped_key_out.key_len = keylen; + /* * inode is passed here to find the backup bag wrapped blob * from userspace. This lookup will occur shortly after creation - * and only if the file still exists. Beyond this lookup the + * and only if the file still exists. Beyond this lookup the * inode is not used. Technically there is a race, we practically * don't lose. 
*/ - error = g_cp_wrap_func.wrapper(class, - fileid, - entry->cp_cache_key, - entry->cp_cache_key_len, - newkey, - &keylen); + error = g_cp_wrap_func.rewrapper(&access_in, + newclass, /* new class */ + &wrapped_key_in, + &wrapped_key_out); - if (!error) { + keylen = wrapped_key_out.key_len; + + if (error == 0) { + struct cprotect *newentry = NULL; /* - * v2 EA's don't support the larger class B keys + * v2 EA's don't support the larger class B keys */ if ((keylen != CP_V2_WRAPPEDKEYSIZE) && - (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) { + (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) { return EINVAL; } - /* - * Reallocate the entry if the new persistent key changed length - */ - if (entry->cp_persistent_key_len != keylen) { - struct cprotect *oldentry = entry; + /* Allocate a new cpentry */ + newentry = cp_entry_alloc (keylen); + bcopy (entry, newentry, sizeof(struct cprotect)); - entry = cp_entry_alloc(keylen); - if (entry == NULL) - return ENOMEM; + /* copy the new key into the entry */ + bcopy (new_persistent_key, newentry->cp_persistent_key, keylen); + newentry->cp_persistent_key_len = keylen; + newentry->cp_backing_cnode = cp; + newentry->cp_pclass = newclass; - bcopy(oldentry, entry, sizeof(struct cprotect)); - entry->cp_persistent_key_len = keylen; - - cp_entry_destroy (&oldentry); - - *entry_ptr = entry; - } + /* Attach the new entry to the cnode */ + cp->c_cpentry = newentry; - bcopy(newkey, entry->cp_persistent_key, keylen); - } + /* destroy the old entry */ + cp_entry_destroy (entry); + } else { error = EPERM; } @@ -1711,34 +1872,329 @@ cp_wrap(int class, struct hfsmount *hfsmp, cnid_t fileid, struct cprotect **entr static int -cp_unwrap(int class, struct cprotect *entry) +cp_unwrap(struct hfsmount *hfsmp, struct cprotect *entry, struct cnode *cp) { int error = 0; - size_t keylen = CP_MAX_KEYSIZE; + uint8_t iv_key[CP_IV_KEYSIZE]; + + /* Structures passed between HFS and AKS */ + cp_cred_s access_in; + cp_wrapped_key_s wrapped_key_in; + cp_raw_key_s key_out; /* - * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient + * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient * key that is only good as long as the file is open. There is no - * wrapped key, so there isn't anything to unwrap. + * wrapped key, so there isn't anything to unwrap. 
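 *
 * For any other class the keystore call both recovers the raw file key
 * and, on CP_NEW_MAJOR_VERS volumes, returns a separate IV key used to
 * seed the per-file AES IV context.  The output side of the handshake
 * is marshaled roughly as:
 *
 *     cp_raw_key_s key_out;
 *     key_out.key        = entry->cp_cache_key;  // filled on success
 *     key_out.key_len    = CP_MAX_KEYSIZE;
 *     key_out.iv_key     = iv_key;                // CP_IV_KEYSIZE scratch
 *     key_out.iv_key_len = CP_IV_KEYSIZE;
 *     error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, &key_out);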
*/ - if (class == PROTECTION_CLASS_F) { + if (entry->cp_pclass == PROTECTION_CLASS_F) { return EPERM; } - error = g_cp_wrap_func.unwrapper(class, - entry->cp_persistent_key, - entry->cp_persistent_key_len, - entry->cp_cache_key, - &keylen); + cp_init_access(&access_in, cp); + + bzero(&wrapped_key_in, sizeof(wrapped_key_in)); + wrapped_key_in.key = entry->cp_persistent_key; + wrapped_key_in.key_len = entry->cp_persistent_key_len; + wrapped_key_in.dp_class = entry->cp_pclass; + + bzero(&key_out, sizeof(key_out)); + key_out.key = entry->cp_cache_key; + key_out.key_len = CP_MAX_KEYSIZE; + key_out.iv_key = iv_key; + key_out.iv_key_len = CP_IV_KEYSIZE; + + error = g_cp_wrap_func.unwrapper(&access_in, &wrapped_key_in, &key_out); if (!error) { - entry->cp_cache_key_len = keylen; + entry->cp_cache_key_len = key_out.key_len; + + /* No need to go here for older EAs */ + if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { + aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx); + entry->cp_flags |= CP_OFF_IV_ENABLED; + } } else { error = EPERM; } + + return error; +} + +/* Setup AES context */ +static int +cp_setup_aes_ctx(struct cprotect *entry) +{ + SHA1_CTX sha1ctxt; + uint8_t cp_cache_iv_key[CP_IV_KEYSIZE]; /* Kiv */ + + /* First init the cp_cache_iv_key[] */ + SHA1Init(&sha1ctxt); + SHA1Update(&sha1ctxt, &entry->cp_cache_key[0], CP_MAX_KEYSIZE); + SHA1Final(&cp_cache_iv_key[0], &sha1ctxt); + + aes_encrypt_key128(&cp_cache_iv_key[0], &entry->cp_cache_iv_ctx); + + return 0; +} + +/* + * cp_generate_keys + * + * Take a cnode that has already been initialized and establish persistent and + * cache keys for it at this time. Note that at the time this is called, the + * directory entry has already been created and we are holding the cnode lock + * on 'cp'. + * + */ +int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, struct cprotect **newentry) +{ + + int error = 0; + struct cprotect *newcp = NULL; + *newentry = NULL; + + /* Validate that it has a cprotect already */ + if (cp->c_cpentry == NULL) { + /* We can't do anything if it shouldn't be protected. */ + return 0; + } + + /* Asserts for the underlying cprotect */ + if (cp->c_cpentry->cp_flags & CP_NO_XATTR) { + /* should already have an xattr by this point. */ + error = EINVAL; + goto out; + } + + if (S_ISREG(cp->c_mode)) { + if ((cp->c_cpentry->cp_flags & CP_NEEDS_KEYS) == 0){ + error = EINVAL; + goto out; + } + } + + error = cp_new (targetclass, hfsmp, cp, cp->c_mode, &newcp); + if (error) { + /* + * Key generation failed. This is not necessarily fatal + * since the device could have transitioned into the lock + * state before we called this. + */ + error = EPERM; + goto out; + } + + /* + * If we got here, then we have a new cprotect. + * Attempt to write the new one out. + */ + error = cp_setxattr (cp, newcp, hfsmp, cp->c_fileid, XATTR_REPLACE); + + if (error) { + /* Tear down the new cprotect; Tell MKB that it's invalid. Bail out */ + /* TODO: rdar://12170074 needs to be fixed before we can tell MKB */ + if (newcp) { + cp_entry_destroy(newcp); + } + goto out; + } + + /* + * If we get here then we can assert that: + * 1) generated wrapped/unwrapped keys. + * 2) wrote the new keys to disk. + * 3) cprotect is ready to go. 
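 *
 * A caller therefore uses this routine roughly as follows (sketch only;
 * real call sites hold the cnode lock throughout, per the header
 * comment above):
 *
 *     struct cprotect *newentry = NULL;
 *     error = cp_generate_keys(hfsmp, cp, targetclass, &newentry);
 *     if (error == 0) {
 *         cp_replace_entry(cp, newentry);  // destroys the old entry
 *     }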
+ */ + newcp->cp_flags &= ~CP_NEEDS_KEYS; + *newentry = newcp; + +out: return error; + } +void cp_replace_entry (struct cnode *cp, struct cprotect *newentry) +{ + + if (cp->c_cpentry) { + cp_entry_destroy (cp->c_cpentry); + } + cp->c_cpentry = newentry; + newentry->cp_backing_cnode = cp; + + return; +} + + +/* + * cp_new + * + * Given a double-pointer to a cprotect, generate keys (either in-kernel or from keystore), + * allocate a cprotect, and vend it back to the caller. + * + * Additionally, decide if keys are even needed -- directories get cprotect data structures + * but they do not have keys. + * + */ + +static int +cp_new(int newclass, struct hfsmount *hfsmp, struct cnode *cp, mode_t cmode, struct cprotect **output_entry) +{ + struct cprotect *entry = NULL; + int error = 0; + uint8_t new_key[CP_MAX_KEYSIZE]; + size_t new_key_len = CP_MAX_KEYSIZE; + uint8_t new_persistent_key[CP_MAX_WRAPPEDKEYSIZE]; + size_t new_persistent_len = CP_MAX_WRAPPEDKEYSIZE; + uint8_t iv_key[CP_IV_KEYSIZE]; + size_t iv_key_len = CP_IV_KEYSIZE; + + /* Structures passed between HFS and AKS */ + cp_cred_s access_in; + cp_wrapped_key_s wrapped_key_out; + cp_raw_key_s key_out; + + if (*output_entry != NULL) { + panic ("cp_new with non-null entry!"); + } + + if (!g_cp_state.wrap_functions_set) { + printf("hfs: cp_new: wrap/gen functions not yet set\n"); + return ENXIO; + } + + /* + * Step 1: Generate Keys if needed. + * + * For class F files, the kernel provides the key. + * PROTECTION_CLASS_F is in-use by VM swapfile; it represents a transient + * key that is only good as long as the file is open. There is no + * wrapped key, so there isn't anything to wrap. + * + * For class A->D files, the key store provides the key + * + * For Directories, we only give them a class ; no keys. + */ + if (S_ISDIR (cmode)) { + /* Directories */ + new_persistent_len = 0; + new_key_len = 0; + + error = 0; + } + else if (S_ISREG(cmode)) { + /* Files */ + if (newclass == PROTECTION_CLASS_F) { + new_key_len = CP_MAX_KEYSIZE; + read_random (&new_key[0], new_key_len); + new_persistent_len = 0; + + error = 0; + } + else { + /* + * The keystore is provided the file ID so that it can associate + * the wrapped backup blob with this key from userspace. This + * lookup occurs after successful file creation. Beyond this, the + * file ID is not used. Note that there is a potential race here if + * the file ID is re-used. + */ + cp_init_access(&access_in, cp); + + bzero(&key_out, sizeof(key_out)); + key_out.key = new_key; + key_out.key_len = new_key_len; + key_out.iv_key = iv_key; + key_out.iv_key_len = iv_key_len; + + bzero(&wrapped_key_out, sizeof(wrapped_key_out)); + wrapped_key_out.key = new_persistent_key; + wrapped_key_out.key_len = new_persistent_len; + + error = g_cp_wrap_func.new_key(&access_in, + newclass, + &key_out, + &wrapped_key_out); + + new_key_len = key_out.key_len; + iv_key_len = key_out.iv_key_len; + new_persistent_len = wrapped_key_out.key_len; + } + + } + else { + /* Something other than file or dir? */ + error = EPERM; + } + + /* + * Step 2: Allocate cprotect and initialize it. + */ + + if (error == 0) { + /* + * v2 EA's don't support the larger class B keys + */ + if ((new_persistent_len != CP_V2_WRAPPEDKEYSIZE) && + (hfsmp->hfs_running_cp_major_vers == CP_PREV_MAJOR_VERS)) { + return EINVAL; + } + + entry = cp_entry_alloc (new_persistent_len); + if (entry == NULL) { + return ENOMEM; + } + + *output_entry = entry; + + entry->cp_pclass = newclass; + + /* Copy the cache key & IV keys into place if needed. 
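 *
 * Note where the IV key originates in each case: class F keys never
 * leave the kernel, so their IV key is derived locally by
 * cp_setup_aes_ctx() as a SHA-1 digest of the cache key, whereas for
 * the other classes the keystore already handed back an explicit
 * iv_key and only the AES key-schedule expansion remains:
 *
 *     aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx);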
*/ + if (new_key_len > 0) { + bcopy (new_key, entry->cp_cache_key, new_key_len); + entry->cp_cache_key_len = new_key_len; + + /* Initialize the IV key */ + if (hfsmp->hfs_running_cp_major_vers == CP_NEW_MAJOR_VERS) { + if (newclass == PROTECTION_CLASS_F) { + /* class F needs a full IV initialize */ + cp_setup_aes_ctx(entry); + } + else { + /* Key store gave us an iv key. Just need to wrap it.*/ + aes_encrypt_key128(iv_key, &entry->cp_cache_iv_ctx); + } + entry->cp_flags |= CP_OFF_IV_ENABLED; + } + } + if (new_persistent_len > 0) { + bcopy(new_persistent_key, entry->cp_persistent_key, new_persistent_len); + } + } + else { + error = EPERM; + } + + return error; +} + +/* Initialize the cp_cred_t structure passed to AKS */ +static void cp_init_access(cp_cred_t access, struct cnode *cp) +{ + vfs_context_t context = vfs_context_current(); + kauth_cred_t cred = vfs_context_ucred(context); + proc_t proc = vfs_context_proc(context); + + bzero(access, sizeof(*access)); + + /* Note: HFS uses 32-bit fileID, even though inode is a 64-bit value */ + access->inode = cp->c_fileid; + access->pid = proc_pid(proc); + access->uid = kauth_cred_getuid(cred); + + return; +} #else diff --git a/bsd/hfs/hfs_dbg.h b/bsd/hfs/hfs_dbg.h index f39271fe4..f2c9aea08 100644 --- a/bsd/hfs/hfs_dbg.h +++ b/bsd/hfs/hfs_dbg.h @@ -80,7 +80,6 @@ extern void Debugger(const char *message); #endif -//#define PRINT_DELAY (void) tsleep((caddr_t)&lbolt, PPAUSE, "hfs kprintf", 0) #define PRINT_DELAY /* diff --git a/bsd/hfs/hfs_encodinghint.c b/bsd/hfs/hfs_encodinghint.c index 94b926e9e..232944bd9 100644 --- a/bsd/hfs/hfs_encodinghint.c +++ b/bsd/hfs/hfs_encodinghint.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2001-2013 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -73,6 +73,9 @@ u_int8_t cjk_encoding[] = { /* 1111 */ kTextEncodingMacJapanese }; + +#if CONFIG_HFS_STD + /* * CJK Mac Encoding Bitmap * @@ -736,6 +739,7 @@ static u_int8_t cjk_bitmap[] = { 0x0D,0x04,0x04,0x00,0x00,0x00,0x00,0x00, }; + /* * Pick a suitable Mac encoding value for a Unicode string. * @@ -897,6 +901,16 @@ hfs_pickencoding(const u_int16_t *src, int len) return guess; } +#else /* HFS standard *NOT* supported */ + +u_int32_t +hfs_pickencoding(__unused const u_int16_t *src, __unused int len) { + /* Just return kTextEncodingMacRoman if HFS standard is not supported. */ + return kTextEncodingMacRoman; +} + +#endif /* CONFIG_HFS_STD */ + __private_extern__ u_int32_t diff --git a/bsd/hfs/hfs_encodings.c b/bsd/hfs/hfs_encodings.c index 13c9781f8..65fae3049 100644 --- a/bsd/hfs/hfs_encodings.c +++ b/bsd/hfs/hfs_encodings.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Computer, Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,6 +25,10 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ + +#include +#include + #if HFS #include @@ -51,8 +55,6 @@ SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0}; lck_mtx_t encodinglst_mutex; - - /* hfs encoding converter entry */ struct hfs_encoding { SLIST_ENTRY(hfs_encoding) link; @@ -65,7 +67,9 @@ struct hfs_encoding { #define MAX_HFS_UNICODE_CHARS (15*5) +#if CONFIG_HFS_STD static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str); +#endif void hfs_converterinit(void) @@ -78,14 +82,62 @@ hfs_converterinit(void) lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr); +#if CONFIG_HFS_STD /* * add resident MacRoman converter and take a reference - * since its always "loaded". + * since its always "loaded". MacRoman is the default converter + * for HFS standard volumes. + * + * Only do this if we are actually supporting HFS standard + * volumes. The converter is not used on configurations + * that do not support HFS standard. */ hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman); SLIST_FIRST(&hfs_encoding_list)->refcount++; +#endif + +} + +#if !CONFIG_HFS_STD + +/* + * Function stubs are needed for KPI export. + * It is a little swizzly to have two separate copies of the stub functions in this file + * but the prototypes of these functions are different if we're using the real headers + * vs. the dummy prototypes at the end of the file. (hfs_to_unicode_func_t vs. void*) + * + * As a result, we need our own copies in the no-HFS-Standard configuration + */ +int hfs_addconverter( __unused int id, + __unused u_int32_t encoding, + __unused hfs_to_unicode_func_t get_unicode, + __unused unicode_to_hfs_func_t get_hfsname ) +{ + return(0); +} + +int hfs_getconverter( __unused u_int32_t encoding, + __unused hfs_to_unicode_func_t *get_unicode, + __unused unicode_to_hfs_func_t *get_hfsname) +{ + return(EINVAL); +} + +int hfs_relconverter(__unused u_int32_t encoding) +{ + return(EINVAL); } +int hfs_remconverter(__unused int id, __unused u_int32_t encoding) +{ + return(0); +} + +#else + +/* + * For configurations that do support HFS standard, we need all of these.. + */ /* * hfs_addconverter - add an HFS encoding converter @@ -225,7 +277,6 @@ hfs_relconverter(u_int32_t encoding) return (EINVAL); } - /* * Convert HFS encoded string into UTF-8 * @@ -269,7 +320,6 @@ hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCoun return error; } - /* * When an HFS name cannot be encoded with the current * volume encoding then MacRoman is used as a fallback. @@ -310,7 +360,6 @@ mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDst return error; } - /* * Convert Unicode string into HFS encoding * @@ -350,6 +399,7 @@ utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str return error; } + int utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr) { @@ -688,17 +738,22 @@ mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str, return noErr; } -#else /* not HFS - temp workaround until 4277828 is fixed */ -/* stubs for exported routines that aren't present when we build kernel without HFS */ +#endif /* CONFIG_STD_HFS */ -#include -#include +#else /* not HFS */ +/* + * These function prototypes are here because hfs.h is not #included + * so its prototypes are not provided. 
These are needed because they are exported + * as KPI for the conversion subroutines during mounting of HFS standard. + */ int hfs_addconverter(int id, u_int32_t encoding, void * get_unicode, void * get_hfsname); int hfs_getconverter(u_int32_t encoding, void *get_unicode, void *get_hfsname); int hfs_relconverter(u_int32_t encoding); int hfs_remconverter(int id, u_int32_t encoding); +/* Function stubs are needed for KPI export */ + int hfs_addconverter( __unused int id, __unused u_int32_t encoding, __unused void * get_unicode, @@ -721,5 +776,6 @@ int hfs_remconverter(__unused int id, __unused u_int32_t encoding) { return(0); } +#endif + -#endif /* HFS */ diff --git a/bsd/hfs/hfs_encodings.h b/bsd/hfs/hfs_encodings.h index 8d9dc4b08..f93ed4666 100644 --- a/bsd/hfs/hfs_encodings.h +++ b/bsd/hfs/hfs_encodings.h @@ -60,7 +60,6 @@ typedef int (* hfs_to_unicode_func_t)(const Str31 hfs_str, UniChar *uni_str, typedef int (* unicode_to_hfs_func_t)(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str); - int hfs_addconverter(int kmod_id, u_int32_t encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname); diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c index 367785b29..4e7dfea24 100644 --- a/bsd/hfs/hfs_endian.c +++ b/bsd/hfs/hfs_endian.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -50,9 +50,12 @@ * The kSwapBTNodeHeaderRecordOnly "direction" is not valid for these routines. */ int hfs_swap_HFSPlusBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction); -int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction); void hfs_swap_HFSPlusForkData (HFSPlusForkData *src); +#if CONFIG_HFS_STD +int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction); +#endif + /* * hfs_swap_HFSPlusForkData */ @@ -229,9 +232,12 @@ hfs_swap_BTNode ( if (VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) { error = hfs_swap_HFSPlusBTInternalNode (src, VTOC(vp)->c_fileid, direction); - } else { + } +#if CONFIG_HFS_STD + else { error = hfs_swap_HFSBTInternalNode (src, VTOC(vp)->c_fileid, direction); } +#endif if (error) goto fail; @@ -926,6 +932,7 @@ hfs_swap_HFSPlusBTInternalNode ( return (0); } +#if CONFIG_HFS_STD int hfs_swap_HFSBTInternalNode ( BlockDescriptor *src, @@ -1216,3 +1223,5 @@ hfs_swap_HFSBTInternalNode ( return (0); } +#endif + diff --git a/bsd/hfs/hfs_format.h b/bsd/hfs/hfs_format.h index 2cf6a0756..52bcd0e20 100644 --- a/bsd/hfs/hfs_format.h +++ b/bsd/hfs/hfs_format.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,15 +30,18 @@ #include #include +#include /* - * hfs_format.c + * hfs_format.h * * This file describes the on-disk format for HFS and HFS Plus volumes. * The HFS Plus volume format is desciibed in detail in Apple Technote 1150. 
* * http://developer.apple.com/technotes/tn/tn1150.html * + * Note: Starting 10.9, definition of struct HFSUniStr255 exists in hfs_unitstr.h + * */ #ifdef __cplusplus @@ -132,17 +135,6 @@ enum { }; -#ifndef _HFSUNISTR255_DEFINED_ -#define _HFSUNISTR255_DEFINED_ -/* Unicode strings are used for HFS Plus file and folder names */ -struct HFSUniStr255 { - u_int16_t length; /* number of unicode characters */ - u_int16_t unicode[255]; /* unicode characters */ -} __attribute__((aligned(2), packed)); -typedef struct HFSUniStr255 HFSUniStr255; -typedef const HFSUniStr255 *ConstHFSUniStr255Param; -#endif /* _HFSUNISTR255_DEFINED_ */ - enum { kHFSMaxVolumeNameChars = 27, kHFSMaxFileNameChars = 31, @@ -245,7 +237,7 @@ struct FndrExtendedFileInfo { u_int32_t date_added; u_int16_t extended_flags; u_int16_t reserved2; - u_int32_t reserved3; + u_int32_t write_gen_counter; } __attribute__((aligned(2), packed)); /* HFS Plus Fork data info - 80 bytes */ @@ -477,9 +469,9 @@ typedef struct HFSPlusCatalogThread HFSPlusCatalogThread; #ifdef __APPLE_API_UNSTABLE /* - These are the types of records in the attribute B-tree. The values were - chosen so that they wouldn't conflict with the catalog record types. -*/ + * These are the types of records in the attribute B-tree. The values were + * chosen so that they wouldn't conflict with the catalog record types. + */ enum { kHFSPlusAttrInlineData = 0x10, /* attributes whose data fits in a b-tree node */ kHFSPlusAttrForkData = 0x20, /* extent based attributes (data lives in extents) */ @@ -488,11 +480,11 @@ enum { /* - HFSPlusAttrForkData - For larger attributes, whose value is stored in allocation blocks. - If the attribute has more than 8 extents, there will be additional - records (of type HFSPlusAttrExtents) for this attribute. -*/ + * HFSPlusAttrForkData + * For larger attributes, whose value is stored in allocation blocks. + * If the attribute has more than 8 extents, there will be additional + * records (of type HFSPlusAttrExtents) for this attribute. + */ struct HFSPlusAttrForkData { u_int32_t recordType; /* == kHFSPlusAttrForkData*/ u_int32_t reserved; @@ -501,10 +493,10 @@ struct HFSPlusAttrForkData { typedef struct HFSPlusAttrForkData HFSPlusAttrForkData; /* - HFSPlusAttrExtents - This record contains information about overflow extents for large, - fragmented attributes. -*/ + * HFSPlusAttrExtents + * This record contains information about overflow extents for large, + * fragmented attributes. + */ struct HFSPlusAttrExtents { u_int32_t recordType; /* == kHFSPlusAttrExtents*/ u_int32_t reserved; @@ -537,7 +529,7 @@ struct HFSPlusAttrInlineData { typedef struct HFSPlusAttrInlineData HFSPlusAttrInlineData; -/* A generic Attribute Record*/ +/* A generic Attribute Record */ union HFSPlusAttrRecord { u_int32_t recordType; HFSPlusAttrInlineData inlineData; /* NOT USED */ @@ -597,18 +589,23 @@ enum { kHFSUnusedNodeFixBit = 31, /* Unused nodes in the Catalog B-tree have been zero-filled. See Radar #6947811. 
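 *
 * Each mask below is written as a literal but must stay equal to
 * (1 << bit) for its bit above; e.g. kHFSVolumeJournaledMask
 * (0x00002000) is 1 << kHFSVolumeJournaledBit (13).  A compile-time
 * guard along these lines would catch drift (illustrative only, not
 * part of this header):
 *
 *     _Static_assert(kHFSVolumeJournaledMask ==
 *                    (1u << kHFSVolumeJournaledBit),
 *                    "attribute masks out of sync with bit numbers");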
*/ kHFSContentProtectionBit = 30, /* Volume has per-file content protection */ - kHFSVolumeHardwareLockMask = 1 << kHFSVolumeHardwareLockBit, - kHFSVolumeUnmountedMask = 1 << kHFSVolumeUnmountedBit, - kHFSVolumeSparedBlocksMask = 1 << kHFSVolumeSparedBlocksBit, - kHFSVolumeNoCacheRequiredMask = 1 << kHFSVolumeNoCacheRequiredBit, - kHFSBootVolumeInconsistentMask = 1 << kHFSBootVolumeInconsistentBit, - kHFSCatalogNodeIDsReusedMask = 1 << kHFSCatalogNodeIDsReusedBit, - kHFSVolumeJournaledMask = 1 << kHFSVolumeJournaledBit, - kHFSVolumeInconsistentMask = 1 << kHFSVolumeInconsistentBit, - kHFSVolumeSoftwareLockMask = 1 << kHFSVolumeSoftwareLockBit, - kHFSUnusedNodeFixMask = 1 << kHFSUnusedNodeFixBit, - kHFSContentProtectionMask = 1 << kHFSContentProtectionBit, - kHFSMDBAttributesMask = 0x8380 + /*** Keep these in sync with the bits above ! ****/ + kHFSVolumeHardwareLockMask = 0x00000080, + kHFSVolumeUnmountedMask = 0x00000100, + kHFSVolumeSparedBlocksMask = 0x00000200, + kHFSVolumeNoCacheRequiredMask = 0x00000400, + kHFSBootVolumeInconsistentMask = 0x00000800, + kHFSCatalogNodeIDsReusedMask = 0x00001000, + kHFSVolumeJournaledMask = 0x00002000, + kHFSVolumeInconsistentMask = 0x00004000, + kHFSVolumeSoftwareLockMask = 0x00008000, + + /* Bits 16-31 are allocated from high to low */ + + kHFSContentProtectionMask = 0x40000000, + kHFSUnusedNodeFixMask = 0x80000000, + + kHFSMDBAttributesMask = 0x8380 }; enum { diff --git a/bsd/hfs/hfs_fsctl.h b/bsd/hfs/hfs_fsctl.h index d19d8e7d4..ccd86e8d3 100644 --- a/bsd/hfs/hfs_fsctl.h +++ b/bsd/hfs/hfs_fsctl.h @@ -92,6 +92,12 @@ struct hfs_journal_info { #define HFSIOC_GETPATH _IOWR('h', 13, pathname_t) #define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH) +/* By default, the path returned by HFS_GETPATH is an absolute path, + * i.e. it also contains the mount point of the volume on which the + * fileID exists. If the following bit is set, the path returned is + * relative to the root of the volume. + */ +#define HFS_GETPATH_VOLUME_RELATIVE 0x1 /* Enable/disable extent-based extended attributes */ #define HFSIOC_SET_XATTREXTENTS_STATE _IOW('h', 14, u_int32_t) @@ -139,6 +145,9 @@ struct hfs_journal_info { #define HFSIOC_GET_DESIRED_DISK _IOR('h', 29, u_int32_t) #define HFS_FSCTL_GET_DESIRED_DISK IOCBASECMD(HFSIOC_GET_DESIRED_DISK) +#define HFSIOC_GET_WRITE_GEN_COUNTER _IOR('h', 30, u_int32_t) +#define HFS_GET_WRITE_GEN_COUNTER IOCBASECMD(HFSIOC_GET_WRITE_GEN_COUNTER) + #endif /* __APPLE_API_UNSTABLE */ #endif /* ! _HFS_FSCTL_H_ */ diff --git a/bsd/hfs/hfs_hotfiles.c b/bsd/hfs/hfs_hotfiles.c index 50a29e223..bde852475 100644 --- a/bsd/hfs/hfs_hotfiles.c +++ b/bsd/hfs/hfs_hotfiles.c @@ -468,7 +468,7 @@ hfs_recording_suspend(struct hfsmount *hfsmp) error = EINVAL; goto out; } - if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { error = EPERM; goto end_transaction; } @@ -615,7 +615,7 @@ hfs_recording_init(struct hfsmount *hfsmp) error = EINVAL; goto out1; } - if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { error = EPERM; goto out0; } @@ -792,10 +792,17 @@ hfs_addhotfile_internal(struct vnode *vp) if (hfsmp->hfc_stage != HFC_RECORDING) return (0); - /* Only regular files are allowed for hotfile inclusion ; symlinks disallowed */ - if ((!vnode_isreg(vp)) || vnode_issystem(vp)) { + /* + * Only regular files are eligible for hotfiles addition. 
+ * + * Symlinks were previously added to the list and may exist in + * extant hotfiles regions, but no new ones will be added, and no + * symlinks will now be relocated/evicted from the hotfiles region. + */ + if (!vnode_isreg(vp) || vnode_issystem(vp)) { return (0); } + /* Skip resource forks for now. */ if (VNODE_IS_RSRC(vp)) { return (0); @@ -863,7 +870,6 @@ hfs_removehotfile(struct vnode *vp) if (hfsmp->hfc_stage != HFC_RECORDING) return (0); - /* Only regular files can move out of hotfiles */ if ((!vnode_isreg(vp)) || vnode_issystem(vp)) { return (0); } @@ -987,7 +993,7 @@ hotfiles_refine(struct hfsmount *hfsmp) error = EINVAL; goto out; } - if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { error = EPERM; goto out1; } @@ -1092,7 +1098,7 @@ hotfiles_adopt(struct hfsmount *hfsmp) if (hfsmp->hfc_stage != HFC_ADOPTION) { return (EBUSY); } - if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { return (EPERM); } @@ -1140,9 +1146,8 @@ hotfiles_adopt(struct hfsmount *hfsmp) } break; } - - /* only regular files are eligible */ - if (!vnode_isreg(vp)) { + if (!vnode_isreg(vp)) { + /* Symlinks are ineligible for adoption into the hotfile zone. */ printf("hfs: hotfiles_adopt: huh, not a file %d (%d)\n", listp->hfl_hotfile[i].hf_fileid, VTOC(vp)->c_cnid); hfs_unlock(VTOC(vp)); vnode_put(vp); @@ -1303,7 +1308,7 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) if ((listp = (hotfilelist_t *)hfsmp->hfc_recdata) == NULL) return (0); - if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { return (EPERM); } @@ -1366,13 +1371,17 @@ hotfiles_evict(struct hfsmount *hfsmp, vfs_context_t ctx) break; } - /* only regular files are eligible */ + /* + * Symlinks that may have been inserted into the hotfile zone during a previous OS are now stuck + * here. We do not want to move them. 
+ */ if (!vnode_isreg(vp)) { printf("hfs: hotfiles_evict: huh, not a file %d\n", key->fileID); hfs_unlock(VTOC(vp)); vnode_put(vp); goto delete; /* invalid entry, go to next */ } + fileblocks = VTOF(vp)->ff_blocks; if ((blksmoved > 0) && (blksmoved + fileblocks) > HFC_BLKSPERSYNC) { @@ -1527,7 +1536,7 @@ hotfiles_age(struct hfsmount *hfsmp) error = EINVAL; goto out2; } - if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK) != 0) { + if (hfs_lock(VTOC(hfsmp->hfc_filevp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { error = EPERM; goto out1; } @@ -1703,7 +1712,7 @@ hfc_btree_open(struct hfsmount *hfsmp, struct vnode **vpp) lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_lookup(hfsmp, &cdesc, 0, &cdesc, &cattr, &cfork, NULL); + error = cat_lookup(hfsmp, &cdesc, 0, 0, &cdesc, &cattr, &cfork, NULL); hfs_systemfile_unlock(hfsmp, lockflags); @@ -1770,7 +1779,7 @@ hfc_btree_close(struct hfsmount *hfsmp, struct vnode *vp) } if (vnode_get(vp) == 0) { - error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (error == 0) { (void) hfs_fsync(vp, MNT_WAIT, 0, p); error = BTClosePath(VTOF(vp)); @@ -1838,7 +1847,7 @@ hfc_btree_create(struct hfsmount *hfsmp, unsigned int nodesize, unsigned int ent vnode_put(dvp); dvp = NULL; } - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { goto out; } cp = VTOC(vp); diff --git a/bsd/hfs/hfs_kdebug.h b/bsd/hfs/hfs_kdebug.h index d3202bca4..f8451a64c 100644 --- a/bsd/hfs/hfs_kdebug.h +++ b/bsd/hfs/hfs_kdebug.h @@ -28,6 +28,10 @@ enum { HFSDBG_RESET_EXTENT_CACHE = HFSDBG_CODE(16), /* 0x03080040 */ HFSDBG_REMOVE_EXTENT_CACHE = HFSDBG_CODE(17), /* 0x03080044 */ HFSDBG_ADD_EXTENT_CACHE = HFSDBG_CODE(18), /* 0x03080048 */ + HFSDBG_READ_BITMAP_RANGE = HFSDBG_CODE(19), /* 0x0308004C */ + HFSDBG_RELEASE_SCAN_BITMAP = HFSDBG_CODE(20), /* 0x03080050 */ + HFSDBG_SYNCER = HFSDBG_CODE(21), /* 0x03080054 */ + HFSDBG_SYNCER_TIMED = HFSDBG_CODE(22), /* 0x03080058 */ }; /* @@ -51,4 +55,7 @@ enum { HFSDBG_READ_BITMAP_BLOCK startBlock, 0, 0, 0 ... err, 0, 0, 0 HFSDBG_RELEASE_BITMAP_BLOCK dirty, 0, 0, 0 ... 0, 0, 0, 0 HFSDBG_RESET_EXTENT_CACHE 0, 0, 0, 0 ... 0, 0, 0, 0 + HFSDBG_READ_BITMAP_RANGE startBlock, iosize, 0, 0 ... err, 0, 0, 0 + HFSDBG_RELEASE_SCAN_BITMAP 0, 0, 0, 0, ... 0, 0, 0, 0 + */ diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c index 43b4a26e4..73afb95d3 100644 --- a/bsd/hfs/hfs_link.c +++ b/bsd/hfs/hfs_link.c @@ -212,8 +212,8 @@ hfs_makelink(struct hfsmount *hfsmp, struct vnode *src_vp, struct cnode *cp, &to_desc, NULL); if (retval != 0 && retval != EEXIST) { - printf("hfs_makelink: cat_rename to %s failed (%d). fileid %d\n", - inodename, retval, cp->c_fileid); + printf("hfs_makelink: cat_rename to %s failed (%d) fileid=%d, vol=%s\n", + inodename, retval, cp->c_fileid, hfsmp->vcbVN); } } while ((retval == EEXIST) && (type == FILE_HARDLINKS)); if (retval) @@ -428,6 +428,7 @@ hfs_vnop_link(struct vnop_link_args *ap) return (EPERM); } if (v_type == VDIR) { +#if CONFIG_HFS_DIRLINK /* Make sure our private directory exists. */ if (hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid == 0) { return (EPERM); @@ -444,6 +445,10 @@ hfs_vnop_link(struct vnop_link_args *ap) if ((error = hfs_vget(hfsmp, hfs_currentparent(VTOC(vp)), &fdvp, 1, 0))) { return (error); } +#else + /* some platforms don't support directory hardlinks. 
*/ + return EPERM; +#endif } else { /* Make sure our private directory exists. */ if (hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid == 0) { @@ -531,7 +536,7 @@ hfs_vnop_link(struct vnop_link_args *ap) lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); /* If destination exists then we lost a race with create. */ - if (cat_lookup(hfsmp, &todesc, 0, NULL, NULL, NULL, NULL) == 0) { + if (cat_lookup(hfsmp, &todesc, 0, 0, NULL, NULL, NULL, NULL) == 0) { error = EEXIST; goto out; } @@ -548,7 +553,7 @@ hfs_vnop_link(struct vnop_link_args *ap) cnid_t fileid; /* If source is missing then we lost a race with unlink. */ - if ((cat_lookup(hfsmp, &cp->c_desc, 0, NULL, NULL, NULL, &fileid) != 0) || + if ((cat_lookup(hfsmp, &cp->c_desc, 0, 0, NULL, NULL, NULL, &fileid) != 0) || (fileid != cp->c_fileid)) { error = ENOENT; goto out; @@ -600,7 +605,7 @@ hfs_vnop_link(struct vnop_link_args *ap) /* Set kHFSHasChildLinkBit in the destination hierarchy */ error = cat_set_childlinkbit(hfsmp, tdcp->c_parentcnid); if (error) { - printf ("hfs_vnop_link: error updating destination parent chain for %u\n", tdcp->c_cnid); + printf ("hfs_vnop_link: error updating destination parent chain for id=%u, vol=%s\n", tdcp->c_cnid, hfsmp->vcbVN); error = 0; } } @@ -629,7 +634,7 @@ hfs_vnop_link(struct vnop_link_args *ap) /* Set kHFSHasChildLinkBit in the source hierarchy */ error = cat_set_childlinkbit(hfsmp, fdcp->c_parentcnid); if (error) { - printf ("hfs_vnop_link: error updating source parent chain for %u\n", fdcp->c_cnid); + printf ("hfs_vnop_link: error updating source parent chain for id=%u, vol=%s\n", fdcp->c_cnid, hfsmp->vcbVN); error = 0; } } @@ -848,6 +853,20 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c if (nextlinkid) { (void) cat_update_siblinglinks(hfsmp, nextlinkid, prevlinkid, HFS_IGNORABLE_LINK); } + + /* + * The call to cat_releasedesc below will only release the name buffer; + * it does not zero out the rest of the fields in the 'cat_desc' data structure. + * + * As a result, since there are still other links at this point, we need + * to make the current cnode descriptor point to the raw inode. If a path-based + * system call comes along first, it will replace the descriptor with a valid link + * ID. If a userland process already has a file descriptor open, then they will + * bypass that lookup, though. Replacing the descriptor CNID with the raw + * inode will force it to generate a new full path. + */ + cp->c_cnid = cp->c_fileid; + } /* Push new link count to disk. */ @@ -927,7 +946,7 @@ hfs_privatedir_init(struct hfsmount * hfsmp, enum privdirtype type) priv_descp->cd_flags = CD_ISDIR | CD_DECOMPOSED; lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_lookup(hfsmp, priv_descp, 0, NULL, priv_attrp, NULL, NULL); + error = cat_lookup(hfsmp, priv_descp, 0, 0, NULL, priv_attrp, NULL, NULL); hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { @@ -971,7 +990,8 @@ hfs_privatedir_init(struct hfsmount * hfsmp, enum privdirtype type) } trans = 1; - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + /* Need the catalog and EA b-trees for CNID acquisition */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); /* Make sure there's space in the Catalog file. 
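 *
 * via cat_preflight().  With the catalog and attribute b-trees held
 * exclusive, creation then proceeds in two steps: first reserve a CNID
 * for the new directory, then create the record under that explicit ID,
 * roughly:
 *
 *     if ((error = cat_acquire_cnid(hfsmp, &new_id)) == 0)
 *         error = cat_create(hfsmp, new_id, priv_descp, priv_attrp, NULL);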
*/ if (cat_preflight(hfsmp, CAT_CREATE, NULL, 0) != 0) { @@ -979,8 +999,15 @@ hfs_privatedir_init(struct hfsmount * hfsmp, enum privdirtype type) goto exit; } + /* Get the CNID for use */ + cnid_t new_id; + if ((error = cat_acquire_cnid(hfsmp, &new_id))) { + hfs_systemfile_unlock (hfsmp, lockflags); + goto exit; + } + /* Create the private directory on disk. */ - error = cat_create(hfsmp, priv_descp, priv_attrp, NULL); + error = cat_create(hfsmp, new_id, priv_descp, priv_attrp, NULL); if (error == 0) { priv_descp->cd_cnid = priv_attrp->ca_fileid; @@ -1043,6 +1070,30 @@ hfs_lookup_siblinglinks(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevl return (error); } + +/* Find the oldest / last hardlink in the link chain */ +int +hfs_lookup_lastlink (struct hfsmount *hfsmp, cnid_t linkfileid, + cnid_t *lastid, struct cat_desc *cdesc) { + int lockflags; + int error; + + *lastid = 0; + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + + error = cat_lookup_lastlink(hfsmp, linkfileid, lastid, cdesc); + + hfs_systemfile_unlock(hfsmp, lockflags); + + /* + * cat_lookup_lastlink will zero out the lastid/cdesc arguments as needed + * upon error cases. + */ + return error; +} + + /* * Cache the origin of a directory or file hard link * diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c index 2200fe1de..c2599a8e3 100644 --- a/bsd/hfs/hfs_lookup.c +++ b/bsd/hfs/hfs_lookup.c @@ -151,7 +151,7 @@ * When should we lock parent_hp in here ?? */ static int -hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int *cnode_locked) +hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int *cnode_locked, int force_casesensitive_lookup) { struct cnode *dcp; /* cnode for directory being searched */ struct vnode *tvp; /* target vnode */ @@ -190,7 +190,7 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int cnp->cn_flags &= ~MAKEENTRY; goto found; /* We always know who we are */ } else { - if (hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK) != 0) { + if (hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { retval = ENOENT; /* The parent no longer exists ? */ goto exit; } @@ -207,10 +207,15 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int goto retry; } - /* No need to go to catalog if there are no children */ - if (dcp->c_entries == 0) { - goto notfound; - } + + /* + * We shouldn't need to go to the catalog if there are no children. + * However, in the face of a minor disk corruption where the valence of + * the directory is off, we could infinite loop here if we return ENOENT + * even though there are actually items in the directory. (create will + * see the ENOENT, try to create something, which will return with + * EEXIST over and over again). As a result, always check the catalog. 
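 *
 * The probe below also threads through the force-case-sensitivity
 * argument newly added to cat_lookup() in this release; the call shape
 * is
 *
 *     retval = cat_lookup(hfsmp, &cndesc, 0, force_casesensitive_lookup,
 *                         &desc, &attr, &fork, NULL);
 *
 * so a per-process case-sensitive lookup can be honored even on a
 * case-insensitive volume.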
+ */ bzero(&cndesc, sizeof(cndesc)); cndesc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr; @@ -220,7 +225,7 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - retval = cat_lookup(hfsmp, &cndesc, 0, &desc, &attr, &fork, NULL); + retval = cat_lookup(hfsmp, &cndesc, 0, force_casesensitive_lookup, &desc, &attr, &fork, NULL); hfs_systemfile_unlock(hfsmp, lockflags); @@ -242,7 +247,7 @@ hfs_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int goto found; } -notfound: + /* * ENAMETOOLONG supersedes other errors * @@ -255,6 +260,25 @@ notfound: retval = ENAMETOOLONG; } else if (retval == 0) { retval = ENOENT; + } else if (retval == ERESERVEDNAME) { + /* + * We found the name in the catalog, but it is unavailable + * to us. The exact error to return to our caller depends + * on the operation, and whether we've already reached the + * last path component. In all cases, avoid a negative + * cache entry, since someone else may be able to access + * the name if their lookup is configured differently. + */ + + cnp->cn_flags &= ~MAKEENTRY; + + if (((flags & ISLASTCN) == 0) || ((nameiop == LOOKUP) || (nameiop == DELETE))) { + /* A reserved name for a pure lookup is the same as the path not being present */ + retval = ENOENT; + } else { + /* A reserved name with intent to create must be rejected as impossible */ + retval = EEXIST; + } } if (retval != ENOENT) goto exit; @@ -448,7 +472,9 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; + struct proc *p = vfs_context_proc(ap->a_context); int flags = cnp->cn_flags; + int force_casesensitive_lookup = proc_is_forcing_hfs_case_sensitivity(p); int cnode_locked; *vpp = NULL; @@ -498,15 +524,36 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) * getattrlist calls to return the correct link info. */ - if ((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK)) { + /* + * Alternatively, if we are forcing a case-sensitive lookup + * on a case-insensitive volume, the namecache entry + * may have been for an incorrect case. Since we cannot + * determine case vs. normalization, redrive the catalog + * lookup based on any byte mismatch. + */ + if (((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK)) + || (force_casesensitive_lookup && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE))) { int stale_link = 0; - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); if ((cp->c_parentcnid != dcp->c_cnid) || + (cnp->cn_namelen != cp->c_desc.cd_namelen) || (bcmp(cnp->cn_nameptr, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0)) { struct cat_desc desc; struct cat_attr lookup_attr; int lockflags; + + if (force_casesensitive_lookup && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) { + /* + * Since the name in the cnode doesn't match our lookup + * string exactly, do a full lookup. 
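 *
 * "Doesn't match" is decided bytewise: both the stored name length and
 * the bytes themselves must agree before the cached vnode is trusted,
 * roughly
 *
 *     (cnp->cn_namelen == cp->c_desc.cd_namelen) &&
 *     (bcmp(cnp->cn_nameptr, cp->c_desc.cd_nameptr,
 *           cp->c_desc.cd_namelen) == 0)
 *
 * since a pure byte compare cannot distinguish a case variant from a
 * normalization variant; only the catalog lookup can.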
+ */ + hfs_unlock (cp); + + vnode_put(vp); + goto lookup; + } + /* * Get an updated descriptor */ @@ -541,7 +588,7 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap) lockflags = hfs_systemfile_lock(VTOHFS(dvp), SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_lookup(VTOHFS(vp), &desc, 0, &desc, &lookup_attr, NULL, NULL); + error = cat_lookup(VTOHFS(vp), &desc, 0, 0, &desc, &lookup_attr, NULL, NULL); hfs_systemfile_unlock(VTOHFS(dvp), lockflags); @@ -604,7 +651,7 @@ lookup: */ cnode_locked = 0; - error = hfs_lookup(dvp, vpp, cnp, &cnode_locked); + error = hfs_lookup(dvp, vpp, cnp, &cnode_locked, force_casesensitive_lookup); if (cnode_locked) hfs_unlock(VTOC(*vpp)); @@ -623,7 +670,7 @@ exit: * completed and throttling at the systemcall return */ if (__improbable(ut->uu_lowpri_window)) { - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); } } diff --git a/bsd/hfs/hfs_notification.c b/bsd/hfs/hfs_notification.c index 2423db07a..c63d6307a 100644 --- a/bsd/hfs/hfs_notification.c +++ b/bsd/hfs/hfs_notification.c @@ -58,16 +58,14 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp) { fsid_t fsid; u_int32_t freeblks, state=999; - char *volname = NULL; - if (hfsmp->vcbVN) { - if (strlen((char*)hfsmp->vcbVN) < 256) { - volname = (char*) hfsmp->vcbVN; - } + /* Do not generate low disk notifications for read-only volumes */ + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return; } - fsid.val[0] = (long)hfsmp->hfs_raw_dev; - fsid.val[1] = (long)vfs_typenum(HFSTOVFS(hfsmp)); + fsid.val[0] = hfsmp->hfs_raw_dev; + fsid.val[1] = vfs_typenum(HFSTOVFS(hfsmp)); freeblks = hfs_freeblks(hfsmp, 1); @@ -79,14 +77,10 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp) state = 0; } + /* Free blocks are less than dangerlimit for the first time */ if (state == 2 && !(hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK)) { /* Dump some logging to track down intermittent issues */ - if (volname) { - printf("HFS: Vol: %s Very Low Disk: freeblks: %d, dangerlimit: %d\n", volname, freeblks, hfsmp->hfs_freespace_notify_dangerlimit); - } - else { - printf("HFS: Very Low Disk: freeblks: %d, dangerlimit: %d\n", freeblks, hfsmp->hfs_freespace_notify_dangerlimit); - } + printf("hfs: set VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_dangerlimit); #if HFS_SPARSE_DEV if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { @@ -94,12 +88,7 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp) struct mount *mp = vnode_mount (hfsmp->hfs_backingfs_rootvp); /* If we're a sparse device, dump some info about the backing store... 
*/ if (mp) { - if (volname) { - printf("HFS: Vol: %s Very Low Disk: backingstore b_avail %lld, tag %d\n", volname, mp->mnt_vfsstat.f_bavail, hfsmp->hfs_backingfs_rootvp->v_tag); - } - else { - printf("HFS: Very Low Disk: backingstore b_avail %lld, tag %d\n", mp->mnt_vfsstat.f_bavail, hfsmp->hfs_backingfs_rootvp->v_tag); - } + printf("hfs: set VeryLowDisk: vol:%s, backingstore b_avail:%lld, tag:%d\n", hfsmp->vcbVN, mp->mnt_vfsstat.f_bavail, hfsmp->hfs_backingfs_rootvp->v_tag); } } } @@ -107,21 +96,26 @@ void hfs_generate_volume_notifications(struct hfsmount *hfsmp) hfsmp->hfs_notification_conditions |= (VQ_VERYLOWDISK|VQ_LOWDISK); vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); } else if (state == 1) { + /* Free blocks are less than warning limit for the first time */ if (!(hfsmp->hfs_notification_conditions & VQ_LOWDISK)) { - if (volname) { - printf("HFS: Low Disk: Vol: %s freeblks: %d, warninglimit: %d\n", volname, freeblks, hfsmp->hfs_freespace_notify_warninglimit); - } - else { - printf("HFS: Low Disk: freeblks: %d, warninglimit: %d\n", freeblks, hfsmp->hfs_freespace_notify_warninglimit); - } + printf("hfs: set LowDisk: vol:%s, freeblks:%d, warninglimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_warninglimit); hfsmp->hfs_notification_conditions |= VQ_LOWDISK; vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); } else if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) { + /* Free blocks count has increased from danger limit to warning limit, so just clear VERYLOWDISK warning */ + printf("hfs: clear VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_dangerlimit); hfsmp->hfs_notification_conditions &= ~VQ_VERYLOWDISK; vfs_event_signal(&fsid, hfsmp->hfs_notification_conditions, (intptr_t)NULL); } } else if (state == 0) { + /* Free blocks count has increased to desirable level, so clear all conditions */ if (hfsmp->hfs_notification_conditions & (VQ_LOWDISK|VQ_VERYLOWDISK)) { + if (hfsmp->hfs_notification_conditions & VQ_LOWDISK) { + printf("hfs: clear LowDisk: vol:%s, freeblks:%d, warninglimit:%d, desiredlevel:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_warninglimit, hfsmp->hfs_freespace_notify_desiredlevel); + } + if (hfsmp->hfs_notification_conditions & VQ_VERYLOWDISK) { + printf("hfs: clear VeryLowDisk: vol:%s, freeblks:%d, dangerlimit:%d\n", hfsmp->vcbVN, freeblks, hfsmp->hfs_freespace_notify_warninglimit); + } hfsmp->hfs_notification_conditions &= ~(VQ_VERYLOWDISK|VQ_LOWDISK); if (hfsmp->hfs_notification_conditions == 0) { vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL); diff --git a/bsd/hfs/hfs_quota.c b/bsd/hfs/hfs_quota.c index 7224278c7..989bd67bf 100644 --- a/bsd/hfs/hfs_quota.c +++ b/bsd/hfs/hfs_quota.c @@ -572,9 +572,9 @@ hfs_quotaon(p, mp, type, fnamep) goto out; } vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_QUOTA)); - HFS_MOUNT_LOCK(hfsmp, TRUE) + hfs_lock_mount (hfsmp); hfsmp->hfs_flags |= HFS_QUOTAS; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); vnode_setnoflush(vp); /* * Save the credential of the process that turned on quotas. 
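 *
 * hfs_lock_mount()/hfs_unlock_mount(), used just above and again in
 * hfs_quotaoff() below, serialize updates to mount-wide state such as
 * hfs_flags.  The usage pattern is a plain bracketed critical section,
 * sketched:
 *
 *     hfs_lock_mount(hfsmp);
 *     hfsmp->hfs_flags |= HFS_QUOTAS;   // or &= ~HFS_QUOTAS on quotaoff
 *     hfs_unlock_mount(hfsmp);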
@@ -704,9 +704,9 @@ hfs_quotaoff(__unused struct proc *p, struct mount *mp, register int type) break; if (type == MAXQUOTAS) { vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_QUOTA)); - HFS_MOUNT_LOCK(hfsmp, TRUE) + hfs_lock_mount (hfsmp); hfsmp->hfs_flags &= ~HFS_QUOTAS; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); } qf_put(qfp, QTF_CLOSING); diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index b9bcdd036..a3f653fc4 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -84,12 +84,15 @@ enum { /* from bsd/hfs/hfs_vfsops.c */ extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); -static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *); static int hfs_clonefile(struct vnode *, int, int, int); static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *); static int hfs_minorupdate(struct vnode *vp); static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context); +/* from bsd/hfs/hfs_vnops.c */ +extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp); + + int flush_cache_on_write = 0; SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files"); @@ -136,6 +139,8 @@ hfs_vnop_read(struct vnop_read_args *ap) if (offset < 0) return (EINVAL); /* cant read from a negative offset */ + + #if HFS_COMPRESSION if (VNODE_IS_RSRC(vp)) { if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */ @@ -185,9 +190,7 @@ hfs_vnop_read(struct vnop_read_args *ap) /* * If this read request originated from a syscall (as opposed to * an in-kernel page fault or something), then set it up for - * throttle checks. For example, large EAs may cause a VNOP_READ - * to occur, and we wouldn't want to throttle I/O while holding the - * EA B-Tree lock. + * throttle checks */ if (ap->a_ioflag & IO_SYSCALL_DISPATCH) { io_throttle = IO_RETURN_ON_THROTTLE; @@ -196,7 +199,7 @@ hfs_vnop_read(struct vnop_read_args *ap) read_again: /* Protect against a size change. */ - hfs_lock_truncate(cp, HFS_SHARED_LOCK); + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); took_truncate_lock = 1; filesize = fp->ff_size; @@ -212,7 +215,7 @@ read_again: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); - retval = cluster_read(vp, uio, filesize, ap->a_ioflag | (io_throttle)); + retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle); cp->c_touch_acctime = TRUE; @@ -230,7 +233,7 @@ read_again: /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) { - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); took_cnode_lock = 1; } /* @@ -251,7 +254,7 @@ read_again: } exit: if (took_truncate_lock) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } if (retval == EAGAIN) { throttle_lowpri_io(1); @@ -324,7 +327,6 @@ hfs_vnop_write(struct vnop_write_args *ap) #endif - // LP64todo - fix this! 
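
The quota hunks above are part of an API cleanup that runs through the whole patch: the HFS_MOUNT_LOCK(hfsmp, TRUE)/HFS_MOUNT_UNLOCK(hfsmp, TRUE) macro pair becomes the hfs_lock_mount()/hfs_unlock_mount() functions, and the cnode lock calls gain an explicit flags argument, with the old HFS_FORCE_LOCK spelled as HFS_EXCLUSIVE_LOCK plus HFS_LOCK_ALLOW_NOEXISTS. A toy sketch of the split interface; the enum names mirror the diff, but the cnode struct and return behavior here are stand-ins, not the real implementation:

    #include <errno.h>

    typedef enum { HFS_SHARED_LOCK = 1, HFS_EXCLUSIVE_LOCK = 2 } hfs_locktype;
    typedef enum {
        HFS_LOCK_DEFAULT        = 0x0,  /* refuse cnodes marked deleted */
        HFS_LOCK_ALLOW_NOEXISTS = 0x1   /* old HFS_FORCE_LOCK behavior */
    } hfs_lockflags;

    struct cnode_sketch { int noexists; int excl; };

    /* The lock type picks shared vs. exclusive; the flags pick policy. */
    static int hfs_lock_sketch(struct cnode_sketch *cp,
                               hfs_locktype type, hfs_lockflags flags)
    {
        if (cp->noexists && !(flags & HFS_LOCK_ALLOW_NOEXISTS))
            return ENOENT;   /* default callers must not touch deleted cnodes */
        cp->excl = (type == HFS_EXCLUSIVE_LOCK);
        return 0;
    }

Splitting "how strong a lock" from "what the caller tolerates" is what lets the patch replace every HFS_FORCE_LOCK site with an explicit, greppable pair of arguments.
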
uio_resid may be 64-bit value resid = uio_resid(uio); offset = uio_offset(uio); @@ -359,10 +361,11 @@ hfs_vnop_write(struct vnop_write_args *ap) } #endif /* HFS_SPARSE_DEV */ - if ((ioflag & (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) == - (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) { + if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) == + (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) { io_return_on_throttle = IO_RETURN_ON_THROTTLE; } + again: /* Protect against a size change. */ /* @@ -373,10 +376,10 @@ again: * start. */ if (ioflag & IO_APPEND || took_truncate_lock) { - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); } else { - hfs_lock_truncate(cp, HFS_SHARED_LOCK); + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); } took_truncate_lock = 1; @@ -438,11 +441,15 @@ again: } } - if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { goto exit; } cnode_locked = 1; + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + hfs_incr_gencount (cp); + } + /* * Now that we have the cnode lock, see if there are delayed zero fill ranges * overlapping our write. If so, we need the truncate lock exclusive (see above). @@ -458,7 +465,7 @@ again: */ hfs_unlock(cp); cnode_locked = 0; - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); goto again; } @@ -611,7 +618,7 @@ sizeok: fp->ff_size, inval_start, zero_off, (off_t)0, lflag | IO_HEADZEROFILL | IO_NOZERODIRTY); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); cnode_locked = 1; if (retval) goto ioerr_exit; offset = uio_offset(uio); @@ -760,7 +767,7 @@ ioerr_exit: cred = vfs_context_ucred(ap->a_context); if (resid > uio_resid(uio) && cred && suser(cred, NULL)) { if (!cnode_locked) { - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); cnode_locked = 1; } cp->c_mode &= ~(S_ISUID | S_ISGID); @@ -769,19 +776,18 @@ ioerr_exit: if (retval) { if (ioflag & IO_UNIT) { if (!cnode_locked) { - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); cnode_locked = 1; } (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, 0, 0, ap->a_context); - // LP64todo - fix this! resid needs to by user_ssize_t uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); uio_setresid(uio, resid); filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; } } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) { if (!cnode_locked) { - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); cnode_locked = 1; } retval = hfs_update(vp, TRUE); @@ -796,7 +802,7 @@ exit: hfs_unlock(cp); if (took_truncate_lock) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } if (retval == EAGAIN) { throttle_lowpri_io(1); @@ -946,8 +952,6 @@ lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id) } if (cache->numcached > NUM_CACHE_ENTRIES) { - /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n", - cache->numcached, NUM_CACHE_ENTRIES);*/ cache->numcached = NUM_CACHE_ENTRIES; } @@ -995,11 +999,9 @@ add_node(struct access_cache *cache, int index, cnid_t nodeID, int access) /* if the cache is full, do a replace rather than an insert */ if (cache->numcached >= NUM_CACHE_ENTRIES) { - //printf("hfs: cache is full (%d). 
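
Both hfs_vnop_read() and hfs_vnop_write() now follow the same throttle protocol: when the request arrived through a syscall they pass IO_RETURN_ON_THROTTLE down to the cluster layer, and if it comes back with EAGAIN they drop every lock, sleep in throttle_lowpri_io(1) where no filesystem lock is held, and jump back to read_again/again. A self-contained sketch of that pattern; every helper here is a stand-in for the real lock and cluster calls:

    #include <errno.h>

    #define IO_RETURN_ON_THROTTLE 0x1

    struct file_sketch { int throttled_once; };

    static void take_truncate_lock_shared(struct file_sketch *f) { (void)f; }
    static void drop_truncate_lock(struct file_sketch *f)        { (void)f; }
    static void throttle_sleep(void) { /* stands in for throttle_lowpri_io(1) */ }

    static int do_cluster_read(struct file_sketch *f, int ioflag)
    {
        /* Pretend the first attempt lands in a throttle window. */
        if ((ioflag & IO_RETURN_ON_THROTTLE) && !f->throttled_once) {
            f->throttled_once = 1;
            return EAGAIN;
        }
        return 0;
    }

    static int read_with_throttle(struct file_sketch *f)
    {
    retry:
        take_truncate_lock_shared(f);      /* protect against a size change */
        {
            int error = do_cluster_read(f, IO_RETURN_ON_THROTTLE);
            drop_truncate_lock(f);         /* never sleep while holding it */
            if (error == EAGAIN) {
                throttle_sleep();          /* block at a safe point */
                goto retry;
            }
            return error;
        }
    }

The comment removed from hfs_vnop_read() explained the motivation: a throttled thread must not sleep while holding something like the EA B-tree lock, so the sleep is hoisted out to a point where nothing is held.
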
replace at index %d\n", cache->numcached, index); cache->numcached = NUM_CACHE_ENTRIES-1; if (index > cache->numcached) { - // printf("hfs: index %d pinned to %d\n", index, cache->numcached); index = cache->numcached; } } @@ -1525,8 +1527,6 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, err_exit_bulk_access: - //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups); - if (file_ids) kfree(file_ids, sizeof(int) * num_files); if (parents) @@ -1622,6 +1622,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { int outlen; char *bufptr; int error; + int flags = 0; /* Caller must be owner of file system. */ vfsp = vfs_statfs(HFSTOVFS(hfsmp)); @@ -1635,6 +1636,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } bufptr = (char *)ap->a_data; cnid = strtoul(bufptr, NULL, 10); + if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) { + flags |= BUILDPATH_VOLUME_RELATIVE; + } /* We need to call hfs_vfs_vget to leverage the code that will * fix the origin list for us if needed, as opposed to calling @@ -1644,12 +1648,70 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) { return (error); } - error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context); + error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context); vnode_put(file_vp); return (error); } + case HFS_GET_WRITE_GEN_COUNTER: + { + struct cnode *cp = NULL; + int error; + u_int32_t *counter = (u_int32_t *)ap->a_data; + + cp = VTOC(vp); + + if (vnode_isdir (vp)) { + error = EISDIR; + *counter = 0; + return error; + } + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + struct ubc_info *uip; + int is_mapped = 0; + + if (UBCINFOEXISTS(vp)) { + uip = vp->v_ubcinfo; + if (uip->ui_flags & UI_ISMAPPED) { + is_mapped = 1; + } + } + + + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + uint32_t gcount = hfs_get_gencount(cp); + // + // Even though we return EBUSY for files that are mmap'ed + // we also want to bump the value so that the write-gen + // counter will always be different once the file is unmapped + // (since the file may be unmapped but the pageouts have not + // yet happened). + // + if (is_mapped) { + hfs_incr_gencount (cp); + gcount = hfs_get_gencount(cp); + } + + *counter = gcount; + + } + else { + /* not a file or dir? 
silently return */ + *counter = 0; + } + hfs_unlock (cp); + + if (is_mapped) { + error = EBUSY; + } + } + + return error; + } + case HFS_PREV_LINK: case HFS_NEXT_LINK: { @@ -1744,7 +1806,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!vnode_isvroot(vp)) { return (EINVAL); } - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount(hfsmp); location = *(u_int32_t *)ap->a_data; if ((location >= hfsmp->allocLimit) && (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) { @@ -1768,7 +1830,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { } MarkVCBDirty(hfsmp); fail_change_next_allocation: - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount(hfsmp); return (error); } @@ -1836,7 +1898,7 @@ fail_change_next_allocation: hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; hfsmp->hfs_sparsebandblks *= 4; - vfs_markdependency(hfsmp->hfs_mp); + /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */ /* * If the sparse image is on a sparse image file (as opposed to a sparse @@ -1907,8 +1969,8 @@ fail_change_next_allocation: /* Must have catalog lock excl. to advance the CNID pointer */ lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK); - HFS_MOUNT_LOCK(hfsmp, TRUE); - + hfs_lock_mount(hfsmp); + /* If it is less than the current next CNID, force the wraparound bit to be set */ if (fileid < hfsmp->vcbNxtCNID) { wraparound=1; @@ -1924,7 +1986,7 @@ fail_change_next_allocation: } MarkVCBDirty(hfsmp); - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount(hfsmp); hfs_systemfile_unlock (hfsmp, lockflags); return (error); @@ -2052,7 +2114,7 @@ fail_change_next_allocation: * are enabled by default, so any change will be transient only * till the volume is remounted. */ - if (!is_suser()) { + if (!kauth_cred_issuser(kauth_cred_get())) { return (EPERM); } if (state == 0 || state == 1) @@ -2086,7 +2148,7 @@ fail_change_next_allocation: } cp = VTOC(vp); - error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK); + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (error == 0) { if (enable_static) { cp->c_flag |= C_SSD_STATIC; @@ -2099,6 +2161,147 @@ fail_change_next_allocation: return error; } + case F_SET_GREEDY_MODE: { + int error; + int enable_greedy_mode = 0; + struct cnode *cp = NULL; + /* + * lock the cnode, decorate the cnode flag, and bail out. + * VFS should have already authenticated the caller for us. + */ + + if (ap->a_data) { + /* + * Note that even though ap->a_data is of type caddr_t, + * the fcntl layer at the syscall handler will pass in NULL + * or 1 depending on what the argument supplied to the fcntl + * was. So it is in fact correct to check the ap->a_data + * argument for zero or non-zero value when deciding whether or not + * to enable the greedy mode bit in the cnode. + */ + enable_greedy_mode = 1; + } + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + cp = VTOC(vp); + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + if (enable_greedy_mode) { + cp->c_flag |= C_SSD_GREEDY_MODE; + } + else { + cp->c_flag &= ~C_SSD_GREEDY_MODE; + } + hfs_unlock (cp); + } + return error; + } + + case F_MAKECOMPRESSED: { + int error = 0; + uint32_t gen_counter; + struct cnode *cp = NULL; + int reset_decmp = 0; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + + /* + * acquire & lock the cnode. + * VFS should have already authenticated the caller for us. + */ + + if (ap->a_data) { + /* + * Cast the pointer into a uint32_t so we can extract the + * supplied generation counter. 
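
The new HFS_GET_WRITE_GEN_COUNTER fsctl reads a per-file counter that this patch bumps on writes, truncates, and pageouts; if the file is currently mmapped the kernel bumps the counter once more and answers EBUSY, so a snapshot taken while mapped can never look current later. A hypothetical userland caller is sketched below. It assumes the selector is visible to user space and that ffsctl(2) is available with this signature; neither assumption is shown by this patch:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/fsctl.h>   /* assumed to provide ffsctl() and the HFS selector */

    static int read_gencount(const char *path, uint32_t *out)
    {
        int fd = open(path, O_RDONLY);
        if (fd < 0)
            return -1;
        /* HFS_GET_WRITE_GEN_COUNTER is the in-kernel name; its userland
         * visibility is an assumption in this sketch. */
        int rc = ffsctl(fd, HFS_GET_WRITE_GEN_COUNTER, out, 0);
        if (rc < 0 && errno == EBUSY)
            rc = 0;   /* the handler above fills the counter before EBUSY */
        (void)close(fd);
        return rc;
    }
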
+ */ + gen_counter = *((uint32_t*)ap->a_data); + } + else { + return EINVAL; + } + +#if HFS_COMPRESSION + cp = VTOC(vp); + /* Grab truncate lock first; we may truncate the file */ + hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + return error; + } + + /* Are there any other usecounts/FDs? */ + if (vnode_isinuse(vp, 1)) { + hfs_unlock(cp); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); + return EBUSY; + } + + + /* now we have the cnode locked down; Validate arguments */ + if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) { + /* EINVAL if you are trying to manipulate an IMMUTABLE file */ + hfs_unlock(cp); + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + return EINVAL; + } + + if ((hfs_get_gencount (cp)) == gen_counter) { + /* + * OK, the gen_counter matched. Go for it: + * Toggle state bits, truncate file, and suppress mtime update + */ + reset_decmp = 1; + cp->c_bsdflags |= UF_COMPRESSED; + + error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context); + } + else { + error = ESTALE; + } + + /* Unlock cnode before executing decmpfs ; they may need to get an EA */ + hfs_unlock(cp); + + /* + * Reset the decmp state while still holding the truncate lock. We need to + * serialize here against a listxattr on this node which may occur at any + * time. + * + * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed, + * that will still potentially require getting the com.apple.decmpfs EA. If the + * EA is required, then we can't hold the cnode lock, because the getxattr call is + * generic(through VFS), and can't pass along any info telling it that we're already + * holding it (the lock). If we don't serialize, then we risk listxattr stopping + * and trying to fill in the hfs_file_is_compressed info during the callback + * operation, which will result in deadlock against the b-tree node. + * + * So, to serialize against listxattr (which will grab buf_t meta references on + * the b-tree blocks), we hold the truncate lock as we're manipulating the + * decmpfs payload. + */ + if ((reset_decmp) && (error == 0)) { + decmpfs_cnode *dp = VTOCMP (vp); + if (dp != NULL) { + decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0); + } + + /* Initialize the decmpfs node as needed */ + (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */ + } + + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); + +#endif + return error; + } + case F_SETBACKINGSTORE: { int error = 0; @@ -2134,7 +2337,7 @@ fail_change_next_allocation: if (hfsmp->hfs_flags & HFS_READ_ONLY) { return (EROFS); } - error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (error == 0) { error = hfs_fsync(vp, MNT_WAIT, TRUE, p); hfs_unlock(VTOC(vp)); @@ -2150,7 +2353,7 @@ fail_change_next_allocation: if (!vnode_isreg(vp)) return EINVAL; - error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (error == 0) { cp = VTOC(vp); /* @@ -2176,7 +2379,7 @@ fail_change_next_allocation: fp = VTOF(vp); /* Protect against a size change. 
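
F_MAKECOMPRESSED is built on optimistic concurrency: the caller samples the write generation first, prepares the compressed payload, then passes the sampled value back in; the kernel flips UF_COMPRESSED and truncates only if the counter still matches, and answers ESTALE when any write slipped in between. The core check, reduced to a sketch with stand-in names:

    #include <errno.h>
    #include <stdint.h>

    #define UF_COMPRESSED_SK 0x20   /* stand-in for the real UF_COMPRESSED flag */

    struct cnode_sketch { uint32_t gencount; uint32_t bsdflags; };

    static int make_compressed_sketch(struct cnode_sketch *cp, uint32_t caller_gen)
    {
        if (cp->gencount != caller_gen)
            return ESTALE;           /* the file changed underneath the caller */
        cp->bsdflags |= UF_COMPRESSED_SK;
        /* the real path also truncates to 0 with HFS_TRUNCATE_SKIPTIMES here */
        return 0;
    }
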
*/ - hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); #if HFS_COMPRESSION if (compressed && (uncompressed_size == -1)) { @@ -2195,7 +2398,7 @@ fail_change_next_allocation: error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count); } - hfs_unlock_truncate(VTOC(vp), 0); + hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); return (error); } @@ -2268,18 +2471,18 @@ fail_change_next_allocation: if (hfsmp->hfs_flags & HFS_READ_ONLY) { return (EROFS); } - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo)); - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); break; case HFS_GET_BOOT_INFO: if (!vnode_isvroot(vp)) return(EINVAL); - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo)); - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount(hfsmp); break; case HFS_MARK_BOOT_CORRUPT: @@ -2287,7 +2490,7 @@ fail_change_next_allocation: * kHFSVolumeInconsistentBit in the volume header. This will * force fsck_hfs on next mount. */ - if (!is_suser()) { + if (!kauth_cred_issuser(kauth_cred_get())) { return EACCES; } @@ -2334,7 +2537,7 @@ fail_change_next_allocation: case HFS_DISABLE_METAZONE: { /* Only root can disable metadata zone */ - if (!is_suser()) { + if (!kauth_cred_issuser(kauth_cred_get())) { return EACCES; } if (hfsmp->hfs_flags & HFS_READ_ONLY) { @@ -2551,7 +2754,7 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap) if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) { if (VTOC(vp)->c_lockowner != current_thread()) { - hfs_lock(VTOC(vp), HFS_FORCE_LOCK); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); tooklock = 1; } } @@ -2614,9 +2817,9 @@ retry: cp->c_blocks += loanedBlocks; fp->ff_blocks += loanedBlocks; - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); hfsmp->loanedBlocks += loanedBlocks; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); hfs_systemfile_unlock(hfsmp, lockflags); cp->c_flag |= C_MODIFIED; @@ -2659,7 +2862,7 @@ retry: /* Validate if the start offset is within logical file size */ if (ap->a_foffset >= fp->ff_size) { - goto exit; + goto exit; } /* @@ -2777,6 +2980,11 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap) buf_markstatic(bp); } + /* Mark buffer as containing static data if cnode flag set */ + if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) { + bufattr_markgreedymode((bufattr_t)(&bp->b_attr)); + } + #if CONFIG_PROTECT cnode_t *cp = NULL; @@ -2835,7 +3043,7 @@ hfs_minorupdate(struct vnode *vp) { } int -do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context) +do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context) { register struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); @@ -2849,7 +3057,9 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c int blksize; struct hfsmount *hfsmp; int lockflags; - + int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE); + int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES); + blksize = VTOVCB(vp)->blockSize; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; @@ -2954,7 +3164,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c if (skipupdate) { (void) hfs_minorupdate(vp); } - else { + else { (void) hfs_update(vp, TRUE); (void) 
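
do_hfs_truncate() trades its boolean skipupdate parameter for a truncateflags bitmask that decodes into two independent behaviors: HFS_TRUNCATE_SKIPUPDATE (do only a minor update) and HFS_TRUNCATE_SKIPTIMES (leave modtime alone, which also suppresses the gen-count bump). A sketch of the decode and the bookkeeping it gates, with the flag names taken from the diff and the rest stand-ins:

    #include <stdint.h>

    #define HFS_TRUNCATE_SKIPUPDATE 0x1
    #define HFS_TRUNCATE_SKIPTIMES  0x2

    struct trunc_book { int touch_modtime; uint32_t gencount; };

    static void truncate_bookkeeping(int truncateflags, int is_reg_or_lnk,
                                     struct trunc_book *b)
    {
        int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);

        if (!suppress_times) {
            b->touch_modtime = 1;    /* file data was modified */
            if (is_reg_or_lnk)
                b->gencount++;       /* keep the write generation in step */
        }
    }

Tying the gen-count bump to the modtime decision is deliberate: F_MAKECOMPRESSED truncates with HFS_TRUNCATE_SKIPTIMES precisely so that its own truncate does not invalidate the generation the caller just validated.
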
hfs_volupdate(hfsmp, VOL_UPDATE, 0); } @@ -2994,7 +3204,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit, fp->ff_size, (off_t)0, (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); if (retval) goto Err_Exit; /* Merely invalidate the remaining area, if necessary: */ @@ -3017,7 +3227,9 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c panic("hfs_truncate: invoked on non-UBC object?!"); }; } - cp->c_touch_modtime = TRUE; + if (suppress_times == 0) { + cp->c_touch_modtime = TRUE; + } fp->ff_size = length; } else { /* Shorten the size of the file */ @@ -3035,8 +3247,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c u_int32_t finalblks; u_int32_t loanedBlocks; - HFS_MOUNT_LOCK(hfsmp, TRUE); - + hfs_lock_mount(hfsmp); loanedBlocks = fp->ff_unallocblocks; cp->c_blocks -= loanedBlocks; fp->ff_blocks -= loanedBlocks; @@ -3054,7 +3265,7 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c cp->c_blocks += loanedBlocks; fp->ff_blocks += loanedBlocks; } - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); } /* @@ -3106,9 +3317,13 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); #endif /* QUOTA */ } - /* Only set update flag if the logical length changes */ - if ((off_t)fp->ff_size != length) + /* + * Only set update flag if the logical length changes & we aren't + * suppressing modtime updates. + */ + if (((off_t)fp->ff_size != length) && (suppress_times == 0)) { cp->c_touch_modtime = TRUE; + } fp->ff_size = length; } if (cp->c_mode & (S_ISUID | S_ISGID)) { @@ -3122,7 +3337,18 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_c } else { cp->c_touch_chgtime = TRUE; /* status changed */ - cp->c_touch_modtime = TRUE; /* file data was modified */ + if (suppress_times == 0) { + cp->c_touch_modtime = TRUE; /* file data was modified */ + + /* + * If we are not suppressing the modtime update, then + * update the gen count as well. 
+ */ + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) { + hfs_incr_gencount(cp); + } + } + retval = hfs_update(vp, MNT_WAIT); } if (retval) { @@ -3198,8 +3424,7 @@ hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) { if (fp->ff_unallocblocks > 0) { u_int32_t loanedBlocks; - HFS_MOUNT_LOCK(hfsmp, TRUE); - + hfs_lock_mount (hfsmp); loanedBlocks = fp->ff_unallocblocks; cp->c_blocks -= loanedBlocks; fp->ff_blocks -= loanedBlocks; @@ -3207,7 +3432,7 @@ hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) { hfsmp->loanedBlocks -= loanedBlocks; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); } return 0; @@ -3343,7 +3568,7 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, */ int hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, - int skipupdate, vfs_context_t context) + int truncateflags, vfs_context_t context) { struct filefork *fp = VTOF(vp); off_t filebytes; @@ -3395,7 +3620,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, filebytes = length; } cp->c_flag |= C_FORCEUPDATE; - error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context); + error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); if (error) break; } @@ -3407,13 +3632,13 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, filebytes = length; } cp->c_flag |= C_FORCEUPDATE; - error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context); + error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context); if (error) break; } } else /* Same logical size */ { - error = do_hfs_truncate(vp, length, flags, skipupdate, context); + error = do_hfs_truncate(vp, length, flags, truncateflags, context); } /* Files that are changing size are not hot file candidates. */ if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) { @@ -3469,9 +3694,9 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL); - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { goto Err_Exit; } @@ -3648,7 +3873,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { hfs_unlock(cp); ubc_setsize(vp, fp->ff_size); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } } @@ -3660,7 +3885,7 @@ Std_Exit: if (retval == 0) retval = retval2; Err_Exit: - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); hfs_unlock(cp); return (retval); } @@ -3703,6 +3928,27 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) #if CONFIG_PROTECT if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) { + /* + * If we errored here, then this means that one of two things occurred: + * 1. there was a problem with the decryption of the key. + * 2. the device is locked and we are not allowed to access this particular file. + * + * Either way, this means that we need to shut down this upl now. As long as + * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves) + * then we create a upl and immediately abort it. 
+ */ + if (ap->a_pl == NULL) { + /* create the upl */ + ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl, + UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT); + /* mark the range as needed so it doesn't immediately get discarded upon abort */ + ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1); + + /* Abort the range */ + ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); + } + + return error; } #endif /* CONFIG_PROTECT */ @@ -3765,11 +4011,11 @@ retry_pagein: if (vfs_isforce(vp->v_mount)) { if (cp->c_flag & C_DELETED) { /* If we don't get it, then just go ahead and operate without the lock */ - truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK); + truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE); } } else { - hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK); + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE); truncate_lock_held = TRUE; } @@ -3859,7 +4105,7 @@ retry_pagein: * takes the lock shared, we can deadlock if some other thread * tries to grab the lock exclusively in between. */ - hfs_unlock_truncate(cp, 1); + hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE); truncate_lock_held = FALSE; } ap->a_pl = upl; @@ -3942,7 +4188,7 @@ retry_pagein: /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */ if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) { - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); took_cnode_lock = 1; } /* @@ -3974,7 +4220,7 @@ pagein_next_range: pagein_done: if (truncate_lock_held == TRUE) { /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */ - hfs_unlock_truncate(cp, 1); + hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE); } return (error); @@ -4025,6 +4271,10 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) a_flags = ap->a_flags; a_pl_offset = ap->a_pl_offset; + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + hfs_incr_gencount (cp); + } + /* * we can tell if we're getting the new or old behavior from the UPL */ @@ -4052,7 +4302,7 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) * because we may be already holding the truncate lock exclusive to force any other * IOs to have blocked behind us. */ - hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK); + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE); if (a_flags & UPL_MSYNC) { request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY; @@ -4170,7 +4420,7 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) tooklock = 0; if (cp->c_lockowner != current_thread()) { - if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { /* * we're in the v2 path, so we are the * owner of the UPL... 
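
When content protection rejects a pagein and the caller did not supply a UPL (ap->a_pl == NULL), the fault must still be answered, so the code above creates a UPL over the range, marks it needed so the pages are not recycled before the abort lands, and then aborts with UPL_ABORT_ERROR. The shape of that recovery path, with stand-in helpers in place of the real ubc_* calls:

    struct upl_sketch { int live; };

    static int  upl_create(struct upl_sketch *u)      { u->live = 1; return 0; }
    static void upl_mark_needed(struct upl_sketch *u) { (void)u; }
    static void upl_abort_error(struct upl_sketch *u) { u->live = 0; }

    static int pagein_denied(int caller_supplied_upl, int error)
    {
        if (!caller_supplied_upl) {
            struct upl_sketch u;
            if (upl_create(&u) == 0) {   /* ubc_create_upl(...) */
                upl_mark_needed(&u);     /* ubc_upl_range_needed(...) */
                upl_abort_error(&u);     /* ubc_upl_abort_range(..., UPL_ABORT_ERROR) */
            }
        }
        return error;                    /* propagate the CP failure */
    }
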
we may have already @@ -4220,7 +4470,7 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) int tooklock = 0; if (cp->c_lockowner != current_thread()) { - if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { if (!(a_flags & UPL_NOCOMMIT)) { ubc_upl_abort_range(upl, a_pl_offset, @@ -4263,7 +4513,7 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) cp->c_touch_chgtime = TRUE; if ((cp->c_mode & (S_ISUID | S_ISGID)) && (vfs_context_suser(ap->a_context) != 0)) { - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); cp->c_mode &= ~(S_ISUID | S_ISGID); hfs_unlock(cp); } @@ -4277,7 +4527,7 @@ pageout_done: * being invoked via ubc_msync due to lockdown, * we should release it recursively, too. */ - hfs_unlock_truncate(cp, 1); + hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE); } return (retval); } @@ -4392,7 +4642,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, vnodetype = vnode_vtype(vp); if (vnodetype != VREG) { - /* Note symlinks are not allowed to be relocated */ + /* Not allowed to move symlinks. */ return (EPERM); } @@ -4425,7 +4675,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, if (blockHint == 0) blockHint = hfsmp->nextAllocation; - if ((fp->ff_size > 0x7fffffff)) { + if (fp->ff_size > 0x7fffffff) { return (EFBIG); } @@ -4442,15 +4692,15 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, if (!vnode_issystem(vp) && (vnodetype != VLNK)) { hfs_unlock(cp); - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); /* Force lock since callers expects lock to be held. */ - if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) { - hfs_unlock_truncate(cp, 0); + if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); return (retval); } /* No need to continue if file was removed. 
*/ if (cp->c_flag & C_NOEXISTS) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); return (ENOENT); } took_trunc_lock = 1; @@ -4465,7 +4715,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, if (hfs_start_transaction(hfsmp) != 0) { if (took_trunc_lock) - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); return (EINVAL); } started_tr = 1; @@ -4490,10 +4740,10 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, nextallocsave = hfsmp->nextAllocation; retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes); if (eflags & kEFMetadataMask) { - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount(hfsmp); HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave); MarkVCBDirty(hfsmp); - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount(hfsmp); } retval = MacToVFSError(retval); @@ -4503,7 +4753,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, retval = ENOSPC; goto restore; } else if (fp->ff_blocks < (headblks + datablks)) { - printf("hfs_relocate: allocation failed"); + printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN); retval = ENOSPC; goto restore; } @@ -4554,7 +4804,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, */ if (vnodetype == VLNK) - retval = hfs_clonelink(vp, blksize, cred, p); + retval = EPERM; else if (vnode_issystem(vp)) retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p); else @@ -4585,7 +4835,7 @@ hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred, goto restore; out: if (took_trunc_lock) - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); @@ -4611,7 +4861,7 @@ exit: restore: if (fp->ff_blocks == headblks) { if (took_trunc_lock) - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); goto exit; } /* @@ -4631,44 +4881,11 @@ restore: lockflags = 0; if (took_trunc_lock) - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); goto exit; } -/* - * Clone a symlink. - * - */ -static int -hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p) -{ - struct buf *head_bp = NULL; - struct buf *tail_bp = NULL; - int error; - - - error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp); - if (error) - goto out; - - tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META); - if (tail_bp == NULL) { - error = EIO; - goto out; - } - bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize); - error = (int)buf_bwrite(tail_bp); -out: - if (head_bp) { - buf_markinvalid(head_bp); - buf_brelse(head_bp); - } - (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0); - - return (error); -} - /* * Clone a file's data within the file. 
* @@ -4694,13 +4911,13 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) #if CONFIG_PROTECT if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { - hfs_lock(VTOC(vp), HFS_FORCE_LOCK); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); return (error); } #endif /* CONFIG_PROTECT */ if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { - hfs_lock(VTOC(vp), HFS_FORCE_LOCK); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); return (ENOMEM); } @@ -4763,7 +4980,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) } kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); - hfs_lock(VTOC(vp), HFS_FORCE_LOCK); + hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); return (error); } diff --git a/bsd/hfs/hfs_search.c b/bsd/hfs/hfs_search.c index b73ec8339..53ea092f4 100644 --- a/bsd/hfs/hfs_search.c +++ b/bsd/hfs/hfs_search.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997-2012 Apple Inc. All rights reserved. + * Copyright (c) 1997-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -115,7 +115,8 @@ static int CheckCriteria( ExtendedVCB *vcb, CatalogRecord *rec, CatalogKey *key, searchinfospec_t *searchInfo1, - searchinfospec_t *searchInfo2 ); + searchinfospec_t *searchInfo2, + struct vfs_context *ctx); static int CheckAccess(ExtendedVCB *vcb, u_long searchBits, CatalogKey *key, struct vfs_context *ctx); @@ -204,8 +205,9 @@ hfs_vnop_search(ap) *(ap->a_nummatches) = 0; - if (ap->a_options & ~SRCHFS_VALIDOPTIONSMASK) + if (ap->a_options & ~SRCHFS_VALIDOPTIONSMASK) { return (EINVAL); + } /* * Fail requests for attributes that HFS does not support for the @@ -217,6 +219,7 @@ hfs_vnop_search(ap) (ap->a_returnattrs->dirattr & ~HFS_ATTR_DIR_VALID) || (ap->a_returnattrs->fileattr & ~HFS_ATTR_FILE_VALID) || (ap->a_returnattrs->forkattr != 0)) { + return (EINVAL); } @@ -229,10 +232,13 @@ hfs_vnop_search(ap) attrgroup_t attrs; attrs = ap->a_searchattrs->commonattr | ap->a_returnattrs->commonattr; - if (attrs & (ATTR_CMN_NAME | ATTR_CMN_PAROBJID)) + if (attrs & (ATTR_CMN_NAME | ATTR_CMN_PAROBJID)) { return (EINVAL); - if ((err = vfs_context_suser(ap->a_context))) + } + + if ((err = vfs_context_suser(ap->a_context))) { return (err); + } } // If both 32-bit and 64-bit parent ids or file ids are given @@ -241,12 +247,13 @@ hfs_vnop_search(ap) attrgroup_t test_attrs=ap->a_searchattrs->commonattr; if (((test_attrs & ATTR_CMN_OBJID) && (test_attrs & ATTR_CMN_FILEID)) || - ((test_attrs & ATTR_CMN_PARENTID) && (test_attrs & ATTR_CMN_PAROBJID))) + ((test_attrs & ATTR_CMN_PARENTID) && (test_attrs & ATTR_CMN_PAROBJID))) { return (EINVAL); + } - - if (uio_resid(ap->a_uio) <= 0) + if (uio_resid(ap->a_uio) <= 0) { return (EINVAL); + } isHFSPlus = (vcb->vcbSigWord == kHFSPlusSigWord); hfsmp = VTOHFS(ap->a_vp); @@ -261,11 +268,14 @@ hfs_vnop_search(ap) /* UnPack the search boundries, searchInfo1, searchInfo2 */ err = UnpackSearchAttributeBlock(hfsmp, ap->a_searchattrs, &searchInfo1, ap->a_searchparams1, 1); - if (err) return err; + if (err) { + return err; + } err = UnpackSearchAttributeBlock(hfsmp, ap->a_searchattrs, &searchInfo2, ap->a_searchparams2, 0); - if (err) return err; - + if (err) { + return err; + } //shadow search bits if 64-bit file/parent ids are used if (ap->a_searchattrs->commonattr & ATTR_CMN_FILEID) ap->a_searchattrs->commonattr |= ATTR_CMN_OBJID; @@ -364,7 +374,7 @@ hfs_vnop_search(ap) ResolveHardlink(vcb, (HFSPlusCatalogFile *)myCurrentDataPtr); } if (CheckCriteria( vcb, 
ap->a_options, ap->a_searchattrs, myCurrentDataPtr, - myCurrentKeyPtr, &searchInfo1, &searchInfo2 ) + myCurrentKeyPtr, &searchInfo1, &searchInfo2, ap->a_context ) && CheckAccess(vcb, ap->a_options, myCurrentKeyPtr, ap->a_context)) { err = InsertMatch(hfsmp, ap->a_uio, myCurrentDataPtr, myCurrentKeyPtr, ap->a_returnattrs, @@ -499,20 +509,29 @@ CompareMasked(const u_int32_t *thisValue, const u_int32_t *compareData, static Boolean ComparePartialUnicodeName (register ConstUniCharArrayPtr str, register ItemCount s_len, - register ConstUniCharArrayPtr find, register ItemCount f_len ) + register ConstUniCharArrayPtr find, register ItemCount f_len, int caseSensitive ) { - if (f_len == 0 || s_len == 0) + if (f_len == 0 || s_len == 0) { return FALSE; + } - do { - if (s_len-- < f_len) - return FALSE; - } while (FastUnicodeCompare(str++, f_len, find, f_len) != 0); + if (caseSensitive) { + do { + if (s_len-- < f_len) + return FALSE; + } while (UnicodeBinaryCompare(str++, f_len, find, f_len) != 0); + } + else { + do { + if (s_len-- < f_len) + return FALSE; + } while (FastUnicodeCompare(str++, f_len, find, f_len) != 0); + } return TRUE; } - +#if CONFIG_HFS_STD static Boolean ComparePartialPascalName ( register ConstStr31Param str, register ConstStr31Param find ) { @@ -536,6 +555,7 @@ ComparePartialPascalName ( register ConstStr31Param str, register ConstStr31Para return FALSE; } +#endif /* @@ -564,8 +584,10 @@ CheckAccess(ExtendedVCB *theVCBPtr, u_long searchBits, CatalogKey *theKeyPtr, st isHFSPlus = ( theVCBPtr->vcbSigWord == kHFSPlusSigWord ); if ( isHFSPlus ) myNodeID = theKeyPtr->hfsPlus.parentID; +#if CONFIG_HFS_STD else myNodeID = theKeyPtr->hfs.parentID; +#endif while ( myNodeID >= kRootDirID ) { cnode_t * cp; @@ -647,7 +669,8 @@ CheckCriteria( ExtendedVCB *vcb, CatalogRecord *rec, CatalogKey *key, searchinfospec_t *searchInfo1, - searchinfospec_t *searchInfo2 ) + searchinfospec_t *searchInfo2, + struct vfs_context *ctx) { Boolean matched, atleastone; Boolean isHFSPlus; @@ -655,57 +678,68 @@ CheckCriteria( ExtendedVCB *vcb, struct cat_attr c_attr; struct cat_fork datafork; struct cat_fork rsrcfork; + struct hfsmount *hfsmp = (struct hfsmount*)vcb; + int force_case_sensitivity = proc_is_forcing_hfs_case_sensitivity(vfs_context_proc(ctx)); bzero(&c_attr, sizeof(c_attr)); isHFSPlus = (vcb->vcbSigWord == kHFSPlusSigWord); switch (rec->recordType) { - case kHFSFolderRecord: - case kHFSPlusFolderRecord: - if ( (searchBits & SRCHFS_MATCHDIRS) == 0 ) { /* If we are NOT searching folders */ - matched = false; - goto TestDone; - } - break; - - case kHFSFileRecord: - if ( (searchBits & SRCHFS_MATCHFILES) == 0 ) { /* If we are NOT searching files */ - matched = false; - goto TestDone; - } - break; - - case kHFSPlusFileRecord: - /* Check if hardlink links should be skipped. 
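
ComparePartialUnicodeName() now takes a caseSensitive flag: HFSX volumes, and processes that force case sensitivity, compare with UnicodeBinaryCompare, while ordinary HFS+ keeps the case-folding FastUnicodeCompare. Either way the algorithm is the same sliding window of the search string across the candidate name. An equivalent standalone sketch, parameterized on the comparator (types and comparator here are stand-ins):

    #include <stddef.h>
    #include <stdint.h>

    typedef uint16_t UniCharS;

    /* Comparator contract, as in the diff: 0 means "equal". */
    typedef int (*ucmp_fn)(const UniCharS *a, size_t alen,
                           const UniCharS *b, size_t blen);

    static int cmp_exact(const UniCharS *a, size_t alen,
                         const UniCharS *b, size_t blen)
    {
        if (alen != blen) return 1;
        for (size_t i = 0; i < alen; i++)
            if (a[i] != b[i]) return 1;
        return 0;
    }

    /* Slide a window of f_len characters across str, as the kernel loop does. */
    static int partial_match(const UniCharS *str, size_t s_len,
                             const UniCharS *find, size_t f_len, ucmp_fn cmp)
    {
        if (f_len == 0 || s_len == 0)
            return 0;
        while (s_len >= f_len) {
            if (cmp(str, f_len, find, f_len) == 0)
                return 1;
            str++;
            s_len--;
        }
        return 0;
    }

Calling partial_match(name, nlen, find, flen, cmp_exact) models the case-sensitive path; swapping in a folding comparator models the case-insensitive one.
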
*/ - if (searchBits & SRCHFS_SKIPLINKS) { - cnid_t parid = key->hfsPlus.parentID; - HFSPlusCatalogFile *filep = (HFSPlusCatalogFile *)rec; - - if ((SWAP_BE32(filep->userInfo.fdType) == kHardLinkFileType) && - (SWAP_BE32(filep->userInfo.fdCreator) == kHFSPlusCreator)) { - return (false); /* skip over file link records */ - } else if ((parid == vcb->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && - (filep->bsdInfo.special.linkCount == 0)) { - return (false); /* skip over unlinked files */ - } else if ((SWAP_BE32(filep->userInfo.fdType) == kHFSAliasType) && - (SWAP_BE32(filep->userInfo.fdCreator) == kHFSAliasCreator) && - (filep->flags & kHFSHasLinkChainMask)) { - return (false); /* skip over dir link records */ + +#if CONFIG_HFS_STD + case kHFSFolderRecord: + if ( (searchBits & SRCHFS_MATCHDIRS) == 0 ) { /* If we are NOT searching folders */ + matched = false; + goto TestDone; } - } else if (key->hfsPlus.parentID == vcb->hfs_private_desc[FILE_HARDLINKS].cd_cnid) { - return (false); /* skip over private files */ - } else if (key->hfsPlus.parentID == vcb->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { - return (false); /* skip over private files */ - } + break; - if ( (searchBits & SRCHFS_MATCHFILES) == 0 ) { /* If we are NOT searching files */ - matched = false; - goto TestDone; - } - break; + case kHFSFileRecord: + if ( (searchBits & SRCHFS_MATCHFILES) == 0 ) { /* If we are NOT searching files */ + matched = false; + goto TestDone; + } + break; +#endif - default: /* Never match a thread record or any other type. */ - return( false ); /* Not a file or folder record, so can't search it */ + case kHFSPlusFolderRecord: + if ( (searchBits & SRCHFS_MATCHDIRS) == 0 ) { /* If we are NOT searching folders */ + matched = false; + goto TestDone; + } + break; + + case kHFSPlusFileRecord: + /* Check if hardlink links should be skipped. */ + if (searchBits & SRCHFS_SKIPLINKS) { + cnid_t parid = key->hfsPlus.parentID; + HFSPlusCatalogFile *filep = (HFSPlusCatalogFile *)rec; + + if ((SWAP_BE32(filep->userInfo.fdType) == kHardLinkFileType) && + (SWAP_BE32(filep->userInfo.fdCreator) == kHFSPlusCreator)) { + return (false); /* skip over file link records */ + } else if ((parid == vcb->hfs_private_desc[FILE_HARDLINKS].cd_cnid) && + (filep->bsdInfo.special.linkCount == 0)) { + return (false); /* skip over unlinked files */ + } else if ((SWAP_BE32(filep->userInfo.fdType) == kHFSAliasType) && + (SWAP_BE32(filep->userInfo.fdCreator) == kHFSAliasCreator) && + (filep->flags & kHFSHasLinkChainMask)) { + return (false); /* skip over dir link records */ + } + } else if (key->hfsPlus.parentID == vcb->hfs_private_desc[FILE_HARDLINKS].cd_cnid) { + return (false); /* skip over private files */ + } else if (key->hfsPlus.parentID == vcb->hfs_private_desc[DIR_HARDLINKS].cd_cnid) { + return (false); /* skip over private files */ + } + + if ( (searchBits & SRCHFS_MATCHFILES) == 0 ) { /* If we are NOT searching files */ + matched = false; + goto TestDone; + } + break; + + default: /* Never match a thread record or any other type. 
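
The SRCHFS_SKIPLINKS branch identifies hardlink records purely from FinderInfo: a file whose type/creator pair is 'hlnk'/'hfs+' is a file hardlink (aliases with the link-chain bit set mark directory hardlinks). FinderInfo is big-endian on disk, hence the SWAP_BE32 calls. A sketch of the predicate for a little-endian host; the struct is a stand-in for the on-disk layout:

    #include <stdint.h>

    #define kHardLinkFileType 0x686C6E6BU   /* 'hlnk' */
    #define kHFSPlusCreator   0x6866732BU   /* 'hfs+' */

    /* Stand-in for SWAP_BE32 on a little-endian host. */
    static uint32_t be32_to_host(uint32_t v)
    {
        return ((v & 0xFFU) << 24) | ((v & 0xFF00U) << 8) |
               ((v >> 8) & 0xFF00U) | ((v >> 24) & 0xFFU);
    }

    struct fndr_info_sketch { uint32_t fdType, fdCreator; }; /* raw on-disk words */

    static int is_file_hardlink(const struct fndr_info_sketch *fi)
    {
        return be32_to_host(fi->fdType) == kHardLinkFileType &&
               be32_to_host(fi->fdCreator) == kHFSPlusCreator;
    }
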
*/ + return( false ); /* Not a file or folder record, so can't search it */ } matched = true; /* Assume we got a match */ @@ -714,20 +748,40 @@ CheckCriteria( ExtendedVCB *vcb, /* First, attempt to match the name -- either partial or complete */ if ( attrList->commonattr & ATTR_CMN_NAME ) { if (isHFSPlus) { + int case_sensitive = 0; + + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) { + case_sensitive = 1; + } else if (force_case_sensitivity) { + case_sensitive = 1; + } + /* Check for partial/full HFS Plus name match */ if ( searchBits & SRCHFS_MATCHPARTIALNAMES ) { matched = ComparePartialUnicodeName(key->hfsPlus.nodeName.unicode, key->hfsPlus.nodeName.length, (UniChar*)searchInfo1->name, - searchInfo1->nameLength ); - } else /* full HFS Plus name match */ { - matched = (FastUnicodeCompare(key->hfsPlus.nodeName.unicode, - key->hfsPlus.nodeName.length, - (UniChar*)searchInfo1->name, - searchInfo1->nameLength ) == 0); + searchInfo1->nameLength, case_sensitive); + } + else { + /* Full name match. Are we HFSX (case sensitive) or HFS+ ? */ + if (case_sensitive) { + matched = (UnicodeBinaryCompare(key->hfsPlus.nodeName.unicode, + key->hfsPlus.nodeName.length, + (UniChar*)searchInfo1->name, + searchInfo1->nameLength ) == 0); + } + else { + matched = (FastUnicodeCompare(key->hfsPlus.nodeName.unicode, + key->hfsPlus.nodeName.length, + (UniChar*)searchInfo1->name, + searchInfo1->nameLength ) == 0); + } } - } else { + } +#if CONFIG_HFS_STD + else { /* Check for partial/full HFS name match */ if ( searchBits & SRCHFS_MATCHPARTIALNAMES ) @@ -735,6 +789,7 @@ CheckCriteria( ExtendedVCB *vcb, else /* full HFS name match */ matched = (FastRelString(key->hfs.nodeName, (u_char*)searchInfo1->name) == 0); } +#endif if ( matched == false || (searchBits & ~SRCHFS_MATCHPARTIALNAMES) == 0 ) goto TestDone; /* no match, or nothing more to compare */ @@ -749,41 +804,67 @@ CheckCriteria( ExtendedVCB *vcb, int flags; switch (rec->recordType) { - case kHFSFolderRecord: - case kHFSPlusFolderRecord: { - struct FndrDirInfo *finder_info; - - finder_info = (struct FndrDirInfo *)&c_attr.ca_finderinfo[0]; - flags = SWAP_BE16(finder_info->frFlags); - break; - } - - case kHFSFileRecord: - case kHFSPlusFileRecord: { - struct FndrFileInfo *finder_info; - - finder_info = (struct FndrFileInfo *)&c_attr.ca_finderinfo[0]; - flags = SWAP_BE16(finder_info->fdFlags); - break; +#if CONFIG_HFS_STD + case kHFSFolderRecord: + { + struct FndrDirInfo *finder_info; + + finder_info = (struct FndrDirInfo *)&c_attr.ca_finderinfo[0]; + flags = SWAP_BE16(finder_info->frFlags); + break; + } + + case kHFSFileRecord: + { + struct FndrFileInfo *finder_info; + + finder_info = (struct FndrFileInfo *)&c_attr.ca_finderinfo[0]; + flags = SWAP_BE16(finder_info->fdFlags); + break; + } +#endif + + case kHFSPlusFolderRecord: + { + struct FndrDirInfo *finder_info; + + finder_info = (struct FndrDirInfo *)&c_attr.ca_finderinfo[0]; + flags = SWAP_BE16(finder_info->frFlags); + break; + } + + case kHFSPlusFileRecord: + { + struct FndrFileInfo *finder_info; + + finder_info = (struct FndrFileInfo *)&c_attr.ca_finderinfo[0]; + flags = SWAP_BE16(finder_info->fdFlags); + break; + } + + default: + { + flags = kIsInvisible; + break; + } } - default: { - flags = kIsInvisible; - break; + if (flags & kIsInvisible) { + matched = false; + goto TestDone; } - } - - if (flags & kIsInvisible) { - matched = false; - goto TestDone; - } } /* Now that we have a record worth searching, see if it matches the search attributes */ +#if CONFIG_HFS_STD if (rec->recordType == kHFSFileRecord || 
rec->recordType == kHFSPlusFileRecord) { +#else + if (rec->recordType == kHFSPlusFileRecord) { +#endif + if ((attrList->fileattr & ~ATTR_FILE_VALIDMASK) != 0) { /* attr we do know about */ matched = false; goto TestDone; @@ -856,8 +937,12 @@ CheckCriteria( ExtendedVCB *vcb, /* * Check the directory attributes */ +#if CONFIG_HFS_STD else if (rec->recordType == kHFSFolderRecord || rec->recordType == kHFSPlusFolderRecord) { +#else + else if (rec->recordType == kHFSPlusFolderRecord) { +#endif if ((attrList->dirattr & ~ATTR_DIR_VALIDMASK) != 0) { /* attr we do know about */ matched = false; goto TestDone; @@ -899,8 +984,10 @@ CheckCriteria( ExtendedVCB *vcb, if (isHFSPlus) parentID = key->hfsPlus.parentID; +#if CONFIG_HFS_STD else parentID = key->hfs.parentID; +#endif matched = CompareRange(parentID, searchInfo1->parentDirID, searchInfo2->parentDirID ); @@ -1053,13 +1140,20 @@ InsertMatch(struct hfsmount *hfsmp, uio_t a_uio, CatalogRecord *rec, } if (returnAttrList->commonattr & ATTR_CMN_NAME) { - cat_convertkey(hfsmp, key, rec, &c_desc); + err = cat_convertkey(hfsmp, key, rec, &c_desc); + if (err) { + /* This means that we probably had a CNID error */ + goto exit; + } } else { c_desc.cd_cnid = c_attr.ca_fileid; - if (hfsmp->hfs_flags & HFS_STANDARD) - c_desc.cd_parentcnid = key->hfs.parentID; - else + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) c_desc.cd_parentcnid = key->hfsPlus.parentID; +#if CONFIG_HFS_STD + else + c_desc.cd_parentcnid = key->hfs.parentID; +#endif + } attrblk.ab_attrlist = returnAttrList; @@ -1123,18 +1217,8 @@ UnpackSearchAttributeBlock( struct hfsmount *hfsmp, struct attrlist *alist, if (len > sizeof(searchInfo->name)) return (EINVAL); - if (hfsmp->hfs_flags & HFS_STANDARD) { - /* Convert name to pascal string to match HFS B-Tree names */ - - if (len > 0) { - if (utf8_to_hfs(HFSTOVCB(hfsmp), len-1, (u_char *)s, (u_char*)searchInfo->name) != 0) - return (EINVAL); - searchInfo->nameLength = searchInfo->name[0]; - } else { - searchInfo->name[0] = searchInfo->nameLength = 0; - } - } else { + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { size_t ucslen; /* Convert name to Unicode to match HFS Plus B-Tree names */ @@ -1148,6 +1232,20 @@ UnpackSearchAttributeBlock( struct hfsmount *hfsmp, struct attrlist *alist, searchInfo->nameLength = 0; } } +#if CONFIG_HFS_STD + else { + /* Convert name to pascal string to match HFS (Standard) B-Tree names */ + + if (len > 0) { + if (utf8_to_hfs(HFSTOVCB(hfsmp), len-1, (u_char *)s, (u_char*)searchInfo->name) != 0) + return (EINVAL); + + searchInfo->nameLength = searchInfo->name[0]; + } else { + searchInfo->name[0] = searchInfo->nameLength = 0; + } + } +#endif } attributeBuffer = (attrreference_t*) attributeBuffer +1; } diff --git a/bsd/hfs/hfs_unistr.h b/bsd/hfs/hfs_unistr.h new file mode 100644 index 000000000..5b300a28d --- /dev/null +++ b/bsd/hfs/hfs_unistr.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __HFS_UNISTR__ +#define __HFS_UNISTR__ + +#include + +/* + * hfs_unistr.h + * + * This file contains the definition of the Unicode string used for HFS Plus + * file and folder names, as described by the on-disk format. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef _HFSUNISTR255_DEFINED_ +#define _HFSUNISTR255_DEFINED_ +/* Unicode strings are used for HFS Plus file and folder names */ +struct HFSUniStr255 { + u_int16_t length; /* number of unicode characters */ + u_int16_t unicode[255]; /* unicode characters */ +} __attribute__((aligned(2), packed)); +typedef struct HFSUniStr255 HFSUniStr255; +typedef const HFSUniStr255 *ConstHFSUniStr255Param; +#endif /* _HFSUNISTR255_DEFINED_ */ + + +#ifdef __cplusplus +} +#endif + + +#endif /* __HFS_UNISTR__ */ diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index a07a4c38b..bdd433124 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -90,6 +90,10 @@ #include #include +/* for parsing boot-args */ +#include + + #include #include @@ -108,6 +112,7 @@ #include "hfs_hotfiles.h" #include "hfs_quota.h" #include "hfs_btreeio.h" +#include "hfs_kdebug.h" #include "hfscommon/headers/FileMgrInternal.h" #include "hfscommon/headers/BTreesInternal.h" @@ -116,10 +121,6 @@ #include #endif -#if CONFIG_HFS_ALLOC_RBTREE -#include "hfscommon/headers/HybridAllocator.h" -#endif - #define HFS_MOUNT_DEBUG 1 #if HFS_DIAGNOSTIC @@ -137,7 +138,11 @@ lck_grp_t * hfs_rwlock_group; lck_grp_t * hfs_spinlock_group; extern struct vnodeopv_desc hfs_vnodeop_opv_desc; + +#if CONFIG_HFS_STD extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; +static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush); +#endif /* not static so we can re-use in hfs_readwrite.c for build_path calls */ int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context); @@ -145,9 +150,9 @@ int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_ static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args); static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context); static int hfs_flushfiles(struct mount *, int, struct proc *); -static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush); static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp); static int hfs_init(struct vfsconf *vfsp); +static void hfs_locks_destroy(struct hfsmount *hfsmp); static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context); static int 
hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context); static int hfs_start(struct mount *mp, int flags, vfs_context_t context); @@ -159,7 +164,6 @@ static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_i void hfs_initialize_allocator (struct hfsmount *hfsmp); int hfs_teardown_allocator (struct hfsmount *hfsmp); -void hfs_unmap_blocks (struct hfsmount *hfsmp); int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context); int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context); @@ -328,14 +332,10 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte * Write out any pending I/O still outstanding against the device node * now that the journal has been closed. */ - if (!retval) { - if (vnode_mount(hfsmp->hfs_devvp) == mp) { - retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p); - } else { - vnode_get(hfsmp->hfs_devvp); - retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); - vnode_put(hfsmp->hfs_devvp); - } + if (retval == 0) { + vnode_get(hfsmp->hfs_devvp); + retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context); + vnode_put(hfsmp->hfs_devvp); } if (retval) { @@ -347,19 +347,30 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte hfsmp->hfs_flags &= ~HFS_READ_ONLY; goto out; } - -#if CONFIG_HFS_ALLOC_RBTREE - (void) hfs_teardown_allocator(hfsmp); -#endif + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if (hfsmp->hfs_summary_table) { + int err = 0; + /* + * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress + */ + if (hfsmp->hfs_allocation_vp) { + err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + } + FREE (hfsmp->hfs_summary_table, M_TEMP); + hfsmp->hfs_summary_table = NULL; + hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE; + if (err == 0 && hfsmp->hfs_allocation_vp){ + hfs_unlock (VTOC(hfsmp->hfs_allocation_vp)); + } + } + } + hfsmp->hfs_downgrading_proc = NULL; } /* Change to a writable file system. */ if (vfs_iswriteupgrade(mp)) { -#if CONFIG_HFS_ALLOC_RBTREE - thread_t allocator_thread; -#endif - /* * On inconsistent disks, do not allow read-write mount * unless it is the boot volume being mounted. @@ -389,6 +400,12 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); + /* We provide the mount point twice here: The first is used as + * an opaque argument to be passed back when hfs_sync_metadata + * is called. The second is provided to the throttling code to + * indicate which mount's device should be used when accounting + * for metadata writes. + */ hfsmp->jnl = journal_open(hfsmp->jvp, (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, hfsmp->jnl_size, @@ -396,7 +413,8 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte hfsmp->hfs_logical_block_size, jflags, 0, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); /* * Set up the trim callback function so that we can add @@ -471,41 +489,6 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte vfs_setextendedsecurity(HFSTOVFS(hfsmp)); } } - -#if CONFIG_HFS_ALLOC_RBTREE - /* - * Like the normal mount case, we need to handle creation of the allocation red-black tree - * if we're upgrading from read-only to read-write. 
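
The read-only downgrade path in hfs_mount() now frees the in-memory summary table, but only after taking the allocation-file cnode lock, because a bitmap scan kicked off at mount time may still be populating that table. The locking discipline, reduced to a sketch with stand-in lock and allocator helpers:

    #include <stdlib.h>

    struct vol_sketch {
        int   bitmap_locked;
        void *summary_table;    /* hfs_summary_table */
        int   has_summary;      /* HFS_SUMMARY_TABLE flag */
    };

    static void bitmap_lock(struct vol_sketch *v)   { v->bitmap_locked = 1; }
    static void bitmap_unlock(struct vol_sketch *v) { v->bitmap_locked = 0; }

    static void teardown_summary(struct vol_sketch *v)
    {
        if (!v->has_summary || v->summary_table == NULL)
            return;
        bitmap_lock(v);          /* serialize against an in-flight bitmap scan */
        free(v->summary_table);  /* FREE(..., M_TEMP) in the kernel */
        v->summary_table = NULL;
        v->has_summary = 0;
        bitmap_unlock(v);
    }
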
- * - * We spawn a thread to create the pair of red-black trees for this volume. - * However, in so doing, we must be careful to ensure that if this thread is still - * running after mount has finished, it doesn't interfere with an unmount. Specifically, - * we'll need to set a bit that indicates we're in progress building the trees here. - * Unmount will check for this bit, and then if it's set, mark a corresponding bit that - * notifies the tree generation code that an unmount is waiting. Also, mark the extent - * tree flags that the allocator is enabled for use before we spawn the thread that will start - * scanning the RB tree. - * - * Only do this if we're operating on a read-write mount (we wouldn't care for read-only), - * which has not previously encountered a bad error on the red-black tree code. Also, don't - * try to re-build a tree that already exists. - * - * When this is enabled, we must re-integrate the above function into our bitmap iteration - * so that we accurately send TRIMs down to the underlying disk device as needed. - */ - - if (hfsmp->extent_tree_flags == 0) { - hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED); - /* Initialize EOF counter so that the thread can assume it started at initial values */ - hfsmp->offset_block_end = 0; - - InitTree(hfsmp); - - kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread); - thread_deallocate(allocator_thread); - } - -#endif } /* Update file system parameters. */ @@ -520,62 +503,50 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS)); retval = hfs_mountfs(devvp, mp, &args, 0, context); - if (retval && HFS_MOUNT_DEBUG) { - printf("hfs_mount: hfs_mountfs returned %d\n", retval); - } -#if CONFIG_PROTECT - /* - * If above mount call was successful, and this mount is content protection - * enabled, then verify the on-disk EA on the root to ensure that the filesystem - * is of a suitable vintage to allow the mount to proceed. - */ - if ((retval == 0) && (cp_fs_protected (mp))) { - int err = 0; - - struct cp_root_xattr *xattr = NULL; - MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK); - if (xattr == NULL) { - err = ENOMEM; - goto badalloc; + if (retval) { + const char *name = vnode_getname(devvp); + printf("hfs_mount: hfs_mountfs returned error=%d for device %s\n", retval, (name ? name : "unknown-dev")); + if (name) { + vnode_putname(name); } - bzero (xattr, sizeof(struct cp_root_xattr)); - hfsmp = vfs_fsprivate(mp); + goto out; + } - /* go get the EA to get the version information */ - err = cp_getrootxattr (hfsmp, xattr); - /* - * If there was no EA there, then write one out. - * Assuming EA is not present on the root means - * this is an erase install or a very old FS - */ - if (err == ENOATTR) { - printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS); - bzero(xattr, sizeof(struct cp_root_xattr)); - xattr->major_version = CP_NEW_MAJOR_VERS; - xattr->minor_version = CP_MINOR_VERS; - xattr->flags = 0; - - err = cp_setrootxattr (hfsmp, xattr); - } + /* After hfs_mountfs succeeds, we should have valid hfsmp */ + hfsmp = VFSTOHFS(mp); - /* - * For any other error, including having an out of date CP version in the - * EA, or for an error out of cp_setrootxattr, deny the mount - * and do not proceed further. 
- */ - if (err || (xattr->major_version != CP_NEW_MAJOR_VERS && xattr->major_version != CP_PREV_MAJOR_VERS)) { - /* Deny the mount and tear down. */ - retval = EPERM; - (void) hfs_unmount (mp, MNT_FORCE, context); - } - printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version); -badalloc: - if(xattr) { - FREE(xattr, M_TEMP); + /* + * Check to see if the file system exists on CoreStorage. + * + * This must be done after examining the root folder's CP EA since + * hfs_vfs_root will create a vnode (which must not occur until after + * we've established the CP level of the FS). + */ + if (retval == 0) { + errno_t err; + vnode_t root_vnode; + err = hfs_vfs_root(mp, &root_vnode, context); + if (err == 0) { + if (VNOP_IOCTL(devvp, _DKIOCCSSETFSVNODE, + (caddr_t)&root_vnode, 0, context) == 0) { + err = vnode_ref(root_vnode); + if (err == 0) { + hfsmp->hfs_flags |= HFS_CS; + } + } + + err = vnode_put(root_vnode); + if (err) { + printf("hfs: could not release io count on root vnode with error: %d\n", + err); + } + } else { + printf("hfs: could not get root vnode with error: %d\n", + err); } } -#endif } + out: if (retval == 0) { (void)hfs_statfs(mp, vfs_statfs(mp), context); @@ -608,7 +579,7 @@ hfs_changefs_callback(struct vnode *vp, void *cargs) vcb = HFSTOVCB(args->hfsmp); lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL); + error = cat_lookup(args->hfsmp, &cp->c_desc, 0, 0, &cndesc, &cnattr, NULL, NULL); hfs_systemfile_unlock(args->hfsmp, lockflags); if (error) { /* @@ -654,12 +625,15 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) int namefix, permfix, permswitch; struct hfsmount *hfsmp; ExtendedVCB *vcb; - hfs_to_unicode_func_t get_unicode_func; - unicode_to_hfs_func_t get_hfsname_func; - u_int32_t old_encoding = 0; struct hfs_changefs_cargs cargs; u_int32_t mount_flags; +#if CONFIG_HFS_STD + u_int32_t old_encoding = 0; + hfs_to_unicode_func_t get_unicode_func; + unicode_to_hfs_func_t get_hfsname_func; +#endif + hfsmp = VFSTOHFS(mp); vcb = HFSTOVCB(hfsmp); mount_flags = (unsigned int)vfs_flags(mp); @@ -719,6 +693,7 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) } } +#if CONFIG_HFS_STD /* Change the hfs encoding value (hfs only) */ if ((vcb->vcbSigWord == kHFSSigWord) && (args->hfs_encoding != (u_int32_t)VNOVAL) && @@ -743,6 +718,7 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) hfsmp->hfs_encoding = args->hfs_encoding; ++namefix; } +#endif if (!(namefix || permfix || permswitch)) goto exit; @@ -772,16 +748,20 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args) vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs); +#if CONFIG_HFS_STD /* * If we're switching name converters we can now * connect the new hfs_get_hfsname converter and * release our interest in the old converters. 
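The CoreStorage hookup above follows a strict reference-count discipline: hfs_vfs_root() hands back the root with a short-term iocount, vnode_ref() adds a long-term usecount only if the ioctl registration succeeded (recorded in HFS_CS), and vnode_put() always drops the iocount. A toy model of that bookkeeping, with plain counters in place of real vnodes:

#include <stdio.h>

struct vnode_model {
    int iocount;    /* short-term: held across one operation */
    int usecount;   /* long-term: keeps the vnode alive */
};

static int
register_root(struct vnode_model *vp, int ioctl_ok)
{
    vp->iocount++;          /* hfs_vfs_root() returns with an iocount held */
    if (ioctl_ok)
        vp->usecount++;     /* vnode_ref(): root stays referenced while HFS_CS is set */
    vp->iocount--;          /* vnode_put(): always drop the short-term count */
    return ioctl_ok ? 0 : -1;
}

int
main(void)
{
    struct vnode_model root = { 0, 0 };
    register_root(&root, 1);
    printf("iocount=%d usecount=%d\n", root.iocount, root.usecount);
    return 0;
}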
*/ if (namefix) { + /* HFS standard only */ hfsmp->hfs_get_hfsname = get_hfsname_func; vcb->volumeNameEncodingHint = args->hfs_encoding; (void) hfs_relconverter(old_encoding); } +#endif + exit: hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS; return (retval); @@ -1022,247 +1002,158 @@ hfs_reload(struct mount *mountp) } +static uint64_t timeval_to_microseconds(struct timeval *tv) +{ + return tv->tv_sec * 1000000ULL + tv->tv_usec; +} static void hfs_syncer(void *arg0, void *unused) { #pragma unused(unused) - + struct hfsmount *hfsmp = arg0; clock_sec_t secs; clock_usec_t usecs; - uint32_t delay = HFS_META_DELAY; + uint64_t deadline = 0; uint64_t now; - static int no_max=1; - - clock_get_calendar_microtime(&secs, &usecs); - now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; - - // - // If the amount of pending writes is more than our limit, wait - // for 2/3 of it to drain and then flush the journal. - // - if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) { - int counter=0; - uint64_t pending_io, start, rate = 0; - - no_max = 0; - - hfs_start_transaction(hfsmp); // so we hold off any new i/o's - - pending_io = hfsmp->hfs_mp->mnt_pending_write_size; - - clock_get_calendar_microtime(&secs, &usecs); - start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; - - while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) { - tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10); - } - - if (counter >= 500) { - printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size); - } - - if (hfsmp->jnl) { - journal_flush(hfsmp->jnl, FALSE); - } else { - hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); - } - - clock_get_calendar_microtime(&secs, &usecs); - now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; - hfsmp->hfs_last_sync_time = now; - if (now != start) { - rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second - } - - hfs_end_transaction(hfsmp); - - // - // If a reasonable amount of time elapsed then check the - // i/o rate. If it's taking less than 1 second or more - // than 2 seconds, adjust hfs_max_pending_io so that we - // will allow about 1.5 seconds of i/o to queue up. - // - if (((now - start) >= 300000) && (rate != 0)) { - uint64_t scale = (pending_io * 100) / rate; - - if (scale < 100 || scale > 200) { - // set it so that it should take about 1.5 seconds to drain - hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL; - } - } - - } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL) - || (((now - hfsmp->hfs_last_sync_time) >= 100000LL) - && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL) - && (hfsmp->hfs_active_threads == 0) - && (hfsmp->hfs_global_lock_nesting == 0))) { - - // - // Flush the journal if more than 5 seconds elapsed since - // the last sync OR we have not sync'ed recently and the - // last sync request time was more than 100 milliseconds - // ago and no one is in the middle of a transaction right - // now. Else we defer the sync and reschedule it. 
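timeval_to_microseconds() above folds a struct timeval into a single 64-bit microsecond count; the reworked syncer then compares only uptime-based values with each other. A userspace equivalent, where CLOCK_MONOTONIC plays the role of clock_get_system_microtime (it never jumps when wall-clock time is reset):

#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <sys/time.h>

static uint64_t
timeval_to_microseconds(const struct timeval *tv)
{
    return (uint64_t)tv->tv_sec * 1000000ULL + (uint64_t)tv->tv_usec;
}

/* Monotonic "now" in microseconds, safe to subtract from other uptimes. */
static uint64_t
uptime_microseconds(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000ULL + (uint64_t)ts.tv_nsec / 1000ULL;
}

int
main(void)
{
    struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
    printf("tv     = %llu us\n", (unsigned long long)timeval_to_microseconds(&tv));
    printf("uptime = %llu us\n", (unsigned long long)uptime_microseconds());
    return 0;
}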
- // - if (hfsmp->jnl) { - hfs_lock_global (hfsmp, HFS_SHARED_LOCK); - - journal_flush(hfsmp->jnl, FALSE); - - hfs_unlock_global (hfsmp); - } else { - hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); - } - - clock_get_calendar_microtime(&secs, &usecs); - now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; - hfsmp->hfs_last_sync_time = now; - - } else if (hfsmp->hfs_active_threads == 0) { - uint64_t deadline; - - clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline); - thread_call_enter_delayed(hfsmp->hfs_syncer, deadline); - - // note: we intentionally return early here and do not - // decrement the sync_scheduled and sync_incomplete - // variables because we rescheduled the timer. - - return; - } - - // - // NOTE: we decrement these *after* we're done the journal_flush() since - // it can take a significant amount of time and so we don't want more - // callbacks scheduled until we're done this one. - // - OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled); - OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete); - wakeup((caddr_t)&hfsmp->hfs_sync_incomplete); -} - - -extern int IOBSDIsMediaEjectable( const char *cdev_name ); - -/* - * Initialization code for Red-Black Tree Allocator - * - * This function will build the two red-black trees necessary for allocating space - * from the metadata zone as well as normal allocations. Currently, we use - * an advisory read to get most of the data into the buffer cache. - * This function is intended to be run in a separate thread so as not to slow down mount. - * - */ - -void -hfs_initialize_allocator (struct hfsmount *hfsmp) { - -#if CONFIG_HFS_ALLOC_RBTREE - u_int32_t err; - + + clock_get_system_microtime(&secs, &usecs); + now = ((uint64_t)secs * USEC_PER_SEC) + (uint64_t)usecs; + KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER | DBG_FUNC_START, hfsmp, now, timeval_to_microseconds(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp), hfsmp->hfs_mp->mnt_pending_write_size, 0); + + /* + * Flush the journal if there have been no writes (or outstanding writes) for 0.1 seconds. + * + * WARNING! last_write_completed >= last_write_issued isn't sufficient to test whether + * there are still outstanding writes. We could have issued a whole bunch of writes, + * and then stopped issuing new writes, then one or more of those writes complete. + * + * NOTE: This routine uses clock_get_system_microtime (i.e. uptime) instead of + * clock_get_calendar_microtime (i.e. wall time) because mnt_last_write_completed_timestamp + * and mnt_last_write_issued_timestamp are also stored as system (uptime) times. + * Trying to compute durations from a mix of system and calendar times is meaningless + * since they are relative to different points in time. + */ + hfs_start_transaction(hfsmp); // so we hold off any new writes + uint64_t last_write_completed = timeval_to_microseconds(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp); + if (hfsmp->hfs_mp->mnt_pending_write_size == 0 && (now - last_write_completed) >= HFS_META_DELAY) { /* - * Take the allocation file lock. Journal transactions will block until - * we're done here. + * Time to flush the journal. */ - int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER_TIMED | DBG_FUNC_START, now, last_write_completed, timeval_to_microseconds(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp), hfsmp->hfs_mp->mnt_pending_write_size, 0); /* - * GenerateTree assumes that the bitmap lock is held when you call the function. 
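The rewritten syncer condenses the old rate-tracking heuristics into one predicate: flush only when nothing is pending and the device has been quiet for at least HFS_META_DELAY; otherwise re-arm the timer. The decision isolated as a pure function (times in microseconds; the delay value is illustrative):

#include <stdio.h>
#include <stdint.h>

#define META_DELAY_US 100000ULL    /* 0.1 s, mirroring the intent of HFS_META_DELAY */

/* Returns 1 when the journal should be flushed now, 0 to defer. */
static int
should_flush(uint64_t pending_write_bytes, uint64_t now_us,
    uint64_t last_write_completed_us)
{
    return pending_write_bytes == 0 &&
        (now_us - last_write_completed_us) >= META_DELAY_US;
}

int
main(void)
{
    printf("%d\n", should_flush(0, 1000000, 800000));    /* quiet long enough: flush */
    printf("%d\n", should_flush(0, 1000000, 950000));    /* wrote too recently: defer */
    printf("%d\n", should_flush(4096, 1000000, 800000)); /* writes outstanding: defer */
    return 0;
}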
- * It will drop and re-acquire the lock periodically as needed to let other allocations - * through. It returns with the bitmap lock held. Since we only maintain one tree, - * we don't need to specify a start block (always starts at 0). + * We intentionally do a synchronous flush (of the journal or entire volume) here. + * For journaled volumes, this means we wait until the metadata blocks are written + * to both the journal and their final locations (in the B-trees, etc.). + * + * This tends to avoid interleaving the metadata writes with other writes (for + * example, user data, or to the journal when a later transaction notices that + * an earlier transaction has finished its async writes, and then updates the + * journal start in the journal header). Avoiding interleaving of writes is + * very good for performance on simple flash devices like SD cards, thumb drives; + * and on devices like floppies. Since removable devices tend to be this kind of + * simple device, doing a synchronous flush actually improves performance in + * practice. + * + * NOTE: For non-journaled volumes, the call to hfs_sync will also cause dirty + * user data to be written. */ - err = GenerateTree(hfsmp, hfsmp->totalBlocks, &flags, 1); - if (err) { - goto bailout; + if (hfsmp->jnl) { + hfs_journal_flush(hfsmp, TRUE); + } else { + hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel()); } - /* Mark offset tree as built */ - hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE; -bailout: - /* - * GenerateTree may drop the bitmap lock during operation in order to give other - * threads a chance to allocate blocks, but it will always return with the lock held, so - * we don't need to re-grab the lock in order to update the TREEBUILD_INFLIGHT bit. + clock_get_system_microtime(&secs, &usecs); + now = ((uint64_t)secs * USEC_PER_SEC) + (uint64_t)usecs; + + KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER_TIMED | DBG_FUNC_END, now, timeval_to_microseconds(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp), timeval_to_microseconds(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp), hfsmp->hfs_mp->mnt_pending_write_size, 0); + hfs_end_transaction(hfsmp); + + // + // NOTE: we decrement these *after* we've done the journal_flush() since + // it can take a significant amount of time and so we don't want more + // callbacks scheduled until we've done this one. + // + OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled); + OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete); + wakeup((caddr_t)&hfsmp->hfs_sync_incomplete); + } else { + /* + * Defer the journal flush by rescheduling the timer. */ - hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT; - if (err != 0) { - /* Wakeup any waiters on the allocation bitmap lock */ - wakeup((caddr_t)&hfsmp->extent_tree_flags); - } - hfs_systemfile_unlock(hfsmp, flags); -#else -#pragma unused (hfsmp) -#endif + clock_interval_to_deadline(HFS_META_DELAY, NSEC_PER_USEC, &deadline); + thread_call_enter_delayed(hfsmp->hfs_syncer, deadline); + + // note: we intentionally return early here and do not + // decrement the sync_scheduled and sync_incomplete + // variables because we rescheduled the timer. + + hfs_end_transaction(hfsmp); + } + KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER| DBG_FUNC_END, deadline ? EAGAIN : 0, deadline, 0, 0, 0); } -void hfs_unmap_blocks (struct hfsmount *hfsmp) { + +extern int IOBSDIsMediaEjectable( const char *cdev_name ); + +/* + * Call into the allocator code and perform a full scan of the bitmap file. 
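Note the ordering the comments insist on: the scheduled/incomplete counters are decremented only after the (possibly long) flush finishes, so no second callback can be armed mid-flush. A C11 stdatomic model of that gate; the scheduling side is collapsed to a synchronous call and the field names are local stand-ins:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int sync_scheduled;
static atomic_int sync_incomplete;

static void
syncer_callback(void)
{
    /* ... a long journal flush would happen here ... */
    /* Only now permit another callback to be scheduled. */
    atomic_fetch_sub(&sync_scheduled, 1);
    atomic_fetch_sub(&sync_incomplete, 1);
}

static void
request_sync(void)
{
    int expected = 0;
    /* Arm the "timer" only if no callback is already pending. */
    if (atomic_compare_exchange_strong(&sync_scheduled, &expected, 1)) {
        atomic_fetch_add(&sync_incomplete, 1);
        syncer_callback();      /* stands in for thread_call_enter_delayed */
    }
}

int
main(void)
{
    request_sync();
    request_sync();             /* would coalesce with a still-pending callback */
    printf("scheduled=%d incomplete=%d\n",
        atomic_load(&sync_scheduled), atomic_load(&sync_incomplete));
    return 0;
}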
+ * + * This allows us to TRIM unallocated ranges if needed, and also to build up + * an in-memory summary table of the state of the allocated blocks. + */ +void hfs_scan_blocks (struct hfsmount *hfsmp) { /* * Take the allocation file lock. Journal transactions will block until * we're done here. */ - int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - - /* - * UnmapBlocks assumes that the bitmap lock is held when you call the function. - * We don't care if there were any error issuing unmaps yet. - */ - (void) UnmapBlocks(hfsmp); - - hfs_systemfile_unlock(hfsmp, flags); -} - - -/* - * Teardown code for the Red-Black Tree allocator. - * This function consolidates the code which serializes with respect - * to a thread that may be potentially still building the tree when we need to begin - * tearing it down. Since the red-black tree may not be live when we enter this function - * we return: - * 1 -> Tree was live. - * 0 -> Tree was not active at time of call. - */ - -int -hfs_teardown_allocator (struct hfsmount *hfsmp) { - int rb_used = 0; -#if CONFIG_HFS_ALLOC_RBTREE - - int flags = 0; + int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); /* - * Check to see if the tree-generation is still on-going. - * If it is, then block until it's done. + * We serialize here with the HFS mount lock as we're mounting. + * + * The mount can only proceed once this thread has acquired the bitmap + * lock, since we absolutely do not want someone else racing in and + * getting the bitmap lock, doing a read/write of the bitmap file, + * then us getting the bitmap lock. + * + * To prevent this, the mount thread takes the HFS mount mutex, starts us + * up, then immediately msleeps on the scan_var variable in the mount + * point as a condition variable. This serialization is safe since + * if we race in and try to proceed while they're still holding the lock, + * we'll block trying to acquire the global lock. Since the mount thread + * acquires the HFS mutex before starting this function in a new thread, + * any lock acquisition on our part must be linearizably AFTER the mount thread's. + * + * Note that the HFS mount mutex is always taken last, and always for only + * a short time. In this case, we just take it long enough to mark the + * scan-in-flight bit. */ - - flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - - - while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) { - hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT; - - lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE, - &hfsmp->extent_tree_flags, THREAD_UNINT); - } - - if (hfs_isrbtree_active (hfsmp)) { - rb_used = 1; - - /* Tear down the RB Trees while we have the bitmap locked */ - DestroyTrees(hfsmp); + (void) hfs_lock_mount (hfsmp); + hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_INFLIGHT; + wakeup((caddr_t) &hfsmp->scan_var); + hfs_unlock_mount (hfsmp); + + /* Initialize the summary table */ + if (hfs_init_summary (hfsmp)) { + printf("hfs: could not initialize summary table for %s\n", hfsmp->vcbVN); + } - } + /* + * ScanUnmapBlocks assumes that the bitmap lock is held when you + * call the function. We don't care if there were any errors issuing unmaps. + * + * It will also attempt to build up the summary table for subsequent + * allocator use, as configured. 
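The serialization described above is a classic condition-variable handshake: the mount thread holds the mount mutex, starts the scanner, and sleeps until the scanner has marked itself in flight (by which point the scanner owns the bitmap lock). The same ordering in pthreads, as a sketch of the protocol rather than the kernel's msleep-based code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mount_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  scan_cv     = PTHREAD_COND_INITIALIZER;
static int scan_inflight;               /* HFS_ALLOCATOR_SCAN_INFLIGHT stand-in */

static void *
scanner(void *arg)
{
    (void)arg;
    /* ... the real scanner takes the bitmap lock before announcing ... */
    pthread_mutex_lock(&mount_mutex);
    scan_inflight = 1;                  /* mark the scan in flight */
    pthread_cond_broadcast(&scan_cv);   /* wakeup(&hfsmp->scan_var) analogue */
    pthread_mutex_unlock(&mount_mutex);
    return NULL;
}

int
main(void)
{
    pthread_t t;

    pthread_mutex_lock(&mount_mutex);   /* mount thread holds the mutex first */
    pthread_create(&t, NULL, scanner, NULL);
    while (!scan_inflight)              /* msleep-on-scan_var analogue */
        pthread_cond_wait(&scan_cv, &mount_mutex);
    pthread_mutex_unlock(&mount_mutex);
    pthread_join(t, NULL);
    printf("scanner owns the bitmap; mount can proceed\n");
    return 0;
}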
+ */ + (void) ScanUnmapBlocks(hfsmp); hfs_systemfile_unlock(hfsmp, flags); -#else - #pragma unused (hfsmp) -#endif - return rb_used; - } static int hfs_root_unmounted_cleanly = 0; @@ -1300,9 +1191,6 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int isroot = 0; u_int32_t device_features = 0; int isssd; -#if CONFIG_HFS_ALLOC_RBTREE - thread_t allocator_thread; -#endif if (args == NULL) { /* only hfs_mountroot passes us NULL as the 'args' argument */ @@ -1351,7 +1239,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, */ phys_blksize = log_blksize; } - if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) { + if (phys_blksize == 0 || phys_blksize > MAXBSIZE) { printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize); retval = ENXIO; goto error_exit; @@ -1401,13 +1289,14 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, /* * The cluster layer is not currently prepared to deal with a logical - * block size larger than the system's page size. (It can handle + * block size larger than the system's page size. (It can handle * blocks per page, but not multiple pages per block.) So limit the * logical block size to the page size. */ - if (log_blksize > PAGE_SIZE) + if (log_blksize > PAGE_SIZE) { log_blksize = PAGE_SIZE; - + } + /* Now switch to our preferred physical block size. */ if (log_blksize > 512) { if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) { @@ -1466,6 +1355,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, hfs_chashinit_finish(hfsmp); + /* Init the ID lookup hashtable */ + hfs_idhash_init (hfsmp); + /* * See if the disk supports unmap (trim). * @@ -1628,7 +1520,8 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, goto error_exit; #endif - } else /* Mount an HFS Plus disk */ { + } + else { /* Mount an HFS Plus disk */ HFSPlusVolumeHeader *vhp; off_t embeddedOffset; int jnl_disable = 0; @@ -1700,7 +1593,8 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, bp = NULL; vhp = (HFSPlusVolumeHeader*) mdbp; - } else /* pure HFS+ */ { + } + else { /* pure HFS+ */ embeddedOffset = 0; vhp = (HFSPlusVolumeHeader*) mdbp; } @@ -1942,7 +1836,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, goto error_exit; } - mp->mnt_vfsstat.f_fsid.val[0] = (long)dev; + mp->mnt_vfsstat.f_fsid.val[0] = dev; mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp); vfs_setmaxsymlen(mp, 0); @@ -1950,13 +1844,16 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, #if NAMEDSTREAMS mp->mnt_kern_flag |= MNTK_NAMED_STREAMS; #endif - if (!(hfsmp->hfs_flags & HFS_STANDARD)) { + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0 ) { /* Tell VFS that we support directory hard links. */ mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS; - } else { + } +#if CONFIG_HFS_STD + else { /* HFS standard doesn't support extended readdir! */ mount_set_noreaddirext (mp); } +#endif if (args) { /* @@ -2016,44 +1913,8 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, } } -#if CONFIG_HFS_MOUNT_UNMAP - /* Enable UNMAPs for embedded SSDs only for now */ - /* - * TODO: Should we enable this for CoreStorage volumes, too? 
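hfs_mountfs now rejects physical block sizes above MAXBSIZE outright and clamps the logical block size to the page size, because the cluster layer copes with several blocks per page but never with a block spanning multiple pages. Both checks distilled into one helper (the constants are stand-ins for MAXBSIZE and PAGE_SIZE):

#include <stdio.h>
#include <stdint.h>

#define MODEL_MAXBSIZE  (1024u * 1024u) /* stand-in for MAXBSIZE */
#define MODEL_PAGE_SIZE 4096u           /* stand-in for PAGE_SIZE */

/* Returns 0 and adjusts *log_blksize in place, or -1 if unmountable. */
static int
validate_blocksizes(uint32_t phys_blksize, uint32_t *log_blksize)
{
    if (phys_blksize == 0 || phys_blksize > MODEL_MAXBSIZE)
        return -1;                      /* "looks bad. Not mounting." */
    if (*log_blksize > MODEL_PAGE_SIZE)
        *log_blksize = MODEL_PAGE_SIZE; /* no multiple pages per block */
    return 0;
}

int
main(void)
{
    uint32_t lbs = 8192;
    if (validate_blocksizes(4096, &lbs) == 0)
        printf("logical block size clamped to %u\n", lbs);
    return 0;
}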
- */ - if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { - if (hfsmp->hfs_flags & HFS_UNMAP) { - hfs_unmap_blocks(hfsmp); - } - } -#endif - - -#if CONFIG_HFS_ALLOC_RBTREE - /* - * We spawn a thread to create the pair of red-black trees for this volume. - * However, in so doing, we must be careful to ensure that if this thread is still - * running after mount has finished, it doesn't interfere with an unmount. Specifically, - * we'll need to set a bit that indicates we're in progress building the trees here. - * Unmount will check for this bit, and then if it's set, mark a corresponding bit that - * notifies the tree generation code that an unmount is waiting. Also mark the bit that - * indicates the tree is live and operating. - * - * Only do this if we're operating on a read-write mount (we wouldn't care for read-only). - */ - - if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { - hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED); - - /* Initialize EOF counter so that the thread can assume it started at initial values */ - hfsmp->offset_block_end = 0; - InitTree(hfsmp); - - kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread); - thread_deallocate(allocator_thread); - } - -#endif + printf("hfs: mounted %s on device %s\n", (hfsmp->vcbVN ? (const char*) hfsmp->vcbVN : "unknown"), + (devvp->v_name ? devvp->v_name : (isroot ? "root_device": "unknown device"))); /* * Start looking for free space to drop below this level and generate a @@ -2083,7 +1944,9 @@ error_exit: if (hfsmp->hfs_devvp) { vnode_rele(hfsmp->hfs_devvp); } + hfs_locks_destroy(hfsmp); hfs_delete_chash(hfsmp); + hfs_idhash_destroy (hfsmp); FREE(hfsmp, M_HFSMNT); vfs_setfsprivate(mp, NULL); @@ -2116,7 +1979,6 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) int flags; int force; int started_tr = 0; - int rb_used = 0; flags = 0; force = 0; @@ -2125,6 +1987,10 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) force = 1; } + printf("hfs: unmount initiated on %s on device %s\n", + (hfsmp->vcbVN ? (const char*) hfsmp->vcbVN : "unknown"), + (hfsmp->hfs_devvp ? ((hfsmp->hfs_devvp->v_name ? 
hfsmp->hfs_devvp->v_name : "unknown device")) : "unknown device")); + if ((retval = hfs_flushfiles(mp, flags, p)) && !force) return (retval); @@ -2162,10 +2028,26 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) if (hfsmp->hfs_sync_incomplete < 0) panic("hfs_unmount: pm_sync_incomplete underflow!\n"); } - -#if CONFIG_HFS_ALLOC_RBTREE - rb_used = hfs_teardown_allocator(hfsmp); -#endif + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if (hfsmp->hfs_summary_table) { + int err = 0; + /* + * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress + */ + if (hfsmp->hfs_allocation_vp) { + err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + } + FREE (hfsmp->hfs_summary_table, M_TEMP); + hfsmp->hfs_summary_table = NULL; + hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE; + + if (err == 0 && hfsmp->hfs_allocation_vp){ + hfs_unlock (VTOC(hfsmp->hfs_allocation_vp)); + } + + } + } /* * Flush out the b-trees, volume bitmap and Volume Header @@ -2179,7 +2061,7 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) } if (hfsmp->hfs_startup_vp) { - (void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK); + (void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p); hfs_unlock(VTOC(hfsmp->hfs_startup_vp)); if (retval && !force) @@ -2187,27 +2069,27 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) } if (hfsmp->hfs_attribute_vp) { - (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK); + (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p); hfs_unlock(VTOC(hfsmp->hfs_attribute_vp)); if (retval && !force) goto err_exit; } - (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK); + (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p); hfs_unlock(VTOC(hfsmp->hfs_catalog_vp)); if (retval && !force) goto err_exit; - (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK); + (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p); hfs_unlock(VTOC(hfsmp->hfs_extents_vp)); if (retval && !force) goto err_exit; if (hfsmp->hfs_allocation_vp) { - (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK); + (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p); hfs_unlock(VTOC(hfsmp->hfs_allocation_vp)); if (retval && !force) @@ -2229,31 +2111,23 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask; } - - if (rb_used) { - /* If the rb-tree was live, just set min_start to 0 */ - hfsmp->nextAllocation = 0; - } - else { - if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { - int i; - u_int32_t min_start = hfsmp->totalBlocks; - - // set the nextAllocation pointer to the smallest free block number - // we've seen so on the next mount we won't rescan unnecessarily - lck_spin_lock(&hfsmp->vcbFreeExtLock); - for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) { - if (hfsmp->vcbFreeExt[i].startBlock < min_start) { - min_start = hfsmp->vcbFreeExt[i].startBlock; - } - } - lck_spin_unlock(&hfsmp->vcbFreeExtLock); - if (min_start < hfsmp->nextAllocation) { - 
hfsmp->nextAllocation = min_start; + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + int i; + u_int32_t min_start = hfsmp->totalBlocks; + + // set the nextAllocation pointer to the smallest free block number + // we've seen so on the next mount we won't rescan unnecessarily + lck_spin_lock(&hfsmp->vcbFreeExtLock); + for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) { + if (hfsmp->vcbFreeExt[i].startBlock < min_start) { + min_start = hfsmp->vcbFreeExt[i].startBlock; } } + lck_spin_unlock(&hfsmp->vcbFreeExtLock); + if (min_start < hfsmp->nextAllocation) { + hfsmp->nextAllocation = min_start; + } } - retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0); if (retval) { @@ -2277,8 +2151,11 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) */ (void) hfsUnmount(hfsmp, p); - if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) +#if CONFIG_HFS_STD + if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) { (void) hfs_relconverter(hfsmp->hfs_encoding); + } +#endif // XXXdbg if (hfsmp->jnl) { @@ -2314,11 +2191,12 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context) vnode_rele(tmpvp); } #endif /* HFS_SPARSE_DEV */ - lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group); - lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group); + vnode_rele(hfsmp->hfs_devvp); + hfs_locks_destroy(hfsmp); hfs_delete_chash(hfsmp); + hfs_idhash_destroy(hfsmp); FREE(hfsmp, M_HFSMNT); return (0); @@ -2457,14 +2335,21 @@ hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_contex * 128: Mac OS Standard * */ - if (hfsmp->hfs_flags & HFS_STANDARD) { - subtype = HFS_SUBTYPE_STANDARDHFS; - } else /* HFS Plus */ { - if (hfsmp->jnl) + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + /* HFS+ & variants */ + if (hfsmp->jnl) { subtype |= HFS_SUBTYPE_JOURNALED; - if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + } + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) { subtype |= HFS_SUBTYPE_CASESENSITIVE; + } } +#if CONFIG_HFS_STD + else { + /* HFS standard */ + subtype = HFS_SUBTYPE_STANDARDHFS; + } +#endif sbp->f_fssubtype = subtype; return (0); @@ -2544,7 +2429,7 @@ hfs_sync_callback(struct vnode *vp, void *cargs) args = (struct hfs_sync_cargs *)cargs; - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) { + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { return (VNODE_RETURNED); } cp = VTOC(vp); @@ -2627,7 +2512,7 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) continue; /* XXX use hfs_systemfile_lock instead ? */ - (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK); + (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); cp = VTOC(btvp); if (((cp->c_flag & C_MODIFIED) == 0) && @@ -2650,6 +2535,8 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) vnode_put(btvp); }; + +#if CONFIG_HFS_STD /* * Force stale file system control information to be flushed. 
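The sparse-device unmount path above scans the cached free extents under vcbFreeExtLock for the smallest start block and only ever moves the nextAllocation hint down, so the next mount can skip an unnecessary rescan. The scan itself, extracted into a standalone form:

#include <stdio.h>
#include <stdint.h>

struct free_extent {
    uint32_t startBlock;
    uint32_t blockCount;
};

/* Smallest free start block in the cache, or total_blocks when empty. */
static uint32_t
min_free_start(const struct free_extent *ext, int count, uint32_t total_blocks)
{
    uint32_t min_start = total_blocks;
    int i;

    for (i = 0; i < count; i++) {
        if (ext[i].startBlock < min_start)
            min_start = ext[i].startBlock;
    }
    return min_start;
}

int
main(void)
{
    struct free_extent cache[] = { { 900, 4 }, { 120, 16 }, { 450, 2 } };
    uint32_t next_allocation = 800;
    uint32_t m = min_free_start(cache, 3, 1000);

    if (m < next_allocation)
        next_allocation = m;    /* only ever lower the hint */
    printf("nextAllocation = %u\n", next_allocation);
    return 0;
}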
*/ @@ -2658,6 +2545,8 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) allerror = error; } } +#endif + #if QUOTA hfs_qsync(mp); #endif /* QUOTA */ @@ -2677,16 +2566,6 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) hfs_journal_flush(hfsmp, FALSE); } - { - clock_sec_t secs; - clock_usec_t usecs; - uint64_t now; - - clock_get_calendar_microtime(&secs, &usecs); - now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs; - hfsmp->hfs_last_sync_time = now; - } - lck_rw_unlock_shared(&hfsmp->hfs_insync); return (allerror); } @@ -2770,7 +2649,13 @@ hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_conte /* - * Initial HFS filesystems, done only once. + * Initialize HFS filesystems, done only once per boot. + * + * HFS is not a kext-based file system. This makes it difficult to find + * out when the last HFS file system was unmounted and call hfs_uninit() + * to deallocate data structures allocated in hfs_init(). Therefore we + * never deallocate memory allocated by lock attribute and group initializations + * in this function. */ static int hfs_init(__unused struct vfsconf *vfsp) @@ -2785,7 +2670,6 @@ hfs_init(__unused struct vfsconf *vfsp) BTReserveSetup(); - hfs_lock_attr = lck_attr_alloc_init(); hfs_group_attr = lck_grp_attr_alloc_init(); hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr); @@ -2793,12 +2677,30 @@ hfs_init(__unused struct vfsconf *vfsp) hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr); #if HFS_COMPRESSION - decmpfs_init(); + decmpfs_init(); #endif return (0); } + +/* + * Destroy all locks, mutexes and spinlocks in hfsmp on unmount or failed mount + */ +static void +hfs_locks_destroy(struct hfsmount *hfsmp) +{ + + lck_mtx_destroy(&hfsmp->hfs_mutex, hfs_mutex_group); + lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group); + lck_rw_destroy(&hfsmp->hfs_global_lock, hfs_rwlock_group); + lck_rw_destroy(&hfsmp->hfs_insync, hfs_rwlock_group); + lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group); + + return; +} + + static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp) { @@ -2851,7 +2753,7 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp, return (error); } else if (name[0] == HFS_EXTEND_FS) { - u_int64_t newsize; + u_int64_t newsize; vnode_t vp = vfs_context_cwd(context); if (newp == USER_ADDR_NULL || vp == NULLVP) @@ -2909,13 +2811,23 @@ encodinghint_exit: vnode_t vp = vfs_context_cwd(context); vnode_t jvp; ExtendedVCB *vcb; - struct cat_attr jnl_attr, jinfo_attr; - struct cat_fork jnl_fork, jinfo_fork; + struct cat_attr jnl_attr; + struct cat_attr jinfo_attr; + struct cat_fork jnl_fork; + struct cat_fork jinfo_fork; + buf_t jib_buf; + uint64_t jib_blkno; + uint32_t tmpblkno; + uint64_t journal_byte_offset; + uint64_t journal_size; + vnode_t jib_vp = NULLVP; + struct JournalInfoBlock local_jib; + int err = 0; void *jnl = NULL; int lockflags; /* Only root can enable journaling */ - if (!is_suser()) { + if (!kauth_cred_issuser(kauth_cred_get())) { return (EPERM); } if (vp == NULLVP) @@ -2934,8 +2846,16 @@ encodinghint_exit: printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp)); return EAGAIN; } - vcb = HFSTOVCB(hfsmp); + + /* Set up local copies of the initialization info */ + tmpblkno = (uint32_t) name[1]; + jib_blkno = (uint64_t) tmpblkno; + journal_byte_offset = (uint64_t) name[2]; + journal_byte_offset *= hfsmp->blockSize; + journal_byte_offset += hfsmp->hfsPlusIOPosOffset; + journal_size = 
(uint64_t)((unsigned)name[3]); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 || BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) { @@ -2953,6 +2873,77 @@ encodinghint_exit: return EINVAL; } + /* + * At this point, we have a copy of the metadata that lives in the catalog for the + * journal info block. Compare that the journal info block's single extent matches + * that which was passed into this sysctl. + * + * If it is different, deny the journal enable call. + */ + if (jinfo_fork.cf_blocks > 1) { + /* too many blocks */ + return EINVAL; + } + + if (jinfo_fork.cf_extents[0].startBlock != jib_blkno) { + /* Wrong block */ + return EINVAL; + } + + /* + * We want to immediately purge the vnode for the JIB. + * + * Because it was written to from userland, there's probably + * a vnode somewhere in the vnode cache (possibly with UBC backed blocks). + * So we bring the vnode into core, then immediately do whatever + * we can to flush/vclean it out. This is because those blocks will be + * interpreted as user data, which may be treated separately on some platforms + * than metadata. If the vnode is gone, then there cannot be backing blocks + * in the UBC. + */ + if (hfs_vget (hfsmp, jinfo_attr.ca_fileid, &jib_vp, 1, 0)) { + return EINVAL; + } + /* + * Now we have a vnode for the JIB. recycle it. Because we hold an iocount + * on the vnode, we'll just mark it for termination when the last iocount + * (hopefully ours), is dropped. + */ + vnode_recycle (jib_vp); + err = vnode_put (jib_vp); + if (err) { + return EINVAL; + } + + /* Initialize the local copy of the JIB (just like hfs.util) */ + memset (&local_jib, 'Z', sizeof(struct JournalInfoBlock)); + local_jib.flags = SWAP_BE32(kJIJournalInFSMask); + /* Note that the JIB's offset is in bytes */ + local_jib.offset = SWAP_BE64(journal_byte_offset); + local_jib.size = SWAP_BE64(journal_size); + + /* + * Now write out the local JIB. This essentially overwrites the userland + * copy of the JIB. Read it as BLK_META to treat it as a metadata read/write. + */ + jib_buf = buf_getblk (hfsmp->hfs_devvp, + jib_blkno * (hfsmp->blockSize / hfsmp->hfs_logical_block_size), + hfsmp->blockSize, 0, 0, BLK_META); + char* buf_ptr = (char*) buf_dataptr (jib_buf); + + /* Zero out the portion of the block that won't contain JIB data */ + memset (buf_ptr, 0, hfsmp->blockSize); + + bcopy(&local_jib, buf_ptr, sizeof(local_jib)); + if (buf_bwrite (jib_buf)) { + return EIO; + } + + /* Force a flush track cache */ + (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); + + + /* Now proceed with full volume sync */ hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context); printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n", @@ -2967,15 +2958,13 @@ encodinghint_exit: // to locate and open the journal device. 
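The journal-enable path above builds a fresh journal info block the same way hfs.util does: fill the block with a known pattern, then store flags, byte offset, and size big-endian, deriving the offset from the allocation-block number. A freestanding sketch of that serialization; the field layout here is abbreviated (the real JournalInfoBlock also carries a device signature and reserved space) and the flag value is an assumption:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static void
store_be32(unsigned char *p, uint32_t v)
{
    p[0] = (unsigned char)(v >> 24); p[1] = (unsigned char)(v >> 16);
    p[2] = (unsigned char)(v >> 8);  p[3] = (unsigned char)v;
}

static void
store_be64(unsigned char *p, uint64_t v)
{
    store_be32(p, (uint32_t)(v >> 32));
    store_be32(p + 4, (uint32_t)v);
}

int
main(void)
{
    uint64_t block_size = 4096, io_pos_offset = 0;
    uint64_t journal_start_block = 64, journal_size = 8 * 1024 * 1024;
    /* The JIB's offset field is in bytes, not allocation blocks. */
    uint64_t journal_byte_offset =
        journal_start_block * block_size + io_pos_offset;
    unsigned char block[4096];

    memset(block, 'Z', sizeof(block));          /* pattern-fill, as hfs.util does */
    store_be32(block + 0, 0x00000001);          /* assumed kJIJournalInFSMask */
    store_be64(block + 4, journal_byte_offset); /* big-endian on disk */
    store_be64(block + 12, journal_size);

    printf("journal @ byte %llu, size %llu\n",
        (unsigned long long)journal_byte_offset,
        (unsigned long long)journal_size);
    return 0;
}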
// jvp = hfsmp->hfs_devvp; - jnl = journal_create(jvp, - (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize - + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset, - (off_t)((unsigned)name[3]), + jnl = journal_create(jvp, journal_byte_offset, journal_size, hfsmp->hfs_devvp, hfsmp->hfs_logical_block_size, 0, 0, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); /* * Set up the trim callback function so that we can add @@ -3038,7 +3027,7 @@ encodinghint_exit: vnode_t vp = vfs_context_cwd(context); /* Only root can disable journaling */ - if (!is_suser()) { + if (!kauth_cred_issuser(kauth_cred_get())) { return (EPERM); } if (vp == NULLVP) @@ -3194,7 +3183,7 @@ hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_con * out of the cnode; instead, we're going to the catalog. */ if ((VTOC(*vpp)->c_flag & C_HARDLINK) && - (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) { + (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0)) { cnode_t *cp = VTOC(*vpp); struct cat_desc cdesc; @@ -3315,27 +3304,33 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, * We need a valid name and parent for reverse lookups. */ if (linkref) { - cnid_t nextlinkid; - cnid_t prevlinkid; + cnid_t lastid; struct cat_desc linkdesc; - int lockflags; - + int linkerr = 0; + cnattr.ca_linkref = linkref; + bzero (&linkdesc, sizeof (linkdesc)); - /* - * Pick up the first link in the chain and get a descriptor for it. - * This allows blind volfs paths to work for hardlinks. + /* + * If the caller supplied the raw inode value, then we don't know exactly + * which hardlink they wanted. It's likely that they acquired the raw inode + * value BEFORE the item became a hardlink, in which case, they probably + * want the oldest link. So request the oldest link from the catalog. + * + * Unfortunately, this requires that we iterate through all N hardlinks. On the plus + * side, since we know that we want the last linkID, we can also have this one + * call give us back the name of the last ID, since it's going to have it in-hand... */ - if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) && - (nextlinkid != 0)) { - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_findname(hfsmp, nextlinkid, &linkdesc); - hfs_systemfile_unlock(hfsmp, lockflags); - if (error == 0) { - cat_releasedesc(&cndesc); - bcopy(&linkdesc, &cndesc, sizeof(linkdesc)); - } + linkerr = hfs_lookup_lastlink (hfsmp, linkref, &lastid, &linkdesc); + if ((linkerr == 0) && (lastid != 0)) { + /* + * Release any lingering buffers attached to our local descriptor. + * Then copy the name and other business into the cndesc + */ + cat_releasedesc (&cndesc); + bcopy (&linkdesc, &cndesc, sizeof(linkdesc)); } + /* If it failed, the linkref code will just use whatever it had in-hand below. */ } if (linkref) { @@ -3394,32 +3389,36 @@ hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p) struct hfsmount *hfsmp; struct vnode *skipvp = NULLVP; int error; + int accounted_root_usecounts; #if QUOTA - int quotafilecnt; int i; #endif hfsmp = VFSTOHFS(mp); + accounted_root_usecounts = 0; #if QUOTA /* * The open quota files have an indirect reference on * the root directory vnode. We must account for this * extra reference when doing the intial vflush. */ - quotafilecnt = 0; if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { - /* Find out how many quota files we have open. 
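The hardlink change above resolves a raw link reference by walking the whole chain once and keeping the last (oldest) link ID along with its name, since a single pass can return both. A toy model of that traversal over a hypothetical in-memory chain (the real lookup iterates catalog records):

#include <stdio.h>
#include <stdint.h>

struct link_rec {
    uint32_t cnid;
    const char *name;
    const struct link_rec *next;
};

/* Walk to the end of the chain; report the last link's id and name. */
static int
lookup_lastlink(const struct link_rec *chain, uint32_t *lastid,
    const char **lastname)
{
    if (chain == NULL)
        return -1;
    while (chain->next != NULL)
        chain = chain->next;
    *lastid = chain->cnid;
    *lastname = chain->name;
    return 0;
}

int
main(void)
{
    struct link_rec c = { 301, "report-final.txt", NULL };
    struct link_rec b = { 205, "report-v2.txt", &c };
    struct link_rec a = { 104, "report.txt", &b };
    uint32_t id;
    const char *name;

    if (lookup_lastlink(&a, &id, &name) == 0)
        printf("last link: id=%u name=%s\n", id, name);
    return 0;
}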
*/ for (i = 0; i < MAXQUOTAS; i++) { if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP) - ++quotafilecnt; + ++accounted_root_usecounts; } + } +#endif /* QUOTA */ + if (hfsmp->hfs_flags & HFS_CS) { + ++accounted_root_usecounts; + } + if (accounted_root_usecounts > 0) { /* Obtain the root vnode so we can skip over it. */ skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0); } -#endif /* QUOTA */ error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags); if (error != 0) @@ -3427,38 +3426,54 @@ hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p) error = vflush(mp, skipvp, SKIPSYSTEM | flags); -#if QUOTA - if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { - if (skipvp) { - /* - * See if there are additional references on the - * root vp besides the ones obtained from the open - * quota files and the hfs_chash_getvnode call above. - */ - if ((error == 0) && - (vnode_isinuse(skipvp, quotafilecnt))) { - error = EBUSY; /* root directory is still open */ - } - hfs_unlock(VTOC(skipvp)); - vnode_put(skipvp); + if (skipvp) { + /* + * See if there are additional references on the + * root vp besides the ones obtained from the open + * quota files and CoreStorage. + */ + if ((error == 0) && + (vnode_isinuse(skipvp, accounted_root_usecounts))) { + error = EBUSY; /* root directory is still open */ } - if (error && (flags & FORCECLOSE) == 0) - return (error); + hfs_unlock(VTOC(skipvp)); + /* release the iocount from the hfs_chash_getvnode call above. */ + vnode_put(skipvp); + } + if (error && (flags & FORCECLOSE) == 0) + return (error); +#if QUOTA + if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) { for (i = 0; i < MAXQUOTAS; i++) { if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP) continue; hfs_quotaoff(p, mp, i); } - error = vflush(mp, NULLVP, SKIPSYSTEM | flags); } #endif /* QUOTA */ + if (hfsmp->hfs_flags & HFS_CS) { + error = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSSETFSVNODE, + (caddr_t)NULL, 0, vfs_context_kernel()); + vnode_rele(skipvp); + printf("hfs_flushfiles: VNOP_IOCTL(_DKIOCCSSETFSVNODE) failed with error code %d\n", + error); + + /* ignore the CS error and proceed with the unmount. */ + error = 0; + } + if (skipvp) { + error = vflush(mp, NULLVP, SKIPSYSTEM | flags); + } return (error); } /* * Update volume encoding bitmap (HFS Plus only) + * + * Mark a legacy text encoding as in-use (as needed) + * in the volume header of this HFS+ filesystem. 
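hfs_flushfiles now totals every expected long-term reference on the root vnode, one per open quota file plus one if CoreStorage registered it, and reports EBUSY only when the root's use count exceeds that figure. The accounting reduced to a pure function:

#include <stdio.h>

#define MAXQUOTAS 2     /* user and group quotas */

/* Root-vnode usecounts that are expected and must not block unmount. */
static int
accounted_root_usecounts(const int quota_file_open[MAXQUOTAS],
    int corestorage_registered)
{
    int count = 0;
    int i;

    for (i = 0; i < MAXQUOTAS; i++) {
        if (quota_file_open[i])
            count++;
    }
    if (corestorage_registered)
        count++;
    return count;
}

int
main(void)
{
    int quotas[MAXQUOTAS] = { 1, 0 };
    int expected = accounted_root_usecounts(quotas, 1);
    int actual_usecount = 3;    /* one reference beyond the accounted ones */

    printf("expected=%d busy=%s\n", expected,
        actual_usecount > expected ? "yes (EBUSY)" : "no");
    return 0;
}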
*/ __private_extern__ void @@ -3481,11 +3496,12 @@ hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding) break; } + /* Only mark the encoding as in-use if it wasn't already set */ if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) { - HFS_MOUNT_LOCK(hfsmp, TRUE) + hfs_lock_mount (hfsmp); hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index); MarkVCBDirty(hfsmp); - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount(hfsmp); } } @@ -3501,7 +3517,7 @@ hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) microtime(&tv); - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); MarkVCBDirty(hfsmp); hfsmp->hfs_mtime = tv.tv_sec; @@ -3535,7 +3551,7 @@ hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) break; } - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); if (hfsmp->jnl) { hfs_flushvolumeheader(hfsmp, 0, 0); @@ -3545,6 +3561,7 @@ hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot) } +#if CONFIG_HFS_STD static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) { @@ -3564,7 +3581,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) return retval; } - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size)); @@ -3615,7 +3632,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) MarkVCBClean( vcb ); - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); /* If requested, flush out the alternate MDB */ if (altflush) { @@ -3636,6 +3653,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush) return (retval); } +#endif /* * Flush any dirty in-memory mount data to the on-disk @@ -3662,9 +3680,11 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) if (hfsmp->hfs_flags & HFS_READ_ONLY) { return(0); } +#if CONFIG_HFS_STD if (hfsmp->hfs_flags & HFS_STANDARD) { return hfs_flushMDB(hfsmp, waitfor, altflush); } +#endif critical = altflush; priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size)); @@ -3680,7 +3700,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys), hfsmp->hfs_physical_block_size, NOCRED, &bp); if (retval) { - printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN); + printf("hfs: err %d reading VH blk (vol=%s)\n", retval, vcb->vcbVN); goto err_exit; } @@ -3782,7 +3802,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) } } - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); /* Note: only update the lower 16 bits worth of attributes */ volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb); @@ -3897,7 +3917,7 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush) done: MarkVCBClean(hfsmp); - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); /* If requested, flush out the alternate volume header */ if (altflush && hfsmp->hfs_alt_id_sector) { @@ -4024,7 +4044,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) return (ENXIO); } if ((sector_size * sector_count) < newsize) { - printf("hfs_extendfs: not enough space on device\n"); + printf("hfs_extendfs: not enough space on device (vol=%s)\n", hfsmp->vcbVN); return (ENOSPC); } error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context); @@ -4041,12 +4061,14 @@ hfs_extendfs(struct hfsmount 
*hfsmp, u_int64_t newsize, vfs_context_t context) * Validate new size. */ if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) { - printf("hfs_extendfs: invalid size\n"); + printf("hfs_extendfs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize); return (EINVAL); } newblkcnt = newsize / vcb->blockSize; - if (newblkcnt > (u_int64_t)0xFFFFFFFF) + if (newblkcnt > (u_int64_t)0xFFFFFFFF) { + printf ("hfs_extendfs: current blockSize=%u too small for newsize=%qu\n", hfsmp->blockSize, newsize); return (EOVERFLOW); + } addblks = newblkcnt - vcb->totalBlocks; @@ -4056,14 +4078,14 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks); - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount(hfsmp); error = EALREADY; goto out; } hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); /* Start with a clean journal. */ hfs_journal_flush(hfsmp, TRUE); @@ -4206,7 +4228,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } } if (error) { - printf("hfs_extendfs: error %d clearing blocks\n", error); + printf("hfs_extendfs: error %d clearing blocks\n", error); goto out; } /* @@ -4377,9 +4399,9 @@ out: } out_noalloc: - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); } @@ -4389,6 +4411,9 @@ out_noalloc: /* Just to be sure, sync all data to the disk */ (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); } + if (error) { + printf ("hfs_extentfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN); + } return MacToVFSError(error); } @@ -4411,16 +4436,16 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) Boolean disable_sparse = false; int error = 0; - lck_mtx_lock(&hfsmp->hfs_mutex); + hfs_lock_mount (hfsmp); if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) { - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); return (EALREADY); } hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS; hfsmp->hfs_resize_blocksmoved = 0; hfsmp->hfs_resize_totalblocks = 0; hfsmp->hfs_resize_progress = 0; - lck_mtx_unlock(&hfsmp->hfs_mutex); + hfs_unlock_mount (hfsmp); /* * - Journaled HFS Plus volumes only. @@ -4540,7 +4565,7 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) * extent caches sorted differently, etc. So we stick to this * solution for now. 
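The extend path validates the request before touching anything: the new size must grow the volume, be a multiple of both the logical and physical sector sizes, and keep the block count within the 32-bit limit of the on-disk format. As a standalone check (error values mirror the ones the code returns):

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

static int
validate_new_size(uint64_t newsize, uint64_t oldsize, uint32_t sector_size,
    uint32_t phys_sector_size, uint32_t fs_block_size)
{
    if (newsize <= oldsize || newsize % sector_size ||
        newsize % phys_sector_size)
        return EINVAL;          /* must grow, and stay sector-aligned */
    if (newsize / fs_block_size > 0xFFFFFFFFULL)
        return EOVERFLOW;       /* allocation block numbers are 32-bit */
    return 0;
}

int
main(void)
{
    printf("%d\n", validate_new_size(2ULL << 30, 1ULL << 30, 512, 4096, 4096));
    printf("%d\n", validate_new_size(1ULL << 30, 2ULL << 30, 512, 4096, 4096));
    return 0;
}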
*/ - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; ResetVCBFreeExtCache(hfsmp); @@ -4560,8 +4585,8 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) */ hfsmp->freeBlocks -= reclaimblks; updateFreeBlocks = true; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); - + hfs_unlock_mount(hfsmp); + if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); lockflags = 0; @@ -4708,7 +4733,7 @@ out: */ UpdateAllocLimit (hfsmp, hfsmp->totalBlocks); - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); if (disable_sparse == true) { /* Now that resize is completed, set the volume to be sparse * device again so that all further allocations will be first @@ -4727,7 +4752,7 @@ out: hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1; } hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); /* On error, reset the metadata zone for original volume size */ if (error && (updateFreeBlocks == true)) { @@ -4744,6 +4769,10 @@ out: (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context); } + if (error) { + printf ("hfs_truncatefs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN); + } + return MacToVFSError(error); } @@ -5663,7 +5692,7 @@ relocate_full_extent: * by the allocation, split the extent into two parts, and then * relocate the first splitted extent. */ - alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; + alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; if (extent_info->is_sysfile) { alloc_flags |= HFS_ALLOC_METAZONE; } @@ -5679,9 +5708,11 @@ relocate_full_extent: } if ((error == dskFulErr) || (error == ENOSPC)) { /* We did not find desired contiguous space for this extent. - * So try to allocate the maximum contiguous space available. + * So don't worry about getting contiguity anymore. Also, allow using + * blocks that were recently deallocated. 
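Both resize entry points guard against concurrent resizes the same way: take the mount lock, fail with EALREADY if HFS_RESIZE_IN_PROGRESS is already set, otherwise set it and continue. The guard as a small pthread-protected helper:

#include <pthread.h>
#include <stdio.h>
#include <errno.h>

static pthread_mutex_t mount_lock = PTHREAD_MUTEX_INITIALIZER;
static int resize_in_progress;

static int
begin_resize(void)
{
    pthread_mutex_lock(&mount_lock);
    if (resize_in_progress) {
        pthread_mutex_unlock(&mount_lock);
        return EALREADY;        /* another resize owns the volume */
    }
    resize_in_progress = 1;
    pthread_mutex_unlock(&mount_lock);
    return 0;
}

static void
end_resize(void)
{
    pthread_mutex_lock(&mount_lock);
    resize_in_progress = 0;
    pthread_mutex_unlock(&mount_lock);
}

int
main(void)
{
    printf("first:  %d\n", begin_resize());
    printf("second: %d\n", begin_resize());  /* EALREADY while the first runs */
    end_resize();
    return 0;
}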
*/ alloc_flags &= ~HFS_ALLOC_FORCECONTIG; + alloc_flags |= HFS_ALLOC_FLUSHTXN; error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); @@ -5945,10 +5976,10 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file"); hfs_unlock(cp); - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); took_truncate_lock = true; (void) cluster_push(vp, 0); - error = hfs_lock(cp, HFS_FORCE_LOCK); + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); if (error) { goto out; } @@ -6127,7 +6158,7 @@ out: (void) hfs_update(vp, MNT_WAIT); } if (took_truncate_lock) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } if (extent_info) { FREE(extent_info, M_TEMP); @@ -6261,7 +6292,7 @@ hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount, - HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | alloc_skipfreeblks, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_FLUSHTXN | alloc_skipfreeblks, &newStartBlock, &newBlockCount); if (error) { printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error); @@ -6441,7 +6472,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); error = BlockAllocate(hfsmp, 1, 1, 1, - HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS | HFS_ALLOC_FLUSHTXN, &newBlock, &blockCount); if (error) { printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error); @@ -6451,11 +6482,6 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount); goto free_fail; } - error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); - if (error) { - printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); - goto free_fail; - } /* Copy the old journal info block content to the new location */ error = buf_meta_bread(hfsmp->hfs_devvp, @@ -6487,6 +6513,14 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs /* Don't fail the operation. */ } } + + /* Deallocate the old block once the new one has the new valid content */ + error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); + if (error) { + printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); + goto free_fail; + } + /* Update the catalog record for .journal_info_block */ error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork); @@ -6694,10 +6728,10 @@ hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_ * I/O using a virtual file that spans across entire file system * space. 
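The journal-info-block relocation above was reordered so the old block is deallocated only once the new copy holds valid contents, leaving no window in which neither location is good. A stub model of that ordering, with flags in place of real allocation and I/O:

#include <stdio.h>
#include <string.h>

static char old_block[16] = "journal-info";
static char new_block[16];
static int  old_allocated = 1, new_allocated;

static void
relocate_jib(void)
{
    new_allocated = 1;                               /* BlockAllocate() */
    memcpy(new_block, old_block, sizeof(new_block)); /* copy contents across */
    /* ... write and flush the new block before releasing the old one ... */
    old_allocated = 0;                               /* BlockDeallocate() last */
}

int
main(void)
{
    relocate_jib();
    printf("old=%d new=%d contents=%s\n", old_allocated, new_allocated,
        new_block);
    return 0;
}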
*/ - hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); (void)cluster_push(hfsmp->hfs_attrdata_vp, 0); error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr"); - hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0); + hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_LOCK_DEFAULT); if (error) { goto out; } @@ -6942,6 +6976,10 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_ u_int32_t files_moved = 0; u_int32_t prev_blocksmoved; +#if CONFIG_PROTECT + int keys_generated = 0; +#endif + fcb = VTOF(hfsmp->hfs_catalog_vp); /* Store the value to print total blocks moved by this function at the end */ prev_blocksmoved = hfsmp->hfs_resize_blocksmoved; @@ -6952,7 +6990,6 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_ } #if CONFIG_PROTECT - int keys_generated = 0; /* * For content-protected filesystems, we may need to relocate files that * are encrypted. If they use the new-style offset-based IVs, then @@ -6961,12 +6998,22 @@ hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_ * end of the function. */ if (cp_fs_protected (hfsmp->hfs_mp)) { - error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp); + int needs = 0; + error = cp_needs_tempkeys(hfsmp, &needs); + + if ((error == 0) && (needs)) { + error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp); + if (error == 0) { + keys_generated = 1; + } + } + if (error) { printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error); goto reclaim_filespace_done; } } + #endif bzero(iterator, sizeof(*iterator)); @@ -7068,7 +7115,8 @@ reclaim_filespace_done: #if CONFIG_PROTECT if (keys_generated) { - cp_entry_destroy(&hfsmp->hfs_resize_cpentry); + cp_entry_destroy(hfsmp->hfs_resize_cpentry); + hfsmp->hfs_resize_cpentry = NULL; } #endif return error; @@ -7374,6 +7422,19 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t struct hfsmount *hfsmp = VFSTOHFS(mp); u_int32_t freeCNIDs; + int searchfs_on = 0; + int exchangedata_on = 1; + +#if CONFIG_SEARCHFS + searchfs_on = 1; +#endif + +#if CONFIG_PROTECT + if (cp_fs_protected(mp)) { + exchangedata_on = 0; + } +#endif + freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID; VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt); @@ -7403,14 +7464,8 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t cap = &fsap->f_capabilities; - if (hfsmp->hfs_flags & HFS_STANDARD) { - cap->capabilities[VOL_CAPABILITIES_FORMAT] = - VOL_CAP_FMT_PERSISTENTOBJECTIDS | - VOL_CAP_FMT_CASE_PRESERVING | - VOL_CAP_FMT_FAST_STATFS | - VOL_CAP_FMT_HIDDEN_FILES | - VOL_CAP_FMT_PATH_FROM_ID; - } else { + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + /* HFS+ & variants */ cap->capabilities[VOL_CAPABILITIES_FORMAT] = VOL_CAP_FMT_PERSISTENTOBJECTIDS | VOL_CAP_FMT_SYMBOLICLINKS | @@ -7430,12 +7485,27 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t VOL_CAP_FMT_PATH_FROM_ID; #endif } +#if CONFIG_HFS_STD + else { + /* HFS standard */ + cap->capabilities[VOL_CAPABILITIES_FORMAT] = + VOL_CAP_FMT_PERSISTENTOBJECTIDS | + VOL_CAP_FMT_CASE_PRESERVING | + VOL_CAP_FMT_FAST_STATFS | + VOL_CAP_FMT_HIDDEN_FILES | + VOL_CAP_FMT_PATH_FROM_ID; + } +#endif + + /* + * The capabilities word in 'cap' tell you whether or not + * this particular filesystem 
instance has feature X enabled. + */ + cap->capabilities[VOL_CAPABILITIES_INTERFACES] = - VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | VOL_CAP_INT_NFSEXPORT | VOL_CAP_INT_READDIRATTR | - VOL_CAP_INT_EXCHANGEDATA | VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | VOL_CAP_INT_ADVLOCK | @@ -7446,6 +7516,16 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t #else VOL_CAP_INT_EXTENDED_ATTR; #endif + + /* HFS may conditionally support searchfs and exchangedata depending on the runtime */ + + if (searchfs_on) { + cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_SEARCHFS; + } + if (exchangedata_on) { + cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_EXCHANGEDATA; + } + cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0; cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0; @@ -7470,12 +7550,16 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t #else VOL_CAP_FMT_PATH_FROM_ID; #endif + + /* + * Bits in the "valid" field tell you whether or not the on-disk + * format supports feature X. + */ + cap->valid[VOL_CAPABILITIES_INTERFACES] = - VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_ATTRLIST | VOL_CAP_INT_NFSEXPORT | VOL_CAP_INT_READDIRATTR | - VOL_CAP_INT_EXCHANGEDATA | VOL_CAP_INT_COPYFILE | VOL_CAP_INT_ALLOCATE | VOL_CAP_INT_VOL_RENAME | @@ -7488,6 +7572,11 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t #else VOL_CAP_INT_EXTENDED_ATTR; #endif + + /* HFS always supports exchangedata and searchfs in the on-disk format natively */ + cap->valid[VOL_CAPABILITIES_INTERFACES] |= (VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_EXCHANGEDATA); + + cap->valid[VOL_CAPABILITIES_RESERVED1] = 0; cap->valid[VOL_CAPABILITIES_RESERVED2] = 0; VFSATTR_SET_SUPPORTED(fsap, f_capabilities); @@ -7531,14 +7620,19 @@ hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t * 128: Mac OS Standard * */ - if (hfsmp->hfs_flags & HFS_STANDARD) { - subtype = HFS_SUBTYPE_STANDARDHFS; - } else /* HFS Plus */ { - if (hfsmp->jnl) + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + if (hfsmp->jnl) { subtype |= HFS_SUBTYPE_JOURNALED; - if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) + } + if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) { subtype |= HFS_SUBTYPE_CASESENSITIVE; + } } +#if CONFIG_HFS_STD + else { + subtype = HFS_SUBTYPE_STANDARDHFS; + } +#endif fsap->f_fssubtype = subtype; VFSATTR_SET_SUPPORTED(fsap, f_fssubtype); } @@ -7595,7 +7689,7 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p) to_desc.cd_cnid = cp->c_cnid; to_desc.cd_flags = CD_ISDIR; - if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) { + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) { if ((error = hfs_start_transaction(hfsmp)) == 0) { if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) { lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); @@ -7605,8 +7699,9 @@ hfs_rename_volume(struct vnode *vp, const char *name, proc_t p) /* * If successful, update the name in the VCB, ensure it's terminated. 
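The distinction the comments above draw is between what this running kernel enables (the capabilities words) and what the on-disk format could support (the valid words): searchfs and exchangedata are always advertised in valid, but appear in capabilities only when the build configuration and runtime state allow. A bitmask sketch of that split (flag values are illustrative, not the VOL_CAP_INT_* constants):

#include <stdio.h>
#include <stdint.h>

#define CAP_SEARCHFS     (1u << 0)
#define CAP_EXCHANGEDATA (1u << 1)
#define CAP_ATTRLIST     (1u << 2)

int
main(void)
{
    int searchfs_on = 1;        /* e.g. CONFIG_SEARCHFS compiled in */
    int exchangedata_on = 0;    /* e.g. off for content-protected volumes */

    uint32_t capabilities = CAP_ATTRLIST;
    if (searchfs_on)
        capabilities |= CAP_SEARCHFS;
    if (exchangedata_on)
        capabilities |= CAP_EXCHANGEDATA;

    /* The format always supports both, so 'valid' advertises them. */
    uint32_t valid = CAP_ATTRLIST | CAP_SEARCHFS | CAP_EXCHANGEDATA;

    printf("capabilities=0x%x valid=0x%x\n", capabilities, valid);
    return 0;
}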
*/ - if (!error) { + if (error == 0) { strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN)); + volname_length = strlen ((const char*)vcb->vcbVN); +#define DKIOCCSSETLVNAME _IOW('d', 198, char[256]) /* Send the volume name down to CoreStorage if necessary */ @@ -7692,7 +7787,7 @@ out: */ void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp) { - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) { hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask; MarkVCBDirty(hfsmp); @@ -7702,7 +7797,7 @@ void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp) fslog_fs_corrupt(hfsmp->hfs_mp); printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN); } - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); } /* Replay the journal on the device node provided. Returns zero if diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index 626c1e64c..f67adacc4 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -52,6 +52,13 @@ #include +/* for parsing boot-args */ +#include + +#if CONFIG_PROTECT +#include +#endif + #include "hfs.h" #include "hfs_catalog.h" #include "hfs_dbg.h" @@ -89,7 +96,7 @@ unsigned char hfs_vbmname[] = "Volume Bitmap"; unsigned char hfs_attrname[] = "Attribute B-tree"; unsigned char hfs_startupname[] = "Startup File"; - +#if CONFIG_HFS_STD OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb, __unused struct proc *p) { @@ -302,6 +309,8 @@ MtVolErr: return (error); } +#endif + //******************************************************************************* // Routine: hfs_MountHFSPlusVolume // @@ -332,12 +341,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if (signature == kHFSPlusSigWord) { if (hfs_version != kHFSPlusVersion) { - printf("hfs_mount: invalid HFS+ version: %d\n", hfs_version); + printf("hfs_mount: invalid HFS+ version: %x\n", hfs_version); return (EINVAL); } } else if (signature == kHFSXSigWord) { if (hfs_version != kHFSXVersion) { - printf("hfs_mount: invalid HFSX version: %d\n", hfs_version); + printf("hfs_mount: invalid HFSX version: %x\n", hfs_version); return (EINVAL); } /* The in-memory signature is always 'H+'. */ @@ -348,7 +357,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, * false error for UFS root volume */ if (HFS_MOUNT_DEBUG) { - printf("hfs_mounthfsplus: unknown Volume Signature\n"); + printf("hfs_mounthfsplus: unknown Volume Signature : %x\n", signature); } return (EINVAL); } @@ -662,7 +671,12 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, hfs_unlock(hfsmp->hfs_startup_cp); } - /* Pick up volume name and create date */ + /* + * Pick up volume name and create date + * + * Acquiring the volume name should not manipulate the bitmap, only the catalog + * btree and possibly the extents overflow b-tree. + */ retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL); if (retval) { if (HFS_MOUNT_DEBUG) { @@ -687,7 +701,34 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* reset retval == 0. we don't care about errors in volname conversion */ retval = 0; + + /* + * We now always initiate a full bitmap scan even if the volume is read-only because this is + * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily + * expects. TRIMs will not be delivered to the underlying media if the volume is not + * read-write though.
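hfs_mark_volume_inconsistent (converted above to the new hfs_lock_mount/hfs_unlock_mount wrappers) is a test-under-lock, set-once pattern: the inconsistent bit and the corruption log fire only for the first detector, no matter how many threads report corruption. A user-space analogue, with a pthread mutex standing in for the HFS mount mutex:

#include <pthread.h>
#include <stdio.h>

#define VOL_INCONSISTENT 0x1u

struct volume {
    pthread_mutex_t mtx;   /* stands in for the hfs mount mutex */
    unsigned int    attrs; /* stands in for vcbAtrb */
};

/* Set the inconsistent bit exactly once; the log line fires only for
 * the first caller. */
static void mark_inconsistent(struct volume *v)
{
    pthread_mutex_lock(&v->mtx);
    if ((v->attrs & VOL_INCONSISTENT) == 0) {
        v->attrs |= VOL_INCONSISTENT;
        printf("corruption detected; fsck forced on next mount\n");
    }
    pthread_mutex_unlock(&v->mtx);
}

int main(void)
{
    struct volume v = { PTHREAD_MUTEX_INITIALIZER, 0 };
    mark_inconsistent(&v);
    mark_inconsistent(&v); /* second call is a silent no-op */
    return 0;
}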
+ */ + thread_t allocator_scanner; + hfsmp->scan_var = 0; + + /* Take the HFS mount mutex and wait on scan_var */ + hfs_lock_mount (hfsmp); + + kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner); + /* Wait until it registers that it's got the appropriate locks */ + while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) { + (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0); + if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) { + break; + } + else { + hfs_lock_mount (hfsmp); + } + } + + thread_deallocate (allocator_scanner); + /* mark the volume dirty (clear clean unmount bit) */ vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask; if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) { @@ -791,6 +832,104 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, } } + if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected + { + MarkVCBDirty( vcb ); // mark VCB dirty so it will be written + } + + /* + * Distinguish 3 potential cases involving content protection: + * 1. mount point bit set; vcbAtrb does not support it. Fail. + * 2. mount point bit set; vcbattrb supports it. we're good. + * 3. mount point bit not set; vcbatrb supports it, turn bit on, then good. + */ + if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) { + /* Does the mount point support it ? */ + if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) { + /* Case 1 above */ + retval = EINVAL; + goto ErrorExit; + } + } + else { + /* not requested in the mount point. Is it in FS? */ + if (vcb->vcbAtrb & kHFSContentProtectionMask) { + /* Case 3 above */ + vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT); + } + } + + /* At this point, if the mount point flag is set, we can enable it. */ + if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) { + /* Cases 2+3 above */ +#if CONFIG_PROTECT + /* Get the EAs as needed. */ + int cperr = 0; + uint16_t majorversion; + uint16_t minorversion; + + struct cp_root_xattr *xattr = NULL; + MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK); + if (xattr == NULL) { + retval = ENOMEM; + goto ErrorExit; + } + bzero (xattr, sizeof(struct cp_root_xattr)); + + /* go get the EA to get the version information */ + cperr = cp_getrootxattr (hfsmp, xattr); + /* + * If there was no EA there, then write one out. + * Assuming EA is not present on the root means + * this is an erase install or a very old FS + */ + + if (cperr == 0) { + /* Have to run a valid CP version. */ + if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) { + cperr = EINVAL; + } + } + else if (cperr == ENOATTR) { + printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS); + bzero(xattr, sizeof(struct cp_root_xattr)); + xattr->major_version = CP_NEW_MAJOR_VERS; + xattr->minor_version = CP_MINOR_VERS; + xattr->flags = 0; + cperr = cp_setrootxattr (hfsmp, xattr); + } + majorversion = xattr->major_version; + minorversion = xattr->minor_version; + if (xattr) { + FREE(xattr, M_TEMP); + } + + /* Recheck for good status */ + if (cperr == 0) { + /* If we got here, then the CP version is valid. Set it in the mount point */ + hfsmp->hfs_running_cp_major_vers = majorversion; + printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion); + + /* + * Acquire the boot-arg for the AKS default key. + * Ensure that the boot-arg's value is valid for FILES (not directories), + * since only files are actually protected for now. 
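The new mount-time handshake above starts the allocator scanner with kernel_thread_start() and then msleep()s on scan_var until the scanner publishes HFS_ALLOCATOR_SCAN_INFLIGHT; because the msleep passes PDROP, the mount mutex is released on each wakeup and must be re-taken before the flag is re-tested. A portable sketch of the same rendezvous using pthreads, where pthread_cond_wait() re-acquires the mutex automatically so no explicit re-lock is needed:

#include <pthread.h>
#include <stdio.h>

#define SCAN_INFLIGHT 0x1u

struct mount_state {
    pthread_mutex_t mtx;
    pthread_cond_t  cv;
    unsigned int    scan_var;
};

/* Scanner: acquire whatever it needs, then publish SCAN_INFLIGHT so
 * the mounting thread may continue. */
static void *scanner(void *arg)
{
    struct mount_state *m = arg;

    pthread_mutex_lock(&m->mtx);
    m->scan_var |= SCAN_INFLIGHT;      /* "my locks are held now" */
    pthread_cond_broadcast(&m->cv);
    pthread_mutex_unlock(&m->mtx);

    /* ... the long-running bitmap scan would proceed here ... */
    return NULL;
}

int main(void)
{
    struct mount_state m = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
    };
    pthread_t t;

    pthread_create(&t, NULL, scanner, &m);

    /* Mount path: wait until the scanner registers, re-testing the
     * flag on every wakeup since condvar waits can be spurious. */
    pthread_mutex_lock(&m.mtx);
    while ((m.scan_var & SCAN_INFLIGHT) == 0)
        pthread_cond_wait(&m.cv, &m.mtx);
    pthread_mutex_unlock(&m.mtx);

    pthread_join(t, NULL);
    printf("scanner in flight; mount continues\n");
    return 0;
}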
+ */ + PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class)); + if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) { + hfsmp->default_cp_class = PROTECTION_CLASS_D; + } + } + else { + retval = EPERM; + goto ErrorExit; + } +#else + /* If CONFIG_PROTECT not built, ignore CP */ + vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT); +#endif + } + /* * Establish a metadata allocation zone. */ @@ -831,12 +970,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, goto ErrorExit; } } - - if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) ) // if the disk is not write protected - { - MarkVCBDirty( vcb ); // mark VCB dirty so it will be written - } - + /* * Allow hot file clustering if conditions allow. */ @@ -851,11 +985,6 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, /* Enable extent-based extended attributes by default */ hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; - /* See if this volume should have per-file content protection enabled */ - if (vcb->vcbAtrb & kHFSContentProtectionMask) { - vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT); - } - return (0); ErrorExit: @@ -866,7 +995,7 @@ ErrorExit: hfsUnmount(hfsmp, NULL); if (HFS_MOUNT_DEBUG) { - printf("hfs_mounthfsplus: encountered errorr (%d)\n", retval); + printf("hfs_mounthfsplus: encountered error (%d)\n", retval); } return (retval); } @@ -883,7 +1012,7 @@ static void ReleaseMetaFileVNode(struct vnode *vp) if (vp && (fp = VTOF(vp))) { if (fp->fcbBTCBPtr != NULL) { - (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); (void) BTClosePath(fp); hfs_unlock(VTOC(vp)); } @@ -950,6 +1079,10 @@ hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p) /* * Test if fork has overflow extents. + * + * Returns: + * non-zero - overflow extents exist + * zero - overflow extents do not exist */ __private_extern__ int @@ -992,8 +1125,8 @@ overflow_extents(struct filefork *fp) * Lock the HFS global journal lock */ int -hfs_lock_global (struct hfsmount *hfsmp, enum hfslocktype locktype) { - +hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype) +{ void *thread = current_thread(); if (hfsmp->hfs_global_lockowner == thread) { @@ -1019,7 +1152,8 @@ hfs_lock_global (struct hfsmount *hfsmp, enum hfslocktype locktype) { * Unlock the HFS global journal lock */ void -hfs_unlock_global (struct hfsmount *hfsmp) { +hfs_unlock_global (struct hfsmount *hfsmp) +{ void *thread = current_thread(); @@ -1034,18 +1168,36 @@ hfs_unlock_global (struct hfsmount *hfsmp) { } } +/* + * Lock the HFS mount lock + * + * Note: this is a mutex, not a rw lock! + */ +inline +void hfs_lock_mount (struct hfsmount *hfsmp) { + lck_mtx_lock (&(hfsmp->hfs_mutex)); +} + +/* + * Unlock the HFS mount lock + * + * Note: this is a mutex, not a rw lock! + */ +inline +void hfs_unlock_mount (struct hfsmount *hfsmp) { + lck_mtx_unlock (&(hfsmp->hfs_mutex)); +} /* * Lock HFS system file(s). 
*/ int -hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype) +hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype) { /* * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file */ if (flags & SFL_CATALOG) { - #ifdef HFS_CHECK_LOCK_ORDER if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) { panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)"); @@ -1059,23 +1211,23 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype #endif /* HFS_CHECK_LOCK_ORDER */ if (hfsmp->hfs_catalog_cp) { - (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype); + (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the catalog file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_catalog_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) { + flags |= SFL_EXTENTS; + } } else { flags &= ~SFL_CATALOG; } - - /* - * When the catalog file has overflow extents then - * also acquire the extents b-tree lock if its not - * already requested. - */ - if ((flags & SFL_EXTENTS) == 0 && - overflow_extents(VTOF(hfsmp->hfs_catalog_vp))) { - flags |= SFL_EXTENTS; - } } - if (flags & SFL_ATTRIBUTE) { + if (flags & SFL_ATTRIBUTE) { #ifdef HFS_CHECK_LOCK_ORDER if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) { panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)"); @@ -1086,20 +1238,22 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype #endif /* HFS_CHECK_LOCK_ORDER */ if (hfsmp->hfs_attribute_cp) { - (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype); + (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT); /* * When the attribute file has overflow extents then * also acquire the extents b-tree lock if its not * already requested. */ - if ((flags & SFL_EXTENTS) == 0 && - overflow_extents(VTOF(hfsmp->hfs_attribute_vp))) { + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_attribute_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) { flags |= SFL_EXTENTS; } } else { flags &= ~SFL_ATTRIBUTE; } } + if (flags & SFL_STARTUP) { #ifdef HFS_CHECK_LOCK_ORDER if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) { @@ -1108,52 +1262,52 @@ hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfslocktype locktype #endif /* HFS_CHECK_LOCK_ORDER */ if (hfsmp->hfs_startup_cp) { - (void) hfs_lock(hfsmp->hfs_startup_cp, locktype); + (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT); + /* + * When the startup file has overflow extents then + * also acquire the extents b-tree lock if its not + * already requested. + */ + if (((flags & SFL_EXTENTS) == 0) && + (hfsmp->hfs_startup_vp != NULL) && + (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) { + flags |= SFL_EXTENTS; + } } else { flags &= ~SFL_STARTUP; } - - /* - * When the startup file has overflow extents then - * also acquire the extents b-tree lock if its not - * already requested. - */ - if ((flags & SFL_EXTENTS) == 0 && - overflow_extents(VTOF(hfsmp->hfs_startup_vp))) { - flags |= SFL_EXTENTS; - } } + /* * To prevent locks being taken in the wrong order, the extent lock * gets a bitmap lock as well. */ if (flags & (SFL_BITMAP | SFL_EXTENTS)) { - /* - * If there's no bitmap cnode, ignore the bitmap lock. 
- */ - if (hfsmp->hfs_allocation_cp == NULL) { - flags &= ~SFL_BITMAP; - } else { - (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK); + if (hfsmp->hfs_allocation_cp) { + (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); /* * The bitmap lock is also grabbed when only extent lock * was requested. Set the bitmap lock bit in the lock * flags which callers will use during unlock. */ flags |= SFL_BITMAP; + } else { + flags &= ~SFL_BITMAP; } } + if (flags & SFL_EXTENTS) { /* * Since the extents btree lock is recursive we always * need exclusive access. */ if (hfsmp->hfs_extents_cp) { - (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK); + (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); } else { flags &= ~SFL_EXTENTS; } } + return (flags); } @@ -1351,7 +1505,7 @@ GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name, jdesc.cd_namelen = strlen(name); lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - error = cat_lookup(hfsmp, &jdesc, 0, NULL, fattr, forkinfo, NULL); + error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL); hfs_systemfile_unlock(hfsmp, lockflags); if (error == 0) { @@ -1509,7 +1663,6 @@ hfs_remove_orphans(struct hfsmount * hfsmp) if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0, cnode.c_attr.ca_fileid, false) != 0) { printf("hfs: error truncating data fork!\n"); - break; } @@ -1703,7 +1856,7 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) } if ((vfsp = vfs_statfs(backingfs_mp))) { - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); vfreeblks = vfsp->f_bavail; /* Normalize block count if needed. */ if (vfsp->f_bsize != hfsmp->blockSize) { @@ -1725,10 +1878,23 @@ hfs_freeblks(struct hfsmount * hfsmp, int wantreserve) vfreeblks = MIN(vfreeblks, hfsmp->hfs_backingfs_maxblocks); } freeblks = MIN(vfreeblks, freeblks); - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); } } #endif /* HFS_SPARSE_DEV */ + if (hfsmp->hfs_flags & HFS_CS) { + uint64_t cs_free_bytes; + uint64_t cs_free_blks; + if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES, + (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) { + cs_free_blks = cs_free_bytes / hfsmp->blockSize; + if (cs_free_blks > loanblks) + cs_free_blks -= loanblks; + else + cs_free_blks = 0; + freeblks = MIN(cs_free_blks, freeblks); + } + } return (freeblks); } @@ -1742,6 +1908,12 @@ short MacToVFSError(OSErr err) if (err >= 0) return err; + /* BSD/VFS internal errnos */ + switch (err) { + case ERESERVEDNAME: /* -8 */ + return err; + } + switch (err) { case dskFulErr: /* -34 */ case btNoSpaceAvail: /* -32733 */ @@ -2040,7 +2212,6 @@ journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg) strlcpy(ji->desired_uuid, uuid_str, 128); } vnode_setmountedon(ji->jvp); - // printf("hfs: journal open cb: got device %s (%s)\n", bsd_name, uuid_str); return 0; // stop iterating } else { vnode_put(ji->jvp); @@ -2051,7 +2222,6 @@ journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg) return 1; // keep iterating } -extern dev_t IOBSDGetMediaWithUUID(const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout); extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg); kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len); @@ -2122,10 +2292,7 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, const char *dev_name; 
devvp = hfsmp->hfs_devvp; - dev_name = vnode_name(devvp); - if (dev_name == NULL) { - dev_name = "unknown-dev"; - } + dev_name = vnode_getname_printable(devvp); if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) { arg_flags = args->journal_flags; @@ -2143,7 +2310,7 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, if (jinfo_bp) { buf_brelse(jinfo_bp); } - return retval; + goto cleanup_dev_name; } jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp); @@ -2171,8 +2338,9 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, hfsmp->hfs_logical_block_size, &need_init); if (hfsmp->jvp == NULL) { - buf_brelse(jinfo_bp); - return EROFS; + buf_brelse(jinfo_bp); + retval = EROFS; + goto cleanup_dev_name; } else { if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num)); @@ -2205,14 +2373,13 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, buf_brelse(jinfo_bp); if (retval) { - const char *name = vnode_getname(devvp); - printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", - name ? name : ""); - if (name) - vnode_putname(name); + const char *name = vnode_getname_printable(devvp); + printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", + name); + vnode_putname_printable(name); } - return retval; + goto cleanup_dev_name; } if (jib_flags & kJIJournalNeedInitMask) { @@ -2225,7 +2392,8 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, blksize, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); if (hfsmp->jnl) journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); @@ -2248,7 +2416,8 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, blksize, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); if (hfsmp->jnl) journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); @@ -2276,7 +2445,7 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, } printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n", retval); - return retval; + goto cleanup_dev_name; } bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512); buf_brelse(bp); @@ -2284,17 +2453,19 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, } } - - //printf("journal @ 0x%x\n", hfsmp->jnl); - // if we expected the journal to be there and we couldn't // create it or open it then we have to bail out. 
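The early-journal-init rework above replaces the scattered return statements with goto cleanup_dev_name so the name obtained from vnode_getname_printable() is released on every exit path, success or failure. A compact sketch of the single-exit idiom; get_name/put_name are hypothetical stand-ins, not real kernel calls:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-ins for vnode_getname_printable()/vnode_putname_printable(). */
static char *get_name(void) { return strdup("disk0s2"); }
static void  put_name(char *n) { free(n); }

/* Single-exit error handling: every failure path funnels through one
 * label so the reference taken at the top is always dropped. */
static int journal_init(int simulate_failure)
{
    int retval = 0;
    char *dev_name = get_name();

    if (simulate_failure) {
        retval = -1;            /* e.g. EROFS with a dirty journal */
        goto cleanup_dev_name;
    }
    /* ... open or create the journal here ... */
    retval = 0;

cleanup_dev_name:
    put_name(dev_name);         /* runs on success and failure alike */
    return retval;
}

int main(void)
{
    printf("ok path: %d, failure path: %d\n",
           journal_init(0), journal_init(1));
    return 0;
}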
if (hfsmp->jnl == NULL) { printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval); - return EINVAL; + retval = EINVAL; + goto cleanup_dev_name; } - return 0; + retval = 0; + +cleanup_dev_name: + vnode_putname_printable(dev_name); + return retval; } @@ -2417,11 +2588,8 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a } else { const char *dev_name; int need_init = 0; - - dev_name = vnode_name(devvp); - if (dev_name == NULL) { - dev_name = "unknown-dev"; - } + + dev_name = vnode_getname_printable(devvp); // since the journal is empty, just use any available external journal *((char *)&jibp->ext_jnl_uuid[0]) = '\0'; @@ -2435,19 +2603,21 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a hfsmp->hfs_logical_block_size, &need_init); if (hfsmp->jvp == NULL) { - buf_brelse(jinfo_bp); - return EROFS; + buf_brelse(jinfo_bp); + vnode_putname_printable(dev_name); + return EROFS; } else { if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) { strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num)); } - } + } jib_offset = 0; recreate_journal = 1; write_jibp = 1; if (need_init) { jib_flags |= kJIJournalNeedInitMask; } + vnode_putname_printable(dev_name); } // save this off for the hack-y check in hfs_remove() @@ -2469,11 +2639,10 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a buf_brelse(jinfo_bp); if (retval) { - const char *name = vnode_getname(devvp); - printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", - name ? name : ""); - if (name) - vnode_putname(name); + const char *name = vnode_getname_printable(devvp); + printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n", + name); + vnode_putname_printable(name); } return retval; @@ -2489,7 +2658,8 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a hfsmp->hfs_logical_block_size, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); if (hfsmp->jnl) journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); @@ -2520,7 +2690,8 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a hfsmp->hfs_logical_block_size, arg_flags, arg_tbufsz, - hfs_sync_metadata, hfsmp->hfs_mp); + hfs_sync_metadata, hfsmp->hfs_mp, + hfsmp->hfs_mp); if (hfsmp->jnl) journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp); } @@ -2538,8 +2709,6 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a jinfo_bp = NULL; jibp = NULL; - //printf("hfs: journal @ 0x%x\n", hfsmp->jnl); - // if we expected the journal to be there and we couldn't // create it or open it then we have to bail out. if (hfsmp->jnl == NULL) { @@ -2770,9 +2939,16 @@ hfs_metadatazone_init(struct hfsmount *hfsmp, int disable) hfsmp->hfs_metazone_end = blk - 1; /* The default hotfile area is at the end of the zone. 
*/ - hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize); - hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end; - hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp); + if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) { + hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize); + hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end; + hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp); + } + else { + hfsmp->hfs_hotfile_start = 0; + hfsmp->hfs_hotfile_end = 0; + hfsmp->hfs_hotfile_freeblks = 0; + } #if 0 printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end); printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end); @@ -2861,7 +3037,7 @@ hfs_sync_ejectable(struct hfsmount *hfsmp) hfsmp->hfs_last_sync_request_time = now; - clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline); + clock_interval_to_deadline(HFS_META_DELAY, NSEC_PER_USEC, &deadline); /* * Increment hfs_sync_scheduled on the assumption that we're the @@ -3092,6 +3268,11 @@ check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *a return 0; } + /* Swap files are special; skip them */ + if (vnode_isswap(vp)) { + return 0; + } + if (VTOC(vp)->c_bsdflags & UF_TRACKED) { // the file has the tracked bit set, so send an event to the tracked-file handler int error; @@ -3141,7 +3322,12 @@ check_for_dataless_file(struct vnode *vp, uint64_t op_type) // there's nothing to do, it's not dataless return 0; } - + + /* Swap files are special; ignore them */ + if (vnode_isswap(vp)) { + return 0; + } + // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp); error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT); if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) { diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index fad99d0a4..1c74ab1f7 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -97,6 +97,8 @@ int hfs_movedata (struct vnode *, struct vnode*); static int hfs_move_fork (struct filefork *srcfork, struct cnode *src, struct filefork *dstfork, struct cnode *dst); +decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp); + #if FIFO static int hfsfifo_read(struct vnop_read_args *); static int hfsfifo_write(struct vnop_write_args *); @@ -257,7 +259,7 @@ hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock) /* maybe we should take the hfs cnode lock here, and if so, use the skiplock parameter to tell us not to */ - if (!skiplock) hfs_lock(cp, HFS_SHARED_LOCK); + if (!skiplock) hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); struct vnode *c_vp = cp->c_vp; if (c_vp) { /* we already have a data vnode */ @@ -296,9 +298,10 @@ hfs_ref_data_vp(struct cnode *cp, struct vnode **data_vp, int skiplock) /* * hfs_lazy_init_decmpfs_cnode(): returns the decmpfs_cnode for a cnode, - * allocating it if necessary; returns NULL if there was an allocation error + * allocating it if necessary; returns NULL if there was an allocation error. + * function is non-static so that it can be used from the FCNTL handler. 
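hfs_lazy_init_decmpfs_cnode(), made non-static above for the fcntl handler, allocates per-cnode decmpfs state only on first use. One way to make that kind of lazy initialization safe without a lock is a publish-once compare-and-swap; the sketch below uses C11 atomics and is only an illustration of the idea, not the kernel's actual primitive:

#include <stdatomic.h>
#include <stdlib.h>

struct decmp_state { int refcount; };          /* stand-in payload */

struct cnode_like {
    _Atomic(struct decmp_state *) c_decmp;
};

/* Allocate on first use and publish with compare-and-swap; a losing
 * racer frees its copy and adopts the winner's pointer.  Returns
 * NULL only if allocation fails. */
static struct decmp_state *lazy_init(struct cnode_like *cp)
{
    struct decmp_state *cur = atomic_load(&cp->c_decmp);
    if (cur != NULL)
        return cur;                    /* fast path: already present */

    struct decmp_state *fresh = calloc(1, sizeof(*fresh));
    if (fresh == NULL)
        return NULL;

    struct decmp_state *expected = NULL;
    if (atomic_compare_exchange_strong(&cp->c_decmp, &expected, fresh))
        return fresh;                  /* we published first */

    free(fresh);                       /* lost the race */
    return expected;                   /* CAS left the winner here */
}

int main(void)
{
    struct cnode_like cp = { NULL };
    return lazy_init(&cp) == NULL;     /* 0 on success */
}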
*/ -static decmpfs_cnode * +decmpfs_cnode * hfs_lazy_init_decmpfs_cnode(struct cnode *cp) { if (!cp->c_decmp) { @@ -516,7 +519,7 @@ hfs_vnop_open(struct vnop_open_args *ap) return (0); } - if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); #if QUOTA @@ -586,7 +589,7 @@ hfs_vnop_close(ap) int tooktrunclock = 0; int knownrefs = 0; - if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) + if ( hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) return (0); cp = VTOC(vp); hfsmp = VTOHFS(vp); @@ -613,11 +616,11 @@ hfs_vnop_close(ap) // release cnode lock; must acquire truncate lock BEFORE cnode lock hfs_unlock(cp); - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); tooktrunclock = 1; - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) { - hfs_unlock_truncate(cp, 0); + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); // bail out if we can't re-acquire cnode lock return 0; } @@ -655,7 +658,7 @@ hfs_vnop_close(ap) } if (tooktrunclock){ - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } hfs_unlock(cp); @@ -751,12 +754,12 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) */ if ((vap->va_active & VNODE_ATTR_TIMES) && (cp->c_touch_acctime || cp->c_touch_chgtime || cp->c_touch_modtime)) { - if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); hfs_touchtimes(hfsmp, cp); } else { - if ((error = hfs_lock(cp, HFS_SHARED_LOCK))) + if ((error = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) return (error); } @@ -1042,7 +1045,8 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap) if ((cp->c_flag & C_HARDLINK) && ((cp->c_desc.cd_namelen == 0) || (vap->va_linkid != cp->c_cnid))) { - /* If we have no name and our link ID is the raw inode number, then we may + /* + * If we have no name and our link ID is the raw inode number, then we may * have an open-unlinked file. Go to the next link in this case. */ if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) { @@ -1133,16 +1137,14 @@ hfs_vnop_setattr(ap) error = decmpfs_update_attributes(vp, vap); if (error) return error; - +#endif // // if this is not a size-changing setattr and it is not just // an atime update, then check for a snapshot. // if (!VATTR_IS_ACTIVE(vap, va_data_size) && !(vap->va_active == VNODE_ATTR_va_access_time)) { - check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_MOD, NULL); + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_MOD, NSPACE_REARM_NO_ARG); } -#endif - #if CONFIG_PROTECT if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { @@ -1195,13 +1197,13 @@ hfs_vnop_setattr(ap) #endif /* Take truncate lock before taking cnode lock. */ - hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); /* Perform the ubc_setsize before taking the cnode lock. 
*/ ubc_setsize(vp, vap->va_data_size); - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate(VTOC(vp), 0); + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT); #if HFS_COMPRESSION decmpfs_unlock_compressed_data(dp, 1); #endif @@ -1211,7 +1213,7 @@ hfs_vnop_setattr(ap) error = hfs_truncate(vp, vap->va_data_size, vap->va_vaflags & 0xffff, 1, 0, ap->a_context); - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); #if HFS_COMPRESSION decmpfs_unlock_compressed_data(dp, 1); #endif @@ -1219,7 +1221,7 @@ hfs_vnop_setattr(ap) goto out; } if (cp == NULL) { - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); cp = VTOC(vp); } @@ -1592,11 +1594,24 @@ good: /* - * The hfs_exchange routine swaps the fork data in two files by - * exchanging some of the information in the cnode. It is used - * to preserve the file ID when updating an existing file, in - * case the file is being tracked through its file ID. Typically - * its used after creating a new file during a safe-save. + * hfs_vnop_exchange: + * + * Inputs: + * 'from' vnode/cnode + * 'to' vnode/cnode + * options flag bits + * vfs_context + * + * Discussion: + * hfs_vnop_exchange is used to service the exchangedata(2) system call. + * Per the requirements of that system call, this function "swaps" some + * of the information that lives in one catalog record for some that + * lives in another. Note that not everything is swapped; in particular, + * the extent information stored in each cnode is kept local to that + * cnode. This allows existing file descriptor references to continue + * to operate on the same content, regardless of the location in the + * namespace that the file may have moved to. See inline comments + * in the function for more information. */ int hfs_vnop_exchange(ap) @@ -1627,12 +1642,12 @@ hfs_vnop_exchange(ap) cat_cookie_t cookie; time_t orig_from_ctime, orig_to_ctime; - /* The files must be on the same volume. */ - if (vnode_mount(from_vp) != vnode_mount(to_vp)) - return (EXDEV); - - if (from_vp == to_vp) - return (EINVAL); + /* + * VFS does the following checks: + * 1. Validate that both are files. + * 2. Validate that both are on the same mount. + * 3. Validate that they're not the same vnode. + */ orig_from_ctime = VTOC(from_vp)->c_ctime; orig_to_ctime = VTOC(to_vp)->c_ctime; @@ -1678,10 +1693,10 @@ hfs_vnop_exchange(ap) * Allow the rest of the codeflow to re-acquire the cnode locks in order. 
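Many hunks in this file encode the same lock-ordering rule the comments spell out: the truncate lock is always taken before the cnode lock, ubc_setsize() runs before the cnode lock is held, and failure paths unwind in reverse order. A schematic sketch of the ordered acquire/unwind, with a pthread rwlock and mutex standing in for the two HFS locks:

#include <pthread.h>

struct cnode_like {
    pthread_rwlock_t trunc_lock;   /* rule: always taken first */
    pthread_mutex_t  cnode_lock;   /* rule: always taken second */
};

/* Ordered acquisition with reverse-order unwind on failure. */
static int resize(struct cnode_like *cp)
{
    pthread_rwlock_wrlock(&cp->trunc_lock);

    /* work that must run without the cnode lock (the ubc_setsize()
     * analogue) goes here */

    if (pthread_mutex_lock(&cp->cnode_lock) != 0) {
        pthread_rwlock_unlock(&cp->trunc_lock);   /* unwind */
        return -1;
    }

    /* ... the truncate itself, under both locks ... */

    pthread_mutex_unlock(&cp->cnode_lock);
    pthread_rwlock_unlock(&cp->trunc_lock);
    return 0;
}

int main(void)
{
    struct cnode_like cp = {
        PTHREAD_RWLOCK_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
    };
    return resize(&cp);
}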
*/ - hfs_lock_truncate (VTOC(from_vp), HFS_SHARED_LOCK); + hfs_lock_truncate (VTOC(from_vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); - if ((error = hfs_lock(VTOC(from_vp), HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate (VTOC(from_vp), 0); + if ((error = hfs_lock(VTOC(from_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + hfs_unlock_truncate (VTOC(from_vp), HFS_LOCK_DEFAULT); return error; } @@ -1689,7 +1704,7 @@ hfs_vnop_exchange(ap) if (vnode_isinuse(from_vp, 1)) { error = EBUSY; hfs_unlock(VTOC(from_vp)); - hfs_unlock_truncate (VTOC(from_vp), 0); + hfs_unlock_truncate (VTOC(from_vp), HFS_LOCK_DEFAULT); return error; } @@ -1698,7 +1713,7 @@ hfs_vnop_exchange(ap) error = hfs_filedone (from_vp, ap->a_context); VTOC(from_vp)->c_flag &= ~C_SWAPINPROGRESS; hfs_unlock(VTOC(from_vp)); - hfs_unlock_truncate(VTOC(from_vp), 0); + hfs_unlock_truncate(VTOC(from_vp), HFS_LOCK_DEFAULT); if (error) { return error; @@ -1713,7 +1728,7 @@ hfs_vnop_exchange(ap) hfsmp = VTOHFS(from_vp); /* Resource forks cannot be exchanged. */ - if (VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) { + if ( VNODE_IS_RSRC(from_vp) || VNODE_IS_RSRC(to_vp)) { error = EINVAL; goto exit; } @@ -1776,7 +1791,23 @@ hfs_vnop_exchange(ap) to_parid = to_cp->c_parentcnid; } - /* Do the exchange */ + /* + * ExchangeFileIDs swaps the extent information attached to two + * different file IDs. It also swaps the extent information that + * may live in the extents-overflow B-Tree. + * + * We do this in a transaction as this may require a lot of B-Tree nodes + * to do completely, particularly if one of the files in question + * has a lot of extents. + * + * For example, assume "file1" has fileID 50, and "file2" has fileID 52. + * For the on-disk records, which are assumed to be synced, we will + * first swap the resident inline-8 extents as part of the catalog records. + * Then we will swap any extents overflow records for each file. + * + * When this function is done, "file1" will have fileID 52, and "file2" will + * have fileID 50. + */ error = ExchangeFileIDs(hfsmp, from_nameptr, to_nameptr, from_parid, to_parid, from_cp->c_hint, to_cp->c_hint); hfs_systemfile_unlock(hfsmp, lockflags); @@ -1797,7 +1828,14 @@ hfs_vnop_exchange(ap) if (to_vp) cache_purge(to_vp); - /* Save a copy of from attributes before swapping. */ + /* Bump both source and destination write counts before any swaps. */ + { + hfs_incr_gencount (from_cp); + hfs_incr_gencount (to_cp); + } + + + /* Save a copy of "from" attributes before swapping. */ bcopy(&from_cp->c_desc, &tempdesc, sizeof(struct cat_desc)); bcopy(&from_cp->c_attr, &tempattr, sizeof(struct cat_attr)); @@ -1810,22 +1848,46 @@ hfs_vnop_exchange(ap) to_cp->c_flag &= ~(C_HARDLINK | C_HASXATTRS); /* - * Swap the descriptors and all non-fork related attributes. - * (except the modify date) + * Complete the in-memory portion of the copy. + * + * ExchangeFileIDs swaps the on-disk records involved. We complete the + * operation by swapping the in-memory contents of the two files here. + * We swap the cnode descriptors, which contain name, BSD attributes, + * timestamps, etc, about the file. + * + * NOTE: We do *NOT* swap the fileforks of the two cnodes. We have + * already swapped the on-disk extent information. As long as we swap the + * IDs, the in-line resident 8 extents that live in the filefork data + * structure will point to the right data for the new file ID if we leave + * them alone. 
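The expanded comments around ExchangeFileIDs describe the exchangedata model: catalog descriptors and file IDs swap between the two cnodes while each filefork stays put, so an open file descriptor keeps reading the same bytes under a new name and ID. A toy model of that swap; the structs are invented for illustration and are not the kernel's cat_desc/cat_attr layouts:

#include <stdio.h>
#include <stdint.h>

struct desc  { char name[32]; };        /* namespace-visible side */
struct attrs { uint32_t fileid; };      /* identity side */

struct cnode_like {
    struct desc  c_desc;    /* swapped: the name moves */
    struct attrs c_attr;    /* swapped: the file ID moves */
    void        *fork;      /* NOT swapped: extents stay with the data */
};

/* Swap metadata between two in-memory files while leaving each fork
 * alone, mirroring the exchange described above. */
static void exchange(struct cnode_like *a, struct cnode_like *b)
{
    struct desc  tmpd = a->c_desc;
    struct attrs tmpa = a->c_attr;

    a->c_desc = b->c_desc;
    a->c_attr = b->c_attr;

    b->c_desc = tmpd;
    b->c_attr = tmpa;
    /* a->fork and b->fork intentionally untouched */
}

int main(void)
{
    int dataA = 0xA, dataB = 0xB;
    struct cnode_like a = { { "file1" }, { 50 }, &dataA };
    struct cnode_like b = { { "file2" }, { 52 }, &dataB };

    exchange(&a, &b);

    /* 'a' is now named file2 with ID 52, yet an open descriptor on it
     * still reads the same bytes (dataA). */
    printf("a: name=%s id=%u data=%#x\n",
           a.c_desc.name, a.c_attr.fileid, *(int *)a.fork);
    return 0;
}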
+ * + * As a result, any file descriptor that points to a particular + * vnode (even though it should change names), will continue + * to point to the same content. */ + + /* Copy the "to" -> "from" cnode */ bcopy(&to_cp->c_desc, &from_cp->c_desc, sizeof(struct cat_desc)); from_cp->c_hint = 0; /* * If 'to' was a hardlink, then we copied over its link ID/CNID/(namespace ID) - * when we bcopy'd the descriptor above. However, we need to be careful - * when setting up the fileID below, because we cannot assume that the - * file ID is the same as the CNID if either one was a hardlink. - * The file ID is stored in the c_attr as the ca_fileid. So it needs - * to be pulled explicitly; we cannot just use the CNID. + * when we bcopy'd the descriptor above. However, the cnode attributes + * are not bcopied. As a result, make sure to swap the file IDs of each item. + * + * Further, other hardlink attributes must be moved along in this swap: + * the linkcount, the linkref, and the firstlink all need to move + * along with the file IDs. See note below regarding the flags and + * what moves vs. what does not. + * + * For Reference: + * linkcount == total # of hardlinks. + * linkref == the indirect inode pointer. + * firstlink == the first hardlink in the chain (written to the raw inode). + * These three are tied to the fileID and must move along with the rest of the data. */ from_cp->c_fileid = to_cp->c_attr.ca_fileid; - + from_cp->c_itime = to_cp->c_itime; from_cp->c_btime = to_cp->c_btime; from_cp->c_atime = to_cp->c_atime; @@ -1860,10 +1922,11 @@ hfs_vnop_exchange(ap) * 3) swap the special flag bits to their destination */ from_cp->c_flag |= to_flag_special; - from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags; bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32); + + /* Copy the "from" -> "to" cnode */ bcopy(&tempdesc, &to_cp->c_desc, sizeof(struct cat_desc)); to_cp->c_hint = 0; /* @@ -1892,6 +1955,7 @@ hfs_vnop_exchange(ap) to_cp->c_attr.ca_recflags = tempattr.ca_recflags; bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32); + /* Rehash the cnodes using their new file IDs */ hfs_chash_rehash(hfsmp, from_cp, to_cp); @@ -1943,6 +2007,16 @@ hfs_vnop_mmap(struct vnop_mmap_args *ap) if (ap->a_fflags & PROT_WRITE) { check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL); + + /* even though we're manipulating a cnode field here, we're only monotonically increasing + * the generation counter. The vnode can't be recycled (because we hold a FD in order to cause the + * map to happen). So it's safe to do this without holding the cnode lock. The caller's only + * requirement is that the number has been changed. + */ + struct cnode *cp = VTOC(vp); + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + hfs_incr_gencount(cp); + } } } @@ -2296,18 +2370,18 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) } } else if (UBCINFOEXISTS(vp)) { hfs_unlock(cp); - hfs_lock_truncate(cp, HFS_SHARED_LOCK); + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); took_trunc_lock = 1; if (fp->ff_unallocblocks != 0) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); } /* Don't hold cnode lock when calling into cluster layer. */ (void) cluster_push(vp, waitdata ? 
IO_SYNC : 0); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } /* * When MNT_WAIT is requested and the zero fill timeout @@ -2332,10 +2406,10 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) if (!took_trunc_lock || (cp->c_truncatelockowner == HFS_SHARED_OWNER)) { hfs_unlock(cp); if (took_trunc_lock) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); } - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); took_trunc_lock = 1; } while ((invalid_range = TAILQ_FIRST(&fp->ff_invalidranges))) { @@ -2353,19 +2427,19 @@ hfs_fsync(struct vnode *vp, int waitfor, int fullsync, struct proc *p) (void) cluster_write(vp, (struct uio *) 0, fp->ff_size, end + 1, start, (off_t)0, IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); cp->c_flag |= C_MODIFIED; } hfs_unlock(cp); (void) cluster_push(vp, waitdata ? IO_SYNC : 0); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); } cp->c_flag &= ~C_ZFWANTSYNC; cp->c_zftimeout = 0; } datasync: if (took_trunc_lock) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); took_trunc_lock = 0; } /* @@ -2842,15 +2916,16 @@ hfs_vnop_remove(ap) struct cnode *cp; struct vnode *rvp = NULL; int error=0, recycle_rsrc=0; - time_t orig_ctime; + int recycle_vnode = 0; uint32_t rsrc_vid = 0; + time_t orig_ctime; if (dvp == vp) { return (EINVAL); } orig_ctime = VTOC(vp)->c_ctime; - if ( (!vnode_isnamedstream(vp)) && ((ap->a_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) == 0)) { + if (!vnode_isnamedstream(vp) && ((ap->a_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) == 0)) { error = check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); if (error) { // XXXdbg - decide on a policy for handling namespace handler failures! @@ -2863,10 +2938,10 @@ hfs_vnop_remove(ap) relock: - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if ((error = hfs_lockpair(dcp, cp, HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); if (rvp) { vnode_put (rvp); } @@ -2888,17 +2963,15 @@ relock: * steps if 'vp' is a directory. */ - if ((vp->v_type == VLNK) || (vp->v_type == VREG)) { if ((cp->c_rsrc_vp) && (rvp == NULL)) { /* We need to acquire the rsrc vnode */ rvp = cp->c_rsrc_vp; rsrc_vid = vnode_vid (rvp); - + /* Unlock everything to acquire iocount on the rsrc vnode */ - hfs_unlock_truncate (cp, 0); + hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT); hfs_unlockpair (dcp, cp); - /* Use the vid to maintain identity on rvp */ if (vnode_getwithvid(rvp, rsrc_vid)) { /* @@ -2942,10 +3015,21 @@ relock: * If the cnode was instead marked C_NOEXISTS, then there wouldn't be any * more work. */ - if ((error == 0) && (rvp)) { - recycle_rsrc = 1; + if (error == 0) { + if (rvp) { + recycle_rsrc = 1; + } + /* + * If the target was actually removed from the catalog schedule it for + * full reclamation/inactivation. 
We hold an iocount on it so it should just + * get marked with MARKTERM + */ + if (cp->c_flag & C_NOEXISTS) { + recycle_vnode = 1; + } } + /* * Drop the truncate lock before unlocking the cnode * (which can potentially perform a vnode_put and @@ -2953,14 +3037,17 @@ relock: * truncate lock) */ rm_done: - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); hfs_unlockpair(dcp, cp); if (recycle_rsrc) { /* inactive or reclaim on rvp will clean up the blocks from the rsrc fork */ vnode_recycle(rvp); } - + if (recycle_vnode) { + vnode_recycle (vp); + } + if (rvp) { /* drop iocount on rsrc fork, was obtained at beginning of fxn */ vnode_put(rvp); @@ -3053,7 +3140,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, return (EPERM); } - /* + /* * If removing a symlink, then we need to ensure that the * data blocks for the symlink are not still in-flight or pending. * If so, we will unlink the symlink here, making its blocks @@ -3378,7 +3465,7 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, /* Look up the resource fork first, if necessary */ if (temp_rsrc_fork) { - error = cat_lookup (hfsmp, &desc, 1, (struct cat_desc*) NULL, + error = cat_lookup (hfsmp, &desc, 1, 0, (struct cat_desc*) NULL, (struct cat_attr*) NULL, &temp_rsrc_fork->ff_data, NULL); if (error) { FREE_ZONE (temp_rsrc_fork, sizeof(struct filefork), M_HFSFORK); @@ -3400,8 +3487,8 @@ hfs_removefile(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, error = cat_delete(hfsmp, &desc, &cp->c_attr); if (error && error != ENXIO && error != ENOENT) { - printf("hfs_removefile: deleting file %s (%d), err: %d\n", - cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, error); + printf("hfs_removefile: deleting file %s (id=%d) vol=%s err=%d\n", + cp->c_desc.cd_nameptr, cp->c_attr.ca_fileid, hfsmp->vcbVN, error); } if (error == 0) { @@ -3606,7 +3693,7 @@ hfs_vnop_rename(ap) * resource fork vnode (and only if necessary). We don't care if the * source has a resource fork vnode or not. */ - struct vnode *tvp_rsrc = NULLVP; + struct vnode *tvp_rsrc = NULLVP; uint32_t tvp_rsrc_vid = 0; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; @@ -3629,6 +3716,7 @@ hfs_vnop_rename(ap) time_t orig_from_ctime, orig_to_ctime; int emit_rename = 1; int emit_delete = 1; + int is_tracked = 0; orig_from_ctime = VTOC(fvp)->c_ctime; if (tvp && VTOC(tvp)) { @@ -3649,7 +3737,7 @@ hfs_vnop_rename(ap) * We may not necessarily emit a RENAME event */ emit_delete = 0; - if ((error = hfs_lock(VTOC(fvp), HFS_SHARED_LOCK))) { + if ((error = hfs_lock(VTOC(fvp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { return error; } /* Check to see if the item is a hardlink or not */ @@ -3679,6 +3767,13 @@ hfs_vnop_rename(ap) } } if (emit_rename) { + /* c_bsdflags should only be assessed while holding the cnode lock. + * This is not done consistently throughout the code and can result + * in race. This will be fixed via rdar://12181064 + */ + if (VTOC(fvp)->c_bsdflags & UF_TRACKED) { + is_tracked = 1; + } check_for_tracked_file(fvp, orig_from_ctime, NAMESPACE_HANDLER_RENAME_OP, NULL); } @@ -3687,11 +3782,11 @@ hfs_vnop_rename(ap) check_for_tracked_file(tvp, orig_to_ctime, NAMESPACE_HANDLER_DELETE_OP, NULL); } } - + retry: /* When tvp exists, take the truncate lock for hfs_removefile(). 
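The remove and rename paths above share a retry idiom: sample the vnode's vid under the locks, drop everything to take an iocount with vnode_getwithvid(), and jump back to relock/retry if the identity check fails because the vnode was recycled in the window. A schematic user-space analogue, with an invented get_with_vid() and a bail-out bound added purely for the sketch:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
    uint32_t vid;   /* bumped whenever the object is recycled */
};

/* Stand-in for vnode_getwithvid(): succeeds only if the object still
 * carries the generation we sampled before dropping our locks. */
static bool get_with_vid(struct obj *o, uint32_t vid)
{
    return o->vid == vid;
}

static int remove_like(struct obj *o)
{
    int attempts = 0;

relock:
    if (++attempts > 10)
        return -1;                /* bound added for the sketch */

    uint32_t vid = o->vid;        /* sample identity under the lock */

    /* ... drop all locks here and do blocking work ... */

    if (!get_with_vid(o, vid))
        goto relock;              /* recycled underneath us: retry */

    /* identity confirmed: safe to finish the removal */
    return 0;
}

int main(void)
{
    struct obj o = { 7 };
    printf("remove: %d\n", remove_like(&o));
    return 0;
}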
*/ if (tvp && (vnode_isreg(tvp) || vnode_islnk(tvp))) { - hfs_lock_truncate(VTOC(tvp), HFS_EXCLUSIVE_LOCK); + hfs_lock_truncate(VTOC(tvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); took_trunc_lock = 1; } @@ -3699,7 +3794,7 @@ retry: HFS_EXCLUSIVE_LOCK, &error_cnode); if (error) { if (took_trunc_lock) { - hfs_unlock_truncate(VTOC(tvp), 0); + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); took_trunc_lock = 0; } @@ -3728,6 +3823,10 @@ retry: goto retry; } + if (emit_rename && is_tracked) { + resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_FAILED_OP | NAMESPACE_HANDLER_TRACK_EVENT); + } + return (error); } @@ -3753,7 +3852,7 @@ retry: /* Unlock everything to acquire iocount on this rsrc vnode */ if (took_trunc_lock) { - hfs_unlock_truncate (VTOC(tvp), 0); + hfs_unlock_truncate (VTOC(tvp), HFS_LOCK_DEFAULT); took_trunc_lock = 0; } hfs_unlockfour(fdcp, fcp, tdcp, tcp); @@ -3767,6 +3866,8 @@ retry: } } + + /* Ensure we didn't race src or dst parent directories with rmdir. */ if (fdcp->c_flag & (C_NOEXISTS | C_DELETED)) { error = ENOENT; @@ -3795,7 +3896,7 @@ retry: // never existed in the first place. // if (took_trunc_lock) { - hfs_unlock_truncate(VTOC(tvp), 0); + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); took_trunc_lock = 0; } error = 0; @@ -3984,7 +4085,7 @@ retry: lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - if (cat_lookup(hfsmp, &tmpdesc, 0, NULL, NULL, NULL, &real_cnid) != 0) { + if (cat_lookup(hfsmp, &tmpdesc, 0, 0, NULL, NULL, NULL, &real_cnid) != 0) { hfs_systemfile_unlock(hfsmp, lockflags); goto out; } @@ -4224,7 +4325,7 @@ skip_rm: /* Update the vnode's name now that the rename has completed. */ vnode_update_identity(fvp, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_hash, (VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME)); - + /* * At this point, we may have a resource fork vnode attached to the * 'from' vnode. 
If it exists, we will want to update its name, because @@ -4251,7 +4352,7 @@ skip_rm: * 4) update the vnode's vid */ vnode_update_identity (fcp->c_rsrc_vp, fvp, rsrc_path, len, 0, (VNODE_UPDATE_NAME | VNODE_UPDATE_CACHE)); - + /* Free the memory associated with the resource fork's name */ FREE_ZONE (rsrc_path, MAXPATHLEN, M_NAMEI); } @@ -4271,21 +4372,29 @@ out: } if (took_trunc_lock) { - hfs_unlock_truncate(VTOC(tvp), 0); + hfs_unlock_truncate(VTOC(tvp), HFS_LOCK_DEFAULT); } hfs_unlockfour(fdcp, fcp, tdcp, tcp); - /* Now vnode_put the resource fork vnode if necessary */ + /* Now vnode_put the resource forks vnodes if necessary */ if (tvp_rsrc) { vnode_put(tvp_rsrc); - tvp_rsrc = NULL; + tvp_rsrc = NULL; } /* After tvp is removed the only acceptable error is EIO */ if (error && tvp_deleted) error = EIO; + if (emit_rename && is_tracked) { + if (error) { + resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_FAILED_OP | NAMESPACE_HANDLER_TRACK_EVENT); + } else { + resolve_nspace_item(fvp, NAMESPACE_HANDLER_RENAME_SUCCESS_OP | NAMESPACE_HANDLER_TRACK_EVENT); + } + } + return (error); } @@ -4342,7 +4451,7 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) goto out; } vp = *vpp; - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { goto out; } cp = VTOC(vp); @@ -4381,17 +4490,17 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap) /* hfs_removefile() requires holding the truncate lock */ hfs_unlock(cp); - hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); - hfs_lock(cp, HFS_FORCE_LOCK); + hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); if (hfs_start_transaction(hfsmp) != 0) { started_tr = 0; - hfs_unlock_truncate(cp, TRUE); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); goto out; } (void) hfs_removefile(dvp, vp, ap->a_cnp, 0, 0, 0, NULL, 0); - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); goto out; } @@ -4541,7 +4650,7 @@ hfs_vnop_readdir(ap) hfsmp = VTOHFS(vp); /* Note that the dirhint calls require an exclusive lock. */ - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); /* Pick up cnid hint (if any). 
*/ @@ -4775,7 +4884,7 @@ hfs_vnop_readlink(ap) if (!vnode_islnk(vp)) return (EINVAL); - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); cp = VTOC(vp); fp = VTOF(vp); @@ -4852,18 +4961,28 @@ hfs_vnop_pathconf(ap) vfs_context_t a_context; } */ *ap; { + + int std_hfs = (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD); switch (ap->a_name) { case _PC_LINK_MAX: - if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) - *ap->a_retval = 1; - else + if (std_hfs == 0){ *ap->a_retval = HFS_LINK_MAX; + } +#if CONFIG_HFS_STD + else { + *ap->a_retval = 1; + } +#endif break; case _PC_NAME_MAX: - if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) - *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ - else + if (std_hfs == 0) { *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ + } +#if CONFIG_HFS_STD + else { + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ + } +#endif break; case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; /* 1024 */ @@ -4878,10 +4997,14 @@ hfs_vnop_pathconf(ap) *ap->a_retval = 200112; /* _POSIX_NO_TRUNC */ break; case _PC_NAME_CHARS_MAX: - if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) - *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ - else + if (std_hfs == 0) { *ap->a_retval = kHFSPlusMaxFileNameChars; /* 255 */ + } +#if CONFIG_HFS_STD + else { + *ap->a_retval = kHFSMaxFileNameChars; /* 31 */ + } +#endif break; case _PC_CASE_SENSITIVE: if (VTOHFS(ap->a_vp)->hfs_flags & HFS_CASE_SENSITIVE) @@ -4893,10 +5016,15 @@ hfs_vnop_pathconf(ap) *ap->a_retval = 1; break; case _PC_FILESIZEBITS: - if (VTOHFS(ap->a_vp)->hfs_flags & HFS_STANDARD) + /* number of bits to store max file size */ + if (std_hfs == 0) { + *ap->a_retval = 64; + } +#if CONFIG_HFS_STD + else { *ap->a_retval = 32; - else - *ap->a_retval = 64; /* number of bits to store max file size */ + } +#endif break; case _PC_XATTR_SIZE_BITS: /* Number of bits to store maximum extended attribute size */ @@ -4930,6 +5058,7 @@ hfs_update(struct vnode *vp, __unused int waitfor) struct hfsmount *hfsmp; int lockflags; int error; + uint32_t tstate = 0; p = current_proc(); hfsmp = VTOHFS(vp); @@ -4945,7 +5074,21 @@ hfs_update(struct vnode *vp, __unused int waitfor) cp->c_touch_modtime = 0; return (0); } - + if (kdebug_enable) { + if (cp->c_touch_acctime) + tstate |= DBG_HFS_UPDATE_ACCTIME; + if (cp->c_touch_modtime) + tstate |= DBG_HFS_UPDATE_MODTIME; + if (cp->c_touch_chgtime) + tstate |= DBG_HFS_UPDATE_CHGTIME; + + if (cp->c_flag & C_MODIFIED) + tstate |= DBG_HFS_UPDATE_MODIFIED; + if (cp->c_flag & C_FORCEUPDATE) + tstate |= DBG_HFS_UPDATE_FORCE; + if (cp->c_flag & C_NEEDS_DATEADDED) + tstate |= DBG_HFS_UPDATE_DATEADDED; + } hfs_touchtimes(hfsmp, cp); /* Nothing to update. 
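The kdebug instrumentation added to hfs_update() below folds the separate touch_acctime/touch_modtime/touch_chgtime and C_MODIFIED/C_FORCEUPDATE/C_NEEDS_DATEADDED conditions into a single tstate word so one trace event records why the update ran. A small sketch of the encoding, with hypothetical bit values (the real DBG_HFS_UPDATE_* constants live in the HFS headers):

#include <stdio.h>
#include <stdint.h>

/* Hypothetical bit assignments for the sketch. */
#define DBG_UPD_ACCTIME  0x01u
#define DBG_UPD_MODTIME  0x02u
#define DBG_UPD_CHGTIME  0x04u
#define DBG_UPD_MODIFIED 0x08u

struct cnode_like {
    int touch_acctime, touch_modtime, touch_chgtime, modified;
};

/* Fold several independent "why are we here" conditions into one
 * word so a single trace event captures the whole reason. */
static uint32_t trace_state(const struct cnode_like *cp)
{
    uint32_t tstate = 0;
    if (cp->touch_acctime) tstate |= DBG_UPD_ACCTIME;
    if (cp->touch_modtime) tstate |= DBG_UPD_MODTIME;
    if (cp->touch_chgtime) tstate |= DBG_UPD_CHGTIME;
    if (cp->modified)      tstate |= DBG_UPD_MODIFIED;
    return tstate;
}

int main(void)
{
    struct cnode_like cp = { 1, 0, 1, 1 };
    printf("tstate = 0x%x\n", trace_state(&cp));
    return 0;
}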
*/ @@ -4978,7 +5121,11 @@ hfs_update(struct vnode *vp, __unused int waitfor) return (0); } + KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_START, vp, tstate, 0, 0, 0); + if ((error = hfs_start_transaction(hfsmp)) != 0) { + + KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_END, vp, tstate, error, -1, 0); return error; } @@ -5039,6 +5186,18 @@ hfs_update(struct vnode *vp, __unused int waitfor) rsrcfork.cf_size = rsrcfork.cf_blocks * HFSTOVCB(hfsmp)->blockSize; rsrcforkp = &rsrcfork; } + if (kdebug_enable) { + long dbg_parms[NUMPARMS]; + int dbg_namelen; + + dbg_namelen = NUMPARMS * sizeof(long); + vn_getpath(vp, (char *)dbg_parms, &dbg_namelen); + + if (dbg_namelen < (int)sizeof(dbg_parms)) + memset((char *)dbg_parms + dbg_namelen, 0, sizeof(dbg_parms) - dbg_namelen); + + kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE); + } /* * Lock the Catalog b-tree file. @@ -5055,6 +5214,8 @@ hfs_update(struct vnode *vp, __unused int waitfor) hfs_end_transaction(hfsmp); + KERNEL_DEBUG_CONSTANT(0x3018000 | DBG_FUNC_END, vp, tstate, error, 0, 0); + return (error); } @@ -5080,18 +5241,19 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, int newvnode_flags = 0; u_int32_t gnv_flags = 0; int protectable_target = 0; + int nocache = 0; #if CONFIG_PROTECT struct cprotect *entry = NULL; - uint32_t cp_class = 0; + int32_t cp_class = -1; if (VATTR_IS_ACTIVE(vap, va_dataprotect_class)) { - cp_class = vap->va_dataprotect_class; + cp_class = (int32_t)vap->va_dataprotect_class; } int protected_mount = 0; #endif - if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK))) + if ((error = hfs_lock(VTOC(dvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) return (error); /* set the cnode pointer only after successfully acquiring lock */ @@ -5143,9 +5305,11 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, } else { attr.ca_itime = tv.tv_sec; } +#if CONFIG_HFS_STD if ((hfsmp->hfs_flags & HFS_STANDARD) && gTimeZone.tz_dsttime) { attr.ca_itime += 3600; /* Same as what hfs_update does */ } +#endif attr.ca_atime = attr.ca_ctime = attr.ca_mtime = attr.ca_itime; attr.ca_atimeondisk = attr.ca_atime; if (VATTR_IS_ACTIVE(vap, va_flags)) { @@ -5181,6 +5345,8 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, */ if ((protected_mount) && (protectable_target)) { attr.ca_recflags |= kHFSHasAttributesMask; + /* delay entering in the namecache */ + nocache = 1; } #endif @@ -5191,6 +5357,9 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, */ hfs_write_dateadded (&attr, attr.ca_atime); + /* Initialize the gen counter to 1 */ + hfs_write_gencount(&attr, (uint32_t)1); + attr.ca_uid = vap->va_uid; attr.ca_gid = vap->va_gid; VATTR_SET_SUPPORTED(vap, va_mode); @@ -5232,13 +5401,15 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, #if CONFIG_PROTECT /* * To preserve file creation atomicity with regards to the content protection EA, - * we must create the file in the catalog and then write out the EA in the same - * transaction. Pre-flight any operations that we can (such as allocating/preparing - * the buffer, wrapping the keys) before we start the txn and take the requisite - * b-tree locks. We pass '0' as the fileid because we do not know it yet. + * we must create the file in the catalog and then write out its EA in the same + * transaction. 
+ * + * We only denote the target class in this EA; key generation is not completed + * until the file has been inserted into the catalog and will be done + * in a separate transaction. */ if ((protected_mount) && (protectable_target)) { - error = cp_entry_create_keys (&entry, dcp, hfsmp, cp_class, 0, attr.ca_mode); + error = cp_setup_newentry(hfsmp, dcp, cp_class, attr.ca_mode, &entry); if (error) { goto exit; } @@ -5254,13 +5425,20 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, // to check that any fileID it wants to use does not have orphaned // attributes in it. lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE, HFS_EXCLUSIVE_LOCK); + cnid_t new_id; /* Reserve some space in the Catalog file. */ if ((error = cat_preflight(hfsmp, CAT_CREATE, NULL, 0))) { hfs_systemfile_unlock(hfsmp, lockflags); goto exit; } - error = cat_create(hfsmp, &in_desc, &attr, &out_desc); + + if ((error = cat_acquire_cnid(hfsmp, &new_id))) { + hfs_systemfile_unlock (hfsmp, lockflags); + goto exit; + } + + error = cat_create(hfsmp, new_id, &in_desc, &attr, &out_desc); if (error == 0) { /* Update the parent directory */ dcp->c_childhint = out_desc.cd_hint; /* Cache directory's location */ @@ -5340,14 +5518,12 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, #if CONFIG_PROTECT /* * At this point, we must have encountered success with writing the EA. - * Update MKB with the data for the cached key, then destroy it. This may - * prevent information leakage by ensuring the cache key is only unwrapped - * to perform file I/O and it is allowed. + * Destroy our temporary cprotect (which had no keys). */ if ((attr.ca_fileid != 0) && (protected_mount) && (protectable_target)) { - cp_update_mkb (entry, attr.ca_fileid); - cp_entry_destroy (&entry); + cp_entry_destroy (entry); + entry = NULL; } #endif @@ -5357,6 +5533,9 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, } gnv_flags |= GNV_CREATE; + if (nocache) { + gnv_flags |= GNV_NOCACHE; + } /* * Create a vnode for the object just created. @@ -5382,6 +5561,72 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, cp = VTOC(tvp); *vpp = tvp; +#if CONFIG_PROTECT + /* + * Now that we have a vnode-in-hand, generate keys for this namespace item. + * If we fail to create the keys, then attempt to delete the item from the + * namespace. If we can't delete the item, that's not desirable but also not fatal.. + * All of the places which deal with restoring/unwrapping keys must also be + * prepared to encounter an entry that does not have keys. + */ + if ((protectable_target) && (protected_mount)) { + struct cprotect *keyed_entry = NULL; + + if (cp->c_cpentry == NULL) { + panic ("hfs_makenode: no cpentry for cnode (%p)", cp); + } + + error = cp_generate_keys (hfsmp, cp, cp->c_cpentry->cp_pclass, &keyed_entry); + if (error == 0) { + /* + * Upon success, the keys were generated and written out. + * Update the cp pointer in the cnode. + */ + cp_replace_entry (cp, keyed_entry); + if (nocache) { + cache_enter (dvp, tvp, cnp); + } + } + else { + /* If key creation OR the setxattr failed, emit EPERM to userland */ + error = EPERM; + + /* + * Beware! This slightly violates the lock ordering for the + * cnode/vnode 'tvp'. Ordinarily, you must acquire the truncate lock + * which guards file size changes before acquiring the normal cnode lock + * and calling hfs_removefile on an item. 
+ * + * However, in this case, we are still holding the directory lock so + * 'tvp' is not lookup-able and it was a newly created vnode so it + * cannot have any content yet. The only reason we are initiating + * the removefile is because we could not generate content protection keys + * for this namespace item. Note also that we pass a '1' in the allow_dirs + * argument for hfs_removefile because we may be creating a directory here. + * + * All this to say that while it is technically a violation it is + * impossible to race with another thread for this cnode so it is safe. + */ + int err = hfs_removefile (dvp, tvp, cnp, 0, 0, 1, NULL, 0); + if (err) { + printf("hfs_makenode: removefile failed (%d) for CP entry %p\n", err, tvp); + } + + /* Release the cnode lock and mark the vnode for termination */ + hfs_unlock (cp); + err = vnode_recycle (tvp); + if (err) { + printf("hfs_makenode: vnode_recycle failed (%d) for CP entry %p\n", err, tvp); + } + + /* Drop the iocount on the new vnode to force reclamation/recycling */ + vnode_put (tvp); + cp = NULL; + *vpp = NULL; + } + } +#endif + #if QUOTA /* * Once we create this vnode, we need to initialize its quota data @@ -5390,7 +5635,10 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, * function) to see if creating this cnode/vnode would cause us to go over quota. */ if (hfsmp->hfs_flags & HFS_QUOTAS) { - (void) hfs_getinoquota(cp); + if (cp) { + /* cp could have been zeroed earlier */ + (void) hfs_getinoquota(cp); + } } #endif @@ -5405,7 +5653,8 @@ exit: * out the pointer if it was called already. */ if (entry) { - cp_entry_destroy (&entry); + cp_entry_destroy (entry); + entry = NULL; } #endif @@ -5499,7 +5748,7 @@ restart: error = vnode_getwithvid(rvp, vid); if (can_drop_lock) { - (void) hfs_lock(cp, HFS_FORCE_LOCK); + (void) hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); /* * When we relinquished our cnode lock, the cnode could have raced @@ -5538,7 +5787,7 @@ restart: if (name) printf("hfs_vgetrsrc: couldn't get resource" - " fork for %s, err %d\n", name, error); + " fork for %s, vol=%s, err=%d\n", name, hfsmp->vcbVN, error); return (error); } } else { @@ -5604,9 +5853,7 @@ restart: lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - /* - * Get resource fork data - * + /* * We call cat_idlookup (instead of cat_lookup) below because we can't * trust the descriptor in the provided cnode for lookups at this point. * Between the time of the original lookup of this vnode and now, the @@ -5630,8 +5877,8 @@ restart: * do a lookup on /tmp/b, you'd acquire an entirely different record's resource * fork. * - * As a result, we use the fileid, which should be invariant for the lifetime - * of the cnode (possibly barring calls to exchangedata). + * As a result, we use the fileid, which should be invariant for the lifetime + * of the cnode (possibly barring calls to exchangedata). * * Addendum: We can't do the above for HFS standard since we aren't guaranteed to * have thread records for files. They were only required for directories. So @@ -5639,26 +5886,31 @@ restart: * never allowed on HFS standard. */ - if (hfsmp->hfs_flags & HFS_STANDARD) { + /* Get resource fork data */ + if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) { + error = cat_idlookup (hfsmp, cp->c_fileid, 0, 1, NULL, NULL, &rsrcfork); + } +#if CONFIG_HFS_STD + else { /* * HFS standard only: * - * Get the resource fork for this item via catalog lookup - * since HFS standard was case-insensitive only. 
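The lookup-by-fileid rationale above reduces to one invariant: a cached name/parent descriptor can go stale the moment the cnode lock is dropped, while the fileid never changes for the life of the cnode. A small self-contained illustration — the record table and function are hypothetical, standing in for catalog records and the cat_idlookup() path:

    #include <stddef.h>

    struct cat_rec { unsigned int fileid; const char *name; };

    /* Illustrative: the fileid is the stable handle.  A name-based search
     * (the descriptor path) can return a different record after a rename;
     * an id-based search (the cat_idlookup() path) cannot. */
    static const struct cat_rec *lookup_by_id(const struct cat_rec *tbl,
                                              size_t n, unsigned int fileid)
    {
        for (size_t i = 0; i < n; i++)
            if (tbl[i].fileid == fileid)
                return &tbl[i];
        return NULL;
    }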
We don't want the - * descriptor; just the fork data here. + * Get the resource fork for this item with a cat_lookup call, but do not + * force a case lookup since HFS standard is case-insensitive only. We + * don't want the descriptor; just the fork data here. If we tried to + * do an ID lookup (via thread record -> catalog record), then we might fail + * prematurely since, as noted above, thread records were not strictly required + * on files in HFS. */ - error = cat_lookup (hfsmp, descptr, 1, (struct cat_desc*)NULL, + error = cat_lookup (hfsmp, descptr, 1, 0, (struct cat_desc*)NULL, (struct cat_attr*)NULL, &rsrcfork, NULL); } - else { - error = cat_idlookup (hfsmp, cp->c_fileid, 0, 1, NULL, NULL, &rsrcfork); - } +#endif hfs_systemfile_unlock(hfsmp, lockflags); if (error) { return (error); } - /* * Supply hfs_getnewvnode with a component name. */ @@ -5748,7 +6000,7 @@ hfsspec_close(ap) struct cnode *cp; if (vnode_isinuse(ap->a_vp, 0)) { - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) { cp = VTOC(vp); hfs_touchtimes(VTOHFS(vp), cp); hfs_unlock(cp); @@ -5814,7 +6066,7 @@ hfsfifo_close(ap) struct cnode *cp; if (vnode_isinuse(ap->a_vp, 1)) { - if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) == 0) { + if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) { cp = VTOC(vp); hfs_touchtimes(VTOHFS(vp), cp); hfs_unlock(cp); @@ -5859,7 +6111,7 @@ hfs_vnop_fsync(ap) * We need to allow ENOENT lock errors since unlink * system call can call VNOP_FSYNC during vclean. */ - error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (error) return (0); @@ -5938,10 +6190,12 @@ exit: } int (**hfs_vnodeop_p)(void *); -int (**hfs_std_vnodeop_p) (void *); #define VOPFUNC int (*)(void *) + +#if CONFIG_HFS_STD +int (**hfs_std_vnodeop_p) (void *); static int hfs_readonly_op (__unused void* ap) { return (EROFS); } /* @@ -6008,7 +6262,7 @@ struct vnodeopv_entry_desc hfs_standard_vnodeop_entries[] = { struct vnodeopv_desc hfs_std_vnodeop_opv_desc = { &hfs_std_vnodeop_p, hfs_standard_vnodeop_entries }; - +#endif /* VNOP table for HFS+ */ struct vnodeopv_entry_desc hfs_vnodeop_entries[] = { @@ -6109,6 +6363,10 @@ struct vnodeopv_entry_desc hfs_specop_entries[] = { { &vnop_copyfile_desc, (VOPFUNC)err_copyfile }, /* copyfile */ { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_specop_opv_desc = @@ -6153,6 +6411,10 @@ struct vnodeopv_entry_desc hfs_fifoop_entries[] = { { &vnop_blktooff_desc, (VOPFUNC)hfs_vnop_blktooff }, /* blktooff */ { &vnop_offtoblk_desc, (VOPFUNC)hfs_vnop_offtoblk }, /* offtoblk */ { &vnop_blockmap_desc, (VOPFUNC)hfs_vnop_blockmap }, /* blockmap */ + { &vnop_getxattr_desc, (VOPFUNC)hfs_vnop_getxattr}, + { &vnop_setxattr_desc, (VOPFUNC)hfs_vnop_setxattr}, + { &vnop_removexattr_desc, (VOPFUNC)hfs_vnop_removexattr}, + { &vnop_listxattr_desc, (VOPFUNC)hfs_vnop_listxattr}, { (struct vnodeop_desc*)NULL, (VOPFUNC)NULL } }; struct vnodeopv_desc hfs_fifoop_opv_desc = diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c index e7a91addd..c29b4a0c8 100644 --- a/bsd/hfs/hfs_xattr.c +++
b/bsd/hfs/hfs_xattr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2009 Apple Inc. All rights reserved. + * Copyright (c) 2004-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -67,8 +67,6 @@ struct listattr_callback_state { #endif /* HFS_COMPRESSION */ }; -#define HFS_MAXATTRBLKS (32 * 1024) - /* HFS Internal Names */ #define XATTR_EXTENDEDSECURITY_NAME "system.extendedsecurity" @@ -80,7 +78,7 @@ struct listattr_callback_state { static u_int32_t emptyfinfo[8] = {0}; -static int hfs_zero_dateadded (struct cnode *cp, u_int8_t *finderinfo); +static int hfs_zero_hidden_fields (struct cnode *cp, u_int8_t *finderinfo); const char hfs_attrdatafilename[] = "Attribute Data"; @@ -132,7 +130,7 @@ hfs_vnop_getnamedstream(struct vnop_getnamedstream_args* ap) #if HFS_COMPRESSION int hide_rsrc = hfs_hides_rsrc(ap->a_context, VTOC(vp), 1); #endif /* HFS_COMPRESSION */ - if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + if ((error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (error); } if ((!RSRC_FORK_EXISTS(cp) @@ -183,7 +181,7 @@ hfs_vnop_makenamedstream(struct vnop_makenamedstream_args* ap) } } #endif /* HFS_COMPRESSION */ - if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (error); } error = hfs_vgetrsrc(VTOHFS(vp), vp, svpp, TRUE, FALSE); @@ -218,8 +216,8 @@ hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap) scp = VTOC(svp); /* Take truncate lock before taking cnode lock. */ - hfs_lock_truncate(scp, HFS_EXCLUSIVE_LOCK); - if ((error = hfs_lock(scp, HFS_EXCLUSIVE_LOCK))) { + hfs_lock_truncate(scp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if ((error = hfs_lock(scp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { goto out; } if (VTOF(svp)->ff_size != 0) { @@ -227,28 +225,28 @@ hfs_vnop_removenamedstream(struct vnop_removenamedstream_args* ap) } hfs_unlock(scp); out: - hfs_unlock_truncate(scp, 0); + hfs_unlock_truncate(scp, HFS_LOCK_DEFAULT); return (error); } #endif /* Zero out the date added field for the specified cnode */ -static int hfs_zero_dateadded (struct cnode *cp, u_int8_t *finderinfo) { +static int hfs_zero_hidden_fields (struct cnode *cp, u_int8_t *finderinfo) +{ u_int8_t *finfo = finderinfo; /* Advance finfo by 16 bytes to the 2nd half of the finderinfo */ finfo = finfo + 16; - if (S_ISREG(cp->c_attr.ca_mode)) { - struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; - extinfo->date_added = 0; - } - else if (S_ISDIR(cp->c_attr.ca_mode)) { - struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; - extinfo->date_added = 0; - } - else { + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; + extinfo->date_added = 0; + extinfo->write_gen_counter = 0; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { + struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; + extinfo->date_added = 0; + } else { /* Return an error */ return -1; } @@ -293,15 +291,15 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) u_int8_t finderinfo[32]; bufsize = 32; - if ((result = hfs_lock(cp, HFS_SHARED_LOCK))) { + if ((result = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { return (result); } /* Make a copy since we may not export all of it. 
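hfs_zero_dateadded() has grown into hfs_zero_hidden_fields(): for files and symlinks it now clears the write generation counter alongside the date-added stamp in the second 16 bytes of the Finder Info before that copy is exported. A compilable sketch of the same masking; the struct layouts below are simplified stand-ins for FndrExtendedFileInfo/FndrExtendedDirInfo, not the on-disk format:

    #include <stdint.h>

    /* Simplified stand-ins for the Finder extended-info layouts. */
    struct ext_file_info { uint32_t date_added; uint32_t write_gen_counter; };
    struct ext_dir_info  { uint32_t date_added; };

    enum obj_kind { OBJ_FILE, OBJ_SYMLINK, OBJ_DIR };

    /* Clear hidden fields in a caller-owned copy of the 32-byte Finder Info
     * before it is exposed to user space. */
    static int zero_hidden_fields(enum obj_kind kind, uint8_t finderinfo[32])
    {
        uint8_t *finfo = finderinfo + 16;  /* extended info lives in the 2nd half */

        switch (kind) {
        case OBJ_FILE:
        case OBJ_SYMLINK: {
            struct ext_file_info *x = (struct ext_file_info *)(void *)finfo;
            x->date_added = 0;
            x->write_gen_counter = 0;
            return 0;
        }
        case OBJ_DIR: {
            struct ext_dir_info *x = (struct ext_dir_info *)(void *)finfo;
            x->date_added = 0;
            return 0;
        }
        }
        return -1;  /* neither file, symlink, nor directory */
    }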
*/ bcopy(cp->c_finderinfo, finderinfo, sizeof(finderinfo)); hfs_unlock(cp); - - /* Zero out the date added field in the local copy */ - hfs_zero_dateadded (cp, finderinfo); + + /* Zero out the date added field in the local copy */ + hfs_zero_hidden_fields (cp, finderinfo); /* Don't expose a symlink's private type/creator. */ if (vnode_islnk(vp)) { @@ -335,7 +333,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) if ( !S_ISREG(cp->c_mode) ) { return (EPERM); } - if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (result); } namelen = cp->c_desc.cd_namelen; @@ -405,7 +403,7 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) return (EPERM); } - if ((result = hfs_lock(cp, HFS_SHARED_LOCK))) { + if ((result = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { return (result); } @@ -435,10 +433,9 @@ hfs_vnop_getxattr(struct vnop_getxattr_args *ap) * * NOTE: This function assumes the cnode lock for 'cp' is held exclusive or shared. */ - - int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, - struct hfsmount *hfsmp, u_int32_t fileid) { + struct hfsmount *hfsmp, u_int32_t fileid) +{ struct filefork *btfile; struct BTreeIterator * iterator = NULL; @@ -453,8 +450,7 @@ int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, if (cp) { target_id = cp->c_fileid; - } - else { + } else { target_id = fileid; } @@ -476,18 +472,17 @@ int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, } bzero(iterator, sizeof(*iterator)); - bufsize = sizeof(HFSPlusAttrData) - 2; - if (uio) { - bufsize += uio_resid(uio); - } - bufsize = MAX(bufsize, sizeof(HFSPlusAttrRecord)); - MALLOC(recp, HFSPlusAttrRecord *, bufsize, M_TEMP, M_WAITOK); + /* Allocate memory for reading in the attribute record. This buffer is + * big enough to read in all types of attribute records. It is not big + * enough to read inline attribute data which is read in later. + */ + MALLOC(recp, HFSPlusAttrRecord *, sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK); if (recp == NULL) { result = ENOMEM; goto exit; } btdata.bufferAddress = recp; - btdata.itemSize = bufsize; + btdata.itemSize = sizeof(HFSPlusAttrRecord); btdata.itemCount = 1; result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key); @@ -512,8 +507,9 @@ int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, * we have extent based EAs. */ switch (recp->recordType) { + /* Attribute fits in the Attribute B-Tree */ - case kHFSPlusAttrInlineData: + case kHFSPlusAttrInlineData: { /* * Sanity check record size. It's not required to have any * user data, so the minimum size is 2 bytes less than the * size of the attribute data structure (which * has 2 bytes set aside for attribute data).
*/ if (datasize < (sizeof(HFSPlusAttrData) - 2)) { - printf("hfs_getxattr: %d,%s invalid record size %d (expecting %lu)\n", - target_id, ap->a_name, datasize, sizeof(HFSPlusAttrData)); + printf("hfs_getxattr: vol=%s %d,%s invalid record size %d (expecting %lu)\n", + hfsmp->vcbVN, target_id, ap->a_name, datasize, sizeof(HFSPlusAttrData)); result = ENOATTR; break; } *ap->a_size = recp->attrData.attrSize; if (uio && recp->attrData.attrSize != 0) { if (*ap->a_size > (user_size_t)uio_resid(uio)) { + /* User provided buffer is not large enough for the xattr data */ result = ERANGE; - } - else { + } else { + /* Previous BTreeSearchRecord() read in only the attribute record, + * and not the attribute data. Now allocate enough memory for + * both attribute record and data, and read the attribute record again. + */ + bufsize = sizeof(HFSPlusAttrData) - 2 + recp->attrData.attrSize; + FREE(recp, M_TEMP); + MALLOC(recp, HFSPlusAttrRecord *, bufsize, M_TEMP, M_WAITOK); + if (recp == NULL) { + result = ENOMEM; + goto exit; + } + + btdata.bufferAddress = recp; + btdata.itemSize = bufsize; + btdata.itemCount = 1; + + bzero(iterator, sizeof(*iterator)); + result = hfs_buildattrkey(target_id, ap->a_name, (HFSPlusAttrKey *)&iterator->key); + if (result) { + goto exit; + } + + /* Lookup the attribute record and inline data */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK); + result = BTSearchRecord(btfile, iterator, &btdata, &datasize, NULL); + hfs_systemfile_unlock(hfsmp, lockflags); + if (result) { + if (result == btNotFound) { + result = ENOATTR; + } + goto exit; + } + + /* Copy-out the attribute data to the user buffer */ + *ap->a_size = recp->attrData.attrSize; result = uiomove((caddr_t) &recp->attrData.attrData , recp->attrData.attrSize, uio); } } break; + } + /* Extent-Based EAs */ case kHFSPlusAttrForkData: { if (datasize < sizeof(HFSPlusAttrForkData)) { - printf("hfs_getxattr: %d,%s invalid record size %d (expecting %lu)\n", - target_id, ap->a_name, datasize, sizeof(HFSPlusAttrForkData)); + printf("hfs_getxattr: vol=%s %d,%s invalid record size %d (expecting %lu)\n", + hfsmp->vcbVN, target_id, ap->a_name, datasize, sizeof(HFSPlusAttrForkData)); result = ENOATTR; break; } @@ -563,7 +596,7 @@ int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, totalblocks = recp->forkData.theFork.totalBlocks; /* Ignore bogus block counts. */ - if (totalblocks > HFS_MAXATTRBLKS) { + if (totalblocks > howmany(HFS_XATTR_MAXSIZE, hfsmp->blockSize)) { result = ERANGE; break; } @@ -609,14 +642,12 @@ int hfs_getxattr_internal (struct cnode *cp, struct vnop_getxattr_args *ap, if (blkcnt < totalblocks) { result = ENOATTR; - } - else { + } else { result = read_attr_data(hfsmp, uio, attrlen, extentbuf); } FREE(extentbuf, M_TEMP); - } - else /* No overflow extents. */ { + } else { /* No overflow extents. */ result = read_attr_data(hfsmp, uio, recp->forkData.theFork.logicalSize, recp->forkData.theFork.extents); } break; @@ -680,9 +711,9 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) if (result != 0) return result; } - - check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_WRITE_OP, NULL); #endif /* HFS_COMPRESSION */ + + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_WRITE_OP, NSPACE_REARM_NO_ARG); /* Set the Finder Info. 
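The rewritten inline-data path above reads the attribute in two passes: a fixed-size probe that holds only the record header (enough to learn attrSize), then, if the caller's buffer is large enough, a reallocation and a second b-tree search that pulls in header plus payload. A self-contained sketch of that probe-then-reallocate pattern; fetch() is a hypothetical stand-in for BTSearchRecord() and the record layout is illustrative:

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical stored record: a length header followed by payload. */
    struct attr_rec { size_t attr_size; unsigned char data[]; };

    /* Stand-in for BTSearchRecord(): copies up to buflen bytes of the record,
     * returns how many bytes the full record occupies. */
    static size_t fetch(void *buf, size_t buflen)
    {
        static const struct { size_t attr_size; unsigned char data[6]; } rec =
            { 6, "hello" };
        memcpy(buf, &rec, buflen < sizeof(rec) ? buflen : sizeof(rec));
        return sizeof(rec);
    }

    /* Pass 1: probe with a header-sized buffer to learn the payload length.
     * Pass 2: allocate header+payload and re-read the whole record. */
    static struct attr_rec *read_record(void)
    {
        struct attr_rec probe;

        (void)fetch(&probe, sizeof(probe));

        size_t full = sizeof(struct attr_rec) + probe.attr_size;
        struct attr_rec *rec = malloc(full);
        if (rec && fetch(rec, full) < full) {  /* record shrank? bail out */
            free(rec);
            rec = NULL;
        }
        return rec;
    }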
*/ if (bcmp(ap->a_name, XATTR_FINDERINFO_NAME, sizeof(XATTR_FINDERINFO_NAME)) == 0) { @@ -692,6 +723,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) u_int8_t *finfo = NULL; u_int16_t fdFlags; u_int32_t dateadded = 0; + u_int32_t write_gen_counter = 0; attrsize = sizeof(VTOC(vp)->c_finderinfo); @@ -704,7 +736,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) } fip = (struct FndrFileInfo *)&finderinfo; - if ((result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + if ((result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (result); } cp = VTOC(vp); @@ -728,9 +760,12 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) /* Grab the current date added from the cnode */ dateadded = hfs_get_dateadded (cp); - + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + write_gen_counter = hfs_get_gencount(cp); + } + /* Zero out the date added field to ignore user's attempts to set it */ - hfs_zero_dateadded(cp, finderinfo); + hfs_zero_hidden_fields(cp, finderinfo); if (bcmp(finderinfo_start, emptyfinfo, attrsize)) { /* attr exists and "create" was specified. */ if (ap->a_options & XATTR_CREATE) { hfs_unlock(cp); return (EEXIST); } - } else /* empty */ { + } else { /* empty */ /* attr doesn't exist and "replace" was specified. */ if (ap->a_options & XATTR_REPLACE) { hfs_unlock(cp); return (ENOATTR); } } @@ -757,20 +792,21 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) */ finfo = &finderinfo[16]; - if (S_ISREG(cp->c_attr.ca_mode)) { + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; extinfo->date_added = OSSwapHostToBigInt32(dateadded); - } - else if (S_ISDIR(cp->c_attr.ca_mode)) { + extinfo->write_gen_counter = write_gen_counter; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; extinfo->date_added = OSSwapHostToBigInt32(dateadded); } /* Set the cnode's Finder Info. */ - if (attrsize == sizeof(cp->c_finderinfo)) + if (attrsize == sizeof(cp->c_finderinfo)) { bcopy(&finderinfo[0], finderinfo_start, attrsize); - else + } else { bcopy(&finderinfo[8], finderinfo_start, attrsize); + } /* Updating finderInfo updates change time and modified time */ cp->c_touch_chgtime = TRUE; @@ -784,10 +820,11 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) * are both 16-bit fields. */ fdFlags = *((u_int16_t *) &cp->c_finderinfo[8]); - if (fdFlags & OSSwapHostToBigConstInt16(kFinderInvisibleMask)) + if (fdFlags & OSSwapHostToBigConstInt16(kFinderInvisibleMask)) { cp->c_bsdflags |= UF_HIDDEN; - else + } else { cp->c_bsdflags &= ~UF_HIDDEN; + } result = hfs_update(vp, FALSE); @@ -803,7 +840,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) if (!vnode_isreg(vp)) { return (EPERM); } - if ((result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) { + if ((result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (result); } cp = VTOC(vp); @@ -847,10 +884,9 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) vnode_rele(rvp); } vnode_recycle (rvp); - } - else { + } else { /* cnode is not open-unlinked, so re-lock cnode to sync */ - if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { vnode_recycle (rvp); vnode_put(rvp); return result; @@ -879,7 +915,9 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) } /* - * Attempt to copy the users attr data before taking any locks.
+ * Attempt to copy the user's attr data before taking any locks, + * only if it will be an inline attribute. For larger attributes, + * the data will be directly read from the uio. */ if (attrsize > 0 && hfsmp->hfs_max_inline_attrsize != 0 && @@ -896,7 +934,7 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) } } - result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK); + result = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); if (result) { goto exit; } @@ -938,7 +976,8 @@ hfs_vnop_setxattr(struct vnop_setxattr_args *ap) */ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize, struct vnop_setxattr_args *ap, struct hfsmount *hfsmp, - u_int32_t fileid) { + u_int32_t fileid) +{ uio_t uio = ap->a_uio; struct vnode *vp = ap->a_vp; int started_transaction = 0; @@ -957,8 +996,7 @@ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize, if (cp) { target_id = cp->c_fileid; - } - else { + } else { target_id = fileid; if (target_id != 1) { /* @@ -1074,8 +1112,8 @@ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize, if (result) { if (vp) { const char *name = vnode_getname(vp); - printf("hfs_setxattr: write_attr_data err (%d) %s:%s\n", - result, name ? name : "", ap->a_name); + printf("hfs_setxattr: write_attr_data vol=%s err (%d) %s:%s\n", + hfsmp->vcbVN, result, name ? name : "", ap->a_name); if (name) vnode_putname(name); } @@ -1088,8 +1126,8 @@ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize, if (result) { if (vp) { const char *name = vnode_getname(vp); - printf("hfs_setxattr: remove_attribute_records err (%d) %s:%s\n", - result, name ? name : "", ap->a_name); + printf("hfs_setxattr: remove_attribute_records vol=%s err (%d) %s:%s\n", - hfsmp->vcbVN, result, name ? name : "", ap->a_name); + hfsmp->vcbVN, result, name ? name : "", ap->a_name); if (name) vnode_putname(name); } @@ -1117,8 +1155,8 @@ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize, result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize); if (result) { - printf ("hfs_setxattr: BTInsertRecord() - %d,%s err=%d\n", - target_id, ap->a_name, result); + printf ("hfs_setxattr: BTInsertRecord(): vol=%s %d,%s err=%d\n", + hfsmp->vcbVN, target_id, ap->a_name, result); goto exit; } extentblks = count_extent_blocks(blkcnt, recp->forkData.theFork.extents); @@ -1140,15 +1178,14 @@ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize, result = BTInsertRecord(btfile, iterator, &btdata, btdata.itemSize); if (result) { - printf ("hfs_setxattr: BTInsertRecord() overflow - %d,%s err=%d\n", - target_id, ap->a_name, result); + printf ("hfs_setxattr: BTInsertRecord() overflow: vol=%s %d,%s err=%d\n", + hfsmp->vcbVN, target_id, ap->a_name, result); goto exit; } extentblks = count_extent_blocks(blkcnt, recp->overflowExtents.extents); blkcnt -= extentblks; } - } - else { /* Inline data */ + } else { /* Inline data */ if (exists) { result = remove_attribute_records(hfsmp, iterator); if (result) { goto exit; } } @@ -1172,8 +1209,7 @@ int hfs_setxattr_internal (struct cnode *cp, caddr_t data_ptr, size_t attrsize, if (attrsize > 0) { if (data_ptr) { bcopy(data_ptr, &recp->attrData.attrData, attrsize); - } - else { + } else { /* * A null UIO meant it originated in-kernel. If they didn't supply data_ptr * then deny the copy operation.
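hfs_vnop_setxattr() above pre-copies the payload into a kernel buffer before any locks are taken, but only when the attribute will land inline (attrsize below hfs_max_inline_attrsize); larger payloads are streamed from the uio into extent-backed storage later, under the proper locks. The decision reduces to a minimal sketch like this, where max_inline plays the role of hfsmp->hfs_max_inline_attrsize:

    #include <stdlib.h>
    #include <string.h>

    /* Illustrative only: copy early when the attribute will be inline. */
    static void *copy_if_inline(const void *user_data, size_t attrsize,
                                size_t max_inline)
    {
        if (attrsize == 0 || max_inline == 0 || attrsize >= max_inline)
            return NULL;  /* too big for inline: stream it under locks later */

        void *buf = malloc(attrsize);  /* safe here: no locks held yet */
        if (buf)
            memcpy(buf, user_data, attrsize);
        return buf;
    }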
@@ -1276,9 +1312,9 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) if (hfs_hides_xattr(ap->a_context, VTOC(vp), ap->a_name, 1) && !(ap->a_options & XATTR_SHOWCOMPRESSION)) { return ENOATTR; } - - check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_DELETE_OP, NULL); #endif /* HFS_COMPRESSION */ + + check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_METADATA_DELETE_OP, NSPACE_REARM_NO_ARG); /* If Resource Fork is non-empty then truncate it. */ if (bcmp(ap->a_name, XATTR_RESOURCEFORK_NAME, sizeof(XATTR_RESOURCEFORK_NAME)) == 0) { @@ -1287,7 +1323,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) if ( !vnode_isreg(vp) ) { return (EPERM); } - if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (result); } if ( !RSRC_FORK_EXISTS(cp)) { @@ -1300,9 +1336,9 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) return (result); } - hfs_lock_truncate(VTOC(rvp), HFS_EXCLUSIVE_LOCK); - if ((result = hfs_lock(VTOC(rvp), HFS_EXCLUSIVE_LOCK))) { - hfs_unlock_truncate(cp, 0); + hfs_lock_truncate(VTOC(rvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if ((result = hfs_lock(VTOC(rvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); vnode_put(rvp); return (result); } @@ -1311,7 +1347,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) * hfs_truncate() and hfs_update() */ if ((result = hfs_start_transaction(hfsmp))) { - hfs_unlock_truncate(cp, 0); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); hfs_unlock(cp); vnode_put(rvp); return (result); @@ -1325,7 +1361,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) } hfs_end_transaction(hfsmp); - hfs_unlock_truncate(VTOC(rvp), 0); + hfs_unlock_truncate(VTOC(rvp), HFS_LOCK_DEFAULT); hfs_unlock(VTOC(rvp)); vnode_put(rvp); @@ -1336,10 +1372,10 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) void * finderinfo_start; int finderinfo_size; u_int8_t finderinfo[32]; - u_int32_t date_added; + u_int32_t date_added, write_gen_counter; u_int8_t *finfo = NULL; - if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { return (result); } @@ -1348,7 +1384,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) /* Zero out the date added field in the local copy */ - hfs_zero_dateadded (cp, finderinfo); + hfs_zero_hidden_fields (cp, finderinfo); /* Don't expose a symlink's private type/creator. 
*/ if (vnode_islnk(vp)) { @@ -1371,11 +1407,11 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) */ finfo = cp->c_finderinfo; finfo = finfo + 16; - if (S_ISREG(cp->c_attr.ca_mode)) { + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; date_added = extinfo->date_added; - } - else if (S_ISDIR(cp->c_attr.ca_mode)) { + write_gen_counter = extinfo->write_gen_counter; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; date_added = extinfo->date_added; } @@ -1384,8 +1420,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) /* Ignore type/creator */ finderinfo_start = &cp->c_finderinfo[8]; finderinfo_size = sizeof(cp->c_finderinfo) - 8; - } - else { + } else { finderinfo_start = &cp->c_finderinfo[0]; finderinfo_size = sizeof(cp->c_finderinfo); } @@ -1393,11 +1428,11 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) /* Now restore the date added */ - if (S_ISREG(cp->c_attr.ca_mode)) { + if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)finfo; extinfo->date_added = date_added; - } - else if (S_ISDIR(cp->c_attr.ca_mode)) { + extinfo->write_gen_counter = write_gen_counter; + } else if (S_ISDIR(cp->c_attr.ca_mode)) { struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)finfo; extinfo->date_added = date_added; } @@ -1427,7 +1462,7 @@ hfs_vnop_removexattr(struct vnop_removexattr_args *ap) } bzero(iterator, sizeof(*iterator)); - if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) { + if ((result = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) { goto exit_nolock; } @@ -1601,8 +1636,8 @@ remove_attribute_records(struct hfsmount *hfsmp, BTreeIterator * iterator) if (result || (attrdata.recordType != kHFSPlusAttrExtents) || (datasize < sizeof(HFSPlusAttrExtents))) { - printf("hfs: remove_attribute_records: BTSearchRecord %d (%d), totalblks %d\n", - MacToVFSError(result), attrdata.recordType != kHFSPlusAttrExtents, totalblks); + printf("hfs: remove_attribute_records: BTSearchRecord: vol=%s, err=%d (%d), totalblks %d\n", + hfsmp->vcbVN, MacToVFSError(result), attrdata.recordType != kHFSPlusAttrExtents, totalblks); result = ENOATTR; break; /* break from while */ } @@ -1663,8 +1698,17 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap) hfsmp = VTOHFS(vp); *ap->a_size = 0; + + /* + * Take the truncate lock; this serializes us against the ioctl + * to truncate data & reset the decmpfs state + * in the compressed file handler. + */ + hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); - if ((result = hfs_lock(cp, HFS_SHARED_LOCK))) { + /* Now the regular cnode lock (shared) */ + if ((result = hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT))) { + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); return (result); } @@ -1674,7 +1718,7 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap) * for symlinks. */ bcopy(cp->c_finderinfo, finderinfo, sizeof(finderinfo)); - hfs_zero_dateadded (cp, finderinfo); + hfs_zero_hidden_fields (cp, finderinfo); /* Don't expose a symlink's private type/creator. */ if (vnode_islnk(vp)) { @@ -1686,7 +1730,7 @@ hfs_vnop_listxattr(struct vnop_listxattr_args *ap) } - /* If Finder Info is non-empty then export it's name. */ + /* If Finder Info is non-empty then export its name.
*/ if (bcmp(finderinfo, emptyfinfo, sizeof(emptyfinfo)) != 0) { if (uio == NULL) { *ap->a_size += sizeof(XATTR_FINDERINFO_NAME); @@ -1800,6 +1844,7 @@ exit: FREE(iterator, M_TEMP); } hfs_unlock(cp); + hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); return MacToVFSError(result); } @@ -2030,13 +2075,13 @@ hfs_set_volxattr(struct hfsmount *hfsmp, unsigned int xattrtype, int state) hfs_end_transaction(hfsmp); /* Update the state in the mount point */ - HFS_MOUNT_LOCK(hfsmp, TRUE); + hfs_lock_mount (hfsmp); if (state == 0) { hfsmp->hfs_flags &= ~HFS_XATTR_EXTENTS; } else { hfsmp->hfs_flags |= HFS_XATTR_EXTENTS; } - HFS_MOUNT_UNLOCK(hfsmp, TRUE); + hfs_unlock_mount (hfsmp); exit: if (iterator) { @@ -2103,11 +2148,12 @@ hfs_attrkeycompare(HFSPlusAttrKey *searchKey, HFSPlusAttrKey *trialKey) /* * Names are equal; compare startBlock */ - if (searchKey->startBlock == trialKey->startBlock) + if (searchKey->startBlock == trialKey->startBlock) { return (0); - else + } else { return (searchKey->startBlock < trialKey->startBlock ? -1 : 1); } + } return result; } @@ -2187,7 +2233,7 @@ getmaxinlineattrsize(struct vnode * attrvp) size_t maxsize; if (attrvp != NULL) { - (void) hfs_lock(VTOC(attrvp), HFS_SHARED_LOCK); + (void) hfs_lock(VTOC(attrvp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); if (BTGetInformation(VTOF(attrvp), 0, &btinfo) == 0) nodesize = btinfo.nodeSize; hfs_unlock(VTOC(attrvp)); @@ -2279,13 +2325,13 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent { vnode_t evp = hfsmp->hfs_attrdata_vp; int bufsize; - int iosize; + int64_t iosize; int attrsize; int blksize; int i; int result = 0; - hfs_lock_truncate(VTOC(evp), HFS_SHARED_LOCK); + hfs_lock_truncate(VTOC(evp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); bufsize = (int)uio_resid(uio); attrsize = (int)datasize; @@ -2296,7 +2342,7 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent * For the typical case there is only one extent. */ for (i = 0; (attrsize > 0) && (bufsize > 0) && (extents[i].startBlock != 0); ++i) { - iosize = (int)extents[i].blockCount * blksize; + iosize = extents[i].blockCount * blksize; iosize = MIN(iosize, attrsize); iosize = MIN(iosize, bufsize); uio_setresid(uio, iosize); @@ -2305,7 +2351,7 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent result = cluster_read(evp, uio, VTOF(evp)->ff_size, IO_SYNC | IO_UNIT); #if HFS_XATTR_VERBOSE - printf("hfs: read_attr_data: cr iosize %d [%d, %d] (%d)\n", + printf("hfs: read_attr_data: cr iosize %lld [%d, %d] (%d)\n", iosize, extents[i].startBlock, extents[i].blockCount, result); #endif if (result) @@ -2316,7 +2362,7 @@ read_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExtent uio_setresid(uio, bufsize); uio_setoffset(uio, datasize); - hfs_unlock_truncate(VTOC(evp), 0); + hfs_unlock_truncate(VTOC(evp), HFS_LOCK_DEFAULT); return (result); } @@ -2330,12 +2376,12 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten off_t filesize; int bufsize; int attrsize; - int iosize; + int64_t iosize; int blksize; int i; int result = 0; - hfs_lock_truncate(VTOC(evp), HFS_SHARED_LOCK); + hfs_lock_truncate(VTOC(evp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT); bufsize = uio_resid(uio); attrsize = (int) datasize; @@ -2346,7 +2392,7 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten * Write the attribute data one extent at a time. 
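hfs_vnop_listxattr() now observes the same ordering used elsewhere in this patch: truncate lock first (shared), cnode lock second, released in reverse order, with the truncate lock dropped on every error path. A sketch with POSIX rwlocks standing in for hfs_lock_truncate()/hfs_lock() — the primitives and struct are illustrative, not the kernel's:

    #include <pthread.h>

    struct cnode_sim {
        pthread_rwlock_t truncate_lock;  /* stands in for the HFS truncate lock */
        pthread_rwlock_t cnode_lock;     /* stands in for the cnode lock */
    };

    static int list_attrs(struct cnode_sim *cp)
    {
        pthread_rwlock_rdlock(&cp->truncate_lock);      /* 1st: truncate, shared */
        if (pthread_rwlock_rdlock(&cp->cnode_lock)) {   /* 2nd: cnode, shared */
            pthread_rwlock_unlock(&cp->truncate_lock);  /* error path unwinds both */
            return -1;
        }

        /* ... walk Finder Info, resource fork, and the attribute b-tree ... */

        pthread_rwlock_unlock(&cp->cnode_lock);         /* release in reverse */
        pthread_rwlock_unlock(&cp->truncate_lock);
        return 0;
    }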
*/ for (i = 0; (attrsize > 0) && (bufsize > 0) && (extents[i].startBlock != 0); ++i) { - iosize = (int)extents[i].blockCount * blksize; + iosize = extents[i].blockCount * blksize; iosize = MIN(iosize, attrsize); iosize = MIN(iosize, bufsize); uio_setresid(uio, iosize); @@ -2355,7 +2401,7 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten result = cluster_write(evp, uio, filesize, filesize, filesize, (off_t) 0, IO_SYNC | IO_UNIT); #if HFS_XATTR_VERBOSE - printf("hfs: write_attr_data: cw iosize %d [%d, %d] (%d)\n", + printf("hfs: write_attr_data: cw iosize %lld [%d, %d] (%d)\n", iosize, extents[i].startBlock, extents[i].blockCount, result); #endif if (result) @@ -2366,7 +2412,7 @@ write_attr_data(struct hfsmount *hfsmp, uio_t uio, size_t datasize, HFSPlusExten uio_setresid(uio, bufsize); uio_setoffset(uio, datasize); - hfs_unlock_truncate(VTOC(evp), 0); + hfs_unlock_truncate(VTOC(evp), HFS_LOCK_DEFAULT); return (result); } @@ -2394,8 +2440,19 @@ alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, H lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); for (i = 0; (blkcnt > 0) && (i < maxextents); i++) { + /* Try allocating and see if we find something decent */ result = BlockAllocate(hfsmp, startblk, blkcnt, blkcnt, 0, &extents[i].startBlock, &extents[i].blockCount); + /* + * If we couldn't find anything, then re-try the allocation but allow + * journal flushes. + */ + if (result == dskFulErr) { + result = BlockAllocate(hfsmp, startblk, blkcnt, blkcnt, HFS_ALLOC_FLUSHTXN, + &extents[i].startBlock, &extents[i].blockCount); + } + + #if HFS_XATTR_VERBOSE printf("hfs: alloc_attr_blks: BA blkcnt %d [%d, %d] (%d)\n", blkcnt, extents[i].startBlock, extents[i].blockCount, result); diff --git a/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c b/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c index e58dde4e6..94577758a 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c +++ b/bsd/hfs/hfscommon/BTree/BTreeNodeReserve.c @@ -153,12 +153,28 @@ BTReserveSpace(FCB *file, int operations, void* data) if (rsrvNodes > availNodes) { u_int32_t reqblks, freeblks, rsrvblks; + uint32_t bt_rsrv; struct hfsmount *hfsmp; - /* Try and reserve the last 5% of the disk space for file blocks. */ + /* + * For UNIX conformance, we try and reserve the MIN of either 5% of + * total file blocks or 10MB worth of blocks, for growing existing + * files. On non-HFS filesystems, creating a new directory entry may + * not cause additional disk space to be allocated, but on HFS, creating + * a new entry could cause the b-tree to grow. As a result, we take + * some precautions here to prevent that on configurations that try to + * satisfy conformance. + */ hfsmp = VTOVCB(btree->fileRefNum); rsrvblks = ((u_int64_t)hfsmp->allocLimit * 5) / 100; - rsrvblks = MIN(rsrvblks, HFS_MAXRESERVE / hfsmp->blockSize); + if (hfsmp->blockSize > HFS_BT_MAXRESERVE) { + bt_rsrv = 1; + } + else { + bt_rsrv = (HFS_BT_MAXRESERVE / hfsmp->blockSize); + } + rsrvblks = MIN(rsrvblks, bt_rsrv); + freeblks = hfs_freeblks(hfsmp, 0); if (freeblks <= rsrvblks) { /* When running low, disallow adding new items. 
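The BTReserveSpace() change above replaces the flat HFS_MAXRESERVE clamp with a cap expressed in blocks: reserve the smaller of 5% of the allocatable blocks and HFS_BT_MAXRESERVE bytes' worth of blocks, degenerating to a single block when the allocation block size exceeds the cap. As pure arithmetic (the ~10MB figure is taken from the comment above; the constant below mirrors, but is not, the kernel's):

    #include <stdint.h>

    #define BT_MAXRESERVE (10u * 1024 * 1024)  /* assumption: ~10MB, per the comment */

    static uint32_t btree_reserve_blocks(uint64_t alloc_limit, uint32_t block_size)
    {
        uint32_t five_pct = (uint32_t)((alloc_limit * 5) / 100);
        uint32_t bt_rsrv = (block_size > BT_MAXRESERVE)
                               ? 1                         /* huge blocks: keep one */
                               : BT_MAXRESERVE / block_size;
        return five_pct < bt_rsrv ? five_pct : bt_rsrv;
    }

Under these assumptions, a 4KB-block volume caps the reserve at 2,560 blocks and a 64KB-block volume at 160, with the 5% figure governing only on very small volumes.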
*/ diff --git a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c index 71d9e06c9..45456569f 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c @@ -569,7 +569,7 @@ static OSErr InsertNode (BTreeControlBlockPtr btreePtr, Boolean *insertParent, Boolean *rootSplit ) { - BlockDescriptor *targetNode; + BlockDescriptor *targetNode = NULL; u_int32_t leftNodeNum; u_int16_t recsRotated; OSErr err; diff --git a/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c b/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c index 114104c6f..e4b86f2ec 100644 --- a/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c +++ b/bsd/hfs/hfscommon/Catalog/CatalogUtilities.c @@ -96,11 +96,14 @@ LocateCatalogNodeByKey(const ExtendedVCB *volume, u_int32_t hint, CatalogKey *ke // if we got a thread record, then go look up real record switch ( dataPtr->recordType ) { + +#if CONFIG_HFS_STD case kHFSFileThreadRecord: case kHFSFolderThreadRecord: threadParentID = dataPtr->hfsThread.parentID; nodeName = (CatalogName *) &dataPtr->hfsThread.nodeName; break; +#endif case kHFSPlusFileThreadRecord: case kHFSPlusFolderThreadRecord: @@ -192,6 +195,7 @@ BuildCatalogKey(HFSCatalogNodeID parentID, const CatalogName *cName, Boolean isH key->hfsPlus.keyLength += sizeof(UniChar) * cName->ustr.length; // add CName size to key length } } +#if CONFIG_HFS_STD else { key->hfs.keyLength = kHFSCatalogKeyMinimumLength; // initial key length (1 + 4 + 1) @@ -204,6 +208,8 @@ BuildCatalogKey(HFSCatalogNodeID parentID, const CatalogName *cName, Boolean isH key->hfs.keyLength += key->hfs.nodeName[0]; // add CName size to key length } } +#endif + } OSErr @@ -234,6 +240,7 @@ BuildCatalogKeyUTF8(ExtendedVCB *volume, HFSCatalogNodeID parentID, const unsign *textEncoding = hfs_pickencoding(key->hfsPlus.nodeName.unicode, key->hfsPlus.nodeName.length); } +#if CONFIG_HFS_STD else { key->hfs.keyLength = kHFSCatalogKeyMinimumLength; // initial key length (1 + 4 + 1) key->hfs.reserved = 0; // clear unused byte @@ -253,6 +260,7 @@ BuildCatalogKeyUTF8(ExtendedVCB *volume, HFSCatalogNodeID parentID, const unsign if (textEncoding) *textEncoding = 0; } +#endif if (err) { if (err == ENAMETOOLONG) @@ -277,6 +285,7 @@ FlushCatalog(ExtendedVCB *volume) { FCB * fcb; OSErr result; + struct hfsmount *hfsmp = VCBTOHFS (volume); fcb = GetFileControlBlock(volume->catalogRefNum); result = BTFlushPath(fcb); @@ -287,10 +296,10 @@ FlushCatalog(ExtendedVCB *volume) if ( 0 /*fcb->fcbFlags & fcbModifiedMask*/ ) { - HFS_MOUNT_LOCK(volume, TRUE); + hfs_lock_mount (hfsmp); MarkVCBDirty(volume); // Mark the VCB dirty volume->vcbLsMod = GetTimeUTC(); // update last modified date - HFS_MOUNT_UNLOCK(volume, TRUE); + hfs_unlock_mount (hfsmp); // result = FlushVolumeControlBlock(volume); } @@ -323,9 +332,9 @@ UpdateCatalogName(ConstStr31Param srcName, Str31 destName) //_______________________________________________________________________ void -CopyCatalogName(const CatalogName *srcName, CatalogName *dstName, Boolean isHFSPLus) +CopyCatalogName(const CatalogName *srcName, CatalogName *dstName, Boolean isHFSPlus) { - u_int32_t length; + u_int32_t length = 0; if ( srcName == NULL ) { @@ -334,10 +343,14 @@ CopyCatalogName(const CatalogName *srcName, CatalogName *dstName, Boolean isHFSP return; } - if (isHFSPLus) + if (isHFSPlus) { length = sizeof(UniChar) * (srcName->ustr.length + 1); - else + } +#if CONFIG_HFS_STD + else { length = sizeof(u_int8_t) + srcName->pstr[0]; + } +#endif if ( length > 1 ) BlockMoveData(srcName, dstName, length); 
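CopyCatalogName() above (its isHFSPLus parameter typo is fixed in passing) computes the copy length per format: HFS+ names are UTF-16 with a leading length word, HFS standard names are Pascal strings. That length computation, stated on its own as a minimal sketch:

    #include <stdint.h>

    typedef uint16_t UniChar;

    /* Bytes occupied by a catalog name in each on-disk format. */
    static uint32_t catalog_name_bytes(int is_hfs_plus, uint16_t unicode_len,
                                       uint8_t pascal_len)
    {
        if (is_hfs_plus)             /* length word + UTF-16 code units */
            return (uint32_t)sizeof(UniChar) * (unicode_len + 1u);
        return 1u + pascal_len;      /* count byte + name bytes */
    }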
diff --git a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c index 29242d367..d53fd5fd5 100644 --- a/bsd/hfs/hfscommon/Catalog/FileIDsServices.c +++ b/bsd/hfs/hfscommon/Catalog/FileIDsServices.c @@ -47,7 +47,11 @@ typedef struct ExtentsRecBuffer ExtentsRecBuffer; static u_int32_t CheckExtents( void *extents, u_int32_t blocks, Boolean isHFSPlus ); static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileNumber, int quitEarly, u_int8_t forkType, Boolean isHFSPlus ); static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t destFileID, int quitEarly, u_int8_t forkType, Boolean isHFSPlus ); + +#if CONFIG_HFS_STD static void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ); +#endif + static void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ); static void CopyExtentInfo( ExtentKey *key, ExtentRecord *data, ExtentsRecBuffer *buffer, u_int16_t bufferCount ); @@ -162,10 +166,17 @@ OSErr ExchangeFileIDs( ExtendedVCB *vcb, ConstUTF8Param srcName, ConstUTF8Param err = MoveExtents( vcb, srcData.hfsPlusFile.fileID, kHFSBogusExtentFileID, 0,0, isHFSPlus ); if ( err != noErr ) { - if ( err != dskFulErr ) + if ( err != dskFulErr ) { return( err ); - else - goto ExUndo1a; + } + else { + err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! + + err = FlushCatalog( vcb ); // flush the catalog + err = FlushExtentFile( vcb ); // flush the extent file (unneeded for common case, but it's cheap) + return( dskFulErr ); + } } //-- Change the destination extents file id's to the source id's @@ -181,7 +192,13 @@ ExUndo2aPlus: err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSP err = MoveExtents( vcb, kHFSBogusExtentFileID, srcData.hfsPlusFile.fileID, 0, 0, isHFSPlus ); // Move the extents back ReturnIfError( err ); // we are doomed. Just QUIT! - goto ExUndo1a; + err = DeleteExtents( vcb, kHFSBogusExtentFileID, 0, 0, isHFSPlus ); + ReturnIfError( err ); // we are doomed. Just QUIT! 
+ + err = FlushCatalog( vcb ); // flush the catalog + err = FlushExtentFile( vcb ); // flush the extent file (unneeded for common case, but it's cheap) + return( dskFulErr ); + } //-- Change the bogus extents file id's to the dest id's @@ -252,6 +269,7 @@ ExUndo2aPlus: err = DeleteExtents( vcb, srcData.hfsPlusFile.fileID, 0, 0, isHFSP err = ReplaceBTreeRecord( vcb->catalogRefNum, &destKey, destHint, &destData, sizeof(HFSPlusCatalogFile), &destHint ); ReturnIfError( err ); } +#if CONFIG_HFS_STD else // HFS // { //-- Step 1: Check the catalog nodes for extents @@ -395,7 +413,8 @@ ExUndo2a: err = DeleteExtents( vcb, srcData.hfsFile.fileID, 0, 0, isHFSPlus ); err = ReplaceBTreeRecord( vcb->catalogRefNum, &destKey, destHint, &destData, sizeof(HFSCatalogFile), &destHint ); ReturnIfError( err ); } - +#endif + err = noErr; //-- Step 4: Error Handling section @@ -408,6 +427,7 @@ FlushAndReturn: } +#if CONFIG_HFS_STD static void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) { dest->hfsFile.dataLogicalSize = src->hfsFile.dataLogicalSize; @@ -418,6 +438,7 @@ static void CopyCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) BlockMoveData( src->hfsFile.dataExtents, dest->hfsFile.dataExtents, sizeof(HFSExtentRecord) ); BlockMoveData( src->hfsFile.rsrcExtents, dest->hfsFile.rsrcExtents, sizeof(HFSExtentRecord) ); } +#endif static void CopyBigCatalogNodeInfo( CatalogRecord *src, CatalogRecord *dest ) { @@ -485,6 +506,7 @@ static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest extentKeyPtr->hfsPlus.fileID = srcFileID; extentKeyPtr->hfsPlus.startBlock = 0; } +#if CONFIG_HFS_STD else { btRecord.itemSize = sizeof(HFSExtentRecord); btKeySize = sizeof(HFSExtentKey); @@ -494,6 +516,11 @@ static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest extentKeyPtr->hfs.fileID = srcFileID; extentKeyPtr->hfs.startBlock = 0; } +#else + else { + return cmBadNews; + } +#endif // // We do an initial BTSearchRecord to position the BTree's iterator just before any extent @@ -534,7 +561,7 @@ static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest for ( i=0 ; ihfsPlus.fileID : extentKeyPtr->hfs.fileID; + if (isHFSPlus) { + foundFileID = extentKeyPtr->hfsPlus.fileID; + } +#if CONFIG_HFS_STD + else { + foundFileID = extentKeyPtr->hfs.fileID; + } +#endif if ( foundFileID == srcFileID ) { /* Check if we need to quit early. 
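Both disk-full bailouts in ExchangeFileIDs() now unwind the same way instead of sharing a goto: drop the temporarily re-keyed ("bogus") extents, flush the catalog and extents overflow file, and report dskFulErr. The shape of that unwind, with stubbed stand-ins for DeleteExtents()/FlushCatalog()/FlushExtentFile(); DISK_FULL assumes dskFulErr's classic -34 value:

    enum { DISK_FULL = -34 };  /* assumption: dskFulErr */

    static int delete_extents(unsigned int file_id) { (void)file_id; return 0; }
    static int flush_catalog(void)     { return 0; }
    static int flush_extent_file(void) { return 0; }

    static int undo_exchange_on_disk_full(unsigned int bogus_id)
    {
        int err = delete_extents(bogus_id);  /* drop the re-keyed records */
        if (err)
            return err;                      /* "we are doomed. Just QUIT!" */

        (void)flush_catalog();               /* cheap even when not needed */
        (void)flush_extent_file();
        return DISK_FULL;                    /* surface the original failure */
    }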
*/ if (quitEarly && isHFSPlus) { @@ -565,17 +599,21 @@ static OSErr MoveExtents( ExtendedVCB *vcb, u_int32_t srcFileID, u_int32_t dest //-- edit each extent key, and reinsert each extent record in the extent file if (isHFSPlus) btRecordSize = sizeof(HFSPlusExtentRecord); +#if CONFIG_HFS_STD else btRecordSize = sizeof(HFSExtentRecord); - +#endif + for ( j=0 ; jkey, btKeySize); @@ -637,6 +675,7 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, u_int16_t btRecordSize; OSErr err; + MALLOC (btIterator, struct BTreeIterator*, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK); if (btIterator == NULL) { @@ -672,6 +711,7 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, extentKeyPtr->hfsPlus.fileID = fileID; extentKeyPtr->hfsPlus.startBlock = 0; } +#if CONFIG_HFS_STD else { btRecord.itemSize = sizeof(HFSExtentRecord); @@ -680,6 +720,9 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, extentKeyPtr->hfs.fileID = fileID; extentKeyPtr->hfs.startBlock = 0; } +#else + else return cmBadNews; +#endif err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); if ( err != btNotFound ) @@ -693,7 +736,7 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, do { - HFSCatalogNodeID foundFileID; + HFSCatalogNodeID foundFileID = 0; err = BTIterateRecord(fcb, kBTreeNextRecord, btIterator, &btRecord, &btRecordSize); if ( err != noErr ) @@ -703,8 +746,15 @@ static OSErr DeleteExtents( ExtendedVCB *vcb, u_int32_t fileID, int quitEarly, break; // We're done now. } - - foundFileID = isHFSPlus ? extentKeyPtr->hfsPlus.fileID : extentKeyPtr->hfs.fileID; + if (isHFSPlus) { + foundFileID = extentKeyPtr->hfsPlus.fileID; + } +#if CONFIG_HFS_STD + else { + foundFileID = extentKeyPtr->hfs.fileID; + } +#endif + if ( foundFileID != fileID ) { break; // numbers don't match, we must be done } @@ -749,6 +799,7 @@ static u_int32_t CheckExtents( void *extents, u_int32_t totalBlocks, Boolean is return( 0 ); } } +#if CONFIG_HFS_STD else { for ( i = 0 ; i < kHFSExtentDensity ; i++ ) @@ -758,6 +809,7 @@ static u_int32_t CheckExtents( void *extents, u_int32_t totalBlocks, Boolean is return( 0 ); } } +#endif return( extentAllocationBlocks ); } diff --git a/bsd/hfs/hfscommon/Misc/BTreeWrapper.c b/bsd/hfs/hfscommon/Misc/BTreeWrapper.c index 590fd9397..92b49c840 100644 --- a/bsd/hfs/hfscommon/Misc/BTreeWrapper.c +++ b/bsd/hfs/hfscommon/Misc/BTreeWrapper.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2002, 2005-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000, 2002, 2005-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,20 +106,29 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, { u_int32_t cNodeID; - if ( btcb->maxKeyLength == kHFSExtentKeyMaximumLength ) + if (btcb->maxKeyLength == kHFSPlusExtentKeyMaximumLength ) { - return ( recordSize == sizeof(HFSExtentRecord) ); + return ( recordSize == sizeof(HFSPlusExtentRecord) ); } - else if (btcb->maxKeyLength == kHFSPlusExtentKeyMaximumLength ) +#if CONFIG_HFS_STD + else if ( btcb->maxKeyLength == kHFSExtentKeyMaximumLength ) { - return ( recordSize == sizeof(HFSPlusExtentRecord) ); + return ( recordSize == sizeof(HFSExtentRecord) ); } +#endif + else // Catalog record { const CatalogRecord *catalogRecord = (const CatalogRecord*) record; switch(catalogRecord->recordType) { + +#if CONFIG_HFS_STD + /* + * HFS standard File/folder records and File/Folder Thread records + * are only valid on configs that support HFS standard. + */ case kHFSFolderRecord: { if ( recordSize != sizeof(HFSCatalogFolder) ) @@ -136,25 +145,8 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, } break; - case kHFSPlusFolderRecord: - { - if ( recordSize != sizeof(HFSPlusCatalogFolder) ) - return false; - if ( catalogRecord->hfsPlusFolder.flags != 0 ) - return false; - if ( catalogRecord->hfsPlusFolder.valence > 0x7FFF ) - return false; - - cNodeID = catalogRecord->hfsPlusFolder.folderID; - - if ( (cNodeID == 0) || (cNodeID < 16 && cNodeID > 2) ) - return false; - } - break; - case kHFSFileRecord: { -// u_int16_t i; HFSExtentDescriptor *dataExtent; HFSExtentDescriptor *rsrcExtent; @@ -193,7 +185,40 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, #endif } break; + + case kHFSFileThreadRecord: + case kHFSFolderThreadRecord: + { + if ( recordSize != sizeof(HFSCatalogThread) ) + return false; + + cNodeID = catalogRecord->hfsThread.parentID; + if ( (cNodeID == 0) || (cNodeID < 16 && cNodeID > 2) ) + return false; + + if ( (catalogRecord->hfsThread.nodeName[0] == 0) || + (catalogRecord->hfsThread.nodeName[0] > 31) ) + return false; + } + break; +#endif + + case kHFSPlusFolderRecord: + { + if ( recordSize != sizeof(HFSPlusCatalogFolder) ) + return false; + if ( catalogRecord->hfsPlusFolder.flags != 0 ) + return false; + if ( catalogRecord->hfsPlusFolder.valence > 0x7FFF ) + return false; + + cNodeID = catalogRecord->hfsPlusFolder.folderID; + if ( (cNodeID == 0) || (cNodeID < 16 && cNodeID > 2) ) + return false; + } + break; + case kHFSPlusFileRecord: { // u_int16_t i; @@ -225,26 +250,10 @@ static Boolean ValidHFSRecord(const void *record, const BTreeControlBlock *btcb, } #endif } - break; + break; - case kHFSFolderThreadRecord: - case kHFSFileThreadRecord: - { - if ( recordSize != sizeof(HFSCatalogThread) ) - return false; - - cNodeID = catalogRecord->hfsThread.parentID; - if ( (cNodeID == 0) || (cNodeID < 16 && cNodeID > 2) ) - return false; - - if ( (catalogRecord->hfsThread.nodeName[0] == 0) || - (catalogRecord->hfsThread.nodeName[0] > 31) ) - return false; - } - break; - - case kHFSPlusFolderThreadRecord: case kHFSPlusFileThreadRecord: + case kHFSPlusFolderThreadRecord: { if ( recordSize > sizeof(HFSPlusCatalogThread) || recordSize < (sizeof(HFSPlusCatalogThread) - sizeof(HFSUniStr255))) return false; diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c index cae3db8e2..6d6c228d9 100644 --- a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c +++ b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c @@ -1,5 
+1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -108,9 +108,11 @@ enum }; +#if CONFIG_HFS_STD static OSErr HFSPlusToHFSExtents( const HFSPlusExtentRecord oldExtents, HFSExtentRecord newExtents); +#endif static OSErr FindExtentRecord( const ExtendedVCB *vcb, @@ -241,18 +243,20 @@ static OSErr FindExtentRecord( } bzero(btIterator, sizeof(*btIterator)); - if (vcb->vcbSigWord == kHFSSigWord) { - HFSExtentKey * extentKeyPtr; - HFSExtentRecord extentData; + /* HFS Plus / HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { + HFSPlusExtentKey * extentKeyPtr; + HFSPlusExtentRecord extentData; - extentKeyPtr = (HFSExtentKey*) &btIterator->key; - extentKeyPtr->keyLength = kHFSExtentKeyMaximumLength; - extentKeyPtr->forkType = forkType; - extentKeyPtr->fileID = fileID; + extentKeyPtr = (HFSPlusExtentKey*) &btIterator->key; + extentKeyPtr->keyLength = kHFSPlusExtentKeyMaximumLength; + extentKeyPtr->forkType = forkType; + extentKeyPtr->pad = 0; + extentKeyPtr->fileID = fileID; extentKeyPtr->startBlock = startBlock; btRecord.bufferAddress = &extentData; - btRecord.itemSize = sizeof(HFSExtentRecord); + btRecord.itemSize = sizeof(HFSPlusExtentRecord); btRecord.itemCount = 1; err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); @@ -273,44 +277,26 @@ static OSErr FindExtentRecord( } if (err == noErr) { - u_int16_t i; - // Copy the found key back for the caller - if (foundKey) { - foundKey->keyLength = kHFSPlusExtentKeyMaximumLength; - foundKey->forkType = extentKeyPtr->forkType; - foundKey->pad = 0; - foundKey->fileID = extentKeyPtr->fileID; - foundKey->startBlock = extentKeyPtr->startBlock; - } + if (foundKey) + BlockMoveData(extentKeyPtr, foundKey, sizeof(HFSPlusExtentKey)); // Copy the found data back for the caller - foundData[0].startBlock = extentData[0].startBlock; - foundData[0].blockCount = extentData[0].blockCount; - foundData[1].startBlock = extentData[1].startBlock; - foundData[1].blockCount = extentData[1].blockCount; - foundData[2].startBlock = extentData[2].startBlock; - foundData[2].blockCount = extentData[2].blockCount; - - for (i = 3; i < kHFSPlusExtentDensity; ++i) - { - foundData[i].startBlock = 0; - foundData[i].blockCount = 0; - } + BlockMoveData(&extentData, foundData, sizeof(HFSPlusExtentRecord)); } } - else { // HFS Plus volume - HFSPlusExtentKey * extentKeyPtr; - HFSPlusExtentRecord extentData; +#if CONFIG_HFS_STD + else { + HFSExtentKey * extentKeyPtr; + HFSExtentRecord extentData; - extentKeyPtr = (HFSPlusExtentKey*) &btIterator->key; - extentKeyPtr->keyLength = kHFSPlusExtentKeyMaximumLength; - extentKeyPtr->forkType = forkType; - extentKeyPtr->pad = 0; - extentKeyPtr->fileID = fileID; + extentKeyPtr = (HFSExtentKey*) &btIterator->key; + extentKeyPtr->keyLength = kHFSExtentKeyMaximumLength; + extentKeyPtr->forkType = forkType; + extentKeyPtr->fileID = fileID; extentKeyPtr->startBlock = startBlock; btRecord.bufferAddress = &extentData; - btRecord.itemSize = sizeof(HFSPlusExtentRecord); + btRecord.itemSize = sizeof(HFSExtentRecord); btRecord.itemCount = 1; err = BTSearchRecord(fcb, btIterator, &btRecord, &btRecordSize, btIterator); @@ -331,13 +317,32 @@ static OSErr FindExtentRecord( } if (err == noErr) { + u_int16_t i; + // Copy the found key back for the caller - if (foundKey) - BlockMoveData(extentKeyPtr, foundKey, sizeof(HFSPlusExtentKey)); + if (foundKey) { + foundKey->keyLength = kHFSPlusExtentKeyMaximumLength; + 
foundKey->forkType = extentKeyPtr->forkType; + foundKey->pad = 0; + foundKey->fileID = extentKeyPtr->fileID; + foundKey->startBlock = extentKeyPtr->startBlock; + } // Copy the found data back for the caller - BlockMoveData(&extentData, foundData, sizeof(HFSPlusExtentRecord)); + foundData[0].startBlock = extentData[0].startBlock; + foundData[0].blockCount = extentData[0].blockCount; + foundData[1].startBlock = extentData[1].startBlock; + foundData[1].blockCount = extentData[1].blockCount; + foundData[2].startBlock = extentData[2].startBlock; + foundData[2].blockCount = extentData[2].blockCount; + + for (i = 3; i < kHFSPlusExtentDensity; ++i) + { + foundData[i].startBlock = 0; + foundData[i].blockCount = 0; + } } } +#endif if (foundHint) *foundHint = btIterator->hint.nodeNum; @@ -378,7 +383,18 @@ static OSErr CreateExtentRecord( */ lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); - if (vcb->vcbSigWord == kHFSSigWord) { + /* HFS+/HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { + btRecordSize = sizeof(HFSPlusExtentRecord); + btRecord.bufferAddress = extents; + btRecord.itemSize = btRecordSize; + btRecord.itemCount = 1; + + BlockMoveData(key, &btIterator->key, sizeof(HFSPlusExtentKey)); + } +#if CONFIG_HFS_STD + else { + /* HFS Standard */ HFSExtentKey * keyPtr; HFSExtentRecord data; @@ -395,14 +411,7 @@ static OSErr CreateExtentRecord( err = HFSPlusToHFSExtents(extents, data); } - else { // HFS Plus volume - btRecordSize = sizeof(HFSPlusExtentRecord); - btRecord.bufferAddress = extents; - btRecord.itemSize = btRecordSize; - btRecord.itemCount = 1; - - BlockMoveData(key, &btIterator->key, sizeof(HFSPlusExtentKey)); - } +#endif if (err == noErr) err = BTInsertRecord(GetFileControlBlock(vcb->extentsRefNum), btIterator, &btRecord, btRecordSize); @@ -436,25 +445,29 @@ static OSErr DeleteExtentRecord( } bzero(btIterator, sizeof(*btIterator)); - if (vcb->vcbSigWord == kHFSSigWord) { - HFSExtentKey * keyPtr; + /* HFS+ / HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { // HFS Plus volume + HFSPlusExtentKey * keyPtr; - keyPtr = (HFSExtentKey*) &btIterator->key; - keyPtr->keyLength = kHFSExtentKeyMaximumLength; + keyPtr = (HFSPlusExtentKey*) &btIterator->key; + keyPtr->keyLength = kHFSPlusExtentKeyMaximumLength; keyPtr->forkType = forkType; + keyPtr->pad = 0; keyPtr->fileID = fileID; keyPtr->startBlock = startBlock; } - else { // HFS Plus volume - HFSPlusExtentKey * keyPtr; +#if CONFIG_HFS_STD + else { + /* HFS standard */ + HFSExtentKey * keyPtr; - keyPtr = (HFSPlusExtentKey*) &btIterator->key; - keyPtr->keyLength = kHFSPlusExtentKeyMaximumLength; + keyPtr = (HFSExtentKey*) &btIterator->key; + keyPtr->keyLength = kHFSExtentKeyMaximumLength; keyPtr->forkType = forkType; - keyPtr->pad = 0; keyPtr->fileID = fileID; keyPtr->startBlock = startBlock; } +#endif err = BTDeleteRecord(GetFileControlBlock(vcb->extentsRefNum), btIterator); (void) BTFlushPath(GetFileControlBlock(vcb->extentsRefNum)); @@ -735,6 +748,7 @@ OSErr FlushExtentFile( ExtendedVCB *vcb ) } +#if CONFIG_HFS_STD //‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ // Routine: CompareExtentKeys // @@ -796,7 +810,7 @@ int32_t CompareExtentKeys( const HFSExtentKey *searchKey, const HFSExtentKey *tr return( result ); } - +#endif //‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹‹ @@ -959,6 +973,7 @@ OSErr ExtendFileC ( Boolean wantContig; Boolean useMetaZone; Boolean needsFlush; + int allowFlushTxns; u_int32_t actualStartBlock; u_int32_t actualNumBlocks; u_int32_t 
numExtentsPerRecord; @@ -966,8 +981,8 @@ OSErr ExtendFileC ( int64_t availbytes; int64_t peof; u_int32_t prevblocks; - - + struct hfsmount *hfsmp = (struct hfsmount*)vcb; + allowFlushTxns = 0; needsFlush = false; *actualBytesAdded = 0; volumeBlockSize = vcb->blockSize; @@ -975,20 +990,22 @@ OSErr ExtendFileC ( forceContig = ((flags & kEFContigMask) != 0); prevblocks = fcb->ff_blocks; - if (vcb->vcbSigWord == kHFSPlusSigWord) + if (vcb->vcbSigWord != kHFSSigWord) { numExtentsPerRecord = kHFSPlusExtentDensity; - else + } +#if CONFIG_HFS_STD + else { + /* HFS Standard */ numExtentsPerRecord = kHFSExtentDensity; - // - // Make sure the request and new PEOF are less than 2GB if HFS. - // - if (vcb->vcbSigWord == kHFSSigWord) { + /* Make sure the request and new PEOF are less than 2GB if HFS std */ if (bytesToAdd >= kTwoGigabytes) - goto Overflow; + goto HFS_Std_Overflow; if ((((int64_t)fcb->ff_blocks * (int64_t)volumeBlockSize) + bytesToAdd) >= kTwoGigabytes) - goto Overflow; + goto HFS_Std_Overflow; } +#endif + // // Determine how many blocks need to be allocated. // Round up the number of desired bytes to add. @@ -1003,9 +1020,9 @@ OSErr ExtendFileC ( && (vcb->vcbSigWord == kHFSPlusSigWord) && (bytesToAdd < (int64_t)HFS_MAX_DEFERED_ALLOC) && (blocksToAdd < hfs_freeblks(VCBTOHFS(vcb), 1))) { - HFS_MOUNT_LOCK(vcb, TRUE); + hfs_lock_mount (hfsmp); vcb->loanedBlocks += blocksToAdd; - HFS_MOUNT_UNLOCK(vcb, TRUE); + hfs_unlock_mount(hfsmp); fcb->ff_unallocblocks += blocksToAdd; FTOC(fcb)->c_blocks += blocksToAdd; @@ -1028,9 +1045,9 @@ OSErr ExtendFileC ( fcb->ff_blocks -= loanedBlocks; fcb->ff_unallocblocks = 0; - HFS_MOUNT_LOCK(vcb, TRUE); + hfs_lock_mount(hfsmp); vcb->loanedBlocks -= loanedBlocks; - HFS_MOUNT_UNLOCK(vcb, TRUE); + hfs_unlock_mount(hfsmp); } // @@ -1047,18 +1064,20 @@ OSErr ExtendFileC ( maximumBytes = bytesToAdd; } +#if CONFIG_HFS_STD // // Compute new physical EOF, rounded up to a multiple of a block. // if ( (vcb->vcbSigWord == kHFSSigWord) && // Too big? ((((int64_t)fcb->ff_blocks * (int64_t)volumeBlockSize) + bytesToAdd) >= kTwoGigabytes) ) { if (allOrNothing) // Yes, must they have it all? - goto Overflow; // Yes, can't have it + goto HFS_Std_Overflow; // Yes, can't have it else { --blocksToAdd; // No, give 'em one block less bytesToAdd -= volumeBlockSize; } } +#endif // // If allocation is all-or-nothing, make sure there are @@ -1169,20 +1188,40 @@ OSErr ExtendFileC ( if (wantContig && (availbytes < bytesToAdd)) err = dskFulErr; else { + uint32_t ba_flags = 0; + if (wantContig) { + ba_flags |= HFS_ALLOC_FORCECONTIG; + } + if (useMetaZone) { + ba_flags |= HFS_ALLOC_METAZONE; + } + if (allowFlushTxns) { + ba_flags |= HFS_ALLOC_FLUSHTXN; + } + err = BlockAllocate( vcb, startBlock, howmany(MIN(bytesToAdd, availbytes), volumeBlockSize), howmany(MIN(maximumBytes, availbytes), volumeBlockSize), - (wantContig ? HFS_ALLOC_FORCECONTIG : 0) | - (useMetaZone ? HFS_ALLOC_METAZONE : 0), + ba_flags, &actualStartBlock, &actualNumBlocks); } } if (err == dskFulErr) { - if (forceContig) - break; // AllocContig failed because not enough contiguous space + if (forceContig) { + if (allowFlushTxns == 0) { + /* If we're forcing contiguity, re-try but allow plucking from recently freed regions */ + allowFlushTxns = 1; + wantContig = 1; + err = noErr; + continue; + } + else { + break; // AllocContig failed because not enough contiguous space + } + } if (wantContig) { // Couldn't get one big chunk, so get whatever we can.
err = noErr; @@ -1191,12 +1230,21 @@ OSErr ExtendFileC ( } if (actualNumBlocks != 0) err = noErr; + if (useMetaZone == 0) { /* Couldn't get anything so dip into metadata zone */ err = noErr; useMetaZone = 1; continue; } + + /* If we couldn't find what we needed without flushing the journal, then go ahead and do it now */ + if (allowFlushTxns == 0) { + allowFlushTxns = 1; + err = noErr; + continue; + } + } if (err == noErr) { if (actualNumBlocks != 0) { @@ -1300,10 +1348,10 @@ Exit: /* Keep the roving allocator out of the metadata zone. */ if (vcb->nextAllocation >= VCBTOHFS(vcb)->hfs_metazone_start && vcb->nextAllocation <= VCBTOHFS(vcb)->hfs_metazone_end) { - HFS_MOUNT_LOCK(vcb, TRUE); + hfs_lock_mount (hfsmp); HFS_UPDATE_NEXT_ALLOCATION(vcb, VCBTOHFS(vcb)->hfs_metazone_end + 1); MarkVCBDirty(vcb); - HFS_MOUNT_UNLOCK(vcb, TRUE); + hfs_unlock_mount(hfsmp); } } if (prevblocks < fcb->ff_blocks) { @@ -1317,7 +1365,9 @@ Exit: return err; -Overflow: +#if CONFIG_HFS_STD +HFS_Std_Overflow: +#endif err = fileBoundsErr; goto ErrorExit; } @@ -1387,6 +1437,8 @@ OSErr TruncateFileC ( // nextBlock = howmany(peof, vcb->blockSize); // number of allocation blocks to remain in file peof = (int64_t)((int64_t)nextBlock * (int64_t)vcb->blockSize); // number of bytes in those blocks + +#if CONFIG_HFS_STD if ((vcb->vcbSigWord == kHFSSigWord) && (peof >= kTwoGigabytes)) { #if DEBUG_BUILD DebugStr("HFS: Trying to truncate a file to 2GB or more"); @@ -1394,6 +1446,7 @@ OSErr TruncateFileC ( err = fileBoundsErr; goto ErrorExit; } +#endif // // Update FCB's length @@ -1701,7 +1754,8 @@ static OSErr SearchExtentRecord( { OSErr err = noErr; u_int32_t extentIndex; - u_int32_t numberOfExtents; + /* Set it to the HFS std value */ + u_int32_t numberOfExtents = kHFSExtentDensity; u_int32_t numAllocationBlocks; Boolean foundExtent; @@ -1709,10 +1763,10 @@ static OSErr SearchExtentRecord( *noMoreExtents = false; foundExtent = false; - if (vcb->vcbSigWord == kHFSPlusSigWord) + /* Override numberOfExtents for HFS+/HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { numberOfExtents = kHFSPlusExtentDensity; - else - numberOfExtents = kHFSExtentDensity; + } for( extentIndex = 0; extentIndex < numberOfExtents; ++extentIndex ) { @@ -1935,7 +1989,30 @@ static OSErr UpdateExtentRecord (ExtendedVCB *vcb, FCB *fcb, int deleted, */ lockflags = hfs_systemfile_lock(vcb, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK); - if (vcb->vcbSigWord == kHFSSigWord) { + /* HFS+/HFSX */ + if (vcb->vcbSigWord != kHFSSigWord) { // HFS Plus volume + HFSPlusExtentRecord foundData; // The extent data actually found + + BlockMoveData(extentFileKey, &btIterator->key, sizeof(HFSPlusExtentKey)); + + btIterator->hint.index = 0; + btIterator->hint.nodeNum = extentBTreeHint; + + btRecord.bufferAddress = &foundData; + btRecord.itemSize = sizeof(HFSPlusExtentRecord); + btRecord.itemCount = 1; + + err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator); + + if (err == noErr) { + BlockMoveData(extentData, &foundData, sizeof(HFSPlusExtentRecord)); + err = BTReplaceRecord(btFCB, btIterator, &btRecord, btRecordSize); + } + (void) BTFlushPath(btFCB); + } +#if CONFIG_HFS_STD + else { + /* HFS Standard */ HFSExtentKey * key; // Actual extent key used on disk in HFS HFSExtentRecord foundData; // The extent data actually found @@ -1953,34 +2030,17 @@ static OSErr UpdateExtentRecord (ExtendedVCB *vcb, FCB *fcb, int deleted, btRecord.itemCount = 1; err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator); - + if (err == noErr) err =
HFSPlusToHFSExtents(extentData, (HFSExtentDescriptor *)&foundData); if (err == noErr) err = BTReplaceRecord(btFCB, btIterator, &btRecord, btRecordSize); (void) BTFlushPath(btFCB); - } - else { // HFS Plus volume - HFSPlusExtentRecord foundData; // The extent data actually found - - BlockMoveData(extentFileKey, &btIterator->key, sizeof(HFSPlusExtentKey)); - - btIterator->hint.index = 0; - btIterator->hint.nodeNum = extentBTreeHint; - - btRecord.bufferAddress = &foundData; - btRecord.itemSize = sizeof(HFSPlusExtentRecord); - btRecord.itemCount = 1; - err = BTSearchRecord(btFCB, btIterator, &btRecord, &btRecordSize, btIterator); - - if (err == noErr) { - BlockMoveData(extentData, &foundData, sizeof(HFSPlusExtentRecord)); - err = BTReplaceRecord(btFCB, btIterator, &btRecord, btRecordSize); - } - (void) BTFlushPath(btFCB); } +#endif + hfs_systemfile_unlock(vcb, lockflags); FREE(btIterator, M_TEMP); @@ -1991,7 +2051,7 @@ static OSErr UpdateExtentRecord (ExtendedVCB *vcb, FCB *fcb, int deleted, - +#if CONFIG_HFS_STD static OSErr HFSPlusToHFSExtents( const HFSPlusExtentRecord oldExtents, HFSExtentRecord newExtents) @@ -2017,7 +2077,7 @@ static OSErr HFSPlusToHFSExtents( return err; } - +#endif diff --git a/bsd/hfs/hfscommon/Misc/HybridAllocator.c b/bsd/hfs/hfscommon/Misc/HybridAllocator.c deleted file mode 100644 index 6e0e1f23a..000000000 --- a/bsd/hfs/hfscommon/Misc/HybridAllocator.c +++ /dev/null @@ -1,533 +0,0 @@ -/* - * Copyright (c) 2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#if CONFIG_HFS_ALLOC_RBTREE - -#define assert(a) { if (!(a)) { panic("File "__FILE__", line %d: assertion '%s' failed.\n", __LINE__, #a); } } - -//#include -#include "../../hfs_macos_defs.h" -#include "../headers/HybridAllocator.h" - -#define bool Boolean - -#define ALLOC_DEBUG 0 - -/* - * The rb_wrap macro in RedBlackTree.h automatically generates the source for a variety of functions that - * operate on the red-black trees. The bodies of these automatically generated functions are the corresponding - * macro from RedBlackTree.h. For example, the extent_tree_length_new() function invokes the rb_new() macro. 
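- *
- * As an illustrative sketch (not part of the original file), a caller of the
- * generated API that walks every free extent in ascending offset order would
- * look like:
- *
- *     extent_node_t *node;
- *     for (node = extent_tree_offset_first(tree);
- *          node != NULL;
- *          node = extent_tree_offset_next(tree, node)) {
- *         printf("free extent: offset %u, length %u\n",
- *                node->offset, node->length);
- *     }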
- * We re-define actual wrapper functions around them so that we can re-name them and adjust the functions - * that are available to the allocator in VolumeAllocation.c. - * - * Here are the functions that get automatically generated: - * Offset-Tree Functions: - * - * initialize the tree - * static void extent_tree_offset_new(extent_tree_offset_t * tree) - * - * Get the first node in the tree. If it is empty, return NULL - * static extent_node_t* extent_tree_offset_first (extent_tree_offset_t * tree) - * - * Get the last node in the tree. If it is empty, return NULL - * static extent_node_t* extent_tree_offset_last (extent_tree_offset_t * tree) - * - * From a given extent_node_t, grab the next one. If no next exists, return NULL - * static extent_node_t* extent_tree_offset_next (extent_tree_offset_t * tree, extent_node_t * node) - * - * From a given extent_node_t, grab the previous. If no prev exists, return NULL - * static extent_node_t* extent_tree_offset_prev(extent_tree_offset_t * tree, extent_node_t * node) - * - * Find an extent_node_t with the specified key (search by offset). If it does not exist, return NULL - * static extent_node_t* extent_tree_offset_search(extent_tree_offset_t * tree, extent_node_t * key) - * - * Find an extent_node_t with the specified key (offset). If it does not exist, - * either grab the next node, if possible, or return NULL - * static extent_node_t* extent_tree_offset_nsearch(extent_tree_offset_t * tree, extent_node_t * key) - * - * Find an extent_node_t with the specified key (offset). If it does not exist, - * either grab the previous node, if possible, or return NULL - * static extent_node_t* extent_tree_offset_psearch(extent_tree_offset_t * tree, extent_node_t * key) - * - * Insert the specified node into the tree. - * static void extent_tree_offset_insert(extent_tree_offset_t * tree, extent_node_t * node) - * - * Remove the specified node from the tree. - * static void extent_tree_offset_remove(extent_tree_offset_t * tree, extent_node_t * node) - * - */ - - -/* Static Functions only used in this file */ -static int32_t -extent_tree_internal_alloc_space(extent_tree_offset_t *offset_tree, - u_int32_t size, u_int32_t offset, extent_node_t *node); - -/* - * cmp_offset_node - * - * Compare the extents in two nodes by offset. - * - * Returns: - * -1 if node 1's offset < node 2's offset. - * 1 if node 1's offset > node 2's offset. - */ - -__private_extern__ int -cmp_offset_node(extent_node_t *node_1, extent_node_t *node_2) { - u_int32_t addr_1 = node_1->offset; - u_int32_t addr_2 = node_2->offset; - - return ((addr_1 > addr_2) - (addr_1 < addr_2)); -} - -/* - * Allocate a new red-black tree node. - * - * Currently, we get memory from the M_TEMP zone. - * TODO: Need to get our own zone to avoid bloating the M_TEMP zone. - */ -__private_extern__ extent_node_t * -alloc_node(u_int32_t length, u_int32_t offset) { - extent_node_t *node; - MALLOC(node, extent_node_t *, sizeof(extent_node_t), M_TEMP, M_WAITOK); - - if (node) { - node->offset = offset; - node->length = length; - node->offset_next = NULL; - } - return node; -} - -/* - * De-allocate a red-black tree node. - * - * Currently, this goes back to the M_TEMP zone. - * TODO: May need to adjust this if we pull memory out of our own zone. - */ -__private_extern__ void -free_node(extent_node_t *node) { - FREE(node, M_TEMP); -} - -/* - * rb_wrap is a macro found in the rb.h header file. It builds functions that operate on - * the red-black tree based upon the types specified here.
This code will build red-black tree - * search functions that operate on extent_node_t's and use cmp_length_node to do length searches. - * It uses cmp_offset_node to do offset searches. Ties are broken by offset. This will generate - * the functions specified above. - */ - -rb_wrap(__attribute__ ((unused)) static, extent_tree_offset_, extent_tree_offset_t, extent_node_t, offset_link, cmp_offset_node) - - -/* - * Create a new extent tree, composed of links sorted by offset. - */ -__private_extern__ void -extent_tree_init(extent_tree_offset_t *offset_tree) -{ - extent_node_t *node = NULL; - extent_tree_offset_new(offset_tree); - - node = extent_tree_off_first (offset_tree); - if (node) { - node->offset_next = NULL; - } -} - -/* - * Destroy an extent tree - * - * This function finds the first node in the specified red-black tree, then - * uses the embedded linked list to walk through the tree in O(n) time and destroy - * all of its nodes. - */ -__private_extern__ void -extent_tree_destroy(extent_tree_offset_t *off_tree) { - extent_node_t *node = NULL; - extent_node_t *next = NULL; - - node = extent_tree_offset_first (off_tree); - - while (node) { - next = node->offset_next; - extent_tree_offset_remove (off_tree, node); - free_node (node); - node = next; - } -} - -/* - * Search the extent tree by offset. The "key" argument is only used to extract - * the offset and length information. Its link fields are not used in the underlying - * tree code. - */ -__private_extern__ extent_node_t * -extent_tree_off_search(extent_tree_offset_t *tree, extent_node_t *key) { - return extent_tree_offset_search(tree, key); -} - -/* - * Search the extent tree by offset, finding the next node in the tree - * if the specified one does not exist. The "key" argument is only used to extract - * the offset and length information. Its link fields are not used in the underlying - * tree code. - */ -__private_extern__ extent_node_t * -extent_tree_off_search_next(extent_tree_offset_t *offset_tree, extent_node_t *key) { - - return extent_tree_offset_nsearch (offset_tree, key); -} - -/* - * Search the extent tree by offset to find a starting position. Then, do a linear search - * through the list of free extents to find the first free extent in the tree that has size - * greater than or equal to the specified size. The "key" argument is only used to extract - * the offset and length information. Its link fields are not used in the underlying - * tree code. - */ -__private_extern__ extent_node_t * -extent_tree_off_search_nextWithSize (extent_tree_offset_t *offset_tree, extent_node_t *key) { - - extent_node_t *current; - - u_int32_t min_size = key->length; - - current = extent_tree_offset_nsearch (offset_tree, key); - - while (current) { - if (current->length >= min_size) { - return current; - } - current = current->offset_next; - } - - /* return NULL if no free extent of suitable size could be found. */ - return NULL; -} - - -/* - * Search the extent tree by offset, finding the previous node in the tree - * if the specified one does not exist. The "key" argument is only used to extract - * the offset and length information. Its link fields are not used in the underlying - * tree code. - */ -__private_extern__ extent_node_t * -extent_tree_off_search_prev(extent_tree_offset_t *offset_tree, extent_node_t *key) { - - return extent_tree_offset_psearch (offset_tree, key); -} - - -/* - * Find the first node in the extent tree, by offset. This will be the first - * free space region relative to the start of the disk. 
- */ -__private_extern__ extent_node_t * -extent_tree_off_first (extent_tree_offset_t *offset_tree) { - return extent_tree_offset_first(offset_tree); -} - -/* - * From a given tree node (sorted by offset), get the next node in the tree. - */ -__private_extern__ extent_node_t * -extent_tree_off_next(extent_tree_offset_t * tree, extent_node_t *node) -{ - return extent_tree_offset_next(tree, node); -} - -/* - * From a given tree node (sorted by offset), get the previous node in the tree. - */ -__private_extern__ extent_node_t * -extent_tree_off_prev(extent_tree_offset_t * tree, extent_node_t *node) -{ - return extent_tree_offset_prev(tree, node); -} - - -/* - * For a node of a given offset and size, remove it from the extent tree and - * insert a new node that: - * - * A) increases its offset by that of the node we just removed - * B) decreases its size by that of the node we just removed. - * - * NOTE: Callers must ensure that the 'size' specified is less than or equal to the - * length of the extent represented by node. The node pointer must point to an - * extant node in the tree, as it will be removed from the tree. - */ -static int32_t -extent_tree_internal_alloc_space(extent_tree_offset_t *offset_tree, u_int32_t size, - u_int32_t offset, extent_node_t *node) -{ - if (node) { - extent_node_t *prev = NULL; - extent_node_t *next = NULL; - - if( ALLOC_DEBUG ) { - assert ((size <= node->length)); - assert ((offset == node->offset)); - } - - prev = extent_tree_offset_prev(offset_tree, node); - - /* - * Note that, unless the node is exactly the size of the amount of space - * requested, we do not need to remove it from the offset tree, no matter - * how much space we remove from the node. Remember that the offset tree is - * sorting the extents based on their offsets, and that each node is a discrete - * chunk of free space. - * - * If node A has offset B, with length C, in the offset tree, by definition, there - * can be no other node in the extent tree within the range {B, B+C}. If there were, - * we'd have overlapped extents. - * - * So in the normal case, we'll just update the offset node in place with the new offset - * and size. - * - * Otherwise, if we have an exact match, then just remove the node altogether. Don't forget - * to update the next pointer for the linked list if applicable. - */ - if (node->length == size) { - next = node->offset_next; - extent_tree_offset_remove(offset_tree, node); - free_node(node); - if (prev) { - prev->offset_next = next; - } - } - else { - node->offset = node->offset + size; - node->length -= size; - /* The next pointer does not change since we keep the node in place */ - } - return 0; - } - return -1; -} - -/* - * Search the extent tree for a region of free space after the specified - * offset and attempt to allocate it. - * - * This is expected to be used by attempts to grow a file contiguously. If we - * start at a file's EOF, then we can try to allocate space immediately after it - * if it's available. This function specifies a tail (the offset), and then passes it - * into extent_tree_offset_search. Note that this is not the search_prev or search_next - * variant, so if no node exists at the specified offset we'll fail out.
- * - */ - -__private_extern__ int32_t -extent_tree_offset_alloc_space(extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset) { - extent_node_t search_sentinel = { .offset = offset }; - extent_node_t *node = extent_tree_offset_search(offset_tree, &search_sentinel); - if (node && (node->length < size)) { - /* It's too small. Fail the allocation */ - if ( ALLOC_DEBUG ) { - printf("HFS Allocator: internal_alloc_space, ptr (%p) node->length (%d), node->offset (%d), off(%d), size (%d) \n", - node, node->length, node->offset, offset, size); - } - return -1; - } - return extent_tree_internal_alloc_space(offset_tree, size, offset, node); -} - - -/* - * Search the extent tree for a region of free space at the specified - * offset and attempt to allocate it. - * - * This is a little bit more involved than the previous function. It is intended for use when - * we may be allocating space from the middle of an existing extent node. - * - */ - - -__private_extern__ int32_t -extent_tree_offset_alloc_unaligned(extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset) { - extent_node_t search_sentinel = { .offset = offset }; - extent_node_t *node= NULL; - - node = extent_tree_off_search_prev(offset_tree, &search_sentinel); - - if (node == NULL) { - return -1; - } - - if (node && (node->length < size)) { - /* It's too small. Fail the allocation */ - if ( ALLOC_DEBUG ) { - printf("HFS Allocator: internal_alloc_space, ptr (%p) node->length (%d), node->offset (%d), off(%d), size (%d) \n", - node, node->length, node->offset, offset, size); - } - return -1; - } - - /* Now see if we need to split this node because we're not allocating from the beginning */ - if (offset != node->offset) { - - if (ALLOC_DEBUG) { - assert ((offset + size) <= (node->offset + node->length)); - if (node->offset_next) { - assert ((offset > node->offset) && (offset < node->offset_next->offset)); - } - } - - u_int32_t end = node->offset + node->length; - node->length = offset - node->offset; - - /* - * Do we need to create a new node? If our extent we're carving away ends earlier than - * the current extent's length, then yes - we do. - */ - if ((offset + size) < (end)) { - u_int32_t newoff = offset + size; - u_int32_t newlen = end - newoff; - - extent_node_t* newnode = alloc_node(newlen, newoff); - extent_tree_offset_insert(offset_tree, newnode); - - extent_node_t *next = extent_tree_offset_next(offset_tree, newnode); - newnode->offset_next = next; - node->offset_next = newnode; - } - - return 0; - } - else { - return extent_tree_internal_alloc_space(offset_tree, size, offset, node); - } -} - - - -/* - * Mark an extent of space as being free. This means we need to insert - * this extent into our tree. - * - * Search the offset tree, based on the new offset that we construct by adding - * the length of our extent to be freed to its offset. If something exists at - * that offset, then we coalesce the nodes. In this case, we do not need to adjust - * the offset tree because our extent we wanted to add could not have been in the tree. - * - * If no node existed at the specified offset, then create a new one and insert it - * into the tree. - * - * Finally, search based on the node that would precede our newly created/inserted one. - * If possible, coalesce the previous node into our new one. - * - * We return the node which we are modifying in this function. 
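- *
- * A worked example with hypothetical numbers: suppose the tree holds free
- * extents {offset 50, length 30} and {offset 100, length 50}, and 20 blocks
- * are freed at offset 80. The search key offset is 80 + 20 = 100, which
- * matches the second node, so that node absorbs the freed range and becomes
- * {offset 80, length 70}. The previous node then ends exactly at offset 80
- * (50 + 30), so it is coalesced as well, leaving the single free extent
- * {offset 50, length 100}.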
- */ - -__private_extern__ extent_node_t * -extent_tree_free_space(extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset) -{ - extent_node_t *prev = NULL; - extent_node_t *node = NULL; - extent_node_t *next = NULL; - extent_node_t search_sentinel = { .offset = size + offset }; - - node = extent_tree_offset_nsearch(offset_tree, &search_sentinel); - /* Insert our node into the tree, and coalesce with the next one if necessary */ - - if ((node) && (node->offset == search_sentinel.offset)) { - node->offset = offset; - node->length += size; - next = node->offset_next; - } - else { - node = alloc_node(size, offset); - assert(node); - extent_tree_offset_insert(offset_tree, node); - - /* Find the next entry in the tree, if applicable. */ - next = extent_tree_offset_next(offset_tree, node); - node->offset_next = next; - } - - /* Coalesce with the previous if necessary */ - prev = extent_tree_offset_prev(offset_tree, node); - if (prev && (prev->offset + prev->length) == offset) { - extent_tree_offset_remove(offset_tree, prev); - node->offset = prev->offset; - node->length += prev->length; - free_node(prev); - prev = extent_tree_offset_prev(offset_tree, node); - } - - /* Update the next pointer for the previous entry (if necessary) */ - if (prev) { - prev->offset_next = node; - } - - return node; -} - -/* - * Remove the specified node from the offset_tree. Note that the parameter node - * must be an extant node in the tree. This function is used by the allocator when - * we are resizing a volume and need to directly manipulate the contents of the red-black - * tree without going through the normal allocation and deallocation routines. - */ -__private_extern__ void -extent_tree_remove_node (extent_tree_offset_t *offset_tree, extent_node_t * node) { - - if (node) { - /* Just remove the entry from the tree */ - extent_tree_offset_remove(offset_tree, node); - } - return; - -} - - - -#if ALLOC_DEBUG -/* - * For each node in the tree, print out its length and block offset. - */ -__private_extern__ void -extent_tree_offset_print(extent_tree_offset_t *offset_tree) -{ - extent_node_t *node = NULL; - - node = extent_tree_offset_first(offset_tree); - while (node) { - printf("length: %u, offset: %u\n", node->length, node->offset); - node = node->offset_next; - } -} -#endif - -#endif diff --git a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c index a8a874c90..0fa70d27b 100644 --- a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c +++ b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -71,28 +71,17 @@ Public routines: know about. If growing, scan the new range of bitmap, and if shrinking, reduce the number of items in the tree that we can allocate from. - UnmapBlocks - Issues DKIOCUNMAPs to the device as it fills the internal volume buffer when iterating - the volume bitmap. + ScanUnmapBlocks + Traverse the entire allocation bitmap. Potentially issue DKIOCUNMAPs to the device as it + tracks unallocated ranges when iterating the volume bitmap. Additionally, build up the in-core + summary table of the allocation bitmap. Internal routines: - Note that the RBTree routines are guarded by a cpp check for CONFIG_HFS_ALLOC_RBTREE. This - is to cut down on space for functions that could not possibly be used if they are not planning to - use the red-black tree code. 
- - BlockMarkFreeRBTree - Make an internal call to BlockMarkFree and then update - and/or create Red-Black Tree allocation tree nodes to correspond - to the free space being generated. BlockMarkFreeInternal Mark a contiguous range of blocks as free. The corresponding bits in the volume bitmap will be cleared. This will actually do the work of modifying the bitmap for us. - BlockMarkAllocatedRBTree - Make an internal call to BlockAllocateMarked, which will update the - bitmap on-disk when we allocate blocks. If that is successful, then - we'll remove the appropriate entries from the red-black tree. BlockMarkAllocatedInternal Mark a contiguous range of blocks as allocated. The cor- responding bits in the volume bitmap are set. Also tests to see @@ -112,33 +101,13 @@ Internal routines: Finds a range of blocks per the above requirements without using the Allocation RB Tree. This relies on the bitmap-scanning logic in order to find any valid range of free space needed. - BlockAllocateAnyRBTree - Finds a valid range of blocks per the above requirements by searching - the red-black tree. We can just make an internal call to - BlockAllocateContigRBTree to find the valid range. BlockAllocateContig Find and allocate a contiguous range of blocks of a given size. If a contiguous range of free blocks of the given size isn't found, then - the allocation fails (i.e. it is "all or nothing"). This routine is - essentially a wrapper function around its related sub-functions, - BlockAllocateContigBitmap and BlockAllocateContigRBTree, which use, - respectively, the original HFS+ bitmap scanning logic and the new - Red-Black Tree to search and manage free-space decisions. This function - contains logic for when to use which of the allocation algorithms, - depending on the free space contained in the volume. - BlockAllocateContigBitmap - Finds and allocates a range of blocks specified by the size parameters - using the original HFS+ bitmap scanning logic. The red-black tree - will not be updated if this function is used. - BlockAllocateContigRBTree - Finds and allocates a range of blocks specified by the size parameters - using the new red/black tree data structure and search algorithms - provided by the tree library. Updates the red/black tree nodes after - the on-disk data structure (bitmap) has been updated. + the allocation fails (i.e. it is "all or nothing"). BlockAllocateKnown Try to allocate space from known free space in the volume's free extent cache. - ReadBitmapBlock Given an allocation block number, read the bitmap block that contains that allocation block into a caller-supplied buffer. @@ -146,6 +115,14 @@ Internal routines: ReleaseBitmapBlock Release a bitmap block back into the buffer cache. + ReadBitmapRange + Given an allocation block number, read a range of bitmap that + must begin at that allocation block into a caller supplied buffer. + + ReleaseBitmapRange + Release and invalidate a buf_t corresponding to the bitmap + back into the UBC in order to prevent coherency issues. + remove_free_extent_cache Remove an extent from the free extent cache. Handles overlaps with multiple extents in the cache, and handles splitting an @@ -155,7 +132,11 @@ Internal routines: add_free_extent_cache Add an extent to the free extent cache. It will merge the input extent with extents already in the cache. - + CheckUnmappedBytes + Check whether or not the current transaction + has allocated blocks that were recently freed. This may have data safety implications. 
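+
+     As an illustrative sketch of that intended use (the flush primitive named
+     here is an assumption, not taken from this file), a caller would do:
+
+         int recently_freed = 0;
+         uint32_t overlap_end = 0;
+         if (CheckUnmappedBytes(hfsmp, startBlock, numBlocks,
+                                &recently_freed, &overlap_end) == 0 &&
+             recently_freed) {
+                 /* The extent overlaps an uncommitted TRIM; flush the
+                    journal before file data I/O can race ahead of it. */
+                 hfs_journal_flush(hfsmp, TRUE);
+         }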
+ + Debug/Test Routines hfs_isallocated @@ -165,42 +146,8 @@ Debug/Test Routines hfs_isallocated_scan Test to see if any blocks in a range are allocated. Releases and invalidates the block used when finished. - - hfs_isrbtree_active - Test to see if the allocation red-black tree is live. This function - requires either an exclusive or shared lock on the allocation bitmap file - in the HFS mount structure, to prevent red-black tree pointers from disappearing. - - hfs_isrbtree_allocated - Test to see if the specified extent is marked as allocated in the red-black tree. - Multiplexes between the metadata zone trees and the normal allocation zone trees - depending on the offset of the extent specified. - - check_rbtree_extents - Void function that wraps around the above function (hfs_isrbtree_allocated) - and checks to see that the return value was appropriate based on the assertion we're - trying to validate (whether or not the specified extent should be marked as free - or allocated). - - hfs_validate_rbtree - Exhaustive search function that will check every allocation block for its status in the - red-black tree and then check the corresponding status in the bitmap file. If the two are out - of sync, it will panic. Note that this function is extremely expensive and must NEVER - be run outside of debug code. - - hfs_checktreelinks - Checks the embedded linked list structure of the red black tree for integrity. The next pointer - should always point to whatever extent_tree_offset_next returns. - - -Red Black Tree Specific Routines - GenerateTree - Build a red-black tree for the given filesystem's bitmap. - - DestroyTrees - Destroy the tree on the given filesystem - - + +Optimization Routines hfs_alloc_scan_block Given a starting allocation block number, figures out which physical block contains that allocation block's bit, and scans it from the starting bit until either the ending bit or @@ -218,6 +165,8 @@ Red Black Tree Specific Routines #include #include #include +#include + /* For VM Page size */ #include @@ -227,7 +176,6 @@ Red Black Tree Specific Routines #include "../../hfs_endian.h" #include "../../hfs_macos_defs.h" #include "../headers/FileMgrInternal.h" -#include "../headers/HybridAllocator.h" #include "../../hfs_kdebug.h" /* Headers for unmap-on-mount support */ @@ -282,153 +230,127 @@ enum { #define kHighBitInWordMask 0x80000000ul #define kAllBitsSetInWord 0xFFFFFFFFul +#define HFS_MIN_SUMMARY_BLOCKSIZE 4096 #define ALLOC_DEBUG 0 static OSErr ReadBitmapBlock( - ExtendedVCB *vcb, - u_int32_t bit, - u_int32_t **buffer, - uintptr_t *blockRef); + ExtendedVCB *vcb, + u_int32_t bit, + u_int32_t **buffer, + uintptr_t *blockRef); static OSErr ReleaseBitmapBlock( - ExtendedVCB *vcb, - uintptr_t blockRef, - Boolean dirty); + ExtendedVCB *vcb, + uintptr_t blockRef, + Boolean dirty); static OSErr BlockAllocateAny( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t endingBlock, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks); + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t endingBlock, + u_int32_t maxBlocks, + u_int32_t flags, + Boolean trustSummary, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); static OSErr BlockAllocateAnyBitmap( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t endingBlock, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks); + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t endingBlock, + u_int32_t maxBlocks, 
+ u_int32_t flags, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); static OSErr BlockAllocateContig( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t minBlocks, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks); - -static OSErr BlockAllocateContigBitmap( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t minBlocks, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks); + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + u_int32_t flags, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); static OSErr BlockFindContiguous( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t endingBlock, - u_int32_t minBlocks, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks); + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t endingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + Boolean useMetaZone, + Boolean trustSummary, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); static OSErr BlockAllocateKnown( - ExtendedVCB *vcb, - u_int32_t maxBlocks, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks); + ExtendedVCB *vcb, + u_int32_t maxBlocks, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks); static OSErr BlockMarkAllocatedInternal ( - ExtendedVCB *vcb, - u_int32_t startingBlock, - register u_int32_t numBlocks); + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t numBlocks); static OSErr BlockMarkFreeInternal( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t numBlocks, - Boolean do_validate); + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t numBlocks, + Boolean do_validate); -static OSErr ReleaseScanBitmapBlock( struct buf *bp ); +static OSErr ReadBitmapRange (struct hfsmount *hfsmp, uint32_t offset, uint32_t iosize, + uint32_t **buffer, struct buf **blockRef); + +static OSErr ReleaseScanBitmapRange( struct buf *bp ); static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t offset, - u_int32_t numBlocks, struct jnl_trim_list *list); + u_int32_t numBlocks, struct jnl_trim_list *list); static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list); -static int hfs_alloc_scan_block(struct hfsmount *hfsmp, - u_int32_t startbit, - u_int32_t endBit, - u_int32_t *bitToScan, - struct jnl_trim_list *list); +static int hfs_alloc_scan_range(struct hfsmount *hfsmp, + u_int32_t startbit, + u_int32_t *bitToScan, + struct jnl_trim_list *list); -int hfs_isallocated_scan (struct hfsmount *hfsmp, - u_int32_t startingBlock, - u_int32_t *bp_buf); - -#if CONFIG_HFS_ALLOC_RBTREE -static OSErr BlockAllocateAnyRBTree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks); - -static OSErr BlockAllocateContigRBTree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t minBlocks, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks, - u_int32_t forceContig); - -static OSErr BlockMarkAllocatedRBTree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t numBlocks); - -static OSErr BlockMarkFreeRBTree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t numBlocks); +static int hfs_scan_range_size (struct hfsmount* hfsmp, uint32_t start, uint32_t *iosize); +static uint32_t CheckUnmappedBytes (struct hfsmount 
*hfsmp, uint64_t blockno, uint64_t numblocks, int *recent, uint32_t *next); -static int -hfs_isrbtree_allocated (struct hfsmount * hfsmp, - u_int32_t startBlock, - u_int32_t numBlocks, - extent_node_t** node1); +/* Bitmap Re-use Detection */ +static inline int extents_overlap (uint32_t start1, uint32_t len1, + uint32_t start2, uint32_t len2) { + return !( ((start1 + len1) <= start2) || ((start2 + len2) <= start1) ); +} -extern void -hfs_validate_rbtree (struct hfsmount *hfsmp, - u_int32_t start, - u_int32_t end); -static void hfs_checktreelinks (struct hfsmount *hfsmp); +int hfs_isallocated_scan (struct hfsmount *hfsmp, + u_int32_t startingBlock, + u_int32_t *bp_buf); + +/* Summary Table Functions */ +static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse); +static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t *index); +static int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock); +static int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t *alloc); +static int hfs_release_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t length); +static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t start, uint32_t *freeblocks); +static int hfs_rebuild_summary (struct hfsmount *hfsmp); + +#if 0 +static int hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock); +#endif +/* Used in external mount code to initialize the summary table */ +int hfs_init_summary (struct hfsmount *hfsmp); -void check_rbtree_extents (struct hfsmount *hfsmp, - u_int32_t start, - u_int32_t numBlocks, - int shouldBeFree); +#if ALLOC_DEBUG +void hfs_validate_summary (struct hfsmount *hfsmp); +#endif -#define ASSERT_FREE 1 -#define ASSERT_ALLOC 0 - -#endif /* CONFIG_HFS_ALLOC_RBTREE */ /* Functions for manipulating free extent cache */ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount); @@ -507,16 +429,16 @@ static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBloc u_int64_t length; u_int64_t device_sz; int err = 0; - + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); - + if (ALLOC_DEBUG) { if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { panic("hfs: %p: (%u,%u) unmapping allocated blocks", hfsmp, startingBlock, numBlocks); } } - + if (hfsmp->jnl != NULL) { device_sz = hfsmp->hfs_logical_bytes; offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; @@ -524,24 +446,22 @@ static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBloc /* Validate that the trim is in a valid range of bytes */ if ((offset >= device_sz) || ((offset + length) > device_sz)) { - printf("hfs_unmap_free_ext: ignoring trim @ off %lld len %lld \n", offset, length); + printf("hfs_unmap_free_ext: ignoring trim vol=%s @ off %lld len %lld \n", hfsmp->vcbVN, offset, length); err = EINVAL; } if (err == 0) { err = journal_trim_add_extent(hfsmp->jnl, offset, length); if (err) { - printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent", err); + printf("hfs_unmap_free_extent: error %d from journal_trim_add_extent for vol=%s", err, hfsmp->vcbVN); } } } - + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_FREE | DBG_FUNC_END, err, 0, 0, 0, 0); } - - /* 
;________________________________________________________________________________ ; @@ -570,27 +490,27 @@ static void hfs_unmap_free_extent(struct hfsmount *hfsmp, u_int32_t startingBloc ;________________________________________________________________________________ */ static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start, - u_int32_t numBlocks, struct jnl_trim_list *list) { - - u_int64_t offset; - u_int64_t length; - int error = 0; - - if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) { - int extent_no = list->extent_count; - offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; - length = (u_int64_t) numBlocks * hfsmp->blockSize; - - - list->extents[extent_no].offset = offset; - list->extents[extent_no].length = length; - list->extent_count++; - if (list->extent_count == list->allocated_count) { - error = hfs_issue_unmap (hfsmp, list); - } - } - - return error; + u_int32_t numBlocks, struct jnl_trim_list *list) { + + u_int64_t offset; + u_int64_t length; + int error = 0; + + if ((hfsmp->hfs_flags & HFS_UNMAP) && (hfsmp->jnl != NULL)) { + int extent_no = list->extent_count; + offset = (u_int64_t) start * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; + length = (u_int64_t) numBlocks * hfsmp->blockSize; + + + list->extents[extent_no].offset = offset; + list->extents[extent_no].length = length; + list->extent_count++; + if (list->extent_count == list->allocated_count) { + error = hfs_issue_unmap (hfsmp, list); + } + } + + return error; } /* @@ -607,24 +527,22 @@ static int hfs_track_unmap_blocks (struct hfsmount *hfsmp, u_int32_t start, */ static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) { - dk_unmap_t unmap; - int error = 0; - - if (list->extent_count > 0) { - bzero(&unmap, sizeof(unmap)); - unmap.extents = list->extents; - unmap.extentsCount = list->extent_count; - - /* Issue a TRIM and flush them out */ - error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); - - bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t))); - list->extent_count = 0; - } - return error; -} + dk_unmap_t unmap; + int error = 0; + if (list->extent_count > 0) { + bzero(&unmap, sizeof(unmap)); + unmap.extents = list->extents; + unmap.extentsCount = list->extent_count; + /* Issue a TRIM and flush them out */ + error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); + + bzero (list->extents, (list->allocated_count * sizeof(dk_extent_t))); + list->extent_count = 0; + } + return error; +} /* ;________________________________________________________________________________ @@ -642,25 +560,26 @@ static int hfs_issue_unmap (struct hfsmount *hfsmp, struct jnl_trim_list *list) ; numBlocks - The number of allocation blocks being allocated. 
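 ;
 ; Illustrative example (hypothetical numbers): if an earlier transaction freed
 ; allocation blocks 100-115 and queued them on the journal's TRIM list, and this
 ; transaction now reallocates blocks 104-107, the journal_trim_remove_extent()
 ; call below must shrink the pending list to 100-103 and 108-115 so the device
 ; is never told to unmap live file data.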
;________________________________________________________________________________ */ + static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks) { u_int64_t offset; u_int64_t length; int err; - + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); - + if (hfsmp->jnl != NULL) { offset = (u_int64_t) startingBlock * hfsmp->blockSize + (u_int64_t) hfsmp->hfsPlusIOPosOffset; length = (u_int64_t) numBlocks * hfsmp->blockSize; - + err = journal_trim_remove_extent(hfsmp->jnl, offset, length); if (err) { - printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent", err); + printf("hfs_unmap_alloc_extent: error %d from journal_trim_remove_extent for vol=%s", err, hfsmp->vcbVN); } } - + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_ALLOC | DBG_FUNC_END, err, 0, 0, 0, 0); } @@ -694,16 +613,17 @@ static void hfs_unmap_alloc_extent(struct hfsmount *hfsmp, u_int32_t startingBlo ; extents - An array of extents (byte ranges) that were freed. ;________________________________________________________________________________ */ + __private_extern__ void hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents) { uint32_t i; uint32_t startBlock, numBlocks; struct hfsmount *hfsmp = arg; - + if (hfs_kdebug_allocation & HFSDBG_UNMAP_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_UNMAP_CALLBACK | DBG_FUNC_START, 0, extent_count, 0, 0, 0); - + for (i=0; i<extent_count; i++) { startBlock = (extents[i].offset - hfsmp->hfsPlusIOPosOffset) / hfsmp->blockSize; @@ -716,14 +636,96 @@ hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents) } +/* + ;________________________________________________________________________________ + ; + ; Routine: CheckUnmappedBytes + ; + ; Function: From the specified inputs, determine if the extent in question overlaps + ; space that was recently freed, where the recently freed space may still be + ; lingering in an uncommitted journal transaction. This may have data safety + ; implications. The intended use is to decide whether or not to force a journal flush + ; before allowing file data I/O to be issued. If we did not do this + ; then it would be possible to issue the file I/O ahead of the + ; journal, resulting in data being overwritten if the transaction either + ; is not committed or cannot be replayed. + ; + ; NOTE: This function assumes that the journal and catalog/extent locks are held. + ; + ; Input Arguments: + ; hfsmp - The volume containing the allocation blocks. + ; blockno - start of the extent in question (in allocation blocks) + ; numblocks - number of blocks in the extent. + ; recently_freed - output pointer containing whether or not the blocks were freed recently + ; overlap_end - end of the overlap between the argument extent and the trim list (in allocation blocks) + ; + ; Output: + ; + ; Returns 0 if we could determine extent validity for this (or a previous transaction) + ; Returns errno if there was an error + ; + ; If returned 0, then recently freed will contain a boolean that indicates + ; that it was recently freed.
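+ ;
+ ; Worked example (hypothetical numbers): with blockSize = 4096 and
+ ; hfsPlusIOPosOffset = 0, an extent starting at allocation block 1000 that is
+ ; 16 blocks long converts to device_offset = 1000 * 4096 = 4096000 bytes and
+ ; numbytes = 16 * 4096 = 65536 bytes; these are the byte ranges that get
+ ; compared against the journal's pending TRIM list in the code below.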
+ ;________________________________________________________________________________ + */ + +u_int32_t +CheckUnmappedBytes (struct hfsmount *hfsmp, uint64_t blockno, uint64_t numblocks, int *recently_freed, uint32_t *overlap_end) { + uint64_t device_offset; + uint64_t numbytes; + uint32_t err = 0; + uint64_t lba_overlap_end; + + if (hfsmp->jnl != NULL) { + /* + * Convert the allocation block # and the number of blocks into device-relative + * offsets so that they can be compared using the TRIM list. + */ + uint64_t device_sz = hfsmp->hfs_logical_bytes; + device_offset = blockno * ((uint64_t)hfsmp->blockSize); + device_offset += hfsmp->hfsPlusIOPosOffset; + numbytes = (((uint64_t)hfsmp->blockSize) * numblocks); + + /* + * Since we check that the device_offset isn't too large, it's safe to subtract it + * from the size in the second check. + */ + if ((device_offset >= device_sz) || (numbytes > (device_sz - device_offset))) { + return EINVAL; + } + + /* Ask the journal if this extent overlaps with any pending TRIMs */ + if (journal_trim_extent_overlap (hfsmp->jnl, device_offset, numbytes, &lba_overlap_end)) { + *recently_freed = 1; + + /* Convert lba_overlap_end back into allocation blocks */ + uint64_t end_offset = lba_overlap_end - hfsmp->hfsPlusIOPosOffset; + end_offset = end_offset / ((uint64_t) hfsmp->blockSize); + *overlap_end = (uint32_t) end_offset; + } + else { + *recently_freed = 0; + } + err = 0; + } + else { + /* There may not be a journal. In that case, always return success. */ + *recently_freed = 0; + } + return err; + +} + + /* ;________________________________________________________________________________ ; - ; Routine: UnmapBlocks + ; Routine: ScanUnmapBlocks ; - ; Function: Traverse the bitmap, and issue DKIOCUNMAPs to the underlying + ; Function: Traverse the bitmap, and potentially issue DKIOCUNMAPs to the underlying ; device as needed so that the underlying disk device is as ; up-to-date as possible with which blocks are unmapped. + ; Additionally build up the summary table as needed. ; ; Input Arguments: ; hfsmp - The volume containing the allocation blocks. 
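 ;
 ; Sizing note (illustrative arithmetic): the trim list built below holds
 ; PAGE_SIZE / sizeof(dk_extent_t) entries; with a hypothetical 4096-byte page
 ; and a 16-byte dk_extent_t, that is 256 extents batched per DKIOCUNMAP ioctl.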
@@ -731,46 +733,80 @@ hfs_trim_callback(void *arg, uint32_t extent_count, const dk_extent_t *extents) */ __private_extern__ -u_int32_t UnmapBlocks (struct hfsmount *hfsmp) { +u_int32_t ScanUnmapBlocks (struct hfsmount *hfsmp) +{ u_int32_t blocks_scanned = 0; int error = 0; - struct jnl_trim_list trimlist; - - /* - *struct jnl_trim_list { - uint32_t allocated_count; - uint32_t extent_count; - dk_extent_t *extents; - }; - */ - bzero (&trimlist, sizeof(trimlist)); - if (CONFIG_HFS_TRIM) { - int alloc_count = PAGE_SIZE / sizeof(dk_extent_t); - void *extents = kalloc (alloc_count * sizeof(dk_extent_t)); - if (extents == NULL) { - return ENOMEM; - } - trimlist.extents = (dk_extent_t*)extents; - trimlist.allocated_count = alloc_count; - trimlist.extent_count = 0; - - - - while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){ - error = hfs_alloc_scan_block (hfsmp, blocks_scanned, hfsmp->totalBlocks, - &blocks_scanned, &trimlist); - if (error) { - printf("HFS: bitmap unmap scan error: %d\n", error); - break; - } - } - if (error == 0) { - hfs_issue_unmap(hfsmp, &trimlist); - } - if (trimlist.extents) { - kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t))); - } + struct jnl_trim_list trimlist; + + /* + *struct jnl_trim_list { + uint32_t allocated_count; + uint32_t extent_count; + dk_extent_t *extents; + }; + */ + + /* + * The scanning itself here is not tied to the presence of CONFIG_HFS_TRIM + * which is now enabled for most architectures. Instead, any trim related + * work should be tied to whether the underlying storage media supports + * UNMAP, as any solid state device would on desktop or embedded. + * + * We do this because we may want to scan the full bitmap on desktop + * for spinning media for the purposes of building up the + * summary table. + * + * We also avoid sending TRIMs down to the underlying media if the mount is read-only. + */ + + if ((hfsmp->hfs_flags & HFS_UNMAP) && + ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { + /* If the underlying device supports unmap and the mount is read-write, initialize */ + int alloc_count = PAGE_SIZE / sizeof(dk_extent_t); + void *extents = kalloc (alloc_count * sizeof(dk_extent_t)); + if (extents == NULL) { + return ENOMEM; + } + bzero (&trimlist, sizeof(trimlist)); + trimlist.extents = (dk_extent_t*)extents; + trimlist.allocated_count = alloc_count; + trimlist.extent_count = 0; + } + + while ((blocks_scanned < hfsmp->totalBlocks) && (error == 0)){ + + error = hfs_alloc_scan_range (hfsmp, blocks_scanned, &blocks_scanned, &trimlist); + + if (error) { + printf("HFS: bitmap scan range error: %d on vol=%s\n", error, hfsmp->vcbVN); + break; + } + } + + if ((hfsmp->hfs_flags & HFS_UNMAP) && + ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { + if (error == 0) { + hfs_issue_unmap(hfsmp, &trimlist); + } + if (trimlist.extents) { + kfree (trimlist.extents, (trimlist.allocated_count * sizeof(dk_extent_t))); + } + } + + /* + * This is in an #if block because hfs_validate_summary prototype and function body + * will only show up if ALLOC_DEBUG is on, to save wired memory ever so slightly. + */ +#if ALLOC_DEBUG + sanity_check_free_ext(hfsmp, 1); + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* Validate the summary table too! 
*/ + hfs_validate_summary(hfsmp); + printf("HFS: Summary validation complete on %s\n", hfsmp->vcbVN); } +#endif + return error; } @@ -807,21 +843,24 @@ u_int32_t UnmapBlocks (struct hfsmount *hfsmp) { ; Output: ; (result) - Error code, zero for successful allocation ; *startBlock - Actual starting allocation block - ; *actualBlocks - Actual number of allocation blocks allocated + ; *actualBlocks - Actual number of allocation blocks allocated ; ; Side effects: ; The volume bitmap is read and updated; the volume bitmap cache may be changed. ;________________________________________________________________________________ */ OSErr BlockAllocate ( - ExtendedVCB *vcb, /* which volume to allocate space on */ - u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */ - u_int32_t minBlocks, /* desired number of blocks to allocate */ - u_int32_t maxBlocks, /* maximum number of blocks to allocate */ - u_int32_t flags, /* option flags */ - u_int32_t *actualStartBlock, /* actual first block of allocation */ - u_int32_t *actualNumBlocks) /* number of blocks actually allocated; if forceContiguous */ - /* was zero, then this may represent fewer than minBlocks */ + ExtendedVCB *vcb, /* which volume to allocate space on */ + u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */ + u_int32_t minBlocks, /* desired number of blocks to allocate */ + u_int32_t maxBlocks, /* maximum number of blocks to allocate */ + u_int32_t flags, /* option flags */ + u_int32_t *actualStartBlock, /* actual first block of allocation */ + u_int32_t *actualNumBlocks) +/* + * actualNumBlocks is the number of blocks actually allocated; + * if forceContiguous was zero, then this may represent fewer than minBlocks + */ { u_int32_t freeBlocks; OSErr err; @@ -829,10 +868,11 @@ OSErr BlockAllocate ( struct hfsmount *hfsmp; Boolean useMetaZone; Boolean forceContiguous; + Boolean forceFlush; if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, flags, 0); - + if (flags & HFS_ALLOC_FORCECONTIG) { forceContiguous = true; } else { @@ -845,19 +885,14 @@ OSErr BlockAllocate ( useMetaZone = false; } - //TODO: Figure out when we need to re-enable the RB-Tree. - - - //TODO: Make sure we use allocLimit when appropriate. - - /* - * TODO: Update BlockAllocate and its sub-functions to do cooperative allocation and bitmap scanning - * in conjunction with the Generate Tree function. If the red-black tree does not currently contain - * an allocation block of appropriate size, then start scanning blocks FOR the tree generation function until - * we find what we need. We'll update the tree fields when we're done, indicating that we've advanced the - * high water mark for the tree. - */ - + if (flags & HFS_ALLOC_FLUSHTXN) { + forceFlush = true; + } + else { + forceFlush = false; + } + + // // Initialize outputs in case we get an error // @@ -865,8 +900,8 @@ OSErr BlockAllocate ( *actualNumBlocks = 0; hfsmp = VCBTOHFS (vcb); freeBlocks = hfs_freeblks(hfsmp, 0); - - + + /* Skip free block check if blocks are being allocated for relocating * data during truncating a volume. * @@ -906,8 +941,8 @@ OSErr BlockAllocate ( // next block to allocate from.
// if (startingBlock == 0) { - HFS_MOUNT_LOCK(vcb, TRUE); - + hfs_lock_mount (hfsmp); + /* Sparse Allocation and nextAllocation are both used even if the R/B Tree is on */ if (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) { startingBlock = vcb->sparseAllocation; @@ -915,11 +950,11 @@ OSErr BlockAllocate ( else { startingBlock = vcb->nextAllocation; } - HFS_MOUNT_UNLOCK(vcb, TRUE); + hfs_unlock_mount(hfsmp); updateAllocPtr = true; } - - + + if (startingBlock >= vcb->allocLimit) { startingBlock = 0; /* overflow so start at beginning */ } @@ -930,7 +965,7 @@ OSErr BlockAllocate ( // if (forceContiguous) { err = BlockAllocateContig(vcb, startingBlock, minBlocks, maxBlocks, - useMetaZone, actualStartBlock, actualNumBlocks); + flags, actualStartBlock, actualNumBlocks); /* * If we allocated from a new position then also update the roving allocator. * This will keep the roving allocation pointer up-to-date even @@ -939,51 +974,12 @@ OSErr BlockAllocate ( * the block to vend out. */ if ((err == noErr) && - (*actualStartBlock > startingBlock) && - ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || - (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { + (*actualStartBlock > startingBlock) && + ((*actualStartBlock < VCBTOHFS(vcb)->hfs_metazone_start) || + (*actualStartBlock > VCBTOHFS(vcb)->hfs_metazone_end))) { updateAllocPtr = true; } - } else { -#if CONFIG_HFS_ALLOC_RBTREE - /* - * If the RB-Tree Allocator is live, just go straight for a - * BlockAllocateAny call and return the result. Otherwise, - * resort to the bitmap scanner. - */ - if (hfs_isrbtree_active(VCBTOHFS(vcb))) { - /* Start by trying to allocate from the starting block forward */ - err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit, - maxBlocks, useMetaZone, actualStartBlock, - actualNumBlocks); - - /* - * Because the RB-Tree is live, the previous call to BlockAllocateAny - * will use the rbtree variant. As a result, it will automatically search the - * metadata zone for a valid extent if needed. If we get a return value of - * noErr, we found a valid extent and we can skip to the end. If the error indicates - * the disk is full, that's an equally valid return code and we can skip to the end, too. - */ - if (err == noErr || err == dskFulErr) { - goto Exit; - } - else { - //TODO: only tear down tree if the tree is finished building. - //Make sure to handle the ENOSPC condition properly. We shouldn't error out in that case. - /* Tear down tree if we encounter an error */ - if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) { - hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED; - DestroyTrees(hfsmp); - ResetVCBFreeExtCache(hfsmp); - } - else { - goto Exit; - } - // fall through to the normal allocation since the rb-tree allocation failed. - } - } -#endif - + } else { /* * Scan the bitmap once, gather the N largest free extents, then * allocate from these largest extents. Repeat as needed until @@ -995,31 +991,101 @@ OSErr BlockAllocate ( * we are using the red-black tree for allocations. If we jettison * the tree, then we will reset the free-extent cache and start over. */ - + + /* Disable HFS_ALLOC_FLUSHTXN if needed */ + if (forceFlush) { + flags &= ~HFS_ALLOC_FLUSHTXN; + } + + /* + * BlockAllocateKnown only examines the free extent cache; anything in there will + * have been committed to stable storage already. 
+		 */
 		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);
+		/* dskFulErr out of BlockAllocateKnown indicates an empty Free Extent Cache */
 
 		if (err == dskFulErr) {
 			/*
 			 * Now we have to do a bigger scan.  Start at startingBlock and go up until the
-			 * allocation limit.
+			 * allocation limit.  We 'trust' the summary bitmap in this call, if it tells us
+			 * that it could not find any free space.
 			 */
 			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit,
-					maxBlocks, useMetaZone, actualStartBlock,
-					actualNumBlocks);
+					maxBlocks, flags, true,
+					actualStartBlock, actualNumBlocks);
 		}
 		if (err == dskFulErr) {
 			/*
-			 * We may be out of space in the normal zone; go up to the starting block from
-			 * the start of the volume.
+			 * Vary the behavior here if the summary table is on or off.
+			 * If it is on, then we don't trust it if we get into this case and
+			 * basically do a full scan for maximum coverage.
+			 * If it is off, then we trust the above and go up until the startingBlock.
 			 */
-			err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
-					useMetaZone, actualStartBlock,
-					actualNumBlocks);
+			if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
+				err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
+						flags, false,
+						actualStartBlock, actualNumBlocks);
+			}
+			else {
+				err = BlockAllocateAny(vcb, 1, startingBlock, maxBlocks,
+						flags, false,
+						actualStartBlock, actualNumBlocks);
+			}
+
+			/*
+			 * Last Resort: Find/use blocks that may require a journal flush.
+			 */
+			if (err == dskFulErr && forceFlush) {
+				flags |= HFS_ALLOC_FLUSHTXN;
+				err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
+						flags, false,
+						actualStartBlock, actualNumBlocks);
+			}
 		}
 	}
 
 Exit:
+	if ((hfsmp->hfs_flags & HFS_CS) && *actualNumBlocks != 0) {
+		errno_t ec;
+		_dk_cs_map_t cm;
+		uint64_t mapped_blocks;
+
+		cm.cm_extent.offset = (uint64_t)*actualStartBlock * hfsmp->blockSize + hfsmp->hfsPlusIOPosOffset;
+		cm.cm_extent.length = (uint64_t)*actualNumBlocks * hfsmp->blockSize;
+		cm.cm_bytes_mapped = 0;
+		ec = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSMAP, (caddr_t)&cm, 0, vfs_context_current());
+		if (ec != 0 && ec != ENOSPC) {
+			printf ("VNOP_IOCTL(_DKIOCCSMAP) returned an unexpected error code=%d\n", ec);
+			err = ec;
+			goto Exit_CS;
+		}
+		mapped_blocks = cm.cm_bytes_mapped / hfsmp->blockSize;
+		/* CoreStorage returned more blocks than requested */
+		if (mapped_blocks > *actualNumBlocks) {
+			printf ("VNOP_IOCTL(_DKIOCCSMAP) mapped too many blocks, mapped=%lld, actual=%d\n",
+					mapped_blocks, *actualNumBlocks);
+		}
+		if (*actualNumBlocks > mapped_blocks) {
+			if (forceContiguous && mapped_blocks < minBlocks) {
+				mapped_blocks = 0;
+			}
+		}
+		uint64_t numBlocksToFree = *actualNumBlocks - mapped_blocks;
+		uint64_t firstBlockToFree = *actualStartBlock + mapped_blocks;
+		if (numBlocksToFree > 0) {
+			err = BlockDeallocate(vcb, firstBlockToFree, numBlocksToFree, flags);
+			if (err != noErr) {
+				printf ("BlockDeallocate failed (err=%d)\n", err);
+				goto Exit_CS;
+			}
+		}
+		*actualNumBlocks = mapped_blocks;
+		if (*actualNumBlocks == 0 && err == noErr) {
+			err = dskFulErr;
+		}
+	}
+Exit_CS:
 	// if we actually allocated something then go update the
 	// various bits of state that we maintain regardless of
 	// whether there was an error (i.e. partial allocations
@@ -1034,7 +1100,7 @@ Exit:
 	// the file is closed or its EOF changed.  Leaving the allocation pointer at the
 	// start of the last allocation will avoid unnecessary fragmentation in this case.
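
Stripped of the CoreStorage handling, the non-contiguous path above reduces to a cascade of progressively more expensive searches. A condensed sketch of that control flow (a paraphrase only — it glosses over the summary-on/off distinction in stage 3, where the real code scans only up to startingBlock when the summary table is off):

	static OSErr allocate_any_cascade(ExtendedVCB *vcb, u_int32_t startingBlock,
			u_int32_t maxBlocks, u_int32_t flags, Boolean forceFlush,
			u_int32_t *actualStartBlock, u_int32_t *actualNumBlocks)
	{
		OSErr err;

		/* 1) Cheapest first: the free extent cache (already stable on disk). */
		err = BlockAllocateKnown(vcb, maxBlocks, actualStartBlock, actualNumBlocks);

		/* 2) Scan from the roving pointer to the alloc limit, trusting the summary table. */
		if (err == dskFulErr) {
			err = BlockAllocateAny(vcb, startingBlock, vcb->allocLimit, maxBlocks,
					flags, true, actualStartBlock, actualNumBlocks);
		}

		/* 3) Re-scan for full coverage, no longer trusting the summary table. */
		if (err == dskFulErr) {
			err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
					flags, false, actualStartBlock, actualNumBlocks);
		}

		/* 4) Last resort: accept blocks whose reuse forces a journal flush. */
		if (err == dskFulErr && forceFlush) {
			err = BlockAllocateAny(vcb, 1, vcb->allocLimit, maxBlocks,
					flags | HFS_ALLOC_FLUSHTXN, false,
					actualStartBlock, actualNumBlocks);
		}
		return err;
	}
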
// - HFS_MOUNT_LOCK(vcb, TRUE); + hfs_lock_mount (hfsmp); lck_spin_lock(&hfsmp->vcbFreeExtLock); if (vcb->vcbFreeExtCnt == 0 && vcb->hfs_freed_block_count == 0) { @@ -1065,11 +1131,11 @@ Exit: vcb->freeBlocks -= *actualNumBlocks; } MarkVCBDirty(vcb); - HFS_MOUNT_UNLOCK(vcb, TRUE); + hfs_unlock_mount(hfsmp); hfs_generate_volume_notifications(VCBTOHFS(vcb)); } - + if (ALLOC_DEBUG) { if (err == noErr) { if (*actualStartBlock >= hfsmp->totalBlocks) { @@ -1078,17 +1144,17 @@ Exit: if (*actualStartBlock >= hfsmp->allocLimit) { panic ("BlockAllocate: vending block past allocLimit!"); } - + if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->totalBlocks) { panic ("BlockAllocate: vending too many invalid blocks!"); } - + if ((*actualStartBlock + *actualNumBlocks) >= hfsmp->allocLimit) { panic ("BlockAllocate: vending too many invalid blocks past allocLimit!"); } } } - + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_ALLOCATE | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); @@ -1118,15 +1184,15 @@ Exit: */ OSErr BlockDeallocate ( - ExtendedVCB *vcb, // Which volume to deallocate space on - u_int32_t firstBlock, // First block in range to deallocate - u_int32_t numBlocks, // Number of contiguous blocks to deallocate - u_int32_t flags) + ExtendedVCB *vcb, // Which volume to deallocate space on + u_int32_t firstBlock, // First block in range to deallocate + u_int32_t numBlocks, // Number of contiguous blocks to deallocate + u_int32_t flags) { OSErr err; struct hfsmount *hfsmp; hfsmp = VCBTOHFS(vcb); - + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_DEALLOCATE | DBG_FUNC_START, firstBlock, numBlocks, flags, 0, 0); @@ -1137,59 +1203,36 @@ OSErr BlockDeallocate ( err = noErr; goto Exit; } - - + + if (ALLOC_DEBUG) { if (firstBlock >= hfsmp->totalBlocks) { panic ("BlockDeallocate: freeing invalid blocks!"); } - + if ((firstBlock + numBlocks) >= hfsmp->totalBlocks) { panic ("BlockDeallocate: freeing too many invalid blocks!"); } } - - - /* - * If we're using the red-black tree code, then try to free the - * blocks by marking them in the red-black tree first. If the tree - * is not active for whatever reason (or we're not using the - * R/B Tree code at all), then go straight for the BlockMarkFree - * function. - * - * Remember that we can get into this function if the tree isn't finished - * building. In that case, check to see if the block we're de-allocating is - * past the high watermark + * If we're using the summary bitmap, then try to mark the bits + * as potentially usable/free before actually deallocating them. + * It is better to be slightly speculative here for correctness. */ -#if CONFIG_HFS_ALLOC_RBTREE - if (hfs_isrbtree_active(VCBTOHFS(vcb))) { - /* - * BlockMarkFreeRBTree deals with the case where we are resizing the - * filesystem (shrinking), and we need to manipulate the bitmap beyond the portion - * that is currenly controlled by the r/b tree. - */ - - //TODO: Update multiplexing code for the half-finished case. - err = BlockMarkFreeRBTree(vcb, firstBlock, numBlocks); - adjustFreeExtCache = 0; - } - else { - err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true); - } -#else + (void) hfs_release_summary (hfsmp, firstBlock, numBlocks); + err = BlockMarkFreeInternal(vcb, firstBlock, numBlocks, true); -#endif - if (err) + + if (err) { goto Exit; + } // // Update the volume's free block count, and mark the VCB as dirty. 
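
The hfs_release_summary() call in the deallocation path above can afford to run before the bitmap write because of an asymmetry in what a summary bit is allowed to get wrong: over-reporting free space only costs a wasted scan, while under-reporting would hide usable blocks from the trusting search paths. An illustrative helper (hypothetical — the real index math lives in hfs_get_summary_index()) shows the granularity involved, assuming one summary bit per bitmap I/O block as the scanning code implies:

	/*
	 * Illustrative only: which summary bit describes allocation block 'blk'?
	 * One bitmap block of vcbVBMIOSize bytes tracks vcbVBMIOSize * 8
	 * allocation blocks, so each summary bit spans that many blocks.
	 */
	static uint32_t summary_bit_for_block(struct hfsmount *hfsmp, uint32_t blk)
	{
		uint32_t blocks_per_bit = hfsmp->vcbVBMIOSize * 8;
		return blk / blocks_per_bit;
	}
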
 	//
-	HFS_MOUNT_LOCK(vcb, TRUE);
+	hfs_lock_mount(hfsmp);
 	/*
 	 * Do not update the free block count.  This flag is specified
 	 * when a volume is being truncated.
 	 */
@@ -1210,7 +1253,7 @@ OSErr BlockDeallocate (
 	 * calls us back to tell us it wrote the transaction to disk.
 	 */
 	(void) add_free_extent_cache(vcb, firstBlock, numBlocks);
-	
+
 	/*
 	 * In the journal case, we'll only update sparseAllocation once the
 	 * free extent cache becomes empty (when we remove the last entry
@@ -1222,9 +1265,9 @@ OSErr BlockDeallocate (
 			vcb->sparseAllocation = firstBlock;
 		}
 	}
-	
+
 	MarkVCBDirty(vcb);
-	HFS_MOUNT_UNLOCK(vcb, TRUE);
+	hfs_unlock_mount(hfsmp);
 	hfs_generate_volume_notifications(VCBTOHFS(vcb));
 Exit:
@@ -1260,7 +1303,7 @@ MetaZoneFreeBlocks(ExtendedVCB *vcb)
 	bit = VCBTOHFS(vcb)->hfs_metazone_start;
 	if (bit == 1)
 		bit = 0;
-	
+
 	lastbit = VCBTOHFS(vcb)->hfs_metazone_end;
 	bytesperblock = vcb->vcbVBMIOSize;
@@ -1300,8 +1343,8 @@ MetaZoneFreeBlocks(ExtendedVCB *vcb)
  * outside the metadata allocation zone.
  */
 static u_int32_t NextBitmapBlock(
-	ExtendedVCB	*vcb,
-	u_int32_t	bit)
+	ExtendedVCB	*vcb,
+	u_int32_t	bit)
 {
 	struct hfsmount *hfsmp = VCBTOHFS(vcb);
@@ -1311,7 +1354,7 @@ static u_int32_t NextBitmapBlock(
 	 *   Skip over metadata allocation zone.
 	 */
 	if ((bit >= hfsmp->hfs_metazone_start) &&
-	    (bit <= hfsmp->hfs_metazone_end)) {
+		(bit <= hfsmp->hfs_metazone_end)) {
 		bit = hfsmp->hfs_metazone_end + 1;
 	}
 	return (bit);
@@ -1336,10 +1379,10 @@ static u_int32_t NextBitmapBlock(
 ;_______________________________________________________________________
 */
 static OSErr ReadBitmapBlock(
-	ExtendedVCB	*vcb,
-	u_int32_t	bit,
-	u_int32_t	**buffer,
-	uintptr_t	*blockRef)
+	ExtendedVCB	*vcb,
+	u_int32_t	bit,
+	u_int32_t	**buffer,
+	uintptr_t	*blockRef)
 {
 	OSErr err;
 	struct buf *bp = NULL;
@@ -1358,13 +1401,17 @@ static OSErr ReadBitmapBlock(
 	blockSize = (u_int32_t)vcb->vcbVBMIOSize;
 	block = (daddr64_t)(bit / (blockSize * kBitsPerByte));
 
-	if (vcb->vcbSigWord == kHFSPlusSigWord) {
+	/* HFS+ / HFSX */
+	if (vcb->vcbSigWord != kHFSSigWord) {
 		vp = vcb->hfs_allocation_vp;	/* use allocation file vnode */
-
-	} else /* hfs */ {
+	}
+#if CONFIG_HFS_STD
+	else {
+		/* HFS Standard */
 		vp = VCBTOHFS(vcb)->hfs_devvp;	/* use device I/O vnode */
 		block += vcb->vcbVBMSt;		/* map to physical block */
 	}
+#endif
 
 	err = (int)buf_meta_bread(vp, block, blockSize, NOCRED, &bp);
 
@@ -1389,67 +1436,154 @@ static OSErr ReadBitmapBlock(
 /*
 ;_______________________________________________________________________
 ;
-; Routine:	ReleaseBitmapBlock
+; Routine:	ReadBitmapRange
 ;
-; Function:	Relase a bitmap block.
+; Function:	Read in a range of the bitmap starting at the given offset.
+;		Use the supplied size to determine the amount of I/O to generate
+;		against the bitmap file. Return a pointer to the bitmap block.
 ;
 ; Inputs:
-;	vcb
-;	blockRef
-;	dirty
+;	hfsmp		-- Pointer to hfs mount
+;	offset		-- byte offset into the bitmap file
+;	size		-- How much I/O to generate against the bitmap file.
+;
+; Outputs:
+;	buffer		-- Pointer to bitmap block data corresponding to "block"
+;	blockRef	-- struct 'buf' pointer which MUST be released in a subsequent call.
;_______________________________________________________________________
 */
-static OSErr ReleaseBitmapBlock(
-	ExtendedVCB	*vcb,
-	uintptr_t	blockRef,
-	Boolean		dirty)
+static OSErr ReadBitmapRange(struct hfsmount *hfsmp, uint32_t offset,
+		uint32_t iosize, uint32_t **buffer, struct buf **blockRef)
 {
-	struct buf *bp = (struct buf *)blockRef;
-
-	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
-		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
-	if (blockRef == 0) {
-		if (dirty)
-			panic("hfs: ReleaseBitmapBlock: missing bp");
-		return (0);
-	}
+	OSErr err;
+	struct buf *bp = NULL;
+	struct vnode *vp = NULL;
+	daddr64_t block;
 
-	if (bp) {
-		if (dirty) {
-			// XXXdbg
-			struct hfsmount *hfsmp = VCBTOHFS(vcb);
-
-			if (hfsmp->jnl) {
-				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
-			} else {
-				buf_bdwrite(bp);
-			}
-		} else {
-			buf_brelse(bp);
-		}
+	/* This function isn't supported for HFS standard */
+	if (hfsmp->vcbSigWord != kHFSPlusSigWord) {
+		return EINVAL;
 	}
 
-	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
-		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_START, offset, iosize, 0, 0, 0);
+	}
+
+	/*
+	 * volume bitmap blocks are protected by the allocation file lock
+	 */
+	REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false);
+
+	vp = hfsmp->hfs_allocation_vp;	/* use allocation file vnode */
+
+	/*
+	 * The byte offset argument must be converted into bitmap-relative logical
+	 * block numbers before using it in buf_meta_bread.
+	 *
+	 * buf_meta_bread (and the things it calls) will eventually try to
+	 * reconstruct the byte offset into the file by multiplying the logical
+	 * block number passed in below by the vcbVBMIOSize field in the mount
+	 * point.  So we prepare for that by converting the byte offset back into
+	 * logical blocks in terms of VBMIOSize units.
+	 *
+	 * The amount of I/O requested and the byte offset should be computed
+	 * based on the helper function in the frame that called us, so we can
+	 * get away with just doing a simple divide here.
+	 */
+	block = (daddr64_t)(offset / hfsmp->vcbVBMIOSize);
+
+	err = (int) buf_meta_bread(vp, block, iosize, NOCRED, &bp);
+
+	if (bp) {
+		if (err) {
+			buf_brelse(bp);
+			*blockRef = 0;
+			*buffer = NULL;
+		} else {
+			*blockRef = bp;
+			*buffer = (u_int32_t *)buf_dataptr(bp);
+		}
+	}
+
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+		KERNEL_DEBUG_CONSTANT(HFSDBG_READ_BITMAP_RANGE | DBG_FUNC_END, err, 0, 0, 0, 0);
+	}
+
+	return err;
+}
 
-	return (0);
-}
 
 /*
- * ReleaseScanBitmapBlock is used to release struct bufs that were
- * created for use by bitmap scanning code.  We want to force
- * them to be purged out of the buffer cache ASAP, so we'll release them differently
- * than in the ReleaseBitmapBlock case.  Alternately, we know that we're only reading
- * the blocks, so we will never dirty them as part of the tree building scan.
- */
+;_______________________________________________________________________
+;
+; Routine:	ReleaseBitmapBlock
+;
+; Function:	Release a bitmap block.
+;
+; Inputs:
+;	vcb
+;	blockRef
+;	dirty
+;_______________________________________________________________________
+*/
+static OSErr ReleaseBitmapBlock(
+	ExtendedVCB	*vcb,
+	uintptr_t	blockRef,
+	Boolean		dirty)
+{
+	struct buf *bp = (struct buf *)blockRef;
 
-static OSErr ReleaseScanBitmapBlock(struct buf *bp ) {
-	
-	if (bp == NULL) {
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_START, dirty, 0, 0, 0, 0);
+
+	if (blockRef == 0) {
+		if (dirty)
+			panic("hfs: ReleaseBitmapBlock: missing bp");
 		return (0);
 	}
-	
+
+	if (bp) {
+		if (dirty) {
+			// XXXdbg
+			struct hfsmount *hfsmp = VCBTOHFS(vcb);
+
+			if (hfsmp->jnl) {
+				journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
+			} else {
+				buf_bdwrite(bp);
+			}
+		} else {
+			buf_brelse(bp);
+		}
+	}
+
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED)
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_BITMAP_BLOCK | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+	return (0);
+}
+
+/*
+ * ReleaseScanBitmapRange
+ *
+ * This is used to release struct bufs that were created for use by
+ * bitmap scanning code.  Because they may be of sizes different than the
+ * typical runtime manipulation code, we want to force them to be purged out
+ * of the buffer cache ASAP, so we'll release them differently than in the
+ * ReleaseBitmapBlock case.
+ *
+ * Additionally, because we know that we're only reading the blocks and that they
+ * should have been clean prior to reading them, we will never
+ * issue a write to them (thus dirtying them).
+ */
+
+static OSErr ReleaseScanBitmapRange(struct buf *bp ) {
+
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_START, 0, 0, 0, 0, 0);
+	}
+
 	if (bp) {
 		/* Mark the buffer invalid if it isn't locked, then release it */
 		if ((buf_flags(bp) & B_LOCKED) == 0) {
@@ -1457,10 +1591,12 @@ static OSErr ReleaseScanBitmapBlock(struct buf *bp ) {
 		}
 		buf_brelse(bp);
 	}
-	
+
+	if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) {
+		KERNEL_DEBUG_CONSTANT(HFSDBG_RELEASE_SCAN_BITMAP | DBG_FUNC_END, 0, 0, 0, 0, 0);
+	}
+
 	return (0);
-	
-	
 }
 
 /*
@@ -1478,7 +1614,7 @@ Inputs:
 	startingBlock	Preferred first block for allocation
 	minBlocks	Minimum number of contiguous blocks to allocate
 	maxBlocks	Maximum number of contiguous blocks to allocate
-	useMetaZone
+	flags
 
 Outputs:
 	actualStartBlock	First block of range allocated, or 0 if error
@@ -1486,298 +1622,146 @@ Outputs:
 _______________________________________________________________________
 */
 static OSErr BlockAllocateContig(
-	ExtendedVCB	*vcb,
-	u_int32_t	startingBlock,
-	u_int32_t	minBlocks,
-	u_int32_t	maxBlocks,
-	Boolean		useMetaZone,
-	u_int32_t	*actualStartBlock,
-	u_int32_t	*actualNumBlocks)
+	ExtendedVCB	*vcb,
+	u_int32_t	startingBlock,
+	u_int32_t	minBlocks,
+	u_int32_t	maxBlocks,
+	u_int32_t	flags,
+	u_int32_t	*actualStartBlock,
+	u_int32_t	*actualNumBlocks)
 {
+	OSErr retval = noErr;
+	uint32_t currentStart = startingBlock;
 
-#if CONFIG_HFS_ALLOC_RBTREE
-	if (hfs_isrbtree_active(VCBTOHFS(vcb))) {
-		return BlockAllocateContigRBTree(vcb, startingBlock, minBlocks, maxBlocks, useMetaZone,
-				actualStartBlock, actualNumBlocks, 1);
-	}
-#endif
-	return BlockAllocateContigBitmap(vcb, startingBlock, minBlocks,
-			maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks);
-}
-
-/*
- * Variant of BlockAllocateContig that uses the original bitmap-searching logic
- */
+	uint32_t foundStart = 0;	// values to emit to caller
+	uint32_t foundCount = 0;
 
-static OSErr BlockAllocateContigBitmap(
-
ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t minBlocks, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks) -{ - OSErr err; + uint32_t collision_start = 0; // if we have to re-allocate a recently deleted extent, use this + uint32_t collision_count = 0; - if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) - KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0); + int err; + int allowReuse = (flags & HFS_ALLOC_FLUSHTXN); + Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE); - // - // Find a contiguous group of blocks at least minBlocks long. - // Determine the number of contiguous blocks available (up - // to maxBlocks). - // + int recently_deleted = 0; + struct hfsmount *hfsmp = VCBTOHFS(vcb); - /* - * NOTE: If the only contiguous free extent of at least minBlocks - * crosses startingBlock (i.e. starts before, ends after), then we - * won't find it. Earlier versions *did* find this case by letting - * the second search look past startingBlock by minBlocks. But - * with the free extent cache, this can lead to duplicate entries - * in the cache, causing the same blocks to be allocated twice. - */ - err = BlockFindContiguous(vcb, startingBlock, vcb->allocLimit, minBlocks, - maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks); - if (err == dskFulErr && startingBlock != 0) { - /* - * Constrain the endingBlock so we don't bother looking for ranges - * that would overlap those found in the previous call. - */ - err = BlockFindContiguous(vcb, 1, startingBlock, minBlocks, maxBlocks, - useMetaZone, actualStartBlock, actualNumBlocks); - } - // - // Now mark those blocks allocated. - // - if (err == noErr) - err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks); - if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) - KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); - - return err; -} + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_START, startingBlock, minBlocks, maxBlocks, useMetaZone, 0); -#if CONFIG_HFS_ALLOC_RBTREE -/* - * Variant of BlockAllocateContig that uses the newer red-black tree library - * in order to manage free space extents. This will search the red-black tree - * and return results in the same fashion as BlockAllocateContigBitmap. - * - * Note that this function is invoked from both the red-black tree variant of BlockAllocateany - * as well as BlockAllocateContig. In order to determine when we should vend contiguous chunks over - * locality-based-searches, we use the forceContig argument to determine who called us. 
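
The collision handling in the loop that follows leans on an extent-overlap test (extents_overlap()), whose definition is outside this hunk. A sketch of the half-open-interval math such a predicate must implement, widened to 64 bits so an extent ending at the last allocation block cannot wrap u_int32_t (illustrative only, not necessarily the HFS implementation):

	static Boolean extents_overlap_sketch(u_int32_t start1, u_int32_t count1,
			u_int32_t start2, u_int32_t count2)
	{
		/* Treat each extent as the half-open interval [start, start + count). */
		uint64_t end1 = (uint64_t) start1 + count1;
		uint64_t end2 = (uint64_t) start2 + count2;
		return (start1 < end2) && (start2 < end1);
	}
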
- */ + while ((retval == noErr) && (foundStart == 0) && (foundCount == 0)) { -static OSErr BlockAllocateContigRBTree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t minBlocks, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks, - u_int32_t forceContig) -{ - OSErr err; - struct hfsmount *hfsmp = VCBTOHFS(vcb); - extent_node_t search_sentinel; - extent_node_t *node = NULL; - extent_node_t tempnode; - - bzero (&tempnode, sizeof(extent_node_t)); - - /* Begin search at the end of the file, via startingBlock */ - memset (&search_sentinel, 0, sizeof(extent_node_t)); - search_sentinel.offset = startingBlock; - - *actualStartBlock = 0; - *actualNumBlocks = 0; - - /* - * Find the first available extent that satifies the allocation by searching - * from the starting point and moving forward - */ - node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel); - - if (node) { - *actualStartBlock = node->offset; - *actualNumBlocks = node->length; - } - - /* If we managed to grab at least minBlocks of space, then we're done. */ + /* Try and find something that works. */ + do { + /* + * NOTE: If the only contiguous free extent of at least minBlocks + * crosses startingBlock (i.e. starts before, ends after), then we + * won't find it. Earlier versions *did* find this case by letting + * the second search look past startingBlock by minBlocks. But + * with the free extent cache, this can lead to duplicate entries + * in the cache, causing the same blocks to be allocated twice. + */ + retval = BlockFindContiguous(vcb, currentStart, vcb->allocLimit, minBlocks, + maxBlocks, useMetaZone, true, &foundStart, &foundCount); - if (*actualNumBlocks >= minBlocks) { - if (*actualNumBlocks > maxBlocks) { - *actualNumBlocks = maxBlocks; - } - - - /* Check to see if blocks are already marked as in-use */ - if (ALLOC_DEBUG) { - REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); - if (hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) { - printf("bad node: %p, offset %d, length %d\n", node, node->offset,node->length); - panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n", - *actualStartBlock, *actualNumBlocks); - } - } - - /* - * BlockMarkAllocatedRBTree is responsible for removing the nodes - * from the red-black tree after the bitmap has been updated on-disk. - */ - err = BlockMarkAllocatedRBTree(vcb, *actualStartBlock, *actualNumBlocks); - if (err == noErr) { - - if ( ALLOC_DEBUG ) { - REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); - if (!hfs_isallocated(hfsmp, *actualStartBlock, *actualNumBlocks)) { - panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n", - *actualStartBlock, *actualNumBlocks); + if (retval == dskFulErr && currentStart != 0) { + /* + * We constrain the endingBlock so we don't bother looking for ranges + * that would overlap those found in the previous call, if the summary bitmap + * is not on for this volume. If it is, then we assume that it was not trust + * -worthy and do a full scan. 
+ */ + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + retval = BlockFindContiguous(vcb, 1, vcb->allocLimit, minBlocks, + maxBlocks, useMetaZone, false, &foundStart, &foundCount); } - check_rbtree_extents (VCBTOHFS(vcb), *actualStartBlock, *actualNumBlocks, ASSERT_ALLOC); - } - - return err; + else { + retval = BlockFindContiguous(vcb, 1, currentStart, minBlocks, + maxBlocks, useMetaZone, false, &foundStart, &foundCount); + } + } + } while (0); + + if (retval != noErr) { + goto bailout; } - } - - /* - * We may have failed to grow at the end of the file. We'll try to find - * appropriate free extents, searching by size in the normal allocation zone. - * - * However, if we're allocating on behalf of a sparse device that hasn't explicitly - * requested a contiguous chunk, then we try to search by offset, even if it - * means fragmenting the file. We want all available entries starting - * from the front of the disk to avoid creating new bandfiles. As a result, - * we'll start by searching the offset tree rather than the normal length - * tree. Note that this function can be invoked from BlockAllocateAny, in - * which the minimum block size is 1 block, making it easy to succeed. - */ - search_sentinel.offset = hfsmp->hfs_metazone_end; - search_sentinel.length = minBlocks; - - if ((vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE) && (forceContig == 0)) { - /* just start with the first offset node */ - node = extent_tree_off_search_next(&hfsmp->offset_tree, &search_sentinel); - } - else { - /* - * Otherwise, start from the end of the metadata zone or our next allocation pointer, - * and try to find the first chunk of size >= min. - */ - node = extent_tree_off_search_nextWithSize (&hfsmp->offset_tree, &search_sentinel); - - if (node == NULL) { - extent_node_t *metaend_node; - /* - * Maybe there's a free extent coalesced with the space still in the metadata - * zone. If there is, find it and allocate from the middle of it, starting at - * the end of the metadata zone. - * - * If search_prev yields a result that is not offset == metazone_end, then that - * means no node existed at that offset. If the previous node's offset + length crosses - * the metazone boundary, then allocate from there. If it is too small to - * cross the metazone boundary, then it is of no importance and we'd have to - * report ENOSPC. - */ - metaend_node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel); - - if ((metaend_node) && (metaend_node->offset < hfsmp->hfs_metazone_end)) { - u_int32_t node_end = metaend_node->offset + metaend_node->length; - if (node_end > hfsmp->hfs_metazone_end) { - u_int32_t modified_length = node_end - hfsmp->hfs_metazone_end; - if (modified_length >= minBlocks) { - /* - * Then we can allocate it. Fill in the contents into tempnode, - * and BlockMarkAllocatedRBTree below will take care of the rest. - */ - tempnode.offset = hfsmp->hfs_metazone_end; - tempnode.length = MIN(minBlocks, node_end - tempnode.offset); - node = &tempnode; - } + + /* Do we overlap with the recently found collision extent? */ + if (collision_start) { + if (extents_overlap (foundStart, foundCount, collision_start, collision_count)) { + /* + * We've looped around, and the only thing we could use was the collision extent. + * Since we are allowed to use it, go ahead and do so now. + */ + if(allowReuse) { + /* + * then we couldn't find anything except values which might have been + * recently deallocated. just return our cached value if we are allowed to. 
+				 */
+				foundStart = collision_start;
+				foundCount = collision_count;
+				goto bailout;
+			}
+			else {
+				/* Otherwise, we looped around and couldn't find anything that wouldn't require a journal flush. */
+				retval = dskFulErr;
+				goto bailout;
+			}
+		}
+	}
+
+		/* OK, we know we must not have collided.  See if this one is recently deleted */
+		if (hfsmp->jnl) {
+			recently_deleted = 0;
+			uint32_t nextStart;
+			err = CheckUnmappedBytes (hfsmp, (uint64_t)foundStart,
+					(uint64_t) foundCount, &recently_deleted, &nextStart);
+			if (err == 0) {
+				if(recently_deleted != 0) {
+					/*
+					 * these blocks were recently deleted/deallocated.  Cache the extent, but
+					 * keep searching to see if we can find one that won't collide here.
+					 */
+					if (collision_start == 0) {
+						collision_start = foundStart;
+						collision_count = foundCount;
+					}
+					recently_deleted = 0;
+
+					/*
+					 * advance currentStart to the point just past the overlap we just found.  Note that
+					 * we will automatically loop around to start of the bitmap as needed.
+					 */
+					currentStart = nextStart;
+					/* Unset foundStart/Count to allow it to loop around again. */
+					foundStart = 0;
+					foundCount = 0;
+				}
+			}
+		} // end jnl/deleted case
+
+		/*
+		 * If we found something good, we'd break out of the loop at the top; foundCount
+		 * and foundStart should be set.
+		 */
+
+	} // end while loop.
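
The loop that just closed is easier to audit as a small state machine. The following comment block restates it (a paraphrase of the logic above, not replacement code):

	/*
	 * Search loop, paraphrased:
	 *
	 *   repeat:
	 *     find a contiguous candidate [foundStart, foundStart + foundCount)
	 *     if the candidate overlaps the cached collision extent:
	 *       - HFS_ALLOC_FLUSHTXN set?  take the cached extent (caller flushes)
	 *       - otherwise: nothing usable anywhere -> dskFulErr
	 *     else if the journal reports the candidate was recently freed:
	 *       - remember it as the collision extent (first time only)
	 *       - restart the scan just past it (wrapping around the bitmap)
	 *     else:
	 *       - candidate is safe; fall through to bailout and mark it used
	 */
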
- - } - err = dskFulErr; - } - - if (err == noErr) { - if (ALLOC_DEBUG) { - if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) - panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN); +bailout: + /* mark the blocks as in-use */ + if (retval == noErr) { + *actualStartBlock = foundStart; + *actualNumBlocks = foundCount; + err = BlockMarkAllocatedInternal(vcb, *actualStartBlock, *actualNumBlocks); + + if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) { + KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_CONTIG_BITMAP | DBG_FUNC_END, *actualStartBlock, *actualNumBlocks, 0, 0, 0); } } - else { - *actualStartBlock = 0; - *actualNumBlocks = 0; - } - - return err; - -} -#endif + return retval; + +} /* @@ -1803,58 +1787,67 @@ Outputs: _______________________________________________________________________ */ -/* - * BlockAllocateAny acts as a multiplexer between BlockAllocateAnyRBTree - * and BlockAllocateAnyBitmap, which uses the bitmap scanning logic. - */ - static OSErr BlockAllocateAny( - ExtendedVCB *vcb, - u_int32_t startingBlock, - register u_int32_t endingBlock, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks) + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t endingBlock, + u_int32_t maxBlocks, + u_int32_t flags, + Boolean trustSummary, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks) { - -#if CONFIG_HFS_ALLOC_RBTREE - if (hfs_isrbtree_active(VCBTOHFS(vcb))) { - return BlockAllocateAnyRBTree(vcb, startingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks); - } -#endif - return BlockAllocateAnyBitmap(vcb, startingBlock, endingBlock, maxBlocks, useMetaZone, actualStartBlock, actualNumBlocks); -} + /* + * If it is enabled, scan through the summary table to find the first free block. + * + * If it reports that there are not any free blocks, we could have a false + * positive, so in that case, use the input arguments as a pass through. + */ + uint32_t start_blk = startingBlock; + uint32_t end_blk = endingBlock; + struct hfsmount *hfsmp; + OSErr err; + hfsmp = (struct hfsmount*)vcb; + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + uint32_t suggested_start; + + /* + * If the summary table is enabled, scan through it to find the first free + * block. If there was an error, or we couldn't find anything free in the + * summary table, then just leave the start_blk fields unmodified. We wouldn't + * have gotten to this point if the mount point made it look like there was possibly + * free space in the FS. + */ + err = hfs_find_summary_free (hfsmp, startingBlock, &suggested_start); + if (err == 0) { + start_blk = suggested_start; + } + else { + /* Differentiate between ENOSPC and a more esoteric error in the above call. */ + if ((err == ENOSPC) && (trustSummary)) { + /* + * The 'trustSummary' argument is for doing a full scan if we really + * really, need the space and we think it's somewhere but can't find it in the + * summary table. If it's true, then we trust the summary table and return + * dskFulErr if we couldn't find it above. + */ + return dskFulErr; + } + /* + * If either trustSummary was false or we got a different errno, then we + * want to fall through to the real bitmap single i/o code... 
+ */ + } + } + + err = BlockAllocateAnyBitmap(vcb, start_blk, end_blk, maxBlocks, + flags, actualStartBlock, actualNumBlocks); -#if CONFIG_HFS_ALLOC_RBTREE -/* - * BlockAllocateAnyRBTree finds one or more allocation blocks by using - * the red-black allocation tree to figure out where the free ranges are. - * This function is typically used as a last resort becuase we were unable to - * find the right ranges. Outputs are the same as BlockAllocateAnyBitmap. - */ -static OSErr BlockAllocateAnyRBTree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks) -{ - OSErr err; - - /* - * BlockAllocateContig - */ - /* If we're using the red-black tree, try searching at the specified offsets. */ - err = BlockAllocateContigRBTree(vcb, startingBlock, 1, maxBlocks, useMetaZone, - actualStartBlock, actualNumBlocks, 0); return err; - } -#endif + /* * BlockAllocateAnyBitmap finds free ranges by scanning the bitmap to figure out @@ -1863,13 +1856,13 @@ static OSErr BlockAllocateAnyRBTree( */ static OSErr BlockAllocateAnyBitmap( - ExtendedVCB *vcb, - u_int32_t startingBlock, - register u_int32_t endingBlock, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks) + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t endingBlock, + u_int32_t maxBlocks, + u_int32_t flags, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks) { OSErr err; register u_int32_t block; // current block number @@ -1883,10 +1876,14 @@ static OSErr BlockAllocateAnyBitmap( u_int32_t wordsPerBlock; Boolean dirty = false; struct hfsmount *hfsmp = VCBTOHFS(vcb); + uint32_t summary_block_scan = 0; + Boolean useMetaZone = (flags & HFS_ALLOC_METAZONE); + Boolean forceFlush = (flags & HFS_ALLOC_FLUSHTXN); if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_START, startingBlock, endingBlock, maxBlocks, useMetaZone, 0); +restartSearchAny: /* * When we're skipping the metadata zone and the start/end * range overlaps with the metadata zone then adjust the @@ -1922,7 +1919,7 @@ static OSErr BlockAllocateAnyBitmap( // { u_int32_t wordIndexInBlock; - + bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; @@ -1932,10 +1929,11 @@ static OSErr BlockAllocateAnyBitmap( currentWord = SWAP_BE32 (*buffer); bitMask = kHighBitInWordMask >> (startingBlock & kBitsWithinWordMask); } - - // - // Find the first unallocated block - // + + /* + * While loop 1: + * Find the first unallocated block starting at 'block' + */ block=startingBlock; while (block < endingBlock) { if ((currentWord & bitMask) == 0) @@ -1952,6 +1950,24 @@ static OSErr BlockAllocateAnyBitmap( if (--wordsLeft == 0) { // Next block buffer = currCache = NULL; + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* + * If summary_block_scan is non-zero, then we must have + * pulled a bitmap file block into core, and scanned through + * the entire thing. Because we're in this loop, we are + * implicitly trusting that the bitmap didn't have any knowledge + * about this particular block. As a result, update the bitmap + * (lazily, now that we've scanned it) with our findings that + * this particular block is completely used up. 
+					 */
+					if (summary_block_scan != 0) {
+						uint32_t summary_bit;
+						(void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit);
+						hfs_set_summary (hfsmp, summary_bit, 1);
+						summary_block_scan = 0;
+					}
+				}
+
 				err = ReleaseBitmapBlock(vcb, blockRef, false);
 				if (err != noErr) goto Exit;
@@ -1969,7 +1985,7 @@ static OSErr BlockAllocateAnyBitmap(
 				err = ReadBitmapBlock(vcb, block, &currCache, &blockRef);
 				if (err != noErr) goto Exit;
 				buffer = currCache;
-
+				summary_block_scan = block;
 				wordsLeft = wordsPerBlock;
 			}
 			currentWord = SWAP_BE32 (*buffer);
@@ -1983,10 +1999,36 @@ static OSErr BlockAllocateAnyBitmap(
 		goto Exit;
 	}
 
-	// Return the first block in the allocated range
-	*actualStartBlock = block;
+
+	/*
+	 * Don't move forward just yet.  Verify that either one of the following
+	 * two conditions is true:
+	 * 1) journaling is not enabled
+	 * 2) block is not currently on any pending TRIM list.
+	 */
+	if (hfsmp->jnl != NULL && (forceFlush == false)) {
+		int recently_deleted = 0;
+		uint32_t nextblk;
+		err = CheckUnmappedBytes (hfsmp, (uint64_t) block, 1, &recently_deleted, &nextblk);
+		if ((err == 0) && (recently_deleted)) {
+
+			/* release the bitmap block & unset currCache.  we may jump past it. */
+			err = ReleaseBitmapBlock(vcb, blockRef, false);
+			currCache = NULL;
+			if (err != noErr) {
+				goto Exit;
+			}
+			/* set our start to nextblk, and re-do the search. */
+			startingBlock = nextblk;
+			goto restartSearchAny;
+		}
+	}
+
+
+	// Return the first block in the allocated range
+	*actualStartBlock = block;
 	dirty = true;
-	
+
 	// If we could get the desired number of blocks before hitting endingBlock,
 	// then adjust endingBlock so we won't keep looking.  Ideally, the comparison
 	// would be (block + maxBlocks) < endingBlock, but that could overflow.  The
@@ -1994,98 +2036,89 @@ static OSErr BlockAllocateAnyBitmap(
 	if (block < (endingBlock-maxBlocks)) {
 		endingBlock = block + maxBlocks;	// if we get this far, we've found enough
 	}
-	
-	// XXXdbg
-	if (hfsmp->jnl) {
-		journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef);
-	}
 
-	//
-	// Allocate all of the consecutive blocks
-	//
-	while ((currentWord & bitMask) == 0) {
-		// Allocate this block
-		currentWord |= bitMask;
-		
+	/*
+	 * While loop 2:
+	 * Scan the bitmap, starting at 'currentWord' in the current
+	 * bitmap block.  Continue iterating through the bitmap until
+	 * either we hit an allocated block, or until we have accumulated
+	 * maxBlocks worth of bitmap.
+	 */
+
+	/* Continue until we see an allocated block */
+	while ((currentWord & bitMask) == 0) {
 		// Move to the next block.  If no more, then exit.
 		++block;
-		if (block == endingBlock)
+		if (block == endingBlock) {
 			break;
+		}
 
 		// Next bit
 		bitMask >>= 1;
 		if (bitMask == 0) {
-			*buffer = SWAP_BE32 (currentWord);	// update value in bitmap
-			
 			// Next word
 			bitMask = kHighBitInWordMask;
 			++buffer;
-			
+
 			if (--wordsLeft == 0) {
 				// Next block
 				buffer = currCache = NULL;
-				err = ReleaseBitmapBlock(vcb, blockRef, true);
-				if (err != noErr) goto Exit;
+
+				/* We're only reading the bitmap here, so mark it as clean */
+				err = ReleaseBitmapBlock(vcb, blockRef, false);
+				if (err != noErr) {
+					goto Exit;
+				}
 
 				/*
 				 * Skip over metadata blocks.
*/ if (!useMetaZone) { u_int32_t nextBlock; - nextBlock = NextBitmapBlock(vcb, block); if (nextBlock != block) { goto Exit; /* allocation gap, so stop */ } } - err = ReadBitmapBlock(vcb, block, &currCache, &blockRef); - if (err != noErr) goto Exit; - buffer = currCache; + if (block >= endingBlock) { + goto Exit; + } - // XXXdbg - if (hfsmp->jnl) { - journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); + err = ReadBitmapBlock(vcb, block, &currCache, &blockRef); + if (err != noErr) { + goto Exit; } - + buffer = currCache; wordsLeft = wordsPerBlock; } - currentWord = SWAP_BE32 (*buffer); } } - *buffer = SWAP_BE32 (currentWord); // update the last change Exit: + if (currCache) { + /* Release the bitmap reference prior to marking bits in-use */ + (void) ReleaseBitmapBlock(vcb, blockRef, false); + currCache = NULL; + } + if (err == noErr) { *actualNumBlocks = block - *actualStartBlock; - + // sanity check if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { panic("hfs: BlockAllocateAny: allocation overflow on \"%s\"", vcb->vcbVN); } - - /* - * Beware! - * Because this function directly manipulates the bitmap to mark the - * blocks it came across as allocated, we must inform the journal (and - * subsequently, the journal's trim list) that we are allocating these - * blocks, just like in BlockMarkAllocatedInternal. hfs_unmap_alloc_extent - * and the functions it calls will serialize behind the journal trim list lock - * to ensure that either the asynchronous flush/TRIM/UNMAP happens prior to - * us manipulating the trim list, or we get there first and successfully remove - * these bitmap blocks before the TRIM happens. - */ - hfs_unmap_alloc_extent (vcb, *actualStartBlock, *actualNumBlocks); + + /* Mark the bits found as in-use */ + err = BlockMarkAllocatedInternal (vcb, *actualStartBlock, *actualNumBlocks); } else { *actualStartBlock = 0; *actualNumBlocks = 0; } - if (currCache) - (void) ReleaseBitmapBlock(vcb, blockRef, dirty); - if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_ANY_BITMAP | DBG_FUNC_END, err, *actualStartBlock, *actualNumBlocks, 0, 0); @@ -2115,30 +2148,30 @@ _______________________________________________________________________ */ static OSErr BlockAllocateKnown( - ExtendedVCB *vcb, - u_int32_t maxBlocks, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks) + ExtendedVCB *vcb, + u_int32_t maxBlocks, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks) { OSErr err; u_int32_t foundBlocks; + struct hfsmount *hfsmp = VCBTOHFS(vcb); if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_START, 0, 0, maxBlocks, 0, 0); - HFS_MOUNT_LOCK(vcb, TRUE); + hfs_lock_mount (hfsmp); lck_spin_lock(&vcb->vcbFreeExtLock); - if ((hfs_isrbtree_active(vcb) == true) || - vcb->vcbFreeExtCnt == 0 || - vcb->vcbFreeExt[0].blockCount == 0) { + if ( vcb->vcbFreeExtCnt == 0 || + vcb->vcbFreeExt[0].blockCount == 0) { lck_spin_unlock(&vcb->vcbFreeExtLock); - HFS_MOUNT_UNLOCK(vcb, TRUE); + hfs_unlock_mount(hfsmp); if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_ALLOC_KNOWN_BITMAP | DBG_FUNC_END, dskFulErr, *actualStartBlock, *actualNumBlocks, 0, 0); return dskFulErr; } lck_spin_unlock(&vcb->vcbFreeExtLock); - HFS_MOUNT_UNLOCK(vcb, TRUE); + hfs_unlock_mount(hfsmp); lck_spin_lock(&vcb->vcbFreeExtLock); @@ -2148,11 +2181,11 @@ static OSErr BlockAllocateKnown( if (foundBlocks > maxBlocks) foundBlocks = maxBlocks; *actualNumBlocks = foundBlocks; - + 
lck_spin_unlock(&vcb->vcbFreeExtLock); remove_free_extent_cache(vcb, *actualStartBlock, *actualNumBlocks); - + // sanity check if ((*actualStartBlock + *actualNumBlocks) > vcb->allocLimit) { @@ -2187,47 +2220,17 @@ static OSErr BlockAllocateKnown( * is enabled. */ - OSErr BlockMarkAllocated( - ExtendedVCB *vcb, - u_int32_t startingBlock, - register u_int32_t numBlocks) + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t numBlocks) { struct hfsmount *hfsmp; - - hfsmp = VCBTOHFS(vcb); -#if CONFIG_HFS_ALLOC_RBTREE - if (hfs_isrbtree_active(hfsmp)) { - int err; - - if ((startingBlock >= hfsmp->offset_block_end) && - (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) { - /* - * We're manipulating a portion of the bitmap that is not controlled by the - * red-black tree. Just update the bitmap and don't bother manipulating the tree - */ - goto justbitmap; - } - - err = BlockMarkAllocatedRBTree(vcb, startingBlock, numBlocks); - if (err == noErr) { - if ( ALLOC_DEBUG ) { - REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false); - if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) { - panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet\n", - startingBlock, numBlocks); - } - check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_ALLOC); - } - } - return err; - } -justbitmap: -#endif + hfsmp = VCBTOHFS(vcb); return BlockMarkAllocatedInternal(vcb, startingBlock, numBlocks); - + } @@ -2253,9 +2256,9 @@ _______________________________________________________________________ */ static OSErr BlockMarkAllocatedInternal ( - ExtendedVCB *vcb, - u_int32_t startingBlock, - register u_int32_t numBlocks) + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t numBlocks) { OSErr err; register u_int32_t *currentWord; // Pointer to current word within bitmap block @@ -2273,8 +2276,27 @@ OSErr BlockMarkAllocatedInternal ( if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_ALLOC_BITMAP | DBG_FUNC_START, startingBlock, numBlocks, 0, 0, 0); + int force_flush = 0; + /* + * Since we are about to mark these bits as in-use + * in the bitmap, decide if we need to alert the caller + * that a journal flush might be appropriate. It's safe to + * poke at the journal pointer here since we MUST have + * called start_transaction by the time this function is invoked. + * If the journal is enabled, then it will have taken the requisite + * journal locks. If it is not enabled, then we have taken + * a shared lock on the global lock. 
+ */ + if (hfsmp->jnl) { + uint32_t ignore; + err = CheckUnmappedBytes (hfsmp, (uint64_t) startingBlock, (uint64_t)numBlocks, &force_flush, &ignore); + if ((err == 0) && (force_flush)) { + journal_request_immediate_flush (hfsmp->jnl); + } + } + hfs_unmap_alloc_extent(vcb, startingBlock, numBlocks); - + // // Pre-read the bitmap block containing the first word of allocation // @@ -2286,7 +2308,7 @@ OSErr BlockMarkAllocatedInternal ( // { u_int32_t wordIndexInBlock; - + bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; @@ -2294,7 +2316,7 @@ OSErr BlockMarkAllocatedInternal ( currentWord = buffer + wordIndexInBlock; wordsLeft = wordsPerBlock - wordIndexInBlock; } - + // XXXdbg if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); @@ -2335,7 +2357,7 @@ OSErr BlockMarkAllocatedInternal ( if (wordsLeft == 0) { // Read in the next bitmap block startingBlock += bitsPerBlock; // generate a block number in the next bitmap block - + buffer = NULL; err = ReleaseBitmapBlock(vcb, blockRef, true); if (err != noErr) goto Exit; @@ -2363,17 +2385,17 @@ OSErr BlockMarkAllocatedInternal ( ++currentWord; // move to next word --wordsLeft; // one less word left in this block } - + // // Allocate any remaining blocks. // - + if (numBlocks != 0) { bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits if (wordsLeft == 0) { // Read in the next bitmap block startingBlock += bitsPerBlock; // generate a block number in the next bitmap block - + buffer = NULL; err = ReleaseBitmapBlock(vcb, blockRef, true); if (err != noErr) goto Exit; @@ -2385,7 +2407,7 @@ OSErr BlockMarkAllocatedInternal ( if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); } - + // Readjust currentWord and wordsLeft currentWord = buffer; wordsLeft = wordsPerBlock; @@ -2411,102 +2433,6 @@ Exit: return err; } -#if CONFIG_HFS_ALLOC_RBTREE -/* - * This is a wrapper function around BlockMarkAllocated. This function is - * called when the RB Tree-based allocator needs to mark a block as in-use. - * This function should take the locks that would not normally be - * necessary for the normal bitmap allocator, and then call the function. Once - * the on-disk data structures are updated properly, then this will remove the - * appropriate node from the tree. - */ - -static OSErr BlockMarkAllocatedRBTree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t numBlocks) -{ - OSErr err; - struct hfsmount *hfsmp = VCBTOHFS(vcb); - int rb_err = 0; - - - if (ALLOC_DEBUG) { - REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); - if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { - panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use already\n", - startingBlock, numBlocks); - } - check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE); - } - - err = BlockMarkAllocatedInternal (vcb, startingBlock, numBlocks); - - if (err == noErr) { - - if (ALLOC_DEBUG) { - if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) { - panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks not in use yet!\n", - startingBlock, numBlocks); - } - } - - /* - * Mark the blocks in the offset tree. - */ - rb_err = extent_tree_offset_alloc_space(&hfsmp->offset_tree, numBlocks, startingBlock); - if (rb_err) { - if (ALLOC_DEBUG) { - printf("HFS RBTree Allocator: Could not mark blocks as in-use! 
%d \n", rb_err); - } - - /* - * We may be called from the BlockMarkAllocated interface, in which case, they would - * not be picking extents from their start. Do a check here, find if the specified - * extent is free, and if it is, then find the containing node. - */ - extent_node_t *node = NULL; - extent_node_t search_sentinel; - search_sentinel.offset = startingBlock; - - node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel); - - if (node) { - rb_err = extent_tree_offset_alloc_unaligned (&hfsmp->offset_tree, numBlocks, startingBlock); - } - - if (ALLOC_DEBUG) { - if (rb_err) { - printf ("HFS RBTree Allocator: Still Couldn't mark blocks as in-use! %d\n", rb_err); - } - } - } - if (ALLOC_DEBUG) { - check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC); - } - } - - /* - * If we encountered a red-black tree error, for now, we immediately back off and force - * destruction of rb-tree. Set the persistent error-detected bit in the mount point. - * That will ensure that even if we reach a low-water-mark in the future we will still - * not allow the rb-tree to be used. On next mount, we will force a re-construction from - * on-disk state. As a fallback, we will now resort to the bitmap-scanning behavior. - */ - if (rb_err) { - /* Mark RB-Trees with error */ - hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED; - DestroyTrees(hfsmp); - /* Reset the Free Ext Cache since we'll be using it now. */ - ResetVCBFreeExtCache(hfsmp); - printf("HFS: Red-Black Allocator Tree BlockMarkAllocated error\n"); - } - - return err; -} -#endif - - /* * BlockMarkFree @@ -2518,42 +2444,14 @@ static OSErr BlockMarkAllocatedRBTree( * */ OSErr BlockMarkFree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - register u_int32_t numBlocks) + ExtendedVCB *vcb, + u_int32_t startingBlock, + register u_int32_t numBlocks) { struct hfsmount *hfsmp; hfsmp = VCBTOHFS(vcb); -#if CONFIG_HFS_ALLOC_RBTREE - if (hfs_isrbtree_active(hfsmp)) { - int err; - - if ((startingBlock >= hfsmp->offset_block_end) && - (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) { - /* - * We're manipulating a portion of the bitmap that is not controlled by the - * red-black tree. 
Just update the bitmap and don't bother manipulating the tree - */ - goto justbitmap; - } - - err = BlockMarkFreeRBTree(vcb, startingBlock, numBlocks); - if (err == noErr) { - if ( ALLOC_DEBUG ) { - REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false); - if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { - panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks in use!\n", - startingBlock, numBlocks); - } - check_rbtree_extents (hfsmp, startingBlock, numBlocks, ASSERT_FREE); - } - } - return err; - } -justbitmap: -#endif + return BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true); - } @@ -2666,10 +2564,10 @@ _______________________________________________________________________ */ static OSErr BlockMarkFreeInternal( - ExtendedVCB *vcb, - u_int32_t startingBlock_in, - register u_int32_t numBlocks_in, - Boolean do_validate) + ExtendedVCB *vcb, + u_int32_t startingBlock_in, + register u_int32_t numBlocks_in, + Boolean do_validate) { OSErr err; u_int32_t startingBlock = startingBlock_in; @@ -2686,9 +2584,9 @@ OSErr BlockMarkFreeInternal( uintptr_t blockRef; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; - // XXXdbg + // XXXdbg struct hfsmount *hfsmp = VCBTOHFS(vcb); - + if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_START, startingBlock_in, numBlocks_in, do_validate, 0, 0); @@ -2697,35 +2595,35 @@ OSErr BlockMarkFreeInternal( * need to be able to free blocks being relocated during hfs_truncatefs. */ if ((do_validate == true) && - (startingBlock + numBlocks > vcb->totalBlocks)) { + (startingBlock + numBlocks > vcb->totalBlocks)) { if (ALLOC_DEBUG) { panic ("BlockMarkFreeInternal() free non-existent blocks at %u (numBlock=%u) on vol %s\n", startingBlock, numBlocks, vcb->vcbVN); } - + printf ("hfs: BlockMarkFreeInternal() trying to free non-existent blocks starting at %u (numBlock=%u) on volume %s\n", startingBlock, numBlocks, vcb->vcbVN); hfs_mark_volume_inconsistent(vcb); err = EIO; goto Exit; } - + // // Pre-read the bitmap block containing the first word of allocation // - + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); if (err != noErr) goto Exit; // XXXdbg if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); } - + // // Figure out how many bits and words per bitmap block. // bitsPerBlock = vcb->vcbVBMIOSize * kBitsPerByte; wordsPerBlock = vcb->vcbVBMIOSize / kBytesPerWord; wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; - + // // Look for a range of free blocks immediately before startingBlock // (up to the start of the current bitmap block). Set unmapStart to @@ -2742,19 +2640,19 @@ OSErr BlockMarkFreeInternal( break; bitMask = kLowBitInWordMask; } - + if (*currentWord & SWAP_BE32(bitMask)) break; // Found an allocated block. Stop searching. --unmapStart; ++unmapCount; } - + // // If the first block to free doesn't start on a word // boundary in the bitmap, then treat that first word // specially. 
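
The partial-word case handled next is the fiddliest part of this routine; a worked example using the file's own constants (kBitsPerWord = 32, kAllBitsSetInWord = 0xFFFFFFFF, bit 0 being the word's most significant bit) makes the two mask expressions concrete:

	/*
	 * Worked example: free 5 blocks starting at allocation block 70.
	 * 70 % 32 = 6, so currentBit = 6 and the first word holds bits 6..10.
	 *
	 *   bitMask  = kAllBitsSetInWord >> 6;           // 0x03FFFFFF: bits 6..31
	 *   numBits  = 32 - 6;                           // 26 bits left in this word
	 *   numBits (26) > numBlocks (5), so clip the tail:
	 *   bitMask &= ~(kAllBitsSetInWord >> (6 + 5));  // keep only bits 6..10
	 *                                                // bitMask == 0x03E00000
	 *
	 * The word is then ANDed with ~bitMask (byte-swapped via SWAP_BE32)
	 * to clear exactly those five bits.
	 */
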
// - + currentWord = buffer + wordIndexInBlock; wordsLeft = wordsPerBlock - wordIndexInBlock; currentBit = startingBlock % kBitsPerWord; @@ -2766,87 +2664,87 @@ OSErr BlockMarkFreeInternal( bitMask &= ~(kAllBitsSetInWord >> (currentBit + numBits)); // turn off bits after last } if ((do_validate == true) && - (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { + (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { goto Corruption; } *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap numBlocks -= numBits; // adjust number of blocks left to free - + ++currentWord; // move to next word --wordsLeft; // one less word left in this block } - + // // Free whole words (32 blocks) at a time. // - + while (numBlocks >= kBitsPerWord) { if (wordsLeft == 0) { // Read in the next bitmap block startingBlock += bitsPerBlock; // generate a block number in the next bitmap block - + buffer = NULL; err = ReleaseBitmapBlock(vcb, blockRef, true); if (err != noErr) goto Exit; - + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); if (err != noErr) goto Exit; - + // XXXdbg if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); } - + // Readjust currentWord and wordsLeft currentWord = buffer; wordsLeft = wordsPerBlock; } if ((do_validate == true) && - (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) { + (*currentWord != SWAP_BE32 (kAllBitsSetInWord))) { goto Corruption; } *currentWord = 0; // clear the entire word numBlocks -= kBitsPerWord; - + ++currentWord; // move to next word --wordsLeft; // one less word left in this block } - + // // Free any remaining blocks. // - + if (numBlocks != 0) { bitMask = ~(kAllBitsSetInWord >> numBlocks); // set first numBlocks bits if (wordsLeft == 0) { // Read in the next bitmap block startingBlock += bitsPerBlock; // generate a block number in the next bitmap block - + buffer = NULL; err = ReleaseBitmapBlock(vcb, blockRef, true); if (err != noErr) goto Exit; - + err = ReadBitmapBlock(vcb, startingBlock, &buffer, &blockRef); if (err != noErr) goto Exit; - + // XXXdbg if (hfsmp->jnl) { journal_modify_block_start(hfsmp->jnl, (struct buf *)blockRef); } - + // Readjust currentWord and wordsLeft currentWord = buffer; wordsLeft = wordsPerBlock; } if ((do_validate == true) && - (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { + (*currentWord & SWAP_BE32 (bitMask)) != SWAP_BE32 (bitMask)) { goto Corruption; } *currentWord &= SWAP_BE32 (~bitMask); // clear the bits in the bitmap - + // No need to update currentWord or wordsLeft } - + // // Look for a range of free blocks immediately after the range we just freed // (up to the end of the current bitmap block). @@ -2865,17 +2763,17 @@ OSErr BlockMarkFreeInternal( ++currentWord; bitMask = kHighBitInWordMask; } - + if (*currentWord & SWAP_BE32(bitMask)) break; // Found an allocated block. Stop searching. ++unmapCount; } - + Exit: - + if (buffer) (void)ReleaseBitmapBlock(vcb, blockRef, true); - + if (err == noErr) { hfs_unmap_free_extent(vcb, unmapStart, unmapCount); } @@ -2884,7 +2782,7 @@ Exit: KERNEL_DEBUG_CONSTANT(HFSDBG_MARK_FREE_BITMAP | DBG_FUNC_END, err, 0, 0, 0, 0); return err; - + Corruption: #if DEBUG_BUILD panic("hfs: BlockMarkFreeInternal: blocks not allocated!"); @@ -2897,109 +2795,6 @@ Corruption: } -#if CONFIG_HFS_ALLOC_RBTREE -/* - * This is a wrapper function around BlockMarkFree. This function is - * called when the RB Tree-based allocator needs to mark a block as no longer - * in use. 
This function should take the locks that would not normally be - * necessary for the normal bitmap deallocator, and then call the function. Once - * the on-disk data structures are updated properly, then this will update an - * existing rb-tree node if possible, or else create a new one. - */ - -OSErr BlockMarkFreeRBTree( - ExtendedVCB *vcb, - u_int32_t startingBlock, - register u_int32_t numBlocks) -{ - OSErr err; - struct hfsmount *hfsmp = VCBTOHFS(vcb); - int rb_err = 0; - - if (ALLOC_DEBUG) { - REQUIRE_FILE_LOCK(vcb->hfs_allocation_vp, false); - if (!hfs_isallocated(hfsmp, startingBlock, numBlocks)) { - panic ("HFS RBTree Allocator: Trying to free blocks starting @ %x for %x but blocks not in use! \n", - startingBlock, numBlocks); - } - check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_ALLOC); - } - - err = BlockMarkFreeInternal(vcb, startingBlock, numBlocks, true); - - if (err == noErr) { - - /* - * During a filesystem truncation, we may need to relocate files out of the - * portion of the bitmap that is no longer controlled by the r/b tree. - * In this case, just update the bitmap and do not attempt to manipulate the tree. - */ - if ((startingBlock >= hfsmp->offset_block_end) && - (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS)) { - goto free_error; - } - - extent_node_t *newnode; - - if (ALLOC_DEBUG) { - /* - * Validate that the blocks in question are not allocated in the bitmap, and that they're - * not in the offset tree, since it should be tracking free extents, rather than allocated - * extents - */ - if (hfs_isallocated(hfsmp, startingBlock, numBlocks)) { - panic ("HFS RBTree Allocator: Blocks starting @ %x for %x blocks still marked in-use!\n", - startingBlock, numBlocks); - } - } - - if ((hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE) == 0) { - if (startingBlock >= hfsmp->offset_block_end) { - /* - * If the tree generation code has not yet finished scanning the - * bitmap region containing this extent, do nothing. If the start - * of the range to be deallocated is greater than the current high - * watermark on the offset tree, just bail out and let the scanner catch up with us. - */ - rb_err = 0; - goto free_error; - } - } - - newnode = extent_tree_free_space(&hfsmp->offset_tree, numBlocks, startingBlock); - if (newnode == NULL) { - rb_err = 1; - goto free_error; - } - - if (ALLOC_DEBUG) { - check_rbtree_extents (VCBTOHFS(vcb), startingBlock, numBlocks, ASSERT_FREE); - } - - } - -free_error: - /* - * We follow the same principle as in BlockMarkAllocatedRB. - * If we encounter an error in adding the extents to the rb-tree, then immediately - * back off, destroy the trees, and persistently set a bit in the runtime hfsmp flags - * to indicate we should not use the rb-tree until next mount, when we can force a rebuild. - */ - if (rb_err) { - /* Mark RB-Trees with error */ - hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ERRORED; - DestroyTrees(hfsmp); - /* Reset the Free Ext Cache since we'll be using it now. 
*/ - ResetVCBFreeExtCache(hfsmp); - printf("HFS: Red-Black Allocator Tree BlockMarkFree error\n"); - } - - - return err; - -} -#endif - /* _______________________________________________________________________ @@ -3031,14 +2826,15 @@ _______________________________________________________________________ */ static OSErr BlockFindContiguous( - ExtendedVCB *vcb, - u_int32_t startingBlock, - u_int32_t endingBlock, - u_int32_t minBlocks, - u_int32_t maxBlocks, - Boolean useMetaZone, - u_int32_t *actualStartBlock, - u_int32_t *actualNumBlocks) + ExtendedVCB *vcb, + u_int32_t startingBlock, + u_int32_t endingBlock, + u_int32_t minBlocks, + u_int32_t maxBlocks, + Boolean useMetaZone, + Boolean trustSummary, + u_int32_t *actualStartBlock, + u_int32_t *actualNumBlocks) { OSErr err; register u_int32_t currentBlock; // Block we're currently looking at. @@ -3053,6 +2849,7 @@ static OSErr BlockFindContiguous( uintptr_t blockRef; u_int32_t wordsPerBlock; u_int32_t updated_free_extent = 0; + struct hfsmount *hfsmp = (struct hfsmount*) vcb; if (hfs_kdebug_allocation & HFSDBG_ALLOC_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_BLOCK_FIND_CONTIG | DBG_FUNC_START, startingBlock, endingBlock, minBlocks, maxBlocks, 0); @@ -3090,6 +2887,20 @@ static OSErr BlockFindContiguous( if (!useMetaZone) currentBlock = NextBitmapBlock(vcb, currentBlock); + /* + * Use the summary table if we can. Skip over any totally + * allocated blocks. currentBlock should now point to the first + * block beyond the metadata zone if the metazone allocations are not + * allowed in this invocation. + */ + if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { + uint32_t suggestion; + if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) { + currentBlock = suggestion; + } + } + + // // Pre-read the first bitmap block. // @@ -3104,18 +2915,24 @@ static OSErr BlockFindContiguous( wordsLeft = (currentBlock / kBitsPerWord) & (wordsPerBlock-1); // Current index into buffer currentWord = buffer + wordsLeft; wordsLeft = wordsPerBlock - wordsLeft; - + + /* + * This outer do-while loop is the main body of this function. Its job is + * to search through the blocks (until we hit 'stopBlock'), and iterate + * through swaths of allocated bitmap until it finds free regions. + */ + do { foundBlocks = 0; - - //============================================================ - // Look for a free block, skipping over allocated blocks. - //============================================================ - - // - // Check an initial partial word (if any) - // + uint32_t summary_block_scan = 0; + /* + * Inner while loop 1: + * Look for free blocks, skipping over allocated ones. + * + * Initialization starts with checking the initial partial word + * if applicable. + */ bitMask = currentBlock & kBitsWithinWordMask; if (bitMask) { @@ -3145,6 +2962,23 @@ static OSErr BlockFindContiguous( if (wordsLeft == 0) { buffer = NULL; + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* + * If summary_block_scan is non-zero, then we must have + * pulled a bitmap file block into core, and scanned through + * the entire thing. Because we're in this loop, we are + * implicitly trusting that the bitmap didn't have any knowledge + * about this particular block. As a result, update the bitmap + * (lazily, now that we've scanned it) with our findings that + * this particular block is completely used up. 
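The summary-table query used above can be pictured with this stand-alone sketch (a hypothetical helper, not part of the patch): one summary bit covers vcbVBMIOSize * 8 allocation blocks, and a set bit means the whole chunk is known to be fully allocated:

    #include <stdbool.h>
    #include <stdint.h>

    /* One summary bit covers (vbm_io_size * 8) allocation blocks; a set bit
     * means the whole chunk is known to be allocated, so skip it. */
    static bool
    chunk_may_have_free(const uint8_t *summary_table, uint32_t alloc_block,
                        uint32_t vbm_io_size)
    {
        uint32_t bits_per_chunk = vbm_io_size * 8;
        uint32_t idx = alloc_block / bits_per_chunk;      /* summary bit index */
        return (summary_table[idx / 8] & (1u << (idx % 8))) == 0;
    }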
+ */ + if (summary_block_scan != 0) { + uint32_t summary_bit; + (void) hfs_get_summary_index (hfsmp, summary_block_scan, &summary_bit); + hfs_set_summary (hfsmp, summary_bit, 1); + summary_block_scan = 0; + } + } err = ReleaseBitmapBlock(vcb, blockRef, false); if (err != noErr) goto ErrorExit; @@ -3158,13 +2992,32 @@ } } + /* Skip over fully allocated bitmap blocks if we can */ + if ((trustSummary) && (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { + uint32_t suggestion; + if (hfs_find_summary_free (hfsmp, currentBlock, &suggestion) == 0) { + if (suggestion < stopBlock) { + currentBlock = suggestion; + } + } + } + err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); if ( err != noErr ) goto ErrorExit; - + + /* + * Set summary_block_scan to be the block we just read into the block cache. + * + * At this point, we've just read an allocation block worth of bitmap file + * into the buffer above, but we don't know if it is completely allocated or not. + * If we find that it is completely allocated/full then we will jump + * through this loop again and set the appropriate summary bit as fully allocated. + */ + summary_block_scan = currentBlock; currentWord = buffer; wordsLeft = wordsPerBlock; } - + // See if any of the bits are clear if ((tempWord = SWAP_BE32(*currentWord)) + 1) // non-zero if any bits were clear { @@ -3175,7 +3028,7 @@ bitMask >>= 1; ++currentBlock; } - + break; // Found the free bit; break out to FoundUnused. } @@ -3195,15 +3048,17 @@ FoundUnused: // Remember the start of the extent firstBlock = currentBlock; - //============================================================ - // Count the number of contiguous free blocks. - //============================================================ - // - // Check an initial partial word (if any) - // - bitMask = currentBlock & kBitsWithinWordMask; - if (bitMask) + /* + * Inner while loop 2: + * We get here if we find a free block. Count the number + * of contiguous free blocks observed. + * + * Initialization starts with checking the initial partial word + * if applicable. + */ + bitMask = currentBlock & kBitsWithinWordMask; + if (bitMask) { tempWord = SWAP_BE32(*currentWord); // Fetch the current word only once bitMask = kHighBitInWordMask >> bitMask; @@ -3221,7 +3076,7 @@ ++currentWord; --wordsLeft; } - + // // Check whole words // @@ -3248,11 +3103,11 @@ err = ReadBitmapBlock(vcb, currentBlock, &buffer, &blockRef); if ( err != noErr ) goto ErrorExit; - + currentWord = buffer; wordsLeft = wordsPerBlock; } - + // See if any of the bits are set if ((tempWord = SWAP_BE32(*currentWord)) != 0) { @@ -3263,7 +3118,7 @@ bitMask >>= 1; ++currentBlock; } - + break; // Found the used bit; break out to FoundUsed. } @@ -3271,7 +3126,7 @@ currentBlock += kBitsPerWord; ++currentWord; --wordsLeft; - + // If we found at least maxBlocks, we can quit early. if ((currentBlock - firstBlock) >= maxBlocks) break; @@ -3291,10 +3146,30 @@ FoundUsed: if (foundBlocks >= minBlocks) break; // Found what we needed! - /* We did not find the total blocks were were looking for, but - * lets add this free block run to our free extent cache list + /* + * We did not find the total blocks we were looking for, but + * add this free block run to our free extent cache list, if possible.
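The hunk that follows gates the free extent cache on pending TRIMs. Restated with explanatory comments (the same calls and variables as the code below, condensed for reading; CheckUnmappedBytes is the helper the patch itself relies on):

    /* Un-journaled volumes can cache the run immediately. */
    if (hfsmp->jnl == NULL) {
        updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
    } else {
        /* Journaled: a block freed by a transaction not yet on stable
         * storage must stay out of the cache until its TRIM completes. */
        int recently_deleted = 0;
        uint32_t nextblock;
        err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock,
                                 (uint64_t)foundBlocks, &recently_deleted, &nextblock);
        if (err || recently_deleted == 0) {
            /* On error, or when the run was not recently freed, cache it. */
            updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks);
        }
        err = 0;   /* the lookup failure is not fatal to the search */
    }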
*/ - updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); + if (hfsmp->jnl == NULL) { + /* If there is no journal, go ahead and add to the free ext cache. */ + updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); + } + else { + /* + * If journaled, only add to the free extent cache if this block is not + * waiting for a TRIM to complete; that implies that the transaction that freed it + * has not yet been committed to stable storage. + */ + int recently_deleted = 0; + uint32_t nextblock; + err = CheckUnmappedBytes(hfsmp, (uint64_t)firstBlock, + (uint64_t)foundBlocks, &recently_deleted, &nextblock); + if ((err) || (recently_deleted == 0)) { + /* if we hit an error, or the blocks were not recently freed, go ahead and insert it */ + updated_free_extent = add_free_extent_cache(vcb, firstBlock, foundBlocks); + } + err = 0; + } } while (currentBlock < stopBlock); LoopExit: @@ -3318,15 +3193,15 @@ ErrorExit: */ if ((firstBlock + foundBlocks) > vcb->allocLimit) { panic("hfs: blk allocation overflow on \"%s\" sb:0x%08x eb:0x%08x cb:0x%08x fb:0x%08x stop:0x%08x min:0x%08x found:0x%08x", - vcb->vcbVN, startingBlock, endingBlock, currentBlock, - firstBlock, stopBlock, minBlocks, foundBlocks); + vcb->vcbVN, startingBlock, endingBlock, currentBlock, + firstBlock, stopBlock, minBlocks, foundBlocks); } } - + if (updated_free_extent && (vcb->hfs_flags & HFS_HAS_SPARSE_DEVICE)) { int i; u_int32_t min_start = vcb->totalBlocks; - + // set the nextAllocation pointer to the smallest free block number // we've seen so on the next mount we won't rescan unnecessarily lck_spin_lock(&vcb->vcbFreeExtLock); @@ -3345,7 +3220,7 @@ } } } - + if (buffer) (void) ReleaseBitmapBlock(vcb, blockRef, false); @@ -3356,187 +3231,6 @@ } -#if CONFIG_HFS_ALLOC_RBTREE -/* - * Wrapper function around hfs_isrbtree_allocated. This just takes the start offset, - * and the number of blocks, and whether or not we should check if the blocks are - * free or not. This function is designed to be used primarily with the debug #ifdef - * enabled, so it results in a panic if anything unexpected occurs. - * - * shouldBeFree will be nonzero if the caller expects the zone to be free. - */ - -void check_rbtree_extents (struct hfsmount *hfsmp, u_int32_t startBlocks, - u_int32_t numBlocks, int shouldBeFree) { - int alloc; - extent_node_t *node1 = NULL; - u_int32_t off1 = 0; - u_int32_t len1 = 0; - alloc = hfs_isrbtree_allocated (hfsmp, startBlocks, numBlocks, &node1); - - if (node1) { - off1 = node1->offset; - len1 = node1->length; - } - - if (shouldBeFree) { - /* - * If the region should be free, then we expect to see extents in the tree - * matching this start and length. Alloc != 0 means some portion of the extent - * specified was allocated. - */ - if (alloc != 0){ - panic ("HFS check_rbtree_extents: Node (%p) do not exist! " - "node1 off (%d),len(%d),, start(%d) end(%d)\n", - node1, off1, len1, startBlocks, numBlocks); - } - } - else { - /* - * Otherwise, this means that the region should be allocated, and if we find - * an extent matching it, that's bad. - */ - if (alloc == 0){ - panic ("HFS check_rbtree_extents: Node (%p) exists! " - "node1 off (%d),len(%d), start(%d) end(%d)\n", - node1, off1, len1, startBlocks, numBlocks); - } - } -} -#endif - -#if CONFIG_HFS_ALLOC_RBTREE -/* - * Exhaustive validation search. This function iterates over all allocation blocks and - * compares their status in the red-black tree vs. the allocation bitmap.
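The deleted validator that begins here reduces to a simple pattern: walk every block and demand that two independent views of allocation state agree. A hypothetical sketch, with alternate_state() standing in for the removed red-black tree query:

    /* Debug-only consistency walk: every block must agree across both
     * allocation-state sources, else the volume metadata is corrupt. */
    static void
    validate_alloc_state(struct hfsmount *hfsmp, uint32_t start, uint32_t end)
    {
        for (uint32_t blk = start; blk < end; blk++) {
            int in_bitmap = hfs_isallocated(hfsmp, blk, 1);
            int in_other  = alternate_state(blk);   /* placeholder second view */
            if (in_bitmap != in_other)
                panic("allocator mismatch @ block %u", blk);
        }
    }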
If the two are out of sync - * then it will panic. Bitmap lock must be held while this function is run. - * - * Because this function requires a red-black tree search to validate every allocation block, it is - * very expensive and should ONLY be run in debug mode, and even then, infrequently. - * - * 'end' is non-inclusive, so it should represent the total number of blocks in the volume. - * - */ -void -hfs_validate_rbtree (struct hfsmount *hfsmp, u_int32_t start, u_int32_t end){ - - u_int32_t current; - extent_node_t* node1; - - hfs_checktreelinks (hfsmp); - - for (current = start; current < end; current++) { - node1 = NULL; - int rbtree = hfs_isrbtree_allocated(hfsmp, current, 1, &node1); - int bitmap = hfs_isallocated(hfsmp, current, 1); - - if (bitmap != rbtree){ - panic("HFS: Allocator mismatch @ block %d -- bitmap %d : rbtree %d\n", - current, bitmap, rbtree); - } - } -} - -/* - * Exhaustive Red-Black Tree Linked List verification routine. - * - * This function iterates through the red-black tree's nodes, and then verifies that the linked list - * embedded within each of the nodes accurately points to the correct node as its "next" pointer. - * The bitmap lock must be held while this function is run. - */ - -void -hfs_checktreelinks (struct hfsmount *hfsmp) { - extent_tree_offset_t *tree = &hfsmp->offset_tree; - - extent_node_t *current = NULL; - extent_node_t *next = NULL; - extent_node_t *treenext; - - current = extent_tree_off_first (tree); - - while (current) { - next = current->offset_next; - treenext = extent_tree_off_next (tree, current); - if (next != treenext) { - panic("hfs_checktreelinks: mismatch for node (%p), next: %p , treenext %p !\n", current, next, treenext); - } - current = treenext; - } -} - -#endif - - -#if CONFIG_HFS_ALLOC_RBTREE -/* - * Test to see if any free blocks exist at a given offset. - * If there exists a node at the specified offset, it will return the appropriate - * node. - * - * NULL indicates allocated blocks exist at that offset. - * - * Allocation file lock must be held. - * - * Returns: - * 1 if blocks in the range are allocated. - * 0 if all blocks in the range are free. - */ - -static int -hfs_isrbtree_allocated (struct hfsmount *hfsmp, u_int32_t startBlock, - u_int32_t numBlocks, extent_node_t **ret_node) { - - extent_node_t search_sentinel; - extent_node_t *node = NULL; - extent_node_t *nextnode = NULL; - - /* - * With only one tree, then we just have to validate that there are entries - * in the R/B tree at the specified offset if it really is free. - */ - search_sentinel.offset = startBlock; - search_sentinel.length = numBlocks; - - node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel); - if (node) { - - *ret_node = node; - nextnode = extent_tree_off_next (&hfsmp->offset_tree, node); - if (nextnode != node->offset_next) { - panic ("hfs_rbtree_isallocated: Next pointers out of sync!\n"); - } - - /* - * Check to see if it is a superset of our target range. Because we started - * with the offset or some offset prior to it, then we know the node's offset is - * at least <= startBlock. So, if the end of the node is greater than the end of - * our target range, then the whole range is free. 
- */ - - if ((node->offset + node->length) >= (startBlock + numBlocks)) { - if (node->offset > startBlock) { - panic ("hfs_rbtree_isallocated: bad node ordering!"); - } - return 0; - } - } - /* - * We got here if either our node search resulted in a node whose extent - * was strictly before our target offset, or we couldnt' find a previous node - * at all (the beginning of the volume). If the former, then we can infer that - * at least one block in the target range is allocated since the next node's offset - * must be greater than startBlock. - * - * Either way, this means that the target node is unavailable to allocate, so - * just return 1; - */ - return 1; -} - - -#endif - /* * Count number of bits set in the given 32-bit unsigned number * @@ -3591,7 +3285,7 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, uintptr_t blockRef; u_int32_t bitsPerBlock; u_int32_t wordsPerBlock; - u_int32_t blockCount = 0; + u_int32_t blockCount = 0; int error; if (hfs_kdebug_allocation & HFSDBG_BITMAP_ENABLED) @@ -3609,7 +3303,7 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, */ { u_int32_t wordIndexInBlock; - + bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord; @@ -3617,7 +3311,7 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, currentWord = buffer + wordIndexInBlock; wordsLeft = wordsPerBlock - wordIndexInBlock; } - + /* * First test any non word aligned bits. */ @@ -3648,7 +3342,7 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, if (wordsLeft == 0) { /* Read in the next bitmap block. */ startingBlock += bitsPerBlock; - + buffer = NULL; error = ReleaseBitmapBlock(hfsmp, blockRef, false); if (error) goto Exit; @@ -3671,7 +3365,7 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, ++currentWord; --wordsLeft; } - + /* * Test any remaining blocks. */ @@ -3680,7 +3374,7 @@ hfs_isallocated_internal(struct hfsmount *hfsmp, u_int32_t startingBlock, if (wordsLeft == 0) { /* Read in the next bitmap block */ startingBlock += bitsPerBlock; - + buffer = NULL; error = ReleaseBitmapBlock(hfsmp, blockRef, false); if (error) goto Exit; @@ -3725,7 +3419,7 @@ JustReturn: * 0 on success, non-zero on failure. * On failure, allocCount is zero. */ -int + int hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t numBlocks, u_int32_t *allocCount) { @@ -3748,7 +3442,7 @@ hfs_count_allocated(struct hfsmount *hfsmp, u_int32_t startBlock, * 0 if all blocks in the range are free. * 1 if blocks in the range are allocated, or there was an error. */ -int + int hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBlocks) { int error; @@ -3771,6 +3465,7 @@ hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBl } /* + * CONFIG_HFS_RBTREE * Check to see if the red-black tree is live. Allocation file lock must be held * shared or exclusive to call this function. Note that we may call this even if * HFS is built without activating the red-black tree code. @@ -3778,150 +3473,1094 @@ hfs_isallocated(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t numBl __private_extern__ int hfs_isrbtree_active(struct hfsmount *hfsmp){ - - //TODO: Update this function to deal with a truncate/resize coming in when the tree - //isn't fully finished. maybe we need to check the flags for something other than ENABLED? 
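For reference, the 32-bit bit-count helper described above is conventionally the SWAR reduction; a self-contained sketch (not necessarily the exact implementation used here):

    #include <stdint.h>

    /* Count set bits by summing 2-bit pairs, then nibbles, then bytes. */
    static int
    popcount32(uint32_t x)
    {
        x = x - ((x >> 1) & 0x55555555u);                  /* 2-bit sums */
        x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  /* 4-bit sums */
        x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  /* 8-bit sums */
        return (int)((x * 0x01010101u) >> 24);             /* total      */
    }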
- -#if CONFIG_HFS_ALLOC_RBTREE - if (ALLOC_DEBUG) { - REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false); + +#pragma unused (hfsmp) + + /* Just return 0 for now */ + return 0; +} + + + +/* Summary Table Functions */ +/* + * hfs_check_summary: + * + * This function should be used to query the summary table to see if we can + * bypass a bitmap block or not when we're trying to find a free allocation block. + * + * + * Inputs: + * allocblock - allocation block number. Will be used to infer the correct summary bit. + * hfsmp -- filesystem in question. + * + * Output Arg: + * *freeblocks - set to 1 if we believe at least one free block in this vcbVBMIOSize + * page of the bitmap file. + * + * + * Returns: + * 0 on success + * EINVAL on error + * + */ + +static int hfs_check_summary (struct hfsmount *hfsmp, uint32_t allocblock, uint32_t *freeblocks) { + + int err = EINVAL; + if (hfsmp->vcbVBMIOSize) { + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + uint32_t index; + if (hfs_get_summary_index (hfsmp, allocblock, &index)) { + *freeblocks = 0; + return EINVAL; + } + + /* Ok, now that we have the bit index into the array, what byte is it in? */ + uint32_t byteindex = index / kBitsPerByte; + uint8_t current_byte = hfsmp->hfs_summary_table[byteindex]; + uint8_t bit_in_byte = index % kBitsPerByte; + + if (current_byte & (1 << bit_in_byte)) { + /* + * We do not believe there is anything free in the + * entire vcbVBMIOSize'd block. + */ + *freeblocks = 0; + } + else { + /* Looks like there might be a free block here... */ + *freeblocks = 1; + } + } + err = 0; } - if (hfsmp){ - - if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) { - return 1; + + return err; +} + + +#if 0 +/* + * hfs_get_next_summary + * + * From a given allocation block, jump to the allocation block at the start of the + * next vcbVBMIOSize boundary. This is useful when trying to quickly skip over + * large swaths of bitmap once we have determined that the bitmap is relatively full. + * + * Inputs: hfsmount, starting allocation block number + * Output Arg: *newblock will contain the allocation block number to start + * querying. + * + * Returns: + * 0 on success + * EINVAL if the block argument is too large to be used, or the summary table not live. + * EFBIG if there are no more summary bits to be queried + */ +static int +hfs_get_next_summary (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) { + + u_int32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; + u_int32_t start_offset; + u_int32_t next_offset; + int err = EINVAL; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if ((err = hfs_get_summary_index(hfsmp, block, &start_offset))) { + return err; + } + + next_offset = start_offset++; + + if ((start_offset >= hfsmp->hfs_summary_size) || (next_offset >= hfsmp->hfs_summary_size)) { + /* Can't jump to the next summary bit. */ + return EINVAL; + } + + /* Otherwise, compute and return */ + *newblock = next_offset * bits_per_iosize; + if (*newblock >= hfsmp->totalBlocks) { + return EINVAL; } + err = 0; } -#else - #pragma unused (hfsmp) + + return err; +} + #endif - /* If the RB Tree code is not enabled, then just always return 0 */ + +/* + * hfs_release_summary + * + * Given an extent that is about to be de-allocated on-disk, determine the number + * of summary bitmap bits that need to be marked as 'potentially available'. + * Then go ahead and mark them as free. + * + * Inputs: + * hfsmp - hfs mount + * block - starting allocation block. + * length - length of the extent. + * + * Returns: + * EINVAL upon any errors.
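A worked example of the index math these helpers share, assuming a 4 KB vcbVBMIOSize (illustrative only; the real value comes from the mount structure):

    /* One summary bit spans vcbVBMIOSize * 8 allocation blocks. */
    uint32_t bits_per_iosize = 4096 * 8;               /* 32768 blocks per bit  */
    uint32_t summary_bit = 100000 / bits_per_iosize;   /* block 100000 -> bit 3 */
    uint32_t byte_index  = summary_bit / 8;            /* byte 0 of the table   */
    uint32_t bit_in_byte = summary_bit % 8;            /* bit 3 within that byte */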
+ */ +static int hfs_release_summary(struct hfsmount *hfsmp, uint32_t start_blk, uint32_t length) { + int err = EINVAL; + uint32_t end_blk = (start_blk + length) - 1; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* Figure out what the starting / ending block's summary bits are */ + uint32_t start_bit; + uint32_t end_bit; + uint32_t current_bit; + + err = hfs_get_summary_index (hfsmp, start_blk, &start_bit); + if (err) { + goto release_err; + } + err = hfs_get_summary_index (hfsmp, end_blk, &end_bit); + if (err) { + goto release_err; + } + + if (ALLOC_DEBUG) { + if (start_bit > end_bit) { + panic ("HFS: start > end!, %d %d ", start_bit, end_bit); + } + } + current_bit = start_bit; + while (current_bit <= end_bit) { + err = hfs_set_summary (hfsmp, current_bit, 0); + current_bit++; + } + } + +release_err: + return err; +} + +/* + * hfs_find_summary_free + * + * Given an allocation block as input, returns an allocation block number as output as a + * suggestion for where to start scanning the bitmap in order to find free blocks. It will + * determine the vcbVBMIOsize of the input allocation block, convert that into a summary + * bit, then keep iterating over the summary bits in order to find the first free one. + * + * Inputs: + * hfsmp - hfs mount + * block - starting allocation block + * newblock - output block as suggestion + * + * Returns: + * 0 on success + * ENOSPC if we could not find a free block + */ + +int hfs_find_summary_free (struct hfsmount *hfsmp, uint32_t block, uint32_t *newblock) { + + int err = ENOSPC; + uint32_t bit_index = 0; + uint32_t maybe_has_blocks = 0; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + uint32_t byte_index; + uint8_t curbyte; + uint8_t bit_in_byte; + uint32_t summary_cap; + + /* + * We generate a cap for the summary search because the summary table + * always represents a full summary of the bitmap FILE, which may + * be way more bits than are necessary for the actual filesystem + * whose allocations are mapped by the bitmap. + * + * Compute how much of hfs_summary_size is usable for the given number + * of allocation blocks eligible on this FS. + */ + err = hfs_get_summary_index (hfsmp, hfsmp->allocLimit, &summary_cap); + if (err) { + goto summary_exit; + } + + /* Check the starting block first */ + err = hfs_check_summary (hfsmp, block, &maybe_has_blocks); + if (err) { + goto summary_exit; + } + + if (maybe_has_blocks) { + /* + * It looks like the initial start block could have something. + * Short-circuit and just use that. + */ + *newblock = block; + goto summary_exit; + } + + /* + * OK, now we know that the first block was useless. + * Get the starting summary bit, and find it in the array + */ + maybe_has_blocks = 0; + err = hfs_get_summary_index (hfsmp, block, &bit_index); + if (err) { + goto summary_exit; + } + + /* Iterate until we find something. */ + while (bit_index <= summary_cap) { + byte_index = bit_index / kBitsPerByte; + curbyte = hfsmp->hfs_summary_table[byte_index]; + bit_in_byte = bit_index % kBitsPerByte; + + if (curbyte & (1 << bit_in_byte)) { + /* nothing here. increment and move on */ + bit_index++; + } + else { + /* + * found something! convert bit_index back into + * an allocation block for use. 'newblock' will now + * contain the proper allocation block # based on the bit + * index.
+ */ + err = hfs_get_summary_allocblock (hfsmp, bit_index, newblock); + if (err) { + goto summary_exit; + } + maybe_has_blocks = 1; + break; + } + } + + /* If our loop didn't find anything, set err to ENOSPC */ + if (maybe_has_blocks == 0) { + err = ENOSPC; + } + } + + /* If the summary table is not active for this mount, we'll just return ENOSPC */ +summary_exit: + if (maybe_has_blocks) { + err = 0; + } + + return err; +} + +/* + * hfs_get_summary_allocblock + * + * Convert a summary bit into an allocation block number to use to start searching for free blocks. + * + * Inputs: + * hfsmp - hfs mount + * summarybit - summary bit index + * *alloc - allocation block number in the bitmap file. + * + * Output: + * 0 on success + * EINVAL on failure + */ +int hfs_get_summary_allocblock (struct hfsmount *hfsmp, uint32_t + summarybit, uint32_t *alloc) { + uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; + uint32_t allocblk; + + allocblk = summarybit * bits_per_iosize; + + if (allocblk >= hfsmp->totalBlocks) { + return EINVAL; + } + else { + *alloc = allocblk; + } + + return 0; +} + + +/* + * hfs_set_summary: + * + * This function should be used to manipulate the summary table. + * + * The argument 'inuse' will set the value of the bit in question to one or zero + * depending on its value. + * + * Inputs: + * hfsmp - hfs mount + * summarybit - the bit index into the summary table to set/unset. + * inuse - the value to assign to the bit. + * + * Returns: + * 0 on success + * EINVAL on error + * + */ + +static int hfs_set_summary (struct hfsmount *hfsmp, uint32_t summarybit, uint32_t inuse) { + + int err = EINVAL; + if (hfsmp->vcbVBMIOSize) { + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + + if (ALLOC_DEBUG) { + if (hfsmp->hfs_summary_table == NULL) { + panic ("hfs_set_summary: no table for %p ", hfsmp); + } + } + + /* Ok, now that we have the bit index into the array, what byte is it in? */ + uint32_t byte_index = summarybit / kBitsPerByte; + uint8_t current_byte = hfsmp->hfs_summary_table[byte_index]; + uint8_t bit_in_byte = summarybit % kBitsPerByte; + + if (inuse) { + current_byte = (current_byte | (1 << bit_in_byte)); + } + else { + current_byte = (current_byte & ~(1 << bit_in_byte)); + } + + hfsmp->hfs_summary_table[byte_index] = current_byte; + } + err = 0; + } + + return err; +} + + +/* + * hfs_get_summary_index: + * + * This is a helper function which determines what summary bit represents the vcbVBMIOSize worth + * of IO against the bitmap file. + * + * Returns: + * 0 on success + * EINVAL on failure + */ +static int hfs_get_summary_index (struct hfsmount *hfsmp, uint32_t block, uint32_t* index) { + uint32_t summary_bit; + uint32_t bits_per_iosize; + int err = EINVAL; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + /* Is the input block bigger than the total number of blocks? */ + if (block >= hfsmp->totalBlocks) { + return EINVAL; + } + + /* Is there even a vbmIOSize set? */ + if (hfsmp->vcbVBMIOSize == 0) { + return EINVAL; + } + + bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; + + summary_bit = block / bits_per_iosize; + + *index = summary_bit; + err = 0; + } + + return err; +} + +/* + * hfs_init_summary + * + * From a given mount structure, compute how big the summary table should be for the given + * filesystem, then allocate and bzero the memory.
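The two conversions above are inverses whenever the block is in range. A compact sketch, with bits_per_iosize defined exactly as in those helpers:

    /* Sketch: first allocation block covered by a summary bit, and back.
     * bits_per_iosize = vcbVBMIOSize * kBitsPerByte, as in the code above. */
    static uint32_t
    summary_bit_to_block(uint32_t bit, uint32_t bits_per_iosize)
    {
        return bit * bits_per_iosize;          /* first block in the chunk */
    }

    static uint32_t
    block_to_summary_bit(uint32_t block, uint32_t bits_per_iosize)
    {
        /* Every block within the chunk maps back to the same bit. */
        return block / bits_per_iosize;
    }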
+ * + * Returns: + * 0 on success + * EINVAL on failure + */ +int +hfs_init_summary (struct hfsmount *hfsmp) { + + uint32_t summary_size; + uint32_t summary_size_bytes; + uint8_t *summary_table; + + if (hfsmp->hfs_allocation_cp == NULL) { + if (ALLOC_DEBUG) { + printf("hfs: summary table cannot progress without a bitmap cnode! \n"); + } + return EINVAL; + } + /* + * The practical maximum size of the summary table is 16KB: + * + * (512MB maximum bitmap size / (4k -- min alloc block size)) / 8 bits/byte. + * + * HFS+ will allow filesystems with allocation block sizes smaller than 4k, but + * the end result is that we'll start to issue I/O in 2k or 1k sized chunks, which makes + * supporting this much worse. The math would instead look like this: + * (512MB / 2k) / 8 == 32k. + * + * So, we will disallow the summary table if the allocation block size is < 4k. + */ + + if (hfsmp->blockSize < HFS_MIN_SUMMARY_BLOCKSIZE) { + printf("hfs: summary table not allowed on FS with block size of %d\n", hfsmp->blockSize); + return EINVAL; + } + + summary_size = hfsmp->hfs_allocation_cp->c_blocks; + + if (ALLOC_DEBUG) { + printf("HFS Summary Table Initialization: Bitmap %u blocks\n", + hfsmp->hfs_allocation_cp->c_blocks); + } + + /* + * If the bitmap IO size is not the same as the allocation block size, then + * re-compute the number of summary bits necessary. Note that above, the + * default size is the number of allocation blocks in the bitmap *FILE* + * (not the number of bits in the bitmap itself). If the allocation block size + * is large enough though, we may need to increase this. + */ + if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) { + uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize; + lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize; + + /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */ + summary_size = (uint32_t) lrg_size; + } + + /* + * If the block size is the same as the IO Size, then the total number of blocks + * is already equal to the number of IO units, which is our number of summary bits. + */ + + summary_size_bytes = summary_size / kBitsPerByte; + /* Always add one byte, just in case we have a dangling number of bits */ + summary_size_bytes++; + + if (ALLOC_DEBUG) { + printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, summary_size); + printf("HFS Summary Table Size (in bytes) %d \n", summary_size_bytes); + } + + /* Store the field in the mount point, and then MALLOC/bzero the memory */ + hfsmp->hfs_summary_size = summary_size; + hfsmp->hfs_summary_bytes = summary_size_bytes; + + MALLOC (summary_table, uint8_t*, summary_size_bytes, M_TEMP, M_WAITOK); + if (summary_table == NULL) { + return ENOMEM; + } + bzero (summary_table, summary_size_bytes); + + /* enable the summary table */ + hfsmp->hfs_flags |= HFS_SUMMARY_TABLE; + hfsmp->hfs_summary_table = summary_table; + + if (ALLOC_DEBUG) { + if (hfsmp->hfs_summary_table == NULL) { + panic ("HFS Summary Init: no table for %p\n", hfsmp); + } + } + return 0; +} + +/* + * hfs_rebuild_summary + * + * This function should be used to allocate a new hunk of memory for use as a summary + * table, then copy the existing data into it. We use it whenever the filesystem's size + * changes. When a resize is in progress, you can still use the extant summary + * table if it is active. + * + * Inputs: + * hfsmp -- FS in question + * newlength -- new length of the FS in allocation blocks. + * + * Outputs: + * 0 on success, EINVAL on failure.
If this function fails, the summary table + * will be disabled for future use. + * + */ +static int hfs_rebuild_summary (struct hfsmount *hfsmp) { + + uint32_t new_summary_size; + + new_summary_size = hfsmp->hfs_allocation_cp->c_blocks; + + + if (ALLOC_DEBUG) { + printf("HFS Summary Table Re-init: bitmap %u blocks\n", new_summary_size); + } + + /* + * If the bitmap IO size is not the same as the allocation block size, then re-compute + * the number of summary bits necessary. Note that above, the default size is the number + * of allocation blocks in the bitmap *FILE* (not the number of bits that the bitmap manages). + * If the allocation block size is large enough though, we may need to increase this, as + * bitmap IO is capped at 64k per IO. + */ + if (hfsmp->blockSize != hfsmp->vcbVBMIOSize) { + uint64_t lrg_size = (uint64_t) hfsmp->hfs_allocation_cp->c_blocks * (uint64_t) hfsmp->blockSize; + lrg_size = lrg_size / (uint64_t)hfsmp->vcbVBMIOSize; + + /* With a full bitmap and 64k-capped iosize chunks, this would be 64k */ + new_summary_size = (uint32_t) lrg_size; + } + + /* + * Ok, we have the new summary bitmap theoretical max size. See if it's the same as + * what we've got already... + */ + if (new_summary_size != hfsmp->hfs_summary_size) { + uint32_t summarybytes = new_summary_size / kBitsPerByte; + uint32_t copysize; + uint8_t *newtable; + /* Add one byte for slop */ + summarybytes++; + + if (ALLOC_DEBUG) { + printf("HFS Summary Table: vcbVBMIOSize %d summary bits %d \n", hfsmp->vcbVBMIOSize, new_summary_size); + printf("HFS Summary Table Size (in bytes) %d \n", summarybytes); + } + + /* Attempt to MALLOC the memory */ + MALLOC (newtable, uint8_t*, summarybytes, M_TEMP, M_WAITOK); + if (newtable == NULL) { + /* + * ERROR! We need to disable the table now + */ + FREE (hfsmp->hfs_summary_table, M_TEMP); + hfsmp->hfs_summary_table = NULL; + hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE; + return EINVAL; + } + bzero (newtable, summarybytes); + + /* + * The new table may be smaller than the old one. If this is true, then + * we can't copy the full size of the existing summary table into the new + * one. + * + * The converse is not an issue since we bzeroed the table above. + */ + copysize = hfsmp->hfs_summary_bytes; + if (summarybytes < hfsmp->hfs_summary_bytes) { + copysize = summarybytes; + } + memcpy (newtable, hfsmp->hfs_summary_table, copysize); + + /* We're all good. Destroy the old copy and update ptrs */ + FREE (hfsmp->hfs_summary_table, M_TEMP); + + hfsmp->hfs_summary_table = newtable; + hfsmp->hfs_summary_size = new_summary_size; + hfsmp->hfs_summary_bytes = summarybytes; + } + + return 0; +} + + +#if ALLOC_DEBUG +/* + * hfs_validate_summary + * + * Validation routine for the summary table. Debug-only function. + * + * Bitmap lock must be held. + * + */ +void hfs_validate_summary (struct hfsmount *hfsmp) { + uint32_t i; + int err; + + /* + * Iterate over all of the bits in the summary table, and verify if + * there really are free blocks in the pages that we believe may + * contain free blocks. + */ + + if (hfsmp->hfs_summary_table == NULL) { + panic ("HFS Summary: No HFS summary table!"); + } + + /* 131072 bits == 16384 bytes. This is the theoretical max size of the summary table. We add 1 byte for slop */ + if (hfsmp->hfs_summary_size == 0 || hfsmp->hfs_summary_size > 131080) { + panic("HFS Summary: Size is bad! %d", hfsmp->hfs_summary_size);
%d", hfsmp->hfs_summary_size); + } + + if (hfsmp->vcbVBMIOSize == 0) { + panic("HFS Summary: no VCB VBM IO Size !"); + } + + printf("hfs: summary validation beginning on %s\n", hfsmp->vcbVN); + printf("hfs: summary validation %d summary bits, %d summary blocks\n", hfsmp->hfs_summary_size, hfsmp->totalBlocks); + + + /* iterate through all possible summary bits */ + for (i = 0; i < hfsmp->hfs_summary_size ; i++) { + + uint32_t bits_per_iosize = hfsmp->vcbVBMIOSize * kBitsPerByte; + uint32_t byte_offset = hfsmp->vcbVBMIOSize * i; + + /* Compute the corresponding allocation block for the summary bit. */ + uint32_t alloc_block = i * bits_per_iosize; + + /* + * We use a uint32_t pointer here because it will speed up + * access to the real bitmap data on disk. + */ + uint32_t *block_data; + struct buf *bp; + int counter; + int counter_max; + int saw_free_bits = 0; + + /* Get the block */ + if ((err = ReadBitmapRange (hfsmp, byte_offset, hfsmp->vcbVBMIOSize, &block_data, &bp))) { + panic ("HFS Summary: error (%d) in ReadBitmapRange!", err); + } + + /* Query the status of the bit and then make sure we match */ + uint32_t maybe_has_free_blocks; + err = hfs_check_summary (hfsmp, alloc_block, &maybe_has_free_blocks); + if (err) { + panic ("HFS Summary: hfs_check_summary returned error (%d) ", err); + } + counter_max = hfsmp->vcbVBMIOSize / kBytesPerWord; + + for (counter = 0; counter < counter_max; counter++) { + uint32_t word = block_data[counter]; + + /* We assume that we'll not find any free bits here. */ + if (word != kAllBitsSetInWord) { + if (maybe_has_free_blocks) { + /* All done */ + saw_free_bits = 1; + break; + } + else { + panic ("HFS Summary: hfs_check_summary saw free bits!"); + } + } + } + + if (maybe_has_free_blocks && (saw_free_bits == 0)) { + panic ("HFS Summary: did not see free bits !"); + } + + /* Release the block. */ + if ((err = ReleaseScanBitmapRange (bp))) { + panic ("HFS Summary: Error (%d) in ReleaseScanBitmapRange", err); + } + } + + printf("hfs: summary validation completed successfully on %s\n", hfsmp->vcbVN); + + return; +} +#endif + +/* + * hfs_alloc_scan_range: + * + * This function should be used to scan large ranges of the allocation bitmap + * at one time. It makes two key assumptions: + * + * 1) Bitmap lock is held during the duration of the call (exclusive) + * 2) There are no pages in the buffer cache for any of the bitmap + * blocks that we may encounter. It *MUST* be completely empty. + * + * The expected use case is when we are scanning the bitmap in full while we are + * still mounting the filesystem in order to issue TRIMs or build up the summary + * table for the mount point. It should be done after any potential journal replays + * are completed and their I/Os fully issued. + * + * The key reason for assumption (2) above is that this function will try to issue + * I/O against the bitmap file in chunks as large a possible -- essentially as + * much as the buffer layer will handle (1MB). Because the size of these I/Os + * is larger than what would be expected during normal runtime we must invalidate + * the buffers as soon as we are done with them so that they do not persist in + * the buffer cache for other threads to find, as they'll typically be doing + * allocation-block size I/Os instead. + * + * Input Args: + * hfsmp - hfs mount data structure + * startbit - allocation block # to start our scan. It must be aligned + * on a vcbVBMIOsize boundary. 
+ * list - journal trim list data structure for issuing TRIMs + * + * Output Args: + * bitToScan - Return the next bit to scan if this function is called again. + * Caller will supply this into the next invocation + * of this call as 'startbit'. + */ + +static int hfs_alloc_scan_range(struct hfsmount *hfsmp, u_int32_t startbit, + u_int32_t *bitToScan, struct jnl_trim_list *list) { + + int error; + int readwrite = 1; + u_int32_t curAllocBlock; + struct buf *blockRef = NULL; + u_int32_t *buffer = NULL; + u_int32_t free_offset = 0; //tracks the start of the current free range + u_int32_t size = 0; // tracks the length of the current free range. + u_int32_t iosize = 0; //how much io we should generate against the bitmap + u_int32_t byte_off; // byte offset into the bitmap file. + u_int32_t completed_size; // how much io was actually completed + u_int32_t last_bitmap_block; + u_int32_t current_word; + u_int32_t word_index = 0; + + /* summary table building */ + uint32_t summary_bit = 0; + uint32_t saw_free_blocks = 0; + uint32_t last_marked = 0; + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + readwrite = 0; + } + + /* + * Compute how much I/O we should generate here. + * hfs_scan_range_size will validate that the start bit + * converted into a byte offset into the bitmap file, + * is aligned on a VBMIOSize boundary. + */ + error = hfs_scan_range_size (hfsmp, startbit, &iosize); + if (error) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: hfs_scan_range_size error %d\n", error); + } + return error; + } + + if (iosize < hfsmp->vcbVBMIOSize) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: iosize too small! (iosize %d)\n", iosize); + } + return EINVAL; + } + + /* hfs_scan_range_size should have verified startbit. Convert it to bytes */ + byte_off = startbit / kBitsPerByte; + + /* + * When the journal replays blocks, it does so by writing directly to the disk + * device (bypassing any filesystem vnodes and such). When it finishes its I/Os + * it also immediately re-reads and invalidates the range covered by the bp so + * it does not leave anything lingering in the cache (for iosize reasons). + * + * As such, it is safe to do large I/Os here with ReadBitmapRange. + * + * NOTE: It is not recommended, but it is possible to call the function below + * on sections of the bitmap that may be in core already as long as the pages are not + * dirty. In that case, we'd notice that something starting at that + * logical block of the bitmap exists in the metadata cache, and we'd check + * if the iosize requested is the same as what was already allocated for it. + * Odds are pretty good we're going to request something larger. In that case, + * we just free the existing memory associated with the buf and reallocate a + * larger range. This function should immediately invalidate it as soon as we're + * done scanning, so this shouldn't cause any coherency issues. + */ + + error = ReadBitmapRange(hfsmp, byte_off, iosize, &buffer, &blockRef); + if (error) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: start %d iosize %d ReadBitmapRange error %d\n", startbit, iosize, error); + } + return error; + } + + /* + * At this point, we have a giant wired buffer that represents some portion of + * the bitmap file that we want to analyze. We may not have gotten all 'iosize' + * bytes though, so clip our ending bit to what we actually read in. 
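In bit terms, the clipping just described works out as follows; this restates the statements below with comments, where buf_count() returns the valid byte count of the buffer, per its use in this function:

    /* Convert the bytes actually read into a block (bit) bound, then cap it. */
    completed_size    = buf_count(blockRef);                      /* bytes of bitmap read */
    last_bitmap_block = startbit + completed_size * kBitsPerByte; /* bits scanned         */
    if (last_bitmap_block > hfsmp->totalBlocks) {
        last_bitmap_block = hfsmp->totalBlocks;                   /* never scan past EOV  */
    }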
+ */ + completed_size = buf_count(blockRef); + last_bitmap_block = completed_size * kBitsPerByte; + last_bitmap_block = last_bitmap_block + startbit; + + /* Cap the last block to the total number of blocks if required */ + if (last_bitmap_block > hfsmp->totalBlocks) { + last_bitmap_block = hfsmp->totalBlocks; + } + + /* curAllocBlock represents the logical block we're analyzing. */ + curAllocBlock = startbit; + word_index = 0; + size = 0; + + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if (hfs_get_summary_index (hfsmp, startbit, &summary_bit)) { + error = EINVAL; + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: Could not acquire summary index for %u", startbit); + } + return error; + } + /* + * summary_bit should now be set to the summary bit corresponding to + * the allocation block of the first bit that we're supposed to scan + */ + } + saw_free_blocks = 0; + + while (curAllocBlock < last_bitmap_block) { + u_int32_t bit; + + /* Update the summary table as needed */ + if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) { + if (ALLOC_DEBUG) { + if (hfsmp->hfs_summary_table == NULL) { + panic ("hfs_alloc_scan_range: no summary table!"); + } + } + + uint32_t temp_summary; + error = hfs_get_summary_index (hfsmp, curAllocBlock, &temp_summary); + if (error) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: could not get summary index for %u", curAllocBlock); + } + return EINVAL; + } + + if (ALLOC_DEBUG) { + if (temp_summary < summary_bit) { + panic ("hfs_alloc_scan_range: backwards summary bit?\n"); + } + } + + /* + * If temp_summary is greater than summary_bit, then this + * means that the next allocation block crosses a vcbVBMIOSize boundary + * and we should treat this range of on-disk data as part of a new summary + * bit. + */ + if (temp_summary > summary_bit) { + if (saw_free_blocks == 0) { + /* Mark the bit as totally consumed in the summary table */ + hfs_set_summary (hfsmp, summary_bit, 1); + } + else { + /* Mark the bit as potentially free in summary table */ + hfs_set_summary (hfsmp, summary_bit, 0); + } + last_marked = summary_bit; + /* + * Any time we set the summary table, update our counter which tracks + * what the last bit that was fully marked in the summary table. + * + * Then reset our marker which says we haven't seen a free bit yet. + */ + saw_free_blocks = 0; + summary_bit = temp_summary; + } + } /* End summary table conditions */ + + current_word = SWAP_BE32(buffer[word_index]); + /* Iterate through the word 1 bit at a time... 
*/ + for (bit = 0 ; bit < kBitsPerWord ; bit++, curAllocBlock++) { + if (curAllocBlock >= last_bitmap_block) { + break; + } + u_int32_t allocated = (current_word & (kHighBitInWordMask >> bit)); + + if (allocated) { + if (size != 0) { + if (readwrite) { + /* Insert the previously tracked range of free blocks to the trim list */ + hfs_track_unmap_blocks (hfsmp, free_offset, size, list); + } + add_free_extent_cache (hfsmp, free_offset, size); + size = 0; + free_offset = 0; + } + } + else { + /* Not allocated */ + size++; + if (free_offset == 0) { + /* Start a new run of free space at curAllocBlock */ + free_offset = curAllocBlock; + } + if (saw_free_blocks == 0) { + saw_free_blocks = 1; + } + } + } /* end for loop iterating through the word */ + + if (curAllocBlock < last_bitmap_block) { + word_index++; + } + + } /* End while loop (iterates through last_bitmap_block) */ + + + /* + * We've (potentially) completed our pass through this region of bitmap, + * but one thing we may not have done is update that last summary bit for + * the last page we scanned, because we would have never transitioned across + * a vcbVBMIOSize boundary again. Check for that and update the last bit + * as needed. + * + * Note that 'last_bitmap_block' is *not* inclusive WRT the very last bit in the bitmap + * for the region of bitmap on-disk that we were scanning. (it is one greater). + */ + if ((curAllocBlock >= last_bitmap_block) && + (hfsmp->hfs_flags & HFS_SUMMARY_TABLE)) { + uint32_t temp_summary; + /* temp_block should be INSIDE the region we just scanned, so subtract 1 */ + uint32_t temp_block = last_bitmap_block - 1; + error = hfs_get_summary_index (hfsmp, temp_block, &temp_summary); + if (error) { + if (ALLOC_DEBUG) { + panic ("hfs_alloc_scan_range: end bit curAllocBlock %u, last_bitmap_block %u", curAllocBlock, last_bitmap_block); + } + return EINVAL; + } + + /* Did we already update this in the table? */ + if (temp_summary > last_marked) { + if (saw_free_blocks == 0) { + hfs_set_summary (hfsmp, temp_summary, 1); + } + else { + hfs_set_summary (hfsmp, temp_summary, 0); + } + } + } + + /* + * We may have been tracking a range of free blocks that hasn't been inserted yet. + * Keep the logic for the TRIM and free extent separate from that of the summary + * table management even though they are closely linked. + */ + if (size != 0) { + if (readwrite) { + hfs_track_unmap_blocks (hfsmp, free_offset, size, list); + } + add_free_extent_cache (hfsmp, free_offset, size); + } + + /* + * curAllocBlock represents the next block we need to scan when we return + * to this function. + */ + *bitToScan = curAllocBlock; + ReleaseScanBitmapRange(blockRef); + return 0; + } -/* - * This function scans the specified bitmap block and acts on it as necessary. - * We may add it to the list of blocks to be UNMAP/TRIM'd or add it to allocator - * data structures. This function is not #if'd to the CONFIG_RB case because - * we want to use it unilaterally at mount time if on a UNMAP-capable device. - * - * Additionally, we may want an allocating thread to invoke this if the tree - * does not have enough extents to satisfy an allocation request. + +/* + * Compute the maximum I/O size to generate against the bitmap file. + * Will attempt to generate at LEAST VBMIOsize I/Os for interior ranges of the bitmap. * - * startbit - the allocation block represented by a bit in 'allocblock' where we need to - * start our scan. For instance, we may need to start the normal allocation scan - * in the middle of an existing allocation block.
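The run tracking in the loop above generalizes to the following stand-alone sketch; emit() is a placeholder for the trim-list and extent-cache insertions:

    #include <stdint.h>

    /* Walk one big-endian bitmap word MSB-first, reporting each run of
     * clear (free) bits as a (start, length) pair relative to 'base'. */
    static void
    emit_free_runs(uint32_t word, uint32_t base,
                   void (*emit)(uint32_t start, uint32_t len))
    {
        uint32_t run_start = 0, run_len = 0;
        for (uint32_t bit = 0; bit < 32; bit++) {
            if (word & (0x80000000u >> bit)) {        /* allocated bit */
                if (run_len != 0) {
                    emit(base + run_start, run_len);
                    run_len = 0;
                }
            } else {                                  /* free bit */
                if (run_len == 0)
                    run_start = bit;
                run_len++;
            }
        }
        if (run_len != 0)
            emit(base + run_start, run_len);          /* trailing run */
    }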
- * endBit - the allocation block where we should end this search (inclusive). - * bitToScan - output argument for this function to specify the next bit to scan. + * Inputs: + * hfsmp -- hfsmount to look at + * bitmap_off -- bit offset into the bitmap file + * + * Outputs: + * iosize -- iosize to generate. * * Returns: - * 0 on success - * nonzero on failure. + * 0 on success; EINVAL otherwise */ +static int hfs_scan_range_size (struct hfsmount *hfsmp, uint32_t bitmap_st, uint32_t *iosize) { -static int hfs_alloc_scan_block(struct hfsmount *hfsmp, u_int32_t startbit, - u_int32_t endBit, u_int32_t *bitToScan, - struct jnl_trim_list *list) { - - int error; - u_int32_t curAllocBlock; - struct buf *blockRef = NULL; - u_int32_t *buffer = NULL; - u_int32_t wordIndexInBlock; - u_int32_t blockSize = (u_int32_t)hfsmp->vcbVBMIOSize; - u_int32_t wordsPerBlock = blockSize / kBytesPerWord; - u_int32_t offset = 0; - u_int32_t size = 0; - /* - * Read the appropriate block from the bitmap file. ReadBitmapBlock - * figures out which actual on-disk block corresponds to the bit we're - * looking at. - */ - error = ReadBitmapBlock(hfsmp, startbit, &buffer, (uintptr_t*)&blockRef); - if (error) { - return error; + * The maximum bitmap size is 512MB regardless of ABN size, so we can get away + * with 32 bit math in this function. + */ + + uint32_t bitmap_len; + uint32_t remaining_bitmap; + uint32_t target_iosize; + uint32_t bitmap_off; + + /* Is this bit index not word aligned? If so, immediately fail. */ + if (bitmap_st % kBitsPerWord) { + if (ALLOC_DEBUG) { + panic ("hfs_scan_range_size unaligned start bit! bitmap_st %d \n", bitmap_st); + } + return EINVAL; } - - /* curAllocBlock represents the logical block we're analyzing. */ - curAllocBlock = startbit; - - /* Figure out which word curAllocBlock corresponds to in the block we read */ - wordIndexInBlock = (curAllocBlock / kBitsPerWord) % wordsPerBlock; - - /* Scan a word at a time */ - while (wordIndexInBlock < wordsPerBlock) { - u_int32_t currentWord = SWAP_BE32(buffer[wordIndexInBlock]); - u_int32_t curBit; - - /* modulate curBit because it may start in the middle of a word */ - for (curBit = curAllocBlock % kBitsPerWord; curBit < kBitsPerWord; curBit++) { - - u_int32_t is_allocated = currentWord & (1 << (kBitsWithinWordMask - curBit)); - if (ALLOC_DEBUG) { - u_int32_t res = hfs_isallocated_scan (hfsmp, curAllocBlock, buffer); - if ( ((res) && (!is_allocated)) || ((!res) && (is_allocated))) { - panic("hfs_alloc_scan: curAllocBit %u, curBit (%d), word (0x%x), is_allocated (0x%x) res(0x%x) \n", - curAllocBlock, curBit, currentWord, is_allocated, res); - } - } - /* - * If curBit is not allocated, keep track of the start of the free range. - * Increment a running tally on how many free blocks in a row we've seen. - */ - if (!is_allocated) { - size++; - if (offset == 0) { - offset = curAllocBlock; - } - } - else { - /* - * If we hit an allocated block, insert the extent that tracked the range - * we saw, and reset our tally counter. - */ - if (size != 0) { -#if CONFIG_HFS_ALLOC_RBTREE - extent_tree_free_space(&hfsmp->offset_tree, size, offset); -#endif - hfs_track_unmap_blocks (hfsmp, offset, size, list); - size = 0; - offset = 0; - } - } - curAllocBlock++; - /* - * Exit early if the next bit we'd analyze would take us beyond the end of the - * range that we're supposed to scan. 
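Reduced to its essentials, the size computation that hfs_scan_range_size performs is sketched below. MAXBSIZE and MIN come from the kernel's sys/param.h; the fallbacks here exist only so the fragment stands alone, with the 1 MB value taken from the figure quoted earlier:

    #include <errno.h>
    #include <stdint.h>

    #ifndef MIN
    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    #endif
    #ifndef MAXBSIZE
    #define MAXBSIZE (1024 * 1024)   /* 1 MB buffer-layer ceiling, assumed */
    #endif

    /* Sketch: the I/O must start on a VBMIOSize boundary and is the lesser
     * of MAXBSIZE and the bitmap bytes that remain past the start offset. */
    static int
    scan_iosize(uint32_t start_bit, uint32_t bitmap_len_bytes,
                uint32_t vbm_io_size, uint32_t *iosize)
    {
        uint32_t byte_off = start_bit / 8;       /* bit index -> byte offset */
        if (byte_off & (vbm_io_size - 1))
            return EINVAL;                       /* not on an I/O boundary   */
        *iosize = MIN(MAXBSIZE, bitmap_len_bytes - byte_off);
        return 0;
    }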
- */ - if (curAllocBlock >= endBit) { - goto DoneScanning; - } + + /* bitmap_off is in bytes, not allocation blocks/bits */ + bitmap_off = bitmap_st / kBitsPerByte; + + if ((hfsmp->totalBlocks <= bitmap_st) || (bitmap_off > (512 * 1024 * 1024))) { + if (ALLOC_DEBUG) { + panic ("hfs_scan_range_size: invalid start! bitmap_st %d, bitmap_off %d\n", bitmap_st, bitmap_off); } - wordIndexInBlock++; + return EINVAL; } -DoneScanning: - - /* We may have been tracking a range of free blocks that hasn't been inserted yet. */ - if (size != 0) { -#if CONFIG_HFS_ALLOC_RBTREE - extent_tree_free_space(&hfsmp->offset_tree, size, offset); -#endif - hfs_track_unmap_blocks (hfsmp, offset, size, list); + + /* + * Also invalid if it's not at least aligned to HFS bitmap logical + * block boundaries. We don't have to emit an iosize that's an + * exact multiple of the VBMIOSize, but it must start on such + * a boundary. + * + * The vcbVBMIOSize may be SMALLER than the allocation block size + * on a FS with giant allocation blocks, but it will never be + * greater than it, so it should be safe to start I/O + * aligned on a VBMIOsize boundary. + */ + if (bitmap_off & (hfsmp->vcbVBMIOSize - 1)) { + if (ALLOC_DEBUG) { + panic ("hfs_scan_range_size: unaligned start! bitmap_off %d\n", bitmap_off); + } + return EINVAL; } + /* - * curAllocBlock represents the next block we need to scan while we're in this - * function. + * Generate the total bitmap file length in bytes, then round up + * that value to the end of the last allocation block, if needed (It + * will probably be needed). We won't scan past the last actual + * allocation block. + * + * Unless we're completing the bitmap scan (or bitmap < 1MB), we + * have to complete the I/O on VBMIOSize boundaries, but we can only read + * up until the end of the bitmap file. */ - *bitToScan = curAllocBlock; - - ReleaseScanBitmapBlock(blockRef); - + bitmap_len = hfsmp->totalBlocks / kBitsPerByte; + if (bitmap_len % (hfsmp->blockSize)) { + bitmap_len = (bitmap_len / hfsmp->blockSize); + /* round up to the end of the next alloc block */ + bitmap_len++; + + /* Convert the # of alloc blocks back to bytes. */ + bitmap_len = bitmap_len * hfsmp->blockSize; + } + + remaining_bitmap = bitmap_len - bitmap_off; + + /* + * io size is the MIN of the maximum I/O we can generate or the + * remaining amount of bitmap. + */ + target_iosize = MIN((MAXBSIZE), remaining_bitmap); + *iosize = target_iosize; + return 0; } + + /* * This function is basically the same as hfs_isallocated, except it's designed for * use with the red-black tree validation code. It assumes we're only checking whether @@ -3931,7 +4570,7 @@ DoneScanning: * This should not be called in general purpose scanning code. */ int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int32_t *bp_buf) { - + u_int32_t *currentWord; // Pointer to current word within bitmap block u_int32_t bitMask; // Word with given bits already set (ready to test) u_int32_t firstBit; // Bit index within word of first bit to allocate @@ -3944,8 +4583,8 @@ int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int3 int inuse = 0; int error; - - + + if (bp_buf) { /* just use passed-in buffer if avail. */ buffer = bp_buf; @@ -3958,18 +4597,18 @@ int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int3 if (error) return (error); } - + /* * Initialize currentWord, and wordsLeft. 
*/ u_int32_t wordIndexInBlock; - + bitsPerBlock = hfsmp->vcbVBMIOSize * kBitsPerByte; wordsPerBlock = hfsmp->vcbVBMIOSize / kBytesPerWord; - + wordIndexInBlock = (startingBlock & (bitsPerBlock-1)) / kBitsPerWord; currentWord = buffer + wordIndexInBlock; - + /* * First test any non word aligned bits. */ @@ -3986,7 +4625,7 @@ int hfs_isallocated_scan(struct hfsmount *hfsmp, u_int32_t startingBlock, u_int3 } numBlocks -= numBits; ++currentWord; - + Exit: if(bp_buf == NULL) { if (buffer) { @@ -3994,150 +4633,11 @@ Exit: } } return (inuse); - - - -} - -#if CONFIG_HFS_ALLOC_RBTREE - -/* - * Extern function that is called from mount and upgrade mount routines - * that enable us to initialize the tree. - */ - -__private_extern__ -u_int32_t InitTree(struct hfsmount *hfsmp) { - extent_tree_init (&(hfsmp->offset_tree)); - return 0; -} -/* - * This function builds the trees specified in its arguments. It uses - * buf_meta_breads to scan through the bitmap and re-build the tree state. - * It is very important to use buf_meta_bread because we need to ensure that we - * read the most current version of the blocks that we're scanning. If we used - * cluster_io, then journaled transactions could still be sitting in RAM since they are - * written to disk in the proper location asynchronously. - * - * Because this could be still running when mount has finished, we need to check - * after every allocation block that we're working on if an unmount or some other - * operation that would cause us to teardown has come in. (think downgrade mount). - * If an unmount has come in, then abort whatever we're doing and return -1 - * to indicate we hit an error. If we don't do this, we'd hold up unmount for - * a very long time. - * - * This function assumes that the bitmap lock is acquired exclusively before being - * called. It will drop the lock and then re-acquire it during operation, but - * will always return with the lock held. - */ -__private_extern__ -u_int32_t GenerateTree(struct hfsmount *hfsmp, u_int32_t endBlock, int *flags, int initialscan) { - - REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false); - - u_int32_t *cur_block_eof; - int error = 0; - - int USE_FINE_GRAINED_LOCKING = 0; - - /* Initialize the block counter while we hold the bitmap lock */ - cur_block_eof = &hfsmp->offset_block_end; - - /* - * This loop advances over all allocation bitmap blocks of the current region - * to scan them and add the results into the red-black tree. We use the mount point - * variable offset_block_end as our loop counter. This gives us flexibility - * because we can release the allocation bitmap lock and allow a thread that wants - * to make an allocation to grab the lock and do some scanning on our behalf while we're - * waiting to re-acquire the lock. Then, the allocating thread will only do as much bitmap - * scanning as needed to fulfill its allocation. - * - * If the other thread does IO for us, then it will update the offset_block_end - * variable as well, since it will use the same hfs_alloc_scan_block function to do its bit - * scanning. So when we re-grab the lock, our current EOF/loop will immediately skip us to the next - * block that needs scanning. - */ - - while (*cur_block_eof < endBlock) { - /* - * If the filesystem is being resized before the bitmap has been fully scanned, we'll - * update our endBlock to match the current allocation limit in the hfsmp struct. 
- * The allocLimit field would only be be updated while holding the bitmap lock, so we won't - * be executing this code at the same time that the resize is going on. - */ - if ((initialscan) && (endBlock != hfsmp->allocLimit)) { - - /* If we're past the new/modified allocLimit, then just stop immediately.*/ - if (*cur_block_eof >= hfsmp->allocLimit ) { - break; - } - endBlock = hfsmp->allocLimit; - } - - /* - * TODO: fix unmount stuff! - * See rdar://7391404 - * - * Once the RB allocator is checked in, we'll want to augment it to not hold the - * allocation bitmap lock for the entire duration of the tree scan. For a first check-in - * it's ok to do that but we can't leave it like that forever. - * - * The gist of the new algorithm will work as follows: - * if an unmount is in flight and has been detected: - * abort tree-build. - * unset tree-in-progress bit. - * wakeup unmount thread - * unlock allocation bitmap lock, fail out. - * - * The corresponding code in the unmount side should already be in place. - */ - - error = hfs_alloc_scan_block (hfsmp, *cur_block_eof, endBlock, cur_block_eof); - - //TODO: Fix this below! - if (USE_FINE_GRAINED_LOCKING){ - hfs_systemfile_unlock(hfsmp, *flags); - *flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - } - //TODO: Infer that if *flags == 0, we don't actually need to lock/unlock. - } - - return error; } -/* - * This function destroys the specified rb-trees associated with the mount point. - */ -__private_extern__ -void DestroyTrees(struct hfsmount *hfsmp) { - - if (ALLOC_DEBUG) { - REQUIRE_FILE_LOCK(hfsmp->hfs_allocation_vp, false); - printf("DestroyTrees: Validating red/black tree for vol %s\n", (char*) hfsmp->vcbVN); - hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end ); - } - - /* - * extent_tree_destroy will start with the first entry in the tree (by offset), then - * iterate through the tree quickly using its embedded linked list. This results in tree - * destruction in O(n) time. - */ - - if (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ENABLED) { - extent_tree_destroy(&hfsmp->offset_tree); - - /* Mark Trees as disabled */ - hfsmp->extent_tree_flags &= ~HFS_ALLOC_RB_ENABLED; - } - - return; -} - -#endif - /* * This function resets all of the data structures relevant to the * free extent cache stored in the hfsmount struct. @@ -4160,16 +4660,16 @@ void ResetVCBFreeExtCache(struct hfsmount *hfsmp) KERNEL_DEBUG_CONSTANT(HFSDBG_RESET_EXTENT_CACHE | DBG_FUNC_START, 0, 0, 0, 0, 0); lck_spin_lock(&hfsmp->vcbFreeExtLock); - + /* reset Free Extent Count */ hfsmp->vcbFreeExtCnt = 0; - + /* reset the actual array */ bytes = kMaxFreeExtents * sizeof(HFSPlusExtentDescriptor); freeExt = (void*)(hfsmp->vcbFreeExt); - + bzero (freeExt, bytes); - + lck_spin_unlock(&hfsmp->vcbFreeExtLock); if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) @@ -4198,10 +4698,9 @@ void ResetVCBFreeExtCache(struct hfsmount *hfsmp) */ __private_extern__ u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) { - + /* - * Update allocLimit to the argument specified, but don't do anything else - * if the red/black tree is not enabled. 
+ * Update allocLimit to the argument specified */ hfsmp->allocLimit = new_end_block; @@ -4211,139 +4710,8 @@ u_int32_t UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block) { */ ResetVCBFreeExtCache(hfsmp); -#if CONFIG_HFS_ALLOC_RBTREE - /* Shrinking the existing filesystem */ - if ((new_end_block < hfsmp->offset_block_end) && - (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) { - extent_node_t search_sentinel; - extent_node_t *node = NULL; - /* Remover points to the current item to free/remove from the tree */ - extent_node_t *remover = NULL; - - /* Begin search at the specified offset */ - memset (&search_sentinel, 0, sizeof(extent_node_t)); - search_sentinel.offset = new_end_block; - - /* - * Find the first available extent that satifies the allocation by searching - * from the starting point or 1 earlier. We may need to split apart an existing node - * if it straddles the new alloc limit. - */ - node = extent_tree_off_search_prev(&hfsmp->offset_tree, &search_sentinel); - if (node) { - /* If it's an exact match, then just remove them all from this point forward */ - if (node->offset == new_end_block) { - /* - * Find the previous entry and update its next pointer to NULL - * since this entry is biting the dust. Update remover to node. - */ - extent_node_t *prev = NULL; - prev = extent_tree_off_prev (&hfsmp->offset_tree, node); - if (prev) { - prev->offset_next = NULL; - } - remover = node; - } - else { - /* See if we need to split this node */ - if ((node->offset + node->length) > new_end_block) { - /* - * Update node to reflect its new size up until new_end_block. - */ - remover = node->offset_next; - node->length = new_end_block - node->offset; - /* node is becoming the last free extent in the volume. */ - node->offset_next = NULL; - } - else { - if (node->offset_next == NULL) { - /* - * 'node' points to the last free extent in the volume. - * Coincidentally, it is also before the new cut-off point at which - * we will stop representing bitmap values in the tree. Just bail out now. - */ - return 0; - } - /* - * Otherwise, point our temp variable 'remover' to the node where - * we'll need to start yanking things out of the tree, and make 'node' - * the last element in the tree in the linked list. - */ - remover = node->offset_next; - if (remover->offset <= new_end_block) { - panic ("UpdateAllocLimit: Invalid RBTree node next ptr!"); - } - node->offset_next = NULL; - } - } - - /* - * Remover is our "temp" pointer that points to the current node to remove from - * the offset tree. We'll simply iterate through the tree linked list, removing the current - * element from the tree, freeing them as we come across them. - */ - while (remover) { - extent_node_t *next = remover->offset_next; - extent_tree_remove_node (&hfsmp->offset_tree, remover); - free_node (remover); - remover = next; - } - - if (ALLOC_DEBUG) { - printf ("UpdateAllocLimit: Validating rbtree after truncation\n"); - hfs_validate_rbtree (hfsmp, 0, new_end_block-1); - } - - /* - * Don't forget to shrink offset_block_end after a successful truncation - * new_end_block should represent the number of blocks available on the - * truncated volume. 
- */ - - hfsmp->offset_block_end = new_end_block; - - return 0; - } - else { - if (ALLOC_DEBUG) { - panic ("UpdateAllocLimit: no prev!"); - } - return ENOSPC; - } - } - /* Growing the existing filesystem */ - else if ((new_end_block > hfsmp->offset_block_end) && - (hfsmp->extent_tree_flags & HFS_ALLOC_RB_ACTIVE)) { - int flags = 0; - int retval = 0; - - if (ALLOC_DEBUG) { - printf ("UpdateAllocLimit: Validating rbtree prior to growth\n"); - hfs_validate_rbtree (hfsmp, 0, hfsmp->offset_block_end); - } - - - retval = GenerateTree (hfsmp, new_end_block, &flags, 0); - - /* - * Don't forget to update offset_block_end after a successful tree extension. - */ - if (retval == 0) { - - if (ALLOC_DEBUG) { - printf ("UpdateAllocLimit: Validating rbtree after growth\n"); - hfs_validate_rbtree (hfsmp, 0, new_end_block); - } - - hfsmp->offset_block_end = new_end_block; - } - - return retval; - } - /* Otherwise, do nothing. fall through to the code below. */ - printf ("error : off_block_end: %d, alloclimit: %d, new_end_block: %d\n", - hfsmp->offset_block_end,hfsmp->allocLimit, new_end_block); -#endif + /* Force a rebuild of the summary table. */ + (void) hfs_rebuild_summary (hfsmp); return 0; @@ -4404,17 +4772,17 @@ static void remove_free_extent_list(struct hfsmount *hfsmp, int index) static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_int32_t blockCount) { uint32_t i; - + /* ALLOC_DEBUG: Make sure no extents in the list overlap or are contiguous with the input extent. */ if (ALLOC_DEBUG) { uint32_t endBlock = startBlock + blockCount; for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) { if (endBlock < hfsmp->vcbFreeExt[i].startBlock || - startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) { - continue; + startBlock > (hfsmp->vcbFreeExt[i].startBlock + hfsmp->vcbFreeExt[i].blockCount)) { + continue; } panic("hfs: add_free_extent_list: %p: extent(%u %u) overlaps existing extent (%u %u) at index %d", - hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i); + hfsmp, startBlock, blockCount, hfsmp->vcbFreeExt[i].startBlock, hfsmp->vcbFreeExt[i].blockCount, i); } } @@ -4432,7 +4800,7 @@ static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_ } } } - + /* When we get here, i is the index where the extent should be inserted. */ if (i == kMaxFreeExtents) { /* @@ -4441,13 +4809,13 @@ static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_ */ return i; } - + /* * Grow the list (if possible) to make room for an insert. */ if (hfsmp->vcbFreeExtCnt < kMaxFreeExtents) hfsmp->vcbFreeExtCnt++; - + /* * If we'll be keeping any extents after the insert position, then shift them. */ @@ -4455,7 +4823,7 @@ static int add_free_extent_list(struct hfsmount *hfsmp, u_int32_t startBlock, u_ if (shift_count > 0) { memmove(&hfsmp->vcbFreeExt[i+1], &hfsmp->vcbFreeExt[i], shift_count * sizeof(hfsmp->vcbFreeExt[0])); } - + /* Finally, store the new extent at its correct position. 
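The insertion logic in add_free_extent_list amounts to a bounded, sorted-array insert: find the slot, grow the count if there is room, shift the tail with memmove, and let the smallest entry fall off when the array is full. A user-space sketch under those assumptions — kMaxFreeExtentsDemo and the demo_* names are stand-ins, not the kernel's identifiers:

#include <stdint.h>
#include <string.h>

#define kMaxFreeExtentsDemo 10   /* stand-in for kMaxFreeExtents */

struct demo_extent { uint32_t startBlock; uint32_t blockCount; };

/* Insert into an array kept sorted by blockCount (descending).
 * Returns the insert index, or kMaxFreeExtentsDemo if the extent
 * was smaller than everything already kept in a full list. */
static uint32_t demo_insert(struct demo_extent *list, uint32_t *count,
                            uint32_t startBlock, uint32_t blockCount)
{
    uint32_t i;
    for (i = 0; i < *count; ++i) {
        if (blockCount > list[i].blockCount)
            break;
    }
    if (i == kMaxFreeExtentsDemo)
        return i;                          /* did not qualify */

    if (*count < kMaxFreeExtentsDemo)
        (*count)++;                        /* grow if there is room */

    uint32_t shift = *count - 1 - i;       /* entries kept after the slot */
    if (shift > 0)
        memmove(&list[i + 1], &list[i], shift * sizeof(list[0]));

    list[i].startBlock = startBlock;
    list[i].blockCount = blockCount;
    return i;
}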
*/ hfsmp->vcbFreeExt[i].startBlock = startBlock; hfsmp->vcbFreeExt[i].blockCount = blockCount; @@ -4482,21 +4850,14 @@ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc u_int32_t i, insertedIndex; u_int32_t currentStart, currentEnd, endBlock; int extentsRemoved = 0; - -#if CONFIG_HFS_ALLOC_RBTREE - /* If red-black tree is enabled, no free extent cache is necessary */ - if (hfs_isrbtree_active(hfsmp) == true) { - return; - } -#endif - + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0); - + endBlock = startBlock + blockCount; - + lck_spin_lock(&hfsmp->vcbFreeExtLock); - + /* * Iterate over all of the extents in the free extent cache, removing or * updating any entries that overlap with the input extent. @@ -4504,7 +4865,7 @@ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc for (i = 0; i < hfsmp->vcbFreeExtCnt; ++i) { currentStart = hfsmp->vcbFreeExt[i].startBlock; currentEnd = currentStart + hfsmp->vcbFreeExt[i].blockCount; - + /* * If the current extent is entirely before or entirely after the * the extent to be removed, then we keep it as-is. @@ -4512,14 +4873,14 @@ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc if (currentEnd <= startBlock || currentStart >= endBlock) { continue; } - + /* * If the extent being removed entirely contains the current extent, * then remove the current extent. */ if (startBlock <= currentStart && endBlock >= currentEnd) { remove_free_extent_list(hfsmp, i); - + /* * We just removed the extent at index i. The extent at * index i+1 just got shifted to index i. So decrement i @@ -4531,7 +4892,7 @@ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc ++extentsRemoved; continue; } - + /* * If the extent being removed is strictly "in the middle" of the * current extent, then we need to split the current extent into @@ -4545,7 +4906,7 @@ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc add_free_extent_list(hfsmp, endBlock, currentEnd - endBlock); break; } - + /* * The only remaining possibility is that the extent to be removed * overlaps the start or end (but not both!) of the current extent. @@ -4571,14 +4932,14 @@ static void remove_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc --i; /* Undo the "++i" in the loop, so we examine the entry at index i again. */ } } - + lck_spin_unlock(&hfsmp->vcbFreeExtLock); - + sanity_check_free_ext(hfsmp, 0); - + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_REMOVE_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, extentsRemoved, 0); - + return; } @@ -4610,36 +4971,22 @@ static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc uint32_t endBlock; uint32_t currentEnd; uint32_t i; - + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_START, startBlock, blockCount, 0, 0, 0); - - /* - * If using the red-black tree allocator, then there's no need to special case - * for the sparse device case. We'll simply add the region we've recently freed - * to the red-black tree, where it will get sorted by offset and length. The only special - * casing will need to be done on the allocation side, where we may favor free extents - * based on offset even if it will cause fragmentation. 
This may be true, for example, if - * we are trying to reduce the number of bandfiles created in a sparse bundle disk image. - */ -#if CONFIG_HFS_ALLOC_RBTREE - if (hfs_isrbtree_active(hfsmp) == true) { - goto out_not_locked; - } -#endif - + /* No need to add extent that is beyond current allocLimit */ if (startBlock >= hfsmp->allocLimit) { goto out_not_locked; } - + /* If end of the free extent is beyond current allocLimit, clip the extent */ if ((startBlock + blockCount) > hfsmp->allocLimit) { blockCount = hfsmp->allocLimit - startBlock; } - + lck_spin_lock(&hfsmp->vcbFreeExtLock); - + /* * Make a pass through the free extent cache, looking for known extents that * overlap or are contiguous with the extent to be added. We'll remove those @@ -4657,7 +5004,7 @@ static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc startBlock = hfsmp->vcbFreeExt[i].startBlock; if (currentEnd > endBlock) endBlock = currentEnd; - + remove_free_extent_list(hfsmp, i); /* * We just removed the extent at index i. The extent at @@ -4670,15 +5017,15 @@ static Boolean add_free_extent_cache(struct hfsmount *hfsmp, u_int32_t startBloc } } add_free_extent_list(hfsmp, startBlock, endBlock - startBlock); - + lck_spin_unlock(&hfsmp->vcbFreeExtLock); - + out_not_locked: sanity_check_free_ext(hfsmp, 0); - + if (hfs_kdebug_allocation & HFSDBG_EXT_CACHE_ENABLED) KERNEL_DEBUG_CONSTANT(HFSDBG_ADD_EXTENT_CACHE | DBG_FUNC_END, 0, 0, 0, retval, 0); - + return retval; } @@ -4687,16 +5034,16 @@ static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated) { u_int32_t i, j; - /* Do not do anything if debug is not on, or if we're using the red-black tree */ - if ((ALLOC_DEBUG == 0) || (hfs_isrbtree_active(hfsmp) == true)) { + /* Do not do anything if debug is not on */ + if (ALLOC_DEBUG == 0) { return; } lck_spin_lock(&hfsmp->vcbFreeExtLock); - + if (hfsmp->vcbFreeExtCnt > kMaxFreeExtents) panic("hfs: %p: free extent count (%u) is too large", hfsmp, hfsmp->vcbFreeExtCnt); - + /* * Iterate the Free extent cache and ensure no entries are bogus or refer to * allocated blocks. @@ -4707,8 +5054,6 @@ static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated) start = hfsmp->vcbFreeExt[i].startBlock; nblocks = hfsmp->vcbFreeExt[i].blockCount; - //printf ("hfs: %p: slot:%d (%u,%u)\n", hfsmp, i, start, nblocks); - /* Check if any of the blocks in free extent cache are allocated. * This should not be enabled always because it might take * very long for large extents that get added to the list. 
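The removal loop above distinguishes exactly four geometric cases between a cached free extent and the range being removed. Isolated as a sketch, with hypothetical demo_* names:

#include <stdint.h>

enum demo_overlap {
    DEMO_KEEP,    /* entirely before or after: leave the entry as-is  */
    DEMO_DROP,    /* removal covers the whole entry: delete it        */
    DEMO_SPLIT,   /* removal strictly inside: split into two pieces   */
    DEMO_TRIM     /* overlaps one end only (never both): shrink it    */
};

/* Classify cached extent [curStart, curEnd) against removal [start, end). */
static enum demo_overlap demo_classify(uint32_t curStart, uint32_t curEnd,
                                       uint32_t start, uint32_t end)
{
    if (curEnd <= start || curStart >= end)
        return DEMO_KEEP;
    if (start <= curStart && end >= curEnd)
        return DEMO_DROP;
    if (start > curStart && end < curEnd)
        return DEMO_SPLIT;
    return DEMO_TRIM;
}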
@@ -4724,7 +5069,7 @@ static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated) lck_spin_unlock(&hfsmp->vcbFreeExtLock); if (hfs_isallocated(hfsmp, start, nblocks)) { panic("hfs: %p: slot %d:(%u,%u) in the free extent array is allocated\n", - hfsmp, i, start, nblocks); + hfsmp, i, start, nblocks); } lck_spin_lock(&hfsmp->vcbFreeExtLock); } @@ -4739,8 +5084,8 @@ static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated) for(j=i+1; j < hfsmp->vcbFreeExtCnt; j++) { if (start == hfsmp->vcbFreeExt[j].startBlock) { panic("hfs: %p: slot %d:(%u,%u) and %d:(%u,%u) are duplicate\n", - hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock, - hfsmp->vcbFreeExt[j].blockCount); + hfsmp, i, start, nblocks, j, hfsmp->vcbFreeExt[j].startBlock, + hfsmp->vcbFreeExt[j].blockCount); } } @@ -4750,18 +5095,19 @@ static void sanity_check_free_ext(struct hfsmount *hfsmp, int check_allocated) /* sparse devices are sorted by starting block number (ascending) */ if (hfsmp->vcbFreeExt[i].startBlock > hfsmp->vcbFreeExt[i+1].startBlock) { panic ("hfs: %p: SPARSE %d:(%u,%u) and %d:(%u,%u) are out of order\n", - hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, - hfsmp->vcbFreeExt[i+1].blockCount); + hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, + hfsmp->vcbFreeExt[i+1].blockCount); } } else { /* normally sorted by block count (descending) */ if (hfsmp->vcbFreeExt[i].blockCount < hfsmp->vcbFreeExt[i+1].blockCount) { panic ("hfs: %p: %d:(%u,%u) and %d:(%u,%u) are out of order\n", - hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, - hfsmp->vcbFreeExt[i+1].blockCount); + hfsmp, i, start, nblocks, i+1, hfsmp->vcbFreeExt[i+1].startBlock, + hfsmp->vcbFreeExt[i+1].blockCount); } } } } lck_spin_unlock(&hfsmp->vcbFreeExtLock); } + diff --git a/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c b/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c index 2eece14e1..2d1c22900 100644 --- a/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c +++ b/bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -420,6 +420,68 @@ int32_t FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCo return 1; } +/* + * UnicodeBinaryCompare + * Compare two UTF-16 strings and perform case-sensitive (binary) matching against them. + * + * Results are emitted like FastUnicodeCompare: + * + * + * IF RESULT + * -------------------------- + * str1 < str2 => -1 + * str1 = str2 => 0 + * str1 > str2 => +1 + * + * The case matching source code is greatly simplified due to the lack of case-folding + * in this comparison routine. We compare, in order: the lengths, then do character-by- + * character comparisons. 
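The length-first, character-second ordering that UnicodeBinaryCompare implements further down in this hunk can be condensed into a few lines. A standalone sketch, with uint16_t standing in for UniChar and the demo_ name hypothetical:

#include <stddef.h>
#include <stdint.h>

static int32_t demo_binary_compare(const uint16_t *s1, size_t len1,
                                   const uint16_t *s2, size_t len2)
{
    /* seed the result from the length comparison ... */
    int32_t result = (len1 < len2) ? -1 : (len1 > len2) ? 1 : 0;
    size_t n = (len1 < len2) ? len1 : len2;

    /* ... then let the first differing character override it */
    while (n--) {
        uint16_t c1 = *s1++;
        uint16_t c2 = *s2++;
        if (c1 != c2)
            return (c1 > c2) ? 1 : -1;
    }
    return result;
}

So a shorter string that is a prefix of a longer one compares as less, exactly as the seeded result dictates when no character differs.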
+ * + */ +int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1, register ItemCount len1, + register ConstUniCharArrayPtr str2, register ItemCount len2) { + uint16_t c1; + uint16_t c2; + int string_length; + int32_t result = 0; + + /* Set default values for the two character pointers */ + c1 = 0; + c2 = 0; + + /* First generate the string length (for comparison purposes) */ + if (len1 < len2) { + string_length = len1; + --result; + } + else if (len1 > len2) { + string_length = len2; + ++result; + } + else { + string_length = len1; + } + + /* now compare the two string pointers */ + while (string_length--) { + c1 = *(str1++); + c2 = *(str2++); + + if (c1 > c2) { + result = 1; + break; + } + + if (c1 < c2) { + result = -1; + break; + } + /* If equal, iterate to the next two respective chars */ + } + + return result; +} + OSErr ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen, diff --git a/bsd/hfs/hfscommon/headers/BTreesInternal.h b/bsd/hfs/hfscommon/headers/BTreesInternal.h index c683d0b74..b95d8c992 100644 --- a/bsd/hfs/hfscommon/headers/BTreesInternal.h +++ b/bsd/hfs/hfscommon/headers/BTreesInternal.h @@ -148,7 +148,7 @@ enum { kForceReadBlock = 0x00000002, //€€ how does this relate to Read/Verify? Do we need this? kGetEmptyBlock = 0x00000008 }; -typedef OptionBits GetBlockOptions; +typedef u_int32_t GetBlockOptions; /* Fork Level Access Method Block release options @@ -160,7 +160,7 @@ enum { kTrashBlock = 0x00000004, kLockTransaction = 0x00000100 }; -typedef OptionBits ReleaseBlockOptions; +typedef u_int32_t ReleaseBlockOptions; typedef u_int64_t FSSize; typedef u_int32_t ForkBlockNumber; diff --git a/bsd/hfs/hfscommon/headers/FileMgrInternal.h b/bsd/hfs/hfscommon/headers/FileMgrInternal.h index e8ddcac86..18e64caf4 100644 --- a/bsd/hfs/hfscommon/headers/FileMgrInternal.h +++ b/bsd/hfs/hfscommon/headers/FileMgrInternal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -211,12 +211,10 @@ ReplaceBTreeRecord (FileReference refNum, /* * Flags for BlockAllocate() and BlockDeallocate() */ -/* Force contiguous block allocation and to force minBlocks to actually be allocated */ -#define HFS_ALLOC_FORCECONTIG 0x1 -/* Can use metadata zone blocks */ -#define HFS_ALLOC_METAZONE 0x2 -/* Skip checking and updating of free blocks during allocation and deallocation */ -#define HFS_ALLOC_SKIPFREEBLKS 0x4 +#define HFS_ALLOC_FORCECONTIG 0x1 //force contiguous block allocation; minblocks must be allocated +#define HFS_ALLOC_METAZONE 0x2 //can use metazone blocks +#define HFS_ALLOC_SKIPFREEBLKS 0x4 //skip checking/updating freeblocks during alloc/dealloc +#define HFS_ALLOC_FLUSHTXN 0x8 //pick best fit for allocation, even if a jnl flush is req'd EXTERN_API_C( OSErr ) BlockAllocate (ExtendedVCB * vcb, @@ -252,29 +250,20 @@ EXTERN_API_C( u_int32_t ) UpdateAllocLimit (struct hfsmount *hfsmp, u_int32_t new_end_block); EXTERN_API_C( u_int32_t ) -UnmapBlocks(struct hfsmount *hfsmp); +ScanUnmapBlocks(struct hfsmount *hfsmp); -#if CONFIG_HFS_ALLOC_RBTREE -EXTERN_API_C( u_int32_t ) -GenerateTree( struct hfsmount *hfsmp, u_int32_t end_block, int *flags, int initialscan); - -EXTERN_API_C( void ) -DestroyTrees( struct hfsmount *hfsmp); - -EXTERN_API_C( u_int32_t ) -InitTree(struct hfsmount *hfsmp); -#endif - - - +EXTERN_API_C( int ) +hfs_init_summary (struct hfsmount *hfsmp); /* File Extent Mapping routines*/ EXTERN_API_C( OSErr ) FlushExtentFile (ExtendedVCB * vcb); +#if CONFIG_HFS_STD EXTERN_API_C( int32_t ) CompareExtentKeys (const HFSExtentKey * searchKey, const HFSExtentKey * trialKey); +#endif EXTERN_API_C( int32_t ) CompareExtentKeysPlus (const HFSPlusExtentKey *searchKey, diff --git a/bsd/hfs/hfscommon/headers/HFSUnicodeWrappers.h b/bsd/hfs/hfscommon/headers/HFSUnicodeWrappers.h index 8508f5087..50ae8d87e 100644 --- a/bsd/hfs/hfscommon/headers/HFSUnicodeWrappers.h +++ b/bsd/hfs/hfscommon/headers/HFSUnicodeWrappers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003, 2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003, 2005-2013 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,6 +106,8 @@ extern OSErr ConvertUnicodeToUTF8Mangled ( ByteCount srcLen, extern int32_t FastUnicodeCompare(register ConstUniCharArrayPtr str1, register ItemCount length1, register ConstUniCharArrayPtr str2, register ItemCount length2); +extern int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1, register ItemCount length1, + register ConstUniCharArrayPtr str2, register ItemCount length2); extern int32_t FastRelString( ConstStr255Param str1, ConstStr255Param str2 ); diff --git a/bsd/hfs/hfscommon/headers/HybridAllocator.h b/bsd/hfs/hfscommon/headers/HybridAllocator.h deleted file mode 100644 index 4add9daee..000000000 --- a/bsd/hfs/hfscommon/headers/HybridAllocator.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - - -#ifndef __HYBRID_ALLOC__ -#define __HYBRID_ALLOC__ - -#include -#include "RedBlackTree.h" - -typedef struct extent_node extent_node_t; - -struct extent_node -{ - u_int32_t length; - u_int32_t offset; - struct extent_node *offset_next; - rb_node(extent_node_t) offset_link; -}; - -typedef rb_tree(extent_node_t) extent_tree_offset_t; - -extern extent_node_t * -alloc_node(u_int32_t length, u_int32_t offset); - -extern void -free_node(extent_node_t *node); - -extern extent_node_t * -extent_tree_free_space( extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset); - -extern void -extent_tree_offset_print(extent_tree_offset_t *offset_tree); - -extern int32_t -extent_tree_offset_alloc_space(extent_tree_offset_t *offset_tree, u_int32_t size, u_int32_t offset); - -extern int32_t -extent_tree_offset_alloc_unaligned(extent_tree_offset_t *tree, u_int32_t size, u_int32_t offset); - - -extern void -extent_tree_remove_node (extent_tree_offset_t *offset_tree, extent_node_t * node); - -extern extent_node_t * -extent_tree_off_first (extent_tree_offset_t *offset_tree); - -extern extent_node_t * -extent_tree_off_search(extent_tree_offset_t *offset_tree, extent_node_t *node); - -extern extent_node_t * -extent_tree_off_search_next(extent_tree_offset_t *offset_tree, extent_node_t *node); - -extern extent_node_t* -extent_tree_off_search_nextWithSize (extent_tree_offset_t *offset_tree, extent_node_t *node); - -extern extent_node_t * -extent_tree_off_search_prev(extent_tree_offset_t *offset_tree, extent_node_t *node); - -extern extent_node_t * -extent_tree_off_next(extent_tree_offset_t *offset_tree, extent_node_t *node); - -extern extent_node_t * -extent_tree_off_prev(extent_tree_offset_t *offset_tree, extent_node_t *node); - -extern void -extent_tree_init(extent_tree_offset_t *offset_tree); - -extern void -extent_tree_destroy(extent_tree_offset_t *offset_tree); - -extern int -cmp_offset_node(extent_node_t *node_1, extent_node_t *node_2); - - -#endif diff --git a/bsd/hfs/hfscommon/headers/RedBlackTree.h b/bsd/hfs/hfscommon/headers/RedBlackTree.h deleted file mode 100644 index 21342296c..000000000 --- a/bsd/hfs/hfscommon/headers/RedBlackTree.h +++ /dev/null @@ -1,969 +0,0 @@ -/* - * Copyright (c) 2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/****************************************************************************** - * - * Copyright (C) 2008 Jason Evans . - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice(s), this list of conditions and the following disclaimer - * unmodified other than the allowable addition of one or more - * copyright notices. - * 2. Redistributions in binary form must reproduce the above copyright - * notice(s), this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE - * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, - * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ****************************************************************************** - * - * cpp macro implementation of left-leaning red-black trees. - * - * Usage: - * - * (Optional, see assert(3).) - * #define NDEBUG - * - * (Required.) - * #include - * #include - * ... - * - * All operations are done non-recursively. Parent pointers are not used, and - * color bits are stored in the least significant bit of right-child pointers, - * thus making node linkage as compact as is possible for red-black trees. 
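The compact linkage this header's comment refers to relies on pointer tagging: nodes are at least 2-byte aligned, so bit 0 of the right-child pointer is free to hold the node's color. The rbp_*_get/set macros below do this with casts; written as plain functions (demo_ names hypothetical), the trick looks like:

#include <stdbool.h>
#include <stdint.h>

struct demo_node {
    struct demo_node *left;
    struct demo_node *right_red;    /* right-child pointer | color bit */
};

static struct demo_node *demo_right_get(const struct demo_node *n)
{
    return (struct demo_node *)((uintptr_t)n->right_red & ~(uintptr_t)1);
}

static bool demo_red_get(const struct demo_node *n)
{
    return ((uintptr_t)n->right_red & 1) != 0;
}

static void demo_right_set(struct demo_node *n, struct demo_node *right)
{
    /* replace the pointer bits, preserve the color bit */
    n->right_red = (struct demo_node *)
        ((uintptr_t)right | ((uintptr_t)n->right_red & 1));
}

static void demo_color_set(struct demo_node *n, bool red)
{
    /* replace the color bit, preserve the pointer bits */
    n->right_red = (struct demo_node *)
        (((uintptr_t)n->right_red & ~(uintptr_t)1) | (uintptr_t)red);
}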
- * - * Some macros use a comparison function pointer, which is expected to have the - * following prototype: - * - * int (a_cmp *)(a_type *a_node, a_type *a_other); - * ^^^^^^ - * or a_key - * - * Interpretation of comparision function return values: - * - * -1 : a_node < a_other - * 0 : a_node == a_other - * 1 : a_node > a_other - * - * In all cases, the a_node or a_key macro argument is the first argument to the - * comparison function, which makes it possible to write comparison functions - * that treat the first argument specially. - * - ******************************************************************************/ - -#ifndef RB_H_ -#define RB_H_ - -#define RB_COMPACT -#ifdef RB_COMPACT -/* Node structure. */ -#define rb_node(a_type) \ -struct { \ - a_type *rbn_left; \ - a_type *rbn_right_red; \ -} -#else -#define rb_node(a_type) \ -struct { \ - a_type *rbn_left; \ - a_type *rbn_right; \ - bool rbn_red; \ -} -#endif - -/* Root structure. */ -#define rb_tree(a_type) \ -struct { \ - a_type *rbt_root; \ - a_type rbt_nil; \ -} - -/* Left accessors. */ -#define rbp_left_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_left) -#define rbp_left_set(a_type, a_field, a_node, a_left) do { \ - (a_node)->a_field.rbn_left = a_left; \ -} while (0) - -#ifdef RB_COMPACT -/* Right accessors. */ -#define rbp_right_get(a_type, a_field, a_node) \ - ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ - & ((ssize_t)-2))) -#define rbp_right_set(a_type, a_field, a_node, a_right) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ - | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ -} while (0) - -/* Color accessors. */ -#define rbp_red_get(a_type, a_field, a_node) \ - ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ - & ((size_t)1))) -#define rbp_color_set(a_type, a_field, a_node, a_red) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ - (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ - | ((ssize_t)a_red)); \ -} while (0) -#define rbp_red_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ - (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ -} while (0) -#define rbp_black_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ - (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ -} while (0) -#else -/* Right accessors. */ -#define rbp_right_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_right) -#define rbp_right_set(a_type, a_field, a_node, a_right) do { \ - (a_node)->a_field.rbn_right = a_right; \ -} while (0) - -/* Color accessors. */ -#define rbp_red_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_red) -#define rbp_color_set(a_type, a_field, a_node, a_red) do { \ - (a_node)->a_field.rbn_red = (a_red); \ -} while (0) -#define rbp_red_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_red = true; \ -} while (0) -#define rbp_black_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_red = false; \ -} while (0) -#endif - -/* Node initializer. */ -#define rbp_node_new(a_type, a_field, a_tree, a_node) do { \ - rbp_left_set(a_type, a_field, (a_node), &(a_tree)->rbt_nil); \ - rbp_right_set(a_type, a_field, (a_node), &(a_tree)->rbt_nil); \ - rbp_red_set(a_type, a_field, (a_node)); \ -} while (0) - -/* Tree initializer. 
*/ -#define rb_new(a_type, a_field, a_tree) do { \ - (a_tree)->rbt_root = &(a_tree)->rbt_nil; \ - rbp_node_new(a_type, a_field, a_tree, &(a_tree)->rbt_nil); \ - rbp_black_set(a_type, a_field, &(a_tree)->rbt_nil); \ -} while (0) - -/* Tree operations. */ -#define rbp_black_height(a_type, a_field, a_tree, r_height) do { \ - a_type *rbp_bh_t; \ - for (rbp_bh_t = (a_tree)->rbt_root, (r_height) = 0; \ - rbp_bh_t != &(a_tree)->rbt_nil; \ - rbp_bh_t = rbp_left_get(a_type, a_field, rbp_bh_t)) { \ - if (rbp_red_get(a_type, a_field, rbp_bh_t) == false) { \ - (r_height)++; \ - } \ - } \ -} while (0) - -#define rbp_first(a_type, a_field, a_tree, a_root, r_node) do { \ - for ((r_node) = (a_root); \ - rbp_left_get(a_type, a_field, (r_node)) != &(a_tree)->rbt_nil; \ - (r_node) = rbp_left_get(a_type, a_field, (r_node))) { \ - } \ -} while (0) - -#define rbp_last(a_type, a_field, a_tree, a_root, r_node) do { \ - for ((r_node) = (a_root); \ - rbp_right_get(a_type, a_field, (r_node)) != &(a_tree)->rbt_nil; \ - (r_node) = rbp_right_get(a_type, a_field, (r_node))) { \ - } \ -} while (0) - -#define rbp_next(a_type, a_field, a_cmp, a_tree, a_node, r_node) do { \ - if (rbp_right_get(a_type, a_field, (a_node)) != &(a_tree)->rbt_nil) { \ - rbp_first(a_type, a_field, a_tree, rbp_right_get(a_type, \ - a_field, (a_node)), (r_node)); \ - } else { \ - a_type *rbp_n_t = (a_tree)->rbt_root; \ - assert(rbp_n_t != &(a_tree)->rbt_nil); \ - (r_node) = &(a_tree)->rbt_nil; \ - while (true) { \ - int rbp_n_cmp = (a_cmp)((a_node), rbp_n_t); \ - if (rbp_n_cmp < 0) { \ - (r_node) = rbp_n_t; \ - rbp_n_t = rbp_left_get(a_type, a_field, rbp_n_t); \ - } else if (rbp_n_cmp > 0) { \ - rbp_n_t = rbp_right_get(a_type, a_field, rbp_n_t); \ - } else { \ - break; \ - } \ - assert(rbp_n_t != &(a_tree)->rbt_nil); \ - } \ - } \ -} while (0) - -#define rbp_prev(a_type, a_field, a_cmp, a_tree, a_node, r_node) do { \ - if (rbp_left_get(a_type, a_field, (a_node)) != &(a_tree)->rbt_nil) {\ - rbp_last(a_type, a_field, a_tree, rbp_left_get(a_type, \ - a_field, (a_node)), (r_node)); \ - } else { \ - a_type *rbp_p_t = (a_tree)->rbt_root; \ - assert(rbp_p_t != &(a_tree)->rbt_nil); \ - (r_node) = &(a_tree)->rbt_nil; \ - while (true) { \ - int rbp_p_cmp = (a_cmp)((a_node), rbp_p_t); \ - if (rbp_p_cmp < 0) { \ - rbp_p_t = rbp_left_get(a_type, a_field, rbp_p_t); \ - } else if (rbp_p_cmp > 0) { \ - (r_node) = rbp_p_t; \ - rbp_p_t = rbp_right_get(a_type, a_field, rbp_p_t); \ - } else { \ - break; \ - } \ - assert(rbp_p_t != &(a_tree)->rbt_nil); \ - } \ - } \ -} while (0) - -#define rb_first(a_type, a_field, a_tree, r_node) do { \ - rbp_first(a_type, a_field, a_tree, (a_tree)->rbt_root, (r_node)); \ - if ((r_node) == &(a_tree)->rbt_nil) { \ - (r_node) = NULL; \ - } \ -} while (0) - -#define rb_last(a_type, a_field, a_tree, r_node) do { \ - rbp_last(a_type, a_field, a_tree, (a_tree)->rbt_root, r_node); \ - if ((r_node) == &(a_tree)->rbt_nil) { \ - (r_node) = NULL; \ - } \ -} while (0) - -#define rb_next(a_type, a_field, a_cmp, a_tree, a_node, r_node) do { \ - rbp_next(a_type, a_field, a_cmp, a_tree, (a_node), (r_node)); \ - if ((r_node) == &(a_tree)->rbt_nil) { \ - (r_node) = NULL; \ - } \ -} while (0) - -#define rb_prev(a_type, a_field, a_cmp, a_tree, a_node, r_node) do { \ - rbp_prev(a_type, a_field, a_cmp, a_tree, (a_node), (r_node)); \ - if ((r_node) == &(a_tree)->rbt_nil) { \ - (r_node) = NULL; \ - } \ -} while (0) - -#define rb_search(a_type, a_field, a_cmp, a_tree, a_key, r_node) do { \ - int rbp_se_cmp; \ - (r_node) = (a_tree)->rbt_root; \ - while 
((r_node) != &(a_tree)->rbt_nil && (rbp_se_cmp = (a_cmp)((a_key), (r_node))) != 0) { \ - if (rbp_se_cmp < 0) { \ - (r_node) = rbp_left_get(a_type, a_field, (r_node)); \ - } else { \ - (r_node) = rbp_right_get(a_type, a_field, (r_node)); \ - } \ - } \ - if ((r_node) == &(a_tree)->rbt_nil) { \ - (r_node) = NULL; \ - } \ -} while (0) - -/* - * Find a match if it exists. Otherwise, find the next greater node, if one - * exists. - */ -#define rb_nsearch(a_type, a_field, a_cmp, a_tree, a_key, r_node) do { \ - a_type *rbp_ns_t = (a_tree)->rbt_root; \ - (r_node) = NULL; \ - while (rbp_ns_t != &(a_tree)->rbt_nil) { \ - int rbp_ns_cmp = (a_cmp)((a_key), rbp_ns_t); \ - if (rbp_ns_cmp < 0) { \ - (r_node) = rbp_ns_t; \ - rbp_ns_t = rbp_left_get(a_type, a_field, rbp_ns_t); \ - } else if (rbp_ns_cmp > 0) { \ - rbp_ns_t = rbp_right_get(a_type, a_field, rbp_ns_t); \ - } else { \ - (r_node) = rbp_ns_t; \ - break; \ - } \ - } \ -} while (0) - -/* - * Find a match if it exists. Otherwise, find the previous lesser node, if one - * exists. - */ -#define rb_psearch(a_type, a_field, a_cmp, a_tree, a_key, r_node) do { \ - a_type *rbp_ps_t = (a_tree)->rbt_root; \ - (r_node) = NULL; \ - while (rbp_ps_t != &(a_tree)->rbt_nil) { \ - int rbp_ps_cmp = (a_cmp)((a_key), rbp_ps_t); \ - if (rbp_ps_cmp < 0) { \ - rbp_ps_t = rbp_left_get(a_type, a_field, rbp_ps_t); \ - } else if (rbp_ps_cmp > 0) { \ - (r_node) = rbp_ps_t; \ - rbp_ps_t = rbp_right_get(a_type, a_field, rbp_ps_t); \ - } else { \ - (r_node) = rbp_ps_t; \ - break; \ - } \ - } \ -} while (0) - -#define rbp_rotate_left(a_type, a_field, a_node, r_node) do { \ - (r_node) = rbp_right_get(a_type, a_field, (a_node)); \ - rbp_right_set(a_type, a_field, (a_node), rbp_left_get(a_type, a_field, (r_node))); \ - rbp_left_set(a_type, a_field, (r_node), (a_node)); \ -} while (0) - -#define rbp_rotate_right(a_type, a_field, a_node, r_node) do { \ - (r_node) = rbp_left_get(a_type, a_field, (a_node)); \ - rbp_left_set(a_type, a_field, (a_node), rbp_right_get(a_type, a_field, (r_node))); \ - rbp_right_set(a_type, a_field, (r_node), (a_node)); \ -} while (0) - -#define rbp_lean_left(a_type, a_field, a_node, r_node) do { \ - bool rbp_ll_red; \ - rbp_rotate_left(a_type, a_field, (a_node), (r_node)); \ - rbp_ll_red = rbp_red_get(a_type, a_field, (a_node)); \ - rbp_color_set(a_type, a_field, (r_node), rbp_ll_red); \ - rbp_red_set(a_type, a_field, (a_node)); \ -} while (0) - -#define rbp_lean_right(a_type, a_field, a_node, r_node) do { \ - bool rbp_lr_red; \ - rbp_rotate_right(a_type, a_field, (a_node), (r_node)); \ - rbp_lr_red = rbp_red_get(a_type, a_field, (a_node)); \ - rbp_color_set(a_type, a_field, (r_node), rbp_lr_red); \ - rbp_red_set(a_type, a_field, (a_node)); \ -} while (0) - -#define rbp_move_red_left(a_type, a_field, a_node, r_node) do { \ - a_type *rbp_mrl_t, *rbp_mrl_u; \ - rbp_mrl_t = rbp_left_get(a_type, a_field, (a_node)); \ - rbp_red_set(a_type, a_field, rbp_mrl_t); \ - rbp_mrl_t = rbp_right_get(a_type, a_field, (a_node)); \ - rbp_mrl_u = rbp_left_get(a_type, a_field, rbp_mrl_t); \ - if (rbp_red_get(a_type, a_field, rbp_mrl_u)) { \ - rbp_rotate_right(a_type, a_field, rbp_mrl_t, rbp_mrl_u); \ - rbp_right_set(a_type, a_field, (a_node), rbp_mrl_u); \ - rbp_rotate_left(a_type, a_field, (a_node), (r_node)); \ - rbp_mrl_t = rbp_right_get(a_type, a_field, (a_node)); \ - if (rbp_red_get(a_type, a_field, rbp_mrl_t)) { \ - rbp_black_set(a_type, a_field, rbp_mrl_t); \ - rbp_red_set(a_type, a_field, (a_node)); \ - rbp_rotate_left(a_type, a_field, (a_node), rbp_mrl_t); \ - 
rbp_left_set(a_type, a_field, (r_node), rbp_mrl_t); \ - } else { \ - rbp_black_set(a_type, a_field, (a_node)); \ - } \ - } else { \ - rbp_red_set(a_type, a_field, (a_node)); \ - rbp_rotate_left(a_type, a_field, (a_node), (r_node)); \ - } \ -} while (0) - -#define rbp_move_red_right(a_type, a_field, a_node, r_node) do { \ - a_type *rbp_mrr_t; \ - rbp_mrr_t = rbp_left_get(a_type, a_field, (a_node)); \ - if (rbp_red_get(a_type, a_field, rbp_mrr_t)) { \ - a_type *rbp_mrr_u, *rbp_mrr_v; \ - rbp_mrr_u = rbp_right_get(a_type, a_field, rbp_mrr_t); \ - rbp_mrr_v = rbp_left_get(a_type, a_field, rbp_mrr_u); \ - if (rbp_red_get(a_type, a_field, rbp_mrr_v)) { \ - rbp_color_set(a_type, a_field, rbp_mrr_u, rbp_red_get(a_type, a_field, (a_node))); \ - rbp_black_set(a_type, a_field, rbp_mrr_v); \ - rbp_rotate_left(a_type, a_field, rbp_mrr_t, rbp_mrr_u); \ - rbp_left_set(a_type, a_field, (a_node), rbp_mrr_u); \ - rbp_rotate_right(a_type, a_field, (a_node), (r_node)); \ - rbp_rotate_left(a_type, a_field, (a_node), rbp_mrr_t); \ - rbp_right_set(a_type, a_field, (r_node), rbp_mrr_t); \ - } else { \ - rbp_color_set(a_type, a_field, rbp_mrr_t, rbp_red_get(a_type, a_field, (a_node))); \ - rbp_red_set(a_type, a_field, rbp_mrr_u); \ - rbp_rotate_right(a_type, a_field, (a_node), (r_node)); \ - rbp_rotate_left(a_type, a_field, (a_node), rbp_mrr_t); \ - rbp_right_set(a_type, a_field, (r_node), rbp_mrr_t); \ - } \ - rbp_red_set(a_type, a_field, (a_node)); \ - } else { \ - rbp_red_set(a_type, a_field, rbp_mrr_t); \ - rbp_mrr_t = rbp_left_get(a_type, a_field, rbp_mrr_t); \ - if (rbp_red_get(a_type, a_field, rbp_mrr_t)) { \ - rbp_black_set(a_type, a_field, rbp_mrr_t); \ - rbp_rotate_right(a_type, a_field, (a_node), (r_node)); \ - rbp_rotate_left(a_type, a_field, (a_node), rbp_mrr_t); \ - rbp_right_set(a_type, a_field, (r_node), rbp_mrr_t); \ - } else { \ - rbp_rotate_left(a_type, a_field, (a_node), (r_node)); \ - } \ - } \ -} while (0) - -#define rb_insert(a_type, a_field, a_cmp, a_tree, a_node) do { \ - a_type rbp_i_s; \ - a_type *rbp_i_g, *rbp_i_p, *rbp_i_c, *rbp_i_t, *rbp_i_u; \ - int rbp_i_cmp = 0; \ - rbp_i_g = &(a_tree)->rbt_nil; \ - rbp_left_set(a_type, a_field, &rbp_i_s, (a_tree)->rbt_root); \ - rbp_right_set(a_type, a_field, &rbp_i_s, &(a_tree)->rbt_nil); \ - rbp_black_set(a_type, a_field, &rbp_i_s); \ - rbp_i_p = &rbp_i_s; \ - rbp_i_c = (a_tree)->rbt_root; \ - /* Iteratively search down the tree for the insertion point, */\ - /* splitting 4-nodes as they are encountered. At the end of each */\ - /* iteration, rbp_i_g->rbp_i_p->rbp_i_c is a 3-level path down */\ - /* the tree, assuming a sufficiently deep tree. */\ - while (rbp_i_c != &(a_tree)->rbt_nil) { \ - rbp_i_t = rbp_left_get(a_type, a_field, rbp_i_c); \ - rbp_i_u = rbp_left_get(a_type, a_field, rbp_i_t); \ - if (rbp_red_get(a_type, a_field, rbp_i_t) \ - && rbp_red_get(a_type, a_field, rbp_i_u)) { \ - /* rbp_i_c is the top of a logical 4-node, so split it. */\ - /* This iteration does not move down the tree, due to the */\ - /* disruptiveness of node splitting. */\ - /* */\ - /* Rotate right. */\ - rbp_rotate_right(a_type, a_field, rbp_i_c, rbp_i_t); \ - /* Pass red links up one level. 
*/\ - rbp_i_u = rbp_left_get(a_type, a_field, rbp_i_t); \ - rbp_black_set(a_type, a_field, rbp_i_u); \ - if (rbp_left_get(a_type, a_field, rbp_i_p) == rbp_i_c) { \ - rbp_left_set(a_type, a_field, rbp_i_p, rbp_i_t); \ - rbp_i_c = rbp_i_t; \ - } else { \ - /* rbp_i_c was the right child of rbp_i_p, so rotate */\ - /* left in order to maintain the left-leaning */\ - /* invariant. */\ - assert(rbp_right_get(a_type, a_field, rbp_i_p) == rbp_i_c); \ - rbp_right_set(a_type, a_field, rbp_i_p, rbp_i_t); \ - rbp_lean_left(a_type, a_field, rbp_i_p, rbp_i_u); \ - if (rbp_left_get(a_type, a_field, rbp_i_g) == rbp_i_p) {\ - rbp_left_set(a_type, a_field, rbp_i_g, rbp_i_u); \ - } else { \ - assert(rbp_right_get(a_type, a_field, rbp_i_g) == rbp_i_p); \ - rbp_right_set(a_type, a_field, rbp_i_g, rbp_i_u); \ - } \ - rbp_i_p = rbp_i_u; \ - rbp_i_cmp = (a_cmp)((a_node), rbp_i_p); \ - if (rbp_i_cmp < 0) { \ - rbp_i_c = rbp_left_get(a_type, a_field, rbp_i_p); \ - } else { \ - assert(rbp_i_cmp > 0); \ - rbp_i_c = rbp_right_get(a_type, a_field, rbp_i_p); \ - } \ - continue; \ - } \ - } \ - rbp_i_g = rbp_i_p; \ - rbp_i_p = rbp_i_c; \ - rbp_i_cmp = (a_cmp)((a_node), rbp_i_c); \ - if (rbp_i_cmp < 0) { \ - rbp_i_c = rbp_left_get(a_type, a_field, rbp_i_c); \ - } else { \ - assert(rbp_i_cmp > 0); \ - rbp_i_c = rbp_right_get(a_type, a_field, rbp_i_c); \ - } \ - } \ - /* rbp_i_p now refers to the node under which to insert. */\ - rbp_node_new(a_type, a_field, a_tree, (a_node)); \ - if (rbp_i_cmp > 0) { \ - rbp_right_set(a_type, a_field, rbp_i_p, (a_node)); \ - rbp_lean_left(a_type, a_field, rbp_i_p, rbp_i_t); \ - if (rbp_left_get(a_type, a_field, rbp_i_g) == rbp_i_p) { \ - rbp_left_set(a_type, a_field, rbp_i_g, rbp_i_t); \ - } else if (rbp_right_get(a_type, a_field, rbp_i_g) == rbp_i_p) {\ - rbp_right_set(a_type, a_field, rbp_i_g, rbp_i_t); \ - } \ - } else { \ - rbp_left_set(a_type, a_field, rbp_i_p, (a_node)); \ - } \ - /* Update the root and make sure that it is black. */\ - (a_tree)->rbt_root = rbp_left_get(a_type, a_field, &rbp_i_s); \ - rbp_black_set(a_type, a_field, (a_tree)->rbt_root); \ -} while (0) - -#define rb_remove(a_type, a_field, a_cmp, a_tree, a_node) do { \ - a_type rbp_r_s; \ - a_type *rbp_r_p, *rbp_r_c, *rbp_r_xp, *rbp_r_t, *rbp_r_u; \ - int rbp_r_cmp; \ - rbp_left_set(a_type, a_field, &rbp_r_s, (a_tree)->rbt_root); \ - rbp_right_set(a_type, a_field, &rbp_r_s, &(a_tree)->rbt_nil); \ - rbp_black_set(a_type, a_field, &rbp_r_s); \ - rbp_r_p = &rbp_r_s; \ - rbp_r_c = (a_tree)->rbt_root; \ - rbp_r_xp = &(a_tree)->rbt_nil; \ - /* Iterate down the tree, but always transform 2-nodes to 3- or */\ - /* 4-nodes in order to maintain the invariant that the current */\ - /* node is not a 2-node. This allows simple deletion once a leaf */\ - /* is reached. Handle the root specially though, since there may */\ - /* be no way to convert it from a 2-node to a 3-node. */\ - rbp_r_cmp = (a_cmp)((a_node), rbp_r_c); \ - if (rbp_r_cmp < 0) { \ - rbp_r_t = rbp_left_get(a_type, a_field, rbp_r_c); \ - rbp_r_u = rbp_left_get(a_type, a_field, rbp_r_t); \ - if (rbp_red_get(a_type, a_field, rbp_r_t) == false \ - && rbp_red_get(a_type, a_field, rbp_r_u) == false) { \ - /* Apply standard transform to prepare for left move. */\ - rbp_move_red_left(a_type, a_field, rbp_r_c, rbp_r_t); \ - rbp_black_set(a_type, a_field, rbp_r_t); \ - rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t); \ - rbp_r_c = rbp_r_t; \ - } else { \ - /* Move left. 
*/\ - rbp_r_p = rbp_r_c; \ - rbp_r_c = rbp_left_get(a_type, a_field, rbp_r_c); \ - } \ - } else { \ - if (rbp_r_cmp == 0) { \ - assert((a_node) == rbp_r_c); \ - if (rbp_right_get(a_type, a_field, rbp_r_c) == &(a_tree)->rbt_nil) { \ - /* Delete root node (which is also a leaf node). */\ - if (rbp_left_get(a_type, a_field, rbp_r_c) != &(a_tree)->rbt_nil) { \ - rbp_lean_right(a_type, a_field, rbp_r_c, rbp_r_t); \ - rbp_right_set(a_type, a_field, rbp_r_t, &(a_tree)->rbt_nil); \ - } else { \ - rbp_r_t = &(a_tree)->rbt_nil; \ - } \ - rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t); \ - } else { \ - /* This is the node we want to delete, but we will */\ - /* instead swap it with its successor and delete the */\ - /* successor. Record enough information to do the */\ - /* swap later. rbp_r_xp is the a_node's parent. */\ - rbp_r_xp = rbp_r_p; \ - rbp_r_cmp = 1; /* Note that deletion is incomplete. */\ - } \ - } \ - if (rbp_r_cmp == 1) { \ - if (rbp_red_get(a_type, a_field, rbp_left_get(a_type, \ - a_field, rbp_right_get(a_type, a_field, rbp_r_c))) == false) { \ - rbp_r_t = rbp_left_get(a_type, a_field, rbp_r_c); \ - if (rbp_red_get(a_type, a_field, rbp_r_t)) { \ - /* Standard transform. */\ - rbp_move_red_right(a_type, a_field, rbp_r_c, rbp_r_t); \ - } else { \ - /* Root-specific transform. */\ - rbp_red_set(a_type, a_field, rbp_r_c); \ - rbp_r_u = rbp_left_get(a_type, a_field, rbp_r_t); \ - if (rbp_red_get(a_type, a_field, rbp_r_u)) { \ - rbp_black_set(a_type, a_field, rbp_r_u); \ - rbp_rotate_right(a_type, a_field, rbp_r_c, rbp_r_t); \ - rbp_rotate_left(a_type, a_field, rbp_r_c, rbp_r_u); \ - rbp_right_set(a_type, a_field, rbp_r_t, rbp_r_u); \ - } else { \ - rbp_red_set(a_type, a_field, rbp_r_t); \ - rbp_rotate_left(a_type, a_field, rbp_r_c, rbp_r_t); \ - } \ - } \ - rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t); \ - rbp_r_c = rbp_r_t; \ - } else { \ - /* Move right */\ - rbp_r_p = rbp_r_c; \ - rbp_r_c = rbp_right_get(a_type, a_field, rbp_r_c); \ - } \ - } \ - } \ - if (rbp_r_cmp != 0) { \ - while (true) { \ - assert(rbp_r_p != &(a_tree)->rbt_nil); \ - rbp_r_cmp = (a_cmp)((a_node), rbp_r_c); \ - if (rbp_r_cmp < 0) { \ - rbp_r_t = rbp_left_get(a_type, a_field, rbp_r_c); \ - if (rbp_r_t == &(a_tree)->rbt_nil) { \ - /* rbp_r_c now refers to the successor node to */\ - /* relocate, and rbp_r_xp/a_node refer to the */\ - /* context for the relocation. 
*/\ - if (rbp_left_get(a_type, a_field, rbp_r_xp) == (a_node)) { \ - rbp_left_set(a_type, a_field, rbp_r_xp, rbp_r_c); \ - } else { \ - assert(rbp_right_get(a_type, a_field, rbp_r_xp) == (a_node)); \ - rbp_right_set(a_type, a_field, rbp_r_xp, rbp_r_c); \ - } \ - rbp_left_set(a_type, a_field, rbp_r_c, rbp_left_get(a_type, a_field, (a_node))); \ - rbp_right_set(a_type, a_field, rbp_r_c, rbp_right_get(a_type, a_field, (a_node))); \ - rbp_color_set(a_type, a_field, rbp_r_c, rbp_red_get(a_type, a_field, (a_node))); \ - if (rbp_left_get(a_type, a_field, rbp_r_p) == rbp_r_c) { \ - rbp_left_set(a_type, a_field, rbp_r_p, &(a_tree)->rbt_nil); \ - } else { \ - assert(rbp_right_get(a_type, a_field, rbp_r_p) == rbp_r_c); \ - rbp_right_set(a_type, a_field, rbp_r_p, &(a_tree)->rbt_nil); \ - } \ - break; \ - } \ - rbp_r_u = rbp_left_get(a_type, a_field, rbp_r_t); \ - if (rbp_red_get(a_type, a_field, rbp_r_t) == false \ - && rbp_red_get(a_type, a_field, rbp_r_u) == false) { \ - rbp_move_red_left(a_type, a_field, rbp_r_c, rbp_r_t); \ - if (rbp_left_get(a_type, a_field, rbp_r_p) == rbp_r_c) { \ - rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t);\ - } else { \ - rbp_right_set(a_type, a_field, rbp_r_p, rbp_r_t); \ - } \ - rbp_r_c = rbp_r_t; \ - } else { \ - rbp_r_p = rbp_r_c; \ - rbp_r_c = rbp_left_get(a_type, a_field, rbp_r_c); \ - } \ - } else { \ - /* Check whether to delete this node (it has to be */\ - /* the correct node and a leaf node). */\ - if (rbp_r_cmp == 0) { \ - assert((a_node) == rbp_r_c); \ - if (rbp_right_get(a_type, a_field, rbp_r_c) == &(a_tree)->rbt_nil) { \ - /* Delete leaf node. */\ - if (rbp_left_get(a_type, a_field, rbp_r_c) != &(a_tree)->rbt_nil) { \ - rbp_lean_right(a_type, a_field, rbp_r_c, rbp_r_t); \ - rbp_right_set(a_type, a_field, rbp_r_t, &(a_tree)->rbt_nil); \ - } else { \ - rbp_r_t = &(a_tree)->rbt_nil; \ - } \ - if (rbp_left_get(a_type, a_field, rbp_r_p) == rbp_r_c) { \ - rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t); \ - } else { \ - rbp_right_set(a_type, a_field, rbp_r_p, rbp_r_t); \ - } \ - break; \ - } else { \ - /* This is the node we want to delete, but we */\ - /* will instead swap it with its successor */\ - /* and delete the successor. Record enough */\ - /* information to do the swap later. */\ - /* rbp_r_xp is a_node's parent. */\ - rbp_r_xp = rbp_r_p; \ - } \ - } \ - rbp_r_t = rbp_right_get(a_type, a_field, rbp_r_c); \ - rbp_r_u = rbp_left_get(a_type, a_field, rbp_r_t); \ - if (rbp_red_get(a_type, a_field, rbp_r_u) == false) { \ - rbp_move_red_right(a_type, a_field, rbp_r_c, \ - rbp_r_t); \ - if (rbp_left_get(a_type, a_field, rbp_r_p) == rbp_r_c) { \ - rbp_left_set(a_type, a_field, rbp_r_p, rbp_r_t);\ - } else { \ - rbp_right_set(a_type, a_field, rbp_r_p, rbp_r_t); \ - } \ - rbp_r_c = rbp_r_t; \ - } else { \ - rbp_r_p = rbp_r_c; \ - rbp_r_c = rbp_right_get(a_type, a_field, rbp_r_c); \ - } \ - } \ - } \ - } \ - /* Update root. */\ - (a_tree)->rbt_root = rbp_left_get(a_type, a_field, &rbp_r_s); \ -} while (0) - -/* - * The rb_wrap() macro provides a convenient way to wrap functions around the - * cpp macros. The main benefits of wrapping are that 1) repeated macro - * expansion can cause code bloat, especially for rb_{insert,remove)(), and - * 2) type, linkage, comparison functions, etc. need not be specified at every - * call point. 
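To make the rb_wrap() trade-off described just below concrete, here is a hypothetical instantiation, assuming the macros of this header (and assert.h) are in scope: one invocation emits a typed function family, so the large insert/remove bodies expand once rather than at every call site. All demo_* names are illustrative.

#include <stdint.h>

typedef struct demo_ext demo_ext_t;
struct demo_ext {
    uint32_t offset;
    rb_node(demo_ext_t) link;       /* embedded linkage, no allocation */
};
typedef rb_tree(demo_ext_t) demo_tree_t;

/* Comparison contract: -1, 0, or 1, first argument treated as the key. */
static int demo_cmp(demo_ext_t *a, demo_ext_t *b)
{
    if (a->offset < b->offset)
        return -1;
    if (a->offset > b->offset)
        return 1;
    return 0;
}

/* Emits demo_new(), demo_first(), demo_last(), demo_next(), demo_prev(),
 * demo_search(), demo_nsearch(), demo_psearch(), demo_insert(), and
 * demo_remove(), all operating on demo_tree_t. */
rb_wrap(static, demo_, demo_tree_t, demo_ext_t, link, demo_cmp)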
- */ - -#define rb_wrap(a_attr, a_prefix, a_tree_type, a_type, a_field, a_cmp) \ -a_attr void \ -a_prefix##new(a_tree_type *tree) { \ - rb_new(a_type, a_field, tree); \ -} \ -a_attr a_type * \ -a_prefix##first(a_tree_type *tree) { \ - a_type *ret; \ - rb_first(a_type, a_field, tree, ret); \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##last(a_tree_type *tree) { \ - a_type *ret; \ - rb_last(a_type, a_field, tree, ret); \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##next(a_tree_type *tree, a_type *node) { \ - a_type *ret; \ - rb_next(a_type, a_field, a_cmp, tree, node, ret); \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##prev(a_tree_type *tree, a_type *node) { \ - a_type *ret; \ - rb_prev(a_type, a_field, a_cmp, tree, node, ret); \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##search(a_tree_type *tree, a_type *key) { \ - a_type *ret; \ - rb_search(a_type, a_field, a_cmp, tree, key, ret); \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##nsearch(a_tree_type *tree, a_type *key) { \ - a_type *ret; \ - rb_nsearch(a_type, a_field, a_cmp, tree, key, ret); \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##psearch(a_tree_type *tree, a_type *key) { \ - a_type *ret; \ - rb_psearch(a_type, a_field, a_cmp, tree, key, ret); \ - return (ret); \ -} \ -a_attr void \ -a_prefix##insert(a_tree_type *tree, a_type *node) { \ - rb_insert(a_type, a_field, a_cmp, tree, node); \ -} \ -a_attr void \ -a_prefix##remove(a_tree_type *tree, a_type *node) { \ - rb_remove(a_type, a_field, a_cmp, tree, node); \ -} - -/* - * The iterators simulate recursion via an array of pointers that store the - * current path. This is critical to performance, since a series of calls to - * rb_{next,prev}() would require time proportional to (n lg n), whereas this - * implementation only requires time proportional to (n). - * - * Since the iterators cache a path down the tree, any tree modification may - * cause the cached path to become invalid. In order to continue iteration, - * use something like the following sequence: - * - * { - * a_type *node, *tnode; - * - * rb_foreach_begin(a_type, a_field, a_tree, node) { - * ... - * rb_next(a_type, a_field, a_cmp, a_tree, node, tnode); - * rb_remove(a_type, a_field, a_cmp, a_tree, node); - * rb_foreach_next(a_type, a_field, a_cmp, a_tree, tnode); - * ... - * } rb_foreach_end(a_type, a_field, a_tree, node) - * } - * - * Note that this idiom is not advised if every iteration modifies the tree, - * since in that case there is no algorithmic complexity improvement over a - * series of rb_{next,prev}() calls, thus making the setup overhead wasted - * effort. - */ - -#define rb_foreach_begin(a_type, a_field, a_tree, a_var) { /* brace A */ \ - /* Compute the maximum possible tree depth (3X the black height). */\ - unsigned rbp_f_height; \ - rbp_black_height(a_type, a_field, a_tree, rbp_f_height); \ - rbp_f_height *= 3; \ - { /* brace B */ \ - /* Initialize the path to contain the left spine. */\ - a_type *rbp_f_path[rbp_f_height]; \ - a_type *rbp_f_node; \ - bool rbp_f_synced = false; \ - unsigned rbp_f_depth = 0; \ - if ((a_tree)->rbt_root != &(a_tree)->rbt_nil) { \ - rbp_f_path[rbp_f_depth] = (a_tree)->rbt_root; \ - rbp_f_depth++; \ - while ((rbp_f_node = rbp_left_get(a_type, a_field, \ - rbp_f_path[rbp_f_depth-1])) != &(a_tree)->rbt_nil) { \ - rbp_f_path[rbp_f_depth] = rbp_f_node; \ - rbp_f_depth++; \ - } \ - } \ - /* While the path is non-empty, iterate. 
*/\ - while (rbp_f_depth > 0) { /* brace C */ \ - (a_var) = rbp_f_path[rbp_f_depth-1]; - -/* - * Note that rb_foreach_begin omits closing }'s because - * it expects that it will be succeeded by a call to - * rb_foreach_end which will have the closing } - */ - -/* Only use if modifying the tree during iteration. */ -#define rb_foreach_next(a_type, a_field, a_cmp, a_tree, a_node) \ - /* Re-initialize the path to contain the path to a_node. */\ - rbp_f_depth = 0; \ - if (a_node != NULL) { \ - if ((a_tree)->rbt_root != &(a_tree)->rbt_nil) { \ - rbp_f_path[rbp_f_depth] = (a_tree)->rbt_root; \ - rbp_f_depth++; \ - rbp_f_node = rbp_f_path[0]; \ - while (true) { \ - int rbp_f_cmp = (a_cmp)((a_node), \ - rbp_f_path[rbp_f_depth-1]); \ - if (rbp_f_cmp < 0) { \ - rbp_f_node = rbp_left_get(a_type, a_field, \ - rbp_f_path[rbp_f_depth-1]); \ - } else if (rbp_f_cmp > 0) { \ - rbp_f_node = rbp_right_get(a_type, a_field, \ - rbp_f_path[rbp_f_depth-1]); \ - } else { \ - break; \ - } \ - assert(rbp_f_node != &(a_tree)->rbt_nil); \ - rbp_f_path[rbp_f_depth] = rbp_f_node; \ - rbp_f_depth++; \ - } \ - } \ - } \ - rbp_f_synced = true; - -#define rb_foreach_end(a_type, a_field, a_tree, a_var) \ - if (rbp_f_synced) { \ - rbp_f_synced = false; \ - continue; \ - } \ - /* Find the successor. */\ - if ((rbp_f_node = rbp_right_get(a_type, a_field, \ - rbp_f_path[rbp_f_depth-1])) != &(a_tree)->rbt_nil) { \ - /* The successor is the left-most node in the right */\ - /* subtree. */\ - rbp_f_path[rbp_f_depth] = rbp_f_node; \ - rbp_f_depth++; \ - while ((rbp_f_node = rbp_left_get(a_type, a_field, \ - rbp_f_path[rbp_f_depth-1])) != &(a_tree)->rbt_nil) { \ - rbp_f_path[rbp_f_depth] = rbp_f_node; \ - rbp_f_depth++; \ - } \ - } else { \ - /* The successor is above the current node. Unwind */\ - /* until a left-leaning edge is removed from the */\ - /* path, or the path is empty. */\ - for (rbp_f_depth--; rbp_f_depth > 0; rbp_f_depth--) { \ - if (rbp_left_get(a_type, a_field, rbp_f_path[rbp_f_depth-1]) \ - == rbp_f_path[rbp_f_depth]) { \ - break; \ - } \ - } \ - } \ - } /* close brace C */ \ - } /* close brace B */ \ -} /* close brace A */ - - - -#define rb_foreach_reverse_begin(a_type, a_field, a_tree, a_var) { /* brace A */ \ - /* Compute the maximum possible tree depth (3X the black height). */\ - unsigned rbp_fr_height; \ - rbp_black_height(a_type, a_field, a_tree, rbp_fr_height); \ - rbp_fr_height *= 3; \ - { /* brace B */ \ - /* Initialize the path to contain the right spine. */\ - a_type *rbp_fr_path[rbp_fr_height]; \ - a_type *rbp_fr_node; \ - bool rbp_fr_synced = false; \ - unsigned rbp_fr_depth = 0; \ - if ((a_tree)->rbt_root != &(a_tree)->rbt_nil) { \ - rbp_fr_path[rbp_fr_depth] = (a_tree)->rbt_root; \ - rbp_fr_depth++; \ - while ((rbp_fr_node = rbp_right_get(a_type, a_field, \ - rbp_fr_path[rbp_fr_depth-1])) != &(a_tree)->rbt_nil) { \ - rbp_fr_path[rbp_fr_depth] = rbp_fr_node; \ - rbp_fr_depth++; \ - } \ - } \ - /* While the path is non-empty, iterate. */\ - while (rbp_fr_depth > 0) { /* brace C */ \ - (a_var) = rbp_fr_path[rbp_fr_depth-1]; - - -/* Only use if modifying the tree during iteration. */ -#define rb_foreach_reverse_prev(a_type, a_field, a_cmp, a_tree, a_node) \ - /* Re-initialize the path to contain the path to a_node. 
*/\ - rbp_fr_depth = 0; \ - if (a_node != NULL) { \ - if ((a_tree)->rbt_root != &(a_tree)->rbt_nil) { \ - rbp_fr_path[rbp_fr_depth] = (a_tree)->rbt_root; \ - rbp_fr_depth++; \ - rbp_fr_node = rbp_fr_path[0]; \ - while (true) { \ - int rbp_fr_cmp = (a_cmp)((a_node), rbp_fr_path[rbp_fr_depth-1]); \ - if (rbp_fr_cmp < 0) { \ - rbp_fr_node = rbp_left_get(a_type, a_field, \ - rbp_fr_path[rbp_fr_depth-1]); \ - } else if (rbp_fr_cmp > 0) { \ - rbp_fr_node = rbp_right_get(a_type, a_field, rbp_fr_path[rbp_fr_depth-1]); \ - } else { \ - break; \ - } \ - assert(rbp_fr_node != &(a_tree)->rbt_nil); \ - rbp_fr_path[rbp_fr_depth] = rbp_fr_node; \ - rbp_fr_depth++; \ - } \ - } \ - } \ - rbp_fr_synced = true; - -#define rb_foreach_reverse_end(a_type, a_field, a_tree, a_var) \ - if (rbp_fr_synced) { \ - rbp_fr_synced = false; \ - continue; \ - } \ - if (rbp_fr_depth == 0) { \ - /* rb_foreach_reverse_sync() was called with a NULL */\ - /* a_node. */\ - break; \ - } \ - /* Find the predecessor. */\ - if ((rbp_fr_node = rbp_left_get(a_type, a_field, \ - rbp_fr_path[rbp_fr_depth-1])) != &(a_tree)->rbt_nil) { \ - /* The predecessor is the right-most node in the left */\ - /* subtree. */\ - rbp_fr_path[rbp_fr_depth] = rbp_fr_node; \ - rbp_fr_depth++; \ - while ((rbp_fr_node = rbp_right_get(a_type, a_field, \ - rbp_fr_path[rbp_fr_depth-1])) != &(a_tree)->rbt_nil) {\ - rbp_fr_path[rbp_fr_depth] = rbp_fr_node; \ - rbp_fr_depth++; \ - } \ - } else { \ - /* The predecessor is above the current node. Unwind */\ - /* until a right-leaning edge is removed from the */\ - /* path, or the path is empty. */\ - for (rbp_fr_depth--; rbp_fr_depth > 0; rbp_fr_depth--) {\ - if (rbp_right_get(a_type, a_field, rbp_fr_path[rbp_fr_depth-1]) \ - == rbp_fr_path[rbp_fr_depth]) { \ - break; \ - } \ - } \ - } \ - } /* Close brace C */ \ - } /* close brace B */ \ -} /* close brace A*/ - -#endif /* RB_H_ */ diff --git a/bsd/i386/Makefile b/bsd/i386/Makefile index 96c9ddbb8..5dd87eecc 100644 --- a/bsd/i386/Makefile +++ b/bsd/i386/Makefile @@ -10,12 +10,14 @@ include $(MakeInc_def) DATAFILES = \ endian.h fasttrap_isa.h param.h \ profile.h setjmp.h signal.h limits.h _limits.h \ - types.h vmparam.h _structs.h _types.h _param.h + types.h vmparam.h _structs.h _types.h _param.h \ + _mcontext.h KERNELFILES = \ endian.h param.h \ profile.h setjmp.h signal.h limits.h _limits.h \ - types.h vmparam.h _structs.h _types.h _param.h + types.h vmparam.h _structs.h _types.h _param.h \ + _mcontext.h INSTALL_MD_LIST = ${DATAFILES} diff --git a/bsd/i386/_mcontext.h b/bsd/i386/_mcontext.h new file mode 100644 index 000000000..917e80d43 --- /dev/null +++ b/bsd/i386/_mcontext.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __I386_MCONTEXT_H_ +#define __I386_MCONTEXT_H_ + +#include +#include + +#ifndef _STRUCT_MCONTEXT32 +#if __DARWIN_UNIX03 +#define _STRUCT_MCONTEXT32 struct __darwin_mcontext32 +_STRUCT_MCONTEXT32 +{ + _STRUCT_X86_EXCEPTION_STATE32 __es; + _STRUCT_X86_THREAD_STATE32 __ss; + _STRUCT_X86_FLOAT_STATE32 __fs; +}; + +#define _STRUCT_MCONTEXT_AVX32 struct __darwin_mcontext_avx32 +_STRUCT_MCONTEXT_AVX32 +{ + _STRUCT_X86_EXCEPTION_STATE32 __es; + _STRUCT_X86_THREAD_STATE32 __ss; + _STRUCT_X86_AVX_STATE32 __fs; +}; + +#else /* !__DARWIN_UNIX03 */ +#define _STRUCT_MCONTEXT32 struct mcontext32 +_STRUCT_MCONTEXT32 +{ + _STRUCT_X86_EXCEPTION_STATE32 es; + _STRUCT_X86_THREAD_STATE32 ss; + _STRUCT_X86_FLOAT_STATE32 fs; +}; + +#define _STRUCT_MCONTEXT_AVX32 struct mcontext_avx32 +_STRUCT_MCONTEXT_AVX32 +{ + _STRUCT_X86_EXCEPTION_STATE32 es; + _STRUCT_X86_THREAD_STATE32 ss; + _STRUCT_X86_AVX_STATE32 fs; +}; + +#endif /* __DARWIN_UNIX03 */ +#endif /* _STRUCT_MCONTEXT32 */ + +#ifndef _STRUCT_MCONTEXT64 +#if __DARWIN_UNIX03 +#define _STRUCT_MCONTEXT64 struct __darwin_mcontext64 +_STRUCT_MCONTEXT64 +{ + _STRUCT_X86_EXCEPTION_STATE64 __es; + _STRUCT_X86_THREAD_STATE64 __ss; + _STRUCT_X86_FLOAT_STATE64 __fs; +}; + +#define _STRUCT_MCONTEXT_AVX64 struct __darwin_mcontext_avx64 +_STRUCT_MCONTEXT_AVX64 +{ + _STRUCT_X86_EXCEPTION_STATE64 __es; + _STRUCT_X86_THREAD_STATE64 __ss; + _STRUCT_X86_AVX_STATE64 __fs; +}; + +#else /* !__DARWIN_UNIX03 */ +#define _STRUCT_MCONTEXT64 struct mcontext64 +_STRUCT_MCONTEXT64 +{ + _STRUCT_X86_EXCEPTION_STATE64 es; + _STRUCT_X86_THREAD_STATE64 ss; + _STRUCT_X86_FLOAT_STATE64 fs; +}; + +#define _STRUCT_MCONTEXT_AVX64 struct mcontext_avx64 +_STRUCT_MCONTEXT_AVX64 +{ + _STRUCT_X86_EXCEPTION_STATE64 es; + _STRUCT_X86_THREAD_STATE64 ss; + _STRUCT_X86_AVX_STATE64 fs; +}; + +#endif /* __DARWIN_UNIX03 */ +#endif /* _STRUCT_MCONTEXT64 */ + + +#ifndef _MCONTEXT_T +#define _MCONTEXT_T +#if defined(__LP64__) +typedef _STRUCT_MCONTEXT64 *mcontext_t; +#define _STRUCT_MCONTEXT _STRUCT_MCONTEXT64 +#else +typedef _STRUCT_MCONTEXT32 *mcontext_t; +#define _STRUCT_MCONTEXT _STRUCT_MCONTEXT32 +#endif +#endif /* _MCONTEXT_T */ + + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#ifndef I386_MCONTEXT_SIZE +#define I386_MCONTEXT_SIZE sizeof(struct mcontext) +#endif /* I386_MCONTEXT_SIZE */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +#endif /* __I386_MCONTEXT_H_ */ diff --git a/bsd/i386/_structs.h b/bsd/i386/_structs.h index 3bdac83bc..36e42cb24 100644 --- a/bsd/i386/_structs.h +++ b/bsd/i386/_structs.h @@ -26,116 +26,4 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include - -#ifdef __need_mcontext_t -#ifndef __need_struct_mcontext -#define __need_struct_mcontext -#endif /* __need_struct_mcontext */ -#endif /* __need_mcontext_t */ - -#if defined(__need_struct_mcontext) -#include -#endif /* __need_struct_mcontext */ - - -#ifdef __need_struct_mcontext -#undef __need_struct_mcontext - -#ifndef _STRUCT_MCONTEXT32 -#if __DARWIN_UNIX03 -#define 
_STRUCT_MCONTEXT32 struct __darwin_mcontext32 -_STRUCT_MCONTEXT32 -{ - _STRUCT_X86_EXCEPTION_STATE32 __es; - _STRUCT_X86_THREAD_STATE32 __ss; - _STRUCT_X86_FLOAT_STATE32 __fs; -}; - -#define _STRUCT_MCONTEXT_AVX32 struct __darwin_mcontext_avx32 -_STRUCT_MCONTEXT_AVX32 -{ - _STRUCT_X86_EXCEPTION_STATE32 __es; - _STRUCT_X86_THREAD_STATE32 __ss; - _STRUCT_X86_AVX_STATE32 __fs; -}; - -#else /* !__DARWIN_UNIX03 */ -#define _STRUCT_MCONTEXT32 struct mcontext32 -_STRUCT_MCONTEXT32 -{ - _STRUCT_X86_EXCEPTION_STATE32 es; - _STRUCT_X86_THREAD_STATE32 ss; - _STRUCT_X86_FLOAT_STATE32 fs; -}; - -#define _STRUCT_MCONTEXT_AVX32 struct mcontext_avx32 -_STRUCT_MCONTEXT_AVX32 -{ - _STRUCT_X86_EXCEPTION_STATE32 es; - _STRUCT_X86_THREAD_STATE32 ss; - _STRUCT_X86_AVX_STATE32 fs; -}; - -#endif /* __DARWIN_UNIX03 */ -#endif /* _STRUCT_MCONTEXT32 */ - -#ifndef _STRUCT_MCONTEXT64 -#if __DARWIN_UNIX03 -#define _STRUCT_MCONTEXT64 struct __darwin_mcontext64 -_STRUCT_MCONTEXT64 -{ - _STRUCT_X86_EXCEPTION_STATE64 __es; - _STRUCT_X86_THREAD_STATE64 __ss; - _STRUCT_X86_FLOAT_STATE64 __fs; -}; - -#define _STRUCT_MCONTEXT_AVX64 struct __darwin_mcontext_avx64 -_STRUCT_MCONTEXT_AVX64 -{ - _STRUCT_X86_EXCEPTION_STATE64 __es; - _STRUCT_X86_THREAD_STATE64 __ss; - _STRUCT_X86_AVX_STATE64 __fs; -}; - -#else /* !__DARWIN_UNIX03 */ -#define _STRUCT_MCONTEXT64 struct mcontext64 -_STRUCT_MCONTEXT64 -{ - _STRUCT_X86_EXCEPTION_STATE64 es; - _STRUCT_X86_THREAD_STATE64 ss; - _STRUCT_X86_FLOAT_STATE64 fs; -}; - -#define _STRUCT_MCONTEXT_AVX64 struct mcontext_avx64 -_STRUCT_MCONTEXT_AVX64 -{ - _STRUCT_X86_EXCEPTION_STATE64 es; - _STRUCT_X86_THREAD_STATE64 ss; - _STRUCT_X86_AVX_STATE64 fs; -}; - -#endif /* __DARWIN_UNIX03 */ -#endif /* _STRUCT_MCONTEXT64 */ -#endif /* __need_struct_mcontext */ - -#ifdef __need_mcontext_t -#undef __need_mcontext_t -#ifndef _MCONTEXT_T -#define _MCONTEXT_T -#if defined(__LP64__) -typedef _STRUCT_MCONTEXT64 *mcontext_t; -#define _STRUCT_MCONTEXT _STRUCT_MCONTEXT64 -#else -typedef _STRUCT_MCONTEXT32 *mcontext_t; -#define _STRUCT_MCONTEXT _STRUCT_MCONTEXT32 -#endif -#endif /* _MCONTEXT_T */ -#endif /* __need_mcontext_t */ - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef I386_MCONTEXT_SIZE -#define I386_MCONTEXT_SIZE sizeof(struct mcontext) -#endif /* I386_MCONTEXT_SIZE */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - +#include diff --git a/bsd/i386/_types.h b/bsd/i386/_types.h index 67741d5d9..4df007203 100644 --- a/bsd/i386/_types.h +++ b/bsd/i386/_types.h @@ -80,13 +80,15 @@ typedef union { typedef __mbstate_t __darwin_mbstate_t; /* mbstate_t */ -#if defined(__GNUC__) && defined(__PTRDIFF_TYPE__) +#if defined(__PTRDIFF_TYPE__) typedef __PTRDIFF_TYPE__ __darwin_ptrdiff_t; /* ptr1 - ptr2 */ +#elif defined(__LP64__) +typedef long __darwin_ptrdiff_t; /* ptr1 - ptr2 */ #else typedef int __darwin_ptrdiff_t; /* ptr1 - ptr2 */ #endif /* __GNUC__ */ -#if defined(__GNUC__) && defined(__SIZE_TYPE__) +#if defined(__SIZE_TYPE__) typedef __SIZE_TYPE__ __darwin_size_t; /* sizeof() */ #else typedef unsigned long __darwin_size_t; /* sizeof() */ @@ -98,7 +100,7 @@ typedef __builtin_va_list __darwin_va_list; /* va_list */ typedef void * __darwin_va_list; /* va_list */ #endif -#if defined(__GNUC__) && defined(__WCHAR_TYPE__) +#if defined(__WCHAR_TYPE__) typedef __WCHAR_TYPE__ __darwin_wchar_t; /* wchar_t */ #else typedef __darwin_ct_rune_t __darwin_wchar_t; /* wchar_t */ @@ -106,7 +108,7 @@ typedef __darwin_ct_rune_t __darwin_wchar_t; /* wchar_t */ typedef __darwin_wchar_t __darwin_rune_t; /* rune_t 
*/ -#if defined(__GNUC__) && defined(__WINT_TYPE__) +#if defined(__WINT_TYPE__) typedef __WINT_TYPE__ __darwin_wint_t; /* wint_t */ #else typedef __darwin_ct_rune_t __darwin_wint_t; /* wint_t */ diff --git a/bsd/i386/param.h b/bsd/i386/param.h index 0eae0fea5..c4c906bdb 100644 --- a/bsd/i386/param.h +++ b/bsd/i386/param.h @@ -112,11 +112,7 @@ #define MCLOFSET (MCLBYTES - 1) #ifndef NMBCLUSTERS -#ifdef GATEWAY #define NMBCLUSTERS ((1024 * 1024) / MCLBYTES) /* cl map size: 1MB */ -#else -#define NMBCLUSTERS ((1024 * 512) / MCLBYTES) /* cl map size: 0.5MB */ -#endif #endif /* diff --git a/bsd/i386/setjmp.h b/bsd/i386/setjmp.h index e28e4d679..ad23a339b 100644 --- a/bsd/i386/setjmp.h +++ b/bsd/i386/setjmp.h @@ -56,15 +56,8 @@ typedef int sigjmp_buf[_JBLEN + 1]; * onstack, mask = 2 ints */ -#if defined(KERNEL) +#if !defined(KERNEL) -#define __need_struct_sigcontext -#include - -typedef struct sigcontext jmp_buf[1]; -#define _JBLEN ((sizeof(struct sigcontext)) / sizeof(int)) -typedef int sigjmp_buf[_JBLEN+1]; -#else #define _JBLEN (18) typedef int jmp_buf[_JBLEN]; typedef int sigjmp_buf[_JBLEN + 1]; diff --git a/bsd/i386/signal.h b/bsd/i386/signal.h index 841988884..2e3ce85ec 100644 --- a/bsd/i386/signal.h +++ b/bsd/i386/signal.h @@ -38,24 +38,6 @@ #ifndef _ANSI_SOURCE typedef int sig_atomic_t; -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) - -#include - -#ifdef __APPLE_API_OBSOLETE - -/* - * Information pushed on stack when a signal is delivered. - * This is used by the kernel to restore state following - * execution of the signal handler. It is also made available - * to the handler to allow it to properly restore state if - * a non-standard exit is performed. - */ -#define __need_struct_sigcontext -#include - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ #endif /* ! _ANSI_SOURCE */ #endif /* _I386_SIGNAL_H_ */ diff --git a/bsd/i386/types.h b/bsd/i386/types.h index 3e31ca89c..301114f86 100644 --- a/bsd/i386/types.h +++ b/bsd/i386/types.h @@ -73,25 +73,14 @@ * Basic integral types. Omit the typedef if * not possible for a machine/compiler combination. */ -#ifndef _INT8_T -#define _INT8_T -typedef __signed char int8_t; -#endif +#include +#include +#include +#include + typedef unsigned char u_int8_t; -#ifndef _INT16_T -#define _INT16_T -typedef short int16_t; -#endif typedef unsigned short u_int16_t; -#ifndef _INT32_T -#define _INT32_T -typedef int int32_t; -#endif typedef unsigned int u_int32_t; -#ifndef _INT64_T -#define _INT64_T -typedef long long int64_t; -#endif typedef unsigned long long u_int64_t; #if __LP64__ @@ -100,14 +89,8 @@ typedef int64_t register_t; typedef int32_t register_t; #endif -#ifndef _INTPTR_T -#define _INTPTR_T -typedef __darwin_intptr_t intptr_t; -#endif -#ifndef _UINTPTR_T -#define _UINTPTR_T -typedef unsigned long uintptr_t; -#endif +#include +#include #if !defined(_ANSI_SOURCE) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)) /* These types are used for reserving the largest possible size. 
*/ @@ -162,9 +145,7 @@ typedef __int64_t user32_off_t __attribute__((aligned(4))); /* This defines the size of syscall arguments after copying into the kernel: */ typedef u_int64_t syscall_arg_t; -#ifndef __offsetof -#define __offsetof(type, field) ((size_t)(&((type *)0)->field)) -#endif +#include #endif /* __ASSEMBLER__ */ #endif /* _MACHTYPES_H_ */ diff --git a/bsd/kern/Makefile b/bsd/kern/Makefile index c7eecbb12..2e14acc85 100644 --- a/bsd/kern/Makefile +++ b/bsd/kern/Makefile @@ -9,18 +9,15 @@ include $(MakeInc_def) INSTALL_SHARE_MISC_LIST = \ trace.codes -include $(MakeInc_rule) -include $(MakeInc_dir) - -SHARE_MISC_DIR = usr/share/misc - INSTALL_SHARE_MISC_FILES = \ - $(addprefix $(DSTROOT)/$(SHARE_MISC_DIR)/, $(INSTALL_SHARE_MISC_LIST)) + $(addprefix $(DSTROOT)/$(INSTALL_SHARE_MISC_DIR)/, $(INSTALL_SHARE_MISC_LIST)) -$(INSTALL_SHARE_MISC_FILES): $(DSTROOT)/$(SHARE_MISC_DIR)/% : % - @echo Installing $< in $(dir $@) - $(_v) $(MKDIR) $(DSTROOT)/$(SHARE_MISC_DIR); \ - $(RM) $(RMFLAGS) $@; \ - $(INSTALL) $(INSTALL_FLAGS) $< $(dir $@); +$(INSTALL_SHARE_MISC_FILES): $(DSTROOT)/$(INSTALL_SHARE_MISC_DIR)/% : % + $(_v)$(MKDIR) $(DSTROOT)/$(INSTALL_SHARE_MISC_DIR) + @echo INSTALL $(@F) + $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ -do_build_install: $(INSTALL_SHARE_MISC_FILES) +do_build_install_primary:: $(INSTALL_SHARE_MISC_FILES) + +include $(MakeInc_rule) +include $(MakeInc_dir) diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index 2a04fff66..ace8c58ba 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -144,6 +144,7 @@ #include /* for domaininit() */ #include /* for thread_wakeup() */ #include /* for ether_family_init() */ +#include /* for gif_init() */ #include /* for vnode_pager_bootstrap() */ #include /* for devfs_kernel_mount() */ #include /* for host_set_exception_ports() */ @@ -152,11 +153,15 @@ #include /* for psem_lock_init() */ #include /* for log_setsize() */ #include /* for tty_init() */ +#include /* proc_uuid_policy_init() */ +#include /* flow_divert_init() */ #include /* for utun_register_control() */ +#include /* for ipsec_register_control() */ #include /* for net_str_id_init() */ #include /* for netsrc_init() */ #include /* for nstat_init() */ #include /* for assert() */ +#include /* for init_system_override() */ #include @@ -232,16 +237,13 @@ char rootdevice[16]; /* hfs device names have at least 9 chars */ struct kmemstats kmemstats[M_LAST]; #endif -int lbolt; /* awoken once a second */ struct vnode *rootvp; int boothowto = RB_DEBUG; -void lightning_bolt(void *); extern kern_return_t IOFindBSDRoot(char *, unsigned int, dev_t *, u_int32_t *); extern void IOSecureBSDRoot(const char * rootName); extern kern_return_t IOKitBSDInit(void ); extern void kminit(void); -extern void klogwakeup(void); extern void file_lock_init(void); extern void kmeminit(void); extern void bsd_bufferinit(void); @@ -259,7 +261,7 @@ __private_extern__ int execargs_cache_size = 0; __private_extern__ int execargs_free_count = 0; __private_extern__ vm_offset_t * execargs_cache = NULL; -void bsd_exec_setup(int) __attribute__((aligned(4096))); +void bsd_exec_setup(int); __private_extern__ int bootarg_vnode_cache_defeat = 0; @@ -273,9 +275,9 @@ __private_extern__ int bootarg_disable_aslr = 0; int cmask = CMASK; extern int customnbuf; -void bsd_init(void) __attribute__((section("__TEXT, initcode"))); 
-kern_return_t bsd_autoconf(void) __attribute__((section("__TEXT, initcode"))); -void bsd_utaskbootstrap(void) __attribute__((section("__TEXT, initcode"))); +void bsd_init(void); +kern_return_t bsd_autoconf(void); +void bsd_utaskbootstrap(void); static void parse_bsd_args(void); extern task_t bsd_init_task; @@ -313,9 +315,6 @@ extern int check_policy_init(int); #endif #endif /* CONFIG_MACF */ -extern void stackshot_lock_init(void); - - /* If we are using CONFIG_DTRACE */ #if CONFIG_DTRACE extern void dtrace_postinit(void); @@ -352,7 +351,7 @@ struct rlimit vm_initial_limit_stack = { DFLSSIZ, MAXSSIZ - PAGE_SIZE }; struct rlimit vm_initial_limit_data = { DFLDSIZ, MAXDSIZ }; struct rlimit vm_initial_limit_core = { DFLCSIZ, MAXCSIZ }; -extern thread_t cloneproc(task_t, proc_t, int); +extern thread_t cloneproc(task_t, proc_t, int, int); extern int (*mountroot)(void); lck_grp_t * proc_lck_grp; @@ -384,7 +383,6 @@ void (*unmountroot_pre_hook)(void); * of the uu_context.vc_ucred field so that the uthread structure can be * used like any other. */ -extern void run_bringup_tests(void); extern void IOServicePublishResource(const char *, boolean_t); @@ -500,6 +498,9 @@ bsd_init(void) #endif #endif /* MAC */ + /* Initialize System Override call */ + init_system_override(); + /* * Create process 0. */ @@ -567,9 +568,10 @@ bsd_init(void) bzero(&temp_cred, sizeof(temp_cred)); bzero(&temp_pcred, sizeof(temp_pcred)); temp_pcred.cr_ngroups = 1; - + /* kern_proc, shouldn't call up to DS for group membership */ + temp_pcred.cr_flags = CRF_NOMEMBERD; temp_cred.cr_audit.as_aia_p = audit_default_aia_p; - + bsd_init_kprintf("calling kauth_cred_create\n"); /* * We have to label the temp cred before we create from it to @@ -601,7 +603,6 @@ bsd_init(void) #endif /* Create the file descriptor table. */ - filedesc0.fd_refcnt = 1+1; /* +1 so shutdown will not _FREE_ZONE */ kernproc->p_fd = &filedesc0; filedesc0.fd_cmask = cmask; filedesc0.fd_knlistsize = -1; @@ -685,6 +686,12 @@ bsd_init(void) bsd_init_kprintf("calling vfsinit\n"); vfsinit(); +#if CONFIG_PROC_UUID_POLICY + /* Initial proc_uuid_policy subsystem */ + bsd_init_kprintf("calling proc_uuid_policy_init()\n"); + proc_uuid_policy_init(); +#endif + #if SOCKETS /* Initialize per-CPU cache allocator */ mcache_init(); @@ -747,8 +754,6 @@ bsd_init(void) bsd_init_kprintf("calling select_wait_queue_init\n"); select_wait_queue_init(); - /* Stack snapshot facility lock */ - stackshot_lock_init(); /* * Initialize protocols. Block reception of incoming packets * until everything is ready. 
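
(Aside on the boot-argument checks that parse_bsd_args() performs in the hunks below: the idiom is PE_parse_boot_argn(), which copies the value of a named boot-arg into a caller-supplied buffer and returns TRUE when the argument is present. A minimal self-contained sketch follows; the "-example" flag and the bootarg_example variable are hypothetical, for illustration only, and are not part of this patch:

    #include <pexpert/pexpert.h>		/* PE_parse_boot_argn() */

    static int bootarg_example = 0;		/* hypothetical flag, illustration only */

    static void
    parse_example_args(void)
    {
    	char namep[16];

    	/* Presence-only switch: TRUE if "-example" appears in boot-args. */
    	if (PE_parse_boot_argn("-example", namep, sizeof (namep)))
    		bootarg_example = 1;
    }

)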
@@ -769,6 +774,9 @@ bsd_init(void) bsd_init_kprintf("calling domaininit\n"); domaininit(); iptap_init(); +#if FLOW_DIVERT + flow_divert_init(); +#endif /* FLOW_DIVERT */ #endif /* SOCKETS */ kernproc->p_fd->fd_cdir = NULL; @@ -794,10 +802,6 @@ bsd_init(void) kmstartup(); #endif - /* kick off timeout driven events by calling first time */ - thread_wakeup(&lbolt); - timeout(lightning_bolt, 0, hz); - bsd_init_kprintf("calling bsd_autoconf\n"); bsd_autoconf(); @@ -815,6 +819,10 @@ bsd_init(void) bsd_init_kprintf("calling loopattach\n"); loopattach(); /* XXX */ #endif +#if NGIF + /* Initialize gif interface (after lo0) */ + gif_init(); +#endif #if PFLOG /* Initialize packet filter log interface */ @@ -834,6 +842,9 @@ bsd_init(void) /* register user tunnel kernel control handler */ utun_register_control(); +#if IPSEC + ipsec_register_control(); +#endif /* IPSEC */ netsrc_init(); nstat_init(); #endif /* NETWORKING */ @@ -932,8 +943,7 @@ bsd_init(void) #endif /* CONFIG_IMAGEBOOT */ /* set initial time; all other resource data is already zero'ed */ - microtime(&kernproc->p_start); - kernproc->p_stats->p_start = kernproc->p_start; /* for compat */ + microtime_with_abstime(&kernproc->p_start, &kernproc->p_stats->ps_start); #if DEVFS { @@ -953,7 +963,6 @@ bsd_init(void) #if defined(__LP64__) kernproc->p_flag |= P_LP64; - printf("Kernel is LP64\n"); #endif pal_kernel_announce(); @@ -969,6 +978,7 @@ bsd_init(void) #endif bsd_init_kprintf("done\n"); + } /* Called with kernel funnel held */ @@ -1003,20 +1013,6 @@ bsdinit_task(void) lock_trace = 1; } -void -lightning_bolt(__unused void *dummy) -{ - boolean_t funnel_state; - - funnel_state = thread_funnel_set(kernel_flock, TRUE); - - thread_wakeup(&lbolt); - timeout(lightning_bolt,0,hz); - klogwakeup(); - - (void) thread_funnel_set(kernel_flock, FALSE); -} - kern_return_t bsd_autoconf(void) { @@ -1083,7 +1079,7 @@ bsd_utaskbootstrap(void) * Clone the bootstrap process from the kernel process, without * inheriting either task characteristics or memory from the kernel; */ - thread = cloneproc(TASK_NULL, kernproc, FALSE); + thread = cloneproc(TASK_NULL, kernproc, FALSE, TRUE); /* Hold the reference as it will be dropped during shutdown */ initproc = proc_find(1); @@ -1110,7 +1106,7 @@ parse_bsd_args(void) char namep[16]; int msgbuf; - if (PE_parse_boot_argn("-s", namep, sizeof (namep))) + if ( PE_parse_boot_argn("-s", namep, sizeof (namep))) boothowto |= RB_SINGLE; if (PE_parse_boot_argn("-b", namep, sizeof (namep))) @@ -1150,6 +1146,7 @@ parse_bsd_args(void) if (PE_parse_boot_argn("-novfscache", namep, sizeof(namep))) { nc_disabled = 1; } + } void diff --git a/bsd/kern/decmpfs.c b/bsd/kern/decmpfs.c index ef8057a4e..582298c17 100644 --- a/bsd/kern/decmpfs.c +++ b/bsd/kern/decmpfs.c @@ -668,11 +668,11 @@ decmpfs_file_is_compressed(vnode_t vp, decmpfs_cnode *cp) return 0; } -// if (!vnode_isreg(vp)) { -// /* only regular files can be compressed */ -// ret = FILE_IS_NOT_COMPRESSED; -// goto done; -// } + if (!vnode_isreg(vp)) { + /* only regular files can be compressed */ + ret = FILE_IS_NOT_COMPRESSED; + goto done; + } mp = vnode_mount(vp); if (mp == NULL) { diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c index 79896dbbe..71f1ae7c2 100644 --- a/bsd/kern/kdebug.c +++ b/bsd/kern/kdebug.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @Apple_LICENSE_HEADER_START@ * @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -72,6 +73,42 @@ #include +/* + * IOP(s) + * + * https://coreoswiki.apple.com/wiki/pages/U6z3i0q9/Consistent_Logging_Implementers_Guide.html + * + * IOP(s) are auxiliary cores that want to participate in kdebug event logging. + * They are registered dynamically. Each is assigned a cpu_id at registration. + * + * NOTE: IOP trace events may not use the same clock hardware as "normal" + * cpus. There is an effort made to synchronize the IOP timebase with the + * AP, but it should be understood that there may be discrepancies. + * + * Once registered, an IOP is permanent, it cannot be unloaded/unregistered. + * The current implementation depends on this for thread safety. + * + * New registrations occur by allocating an kd_iop struct and assigning + * a provisional cpu_id of list_head->cpu_id + 1. Then a CAS to claim the + * list_head pointer resolves any races. + * + * You may safely walk the kd_iops list at any time, without holding locks. + * + * When allocating buffers, the current kd_iops head is captured. Any operations + * that depend on the buffer state (such as flushing IOP traces on reads, + * etc.) should use the captured list head. This will allow registrations to + * take place while trace is in use. + */ + +typedef struct kd_iop { + kd_callback_t callback; + uint32_t cpu_id; + uint64_t last_timestamp; /* Prevent timer rollback */ + struct kd_iop* next; +} kd_iop_t; + +static kd_iop_t* kd_iops = NULL; + /* XXX should have prototypes, but Mach does not provide one */ void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); int cpu_number(void); /* XXX include path broken */ @@ -81,16 +118,22 @@ int kdbg_read(user_addr_t, size_t *, vnode_t, vfs_context_t); void kdbg_control_chud(int, void *); int kdbg_control(int *, u_int, user_addr_t, size_t *); int kdbg_getentropy (user_addr_t, size_t *, int); -int kdbg_readmap(user_addr_t, size_t *, vnode_t, vfs_context_t); +int kdbg_readcpumap(user_addr_t, size_t *); +int kdbg_readcurcpumap(user_addr_t, size_t *); +int kdbg_readthrmap(user_addr_t, size_t *, vnode_t, vfs_context_t); +int kdbg_readcurthrmap(user_addr_t, size_t *); int kdbg_getreg(kd_regtype *); int kdbg_setreg(kd_regtype *); int kdbg_setrtcdec(kd_regtype *); int kdbg_setpidex(kd_regtype *); int kdbg_setpid(kd_regtype *); -void kdbg_mapinit(void); +void kdbg_thrmap_init(void); int kdbg_reinit(boolean_t); int kdbg_bootstrap(boolean_t); +int kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size); +kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount); + static int kdbg_enable_typefilter(void); static int kdbg_disable_typefilter(void); @@ -103,7 +146,7 @@ extern void IOSleep(int); unsigned int kdebug_enable = 0; /* track timestamps for security server's entropy needs */ -uint64_t * kd_entropy_buffer = 0; +uint64_t * kd_entropy_buffer = 0; unsigned int kd_entropy_bufsize = 0; unsigned int kd_entropy_count = 0; unsigned int kd_entropy_indx = 0; @@ -111,21 +154,16 @@ vm_offset_t kd_entropy_buftomem = 0; #define MAX_ENTROPY_COUNT (128 * 1024) - #define SLOW_NOLOG 0x01 #define SLOW_CHECKS 0x02 #define SLOW_ENTROPY 0x04 #define SLOW_CHUD 0x08 -unsigned int kd_cpus; - #define EVENTS_PER_STORAGE_UNIT 2048 #define MIN_STORAGE_UNITS_PER_CPU 4 #define POINTER_FROM_KDS_PTR(x) (&kd_bufs[x.buffer_index].kdsb_addr[x.offset]) -#define NATIVE_TRACE_FACILITY - union kds_ptr { 
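	/* Packed pointer, decoded by POINTER_FROM_KDS_PTR() above as kd_bufs[buffer_index].kdsb_addr[offset]. */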
struct { uint32_t buffer_index:21; @@ -169,7 +207,7 @@ struct kd_bufinfo { uint32_t _pad; uint64_t kd_prev_timebase; uint32_t num_bufs; -} __attribute__(( aligned(CPU_CACHE_SIZE) )); +} __attribute__(( aligned(MAX_CPU_CACHE_LINE_SIZE) )); struct kd_ctrl_page_t { union kds_ptr kds_free_list; @@ -178,12 +216,17 @@ struct kd_ctrl_page_t { int kds_inuse_count; uint32_t kdebug_flags; uint32_t kdebug_slowcheck; - uint32_t _pad1; - struct { - uint64_t tsc_base; - uint64_t ns_base; - } cpu_timebase[32]; // should be max number of actual logical cpus -} kd_ctrl_page = {.kds_free_list = {.raw = KDS_PTR_NULL}, .enabled = 0, .kds_inuse_count = 0, .kdebug_flags = 0, .kdebug_slowcheck = SLOW_NOLOG}; + /* + * The number of kd_bufinfo structs allocated may not match the current + * number of active cpus. We capture the iops list head at initialization + * which we could use to calculate the number of cpus we allocated data for, + * unless it happens to be null. To avoid that case, we explicitly also + * capture a cpu count. + */ + kd_iop_t* kdebug_iops; + uint32_t kdebug_cpus; +} kd_ctrl_page = { .kds_free_list = {.raw = KDS_PTR_NULL}, .kdebug_slowcheck = SLOW_NOLOG }; + #pragma pack() struct kd_bufinfo *kdbip = NULL; @@ -192,9 +235,6 @@ struct kd_bufinfo *kdbip = NULL; #define KDCOPYBUF_SIZE (KDCOPYBUF_COUNT * sizeof(kd_buf)) kd_buf *kdcopybuf = NULL; - -int kdlog_sched_events = 0; - boolean_t kdlog_bg_trace = FALSE; boolean_t kdlog_bg_trace_running = FALSE; unsigned int bg_nkdbufs = 0; @@ -224,6 +264,8 @@ void *stackshot_snapbuf = NULL; int stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval); +int +stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced); extern void kdp_snapshot_preflight(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset); @@ -235,14 +277,12 @@ kdp_stack_snapshot_bytes_traced(void); kd_threadmap *kd_mapptr = 0; unsigned int kd_mapsize = 0; unsigned int kd_mapcount = 0; -vm_offset_t kd_maptomem = 0; off_t RAW_file_offset = 0; int RAW_file_written = 0; #define RAW_FLUSH_SIZE (2 * 1024 * 1024) - pid_t global_state_pid = -1; /* Used to control exclusive use of kd_buffer */ #define DBG_FUNC_MASK 0xfffffffc @@ -293,10 +333,96 @@ typedef void (*kd_chudhook_fn) (uint32_t debugid, uintptr_t arg1, volatile kd_chudhook_fn kdebug_chudhook = 0; /* pointer to CHUD toolkit function */ -__private_extern__ void stackshot_lock_init( void ) __attribute__((section("__TEXT, initcode"))); +__private_extern__ void stackshot_lock_init( void ); static uint8_t *type_filter_bitmap; +static uint32_t +kdbg_cpu_count(boolean_t early_trace) +{ + if (early_trace) { + /* + * we've started tracing before the IOKit has even + * started running... just use the static max value + */ + return max_ncpus; + } + + host_basic_info_data_t hinfo; + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + host_info((host_t)1 /* BSD_HOST */, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); + assert(hinfo.logical_cpu_max > 0); + return hinfo.logical_cpu_max; +} + +#if MACH_ASSERT +static boolean_t +kdbg_iop_list_is_valid(kd_iop_t* iop) +{ + if (iop) { + /* Is list sorted by cpu_id? */ + kd_iop_t* temp = iop; + do { + assert(!temp->next || temp->next->cpu_id == temp->cpu_id - 1); + assert(temp->next || (temp->cpu_id == kdbg_cpu_count(FALSE) || temp->cpu_id == kdbg_cpu_count(TRUE))); + } while ((temp = temp->next)); + + /* Does each entry have a function and a name? 
*/ + temp = iop; + do { + assert(temp->callback.func); + assert(strlen(temp->callback.iop_name) < sizeof(temp->callback.iop_name)); + } while ((temp = temp->next)); + } + + return TRUE; +} + +static boolean_t +kdbg_iop_list_contains_cpu_id(kd_iop_t* list, uint32_t cpu_id) +{ + while (list) { + if (list->cpu_id == cpu_id) + return TRUE; + list = list->next; + } + + return FALSE; +} + +/* + * This is a temporary workaround for + * + * DO NOT CALL IN RELEASE BUILD, LEAKS ADDRESS INFORMATION! + */ +static boolean_t +kdbg_iop_list_check_for_timestamp_rollback(kd_iop_t* list, uint32_t cpu_id, uint64_t timestamp) +{ + while (list) { + if (list->cpu_id == cpu_id) { + if (list->last_timestamp > timestamp) { + kprintf("%s is sending trace events that have gone backwards in time. Run the following command: \"symbols -2 -lookup 0x%p\" and file a radar against the matching kext.\n", list->callback.iop_name, (void*)list->callback.func); + } + /* Unconditional set mitigates syslog spam */ + list->last_timestamp = timestamp; + return TRUE; + } + list = list->next; + } + + return FALSE; +} +#endif /* MACH_ASSERT */ + +static void +kdbg_iop_list_callback(kd_iop_t* iop, kd_callback_type type, void* arg) +{ + while (iop) { + iop->callback.func(iop->callback.context, type, arg); + iop = iop->next; + } +} + static void kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type) { @@ -314,6 +440,18 @@ kdbg_set_tracing_enabled(boolean_t enabled, uint32_t trace_type) } lck_spin_unlock(kds_spin_lock); ml_set_interrupts_enabled(s); + + if (enabled) { + kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_ENABLED, NULL); + } else { + /* + * If you do not flush the IOP trace buffers, they can linger + * for a considerable period; consider code which disables and + * deallocates without a final sync flush. + */ + kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_KDEBUG_DISABLED, NULL); + kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL); + } } static void @@ -329,12 +467,11 @@ kdbg_set_flags(int slowflag, int enableflag, boolean_t enabled) kd_ctrl_page.kdebug_slowcheck &= ~slowflag; kdebug_enable &= ~enableflag; } + lck_spin_unlock(kds_spin_lock); ml_set_interrupts_enabled(s); } - -#ifdef NATIVE_TRACE_FACILITY void disable_wrap(uint32_t *old_slowcheck, uint32_t *old_flags) { @@ -369,26 +506,8 @@ enable_wrap(uint32_t old_slowcheck, boolean_t lostevents) ml_set_interrupts_enabled(s); } -void trace_set_timebases(__unused uint64_t tsc, __unused uint64_t ns) -{ -} -#else -/* Begin functions that are defined twice */ -void trace_set_timebases(uint64_t tsc, uint64_t ns) -{ - int cpu = cpu_number(); - kd_ctrl_page.cpu_timebase[cpu].tsc_base = tsc; - kd_ctrl_page.cpu_timebase[cpu].ns_base = ns; -} - -#endif - static int -#if defined(__i386__) || defined(__x86_64__) create_buffers(boolean_t early_trace) -#else -create_buffers(__unused boolean_t early_trace) -#endif { int i; int p_buffer_size; @@ -396,44 +515,32 @@ create_buffers(__unused boolean_t early_trace) int f_buffers; int error = 0; - /* - * get the number of cpus and cache it - */ -#if defined(__i386__) || defined(__x86_64__) - if (early_trace == TRUE) { - /* - * we've started tracing before the - * IOKit has even started running... just - * use the static max value - */ - kd_cpus = max_ncpus; - } else -#endif - { - host_basic_info_data_t hinfo; - mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + /* + * For the duration of this allocation, trace code will only reference + * kdebug_iops. 
Any iops registered after this enabling will not be + * messaged until the buffers are reallocated. + * + * TLDR; Must read kd_iops once and only once! + */ + kd_ctrl_page.kdebug_iops = kd_iops; -#define BSD_HOST 1 - host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); - kd_cpus = hinfo.logical_cpu_max; - } - if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_cpus) != KERN_SUCCESS) { - error = ENOSPC; - goto out; - } + assert(kdbg_iop_list_is_valid(kd_ctrl_page.kdebug_iops)); + + /* + * If the list is valid, it is sorted, newest -> oldest. Each iop entry + * has a cpu_id of "the older entry + 1", so the highest cpu_id will + * be the list head + 1. + */ - trace_handler_map_bufinfo((uintptr_t)kdbip, sizeof(struct kd_bufinfo) * kd_cpus); + kd_ctrl_page.kdebug_cpus = kd_ctrl_page.kdebug_iops ? kd_ctrl_page.kdebug_iops->cpu_id + 1 : kdbg_cpu_count(early_trace); -#if !defined(NATIVE_TRACE_FACILITY) - for(i=0;i<(int)kd_cpus;i++) { - get_nanotime_timebases(i, - &kd_ctrl_page.cpu_timebase[i].tsc_base, - &kd_ctrl_page.cpu_timebase[i].ns_base); + if (kmem_alloc(kernel_map, (vm_offset_t *)&kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus) != KERN_SUCCESS) { + error = ENOSPC; + goto out; } -#endif - if (nkdbufs < (kd_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU)) - n_storage_units = kd_cpus * MIN_STORAGE_UNITS_PER_CPU; + if (nkdbufs < (kd_ctrl_page.kdebug_cpus * EVENTS_PER_STORAGE_UNIT * MIN_STORAGE_UNITS_PER_CPU)) + n_storage_units = kd_ctrl_page.kdebug_cpus * MIN_STORAGE_UNITS_PER_CPU; else n_storage_units = nkdbufs / EVENTS_PER_STORAGE_UNIT; @@ -490,8 +597,6 @@ create_buffers(__unused boolean_t early_trace) n_elements = kd_bufs[i].kdsb_size / sizeof(struct kd_storage); kds = kd_bufs[i].kdsb_addr; - trace_handler_map_buffer(i, (uintptr_t)kd_bufs[i].kdsb_addr, kd_bufs[i].kdsb_size); - for (n = 0; n < n_elements; n++) { kds[n].kds_next.buffer_index = kd_ctrl_page.kds_free_list.buffer_index; kds[n].kds_next.offset = kd_ctrl_page.kds_free_list.offset; @@ -502,15 +607,15 @@ create_buffers(__unused boolean_t early_trace) n_storage_units += n_elements; } - bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_cpus); + bzero((char *)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus); - for (i = 0; i < (int)kd_cpus; i++) { + for (i = 0; i < (int)kd_ctrl_page.kdebug_cpus; i++) { kdbip[i].kd_list_head.raw = KDS_PTR_NULL; kdbip[i].kd_list_tail.raw = KDS_PTR_NULL; kdbip[i].kd_lostevents = FALSE; kdbip[i].num_bufs = 0; } - + kd_ctrl_page.kdebug_flags |= KDBG_BUFINIT; kd_ctrl_page.kds_inuse_count = 0; @@ -522,17 +627,15 @@ out: return(error); } - static void delete_buffers(void) { - int i; + int i; if (kd_bufs) { for (i = 0; i < n_storage_buffers; i++) { if (kd_bufs[i].kdsb_addr) { kmem_free(kernel_map, (vm_offset_t)kd_bufs[i].kdsb_addr, (vm_size_t)kd_bufs[i].kdsb_size); - trace_handler_unmap_buffer(i); } } kmem_free(kernel_map, (vm_offset_t)kd_bufs, (vm_size_t)(n_storage_buffers * sizeof(struct kd_storage_buffers))); @@ -548,17 +651,15 @@ delete_buffers(void) kd_ctrl_page.kds_free_list.raw = KDS_PTR_NULL; if (kdbip) { - trace_handler_unmap_bufinfo(); - - kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_cpus); + kmem_free(kernel_map, (vm_offset_t)kdbip, sizeof(struct kd_bufinfo) * kd_ctrl_page.kdebug_cpus); kdbip = NULL; } + kd_ctrl_page.kdebug_iops = NULL; + kd_ctrl_page.kdebug_cpus = 0; kd_ctrl_page.kdebug_flags &= ~KDBG_BUFINIT; } - -#ifdef NATIVE_TRACE_FACILITY void release_storage_unit(int cpu, 
uint32_t kdsp_raw) { @@ -587,7 +688,7 @@ release_storage_unit(int cpu, uint32_t kdsp_raw) */ kdsp_actual = POINTER_FROM_KDS_PTR(kdsp); kdbp->kd_list_head = kdsp_actual->kds_next; - + kdsp_actual->kds_next = kd_ctrl_page.kds_free_list; kd_ctrl_page.kds_free_list = kdsp; @@ -636,7 +737,7 @@ allocate_storage_unit(int cpu) kdbp_vict = NULL; oldest_ts = (uint64_t)-1; - for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_cpus]; kdbp_try++) { + for (kdbp_try = &kdbip[0]; kdbp_try < &kdbip[kd_ctrl_page.kdebug_cpus]; kdbp_try++) { if (kdbp_try->kd_list_head.raw == KDS_PTR_NULL) { /* @@ -706,7 +807,191 @@ out: return (retval); } -#endif + +int +kernel_debug_register_callback(kd_callback_t callback) +{ + kd_iop_t* iop; + if (kmem_alloc(kernel_map, (vm_offset_t *)&iop, sizeof(kd_iop_t)) == KERN_SUCCESS) { + memcpy(&iop->callback, &callback, sizeof(kd_callback_t)); + + /* + * Some IOP clients are not providing a name. + * + * Remove when fixed. + */ + { + boolean_t is_valid_name = FALSE; + for (uint32_t length=0; length 0x20 && callback.iop_name[length] < 0x7F) + continue; + if (callback.iop_name[length] == 0) { + if (length) + is_valid_name = TRUE; + break; + } + } + + if (!is_valid_name) { + strlcpy(iop->callback.iop_name, "IOP-???", sizeof(iop->callback.iop_name)); + } + } + + iop->last_timestamp = 0; + + do { + /* + * We use two pieces of state, the old list head + * pointer, and the value of old_list_head->cpu_id. + * If we read kd_iops more than once, it can change + * between reads. + * + * TLDR; Must not read kd_iops more than once per loop. + */ + iop->next = kd_iops; + iop->cpu_id = iop->next ? (iop->next->cpu_id+1) : kdbg_cpu_count(FALSE); + + /* + * Header says OSCompareAndSwapPtr has a memory barrier + */ + } while (!OSCompareAndSwapPtr(iop->next, iop, (void* volatile*)&kd_iops)); + + return iop->cpu_id; + } + + return 0; +} + +void +kernel_debug_enter( + uint32_t coreid, + uint32_t debugid, + uint64_t timestamp, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4, + uintptr_t threadid + ) +{ + uint32_t bindx; + kd_buf *kd; + struct kd_bufinfo *kdbp; + struct kd_storage *kdsp_actual; + union kds_ptr kds_raw; + + if (kd_ctrl_page.kdebug_slowcheck) { + + if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) || !(kdebug_enable & (KDEBUG_ENABLE_TRACE|KDEBUG_ENABLE_PPT))) + goto out1; + + if (kd_ctrl_page.kdebug_flags & KDBG_TYPEFILTER_CHECK) { + if (isset(type_filter_bitmap, EXTRACT_CSC(debugid))) + goto record_event; + goto out1; + } + else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) { + if (debugid >= kdlog_beg && debugid <= kdlog_end) + goto record_event; + goto out1; + } + else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) { + if ((debugid & DBG_FUNC_MASK) != kdlog_value1 && + (debugid & DBG_FUNC_MASK) != kdlog_value2 && + (debugid & DBG_FUNC_MASK) != kdlog_value3 && + (debugid & DBG_FUNC_MASK) != kdlog_value4) + goto out1; + } + } + +record_event: + assert(kdbg_iop_list_contains_cpu_id(kd_ctrl_page.kdebug_iops, coreid)); + /* Remove when is closed. 
*/ + assert(kdbg_iop_list_check_for_timestamp_rollback(kd_ctrl_page.kdebug_iops, coreid, timestamp)); + + disable_preemption(); + + if (kd_ctrl_page.enabled == 0) + goto out; + + kdbp = &kdbip[coreid]; + timestamp &= KDBG_TIMESTAMP_MASK; + +retry_q: + kds_raw = kdbp->kd_list_tail; + + if (kds_raw.raw != KDS_PTR_NULL) { + kdsp_actual = POINTER_FROM_KDS_PTR(kds_raw); + bindx = kdsp_actual->kds_bufindx; + } else + kdsp_actual = NULL; + + if (kdsp_actual == NULL || bindx >= EVENTS_PER_STORAGE_UNIT) { + if (allocate_storage_unit(coreid) == FALSE) { + /* + * this can only happen if wrapping + * has been disabled + */ + goto out; + } + goto retry_q; + } + if ( !OSCompareAndSwap(bindx, bindx + 1, &kdsp_actual->kds_bufindx)) + goto retry_q; + + // IOP entries can be allocated before xnu allocates and inits the buffer + if (timestamp < kdsp_actual->kds_timestamp) + kdsp_actual->kds_timestamp = timestamp; + + kd = &kdsp_actual->kds_records[bindx]; + + kd->debugid = debugid; + kd->arg1 = arg1; + kd->arg2 = arg2; + kd->arg3 = arg3; + kd->arg4 = arg4; + kd->arg5 = threadid; + + kdbg_set_timestamp_and_cpu(kd, timestamp, coreid); + + OSAddAtomic(1, &kdsp_actual->kds_bufcnt); +out: + enable_preemption(); +out1: + if ((kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold)) { + boolean_t need_kds_wakeup = FALSE; + int s; + + /* + * try to take the lock here to synchronize with the + * waiter entering the blocked state... use the try + * mode to prevent deadlocks caused by re-entering this + * routine due to various trace points triggered in the + * lck_spin_sleep_xxxx routines used to actually enter + * our wait condition... no problem if we fail, + * there will be lots of additional events coming in that + * will eventually succeed in grabbing this lock + */ + s = ml_set_interrupts_enabled(FALSE); + + if (lck_spin_try_lock(kdw_spin_lock)) { + + if (kds_waiter && kd_ctrl_page.kds_inuse_count >= n_storage_threshold) { + kds_waiter = 0; + need_kds_wakeup = TRUE; + } + lck_spin_unlock(kdw_spin_lock); + + ml_set_interrupts_enabled(s); + + if (need_kds_wakeup == TRUE) + wakeup(&kds_waiter); + } + } +} + + void kernel_debug_internal( @@ -817,23 +1102,31 @@ kernel_debug_internal( goto out1; } else if (kd_ctrl_page.kdebug_flags & KDBG_RANGECHECK) { - if ((debugid >= kdlog_beg && debugid <= kdlog_end) || (debugid >> 24) == DBG_TRACE) - goto record_event; - if (kdlog_sched_events && (debugid & 0xffff0000) == (MACHDBG_CODE(DBG_MACH_SCHED, 0) | DBG_FUNC_NONE)) + /* Always record trace system info */ + if (EXTRACT_CLASS(debugid) == DBG_TRACE) goto record_event; - goto out1; + + if (debugid < kdlog_beg || debugid > kdlog_end) + goto out1; } else if (kd_ctrl_page.kdebug_flags & KDBG_VALCHECK) { + /* Always record trace system info */ + if (EXTRACT_CLASS(debugid) == DBG_TRACE) + goto record_event; + if ((debugid & DBG_FUNC_MASK) != kdlog_value1 && (debugid & DBG_FUNC_MASK) != kdlog_value2 && (debugid & DBG_FUNC_MASK) != kdlog_value3 && - (debugid & DBG_FUNC_MASK) != kdlog_value4 && - (debugid >> 24 != DBG_TRACE)) + (debugid & DBG_FUNC_MASK) != kdlog_value4) goto out1; } } record_event: disable_preemption(); + + if (kd_ctrl_page.enabled == 0) + goto out; + cpu = cpu_number(); kdbp = &kdbip[cpu]; retry_q: @@ -967,8 +1260,6 @@ kdbg_lock_init(void) { if (kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT) return; - - trace_handler_map_ctrl_page((uintptr_t)&kd_ctrl_page, sizeof(kd_ctrl_page), sizeof(struct kd_storage), sizeof(union kds_ptr)); /* * allocate lock group attribute and group @@ -1107,19 +1398,105 @@ 
kdbg_resolve_map(thread_t th_act, void *opaque) } } +/* + * + * Writes a cpumap for the given iops_list/cpu_count to the provided buffer. + * + * You may provide a buffer and size, or if you set the buffer to NULL, a + * buffer of sufficient size will be allocated. + * + * If you provide a buffer and it is too small, sets cpumap_size to the number + * of bytes required and returns EINVAL. + * + * On success, if you provided a buffer, cpumap_size is set to the number of + * bytes written. If you did not provide a buffer, cpumap is set to the newly + * allocated buffer and cpumap_size is set to the number of bytes allocated. + * + * NOTE: It may seem redundant to pass both iops and a cpu_count. + * + * We may be reporting data from "now", or from the "past". + * + * The "now" data would be for something like kdbg_readcurcpumap(). + * The "past" data would be for kdbg_readcpumap(). + * + * If we do not pass both iops and cpu_count, and iops is NULL, this function + * will need to read "now" state to get the number of cpus, which would be in + * error if we were reporting "past" state. + */ + +int +kdbg_cpumap_init_internal(kd_iop_t* iops, uint32_t cpu_count, uint8_t** cpumap, uint32_t* cpumap_size) +{ + assert(cpumap); + assert(cpumap_size); + assert(cpu_count); + assert(!iops || iops->cpu_id + 1 == cpu_count); + + uint32_t bytes_needed = sizeof(kd_cpumap_header) + cpu_count * sizeof(kd_cpumap); + uint32_t bytes_available = *cpumap_size; + *cpumap_size = bytes_needed; + + if (*cpumap == NULL) { + if (kmem_alloc(kernel_map, (vm_offset_t*)cpumap, (vm_size_t)*cpumap_size) != KERN_SUCCESS) { + return ENOMEM; + } + } else if (bytes_available < bytes_needed) { + return EINVAL; + } + + kd_cpumap_header* header = (kd_cpumap_header*)(uintptr_t)*cpumap; + + header->version_no = RAW_VERSION1; + header->cpu_count = cpu_count; + + kd_cpumap* cpus = (kd_cpumap*)&header[1]; + + int32_t index = cpu_count - 1; + while (iops) { + cpus[index].cpu_id = iops->cpu_id; + cpus[index].flags = KDBG_CPUMAP_IS_IOP; + bzero(cpus[index].name, sizeof(cpus->name)); + strlcpy(cpus[index].name, iops->callback.iop_name, sizeof(cpus->name)); + + iops = iops->next; + index--; + } + + while (index >= 0) { + cpus[index].cpu_id = index; + cpus[index].flags = 0; + bzero(cpus[index].name, sizeof(cpus->name)); + strlcpy(cpus[index].name, "AP", sizeof(cpus->name)); + + index--; + } + + return KERN_SUCCESS; +} + void -kdbg_mapinit(void) +kdbg_thrmap_init(void) { + if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) + return; + + kd_mapptr = kdbg_thrmap_init_internal(0, &kd_mapsize, &kd_mapcount); + + if (kd_mapptr) + kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT; +} + + +kd_threadmap* kdbg_thrmap_init_internal(unsigned int count, unsigned int *mapsize, unsigned int *mapcount) +{ + kd_threadmap *mapptr; struct proc *p; struct krt akrt; int tts_count; /* number of task-to-string structures */ struct tts *tts_mapptr; unsigned int tts_mapsize = 0; - vm_offset_t tts_maptomem=0; int i; - - if (kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) - return; + vm_offset_t kaddr; /* * need to use PROC_SCANPROCLIST with proc_iterate @@ -1129,8 +1506,8 @@ kdbg_mapinit(void) /* * Calculate the sizes of map buffers */ - for (p = allproc.lh_first, kd_mapcount=0, tts_count=0; p; p = p->p_list.le_next) { - kd_mapcount += get_task_numacts((task_t)p->task); + for (p = allproc.lh_first, *mapcount=0, tts_count=0; p; p = p->p_list.le_next) { + *mapcount += get_task_numacts((task_t)p->task); tts_count++; } proc_list_unlock(); @@ -1139,72 +1516,76 @@ kdbg_mapinit(void) * The proc 
count could change during buffer allocation, * so introduce a small fudge factor to bump up the * buffer sizes. This gives new tasks some chance of - * making into the tables. Bump up by 10%. + * making into the tables. Bump up by 25%. */ - kd_mapcount += kd_mapcount/10; - tts_count += tts_count/10; + *mapcount += *mapcount/4; + tts_count += tts_count/4; + + *mapsize = *mapcount * sizeof(kd_threadmap); - kd_mapsize = kd_mapcount * sizeof(kd_threadmap); + if (count && count < *mapcount) + return (0); - if ((kmem_alloc(kernel_map, & kd_maptomem, (vm_size_t)kd_mapsize) == KERN_SUCCESS)) { - kd_mapptr = (kd_threadmap *) kd_maptomem; - bzero(kd_mapptr, kd_mapsize); + if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)*mapsize) == KERN_SUCCESS)) { + bzero((void *)kaddr, *mapsize); + mapptr = (kd_threadmap *)kaddr; } else - kd_mapptr = (kd_threadmap *) 0; + return (0); tts_mapsize = tts_count * sizeof(struct tts); - if ((kmem_alloc(kernel_map, & tts_maptomem, (vm_size_t)tts_mapsize) == KERN_SUCCESS)) { - tts_mapptr = (struct tts *) tts_maptomem; - bzero(tts_mapptr, tts_mapsize); - } else - tts_mapptr = (struct tts *) 0; + if ((kmem_alloc(kernel_map, &kaddr, (vm_size_t)tts_mapsize) == KERN_SUCCESS)) { + bzero((void *)kaddr, tts_mapsize); + tts_mapptr = (struct tts *)kaddr; + } else { + kmem_free(kernel_map, (vm_offset_t)mapptr, *mapsize); + return (0); + } /* * We need to save the procs command string * and take a reference for each task associated * with a valid process */ - if (tts_mapptr) { - /* - * should use proc_iterate - */ - proc_list_lock(); - - for (p = allproc.lh_first, i=0; p && i < tts_count; p = p->p_list.le_next) { - if (p->p_lflag & P_LEXIT) - continue; - - if (p->task) { - task_reference(p->task); - tts_mapptr[i].task = p->task; - tts_mapptr[i].pid = p->p_pid; - (void)strlcpy(tts_mapptr[i].task_comm, p->p_comm, sizeof(tts_mapptr[i].task_comm)); - i++; - } - } - tts_count = i; - proc_list_unlock(); + proc_list_lock(); + + /* + * should use proc_iterate + */ + for (p = allproc.lh_first, i=0; p && i < tts_count; p = p->p_list.le_next) { + if (p->p_lflag & P_LEXIT) + continue; + + if (p->task) { + task_reference(p->task); + tts_mapptr[i].task = p->task; + tts_mapptr[i].pid = p->p_pid; + (void)strlcpy(tts_mapptr[i].task_comm, p->p_comm, sizeof(tts_mapptr[i].task_comm)); + i++; + } } + tts_count = i; - if (kd_mapptr && tts_mapptr) { - kd_ctrl_page.kdebug_flags |= KDBG_MAPINIT; + proc_list_unlock(); - /* - * Initialize thread map data - */ - akrt.map = kd_mapptr; - akrt.count = 0; - akrt.maxcount = kd_mapcount; + /* + * Initialize thread map data + */ + akrt.map = mapptr; + akrt.count = 0; + akrt.maxcount = *mapcount; - for (i = 0; i < tts_count; i++) { - akrt.atts = &tts_mapptr[i]; - task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt); - task_deallocate((task_t) tts_mapptr[i].task); - } - kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize); + for (i = 0; i < tts_count; i++) { + akrt.atts = &tts_mapptr[i]; + task_act_iterate_wth_args(tts_mapptr[i].task, kdbg_resolve_map, &akrt); + task_deallocate((task_t) tts_mapptr[i].task); } + kmem_free(kernel_map, (vm_offset_t)tts_mapptr, tts_mapsize); + + *mapcount = akrt.count; + + return (mapptr); } static void @@ -1224,7 +1605,6 @@ kdbg_clear(void) */ IOSleep(100); - kdlog_sched_events = 0; global_state_pid = -1; kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES; kd_ctrl_page.kdebug_flags &= ~(KDBG_NOWRAP | KDBG_RANGECHECK | KDBG_VALCHECK); @@ -1401,18 +1781,11 @@ kdbg_setreg(kd_regtype * kdr) { int ret=0; unsigned 
int val_1, val_2, val;
-
-	kdlog_sched_events = 0;
-
 	switch (kdr->type) {
	
 	case KDBG_CLASSTYPE :
 		val_1 = (kdr->value1 & 0xff);
 		val_2 = (kdr->value2 & 0xff);
-
-		if (val_1 == DBG_FSYSTEM && val_2 == (DBG_FSYSTEM + 1))
-			kdlog_sched_events = 1;
-
 		kdlog_beg = (val_1<<24);
 		kdlog_end = (val_2<<24);
 		kd_ctrl_page.kdebug_flags &= (unsigned int)~KDBG_CKTYPES;
@@ -1513,21 +1886,73 @@
 	return(EINVAL);
 }
+int
+kdbg_readcpumap(user_addr_t user_cpumap, size_t *user_cpumap_size)
+{
+	uint8_t* cpumap = NULL;
+	uint32_t cpumap_size = 0;
+	int ret = KERN_SUCCESS;
+
+	if (kd_ctrl_page.kdebug_flags & KDBG_BUFINIT) {
+		if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, &cpumap, &cpumap_size) == KERN_SUCCESS) {
+			if (user_cpumap) {
+				size_t bytes_to_copy = (*user_cpumap_size >= cpumap_size) ? cpumap_size : *user_cpumap_size;
+				if (copyout(cpumap, user_cpumap, (size_t)bytes_to_copy)) {
+					ret = EFAULT;
+				}
+			}
+			*user_cpumap_size = cpumap_size;
+			kmem_free(kernel_map, (vm_offset_t)cpumap, cpumap_size);
+		} else
+			ret = EINVAL;
+	} else
+		ret = EINVAL;
+
+	return (ret);
+}
+
+int
+kdbg_readcurthrmap(user_addr_t buffer, size_t *bufsize)
+{
+	kd_threadmap *mapptr;
+	unsigned int mapsize;
+	unsigned int mapcount;
+	unsigned int count = 0;
+	int ret = 0;
+
+	count = *bufsize/sizeof(kd_threadmap);
+	*bufsize = 0;
+
+	if ( (mapptr = kdbg_thrmap_init_internal(count, &mapsize, &mapcount)) ) {
+		if (copyout(mapptr, buffer, mapcount * sizeof(kd_threadmap)))
+			ret = EFAULT;
+		else
+			*bufsize = (mapcount * sizeof(kd_threadmap));
+
+		kmem_free(kernel_map, (vm_offset_t)mapptr, mapsize);
+	} else
+		ret = EINVAL;
+
+	return (ret);
+}
 
 int
-kdbg_readmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
+kdbg_readthrmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 {
 	int avail = *number;
 	int ret = 0;
 	uint32_t count = 0;
+	unsigned int mapsize;
 
 	count = avail/sizeof (kd_threadmap);
 
+	mapsize = kd_mapcount * sizeof(kd_threadmap);
+
 	if (count && (count <= kd_mapcount)) {
 		if ((kd_ctrl_page.kdebug_flags & KDBG_MAPINIT) && kd_mapsize && kd_mapptr) {
-			if (*number < kd_mapsize)
+			if (*number < mapsize)
 				ret = EINVAL;
 			else {
@@ -1537,46 +1962,89 @@ kdbg_readmap(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx)
 				clock_sec_t secs;
 				clock_usec_t usecs;
 				char *pad_buf;
-				int pad_size;
+				uint32_t pad_size;
+				uint32_t extra_thread_count = 0;
+				uint32_t cpumap_size;
+
+				/*
+				 * To write a RAW_VERSION1+ file, we
+				 * must embed a cpumap in the "padding"
+				 * used to page align the events following
+				 * the threadmap. If the threadmap happens
+				 * to not require enough padding, we
+				 * artificially increase its footprint
+				 * until it needs enough padding.
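+				 * (Concretely: pad_size below is the gap to
+				 * the next page boundary after the header and
+				 * the threadmap; when the cpumap cannot fit in
+				 * that gap, enough zeroed threadmap entries are
+				 * added to push the events onto the next page,
+				 * leaving a full page of padding for the cpumap.)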
+ */ + + pad_size = PAGE_SIZE - ((sizeof(RAW_header) + (count * sizeof(kd_threadmap))) & PAGE_MASK_64); + cpumap_size = sizeof(kd_cpumap_header) + kd_ctrl_page.kdebug_cpus * sizeof(kd_cpumap); + + if (cpumap_size > pad_size) { + /* Force an overflow onto the next page, we get a full page of padding */ + extra_thread_count = (pad_size / sizeof(kd_threadmap)) + 1; + } header.version_no = RAW_VERSION1; - header.thread_count = count; + header.thread_count = count + extra_thread_count; clock_get_calendar_microtime(&secs, &usecs); header.TOD_secs = secs; header.TOD_usecs = usecs; - + ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)&header, sizeof(RAW_header), RAW_file_offset, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); if (ret) goto write_error; RAW_file_offset += sizeof(RAW_header); - ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, kd_mapsize, RAW_file_offset, + ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)kd_mapptr, mapsize, RAW_file_offset, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); if (ret) goto write_error; - RAW_file_offset += kd_mapsize; + RAW_file_offset += mapsize; - pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64); + if (extra_thread_count) { + pad_size = extra_thread_count * sizeof(kd_threadmap); + pad_buf = (char *)kalloc(pad_size); + memset(pad_buf, 0, pad_size); + + ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + kfree(pad_buf, pad_size); + + if (ret) + goto write_error; + RAW_file_offset += pad_size; - if (pad_size) - { + } + + pad_size = PAGE_SIZE - (RAW_file_offset & PAGE_MASK_64); + if (pad_size) { pad_buf = (char *)kalloc(pad_size); memset(pad_buf, 0, pad_size); + /* + * embed a cpumap in the padding bytes. + * older code will skip this. + * newer code will know how to read it. 
+ */ + uint32_t temp = pad_size; + if (kdbg_cpumap_init_internal(kd_ctrl_page.kdebug_iops, kd_ctrl_page.kdebug_cpus, (uint8_t**)&pad_buf, &temp) != KERN_SUCCESS) { + memset(pad_buf, 0, pad_size); + } + ret = vn_rdwr(UIO_WRITE, vp, (caddr_t)pad_buf, pad_size, RAW_file_offset, - UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); kfree(pad_buf, pad_size); - + if (ret) goto write_error; RAW_file_offset += pad_size; } - RAW_file_written += sizeof(RAW_header) + kd_mapsize + pad_size; + RAW_file_written += sizeof(RAW_header) + mapsize + pad_size; } else { - if (copyout(kd_mapptr, buffer, kd_mapsize)) + if (copyout(kd_mapptr, buffer, mapsize)) ret = EINVAL; } } @@ -1734,15 +2202,20 @@ kdbg_set_nkdbufs(unsigned int value) } -static void +static int kdbg_enable_bg_trace(void) { + int ret = 0; + if (kdlog_bg_trace == TRUE && kdlog_bg_trace_running == FALSE && n_storage_buffers == 0) { nkdbufs = bg_nkdbufs; - kdbg_reinit(FALSE); - kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE); - kdlog_bg_trace_running = TRUE; + ret = kdbg_reinit(FALSE); + if (0 == ret) { + kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE); + kdlog_bg_trace_running = TRUE; + } } + return ret; } static void @@ -1795,12 +2268,12 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) proc_t p, curproc; if (name[0] == KERN_KDGETENTROPY || - name[0] == KERN_KDWRITETR || - name[0] == KERN_KDWRITEMAP || + name[0] == KERN_KDWRITETR || + name[0] == KERN_KDWRITEMAP || name[0] == KERN_KDEFLAGS || name[0] == KERN_KDDFLAGS || name[0] == KERN_KDENABLE || - name[0] == KERN_KDENABLE_BG_TRACE || + name[0] == KERN_KDENABLE_BG_TRACE || name[0] == KERN_KDSETBUF) { if ( namelen < 2 ) @@ -1811,76 +2284,75 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) kdbg_lock_init(); if ( !(kd_ctrl_page.kdebug_flags & KDBG_LOCKINIT)) - return(ENOSPC); + return(ENOSPC); lck_mtx_lock(kd_trace_mtx_sysctl); switch(name[0]) { - - case KERN_KDGETBUF: - /* - * Does not alter the global_state_pid - * This is a passive request. - */ - if (size < sizeof(kd_bufinfo.nkdbufs)) { - /* - * There is not enough room to return even - * the first element of the info structure. - */ - ret = EINVAL; - goto out; - } - kd_bufinfo.nkdbufs = nkdbufs; - kd_bufinfo.nkdthreads = kd_mapsize / sizeof(kd_threadmap); - - if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) ) - kd_bufinfo.nolog = 1; - else - kd_bufinfo.nolog = 0; - - kd_bufinfo.flags = kd_ctrl_page.kdebug_flags; -#if defined(__LP64__) - kd_bufinfo.flags |= KDBG_LP64; -#endif - kd_bufinfo.bufid = global_state_pid; - - if (size >= sizeof(kd_bufinfo)) { + case KERN_KDGETBUF: /* - * Provide all the info we have + * Does not alter the global_state_pid + * This is a passive request. */ - if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo))) - ret = EINVAL; - } else { - /* - * For backwards compatibility, only provide - * as much info as there is room for. - */ - if (copyout(&kd_bufinfo, where, size)) + if (size < sizeof(kd_bufinfo.nkdbufs)) { + /* + * There is not enough room to return even + * the first element of the info structure. 
+ */ ret = EINVAL; - } - goto out; - break; - - case KERN_KDGETENTROPY: - if (kd_entropy_buffer) - ret = EBUSY; - else - ret = kdbg_getentropy(where, sizep, value); - goto out; - break; - - case KERN_KDENABLE_BG_TRACE: - bg_nkdbufs = kdbg_set_nkdbufs(value); - kdlog_bg_trace = TRUE; - kdbg_enable_bg_trace(); - goto out; - break; - - case KERN_KDDISABLE_BG_TRACE: - kdlog_bg_trace = FALSE; - kdbg_disable_bg_trace(); - goto out; - break; + goto out; + } + kd_bufinfo.nkdbufs = nkdbufs; + kd_bufinfo.nkdthreads = kd_mapcount; + + if ( (kd_ctrl_page.kdebug_slowcheck & SLOW_NOLOG) ) + kd_bufinfo.nolog = 1; + else + kd_bufinfo.nolog = 0; + + kd_bufinfo.flags = kd_ctrl_page.kdebug_flags; +#if defined(__LP64__) + kd_bufinfo.flags |= KDBG_LP64; +#endif + kd_bufinfo.bufid = global_state_pid; + + if (size >= sizeof(kd_bufinfo)) { + /* + * Provide all the info we have + */ + if (copyout(&kd_bufinfo, where, sizeof(kd_bufinfo))) + ret = EINVAL; + } else { + /* + * For backwards compatibility, only provide + * as much info as there is room for. + */ + if (copyout(&kd_bufinfo, where, size)) + ret = EINVAL; + } + goto out; + break; + + case KERN_KDGETENTROPY: + if (kd_entropy_buffer) + ret = EBUSY; + else + ret = kdbg_getentropy(where, sizep, value); + goto out; + break; + + case KERN_KDENABLE_BG_TRACE: + bg_nkdbufs = kdbg_set_nkdbufs(value); + kdlog_bg_trace = TRUE; + ret = kdbg_enable_bg_trace(); + goto out; + break; + + case KERN_KDDISABLE_BG_TRACE: + kdlog_bg_trace = FALSE; + kdbg_disable_bg_trace(); + goto out; + break; } if ((curproc = current_proc()) != NULL) @@ -1889,7 +2361,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) ret = ESRCH; goto out; } - if (global_state_pid == -1) + if (global_state_pid == -1) global_state_pid = curpid; else if (global_state_pid != curpid) { if ((p = proc_find(global_state_pid)) == NULL) { @@ -1937,7 +2409,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) ret = EINVAL; break; } - kdbg_mapinit(); + kdbg_thrmap_init(); kdbg_set_tracing_enabled(TRUE, value); } @@ -1958,7 +2430,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) break; case KERN_KDREMOVE: kdbg_clear(); - kdbg_enable_bg_trace(); + ret = kdbg_enable_bg_trace(); break; case KERN_KDSETREG: if(size < sizeof(kd_regtype)) { @@ -1979,7 +2451,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) break; } ret = kdbg_getreg(&kd_Reg); - if (copyout(&kd_Reg, where, sizeof(kd_regtype))) { + if (copyout(&kd_Reg, where, sizeof(kd_regtype))) { ret = EINVAL; } kdbg_disable_bg_trace(); @@ -1988,8 +2460,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) case KERN_KDREADTR: ret = kdbg_read(where, sizep, NULL, NULL); break; - case KERN_KDWRITETR: - case KERN_KDWRITEMAP: + case KERN_KDWRITETR: + case KERN_KDWRITEMAP: { struct vfs_context context; struct fileproc *fp; @@ -2040,7 +2512,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) context.vc_thread = current_thread(); context.vc_ucred = fp->f_fglob->fg_cred; - if (fp->f_fglob->fg_type != DTYPE_VNODE) { + if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { fp_drop(p, fd, fp, 1); proc_fdunlock(p); @@ -2061,8 +2533,8 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) *sizep = number; } else { - number = kd_mapsize; - kdbg_readmap(0, &number, vp, &context); + number = kd_mapcount * sizeof(kd_threadmap); + kdbg_readthrmap(0, &number, vp, &context); } vnode_put(vp); } @@ -2070,6 +2542,61 @@ 
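The KERN_KDGETBUF branch above stays compatible with older callers by honoring whatever buffer size they pass and returning only a prefix of the info structure. A condensed sketch of that pattern (the struct layout and function name are hypothetical):

struct bufinfo { int nkdbufs; int nkdthreads; int nolog; };

static int
copyout_compat(const struct bufinfo *ip, user_addr_t where, size_t size)
{
	/* new callers get the whole struct, old callers the prefix they asked for */
	size_t n = (size >= sizeof (*ip)) ? sizeof (*ip) : size;

	return (copyout(ip, where, n) ? EINVAL : 0);
}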
kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) break; } + case KERN_KDBUFWAIT: + { + /* WRITETR lite -- just block until there's data */ + int s; + int wait_result = THREAD_AWAKENED; + u_int64_t abstime; + u_int64_t ns; + size_t number = 0; + + kdbg_disable_bg_trace(); + + + if (*sizep) { + ns = ((u_int64_t)*sizep) * (u_int64_t)(1000 * 1000); + nanoseconds_to_absolutetime(ns, &abstime ); + clock_absolutetime_interval_to_deadline( abstime, &abstime ); + } else + abstime = 0; + + s = ml_set_interrupts_enabled(FALSE); + if( !s ) + panic("trying to wait with interrupts off"); + lck_spin_lock(kdw_spin_lock); + + /* drop the mutex so we don't exclude others from + * accessing the trace buffers + */ + lck_mtx_unlock(kd_trace_mtx_sysctl); + + while (wait_result == THREAD_AWAKENED && + kd_ctrl_page.kds_inuse_count < n_storage_threshold) { + + kds_waiter = 1; + + if (abstime) + wait_result = lck_spin_sleep_deadline(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE, abstime); + else + wait_result = lck_spin_sleep(kdw_spin_lock, 0, &kds_waiter, THREAD_ABORTSAFE); + + kds_waiter = 0; + } + + /* check the count under the spinlock */ + number = (kd_ctrl_page.kds_inuse_count >= n_storage_threshold); + + lck_spin_unlock(kdw_spin_lock); + ml_set_interrupts_enabled(s); + + /* pick the mutex back up again */ + lck_mtx_lock(kd_trace_mtx_sysctl); + + /* write out whether we've exceeded the threshold */ + *sizep = number; + break; + } case KERN_KDPIDTR: if (size < sizeof(kd_regtype)) { ret = EINVAL; break; @@ -2096,10 +2623,16 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) ret = kdbg_setpidex(&kd_Reg); break; - case KERN_KDTHRMAP: - ret = kdbg_readmap(where, sizep, NULL, NULL); - break; - case KERN_KDSETRTCDEC: + case KERN_KDCPUMAP: + ret = kdbg_readcpumap(where, sizep); + break; + case KERN_KDTHRMAP: + ret = kdbg_readthrmap(where, sizep, NULL, NULL); + break; + case KERN_KDREADCURTHRMAP: + ret = kdbg_readcurthrmap(where, sizep); + break; + case KERN_KDSETRTCDEC: if (size < sizeof(kd_regtype)) { ret = EINVAL; break; @@ -2129,6 +2662,7 @@ kdbg_control(int *name, u_int namelen, user_addr_t where, size_t *sizep) ret = EINVAL; break; } + kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_TYPEFILTER_CHANGED, type_filter_bitmap); break; default: ret = EINVAL; break; @@ -2151,7 +2685,7 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) { unsigned int count; unsigned int cpu, min_cpu; - uint64_t mintime, t; + uint64_t mintime, t, barrier = 0; int error = 0; kd_buf *tempbuf; uint32_t rcursor; @@ -2176,6 +2710,21 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) memset(&lostevent, 0, sizeof(lostevent)); lostevent.debugid = TRACEDBG_CODE(DBG_TRACE_INFO, 2); + /* Capture a timestamp. Only sort events that have occurred before the timestamp. + * Since the iop is being flushed here, it's possible that events occur on the AP + * while running live tracing. If we are disabled, no new events should + * occur on the AP. + */ + + if (kd_ctrl_page.enabled) + { + // timestamp is a non-zero value + barrier = mach_absolute_time() & KDBG_TIMESTAMP_MASK; + } + + // Request each IOP to provide us with up-to-date entries before merging buffers together. + kdbg_iop_list_callback(kd_ctrl_page.kdebug_iops, KD_CALLBACK_SYNC_FLUSH, NULL); + + /* * because we hold kd_trace_mtx_sysctl, no other control threads can * be playing with kdebug_flags...
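In the KERN_KDBUFWAIT case above, *sizep doubles as a timeout in milliseconds, with zero meaning wait indefinitely. A small sketch of the deadline conversion, using the same kernel clock routines the patch calls (the helper name is illustrative):

static uint64_t
ms_to_deadline(uint64_t ms)
{
	uint64_t abstime;

	if (ms == 0)
		return (0);	/* no deadline: block until the threshold trips */

	nanoseconds_to_absolutetime(ms * 1000 * 1000, &abstime);
	clock_absolutetime_interval_to_deadline(abstime, &abstime);
	return (abstime);
}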
the code that cuts new events could @@ -2193,25 +2742,35 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) tempbuf_count = KDCOPYBUF_COUNT; while (count) { - tempbuf = kdcopybuf; + tempbuf = kdcopybuf; tempbuf_number = 0; // While space - while (tempbuf_count) { + while (tempbuf_count) { mintime = 0xffffffffffffffffULL; min_kdbp = NULL; min_cpu = 0; // Check all CPUs - for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_cpus; cpu++, kdbp++) { + for (cpu = 0, kdbp = &kdbip[0]; cpu < kd_ctrl_page.kdebug_cpus; cpu++, kdbp++) { // Find one with raw data if ((kdsp = kdbp->kd_list_head).raw == KDS_PTR_NULL) continue; + /* Debugging aid: maintain a copy of the "kdsp" + * index. + */ + volatile union kds_ptr kdsp_shadow; + + kdsp_shadow = kdsp; // Get from cpu data to buffer header to buffer kdsp_actual = POINTER_FROM_KDS_PTR(kdsp); + volatile struct kd_storage *kdsp_actual_shadow; + + kdsp_actual_shadow = kdsp_actual; + // See if there are actual data left in this buffer rcursor = kdsp_actual->kds_readlast; @@ -2220,6 +2779,14 @@ kdbg_read(user_addr_t buffer, size_t *number, vnode_t vp, vfs_context_t ctx) t = kdbg_get_timestamp(&kdsp_actual->kds_records[rcursor]); + if ((t > barrier) && (barrier > 0)) { + /* + * Need to wait to flush iop again before we + * sort any more data from the buffers + */ + out_of_events = TRUE; + break; + } if (t < kdsp_actual->kds_timestamp) { /* * indicates we've not yet completed filling @@ -2340,9 +2907,12 @@ unsigned char *getProcName(struct proc *proc) { #define STACKSHOT_SUBSYS_UNLOCK() lck_mtx_unlock(&stackshot_subsys_mutex) #if defined(__i386__) || defined (__x86_64__) #define TRAP_DEBUGGER __asm__ volatile("int3"); +#else +#error No TRAP_DEBUGGER definition for this architecture #endif #define SANE_TRACEBUF_SIZE (8 * 1024 * 1024) +#define SANE_BOOTPROFILE_TRACEBUF_SIZE (64 * 1024 * 1024) /* Initialize the mutex governing access to the stack snapshot subsystem */ __private_extern__ void @@ -2390,18 +2960,112 @@ stack_snapshot(struct proc *p, register struct stack_snapshot_args *uap, int32_t uap->flags, uap->dispatch_offset, retval); } +int +stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced) +{ + int error = 0; + boolean_t istate; + + if ((buf == NULL) || (size <= 0) || (bytesTraced == NULL)) { + return -1; + } + + /* cap in individual stackshot to SANE_TRACEBUF_SIZE */ + if (size > SANE_TRACEBUF_SIZE) { + size = SANE_TRACEBUF_SIZE; + } + +/* Serialize tracing */ + STACKSHOT_SUBSYS_LOCK(); + istate = ml_set_interrupts_enabled(FALSE); + + +/* Preload trace parameters*/ + kdp_snapshot_preflight(pid, buf, size, flags, 0); + +/* Trap to the debugger to obtain a coherent stack snapshot; this populates + * the trace buffer + */ + TRAP_DEBUGGER; + + ml_set_interrupts_enabled(istate); + + *bytesTraced = kdp_stack_snapshot_bytes_traced(); + + error = kdp_stack_snapshot_geterror(); + + STACKSHOT_SUBSYS_UNLOCK(); + + return error; + +} + int stack_snapshot2(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t flags, uint32_t dispatch_offset, int32_t *retval) { + boolean_t istate; int error = 0; unsigned bytesTraced = 0; - boolean_t istate; + +#if CONFIG_TELEMETRY + if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE) { + telemetry_global_ctl(1); + *retval = 0; + return (0); + } else if (flags & STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE) { + telemetry_global_ctl(0); + *retval = 0; + return (0); + } +#endif *retval = -1; /* Serialize tracing */ STACKSHOT_SUBSYS_LOCK(); - if ((tracebuf_size <= 0) 
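The reader loop above is effectively a k-way merge: each pass scans the per-CPU storage lists for the oldest raw event, and the barrier captured before the IOP sync flush stops the merge from running past data the IOPs may still be delivering. A simplified standalone model (plain arrays stand in for the kds lists, names are illustrative):

#include <stdint.h>

/* heads[cpu] holds the next pending timestamp, 0 if that cpu has no data */
static int
pick_min_cpu(const uint64_t heads[], int ncpus, uint64_t barrier,
    int *out_of_events)
{
	uint64_t mintime = ~0ULL;
	int min_cpu = -1;

	*out_of_events = 0;
	for (int cpu = 0; cpu < ncpus; cpu++) {
		uint64_t t = heads[cpu];

		if (t == 0)
			continue;		/* no raw data on this cpu */
		if (barrier && t > barrier) {
			*out_of_events = 1;	/* wait for the next IOP flush */
			break;
		}
		if (t < mintime) {
			mintime = t;
			min_cpu = cpu;
		}
	}
	return (min_cpu);
}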
|| (tracebuf_size > SANE_TRACEBUF_SIZE)) { + if (tracebuf_size <= 0) { + error = EINVAL; + goto error_exit; + } + +#if CONFIG_TELEMETRY + if (flags & STACKSHOT_GET_MICROSTACKSHOT) { + + if (tracebuf_size > SANE_TRACEBUF_SIZE) { + error = EINVAL; + goto error_exit; + } + + bytesTraced = tracebuf_size; + error = telemetry_gather(tracebuf, &bytesTraced, + (flags & STACKSHOT_SET_MICROSTACKSHOT_MARK) ? TRUE : FALSE); + if (error == KERN_NO_SPACE) { + error = ENOSPC; + } + + *retval = (int)bytesTraced; + goto error_exit; + } + + if (flags & STACKSHOT_GET_BOOT_PROFILE) { + + if (tracebuf_size > SANE_BOOTPROFILE_TRACEBUF_SIZE) { + error = EINVAL; + goto error_exit; + } + + bytesTraced = tracebuf_size; + error = bootprofile_gather(tracebuf, &bytesTraced); + if (error == KERN_NO_SPACE) { + error = ENOSPC; + } + + *retval = (int)bytesTraced; + goto error_exit; + } +#endif + + if (tracebuf_size > SANE_TRACEBUF_SIZE) { + error = EINVAL; + goto error_exit; + } @@ -2465,20 +3129,30 @@ start_kern_tracing(unsigned int new_nkdbufs, boolean_t need_map) { return; nkdbufs = kdbg_set_nkdbufs(new_nkdbufs); kdbg_lock_init(); - kdbg_reinit(TRUE); - if (need_map == TRUE) - kdbg_mapinit(); - kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE); + + if (0 == kdbg_reinit(TRUE)) { + + if (need_map == TRUE) { + uint32_t old1, old2; + + kdbg_thrmap_init(); + + disable_wrap(&old1, &old2); + } + kdbg_set_tracing_enabled(TRUE, KDEBUG_ENABLE_TRACE); #if defined(__i386__) || defined(__x86_64__) - uint64_t now = mach_absolute_time(); + uint64_t now = mach_absolute_time(); KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_INFO, 1)) | DBG_FUNC_NONE, (uint32_t)(tsc_rebase_abs_time >> 32), (uint32_t)tsc_rebase_abs_time, (uint32_t)(now >> 32), (uint32_t)now, 0); #endif - printf("kernel tracing started\n"); + printf("kernel tracing started\n"); + } else { + printf("error from kdbg_reinit, kernel tracing not started\n"); + } } void @@ -2499,7 +3173,11 @@ kdbg_dump_trace_to_file(const char *filename) * The global pid exists, we're running * due to fs_usage, latency, etc... * don't cut the panic/shutdown trace file + * Disable tracing from this point to avoid + * perturbing state.
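stack_snapshot_from_kernel() above gives in-kernel subsystems the same preflight/trap/collect sequence the syscall path uses. A hypothetical caller might look like this (the buffer size, and a pid of -1 to mean all processes, are illustrative assumptions):

static char snap_buf[64 * 1024];

static void
snapshot_example(void)
{
	unsigned bytes = 0;

	if (stack_snapshot_from_kernel(-1, snap_buf, sizeof (snap_buf),
	    0 /* flags */, &bytes) == 0)
		printf("stackshot captured %u bytes\n", bytes);
}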
*/ + kdebug_enable = 0; + kd_ctrl_page.enabled = 0; return; } } @@ -2513,8 +3191,8 @@ kdbg_dump_trace_to_file(const char *filename) if ((error = vnode_open(filename, (O_CREAT | FWRITE | O_NOFOLLOW), 0600, 0, &vp, ctx))) return; - number = kd_mapsize; - kdbg_readmap(0, &number, vp, ctx); + number = kd_mapcount * sizeof(kd_threadmap); + kdbg_readthrmap(0, &number, vp, ctx); number = nkdbufs*sizeof(kd_buf); kdbg_read(0, &number, vp, ctx); @@ -2544,23 +3222,3 @@ void kdbg_get_task_name(char* name_buf, int len, task_t task) else snprintf(name_buf, len, "%p [!bsd]", task); } - - - -#if defined(NATIVE_TRACE_FACILITY) -void trace_handler_map_ctrl_page(__unused uintptr_t addr, __unused size_t ctrl_page_size, __unused size_t storage_size, __unused size_t kds_ptr_size) -{ -} -void trace_handler_map_bufinfo(__unused uintptr_t addr, __unused size_t size) -{ -} -void trace_handler_unmap_bufinfo(void) -{ -} -void trace_handler_map_buffer(__unused int index, __unused uintptr_t addr, __unused size_t size) -{ -} -void trace_handler_unmap_buffer(__unused int index) -{ -} -#endif diff --git a/bsd/kern/kern_aio.c b/bsd/kern/kern_aio.c index 89a2ba012..1627d4964 100644 --- a/bsd/kern/kern_aio.c +++ b/bsd/kern/kern_aio.c @@ -606,14 +606,13 @@ _aio_close(proc_t p, int fd ) (int)p, fd, 0, 0, 0 ); while (aio_proc_active_requests_for_file(p, fd) > 0) { - msleep(&p->AIO_CLEANUP_SLEEP_CHAN, aio_proc_mutex(p), PRIBIO | PDROP, "aio_close", 0 ); + msleep(&p->AIO_CLEANUP_SLEEP_CHAN, aio_proc_mutex(p), PRIBIO, "aio_close", 0 ); } - } else { - aio_proc_unlock(p); } - - + + aio_proc_unlock(p); + KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_END, (int)p, fd, 0, 0, 0 ); @@ -1896,7 +1895,18 @@ aio_create_queue_entry(proc_t procp, user_addr_t aiocbp, void *group_tag, int ki /* do some more validation on the aiocb and embedded file descriptor */ result = aio_validate( entryp ); + if ( result != 0 ) + goto error_exit_with_ref; + + /* get a reference on the current_thread, which is passed in vfs_context. */ + entryp->thread = current_thread(); + thread_reference( entryp->thread ); + return ( entryp ); +error_exit_with_ref: + if ( VM_MAP_NULL != entryp->aio_map ) { + vm_map_deallocate( entryp->aio_map ); + } error_exit: if ( result && entryp != NULL ) { zfree( aio_workq_zonep, entryp ); @@ -2056,6 +2066,11 @@ aio_free_request(aio_workq_entry *entryp) vm_map_deallocate(entryp->aio_map); } + /* remove our reference to thread which enqueued the request */ + if ( NULL != entryp->thread ) { + thread_deallocate( entryp->thread ); + } + entryp->aio_refcount = -1; /* A bit of poisoning in case of bad refcounting. */ zfree( aio_workq_zonep, entryp ); @@ -2143,7 +2158,7 @@ aio_validate( aio_workq_entry *entryp ) /* we don't have read or write access */ result = EBADF; } - else if ( fp->f_fglob->fg_type != DTYPE_VNODE ) { + else if ( FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE ) { /* this is not a file */ result = ESPIPE; } else @@ -2352,11 +2367,7 @@ do_aio_read( aio_workq_entry *entryp ) return(EBADF); } - /* - * - * Needs vfs_context_t from vfs_context_create() in entryp! - */ - context.vc_thread = proc_thread(entryp->procp); /* XXX */ + context.vc_thread = entryp->thread; /* XXX */ context.vc_ucred = fp->f_fglob->fg_cred; error = dofileread(&context, fp, @@ -2393,11 +2404,7 @@ do_aio_write( aio_workq_entry *entryp ) flags |= FOF_OFFSET; } - /* - * - * Needs vfs_context_t from vfs_context_create() in entryp! 
- */ - context.vc_thread = proc_thread(entryp->procp); /* XXX */ + context.vc_thread = entryp->thread; /* XXX */ context.vc_ucred = fp->f_fglob->fg_cred; /* NB: tell dofilewrite the offset, and to use the proc cred */ diff --git a/bsd/kern/kern_asl.c b/bsd/kern/kern_asl.c new file mode 100644 index 000000000..3d97f8113 --- /dev/null +++ b/bsd/kern/kern_asl.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +/* String to append as format modifier for each key-value pair */ +#define KASL_KEYVAL_FMT "[%s %s] " +#define KASL_KEYVAL_FMT_LEN (sizeof(KASL_KEYVAL_FMT) - 1) + +#define KASL_NEWLINE_CHAR "\n" +#define KASL_NEWLINE_CHAR_LEN (sizeof(KASL_NEWLINE_CHAR) - 1) + +/* Length of entire ASL message in 10 characters. Kernel defaults to zero */ +#define KASL_ASL_MSG_LEN " 0" + +/* Length of default format string to be used by printf */ +#define MAX_FMT_LEN 256 + + +/* Function to print input values as key-value pairs in format + * identifiable by Apple system log (ASL) facility. All key-value pairs + * are assumed to be pointer to strings and are provided using two ways - + * (a) va_list argument which is a list of varying number of arguments + * created by the caller of this function. + * (b) variable number of arguments passed to this function. + * + * Parameters - + * level - Priority level for this ASL message + * facility - Facility for this ASL message. + * num_pairs - Number of key-value pairs provided by vargs argument. + * vargs - List of key-value pairs. + * ... - Additional key-value pairs (apart from vargs) as variable + * argument list. A NULL value indicates the end of the + * variable argument list. + * + * Returns - + * zero - On success, when it prints all key-values pairs provided. + * E2BIG - When it cannot print all key-value pairs provided and had + * to truncate the output. + */ +int +kern_asl_msg_va(int level, const char *facility, int num_pairs, va_list vargs, ...) 
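+/*
+ * A sketch of the call shape through the kern_asl_msg() wrapper below,
+ * mirroring the cs_invalid_page() caller added later in this patch (the
+ * key names here are illustrative): num_pairs counts the key-value pairs
+ * supplied in the variadic list, and a NULL sentinel ends the list:
+ *
+ *	kern_asl_msg(LOG_NOTICE, "messagetracer", 2,
+ *	    "com.apple.message.domain", "com.apple.kernel.example",
+ *	    "com.apple.message.summarize", "YES",
+ *	    NULL);
+ */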
+{ + int err = 0; + char fmt[MAX_FMT_LEN]; /* Format string to use with vaddlog */ + int calc_pairs = 0; + size_t len; + int i; + va_list ap; + char *ptr; + + /* Mask extra bits, if any, from priority level */ + level = LOG_PRI(level); + + /* Create the first part of format string consisting of ASL + * message length, level, and facility. + */ + if (facility) { + snprintf(fmt, MAX_FMT_LEN, "%s [%s %d] [%s %s] ", + KASL_ASL_MSG_LEN, + KASL_KEY_LEVEL, level, + KASL_KEY_FACILITY, facility); + } else { + snprintf(fmt, MAX_FMT_LEN, "%s [%s %d] ", + KASL_ASL_MSG_LEN, + KASL_KEY_LEVEL, level); + } + + /* Determine the number of key-value format strings [%s %s] that + * should be added to the format string, one for every key-value pair + * provided in va_list. Calculate the maximum number of format strings + * that can be accommodated in the remaining format buffer (after saving + * space for the newline character). If the caller provided more pairs + * in va_list than that, truncate the extra pairs. + */ + len = MAX_FMT_LEN - strlen(fmt) - KASL_NEWLINE_CHAR_LEN - 1; + calc_pairs = len / KASL_KEYVAL_FMT_LEN; + if (num_pairs <= calc_pairs) { + calc_pairs = num_pairs; + } else { + err = E2BIG; + } + + /* Append format strings [%s %s] for the key-value pairs in vargs */ + len = MAX_FMT_LEN - KASL_NEWLINE_CHAR_LEN; + for (i = 0; i < calc_pairs; i++) { + (void) strlcat(fmt, KASL_KEYVAL_FMT, len); + } + + /* Count the number of variable arguments provided to this function + * and determine the total number of key-value pairs. + */ + calc_pairs = 0; + va_start(ap, vargs); + ptr = va_arg(ap, char *); + while (ptr) { + calc_pairs++; + ptr = va_arg(ap, char *); + } + calc_pairs /= 2; + va_end(ap); + + /* If the user provided a variable number of arguments, append them + * as real key-value "[k v]" pairs into the format string. If the format + * string is too small, ignore the key-value pair completely. + */ + if (calc_pairs) { + char *key, *val; + size_t pairlen; + int offset; + + /* Calculate bytes available for key-value pairs after reserving + * bytes for newline character and NULL terminator + */ + len = MAX_FMT_LEN - strlen(fmt) - KASL_NEWLINE_CHAR_LEN - 1; + offset = strlen(fmt); + + va_start(ap, vargs); + for (i = 0; i < calc_pairs; i++) { + key = va_arg(ap, char *); + val = va_arg(ap, char *); + + /* Calculate bytes required to store the next key-value pair + * as "[key val] " including space for '[', ']', and + * two spaces. + */ + pairlen = strlen(key) + strlen(val) + 4; + if (pairlen > len) { + err = E2BIG; + break; + } + + /* len + 1 because one byte has been set aside for the NULL + * terminator in the calculation of 'len' above + */ + snprintf((fmt + offset), len + 1, KASL_KEYVAL_FMT, + key, val); + offset += pairlen; + len -= pairlen; + } + va_end(ap); + } + + /* Append newline */ + (void) strlcat(fmt, KASL_NEWLINE_CHAR, MAX_FMT_LEN); + + /* Print the key-value pairs in ASL format */ + vaddlog(fmt, vargs); + + return (err); +} + +int +kern_asl_msg(int level, const char *facility, int num_pairs, ...) +{ + int err; + va_list ap; + + va_start(ap, num_pairs); + err = kern_asl_msg_va(level, facility, + num_pairs, ap, NULL); + va_end(ap); + + return err; +} + +/* Search whether the given string contains '[' or ']'. If so, escape each + * occurrence by prefixing it with a '\'. If the buffer is not big enough, + * no changes are made and an error is returned. + * + * Parameters - + * str - string that can contain '[' or ']', should be NULL terminated + * len - length, in bytes, of valid data, including the NULL character.
+ * buflen - size of buffer that contains the string + */ +int +escape_str(char *str, int len, int buflen) +{ + int count; + char *src, *dst; + + /* Count number of characters to escape */ + src = str; + count = 0; + do { + if ((*src == '[') || (*src == ']')) { + count++; + } + } while (*src++); + + if (count) { + /* + * Check if the buffer has enough space to escape all + * characters + */ + if ((buflen - len) < count) { + return (ENOSPC); + } + + src = str + len; + dst = src + count; + while (count) { + *dst-- = *src; + if ((*src == '[') || (*src == ']')) { + /* Last char copied needs to be escaped */ + *dst-- = '\\'; + count--; + } + src--; + } + } + + return (0); +} diff --git a/bsd/kern/kern_authorization.c b/bsd/kern/kern_authorization.c index 1cf74dc41..91c2305c8 100644 --- a/bsd/kern/kern_authorization.c +++ b/bsd/kern/kern_authorization.c @@ -117,7 +117,7 @@ struct kauth_scope { static TAILQ_HEAD(,kauth_scope) kauth_scopes; static int kauth_add_callback_to_scope(kauth_scope_t sp, kauth_listener_t klp); -static void kauth_scope_init(void) __attribute__((section("__TEXT, initcode"))); +static void kauth_scope_init(void); static kauth_scope_t kauth_alloc_scope(const char *identifier, kauth_scope_callback_t callback, void *idata); static kauth_listener_t kauth_alloc_listener(const char *identifier, kauth_scope_callback_t callback, void *idata); #if 0 diff --git a/bsd/kern/kern_clock.c b/bsd/kern/kern_clock.c index 1aae2df47..f0e051345 100644 --- a/bsd/kern/kern_clock.c +++ b/bsd/kern/kern_clock.c @@ -140,6 +140,31 @@ timeout( thread_call_func_delayed((thread_call_func_t)fcn, param, deadline); } +/* + * Set a timeout with leeway. + * + * fcn: function to call + * param: parameter to pass to function + * interval: timeout interval, in hz. + * leeway_interval: leeway interval, in hz. + */ +void +timeout_with_leeway( + timeout_fcn_t fcn, + void *param, + int interval, + int leeway_interval) +{ + uint64_t deadline; + uint64_t leeway; + + clock_interval_to_deadline(interval, NSEC_PER_SEC / hz, &deadline); + + clock_interval_to_absolutetime_interval(leeway_interval, NSEC_PER_SEC / hz, &leeway); + + thread_call_func_delayed_with_leeway((thread_call_func_t)fcn, param, deadline, leeway, THREAD_CALL_DELAY_LEEWAY); +} + /* * Cancel a timeout. */ diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index ae00ee73b..5a4cfd5f7 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2011 Apple Inc. All rights reserved. + * Copyright (c) 1999-2012 Apple Inc. All rights reserved. 
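The timeout_with_leeway() shim added above takes both intervals in hz ticks and forwards them to the thread-call layer as an absolute deadline plus a leeway window. A sketch of a caller (the callback and wrapper names are hypothetical):

static void
my_callout(void *arg)		/* hypothetical timeout_fcn_t */
{
	(void)arg;
}

static void
arm_example(void)
{
	/* fire in roughly 2 seconds, allowing up to half a second of slack */
	timeout_with_leeway(my_callout, NULL, 2 * hz, hz / 2);
}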
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -87,6 +87,7 @@ static int ctl_send(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); static int ctl_ctloutput(struct socket *, struct sockopt *); static int ctl_peeraddr(struct socket *so, struct sockaddr **nam); +static int ctl_usr_rcvd(struct socket *so, int flags); static struct kctl *ctl_find_by_name(const char *); static struct kctl *ctl_find_by_id_unit(u_int32_t id, u_int32_t unit); @@ -99,103 +100,84 @@ static int ctl_lock(struct socket *, int, void *); static int ctl_unlock(struct socket *, int, void *); static lck_mtx_t * ctl_getlock(struct socket *, int); -static struct pr_usrreqs ctl_usrreqs = -{ - pru_abort_notsupp, pru_accept_notsupp, ctl_attach, pru_bind_notsupp, - ctl_connect, pru_connect2_notsupp, ctl_ioctl, ctl_detach, - ctl_disconnect, pru_listen_notsupp, ctl_peeraddr, - pru_rcvd_notsupp, pru_rcvoob_notsupp, ctl_send, - pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp, - sosend, soreceive, pru_sopoll_notsupp +static struct pr_usrreqs ctl_usrreqs = { + .pru_attach = ctl_attach, + .pru_connect = ctl_connect, + .pru_control = ctl_ioctl, + .pru_detach = ctl_detach, + .pru_disconnect = ctl_disconnect, + .pru_peeraddr = ctl_peeraddr, + .pru_rcvd = ctl_usr_rcvd, + .pru_send = ctl_send, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; -static struct protosw kctlswk_dgram = +static struct protosw kctlsw[] = { { - SOCK_DGRAM, &systemdomain, SYSPROTO_CONTROL, - PR_ATOMIC|PR_CONNREQUIRED|PR_PCBLOCK, - NULL, NULL, NULL, ctl_ctloutput, - NULL, NULL, - NULL, NULL, NULL, NULL, &ctl_usrreqs, - ctl_lock, ctl_unlock, ctl_getlock, { 0, 0 } , 0, { 0 } -}; - -static struct protosw kctlswk_stream = + .pr_type = SOCK_DGRAM, + .pr_protocol = SYSPROTO_CONTROL, + .pr_flags = PR_ATOMIC|PR_CONNREQUIRED|PR_PCBLOCK|PR_WANTRCVD, + .pr_ctloutput = ctl_ctloutput, + .pr_usrreqs = &ctl_usrreqs, + .pr_lock = ctl_lock, + .pr_unlock = ctl_unlock, + .pr_getlock = ctl_getlock, +}, { - SOCK_STREAM, &systemdomain, SYSPROTO_CONTROL, - PR_CONNREQUIRED|PR_PCBLOCK, - NULL, NULL, NULL, ctl_ctloutput, - NULL, NULL, - NULL, NULL, NULL, NULL, &ctl_usrreqs, - ctl_lock, ctl_unlock, ctl_getlock, { 0, 0 } , 0, { 0 } + .pr_type = SOCK_STREAM, + .pr_protocol = SYSPROTO_CONTROL, + .pr_flags = PR_CONNREQUIRED|PR_PCBLOCK|PR_WANTRCVD, + .pr_ctloutput = ctl_ctloutput, + .pr_usrreqs = &ctl_usrreqs, + .pr_lock = ctl_lock, + .pr_unlock = ctl_unlock, + .pr_getlock = ctl_getlock, +} }; +static int kctl_proto_count = (sizeof (kctlsw) / sizeof (struct protosw)); /* * Install the protosw's for the Kernel Control manager. 
*/ -__private_extern__ int -kern_control_init(void) +__private_extern__ void +kern_control_init(struct domain *dp) { - int error = 0; - + struct protosw *pr; + int i; + + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(dp == systemdomain); + ctl_lck_grp_attr = lck_grp_attr_alloc_init(); - if (ctl_lck_grp_attr == 0) { - printf(": lck_grp_attr_alloc_init failed\n"); - error = ENOMEM; - goto done; + if (ctl_lck_grp_attr == NULL) { + panic("%s: lck_grp_attr_alloc_init failed\n", __func__); + /* NOTREACHED */ } - - ctl_lck_grp = lck_grp_alloc_init("Kernel Control Protocol", ctl_lck_grp_attr); - if (ctl_lck_grp == 0) { - printf("kern_control_init: lck_grp_alloc_init failed\n"); - error = ENOMEM; - goto done; + + ctl_lck_grp = lck_grp_alloc_init("Kernel Control Protocol", + ctl_lck_grp_attr); + if (ctl_lck_grp == NULL) { + panic("%s: lck_grp_alloc_init failed\n", __func__); + /* NOTREACHED */ } - + ctl_lck_attr = lck_attr_alloc_init(); - if (ctl_lck_attr == 0) { - printf("kern_control_init: lck_attr_alloc_init failed\n"); - error = ENOMEM; - goto done; + if (ctl_lck_attr == NULL) { + panic("%s: lck_attr_alloc_init failed\n", __func__); + /* NOTREACHED */ } - + ctl_mtx = lck_mtx_alloc_init(ctl_lck_grp, ctl_lck_attr); - if (ctl_mtx == 0) { - printf("kern_control_init: lck_mtx_alloc_init failed\n"); - error = ENOMEM; - goto done; + if (ctl_mtx == NULL) { + panic("%s: lck_mtx_alloc_init failed\n", __func__); + /* NOTREACHED */ } TAILQ_INIT(&ctl_head); - - error = net_add_proto(&kctlswk_dgram, &systemdomain); - if (error) { - log(LOG_WARNING, "kern_control_init: net_add_proto dgram failed (%d)\n", error); - } - error = net_add_proto(&kctlswk_stream, &systemdomain); - if (error) { - log(LOG_WARNING, "kern_control_init: net_add_proto stream failed (%d)\n", error); - } - - done: - if (error != 0) { - if (ctl_mtx) { - lck_mtx_free(ctl_mtx, ctl_lck_grp); - ctl_mtx = 0; - } - if (ctl_lck_grp) { - lck_grp_free(ctl_lck_grp); - ctl_lck_grp = 0; - } - if (ctl_lck_grp_attr) { - lck_grp_attr_free(ctl_lck_grp_attr); - ctl_lck_grp_attr = 0; - } - if (ctl_lck_attr) { - lck_attr_free(ctl_lck_attr); - ctl_lck_attr = 0; - } - } - return error; + + for (i = 0, pr = &kctlsw[0]; i < kctl_proto_count; i++, pr++) + net_add_proto(pr, dp, 1); } static void @@ -442,6 +424,25 @@ ctl_peeraddr(struct socket *so, struct sockaddr **nam) return 0; } +static int +ctl_usr_rcvd(struct socket *so, int flags) +{ + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kctl *kctl; + + if ((kctl = kcb->kctl) == NULL) { + return EINVAL; + } + + if (kctl->rcvd) { + socket_unlock(so, 0); + (*kctl->rcvd)(kctl, kcb->unit, kcb->userdata, flags); + socket_lock(so, 0); + } + + return 0; +} + static int ctl_send(struct socket *so, int flags, struct mbuf *m, __unused struct sockaddr *addr, struct mbuf *control, @@ -707,6 +708,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) struct kctl *kctl_next = NULL; u_int32_t id = 1; size_t name_len; + int is_extended = 0; if (userkctl == NULL) /* sanity check */ return(EINVAL); @@ -789,6 +791,9 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) kctl->id = userkctl->ctl_id; kctl->reg_unit = userkctl->ctl_unit; } + + is_extended = (userkctl->ctl_flags & CTL_FLAG_REG_EXTENDED); + strlcpy(kctl->name, userkctl->ctl_name, MAX_KCTL_NAME); kctl->flags = userkctl->ctl_flags; @@ -806,6 +811,9 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) kctl->send = userkctl->ctl_send; kctl->setopt = userkctl->ctl_setopt; kctl->getopt = userkctl->ctl_getopt; + 
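ctl_usr_rcvd() above drops the socket lock around the client's rcvd callback so the callback can safely re-enter socket code without deadlocking. The general shape of that pattern, as a sketch with illustrative names:

static void
notify_unlocked(struct socket *so, void (*cb)(void *), void *arg)
{
	socket_unlock(so, 0);	/* never hold the lock across a client callback */
	cb(arg);
	socket_lock(so, 0);	/* reacquire before returning to socket code */
}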
if (is_extended) { + kctl->rcvd = userkctl->ctl_rcvd; + } TAILQ_INIT(&kctl->kcb_head); diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c index 1c3093e4d..fd832d4c5 100644 --- a/bsd/kern/kern_core.c +++ b/bsd/kern/kern_core.c @@ -93,20 +93,23 @@ typedef struct { /* XXX should be static */ void collectth_state(thread_t th_act, void *tirp); +extern int freespace_mb(vnode_t vp); + /* XXX not in a Mach header anywhere */ kern_return_t thread_getstatus(register thread_t act, int flavor, thread_state_t tstate, mach_msg_type_number_t *count); void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); +extern kern_return_t task_suspend_internal(task_t); static cpu_type_t process_cpu_type(proc_t proc); static cpu_type_t process_cpu_subtype(proc_t proc); #ifdef SECURE_KERNEL -__private_extern__ int do_coredump = 0; /* default: don't dump cores */ +__XNU_PRIVATE_EXTERN int do_coredump = 0; /* default: don't dump cores */ #else -__private_extern__ int do_coredump = 1; /* default: dump cores */ +__XNU_PRIVATE_EXTERN int do_coredump = 1; /* default: dump cores */ #endif -__private_extern__ int sugid_coredump = 0; /* default: but not SGUID binaries */ +__XNU_PRIVATE_EXTERN int sugid_coredump = 0; /* default: but not SGUID binaries */ /* cpu_type returns only the most generic indication of the current CPU. */ @@ -187,6 +190,9 @@ collectth_state(thread_t th_act, void *tirp) * indicated * * Parameters: core_proc Process to dump core [*] + * reserve_mb If non-zero, leave filesystem with + * at least this much free space. + * ignore_ulimit If set, ignore the process's core file ulimit. * * Returns: 0 Success * EFAULT Failed @@ -197,7 +203,7 @@ collectth_state(thread_t th_act, void *tirp) */ #define MAX_TSTATE_FLAVORS 10 int -coredump(proc_t core_proc) +coredump(proc_t core_proc, uint32_t reserve_mb, int ignore_ulimit) { /* Begin assumptions that limit us to only the current process */ vfs_context_t ctx = vfs_context_current(); @@ -255,9 +261,9 @@ coredump(proc_t core_proc) mapsize = get_vmmap_size(map); - if (mapsize >= core_proc->p_rlimit[RLIMIT_CORE].rlim_cur) + if ((mapsize >= core_proc->p_rlimit[RLIMIT_CORE].rlim_cur) && (ignore_ulimit == 0)) return (EFAULT); - (void) task_suspend(task); + (void) task_suspend_internal(task); MALLOC(alloced_name, char *, MAXPATHLEN, M_TEMP, M_NOWAIT | M_ZERO); @@ -289,6 +295,12 @@ coredump(proc_t core_proc) vnode_setattr(vp, &va, ctx); core_proc->p_acflag |= ACORE; + if ((reserve_mb > 0) && + ((freespace_mb(vp) - (mapsize >> 20)) < reserve_mb)) { + error = ENOSPC; + goto out; + } + /* * If the task is modified while dumping the file * (e.g., changes in threads or VM, the resulting diff --git a/bsd/kern/kern_credential.c b/bsd/kern/kern_credential.c index 7569cf5eb..0098c54d3 100644 --- a/bsd/kern/kern_credential.c +++ b/bsd/kern/kern_credential.c @@ -80,6 +80,25 @@ void mach_kauth_cred_uthread_update( void ); # define NULLCRED_CHECK(_c) do {if (!IS_VALID_CRED(_c)) panic("%s: bad credential %p", __FUNCTION__,_c);} while(0) +/* Set to 1 to turn on KAUTH_DEBUG for kern_credential.c */ +#if 0 +#ifdef KAUTH_DEBUG +#undef KAUTH_DEBUG +#endif + +#ifdef K_UUID_FMT +#undef K_UUID_FMT +#endif + +#ifdef K_UUID_ARG +#undef K_UUID_ARG +#endif + +# define K_UUID_FMT "%08x:%08x:%08x:%08x" +# define K_UUID_ARG(_u) *(int *)&_u.g_guid[0],*(int *)&_u.g_guid[4],*(int *)&_u.g_guid[8],*(int *)&_u.g_guid[12] +# define KAUTH_DEBUG(fmt, args...) 
do { printf("%s:%d: " fmt "\n", __PRETTY_FUNCTION__, __LINE__ , ##args); } while (0) +#endif + /* * Credential debugging; we can track entry into a function that might * change a credential, and we can track actual credential changes that @@ -141,6 +160,7 @@ static lck_mtx_t *kauth_resolver_mtx; #define KAUTH_RESOLVER_UNLOCK() lck_mtx_unlock(kauth_resolver_mtx); static volatile pid_t kauth_resolver_identity; +static int kauth_identitysvc_has_registered; static int kauth_resolver_registered; static uint32_t kauth_resolver_sequence; static int kauth_resolver_timeout = 30; /* default: 30 seconds */ @@ -170,6 +190,8 @@ static int kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t static int kauth_resolver_complete(user_addr_t message); static int kauth_resolver_getwork(user_addr_t message); static int kauth_resolver_getwork2(user_addr_t message); +static __attribute__((noinline)) int __KERNEL_IS_WAITING_ON_EXTERNAL_CREDENTIAL_RESOLVER__( + struct kauth_resolver_work *); #define KAUTH_CACHES_MAX_SIZE 10000 /* Max # entries for both groups and id caches */ @@ -178,6 +200,8 @@ struct kauth_identity { int ki_valid; uid_t ki_uid; gid_t ki_gid; + int ki_supgrpcnt; + gid_t ki_supgrps[NGROUPS]; guid_t ki_guid; ntsid_t ki_ntsid; const char *ki_name; /* string name from string cache */ @@ -188,6 +212,7 @@ struct kauth_identity { * not go to userland to resolve, just assume that there is no answer * available. */ + time_t ki_groups_expiry; time_t ki_guid_expiry; time_t ki_ntsid_expiry; }; @@ -201,7 +226,8 @@ static int kauth_identity_cachemax = KAUTH_IDENTITY_CACHEMAX_DEFAULT; static int kauth_identity_count; static struct kauth_identity *kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, - ntsid_t *ntsidp, time_t ntsid_expiry, const char *name, int nametype); + ntsid_t *ntsidp, time_t ntsid_expiry, int supgrpcnt, gid_t *supgrps, time_t groups_expiry, + const char *name, int nametype); static void kauth_identity_register_and_free(struct kauth_identity *kip); static void kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_identity *kip, uint64_t extend_data); static void kauth_identity_trimcache(int newsize); @@ -248,11 +274,11 @@ static struct kauth_cred_entry_head * kauth_cred_table_anchor = NULL; #define KAUTH_CRED_HASH_DEBUG 0 static int kauth_cred_add(kauth_cred_t new_cred); -static void kauth_cred_remove(kauth_cred_t cred); +static boolean_t kauth_cred_remove(kauth_cred_t cred); static inline u_long kauth_cred_hash(const uint8_t *datap, int data_len, u_long start_key); static u_long kauth_cred_get_hashkey(kauth_cred_t cred); static kauth_cred_t kauth_cred_update(kauth_cred_t old_cred, kauth_cred_t new_cred, boolean_t retain_auditinfo); -static void kauth_cred_unref_hashlocked(kauth_cred_t *credp); +static boolean_t kauth_cred_unref_hashlocked(kauth_cred_t *credp); #if KAUTH_CRED_HASH_DEBUG static int kauth_cred_count = 0; @@ -261,6 +287,50 @@ static void kauth_cred_print(kauth_cred_t cred); #endif #if CONFIG_EXT_RESOLVER + +/* + * __KERNEL_IS_WAITING_ON_EXTERNAL_CREDENTIAL_RESOLVER__ + * + * Description: Waits for the user space daemon to respond to the request + * we made. Function declared non inline to be visible in + * stackshots and spindumps as well as debugging. + * + * Parameters: workp Work queue entry. + * + * Returns: 0 on Success. + * EIO if Resolver is dead. + * EINTR thread interrupted in msleep + * EWOULDBLOCK thread timed out in msleep + * ERESTART returned by msleep. 
+ * + */ +static __attribute__((noinline)) int +__KERNEL_IS_WAITING_ON_EXTERNAL_CREDENTIAL_RESOLVER__( + struct kauth_resolver_work *workp) +{ + int error = 0; + struct timespec ts; + for (;;) { + /* we could compute a better timeout here */ + ts.tv_sec = kauth_resolver_timeout; + ts.tv_nsec = 0; + error = msleep(workp, kauth_resolver_mtx, PCATCH, "kr_submit", &ts); + /* request has been completed? */ + if ((error == 0) && (workp->kr_flags & KAUTH_REQUEST_DONE)) + break; + /* woken because the resolver has died? */ + if (kauth_resolver_identity == 0) { + error = EIO; + break; + } + /* an error? */ + if (error != 0) + break; + } + return error; +} + + /* * kauth_resolver_init * @@ -398,23 +468,7 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp, uint64_t extend_data * work. */ wakeup_one((caddr_t)&kauth_resolver_unsubmitted); - for (;;) { - /* we could compute a better timeout here */ - ts.tv_sec = kauth_resolver_timeout; - ts.tv_nsec = 0; - error = msleep(workp, kauth_resolver_mtx, PCATCH, "kr_submit", &ts); - /* request has been completed? */ - if ((error == 0) && (workp->kr_flags & KAUTH_REQUEST_DONE)) - break; - /* woken because the resolver has died? */ - if (kauth_resolver_identity == 0) { - error = EIO; - break; - } - /* an error? */ - if (error != 0) - break; - } + error = __KERNEL_IS_WAITING_ON_EXTERNAL_CREDENTIAL_RESOLVER__(workp); /* if the request was processed, copy the result */ if (error == 0) @@ -557,6 +611,7 @@ identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused int3 } kauth_resolver_identity = new_id; kauth_resolver_registered = 1; + kauth_identitysvc_has_registered = 1; wakeup(&kauth_resolver_unsubmitted); } KAUTH_RESOLVER_UNLOCK(); @@ -709,7 +764,7 @@ kauth_resolver_getwork_continue(int result) thread = current_thread(); ut = get_bsdthread_info(thread); - message = ut->uu_kauth.message; + message = ut->uu_kevent.uu_kauth.message; return(kauth_resolver_getwork2(message)); } @@ -836,7 +891,7 @@ kauth_resolver_getwork(user_addr_t message) thread_t thread = current_thread(); struct uthread *ut = get_bsdthread_info(thread); - ut->uu_kauth.message = message; + ut->uu_kevent.uu_kauth.message = message; error = msleep0(&kauth_resolver_unsubmitted, kauth_resolver_mtx, PCATCH, "GRGetWork", 0, kauth_resolver_getwork_continue); KAUTH_RESOLVER_UNLOCK(); /* @@ -1026,6 +1081,7 @@ kauth_resolver_complete(user_addr_t message) #define KI_VALID_NTSID (1<<3) #define KI_VALID_PWNAM (1<<4) /* Used for translation */ #define KI_VALID_GRNAM (1<<5) /* Used for translation */ +#define KI_VALID_GROUPS (1<<6) #if CONFIG_EXT_RESOLVER /* @@ -1070,7 +1126,9 @@ kauth_identity_init(void) * and *either* a UID *or* a GID, but not both. 
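The ki_groups_expiry stamp added above joins the GUID and NTSID stamps in one lazy-expiration convention, spelled out by the *_expired() helpers below: an expiry of 0 marks a persistent entry, anything else is compared against the monotonic clock. The common shape, as a standalone sketch:

static int
entry_expired(time_t expiry)
{
	struct timeval tv;

	if (expiry == 0)
		return (0);	/* 0 means the entry never expires */

	microuptime(&tv);
	return ((expiry <= tv.tv_sec) ? 1 : 0);
}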
*/ static struct kauth_identity * -kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, ntsid_t *ntsidp, time_t ntsid_expiry, const char *name, int nametype) +kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, + ntsid_t *ntsidp, time_t ntsid_expiry, int supgrpcnt, gid_t *supgrps, time_t groups_expiry, + const char *name, int nametype) { struct kauth_identity *kip; @@ -1087,6 +1145,16 @@ kauth_identity_alloc(uid_t uid, gid_t gid, guid_t *guidp, time_t guid_expiry, nt kip->ki_uid = uid; kip->ki_valid = KI_VALID_UID; } + if (supgrpcnt) { + assert(supgrpcnt <= NGROUPS); + assert(supgrps != NULL); + if (kip->ki_valid & KI_VALID_GID) + panic("can't allocate kauth identity with both gid and supplementary groups"); + kip->ki_supgrpcnt = supgrpcnt; + memcpy(kip->ki_supgrps, supgrps, sizeof(supgrps[0]) * supgrpcnt); + kip->ki_valid |= KI_VALID_GROUPS; + } + kip->ki_groups_expiry = groups_expiry; if (guidp != NULL) { kip->ki_guid = *guidp; kip->ki_valid |= KI_VALID_GUID; @@ -1248,6 +1316,13 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id TAILQ_FOREACH(kip, &kauth_identities, ki_link) { /* matching record */ if ((kip->ki_valid & KI_VALID_UID) && (kip->ki_uid == elp->el_uid)) { + if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_SUPGRPS) { + assert(elp->el_sup_grp_cnt <= NGROUPS); + kip->ki_supgrpcnt = elp->el_sup_grp_cnt; + memcpy(kip->ki_supgrps, elp->el_sup_groups, sizeof(elp->el_sup_groups[0]) * kip->ki_supgrpcnt); + kip->ki_valid |= KI_VALID_GROUPS; + kip->ki_groups_expiry = (elp->el_member_valid) ? tv.tv_sec + elp->el_member_valid : 0; + } if (elp->el_flags & KAUTH_EXTLOOKUP_VALID_UGUID) { kip->ki_guid = elp->el_uguid; kip->ki_valid |= KI_VALID_GUID; @@ -1286,6 +1361,9 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id (elp->el_uguid_valid) ? tv.tv_sec + elp->el_uguid_valid : 0, (elp->el_flags & KAUTH_EXTLOOKUP_VALID_USID) ? &elp->el_usid : NULL, (elp->el_usid_valid) ? tv.tv_sec + elp->el_usid_valid : 0, + (elp->el_flags & KAUTH_EXTLOOKUP_VALID_SUPGRPS) ? elp->el_sup_grp_cnt : 0, + (elp->el_flags & KAUTH_EXTLOOKUP_VALID_SUPGRPS) ? elp->el_sup_groups : NULL, + (elp->el_member_valid) ? tv.tv_sec + elp->el_member_valid : 0, (elp->el_flags & KAUTH_EXTLOOKUP_VALID_PWNAM) ? speculative_name : NULL, KI_VALID_PWNAM); if (kip != NULL) { @@ -1343,6 +1421,9 @@ kauth_identity_updatecache(struct kauth_identity_extlookup *elp, struct kauth_id (elp->el_gguid_valid) ? tv.tv_sec + elp->el_gguid_valid : 0, (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GSID) ? &elp->el_gsid : NULL, (elp->el_gsid_valid) ? tv.tv_sec + elp->el_gsid_valid : 0, + (elp->el_flags & KAUTH_EXTLOOKUP_VALID_SUPGRPS) ? elp->el_sup_grp_cnt : 0, + (elp->el_flags & KAUTH_EXTLOOKUP_VALID_SUPGRPS) ? elp->el_sup_groups : NULL, + (elp->el_member_valid) ? tv.tv_sec + elp->el_member_valid : 0, (elp->el_flags & KAUTH_EXTLOOKUP_VALID_GRNAM) ? speculative_name : NULL, KI_VALID_GRNAM); if (kip != NULL) { @@ -1432,7 +1513,7 @@ kauth_identity_guid_expired(struct kauth_identity *kip) return (0); microuptime(&tv); - KAUTH_DEBUG("CACHE - GUID expires @ %d now %d", kip->ki_guid_expiry, tv.tv_sec); + KAUTH_DEBUG("CACHE - GUID expires @ %ld now %ld", kip->ki_guid_expiry, tv.tv_sec); return((kip->ki_guid_expiry <= tv.tv_sec) ? 
1 : 0); } @@ -1461,11 +1542,38 @@ kauth_identity_ntsid_expired(struct kauth_identity *kip) return (0); microuptime(&tv); - KAUTH_DEBUG("CACHE - NTSID expires @ %d now %d", kip->ki_ntsid_expiry, tv.tv_sec); + KAUTH_DEBUG("CACHE - NTSID expires @ %ld now %ld", kip->ki_ntsid_expiry, tv.tv_sec); return((kip->ki_ntsid_expiry <= tv.tv_sec) ? 1 : 0); } +/* + * kauth_identity_groups_expired + * + * Description: Handle lazy expiration of supplemental group translations. + * + * Parameters: kip kauth identity to check for + * groups expiration + * + * Returns: 1 Expired + * 0 Not expired + */ +static int +kauth_identity_groups_expired(struct kauth_identity *kip) +{ + struct timeval tv; + + /* + * Expiration time of 0 means this entry is persistent. + */ + if (kip->ki_groups_expiry == 0) + return (0); + + microuptime(&tv); + KAUTH_DEBUG("CACHE - GROUPS expires @ %ld now %ld\n", kip->ki_groups_expiry, tv.tv_sec); + + return((kip->ki_groups_expiry <= tv.tv_sec) ? 1 : 0); +} /* * kauth_identity_find_uid @@ -1957,6 +2065,48 @@ kauth_cred_cache_lookup(__unused int from, __unused int to, } #endif +#if defined(CONFIG_EXT_RESOLVER) && (CONFIG_EXT_RESOLVER) +/* + * Structure to hold supplemental groups. Used for impedance matching with + * kauth_cred_cache_lookup below. + */ +struct supgroups { + int *count; + gid_t *groups; +}; + +/* + * kauth_cred_uid2groups + * + * Description: Fetch supplemental GROUPS from UID + * + * Parameters: uid UID to examine + * groups pointer to an array of gid_ts + * gcount pointer to the number of groups wanted/returned + * + * Returns: 0 Success + * kauth_cred_cache_lookup:EINVAL + * + * Implicit returns: + * *groups Modified, if successful + * *gcount Modified, if successful + * + */ +static int +kauth_cred_uid2groups(uid_t *uid, gid_t *groups, int *gcount) +{ + int rv; + + struct supgroups supgroups; + supgroups.count = gcount; + supgroups.groups = groups; + + rv = kauth_cred_cache_lookup(KI_VALID_UID, KI_VALID_GROUPS, uid, &supgroups); + + return (rv); +} +#endif + /* * kauth_cred_guid2pwnam * @@ -2384,6 +2534,9 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst) case KI_VALID_NTSID: expired = kauth_identity_ntsid_expired; break; + case KI_VALID_GROUPS: + expired = kauth_identity_groups_expired; + break; default: switch(from) { case KI_VALID_GUID: @@ -2521,6 +2674,27 @@ kauth_cred_cache_lookup(int from, int to, void *src, void *dst) el.el_flags |= KAUTH_EXTLOOKUP_WANT_GRNAM; extend_data = CAST_USER_ADDR_T(dst); } + if (to == KI_VALID_GROUPS) { + /* Expensive and only useful for an NFS client not using kerberos */ + el.el_flags |= KAUTH_EXTLOOKUP_WANT_SUPGRPS; + if (ki.ki_valid & KI_VALID_GROUPS) { + /* + * Copy the current supplemental groups for the resolver. + * The resolver should check these groups first and if + * the user (uid) is still a member it should endeavor to + * keep them in the list. Otherwise NFS clients could get + * changing access to server file system objects on each + * expiration. 
+ */ + el.el_sup_grp_cnt = ki.ki_supgrpcnt; + + memcpy(el.el_sup_groups, ki.ki_supgrps, sizeof (el.el_sup_groups[0]) * ki.ki_supgrpcnt); + /* Let the resolver know these were the previous valid groups */ + el.el_flags |= KAUTH_EXTLOOKUP_VALID_SUPGRPS; + KAUTH_DEBUG("GROUPS: Sending previously valid GROUPS"); + } else + KAUTH_DEBUG("GROUPS: no valid groups to send"); + } /* Call resolver */ KAUTH_DEBUG("CACHE - calling resolver for %x", el.el_flags); @@ -2577,6 +2751,18 @@ found: case KI_VALID_NTSID: *(ntsid_t *)dst = ki.ki_ntsid; break; + case KI_VALID_GROUPS: { + struct supgroups *gp = (struct supgroups *)dst; + u_int32_t limit = ki.ki_supgrpcnt; + + if (gp->count) { + limit = MIN(ki.ki_supgrpcnt, *gp->count); + *gp->count = limit; + } + + memcpy(gp->groups, ki.ki_supgrps, sizeof(gid_t) * limit); + } + break; case KI_VALID_PWNAM: case KI_VALID_GRNAM: /* handled in kauth_resolver_complete() */ @@ -3877,17 +4063,42 @@ kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int groupcount, uid_t gmu } /* - * XXX temporary, for NFS support until we can come up with a better - * XXX enumeration/comparison mechanism - * * Notes: The return value exists to account for the possibility of a * kauth_cred_t without a POSIX label. This will be the case in * the future (see posix_cred_get() below, for more details). */ +#if CONFIG_EXT_RESOLVER +int kauth_external_supplementary_groups_supported = 1; + +SYSCTL_INT(_kern, OID_AUTO, ds_supgroups_supported, CTLFLAG_RW | CTLFLAG_LOCKED, &kauth_external_supplementary_groups_supported, 0, ""); +#endif + int kauth_cred_getgroups(kauth_cred_t cred, gid_t *grouplist, int *countp) { int limit = NGROUPS; + posix_cred_t pcred; + + pcred = posix_cred_get(cred); + +#if CONFIG_EXT_RESOLVER + /* + * If we've not opted out of using the resolver, then convert the cred to a list + * of supplemental groups. We do this only if there has been a resolver to talk to, + * since we may be too early in boot, or in an environment that isn't using DS. + */ + if (kauth_identitysvc_has_registered && kauth_external_supplementary_groups_supported && (pcred->cr_flags & CRF_NOMEMBERD) == 0) { + uid_t uid = kauth_cred_getuid(cred); + int err; + + err = kauth_cred_uid2groups(&uid, grouplist, countp); + if (!err) + return 0; + + /* On error just fall through */ + KAUTH_DEBUG("kauth_cred_getgroups failed %d\n", err); + } +#endif /* CONFIG_EXT_RESOLVER */ /* * If they just want a copy of the groups list, they may not care @@ -3896,11 +4107,11 @@ kauth_cred_getgroups(kauth_cred_t cred, gid_t *grouplist, int *countp) * and limit the returned list to that size. 
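With the resolver-backed path above, kauth_cred_getgroups() treats *countp as in/out: callers pass the capacity of grouplist and get back the number of entries actually filled in. A sketch of a caller:

static void
groups_example(kauth_cred_t cred)
{
	gid_t groups[NGROUPS];
	int gcount = NGROUPS;

	if (kauth_cred_getgroups(cred, groups, &gcount) == 0) {
		/* gcount now holds the number of valid entries in groups[] */
	}
}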
*/ if (countp) { - limit = MIN(*countp, cred->cr_posix.cr_ngroups); + limit = MIN(*countp, pcred->cr_ngroups); *countp = limit; } - memcpy(grouplist, cred->cr_posix.cr_groups, sizeof(gid_t) * limit); + memcpy(grouplist, pcred->cr_groups, sizeof(gid_t) * limit); return 0; } @@ -4172,8 +4383,8 @@ kauth_cred_label_update(kauth_cred_t cred, struct label *label) static kauth_cred_t kauth_cred_label_update_execve(kauth_cred_t cred, vfs_context_t ctx, - struct vnode *vp, struct label *scriptl, struct label *execl, - int *disjointp) + struct vnode *vp, struct vnode *scriptvp, struct label *scriptl, + struct label *execl, void *macextensions, int *disjointp) { kauth_cred_t newcred; struct ucred temp_cred; @@ -4183,7 +4394,8 @@ kauth_cred_label_update_execve(kauth_cred_t cred, vfs_context_t ctx, mac_cred_label_init(&temp_cred); mac_cred_label_associate(cred, &temp_cred); *disjointp = mac_cred_label_update_execve(ctx, &temp_cred, - vp, scriptl, execl); + vp, scriptvp, scriptl, execl, + macextensions); newcred = kauth_cred_update(cred, &temp_cred, TRUE); mac_cred_label_destroy(&temp_cred); @@ -4279,7 +4491,8 @@ int kauth_proc_label_update(struct proc *p, struct label *label) */ int kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx, - struct vnode *vp, struct label *scriptl, struct label *execl) + struct vnode *vp, struct vnode *scriptvp, struct label *scriptl, + struct label *execl, void *macextensions) { kauth_cred_t my_cred, my_new_cred; int disjoint = 0; @@ -4298,7 +4511,7 @@ kauth_proc_label_update_execve(struct proc *p, vfs_context_t ctx, * passed in. The subsequent compare is safe, because it is * a pointer compare rather than a contents compare. */ - my_new_cred = kauth_cred_label_update_execve(my_cred, ctx, vp, scriptl, execl, &disjoint); + my_new_cred = kauth_cred_label_update_execve(my_cred, ctx, vp, scriptvp, scriptl, execl, macextensions, &disjoint); if (my_cred != my_new_cred) { DEBUG_CRED_CHANGE("kauth_proc_label_update_execve_unlocked CH(%d): %p/0x%08x -> %p/0x%08x\n", p->p_pid, my_cred, my_cred->cr_flags, my_new_cred, my_new_cred->cr_flags); @@ -4442,7 +4655,8 @@ kauth_cred_ref(kauth_cred_t cred) * Parameters: credp Pointer to address containing * credential to be freed * - * Returns: (void) + * Returns: TRUE if the credential must be destroyed by the caller. + * FALSE otherwise. * * Implicit returns: * *credp Set to NOCRED @@ -4461,10 +4675,11 @@ kauth_cred_ref(kauth_cred_t cred) * intended effect, to take into account the reference held by * the credential hash, which is released at the same time. 
*/ -static void +static boolean_t kauth_cred_unref_hashlocked(kauth_cred_t *credp) { int old_value; + boolean_t destroy_it = FALSE; KAUTH_CRED_HASH_LOCK_ASSERT(); NULLCRED_CHECK(*credp); @@ -4490,9 +4705,14 @@ kauth_cred_unref_hashlocked(kauth_cred_t *credp) */ if (old_value < 3) { /* The last absolute reference is our credential hash table */ - kauth_cred_remove(*credp); + destroy_it = kauth_cred_remove(*credp); + } + + if (destroy_it == FALSE) { + *credp = NOCRED; } - *credp = NOCRED; + + return (destroy_it); } @@ -4517,9 +4737,23 @@ kauth_cred_unref_hashlocked(kauth_cred_t *credp) void kauth_cred_unref(kauth_cred_t *credp) { + boolean_t destroy_it; + KAUTH_CRED_HASH_LOCK(); - kauth_cred_unref_hashlocked(credp); + destroy_it = kauth_cred_unref_hashlocked(credp); KAUTH_CRED_HASH_UNLOCK(); + + if (destroy_it == TRUE) { + assert(*credp != NOCRED); +#if CONFIG_MACF + mac_cred_label_destroy(*credp); +#endif + AUDIT_SESSION_UNREF(*credp); + + (*credp)->cr_ref = 0; + FREE_ZONE(*credp, sizeof(*(*credp)), M_CRED); + *credp = NOCRED; + } } @@ -4763,17 +4997,31 @@ kauth_cred_update(kauth_cred_t old_cred, kauth_cred_t model_cred, return(old_cred); } if (found_cred != NULL) { + boolean_t destroy_it; + DEBUG_CRED_CHANGE("kauth_cred_update(cache hit): %p -> %p\n", old_cred, found_cred); /* * Found a match so we bump reference count on new * one and decrement reference count on the old one. */ kauth_cred_ref(found_cred); - kauth_cred_unref_hashlocked(&old_cred); + destroy_it = kauth_cred_unref_hashlocked(&old_cred); KAUTH_CRED_HASH_UNLOCK(); + if (destroy_it == TRUE) { + assert(old_cred != NOCRED); +#if CONFIG_MACF + mac_cred_label_destroy(old_cred); +#endif + AUDIT_SESSION_UNREF(old_cred); + + old_cred->cr_ref = 0; + FREE_ZONE(old_cred, sizeof(*old_cred), M_CRED); + old_cred = NOCRED; + + } return(found_cred); } - + /* * Must allocate a new credential using the model. also * adds the new credential to the credential hash table. @@ -4855,7 +5103,7 @@ kauth_cred_add(kauth_cred_t new_cred) * Parameters: cred Credential to remove from cred * hash cache * - * Returns: (void) + * Returns: TRUE if the cred was found & removed from the hash; FALSE if not. * * Locks: Caller is expected to hold KAUTH_CRED_HASH_LOCK * @@ -4864,7 +5112,7 @@ kauth_cred_add(kauth_cred_t new_cred) * following code occurs with the hash lock held; in theory, this * protects us from the 2->1 reference that gets us here. */ -static void +static boolean_t kauth_cred_remove(kauth_cred_t cred) { u_long hash_key; @@ -4877,30 +5125,23 @@ kauth_cred_remove(kauth_cred_t cred) if (cred->cr_ref < 1) panic("cred reference underflow"); if (cred->cr_ref > 1) - return; /* someone else got a ref */ + return (FALSE); /* someone else got a ref */ /* Find cred in the credential hash table */ TAILQ_FOREACH(found_cred, &kauth_cred_table_anchor[hash_key], cr_link) { if (found_cred == cred) { /* found a match, remove it from the hash table */ TAILQ_REMOVE(&kauth_cred_table_anchor[hash_key], found_cred, cr_link); -#if CONFIG_MACF - mac_cred_label_destroy(cred); -#endif - AUDIT_SESSION_UNREF(cred); - - cred->cr_ref = 0; - FREE_ZONE(cred, sizeof(*cred), M_CRED); #if KAUTH_CRED_HASH_DEBUG kauth_cred_count--; #endif - return; + return (TRUE); } } /* Did not find a match... this should not happen! XXX Make panic? 
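The boolean returns threaded through kauth_cred_remove() and kauth_cred_unref_hashlocked() above exist so the expensive teardown (MAC label destroy, audit session unref, zone free) can run after the hash lock is dropped. A condensed model of the new sequence in kauth_cred_unref(), with the CONFIG_MACF label teardown elided:

static void
cred_unref_example(kauth_cred_t cred)
{
	boolean_t destroy_it;

	KAUTH_CRED_HASH_LOCK();
	destroy_it = kauth_cred_unref_hashlocked(&cred);
	KAUTH_CRED_HASH_UNLOCK();

	if (destroy_it == TRUE) {
		/* safe outside the lock: the hash table held the last reference */
		AUDIT_SESSION_UNREF(cred);
		cred->cr_ref = 0;
		FREE_ZONE(cred, sizeof (*cred), M_CRED);
	}
}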
*/ printf("%s:%d - %s - %s - did not find a match for %p\n", __FILE__, __LINE__, __FUNCTION__, current_proc()->p_comm, cred); - return; + return (FALSE); } @@ -4952,12 +5193,13 @@ kauth_cred_find(kauth_cred_t cred) match = (bcmp(found_pcred, pcred, sizeof (*pcred)) == 0) ? TRUE : FALSE; match = match && ((bcmp(&found_cred->cr_audit, &cred->cr_audit, sizeof(cred->cr_audit)) == 0) ? TRUE : FALSE); +#if CONFIG_MACF if (((found_pcred->cr_flags & CRF_MAC_ENFORCE) != 0) || ((pcred->cr_flags & CRF_MAC_ENFORCE) != 0)) { match = match && mac_cred_label_compare(found_cred->cr_label, cred->cr_label); } - +#endif if (match) { /* found a match */ return(found_cred); @@ -5019,7 +5261,9 @@ kauth_cred_hash(const uint8_t *datap, int data_len, u_long start_key) static u_long kauth_cred_get_hashkey(kauth_cred_t cred) { +#if CONFIG_MACF posix_cred_t pcred = posix_cred_get(cred); +#endif u_long hash_key = 0; hash_key = kauth_cred_hash((uint8_t *)&cred->cr_posix, @@ -5028,12 +5272,13 @@ kauth_cred_get_hashkey(kauth_cred_t cred) hash_key = kauth_cred_hash((uint8_t *)&cred->cr_audit, sizeof(struct au_session), hash_key); - +#if CONFIG_MACF if (pcred->cr_flags & CRF_MAC_ENFORCE) { hash_key = kauth_cred_hash((uint8_t *)cred->cr_label, sizeof (struct label), hash_key); } +#endif return(hash_key); } diff --git a/bsd/kern/kern_cs.c b/bsd/kern/kern_cs.c new file mode 100644 index 000000000..041405044 --- /dev/null +++ b/bsd/kern/kern_cs.c @@ -0,0 +1,474 @@ +/* + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#include + +#include + +unsigned long cs_procs_killed = 0; +unsigned long cs_procs_invalidated = 0; + +int cs_force_kill = 0; +int cs_force_hard = 0; +int cs_debug = 0; +#if SECURE_KERNEL +const int cs_enforcement_enable=1; +#else +#if CONFIG_ENFORCE_SIGNED_CODE +int cs_enforcement_enable=1; +#else +int cs_enforcement_enable=0; +#endif +int cs_enforcement_panic=0; +#endif +int cs_all_vnodes = 0; + +static lck_grp_t *cs_lockgrp; +static lck_rw_t * SigPUPLock; + +SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_kill, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_hard, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_debug, 0, ""); + +SYSCTL_INT(_vm, OID_AUTO, cs_all_vnodes, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_all_vnodes, 0, ""); + +#if !SECURE_KERNEL +SYSCTL_INT(_vm, OID_AUTO, cs_enforcement, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_enforcement_enable, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, cs_enforcement_panic, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_enforcement_panic, 0, ""); +#endif + +void +cs_init(void) +{ +#if !SECURE_KERNEL + int disable_cs_enforcement = 0; + PE_parse_boot_argn("cs_enforcement_disable", &disable_cs_enforcement, + sizeof (disable_cs_enforcement)); + if (disable_cs_enforcement) { + cs_enforcement_enable = 0; + } else { + int panic = 0; + PE_parse_boot_argn("cs_enforcement_panic", &panic, sizeof(panic)); + cs_enforcement_panic = (panic != 0); + } + + PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug)); +#endif + lck_grp_attr_t *attr = lck_grp_attr_alloc_init(); + cs_lockgrp = lck_grp_alloc_init("KERNCS", attr); + SigPUPLock = lck_rw_alloc_init(cs_lockgrp, NULL); +} + +int +cs_allow_invalid(struct proc *p) +{ +#if MACH_ASSERT + lck_mtx_assert(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); +#endif +#if CONFIG_MACF && CONFIG_ENFORCE_SIGNED_CODE + /* There needs to be a MAC policy to implement this hook, or else the + * kill bits will be cleared here every time. If we have + * CONFIG_ENFORCE_SIGNED_CODE, we can assume there is a policy + * implementing the hook. + */ + if( 0 != mac_proc_check_run_cs_invalid(p)) { + if(cs_debug) printf("CODE SIGNING: cs_allow_invalid() " + "not allowed: pid %d\n", + p->p_pid); + return 0; + } + if(cs_debug) printf("CODE SIGNING: cs_allow_invalid() " + "allowed: pid %d\n", + p->p_pid); + proc_lock(p); + p->p_csflags &= ~(CS_KILL | CS_HARD); + proc_unlock(p); + vm_map_switch_protect(get_task_map(p->task), FALSE); +#endif + return (p->p_csflags & (CS_KILL | CS_HARD)) == 0; +} + +int +cs_invalid_page( + addr64_t vaddr) +{ + struct proc *p; + int send_kill = 0, retval = 0, verbose = cs_debug; + uint32_t csflags; + + p = current_proc(); + + /* + * XXX revisit locking when proc is no longer protected + * by the kernel funnel... + */ + + if (verbose) + printf("CODE SIGNING: cs_invalid_page(0x%llx): p=%d[%s]\n", + vaddr, p->p_pid, p->p_comm); + + proc_lock(p); + + /* XXX for testing */ + if (cs_force_kill) + p->p_csflags |= CS_KILL; + if (cs_force_hard) + p->p_csflags |= CS_HARD; + + /* CS_KILL triggers a kill signal, and no you can't have the page. Nothing else. 
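+ * Net effect: CS_KILL sends SIGKILL and denies the page; CS_HARD denies the page but leaves the process alive and valid; if neither is set, CS_VALID is cleared and the process keeps running against the now-invalid page.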
*/ + if (p->p_csflags & CS_KILL) { + p->p_csflags |= CS_KILLED; + cs_procs_killed++; + send_kill = 1; + retval = 1; + } + + /* CS_HARD means fail the mapping operation so the process stays valid. */ + if (p->p_csflags & CS_HARD) { + retval = 1; + } else { + if (p->p_csflags & CS_VALID) { + p->p_csflags &= ~CS_VALID; + cs_procs_invalidated++; + verbose = 1; + } + } + csflags = p->p_csflags; + proc_unlock(p); + + if (verbose) { + char pid_str[10]; + snprintf(pid_str, sizeof(pid_str), "%d", p->p_pid); + kern_asl_msg(LOG_NOTICE, "messagetracer", + 5, + "com.apple.message.domain", "com.apple.kernel.cs.invalidate", + "com.apple.message.signature", send_kill ? "kill" : retval ? "deny" : "invalidate", + "com.apple.message.signature4", pid_str, + "com.apple.message.signature3", p->p_comm, + "com.apple.message.summarize", "YES", + NULL + ); + printf("CODE SIGNING: cs_invalid_page(0x%llx): " + "p=%d[%s] final status 0x%x, %sing page%s\n", + vaddr, p->p_pid, p->p_comm, p->p_csflags, + retval ? "deny" : "allow (remove VALID)", + send_kill ? " sending SIGKILL" : ""); + } + + if (send_kill) + psignal(p, SIGKILL); + + + return retval; +} + +/* + * Assumes p (if passed in) is locked with proc_lock(). + */ + +int +cs_enforcement(struct proc *p) +{ + + if (cs_enforcement_enable) + return 1; + + if (p == NULL) + p = current_proc(); + + if (p != NULL && (p->p_csflags & CS_ENFORCEMENT)) + return 1; + + return 0; +} + +static struct { + struct cscsr_functions *funcs; + vm_map_offset_t csr_map_base; + vm_map_size_t csr_map_size; + int inuse; + int disabled; +} csr_state; + +SYSCTL_INT(_vm, OID_AUTO, sigpup_disable, CTLFLAG_RW | CTLFLAG_LOCKED, &csr_state.disabled, 0, ""); + +static int +vnsize(vfs_context_t vfs, vnode_t vp, uint64_t *size) +{ + struct vnode_attr va; + int error; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_data_size); + + error = vnode_getattr(vp, &va, vfs); + if (error) + return error; + *size = va.va_data_size; + return 0; +} + +int +sigpup_install(user_addr_t argsp) +{ + struct sigpup_install_table args; + memory_object_control_t control; + kern_return_t result; + vfs_context_t vfs = NULL; + struct vnode_attr va; + vnode_t vp = NULL; + char *buf = NULL; + uint64_t size; + size_t len = 0; + int error = 0; + + if (!cs_enforcement_enable || csr_state.funcs == NULL) + return ENOTSUP; + + lck_rw_lock_exclusive(SigPUPLock); + + if (kauth_cred_issuser(kauth_cred_get()) == 0) { + error = EPERM; + goto cleanup; + } + + if (cs_debug > 10) + printf("sigpup install\n"); + + if (csr_state.csr_map_base != 0 || csr_state.inuse) { + error = EPERM; + goto cleanup; + } + + if (USER_ADDR_NULL == argsp) { + error = EINVAL; + goto cleanup; + } + if ((error = copyin(argsp, &args, sizeof(args))) != 0) + goto cleanup; + + if (cs_debug > 10) + printf("sigpup install with args\n"); + + MALLOC(buf, char *, MAXPATHLEN, M_TEMP, M_WAITOK); + if (buf == NULL) { + error = ENOMEM; + goto cleanup; + } + if ((error = copyinstr((user_addr_t)args.path, buf, MAXPATHLEN, &len)) != 0) + goto cleanup; + + if ((vfs = vfs_context_create(NULL)) == NULL) { + error = ENOMEM; + goto cleanup; + } + + if ((error = vnode_lookup(buf, VNODE_LOOKUP_NOFOLLOW, &vp, vfs)) != 0) + goto cleanup; + + if (cs_debug > 10) + printf("sigpup found file: %s\n", buf); + + /* make sure vnode is on the process's root volume */ + if (rootvnode->v_mount != vp->v_mount) { + if (cs_debug) printf("sigpup csr not on root volume\n"); + error = EPERM; + goto cleanup; + } + + /* make sure vnode is owned by "root" */ + VATTR_INIT(&va); + VATTR_WANTED(&va, va_uid); + error
= vnode_getattr(vp, &va, vfs); + if (error) + goto cleanup; + + if (va.va_uid != 0) { + if (cs_debug) printf("sigpup: csr file not owned by root\n"); + error = EPERM; + goto cleanup; + } + + error = vnsize(vfs, vp, &size); + if (error) + goto cleanup; + + control = ubc_getobject(vp, 0); + if (control == MEMORY_OBJECT_CONTROL_NULL) { + error = EINVAL; + goto cleanup; + } + + csr_state.csr_map_size = mach_vm_round_page(size); + + if (cs_debug > 10) + printf("mmap!\n"); + + result = vm_map_enter_mem_object_control(kernel_map, + &csr_state.csr_map_base, + csr_state.csr_map_size, + 0, VM_FLAGS_ANYWHERE, + control, 0 /* file offset */, + 0 /* cow */, + VM_PROT_READ, + VM_PROT_READ, + VM_INHERIT_DEFAULT); + if (result != KERN_SUCCESS) { + error = EINVAL; + goto cleanup; + } + + error = csr_state.funcs->csr_validate_header((const uint8_t *)csr_state.csr_map_base, + csr_state.csr_map_size); + if (error) { + if (cs_debug > 10) + printf("sigpup header invalid, dropping mapping\n"); + sigpup_drop(); + goto cleanup; + } + + if (cs_debug > 10) + printf("table loaded %ld bytes\n", (long)csr_state.csr_map_size); + +cleanup: + lck_rw_unlock_exclusive(SigPUPLock); + + if (buf) + FREE(buf, M_TEMP); + if (vp) + (void)vnode_put(vp); + if (vfs) + (void)vfs_context_rele(vfs); + + if (error) + printf("sigpup: load failed with error: %d\n", error); + + + return error; +} + +int +sigpup_drop(void) +{ + + if (kauth_cred_issuser(kauth_cred_get()) == 0) + return EPERM; + + lck_rw_lock_exclusive(SigPUPLock); + + if (csr_state.csr_map_base == 0 || csr_state.inuse) { + printf("failed to unload the sigpup database\n"); + lck_rw_unlock_exclusive(SigPUPLock); + return EINVAL; + } + + if (cs_debug > 10) + printf("sigpup: unloading\n"); + + (void)mach_vm_deallocate(kernel_map, + csr_state.csr_map_base, csr_state.csr_map_size); + + csr_state.csr_map_base = 0; + csr_state.csr_map_size = 0; + + lck_rw_unlock_exclusive(SigPUPLock); + + return 0; +} + +void sigpup_attach_vnode(vnode_t); /* XXX */ + +void +sigpup_attach_vnode(vnode_t vp) +{ + const void *csblob; + size_t cslen; + + if (!cs_enforcement_enable || csr_state.funcs == NULL || csr_state.csr_map_base == 0 || csr_state.disabled) + return; + + /* if the file is not on the root volume or has already been checked, skip */ + if (vp->v_mount != rootvnode->v_mount || (vp->v_flag & VNOCS)) + return; + + csblob = csr_state.funcs->csr_find_file_codedirectory(vp, (const uint8_t *)csr_state.csr_map_base, + (size_t)csr_state.csr_map_size, &cslen); + if (csblob) { + ubc_cs_sigpup_add(vp, (vm_address_t)csblob, (vm_size_t)cslen); + csr_state.inuse = 1; + } + vp->v_flag |= VNOCS; +} + +void +cs_register_cscsr(struct cscsr_functions *funcs) +{ + if (csr_state.funcs || funcs->csr_version < CSCSR_VERSION) + return; + csr_state.funcs = funcs; +} diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index 04af17fe7..7f53765a2 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,7 @@ #include #include #include +#include #include @@ -136,13 +138,8 @@ void fileport_releasefg(struct fileglob *fg); /* flags for close_internal_locked */ #define FD_DUP2RESV 1 -static int close_internal_locked(struct proc *p, int fd, struct fileproc *fp, int flags); - -static int closef_finish(struct fileproc *fp, struct fileglob *fg, proc_t p, vfs_context_t ctx); /* We don't want these exported */ -__private_extern__ -int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *); __private_extern__ int unlink1(vfs_context_t, struct nameidata *, int); @@ -150,26 +147,25 @@ int unlink1(vfs_context_t, struct nameidata *, int); static void _fdrelse(struct proc * p, int fd); -extern void file_lock_init(void) __attribute__((section("__TEXT, initcode"))); +extern void file_lock_init(void); extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat4, proc_t p); -#if SOCKETS -extern int soo_stat(struct socket *so, void *ub, int isstat64); -#endif /* SOCKETS */ extern kauth_scope_t kauth_scope_fileop; -extern int cs_debug; - /* Conflict wait queue for when selects collide (opaque type) */ extern struct wait_queue select_conflict_queue; #define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type +#define f_type f_fglob->fg_ops->fo_type #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred #define f_ops f_fglob->fg_ops #define f_offset f_fglob->fg_offset #define f_data f_fglob->fg_data +#define CHECK_ADD_OVERFLOW_INT64L(x, y) \ + (((((x) > 0) && ((y) > 0) && ((x) > LLONG_MAX - (y))) || \ + (((x) < 0) && ((y) < 0) && ((x) < LLONG_MIN - (y)))) \ + ? 1 : 0) /* * Descriptor management. */ @@ -185,6 +181,64 @@ lck_attr_t * file_lck_attr; lck_mtx_t * uipc_lock; +/* + * check_file_seek_range + * + * Description: Checks if seek offsets are in the range of 0 to LLONG_MAX. + * + * Parameters: fl Flock structure. + * cur_file_offset Current offset in the file. + * + * Returns: 0 on Success. + * EOVERFLOW on overflow. + * EINVAL on offset less than zero. + */ + +static int +check_file_seek_range(struct flock *fl, off_t cur_file_offset) +{ + if (fl->l_whence == SEEK_CUR) { + /* Check if the start marker is beyond LLONG_MAX. */ + if (CHECK_ADD_OVERFLOW_INT64L(fl->l_start, cur_file_offset)) { + /* Check if start marker is negative */ + if (fl->l_start < 0) { + return EINVAL; + } + return EOVERFLOW; + } + /* Check if the start marker is negative. */ + if (fl->l_start + cur_file_offset < 0) { + return EINVAL; + } + /* Check if end marker is beyond LLONG_MAX. */ + if ((fl->l_len > 0) && (CHECK_ADD_OVERFLOW_INT64L(fl->l_start + + cur_file_offset, fl->l_len - 1))) { + return EOVERFLOW; + } + /* Check if the end marker is negative. */ + if ((fl->l_len <= 0) && (fl->l_start + cur_file_offset + + fl->l_len < 0)) { + return EINVAL; + } + } else if (fl->l_whence == SEEK_SET) { + /* Check if the start marker is negative. */ + if (fl->l_start < 0) { + return EINVAL; + } + /* Check if the end marker is beyond LLONG_MAX. */ + if ((fl->l_len > 0) && + CHECK_ADD_OVERFLOW_INT64L(fl->l_start, fl->l_len - 1)) { + return EOVERFLOW; + } + /* Check if the end marker is negative. 
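+ * (A negative l_len extends the lock backwards from l_start; e.g. l_start = 100 with l_len = -200 would start the range at byte -100, before the beginning of the file, hence EINVAL.)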
*/ + if ((fl->l_len < 0) && fl->l_start + fl->l_len < 0) { + return EINVAL; + } + } + return 0; +} + + /* * file_lock_init * @@ -473,6 +527,12 @@ dup(proc_t p, struct dup_args *uap, int32_t *retval) proc_fdunlock(p); return(error); } + if (FP_ISGUARDED(fp, GUARD_DUP)) { + error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP); + (void) fp_drop(p, old, fp, 1); + proc_fdunlock(p); + return (error); + } if ( (error = fdalloc(p, 0, &new)) ) { fp_drop(p, old, fp, 1); proc_fdunlock(p); @@ -516,6 +576,12 @@ startover: proc_fdunlock(p); return(error); } + if (FP_ISGUARDED(fp, GUARD_DUP)) { + error = fp_guard_exception(p, old, fp, kGUARD_EXC_DUP); + (void) fp_drop(p, old, fp, 1); + proc_fdunlock(p); + return (error); + } if (new < 0 || (rlim_t)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || new >= maxfiles) { @@ -553,6 +619,13 @@ closeit: if ((fdp->fd_ofiles[new] != NULL) && ((error = fp_lookup(p, new, &nfp, 1)) == 0)) { fp_drop(p, old, fp, 1); + if (FP_ISGUARDED(nfp, GUARD_CLOSE)) { + error = fp_guard_exception(p, + new, nfp, kGUARD_EXC_CLOSE); + (void) fp_drop(p, new, nfp, 1); + proc_fdunlock(p); + return (error); + } (void)close_internal_locked(p, new, nfp, FD_DUP2RESV); #if DIAGNOSTIC proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED); @@ -642,6 +715,7 @@ fcntl(proc_t p, struct fcntl_args *uap, int32_t *retval) * copyin:EFAULT * vnode_getwithref:??? * VNOP_ADVLOCK:??? + * msleep:ETIMEDOUT * [F_GETLK] * EBADF * EOVERFLOW @@ -678,6 +752,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) struct vnode *vp = NULLVP; /* for AUDIT_ARG() at end */ int i, tmp, error, error2, flg = F_POSIX; struct flock fl; + struct flocktimeout fltimeout; + struct timespec *timeout = NULL; struct vfs_context context; off_t offset; int newmin; @@ -727,6 +803,10 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) case F_DUPFD: case F_DUPFD_CLOEXEC: + if (FP_ISGUARDED(fp, GUARD_DUP)) { + error = fp_guard_exception(p, fd, fp, kGUARD_EXC_DUP); + goto out; + } newmin = CAST_DOWN_EXPLICIT(int, uap->arg); /* arg is an int, so we won't lose bits */ AUDIT_ARG(value32, newmin); if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || @@ -741,14 +821,22 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) goto out; case F_GETFD: - *retval = (*pop & UF_EXCLOSE)? 1 : 0; + *retval = (*pop & UF_EXCLOSE)? FD_CLOEXEC : 0; error = 0; goto out; case F_SETFD: AUDIT_ARG(value32, uap->arg); - *pop = (*pop &~ UF_EXCLOSE) | - (uap->arg & 1)? UF_EXCLOSE : 0; + if (uap->arg & FD_CLOEXEC) + *pop |= UF_EXCLOSE; + else { + if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) { + error = fp_guard_exception(p, + fd, fp, kGUARD_EXC_NOCLOEXEC); + goto out; + } + *pop &= ~UF_EXCLOSE; + } error = 0; goto out; @@ -815,8 +903,12 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) case F_SETNOSIGPIPE: tmp = CAST_DOWN_EXPLICIT(int, uap->arg); if (fp->f_type == DTYPE_SOCKET) { +#if SOCKETS error = sock_setsockopt((struct socket *)fp->f_data, SOL_SOCKET, SO_NOSIGPIPE, &tmp, sizeof (tmp)); +#else + error = EINVAL; +#endif } else { struct fileglob *fg = fp->f_fglob; @@ -832,9 +924,13 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) case F_GETNOSIGPIPE: if (fp->f_type == DTYPE_SOCKET) { +#if SOCKETS int retsize = sizeof (*retval); error = sock_getsockopt((struct socket *)fp->f_data, SOL_SOCKET, SO_NOSIGPIPE, retval, &retsize); +#else + error = EINVAL; +#endif } else { *retval = (fp->f_fglob->fg_lflags & FG_NOSIGPIPE) ? 
1 : 0; @@ -842,6 +938,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } goto out; + case F_SETLKWTIMEOUT: case F_SETLKW: flg |= F_WAIT; /* Fall into F_SETLK */ @@ -858,16 +955,25 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) proc_fdunlock(p); /* Copy in the lock structure */ - error = copyin(argp, (caddr_t)&fl, sizeof(fl)); - if (error) { - goto outdrop; + if (uap->cmd == F_SETLKWTIMEOUT) { + error = copyin(argp, (caddr_t) &fltimeout, sizeof(fltimeout)); + if (error) { + goto outdrop; + } + fl = fltimeout.fl; + timeout = &fltimeout.timeout; + } else { + error = copyin(argp, (caddr_t)&fl, sizeof(fl)); + if (error) { + goto outdrop; + } } - volatile off_t affected_lock_area_set = 0; - affected_lock_area_set = fl.l_start + offset; - if ((fl.l_whence == SEEK_CUR) && (affected_lock_area_set < fl.l_start)) { - error = EOVERFLOW; - goto outdrop; + /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */ + /* and ending byte for EOVERFLOW in SEEK_SET */ + error = check_file_seek_range(&fl, offset); + if (error) { + goto outdrop; } if ( (error = vnode_getwithref(vp)) ) { @@ -894,7 +1000,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } // XXX UInt32 unsafe for LP64 kernel OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag); - error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context); + error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context, timeout); (void)vnode_put(vp); goto outdrop; @@ -906,13 +1012,13 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } // XXX UInt32 unsafe for LP64 kernel OSBitOrAtomic(P_LADVLOCK, &p->p_ladvflag); - error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context); + error = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg, &context, timeout); (void)vnode_put(vp); goto outdrop; case F_UNLCK: error = VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl, - F_POSIX, &context); + F_POSIX, &context, timeout); (void)vnode_put(vp); goto outdrop; @@ -923,9 +1029,6 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } case F_GETLK: -#if CONFIG_EMBEDDED - case F_GETLKPID: -#endif if (fp->f_type != DTYPE_VNODE) { error = EBADF; goto out; @@ -940,17 +1043,10 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) if (error) goto outdrop; - volatile off_t affected_lock_area_end = 0; - affected_lock_area_end = fl.l_start + offset; /* Check starting byte and ending byte for EOVERFLOW in SEEK_CUR */ /* and ending byte for EOVERFLOW in SEEK_SET */ - if (((fl.l_whence == SEEK_CUR) && - ((affected_lock_area_end < fl.l_start) || - ((fl.l_len > 0) && (affected_lock_area_end + fl.l_len - 1 < affected_lock_area_end)))) || - ((fl.l_whence == SEEK_SET) && (fl.l_len > 0) && (fl.l_start + fl.l_len - 1 < fl.l_start))) - { - /* lf_advlock doesn't check start/end for F_GETLK if file has no locks */ - error = EOVERFLOW; + error = check_file_seek_range(&fl, offset); + if (error) { goto outdrop; } @@ -988,7 +1084,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) uap->cmd, &fl); if (error == 0) #endif - error = VNOP_ADVLOCK(vp, (caddr_t)p, uap->cmd, &fl, F_POSIX, &context); + error = VNOP_ADVLOCK(vp, (caddr_t)p, uap->cmd, &fl, F_POSIX, &context, NULL); (void)vnode_put(vp); @@ -1113,7 +1209,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) * without zero filling the data is a security hole * root would have access anyway so we'll allow it */ - if (!is_suser()) { 
+ if (!kauth_cred_issuser(kauth_cred_get())) { error = EACCES; } else { /* @@ -1290,7 +1386,23 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } devBlockSize = vfs_devblocksize(vnode_mount(vp)); if (uap->cmd == F_LOG2PHYS_EXT) { +#if defined(__LP64__) a_size = l2p_struct.l2p_contigbytes; +#else + if ((l2p_struct.l2p_contigbytes > SIZE_MAX) || (l2p_struct.l2p_contigbytes < 0)) { + /* size_t is 32-bit on a 32-bit kernel, therefore + * assigning l2p_contigbytes to a_size may have + * caused integer overflow. We, therefore, return + * an error here instead of calculating incorrect + * value. + */ + printf ("fcntl: F_LOG2PHYS_EXT: l2p_contigbytes=%lld will overflow, returning error\n", l2p_struct.l2p_contigbytes); + error = EFBIG; + goto outdrop; + } else { + a_size = l2p_struct.l2p_contigbytes; + } +#endif } else { a_size = devBlockSize; } @@ -1306,8 +1418,19 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } else { l2p_struct.l2p_contigbytes = 0; /* for now */ } - l2p_struct.l2p_devoffset = bn * devBlockSize; - l2p_struct.l2p_devoffset += file_offset - offset; + + /* + * The block number being -1 suggests that the file offset is not backed + * by any real blocks on-disk. As a result, just let it be passed back up wholesale. + */ + if (bn == -1) { + /* Don't multiply it by the block size */ + l2p_struct.l2p_devoffset = bn; + } + else { + l2p_struct.l2p_devoffset = bn * devBlockSize; + l2p_struct.l2p_devoffset += file_offset - offset; + } error = copyout((caddr_t)&l2p_struct, argp, sizeof(l2p_struct)); } goto outdrop; @@ -1442,7 +1565,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) fopen.o_pathname, &context); nd.ni_dvp = vp; - error = open1(&context, &nd, fopen.o_flags, &va, retval); + error = open1(&context, &nd, fopen.o_flags, &va, + fileproc_alloc_init, NULL, retval); vnode_put(vp); break; @@ -1499,6 +1623,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) { struct user_fsignatures fs; kern_return_t kr; + off_t kernel_blob_offset; vm_offset_t kernel_blob_addr; vm_size_t kernel_blob_size; @@ -1530,15 +1655,14 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) if(ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start)) { - /* - if(cs_debug) - printf("CODE SIGNING: resident blob offered for: %s\n", vp->v_name); - */ vnode_put(vp); goto outdrop; } - -#define CS_MAX_BLOB_SIZE (1280ULL * 1024) /* max shared cache file XXX ? */ +#if defined(__LP64__) +#define CS_MAX_BLOB_SIZE (2560ULL * 1024ULL) /* max shared cache file XXX ? */ +#else +#define CS_MAX_BLOB_SIZE (1600ULL * 1024ULL) /* max shared cache file XXX ? 
*/ +#endif if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) { error = E2BIG; vnode_put(vp); goto outdrop; } @@ -1554,10 +1678,12 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) } if(uap->cmd == F_ADDSIGS) { + kernel_blob_offset = 0; error = copyin(fs.fs_blob_start, (void *) kernel_blob_addr, kernel_blob_size); } else /* F_ADDFILESIGS */ { + kernel_blob_offset = fs.fs_blob_start; error = vn_rdwr(UIO_READ, vp, (caddr_t) kernel_blob_addr, @@ -1582,6 +1708,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) CPU_TYPE_ANY, /* not for a specific architecture */ fs.fs_file_start, kernel_blob_addr, + kernel_blob_offset, kernel_blob_size); if (error) { ubc_cs_blob_deallocate(kernel_blob_addr, @@ -1596,63 +1723,40 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) (void) vnode_put(vp); break; } + case F_FINDSIGS: { +#ifdef SECURE_KERNEL + error = ENOTSUP; +#else /* !SECURE_KERNEL */ + off_t offsetMacho; - case F_MARKDEPENDENCY: { - struct vnode_attr va; - vfs_context_t ctx = vfs_context_current(); - kauth_cred_t cred; - - if ((current_proc()->p_flag & P_DEPENDENCY_CAPABLE) == 0) { - error = EPERM; - goto out; - } - if (fp->f_type != DTYPE_VNODE) { error = EBADF; goto out; } - vp = (struct vnode *)fp->f_data; proc_fdunlock(p); - - if (vnode_getwithref(vp)) { - error = ENOENT; + error = vnode_getwithref(vp); + if (error) goto outdrop; - } - - if (!vnode_isvroot(vp)) { - error = EINVAL; - vnode_put(vp); - goto outdrop; - } - // get the owner of the root dir - VATTR_INIT(&va); - VATTR_WANTED(&va, va_uid); - if (vnode_getattr(vp, &va, ctx) != 0) { - error = EINVAL; - vnode_put(vp); - goto outdrop + error = copyin(argp, &offsetMacho, sizeof(offsetMacho)); + if (error) { + (void)vnode_put(vp); + goto outdrop; } - // and last, check that the caller is the super user or - // the owner of the mount point - cred = vfs_context_ucred(ctx); - if (!is_suser() && va.va_uid != kauth_cred_getuid(cred)) { - error = EACCES; - vnode_put(vp); +#if CONFIG_MACF + error = mac_vnode_find_sigs(p, vp, offsetMacho); +#else + error = EPERM; +#endif + if (error) { + (void)vnode_put(vp); goto outdrop; } - - // if all those checks pass then we can mark the dependency - vfs_markdependency(vp->v_mount); - error = 0; - - vnode_put(vp); - +#endif /* SECURE_KERNEL */ break; } - #if CONFIG_PROTECT case F_GETPROTECTIONCLASS: { int class = 0; @@ -1755,6 +1859,35 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) vnode_put (vp); break; } + + case F_GETDEFAULTPROTLEVEL: { + uint32_t cp_default = 0; + + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + + vp = (struct vnode*) fp->f_data; + proc_fdunlock (p); + + if (vnode_getwithref(vp)) { + error = ENOENT; + goto outdrop; + } + + /* + * if cp_get_default_level fails, error will be set to the proper errno + * and cp_default will still be 0. + */ + + error = cp_get_default_level(vp, &cp_default); + *retval = cp_default; + + vnode_put (vp); + break; + } + #endif /* CONFIG_PROTECT */ @@ -1884,8 +2017,58 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) fp_drop(p, fd2, fp2, 0); break; } - + + /* + * SPI for making a file compressed. + */ + case F_MAKECOMPRESSED: { + uint32_t gcounter = CAST_DOWN_EXPLICIT(uint32_t, uap->arg); + + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + + vp = (struct vnode*) fp->f_data; + proc_fdunlock (p); + + /* get the vnode */ + if (vnode_getwithref(vp)) { + error = ENOENT; + goto outdrop; + } + + /* Is it a file?
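+ * (Only regular files and symlinks can be marked compressed; anything else is rejected with EBADF just below.)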
*/ + if ((vnode_isreg(vp) == 0) && (vnode_islnk(vp) == 0)) { + vnode_put(vp); + error = EBADF; + goto outdrop; + } + + /* invoke ioctl to pass off to FS */ + /* Only go forward if you have write access */ + vfs_context_t ctx = vfs_context_current(); + if(vnode_authorize(vp, NULLVP, (KAUTH_VNODE_ACCESS | KAUTH_VNODE_WRITE_DATA), ctx) != 0) { + vnode_put(vp); + error = EBADF; + goto outdrop; + } + + error = VNOP_IOCTL(vp, uap->cmd, (caddr_t)&gcounter, 0, &context); + + vnode_put (vp); + break; + } + /* + * SPI (private) for indicating to a filesystem that subsequent writes to + * the open FD will be written to the Fastflow. + */ + case F_SET_GREEDY_MODE: + /* intentionally drop through to the same handler as F_SETSTATIC. + * both fcntls should pass the argument and their selector into VNOP_IOCTL. + */ + /* * SPI (private) for indicating to a filesystem that subsequent writes to * the open FD will represent static content. @@ -2028,10 +2211,19 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) * effectively overload fcntl() to send ioctl()'s. */ if((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)){ - error = EINVAL; + error = EINVAL; goto out; } + /* Catch any now-invalid fcntl() selectors */ + switch (uap->cmd) { + case F_MARKDEPENDENCY: + error = EINVAL; + goto out; + default: + break; + } + if (fp->f_type != DTYPE_VNODE) { error = EBADF; goto out; } @@ -2083,6 +2275,11 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) kfree(memp, size); goto outdrop; } + + /* Bzero the section beyond that which was needed */ + if (size <= sizeof(stkbuf)) { + bzero ( (((uint8_t*)data) + size), (sizeof(stkbuf) - size)); + } } else { /* int */ if (is64bit) { @@ -2185,8 +2382,7 @@ finishdup(proc_t p, proc_fdunlock(p); - MALLOC_ZONE(nfp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK); - /* Failure check follows proc_fdlock() due to handling requirements */ + nfp = fileproc_alloc_init(NULL); proc_fdlock(p); @@ -2196,11 +2392,7 @@ return (ENOMEM); } - bzero(nfp, sizeof(struct fileproc)); - - nfp->f_flags = 0; nfp->f_fglob = ofp->f_fglob; - nfp->f_iocount = 0; #if DIAGNOSTIC if (fdp->fd_ofiles[new] != 0) @@ -2230,6 +2422,7 @@ * * Returns: 0 Success * fp_lookup:EBADF Bad file descriptor + * fp_guard_exception:??? Guarded file descriptor * close_internal:EBADF * close_internal:??? Anything returnable by a per-fileops * close function @@ -2247,7 +2440,7 @@ close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retv { struct fileproc *fp; int fd = uap->fd; - int error =0; + int error; AUDIT_SYSCLOSE(p, fd); @@ -2258,11 +2451,18 @@ close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retv return(error); } + if (FP_ISGUARDED(fp, GUARD_CLOSE)) { + error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE); + (void) fp_drop(p, fd, fp, 1); + proc_fdunlock(p); + return (error); + } + error = close_internal_locked(p, fd, fp, 0); proc_fdunlock(p); - return(error); + return (error); } @@ -2286,10 +2486,9 @@ close_nocancel(proc_t p, struct close_nocancel_args *uap, __unused int32_t *retv * * Notes: This function may drop and reacquire this lock; it is unsafe * for a caller to assume that other state protected by the lock - * has not been subsequently changes out from under it, if the - * caller made the call with the lock held. + * has not been subsequently changed out from under it.
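+ * Callers should therefore revalidate any fd-table state they cached across the call.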
*/ -static int +int close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags) { struct filedesc *fdp = p->p_fd; @@ -2350,8 +2549,7 @@ close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags) if (fp->f_flags & FP_WAITEVENT) (void)waitevent_close(p, fp); - if ((fp->f_flags & FP_INCHRREAD) == 0) - fileproc_drain(p, fp); + fileproc_drain(p, fp); if (resvfd == 0) { _fdrelse(p, fd); @@ -2366,7 +2564,7 @@ close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags) proc_fdunlock(p); - FREE_ZONE(fp, sizeof(*fp), M_FILEPROC); + fileproc_free(fp); proc_fdlock(p); @@ -2714,10 +2912,7 @@ fpathconf(proc_t p, struct fpathconf_args *uap, int32_t *retval) } goto out; - case DTYPE_PSXSHM: - case DTYPE_PSXSEM: - case DTYPE_KQUEUE: - case DTYPE_FSEVENTS: + default: error = EINVAL; goto out; @@ -3337,68 +3532,6 @@ fp_getfpipe(proc_t p, int fd, struct fileproc **resultfp, return (0); } -#if NETAT -#define DTYPE_ATALK -1 /* XXX This does not belong here */ - - -/* - * fp_getfatalk - * - * Description: Get fileproc and atalk pointer for a given fd from the - * per process open file table of the specified process - * and if successful, increment the f_iocount - * - * Parameters: p Process in which fd lives - * fd fd to get information for - * resultfp Pointer to result fileproc - * pointer area, or 0 if none - * resultatalk Pointer to result atalk - * pointer area, or 0 if none - * Returns: EBADF The file descriptor is invalid - * EBADF The file descriptor is not a socket - * 0 Success - * - * Implicit returns: - * *resultfp (modified) Fileproc pointer - * *resultatalk (modified) atalk pointer - * - * Notes: The second EBADF should probably be something else to make - * the error condition distinct. - * - * XXX This code is specific to AppleTalk protocol support, and - * XXX should be conditionally compiled - */ -int -fp_getfatalk(proc_t p, int fd, struct fileproc **resultfp, - struct atalk **resultatalk) -{ - struct filedesc *fdp = p->p_fd; - struct fileproc *fp; - - proc_fdlock_spin(p); - if (fd < 0 || fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fdp->fd_ofileflags[fd] & UF_RESERVED)) { - proc_fdunlock(p); - return (EBADF); - } - if (fp->f_type != (DTYPE_ATALK+1)) { - proc_fdunlock(p); - return(EBADF); - } - fp->f_iocount++; - - if (resultfp) - *resultfp = fp; - if (resultatalk) - *resultatalk = (struct atalk *)fp->f_data; - proc_fdunlock(p); - - return (0); -} - -#endif /* NETAT */ - /* * fp_lookup * @@ -3449,6 +3582,72 @@ fp_lookup(proc_t p, int fd, struct fileproc **resultfp, int locked) } +/* + * fp_tryswap + * + * Description: Swap the fileproc pointer for a given fd with a new + * fileproc pointer in the per-process open file table of + * the specified process. The fdlock must be held at entry. + * + * Parameters: p Process containing the fd + * fd The fd of interest + * nfp Pointer to the newfp + * + * Returns: 0 Success + * EBADF Bad file descriptor + * EINTR Interrupted + * EKEEPLOOKING f_iocount changed while lock was dropped. + */ +int +fp_tryswap(proc_t p, int fd, struct fileproc *nfp) +{ + struct fileproc *fp; + int error; + + proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED); + + if (0 != (error = fp_lookup(p, fd, &fp, 1))) + return (error); + /* + * At this point, our caller (change_guardedfd_np) has + * one f_iocount reference, and we just took another + * one to begin the replacement. 
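+ * Three outcomes follow: fewer than two references is a bug (panic); exactly two means we are the only users and can swap in place; more than two means other threads still hold references, so wait for them to drain and return EKEEPLOOKING so the caller can retry.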
+ */ + if (fp->f_iocount < 2) { + panic("f_iocount too small %d", fp->f_iocount); + } else if (2 == fp->f_iocount) { + + /* Copy the contents of *fp, preserving the "type" of *nfp */ + + nfp->f_flags = (nfp->f_flags & FP_TYPEMASK) | + (fp->f_flags & ~FP_TYPEMASK); + nfp->f_iocount = fp->f_iocount; + nfp->f_fglob = fp->f_fglob; + nfp->f_waddr = fp->f_waddr; + + p->p_fd->fd_ofiles[fd] = nfp; + (void) fp_drop(p, fd, nfp, 1); + } else { + /* + * Wait for all other active references to evaporate. + */ + p->p_fpdrainwait = 1; + error = msleep(&p->p_fpdrainwait, &p->p_fdmlock, + PRIBIO | PCATCH, "tryswap fpdrain", NULL); + if (0 == error) { + /* + * Return an "internal" errno to trigger a full + * reevaluation of the change-guard attempt. + */ + error = EKEEPLOOKING; + printf("%s: lookup collision fd %d\n", __func__, fd); + } + (void) fp_drop(p, fd, fp, 1); + } + return (error); +} + + /* * fp_drop_written * @@ -3879,6 +4078,9 @@ file_drop(int fd) } +static int falloc_withalloc_locked(proc_t, struct fileproc **, int *, + vfs_context_t, struct fileproc * (*)(void *), void *, int); + /* * falloc * @@ -3912,16 +4114,32 @@ file_drop(int fd) */ int falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx) +{ + return (falloc_withalloc(p, resultfp, resultfd, ctx, + fileproc_alloc_init, NULL)); +} + +/* + * Like falloc, but including the fileproc allocator and create-args + */ +int +falloc_withalloc(proc_t p, struct fileproc **resultfp, int *resultfd, + vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *arg) { int error; proc_fdlock(p); - error = falloc_locked(p, resultfp, resultfd, ctx, 1); + error = falloc_withalloc_locked(p, + resultfp, resultfd, ctx, fp_zalloc, arg, 1); proc_fdunlock(p); - return(error); + return (error); } +/* + * "uninitialized" ops -- ensure fg->fg_ops->fo_type always exists + */ +static const struct fileops uninitops; /* * falloc_locked @@ -3968,6 +4186,15 @@ falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx) int falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx, int locked) +{ + return (falloc_withalloc_locked(p, resultfp, resultfd, ctx, + fileproc_alloc_init, NULL, locked)); +} + +static int +falloc_withalloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, + vfs_context_t ctx, fp_allocfn_t fp_zalloc, void *crarg, + int locked) { struct fileproc *fp; struct fileglob *fg; @@ -4003,7 +4230,7 @@ falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, */ proc_fdunlock(p); - MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK); + fp = (*fp_zalloc)(crarg); if (fp == NULL) { if (locked) proc_fdlock(p); @@ -4011,17 +4238,17 @@ falloc_locked(proc_t p, struct fileproc **resultfp, int *resultfd, } MALLOC_ZONE(fg, struct fileglob *, sizeof(struct fileglob), M_FILEGLOB, M_WAITOK); if (fg == NULL) { - FREE_ZONE(fp, sizeof(*fp), M_FILEPROC); + fileproc_free(fp); if (locked) proc_fdlock(p); return (ENOMEM); } - bzero(fp, sizeof(struct fileproc)); bzero(fg, sizeof(struct fileglob)); lck_mtx_init(&fg->fg_lock, file_lck_grp, file_lck_attr); fp->f_iocount = 1; fg->fg_count = 1; + fg->fg_ops = &uninitops; fp->f_fglob = fg; #if CONFIG_MACF mac_file_label_init(fg); @@ -4164,7 +4391,7 @@ fdexec(proc_t p, short flags) closef_locked(fp, fp->f_fglob, p); - FREE_ZONE(fp, sizeof(*fp), M_FILEPROC); + fileproc_free(fp); } } proc_fdunlock(p); @@ -4289,7 +4516,6 @@ fdcopy(proc_t p, vnode_t uth_cdir) FREE_ZONE(newfdp, sizeof *newfdp, M_FILEDESC); return(NULL); } - 
newfdp->fd_refcnt = 1; /* * If the number of open files fits in the internal arrays @@ -4342,10 +4568,14 @@ fdcopy(proc_t p, vnode_t uth_cdir) */ if (newfdp->fd_knlistsize != -1) { fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile]; - for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) { + flags = &newfdp->fd_ofileflags[newfdp->fd_lastfile]; + for (i = newfdp->fd_lastfile; + i >= 0; i--, fpp--, flags--) { + if (*flags & UF_RESERVED) + continue; /* (removed below) */ if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) { *fpp = NULL; - newfdp->fd_ofileflags[i] = 0; + *flags = 0; if (i < newfdp->fd_freefile) newfdp->fd_freefile = i; } @@ -4361,8 +4591,13 @@ fdcopy(proc_t p, vnode_t uth_cdir) flags = newfdp->fd_ofileflags; for (i = newfdp->fd_lastfile + 1; --i >= 0; fpp++, flags++) - if ((ofp = *fpp) != NULL && !(*flags & UF_RESERVED)) { - MALLOC_ZONE(fp, struct fileproc *, sizeof(struct fileproc), M_FILEPROC, M_WAITOK); + if ((ofp = *fpp) != NULL && + 0 == (*flags & (UF_FORKCLOSE|UF_RESERVED))) { +#if DEBUG + if (FILEPROC_TYPE(ofp) != FTYPE_SIMPLE) + panic("complex fileproc"); +#endif + fp = fileproc_alloc_init(NULL); if (fp == NULL) { /* * XXX no room to copy, unable to @@ -4370,10 +4605,8 @@ fdcopy(proc_t p, vnode_t uth_cdir) */ *fpp = NULL; } else { - bzero(fp, sizeof(struct fileproc)); - fp->f_flags = ofp->f_flags; - //fp->f_iocount = ofp->f_iocount; - fp->f_iocount = 0; + fp->f_flags |= + (ofp->f_flags & ~FP_TYPEMASK); fp->f_fglob = ofp->f_fglob; (void)fg_ref(fp); *fpp = fp; @@ -4414,17 +4647,15 @@ fdfree(proc_t p) proc_fdlock(p); - /* Certain daemons might not have file descriptors */ - fdp = p->p_fd; - - if ((fdp == NULL) || (--fdp->fd_refcnt > 0)) { + if (p == kernproc || NULL == (fdp = p->p_fd)) { proc_fdunlock(p); return; } - if (fdp->fd_refcnt == 0xffff) - panic("fdfree: bad fd_refcnt"); - /* Last reference: the structure can't change out from under us */ + extern struct filedesc filedesc0; + + if (&filedesc0 == fdp) + panic("filedesc0"); if (fdp->fd_nfiles > 0 && fdp->fd_ofiles) { for (i = fdp->fd_lastfile; i >= 0; i--) { @@ -4433,9 +4664,6 @@ fdfree(proc_t p) if (fdp->fd_ofileflags[i] & UF_RESERVED) panic("fdfree: found fp with UF_RESERVED"); - /* closef drops the iocount ... */ - if ((fp->f_flags & FP_INCHRREAD) != 0) - fp->f_iocount++; procfdtbl_reservefd(p, i); if (i < fdp->fd_knlistsize) @@ -4443,7 +4671,7 @@ fdfree(proc_t p) if (fp->f_flags & FP_WAITEVENT) (void)waitevent_close(p, fp); (void) closef_locked(fp, fp->f_fglob, p); - FREE_ZONE(fp, sizeof(*fp), M_FILEPROC); + fileproc_free(fp); } } FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL); @@ -4470,55 +4698,6 @@ fdfree(proc_t p) FREE_ZONE(fdp, sizeof(*fdp), M_FILEDESC); } - -/* - * closef_finish - * - * Description: Called on last open instance for a fileglob for a file being - * closed. - * - * Parameters: fp Pointer to fileproc for fd - * fg Pointer to fileglob for fd - * p Pointer to proc structure - * - * Returns: 0 Success - * :??? Anything returnable by a per-fileops - * close function - * - * Note: fp can only be non-NULL if p is also non-NULL. If p is NULL, - * then fg must eith be locked (FHASLOCK) or must not have a - * type of DTYPE_VNODE. - * - * On return, the fg is freed. - * - * This function may block draining output to a character - * device on last close of that device. - */ -static int -closef_finish(struct fileproc *fp, struct fileglob *fg, proc_t p, vfs_context_t ctx) -{ - int error; - - - /* fg_ops completed initialization? 
*/ - if (fg->fg_ops) - error = fo_close(fg, ctx); - else - error = 0; - - /* if fp is non-NULL, drain it out */ - if (((fp != (struct fileproc *)0) && ((fp->f_flags & FP_INCHRREAD) != 0))) { - proc_fdlock_spin(p); - if ( ((fp->f_flags & FP_INCHRREAD) != 0) ) { - fileproc_drain(p, fp); - } - proc_fdunlock(p); - } - fg_free(fg); - - return (error); -} - /* * closef_locked * @@ -4566,7 +4745,8 @@ closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p) * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. */ - if (p && (p->p_ladvflag & P_LADVLOCK) && fg->fg_type == DTYPE_VNODE) { + if (p && (p->p_ladvflag & P_LADVLOCK) && + DTYPE_VNODE == FILEGLOB_DTYPE(fg)) { proc_fdunlock(p); lf.l_whence = SEEK_SET; @@ -4576,7 +4756,7 @@ closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p) vp = (struct vnode *)fg->fg_data; if ( (error = vnode_getwithref(vp)) == 0 ) { - (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context); + (void) VNOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX, &context, NULL); (void)vnode_put(vp); } proc_fdlock(p); @@ -4601,7 +4781,13 @@ closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p) if (p) proc_fdunlock(p); - error = closef_finish(fp, fg, p, &context); + + /* Since we ensure that fg->fg_ops is always initialized, + * it is safe to invoke fo_close on the fg */ + error = fo_close(fg, &context); + + fg_free(fg); + if (p) proc_fdlock(p); @@ -4693,7 +4879,7 @@ fp_free(proc_t p, int fd, struct fileproc * fp) proc_fdunlock(p); fg_free(fp->f_fglob); - FREE_ZONE(fp, sizeof(*fp), M_FILEPROC); + fileproc_free(fp); return(0); } @@ -4747,7 +4933,7 @@ flock(proc_t p, struct flock_args *uap, __unused int32_t *retval) if (how & LOCK_UN) { lf.l_type = F_UNLCK; fp->f_flag &= ~FHASLOCK; - error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx); + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL); goto out; } if (how & LOCK_EX) @@ -4765,10 +4951,10 @@ flock(proc_t p, struct flock_args *uap, __unused int32_t *retval) #endif fp->f_flag |= FHASLOCK; if (how & LOCK_NB) { - error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK, ctx); + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK, ctx, NULL); goto out; } - error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK|F_WAIT, ctx); + error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, F_FLOCK|F_WAIT, ctx, NULL); out: (void)vnode_put(vp); out1: @@ -4817,6 +5003,13 @@ fileport_makeport(proc_t p, struct fileport_makeport_args *uap, goto out; } + if (FP_ISGUARDED(fp, GUARD_FILEPORT)) { + proc_fdlock(p); + err = fp_guard_exception(p, fd, fp, kGUARD_EXC_FILEPORT); + proc_fdunlock(p); + goto out; + } + /* Dropped when port is deallocated */ fg = fp->f_fglob; fg_ref(fp); @@ -4913,14 +5106,12 @@ fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval) goto out; } - MALLOC_ZONE(fp, struct fileproc *, sizeof(*fp), M_FILEPROC, M_WAITOK); + fp = fileproc_alloc_init(NULL); if (fp == FILEPROC_NULL) { err = ENOMEM; goto out; } - bzero(fp, sizeof(*fp)); - fp->f_fglob = fg; fg_ref(fp); @@ -4939,7 +5130,7 @@ fileport_makefd(proc_t p, struct fileport_makefd_args *uap, int32_t *retval) err = 0; out: if ((fp != NULL) && (0 != err)) { - FREE_ZONE(fp, sizeof(*fp), M_FILEPROC); + fileproc_free(fp); } if (IPC_PORT_NULL != port) { @@ -5018,6 +5209,13 @@ dupfdopen(struct filedesc *fdp, int indx, int dfd, int flags, int error) */ switch (error) { case ENODEV: + if 
(FP_ISGUARDED(wfp, GUARD_DUP)) { + int err = fp_guard_exception(p, + dfd, wfp, kGUARD_EXC_DUP); + proc_fdunlock(p); + return (err); + } + /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. @@ -5105,7 +5303,7 @@ fg_drop(struct fileproc * fp) lck_mtx_unlock(&fg->fg_lock); } - +#if SOCKETS /* * fg_insertuipc * @@ -5200,7 +5398,7 @@ fg_removeuipc(struct fileglob * fg) lck_mtx_unlock(&fg->fg_lock); } } - +#endif /* SOCKETS */ /* * fo_read @@ -5356,3 +5554,31 @@ filetype_issendable(file_type_t fdtype) return FALSE; } } + + +struct fileproc * +fileproc_alloc_init(__unused void *arg) +{ + struct fileproc *fp; + + MALLOC_ZONE(fp, struct fileproc *, sizeof (*fp), M_FILEPROC, M_WAITOK); + if (fp) + bzero(fp, sizeof (*fp)); + + return (fp); +} + +void +fileproc_free(struct fileproc *fp) +{ + switch (FILEPROC_TYPE(fp)) { + case FTYPE_SIMPLE: + FREE_ZONE(fp, sizeof (*fp), M_FILEPROC); + break; + case FTYPE_GUARDED: + guarded_fileproc_free(fp); + break; + default: + panic("%s: corrupt fp %p flags %x", __func__, fp, fp->f_flags); + } +} diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index 0e2705e5a..52d271a02 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ * */ @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include #include #include @@ -81,6 +81,7 @@ #include #include #include +#include #include #include @@ -98,97 +99,106 @@ #include #endif +#if CONFIG_MEMORYSTATUS +#include +#endif + MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); -#define KQ_EVENT NULL +#define KQ_EVENT NULL static inline void kqlock(struct kqueue *kq); static inline void kqunlock(struct kqueue *kq); -static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn); -static int kqlock2knoteusewait(struct kqueue *kq, struct knote *kn); -static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn); -static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn); - -static void kqueue_wakeup(struct kqueue *kq, int closed); -static int kqueue_read(struct fileproc *fp, struct uio *uio, - int flags, vfs_context_t ctx); -static int kqueue_write(struct fileproc *fp, struct uio *uio, - int flags, vfs_context_t ctx); -static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data, - vfs_context_t ctx); -static int kqueue_select(struct fileproc *fp, int which, void *wql, - vfs_context_t ctx); -static int kqueue_close(struct fileglob *fg, vfs_context_t ctx); -static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx); -static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx); -extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat64, vfs_context_t ctx); - -static struct fileops kqueueops = { - .fo_read = kqueue_read, - .fo_write = kqueue_write, - .fo_ioctl = kqueue_ioctl, - .fo_select = kqueue_select, - .fo_close = kqueue_close, - .fo_kqfilter = kqueue_kqfilter, +static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn); +static int kqlock2knoteusewait(struct kqueue *kq, struct knote *kn); +static int kqlock2knotedrop(struct kqueue *kq, struct knote *kn); +static int knoteuse2kqlock(struct kqueue *kq, struct knote *kn); + +static void kqueue_wakeup(struct kqueue *kq, int closed); +static int kqueue_read(struct fileproc *fp, struct uio *uio, + int flags, vfs_context_t ctx); +static int kqueue_write(struct fileproc *fp, struct uio *uio, + int flags, vfs_context_t ctx); +static int kqueue_ioctl(struct fileproc *fp, u_long com, caddr_t data, + vfs_context_t ctx); +static int kqueue_select(struct fileproc *fp, int which, void *wql, + vfs_context_t ctx); +static int kqueue_close(struct fileglob *fg, vfs_context_t ctx); +static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn, + vfs_context_t ctx); +static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx); +extern int kqueue_stat(struct fileproc *fp, void *ub, int isstat64, + vfs_context_t ctx); + +static const struct fileops kqueueops = { + .fo_type = DTYPE_KQUEUE, + .fo_read = kqueue_read, + .fo_write = kqueue_write, + .fo_ioctl = kqueue_ioctl, + .fo_select = kqueue_select, + .fo_close = kqueue_close, + .fo_kqfilter = kqueue_kqfilter, .fo_drain = kqueue_drain, }; static int kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, - int nchanges, user_addr_t eventlist, int nevents, int fd, - user_addr_t utimeout, unsigned int flags, int32_t *retval); -static int kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, int iskev64); -static int kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, int iskev64); + int nchanges, user_addr_t eventlist, int nevents, int fd, + user_addr_t utimeout, unsigned int flags, int32_t *retval); 
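+/* iskev64 distinguishes the kevent64() interface (struct kevent64_s) from the traditional kevent() structure layout in the copyin/copyout helpers below. */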
+static int kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, + struct proc *p, int iskev64); +static int kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, + struct proc *p, int iskev64); char * kevent_description(struct kevent64_s *kevp, char *s, size_t n); -static int kevent_callback(struct kqueue *kq, struct kevent64_s *kevp, void *data); -static void kevent_continue(struct kqueue *kq, void *data, int error); -static void kqueue_scan_continue(void *contp, wait_result_t wait_result); -static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, - void *data, int *countp, struct proc *p); -static int kqueue_begin_processing(struct kqueue *kq); -static void kqueue_end_processing(struct kqueue *kq); -static int knote_process(struct knote *kn, kevent_callback_t callback, - void *data, struct kqtailq *inprocessp, struct proc *p); -static void knote_put(struct knote *kn); -static int knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p); -static void knote_drop(struct knote *kn, struct proc *p); -static void knote_activate(struct knote *kn, int); -static void knote_deactivate(struct knote *kn); -static void knote_enqueue(struct knote *kn); -static void knote_dequeue(struct knote *kn); -static struct knote *knote_alloc(void); -static void knote_free(struct knote *kn); - -static int filt_fileattach(struct knote *kn); +static int kevent_callback(struct kqueue *kq, struct kevent64_s *kevp, + void *data); +static void kevent_continue(struct kqueue *kq, void *data, int error); +static void kqueue_scan_continue(void *contp, wait_result_t wait_result); +static int kqueue_process(struct kqueue *kq, kevent_callback_t callback, + void *data, int *countp, struct proc *p); +static int kqueue_begin_processing(struct kqueue *kq); +static void kqueue_end_processing(struct kqueue *kq); +static int knote_process(struct knote *kn, kevent_callback_t callback, + void *data, struct kqtailq *inprocessp, struct proc *p); +static void knote_put(struct knote *kn); +static int knote_fdpattach(struct knote *kn, struct filedesc *fdp, + struct proc *p); +static void knote_drop(struct knote *kn, struct proc *p); +static void knote_activate(struct knote *kn, int); +static void knote_deactivate(struct knote *kn); +static void knote_enqueue(struct knote *kn); +static void knote_dequeue(struct knote *kn); +static struct knote *knote_alloc(void); +static void knote_free(struct knote *kn); + +static int filt_fileattach(struct knote *kn); static struct filterops file_filtops = { - .f_isfd = 1, - .f_attach = filt_fileattach, + .f_isfd = 1, + .f_attach = filt_fileattach, }; -static void filt_kqdetach(struct knote *kn); -static int filt_kqueue(struct knote *kn, long hint); +static void filt_kqdetach(struct knote *kn); +static int filt_kqueue(struct knote *kn, long hint); static struct filterops kqread_filtops = { - .f_isfd = 1, - .f_detach = filt_kqdetach, - .f_event = filt_kqueue, + .f_isfd = 1, + .f_detach = filt_kqdetach, + .f_event = filt_kqueue, }; -/* - * placeholder for not-yet-implemented filters - */ -static int filt_badattach(struct knote *kn); +/* placeholder for not-yet-implemented filters */ +static int filt_badattach(struct knote *kn); static struct filterops bad_filtops = { - .f_attach = filt_badattach, + .f_attach = filt_badattach, }; -static int filt_procattach(struct knote *kn); -static void filt_procdetach(struct knote *kn); -static int filt_proc(struct knote *kn, long hint); +static int filt_procattach(struct knote *kn); +static void filt_procdetach(struct knote 
*kn); +static int filt_proc(struct knote *kn, long hint); static struct filterops proc_filtops = { - .f_attach = filt_procattach, - .f_detach = filt_procdetach, - .f_event = filt_proc, + .f_attach = filt_procattach, + .f_detach = filt_procdetach, + .f_event = filt_proc, }; #if VM_PRESSURE_EVENTS @@ -202,40 +212,43 @@ static struct filterops vm_filtops = { }; #endif /* VM_PRESSURE_EVENTS */ +#if CONFIG_MEMORYSTATUS +extern struct filterops memorystatus_filtops; +#endif /* CONFIG_MEMORYSTATUS */ + extern struct filterops fs_filtops; extern struct filterops sig_filtops; /* Timer filter */ -static int filt_timerattach(struct knote *kn); -static void filt_timerdetach(struct knote *kn); -static int filt_timer(struct knote *kn, long hint); -static void filt_timertouch(struct knote *kn, struct kevent64_s *kev, - long type); +static int filt_timerattach(struct knote *kn); +static void filt_timerdetach(struct knote *kn); +static int filt_timer(struct knote *kn, long hint); +static void filt_timertouch(struct knote *kn, struct kevent64_s *kev, + long type); static struct filterops timer_filtops = { - .f_attach = filt_timerattach, - .f_detach = filt_timerdetach, - .f_event = filt_timer, - .f_touch = filt_timertouch, + .f_attach = filt_timerattach, + .f_detach = filt_timerdetach, + .f_event = filt_timer, + .f_touch = filt_timertouch, }; /* Helpers */ +static void filt_timerexpire(void *knx, void *param1); +static int filt_timervalidate(struct knote *kn); +static void filt_timerupdate(struct knote *kn); +static void filt_timercancel(struct knote *kn); -static void filt_timerexpire(void *knx, void *param1); -static int filt_timervalidate(struct knote *kn); -static void filt_timerupdate(struct knote *kn); -static void filt_timercancel(struct knote *kn); - -#define TIMER_RUNNING 0x1 -#define TIMER_CANCELWAIT 0x2 +#define TIMER_RUNNING 0x1 +#define TIMER_CANCELWAIT 0x2 static lck_mtx_t _filt_timerlock; -static void filt_timerlock(void); -static void filt_timerunlock(void); +static void filt_timerlock(void); +static void filt_timerunlock(void); -static zone_t knote_zone; +static zone_t knote_zone; -#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) +#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) #if 0 extern struct filterops aio_filtops; @@ -245,20 +258,20 @@ extern struct filterops aio_filtops; extern struct filterops machport_filtops; /* User filter */ -static int filt_userattach(struct knote *kn); -static void filt_userdetach(struct knote *kn); -static int filt_user(struct knote *kn, long hint); -static void filt_usertouch(struct knote *kn, struct kevent64_s *kev, - long type); +static int filt_userattach(struct knote *kn); +static void filt_userdetach(struct knote *kn); +static int filt_user(struct knote *kn, long hint); +static void filt_usertouch(struct knote *kn, struct kevent64_s *kev, + long type); static struct filterops user_filtops = { - .f_attach = filt_userattach, - .f_detach = filt_userdetach, - .f_event = filt_user, - .f_touch = filt_usertouch, + .f_attach = filt_userattach, + .f_detach = filt_userdetach, + .f_event = filt_user, + .f_touch = filt_usertouch, }; /* - * Table for for all system-defined filters. + * Table for all system-defined filters. 
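+ * (Filter ids are small negative numbers; lookup is by one's complement of the id, so EVFILT_READ (-1) maps to the first slot below.)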
*/ static struct filterops *sysfilt_ops[] = { &file_filtops, /* EVFILT_READ */ @@ -282,6 +295,11 @@ static struct filterops *sysfilt_ops[] = { &bad_filtops, /* EVFILT_VM */ #endif &file_filtops, /* EVFILT_SOCK */ +#if CONFIG_MEMORYSTATUS + &memorystatus_filtops, /* EVFILT_MEMORYSTATUS */ +#else + &bad_filtops, /* EVFILT_MEMORYSTATUS */ +#endif }; /* @@ -307,13 +325,12 @@ kqunlock(struct kqueue *kq) lck_spin_unlock(&kq->kq_lock); } -/* +/* * Convert a kq lock to a knote use reference. * * If the knote is being dropped, we can't get * a use reference, so just return with it * still locked. - * * - kq locked at entry * - unlock on exit if we get the use reference */ @@ -321,20 +338,19 @@ static int kqlock2knoteuse(struct kqueue *kq, struct knote *kn) { if (kn->kn_status & KN_DROPPING) - return 0; + return (0); kn->kn_inuse++; kqunlock(kq); - return 1; - } + return (1); +} -/* +/* * Convert a kq lock to a knote use reference, * but wait for attach and drop events to complete. * * If the knote is being dropped, we can't get * a use reference, so just return with it * still locked. - * * - kq locked at entry * - kq always unlocked on exit */ @@ -343,18 +359,18 @@ kqlock2knoteusewait(struct kqueue *kq, struct knote *kn) { if ((kn->kn_status & (KN_DROPPING | KN_ATTACHING)) != 0) { kn->kn_status |= KN_USEWAIT; - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_UNINT, 0); + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, + &kn->kn_status, THREAD_UNINT, 0); kqunlock(kq); thread_block(THREAD_CONTINUE_NULL); - return 0; + return (0); } kn->kn_inuse++; kqunlock(kq); - return 1; - } - + return (1); +} -/* +/* * Convert from a knote use reference back to kq lock. * * Drop a use reference and wake any waiters if @@ -374,21 +390,22 @@ knoteuse2kqlock(struct kqueue *kq, struct knote *kn) } if ((kn->kn_status & KN_USEWAIT) != 0) { kn->kn_status &= ~KN_USEWAIT; - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED); + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, + &kn->kn_status, THREAD_AWAKENED); } } return ((kn->kn_status & KN_DROPPING) == 0); - } +} -/* - * Convert a kq lock to a knote drop referece. +/* + * Convert a kq lock to a knote drop reference. * * If the knote is in use, wait for the use count * to subside. We first mark our intention to drop * it - keeping other users from "piling on." * If we are too late, we have to wait for the * other drop to complete. - * + * * - kq locked at entry * - always unlocked on exit. * - caller can't hold any locks that would prevent @@ -404,17 +421,18 @@ kqlock2knotedrop(struct kqueue *kq, struct knote *kn) if (oktodrop) { if (kn->kn_inuse == 0) { kqunlock(kq); - return oktodrop; + return (oktodrop); } } kn->kn_status |= KN_USEWAIT; - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_UNINT, 0); + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kn->kn_status, + THREAD_UNINT, 0); kqunlock(kq); thread_block(THREAD_CONTINUE_NULL); - return oktodrop; + return (oktodrop); } - -/* + +/* * Release a knote use count reference.
*/ static void @@ -426,26 +444,25 @@ knote_put(struct knote *kn) if (--kn->kn_inuse == 0) { if ((kn->kn_status & KN_USEWAIT) != 0) { kn->kn_status &= ~KN_USEWAIT; - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED); + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, + &kn->kn_status, THREAD_AWAKENED); } } kqunlock(kq); - } +} static int filt_fileattach(struct knote *kn) { - return (fo_kqfilter(kn->kn_fp, kn, vfs_context_current())); } -#define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type -#define f_msgcount f_fglob->fg_msgcount -#define f_cred f_fglob->fg_cred -#define f_ops f_fglob->fg_ops -#define f_offset f_fglob->fg_offset -#define f_data f_fglob->fg_data +#define f_flag f_fglob->fg_flag +#define f_msgcount f_fglob->fg_msgcount +#define f_cred f_fglob->fg_cred +#define f_ops f_fglob->fg_ops +#define f_offset f_fglob->fg_offset +#define f_data f_fglob->fg_data static void filt_kqdetach(struct knote *kn) @@ -473,9 +490,9 @@ filt_procattach(struct knote *kn) struct proc *p; assert(PID_MAX < NOTE_PDATAMASK); - + if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) - return(ENOTSUP); + return (ENOTSUP); p = proc_find(kn->kn_id); if (p == NULL) { @@ -524,7 +541,7 @@ filt_procdetach(struct knote *kn) struct proc *p; proc_klist_lock(); - + p = kn->kn_ptr.p_proc; if (p != PROC_NULL) { kn->kn_ptr.p_proc = PROC_NULL; @@ -537,6 +554,11 @@ filt_procdetach(struct knote *kn) static int filt_proc(struct knote *kn, long hint) { + /* + * Note: a lot of bits in hint may be obtained from the knote + * To free some of those bits, see Freeing up + * bits in hint for filt_proc + */ /* hint is 0 when called from above */ if (hint != 0) { u_int event; @@ -572,29 +594,61 @@ filt_proc(struct knote *kn, long hint) if (kn->kn_sfflags & event) kn->kn_fflags |= event; - if (event == NOTE_REAP || (event == NOTE_EXIT && !(kn->kn_sfflags & NOTE_REAP))) { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + if ((event == NOTE_REAP) || ((event == NOTE_EXIT) && !(kn->kn_sfflags & NOTE_REAP))) { kn->kn_flags |= (EV_EOF | EV_ONESHOT); } - if ((event == NOTE_EXIT) && ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0)) { - kn->kn_fflags |= NOTE_EXITSTATUS; - kn->kn_data = (hint & NOTE_PDATAMASK); - } - if ((event == NOTE_RESOURCEEND) && ((kn->kn_sfflags & NOTE_RESOURCEEND) != 0)) { - kn->kn_fflags |= NOTE_RESOURCEEND; - kn->kn_data = (hint & NOTE_PDATAMASK); - } -#if CONFIG_EMBEDDED - /* If the event is one of the APPSTATE events,remove the rest */ - if (((event & NOTE_APPALLSTATES) != 0) && ((kn->kn_sfflags & NOTE_APPALLSTATES) != 0)) { - /* only one state at a time */ - kn->kn_fflags &= ~NOTE_APPALLSTATES; - kn->kn_fflags |= event; +#pragma clang diagnostic pop + + if (event == NOTE_EXIT) { + kn->kn_data = 0; + if ((kn->kn_sfflags & NOTE_EXITSTATUS) != 0) { + kn->kn_fflags |= NOTE_EXITSTATUS; + kn->kn_data |= (hint & NOTE_PDATAMASK); + } + if ((kn->kn_sfflags & NOTE_EXIT_DETAIL) != 0) { + kn->kn_fflags |= NOTE_EXIT_DETAIL; + if ((kn->kn_ptr.p_proc->p_lflag & + P_LTERM_DECRYPTFAIL) != 0) { + kn->kn_data |= NOTE_EXIT_DECRYPTFAIL; + } + if ((kn->kn_ptr.p_proc->p_lflag & + P_LTERM_JETSAM) != 0) { + kn->kn_data |= NOTE_EXIT_MEMORY; + switch (kn->kn_ptr.p_proc->p_lflag & + P_JETSAM_MASK) { + case P_JETSAM_VMPAGESHORTAGE: + kn->kn_data |= NOTE_EXIT_MEMORY_VMPAGESHORTAGE; + break; + case P_JETSAM_VMTHRASHING: + kn->kn_data |= NOTE_EXIT_MEMORY_VMTHRASHING; + break; + case P_JETSAM_VNODE: + kn->kn_data |= NOTE_EXIT_MEMORY_VNODE; + break; + 
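/*
 * (Editorial sketch, not part of the patch: how a userspace watcher
 * consumes the detail bits assembled by this switch.  The NOTE_EXIT*
 * constants are those from <sys/event.h>; "kq", "pid", "kev" and "ev"
 * are hypothetical locals.
 *
 *    EV_SET64(&kev, pid, EVFILT_PROC, EV_ADD,
 *        NOTE_EXIT | NOTE_EXIT_DETAIL, 0, 0, 0, 0);
 *    kevent64(kq, &kev, 1, NULL, 0, 0, NULL);
 *    n = kevent64(kq, NULL, 0, &ev, 1, 0, NULL);    // wait for exit
 *    if (n == 1 && (ev.fflags & NOTE_EXIT_DETAIL) &&
 *        (ev.data & NOTE_EXIT_MEMORY))
 *        printf("pid %d exited under memory pressure\n", pid);
 * )
 */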
case P_JETSAM_HIWAT: + kn->kn_data |= NOTE_EXIT_MEMORY_HIWAT; + break; + case P_JETSAM_PID: + kn->kn_data |= NOTE_EXIT_MEMORY_PID; + break; + case P_JETSAM_IDLEEXIT: + kn->kn_data |= NOTE_EXIT_MEMORY_IDLE; + break; + } + } + if ((kn->kn_ptr.p_proc->p_csflags & + CS_KILLED) != 0) { + kn->kn_data |= NOTE_EXIT_CSERROR; + } + } } -#endif /* CONFIG_EMBEDDED */ + } /* atomic check, no locking need when called from above */ - return (kn->kn_fflags != 0); + return (kn->kn_fflags != 0); } #if VM_PRESSURE_EVENTS @@ -606,14 +660,13 @@ filt_proc(struct knote *kn, long hint) static int filt_vmattach(struct knote *kn) -{ - /* - * The note will be cleared once the information has been flushed to the client. - * If there is still pressure, we will be re-alerted. +{ + /* + * The note will be cleared once the information has been flushed to + * the client. If there is still pressure, we will be re-alerted. */ - kn->kn_flags |= EV_CLEAR; - - return vm_knote_register(kn); + kn->kn_flags |= EV_CLEAR; + return (vm_knote_register(kn)); } static void @@ -626,29 +679,30 @@ static int filt_vm(struct knote *kn, long hint) { /* hint == 0 means this is just an alive? check (always true) */ - if (hint != 0) { + if (hint != 0) { const pid_t pid = (pid_t)hint; - if ((kn->kn_sfflags & NOTE_VM_PRESSURE) && (kn->kn_kq->kq_p->p_pid == pid)) { + if ((kn->kn_sfflags & NOTE_VM_PRESSURE) && + (kn->kn_kq->kq_p->p_pid == pid)) { kn->kn_fflags |= NOTE_VM_PRESSURE; } } - + return (kn->kn_fflags != 0); } #endif /* VM_PRESSURE_EVENTS */ /* * filt_timervalidate - process data from user - * - * Converts to either interval or deadline format. - * + * + * Converts to either interval or deadline format. + * * The saved-data field in the knote contains the * time value. The saved filter-flags indicates * the unit of measurement. * - * After validation, either the saved-data field - * contains the interval in absolute time, or ext[0] - * contains the expected deadline. If that deadline + * After validation, either the saved-data field + * contains the interval in absolute time, or ext[0] + * contains the expected deadline. If that deadline * is in the past, ext[0] is 0. * * Returns EINVAL for unrecognized units of time. @@ -660,7 +714,7 @@ static int filt_timervalidate(struct knote *kn) { uint64_t multiplier; - uint64_t raw; + uint64_t raw = 0; switch (kn->kn_sfflags & (NOTE_SECONDS|NOTE_USECONDS|NOTE_NSECONDS)) { case NOTE_SECONDS: @@ -676,7 +730,13 @@ filt_timervalidate(struct knote *kn) multiplier = NSEC_PER_SEC / 1000; break; default: - return EINVAL; + return (EINVAL); + } + + /* transform the slop delta(leeway) in kn_ext[1] if passed to same time scale */ + if(kn->kn_sfflags & NOTE_LEEWAY){ + nanoseconds_to_absolutetime((uint64_t)kn->kn_ext[1] * multiplier, &raw); + kn->kn_ext[1] = raw; } nanoseconds_to_absolutetime((uint64_t)kn->kn_sdata * multiplier, &raw); @@ -690,22 +750,22 @@ filt_timervalidate(struct knote *kn) uint64_t now; clock_get_calendar_nanotime(&seconds, &nanoseconds); - nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + - nanoseconds, &now); + nanoseconds_to_absolutetime((uint64_t)seconds * NSEC_PER_SEC + + nanoseconds, &now); if (raw < now) { /* time has already passed */ kn->kn_ext[0] = 0; } else { raw -= now; - clock_absolutetime_interval_to_deadline(raw, - &kn->kn_ext[0]); + clock_absolutetime_interval_to_deadline(raw, + &kn->kn_ext[0]); } } else { kn->kn_sdata = raw; } - return 0; + return (0); } /* @@ -719,7 +779,7 @@ filt_timervalidate(struct knote *kn) * * Timer filter lock is held. 
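 *
 * (Editorial example, hypothetical values: for a repeating timer whose
 * kn_sdata holds 100ms in absolutetime units, each pop advances the
 * deadline with
 *
 *    kn->kn_ext[0] += kn->kn_sdata;
 *
 * that is, relative to the last intended deadline, so late delivery
 * does not accumulate drift the way "now + interval" rescheduling
 * would.)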
*/ -static void +static void filt_timerupdate(struct knote *kn) { /* if there's no interval, deadline is just in kn_ext[0] */ @@ -729,27 +789,27 @@ filt_timerupdate(struct knote *kn) /* if timer hasn't fired before, fire in interval nsecs */ if (kn->kn_ext[0] == 0) { clock_absolutetime_interval_to_deadline(kn->kn_sdata, - &kn->kn_ext[0]); + &kn->kn_ext[0]); } else { - /* - * If timer has fired before, schedule the next pop - * relative to the last intended deadline. + /* + * If timer has fired before, schedule the next pop + * relative to the last intended deadline. * - * We could check for whether the deadline has expired, + * We could check for whether the deadline has expired, * but the thread call layer can handle that. */ kn->kn_ext[0] += kn->kn_sdata; } } -/* +/* * filt_timerexpire - the timer callout routine * - * Just propagate the timer event into the knote - * filter routine (by going through the knote - * synchronization point). Pass a hint to - * indicate this is a real event, not just a - * query from above. + * Just propagate the timer event into the knote + * filter routine (by going through the knote + * synchronization point). Pass a hint to + * indicate this is a real event, not just a + * query from above. */ static void filt_timerexpire(void *knx, __unused void *spare) @@ -763,14 +823,14 @@ filt_timerexpire(void *knx, __unused void *spare) /* no "object" for timers, so fake a list */ SLIST_INIT(&timer_list); - SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext); + SLIST_INSERT_HEAD(&timer_list, kn, kn_selnext); KNOTE(&timer_list, 1); /* if someone is waiting for timer to pop */ if (kn->kn_hookid & TIMER_CANCELWAIT) { struct kqueue *kq = kn->kn_kq; - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_hook, - THREAD_AWAKENED); + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_hook, + THREAD_AWAKENED); } filt_timerunlock(); @@ -795,8 +855,8 @@ filt_timercancel(struct knote *kn) } else { /* we have to wait for the expire routine. */ kn->kn_hookid |= TIMER_CANCELWAIT; - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, - &kn->kn_hook, THREAD_UNINT, 0); + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, + &kn->kn_hook, THREAD_UNINT, 0); filt_timerunlock(); thread_block(THREAD_CONTINUE_NULL); filt_timerlock(); @@ -807,7 +867,7 @@ filt_timercancel(struct knote *kn) /* * Allocate a thread call for the knote's lifetime, and kick off the timer. 
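 *
 * (Editorial note: the attach path below also maps kevent flags onto
 * thread-call urgencies: NOTE_CRITICAL selects
 * THREAD_CALL_DELAY_USER_CRITICAL, NOTE_BACKGROUND selects
 * THREAD_CALL_DELAY_USER_BACKGROUND, anything else gets
 * THREAD_CALL_DELAY_USER_NORMAL, and NOTE_LEEWAY adds
 * THREAD_CALL_DELAY_LEEWAY with the slop taken from kn_ext[1].)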
- */ + */ static int filt_timerattach(struct knote *kn) { @@ -820,7 +880,7 @@ filt_timerattach(struct knote *kn) filt_timerlock(); error = filt_timervalidate(kn); - if (error) { + if (error != 0) { filt_timerunlock(); return (error); } @@ -830,12 +890,25 @@ filt_timerattach(struct knote *kn) /* absolute=EV_ONESHOT */ if (kn->kn_sfflags & NOTE_ABSOLUTE) - kn->kn_flags |= EV_ONESHOT; + kn->kn_flags |= EV_ONESHOT; filt_timerupdate(kn); if (kn->kn_ext[0]) { kn->kn_flags |= EV_CLEAR; - thread_call_enter_delayed(callout, kn->kn_ext[0]); + unsigned int timer_flags = 0; + if (kn->kn_sfflags & NOTE_CRITICAL) + timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL; + else if (kn->kn_sfflags & NOTE_BACKGROUND) + timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND; + else + timer_flags |= THREAD_CALL_DELAY_USER_NORMAL; + + if (kn->kn_sfflags & NOTE_LEEWAY) + timer_flags |= THREAD_CALL_DELAY_LEEWAY; + + thread_call_enter_delayed_with_leeway(callout, NULL, + kn->kn_ext[0], kn->kn_ext[1], timer_flags); + kn->kn_hookid |= TIMER_RUNNING; } else { /* fake immediate */ @@ -858,8 +931,8 @@ filt_timerdetach(struct knote *kn) callout = (thread_call_t)kn->kn_hook; filt_timercancel(kn); - - filt_timerunlock(); + + filt_timerunlock(); thread_call_free(callout); } @@ -870,28 +943,40 @@ static int filt_timer(struct knote *kn, long hint) { int result; - + if (hint) { /* real timer pop -- timer lock held by filt_timerexpire */ - kn->kn_data++; - if (((kn->kn_hookid & TIMER_CANCELWAIT) == 0) && + if (((kn->kn_hookid & TIMER_CANCELWAIT) == 0) && ((kn->kn_flags & EV_ONESHOT) == 0)) { /* evaluate next time to fire */ filt_timerupdate(kn); if (kn->kn_ext[0]) { + unsigned int timer_flags = 0; + /* keep the callout and re-arm */ - thread_call_enter_delayed(kn->kn_hook, - kn->kn_ext[0]); + if (kn->kn_sfflags & NOTE_CRITICAL) + timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL; + else if (kn->kn_sfflags & NOTE_BACKGROUND) + timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND; + else + timer_flags |= THREAD_CALL_DELAY_USER_NORMAL; + + if (kn->kn_sfflags & NOTE_LEEWAY) + timer_flags |= THREAD_CALL_DELAY_LEEWAY; + + thread_call_enter_delayed_with_leeway(kn->kn_hook, NULL, + kn->kn_ext[0], kn->kn_ext[1], timer_flags); + kn->kn_hookid |= TIMER_RUNNING; } } - return 1; - } + return (1); + } /* user-query */ filt_timerlock(); @@ -899,18 +984,19 @@ filt_timer(struct knote *kn, long hint) result = (kn->kn_data != 0); filt_timerunlock(); - return result; + + return (result); } /* * filt_timertouch - update knote with new user input * - * Cancel and restart the timer based on new user data. When - * the user picks up a knote, clear the count of how many timer - * pops have gone off (in kn_data). + * Cancel and restart the timer based on new user data. When + * the user picks up a knote, clear the count of how many timer + * pops have gone off (in kn_data). 
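 *
 * (A minimal re-tune sketch, hypothetical userspace caller, using
 * EV_SET64 and the NOTE_* flags as defined in <sys/event.h>.
 * Re-adding the same ident routes through this touch path; under
 * NOTE_LEEWAY, ext[1] carries the leeway in the same units as the
 * period:
 *
 *    EV_SET64(&kev, 1, EVFILT_TIMER, EV_ADD,
 *        NOTE_USECONDS | NOTE_LEEWAY, 100000, 0, 0, 5000);
 *    kevent64(kq, &kev, 1, NULL, 0, 0, NULL);
 *
 * requests a 100ms period with 5ms of coalescing slop.)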
*/ -static void +static void filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type) { int error; @@ -924,6 +1010,8 @@ filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type) /* recalculate deadline */ kn->kn_sdata = kev->data; kn->kn_sfflags = kev->fflags; + kn->kn_ext[0] = kev->ext[0]; + kn->kn_ext[1] = kev->ext[1]; error = filt_timervalidate(kn); if (error) { @@ -931,12 +1019,26 @@ filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type) kn->kn_flags |= EV_ERROR; kn->kn_data = error; break; - } + } /* start timer if necessary */ filt_timerupdate(kn); + if (kn->kn_ext[0]) { - thread_call_enter_delayed(kn->kn_hook, kn->kn_ext[0]); + unsigned int timer_flags = 0; + if (kn->kn_sfflags & NOTE_CRITICAL) + timer_flags |= THREAD_CALL_DELAY_USER_CRITICAL; + else if (kn->kn_sfflags & NOTE_BACKGROUND) + timer_flags |= THREAD_CALL_DELAY_USER_BACKGROUND; + else + timer_flags |= THREAD_CALL_DELAY_USER_NORMAL; + + if (kn->kn_sfflags & NOTE_LEEWAY) + timer_flags |= THREAD_CALL_DELAY_LEEWAY; + + thread_call_enter_delayed_with_leeway(kn->kn_hook, NULL, + kn->kn_ext[0], kn->kn_ext[1], timer_flags); + kn->kn_hookid |= TIMER_RUNNING; } else { /* pretend the timer has fired */ @@ -954,7 +1056,7 @@ filt_timertouch(struct knote *kn, struct kevent64_s *kev, long type) kn->kn_fflags = 0; break; default: - panic("filt_timertouch() - invalid type (%ld)", type); + panic("%s: - invalid type (%ld)", __func__, type); break; } @@ -976,114 +1078,114 @@ filt_timerunlock(void) static int filt_userattach(struct knote *kn) { - /* EVFILT_USER knotes are not attached to anything in the kernel */ - kn->kn_hook = NULL; + /* EVFILT_USER knotes are not attached to anything in the kernel */ + kn->kn_hook = NULL; if (kn->kn_fflags & NOTE_TRIGGER) { kn->kn_hookid = 1; } else { kn->kn_hookid = 0; } - return 0; + return (0); } static void filt_userdetach(__unused struct knote *kn) { - /* EVFILT_USER knotes are not attached to anything in the kernel */ + /* EVFILT_USER knotes are not attached to anything in the kernel */ } static int filt_user(struct knote *kn, __unused long hint) { - return kn->kn_hookid; + return (kn->kn_hookid); } static void filt_usertouch(struct knote *kn, struct kevent64_s *kev, long type) { - uint32_t ffctrl; - switch (type) { - case EVENT_REGISTER: - if (kev->fflags & NOTE_TRIGGER) { - kn->kn_hookid = 1; - } - - ffctrl = kev->fflags & NOTE_FFCTRLMASK; - kev->fflags &= NOTE_FFLAGSMASK; - switch (ffctrl) { - case NOTE_FFNOP: - break; - case NOTE_FFAND: - OSBitAndAtomic(kev->fflags, &kn->kn_sfflags); - break; - case NOTE_FFOR: - OSBitOrAtomic(kev->fflags, &kn->kn_sfflags); - break; - case NOTE_FFCOPY: - kn->kn_sfflags = kev->fflags; - break; - } - kn->kn_sdata = kev->data; - break; - case EVENT_PROCESS: - *kev = kn->kn_kevent; - kev->fflags = (volatile UInt32)kn->kn_sfflags; - kev->data = kn->kn_sdata; - if (kn->kn_flags & EV_CLEAR) { + uint32_t ffctrl; + switch (type) { + case EVENT_REGISTER: + if (kev->fflags & NOTE_TRIGGER) { + kn->kn_hookid = 1; + } + + ffctrl = kev->fflags & NOTE_FFCTRLMASK; + kev->fflags &= NOTE_FFLAGSMASK; + switch (ffctrl) { + case NOTE_FFNOP: + break; + case NOTE_FFAND: + OSBitAndAtomic(kev->fflags, &kn->kn_sfflags); + break; + case NOTE_FFOR: + OSBitOrAtomic(kev->fflags, &kn->kn_sfflags); + break; + case NOTE_FFCOPY: + kn->kn_sfflags = kev->fflags; + break; + } + kn->kn_sdata = kev->data; + break; + case EVENT_PROCESS: + *kev = kn->kn_kevent; + kev->fflags = (volatile UInt32)kn->kn_sfflags; + kev->data = kn->kn_sdata; + if (kn->kn_flags & EV_CLEAR) { 
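/*
 * (Editorial sketch of the EVFILT_USER round trip served here,
 * hypothetical userspace caller, standard <sys/event.h> names:
 *
 *    EV_SET64(&kev, 7, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0, 0, 0);
 *    kevent64(kq, &kev, 1, NULL, 0, 0, NULL);    // register
 *    EV_SET64(&kev, 7, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0, 0, 0);
 *    kevent64(kq, &kev, 1, NULL, 0, 0, NULL);    // fire it
 *
 * With EV_CLEAR, the reset just below disarms the knote again after
 * each delivery.)
 */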
kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } - break; - default: - panic("filt_usertouch() - invalid type (%ld)", type); - break; - } + break; + default: + panic("%s: - invalid type (%ld)", __func__, type); + break; + } } /* * JMM - placeholder for not-yet-implemented filters - */ + */ static int filt_badattach(__unused struct knote *kn) { - return(ENOTSUP); + return (ENOTSUP); } - struct kqueue * kqueue_alloc(struct proc *p) { struct filedesc *fdp = p->p_fd; struct kqueue *kq; - MALLOC_ZONE(kq, struct kqueue *, sizeof(struct kqueue), M_KQUEUE, M_WAITOK); + MALLOC_ZONE(kq, struct kqueue *, sizeof (struct kqueue), M_KQUEUE, + M_WAITOK); if (kq != NULL) { wait_queue_set_t wqs; - wqs = wait_queue_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST); + wqs = wait_queue_set_alloc(SYNC_POLICY_FIFO | + SYNC_POLICY_PREPOST); if (wqs != NULL) { - bzero(kq, sizeof(struct kqueue)); + bzero(kq, sizeof (struct kqueue)); lck_spin_init(&kq->kq_lock, kq_lck_grp, kq_lck_attr); TAILQ_INIT(&kq->kq_head); kq->kq_wqs = wqs; kq->kq_p = p; } else { - FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE); + FREE_ZONE(kq, sizeof (struct kqueue), M_KQUEUE); } } if (fdp->fd_knlistsize < 0) { proc_fdlock(p); if (fdp->fd_knlistsize < 0) - fdp->fd_knlistsize = 0; /* this process has had a kq */ + fdp->fd_knlistsize = 0; /* this process has had a kq */ proc_fdunlock(p); } - return kq; + return (kq); } - /* * kqueue_dealloc - detach all knotes from a kqueue and free it * @@ -1149,24 +1251,25 @@ kqueue_dealloc(struct kqueue *kq) } proc_fdunlock(p); - /* + /* * before freeing the wait queue set for this kqueue, * make sure it is unlinked from all its containing (select) sets. */ wait_queue_unlink_all((wait_queue_t)kq->kq_wqs); wait_queue_set_free(kq->kq_wqs); lck_spin_destroy(&kq->kq_lock, kq_lck_grp); - FREE_ZONE(kq, sizeof(struct kqueue), M_KQUEUE); + FREE_ZONE(kq, sizeof (struct kqueue), M_KQUEUE); } int -kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval) +kqueue_body(struct proc *p, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval) { struct kqueue *kq; struct fileproc *fp; int fd, error; - error = falloc(p, &fp, &fd, vfs_context_current()); + error = falloc_withalloc(p, + &fp, &fd, vfs_context_current(), fp_zalloc, cra); if (error) { return (error); } @@ -1178,9 +1281,8 @@ kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval) } fp->f_flag = FREAD | FWRITE; - fp->f_type = DTYPE_KQUEUE; fp->f_ops = &kqueueops; - fp->f_data = (caddr_t)kq; + fp->f_data = kq; proc_fdlock(p); procfdtbl_releasefd(p, fd, NULL); @@ -1191,23 +1293,30 @@ kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval) return (error); } +int +kqueue(struct proc *p, __unused struct kqueue_args *uap, int32_t *retval) +{ + return (kqueue_body(p, fileproc_alloc_init, NULL, retval)); +} + static int -kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, int iskev64) +kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, + int iskev64) { int advance; int error; if (iskev64) { - advance = sizeof(struct kevent64_s); + advance = sizeof (struct kevent64_s); error = copyin(*addrp, (caddr_t)kevp, advance); } else if (IS_64BIT_PROCESS(p)) { struct user64_kevent kev64; - bzero(kevp, sizeof(struct kevent64_s)); + bzero(kevp, sizeof (struct kevent64_s)); - advance = sizeof(kev64); + advance = sizeof (kev64); error = copyin(*addrp, (caddr_t)&kev64, advance); if (error) - return error; + return (error); kevp->ident = kev64.ident; kevp->filter = kev64.filter; 
kevp->flags = kev64.flags; @@ -1216,12 +1325,12 @@ kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, int i kevp->udata = kev64.udata; } else { struct user32_kevent kev32; - bzero(kevp, sizeof(struct kevent64_s)); + bzero(kevp, sizeof (struct kevent64_s)); - advance = sizeof(kev32); + advance = sizeof (kev32); error = copyin(*addrp, (caddr_t)&kev32, advance); if (error) - return error; + return (error); kevp->ident = (uintptr_t)kev32.ident; kevp->filter = kev32.filter; kevp->flags = kev32.flags; @@ -1231,17 +1340,18 @@ kevent_copyin(user_addr_t *addrp, struct kevent64_s *kevp, struct proc *p, int i } if (!error) *addrp += advance; - return error; + return (error); } static int -kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, int iskev64) +kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, + int iskev64) { int advance; int error; if (iskev64) { - advance = sizeof(struct kevent64_s); + advance = sizeof (struct kevent64_s); error = copyout((caddr_t)kevp, *addrp, advance); } else if (IS_64BIT_PROCESS(p)) { struct user64_kevent kev64; @@ -1251,14 +1361,14 @@ kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, int * value of (uintptr_t)-1. */ kev64.ident = (kevp->ident == (uintptr_t)-1) ? - (uint64_t)-1LL : (uint64_t)kevp->ident; + (uint64_t)-1LL : (uint64_t)kevp->ident; kev64.filter = kevp->filter; kev64.flags = kevp->flags; kev64.fflags = kevp->fflags; kev64.data = (int64_t) kevp->data; kev64.udata = kevp->udata; - advance = sizeof(kev64); + advance = sizeof (kev64); error = copyout((caddr_t)&kev64, *addrp, advance); } else { struct user32_kevent kev32; @@ -1269,12 +1379,12 @@ kevent_copyout(struct kevent64_s *kevp, user_addr_t *addrp, struct proc *p, int kev32.fflags = kevp->fflags; kev32.data = (int32_t)kevp->data; kev32.udata = kevp->udata; - advance = sizeof(kev32); + advance = sizeof (kev32); error = copyout((caddr_t)&kev32, *addrp, advance); } if (!error) *addrp += advance; - return error; + return (error); } /* @@ -1318,38 +1428,38 @@ kevent_continue(__unused struct kqueue *kq, void *data, int error) int kevent(struct proc *p, struct kevent_args *uap, int32_t *retval) { - return kevent_internal(p, - 0, - uap->changelist, - uap->nchanges, - uap->eventlist, - uap->nevents, - uap->fd, - uap->timeout, - 0, /* no flags from old kevent() call */ - retval); -} - + return (kevent_internal(p, + 0, + uap->changelist, + uap->nchanges, + uap->eventlist, + uap->nevents, + uap->fd, + uap->timeout, + 0, /* no flags from old kevent() call */ + retval)); +} + int kevent64(struct proc *p, struct kevent64_args *uap, int32_t *retval) { - return kevent_internal(p, - 1, - uap->changelist, - uap->nchanges, - uap->eventlist, - uap->nevents, - uap->fd, - uap->timeout, - uap->flags, - retval); + return (kevent_internal(p, + 1, + uap->changelist, + uap->nchanges, + uap->eventlist, + uap->nevents, + uap->fd, + uap->timeout, + uap->flags, + retval)); } static int -kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, - int nchanges, user_addr_t ueventlist, int nevents, int fd, - user_addr_t utimeout, __unused unsigned int flags, - int32_t *retval) +kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, + int nchanges, user_addr_t ueventlist, int nevents, int fd, + user_addr_t utimeout, __unused unsigned int flags, + int32_t *retval) { struct _kevent *cont_args; uthread_t ut; @@ -1375,9 +1485,9 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, 
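/*
 * (Editorial sketch, hypothetical caller: the utimeout copied in above
 * becomes one absolute uptime deadline here, so the timeout bounds the
 * whole scan rather than each individual wait:
 *
 *    struct timespec ts = { 0, 500000000 };    // 500ms total budget
 *    n = kevent64(kq, NULL, 0, evlist, 8, 0, &ts);
 * )
 */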
TIMESPEC_TO_TIMEVAL(&rtv, &ts); } if (error) - return error; + return (error); if (itimerfix(&rtv)) - return EINVAL; + return (EINVAL); getmicrouptime(&atv); timevaladd(&atv, &rtv); } else { @@ -1387,8 +1497,8 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, /* get a usecount for the kq itself */ if ((error = fp_getfkq(p, fd, &fp, &kq)) != 0) - return(error); - + return (error); + /* each kq should only be used for events of one type */ kqlock(kq); if (kq->kq_state & (KQ_KEV32 | KQ_KEV64)) { @@ -1409,7 +1519,7 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, error = kevent_copyin(&changelist, &kev, p, iskev64); if (error) break; - + kev.flags &= ~EV_SYSFLAGS; error = kevent_register(kq, &kev, p); if ((error || (kev.flags & EV_RECEIPT)) && nevents > 0) { @@ -1437,26 +1547,25 @@ kevent_internal(struct proc *p, int iskev64, user_addr_t changelist, if (nevents > 0 && noutputs == 0 && error == 0) error = kqueue_scan(kq, kevent_callback, - kevent_continue, cont_args, - &atv, p); + kevent_continue, cont_args, + &atv, p); kevent_continue(kq, cont_args, error); errorout: fp_drop(p, fd, fp, 0); - return error; + return (error); } /* * kevent_callback - callback for each individual event * - * called with nothing locked - * caller holds a reference on the kqueue + * called with nothing locked + * caller holds a reference on the kqueue */ - static int -kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, - void *data) +kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, + void *data) { struct _kevent *cont_args; int error; @@ -1470,38 +1579,40 @@ kevent_callback(__unused struct kqueue *kq, struct kevent64_s *kevp, /* * Copy out the appropriate amount of event data for this user. */ - error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(), iskev64); + error = kevent_copyout(kevp, &cont_args->eventlist, current_proc(), + iskev64); /* * If there isn't space for additional events, return * a harmless error to stop the processing here */ if (error == 0 && ++cont_args->eventout == cont_args->eventcount) - error = EWOULDBLOCK; - return error; + error = EWOULDBLOCK; + return (error); } /* * kevent_description - format a description of a kevent for diagnostic output * - * called with a 128-byte string buffer + * called with a 128-byte string buffer */ char * kevent_description(struct kevent64_s *kevp, char *s, size_t n) { - snprintf(s, n, - "kevent=" - "{.ident=%#llx, .filter=%d, .flags=%#x, .fflags=%#x, .data=%#llx, .udata=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}", - kevp->ident, - kevp->filter, - kevp->flags, - kevp->fflags, - kevp->data, - kevp->udata, - kevp->ext[0], - kevp->ext[1]); - return s; + snprintf(s, n, + "kevent=" + "{.ident=%#llx, .filter=%d, .flags=%#x, .fflags=%#x, .data=%#llx, .udata=%#llx, .ext[0]=%#llx, .ext[1]=%#llx}", + kevp->ident, + kevp->filter, + kevp->flags, + kevp->fflags, + kevp->data, + kevp->udata, + kevp->ext[0], + kevp->ext[1]); + + return (s); } /* @@ -1519,7 +1630,8 @@ kevent_description(struct kevent64_s *kevp, char *s, size_t n) */ int -kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc *ctxp) +kevent_register(struct kqueue *kq, struct kevent64_s *kev, + __unused struct proc *ctxp) { struct proc *p = kq->kq_p; struct filedesc *fdp = p->p_fd; @@ -1542,12 +1654,12 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc return (EINVAL); } - restart: +restart: /* this iocount needs to be dropped if it is not registered */ proc_fdlock(p); 
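/*
 * (Editorial note: each changelist entry arrives here one at a time.
 * For illustration, with a hypothetical caller and standard flags,
 *
 *    EV_SET64(&kev, fd, EVFILT_READ, EV_ADD | EV_DISABLE, 0, 0, 0, 0, 0);
 *
 * attaches a knote that in effect stays off the active queue until a
 * later EV_ENABLE for the same (ident, filter) pair finds it below
 * and re-queues it instead of allocating a second knote.)
 */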
if (fops->f_isfd && (error = fp_lookup(p, kev->ident, &fp, 1)) != 0) { proc_fdunlock(p); - return(error); + return (error); } if (fops->f_isfd) { @@ -1562,7 +1674,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc /* hash non-fd knotes here too */ if (fdp->fd_knhashmask != 0) { struct klist *list; - + list = &fdp->fd_knhash[ KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; SLIST_FOREACH(kn, list, kn_link) @@ -1650,7 +1762,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc /* existing knote - get kqueue lock */ kqlock(kq); proc_fdunlock(p); - + if (kev->flags & EV_DELETE) { knote_dequeue(kn); kn->kn_status |= KN_DISABLED; @@ -1660,7 +1772,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc } goto done; } - + /* update status flags for existing knote */ if (kev->flags & EV_DISABLE) { knote_dequeue(kn); @@ -1673,13 +1785,13 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc /* * The user may change some filter values after the - * initial EV_ADD, but doing so will not reset any + * initial EV_ADD, but doing so will not reset any * filter which have already been triggered. */ kn->kn_kevent.udata = kev->udata; if (fops->f_isfd || fops->f_touch == NULL) { - kn->kn_sfflags = kev->fflags; - kn->kn_sdata = kev->data; + kn->kn_sfflags = kev->fflags; + kn->kn_sdata = kev->data; } /* @@ -1700,7 +1812,7 @@ kevent_register(struct kqueue *kq, struct kevent64_s *kev, __unused struct proc * in filter values. */ if (!fops->f_isfd && fops->f_touch != NULL) - fops->f_touch(kn, kev, EVENT_REGISTER); + fops->f_touch(kn, kev, EVENT_REGISTER); } /* still have use ref on knote */ @@ -1738,11 +1850,11 @@ done: * kqueue locked on entry and exit - but may be dropped */ static int -knote_process(struct knote *kn, - kevent_callback_t callback, - void *data, - struct kqtailq *inprocessp, - struct proc *p) +knote_process(struct knote *kn, + kevent_callback_t callback, + void *data, + struct kqtailq *inprocessp, + struct proc *p) { struct kqueue *kq = kn->kn_kq; struct kevent64_s kev; @@ -1771,52 +1883,67 @@ knote_process(struct knote *kn, result = 1; revalidate = ((kn->kn_status & KN_STAYQUEUED) != 0 || - (kn->kn_flags & EV_ONESHOT) == 0); - touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); + (kn->kn_flags & EV_ONESHOT) == 0); + touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); if (revalidate || touch) { if (revalidate) knote_deactivate(kn); - + /* call the filter/touch routines with just a ref */ if (kqlock2knoteuse(kq, kn)) { - /* if we have to revalidate, call the filter */ if (revalidate) { result = kn->kn_fop->f_event(kn, 0); } - /* capture the kevent data - using touch if specified */ + /* + * capture the kevent data - using touch if + * specified + */ if (result && touch) { - kn->kn_fop->f_touch(kn, &kev, EVENT_PROCESS); + kn->kn_fop->f_touch(kn, &kev, + EVENT_PROCESS); } - /* convert back to a kqlock - bail if the knote went away */ + /* + * convert back to a kqlock - bail if the knote + * went away + */ if (!knoteuse2kqlock(kq, kn)) { - return EJUSTRETURN; + return (EJUSTRETURN); } else if (result) { - /* if revalidated as alive, make sure it's active */ + /* + * if revalidated as alive, make sure + * it's active + */ if (!(kn->kn_status & KN_ACTIVE)) { knote_activate(kn, 0); } - /* capture all events that occurred during filter */ + /* + * capture all events that occurred + * during filter + */ if (!touch) { kev = kn->kn_kevent; } } else if ((kn->kn_status 
& KN_STAYQUEUED) == 0) { - /* was already dequeued, so just bail on this one */ - return EJUSTRETURN; + /* + * was already dequeued, so just bail on + * this one + */ + return (EJUSTRETURN); } } else { - return EJUSTRETURN; + return (EJUSTRETURN); } } else { kev = kn->kn_kevent; } } - + /* move knote onto inprocess queue */ assert(kn->kn_tq == &kq->kq_head); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); @@ -1833,7 +1960,7 @@ knote_process(struct knote *kn, */ if (result == 0) { - return EJUSTRETURN; + return (EJUSTRETURN); } else if ((kn->kn_flags & EV_ONESHOT) != 0) { knote_deactivate(kn); if (kqlock2knotedrop(kq, kn)) { @@ -1867,9 +1994,9 @@ knote_process(struct knote *kn, /* callback to handle each event as we find it */ error = (callback)(kq, &kev, data); - + kqlock(kq); - return error; + return (error); } /* @@ -1884,19 +2011,20 @@ kqueue_begin_processing(struct kqueue *kq) { for (;;) { if (kq->kq_count == 0) { - return -1; + return (-1); } /* if someone else is processing the queue, wait */ if (kq->kq_nprocess != 0) { - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_UNINT, 0); + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, + &kq->kq_nprocess, THREAD_UNINT, 0); kq->kq_state |= KQ_PROCWAIT; kqunlock(kq); thread_block(THREAD_CONTINUE_NULL); kqlock(kq); } else { kq->kq_nprocess = 1; - return 0; + return (0); } } } @@ -1910,7 +2038,8 @@ kqueue_end_processing(struct kqueue *kq) kq->kq_nprocess = 0; if (kq->kq_state & KQ_PROCWAIT) { kq->kq_state &= ~KQ_PROCWAIT; - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kq->kq_nprocess, THREAD_AWAKENED); + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, + &kq->kq_nprocess, THREAD_AWAKENED); } } @@ -1931,27 +2060,27 @@ kqueue_end_processing(struct kqueue *kq) static int kqueue_process(struct kqueue *kq, - kevent_callback_t callback, - void *data, - int *countp, - struct proc *p) + kevent_callback_t callback, + void *data, + int *countp, + struct proc *p) { - struct kqtailq inprocess; + struct kqtailq inprocess; struct knote *kn; int nevents; int error; - TAILQ_INIT(&inprocess); + TAILQ_INIT(&inprocess); if (kqueue_begin_processing(kq) == -1) { *countp = 0; /* Nothing to process */ - return 0; + return (0); } /* - * Clear any pre-posted status from previous runs, so we only - * detect events that occur during this run. + * Clear any pre-posted status from previous runs, so we + * only detect events that occur during this run. 
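 *
 * (Editorial note: kq_wqs was created in kqueue_alloc() with
 *
 *    wait_queue_set_alloc(SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST);
 *
 * so wakeups posted while no thread was scanning are remembered on
 * the set; dropping those references keeps a stale prepost from
 * re-triggering this scan for events already consumed.)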
*/ wait_queue_sub_clearrefs(kq->kq_wqs); @@ -1964,7 +2093,7 @@ kqueue_process(struct kqueue *kq, nevents = 0; while (error == 0 && - (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) { + (kn = TAILQ_FIRST(&kq->kq_head)) != NULL) { error = knote_process(kn, callback, data, &inprocess, p); if (error == EJUSTRETURN) error = 0; @@ -1987,7 +2116,7 @@ kqueue_process(struct kqueue *kq, kqueue_end_processing(kq); *countp = nevents; - return error; + return (error); } @@ -2005,10 +2134,11 @@ kqueue_scan_continue(void *data, wait_result_t wait_result) switch (wait_result) { case THREAD_AWAKENED: kqlock(kq); - error = kqueue_process(kq, cont_args->call, cont_args, &count, current_proc()); + error = kqueue_process(kq, cont_args->call, cont_args, &count, + current_proc()); if (error == 0 && count == 0) { - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, KQ_EVENT, - THREAD_ABORTSAFE, cont_args->deadline); + wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, + KQ_EVENT, THREAD_ABORTSAFE, cont_args->deadline); kq->kq_state |= KQ_SLEEP; kqunlock(kq); thread_block_parameter(kqueue_scan_continue, kq); @@ -2017,16 +2147,17 @@ kqueue_scan_continue(void *data, wait_result_t wait_result) kqunlock(kq); break; case THREAD_TIMED_OUT: - error = EWOULDBLOCK; + error = EWOULDBLOCK; break; case THREAD_INTERRUPTED: error = EINTR; break; default: - panic("kevent_scan_cont() - invalid wait_result (%d)", wait_result); + panic("%s: - invalid wait_result (%d)", __func__, + wait_result); error = 0; } - + /* call the continuation with the results */ assert(cont_args->cont != NULL); (cont_args->cont)(kq, cont_args->data, error); @@ -2048,7 +2179,7 @@ kqueue_scan_continue(void *data, wait_result_t wait_result) */ int -kqueue_scan(struct kqueue *kq, +kqueue_scan(struct kqueue *kq, kevent_callback_t callback, kqueue_continue_t continuation, void *data, @@ -2069,7 +2200,7 @@ kqueue_scan(struct kqueue *kq, /* * Make a pass through the kq to find events already - * triggered. + * triggered. 
*/ kqlock(kq); error = kqueue_process(kq, callback, data, &count, p); @@ -2082,10 +2213,10 @@ kqueue_scan(struct kqueue *kq, /* convert the timeout to a deadline once */ if (atvp->tv_sec || atvp->tv_usec) { uint64_t now; - + clock_get_uptime(&now); nanoseconds_to_absolutetime((uint64_t)atvp->tv_sec * NSEC_PER_SEC + - atvp->tv_usec * NSEC_PER_USEC, + atvp->tv_usec * (long)NSEC_PER_USEC, &deadline); if (now >= deadline) { /* non-blocking call */ @@ -2101,7 +2232,7 @@ kqueue_scan(struct kqueue *kq, if (continuation) { uthread_t ut = (uthread_t)get_bsdthread_info(current_thread()); struct _kqueue_scan *cont_args = &ut->uu_kevent.ss_kqueue_scan; - + cont_args->call = callback; cont_args->cont = continuation; cont_args->deadline = deadline; @@ -2111,7 +2242,9 @@ kqueue_scan(struct kqueue *kq, } /* go ahead and wait */ - wait_queue_assert_wait((wait_queue_t)kq->kq_wqs, KQ_EVENT, THREAD_ABORTSAFE, deadline); + wait_queue_assert_wait_with_leeway((wait_queue_t)kq->kq_wqs, + KQ_EVENT, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, + deadline, 0); kq->kq_state |= KQ_SLEEP; kqunlock(kq); wait_result = thread_block_parameter(cont, kq); @@ -2121,17 +2254,17 @@ kqueue_scan(struct kqueue *kq, case THREAD_AWAKENED: continue; case THREAD_TIMED_OUT: - return EWOULDBLOCK; + return (EWOULDBLOCK); case THREAD_INTERRUPTED: - return EINTR; + return (EINTR); default: - panic("kevent_scan - bad wait_result (%d)", - wait_result); + panic("%s: - bad wait_result (%d)", __func__, + wait_result); error = 0; } } kqunlock(kq); - return error; + return (error); } @@ -2141,50 +2274,51 @@ kqueue_scan(struct kqueue *kq, */ /*ARGSUSED*/ static int -kqueue_read(__unused struct fileproc *fp, - __unused struct uio *uio, - __unused int flags, - __unused vfs_context_t ctx) +kqueue_read(__unused struct fileproc *fp, + __unused struct uio *uio, + __unused int flags, + __unused vfs_context_t ctx) { return (ENXIO); } /*ARGSUSED*/ static int -kqueue_write(__unused struct fileproc *fp, - __unused struct uio *uio, - __unused int flags, - __unused vfs_context_t ctx) +kqueue_write(__unused struct fileproc *fp, + __unused struct uio *uio, + __unused int flags, + __unused vfs_context_t ctx) { return (ENXIO); } /*ARGSUSED*/ static int -kqueue_ioctl(__unused struct fileproc *fp, - __unused u_long com, - __unused caddr_t data, - __unused vfs_context_t ctx) +kqueue_ioctl(__unused struct fileproc *fp, + __unused u_long com, + __unused caddr_t data, + __unused vfs_context_t ctx) { return (ENOTTY); } /*ARGSUSED*/ static int -kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx) +kqueue_select(struct fileproc *fp, int which, void *wql, + __unused vfs_context_t ctx) { struct kqueue *kq = (struct kqueue *)fp->f_data; struct knote *kn; struct kqtailq inprocessq; int retnum = 0; - + if (which != FREAD) - return 0; + return (0); TAILQ_INIT(&inprocessq); kqlock(kq); - /* + /* * If this is the first pass, link the wait queue associated with the * the kqueue onto the wait queue set for the select(). Normally we * use selrecord() for this, but it uses the wait queue within the @@ -2193,17 +2327,17 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t * (The select() call will unlink them when it ends). 
*/ if (wql != NULL) { - thread_t cur_act = current_thread(); + thread_t cur_act = current_thread(); struct uthread * ut = get_bsdthread_info(cur_act); kq->kq_state |= KQ_SEL; wait_queue_link_noalloc((wait_queue_t)kq->kq_wqs, ut->uu_wqset, - (wait_queue_link_t)wql); + (wait_queue_link_t)wql); } if (kqueue_begin_processing(kq) == -1) { kqunlock(kq); - return 0; + return (0); } if (kq->kq_count != 0) { @@ -2214,7 +2348,7 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t * list of knotes to see, and peek at the stay- * queued ones to be really sure. */ - while ((kn = (struct knote*)TAILQ_FIRST(&kq->kq_head)) != NULL) { + while ((kn = (struct knote *)TAILQ_FIRST(&kq->kq_head)) != NULL) { if ((kn->kn_status & KN_STAYQUEUED) == 0) { retnum = 1; goto out; @@ -2235,7 +2369,7 @@ kqueue_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t } else { retnum = 0; } - } + } } } @@ -2249,7 +2383,7 @@ out: kqueue_end_processing(kq); kqunlock(kq); - return retnum; + return (retnum); } /* @@ -2294,7 +2428,7 @@ kqueue_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_con */ kqlock(parentkq); - if (parentkq->kq_level > 0 && + if (parentkq->kq_level > 0 && parentkq->kq_level < kq->kq_level) { kqunlock(parentkq); @@ -2329,7 +2463,7 @@ kqueue_drain(struct fileproc *fp, __unused vfs_context_t ctx) kqlock(kq); kqueue_wakeup(kq, 1); kqunlock(kq); - return 0; + return (0); } /*ARGSUSED*/ @@ -2371,8 +2505,8 @@ kqueue_wakeup(struct kqueue *kq, int closed) { if ((kq->kq_state & (KQ_SLEEP | KQ_SEL)) != 0 || kq->kq_nprocess > 0) { kq->kq_state &= ~(KQ_SLEEP | KQ_SEL); - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, KQ_EVENT, - (closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED); + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, KQ_EVENT, + (closed) ? THREAD_INTERRUPTED : THREAD_AWAKENED); } } @@ -2428,7 +2562,7 @@ knote_attach(struct klist *list, struct knote *kn) { int ret = SLIST_EMPTY(list); SLIST_INSERT_HEAD(list, kn, kn_selnext); - return ret; + return (ret); } /* @@ -2439,12 +2573,12 @@ int knote_detach(struct klist *list, struct knote *kn) { SLIST_REMOVE(list, kn, knote, kn_selnext); - return SLIST_EMPTY(list); + return (SLIST_EMPTY(list)); } /* * For a given knote, link a provided wait queue directly with the kqueue. - * Wakeups will happen via recursive wait queue support. But nothing will move + * Wakeups will happen via recursive wait queue support. But nothing will move * the knote to the active list at wakeup (nothing calls knote()). Instead, * we permanently enqueue them here. * @@ -2461,9 +2595,9 @@ knote_link_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link_t kr = wait_queue_link_noalloc(wq, kq->kq_wqs, wql); if (kr == KERN_SUCCESS) { knote_markstayqueued(kn); - return 0; + return (0); } else { - return EINVAL; + return (EINVAL); } } @@ -2487,7 +2621,7 @@ knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link kn->kn_status &= ~KN_STAYQUEUED; knote_dequeue(kn); kqunlock(kq); - return (kr != KERN_SUCCESS) ? EINVAL : 0; + return ((kr != KERN_SUCCESS) ? EINVAL : 0); } /* @@ -2495,7 +2629,7 @@ knote_unlink_wait_queue(struct knote *kn, struct wait_queue *wq, wait_queue_link * * Essentially an inlined knote_remove & knote_drop * when we know for sure that the thing is a file - * + * * Entered with the proc_fd lock already held. * It returns the same way, but may drop it temporarily. 
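 *
 * (Editorial note: this is what gives close(2) its implicit-EV_DELETE
 * behavior.  With a hypothetical caller,
 *
 *    EV_SET64(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, 0, 0, 0);
 *    kevent64(kq, &kev, 1, NULL, 0, 0, NULL);
 *    close(fd);
 *
 * the knote is detached and dropped here rather than by a user
 * EV_DELETE.)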
*/ @@ -2511,7 +2645,8 @@ knote_fdclose(struct proc *p, int fd) struct kqueue *kq = kn->kn_kq; if (kq->kq_p != p) - panic("knote_fdclose: proc mismatch (kq->kq_p=%p != p=%p)", kq->kq_p, p); + panic("%s: proc mismatch (kq->kq_p=%p != p=%p)", + __func__, kq->kq_p, p); kqlock(kq); proc_fdunlock(p); @@ -2527,7 +2662,7 @@ knote_fdclose(struct proc *p, int fd) kn->kn_fop->f_detach(kn); knote_drop(kn, p); } - + proc_fdlock(p); /* the fd tables may have changed - start over */ @@ -2550,10 +2685,10 @@ knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p) if ((u_int)fdp->fd_knlistsize <= kn->kn_id) { u_int size = 0; - if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur + if (kn->kn_id >= (uint64_t)p->p_rlimit[RLIMIT_NOFILE].rlim_cur || kn->kn_id >= (uint64_t)maxfiles) return (EINVAL); - + /* have to grow the fd_knlist */ size = fdp->fd_knlistsize; while (size <= kn->kn_id) @@ -2563,15 +2698,15 @@ knote_fdpattach(struct knote *kn, struct filedesc *fdp, struct proc *p) return (EINVAL); MALLOC(list, struct klist *, - size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); + size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); if (list == NULL) return (ENOMEM); - + bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list, - fdp->fd_knlistsize * sizeof(struct klist *)); + fdp->fd_knlistsize * sizeof(struct klist *)); bzero((caddr_t)list + - fdp->fd_knlistsize * sizeof(struct klist *), - (size - fdp->fd_knlistsize) * sizeof(struct klist *)); + fdp->fd_knlistsize * sizeof(struct klist *), + (size - fdp->fd_knlistsize) * sizeof(struct klist *)); FREE(fdp->fd_knlist, M_KQUEUE); fdp->fd_knlist = list; fdp->fd_knlistsize = size; @@ -2593,7 +2728,7 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp) { struct kqueue *kq = kn->kn_kq; struct proc *p = kq->kq_p; - struct filedesc *fdp = p->p_fd; + struct filedesc *fdp = p->p_fd; struct klist *list; int needswakeup; @@ -2611,7 +2746,8 @@ knote_drop(struct knote *kn, __unused struct proc *ctxp) proc_fdunlock(p); if (needswakeup) - wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, THREAD_AWAKENED); + wait_queue_wakeup_all((wait_queue_t)kq->kq_wqs, &kn->kn_status, + THREAD_AWAKENED); if (kn->kn_fop->f_isfd) fp_drop(p, kn->kn_id, kn->kn_fp, 0); @@ -2637,7 +2773,7 @@ knote_activate(struct knote *kn, int propagate) /* called with kqueue lock held */ static void knote_deactivate(struct knote *kn) -{ +{ kn->kn_status &= ~KN_ACTIVE; knote_dequeue(kn); } @@ -2651,7 +2787,7 @@ knote_enqueue(struct knote *kn) struct kqtailq *tq = kn->kn_tq; struct kqueue *kq = kn->kn_kq; - TAILQ_INSERT_TAIL(tq, kn, kn_tqe); + TAILQ_INSERT_TAIL(tq, kn, kn_tqe); kn->kn_status |= KN_QUEUED; kq->kq_count++; } @@ -2666,7 +2802,7 @@ knote_dequeue(struct knote *kn) if ((kn->kn_status & (KN_QUEUED | KN_STAYQUEUED)) == KN_QUEUED) { struct kqtailq *tq = kn->kn_tq; - TAILQ_REMOVE(tq, kn, kn_tqe); + TAILQ_REMOVE(tq, kn, kn_tqe); kn->kn_tq = &kq->kq_head; kn->kn_status &= ~KN_QUEUED; kq->kq_count--; @@ -2676,10 +2812,11 @@ knote_dequeue(struct knote *kn) void knote_init(void) { - knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), 8192, "knote zone"); + knote_zone = zinit(sizeof(struct knote), 8192*sizeof(struct knote), + 8192, "knote zone"); /* allocate kq lock group attribute and group */ - kq_lck_grp_attr= lck_grp_attr_alloc_init(); + kq_lck_grp_attr = lck_grp_attr_alloc_init(); kq_lck_grp = lck_grp_alloc_init("kqueue", kq_lck_grp_attr); @@ -2688,11 +2825,16 @@ knote_init(void) /* Initialize the timer filter lock */ lck_mtx_init(&_filt_timerlock, 
kq_lck_grp, kq_lck_attr); - + #if VM_PRESSURE_EVENTS /* Initialize the vm pressure list lock */ vm_pressure_init(kq_lck_grp, kq_lck_attr); #endif + +#if CONFIG_MEMORYSTATUS + /* Initialize the memorystatus list lock */ + memorystatus_kevent_init(kq_lck_grp, kq_lck_attr); +#endif } SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL) @@ -2719,264 +2861,443 @@ knote_free(struct knote *kn) #include #include +static lck_grp_attr_t *kev_lck_grp_attr; +static lck_attr_t *kev_lck_attr; +static lck_grp_t *kev_lck_grp; +static decl_lck_rw_data(,kev_lck_data); +static lck_rw_t *kev_rwlock = &kev_lck_data; static int kev_attach(struct socket *so, int proto, struct proc *p); static int kev_detach(struct socket *so); -static int kev_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p); - -struct pr_usrreqs event_usrreqs = { - pru_abort_notsupp, pru_accept_notsupp, kev_attach, pru_bind_notsupp, pru_connect_notsupp, - pru_connect2_notsupp, kev_control, kev_detach, pru_disconnect_notsupp, - pru_listen_notsupp, pru_peeraddr_notsupp, pru_rcvd_notsupp, pru_rcvoob_notsupp, - pru_send_notsupp, pru_sense_null, pru_shutdown_notsupp, pru_sockaddr_notsupp, - pru_sosend_notsupp, soreceive, pru_sopoll_notsupp +static int kev_control(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct proc *p); +static lck_mtx_t * event_getlock(struct socket *, int); +static int event_lock(struct socket *, int, void *); +static int event_unlock(struct socket *, int, void *); + +static int event_sofreelastref(struct socket *); +static void kev_delete(struct kern_event_pcb *); + +static struct pr_usrreqs event_usrreqs = { + .pru_attach = kev_attach, + .pru_control = kev_control, + .pru_detach = kev_detach, + .pru_soreceive = soreceive, }; -struct protosw eventsw[] = { - { - .pr_type = SOCK_RAW, - .pr_domain = &systemdomain, - .pr_protocol = SYSPROTO_EVENT, - .pr_flags = PR_ATOMIC, - .pr_usrreqs = &event_usrreqs, - } +static struct protosw eventsw[] = { +{ + .pr_type = SOCK_RAW, + .pr_protocol = SYSPROTO_EVENT, + .pr_flags = PR_ATOMIC, + .pr_usrreqs = &event_usrreqs, + .pr_lock = event_lock, + .pr_unlock = event_unlock, + .pr_getlock = event_getlock, +} }; +static lck_mtx_t * +event_getlock(struct socket *so, int locktype) +{ +#pragma unused(locktype) + struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb; + + if (so->so_pcb != NULL) { + if (so->so_usecount < 0) + panic("%s: so=%p usecount=%d lrh= %s\n", __func__, + so, so->so_usecount, solockhistory_nr(so)); + /* NOTREACHED */ + } else { + panic("%s: so=%p NULL NO so_pcb %s\n", __func__, + so, solockhistory_nr(so)); + /* NOTREACHED */ + } + return (&ev_pcb->evp_mtx); +} + +static int +event_lock(struct socket *so, int refcount, void *lr) +{ + void *lr_saved; + + if (lr == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = lr; + + if (so->so_pcb != NULL) { + lck_mtx_lock(&((struct kern_event_pcb *)so->so_pcb)->evp_mtx); + } else { + panic("%s: so=%p NO PCB! 
lr=%p lrh= %s\n", __func__, + so, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ + } + + if (so->so_usecount < 0) { + panic("%s: so=%p so_pcb=%p lr=%p ref=%d lrh= %s\n", __func__, + so, so->so_pcb, lr_saved, so->so_usecount, + solockhistory_nr(so)); + /* NOTREACHED */ + } + + if (refcount) + so->so_usecount++; + + so->lock_lr[so->next_lock_lr] = lr_saved; + so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; + return (0); +} + +static int +event_unlock(struct socket *so, int refcount, void *lr) +{ + void *lr_saved; + lck_mtx_t *mutex_held; + + if (lr == NULL) + lr_saved = __builtin_return_address(0); + else + lr_saved = lr; + + if (refcount) + so->so_usecount--; + + if (so->so_usecount < 0) { + panic("%s: so=%p usecount=%d lrh= %s\n", __func__, + so, so->so_usecount, solockhistory_nr(so)); + /* NOTREACHED */ + } + if (so->so_pcb == NULL) { + panic("%s: so=%p NO PCB usecount=%d lr=%p lrh= %s\n", __func__, + so, so->so_usecount, (void *)lr_saved, + solockhistory_nr(so)); + /* NOTREACHED */ + } + mutex_held = (&((struct kern_event_pcb *)so->so_pcb)->evp_mtx); + + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + so->unlock_lr[so->next_unlock_lr] = lr_saved; + so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; + + if (so->so_usecount == 0) { + VERIFY(so->so_flags & SOF_PCBCLEARING); + event_sofreelastref(so); + } else { + lck_mtx_unlock(mutex_held); + } + + return (0); +} + +static int +event_sofreelastref(struct socket *so) +{ + struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb; + + lck_mtx_assert(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_OWNED); + + so->so_pcb = NULL; + + /* + * Disable upcall in the event another thread is in kev_post_msg() + * appending record to the receive socket buffer, since sbwakeup() + * may release the socket lock otherwise. + */ + so->so_rcv.sb_flags &= ~SB_UPCALL; + so->so_snd.sb_flags &= ~SB_UPCALL; + so->so_event = NULL; + lck_mtx_unlock(&(ev_pcb->evp_mtx)); + + lck_mtx_assert(&(ev_pcb->evp_mtx), LCK_MTX_ASSERT_NOTOWNED); + lck_rw_lock_exclusive(kev_rwlock); + LIST_REMOVE(ev_pcb, evp_link); + lck_rw_done(kev_rwlock); + kev_delete(ev_pcb); + + sofreelastref(so, 1); + return (0); +} + +static int event_proto_count = (sizeof (eventsw) / sizeof (struct protosw)); + static struct kern_event_head kern_event_head; static u_int32_t static_event_id = 0; -struct domain *sysdom = &systemdomain; -static lck_mtx_t *sys_mtx; + +#define EVPCB_ZONE_MAX 65536 +#define EVPCB_ZONE_NAME "kerneventpcb" +static struct zone *ev_pcb_zone; /* - * Install the protosw's for the NKE manager. Invoked at - * extension load time + * Install the protosw's for the NKE manager. 
Invoked at extension load time */ -int -kern_event_init(void) +void +kern_event_init(struct domain *dp) { - int retval; + struct protosw *pr; + int i; + + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(dp == systemdomain); + + kev_lck_grp_attr = lck_grp_attr_alloc_init(); + if (kev_lck_grp_attr == NULL) { + panic("%s: lck_grp_attr_alloc_init failed\n", __func__); + /* NOTREACHED */ + } + + kev_lck_grp = lck_grp_alloc_init("Kernel Event Protocol", + kev_lck_grp_attr); + if (kev_lck_grp == NULL) { + panic("%s: lck_grp_alloc_init failed\n", __func__); + /* NOTREACHED */ + } + + kev_lck_attr = lck_attr_alloc_init(); + if (kev_lck_attr == NULL) { + panic("%s: lck_attr_alloc_init failed\n", __func__); + /* NOTREACHED */ + } - if ((retval = net_add_proto(eventsw, &systemdomain)) != 0) { - log(LOG_WARNING, "Can't install kernel events protocol (%d)\n", retval); - return(retval); + lck_rw_init(kev_rwlock, kev_lck_grp, kev_lck_attr); + if (kev_rwlock == NULL) { + panic("%s: lck_mtx_alloc_init failed\n", __func__); + /* NOTREACHED */ } - - /* - * Use the domain mutex for all system event sockets - */ - sys_mtx = sysdom->dom_mtx; - - return(KERN_SUCCESS); + + for (i = 0, pr = &eventsw[0]; i < event_proto_count; i++, pr++) + net_add_proto(pr, dp, 1); + + ev_pcb_zone = zinit(sizeof(struct kern_event_pcb), + EVPCB_ZONE_MAX * sizeof(struct kern_event_pcb), 0, EVPCB_ZONE_NAME); + if (ev_pcb_zone == NULL) { + panic("%s: failed allocating ev_pcb_zone", __func__); + /* NOTREACHED */ + } + zone_change(ev_pcb_zone, Z_EXPAND, TRUE); + zone_change(ev_pcb_zone, Z_CALLERACCT, TRUE); } static int kev_attach(struct socket *so, __unused int proto, __unused struct proc *p) { - int error; - struct kern_event_pcb *ev_pcb; + int error = 0; + struct kern_event_pcb *ev_pcb; - error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE); - if (error) - return error; + error = soreserve(so, KEV_SNDSPACE, KEV_RECVSPACE); + if (error != 0) + return (error); - MALLOC(ev_pcb, struct kern_event_pcb *, sizeof(struct kern_event_pcb), M_PCB, M_WAITOK); - if (ev_pcb == 0) - return ENOBUFS; + if ((ev_pcb = (struct kern_event_pcb *)zalloc(ev_pcb_zone)) == NULL) { + return (ENOBUFS); + } + bzero(ev_pcb, sizeof(struct kern_event_pcb)); + lck_mtx_init(&ev_pcb->evp_mtx, kev_lck_grp, kev_lck_attr); - ev_pcb->ev_socket = so; - ev_pcb->vendor_code_filter = 0xffffffff; + ev_pcb->evp_socket = so; + ev_pcb->evp_vendor_code_filter = 0xffffffff; - so->so_pcb = (caddr_t) ev_pcb; - lck_mtx_lock(sys_mtx); - LIST_INSERT_HEAD(&kern_event_head, ev_pcb, ev_link); - lck_mtx_unlock(sys_mtx); + so->so_pcb = (caddr_t) ev_pcb; + lck_rw_lock_exclusive(kev_rwlock); + LIST_INSERT_HEAD(&kern_event_head, ev_pcb, evp_link); + lck_rw_done(kev_rwlock); - return 0; + return (error); } +static void +kev_delete(struct kern_event_pcb *ev_pcb) +{ + VERIFY(ev_pcb != NULL); + lck_mtx_destroy(&ev_pcb->evp_mtx, kev_lck_grp); + zfree(ev_pcb_zone, ev_pcb); +} static int kev_detach(struct socket *so) { - struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb; + struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *) so->so_pcb; - if (ev_pcb != 0) { - LIST_REMOVE(ev_pcb, ev_link); - FREE(ev_pcb, M_PCB); - so->so_pcb = 0; + if (ev_pcb != NULL) { + soisdisconnected(so); so->so_flags |= SOF_PCBCLEARING; - } + } - return 0; + return (0); } /* * For now, kev_vendor_code and mbuf_tags use the same * mechanism. 
*/ - errno_t kev_vendor_code_find( const char *string, u_int32_t *out_vendor_code) { if (strlen(string) >= KEV_VENDOR_CODE_MAX_STR_LEN) { - return EINVAL; + return (EINVAL); } - return net_str_id_find_internal(string, out_vendor_code, NSI_VENDOR_CODE, 1); + return (net_str_id_find_internal(string, out_vendor_code, + NSI_VENDOR_CODE, 1)); } -errno_t kev_msg_post(struct kev_msg *event_msg) +errno_t +kev_msg_post(struct kev_msg *event_msg) { - mbuf_tag_id_t min_vendor, max_vendor; - + mbuf_tag_id_t min_vendor, max_vendor; + net_str_id_first_last(&min_vendor, &max_vendor, NSI_VENDOR_CODE); - + if (event_msg == NULL) - return EINVAL; - - /* Limit third parties to posting events for registered vendor codes only */ + return (EINVAL); + + /* + * Limit third parties to posting events for registered vendor codes + * only + */ if (event_msg->vendor_code < min_vendor || - event_msg->vendor_code > max_vendor) - { - return EINVAL; - } - - return kev_post_msg(event_msg); + event_msg->vendor_code > max_vendor) + return (EINVAL); + + return (kev_post_msg(event_msg)); } - -int kev_post_msg(struct kev_msg *event_msg) +int +kev_post_msg(struct kev_msg *event_msg) { - struct mbuf *m, *m2; - struct kern_event_pcb *ev_pcb; - struct kern_event_msg *ev; - char *tmp; - u_int32_t total_size; - int i; + struct mbuf *m, *m2; + struct kern_event_pcb *ev_pcb; + struct kern_event_msg *ev; + char *tmp; + u_int32_t total_size; + int i; /* Verify the message is small enough to fit in one mbuf w/o cluster */ total_size = KEV_MSG_HEADER_SIZE; - + for (i = 0; i < 5; i++) { if (event_msg->dv[i].data_length == 0) break; total_size += event_msg->dv[i].data_length; } - + if (total_size > MLEN) { - return EMSGSIZE; - } - - m = m_get(M_DONTWAIT, MT_DATA); - if (m == 0) - return ENOBUFS; - - ev = mtod(m, struct kern_event_msg *); - total_size = KEV_MSG_HEADER_SIZE; - - tmp = (char *) &ev->event_data[0]; - for (i = 0; i < 5; i++) { - if (event_msg->dv[i].data_length == 0) - break; - - total_size += event_msg->dv[i].data_length; - bcopy(event_msg->dv[i].data_ptr, tmp, - event_msg->dv[i].data_length); - tmp += event_msg->dv[i].data_length; - } - - ev->id = ++static_event_id; - ev->total_size = total_size; - ev->vendor_code = event_msg->vendor_code; - ev->kev_class = event_msg->kev_class; - ev->kev_subclass = event_msg->kev_subclass; - ev->event_code = event_msg->event_code; - - m->m_len = total_size; - lck_mtx_lock(sys_mtx); - for (ev_pcb = LIST_FIRST(&kern_event_head); - ev_pcb; - ev_pcb = LIST_NEXT(ev_pcb, ev_link)) { - - if (ev_pcb->vendor_code_filter != KEV_ANY_VENDOR) { - if (ev_pcb->vendor_code_filter != ev->vendor_code) - continue; - - if (ev_pcb->class_filter != KEV_ANY_CLASS) { - if (ev_pcb->class_filter != ev->kev_class) - continue; - - if ((ev_pcb->subclass_filter != KEV_ANY_SUBCLASS) && - (ev_pcb->subclass_filter != ev->kev_subclass)) - continue; - } - } - - m2 = m_copym(m, 0, m->m_len, M_NOWAIT); - if (m2 == 0) { - m_free(m); - lck_mtx_unlock(sys_mtx); - return ENOBUFS; - } - /* the socket is already locked because we hold the sys_mtx here */ - if (sbappendrecord(&ev_pcb->ev_socket->so_rcv, m2)) - sorwakeup(ev_pcb->ev_socket); - } - - m_free(m); - lck_mtx_unlock(sys_mtx); - return 0; + return (EMSGSIZE); + } + + m = m_get(M_DONTWAIT, MT_DATA); + if (m == 0) + return (ENOBUFS); + + ev = mtod(m, struct kern_event_msg *); + total_size = KEV_MSG_HEADER_SIZE; + + tmp = (char *) &ev->event_data[0]; + for (i = 0; i < 5; i++) { + if (event_msg->dv[i].data_length == 0) + break; + + total_size += event_msg->dv[i].data_length; + 
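/*
 * (Editorial sketch of a typical caller of this routine, hypothetical
 * in-kernel publisher; field names per <sys/kern_event.h>.  The whole
 * message, header plus up to five data vectors, must fit in one plain
 * mbuf, i.e. total_size <= MLEN:
 *
 *    struct kev_msg ev;
 *    bzero(&ev, sizeof (ev));
 *    ev.vendor_code = KEV_VENDOR_APPLE;
 *    ev.kev_class = KEV_NETWORK_CLASS;
 *    ev.kev_subclass = KEV_DL_SUBCLASS;
 *    ev.event_code = event_code;    // hypothetical event code
 *    ev.dv[0].data_ptr = &payload;    // hypothetical payload struct
 *    ev.dv[0].data_length = sizeof (payload);
 *    kev_post_msg(&ev);
 * )
 */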
bcopy(event_msg->dv[i].data_ptr, tmp, + event_msg->dv[i].data_length); + tmp += event_msg->dv[i].data_length; + } + + ev->id = ++static_event_id; + ev->total_size = total_size; + ev->vendor_code = event_msg->vendor_code; + ev->kev_class = event_msg->kev_class; + ev->kev_subclass = event_msg->kev_subclass; + ev->event_code = event_msg->event_code; + + m->m_len = total_size; + lck_rw_lock_shared(kev_rwlock); + for (ev_pcb = LIST_FIRST(&kern_event_head); + ev_pcb; + ev_pcb = LIST_NEXT(ev_pcb, evp_link)) { + lck_mtx_lock(&ev_pcb->evp_mtx); + if (ev_pcb->evp_socket->so_pcb == NULL) { + lck_mtx_unlock(&ev_pcb->evp_mtx); + continue; + } + if (ev_pcb->evp_vendor_code_filter != KEV_ANY_VENDOR) { + if (ev_pcb->evp_vendor_code_filter != ev->vendor_code) { + lck_mtx_unlock(&ev_pcb->evp_mtx); + continue; + } + + if (ev_pcb->evp_class_filter != KEV_ANY_CLASS) { + if (ev_pcb->evp_class_filter != ev->kev_class) { + lck_mtx_unlock(&ev_pcb->evp_mtx); + continue; + } + + if ((ev_pcb->evp_subclass_filter != KEV_ANY_SUBCLASS) && + (ev_pcb->evp_subclass_filter != ev->kev_subclass)) { + lck_mtx_unlock(&ev_pcb->evp_mtx); + continue; + } + } + } + + m2 = m_copym(m, 0, m->m_len, M_NOWAIT); + if (m2 == 0) { + m_free(m); + lck_mtx_unlock(&ev_pcb->evp_mtx); + lck_rw_done(kev_rwlock); + return (ENOBUFS); + } + if (sbappendrecord(&ev_pcb->evp_socket->so_rcv, m2)) + sorwakeup(ev_pcb->evp_socket); + lck_mtx_unlock(&ev_pcb->evp_mtx); + } + m_free(m); + lck_rw_done(kev_rwlock); + + return (0); } static int -kev_control(struct socket *so, - u_long cmd, - caddr_t data, - __unused struct ifnet *ifp, - __unused struct proc *p) +kev_control(struct socket *so, + u_long cmd, + caddr_t data, + __unused struct ifnet *ifp, + __unused struct proc *p) { struct kev_request *kev_req = (struct kev_request *) data; struct kern_event_pcb *ev_pcb; struct kev_vendor_code *kev_vendor; u_int32_t *id_value = (u_int32_t *) data; - - + switch (cmd) { - case SIOCGKEVID: *id_value = static_event_id; break; - case SIOCSKEVFILT: ev_pcb = (struct kern_event_pcb *) so->so_pcb; - ev_pcb->vendor_code_filter = kev_req->vendor_code; - ev_pcb->class_filter = kev_req->kev_class; - ev_pcb->subclass_filter = kev_req->kev_subclass; + ev_pcb->evp_vendor_code_filter = kev_req->vendor_code; + ev_pcb->evp_class_filter = kev_req->kev_class; + ev_pcb->evp_subclass_filter = kev_req->kev_subclass; break; - case SIOCGKEVFILT: ev_pcb = (struct kern_event_pcb *) so->so_pcb; - kev_req->vendor_code = ev_pcb->vendor_code_filter; - kev_req->kev_class = ev_pcb->class_filter; - kev_req->kev_subclass = ev_pcb->subclass_filter; + kev_req->vendor_code = ev_pcb->evp_vendor_code_filter; + kev_req->kev_class = ev_pcb->evp_class_filter; + kev_req->kev_subclass = ev_pcb->evp_subclass_filter; break; - case SIOCGKEVVENDOR: - kev_vendor = (struct kev_vendor_code*)data; - + kev_vendor = (struct kev_vendor_code *)data; /* Make sure string is NULL terminated */ kev_vendor->vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN-1] = 0; - - return net_str_id_find_internal(kev_vendor->vendor_string, - &kev_vendor->vendor_code, NSI_VENDOR_CODE, 0); - + return (net_str_id_find_internal(kev_vendor->vendor_string, + &kev_vendor->vendor_code, NSI_VENDOR_CODE, 0)); default: - return ENOTSUP; + return (ENOTSUP); } - - return 0; + + return (0); } #endif /* SOCKETS */ @@ -2988,7 +3309,6 @@ fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo) struct vinfo_stat * st; /* No need for the funnel as fd is kept alive */ - st = &kinfo->kq_stat; st->vst_size = kq->kq_count; @@ -3002,7 +3322,7 @@ 
fill_kqueueinfo(struct kqueue *kq, struct kqueue_info * kinfo) if (kq->kq_state & KQ_SLEEP) kinfo->kq_state |= PROC_KQUEUE_SLEEP; - return(0); + return (0); } diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index dde1b3c40..0fb4a347a 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -110,9 +110,12 @@ #include /* ubc_map() */ #include #include +#include #include #include +#include + #include #include @@ -141,6 +144,7 @@ #include #include #include +#include #include @@ -166,7 +170,6 @@ thread_t fork_create_child(task_t parent_task, proc_t child_proc, int inherit_me void vfork_exit(proc_t p, int rv); int setsigvec(proc_t, thread_t, int, struct __kern_sigaction *, boolean_t in_sigstart); extern void proc_apply_task_networkbg_internal(proc_t, thread_t); -int task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t deadline, int scope); /* * Mach things for which prototypes are unavailable from Mach headers @@ -212,7 +215,7 @@ extern struct savearea *get_user_regs(thread_t); #define IS_EOL(ch) ((ch == '#') || (ch == '\n')) extern vm_map_t bsd_pageable_map; -extern struct fileops vnops; +extern const struct fileops vnops; #define ROUND_PTR(type, addr) \ (type *)( ( (uintptr_t)(addr) + 16 - 1) \ @@ -235,26 +238,8 @@ static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size); static void exec_resettextvp(proc_t, struct image_params *); static int check_for_signature(proc_t, struct image_params *); static void exec_prefault_data(proc_t, struct image_params *, load_result_t *); - -#if !CONFIG_EMBEDDED - -/* Identify process during exec and opt into legacy behaviors */ - -struct legacy_behavior { - uuid_t process_uuid; - uint32_t legacy_mask; -}; - -static const struct legacy_behavior legacy_behaviors[] = -{ - {{ 0xF8, 0x7C, 0xC3, 0x67, 0xFB, 0x68, 0x37, 0x93, 0xBC, 0x34, 0xB2, 0xB6, 0x05, 0x2B, 0xCD, 0xE2 }, PROC_LEGACY_BEHAVIOR_IOTHROTTLE }, - {{ 0x0B, 0x4E, 0xDF, 0xD8, 0x76, 0xD1, 0x3D, 0x4D, 0x9D, 0xD7, 0x37, 0x43, 0x1C, 0xA8, 0xFB, 0x26 }, PROC_LEGACY_BEHAVIOR_IOTHROTTLE }, -}; -#endif /* !CONFIG_EMBEDDED */ - -/* We don't want this one exported */ -__private_extern__ -int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *); +static errno_t exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch); +static errno_t exec_handle_spawnattr_apptype(proc_t p, int psa_apptype); /* * exec_add_user_string @@ -539,7 +524,6 @@ exec_shell_imgact(struct image_params *imgp) return(error); fp->f_fglob->fg_flag = FREAD; - fp->f_fglob->fg_type = DTYPE_VNODE; fp->f_fglob->fg_ops = &vnops; fp->f_fglob->fg_data = (caddr_t)imgp->ip_vp; @@ -720,7 +704,6 @@ exec_mach_imgact(struct image_params *imgp) load_result_t load_result; struct _posix_spawnattr *psa = NULL; int spawn = (imgp->ip_flags & IMGPF_SPAWN); - int apptype = 0; /* * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference @@ -794,8 +777,7 @@ exec_mach_imgact(struct image_params *imgp) goto bad; } grade: - if (!grade_binary(imgp->ip_origcputype & ~CPU_SUBTYPE_LIB64, - imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) { + if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) { error = EBADARCH; goto bad; } @@ -877,9 +859,10 @@ grade: * requested them on exec. 
*/ if (load_result.csflags & CS_VALID) { - imgp->ip_csflags |= load_result.csflags & + imgp->ip_csflags |= load_result.csflags & (CS_VALID| - CS_HARD|CS_KILL|CS_EXEC_SET_HARD|CS_EXEC_SET_KILL); + CS_HARD|CS_KILL|CS_ENFORCEMENT| + CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT); } else { imgp->ip_csflags &= ~CS_VALID; } @@ -888,6 +871,8 @@ grade: imgp->ip_csflags |= CS_HARD; if (p->p_csflags & CS_EXEC_SET_KILL) imgp->ip_csflags |= CS_KILL; + if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT) + imgp->ip_csflags |= CS_ENFORCEMENT; /* @@ -1019,28 +1004,6 @@ grade: memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid)); -#if !CONFIG_EMBEDDED - unsigned int i; - - if (!vfexec && !spawn) { - if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) { - throttle_legacy_process_decr(); - } - } - - p->p_legacy_behavior = 0; - for (i=0; i < sizeof(legacy_behaviors)/sizeof(legacy_behaviors[0]); i++) { - if (0 == uuid_compare(legacy_behaviors[i].process_uuid, p->p_uuid)) { - p->p_legacy_behavior = legacy_behaviors[i].legacy_mask; - break; - } - } - - if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) { - throttle_legacy_process_incr(); - } -#endif - // dtrace code cleanup needed #if CONFIG_DTRACE /* @@ -1117,54 +1080,15 @@ grade: proc_unlock(p); (void) task_suspend(p->task); } -#if CONFIG_EMBEDDED - if ((psa->psa_flags & POSIX_SPAWN_IOS_RESV1_APP_START) || (psa->psa_flags & POSIX_SPAWN_IOS_APPLE_DAEMON_START) || (psa->psa_flags & POSIX_SPAWN_IOS_APP_START)) { - if ((psa->psa_flags & POSIX_SPAWN_IOS_RESV1_APP_START)) - apptype = PROC_POLICY_IOS_RESV1_APPTYPE; - else if (psa->psa_flags & POSIX_SPAWN_IOS_APPLE_DAEMON_START) - apptype = PROC_POLICY_IOS_APPLE_DAEMON; - else if (psa->psa_flags & POSIX_SPAWN_IOS_APP_START) - apptype = PROC_POLICY_IOS_APPTYPE; - else - apptype = PROC_POLICY_OSX_APPTYPE_NONE; - proc_set_task_apptype(p->task, apptype, imgp->ip_new_thread); - if (apptype == PROC_POLICY_IOS_RESV1_APPTYPE) - proc_apply_task_networkbg_internal(p, NULL); - } + } - if (psa->psa_apptype & POSIX_SPAWN_APPTYPE_IOS_APPLEDAEMON) { - apptype = PROC_POLICY_IOS_APPLE_DAEMON; - proc_set_task_apptype(p->task, apptype, imgp->ip_new_thread); - } -#else /* CONFIG_EMBEDDED */ - if ((psa->psa_flags & POSIX_SPAWN_OSX_TALAPP_START) || (psa->psa_flags & POSIX_SPAWN_OSX_DBCLIENT_START)) { - if ((psa->psa_flags & POSIX_SPAWN_OSX_TALAPP_START)) - apptype = PROC_POLICY_OSX_APPTYPE_TAL; - else if (psa->psa_flags & POSIX_SPAWN_OSX_DBCLIENT_START) - apptype = PROC_POLICY_OSX_APPTYPE_DBCLIENT; - else - apptype = PROC_POLICY_OSX_APPTYPE_NONE; - proc_set_task_apptype(p->task, apptype, NULL); - if ((apptype == PROC_POLICY_OSX_APPTYPE_TAL) || - (apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) { - proc_apply_task_networkbg_internal(p, NULL); - } - } - if ((psa->psa_apptype & POSIX_SPAWN_APPTYPE_OSX_TAL) || - (psa->psa_apptype & POSIX_SPAWN_APPTYPE_OSX_WIDGET)) { - if ((psa->psa_apptype & POSIX_SPAWN_APPTYPE_OSX_TAL)) - apptype = PROC_POLICY_OSX_APPTYPE_TAL; - else if (psa->psa_flags & POSIX_SPAWN_APPTYPE_OSX_WIDGET) - apptype = PROC_POLICY_OSX_APPTYPE_DBCLIENT; - else - apptype = PROC_POLICY_OSX_APPTYPE_NONE; - proc_set_task_apptype(p->task, apptype, imgp->ip_new_thread); - if ((apptype == PROC_POLICY_OSX_APPTYPE_TAL) || - (apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) { - proc_apply_task_networkbg_internal(p, NULL); - } - } -#endif /* CONFIG_EMBEDDED */ + /* + * Apply the apptype state (which primes the task for importance donation) + * This must be done after the exec so that the child's thread is ready + */ 
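The hunk above extends an existing inheritance idiom: a CS_EXEC_SET_x bit on the parent forces the plain x bit onto the child image, now including CS_ENFORCEMENT. A compact sketch of the rule with made-up mask values (the real bits live in the codesign headers):

    #include <stdint.h>

    /* illustrative bit values only, not the real header constants */
    enum {
        XCS_HARD          = 0x0001,
        XCS_KILL          = 0x0002,
        XCS_ENFORCEMENT   = 0x0004,
        XCS_EXEC_SET_HARD = 0x0100,
        XCS_EXEC_SET_KILL = 0x0200,
        XCS_EXEC_SET_ENF  = 0x0400,
    };

    static uint32_t
    inherit_cs_flags(uint32_t parent_csflags)
    {
        uint32_t child = 0;

        if (parent_csflags & XCS_EXEC_SET_HARD)
            child |= XCS_HARD;
        if (parent_csflags & XCS_EXEC_SET_KILL)
            child |= XCS_KILL;
        if (parent_csflags & XCS_EXEC_SET_ENF)
            child |= XCS_ENFORCEMENT;
        return (child);
    }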
+ if (imgp->ip_px_sa != NULL) { + psa = (struct _posix_spawnattr *) imgp->ip_px_sa; + exec_handle_spawnattr_apptype(p, psa->psa_apptype); } /* @@ -1190,7 +1114,9 @@ grade: badtoolate: if (!spawn) - proc_knote(p, NOTE_EXEC); + /* notify only if it has not failed due to FP Key error */ + if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0) + proc_knote(p, NOTE_EXEC); if (vfexec || spawn) { task_deallocate(new_task); @@ -1262,7 +1188,7 @@ exec_activate_image(struct image_params *imgp) error = execargs_alloc(imgp); if (error) - goto bad; + goto bad_notrans; error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg); if (error) { @@ -1348,6 +1274,14 @@ encapsulated_binary: } mac_vnode_label_copy(imgp->ip_vp->v_label, imgp->ip_scriptlabelp); + + /* + * Take a ref of the script vnode for later use. + */ + if (imgp->ip_scriptvp) + vnode_put(imgp->ip_scriptvp); + if (vnode_getwithref(imgp->ip_vp) == 0) + imgp->ip_scriptvp = imgp->ip_vp; #endif nameidone(&nd); @@ -1390,6 +1324,58 @@ bad_notrans: return (error); } + +/* + * exec_handle_spawnattr_apptype + * + * Description: Decode and apply the posix_spawn apptype to the task. + * + * Parameters: proc_t p process to apply attributes to + * int psa_apptype posix spawn attribute apptype + * + * Returns: 0 Success + */ +static errno_t +exec_handle_spawnattr_apptype(proc_t p, int psa_apptype) +{ + if ((psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK) != 0) { + int apptype = TASK_APPTYPE_NONE; + int proctype = psa_apptype & POSIX_SPAWN_PROC_TYPE_MASK; + + switch(proctype) { + case POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE: + apptype = TASK_APPTYPE_DAEMON_INTERACTIVE; + break; + case POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD: + apptype = TASK_APPTYPE_DAEMON_STANDARD; + break; + case POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE: + apptype = TASK_APPTYPE_DAEMON_ADAPTIVE; + break; + case POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND: + apptype = TASK_APPTYPE_DAEMON_BACKGROUND; + break; + case POSIX_SPAWN_PROC_TYPE_APP_DEFAULT: + apptype = TASK_APPTYPE_APP_DEFAULT; + break; + case POSIX_SPAWN_PROC_TYPE_APP_TAL: + apptype = TASK_APPTYPE_APP_TAL; + break; + default: + apptype = TASK_APPTYPE_NONE; + break; + } + + proc_set_task_apptype(p->task, apptype); + + /* TODO: Should an invalid value here fail the spawn? 
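For context on how the switch above is reached: userland selects a process type through the spawn attributes, on OS X via the private posix_spawnattr_setprocesstype_np() SPI. Treat the header names and availability below as assumptions, since this is not part of the public posix_spawn API:

    #include <spawn.h>
    #include <spawn_private.h>          /* assumed: private SPI header */
    #include <sys/spawn_internal.h>     /* assumed: POSIX_SPAWN_PROC_TYPE_* */

    extern char **environ;

    /* Spawn a helper tagged as an adaptive daemon; the kernel maps this
     * to TASK_APPTYPE_DAEMON_ADAPTIVE in the switch above. */
    static int
    spawn_adaptive_daemon(const char *path, char *const argv[])
    {
        posix_spawnattr_t attr;
        pid_t pid;
        int err;

        err = posix_spawnattr_init(&attr);
        if (err != 0)
            return (err);
        err = posix_spawnattr_setprocesstype_np(&attr,
            POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE);
        if (err == 0)
            err = posix_spawn(&pid, path, NULL, &attr, argv, environ);
        posix_spawnattr_destroy(&attr);
        return (err);
    }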
*/ + return (0); + } + + return (0); +} + + /* * exec_handle_port_actions * @@ -1405,7 +1391,7 @@ bad_notrans: * ENOTSUP Illegal posix_spawn attr flag was set */ static errno_t -exec_handle_port_actions(struct image_params *imgp, short psa_flags) +exec_handle_port_actions(struct image_params *imgp, short psa_flags, int * need_portwatch, ipc_port_t * portwatch_ports) { _posix_spawn_port_actions_t pacts = imgp->ip_px_spa; proc_t p = vfs_context_proc(imgp->ip_vfs_context); @@ -1415,13 +1401,18 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags) errno_t ret = 0; int i; + if (need_portwatch != NULL) + *need_portwatch = 0; + for (i = 0; i < pacts->pspa_count; i++) { act = &pacts->pspa_actions[i]; if (ipc_object_copyin(get_task_ipcspace(current_task()), act->new_port, MACH_MSG_TYPE_COPY_SEND, - (ipc_object_t *) &port) != KERN_SUCCESS) - return (EINVAL); + (ipc_object_t *) &port) != KERN_SUCCESS) { + ret = EINVAL; + goto done; + } switch (act->port_type) { case PSPA_SPECIAL: @@ -1429,7 +1420,7 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags) if (!(psa_flags & POSIX_SPAWN_SETEXEC)) ret = ENOTSUP; else if (task_set_special_port(task, - act->which, port) != KERN_SUCCESS) + act->which, port) != KERN_SUCCESS) ret = EINVAL; break; @@ -1438,8 +1429,8 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags) if (!(psa_flags & POSIX_SPAWN_SETEXEC)) ret = ENOTSUP; else if (task_set_exception_ports(task, - act->mask, port, act->behavior, - act->flavor) != KERN_SUCCESS) + act->mask, port, act->behavior, + act->flavor) != KERN_SUCCESS) ret = EINVAL; break; #if CONFIG_AUDIT @@ -1447,6 +1438,16 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags) ret = audit_session_spawnjoin(p, port); break; #endif + case PSPA_IMP_WATCHPORTS: + if (portwatch_ports != NULL) { + if (need_portwatch != NULL) + *need_portwatch = 1; + /* hold on to this till end of spawn */ + portwatch_ports[i] = port; + ret = 0; + } else + ipc_port_release_send(port); + break; default: ret = EINVAL; break; @@ -1460,6 +1461,9 @@ exec_handle_port_actions(struct image_params *imgp, short psa_flags) } } +done: + if (0 != ret) + DTRACE_PROC1(spawn__port__failure, mach_port_name_t, act->new_port); return (ret); } @@ -1523,6 +1527,7 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) &nd, psfa->psfaa_openargs.psfao_oflag, &va, + fileproc_alloc_init, NULL, ival); /* @@ -1588,20 +1593,27 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) break; case PSFA_INHERIT: { - struct fileproc *fp; - int fd = psfa->psfaa_filedes; + struct fcntl_nocancel_args fcntla; /* * Check to see if the descriptor exists, and * ensure it's -not- marked as close-on-exec. - * [Less code than the equivalent F_GETFD/F_SETFD.] + * + * Attempting to "inherit" a guarded fd will + * result in a error. 
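The PSFA_INHERIT rework above is the in-kernel form of a standard descriptor idiom; the same F_GETFD/F_SETFD sequence in plain userspace C:

    #include <fcntl.h>

    /* Ensure fd survives exec by clearing close-on-exec, mirroring the
     * fcntl_nocancel() pair above. Returns 0 on success, -1 on error. */
    static int
    uninherit_cloexec(int fd)
    {
        int flags = fcntl(fd, F_GETFD);

        if (flags == -1)
            return (-1);
        if (flags & FD_CLOEXEC)
            return (fcntl(fd, F_SETFD, flags & ~FD_CLOEXEC));
        return (0);
    }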
*/ - proc_fdlock(p); - if ((error = fp_lookup(p, fd, &fp, 1)) == 0) { - *fdflags(p, fd) &= ~UF_EXCLOSE; - (void) fp_drop(p, fd, fp, 1); + fcntla.fd = psfa->psfaa_filedes; + fcntla.cmd = F_GETFD; + if ((error = fcntl_nocancel(p, &fcntla, ival)) != 0) + break; + + if ((ival[0] & FD_CLOEXEC) == FD_CLOEXEC) { + fcntla.fd = psfa->psfaa_filedes; + fcntla.cmd = F_SETFD; + fcntla.arg = ival[0] & ~FD_CLOEXEC; + error = fcntl_nocancel(p, &fcntla, ival); } - proc_fdunlock(p); + } break; @@ -1612,8 +1624,15 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) /* All file actions failures are considered fatal, per POSIX */ - if (error) + if (error) { + if (PSFA_OPEN == psfa->psfaa_type) { + DTRACE_PROC1(spawn__open__failure, uintptr_t, + psfa->psfaa_openargs.psfao_path); + } else { + DTRACE_PROC1(spawn__fd__failure, int, psfa->psfaa_filedes); + } break; + } } if (error != 0 || (psa_flags & POSIX_SPAWN_CLOEXEC_DEFAULT) == 0) @@ -1651,6 +1670,102 @@ exec_handle_file_actions(struct image_params *imgp, short psa_flags) return (0); } +#if CONFIG_MACF +/* + * exec_spawnattr_getmacpolicyinfo + */ +void * +exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp) +{ + const struct _posix_spawn_mac_policy_extensions *psmx = macextensions; + int i; + + if (psmx == NULL) + return NULL; + + for (i = 0; i < psmx->psmx_count; i++) { + const _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i]; + if (strncmp(extension->policyname, policyname, sizeof(extension->policyname)) == 0) { + if (lenp != NULL) + *lenp = extension->datalen; + return extension->datap; + } + } + + if (lenp != NULL) + *lenp = 0; + return NULL; +} + +static int +spawn_copyin_macpolicyinfo(const struct user__posix_spawn_args_desc *px_args, _posix_spawn_mac_policy_extensions_t *psmxp) +{ + _posix_spawn_mac_policy_extensions_t psmx = NULL; + int error = 0; + int copycnt = 0; + int i = 0; + + *psmxp = NULL; + + if (px_args->mac_extensions_size < PS_MAC_EXTENSIONS_SIZE(1) || + px_args->mac_extensions_size > PAGE_SIZE) { + error = EINVAL; + goto bad; + } + + MALLOC(psmx, _posix_spawn_mac_policy_extensions_t, px_args->mac_extensions_size, M_TEMP, M_WAITOK); + if ((error = copyin(px_args->mac_extensions, psmx, px_args->mac_extensions_size)) != 0) + goto bad; + + if (PS_MAC_EXTENSIONS_SIZE(psmx->psmx_count) > px_args->mac_extensions_size) { + error = EINVAL; + goto bad; + } + + for (i = 0; i < psmx->psmx_count; i++) { + _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i]; + if (extension->datalen == 0 || extension->datalen > PAGE_SIZE) { + error = EINVAL; + goto bad; + } + } + + for (copycnt = 0; copycnt < psmx->psmx_count; copycnt++) { + _ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[copycnt]; + void *data = NULL; + + MALLOC(data, void *, extension->datalen, M_TEMP, M_WAITOK); + if ((error = copyin(extension->data, data, extension->datalen)) != 0) { + FREE(data, M_TEMP); + goto bad; + } + extension->datap = data; + } + + *psmxp = psmx; + return 0; + +bad: + if (psmx != NULL) { + for (i = 0; i < copycnt; i++) + FREE(psmx->psmx_extensions[i].datap, M_TEMP); + FREE(psmx, M_TEMP); + } + return error; +} + +static void +spawn_free_macpolicyinfo(_posix_spawn_mac_policy_extensions_t psmx) +{ + int i; + + if (psmx == NULL) + return; + for (i = 0; i < psmx->psmx_count; i++) + FREE(psmx->psmx_extensions[i].datap, M_TEMP); + FREE(psmx, M_TEMP); +} +#endif /* CONFIG_MACF */ /* * posix_spawn @@ -1703,6 +1818,9 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, 
int32_t *retval) boolean_t spawn_no_exec = FALSE; boolean_t proc_transit_set = TRUE; boolean_t exec_done = FALSE; + int need_portwatch = 0, portwatch_count = 0; + ipc_port_t * portwatch_ports = NULL; + vm_size_t px_sa_offset = offsetof(struct _posix_spawnattr, psa_ports); /* * Allocate a big chunk for locals instead of using stack since these @@ -1746,17 +1864,23 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) px_args.file_actions = CAST_USER_ADDR_T(px_args32.file_actions); px_args.port_actions_size = px_args32.port_actions_size; px_args.port_actions = CAST_USER_ADDR_T(px_args32.port_actions); + px_args.mac_extensions_size = px_args32.mac_extensions_size; + px_args.mac_extensions = CAST_USER_ADDR_T(px_args32.mac_extensions); } if (error) goto bad; if (px_args.attr_size != 0) { /* - * This could lose some of the port_actions pointer, - * but we already have it from px_args. + * We are not copying the port_actions pointer, + * because we already have it from px_args. */ - if ((error = copyin(px_args.attrp, &px_sa, sizeof(px_sa))) != 0) + + + if ((error = copyin(px_args.attrp, &px_sa, px_sa_offset) != 0)) goto bad; + + bzero( (void *)( (unsigned long) &px_sa + px_sa_offset), sizeof(px_sa) - px_sa_offset ); imgp->ip_px_sa = &px_sa; } @@ -1778,6 +1902,12 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) if ((error = copyin(px_args.file_actions, px_sfap, px_args.file_actions_size)) != 0) goto bad; + + /* Verify that the action count matches the struct size */ + if (PSF_ACTIONS_SIZE(px_sfap->psfa_act_count) != px_args.file_actions_size) { + error = EINVAL; + goto bad; + } } if (px_args.port_actions_size != 0) { /* Limit port_actions to one page of data */ @@ -1798,7 +1928,19 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) if ((error = copyin(px_args.port_actions, px_spap, px_args.port_actions_size)) != 0) goto bad; + + /* Verify that the action count matches the struct size */ + if (PS_PORT_ACTIONS_SIZE(px_spap->pspa_count) != px_args.port_actions_size) { + error = EINVAL; + goto bad; + } + } +#if CONFIG_MACF + if (px_args.mac_extensions_size != 0) { + if ((error = spawn_copyin_macpolicyinfo(&px_args, (_posix_spawn_mac_policy_extensions_t *)&imgp->ip_px_smpx)) != 0) + goto bad; } +#endif /* CONFIG_MACF */ } /* set uthread to parent */ @@ -1826,6 +1968,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) goto bad; imgp->ip_flags |= IMGPF_SPAWN; /* spawn w/o exec */ spawn_no_exec = TRUE; /* used in later tests */ + DTRACE_PROC1(create, proc_t, p); } if (spawn_no_exec) @@ -1862,13 +2005,27 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) } /* Has spawn port actions? */ - if (imgp->ip_px_spa != NULL) { - /* - * The check for the POSIX_SPAWN_SETEXEC flag is done in - * exec_handle_port_actions(). + if (imgp->ip_px_spa != NULL) { + boolean_t is_adaptive = FALSE; + + /* Will this process become adaptive? The apptype isn't ready yet, so we can't look there. */ + if (imgp->ip_px_sa != NULL && px_sa.psa_apptype == POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE) + is_adaptive = TRUE; + + /* + * portwatch only: + * Allocate a place to store the ports we want to bind to the new task + * We can't bind them until after the apptype is set. 
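A recurring pattern in the copyin paths above is cross-checking the user-supplied element count against the number of bytes actually copied, so a forged count can neither over-read the copied buffer nor hide trailing bytes. (Note, too, that the attrp test above, "(error = copyin(...) != 0)", binds "!= 0" before the assignment, so error carries 0 or 1 rather than the errno value.) A generic sketch of the size check, with illustrative types and names:

    #include <stddef.h>
    #include <stdint.h>

    struct actions {
        uint32_t count;
        uint64_t act[];             /* 'count' entries follow the header */
    };

    #define ACTIONS_SIZE(n) \
        (offsetof(struct actions, act) + (size_t)(n) * sizeof(uint64_t))

    /* Reject the buffer unless its size is exactly what the embedded
     * count implies, as the PSF_ACTIONS_SIZE checks above do. */
    static int
    validate_actions(const struct actions *a, size_t copied_size)
    {
        return (ACTIONS_SIZE(a->count) == copied_size ? 0 : -1);
    }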
*/ + if (px_spap->pspa_count != 0 && is_adaptive) { + portwatch_count = px_spap->pspa_count; + MALLOC(portwatch_ports, ipc_port_t *, (sizeof(ipc_port_t) * portwatch_count), M_TEMP, M_WAITOK | M_ZERO); + } else { + portwatch_ports = NULL; + } + if ((error = exec_handle_port_actions(imgp, - imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0)) != 0) + imgp->ip_px_sa != NULL ? px_sa.psa_flags : 0, &need_portwatch, portwatch_ports)) != 0) goto bad; } @@ -1916,6 +2073,11 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) /* * Disable ASLR for the spawned process. */ + /* + * But only do so if we are not embedded; embedded allows for a + * boot-arg (-disable_aslr) to deal with this (which itself is + * only honored on DEVELOPMENT or DEBUG builds of xnu). + */ if (px_sa.psa_flags & _POSIX_SPAWN_DISABLE_ASLR) OSBitOrAtomic(P_DISABLE_ASLR, &p->p_flag); @@ -2023,21 +2185,64 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) * Userland gives us interval in seconds, and the kernel SPI expects nanoseconds. */ if (px_sa.psa_cpumonitor_percent != 0) { + /* + * Always treat a CPU monitor activation coming from spawn as entitled. Requiring + * an entitlement to configure the monitor a certain way seems silly, since + * whomever is turning it on could just as easily choose not to do so. + * + * XXX - Ignore the parameters that we get from userland. The spawnattr method of + * activating the monitor always gets the system default parameters. Once we have + * an explicit spawn SPI for configuring the defaults, we can revert this to + * respect the params passed in from userland. + */ error = proc_set_task_ruse_cpu(p->task, TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC, - px_sa.psa_cpumonitor_percent, - px_sa.psa_cpumonitor_interval * NSEC_PER_SEC, - 0); + PROC_POLICY_CPUMON_DEFAULTS, 0, + 0, TRUE); } } bad: + + if (portwatch_ports != NULL) { + int needboost = 0; + + /* + * Mark the ports as destined to be handed off to the new task, and + * transfer any boosts to the new task. + * We need to release the rights even if the posix_spawn has failed. + */ + if (need_portwatch != 0) { + for (int i = 0; i < portwatch_count; i++) { + ipc_port_t port = NULL; + + if ((port = portwatch_ports[i]) != NULL) { + int boost = 0; + if (error == 0) + task_add_importance_watchport(p->task, p->p_pid, port, &boost); + ipc_port_release_send(port); + needboost += boost; + } + } + } + + if (needboost != 0) { + /* + * Apply the boost count found on the ports, which will keep the + * newly created process out of background until it handles the incoming messages. + */ + task_hold_multiple_assertion(p->task, needboost); + } + + FREE(portwatch_ports, M_TEMP); + portwatch_ports = NULL; + portwatch_count = 0; + } + if (error == 0) { /* reset delay idle sleep status if set */ -#if !CONFIG_EMBEDDED if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) OSBitAndAtomic(~((uint32_t)P_DELAYIDLESLEEP), &p->p_flag); -#endif /* !CONFIG_EMBEDDED */ /* upon successful spawn, re/set the proc control state */ if (imgp->ip_px_sa != NULL) { switch (px_sa.psa_pcontrol) { @@ -2055,18 +2260,14 @@ bad: p->p_pcaction = 0; break; }; -#if !CONFIG_EMBEDDED - if ((px_sa.psa_apptype & POSIX_SPAWN_APPTYPE_DELAYIDLESLEEP) != 0) - OSBitOrAtomic(P_DELAYIDLESLEEP, &p->p_flag); -#endif /* !CONFIG_EMBEDDED */ } exec_resettextvp(p, imgp); -#if CONFIG_EMBEDDED +#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM /* Has jetsam attributes? 
*/ - if (imgp->ip_px_sa != NULL) { - memorystatus_list_change((px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), - p->p_pid, px_sa.psa_priority, -1, px_sa.psa_high_water_mark); + if (imgp->ip_px_sa != NULL && (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_SET)) { + memorystatus_update(p, px_sa.psa_priority, 0, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY), + TRUE, px_sa.psa_high_water_mark, (px_sa.psa_jetsam_flags & POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND)); } #endif } @@ -2093,8 +2294,9 @@ bad: /* flag the 'fork' has occurred */ proc_knote(p->p_pptr, NOTE_FORK | p->p_pid); /* then flag exec has occurred */ - proc_knote(p, NOTE_EXEC); - DTRACE_PROC1(create, proc_t, p); + /* notify only if it has not failed due to FP Key error */ + if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0) + proc_knote(p, NOTE_EXEC); } /* @@ -2119,6 +2321,8 @@ bad: if (imgp != NULL) { if (imgp->ip_vp) vnode_put(imgp->ip_vp); + if (imgp->ip_scriptvp) + vnode_put(imgp->ip_scriptvp); if (imgp->ip_strings) execargs_free(imgp); if (imgp->ip_px_sfa != NULL) @@ -2127,6 +2331,8 @@ bad: FREE(imgp->ip_px_spa, M_TEMP); #if CONFIG_MACF + if (imgp->ip_px_smpx != NULL) + spawn_free_macpolicyinfo(imgp->ip_px_smpx); if (imgp->ip_execlabelp) mac_cred_label_free(imgp->ip_execlabelp); if (imgp->ip_scriptlabelp) @@ -2134,33 +2340,44 @@ #endif } - if (error) { - DTRACE_PROC1(exec__failure, int, error); +#if CONFIG_DTRACE + if (spawn_no_exec) { + /* + * In the original DTrace reference implementation, + * posix_spawn() was a libc routine that just + * did vfork(2) then exec(2). Thus the proc::: probes + * are very fork/exec oriented. The details of this + * in-kernel implementation of posix_spawn() are different + * (while producing the same process-observable effects) + * particularly w.r.t. errors, and which thread/process + * is constructing what on behalf of whom. + */ + if (error) { + DTRACE_PROC1(spawn__failure, int, error); + } else { + DTRACE_PROC(spawn__success); + /* + * Some DTrace scripts, e.g. newproc.d in + * /usr/bin, rely on the 'exec-success' + * probe being fired in the child after the + * new process image has been constructed + * in order to determine the associated pid. + * + * So, even though the parent built the image + * here, for compatibility, mark the new thread + * so 'exec-success' fires on it as it leaves + * the kernel. + */ + dtrace_thread_didexec(imgp->ip_new_thread); + } } else { - /* - * temporary - so dtrace call to current_proc() - * returns the child process instead of the parent. - */ - if (imgp != NULL && imgp->ip_flags & IMGPF_SPAWN) { - p->p_lflag |= P_LINVFORK; - p->p_vforkact = current_thread(); - uthread->uu_proc = p; - uthread->uu_flag |= UT_VFORK; - } - - DTRACE_PROC(exec__success); - - /- * temporary - so dtrace call to current_proc() - * returns the child process instead of the parent. - */ - if (imgp != NULL && imgp->ip_flags & IMGPF_SPAWN) { - p->p_lflag &= ~P_LINVFORK; - p->p_vforkact = NULL; - uthread->uu_proc = PROC_NULL; - uthread->uu_flag &= ~UT_VFORK; - } + if (error) { + DTRACE_PROC1(exec__failure, int, error); + } else { + DTRACE_PROC(exec__success); + } } +#endif /* Return to both the parent and the child?
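The NOTE_FORK/NOTE_EXEC knotes posted above are observable from userspace with a kqueue EVFILT_PROC filter; a small self-contained watcher:

    #include <sys/event.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int
    main(int argc, char *argv[])
    {
        struct kevent kev;
        pid_t pid;
        int kq;

        if (argc != 2)
            return (1);
        pid = (pid_t)atoi(argv[1]);
        if ((kq = kqueue()) == -1)
            return (1);
        EV_SET(&kev, pid, EVFILT_PROC, EV_ADD, NOTE_FORK | NOTE_EXEC, 0, NULL);
        if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
            return (1);
        for (;;) {
            if (kevent(kq, NULL, 0, &kev, 1, NULL) < 1)
                break;
            if (kev.fflags & NOTE_FORK)
                printf("pid %d forked\n", (int)kev.ident);
            if (kev.fflags & NOTE_EXEC)
                printf("pid %d exec'd\n", (int)kev.ident);
        }
        return (0);
    }

Note that with the change above, a process whose exec fails with the decrypt error never fires NOTE_EXEC, so watchers cannot rely on an exec notification for such children.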
*/ if (imgp != NULL && spawn_no_exec) { @@ -2196,7 +2413,7 @@ bad: } else { /* - * Return" to the child + * Return to the child * * Note: the image activator earlier dropped the * task/thread references to the newly spawned @@ -2243,6 +2460,8 @@ execve(proc_t p, struct execve_args *uap, int32_t *retval) struct __mac_execve_args muap; int err; + memoryshot(VM_EXECVE, DBG_FUNC_NONE); + muap.fname = uap->fname; muap.argp = uap->argp; muap.envp = uap->envp; @@ -2340,6 +2559,8 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) } if (imgp->ip_vp != NULLVP) vnode_put(imgp->ip_vp); + if (imgp->ip_scriptvp != NULLVP) + vnode_put(imgp->ip_scriptvp); if (imgp->ip_strings) execargs_free(imgp); #if CONFIG_MACF @@ -2909,7 +3130,7 @@ bad: } static char * -random_hex_str(char *str, int len) +random_hex_str(char *str, int len, boolean_t embedNUL) { uint64_t low, high, value; int idx; @@ -2927,6 +3148,14 @@ random_hex_str(char *str, int len) high = random(); value = high << 32 | low; + if (embedNUL) { + /* + * Zero a byte to protect against C string vulnerabilities + * e.g. for userland __stack_chk_guard. + */ + value &= ~(0xffull << 8); + } + str[0] = '0'; str[1] = 'x'; for (idx = 2; idx < len - 1; idx++) { @@ -2956,6 +3185,11 @@ random_hex_str(char *str, int len) #define ENTROPY_VALUES 2 #define ENTROPY_KEY "malloc_entropy=" +/* + * System malloc engages nanozone for UIAPP. + */ +#define NANO_ENGAGE_KEY "MallocNanoZone=1" + #define PFZ_KEY "pfz=" extern user32_addr_t commpage_text32_location; extern user64_addr_t commpage_text64_location; @@ -2978,7 +3212,7 @@ exec_add_apple_strings(struct image_params *imgp) if( imgp->ip_flags & IMGPF_IS_64BIT) { new_ptr_size = 8; snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%llx",commpage_text64_location); - }else{ + } else { snprintf(pfz_string, sizeof(pfz_string),PFZ_KEY "0x%x",commpage_text32_location); } @@ -2991,13 +3225,31 @@ exec_add_apple_strings(struct image_params *imgp) goto bad; imgp->ip_applec++; + /* adding the NANO_ENGAGE_KEY key */ + if (imgp->ip_px_sa) { + int proc_type = (((struct _posix_spawnattr *) imgp->ip_px_sa)->psa_apptype) & POSIX_SPAWN_PROC_TYPE_MASK; + + if (proc_type == POSIX_SPAWN_PROC_TYPE_APP_DEFAULT || proc_type == POSIX_SPAWN_PROC_TYPE_APP_TAL) { + char uiapp_string[strlen(NANO_ENGAGE_KEY) + 1]; + + snprintf(uiapp_string, sizeof(uiapp_string), NANO_ENGAGE_KEY); + error = exec_add_user_string(imgp, CAST_USER_ADDR_T(uiapp_string),UIO_SYSSPACE,FALSE); + if(error) + goto bad; + imgp->ip_applec++; + } + } + /* * Supply libc with a collection of random values to use when * implementing -fstack-protector. 
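The embedNUL masking above clears byte 1 of the 64-bit value before it is rendered, so the guard's string form contains a NUL; str*-family overflows then stop before replicating the full guard. A standalone illustration of the arithmetic:

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t guard = 0x1122334455667788ULL;

        /* same mask as random_hex_str(..., TRUE): zero bits 8..15 */
        guard &= ~(0xffULL << 8);
        printf("0x%016" PRIx64 "\n", guard);    /* 0x1122334455660088 */
        return (0);
    }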
+ * + * (The first random string always contains an embedded NUL so that + * __stack_chk_guard also protects against C string vulnerabilities) */ (void)strlcpy(guard_vec, GUARD_KEY, sizeof (guard_vec)); for (i = 0; i < GUARD_VALUES; i++) { - random_hex_str(guard, sizeof (guard)); + random_hex_str(guard, sizeof (guard), i == 0); if (i) (void)strlcat(guard_vec, ",", sizeof (guard_vec)); (void)strlcat(guard_vec, guard, sizeof (guard_vec)); @@ -3013,7 +3265,7 @@ exec_add_apple_strings(struct image_params *imgp) */ (void)strlcpy(entropy_vec, ENTROPY_KEY, sizeof(entropy_vec)); for (i = 0; i < ENTROPY_VALUES; i++) { - random_hex_str(entropy, sizeof (entropy)); + random_hex_str(entropy, sizeof (entropy), FALSE); if (i) (void)strlcat(entropy_vec, ",", sizeof (entropy_vec)); (void)strlcat(entropy_vec, entropy, sizeof (entropy_vec)); @@ -3095,6 +3347,13 @@ exec_check_permissions(struct image_params *imgp) if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_lflag & P_LTRACED)) vap->va_mode &= ~(VSUID | VSGID); + /* + * Disable _POSIX_SPAWN_ALLOW_DATA_EXEC and _POSIX_SPAWN_DISABLE_ASLR + * flags for setuid/setgid binaries. + */ + if (vap->va_mode & (VSUID | VSGID)) + imgp->ip_flags &= ~(IMGPF_ALLOW_DATA_EXEC | IMGPF_DISABLE_ASLR); + #if CONFIG_MACF error = mac_vnode_check_exec(imgp->ip_vfs_context, vp, imgp); if (error) @@ -3176,8 +3435,11 @@ exec_handle_sugid(struct image_params *imgp) mac_transition = mac_cred_check_label_update_execve( imgp->ip_vfs_context, imgp->ip_vp, + imgp->ip_scriptvp, imgp->ip_scriptlabelp, - imgp->ip_execlabelp, p); + imgp->ip_execlabelp, + p, + imgp->ip_px_smpx); #endif OSBitAndAtomic(~((uint32_t)P_SUGID), &p->p_flag); @@ -3246,8 +3508,10 @@ handle_mac_transition: if (kauth_proc_label_update_execve(p, imgp->ip_vfs_context, imgp->ip_vp, + imgp->ip_scriptvp, imgp->ip_scriptlabelp, - imgp->ip_execlabelp)) { + imgp->ip_execlabelp, + imgp->ip_px_smpx)) { /* * If updating the MAC label resulted in a * disjoint credential, flag that we need to @@ -3266,78 +3530,90 @@ handle_mac_transition: } #endif /* CONFIG_MACF */ - /* - * Have mach reset the task and thread ports. - * We don't want anyone who had the ports before - * a setuid exec to be able to access/control the - * task/thread after. - */ - ipc_task_reset(p->task); - ipc_thread_reset((imgp->ip_new_thread != NULL) ? - imgp->ip_new_thread : current_thread()); - /* * If 'leave_sugid_clear' is non-zero, then we passed the * VSUID and MACF checks, and successfully determined that * the previous cred was a member of the VSGID group, but * that it was not the default at the time of the execve, * and that the post-labelling credential was not disjoint. - * So we don't set the P_SUGID on the basis of simply - * running this code. - */ - if (!leave_sugid_clear) - OSBitOrAtomic(P_SUGID, &p->p_flag); - - /* - * Radar 2261856; setuid security hole fix - * XXX For setuid processes, attempt to ensure that - * stdin, stdout, and stderr are already allocated. - * We do not want userland to accidentally allocate - * descriptors in this range which has implied meaning - * to libc. + * So we don't set the P_SUGID or reset mach ports and fds + * on the basis of simply running this code. */ - for (i = 0; i < 3; i++) { + if (!leave_sugid_clear) { + /* + * Have mach reset the task and thread ports. + * We don't want anyone who had the ports before + * a setuid exec to be able to access/control the + * task/thread after. + */ + ipc_task_reset(p->task); + ipc_thread_reset((imgp->ip_new_thread != NULL) ? 
+ imgp->ip_new_thread : current_thread()); - if (p->p_fd->fd_ofiles[i] != NULL) - continue; + /* + * Flag the process as setuid. + */ + OSBitOrAtomic(P_SUGID, &p->p_flag); /* - * Do the kernel equivalent of - * - * (void) open("/dev/null", O_RDONLY); + * Radar 2261856; setuid security hole fix + * XXX For setuid processes, attempt to ensure that + * stdin, stdout, and stderr are already allocated. + * We do not want userland to accidentally allocate + * descriptors in this range which has implied meaning + * to libc. */ + for (i = 0; i < 3; i++) { + + if (p->p_fd->fd_ofiles[i] != NULL) + continue; + + /* + * Do the kernel equivalent of + * + * if i == 0 + * (void) open("/dev/null", O_RDONLY); + * else + * (void) open("/dev/null", O_WRONLY); + */ - struct fileproc *fp; - int indx; + struct fileproc *fp; + int indx; + int flag; - if ((error = falloc(p, - &fp, &indx, imgp->ip_vfs_context)) != 0) - continue; + if (i == 0) + flag = FREAD; + else + flag = FWRITE; - struct nameidata nd1; + if ((error = falloc(p, + &fp, &indx, imgp->ip_vfs_context)) != 0) + continue; - NDINIT(&nd1, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, - CAST_USER_ADDR_T("/dev/null"), - imgp->ip_vfs_context); + struct nameidata nd1; - if ((error = vn_open(&nd1, FREAD, 0)) != 0) { - fp_free(p, indx, fp); - break; - } + NDINIT(&nd1, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, + CAST_USER_ADDR_T("/dev/null"), + imgp->ip_vfs_context); - struct fileglob *fg = fp->f_fglob; + if ((error = vn_open(&nd1, flag, 0)) != 0) { + fp_free(p, indx, fp); + break; + } - fg->fg_flag = FREAD; - fg->fg_type = DTYPE_VNODE; - fg->fg_ops = &vnops; - fg->fg_data = nd1.ni_vp; + struct fileglob *fg = fp->f_fglob; - vnode_put(nd1.ni_vp); + fg->fg_flag = flag; + fg->fg_ops = &vnops; + fg->fg_data = nd1.ni_vp; - proc_fdlock(p); - procfdtbl_releasefd(p, indx, NULL); - fp_drop(p, indx, fp, 1); - proc_fdunlock(p); + vnode_put(nd1.ni_vp); + + proc_fdlock(p); + procfdtbl_releasefd(p, indx, NULL); + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); + } } } #if CONFIG_MACF @@ -3595,6 +3871,7 @@ load_return_to_errno(load_return_t lrtn) case LOAD_IOERROR: return EIO; case LOAD_FAILURE: + case LOAD_DECRYPTFAIL: default: return EBADEXEC; } @@ -3654,9 +3931,9 @@ execargs_lock_unlock(void) { lck_mtx_unlock(execargs_cache_lock); } -static void +static wait_result_t execargs_lock_sleep(void) { - lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_UNINT); + return(lck_mtx_sleep(execargs_cache_lock, LCK_SLEEP_DEFAULT, &execargs_free_count, THREAD_INTERRUPTIBLE)); } static kern_return_t @@ -3695,14 +3972,19 @@ static int execargs_alloc(struct image_params *imgp) { kern_return_t kret; + wait_result_t res; int i, cache_index = -1; execargs_lock_lock(); while (execargs_free_count == 0) { execargs_waiters++; - execargs_lock_sleep(); + res = execargs_lock_sleep(); execargs_waiters--; + if (res != THREAD_AWAKENED) { + execargs_lock_unlock(); + return (EINTR); + } } execargs_free_count--; @@ -3822,15 +4104,46 @@ exec_resettextvp(proc_t p, struct image_params *imgp) } -static int +/* + * If the process is not signed or if it contains entitlements, we + * need to communicate through the task_access_port to taskgated. + * + * taskgated will provide a detached code signature if present, and + * will enforce any restrictions on entitlements. 
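The fd 0-2 prophylaxis above (now read-only stdin, write-only stdout/stderr) has a well-known userspace counterpart used by daemons; the idiom in plain POSIX C:

    #include <errno.h>
    #include <fcntl.h>
    #include <unistd.h>

    /* Make sure descriptors 0..2 exist so later open()s cannot land on
     * the slots libc treats as stdin/stdout/stderr. */
    static void
    sanitize_std_fds(void)
    {
        int i, fd;

        for (i = 0; i < 3; i++) {
            if (fcntl(i, F_GETFD) == -1 && errno == EBADF) {
                fd = open("/dev/null", i == 0 ? O_RDONLY : O_WRONLY);
                if (fd >= 0 && fd != i) {
                    (void)dup2(fd, i);
                    (void)close(fd);
                }
            }
        }
    }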
+ */ + +static boolean_t +taskgated_required(proc_t p, boolean_t *require_success) +{ + size_t length; + void *blob; + int error; + + if ((p->p_csflags & CS_VALID) == 0) { + *require_success = FALSE; + return TRUE; + } + + error = cs_entitlements_blob_get(p, &blob, &length); + if (error == 0 && blob != NULL) { + *require_success = TRUE; /* fatal on the desktop when entitlements are present */ + return TRUE; + } + + *require_success = FALSE; + return 0; +} + + +static int check_for_signature(proc_t p, struct image_params *imgp) { - void *blob = NULL; - size_t length = 0; mach_port_t port = NULL; kern_return_t kr = KERN_FAILURE; int error = EACCES; + boolean_t unexpected_failure = FALSE; unsigned char hash[SHA1_RESULTLEN]; + boolean_t require_success = FALSE; /* * Override inherited code signing flags with the @@ -3846,18 +4159,8 @@ check_for_signature(proc_t p, struct image_params *imgp) vm_map_switch_protect(get_task_map(p->task), TRUE); } - /* If the process is not signed or if it contains - * entitlements, we need to communicate through the - * task_access_port to taskgated. taskgated will provide a - * detached code signature if present, and will enforce any - * restrictions on entitlements. taskgated returns - * KERN_SUCCESS if it has completed its work and the exec - * should continue, or KERN_FAILURE if the exec should fail. - */ - error = cs_entitlements_blob_get(p, &blob, &length); - - /* if signed and no entitlements, then we're done here */ - if ((p->p_csflags & CS_VALID) && NULL == blob) { + /* check if callout to taskgated is needed */ + if (!taskgated_required(p, &require_success)) { error = 0; goto done; } @@ -3865,18 +4168,31 @@ check_for_signature(proc_t p, struct image_params *imgp) kr = task_get_task_access_port(p->task, &port); if (KERN_SUCCESS != kr || !IPC_PORT_VALID(port)) { error = 0; -#if !CONFIG_EMBEDDED - /* fatal on the desktop when entitlements are present */ - if (NULL != blob) + if (require_success) error = EACCES; -#endif goto done; } + /* + * taskgated returns KERN_SUCCESS if it has completed its work + * and the exec should continue, KERN_FAILURE if the exec should + * fail, or it may error out with different error code in an + * event of mig failure (e.g. process was signalled during the + * rpc call, taskgated died, mig server died etc.). + */ + kr = find_code_signature(port, p->p_pid); - if (KERN_SUCCESS != kr) { + switch (kr) { + case KERN_SUCCESS: + error = 0; + break; + case KERN_FAILURE: error = EACCES; goto done; + default: + error = EACCES; + unexpected_failure = TRUE; + goto done; } /* Only do this if exec_resettextvp() did not fail */ @@ -3893,9 +4209,12 @@ check_for_signature(proc_t p, struct image_params *imgp) } done: - if (0 != error) + if (0 != error) { + if (!unexpected_failure) + p->p_csflags |= CS_KILLED; /* make very sure execution fails */ psignal(p, SIGKILL); + } return error; } @@ -3917,11 +4236,12 @@ static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, loa /* * Prefault executable or dyld entry point. 
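The vm_fault() calls that follow warm the pages the exec path is about to touch, so the later copyin() does not have to recover through the trap handler. A userspace analogue of the same trick, as a read-only touch loop (assumes the range is already mapped):

    #include <stddef.h>
    #include <unistd.h>

    static void
    prefault_range(const void *base, size_t len)
    {
        size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
        const volatile char *p = (const volatile char *)base;
        size_t off;

        for (off = 0; off < len; off += pgsz)
            (void)p[off];
        if (len != 0)
            (void)p[len - 1];   /* tail may straddle a page boundary */
    }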
*/ - vm_fault( current_map(), - vm_map_trunc_page(load_result->entry_point), - VM_PROT_READ | VM_PROT_EXECUTE, - FALSE, - THREAD_UNINT, NULL, 0); + vm_fault(current_map(), + vm_map_trunc_page(load_result->entry_point, + vm_map_page_mask(current_map())), + VM_PROT_READ | VM_PROT_EXECUTE, + FALSE, + THREAD_UNINT, NULL, 0); if (imgp->ip_flags & IMGPF_IS_64BIT) { expected_all_image_infos_size = sizeof(struct user64_dyld_all_image_infos); @@ -3942,18 +4262,20 @@ static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, loa * Pre-fault to avoid copyin() going through the trap handler * and recovery path. */ - vm_fault( current_map(), - vm_map_trunc_page(load_result->all_image_info_addr), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + vm_fault(current_map(), + vm_map_trunc_page(load_result->all_image_info_addr, + vm_map_page_mask(current_map())), + VM_PROT_READ | VM_PROT_WRITE, + FALSE, + THREAD_UNINT, NULL, 0); if ((load_result->all_image_info_addr & PAGE_MASK) + expected_all_image_infos_size > PAGE_SIZE) { /* all_image_infos straddles a page */ - vm_fault( current_map(), - vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + vm_fault(current_map(), + vm_map_trunc_page(load_result->all_image_info_addr + expected_all_image_infos_size - 1, + vm_map_page_mask(current_map())), + VM_PROT_READ | VM_PROT_WRITE, + FALSE, + THREAD_UNINT, NULL, 0); } ret = copyin(load_result->all_image_info_addr, @@ -4004,26 +4326,30 @@ static void exec_prefault_data(proc_t p __unused, struct image_params *imgp, loa (uint64_t)dyld_all_image_infos_address); #endif - vm_fault( current_map(), - vm_map_trunc_page(notification_address + dyld_slide_amount), - VM_PROT_READ | VM_PROT_EXECUTE, - FALSE, - THREAD_UNINT, NULL, 0); - vm_fault( current_map(), - vm_map_trunc_page(dyld_image_address + dyld_slide_amount), - VM_PROT_READ | VM_PROT_EXECUTE, - FALSE, - THREAD_UNINT, NULL, 0); - vm_fault( current_map(), - vm_map_trunc_page(dyld_version_address + dyld_slide_amount), - VM_PROT_READ, - FALSE, - THREAD_UNINT, NULL, 0); - vm_fault( current_map(), - vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); + vm_fault(current_map(), + vm_map_trunc_page(notification_address + dyld_slide_amount, + vm_map_page_mask(current_map())), + VM_PROT_READ | VM_PROT_EXECUTE, + FALSE, + THREAD_UNINT, NULL, 0); + vm_fault(current_map(), + vm_map_trunc_page(dyld_image_address + dyld_slide_amount, + vm_map_page_mask(current_map())), + VM_PROT_READ | VM_PROT_EXECUTE, + FALSE, + THREAD_UNINT, NULL, 0); + vm_fault(current_map(), + vm_map_trunc_page(dyld_version_address + dyld_slide_amount, + vm_map_page_mask(current_map())), + VM_PROT_READ, + FALSE, + THREAD_UNINT, NULL, 0); + vm_fault(current_map(), + vm_map_trunc_page(dyld_all_image_infos_address + dyld_slide_amount, + vm_map_page_mask(current_map())), + VM_PROT_READ | VM_PROT_WRITE, + FALSE, + THREAD_UNINT, NULL, 0); } } } diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c index fa45facf3..1ef04170d 100644 --- a/bsd/kern/kern_exit.c +++ b/bsd/kern/kern_exit.c @@ -167,7 +167,8 @@ kern_return_t sys_perf_notify(thread_t thread, int pid); kern_return_t task_exception_notify(exception_type_t exception, mach_exception_data_type_t code, mach_exception_data_type_t subcode); void delay(int); - +void gather_rusage_info_v2(proc_t p, struct rusage_info_v2 *ru, int flavor); + /* * NOTE: 
Source and target may *NOT* overlap! * XXX Should share code with bsd/dev/ppc/unix_signal.c @@ -245,11 +246,12 @@ exit(proc_t p, struct exit_args *uap, int *retval) int exit1(proc_t p, int rv, int *retval) { - return exit1_internal(p, rv, retval, TRUE, TRUE); + return exit1_internal(p, rv, retval, TRUE, TRUE, 0); } int -exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, boolean_t perf_notify) +exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, boolean_t perf_notify, + int jetsam_flags) { thread_t self = current_thread(); struct task *task = p->task; @@ -292,11 +294,9 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo DTRACE_PROC1(exit, int, CLD_EXITED); /* mark process is going to exit and pull out of DBG/disk throttle */ - proc_removethrottle(p); - -#if CONFIG_MEMORYSTATUS - memorystatus_list_remove(p->p_pid); -#endif + /* TODO: This should be done after becoming exit thread */ + proc_set_task_policy(p->task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, + TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE); proc_lock(p); error = proc_transstart(p, 1); @@ -308,6 +308,9 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo */ proc_unlock(p); if (current_proc() == p){ + if (p->exit_thread == self) + printf("exit_thread failed to exit, leaving process %s[%d] in unkillable limbo\n", + p->p_comm, p->p_pid); thread_exception_return(); } else { /* external termination like jetsam */ @@ -347,6 +350,7 @@ exit1_internal(proc_t p, int rv, int *retval, boolean_t thread_can_terminate, bo p->p_lflag |= P_LEXIT; p->p_xstat = rv; + p->p_lflag |= jetsam_flags; proc_transend(p, 1); proc_unlock(p); @@ -366,6 +370,7 @@ proc_prepareexit(proc_t p, int rv, boolean_t perf_notify) struct uthread *ut; thread_t self = current_thread(); ut = get_bsdthread_info(self); + struct rusage_superset *rup; /* If a core should be generated, notify crash reporter */ if (hassigprop(WTERMSIG(rv), SA_CORE) || ((p->p_csflags & CS_KILLED) != 0)) { @@ -396,6 +401,27 @@ skipcheck: (void)sys_perf_notify(self, p->p_pid); } + /* + * Before this process becomes a zombie, stash resource usage + * stats in the proc for external observers to query + * via proc_pid_rusage(). + * + * If the zombie allocation fails, just punt the stats. + */ + MALLOC_ZONE(rup, struct rusage_superset *, + sizeof (*rup), M_ZOMBIE, M_WAITOK); + if (rup != NULL) { + gather_rusage_info_v2(p, &rup->ri, RUSAGE_INFO_V2); + rup->ri.ri_phys_footprint = 0; + rup->ri.ri_proc_exit_abstime = mach_absolute_time(); + + /* + * Make the rusage_info visible to external observers + * only after it has been completely filled in. + */ + p->p_ru = rup; + } + /* * Remove proc from allproc queue and from pidhash chain. * Need to do this before we do anything that can block. @@ -405,6 +431,10 @@ skipcheck: proc_list_lock(); +#if CONFIG_MEMORYSTATUS + memorystatus_remove(p, TRUE); +#endif + LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); /* Place onto zombproc. 
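The ordering comment above ("visible ... only after it has been completely filled in") is a publish-after-initialize pattern: the p_ru pointer is the publication point, so it is stored last. In the kernel the surrounding locking provides the ordering; a lock-free C11 analogue would make the store explicit with release semantics (types here are illustrative):

    #include <stdatomic.h>
    #include <stdlib.h>
    #include <string.h>

    struct snapshot { long usage[8]; };

    static _Atomic(struct snapshot *) published;

    static void
    publish_snapshot(const long usage[8])
    {
        struct snapshot *s = malloc(sizeof(*s));

        if (s == NULL)
            return;                 /* punt the stats, as above */
        memcpy(s->usage, usage, sizeof(s->usage));
        /* pairs with an acquire load in the observer, which then sees a
         * fully initialized snapshot or none at all */
        atomic_store_explicit(&published, s, memory_order_release);
    }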
*/ /* will not be visible via proc_find */ @@ -513,10 +543,6 @@ proc_exit(proc_t p) proc_unlock(p); #endif - /* XXX Zombie allocation may fail, in which case stats get lost */ - MALLOC_ZONE(p->p_ru, struct rusage *, - sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK); - nspace_proc_exit(p); #if VM_PRESSURE_EVENTS @@ -531,7 +557,7 @@ proc_exit(proc_t p) proc_refdrain(p); /* if any pending cpu limits action, clear it */ - task_clear_cpuusage(p->task); + task_clear_cpuusage(p->task, TRUE); workqueue_mark_exiting(p); workqueue_exit(p); @@ -552,14 +578,8 @@ proc_exit(proc_t p) * no need to throttle this thread since its going away * but we do need to update our bookeeping w/r to throttled threads */ - throttle_lowpri_io(FALSE); - } - -#if !CONFIG_EMBEDDED - if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) { - throttle_legacy_process_decr(); + throttle_lowpri_io(0); } -#endif #if SYSV_SHM /* Close ref SYSV Shared memory*/ @@ -581,6 +601,7 @@ proc_exit(proc_t p) if (sessp->s_ttyvp != NULLVP) { struct vnode *ttyvp; int ttyvid; + int cttyflag = 0; struct vfs_context context; struct tty *tp; @@ -600,6 +621,8 @@ proc_exit(proc_t p) session_lock(sessp); tp = SESSION_TP(sessp); } + cttyflag = sessp->s_flags & S_CTTYREF; + sessp->s_flags &= ~S_CTTYREF; ttyvp = sessp->s_ttyvp; ttyvid = sessp->s_ttyvid; sessp->s_ttyvp = NULLVP; @@ -618,6 +641,13 @@ proc_exit(proc_t p) context.vc_ucred = kauth_cred_proc_ref(p); vnode_rele(ttyvp); VNOP_REVOKE(ttyvp, REVOKEALL, &context); + if (cttyflag) { + /* + * Release the extra usecount taken in cttyopen. + * usecount should be released after VNOP_REVOKE is called. + */ + vnode_rele(ttyvp); + } vnode_put(ttyvp); kauth_cred_unref(&context.vc_ucred); ttyvp = NULLVP; @@ -653,7 +683,6 @@ proc_exit(proc_t p) proc_childdrainstart(p); while ((q = p->p_children.lh_first) != NULL) { int reparentedtoinit = (q->p_listflag & P_LIST_DEADPARENT) ? 1 : 0; - q->p_listflag |= P_LIST_DEADPARENT; if (q->p_stat == SZOMB) { if (p != q->p_pptr) panic("parent child linkage broken"); @@ -673,12 +702,13 @@ proc_exit(proc_t p) */ (void)reap_child_locked(p, q, 1, reparentedtoinit, 1, 0); } else { - proc_reparentlocked(q, initproc, 0, 1); /* * Traced processes are killed * since their existence means someone is messing up. */ if (q->p_lflag & P_LTRACED) { + struct proc *opp; + /* * Take a reference on the child process to * ensure it doesn't exit and disappear between @@ -689,8 +719,26 @@ proc_exit(proc_t p) continue; proc_list_unlock(); + + opp = proc_find(q->p_oppid); + if (opp != PROC_NULL) { + proc_list_lock(); + q->p_oppid = 0; + proc_list_unlock(); + proc_reparentlocked(q, opp, 0, 0); + proc_rele(opp); + } else { + /* original parent exited while traced */ + proc_list_lock(); + q->p_listflag |= P_LIST_DEADPARENT; + q->p_oppid = 0; + proc_list_unlock(); + proc_reparentlocked(q, initproc, 0, 0); + } + proc_lock(q); q->p_lflag &= ~P_LTRACED; + if (q->sigwait_thread) { thread_t thread = q->sigwait_thread; @@ -712,6 +760,9 @@ proc_exit(proc_t p) psignal(q, SIGKILL); proc_list_lock(); proc_rele_locked(q); + } else { + q->p_listflag |= P_LIST_DEADPARENT; + proc_reparentlocked(q, initproc, 0, 1); } } } @@ -733,12 +784,11 @@ proc_exit(proc_t p) * info and self times. If we were unable to allocate a zombie * structure, this information is lost. 
*/ - /* No need for locking here as no one than this thread can access this */ if (p->p_ru != NULL) { calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime, NULL); - *p->p_ru = p->p_stats->p_ru; + p->p_ru->ru = p->p_stats->p_ru; - ruadd(p->p_ru, &p->p_stats->p_cru); + ruadd(&(p->p_ru->ru), &p->p_stats->p_cru); } /* @@ -807,6 +857,8 @@ proc_exit(proc_t p) pp = proc_parent(p); if (pp->p_flag & P_NOCLDWAIT) { + if (p->p_ru != NULL) { + proc_lock(pp); #if 3839178 /* * If the parent is ignoring SIGCHLD, then POSIX requires @@ -821,13 +873,12 @@ proc_exit(proc_t p) * zombie to init. If we were unable to allocate a * zombie structure, this information is lost. */ - if (p->p_ru != NULL) { - proc_lock(pp); - ruadd(&pp->p_stats->p_cru, p->p_ru); + ruadd(&pp->p_stats->p_cru, &p->p_ru->ru); +#endif /* !3839178 */ + update_rusage_info_child(&pp->p_stats->ri_child, &p->p_ru->ri); proc_unlock(pp); } -#endif /* !3839178 */ - + /* kernel can reap this one, no need to move it to launchd */ proc_list_lock(); p->p_listflag |= P_LIST_DEADPARENT; @@ -906,7 +957,7 @@ proc_exit(proc_t p) * no need to throttle this thread since its going away * but we do need to update our bookeeping w/r to throttled threads */ - throttle_lowpri_io(FALSE); + throttle_lowpri_io(0); } proc_rele(pp); @@ -1004,7 +1055,11 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoi } } +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" proc_knote(child, NOTE_REAP); +#pragma clang diagnostic pop + proc_knote_drain(child); child->p_xstat = 0; @@ -1020,7 +1075,8 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoi */ if (!(parent->p_flag & P_NOCLDWAIT)) #endif /* 3839178 */ - ruadd(&parent->p_stats->p_cru, child->p_ru); + ruadd(&parent->p_stats->p_cru, &child->p_ru->ru); + update_rusage_info_child(&parent->p_stats->ri_child, &child->p_ru->ri); proc_unlock(parent); FREE_ZONE(child->p_ru, sizeof *child->p_ru, M_ZOMBIE); child->p_ru = NULL; @@ -1075,6 +1131,14 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoi proc_checkdeadrefs(child); nprocs--; + if (deadparent) { + /* + * If a child zombie is being reaped because its parent + * is exiting, make sure we update the list flag + */ + child->p_listflag |= P_LIST_DEADPARENT; + } + proc_list_unlock(); #if CONFIG_FINE_LOCK_GROUPS @@ -1189,14 +1253,14 @@ loop1: } else { if (IS_64BIT_PROCESS(q)) { struct user64_rusage my_rusage; - munge_user64_rusage(p->p_ru, &my_rusage); + munge_user64_rusage(&p->p_ru->ru, &my_rusage); error = copyout((caddr_t)&my_rusage, uap->rusage, sizeof (my_rusage)); } else { struct user32_rusage my_rusage; - munge_user32_rusage(p->p_ru, &my_rusage); + munge_user32_rusage(&p->p_ru->ru, &my_rusage); error = copyout((caddr_t)&my_rusage, uap->rusage, sizeof (my_rusage)); @@ -1658,6 +1722,10 @@ vfork_exit_internal(proc_t p, int rv, int forceexit) proc_list_lock(); +#if CONFIG_MEMORYSTATUS + memorystatus_remove(p, TRUE); +#endif + LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); /* Place onto zombproc. 
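The diagnostic push/pop around proc_knote(child, NOTE_REAP) above is the standard way to keep one deliberate use of a deprecated symbol warning-clean without disabling the warning file-wide; a minimal standalone example of the same pattern:

    #include <stdio.h>

    __attribute__((deprecated("scheduled for removal")))
    static void old_interface(void) { puts("still used on purpose"); }

    int
    main(void)
    {
    #pragma clang diagnostic push
    #pragma clang diagnostic ignored "-Wdeprecated-declarations"
        old_interface();    /* intentional; builds cleanly under -Werror */
    #pragma clang diagnostic pop
        return (0);
    }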
*/ /* will not be visible via proc_find */ @@ -1704,11 +1772,11 @@ vproc_exit(proc_t p) #endif struct pgrp * pg; struct session *sessp; + struct rusage_superset *rup; /* XXX Zombie allocation may fail, in which case stats get lost */ - MALLOC_ZONE(p->p_ru, struct rusage *, - sizeof (*p->p_ru), M_ZOMBIE, M_WAITOK); - + MALLOC_ZONE(rup, struct rusage_superset *, + sizeof (*rup), M_ZOMBIE, M_WAITOK); proc_refdrain(p); @@ -1718,18 +1786,13 @@ vproc_exit(proc_t p) */ fdfree(p); -#if !CONFIG_EMBEDDED - if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) { - throttle_legacy_process_decr(); - } -#endif - sessp = proc_session(p); if (SESS_LEADER(p, sessp)) { if (sessp->s_ttyvp != NULLVP) { struct vnode *ttyvp; int ttyvid; + int cttyflag = 0; struct vfs_context context; struct tty *tp; @@ -1749,6 +1812,8 @@ vproc_exit(proc_t p) session_lock(sessp); tp = SESSION_TP(sessp); } + cttyflag = sessp->s_flags & S_CTTYREF; + sessp->s_flags &= ~S_CTTYREF; ttyvp = sessp->s_ttyvp; ttyvid = sessp->s_ttyvid; sessp->s_ttyvp = NULL; @@ -1767,6 +1832,13 @@ vproc_exit(proc_t p) context.vc_ucred = kauth_cred_proc_ref(p); vnode_rele(ttyvp); VNOP_REVOKE(ttyvp, REVOKEALL, &context); + if (cttyflag) { + /* + * Release the extra usecount taken in cttyopen. + * usecount should be released after VNOP_REVOKE is called. + */ + vnode_rele(ttyvp); + } vnode_put(ttyvp); kauth_cred_unref(&context.vc_ucred); ttyvp = NULLVP; @@ -1791,7 +1863,6 @@ vproc_exit(proc_t p) proc_list_lock(); proc_childdrainstart(p); while ((q = p->p_children.lh_first) != NULL) { - q->p_listflag |= P_LIST_DEADPARENT; if (q->p_stat == SZOMB) { if (p != q->p_pptr) panic("parent child linkage broken"); @@ -1811,15 +1882,34 @@ vproc_exit(proc_t p) */ (void)reap_child_locked(p, q, 1, 0, 1, 0); } else { - proc_reparentlocked(q, initproc, 0, 1); /* * Traced processes are killed * since their existence means someone is messing up. */ if (q->p_lflag & P_LTRACED) { + struct proc *opp; + proc_list_unlock(); + + opp = proc_find(q->p_oppid); + if (opp != PROC_NULL) { + proc_list_lock(); + q->p_oppid = 0; + proc_list_unlock(); + proc_reparentlocked(q, opp, 0, 0); + proc_rele(opp); + } else { + /* original parent exited while traced */ + proc_list_lock(); + q->p_listflag |= P_LIST_DEADPARENT; + q->p_oppid = 0; + proc_list_unlock(); + proc_reparentlocked(q, initproc, 0, 0); + } + proc_lock(q); q->p_lflag &= ~P_LTRACED; + if (q->sigwait_thread) { thread_t thread = q->sigwait_thread; @@ -1840,6 +1930,9 @@ vproc_exit(proc_t p) psignal(q, SIGKILL); proc_list_lock(); + } else { + q->p_listflag |= P_LIST_DEADPARENT; + proc_reparentlocked(q, initproc, 0, 1); } } } @@ -1861,11 +1954,10 @@ vproc_exit(proc_t p) * info and self times. If we were unable to allocate a zombie * structure, this information is lost. 
*/ - /* No need for locking here as no one than this thread can access this */ - if (p->p_ru != NULL) { - *p->p_ru = p->p_stats->p_ru; - timerclear(&p->p_ru->ru_utime); - timerclear(&p->p_ru->ru_stime); + if (rup != NULL) { + rup->ru = p->p_stats->p_ru; + timerclear(&rup->ru.ru_utime); + timerclear(&rup->ru.ru_stime); #ifdef FIXME if (task) { @@ -1877,10 +1969,10 @@ vproc_exit(proc_t p) task_info_stuff = MACH_TASK_BASIC_INFO_COUNT; task_info(task, MACH_TASK_BASIC_INFO, &tinfo, &task_info_stuff); - p->p_ru->ru_utime.tv_sec = tinfo.user_time.seconds; - p->p_ru->ru_utime.tv_usec = tinfo.user_time.microseconds; - p->p_ru->ru_stime.tv_sec = tinfo.system_time.seconds; - p->p_ru->ru_stime.tv_usec = tinfo.system_time.microseconds; + p->p_ru->ru.ru_utime.tv_sec = tinfo.user_time.seconds; + p->p_ru->ru.ru_utime.tv_usec = tinfo.user_time.microseconds; + p->p_ru->ru.ru_stime.tv_sec = tinfo.system_time.seconds; + p->p_ru->ru.ru_stime.tv_usec = tinfo.system_time.microseconds; task_ttimes_stuff = TASK_THREAD_TIMES_INFO_COUNT; task_info(task, TASK_THREAD_TIMES_INFO, @@ -1890,12 +1982,22 @@ vproc_exit(proc_t p) ut.tv_usec = ttimesinfo.user_time.microseconds; st.tv_sec = ttimesinfo.system_time.seconds; st.tv_usec = ttimesinfo.system_time.microseconds; - timeradd(&ut,&p->p_ru->ru_utime,&p->p_ru->ru_utime); - timeradd(&st,&p->p_ru->ru_stime,&p->p_ru->ru_stime); + timeradd(&ut,&p->p_ru->ru.ru_utime,&p->p_ru->ru.ru_utime); + timeradd(&st,&p->p_ru->ru.ru_stime,&p->p_ru->ru.ru_stime); } #endif /* FIXME */ - ruadd(p->p_ru, &p->p_stats->p_cru); + ruadd(&rup->ru, &p->p_stats->p_cru); + + gather_rusage_info_v2(p, &rup->ri, RUSAGE_INFO_V2); + rup->ri.ri_phys_footprint = 0; + rup->ri.ri_proc_exit_abstime = mach_absolute_time(); + + /* + * Now that we have filled in the rusage info, make it + * visible to an external observer via proc_pid_rusage(). + */ + p->p_ru = rup; } /* diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 2f09ba8ee..c6dd52824 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -141,7 +141,7 @@ void thread_set_child(thread_t child, int pid); void *act_thread_csave(void); -thread_t cloneproc(task_t, proc_t, int); +thread_t cloneproc(task_t, proc_t, int, int); proc_t forkproc(proc_t); void forkproc_free(proc_t); thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64bit); @@ -281,12 +281,8 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK)) != 0) { retval[1] = 0; } else { - /* - * kludge: rely on uu_proc being set in the vfork case, - * rather than returning the actual thread. We can remove - * this when we remove the uu_proc/current_proc() kludge. 
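
// Editor's note: the ordering in the hunk above is deliberate --
// vproc_exit() fills the locally held 'rup' completely before storing it
// to p->p_ru, so proc_pid_rusage() can never observe a half-built
// record. A minimal sketch of that publish-after-init shape (C11 atomics
// stand in for the kernel's own locking and visibility rules):
#include <stdatomic.h>

struct record_sk { long a, b; };
static _Atomic(struct record_sk *) published_sk;

static void
publish_sk(struct record_sk *r)
{
	r->a = 1;   // fill in every field first...
	r->b = 2;
	atomic_store_explicit(&published_sk, r,   // ...then make it visible
	    memory_order_release);
}
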
- */ - proc_t child_proc = current_proc(); + uthread_t ut = get_bsdthread_info(current_thread()); + proc_t child_proc = ut->uu_proc; retval[0] = child_proc->p_pid; retval[1] = 1; /* flag child return for user space */ @@ -299,12 +295,12 @@ vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval) proc_signalend(child_proc, 0); proc_transend(child_proc, 0); - /* flag the fork has occurred */ proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid); DTRACE_PROC1(create, proc_t, child_proc); + ut->uu_flag &= ~UT_VFORKING; } - return(err); + return (err); } @@ -484,7 +480,20 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind) child_proc->p_vforkact = parent_thread; child_proc->p_stat = SRUN; - parent_uthread->uu_flag |= UT_VFORK; + /* + * Until UT_VFORKING is cleared at the end of the vfork + * syscall, the process identity of this thread is slightly + * murky. + * + * As long as UT_VFORK and it's associated field (uu_proc) + * is set, current_proc() will always return the child process. + * + * However dtrace_proc_selfpid() returns the parent pid to + * ensure that e.g. the proc:::create probe actions accrue + * to the parent. (Otherwise the child magically seems to + * have created itself!) + */ + parent_uthread->uu_flag |= UT_VFORK | UT_VFORKING; parent_uthread->uu_proc = child_proc; parent_uthread->uu_userstate = (void *)act_thread_csave(); parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask; @@ -529,7 +538,7 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind) * will, in effect, create a duplicate of it, with only minor * differences. Contrarily, spawned processes do not inherit. */ - if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) { + if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE, FALSE)) == NULL) { /* Failed to create thread */ err = EAGAIN; goto bad; @@ -654,12 +663,6 @@ fork1(proc_t parent_proc, thread_t *child_threadp, int kind) /* return the thread pointer to the caller */ *child_threadp = child_thread; -#if CONFIG_MEMORYSTATUS - if (!err) { - memorystatus_list_add(child_proc->p_pid, DEFAULT_JETSAM_PRIORITY, -1); - } -#endif - bad: /* * In the error case, we return a 0 value for the returned pid (but @@ -773,7 +776,8 @@ fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit, &child_task); if (result != KERN_SUCCESS) { - printf("execve: task_create_internal failed. Code: %d\n", result); + printf("%s: task_create_internal failed. Code: %d\n", + __func__, result); goto bad; } @@ -809,14 +813,15 @@ fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int /* Create a new thread for the child process */ result = thread_create(child_task, &child_thread); if (result != KERN_SUCCESS) { - printf("execve: thread_create failed. Code: %d\n", result); + printf("%s: thread_create failed. Code: %d\n", + __func__, result); task_deallocate(child_task); child_task = NULL; } /* - * Tag thread as being the first thread in its task. - */ + * Tag thread as being the first thread in its task. 
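
// Editor's note: per the UT_VFORK comment above, current_proc() resolves
// to the vfork child for as long as uu_proc is set on the parent's
// thread. A stand-in sketch of that lookup (the real uthread and proc
// layouts are considerably larger):
struct proc_sk2;
struct uthread_sk2 {
	unsigned         uu_flag;
	struct proc_sk2 *uu_proc;      // vfork child, valid under UT_VFORK
	struct proc_sk2 *uu_task_proc; // stand-in for the task's own proc
};
enum { UT_VFORK_SK = 0x1 };

static struct proc_sk2 *
current_proc_sketch(struct uthread_sk2 *ut)
{
	// under vfork the thread temporarily "is" the child process
	if ((ut->uu_flag & UT_VFORK_SK) && ut->uu_proc != NULL)
		return ut->uu_proc;
	return ut->uu_task_proc;
}
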
+ */ thread_set_tag(child_thread, THREAD_TAG_MAINTHREAD); bad: @@ -918,6 +923,8 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) * memory from the parent; if this is * non-NULL, then the parent_task must * also be non-NULL + * memstat_internal Whether to track the process in the + * jetsam priority list (if configured) * * Returns: !NULL pointer to new child thread * NULL Failure (unspecified) @@ -939,8 +946,11 @@ fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval) * live with this being somewhat awkward. */ thread_t -cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory) +cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory, int memstat_internal) { +#if !CONFIG_MEMORYSTATUS +#pragma unused(memstat_internal) +#endif task_t child_task; proc_t child_proc; thread_t child_thread = NULL; @@ -970,6 +980,14 @@ cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory) OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag); } +#if CONFIG_MEMORYSTATUS + if (memstat_internal) { + proc_list_lock(); + child_proc->p_memstat_state |= P_MEMSTAT_INTERNAL; + proc_list_unlock(); + } +#endif + /* make child visible */ pinsertchild(parent_proc, child_proc); @@ -1027,12 +1045,6 @@ forkproc_free(proc_t p) /* Need to undo the effects of the fdcopy(), if any */ fdfree(p); -#if !CONFIG_EMBEDDED - if (p->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) { - throttle_legacy_process_decr(); - } -#endif - /* * Drop the reference on a text vnode pointer, if any * XXX This code is broken in forkproc(); see ; @@ -1201,19 +1213,11 @@ retry: * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ -#if !CONFIG_EMBEDDED child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR | P_DELAYIDLESLEEP)); -#else /* !CONFIG_EMBEDDED */ - child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY | P_DISABLE_ASLR)); -#endif /* !CONFIG_EMBEDDED */ if (parent_proc->p_flag & P_PROFIL) startprofclock(child_proc); -#if !CONFIG_EMBEDDED - if (child_proc->p_legacy_behavior & PROC_LEGACY_BEHAVIOR_IOTHROTTLE) { - throttle_legacy_process_incr(); - } -#endif + child_proc->p_vfs_iopolicy = (parent_proc->p_vfs_iopolicy & (P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY)); /* * Note that if the current thread has an assumed identity, this @@ -1281,12 +1285,8 @@ retry: /* Intialize new process stats, including start time */ /* non-zeroed portion contains garbage AFAICT */ - bzero(&child_proc->p_stats->pstat_startzero, - (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero - - (caddr_t)&child_proc->p_stats->pstat_startzero)); - bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof)); - microtime(&child_proc->p_start); - child_proc->p_stats->p_start = child_proc->p_start; /* for compat */ + bzero(child_proc->p_stats, sizeof(*child_proc->p_stats)); + microtime_with_abstime(&child_proc->p_start, &child_proc->p_stats->ps_start); if (parent_proc->p_sigacts != NULL) (void)memcpy(child_proc->p_sigacts, @@ -1340,6 +1340,7 @@ retry: child_proc->p_lflag |= P_LREGISTER; } child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset; + child_proc->p_dispatchqueue_serialno_offset = parent_proc->p_dispatchqueue_serialno_offset; #if PSYNCH pth_proc_hashinit(child_proc); #endif /* PSYNCH */ @@ -1357,8 +1358,18 @@ retry: } #endif - /* Default to no tracking of dirty state */ - child_proc->p_dirty = 0; +#if CONFIG_MEMORYSTATUS + 
/* Memorystatus + jetsam init */ + child_proc->p_memstat_state = 0; + child_proc->p_memstat_effectivepriority = JETSAM_PRIORITY_DEFAULT; + child_proc->p_memstat_requestedpriority = JETSAM_PRIORITY_DEFAULT; + child_proc->p_memstat_userdata = 0; +#if CONFIG_FREEZE + child_proc->p_memstat_suspendedfootprint = 0; +#endif + child_proc->p_memstat_dirty = 0; + child_proc->p_memstat_idledeadline = 0; +#endif /* CONFIG_MEMORYSTATUS */ bad: return(child_proc); @@ -1433,7 +1444,6 @@ uthread_alloc(task_t task, thread_t thread, int noinherit) p = (proc_t) get_bsdtask_info(task); uth = (uthread_t)ut; - uth->uu_kwe.kwe_uth = uth; uth->uu_thread = thread; /* @@ -1520,7 +1530,7 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) * Calling this routine will clean up any throttle info reference * still inuse by the thread. */ - throttle_lowpri_io(FALSE); + throttle_lowpri_io(0); } /* * Per-thread audit state should never last beyond system @@ -1545,10 +1555,8 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info) if (uth->uu_allocsize && uth->uu_wqset){ kfree(uth->uu_wqset, uth->uu_allocsize); - sel->count = 0; uth->uu_allocsize = 0; uth->uu_wqset = 0; - sel->wql = 0; } if(uth->pth_name != NULL) diff --git a/bsd/kern/kern_guarded.c b/bsd/kern/kern_guarded.c new file mode 100644 index 000000000..5c175c7bb --- /dev/null +++ b/bsd/kern/kern_guarded.c @@ -0,0 +1,683 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Experimental guarded file descriptor support. + */ + +kern_return_t task_exception_notify(exception_type_t exception, + mach_exception_data_type_t code, mach_exception_data_type_t subcode); + +/* + * Most fd's have an underlying fileproc struct; but some may be + * guarded_fileproc structs which implement guarded fds. The latter + * struct (below) embeds the former. + * + * The two types should be distinguished by the "type" portion of f_flags. + * There's also a magic number to help catch misuse and bugs. + * + * This is a bit unpleasant, but results from the desire to allow + * alternate file behaviours for a few file descriptors without + * growing the fileproc data structure. 
+ */ + +struct guarded_fileproc { + struct fileproc gf_fileproc; + u_int gf_magic; + u_int gf_attrs; + thread_t gf_thread; + guardid_t gf_guard; + int gf_exc_fd; + u_int gf_exc_code; +}; + +const size_t sizeof_guarded_fileproc = sizeof (struct guarded_fileproc); + +#define FP_TO_GFP(fp) ((struct guarded_fileproc *)(fp)) +#define GFP_TO_FP(gfp) (&(gfp)->gf_fileproc) + +#define GUARDED_FILEPROC_MAGIC 0x29083 + +struct gfp_crarg { + guardid_t gca_guard; + u_int gca_attrs; +}; + +static struct fileproc * +guarded_fileproc_alloc_init(void *crarg) +{ + struct gfp_crarg *aarg = crarg; + struct guarded_fileproc *gfp; + + if ((gfp = kalloc(sizeof (*gfp))) == NULL) + return (NULL); + + bzero(gfp, sizeof (*gfp)); + gfp->gf_fileproc.f_flags = FTYPE_GUARDED; + gfp->gf_magic = GUARDED_FILEPROC_MAGIC; + gfp->gf_guard = aarg->gca_guard; + gfp->gf_attrs = aarg->gca_attrs; + + return (GFP_TO_FP(gfp)); +} + +void +guarded_fileproc_free(struct fileproc *fp) +{ + struct guarded_fileproc *gfp = FP_TO_GFP(fp); + + if (FILEPROC_TYPE(fp) != FTYPE_GUARDED || + GUARDED_FILEPROC_MAGIC != gfp->gf_magic) + panic("%s: corrupt fp %p flags %x", __func__, fp, fp->f_flags); + + kfree(gfp, sizeof (*gfp)); +} + +static int +fp_lookup_guarded(proc_t p, int fd, guardid_t guard, + struct guarded_fileproc **gfpp) +{ + struct fileproc *fp; + int error; + + if ((error = fp_lookup(p, fd, &fp, 1)) != 0) + return (error); + if (FILEPROC_TYPE(fp) != FTYPE_GUARDED) { + (void) fp_drop(p, fd, fp, 1); + return (EINVAL); + } + struct guarded_fileproc *gfp = FP_TO_GFP(fp); + + if (GUARDED_FILEPROC_MAGIC != gfp->gf_magic) + panic("%s: corrupt fp %p", __func__, fp); + + if (guard != gfp->gf_guard) { + (void) fp_drop(p, fd, fp, 1); + return (EPERM); /* *not* a mismatch exception */ + } + if (gfpp) + *gfpp = gfp; + return (0); +} + +/* + * Expected use pattern: + * + * if (FP_ISGUARDED(fp, GUARD_CLOSE)) { + * error = fp_guard_exception(p, fd, fp, kGUARD_EXC_CLOSE); + * proc_fdunlock(p); + * return (error); + * } + */ + +int +fp_isguarded(struct fileproc *fp, u_int attrs) +{ + if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) { + struct guarded_fileproc *gfp = FP_TO_GFP(fp); + + if (GUARDED_FILEPROC_MAGIC != gfp->gf_magic) + panic("%s: corrupt gfp %p flags %x", + __func__, gfp, fp->f_flags); + return ((attrs & gfp->gf_attrs) ? 1 : 0); + } + return (0); +} + +extern char *proc_name_address(void *p); + +int +fp_guard_exception(proc_t p, int fd, struct fileproc *fp, u_int code) +{ + if (FILEPROC_TYPE(fp) != FTYPE_GUARDED) + panic("%s corrupt fp %p flags %x", __func__, fp, fp->f_flags); + + struct guarded_fileproc *gfp = FP_TO_GFP(fp); + + /* all gfd fields protected via proc_fdlock() */ + proc_fdlock_assert(p, LCK_MTX_ASSERT_OWNED); + + if (NULL == gfp->gf_thread) { + thread_t t = current_thread(); + gfp->gf_thread = t; + gfp->gf_exc_fd = fd; + gfp->gf_exc_code = code; + + /* + * This thread was the first to attempt the + * operation that violated the guard on this fd; + * generate an exception. + */ + printf("%s: guarded fd exception: " + "fd %d code 0x%x guard 0x%llx\n", + proc_name_address(p), gfp->gf_exc_fd, + gfp->gf_exc_code, gfp->gf_guard); + + thread_guard_violation(t, GUARD_TYPE_FD); + } else { + /* + * We already recorded a violation on this fd for a + * different thread, so posting an exception is + * already in progress. We could pause for a bit + * and check again, or we could panic (though that seems + * heavy handed), or we could just press on with the + * error return alone. For now, resort to printf. 
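
// Editor's note: FP_TO_GFP() above is a plain pointer cast; it is only
// sound because gf_fileproc is the first member of struct
// guarded_fileproc (with gf_magic as a tripwire for misuse). A
// self-contained illustration of that embedding pattern:
#include <assert.h>
#include <stddef.h>

struct base_sk { int b_flags; };
struct wrapper_sk {
	struct base_sk w_base;   // must stay first for the cast to be valid
	unsigned       w_magic;
};
#define BASE_TO_WRAPPER_SK(bp) ((struct wrapper_sk *)(bp))

int main(void)
{
	struct wrapper_sk w = { .w_magic = 0x29083 };  // value from above
	struct base_sk *bp = &w.w_base;
	assert(BASE_TO_WRAPPER_SK(bp)->w_magic == 0x29083);
	assert(offsetof(struct wrapper_sk, w_base) == 0);  // what makes it legal
	return 0;
}
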
+ */ + printf("%s: guarded fd exception+: " + "fd %d code 0x%x guard 0x%llx\n", + proc_name_address(p), gfp->gf_exc_fd, + gfp->gf_exc_code, gfp->gf_guard); + } + + return (EPERM); +} + +/* + * (Invoked before returning to userland from the syscall handler.) + */ +void +fd_guard_ast(thread_t t) +{ + proc_t p = current_proc(); + struct filedesc *fdp = p->p_fd; + int i; + + proc_fdlock(p); + for (i = fdp->fd_lastfile; i >= 0; i--) { + struct fileproc *fp = fdp->fd_ofiles[i]; + + if (fp == NULL || + FILEPROC_TYPE(fp) != FTYPE_GUARDED) + continue; + + struct guarded_fileproc *gfp = FP_TO_GFP(fp); + + if (GUARDED_FILEPROC_MAGIC != gfp->gf_magic) + panic("%s: corrupt gfp %p flags %x", + __func__, gfp, fp->f_flags); + + if (gfp->gf_thread == t) { + mach_exception_data_type_t code, subcode; + + gfp->gf_thread = NULL; + + /* + * EXC_GUARD exception code namespace. + * + * code: + * +-------------------------------------------------+ + * | [63:61] guard type | [60:0] guard-specific data | + * +-------------------------------------------------+ + * + * subcode: + * +-------------------------------------------------+ + * | [63:0] guard-specific data | + * +-------------------------------------------------+ + * + * At the moment, we have just one guard type: file + * descriptor guards. + * + * File descriptor guards use the exception codes like + * so: + * + * code: + * +--------------------------------------------------+ + * |[63:61] GUARD_TYPE_FD | [60:32] flavor | [31:0] fd| + * +--------------------------------------------------+ + * + * subcode: + * +--------------------------------------------------+ + * | [63:0] guard value | + * +--------------------------------------------------+ + */ + code = (((uint64_t)GUARD_TYPE_FD) << 61) | + (((uint64_t)gfp->gf_exc_code) << 32) | + ((uint64_t)gfp->gf_exc_fd); + subcode = gfp->gf_guard; + proc_fdunlock(p); + + (void) task_exception_notify(EXC_GUARD, code, subcode); + psignal(p, SIGKILL); + + return; + } + } + proc_fdunlock(p); +} + +/* + * Experimental guarded file descriptor SPIs + */ + +/* + * int guarded_open_np(const char *pathname, int flags, + * const guardid_t *guard, u_int guardflags, ...); + * + * In this initial implementation, GUARD_DUP must be specified. + * GUARD_CLOSE, GUARD_SOCKET_IPC and GUARD_FILEPORT are optional. + * + * If GUARD_DUP wasn't specified, then we'd have to do the (extra) work + * to allow dup-ing a descriptor to inherit the guard onto the new + * descriptor. (Perhaps GUARD_DUP behaviours should just always be true + * for a guarded fd? Or, more sanely, all the dup operations should + * just always propagate the guard?) + * + * Guarded descriptors are always close-on-exec, and GUARD_CLOSE + * requires close-on-fork; O_CLOEXEC must be set in flags. + * This setting is immutable; attempts to clear the flag will + * cause a guard exception. + */ +int +guarded_open_np(proc_t p, struct guarded_open_np_args *uap, int32_t *retval) +{ + if ((uap->flags & O_CLOEXEC) == 0) + return (EINVAL); + +#define GUARD_REQUIRED (GUARD_DUP) +#define GUARD_ALL (GUARD_REQUIRED | \ + (GUARD_CLOSE | GUARD_SOCKET_IPC | GUARD_FILEPORT)) + + if (((uap->guardflags & GUARD_REQUIRED) != GUARD_REQUIRED) || + ((uap->guardflags & ~GUARD_ALL) != 0)) + return (EINVAL); + + int error; + struct gfp_crarg crarg = { + .gca_attrs = uap->guardflags + }; + + if ((error = copyin(uap->guard, + &(crarg.gca_guard), sizeof (crarg.gca_guard))) != 0) + return (error); + + /* + * Disallow certain guard values -- is zero enough? 
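
// Editor's note: helpers matching the EXC_GUARD code layout documented
// above ([63:61] guard type, [60:32] flavor, [31:0] fd); they mirror the
// shift arithmetic in fd_guard_ast(). GUARD_TYPE_FD_SK is a stand-in
// constant, not the header value.
#include <stdint.h>

#define GUARD_TYPE_FD_SK 0x2ULL   // assumed; check sys/guarded.h

static inline uint64_t
exc_guard_encode(uint64_t type, uint64_t flavor, uint32_t fd)
{
	return (type << 61) | ((flavor & 0x1fffffffULL) << 32) | (uint64_t)fd;
}

static inline uint64_t exc_guard_type(uint64_t code)   { return code >> 61; }
static inline uint64_t exc_guard_flavor(uint64_t code) { return (code >> 32) & 0x1fffffffULL; }
static inline uint32_t exc_guard_fd(uint64_t code)     { return (uint32_t)code; }
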
+ */ + if (crarg.gca_guard == 0) + return (EINVAL); + + struct filedesc *fdp = p->p_fd; + struct vnode_attr va; + struct nameidata nd; + vfs_context_t ctx = vfs_context_current(); + int cmode; + + VATTR_INIT(&va); + cmode = ((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; + VATTR_SET(&va, va_mode, cmode & ACCESSPERMS); + + NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, + uap->path, ctx); + + return (open1(ctx, &nd, uap->flags | O_CLOFORK, &va, + guarded_fileproc_alloc_init, &crarg, retval)); +} + +/* + * int guarded_kqueue_np(const guardid_t *guard, u_int guardflags); + * + * Create a guarded kqueue descriptor with guardid and guardflags. + * + * Same restrictions on guardflags as for guarded_open_np(). + * All kqueues are -always- close-on-exec and close-on-fork by themselves. + * + * XXX Is it ever sensible to allow a kqueue fd (guarded or not) to + * be sent to another process via a fileport or socket? + */ +int +guarded_kqueue_np(proc_t p, struct guarded_kqueue_np_args *uap, int32_t *retval) +{ + if (((uap->guardflags & GUARD_REQUIRED) != GUARD_REQUIRED) || + ((uap->guardflags & ~GUARD_ALL) != 0)) + return (EINVAL); + + int error; + struct gfp_crarg crarg = { + .gca_attrs = uap->guardflags + }; + + if ((error = copyin(uap->guard, + &(crarg.gca_guard), sizeof (crarg.gca_guard))) != 0) + return (error); + + if (crarg.gca_guard == 0) + return (EINVAL); + + return (kqueue_body(p, guarded_fileproc_alloc_init, &crarg, retval)); +} + +/* + * int guarded_close_np(int fd, const guardid_t *guard); + */ +int +guarded_close_np(proc_t p, struct guarded_close_np_args *uap, + __unused int32_t *retval) +{ + struct guarded_fileproc *gfp; + int fd = uap->fd; + int error; + guardid_t uguard; + + AUDIT_SYSCLOSE(p, fd); + + if ((error = copyin(uap->guard, &uguard, sizeof (uguard))) != 0) + return (error); + + proc_fdlock(p); + if ((error = fp_lookup_guarded(p, fd, uguard, &gfp)) != 0) { + proc_fdunlock(p); + return (error); + } + error = close_internal_locked(p, fd, GFP_TO_FP(gfp), 0); + proc_fdunlock(p); + return (error); +} + +/* + * int + * change_fdguard_np(int fd, const guardid_t *guard, u_int guardflags, + * const guardid_t *nguard, u_int nguardflags, int *fdflagsp); + * + * Given a file descriptor, atomically exchange for + * a new guard , returning the previous fd + * flags (see fcntl:F_SETFD) in *fdflagsp. + * + * This syscall can be used to either (a) add a new guard to an existing + * unguarded file descriptor (b) remove the old guard from an existing + * guarded file descriptor or (c) change the guard (guardid and/or + * guardflags) on a guarded file descriptor. + * + * If 'guard' is NULL, fd must be unguarded at entry. If the call completes + * successfully the fd will be guarded with . + * + * Guarding a file descriptor has some side-effects on the "fdflags" + * associated with the descriptor - in particular FD_CLOEXEC is + * forced ON unconditionally, and FD_CLOFORK is forced ON by GUARD_CLOSE. + * Callers who wish to subsequently restore the state of the fd should save + * the value of *fdflagsp after a successful invocation. + * + * If 'nguard' is NULL, fd must be guarded at entry, + * must match with what's already guarding the descriptor, and the + * result will be to completely remove the guard. Note also that the + * fdflags are copied to the descriptor from the incoming *fdflagsp argument. + * + * If the descriptor is guarded, and neither 'guard' nor 'nguard' is NULL + * and matches what's already guarding the descriptor, + * then becomes the new guard. 
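
// Editor's note: a hypothetical userland use of the SPI documented
// above, taking the prototype and rules from this patch's own comments
// (GUARD_DUP mandatory, O_CLOEXEC required, guard nonzero). The GUARD_*
// bit values and the wrapper's exact signature are assumptions.
#include <fcntl.h>
#include <stdint.h>

typedef uint64_t guardid_sk_t;
#define GUARD_CLOSE_SK (1u << 0)   // stand-in bit assignments
#define GUARD_DUP_SK   (1u << 1)

extern int guarded_open_np(const char *pathname, int flags,
    const guardid_sk_t *guard, unsigned guardflags, int mode);

static int
open_guarded(const char *path)
{
	static const guardid_sk_t guard = 0xfeedfacefeedfaceULL; // any nonzero
	return guarded_open_np(path, O_CREAT | O_WRONLY | O_CLOEXEC,
	    &guard, GUARD_DUP_SK | GUARD_CLOSE_SK, 0644);
}
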
In this case, even if + * the GUARD_CLOSE flag is being cleared, it is still possible to continue + * to keep FD_CLOFORK on the descriptor by passing FD_CLOFORK via fdflagsp. + * + * Example 1: Guard an unguarded descriptor during a set of operations, + * then restore the original state of the descriptor. + * + * int sav_flags = 0; + * change_fdguard_np(fd, NULL, 0, &myguard, GUARD_CLOSE, &sav_flags); + * // do things with now guarded 'fd' + * change_fdguard_np(fd, &myguard, GUARD_CLOSE, NULL, 0, &sav_flags); + * // fd now unguarded. + * + * Example 2: Change the guard of a guarded descriptor during a set of + * operations, then restore the original state of the descriptor. + * + * int sav_flags = (gdflags & GUARD_CLOSE) ? FD_CLOFORK : 0; + * change_fdguard_np(fd, &gd, gdflags, &myguard, GUARD_CLOSE, &sav_flags); + * // do things with 'fd' with a different guard + * change_fdguard_np(fd, &myg, GUARD_CLOSE, &gd, gdflags, &sav_flags); + * // back to original guarded state + */ + +#define FDFLAGS_GET(p, fd) (*fdflags(p, fd) & (UF_EXCLOSE|UF_FORKCLOSE)) +#define FDFLAGS_SET(p, fd, bits) \ + (*fdflags(p, fd) |= ((bits) & (UF_EXCLOSE|UF_FORKCLOSE))) +#define FDFLAGS_CLR(p, fd, bits) \ + (*fdflags(p, fd) &= ~((bits) & (UF_EXCLOSE|UF_FORKCLOSE))) + +int +change_fdguard_np(proc_t p, struct change_fdguard_np_args *uap, + __unused int32_t *retval) +{ + struct fileproc *fp; + int fd = uap->fd; + int error; + guardid_t oldg = 0, newg = 0; + int nfdflags = 0; + + if (0 != uap->guard && + 0 != (error = copyin(uap->guard, &oldg, sizeof (oldg)))) + return (error); /* can't copyin current guard */ + + if (0 != uap->nguard && + 0 != (error = copyin(uap->nguard, &newg, sizeof (newg)))) + return (error); /* can't copyin new guard */ + + if (0 != uap->fdflagsp && + 0 != (error = copyin(uap->fdflagsp, &nfdflags, sizeof (nfdflags)))) + return (error); /* can't copyin new fdflags */ + + proc_fdlock(p); +restart: + if ((error = fp_lookup(p, fd, &fp, 1)) != 0) { + proc_fdunlock(p); + return (error); + } + + if (0 != uap->fdflagsp) { + int ofdflags = FDFLAGS_GET(p, fd); + int ofl = ((ofdflags & UF_EXCLOSE) ? FD_CLOEXEC : 0) | + ((ofdflags & UF_FORKCLOSE) ? FD_CLOFORK : 0); + proc_fdunlock(p); + if (0 != (error = copyout(&ofl, uap->fdflagsp, sizeof (ofl)))) { + proc_fdlock(p); + goto dropout; /* can't copyout old fdflags */ + } + proc_fdlock(p); + } + + if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) { + if (0 == uap->guard || 0 == uap->guardflags) + error = EINVAL; /* missing guard! */ + else if (0 == oldg) + error = EPERM; /* guardids cannot be zero */ + } else { + if (0 != uap->guard || 0 != uap->guardflags) + error = EINVAL; /* guard provided, but none needed! */ + } + + if (0 != error) + goto dropout; + + if (0 != uap->nguard) { + /* + * There's a new guard in town. 
+ */ + if (0 == newg) + error = EINVAL; /* guards cannot contain zero */ + else if (0 == uap->nguardflags) + error = EINVAL; /* attributes cannot be zero */ + else if (((uap->nguardflags & GUARD_REQUIRED) != GUARD_REQUIRED) || + ((uap->guardflags & ~GUARD_ALL) != 0)) + error = EINVAL; /* must have valid attributes too */ + + if (0 != error) + goto dropout; + + if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) { + /* + * Replace old guard with new guard + */ + struct guarded_fileproc *gfp = FP_TO_GFP(fp); + + if (GUARDED_FILEPROC_MAGIC != gfp->gf_magic) + panic("%s: corrupt gfp %p flags %x", + __func__, gfp, fp->f_flags); + + if (oldg == gfp->gf_guard && + uap->guardflags == gfp->gf_attrs) { + /* + * Must match existing guard + attributes + * before we'll swap them to new ones, managing + * fdflags "side-effects" as we go. Note that + * userland can request FD_CLOFORK semantics. + */ + if (gfp->gf_attrs & GUARD_CLOSE) + FDFLAGS_CLR(p, fd, UF_FORKCLOSE); + gfp->gf_guard = newg; + gfp->gf_attrs = uap->nguardflags; + if (gfp->gf_attrs & GUARD_CLOSE) + FDFLAGS_SET(p, fd, UF_FORKCLOSE); + FDFLAGS_SET(p, fd, + (nfdflags & FD_CLOFORK) ? UF_FORKCLOSE : 0); + } else { + error = EPERM; + } + goto dropout; + } else { + /* + * Add a guard to a previously unguarded descriptor + */ + switch (FILEGLOB_DTYPE(fp->f_fglob)) { + case DTYPE_VNODE: + case DTYPE_PIPE: + case DTYPE_SOCKET: + case DTYPE_KQUEUE: + break; + default: + error = ENOTSUP; + goto dropout; + } + + proc_fdunlock(p); + + struct gfp_crarg crarg = { + .gca_guard = newg, + .gca_attrs = uap->nguardflags + }; + struct fileproc *nfp = + guarded_fileproc_alloc_init(&crarg); + + proc_fdlock(p); + + switch (error = fp_tryswap(p, fd, nfp)) { + struct guarded_fileproc *gfp; + + case 0: /* guarded-ness comes with side-effects */ + gfp = FP_TO_GFP(nfp); + if (gfp->gf_attrs & GUARD_CLOSE) + FDFLAGS_SET(p, fd, UF_FORKCLOSE); + FDFLAGS_SET(p, fd, UF_EXCLOSE); + (void) fp_drop(p, fd, nfp, 1); + fileproc_free(fp); + break; + case EKEEPLOOKING: /* f_iocount indicates a collision */ + (void) fp_drop(p, fd, fp, 1); + fileproc_free(nfp); + goto restart; + default: + (void) fp_drop(p, fd, fp, 1); + fileproc_free(nfp); + break; + } + proc_fdunlock(p); + return (error); + } + } else { + /* + * No new guard. + */ + if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) { + /* + * Remove the guard altogether. + */ + struct guarded_fileproc *gfp = FP_TO_GFP(fp); + + if (0 != uap->nguardflags) { + error = EINVAL; + goto dropout; + } + + if (GUARDED_FILEPROC_MAGIC != gfp->gf_magic) + panic("%s: corrupt gfp %p flags %x", + __func__, gfp, fp->f_flags); + + if (oldg != gfp->gf_guard || + uap->guardflags != gfp->gf_attrs) { + error = EPERM; + goto dropout; + } + + proc_fdunlock(p); + struct fileproc *nfp = fileproc_alloc_init(NULL); + proc_fdlock(p); + + switch (error = fp_tryswap(p, fd, nfp)) { + case 0: /* undo side-effects of guarded-ness */ + FDFLAGS_CLR(p, fd, UF_FORKCLOSE | UF_EXCLOSE); + FDFLAGS_SET(p, fd, + (nfdflags & FD_CLOFORK) ? UF_FORKCLOSE : 0); + FDFLAGS_SET(p, fd, + (nfdflags & FD_CLOEXEC) ? UF_EXCLOSE : 0); + (void) fp_drop(p, fd, nfp, 1); + fileproc_free(fp); + break; + case EKEEPLOOKING: /* f_iocount indicates collision */ + (void) fp_drop(p, fd, fp, 1); + fileproc_free(nfp); + goto restart; + default: + (void) fp_drop(p, fd, fp, 1); + fileproc_free(nfp); + break; + } + proc_fdunlock(p); + return (error); + } else { + /* + * Not already guarded, and no new guard? 
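
// Editor's note: both fp_tryswap() call sites in this function retry the
// entire lookup when EKEEPLOOKING comes back (a live f_iocount means the
// slot can't be swapped yet). A distilled sketch of that optimistic-swap
// loop; EKEEPLOOKING_SK and the function pointers are stand-ins.
#define EKEEPLOOKING_SK (-7)   // kernel-internal status, value assumed

static int
swap_with_retry(int fd, int (*lookup)(int, void **),
    int (*tryswap)(int, void *), void *replacement)
{
	void *cur;
	int error;
	for (;;) {
		if ((error = lookup(fd, &cur)) != 0)
			return error;           // fd went away, give up
		error = tryswap(fd, replacement);
		if (error != EKEEPLOOKING_SK)
			return error;           // 0 on success, else real errno
		// collision: drop the reference and look the fd up again
	}
}
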
+ */ + error = EINVAL; + } + } + +dropout: + (void) fp_drop(p, fd, fp, 1); + proc_fdunlock(p); + return (error); +} + diff --git a/bsd/kern/kern_kpc.c b/bsd/kern/kern_kpc.c new file mode 100644 index 000000000..321fa1b5a --- /dev/null +++ b/bsd/kern/kern_kpc.c @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +/* Various sysctl requests */ +#define REQ_CLASSES (1) +#define REQ_COUNTING (2) +#define REQ_THREAD_COUNTING (3) +#define REQ_CONFIG_COUNT (4) +#define REQ_COUNTER_COUNT (5) +#define REQ_THREAD_COUNTERS (6) +#define REQ_COUNTERS (7) +#define REQ_SHADOW_COUNTERS (8) +#define REQ_CONFIG (9) +#define REQ_PERIOD (10) +#define REQ_ACTIONID (11) +#define REQ_FORCE_ALL_CTRS (12) +#define REQ_DISABLE_WHITELIST (13) + +/* Type-munging casts */ +typedef int (*getint_t)(void); +typedef int (*setint_t)(int); + +/* safety */ +static int kpc_initted = 0; + +/* locking and buffer for large data requests */ +static lck_grp_attr_t *sysctl_buffer_lckgrp_attr = NULL; +static lck_grp_t *sysctl_buffer_lckgrp = NULL; +static lck_mtx_t sysctl_buffer_lock; +static void *sysctl_buffer = NULL; + +typedef int (*setget_func_t)(int); + +/* init our stuff */ +extern void kpc_thread_init(void); /* osfmk/kern/kpc_thread.c */ +extern void kpc_arch_init(void); + +void +kpc_init(void) +{ + sysctl_buffer_lckgrp_attr = lck_grp_attr_alloc_init(); + sysctl_buffer_lckgrp = lck_grp_alloc_init("kpc", + sysctl_buffer_lckgrp_attr); + lck_mtx_init(&sysctl_buffer_lock, sysctl_buffer_lckgrp, LCK_ATTR_NULL); + + kpc_arch_init(); + kpc_thread_init(); + + kpc_initted = 1; +} + +/* abstract sysctl handlers */ +static int +sysctl_get_int( struct sysctl_oid *oidp, struct sysctl_req *req, + uint32_t value ) +{ + int error = 0; + + /* copy out the old value */ + error = sysctl_handle_int(oidp, &value, 0, req); + + return error; +} + +static int +sysctl_getset_int( struct sysctl_oid *oidp, struct sysctl_req *req, + int (*get_func)(void), int (*set_func)(int) ) +{ + int error = 0; + uint32_t value = 0; + + /* get the old value and process it */ + value = get_func(); + + /* copy out the old value, get the new value */ + error = sysctl_handle_int(oidp, 
&value, 0, req); + if (error || !req->newptr) + return (error); + + /* if that worked, and we're writing... */ + error = set_func( value ); + + return error; +} + +static int +sysctl_setget_int( struct sysctl_req *req, + int (*setget_func)(int) ) +{ + int error = 0; + int value = 0; + + error = SYSCTL_IN( req, &value, sizeof(value) ); + if( error ) + return error; + + value = setget_func(value); + + error = SYSCTL_OUT( req, &value, sizeof(value) ); + + return error; +} + +static int +kpc_sysctl_acquire_buffer(void) +{ + if( sysctl_buffer == NULL ) + sysctl_buffer = kpc_counterbuf_alloc(); + + if( !sysctl_buffer ) + { + return ENOMEM; + } + + return 0; +} + +static int +sysctl_kpc_get_counters(uint32_t counters, + uint32_t *size, void *buf) +{ + uint64_t *ctr_buf = (uint64_t*)buf; + int curcpu; + uint32_t count; + + count = kpc_get_cpu_counters(counters & KPC_ALL_CPUS, + counters, + &curcpu, &ctr_buf[1]); + if (!count) + return EINVAL; + + ctr_buf[0] = curcpu; + + *size = (count+1) * sizeof(uint64_t); + + return 0; +} + +static int +sysctl_kpc_get_shadow_counters(uint32_t counters, + uint32_t *size, void *buf) +{ + uint64_t *ctr_buf = (uint64_t*)buf; + int curcpu; + uint32_t count; + + count = kpc_get_shadow_counters(counters & KPC_ALL_CPUS, + counters, + &curcpu, &ctr_buf[1]); + + if (!count) + return EINVAL; + + ctr_buf[0] = curcpu; + + *size = (count+1) * sizeof(uint64_t); + + return 0; +} + +static int +sysctl_kpc_get_thread_counters(uint32_t tid, + uint32_t *size, void *buf) +{ + uint32_t count = *size / sizeof(uint64_t); + int r; + + if( tid != 0 ) + return EINVAL; + + r = kpc_get_curthread_counters(&count, buf); + if( !r ) + *size = count * sizeof(uint64_t); + + return r; +} + +static int +sysctl_kpc_get_config(uint32_t classes, void* buf) +{ + return kpc_get_config( classes, buf ); +} + +static int +sysctl_kpc_set_config(uint32_t classes, void* buf) +{ + return kpc_set_config( classes, buf); +} + +static int +sysctl_kpc_get_period(uint32_t classes, void* buf) +{ + return kpc_get_period( classes, buf ); +} + +static int +sysctl_kpc_set_period(uint32_t classes, void* buf) +{ + return kpc_set_period( classes, buf); +} + +static int +sysctl_kpc_get_actionid(uint32_t classes, void* buf) +{ + return kpc_get_actionid( classes, buf ); +} + +static int +sysctl_kpc_set_actionid(uint32_t classes, void* buf) +{ + return kpc_set_actionid( classes, buf); +} + + +static int +sysctl_get_bigarray( struct sysctl_req *req, + int (*get_fn)(uint32_t, uint32_t*, void*) ) +{ + int error = 0; + uint32_t bufsize = KPC_MAX_COUNTERS * sizeof(uint64_t); /* XXX? */ + uint32_t arg = 0; + + /* get the argument */ + error = SYSCTL_IN( req, &arg, sizeof(arg) ); + if(error) + { + printf( "kpc: no arg?\n" ); + return error; + } + + /* get the wired buffer */ + error = kpc_sysctl_acquire_buffer(); + if (error) + return error; + + /* atomically get the array into the wired buffer. We have a double + * copy, but this is better than page faulting / interrupting during + * a copy. + */ + error = get_fn( arg, &bufsize, sysctl_buffer ); + + /* do the copy out */ + if( !error ) + error = SYSCTL_OUT( req, sysctl_buffer, bufsize ); + + return error; +} + +/* given a config word, how many bytes does it take? 
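
// Editor's note: per sysctl_kpc_get_counters() above, the REQ_COUNTERS
// reply is one uint64_t carrying the sampled CPU number followed by
// 'count' counter words. A small decoder for that layout (local names):
#include <stddef.h>
#include <stdint.h>

struct kpc_counters_view {
	uint64_t        curcpu;     // ctr_buf[0] in the handler above
	const uint64_t *counters;   // ctr_buf[1..count]
	size_t          ncounters;
};

static int
kpc_counters_decode(const void *buf, size_t size, struct kpc_counters_view *v)
{
	const uint64_t *w = buf;
	if (size < sizeof(uint64_t) || size % sizeof(uint64_t) != 0)
		return -1;   // handler always returns (count+1) * 8 bytes
	v->curcpu    = w[0];
	v->counters  = &w[1];
	v->ncounters = size / sizeof(uint64_t) - 1;
	return 0;
}
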
*/ +static int +sysctl_config_size( uint32_t config ) +{ + return kpc_get_config_count(config) * sizeof(kpc_config_t); +} + +static int +sysctl_counter_size( uint32_t classes ) +{ + return kpc_get_counter_count(classes) * sizeof(uint64_t); +} + +static int +sysctl_actionid_size( uint32_t classes ) +{ + return kpc_get_counter_count(classes) * sizeof(int32_t); +} + +static int +sysctl_getset_bigarray( struct sysctl_req *req, + int (*size_fn)(uint32_t arg), + int (*get_fn)(uint32_t, void*), + int (*set_fn)(uint32_t, void*) ) +{ + int error = 0; + uint32_t bufsize = KPC_MAX_COUNTERS * sizeof(uint64_t); /* XXX? */ + uint32_t regsize = 0; + uint64_t arg; + + /* get the config word */ + error = SYSCTL_IN( req, &arg, sizeof(arg) ); + if(error) + { + printf( "kpc: no arg?\n" ); + return error; + } + + /* Work out size of registers */ + regsize = size_fn((uint32_t)arg); + + /* Ignore NULL requests */ + if(regsize == 0) + return EINVAL; + + /* ensure not too big */ + if( regsize > bufsize ) + return EINVAL; + + /* get the wired buffer */ + error = kpc_sysctl_acquire_buffer(); + if (error) + return error; + + // if writing... + if(req->newptr) + { + // copy in the rest in -- sysctl remembers we did one already + error = SYSCTL_IN( req, sysctl_buffer, + regsize ); + + // if SYSCTL_IN fails it means we are only doing a read + if(!error) { + // set it + error = set_fn( (uint32_t)arg, sysctl_buffer ); + if( error ) + goto fail; + } + } + + // if reading + if(req->oldptr) + { + // read it + error = get_fn( (uint32_t)arg, sysctl_buffer ); + if( error ) + goto fail; + + // copy out the full set + error = SYSCTL_OUT( req, sysctl_buffer, regsize ); + } + +fail: + return error; +} + + + +/* + * #define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp, \ + * void *arg1, int arg2, \ + * struct sysctl_req *req ) + */ +static int +kpc_sysctl SYSCTL_HANDLER_ARGS +{ + int ret; + + // __unused struct sysctl_oid *unused_oidp = oidp; + (void)arg2; + + if( !kpc_initted ) + panic("kpc_init not called"); + + lck_mtx_lock(&sysctl_buffer_lock); + + /* which request */ + switch( (uintptr_t) arg1 ) + { + case REQ_CLASSES: + ret = sysctl_get_int( oidp, req, + kpc_get_classes() ); + break; + case REQ_COUNTING: + ret = sysctl_getset_int( oidp, req, + (getint_t)kpc_get_running, + (setint_t)kpc_set_running ); + break; + case REQ_THREAD_COUNTING: + ret = sysctl_getset_int( oidp, req, + (getint_t)kpc_get_thread_counting, + (setint_t)kpc_set_thread_counting ); + break; + + case REQ_CONFIG_COUNT: + ret = sysctl_setget_int( req, + (setget_func_t)kpc_get_config_count ); + break; + + case REQ_COUNTER_COUNT: + ret = sysctl_setget_int( req, + (setget_func_t)kpc_get_counter_count ); + break; + + + case REQ_THREAD_COUNTERS: + ret = sysctl_get_bigarray( req, sysctl_kpc_get_thread_counters ); + break; + + case REQ_COUNTERS: + ret = sysctl_get_bigarray( req, sysctl_kpc_get_counters ); + break; + + case REQ_SHADOW_COUNTERS: + ret = sysctl_get_bigarray( req, sysctl_kpc_get_shadow_counters ); + break; + + case REQ_CONFIG: + ret = sysctl_getset_bigarray( req, + sysctl_config_size, + sysctl_kpc_get_config, + sysctl_kpc_set_config ); + break; + + case REQ_PERIOD: + ret = sysctl_getset_bigarray( req, + sysctl_counter_size, + sysctl_kpc_get_period, + sysctl_kpc_set_period ); + break; + + case REQ_ACTIONID: + ret = sysctl_getset_bigarray( req, + sysctl_actionid_size, + sysctl_kpc_get_actionid, + sysctl_kpc_set_actionid ); + break; + + default: + ret = ENOENT; + break; + } + + lck_mtx_unlock(&sysctl_buffer_lock); + + return ret; +} + + +/*** sysctl 
definitions ***/ + +/* root kperf node */ +SYSCTL_NODE(, OID_AUTO, kpc, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "kpc"); + +/* values */ +SYSCTL_PROC(_kpc, OID_AUTO, classes, + CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_ANYBODY, + (void*)REQ_CLASSES, + sizeof(int), kpc_sysctl, "I", "Available classes"); + +SYSCTL_PROC(_kpc, OID_AUTO, counting, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_COUNTING, + sizeof(int), kpc_sysctl, "I", "PMCs counting"); + +SYSCTL_PROC(_kpc, OID_AUTO, thread_counting, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_THREAD_COUNTING, + sizeof(int), kpc_sysctl, "I", "Thread accumulation"); + +/* faux values */ +SYSCTL_PROC(_kpc, OID_AUTO, config_count, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_CONFIG_COUNT, + sizeof(int), kpc_sysctl, "S", "Config count"); + +SYSCTL_PROC(_kpc, OID_AUTO, counter_count, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_COUNTER_COUNT, + sizeof(int), kpc_sysctl, "S", "Counter count"); + +/* arrays */ +SYSCTL_PROC(_kpc, OID_AUTO, thread_counters, + CTLFLAG_RD|CTLFLAG_WR|CTLFLAG_ANYBODY, + (void*)REQ_THREAD_COUNTERS, + sizeof(uint64_t), kpc_sysctl, + "QU", "Current thread counters"); + +SYSCTL_PROC(_kpc, OID_AUTO, counters, + CTLFLAG_RD|CTLFLAG_WR|CTLFLAG_ANYBODY, + (void*)REQ_COUNTERS, + sizeof(uint64_t), kpc_sysctl, + "QU", "Current counters"); + +SYSCTL_PROC(_kpc, OID_AUTO, shadow_counters, + CTLFLAG_RD|CTLFLAG_WR|CTLFLAG_ANYBODY, + (void*)REQ_SHADOW_COUNTERS, + sizeof(uint64_t), kpc_sysctl, + "QU", "Current shadow counters"); + +SYSCTL_PROC(_kpc, OID_AUTO, config, + CTLFLAG_RD|CTLFLAG_WR|CTLFLAG_ANYBODY, + (void*)REQ_CONFIG, + sizeof(uint64_t), kpc_sysctl, + "QU", "Set counter configs"); + +SYSCTL_PROC(_kpc, OID_AUTO, period, + CTLFLAG_RD|CTLFLAG_WR|CTLFLAG_ANYBODY, + (void*)REQ_PERIOD, + sizeof(uint64_t), kpc_sysctl, + "QU", "Set counter periods"); + +SYSCTL_PROC(_kpc, OID_AUTO, actionid, + CTLFLAG_RD|CTLFLAG_WR|CTLFLAG_ANYBODY, + (void*)REQ_ACTIONID, + sizeof(uint32_t), kpc_sysctl, + "QU", "Set counter actionids"); diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c index 963493a11..2b5f0906a 100644 --- a/bsd/kern/kern_lockf.c +++ b/bsd/kern/kern_lockf.c @@ -75,6 +75,8 @@ #include #include #include +#include +#include /* * This variable controls the maximum number of processes that will @@ -129,9 +131,14 @@ static overlap_t lf_findoverlap(struct lockf *, struct lockf *, int, struct lockf ***, struct lockf **); static struct lockf *lf_getblock(struct lockf *, pid_t); static int lf_getlock(struct lockf *, struct flock *, pid_t); -static int lf_setlock(struct lockf *); +static int lf_setlock(struct lockf *, struct timespec *); static int lf_split(struct lockf *, struct lockf *); static void lf_wakelock(struct lockf *, boolean_t); +#if IMPORTANCE_INHERITANCE +static void lf_hold_assertion(task_t, struct lockf *); +static void lf_jump_to_queue_head(struct lockf *, struct lockf *); +static void lf_drop_assertion(struct lockf *); +#endif /* IMPORTANCE_INHERITANCE */ /* * lf_advlock @@ -150,6 +157,7 @@ static void lf_wakelock(struct lockf *, boolean_t); * lf_setlock:EDEADLK * lf_setlock:EINTR * lf_setlock:ENOLCK + * lf_setlock:ETIMEDOUT * lf_clearlock:ENOLCK * vnode_size:??? 
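
// Editor's note: lf_setlock() now threads a struct timespec through to
// msleep() and maps EWOULDBLOCK to ETIMEDOUT on expiry. A hypothetical
// userland sketch of a timed byte-range lock riding on that plumbing;
// F_SETLKWTIMEOUT_SK and struct flocktimeout_sk are assumptions about
// the fcntl surface, not taken from this patch.
#include <fcntl.h>
#include <time.h>

#define F_SETLKWTIMEOUT_SK 42   // stand-in command number

struct flocktimeout_sk {
	struct flock    fl;        // byte range and lock type
	struct timespec timeout;   // how long lf_setlock() may sleep
};

static int
lock_with_timeout(int fd, off_t start, off_t len, long secs)
{
	struct flocktimeout_sk ft = {
		.fl = { .l_start = start, .l_len = len,
		        .l_type = F_WRLCK, .l_whence = SEEK_SET },
		.timeout = { .tv_sec = secs, .tv_nsec = 0 },
	};
	return fcntl(fd, F_SETLKWTIMEOUT_SK, &ft);  // fails w/ ETIMEDOUT on expiry
}
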
* @@ -260,6 +268,9 @@ lf_advlock(struct vnop_advlock_args *ap) lock->lf_next = (struct lockf *)0; TAILQ_INIT(&lock->lf_blkhd); lock->lf_flags = ap->a_flags; +#if IMPORTANCE_INHERITANCE + lock->lf_boosted = LF_NOT_BOOSTED; +#endif /* IMPORTANCE_INHERITANCE */ if (ap->a_flags & F_FLOCK) lock->lf_flags |= F_WAKE1_SAFE; @@ -270,7 +281,7 @@ lf_advlock(struct vnop_advlock_args *ap) */ switch(ap->a_op) { case F_SETLK: - error = lf_setlock(lock); + error = lf_setlock(lock, ap->a_timeout); break; case F_UNLCK: @@ -283,12 +294,6 @@ lf_advlock(struct vnop_advlock_args *ap) FREE(lock, M_LOCKF); break; -#if CONFIG_EMBEDDED - case F_GETLKPID: - error = lf_getlock(lock, fl, fl->l_pid); - FREE(lock, M_LOCKF); - break; -#endif default: FREE(lock, M_LOCKF); @@ -427,19 +432,23 @@ lf_coalesce_adjacent(struct lockf *lock) * the set is successful, and freed if the * set is unsuccessful. * + * timeout Timeout specified in the case of + * SETLKWTIMEOUT. + * * Returns: 0 Success * EAGAIN * EDEADLK * lf_split:ENOLCK * lf_clearlock:ENOLCK * msleep:EINTR + * msleep:ETIMEDOUT * * Notes: We add the lock to the provisional lock list. We do not * coalesce at this time; this has implications for other lock * requestors in the blocker search mechanism. */ static int -lf_setlock(struct lockf *lock) +lf_setlock(struct lockf *lock, struct timespec *timeout) { struct lockf *block; struct lockf **head = lock->lf_head; @@ -448,6 +457,9 @@ lf_setlock(struct lockf *lock) int priority, needtolink, error; struct vnode *vp = lock->lf_vnode; overlap_t ovcase; +#if IMPORTANCE_INHERITANCE + task_t boosting_task, block_task; +#endif /* IMPORTANCE_INHERITANCE */ #ifdef LOCKF_DEBUGGING if (lockf_debug & 1) { @@ -471,6 +483,7 @@ lf_setlock(struct lockf *lock) * Free the structure and return if nonblocking. */ if ((lock->lf_flags & F_WAIT) == 0) { + DTRACE_FSINFO(advlock__nowait, vnode_t, vp); FREE(lock, M_LOCKF); return (EAGAIN); } @@ -577,7 +590,36 @@ lf_setlock(struct lockf *lock) lf_printlist("lf_setlock(block)", block); } #endif /* LOCKF_DEBUGGING */ - error = msleep(lock, &vp->v_lock, priority, lockstr, 0); + DTRACE_FSINFO(advlock__wait, vnode_t, vp); +#if IMPORTANCE_INHERITANCE + /* + * Posix type of locks are not inherited by child processes and + * it maintains one to one mapping between lock and its owner, while + * Flock type of locks are inherited across forks and it does not + * maintian any one to one mapping between the lock and the lock + * owner. Thus importance donation is done only for Posix type of + * locks. + */ + if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) { + block_task = proc_task((proc_t) block->lf_id); + boosting_task = proc_task((proc_t) lock->lf_id); + + /* Check if current task can donate importance. The + * check of imp_donor bit is done without holding + * task lock. The value may change after you read it, + * but it is ok to boost a task while someone else is + * unboosting you. + */ + if (task_is_importance_donor(boosting_task)) { + if (block->lf_boosted != LF_BOOSTED && + task_is_importance_receiver(block_task)) { + lf_hold_assertion(block_task, block); + } + lf_jump_to_queue_head(block, lock); + } + } +#endif /* IMPORTANCE_INHERITANCE */ + error = msleep(lock, &vp->v_lock, priority, lockstr, timeout); if (error == 0 && (lock->lf_flags & F_ABORT) != 0) error = EBADF; @@ -613,6 +655,10 @@ lf_setlock(struct lockf *lock) if (!TAILQ_EMPTY(&lock->lf_blkhd)) lf_wakelock(lock, TRUE); FREE(lock, M_LOCKF); + /* Return ETIMEDOUT if timeout occoured. 
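
// Editor's note: the importance-inheritance block above boosts the
// blocker only when both locks are POSIX-style (1:1 with an owning
// process), the waiter's task is a donor, and the blocker's task is an
// unboosted receiver; queue-jumping happens for any donor. A distilled
// sketch of the boost gate, with trivial stand-in predicates:
#include <stdbool.h>

struct flock_sk { bool posix; bool boosted; void *owner_task; };

static bool donor_sk(void *t)    { return t != NULL; }  // stand-in
static bool receiver_sk(void *t) { return t != NULL; }  // stand-in

static bool
should_boost_blocker(const struct flock_sk *waiter,
    const struct flock_sk *blocker)
{
	if (!waiter->posix || !blocker->posix)
		return false;   // flock-style locks survive fork; no 1:1 owner
	if (!donor_sk(waiter->owner_task))
		return false;
	return !blocker->boosted && receiver_sk(blocker->owner_task);
}
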
*/ + if (error == EWOULDBLOCK) { + error = ETIMEDOUT; + } return (error); } } @@ -796,6 +842,11 @@ lf_clearlock(struct lockf *unlock) * Wakeup the list of locks to be retried. */ lf_wakelock(overlap, FALSE); +#if IMPORTANCE_INHERITANCE + if (overlap->lf_boosted == LF_BOOSTED) { + lf_drop_assertion(overlap); + } +#endif /* IMPORTANCE_INHERITANCE */ switch (ovcase) { case OVERLAP_NONE: /* satisfy compiler enum/switch */ @@ -1308,3 +1359,74 @@ lf_printlist(const char *tag, struct lockf *lock) } } #endif /* LOCKF_DEBUGGING */ + +#if IMPORTANCE_INHERITANCE + +/* + * lf_hold_assertion + * + * Call task importance hold assertion on the owner of the lock. + * + * Parameters: block_task Owner of the lock blocking + * current thread. + * + * block lock on which the current thread + * is blocking on. + * + * Returns: + * + * Notes: The task reference on block_task is not needed to be hold since + * the current thread has vnode lock and block_task has a file + * lock, thus removing file lock in exit requires block_task to + * grab the vnode lock. + */ +static void +lf_hold_assertion(task_t block_task, struct lockf *block) +{ + task_importance_hold_internal_assertion(block_task, 1); + block->lf_boosted = LF_BOOSTED; +} + + +/* + * lf_jump_to_queue_head + * + * Jump the lock from the tail of the block queue to the head of + * the queue. + * + * Parameters: block lockf struct containing the + * block queue. + * lock lockf struct to be jumped to the + * front. + * + * Returns: + */ +static void +lf_jump_to_queue_head(struct lockf *block, struct lockf *lock) +{ + /* Move the lock to the head of the block queue. */ + TAILQ_REMOVE(&block->lf_blkhd, lock, lf_block); + TAILQ_INSERT_HEAD(&block->lf_blkhd, lock, lf_block); +} + + +/* + * lf_drop_assertion + * + * Drops the task hold assertion. + * + * Parameters: block lockf struct holding the assertion. + * + * Returns: + */ +static void +lf_drop_assertion(struct lockf *block) +{ + task_t current_task; + + current_task = proc_task((proc_t) block->lf_id); + task_importance_drop_internal_assertion(current_task, 1); + block->lf_boosted = LF_NOT_BOOSTED; +} + +#endif /* IMPORTANCE_INHERITANCE */ diff --git a/bsd/kern/kern_malloc.c b/bsd/kern/kern_malloc.c index ee021079a..3f689cf35 100644 --- a/bsd/kern/kern_malloc.c +++ b/bsd/kern/kern_malloc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,6 +79,7 @@ #include #include #include +#include #include #include @@ -112,7 +113,7 @@ #include #include -void kmeminit(void) __attribute__((section("__TEXT, initcode"))); +void kmeminit(void); /* Strings corresponding to types of memory. 
* Must be in synch with the #defines is sys/malloc.h @@ -162,7 +163,7 @@ const char *memname[] = { #else "", /* 27 M_DQUOT */ #endif - "", /* 28 M_UFSMNT */ + "proc uuid policy", /* 28 M_PROC_UUID_POLICY */ #if (SYSV_SEM || SYSV_MSG || SYSV_SHM) "shm", /* 29 M_SHM */ #else @@ -234,8 +235,8 @@ const char *memname[] = { "", /* 76 M_HFSNODE */ "", /* 77 M_HFSFORK */ #endif - "ZFS mount", /* 78 M_ZFSFSMNT */ - "ZFS node", /* 79 M_ZFSNODE */ + "", /* 78 unused */ + "", /* 79 unused */ "temp", /* 80 M_TEMP */ "key mgmt", /* 81 M_SECA */ "DEVFS", /* 82 M_DEVFS */ @@ -273,11 +274,7 @@ const char *memname[] = { "fileglob", /* 99 M_FILEGLOB */ "kauth", /* 100 M_KAUTH */ "dummynet", /* 101 M_DUMMYNET */ -#if CONFIG_VFS_FUNNEL - "unsafe_fsnode", /* 102 M_UNSAFEFS */ -#else "", /* 102 M_UNSAFEFS */ -#endif /* CONFIG_VFS_FUNNEL */ "macpipelabel", /* 103 M_MACPIPELABEL */ "mactemp", /* 104 M_MACTEMP */ "sbuf", /* 105 M_SBUF */ @@ -298,6 +295,14 @@ const char *memname[] = { "in6mfilter", /* 112 M_IN6MFILTER */ "ip6mopts", /* 113 M_IP6MOPTS */ "ip6msource", /* 114 M_IP6MSOURCE */ +#if FLOW_DIVERT + "flow_divert_pcb", /* 115 M_FLOW_DIVERT_PCB */ + "flow_divert_group", /* 116 M_FLOW_DIVERT_GROUP */ +#else + "", /* 115 M_FLOW_DIVERT_PCB */ + "", /* 116 M_FLOW_DIVERT_GROUP */ +#endif + "ip6cga", /* 117 M_IP6CGA */ }; /* for use with kmzones.kz_zalloczone */ @@ -352,7 +357,7 @@ struct kmzones { #else { 0, KMZ_MALLOC, FALSE }, /* 27 M_DQUOT */ #endif - { 0, KMZ_MALLOC, FALSE }, /* 28 M_UFSMNT */ + { 0, KMZ_MALLOC, FALSE }, /* 28 M_PROC_UUID_POLICY */ { 0, KMZ_MALLOC, FALSE }, /* 29 M_SHM */ { SOS(plimit), KMZ_CREATEZONE, TRUE }, /* 30 M_PLIMIT */ { SOS(sigacts), KMZ_CREATEZONE_ACCT, TRUE }, /* 31 M_SIGACTS */ @@ -426,8 +431,8 @@ struct kmzones { { 0, KMZ_MALLOC, FALSE }, /* 76 M_HFSNODE */ { 0, KMZ_MALLOC, FALSE }, /* 77 M_HFSFORK */ #endif - { 0, KMZ_MALLOC, FALSE }, /* 78 M_ZFSMNT */ - { 0, KMZ_MALLOC, FALSE }, /* 79 M_ZFSNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 78 unused */ + { 0, KMZ_MALLOC, FALSE }, /* 79 unused */ { 0, KMZ_MALLOC, FALSE }, /* 80 M_TEMP */ { 0, KMZ_MALLOC, FALSE }, /* 81 M_SECA */ { 0, KMZ_MALLOC, FALSE }, /* 82 M_DEVFS */ @@ -459,11 +464,7 @@ struct kmzones { { SOS(fileglob), KMZ_CREATEZONE, TRUE }, /* 99 M_FILEGLOB */ { 0, KMZ_MALLOC, FALSE }, /* 100 M_KAUTH */ { 0, KMZ_MALLOC, FALSE }, /* 101 M_DUMMYNET */ -#if CONFIG_VFS_FUNNEL - { SOS(unsafe_fsnode),KMZ_CREATEZONE, TRUE }, /* 102 M_UNSAFEFS */ -#else { 0, KMZ_MALLOC, FALSE }, /* 102 M_UNSAFEFS */ -#endif /* CONFIG_VFS_FUNNEL */ { 0, KMZ_MALLOC, FALSE }, /* 103 M_MACPIPELABEL */ { 0, KMZ_MALLOC, FALSE }, /* 104 M_MACTEMP */ { 0, KMZ_MALLOC, FALSE }, /* 105 M_SBUF */ @@ -480,6 +481,14 @@ struct kmzones { { 0, KMZ_MALLOC, FALSE }, /* 112 M_IN6MFILTER */ { 0, KMZ_MALLOC, FALSE }, /* 113 M_IP6MOPTS */ { 0, KMZ_MALLOC, FALSE }, /* 114 M_IP6MSOURCE */ +#if FLOW_DIVERT + { SOS(flow_divert_pcb), KMZ_CREATEZONE, TRUE }, /* 115 M_FLOW_DIVERT_PCB */ + { SOS(flow_divert_group), KMZ_CREATEZONE, TRUE }, /* 116 M_FLOW_DIVERT_GROUP */ +#else + { 0, KMZ_MALLOC, FALSE }, /* 115 M_FLOW_DIVERT_PCB */ + { 0, KMZ_MALLOC, FALSE }, /* 116 M_FLOW_DIVERT_GROUP */ +#endif /* FLOW_DIVERT */ + { 0, KMZ_MALLOC, FALSE }, /* 117 M_IP6CGA */ #undef SOS #undef SOX }; @@ -552,7 +561,7 @@ _MALLOC( int type, int flags) { - struct _mhead *hdr; + struct _mhead *hdr = NULL; size_t memsize = sizeof (*hdr) + size; if (type >= M_LAST) diff --git a/bsd/kern/kern_memorystatus.c b/bsd/kern/kern_memorystatus.c index 1bf8dd616..852037af4 100644 --- a/bsd/kern/kern_memorystatus.c +++ 
b/bsd/kern/kern_memorystatus.c @@ -38,10 +38,12 @@ #include #include #include -#include #include +#include +#include #include #include +#include #include #include #include @@ -49,12 +51,12 @@ #include #include #include -#include +#include +#include #if CONFIG_FREEZE -#include #include -#endif +#endif /* CONFIG_FREEZE */ #include @@ -70,39 +72,108 @@ do { \ #define MEMORYSTATUS_DEBUG(cond, format, ...) #endif +/* General tunables */ + +unsigned long delta_percentage = 5; +unsigned long critical_threshold_percentage = 5; +unsigned long idle_offset_percentage = 5; +unsigned long pressure_threshold_percentage = 15; +unsigned long freeze_threshold_percentage = 50; + /* General memorystatus stuff */ -static void memorystatus_add_node(memorystatus_node *node); -static void memorystatus_remove_node(memorystatus_node *node); -static memorystatus_node *memorystatus_get_node(pid_t pid); -static void memorystatus_release_node(memorystatus_node *node); +struct klist memorystatus_klist; +static lck_mtx_t memorystatus_klist_mutex; -int memorystatus_wakeup = 0; +static void memorystatus_klist_lock(void); +static void memorystatus_klist_unlock(void); -static void memorystatus_thread(void *param __unused, wait_result_t wr __unused); +static uint64_t memorystatus_idle_delay_time = 0; + +/* + * Memorystatus kevents + */ + +static int filt_memorystatusattach(struct knote *kn); +static void filt_memorystatusdetach(struct knote *kn); +static int filt_memorystatus(struct knote *kn, long hint); + +struct filterops memorystatus_filtops = { + .f_attach = filt_memorystatusattach, + .f_detach = filt_memorystatusdetach, + .f_event = filt_memorystatus, +}; + +enum { + kMemorystatusNoPressure = 1, + kMemorystatusPressure = 2 +}; + +/* Idle guard handling */ + +static int32_t memorystatus_scheduled_idle_demotions = 0; + +static thread_call_t memorystatus_idle_demotion_call; + +static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2); +static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state); +static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clean_state); +static void memorystatus_reschedule_idle_demotion_locked(void); -static memorystatus_node *next_memorystatus_node = NULL; +static void memorystatus_update_priority_locked(proc_t p, int priority); + +int memorystatus_wakeup = 0; + +unsigned int memorystatus_level = 0; static int memorystatus_list_count = 0; -static lck_mtx_t * memorystatus_list_mlock; -static lck_attr_t * memorystatus_lck_attr; -static lck_grp_t * memorystatus_lck_grp; -static lck_grp_attr_t * memorystatus_lck_grp_attr; +#define MEMSTAT_BUCKET_COUNT (JETSAM_PRIORITY_MAX + 1) -static TAILQ_HEAD(memorystatus_list_head, memorystatus_node) memorystatus_list; +typedef struct memstat_bucket { + TAILQ_HEAD(, proc) list; + int count; +} memstat_bucket_t; -static uint64_t memorystatus_idle_delay_time = 0; +memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT]; + +uint64_t memstat_idle_demotion_deadline = 0; static unsigned int memorystatus_dirty_count = 0; -extern void proc_dirty_start(struct proc *p); -extern void proc_dirty_end(struct proc *p); +#if !CONFIG_JETSAM +static boolean_t kill_idle_exit = FALSE; +#endif + + +int +memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret) +{ + user_addr_t level = 0; + + level = args->level; + + if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) { + return EFAULT; + } + + return 0; +} + +static proc_t 
memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search); +static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search); + +static void memorystatus_thread(void *param __unused, wait_result_t wr __unused); /* Jetsam */ #if CONFIG_JETSAM +/* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */ +#define LEGACY_HIWATER 1 + +static int memorystatus_highwater_enabled = 1; + extern unsigned int vm_page_free_count; extern unsigned int vm_page_active_count; extern unsigned int vm_page_inactive_count; @@ -110,82 +181,66 @@ extern unsigned int vm_page_throttled_count; extern unsigned int vm_page_purgeable_count; extern unsigned int vm_page_wire_count; -static lck_mtx_t * exit_list_mlock; - -static TAILQ_HEAD(exit_list_head, memorystatus_node) exit_list; - -static unsigned int memorystatus_kev_failure_count = 0; - -/* Counted in pages... */ unsigned int memorystatus_delta = 0; -unsigned int memorystatus_available_pages = (unsigned int)-1; -unsigned int memorystatus_available_pages_critical = 0; -unsigned int memorystatus_available_pages_highwater = 0; - -/* ...with the exception of the legacy level in percent. */ -unsigned int memorystatus_level = 0; - -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_kev_failure_count, CTLFLAG_RD, &memorystatus_kev_failure_count, 0, ""); -SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD, &memorystatus_level, 0, ""); - -unsigned int memorystatus_jetsam_policy = kPolicyDefault; - -unsigned int memorystatus_jetsam_policy_offset_pages_more_free = 0; -#if DEVELOPMENT || DEBUG -unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0; +static unsigned int memorystatus_available_pages = (unsigned int)-1; +static unsigned int memorystatus_available_pages_pressure = 0; +static unsigned int memorystatus_available_pages_critical = 0; +static unsigned int memorystatus_available_pages_critical_base = 0; +static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1; +#if !LATENCY_JETSAM +static unsigned int memorystatus_available_pages_critical_idle_offset = 0; #endif -static memorystatus_jetsam_snapshot_t memorystatus_jetsam_snapshot; -#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot.entries - -static int memorystatus_jetsam_snapshot_list_count = 0; - -int memorystatus_jetsam_wakeup = 0; -unsigned int memorystatus_jetsam_running = 1; +#if DEVELOPMENT || DEBUG +static unsigned int memorystatus_jetsam_panic_debug = 0; -static uint32_t memorystatus_task_page_count(task_t task); +static unsigned int memorystatus_jetsam_policy = kPolicyDefault; +static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0; +#endif -static void memorystatus_move_node_to_exit_list(memorystatus_node *node); +static boolean_t kill_under_pressure = FALSE; -static void memorystatus_update_levels_locked(void); +static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot; +#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries -static void memorystatus_jetsam_thread_block(void); -static void memorystatus_jetsam_thread(void *param __unused, wait_result_t wr __unused); +static unsigned int memorystatus_jetsam_snapshot_count = 0; +static unsigned int memorystatus_jetsam_snapshot_max = 0; +static void memorystatus_clear_errors(void); +static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint); static int memorystatus_send_note(int event_code, void 
*data, size_t data_length); +static uint32_t memorystatus_build_state(proc_t p); +static void memorystatus_update_levels_locked(boolean_t critical_only); +static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured); + +static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause); +static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors); +#if LEGACY_HIWATER +static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors); +#endif + +static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause); +static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause); -static uint32_t memorystatus_build_flags_from_state(uint32_t state); +#endif /* CONFIG_JETSAM */ /* VM pressure */ #if VM_PRESSURE_EVENTS -typedef enum vm_pressure_level { - kVMPressureNormal = 0, - kVMPressureWarning = 1, - kVMPressureUrgent = 2, - kVMPressureCritical = 3, -} vm_pressure_level_t; - -static vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal; +#include "vm_pressure.h" -unsigned int memorystatus_available_pages_pressure = 0; +extern boolean_t memorystatus_warn_process(pid_t pid); -static inline boolean_t memorystatus_get_pressure_locked(void); -static void memorystatus_check_pressure_reset(void); +vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal; #endif /* VM_PRESSURE_EVENTS */ -#endif /* CONFIG_JETSAM */ - /* Freeze */ #if CONFIG_FREEZE -static unsigned int memorystatus_suspended_resident_count = 0; -static unsigned int memorystatus_suspended_count = 0; - boolean_t memorystatus_freeze_enabled = FALSE; int memorystatus_freeze_wakeup = 0; @@ -216,22 +271,78 @@ static throttle_interval_t throttle_intervals[] = { static uint64_t memorystatus_freeze_throttle_count = 0; -#endif /* CONFIG_FREEZE */ +static unsigned int memorystatus_suspended_count = 0; +static unsigned int memorystatus_suspended_footprint_total = 0; -#if CONFIG_JETSAM +#endif /* CONFIG_FREEZE */ /* Debug */ #if DEVELOPMENT || DEBUG -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD, &memorystatus_available_pages, 0, ""); -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RW, &memorystatus_available_pages_critical, 0, ""); -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_highwater, CTLFLAG_RW, &memorystatus_available_pages_highwater, 0, ""); -#if VM_PRESSURE_EVENTS -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW, &memorystatus_available_pages_pressure, 0, ""); -#endif /* VM_PRESSURE_EVENTS */ +#if CONFIG_JETSAM + +/* Debug aid to aid determination of limit */ + +static int +sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2) + proc_t p; + unsigned int b = 0; + int error, enable = 0; + int32_t memlimit; + + error = SYSCTL_OUT(req, arg1, sizeof(int)); + if (error || !req->newptr) { + return (error); + } + + error = SYSCTL_IN(req, &enable, sizeof(int)); + if (error || !req->newptr) { + return (error); + } + + if (!(enable == 0 || enable == 1)) { + return EINVAL; + } + + proc_list_lock(); + + p = memorystatus_get_first_proc_locked(&b, TRUE); + while (p) { + if (enable) { + if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) { + memlimit = -1; + } else { + memlimit = p->p_memstat_memlimit; + } + } else { + memlimit = -1; + } + task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? 
memlimit : -1, NULL, TRUE); + + p = memorystatus_get_next_proc_locked(&b, p, TRUE); + } + + memorystatus_highwater_enabled = enable; + + proc_list_unlock(); + + return 0; +} + +SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", ""); + +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, ""); +#if !LATENCY_JETSAM +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, ""); +#endif /* Diagnostic code */ + enum { kJetsamDiagnosticModeNone = 0, kJetsamDiagnosticModeAll = 1, @@ -263,7 +374,7 @@ sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS return EINVAL; } - lck_mtx_lock(memorystatus_list_mlock); + proc_list_lock(); if ((unsigned int) val != jetsam_diagnostic_mode) { jetsam_diagnostic_mode = val; @@ -285,11 +396,11 @@ sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS break; } - memorystatus_update_levels_locked(); + memorystatus_update_levels_locked(FALSE); changed = TRUE; } - lck_mtx_unlock(memorystatus_list_mlock); + proc_list_unlock(); if (changed) { printf("%s\n", diagnosticStrings[val]); @@ -298,15 +409,14 @@ sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS return (0); } -SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, +SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY, &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode"); -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_more_free, CTLFLAG_RW, &memorystatus_jetsam_policy_offset_pages_more_free, 0, ""); -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, ""); #if VM_PRESSURE_EVENTS -#include "vm_pressure.h" +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, ""); static int sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS @@ -335,11 +445,7 @@ sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS if (error || !req->newptr) return (error); - if (vm_dispatch_pressure_note_to_pid(pid)) { - return 0; - } - - return EINVAL; + return vm_dispatch_pressure_note_to_pid(pid, FALSE); } SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, @@ -349,23 +455,28 @@ SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_ #endif /* CONFIG_JETSAM */ +#endif /* DEVELOPMENT || DEBUG */ + #if CONFIG_FREEZE -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW, &memorystatus_freeze_threshold, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, ""); -SYSCTL_UINT(_kern, 
OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW, &memorystatus_freeze_pages_min, 0, ""); -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW, &memorystatus_freeze_pages_max, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, ""); -SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD, &memorystatus_freeze_count, ""); -SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD, &memorystatus_freeze_pageouts, ""); -SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD, &memorystatus_freeze_throttle_count, ""); -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW, &memorystatus_freeze_suspended_threshold, 0, ""); +SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_count, ""); +SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, ""); +SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_count, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, ""); boolean_t memorystatus_freeze_throttle_enabled = TRUE; -SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW, &memorystatus_freeze_throttle_enabled, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, ""); /* - * Manual trigger of freeze and thaw for dev / debug kernels only. + * Enabled via: Enable the sysctl_memorystatus_freeze/thaw sysctls on Release KC + * + * TODO: Manual trigger of freeze and thaw for dev / debug kernels only. 
+ * Disable/restrict the sysctl_memorystatus_freeze/thaw sysctls on Release KC */ static int sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS @@ -383,12 +494,20 @@ sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS if (p != NULL) { uint32_t purgeable, wired, clean, dirty; boolean_t shared; - uint32_t max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); - task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); + uint32_t max_pages = 0; + + if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); + } else { + max_pages = UINT32_MAX - 1; + } + error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); proc_rele(p); - return 0; - } + if (error) + error = EIO; + return error; + } return EINVAL; } @@ -409,9 +528,12 @@ sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS p = proc_find(pid); if (p != NULL) { - task_thaw(p->task); + error = task_thaw(p->task); proc_rele(p); - return 0; + + if (error) + error = EIO; + return error; } return EINVAL; @@ -422,56 +544,141 @@ SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_L #endif /* CONFIG_FREEZE */ -#endif /* DEVELOPMENT || DEBUG */ +extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation, + void *parameter, + integer_t priority, + thread_t *new_thread); + +static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) { + memstat_bucket_t *current_bucket; + proc_t next_p; + + if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) { + return NULL; + } + + current_bucket = &memstat_bucket[*bucket_index]; + next_p = TAILQ_FIRST(¤t_bucket->list); + if (!next_p && search) { + while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) { + current_bucket = &memstat_bucket[*bucket_index]; + next_p = TAILQ_FIRST(¤t_bucket->list); + } + } + + return next_p; +} + +static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) { + memstat_bucket_t *current_bucket; + proc_t next_p; + + if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) { + return NULL; + } + + next_p = TAILQ_NEXT(p, p_memstat_list); + while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) { + current_bucket = &memstat_bucket[*bucket_index]; + next_p = TAILQ_FIRST(¤t_bucket->list); + } + + return next_p; +} __private_extern__ void memorystatus_init(void) { thread_t thread = THREAD_NULL; kern_return_t result; + int i; + + nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time); + + /* Init buckets */ + for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) { + TAILQ_INIT(&memstat_bucket[i].list); + memstat_bucket[i].count = 0; + } - memorystatus_lck_attr = lck_attr_alloc_init(); - memorystatus_lck_grp_attr = lck_grp_attr_alloc_init(); - memorystatus_lck_grp = lck_grp_alloc_init("memorystatus", memorystatus_lck_grp_attr); - memorystatus_list_mlock = lck_mtx_alloc_init(memorystatus_lck_grp, memorystatus_lck_attr); - TAILQ_INIT(&memorystatus_list); + memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL); -#if CONFIG_JETSAM - exit_list_mlock = lck_mtx_alloc_init(memorystatus_lck_grp, memorystatus_lck_attr); - TAILQ_INIT(&exit_list); + /* Apply overrides */ + PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage)); + 
assert(delta_percentage < 100); + PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage)); + assert(critical_threshold_percentage < 100); + PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage)); + assert(idle_offset_percentage < 100); + PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage)); + assert(pressure_threshold_percentage < 100); + PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage)); + assert(freeze_threshold_percentage < 100); - memorystatus_delta = DELTA_PERCENT * atop_64(max_mem) / 100; +#if CONFIG_JETSAM + memorystatus_delta = delta_percentage * atop_64(max_mem) / 100; +#if !LATENCY_JETSAM + memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100; #endif + memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta; + + memorystatus_jetsam_snapshot_max = maxproc; + memorystatus_jetsam_snapshot = + (memorystatus_jetsam_snapshot_t*)kalloc(sizeof(memorystatus_jetsam_snapshot_t) + + sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max); + if (!memorystatus_jetsam_snapshot) { + panic("Could not allocate memorystatus_jetsam_snapshot"); + } + + /* No contention at this point */ + memorystatus_update_levels_locked(FALSE); +#endif + #if CONFIG_FREEZE - memorystatus_freeze_threshold = (FREEZE_PERCENT / DELTA_PERCENT) * memorystatus_delta; + memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta; #endif - - nanoseconds_to_absolutetime((uint64_t)IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time); - - result = kernel_thread_start(memorystatus_thread, NULL, &thread); + + result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread); if (result == KERN_SUCCESS) { thread_deallocate(thread); } else { panic("Could not create memorystatus_thread"); } +} -#if CONFIG_JETSAM - memorystatus_jetsam_policy_offset_pages_more_free = (POLICY_MORE_FREE_OFFSET_PERCENT / DELTA_PERCENT) * memorystatus_delta; -#if DEVELOPMENT || DEBUG - memorystatus_jetsam_policy_offset_pages_diagnostic = (POLICY_DIAGNOSTIC_OFFSET_PERCENT / DELTA_PERCENT) * memorystatus_delta; -#endif +/* Centralised for the purposes of allowing panic-on-jetsam */ +extern void +vm_wake_compactor_swapper(void); - /* No contention at this point */ - memorystatus_update_levels_locked(); - - result = kernel_thread_start(memorystatus_jetsam_thread, NULL, &thread); - if (result == KERN_SUCCESS) { - thread_deallocate(thread); - } else { - panic("Could not create memorystatus_jetsam_thread"); +static boolean_t +memorystatus_do_kill(proc_t p, uint32_t cause) { + + int retval = 0; + +#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG) + if (memorystatus_jetsam_panic_debug & (1 << cause)) { + panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause); } +#else +#pragma unused(cause) #endif + int jetsam_flags = P_LTERM_JETSAM; + switch (cause) { + case kMemorystatusKilledHiwat: jetsam_flags |= P_JETSAM_HIWAT; break; + case kMemorystatusKilledVnodes: jetsam_flags |= P_JETSAM_VNODE; break; + case kMemorystatusKilledVMPageShortage: jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break; + case kMemorystatusKilledVMThrashing: jetsam_flags |= P_JETSAM_VMTHRASHING; break; + case 
kMemorystatusKilledPerProcessLimit: jetsam_flags |= P_JETSAM_PID; break; + case kMemorystatusKilledIdleExit: jetsam_flags |= P_JETSAM_IDLEEXIT; break; + } + retval = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags); + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + vm_wake_compactor_swapper(); + } + + return (retval == 0); } /* @@ -479,557 +686,998 @@ memorystatus_init(void) */ static void -memorystatus_add_node(memorystatus_node *new_node) -{ - memorystatus_node *node; +memorystatus_check_levels_locked(void) { +#if CONFIG_JETSAM + /* Update levels */ + memorystatus_update_levels_locked(TRUE); +#endif +} - /* Make sure we're called with the list lock held */ - lck_mtx_assert(memorystatus_list_mlock, LCK_MTX_ASSERT_OWNED); +static void +memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2) +{ + proc_t p; + uint64_t current_time; + memstat_bucket_t *demotion_bucket; + + MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n"); + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0); + + current_time = mach_absolute_time(); + + proc_list_lock(); - TAILQ_FOREACH(node, &memorystatus_list, link) { - if (node->priority <= new_node->priority) { - break; + demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED]; + p = TAILQ_FIRST(&demotion_bucket->list); + + while (p) { + MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid); + + assert(p->p_memstat_idledeadline); + assert(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS); + assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED); + + if (current_time >= p->p_memstat_idledeadline) { +#if DEBUG || DEVELOPMENT + if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) { + printf("memorystatus_perform_idle_demotion: moving process %d to idle band, but never dirtied (0x%x)!\n", p->p_pid, p->p_memstat_dirty); + } +#endif + memorystatus_invalidate_idle_demotion_locked(p, TRUE); + memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE); + + // The prior process has moved out of the demotion bucket, so grab the new head and continue + p = TAILQ_FIRST(&demotion_bucket->list); + continue; } + + // No further candidates + break; } + + memorystatus_reschedule_idle_demotion_locked(); + + proc_list_unlock(); - if (node) { - TAILQ_INSERT_BEFORE(node, new_node, link); - } else { - TAILQ_INSERT_TAIL(&memorystatus_list, new_node, link); - } - - next_memorystatus_node = TAILQ_FIRST(&memorystatus_list); - - memorystatus_list_count++; + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0); } static void -memorystatus_remove_node(memorystatus_node *node) -{ - /* Make sure we're called with the list lock held */ - lck_mtx_assert(memorystatus_list_mlock, LCK_MTX_ASSERT_OWNED); - - TAILQ_REMOVE(&memorystatus_list, node, link); - next_memorystatus_node = TAILQ_FIRST(&memorystatus_list); +memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state) +{ + MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n", + p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions); -#if CONFIG_FREEZE - if (node->state & (kProcessFrozen)) { - memorystatus_frozen_count--; - } + assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == 
(P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)); - if (node->state & kProcessSuspended) { - memorystatus_suspended_resident_count -= node->resident_pages; - memorystatus_suspended_count--; + if (set_state) { + assert(p->p_memstat_idledeadline == 0); + p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time; } -#endif - - memorystatus_list_count--; + + assert(p->p_memstat_idledeadline); + + memorystatus_scheduled_idle_demotions++; } -/* Returns with the lock taken if found */ -static memorystatus_node * -memorystatus_get_node(pid_t pid) +static void +memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state) { - memorystatus_node *node; - - lck_mtx_lock(memorystatus_list_mlock); - - TAILQ_FOREACH(node, &memorystatus_list, link) { - if (node->pid == pid) { - break; - } - } - - if (!node) { - lck_mtx_unlock(memorystatus_list_mlock); + MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n", + p->p_pid, clear_state, memorystatus_scheduled_idle_demotions); + + assert(p->p_memstat_idledeadline); + + if (clear_state) { + p->p_memstat_idledeadline = 0; + p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS; } - - return node; + + memorystatus_scheduled_idle_demotions--; + assert(memorystatus_scheduled_idle_demotions >= 0); } static void -memorystatus_release_node(memorystatus_node *node) -{ -#pragma unused(node) - lck_mtx_unlock(memorystatus_list_mlock); +memorystatus_reschedule_idle_demotion_locked(void) { + if (0 == memorystatus_scheduled_idle_demotions) { + if (memstat_idle_demotion_deadline) { + /* Transitioned 1->0, so cancel next call */ + thread_call_cancel(memorystatus_idle_demotion_call); + memstat_idle_demotion_deadline = 0; + } + } else { + memstat_bucket_t *demotion_bucket; + proc_t p; + demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED]; + p = TAILQ_FIRST(&demotion_bucket->list); + assert(p && p->p_memstat_idledeadline); + + if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){ + thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline); + memstat_idle_demotion_deadline = p->p_memstat_idledeadline; + } + } } /* * List manipulation */ -kern_return_t -memorystatus_list_add(pid_t pid, int priority, int high_water_mark) +int +memorystatus_add(proc_t p, boolean_t locked) { + memstat_bucket_t *bucket; + + MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->pid, priority); + + if (!locked) { + proc_list_lock(); + } + + /* Processes marked internal do not have priority tracked */ + if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { + goto exit; + } + + bucket = &memstat_bucket[p->p_memstat_effectivepriority]; + + TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list); + bucket->count++; -#if !CONFIG_JETSAM -#pragma unused(high_water_mark) -#endif + memorystatus_list_count++; - memorystatus_node *new_node; + memorystatus_check_levels_locked(); + +exit: + if (!locked) { + proc_list_unlock(); + } + + return 0; +} - new_node = (memorystatus_node*)kalloc(sizeof(memorystatus_node)); - if (!new_node) { - assert(FALSE); +static void +memorystatus_update_priority_locked(proc_t p, int priority) +{ + memstat_bucket_t *old_bucket, *new_bucket; + + assert(priority < MEMSTAT_BUCKET_COUNT); + + /* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */ + if ((p->p_listflag & P_LIST_EXITED) != 0) { + return; } - memset(new_node, 0, 
sizeof(memorystatus_node)); - - MEMORYSTATUS_DEBUG(1, "memorystatus_list_add: adding process %d with priority %d, high water mark %d.\n", pid, priority, high_water_mark); - - new_node->pid = pid; - new_node->priority = priority; -#if CONFIG_JETSAM - new_node->hiwat_pages = high_water_mark; -#endif + + MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d\n", p->p_pid, priority); - lck_mtx_lock(memorystatus_list_mlock); - - memorystatus_add_node(new_node); - - lck_mtx_unlock(memorystatus_list_mlock); + old_bucket = &memstat_bucket[p->p_memstat_effectivepriority]; + TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list); + old_bucket->count--; - return KERN_SUCCESS; + new_bucket = &memstat_bucket[priority]; + TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list); + new_bucket->count++; + +#if CONFIG_JETSAM + if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) { + if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) || + ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) { + int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit; + task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); + } + } +#endif + + p->p_memstat_effectivepriority = priority; + + memorystatus_check_levels_locked(); } -kern_return_t -memorystatus_list_change(boolean_t effective, pid_t pid, int priority, int state_flags, int high_water_mark) +int +memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background) { - + int ret; + #if !CONFIG_JETSAM -#pragma unused(high_water_mark) +#pragma unused(update_memlimit, memlimit, memlimit_background) #endif - - kern_return_t ret; - memorystatus_node *node, *search; - - MEMORYSTATUS_DEBUG(1, "memorystatus_list_change: changing process %d to priority %d with flags %d\n", pid, priority, state_flags); - - lck_mtx_lock(memorystatus_list_mlock); - TAILQ_FOREACH(node, &memorystatus_list, link) { - if (node->pid == pid) { - break; - } - } + MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data); - if (!node) { - ret = KERN_FAILURE; - goto out; + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0); + + if (priority == -1) { + /* Use as shorthand for default priority */ + priority = JETSAM_PRIORITY_DEFAULT; + } else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) { + /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. 
*/ + priority = JETSAM_PRIORITY_IDLE; + } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) { + /* Sanity check */ + ret = EINVAL; + goto out; } + + proc_list_lock(); + + assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); - if (effective && (node->state & kProcessPriorityUpdated)) { - MEMORYSTATUS_DEBUG(1, "memorystatus_list_change: effective change specified for pid %d, but change already occurred.\n", pid); - ret = KERN_FAILURE; + if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) { + ret = EALREADY; + proc_list_unlock(); + MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", pid); goto out; } - node->state |= kProcessPriorityUpdated; - - if (state_flags != -1) { - node->state &= ~(kProcessActive|kProcessForeground); - if (state_flags & kMemorystatusFlagsFrontmost) { - node->state |= kProcessForeground; - } - if (state_flags & kMemorystatusFlagsActive) { - node->state |= kProcessActive; - } - } - -#if CONFIG_JETSAM - if (high_water_mark != -1) { - node->hiwat_pages = high_water_mark; - } -#endif - - if (node->priority == priority) { - /* Priority unchanged */ - MEMORYSTATUS_DEBUG(1, "memorystatus_list_change: same priority set for pid %d\n", pid); - ret = KERN_SUCCESS; - goto out; - } - - if (node->priority < priority) { - /* Higher priority value (ie less important) - search backwards */ - search = TAILQ_PREV(node, memorystatus_list_head, link); - TAILQ_REMOVE(&memorystatus_list, node, link); - - node->priority = priority; - while (search && (search->priority <= node->priority)) { - search = TAILQ_PREV(search, memorystatus_list_head, link); - } - if (search) { - TAILQ_INSERT_AFTER(&memorystatus_list, search, node, link); - } else { - TAILQ_INSERT_HEAD(&memorystatus_list, node, link); - } - } else { - /* Lower priority value (ie more important) - search forwards */ - search = TAILQ_NEXT(node, link); - TAILQ_REMOVE(&memorystatus_list, node, link); - - node->priority = priority; - while (search && (search->priority >= node->priority)) { - search = TAILQ_NEXT(search, link); - } - if (search) { - TAILQ_INSERT_BEFORE(search, node, link); + p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED; + p->p_memstat_userdata = user_data; + p->p_memstat_requestedpriority = priority; + +#if CONFIG_JETSAM + if (update_memlimit) { + p->p_memstat_memlimit = memlimit; + if (memlimit_background) { + /* Will be set as priority is updated */ + p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND; } else { - TAILQ_INSERT_TAIL(&memorystatus_list, node, link); + /* Otherwise, apply now */ + if (memorystatus_highwater_enabled) { + task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); + } } } +#endif - next_memorystatus_node = TAILQ_FIRST(&memorystatus_list); - ret = KERN_SUCCESS; + memorystatus_update_priority_locked(p, priority); + + proc_list_unlock(); + ret = 0; out: - lck_mtx_unlock(memorystatus_list_mlock); + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0); + return ret; } -kern_return_t memorystatus_list_remove(pid_t pid) +int +memorystatus_remove(proc_t p, boolean_t locked) { - kern_return_t ret; - memorystatus_node *node = NULL; + int ret; + memstat_bucket_t *bucket; MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", pid); -#if CONFIG_JETSAM - /* Did we mark this as a exited process? 
*/ - lck_mtx_lock(exit_list_mlock); + if (!locked) { + proc_list_lock(); + } - TAILQ_FOREACH(node, &exit_list, link) { - if (node->pid == pid) { - /* We did, so remove it from the list. The stats were updated when the queues were shifted. */ - TAILQ_REMOVE(&exit_list, node, link); - break; - } - } + assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); - lck_mtx_unlock(exit_list_mlock); -#endif + bucket = &memstat_bucket[p->p_memstat_effectivepriority]; + TAILQ_REMOVE(&bucket->list, p, p_memstat_list); + bucket->count--; + + memorystatus_list_count--; - /* If not, search the main list */ - if (!node) { - lck_mtx_lock(memorystatus_list_mlock); + /* If awaiting demotion to the idle band, clean up */ + if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { + memorystatus_invalidate_idle_demotion_locked(p, TRUE); + memorystatus_reschedule_idle_demotion_locked(); + } - TAILQ_FOREACH(node, &memorystatus_list, link) { - if (node->pid == pid) { - /* Remove from the list, and update accounting accordingly */ - memorystatus_remove_node(node); - break; - } - } + memorystatus_check_levels_locked(); + +#if CONFIG_FREEZE + if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) { + memorystatus_frozen_count--; + } - lck_mtx_unlock(memorystatus_list_mlock); + if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { + memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint; + memorystatus_suspended_count--; } +#endif + + if (!locked) { + proc_list_unlock(); + } - if (node) { - kfree(node, sizeof(memorystatus_node)); - ret = KERN_SUCCESS; + if (p) { + ret = 0; } else { - ret = KERN_FAILURE; + ret = ESRCH; } return ret; } -kern_return_t -memorystatus_on_track_dirty(int pid, boolean_t track) -{ - kern_return_t ret = KERN_FAILURE; - memorystatus_node *node; - - node = memorystatus_get_node((pid_t)pid); - if (!node) { - return KERN_FAILURE; +static boolean_t +memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) { + /* See that the process isn't marked for termination */ + if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) { + return FALSE; } - if (track & !(node->state & kProcessSupportsIdleExit)) { - node->state |= kProcessSupportsIdleExit; - node->clean_time = mach_absolute_time() + memorystatus_idle_delay_time; - ret = KERN_SUCCESS; - } else if (!track & (node->state & kProcessSupportsIdleExit)) { - node->state &= ~kProcessSupportsIdleExit; - node->clean_time = 0; - ret = KERN_SUCCESS; + /* Idle exit requires that process be tracked */ + if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) && + !(pcontrol & PROC_DIRTY_TRACK)) { + return FALSE; + } + + /* Deferral is only relevant if idle exit is specified */ + if ((pcontrol & PROC_DIRTY_DEFER) && + !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) { + return FALSE; } - memorystatus_release_node(node); - - return ret; + return TRUE; } -kern_return_t -memorystatus_on_dirty(int pid, boolean_t dirty) -{ - kern_return_t ret = KERN_FAILURE; - memorystatus_node *node; +static void +memorystatus_update_idle_priority_locked(proc_t p) { + int32_t priority; + + MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty); + + if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) { + priority = (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) ? 
JETSAM_PRIORITY_IDLE_DEFERRED : JETSAM_PRIORITY_IDLE; + } else { + priority = p->p_memstat_requestedpriority; + } + + memorystatus_update_priority_locked(p, priority); +} + +/* + * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle + * (clean). They may also indicate that they support termination when idle, with the result that they are promoted + * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low + * priority idle band when clean (and killed earlier, protecting higher priority procesess). + * + * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by + * memorystatus_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band + * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to + * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle + * band. The deferral can be cleared early by clearing the appropriate flag. + * + * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process + * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be + * re-enabled or the guard state cleared, depending on whether the guard deadline has passed. + */ + +int +memorystatus_dirty_track(proc_t p, uint32_t pcontrol) { + unsigned int old_dirty; + boolean_t reschedule = FALSE; + int ret; + + proc_list_lock(); - node = memorystatus_get_node((pid_t)pid); - if (!node) { - return KERN_FAILURE; + if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { + ret = EPERM; + goto exit; } - if (dirty) { - if (!(node->state & kProcessDirty)) { - node->state |= kProcessDirty; - node->clean_time = 0; - memorystatus_dirty_count++; - ret = KERN_SUCCESS; - } + if (!memorystatus_validate_track_flags(p, pcontrol)) { + ret = EINVAL; + goto exit; + } + + old_dirty = p->p_memstat_dirty; + + /* These bits are cumulative, as per */ + if (pcontrol & PROC_DIRTY_TRACK) { + p->p_memstat_dirty |= P_DIRTY_TRACK; + } + + if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) { + p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT; + } + + /* This can be set and cleared exactly once. */ + if ((pcontrol & PROC_DIRTY_DEFER) && !(old_dirty & P_DIRTY_DEFER)) { + p->p_memstat_dirty |= (P_DIRTY_DEFER|P_DIRTY_DEFER_IN_PROGRESS); } else { - if (node->state & kProcessDirty) { - node->state &= ~kProcessDirty; - node->clean_time = mach_absolute_time() + memorystatus_idle_delay_time; - memorystatus_dirty_count--; - ret = KERN_SUCCESS; + p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS; + } + + MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / deferred %s / dirty %s for process %d\n", + ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N", + p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS ? "Y" : "N", + p->p_memstat_dirty & P_DIRTY ? "Y" : "N", + p->p_pid); + + /* Kick off or invalidate the idle exit deferment if there's a state transition. 
*/ + if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) { + if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) && + (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && !(old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) { + memorystatus_schedule_idle_demotion_locked(p, TRUE); + reschedule = TRUE; + } else if (!(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && (old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) { + memorystatus_invalidate_idle_demotion_locked(p, TRUE); + reschedule = TRUE; } } + + memorystatus_update_idle_priority_locked(p); + + if (reschedule) { + memorystatus_reschedule_idle_demotion_locked(); + } + + ret = 0; - memorystatus_release_node(node); +exit: + proc_list_unlock(); return ret; } -void -memorystatus_on_suspend(int pid) -{ - memorystatus_node *node = memorystatus_get_node((pid_t)pid); +int +memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) { + int ret; + boolean_t kill = false; + boolean_t reschedule = FALSE; + boolean_t was_dirty = FALSE; + boolean_t now_dirty = FALSE; - if (node) { -#if CONFIG_FREEZE - proc_t p; + MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty); - p = proc_find(pid); - if (p != NULL) { - uint32_t pages = memorystatus_task_page_count(p->task); - proc_rele(p); - node->resident_pages = pages; - memorystatus_suspended_resident_count += pages; + proc_list_lock(); + + if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { + ret = EPERM; + goto exit; + } + + if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) + was_dirty = TRUE; + + if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { + /* Dirty tracking not enabled */ + ret = EINVAL; + } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { + /* + * Process is set to be terminated and we're attempting to mark it dirty. + * Set for termination and marking as clean is OK - see . + */ + ret = EBUSY; + } else { + int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN; + if (pcontrol && !(p->p_memstat_dirty & flag)) { + /* Mark the process as having been dirtied at some point */ + p->p_memstat_dirty |= (flag | P_DIRTY_MARKED); + memorystatus_dirty_count++; + ret = 0; + } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) { + if ((flag == P_DIRTY_SHUTDOWN) && (!p->p_memstat_dirty & P_DIRTY)) { + /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */ + p->p_memstat_dirty |= P_DIRTY_TERMINATED; + kill = true; + } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { + /* Kill previously terminated processes if set clean */ + kill = true; + } + p->p_memstat_dirty &= ~flag; + memorystatus_dirty_count--; + ret = 0; + } else { + /* Already set */ + ret = EALREADY; } - memorystatus_suspended_count++; -#endif + } + + if (ret != 0) { + goto exit; + } + + if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) + now_dirty = TRUE; + + if ((was_dirty == TRUE && now_dirty == FALSE) || + (was_dirty == FALSE && now_dirty == TRUE)) { + + /* Manage idle exit deferral, if applied */ + if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == + (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) { + if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { + memorystatus_invalidate_idle_demotion_locked(p, FALSE); + reschedule = TRUE; + } else { + /* We evaluate lazily, so reset the idle-deadline if it's expired by the time the process becomes clean. 
*/ + if (mach_absolute_time() >= p->p_memstat_idledeadline) { + p->p_memstat_idledeadline = 0; + p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS; + } else { + memorystatus_schedule_idle_demotion_locked(p, FALSE); + reschedule = TRUE; + } + } + } + + memorystatus_update_idle_priority_locked(p); + + /* If the deferral state changed, reschedule the demotion timer */ + if (reschedule) { + memorystatus_reschedule_idle_demotion_locked(); + } + } + + if (kill) { + psignal(p, SIGKILL); + } + +exit: + proc_list_unlock(); + + return ret; +} - node->state |= kProcessSuspended; +int +memorystatus_dirty_get(proc_t p) { + int ret = 0; + + proc_list_lock(); + + if (p->p_memstat_dirty & P_DIRTY_TRACK) { + ret |= PROC_DIRTY_TRACKED; + if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) { + ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT; + } + if (p->p_memstat_dirty & P_DIRTY) { + ret |= PROC_DIRTY_IS_DIRTY; + } + } + + proc_list_unlock(); + + return ret; +} - memorystatus_release_node(node); +int +memorystatus_on_terminate(proc_t p) { + int sig; + + proc_list_lock(); + + p->p_memstat_dirty |= P_DIRTY_TERMINATED; + + if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) { + /* Clean; mark as terminated and issue SIGKILL */ + sig = SIGKILL; + } else { + /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */ + sig = SIGTERM; } + + proc_list_unlock(); + + return sig; } void -memorystatus_on_resume(int pid) -{ - memorystatus_node *node = memorystatus_get_node((pid_t)pid); - - if (node) { +memorystatus_on_suspend(proc_t p) +{ #if CONFIG_FREEZE - boolean_t frozen = (node->state & kProcessFrozen); - if (node->state & (kProcessFrozen)) { - memorystatus_frozen_count--; - } - memorystatus_suspended_resident_count -= node->resident_pages; - memorystatus_suspended_count--; + uint32_t pages; + memorystatus_get_task_page_counts(p->task, &pages, NULL); +#endif + proc_list_lock(); +#if CONFIG_FREEZE + p->p_memstat_suspendedfootprint = pages; + memorystatus_suspended_footprint_total += pages; + memorystatus_suspended_count++; #endif + p->p_memstat_state |= P_MEMSTAT_SUSPENDED; + proc_list_unlock(); +} - node->state &= ~(kProcessSuspended | kProcessFrozen | kProcessIgnored); +void +memorystatus_on_resume(proc_t p) +{ +#if CONFIG_FREEZE + boolean_t frozen; + pid_t pid; +#endif - memorystatus_release_node(node); + proc_list_lock(); #if CONFIG_FREEZE - if (frozen) { - memorystatus_freeze_entry_t data = { pid, kMemorystatusFlagsThawed, 0 }; - memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); - } + frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN); + if (frozen) { + memorystatus_frozen_count--; + p->p_memstat_state |= P_MEMSTAT_PRIOR_THAW; + } + + memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint; + memorystatus_suspended_count--; + + pid = p->p_pid; #endif + + p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN); + + proc_list_unlock(); + +#if CONFIG_FREEZE + if (frozen) { + memorystatus_freeze_entry_t data = { pid, FALSE, 0 }; + memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); } +#endif } void -memorystatus_on_inactivity(int pid) +memorystatus_on_inactivity(proc_t p) { -#pragma unused(pid) +#pragma unused(p) #if CONFIG_FREEZE /* Wake the freeze thread */ thread_wakeup((event_t)&memorystatus_freeze_wakeup); #endif } -static void -memorystatus_thread(void *param __unused, wait_result_t wr __unused) +static uint32_t +memorystatus_build_state(proc_t p) { + uint32_t snapshot_state = 0; + + /* General */ + if 
(p->p_memstat_state & P_MEMSTAT_SUSPENDED) { + snapshot_state |= kMemorystatusSuspended; + } + if (p->p_memstat_state & P_MEMSTAT_FROZEN) { + snapshot_state |= kMemorystatusFrozen; + } + if (p->p_memstat_state & P_MEMSTAT_PRIOR_THAW) { + snapshot_state |= kMemorystatusWasThawed; + } + + /* Tracking */ + if (p->p_memstat_dirty & P_DIRTY_TRACK) { + snapshot_state |= kMemorystatusTracked; + } + if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) { + snapshot_state |= kMemorystatusSupportsIdleExit; + } + if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { + snapshot_state |= kMemorystatusDirty; + } + + return snapshot_state; +} + +#if !CONFIG_JETSAM + +static boolean_t +kill_idle_exit_proc(void) { - static boolean_t initialized = FALSE; - memorystatus_node *node; + proc_t p, victim_p = PROC_NULL; uint64_t current_time; - pid_t victim_pid = -1; - - if (initialized == FALSE) { - initialized = TRUE; - assert_wait(&memorystatus_wakeup, THREAD_UNINT); - (void)thread_block((thread_continue_t)memorystatus_thread); - } + boolean_t killed = FALSE; + unsigned int i = 0; - /* Pick next idle exit victim. For now, just iterate through; ideally, this would be be more intelligent. */ + /* Pick next idle exit victim. */ current_time = mach_absolute_time(); - /* Set a cutoff so that we don't idle exit processes that went recently clean */ - - lck_mtx_lock(memorystatus_list_mlock); + proc_list_lock(); - if (memorystatus_dirty_count) { - TAILQ_FOREACH(node, &memorystatus_list, link) { - if ((node->state & kProcessSupportsIdleExit) && !(node->state & (kProcessDirty|kProcessIgnoreIdleExit))) { - if (current_time >= node->clean_time) { - victim_pid = node->pid; - break; - } + p = memorystatus_get_first_proc_locked(&i, FALSE); + while (p) { + /* No need to look beyond the idle band */ + if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) { + break; + } + + if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) { + if (current_time >= p->p_memstat_idledeadline) { + p->p_memstat_dirty |= P_DIRTY_TERMINATED; + victim_p = proc_ref_locked(p); + break; } } + + p = memorystatus_get_next_proc_locked(&i, p, FALSE); } - - lck_mtx_unlock(memorystatus_list_mlock); - if (-1 != victim_pid) { - proc_t p = proc_find(victim_pid); - if (p != NULL) { - boolean_t kill = FALSE; - proc_dirty_start(p); - /* Ensure process is still marked for idle exit and is clean */ - if ((p->p_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) { - /* Clean; issue SIGKILL */ - p->p_dirty |= P_DIRTY_TERMINATED; - kill = TRUE; - } - proc_dirty_end(p); - if (TRUE == kill) { - printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_pid, (p->p_comm ? p->p_comm : "(unknown)")); - psignal(p, SIGKILL); - } - proc_rele(p); - } + proc_list_unlock(); + + if (victim_p) { + printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_p->p_pid, (victim_p->p_comm ? 
victim_p->p_comm : "(unknown)")); + killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit); + proc_rele(victim_p); } - assert_wait(&memorystatus_wakeup, THREAD_UNINT); - (void)thread_block((thread_continue_t)memorystatus_thread); + return killed; } +#endif -#if CONFIG_JETSAM - -static uint32_t -memorystatus_task_page_count(task_t task) -{ - kern_return_t ret; - static task_info_data_t data; - static struct task_basic_info *info = (struct task_basic_info *)&data; - static mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT; - - ret = task_info(task, TASK_BASIC_INFO, (task_info_t)&data, &count); - if (ret == KERN_SUCCESS) { - return info->resident_size / PAGE_SIZE; - } - return 0; +static void +memorystatus_thread_wake(void) { + thread_wakeup((event_t)&memorystatus_wakeup); } static int -memorystatus_send_note(int event_code, void *data, size_t data_length) { - int ret; - struct kev_msg ev_msg; +memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation) +{ + if (interval_ms) { + assert_wait_timeout(&memorystatus_wakeup, THREAD_UNINT, interval_ms, 1000 * NSEC_PER_USEC); + } else { + assert_wait(&memorystatus_wakeup, THREAD_UNINT); + } - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_SYSTEM_CLASS; - ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS; + return thread_block(continuation); +} - ev_msg.event_code = event_code; +extern boolean_t vm_compressor_thrashing_detected; +extern uint64_t vm_compressor_total_compressions(void); - ev_msg.dv[0].data_length = data_length; - ev_msg.dv[0].data_ptr = data; - ev_msg.dv[1].data_length = 0; +static void +memorystatus_thread(void *param __unused, wait_result_t wr __unused) +{ + static boolean_t is_vm_privileged = FALSE; +#if CONFIG_JETSAM + boolean_t post_snapshot = FALSE; + uint32_t errors = 0; +#endif - ret = kev_post_msg(&ev_msg); - if (ret) { - memorystatus_kev_failure_count++; - printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); + if (is_vm_privileged == FALSE) { + /* + * It's the first time the thread has run, so just mark the thread as privileged and block. + * This avoids a spurious pass with unset variables, as set out in . + */ + thread_wire(host_priv_self(), current_thread(), TRUE); + is_vm_privileged = TRUE; + + memorystatus_thread_block(0, memorystatus_thread); } - return ret; -} +#if CONFIG_JETSAM + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START, + memorystatus_available_pages, 0, 0, 0, 0); -static uint32_t -memorystatus_build_flags_from_state(uint32_t state) { - uint32_t flags = 0; - - if (state & kProcessForeground) { - flags |= kMemorystatusFlagsFrontmost; - } - if (state & kProcessActive) { - flags |= kMemorystatusFlagsActive; - } - if (state & kProcessSupportsIdleExit) { - flags |= kMemorystatusFlagsSupportsIdleExit; - } - if (state & kProcessDirty) { - flags |= kMemorystatusFlagsDirty; - } - - return flags; -} + uint32_t cause = vm_compressor_thrashing_detected ? kMemorystatusKilledVMThrashing : kMemorystatusKilledVMPageShortage; -static void -memorystatus_move_node_to_exit_list(memorystatus_node *node) -{ - /* Make sure we're called with the list lock held */ - lck_mtx_assert(memorystatus_list_mlock, LCK_MTX_ASSERT_OWNED); - - /* Now, acquire the exit list lock... */ - lck_mtx_lock(exit_list_mlock); + /* Jetsam aware version. 
+ * + * If woken under pressure, go down the path of killing: + * + * - processes exceeding their highwater mark if no clean victims available + * - the least recently used process if no highwater mark victims available + */ +#if !LATENCY_JETSAM + while (vm_compressor_thrashing_detected || memorystatus_available_pages <= memorystatus_available_pages_critical) { +#else + while (kill_under_pressure) { + const uint32_t SNAPSHOT_WAIT_TIMEOUT_MS = 100; + wait_result_t wait_result; +#endif + boolean_t killed; + int32_t priority; + +#if LEGACY_HIWATER + /* Highwater */ + killed = memorystatus_kill_hiwat_proc(&errors); + if (killed) { + post_snapshot = TRUE; + goto done; + } +#endif + + /* LRU */ + killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors); + if (killed) { + if (!kill_under_pressure && (priority != JETSAM_PRIORITY_IDLE)) { + /* Don't generate logs for steady-state idle-exit kills */ + post_snapshot = TRUE; + } + goto done; + } + + /* Under pressure and unable to kill a process - panic */ + panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages); + +done: + kill_under_pressure = FALSE; + vm_compressor_thrashing_detected = FALSE; + +#if LATENCY_JETSAM + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_START, + memorystatus_available_pages, 0, 0, 0, 0); + thread_wakeup((event_t)&latency_jetsam_wakeup); + /* + * Coalesce snapshot reports in the face of repeated jetsams by blocking here with a timeout. + * If the wait expires, issue the note. + */ + wait_result = memorystatus_thread_block(SNAPSHOT_WAIT_TIMEOUT_MS, THREAD_CONTINUE_NULL); + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_END, + memorystatus_available_pages, 0, 0, 0, 0); + if (wait_result != THREAD_AWAKENED) { + /* Catch-all */ + break; + } +#endif + } + + if (errors) { + memorystatus_clear_errors(); + } + +#if VM_PRESSURE_EVENTS + memorystatus_update_vm_pressure(TRUE); +#endif + + if (post_snapshot) { + size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + + sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count); + memorystatus_jetsam_snapshot->notification_time = mach_absolute_time(); + memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); + } + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END, + memorystatus_available_pages, 0, 0, 0, 0); + +#else /* CONFIG_JETSAM */ + + /* Simple version. + * + * Jetsam not enabled, so just kill the first suitable clean process + * and sleep. + */ + + if (kill_idle_exit) { + kill_idle_exit_proc(); + kill_idle_exit = FALSE; + } - /* Remove from list + update accounting... */ - memorystatus_remove_node(node); +#endif /* CONFIG_JETSAM */ + + memorystatus_thread_block(0, memorystatus_thread); +} + +#if !CONFIG_JETSAM +boolean_t memorystatus_idle_exit_from_VM(void) { + kill_idle_exit = TRUE; + memorystatus_thread_wake(); + return TRUE; +} +#endif + +#if CONFIG_JETSAM + +/* + * Callback invoked when allowable physical memory footprint exceeded + * (dirty pages + IOKit mappings) + * + * This is invoked for both advisory, non-fatal per-task high watermarks, + * as well as the fatal system-wide task memory limit. 
+ */ +void +memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb) +{ + proc_t p = current_proc(); + + printf("process %d (%s) %s physical memory footprint limit of %d MB\n", + p->p_pid, p->p_comm, + warning ? "approaching" : "exceeded", + max_footprint_mb); + +#if VM_PRESSURE_EVENTS + if (warning == TRUE) { + if (memorystatus_warn_process(p->p_pid) != TRUE) { + /* Print warning, since it's possible that task has not registered for pressure notifications */ + printf("task_exceeded_footprint: failed to warn the current task (exiting?).\n"); + } + return; + } +#endif /* VM_PRESSURE_EVENTS */ + + if (p->p_memstat_memlimit <= 0) { + /* + * If this process has no high watermark, then we have been invoked because the task + * has violated the system-wide per-task memory limit. + */ + if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) { + printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n"); + } + } +} + +static void +memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint) +{ + assert(task); + assert(footprint); + + *footprint = (uint32_t)(get_task_phys_footprint(task) / PAGE_SIZE_64); + if (max_footprint) { + *max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64); + } +} + +static int +memorystatus_send_note(int event_code, void *data, size_t data_length) { + int ret; + struct kev_msg ev_msg; - /* ...then insert at the end of the exit queue */ - TAILQ_INSERT_TAIL(&exit_list, node, link); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_SYSTEM_CLASS; + ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS; + + ev_msg.event_code = event_code; + + ev_msg.dv[0].data_length = data_length; + ev_msg.dv[0].data_ptr = data; + ev_msg.dv[1].data_length = 0; + + ret = kev_post_msg(&ev_msg); + if (ret) { + printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); + } - /* And relax */ - lck_mtx_unlock(exit_list_mlock); + return ret; +} + +static void +memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause) +{ + unsigned int i; + + for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) { + if (memorystatus_jetsam_snapshot_list[i].pid == p->p_pid) { + /* Update if the priority has changed since the snapshot was taken */ + if (memorystatus_jetsam_snapshot_list[i].priority != p->p_memstat_effectivepriority) { + memorystatus_jetsam_snapshot_list[i].priority = p->p_memstat_effectivepriority; + strlcpy(memorystatus_jetsam_snapshot_list[i].name, p->p_comm, MAXCOMLEN+1); + memorystatus_jetsam_snapshot_list[i].state = memorystatus_build_state(p); + memorystatus_jetsam_snapshot_list[i].user_data = p->p_memstat_userdata; + memorystatus_jetsam_snapshot_list[i].fds = p->p_fd->fd_nfiles; + } + memorystatus_jetsam_snapshot_list[i].killed = kill_cause; + return; + } + } } -void memorystatus_update(unsigned int pages_avail) -{ +void memorystatus_pages_update(unsigned int pages_avail) +{ + boolean_t critical, delta; + if (!memorystatus_delta) { return; } - - if ((pages_avail < memorystatus_available_pages_critical) || - (pages_avail >= (memorystatus_available_pages + memorystatus_delta)) || - (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) { + + critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE; + delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta)) + || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? 
TRUE : FALSE; + + if (critical || delta) { memorystatus_available_pages = pages_avail; - memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem); - /* Only wake the thread if currently blocked */ - if (OSCompareAndSwap(0, 1, &memorystatus_jetsam_running)) { - thread_wakeup((event_t)&memorystatus_jetsam_wakeup); + memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem); + +#if LATENCY_JETSAM + /* Bail early to avoid excessive wake-ups */ + if (critical) { + return; } +#endif + + memorystatus_thread_wake(); } } static boolean_t memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry) { - memorystatus_node *node; - - TAILQ_FOREACH(node, &memorystatus_list, link) { - if (node->pid == p->p_pid) { - break; - } - } - - if (!node) { - return FALSE; - } + memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t)); entry->pid = p->p_pid; strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1); - entry->priority = node->priority; - entry->pages = memorystatus_task_page_count(p->task); - entry->flags = memorystatus_build_flags_from_state(node->state); + entry->priority = p->p_memstat_effectivepriority; + memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages); + entry->state = memorystatus_build_state(p); + entry->user_data = p->p_memstat_userdata; memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid)); return TRUE; @@ -1038,17 +1686,38 @@ memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jets static void memorystatus_jetsam_snapshot_procs_locked(void) { - proc_t p; - int i = 0; - - memorystatus_jetsam_snapshot.stats.free_pages = vm_page_free_count; - memorystatus_jetsam_snapshot.stats.active_pages = vm_page_active_count; - memorystatus_jetsam_snapshot.stats.inactive_pages = vm_page_inactive_count; - memorystatus_jetsam_snapshot.stats.throttled_pages = vm_page_throttled_count; - memorystatus_jetsam_snapshot.stats.purgeable_pages = vm_page_purgeable_count; - memorystatus_jetsam_snapshot.stats.wired_pages = vm_page_wire_count; - proc_list_lock(); - LIST_FOREACH(p, &allproc, p_list) { + proc_t p, next_p; + unsigned int b = 0, i = 0; + kern_return_t kr = KERN_SUCCESS; + + mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; + vm_statistics64_data_t vm_stat; + + if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count) != KERN_SUCCESS)) { + printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr); + memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats)); + } else { + memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count; + memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count; + memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count; + memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count; + memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count; + memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count; + + memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count; + memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count; + memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count; + memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions; + memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions; + 
memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count; + memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor; + } + + next_p = memorystatus_get_first_proc_locked(&b, TRUE); + while (next_p) { + p = next_p; + next_p = memorystatus_get_next_proc_locked(&b, p, TRUE); + if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) { continue; } @@ -1058,317 +1727,423 @@ memorystatus_jetsam_snapshot_procs_locked(void) p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7], p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]); - if (++i == kMaxSnapshotEntries) { + if (++i == memorystatus_jetsam_snapshot_max) { break; } } - proc_list_unlock(); - memorystatus_jetsam_snapshot.snapshot_time = mach_absolute_time(); - memorystatus_jetsam_snapshot.entry_count = memorystatus_jetsam_snapshot_list_count = i - 1; + + memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time(); + memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i; } -static void -memorystatus_mark_pid_in_snapshot(pid_t pid, int flags) -{ - int i = 0; +#if DEVELOPMENT || DEBUG - for (i = 0; i < memorystatus_jetsam_snapshot_list_count; i++) { - if (memorystatus_jetsam_snapshot_list[i].pid == pid) { - memorystatus_jetsam_snapshot_list[i].flags |= flags; - return; - } +static int +memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) { + int ret; + memorystatus_jetsam_panic_options_t debug; + + if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) { + return EINVAL; } + + ret = copyin(buffer, &debug, buffer_size); + if (ret) { + return ret; + } + + /* Panic bits match kMemorystatusKilled* enum */ + memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask); + + /* Copyout new value */ + debug.data = memorystatus_jetsam_panic_debug; + ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t)); + + return ret; } -int -memorystatus_kill_top_proc(boolean_t any, uint32_t cause) -{ +#endif + +/* + * Jetsam a specific process. + */ +static boolean_t +memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) { + boolean_t killed; proc_t p; - int pending_snapshot = 0; + + /* TODO - add a victim queue and push this into the main jetsam thread */ + + p = proc_find(victim_pid); + if (!p) { + return FALSE; + } + + printf("memorystatus: specifically killing pid %d [%s] - memorystatus_available_pages: %d\n", + victim_pid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages); + + proc_list_lock(); + + if (memorystatus_jetsam_snapshot_count == 0) { + memorystatus_jetsam_snapshot_procs_locked(); + } + + memorystatus_update_snapshot_locked(p, cause); + proc_list_unlock(); + + killed = memorystatus_do_kill(p, cause); + proc_rele(p); + + return killed; +} + +/* + * Jetsam the first process in the queue. 
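+ * Candidates are visited in ascending band order via
+ * memorystatus_get_first_proc_locked(), so the lowest-priority
+ * eligible process is taken first.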
+ */ +static boolean_t +memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors) +{ + pid_t aPid; + proc_t p = PROC_NULL, next_p = PROC_NULL; + boolean_t new_snapshot = FALSE, killed = FALSE; + unsigned int i = 0; #ifndef CONFIG_FREEZE #pragma unused(any) #endif - lck_mtx_lock(memorystatus_list_mlock); + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START, + memorystatus_available_pages, 0, 0, 0, 0); - if (memorystatus_jetsam_snapshot_list_count == 0) { - memorystatus_jetsam_snapshot_procs_locked(); - } else { - pending_snapshot = 1; - } + proc_list_lock(); - while (next_memorystatus_node) { - memorystatus_node *node; - pid_t aPid; + next_p = memorystatus_get_first_proc_locked(&i, TRUE); + while (next_p) { #if DEVELOPMENT || DEBUG int activeProcess; int procSuspendedForDiagnosis; #endif /* DEVELOPMENT || DEBUG */ - - node = next_memorystatus_node; - next_memorystatus_node = TAILQ_NEXT(next_memorystatus_node, link); - + + p = next_p; + next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); + #if DEVELOPMENT || DEBUG - activeProcess = node->state & kProcessForeground; - procSuspendedForDiagnosis = node->state & kProcessSuspendedForDiag; + activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND; + procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED; #endif /* DEVELOPMENT || DEBUG */ - aPid = node->pid; + aPid = p->p_pid; - /* skip empty slots in the list */ - if (aPid == 0 || (node->state & kProcessKilled)) { - continue; // with lock held + if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { + continue; } - - p = proc_find(aPid); - if (p != NULL) { - int flags = cause; - + #if DEVELOPMENT || DEBUG - if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) { - printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid); - proc_rele(p); - continue; - } + if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) { + printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid); + continue; + } #endif /* DEVELOPMENT || DEBUG */ #if CONFIG_FREEZE - boolean_t skip; - boolean_t reclaim_proc = !(node->state & (kProcessLocked | kProcessNoReclaimWorth)); - if (any || reclaim_proc) { - if (node->state & kProcessFrozen) { - flags |= kMemorystatusFlagsFrozen; - } - skip = FALSE; - } else { - skip = TRUE; - } + boolean_t skip; + boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM)); + if (any || reclaim_proc) { + skip = FALSE; + } else { + skip = TRUE; + } - if (skip) { - proc_rele(p); - } else + if (skip) { + continue; + } else #endif - { + { + if (priority) { + *priority = p->p_memstat_effectivepriority; + } + + /* + * Capture a snapshot if none exists and: + * - priority was not requested (this is something other than an ambient kill) + * - the priority was requested *and* the targeted process is not at idle priority + */ + if ((memorystatus_jetsam_snapshot_count == 0) && + ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE)))) { + memorystatus_jetsam_snapshot_procs_locked(); + new_snapshot = TRUE; + } + + /* + * Mark as terminated so that if exit1() indicates success, but the process (for example) + * is blocked in task_exception_notify(), it'll be skipped if encountered again - see + * . This is cheaper than examining P_LEXIT, which requires the + * acquisition of the proc lock. 
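+ * If the kill later fails, the flag is cleared again on the unwind
+ * path below and the process is marked P_MEMSTAT_ERROR instead.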
+ */ + p->p_memstat_state |= P_MEMSTAT_TERMINATED; + #if DEVELOPMENT || DEBUG - if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) { - MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n", - aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level); - memorystatus_mark_pid_in_snapshot(aPid, kMemorystatusFlagsSuspForDiagnosis); - node->state |= kProcessSuspendedForDiag; - if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) { - jetsam_diagnostic_suspended_one_active_proc = 1; - printf("jetsam: returning after suspending first active proc - %d\n", aPid); - } - lck_mtx_unlock(memorystatus_list_mlock); + if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) { + MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n", + aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level); + memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic); + p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED; + if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) { + jetsam_diagnostic_suspended_one_active_proc = 1; + printf("jetsam: returning after suspending first active proc - %d\n", aPid); + } + + p = proc_ref_locked(p); + proc_list_unlock(); + if (p) { task_suspend(p->task); proc_rele(p); - return 0; - } else + killed = TRUE; + } + + goto exit; + } else #endif /* DEVELOPMENT || DEBUG */ - { + { + /* Shift queue, update stats */ + memorystatus_update_snapshot_locked(p, cause); + + p = proc_ref_locked(p); + proc_list_unlock(); + if (p) { printf("memorystatus: jetsam killing pid %d [%s] - memorystatus_available_pages: %d\n", - aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages); - /* Shift queue, update stats */ - memorystatus_move_node_to_exit_list(node); - memorystatus_mark_pid_in_snapshot(aPid, flags); - lck_mtx_unlock(memorystatus_list_mlock); - exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE); + aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages); + killed = memorystatus_do_kill(p, cause); + } + + /* Success? */ + if (killed) { proc_rele(p); - return 0; + goto exit; } + + /* Failure - unwind and restart. */ + proc_list_lock(); + proc_rele_locked(p); + p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; + p->p_memstat_state |= P_MEMSTAT_ERROR; + *errors += 1; + i = 0; + next_p = memorystatus_get_first_proc_locked(&i, TRUE); } } } - lck_mtx_unlock(memorystatus_list_mlock); + proc_list_unlock(); - // If we didn't kill anything, toss any newly-created snapshot - if (!pending_snapshot) { - memorystatus_jetsam_snapshot.entry_count = memorystatus_jetsam_snapshot_list_count = 0; +exit: + /* Clear snapshot if freshly captured and no target was found */ + if (new_snapshot && !killed) { + memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; } - return -1; -} + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END, + memorystatus_available_pages, killed ? 
aPid : 0, 0, 0, 0);
-int memorystatus_kill_top_proc_from_VM(void) {
- return memorystatus_kill_top_proc(TRUE, kMemorystatusFlagsKilledVM);
+ return killed;
}
-static int
-memorystatus_kill_hiwat_proc(void)
+#if LEGACY_HIWATER
+
+static boolean_t
+memorystatus_kill_hiwat_proc(uint32_t *errors)
{
- proc_t p;
- int pending_snapshot = 0;
- memorystatus_node *next_hiwat_node;
-
- lck_mtx_lock(memorystatus_list_mlock);
+ pid_t aPid = 0;
+ proc_t p = PROC_NULL, next_p = PROC_NULL;
+ boolean_t new_snapshot = FALSE, killed = FALSE;
+ unsigned int i = 0;
- if (memorystatus_jetsam_snapshot_list_count == 0) {
- memorystatus_jetsam_snapshot_procs_locked();
- } else {
- pending_snapshot = 1;
- }
+ KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
+ memorystatus_available_pages, 0, 0, 0, 0);
- next_hiwat_node = next_memorystatus_node;
+ proc_list_lock();
- while (next_hiwat_node) {
- pid_t aPid;
- int32_t hiwat;
- memorystatus_node *node;
-
- node = next_hiwat_node;
- next_hiwat_node = TAILQ_NEXT(next_hiwat_node, link);
+ next_p = memorystatus_get_first_proc_locked(&i, TRUE);
+ while (next_p) {
+ uint32_t footprint;
+ boolean_t skip;
+
+ p = next_p;
+ next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
- aPid = node->pid;
- hiwat = node->hiwat_pages;
+ aPid = p->p_pid;
- /* skip empty or non-hiwat slots in the list */
- if (aPid == 0 || (hiwat < 0) || (node->state & kProcessKilled)) {
- continue; // with lock held
+ if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
+ continue;
+ }
+
+ /* skip if no limit set */
+ if (p->p_memstat_memlimit <= 0) {
+ continue;
}
- p = proc_find(aPid);
- if (p != NULL) {
- int32_t pages = (int32_t)memorystatus_task_page_count(p->task);
- boolean_t skip = (pages <= hiwat);
+ /* skip if a currently inapplicable limit is encountered */
+ if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
+ continue;
+ }
+
+ footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024));
+ skip = (((int32_t)footprint) <= p->p_memstat_memlimit);
#if DEVELOPMENT || DEBUG
- if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
- if (node->state & kProcessSuspendedForDiag) {
- proc_rele(p);
- continue;
- }
+ if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
+ if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) {
+ continue;
+ }
}
#endif /* DEVELOPMENT || DEBUG */
#if CONFIG_FREEZE
- if (!skip) {
- if (node->state & kProcessLocked) {
- skip = TRUE;
- } else {
- skip = FALSE;
- }
- }
+ if (!skip) {
+ if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
+ skip = TRUE;
+ } else {
+ skip = FALSE;
+ }
+ }
#endif
- if (!skip) {
- MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d pages > 1 (%d)\n",
- (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, pages, hiwat);
+ if (skip) {
+ continue;
+ } else {
+ MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d Mb > 1 (%d Mb)\n",
+ (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit);
+
+ if (memorystatus_jetsam_snapshot_count == 0) {
+ memorystatus_jetsam_snapshot_procs_locked();
+ new_snapshot = TRUE;
+ }
+
+ p->p_memstat_state |= P_MEMSTAT_TERMINATED;
+
#if DEVELOPMENT || DEBUG
- if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
- memorystatus_mark_pid_in_snapshot(aPid, kMemorystatusFlagsSuspForDiagnosis);
- node->state |= kProcessSuspendedForDiag;
- lck_mtx_unlock(memorystatus_list_mlock);
+ if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
+ MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
+ memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
+ p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
+
+ p = proc_ref_locked(p);
+ proc_list_unlock();
+ if (p) {
task_suspend(p->task);
proc_rele(p);
- MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
- } else
+ killed = TRUE;
+ }
+
+ goto exit;
+ } else
#endif /* DEVELOPMENT || DEBUG */
- {
- printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n",
- aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
- /* Shift queue, update stats */
- memorystatus_move_node_to_exit_list(node);
- memorystatus_mark_pid_in_snapshot(aPid, kMemorystatusFlagsKilledHiwat);
- lck_mtx_unlock(memorystatus_list_mlock);
- exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
+ {
+ memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat);
+
+ p = proc_ref_locked(p);
+ proc_list_unlock();
+ if (p) {
+ printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n",
+ aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
+ killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
+ }
+
+ /* Success? */
+ if (killed) {
proc_rele(p);
+ goto exit;
}
- return 0;
- } else {
- proc_rele(p);
- }
+ /* Failure - unwind and restart. */
+ proc_list_lock();
+ proc_rele_locked(p);
+ p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
+ p->p_memstat_state |= P_MEMSTAT_ERROR;
+ *errors += 1;
+ i = 0;
+ next_p = memorystatus_get_first_proc_locked(&i, TRUE);
+ }
}
}
- lck_mtx_unlock(memorystatus_list_mlock);
+ proc_list_unlock();
- // If we didn't kill anything, toss any newly-created snapshot
- if (!pending_snapshot) {
- memorystatus_jetsam_snapshot.entry_count = memorystatus_jetsam_snapshot_list_count = 0;
+exit:
+ /* Clear snapshot if freshly captured and no target was found */
+ if (new_snapshot && !killed) {
+ memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
}
- return -1;
-}
+ KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END,
+ memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);
-static void
-memorystatus_jetsam_thread_block(void)
-{
- assert_wait(&memorystatus_jetsam_wakeup, THREAD_UNINT);
- assert(memorystatus_jetsam_running == 1);
- OSDecrementAtomic(&memorystatus_jetsam_running);
- (void)thread_block((thread_continue_t)memorystatus_jetsam_thread);
+ return killed;
}
-static void
-memorystatus_jetsam_thread(void *param __unused, wait_result_t wr __unused)
-{
- boolean_t post_snapshot = FALSE;
- static boolean_t is_vm_privileged = FALSE;
+#endif /* LEGACY_HIWATER */
- if (is_vm_privileged == FALSE) {
- /*
- * It's the first time the thread has run, so just mark the thread as privileged and block.
- * This avoids a spurious pass with unset variables, as set out in .
- */
- thread_wire(host_priv_self(), current_thread(), TRUE);
- is_vm_privileged = TRUE;
- memorystatus_jetsam_thread_block();
+static boolean_t
+memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) {
+ /* TODO: allow a general async path */
+ if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage && cause != kMemorystatusKilledVMThrashing)) {
+ return FALSE;
+ }
-
- assert(memorystatus_available_pages != (unsigned)-1);
-
- while(1) {
- unsigned int last_available_pages;
-
-#if DEVELOPMENT || DEBUG
- jetsam_diagnostic_suspended_one_active_proc = 0;
-#endif /* DEVELOPMENT || DEBUG */
-
- while (memorystatus_available_pages <= memorystatus_available_pages_highwater) {
- if (memorystatus_kill_hiwat_proc() < 0) {
- break;
- }
- post_snapshot = TRUE;
- }
-
- while (memorystatus_available_pages <= memorystatus_available_pages_critical) {
- if (memorystatus_kill_top_proc(FALSE, kMemorystatusFlagsKilled) < 0) {
- /* No victim was found - panic */
- panic("memorystatus_jetsam_thread: no victim! available pages:%d, critical page level: %d\n",
- memorystatus_available_pages, memorystatus_available_pages_critical);
- }
- post_snapshot = TRUE;
-#if DEVELOPMENT || DEBUG
- if ((memorystatus_jetsam_policy & kPolicyDiagnoseFirst) && jetsam_diagnostic_suspended_one_active_proc) {
- printf("jetsam: stopping killing since 1 active proc suspended already for diagnosis\n");
- break; // we found first active proc, let's not kill any more
- }
-#endif /* DEVELOPMENT || DEBUG */
- }
-
- last_available_pages = memorystatus_available_pages;
-
- if (post_snapshot) {
- size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_list_count - 1);
- memorystatus_jetsam_snapshot.notification_time = mach_absolute_time();
- memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
- }
+
+ kill_under_pressure = TRUE;
+ memorystatus_thread_wake();
+ return TRUE;
+}
- if (memorystatus_available_pages >= (last_available_pages + memorystatus_delta) ||
- last_available_pages >= (memorystatus_available_pages + memorystatus_delta)) {
- continue;
- }
+static boolean_t
+memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) {
+ boolean_t res;
+ uint32_t errors = 0;
+
+ if (victim_pid == -1) {
+ /* No pid, so kill first process */
+ res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors);
+ } else {
+ res = memorystatus_kill_specific_process(victim_pid, cause);
+ }
+
+ if (errors) {
+ memorystatus_clear_errors();
+ }
+
+ if (res == TRUE) {
+ /* Fire off snapshot notification */
+ size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
+ sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
+ memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
+ memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
+ }
+
+ return res;
+}
-#if VM_PRESSURE_EVENTS
- memorystatus_check_pressure_reset();
-#endif
+boolean_t
+memorystatus_kill_on_VM_page_shortage(boolean_t async) {
+ if (async) {
+ return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage);
+ } else {
+ return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage);
+ }
+}
- memorystatus_jetsam_thread_block();
+boolean_t
+memorystatus_kill_on_VM_thrashing(boolean_t async) {
+ if (async) {
+ return memorystatus_kill_process_async(-1,
kMemorystatusKilledVMThrashing); + } else { + return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMThrashing); } } +boolean_t +memorystatus_kill_on_vnode_limit(void) { + return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes); +} + #endif /* CONFIG_JETSAM */ #if CONFIG_FREEZE @@ -1378,7 +2153,7 @@ memorystatus_freeze_init(void) { kern_return_t result; thread_t thread; - + result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread); if (result == KERN_SUCCESS) { thread_deallocate(thread); @@ -1388,102 +2163,113 @@ memorystatus_freeze_init(void) } static int -memorystatus_freeze_top_proc(boolean_t *memorystatus_freeze_swap_low) +memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low) { - proc_t p; - uint32_t i; - memorystatus_node *next_freeze_node; + pid_t aPid = 0; + int ret = -1; + proc_t p = PROC_NULL, next_p = PROC_NULL; + unsigned int i = 0; - lck_mtx_lock(memorystatus_list_mlock); - - next_freeze_node = next_memorystatus_node; + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, + memorystatus_available_pages, 0, 0, 0, 0); + + proc_list_lock(); - while (next_freeze_node) { - memorystatus_node *node; - pid_t aPid; + next_p = memorystatus_get_first_proc_locked(&i, TRUE); + while (next_p) { + kern_return_t kr; + uint32_t purgeable, wired, clean, dirty; + boolean_t shared; + uint32_t pages; + uint32_t max_pages = 0; uint32_t state; - node = next_freeze_node; - next_freeze_node = TAILQ_NEXT(next_freeze_node, link); + p = next_p; + next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); - aPid = node->pid; - state = node->state; - - /* skip empty slots in the list */ - if (aPid == 0) { - continue; // with lock held - } + aPid = p->p_pid; + state = p->p_memstat_state; /* Ensure the process is eligible for freezing */ - if ((state & (kProcessKilled | kProcessLocked | kProcessFrozen)) || !(state & kProcessSuspended)) { + if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) { continue; // with lock held } - - p = proc_find(aPid); - if (p != NULL) { - kern_return_t kr; - uint32_t purgeable, wired, clean, dirty; - boolean_t shared; - uint32_t max_pages = 0; - /* Only freeze processes meeting our minimum resident page criteria */ - if (memorystatus_task_page_count(p->task) < memorystatus_freeze_pages_min) { - proc_rele(p); - continue; - } + /* Only freeze processes meeting our minimum resident page criteria */ + memorystatus_get_task_page_counts(p->task, &pages, NULL); + if (pages < memorystatus_freeze_pages_min) { + continue; // with lock held + } + if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { /* Ensure there's enough free space to freeze this process. 
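* (Freezing is capped at memorystatus_freeze_pages_max pages; once fewer
* than memorystatus_freeze_pages_min swap pages remain, the scan stops
* and memorystatus_freeze_swap_low is flagged.)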
*/ max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); if (max_pages < memorystatus_freeze_pages_min) { *memorystatus_freeze_swap_low = TRUE; - proc_rele(p); - lck_mtx_unlock(memorystatus_list_mlock); - return 0; + proc_list_unlock(); + goto exit; } + } else { + max_pages = UINT32_MAX - 1; + } + + /* Mark as locked temporarily to avoid kill */ + p->p_memstat_state |= P_MEMSTAT_LOCKED; + + p = proc_ref_locked(p); + proc_list_unlock(); + if (!p) { + goto exit; + } + + kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); + + MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - " + "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", + (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"), + memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free()); + + proc_list_lock(); + p->p_memstat_state &= ~P_MEMSTAT_LOCKED; + + /* Success? */ + if (KERN_SUCCESS == kr) { + memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; - /* Mark as locked temporarily to avoid kill */ - node->state |= kProcessLocked; - - kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); - - MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_proc: task_freeze %s for pid %d [%s] - " - "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", - (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"), - memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free()); - - proc_rele(p); - - node->state &= ~kProcessLocked; - - if (KERN_SUCCESS == kr) { - memorystatus_freeze_entry_t data = { aPid, kMemorystatusFlagsFrozen, dirty }; - - memorystatus_frozen_count++; - - node->state |= (kProcessFrozen | (shared ? 0: kProcessNoReclaimWorth)); - - /* Update stats */ - for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { - throttle_intervals[i].pageouts += dirty; - } + memorystatus_frozen_count++; - memorystatus_freeze_pageouts += dirty; - memorystatus_freeze_count++; + p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 
0: P_MEMSTAT_NORECLAIM)); + + /* Update stats */ + for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { + throttle_intervals[i].pageouts += dirty; + } + + memorystatus_freeze_pageouts += dirty; + memorystatus_freeze_count++; + + proc_list_unlock(); - lck_mtx_unlock(memorystatus_list_mlock); + memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); - memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); + /* Return the number of reclaimed pages */ + ret = dirty; - return dirty; - } - - /* Failed; go round again */ + } else { + proc_list_unlock(); } + + proc_rele(p); + goto exit; } - lck_mtx_unlock(memorystatus_list_mlock); + proc_list_unlock(); + +exit: + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, + memorystatus_available_pages, aPid, 0, 0, 0); - return -1; + return ret; } static inline boolean_t @@ -1491,13 +2277,13 @@ memorystatus_can_freeze_processes(void) { boolean_t ret; - lck_mtx_lock(memorystatus_list_mlock); + proc_list_lock(); if (memorystatus_suspended_count) { uint32_t average_resident_pages, estimated_processes; /* Estimate the number of suspended processes we can fit */ - average_resident_pages = memorystatus_suspended_resident_count / memorystatus_suspended_count; + average_resident_pages = memorystatus_suspended_footprint_total / memorystatus_suspended_count; estimated_processes = memorystatus_suspended_count + ((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages); @@ -1505,7 +2291,7 @@ memorystatus_can_freeze_processes(void) if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) { memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_LOW; } else { - memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT; + memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT; } MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n", @@ -1520,7 +2306,7 @@ memorystatus_can_freeze_processes(void) ret = FALSE; } - lck_mtx_unlock(memorystatus_list_mlock); + proc_list_unlock(); return ret; } @@ -1625,7 +2411,7 @@ memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { /* Only freeze if we've not exceeded our pageout budgets */ if (!memorystatus_freeze_update_throttle()) { - memorystatus_freeze_top_proc(&memorystatus_freeze_swap_low); + memorystatus_freeze_top_process(&memorystatus_freeze_swap_low); } else { printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n"); memorystatus_freeze_throttle_count++; /* Throttled, update stats */ @@ -1639,254 +2425,996 @@ memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) #endif /* CONFIG_FREEZE */ -#if CONFIG_JETSAM +#if CONFIG_JETSAM && VM_PRESSURE_EVENTS -#if VM_PRESSURE_EVENTS +boolean_t +memorystatus_warn_process(pid_t pid) { + return (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0); +} static inline boolean_t -memorystatus_get_pressure_locked(void) { - if (memorystatus_available_pages > memorystatus_available_pages_pressure) { - /* Too many free pages */ - return kVMPressureNormal; - } +memorystatus_update_pressure_locked(boolean_t *pressured) { + vm_pressure_level_t old_level, new_level; + + old_level = memorystatus_vm_pressure_level; + if (memorystatus_available_pages > memorystatus_available_pages_pressure) { + /* Too 
many free pages */
+ new_level = kVMPressureNormal;
+ }
#if CONFIG_FREEZE
- if (memorystatus_frozen_count > 0) {
- /* Frozen processes exist */
- return kVMPressureNormal;
+ else if (memorystatus_frozen_count > 0) {
+ /* Frozen processes exist */
+ new_level = kVMPressureNormal;
}
#endif
-
- if (memorystatus_suspended_count > MEMORYSTATUS_SUSPENDED_THRESHOLD) {
- /* Too many supended processes */
- return kVMPressureNormal;
+ else if (memorystatus_suspended_count > MEMORYSTATUS_SUSPENDED_THRESHOLD) {
+ /* Too many suspended processes */
+ new_level = kVMPressureNormal;
+ }
+ else if (memorystatus_suspended_count > 0) {
+ /* Some suspended processes - warn */
+ new_level = kVMPressureWarning;
}
+ else {
+ /* Otherwise, pressure level is urgent */
+ new_level = kVMPressureUrgent;
+ }
+
+ *pressured = (new_level != kVMPressureNormal);
- if (memorystatus_suspended_count > 0) {
- /* Some suspended processes - warn */
- return kVMPressureWarning;
+ /* Did the pressure level change? */
+ if (old_level != new_level) {
+ MEMORYSTATUS_DEBUG(1, "memorystatus_update_pressure_locked(): memory pressure changed %d -> %d; memorystatus_available_pages: %d\n ",
+ old_level, new_level, memorystatus_available_pages);
+ memorystatus_vm_pressure_level = new_level;
+ return TRUE;
}
-
- /* Otherwise, pressure level is urgent */
- return kVMPressureUrgent;
+
+ return FALSE;
}
-pid_t
-memorystatus_request_vm_pressure_candidate(void) {
- memorystatus_node *node;
- pid_t pid = -1;
+kern_return_t
+memorystatus_update_vm_pressure(boolean_t target_foreground) {
+ boolean_t pressure_changed, pressured;
+ boolean_t warn = FALSE;
+
+ /*
+ * Centralised pressure handling routine. Called from:
+ * - The main jetsam thread. In this case, we update the pressure level and dispatch warnings to the foreground
+ * process *only*, each time the available page % drops.
+ * - The pageout scan path. In this scenario, all remaining registered processes are targeted in footprint order.
+ *
+ * This scheme guarantees delivery to the foreground app, while providing for warnings to the remaining processes
+ * driven by the pageout scan.
+ */
- lck_mtx_lock(memorystatus_list_mlock);
+ MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): foreground %d; available %d, critical %d, pressure %d\n",
+ target_foreground, memorystatus_available_pages, memorystatus_available_pages_critical, memorystatus_available_pages_pressure);
- /* Are we in a low memory state?
*/ - memorystatus_vm_pressure_level = memorystatus_get_pressure_locked(); - if (kVMPressureNormal != memorystatus_vm_pressure_level) { - TAILQ_FOREACH(node, &memorystatus_list, link) { - /* Skip ineligible processes */ - if (node->state & (kProcessKilled | kProcessLocked | kProcessSuspended | kProcessFrozen | kProcessNotifiedForPressure)) { - continue; + proc_list_lock(); + + pressure_changed = memorystatus_update_pressure_locked(&pressured); + + if (pressured) { + if (target_foreground) { + if (memorystatus_available_pages != memorystatus_last_foreground_pressure_pages) { + if (memorystatus_available_pages < memorystatus_last_foreground_pressure_pages) { + warn = TRUE; + } + memorystatus_last_foreground_pressure_pages = memorystatus_available_pages; } - node->state |= kProcessNotifiedForPressure; - pid = node->pid; - break; + } else { + warn = TRUE; + } + } else if (pressure_changed) { + memorystatus_last_foreground_pressure_pages = (unsigned int)-1; + } + + proc_list_unlock(); + + /* Target foreground processes if specified */ + if (warn) { + if (target_foreground) { + MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_foreground_candidates()\n"); + vm_find_pressure_foreground_candidates(); + } else { + MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_candidate()\n"); + /* Defer to VM code. This can race with the foreground priority, but + * it's preferable to holding onto locks for an extended period. */ + vm_find_pressure_candidate(); } } - lck_mtx_unlock(memorystatus_list_mlock); + /* Dispatch the global kevent to privileged listeners */ + if (pressure_changed) { + memorystatus_issue_pressure_kevent(pressured); + } - return pid; + return KERN_SUCCESS; } -void +int memorystatus_send_pressure_note(pid_t pid) { - memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid)); + MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid); + return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid)); } -static void -memorystatus_check_pressure_reset() { - lck_mtx_lock(memorystatus_list_mlock); - - if (kVMPressureNormal != memorystatus_vm_pressure_level) { - memorystatus_vm_pressure_level = memorystatus_get_pressure_locked(); - if (kVMPressureNormal == memorystatus_vm_pressure_level) { - memorystatus_node *node; - TAILQ_FOREACH(node, &memorystatus_list, link) { - node->state &= ~kProcessNotifiedForPressure; +boolean_t +memorystatus_bg_pressure_eligible(proc_t p) { + boolean_t eligible = FALSE; + + proc_list_lock(); + + MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state); + + /* Foreground processes have already been dealt with at this point, so just test for eligibility */ + if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) { + eligible = TRUE; + } + + proc_list_unlock(); + + return eligible; +} + +boolean_t +memorystatus_is_foreground_locked(proc_t p) { + return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) || + (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT)); +} + +#else /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ + +/* + * Trigger levels to test the mechanism. + * Can be used via a sysctl. 
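+ * The value written packs a trigger request into the upper 16 bits and a
+ * NOTE_MEMORYSTATUS_PRESSURE_* level into the lower 16 bits; for example,
+ * writing ((TEST_LOW_MEMORY_TRIGGER_ONE << 16) | NOTE_MEMORYSTATUS_PRESSURE_WARN)
+ * to kern.memorypressure_manual_trigger drives a single low-memory
+ * notification at the warning level.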
+ */ +#define TEST_LOW_MEMORY_TRIGGER_ONE 1 +#define TEST_LOW_MEMORY_TRIGGER_ALL 2 +#define TEST_PURGEABLE_TRIGGER_ONE 3 +#define TEST_PURGEABLE_TRIGGER_ALL 4 +#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE 5 +#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL 6 + +boolean_t memorystatus_manual_testing_on = FALSE; +vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal; + +extern struct knote * +vm_pressure_select_optimal_candidate_to_notify(struct klist *, int); + +extern +kern_return_t vm_pressure_notification_without_levels(void); + +extern void vm_pressure_klist_lock(void); +extern void vm_pressure_klist_unlock(void); + +extern void vm_reset_active_list(void); + +extern void delay(int); + +#define INTER_NOTIFICATION_DELAY (250000) /* .25 second */ + +void memorystatus_on_pageout_scan_end(void) { + /* No-op */ +} + +/* + * kn_max - knote + * + * knote_pressure_level - to check if the knote is registered for this notification level. + * + * task - task whose bits we'll be modifying + * + * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again. + * + * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately. + * + */ +boolean_t +is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t); + +boolean_t +is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set) +{ + if (kn_max->kn_sfflags & knote_pressure_level) { + + if (task_has_been_notified(task, pressure_level_to_clear) == TRUE) { + + task_clear_has_been_notified(task, pressure_level_to_clear); + } + + task_mark_has_been_notified(task, pressure_level_to_set); + return TRUE; + } + + return FALSE; +} + +extern kern_return_t vm_pressure_notify_dispatch_vm_clients(void); + +kern_return_t +memorystatus_update_vm_pressure(boolean_t target_best_process) +{ + struct knote *kn_max = NULL; + pid_t target_pid = -1; + struct klist dispatch_klist = { NULL }; + proc_t target_proc = PROC_NULL; + static vm_pressure_level_t level_snapshot = kVMPressureNormal; + struct task *task = NULL; + boolean_t found_candidate = FALSE; + + while (1) { + + /* + * There is a race window here. But it's not clear + * how much we benefit from having extra synchronization. + */ + level_snapshot = memorystatus_vm_pressure_level; + + memorystatus_klist_lock(); + kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot); + + if (kn_max == NULL) { + memorystatus_klist_unlock(); + + /* + * No more level-based clients to notify. + * Try the non-level based notification clients. + * + * However, these non-level clients don't understand + * the "return-to-normal" notification. + * + * So don't consider them for those notifications. Just + * return instead. 
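+ * (Those non-level clients are handled at the try_dispatch_vm_clients
+ * label below.)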
+ * + */ + + if (level_snapshot != kVMPressureNormal) { + goto try_dispatch_vm_clients; + } else { + return KERN_FAILURE; + } + } + + target_proc = kn_max->kn_kq->kq_p; + + proc_list_lock(); + if (target_proc != proc_ref_locked(target_proc)) { + target_proc = PROC_NULL; + proc_list_unlock(); + memorystatus_klist_unlock(); + continue; + } + proc_list_unlock(); + memorystatus_klist_unlock(); + + target_pid = target_proc->p_pid; + + task = (struct task *)(target_proc->task); + + if (level_snapshot != kVMPressureNormal) { + + if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) { + + if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, kVMPressureCritical, kVMPressureWarning) == TRUE) { + found_candidate = TRUE; + } + } else { + if (level_snapshot == kVMPressureCritical) { + + if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, kVMPressureWarning, kVMPressureCritical) == TRUE) { + found_candidate = TRUE; + } + } + } + } else { + if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { + + task_clear_has_been_notified(task, kVMPressureWarning); + task_clear_has_been_notified(task, kVMPressureCritical); + + found_candidate = TRUE; } } + + if (found_candidate == FALSE) { + continue; + } + + memorystatus_klist_lock(); + KNOTE_DETACH(&memorystatus_klist, kn_max); + KNOTE_ATTACH(&dispatch_klist, kn_max); + memorystatus_klist_unlock(); + + KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure); + + memorystatus_klist_lock(); + KNOTE_DETACH(&dispatch_klist, kn_max); + KNOTE_ATTACH(&memorystatus_klist, kn_max); + memorystatus_klist_unlock(); + + microuptime(&target_proc->vm_pressure_last_notify_tstamp); + proc_rele(target_proc); + + if (target_best_process == TRUE) { + break; + } + +try_dispatch_vm_clients: + if (level_snapshot != kVMPressureNormal) { + /* + * Wake up idle-exit thread. + * Targets one process per invocation. + * + * TODO: memorystatus_idle_exit_from_VM should return FALSE once it's + * done with all idle-exitable processes. Currently, we will exit this + * loop when we are done with notification clients (level and non-level based) + * but we may still have some idle-exitable processes around. + * + */ + memorystatus_idle_exit_from_VM(); + + if ((vm_pressure_notify_dispatch_vm_clients() == KERN_FAILURE) && (kn_max == NULL)) { + /* + * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications + * AND + * we have failed to find any eligible clients for the non-level based notifications too. + * So, we are done. 
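+ * Callers such as the manual-trigger sysctl loop treat KERN_FAILURE as
+ * "no more candidates to notify" and stop iterating.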
+ */ + + return KERN_FAILURE; + } + } + + if (memorystatus_manual_testing_on == FALSE) { + delay(INTER_NOTIFICATION_DELAY); + } } - - lck_mtx_unlock(memorystatus_list_mlock); + + return KERN_SUCCESS; } -#endif /* VM_PRESSURE_EVENTS */ +vm_pressure_level_t +convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t); + +vm_pressure_level_t +convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level) +{ + vm_pressure_level_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; + + switch (internal_pressure_level) { + + case kVMPressureNormal: + { + dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; + break; + } + + case kVMPressureWarning: + case kVMPressureUrgent: + { + dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN; + break; + } + + case kVMPressureCritical: + { + dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; + break; + } + + default: + break; + } -/* Sysctls... */ + return dispatch_level; +} static int -sysctl_memorystatus_list_change SYSCTL_HANDLER_ARGS +sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS { - int ret; - memorystatus_priority_entry_t entry; +#pragma unused(arg1, arg2, oidp) + + vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level); + + return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level)); +} + +SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED, + 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); + -#pragma unused(oidp, arg1, arg2) +extern int memorystatus_purge_on_warning; +extern int memorystatus_purge_on_critical; + +static int +sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) - if (!req->newptr || req->newlen > sizeof(entry)) { + int level = 0; + int error = 0; + int pressure_level = 0; + int trigger_request = 0; + int force_purge; + + error = sysctl_handle_int(oidp, &level, 0, req); + if (error || !req->newptr) { + return (error); + } + + memorystatus_manual_testing_on = TRUE; + + trigger_request = (level >> 16) & 0xFFFF; + pressure_level = (level & 0xFFFF); + + if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE || + trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) { + return EINVAL; + } + switch (pressure_level) { + case NOTE_MEMORYSTATUS_PRESSURE_NORMAL: + case NOTE_MEMORYSTATUS_PRESSURE_WARN: + case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL: + break; + default: return EINVAL; } - ret = SYSCTL_IN(req, &entry, req->newlen); - if (ret) { - return ret; + /* + * The pressure level is being set from user-space. + * And user-space uses the constants in sys/event.h + * So we translate those events to our internal levels here. 
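+ * NOTE_MEMORYSTATUS_PRESSURE_NORMAL, _WARN and _CRITICAL map to
+ * kVMPressureNormal, kVMPressureWarning and kVMPressureCritical
+ * respectively.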
+ */ + if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { + + memorystatus_manual_testing_level = kVMPressureNormal; + force_purge = 0; + + } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) { + + memorystatus_manual_testing_level = kVMPressureWarning; + force_purge = memorystatus_purge_on_warning; + + } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { + + memorystatus_manual_testing_level = kVMPressureCritical; + force_purge = memorystatus_purge_on_critical; } - memorystatus_list_change(FALSE, entry.pid, entry.priority, entry.flags, -1); + memorystatus_vm_pressure_level = memorystatus_manual_testing_level; - return ret; + /* purge according to the new pressure level */ + switch (trigger_request) { + case TEST_PURGEABLE_TRIGGER_ONE: + case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE: + if (force_purge == 0) { + /* no purging requested */ + break; + } + vm_purgeable_object_purge_one_unlocked(force_purge); + break; + case TEST_PURGEABLE_TRIGGER_ALL: + case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL: + if (force_purge == 0) { + /* no purging requested */ + break; + } + while (vm_purgeable_object_purge_one_unlocked(force_purge)); + break; + } + + if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) || + (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) { + + memorystatus_update_vm_pressure(TRUE); + } + + if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) || + (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) { + + while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) { + continue; + } + } + + if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { + memorystatus_manual_testing_on = FALSE; + + vm_pressure_klist_lock(); + vm_reset_active_list(); + vm_pressure_klist_unlock(); + } else { + + vm_pressure_klist_lock(); + vm_pressure_notification_without_levels(); + vm_pressure_klist_unlock(); + } + + return 0; } -SYSCTL_PROC(_kern, OID_AUTO, memorystatus_jetsam_change, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, - 0, 0, &sysctl_memorystatus_list_change, "I", ""); - +SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, + 0, 0, &sysctl_memorypressure_manual_trigger, "I", ""); + + +extern int memorystatus_purge_on_warning; +extern int memorystatus_purge_on_urgent; +extern int memorystatus_purge_on_critical; + +SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_warning, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_urgent, 0, ""); +SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, ""); + + +#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ + +/* Return both allocated and actual size, since there's a race between allocation and list compilation */ static int -sysctl_memorystatus_priority_list(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only) { - int ret; - size_t allocated_size, list_size = 0; - memorystatus_priority_entry_t *list; uint32_t list_count, i = 0; - memorystatus_node *node; - - /* Races, but this is only for diagnostic purposes */ + memorystatus_priority_entry_t *list_entry; + proc_t p; + list_count = memorystatus_list_count; - allocated_size = 
sizeof(memorystatus_priority_entry_t) * list_count;
- list = kalloc(allocated_size);
- if (!list) {
+ *list_size = sizeof(memorystatus_priority_entry_t) * list_count;
+
+ /* Just a size check? */
+ if (size_only) {
+ return 0;
+ }
+
+ /* Otherwise, validate the size of the buffer */
+ if (*buffer_size < *list_size) {
+ return EINVAL;
+ }
+
+ *list_ptr = (memorystatus_priority_entry_t*)kalloc(*list_size);
+ if (!*list_ptr) {
return ENOMEM;
}
- memset(list, 0, allocated_size);
-
- lck_mtx_lock(memorystatus_list_mlock);
-
- TAILQ_FOREACH(node, &memorystatus_list, link) {
- list[i].pid = node->pid;
- list[i].priority = node->priority;
- list[i].flags = memorystatus_build_flags_from_state(node->state);
- list[i].hiwat_pages = node->hiwat_pages;
- list_size += sizeof(memorystatus_priority_entry_t);
- if (++i >= list_count) {
- break;
- }
+ memset(*list_ptr, 0, *list_size);
+
+ *buffer_size = *list_size;
+ *list_size = 0;
+
+ list_entry = *list_ptr;
+
+ proc_list_lock();
+
+ p = memorystatus_get_first_proc_locked(&i, TRUE);
+ while (p && (*list_size < *buffer_size)) {
+ list_entry->pid = p->p_pid;
+ list_entry->priority = p->p_memstat_effectivepriority;
+ list_entry->user_data = p->p_memstat_userdata;
+#if LEGACY_HIWATER
+ if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) ||
+ (p->p_memstat_memlimit <= 0)) {
+ task_get_phys_footprint_limit(p->task, &list_entry->limit);
+ } else {
+ list_entry->limit = p->p_memstat_memlimit;
+ }
+#else
+ task_get_phys_footprint_limit(p->task, &list_entry->limit);
+#endif
+ list_entry->state = memorystatus_build_state(p);
+ list_entry++;
+
+ *list_size += sizeof(memorystatus_priority_entry_t);
+
+ p = memorystatus_get_next_proc_locked(&i, p, TRUE);
}
- lck_mtx_unlock(memorystatus_list_mlock);
+ proc_list_unlock();
- if (!list_size) {
- if (req->oldptr) {
- MEMORYSTATUS_DEBUG(1, "kern.memorystatus_priority_list returning EINVAL\n");
- return EINVAL;
- }
- else {
- MEMORYSTATUS_DEBUG(1, "kern.memorystatus_priority_list returning 0 for size\n");
- }
- } else {
- MEMORYSTATUS_DEBUG(1, "kern.memorystatus_priority_list returning %ld for size\n", (long)list_size);
- }
+ MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size);
- ret = SYSCTL_OUT(req, list, list_size);
+ return 0;
+}
- kfree(list, allocated_size);
+static int
+memorystatus_cmd_get_priority_list(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
+ int error = EINVAL;
+ boolean_t size_only;
+ memorystatus_priority_entry_t *list = NULL;
+ size_t list_size;
- return ret;
+ size_only = ((buffer == USER_ADDR_NULL) ?
TRUE: FALSE); + + error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only); + if (error) { + goto out; + } + + if (!size_only) { + error = copyout(list, buffer, list_size); + } + + if (error == 0) { + *retval = list_size; + } +out: + + if (list) { + kfree(list, buffer_size); + } + + return error; } -SYSCTL_PROC(_kern, OID_AUTO, memorystatus_priority_list, CTLTYPE_OPAQUE|CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, sysctl_memorystatus_priority_list, "S,jetsam_priorities", ""); +#if CONFIG_JETSAM + +static void +memorystatus_clear_errors(void) +{ + proc_t p; + unsigned int i = 0; + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0); + + proc_list_lock(); + + p = memorystatus_get_first_proc_locked(&i, TRUE); + while (p) { + if (p->p_memstat_state & P_MEMSTAT_ERROR) { + p->p_memstat_state &= ~P_MEMSTAT_ERROR; + } + p = memorystatus_get_next_proc_locked(&i, p, TRUE); + } + + proc_list_unlock(); + + KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0); +} static void -memorystatus_update_levels_locked(void) { - /* Set the baseline levels in pages */ - memorystatus_available_pages_critical = (CRITICAL_PERCENT / DELTA_PERCENT) * memorystatus_delta; - memorystatus_available_pages_highwater = (HIGHWATER_PERCENT / DELTA_PERCENT) * memorystatus_delta; -#if VM_PRESSURE_EVENTS - memorystatus_available_pages_pressure = (PRESSURE_PERCENT / DELTA_PERCENT) * memorystatus_delta; +memorystatus_update_levels_locked(boolean_t critical_only) { + memorystatus_available_pages_critical = memorystatus_available_pages_critical_base; +#if !LATENCY_JETSAM + { + // If there's an entry in the first bucket, we have idle processes + memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; + if (first_bucket->count) { + memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset; + } + } #endif - #if DEBUG || DEVELOPMENT if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) { memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic; - memorystatus_available_pages_highwater += memorystatus_jetsam_policy_offset_pages_diagnostic; + } +#endif + + if (critical_only) { + return; + } + #if VM_PRESSURE_EVENTS + memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta; +#if DEBUG || DEVELOPMENT + if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) { memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic; -#endif } #endif +#endif +} + +static int +memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { + size_t input_size = *snapshot_size; - /* Only boost the critical level - it's more important to kill right away than issue warnings */ - if (memorystatus_jetsam_policy & kPolicyMoreFree) { - memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_more_free; + if (memorystatus_jetsam_snapshot_count > 0) { + *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count)); + } else { + *snapshot_size = 0; + } + + if (size_only) { + return 0; } + + if (input_size < *snapshot_size) { + return EINVAL; + } + + *snapshot = memorystatus_jetsam_snapshot; + + MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size); + + return 0; } static 
int -sysctl_memorystatus_jetsam_policy_more_free SYSCTL_HANDLER_ARGS -{ -#pragma unused(arg1, arg2, oidp) - int error, more_free = 0; +memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) { + int error = EINVAL; + boolean_t size_only; + memorystatus_jetsam_snapshot_t *snapshot; + + size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE); + + error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only); + if (error) { + goto out; + } - error = priv_check_cred(kauth_cred_get(), PRIV_VM_JETSAM, 0); - if (error) - return (error); + /* Copy out and reset */ + if (!size_only) { + if ((error = copyout(snapshot, buffer, buffer_size)) == 0) { + snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; + } + } - error = sysctl_handle_int(oidp, &more_free, 0, req); - if (error || !req->newptr) - return (error); + if (error == 0) { + *retval = buffer_size; + } +out: + return error; +} - lck_mtx_lock(memorystatus_list_mlock); +static int +memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { + const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */ - if (more_free) { - memorystatus_jetsam_policy |= kPolicyMoreFree; - } else { - memorystatus_jetsam_policy &= ~kPolicyMoreFree; + int error; + uint32_t i; + uint32_t entry_count; + memorystatus_priority_properties_t *entries; + + /* Validate inputs */ + if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) { + return EINVAL; + } + + /* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */ + entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t)); + if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) { + return EINVAL; } - - memorystatus_update_levels_locked(); - lck_mtx_unlock(memorystatus_list_mlock); + entries = (memorystatus_priority_properties_t *)kalloc(buffer_size); + + error = copyin(buffer, entries, buffer_size); - return 0; + for (i = 0; i < entry_count; i++) { + proc_t p; + + if (error) { + break; + } + + p = proc_find(pid); + if (!p) { + error = ESRCH; + break; + } + + if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { + error = EPERM; + proc_rele(p); + break; + } + + error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0); + proc_rele(p); + } + + kfree(entries, buffer_size); + + return error; } -SYSCTL_PROC(_kern, OID_AUTO, memorystatus_jetsam_policy_more_free, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED|CTLFLAG_ANYBODY, - 0, 0, &sysctl_memorystatus_jetsam_policy_more_free, "I", ""); +static int +memorystatus_cmd_get_pressure_status(int32_t *retval) { + int error; + + /* Need privilege for check */ + error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0); + if (error) { + return (error); + } + + /* Inherently racy, so it's not worth taking a lock here */ + *retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 
1 : 0; + + return error; +} static int -sysctl_handle_memorystatus_snapshot(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req) -{ - int ret; - size_t currentsize = 0; +memorystatus_cmd_set_jetsam_high_water_mark(pid_t pid, int32_t high_water_mark, __unused int32_t *retval) { + int error = 0; + + proc_t p = proc_find(pid); + if (!p) { + return ESRCH; + } + + if (high_water_mark <= 0) { + high_water_mark = -1; /* Disable */ + } + + proc_list_lock(); + + if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { + error = EPERM; + goto exit; + } + + p->p_memstat_memlimit = high_water_mark; + if (memorystatus_highwater_enabled) { + if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) { + memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority); + } else { + error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL; + } + } + +exit: + proc_list_unlock(); + proc_rele(p); + + return error; +} + +#endif /* CONFIG_JETSAM */ - if (memorystatus_jetsam_snapshot_list_count > 0) { - currentsize = sizeof(memorystatus_jetsam_snapshot_t) + sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_list_count - 1); +int +memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) { + int error = EINVAL; + +#if !CONFIG_JETSAM + #pragma unused(ret) +#endif + + /* Root only for now */ + if (!kauth_cred_issuser(kauth_cred_get())) { + error = EPERM; + goto out; } - if (!currentsize) { - if (req->oldptr) { - MEMORYSTATUS_DEBUG(1, "kern.memorystatus_snapshot returning EINVAL\n"); - return EINVAL; + + /* Sanity check */ + if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) { + error = EINVAL; + goto out; + } + + switch (args->command) { + case MEMORYSTATUS_CMD_GET_PRIORITY_LIST: + error = memorystatus_cmd_get_priority_list(args->buffer, args->buffersize, ret); + break; +#if CONFIG_JETSAM + case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES: + error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret); + break; + case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT: + error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret); + break; + case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS: + error = memorystatus_cmd_get_pressure_status(ret); + break; + case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK: + /* TODO: deprecate. Keeping it in as there's no pid based way to set the ledger limit right now. */ + error = memorystatus_cmd_set_jetsam_high_water_mark(args->pid, (int32_t)args->flags, ret); + break; + /* Test commands */ +#if DEVELOPMENT || DEBUG + case MEMORYSTATUS_CMD_TEST_JETSAM: + error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 
0 : EINVAL; + break; + case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS: + error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize); + break; +#endif /* DEVELOPMENT || DEBUG */ +#endif /* CONFIG_JETSAM */ + default: + break; + } + +out: + return error; +} + + +static int +filt_memorystatusattach(struct knote *kn) +{ + kn->kn_flags |= EV_CLEAR; + return memorystatus_knote_register(kn); +} + +static void +filt_memorystatusdetach(struct knote *kn) +{ + memorystatus_knote_unregister(kn); +} + +static int +filt_memorystatus(struct knote *kn __unused, long hint) +{ + if (hint) { + switch (hint) { + case kMemorystatusNoPressure: + if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { + kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL; + } + break; + case kMemorystatusPressure: + if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) { + if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { + kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; + } + } else if (memorystatus_vm_pressure_level == kVMPressureCritical) { + + if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { + kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; + } + } + break; + default: + break; } - else { - MEMORYSTATUS_DEBUG(1, "kern.memorystatus_snapshot returning 0 for size\n"); + } + + return (kn->kn_fflags != 0); +} + +static void +memorystatus_klist_lock(void) { + lck_mtx_lock(&memorystatus_klist_mutex); +} + +static void +memorystatus_klist_unlock(void) { + lck_mtx_unlock(&memorystatus_klist_mutex); +} + +void +memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) { + lck_mtx_init(&memorystatus_klist_mutex, grp, attr); + klist_init(&memorystatus_klist); +} + +int +memorystatus_knote_register(struct knote *kn) { + int error = 0; + + memorystatus_klist_lock(); + + if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL)) { + +#if CONFIG_JETSAM && VM_PRESSURE_EVENTS + /* Need a privilege to register */ + error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0); +#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ + + if (!error) { + KNOTE_ATTACH(&memorystatus_klist, kn); } - } else { - MEMORYSTATUS_DEBUG(1, "kern.memorystatus_snapshot returning %ld for size\n", (long)currentsize); - } - ret = SYSCTL_OUT(req, &memorystatus_jetsam_snapshot, currentsize); - if (!ret && req->oldptr) { - memorystatus_jetsam_snapshot.entry_count = memorystatus_jetsam_snapshot_list_count = 0; + } else { + error = ENOTSUP; } - return ret; + + memorystatus_klist_unlock(); + + return error; } -SYSCTL_PROC(_kern, OID_AUTO, memorystatus_snapshot, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_handle_memorystatus_snapshot, "S,memorystatus_snapshot", ""); +void +memorystatus_knote_unregister(struct knote *kn __unused) { + memorystatus_klist_lock(); + KNOTE_DETACH(&memorystatus_klist, kn); + memorystatus_klist_unlock(); +} -#endif /* CONFIG_JETSAM */ +#if CONFIG_JETSAM && VM_PRESSURE_EVENTS +static boolean_t +memorystatus_issue_pressure_kevent(boolean_t pressured) { + memorystatus_klist_lock(); + KNOTE(&memorystatus_klist, pressured ? 
kMemorystatusPressure : kMemorystatusNoPressure); + memorystatus_klist_unlock(); + return TRUE; +} + +#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ diff --git a/bsd/kern/kern_mib.c b/bsd/kern/kern_mib.c index 497ab5a44..c03c98c97 100644 --- a/bsd/kern/kern_mib.c +++ b/bsd/kern/kern_mib.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -237,6 +238,13 @@ sysctl_hw_generic(__unused struct sysctl_oid *oidp, __unused void *arg1, } else { return(EINVAL); } + case HW_PAGESIZE: + { + vm_map_t map = get_task_map(current_task()); + val = vm_map_page_size(map); + qval = (long long)val; + break; + } case HW_CACHELINE: val = cpu_info.cache_line_size; qval = (long long)val; @@ -318,7 +326,8 @@ static int sysctl_pagesize (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) { - long long l = page_size; + vm_map_t map = get_task_map(current_task()); + long long l = vm_map_page_size(map); return sysctl_io_number(req, l, sizeof(l), NULL, NULL); } @@ -386,7 +395,7 @@ SYSCTL_INT(_hw_optional, OID_AUTO, floatingpoint, CTLFLAG_RD | CTLFLAG_KERN | CT * * The *_compat nodes are *NOT* visible within the kernel. */ -SYSCTL_COMPAT_INT (_hw, HW_PAGESIZE, pagesize_compat, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &page_size, 0, ""); +SYSCTL_PROC(_hw, HW_PAGESIZE, pagesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_PAGESIZE, sysctl_hw_generic, "I", ""); SYSCTL_COMPAT_INT (_hw, HW_BUS_FREQ, busfrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.bus_clock_rate_hz, 0, ""); SYSCTL_COMPAT_INT (_hw, HW_CPU_FREQ, cpufrequency_compat, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &gPEClockFrequencyInfo.cpu_clock_rate_hz, 0, ""); SYSCTL_PROC(_hw, HW_CACHELINE, cachelinesize_compat, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, 0, HW_CACHELINE, sysctl_hw_generic, "I", ""); @@ -442,33 +451,6 @@ SYSCTL_PROC(_hw_optional, OID_AUTO, hle, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOC #error Unsupported arch #endif /* !__i386__ && !__x86_64 && !__arm__ */ -/* - * Debugging interface to the CPU power management code. - * - * Note: Does not need locks because it disables interrupts over - * the call. - */ -static int -pmsSysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, - __unused int arg2, struct sysctl_req *req) -{ - pmsctl_t ctl; - int error; - boolean_t intr; - - if ((error = SYSCTL_IN(req, &ctl, sizeof(ctl)))) - return(error); - - intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ - error = pmsControl(ctl.request, (user_addr_t)(uintptr_t)ctl.reqaddr, ctl.reqsize); - (void)ml_set_interrupts_enabled(intr); /* Restore interruptions */ - - return(error); -} - -SYSCTL_PROC(_hw, OID_AUTO, pms, CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_LOCKED, 0, 0, pmsSysctl, "S", "Processor Power Management"); - - /****************************************************************************** * Generic MIB initialisation. 
@@ -483,7 +465,7 @@ sysctl_mib_init(void) cpusubtype = cpu_subtype(); cputhreadtype = cpu_threadtype(); #if defined(__i386__) || defined (__x86_64__) - cpu64bit = (_get_cpu_capabilities() & k64Bit) == k64Bit; + cpu64bit = (_get_cpu_capabilities() & k64Bit) == k64Bit; #else #error Unsupported arch #endif diff --git a/bsd/kern/kern_mman.c b/bsd/kern/kern_mman.c index 13a64cb93..575110d69 100644 --- a/bsd/kern/kern_mman.c +++ b/bsd/kern/kern_mman.c @@ -164,6 +164,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) int fd = uap->fd; int num_retries = 0; + user_map = current_map(); user_addr = (vm_map_offset_t)uap->addr; user_size = (vm_map_size_t) uap->len; @@ -202,13 +203,14 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) * Align the file position to a page boundary, * and save its page offset component. */ - pageoff = (file_pos & PAGE_MASK); + pageoff = (file_pos & vm_map_page_mask(user_map)); file_pos -= (vm_object_offset_t)pageoff; /* Adjust size for rounding (on both ends). */ - user_size += pageoff; /* low end... */ - user_size = mach_vm_round_page(user_size); /* hi end */ + user_size += pageoff; /* low end... */ + user_size = vm_map_round_page(user_size, + vm_map_page_mask(user_map)); /* hi end */ if ((flags & MAP_JIT) && ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON))){ return EINVAL; @@ -224,7 +226,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) * should be aligned after adjustment by pageoff. */ user_addr -= pageoff; - if (user_addr & PAGE_MASK) + if (user_addr & vm_map_page_mask(user_map)) return (EINVAL); } #ifdef notyet @@ -237,8 +239,10 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) * There should really be a pmap call to determine a reasonable * location. */ - else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ)) - addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ); + else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ, + vm_map_page_mask(user_map))) + addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ, + vm_map_page_mask(user_map)); #endif @@ -292,7 +296,8 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) if (err) return(err); fpref = 1; - if(fp->f_fglob->fg_type == DTYPE_PSXSHM) { + switch (FILEGLOB_DTYPE(fp->f_fglob)) { + case DTYPE_PSXSHM: uap->addr = (user_addr_t)user_addr; uap->len = (user_size_t)user_size; uap->prot = prot; @@ -300,9 +305,9 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) uap->pos = file_pos; error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff); goto bad; - } - - if (fp->f_fglob->fg_type != DTYPE_VNODE) { + case DTYPE_VNODE: + break; + default: error = EINVAL; goto bad; } @@ -431,22 +436,23 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) * We bend a little - round the start and end addresses * to the nearest page boundary. 
*/ - user_size = mach_vm_round_page(user_size); + user_size = vm_map_round_page(user_size, + vm_map_page_mask(user_map)); - if (file_pos & PAGE_MASK_64) { + if (file_pos & vm_map_page_mask(user_map)) { if (!mapanon) (void)vnode_put(vp); error = EINVAL; goto bad; } - user_map = current_map(); - if ((flags & MAP_FIXED) == 0) { alloc_flags |= VM_FLAGS_ANYWHERE; - user_addr = mach_vm_round_page(user_addr); + user_addr = vm_map_round_page(user_addr, + vm_map_page_mask(user_map)); } else { - if (user_addr != mach_vm_trunc_page(user_addr)) { + if (user_addr != vm_map_trunc_page(user_addr, + vm_map_page_mask(user_map))) { if (!mapanon) (void)vnode_put(vp); error = EINVAL; @@ -509,7 +515,7 @@ map_anon_retry: * lack of space between the address and the map's maximum. */ if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) { - user_addr = PAGE_SIZE; + user_addr = vm_map_page_size(user_map); goto map_anon_retry; } } else { @@ -578,7 +584,7 @@ map_file_retry: * lack of space between the address and the map's maximum. */ if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) { - user_addr = PAGE_SIZE; + user_addr = vm_map_page_size(user_map); goto map_file_retry; } } @@ -638,10 +644,11 @@ msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int3 int rv; vm_sync_t sync_flags=0; + user_map = current_map(); addr = (mach_vm_offset_t) uap->addr; size = (mach_vm_size_t)uap->len; KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0); - if (addr & PAGE_MASK_64) { + if (addr & vm_map_page_mask(user_map)) { /* UNIX SPEC: user address is not page-aligned, return EINVAL */ return EINVAL; } @@ -677,7 +684,6 @@ msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int3 sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */ - user_map = current_map(); rv = mach_vm_msync(user_map, addr, size, sync_flags); switch (rv) { @@ -698,16 +704,18 @@ int munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval) { mach_vm_offset_t user_addr; - mach_vm_size_t user_size; - kern_return_t result; + mach_vm_size_t user_size; + kern_return_t result; + vm_map_t user_map; + user_map = current_map(); user_addr = (mach_vm_offset_t) uap->addr; user_size = (mach_vm_size_t) uap->len; AUDIT_ARG(addr, user_addr); AUDIT_ARG(len, user_size); - if (user_addr & PAGE_MASK_64) { + if (user_addr & vm_map_page_mask(user_map)) { /* UNIX SPEC: user address is not page-aligned, return EINVAL */ return EINVAL; } @@ -720,7 +728,7 @@ munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval) return EINVAL; } - result = mach_vm_deallocate(current_map(), user_addr, user_size); + result = mach_vm_deallocate(user_map, user_addr, user_size); if (result != KERN_SUCCESS) { return(EINVAL); } @@ -743,11 +751,12 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval) AUDIT_ARG(len, uap->len); AUDIT_ARG(value32, uap->prot); + user_map = current_map(); user_addr = (mach_vm_offset_t) uap->addr; user_size = (mach_vm_size_t) uap->len; prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED)); - if (user_addr & PAGE_MASK_64) { + if (user_addr & vm_map_page_mask(user_map)) { /* UNIX SPEC: user address is not page-aligned, return EINVAL */ return EINVAL; } @@ -765,8 +774,6 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval) prot |= VM_PROT_READ; 
#endif /* 3936456 */ - user_map = current_map(); - #if CONFIG_MACF /* * The MAC check for mprotect is of limited use for 2 reasons: @@ -793,9 +800,12 @@ mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval) * mac_proc_check_mprotect() hook above. Otherwise, Codesigning will be * compromised because the check would always succeed and thusly any * process could sign dynamically. */ - result = vm_map_sign(user_map, - vm_map_trunc_page(user_addr), - vm_map_round_page(user_addr+user_size)); + result = vm_map_sign( + user_map, + vm_map_trunc_page(user_addr, + vm_map_page_mask(user_map)), + vm_map_round_page(user_addr+user_size, + vm_map_page_mask(user_map))); switch (result) { case KERN_SUCCESS: break; @@ -942,8 +952,10 @@ mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval) * Make sure that the addresses presented are valid for user * mode. */ - first_addr = addr = mach_vm_trunc_page(uap->addr); - end = addr + mach_vm_round_page(uap->len); + first_addr = addr = vm_map_trunc_page(uap->addr, + vm_map_page_mask(map)); + end = addr + vm_map_round_page(uap->len, + vm_map_page_mask(map)); if (end < addr) return (EINVAL); @@ -1042,10 +1054,10 @@ mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval) if (size == 0) return (0); - pageoff = (addr & PAGE_MASK); - addr -= pageoff; - size = vm_map_round_page(size+pageoff); user_map = current_map(); + pageoff = (addr & vm_map_page_mask(user_map)); + addr -= pageoff; + size = vm_map_round_page(size+pageoff, vm_map_page_mask(user_map)); /* have to call vm_map_wire directly to pass "I don't know" protections */ result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE); @@ -1091,7 +1103,6 @@ munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int return(ENOSYS); } -#if !defined(CONFIG_EMBEDDED) /* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */ kern_return_t map_fd(struct map_fd_args *args) @@ -1133,6 +1144,8 @@ map_fd_funneled( proc_t p = current_proc(); struct vnode_attr vattr; + my_map = current_map(); + /* * Find the inode; verify that it's a regular file. */ @@ -1141,7 +1154,7 @@ map_fd_funneled( if (err) return(err); - if (fp->f_fglob->fg_type != DTYPE_VNODE){ + if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { err = KERN_INVALID_ARGUMENT; goto bad; } @@ -1194,13 +1207,13 @@ map_fd_funneled( vnode_setattr(vp, &vattr, vfs_context_current()); } - if (offset & PAGE_MASK_64) { + if (offset & vm_map_page_mask(my_map)) { printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm); (void)vnode_put(vp); err = KERN_INVALID_ARGUMENT; goto bad; } - map_size = round_page(size); + map_size = vm_map_round_page(size, vm_map_page_mask(my_map)); /* * Allow user to map in a zero length file. 
@@ -1220,9 +1233,6 @@ map_fd_funneled( goto bad; } - - my_map = current_map(); - result = vm_map_64( my_map, &map_addr, map_size, (vm_offset_t)0, @@ -1242,7 +1252,7 @@ map_fd_funneled( vm_map_copy_t tmp; if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) || - trunc_page(dst_addr) != dst_addr) { + trunc_page(dst_addr) != dst_addr) { (void) vm_map_remove( my_map, map_addr, map_addr + map_size, @@ -1256,9 +1266,13 @@ map_fd_funneled( (vm_map_size_t)map_size, TRUE, &tmp); if (result != KERN_SUCCESS) { - (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), - vm_map_round_page(map_addr + map_size), - VM_MAP_NO_FLAGS); + (void) vm_map_remove( + my_map, + vm_map_trunc_page(map_addr, + vm_map_page_mask(my_map)), + vm_map_round_page(map_addr + map_size, + vm_map_page_mask(my_map)), + VM_MAP_NO_FLAGS); (void)vnode_put(vp); err = result; goto bad; @@ -1276,9 +1290,13 @@ map_fd_funneled( // K64todo bug compatible now, should fix for 64bit user uint32_t user_map_addr = CAST_DOWN_EXPLICIT(uint32_t, map_addr); if (copyout(&user_map_addr, CAST_USER_ADDR_T(va), sizeof (user_map_addr))) { - (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), - vm_map_round_page(map_addr + map_size), - VM_MAP_NO_FLAGS); + (void) vm_map_remove( + my_map, + vm_map_trunc_page(map_addr, + vm_map_page_mask(my_map)), + vm_map_round_page(map_addr + map_size, + vm_map_page_mask(my_map)), + VM_MAP_NO_FLAGS); (void)vnode_put(vp); err = KERN_INVALID_ADDRESS; goto bad; @@ -1292,5 +1310,4 @@ bad: fp_drop(p, fd, fp, 0); return (err); } -#endif /* !defined(CONFIG_EMBEDDED) */ diff --git a/bsd/kern/kern_overrides.c b/bsd/kern/kern_overrides.c new file mode 100644 index 000000000..e2055aae6 --- /dev/null +++ b/bsd/kern/kern_overrides.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * System Overrides syscall implementation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* Mutex for global system override state */ +static lck_mtx_t sys_override_lock; +static lck_grp_t *sys_override_mtx_grp; +static lck_attr_t *sys_override_mtx_attr; +static lck_grp_attr_t *sys_override_mtx_grp_attr; + +/* Assertion counts for system properties */ +static int64_t io_throttle_assert_cnt; +static int64_t cpu_throttle_assert_cnt; + +/* Wait Channel for system override */ +static uint64_t sys_override_wait; + +/* Forward Declarations */ +static void enable_system_override(uint64_t flags); +static void disable_system_override(uint64_t flags); +static __attribute__((noinline)) void PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout); + +/***************************** system_override ********************/ +/* + * int system_override(uint64_t timeout, uint64_t flags); + */ + +void +init_system_override() +{ + sys_override_mtx_grp_attr = lck_grp_attr_alloc_init(); + sys_override_mtx_grp = lck_grp_alloc_init("system_override", sys_override_mtx_grp_attr); + sys_override_mtx_attr = lck_attr_alloc_init(); + lck_mtx_init(&sys_override_lock, sys_override_mtx_grp, sys_override_mtx_attr); + io_throttle_assert_cnt = cpu_throttle_assert_cnt = 0; +} + +/* system call implementation */ +int +system_override(__unused struct proc *p, struct system_override_args * uap, __unused int32_t *retval) +{ + uint64_t timeout = uap->timeout; + uint64_t flags = uap->flags; + int error = 0; + + /* Check credentials for caller. Only entitled processes are allowed to make this call. */ + if ((error = priv_check_cred(kauth_cred_get(), PRIV_SYSTEM_OVERRIDE, 0))) { + goto out; + } + + /* Check to see if some flags are specified. Zero flags are invalid. */ + if ((flags == 0) || ((flags & ~SYS_OVERRIDE_FLAGS_MASK) != 0)) { + error = EINVAL; + goto out; + } + + lck_mtx_lock(&sys_override_lock); + + enable_system_override(flags); + + PROCESS_OVERRIDING_SYSTEM_DEFAULTS(timeout); + + disable_system_override(flags); + + lck_mtx_unlock(&sys_override_lock); + +out: + return error; +} + +/* + * Call for enabling global system override. + * This should be called only with the sys_override_lock held. + */ +static void +enable_system_override(uint64_t flags) +{ + + if (flags & SYS_OVERRIDE_IO_THROTTLE) { + if (io_throttle_assert_cnt == 0) { + /* Disable I/O Throttling */ + printf("Process %s [%d] disabling system-wide I/O Throttling\n", current_proc()->p_comm, current_proc()->p_pid); + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_START, current_proc()->p_pid, 0, 0, 0, 0); + sys_override_io_throttle(THROTTLE_IO_DISABLE); + } + io_throttle_assert_cnt++; + } + + if (flags & SYS_OVERRIDE_CPU_THROTTLE) { + if (cpu_throttle_assert_cnt == 0) { + /* Disable CPU Throttling */ + printf("Process %s [%d] disabling system-wide CPU Throttling\n", current_proc()->p_comm, current_proc()->p_pid); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_START, current_proc()->p_pid, 0, 0, 0, 0); + sys_override_cpu_throttle(CPU_THROTTLE_DISABLE); + } + cpu_throttle_assert_cnt++; + } + +} + +/* + * Call for disabling global system override. + * This should be called only with the sys_override_lock held. 
+ */ +static void +disable_system_override(uint64_t flags) +{ + + if (flags & SYS_OVERRIDE_IO_THROTTLE) { + assert(io_throttle_assert_cnt > 0); + io_throttle_assert_cnt--; + if (io_throttle_assert_cnt == 0) { + /* Enable I/O Throttling */ + KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_END, current_proc()->p_pid, 0, 0, 0, 0); + sys_override_io_throttle(THROTTLE_IO_ENABLE); + } + } + + if (flags & SYS_OVERRIDE_CPU_THROTTLE) { + assert(cpu_throttle_assert_cnt > 0); + cpu_throttle_assert_cnt--; + if (cpu_throttle_assert_cnt == 0) { + /* Enable CPU Throttling */ + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_END, current_proc()->p_pid, 0, 0, 0, 0); + sys_override_cpu_throttle(CPU_THROTTLE_ENABLE); + } + } +} + +static __attribute__((noinline)) void +PROCESS_OVERRIDING_SYSTEM_DEFAULTS(uint64_t timeout) +{ + struct timespec ts; + ts.tv_sec = timeout / NSEC_PER_SEC; + ts.tv_nsec = timeout - ((long)ts.tv_sec * NSEC_PER_SEC); + msleep((caddr_t)&sys_override_wait, &sys_override_lock, PRIBIO | PCATCH, "system_override", &ts); +} + diff --git a/bsd/kern/kern_pcsamples.c b/bsd/kern/kern_pcsamples.c index 36757c2af..887029225 100644 --- a/bsd/kern/kern_pcsamples.c +++ b/bsd/kern/kern_pcsamples.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,7 +72,6 @@ int pcsamples_reinit(void); int enable_branch_tracing(void) { -#ifndef i386 struct proc *p; if (-1 != pc_sample_pid) { p = proc_find(pc_sample_pid); @@ -87,9 +86,6 @@ enable_branch_tracing(void) return 1; -#else - return 0; -#endif } int diff --git a/bsd/kern/kern_physio.c b/bsd/kern/kern_physio.c index 2c4998ec3..6d9580dcf 100644 --- a/bsd/kern/kern_physio.c +++ b/bsd/kern/kern_physio.c @@ -90,11 +90,7 @@ physio( void (*f_strategy)(buf_t), struct proc *p = current_proc(); int error, i, buf_allocated, todo, iosize; int orig_bflags = 0; -#if LP64KERN int64_t done; -#else - int done; -#endif error = 0; flags &= B_READ | B_WRITE; diff --git a/bsd/kern/kern_priv.c b/bsd/kern/kern_priv.c index e7ceb6075..ee17dd2a8 100644 --- a/bsd/kern/kern_priv.c +++ b/bsd/kern/kern_priv.c @@ -68,6 +68,8 @@ #include #endif +int proc_check_footprint_priv(void); + /* * Check a credential for privilege. Lots of good reasons to deny privilege; * only a few to grant it. 
@@ -75,6 +77,9 @@ int priv_check_cred(kauth_cred_t cred, int priv, __unused int flags) { +#if !CONFIG_MACF +#pragma unused(priv) +#endif int error; /* @@ -117,3 +122,9 @@ priv_check_cred(kauth_cred_t cred, int priv, __unused int flags) out: return (error); } + +int +proc_check_footprint_priv(void) +{ + return (priv_check_cred(kauth_cred_get(), PRIV_VM_FOOTPRINT_LIMIT, 0)); +} diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index 6d696b424..5125e49b6 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -105,6 +105,10 @@ #include #include +#if CONFIG_MEMORYSTATUS +#include +#endif + #if CONFIG_MACF #include #endif @@ -158,13 +162,13 @@ lck_attr_t * lctx_lck_attr; static void lctxinit(void); #endif -int cs_debug; /* declared further down in this file */ +extern int cs_debug; #if DEBUG #define __PROC_INTERNAL_DEBUG 1 #endif /* Name to give to core files */ -__private_extern__ char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"}; +__XNU_PRIVATE_EXTERN char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"}; static void orphanpg(struct pgrp *pg); void proc_name_kdp(task_t t, char * buf, int size); @@ -176,6 +180,7 @@ static void pgrp_replace(proc_t p, struct pgrp *pgrp); static void pgdelete_dropref(struct pgrp *pgrp); extern void pg_rele_dropref(struct pgrp * pgrp); static int csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaddittoken); +static boolean_t proc_parent_is_currentproc(proc_t p); struct fixjob_iterargs { struct pgrp * pg; @@ -432,29 +437,32 @@ proc_rele_locked(proc_t p) proc_t proc_find_zombref(int pid) { - proc_t p1 = PROC_NULL; - proc_t p = PROC_NULL; + proc_t p; proc_list_lock(); + again: p = pfind_locked(pid); - /* if process still in creation return NULL */ - if ((p == PROC_NULL) || ((p->p_listflag & P_LIST_INCREATE) != 0)) { + /* should we bail? */ + if ((p == PROC_NULL) /* not found */ + || ((p->p_listflag & P_LIST_INCREATE) != 0) /* not created yet */ + || ((p->p_listflag & P_LIST_EXITED) == 0)) { /* not started exit */ + proc_list_unlock(); - return (p1); + return (PROC_NULL); } - /* if process has not started exit or is being reaped, return NULL */ - if (((p->p_listflag & P_LIST_EXITED) != 0) && ((p->p_listflag & P_LIST_WAITING) == 0)) { - p->p_listflag |= P_LIST_WAITING; - p1 = p; - } else - p1 = PROC_NULL; + /* If someone else is controlling the (unreaped) zombie - wait */ + if ((p->p_listflag & P_LIST_WAITING) != 0) { + (void)msleep(&p->p_stat, proc_list_mlock, PWAIT, "waitcoll", 0); + goto again; + } + p->p_listflag |= P_LIST_WAITING; proc_list_unlock(); - return(p1); + return(p); } void @@ -593,29 +601,65 @@ proc_checkdeadrefs(__unused proc_t p) int proc_pid(proc_t p) { - return(p->p_pid); + return (p->p_pid); } int proc_ppid(proc_t p) { - return(p->p_ppid); + return (p->p_ppid); } -int +int proc_selfpid(void) { - proc_t p = current_proc(); - return(p->p_pid); + return (current_proc()->p_pid); } -int +int proc_selfppid(void) { - proc_t p = current_proc(); - return(p->p_ppid); + return (current_proc()->p_ppid); +} + +#if CONFIG_DTRACE +static proc_t +dtrace_current_proc_vforking(void) +{ + thread_t th = current_thread(); + struct uthread *ut = get_bsdthread_info(th); + + if (ut && + ((ut->uu_flag & (UT_VFORK|UT_VFORKING)) == (UT_VFORK|UT_VFORKING))) { + /* + * Handle the narrow window where we're in the vfork syscall, + * but we're not quite ready to claim (in particular, to DTrace) + * that we're running as the child. 
+ */ + return (get_bsdtask_info(get_threadtask(th))); + } + return (current_proc()); +} + +int +dtrace_proc_selfpid(void) +{ + return (dtrace_current_proc_vforking()->p_pid); } +int +dtrace_proc_selfppid(void) +{ + return (dtrace_current_proc_vforking()->p_ppid); +} + +uid_t +dtrace_proc_selfruid(void) +{ + return (dtrace_current_proc_vforking()->p_ruid); +} +#endif /* CONFIG_DTRACE */ + proc_t proc_parent(proc_t p) { @@ -635,6 +679,18 @@ loop: return(parent); } +static boolean_t +proc_parent_is_currentproc(proc_t p) +{ + boolean_t ret = FALSE; + + proc_list_lock(); + if (p->p_pptr == current_proc()) + ret = TRUE; + + proc_list_unlock(); + return ret; +} void proc_name(int pid, char * buf, int size) @@ -811,10 +867,21 @@ proc_uniqueid(proc_t p) } uint64_t -proc_selfuniqueid(void) +proc_puniqueid(proc_t p) { - proc_t p = current_proc(); - return(p->p_uniqueid); + return(p->p_puniqueid); +} + +uint64_t +proc_was_throttled(proc_t p) +{ + return (p->was_throttled); +} + +uint64_t +proc_did_throttle(proc_t p) +{ + return (p->did_throttle); } int @@ -991,12 +1058,18 @@ pinsertchild(proc_t parent, proc_t child) TAILQ_INIT(&child->p_evlist); child->p_pptr = parent; child->p_ppid = parent->p_pid; + child->p_puniqueid = parent->p_uniqueid; pg = proc_pgrp(parent); pgrp_add(pg, parent, child); pg_rele(pg); proc_list_lock(); + +#if CONFIG_MEMORYSTATUS + memorystatus_add(child, TRUE); +#endif + parent->p_childrencnt++; LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); @@ -1005,7 +1078,6 @@ pinsertchild(proc_t parent, proc_t child) child->p_listflag &= ~P_LIST_INCREATE; proc_list_unlock(); - } /* @@ -1294,12 +1366,24 @@ fixjobc(proc_t p, struct pgrp *pgrp, int entering) struct session *mysession = pgrp->pg_session; proc_t parent; struct fixjob_iterargs fjarg; + boolean_t proc_parent_self; + + /* + * Check if p's parent is current proc, if yes then no need to take + * a ref; calling proc_parent with current proc as parent may + * deadlock if current proc is exiting. + */ + proc_parent_self = proc_parent_is_currentproc(p); + if (proc_parent_self) + parent = current_proc(); + else + parent = proc_parent(p); - parent = proc_parent(p); if (parent != PROC_NULL) { hispgrp = proc_pgrp(parent); hissess = proc_session(parent); - proc_rele(parent); + if (!proc_parent_self) + proc_rele(parent); } @@ -1424,6 +1508,12 @@ current_proc_EXTERNAL(void) return (current_proc()); } +int +proc_is_forcing_hfs_case_sensitivity(proc_t p) +{ + return (p->p_vfs_iopolicy & P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY) ? 1 : 0; +} + /* * proc_core_name(name, uid, pid) * Expand the name described in corefilename, using name, uid, and pid. @@ -1720,29 +1810,45 @@ csops_audittoken(__unused proc_t p, struct csops_audittoken_args *uap, __unused { if (uap->uaudittoken == USER_ADDR_NULL) return(EINVAL); - switch (uap->ops) { - case CS_OPS_PIDPATH: - case CS_OPS_ENTITLEMENTS_BLOB: - break; - default: - return(EINVAL); - }; - return(csops_internal(uap->pid, uap->ops, uap->useraddr, uap->usersize, uap->uaudittoken)); } +static int +csops_copy_token(void *start, size_t length, user_size_t usize, user_addr_t uaddr) +{ + char fakeheader[8] = { 0 }; + int error; + + if (usize < sizeof(fakeheader)) + return ERANGE; + + /* if no blob, fill in zero header */ + if (NULL == start) { + start = fakeheader; + length = sizeof(fakeheader); + } else if (usize < length) { + /* ... 
if input too short, copy out length of entitlement */ + uint32_t length32 = htonl((uint32_t)length); + memcpy(&fakeheader[4], &length32, sizeof(length32)); + + error = copyout(fakeheader, uaddr, sizeof(fakeheader)); + if (error == 0) + return ERANGE; /* input buffer to short, ERANGE signals that */ + return error; + } + return copyout(start, uaddr, length); +} + static int csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaudittoken) { size_t usize = (size_t)CAST_DOWN(size_t, usersize); proc_t pt; - uint32_t retflags; - int vid, forself; + int forself; int error; vnode_t tvp; off_t toff; - char * buf; unsigned char cdhash[SHA1_RESULTLEN]; audit_token_t token; unsigned int upid=0, uidversion = 0; @@ -1755,25 +1861,17 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user forself = 1; - /* Pre flight checks for CS_OPS_PIDPATH */ - if (ops == CS_OPS_PIDPATH) { - /* usize is unsigned.. */ - if (usize > 4 * PATH_MAX) - return(EOVERFLOW); - if (kauth_cred_issuser(kauth_cred_get()) != TRUE) - return(EPERM); - } else { - switch (ops) { + switch (ops) { case CS_OPS_STATUS: case CS_OPS_CDHASH: case CS_OPS_PIDOFFSET: case CS_OPS_ENTITLEMENTS_BLOB: + case CS_OPS_BLOB: break; /* unrestricted */ default: if (forself == 0 && kauth_cred_issuser(kauth_cred_get()) != TRUE) return(EPERM); break; - } } pt = proc_find(pid); @@ -1796,12 +1894,19 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user switch (ops) { - case CS_OPS_STATUS: + case CS_OPS_STATUS: { + uint32_t retflags; + + proc_lock(pt); retflags = pt->p_csflags; + if (cs_enforcement(pt)) + retflags |= CS_ENFORCEMENT; + proc_unlock(pt); + if (uaddr != USER_ADDR_NULL) error = copyout(&retflags, uaddr, sizeof(uint32_t)); break; - + } case CS_OPS_MARKINVALID: proc_lock(pt); if ((pt->p_csflags & CS_VALID) == CS_VALID) { /* is currently valid */ @@ -1844,38 +1949,6 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user proc_unlock(pt); break; - case CS_OPS_PIDPATH: - tvp = pt->p_textvp; - vid = vnode_vid(tvp); - - if (tvp == NULLVP) { - proc_rele(pt); - return(EINVAL); - } - - buf = (char *)kalloc(usize); - if (buf == NULL) { - proc_rele(pt); - return(ENOMEM); - } - bzero(buf, usize); - - error = vnode_getwithvid(tvp, vid); - if (error == 0) { - int len; - len = usize; - error = vn_getpath(tvp, buf, &len); - vnode_put(tvp); - if (error == 0) { - error = copyout(buf, uaddr, usize); - } - kfree(buf, usize); - } - - proc_rele(pt); - - return(error); - case CS_OPS_PIDOFFSET: toff = pt->p_textoff; proc_rele(pt); @@ -1903,43 +1976,131 @@ csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user return error; case CS_OPS_ENTITLEMENTS_BLOB: { - char fakeheader[8] = { 0 }; void *start; size_t length; + proc_lock(pt); + if ((pt->p_csflags & CS_VALID) == 0) { + proc_unlock(pt); error = EINVAL; break; } - if (usize < sizeof(fakeheader)) { + + error = cs_entitlements_blob_get(pt, &start, &length); + proc_unlock(pt); + if (error) + break; + + error = csops_copy_token(start, length, usize, uaddr); + break; + } + case CS_OPS_MARKRESTRICT: + proc_lock(pt); + pt->p_csflags |= CS_RESTRICT; + proc_unlock(pt); + break; + + case CS_OPS_SET_STATUS: { + uint32_t flags; + + if (usize < sizeof(flags)) { error = ERANGE; break; } - if (0 != (error = cs_entitlements_blob_get(pt, - &start, &length))) + + error = copyin(uaddr, &flags, sizeof(flags)); + if (error) break; - /* if no entitlement, fill in zero header */ - if (NULL == start) { 
- start = fakeheader; - length = sizeof(fakeheader); - } else if (usize < length) { - /* ... if input too short, copy out length of entitlement */ - uint32_t length32 = htonl((uint32_t)length); - memcpy(&fakeheader[4], &length32, sizeof(length32)); - - error = copyout(fakeheader, uaddr, sizeof(fakeheader)); - if (error == 0) - error = ERANGE; /* input buffer to short, ERANGE signals that */ + + /* only allow setting a subset of all code sign flags */ + flags &= + CS_HARD | CS_EXEC_SET_HARD | + CS_KILL | CS_EXEC_SET_KILL | + CS_RESTRICT | + CS_ENFORCEMENT | CS_EXEC_SET_ENFORCEMENT; + + proc_lock(pt); + if (pt->p_csflags & CS_VALID) + pt->p_csflags |= flags; + else + error = EINVAL; + proc_unlock(pt); + + break; + } + case CS_OPS_BLOB: { + void *start; + size_t length; + + proc_lock(pt); + if ((pt->p_csflags & CS_VALID) == 0) { + proc_unlock(pt); + error = EINVAL; break; } - error = copyout(start, uaddr, length); + + error = cs_blob_get(pt, &start, &length); + proc_unlock(pt); + if (error) + break; + + error = csops_copy_token(start, length, usize, uaddr); break; } + case CS_OPS_IDENTITY: { + const char *identity; + uint8_t fakeheader[8]; + uint32_t idlen; + size_t length; + + /* + * Make identity have a blob header to make it + * easier on userland to guess the identity + * length. + */ + if (usize < sizeof(fakeheader)) { + error = ERANGE; + break; + } + memset(fakeheader, 0, sizeof(fakeheader)); - case CS_OPS_MARKRESTRICT: proc_lock(pt); - pt->p_csflags |= CS_RESTRICT; + if ((pt->p_csflags & CS_VALID) == 0) { + proc_unlock(pt); + error = EINVAL; + break; + } + + identity = cs_identity_get(pt); proc_unlock(pt); + if (identity == NULL) { + error = ENOENT; + break; + } + + length = strlen(identity) + 1; /* include NUL */ + idlen = htonl(length + sizeof(fakeheader)); + memcpy(&fakeheader[4], &idlen, sizeof(idlen)); + + error = copyout(fakeheader, uaddr, sizeof(fakeheader)); + if (error) + break; + + if (usize < sizeof(fakeheader) + length) + error = ERANGE; + else if (usize > sizeof(fakeheader)) + error = copyout(identity, uaddr + sizeof(fakeheader), length); + + break; + } + + case CS_OPS_SIGPUP_INSTALL: + error = sigpup_install(uaddr); + break; + + case CS_OPS_SIGPUP_DROP: + error = sigpup_drop(); break; default: @@ -2699,107 +2860,6 @@ proc_knote_drain(struct proc *p) proc_klist_unlock(); } -unsigned long cs_procs_killed = 0; -unsigned long cs_procs_invalidated = 0; -int cs_force_kill = 0; -int cs_force_hard = 0; -int cs_debug = 0; -SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_kill, 0, ""); -SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_hard, 0, ""); -SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_debug, 0, ""); - -int -cs_allow_invalid(struct proc *p) -{ -#if MACH_ASSERT - lck_mtx_assert(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); -#endif -#if CONFIG_MACF && CONFIG_ENFORCE_SIGNED_CODE - /* There needs to be a MAC policy to implement this hook, or else the - * kill bits will be cleared here every time. If we have - * CONFIG_ENFORCE_SIGNED_CODE, we can assume there is a policy - * implementing the hook. 
- */ - if( 0 != mac_proc_check_run_cs_invalid(p)) { - if(cs_debug) printf("CODE SIGNING: cs_allow_invalid() " - "not allowed: pid %d\n", - p->p_pid); - return 0; - } - if(cs_debug) printf("CODE SIGNING: cs_allow_invalid() " - "allowed: pid %d\n", - p->p_pid); - proc_lock(p); - p->p_csflags &= ~(CS_KILL | CS_HARD | CS_VALID); - proc_unlock(p); - vm_map_switch_protect(get_task_map(p->task), FALSE); -#endif - return (p->p_csflags & (CS_KILL | CS_HARD)) == 0; -} - -int -cs_invalid_page( - addr64_t vaddr) -{ - struct proc *p; - int retval; - - p = current_proc(); - - /* - * XXX revisit locking when proc is no longer protected - * by the kernel funnel... - */ - - /* XXX for testing */ - proc_lock(p); - if (cs_force_kill) - p->p_csflags |= CS_KILL; - if (cs_force_hard) - p->p_csflags |= CS_HARD; - - /* CS_KILL triggers us to send a kill signal. Nothing else. */ - if (p->p_csflags & CS_KILL) { - p->p_csflags |= CS_KILLED; - proc_unlock(p); - if (cs_debug) { - printf("CODE SIGNING: cs_invalid_page(0x%llx): " - "p=%d[%s] honoring CS_KILL, final status 0x%x\n", - vaddr, p->p_pid, p->p_comm, p->p_csflags); - } - cs_procs_killed++; - psignal(p, SIGKILL); - proc_lock(p); - } - - /* CS_HARD means fail the mapping operation so the process stays valid. */ - if (p->p_csflags & CS_HARD) { - proc_unlock(p); - if (cs_debug) { - printf("CODE SIGNING: cs_invalid_page(0x%llx): " - "p=%d[%s] honoring CS_HARD\n", - vaddr, p->p_pid, p->p_comm); - } - retval = 1; - } else { - if (p->p_csflags & CS_VALID) { - p->p_csflags &= ~CS_VALID; - - proc_unlock(p); - cs_procs_invalidated++; - printf("CODE SIGNING: cs_invalid_page(0x%llx): " - "p=%d[%s] clearing CS_VALID\n", - vaddr, p->p_pid, p->p_comm); - } else { - proc_unlock(p); - } - - retval = 0; - } - - return retval; -} - void proc_setregister(proc_t p) { diff --git a/bsd/kern/kern_prot.c b/bsd/kern/kern_prot.c index d2408a2f3..73806d055 100644 --- a/bsd/kern/kern_prot.c +++ b/bsd/kern/kern_prot.c @@ -1841,47 +1841,6 @@ suser(kauth_cred_t cred, u_short *acflag) } -/* - * XXX This interface is going away; use kauth_cred_issuser() directly - * XXX instead. - */ -int -is_suser(void) -{ - proc_t p = current_proc(); - - if (!p) - return (0); - - return (proc_suser(p) == 0); -} - - -/* - * XXX This interface is going away; use kauth_cred_issuser() directly - * XXX instead. - */ -int -is_suser1(void) -{ - proc_t p = current_proc(); - kauth_cred_t my_cred; - posix_cred_t my_pcred; - int err; - - if (!p) - return (0); - - my_cred = kauth_cred_proc_ref(p); - my_pcred = posix_cred_get(my_cred); - - err = (suser(my_cred, &p->p_acflag) == 0 || - my_pcred->cr_ruid == 0 || my_pcred->cr_svuid == 0); - kauth_cred_unref(&my_cred); - return(err); -} - - /* * getlogin * @@ -2112,7 +2071,7 @@ setlcid(proc_t p0, struct setlcid_args *uap, __unused int32_t *retval) case LCID_REMOVE: /* Only root may Leave/Orphan. */ - if (!is_suser1()) { + if (!kauth_cred_issuser(kauth_cred_get())) { error = EPERM; goto out; } @@ -2156,7 +2115,7 @@ setlcid(proc_t p0, struct setlcid_args *uap, __unused int32_t *retval) default: /* Only root may Join/Adopt. 
*/ - if (!is_suser1()) { + if (!kauth_cred_issuser(kauth_cred_get())) { error = EPERM; goto out; } diff --git a/bsd/kern/kern_resource.c b/bsd/kern/kern_resource.c index ca41339ea..ce669fc02 100644 --- a/bsd/kern/kern_resource.c +++ b/bsd/kern/kern_resource.c @@ -107,14 +107,25 @@ #include +#include +#include + int donice(struct proc *curp, struct proc *chgp, int n); int dosetrlimit(struct proc *p, u_int which, struct rlimit *limp); int uthread_get_background_state(uthread_t); static void do_background_socket(struct proc *p, thread_t thread, int priority); static int do_background_thread(struct proc *curp, thread_t thread, int priority); static int do_background_proc(struct proc *curp, struct proc *targetp, int priority); +static int get_background_proc(struct proc *curp, struct proc *targetp, int *priority); void proc_apply_task_networkbg_internal(proc_t, thread_t); void proc_restore_task_networkbg_internal(proc_t, thread_t); +int proc_pid_rusage(int pid, int flavor, user_addr_t buf, int32_t *retval); +void gather_rusage_info_v2(proc_t p, struct rusage_info_v2 *ru, int flavor); +int fill_task_rusage_v2(task_t task, struct rusage_info_v2 *ri); +static void rusage_info_v2_to_v0(struct rusage_info_v0 *ri_v0, struct rusage_info_v2 *ri_v2); +static void rusage_info_v2_to_v1(struct rusage_info_v1 *ri_v1, struct rusage_info_v2 *ri_v2); + +int proc_get_rusage(proc_t p, int flavor, user_addr_t buffer, __unused int is_zombie); rlim_t maxdmap = MAXDSIZ; /* XXX */ rlim_t maxsmap = MAXSSIZ - PAGE_SIZE; /* XXX */ @@ -162,6 +173,8 @@ getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval) struct proc *p; int low = PRIO_MAX + 1; kauth_cred_t my_cred; + int refheld = 0; + int error = 0; /* would also test (uap->who < 0), but id_t is unsigned */ if (uap->who > 0x7fffffff) @@ -221,24 +234,32 @@ getpriority(struct proc *curp, struct getpriority_args *uap, int32_t *retval) break; - case PRIO_DARWIN_THREAD: { - thread_t thread; - struct uthread *ut; - + case PRIO_DARWIN_THREAD: /* we currently only support the current thread */ - if (uap->who != 0) { + if (uap->who != 0) return (EINVAL); - } - - thread = current_thread(); - ut = get_bsdthread_info(thread); - low = 0; - if ( (ut->uu_flag & UT_BACKGROUND_TRAFFIC_MGT) != 0 ) { - low = 1; + low = proc_get_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL, TASK_POLICY_DARWIN_BG); + + break; + + case PRIO_DARWIN_PROCESS: + if (uap->who == 0) { + p = curp; + } else { + p = proc_find(uap->who); + if (p == PROC_NULL) + break; + refheld = 1; } + + error = get_background_proc(curp, p, &low); + + if (refheld) + proc_rele(p); + if (error) + return (error); break; - } default: return (EINVAL); @@ -369,17 +390,11 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r } case PRIO_DARWIN_THREAD: { - /* process marked for termination no priority management */ - if ((curp->p_lflag & P_LPTERMINATE) != 0) - return(EINVAL); /* we currently only support the current thread */ - if (uap->who != 0) { + if (uap->who != 0) return (EINVAL); - } + error = do_background_thread(curp, current_thread(), uap->prio); - if (!error) { - (void) do_background_socket(curp, current_thread(), uap->prio); - } found++; break; } @@ -394,16 +409,8 @@ setpriority(struct proc *curp, struct setpriority_args *uap, __unused int32_t *r refheld = 1; } - /* process marked for termination no priority management */ - if ((p->p_lflag & P_LPTERMINATE) != 0) { - error = EINVAL; - } else { - error = do_background_proc(curp, p, uap->prio); - if (!error) 
{ - (void) do_background_socket(p, NULL, uap->prio); - } - - } + error = do_background_proc(curp, p, uap->prio); + found++; if (refheld != 0) proc_rele(p); @@ -464,12 +471,44 @@ out: return (error); } +static int +get_background_proc(struct proc *curp, struct proc *targetp, int *priority) +{ + int external = 0; + int error = 0; + kauth_cred_t ucred, target_cred; + + ucred = kauth_cred_get(); + target_cred = kauth_cred_proc_ref(targetp); + + if (!kauth_cred_issuser(ucred) && kauth_cred_getruid(ucred) && + kauth_cred_getuid(ucred) != kauth_cred_getuid(target_cred) && + kauth_cred_getruid(ucred) != kauth_cred_getuid(target_cred)) { + error = EPERM; + goto out; + } + + external = (curp == targetp) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL; + + *priority = proc_get_task_policy(current_task(), THREAD_NULL, external, TASK_POLICY_DARWIN_BG); + +out: + kauth_cred_unref(&target_cred); + return (error); +} + static int do_background_proc(struct proc *curp, struct proc *targetp, int priority) { +#if !CONFIG_MACF +#pragma unused(curp) +#endif int error = 0; kauth_cred_t ucred; kauth_cred_t target_cred; + int external; + int flavor; + int enable; ucred = kauth_cred_get(); target_cred = kauth_cred_proc_ref(targetp); @@ -488,12 +527,30 @@ do_background_proc(struct proc *curp, struct proc *targetp, int priority) goto out; #endif - if (priority == PRIO_DARWIN_NONUI) - error = proc_apply_task_gpuacc(targetp->task, TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS); - else - error = proc_set_and_apply_bgtaskpolicy(targetp->task, priority); - if (error) - goto out; + external = (curp == targetp) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL; + + switch (priority) { + case PRIO_DARWIN_NONUI: + flavor = TASK_POLICY_GPU_DENY; + enable = TASK_POLICY_ENABLE; + break; + case PRIO_DARWIN_BG: + flavor = TASK_POLICY_DARWIN_BG; + enable = TASK_POLICY_ENABLE; + break; + default: + /* + * DARWIN_BG and GPU_DENY disable are overloaded, + * so we need to turn them both off at the same time + * + * TODO: It would be nice to fail if priority != 0 + */ + flavor = TASK_POLICY_DARWIN_BG_AND_GPU; + enable = TASK_POLICY_DISABLE; + break; + } + + proc_set_task_policy(proc_task(targetp), THREAD_NULL, external, flavor, enable); out: kauth_cred_unref(&target_cred); @@ -503,6 +560,7 @@ out: static void do_background_socket(struct proc *p, thread_t thread, int priority) { +#if SOCKETS struct filedesc *fdp; struct fileproc *fp; int i; @@ -513,7 +571,7 @@ do_background_socket(struct proc *p, thread_t thread, int priority) * the sockets with the background flag. There's nothing * to do here for the PRIO_DARWIN_THREAD case. 
*/ - if (thread == NULL) { + if (thread == THREAD_NULL) { proc_fdlock(p); fdp = p->p_fd; @@ -522,7 +580,7 @@ do_background_socket(struct proc *p, thread_t thread, int priority) fp = fdp->fd_ofiles[i]; if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || - fp->f_fglob->fg_type != DTYPE_SOCKET) { + FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) { continue; } sockp = (struct socket *)fp->f_fglob->fg_data; @@ -546,7 +604,7 @@ do_background_socket(struct proc *p, thread_t thread, int priority) fp = fdp->fd_ofiles[ i ]; if ( fp == NULL || (fdp->fd_ofileflags[ i ] & UF_RESERVED) != 0 || - fp->f_fglob->fg_type != DTYPE_SOCKET ) { + FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET ) { continue; } sockp = (struct socket *)fp->f_fglob->fg_data; @@ -559,85 +617,40 @@ do_background_socket(struct proc *p, thread_t thread, int priority) } proc_fdunlock(p); } +#else +#pragma unused(p, thread, priority) +#endif } /* * do_background_thread - * Returns: 0 Success + * Returns: 0 Success + * EPERM Tried to background while in vfork * XXX - todo - does this need a MACF hook? - * - * NOTE: To maintain binary compatibility with PRIO_DARWIN_THREAD with respect - * to network traffic management, UT_BACKGROUND_TRAFFIC_MGT is set/cleared - * along with UT_BACKGROUND flag, as the latter alone no longer implies - * any form of traffic regulation (it simply means that the thread is - * background.) With PRIO_DARWIN_PROCESS, any form of network traffic - * management must be explicitly requested via whatever means appropriate, - * and only TRAFFIC_MGT_SO_BACKGROUND is set via do_background_socket(). */ static int -do_background_thread(struct proc *curp __unused, thread_t thread, int priority) +do_background_thread(struct proc *curp, thread_t thread, int priority) { - struct uthread *ut; - int error = 0; - + struct uthread *ut; + int enable, external; + ut = get_bsdthread_info(thread); /* Backgrounding is unsupported for threads in vfork */ - if ( (ut->uu_flag & UT_VFORK) != 0) { + if ((ut->uu_flag & UT_VFORK) != 0) return(EPERM); - } - error = proc_set_and_apply_bgthreadpolicy(curp->task, thread_tid(thread), priority); - return(error); - -} + /* TODO: Fail if someone passes something besides 0 or PRIO_DARWIN_BG */ + enable = (priority == PRIO_DARWIN_BG) ? TASK_POLICY_ENABLE : TASK_POLICY_DISABLE; + external = (current_thread() == thread) ? 
TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL; -#if CONFIG_EMBEDDED -int mach_do_background_thread(thread_t thread, int prio); + proc_set_task_policy_thread(curp->task, thread_tid(thread), external, + TASK_POLICY_DARWIN_BG, enable); -int -mach_do_background_thread(thread_t thread, int prio) -{ - int error = 0; - struct proc *curp = NULL; - struct proc *targetp = NULL; - kauth_cred_t ucred; - - targetp = get_bsdtask_info(get_threadtask(thread)); - if (!targetp) { - return KERN_INVALID_ARGUMENT; - } - - curp = proc_self(); - if (curp == PROC_NULL) { - return KERN_FAILURE; - } - - ucred = kauth_cred_proc_ref(curp); - - if (suser(ucred, NULL) && curp != targetp) { - error = KERN_PROTECTION_FAILURE; - goto out; - } - - error = do_background_thread(curp, thread, prio); - if (!error) { - (void) do_background_socket(curp, thread, prio); - } else { - if (error == EPERM) { - error = KERN_PROTECTION_FAILURE; - } else { - error = KERN_FAILURE; - } - } - -out: - proc_rele(curp); - kauth_cred_unref(&ucred); - return error; + return(0); } -#endif /* CONFIG_EMBEDDED */ + /* * Returns: 0 Success @@ -848,7 +861,7 @@ dosetrlimit(struct proc *p, u_int which, struct rlimit *limp) * because historically, people have been able to attempt to * set RLIM_INFINITY to get "whatever the maximum is". */ - if ( is_suser() ) { + if ( kauth_cred_issuser(kauth_cred_get()) ) { if (limp->rlim_cur != alimp->rlim_cur && limp->rlim_cur > (rlim_t)maxfiles) { if (posix) { @@ -882,7 +895,7 @@ dosetrlimit(struct proc *p, u_int which, struct rlimit *limp) * systemwide resource; all others are limited to * maxprocperuid (presumably less than maxproc). */ - if ( is_suser() ) { + if ( kauth_cred_issuser(kauth_cred_get()) ) { if (limp->rlim_cur > (rlim_t)maxproc) limp->rlim_cur = maxproc; if (limp->rlim_max > (rlim_t)maxproc) @@ -1065,6 +1078,31 @@ ruadd(struct rusage *ru, struct rusage *ru2) *ip++ += *ip2++; } +/* + * Add the rusage stats of child in parent. + * + * It adds rusage statistics of child process and statistics of all its + * children to its parent. + * + * Note: proc lock of parent should be held while calling this function. 
+ */ +void +update_rusage_info_child(struct rusage_info_child *ri, struct rusage_info_v2 *ri2) +{ + ri->ri_child_user_time += (ri2->ri_user_time + + ri2->ri_child_user_time); + ri->ri_child_system_time += (ri2->ri_system_time + + ri2->ri_child_system_time); + ri->ri_child_pkg_idle_wkups += (ri2->ri_pkg_idle_wkups + + ri2->ri_child_pkg_idle_wkups); + ri->ri_child_interrupt_wkups += (ri2->ri_interrupt_wkups + + ri2->ri_child_interrupt_wkups); + ri->ri_child_pageins += (ri2->ri_pageins + + ri2->ri_child_pageins); + ri->ri_child_elapsed_abstime += ((ri2->ri_proc_exit_abstime - + ri2->ri_proc_start_abstime) + ri2->ri_child_elapsed_abstime); +} + void proc_limitget(proc_t p, int which, struct rlimit * limp) { @@ -1170,7 +1208,6 @@ proc_limitreplace(proc_t p) return(0); } - /* * iopolicysys * @@ -1183,133 +1220,441 @@ proc_limitreplace(proc_t p) * EINVAL Invalid command or invalid policy arguments * */ + +static int +iopolicysys_disk(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param); +static int +iopolicysys_vfs(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param); + int -iopolicysys(__unused struct proc *p, __unused struct iopolicysys_args *uap, __unused int32_t *retval) +iopolicysys(struct proc *p, struct iopolicysys_args *uap, __unused int32_t *retval) { - int error = 0; + int error = 0; struct _iopol_param_t iop_param; - int processwide = 0; if ((error = copyin(uap->arg, &iop_param, sizeof(iop_param))) != 0) goto out; - if (iop_param.iop_iotype != IOPOL_TYPE_DISK) { - error = EINVAL; - goto out; + switch (iop_param.iop_iotype) { + case IOPOL_TYPE_DISK: + error = iopolicysys_disk(p, uap->cmd, iop_param.iop_scope, iop_param.iop_policy, &iop_param); + if (error) + goto out; + break; + case IOPOL_TYPE_VFS_HFS_CASE_SENSITIVITY: + error = iopolicysys_vfs(p, uap->cmd, iop_param.iop_scope, iop_param.iop_policy, &iop_param); + if (error) + goto out; + break; + default: + error = EINVAL; + goto out; } - switch (iop_param.iop_scope) { - case IOPOL_SCOPE_PROCESS: - processwide = 1; - break; - case IOPOL_SCOPE_THREAD: - processwide = 0; - break; - default: - error = EINVAL; - goto out; + /* Individual iotype handlers are expected to update iop_param, if requested with a GET command */ + if (uap->cmd == IOPOL_CMD_GET) { + error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param)); + if (error) + goto out; } - - switch(uap->cmd) { - case IOPOL_CMD_SET: - switch (iop_param.iop_policy) { - case IOPOL_DEFAULT: - case IOPOL_NORMAL: - case IOPOL_THROTTLE: - case IOPOL_PASSIVE: - case IOPOL_UTILITY: - if(processwide != 0) - proc_apply_task_diskacc(current_task(), iop_param.iop_policy); - else - proc_apply_thread_selfdiskacc(iop_param.iop_policy); - + +out: + return (error); +} + +static int +iopolicysys_disk(struct proc *p __unused, int cmd, int scope, int policy, struct _iopol_param_t *iop_param) +{ + int error = 0; + thread_t thread; + int policy_flavor; + + /* Validate scope */ + switch (scope) { + case IOPOL_SCOPE_PROCESS: + thread = THREAD_NULL; + policy_flavor = TASK_POLICY_IOPOL; + break; + + case IOPOL_SCOPE_THREAD: + thread = current_thread(); + policy_flavor = TASK_POLICY_IOPOL; break; + + case IOPOL_SCOPE_DARWIN_BG: + thread = THREAD_NULL; + policy_flavor = TASK_POLICY_DARWIN_BG_IOPOL; + break; + default: error = EINVAL; goto out; + } + + /* Validate policy */ + if (cmd == IOPOL_CMD_SET) { + switch (policy) { + case IOPOL_DEFAULT: + if (scope == IOPOL_SCOPE_DARWIN_BG) { + /* the current default BG throttle level is UTILITY */ + policy 
= IOPOL_UTILITY; + } else { + policy = IOPOL_IMPORTANT; + } + break; + case IOPOL_UTILITY: + /* fall-through */ + case IOPOL_THROTTLE: + /* These levels are OK */ + break; + case IOPOL_IMPORTANT: + /* fall-through */ + case IOPOL_STANDARD: + /* fall-through */ + case IOPOL_PASSIVE: + if (scope == IOPOL_SCOPE_DARWIN_BG) { + /* These levels are invalid for BG */ + error = EINVAL; + goto out; + } else { + /* OK for other scopes */ + } + break; + default: + error = EINVAL; + goto out; } - break; - - case IOPOL_CMD_GET: - if(processwide != 0) - iop_param.iop_policy = proc_get_task_disacc(current_task()); - else - iop_param.iop_policy = proc_get_thread_selfdiskacc(); - - error = copyout((caddr_t)&iop_param, uap->arg, sizeof(iop_param)); + } - break; - default: - error = EINVAL; // unknown command - break; + /* Perform command */ + switch(cmd) { + case IOPOL_CMD_SET: + proc_set_task_policy(current_task(), thread, + TASK_POLICY_INTERNAL, policy_flavor, + policy); + break; + case IOPOL_CMD_GET: + policy = proc_get_task_policy(current_task(), thread, + TASK_POLICY_INTERNAL, policy_flavor); + + iop_param->iop_policy = policy; + break; + default: + error = EINVAL; /* unknown command */ + break; } out: - *retval = error; return (error); } +static int +iopolicysys_vfs(struct proc *p, int cmd, int scope, int policy, struct _iopol_param_t *iop_param) +{ + int error = 0; -boolean_t thread_is_io_throttled(void); + /* Validate scope */ + switch (scope) { + case IOPOL_SCOPE_PROCESS: + /* Only process OK */ + break; + default: + error = EINVAL; + goto out; + } -boolean_t -thread_is_io_throttled(void) -{ - return(proc_get_task_selfdiskacc() == IOPOL_THROTTLE); + /* Validate policy */ + if (cmd == IOPOL_CMD_SET) { + switch (policy) { + case IOPOL_VFS_HFS_CASE_SENSITIVITY_DEFAULT: + /* fall-through */ + case IOPOL_VFS_HFS_CASE_SENSITIVITY_FORCE_CASE_SENSITIVE: + /* These policies are OK */ + break; + default: + error = EINVAL; + goto out; + } + } + + /* Perform command */ + switch(cmd) { + case IOPOL_CMD_SET: + if (0 == kauth_cred_issuser(kauth_cred_get())) { + error = EPERM; + goto out; + } + + switch (policy) { + case IOPOL_VFS_HFS_CASE_SENSITIVITY_DEFAULT: + OSBitAndAtomic16(~((uint32_t)P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY), &p->p_vfs_iopolicy); + break; + case IOPOL_VFS_HFS_CASE_SENSITIVITY_FORCE_CASE_SENSITIVE: + OSBitOrAtomic16((uint32_t)P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY, &p->p_vfs_iopolicy); + break; + default: + error = EINVAL; + goto out; + } + + break; + case IOPOL_CMD_GET: + iop_param->iop_policy = (p->p_vfs_iopolicy & P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY) + ? IOPOL_VFS_HFS_CASE_SENSITIVITY_FORCE_CASE_SENSITIVE + : IOPOL_VFS_HFS_CASE_SENSITIVITY_DEFAULT; + break; + default: + error = EINVAL; /* unknown command */ + break; + } + +out: + return (error); } +/* BSD call back function for task_policy */ +void proc_apply_task_networkbg(void * bsd_info, thread_t thread, int bg); + void -proc_apply_task_networkbg(void * bsd_info) +proc_apply_task_networkbg(void * bsd_info, thread_t thread, int bg) { proc_t p = PROC_NULL; proc_t curp = (proc_t)bsd_info; pid_t pid; + int prio = (bg ? 
PRIO_DARWIN_BG : 0); pid = curp->p_pid; p = proc_find(pid); if (p != PROC_NULL) { - do_background_socket(p, NULL, PRIO_DARWIN_BG); + do_background_socket(p, thread, prio); proc_rele(p); } } void -proc_restore_task_networkbg(void * bsd_info) +gather_rusage_info_v2(proc_t p, struct rusage_info_v2 *ru, int flavor) { - proc_t p = PROC_NULL; - proc_t curp = (proc_t)bsd_info; - pid_t pid; + struct rusage_info_child *ri_child; - pid = curp->p_pid; - p = proc_find(pid); - if (p != PROC_NULL) { - do_background_socket(p, NULL, 0); - proc_rele(p); + assert(p->p_stats != NULL); + switch(flavor) { + case RUSAGE_INFO_V2: + ru->ri_diskio_bytesread = p->p_stats->ri_diskiobytes.ri_bytesread; + ru->ri_diskio_byteswritten = p->p_stats->ri_diskiobytes.ri_byteswritten; + /* fall through */ + + case RUSAGE_INFO_V1: + /* + * p->p_stats->ri_child statistics are protected under proc lock. + */ + proc_lock(p); + + ri_child = &(p->p_stats->ri_child); + ru->ri_child_user_time = ri_child->ri_child_user_time; + ru->ri_child_system_time = ri_child->ri_child_system_time; + ru->ri_child_pkg_idle_wkups = ri_child->ri_child_pkg_idle_wkups; + ru->ri_child_interrupt_wkups = ri_child->ri_child_interrupt_wkups; + ru->ri_child_pageins = ri_child->ri_child_pageins; + ru->ri_child_elapsed_abstime = ri_child->ri_child_elapsed_abstime; + + proc_unlock(p); + /* fall through */ + + case RUSAGE_INFO_V0: + proc_getexecutableuuid(p, (unsigned char *)&ru->ri_uuid, sizeof (ru->ri_uuid)); + fill_task_rusage_v2(p->task, ru); + ru->ri_proc_start_abstime = p->p_stats->ps_start; } +} +/* + * Temporary function to copy value from rusage_info_v2 to rusage_info_v0. + */ +static void +rusage_info_v2_to_v0(struct rusage_info_v0 *ri_v0, struct rusage_info_v2 *ri_v2) +{ + memcpy(&ri_v0->ri_uuid[0], &ri_v2->ri_uuid[0], sizeof(ri_v0->ri_uuid)); + ri_v0->ri_user_time = ri_v2->ri_user_time; + ri_v0->ri_system_time = ri_v2->ri_system_time; + ri_v0->ri_pkg_idle_wkups = ri_v2->ri_pkg_idle_wkups; + ri_v0->ri_interrupt_wkups = ri_v2->ri_interrupt_wkups; + ri_v0->ri_pageins = ri_v2->ri_pageins; + ri_v0->ri_wired_size = ri_v2->ri_wired_size; + ri_v0->ri_resident_size = ri_v2->ri_resident_size; + ri_v0->ri_phys_footprint = ri_v2->ri_phys_footprint; + ri_v0->ri_proc_start_abstime = ri_v2->ri_proc_start_abstime; + ri_v0->ri_proc_exit_abstime = ri_v2->ri_proc_exit_abstime; } -void -proc_set_task_networkbg(void * bsdinfo, int setbg) +static void +rusage_info_v2_to_v1(struct rusage_info_v1 *ri_v1, struct rusage_info_v2 *ri_v2) { - if (setbg != 0) - proc_apply_task_networkbg(bsdinfo); - else - proc_restore_task_networkbg(bsdinfo); + memcpy(&ri_v1->ri_uuid[0], &ri_v2->ri_uuid[0], sizeof(ri_v1->ri_uuid)); + ri_v1->ri_user_time = ri_v2->ri_user_time; + ri_v1->ri_system_time = ri_v2->ri_system_time; + ri_v1->ri_pkg_idle_wkups = ri_v2->ri_pkg_idle_wkups; + ri_v1->ri_interrupt_wkups = ri_v2->ri_interrupt_wkups; + ri_v1->ri_pageins = ri_v2->ri_pageins; + ri_v1->ri_wired_size = ri_v2->ri_wired_size; + ri_v1->ri_resident_size = ri_v2->ri_resident_size; + ri_v1->ri_phys_footprint = ri_v2->ri_phys_footprint; + ri_v1->ri_proc_start_abstime = ri_v2->ri_proc_start_abstime; + ri_v1->ri_proc_exit_abstime = ri_v2->ri_proc_exit_abstime; + ri_v1->ri_child_user_time = ri_v2->ri_child_user_time; + ri_v1->ri_child_system_time = ri_v2->ri_child_system_time; + ri_v1->ri_child_pkg_idle_wkups = ri_v2->ri_child_pkg_idle_wkups; + ri_v1->ri_child_interrupt_wkups = ri_v2->ri_child_interrupt_wkups; + ri_v1->ri_child_pageins = ri_v2->ri_child_pageins; + ri_v1->ri_child_elapsed_abstime = 
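+	/*
+	 * The conversion helpers here exist because the kernel keeps only
+	 * the newest layout (rusage_info_v2) and down-converts for older
+	 * callers; each revision appends fields rather than reshaping the
+	 * struct.  Userland reaches this through proc_pid_rusage(2); a
+	 * minimal sketch, assuming the <libproc.h> wrapper:
+	 *
+	 *	#include <libproc.h>
+	 *	#include <stdio.h>
+	 *
+	 *	struct rusage_info_v2 ri;
+	 *	if (proc_pid_rusage(pid, RUSAGE_INFO_V2,
+	 *	    (rusage_info_t *)&ri) == 0)
+	 *		printf("disk reads: %llu bytes\n",
+	 *		    ri.ri_diskio_bytesread);
+	 */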
ri_v2->ri_child_elapsed_abstime; } -void -proc_apply_task_networkbg_internal(proc_t p, thread_t thread) +int +proc_get_rusage(proc_t p, int flavor, user_addr_t buffer, __unused int is_zombie) { - if (p != PROC_NULL) { - do_background_socket(p, thread, PRIO_DARWIN_BG); + struct rusage_info_v0 ri_v0; + struct rusage_info_v1 ri_v1; + struct rusage_info_v2 ri_v2; + + int error = 0; + + switch (flavor) { + case RUSAGE_INFO_V0: + /* + * If task is still alive, collect info from the live task itself. + * Otherwise, look to the cached info in the zombie proc. + */ + if (p->p_ru == NULL) { + gather_rusage_info_v2(p, &ri_v2, flavor); + ri_v2.ri_proc_exit_abstime = 0; + rusage_info_v2_to_v0(&ri_v0, &ri_v2); + } else { + rusage_info_v2_to_v0(&ri_v0, &p->p_ru->ri); + } + error = copyout(&ri_v0, buffer, sizeof (ri_v0)); + break; + + case RUSAGE_INFO_V1: + /* + * If task is still alive, collect info from the live task itself. + * Otherwise, look to the cached info in the zombie proc. + */ + if (p->p_ru == NULL) { + gather_rusage_info_v2(p, &ri_v2, flavor); + ri_v2.ri_proc_exit_abstime = 0; + rusage_info_v2_to_v1(&ri_v1, &ri_v2); + } else { + rusage_info_v2_to_v1(&ri_v1, &p->p_ru->ri); + } + error = copyout(&ri_v1, buffer, sizeof (ri_v1)); + break; + + case RUSAGE_INFO_V2: + /* + * If task is still alive, collect info from the live task itself. + * Otherwise, look to the cached info in the zombie proc. + */ + if (p->p_ru == NULL) { + gather_rusage_info_v2(p, &ri_v2, flavor); + ri_v2.ri_proc_exit_abstime = 0; + } else { + ri_v2 = p->p_ru->ri; + } + error = copyout(&ri_v2, buffer, sizeof (ri_v2)); + break; + + default: + error = EINVAL; + break; } + + return (error); } -void -proc_restore_task_networkbg_internal(proc_t p, thread_t thread) + +static int +mach_to_bsd_rv(int mach_rv) { - if (p != PROC_NULL) { - do_background_socket(p, thread, PRIO_DARWIN_BG); + int bsd_rv = 0; + + switch (mach_rv) { + case KERN_SUCCESS: + bsd_rv = 0; + break; + case KERN_INVALID_ARGUMENT: + bsd_rv = EINVAL; + break; + default: + panic("unknown error %#x", mach_rv); } + + return bsd_rv; } +/* + * Resource limit controls + * + * uap->flavor available flavors: + * + * RLIMIT_WAKEUPS_MONITOR + */ +int +proc_rlimit_control(__unused struct proc *p, struct proc_rlimit_control_args *uap, int32_t *retval) +{ + proc_t targetp; + int error = 0; + struct proc_rlimit_control_wakeupmon wakeupmon_args; + uint32_t cpumon_flags; + kauth_cred_t my_cred, target_cred; + + *retval = 0; + + if ((targetp = proc_find(uap->pid)) == PROC_NULL) { + *retval = -1; + return (ESRCH); + } + + my_cred = kauth_cred_get(); + target_cred = kauth_cred_proc_ref(targetp); + + if (!kauth_cred_issuser(my_cred) && kauth_cred_getruid(my_cred) && + kauth_cred_getuid(my_cred) != kauth_cred_getuid(target_cred) && + kauth_cred_getruid(my_cred) != kauth_cred_getuid(target_cred)) { + proc_rele(targetp); + kauth_cred_unref(&target_cred); + *retval = -1; + error = EACCES; + return (error); + } + + switch (uap->flavor) { + case RLIMIT_WAKEUPS_MONITOR: + if ((error = copyin(uap->arg, &wakeupmon_args, sizeof (wakeupmon_args))) != 0) { + break; + } + if ((error = mach_to_bsd_rv(task_wakeups_monitor_ctl(targetp->task, &wakeupmon_args.wm_flags, + &wakeupmon_args.wm_rate))) != 0) { + break; + } + error = copyout(&wakeupmon_args, uap->arg, sizeof (wakeupmon_args)); + break; + case RLIMIT_CPU_USAGE_MONITOR: + cpumon_flags = uap->arg; // XXX temporarily stashing flags in argp (12592127) + error = mach_to_bsd_rv(task_cpu_usage_monitor_ctl(targetp->task, &cpumon_flags)); + break; + 
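	/*
	 * Both flavors funnel Mach return codes through mach_to_bsd_rv()
	 * above, so userland sees plain errno values (KERN_INVALID_ARGUMENT
	 * becomes EINVAL) and any unexpected Mach code panics instead of
	 * being silently mistranslated.  A new flavor should follow the
	 * same copyin/ctl/copyout shape as RLIMIT_WAKEUPS_MONITOR and, if
	 * its ctl routine can return more Mach codes, extend the mapping,
	 * e.g. (hypothetical additional case):
	 *
	 *	case KERN_NO_ACCESS:
	 *		bsd_rv = EPERM;
	 *		break;
	 */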
default: + error = EINVAL; + break; + } + + proc_rele(targetp); + kauth_cred_unref(&target_cred); + + if (error != 0) { + *retval = -1; + } + + /* + * Return value from this function becomes errno to userland caller. + * *retval is what the system call invocation returns. + */ + return (error); +} diff --git a/bsd/kern/kern_sig.c b/bsd/kern/kern_sig.c index d656dcaf3..9605be839 100644 --- a/bsd/kern/kern_sig.c +++ b/bsd/kern/kern_sig.c @@ -122,6 +122,8 @@ extern int thread_enable_fpe(thread_t act, int onoff); extern thread_t port_name_to_thread(mach_port_name_t port_name); extern kern_return_t get_signalact(task_t , thread_t *, int); extern unsigned int get_useraddr(void); +extern kern_return_t task_suspend_internal(task_t); +extern kern_return_t task_resume_internal(task_t); /* * --- @@ -1935,7 +1937,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) sig_proc->p_contproc = current_proc()->p_pid; proc_unlock(sig_proc); - (void) task_resume(sig_task); + (void) task_resume_internal(sig_task); goto psigout; } proc_unlock(sig_proc); @@ -1951,7 +1953,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) if (prop & SA_CONT) { OSBitOrAtomic(P_CONTINUED, &sig_proc->p_flag); proc_unlock(sig_proc); - (void) task_resume(sig_task); + (void) task_resume_internal(sig_task); proc_lock(sig_proc); sig_proc->p_stat = SRUN; } else if (sig_proc->p_stat == SSTOP) { @@ -2090,7 +2092,7 @@ psignal_internal(proc_t p, task_t task, thread_t thread, int flavor, int signum) sig_proc->p_contproc = sig_proc->p_pid; proc_unlock(sig_proc); - (void) task_resume(sig_task); + (void) task_resume_internal(sig_task); proc_lock(sig_proc); /* * When processing a SIGCONT, we need to check @@ -2297,12 +2299,9 @@ issignal_locked(proc_t p) /* * XXX Have to really stop for debuggers; * XXX stop() doesn't do the right thing. - * XXX Inline the task_suspend because we - * XXX have to diddle Unix state in the - * XXX middle of it. */ task = p->task; - task_suspend(task); + task_suspend_internal(task); proc_lock(p); p->sigwait = TRUE; @@ -2642,7 +2641,7 @@ stop(proc_t p, proc_t parent) wakeup((caddr_t)parent); proc_list_unlock(); } - (void) task_suspend(p->task); /*XXX*/ + (void) task_suspend_internal(p->task); } /* @@ -2693,7 +2692,7 @@ postsig_locked(int signum) p->p_sigacts->ps_sig = signum; proc_signalend(p, 1); proc_unlock(p); - if (coredump(p) == 0) + if (coredump(p, 0, 0) == 0) signum |= WCOREFLAG; } else { proc_signalend(p, 1); @@ -2945,7 +2944,7 @@ bsd_ast(thread_t thread) proc_lock(p); p->p_dtrace_stop = 1; proc_unlock(p); - (void)task_suspend(p->task); + (void)task_suspend_internal(p->task); } if (ut->t_dtrace_resumepid) { @@ -2957,7 +2956,7 @@ bsd_ast(thread_t thread) if (resumeproc->p_dtrace_stop) { resumeproc->p_dtrace_stop = 0; proc_unlock(resumeproc); - task_resume(resumeproc->task); + task_resume_internal(resumeproc->task); } else { proc_unlock(resumeproc); diff --git a/bsd/kern/kern_subr.c b/bsd/kern/kern_subr.c index 9e9587bea..eada997ca 100644 --- a/bsd/kern/kern_subr.c +++ b/bsd/kern/kern_subr.c @@ -126,11 +126,7 @@ int uiomove64(const addr64_t c_cp, int n, struct uio *uio) { addr64_t cp = c_cp; -#if LP64KERN uint64_t acnt; -#else - u_int acnt; -#endif int error = 0; #if DIAGNOSTIC @@ -911,15 +907,16 @@ int uio_getiov( uio_t a_uio, * uio_calculateresid - runs through all iovecs associated with this * uio_t and calculates (and sets) the residual IO count. 
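 *
 * The routine now returns an int so callers can reject iovec arrays
 * whose lengths sum past LONG_MAX: each iov_len is bounds-checked and
 * the running total is re-checked after every addition.  The idiom,
 * in a minimal sketch:
 *
 *	u_int64_t resid = 0;
 *	if (len > LONG_MAX)		// single iovec too large
 *		return EINVAL;
 *	resid += len;
 *	if (resid > LONG_MAX)		// accumulated total overflowed
 *		return EINVAL;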
*/ -__private_extern__ void uio_calculateresid( uio_t a_uio ) +__private_extern__ int uio_calculateresid( uio_t a_uio ) { int i; + u_int64_t resid = 0; if (a_uio == NULL) { #if LP64_DEBUG panic("%s :%d - invalid uio_t\n", __FILE__, __LINE__); #endif /* LP64_DEBUG */ - return; + return EINVAL; } a_uio->uio_iovcnt = a_uio->uio_max_iovs; @@ -927,9 +924,14 @@ __private_extern__ void uio_calculateresid( uio_t a_uio ) a_uio->uio_resid_64 = 0; for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { if (a_uio->uio_iovs.uiovp[i].iov_len != 0 && a_uio->uio_iovs.uiovp[i].iov_base != 0) { - a_uio->uio_resid_64 += a_uio->uio_iovs.uiovp[i].iov_len; + if (a_uio->uio_iovs.uiovp[i].iov_len > LONG_MAX) + return EINVAL; + resid += a_uio->uio_iovs.uiovp[i].iov_len; + if (resid > LONG_MAX) + return EINVAL; } } + a_uio->uio_resid_64 = resid; /* position to first non zero length iovec (4235922) */ while (a_uio->uio_iovcnt > 0 && a_uio->uio_iovs.uiovp->iov_len == 0) { @@ -943,9 +945,14 @@ __private_extern__ void uio_calculateresid( uio_t a_uio ) a_uio->uio_resid_64 = 0; for ( i = 0; i < a_uio->uio_max_iovs; i++ ) { if (a_uio->uio_iovs.kiovp[i].iov_len != 0 && a_uio->uio_iovs.kiovp[i].iov_base != 0) { - a_uio->uio_resid_64 += a_uio->uio_iovs.kiovp[i].iov_len; + if (a_uio->uio_iovs.kiovp[i].iov_len > LONG_MAX) + return EINVAL; + resid += a_uio->uio_iovs.kiovp[i].iov_len; + if (resid > LONG_MAX) + return EINVAL; } } + a_uio->uio_resid_64 = resid; /* position to first non zero length iovec (4235922) */ while (a_uio->uio_iovcnt > 0 && a_uio->uio_iovs.kiovp->iov_len == 0) { @@ -956,7 +963,7 @@ __private_extern__ void uio_calculateresid( uio_t a_uio ) } } - return; + return 0; } /* @@ -1143,6 +1150,10 @@ uio_t uio_duplicate( uio_t a_uio ) } my_uio->uio_flags = UIO_FLAGS_WE_ALLOCED | UIO_FLAGS_INITED; +#if DEBUG + (void)hw_atomic_add(&uio_t_count, 1); +#endif + return(my_uio); } diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index 2e1965dfd..78c9fa723 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -226,6 +226,13 @@ kern_open_file_for_direct_io(const char * name, if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx))) goto out; + if (ref->vp->v_type == VREG) + { + vnode_lock_spin(ref->vp); + SET(ref->vp->v_flag, VSWAP); + vnode_unlock(ref->vp); + } + if (write_file_addr && write_file_len) { if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len))) @@ -236,6 +243,7 @@ kern_open_file_for_direct_io(const char * name, VATTR_WANTED(&va, va_rdev); VATTR_WANTED(&va, va_fsid); VATTR_WANTED(&va, va_data_size); + VATTR_WANTED(&va, va_data_alloc); VATTR_WANTED(&va, va_nlink); error = EFAULT; if (vnode_getattr(ref->vp, &va, ref->ctx)) @@ -243,7 +251,7 @@ kern_open_file_for_direct_io(const char * name, kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev)); kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid)); - kprintf("vp size %qd\n", va.va_data_size); + kprintf("vp size %qd alloc %qd\n", va.va_data_size, va.va_data_alloc); if (ref->vp->v_type == VREG) { @@ -258,10 +266,15 @@ kern_open_file_for_direct_io(const char * name, p2 = p; do_ioctl = &file_ioctl; - if (set_file_size) + if (set_file_size && (set_file_size != (off_t) va.va_data_alloc)) { off_t bytesallocated = 0; u_int32_t alloc_flags = PREALLOCATE | ALLOCATEFROMPEOF | ALLOCATEALL; + + vnode_lock_spin(ref->vp); + CLR(ref->vp->v_flag, VSWAP); + vnode_unlock(ref->vp); + error = VNOP_ALLOCATE(ref->vp, set_file_size, alloc_flags, &bytesallocated, 0 
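+	/*
+	 * VSWAP is dropped around this allocation, apparently so the
+	 * filesystem's swap-file special-casing does not apply while the
+	 * file is being sized; the flag is restored just below.  The
+	 * userland analogue of this preallocation is fcntl(F_PREALLOCATE);
+	 * a minimal sketch, assuming <fcntl.h> and a hypothetical
+	 * want_bytes:
+	 *
+	 *	fstore_t fst = {
+	 *		.fst_flags   = F_ALLOCATEALL,
+	 *		.fst_posmode = F_PEOFPOSMODE,
+	 *		.fst_offset  = 0,
+	 *		.fst_length  = want_bytes,
+	 *	};
+	 *	if (fcntl(fd, F_PREALLOCATE, &fst) == -1)
+	 *		warn("F_PREALLOCATE");
+	 */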
/*fst_offset*/, ref->ctx); @@ -269,6 +282,10 @@ kern_open_file_for_direct_io(const char * name, if (!error) error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL, ref->ctx); kprintf("vnode_setsize(%d) %qd\n", error, set_file_size); ref->filelength = bytesallocated; + + vnode_lock_spin(ref->vp); + SET(ref->vp->v_flag, VSWAP); + vnode_unlock(ref->vp); } } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) @@ -355,7 +372,15 @@ kern_open_file_for_direct_io(const char * name, error = ENOTSUP; goto out; } +#if HIBFRAGMENT + uint64_t rev; + for (rev = 4096; rev <= getphysreq.length; rev += 4096) + { + callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096); + } +#else callback(callback_ref, getphysreq.offset, getphysreq.length); +#endif physoffset += getphysreq.length; } f_offset += filechunk; @@ -504,14 +529,14 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, } (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL); - if (addr && write_length) - { - (void) kern_write_file(ref, write_offset, addr, write_length); - } if (discard_offset && discard_end && !ref->pinned) { (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, discard_end); } + if (addr && write_length) + { + (void) kern_write_file(ref, write_offset, addr, write_length); + } error = vnode_close(ref->vp, FWRITE, ref->ctx); diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c index 56782c39c..c97123066 100644 --- a/bsd/kern/kern_sysctl.c +++ b/bsd/kern/kern_sysctl.c @@ -131,6 +131,7 @@ #include #include +#include #include #if defined(__i386__) || defined(__x86_64__) @@ -141,6 +142,10 @@ #include #endif +#if KPERF +#include +#endif + /* * deliberately setting max requests to really high number * so that runaway settings do not cause MALLOC overflows @@ -163,6 +168,8 @@ extern unsigned int preheat_pages_max; extern unsigned int preheat_pages_min; extern long numvnodes; +extern uuid_string_t bootsessionuuid_string; + extern unsigned int vm_max_delayed_work_limit; extern unsigned int vm_max_batch; @@ -186,6 +193,8 @@ extern unsigned int vm_page_speculative_q_age_ms; extern boolean_t mach_timer_coalescing_enabled; +extern uint64_t timer_deadline_tracking_bin_1, timer_deadline_tracking_bin_2; + STATIC void fill_user32_eproc(proc_t, struct user32_eproc *__restrict); STATIC void @@ -243,9 +252,7 @@ STATIC int sysctl_doaffinity SYSCTL_HANDLER_ARGS; #if COUNT_SYSCALLS STATIC int sysctl_docountsyscalls SYSCTL_HANDLER_ARGS; #endif /* COUNT_SYSCALLS */ -#if !CONFIG_EMBEDDED STATIC int sysctl_doprocargs SYSCTL_HANDLER_ARGS; -#endif /* !CONFIG_EMBEDDED */ STATIC int sysctl_doprocargs2 SYSCTL_HANDLER_ARGS; STATIC int sysctl_prochandle SYSCTL_HANDLER_ARGS; #if DEBUG @@ -552,12 +559,12 @@ __sysctl(proc_t p, struct __sysctl_args *uap, __unused int32_t *retval) /* * Attributes stored in the kernel. */ -__private_extern__ char corefilename[MAXPATHLEN+1]; -__private_extern__ int do_coredump; -__private_extern__ int sugid_coredump; +extern char corefilename[MAXPATHLEN+1]; +extern int do_coredump; +extern int sugid_coredump; #if COUNT_SYSCALLS -__private_extern__ int do_count_syscalls; +extern int do_count_syscalls; #endif #ifdef INSECURE @@ -1737,6 +1744,13 @@ sysctl_kdebug_ops SYSCTL_HANDLER_ARGS return(ENOTSUP); ret = suser(kauth_cred_get(), &p->p_acflag); +#if KPERF + /* Non-root processes may be blessed by kperf to access data + * logged into trace. 
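+	 *
+	 * The blessing is consulted only when suser() has already failed,
+	 * so root never pays for the kperf lookup and the effective policy
+	 * is "root OR kperf-blessed" rather than a second privilege gate
+	 * stacked on the first.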
+ */ + if (ret) + ret = kperf_access_check(); +#endif /* KPERF */ if (ret) return(ret); @@ -1760,7 +1774,10 @@ sysctl_kdebug_ops SYSCTL_HANDLER_ARGS case KERN_KDGETENTROPY: case KERN_KDENABLE_BG_TRACE: case KERN_KDDISABLE_BG_TRACE: + case KERN_KDREADCURTHRMAP: case KERN_KDSET_TYPEFILTER: + case KERN_KDBUFWAIT: + case KERN_KDCPUMAP: ret = kdbg_control(name, namelen, oldp, oldlenp); break; @@ -1783,7 +1800,6 @@ SYSCTL_PROC(_kern, KERN_KDEBUG, kdebug, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LOCKED ""); -#if !CONFIG_EMBEDDED /* * Return the top *sizep bytes of the user stack, or the entire area of the * user stack down through the saved exec_path, whichever is smaller. @@ -1814,7 +1830,6 @@ SYSCTL_PROC(_kern, KERN_PROCARGS, procargs, CTLTYPE_NODE|CTLFLAG_RD | CTLFLAG_LO sysctl_doprocargs, /* Handler function */ NULL, /* Data pointer */ ""); -#endif /* !CONFIG_EMBEDDED */ STATIC int sysctl_doprocargs2 SYSCTL_HANDLER_ARGS @@ -2193,7 +2208,7 @@ SYSCTL_STRING(_kern, KERN_VERSION, version, version, 0, ""); SYSCTL_STRING(_kern, OID_AUTO, uuid, CTLFLAG_RD | CTLFLAG_KERN | CTLFLAG_LOCKED, - &kernel_uuid[0], 0, ""); + &kernel_uuid_string[0], 0, ""); #if DEBUG int debug_kprint_syscall = 0; @@ -2478,6 +2493,9 @@ SYSCTL_UINT(_kern, OID_AUTO, vm_max_batch, CTLFLAG_RW | CTLFLAG_KERN | CTLFLAG_LOCKED, &vm_max_batch, 0, ""); +SYSCTL_STRING(_kern, OID_AUTO, bootsessionuuid, + CTLFLAG_RD | CTLFLAG_LOCKED, + &bootsessionuuid_string, sizeof(bootsessionuuid_string) , ""); STATIC int sysctl_boottime @@ -2638,10 +2656,83 @@ SYSCTL_PROC(_kern, OID_AUTO, imgsrcinfo, #endif /* CONFIG_IMGSRC_ACCESS */ -SYSCTL_INT(_kern, OID_AUTO, timer_coalescing_enabled, - CTLFLAG_RW | CTLFLAG_LOCKED, + +SYSCTL_DECL(_kern_timer); +SYSCTL_NODE(_kern, OID_AUTO, timer, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "timer"); + +SYSCTL_INT(_kern_timer, OID_AUTO, coalescing_enabled, + CTLFLAG_KERN | CTLFLAG_RW | CTLFLAG_LOCKED, &mach_timer_coalescing_enabled, 0, ""); +SYSCTL_QUAD(_kern_timer, OID_AUTO, deadline_tracking_bin_1, + CTLFLAG_RW | CTLFLAG_LOCKED, + &timer_deadline_tracking_bin_1, ""); +SYSCTL_QUAD(_kern_timer, OID_AUTO, deadline_tracking_bin_2, + CTLFLAG_RW | CTLFLAG_LOCKED, + &timer_deadline_tracking_bin_2, ""); + +SYSCTL_DECL(_kern_timer_longterm); +SYSCTL_NODE(_kern_timer, OID_AUTO, longterm, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "longterm"); + +/* Must match definition in osfmk/kern/timer_call.c */ +enum { + THRESHOLD, QCOUNT, + ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS, + LATENCY, LATENCY_MIN, LATENCY_MAX +}; +extern uint64_t timer_sysctl_get(int); +extern int timer_sysctl_set(int, uint64_t); + +STATIC int +sysctl_timer +(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + int oid = (int)arg1; + uint64_t value = timer_sysctl_get(oid); + uint64_t new_value; + int error; + int changed; + + error = sysctl_io_number(req, value, sizeof(value), &new_value, &changed); + if (changed) + error = timer_sysctl_set(oid, new_value); + + return error; +} + +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, threshold, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + (void *) THRESHOLD, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, qlen, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) QCOUNT, 0, sysctl_timer, "Q", ""); +#if DEBUG +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, enqueues, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) ENQUEUES, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, dequeues, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) 
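+/*
+ * Each OID in this subtree reuses sysctl_timer() and smuggles its
+ * selector through arg1, so one handler serves all of
+ * kern.timer.longterm.*.  Reading a value from userland is the usual
+ * sysctlbyname() dance; a minimal sketch, assuming <sys/sysctl.h>:
+ *
+ *	uint64_t thresh;
+ *	size_t len = sizeof(thresh);
+ *	if (sysctlbyname("kern.timer.longterm.threshold",
+ *	    &thresh, &len, NULL, 0) == -1)
+ *		err(1, "sysctlbyname");
+ */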
DEQUEUES, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, escalates, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) ESCALATES, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, scans, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) SCANS, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, preempts, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) PREEMPTS, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, latency, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) LATENCY, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, latency_min, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) LATENCY_MIN, 0, sysctl_timer, "Q", ""); +SYSCTL_PROC(_kern_timer_longterm, OID_AUTO, latency_max, + CTLTYPE_QUAD | CTLFLAG_RD | CTLFLAG_LOCKED, + (void *) LATENCY_MAX, 0, sysctl_timer, "Q", ""); +#endif /* DEBUG */ + STATIC int sysctl_usrstack (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) @@ -2795,6 +2886,9 @@ sysctl_setthread_cpupercent uint8_t percent = 0; int ms_refill = 0; + if (!req->newptr) + return (0); + old_value = 0; if ((error = sysctl_io_number(req, old_value, sizeof(old_value), &new_value, NULL)) != 0) @@ -2808,14 +2902,14 @@ sysctl_setthread_cpupercent /* * If the caller is specifying a percentage of 0, this will unset the CPU limit, if present. */ - if ((kret = thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, percent, ms_refill * NSEC_PER_MSEC)) != 0) + if ((kret = thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, percent, ms_refill * (int)NSEC_PER_MSEC)) != 0) return (EIO); return (0); } SYSCTL_PROC(_kern, OID_AUTO, setthread_cpupercent, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY, + CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_ANYBODY, 0, 0, sysctl_setthread_cpupercent, "I", "set thread cpu percentage limit"); @@ -2975,6 +3069,12 @@ sysctl_freeze_enabled SYSCTL_HANDLER_ARGS if (error || !req->newptr) return (error); + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + //assert(req->newptr); + printf("Failed this request to set the sysctl\n"); + return EINVAL; + } + /* * If freeze is being disabled, we need to move dirty pages out from the throttle to the active queue. 
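 *
 * Note that the COMPRESSED_PAGER check just above makes this knob
 * read-only in practice whenever the compressed pager is active: the
 * freezer and the compressor manage the same pages, so flipping
 * freeze on or off underneath the compressor is refused with EINVAL.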
*/ @@ -3060,7 +3160,7 @@ fetch_process_cputype( else #endif { - ret = cpu_type(); + ret = cpu_type() & ~CPU_ARCH_MASK; if (IS_64BIT_PROCESS(p)) ret |= CPU_ARCH_ABI64; } @@ -3182,6 +3282,39 @@ SYSCTL_INT(_vm, OID_AUTO, vm_copy_src_not_symmetric, CTLFLAG_RD | CTLFLAG_LOCKED SYSCTL_INT(_vm, OID_AUTO, vm_copy_src_large, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_map_copy_overwrite_aligned_src_large, 0, ""); +extern uint32_t vm_page_external_count; +extern uint32_t vm_page_filecache_min; + +SYSCTL_INT(_vm, OID_AUTO, vm_page_external_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_external_count, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, vm_page_filecache_min, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_page_filecache_min, 0, ""); + +extern int vm_compressor_mode; +extern uint32_t swapout_target_age; +extern int64_t compressor_bytes_used; +extern uint32_t compressor_eval_period_in_msecs; +extern uint32_t compressor_sample_min_in_msecs; +extern uint32_t compressor_sample_max_in_msecs; +extern uint32_t compressor_thrashing_threshold_per_10msecs; +extern uint32_t compressor_thrashing_min_per_10msecs; +extern uint32_t vm_compressor_minorcompact_threshold_divisor; +extern uint32_t vm_compressor_majorcompact_threshold_divisor; +extern uint32_t vm_compressor_unthrottle_threshold_divisor; +extern uint32_t vm_compressor_catchup_threshold_divisor; + +SYSCTL_INT(_vm, OID_AUTO, compressor_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_compressor_mode, 0, ""); +SYSCTL_QUAD(_vm, OID_AUTO, compressor_bytes_used, CTLFLAG_RD | CTLFLAG_LOCKED, &compressor_bytes_used, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_swapout_target_age, CTLFLAG_RD | CTLFLAG_LOCKED, &swapout_target_age, 0, ""); + +SYSCTL_INT(_vm, OID_AUTO, compressor_eval_period_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_eval_period_in_msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_sample_min_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_sample_min_in_msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_sample_max_in_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_sample_max_in_msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_thrashing_threshold_per_10msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_thrashing_threshold_per_10msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_thrashing_min_per_10msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &compressor_thrashing_min_per_10msecs, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_minorcompact_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_minorcompact_threshold_divisor, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_majorcompact_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_majorcompact_threshold_divisor, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_unthrottle_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_unthrottle_threshold_divisor, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, compressor_catchup_threshold_divisor, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_compressor_catchup_threshold_divisor, 0, ""); + /* * enable back trace events for thread blocks */ @@ -3228,11 +3361,3 @@ SYSCTL_STRING(_kern, OID_AUTO, sched, * Only support runtime modification on embedded platforms * with development config enabled */ -#if CONFIG_EMBEDDED -#if !SECURE_KERNEL -extern int precise_user_kernel_time; -SYSCTL_INT(_kern, OID_AUTO, precise_user_kernel_time, - CTLFLAG_RW | CTLFLAG_LOCKED, - &precise_user_kernel_time, 0, "Precise accounting of kernel vs. 
user time"); -#endif -#endif diff --git a/bsd/kern/kern_tests.c b/bsd/kern/kern_tests.c new file mode 100644 index 000000000..8499a3545 --- /dev/null +++ b/bsd/kern/kern_tests.c @@ -0,0 +1,48 @@ +/*************************************************************** + * Test Declarations Go Here * + ***************************************************************/ +#include +#include +#include +#include + +/*************************************************************** + * End Test Declarations * + ***************************************************************/ +typedef int (*xnu_test_func_t)(void); + +typedef struct xnu_test { + xnu_test_func_t t_func; + const char *t_name; +} xnu_test_t; + +#define DEFINE_XNU_TEST(func) { func, #func } + +xnu_test_t xnu_tests[] = { +}; + +#define NUM_XNU_TESTS (sizeof(xnu_tests) / sizeof(xnu_test_t)) + +static int +run_xnu_tests +(struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req) +{ + unsigned i; + int result; + + for (i = 0; i < NUM_XNU_TESTS; i++) { + result = xnu_tests[i].t_func(); + if (result == 0) { + kprintf("xnu_tests: %s passed.\n", xnu_tests[i].t_name); + } else{ + panic("xnu_tests: %s failed.\n", xnu_tests[i].t_name); + } + } + + return sysctl_handle_int(oidp, NULL, 0, req); +} + +SYSCTL_PROC(_kern, OID_AUTO, kern_tests, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, run_xnu_tests, "I", ""); + diff --git a/bsd/kern/kern_time.c b/bsd/kern/kern_time.c index 315863131..cfbd3c99f 100644 --- a/bsd/kern/kern_time.c +++ b/bsd/kern/kern_time.c @@ -80,6 +80,7 @@ #include #include #include +#include /* for net_uptime2timeval() */ #include #include @@ -99,7 +100,7 @@ lck_grp_attr_t *tz_slock_grp_attr; static void setthetime( struct timeval *tv); -void time_zone_slock_init(void) __attribute__((section("__TEXT, initcode"))); +void time_zone_slock_init(void); /* * Time of day and interval timer support. @@ -158,10 +159,8 @@ settimeofday(__unused struct proc *p, struct settimeofday_args *uap, __unused i if (error) return (error); #endif -#ifndef CONFIG_EMBEDDED if ((error = suser(kauth_cred_get(), &p->p_acflag))) return (error); -#endif /* Verify all parameters before changing time */ if (uap->tv) { if (IS_64BIT_PROCESS(p)) { @@ -432,7 +431,8 @@ setitimer(struct proc *p, struct setitimer_args *uap, int32_t *retval) microuptime(&p->p_rtime); timevaladd(&p->p_rtime, &aitv.it_value); p->p_realtimer = aitv; - if (!thread_call_enter_delayed(p->p_rcall, tvtoabstime(&p->p_rtime))) + if (!thread_call_enter_delayed_with_leeway(p->p_rcall, NULL, + tvtoabstime(&p->p_rtime), 0, THREAD_CALL_DELAY_USER_NORMAL)) p->p_ractive++; } else { timerclear(&p->p_rtime); @@ -654,6 +654,19 @@ microtime( tvp->tv_usec = tv_usec; } +void +microtime_with_abstime( + struct timeval *tvp, uint64_t *abstime) +{ + clock_sec_t tv_sec; + clock_usec_t tv_usec; + + clock_get_calendar_absolute_and_microtime(&tv_sec, &tv_usec, abstime); + + tvp->tv_sec = tv_sec; + tvp->tv_usec = tv_usec; +} + void microuptime( struct timeval *tvp) @@ -709,6 +722,86 @@ tvtoabstime( return (result + usresult); } + +#if NETWORKING +/* + * ratecheck(): simple time-based rate-limit checking. + */ +int +ratecheck(struct timeval *lasttime, const struct timeval *mininterval) +{ + struct timeval tv, delta; + int rv = 0; + + net_uptime2timeval(&tv); + delta = tv; + timevalsub(&delta, lasttime); + + /* + * check for 0,0 is so that the message will be seen at least once, + * even if interval is huge. 
+ */ + if (timevalcmp(&delta, mininterval, >=) || + (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) { + *lasttime = tv; + rv = 1; + } + + return (rv); +} + +/* + * ppsratecheck(): packets (or events) per second limitation. + */ +int +ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps) +{ + struct timeval tv, delta; + int rv; + + net_uptime2timeval(&tv); + + timersub(&tv, lasttime, &delta); + + /* + * Check for 0,0 so that the message will be seen at least once. + * If more than one second has passed since the last update of + * lasttime, reset the counter. + * + * we do increment *curpps even in *curpps < maxpps case, as some may + * try to use *curpps for stat purposes as well. + */ + if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) || + delta.tv_sec >= 1) { + *lasttime = tv; + *curpps = 0; + rv = 1; + } else if (maxpps < 0) + rv = 1; + else if (*curpps < maxpps) + rv = 1; + else + rv = 0; + +#if 1 /* DIAGNOSTIC? */ + /* be careful about wrap-around */ + if (*curpps + 1 > 0) + *curpps = *curpps + 1; +#else + /* + * assume that there's not too many calls to this function. + * not sure if the assumption holds, as it depends on *caller's* + * behavior, not the behavior of this function. + * IMHO it is wrong to make assumption on the caller's behavior, + * so the above #if is #if 1, not #ifdef DIAGNOSTIC. + */ + *curpps = *curpps + 1; +#endif + + return (rv); +} +#endif /* NETWORKING */ + void time_zone_slock_init(void) { diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c index 259873728..d3cfc8deb 100644 --- a/bsd/kern/kpi_mbuf.c +++ b/bsd/kern/kpi_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2011 Apple Inc. All rights reserved. + * Copyright (c) 2004-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -38,15 +38,17 @@ #include #include #include +#include #include "net/net_str_id.h" +/* mbuf flags visible to KPI clients; do not add private flags here */ static const mbuf_flags_t mbuf_flags_mask = (MBUF_EXT | MBUF_PKTHDR | MBUF_EOR | MBUF_LOOP | MBUF_BCAST | MBUF_MCAST | MBUF_FRAG | MBUF_FIRSTFRAG | MBUF_LASTFRAG | MBUF_PROMISC | MBUF_HASFCS); -#define MBUF_PKTAUXF_MASK \ - (MBUF_PKTAUXF_INET_RESOLVE_RTR | MBUF_PKTAUXF_INET6_RESOLVE_RTR) +/* Unalterable mbuf flags */ +static const mbuf_flags_t mbuf_cflags_mask = (MBUF_EXT); void* mbuf_data(mbuf_t mbuf) { @@ -91,7 +93,7 @@ errno_t mbuf_align_32(mbuf_t mbuf, size_t len) */ addr64_t mbuf_data_to_physical(void* ptr) { - return (addr64_t)(uintptr_t)mcl_to_paddr(ptr); + return ((addr64_t)mcl_to_paddr(ptr)); } errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf) @@ -480,27 +482,61 @@ errno_t mbuf_settype(mbuf_t mbuf, mbuf_type_t new_type) return 0; } -mbuf_flags_t mbuf_flags(const mbuf_t mbuf) +mbuf_flags_t +mbuf_flags(const mbuf_t mbuf) { - return mbuf->m_flags & mbuf_flags_mask; + return (mbuf->m_flags & mbuf_flags_mask); } -errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags) +errno_t +mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags) { - if ((flags & ~mbuf_flags_mask) != 0) return EINVAL; - mbuf->m_flags = flags | - (mbuf->m_flags & ~mbuf_flags_mask); - - return 0; + errno_t ret = 0; + + if ((flags | (mbuf->m_flags & mbuf_flags_mask)) & + (~mbuf_flags_mask | mbuf_cflags_mask)) { + ret = EINVAL; + } else { + mbuf_flags_t oflags = mbuf->m_flags; + mbuf->m_flags = flags | (mbuf->m_flags & ~mbuf_flags_mask); + /* + * If M_PKTHDR bit has changed, we have work to do; + * m_reinit() will take care of setting/clearing the + * bit, as well as the rest of bookkeeping. 
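+	 *
+	 * In other words, toggling MBUF_PKTHDR through this KPI is a real
+	 * conversion, not a plain bit flip, and m_reinit() may refuse it
+	 * (e.g. when demoting an mbuf whose header still owns state),
+	 * which is why the result is returned rather than assumed.  A
+	 * minimal caller sketch:
+	 *
+	 *	errno_t err = mbuf_setflags(m,
+	 *	    mbuf_flags(m) | MBUF_PKTHDR);
+	 *	if (err != 0)
+	 *		;	// conversion refused; mbuf unchanged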
+ */ + if ((oflags ^ mbuf->m_flags) & M_PKTHDR) { + mbuf->m_flags ^= M_PKTHDR; /* restore */ + ret = m_reinit(mbuf, + (mbuf->m_flags & M_PKTHDR) ? 0 : 1); + } + } + + return (ret); } -errno_t mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags, mbuf_flags_t mask) +errno_t +mbuf_setflags_mask(mbuf_t mbuf, mbuf_flags_t flags, mbuf_flags_t mask) { - if (((flags | mask) & ~mbuf_flags_mask) != 0) return EINVAL; - - mbuf->m_flags = (flags & mask) | (mbuf->m_flags & ~mask); - - return 0; + errno_t ret = 0; + + if ((flags | mask) & (~mbuf_flags_mask | mbuf_cflags_mask)) { + ret = EINVAL; + } else { + mbuf_flags_t oflags = mbuf->m_flags; + mbuf->m_flags = (flags & mask) | (mbuf->m_flags & ~mask); + /* + * If M_PKTHDR bit has changed, we have work to do; + * m_reinit() will take care of setting/clearing the + * bit, as well as the rest of bookkeeping. + */ + if ((oflags ^ mbuf->m_flags) & M_PKTHDR) { + mbuf->m_flags ^= M_PKTHDR; /* restore */ + ret = m_reinit(mbuf, + (mbuf->m_flags & M_PKTHDR) ? 0 : 1); + } + } + + return (ret); } errno_t mbuf_copy_pkthdr(mbuf_t dest, const mbuf_t src) @@ -543,12 +579,12 @@ errno_t mbuf_pkthdr_setrcvif(mbuf_t mbuf, ifnet_t ifnet) void* mbuf_pkthdr_header(const mbuf_t mbuf) { - return mbuf->m_pkthdr.header; + return mbuf->m_pkthdr.pkt_hdr; } void mbuf_pkthdr_setheader(mbuf_t mbuf, void *header) { - mbuf->m_pkthdr.header = (void*)header; + mbuf->m_pkthdr.pkt_hdr = (void*)header; } void @@ -558,59 +594,28 @@ mbuf_inbound_modified(mbuf_t mbuf) mbuf->m_pkthdr.csum_flags = 0; } -extern void in_cksum_offset(struct mbuf* m, size_t ip_offset); -extern void in_delayed_cksum_offset(struct mbuf *m, int ip_offset); - void -mbuf_outbound_finalize(mbuf_t mbuf, u_int32_t protocol_family, size_t protocol_offset) +mbuf_outbound_finalize(struct mbuf *m, u_int32_t pf, size_t o) { - if ((mbuf->m_pkthdr.csum_flags & - (CSUM_DELAY_DATA | CSUM_DELAY_IP | CSUM_TCP_SUM16 | CSUM_DELAY_IPV6_DATA)) == 0) - return; - /* Generate the packet in software, client needs it */ - switch (protocol_family) { - case PF_INET: - if (mbuf->m_pkthdr.csum_flags & CSUM_TCP_SUM16) { - /* - * If you're wondering where this lovely code comes - * from, we're trying to undo what happens in ip_output. - * Look for CSUM_TCP_SUM16 in ip_output. - */ - u_int16_t first, second; - mbuf->m_pkthdr.csum_flags &= ~CSUM_TCP_SUM16; - mbuf->m_pkthdr.csum_flags |= CSUM_TCP; - first = mbuf->m_pkthdr.csum_data >> 16; - second = mbuf->m_pkthdr.csum_data & 0xffff; - mbuf->m_pkthdr.csum_data = first - second; - } - if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum_offset(mbuf, protocol_offset); - } - - if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_IP) { - in_cksum_offset(mbuf, protocol_offset); - } - - mbuf->m_pkthdr.csum_flags &= ~(CSUM_DELAY_DATA | CSUM_DELAY_IP); - break; + switch (pf) { + case PF_INET: + (void) in_finalize_cksum(m, o, m->m_pkthdr.csum_flags); + break; - case PF_INET6: + case PF_INET6: +#if INET6 + /* + * Checksum offload should not have been enabled when + * extension headers exist; indicate that the callee + * should skip such case by setting optlen to -1. + */ + (void) in6_finalize_cksum(m, o, -1, -1, m->m_pkthdr.csum_flags); +#endif /* INET6 */ + break; - if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) { - in_delayed_cksum_offset(mbuf, protocol_offset); - } - mbuf->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA; - break; - - - default: - /* - * Not sure what to do here if anything. - * Hardware checksum code looked pretty IPv4/IPv6 specific. 
- */ - if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP | CSUM_DELAY_IPV6_DATA)) != 0) - panic("mbuf_outbound_finalize - CSUM flags set for non-IPv4 or IPv6 packet (%u)!\n", protocol_family); + default: + break; } } @@ -650,7 +655,7 @@ mbuf_clear_vlan_tag( static const mbuf_csum_request_flags_t mbuf_valid_csum_request_flags = MBUF_CSUM_REQ_IP | MBUF_CSUM_REQ_TCP | MBUF_CSUM_REQ_UDP | - MBUF_CSUM_REQ_SUM16 | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6; + MBUF_CSUM_PARTIAL | MBUF_CSUM_REQ_TCPIPV6 | MBUF_CSUM_REQ_UDPIPV6; errno_t mbuf_set_csum_requested( @@ -713,7 +718,7 @@ mbuf_clear_csum_requested( static const mbuf_csum_performed_flags_t mbuf_valid_csum_performed_flags = MBUF_CSUM_DID_IP | MBUF_CSUM_IP_GOOD | MBUF_CSUM_DID_DATA | - MBUF_CSUM_PSEUDO_HDR | MBUF_CSUM_TCP_SUM16; + MBUF_CSUM_PSEUDO_HDR | MBUF_CSUM_PARTIAL; errno_t mbuf_set_csum_performed( @@ -930,6 +935,101 @@ mbuf_tag_free( return; } +/* + * Maximum length of driver auxiliary data; keep this small to + * fit in a single mbuf to avoid wasting memory, rounded down to + * the nearest 64-bit boundary. This takes into account mbuf + * tag-related (m_taghdr + m_tag) as well m_drvaux_tag structs. + */ +#define MBUF_DRVAUX_MAXLEN \ + P2ROUNDDOWN(MLEN - sizeof (struct m_taghdr) - \ + M_TAG_ALIGN(sizeof (struct m_drvaux_tag)), sizeof (uint64_t)) + +errno_t +mbuf_add_drvaux(mbuf_t mbuf, mbuf_how_t how, u_int32_t family, + u_int32_t subfamily, size_t length, void **data_p) +{ + struct m_drvaux_tag *p; + struct m_tag *tag; + + if (mbuf == NULL || !(mbuf->m_flags & M_PKTHDR) || + length == 0 || length > MBUF_DRVAUX_MAXLEN) + return (EINVAL); + + if (data_p != NULL) + *data_p = NULL; + + /* Check if one is already associated */ + if ((tag = m_tag_locate(mbuf, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DRVAUX, NULL)) != NULL) + return (EEXIST); + + /* Tag is (m_drvaux_tag + module specific data) */ + if ((tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DRVAUX, + sizeof (*p) + length, how, mbuf)) == NULL) + return ((how == MBUF_WAITOK) ? 
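+	/*
+	 * A minimal driver-side sketch of the aux-data KPI defined here
+	 * (the family/subfamily values and struct are hypothetical):
+	 *
+	 *	struct my_aux { uint32_t cookie; };
+	 *	struct my_aux *ap;
+	 *
+	 *	if (mbuf_add_drvaux(m, MBUF_WAITOK, MY_FAMILY, MY_SUBFAMILY,
+	 *	    sizeof(*ap), (void **)&ap) == 0) {
+	 *		ap->cookie = 0xfeed;
+	 *		// ... use the scratch area ...
+	 *		mbuf_del_drvaux(m);	// when done
+	 *	}
+	 */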
ENOMEM : EWOULDBLOCK); + + p = (struct m_drvaux_tag *)(tag + 1); + p->da_family = family; + p->da_subfamily = subfamily; + p->da_length = length; + + /* Associate the tag */ + m_tag_prepend(mbuf, tag); + + if (data_p != NULL) + *data_p = (p + 1); + + return (0); +} + +errno_t +mbuf_find_drvaux(mbuf_t mbuf, u_int32_t *family_p, u_int32_t *subfamily_p, + u_int32_t *length_p, void **data_p) +{ + struct m_drvaux_tag *p; + struct m_tag *tag; + + if (mbuf == NULL || !(mbuf->m_flags & M_PKTHDR) || data_p == NULL) + return (EINVAL); + + *data_p = NULL; + + if ((tag = m_tag_locate(mbuf, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DRVAUX, NULL)) == NULL) + return (ENOENT); + + /* Must be at least size of m_drvaux_tag */ + VERIFY(tag->m_tag_len >= sizeof (*p)); + + p = (struct m_drvaux_tag *)(tag + 1); + VERIFY(p->da_length > 0 && p->da_length <= MBUF_DRVAUX_MAXLEN); + + if (family_p != NULL) + *family_p = p->da_family; + if (subfamily_p != NULL) + *subfamily_p = p->da_subfamily; + if (length_p != NULL) + *length_p = p->da_length; + + *data_p = (p + 1); + + return (0); +} + +void +mbuf_del_drvaux(mbuf_t mbuf) +{ + struct m_tag *tag; + + if (mbuf == NULL || !(mbuf->m_flags & M_PKTHDR)) + return; + + if ((tag = m_tag_locate(mbuf, KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_DRVAUX, NULL)) != NULL) + m_tag_delete(mbuf, tag); +} + /* mbuf stats */ void mbuf_stats(struct mbuf_stat *stats) { @@ -1109,6 +1209,22 @@ mbuf_get_minclsize(void) return (MHLEN + MLEN); } +u_int32_t +mbuf_get_traffic_class_max_count(void) +{ + return (MBUF_TC_MAX); +} + +errno_t +mbuf_get_traffic_class_index(mbuf_traffic_class_t tc, u_int32_t *index) +{ + if (index == NULL || (u_int32_t)tc >= MBUF_TC_MAX) + return (EINVAL); + + *index = MBUF_SCIDX(m_service_class_from_val(MBUF_TC2SCVAL(tc))); + return (0); +} + mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t m) { @@ -1132,10 +1248,26 @@ int mbuf_is_traffic_class_privileged(mbuf_t m) { if (m == NULL || !(m->m_flags & M_PKTHDR) || - !MBUF_VALID_SC(m->m_pkthdr.svc)) + !MBUF_VALID_SC(m->m_pkthdr.pkt_svc)) return (0); - return (m->m_pkthdr.aux_flags & MAUXF_PRIO_PRIVILEGED); + return ((m->m_pkthdr.pkt_flags & PKTF_PRIO_PRIVILEGED) ? 
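+	/*
+	 * The *_max_count()/*_index() helpers here exist so drivers can
+	 * size per-class arrays without hardcoding the class list; a
+	 * minimal sketch with a hypothetical counter array:
+	 *
+	 *	uint32_t idx, n = mbuf_get_traffic_class_max_count();
+	 *	uint64_t counts[n];	// per-TC counters
+	 *
+	 *	if (mbuf_get_traffic_class_index(
+	 *	    mbuf_get_traffic_class(m), &idx) == 0)
+	 *		counts[idx]++;
+	 */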
1 : 0); +} + +u_int32_t +mbuf_get_service_class_max_count(void) +{ + return (MBUF_SC_MAX_CLASSES); +} + +errno_t +mbuf_get_service_class_index(mbuf_svc_class_t sc, u_int32_t *index) +{ + if (index == NULL || !MBUF_VALID_SC(sc)) + return (EINVAL); + + *index = MBUF_SCIDX(sc); + return (0); } mbuf_svc_class_t @@ -1160,16 +1292,34 @@ errno_t mbuf_pkthdr_aux_flags(mbuf_t m, mbuf_pkthdr_aux_flags_t *flagsp) { u_int32_t flags; + if (m == NULL || !(m->m_flags & M_PKTHDR) || flagsp == NULL) return (EINVAL); - flags = m->m_pkthdr.aux_flags & MBUF_PKTAUXF_MASK; + *flagsp = 0; + flags = m->m_pkthdr.pkt_flags; + if ((flags & (PKTF_INET_RESOLVE|PKTF_RESOLVE_RTR)) == + (PKTF_INET_RESOLVE|PKTF_RESOLVE_RTR)) + *flagsp |= MBUF_PKTAUXF_INET_RESOLVE_RTR; + if ((flags & (PKTF_INET6_RESOLVE|PKTF_RESOLVE_RTR)) == + (PKTF_INET6_RESOLVE|PKTF_RESOLVE_RTR)) + *flagsp |= MBUF_PKTAUXF_INET6_RESOLVE_RTR; /* These 2 flags are mutually exclusive */ - VERIFY((flags & + VERIFY((*flagsp & (MBUF_PKTAUXF_INET_RESOLVE_RTR | MBUF_PKTAUXF_INET6_RESOLVE_RTR)) != (MBUF_PKTAUXF_INET_RESOLVE_RTR | MBUF_PKTAUXF_INET6_RESOLVE_RTR)); - *flagsp = flags; + return (0); +} + +errno_t +mbuf_get_driver_scratch(mbuf_t m, u_int8_t **area, size_t *area_len) +{ + if (m == NULL || area == NULL || area_len == NULL || + !(m->m_flags & M_PKTHDR)) + return (EINVAL); + + *area_len = m_scratch_get(m, area); return (0); } diff --git a/bsd/kern/kpi_socket.c b/bsd/kern/kpi_socket.c index 3de525cbe..1db81b353 100644 --- a/bsd/kern/kpi_socket.c +++ b/bsd/kern/kpi_socket.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2003-2011 Apple Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,11 +22,11 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#define __KPI__ +#define __KPI__ #include #include #include @@ -46,79 +46,66 @@ #include #include -extern int soclose_locked(struct socket *so); -extern void soclose_wait_locked(struct socket *so); -extern int so_isdstlocal(struct socket *so); - -errno_t sock_send_internal( - socket_t sock, - const struct msghdr *msg, - mbuf_t data, - int flags, - size_t *sentlen); - -typedef void (*so_upcall)(struct socket *, caddr_t , int ); - +static errno_t sock_send_internal(socket_t, const struct msghdr *, + mbuf_t, int, size_t *); +static void sock_setupcalls_common(socket_t, sock_upcall, void *, + sock_upcall, void *); errno_t -sock_accept( - socket_t sock, - struct sockaddr *from, - int fromlen, - int flags, - sock_upcall callback, - void* cookie, - socket_t *new_sock) +sock_accept(socket_t sock, struct sockaddr *from, int fromlen, int flags, + sock_upcall callback, void *cookie, socket_t *new_sock) { struct sockaddr *sa; struct socket *new_so; lck_mtx_t *mutex_held; - int dosocklock; + int dosocklock; errno_t error = 0; - - if (sock == NULL || new_sock == NULL) return EINVAL; + + if (sock == NULL || new_sock == NULL) + return (EINVAL); + socket_lock(sock, 1); if ((sock->so_options & SO_ACCEPTCONN) == 0) { socket_unlock(sock, 1); - return EINVAL; + return (EINVAL); } if ((flags & ~(MSG_DONTWAIT)) != 0) { socket_unlock(sock, 1); - return ENOTSUP; + return (ENOTSUP); } if (((flags & MSG_DONTWAIT) != 0 || (sock->so_state & SS_NBIO) != 0) && - sock->so_comp.tqh_first == NULL) { + sock->so_comp.tqh_first == NULL) { socket_unlock(sock, 1); - return EWOULDBLOCK; + return (EWOULDBLOCK); } if (sock->so_proto->pr_getlock != NULL) { mutex_held = (*sock->so_proto->pr_getlock)(sock, 0); dosocklock = 1; - } - else { + } else { mutex_held = sock->so_proto->pr_domain->dom_mtx; dosocklock = 0; } - + while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) { if (sock->so_state & SS_CANTRCVMORE) { sock->so_error = ECONNABORTED; break; } - error = msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK | PCATCH, "sock_accept", NULL); - if (error) { + error = msleep((caddr_t)&sock->so_timeo, mutex_held, + PSOCK | PCATCH, "sock_accept", NULL); + if (error != 0) { socket_unlock(sock, 1); return (error); } } - if (sock->so_error) { + if (sock->so_error != 0) { error = sock->so_error; sock->so_error = 0; socket_unlock(sock, 1); return (error); } - + new_so = TAILQ_FIRST(&sock->so_comp); TAILQ_REMOVE(&sock->so_comp, new_so, so_list); sock->so_qlen--; @@ -145,31 +132,28 @@ sock_accept( if (dosocklock) { lck_mtx_assert(new_so->so_proto->pr_getlock(new_so, 0), - LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT_NOTOWNED); socket_lock(new_so, 1); } - + new_so->so_state &= ~SS_COMP; new_so->so_head = NULL; (void) soacceptlock(new_so, &sa, 0); - + socket_unlock(sock, 1); /* release the head */ - if (callback) { - new_so->so_upcall = (so_upcall) callback; - new_so->so_upcallarg = cookie; - new_so->so_rcv.sb_flags |= SB_UPCALL; -#if CONFIG_SOWUPCALL - new_so->so_snd.sb_flags |= SB_UPCALL; -#endif + /* see comments in sock_setupcall() */ + if (callback != NULL) { + sock_setupcalls_common(new_so, callback, cookie, NULL, NULL); } - - if (sa && from) - { - if (fromlen > sa->sa_len) fromlen = sa->sa_len; + + if (sa != NULL && from != NULL) { + if (fromlen > sa->sa_len) + fromlen = sa->sa_len; memcpy(from, sa, fromlen); } - if (sa) FREE(sa, M_SONAME); + if (sa != NULL) + FREE(sa, M_SONAME); /* * If the socket has been marked as inactive by sosetdefunct(), @@ -180,61 +164,57 @@ sock_accept( 
SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL); } *new_sock = new_so; - if (dosocklock) + if (dosocklock) socket_unlock(new_so, 1); - return error; + return (error); } errno_t -sock_bind( - socket_t sock, - const struct sockaddr *to) +sock_bind(socket_t sock, const struct sockaddr *to) { - int error = 0; + int error = 0; struct sockaddr *sa = NULL; struct sockaddr_storage ss; boolean_t want_free = TRUE; - if (sock == NULL || to == NULL) - return EINVAL; - - if (to->sa_len > sizeof(ss)) { + if (sock == NULL || to == NULL) + return (EINVAL); + + if (to->sa_len > sizeof (ss)) { MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, M_WAITOK); if (sa == NULL) - return ENOBUFS; + return (ENOBUFS); } else { sa = (struct sockaddr *)&ss; want_free = FALSE; } memcpy(sa, to, to->sa_len); - error = sobind(sock, sa); - + error = sobindlock(sock, sa, 1); /* will lock socket */ + if (sa != NULL && want_free == TRUE) - FREE(sa, M_SONAME); + FREE(sa, M_SONAME); - return error; + return (error); } errno_t -sock_connect( - socket_t sock, - const struct sockaddr *to, - int flags) +sock_connect(socket_t sock, const struct sockaddr *to, int flags) { - int error = 0; + int error = 0; lck_mtx_t *mutex_held; struct sockaddr *sa = NULL; struct sockaddr_storage ss; boolean_t want_free = TRUE; - - if (sock == NULL || to == NULL) return EINVAL; - - if (to->sa_len > sizeof(ss)) { + + if (sock == NULL || to == NULL) + return (EINVAL); + + if (to->sa_len > sizeof (ss)) { MALLOC(sa, struct sockaddr *, to->sa_len, M_SONAME, - (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK); + (flags & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK); if (sa == NULL) - return ENOBUFS; + return (ENOBUFS); } else { sa = (struct sockaddr *)&ss; want_free = FALSE; @@ -244,37 +224,37 @@ sock_connect( socket_lock(sock, 1); if ((sock->so_state & SS_ISCONNECTING) && - ((sock->so_state & SS_NBIO) != 0 || - (flags & MSG_DONTWAIT) != 0)) { + ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) { error = EALREADY; goto out; } error = soconnectlock(sock, sa, 0); if (!error) { if ((sock->so_state & SS_ISCONNECTING) && - ((sock->so_state & SS_NBIO) != 0 || (flags & MSG_DONTWAIT) != 0)) { + ((sock->so_state & SS_NBIO) != 0 || + (flags & MSG_DONTWAIT) != 0)) { error = EINPROGRESS; goto out; } - - if (sock->so_proto->pr_getlock != NULL) + + if (sock->so_proto->pr_getlock != NULL) mutex_held = (*sock->so_proto->pr_getlock)(sock, 0); - else - mutex_held = sock->so_proto->pr_domain->dom_mtx; + else + mutex_held = sock->so_proto->pr_domain->dom_mtx; - while ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) { - error = msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK | PCATCH, - "sock_connect", NULL); - if (error) + while ((sock->so_state & SS_ISCONNECTING) && + sock->so_error == 0) { + error = msleep((caddr_t)&sock->so_timeo, + mutex_held, PSOCK | PCATCH, "sock_connect", NULL); + if (error != 0) break; } - + if (error == 0) { error = sock->so_error; sock->so_error = 0; } - } - else { + } else { sock->so_state &= ~SS_ISCONNECTING; } out: @@ -282,28 +262,25 @@ out: if (sa != NULL && want_free == TRUE) FREE(sa, M_SONAME); - - return error; + + return (error); } errno_t -sock_connectwait( - socket_t sock, - const struct timeval *tv) +sock_connectwait(socket_t sock, const struct timeval *tv) { - lck_mtx_t * mutex_held; + lck_mtx_t *mutex_held; errno_t retval = 0; struct timespec ts; - + socket_lock(sock, 1); - - // Check if we're already connected or if we've already errored out - if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error) { - if 
(sock->so_error) { + + /* Check if we're already connected or if we've already errored out */ + if ((sock->so_state & SS_ISCONNECTING) == 0 || sock->so_error != 0) { + if (sock->so_error != 0) { retval = sock->so_error; sock->so_error = 0; - } - else { + } else { if ((sock->so_state & SS_ISCONNECTED) != 0) retval = 0; else @@ -311,63 +288,61 @@ sock_connectwait( } goto done; } - - // copied translation from timeval to hertz from SO_RCVTIMEO handling + + /* copied translation from timeval to hertz from SO_RCVTIMEO handling */ if (tv->tv_sec < 0 || tv->tv_sec > SHRT_MAX / hz || - tv->tv_usec < 0 || tv->tv_usec >= 1000000) { + tv->tv_usec < 0 || tv->tv_usec >= 1000000) { retval = EDOM; goto done; } - + ts.tv_sec = tv->tv_sec; - ts.tv_nsec = (tv->tv_usec * NSEC_PER_USEC); - if ( (ts.tv_sec + (ts.tv_nsec/NSEC_PER_SEC))/100 > SHRT_MAX) { + ts.tv_nsec = (tv->tv_usec * (integer_t)NSEC_PER_USEC); + if ((ts.tv_sec + (ts.tv_nsec/(long)NSEC_PER_SEC))/100 > SHRT_MAX) { retval = EDOM; goto done; } - - if (sock->so_proto->pr_getlock != NULL) + + if (sock->so_proto->pr_getlock != NULL) mutex_held = (*sock->so_proto->pr_getlock)(sock, 0); - else - mutex_held = sock->so_proto->pr_domain->dom_mtx; + else + mutex_held = sock->so_proto->pr_domain->dom_mtx; + + msleep((caddr_t)&sock->so_timeo, mutex_held, + PSOCK, "sock_connectwait", &ts); - msleep((caddr_t)&sock->so_timeo, mutex_held, PSOCK, "sock_connectwait", &ts); - - // Check if we're still waiting to connect + /* Check if we're still waiting to connect */ if ((sock->so_state & SS_ISCONNECTING) && sock->so_error == 0) { retval = EINPROGRESS; goto done; } - - if (sock->so_error) { + + if (sock->so_error != 0) { retval = sock->so_error; sock->so_error = 0; } - + done: socket_unlock(sock, 1); - return retval; + return (retval); } errno_t -sock_nointerrupt( - socket_t sock, - int on) +sock_nointerrupt(socket_t sock, int on) { socket_lock(sock, 1); if (on) { - sock->so_rcv.sb_flags |= SB_NOINTR; // This isn't safe - sock->so_snd.sb_flags |= SB_NOINTR; // This isn't safe - } - else { - sock->so_rcv.sb_flags &= ~SB_NOINTR; // This isn't safe - sock->so_snd.sb_flags &= ~SB_NOINTR; // This isn't safe + sock->so_rcv.sb_flags |= SB_NOINTR; /* This isn't safe */ + sock->so_snd.sb_flags |= SB_NOINTR; /* This isn't safe */ + } else { + sock->so_rcv.sb_flags &= ~SB_NOINTR; /* This isn't safe */ + sock->so_snd.sb_flags &= ~SB_NOINTR; /* This isn't safe */ } socket_unlock(sock, 1); - return 0; + return (0); } errno_t @@ -448,7 +423,7 @@ sock_getaddr(socket_t sock, struct sockaddr **psa, int peer) socket_lock(sock, 1); error = sogetaddr_locked(sock, psa, peer); socket_unlock(sock, 1); - + return (error); } @@ -460,59 +435,54 @@ sock_freeaddr(struct sockaddr *sa) } errno_t -sock_getsockopt( - socket_t sock, - int level, - int optname, - void *optval, - int *optlen) +sock_getsockopt(socket_t sock, int level, int optname, void *optval, + int *optlen) { - int error = 0; + int error = 0; struct sockopt sopt; - - if (sock == NULL || optval == NULL || optlen == NULL) return EINVAL; + + if (sock == NULL || optval == NULL || optlen == NULL) + return (EINVAL); + sopt.sopt_dir = SOPT_GET; sopt.sopt_level = level; sopt.sopt_name = optname; - sopt.sopt_val = CAST_USER_ADDR_T(optval); + sopt.sopt_val = CAST_USER_ADDR_T(optval); sopt.sopt_valsize = *optlen; sopt.sopt_p = kernproc; - error = sogetopt(sock, &sopt); /* will lock socket */ - if (error == 0) *optlen = sopt.sopt_valsize; - return error; + error = sogetoptlock(sock, &sopt, 1); /* will lock socket */ + if (error == 0) + *optlen = 
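+	/*
+	 * sogetoptlock()/sosetoptlock() replace the old sogetopt()/
+	 * sosetopt() calls; the trailing argument says whether the routine
+	 * should take the socket lock itself (1 here, since this KPI is
+	 * entered unlocked) or expects the caller to already hold it (0,
+	 * as in sock_settclassopt() below).  A minimal KPI-client sketch:
+	 *
+	 *	int on = 1, soerr, len = sizeof(soerr);
+	 *
+	 *	sock_setsockopt(so, SOL_SOCKET, SO_REUSEADDR,
+	 *	    &on, sizeof(on));
+	 *	sock_getsockopt(so, SOL_SOCKET, SO_ERROR, &soerr, &len);
+	 */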
sopt.sopt_valsize; + return (error); } errno_t -sock_ioctl( - socket_t sock, - unsigned long request, - void *argp) +sock_ioctl(socket_t sock, unsigned long request, void *argp) { - return soioctl(sock, request, argp, kernproc); /* will lock socket */ + return (soioctl(sock, request, argp, kernproc)); /* will lock socket */ } errno_t -sock_setsockopt( - socket_t sock, - int level, - int optname, - const void *optval, - int optlen) +sock_setsockopt(socket_t sock, int level, int optname, const void *optval, + int optlen) { struct sockopt sopt; - - if (sock == NULL || optval == NULL) return EINVAL; + + if (sock == NULL || optval == NULL) + return (EINVAL); + sopt.sopt_dir = SOPT_SET; sopt.sopt_level = level; sopt.sopt_name = optname; sopt.sopt_val = CAST_USER_ADDR_T(optval); sopt.sopt_valsize = optlen; sopt.sopt_p = kernproc; - return sosetopt(sock, &sopt); /* will lock socket */ + return (sosetoptlock(sock, &sopt, 1)); /* will lock socket */ } /* - * This follows the recommended mappings between DSCP code points and WMM access classes + * This follows the recommended mappings between DSCP code points + * and WMM access classes. */ static u_int32_t so_tc_from_dscp(u_int8_t dscp); static u_int32_t @@ -533,20 +503,19 @@ so_tc_from_dscp(u_int8_t dscp) } errno_t -sock_settclassopt( - socket_t sock, - const void *optval, - size_t optlen) { - +sock_settclassopt(socket_t sock, const void *optval, size_t optlen) +{ errno_t error = 0; struct sockopt sopt; int sotc; - if (sock == NULL || optval == NULL || optlen != sizeof(int)) return EINVAL; + if (sock == NULL || optval == NULL || optlen != sizeof (int)) + return (EINVAL); socket_lock(sock, 1); if (!(sock->so_state & SS_ISCONNECTED)) { - /* If the socket is not connected then we don't know + /* + * If the socket is not connected then we don't know * if the destination is on LAN or not. Skip * setting traffic class in this case */ @@ -554,7 +523,8 @@ sock_settclassopt( goto out; } - if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL || sock->so_pcb == NULL) { + if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL || + sock->so_pcb == NULL) { error = EINVAL; goto out; } @@ -567,39 +537,38 @@ sock_settclassopt( sopt.sopt_dir = SOPT_SET; sopt.sopt_val = CAST_USER_ADDR_T(&sotc); - sopt.sopt_valsize = sizeof(sotc); + sopt.sopt_valsize = sizeof (sotc); sopt.sopt_p = kernproc; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_TRAFFIC_CLASS; - socket_unlock(sock, 0); - error = sosetopt(sock, &sopt); - socket_lock(sock, 0); + error = sosetoptlock(sock, &sopt, 0); /* already locked */ if (error != 0) { - printf("sock_settclassopt: sosetopt SO_TRAFFIC_CLASS failed %d\n", error); + printf("%s: sosetopt SO_TRAFFIC_CLASS failed %d\n", + __func__, error); goto out; } - /* Check if the destination address is LAN or link local address. + /* + * Check if the destination address is LAN or link local address. * We do not want to set traffic class bits if the destination - * is not local - */ - if (!so_isdstlocal(sock)) { + * is not local. 
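+	 *
+	 * (When it is local, the traffic-class bits follow the usual
+	 * DSCP-to-WMM mapping: roughly, voice code points land in
+	 * SO_TC_VO, video in SO_TC_VI, background in SO_TC_BK and
+	 * everything else in SO_TC_BE; see so_tc_from_dscp() above.)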
+ */ + if (!so_isdstlocal(sock)) goto out; - } sopt.sopt_dir = SOPT_SET; sopt.sopt_val = CAST_USER_ADDR_T(optval); sopt.sopt_valsize = optlen; sopt.sopt_p = kernproc; - switch (sock->so_proto->pr_domain->dom_family) { - case AF_INET: + switch (SOCK_DOM(sock)) { + case PF_INET: sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = IP_TOS; break; - case AF_INET6: + case PF_INET6: sopt.sopt_level = IPPROTO_IPV6; sopt.sopt_name = IPV6_TCLASS; break; @@ -607,140 +576,135 @@ sock_settclassopt( error = EINVAL; goto out; } - + + error = sosetoptlock(sock, &sopt, 0); /* already locked */ socket_unlock(sock, 1); - return sosetopt(sock, &sopt); + return (error); out: socket_unlock(sock, 1); - return error; + return (error); } errno_t -sock_gettclassopt( - socket_t sock, - void *optval, - size_t *optlen) { +sock_gettclassopt(socket_t sock, void *optval, size_t *optlen) +{ + errno_t error = 0; + struct sockopt sopt; - errno_t error = 0; - struct sockopt sopt; - - if (sock == NULL || optval == NULL || optlen == NULL) return EINVAL; + if (sock == NULL || optval == NULL || optlen == NULL) + return (EINVAL); sopt.sopt_dir = SOPT_GET; - sopt.sopt_val = CAST_USER_ADDR_T(optval); + sopt.sopt_val = CAST_USER_ADDR_T(optval); sopt.sopt_valsize = *optlen; sopt.sopt_p = kernproc; socket_lock(sock, 1); if (sock->so_proto == NULL || sock->so_proto->pr_domain == NULL) { socket_unlock(sock, 1); - return EINVAL; + return (EINVAL); } - switch (sock->so_proto->pr_domain->dom_family) { - case AF_INET: + switch (SOCK_DOM(sock)) { + case PF_INET: sopt.sopt_level = IPPROTO_IP; sopt.sopt_name = IP_TOS; break; - case AF_INET6: + case PF_INET6: sopt.sopt_level = IPPROTO_IPV6; sopt.sopt_name = IPV6_TCLASS; break; default: socket_unlock(sock, 1); - return EINVAL; + return (EINVAL); } + error = sogetoptlock(sock, &sopt, 0); /* already locked */ socket_unlock(sock, 1); - error = sogetopt(sock, &sopt); /* will lock socket */ - if (error == 0) *optlen = sopt.sopt_valsize; - return error; + if (error == 0) + *optlen = sopt.sopt_valsize; + return (error); } errno_t -sock_listen( - socket_t sock, - int backlog) +sock_listen(socket_t sock, int backlog) { - if (sock == NULL) return EINVAL; - return solisten(sock, backlog); /* will lock socket */ + if (sock == NULL) + return (EINVAL); + + return (solisten(sock, backlog)); /* will lock socket */ } -static errno_t -sock_receive_internal( - socket_t sock, - struct msghdr *msg, - mbuf_t *data, - int flags, - size_t *recvdlen) +errno_t +sock_receive_internal(socket_t sock, struct msghdr *msg, mbuf_t *data, + int flags, size_t *recvdlen) { - uio_t auio; - struct mbuf *control = NULL; - int error = 0; - int length = 0; - struct sockaddr *fromsa; - char uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ]; - - if (sock == NULL) return EINVAL; - - auio = uio_createwithbuffer(((msg != NULL) ? msg->msg_iovlen : 0), - 0, UIO_SYSSPACE, UIO_READ, - &uio_buf[0], sizeof(uio_buf)); - if (msg && data == NULL) { + uio_t auio; + struct mbuf *control = NULL; + int error = 0; + int length = 0; + struct sockaddr *fromsa = NULL; + char uio_buf[ UIO_SIZEOF((msg != NULL) ? msg->msg_iovlen : 0) ]; + + if (sock == NULL) + return (EINVAL); + + auio = uio_createwithbuffer(((msg != NULL) ? 
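For reference, the same family switch is reachable from user space through the standard BSD sockets API. A hedged sketch, with error handling trimmed and without the kernel's so_isdstlocal() gate:

#include <netinet/in.h>	/* IPPROTO_IP, IPPROTO_IPV6, IP_TOS, IPV6_TCLASS */
#include <sys/socket.h>

/*
 * Set the traffic-class byte on a socket, choosing the option level
 * by address family as sock_settclassopt() does with SOCK_DOM(sock).
 */
static int
set_tclass(int fd, int family, int tclass)
{
	switch (family) {
	case AF_INET:
		return setsockopt(fd, IPPROTO_IP, IP_TOS,
		    &tclass, sizeof (tclass));
	case AF_INET6:
		return setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS,
		    &tclass, sizeof (tclass));
	default:
		return -1;	/* the KPI returns EINVAL here */
	}
}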
msg->msg_iovlen : 0), + 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof (uio_buf)); + if (msg != NULL && data == NULL) { int i; struct iovec *tempp = msg->msg_iov; - + for (i = 0; i < msg->msg_iovlen; i++) { - uio_addiov(auio, CAST_USER_ADDR_T((tempp + i)->iov_base), (tempp + i)->iov_len); + uio_addiov(auio, + CAST_USER_ADDR_T((tempp + i)->iov_base), + (tempp + i)->iov_len); } - if (uio_resid(auio) < 0) return EINVAL; - } - else { + if (uio_resid(auio) < 0) + return (EINVAL); + } else if (recvdlen != NULL) { uio_setresid(auio, (uio_resid(auio) + *recvdlen)); } length = uio_resid(auio); - - if (recvdlen) + + if (recvdlen != NULL) *recvdlen = 0; - /* let pru_soreceive handle the socket locking */ + /* let pru_soreceive handle the socket locking */ error = sock->so_proto->pr_usrreqs->pru_soreceive(sock, &fromsa, auio, data, (msg && msg->msg_control) ? &control : NULL, &flags); - if (error) goto cleanup; - - if (recvdlen) + if (error != 0) + goto cleanup; + + if (recvdlen != NULL) *recvdlen = length - uio_resid(auio); - if (msg) { + if (msg != NULL) { msg->msg_flags = flags; - - if (msg->msg_name) - { + + if (msg->msg_name != NULL) { int salen; salen = msg->msg_namelen; - if (msg->msg_namelen > 0 && fromsa != 0) - { + if (msg->msg_namelen > 0 && fromsa != NULL) { salen = MIN(salen, fromsa->sa_len); memcpy(msg->msg_name, fromsa, - msg->msg_namelen > fromsa->sa_len ? fromsa->sa_len : msg->msg_namelen); + msg->msg_namelen > fromsa->sa_len ? + fromsa->sa_len : msg->msg_namelen); } } - - if (msg->msg_control) - { - struct mbuf* m = control; - u_char* ctlbuf = msg->msg_control; - int clen = msg->msg_controllen; + + if (msg->msg_control != NULL) { + struct mbuf *m = control; + u_char *ctlbuf = msg->msg_control; + int clen = msg->msg_controllen; + msg->msg_controllen = 0; - - while (m && clen > 0) - { + + while (m != NULL && clen > 0) { unsigned int tocopy; - if (clen >= m->m_len) - { + + if (clen >= m->m_len) { tocopy = m->m_len; - } - else - { + } else { msg->msg_flags |= MSG_CTRUNC; tocopy = clen; } @@ -749,107 +713,109 @@ sock_receive_internal( clen -= tocopy; m = m->m_next; } - msg->msg_controllen = (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control; + msg->msg_controllen = + (uintptr_t)ctlbuf - (uintptr_t)msg->msg_control; } } cleanup: - if (control) m_freem(control); - if (fromsa) FREE(fromsa, M_SONAME); - return error; + if (control != NULL) + m_freem(control); + if (fromsa != NULL) + FREE(fromsa, M_SONAME); + return (error); } errno_t -sock_receive( - socket_t sock, - struct msghdr *msg, - int flags, - size_t *recvdlen) +sock_receive(socket_t sock, struct msghdr *msg, int flags, size_t *recvdlen) { - if ((msg == NULL) || - (msg->msg_iovlen < 1) || - (msg->msg_iov[0].iov_len == 0) || - (msg->msg_iov[0].iov_base == NULL)) - return EINVAL; - return sock_receive_internal(sock, msg, NULL, flags, recvdlen); + if ((msg == NULL) || (msg->msg_iovlen < 1) || + (msg->msg_iov[0].iov_len == 0) || + (msg->msg_iov[0].iov_base == NULL)) + return (EINVAL); + + return (sock_receive_internal(sock, msg, NULL, flags, recvdlen)); } errno_t -sock_receivembuf( - socket_t sock, - struct msghdr *msg, - mbuf_t *data, - int flags, - size_t *recvlen) +sock_receivembuf(socket_t sock, struct msghdr *msg, mbuf_t *data, int flags, + size_t *recvlen) { - if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg && - (msg->msg_iov != NULL || msg->msg_iovlen != 0))) - return EINVAL; - return sock_receive_internal(sock, msg, data, flags, recvlen); + if (data == NULL || recvlen == 0 || *recvlen <= 0 || (msg != NULL && + (msg->msg_iov 
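The control-mbuf loop in sock_receive_internal() truncates rather than fails when the caller's buffer is too small, flagging MSG_CTRUNC. A self-contained sketch of the same loop over a plain list, with a hypothetical `chunk` type standing in for struct mbuf and F_CTRUNC for MSG_CTRUNC:

#include <string.h>

#define F_CTRUNC	0x01	/* stands in for MSG_CTRUNC */

struct chunk {			/* hypothetical stand-in for struct mbuf */
	struct chunk	*next;
	const char	*data;
	int		len;
};

/*
 * Copy a chain of chunks into a fixed buffer of size clen, flag
 * truncation when the buffer runs out, and report how many bytes
 * landed (msg_controllen in the original).
 */
static int
copy_chain(const struct chunk *m, char *buf, int clen, int *flags)
{
	char *p = buf;

	while (m != NULL && clen > 0) {
		int tocopy = m->len;

		if (tocopy > clen) {
			*flags |= F_CTRUNC;	/* caller's buffer too small */
			tocopy = clen;
		}
		memcpy(p, m->data, (size_t)tocopy);
		p += tocopy;
		clen -= tocopy;
		m = m->next;
	}
	return (int)(p - buf);
}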
!= NULL || msg->msg_iovlen != 0))) + return (EINVAL); + + return (sock_receive_internal(sock, msg, data, flags, recvlen)); } errno_t -sock_send_internal( - socket_t sock, - const struct msghdr *msg, - mbuf_t data, - int flags, - size_t *sentlen) +sock_send_internal(socket_t sock, const struct msghdr *msg, mbuf_t data, + int flags, size_t *sentlen) { - uio_t auio = NULL; - struct mbuf *control = NULL; - int error = 0; - int datalen = 0; - char uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ]; - + uio_t auio = NULL; + struct mbuf *control = NULL; + int error = 0; + int datalen = 0; + char uio_buf[ UIO_SIZEOF((msg != NULL ? msg->msg_iovlen : 1)) ]; + if (sock == NULL) { error = EINVAL; goto errorout; } - - if (data == 0 && msg != NULL) { + + if (data == NULL && msg != NULL) { struct iovec *tempp = msg->msg_iov; - auio = uio_createwithbuffer(msg->msg_iovlen, 0, UIO_SYSSPACE, UIO_WRITE, - &uio_buf[0], sizeof(uio_buf)); - if (tempp != NULL) - { + auio = uio_createwithbuffer(msg->msg_iovlen, 0, + UIO_SYSSPACE, UIO_WRITE, &uio_buf[0], sizeof (uio_buf)); + if (tempp != NULL) { int i; - + for (i = 0; i < msg->msg_iovlen; i++) { - uio_addiov(auio, CAST_USER_ADDR_T((tempp + i)->iov_base), (tempp + i)->iov_len); + uio_addiov(auio, + CAST_USER_ADDR_T((tempp + i)->iov_base), + (tempp + i)->iov_len); } - + if (uio_resid(auio) < 0) { error = EINVAL; goto errorout; } } } - - if (sentlen) + + if (sentlen != NULL) *sentlen = 0; - - if (auio) + + if (auio != NULL) datalen = uio_resid(auio); else datalen = data->m_pkthdr.len; - - if (msg && msg->msg_control) - { - if ((size_t)msg->msg_controllen < sizeof(struct cmsghdr)) return EINVAL; - if ((size_t)msg->msg_controllen > MLEN) return EINVAL; + + if (msg != NULL && msg->msg_control) { + if ((size_t)msg->msg_controllen < sizeof (struct cmsghdr)) { + error = EINVAL; + goto errorout; + } + + if ((size_t)msg->msg_controllen > MLEN) { + error = EINVAL; + goto errorout; + } + control = m_get(M_NOWAIT, MT_CONTROL); if (control == NULL) { error = ENOMEM; goto errorout; } - memcpy(mtod(control, caddr_t), msg->msg_control, msg->msg_controllen); + memcpy(mtod(control, caddr_t), msg->msg_control, + msg->msg_controllen); control->m_len = msg->msg_controllen; } - + error = sock->so_proto->pr_usrreqs->pru_sosend(sock, msg != NULL ? 
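sock_send_internal() now bails out cleanly when msg_controllen is smaller than a cmsghdr or larger than one mbuf (MLEN), rather than returning from mid-function. From user space such a payload is normally produced with the standard CMSG macros; a sketch that packs one SCM_RIGHTS record (the canonical ancillary message) and returns its length:

#include <string.h>
#include <sys/socket.h>

/*
 * Build a single SCM_RIGHTS message carrying one file descriptor
 * into buf; this is the well-formed cmsghdr shape the kernel
 * expects to find in msg->msg_control.
 */
static socklen_t
build_rights_cmsg(char *buf, socklen_t buflen, int fd)
{
	struct msghdr mh;
	struct cmsghdr *cm;

	if (buflen < CMSG_SPACE(sizeof (int)))
		return 0;

	memset(&mh, 0, sizeof (mh));
	mh.msg_control = buf;
	mh.msg_controllen = buflen;

	cm = CMSG_FIRSTHDR(&mh);
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_RIGHTS;
	cm->cmsg_len = CMSG_LEN(sizeof (int));
	memcpy(CMSG_DATA(cm), &fd, sizeof (int));

	return CMSG_SPACE(sizeof (int));
}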
- (struct sockaddr*)msg->msg_name : NULL, auio, data, control, flags); + (struct sockaddr *)msg->msg_name : NULL, auio, data, + control, flags); /* * Residual data is possible in the case of IO vectors but not @@ -868,8 +834,8 @@ sock_send_internal( else *sentlen = datalen; } - - return error; + + return (error); /* * In cases where we detect an error before returning, we need to @@ -883,89 +849,84 @@ errorout: m_freem(data); if (sentlen) *sentlen = 0; - return error; + return (error); } errno_t -sock_send( - socket_t sock, - const struct msghdr *msg, - int flags, - size_t *sentlen) +sock_send(socket_t sock, const struct msghdr *msg, int flags, size_t *sentlen) { if (msg == NULL || msg->msg_iov == NULL || msg->msg_iovlen < 1) - return EINVAL; - return sock_send_internal(sock, msg, NULL, flags, sentlen); + return (EINVAL); + + return (sock_send_internal(sock, msg, NULL, flags, sentlen)); } errno_t -sock_sendmbuf( - socket_t sock, - const struct msghdr *msg, - mbuf_t data, - int flags, - size_t *sentlen) +sock_sendmbuf(socket_t sock, const struct msghdr *msg, mbuf_t data, + int flags, size_t *sentlen) { - if (data == NULL || (msg && - (msg->msg_iov != NULL || msg->msg_iovlen != 0))) { - if (data) + if (data == NULL || (msg != NULL && (msg->msg_iov != NULL || + msg->msg_iovlen != 0))) { + if (data != NULL) m_freem(data); - return EINVAL; + return (EINVAL); } - return sock_send_internal(sock, msg, data, flags, sentlen); + return (sock_send_internal(sock, msg, data, flags, sentlen)); } errno_t -sock_shutdown( - socket_t sock, - int how) +sock_shutdown(socket_t sock, int how) { - if (sock == NULL) return EINVAL; - return soshutdown(sock, how); + if (sock == NULL) + return (EINVAL); + + return (soshutdown(sock, how)); } errno_t -sock_socket( - int domain, - int type, - int protocol, - sock_upcall callback, - void* context, - socket_t *new_so) +sock_socket(int domain, int type, int protocol, sock_upcall callback, + void *context, socket_t *new_so) { - int error = 0; - if (new_so == NULL) return EINVAL; + int error = 0; + + if (new_so == NULL) + return (EINVAL); + /* socreate will create an initial so_count */ error = socreate(domain, new_so, type, protocol); - if (error == 0 && callback) - { - (*new_so)->so_rcv.sb_flags |= SB_UPCALL; -#if CONFIG_SOWUPCALL - (*new_so)->so_snd.sb_flags |= SB_UPCALL; -#endif - (*new_so)->so_upcall = (so_upcall)callback; - (*new_so)->so_upcallarg = context; + if (error == 0) { + /* see comments in sock_setupcall() */ + if (callback != NULL) { + sock_setupcalls_common(*new_so, callback, context, + NULL, NULL); + } + /* + * last_pid and last_upid should be zero for sockets + * created using sock_socket + */ (*new_so)->last_pid = 0; (*new_so)->last_upid = 0; } - return error; + return (error); } void -sock_close( - socket_t sock) +sock_close(socket_t sock) { - if (sock == NULL) return; + if (sock == NULL) + return; + soclose(sock); } -/* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04)*/ +/* Do we want this to be APPLE_PRIVATE API?: YES (LD 12/23/04) */ void -sock_retain( - socket_t sock) +sock_retain(socket_t sock) { - if (sock == NULL) return; + if (sock == NULL) + return; + socket_lock(sock, 1); sock->so_retaincnt++; sock->so_usecount++; /* add extra reference for holding the socket */ @@ -978,15 +939,17 @@ sock_release(socket_t sock) { if (sock == NULL) return; - socket_lock(sock, 1); - if (sock->so_upcallusecount) + socket_lock(sock, 1); + if (sock->so_upcallusecount > 0) soclose_wait_locked(sock); sock->so_retaincnt--; - if (sock->so_retaincnt < 0) - 
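sock_retain()/sock_release() implement a simple counted-reference scheme layered over so_usecount. A sketch of the invariant they maintain, with assert() standing in for the kernel panic and the so_usecount bookkeeping reduced to a comment:

#include <assert.h>

struct refsock {		/* hypothetical: just the two counters */
	int	retaincnt;
	int	usecount;
};

/*
 * sock_retain() takes a retain plus an extra use reference so the
 * socket outlives any file descriptor that also holds it.
 */
static void
ref_retain(struct refsock *s)
{
	s->retaincnt++;
	s->usecount++;
}

/*
 * sock_release() treats a negative retain count as a bug; the real
 * code additionally checks so_usecount to be sure no file descriptor
 * still holds the socket before calling soclose_locked().
 */
static int			/* returns 1 when the caller may close */
ref_release(struct refsock *s)
{
	s->retaincnt--;
	assert(s->retaincnt >= 0);	/* the kernel panics here */
	s->usecount--;
	return (s->retaincnt == 0);
}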
panic("sock_release: negative retain count for sock=%p " - "cnt=%x\n", sock, sock->so_retaincnt); + if (sock->so_retaincnt < 0) { + panic("%s: negative retain count (%d) for sock=%p\n", + __func__, sock->so_retaincnt, sock); + /* NOTREACHED */ + } if ((sock->so_retaincnt == 0) && (sock->so_usecount == 2)) { /* close socket only if the FD is not holding it */ soclose_locked(sock); @@ -998,62 +961,54 @@ sock_release(socket_t sock) } errno_t -sock_setpriv( - socket_t sock, - int on) +sock_setpriv(socket_t sock, int on) { - if (sock == NULL) return EINVAL; + if (sock == NULL) + return (EINVAL); + socket_lock(sock, 1); if (on) - { sock->so_state |= SS_PRIV; - } else - { sock->so_state &= ~SS_PRIV; - } socket_unlock(sock, 1); - return 0; + return (0); } int -sock_isconnected( - socket_t sock) +sock_isconnected(socket_t sock) { int retval; + socket_lock(sock, 1); - retval = (sock->so_state & SS_ISCONNECTED) != 0; + retval = ((sock->so_state & SS_ISCONNECTED) ? 1 : 0); socket_unlock(sock, 1); return (retval); } int -sock_isnonblocking( - socket_t sock) +sock_isnonblocking(socket_t sock) { int retval; + socket_lock(sock, 1); - retval = (sock->so_state & SS_NBIO) != 0; + retval = ((sock->so_state & SS_NBIO) ? 1 : 0); socket_unlock(sock, 1); return (retval); } errno_t -sock_gettype( - socket_t sock, - int *outDomain, - int *outType, - int *outProtocol) +sock_gettype(socket_t sock, int *outDomain, int *outType, int *outProtocol) { socket_lock(sock, 1); - if (outDomain) - *outDomain = sock->so_proto->pr_domain->dom_family; - if (outType) + if (outDomain != NULL) + *outDomain = SOCK_DOM(sock); + if (outType != NULL) *outType = sock->so_type; - if (outProtocol) - *outProtocol = sock->so_proto->pr_protocol; + if (outProtocol != NULL) + *outProtocol = SOCK_PROTO(sock); socket_unlock(sock, 1); - return 0; + return (0); } /* @@ -1073,12 +1028,9 @@ sock_getlistener(socket_t sock) static inline void sock_set_tcp_stream_priority(socket_t sock) { - if ((sock->so_proto->pr_domain->dom_family == AF_INET || - sock->so_proto->pr_domain->dom_family == AF_INET6) && - sock->so_proto->pr_type == SOCK_STREAM) { - + if ((SOCK_DOM(sock) == PF_INET || SOCK_DOM(sock) == PF_INET6) && + SOCK_TYPE(sock) == SOCK_STREAM) { set_tcp_stream_priority(sock); - } } @@ -1151,33 +1103,91 @@ socket_defunct(struct proc *p, socket_t so, int level) return (retval); } +static void +sock_setupcalls_common(socket_t sock, sock_upcall rcallback, void *rcontext, + sock_upcall wcallback, void *wcontext) +{ + if (rcallback != NULL) { + sock->so_rcv.sb_flags |= SB_UPCALL; + sock->so_rcv.sb_upcall = rcallback; + sock->so_rcv.sb_upcallarg = rcontext; + } else { + sock->so_rcv.sb_flags &= ~SB_UPCALL; + sock->so_rcv.sb_upcall = NULL; + sock->so_rcv.sb_upcallarg = NULL; + } + + if (wcallback != NULL) { + sock->so_snd.sb_flags |= SB_UPCALL; + sock->so_snd.sb_upcall = wcallback; + sock->so_snd.sb_upcallarg = wcontext; + } else { + sock->so_snd.sb_flags &= ~SB_UPCALL; + sock->so_snd.sb_upcall = NULL; + sock->so_snd.sb_upcallarg = NULL; + } +} + errno_t -sock_setupcall(socket_t sock, sock_upcall callback, void* context) +sock_setupcall(socket_t sock, sock_upcall callback, void *context) { if (sock == NULL) - return EINVAL; + return (EINVAL); /* * Note that we don't wait for any in progress upcall to complete. + * On embedded, sock_setupcall() causes both read and write + * callbacks to be set; on desktop, only read callback is set + * to maintain legacy KPI behavior. 
+ * + * The newer sock_setupcalls() KPI should be used instead to set + * the read and write callbacks and their respective parameters. */ socket_lock(sock, 1); + sock_setupcalls_common(sock, callback, context, NULL, NULL); + socket_unlock(sock, 1); - sock->so_upcall = (so_upcall) callback; - sock->so_upcallarg = context; - if (callback) { - sock->so_rcv.sb_flags |= SB_UPCALL; -#if CONFIG_SOWUPCALL - sock->so_snd.sb_flags |= SB_UPCALL; -#endif /* CONFIG_SOWUPCALL */ + return (0); +} + +errno_t +sock_setupcalls(socket_t sock, sock_upcall rcallback, void *rcontext, + sock_upcall wcallback, void *wcontext) +{ + if (sock == NULL) + return (EINVAL); + + /* + * Note that we don't wait for any in progress upcall to complete. + */ + socket_lock(sock, 1); + sock_setupcalls_common(sock, rcallback, rcontext, wcallback, wcontext); + socket_unlock(sock, 1); + + return (0); +} + +errno_t +sock_catchevents(socket_t sock, sock_evupcall ecallback, void *econtext, + u_int32_t emask) +{ + if (sock == NULL) + return (EINVAL); + + /* + * Note that we don't wait for any in progress upcall to complete. + */ + socket_lock(sock, 1); + if (ecallback != NULL) { + sock->so_event = ecallback; + sock->so_eventarg = econtext; + sock->so_eventmask = emask; } else { - sock->so_rcv.sb_flags &= ~SB_UPCALL; -#if CONFIG_SOWUPCALL - sock->so_snd.sb_flags &= ~SB_UPCALL; -#endif /* CONFIG_SOWUPCALL */ + sock->so_event = NULL; + sock->so_eventarg = NULL; + sock->so_eventmask = 0; } - socket_unlock(sock, 1); - return 0; + return (0); } - diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c index f456d6246..1b55a8053 100644 --- a/bsd/kern/kpi_socketfilter.c +++ b/bsd/kern/kpi_socketfilter.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
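sock_catchevents() keeps the callback, its context, and an interest mask together on the socket. A sketch of that registration plus the masked delivery it implies (field and function names here are illustrative, not the KPI's):

#include <stddef.h>

typedef void (*ev_upcall)(void *arg, unsigned int events);

struct evreg {			/* hypothetical event registration */
	ev_upcall	cb;
	void		*arg;
	unsigned int	mask;
};

/* A NULL callback clears all three fields, as in sock_catchevents(). */
static void
ev_register(struct evreg *r, ev_upcall cb, void *arg, unsigned int mask)
{
	r->cb = cb;
	r->arg = (cb != NULL) ? arg : NULL;
	r->mask = (cb != NULL) ? mask : 0;
}

/* Deliver events, filtering through the registered interest mask. */
static void
ev_post(const struct evreg *r, unsigned int events)
{
	events &= r->mask;	/* drop events nobody asked for */
	if (r->cb != NULL && events != 0)
		r->cb(r->arg, events);
}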
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -61,32 +61,32 @@ struct socket_filter_entry { struct socket_filter_entry *sfe_next_onsocket; struct socket_filter_entry *sfe_next_onfilter; struct socket_filter_entry *sfe_next_oncleanup; - + struct socket_filter *sfe_filter; - struct socket *sfe_socket; - void *sfe_cookie; - - uint32_t sfe_flags; - int32_t sfe_refcount; + struct socket *sfe_socket; + void *sfe_cookie; + + uint32_t sfe_flags; + int32_t sfe_refcount; }; struct socket_filter { - TAILQ_ENTRY(socket_filter) sf_protosw_next; + TAILQ_ENTRY(socket_filter) sf_protosw_next; TAILQ_ENTRY(socket_filter) sf_global_next; struct socket_filter_entry *sf_entry_head; - - struct protosw *sf_proto; - struct sflt_filter sf_filter; - u_int32_t sf_refcount; + + struct protosw *sf_proto; + struct sflt_filter sf_filter; + u_int32_t sf_refcount; }; TAILQ_HEAD(socket_filter_list, socket_filter); static struct socket_filter_list sock_filter_head; -static lck_rw_t *sock_filter_lock = NULL; -static lck_mtx_t *sock_filter_cleanup_lock = NULL; +static lck_rw_t *sock_filter_lock = NULL; +static lck_mtx_t *sock_filter_cleanup_lock = NULL; static struct socket_filter_entry *sock_filter_cleanup_entries = NULL; -static thread_t sock_filter_cleanup_thread = NULL; +static thread_t sock_filter_cleanup_thread = NULL; static void sflt_cleanup_thread(void *, wait_result_t); static void sflt_detach_locked(struct socket_filter_entry *entry); @@ -96,12 +96,12 @@ static void sflt_detach_locked(struct socket_filter_entry *entry); __private_extern__ void sflt_init(void) { - lck_grp_attr_t *grp_attrib = 0; - lck_attr_t *lck_attrib = 0; - lck_grp_t *lck_group = 0; - + lck_grp_attr_t *grp_attrib = NULL; + lck_attr_t *lck_attrib = NULL; + lck_grp_t *lck_group = NULL; + TAILQ_INIT(&sock_filter_head); - + /* Allocate a rw lock */ grp_attrib = lck_grp_attr_alloc_init(); lck_group = lck_grp_alloc_init("socket filter lock", grp_attrib); @@ -114,251 +114,264 @@ sflt_init(void) } static void -sflt_retain_locked( - struct socket_filter *filter) +sflt_retain_locked(struct socket_filter *filter) { filter->sf_refcount++; } static void -sflt_release_locked( - struct socket_filter *filter) +sflt_release_locked(struct socket_filter *filter) { filter->sf_refcount--; - if (filter->sf_refcount == 0) - { - // Call the unregistered function + if (filter->sf_refcount == 0) { + /* Call the unregistered function */ if (filter->sf_filter.sf_unregistered) { lck_rw_unlock_exclusive(sock_filter_lock); - filter->sf_filter.sf_unregistered(filter->sf_filter.sf_handle); + filter->sf_filter.sf_unregistered( + filter->sf_filter.sf_handle); lck_rw_lock_exclusive(sock_filter_lock); } - - // Free the entry + + /* Free the entry */ FREE(filter, M_IFADDR); } } static void -sflt_entry_retain( - struct socket_filter_entry *entry) +sflt_entry_retain(struct socket_filter_entry *entry) { - if (OSIncrementAtomic(&entry->sfe_refcount) <= 0) + if (OSIncrementAtomic(&entry->sfe_refcount) <= 0) { panic("sflt_entry_retain - sfe_refcount <= 0\n"); + /* NOTREACHED */ + } } static void -sflt_entry_release( - struct socket_filter_entry *entry) +sflt_entry_release(struct socket_filter_entry *entry) { SInt32 old = OSDecrementAtomic(&entry->sfe_refcount); if (old == 1) { - // That was the last reference - - // Take the cleanup lock + /* That was the last reference */ + + /* Take the cleanup lock */ lck_mtx_lock(sock_filter_cleanup_lock); - - // Put this item on the cleanup list + + /* Put this item on the cleanup list */ entry->sfe_next_oncleanup = 
sock_filter_cleanup_entries; sock_filter_cleanup_entries = entry; - - // If the item is the first item in the list + + /* If the item is the first item in the list */ if (entry->sfe_next_oncleanup == NULL) { if (sock_filter_cleanup_thread == NULL) { - // Create a thread - kernel_thread_start(sflt_cleanup_thread, NULL, &sock_filter_cleanup_thread); + /* Create a thread */ + kernel_thread_start(sflt_cleanup_thread, + NULL, &sock_filter_cleanup_thread); } else { - // Wakeup the thread + /* Wakeup the thread */ wakeup(&sock_filter_cleanup_entries); } } - - // Drop the cleanup lock + + /* Drop the cleanup lock */ lck_mtx_unlock(sock_filter_cleanup_lock); - } - else if (old <= 0) - { - panic("sflt_entry_release - sfe_refcount (%d) <= 0\n", (int)old); + } else if (old <= 0) { + panic("sflt_entry_release - sfe_refcount (%d) <= 0\n", + (int)old); + /* NOTREACHED */ } } static void -sflt_cleanup_thread( - __unused void * blah, - __unused wait_result_t blah2) +sflt_cleanup_thread(void *blah, wait_result_t blah2) { +#pragma unused(blah, blah2) while (1) { lck_mtx_lock(sock_filter_cleanup_lock); while (sock_filter_cleanup_entries == NULL) { - // Sleep until we've got something better to do - msleep(&sock_filter_cleanup_entries, sock_filter_cleanup_lock, PWAIT, "sflt_cleanup", NULL); + /* Sleep until we've got something better to do */ + msleep(&sock_filter_cleanup_entries, + sock_filter_cleanup_lock, PWAIT, + "sflt_cleanup", NULL); } - - // Pull the current list of dead items - struct socket_filter_entry *dead = sock_filter_cleanup_entries; + + /* Pull the current list of dead items */ + struct socket_filter_entry *dead = sock_filter_cleanup_entries; sock_filter_cleanup_entries = NULL; - - // Drop the lock + + /* Drop the lock */ lck_mtx_unlock(sock_filter_cleanup_lock); - - // Take the socket filter lock + + /* Take the socket filter lock */ lck_rw_lock_exclusive(sock_filter_lock); - - // Cleanup every dead item + + /* Cleanup every dead item */ struct socket_filter_entry *entry; for (entry = dead; entry; entry = dead) { struct socket_filter_entry **nextpp; - + dead = entry->sfe_next_oncleanup; - - // Call the detach function if necessary - drop the lock + + /* Call detach function if necessary - drop the lock */ if ((entry->sfe_flags & SFEF_NODETACH) == 0 && - entry->sfe_filter->sf_filter.sf_detach) { + entry->sfe_filter->sf_filter.sf_detach) { entry->sfe_flags |= SFEF_NODETACH; lck_rw_unlock_exclusive(sock_filter_lock); - - // Warning - passing a potentially dead socket may be bad - entry->sfe_filter->sf_filter. - sf_detach(entry->sfe_cookie, entry->sfe_socket); - + + /* + * Warning - passing a potentially + * dead socket may be bad + */ + entry->sfe_filter->sf_filter. 
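Because the releasing context may hold the very locks the detach work needs, sflt_entry_release() queues dead entries on a singly linked list and hands them to a dedicated thread. A userspace sketch of the same scheme with pthreads; unlike the kernel, which spawns the drainer lazily on first use, the sketch assumes it was started up front with pthread_create():

#include <pthread.h>
#include <stdlib.h>

struct dead_entry {		/* hypothetical dead filter entry */
	struct dead_entry *next;
};

static pthread_mutex_t cleanup_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cleanup_cv = PTHREAD_COND_INITIALIZER;
static struct dead_entry *cleanup_list;

/*
 * Drainer: sleep until work arrives, pull the whole list, then free
 * outside the lock (where the real code may also call sf_detach).
 */
static void *
cleanup_thread(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&cleanup_lock);
		while (cleanup_list == NULL)
			pthread_cond_wait(&cleanup_cv, &cleanup_lock);
		struct dead_entry *dead = cleanup_list;
		cleanup_list = NULL;
		pthread_mutex_unlock(&cleanup_lock);

		while (dead != NULL) {
			struct dead_entry *next = dead->next;
			free(dead);
			dead = next;
		}
	}
	return NULL;
}

/* Last-reference path: queue the entry, then wake the drainer. */
static void
defer_free(struct dead_entry *e)
{
	pthread_mutex_lock(&cleanup_lock);
	e->next = cleanup_list;
	cleanup_list = e;
	pthread_cond_signal(&cleanup_cv);
	pthread_mutex_unlock(&cleanup_lock);
}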
sf_detach( + entry->sfe_cookie, entry->sfe_socket); + lck_rw_lock_exclusive(sock_filter_lock); } - - // Pull entry off the socket list -- if the socket still exists + + /* + * Pull entry off the socket list -- + * if the socket still exists + */ if ((entry->sfe_flags & SFEF_NOSOCKET) == 0) { - for (nextpp = &entry->sfe_socket->so_filt; *nextpp; - nextpp = &(*nextpp)->sfe_next_onsocket) { + for (nextpp = &entry->sfe_socket->so_filt; + *nextpp; + nextpp = &(*nextpp)->sfe_next_onsocket) { if (*nextpp == entry) { - *nextpp = entry->sfe_next_onsocket; + *nextpp = + entry->sfe_next_onsocket; break; } } } - - // Pull entry off the filter list - for (nextpp = &entry->sfe_filter->sf_entry_head; *nextpp; - nextpp = &(*nextpp)->sfe_next_onfilter) { + + /* Pull entry off the filter list */ + for (nextpp = &entry->sfe_filter->sf_entry_head; + *nextpp; nextpp = &(*nextpp)->sfe_next_onfilter) { if (*nextpp == entry) { *nextpp = entry->sfe_next_onfilter; break; } } - - // Release the filter -- may drop lock, but that's okay + + /* + * Release the filter -- may drop lock, but that's okay + */ sflt_release_locked(entry->sfe_filter); entry->sfe_socket = NULL; entry->sfe_filter = NULL; FREE(entry, M_IFADDR); } - - // Drop the socket filter lock + + /* Drop the socket filter lock */ lck_rw_unlock_exclusive(sock_filter_lock); } - // Not reached + /* NOTREACHED */ } static int -sflt_attach_locked( - struct socket *so, - struct socket_filter *filter, - int socklocked) +sflt_attach_locked(struct socket *so, struct socket_filter *filter, + int socklocked) { int error = 0; struct socket_filter_entry *entry = NULL; - + if (filter == NULL) - return ENOENT; + return (ENOENT); - for (entry = so->so_filt; entry; entry = entry->sfe_next_onfilter) + for (entry = so->so_filt; entry; entry = entry->sfe_next_onfilter) { if (entry->sfe_filter->sf_filter.sf_handle == filter->sf_filter.sf_handle) - return EEXIST; - + return (EEXIST); + } /* allocate the socket filter entry */ - MALLOC(entry, struct socket_filter_entry *, sizeof(*entry), M_IFADDR, + MALLOC(entry, struct socket_filter_entry *, sizeof (*entry), M_IFADDR, M_WAITOK); if (entry == NULL) - return ENOMEM; - + return (ENOMEM); + /* Initialize the socket filter entry */ entry->sfe_cookie = NULL; entry->sfe_flags = SFEF_ATTACHED; - entry->sfe_refcount = 1; // corresponds to SFEF_ATTACHED flag set - + entry->sfe_refcount = 1; /* corresponds to SFEF_ATTACHED flag set */ + /* Put the entry in the filter list */ sflt_retain_locked(filter); entry->sfe_filter = filter; entry->sfe_next_onfilter = filter->sf_entry_head; filter->sf_entry_head = entry; - + /* Put the entry on the socket filter list */ entry->sfe_socket = so; entry->sfe_next_onsocket = so->so_filt; so->so_filt = entry; if (entry->sfe_filter->sf_filter.sf_attach) { - // Retain the entry while we call attach + /* Retain the entry while we call attach */ sflt_entry_retain(entry); - - // Release the filter lock -- callers must be aware we will do this + + /* + * Release the filter lock -- + * callers must be aware we will do this + */ lck_rw_unlock_exclusive(sock_filter_lock); - - // Unlock the socket + + /* Unlock the socket */ if (socklocked) socket_unlock(so, 0); - - // It's finally safe to call the filter function - error = entry->sfe_filter->sf_filter.sf_attach(&entry->sfe_cookie, so); - - // Lock the socket again + + /* It's finally safe to call the filter function */ + error = entry->sfe_filter->sf_filter.sf_attach( + &entry->sfe_cookie, so); + + /* Lock the socket again */ if (socklocked) socket_lock(so, 0); - - 
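sflt_attach_locked() pins the entry with a reference before dropping the locks to call sf_attach(); that is the only safe way to let a filter callback block or re-enter the socket layer. The sequence in isolation, as a sketch with pthreads and C11 atomics (in the kernel, a release that hits zero goes through the deferred-cleanup list sketched above rather than freeing inline):

#include <pthread.h>
#include <stdatomic.h>

struct hooked {			/* hypothetical filter entry */
	atomic_int	refs;
	int		(*attach)(void *cookie);
	void		*cookie;
};

/*
 * Retain, unlock, call, relock, release: the entry cannot be freed
 * while the lock is down because this caller holds a reference.
 */
static int
call_attach(pthread_mutex_t *list_lock, struct hooked *e)
{
	int error;

	atomic_fetch_add(&e->refs, 1);		/* sflt_entry_retain() */
	pthread_mutex_unlock(list_lock);	/* callee may block */

	error = e->attach(e->cookie);

	pthread_mutex_lock(list_lock);		/* restore caller's state */
	atomic_fetch_sub(&e->refs, 1);		/* sflt_entry_release() */
	return error;
}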
// Lock the filters again + + /* Lock the filters again */ lck_rw_lock_exclusive(sock_filter_lock); - - // If the attach function returns an error, this filter must be detached + + /* + * If the attach function returns an error, + * this filter must be detached + */ if (error) { - entry->sfe_flags |= SFEF_NODETACH; // don't call sf_detach + /* don't call sf_detach */ + entry->sfe_flags |= SFEF_NODETACH; sflt_detach_locked(entry); } - - // Release the retain we held through the attach call + + /* Release the retain we held through the attach call */ sflt_entry_release(entry); } - - return error; + + return (error); } errno_t -sflt_attach_internal( - socket_t socket, - sflt_handle handle) +sflt_attach_internal(socket_t socket, sflt_handle handle) { if (socket == NULL || handle == 0) - return EINVAL; - + return (EINVAL); + int result = EINVAL; - + lck_rw_lock_exclusive(sock_filter_lock); - + struct socket_filter *filter = NULL; TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) { if (filter->sf_filter.sf_handle == handle) break; } - + if (filter) { result = sflt_attach_locked(socket, filter, 1); } - + lck_rw_unlock_exclusive(sock_filter_lock); - - return result; + + return (result); } static void -sflt_detach_locked( - struct socket_filter_entry *entry) +sflt_detach_locked(struct socket_filter_entry *entry) { if ((entry->sfe_flags & SFEF_ATTACHED) != 0) { entry->sfe_flags &= ~SFEF_ATTACHED; @@ -369,40 +382,51 @@ sflt_detach_locked( #pragma mark -- Socket Layer Hooks -- __private_extern__ void -sflt_initsock( - struct socket *so) +sflt_initsock(struct socket *so) { - struct protosw *proto = so->so_proto; - + /* + * Point to the real protosw, as so_proto might have been + * pointed to a modified version. + */ + struct protosw *proto = so->so_proto->pr_protosw; + lck_rw_lock_shared(sock_filter_lock); if (TAILQ_FIRST(&proto->pr_filter_head) != NULL) { - // Promote lock to exclusive + /* Promote lock to exclusive */ if (!lck_rw_lock_shared_to_exclusive(sock_filter_lock)) lck_rw_lock_exclusive(sock_filter_lock); - - // Warning: A filter unregistering will be pulled out of the list. - // This could happen while we drop the lock in sftl_attach_locked - // or sflt_release_locked. For this reason we retain a reference - // on the filter (or next_filter) while calling this function - // - // This protects us from a panic, but it could result in a - // socket being created without all of the global filters if - // we're attaching a filter as it is removed, if that's possible. - struct socket_filter *filter = TAILQ_FIRST(&proto->pr_filter_head); + + /* + * Warning: A filter unregistering will be pulled out of + * the list. This could happen while we drop the lock in + * sftl_attach_locked or sflt_release_locked. For this + * reason we retain a reference on the filter (or next_filter) + * while calling this function. This protects us from a panic, + * but it could result in a socket being created without all + * of the global filters if we're attaching a filter as it + * is removed, if that's possible. 
+ */ + struct socket_filter *filter = + TAILQ_FIRST(&proto->pr_filter_head); + sflt_retain_locked(filter); - - while (filter) - { + + while (filter) { struct socket_filter *filter_next; - - // Warning: sflt_attach_private_locked will drop the lock + /* + * Warning: sflt_attach_private_locked + * will drop the lock + */ sflt_attach_locked(so, filter, 0); - + filter_next = TAILQ_NEXT(filter, sf_protosw_next); if (filter_next) sflt_retain_locked(filter_next); - - // Warning: filt_release_locked may remove the filter from the queue + + /* + * Warning: filt_release_locked may remove + * the filter from the queue + */ sflt_release_locked(filter); filter = filter_next; } @@ -415,146 +439,150 @@ sflt_initsock( * * Detaches all filters from the socket. */ - __private_extern__ void -sflt_termsock( - struct socket *so) +sflt_termsock(struct socket *so) { lck_rw_lock_exclusive(sock_filter_lock); - + struct socket_filter_entry *entry; - + while ((entry = so->so_filt) != NULL) { - // Pull filter off the socket + /* Pull filter off the socket */ so->so_filt = entry->sfe_next_onsocket; entry->sfe_flags |= SFEF_NOSOCKET; - - // Call detach + + /* Call detach */ sflt_detach_locked(entry); - - // On sflt_termsock, we can't return until the detach function has been called - // Call the detach function - this is gross because the socket filter - // entry could be freed when we drop the lock, so we make copies on - // the stack and retain everything we need before dropping the lock + + /* + * On sflt_termsock, we can't return until the detach function + * has been called. Call the detach function - this is gross + * because the socket filter entry could be freed when we drop + * the lock, so we make copies on the stack and retain + * everything we need before dropping the lock. 
+ */ if ((entry->sfe_flags & SFEF_NODETACH) == 0 && - entry->sfe_filter->sf_filter.sf_detach) { - void *sfe_cookie = entry->sfe_cookie; - struct socket_filter *sfe_filter = entry->sfe_filter; - - // Retain the socket filter + entry->sfe_filter->sf_filter.sf_detach) { + void *sfe_cookie = entry->sfe_cookie; + struct socket_filter *sfe_filter = entry->sfe_filter; + + /* Retain the socket filter */ sflt_retain_locked(sfe_filter); - - // Mark that we've called the detach function + + /* Mark that we've called the detach function */ entry->sfe_flags |= SFEF_NODETACH; - - // Drop the lock around the call to the detach function + + /* Drop the lock before calling the detach function */ lck_rw_unlock_exclusive(sock_filter_lock); sfe_filter->sf_filter.sf_detach(sfe_cookie, so); lck_rw_lock_exclusive(sock_filter_lock); - - // Release the filter + + /* Release the filter */ sflt_release_locked(sfe_filter); } } - + lck_rw_unlock_exclusive(sock_filter_lock); } static void -sflt_notify_internal( - struct socket *so, - sflt_event_t event, - void *param, - sflt_handle handle) +sflt_notify_internal(struct socket *so, sflt_event_t event, void *param, + sflt_handle handle) { - if (so->so_filt == NULL) return; - - struct socket_filter_entry *entry; - int unlocked = 0; - + if (so->so_filt == NULL) + return; + + struct socket_filter_entry *entry; + int unlocked = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry; entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_notify && - ((handle && entry->sfe_filter->sf_filter.sf_handle != handle) || - !handle)) { - // Retain the filter entry and release the socket filter lock + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_notify && + ((handle && entry->sfe_filter->sf_filter.sf_handle != + handle) || !handle)) { + /* + * Retain the filter entry and release + * the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { unlocked = 1; socket_unlock(so, 0); } - - // Finally call the filter - entry->sfe_filter->sf_filter. 
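sflt_termsock() must not return until every detach callback has run, yet each entry can be freed the moment the lock is dropped; hence the stack copies of everything the callback needs. A sketch of that loop, leaving the actual freeing of entries to the deferred-cleanup machinery as the original does (the real code also retains the owning filter before unlocking):

#include <pthread.h>
#include <stddef.h>

struct tentry {			/* hypothetical filter entry */
	struct tentry	*next;
	int		nodetach;
	void		(*detach)(void *cookie);
	void		*cookie;
};

static void
term_all(struct tentry **head, pthread_mutex_t *lock)
{
	struct tentry *e;

	pthread_mutex_lock(lock);
	while ((e = *head) != NULL) {
		*head = e->next;	/* pull the entry off the socket */
		if (!e->nodetach && e->detach != NULL) {
			void (*detach)(void *) = e->detach;	/* copies */
			void *cookie = e->cookie;

			e->nodetach = 1;	/* call detach exactly once */
			pthread_mutex_unlock(lock);
			detach(cookie);		/* e may be freed meanwhile */
			pthread_mutex_lock(lock);
		}
	}
	pthread_mutex_unlock(lock);
}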
- sf_notify(entry->sfe_cookie, so, event, param); - - // Take the socket filter lock again and release the entry + + /* Finally call the filter */ + entry->sfe_filter->sf_filter.sf_notify( + entry->sfe_cookie, so, event, param); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } } lck_rw_unlock_shared(sock_filter_lock); - + if (unlocked != 0) { socket_lock(so, 0); } } __private_extern__ void -sflt_notify( - struct socket *so, - sflt_event_t event, - void *param) +sflt_notify(struct socket *so, sflt_event_t event, void *param) { sflt_notify_internal(so, event, param, 0); } static void -sflt_notify_after_register( - struct socket *so, - sflt_event_t event, - sflt_handle handle) +sflt_notify_after_register(struct socket *so, sflt_event_t event, + sflt_handle handle) { sflt_notify_internal(so, event, NULL, handle); } __private_extern__ int -sflt_ioctl( - struct socket *so, - u_long cmd, - caddr_t data) +sflt_ioctl(struct socket *so, u_long cmd, caddr_t data) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_ioctl) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_ioctl) { + /* + * Retain the filter entry and release + * the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. 
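sflt_ioctl() and the hooks that follow (bind, listen, accept, getsockname, getpeername, connect in/out, set/getsockopt, data in/out) all instantiate the same traversal skeleton. A sketch of that skeleton factored into a generic dispatcher, which is not how xnu structures it but shows what each hook repeats; the per-entry retain (elided to a comment) is what keeps e, and therefore e->next, valid across the unlocked window:

#include <stddef.h>

struct fentry {			/* hypothetical filter entry */
	struct fentry	*next;
	int		attached;
	int		(*hook)(void *cookie);
	void		*cookie;
};

/*
 * Walk the per-socket list under a shared lock; for each attached
 * entry with the wanted callback: retain, unlock, call, relock,
 * release. Iteration stops at the first error, exactly as the
 * "entry && error == 0" loop conditions do above.
 */
static int
run_hooks(struct fentry *head, void (*lock)(void), void (*unlock)(void))
{
	struct fentry *e;
	int error = 0;

	lock();
	for (e = head; e != NULL && error == 0; e = e->next) {
		if (!e->attached || e->hook == NULL)
			continue;
		/* sflt_entry_retain(e) in the original */
		unlock();		/* the callback runs unlocked */
		error = e->hook(e->cookie);
		lock();
		/* sflt_entry_release(e) in the original */
	}
	unlock();
	return error;
}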
- sf_ioctl(entry->sfe_cookie, so, cmd, data); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_ioctl( + entry->sfe_cookie, so, cmd, data); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -564,41 +592,46 @@ sflt_ioctl( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_bind( - struct socket *so, - const struct sockaddr *nam) +sflt_bind(struct socket *so, const struct sockaddr *nam) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_bind) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_bind) { + /* + * Retain the filter entry and + * release the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. - sf_bind(entry->sfe_cookie, so, nam); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_bind( + entry->sfe_cookie, so, nam); + + /* + * Take the socket filter lock again and + * release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -608,40 +641,46 @@ sflt_bind( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_listen( - struct socket *so) +sflt_listen(struct socket *so) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_listen) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_listen) { + /* + * Retain the filter entry and release + * the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. 
- sf_listen(entry->sfe_cookie, so); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_listen( + entry->sfe_cookie, so); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -651,43 +690,47 @@ sflt_listen( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_accept( - struct socket *head, - struct socket *so, - const struct sockaddr *local, - const struct sockaddr *remote) +sflt_accept(struct socket *head, struct socket *so, + const struct sockaddr *local, const struct sockaddr *remote) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_accept) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_accept) { + /* + * Retain the filter entry and + * release the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. - sf_accept(entry->sfe_cookie, head, so, local, remote); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_accept( + entry->sfe_cookie, head, so, local, remote); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -697,41 +740,46 @@ sflt_accept( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_getsockname( - struct socket *so, - struct sockaddr **local) +sflt_getsockname(struct socket *so, struct sockaddr **local) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_getsockname) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_getsockname) { + /* + * Retain the filter entry and + * release the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. 
- sf_getsockname(entry->sfe_cookie, so, local); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_getsockname( + entry->sfe_cookie, so, local); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -741,41 +789,46 @@ sflt_getsockname( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_getpeername( - struct socket *so, - struct sockaddr **remote) +sflt_getpeername(struct socket *so, struct sockaddr **remote) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_getpeername) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_getpeername) { + /* + * Retain the filter entry and release + * the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. - sf_getpeername(entry->sfe_cookie, so, remote); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_getpeername( + entry->sfe_cookie, so, remote); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -785,41 +838,46 @@ sflt_getpeername( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_connectin( - struct socket *so, - const struct sockaddr *remote) +sflt_connectin(struct socket *so, const struct sockaddr *remote) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_connect_in) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_connect_in) { + /* + * Retain the filter entry and release + * the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. 
- sf_connect_in(entry->sfe_cookie, so, remote); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_connect_in( + entry->sfe_cookie, so, remote); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -829,41 +887,46 @@ sflt_connectin( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_connectout( - struct socket *so, - const struct sockaddr *nam) +sflt_connectout(struct socket *so, const struct sockaddr *nam) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_connect_out) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_connect_out) { + /* + * Retain the filter entry and release + * the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. - sf_connect_out(entry->sfe_cookie, so, nam); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_connect_out( + entry->sfe_cookie, so, nam); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -873,41 +936,101 @@ sflt_connectout( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); +} + +__private_extern__ int +sflt_connectxout(struct socket *so, struct sockaddr_list **dst_sl0) +{ + char buf[SOCK_MAXADDRLEN]; + struct sockaddr_list *dst_sl; + struct sockaddr_entry *se, *tse; + int modified = 0; + int error = 0; + + if (so->so_filt == NULL) + return (0); + + /* make a copy as sflt_connectout() releases socket lock */ + dst_sl = sockaddrlist_dup(*dst_sl0, M_WAITOK); + if (dst_sl == NULL) + return (ENOBUFS); + + /* + * Hmm; we don't yet have a connectx socket filter callback, + * so the closest thing to do is to probably call sflt_connectout() + * as many times as there are addresses in the list, and bail + * as soon as we get an error. + */ + TAILQ_FOREACH_SAFE(se, &dst_sl->sl_head, se_link, tse) { + int sa_len = se->se_addr->sa_len; + + /* remember the original address */ + bzero(buf, sizeof (buf)); + bcopy(se->se_addr, buf, sa_len); + + VERIFY(se->se_addr != NULL); + error = sflt_connectout(so, se->se_addr); + if (error != 0) + break; + + /* see if the address was modified */ + if (bcmp(se->se_addr, buf, sa_len) != 0) + modified = 1; + } + + if (error != 0 || !modified) { + /* leave the original as is */ + sockaddrlist_free(dst_sl); + } else { + /* + * At least one address was modified and there were no errors; + * ditch the original and return the modified list. 
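Lacking a dedicated connectx filter callback, sflt_connectxout() runs each address of a private copy of the list through sflt_connectout() and commits the copy only if some filter rewrote an address and no error occurred. The commit-on-change core as a generic sketch over fixed-size records (the 32-byte size is illustrative):

#include <string.h>

#define ADDRLEN	32		/* illustrative fixed address size */

/*
 * Remember each original address, let the per-address hook rewrite
 * the copy in place, bail on the first error, and tell the caller
 * whether anything changed so it can swap in the modified list
 * (or free the copy otherwise).
 */
static int
filter_addrs(char addrs[][ADDRLEN], int n,
    int (*filt)(char *addr), int *modified)
{
	char saved[ADDRLEN];
	int i, error;

	*modified = 0;
	for (i = 0; i < n; i++) {
		memcpy(saved, addrs[i], sizeof (saved));
		error = filt(addrs[i]);	/* may rewrite in place */
		if (error != 0)
			return error;	/* caller keeps the original */
		if (memcmp(addrs[i], saved, sizeof (saved)) != 0)
			*modified = 1;	/* caller commits the copy */
	}
	return 0;
}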
+ */ + sockaddrlist_free(*dst_sl0); + *dst_sl0 = dst_sl; + } + + return (error); } __private_extern__ int -sflt_setsockopt( - struct socket *so, - struct sockopt *sopt) +sflt_setsockopt(struct socket *so, struct sockopt *sopt) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_setoption) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_setoption) { + /* + * Retain the filter entry and release + * the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. - sf_setoption(entry->sfe_cookie, so, sopt); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_setoption( + entry->sfe_cookie, so, sopt); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -917,41 +1040,46 @@ sflt_setsockopt( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_getsockopt( - struct socket *so, - struct sockopt *sopt) +sflt_getsockopt(struct socket *so, struct sockopt *sopt) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_getoption) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_getoption) { + /* + * Retain the filter entry and release + * the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. 
- sf_getoption(entry->sfe_cookie, so, sopt); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_getoption( + entry->sfe_cookie, so, sopt); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -961,49 +1089,56 @@ sflt_getsockopt( if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } __private_extern__ int -sflt_data_out( - struct socket *so, - const struct sockaddr *to, - mbuf_t *data, - mbuf_t *control, - sflt_data_flag_t flags) +sflt_data_out(struct socket *so, const struct sockaddr *to, mbuf_t *data, + mbuf_t *control, sflt_data_flag_t flags) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int unlocked = 0; - int setsendthread = 0; - int error = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int unlocked = 0; + int setsendthread = 0; + int error = 0; + lck_rw_lock_shared(sock_filter_lock); for (entry = so->so_filt; entry && error == 0; - entry = entry->sfe_next_onsocket) { - if ((entry->sfe_flags & SFEF_ATTACHED) - && entry->sfe_filter->sf_filter.sf_data_out) { - // Retain the filter entry and release the socket filter lock + entry = entry->sfe_next_onsocket) { + /* skip if this is a subflow socket */ + if (so->so_flags & SOF_MP_SUBFLOW) + continue; + if ((entry->sfe_flags & SFEF_ATTACHED) && + entry->sfe_filter->sf_filter.sf_data_out) { + /* + * Retain the filter entry and + * release the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { if (so->so_send_filt_thread == NULL) { setsendthread = 1; - so->so_send_filt_thread = current_thread(); + so->so_send_filt_thread = + current_thread(); } socket_unlock(so, 0); unlocked = 1; } - - // Call the filter - error = entry->sfe_filter->sf_filter. 
- sf_data_out(entry->sfe_cookie, so, to, data, control, flags); - - // Take the socket filter lock again and release the entry + + /* Call the filter */ + error = entry->sfe_filter->sf_filter.sf_data_out( + entry->sfe_cookie, so, to, data, control, flags); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } @@ -1012,99 +1147,101 @@ sflt_data_out( if (unlocked) { socket_lock(so, 0); - if (setsendthread) so->so_send_filt_thread = NULL; + if (setsendthread) + so->so_send_filt_thread = NULL; } - - return error; + + return (error); } __private_extern__ int -sflt_data_in( - struct socket *so, - const struct sockaddr *from, - mbuf_t *data, - mbuf_t *control, - sflt_data_flag_t flags) +sflt_data_in(struct socket *so, const struct sockaddr *from, mbuf_t *data, + mbuf_t *control, sflt_data_flag_t flags) { - if (so->so_filt == NULL) return 0; - - struct socket_filter_entry *entry; - int error = 0; - int unlocked = 0; - + if (so->so_filt == NULL) + return (0); + + struct socket_filter_entry *entry; + int error = 0; + int unlocked = 0; + lck_rw_lock_shared(sock_filter_lock); - + for (entry = so->so_filt; entry && (error == 0); - entry = entry->sfe_next_onsocket) { + entry = entry->sfe_next_onsocket) { + /* skip if this is a subflow socket */ + if (so->so_flags & SOF_MP_SUBFLOW) + continue; if ((entry->sfe_flags & SFEF_ATTACHED) && - entry->sfe_filter->sf_filter.sf_data_in) { - // Retain the filter entry and release the socket filter lock + entry->sfe_filter->sf_filter.sf_data_in) { + /* + * Retain the filter entry and + * release the socket filter lock + */ sflt_entry_retain(entry); lck_rw_unlock_shared(sock_filter_lock); - - // If the socket isn't already unlocked, unlock it + + /* If the socket isn't already unlocked, unlock it */ if (unlocked == 0) { unlocked = 1; socket_unlock(so, 0); } - - // Call the filter + + /* Call the filter */ error = entry->sfe_filter->sf_filter.sf_data_in( - entry->sfe_cookie, so, from, data, control, flags); - - // Take the socket filter lock again and release the entry + entry->sfe_cookie, so, from, data, control, flags); + + /* + * Take the socket filter lock again + * and release the entry + */ lck_rw_lock_shared(sock_filter_lock); sflt_entry_release(entry); } } lck_rw_unlock_shared(sock_filter_lock); - + if (unlocked) { socket_lock(so, 0); } - - return error; + + return (error); } #pragma mark -- KPI -- errno_t -sflt_attach( - socket_t socket, - sflt_handle handle) +sflt_attach(socket_t socket, sflt_handle handle) { socket_lock(socket, 1); errno_t result = sflt_attach_internal(socket, handle); socket_unlock(socket, 1); - return result; + return (result); } errno_t -sflt_detach( - socket_t socket, - sflt_handle handle) +sflt_detach(socket_t socket, sflt_handle handle) { - struct socket_filter_entry *entry; + struct socket_filter_entry *entry; errno_t result = 0; - + if (socket == NULL || handle == 0) - return EINVAL; - + return (EINVAL); + lck_rw_lock_exclusive(sock_filter_lock); - for (entry = socket->so_filt; entry; - entry = entry->sfe_next_onsocket) { + for (entry = socket->so_filt; entry; entry = entry->sfe_next_onsocket) { if (entry->sfe_filter->sf_filter.sf_handle == handle && - (entry->sfe_flags & SFEF_ATTACHED) != 0) { + (entry->sfe_flags & SFEF_ATTACHED) != 0) { break; } } - + if (entry != NULL) { sflt_detach_locked(entry); } lck_rw_unlock_exclusive(sock_filter_lock); - - return result; + + return (result); } struct solist { @@ -1113,11 +1250,8 @@ struct solist { }; 
errno_t -sflt_register( - const struct sflt_filter *filter, - int domain, - int type, - int protocol) +sflt_register(const struct sflt_filter *filter, int domain, int type, + int protocol) { struct socket_filter *sock_filt = NULL; struct socket_filter *match = NULL; @@ -1129,17 +1263,17 @@ sflt_register( struct solist *solisthead = NULL, *solist = NULL; if (pr == NULL) - return ENOENT; + return (ENOENT); if (filter->sf_attach == NULL || filter->sf_detach == NULL || filter->sf_handle == 0 || filter->sf_name == NULL) - return EINVAL; + return (EINVAL); /* Allocate the socket filter */ MALLOC(sock_filt, struct socket_filter *, sizeof (*sock_filt), M_IFADDR, M_WAITOK); if (sock_filt == NULL) { - return ENOBUFS; + return (ENOBUFS); } bzero(sock_filt, sizeof (*sock_filt)); @@ -1169,7 +1303,7 @@ sflt_register( break; } } - + /* Add the entry only if there was no existing entry */ if (match == NULL) { TAILQ_INSERT_TAIL(&sock_filter_head, sock_filt, sf_global_next); @@ -1184,53 +1318,55 @@ sflt_register( if (match != NULL) { FREE(sock_filt, M_IFADDR); - return EEXIST; + return (EEXIST); } if (!(filter->sf_flags & SFLT_EXTENDED_REGISTRY)) - return error; + return (error); /* * Setup the filter on the TCP and UDP sockets already created. */ -#define SOLIST_ADD(_so) do { \ +#define SOLIST_ADD(_so) do { \ solist->next = solisthead; \ sock_retain((_so)); \ solist->so = (_so); \ solisthead = solist; \ } while (0) if (protocol == IPPROTO_TCP) { - lck_rw_lock_shared(tcbinfo.mtx); - LIST_FOREACH(inp, tcbinfo.listhead, inp_list) { + lck_rw_lock_shared(tcbinfo.ipi_lock); + LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { so = inp->inp_socket; - if (so == NULL || so->so_state & SS_DEFUNCT || - so->so_state & SS_NOFDREF || - !INP_CHECK_SOCKAF(so, domain) || - !INP_CHECK_SOCKTYPE(so, type)) + if (so == NULL || (so->so_state & SS_DEFUNCT) || + (!(so->so_flags & SOF_MP_SUBFLOW) && + (so->so_state & SS_NOFDREF)) || + !SOCK_CHECK_DOM(so, domain) || + !SOCK_CHECK_TYPE(so, type)) continue; - MALLOC(solist, struct solist *, sizeof(*solist), + MALLOC(solist, struct solist *, sizeof (*solist), M_IFADDR, M_NOWAIT); if (!solist) continue; SOLIST_ADD(so); } - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); } else if (protocol == IPPROTO_UDP) { - lck_rw_lock_shared(udbinfo.mtx); - LIST_FOREACH(inp, udbinfo.listhead, inp_list) { + lck_rw_lock_shared(udbinfo.ipi_lock); + LIST_FOREACH(inp, udbinfo.ipi_listhead, inp_list) { so = inp->inp_socket; - if (so == NULL || so->so_state & SS_DEFUNCT || - so->so_state & SS_NOFDREF || - !INP_CHECK_SOCKAF(so, domain) || - !INP_CHECK_SOCKTYPE(so, type)) + if (so == NULL || (so->so_state & SS_DEFUNCT) || + (!(so->so_flags & SOF_MP_SUBFLOW) && + (so->so_state & SS_NOFDREF)) || + !SOCK_CHECK_DOM(so, domain) || + !SOCK_CHECK_TYPE(so, type)) continue; - MALLOC(solist, struct solist *, sizeof(*solist), + MALLOC(solist, struct solist *, sizeof (*solist), M_IFADDR, M_NOWAIT); if (!solist) continue; SOLIST_ADD(so); } - lck_rw_done(udbinfo.mtx); + lck_rw_done(udbinfo.ipi_lock); } /* XXX it's possible to walk the raw socket list as well */ #undef SOLIST_ADD @@ -1270,153 +1406,150 @@ sflt_register( FREE(solist, M_IFADDR); } - return error; + return (error); } errno_t -sflt_unregister( - sflt_handle handle) +sflt_unregister(sflt_handle handle) { struct socket_filter *filter; lck_rw_lock_exclusive(sock_filter_lock); - + /* Find the entry by the handle */ TAILQ_FOREACH(filter, &sock_filter_head, sf_global_next) { if (filter->sf_filter.sf_handle == handle) break; } - + if (filter) { - // 
Remove it from the global list + /* Remove it from the global list */ TAILQ_REMOVE(&sock_filter_head, filter, sf_global_next); - - // Remove it from the protosw list + + /* Remove it from the protosw list */ if ((filter->sf_filter.sf_flags & SFLT_GLOBAL) != 0) { - TAILQ_REMOVE(&filter->sf_proto->pr_filter_head, filter, sf_protosw_next); + TAILQ_REMOVE(&filter->sf_proto->pr_filter_head, + filter, sf_protosw_next); } - - // Detach from any sockets + + /* Detach from any sockets */ struct socket_filter_entry *entry = NULL; - - for (entry = filter->sf_entry_head; entry; entry = entry->sfe_next_onfilter) { + + for (entry = filter->sf_entry_head; entry; + entry = entry->sfe_next_onfilter) { sflt_detach_locked(entry); } - - // Release the filter + + /* Release the filter */ sflt_release_locked(filter); } - + lck_rw_unlock_exclusive(sock_filter_lock); - + if (filter == NULL) - return ENOENT; - - return 0; + return (ENOENT); + + return (0); } errno_t -sock_inject_data_in( - socket_t so, - const struct sockaddr* from, - mbuf_t data, - mbuf_t control, - sflt_data_flag_t flags) +sock_inject_data_in(socket_t so, const struct sockaddr *from, mbuf_t data, + mbuf_t control, sflt_data_flag_t flags) { int error = 0; - if (so == NULL || data == NULL) return EINVAL; - + + if (so == NULL || data == NULL) + return (EINVAL); + if (flags & sock_data_filt_flag_oob) { - return ENOTSUP; + return (ENOTSUP); } - + socket_lock(so, 1); - + + /* reject if this is a subflow socket */ + if (so->so_flags & SOF_MP_SUBFLOW) { + error = ENOTSUP; + goto done; + } + if (from) { - if (sbappendaddr(&so->so_rcv, (struct sockaddr*)(uintptr_t)from, data, - control, NULL)) + if (sbappendaddr(&so->so_rcv, + (struct sockaddr *)(uintptr_t)from, data, control, NULL)) sorwakeup(so); goto done; } - + if (control) { if (sbappendcontrol(&so->so_rcv, data, control, NULL)) sorwakeup(so); goto done; } - + if (flags & sock_data_filt_flag_record) { if (control || from) { error = EINVAL; goto done; } - if (sbappendrecord(&so->so_rcv, (struct mbuf*)data)) + if (sbappendrecord(&so->so_rcv, (struct mbuf *)data)) sorwakeup(so); goto done; } - + if (sbappend(&so->so_rcv, data)) sorwakeup(so); done: socket_unlock(so, 1); - return error; + return (error); } errno_t -sock_inject_data_out( - socket_t so, - const struct sockaddr* to, - mbuf_t data, - mbuf_t control, - sflt_data_flag_t flags) +sock_inject_data_out(socket_t so, const struct sockaddr *to, mbuf_t data, + mbuf_t control, sflt_data_flag_t flags) { - int sosendflags = 0; - if (flags & sock_data_filt_flag_oob) sosendflags = MSG_OOB; - return sosend(so, (struct sockaddr*)(uintptr_t)to, NULL, - data, control, sosendflags); + int sosendflags = 0; + + /* reject if this is a subflow socket */ + if (so->so_flags & SOF_MP_SUBFLOW) + return (ENOTSUP); + + if (flags & sock_data_filt_flag_oob) + sosendflags = MSG_OOB; + return (sosend(so, (struct sockaddr *)(uintptr_t)to, NULL, + data, control, sosendflags)); } sockopt_dir -sockopt_direction( - sockopt_t sopt) +sockopt_direction(sockopt_t sopt) { - return (sopt->sopt_dir == SOPT_GET) ? sockopt_get : sockopt_set; + return ((sopt->sopt_dir == SOPT_GET) ? 
sockopt_get : sockopt_set); } int -sockopt_level( - sockopt_t sopt) +sockopt_level(sockopt_t sopt) { - return sopt->sopt_level; + return (sopt->sopt_level); } int -sockopt_name( - sockopt_t sopt) +sockopt_name(sockopt_t sopt) { - return sopt->sopt_name; + return (sopt->sopt_name); } size_t -sockopt_valsize( - sockopt_t sopt) +sockopt_valsize(sockopt_t sopt) { - return sopt->sopt_valsize; + return (sopt->sopt_valsize); } errno_t -sockopt_copyin( - sockopt_t sopt, - void *data, - size_t len) +sockopt_copyin(sockopt_t sopt, void *data, size_t len) { - return sooptcopyin(sopt, data, len, len); + return (sooptcopyin(sopt, data, len, len)); } errno_t -sockopt_copyout( - sockopt_t sopt, - void *data, - size_t len) +sockopt_copyout(sockopt_t sopt, void *data, size_t len) { - return sooptcopyout(sopt, data, len); + return (sooptcopyout(sopt, data, len)); } diff --git a/bsd/kern/mach_fat.c b/bsd/kern/mach_fat.c index e2fd1e350..acac3b819 100644 --- a/bsd/kern/mach_fat.c +++ b/bsd/kern/mach_fat.c @@ -40,7 +40,7 @@ #include /********************************************************************** - * Routine: fatfile_getarch2() + * Routine: fatfile_getarch() * * Function: Locate the architecture-dependant contents of a fat * file that match this CPU. @@ -57,7 +57,7 @@ * KERN_FAILURE: No valid architecture found. **********************************************************************/ static load_return_t -fatfile_getarch2( +fatfile_getarch( #if 0 struct vnode *vp, #else @@ -121,12 +121,6 @@ fatfile_getarch2( if (size == 0) return(LOAD_BADMACHO); - /* - * Ignore LIB64 flag so that binary slices with the flag set - * don't choke in grade_binary. - */ - mask_bits |= CPU_SUBTYPE_LIB64; - /* * Scan the fat_arch's looking for the best one. */ addr = data_ptr; @@ -134,29 +128,20 @@ fatfile_getarch2( best_grade = 0; arch = (struct fat_arch *) (addr + sizeof(struct fat_header)); for (; nfat_arch-- > 0; arch++) { - - /* - * Collect flags from both cputype and cpusubtype - */ - testtype = OSSwapBigToHostInt32(arch->cputype) | - (OSSwapBigToHostInt32(arch->cpusubtype) & - CPU_SUBTYPE_MASK); - testsubtype = OSSwapBigToHostInt32(arch->cpusubtype) - & ~CPU_SUBTYPE_MASK; + testtype = OSSwapBigToHostInt32(arch->cputype); + testsubtype = OSSwapBigToHostInt32(arch->cpusubtype) & ~CPU_SUBTYPE_MASK; /* * Check to see if right cpu type. */ - if((testtype & ~mask_bits) != req_cpu_type) { + if((testtype & ~mask_bits) != (req_cpu_type & ~mask_bits)) { continue; } /* * Get the grade of the cpu subtype (without feature flags) */ - grade = grade_binary( - (testtype & ~CPU_SUBTYPE_LIB64), - testsubtype); + grade = grade_binary(testtype, testsubtype); /* * Remember it if it's the best we've seen. @@ -200,36 +185,11 @@ fatfile_getarch_affinity( struct fat_arch *archret, int affinity __unused) { - /* - * Ignore all architectural bits when determining if an image - * in a fat file should be skipped or graded. - */ - return fatfile_getarch2(vp, data_ptr, cpu_type(), - CPU_ARCH_MASK, archret); -} - -/********************************************************************** - * Routine: fatfile_getarch() - * - * Function: Locate the architecture-dependant contents of a fat - * file that match this CPU. - * - * Args: vp: The vnode for the fat file. - * header: A pointer to the fat file header. - * archret (out): Pointer to fat_arch structure to hold - * the results. - * - * Returns: KERN_SUCCESS: Valid architecture found. - * KERN_FAILURE: No valid architecture found. 
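fatfile_getarch() above selects a slice by byte-swapping each big-endian fat_arch entry, discarding the capability bits from the subtype, and keeping the entry that grade_binary() scores highest. A self-contained sketch of that selection loop, with toy_* names and a placeholder grader standing in for the kernel's types and grade_binary():

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>	/* ntohl(): fat headers are stored big-endian */

/* Reduced fat_arch; the real one lives in EXTERNAL_HEADERS/mach-o/fat.h. */
struct toy_fat_arch {
	uint32_t cputype, cpusubtype, offset, size, align;
};

#define TOY_CPU_SUBTYPE_MASK	0xff000000u	/* capability bits */

/* Hypothetical grader standing in for grade_binary(): higher is better. */
static int
toy_grade(uint32_t type, uint32_t subtype)
{
	(void)type; (void)subtype;
	return (1);	/* placeholder: accepts anything it is asked about */
}

static const struct toy_fat_arch *
pick_best_slice(const struct toy_fat_arch *archs, uint32_t nfat_arch,
    uint32_t req_cpu_type, uint32_t mask_bits)
{
	const struct toy_fat_arch *best = NULL;
	int best_grade = 0;

	for (uint32_t i = 0; i < nfat_arch; i++) {
		uint32_t type = ntohl(archs[i].cputype);
		uint32_t subtype =
		    ntohl(archs[i].cpusubtype) & ~TOY_CPU_SUBTYPE_MASK;

		/* Wrong CPU type once the masked bits are ignored: skip. */
		if ((type & ~mask_bits) != (req_cpu_type & ~mask_bits))
			continue;

		int grade = toy_grade(type, subtype);
		if (grade > best_grade) {
			best_grade = grade;
			best = &archs[i];
		}
	}
	return (best);	/* NULL is the no-match case (LOAD_BADARCH above) */
}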
- **********************************************************************/ -load_return_t -fatfile_getarch( - struct vnode *vp, - vm_offset_t data_ptr, - struct fat_arch *archret) -{ - return fatfile_getarch2(vp, data_ptr, cpu_type(), - CPU_SUBTYPE_LIB64, archret); + /* + * Ignore all architectural bits when determining if an image + * in a fat file should be skipped or graded. + */ + return fatfile_getarch(vp, data_ptr, cpu_type(), CPU_ARCH_MASK, archret); } /********************************************************************** @@ -254,7 +214,6 @@ fatfile_getarch_with_bits( vm_offset_t data_ptr, struct fat_arch *archret) { - return fatfile_getarch2(vp, data_ptr, archbits | cpu_type(), - CPU_SUBTYPE_LIB64, archret); + return fatfile_getarch(vp, data_ptr, archbits | (cpu_type() & ~CPU_ARCH_MASK), 0, archret); } diff --git a/bsd/kern/mach_fat.h b/bsd/kern/mach_fat.h index f05c3675b..df71ddc2a 100644 --- a/bsd/kern/mach_fat.h +++ b/bsd/kern/mach_fat.h @@ -36,8 +36,6 @@ load_return_t fatfile_getarch_affinity(struct vnode *vp, vm_offset_t data_ptr, struct fat_arch *archret, int affinity); -load_return_t fatfile_getarch(struct vnode *vp, vm_offset_t data_ptr, - struct fat_arch *archret); load_return_t fatfile_getarch_with_bits(struct vnode *vp, integer_t archbits, vm_offset_t data_ptr, struct fat_arch *archret); diff --git a/bsd/kern/mach_loader.c b/bsd/kern/mach_loader.c index 02b660be1..711729eeb 100644 --- a/bsd/kern/mach_loader.c +++ b/bsd/kern/mach_loader.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include /* vm_allocate() */ @@ -79,6 +80,7 @@ #include #include #include +#include /* for kIOReturnNotPrivileged */ /* * XXX vm/pmap.h should not treat these prototypes as MACH_KERNEL_PRIVATE @@ -127,6 +129,7 @@ parse_machfile( off_t macho_size, int depth, int64_t slide, + int64_t dyld_slide, load_result_t *result ); @@ -143,6 +146,13 @@ load_segment( load_result_t *result ); +static load_return_t +load_uuid( + struct uuid_command *uulp, + char *command_end, + load_result_t *result +); + static load_return_t load_code_signature( struct linkedit_data_command *lcp, @@ -159,7 +169,9 @@ set_code_unprotect( caddr_t addr, vm_map_t map, int64_t slide, - struct vnode *vp); + struct vnode *vp, + cpu_type_t cputype, + cpu_subtype_t cpusubtype); #endif static @@ -297,6 +309,7 @@ load_machfile( task_t task = current_task(); proc_t p = current_proc(); mach_vm_offset_t aslr_offset = 0; + mach_vm_offset_t dyld_aslr_offset = 0; kern_return_t kret; if (macho_size > file_size) { @@ -327,14 +340,16 @@ load_machfile( 0, vm_compute_max_offset((imgp->ip_flags & IMGPF_IS_64BIT)), TRUE); - } else map = new_map; #ifndef CONFIG_ENFORCE_SIGNED_CODE - /* This turns off faulting for executable pages, which allows to - * circumvent Code Signing Enforcement */ - if ( (header->flags & MH_ALLOW_STACK_EXECUTION) ) + /* This turns off faulting for executable pages, which allows + * to circumvent Code Signing Enforcement. The per process + * flag (CS_ENFORCEMENT) is not set yet, but we can use the + * global flag. + */ + if ( !cs_enforcement(NULL) && (header->flags & MH_ALLOW_STACK_EXECUTION) ) vm_map_disable_NX(map); #endif @@ -344,12 +359,20 @@ load_machfile( vm_map_disallow_data_exec(map); /* - * Compute a random offset for ASLR. + * Compute a random offset for ASLR, and an independent random offset for dyld. 
*/ if (!(imgp->ip_flags & IMGPF_DISABLE_ASLR)) { + uint64_t max_slide_pages; + + max_slide_pages = vm_map_get_max_aslr_slide_pages(map); + aslr_offset = random(); - aslr_offset %= 1 << ((imgp->ip_flags & IMGPF_IS_64BIT) ? 16 : 8); - aslr_offset <<= PAGE_SHIFT; + aslr_offset %= max_slide_pages; + aslr_offset <<= vm_map_page_shift(map); + + dyld_aslr_offset = random(); + dyld_aslr_offset %= max_slide_pages; + dyld_aslr_offset <<= vm_map_page_shift(map); } if (!result) @@ -358,7 +381,7 @@ load_machfile( *result = load_result_null; lret = parse_machfile(vp, map, thread, header, file_offset, macho_size, - 0, (int64_t)aslr_offset, result); + 0, (int64_t)aslr_offset, (int64_t)dyld_aslr_offset, result); if (lret != LOAD_SUCCESS) { if (create_map) { @@ -367,19 +390,6 @@ load_machfile( return(lret); } -#if CONFIG_EMBEDDED - /* - * Check to see if the page zero is enforced by the map->min_offset. - */ - if (vm_map_has_hard_pagezero(map, 0x1000) == FALSE) { - if (create_map) { - vm_map_deallocate(map); /* will lose pmap reference too */ - } - printf("Cannot enforce a hard page-zero for %s\n", imgp->ip_strings); - psignal(vfs_context_proc(imgp->ip_vfs_context), SIGKILL); - return (LOAD_BADMACHO); - } -#else /* * For 64-bit users, check for presence of a 4GB page zero * which will enable the kernel to share the user's address space @@ -390,7 +400,6 @@ load_machfile( vm_map_has_4GB_pagezero(map)) { vm_map_set_4GB_pagezero(map); } -#endif /* * Commit to new map. * @@ -464,13 +473,13 @@ parse_machfile( off_t macho_size, int depth, int64_t aslr_offset, + int64_t dyld_aslr_offset, load_result_t *result ) { uint32_t ncmds; struct load_command *lcp; struct dylinker_command *dlp = 0; - struct uuid_command *uulp = 0; integer_t dlarchbits = 0; void * control; load_return_t ret = LOAD_SUCCESS; @@ -505,7 +514,7 @@ parse_machfile( /* * Check to see if right machine type. */ - if (((cpu_type_t)(header->cputype & ~CPU_ARCH_MASK) != cpu_type()) || + if (((cpu_type_t)(header->cputype & ~CPU_ARCH_MASK) != (cpu_type() & ~CPU_ARCH_MASK)) || !grade_binary(header->cputype, header->cpusubtype & ~CPU_SUBTYPE_MASK)) return(LOAD_BADARCH); @@ -583,9 +592,14 @@ parse_machfile( slide = aslr_offset; } - /* - * Scan through the commands, processing each one as necessary. + /* + * Scan through the commands, processing each one as necessary. 
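The ASLR block above replaces the old fixed 8-/16-bit slide range with whatever vm_map_get_max_aslr_slide_pages() reports for the target map, and draws a second, independent slide for dyld. A sketch of the pages-to-bytes computation, with max_slide_pages and page_shift as assumed inputs (the kernel obtains them from vm_map_get_max_aslr_slide_pages() and vm_map_page_shift(), and uses its own random() source):

#include <stdint.h>
#include <stdlib.h>

/* Sketch of the slide computation in load_machfile().
 * Assumes max_slide_pages != 0. */
static uint64_t
compute_slide(uint64_t max_slide_pages, unsigned page_shift)
{
	uint64_t slide = (uint64_t)random();

	slide %= max_slide_pages;	/* choose a page count in range */
	slide <<= page_shift;		/* convert pages -> bytes */
	return (slide);
}

Drawing aslr_offset and dyld_aslr_offset as independent values means that learning the main image's slide no longer reveals where dyld was mapped.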
+ * We parse in three passes through the headers: + * 1: thread state, uuid, code signature + * 2: segments + * 3: dyld, encryption, check entry point */ + for (pass = 1; pass <= 3; pass++) { /* @@ -635,18 +649,50 @@ parse_machfile( */ switch(lcp->cmd) { case LC_SEGMENT: + if (pass != 2) + break; + + if (abi64) { + /* + * Having an LC_SEGMENT command for the + * wrong ABI is invalid + */ + ret = LOAD_BADMACHO; + break; + } + + ret = load_segment(lcp, + header->filetype, + control, + file_offset, + macho_size, + vp, + map, + slide, + result); + break; case LC_SEGMENT_64: if (pass != 2) break; + + if (!abi64) { + /* + * Having an LC_SEGMENT_64 command for the + * wrong ABI is invalid + */ + ret = LOAD_BADMACHO; + break; + } + ret = load_segment(lcp, - header->filetype, - control, - file_offset, - macho_size, - vp, - map, - slide, - result); + header->filetype, + control, + file_offset, + macho_size, + vp, + map, + slide, + result); break; case LC_UNIXTHREAD: if (pass != 1) @@ -680,8 +726,9 @@ parse_machfile( break; case LC_UUID: if (pass == 1 && depth == 1) { - uulp = (struct uuid_command *)lcp; - memcpy(&result->uuid[0], &uulp->uuid[0], sizeof(result->uuid)); + ret = load_uuid((struct uuid_command *) lcp, + (char *)addr + mach_header_sz + header->sizeofcmds, + result); } break; case LC_CODE_SIGNATURE: @@ -710,18 +757,29 @@ parse_machfile( break; #if CONFIG_CODE_DECRYPTION case LC_ENCRYPTION_INFO: + case LC_ENCRYPTION_INFO_64: if (pass != 3) break; ret = set_code_unprotect( (struct encryption_info_command *) lcp, - addr, map, slide, vp); + addr, map, slide, vp, + header->cputype, header->cpusubtype); if (ret != LOAD_SUCCESS) { printf("proc %d: set_code_unprotect() error %d " "for file \"%s\"\n", p->p_pid, ret, vp->v_name); - /* Don't let the app run if it's + /* + * Don't let the app run if it's * encrypted but we failed to set up the - * decrypter */ + * decrypter. If the keys are missing it will + * return LOAD_DECRYPTFAIL. + */ + if (ret == LOAD_DECRYPTFAIL) { + /* failed to load due to missing FP keys */ + proc_lock(p); + p->p_lflag |= P_LTERM_DECRYPTFAIL; + proc_unlock(p); + } psignal(p, SIGKILL); } break; @@ -754,9 +812,13 @@ parse_machfile( } if ((ret == LOAD_SUCCESS) && (dlp != 0)) { - /* load the dylinker, and always slide it by the ASLR - * offset regardless of PIE */ - ret = load_dylinker(dlp, dlarchbits, map, thread, depth, aslr_offset, result); + /* + * load the dylinker, and slide it by the independent DYLD ASLR + * offset regardless of the PIE-ness of the main binary. 
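As the comment above says, parse_machfile() now makes three passes over the load commands: thread state, UUID and code signature first, segments second, dyld and decryption last, so each stage only runs once everything it depends on has been seen. A schematic of that pass structure, using a reduced command header and invented TOY_* command tags:

#include <stdint.h>
#include <stddef.h>

/* Reduced load command header, as in mach-o/loader.h. */
struct toy_load_cmd {
	uint32_t cmd;
	uint32_t cmdsize;
};

enum { TOY_THREAD = 1, TOY_SEGMENT = 2, TOY_DYLINKER = 3 };

/*
 * Schematic of the three-pass loop: every command is walked on
 * every pass but only acted on in its own pass.
 */
static int
walk_commands(const uint8_t *cmds, uint32_t ncmds, uint32_t sizeofcmds)
{
	for (int pass = 1; pass <= 3; pass++) {
		uint32_t off = 0;

		for (uint32_t i = 0; i < ncmds; i++) {
			if (sizeofcmds - off < sizeof(struct toy_load_cmd))
				return (-1);	/* header out of bounds */

			const struct toy_load_cmd *lcp =
			    (const struct toy_load_cmd *)(cmds + off);

			/* Reject commands overflowing the command area. */
			if (lcp->cmdsize < sizeof(*lcp) ||
			    sizeofcmds - off < lcp->cmdsize)
				return (-1);
			off += lcp->cmdsize;

			switch (lcp->cmd) {
			case TOY_THREAD:
				if (pass != 1)
					break;
				/* ... capture entry thread state ... */
				break;
			case TOY_SEGMENT:
				if (pass != 2)
					break;
				/* ... map the segment ... */
				break;
			case TOY_DYLINKER:
				if (pass != 3)
					break;
				/* ... remember dyld, loaded last ... */
				break;
			}
		}
	}
	return (0);
}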
+ */ + + ret = load_dylinker(dlp, dlarchbits, map, thread, depth, + dyld_aslr_offset, result); } if((ret == LOAD_SUCCESS) && (depth == 1)) { @@ -910,6 +972,10 @@ load_segment( seg_size = round_page_64(scp->vmsize); map_size = round_page_64(scp->filesize); map_addr = trunc_page_64(scp->vmaddr); /* JVXXX note that in XNU TOT this is round instead of trunc for 64 bits */ + + seg_size = vm_map_round_page(seg_size, vm_map_page_mask(map)); + map_size = vm_map_round_page(map_size, vm_map_page_mask(map)); + if (seg_size == 0) return (KERN_SUCCESS); if (map_addr == 0 && @@ -925,9 +991,6 @@ load_segment( */ seg_size += slide; slide = 0; -#if CONFIG_EMBEDDED - prohibit_pagezero_mapping = TRUE; -#endif /* XXX (4596982) this interferes with Rosetta, so limit to 64-bit tasks */ if (scp->cmd == LC_SEGMENT_64) { prohibit_pagezero_mapping = TRUE; } @@ -973,8 +1036,9 @@ load_segment( VM_FLAGS_FIXED, control, map_offset, TRUE, initprot, maxprot, VM_INHERIT_DEFAULT); - if (ret != KERN_SUCCESS) + if (ret != KERN_SUCCESS) { return (LOAD_NOSPACE); + } /* * If the file didn't end on a page boundary, @@ -1044,7 +1108,28 @@ load_segment( return ret; } +static +load_return_t +load_uuid( + struct uuid_command *uulp, + char *command_end, + load_result_t *result +) +{ + /* + * We need to check the following for this command: + * - The command size should be at least the size of struct uuid_command + * - The UUID part of the command should be completely within the mach-o header + */ + if ((uulp->cmdsize < sizeof(struct uuid_command)) || + (((char *)uulp + sizeof(struct uuid_command)) > command_end)) { + return (LOAD_BADMACHO); + } + + memcpy(&result->uuid[0], &uulp->uuid[0], sizeof(result->uuid)); + return (LOAD_SUCCESS); +} static load_return_t @@ -1371,7 +1456,7 @@ load_dylinker( */ ret = parse_machfile(vp, map, thread, header, file_offset, - macho_size, depth, slide, myresult); + macho_size, depth, slide, 0, myresult); /* * If it turned out something was in the way, then we'll take @@ -1392,7 +1477,8 @@ load_dylinker( * subsequent map attempt (with a slide) in "myresult" */ ret = parse_machfile(vp, VM_MAP_NULL, THREAD_NULL, header, - file_offset, macho_size, depth, 0 /* slide */, myresult); + file_offset, macho_size, depth, + 0 /* slide */, 0, myresult); if (ret != LOAD_SUCCESS) { goto out; @@ -1427,7 +1513,8 @@ load_dylinker( *myresult = load_result_null; ret = parse_machfile(vp, map, thread, header, - file_offset, macho_size, depth, slide_amount, myresult); + file_offset, macho_size, depth, + slide_amount, 0, myresult); if (ret) { goto out; @@ -1476,17 +1563,20 @@ load_code_signature( } blob = ubc_cs_blob_get(vp, cputype, -1); - if (blob != NULL) { - /* we already have a blob for this vnode and cputype */ - if (blob->csb_cpu_type == cputype && - blob->csb_base_offset == macho_offset && - blob->csb_mem_size == lcp->datasize) { - /* it matches the blob we want here: we're done */ - ret = LOAD_SUCCESS; - } else { - /* the blob has changed for this vnode: fail ! */ - ret = LOAD_BADMACHO; - } + if (blob != NULL && + blob->csb_cpu_type == cputype && + blob->csb_base_offset == macho_offset && + blob->csb_blob_offset == lcp->dataoff && + blob->csb_mem_size == lcp->datasize) { + /* + * we already have a blob for this vnode and cputype + * and it's at the same offset in Mach-O. Optimize to + * not reload, revalidate, and compare the blob hashes. + * Security will not be compromised, but we might miss + * out on some messagetracer info about the differences + * in blob content.
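load_uuid() above is the template the loader applies before trusting any load command: the declared cmdsize must cover the fixed structure, and the structure must end inside the mapped header. The same two checks in isolation, with a reduced toy_uuid_command in place of the real struct:

#include <stdint.h>
#include <string.h>

/* Reduced uuid_command, as in mach-o/loader.h. */
struct toy_uuid_command {
	uint32_t cmd;
	uint32_t cmdsize;
	uint8_t	 uuid[16];
};

/*
 * command_end points one byte past the mapped command area
 * (header plus sizeofcmds), exactly as load_uuid() receives it.
 */
static int
copy_uuid_checked(const struct toy_uuid_command *uulp,
    const char *command_end, uint8_t out[16])
{
	if (uulp->cmdsize < sizeof(struct toy_uuid_command))
		return (-1);	/* command lies about its own size */
	if ((const char *)uulp + sizeof(struct toy_uuid_command) >
	    command_end)
		return (-1);	/* command runs past the mapped header */

	memcpy(out, uulp->uuid, sizeof(uulp->uuid));
	return (0);
}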
+ */ + ret = LOAD_SUCCESS; goto out; } @@ -1517,6 +1607,7 @@ load_code_signature( cputype, macho_offset, addr, + lcp->dataoff, lcp->datasize)) { ret = LOAD_FAILURE; goto out; } @@ -1553,7 +1644,9 @@ set_code_unprotect( caddr_t addr, vm_map_t map, int64_t slide, - struct vnode *vp) + struct vnode *vp, + cpu_type_t cputype, + cpu_subtype_t cpusubtype) { int result, len; pager_crypt_info_t crypt_info; @@ -1598,13 +1691,21 @@ set_code_unprotect( } /* set up decrypter first */ - kr=text_crypter_create(&crypt_info, cryptname, (void*)vpath); + crypt_file_data_t crypt_data = { + .filename = vpath, + .cputype = cputype, + .cpusubtype = cpusubtype}; + kr=text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data); FREE_ZONE(vpath, MAXPATHLEN, M_NAMEI); if(kr) { printf("set_code_unprotect: unable to create decrypter %s, kr=%d\n", cryptname, kr); - return LOAD_RESOURCE; + if (kr == kIOReturnNotPrivileged) { + /* text encryption returned decryption failure */ + return(LOAD_DECRYPTFAIL); + } else + return LOAD_RESOURCE; } /* this is terrible, but we have to rescan the load commands to find the @@ -1737,7 +1838,7 @@ get_macho_vnode( } /* check access */ - if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx)) != 0) { + if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE | KAUTH_VNODE_READ_DATA, ctx)) != 0) { error = LOAD_PROTECT; goto bad1; } diff --git a/bsd/kern/mach_loader.h b/bsd/kern/mach_loader.h index ece41929e..295199d43 100644 --- a/bsd/kern/mach_loader.h +++ b/bsd/kern/mach_loader.h @@ -89,5 +89,6 @@ load_return_t load_machfile( #define LOAD_RESOURCE 7 /* resource allocation failure */ #define LOAD_ENOENT 8 /* resource not found */ #define LOAD_IOERROR 9 /* IO error */ +#define LOAD_DECRYPTFAIL 10 /* FP decrypt failure */ #endif /* _BSD_KERN_MACH_LOADER_H_ */ diff --git a/bsd/kern/mach_process.c b/bsd/kern/mach_process.c index 1a5cac88f..b0cda592a 100644 --- a/bsd/kern/mach_process.c +++ b/bsd/kern/mach_process.c @@ -83,6 +83,7 @@ #include #include #include +#include <sys/codesign.h> /* cs_allow_invalid() */ #include @@ -92,8 +93,6 @@ #include /* for task_resume() */ #include /* for thread_exception_return() */ -#include <sys/codesign.h> /* cs_allow_invalid() */ - #include /* XXX ken/bsd_kern.c - prototype should be in common header */ @@ -146,7 +145,7 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) } if (uap->req == PT_FORCEQUOTA) { - if (is_suser()) { + if (kauth_cred_issuser(kauth_cred_get())) { OSBitOrAtomic(P_FORCEQUOTA, &t->p_flag); return (0); } else @@ -294,11 +293,17 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) proc_unlock(t); pp = proc_find(t->p_oppid); - proc_reparentlocked(t, pp ? pp : initproc, 1, 0); - if (pp != PROC_NULL) + if (pp != PROC_NULL) { + proc_reparentlocked(t, pp, 1, 0); proc_rele(pp); + } else { + /* original parent exited while traced */ + proc_list_lock(); + t->p_listflag |= P_LIST_DEADPARENT; + proc_list_unlock(); + proc_reparentlocked(t, initproc, 1, 0); + } proc_lock(t); - } t->p_oppid = 0; @@ -313,7 +318,7 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) * is resumed by adding NSIG to p_cursig. [see issig] */ proc_unlock(t); -#if NOTYET +#if CONFIG_MACF error = mac_proc_check_signal(p, t, SIGKILL); if (0 != error) goto resume; @@ -350,7 +355,7 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) * set trace bit * we use sending SIGSTOP as a comparable security check.
*/ -#if NOTYET +#if CONFIG_MACF error = mac_proc_check_signal(p, t, SIGSTOP); if (0 != error) { goto out; @@ -365,7 +370,7 @@ ptrace(struct proc *p, struct ptrace_args *uap, int32_t *retval) * clear trace bit if on * we use sending SIGCONT as a comparable security check. */ -#if NOTYET +#if CONFIG_MACF error = mac_proc_check_signal(p, t, SIGCONT); if (0 != error) { goto out; diff --git a/bsd/kern/makesyscalls.sh b/bsd/kern/makesyscalls.sh index d585708d1..405c027d4 100755 --- a/bsd/kern/makesyscalls.sh +++ b/bsd/kern/makesyscalls.sh @@ -242,8 +242,9 @@ s/\$//g printf "void munge_wws(const void *, void *); \n" > sysarg printf "void munge_wwwsw(const void *, void *); \n" > sysarg printf "void munge_llllll(const void *, void *); \n" > sysarg + printf "void munge_ll(const void *, void *); \n" > sysarg printf "#else \n" > sysarg - printf "/* ARM does not need mungers for BSD system calls. */\n" > sysarg + printf "/* ARM does not need mungers for BSD system calls... */\n" > sysarg printf "#define munge_w NULL \n" > sysarg printf "#define munge_ww NULL \n" > sysarg printf "#define munge_www NULL \n" > sysarg @@ -274,6 +275,7 @@ s/\$//g printf "#define munge_wws NULL \n" > sysarg printf "#define munge_wwwsw NULL \n" > sysarg printf "#define munge_llllll NULL \n" > sysarg + printf "#define munge_ll NULL \n" > sysarg printf "#endif /* __arm__ */\n" > sysarg printf "\n" > sysarg printf "/* Active 64-bit user ABIs do not need munging */\n" > sysarg @@ -482,13 +484,9 @@ s/\$//g add_sysnames_entry = 1 add_sysheader_entry = 1 add_sysproto_entry = 1 - add_64bit_unsafe = 0 - add_64bit_fakesafe = 0 - add_resv = "0" - my_flags = "0" - if ($3 != "ALL" && $3 != "UALL") { + if ($3 != "ALL") { files_keyword_OK = 0 add_sysent_entry = 0 add_sysnames_entry = 0 @@ -511,21 +509,12 @@ s/\$//g add_sysproto_entry = 1 files_keyword_OK = 1 } - if (match($3, "[U]") != 0) { - add_64bit_unsafe = 1 - } - if (match($3, "[F]") != 0) { - add_64bit_fakesafe = 1 - } if (files_keyword_OK == 0) { printf "%s: line %d: unrecognized keyword %s\n", infile, NR, $2 exit 1 } } - else if ($3 == "UALL") { - add_64bit_unsafe = 1; - } parseline() @@ -550,29 +539,25 @@ s/\$//g # each argument consumes 64-bits. # see .../xnu/bsd/dev/ppc/munge.s for munge argument types. 
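The type-mapping code that follows assigns each 32-bit argument a munge code ('w' unsigned word, 's' signed word, 'd' 64-bit doubleword), and the generated munge32 string names a routine that widens the packed 32-bit user arguments into 64-bit slots at syscall entry (per the diffstat, these routines now live in C in bsd/dev/munge.c). A sketch of what a two-word "ww" munger does, with toy_munge_ww as a hypothetical stand-in:

#include <stdint.h>

/*
 * Sketch of a "ww" munger: two 32-bit user arguments are widened
 * in place into two 64-bit argument slots.  Copying runs from the
 * highest argument down so no input word is overwritten before it
 * has been read.  'w' zero-extends; an 's' munger would cast
 * through int32_t to sign-extend instead.
 */
static void
toy_munge_ww(uint64_t *args)
{
	const uint32_t *in = (const uint32_t *)(uintptr_t)args;

	args[1] = in[1];	/* bytes 4..7 -> slot 1, zero-extended */
	args[0] = in[0];	/* bytes 0..3 -> slot 0, zero-extended */
}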
if (argtype[i] == "long") { - if (add_64bit_unsafe == 0) - ext_argtype[i] = "user_long_t"; + ext_argtype[i] = "user_long_t"; munge32 = munge32 "s" munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "u_long") { - if (add_64bit_unsafe == 0) - ext_argtype[i] = "user_ulong_t"; + ext_argtype[i] = "user_ulong_t"; munge32 = munge32 "w" munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "size_t") { - if (add_64bit_unsafe == 0) - ext_argtype[i] = "user_size_t"; + ext_argtype[i] = "user_size_t"; munge32 = munge32 "w" munge64 = munge64 "d" size32 += 4 } else if (argtype[i] == "ssize_t") { - if (add_64bit_unsafe == 0) - ext_argtype[i] = "user_ssize_t"; + ext_argtype[i] = "user_ssize_t"; munge32 = munge32 "s" munge64 = munge64 "d" size32 += 4 @@ -589,9 +574,8 @@ s/\$//g size32 += 4 } else if (argtype[i] == "caddr_t" || argtype[i] == "semun_t" || - match(argtype[i], "[\*]") != 0) { - if (add_64bit_unsafe == 0) - ext_argtype[i] = "user_addr_t"; + argtype[i] == "uuid_t" || match(argtype[i], "[\*]") != 0) { + ext_argtype[i] = "user_addr_t"; munge32 = munge32 "w" munge64 = munge64 "d" size32 += 4 @@ -602,7 +586,8 @@ s/\$//g argtype[i] == "socklen_t" || argtype[i] == "uint32_t" || argtype[i] == "int32_t" || argtype[i] == "sigset_t" || argtype[i] == "gid_t" || argtype[i] == "unsigned int" || argtype[i] == "mode_t" || argtype[i] == "key_t" || - argtype[i] == "mach_port_name_t" || argtype[i] == "au_asid_t") { + argtype[i] == "mach_port_name_t" || argtype[i] == "au_asid_t" || + argtype[i] == "associd_t" || argtype[i] == "connid_t") { munge32 = munge32 "w" munge64 = munge64 "d" size32 += 4 @@ -685,13 +670,10 @@ s/\$//g } } - if (add_64bit_unsafe == 1 && add_64bit_fakesafe == 0) - my_flags = "UNSAFE_64BIT"; - - printf("\t{%s, %s, %s, \(sy_call_t *\)%s, %s, %s, %s, %s},", - argssize, add_resv, my_flags, tempname, munge32, munge64, munge_ret, size32) > sysent - linesize = length(argssize) + length(add_resv) + length(my_flags) + length(tempname) + \ - length(munge32) + length(munge64) + length(munge_ret) + 28 + printf("\t{ \(sy_call_t *\)%s, %s, %s, %s, %s, %s},", + tempname, munge32, munge64, munge_ret, argssize, size32) > sysent + linesize = length(tempname) + length(munge32) + length(munge64) + \ + length(munge_ret) + length(argssize) + length(size32) + 28 align_comment(linesize, 88, sysent) printf("/* %d = %s%s*/\n", syscall_num, funcname, additional_comments) > sysent diff --git a/bsd/kern/mcache.c b/bsd/kern/mcache.c index f3570ae41..0ffea0719 100644 --- a/bsd/kern/mcache.c +++ b/bsd/kern/mcache.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2011 Apple Inc. All rights reserved. + * Copyright (c) 2006-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,7 +72,7 @@ /* Allocate extra in case we need to manually align the pointer */ #define MCACHE_ALLOC_SIZE \ - (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_SIZE) + (sizeof (void *) + MCACHE_SIZE(ncpu) + CPU_CACHE_LINE_SIZE) #define MCACHE_CPU(c) \ (mcache_cpu_t *)((void *)((char *)(c) + MCACHE_SIZE(cpu_number()))) @@ -99,6 +99,7 @@ #define MCACHE_LOCK_TRY(l) lck_mtx_try_lock(l) static int ncpu; +static unsigned int cache_line_size; static lck_mtx_t *mcache_llock; static struct thread *mcache_llock_owner; static lck_attr_t *mcache_llock_attr; @@ -178,6 +179,7 @@ mcache_init(void) char name[32]; ncpu = ml_get_max_cpus(); + (void) mcache_cache_line_size(); /* prime it */ mcache_llock_grp_attr = lck_grp_attr_alloc_init(); mcache_llock_grp = lck_grp_alloc_init("mcache.list", @@ -210,6 +212,9 @@ mcache_init(void) mcache_reap_interval = 15 * hz; mcache_applyall(mcache_cache_bkt_enable); mcache_ready = 1; + + printf("mcache: %d CPU(s), %d bytes CPU cache line size\n", + ncpu, CPU_CACHE_LINE_SIZE); } /* @@ -221,6 +226,20 @@ mcache_getflags(void) return (mcache_flags); } +/* + * Return the CPU cache line size. + */ +__private_extern__ unsigned int +mcache_cache_line_size(void) +{ + if (cache_line_size == 0) { + ml_cpu_info_t cpu_info; + ml_cpu_get_info(&cpu_info); + cache_line_size = cpu_info.cache_line_size; + } + return (cache_line_size); +} + /* * Create a cache using the zone allocator as the backend slab allocator. * The caller may specify any alignment for the object; if it specifies 0 @@ -293,7 +312,7 @@ mcache_create_common(const char *name, size_t bufsize, size_t align, * is okay since we've allocated extra space for this. */ cp = (mcache_t *) - P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_SIZE); + P2ROUNDUP((intptr_t)buf + sizeof (void *), CPU_CACHE_LINE_SIZE); pbuf = (void **)((intptr_t)cp - sizeof (void *)); *pbuf = buf; @@ -378,7 +397,7 @@ mcache_create_common(const char *name, size_t bufsize, size_t align, for (c = 0; c < ncpu; c++) { mcache_cpu_t *ccp = &cp->mc_cpu[c]; - VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_SIZE)); + VERIFY(IS_P2ALIGNED(ccp, CPU_CACHE_LINE_SIZE)); lck_mtx_init(&ccp->cc_lock, cp->mc_cpu_lock_grp, cp->mc_cpu_lock_attr); ccp->cc_objs = -1; @@ -1401,16 +1420,30 @@ mcache_dispatch(void (*func)(void *), void *arg) } __private_extern__ void -mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp) +mcache_buffer_log(mcache_audit_t *mca, void *addr, mcache_t *cp, + struct timeval *base_ts) { + struct timeval now, base = { 0, 0 }; + void *stack[MCACHE_STACK_DEPTH + 1]; + mca->mca_addr = addr; mca->mca_cache = cp; mca->mca_pthread = mca->mca_thread; mca->mca_thread = current_thread(); bcopy(mca->mca_stack, mca->mca_pstack, sizeof (mca->mca_pstack)); mca->mca_pdepth = mca->mca_depth; - bzero(mca->mca_stack, sizeof (mca->mca_stack)); - mca->mca_depth = OSBacktrace(mca->mca_stack, MCACHE_STACK_DEPTH); + bzero(stack, sizeof (stack)); + mca->mca_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1; + bcopy(&stack[1], mca->mca_stack, sizeof (mca->mca_pstack)); + + mca->mca_ptstamp = mca->mca_tstamp; + microuptime(&now); + if (base_ts != NULL) + base = *base_ts; + /* tstamp is in ms relative to base_ts */ + mca->mca_tstamp = ((now.tv_usec - base.tv_usec) / 1000); + if ((now.tv_sec - base.tv_sec) > 0) + mca->mca_tstamp += ((now.tv_sec - base.tv_sec) * 1000); } __private_extern__ void diff --git a/bsd/kern/netboot.c b/bsd/kern/netboot.c index dd238f066..22e6497d2 100644 --- a/bsd/kern/netboot.c +++ 
b/bsd/kern/netboot.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001-2010 Apple Inc. All rights reserved. + * Copyright (c) 2001-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,9 +86,6 @@ IOBSDRegistryEntryGetData(void * entry, const char * property_name, #define BSDP_RESPONSE "bsdp-response" #define DHCP_RESPONSE "dhcp-response" -/* forward declarations */ -int inet_aton(char * cp, struct in_addr * pin); - #define IP_FORMAT "%d.%d.%d.%d" #define IP_CH(ip) ((u_char *)ip) #define IP_LIST(ip) IP_CH(ip)[0],IP_CH(ip)[1],IP_CH(ip)[2],IP_CH(ip)[3] @@ -628,8 +625,7 @@ netboot_mountroot(void) error = ENXIO; goto failed; } - snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", ifp->if_name, - ifp->if_unit); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", if_name(ifp)); printf("netboot: using network interface '%s'\n", ifr.ifr_name); /* bring it up */ diff --git a/bsd/kern/policy_check.c b/bsd/kern/policy_check.c index 270a1bbc2..5bf5c5cba 100644 --- a/bsd/kern/policy_check.c +++ b/bsd/kern/policy_check.c @@ -35,9 +35,7 @@ int get_thread_lock_count(thread_t th); /* forced forward */ static int policy_flags = 0; -#define CHECK_SET_INT_HOOK(x) .mpo_##x = (mpo_##x##_t *)common_int_hook, -#define CHECK_SET_VOID_HOOK(x) .mpo_##x = (mpo_##x##_t *)common_void_hook, - +#define CHECK_SET_HOOK(x) .mpo_##x = (mpo_##x##_t *)common_hook, /* * Init; currently, we only print our arrival notice. @@ -66,7 +64,7 @@ static int policy_check_next = CLASS_PERIOD_MULT; static int -common_int_hook(void) +common_hook(void) { int i; int rv = 0; @@ -120,360 +118,400 @@ common_int_hook(void) return rv; } -static void -common_void_hook(void) -{ - (void)common_int_hook(); - - return; -} - - +#if (MAC_POLICY_OPS_VERSION != 24) +# error "struct mac_policy_ops doesn't match definition in mac_policy.h" +#endif /* * Policy hooks; one per possible hook + * + * Please note that this struct initialization should be kept in sync with + * security/mac_policy.h (mac_policy_ops struct definition). 
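The policy_check.c rewrite above replaces the old CHECK_SET_INT_HOOK/CHECK_SET_VOID_HOOK pair with a single CHECK_SET_HOOK that token-pastes the member name into both the designated initializer and the hook's function-pointer typedef, and adds a compile-time MAC_POLICY_OPS_VERSION gate so the table cannot silently drift from struct mac_policy_ops. A reduced sketch of the same trick with two invented hook slots:

#include <stdio.h>

/* Two invented hook typedefs, mimicking the mpo_*_t typedefs. */
typedef int mpo_hook_a_t(int);
typedef int mpo_hook_b_t(const char *);

struct toy_ops {
	mpo_hook_a_t *mpo_hook_a;
	mpo_hook_b_t *mpo_hook_b;
};

/* One shared body; policy_check.c counts held locks in its version. */
static int
common_hook(void)
{
	return (0);
}

/* Token-pasting fills in both the member name and the typedef name. */
#define CHECK_SET_HOOK(x)	.mpo_##x = (mpo_##x##_t *)common_hook,

static struct toy_ops toy_ops = {
	CHECK_SET_HOOK(hook_a)
	CHECK_SET_HOOK(hook_b)
};

int
main(void)
{
	/*
	 * Calling through the mismatched pointer type is formally
	 * undefined; it works for the same ABI reason the kernel
	 * relies on (extra arguments are simply ignored).
	 */
	printf("%d %d\n", toy_ops.mpo_hook_a(1), toy_ops.mpo_hook_b("x"));
	return (0);
}

The version gate is what makes the single-cast approach maintainable: if mac_policy.h adds or reorders members, the #error fires instead of hooks silently landing in the wrong slots.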
*/ static struct mac_policy_ops policy_ops = { - - /* separate init */ + CHECK_SET_HOOK(audit_check_postselect) + CHECK_SET_HOOK(audit_check_preselect) + + CHECK_SET_HOOK(bpfdesc_label_associate) + CHECK_SET_HOOK(bpfdesc_label_destroy) + CHECK_SET_HOOK(bpfdesc_label_init) + CHECK_SET_HOOK(bpfdesc_check_receive) + + CHECK_SET_HOOK(cred_check_label_update_execve) + CHECK_SET_HOOK(cred_check_label_update) + CHECK_SET_HOOK(cred_check_visible) + CHECK_SET_HOOK(cred_label_associate_fork) + CHECK_SET_HOOK(cred_label_associate_kernel) + CHECK_SET_HOOK(cred_label_associate) + CHECK_SET_HOOK(cred_label_associate_user) + CHECK_SET_HOOK(cred_label_destroy) + CHECK_SET_HOOK(cred_label_externalize_audit) + CHECK_SET_HOOK(cred_label_externalize) + CHECK_SET_HOOK(cred_label_init) + CHECK_SET_HOOK(cred_label_internalize) + CHECK_SET_HOOK(cred_label_update_execve) + CHECK_SET_HOOK(cred_label_update) + + CHECK_SET_HOOK(devfs_label_associate_device) + CHECK_SET_HOOK(devfs_label_associate_directory) + CHECK_SET_HOOK(devfs_label_copy) + CHECK_SET_HOOK(devfs_label_destroy) + CHECK_SET_HOOK(devfs_label_init) + CHECK_SET_HOOK(devfs_label_update) + + CHECK_SET_HOOK(file_check_change_offset) + CHECK_SET_HOOK(file_check_create) + CHECK_SET_HOOK(file_check_dup) + CHECK_SET_HOOK(file_check_fcntl) + CHECK_SET_HOOK(file_check_get_offset) + CHECK_SET_HOOK(file_check_get) + CHECK_SET_HOOK(file_check_inherit) + CHECK_SET_HOOK(file_check_ioctl) + CHECK_SET_HOOK(file_check_lock) + CHECK_SET_HOOK(file_check_mmap_downgrade) + CHECK_SET_HOOK(file_check_mmap) + CHECK_SET_HOOK(file_check_receive) + CHECK_SET_HOOK(file_check_set) + CHECK_SET_HOOK(file_label_init) + CHECK_SET_HOOK(file_label_destroy) + CHECK_SET_HOOK(file_label_associate) + + CHECK_SET_HOOK(ifnet_check_label_update) + CHECK_SET_HOOK(ifnet_check_transmit) + CHECK_SET_HOOK(ifnet_label_associate) + CHECK_SET_HOOK(ifnet_label_copy) + CHECK_SET_HOOK(ifnet_label_destroy) + CHECK_SET_HOOK(ifnet_label_externalize) + CHECK_SET_HOOK(ifnet_label_init) + CHECK_SET_HOOK(ifnet_label_internalize) + CHECK_SET_HOOK(ifnet_label_update) + CHECK_SET_HOOK(ifnet_label_recycle) + + CHECK_SET_HOOK(inpcb_check_deliver) + CHECK_SET_HOOK(inpcb_label_associate) + CHECK_SET_HOOK(inpcb_label_destroy) + CHECK_SET_HOOK(inpcb_label_init) + CHECK_SET_HOOK(inpcb_label_recycle) + CHECK_SET_HOOK(inpcb_label_update) + + CHECK_SET_HOOK(iokit_check_device) + + CHECK_SET_HOOK(ipq_label_associate) + CHECK_SET_HOOK(ipq_label_compare) + CHECK_SET_HOOK(ipq_label_destroy) + CHECK_SET_HOOK(ipq_label_init) + CHECK_SET_HOOK(ipq_label_update) + + CHECK_SET_HOOK(lctx_check_label_update) + CHECK_SET_HOOK(lctx_label_destroy) + CHECK_SET_HOOK(lctx_label_externalize) + CHECK_SET_HOOK(lctx_label_init) + CHECK_SET_HOOK(lctx_label_internalize) + CHECK_SET_HOOK(lctx_label_update) + CHECK_SET_HOOK(lctx_notify_create) + CHECK_SET_HOOK(lctx_notify_join) + CHECK_SET_HOOK(lctx_notify_leave) + + CHECK_SET_HOOK(mbuf_label_associate_bpfdesc) + CHECK_SET_HOOK(mbuf_label_associate_ifnet) + CHECK_SET_HOOK(mbuf_label_associate_inpcb) + CHECK_SET_HOOK(mbuf_label_associate_ipq) + CHECK_SET_HOOK(mbuf_label_associate_linklayer) + CHECK_SET_HOOK(mbuf_label_associate_multicast_encap) + CHECK_SET_HOOK(mbuf_label_associate_netlayer) + CHECK_SET_HOOK(mbuf_label_associate_socket) + CHECK_SET_HOOK(mbuf_label_copy) + CHECK_SET_HOOK(mbuf_label_destroy) + CHECK_SET_HOOK(mbuf_label_init) + + CHECK_SET_HOOK(mount_check_fsctl) + CHECK_SET_HOOK(mount_check_getattr) + CHECK_SET_HOOK(mount_check_label_update) + CHECK_SET_HOOK(mount_check_mount) + 
CHECK_SET_HOOK(mount_check_remount) + CHECK_SET_HOOK(mount_check_setattr) + CHECK_SET_HOOK(mount_check_stat) + CHECK_SET_HOOK(mount_check_umount) + CHECK_SET_HOOK(mount_label_associate) + CHECK_SET_HOOK(mount_label_destroy) + CHECK_SET_HOOK(mount_label_externalize) + CHECK_SET_HOOK(mount_label_init) + CHECK_SET_HOOK(mount_label_internalize) + + CHECK_SET_HOOK(netinet_fragment) + CHECK_SET_HOOK(netinet_icmp_reply) + CHECK_SET_HOOK(netinet_tcp_reply) + + CHECK_SET_HOOK(pipe_check_ioctl) + CHECK_SET_HOOK(pipe_check_kqfilter) + CHECK_SET_HOOK(pipe_check_label_update) + CHECK_SET_HOOK(pipe_check_read) + CHECK_SET_HOOK(pipe_check_select) + CHECK_SET_HOOK(pipe_check_stat) + CHECK_SET_HOOK(pipe_check_write) + CHECK_SET_HOOK(pipe_label_associate) + CHECK_SET_HOOK(pipe_label_copy) + CHECK_SET_HOOK(pipe_label_destroy) + CHECK_SET_HOOK(pipe_label_externalize) + CHECK_SET_HOOK(pipe_label_init) + CHECK_SET_HOOK(pipe_label_internalize) + CHECK_SET_HOOK(pipe_label_update) + + CHECK_SET_HOOK(policy_destroy) + /* special hooks for policy init's */ .mpo_policy_init = hook_policy_init, .mpo_policy_initbsd = hook_policy_initbsd, - - /* operations which return int */ - CHECK_SET_INT_HOOK(audit_check_postselect) - CHECK_SET_INT_HOOK(audit_check_preselect) - CHECK_SET_INT_HOOK(bpfdesc_check_receive) - CHECK_SET_INT_HOOK(cred_check_label_update_execve) - CHECK_SET_INT_HOOK(cred_check_label_update) - CHECK_SET_INT_HOOK(cred_check_visible) - CHECK_SET_INT_HOOK(cred_label_externalize_audit) - CHECK_SET_INT_HOOK(cred_label_externalize) - CHECK_SET_INT_HOOK(cred_label_internalize) - CHECK_SET_INT_HOOK(file_check_change_offset) - CHECK_SET_INT_HOOK(file_check_create) - CHECK_SET_INT_HOOK(file_check_dup) - CHECK_SET_INT_HOOK(file_check_fcntl) - CHECK_SET_INT_HOOK(file_check_get) - CHECK_SET_INT_HOOK(file_check_get_offset) - CHECK_SET_INT_HOOK(file_check_inherit) - CHECK_SET_INT_HOOK(file_check_ioctl) - CHECK_SET_INT_HOOK(file_check_lock) - CHECK_SET_INT_HOOK(file_check_mmap) - CHECK_SET_INT_HOOK(file_check_receive) - CHECK_SET_INT_HOOK(file_check_set) - CHECK_SET_INT_HOOK(ifnet_check_label_update) - CHECK_SET_INT_HOOK(ifnet_check_transmit) - CHECK_SET_INT_HOOK(ifnet_label_externalize) - CHECK_SET_INT_HOOK(ifnet_label_internalize) - CHECK_SET_INT_HOOK(inpcb_check_deliver) - CHECK_SET_INT_HOOK(inpcb_label_init) - CHECK_SET_INT_HOOK(iokit_check_device) - CHECK_SET_INT_HOOK(iokit_check_open) - CHECK_SET_INT_HOOK(iokit_check_set_properties) - CHECK_SET_INT_HOOK(iokit_check_hid_control) - CHECK_SET_INT_HOOK(ipq_label_compare) - CHECK_SET_INT_HOOK(ipq_label_init) - CHECK_SET_INT_HOOK(lctx_check_label_update) - CHECK_SET_INT_HOOK(lctx_label_externalize) - CHECK_SET_INT_HOOK(lctx_label_internalize) - CHECK_SET_INT_HOOK(mbuf_label_init) - CHECK_SET_INT_HOOK(mount_check_fsctl) - CHECK_SET_INT_HOOK(mount_check_getattr) - CHECK_SET_INT_HOOK(mount_check_label_update) - CHECK_SET_INT_HOOK(mount_check_mount) - CHECK_SET_INT_HOOK(mount_check_remount) - CHECK_SET_INT_HOOK(mount_check_setattr) - CHECK_SET_INT_HOOK(mount_check_stat) - CHECK_SET_INT_HOOK(mount_check_umount) - CHECK_SET_INT_HOOK(mount_label_externalize) - CHECK_SET_INT_HOOK(mount_label_internalize) - CHECK_SET_INT_HOOK(pipe_check_ioctl) - CHECK_SET_INT_HOOK(pipe_check_kqfilter) - CHECK_SET_INT_HOOK(pipe_check_label_update) - CHECK_SET_INT_HOOK(pipe_check_read) - CHECK_SET_INT_HOOK(pipe_check_select) - CHECK_SET_INT_HOOK(pipe_check_stat) - CHECK_SET_INT_HOOK(pipe_check_write) - CHECK_SET_INT_HOOK(pipe_label_externalize) - CHECK_SET_INT_HOOK(pipe_label_internalize) - 
CHECK_SET_INT_HOOK(policy_syscall) - CHECK_SET_INT_HOOK(port_check_copy_send) - CHECK_SET_INT_HOOK(port_check_hold_receive) - CHECK_SET_INT_HOOK(port_check_hold_send_once) - CHECK_SET_INT_HOOK(port_check_hold_send) - CHECK_SET_INT_HOOK(port_check_label_update) - CHECK_SET_INT_HOOK(port_check_make_send_once) - CHECK_SET_INT_HOOK(port_check_make_send) - CHECK_SET_INT_HOOK(port_check_method) - CHECK_SET_INT_HOOK(port_check_move_receive) - CHECK_SET_INT_HOOK(port_check_move_send_once) - CHECK_SET_INT_HOOK(port_check_move_send) - CHECK_SET_INT_HOOK(port_check_receive) - CHECK_SET_INT_HOOK(port_check_send) - CHECK_SET_INT_HOOK(port_check_service) - CHECK_SET_INT_HOOK(port_label_compute) - CHECK_SET_INT_HOOK(posixsem_check_create) - CHECK_SET_INT_HOOK(posixsem_check_open) - CHECK_SET_INT_HOOK(posixsem_check_post) - CHECK_SET_INT_HOOK(posixsem_check_unlink) - CHECK_SET_INT_HOOK(posixsem_check_wait) - CHECK_SET_INT_HOOK(posixshm_check_create) - CHECK_SET_INT_HOOK(posixshm_check_mmap) - CHECK_SET_INT_HOOK(posixshm_check_open) - CHECK_SET_INT_HOOK(posixshm_check_stat) - CHECK_SET_INT_HOOK(posixshm_check_truncate) - CHECK_SET_INT_HOOK(posixshm_check_unlink) - CHECK_SET_INT_HOOK(priv_check) - /* relative ordinal location of "priv_grant" */ - CHECK_SET_INT_HOOK(proc_check_debug) - CHECK_SET_INT_HOOK(proc_check_fork) - CHECK_SET_INT_HOOK(proc_check_getaudit) - CHECK_SET_INT_HOOK(proc_check_getauid) - CHECK_SET_INT_HOOK(proc_check_getlcid) - CHECK_SET_INT_HOOK(proc_check_ledger) - CHECK_SET_INT_HOOK(proc_check_map_anon) - CHECK_SET_INT_HOOK(proc_check_mprotect) - CHECK_SET_INT_HOOK(proc_check_sched) - CHECK_SET_INT_HOOK(proc_check_setaudit) - CHECK_SET_INT_HOOK(proc_check_setauid) - CHECK_SET_INT_HOOK(proc_check_setlcid) - CHECK_SET_INT_HOOK(proc_check_signal) - CHECK_SET_INT_HOOK(proc_check_suspend_resume) - CHECK_SET_INT_HOOK(proc_check_wait) - CHECK_SET_INT_HOOK(socket_check_accept) - CHECK_SET_INT_HOOK(socket_check_accepted) - CHECK_SET_INT_HOOK(socket_check_bind) - CHECK_SET_INT_HOOK(socket_check_connect) - CHECK_SET_INT_HOOK(socket_check_create) - CHECK_SET_INT_HOOK(socket_check_deliver) - CHECK_SET_INT_HOOK(socket_check_kqfilter) - CHECK_SET_INT_HOOK(socket_check_label_update) - CHECK_SET_INT_HOOK(socket_check_listen) - CHECK_SET_INT_HOOK(socket_check_receive) - CHECK_SET_INT_HOOK(socket_check_received) - CHECK_SET_INT_HOOK(socket_check_select) - CHECK_SET_INT_HOOK(socket_check_send) - CHECK_SET_INT_HOOK(socket_check_stat) - CHECK_SET_INT_HOOK(socket_check_setsockopt) - CHECK_SET_INT_HOOK(socket_check_getsockopt) - CHECK_SET_INT_HOOK(socket_label_externalize) - CHECK_SET_INT_HOOK(socket_label_init) - CHECK_SET_INT_HOOK(socket_label_internalize) - CHECK_SET_INT_HOOK(socketpeer_label_externalize) - CHECK_SET_INT_HOOK(socketpeer_label_init) - CHECK_SET_INT_HOOK(system_check_acct) - CHECK_SET_INT_HOOK(system_check_audit) - CHECK_SET_INT_HOOK(system_check_auditctl) - CHECK_SET_INT_HOOK(system_check_auditon) - CHECK_SET_INT_HOOK(system_check_chud) - CHECK_SET_INT_HOOK(system_check_host_priv) - CHECK_SET_INT_HOOK(system_check_nfsd) - CHECK_SET_INT_HOOK(system_check_reboot) - CHECK_SET_INT_HOOK(system_check_settime) - CHECK_SET_INT_HOOK(system_check_swapoff) - CHECK_SET_INT_HOOK(system_check_swapon) - CHECK_SET_INT_HOOK(system_check_sysctl) - CHECK_SET_INT_HOOK(system_check_kas_info) - CHECK_SET_INT_HOOK(sysvmsq_check_enqueue) - CHECK_SET_INT_HOOK(sysvmsq_check_msgrcv) - CHECK_SET_INT_HOOK(sysvmsq_check_msgrmid) - CHECK_SET_INT_HOOK(sysvmsq_check_msqctl) - CHECK_SET_INT_HOOK(sysvmsq_check_msqget) - 
CHECK_SET_INT_HOOK(sysvmsq_check_msqrcv) - CHECK_SET_INT_HOOK(sysvmsq_check_msqsnd) - CHECK_SET_INT_HOOK(sysvsem_check_semctl) - CHECK_SET_INT_HOOK(sysvsem_check_semget) - CHECK_SET_INT_HOOK(sysvsem_check_semop) - CHECK_SET_INT_HOOK(sysvshm_check_shmat) - CHECK_SET_INT_HOOK(sysvshm_check_shmctl) - CHECK_SET_INT_HOOK(sysvshm_check_shmdt) - CHECK_SET_INT_HOOK(sysvshm_check_shmget) - CHECK_SET_INT_HOOK(proc_check_get_task_name) - CHECK_SET_INT_HOOK(proc_check_get_task) - CHECK_SET_INT_HOOK(task_label_externalize) - CHECK_SET_INT_HOOK(task_label_internalize) - CHECK_SET_INT_HOOK(vnode_check_access) - CHECK_SET_INT_HOOK(vnode_check_chdir) - CHECK_SET_INT_HOOK(vnode_check_chroot) - CHECK_SET_INT_HOOK(vnode_check_create) - CHECK_SET_INT_HOOK(vnode_check_deleteextattr) - CHECK_SET_INT_HOOK(vnode_check_exchangedata) - CHECK_SET_INT_HOOK(vnode_check_exec) - CHECK_SET_INT_HOOK(vnode_check_fsgetpath) - CHECK_SET_INT_HOOK(vnode_check_signature) - CHECK_SET_INT_HOOK(vnode_check_getattrlist) - CHECK_SET_INT_HOOK(vnode_check_getextattr) - CHECK_SET_INT_HOOK(vnode_check_ioctl) - CHECK_SET_INT_HOOK(vnode_check_kqfilter) - CHECK_SET_INT_HOOK(vnode_check_label_update) - CHECK_SET_INT_HOOK(vnode_check_link) - CHECK_SET_INT_HOOK(vnode_check_listextattr) - CHECK_SET_INT_HOOK(vnode_check_lookup) - CHECK_SET_INT_HOOK(vnode_check_open) - CHECK_SET_INT_HOOK(vnode_check_read) - CHECK_SET_INT_HOOK(vnode_check_readdir) - CHECK_SET_INT_HOOK(vnode_check_readlink) - CHECK_SET_INT_HOOK(vnode_check_rename_from) - CHECK_SET_INT_HOOK(vnode_check_rename_to) - CHECK_SET_INT_HOOK(vnode_check_revoke) - CHECK_SET_INT_HOOK(vnode_check_searchfs) - CHECK_SET_INT_HOOK(vnode_check_select) - CHECK_SET_INT_HOOK(vnode_check_setattrlist) - CHECK_SET_INT_HOOK(vnode_check_setextattr) - CHECK_SET_INT_HOOK(vnode_check_setflags) - CHECK_SET_INT_HOOK(vnode_check_setmode) - CHECK_SET_INT_HOOK(vnode_check_setowner) - CHECK_SET_INT_HOOK(vnode_check_setutimes) - CHECK_SET_INT_HOOK(vnode_check_stat) - CHECK_SET_INT_HOOK(vnode_check_truncate) - CHECK_SET_INT_HOOK(vnode_check_uipc_bind) - CHECK_SET_INT_HOOK(vnode_check_uipc_connect) - CHECK_SET_INT_HOOK(vnode_check_unlink) - CHECK_SET_INT_HOOK(vnode_check_write) - CHECK_SET_INT_HOOK(vnode_label_associate_extattr) - CHECK_SET_INT_HOOK(vnode_label_externalize_audit) - CHECK_SET_INT_HOOK(vnode_label_externalize) - CHECK_SET_INT_HOOK(vnode_label_internalize) - CHECK_SET_INT_HOOK(vnode_label_store) - CHECK_SET_INT_HOOK(vnode_label_update_extattr) - CHECK_SET_INT_HOOK(vnode_notify_create) - - /* operations which return void */ - CHECK_SET_VOID_HOOK(bpfdesc_label_init) - CHECK_SET_VOID_HOOK(bpfdesc_label_destroy) - CHECK_SET_VOID_HOOK(bpfdesc_label_associate) - CHECK_SET_VOID_HOOK(cred_label_associate_fork) - CHECK_SET_VOID_HOOK(cred_label_associate_kernel) - CHECK_SET_VOID_HOOK(cred_label_associate) - CHECK_SET_VOID_HOOK(cred_label_associate_user) - CHECK_SET_VOID_HOOK(cred_label_destroy) - CHECK_SET_VOID_HOOK(cred_label_init) - CHECK_SET_VOID_HOOK(cred_label_update_execve) - CHECK_SET_VOID_HOOK(cred_label_update) - CHECK_SET_VOID_HOOK(devfs_label_associate_device) - CHECK_SET_VOID_HOOK(devfs_label_associate_directory) - CHECK_SET_VOID_HOOK(devfs_label_copy) - CHECK_SET_VOID_HOOK(devfs_label_destroy) - CHECK_SET_VOID_HOOK(devfs_label_init) - CHECK_SET_VOID_HOOK(devfs_label_update) - CHECK_SET_VOID_HOOK(file_check_mmap_downgrade) - CHECK_SET_VOID_HOOK(file_label_associate) - CHECK_SET_VOID_HOOK(file_label_destroy) - CHECK_SET_VOID_HOOK(file_label_init) - CHECK_SET_VOID_HOOK(ifnet_label_associate) - 
CHECK_SET_VOID_HOOK(ifnet_label_copy) - CHECK_SET_VOID_HOOK(ifnet_label_destroy) - CHECK_SET_VOID_HOOK(ifnet_label_init) - CHECK_SET_VOID_HOOK(ifnet_label_recycle) - CHECK_SET_VOID_HOOK(ifnet_label_update) - CHECK_SET_VOID_HOOK(inpcb_label_associate) - CHECK_SET_VOID_HOOK(inpcb_label_destroy) - CHECK_SET_VOID_HOOK(inpcb_label_recycle) - CHECK_SET_VOID_HOOK(inpcb_label_update) - CHECK_SET_VOID_HOOK(ipq_label_associate) - CHECK_SET_VOID_HOOK(ipq_label_destroy) - CHECK_SET_VOID_HOOK(ipq_label_update) - CHECK_SET_VOID_HOOK(lctx_label_destroy) - CHECK_SET_VOID_HOOK(lctx_label_init) - CHECK_SET_VOID_HOOK(lctx_label_update) - CHECK_SET_VOID_HOOK(lctx_notify_create) - CHECK_SET_VOID_HOOK(lctx_notify_join) - CHECK_SET_VOID_HOOK(lctx_notify_leave) - CHECK_SET_VOID_HOOK(mbuf_label_associate_bpfdesc) - CHECK_SET_VOID_HOOK(mbuf_label_associate_ifnet) - CHECK_SET_VOID_HOOK(mbuf_label_associate_inpcb) - CHECK_SET_VOID_HOOK(mbuf_label_associate_ipq) - CHECK_SET_VOID_HOOK(mbuf_label_associate_linklayer) - CHECK_SET_VOID_HOOK(mbuf_label_associate_multicast_encap) - CHECK_SET_VOID_HOOK(mbuf_label_associate_netlayer) - CHECK_SET_VOID_HOOK(mbuf_label_associate_socket) - CHECK_SET_VOID_HOOK(mbuf_label_copy) - CHECK_SET_VOID_HOOK(mbuf_label_destroy) - CHECK_SET_VOID_HOOK(mount_label_associate) - CHECK_SET_VOID_HOOK(mount_label_destroy) - CHECK_SET_VOID_HOOK(mount_label_init) - CHECK_SET_VOID_HOOK(netinet_fragment) - CHECK_SET_VOID_HOOK(netinet_icmp_reply) - CHECK_SET_VOID_HOOK(netinet_tcp_reply) - CHECK_SET_VOID_HOOK(pipe_label_associate) - CHECK_SET_VOID_HOOK(pipe_label_copy) - CHECK_SET_VOID_HOOK(pipe_label_destroy) - CHECK_SET_VOID_HOOK(pipe_label_init) - CHECK_SET_VOID_HOOK(pipe_label_update) - CHECK_SET_VOID_HOOK(policy_destroy) - /* relative ordinal location of "policy_init" */ - /* relative ordinal location of "policy_initbsd" */ - CHECK_SET_VOID_HOOK(port_label_associate_kernel) - CHECK_SET_VOID_HOOK(port_label_associate) - CHECK_SET_VOID_HOOK(port_label_copy) - CHECK_SET_VOID_HOOK(port_label_destroy) - CHECK_SET_VOID_HOOK(port_label_init) - CHECK_SET_VOID_HOOK(port_label_update_cred) - CHECK_SET_VOID_HOOK(port_label_update_kobject) - CHECK_SET_VOID_HOOK(posixsem_label_associate) - CHECK_SET_VOID_HOOK(posixsem_label_destroy) - CHECK_SET_VOID_HOOK(posixsem_label_init) - CHECK_SET_VOID_HOOK(posixshm_label_associate) - CHECK_SET_VOID_HOOK(posixshm_label_destroy) - CHECK_SET_VOID_HOOK(posixshm_label_init) - CHECK_SET_VOID_HOOK(proc_label_destroy) - CHECK_SET_VOID_HOOK(proc_label_init) - CHECK_SET_VOID_HOOK(socket_label_associate_accept) - CHECK_SET_VOID_HOOK(socket_label_associate) - CHECK_SET_VOID_HOOK(socket_label_copy) - CHECK_SET_VOID_HOOK(socket_label_destroy) - CHECK_SET_VOID_HOOK(socket_label_update) - CHECK_SET_VOID_HOOK(socketpeer_label_associate_mbuf) - CHECK_SET_VOID_HOOK(socketpeer_label_associate_socket) - CHECK_SET_VOID_HOOK(socketpeer_label_destroy) - CHECK_SET_VOID_HOOK(sysvmsg_label_associate) - CHECK_SET_VOID_HOOK(sysvmsg_label_destroy) - CHECK_SET_VOID_HOOK(sysvmsg_label_init) - CHECK_SET_VOID_HOOK(sysvmsg_label_recycle) - CHECK_SET_VOID_HOOK(sysvmsq_label_associate) - CHECK_SET_VOID_HOOK(sysvmsq_label_destroy) - CHECK_SET_VOID_HOOK(sysvmsq_label_init) - CHECK_SET_VOID_HOOK(sysvmsq_label_recycle) - CHECK_SET_VOID_HOOK(sysvsem_label_associate) - CHECK_SET_VOID_HOOK(sysvsem_label_destroy) - CHECK_SET_VOID_HOOK(sysvsem_label_init) - CHECK_SET_VOID_HOOK(sysvsem_label_recycle) - CHECK_SET_VOID_HOOK(sysvshm_label_associate) - CHECK_SET_VOID_HOOK(sysvshm_label_destroy) - 
CHECK_SET_VOID_HOOK(sysvshm_label_init) - CHECK_SET_VOID_HOOK(sysvshm_label_recycle) - CHECK_SET_VOID_HOOK(task_label_associate_kernel) - CHECK_SET_VOID_HOOK(task_label_associate) - CHECK_SET_VOID_HOOK(task_label_copy) - CHECK_SET_VOID_HOOK(task_label_destroy) - CHECK_SET_VOID_HOOK(task_label_init) - CHECK_SET_VOID_HOOK(task_label_update) - CHECK_SET_VOID_HOOK(vnode_label_associate_devfs) - CHECK_SET_VOID_HOOK(vnode_label_associate_file) - CHECK_SET_VOID_HOOK(thread_userret) - CHECK_SET_VOID_HOOK(vnode_label_associate_posixsem) - CHECK_SET_VOID_HOOK(vnode_label_associate_posixshm) - CHECK_SET_VOID_HOOK(vnode_label_associate_singlelabel) - CHECK_SET_VOID_HOOK(vnode_label_associate_socket) - CHECK_SET_VOID_HOOK(vnode_label_copy) - CHECK_SET_VOID_HOOK(vnode_label_destroy) - CHECK_SET_VOID_HOOK(vnode_label_init) - CHECK_SET_VOID_HOOK(vnode_label_recycle) - CHECK_SET_VOID_HOOK(vnode_label_update) - CHECK_SET_VOID_HOOK(vnode_notify_rename) - CHECK_SET_VOID_HOOK(thread_label_init) - CHECK_SET_VOID_HOOK(thread_label_destroy) - .mpo_reserved18 = common_void_hook, - CHECK_SET_VOID_HOOK(vnode_notify_open) - .mpo_reserved20 = common_void_hook, - .mpo_reserved21 = common_void_hook, - .mpo_reserved22 = common_void_hook, - .mpo_reserved23 = common_void_hook, - .mpo_reserved24 = common_void_hook, - .mpo_reserved25 = common_void_hook, - .mpo_reserved26 = common_void_hook, - .mpo_reserved27 = common_void_hook, - .mpo_reserved28 = common_void_hook, - .mpo_reserved29 = common_void_hook, + CHECK_SET_HOOK(policy_syscall) + + CHECK_SET_HOOK(port_check_copy_send) + CHECK_SET_HOOK(port_check_hold_receive) + CHECK_SET_HOOK(port_check_hold_send_once) + CHECK_SET_HOOK(port_check_hold_send) + CHECK_SET_HOOK(port_check_label_update) + CHECK_SET_HOOK(port_check_make_send_once) + CHECK_SET_HOOK(port_check_make_send) + CHECK_SET_HOOK(port_check_method) + CHECK_SET_HOOK(port_check_move_receive) + CHECK_SET_HOOK(port_check_move_send_once) + CHECK_SET_HOOK(port_check_move_send) + CHECK_SET_HOOK(port_check_receive) + CHECK_SET_HOOK(port_check_send) + CHECK_SET_HOOK(port_check_service) + CHECK_SET_HOOK(port_label_associate_kernel) + CHECK_SET_HOOK(port_label_associate) + CHECK_SET_HOOK(port_label_compute) + CHECK_SET_HOOK(port_label_copy) + CHECK_SET_HOOK(port_label_destroy) + CHECK_SET_HOOK(port_label_init) + CHECK_SET_HOOK(port_label_update_cred) + CHECK_SET_HOOK(port_label_update_kobject) + + CHECK_SET_HOOK(posixsem_check_create) + CHECK_SET_HOOK(posixsem_check_open) + CHECK_SET_HOOK(posixsem_check_post) + CHECK_SET_HOOK(posixsem_check_unlink) + CHECK_SET_HOOK(posixsem_check_wait) + CHECK_SET_HOOK(posixsem_label_associate) + CHECK_SET_HOOK(posixsem_label_destroy) + CHECK_SET_HOOK(posixsem_label_init) + CHECK_SET_HOOK(posixshm_check_create) + CHECK_SET_HOOK(posixshm_check_mmap) + CHECK_SET_HOOK(posixshm_check_open) + CHECK_SET_HOOK(posixshm_check_stat) + CHECK_SET_HOOK(posixshm_check_truncate) + CHECK_SET_HOOK(posixshm_check_unlink) + CHECK_SET_HOOK(posixshm_label_associate) + CHECK_SET_HOOK(posixshm_label_destroy) + CHECK_SET_HOOK(posixshm_label_init) + + CHECK_SET_HOOK(proc_check_debug) + CHECK_SET_HOOK(proc_check_fork) + CHECK_SET_HOOK(proc_check_get_task_name) + CHECK_SET_HOOK(proc_check_get_task) + CHECK_SET_HOOK(proc_check_getaudit) + CHECK_SET_HOOK(proc_check_getauid) + CHECK_SET_HOOK(proc_check_getlcid) + CHECK_SET_HOOK(proc_check_mprotect) + CHECK_SET_HOOK(proc_check_sched) + CHECK_SET_HOOK(proc_check_setaudit) + CHECK_SET_HOOK(proc_check_setauid) + CHECK_SET_HOOK(proc_check_setlcid) + 
CHECK_SET_HOOK(proc_check_signal) + CHECK_SET_HOOK(proc_check_wait) + CHECK_SET_HOOK(proc_label_destroy) + CHECK_SET_HOOK(proc_label_init) + + CHECK_SET_HOOK(socket_check_accept) + CHECK_SET_HOOK(socket_check_accepted) + CHECK_SET_HOOK(socket_check_bind) + CHECK_SET_HOOK(socket_check_connect) + CHECK_SET_HOOK(socket_check_create) + CHECK_SET_HOOK(socket_check_deliver) + CHECK_SET_HOOK(socket_check_kqfilter) + CHECK_SET_HOOK(socket_check_label_update) + CHECK_SET_HOOK(socket_check_listen) + CHECK_SET_HOOK(socket_check_receive) + CHECK_SET_HOOK(socket_check_received) + CHECK_SET_HOOK(socket_check_select) + CHECK_SET_HOOK(socket_check_send) + CHECK_SET_HOOK(socket_check_stat) + CHECK_SET_HOOK(socket_check_setsockopt) + CHECK_SET_HOOK(socket_check_getsockopt) + CHECK_SET_HOOK(socket_label_associate_accept) + CHECK_SET_HOOK(socket_label_associate) + CHECK_SET_HOOK(socket_label_copy) + CHECK_SET_HOOK(socket_label_destroy) + CHECK_SET_HOOK(socket_label_externalize) + CHECK_SET_HOOK(socket_label_init) + CHECK_SET_HOOK(socket_label_internalize) + CHECK_SET_HOOK(socket_label_update) + + CHECK_SET_HOOK(socketpeer_label_associate_mbuf) + CHECK_SET_HOOK(socketpeer_label_associate_socket) + CHECK_SET_HOOK(socketpeer_label_destroy) + CHECK_SET_HOOK(socketpeer_label_externalize) + CHECK_SET_HOOK(socketpeer_label_init) + + CHECK_SET_HOOK(system_check_acct) + CHECK_SET_HOOK(system_check_audit) + CHECK_SET_HOOK(system_check_auditctl) + CHECK_SET_HOOK(system_check_auditon) + CHECK_SET_HOOK(system_check_host_priv) + CHECK_SET_HOOK(system_check_nfsd) + CHECK_SET_HOOK(system_check_reboot) + CHECK_SET_HOOK(system_check_settime) + CHECK_SET_HOOK(system_check_swapoff) + CHECK_SET_HOOK(system_check_swapon) + CHECK_SET_HOOK(system_check_sysctl) + + CHECK_SET_HOOK(sysvmsg_label_associate) + CHECK_SET_HOOK(sysvmsg_label_destroy) + CHECK_SET_HOOK(sysvmsg_label_init) + CHECK_SET_HOOK(sysvmsg_label_recycle) + CHECK_SET_HOOK(sysvmsq_check_enqueue) + CHECK_SET_HOOK(sysvmsq_check_msgrcv) + CHECK_SET_HOOK(sysvmsq_check_msgrmid) + CHECK_SET_HOOK(sysvmsq_check_msqctl) + CHECK_SET_HOOK(sysvmsq_check_msqget) + CHECK_SET_HOOK(sysvmsq_check_msqrcv) + CHECK_SET_HOOK(sysvmsq_check_msqsnd) + CHECK_SET_HOOK(sysvmsq_label_associate) + CHECK_SET_HOOK(sysvmsq_label_destroy) + CHECK_SET_HOOK(sysvmsq_label_init) + CHECK_SET_HOOK(sysvmsq_label_recycle) + CHECK_SET_HOOK(sysvsem_check_semctl) + CHECK_SET_HOOK(sysvsem_check_semget) + CHECK_SET_HOOK(sysvsem_check_semop) + CHECK_SET_HOOK(sysvsem_label_associate) + CHECK_SET_HOOK(sysvsem_label_destroy) + CHECK_SET_HOOK(sysvsem_label_init) + CHECK_SET_HOOK(sysvsem_label_recycle) + CHECK_SET_HOOK(sysvshm_check_shmat) + CHECK_SET_HOOK(sysvshm_check_shmctl) + CHECK_SET_HOOK(sysvshm_check_shmdt) + CHECK_SET_HOOK(sysvshm_check_shmget) + CHECK_SET_HOOK(sysvshm_label_associate) + CHECK_SET_HOOK(sysvshm_label_destroy) + CHECK_SET_HOOK(sysvshm_label_init) + CHECK_SET_HOOK(sysvshm_label_recycle) + + CHECK_SET_HOOK(task_label_associate_kernel) + CHECK_SET_HOOK(task_label_associate) + CHECK_SET_HOOK(task_label_copy) + CHECK_SET_HOOK(task_label_destroy) + CHECK_SET_HOOK(task_label_externalize) + CHECK_SET_HOOK(task_label_init) + CHECK_SET_HOOK(task_label_internalize) + CHECK_SET_HOOK(task_label_update) + + CHECK_SET_HOOK(iokit_check_hid_control) + + CHECK_SET_HOOK(vnode_check_access) + CHECK_SET_HOOK(vnode_check_chdir) + CHECK_SET_HOOK(vnode_check_chroot) + CHECK_SET_HOOK(vnode_check_create) + CHECK_SET_HOOK(vnode_check_deleteextattr) + CHECK_SET_HOOK(vnode_check_exchangedata) + CHECK_SET_HOOK(vnode_check_exec) 
+ CHECK_SET_HOOK(vnode_check_getattrlist) + CHECK_SET_HOOK(vnode_check_getextattr) + CHECK_SET_HOOK(vnode_check_ioctl) + CHECK_SET_HOOK(vnode_check_kqfilter) + CHECK_SET_HOOK(vnode_check_label_update) + CHECK_SET_HOOK(vnode_check_link) + CHECK_SET_HOOK(vnode_check_listextattr) + CHECK_SET_HOOK(vnode_check_lookup) + CHECK_SET_HOOK(vnode_check_open) + CHECK_SET_HOOK(vnode_check_read) + CHECK_SET_HOOK(vnode_check_readdir) + CHECK_SET_HOOK(vnode_check_readlink) + CHECK_SET_HOOK(vnode_check_rename_from) + CHECK_SET_HOOK(vnode_check_rename_to) + CHECK_SET_HOOK(vnode_check_revoke) + CHECK_SET_HOOK(vnode_check_select) + CHECK_SET_HOOK(vnode_check_setattrlist) + CHECK_SET_HOOK(vnode_check_setextattr) + CHECK_SET_HOOK(vnode_check_setflags) + CHECK_SET_HOOK(vnode_check_setmode) + CHECK_SET_HOOK(vnode_check_setowner) + CHECK_SET_HOOK(vnode_check_setutimes) + CHECK_SET_HOOK(vnode_check_stat) + CHECK_SET_HOOK(vnode_check_truncate) + CHECK_SET_HOOK(vnode_check_unlink) + CHECK_SET_HOOK(vnode_check_write) + CHECK_SET_HOOK(vnode_label_associate_devfs) + CHECK_SET_HOOK(vnode_label_associate_extattr) + CHECK_SET_HOOK(vnode_label_associate_file) + CHECK_SET_HOOK(vnode_label_associate_pipe) + CHECK_SET_HOOK(vnode_label_associate_posixsem) + CHECK_SET_HOOK(vnode_label_associate_posixshm) + CHECK_SET_HOOK(vnode_label_associate_singlelabel) + CHECK_SET_HOOK(vnode_label_associate_socket) + CHECK_SET_HOOK(vnode_label_copy) + CHECK_SET_HOOK(vnode_label_destroy) + CHECK_SET_HOOK(vnode_label_externalize_audit) + CHECK_SET_HOOK(vnode_label_externalize) + CHECK_SET_HOOK(vnode_label_init) + CHECK_SET_HOOK(vnode_label_internalize) + CHECK_SET_HOOK(vnode_label_recycle) + CHECK_SET_HOOK(vnode_label_store) + CHECK_SET_HOOK(vnode_label_update_extattr) + CHECK_SET_HOOK(vnode_label_update) + CHECK_SET_HOOK(vnode_notify_create) + CHECK_SET_HOOK(vnode_check_signature) + CHECK_SET_HOOK(vnode_check_uipc_bind) + CHECK_SET_HOOK(vnode_check_uipc_connect) + + /* CHECK_SET_HOOK(proc_check_run_cs_invalid) */ + .mpo_proc_check_run_cs_invalid = (mac_proc_check_run_cs_invalid_t *)common_hook, + CHECK_SET_HOOK(proc_check_suspend_resume) + + CHECK_SET_HOOK(thread_userret) + + CHECK_SET_HOOK(iokit_check_set_properties) + + CHECK_SET_HOOK(system_check_chud) + + CHECK_SET_HOOK(vnode_check_searchfs) + + CHECK_SET_HOOK(priv_check) + CHECK_SET_HOOK(priv_grant) + + CHECK_SET_HOOK(proc_check_map_anon) + + CHECK_SET_HOOK(vnode_check_fsgetpath) + + CHECK_SET_HOOK(iokit_check_open) + + CHECK_SET_HOOK(proc_check_ledger) + + CHECK_SET_HOOK(vnode_notify_rename) + + CHECK_SET_HOOK(thread_label_init) + CHECK_SET_HOOK(thread_label_destroy) + + CHECK_SET_HOOK(system_check_kas_info) + + CHECK_SET_HOOK(proc_check_cpumon) + + CHECK_SET_HOOK(vnode_notify_open) + + CHECK_SET_HOOK(system_check_info) + + CHECK_SET_HOOK(pty_notify_grant) + CHECK_SET_HOOK(pty_notify_close) + + CHECK_SET_HOOK(vnode_find_sigs) + + + CHECK_SET_HOOK(kext_check_load) + CHECK_SET_HOOK(kext_check_unload) + + CHECK_SET_HOOK(proc_check_proc_info) + + CHECK_SET_HOOK(vnode_notify_link) + + .mpo_reserved28 = (mpo_reserved_hook_t *)common_hook, + .mpo_reserved29 = (mpo_reserved_hook_t *)common_hook, }; /* diff --git a/bsd/kern/posix_sem.c b/bsd/kern/posix_sem.c index c312d1b84..18f5d4b73 100644 --- a/bsd/kern/posix_sem.c +++ b/bsd/kern/posix_sem.c @@ -85,7 +85,7 @@ #define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type +#define f_type f_fglob->fg_ops->fo_type #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred #define f_ops f_fglob->fg_ops @@ -101,8 +101,9 @@ struct 
pseminfo { gid_t psem_gid; char psem_name[PSEMNAMLEN + 1]; /* segment name */ semaphore_t psem_semobject; - proc_t sem_proc; struct label * psem_label; + pid_t psem_creator_pid; + uint64_t psem_creator_uniqueid; }; #define PSEMINFO_NULL (struct pseminfo *)0 @@ -177,9 +178,16 @@ static int psem_closefile (struct fileglob *fp, vfs_context_t ctx); static int psem_kqfilter (struct fileproc *fp, struct knote *kn, vfs_context_t ctx); -struct fileops psemops = - { psem_read, psem_write, psem_ioctl, psem_select, psem_closefile, psem_kqfilter, NULL }; - +static const struct fileops psemops = { + DTYPE_PSXSEM, + psem_read, + psem_write, + psem_ioctl, + psem_select, + psem_closefile, + psem_kqfilter, + NULL +}; static lck_grp_t *psx_sem_subsys_lck_grp; static lck_grp_attr_t *psx_sem_subsys_lck_grp_attr; @@ -530,7 +538,8 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) pinfo->psem_name[PSEMNAMLEN]= 0; pinfo->psem_flags &= ~PSEM_DEFINED; pinfo->psem_flags |= PSEM_ALLOCATED; - pinfo->sem_proc = p; + pinfo->psem_creator_pid = p->p_pid; + pinfo->psem_creator_uniqueid = p->p_uniqueid; #if CONFIG_MACF error = mac_posixsem_check_create(kauth_cred_get(), nameptr); @@ -593,7 +602,6 @@ sem_open(proc_t p, struct sem_open_args *uap, user_addr_t *retval) proc_fdlock(p); fp->f_flag = fmode & FMASK; - fp->f_type = DTYPE_PSXSEM; fp->f_ops = &psemops; fp->f_data = (caddr_t)new_pnode; procfdtbl_releasefd(p, indx, NULL); @@ -788,7 +796,7 @@ sem_close(proc_t p, struct sem_close_args *uap, __unused int32_t *retval) fileproc_drain(p, fp); fdrelse(p, fd); error = closef_locked(fp, fp->f_fglob, p); - FREE_ZONE(fp, sizeof *fp, M_FILEPROC); + fileproc_free(fp); proc_fdunlock(p); return(error); } diff --git a/bsd/kern/posix_shm.c b/bsd/kern/posix_shm.c index d46eb0b3c..aa57783cb 100644 --- a/bsd/kern/posix_shm.c +++ b/bsd/kern/posix_shm.c @@ -85,7 +85,7 @@ #include #define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type +#define f_type f_fglob->fg_ops->fo_type #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred #define f_ops f_fglob->fg_ops @@ -192,8 +192,16 @@ static void pshm_cache_purge(void); static int pshm_cache_search(struct pshminfo **pshmp, struct pshmname *pnp, struct pshmcache **pcache, int addref); -struct fileops pshmops = - { pshm_read, pshm_write, pshm_ioctl, pshm_select, pshm_closefile, pshm_kqfilter, 0 }; +static const struct fileops pshmops = { + DTYPE_PSXSHM, + pshm_read, + pshm_write, + pshm_ioctl, + pshm_select, + pshm_closefile, + pshm_kqfilter, + 0 +}; static lck_grp_t *psx_shm_subsys_lck_grp; static lck_grp_attr_t *psx_shm_subsys_lck_grp_attr; @@ -606,7 +614,6 @@ shm_open(proc_t p, struct shm_open_args *uap, int32_t *retval) proc_fdlock(p); fp->f_flag = fmode & FMASK; - fp->f_type = DTYPE_PSXSHM; fp->f_ops = &pshmops; fp->f_data = (caddr_t)new_pnode; *fdflags(p, indx) |= UF_EXCLOSE; @@ -664,10 +671,13 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd, mach_vm_size_t total_size, alloc_size; memory_object_size_t mosize; struct pshmobj *pshmobj, *pshmobj_next, **pshmobj_next_p; + vm_map_t user_map; #if CONFIG_MACF int error; #endif + user_map = current_map(); + if (fp->f_type != DTYPE_PSXSHM) { return(EINVAL); } @@ -695,7 +705,8 @@ pshm_truncate(__unused proc_t p, struct fileproc *fp, __unused int fd, #endif pinfo->pshm_flags |= PSHM_ALLOCATING; - total_size = round_page_64(length); + total_size = vm_map_round_page(length, + vm_map_page_mask(user_map)); pshmobj_next_p = &pinfo->pshm_memobjects; for (alloc_size = 0; @@ -898,9 +909,11 @@ 
pshm_mmap(__unused proc_t p, struct mmap_args *uap, user_addr_t *retval, struct if ((flags & MAP_FIXED) == 0) { alloc_flags = VM_FLAGS_ANYWHERE; - user_addr = vm_map_round_page(user_addr); + user_addr = vm_map_round_page(user_addr, + vm_map_page_mask(user_map)); } else { - if (user_addr != vm_map_round_page(user_addr)) + if (user_addr != vm_map_round_page(user_addr, + vm_map_page_mask(user_map))) return (EINVAL); /* * We do not get rid of the existing mappings here because @@ -1059,13 +1072,13 @@ shm_unlink(__unused proc_t p, struct shm_unlink_args *uap, if (error == ENOENT) { PSHM_SUBSYS_UNLOCK(); - error = EINVAL; goto bad; } + /* During unlink lookup failure also implies ENOENT */ if (!error) { PSHM_SUBSYS_UNLOCK(); - error = EINVAL; + error = ENOENT; goto bad; } else incache = 1; diff --git a/bsd/kern/proc_info.c b/bsd/kern/proc_info.c index 482f83e0e..7a06b0a97 100644 --- a/bsd/kern/proc_info.c +++ b/bsd/kern/proc_info.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2010 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2005-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,8 +72,9 @@ #include #include #include +#include -#include +#include #include @@ -88,6 +89,7 @@ struct kqueue; struct atalk; uint64_t get_dispatchqueue_offset_from_proc(void *); +uint64_t get_dispatchqueue_serialno_offset_from_proc(void *); int proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t * retval); /* protos for proc_info calls */ @@ -99,6 +101,7 @@ int proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint3 int proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name, user_addr_t buffer, uint32_t buffersize, int32_t *retval); int proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t * retval); int proc_terminate(int pid, int32_t * retval); +int proc_pid_rusage(int pid, int flavor, user_addr_t buffer, int32_t * retval); /* protos for procpidinfo calls */ int proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); @@ -115,6 +118,7 @@ int proc_pidvnodepathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t int proc_pidpathinfo(proc_t p, uint64_t arg, user_addr_t buffer, uint32_t buffersize, int32_t *retval); int proc_pidworkqueueinfo(proc_t p, struct proc_workqueueinfo *pwqinfo); int proc_pidfileportlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retval); +void proc_piduniqidentifierinfo(proc_t p, struct proc_uniqidentifierinfo *p_uniqidinfo); /* protos for proc_pidfdinfo calls */ @@ -130,15 +134,16 @@ int pid_atalkinfo(struct atalk * at, struct fileproc * fp, int closeonexec, us /* protos for misc */ -void proc_dirty_start(struct proc *p); -void proc_dirty_end(struct proc *p); - int fill_vnodeinfo(vnode_t vp, struct vnode_info *vinfo); void fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * finfo); -static int proc_security_policy(proc_t p); +int proc_security_policy(proc_t targetp, int callnum, int flavor, boolean_t check_same_user); static void munge_vinfo_stat(struct stat64 *sbp, struct vinfo_stat *vsbp); extern int cansignal(struct proc *, kauth_cred_t, struct proc *, int, int); +extern int proc_get_rusage(proc_t proc, int flavor, user_addr_t buffer, int is_zombie); + +#define CHECK_SAME_USER TRUE +#define NO_CHECK_SAME_USER FALSE uint64_t get_dispatchqueue_offset_from_proc(void *p) { @@ -150,6 +155,16 @@ uint64_t get_dispatchqueue_offset_from_proc(void *p) } } +uint64_t 
get_dispatchqueue_serialno_offset_from_proc(void *p) +{ + if(p != NULL) { + proc_t pself = (proc_t)p; + return (pself->p_dispatchqueue_serialno_offset); + } else { + return (uint64_t)0; + } +} + /***************************** proc_info ********************/ int @@ -164,23 +179,25 @@ proc_info_internal(int callnum, int pid, int flavor, uint64_t arg, user_addr_t b { switch(callnum) { - case 1: /* proc_listpids */ + case PROC_INFO_CALL_LISTPIDS: /* pid contains type and flavor contains typeinfo */ return(proc_listpids(pid, flavor, buffer, buffersize, retval)); - case 2: /* proc_pidinfo */ + case PROC_INFO_CALL_PIDINFO: return(proc_pidinfo(pid, flavor, arg, buffer, buffersize, retval)); - case 3: /* proc_pidfdinfo */ + case PROC_INFO_CALL_PIDFDINFO: return(proc_pidfdinfo(pid, flavor, (int)arg, buffer, buffersize, retval)); - case 4: /* proc_kernmsgbuf */ + case PROC_INFO_CALL_KERNMSGBUF: return(proc_kernmsgbuf(buffer, buffersize, retval)); - case 5: /* set on self properties proc_setcontrol */ + case PROC_INFO_CALL_SETCONTROL: return(proc_setcontrol(pid, flavor, arg, buffer, buffersize, retval)); - case 6: /* proc_pidfileportinfo */ + case PROC_INFO_CALL_PIDFILEPORTINFO: return(proc_pidfileportinfo(pid, flavor, (mach_port_name_t)arg, buffer, buffersize, retval)); - case 7: /* proc_terminate */ + case PROC_INFO_CALL_TERMINATE: return(proc_terminate(pid, retval)); - case 8: /* proc_dirtycontrol */ + case PROC_INFO_CALL_DIRTYCONTROL: return(proc_dirtycontrol(pid, flavor, arg, retval)); + case PROC_INFO_CALL_PIDRUSAGE: + return (proc_pid_rusage(pid, flavor, buffer, retval)); default: return(EINVAL); } @@ -201,6 +218,10 @@ proc_listpids(uint32_t type, uint32_t typeinfo, user_addr_t buffer, uint32_t bu int error = 0; struct proclist *current_list; + /* Do we have permission to look into this? */ + if ((error = proc_security_policy(PROC_NULL, PROC_INFO_CALL_LISTPIDS, type, NO_CHECK_SAME_USER))) + return (error); + /* if the buffer is null, return num of procs */ if (buffer == (user_addr_t)0) { *retval = ((nprocs+20) * sizeof(int)); @@ -347,8 +368,10 @@ proc_pidfdlist(proc_t p, user_addr_t buffer, uint32_t buffersize, int32_t *retv for (n = 0; ((n < numfds) && (n < p->p_fd->fd_nfiles)); n++) { if (((fp = p->p_fd->fd_ofiles[n]) != 0) && ((p->p_fd->fd_ofileflags[n] & UF_RESERVED) == 0)) { + file_type_t fdtype = FILEGLOB_DTYPE(fp->f_fglob); pfd->proc_fd = n; - pfd->proc_fdtype = fp->f_fglob->fg_type; + pfd->proc_fdtype = (fdtype != DTYPE_ATALK) ? + fdtype : PROX_FDTYPE_ATALK; count++; pfd++; } @@ -386,7 +409,10 @@ proc_fileport_fdtype(mach_port_name_t name, struct fileglob *fg, void *arg) struct fileport_fdtype_args *ffa = arg; if (ffa->ffa_pfi != ffa->ffa_pfi_end) { - ffa->ffa_pfi->proc_fdtype = fg->fg_type; + file_type_t fdtype = FILEGLOB_DTYPE(fg); + + ffa->ffa_pfi->proc_fdtype = (fdtype != DTYPE_ATALK) ? 
+ fdtype : PROX_FDTYPE_ATALK; ffa->ffa_pfi->proc_fileport = name; ffa->ffa_pfi++; return (0); /* keep walking */ @@ -536,10 +562,8 @@ proc_pidbsdinfo(proc_t p, struct proc_bsdinfo * pbsd, int zombie) pbsd->pbi_flags |= PROC_FLAG_CTTY; } -#if !CONFIG_EMBEDDED if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) pbsd->pbi_flags |= PROC_FLAG_DELAYIDLESLEEP; -#endif /* !CONFIG_EMBEDDED */ switch(PROC_CONTROL_STATE(p)) { case P_PCTHROTTLE: @@ -617,10 +641,8 @@ proc_pidshortbsdinfo(proc_t p, struct proc_bsdshortinfo * pbsd_shortp, int zombi pbsd_shortp->pbsi_flags |= PROC_FLAG_PSUGID; if ((p->p_flag & P_EXEC) == P_EXEC) pbsd_shortp->pbsi_flags |= PROC_FLAG_EXEC; -#if !CONFIG_EMBEDDED if ((p->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP) pbsd_shortp->pbsi_flags |= PROC_FLAG_DELAYIDLESLEEP; -#endif /* !CONFIG_EMBEDDED */ switch(PROC_CONTROL_STATE(p)) { case P_PCTHROTTLE: @@ -945,6 +967,19 @@ proc_pidworkqueueinfo(proc_t p, struct proc_workqueueinfo *pwqinfo) return(0); } + + +void +proc_piduniqidentifierinfo(proc_t p, struct proc_uniqidentifierinfo *p_uniqidinfo) +{ + p_uniqidinfo->p_uniqueid = proc_uniqueid(p); + proc_getexecutableuuid(p, (unsigned char *)&p_uniqidinfo->p_uuid, sizeof(p_uniqidinfo->p_uuid)); + p_uniqidinfo->p_puniqueid = proc_puniqueid(p); + p_uniqidinfo->p_reserve2 = 0; + p_uniqidinfo->p_reserve3 = 0; + p_uniqidinfo->p_reserve4 = 0; +} + /********************************** proc_pidinfo ********************************/ @@ -955,10 +990,12 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu int error = ENOTSUP; int gotref = 0; int findzomb = 0; - int refheld = 0, shortversion = 0; + int shortversion = 0; uint32_t size; int zombie = 0; int thuniqueid = 0; + int uniqidversion = 0; + boolean_t check_same_user; switch (flavor) { case PROC_PIDLISTFDS: @@ -1014,6 +1051,12 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu case PROC_PIDTHREADID64INFO: size = PROC_PIDTHREADID64INFO_SIZE; break; + case PROC_PIDUNIQIDENTIFIERINFO: + size = PROC_PIDUNIQIDENTIFIERINFO_SIZE; + break; + case PROC_PIDT_BSDINFOWITHUNIQID: + size = PROC_PIDT_BSDINFOWITHUNIQID_SIZE; + break; default: return(EINVAL); } @@ -1025,72 +1068,88 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu return(EOVERFLOW); } - if ((flavor != PROC_PIDTBSDINFO) && (flavor != PROC_PIDPATHINFO) && (flavor != PROC_PIDT_SHORTBSDINFO)) { - if ((p = proc_find(pid)) == PROC_NULL) { - error = ESRCH; - goto out; - } else { - gotref = 1; + /* Check if we need to look for zombies */ + if ((flavor == PROC_PIDTBSDINFO) || (flavor == PROC_PIDT_SHORTBSDINFO) || (flavor == PROC_PIDT_BSDINFOWITHUNIQID) + || (flavor == PROC_PIDUNIQIDENTIFIERINFO)) { + if (arg) + findzomb = 1; + } - /* Do we have permission to look into this ? */ - if ((error = proc_security_policy(p)) != 0) { - goto out; - } - } + if ((p = proc_find(pid)) == PROC_NULL) { + if (findzomb) + p = proc_find_zombref(pid); + if (p == PROC_NULL) { + error = ESRCH; + goto out; + } + zombie = 1; + } else { + gotref = 1; } + + /* Certain operations don't require privileges */ + switch (flavor) { + case PROC_PIDT_SHORTBSDINFO: + case PROC_PIDUNIQIDENTIFIERINFO: + case PROC_PIDPATHINFO: + check_same_user = NO_CHECK_SAME_USER; + break; + default: + check_same_user = CHECK_SAME_USER; + break; + } + + /* Do we have permission to look into this? 
*/ + if ((error = proc_security_policy(p, PROC_INFO_CALL_PIDINFO, flavor, check_same_user))) + goto out; + switch (flavor) { case PROC_PIDLISTFDS: { error = proc_pidfdlist(p, buffer, buffersize, retval); } break; + case PROC_PIDUNIQIDENTIFIERINFO: { + struct proc_uniqidentifierinfo p_uniqidinfo; + + proc_piduniqidentifierinfo(p, &p_uniqidinfo); + error = copyout(&p_uniqidinfo, buffer, sizeof(struct proc_uniqidentifierinfo)); + if (error == 0) + *retval = sizeof(struct proc_uniqidentifierinfo); + } + break; + case PROC_PIDT_SHORTBSDINFO: shortversion = 1; + case PROC_PIDT_BSDINFOWITHUNIQID: case PROC_PIDTBSDINFO: { struct proc_bsdinfo pbsd; struct proc_bsdshortinfo pbsd_short; - - zombie = 0; - if (arg) - findzomb = 1; - p = proc_find(pid); - if (p == PROC_NULL) { - if (findzomb) - p = proc_find_zombref(pid); - if (p == NULL) { - error = ESRCH; - goto out; - } - zombie = 1; - } - refheld = 1; - /* Do we have permission to look into this ? */ - if ((flavor != PROC_PIDT_SHORTBSDINFO) && ((error = proc_security_policy(p)) != 0)) { - if (refheld != 0) { - if (zombie != 0) - proc_drop_zombref(p); - else - proc_rele(p); - } - goto out; - } + struct proc_bsdinfowithuniqid pbsd_uniqid; + + if (flavor == PROC_PIDT_BSDINFOWITHUNIQID) + uniqidversion = 1; + if (shortversion != 0) { error = proc_pidshortbsdinfo(p, &pbsd_short, zombie); } else { error = proc_pidbsdinfo(p, &pbsd, zombie); + if (uniqidversion != 0) { + proc_piduniqidentifierinfo(p, &pbsd_uniqid.p_uniqidentifier); + pbsd_uniqid.pbsd = pbsd; + } } - if (refheld != 0) { - if (zombie != 0) - proc_drop_zombref(p); - else - proc_rele(p); - } + if (error == 0) { if (shortversion != 0) { error = copyout(&pbsd_short, buffer, sizeof(struct proc_bsdshortinfo)); if (error == 0) *retval = sizeof(struct proc_bsdshortinfo); - } else { + } else if (uniqidversion != 0) { + error = copyout(&pbsd_uniqid, buffer, sizeof(struct proc_bsdinfowithuniqid)); + if (error == 0) + *retval = sizeof(struct proc_bsdinfowithuniqid); + } else { error = copyout(&pbsd, buffer, sizeof(struct proc_bsdinfo)); if (error == 0) *retval = sizeof(struct proc_bsdinfo); @@ -1173,12 +1232,6 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu break; case PROC_PIDPATHINFO: { - p = proc_find(pid); - if (p == PROC_NULL) { - error = ESRCH; - goto out; - } - gotref = 1; error = proc_pidpathinfo(p, arg, buffer, buffersize, retval); } break; @@ -1209,6 +1262,8 @@ proc_pidinfo(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t bu out: if (gotref) proc_rele(p); + else if (zombie) + proc_drop_zombref(p); return(error); } @@ -1262,14 +1317,27 @@ pid_vnodeinfopath(vnode_t vp, uint32_t vid, struct fileproc * fp, int closeonexe void fill_fileinfo(struct fileproc * fp, int closeonexec, struct proc_fileinfo * fproc) { - fproc->fi_openflags = fp->f_fglob->fg_flag; - fproc->fi_status = 0; - fproc->fi_offset = fp->f_fglob->fg_offset; - fproc->fi_type = fp->f_fglob->fg_type; - if (fp->f_fglob->fg_count) - fproc->fi_status |= PROC_FP_SHARED; - if (closeonexec != 0) - fproc->fi_status |= PROC_FP_CLEXEC; + fproc->fi_openflags = fp->f_fglob->fg_flag; + fproc->fi_status = 0; + fproc->fi_offset = fp->f_fglob->fg_offset; + fproc->fi_type = FILEGLOB_DTYPE(fp->f_fglob); + if (fp->f_fglob->fg_count > 1) + fproc->fi_status |= PROC_FP_SHARED; + if (closeonexec != 0) + fproc->fi_status |= PROC_FP_CLEXEC; + + if (FILEPROC_TYPE(fp) == FTYPE_GUARDED) { + fproc->fi_status |= PROC_FP_GUARDED; + fproc->fi_guardflags = 0; + if (fp_isguarded(fp, GUARD_CLOSE)) + fproc->fi_guardflags 
|= PROC_FI_GUARD_CLOSE; + if (fp_isguarded(fp, GUARD_DUP)) + fproc->fi_guardflags |= PROC_FI_GUARD_DUP; + if (fp_isguarded(fp, GUARD_SOCKET_IPC)) + fproc->fi_guardflags |= PROC_FI_GUARD_SOCKET_IPC; + if (fp_isguarded(fp, GUARD_FILEPORT)) + fproc->fi_guardflags |= PROC_FI_GUARD_FILEPORT; + } } @@ -1316,6 +1384,7 @@ pid_socketinfo(socket_t so, struct fileproc *fp, int closeonexec, user_addr_t b } return (error); #else +#pragma unused(so, fp, closeonexec, buffer) *retval = 0; return (ENOTSUP); #endif @@ -1445,10 +1514,10 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer error = ESRCH; goto out; } - /* Do we have permission to look into this ? */ - if ((error = proc_security_policy(p)) != 0) { + + /* Do we have permission to look into this? */ + if ((error = proc_security_policy(p, PROC_INFO_CALL_PIDFDINFO, flavor, CHECK_SAME_USER))) goto out1; - } switch (flavor) { case PROC_PIDFDVNODEINFO: { @@ -1538,20 +1607,6 @@ proc_pidfdinfo(int pid, int flavor, int fd, user_addr_t buffer, uint32_t buffer } break; -#if NETAT - case PROC_PIDFDATALKINFO: { - struct atalk * at; - - if ((error = fp_getfatalk(p, fd, &fp, &at)) !=0) { - goto out1; - } - - /* no need to be under the fdlock */ - closeonexec = p->p_fd->fd_ofileflags[fd] & UF_EXCLOSE; - error = pid_atalkinfo(at, fp, closeonexec, buffer, buffersize, retval); - } - break; -#endif /* NETAT */ default: { error = EINVAL; goto out1; @@ -1591,7 +1646,7 @@ proc_fileport_info(__unused mach_port_name_t name, case PROC_PIDFILEPORTVNODEPATHINFO: { vnode_t vp; - if (fg->fg_type != DTYPE_VNODE) { + if (FILEGLOB_DTYPE(fg) != DTYPE_VNODE) { error = ENOTSUP; break; } @@ -1603,7 +1658,7 @@ proc_fileport_info(__unused mach_port_name_t name, case PROC_PIDFILEPORTSOCKETINFO: { socket_t so; - if (fg->fg_type != DTYPE_SOCKET) { + if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET) { error = EOPNOTSUPP; break; } @@ -1615,7 +1670,7 @@ proc_fileport_info(__unused mach_port_name_t name, case PROC_PIDFILEPORTPSHMINFO: { struct pshmnode *pshm; - if (fg->fg_type != DTYPE_PSXSHM) { + if (FILEGLOB_DTYPE(fg) != DTYPE_PSXSHM) { error = EBADF; /* ick - mirror fp_getfpshm */ break; } @@ -1627,7 +1682,7 @@ proc_fileport_info(__unused mach_port_name_t name, case PROC_PIDFILEPORTPIPEINFO: { struct pipe *cpipe; - if (fg->fg_type != DTYPE_PIPE) { + if (FILEGLOB_DTYPE(fg) != DTYPE_PIPE) { error = EBADF; /* ick - mirror fp_getfpipe */ break; } @@ -1679,9 +1734,10 @@ proc_pidfileportinfo(int pid, int flavor, mach_port_name_t name, error = ESRCH; goto out; } - if ((error = proc_security_policy(p)) != 0) { + + /* Do we have permission to look into this? 
*/ + if ((error = proc_security_policy(p, PROC_INFO_CALL_PIDFILEPORTINFO, flavor, CHECK_SAME_USER))) goto out1; - } fia.fia_flavor = flavor; fia.fia_buffer = buffer; @@ -1697,20 +1753,40 @@ out: return (error); } -static int -proc_security_policy(proc_t p) +int +proc_security_policy(proc_t targetp, __unused int callnum, __unused int flavor, boolean_t check_same_user) { - kauth_cred_t my_cred; - uid_t uid; +#if CONFIG_MACF + int error = 0; - my_cred = kauth_cred_proc_ref(p); - uid = kauth_cred_getuid(my_cred) ; - kauth_cred_unref(&my_cred); - - if ((uid != kauth_cred_getuid(kauth_cred_get())) - && suser(kauth_cred_get(), (u_short *)0)) { + if ((error = mac_proc_check_proc_info(current_proc(), targetp, callnum, flavor))) + return (error); +#endif + + /* The 'listpids' call doesn't have a target proc */ + if (targetp == PROC_NULL) { + assert(callnum == PROC_INFO_CALL_LISTPIDS && check_same_user == NO_CHECK_SAME_USER); + return (0); + } + + /* + * Check for 'get information for processes owned by other users' privilege + * root has this privilege by default + */ + if (priv_check_cred(kauth_cred_get(), PRIV_GLOBAL_PROC_INFO, 0) == 0) + check_same_user = FALSE; + + if (check_same_user) { + kauth_cred_t target_cred; + uid_t target_uid; + + target_cred = kauth_cred_proc_ref(targetp); + target_uid = kauth_cred_getuid(target_cred); + kauth_cred_unref(&target_cred); + + if (kauth_getuid() != target_uid) return(EPERM); - } + } return(0); } @@ -1738,6 +1814,9 @@ proc_setcontrol(int pid, int flavor, uint64_t arg, user_addr_t buffer, uint32_t if (pid != pself->p_pid) return(EINVAL); + /* Do we have permission to look into this? */ + if ((error = proc_security_policy(pself, PROC_INFO_CALL_SETCONTROL, flavor, NO_CHECK_SAME_USER))) + goto out; switch (flavor) { case PROC_SELFSET_PCONTROL: { @@ -1800,42 +1879,7 @@ out: return(error); } -void -proc_dirty_start(struct proc *p) -{ - proc_lock(p); - while (p->p_dirty & P_DIRTY_BUSY) { - msleep(&p->p_dirty, &p->p_mlock, 0, "proc_dirty_start", NULL); - } - p->p_dirty |= P_DIRTY_BUSY; - proc_unlock(p); -} - -void -proc_dirty_end(struct proc *p) -{ - proc_lock(p); - if (p->p_dirty & P_DIRTY_BUSY) { - p->p_dirty &= ~P_DIRTY_BUSY; - wakeup(&p->p_dirty); - } - proc_unlock(p); -} - -static boolean_t -proc_validate_track_flags(uint32_t pcontrol, struct proc *target_p) { - /* Check idle exit isn't specified independently */ - if ((pcontrol & PROC_DIRTY_TRACK_MASK) == PROC_DIRTY_ALLOW_IDLE_EXIT) { - return false; - } - - /* See that the process isn't marked for termination */ - if (target_p->p_dirty & P_DIRTY_TERMINATED) { - return false; - } - - return true; -} +#if CONFIG_MEMORYSTATUS int proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t *retval) { @@ -1845,16 +1889,29 @@ proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t *retval) { kauth_cred_t my_cred, target_cred; boolean_t self = FALSE; boolean_t child = FALSE; + boolean_t zombref = FALSE; pid_t selfpid; target_p = proc_find(pid); + if (target_p == PROC_NULL) { - return(ESRCH); + if (flavor == PROC_DIRTYCONTROL_GET) { + target_p = proc_find_zombref(pid); + zombref = 1; + } + + if (target_p == PROC_NULL) + return(ESRCH); + } - + my_cred = kauth_cred_get(); target_cred = kauth_cred_proc_ref(target_p); - + + /* Do we have permission to look into this? 
*/ + if ((error = proc_security_policy(target_p, PROC_INFO_CALL_DIRTYCONTROL, flavor, NO_CHECK_SAME_USER))) + goto out; + selfpid = proc_selfpid(); if (pid == selfpid) { self = TRUE; @@ -1869,96 +1926,26 @@ proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t *retval) { error = EPERM; goto out; } - - proc_dirty_start(target_p); - if (proc_validate_track_flags(pcontrol, target_p)) { - /* Cumulative, as per */ - target_p->p_dirty |= - ((pcontrol & PROC_DIRTY_TRACK) ? P_DIRTY_TRACK : 0) | - ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) ? P_DIRTY_ALLOW_IDLE_EXIT : 0); -#if CONFIG_MEMORYSTATUS - if ((target_p->p_dirty & P_DIRTY_CAN_IDLE_EXIT) == P_DIRTY_CAN_IDLE_EXIT) { - memorystatus_on_track_dirty(pid, TRUE); - } -#endif - } else { - error = EINVAL; - } - - proc_dirty_end(target_p); + error = memorystatus_dirty_track(target_p, pcontrol); } break; - case PROC_DIRTYCONTROL_SET: { - boolean_t kill = false; - + case PROC_DIRTYCONTROL_SET: { /* Check privileges; use cansignal() here since the process could be terminated */ if (!cansignal(current_proc(), my_cred, target_p, SIGKILL, 0)) { error = EPERM; goto out; } - proc_dirty_start(target_p); - - if (!(target_p->p_dirty & P_DIRTY_TRACK)) { - /* Dirty tracking not enabled */ - error = EINVAL; - } else if (pcontrol && (target_p->p_dirty & P_DIRTY_TERMINATED)) { - /* - * Process is set to be terminated and we're attempting to mark it dirty. - * Set for termination and marking as clean is OK - see . - */ - error = EBUSY; - } else { - int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN; - if (pcontrol && !(target_p->p_dirty & flag)) { - target_p->p_dirty |= flag; - } else if ((pcontrol == 0) && (target_p->p_dirty & flag)) { - if ((flag == P_DIRTY_SHUTDOWN) && (!target_p->p_dirty & P_DIRTY)) { - /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */ - target_p->p_dirty |= P_DIRTY_TERMINATED; - kill = true; - } else if ((flag == P_DIRTY) && (target_p->p_dirty & P_DIRTY_TERMINATED)) { - /* Kill previously terminated processes if set clean */ - kill = true; - } - target_p->p_dirty &= ~flag; - } else { - /* Already set */ - error = EALREADY; - } - } -#if CONFIG_MEMORYSTATUS - if ((error == 0) && ((target_p->p_dirty & P_DIRTY_CAN_IDLE_EXIT) == P_DIRTY_CAN_IDLE_EXIT)) { - memorystatus_on_dirty(pid, pcontrol ? TRUE : FALSE); - } -#endif - proc_dirty_end(target_p); - - if ((error == 0) && (kill == true)) { - psignal(target_p, SIGKILL); - } + error = memorystatus_dirty_set(target_p, self, pcontrol); } break; case PROC_DIRTYCONTROL_GET: { /* No permissions check - dirty state is freely available */ if (retval) { - proc_dirty_start(target_p); - - *retval = 0; - if (target_p->p_dirty & P_DIRTY_TRACK) { - *retval |= PROC_DIRTY_TRACKED; - if (target_p->p_dirty & P_DIRTY_ALLOW_IDLE_EXIT) { - *retval |= PROC_DIRTY_ALLOWS_IDLE_EXIT; - } - if (target_p->p_dirty & P_DIRTY) { - *retval |= PROC_DIRTY_IS_DIRTY; - } - } - - proc_dirty_end(target_p); + *retval = memorystatus_dirty_get(target_p); } else { error = EINVAL; } @@ -1967,11 +1954,23 @@ proc_dirtycontrol(int pid, int flavor, uint64_t arg, int32_t *retval) { } out: - proc_rele(target_p); - kauth_cred_unref(&target_cred); + if (zombref) + proc_drop_zombref(target_p); + else + proc_rele(target_p); + + kauth_cred_unref(&target_cred); return(error); } +#else + +int +proc_dirtycontrol(__unused int pid, __unused int flavor, __unused uint64_t arg, __unused int32_t *retval) { + return ENOTSUP; +} + +#endif /* CONFIG_MEMORYSTATUS */ /* * proc_terminate() provides support for sudden termination. 
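Several hunks in this patch (proc_pidinfo and proc_dirtycontrol above, proc_pid_rusage below) repeat the same lookup discipline: if the live-process lookup fails, retry for a zombie reference, and later release whichever reference was taken with its matching drop routine. A minimal standalone sketch of that pattern follows; the extern declarations are assumed from how the patch itself calls these KPIs, and lookup_allowing_zombies()/release_proc() are illustrative helper names, not kernel functions.

#include <errno.h>

/* Assumed declarations, mirroring the calls made in this patch. */
typedef struct proc *proc_t;
#define PROC_NULL ((proc_t)0)
extern proc_t proc_find(int pid);           /* take a ref on a live process */
extern proc_t proc_find_zombref(int pid);   /* take a ref on a zombie */
extern void   proc_rele(proc_t p);          /* release a live reference */
extern void   proc_drop_zombref(proc_t p);  /* release a zombie reference */

/* Illustrative helper: look up pid, falling back to a zombie reference. */
static int
lookup_allowing_zombies(int pid, proc_t *pp, int *zombie)
{
	proc_t p = proc_find(pid);

	*zombie = 0;
	if (p == PROC_NULL) {
		/* Process may have exited but not yet been reaped. */
		p = proc_find_zombref(pid);
		if (p == PROC_NULL)
			return (ESRCH);
		*zombie = 1;
	}
	*pp = p;
	return (0);
}

/* The release must match the acquisition, as the reworked out: labels do. */
static void
release_proc(proc_t p, int zombie)
{
	if (zombie)
		proc_drop_zombref(p);
	else
		proc_rele(p);
}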
@@ -2011,22 +2010,22 @@ proc_terminate(int pid, int32_t *retval) error = EPERM; goto out; } - - proc_dirty_start(p); - - p->p_dirty |= P_DIRTY_TERMINATED; - - if ((p->p_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) { - /* Clean; mark as terminated and issue SIGKILL */ - sig = SIGKILL; - } else { - /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */ - sig = SIGTERM; + + /* Not allowed to sudden terminate yourself */ + if (p == current_proc()) { + error = EPERM; + goto out; } - proc_dirty_end(p); +#if CONFIG_MEMORYSTATUS + /* Determine requisite signal to issue */ + sig = memorystatus_on_terminate(p); +#else + sig = SIGTERM; +#endif - proc_removethrottle(p); + proc_set_task_policy(p->task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, + TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE); psignal(p, sig); *retval = sig; @@ -2037,26 +2036,6 @@ out: return error; } -void -proc_removethrottle(proc_t p) - -{ - /* remove throttled states in all threads; process is going to terminate soon */ - proc_lock(p); - - /* if already marked marked for proc_termiantion.. */ - if ((p->p_lflag & P_LPTERMINATE) != 0) { - proc_unlock(p); - return; - } - p->p_lflag |= P_LPTERMINATE; - proc_unlock(p); - - (void)proc_task_remove_throttle(p->task); - -} - - /* * copy stat64 structure into vinfo_stat structure. */ @@ -2088,3 +2067,33 @@ munge_vinfo_stat(struct stat64 *sbp, struct vinfo_stat *vsbp) vsbp->vst_qspare[0] = sbp->st_qspare[0]; vsbp->vst_qspare[1] = sbp->st_qspare[1]; } + +int +proc_pid_rusage(int pid, int flavor, user_addr_t buffer, __unused int32_t *retval) +{ + proc_t p; + int error; + int zombie = 0; + + if ((p = proc_find(pid)) == PROC_NULL) { + if ((p = proc_find_zombref(pid)) == PROC_NULL) { + return (ESRCH); + } + zombie = 1; + } + + /* Do we have permission to look into this? */ + if ((error = proc_security_policy(p, PROC_INFO_CALL_PIDRUSAGE, flavor, CHECK_SAME_USER))) + goto out; + + error = proc_get_rusage(p, flavor, buffer, zombie); + +out: + if (zombie) + proc_drop_zombref(p); + else + proc_rele(p); + + return (error); +} + diff --git a/bsd/kern/proc_uuid_policy.c b/bsd/kern/proc_uuid_policy.c new file mode 100644 index 000000000..5c69d488a --- /dev/null +++ b/bsd/kern/proc_uuid_policy.c @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#define PROC_UUID_POLICY_DEBUG 0 + +#if PROC_UUID_POLICY_DEBUG +#define dprintf(...) printf(__VA_ARGS__) +#else +#define dprintf(...) do { } while(0) +#endif + +static lck_grp_attr_t *proc_uuid_policy_subsys_lck_grp_attr; +static lck_grp_t *proc_uuid_policy_subsys_lck_grp; +static lck_attr_t *proc_uuid_policy_subsys_lck_attr; +static lck_mtx_t proc_uuid_policy_subsys_mutex; + +#define PROC_UUID_POLICY_SUBSYS_LOCK() lck_mtx_lock(&proc_uuid_policy_subsys_mutex) +#define PROC_UUID_POLICY_SUBSYS_UNLOCK() lck_mtx_unlock(&proc_uuid_policy_subsys_mutex) + +#define PROC_UUID_POLICY_HASH_SIZE 64 +u_long proc_uuid_policy_hash_mask; + +/* Assume first byte of UUIDs are evenly distributed */ +#define UUIDHASH(uuid) (&proc_uuid_policy_hashtbl[uuid[0] & proc_uuid_policy_hash_mask]) +static LIST_HEAD(proc_uuid_policy_hashhead, proc_uuid_policy_entry) *proc_uuid_policy_hashtbl; + +/* + * On modification, invalidate cached lookups by bumping the generation count. + * Other calls will need to take the slowpath of taking + * the subsystem lock. + */ +static volatile int32_t proc_uuid_policy_table_gencount; +#define BUMP_PROC_UUID_POLICY_GENERATION_COUNT() do { \ + if (OSIncrementAtomic(&proc_uuid_policy_table_gencount) == (INT32_MAX - 1)) { \ + proc_uuid_policy_table_gencount = 1; \ + } \ + } while (0) + +#define MAX_PROC_UUID_POLICY_COUNT 10240 +static volatile int32_t proc_uuid_policy_count; + +struct proc_uuid_policy_entry { + LIST_ENTRY(proc_uuid_policy_entry) entries; + uuid_t uuid; /* Mach-O executable UUID */ + uint32_t flags; /* policy flag for that UUID */ +}; + +static int +proc_uuid_policy_insert(uuid_t uuid, uint32_t flags); + +static struct proc_uuid_policy_entry * +proc_uuid_policy_remove_locked(uuid_t uuid); + +static int +proc_uuid_policy_remove(uuid_t uuid); + +static int +proc_uuid_policy_clear(void); + +void +proc_uuid_policy_init(void) +{ + proc_uuid_policy_subsys_lck_grp_attr = lck_grp_attr_alloc_init(); + proc_uuid_policy_subsys_lck_grp = lck_grp_alloc_init("proc_uuid_policy_subsys_lock", proc_uuid_policy_subsys_lck_grp_attr); + proc_uuid_policy_subsys_lck_attr = lck_attr_alloc_init(); + lck_mtx_init(&proc_uuid_policy_subsys_mutex, proc_uuid_policy_subsys_lck_grp, proc_uuid_policy_subsys_lck_attr); + + proc_uuid_policy_hashtbl = hashinit(PROC_UUID_POLICY_HASH_SIZE, M_PROC_UUID_POLICY, &proc_uuid_policy_hash_mask); + proc_uuid_policy_table_gencount = 1; + proc_uuid_policy_count = 0; +} + +static int +proc_uuid_policy_insert(uuid_t uuid, uint32_t flags) +{ + struct proc_uuid_policy_entry *entry, *delentry = NULL; + int error; + +#if PROC_UUID_POLICY_DEBUG + uuid_string_t uuidstr; + uuid_unparse(uuid, uuidstr); +#endif + + if (uuid_is_null(uuid)) + return EINVAL; + + MALLOC(entry, struct proc_uuid_policy_entry *, sizeof(*entry), M_PROC_UUID_POLICY, M_WAITOK|M_ZERO); + + memcpy(entry->uuid, uuid, sizeof(uuid_t)); + entry->flags = flags; + + PROC_UUID_POLICY_SUBSYS_LOCK(); + + delentry = proc_uuid_policy_remove_locked(uuid); + + /* Our target UUID is not in the list, insert it now */ + if (proc_uuid_policy_count < MAX_PROC_UUID_POLICY_COUNT) { + LIST_INSERT_HEAD(UUIDHASH(uuid), entry, entries); + proc_uuid_policy_count++; + error = 0; + BUMP_PROC_UUID_POLICY_GENERATION_COUNT(); + } else { + error = ENOMEM; + } + + 
PROC_UUID_POLICY_SUBSYS_UNLOCK(); + + /* If we had found a pre-existing entry, deallocate its memory now */ + if (delentry) { + FREE(delentry, M_PROC_UUID_POLICY); + } + + if (error) { + FREE(entry, M_PROC_UUID_POLICY); + dprintf("Failed to insert proc uuid policy (%s,0x%08x), table full\n", uuidstr, flags); + } else { + dprintf("Inserted proc uuid policy (%s,0x%08x)\n", uuidstr, flags); + } + + return error; +} + +static struct proc_uuid_policy_entry * +proc_uuid_policy_remove_locked(uuid_t uuid) +{ + struct proc_uuid_policy_entry *tmpentry, *searchentry, *delentry = NULL; + + LIST_FOREACH_SAFE(searchentry, UUIDHASH(uuid), entries, tmpentry) { + if (0 == memcmp(searchentry->uuid, uuid, sizeof(uuid_t))) { + /* Existing entry under same UUID. Remove it and save for de-allocation */ + delentry = searchentry; + LIST_REMOVE(searchentry, entries); + proc_uuid_policy_count--; + break; + } + } + + return delentry; +} + +static int +proc_uuid_policy_remove(uuid_t uuid) +{ + struct proc_uuid_policy_entry *delentry = NULL; + int error; + +#if PROC_UUID_POLICY_DEBUG + uuid_string_t uuidstr; + uuid_unparse(uuid, uuidstr); +#endif + + if (uuid_is_null(uuid)) + return EINVAL; + + PROC_UUID_POLICY_SUBSYS_LOCK(); + + delentry = proc_uuid_policy_remove_locked(uuid); + + if (delentry) { + error = 0; + BUMP_PROC_UUID_POLICY_GENERATION_COUNT(); + } else { + error = ENOENT; + } + + PROC_UUID_POLICY_SUBSYS_UNLOCK(); + + /* If we had found a pre-existing entry, deallocate its memory now */ + if (delentry) { + FREE(delentry, M_PROC_UUID_POLICY); + } + + if (error) { + dprintf("Failed to remove proc uuid policy (%s), entry not present\n", uuidstr); + } else { + dprintf("Removed proc uuid policy (%s)\n", uuidstr); + } + + return error; +} + +int +proc_uuid_policy_lookup(uuid_t uuid, uint32_t *flags, int32_t *gencount) +{ + struct proc_uuid_policy_entry *tmpentry, *searchentry, *foundentry = NULL; + int error; + +#if PROC_UUID_POLICY_DEBUG + uuid_string_t uuidstr; + uuid_unparse(uuid, uuidstr); +#endif + + if (uuid_is_null(uuid) || !flags || !gencount) + return EINVAL; + + if (*gencount == proc_uuid_policy_table_gencount) { + /* + * Generation count hasn't changed, so old flags should be valid. + * We avoid taking the lock here by assuming any concurrent modifications + * to the table will invalidate the generation count. 
+ */ + return 0; + } + + PROC_UUID_POLICY_SUBSYS_LOCK(); + + LIST_FOREACH_SAFE(searchentry, UUIDHASH(uuid), entries, tmpentry) { + if (0 == memcmp(searchentry->uuid, uuid, sizeof(uuid_t))) { + /* Found existing entry */ + foundentry = searchentry; + break; + } + } + + if (foundentry) { + *flags = foundentry->flags; + *gencount = proc_uuid_policy_table_gencount; + error = 0; + } else { + error = ENOENT; + } + + PROC_UUID_POLICY_SUBSYS_UNLOCK(); + + if (error == 0) { + dprintf("Looked up proc uuid policy (%s,0x%08x)\n", uuidstr, *flags); + } + + return error; +} + +static int +proc_uuid_policy_clear(void) +{ + struct proc_uuid_policy_entry *tmpentry, *searchentry; + struct proc_uuid_policy_hashhead deletehead = LIST_HEAD_INITIALIZER(deletehead); + unsigned long hashslot; + + PROC_UUID_POLICY_SUBSYS_LOCK(); + + if (proc_uuid_policy_count > 0) { + + for (hashslot=0; hashslot <= proc_uuid_policy_hash_mask; hashslot++) { + struct proc_uuid_policy_hashhead *headp = &proc_uuid_policy_hashtbl[hashslot]; + + LIST_FOREACH_SAFE(searchentry, headp, entries, tmpentry) { + /* Move each entry to our delete list */ + LIST_REMOVE(searchentry, entries); + proc_uuid_policy_count--; + LIST_INSERT_HEAD(&deletehead, searchentry, entries); + } + } + + BUMP_PROC_UUID_POLICY_GENERATION_COUNT(); + } + + PROC_UUID_POLICY_SUBSYS_UNLOCK(); + + /* Memory deallocation happens after the hash lock is dropped */ + LIST_FOREACH_SAFE(searchentry, &deletehead, entries, tmpentry) { + LIST_REMOVE(searchentry, entries); + FREE(searchentry, M_PROC_UUID_POLICY); + } + + dprintf("Clearing proc uuid policy table\n"); + + return 0; +} + +int proc_uuid_policy(struct proc *p __unused, struct proc_uuid_policy_args *uap, int32_t *retval __unused) +{ + int error = 0; + uuid_t uuid; + + /* Need privilege for policy changes */ + error = priv_check_cred(kauth_cred_get(), PRIV_PROC_UUID_POLICY, 0); + if (error) { + dprintf("%s failed privilege check for proc_uuid_policy: %d\n", p->p_comm, error); + return (error); + } else { + dprintf("%s succeeded privilege check for proc_uuid_policy\n", p->p_comm); + } + + switch (uap->operation) { + case PROC_UUID_POLICY_OPERATION_CLEAR: + error = proc_uuid_policy_clear(); + break; + + case PROC_UUID_POLICY_OPERATION_ADD: + if (uap->uuidlen != sizeof(uuid_t)) { + error = ERANGE; + break; + } + + error = copyin(uap->uuid, uuid, sizeof(uuid_t)); + if (error) + break; + + error = proc_uuid_policy_insert(uuid, uap->flags); + break; + + case PROC_UUID_POLICY_OPERATION_REMOVE: + if (uap->uuidlen != sizeof(uuid_t)) { + error = ERANGE; + break; + } + + error = copyin(uap->uuid, uuid, sizeof(uuid_t)); + if (error) + break; + + error = proc_uuid_policy_remove(uuid); + break; + + default: + error = EINVAL; + break; + } + + return error; +} diff --git a/bsd/kern/process_policy.c b/bsd/kern/process_policy.c index ff919538f..9ae4b32bc 100644 --- a/bsd/kern/process_policy.c +++ b/bsd/kern/process_policy.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -69,24 +70,15 @@ #include #include -#if CONFIG_EMBEDDED -#include -#include -#endif /* CONFIG_EMBEDDED */ - -static int handle_background(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid); -static int handle_hwaccess(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid); -static int handle_lowresrouce(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid); + 
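The proc_uuid_policy_lookup() routine added in bsd/kern/proc_uuid_policy.c above gives callers a lock-free fast path: if the caller's cached generation count still equals the table's, the function returns 0 immediately and the caller's previously cached flags remain valid; any table modification bumps the count and forces the next lookup onto the locked slow path. A hedged sketch of a caller relying on that contract (the caching helper and its names are illustrative, not part of the patch):

#include <stdint.h>

typedef unsigned char uuid_t[16];

/* Signature as declared in the new file above. */
extern int proc_uuid_policy_lookup(uuid_t uuid, uint32_t *flags, int32_t *gencount);

/*
 * Per-caller cache. The table's generation count starts at 1 and wraps
 * back to 1, so an initial value of 0 can never match and guarantees the
 * first call takes the locked slow path.
 */
static int32_t  cached_gencount = 0;
static uint32_t cached_flags = 0;

static int
policy_flags_for(uuid_t uuid, uint32_t *flags_out)
{
	/*
	 * On a generation-count hit the lookup returns 0 without touching
	 * cached_flags, so the previously cached value is still current.
	 */
	int error = proc_uuid_policy_lookup(uuid, &cached_flags, &cached_gencount);

	if (error == 0)
		*flags_out = cached_flags;
	return (error);
}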
+static int handle_lowresource(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid); static int handle_resourceuse(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid); static int handle_apptype(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid); +static int handle_boost(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid); extern kern_return_t task_suspend(task_t); extern kern_return_t task_resume(task_t); -#if CONFIG_EMBEDDED -static int handle_applifecycle(int scope, int action, int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid); -#endif /* CONFIG_EMBEDDED */ - /***************************** process_policy ********************/ @@ -98,7 +90,7 @@ static int handle_applifecycle(int scope, int action, int policy, int policy_sub * user_addr_t attrp, pid_t target_pid, uint64_t target_threadid); } */ -/* system call implementaion */ +/* system call implementation */ int process_policy(__unused struct proc *p, struct process_policy_args * uap, __unused int32_t *retval) { @@ -113,27 +105,21 @@ process_policy(__unused struct proc *p, struct process_policy_args * uap, __unus proc_t target_proc = PROC_NULL; proc_t curp = current_proc(); kauth_cred_t my_cred; -#if CONFIG_EMBEDDED - kauth_cred_t target_cred; -#endif if ((scope != PROC_POLICY_SCOPE_PROCESS) && (scope != PROC_POLICY_SCOPE_THREAD)) { return(EINVAL); } - target_proc = proc_find(target_pid); - if (target_proc == PROC_NULL) { + + if (target_pid == 0 || target_pid == proc_selfpid()) + target_proc = proc_self(); + else + target_proc = proc_find(target_pid); + + if (target_proc == PROC_NULL) return(ESRCH); - } my_cred = kauth_cred_get(); -#if CONFIG_EMBEDDED - target_cred = kauth_cred_proc_ref(target_proc); - - if (!kauth_cred_issuser(my_cred) && kauth_cred_getruid(my_cred) && - kauth_cred_getuid(my_cred) != kauth_cred_getuid(target_cred) && - kauth_cred_getruid(my_cred) != kauth_cred_getuid(target_cred)) -#else /* * Resource starvation control can be used by an unpriv resource owner but requires priv at the time of the ownership claim. This is * checked in the low resource handler routine, so bypass the checks here.
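Note for the handle_resourceuse() hunk a little further below: the process_policy API expresses the CPU-usage monitor interval in seconds, while the Mach task-policy SPI expects nanoseconds, and -1 is reserved as a sentinel with special meaning to the SPI. A small standalone sketch of that conversion, assuming only the sentinel convention visible in the hunk (the GET path in the patch scales back unconditionally; the sentinel check in that direction here is merely defensive):

#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* SET/APPLY path: user-supplied seconds -> nanoseconds for the SPI. */
static uint64_t
interval_secs_to_nsecs(uint64_t secs)
{
	return (secs == -1ULL) ? -1ULL : secs * NSEC_PER_SEC;
}

/* GET path: nanoseconds from the SPI -> seconds for copyout. */
static uint64_t
interval_nsecs_to_secs(uint64_t nsecs)
{
	return (nsecs == -1ULL) ? -1ULL : nsecs / NSEC_PER_SEC;
}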
@@ -141,47 +127,43 @@ process_policy(__unused struct proc *p, struct process_policy_args * uap, __unus if ((policy != PROC_POLICY_RESOURCE_STARVATION) && (policy != PROC_POLICY_APPTYPE) && (!kauth_cred_issuser(my_cred) && curp != p)) -#endif { error = EPERM; goto out; } #if CONFIG_MACF -#if CONFIG_EMBEDDED - /* Lifecycle management will invoke approp macf checks */ - if (policy != PROC_POLICY_APP_LIFECYCLE) { -#endif /* CONFIG_EMBEDDED */ - error = mac_proc_check_sched(curp, target_proc); - if (error) - goto out; -#if CONFIG_EMBEDDED + switch (policy) { + case PROC_POLICY_BOOST: + case PROC_POLICY_RESOURCE_USAGE: + /* These policies do their own appropriate mac checks */ + break; + default: + error = mac_proc_check_sched(curp, target_proc); + if (error) goto out; + break; } -#endif /* CONFIG_EMBEDDED */ #endif /* CONFIG_MACF */ - switch(policy) { case PROC_POLICY_BACKGROUND: - error = handle_background(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid); + error = ENOTSUP; break; case PROC_POLICY_HARDWARE_ACCESS: - error = handle_hwaccess(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid); + error = ENOTSUP; break; case PROC_POLICY_RESOURCE_STARVATION: - error = handle_lowresrouce(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid); + error = handle_lowresource(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid); break; case PROC_POLICY_RESOURCE_USAGE: error = handle_resourceuse(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid); break; -#if CONFIG_EMBEDDED - case PROC_POLICY_APP_LIFECYCLE: - error = handle_applifecycle(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid); - break; -#endif /* CONFIG_EMBEDDED */ case PROC_POLICY_APPTYPE: error = handle_apptype(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid); break; + case PROC_POLICY_BOOST: + error = handle_boost(scope, action, policy, policy_subtype, attrp, target_proc, target_threadid); + break; default: error = EINVAL; break; @@ -189,131 +171,11 @@ process_policy(__unused struct proc *p, struct process_policy_args * uap, __unus out: proc_rele(target_proc); -#if CONFIG_EMBEDDED - kauth_cred_unref(&target_cred); -#endif return(error); } - -/* darwin background handling code */ -static int -handle_background(int scope, int action, __unused int policy, __unused int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid) -{ - int intval, error = 0; - - - switch (action) { - case PROC_POLICY_ACTION_GET: - if (scope == PROC_POLICY_SCOPE_PROCESS) { - intval = proc_get_task_bg_policy(proc->task); - } else { - /* thread scope */ - intval = proc_get_thread_bg_policy(proc->task, target_threadid); - } - error = copyout((int *)&intval, (user_addr_t)attrp, sizeof(int)); - break; - - case PROC_POLICY_ACTION_SET: - error = copyin((user_addr_t)attrp, (int *)&intval, sizeof(int)); - if (error != 0) - goto out; - if (intval > PROC_POLICY_BG_ALL) { - error = EINVAL; - goto out; - } - if (scope == PROC_POLICY_SCOPE_PROCESS) { - error = proc_set_bgtaskpolicy(proc->task, intval); - } else { - /* thread scope */ - error = proc_set_bgthreadpolicy(proc->task, target_threadid, intval); - } - break; - - case PROC_POLICY_ACTION_ADD: - error = copyin((user_addr_t)attrp, (int *)&intval, sizeof(int)); - if (error != 0) - goto out; - if (intval > PROC_POLICY_BG_ALL) { - error = EINVAL; - goto out; - } - if (scope == PROC_POLICY_SCOPE_PROCESS) { - error = 
proc_add_bgtaskpolicy(proc->task, intval); - } else { - /* thread scope */ - error = proc_add_bgthreadpolicy(proc->task, target_threadid, intval); - } - break; - - case PROC_POLICY_ACTION_REMOVE: - error = copyin((user_addr_t)attrp, (int *)&intval, sizeof(int)); - if (error != 0) - goto out; - if (intval > PROC_POLICY_BG_ALL) { - error = EINVAL; - goto out; - } - if (scope == PROC_POLICY_SCOPE_PROCESS) { - error = proc_remove_bgtaskpolicy(proc->task, intval); - } else { - /* thread scope */ - error = proc_remove_bgthreadpolicy(proc->task, target_threadid, intval); - } - break; - - case PROC_POLICY_ACTION_APPLY: - if (scope == PROC_POLICY_SCOPE_PROCESS) { - error = proc_apply_bgtaskpolicy(proc->task); - } else { - /* thread scope */ - error = proc_apply_bgthreadpolicy(proc->task, target_threadid); - } - break; - - case PROC_POLICY_ACTION_RESTORE: - if (scope == PROC_POLICY_SCOPE_PROCESS) { - error = proc_restore_bgtaskpolicy(proc->task); - } else { - /* thread scope */ - error = proc_restore_bgthreadpolicy(proc->task, target_threadid); - } - break; - - case PROC_POLICY_ACTION_DENYINHERIT: - error = proc_denyinherit_policy(proc->task); - break; - - case PROC_POLICY_ACTION_DENYSELFSET: - error = proc_denyselfset_policy(proc->task); - break; - - default: - return(EINVAL); - } - -out: - return(error); -} - -static int -handle_hwaccess(__unused int scope, __unused int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, __unused proc_t proc, __unused uint64_t target_threadid) -{ - switch(policy_subtype) { - case PROC_POLICY_HWACCESS_NONE: - case PROC_POLICY_HWACCESS_DISK: - case PROC_POLICY_HWACCESS_GPU: - case PROC_POLICY_HWACCESS_NETWORK: - case PROC_POLICY_HWACCESS_CPU: - break; - default: - return(EINVAL); - } - return(0); -} - -static int -handle_lowresrouce(__unused int scope, int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, proc_t proc, __unused uint64_t target_threadid) +static int +handle_lowresource(__unused int scope, int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, proc_t proc, __unused uint64_t target_threadid) { int error = 0; @@ -337,8 +199,13 @@ handle_lowresrouce(__unused int scope, int action, __unused int policy, int poli static int handle_resourceuse(__unused int scope, __unused int action, __unused int policy, int policy_subtype, user_addr_t attrp, proc_t proc, __unused uint64_t target_threadid) { - proc_policy_cpuusage_attr_t cpuattr; - int error = 0; + proc_policy_cpuusage_attr_t cpuattr; +#if CONFIG_MACF + proc_t curp = current_proc(); +#endif + int entitled = TRUE; + uint64_t interval = -1ULL; + int error = 0; switch(policy_subtype) { case PROC_POLICY_RUSAGE_NONE: @@ -355,30 +222,63 @@ handle_resourceuse(__unused int scope, __unused int action, __unused int policy, break; } +#if CONFIG_MACF + if (curp != proc) { + /* the cpumon entitlement manages messing with CPU limits on self */ + error = mac_proc_check_sched(curp, proc); + if (error) + return error; + } + + /* + * Allow a process to change CPU usage monitor parameters, unless a MAC policy + * overrides it with an entitlement check. + */ + entitled = (mac_proc_check_cpumon(curp) == 0) ? 
TRUE : FALSE; +#endif + switch (action) { + uint8_t percentage; + case PROC_POLICY_ACTION_GET: error = proc_get_task_ruse_cpu(proc->task, &cpuattr.ppattr_cpu_attr, - &cpuattr.ppattr_cpu_percentage, + &percentage, &cpuattr.ppattr_cpu_attr_interval, &cpuattr.ppattr_cpu_attr_deadline); - if (error == 0) + if (error == 0) { + cpuattr.ppattr_cpu_percentage = percentage; + cpuattr.ppattr_cpu_attr_interval /= NSEC_PER_SEC; error = copyout((proc_policy_cpuusage_attr_t *)&cpuattr, (user_addr_t)attrp, sizeof(proc_policy_cpuusage_attr_t)); + } break; case PROC_POLICY_ACTION_APPLY: case PROC_POLICY_ACTION_SET: error = copyin((user_addr_t)attrp, (proc_policy_cpuusage_attr_t *)&cpuattr, sizeof(proc_policy_cpuusage_attr_t)); + if (error != 0) { + return (error); + } + + /* + * The process_policy API uses seconds as the units for the interval, + * but the mach task policy SPI uses nanoseconds. Do the conversion, + * but preserve -1 as it has special meaning. + */ + if (cpuattr.ppattr_cpu_attr_interval != -1ULL) { + interval = cpuattr.ppattr_cpu_attr_interval * NSEC_PER_SEC; + } else { + interval = -1ULL; + } - if (error == 0) { error = proc_set_task_ruse_cpu(proc->task, cpuattr.ppattr_cpu_attr, cpuattr.ppattr_cpu_percentage, - cpuattr.ppattr_cpu_attr_interval, - cpuattr.ppattr_cpu_attr_deadline); - } + interval, + cpuattr.ppattr_cpu_attr_deadline, + entitled); break; case PROC_POLICY_ACTION_RESTORE: - error = proc_clear_task_ruse_cpu(proc->task); + error = proc_clear_task_ruse_cpu(proc->task, entitled); break; default: @@ -390,170 +290,187 @@ handle_resourceuse(__unused int scope, __unused int action, __unused int policy, return(error); } -#if CONFIG_EMBEDDED -static int -handle_applifecycle(__unused int scope, int action, __unused int policy, int policy_subtype, user_addr_t attrp, proc_t proc, uint64_t target_threadid) + +static int +handle_apptype( int scope, + int action, + __unused int policy, + int policy_subtype, + __unused user_addr_t attrp, + proc_t target_proc, + __unused uint64_t target_threadid) { + int error = 0; + + if (scope != PROC_POLICY_SCOPE_PROCESS) + return (EINVAL); + + /* Temporary compatibility with old importance donation interface until libproc is moved to new boost calls */ + switch (policy_subtype) { + case PROC_POLICY_IOS_DONATEIMP: + if (action != PROC_POLICY_ACTION_ENABLE) + return (EINVAL); + if (target_proc != current_proc()) + return (EINVAL); + + /* PROCESS ENABLE APPTYPE DONATEIMP */ + task_importance_mark_donor(target_proc->task, TRUE); + + return(0); + + case PROC_POLICY_IOS_HOLDIMP: + if (action != PROC_POLICY_ACTION_ENABLE) + return (EINVAL); + if (target_proc != current_proc()) + return (EINVAL); + + /* PROCESS ENABLE APPTYPE HOLDIMP */ + error = task_importance_hold_external_assertion(current_task(), 1); + + return(error); + + case PROC_POLICY_IOS_DROPIMP: + if (action != PROC_POLICY_ACTION_ENABLE) + return (EINVAL); + if (target_proc != current_proc()) + return (EINVAL); + + /* PROCESS ENABLE APPTYPE DROPIMP */ + error = task_importance_drop_external_assertion(current_task(), 1); + + return(error); + + default: + /* continue to TAL handling */ + break; + } + + if (policy_subtype != PROC_POLICY_OSX_APPTYPE_TAL) + return (EINVAL); + /* need to be super user to do this */ + if (kauth_cred_issuser(kauth_cred_get()) == 0) + return (EPERM); + + if (proc_task_is_tal(target_proc->task) == FALSE) + return (EINVAL); + + switch (action) { + case PROC_POLICY_ACTION_ENABLE: + /* PROCESS ENABLE APPTYPE TAL */ + proc_set_task_policy(target_proc->task, THREAD_NULL, + 
TASK_POLICY_ATTRIBUTE, TASK_POLICY_TAL, + TASK_POLICY_ENABLE); + break; + case PROC_POLICY_ACTION_DISABLE: + /* PROCESS DISABLE APPTYPE TAL */ + proc_set_task_policy(target_proc->task, THREAD_NULL, + TASK_POLICY_ATTRIBUTE, TASK_POLICY_TAL, + TASK_POLICY_DISABLE); + break; + default: + return (EINVAL); + break; + } + + return(0); +} + +static int +handle_boost(int scope, + int action, + __unused int policy, + int policy_subtype, + __unused user_addr_t attrp, + proc_t target_proc, + __unused uint64_t target_threadid) +{ int error = 0; - int state = 0, oldstate = 0; - int noteval = 0; - + assert(policy == PROC_POLICY_BOOST); + + if (scope != PROC_POLICY_SCOPE_PROCESS) + return (EINVAL); + + if (target_proc != current_proc()) + return (EINVAL); switch(policy_subtype) { - case PROC_POLICY_APPLIFE_NONE: - error = 0; - break; + case PROC_POLICY_IMP_IMPORTANT: + if (task_is_importance_receiver(target_proc->task) == FALSE) + return (EINVAL); - case PROC_POLICY_APPLIFE_STATE: -#if CONFIG_MACF - error = mac_proc_check_sched(current_proc(), proc); - if (error) - goto out; -#endif switch (action) { - case PROC_POLICY_ACTION_GET : - state = proc_lf_getappstate(proc->task); - error = copyout((int *)&state, (user_addr_t)attrp, sizeof(int)); + case PROC_POLICY_ACTION_HOLD: + /* PROCESS HOLD BOOST IMPORTANT */ + error = task_importance_hold_external_assertion(current_task(), 1); break; - case PROC_POLICY_ACTION_APPLY : - case PROC_POLICY_ACTION_SET : - error = copyin((user_addr_t)attrp, (int *)&state, sizeof(int)); - if ((error == 0) && (state != TASK_APPSTATE_NONE)) { - oldstate = proc_lf_getappstate(proc->task); - error = proc_lf_setappstate(proc->task, state); - if (error == 0) { - switch (state) { - case TASK_APPSTATE_ACTIVE: - noteval = NOTE_APPACTIVE; - break; - case TASK_APPSTATE_BACKGROUND: - noteval = NOTE_APPBACKGROUND; - break; - case TASK_APPSTATE_NONUI: - noteval = NOTE_APPNONUI; - break; - case TASK_APPSTATE_INACTIVE: - noteval = NOTE_APPINACTIVE; - break; - } - - proc_lock(proc); - proc_knote(proc, noteval); - proc_unlock(proc); - } - } + case PROC_POLICY_ACTION_DROP: + /* PROCESS DROP BOOST IMPORTANT */ + error = task_importance_drop_external_assertion(current_task(), 1); break; - default: - error = EINVAL; + error = (EINVAL); break; } break; - case PROC_POLICY_APPLIFE_DEVSTATUS: + case PROC_POLICY_IMP_DONATION: #if CONFIG_MACF - /* ToDo - this should be a generic check, since we could potentially hang other behaviours here. 
*/ - error = mac_proc_check_suspend_resume(current_proc(), MAC_PROC_CHECK_HIBERNATE); - if (error) { - error = EPERM; - goto out; - } + error = mac_proc_check_sched(current_proc(), target_proc); + if (error) return error; #endif - if (action == PROC_POLICY_ACTION_APPLY) { - /* Used as a freeze hint */ - memorystatus_on_inactivity(-1); - - /* in future use devicestatus for pid_socketshutdown() */ - error = 0; - } else { - error = EINVAL; + switch (action) { + case PROC_POLICY_ACTION_SET: + /* PROCESS SET BOOST DONATION */ + task_importance_mark_donor(target_proc->task, TRUE); + break; + default: + error = (EINVAL); + break; } break; - case PROC_POLICY_APPLIFE_PIDBIND: -#if CONFIG_MACF - error = mac_proc_check_suspend_resume(current_proc(), MAC_PROC_CHECK_PIDBIND); - if (error) { - error = EPERM; - goto out; - } -#endif - error = copyin((user_addr_t)attrp, (int *)&state, sizeof(int)); - if (error != 0) - goto out; - if (action == PROC_POLICY_ACTION_APPLY) { - /* bind the thread in target_thread in current process to target_proc */ - error = proc_lf_pidbind(current_task(), target_threadid, proc->task, state); - } else - error = EINVAL; - break; default: - error = EINVAL; - break; + error = (EINVAL); + break; } -out: return(error); } -#endif /* CONFIG_EMBEDDED */ -static int -handle_apptype(__unused int scope, int action, __unused int policy, int policy_subtype, __unused user_addr_t attrp, proc_t target_proc, __unused uint64_t target_threadid) +/* + * KPI to determine if a pid is currently backgrounded. + * Returns ESRCH if pid cannot be found or has started exiting. + * Returns EINVAL if state is NULL. + * Sets *state to 1 if pid is backgrounded, and 0 otherwise. + */ +int +proc_pidbackgrounded(pid_t pid, uint32_t* state) { - int error = 0; + proc_t target_proc = PROC_NULL; - switch(policy_subtype) { -#if !CONFIG_EMBEDDED - case PROC_POLICY_OSX_APPTYPE_TAL: - /* need to be super user to do this */ - if (kauth_cred_issuser(kauth_cred_get()) == 0) { - error = EPERM; - goto out; - } - break; - case PROC_POLICY_OSX_APPTYPE_DASHCLIENT: - /* no special priv needed */ - break; -#endif /* !CONFIG_EMBEDDED */ - case PROC_POLICY_OSX_APPTYPE_NONE: -#if CONFIG_EMBEDDED - case PROC_POLICY_IOS_RESV1_APPTYPE: - case PROC_POLICY_IOS_APPLE_DAEMON: - case PROC_POLICY_IOS_APPTYPE: - case PROC_POLICY_IOS_NONUITYPE: -#endif /* CONFIG_EMBEDDED */ - return(ENOTSUP); - break; - default: - return(EINVAL); - } + if (state == NULL) + return(EINVAL); - switch (action) { - case PROC_POLICY_ACTION_ENABLE: - /* reapply the app foreground/background policy */ - error = proc_enable_task_apptype(target_proc->task, policy_subtype); - break; - case PROC_POLICY_ACTION_DISABLE: - /* remove the app foreground/background policy */ - error = proc_disable_task_apptype(target_proc->task, policy_subtype); - break; - default: - error = EINVAL; - break; + target_proc = proc_find(pid); + + if (target_proc == PROC_NULL) + return(ESRCH); + + if ( proc_get_effective_task_policy(target_proc->task, TASK_POLICY_DARWIN_BG) ) { + *state = 1; + } else { + *state = 0; } - -#if !CONFIG_EMBEDDED -out: -#endif /* !CONFIG_EMBEDDED */ - return(error); -} + proc_rele(target_proc); + return (0); +} int -proc_apply_resource_actions(void * bsdinfo, int type, int action) +proc_apply_resource_actions(void * bsdinfo, __unused int type, int action) { proc_t p = (proc_t)bsdinfo; @@ -571,9 +488,7 @@ proc_apply_resource_actions(void * bsdinfo, int type, int action) break; case PROC_POLICY_RSRCACT_NOTIFY_KQ: - proc_lock(p); - proc_knote(p, NOTE_RESOURCEEND | (type 
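/*
 * A minimal sketch of how an in-kernel client might consume the
 * proc_pidbackgrounded() KPI documented above; the caller shown here is
 * hypothetical and not part of the patch:
 */
extern int proc_pidbackgrounded(pid_t pid, uint32_t *state);

static int
should_defer_work_for(pid_t pid)
{
	uint32_t bg = 0;

	/* ESRCH (pid exiting/gone) and EINVAL (NULL state) mean "no" here */
	if (proc_pidbackgrounded(pid, &bg) != 0)
		return (0);

	return (bg != 0);
}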
& 0xff)); - proc_unlock(p); + /* not implemented */ break; case PROC_POLICY_RSRCACT_NOTIFY_EXC: diff --git a/bsd/kern/pthread_shims.c b/bsd/kern/pthread_shims.c new file mode 100644 index 000000000..d951de700 --- /dev/null +++ b/bsd/kern/pthread_shims.c @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#define PTHREAD_INTERNAL 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* version number of the in-kernel shims given to pthread.kext */ +#define PTHREAD_SHIMS_VERSION 1 + +/* old pthread code had definitions for these as they don't exist in headers */ +extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t); +extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t); + +#define PTHREAD_STRUCT_ACCESSOR(get, set, rettype, structtype, member) \ + static rettype \ + get(structtype x) { \ + return (x)->member; \ + } \ + static void \ + set(structtype x, rettype y) { \ + (x)->member = y; \ + } + +PTHREAD_STRUCT_ACCESSOR(proc_get_threadstart, proc_set_threadstart, user_addr_t, struct proc*, p_threadstart); +PTHREAD_STRUCT_ACCESSOR(proc_get_pthsize, proc_set_pthsize, int, struct proc*, p_pthsize); +PTHREAD_STRUCT_ACCESSOR(proc_get_wqthread, proc_set_wqthread, user_addr_t, struct proc*, p_wqthread); +PTHREAD_STRUCT_ACCESSOR(proc_get_targconc, proc_set_targconc, user_addr_t, struct proc*, p_targconc); +PTHREAD_STRUCT_ACCESSOR(proc_get_dispatchqueue_offset, proc_set_dispatchqueue_offset, uint64_t, struct proc*, p_dispatchqueue_offset); +PTHREAD_STRUCT_ACCESSOR(proc_get_dispatchqueue_serialno_offset, proc_set_dispatchqueue_serialno_offset, uint64_t, struct proc*, p_dispatchqueue_serialno_offset); +PTHREAD_STRUCT_ACCESSOR(proc_get_wqptr, proc_set_wqptr, void*, struct proc*, p_wqptr); +PTHREAD_STRUCT_ACCESSOR(proc_get_wqsize, proc_set_wqsize, int, struct proc*, p_wqsize); +PTHREAD_STRUCT_ACCESSOR(proc_get_pthhash, proc_set_pthhash, void*, struct proc*, p_pthhash); + +PTHREAD_STRUCT_ACCESSOR(uthread_get_threadlist, uthread_set_threadlist, void*, struct uthread*, uu_threadlist); +PTHREAD_STRUCT_ACCESSOR(uthread_get_sigmask, uthread_set_sigmask, sigset_t, 
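/*
 * Each PTHREAD_STRUCT_ACCESSOR() invocation above stamps out a trivial
 * getter/setter pair so that pthread.kext never has to know the layout
 * of struct proc or struct uthread. The p_threadstart instance, for
 * example, expands to:
 */
static user_addr_t
proc_get_threadstart(struct proc *x) {
	return (x)->p_threadstart;
}
static void
proc_set_threadstart(struct proc *x, user_addr_t y) {
	(x)->p_threadstart = y;
}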
struct uthread*, uu_sigmask); +PTHREAD_STRUCT_ACCESSOR(uthread_get_returnval, uthread_set_returnval, int, struct uthread*, uu_rval[0]); + +static void +pthread_returning_to_userspace(void) +{ + thread_exception_return(); +} + +static uint32_t +get_task_threadmax(void) { + return task_threadmax; +} + +static task_t +proc_get_task(struct proc *p) { + return p->task; +} + +static lck_spin_t* +proc_get_wqlockptr(struct proc *p) { + return &(p->p_wqlock); +} + +static boolean_t* +proc_get_wqinitingptr(struct proc *p) { + return &(p->p_wqiniting); +} + +static uint64_t +proc_get_register(struct proc *p) { + return (p->p_lflag & P_LREGISTER); +} + +static void +proc_set_register(struct proc *p) { + proc_setregister(p); +} + +static void* +uthread_get_uukwe(struct uthread *t) +{ + return &t->uu_kevent.uu_kwe; +} + +static int +uthread_is_cancelled(struct uthread *t) +{ + return (t->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL; +} + +static vm_map_t +_current_map(void) +{ + return current_map(); +} + + +/* kernel (core) to kext shims */ + +void +pthread_init(void) +{ + if (!pthread_functions) { + panic("pthread kernel extension not loaded (function table is NULL)."); + } + pthread_functions->pthread_init(); +} + +int +fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo) +{ + return pthread_functions->fill_procworkqueue(p, pwqinfo); +} + +void +workqueue_init_lock(proc_t p) +{ + pthread_functions->workqueue_init_lock(p); +} + +void +workqueue_destroy_lock(proc_t p) +{ + pthread_functions->workqueue_destroy_lock(p); +} + +void +workqueue_exit(struct proc *p) +{ + pthread_functions->workqueue_exit(p); +} + +void +workqueue_mark_exiting(struct proc *p) +{ + pthread_functions->workqueue_mark_exiting(p); +} + +void +workqueue_thread_yielded(void) +{ + pthread_functions->workqueue_thread_yielded(); +} + +sched_call_t +workqueue_get_sched_callback(void) +{ + if (pthread_functions->workqueue_get_sched_callback) { + return pthread_functions->workqueue_get_sched_callback(); + } + return NULL; +} + +void +pth_proc_hashinit(proc_t p) +{ + pthread_functions->pth_proc_hashinit(p); +} + +void +pth_proc_hashdelete(proc_t p) +{ + pthread_functions->pth_proc_hashdelete(p); +} + +/* syscall shims */ +int +bsdthread_create(struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval) +{ + return pthread_functions->bsdthread_create(p, uap->func, uap->func_arg, uap->stack, uap->pthread, uap->flags, retval); +} + +int +bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused int32_t *retval) +{ + return pthread_functions->bsdthread_register(p, uap->threadstart, uap->wqthread, uap->pthsize, uap->dummy_value, + uap->targetconc_ptr, uap->dispatchqueue_offset, retval); +} + +int +bsdthread_terminate(struct proc *p, struct bsdthread_terminate_args *uap, int32_t *retval) +{ + return pthread_functions->bsdthread_terminate(p, uap->stackaddr, uap->freesize, uap->port, uap->sem, retval); +} + +int +thread_selfid(struct proc *p, __unused struct thread_selfid_args *uap, uint64_t *retval) +{ + return pthread_functions->thread_selfid(p, retval); +} + +int +workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, int32_t *retval) +{ + return pthread_functions->workq_kernreturn(p, uap->options, uap->item, uap->affinity, uap->prio, retval); +} + +int +workq_open(struct proc *p, __unused struct workq_open_args *uap, int32_t *retval) +{ + return pthread_functions->workq_open(p, retval); +} + +/* pthread synchroniser syscalls */ + +int +psynch_mutexwait(proc_t 
p, struct psynch_mutexwait_args *uap, uint32_t *retval) +{ + return pthread_functions->psynch_mutexwait(p, uap->mutex, uap->mgen, uap->ugen, uap->tid, uap->flags, retval); +} + +int +psynch_mutexdrop(proc_t p, struct psynch_mutexdrop_args *uap, uint32_t *retval) +{ + return pthread_functions->psynch_mutexdrop(p, uap->mutex, uap->mgen, uap->ugen, uap->tid, uap->flags, retval); +} + +int +psynch_cvbroad(proc_t p, struct psynch_cvbroad_args *uap, uint32_t *retval) +{ + return pthread_functions->psynch_cvbroad(p, uap->cv, uap->cvlsgen, uap->cvudgen, uap->flags, uap->mutex, uap->mugen, uap->tid, retval); +} + +int +psynch_cvsignal(proc_t p, struct psynch_cvsignal_args *uap, uint32_t *retval) +{ + return pthread_functions->psynch_cvsignal(p, uap->cv, uap->cvlsgen, uap->cvugen, uap->thread_port, uap->mutex, uap->mugen, uap->tid, uap->flags, retval); +} + +int +psynch_cvwait(proc_t p, struct psynch_cvwait_args * uap, uint32_t * retval) +{ + return pthread_functions->psynch_cvwait(p, uap->cv, uap->cvlsgen, uap->cvugen, uap->mutex, uap->mugen, uap->flags, uap->sec, uap->nsec, retval); +} + +int +psynch_cvclrprepost(proc_t p, struct psynch_cvclrprepost_args * uap, int *retval) +{ + return pthread_functions->psynch_cvclrprepost(p, uap->cv, uap->cvgen, uap->cvugen, uap->cvsgen, uap->prepocnt, uap->preposeq, uap->flags, retval); +} + +int +psynch_rw_longrdlock(proc_t p, struct psynch_rw_longrdlock_args * uap, uint32_t *retval) +{ + return pthread_functions->psynch_rw_longrdlock(p, uap->rwlock, uap->lgenval, uap->ugenval, uap->rw_wc, uap->flags, retval); +} + +int +psynch_rw_rdlock(proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t * retval) +{ + return pthread_functions->psynch_rw_rdlock(p, uap->rwlock, uap->lgenval, uap->ugenval, uap->rw_wc, uap->flags, retval); +} + +int +psynch_rw_unlock(proc_t p, struct psynch_rw_unlock_args *uap, uint32_t *retval) +{ + return pthread_functions->psynch_rw_unlock(p, uap->rwlock, uap->lgenval, uap->ugenval, uap->rw_wc, uap->flags, retval); +} + +int +psynch_rw_unlock2(__unused proc_t p, __unused struct psynch_rw_unlock2_args *uap, __unused uint32_t *retval) +{ + return ENOTSUP; +} + +int +psynch_rw_wrlock(proc_t p, struct psynch_rw_wrlock_args *uap, uint32_t *retval) +{ + return pthread_functions->psynch_rw_wrlock(p, uap->rwlock, uap->lgenval, uap->ugenval, uap->rw_wc, uap->flags, retval); +} + +int +psynch_rw_yieldwrlock(proc_t p, struct psynch_rw_yieldwrlock_args *uap, uint32_t *retval) +{ + return pthread_functions->psynch_rw_yieldwrlock(p, uap->rwlock, uap->lgenval, uap->ugenval, uap->rw_wc, uap->flags, retval); +} + +int +psynch_rw_upgrade(__unused proc_t p, __unused struct psynch_rw_upgrade_args * uap, __unused uint32_t *retval) +{ + return 0; +} + +int +psynch_rw_downgrade(__unused proc_t p, __unused struct psynch_rw_downgrade_args * uap, __unused int *retval) +{ + return 0; +} + +/* unimplemented guard */ + +// static void +// unhooked_panic(void) +// { +// panic("pthread system call not hooked up"); +// } + +/* + * The callbacks structure (defined in pthread_shims.h) contains a collection + * of kernel functions that were not deemed sensible to expose as a KPI to all + * kernel extensions. So the kext is given them in the form of a structure of + * function pointers. 
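 *
 * The first member of that structure is a version stamp
 * (PTHREAD_SHIMS_VERSION), presumably so pthread.kext can recognize
 * which generation of shim table it has been handed; nearly everything
 * else in it is a function pointer, so a kext-side call through the
 * received table, e.g.
 *
 *	task_t t = pthread_kern->proc_get_task(p);
 *
 * stands in for dereferencing struct proc directly.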
+ */ +static struct pthread_callbacks_s pthread_callbacks = { + .version = PTHREAD_SHIMS_VERSION, + .config_thread_max = CONFIG_THREAD_MAX, + .get_task_threadmax = get_task_threadmax, + + .proc_get_threadstart = proc_get_threadstart, + .proc_set_threadstart = proc_set_threadstart, + .proc_get_pthsize = proc_get_pthsize, + .proc_set_pthsize = proc_set_pthsize, + .proc_get_wqthread = proc_get_wqthread, + .proc_set_wqthread = proc_set_wqthread, + .proc_get_targconc = proc_get_targconc, + .proc_set_targconc = proc_set_targconc, + .proc_get_dispatchqueue_offset = proc_get_dispatchqueue_offset, + .proc_set_dispatchqueue_offset = proc_set_dispatchqueue_offset, + .proc_get_wqptr = proc_get_wqptr, + .proc_set_wqptr = proc_set_wqptr, + .proc_get_wqsize = proc_get_wqsize, + .proc_set_wqsize = proc_set_wqsize, + .proc_get_wqlockptr = proc_get_wqlockptr, + .proc_get_wqinitingptr = proc_get_wqinitingptr, + .proc_get_pthhash = proc_get_pthhash, + .proc_set_pthhash = proc_set_pthhash, + .proc_get_task = proc_get_task, + .proc_lock = proc_lock, + .proc_unlock = proc_unlock, + .proc_get_register = proc_get_register, + .proc_set_register = proc_set_register, + + /* kernel IPI interfaces */ + .ipc_port_copyout_send = ipc_port_copyout_send, + .task_get_ipcspace = get_task_ipcspace, + .vm_map_page_info = vm_map_page_info, + .vm_map_switch = vm_map_switch, + .thread_set_wq_state32 = thread_set_wq_state32, + .thread_set_wq_state64 = thread_set_wq_state64, + + .uthread_get_threadlist = uthread_get_threadlist, + .uthread_set_threadlist = uthread_set_threadlist, + .uthread_get_sigmask = uthread_get_sigmask, + .uthread_set_sigmask = uthread_set_sigmask, + .uthread_get_uukwe = uthread_get_uukwe, + .uthread_get_returnval = uthread_get_returnval, + .uthread_set_returnval = uthread_set_returnval, + .uthread_is_cancelled = uthread_is_cancelled, + + .thread_exception_return = pthread_returning_to_userspace, + .thread_bootstrap_return = thread_bootstrap_return, + .unix_syscall_return = unix_syscall_return, + + .absolutetime_to_microtime = absolutetime_to_microtime, + + .proc_restore_workq_bgthreadpolicy = proc_restore_workq_bgthreadpolicy, + .proc_apply_workq_bgthreadpolicy = proc_apply_workq_bgthreadpolicy, + + .get_bsdthread_info = (void*)get_bsdthread_info, + .thread_sched_call = thread_sched_call, + .thread_static_param = thread_static_param, + .thread_create_workq = thread_create_workq, + .thread_policy_set_internal = thread_policy_set_internal, + + .thread_affinity_set = thread_affinity_set, + + .zalloc = zalloc, + .zfree = zfree, + .zinit = zinit, + + .__pthread_testcancel = __pthread_testcancel, + + .mach_port_deallocate = mach_port_deallocate, + .semaphore_signal_internal_trap = semaphore_signal_internal_trap, + .current_map = _current_map, + .thread_create = thread_create, + .thread_resume = thread_resume, + + .convert_thread_to_port = convert_thread_to_port, + .ml_get_max_cpus = (void*)ml_get_max_cpus, + + + .proc_get_dispatchqueue_serialno_offset = proc_get_dispatchqueue_serialno_offset, + .proc_set_dispatchqueue_serialno_offset = proc_set_dispatchqueue_serialno_offset, +}; + +pthread_callbacks_t pthread_kern = &pthread_callbacks; +pthread_functions_t pthread_functions = NULL; + +/* + * pthread_kext_register is called by pthread.kext upon load, it has to provide + * us with a function pointer table of pthread internal calls. In return, this + * file provides it with a table of function pointers it needs. 
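 *
 * A sketch of the load-time handshake from the kext side (the start
 * routine, member names, and the pthread_functions_s struct tag, which
 * is assumed to parallel pthread_callbacks_s, are illustrative only):
 *
 *	static struct pthread_functions_s fns = {
 *		.pthread_init = _pthread_init,
 *		.bsdthread_create = _bsdthread_create,
 *		...
 *	};
 *	pthread_callbacks_t pthread_kern_table;
 *
 *	pthread_kext_register(&fns, &pthread_kern_table);
 *
 * After this returns, the kernel routes the psynch/workqueue syscalls
 * through fns, and the kext reaches back into the kernel only through
 * pthread_kern_table.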
+ */ + +void +pthread_kext_register(pthread_functions_t fns, pthread_callbacks_t *callbacks) +{ + if (pthread_functions != NULL) { + panic("Re-initialisation of pthread kext callbacks."); + } + + if (callbacks != NULL) { + *callbacks = &pthread_callbacks; + } else { + panic("pthread_kext_register called without callbacks pointer."); + } + + if (fns) { + pthread_functions = fns; + } +} diff --git a/bsd/kern/pthread_support.c b/bsd/kern/pthread_support.c deleted file mode 100644 index 6e96434e1..000000000 --- a/bsd/kern/pthread_support.c +++ /dev/null @@ -1,4510 +0,0 @@ -/* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995-2005 Apple Computer, Inc. 
All Rights Reserved */ -/* - * pthread_support.c - */ - -#if PSYNCH - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#define __PSYNCH_DEBUG__ 0 /* debug panic actions */ -#if (KDEBUG && STANDARD_KDEBUG) -#define _PSYNCH_TRACE_ 1 /* kdebug trace */ -#endif - -#define __TESTMODE__ 2 /* 0 - return error on user error conditions */ - /* 1 - log error on user error conditions */ - /* 2 - abort caller on user error conditions */ - /* 3 - panic on user error conditions */ -static int __test_panics__; -static int __test_aborts__; -static int __test_prints__; - -static inline void __FAILEDUSERTEST__(const char *str) -{ - proc_t p; - - if (__test_panics__ != 0) - panic(str); - - if (__test_aborts__ != 0 || __test_prints__ != 0) - p = current_proc(); - - if (__test_prints__ != 0) - printf("PSYNCH: pid[%d]: %s\n", p->p_pid, str); - - if (__test_aborts__ != 0) - psignal(p, SIGABRT); -} - -#if _PSYNCH_TRACE_ -#define _PSYNCH_TRACE_MLWAIT 0x9000000 -#define _PSYNCH_TRACE_MLDROP 0x9000004 -#define _PSYNCH_TRACE_CVWAIT 0x9000008 -#define _PSYNCH_TRACE_CVSIGNAL 0x900000c -#define _PSYNCH_TRACE_CVBROAD 0x9000010 -#define _PSYNCH_TRACE_KMDROP 0x9000014 -#define _PSYNCH_TRACE_RWRDLOCK 0x9000018 -#define _PSYNCH_TRACE_RWLRDLOCK 0x900001c -#define _PSYNCH_TRACE_RWWRLOCK 0x9000020 -#define _PSYNCH_TRACE_RWYWRLOCK 0x9000024 -#define _PSYNCH_TRACE_RWUPGRADE 0x9000028 -#define _PSYNCH_TRACE_RWDOWNGRADE 0x900002c -#define _PSYNCH_TRACE_RWUNLOCK 0x9000030 -#define _PSYNCH_TRACE_RWUNLOCK2 0x9000034 -#define _PSYNCH_TRACE_RWHANDLEU 0x9000038 -#define _PSYNCH_TRACE_FSEQTILL 0x9000040 -#define _PSYNCH_TRACE_CLRPRE 0x9000044 -#define _PSYNCH_TRACE_CVHBROAD 0x9000048 -#define _PSYNCH_TRACE_CVSEQ 0x900004c -#define _PSYNCH_TRACE_THWAKEUP 0x9000050 -/* user side */ -#define _PSYNCH_TRACE_UM_LOCK 0x9000060 -#define _PSYNCH_TRACE_UM_UNLOCK 0x9000064 -#define _PSYNCH_TRACE_UM_MHOLD 0x9000068 -#define _PSYNCH_TRACE_UM_MDROP 0x900006c -#define _PSYNCH_TRACE_UM_CVWAIT 0x9000070 -#define _PSYNCH_TRACE_UM_CVSIG 0x9000074 -#define _PSYNCH_TRACE_UM_CVBRD 0x9000078 - -proc_t pthread_debug_proc = PROC_NULL; -static inline void __PTHREAD_TRACE_DEBUG(uint32_t debugid, uintptr_t arg1, - uintptr_t arg2, - uintptr_t arg3, - uintptr_t arg4, - uintptr_t arg5) -{ - proc_t p = current_proc(); - - if ((pthread_debug_proc != NULL) && (p == pthread_debug_proc)) - KERNEL_DEBUG_CONSTANT(debugid, arg1, arg2, arg3, arg4, arg5); -} - -#endif /* _PSYNCH_TRACE_ */ - -#define ECVCERORR 256 -#define ECVPERORR 512 - -lck_mtx_t * pthread_list_mlock; - -#define PTHHASH(addr) (&pthashtbl[(addr) & pthhash]) -extern LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl; -struct pthhashhead * pth_glob_hashtbl; -u_long pthhash; - -LIST_HEAD(, ksyn_wait_queue) pth_free_list; -int num_total_kwq = 0; /* number of kwq in use currently */ -int num_infreekwq = 0; /* number of kwq in free list */ -int num_freekwq = 0; /* number of kwq actually freed from the free the list */ -int num_reusekwq = 0; /* number of kwq pulled back for reuse from free list */ -int num_addedfreekwq = 0; /* number of added free kwq from the 
last instance */ -int num_lastfreekwqcount = 0; /* the free count from the last time */ - -static int PTH_HASHSIZE = 100; - -static zone_t kwq_zone; /* zone for allocation of ksyn_queue */ -static zone_t kwe_zone; /* zone for allocation of ksyn_waitq_element */ - -#define SEQFIT 0 -#define FIRSTFIT 1 - -struct ksyn_queue { - TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist; - uint32_t ksynq_count; /* number of entries in queue */ - uint32_t ksynq_firstnum; /* lowest seq in queue */ - uint32_t ksynq_lastnum; /* highest seq in queue */ -}; -typedef struct ksyn_queue * ksyn_queue_t; - -#define KSYN_QUEUE_READ 0 -#define KSYN_QUEUE_LREAD 1 -#define KSYN_QUEUE_WRITER 2 -#define KSYN_QUEUE_YWRITER 3 -#define KSYN_QUEUE_UPGRADE 4 -#define KSYN_QUEUE_MAX 5 - -struct ksyn_wait_queue { - LIST_ENTRY(ksyn_wait_queue) kw_hash; - LIST_ENTRY(ksyn_wait_queue) kw_list; - user_addr_t kw_addr; - uint64_t kw_owner; - uint64_t kw_object; /* object backing in shared mode */ - uint64_t kw_offset; /* offset inside the object in shared mode */ - int kw_flags; /* mutex, cvar options/flags */ - int kw_pflags; /* flags under listlock protection */ - struct timeval kw_ts; /* timeval need for upkeep before free */ - int kw_iocount; /* inuse reference */ - int kw_dropcount; /* current users unlocking... */ - - int kw_type; /* queue type like mutex, cvar, etc */ - uint32_t kw_inqueue; /* num of waiters held */ - uint32_t kw_fakecount; /* number of error/prepost fakes */ - uint32_t kw_highseq; /* highest seq in the queue */ - uint32_t kw_lowseq; /* lowest seq in the queue */ - uint32_t kw_lword; /* L value from userland */ - uint32_t kw_uword; /* U world value from userland */ - uint32_t kw_sword; /* S word value from userland */ - uint32_t kw_lastunlockseq; /* the last seq that unlocked */ -/* for CV to be used as the seq kernel has seen so far */ -#define kw_cvkernelseq kw_lastunlockseq - uint32_t kw_lastseqword; /* the last seq that unlocked */ -/* for mutex and cvar we need to track I bit values */ - uint32_t kw_nextseqword; /* the last seq that unlocked; with num of waiters */ -#define kw_initrecv kw_nextseqword /* number of incoming waiters with Ibit seen sofar */ - uint32_t kw_overlapwatch; /* chance for overlaps */ -#define kw_initcount kw_overlapwatch /* number of incoming waiters with Ibit expected */ - uint32_t kw_initcountseq; /* highest seq with Ibit on for mutex and cvar*/ - uint32_t kw_pre_rwwc; /* prepost count */ - uint32_t kw_pre_lockseq; /* prepost target seq */ - uint32_t kw_pre_sseq; /* prepost target sword, in cvar used for mutexowned */ - uint32_t kw_pre_intrcount; /* prepost of missed wakeup due to intrs */ - uint32_t kw_pre_intrseq; /* prepost of missed wakeup limit seq */ - uint32_t kw_pre_intrretbits; /* return bits value for missed wakeup threads */ - uint32_t kw_pre_intrtype; /* type of failed wakueps*/ - - int kw_kflags; - struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */ - lck_mtx_t kw_lock; /* mutex lock protecting this structure */ -}; -typedef struct ksyn_wait_queue * ksyn_wait_queue_t; - -#define PTHRW_INC 0x100 -#define PTHRW_BIT_MASK 0x000000ff - -#define PTHRW_COUNT_SHIFT 8 -#define PTHRW_COUNT_MASK 0xffffff00 -#define PTHRW_MAX_READERS 0xffffff00 - -/* New model bits on Lword */ -#define PTH_RWL_KBIT 0x01 /* users cannot acquire in user mode */ -#define PTH_RWL_EBIT 0x02 /* exclusive lock in progress */ -#define PTH_RWL_WBIT 0x04 /* write waiters pending in kernel */ -#define PTH_RWL_PBIT 0x04 /* prepost (cv) pending in kernel */ -#define 
PTH_RWL_YBIT 0x08 /* yielding write waiters pending in kernel */ -#define PTH_RWL_RETRYBIT 0x08 /* mutex retry wait */ -#define PTH_RWL_LBIT 0x10 /* long read in progress */ -#define PTH_RWL_MTXNONE 0x10 /* indicates the cvwait does not have mutex held */ -#define PTH_RWL_UBIT 0x20 /* upgrade request pending */ -#define PTH_RWL_MTX_WAIT 0x20 /* in cvar in mutex wait */ -#define PTH_RWL_RBIT 0x40 /* reader pending in kernel(not used) */ -#define PTH_RWL_MBIT 0x40 /* overlapping grants from kernel */ -#define PTH_RWL_TRYLKBIT 0x40 /* trylock attempt (mutex only) */ -#define PTH_RWL_IBIT 0x80 /* lcok reset, held untill first succeesful unlock */ - - -/* UBIT values for mutex, cvar */ -#define PTH_RWU_SBIT 0x01 -#define PTH_RWU_BBIT 0x02 - -#define PTHRW_RWL_INIT PTH_RWL_IBIT /* reset state on the lock bits (U)*/ - -/* New model bits on Sword */ -#define PTH_RWS_SBIT 0x01 /* kernel transition seq not set yet*/ -#define PTH_RWS_IBIT 0x02 /* Sequence is not set on return from kernel */ -#define PTH_RWS_CV_CBIT PTH_RWS_SBIT /* kernel has cleared all info w.r.s.t CV */ -#define PTH_RWS_CV_PBIT PTH_RWS_IBIT /* kernel has prepost/fake structs only,no waiters */ -#define PTH_RWS_CV_MBIT PTH_RWL_MBIT /* to indicate prepost return */ -#define PTH_RWS_WSVBIT 0x04 /* save W bit */ -#define PTH_RWS_USVBIT 0x08 /* save U bit */ -#define PTH_RWS_YSVBIT 0x10 /* save Y bit */ -#define PTHRW_RWS_INIT PTH_RWS_SBIT /* reset on the lock bits (U)*/ -#define PTHRW_RWS_SAVEMASK (PTH_RWS_WSVBIT|PTH_RWS_USVBIT|PTH_RWS_YSVBIT) /*save bits mask*/ -#define PTHRW_SW_Reset_BIT_MASK 0x000000fe /* remove S bit and get rest of the bits */ - -#define PTHRW_RWS_INIT PTH_RWS_SBIT /* reset on the lock bits (U)*/ - - -#define PTHRW_UN_BIT_MASK 0x000000bf /* remove overlap bit */ - - -#define PTHREAD_MTX_TID_SWITCHING (uint64_t)-1 - -/* new L word defns */ -#define is_rwl_readinuser(x) ((((x) & (PTH_RWL_UBIT | PTH_RWL_KBIT)) == 0)||(((x) & PTH_RWL_LBIT) != 0)) -#define is_rwl_ebit_set(x) (((x) & PTH_RWL_EBIT) != 0) -#define is_rwl_lbit_set(x) (((x) & PTH_RWL_LBIT) != 0) -#define is_rwl_readoverlap(x) (((x) & PTH_RWL_MBIT) != 0) -#define is_rw_ubit_set(x) (((x) & PTH_RWL_UBIT) != 0) - -/* S word checks */ -#define is_rws_setseq(x) (((x) & PTH_RWS_SBIT)) -#define is_rws_setunlockinit(x) (((x) & PTH_RWS_IBIT)) - -/* first contended seq that kernel sees */ -#define KW_MTXFIRST_KSEQ 0x200 -#define KW_CVFIRST_KSEQ 1 -#define KW_RWFIRST_KSEQ 0x200 - -int is_seqlower(uint32_t x, uint32_t y); -int is_seqlower_eq(uint32_t x, uint32_t y); -int is_seqhigher(uint32_t x, uint32_t y); -int is_seqhigher_eq(uint32_t x, uint32_t y); -int find_diff(uint32_t upto, uint32_t lowest); - - -static inline int diff_genseq(uint32_t x, uint32_t y) { - if (x > y) { - return(x-y); - } else { - return((PTHRW_MAX_READERS - y) + x + PTHRW_INC); - } -} - -#define TID_ZERO (uint64_t)0 - -/* bits needed in handling the rwlock unlock */ -#define PTH_RW_TYPE_READ 0x01 -#define PTH_RW_TYPE_LREAD 0x02 -#define PTH_RW_TYPE_WRITE 0x04 -#define PTH_RW_TYPE_YWRITE 0x08 -#define PTH_RW_TYPE_UPGRADE 0x10 -#define PTH_RW_TYPE_MASK 0xff -#define PTH_RW_TYPE_SHIFT 8 - -#define PTH_RWSHFT_TYPE_READ 0x0100 -#define PTH_RWSHFT_TYPE_LREAD 0x0200 -#define PTH_RWSHFT_TYPE_WRITE 0x0400 -#define PTH_RWSHFT_TYPE_YWRITE 0x0800 -#define PTH_RWSHFT_TYPE_MASK 0xff00 - -/* - * Mutex protocol attributes - */ -#define PTHREAD_PRIO_NONE 0 -#define PTHREAD_PRIO_INHERIT 1 -#define PTHREAD_PRIO_PROTECT 2 -#define PTHREAD_PROTOCOL_FLAGS_MASK 0x3 - -/* - * Mutex type attributes - */ -#define 
PTHREAD_MUTEX_NORMAL 0 -#define PTHREAD_MUTEX_ERRORCHECK 4 -#define PTHREAD_MUTEX_RECURSIVE 8 -#define PTHREAD_MUTEX_DEFAULT PTHREAD_MUTEX_NORMAL -#define PTHREAD_TYPE_FLAGS_MASK 0xc - -/* - * Mutex pshared attributes - */ -#define PTHREAD_PROCESS_SHARED 0x10 -#define PTHREAD_PROCESS_PRIVATE 0x20 -#define PTHREAD_PSHARED_FLAGS_MASK 0x30 - -/* - * Mutex policy attributes - */ -#define _PTHREAD_MUTEX_POLICY_NONE 0 -#define _PTHREAD_MUTEX_POLICY_FAIRSHARE 0x040 /* 1 */ -#define _PTHREAD_MUTEX_POLICY_FIRSTFIT 0x080 /* 2 */ -#define _PTHREAD_MUTEX_POLICY_REALTIME 0x0c0 /* 3 */ -#define _PTHREAD_MUTEX_POLICY_ADAPTIVE 0x100 /* 4 */ -#define _PTHREAD_MUTEX_POLICY_PRIPROTECT 0x140 /* 5 */ -#define _PTHREAD_MUTEX_POLICY_PRIINHERIT 0x180 /* 6 */ -#define PTHREAD_POLICY_FLAGS_MASK 0x1c0 - -#define _PTHREAD_MTX_OPT_HOLDLOCK 0x200 -#define _PTHREAD_MTX_OPT_NOMTX 0x400 - -#define _PTHREAD_MTX_OPT_NOTIFY 0x1000 -#define _PTHREAD_MTX_OPT_MUTEX 0x2000 /* this is a mutex type */ - -#define _PTHREAD_RWLOCK_UPGRADE_TRY 0x10000 - -/* pflags */ -#define KSYN_WQ_INLIST 1 -#define KSYN_WQ_INHASH 2 -#define KSYN_WQ_SHARED 4 -#define KSYN_WQ_WAITING 8 /* threads waiting for this wq to be available */ -#define KSYN_WQ_FLIST 0X10 /* in free list to be freed after a short delay */ - -/* kflags */ -#define KSYN_KWF_INITCLEARED 1 /* the init status found and preposts cleared */ -#define KSYN_KWF_ZEROEDOUT 2 /* the lword, etc are inited to 0 */ - -#define KSYN_CLEANUP_DEADLINE 10 -int psynch_cleanupset; -thread_call_t psynch_thcall; - -#define KSYN_WQTYPE_INWAIT 0x1000 -#define KSYN_WQTYPE_INDROP 0x2000 -#define KSYN_WQTYPE_MTX 0x1 -#define KSYN_WQTYPE_CVAR 0x2 -#define KSYN_WQTYPE_RWLOCK 0x4 -#define KSYN_WQTYPE_SEMA 0x8 -#define KSYN_WQTYPE_BARR 0x10 -#define KSYN_WQTYPE_MASK 0x00ff - -#define KSYN_MTX_MAX 0x0fffffff -#define KSYN_WQTYPE_MUTEXDROP (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX) - -#define KW_UNLOCK_PREPOST 0x01 -#define KW_UNLOCK_PREPOST_UPGRADE 0x02 -#define KW_UNLOCK_PREPOST_DOWNGRADE 0x04 -#define KW_UNLOCK_PREPOST_READLOCK 0x08 -#define KW_UNLOCK_PREPOST_LREADLOCK 0x10 -#define KW_UNLOCK_PREPOST_WRLOCK 0x20 -#define KW_UNLOCK_PREPOST_YWRLOCK 0x40 - -#define CLEAR_PREPOST_BITS(kwq) {\ - kwq->kw_pre_lockseq = 0; \ - kwq->kw_pre_sseq = PTHRW_RWS_INIT; \ - kwq->kw_pre_rwwc = 0; \ - } - -#define CLEAR_INITCOUNT_BITS(kwq) {\ - kwq->kw_initcount = 0; \ - kwq->kw_initrecv = 0; \ - kwq->kw_initcountseq = 0; \ - } - -#define CLEAR_INTR_PREPOST_BITS(kwq) {\ - kwq->kw_pre_intrcount = 0; \ - kwq->kw_pre_intrseq = 0; \ - kwq->kw_pre_intrretbits = 0; \ - kwq->kw_pre_intrtype = 0; \ - } - -#define CLEAR_REINIT_BITS(kwq) {\ - if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { \ - if((kwq->kw_inqueue != 0) && (kwq->kw_inqueue != kwq->kw_fakecount)) \ - panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount); \ - };\ - if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) { \ - kwq->kw_nextseqword = PTHRW_RWS_INIT; \ - kwq->kw_overlapwatch = 0; \ - }; \ - kwq->kw_pre_lockseq = 0; \ - kwq->kw_pre_rwwc = 0; \ - kwq->kw_pre_sseq = PTHRW_RWS_INIT; \ - kwq->kw_lastunlockseq = PTHRW_RWL_INIT; \ - kwq->kw_lastseqword = PTHRW_RWS_INIT; \ - kwq->kw_pre_intrcount = 0; \ - kwq->kw_pre_intrseq = 0; \ - kwq->kw_pre_intrretbits = 0; \ - kwq->kw_pre_intrtype = 0; \ - kwq->kw_lword = 0; \ - kwq->kw_uword = 0; \ - kwq->kw_sword = PTHRW_RWS_INIT; \ - } - -void pthread_list_lock(void); -void pthread_list_unlock(void); -void pthread_list_lock_spin(void); -void pthread_list_lock_convert_spin(void); 
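/*
 * All of these generation words share one layout: the low byte
 * (PTHRW_BIT_MASK) carries state bits and the upper 24 bits hold a
 * counter that advances by PTHRW_INC and eventually wraps, which is why
 * the code never compares sequences with raw operators and why
 * diff_genseq() above has a wraparound branch. A standalone
 * illustration of that arithmetic, assuming the constants defined
 * earlier:
 */
#include <assert.h>
#include <stdint.h>

#define PTHRW_INC		0x100
#define PTHRW_MAX_READERS	0xffffff00

static uint32_t
genseq_distance(uint32_t x, uint32_t y)	/* same shape as diff_genseq() */
{
	if (x > y)
		return (x - y);
	/* x has wrapped past the top of the 24-bit count space */
	return ((PTHRW_MAX_READERS - y) + x + PTHRW_INC);
}

int
main(void)
{
	assert(genseq_distance(0x400, 0x100) == 0x300);		/* no wrap */
	assert(genseq_distance(0x200, 0xffffff00) == 0x300);	/* wrapped */
	return (0);
}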
-void ksyn_wqlock(ksyn_wait_queue_t kwq); -void ksyn_wqunlock(ksyn_wait_queue_t kwq); -ksyn_wait_queue_t ksyn_wq_hash_lookup(user_addr_t mutex, proc_t p, int flags, uint64_t object, uint64_t offset); -int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int flags, int wqtype , ksyn_wait_queue_t * wq); -void ksyn_wqrelease(ksyn_wait_queue_t mkwq, ksyn_wait_queue_t ckwq, int qfreenow, int wqtype); -extern int ksyn_findobj(uint64_t mutex, uint64_t * object, uint64_t * offset); -static void UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int wqtype); -extern thread_t port_name_to_thread(mach_port_name_t port_name); - -kern_return_t ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int log, thread_continue_t, void * parameter); -kern_return_t ksyn_wakeup_thread(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe); -void ksyn_freeallkwe(ksyn_queue_t kq); - -uint32_t psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags); -int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t * updatep, int flags, int *blockp, uint32_t premgen); - -void ksyn_queue_init(ksyn_queue_t kq); -int ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, ksyn_waitq_element_t kwe, int firstfit); -ksyn_waitq_element_t ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq); -void ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe); -int ksyn_queue_move_tofree(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t upto, ksyn_queue_t freeq, int all, int reease); -void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq); -uint32_t find_nextlowseq(ksyn_wait_queue_t kwq); -uint32_t find_nexthighseq(ksyn_wait_queue_t kwq); - -int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp); -uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto); - -ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen); -uint32_t ksyn_queue_cvcount_entries(ksyn_queue_t kq, uint32_t upto, uint32_t from, int * numwaitersp, int * numintrp, int * numprepop); -void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep); -void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep, ksyn_queue_t kfreeq, int release); -ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq); -ksyn_waitq_element_t ksyn_queue_find_threadseq(ksyn_wait_queue_t ckwq, ksyn_queue_t kq, thread_t th, uint32_t toseq); -void psynch_cvcontinue(void *, wait_result_t); -void psynch_mtxcontinue(void *, wait_result_t); - -int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t updatebits, int * wokenp); -int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * type, uint32_t lowest[]); -ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq, int remove); -int kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, uint32_t *updatebitsp, int flags , int * blockp); -int kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, int flags, uint32_t premgen, int * blockp); - -static void -UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, __unused uint64_t tid, __unused int wqtype) -{ 
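	/*
	 * Fold the userland snapshot of the condvar's L (lock), U (unlock)
	 * and S (sequence) words into the wait queue: reload all three if
	 * the previous transition zeroed them out, and otherwise only let
	 * each word move sequence-forward, so a stale syscall argument can
	 * never drag the kernel's view of the generation space backwards.
	 */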
- if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) { - if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) { - /* the values of L,U and S are cleared out due to L==S in previous transition */ - kwq->kw_lword = mgen; - kwq->kw_uword = ugen; - kwq->kw_sword = rw_wc; - kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT; - } - if (is_seqhigher((mgen & PTHRW_COUNT_MASK), (kwq->kw_lword & PTHRW_COUNT_MASK)) != 0) - kwq->kw_lword = mgen; - if (is_seqhigher((ugen & PTHRW_COUNT_MASK), (kwq->kw_uword & PTHRW_COUNT_MASK)) != 0) - kwq->kw_uword = ugen; - if ((rw_wc & PTH_RWS_CV_CBIT) != 0) { - if(is_seqlower(kwq->kw_cvkernelseq, (rw_wc & PTHRW_COUNT_MASK)) != 0) { - kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK); - } - if (is_seqhigher((rw_wc & PTHRW_COUNT_MASK), (kwq->kw_sword & PTHRW_COUNT_MASK)) != 0) - kwq->kw_sword = rw_wc; - } - } -} - - -/* to protect the hashes, iocounts, freelist */ -void -pthread_list_lock(void) -{ - lck_mtx_lock(pthread_list_mlock); -} - -void -pthread_list_lock_spin(void) -{ - lck_mtx_lock_spin(pthread_list_mlock); -} - -void -pthread_list_lock_convert_spin(void) -{ - lck_mtx_convert_spin(pthread_list_mlock); -} - - -void -pthread_list_unlock(void) -{ - lck_mtx_unlock(pthread_list_mlock); -} - -/* to protect the indiv queue */ -void -ksyn_wqlock(ksyn_wait_queue_t kwq) -{ - - lck_mtx_lock(&kwq->kw_lock); -} - -void -ksyn_wqunlock(ksyn_wait_queue_t kwq) -{ - lck_mtx_unlock(&kwq->kw_lock); -} - - -/* routine to drop the mutex unlocks , used both for mutexunlock system call and drop during cond wait */ -uint32_t -psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t lkseq, uint32_t ugen, int flags) -{ - uint32_t nextgen, low_writer, updatebits, returnbits = 0; - int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT; - ksyn_waitq_element_t kwe = NULL; - kern_return_t kret = KERN_SUCCESS; - - nextgen = (ugen + PTHRW_INC); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_START, (uint32_t)kwq->kw_addr, lkseq, ugen, flags, 0); -#endif /* _PSYNCH_TRACE_ */ - - ksyn_wqlock(kwq); - -redrive: - - if (kwq->kw_inqueue != 0) { - updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT); - kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK); - if (firstfit != 0) - { - /* first fit , pick any one */ - kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq); - kwe->kwe_psynchretval = updatebits; - kwe->kwe_kwqqueue = NULL; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xcafecaf1, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0); -#endif /* _PSYNCH_TRACE_ */ - - kret = ksyn_wakeup_thread(kwq, kwe); -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("psynch_mutexdrop_internal: panic unable to wakeup firstfit mutex thread\n"); -#endif /* __TESTPANICS__ */ - if (kret == KERN_NOT_WAITING) - goto redrive; - } else { - /* handle fairshare */ - low_writer = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum; - low_writer &= PTHRW_COUNT_MASK; - - if (low_writer == nextgen) { - /* next seq to be granted found */ - kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq); - - /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */ - kwe->kwe_psynchretval = updatebits | PTH_RWL_MTX_WAIT; - kwe->kwe_kwqqueue = NULL; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, 
(uint32_t)kwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0); -#endif /* _PSYNCH_TRACE_ */ - - kret = ksyn_wakeup_thread(kwq, kwe); -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("psynch_mutexdrop_internal: panic unable to wakeup fairshare mutex thread\n"); -#endif /* __TESTPANICS__ */ - if (kret == KERN_NOT_WAITING) { - /* interrupt post */ - kwq->kw_pre_intrcount = 1; - kwq->kw_pre_intrseq = nextgen; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfafafaf1, nextgen, kwq->kw_pre_intrretbits, 0); -#endif /* _PSYNCH_TRACE_ */ - } - - } else if (is_seqhigher(low_writer, nextgen) != 0) { - kwq->kw_pre_rwwc++; - - if (kwq->kw_pre_rwwc > 1) { - __FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (1)\n"); - goto out; - } - - kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - } else { - - //__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one is queue\n"); - - kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (nextgen & PTHRW_COUNT_MASK), 1); - if (kwe != NULL) { - /* next seq to be granted found */ - /* since the grant could be cv, make sure mutex wait is set incase the thread interrupted out */ - kwe->kwe_psynchretval = updatebits | PTH_RWL_MTX_WAIT; - kwe->kwe_kwqqueue = NULL; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xcafecaf3, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0); -#endif /* _PSYNCH_TRACE_ */ - kret = ksyn_wakeup_thread(kwq, kwe); -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("psynch_mutexdrop_internal: panic unable to wakeup fairshare mutex thread\n"); -#endif /* __TESTPANICS__ */ - if (kret == KERN_NOT_WAITING) - goto redrive; - } else { - /* next seq to be granted not found, prepost */ - kwq->kw_pre_rwwc++; - - if (kwq->kw_pre_rwwc > 1) { - __FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (2)\n"); - goto out; - } - - kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - } - } - } - } else { - - /* if firstfit the last one could be spurious */ - if (firstfit == 0) { - kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK); - kwq->kw_pre_rwwc++; - - if (kwq->kw_pre_rwwc > 1) { - __FAILEDUSERTEST__("psynch_mutexdrop_internal: prepost more than one (3)\n"); - goto out; - } - - kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - } else { - /* first fit case */ -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_lastunlockseq, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - 
kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK); - /* not set or the new lkseq is higher */ - if ((kwq->kw_pre_rwwc == 0) || (is_seqlower(kwq->kw_pre_lockseq, lkseq) == 0)) - kwq->kw_pre_lockseq = (lkseq & PTHRW_COUNT_MASK); - kwq->kw_pre_rwwc = 1; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef3, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - - /* indicate prepost content in kernel */ - returnbits = lkseq | PTH_RWL_PBIT; - } - } - -out: - ksyn_wqunlock(kwq); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_KMDROP | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0xeeeeeeed, 0, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)); - return(returnbits); -} - -/* - * psynch_mutexwait: This system call is used for contended psynch mutexes to block. - */ - -int -psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t * retval) -{ - user_addr_t mutex = uap->mutex; - uint32_t mgen = uap->mgen; - uint32_t ugen = uap->ugen; - uint64_t tid = uap->tid; - int flags = uap->flags; - ksyn_wait_queue_t kwq; - int error=0; - int ins_flags, retry; - uthread_t uth; - int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT; - uint32_t lockseq, updatebits=0; - ksyn_waitq_element_t kwe; - kern_return_t kret; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0); - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, mgen, ugen, (uint32_t)tid, 0); -#endif /* _PSYNCH_TRACE_ */ - - uth = current_uthread(); - - kwe = &uth->uu_kwe; - kwe->kwe_lockseq = uap->mgen; - kwe->kwe_uth = uth; - kwe->kwe_psynchretval = 0; - kwe->kwe_kwqqueue = NULL; - lockseq = (uap->mgen & PTHRW_COUNT_MASK); - - if (firstfit == 0) { - ins_flags = SEQFIT; - } else { - /* first fit */ - ins_flags = FIRSTFIT; - } - - error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 1, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - ksyn_wqlock(kwq); - - - if ((mgen & PTH_RWL_RETRYBIT) != 0) { - retry = 1; - mgen &= ~PTH_RWL_RETRYBIT; - } - - /* handle first the missed wakeups */ - if ((kwq->kw_pre_intrcount != 0) && - ((kwq->kw_pre_intrtype == PTH_RW_TYPE_WRITE)) && - (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) { - kwq->kw_pre_intrcount--; - kwe->kwe_psynchretval = kwq->kw_pre_intrretbits; - if (kwq->kw_pre_intrcount==0) - CLEAR_INTR_PREPOST_BITS(kwq); - ksyn_wqunlock(kwq); - *retval = kwe->kwe_psynchretval; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)mutex, 0xfafafaf1, kwe->kwe_psynchretval, kwq->kw_pre_intrcount, 0); -#endif /* _PSYNCH_TRACE_ */ - goto out; - } - - if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK) ))) { - /* got preposted lock */ - kwq->kw_pre_rwwc--; - if (kwq->kw_pre_rwwc == 0) { - CLEAR_PREPOST_BITS(kwq); - kwq->kw_lastunlockseq = PTHRW_RWL_INIT; - if (kwq->kw_inqueue == 0) { - updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT); - } else { - updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT); - } - updatebits &= ~PTH_RWL_MTX_WAIT; - - kwe->kwe_psynchretval = updatebits; - - if (updatebits 
== 0) { - __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n"); - } - ksyn_wqunlock(kwq); - *retval = updatebits; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfefefef1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - goto out; - } else { - __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n"); - kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */ - ksyn_wqunlock(kwq); - error = EINVAL; - goto out; - } - } - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 0xfeedfeed, mgen, ins_flags, 0); -#endif /* _PSYNCH_TRACE_ */ - - error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], mgen, uth, kwe, ins_flags); - if (error != 0) { - ksyn_wqunlock(kwq); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 2, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - goto out; - } - - kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, psynch_mtxcontinue, (void *)kwq); - - psynch_mtxcontinue((void *)kwq, kret); - - /* not expected to return from unix_syscall_return */ - panic("psynch_mtxcontinue returned from unix_syscall_return"); - -out: - ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0xeeeeeeed, updatebits, error, 0); -#endif /* _PSYNCH_TRACE_ */ - - return(error); -} - -void -psynch_mtxcontinue(void * parameter, wait_result_t result) -{ - int error = 0; - uint32_t updatebits = 0; - uthread_t uth = current_uthread(); - ksyn_wait_queue_t kwq = (ksyn_wait_queue_t)parameter; - ksyn_waitq_element_t kwe; - - kwe = &uth->uu_kwe; - - switch (result) { - case THREAD_TIMED_OUT: - error = ETIMEDOUT; - break; - case THREAD_INTERRUPTED: - error = EINTR; - break; - default: - error = 0; - break; - } - - if (error != 0) { - ksyn_wqlock(kwq); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - if (kwe->kwe_kwqqueue != NULL) - ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe); - ksyn_wqunlock(kwq); - } else { - updatebits = kwe->kwe_psynchretval; - updatebits &= ~PTH_RWL_MTX_WAIT; - uth->uu_rval[0] = updatebits; - - if (updatebits == 0) - __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n"); - } - ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0xeeeeeeed, updatebits, error, 0); -#endif /* _PSYNCH_TRACE_ */ - - unix_syscall_return(error); -} - -/* - * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes. 
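 *
 * The value handed back to userland is computed by
 * psynch_mutexdrop_internal() above: normally 0, but on the first-fit
 * path with no eligible waiter the kernel records a prepost and returns
 * the lock sequence with PTH_RWL_PBIT set, so a user-side consumer can
 * test
 *
 *	int preposted = (updateval & PTH_RWL_PBIT) != 0;
 *
 * to learn that the next contender's wakeup is already staged in the
 * kernel (a sketch; the libpthread side is not shown in this patch).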
- */ -int -psynch_mutexdrop(__unused proc_t p, struct psynch_mutexdrop_args * uap, uint32_t * retval) -{ - user_addr_t mutex = uap->mutex; - uint32_t mgen = uap->mgen; - uint32_t ugen = uap->ugen; - uint64_t tid = uap->tid; - int flags = uap->flags; - ksyn_wait_queue_t kwq; - uint32_t updateval; - int error=0; - - error = ksyn_wqfind(mutex, mgen, ugen, 0, tid, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX), &kwq); - if (error != 0) { - return(error); - } - - updateval = psynch_mutexdrop_internal(kwq, mgen, ugen, flags); - /* drops the kwq reference */ - - *retval = updateval; - return(0); - -} - -/* - * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars. - */ -int -psynch_cvbroad(__unused proc_t p, struct psynch_cvbroad_args * uap, uint32_t * retval) -{ - user_addr_t cond = uap->cv; - uint64_t cvlsgen = uap->cvlsgen; - uint64_t cvudgen = uap->cvudgen; - uint32_t cgen, cugen, csgen, diffgen; - uint32_t uptoseq, fromseq; - int flags = uap->flags; - ksyn_wait_queue_t ckwq; - int error=0; - uint32_t updatebits = 0; - uint32_t count; - struct ksyn_queue kfreeq; - - csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff); - cgen = ((uint32_t)(cvlsgen & 0xffffffff)); - cugen = (uint32_t)((cvudgen >> 32) & 0xffffffff); - diffgen = ((uint32_t)(cvudgen & 0xffffffff)); - count = (diffgen >> PTHRW_COUNT_SHIFT); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0); - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_NONE, (uint32_t)cond, 0xcbcbcbc1, diffgen,flags, 0); -#endif /* _PSYNCH_TRACE_ */ - - uptoseq = cgen & PTHRW_COUNT_MASK; - fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC; - - if (is_seqhigher(fromseq, uptoseq) || is_seqhigher((csgen & PTHRW_COUNT_MASK), uptoseq)) { - __FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n"); - return EINVAL; - } - if (count > (uint32_t)task_threadmax) { - __FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n"); - return EBUSY; - } - - ckwq = NULL; - - error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - *retval = 0; - - ksyn_wqlock(ckwq); - - /* update L, U and S... 
*/ - UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR); - - /* broadcast wakeups/prepost handling */ - ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits); - - /* set C or P bits and free if needed */ - ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK); - ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1); - ksyn_wqunlock(ckwq); - - *retval = updatebits; - - ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR)); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVBROAD | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, error, 0); -#endif /* _PSYNCH_TRACE_ */ - - return(error); -} - -ksyn_waitq_element_t -ksyn_queue_find_threadseq(ksyn_wait_queue_t ckwq, __unused ksyn_queue_t kq, thread_t th, uint32_t upto) -{ - uthread_t uth = get_bsdthread_info(th); - ksyn_waitq_element_t kwe = &uth->uu_kwe; - - if (kwe->kwe_kwqqueue != ckwq || - is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), upto)) { - /* the thread is not waiting in the cv (or wasn't when the wakeup happened) */ - return NULL; - } - return kwe; -} - -/* - * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars. - */ -int -psynch_cvsignal(__unused proc_t p, struct psynch_cvsignal_args * uap, uint32_t * retval) -{ - user_addr_t cond = uap->cv; - uint64_t cvlsgen = uap->cvlsgen; - uint32_t cgen, csgen, signalseq, uptoseq; - uint32_t cugen = uap->cvugen; - int threadport = uap->thread_port; - int flags = uap->flags; - ksyn_wait_queue_t ckwq = NULL; - ksyn_waitq_element_t kwe, nkwe = NULL; - ksyn_queue_t kq; - int error=0; - thread_t th = THREAD_NULL; - uint32_t updatebits = 0; - kern_return_t kret; - struct ksyn_queue kfreeq; - - - csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff); - cgen = ((uint32_t)(cvlsgen & 0xffffffff)); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, threadport, 0); -#endif /* _PSYNCH_TRACE_ */ - - uptoseq = cgen & PTHRW_COUNT_MASK; - signalseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC; - - /* validate sane L, U, and S values */ - if (((threadport == 0) && (is_seqhigher(signalseq, uptoseq))) || is_seqhigher((csgen & PTHRW_COUNT_MASK), uptoseq)) { - __FAILEDUSERTEST__("psync_cvsignal; invalid sequence numbers\n"); - error = EINVAL; - goto out; - } - - /* If we are looking for a specific thread, grab a reference for it */ - if (threadport != 0) { - th = (thread_t)port_name_to_thread((mach_port_name_t)threadport); - if (th == THREAD_NULL) { - error = ESRCH; - goto out; - } - } - - error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - goto out; - } - - ksyn_wqlock(ckwq); - - /* update L, U and S... */ - UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR); - - kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; - -retry: - /* Only bother if we aren't already balanced */ - if ((ckwq->kw_lword & PTHRW_COUNT_MASK) != (ckwq->kw_sword & PTHRW_COUNT_MASK)) { - - kwe = (th != NULL) ? 
ksyn_queue_find_threadseq(ckwq, kq, th, uptoseq) : - ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq); - if (kwe != NULL) { - switch (kwe->kwe_flags) { - - case KWE_THREAD_BROADCAST: - /* broadcasts swallow our signal */ - break; - - case KWE_THREAD_PREPOST: - /* merge in with existing prepost at our same uptoseq */ - kwe->kwe_count += 1; - break; - - case KWE_THREAD_INWAIT: - if (is_seqlower((kwe->kwe_lockseq & PTHRW_COUNT_MASK), signalseq)) { - /* - * A valid thread in our range, but lower than our signal. - * Matching it may leave our match with nobody to wake it if/when - * it arrives (the signal originally meant for this thread might - * not successfully wake it). - * - * Convert to broadcast - may cause some spurious wakeups - * (allowed by spec), but avoids starvation (better choice). - */ -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xc1c1c1c1, uptoseq, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits); - } else { - ksyn_queue_removeitem(ckwq, kq, kwe); - kwe->kwe_psynchretval = PTH_RWL_MTX_WAIT; - kwe->kwe_kwqqueue = NULL; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0); -#endif /* _PSYNCH_TRACE_ */ - kret = ksyn_wakeup_thread(ckwq, kwe); -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("ksyn_wakeup_thread: panic waking up condition waiter\n"); -#endif /* __TESTPANICS__ */ - updatebits += PTHRW_INC; - } - - ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK); - break; - - default: - panic("unknown kweflags\n"); - break; - } - - } else if (th != NULL) { - /* - * Could not find the thread, post a broadcast, - * otherwise the waiter will be stuck. Use to send - * ESRCH here, did lead to rare hangs. - */ - ksyn_handle_cvbroad(ckwq, uptoseq, &updatebits); - ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK); - } else if (nkwe == NULL) { - ksyn_wqunlock(ckwq); - nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone); - ksyn_wqlock(ckwq); - goto retry; - - } else { - /* no eligible entries - add prepost */ - bzero(nkwe, sizeof(struct ksyn_waitq_element)); - nkwe->kwe_kwqqueue = ckwq; - nkwe->kwe_flags = KWE_THREAD_PREPOST; - nkwe->kwe_lockseq = uptoseq; - nkwe->kwe_count = 1; - nkwe->kwe_uth = NULL; - nkwe->kwe_psynchretval = 0; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfefe, uptoseq, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - - (void)ksyn_queue_insert(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], uptoseq, NULL, nkwe, SEQFIT); - ckwq->kw_fakecount++; - nkwe = NULL; - } - - /* set C or P bits and free if needed */ - ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1); - } - - ksyn_wqunlock(ckwq); - if (nkwe != NULL) - zfree(kwe_zone, nkwe); - - ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR)); - -out: - if (th != NULL) - thread_deallocate(th); - if (error == 0) - *retval = updatebits; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSIGNAL | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, updatebits, error, 0); -#endif /* _PSYNCH_TRACE_ */ - - return(error); -} - -/* - * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel. 
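 *
 * A hedged editorial sketch, not from the original file: the timeout comes
 * in as a relative timespec split across uap->sec and uap->nsec, and the
 * wait path below converts it into an absolute Mach deadline, with an
 * abstime of 0 meaning wait forever:
 *
 *	uint64_t abstime = 0;
 *	if (uap->sec != 0 || (uap->nsec & 0x3fffffff) != 0) {
 *		nanoseconds_to_absolutetime((uint64_t)uap->sec * NSEC_PER_SEC +
 *		    (uap->nsec & 0x3fffffff), &abstime);
 *		clock_absolutetime_interval_to_deadline(abstime, &abstime);
 *	}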
- */ -int -psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * retval) -{ - user_addr_t cond = uap->cv; - uint64_t cvlsgen = uap->cvlsgen; - uint32_t cgen, csgen; - uint32_t cugen = uap->cvugen; - user_addr_t mutex = uap->mutex; - uint64_t mugen = uap->mugen; - uint32_t mgen, ugen; - int flags = uap->flags; - ksyn_wait_queue_t kwq, ckwq; - int error=0, local_error = 0; - uint64_t abstime = 0; - uint32_t lockseq, updatebits=0; - struct timespec ts; - uthread_t uth; - ksyn_waitq_element_t kwe, nkwe = NULL; - struct ksyn_queue *kq, kfreeq; - kern_return_t kret; - - /* for conformance reasons */ - __pthread_testcancel(0); - - csgen = (uint32_t)((cvlsgen >> 32) & 0xffffffff); - cgen = ((uint32_t)(cvlsgen & 0xffffffff)); - ugen = (uint32_t)((mugen >> 32) & 0xffffffff); - mgen = ((uint32_t)(mugen & 0xffffffff)); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0); - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)mutex, mgen, ugen, flags, 0); -#endif /* _PSYNCH_TRACE_ */ - - lockseq = (cgen & PTHRW_COUNT_MASK); - /* - * In cvwait U word can be out of range as cond could be used only for - * timeouts. However S word needs to be within bounds and validated at - * user level as well. - */ - if (is_seqhigher_eq((csgen & PTHRW_COUNT_MASK), lockseq) != 0) { - __FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n"); - return EINVAL; - } - - ckwq = kwq = NULL; - error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 1, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - - if (mutex != (user_addr_t)0) { - error = ksyn_wqfind(mutex, mgen, ugen, 0, 0, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX), &kwq); - if (error != 0) { - local_error = error; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)mutex, 2, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - goto out; - } - - (void)psynch_mutexdrop_internal(kwq, mgen, ugen, flags); - /* drops kwq reference */ - kwq = NULL; - } - - if (uap->sec != 0 || (uap->nsec & 0x3fffffff) != 0) { - ts.tv_sec = uap->sec; - ts.tv_nsec = (uap->nsec & 0x3fffffff); - nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime ); - clock_absolutetime_interval_to_deadline( abstime, &abstime ); - } - - ksyn_wqlock(ckwq); - - /* update L, U and S... */ - UPDATE_CVKWQ(ckwq, cgen, cugen, csgen, 0, KSYN_WQTYPE_CVAR); - - /* Look for the sequence for prepost (or conflicting thread */ - kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; - kwe = ksyn_queue_find_cvpreposeq(kq, lockseq); - - if (kwe != NULL) { - switch (kwe->kwe_flags) { - - case KWE_THREAD_INWAIT: - ksyn_wqunlock(ckwq); - __FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n"); - local_error = EBUSY; - goto out; - - case KWE_THREAD_BROADCAST: - break; - - case KWE_THREAD_PREPOST: - if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) { - /* we can safely consume a reference, so do so */ - if (--kwe->kwe_count == 0) { - ksyn_queue_removeitem(ckwq, kq, kwe); - ckwq->kw_fakecount--; - nkwe = kwe; - } - } else { - /* - * consuming a prepost higher than our lock sequence is valid, but - * can leave the higher thread without a match. Convert the entry - * to a broadcast to compensate for this. 
- */ -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xc2c2c2c2, kwe->kwe_lockseq, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - - ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits); -#if __TESTPANICS__ - if (updatebits != 0) - panic("psync_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", - updatebits); -#endif /* __TESTPANICS__ */ - } - - break; - - default: - panic("psync_cvwait: unexpected wait queue element type\n"); - } - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfefefefe, kwe->kwe_lockseq, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - - - updatebits = PTHRW_INC; - ckwq->kw_sword += PTHRW_INC; - - /* set C or P bits and free if needed */ - ksyn_cvupdate_fixup(ckwq, &updatebits, &kfreeq, 1); - - error = 0; - local_error = 0; - - *retval = updatebits; - - ksyn_wqunlock(ckwq); - - if (nkwe != NULL) - zfree(kwe_zone, nkwe); - - goto out; - - } - - uth = current_uthread(); - kwe = &uth->uu_kwe; - kwe->kwe_kwqqueue = ckwq; - kwe->kwe_flags = KWE_THREAD_INWAIT; - kwe->kwe_lockseq = lockseq; - kwe->kwe_count = 1; - kwe->kwe_uth = uth; - kwe->kwe_psynchretval = 0; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfeed, cgen, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - - error = ksyn_queue_insert(ckwq, kq, cgen, uth, kwe, SEQFIT); - if (error != 0) { - ksyn_wqunlock(ckwq); - local_error = error; - goto out; - } - - kret = ksyn_block_thread_locked(ckwq, abstime, kwe, 1, psynch_cvcontinue, (void *)ckwq); - /* lock dropped */ - - psynch_cvcontinue(ckwq, kret); - /* not expected to return from unix_syscall_return */ - panic("psynch_cvcontinue returned from unix_syscall_return"); - -out: -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, local_error, 0); -#endif /* _PSYNCH_TRACE_ */ - ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); - return(local_error); -} - - -void -psynch_cvcontinue(void * parameter, wait_result_t result) -{ - int error = 0, local_error = 0; - uthread_t uth = current_uthread(); - ksyn_wait_queue_t ckwq = (ksyn_wait_queue_t)parameter; - ksyn_waitq_element_t kwe; - struct ksyn_queue kfreeq; - - switch (result) { - case THREAD_TIMED_OUT: - error = ETIMEDOUT; - break; - case THREAD_INTERRUPTED: - error = EINTR; - break; - default: - error = 0; - break; - } -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf4f3f2f1, (uintptr_t)uth, result, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - - local_error = error; - kwe = &uth->uu_kwe; - - if (error != 0) { - ksyn_wqlock(ckwq); - /* just in case it got woken up as we were granting */ - uth->uu_rval[0] = kwe->kwe_psynchretval; - -#if __TESTPANICS__ - if ((kwe->kwe_kwqqueue != NULL) && (kwe->kwe_kwqqueue != ckwq)) - panic("cvwait waiting on some other kwq\n"); - -#endif /* __TESTPANICS__ */ - - - if (kwe->kwe_kwqqueue != NULL) { - ksyn_queue_removeitem(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe); - kwe->kwe_kwqqueue = NULL; - } - if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) { - /* the condition var granted. - * reset the error so that the thread returns back. 
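 *
 * A hedged editorial sketch, not from the original file, of the race being
 * handled here: a grant from cvsignal/cvbroad can land at the same moment
 * the block times out or is interrupted, so the wakeup path re-checks the
 * grant bit under the kwq lock before reporting the error:
 *
 *	if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
 *		local_error = 0;	// the grant won; report success
 *		ksyn_wqunlock(ckwq);
 *		goto out;
 *	}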
- */ - local_error = 0; - /* no need to set any bits just return as cvsig/broad covers this */ - ksyn_wqunlock(ckwq); - goto out; - } - - ckwq->kw_sword += PTHRW_INC; - - /* set C and P bits, in the local error */ - if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { - local_error |= ECVCERORR; - if (ckwq->kw_inqueue != 0) { - (void)ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (ckwq->kw_lword & PTHRW_COUNT_MASK), &kfreeq, 1, 1); - } - ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0; - ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT; - } else { - /* everythig in the queue is a fake entry ? */ - if ((ckwq->kw_inqueue != 0) && (ckwq->kw_fakecount == ckwq->kw_inqueue)) { - local_error |= ECVPERORR; - } - } - ksyn_wqunlock(ckwq); - - } else { - /* PTH_RWL_MTX_WAIT is removed */ - if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) - uth->uu_rval[0] = PTHRW_INC | PTH_RWS_CV_CBIT; - else - uth->uu_rval[0] = 0; - local_error = 0; - } -out: -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)ckwq->kw_addr, 0xeeeeeeed, uth->uu_rval[0], local_error, 0); -#endif /* _PSYNCH_TRACE_ */ - ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); - - unix_syscall_return(local_error); - -} - -/* - * psynch_cvclrprepost: This system call clears pending prepost if present. - */ -int -psynch_cvclrprepost(__unused proc_t p, struct psynch_cvclrprepost_args * uap, __unused int * retval) -{ - user_addr_t cond = uap->cv; - uint32_t cgen = uap->cvgen; - uint32_t cugen = uap->cvugen; - uint32_t csgen = uap->cvsgen; - uint32_t pseq = uap->preposeq; - uint32_t flags = uap->flags; - int error; - ksyn_wait_queue_t ckwq = NULL; - struct ksyn_queue kfreeq; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_START, (uint32_t)cond, cgen, cugen, csgen, 0); - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_NONE, (uint32_t)cond, 0xcececece, pseq, flags, 0); -#endif /* _PSYNCH_TRACE_ */ - - if ((flags & _PTHREAD_MTX_OPT_MUTEX) == 0) { - error = ksyn_wqfind(cond, cgen, cugen, csgen, 0, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &ckwq); - if (error != 0) { - *retval = 0; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - ksyn_wqlock(ckwq); - (void)ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (pseq & PTHRW_COUNT_MASK), &kfreeq, 0, 1); - ksyn_wqunlock(ckwq); - ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP)); - } else { - /* mutex type */ - error = ksyn_wqfind(cond, cgen, cugen, 0, 0, flags, (KSYN_WQTYPE_MTX | KSYN_WQTYPE_INDROP), &ckwq); - if (error != 0) { - *retval = 0; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0, 0xdeadbeef, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - ksyn_wqlock(ckwq); - if (((flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT) != 0) && (ckwq->kw_pre_rwwc != 0)) { - if (is_seqlower_eq(ckwq->kw_pre_lockseq, cgen) != 0) { - /* clear prepost */ - ckwq->kw_pre_rwwc = 0; - ckwq->kw_pre_lockseq = 0; - } - } - ksyn_wqunlock(ckwq); - ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_MTX | KSYN_WQTYPE_INDROP)); - } - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CLRPRE | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, 0, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - return(0); -} - -/* ***************** pthread_rwlock ************************ */ -/* - * 
psynch_rw_rdlock: This system call is used for psync rwlock readers to block. - */ -int -psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t * retval) -{ - user_addr_t rwlock = uap->rwlock; - uint32_t lgen = uap->lgenval; - uint32_t ugen = uap->ugenval; - uint32_t rw_wc = uap->rw_wc; - //uint64_t tid = uap->tid; - int flags = uap->flags; - int error = 0, block; - uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0; - ksyn_wait_queue_t kwq; - uthread_t uth; - int isinit = lgen & PTHRW_RWL_INIT; - uint32_t returnbits = 0; - ksyn_waitq_element_t kwe; - kern_return_t kret; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); -#endif /* _PSYNCH_TRACE_ */ - uth = current_uthread(); - - /* preserve the seq number */ - kwe = &uth->uu_kwe; - kwe->kwe_lockseq = lgen; - kwe->kwe_uth = uth; - kwe->kwe_psynchretval = 0; - kwe->kwe_kwqqueue = NULL; - - lockseq = lgen & PTHRW_COUNT_MASK; - - - error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - ksyn_wqlock(kwq); - - if (isinit != 0) { - lgen &= ~PTHRW_RWL_INIT; - if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) { - /* first to notice the reset of the lock, clear preposts */ - CLEAR_REINIT_BITS(kwq); - kwq->kw_kflags |= KSYN_KWF_INITCLEARED; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0); -#endif /* _PSYNCH_TRACE_ */ - } - } - - /* handle first the missed wakeups */ - if ((kwq->kw_pre_intrcount != 0) && - ((kwq->kw_pre_intrtype == PTH_RW_TYPE_READ) || (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD)) && - (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) { - - kwq->kw_pre_intrcount--; - kwe->kwe_psynchretval = kwq->kw_pre_intrretbits; - if (kwq->kw_pre_intrcount==0) - CLEAR_INTR_PREPOST_BITS(kwq); - ksyn_wqunlock(kwq); - goto out; - } - - /* handle overlap first as they are not counted against pre_rwwc */ - - /* check for overlap and if no pending W bit (indicates writers) */ - if ((kwq->kw_overlapwatch != 0) && ((rw_wc & PTHRW_RWS_SAVEMASK) == 0) && ((lgen & PTH_RWL_WBIT) == 0)) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 10, kwq->kw_nextseqword, kwq->kw_lastseqword, 0); -#endif /* _PSYNCH_TRACE_ */ - error = kwq_handle_overlap(kwq, lgen, ugen, rw_wc, &updatebits, (KW_UNLOCK_PREPOST_READLOCK|KW_UNLOCK_PREPOST), &block); -#if __TESTPANICS__ - if (error != 0) - panic("rw_rdlock: kwq_handle_overlap failed %d\n",error); -#endif /* __TESTPANICS__ */ - if (block == 0) { - error = 0; - kwe->kwe_psynchretval = updatebits; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 0xff, updatebits, 0xee, 0); -#endif /* _PSYNCH_TRACE_ */ - ksyn_wqunlock(kwq); - goto out; - } - } - - if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - kwq->kw_pre_rwwc--; - if (kwq->kw_pre_rwwc == 0) { - preseq = kwq->kw_pre_lockseq; - prerw_wc = kwq->kw_pre_sseq; - CLEAR_PREPOST_BITS(kwq); - if ((kwq->kw_kflags & 
KSYN_KWF_INITCLEARED) != 0){
-				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
-#if _PSYNCH_TRACE_
-				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-			}
-			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_READLOCK|KW_UNLOCK_PREPOST), &block, lgen);
-#if __TESTPANICS__
-			if (error != 0)
-				panic("rw_rdlock: kwq_handle_unlock failed %d\n", error);
-#endif /* __TESTPANICS__ */
-			if (block == 0) {
-				ksyn_wqunlock(kwq);
-				goto out;
-			}
-			/* insert to queue and proceed as usual */
-		}
-	}
-
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], lgen, uth, kwe, SEQFIT);
-#if __TESTPANICS__
-	if (error != 0)
-		panic("psynch_rw_rdlock: failed to enqueue\n");
-#endif /* __TESTPANICS__ */
-	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
-	/* drops the kwq lock */
-	switch (kret) {
-	case THREAD_TIMED_OUT:
-		error = ETIMEDOUT;
-		break;
-	case THREAD_INTERRUPTED:
-		error = EINTR;
-		break;
-	default:
-		error = 0;
-		break;
-	}
-
-out:
-	if (error != 0) {
-#if _PSYNCH_TRACE_
-		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-		ksyn_wqlock(kwq);
-		if (kwe->kwe_kwqqueue != NULL)
-			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_READ], kwe);
-		ksyn_wqunlock(kwq);
-	} else {
-		/* update bits */
-		*retval = kwe->kwe_psynchretval;
-		returnbits = kwe->kwe_psynchretval;
-	}
-	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
-#endif /* _PSYNCH_TRACE_ */
-	return(error);
-}
-
-/*
- * psynch_rw_longrdlock: This system call is used for psynch rwlock long readers to block.
- */ -int -#ifdef NOTYET -psynch_rw_longrdlock(__unused proc_t p, struct psynch_rw_longrdlock_args * uap, __unused uint32_t * retval) -#else /* NOTYET */ -psynch_rw_longrdlock(__unused proc_t p, __unused struct psynch_rw_longrdlock_args * uap, __unused uint32_t * retval) -#endif /* NOTYET */ -{ -#ifdef NOTYET - user_addr_t rwlock = uap->rwlock; - uint32_t lgen = uap->lgenval; - uint32_t ugen = uap->ugenval; - uint32_t rw_wc = uap->rw_wc; - //uint64_t tid = uap->tid; - int flags = uap->flags; - int isinit = lgen & PTHRW_RWL_INIT; - uint32_t returnbits=0; - ksyn_waitq_element_t kwe; - kern_return_t kret; - - ksyn_wait_queue_t kwq; - int error=0, block = 0 ; - uthread_t uth; - uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); -#endif /* _PSYNCH_TRACE_ */ - uth = current_uthread(); - kwe = &uth->uu_kwe; - kwe->kwe_lockseq = lgen; - kwe->kwe_uth = uth; - kwe->kwe_psynchretval = 0; - kwe->kwe_kwqqueue = NULL; - lockseq = (lgen & PTHRW_COUNT_MASK); - - error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - ksyn_wqlock(kwq); - - if (isinit != 0) { - lgen &= ~PTHRW_RWL_INIT; - if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) { - /* first to notice the reset of the lock, clear preposts */ - CLEAR_REINIT_BITS(kwq); - kwq->kw_kflags |= KSYN_KWF_INITCLEARED; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0); -#endif /* _PSYNCH_TRACE_ */ - } - } - - /* handle first the missed wakeups */ - if ((kwq->kw_pre_intrcount != 0) && - (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD) && - (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) { - - kwq->kw_pre_intrcount--; - kwe->kwe_psynchretval = kwq->kw_pre_intrretbits; - if (kwq->kw_pre_intrcount==0) - CLEAR_INTR_PREPOST_BITS(kwq); - ksyn_wqunlock(kwq); - goto out; - } - - - if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - kwq->kw_pre_rwwc--; - if (kwq->kw_pre_rwwc == 0) { - preseq = kwq->kw_pre_lockseq; - prerw_wc = kwq->kw_pre_sseq; - CLEAR_PREPOST_BITS(kwq); - if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){ - kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - } - error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_LREADLOCK|KW_UNLOCK_PREPOST), &block, lgen); -#if __TESTPANICS__ - if (error != 0) - panic("kwq_handle_unlock failed %d\n",error); -#endif /* __TESTPANICS__ */ - if (block == 0) { - ksyn_wqunlock(kwq); - goto out; - } - /* insert to q and proceed as ususal */ - } - } - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], lgen, uth, kwe, SEQFIT); -#if __TESTPANICS__ - if (error != 0) - panic("psynch_rw_longrdlock: failed to enqueue\n"); 
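
/*
 * A hedged editorial sketch, not part of the original file.  The kw_pre_*
 * fields consulted above implement "preposting": a wakeup that arrives
 * before its waiter has blocked is recorded on the wait queue, and a
 * late-arriving waiter whose sequence falls at or below the recorded one
 * consumes it instead of blocking.  The recurring consume check has this
 * shape:
 *
 *	if ((kwq->kw_pre_rwwc != 0) &&
 *	    (is_seqlower_eq(lockseq,
 *	        (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
 *		kwq->kw_pre_rwwc--;		// one prepost consumed
 *		if (kwq->kw_pre_rwwc == 0)
 *			CLEAR_PREPOST_BITS(kwq);	// fully drained
 *	}
 */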
-#endif /* __TESTPANICS__ */ - - kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL); - /* drops the kwq lock */ - switch (kret) { - case THREAD_TIMED_OUT: - error = ETIMEDOUT; - break; - case THREAD_INTERRUPTED: - error = EINTR; - break; - default: - error = 0; - break; - } -out: - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); -#endif /* _PSYNCH_TRACE_ */ - ksyn_wqlock(kwq); - if (kwe->kwe_kwqqueue != NULL) - ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], kwe); - ksyn_wqunlock(kwq); - } else { - /* update bits */ - *retval = kwe->kwe_psynchretval; - returnbits = kwe->kwe_psynchretval; - } - - ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK)); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWLRDLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, returnbits, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); -#else /* NOTYET */ - return(ESRCH); -#endif /* NOTYET */ -} - - -/* - * psynch_rw_wrlock: This system call is used for psync rwlock writers to block. - */ -int -psynch_rw_wrlock(__unused proc_t p, struct psynch_rw_wrlock_args * uap, uint32_t * retval) -{ - user_addr_t rwlock = uap->rwlock; - uint32_t lgen = uap->lgenval; - uint32_t ugen = uap->ugenval; - uint32_t rw_wc = uap->rw_wc; - //uint64_t tid = uap->tid; - int flags = uap->flags; - int block; - ksyn_wait_queue_t kwq; - int error=0; - uthread_t uth; - uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0; - int isinit = lgen & PTHRW_RWL_INIT; - uint32_t returnbits = 0; - ksyn_waitq_element_t kwe; - kern_return_t kret; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); -#endif /* _PSYNCH_TRACE_ */ - uth = current_uthread(); - kwe = &uth->uu_kwe; - kwe->kwe_lockseq = lgen; - kwe->kwe_uth = uth; - kwe->kwe_psynchretval = 0; - kwe->kwe_kwqqueue = NULL; - lockseq = (lgen & PTHRW_COUNT_MASK); - - error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - ksyn_wqlock(kwq); - - - if (isinit != 0) { - lgen &= ~PTHRW_RWL_INIT; - if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) { - /* first to notice the reset of the lock, clear preposts */ - CLEAR_REINIT_BITS(kwq); - kwq->kw_kflags |= KSYN_KWF_INITCLEARED; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0); -#endif /* _PSYNCH_TRACE_ */ - } - } - - - /* handle first the missed wakeups */ - if ((kwq->kw_pre_intrcount != 0) && - (kwq->kw_pre_intrtype == PTH_RW_TYPE_WRITE) && - (is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) { - - kwq->kw_pre_intrcount--; - kwe->kwe_psynchretval = kwq->kw_pre_intrretbits; - if (kwq->kw_pre_intrcount==0) - CLEAR_INTR_PREPOST_BITS(kwq); - ksyn_wqunlock(kwq); - goto out; - } - - - if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - kwq->kw_pre_rwwc--; - if (kwq->kw_pre_rwwc == 0) { - preseq = kwq->kw_pre_lockseq; - prerw_wc = kwq->kw_pre_sseq; - 
CLEAR_PREPOST_BITS(kwq);
-			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
-				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
-#if _PSYNCH_TRACE_
-				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-			}
-			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_WRLOCK|KW_UNLOCK_PREPOST), &block, lgen);
-#if __TESTPANICS__
-			if (error != 0)
-				panic("rw_wrlock: kwq_handle_unlock failed %d\n", error);
-#endif /* __TESTPANICS__ */
-			if (block == 0) {
-				ksyn_wqunlock(kwq);
-				*retval = updatebits;
-				goto out1;
-			}
-			/* insert to queue and proceed as usual */
-		}
-	}
-
-	/* No overlap watch needed; go ahead and block */
-
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], lgen, uth, kwe, SEQFIT);
-#if __TESTPANICS__
-	if (error != 0)
-		panic("psynch_rw_wrlock: failed to enqueue\n");
-#endif /* __TESTPANICS__ */
-
-	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
-	/* drops the wq lock */
-	switch (kret) {
-	case THREAD_TIMED_OUT:
-		error = ETIMEDOUT;
-		break;
-	case THREAD_INTERRUPTED:
-		error = EINTR;
-		break;
-	default:
-		error = 0;
-		break;
-	}
-
-out:
-	if (error != 0) {
-#if _PSYNCH_TRACE_
-		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-		ksyn_wqlock(kwq);
-		if (kwe->kwe_kwqqueue != NULL)
-			ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
-		ksyn_wqunlock(kwq);
-	} else {
-		/* update bits */
-		*retval = kwe->kwe_psynchretval;
-		returnbits = kwe->kwe_psynchretval;
-	}
-out1:
-	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
-
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0);
-#endif /* _PSYNCH_TRACE_ */
-	return(error);
-}
-
-/*
- * psynch_rw_yieldwrlock: This system call is used for psynch rwlock yielding writers to block.
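 *
 * A hedged editorial sketch, not from the original file: every lock entry
 * point runs the same re-initialization handshake first.  If userland set
 * PTHRW_RWL_INIT in the generation word, the first thread to notice clears
 * stale prepost state and latches KSYN_KWF_INITCLEARED so later arrivals
 * do not clear it again:
 *
 *	if (lgen & PTHRW_RWL_INIT) {
 *		lgen &= ~PTHRW_RWL_INIT;
 *		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
 *			CLEAR_REINIT_BITS(kwq);		// drop stale preposts
 *			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
 *		}
 *	}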
- */
-int
-#ifdef NOTYET
-psynch_rw_yieldwrlock(__unused proc_t p, __unused struct psynch_rw_yieldwrlock_args * uap, __unused uint32_t * retval)
-#else /* NOTYET */
-psynch_rw_yieldwrlock(__unused proc_t p, __unused struct psynch_rw_yieldwrlock_args * uap, __unused uint32_t * retval)
-#endif /* NOTYET */
-{
-#ifdef NOTYET
-	user_addr_t rwlock = uap->rwlock;
-	uint32_t lgen = uap->lgenval;
-	uint32_t ugen = uap->ugenval;
-	uint32_t rw_wc = uap->rw_wc;
-	//uint64_t tid = uap->tid;
-	int flags = uap->flags;
-	int block;
-	ksyn_wait_queue_t kwq;
-	int error = 0;
-	int isinit = lgen & PTHRW_RWL_INIT;
-	uthread_t uth;
-	uint32_t returnbits = 0;
-	ksyn_waitq_element_t kwe;
-	kern_return_t kret;
-
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
-#endif /* _PSYNCH_TRACE_ */
-	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
-
-	uth = current_uthread();
-	kwe = &uth->uu_kwe;
-	kwe->kwe_lockseq = lgen;
-	kwe->kwe_uth = uth;
-	kwe->kwe_psynchretval = 0;
-	kwe->kwe_kwqqueue = NULL;
-	lockseq = (lgen & PTHRW_COUNT_MASK);
-
-	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
-	if (error != 0) {
-#if _PSYNCH_TRACE_
-		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
-#endif /* _PSYNCH_TRACE_ */
-		return(error);
-	}
-
-	ksyn_wqlock(kwq);
-
-	if (isinit != 0) {
-		lgen &= ~PTHRW_RWL_INIT;
-		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
-			/* first to notice the reset of the lock, clear preposts */
-			CLEAR_REINIT_BITS(kwq);
-			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
-#if _PSYNCH_TRACE_
-			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
-#endif /* _PSYNCH_TRACE_ */
-		}
-	}
-
-	/* handle first the missed wakeups */
-	if ((kwq->kw_pre_intrcount != 0) &&
-		(kwq->kw_pre_intrtype == PTH_RW_TYPE_YWRITE) &&
-		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
-
-		kwq->kw_pre_intrcount--;
-		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
-		if (kwq->kw_pre_intrcount == 0)
-			CLEAR_INTR_PREPOST_BITS(kwq);
-		ksyn_wqunlock(kwq);
-		goto out;
-	}
-
-	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
-#if _PSYNCH_TRACE_
-		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
-#endif /* _PSYNCH_TRACE_ */
-		kwq->kw_pre_rwwc--;
-		if (kwq->kw_pre_rwwc == 0) {
-			preseq = kwq->kw_pre_lockseq;
-			prerw_wc = kwq->kw_pre_sseq;
-			CLEAR_PREPOST_BITS(kwq);
-			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
-				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
-#if _PSYNCH_TRACE_
-				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-			}
-			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_YWRLOCK|KW_UNLOCK_PREPOST), &block, lgen);
-#if __TESTPANICS__
-			if (error != 0)
-				panic("kwq_handle_unlock failed %d\n", error);
-#endif /* __TESTPANICS__ */
-			if (block == 0) {
-				ksyn_wqunlock(kwq);
-				*retval = updatebits;
-				goto out;
-			}
-			/* insert to queue and proceed as usual */
-		}
-	}
-
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], lgen, uth, kwe, SEQFIT);
-#if __TESTPANICS__
-	if (error != 0)
-
panic("psynch_rw_yieldwrlock: failed to enqueue\n"); -#endif /* __TESTPANICS__ */ - - kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL); - switch (kret) { - case THREAD_TIMED_OUT: - error = ETIMEDOUT; - break; - case THREAD_INTERRUPTED: - error = EINTR; - break; - default: - error = 0; - break; - } - -out: - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - ksyn_wqlock(kwq); - if (kwe->kwe_kwqqueue != NULL) - ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], kwe); - ksyn_wqunlock(kwq); - } else { - /* update bits */ - *retval = kwe->kwe_psynchretval; - returnbits = kwe->kwe_psynchretval; - } - - ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, returnbits, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); -#else /* NOTYET */ - return(ESRCH); -#endif /* NOTYET */ -} - -#if NOTYET -/* - * psynch_rw_downgrade: This system call is used for wakeup blocked readers who are eligible to run due to downgrade. - */ -int -psynch_rw_downgrade(__unused proc_t p, struct psynch_rw_downgrade_args * uap, __unused int * retval) -{ - user_addr_t rwlock = uap->rwlock; - uint32_t lgen = uap->lgenval; - uint32_t ugen = uap->ugenval; - uint32_t rw_wc = uap->rw_wc; - //uint64_t tid = uap->tid; - int flags = uap->flags; - uint32_t count = 0; - int isinit = lgen & PTHRW_RWL_INIT; - ksyn_wait_queue_t kwq; - int error=0; - uthread_t uth; - uint32_t curgen = 0; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); -#endif /* _PSYNCH_TRACE_ */ - uth = current_uthread(); - - curgen = (lgen & PTHRW_COUNT_MASK); - - error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - ksyn_wqlock(kwq); - - if ((lgen & PTHRW_RWL_INIT) != 0) { - lgen &= ~PTHRW_RWL_INIT; - if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0){ - CLEAR_REINIT_BITS(kwq); - kwq->kw_kflags |= KSYN_KWF_INITCLEARED; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0); -#endif /* _PSYNCH_TRACE_ */ - } - isinit = 1; - } - - /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */ - if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugen, kwq->kw_lastunlockseq)!= 0)) { - /* spurious updatebits?? 
*/
-		error = 0;
-		goto out;
-	}
-
-	/* If L-U != num of waiters, then it needs to be preposted or spr */
-	int diff = find_diff(lgen, ugen);
-	/* take count of the downgrade thread itself */
-	diff--;
-
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_inqueue, curgen, 0);
-#endif /* _PSYNCH_TRACE_ */
-	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
-		if (count < (uint32_t)diff)
-			goto prepost;
-	}
-
-	/* no prepost and all threads are in place, reset the bit */
-	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){
-		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
-#if _PSYNCH_TRACE_
-		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-	}
-
-	/* can handle unlock now */
-
-	CLEAR_PREPOST_BITS(kwq);
-
-dounlock:
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-	error = kwq_handle_downgrade(kwq, lgen, 0, 0, NULL);
-
-#if __TESTPANICS__
-	if (error != 0)
-		panic("psynch_rw_downgrade: failed to wakeup\n");
-#endif /* __TESTPANICS__ */
-
-out:
-	ksyn_wqunlock(kwq);
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_END, (uint32_t)rwlock, 0, 0, error, 0);
-#endif /* _PSYNCH_TRACE_ */
-	ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));
-
-	return(error);
-
-prepost:
-	kwq->kw_pre_rwwc = (rw_wc - count);
-	kwq->kw_pre_lockseq = lgen;
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWDOWNGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
-#endif /* _PSYNCH_TRACE_ */
-	error = 0;
-	goto out;
-}
-
-
-/*
- * psynch_rw_upgrade: This system call is used by a reader to block waiting for upgrade to be granted.
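 *
 * A hedged editorial sketch, not from the original file: besides preposts,
 * each wait queue banks "missed wakeups" (the kw_pre_intr* fields).  A
 * wakeup that found its target already gone is recorded by type and
 * sequence, and a later waiter that matches consumes it without blocking,
 * as in this condensed form of the check repeated in each lock path:
 *
 *	if ((kwq->kw_pre_intrcount != 0) &&
 *	    (kwq->kw_pre_intrtype == PTH_RW_TYPE_READ) &&	// type match
 *	    (is_seqlower_eq(lockseq,
 *	        (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
 *		kwq->kw_pre_intrcount--;
 *		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
 *		if (kwq->kw_pre_intrcount == 0)
 *			CLEAR_INTR_PREPOST_BITS(kwq);
 *	}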
- */
-int
-psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32_t * retval)
-{
-	user_addr_t rwlock = uap->rwlock;
-	uint32_t lgen = uap->lgenval;
-	uint32_t ugen = uap->ugenval;
-	uint32_t rw_wc = uap->rw_wc;
-	//uint64_t tid = uap->tid;
-	int flags = uap->flags;
-	int block;
-	ksyn_wait_queue_t kwq;
-	int error = 0;
-	uthread_t uth;
-	uint32_t lockseq = 0, updatebits = 0, preseq = 0, prerw_wc = 0;
-	int isinit = lgen & PTHRW_RWL_INIT;
-	ksyn_waitq_element_t kwe;
-	kern_return_t kret;
-
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0);
-#endif /* _PSYNCH_TRACE_ */
-	uth = current_uthread();
-	kwe = &uth->uu_kwe;
-	kwe->kwe_lockseq = lgen;
-	kwe->kwe_uth = uth;
-	kwe->kwe_psynchretval = 0;
-	kwe->kwe_kwqqueue = NULL;
-	lockseq = (lgen & PTHRW_COUNT_MASK);
-
-	error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
-	if (error != 0) {
-#if _PSYNCH_TRACE_
-		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0);
-#endif /* _PSYNCH_TRACE_ */
-		return(error);
-	}
-
-	ksyn_wqlock(kwq);
-
-	if (isinit != 0) {
-		lgen &= ~PTHRW_RWL_INIT;
-		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
-			/* first to notice the reset of the lock, clear preposts */
-			CLEAR_REINIT_BITS(kwq);
-			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
-#if _PSYNCH_TRACE_
-			__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0);
-#endif /* _PSYNCH_TRACE_ */
-		}
-	}
-
-	/* handle first the missed wakeups */
-	if ((kwq->kw_pre_intrcount != 0) &&
-		((kwq->kw_pre_intrtype == PTH_RW_TYPE_READ) || (kwq->kw_pre_intrtype == PTH_RW_TYPE_LREAD)) &&
-		(is_seqlower_eq(lockseq, (kwq->kw_pre_intrseq & PTHRW_COUNT_MASK)) != 0)) {
-
-		kwq->kw_pre_intrcount--;
-		kwe->kwe_psynchretval = kwq->kw_pre_intrretbits;
-		if (kwq->kw_pre_intrcount == 0)
-			CLEAR_INTR_PREPOST_BITS(kwq);
-		ksyn_wqunlock(kwq);
-		goto out;
-	}
-
-	if ((kwq->kw_pre_rwwc != 0) && (is_seqlower_eq(lockseq, (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)) != 0)) {
-#if _PSYNCH_TRACE_
-		__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0);
-#endif /* _PSYNCH_TRACE_ */
-		kwq->kw_pre_rwwc--;
-		if (kwq->kw_pre_rwwc == 0) {
-			preseq = kwq->kw_pre_lockseq;
-			prerw_wc = kwq->kw_pre_sseq;
-			CLEAR_PREPOST_BITS(kwq);
-			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
-				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
-#if _PSYNCH_TRACE_
-				__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-			}
-			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (KW_UNLOCK_PREPOST_UPGRADE|KW_UNLOCK_PREPOST), &block, lgen);
-#if __TESTPANICS__
-			if (error != 0)
-				panic("rw_upgrade: kwq_handle_unlock failed %d\n", error);
-#endif /* __TESTPANICS__ */
-			if (block == 0) {
-				ksyn_wqunlock(kwq);
-				goto out;
-			}
-			/* insert to queue and proceed as usual */
-		}
-	}
-
-#if _PSYNCH_TRACE_
-	__PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 3, 0, 0, 0);
-#endif /* _PSYNCH_TRACE_ */
-	error = ksyn_queue_insert(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], lgen, uth, kwe, SEQFIT);
-#if __TESTPANICS__
-	if (error != 0)
-		panic("psynch_rw_upgrade: failed to enqueue\n");
-#endif /* __TESTPANICS__ */
-
-	kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL);
-	/* drops the lock */
-	switch (kret) {
-	case THREAD_TIMED_OUT:
-
error = ETIMEDOUT; - break; - case THREAD_INTERRUPTED: - error = EINTR; - break; - default: - error = 0; - break; - } - -out: - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_NONE, (uint32_t)rwlock, 4, error, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - ksyn_wqlock(kwq); - if (kwe->kwe_kwqqueue != NULL) - ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], kwe); - ksyn_wqunlock(kwq); - } else { - /* update bits */ - *retval = kwe->kwe_psynchretval; - } - - ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK)); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); -#endif /* _PSYNCH_TRACE_ */ - - return(error); -} - -#else /* NOTYET */ -int -psynch_rw_upgrade(__unused proc_t p, __unused struct psynch_rw_upgrade_args * uap, __unused uint32_t * retval) -{ - return(0); -} -int -psynch_rw_downgrade(__unused proc_t p, __unused struct psynch_rw_downgrade_args * uap, __unused int * retval) -{ - return(0); -} -#endif /* NOTYET */ -/* - * psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate - * reader/writer variety lock. - */ - -int -psynch_rw_unlock(__unused proc_t p, struct psynch_rw_unlock_args * uap, uint32_t * retval) -{ - user_addr_t rwlock = uap->rwlock; - uint32_t lgen = uap->lgenval; - uint32_t ugen = uap->ugenval; - uint32_t rw_wc = uap->rw_wc; - uint32_t curgen; - //uint64_t tid = uap->tid; - int flags = uap->flags; - uthread_t uth; - ksyn_wait_queue_t kwq; - uint32_t updatebits = 0; - int error=0, diff; - uint32_t count = 0; - int isinit = 0; - - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); -#endif /* _PSYNCH_TRACE_ */ - uth = current_uthread(); - - error = ksyn_wqfind(rwlock, lgen, ugen, rw_wc, TID_ZERO, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq); - if (error != 0) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_END, (uint32_t)rwlock, 1, 0, error, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); - } - - curgen = lgen & PTHRW_COUNT_MASK; - - ksyn_wqlock(kwq); - - if ((lgen & PTHRW_RWL_INIT) != 0) { - lgen &= ~PTHRW_RWL_INIT; - if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0){ - CLEAR_REINIT_BITS(kwq); - kwq->kw_kflags |= KSYN_KWF_INITCLEARED; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 1, 0); -#endif /* _PSYNCH_TRACE_ */ - } - isinit = 1; - } - - /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */ - if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugen, kwq->kw_lastunlockseq)!= 0)) { -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, (uint32_t)0xeeeeeeee, rw_wc, kwq->kw_lastunlockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - error = 0; - goto out; - } - - /* If L-U != num of waiters, then it needs to be preposted or spr */ - diff = find_diff(lgen, ugen); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 1, kwq->kw_inqueue, curgen, 0); -#endif /* _PSYNCH_TRACE_ */ - if (find_seq_till(kwq, curgen, diff, &count) == 0) { - if ((count == 0) || (count < (uint32_t)diff)) - goto prepost; - } - - /* no prepost and all threads are in place, reset the bit */ - if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){ - kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED; 
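
/*
 * A hedged editorial sketch, not part of the original file.  The check
 * above chooses between waking threads now and preposting: find_diff()
 * computes L - U, the number of waiters this unlock must cover, and
 * find_seq_till() counts how many of them are actually queued up to
 * curgen.  When fewer threads are queued than the generation math
 * requires, the surplus is banked at the prepost: label below:
 *
 *	diff = find_diff(lgen, ugen);		// waiters owed a wakeup
 *	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
 *		if ((count == 0) || (count < (uint32_t)diff))
 *			goto prepost;		// not everyone has arrived
 *	}
 */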
-#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, lgen, ugen, rw_wc, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - } - - /* can handle unlock now */ - - CLEAR_PREPOST_BITS(kwq); - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 2, 0, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - error = kwq_handle_unlock(kwq, lgen, rw_wc, &updatebits, 0, NULL, 0); -#if __TESTPANICS__ - if (error != 0) - panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n",error); -#endif /* __TESTPANICS__ */ -out: - if (error == 0) { - /* update bits?? */ - *retval = updatebits; - } - - - ksyn_wqunlock(kwq); - - ksyn_wqrelease(kwq, NULL, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK)); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_END, (uint32_t)rwlock, 0, updatebits, error, 0); -#endif /* _PSYNCH_TRACE_ */ - - return(error); - -prepost: - /* update if the new seq is higher than prev prepost, or first set */ - if ((is_rws_setseq(kwq->kw_pre_sseq) != 0) || - (is_seqhigher_eq((rw_wc & PTHRW_COUNT_MASK), (kwq->kw_pre_sseq & PTHRW_COUNT_MASK)) != 0)) { - kwq->kw_pre_rwwc = (diff - count); - kwq->kw_pre_lockseq = curgen; - kwq->kw_pre_sseq = rw_wc; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 3, rw_wc, count, 0); - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUNLOCK | DBG_FUNC_NONE, (uint32_t)rwlock, 4, kwq->kw_pre_rwwc, kwq->kw_pre_lockseq, 0); -#endif /* _PSYNCH_TRACE_ */ - updatebits = lgen; /* let this not do unlock handling */ - } - error = 0; - goto out; -} - - -/* - * psynch_rw_unlock2: This system call is used to wakeup pending readers when unlock grant frm kernel - * to new reader arrival races - */ -int -psynch_rw_unlock2(__unused proc_t p, __unused struct psynch_rw_unlock2_args * uap, __unused uint32_t * retval) -{ - return(ENOTSUP); -} - - -/* ************************************************************************** */ -void -pth_global_hashinit() -{ - int arg; - - pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash); - - /* - * pthtest={0,1,2,3} (override default aborting behavior on pthread sync failures) - * 0 - just return errors - * 1 - print and return errors - * 2 - abort user, print and return errors - * 3 - panic - */ - if (!PE_parse_boot_argn("pthtest", &arg, sizeof(arg))) - arg = __TESTMODE__; - - if (arg == 3) { - __test_panics__ = 1; - printf("Pthread support PANICS when sync kernel primitives misused\n"); - } else if (arg == 2) { - __test_aborts__ = 1; - __test_prints__ = 1; - printf("Pthread support ABORTS when sync kernel primitives misused\n"); - } else if (arg == 1) { - __test_prints__ = 1; - printf("Pthread support LOGS when sync kernel primitives misused\n"); - } -} - -void -pth_proc_hashinit(proc_t p) -{ - p->p_pthhash = hashinit(PTH_HASHSIZE, M_PROC, &pthhash); - if (p->p_pthhash == NULL) - panic("pth_proc_hashinit: hash init returned 0\n"); -} - - -ksyn_wait_queue_t -ksyn_wq_hash_lookup(user_addr_t mutex, proc_t p, int flags, uint64_t object, uint64_t objoffset) -{ - ksyn_wait_queue_t kwq; - struct pthhashhead * hashptr; - - if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) - { - hashptr = pth_glob_hashtbl; - kwq = (&hashptr[object & pthhash])->lh_first; - if (kwq != 0) { - for (; kwq != NULL; kwq = kwq->kw_hash.le_next) { - if ((kwq->kw_object == object) &&(kwq->kw_offset == objoffset)) { - return (kwq); - } - } - } - } else { - hashptr = p->p_pthhash; - kwq = (&hashptr[mutex & pthhash])->lh_first; - if (kwq != 0) 
- for (; kwq != NULL; kwq = kwq->kw_hash.le_next) { - if (kwq->kw_addr == mutex) { - return (kwq); - } - } - } - return(NULL); -} - -void -pth_proc_hashdelete(proc_t p) -{ - struct pthhashhead * hashptr; - ksyn_wait_queue_t kwq; - int hashsize = pthhash + 1; - int i; - -#if _PSYNCH_TRACE_ - if ((pthread_debug_proc != NULL) && (p == pthread_debug_proc)) - pthread_debug_proc = PROC_NULL; -#endif /* _PSYNCH_TRACE_ */ - hashptr = p->p_pthhash; - p->p_pthhash = NULL; - if (hashptr == NULL) - return; - - pthread_list_lock(); - for(i= 0; i < hashsize; i++) { - while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) { - if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) { - kwq->kw_pflags &= ~KSYN_WQ_INHASH; - LIST_REMOVE(kwq, kw_hash); - } - if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { - kwq->kw_pflags &= ~KSYN_WQ_FLIST; - LIST_REMOVE(kwq, kw_list); - num_infreekwq--; - } - num_freekwq++; - pthread_list_unlock(); - /* release fake entries if present for cvars */ - if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0)) - ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]); - lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); - zfree(kwq_zone, kwq); - pthread_list_lock(); - } - } - pthread_list_unlock(); - FREE(hashptr, M_PROC); -} - -/* no lock held for this as the waitqueue is getting freed */ -void -ksyn_freeallkwe(ksyn_queue_t kq) -{ - ksyn_waitq_element_t kwe; - - /* free all the fake entries, dequeue rest */ - kwe = TAILQ_FIRST(&kq->ksynq_kwelist); - while (kwe != NULL) { - if (kwe->kwe_flags != KWE_THREAD_INWAIT) { - TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); - zfree(kwe_zone, kwe); - } else { - TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); - } - kwe = TAILQ_FIRST(&kq->ksynq_kwelist); - } -} - -/* find kernel waitqueue, if not present create one. 
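 *
 * A hedged editorial sketch, not from the original file: lookup is keyed
 * two ways, as in ksyn_wq_hash_lookup() above.  Process-shared
 * synchronizers hash into the global table by backing VM object and
 * offset, so every process mapping the page resolves to the same kwq,
 * while process-private ones hash the per-process table by user address:
 *
 *	head = shared ? &pth_glob_hashtbl[object & pthhash]	// pseudocode
 *		      : &p->p_pthhash[mutex & pthhash];
 *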
Grants a reference */ -int -ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int flags, int wqtype, ksyn_wait_queue_t * kwqp) -{ - ksyn_wait_queue_t kwq; - ksyn_wait_queue_t nkwq; - struct pthhashhead * hashptr; - uint64_t object = 0, offset = 0; - uint64_t hashhint; - proc_t p = current_proc(); - int retry = mgen & PTH_RWL_RETRYBIT; - struct ksyn_queue kfreeq; - int i; - - if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) - { - (void)ksyn_findobj(mutex, &object, &offset); - hashhint = object; - hashptr = pth_glob_hashtbl; - } else { - hashptr = p->p_pthhash; - } - - ksyn_queue_init(&kfreeq); - - if (((wqtype & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX) && (retry != 0)) - mgen &= ~PTH_RWL_RETRYBIT; - -loop: - //pthread_list_lock_spin(); - pthread_list_lock(); - - kwq = ksyn_wq_hash_lookup(mutex, p, flags, object, offset); - - if (kwq != NULL) { - if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { - LIST_REMOVE(kwq, kw_list); - kwq->kw_pflags &= ~KSYN_WQ_FLIST; - num_infreekwq--; - num_reusekwq++; - } - if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype &KSYN_WQTYPE_MASK)) { - if ((kwq->kw_inqueue == 0) && (kwq->kw_pre_rwwc ==0) && (kwq->kw_pre_intrcount == 0)) { - if (kwq->kw_iocount == 0) { - kwq->kw_addr = mutex; - kwq->kw_flags = flags; - kwq->kw_object = object; - kwq->kw_offset = offset; - kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK); - CLEAR_REINIT_BITS(kwq); - CLEAR_INTR_PREPOST_BITS(kwq); - CLEAR_PREPOST_BITS(kwq); - kwq->kw_lword = mgen; - kwq->kw_uword = ugen; - kwq->kw_sword = rw_wc; - kwq->kw_owner = tid; - } else if ((kwq->kw_iocount == 1) && (kwq->kw_dropcount == kwq->kw_iocount)) { - /* if all users are unlockers then wait for it to finish */ - kwq->kw_pflags |= KSYN_WQ_WAITING; - /* wait for the wq to be free */ - (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0); - /* does not have list lock */ - goto loop; - } else { - __FAILEDUSERTEST__("address already known to kernel for another (busy) synchronizer type\n"); - pthread_list_unlock(); - return EBUSY; - } - } else { - __FAILEDUSERTEST__("address already known to kernel for another (busy) synchronizer type(1)\n"); - pthread_list_unlock(); - return EBUSY; - } - } - kwq->kw_iocount++; - if (wqtype == KSYN_WQTYPE_MUTEXDROP) - kwq->kw_dropcount++; - if (kwqp != NULL) - *kwqp = kwq; - pthread_list_unlock(); - return (0); - } - - pthread_list_unlock(); - - nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone); - bzero(nkwq, sizeof(struct ksyn_wait_queue)); - nkwq->kw_addr = mutex; - nkwq->kw_flags = flags; - nkwq->kw_iocount = 1; - if (wqtype == KSYN_WQTYPE_MUTEXDROP) - nkwq->kw_dropcount++; - nkwq->kw_object = object; - nkwq->kw_offset = offset; - nkwq->kw_type = (wqtype & KSYN_WQTYPE_MASK); - nkwq->kw_lastseqword = PTHRW_RWS_INIT; - if (nkwq->kw_type == KSYN_WQTYPE_RWLOCK) - nkwq->kw_nextseqword = PTHRW_RWS_INIT; - - nkwq->kw_pre_sseq = PTHRW_RWS_INIT; - - CLEAR_PREPOST_BITS(nkwq); - CLEAR_INTR_PREPOST_BITS(nkwq); - CLEAR_REINIT_BITS(nkwq); - nkwq->kw_lword = mgen; - nkwq->kw_uword = ugen; - nkwq->kw_sword = rw_wc; - nkwq->kw_owner = tid; - - - for (i=0; i< KSYN_QUEUE_MAX; i++) - ksyn_queue_init(&nkwq->kw_ksynqueues[i]); - - lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr); - - //pthread_list_lock_spin(); - pthread_list_lock(); - /* see whether it is alread allocated */ - kwq = ksyn_wq_hash_lookup(mutex, p, flags, object, offset); - - if (kwq != NULL) { - if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) { - LIST_REMOVE(kwq, kw_list); - kwq->kw_pflags &= 
~KSYN_WQ_FLIST; - num_infreekwq--; - num_reusekwq++; - } - if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype &KSYN_WQTYPE_MASK)) { - if ((kwq->kw_inqueue == 0) && (kwq->kw_pre_rwwc ==0) && (kwq->kw_pre_intrcount == 0)) { - if (kwq->kw_iocount == 0) { - kwq->kw_addr = mutex; - kwq->kw_flags = flags; - kwq->kw_object = object; - kwq->kw_offset = offset; - kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK); - CLEAR_REINIT_BITS(kwq); - CLEAR_INTR_PREPOST_BITS(kwq); - CLEAR_PREPOST_BITS(kwq); - kwq->kw_lword = mgen; - kwq->kw_uword = ugen; - kwq->kw_sword = rw_wc; - kwq->kw_owner = tid; - } else if ((kwq->kw_iocount == 1) && (kwq->kw_dropcount == kwq->kw_iocount)) { - kwq->kw_pflags |= KSYN_WQ_WAITING; - /* wait for the wq to be free */ - (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0); - - lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp); - zfree(kwq_zone, nkwq); - /* will acquire lock again */ - - goto loop; - } else { - __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type(2)\n"); - pthread_list_unlock(); - lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp); - zfree(kwq_zone, nkwq); - return EBUSY; - } - } else { - __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type(3)\n"); - pthread_list_unlock(); - lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp); - zfree(kwq_zone, nkwq); - return EBUSY; - } - } - kwq->kw_iocount++; - if (wqtype == KSYN_WQTYPE_MUTEXDROP) - kwq->kw_dropcount++; - if (kwqp != NULL) - *kwqp = kwq; - pthread_list_unlock(); - lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp); - zfree(kwq_zone, nkwq); - return (0); - } - kwq = nkwq; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVSEQ | DBG_FUNC_NONE, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword, 0xffff, 0); -#endif /* _PSYNCH_TRACE_ */ - if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) - { - kwq->kw_pflags |= KSYN_WQ_SHARED; - LIST_INSERT_HEAD(&hashptr[kwq->kw_object & pthhash], kwq, kw_hash); - } else - LIST_INSERT_HEAD(&hashptr[mutex & pthhash], kwq, kw_hash); - - kwq->kw_pflags |= KSYN_WQ_INHASH; - num_total_kwq++; - - pthread_list_unlock(); - - if (kwqp != NULL) - *kwqp = kwq; - return (0); -} - -/* Reference from find is dropped here. 
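 *
 * A hedged editorial sketch, not from the original file, of the reference
 * lifecycle: every ksyn_wqfind() takes a kw_iocount reference and each
 * ksyn_wqrelease() drops one.  On the last drop an otherwise idle kwq is
 * either freed immediately (qfreenow) or timestamped and parked on
 * pth_free_list for the delayed psynch_wq_cleanup() reaper:
 *
 *	if (--kwq->kw_iocount == 0 && kwq_is_idle(kwq)) {	// hypothetical
 *		microuptime(&kwq->kw_ts);		// age for the reaper
 *		LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
 *		kwq->kw_pflags |= KSYN_WQ_FLIST;
 *	}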
The last drop starts the free process if needed. */
-void
-ksyn_wqrelease(ksyn_wait_queue_t kwq, ksyn_wait_queue_t ckwq, int qfreenow, int wqtype)
-{
-	uint64_t deadline;
-	struct timeval t;
-	int sched = 0;
-	ksyn_wait_queue_t free_elem = NULL;
-	ksyn_wait_queue_t free_elem1 = NULL;
-
-	//pthread_list_lock_spin();
-	pthread_list_lock();
-	kwq->kw_iocount--;
-	if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
-		kwq->kw_dropcount--;
-	}
-	if (kwq->kw_iocount == 0) {
-		if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
-			/* someone is waiting for the waitqueue, wake them up */
-			kwq->kw_pflags &= ~KSYN_WQ_WAITING;
-			wakeup(&kwq->kw_pflags);
-		}
-
-		if ((kwq->kw_pre_rwwc == 0) && (kwq->kw_inqueue == 0) && (kwq->kw_pre_intrcount == 0)) {
-			if (qfreenow == 0) {
-				microuptime(&kwq->kw_ts);
-				LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
-				kwq->kw_pflags |= KSYN_WQ_FLIST;
-				num_infreekwq++;
-				free_elem = NULL;
-			} else {
-				/* remove from the only list it is in, i.e. the hash */
-				kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
-				LIST_REMOVE(kwq, kw_hash);
-				lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
-				num_total_kwq--;
-				num_freekwq++;
-				free_elem = kwq;
-			}
-		} else
-			free_elem = NULL;
-		if (qfreenow == 0)
-			sched = 1;
-	}
-
-	if (ckwq != NULL) {
-		ckwq->kw_iocount--;
-		if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
-			kwq->kw_dropcount--;
-		}
-		if (ckwq->kw_iocount == 0) {
-			if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
-				/* someone is waiting for the waitqueue, wake them up */
-				kwq->kw_pflags &= ~KSYN_WQ_WAITING;
-				wakeup(&kwq->kw_pflags);
-			}
-			if ((ckwq->kw_pre_rwwc == 0) && (ckwq->kw_inqueue == 0) && (ckwq->kw_pre_intrcount == 0)) {
-				if (qfreenow == 0) {
-					/* mark for free if we can */
-					microuptime(&ckwq->kw_ts);
-					LIST_INSERT_HEAD(&pth_free_list, ckwq, kw_list);
-					ckwq->kw_pflags |= KSYN_WQ_FLIST;
-					num_infreekwq++;
-					free_elem1 = NULL;
-				} else {
-					/* remove from the only list it is in, i.e. the hash */
-					ckwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
-					LIST_REMOVE(ckwq, kw_hash);
-					lck_mtx_destroy(&ckwq->kw_lock, pthread_lck_grp);
-					num_total_kwq--;
-					num_freekwq++;
-					free_elem1 = ckwq;
-				}
-			} else
-				free_elem1 = NULL;
-			if (qfreenow == 0)
-				sched = 1;
-		}
-	}
-
-	if (sched == 1 && psynch_cleanupset == 0) {
-		psynch_cleanupset = 1;
-		microuptime(&t);
-		t.tv_sec += KSYN_CLEANUP_DEADLINE;
-
-		deadline = tvtoabstime(&t);
-		thread_call_enter_delayed(psynch_thcall, deadline);
-	}
-	pthread_list_unlock();
-	if (free_elem != NULL)
-		zfree(kwq_zone, free_elem);
-	if (free_elem1 != NULL)
-		zfree(kwq_zone, free_elem1);
-}
-
-/* responsible for freeing the waitqueues */
-void
-psynch_wq_cleanup(__unused void * param, __unused void * param1)
-{
-	ksyn_wait_queue_t kwq;
-	struct timeval t;
-	LIST_HEAD(, ksyn_wait_queue) freelist = {NULL};
-	int count = 0, delayed = 0, diff;
-	uint64_t deadline = 0;
-
-	//pthread_list_lock_spin();
-	pthread_list_lock();
-
-	num_addedfreekwq = num_infreekwq - num_lastfreekwqcount;
-	num_lastfreekwqcount = num_infreekwq;
-	microuptime(&t);
-
-	LIST_FOREACH(kwq, &pth_free_list, kw_list) {
-		if ((kwq->kw_iocount != 0) || (kwq->kw_pre_rwwc != 0) || (kwq->kw_inqueue != 0) || (kwq->kw_pre_intrcount != 0)) {
-			/* still in use */
-			continue;
-		}
-		diff = t.tv_sec - kwq->kw_ts.tv_sec;
-		if (diff < 0)
-			diff *= -1;
-		if (diff >= KSYN_CLEANUP_DEADLINE) {
-			/* out of hash */
-			kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
-			num_infreekwq--;
-			num_freekwq++;
-			LIST_REMOVE(kwq, kw_hash);
-			LIST_REMOVE(kwq, kw_list);
-			LIST_INSERT_HEAD(&freelist, kwq, kw_list);
-			count++;
-			num_total_kwq--;
-		} else {
-
delayed = 1; - } - - } - if (delayed != 0) { - t.tv_sec += KSYN_CLEANUP_DEADLINE; - - deadline = tvtoabstime(&t); - thread_call_enter_delayed(psynch_thcall, deadline); - psynch_cleanupset = 1; - } else - psynch_cleanupset = 0; - - pthread_list_unlock(); - - - while ((kwq = LIST_FIRST(&freelist)) != NULL) { - LIST_REMOVE(kwq, kw_list); - lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp); - zfree(kwq_zone, kwq); - } -} - - -kern_return_t -#if _PSYNCH_TRACE_ -ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int mylog, thread_continue_t continuation, void * parameter) -#else -ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, __unused int mylog, thread_continue_t continuation, void * parameter) -#endif -{ - kern_return_t kret; -#if _PSYNCH_TRACE_ - int error = 0; - uthread_t uth = NULL; -#endif /* _PSYNCH_TRACE_ */ - - kwe->kwe_kwqqueue = (void *)kwq; - assert_wait_deadline(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, abstime); - ksyn_wqunlock(kwq); - - if (continuation == THREAD_CONTINUE_NULL) - kret = thread_block(NULL); - else - kret = thread_block_parameter(continuation, parameter); - -#if _PSYNCH_TRACE_ - switch (kret) { - case THREAD_TIMED_OUT: - error = ETIMEDOUT; - break; - case THREAD_INTERRUPTED: - error = EINTR; - break; - } - uth = current_uthread(); -#if defined(__i386__) - if (mylog != 0) - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf4f3f2f1, (uint32_t)uth, kret, 0, 0); -#else - if (mylog != 0) - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xeeeeeeee, kret, error, 0xeeeeeeee, 0); -#endif -#endif /* _PSYNCH_TRACE_ */ - - return(kret); -} - -kern_return_t -ksyn_wakeup_thread(__unused ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe) -{ - kern_return_t kret; -#if _PSYNCH_TRACE_ - uthread_t uth = NULL; -#endif /* _PSYNCH_TRACE_ */ - - kret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval); - - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("ksyn_wakeup_thread: panic waking up thread %x\n", kret); -#if _PSYNCH_TRACE_ - uth = kwe->kwe_uth; -#if defined(__i386__) - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf1f2f3f4, (uint32_t)uth, kret, 0, 0); -#endif -#endif /* _PSYNCH_TRACE_ */ - - return(kret); -} - -/* find the true shared obect/offset for shared mutexes */ -int -ksyn_findobj(uint64_t mutex, uint64_t * objectp, uint64_t * offsetp) -{ - vm_page_info_basic_data_t info; - kern_return_t kret; - mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT; - - kret = vm_map_page_info(current_map(), mutex, VM_PAGE_INFO_BASIC, - (vm_page_info_t)&info, &count); - - if (kret != KERN_SUCCESS) - return(EINVAL); - - if (objectp != NULL) - *objectp = (uint64_t)info.object_id; - if (offsetp != NULL) - *offsetp = (uint64_t)info.offset; - - return(0); -} - - -/* lowest of kw_fr, kw_flr, kw_fwr, kw_fywr */ -int -kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * typep, uint32_t lowest[]) -{ - - uint32_t kw_fr, kw_flr, kw_fwr, kw_fywr, low; - int type = 0, lowtype, typenum[4]; - uint32_t numbers[4]; - int count = 0, i; - - - if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { - type |= PTH_RWSHFT_TYPE_READ; - /* read entries are present */ - if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) { - kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum; - if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0)) - kw_fr = 
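[Note: ksyn_findobj above resolves a user address to its backing VM object id and offset via vm_map_page_info. For PTHREAD_PROCESS_SHARED objects that pair, not the per-process mapping address, is what ksyn_wqfind keys its hash on, so two processes mapping the same page at different addresses reach the same kernel wait queue. A minimal userspace sketch of that keying; table size and names are assumptions for illustration, not xnu code:

    #include <stdint.h>
    #include <stdio.h>

    #define PTHHASH_MASK 127   /* assumed: a power-of-two bucket mask like pthhash */

    /* Shared queues hash on the VM object id; private ones on the user address. */
    static unsigned ksyn_bucket(int shared, uint64_t object, uint64_t uaddr)
    {
        return (unsigned)((shared ? object : uaddr) & PTHHASH_MASK);
    }

    int main(void)
    {
        uint64_t object = 0x1234beefu;                    /* same backing object...   */
        uint64_t addr_a = 0x7f1000, addr_b = 0x9f2000;    /* ...two mapping addresses */
        printf("proc A: %u, proc B: %u\n",
               ksyn_bucket(1, object, addr_a),
               ksyn_bucket(1, object, addr_b));           /* same bucket, same kwq */
        return 0;
    }
]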
premgen; - } else - kw_fr = premgen; - - lowest[KSYN_QUEUE_READ] = kw_fr; - numbers[count]= kw_fr; - typenum[count] = PTH_RW_TYPE_READ; - count++; - } else - lowest[KSYN_QUEUE_READ] = 0; - - if ((kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0)) { - type |= PTH_RWSHFT_TYPE_LREAD; - /* read entries are present */ - if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count != 0) { - kw_flr = kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum; - if (((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0) && (is_seqlower(premgen, kw_flr) != 0)) - kw_flr = premgen; - } else - kw_flr = premgen; - - lowest[KSYN_QUEUE_LREAD] = kw_flr; - numbers[count]= kw_flr; - typenum[count] = PTH_RW_TYPE_LREAD; - count++; - } else - lowest[KSYN_QUEUE_LREAD] = 0; - - - if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) { - type |= PTH_RWSHFT_TYPE_WRITE; - /* read entries are present */ - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) { - kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum; - if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0)) - kw_fwr = premgen; - } else - kw_fwr = premgen; - - lowest[KSYN_QUEUE_WRITER] = kw_fwr; - numbers[count]= kw_fwr; - typenum[count] = PTH_RW_TYPE_WRITE; - count++; - } else - lowest[KSYN_QUEUE_WRITER] = 0; - - if ((kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0)) { - type |= PTH_RWSHFT_TYPE_YWRITE; - /* read entries are present */ - if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) { - kw_fywr = kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_firstnum; - if (((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) && (is_seqlower(premgen, kw_fywr) != 0)) - kw_fywr = premgen; - } else - kw_fywr = premgen; - - lowest[KSYN_QUEUE_YWRITER] = kw_fywr; - numbers[count]= kw_fywr; - typenum[count] = PTH_RW_TYPE_YWRITE; - count++; - } else - lowest[KSYN_QUEUE_YWRITER] = 0; - - -#if __TESTPANICS__ - if (count == 0) - panic("nothing in the queue???\n"); -#endif /* __TESTPANICS__ */ - - low = numbers[0]; - lowtype = typenum[0]; - if (count > 1) { - for (i = 1; i< count; i++) { - if(is_seqlower(numbers[i] , low) != 0) { - low = numbers[i]; - lowtype = typenum[i]; - } - } - } - type |= lowtype; - - if (typep != 0) - *typep = type; - return(0); -} - -/* wakeup readers and longreaders to upto the writer limits */ -int -ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t updatebits, int * wokenp) -{ - ksyn_waitq_element_t kwe = NULL; - ksyn_queue_t kq; - int failedwakeup = 0; - int numwoken = 0; - kern_return_t kret = KERN_SUCCESS; - uint32_t lbits = 0; - - lbits = updatebits; - if (longreadset != 0) { - /* clear all read and longreads */ - while ((kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_READ], kwq)) != NULL) { - kwe->kwe_psynchretval = lbits; - kwe->kwe_kwqqueue = NULL; - - numwoken++; - kret = ksyn_wakeup_thread(kwq, kwe); -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("ksyn_wakeupreaders: panic waking up readers\n"); -#endif /* __TESTPANICS__ */ - if (kret == KERN_NOT_WAITING) { - failedwakeup++; - } - } - while ((kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_LREAD], kwq)) != NULL) { - kwe->kwe_psynchretval = lbits; - kwe->kwe_kwqqueue = NULL; - numwoken++; - kret = ksyn_wakeup_thread(kwq, kwe); -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != 
KERN_NOT_WAITING)) - panic("ksyn_wakeupreaders: panic waking up lreaders\n"); -#endif /* __TESTPANICS__ */ - if (kret == KERN_NOT_WAITING) { - failedwakeup++; - } - } - } else { - kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; - while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) { - kwe = ksyn_queue_removefirst(kq, kwq); - kwe->kwe_psynchretval = lbits; - kwe->kwe_kwqqueue = NULL; - numwoken++; - kret = ksyn_wakeup_thread(kwq, kwe); -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("ksyn_wakeupreaders: panic waking up readers\n"); -#endif /* __TESTPANICS__ */ - if (kret == KERN_NOT_WAITING) { - failedwakeup++; - } - } - } - - if (wokenp != NULL) - *wokenp = numwoken; - return(failedwakeup); -} - - -/* This handles the unlock grants for next set on rw_unlock() or on arrival of all preposted waiters */ -int -kwq_handle_unlock(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t rw_wc, uint32_t * updatep, int flags, int * blockp, uint32_t premgen) -{ - uint32_t low_reader, low_writer, low_ywriter, low_lreader,limitrdnum; - int rwtype, error=0; - int longreadset = 0, allreaders, failed; - uint32_t updatebits=0, numneeded = 0;; - int prepost = flags & KW_UNLOCK_PREPOST; - thread_t preth = THREAD_NULL; - ksyn_waitq_element_t kwe; - uthread_t uth; - thread_t th; - int woken = 0; - int block = 1; - uint32_t lowest[KSYN_QUEUE_MAX]; /* np need for upgrade as it is handled separately */ - kern_return_t kret = KERN_SUCCESS; - ksyn_queue_t kq; - int curthreturns = 0; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_START, (uint32_t)kwq->kw_addr, mgen, premgen, rw_wc, 0); -#endif /* _PSYNCH_TRACE_ */ - if (prepost != 0) { - preth = current_thread(); - } - - kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ]; - kwq->kw_lastseqword = rw_wc; - kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK); - kwq->kw_overlapwatch = 0; - - /* upgrade pending */ - if (is_rw_ubit_set(mgen)) { -#if __TESTPANICS__ - panic("NO UBIT SHOULD BE SET\n"); - updatebits = PTH_RWL_EBIT | PTH_RWL_KBIT; - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) - updatebits |= PTH_RWL_WBIT; - if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) - updatebits |= PTH_RWL_YBIT; - if (prepost != 0) { - if((flags & KW_UNLOCK_PREPOST_UPGRADE) != 0) { - /* upgrade thread calling the prepost */ - /* upgrade granted */ - block = 0; - goto out; - } - - } - if (kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE].ksynq_count > 0) { - kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_UPGRADE], kwq); - - kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; - kwe->kwe_psynchretval = updatebits; - kwe->kwe_kwqqueue = NULL; - kret = ksyn_wakeup_thread(kwq, kwe); - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("kwq_handle_unlock: panic waking up the upgrade thread \n"); - if (kret == KERN_NOT_WAITING) { - kwq->kw_pre_intrcount = 1; /* actually a count */ - kwq->kw_pre_intrseq = mgen; - kwq->kw_pre_intrretbits = kwe->kwe_psynchretval; - kwq->kw_pre_intrtype = PTH_RW_TYPE_UPGRADE; - } - error = 0; - } else { - panic("panic unable to find the upgrade thread\n"); - } -#endif /* __TESTPANICS__ */ - ksyn_wqunlock(kwq); - goto out; - } - - error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest); -#if __TESTPANICS__ - if (error != 0) - panic("rwunlock: cannot fails to slot next round of threads"); -#endif /* __TESTPANICS__ */ - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, 
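[Note: ksyn_wakeupreaders treats KERN_NOT_WAITING from ksyn_wakeup_thread as a benign race: the waiter was selected but has not actually blocked yet, so the wakeup is tallied as failed and the caller records the tally in kw_pre_intrcount as a prepost for the late arriver to consume. A toy sketch of that counting shape; wake_one is a hypothetical stand-in, not the Mach API:

    #include <stdio.h>

    enum { WAKE_OK, WAKE_NOT_WAITING };  /* stand-ins for KERN_SUCCESS / KERN_NOT_WAITING */

    /* Pretend the even-numbered waiters raced us and have not blocked yet. */
    static int wake_one(int waiter) { return (waiter % 2) ? WAKE_OK : WAKE_NOT_WAITING; }

    int main(void)
    {
        int failed = 0, woken = 0;
        for (int w = 0; w < 6; w++) {
            if (wake_one(w) == WAKE_NOT_WAITING)
                failed++;               /* record as a prepost for the late arriver */
            else
                woken++;
        }
        printf("woken %d, preposted %d\n", woken, failed);
        return 0;
    }
]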
(uint32_t)kwq->kw_addr, 1, rwtype, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - low_reader = lowest[KSYN_QUEUE_READ]; - low_lreader = lowest[KSYN_QUEUE_LREAD]; - low_writer = lowest[KSYN_QUEUE_WRITER]; - low_ywriter = lowest[KSYN_QUEUE_YWRITER]; - - - longreadset = 0; - allreaders = 0; - updatebits = 0; - - - switch (rwtype & PTH_RW_TYPE_MASK) { - case PTH_RW_TYPE_LREAD: - longreadset = 1; - - case PTH_RW_TYPE_READ: { - /* what about the preflight which is LREAD or READ ?? */ - if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) { - if (rwtype & PTH_RWSHFT_TYPE_WRITE) - updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT); - if (rwtype & PTH_RWSHFT_TYPE_YWRITE) - updatebits |= PTH_RWL_YBIT; - } - limitrdnum = 0; - if (longreadset == 0) { - switch (rwtype & (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE)) { - case PTH_RWSHFT_TYPE_WRITE: - limitrdnum = low_writer; - if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) && - (is_seqlower(low_lreader, limitrdnum) != 0)) { - longreadset = 1; - } - if (((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0) && - (is_seqlower(premgen, limitrdnum) != 0)) { - longreadset = 1; - } - break; - case PTH_RWSHFT_TYPE_YWRITE: - /* all read ? */ - if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) && - (is_seqlower(low_lreader, low_ywriter) != 0)) { - longreadset = 1; - } else - allreaders = 1; - if (((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0) && - (is_seqlower(premgen, low_ywriter) != 0)) { - longreadset = 1; - allreaders = 0; - } - - - break; - case (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE): - if (is_seqlower(low_ywriter, low_writer) != 0) { - limitrdnum = low_ywriter; - } else - limitrdnum = low_writer; - if (((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) && - (is_seqlower(low_lreader, limitrdnum) != 0)) { - longreadset = 1; - } - if (((flags & KW_UNLOCK_PREPOST_LREADLOCK) != 0) && - (is_seqlower(premgen, limitrdnum) != 0)) { - longreadset = 1; - } - break; - default: /* no writers at all */ - if ((rwtype & PTH_RWSHFT_TYPE_LREAD) != 0) - longreadset = 1; - else - allreaders = 1; - }; - - } - numneeded = 0; - if (longreadset != 0) { - updatebits |= PTH_RWL_LBIT; - updatebits &= ~PTH_RWL_KBIT; - if ((flags & (KW_UNLOCK_PREPOST_READLOCK | KW_UNLOCK_PREPOST_LREADLOCK)) != 0) - numneeded += 1; - numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count; - numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count; - updatebits += (numneeded << PTHRW_COUNT_SHIFT); - kwq->kw_overlapwatch = 1; - } else { - /* no longread, evaluate number of readers */ - - switch (rwtype & (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE)) { - case PTH_RWSHFT_TYPE_WRITE: - limitrdnum = low_writer; - numneeded = ksyn_queue_count_tolowest(kq, limitrdnum); - if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) { - curthreturns = 1; - numneeded += 1; - } - break; - case PTH_RWSHFT_TYPE_YWRITE: - /* all read ? 
*/ - numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count; - if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) { - curthreturns = 1; - numneeded += 1; - } - break; - case (PTH_RWSHFT_TYPE_WRITE | PTH_RWSHFT_TYPE_YWRITE): - limitrdnum = low_writer; - numneeded = ksyn_queue_count_tolowest(kq, limitrdnum); - if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) { - curthreturns = 1; - numneeded += 1; - } - break; - default: /* no writers at all */ - /* no other waiters only readers */ - kwq->kw_overlapwatch = 1; - numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count; - if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) { - curthreturns = 1; - numneeded += 1; - } - }; - - updatebits += (numneeded << PTHRW_COUNT_SHIFT); - } - kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; - - if (curthreturns != 0) { - block = 0; - uth = current_uthread(); - kwe = &uth->uu_kwe; - kwe->kwe_psynchretval = updatebits; - } - - - failed = ksyn_wakeupreaders(kwq, limitrdnum, longreadset, allreaders, updatebits, &woken); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0); -#endif /* _PSYNCH_TRACE_ */ - - if (failed != 0) { - kwq->kw_pre_intrcount = failed; /* actually a count */ - kwq->kw_pre_intrseq = limitrdnum; - kwq->kw_pre_intrretbits = updatebits; - if (longreadset) - kwq->kw_pre_intrtype = PTH_RW_TYPE_LREAD; - else - kwq->kw_pre_intrtype = PTH_RW_TYPE_READ; - } - - error = 0; - - if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0)) - panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits); - } - break; - - case PTH_RW_TYPE_WRITE: { - - /* only one thread is goin to be granted */ - updatebits |= (PTHRW_INC); - updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT; - - if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) { - block = 0; - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) - updatebits |= PTH_RWL_WBIT; - if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0) - updatebits |= PTH_RWL_YBIT; - th = preth; - uth = get_bsdthread_info(th); - kwe = &uth->uu_kwe; - kwe->kwe_psynchretval = updatebits; - } else { - /* we are not granting writelock to the preposting thread */ - kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwq); - - /* if there are writers present or the preposting write thread then W bit is to be set */ - if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) ) - updatebits |= PTH_RWL_WBIT; - if ((rwtype & PTH_RWSHFT_TYPE_YWRITE) != 0) - updatebits |= PTH_RWL_YBIT; - kwe->kwe_psynchretval = updatebits; - kwe->kwe_kwqqueue = NULL; - /* setup next in the queue */ - kret = ksyn_wakeup_thread(kwq, kwe); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, kret, 0, 0); -#endif /* _PSYNCH_TRACE_ */ -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("kwq_handle_unlock: panic waking up writer\n"); -#endif /* __TESTPANICS__ */ - if (kret == KERN_NOT_WAITING) { - kwq->kw_pre_intrcount = 1; /* actually a count */ - kwq->kw_pre_intrseq = low_writer; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE; - } - error = 0; - } - kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; - if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT)) - 
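[Note: throughout kwq_handle_unlock the 32-bit updatebits word is both internal state and the value handed back to waiters: the bits below PTHRW_COUNT_SHIFT carry lock-state flags (K, E, W, Y, L), while everything above is a grant count, so adding PTHRW_INC grants one more waiter. A hedged sketch of that packing; the bit positions here are assumed for illustration only:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative values; the real layout lives in the pthread headers. */
    #define COUNT_SHIFT 8
    #define INC         (1u << COUNT_SHIFT)   /* one more grant */
    #define KBIT        0x01
    #define EBIT        0x02
    #define WBIT        0x04

    int main(void)
    {
        uint32_t updatebits = KBIT | EBIT;    /* exclusive grant...        */
        updatebits += INC;                    /* ...to exactly one thread  */
        updatebits |= WBIT;                   /* more writers still queued */
        printf("grants=%u flags=0x%02x\n",
               updatebits >> COUNT_SHIFT, updatebits & (INC - 1));
        return 0;
    }
]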
panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits); - - } - break; - - case PTH_RW_TYPE_YWRITE: { - /* can reader locks be granted ahead of this write? */ - if ((rwtype & PTH_RWSHFT_TYPE_READ) != 0) { - if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) { - if (rwtype & PTH_RWSHFT_TYPE_WRITE) - updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT); - if (rwtype & PTH_RWSHFT_TYPE_YWRITE) - updatebits |= PTH_RWL_YBIT; - } - - if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) { - /* is lowest reader less than the low writer? */ - if (is_seqlower(low_reader,low_writer) == 0) - goto yielditis; - - numneeded = ksyn_queue_count_tolowest(kq, low_writer); - updatebits += (numneeded << PTHRW_COUNT_SHIFT); - if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, low_writer) != 0)) { - uth = current_uthread(); - kwe = &uth->uu_kwe; - /* add one more */ - updatebits += PTHRW_INC; - kwe->kwe_psynchretval = updatebits; - block = 0; - } - - kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; - - /* there will be readers to wakeup , no need to check for woken */ - failed = ksyn_wakeupreaders(kwq, low_writer, 0, 0, updatebits, NULL); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0); -#endif /* _PSYNCH_TRACE_ */ - if (failed != 0) { - kwq->kw_pre_intrcount = failed; /* actually a count */ - kwq->kw_pre_intrseq = low_writer; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_READ; - } - error = 0; - } else { - /* wakeup all readers */ - numneeded = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count; - updatebits += (numneeded << PTHRW_COUNT_SHIFT); - if ((prepost != 0) && ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) { - uth = current_uthread(); - kwe = &uth->uu_kwe; - updatebits += PTHRW_INC; - kwe->kwe_psynchretval = updatebits; - block = 0; - } - kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; - failed = ksyn_wakeupreaders(kwq, low_writer, 0, 1, updatebits, &woken); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 2, woken, failed, 0); -#endif /* _PSYNCH_TRACE_ */ - if (failed != 0) { - kwq->kw_pre_intrcount = failed; /* actually a count */ - kwq->kw_pre_intrseq = kwq->kw_highseq; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_READ; - } - error = 0; - } - } else { -yielditis: - /* no reads, so granting yeilding writes */ - updatebits |= PTHRW_INC; - updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT; - - if (((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) && (low_writer == premgen)) { - /* preposting yielding write thread is being granted exclusive lock */ - - block = 0; - - if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) - updatebits |= PTH_RWL_WBIT; - else if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) - updatebits |= PTH_RWL_YBIT; - - th = preth; - uth = get_bsdthread_info(th); - kwe = &uth->uu_kwe; - kwe->kwe_psynchretval = updatebits; - } else { - /* we are granting yield writelock to some other thread */ - kwe = ksyn_queue_removefirst(&kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER], kwq); - - if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) - updatebits |= PTH_RWL_WBIT; - /* if there are ywriters present or the preposting ywrite thread then W bit is to be set */ - else if ((kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_YWRLOCK) != 0) ) - updatebits |= PTH_RWL_YBIT; - - kwe->kwe_psynchretval = updatebits; - kwe->kwe_kwqqueue = NULL; - - kret = 
ksyn_wakeup_thread(kwq, kwe); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, kret, 0, 0); -#endif /* _PSYNCH_TRACE_ */ -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("kwq_handle_unlock : panic waking up readers\n"); -#endif /* __TESTPANICS__ */ - if (kret == KERN_NOT_WAITING) { - kwq->kw_pre_intrcount = 1; /* actually a count */ - kwq->kw_pre_intrseq = low_ywriter; - kwq->kw_pre_intrretbits = updatebits; - kwq->kw_pre_intrtype = PTH_RW_TYPE_YWRITE; - } - error = 0; - } - kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits; - } - } - break; - - default: - panic("rwunlock: invalid type for lock grants"); - - }; - - -out: - if (updatep != NULL) - *updatep = updatebits; - if (blockp != NULL) - *blockp = block; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWHANDLEU | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0, updatebits, block, 0); -#endif /* _PSYNCH_TRACE_ */ - return(error); -} - -int -kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t lgenval, __unused uint32_t ugenval, uint32_t rw_wc, uint32_t *updatebitsp, __unused int flags , int * blockp) -{ - uint32_t highword = kwq->kw_nextseqword & PTHRW_COUNT_MASK; - uint32_t lowword = kwq->kw_lastseqword & PTHRW_COUNT_MASK; - uint32_t val=0; - int withinseq; - - - /* overlap is set, so no need to check for valid state for overlap */ - - withinseq = ((is_seqlower_eq(rw_wc, highword) != 0) || (is_seqhigher_eq(lowword, rw_wc) != 0)); - - if (withinseq != 0) { - if ((kwq->kw_nextseqword & PTH_RWL_LBIT) == 0) { - /* if no writers ahead, overlap granted */ - if ((lgenval & PTH_RWL_WBIT) == 0) { - goto grantoverlap; - } - } else { - /* Lbit is set, and writers ahead does not count */ - goto grantoverlap; - } - } - - *blockp = 1; - return(0); - -grantoverlap: - /* increase the next expected seq by one */ - kwq->kw_nextseqword += PTHRW_INC; - /* set count by one & bits from the nextseq and add M bit */ - val = PTHRW_INC; - val |= ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT); - *updatebitsp = val; - *blockp = 0; - return(0); -} - -#if NOTYET -/* handle downgrade actions */ -int -kwq_handle_downgrade(ksyn_wait_queue_t kwq, uint32_t mgen, __unused int flags, __unused uint32_t premgen, __unused int * blockp) -{ - uint32_t updatebits, lowriter = 0; - int longreadset, allreaders, count; - - /* can handle downgrade now */ - updatebits = mgen; - - longreadset = 0; - allreaders = 0; - if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 0) { - lowriter = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum; - if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count > 0) { - if (is_seqlower(kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum, lowriter) != 0) - longreadset = 1; - } - } else { - allreaders = 1; - if (kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_count > 0) { - lowriter = kwq->kw_ksynqueues[KSYN_QUEUE_YWRITER].ksynq_firstnum; - if (kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_count > 0) { - if (is_seqlower(kwq->kw_ksynqueues[KSYN_QUEUE_LREAD].ksynq_firstnum, lowriter) != 0) - longreadset = 1; - } - } - } - - count = ksyn_wakeupreaders(kwq, lowriter, longreadset, allreaders, updatebits, NULL); - if (count != 0) { - kwq->kw_pre_limrd = count; - kwq->kw_pre_limrdseq = lowriter; - kwq->kw_pre_limrdbits = lowriter; - /* need to handle prepost */ - } - return(0); -} - -#endif /* NOTYET */ - -/************* Indiv queue support routines ************************/ -void -ksyn_queue_init(ksyn_queue_t kq) -{ - 
TAILQ_INIT(&kq->ksynq_kwelist); - kq->ksynq_count = 0; - kq->ksynq_firstnum = 0; - kq->ksynq_lastnum = 0; -} - -int -ksyn_queue_insert(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t mgen, struct uthread * uth, ksyn_waitq_element_t kwe, int fit) -{ - uint32_t lockseq = mgen & PTHRW_COUNT_MASK; - ksyn_waitq_element_t q_kwe, r_kwe; - int res = 0; - uthread_t nuth = NULL; - - if (kq->ksynq_count == 0) { - TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list); - kq->ksynq_firstnum = lockseq; - kq->ksynq_lastnum = lockseq; - goto out; - } - - if (fit == FIRSTFIT) { - /* TBD: if retry bit is set for mutex, add it to the head */ - /* firstfit, arriving order */ - TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list); - if (is_seqlower (lockseq, kq->ksynq_firstnum) != 0) - kq->ksynq_firstnum = lockseq; - if (is_seqhigher (lockseq, kq->ksynq_lastnum) != 0) - kq->ksynq_lastnum = lockseq; - goto out; - } - - if ((lockseq == kq->ksynq_firstnum) || (lockseq == kq->ksynq_lastnum)) { - /* During prepost when a thread is getting cancelled, we could have two with same seq */ - if (kwe->kwe_flags == KWE_THREAD_PREPOST) { - q_kwe = ksyn_queue_find_seq(kwq, kq, lockseq, 0); - if ((q_kwe != NULL) && ((nuth = (uthread_t)q_kwe->kwe_uth) != NULL) && - ((nuth->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL)) { - TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list); - goto out; - - } else { - __FAILEDUSERTEST__("ksyn_queue_insert: two threads with same lockseq "); - res = EBUSY; - goto out1; - } - } else { - __FAILEDUSERTEST__("ksyn_queue_insert: two threads with same lockseq "); - res = EBUSY; - goto out1; - } - } - - /* check for next seq one */ - if (is_seqlower(kq->ksynq_lastnum, lockseq) != 0) { - TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list); - kq->ksynq_lastnum = lockseq; - goto out; - } - - if (is_seqlower(lockseq, kq->ksynq_firstnum) != 0) { - TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list); - kq->ksynq_firstnum = lockseq; - goto out; - } - - /* goto slow insert mode */ - TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) { - if (is_seqhigher(q_kwe->kwe_lockseq, lockseq) != 0) { - TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list); - goto out; - } - } - -#if __TESTPANICS__ - panic("failed to insert \n"); -#endif /* __TESTPANICS__ */ - -out: - if (uth != NULL) - kwe->kwe_uth = uth; - kq->ksynq_count++; - kwq->kw_inqueue++; - update_low_high(kwq, lockseq); -out1: - return(res); -} - -ksyn_waitq_element_t -ksyn_queue_removefirst(ksyn_queue_t kq, ksyn_wait_queue_t kwq) -{ - ksyn_waitq_element_t kwe = NULL; - ksyn_waitq_element_t q_kwe; - uint32_t curseq; - - if (kq->ksynq_count != 0) { - kwe = TAILQ_FIRST(&kq->ksynq_kwelist); - TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); - curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK; - kq->ksynq_count--; - kwq->kw_inqueue--; - - if(kq->ksynq_count != 0) { - q_kwe = TAILQ_FIRST(&kq->ksynq_kwelist); - kq->ksynq_firstnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK); - } else { - kq->ksynq_firstnum = 0; - kq->ksynq_lastnum = 0; - - } - if (kwq->kw_inqueue == 0) { - kwq->kw_lowseq = 0; - kwq->kw_highseq = 0; - } else { - if (kwq->kw_lowseq == curseq) - kwq->kw_lowseq = find_nextlowseq(kwq); - if (kwq->kw_highseq == curseq) - kwq->kw_highseq = find_nexthighseq(kwq); - } - } - return(kwe); -} - -void -ksyn_queue_removeitem(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe) -{ - ksyn_waitq_element_t q_kwe; - uint32_t curseq; - - if (kq->ksynq_count > 0) { - TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list); - kq->ksynq_count--; - 
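[Note: ksyn_queue_insert keeps each wait queue sorted by lock sequence and caches ksynq_firstnum/ksynq_lastnum so the common arrivals (new head or new tail) skip the scan; only out-of-order sequences fall through to the TAILQ_FOREACH walk. A toy version of the same shape using <sys/queue.h>, with plain integer ordering standing in for the wraparound is_seqlower/is_seqhigher comparisons:

    #include <sys/queue.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct elem { unsigned seq; TAILQ_ENTRY(elem) link; };
    TAILQ_HEAD(elist, elem);

    static void insert_sorted(struct elist *q, struct elem *e)
    {
        struct elem *it;
        /* fast paths: empty queue, extends the tail, extends the head */
        if (TAILQ_EMPTY(q) || e->seq >= TAILQ_LAST(q, elist)->seq) {
            TAILQ_INSERT_TAIL(q, e, link);
            return;
        }
        if (e->seq <= TAILQ_FIRST(q)->seq) {
            TAILQ_INSERT_HEAD(q, e, link);
            return;
        }
        TAILQ_FOREACH(it, q, link)        /* slow path: first higher seq */
            if (it->seq > e->seq) { TAILQ_INSERT_BEFORE(it, e, link); return; }
    }

    int main(void)
    {
        struct elist q = TAILQ_HEAD_INITIALIZER(q);
        unsigned in[] = { 5, 1, 3 };
        for (int i = 0; i < 3; i++) {
            struct elem *e = malloc(sizeof *e);
            e->seq = in[i];
            insert_sorted(&q, e);
        }
        struct elem *it;
        TAILQ_FOREACH(it, &q, link) printf("%u ", it->seq);   /* prints: 1 3 5 */
        printf("\n");
        return 0;
    }
]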
if(kq->ksynq_count != 0) { - q_kwe = TAILQ_FIRST(&kq->ksynq_kwelist); - kq->ksynq_firstnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK); - q_kwe = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head); - kq->ksynq_lastnum = (q_kwe->kwe_lockseq & PTHRW_COUNT_MASK); - } else { - kq->ksynq_firstnum = 0; - kq->ksynq_lastnum = 0; - - } - kwq->kw_inqueue--; - curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK; - if (kwq->kw_inqueue == 0) { - kwq->kw_lowseq = 0; - kwq->kw_highseq = 0; - } else { - if (kwq->kw_lowseq == curseq) - kwq->kw_lowseq = find_nextlowseq(kwq); - if (kwq->kw_highseq == curseq) - kwq->kw_highseq = find_nexthighseq(kwq); - } - } -} - -/* find the thread and removes from the queue */ -ksyn_waitq_element_t -ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq, int remove) -{ - ksyn_waitq_element_t q_kwe, r_kwe; - - /* TBD: bail out if higher seq is seen */ - /* case where wrap in the tail of the queue exists */ - TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) { - if ((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) { - if (remove != 0) - ksyn_queue_removeitem(kwq, kq, q_kwe); - return(q_kwe); - } - } - return(NULL); -} - - -/* find the thread at the target sequence (or a broadcast/prepost at or above) */ -ksyn_waitq_element_t -ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen) -{ - ksyn_waitq_element_t q_kwe, r_kwe; - uint32_t lgen = (cgen & PTHRW_COUNT_MASK); - - /* case where wrap in the tail of the queue exists */ - TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) { - - /* skip the lower entries */ - if (is_seqlower((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), cgen) != 0) - continue; - - switch (q_kwe->kwe_flags) { - - case KWE_THREAD_INWAIT: - if ((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) - break; - /* fall thru */ - - case KWE_THREAD_BROADCAST: - case KWE_THREAD_PREPOST: - return (q_kwe); - } - } - return(NULL); -} - -/* look for a thread at lockseq, a */ -ksyn_waitq_element_t -ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq) -{ - ksyn_waitq_element_t q_kwe, r_kwe, t_kwe = NULL; - - /* case where wrap in the tail of the queue exists */ - TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) { - - switch (q_kwe->kwe_flags) { - - case KWE_THREAD_PREPOST: - if (is_seqhigher((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq)) - return t_kwe; - /* fall thru */ - - case KWE_THREAD_BROADCAST: - /* match any prepost at our same uptoseq or any broadcast above */ - if (is_seqlower((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq)) - continue; - return q_kwe; - - case KWE_THREAD_INWAIT: - /* - * Match any (non-cancelled) thread at or below our upto sequence - - * but prefer an exact match to our signal sequence (if present) to - * keep exact matches happening. 
- */ - if (is_seqhigher((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), uptoseq)) - return t_kwe; - - if (q_kwe->kwe_kwqqueue == kwq) { - uthread_t ut = q_kwe->kwe_uth; - if ((ut->uu_flag & ( UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) != UT_CANCEL) { - /* if equal or higher than our signal sequence, return this one */ - if (is_seqhigher_eq((q_kwe->kwe_lockseq & PTHRW_COUNT_MASK), signalseq)) - return q_kwe; - - /* otherwise, just remember this eligible thread and move on */ - if (t_kwe == NULL) - t_kwe = q_kwe; - } - } - break; - - default: - panic("ksyn_queue_find_signalseq(): unknow wait queue element type (%d)\n", q_kwe->kwe_flags); - break; - } - } - return t_kwe; -} - - -int -ksyn_queue_move_tofree(ksyn_wait_queue_t ckwq, ksyn_queue_t kq, uint32_t upto, ksyn_queue_t kfreeq, int all, int release) -{ - ksyn_waitq_element_t kwe; - int count = 0; - uint32_t tseq = upto & PTHRW_COUNT_MASK; -#if _PSYNCH_TRACE_ - uthread_t ut; -#endif /* _PSYNCH_TRACE_ */ - - ksyn_queue_init(kfreeq); - - /* free all the entries, must be only fakes.. */ - kwe = TAILQ_FIRST(&kq->ksynq_kwelist); - while (kwe != NULL) { - if ((all == 0) && (is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), tseq) != 0)) - break; - if (kwe->kwe_flags == KWE_THREAD_INWAIT) { - /* - * This scenario is typically noticed when the cvar is - * reinited and the new waiters are waiting. We can - * return them as spurious wait so the cvar state gets - * reset correctly. - */ -#if _PSYNCH_TRACE_ - ut = (uthread_t)kwe->kwe_uth; -#endif /* _PSYNCH_TRACE_ */ - - /* skip canceled ones */ - /* wake the rest */ - ksyn_queue_removeitem(ckwq, kq, kwe); - /* set M bit to indicate to waking CV to retun Inc val */ - kwe->kwe_psynchretval = PTHRW_INC | (PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT); - kwe->kwe_kwqqueue = NULL; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf3, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0); -#endif /* _PSYNCH_TRACE_ */ - (void)ksyn_wakeup_thread(ckwq, kwe); - } else { - ksyn_queue_removeitem(ckwq, kq, kwe); - TAILQ_INSERT_TAIL(&kfreeq->ksynq_kwelist, kwe, kwe_list); - ckwq->kw_fakecount--; - count++; - } - kwe = TAILQ_FIRST(&kq->ksynq_kwelist); - } - - if ((release != 0) && (count != 0)) { - kwe = TAILQ_FIRST(&kfreeq->ksynq_kwelist); - while (kwe != NULL) { - TAILQ_REMOVE(&kfreeq->ksynq_kwelist, kwe, kwe_list); - zfree(kwe_zone, kwe); - kwe = TAILQ_FIRST(&kfreeq->ksynq_kwelist); - } - } - - return(count); -} - -/*************************************************************************/ - -void -update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq) -{ - if (kwq->kw_inqueue == 1) { - kwq->kw_lowseq = lockseq; - kwq->kw_highseq = lockseq; - } else { - if (is_seqlower(lockseq, kwq->kw_lowseq) != 0) - kwq->kw_lowseq = lockseq; - if (is_seqhigher(lockseq, kwq->kw_highseq) != 0) - kwq->kw_highseq = lockseq; - } -} - -uint32_t -find_nextlowseq(ksyn_wait_queue_t kwq) -{ - uint32_t numbers[KSYN_QUEUE_MAX]; - int count = 0, i; - uint32_t lowest; - - for(i = 0; i< KSYN_QUEUE_MAX; i++) { - if (kwq->kw_ksynqueues[i].ksynq_count != 0) { - numbers[count]= kwq->kw_ksynqueues[i].ksynq_firstnum; - count++; - } - } - - if (count == 0) - return(0); - lowest = numbers[0]; - if (count > 1) { - for (i = 1; i< count; i++) { - if(is_seqlower(numbers[i] , lowest) != 0) - lowest = numbers[count]; - - } - } - return(lowest); -} - -uint32_t -find_nexthighseq(ksyn_wait_queue_t kwq) -{ - uint32_t 
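[Note: ksyn_queue_find_signalseq above is a two-preference scan: walk waiters in sequence order up to uptoseq, return the first one at or above signalseq, and otherwise fall back to the lowest eligible waiter remembered along the way. A toy rendering with plain comparisons; cancellation handling and wraparound are omitted:

    #include <stdio.h>

    static int find_signal(const unsigned *seqs, int n, unsigned uptoseq, unsigned signalseq)
    {
        int fallback = -1;
        for (int i = 0; i < n; i++) {
            if (seqs[i] > uptoseq)        /* past the range: stop scanning */
                break;
            if (seqs[i] >= signalseq)     /* exact-or-higher match wins */
                return i;
            if (fallback < 0)             /* remember the first eligible waiter */
                fallback = i;
        }
        return fallback;
    }

    int main(void)
    {
        unsigned q[] = { 2, 4, 6, 9 };
        printf("%d\n", find_signal(q, 4, 8, 5));   /* index 2 (seq 6) */
        return 0;
    }
]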
numbers[KSYN_QUEUE_MAX]; - int count = 0, i; - uint32_t highest; - - for(i = 0; i< KSYN_QUEUE_MAX; i++) { - if (kwq->kw_ksynqueues[i].ksynq_count != 0) { - numbers[count]= kwq->kw_ksynqueues[i].ksynq_lastnum; - count++; - } - } - - - - if (count == 0) - return(0); - highest = numbers[0]; - if (count > 1) { - for (i = 1; i< count; i++) { - if(is_seqhigher(numbers[i], highest) != 0) - highest = numbers[i]; - - } - } - return(highest); -} - -int -is_seqlower(uint32_t x, uint32_t y) -{ - if (x < y) { - if ((y-x) < (PTHRW_MAX_READERS/2)) - return(1); - } else { - if ((x-y) > (PTHRW_MAX_READERS/2)) - return(1); - } - return(0); -} - -int -is_seqlower_eq(uint32_t x, uint32_t y) -{ - if (x==y) - return(1); - else - return(is_seqlower(x,y)); -} - -int -is_seqhigher(uint32_t x, uint32_t y) -{ - if (x > y) { - if ((x-y) < (PTHRW_MAX_READERS/2)) - return(1); - } else { - if ((y-x) > (PTHRW_MAX_READERS/2)) - return(1); - } - return(0); -} - -int -is_seqhigher_eq(uint32_t x, uint32_t y) -{ - if (x==y) - return(1); - else - return(is_seqhigher(x,y)); -} - - -int -find_diff(uint32_t upto, uint32_t lowest) -{ - uint32_t diff; - - if (upto == lowest) - return(0); -#if 0 - diff = diff_genseq(upto, lowest); -#else - if (is_seqlower(upto, lowest) != 0) - diff = diff_genseq(lowest, upto); - else - diff = diff_genseq(upto, lowest); -#endif - diff = (diff >> PTHRW_COUNT_SHIFT); - return(diff); -} - - -int -find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp) -{ - int i; - uint32_t count = 0; - - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_START, 0, 0, upto, nwaiters, 0); -#endif /* _PSYNCH_TRACE_ */ - - for (i= 0; i< KSYN_QUEUE_MAX; i++) { - count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto); -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_NONE, 0, 1, i, count, 0); -#endif /* _PSYNCH_TRACE_ */ - if (count >= nwaiters) { - break; - } - } - - if (countp != NULL) { - *countp = count; - } -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_FSEQTILL | DBG_FUNC_END, 0, 0, count, nwaiters, 0); -#endif /* _PSYNCH_TRACE_ */ - if (count == 0) - return(0); - else if (count >= nwaiters) - return(1); - else - return(0); -} - - -uint32_t -ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto) -{ - uint32_t i = 0; - ksyn_waitq_element_t kwe, newkwe; - uint32_t curval; - - /* if nothing or the first num is greater than upto, return none */ - if ((kq->ksynq_count == 0) || (is_seqhigher(kq->ksynq_firstnum, upto) != 0)) - return(0); - if (upto == kq->ksynq_firstnum) - return(1); - - TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) { - curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK); - if (upto == curval) { - i++; - break; - } else if (is_seqhigher(curval, upto) != 0) { - break; - } else { - /* seq is lower */ - i++; - } - } - return(i); -} - - -/* handles the cond broadcast of cvar and returns number of woken threads and bits for syscall return */ -void -ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t * updatep) -{ - kern_return_t kret; - ksyn_queue_t kq; - ksyn_waitq_element_t kwe, newkwe; - uint32_t updatebits = 0; - struct ksyn_queue kfreeq; - uthread_t ut; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_START, 0xcbcbcbc2, upto, 0, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - - ksyn_queue_init(&kfreeq); - kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER]; - - retry: - TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) { - - if 
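[Note: the is_seqlower/is_seqhigher pair defines serial-number arithmetic over a half-window of PTHRW_MAX_READERS/2, in the spirit of TCP sequence comparison: once two values are more than half the space apart, the numerically larger one is treated as older. (In passing, find_nextlowseq above assigns lowest = numbers[count] where numbers[i], as used by find_nexthighseq, appears intended.) A runnable check of the wraparound behavior, with an assumed half-window constant:

    #include <stdint.h>
    #include <stdio.h>

    #define HALF 0x40000000u   /* stand-in for PTHRW_MAX_READERS/2 */

    static int is_seqlower(uint32_t x, uint32_t y)
    {
        if (x < y)
            return (y - x) < HALF;    /* close ahead: genuinely lower   */
        return (x - y) > HALF;        /* far behind: wrapped, so lower  */
    }

    int main(void)
    {
        printf("%d\n", is_seqlower(10, 20));              /* 1: plain case   */
        printf("%d\n", is_seqlower(0xfffffff0u, 0x10u));  /* 1: wrapped case */
        printf("%d\n", is_seqlower(20, 10));              /* 0 */
        return 0;
    }
]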
(is_seqhigher((kwe->kwe_lockseq & PTHRW_COUNT_MASK), upto)) /* outside our range */ - break; - - /* now handle the one we found (inside the range) */ - switch (kwe->kwe_flags) { - - case KWE_THREAD_INWAIT: - ut = (uthread_t)kwe->kwe_uth; - - /* skip canceled ones */ - if (kwe->kwe_kwqqueue != ckwq || - (ut->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) - break; - - /* wake the rest */ - ksyn_queue_removeitem(ckwq, kq, kwe); - kwe->kwe_psynchretval = PTH_RWL_MTX_WAIT; - kwe->kwe_kwqqueue = NULL; -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xcafecaf2, (uint32_t)(thread_tid((struct thread *)(((struct uthread *)(kwe->kwe_uth))->uu_context.vc_thread))), kwe->kwe_psynchretval, 0); -#endif /* _PSYNCH_TRACE_ */ - kret = ksyn_wakeup_thread(ckwq, kwe); -#if __TESTPANICS__ - if ((kret != KERN_SUCCESS) && (kret != KERN_NOT_WAITING)) - panic("ksyn_wakeupreaders: panic waking up readers\n"); -#endif /* __TESTPANICS__ */ - updatebits += PTHRW_INC; - break; - - case KWE_THREAD_BROADCAST: - case KWE_THREAD_PREPOST: - ksyn_queue_removeitem(ckwq, kq, kwe); - TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list); - ckwq->kw_fakecount--; - break; - - default: - panic("unknown kweflags\n"); - break; - } - } - - /* Need to enter a broadcast in the queue (if not already at L == S) */ - - if ((ckwq->kw_lword & PTHRW_COUNT_MASK) != (ckwq->kw_sword & PTHRW_COUNT_MASK)) { - - newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist); - if (newkwe == NULL) { - ksyn_wqunlock(ckwq); - newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone); - TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list); - ksyn_wqlock(ckwq); - goto retry; - } - - TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list); - bzero(newkwe, sizeof(struct ksyn_waitq_element)); - newkwe->kwe_kwqqueue = ckwq; - newkwe->kwe_flags = KWE_THREAD_BROADCAST; - newkwe->kwe_lockseq = upto; - newkwe->kwe_count = 0; - newkwe->kwe_uth = NULL; - newkwe->kwe_psynchretval = 0; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_NONE, (uint32_t)ckwq->kw_addr, 0xfeedfeed, upto, 0, 0); -#endif /* _PSYNCH_TRACE_ */ - - (void)ksyn_queue_insert(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], upto, NULL, newkwe, SEQFIT); - ckwq->kw_fakecount++; - } - - /* free up any remaining things stumbled across above */ - kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist); - while (kwe != NULL) { - TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list); - zfree(kwe_zone, kwe); - kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist); - } - - if (updatep != NULL) - *updatep = updatebits; - -#if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVHBROAD | DBG_FUNC_END, 0xeeeeeeed, updatebits, 0, 0, 0); -#endif /* _PSYNCH_TRACE_ */ -} - -void -ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep, ksyn_queue_t kfreeq, int release) -{ - uint32_t updatebits = 0; - - if (updatep != NULL) - updatebits = *updatep; - if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { - updatebits |= PTH_RWS_CV_CBIT; - if (ckwq->kw_inqueue != 0) { - /* FREE THE QUEUE */ - ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], ckwq->kw_lword, kfreeq, 0, release); -#if __TESTPANICS__ - if (ckwq->kw_inqueue != 0) - panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S"); -#endif /* __TESTPANICS__ */ - } - ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0; - ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT; - } else if ((ckwq->kw_inqueue != 0) && (ckwq->kw_fakecount == 
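[Note: ksyn_handle_cvbroad's retry loop is the classic sleeping-allocation idiom: zalloc can block, so when no recycled element is available the code drops the wait-queue lock, allocates, relocks, and jumps back to rescan, since the queue may have changed while unlocked. A userspace sketch of the same shape, with a pthread mutex and malloc standing in for ksyn_wqlock and zalloc:

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
    static void *spare;                     /* kfreeq stand-in: one cached element */

    /* Returns with qlock held and a fresh element in hand. */
    static void *get_element_locked(void)
    {
        void *e;
        pthread_mutex_lock(&qlock);
        for (;;) {
            if ((e = spare) != NULL) { spare = NULL; return e; }
            pthread_mutex_unlock(&qlock);   /* cannot allocate under the lock */
            e = malloc(64);                 /* may sleep */
            pthread_mutex_lock(&qlock);
            spare = e;                      /* park it, then loop: the state  */
        }                                   /* must be re-examined under lock */
    }

    int main(void)
    {
        void *e = get_element_locked();
        pthread_mutex_unlock(&qlock);
        free(e);
        return 0;
    }

The invariant being preserved is that every decision about the queue is made with the lock held, even if the allocation forced a gap.]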
ckwq->kw_inqueue)) { - /* only fake entries are present in the queue */ - updatebits |= PTH_RWS_CV_PBIT; - } - if (updatep != NULL) - *updatep = updatebits; -} - -void -psynch_zoneinit(void) -{ - kwq_zone = (zone_t)zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_waitqueue zone"); - kwe_zone = (zone_t)zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element zone"); -} -#endif /* PSYNCH */ diff --git a/bsd/kern/pthread_synch.c b/bsd/kern/pthread_synch.c deleted file mode 100644 index d037ee0a1..000000000 --- a/bsd/kern/pthread_synch.c +++ /dev/null @@ -1,2416 +0,0 @@ -/* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995-2005 Apple Computer, Inc. 
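[Note: psynch_zoneinit reads as zinit(element size, maximum zone memory, allocation chunk, name): zones carve fixed-size elements out of preallocated chunks, which is why kwq and kwe objects are returned with zfree rather than a general-purpose free throughout this file. A minimal userspace free-list analogue of the idea; sizes and names here are toys, not the kernel zone allocator:

    #include <stdlib.h>

    /* Toy fixed-size "zone": a free list refilled one chunk at a time. */
    struct zone { void *freelist; size_t esize; int chunk; };

    static void *zone_alloc(struct zone *z)
    {
        if (!z->freelist) {                       /* refill: carve a chunk */
            char *blk = malloc(z->esize * z->chunk);
            if (!blk) return NULL;
            for (int i = 0; i < z->chunk; i++) {  /* thread elements together */
                *(void **)(blk + i * z->esize) = z->freelist;
                z->freelist = blk + i * z->esize;
            }
        }
        void *e = z->freelist;
        z->freelist = *(void **)e;
        return e;
    }

    static void zone_free(struct zone *z, void *e)
    {
        *(void **)e = z->freelist;                /* push back on the free list */
        z->freelist = e;
    }

    int main(void)
    {
        struct zone kwe_zone = { 0, 64, 16 };     /* 64-byte elements, 16 per chunk */
        void *e = zone_alloc(&kwe_zone);
        zone_free(&kwe_zone, e);
        return 0;
    }
]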
All Rights Reserved */ -/* - * pthread_synch.c - */ - -#define _PTHREAD_CONDATTR_T -#define _PTHREAD_COND_T -#define _PTHREAD_MUTEXATTR_T -#define _PTHREAD_MUTEX_T -#define _PTHREAD_RWLOCKATTR_T -#define _PTHREAD_RWLOCK_T - -#undef pthread_mutexattr_t -#undef pthread_mutex_t -#undef pthread_condattr_t -#undef pthread_cond_t -#undef pthread_rwlockattr_t -#undef pthread_rwlock_t - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for coredump */ -#include /* for fill_procworkqueue */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for thread_exception_return */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for current_map() */ -#include -#include /* for thread_resume */ -#include -#if defined(__i386__) -#include -#include -#include -#include -#endif - -#include - -#if 0 -#undef KERNEL_DEBUG -#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT -#undef KERNEL_DEBUG1 -#define KERNEL_DEBUG1 KERNEL_DEBUG_CONSTANT1 -#endif - -lck_grp_attr_t *pthread_lck_grp_attr; -lck_grp_t *pthread_lck_grp; -lck_attr_t *pthread_lck_attr; - -extern void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64); -extern kern_return_t mach_port_deallocate(ipc_space_t, mach_port_name_t); -extern kern_return_t semaphore_signal_internal_trap(mach_port_name_t); - -extern void workqueue_thread_yielded(void); - -#if defined(__i386__) || defined(__x86_64__) -extern boolean_t is_useraddr64_canonical(uint64_t addr64); -#endif - -static boolean_t workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t th, boolean_t force_oc, - boolean_t overcommit, int oc_prio, int oc_affinity); - -static boolean_t workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, int priority); - -static void wq_runreq(proc_t p, boolean_t overcommit, uint32_t priority, thread_t th, struct threadlist *tl, - int reuse_thread, int wake_thread, int return_directly); - -static int setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, uint32_t priority, int reuse_thread, struct threadlist *tl); - -static void wq_unpark_continue(void); -static void wq_unsuspend_continue(void); - -static boolean_t workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread); -static void workqueue_removethread(struct threadlist *tl, int fromexit); -static void workqueue_lock_spin(proc_t); -static void workqueue_unlock(proc_t); - -int proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc); -int proc_setalltargetconc(pid_t pid, int32_t * targetconcp); - -#define WQ_MAXPRI_MIN 0 /* low prio queue num */ -#define WQ_MAXPRI_MAX 2 /* max prio queuenum */ -#define WQ_PRI_NUM 3 /* number of prio work queues */ - -#define C_32_STK_ALIGN 16 -#define C_64_STK_ALIGN 16 -#define C_64_REDZONE_LEN 128 -#define TRUNC_DOWN32(a,c) ((((uint32_t)a)-(c)) & ((uint32_t)(-(c)))) -#define TRUNC_DOWN64(a,c) ((((uint64_t)a)-(c)) & ((uint64_t)(-(c)))) - - -/* flag values for reuse field in the libc side _pthread_wqthread */ -#define WQ_FLAG_THREAD_PRIOMASK 0x0000ffff -#define WQ_FLAG_THREAD_OVERCOMMIT 0x00010000 /* thread is with overcommit prio */ -#define WQ_FLAG_THREAD_REUSE 0x00020000 /* thread is being reused */ -#define WQ_FLAG_THREAD_NEWSPI 0x00040000 /* the call is with new SPIs */ - -/* - * Flags filed passed to bsdthread_create and back in 
pthread_start -31 <---------------------------------> 0 -_________________________________________ -| flags(8) | policy(8) | importance(16) | ------------------------------------------ -*/ -void _pthread_start(pthread_t self, mach_port_t kport, void *(*fun)(void *), void * funarg, size_t stacksize, unsigned int flags); - -#define PTHREAD_START_CUSTOM 0x01000000 -#define PTHREAD_START_SETSCHED 0x02000000 -#define PTHREAD_START_DETACHED 0x04000000 -#define PTHREAD_START_POLICY_BITSHIFT 16 -#define PTHREAD_START_POLICY_MASK 0xff -#define PTHREAD_START_IMPORTANCE_MASK 0xffff - -#define SCHED_OTHER POLICY_TIMESHARE -#define SCHED_FIFO POLICY_FIFO -#define SCHED_RR POLICY_RR - - - -int -bsdthread_create(__unused struct proc *p, struct bsdthread_create_args *uap, user_addr_t *retval) -{ - kern_return_t kret; - void * sright; - int error = 0; - int allocated = 0; - mach_vm_offset_t stackaddr; - mach_vm_size_t th_allocsize = 0; - mach_vm_size_t user_stacksize; - mach_vm_size_t th_stacksize; - mach_vm_offset_t th_stackaddr; - mach_vm_offset_t th_stack; - mach_vm_offset_t th_pthread; - mach_port_name_t th_thport; - thread_t th; - user_addr_t user_func = uap->func; - user_addr_t user_funcarg = uap->func_arg; - user_addr_t user_stack = uap->stack; - user_addr_t user_pthread = uap->pthread; - unsigned int flags = (unsigned int)uap->flags; - vm_map_t vmap = current_map(); - task_t ctask = current_task(); - unsigned int policy, importance; - - int isLP64 = 0; - - - if ((p->p_lflag & P_LREGISTER) == 0) - return(EINVAL); -#if 0 - KERNEL_DEBUG_CONSTANT(0x9000080 | DBG_FUNC_START, flags, 0, 0, 0, 0); -#endif - - isLP64 = IS_64BIT_PROCESS(p); - - -#if defined(__i386__) || defined(__x86_64__) - stackaddr = 0xB0000000; -#else -#error Need to define a stack address hint for this architecture -#endif - kret = thread_create(ctask, &th); - if (kret != KERN_SUCCESS) - return(ENOMEM); - thread_reference(th); - - sright = (void *) convert_thread_to_port(th); - th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(ctask)); - - if ((flags & PTHREAD_START_CUSTOM) == 0) { - th_stacksize = (mach_vm_size_t)user_stack; /* if it is custom them it is stacksize */ - th_allocsize = th_stacksize + PTH_DEFAULT_GUARDSIZE + p->p_pthsize; - - kret = mach_vm_map(vmap, &stackaddr, - th_allocsize, - page_size-1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL, - 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - if (kret != KERN_SUCCESS) - kret = mach_vm_allocate(vmap, - &stackaddr, th_allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE); - if (kret != KERN_SUCCESS) { - error = ENOMEM; - goto out; - } -#if 0 - KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, th_allocsize, stackaddr, 0, 2, 0); -#endif - th_stackaddr = stackaddr; - allocated = 1; - /* - * The guard page is at the lowest address - * The stack base is the highest address - */ - kret = mach_vm_protect(vmap, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE); - - if (kret != KERN_SUCCESS) { - error = ENOMEM; - goto out1; - } - th_stack = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE); - th_pthread = (stackaddr + th_stacksize + PTH_DEFAULT_GUARDSIZE); - user_stacksize = th_stacksize; - - /* - * Pre-fault the first page of the new thread's stack and the page that will - * contain the pthread_t structure. 
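[Note: the layout comment above is the whole ABI between bsdthread_create and libc's _pthread_start: one 32-bit word packs flag bits in the top byte, a scheduling policy in the next byte, and an importance value in the low half. A quick decode check using the constants defined in this file; the policy value itself is illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define PTHREAD_START_SETSCHED          0x02000000
    #define PTHREAD_START_POLICY_BITSHIFT   16
    #define PTHREAD_START_POLICY_MASK       0xff
    #define PTHREAD_START_IMPORTANCE_MASK   0xffff

    int main(void)
    {
        /* flags(8) | policy(8) | importance(16), per the layout comment */
        uint32_t flags = PTHREAD_START_SETSCHED
                       | (2u /* illustrative policy */ << PTHREAD_START_POLICY_BITSHIFT)
                       | 31u;                                     /* importance */
        uint32_t policy     = (flags >> PTHREAD_START_POLICY_BITSHIFT)
                              & PTHREAD_START_POLICY_MASK;
        uint32_t importance = flags & PTHREAD_START_IMPORTANCE_MASK;
        printf("setsched=%d policy=%u importance=%u\n",
               !!(flags & PTHREAD_START_SETSCHED), policy, importance);
        return 0;
    }

Note that the kernel later rebases importance against BASEPRI_DEFAULT (31) before handing it to thread_policy_set, so an importance of 31 requests no precedence change.]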
- */ - vm_fault( vmap, - vm_map_trunc_page(th_stack - PAGE_SIZE_64), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); - - vm_fault( vmap, - vm_map_trunc_page(th_pthread), - VM_PROT_READ | VM_PROT_WRITE, - FALSE, - THREAD_UNINT, NULL, 0); - } else { - th_stack = user_stack; - user_stacksize = user_stack; - th_pthread = user_pthread; -#if 0 - KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_NONE, 0, 0, 0, 3, 0); -#endif - } - -#if defined(__i386__) || defined(__x86_64__) - { - /* - * Set up i386 registers & function call. - */ - if (isLP64 == 0) { - x86_thread_state32_t state; - x86_thread_state32_t *ts = &state; - - ts->eip = (int)p->p_threadstart; - ts->eax = (unsigned int)th_pthread; - ts->ebx = (unsigned int)th_thport; - ts->ecx = (unsigned int)user_func; - ts->edx = (unsigned int)user_funcarg; - ts->edi = (unsigned int)user_stacksize; - ts->esi = (unsigned int)uap->flags; - /* - * set stack pointer - */ - ts->esp = (int)((vm_offset_t)(th_stack-C_32_STK_ALIGN)); - - thread_set_wq_state32(th, (thread_state_t)ts); - - } else { - x86_thread_state64_t state64; - x86_thread_state64_t *ts64 = &state64; - - ts64->rip = (uint64_t)p->p_threadstart; - ts64->rdi = (uint64_t)th_pthread; - ts64->rsi = (uint64_t)(th_thport); - ts64->rdx = (uint64_t)user_func; - ts64->rcx = (uint64_t)user_funcarg; - ts64->r8 = (uint64_t)user_stacksize; - ts64->r9 = (uint64_t)uap->flags; - /* - * set stack pointer aligned to 16 byte boundary - */ - ts64->rsp = (uint64_t)(th_stack - C_64_REDZONE_LEN); - - /* Disallow setting non-canonical PC or stack */ - if (!is_useraddr64_canonical(ts64->rsp) || - !is_useraddr64_canonical(ts64->rip)) { - error = EINVAL; - goto out; - } - - thread_set_wq_state64(th, (thread_state_t)ts64); - } - } -#else -#error bsdthread_create not defined for this architecture -#endif - /* Set scheduling parameters if needed */ - if ((flags & PTHREAD_START_SETSCHED) != 0) { - thread_extended_policy_data_t extinfo; - thread_precedence_policy_data_t precedinfo; -#if CONFIG_EMBEDDED - int ret = 0; -#endif /* CONFIG_EMBEDDED */ - - importance = (flags & PTHREAD_START_IMPORTANCE_MASK); -#if CONFIG_EMBEDDED - /* sets the saved importance for apple ios daemon if backgrounded. 
else returns 0 */ - ret = proc_setthread_saved_importance(th, importance); - if (ret == 0) { -#endif /* CONFIG_EMBEDDED */ - policy = (flags >> PTHREAD_START_POLICY_BITSHIFT) & PTHREAD_START_POLICY_MASK; - - if (policy == SCHED_OTHER) - extinfo.timeshare = 1; - else - extinfo.timeshare = 0; - thread_policy_set(th, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT); - -#define BASEPRI_DEFAULT 31 - precedinfo.importance = (importance - BASEPRI_DEFAULT); - thread_policy_set(th, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT); -#if CONFIG_EMBEDDED - } -#endif /* CONFIG_EMBEDDED */ - } - - kret = thread_resume(th); - if (kret != KERN_SUCCESS) { - error = EINVAL; - goto out1; - } - thread_deallocate(th); /* drop the creator reference */ -#if 0 - KERNEL_DEBUG_CONSTANT(0x9000080 |DBG_FUNC_END, error, th_pthread, 0, 0, 0); -#endif - *retval = th_pthread; - - return(0); - -out1: - if (allocated != 0) - (void)mach_vm_deallocate(vmap, stackaddr, th_allocsize); -out: - (void)mach_port_deallocate(get_task_ipcspace(ctask), th_thport); - (void)thread_terminate(th); - (void)thread_deallocate(th); - return(error); -} - -int -bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args *uap, __unused int32_t *retval) -{ - mach_vm_offset_t freeaddr; - mach_vm_size_t freesize; - kern_return_t kret; - mach_port_name_t kthport = (mach_port_name_t)uap->port; - mach_port_name_t sem = (mach_port_name_t)uap->sem; - - freeaddr = (mach_vm_offset_t)uap->stackaddr; - freesize = uap->freesize; - -#if 0 - KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, freeaddr, freesize, kthport, 0xff, 0); -#endif - if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) { - kret = mach_vm_deallocate(current_map(), freeaddr, freesize); - if (kret != KERN_SUCCESS) { - return(EINVAL); - } - } - - (void) thread_terminate(current_thread()); - if (sem != MACH_PORT_NULL) { - kret = semaphore_signal_internal_trap(sem); - if (kret != KERN_SUCCESS) { - return(EINVAL); - } - } - - if (kthport != MACH_PORT_NULL) - mach_port_deallocate(get_task_ipcspace(current_task()), kthport); - thread_exception_return(); - panic("bsdthread_terminate: still running\n"); -#if 0 - KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_END, 0, 0, 0, 0xff, 0); -#endif - return(0); -} - - -int -bsdthread_register(struct proc *p, struct bsdthread_register_args *uap, __unused int32_t *retval) -{ - /* prevent multiple registrations */ - if ((p->p_lflag & P_LREGISTER) != 0) - return(EINVAL); - /* syscall randomizer test can pass bogus values */ - if (uap->pthsize > MAX_PTHREAD_SIZE) { - return(EINVAL); - } - p->p_threadstart = uap->threadstart; - p->p_wqthread = uap->wqthread; - p->p_pthsize = uap->pthsize; - p->p_targconc = uap->targetconc_ptr; - p->p_dispatchqueue_offset = uap->dispatchqueue_offset; - proc_setregister(p); - - return(0); -} - -uint32_t wq_yielded_threshold = WQ_YIELDED_THRESHOLD; -uint32_t wq_yielded_window_usecs = WQ_YIELDED_WINDOW_USECS; -uint32_t wq_stalled_window_usecs = WQ_STALLED_WINDOW_USECS; -uint32_t wq_reduce_pool_window_usecs = WQ_REDUCE_POOL_WINDOW_USECS; -uint32_t wq_max_timer_interval_usecs = WQ_MAX_TIMER_INTERVAL_USECS; -uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS; -uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8; - - -SYSCTL_INT(_kern, OID_AUTO, wq_yielded_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_yielded_threshold, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_yielded_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - 
&wq_yielded_window_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_stalled_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_stalled_window_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_reduce_pool_window_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_reduce_pool_window_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_timer_interval_usecs, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_timer_interval_usecs, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_threads, 0, ""); - -SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED, - &wq_max_constrained_threads, 0, ""); - - -static uint32_t wq_init_constrained_limit = 1; - - -void -workqueue_init_lock(proc_t p) -{ - lck_spin_init(&p->p_wqlock, pthread_lck_grp, pthread_lck_attr); - - p->p_wqiniting = FALSE; -} - -void -workqueue_destroy_lock(proc_t p) -{ - lck_spin_destroy(&p->p_wqlock, pthread_lck_grp); -} - - -static void -workqueue_lock_spin(proc_t p) -{ - lck_spin_lock(&p->p_wqlock); -} - -static void -workqueue_unlock(proc_t p) -{ - lck_spin_unlock(&p->p_wqlock); -} - - -static void -workqueue_interval_timer_start(struct workqueue *wq) -{ - uint64_t deadline; - - if (wq->wq_timer_interval == 0) - wq->wq_timer_interval = wq_stalled_window_usecs; - else { - wq->wq_timer_interval = wq->wq_timer_interval * 2; - - if (wq->wq_timer_interval > wq_max_timer_interval_usecs) - wq->wq_timer_interval = wq_max_timer_interval_usecs; - } - clock_interval_to_deadline(wq->wq_timer_interval, 1000, &deadline); - - thread_call_enter_delayed(wq->wq_atimer_call, deadline); - - KERNEL_DEBUG(0xefffd110, wq, wq->wq_reqcount, wq->wq_flags, wq->wq_timer_interval, 0); -} - - -static boolean_t -wq_thread_is_busy(uint64_t cur_ts, uint64_t *lastblocked_tsp) -{ clock_sec_t secs; - clock_usec_t usecs; - uint64_t lastblocked_ts; - uint64_t elapsed; - - /* - * the timestamp is updated atomically w/o holding the workqueue lock - * so we need to do an atomic read of the 64 bits so that we don't see - * a mismatched pair of 32 bit reads... we accomplish this in an architecturally - * independent fashion by using OSCompareAndSwap64 to write back the - * value we grabbed... if it succeeds, then we have a good timestamp to - * evaluate... if it fails, we straddled grabbing the timestamp while it - * was being updated... treat a failed update as a busy thread since - * it implies we are about to see a really fresh timestamp anyway - */ - lastblocked_ts = *lastblocked_tsp; - - if ( !OSCompareAndSwap64((UInt64)lastblocked_ts, (UInt64)lastblocked_ts, lastblocked_tsp)) - return (TRUE); - - if (lastblocked_ts >= cur_ts) { - /* - * because the update of the timestamp when a thread blocks isn't - * serialized against us looking at it (i.e. we don't hold the workq lock) - * it's possible to have a timestamp that matches the current time or - * that even looks to be in the future relative to when we grabbed the current - * time... just treat this as a busy thread since it must have just blocked. 
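[Note: workqueue_interval_timer_start implements exponential backoff: the first arming uses the stall window, each re-arming doubles the interval, and the result is clamped at wq_max_timer_interval_usecs so a persistently busy queue is not polled too aggressively. A runnable model of that schedule; the tunable values here are illustrative stand-ins for the wq_* sysctls:

    #include <stdint.h>
    #include <stdio.h>

    #define STALLED_WINDOW_USECS     200    /* illustrative */
    #define MAX_TIMER_INTERVAL_USECS 8000   /* illustrative */

    static uint32_t next_interval(uint32_t cur)
    {
        if (cur == 0)
            return STALLED_WINDOW_USECS;    /* start at the stall window */
        cur *= 2;                           /* back off exponentially... */
        if (cur > MAX_TIMER_INTERVAL_USECS)
            cur = MAX_TIMER_INTERVAL_USECS; /* ...up to a clamp */
        return cur;
    }

    int main(void)
    {
        uint32_t iv = 0;
        for (int i = 0; i < 8; i++)
            printf("%u ", iv = next_interval(iv)); /* 200 400 800 ... 8000 8000 */
        printf("\n");
        return 0;
    }
]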
- */
- return (TRUE);
- }
- elapsed = cur_ts - lastblocked_ts;
-
- absolutetime_to_microtime(elapsed, &secs, &usecs);
-
- if (secs == 0 && usecs < wq_stalled_window_usecs)
- return (TRUE);
- return (FALSE);
-}
-
-
-#define WQ_TIMER_NEEDED(wq, start_timer) do { \
- int oldflags = wq->wq_flags; \
- \
- if ( !(oldflags & (WQ_EXITING | WQ_ATIMER_RUNNING))) { \
- if (OSCompareAndSwap(oldflags, oldflags | WQ_ATIMER_RUNNING, (UInt32 *)&wq->wq_flags)) \
- start_timer = TRUE; \
- } \
-} while (0)
-
-
-
-static void
-workqueue_add_timer(struct workqueue *wq, __unused int param1)
-{
- proc_t p;
- boolean_t start_timer = FALSE;
- boolean_t retval;
- boolean_t add_thread;
- uint32_t busycount;
-
- KERNEL_DEBUG(0xefffd108 | DBG_FUNC_START, wq, wq->wq_flags, wq->wq_nthreads, wq->wq_thidlecount, 0);
-
- p = wq->wq_proc;
-
- workqueue_lock_spin(p);
-
- /*
- * because workqueue_callback now runs w/o taking the workqueue lock
- * we are unsynchronized w/r to a change in state of the running threads...
- * to make sure we always evaluate that change, we allow it to start up
- * a new timer if the current one is actively evaluating the state
- * however, we do not need more than 2 timers fired up (1 active and 1 pending)
- * and we certainly do not want 2 active timers evaluating the state
- * simultaneously... so use WQL_ATIMER_BUSY to serialize the timers...
- * note that WQL_ATIMER_BUSY is in a different flag word from WQ_ATIMER_RUNNING since
- * it is always protected by the workq lock... WQ_ATIMER_RUNNING is evaluated
- * and set atomically since the callback function needs to manipulate it
- * w/o holding the workq lock...
- *
- * !WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY == no pending timer, no active timer
- * !WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY == no pending timer, 1 active timer
- * WQ_ATIMER_RUNNING && !WQL_ATIMER_BUSY == 1 pending timer, no active timer
- * WQ_ATIMER_RUNNING && WQL_ATIMER_BUSY == 1 pending timer, 1 active timer
- */
- while (wq->wq_lflags & WQL_ATIMER_BUSY) {
- wq->wq_lflags |= WQL_ATIMER_WAITING;
-
- assert_wait((caddr_t)wq, (THREAD_UNINT));
- workqueue_unlock(p);
-
- thread_block(THREAD_CONTINUE_NULL);
-
- workqueue_lock_spin(p);
- }
- wq->wq_lflags |= WQL_ATIMER_BUSY;
-
- /*
- * the workq lock will protect us from seeing WQ_EXITING change state, but we
- * still need to update this atomically in case someone else tries to start
- * the timer just as we're releasing it
- */
- while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags & ~WQ_ATIMER_RUNNING), (UInt32 *)&wq->wq_flags)));
-
-again:
- retval = TRUE;
- add_thread = FALSE;
-
- if ( !(wq->wq_flags & WQ_EXITING)) {
- /*
- * check to see if the stall frequency was beyond our tolerance
- * or we have work on the queue, but haven't scheduled any
- * new work within our acceptable time interval because
- * there were no idle threads left to schedule
- */
- if (wq->wq_reqcount) {
- uint32_t priority;
- uint32_t affinity_tag;
- uint32_t i;
- uint64_t curtime;
-
- for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
- if (wq->wq_requests[priority])
- break;
- }
- assert(priority < WORKQUEUE_NUMPRIOS);
-
- curtime = mach_absolute_time();
- busycount = 0;
-
- for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) {
- /*
- * if we have no idle threads, we can try to add them if needed
- */
- if (wq->wq_thidlecount == 0)
- add_thread = TRUE;
-
- /*
- * look for first affinity group that is currently not active
- * i.e.
no active threads at this priority level or higher - * and has not been active recently at this priority level or higher - */ - for (i = 0; i <= priority; i++) { - if (wq->wq_thactive_count[i][affinity_tag]) { - add_thread = FALSE; - break; - } - if (wq->wq_thscheduled_count[i][affinity_tag]) { - if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) { - add_thread = FALSE; - busycount++; - break; - } - } - } - if (add_thread == TRUE) { - retval = workqueue_addnewthread(wq, FALSE); - break; - } - } - if (wq->wq_reqcount) { - /* - * as long as we have threads to schedule, and we successfully - * scheduled new work, keep trying - */ - while (wq->wq_thidlecount && !(wq->wq_flags & WQ_EXITING)) { - /* - * workqueue_run_nextreq is responsible for - * dropping the workqueue lock in all cases - */ - retval = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, FALSE, 0, 0); - workqueue_lock_spin(p); - - if (retval == FALSE) - break; - } - if ( !(wq->wq_flags & WQ_EXITING) && wq->wq_reqcount) { - - if (wq->wq_thidlecount == 0 && retval == TRUE && add_thread == TRUE) - goto again; - - if (wq->wq_thidlecount == 0 || busycount) - WQ_TIMER_NEEDED(wq, start_timer); - - KERNEL_DEBUG(0xefffd108 | DBG_FUNC_NONE, wq, wq->wq_reqcount, wq->wq_thidlecount, busycount, 0); - } - } - } - } - if ( !(wq->wq_flags & WQ_ATIMER_RUNNING)) - wq->wq_timer_interval = 0; - - wq->wq_lflags &= ~WQL_ATIMER_BUSY; - - if ((wq->wq_flags & WQ_EXITING) || (wq->wq_lflags & WQL_ATIMER_WAITING)) { - /* - * wakeup the thread hung up in workqueue_exit or workqueue_add_timer waiting for this timer - * to finish getting out of the way - */ - wq->wq_lflags &= ~WQL_ATIMER_WAITING; - wakeup(wq); - } - KERNEL_DEBUG(0xefffd108 | DBG_FUNC_END, wq, start_timer, wq->wq_nthreads, wq->wq_thidlecount, 0); - - workqueue_unlock(p); - - if (start_timer == TRUE) - workqueue_interval_timer_start(wq); -} - - -void -workqueue_thread_yielded(void) -{ - struct workqueue *wq; - proc_t p; - - p = current_proc(); - - if ((wq = p->p_wqptr) == NULL || wq->wq_reqcount == 0) - return; - - workqueue_lock_spin(p); - - if (wq->wq_reqcount) { - uint64_t curtime; - uint64_t elapsed; - clock_sec_t secs; - clock_usec_t usecs; - - if (wq->wq_thread_yielded_count++ == 0) - wq->wq_thread_yielded_timestamp = mach_absolute_time(); - - if (wq->wq_thread_yielded_count < wq_yielded_threshold) { - workqueue_unlock(p); - return; - } - KERNEL_DEBUG(0xefffd138 | DBG_FUNC_START, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 0, 0); - - wq->wq_thread_yielded_count = 0; - - curtime = mach_absolute_time(); - elapsed = curtime - wq->wq_thread_yielded_timestamp; - absolutetime_to_microtime(elapsed, &secs, &usecs); - - if (secs == 0 && usecs < wq_yielded_window_usecs) { - - if (wq->wq_thidlecount == 0) { - workqueue_addnewthread(wq, TRUE); - /* - * 'workqueue_addnewthread' drops the workqueue lock - * when creating the new thread and then retakes it before - * returning... this window allows other threads to process - * requests, so we need to recheck for available work - * if none found, we just return... the newly created thread - * will eventually get used (if it hasn't already)... 
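For reference, the stall-window arithmetic used here (a mach_absolute_time delta compared against a microsecond window) can be reproduced in user space. An illustrative editorial sketch, with mach_timebase_info standing in for the kernel-private absolutetime_to_microtime:

    #include <stdint.h>
    #include <mach/mach_time.h>

    /* Sketch only: convert a mach_absolute_time() delta to microseconds. */
    static uint64_t
    abs_to_usecs(uint64_t elapsed)
    {
        static mach_timebase_info_data_t tb;

        if (tb.denom == 0)
            (void)mach_timebase_info(&tb);   /* ticks-to-nanoseconds ratio */

        return ((elapsed * tb.numer) / tb.denom) / 1000;   /* ns to us */
    }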
- */
- if (wq->wq_reqcount == 0) {
- workqueue_unlock(p);
- return;
- }
- }
- if (wq->wq_thidlecount) {
- uint32_t priority;
- uint32_t affinity = -1;
- boolean_t overcommit = FALSE;
- boolean_t force_oc = FALSE;
- struct uthread *uth;
- struct threadlist *tl;
-
- uth = get_bsdthread_info(current_thread());
- if ((tl = uth->uu_threadlist))
- affinity = tl->th_affinity_tag;
-
- for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) {
- if (wq->wq_requests[priority])
- break;
- }
- assert(priority < WORKQUEUE_NUMPRIOS);
-
- wq->wq_reqcount--;
- wq->wq_requests[priority]--;
-
- if (wq->wq_ocrequests[priority]) {
- wq->wq_ocrequests[priority]--;
- overcommit = TRUE;
- } else
- force_oc = TRUE;
-
- (void)workqueue_run_nextreq(p, wq, THREAD_NULL, force_oc, overcommit, priority, affinity);
- /*
- * workqueue_run_nextreq is responsible for
- * dropping the workqueue lock in all cases
- */
- KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 1, 0);
-
- return;
- }
- }
- KERNEL_DEBUG(0xefffd138 | DBG_FUNC_END, wq, wq->wq_thread_yielded_count, wq->wq_reqcount, 2, 0);
- }
- workqueue_unlock(p);
-}
-
-
-
-static void
-workqueue_callback(int type, thread_t thread)
-{
- struct uthread *uth;
- struct threadlist *tl;
- struct workqueue *wq;
-
- uth = get_bsdthread_info(thread);
- tl = uth->uu_threadlist;
- wq = tl->th_workq;
-
- switch (type) {
-
- case SCHED_CALL_BLOCK:
- {
- uint32_t old_activecount;
-
- old_activecount = OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
-
- if (old_activecount == 1) {
- boolean_t start_timer = FALSE;
- uint64_t curtime;
- UInt64 *lastblocked_ptr;
-
- /*
- * we were the last active thread on this affinity set
- * and we've got work to do
- */
- lastblocked_ptr = (UInt64 *)&wq->wq_lastblocked_ts[tl->th_priority][tl->th_affinity_tag];
- curtime = mach_absolute_time();
-
- /*
- * if we collide with another thread trying to update the last_blocked (really unlikely
- * since another thread would have to get scheduled and then block after we start down
- * this path), it's not a problem. Either timestamp is adequate, so no need to retry
- */
-
- OSCompareAndSwap64(*lastblocked_ptr, (UInt64)curtime, lastblocked_ptr);
-
- if (wq->wq_reqcount)
- WQ_TIMER_NEEDED(wq, start_timer);
-
- if (start_timer == TRUE)
- workqueue_interval_timer_start(wq);
- }
- KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_START, wq, old_activecount, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
- }
- break;
-
- case SCHED_CALL_UNBLOCK:
- /*
- * we cannot take the workqueue_lock here...
- * an UNBLOCK can occur from a timer event which
- * is run from an interrupt context... if the workqueue_lock
- * is already held by this processor, we'll deadlock...
- * the thread lock for the thread being UNBLOCKED
- * is also held
- */
- OSAddAtomic(1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]);
-
- KERNEL_DEBUG1(0xefffd020 | DBG_FUNC_END, wq, wq->wq_threads_scheduled, tl->th_priority, tl->th_affinity_tag, thread_tid(thread));
-
- break;
- }
-}
-
-
-static void
-workqueue_removethread(struct threadlist *tl, int fromexit)
-{
- struct workqueue *wq;
- struct uthread * uth;
-
- /*
- * If fromexit is set, the call is from workqueue_exit(),
- * so some cleanups are to be avoided.
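The SCHED_CALL_BLOCK bookkeeping above depends on the atomic add returning the value it replaced. A minimal editorial sketch (not part of the patch), with __sync_fetch_and_add standing in for OSAddAtomic and an illustrative name:

    #include <stdint.h>

    /*
     * Sketch only: drop the active count for this affinity bucket and
     * report whether the caller was the last active thread, the case
     * in which the add-thread timer may need to be armed.
     */
    static int
    note_thread_blocked(volatile int32_t *active_count)
    {
        int32_t old = __sync_fetch_and_add(active_count, -1);

        return (old == 1);
    }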
- */ - wq = tl->th_workq; - - TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry); - - if (fromexit == 0) { - wq->wq_nthreads--; - wq->wq_thidlecount--; - } - - /* - * Clear the threadlist pointer in uthread so - * blocked thread on wakeup for termination will - * not access the thread list as it is going to be - * freed. - */ - thread_sched_call(tl->th_thread, NULL); - - uth = get_bsdthread_info(tl->th_thread); - if (uth != (struct uthread *)0) { - uth->uu_threadlist = NULL; - } - if (fromexit == 0) { - /* during exit the lock is not held */ - workqueue_unlock(wq->wq_proc); - } - - if ( (tl->th_flags & TH_LIST_SUSPENDED) ) { - /* - * thread was created, but never used... - * need to clean up the stack and port ourselves - * since we're not going to spin up through the - * normal exit path triggered from Libc - */ - if (fromexit == 0) { - /* vm map is already deallocated when this is called from exit */ - (void)mach_vm_deallocate(wq->wq_map, tl->th_stackaddr, tl->th_allocsize); - } - (void)mach_port_deallocate(get_task_ipcspace(wq->wq_task), tl->th_thport); - - KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread)); - } else { - - KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_END, wq, (uintptr_t)thread_tid(current_thread()), wq->wq_nthreads, 0xdead, thread_tid(tl->th_thread)); - } - /* - * drop our ref on the thread - */ - thread_deallocate(tl->th_thread); - - kfree(tl, sizeof(struct threadlist)); -} - - -/* - * called with workq lock held - * dropped and retaken around thread creation - * return with workq lock held - */ -static boolean_t -workqueue_addnewthread(struct workqueue *wq, boolean_t oc_thread) -{ - struct threadlist *tl; - struct uthread *uth; - kern_return_t kret; - thread_t th; - proc_t p; - void *sright; - mach_vm_offset_t stackaddr; - - if ((wq->wq_flags & WQ_EXITING) == WQ_EXITING) - return (FALSE); - - if (wq->wq_nthreads >= wq_max_threads || wq->wq_nthreads >= (CONFIG_THREAD_MAX - 20)) { - wq->wq_lflags |= WQL_EXCEEDED_TOTAL_THREAD_LIMIT; - return (FALSE); - } - wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT; - - if (oc_thread == FALSE && wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { - /* - * if we're not creating this thread to service an overcommit request, - * then check the size of the constrained thread pool... if we've already - * reached our max for threads scheduled from this pool, don't create a new - * one... the callers of this function are prepared for failure. 
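Condensed, the admission policy spelled out above is a single predicate. An illustrative editorial sketch with names of my choosing (the real check also consults CONFIG_THREAD_MAX minus a reserve):

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch only: may workqueue_addnewthread() admit another thread? */
    static bool
    may_add_thread(uint32_t nthreads, uint32_t constrained_scheduled,
        bool overcommit, uint32_t hard_cap, uint32_t constrained_cap)
    {
        if (nthreads >= hard_cap)
            return false;          /* WQL_EXCEEDED_TOTAL_THREAD_LIMIT */

        if (!overcommit && constrained_scheduled >= constrained_cap)
            return false;          /* WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT */

        return true;
    }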
- */ - wq->wq_lflags |= WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT; - return (FALSE); - } - if (wq->wq_constrained_threads_scheduled < wq_max_constrained_threads) - wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT; - - wq->wq_nthreads++; - - p = wq->wq_proc; - workqueue_unlock(p); - - kret = thread_create_workq(wq->wq_task, (thread_continue_t)wq_unsuspend_continue, &th); - - if (kret != KERN_SUCCESS) - goto failed; - - tl = kalloc(sizeof(struct threadlist)); - bzero(tl, sizeof(struct threadlist)); - -#if defined(__i386__) || defined(__x86_64__) - stackaddr = 0xB0000000; -#else -#error Need to define a stack address hint for this architecture -#endif - tl->th_allocsize = PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE + p->p_pthsize; - - kret = mach_vm_map(wq->wq_map, &stackaddr, - tl->th_allocsize, - page_size-1, - VM_MAKE_TAG(VM_MEMORY_STACK)| VM_FLAGS_ANYWHERE , NULL, - 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, - VM_INHERIT_DEFAULT); - - if (kret != KERN_SUCCESS) { - kret = mach_vm_allocate(wq->wq_map, - &stackaddr, tl->th_allocsize, - VM_MAKE_TAG(VM_MEMORY_STACK) | VM_FLAGS_ANYWHERE); - } - if (kret == KERN_SUCCESS) { - /* - * The guard page is at the lowest address - * The stack base is the highest address - */ - kret = mach_vm_protect(wq->wq_map, stackaddr, PTH_DEFAULT_GUARDSIZE, FALSE, VM_PROT_NONE); - - if (kret != KERN_SUCCESS) - (void) mach_vm_deallocate(wq->wq_map, stackaddr, tl->th_allocsize); - } - if (kret != KERN_SUCCESS) { - (void) thread_terminate(th); - thread_deallocate(th); - - kfree(tl, sizeof(struct threadlist)); - goto failed; - } - thread_reference(th); - - sright = (void *) convert_thread_to_port(th); - tl->th_thport = ipc_port_copyout_send(sright, get_task_ipcspace(wq->wq_task)); - - thread_static_param(th, TRUE); - - tl->th_flags = TH_LIST_INITED | TH_LIST_SUSPENDED; - - tl->th_thread = th; - tl->th_workq = wq; - tl->th_stackaddr = stackaddr; - tl->th_affinity_tag = -1; - tl->th_priority = WORKQUEUE_NUMPRIOS; - tl->th_policy = -1; - - uth = get_bsdthread_info(tl->th_thread); - - workqueue_lock_spin(p); - - uth->uu_threadlist = (void *)tl; - TAILQ_INSERT_TAIL(&wq->wq_thidlelist, tl, th_entry); - - wq->wq_thidlecount++; - - KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_START, wq, wq->wq_nthreads, 0, thread_tid(current_thread()), thread_tid(tl->th_thread)); - - return (TRUE); - -failed: - workqueue_lock_spin(p); - wq->wq_nthreads--; - - return (FALSE); -} - - -int -workq_open(struct proc *p, __unused struct workq_open_args *uap, __unused int32_t *retval) -{ - struct workqueue * wq; - int wq_size; - char * ptr; - char * nptr; - uint32_t i; - uint32_t num_cpus; - int error = 0; - boolean_t need_wakeup = FALSE; - - - if ((p->p_lflag & P_LREGISTER) == 0) - return(EINVAL); - - num_cpus = ml_get_max_cpus(); - - if (wq_init_constrained_limit) { - uint32_t limit; - /* - * set up the limit for the constrained pool - * this is a virtual pool in that we don't - * maintain it on a separate idle and run list - */ - limit = num_cpus * (WORKQUEUE_NUMPRIOS + 1); - - if (limit > wq_max_constrained_threads) - wq_max_constrained_threads = limit; - - wq_init_constrained_limit = 0; - } - workqueue_lock_spin(p); - - if (p->p_wqptr == NULL) { - - while (p->p_wqiniting == TRUE) { - - assert_wait((caddr_t)&p->p_wqiniting, THREAD_UNINT); - workqueue_unlock(p); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(p); - } - if (p->p_wqptr != NULL) - goto out; - - p->p_wqiniting = TRUE; - - workqueue_unlock(p); - - wq_size = sizeof(struct workqueue) + - (num_cpus * WORKQUEUE_NUMPRIOS * 
sizeof(uint16_t)) + - (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint32_t)) + - (num_cpus * WORKQUEUE_NUMPRIOS * sizeof(uint64_t)) + - sizeof(uint32_t) + sizeof(uint64_t); - - ptr = (char *)kalloc(wq_size); - bzero(ptr, wq_size); - - wq = (struct workqueue *)ptr; - wq->wq_flags = WQ_LIST_INITED; - wq->wq_proc = p; - wq->wq_affinity_max = num_cpus; - wq->wq_task = current_task(); - wq->wq_map = current_map(); - - for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) - wq->wq_reqconc[i] = wq->wq_affinity_max; - - nptr = ptr + sizeof(struct workqueue); - - for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) { - wq->wq_thscheduled_count[i] = (uint16_t *)nptr; - nptr += (num_cpus * sizeof(uint16_t)); - } - nptr += (sizeof(uint32_t) - 1); - nptr = (char *)((uintptr_t)nptr & ~(sizeof(uint32_t) - 1)); - - for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) { - wq->wq_thactive_count[i] = (uint32_t *)nptr; - nptr += (num_cpus * sizeof(uint32_t)); - } - /* - * align nptr on a 64 bit boundary so that we can do nice - * atomic64 operations on the timestamps... - * note that we requested an extra uint64_t when calcuating - * the size for the allocation of the workqueue struct - */ - nptr += (sizeof(uint64_t) - 1); - nptr = (char *)((uintptr_t)nptr & ~(sizeof(uint64_t) - 1)); - - for (i = 0; i < WORKQUEUE_NUMPRIOS; i++) { - wq->wq_lastblocked_ts[i] = (uint64_t *)nptr; - nptr += (num_cpus * sizeof(uint64_t)); - } - TAILQ_INIT(&wq->wq_thrunlist); - TAILQ_INIT(&wq->wq_thidlelist); - - wq->wq_atimer_call = thread_call_allocate((thread_call_func_t)workqueue_add_timer, (thread_call_param_t)wq); - - workqueue_lock_spin(p); - - p->p_wqptr = (void *)wq; - p->p_wqsize = wq_size; - - p->p_wqiniting = FALSE; - need_wakeup = TRUE; - } -out: - workqueue_unlock(p); - - if (need_wakeup == TRUE) - wakeup(&p->p_wqiniting); - return(error); -} - - -int -workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, __unused int32_t *retval) -{ - struct workqueue *wq; - int error = 0; - - if ((p->p_lflag & P_LREGISTER) == 0) - return(EINVAL); - - switch (uap->options) { - - case WQOPS_QUEUE_NEWSPISUPP: - break; - - case WQOPS_QUEUE_REQTHREADS: { - /* - * for this operation, we re-purpose the affinity - * argument as the number of threads to start - */ - boolean_t overcommit = FALSE; - int priority = uap->prio; - int reqcount = uap->affinity; - - if (priority & WORKQUEUE_OVERCOMMIT) { - priority &= ~WORKQUEUE_OVERCOMMIT; - overcommit = TRUE; - } - if ((reqcount <= 0) || (priority < 0) || (priority >= WORKQUEUE_NUMPRIOS)) { - error = EINVAL; - break; - } - workqueue_lock_spin(p); - - if ((wq = (struct workqueue *)p->p_wqptr) == NULL) { - workqueue_unlock(p); - - error = EINVAL; - break; - } - if (overcommit == FALSE) { - wq->wq_reqcount += reqcount; - wq->wq_requests[priority] += reqcount; - - KERNEL_DEBUG(0xefffd008 | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0); - - while (wq->wq_reqcount) { - if (workqueue_run_one(p, wq, overcommit, priority) == FALSE) - break; - } - } else { - KERNEL_DEBUG(0xefffd13c | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0); - - while (reqcount) { - if (workqueue_run_one(p, wq, overcommit, priority) == FALSE) - break; - reqcount--; - } - if (reqcount) { - /* - * we need to delay starting some of the overcommit requests... - * we should only fail to create the overcommit threads if - * we're at the max thread limit... 
as existing threads
- * return to the kernel, we'll notice the ocrequests
- * and spin them back to user space as the overcommit variety
- */
- wq->wq_reqcount += reqcount;
- wq->wq_requests[priority] += reqcount;
- wq->wq_ocrequests[priority] += reqcount;
-
- KERNEL_DEBUG(0xefffd140 | DBG_FUNC_NONE, wq, priority, wq->wq_requests[priority], reqcount, 0);
- }
- }
- workqueue_unlock(p);
-
- }
- break;
-
- case WQOPS_THREAD_RETURN: {
- thread_t th = current_thread();
- struct uthread *uth = get_bsdthread_info(th);
-
- /* reset signal mask on the workqueue thread to default state */
- if (uth->uu_sigmask != (sigset_t)(~workq_threadmask)) {
- proc_lock(p);
- uth->uu_sigmask = ~workq_threadmask;
- proc_unlock(p);
- }
- workqueue_lock_spin(p);
-
- if ((wq = (struct workqueue *)p->p_wqptr) == NULL || (uth->uu_threadlist == NULL)) {
- workqueue_unlock(p);
-
- error = EINVAL;
- break;
- }
- KERNEL_DEBUG(0xefffd004 | DBG_FUNC_END, wq, 0, 0, 0, 0);
-
- (void)workqueue_run_nextreq(p, wq, th, FALSE, FALSE, 0, -1);
- /*
- * workqueue_run_nextreq is responsible for
- * dropping the workqueue lock in all cases
- */
- }
- break;
-
- default:
- error = EINVAL;
- break;
- }
- return (error);
-}
-
-/*
- * Routine: workqueue_mark_exiting
- *
- * Function: Mark the work queue such that new threads will not be added to the
- * work queue after we return.
- *
- * Conditions: Called against the current process.
- */
-void
-workqueue_mark_exiting(struct proc *p)
-{
- struct workqueue * wq;
-
- wq = p->p_wqptr;
- if (wq != NULL) {
-
- KERNEL_DEBUG(0x9008088 | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);
-
- workqueue_lock_spin(p);
-
- /*
- * we now arm the timer in the callback function w/o holding the workq lock...
- * we do this by setting WQ_ATIMER_RUNNING via OSCompareAndSwap in order to
- * ensure only a single timer is running and to notice that WQ_EXITING has
- * been set (we don't want to start a timer once WQ_EXITING is posted)
- *
- * so once we have successfully set WQ_EXITING, we cannot fire up a new timer...
- * therefore no need to clear the timer state atomically from the flags
- *
- * since we always hold the workq lock when dropping WQ_ATIMER_RUNNING
- * the check for and sleep until clear is protected
- */
- while ( !(OSCompareAndSwap(wq->wq_flags, (wq->wq_flags | WQ_EXITING), (UInt32 *)&wq->wq_flags)));
-
- if (wq->wq_flags & WQ_ATIMER_RUNNING) {
- if (thread_call_cancel(wq->wq_atimer_call) == TRUE)
- wq->wq_flags &= ~WQ_ATIMER_RUNNING;
- }
- while ((wq->wq_flags & WQ_ATIMER_RUNNING) || (wq->wq_lflags & WQL_ATIMER_BUSY)) {
-
- assert_wait((caddr_t)wq, (THREAD_UNINT));
- workqueue_unlock(p);
-
- thread_block(THREAD_CONTINUE_NULL);
-
- workqueue_lock_spin(p);
- }
- workqueue_unlock(p);
-
- KERNEL_DEBUG(0x9008088 | DBG_FUNC_END, 0, 0, 0, 0, 0);
- }
-}
-
-/*
- * Routine: workqueue_exit
- *
- * Function: clean up the work queue structure(s) now that there are no threads
- * left running inside the work queue (except possibly current_thread).
- *
- * Conditions: Called by the last thread in the process.
- * Called against current process.
- */
-void
-workqueue_exit(struct proc *p)
-{
- struct workqueue * wq;
- struct threadlist * tl, *tlist;
- struct uthread *uth;
- int wq_size = 0;
-
- wq = (struct workqueue *)p->p_wqptr;
- if (wq != NULL) {
-
- KERNEL_DEBUG(0x900808c | DBG_FUNC_START, p->p_wqptr, 0, 0, 0, 0);
-
- wq_size = p->p_wqsize;
- p->p_wqptr = NULL;
- p->p_wqsize = 0;
-
- /*
- * Clean up workqueue data structures for threads that exited and
- * didn't get a chance to clean up after themselves.
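Both the WQ_ATIMER_RUNNING handoff earlier and the WQ_EXITING latch here rely on the same read-modify-CAS retry loop. An illustrative editorial sketch (not part of the patch), with __sync_bool_compare_and_swap standing in for OSCompareAndSwap:

    #include <stdint.h>

    /*
     * Sketch only: set flag bits without holding a lock. Re-reading
     * *flags on every pass is what makes the retry safe; a failed CAS
     * just means another updater got in first, so we recompute from
     * the fresh value and try again.
     */
    static void
    atomic_or_flags(volatile uint32_t *flags, uint32_t bits)
    {
        uint32_t old;

        do {
            old = *flags;
        } while (!__sync_bool_compare_and_swap(flags, old, old | bits));
    }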
- */ - TAILQ_FOREACH_SAFE(tl, &wq->wq_thrunlist, th_entry, tlist) { - - thread_sched_call(tl->th_thread, NULL); - - uth = get_bsdthread_info(tl->th_thread); - if (uth != (struct uthread *)0) { - uth->uu_threadlist = NULL; - } - TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); - - /* - * drop our last ref on the thread - */ - thread_deallocate(tl->th_thread); - - kfree(tl, sizeof(struct threadlist)); - } - TAILQ_FOREACH_SAFE(tl, &wq->wq_thidlelist, th_entry, tlist) { - workqueue_removethread(tl, 1); - } - thread_call_free(wq->wq_atimer_call); - - kfree(wq, wq_size); - - KERNEL_DEBUG(0x900808c | DBG_FUNC_END, 0, 0, 0, 0, 0); - } -} - - -static int workqueue_importance[WORKQUEUE_NUMPRIOS] = -{ - 2, 0, -2, INT_MIN, -}; - -#define WORKQ_POLICY_TIMESHARE 1 - -static int workqueue_policy[WORKQUEUE_NUMPRIOS] = -{ - WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE, WORKQ_POLICY_TIMESHARE -}; - - - -static boolean_t -workqueue_run_one(proc_t p, struct workqueue *wq, boolean_t overcommit, int priority) -{ - boolean_t ran_one; - - if (wq->wq_thidlecount == 0) { - if (overcommit == FALSE) { - if (wq->wq_constrained_threads_scheduled < wq->wq_affinity_max) - workqueue_addnewthread(wq, overcommit); - } else { - workqueue_addnewthread(wq, overcommit); - - if (wq->wq_thidlecount == 0) - return (FALSE); - } - } - ran_one = workqueue_run_nextreq(p, wq, THREAD_NULL, FALSE, overcommit, priority, -1); - /* - * workqueue_run_nextreq is responsible for - * dropping the workqueue lock in all cases - */ - workqueue_lock_spin(p); - - return (ran_one); -} - - - -/* - * workqueue_run_nextreq: - * called with the workqueue lock held... - * responsible for dropping it in all cases - */ -static boolean_t -workqueue_run_nextreq(proc_t p, struct workqueue *wq, thread_t thread, - boolean_t force_oc, boolean_t overcommit, int oc_prio, int oc_affinity) -{ - thread_t th_to_run = THREAD_NULL; - thread_t th_to_park = THREAD_NULL; - int wake_thread = 0; - int reuse_thread = WQ_FLAG_THREAD_REUSE; - uint32_t priority, orig_priority; - uint32_t affinity_tag, orig_affinity_tag; - uint32_t i, n; - uint32_t busycount; - uint32_t us_to_wait; - struct threadlist *tl = NULL; - struct threadlist *ttl = NULL; - struct uthread *uth = NULL; - boolean_t start_timer = FALSE; - boolean_t adjust_counters = TRUE; - uint64_t curtime; - - - KERNEL_DEBUG(0xefffd000 | DBG_FUNC_START, wq, thread, wq->wq_thidlecount, wq->wq_reqcount, 0); - - if (thread != THREAD_NULL) { - uth = get_bsdthread_info(thread); - - if ( (tl = uth->uu_threadlist) == NULL) - panic("wq thread with no threadlist "); - } - /* - * from here until we drop the workq lock - * we can't be pre-empted since we hold - * the lock in spin mode... 
this is important - * since we have to independently update the priority - * and affinity that the thread is associated with - * and these values are used to index the multi-dimensional - * counter arrays in 'workqueue_callback' - */ -dispatch_overcommit: - - if (overcommit == TRUE || force_oc == TRUE) { - uint32_t min_scheduled = 0; - uint32_t scheduled_count; - uint32_t active_count; - uint32_t t_affinity = 0; - - priority = oc_prio; - - if ((affinity_tag = oc_affinity) == (uint32_t)-1) { - for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) { - /* - * look for the affinity group with the least number of threads - */ - scheduled_count = 0; - active_count = 0; - - for (i = 0; i <= priority; i++) { - scheduled_count += wq->wq_thscheduled_count[i][affinity_tag]; - active_count += wq->wq_thactive_count[i][affinity_tag]; - } - if (active_count == 0) { - t_affinity = affinity_tag; - break; - } - if (affinity_tag == 0 || scheduled_count < min_scheduled) { - min_scheduled = scheduled_count; - t_affinity = affinity_tag; - } - } - affinity_tag = t_affinity; - } - if (thread != THREAD_NULL) { - th_to_run = thread; - goto pick_up_work; - } - goto grab_idle_thread; - } - if (wq->wq_reqcount) { - for (priority = 0; priority < WORKQUEUE_NUMPRIOS; priority++) { - if (wq->wq_requests[priority]) - break; - } - assert(priority < WORKQUEUE_NUMPRIOS); - - if (wq->wq_ocrequests[priority] && (thread != THREAD_NULL || wq->wq_thidlecount)) { - /* - * handle delayed overcommit request... - * they have priority over normal requests - * within a given priority level - */ - wq->wq_reqcount--; - wq->wq_requests[priority]--; - wq->wq_ocrequests[priority]--; - - oc_prio = priority; - overcommit = TRUE; - - goto dispatch_overcommit; - } - } - /* - * if we get here, the work should be handled by a constrained thread - */ - if (wq->wq_reqcount == 0 || wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { - /* - * no work to do, or we're already at or over the scheduling limit for - * constrained threads... just return or park the thread... - * do not start the timer for this condition... if we don't have any work, - * we'll check again when new work arrives... if we're over the limit, we need 1 or more - * constrained threads to return to the kernel before we can dispatch additional work - */ - if ((th_to_park = thread) == THREAD_NULL) - goto out_of_work; - goto parkit; - } - - curtime = mach_absolute_time(); - - if (thread != THREAD_NULL) { - - affinity_tag = tl->th_affinity_tag; - - /* - * check to see if the affinity group this thread is - * associated with is still within the bounds of the - * specified concurrency for the priority level - * we're considering running work for - */ - if (affinity_tag < wq->wq_reqconc[priority]) { - uint32_t bcount = 0; - uint32_t acount = 0; - uint32_t tcount = 0; - - /* - * we're a worker thread from the pool... 
currently we - * are considered 'active' which means we're counted - * in "wq_thactive_count" - * add up the active counts of all the priority levels - * up to and including the one we want to schedule - */ - for (i = 0; i <= priority; i++) { - - tcount = wq->wq_thactive_count[i][affinity_tag]; - acount += tcount; - - if (tcount == 0 && wq->wq_thscheduled_count[i][affinity_tag]) { - if (wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) - bcount++; - } - } - if ((acount + bcount) == 1) { - /* - * we're the only active thread associated with our - * affinity group at this priority level and higher, - * and there are no threads considered 'busy', - * so pick up some work and keep going - */ - th_to_run = thread; - goto pick_up_work; - } - if (wq->wq_reqconc[priority] == 1) { - /* - * we have at least one other active or busy thread running at this - * priority level or higher and since we only have - * 1 affinity group to schedule against, no need - * to try and find another... we can't start up another thread to - * service the request and we already have the info - * needed to determine if we need to start a timer or not - */ - if (acount == 1) { - /* - * we're the only active thread, but we must have found - * at least 1 busy thread, so indicate that we need - * to start a timer - */ - busycount = 1; - } else - busycount = 0; - - affinity_tag = 1; - goto cant_schedule; - } - } - /* - * there's more than 1 thread running in this affinity group - * or the concurrency level has been cut back for this priority... - * let's continue on and look for an 'empty' group to run this - * work request in - */ - } - busycount = 0; - - for (affinity_tag = 0; affinity_tag < wq->wq_reqconc[priority]; affinity_tag++) { - boolean_t can_schedule; - - /* - * look for first affinity group that is currently not active - * i.e. no active threads at this priority level or higher - * and no threads that have run recently - */ - for (i = 0; i <= priority; i++) { - can_schedule = FALSE; - - if (wq->wq_thactive_count[i][affinity_tag]) - break; - - if (wq->wq_thscheduled_count[i][affinity_tag] && - wq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i][affinity_tag])) { - busycount++; - break; - } - can_schedule = TRUE; - } - if (can_schedule == TRUE) - break; - } -cant_schedule: - - if (affinity_tag >= wq->wq_reqconc[priority]) { - /* - * we've already got at least 1 thread per - * affinity group in the active state... - */ - if (busycount) { - /* - * we found at least 1 thread in the - * 'busy' state... make sure we start - * the timer because if they are the only - * threads keeping us from scheduling - * this work request, we won't get a callback - * to kick off the timer... we need to - * start it now... - */ - WQ_TIMER_NEEDED(wq, start_timer); - } - KERNEL_DEBUG(0xefffd000 | DBG_FUNC_NONE, wq, busycount, start_timer, 0, 0); - - if (thread != THREAD_NULL) { - /* - * go park this one for later - */ - th_to_park = thread; - goto parkit; - } - goto out_of_work; - } - if (thread != THREAD_NULL) { - /* - * we're overbooked on the affinity group this thread is - * currently associated with, but we have work to do - * and at least 1 idle processor, so we'll just retarget - * this thread to a new affinity group - */ - th_to_run = thread; - goto pick_up_work; - } - -grab_idle_thread: - if (wq->wq_thidlecount == 0) { - /* - * we don't have a thread to schedule, but we have - * work to do and at least 1 affinity group that - * doesn't currently have an active thread... 
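Collapsing the per-priority dimension, the selection policy described above reduces to: take a fully idle affinity group if one exists, otherwise fall back to the least-loaded one. An illustrative editorial sketch with names of my choosing:

    #include <stdint.h>

    /* Sketch only: choose an affinity group for the next work item. */
    static uint32_t
    pick_affinity_group(const uint32_t *active, const uint32_t *scheduled,
        uint32_t ngroups)
    {
        uint32_t best = 0;
        uint32_t min_scheduled = UINT32_MAX;

        for (uint32_t tag = 0; tag < ngroups; tag++) {
            if (active[tag] == 0)
                return (tag);      /* an idle group wins outright */

            if (scheduled[tag] < min_scheduled) {
                min_scheduled = scheduled[tag];
                best = tag;
            }
        }
        return (best);
    }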
- */
- WQ_TIMER_NEEDED(wq, start_timer);
-
- KERNEL_DEBUG(0xefffd118, wq, wq->wq_nthreads, start_timer, 0, 0);
-
- goto no_thread_to_run;
- }
- /*
- * we've got a candidate (affinity group with no currently
- * active threads) to start a new thread on...
- * we already know there is both work available
- * and an idle thread, so activate a thread and then
- * fall into the code that pulls a new work request...
- */
- TAILQ_FOREACH(ttl, &wq->wq_thidlelist, th_entry) {
- if (ttl->th_affinity_tag == affinity_tag || ttl->th_affinity_tag == (uint16_t)-1) {
-
- TAILQ_REMOVE(&wq->wq_thidlelist, ttl, th_entry);
- tl = ttl;
-
- break;
- }
- }
- if (tl == NULL) {
- tl = TAILQ_FIRST(&wq->wq_thidlelist);
- TAILQ_REMOVE(&wq->wq_thidlelist, tl, th_entry);
- }
- wq->wq_thidlecount--;
-
- TAILQ_INSERT_TAIL(&wq->wq_thrunlist, tl, th_entry);
-
- if ((tl->th_flags & TH_LIST_SUSPENDED) == TH_LIST_SUSPENDED) {
- tl->th_flags &= ~TH_LIST_SUSPENDED;
- reuse_thread = 0;
-
- } else if ((tl->th_flags & TH_LIST_BLOCKED) == TH_LIST_BLOCKED) {
- tl->th_flags &= ~TH_LIST_BLOCKED;
- wake_thread = 1;
- }
- tl->th_flags |= TH_LIST_RUNNING | TH_LIST_BUSY;
-
- wq->wq_threads_scheduled++;
- wq->wq_thscheduled_count[priority][affinity_tag]++;
- OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]);
-
- adjust_counters = FALSE;
- th_to_run = tl->th_thread;
-
-pick_up_work:
- if (overcommit == FALSE && force_oc == FALSE) {
- wq->wq_reqcount--;
- wq->wq_requests[priority]--;
-
- if ( !(tl->th_flags & TH_LIST_CONSTRAINED)) {
- wq->wq_constrained_threads_scheduled++;
- tl->th_flags |= TH_LIST_CONSTRAINED;
- }
- } else {
- if (tl->th_flags & TH_LIST_CONSTRAINED) {
- wq->wq_constrained_threads_scheduled--;
- tl->th_flags &= ~TH_LIST_CONSTRAINED;
- }
- }
- orig_priority = tl->th_priority;
- orig_affinity_tag = tl->th_affinity_tag;
-
- tl->th_priority = priority;
- tl->th_affinity_tag = affinity_tag;
-
- if (adjust_counters == TRUE && (orig_priority != priority || orig_affinity_tag != affinity_tag)) {
- /*
- * we need to adjust these counters based on this
- * thread's new disposition w/r to affinity and priority
- */
- OSAddAtomic(-1, &wq->wq_thactive_count[orig_priority][orig_affinity_tag]);
- OSAddAtomic(1, &wq->wq_thactive_count[priority][affinity_tag]);
-
- wq->wq_thscheduled_count[orig_priority][orig_affinity_tag]--;
- wq->wq_thscheduled_count[priority][affinity_tag]++;
- }
- wq->wq_thread_yielded_count = 0;
-
- workqueue_unlock(p);
-
- if (orig_affinity_tag != affinity_tag) {
- /*
- * this thread's affinity does not match the affinity group
- * it's being placed on (it's either a brand new thread or
- * we're retargeting an existing thread to a new group)...
- * affinity tag of 0 means no affinity...
- * but we want our tags to be 0 based because they
- * are used to index arrays, so...
- * keep it 0 based internally and bump by 1 when
- * calling out to set it
- */
- KERNEL_DEBUG(0xefffd114 | DBG_FUNC_START, wq, orig_affinity_tag, 0, 0, 0);
-
- (void)thread_affinity_set(th_to_run, affinity_tag + 1);
-
- KERNEL_DEBUG(0xefffd114 | DBG_FUNC_END, wq, affinity_tag, 0, 0, 0);
- }
- if (orig_priority != priority) {
- thread_precedence_policy_data_t precedinfo;
- thread_extended_policy_data_t extinfo;
- uint32_t policy;
-#if CONFIG_EMBEDDED
- int retval = 0;
-
- /* sets the saved importance for apple ios daemon if backgrounded. else returns 0 */
- retval = proc_setthread_saved_importance(th_to_run, workqueue_importance[priority]);
- if (retval == 0) {
-#endif /* CONFIG_EMBEDDED */
- policy = workqueue_policy[priority];
-
- KERNEL_DEBUG(0xefffd120 | DBG_FUNC_START, wq, orig_priority, tl->th_policy, 0, 0);
-
- if ((orig_priority == WORKQUEUE_BG_PRIOQUEUE) || (priority == WORKQUEUE_BG_PRIOQUEUE)) {
- if (orig_priority == WORKQUEUE_BG_PRIOQUEUE) {
- /* remove the disk throttle, importance will be reset in any case */
- proc_restore_workq_bgthreadpolicy(th_to_run);
- }
-
- if (priority == WORKQUEUE_BG_PRIOQUEUE) {
- proc_apply_workq_bgthreadpolicy(th_to_run);
- }
- }
-
- if (tl->th_policy != policy) {
- extinfo.timeshare = policy;
- (void)thread_policy_set_internal(th_to_run, THREAD_EXTENDED_POLICY, (thread_policy_t)&extinfo, THREAD_EXTENDED_POLICY_COUNT);
-
- tl->th_policy = policy;
- }
-
- precedinfo.importance = workqueue_importance[priority];
- (void)thread_policy_set_internal(th_to_run, THREAD_PRECEDENCE_POLICY, (thread_policy_t)&precedinfo, THREAD_PRECEDENCE_POLICY_COUNT);
-
-
- KERNEL_DEBUG(0xefffd120 | DBG_FUNC_END, wq, priority, policy, 0, 0);
-#if CONFIG_EMBEDDED
- }
-#endif /* CONFIG_EMBEDDED */
- }
- if (kdebug_enable) {
- int lpri = -1;
- int laffinity = -1;
- int first = -1;
- uint32_t code = 0xefffd02c | DBG_FUNC_START;
-
- for (n = 0; n < WORKQUEUE_NUMPRIOS; n++) {
- for (i = 0; i < wq->wq_affinity_max; i++) {
- if (wq->wq_thactive_count[n][i]) {
- if (lpri != -1) {
- KERNEL_DEBUG(code, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0);
- code = 0xefffd02c;
- first = 0;
- }
- lpri = n;
- laffinity = i;
- }
- }
- }
- if (lpri != -1) {
- if (first == -1)
- first = 0xeeeeeeee;
- KERNEL_DEBUG(0xefffd02c | DBG_FUNC_END, lpri, laffinity, wq->wq_thactive_count[lpri][laffinity], first, 0);
- }
- }
- /*
- * if current thread is reused for work request, does not return via unix_syscall
- */
- wq_runreq(p, overcommit, priority, th_to_run, tl, reuse_thread, wake_thread, (thread == th_to_run));
-
- KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(th_to_run), overcommit, 1, 0);
-
- return (TRUE);
-
-out_of_work:
- /*
- * we have no work to do or we are fully booked
- * w/r to running threads...
- */
-no_thread_to_run:
- workqueue_unlock(p);
-
- if (start_timer)
- workqueue_interval_timer_start(wq);
-
- KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 2, 0);
-
- return (FALSE);
-
-parkit:
- /*
- * this is a workqueue thread with no more
- * work to do...
park it for now - */ - TAILQ_REMOVE(&wq->wq_thrunlist, tl, th_entry); - tl->th_flags &= ~TH_LIST_RUNNING; - - tl->th_flags |= TH_LIST_BLOCKED; - TAILQ_INSERT_HEAD(&wq->wq_thidlelist, tl, th_entry); - - thread_sched_call(th_to_park, NULL); - - OSAddAtomic(-1, &wq->wq_thactive_count[tl->th_priority][tl->th_affinity_tag]); - wq->wq_thscheduled_count[tl->th_priority][tl->th_affinity_tag]--; - wq->wq_threads_scheduled--; - - if (tl->th_flags & TH_LIST_CONSTRAINED) { - wq->wq_constrained_threads_scheduled--; - wq->wq_lflags &= ~WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT; - tl->th_flags &= ~TH_LIST_CONSTRAINED; - } - if (wq->wq_thidlecount < 100) - us_to_wait = wq_reduce_pool_window_usecs - (wq->wq_thidlecount * (wq_reduce_pool_window_usecs / 100)); - else - us_to_wait = wq_reduce_pool_window_usecs / 100; - - wq->wq_thidlecount++; - wq->wq_lflags &= ~WQL_EXCEEDED_TOTAL_THREAD_LIMIT; - - assert_wait_timeout((caddr_t)tl, (THREAD_INTERRUPTIBLE), us_to_wait, NSEC_PER_USEC); - - workqueue_unlock(p); - - if (start_timer) - workqueue_interval_timer_start(wq); - - KERNEL_DEBUG1(0xefffd018 | DBG_FUNC_START, wq, wq->wq_threads_scheduled, wq->wq_thidlecount, us_to_wait, thread_tid(th_to_park)); - KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, wq, thread_tid(thread), 0, 3, 0); - - thread_block((thread_continue_t)wq_unpark_continue); - /* NOT REACHED */ - - return (FALSE); -} - - -static void -wq_unsuspend_continue(void) -{ - struct uthread *uth = NULL; - thread_t th_to_unsuspend; - struct threadlist *tl; - proc_t p; - - th_to_unsuspend = current_thread(); - uth = get_bsdthread_info(th_to_unsuspend); - - if (uth != NULL && (tl = uth->uu_threadlist) != NULL) { - - if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) { - /* - * most likely a normal resume of this thread occurred... - * it's also possible that the thread was aborted after we - * finished setting it up so that it could be dispatched... if - * so, thread_bootstrap_return will notice the abort and put - * the thread on the path to self-destruction - */ -normal_resume_to_user: - thread_sched_call(th_to_unsuspend, workqueue_callback); - - thread_bootstrap_return(); - } - /* - * if we get here, it's because we've been resumed due to - * an abort of this thread (process is crashing) - */ - p = current_proc(); - - workqueue_lock_spin(p); - - if (tl->th_flags & TH_LIST_SUSPENDED) { - /* - * thread has been aborted while still on our idle - * queue... remove it from our domain... - * workqueue_removethread consumes the lock - */ - workqueue_removethread(tl, 0); - - thread_bootstrap_return(); - } - while ((tl->th_flags & TH_LIST_BUSY)) { - /* - * this thread was aborted after we started making - * it runnable, but before we finished dispatching it... - * we need to wait for that process to finish, - * and we need to ask for a wakeup instead of a - * thread_resume since the abort has already resumed us - */ - tl->th_flags |= TH_LIST_NEED_WAKEUP; - - assert_wait((caddr_t)tl, (THREAD_UNINT)); - - workqueue_unlock(p); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(p); - } - workqueue_unlock(p); - /* - * we have finished setting up the thread's context... 
- * thread_bootstrap_return will take us through the abort path - * where the thread will self destruct - */ - goto normal_resume_to_user; - } - thread_bootstrap_return(); -} - - -static void -wq_unpark_continue(void) -{ - struct uthread *uth = NULL; - struct threadlist *tl; - thread_t th_to_unpark; - proc_t p; - - th_to_unpark = current_thread(); - uth = get_bsdthread_info(th_to_unpark); - - if (uth != NULL) { - if ((tl = uth->uu_threadlist) != NULL) { - - if ((tl->th_flags & (TH_LIST_RUNNING | TH_LIST_BUSY)) == TH_LIST_RUNNING) { - /* - * a normal wakeup of this thread occurred... no need - * for any synchronization with the timer and wq_runreq - */ -normal_return_to_user: - thread_sched_call(th_to_unpark, workqueue_callback); - - KERNEL_DEBUG(0xefffd018 | DBG_FUNC_END, tl->th_workq, 0, 0, 0, 0); - - thread_exception_return(); - } - p = current_proc(); - - workqueue_lock_spin(p); - - if ( !(tl->th_flags & TH_LIST_RUNNING)) { - /* - * the timer popped us out and we've not - * been moved off of the idle list - * so we should now self-destruct - * - * workqueue_removethread consumes the lock - */ - workqueue_removethread(tl, 0); - - thread_exception_return(); - } - /* - * the timer woke us up, but we have already - * started to make this a runnable thread, - * but have not yet finished that process... - * so wait for the normal wakeup - */ - while ((tl->th_flags & TH_LIST_BUSY)) { - - assert_wait((caddr_t)tl, (THREAD_UNINT)); - - workqueue_unlock(p); - - thread_block(THREAD_CONTINUE_NULL); - - workqueue_lock_spin(p); - } - /* - * we have finished setting up the thread's context - * now we can return as if we got a normal wakeup - */ - workqueue_unlock(p); - - goto normal_return_to_user; - } - } - thread_exception_return(); -} - - - -static void -wq_runreq(proc_t p, boolean_t overcommit, uint32_t priority, thread_t th, struct threadlist *tl, - int reuse_thread, int wake_thread, int return_directly) -{ - int ret = 0; - boolean_t need_resume = FALSE; - - KERNEL_DEBUG1(0xefffd004 | DBG_FUNC_START, tl->th_workq, tl->th_priority, tl->th_affinity_tag, thread_tid(current_thread()), thread_tid(th)); - - ret = setup_wqthread(p, th, overcommit, priority, reuse_thread, tl); - - if (ret != 0) - panic("setup_wqthread failed %x\n", ret); - - if (return_directly) { - KERNEL_DEBUG(0xefffd000 | DBG_FUNC_END, tl->th_workq, 0, 0, 4, 0); - - thread_exception_return(); - - panic("wq_runreq: thread_exception_return returned ...\n"); - } - if (wake_thread) { - workqueue_lock_spin(p); - - tl->th_flags &= ~TH_LIST_BUSY; - wakeup(tl); - - workqueue_unlock(p); - } else { - KERNEL_DEBUG1(0xefffd014 | DBG_FUNC_END, tl->th_workq, 0, 0, thread_tid(current_thread()), thread_tid(th)); - - workqueue_lock_spin(p); - - if (tl->th_flags & TH_LIST_NEED_WAKEUP) - wakeup(tl); - else - need_resume = TRUE; - - tl->th_flags &= ~(TH_LIST_BUSY | TH_LIST_NEED_WAKEUP); - - workqueue_unlock(p); - - if (need_resume) { - /* - * need to do this outside of the workqueue spin lock - * since thread_resume locks the thread via a full mutex - */ - thread_resume(th); - } - } -} - - -int -setup_wqthread(proc_t p, thread_t th, boolean_t overcommit, uint32_t priority, int reuse_thread, struct threadlist *tl) -{ - uint32_t flags = reuse_thread | WQ_FLAG_THREAD_NEWSPI; - - if (overcommit == TRUE) - flags |= WQ_FLAG_THREAD_OVERCOMMIT; - - flags |= priority; - -#if defined(__i386__) || defined(__x86_64__) - int isLP64 = 0; - - isLP64 = IS_64BIT_PROCESS(p); - /* - * Set up i386 registers & function call. 
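Before the two register blocks that follow, it helps to see the stack arithmetic they share in one place. An illustrative editorial sketch; the 16-byte alignment backoff and 128-byte red zone are my reading of C_32_STK_ALIGN and C_64_REDZONE_LEN, so treat the constants as assumptions:

    #include <stdint.h>

    #define STK_ALIGN_32    16     /* assumed value of C_32_STK_ALIGN */
    #define REDZONE_64      128    /* assumed value of C_64_REDZONE_LEN */

    /*
     * Sketch only: the guard page sits at the lowest addresses of the
     * allocation, the stack base is the highest address, and the initial
     * stack pointer backs off the ABI-mandated slop below the base.
     */
    static uint64_t
    initial_sp(uint64_t stackaddr, uint64_t stacksize, uint64_t guardsize,
        int is64)
    {
        uint64_t stack_top = stackaddr + stacksize + guardsize;

        return (is64 ? stack_top - REDZONE_64 : stack_top - STK_ALIGN_32);
    }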
- */
- if (isLP64 == 0) {
- x86_thread_state32_t state;
- x86_thread_state32_t *ts = &state;
-
- ts->eip = (int)p->p_wqthread;
- ts->eax = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
- ts->ebx = (unsigned int)tl->th_thport;
- ts->ecx = (unsigned int)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
- ts->edx = (unsigned int)0;
- ts->edi = (unsigned int)flags;
- ts->esi = (unsigned int)0;
- /*
- * set stack pointer
- */
- ts->esp = (int)((vm_offset_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_32_STK_ALIGN));
-
- thread_set_wq_state32(th, (thread_state_t)ts);
-
- } else {
- x86_thread_state64_t state64;
- x86_thread_state64_t *ts64 = &state64;
-
- ts64->rip = (uint64_t)p->p_wqthread;
- ts64->rdi = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE);
- ts64->rsi = (uint64_t)(tl->th_thport);
- ts64->rdx = (uint64_t)(tl->th_stackaddr + PTH_DEFAULT_GUARDSIZE);
- ts64->rcx = (uint64_t)0;
- ts64->r8 = (uint64_t)flags;
- ts64->r9 = (uint64_t)0;
-
- /*
- * set stack pointer aligned to 16 byte boundary
- */
- ts64->rsp = (uint64_t)((tl->th_stackaddr + PTH_DEFAULT_STACKSIZE + PTH_DEFAULT_GUARDSIZE) - C_64_REDZONE_LEN);
-
- thread_set_wq_state64(th, (thread_state_t)ts64);
- }
-#else
-#error setup_wqthread not defined for this architecture
-#endif
- return(0);
-}
-
-int
-fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo)
-{
- struct workqueue * wq;
- int error = 0;
- int activecount;
- uint32_t pri, affinity;
-
- workqueue_lock_spin(p);
- if ((wq = p->p_wqptr) == NULL) {
- error = EINVAL;
- goto out;
- }
- activecount = 0;
-
- for (pri = 0; pri < WORKQUEUE_NUMPRIOS; pri++) {
- for (affinity = 0; affinity < wq->wq_affinity_max; affinity++)
- activecount += wq->wq_thactive_count[pri][affinity];
- }
- pwqinfo->pwq_nthreads = wq->wq_nthreads;
- pwqinfo->pwq_runthreads = activecount;
- pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount;
- pwqinfo->pwq_state = 0;
-
- if (wq->wq_lflags & WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT)
- pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT;
-
- if (wq->wq_lflags & WQL_EXCEEDED_TOTAL_THREAD_LIMIT)
- pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT;
-
-out:
- workqueue_unlock(p);
- return(error);
-}
-
-/* Set target concurrency of one of the queue(0,1,2) with specified value */
-int
-proc_settargetconc(pid_t pid, int queuenum, int32_t targetconc)
-{
- proc_t p, self;
- uint64_t addr;
- int32_t conc = targetconc;
- int error = 0;
- vm_map_t oldmap = VM_MAP_NULL;
- int gotref = 0;
-
- self = current_proc();
- if (self->p_pid != pid) {
- /* if not on self, hold a reference on the process */
-
- if (pid == 0)
- return(EINVAL);
-
- p = proc_find(pid);
-
- if (p == PROC_NULL)
- return(ESRCH);
- gotref = 1;
-
- } else
- p = self;
-
- if ((addr = p->p_targconc) == (uint64_t)0) {
- error = EINVAL;
- goto out;
- }
-
-
- if ((queuenum >= WQ_MAXPRI_MIN) && (queuenum <= WQ_MAXPRI_MAX)) {
- addr += (queuenum * sizeof(int32_t));
- if (gotref == 1)
- oldmap = vm_map_switch(get_task_map(p->task));
- error = copyout(&conc, addr, sizeof(int32_t));
- if (gotref == 1)
- (void)vm_map_switch(oldmap);
-
- } else {
- error = EINVAL;
- }
-out:
- if (gotref == 1)
- proc_rele(p);
- return(error);
-}
-
-
-/* Set target concurrency on all the prio queues with specified value */
-int
-proc_setalltargetconc(pid_t pid, int32_t * targetconcp)
-{
- proc_t p, self;
- uint64_t addr;
- int error = 0;
- vm_map_t oldmap = VM_MAP_NULL;
- int gotref = 0;
-
- self = current_proc();
- if (self->p_pid != pid) {
- /* if not on self, hold a reference on the process */
-
- if (pid == 0)
- return(EINVAL);
-
- p = proc_find(pid);
-
- if (p == PROC_NULL)
- return(ESRCH);
- gotref = 1;
-
- } else
- p = self;
-
- if ((addr = (uint64_t)p->p_targconc) == (uint64_t)0) {
- error = EINVAL;
- goto out;
- }
-
-
- if (gotref == 1)
- oldmap = vm_map_switch(get_task_map(p->task));
-
- error = copyout(targetconcp, addr, WQ_PRI_NUM * sizeof(int32_t));
- if (gotref == 1)
- (void)vm_map_switch(oldmap);
-
-out:
- if (gotref == 1)
- proc_rele(p);
- return(error);
-}
-
-int thread_selfid(__unused struct proc *p, __unused struct thread_selfid_args *uap, uint64_t *retval)
-{
- thread_t thread = current_thread();
- *retval = thread_tid(thread);
- return KERN_SUCCESS;
-}
-
-void
-pthread_init(void)
-{
- pthread_lck_grp_attr = lck_grp_attr_alloc_init();
- pthread_lck_grp = lck_grp_alloc_init("pthread", pthread_lck_grp_attr);
-
- /*
- * allocate the lock attribute for pthread synchronizers
- */
- pthread_lck_attr = lck_attr_alloc_init();
-
- workqueue_init_lock((proc_t) get_bsdtask_info(kernel_task));
-#if PSYNCH
- pthread_list_mlock = lck_mtx_alloc_init(pthread_lck_grp, pthread_lck_attr);
-
- pth_global_hashinit();
- psynch_thcall = thread_call_allocate(psynch_wq_cleanup, NULL);
- psynch_zoneinit();
-#endif /* PSYNCH */
-}
diff --git a/bsd/kern/socket_info.c b/bsd/kern/socket_info.c
index ffbaaf456..157b47dc3 100644
--- a/bsd/kern/socket_info.c
+++ b/bsd/kern/socket_info.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2005-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2005-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- *
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -60,7 +60,8 @@ fill_sockbuf_info(struct sockbuf *sb, struct sockbuf_info *sbi) sbi->sbi_mbmax = sb->sb_mbmax; sbi->sbi_lowat = sb->sb_lowat; sbi->sbi_flags = sb->sb_flags; - sbi->sbi_timeo = (u_int32_t)(sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick; + sbi->sbi_timeo = (u_int32_t)(sb->sb_timeo.tv_sec * hz) + + sb->sb_timeo.tv_usec / tick; if (sbi->sbi_timeo == 0 && sb->sb_timeo.tv_usec != 0) sbi->sbi_timeo = 1; } @@ -75,13 +76,14 @@ fill_common_sockinfo(struct socket *so, struct socket_info *si) si->soi_state = so->so_state; si->soi_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb); if (so->so_proto) { - si->soi_protocol = so->so_proto->pr_protocol; + si->soi_protocol = SOCK_PROTO(so); if (so->so_proto->pr_domain) - si->soi_family = so->so_proto->pr_domain->dom_family; + si->soi_family = SOCK_DOM(so); else si->soi_family = 0; - } else + } else { si->soi_protocol = si->soi_family = 0; + } si->soi_qlen = so->so_qlen; si->soi_incqlen = so->so_incqlen; si->soi_qlimit = so->so_qlimit; @@ -90,161 +92,162 @@ fill_common_sockinfo(struct socket *so, struct socket_info *si) si->soi_oobmark = so->so_oobmark; fill_sockbuf_info(&so->so_snd, &si->soi_snd); fill_sockbuf_info(&so->so_rcv, &si->soi_rcv); - } errno_t fill_socketinfo(struct socket *so, struct socket_info *si) { errno_t error = 0; - int family; + int domain; short type; short protocol; - + socket_lock(so, 0); si->soi_kind = SOCKINFO_GENERIC; fill_common_sockinfo(so, si); - if (so->so_pcb == 0 || so->so_proto == 0 || so->so_proto->pr_domain == 0) + if (so->so_pcb == NULL || so->so_proto == 0 || + so->so_proto->pr_domain == NULL) goto out; - /* The kind of socket is determined by the triplet {family, type, protocol} */ - family = so->so_proto->pr_domain->dom_family; - type = so->so_proto->pr_type; - protocol = so->so_proto->pr_protocol; - switch (family) { - case AF_INET: - case AF_INET6: { - struct in_sockinfo *insi = &si->soi_proto.pri_in; - struct inpcb *inp = (struct inpcb *)so->so_pcb; - - si->soi_kind = SOCKINFO_IN; - - insi->insi_fport = inp->inp_fport; - insi->insi_lport = inp->inp_lport; - insi->insi_gencnt = inp->inp_gencnt; - insi->insi_flags = inp->inp_flags; - insi->insi_vflag = inp->inp_vflag; - insi->insi_ip_ttl = inp->inp_ip_ttl; - insi->insi_faddr.ina_6 = inp->inp_dependfaddr.inp6_foreign; - insi->insi_laddr.ina_6 = inp->inp_dependladdr.inp6_local; - insi->insi_v4.in4_tos = inp->inp_depend4.inp4_ip_tos; - insi->insi_v6.in6_hlim = inp->inp_depend6.inp6_hlim; - insi->insi_v6.in6_cksum = inp->inp_depend6.inp6_cksum; - insi->insi_v6.in6_ifindex = inp->inp_depend6.inp6_ifindex; - insi->insi_v6.in6_hops = inp->inp_depend6.inp6_hops; - - if (type == SOCK_STREAM && (protocol == 0 || protocol == IPPROTO_TCP) && inp->inp_ppcb != 0) { - struct tcp_sockinfo *tcpsi = &si->soi_proto.pri_tcp; - struct tcpcb *tp= (struct tcpcb *)inp->inp_ppcb; - - si->soi_kind = SOCKINFO_TCP; - - tcpsi->tcpsi_state = tp->t_state; - tcpsi->tcpsi_timer[TCPT_REXMT] = tp->t_timer[TCPT_REXMT]; - tcpsi->tcpsi_timer[TCPT_PERSIST] = tp->t_timer[TCPT_PERSIST]; - tcpsi->tcpsi_timer[TCPT_KEEP] = tp->t_timer[TCPT_KEEP]; - tcpsi->tcpsi_timer[TCPT_2MSL] = tp->t_timer[TCPT_2MSL]; - tcpsi->tcpsi_mss = tp->t_maxseg; - tcpsi->tcpsi_flags = tp->t_flags; - tcpsi->tcpsi_tp = - (u_int64_t)VM_KERNEL_ADDRPERM(tp); - } - break; + /* + * The kind of socket is determined by the triplet + * {domain, type, protocol} + */ + domain = SOCK_DOM(so); + type = SOCK_TYPE(so); + protocol = SOCK_PROTO(so); + switch (domain) { + case 
PF_INET: + case PF_INET6: { + struct in_sockinfo *insi = &si->soi_proto.pri_in; + struct inpcb *inp = (struct inpcb *)so->so_pcb; + + si->soi_kind = SOCKINFO_IN; + + insi->insi_fport = inp->inp_fport; + insi->insi_lport = inp->inp_lport; + insi->insi_gencnt = inp->inp_gencnt; + insi->insi_flags = inp->inp_flags; + insi->insi_vflag = inp->inp_vflag; + insi->insi_ip_ttl = inp->inp_ip_ttl; + insi->insi_faddr.ina_6 = inp->inp_dependfaddr.inp6_foreign; + insi->insi_laddr.ina_6 = inp->inp_dependladdr.inp6_local; + insi->insi_v4.in4_tos = inp->inp_depend4.inp4_ip_tos; + insi->insi_v6.in6_hlim = 0; + insi->insi_v6.in6_cksum = inp->inp_depend6.inp6_cksum; + insi->insi_v6.in6_ifindex = 0; + insi->insi_v6.in6_hops = inp->inp_depend6.inp6_hops; + + if (type == SOCK_STREAM && (protocol == 0 || + protocol == IPPROTO_TCP) && inp->inp_ppcb != NULL) { + struct tcp_sockinfo *tcpsi = &si->soi_proto.pri_tcp; + struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb; + + si->soi_kind = SOCKINFO_TCP; + + tcpsi->tcpsi_state = tp->t_state; + tcpsi->tcpsi_timer[TCPT_REXMT] = + tp->t_timer[TCPT_REXMT]; + tcpsi->tcpsi_timer[TCPT_PERSIST] = + tp->t_timer[TCPT_PERSIST]; + tcpsi->tcpsi_timer[TCPT_KEEP] = tp->t_timer[TCPT_KEEP]; + tcpsi->tcpsi_timer[TCPT_2MSL] = tp->t_timer[TCPT_2MSL]; + tcpsi->tcpsi_mss = tp->t_maxseg; + tcpsi->tcpsi_flags = tp->t_flags; + tcpsi->tcpsi_tp = + (u_int64_t)VM_KERNEL_ADDRPERM(tp); + } + break; + } + case PF_UNIX: { + struct unpcb *unp = (struct unpcb *)so->so_pcb; + struct un_sockinfo *unsi = &si->soi_proto.pri_un; + + si->soi_kind = SOCKINFO_UN; + + unsi->unsi_conn_pcb = + (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_conn); + if (unp->unp_conn) + unsi->unsi_conn_so = (uint64_t) + VM_KERNEL_ADDRPERM(unp->unp_conn->unp_socket); + + if (unp->unp_addr) { + size_t addrlen = unp->unp_addr->sun_len; + + if (addrlen > SOCK_MAXADDRLEN) + addrlen = SOCK_MAXADDRLEN; + bcopy(unp->unp_addr, &unsi->unsi_addr, addrlen); } - case AF_UNIX: { - struct unpcb *unp = (struct unpcb *)so->so_pcb; - struct un_sockinfo *unsi = &si->soi_proto.pri_un; - - si->soi_kind = SOCKINFO_UN; - - unsi->unsi_conn_pcb = - (uint64_t)VM_KERNEL_ADDRPERM(unp->unp_conn); - if (unp->unp_conn) - unsi->unsi_conn_so = (uint64_t) - VM_KERNEL_ADDRPERM(unp->unp_conn->unp_socket); - - if (unp->unp_addr) { - size_t addrlen = unp->unp_addr->sun_len; - - if (addrlen > SOCK_MAXADDRLEN) - addrlen = SOCK_MAXADDRLEN; - bcopy(unp->unp_addr, &unsi->unsi_addr, addrlen); - } - if (unp->unp_conn && unp->unp_conn->unp_addr) { - size_t addrlen = unp->unp_conn->unp_addr->sun_len; - - if (addrlen > SOCK_MAXADDRLEN) - addrlen = SOCK_MAXADDRLEN; - bcopy(unp->unp_conn->unp_addr, &unsi->unsi_caddr, addrlen); - } - break; + if (unp->unp_conn && unp->unp_conn->unp_addr) { + size_t addrlen = unp->unp_conn->unp_addr->sun_len; + + if (addrlen > SOCK_MAXADDRLEN) + addrlen = SOCK_MAXADDRLEN; + bcopy(unp->unp_conn->unp_addr, &unsi->unsi_caddr, + addrlen); } - case AF_NDRV: { - struct ndrv_cb *ndrv_cb = (struct ndrv_cb *)so->so_pcb; - struct ndrv_info *ndrvsi = &si->soi_proto.pri_ndrv; - - si->soi_kind = SOCKINFO_NDRV; - - /* TDB lock ifnet ???? */ - if (ndrv_cb->nd_if != 0) { - struct ifnet *ifp = ndrv_cb->nd_if; - - ndrvsi->ndrvsi_if_family = ifp->if_family; - ndrvsi->ndrvsi_if_unit = ifp->if_unit; - strlcpy(ndrvsi->ndrvsi_if_name, ifp->if_name, IFNAMSIZ); - } - - break; + break; + } + case PF_NDRV: { + struct ndrv_cb *ndrv_cb = (struct ndrv_cb *)so->so_pcb; + struct ndrv_info *ndrvsi = &si->soi_proto.pri_ndrv; + + si->soi_kind = SOCKINFO_NDRV; + + /* TDB lock ifnet ???? 
*/ + if (ndrv_cb->nd_if != 0) { + struct ifnet *ifp = ndrv_cb->nd_if; + + ndrvsi->ndrvsi_if_family = ifp->if_family; + ndrvsi->ndrvsi_if_unit = ifp->if_unit; + strlcpy(ndrvsi->ndrvsi_if_name, ifp->if_name, IFNAMSIZ); } - case AF_SYSTEM: - if (so->so_proto->pr_protocol == SYSPROTO_EVENT) { - struct kern_event_pcb *ev_pcb = (struct kern_event_pcb *)so->so_pcb; - struct kern_event_info *kesi = &si->soi_proto.pri_kern_event; - - si->soi_kind = SOCKINFO_KERN_EVENT; - - kesi->kesi_vendor_code_filter = ev_pcb->vendor_code_filter; - kesi->kesi_class_filter = ev_pcb->class_filter; - kesi->kesi_subclass_filter = ev_pcb->subclass_filter; - - } else if (so->so_proto->pr_protocol == SYSPROTO_CONTROL) { - struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; - struct kern_ctl_info *kcsi = &si->soi_proto.pri_kern_ctl; - struct kctl *kctl = kcb->kctl; - - - si->soi_kind = SOCKINFO_KERN_CTL; - - if (kctl == 0) - break; - kcsi->kcsi_id = kctl->id; - kcsi->kcsi_reg_unit = kctl->id; - kcsi->kcsi_flags = kctl->flags; - kcsi->kcsi_recvbufsize = kctl->recvbufsize; - kcsi->kcsi_sendbufsize = kctl->sendbufsize; - kcsi->kcsi_unit = kcb->unit; - strlcpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME); - } - break; - - case AF_APPLETALK: - break; - - case AF_ROUTE: - break; - - case AF_PPP: - break; - - default: - break; + break; } -out: + case PF_SYSTEM: + if (SOCK_PROTO(so) == SYSPROTO_EVENT) { + struct kern_event_pcb *ev_pcb = + (struct kern_event_pcb *)so->so_pcb; + struct kern_event_info *kesi = + &si->soi_proto.pri_kern_event; + + si->soi_kind = SOCKINFO_KERN_EVENT; + + kesi->kesi_vendor_code_filter = + ev_pcb->evp_vendor_code_filter; + kesi->kesi_class_filter = ev_pcb->evp_class_filter; + kesi->kesi_subclass_filter = ev_pcb->evp_subclass_filter; + + } else if (SOCK_PROTO(so) == SYSPROTO_CONTROL) { + struct ctl_cb *kcb = (struct ctl_cb *)so->so_pcb; + struct kern_ctl_info *kcsi = + &si->soi_proto.pri_kern_ctl; + struct kctl *kctl = kcb->kctl; + + si->soi_kind = SOCKINFO_KERN_CTL; + + if (kctl == 0) + break; + kcsi->kcsi_id = kctl->id; + kcsi->kcsi_reg_unit = kctl->id; + kcsi->kcsi_flags = kctl->flags; + kcsi->kcsi_recvbufsize = kctl->recvbufsize; + kcsi->kcsi_sendbufsize = kctl->sendbufsize; + kcsi->kcsi_unit = kcb->unit; + strlcpy(kcsi->kcsi_name, kctl->name, MAX_KCTL_NAME); + } + break; + + case PF_ROUTE: + case PF_PPP: + default: + break; + } +out: socket_unlock(so, 0); - - return error; -} + return (error); +} diff --git a/bsd/kern/subr_log.c b/bsd/kern/subr_log.c index 2cd5c3ac2..0f8c00b36 100644 --- a/bsd/kern/subr_log.c +++ b/bsd/kern/subr_log.c @@ -84,7 +84,6 @@ #include /* XXX should be in a common header somewhere */ -extern void klogwakeup(void); extern void logwakeup(void); #define LOG_RDPRI (PZERO + 1) @@ -93,6 +92,7 @@ extern void logwakeup(void); #define LOG_ASYNC 0x04 #define LOG_RDWAIT 0x08 +#define MAX_UNREAD_CHARS (CONFIG_MSG_BSIZE/2) /* All globals should be accessed under LOG_LOCK() */ /* logsoftc only valid while log_open=1 */ @@ -105,8 +105,7 @@ struct logsoftc { int log_open; /* also used in log() */ char smsg_bufc[CONFIG_MSG_BSIZE]; /* static buffer */ struct msgbuf msgbuf = {MSG_MAGIC,sizeof(smsg_bufc),0,0,smsg_bufc}; -struct msgbuf *msgbufp = &msgbuf; -static int logentrypend = 0; +struct msgbuf *msgbufp __attribute__((used)) = &msgbuf; /* the following are implemented in osfmk/kern/printf.c */ extern void bsd_log_lock(void); @@ -189,8 +188,7 @@ logread(__unused dev_t dev, struct uio *uio, int flag) logsoftc.sc_state |= LOG_RDWAIT; LOG_UNLOCK(); /* - * If the wakeup is missed the ligtening 
bolt will wake this up - * if there are any new characters. If that doesn't do it + * If the wakeup is missed * then wait for 5 sec and reevaluate */ if ((error = tsleep((caddr_t)msgbufp, LOG_RDPRI | PCATCH, @@ -275,18 +273,6 @@ logwakeup(void) LOG_UNLOCK(); } -void -klogwakeup(void) -{ - LOG_LOCK(); - if (logentrypend && log_open) { - logentrypend = 0; /* only reset if someone will be reading */ - LOG_UNLOCK(); - logwakeup(); - } else { - LOG_UNLOCK(); - } -} /*ARGSUSED*/ int @@ -364,7 +350,6 @@ log_putc_locked(char c) mbp = msgbufp; mbp->msg_bufc[mbp->msg_bufx++] = c; - logentrypend = 1; if (mbp->msg_bufx >= msgbufp->msg_size) mbp->msg_bufx = 0; } @@ -386,9 +371,16 @@ log_putc_locked(char c) void log_putc(char c) { + int unread_count = 0; LOG_LOCK(); log_putc_locked(c); + unread_count = msgbufp->msg_bufx - msgbufp->msg_bufr; LOG_UNLOCK(); + + if (unread_count < 0) + unread_count = 0 - unread_count; + if (c == '\n' || unread_count >= MAX_UNREAD_CHARS) + logwakeup(); } diff --git a/bsd/kern/subr_prf.c b/bsd/kern/subr_prf.c index 3d4e18b60..5bb098bbb 100644 --- a/bsd/kern/subr_prf.c +++ b/bsd/kern/subr_prf.c @@ -446,7 +446,6 @@ vprintf(const char *fmt, va_list ap) return 0; } -#if !CONFIG_EMBEDDED /* * Scaled down version of vsprintf(3). @@ -469,7 +468,6 @@ vsprintf(char *buf, const char *cfmt, va_list ap) } return 0; } -#endif /* !CONFIG_EMBEDDED */ /* * Scaled down version of snprintf(3). diff --git a/bsd/kern/sys_domain.c b/bsd/kern/sys_domain.c index 66af22826..d28648810 100644 --- a/bsd/kern/sys_domain.c +++ b/bsd/kern/sys_domain.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,25 +31,31 @@ */ #include -#include #include #include -#include +#include #include +struct domain *systemdomain = NULL; /* domain init function */ -void systemdomain_init(void) __attribute__((section("__TEXT, initcode"))); +static void systemdomain_init(struct domain *); -struct domain systemdomain = - { PF_SYSTEM, "system", systemdomain_init, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, { 0, 0} }; +struct domain systemdomain_s = { + .dom_family = PF_SYSTEM, + .dom_name = "system", + .dom_init = systemdomain_init, +}; - -void -systemdomain_init(void) +static void +systemdomain_init(struct domain *dp) { - /* add system domain built in protocol initializers here */ + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(systemdomain == NULL); + + systemdomain = dp; - kern_event_init(); - kern_control_init(); + /* add system domain built in protocol initializers here */ + kern_event_init(dp); + kern_control_init(dp); } diff --git a/bsd/kern/sys_generic.c b/bsd/kern/sys_generic.c index cc950bd84..f41cd0018 100644 --- a/bsd/kern/sys_generic.c +++ b/bsd/kern/sys_generic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
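A note on the sys_domain.c rewrite above: the domain is now described with C99 designated initializers rather than a long positional initializer full of zeros, and dom_init receives the struct domain * being registered. A minimal standalone sketch of that registration shape (the struct here is a simplified stand-in for the kernel's struct domain, and PF_SYSTEM's value is assumed to be 32 as on OS X):

    #include <stdio.h>

    struct domain {
        int          dom_family;
        const char  *dom_name;
        void       (*dom_init)(struct domain *);
    };

    static struct domain *systemdomain;      /* cached at init, as in the patch */

    static void
    systemdomain_init(struct domain *dp)
    {
        systemdomain = dp;
        /* built-in protocol initializers would be called here */
    }

    static struct domain systemdomain_s = {
        .dom_family = 32,                    /* PF_SYSTEM */
        .dom_name   = "system",
        .dom_init   = systemdomain_init,     /* unset fields default to zero */
    };

    int
    main(void)
    {
        systemdomain_s.dom_init(&systemdomain_s);
        printf("%s domain registered (family %d)\n",
            systemdomain->dom_name, systemdomain->dom_family);
        return 0;
    }

The win over the old positional form is that adding a field to struct domain no longer silently shifts every initializer.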
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -94,6 +94,7 @@ #include #include #include +#include #include #include @@ -103,8 +104,12 @@ #include #include #include +#if CONFIG_TELEMETRY +#include +#endif #include +#include #include #include #include @@ -134,6 +139,8 @@ #include #include +#include + /* XXX should be in a header file somewhere */ void evsofree(struct socket *); void evpipefree(struct pipe *); @@ -159,6 +166,10 @@ __private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, in /* Conflict wait queue for when selects collide (opaque type) */ struct wait_queue select_conflict_queue; +#if 13841988 +int temp_debug_13841988 = 0; +#endif + /* * Init routine called from bsd_init.c */ @@ -167,15 +178,15 @@ void select_wait_queue_init(void) { wait_queue_init(&select_conflict_queue, SYNC_POLICY_FIFO); +#if 13841988 + if (PE_parse_boot_argn("temp_debug_13841988", &temp_debug_13841988, sizeof(temp_debug_13841988))) { + kprintf("Temporary debugging for 13841988 enabled\n"); + } +#endif } - -#if NETAT -extern int appletalk_inited; -#endif /* NETAT */ - #define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type +#define f_type f_fglob->fg_ops->fo_type #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred #define f_ops f_fglob->fg_ops @@ -272,9 +283,6 @@ void donefileread(struct proc *p, struct fileproc *fp, int fd) { proc_fdlock_spin(p); - - fp->f_flags &= ~FP_INCHRREAD; - fp_drop(p, fd, fp, 1); proc_fdunlock(p); } @@ -323,8 +331,6 @@ preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_ error = ENXIO; goto out; } - if (vp->v_type == VCHR) - fp->f_flags |= FP_INCHRREAD; } *fp_ret = fp; @@ -429,7 +435,10 @@ readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *re /* finalize uio_t for use and do the IO */ - uio_calculateresid(auio); + error = uio_calculateresid(auio); + if (error) { + goto ExitThisRoutine; + } error = rd_uio(p, uap->fd, auio, retval); ExitThisRoutine: @@ -645,7 +654,10 @@ writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t * /* finalize uio_t for use and do the IO */ - uio_calculateresid(auio); + error = uio_calculateresid(auio); + if (error) { + goto ExitThisRoutine; + } error = wr_uio(p, uap->fd, auio, retval); ExitThisRoutine: @@ -741,16 +753,16 @@ rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval) int ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval) { - struct fileproc *fp; - u_long com; + struct fileproc *fp = NULL; int error = 0; - u_int size; - caddr_t datap, memp; - boolean_t is64bit; - int tmp; + u_int size = 0; + caddr_t datap = NULL, memp = NULL; + boolean_t is64bit = FALSE; + int tmp = 0; #define STK_PARAMS 128 char stkbuf[STK_PARAMS]; int fd = uap->fd; + u_long com = uap->com; struct vfs_context context = *vfs_context_current(); AUDIT_ARG(fd, uap->fd); @@ -759,99 +771,29 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval) is64bit = proc_is64bit(p); #if CONFIG_AUDIT if (is64bit) - AUDIT_ARG(value64, uap->com); + AUDIT_ARG(value64, com); else - AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, uap->com)); + AUDIT_ARG(cmd, CAST_DOWN_EXPLICIT(int, com)); #endif /* CONFIG_AUDIT */ - proc_fdlock(p); - error = fp_lookup(p,fd,&fp,1); - if (error) { - proc_fdunlock(p); - return(error); - } - - AUDIT_ARG(file, p, fp); - - if ((fp->f_flag & (FREAD | FWRITE)) == 0) { - error = EBADF; - goto out; - } - - context.vc_ucred = fp->f_fglob->fg_cred; - -#if CONFIG_MACF - error = 
mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com); - if (error) - goto out; -#endif - -#if NETAT - /* - * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work - * while implementing an ATioctl system call - */ - { - if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) { - u_long fixed_command; - -#ifdef APPLETALK_DEBUG - kprintf("ioctl: special AppleTalk \n"); -#endif - datap = &stkbuf[0]; - *(user_addr_t *)datap = uap->data; - fixed_command = _IOW(0, 0xff99, uap->data); - error = fo_ioctl(fp, fixed_command, datap, &context); - goto out; - } - } - -#endif /* NETAT */ - - - switch (com = uap->com) { - case FIONCLEX: - *fdflags(p, uap->fd) &= ~UF_EXCLOSE; - error =0; - goto out; - case FIOCLEX: - *fdflags(p, uap->fd) |= UF_EXCLOSE; - error =0; - goto out; - } - /* * Interpret high order word to find amount of data to be * copied to/from the user's address space. */ size = IOCPARM_LEN(com); - if (size > IOCPARM_MAX) { - error = ENOTTY; - goto out; - } - memp = NULL; + if (size > IOCPARM_MAX) + return ENOTTY; if (size > sizeof (stkbuf)) { - proc_fdunlock(p); - if ((memp = (caddr_t)kalloc(size)) == 0) { - proc_fdlock(p); - error = ENOMEM; - goto out; - } - proc_fdlock(p); + if ((memp = (caddr_t)kalloc(size)) == 0) + return ENOMEM; datap = memp; } else datap = &stkbuf[0]; - if (com&IOC_IN) { + if (com & IOC_IN) { if (size) { - proc_fdunlock(p); error = copyin(uap->data, datap, size); - if (error) { - if (memp) - kfree(memp, size); - proc_fdlock(p); - goto out; - } - proc_fdlock(p); + if (error) + goto out_nofp; } else { /* XXX - IOC_IN and no size? we should proably return an error here!! */ if (is64bit) { @@ -861,13 +803,13 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval) *(uint32_t *)datap = (uint32_t)uap->data; } } - } else if ((com&IOC_OUT) && size) + } else if ((com & IOC_OUT) && size) /* * Zero the buffer so the user always * gets back something deterministic. */ bzero(datap, size); - else if (com&IOC_VOID) { + else if (com & IOC_VOID) { /* XXX - this is odd since IOC_VOID means no parameters */ if (is64bit) { *(user_addr_t *)datap = uap->data; @@ -877,7 +819,36 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval) } } + proc_fdlock(p); + error = fp_lookup(p,fd,&fp,1); + if (error) { + proc_fdunlock(p); + goto out_nofp; + } + + AUDIT_ARG(file, p, fp); + + if ((fp->f_flag & (FREAD | FWRITE)) == 0) { + error = EBADF; + goto out; + } + + context.vc_ucred = fp->f_fglob->fg_cred; + +#if CONFIG_MACF + error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, com); + if (error) + goto out; +#endif + switch (com) { + case FIONCLEX: + *fdflags(p, fd) &= ~UF_EXCLOSE; + break; + + case FIOCLEX: + *fdflags(p, fd) |= UF_EXCLOSE; + break; case FIONBIO: if ( (tmp = *(int *)datap) ) @@ -899,7 +870,6 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval) tmp = *(int *)datap; if (fp->f_type == DTYPE_SOCKET) { ((struct socket *)fp->f_data)->so_pgid = tmp; - error = 0; break; } if (fp->f_type == DTYPE_PIPE) { @@ -922,7 +892,6 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval) case FIOGETOWN: if (fp->f_type == DTYPE_SOCKET) { - error = 0; *(int *)datap = ((struct socket *)fp->f_data)->so_pgid; break; } @@ -936,17 +905,17 @@ ioctl(struct proc *p, struct ioctl_args *uap, __unused int32_t *retval) * Copy any data to user, size was * already set and checked above. 
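The reshuffled ioctl() above stages the user argument before taking proc_fdlock() and looking up the file: decode the parameter size from the command word, reject oversized requests, use the 128-byte stack buffer when the argument fits and kalloc() otherwise, then copyin()/bzero() according to the IOC_IN/IOC_OUT bits. A userspace sketch of just that staging step, with simplified macro values and memcpy()/malloc() standing in for copyin()/kalloc():

    #include <errno.h>
    #include <stdlib.h>
    #include <string.h>

    /* Simplified forms of the BSD ioccom.h encoding. */
    #define IOCPARM_MASK    0x1fff
    #define IOCPARM_LEN(x)  (((x) >> 16) & IOCPARM_MASK)
    #define IOCPARM_MAX     (IOCPARM_MASK + 1)
    #define IOC_OUT         0x40000000UL
    #define IOC_IN          0x80000000UL

    #define STK_PARAMS      128

    static int
    stage_ioctl_arg(unsigned long com, const void *uarg, size_t uarg_len)
    {
        char stkbuf[STK_PARAMS];
        char *memp = NULL;
        char *datap = stkbuf;
        size_t size = IOCPARM_LEN(com);
        int error = 0;

        if (size > IOCPARM_MAX)
            return (ENOTTY);
        if (size > sizeof (stkbuf)) {
            if ((memp = malloc(size)) == NULL)  /* kalloc() in the kernel */
                return (ENOMEM);
            datap = memp;
        }
        if ((com & IOC_IN) && size) {
            if (uarg_len < size)                /* copyin() would fault */
                error = EFAULT;
            else
                memcpy(datap, uarg, size);
        } else if ((com & IOC_OUT) && size) {
            /* zero so the caller always reads back something deterministic */
            memset(datap, 0, size);
        }
        /* ... the command would be dispatched here ... */
        free(memp);                             /* free(NULL) is a no-op */
        return (error);
    }

    int
    main(void)
    {
        char arg[4] = { 0 };
        /* a made-up IOC_IN command carrying a 4-byte argument */
        unsigned long com = IOC_IN | (4UL << 16) | ('t' << 8) | 1;
        return stage_ioctl_arg(com, arg, sizeof (arg));
    }

Doing this before fp_lookup() is what lets the rewrite delete the unlock/relock dance the old code needed around copyin() and kalloc().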
*/ - if (error == 0 && (com&IOC_OUT) && size) + if (error == 0 && (com & IOC_OUT) && size) error = copyout(datap, uap->data, (u_int)size); break; } - proc_fdunlock(p); - if (memp) - kfree(memp, size); - proc_fdlock(p); out: fp_drop(p, fd, fp, 1); proc_fdunlock(p); + +out_nofp: + if (memp) + kfree(memp, size); return(error); } @@ -990,6 +959,7 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva th_act = current_thread(); uth = get_bsdthread_info(th_act); sel = &uth->uu_select; + sel->data = &uth->uu_kevent.ss_select_data; retval = (int *)get_bsduthreadrval(th_act); *retval = 0; @@ -1084,16 +1054,16 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva } clock_absolutetime_interval_to_deadline( - tvtoabstime(&atv), &sel->abstime); + tvtoabstime(&atv), &sel->data->abstime); } else - sel->abstime = 0; + sel->data->abstime = 0; if ( (error = selcount(p, sel->ibits, uap->nd, &count)) ) { goto continuation; } - sel->count = count; + sel->data->count = count; size = SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK); if (uth->uu_allocsize) { if (uth->uu_wqset == 0) @@ -1113,7 +1083,7 @@ select_nocancel(struct proc *p, struct select_nocancel_args *uap, int32_t *retva panic("failed to allocate memory for waitqueue\n"); } bzero(uth->uu_wqset, size); - sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET; + sel->data->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET; wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST)); continuation: @@ -1170,7 +1140,7 @@ selprocess(int error, int sel_pass) if ((error != 0) && (sel_pass == SEL_FIRSTPASS)) unwind = 0; - if (sel->count == 0) + if (sel->data->count == 0) unwind = 0; retry: if (error != 0) { @@ -1181,7 +1151,7 @@ retry: ncoll = nselcoll; OSBitOrAtomic(P_SELECT, &p->p_flag); /* skip scans if the select is just for timeouts */ - if (sel->count) { + if (sel->data->count) { /* * Clear out any dangling refs from prior calls; technically * there should not be any. @@ -1210,7 +1180,7 @@ retry: uint64_t now; clock_get_uptime(&now); - if (now >= sel->abstime) + if (now >= sel->data->abstime) goto done; } @@ -1225,7 +1195,7 @@ retry: * To effect a poll, the timeout argument should be * non-nil, pointing to a zero-valued timeval structure. 
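The comment above describes select()'s poll mode, which the abstime == 0 path implements: a non-NULL but zero-valued timeval returns immediately instead of blocking. A small userspace illustration (standard POSIX, nothing patch-specific):

    #include <stdio.h>
    #include <sys/select.h>

    int
    main(void)
    {
        fd_set rfds;
        struct timeval tv = { 0, 0 };   /* non-nil, zero-valued: pure poll */
        int n;

        FD_ZERO(&rfds);
        FD_SET(0, &rfds);               /* watch stdin */

        n = select(1, &rfds, NULL, NULL, &tv);
        if (n > 0)
            printf("stdin readable right now\n");
        else if (n == 0)
            printf("nothing ready; select() returned without sleeping\n");
        else
            perror("select");
        return 0;
    }

Passing a NULL timeout instead would block indefinitely, which is why the kernel distinguishes uap->tv being set from the computed deadline being zero.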
*/ - if (uap->tv && sel->abstime == 0) { + if (uap->tv && sel->data->abstime == 0) { goto done; } @@ -1238,12 +1208,13 @@ retry: OSBitAndAtomic(~((uint32_t)P_SELECT), &p->p_flag); /* if the select is just for timeout skip check */ - if (sel->count &&(sel_pass == SEL_SECONDPASS)) + if (sel->data->count &&(sel_pass == SEL_SECONDPASS)) panic("selprocess: 2nd pass assertwaiting"); /* Wait Queue Subordinate has waitqueue as first element */ - wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset, - NULL, THREAD_ABORTSAFE, sel->abstime); + wait_result = wait_queue_assert_wait_with_leeway((wait_queue_t)uth->uu_wqset, + NULL, THREAD_ABORTSAFE, + TIMEOUT_URGENCY_USER_NORMAL, sel->data->abstime, 0); if (wait_result != THREAD_AWAKENED) { /* there are no preposted events */ error = tsleep1(NULL, PSOCK | PCATCH, @@ -1337,11 +1308,11 @@ selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval, } ibits = sel->ibits; obits = sel->obits; - wql = sel->wql; + wql = sel->data->wql; nw = howmany(nfd, NFDBITS); - count = sel->count; + count = sel->data->count; nc = 0; if (count) { @@ -1386,7 +1357,7 @@ selscan(struct proc *p, struct _select *sel, int nfd, int32_t *retval, context.vc_ucred = fp->f_cred; /* The select; set the bit, if true */ - if (fp->f_ops + if (fp->f_ops && fp->f_type && fo_select(fp, flag[msk], wql_ptr, &context)) { optr[fd/NFDBITS] |= (1 << (fd % NFDBITS)); n++; @@ -2161,13 +2132,19 @@ postevent(struct socket *sp, struct sockbuf *sb, int event) */ case EV_RWBYTES: if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) { - if (sp->so_error) { - if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) { - if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) || - (tp->t_state == TCPS_CLOSED)) { - mask |= EV_RE|EV_RESET; - break; - } + /* for AFP/OT purposes; may go away in future */ + if ((SOCK_DOM(sp) == PF_INET || + SOCK_DOM(sp) == PF_INET6) && + SOCK_PROTO(sp) == IPPROTO_TCP && + (sp->so_error == ECONNREFUSED || + sp->so_error == ECONNRESET)) { + if (sp->so_pcb == NULL || + sotoinpcb(sp)->inp_state == + INPCB_STATE_DEAD || + (tp = sototcpcb(sp)) == NULL || + tp->t_state == TCPS_CLOSED) { + mask |= EV_RE|EV_RESET; + break; } } mask |= EV_RE; @@ -2179,13 +2156,19 @@ postevent(struct socket *sp, struct sockbuf *sb, int event) } } if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) { - if (sp->so_error) { - if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) { - if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) || - (tp->t_state == TCPS_CLOSED)) { - mask |= EV_WR|EV_RESET; - break; - } + /* for AFP/OT purposes; may go away in future */ + if ((SOCK_DOM(sp) == PF_INET || + SOCK_DOM(sp) == PF_INET6) && + SOCK_PROTO(sp) == IPPROTO_TCP && + (sp->so_error == ECONNREFUSED || + sp->so_error == ECONNRESET)) { + if (sp->so_pcb == NULL || + sotoinpcb(sp)->inp_state == + INPCB_STATE_DEAD || + (tp = sototcpcb(sp)) == NULL || + tp->t_state == TCPS_CLOSED) { + mask |= EV_WR|EV_RESET; + break; } } mask |= EV_WR; @@ -2774,6 +2757,7 @@ waitevent_close(struct proc *p, struct fileproc *fp) * * Parameters: uuid_buf Pointer to buffer to receive UUID * timeout Timespec for timout + * spi SPI, skip sandbox check (temporary) * * Returns: 0 Success * EWOULDBLOCK Timeout is too short @@ -2790,6 +2774,18 @@ gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retv 
mach_timespec_t mach_ts; /* for IOKit call */ __darwin_uuid_t uuid_kern; /* for IOKit call */ + if (!uap->spi) { +#if 13841988 + uint32_t flags; + if (temp_debug_13841988 && (0 == proc_get_darwinbgstate(p->task, &flags)) && (flags & PROC_FLAG_IOS_APPLICATION)) { + printf("Unauthorized access to gethostuuid() by %s(%d)\n", p->p_comm, proc_pid(p)); + return (EPERM); + } +#else + /* Perform sandbox check */ +#endif + } + /* Convert the 32/64 bit timespec into a mach_timespec_t */ if ( proc_is64bit(p) ) { struct user64_timespec ts; @@ -2832,6 +2828,9 @@ gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused int32_t *retv int ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval) { +#if !CONFIG_MACF +#pragma unused(p) +#endif int rval, pid, len, error; #ifdef LEDGER_DEBUG struct ledger_limit_args lla; @@ -2876,7 +2875,7 @@ ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval) switch (args->cmd) { #ifdef LEDGER_DEBUG case LEDGER_LIMIT: { - if (!is_suser()) + if (!kauth_cred_issuser(kauth_cred_get())) rval = EPERM; rval = ledger_limit(task, &lla); proc_rele(proc); @@ -2898,7 +2897,7 @@ ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval) void *buf; int sz; - rval = ledger_entry_info(task, &buf, &len); + rval = ledger_get_task_entry_info_multiple(task, &buf, &len); proc_rele(proc); if ((rval == 0) && (len > 0)) { sz = len * sizeof (struct ledger_entry_info); @@ -2931,3 +2930,22 @@ ledger(struct proc *p, struct ledger_args *args, __unused int32_t *retval) return (rval); } + +#if CONFIG_TELEMETRY +int +telemetry(__unused struct proc *p, struct telemetry_args *args, __unused int32_t *retval) +{ + int error = 0; + + switch (args->cmd) { + case TELEMETRY_CMD_TIMER_EVENT: + error = telemetry_timer_event(args->deadline, args->interval, args->leeway); + break; + default: + error = EINVAL; + break; + } + + return (error); +} +#endif /* CONFIG_TELEMETRY */ diff --git a/bsd/kern/sys_pipe.c b/bsd/kern/sys_pipe.c index 9aa8ac04c..83ea24a35 100644 --- a/bsd/kern/sys_pipe.c +++ b/bsd/kern/sys_pipe.c @@ -148,7 +148,6 @@ #include #define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred #define f_ops f_fglob->fg_ops @@ -171,14 +170,16 @@ static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx); static int pipe_drain(struct fileproc *fp,vfs_context_t ctx); -struct fileops pipeops = - { pipe_read, - pipe_write, - pipe_ioctl, - pipe_select, - pipe_close, - pipe_kqfilter, - pipe_drain }; +static const struct fileops pipeops = { + DTYPE_PIPE, + pipe_read, + pipe_write, + pipe_ioctl, + pipe_select, + pipe_close, + pipe_kqfilter, + pipe_drain +}; static void filt_pipedetach(struct knote *kn); static int filt_piperead(struct knote *kn, long hint); @@ -200,7 +201,7 @@ static int nbigpipe; /* for compatibility sake. no longer used */ static int amountpipes; /* total number of pipes in system */ static int amountpipekva; /* total memory used by pipes */ -int maxpipekva = PIPE_KVAMAX; /* allowing 16MB max. */ +int maxpipekva __attribute__((used)) = PIPE_KVAMAX; /* allowing 16MB max. */ #if PIPE_SYSCTLS SYSCTL_DECL(_kern_ipc); @@ -432,7 +433,6 @@ pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval) * this is what we've always supported.. 
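The sys_pipe.c hunk above is one instance of a patch-wide pattern: the descriptor type moves out of each open file (the old per-instance f_type assignment) and into the now-const shared fileops table, so fg_ops->fo_type is authoritative and the ops table can live in read-only memory. A minimal sketch of the shape (types and the DTYPE constant are simplified stand-ins):

    #include <stdio.h>

    enum ftype { DTYPE_NONE, DTYPE_PIPE };

    struct fileops {
        enum ftype fo_type;             /* first member, as in the new layout */
        int      (*fo_read)(void *obj);
    };

    static int
    pipe_read_stub(void *obj)
    {
        (void)obj;
        return 0;
    }

    /* One const table shared by every pipe; nothing per-instance to set up. */
    static const struct fileops pipeops = {
        .fo_type = DTYPE_PIPE,
        .fo_read = pipe_read_stub,
    };

    struct fileglob {
        const struct fileops *fg_ops;
    };

    int
    main(void)
    {
        struct fileglob fg = { .fg_ops = &pipeops };
        printf("descriptor type = %d\n", fg.fg_ops->fo_type);
        return 0;
    }

Besides dropping a store on every open, this makes it impossible for an f_type and an f_ops to disagree.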
*/ rf->f_flag = FREAD; - rf->f_type = DTYPE_PIPE; rf->f_data = (caddr_t)rpipe; rf->f_ops = &pipeops; @@ -442,7 +442,6 @@ pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval) goto freepipes; } wf->f_flag = FWRITE; - wf->f_type = DTYPE_PIPE; wf->f_data = (caddr_t)wpipe; wf->f_ops = &pipeops; @@ -1327,17 +1326,18 @@ pipeclose(struct pipe *cpipe) * free resources */ if (PIPE_MTX(cpipe) != NULL) { - if (ppipe != NULL) { - /* + if (ppipe != NULL) { + /* * since the mutex is shared and the peer is still * alive, we need to release the mutex, not free it */ - PIPE_UNLOCK(cpipe); + PIPE_UNLOCK(cpipe); } else { - /* + /* * peer is gone, so we're the sole party left with - * interest in this mutex... we can just free it + * interest in this mutex... unlock and free it */ + PIPE_UNLOCK(cpipe); lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp); } } @@ -1579,8 +1579,8 @@ fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo) * XXX (st_dev, st_ino) should be unique. */ - pinfo->pipe_handle = (uint64_t)((uintptr_t)cpipe); - pinfo->pipe_peerhandle = (uint64_t)((uintptr_t)(cpipe->pipe_peer)); + pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe); + pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)(cpipe->pipe_peer)); pinfo->pipe_status = cpipe->pipe_state; PIPE_UNLOCK(cpipe); diff --git a/bsd/kern/sys_socket.c b/bsd/kern/sys_socket.c index d06b9cb9c..11df996b6 100644 --- a/bsd/kern/sys_socket.c +++ b/bsd/kern/sys_socket.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
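fill_pipeinfo() above now rounds its handles through VM_KERNEL_ADDRPERM() instead of handing raw kernel pointers to userspace. The macro's definition is outside this patch; as a sketch of the idea, a per-boot random offset gives stable tokens (equal pointers still compare equal) without disclosing kernel addresses:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative only: imagine this seeded once from a RNG at boot. */
    static uintptr_t addrperm_secret = 0x5eedUL;

    static uintptr_t
    addr_perm(uintptr_t kaddr)
    {
        /* keep NULL recognizable; permute everything else */
        return (kaddr == 0) ? 0 : kaddr + addrperm_secret;
    }

    int
    main(void)
    {
        int obj;
        uintptr_t raw = (uintptr_t)&obj;

        printf("raw %p -> token 0x%lx\n",
            (void *)raw, (unsigned long)addr_perm(raw));
        return 0;
    }

Tools like lsof can still correlate the two ends of a pipe by comparing tokens, which is all pipe_handle/pipe_peerhandle were ever for.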
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -97,15 +97,15 @@ static int soo_write(struct fileproc *, struct uio *, int, vfs_context_t ctx); static int soo_close(struct fileglob *, vfs_context_t ctx); static int soo_drain(struct fileproc *, vfs_context_t ctx); -/* TODO: these should be in header file */ -extern int soo_ioctl(struct fileproc *, u_long, caddr_t, vfs_context_t ctx); -extern int soo_stat(struct socket *, void *, int); -extern int soo_select(struct fileproc *, int, void *, vfs_context_t ctx); -extern int soo_kqfilter(struct fileproc *, struct knote *, vfs_context_t ctx); - -struct fileops socketops = { - soo_read, soo_write, soo_ioctl, soo_select, soo_close, - soo_kqfilter, soo_drain +const struct fileops socketops = { + DTYPE_SOCKET, + soo_read, + soo_write, + soo_ioctl, + soo_select, + soo_close, + soo_kqfilter, + soo_drain }; /* ARGSUSED */ @@ -137,7 +137,6 @@ soo_read(struct fileproc *fp, struct uio *uio, __unused int flags, return (error); #endif /* CONFIG_MACF_SOCKET */ -//###LD will have to change fsoreceive = so->so_proto->pr_usrreqs->pru_soreceive; stat = (*fsoreceive)(so, 0, uio, 0, 0, 0); @@ -188,20 +187,33 @@ __private_extern__ int soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) { int error = 0; - int dropsockref = -1; int int_arg; socket_lock(so, 1); - /* Call the socket filter's ioctl handler for most ioctls */ + /* call the socket filter's ioctl handler for anything but ours */ if (IOCGROUP(cmd) != 'i' && IOCGROUP(cmd) != 'r') { - error = sflt_ioctl(so, cmd, data); - if (error != 0) - goto out; + switch (cmd) { + case SIOCGASSOCIDS32: + case SIOCGASSOCIDS64: + case SIOCGCONNIDS32: + case SIOCGCONNIDS64: + case SIOCGCONNINFO32: + case SIOCGCONNINFO64: + case SIOCSCONNORDER: + case SIOCGCONNORDER: + /* don't pass to filter */ + break; + + default: + error = sflt_ioctl(so, cmd, data); + if (error != 0) + goto out; + break; + } } switch (cmd) { - case FIONBIO: /* int */ bcopy(data, &int_arg, sizeof (int_arg)); if (int_arg) @@ -241,77 +253,23 @@ soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) bcopy(&int_arg, data, sizeof (int_arg)); goto out; - case SIOCSETOT: { /* int */ - /* - * Set socket level options here and then call protocol - * specific routine. - */ - struct socket *cloned_so = NULL; - int cloned_fd; - - bcopy(data, &cloned_fd, sizeof (cloned_fd)); - - /* let's make sure it's either -1 or a valid file descriptor */ - if (cloned_fd != -1) { - error = file_socket(cloned_fd, &cloned_so); - if (error) { - goto out; - } - dropsockref = cloned_fd; - } - - /* Always set socket non-blocking for OT */ - so->so_state |= SS_NBIO; - so->so_options |= SO_DONTTRUNC | SO_WANTMORE; - so->so_flags |= SOF_NOSIGPIPE | SOF_NPX_SETOPTSHUT; - - if (cloned_so && so != cloned_so) { - /* Flags options */ - so->so_options |= - cloned_so->so_options & ~SO_ACCEPTCONN; - - /* SO_LINGER */ - if (so->so_options & SO_LINGER) - so->so_linger = cloned_so->so_linger; - - /* SO_SNDBUF, SO_RCVBUF */ - if (cloned_so->so_snd.sb_hiwat > 0) { - if (sbreserve(&so->so_snd, - cloned_so->so_snd.sb_hiwat) == 0) { - error = ENOBUFS; - goto out; - } - } - if (cloned_so->so_rcv.sb_hiwat > 0) { - if (sbreserve(&so->so_rcv, - cloned_so->so_rcv.sb_hiwat) == 0) { - error = ENOBUFS; - goto out; - } - } - - /* SO_SNDLOWAT, SO_RCVLOWAT */ - so->so_snd.sb_lowat = - (cloned_so->so_snd.sb_lowat > so->so_snd.sb_hiwat) ? - so->so_snd.sb_hiwat : cloned_so->so_snd.sb_lowat; - so->so_rcv.sb_lowat = - (cloned_so->so_rcv.sb_lowat > so->so_rcv.sb_hiwat) ?
- so->so_rcv.sb_hiwat : cloned_so->so_rcv.sb_lowat; - - /* SO_SNDTIMEO, SO_RCVTIMEO */ - so->so_snd.sb_timeo = cloned_so->so_snd.sb_timeo; - so->so_rcv.sb_timeo = cloned_so->so_rcv.sb_timeo; - } - - error = (*so->so_proto->pr_usrreqs->pru_control)(so, cmd, - data, 0, p); - /* Just ignore protocols that do not understand it */ - if (error == EOPNOTSUPP) - error = 0; + case SIOCSETOT: /* int; deprecated */ + error = EOPNOTSUPP; + goto out; + case SIOCGASSOCIDS32: /* so_aidreq32 */ + case SIOCGASSOCIDS64: /* so_aidreq64 */ + case SIOCGCONNIDS32: /* so_cidreq32 */ + case SIOCGCONNIDS64: /* so_cidreq64 */ + case SIOCGCONNINFO32: /* so_cinforeq32 */ + case SIOCGCONNINFO64: /* so_cinforeq64 */ + case SIOCSCONNORDER: /* so_cordreq */ + case SIOCGCONNORDER: /* so_cordreq */ + error = (*so->so_proto->pr_usrreqs->pru_control)(so, + cmd, data, NULL, p); goto out; } - } + /* * Interface/routing/protocol specific ioctls: * interface and routing ioctls should have a @@ -324,12 +282,10 @@ soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) error = rtioctl(cmd, data, p); else error = (*so->so_proto->pr_usrreqs->pru_control)(so, - cmd, data, 0, p); + cmd, data, NULL, p); } out: - if (dropsockref != -1) - file_drop(dropsockref); socket_unlock(so, 1); if (error == EJUSTRETURN) @@ -342,7 +298,6 @@ int soo_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx) { struct socket *so; - int error; proc_t procp = vfs_context_proc(ctx); if ((so = (struct socket *)fp->f_fglob->fg_data) == NULL) { @@ -350,12 +305,7 @@ soo_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx) return (EBADF); } - error = soioctl(so, cmd, data, procp); - - if (error == 0 && cmd == SIOCSETOT) - fp->f_fglob->fg_flag |= FNONBLOCK; - - return (error); + return (soioctl(so, cmd, data, procp)); } int @@ -367,11 +317,11 @@ soo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx) if (so == NULL || so == (struct socket *)-1) return (0); - + procp = vfs_context_proc(ctx); #if CONFIG_MACF_SOCKET - if (mac_socket_check_select(vfs_context_ucred(ctx), so, which) != 0); + if (mac_socket_check_select(vfs_context_ucred(ctx), so, which) != 0) return (0); #endif /* CONFIG_MACF_SOCKET */ @@ -502,3 +452,45 @@ soo_drain(struct fileproc *fp, __unused vfs_context_t ctx) return (error); } + +/* + * 's' group ioctls. + * + * The switch statement below does nothing at runtime, as it serves as a + * compile time check to ensure that all of the socket 's' ioctls (those + * in the 's' group going thru soo_ioctl) that are made available by the + * networking stack are unique. This works as long as this routine gets + * updated each time a new interface ioctl gets added. + * + * Any failures at compile time indicate duplicated ioctl values. + */ +static __attribute__((unused)) void +soioctl_cassert(void) +{ + /* + * This is equivalent to _CASSERT() and the compiler wouldn't + * generate any instructions, thus for compile time only.
+ */ + switch ((u_long)0) { + case 0: + + /* bsd/sys/sockio.h */ + case SIOCSHIWAT: + case SIOCGHIWAT: + case SIOCSLOWAT: + case SIOCGLOWAT: + case SIOCATMARK: + case SIOCSPGRP: + case SIOCGPGRP: + case SIOCSETOT: + case SIOCGASSOCIDS32: + case SIOCGASSOCIDS64: + case SIOCGCONNIDS32: + case SIOCGCONNIDS64: + case SIOCGCONNINFO32: + case SIOCGCONNINFO64: + case SIOCSCONNORDER: + case SIOCGCONNORDER: + ; + } +} diff --git a/bsd/kern/syscalls.master b/bsd/kern/syscalls.master index 0a2583804..67e8898b8 100644 --- a/bsd/kern/syscalls.master +++ b/bsd/kern/syscalls.master @@ -31,6 +31,9 @@ ; N.B.: makesyscalls.sh and createsyscalls.pl must be updated to account ; for any new argument types. +; If you add a new syscall number to the end of this file, you need to +; increment the value of NUM_SYSENT in bsd/sys/sysent.h. + #include #include #include @@ -215,7 +218,7 @@ 139 AUE_FUTIMES ALL { int futimes(int fd, struct timeval *tptr); } 140 AUE_ADJTIME ALL { int adjtime(struct timeval *delta, struct timeval *olddelta); } 141 AUE_NULL ALL { int nosys(void); } { old getpeername } -142 AUE_SYSCTL ALL { int gethostuuid(unsigned char *uuid_buf, const struct timespec *timeoutp); } +142 AUE_SYSCTL ALL { int gethostuuid(unsigned char *uuid_buf, const struct timespec *timeoutp, int spi) NO_SYSCALL_STUB; } 143 AUE_NULL ALL { int nosys(void); } { old sethostid } 144 AUE_NULL ALL { int nosys(void); } { old getrlimit } 145 AUE_NULL ALL { int nosys(void); } { old setrlimit } @@ -292,25 +295,14 @@ 204 AUE_MUNLOCK ALL { int munlock(caddr_t addr, size_t len); } 205 AUE_UNDELETE ALL { int undelete(user_addr_t path); } -#if NETAT -206 AUE_ATSOCKET ALL { int ATsocket(int proto); } -207 AUE_ATGETMSG UALL { int ATgetmsg(int fd, void *ctlptr, void *datptr, int *flags); } -208 AUE_ATPUTMSG UALL { int ATputmsg(int fd, void *ctlptr, void *datptr, int flags); } -209 AUE_ATPSNDREQ UALL { int ATPsndreq(int fd, unsigned char *buf, int len, int nowait); } -210 AUE_ATPSNDRSP UALL { int ATPsndrsp(int fd, unsigned char *respbuff, int resplen, int datalen); } -211 AUE_ATPGETREQ UALL { int ATPgetreq(int fd, unsigned char *buf, int buflen); } -212 AUE_ATPGETRSP UALL { int ATPgetrsp(int fd, unsigned char *bdsp); } -213 AUE_NULL ALL { int nosys(void); } { Reserved for AppleTalk } -#else -206 AUE_NULL ALL { int nosys(void); } -207 AUE_NULL ALL { int nosys(void); } -208 AUE_NULL ALL { int nosys(void); } -209 AUE_NULL ALL { int nosys(void); } -210 AUE_NULL ALL { int nosys(void); } -211 AUE_NULL ALL { int nosys(void); } -212 AUE_NULL ALL { int nosys(void); } +206 AUE_NULL ALL { int nosys(void); } { old ATsocket } +207 AUE_NULL ALL { int nosys(void); } { old ATgetmsg } +208 AUE_NULL ALL { int nosys(void); } { old ATputmsg } +209 AUE_NULL ALL { int nosys(void); } { old ATsndreq } +210 AUE_NULL ALL { int nosys(void); } { old ATsndrsp } +211 AUE_NULL ALL { int nosys(void); } { old ATgetreq } +212 AUE_NULL ALL { int nosys(void); } { old ATgetrsp } 213 AUE_NULL ALL { int nosys(void); } { Reserved for AppleTalk } -#endif /* NETAT */ 214 AUE_NULL ALL { int nosys(void); } 215 AUE_NULL ALL { int nosys(void); } @@ -370,17 +362,17 @@ 249 AUE_NULL ALL { int nosys(void); } 250 AUE_MINHERIT ALL { int minherit(void *addr, size_t len, int inherit); } #if SYSV_SEM -251 AUE_SEMSYS ALL { int semsys(u_int which, int a2, int a3, int a4, int a5); } +251 AUE_SEMSYS ALL { int semsys(u_int which, int a2, int a3, int a4, int a5) NO_SYSCALL_STUB; } #else 251 AUE_NULL ALL { int nosys(void); } #endif #if SYSV_MSG -252 AUE_MSGSYS ALL { int msgsys(u_int which, int a2, int 
a3, int a4, int a5); } +252 AUE_MSGSYS ALL { int msgsys(u_int which, int a2, int a3, int a4, int a5) NO_SYSCALL_STUB; } #else 252 AUE_NULL ALL { int nosys(void); } #endif #if SYSV_SHM -253 AUE_SHMSYS ALL { int shmsys(u_int which, int a2, int a3, int a4); } +253 AUE_SHMSYS ALL { int shmsys(u_int which, int a2, int a3, int a4) NO_SYSCALL_STUB; } #else 253 AUE_NULL ALL { int nosys(void); } #endif @@ -417,7 +409,7 @@ 264 AUE_NULL ALL { int nosys(void); } 265 AUE_NULL ALL { int nosys(void); } #endif -266 AUE_SHMOPEN ALL { int shm_open(const char *name, int oflag, int mode); } +266 AUE_SHMOPEN ALL { int shm_open(const char *name, int oflag, int mode) NO_SYSCALL_STUB; } 267 AUE_SHMUNLINK ALL { int shm_unlink(const char *name); } 268 AUE_SEMOPEN ALL { user_addr_t sem_open(const char *name, int oflag, int mode, int value) NO_SYSCALL_STUB; } 269 AUE_SEMCLOSE ALL { int sem_close(sem_t *sem); } @@ -582,6 +574,7 @@ 378 AUE_NULL ALL { int nosys(void); } 379 AUE_NULL ALL { int nosys(void); } 380 AUE_MAC_EXECVE ALL { int __mac_execve(char *fname, char **argp, char **envp, struct mac *mac_p); } +#if CONFIG_MACF 381 AUE_MAC_SYSCALL ALL { int __mac_syscall(char *policy, int call, user_addr_t arg); } 382 AUE_MAC_GET_FILE ALL { int __mac_get_file(char *path_p, struct mac *mac_p); } 383 AUE_MAC_SET_FILE ALL { int __mac_set_file(char *path_p, struct mac *mac_p); } @@ -595,6 +588,21 @@ 391 AUE_MAC_GET_LCID ALL { int __mac_get_lcid(pid_t lcid, struct mac *mac_p); } 392 AUE_MAC_GET_LCTX ALL { int __mac_get_lctx(struct mac *mac_p); } 393 AUE_MAC_SET_LCTX ALL { int __mac_set_lctx(struct mac *mac_p); } +#else +381 AUE_MAC_SYSCALL ALL { int enosys(void); } +382 AUE_MAC_GET_FILE ALL { int nosys(void); } +383 AUE_MAC_SET_FILE ALL { int nosys(void); } +384 AUE_MAC_GET_LINK ALL { int nosys(void); } +385 AUE_MAC_SET_LINK ALL { int nosys(void); } +386 AUE_MAC_GET_PROC ALL { int nosys(void); } +387 AUE_MAC_SET_PROC ALL { int nosys(void); } +388 AUE_MAC_GET_FD ALL { int nosys(void); } +389 AUE_MAC_SET_FD ALL { int nosys(void); } +390 AUE_MAC_GET_PID ALL { int nosys(void); } +391 AUE_MAC_GET_LCID ALL { int nosys(void); } +392 AUE_MAC_GET_LCTX ALL { int nosys(void); } +393 AUE_MAC_SET_LCTX ALL { int nosys(void); } +#endif 394 AUE_SETLCID ALL { int setlcid(pid_t pid, pid_t lcid) NO_SYSCALL_STUB; } 395 AUE_GETLCID ALL { int getlcid(pid_t pid) NO_SYSCALL_STUB; } 396 AUE_NULL ALL { user_ssize_t read_nocancel(int fd, user_addr_t cbuf, user_size_t nbyte) NO_SYSCALL_STUB; } @@ -650,7 +658,11 @@ 423 AUE_SEMWAITSIGNAL ALL { int __semwait_signal_nocancel(int cond_sem, int mutex_sem, int timeout, int relative, int64_t tv_sec, int32_t tv_nsec); } ;#endif 424 AUE_MAC_MOUNT ALL { int __mac_mount(char *type, char *path, int flags, caddr_t data, struct mac *mac_p); } +#if CONFIG_MACF 425 AUE_MAC_GET_MOUNT ALL { int __mac_get_mount(char *path, struct mac *mac_p); } +#else +425 AUE_MAC_GET_MOUNT ALL { int nosys(void); } +#endif 426 AUE_MAC_GETFSSTAT ALL { int __mac_getfsstat(user_addr_t buf, int bufsize, user_addr_t mac, int macsize, int flags); } 427 AUE_FSGETPATH ALL { user_ssize_t fsgetpath(user_addr_t buf, size_t bufsize, user_addr_t fsid, uint64_t objid) NO_SYSCALL_STUB; } { private fsgetpath (File Manager SPI) } 428 AUE_NULL ALL { mach_port_name_t audit_session_self(void); } @@ -660,13 +672,47 @@ 432 AUE_NULL ALL { int audit_session_port(au_asid_t asid, user_addr_t portnamep); } 433 AUE_NULL ALL { int pid_suspend(int pid); } 434 AUE_NULL ALL { int pid_resume(int pid); } -#if CONFIG_EMBEDDED -435 AUE_NULL ALL { int pid_hibernate(int pid); } 
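Stepping back to the soioctl_cassert() routine added in sys_socket.c above: it leans on the C rule that duplicate case labels are a compile-time error, so any two 's'-group ioctls that decode to the same value break the build instead of colliding at runtime. The same idiom works for any set of constants; a standalone sketch with made-up values:

    /*
     * If any two of these decode to the same value, the switch below has
     * duplicate case labels and compilation fails.  The function is never
     * called; __attribute__((unused)) keeps the compiler quiet about that.
     */
    #define CMD_A 1
    #define CMD_B 2
    #define CMD_C 3

    static __attribute__((unused)) void
    cmd_uniqueness_cassert(void)
    {
        switch ((unsigned long)0) {
        case 0:
        case CMD_A:
        case CMD_B:
        case CMD_C:
            ;
        }
    }

    int
    main(void)
    {
        return 0;  /* nothing happens at runtime; the check was at compile time */
    }

The comparable check for the syscall table is manual, hence the reminder above to bump NUM_SYSENT in bsd/sys/sysent.h when appending entries.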
-436 AUE_NULL ALL { int pid_shutdown_sockets(int pid, int level); } -#else -435 AUE_NULL ALL { int nosys(void); } +435 AUE_NULL ALL { int nosys(void); } 436 AUE_NULL ALL { int nosys(void); } -#endif 437 AUE_NULL ALL { int nosys(void); } { old shared_region_slide_np } 438 AUE_NULL ALL { int shared_region_map_and_slide_np(int fd, uint32_t count, const struct shared_file_mapping_np *mappings, uint32_t slide, uint64_t* slide_start, uint32_t slide_size) NO_SYSCALL_STUB; } 439 AUE_NULL ALL { int kas_info(int selector, void *value, size_t *size); } +#if CONFIG_MEMORYSTATUS +440 AUE_NULL ALL { int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, user_addr_t buffer, size_t buffersize); } +#else +440 AUE_NULL ALL { int nosys(void); } +#endif +441 AUE_OPEN_RWTC ALL { int guarded_open_np(const char *path, const guardid_t *guard, u_int guardflags, int flags, int mode) NO_SYSCALL_STUB; } +442 AUE_CLOSE ALL { int guarded_close_np(int fd, const guardid_t *guard); } +443 AUE_KQUEUE ALL { int guarded_kqueue_np(const guardid_t *guard, u_int guardflags); } +444 AUE_NULL ALL { int change_fdguard_np(int fd, const guardid_t *guard, u_int guardflags, const guardid_t *nguard, u_int nguardflags, int *fdflagsp); } +445 AUE_NULL ALL { int nosys(void); } { old __proc_suppress } +446 AUE_NULL ALL { int proc_rlimit_control(pid_t pid, int flavor, void *arg); } +#if SOCKETS +447 AUE_CONNECT ALL { int connectx(int s, struct sockaddr *src, socklen_t srclen, struct sockaddr *dsts, socklen_t dstlen, uint32_t ifscope, associd_t aid, connid_t *cid); } +448 AUE_NULL ALL { int disconnectx(int s, associd_t aid, connid_t cid); } +449 AUE_NULL ALL { int peeloff(int s, associd_t aid); } +450 AUE_SOCKET ALL { int socket_delegate(int domain, int type, int protocol, pid_t epid); } +#else +447 AUE_NULL ALL { int nosys(void); } +448 AUE_NULL ALL { int nosys(void); } +449 AUE_NULL ALL { int nosys(void); } +450 AUE_NULL ALL { int nosys(void); } +#endif /* SOCKETS */ +#if CONFIG_TELEMETRY +451 AUE_NULL ALL { int telemetry(uint64_t cmd, uint64_t deadline, uint64_t interval, uint64_t leeway, uint64_t arg4, uint64_t arg5) NO_SYSCALL_STUB; } +#else +451 AUE_NULL ALL { int nosys(void); } +#endif /* TELEMETRY */ +#if CONFIG_PROC_UUID_POLICY +452 AUE_NULL ALL { int proc_uuid_policy(uint32_t operation, uuid_t uuid, size_t uuidlen, uint32_t flags); } +#else +452 AUE_NULL ALL { int nosys(void); } +#endif +#if CONFIG_MEMORYSTATUS +453 AUE_NULL ALL { int memorystatus_get_level(user_addr_t level); } +#else +453 AUE_NULL ALL { int nosys(void); } +#endif +454 AUE_NULL ALL { int system_override(uint64_t timeout, uint64_t flags); } +455 AUE_NULL ALL { int vfs_purge(void); } diff --git a/bsd/kern/sysv_sem.c b/bsd/kern/sysv_sem.c index f172333ef..f98530962 100644 --- a/bsd/kern/sysv_sem.c +++ b/bsd/kern/sysv_sem.c @@ -444,7 +444,8 @@ grow_sem_pool(int new_pool_size) /* Update our id structures to point to the new semaphores */ for(i = 0; i < seminfo.semmni; i++) { if (sema[i].u.sem_perm.mode & SEM_ALLOC) /* ID in use */ - sema[i].u.sem_base += (new_sem_pool - sem_pool); + sema[i].u.sem_base = new_sem_pool + + (sema[i].u.sem_base - sem_pool); } sem_free = sem_pool; @@ -774,10 +775,12 @@ semctl(struct proc *p, struct semctl_args *uap, int32_t *retval) if (IS_64BIT_PROCESS(p)) { struct user64_semid_ds semid_ds64; + bzero(&semid_ds64, sizeof(semid_ds64)); semid_ds_kernelto64(&semakptr->u, &semid_ds64); eval = copyout(&semid_ds64, user_arg.buf, sizeof(semid_ds64)); } else { struct user32_semid_ds semid_ds32; + bzero(&semid_ds32, 
sizeof(semid_ds32)); semid_ds_kernelto32(&semakptr->u, &semid_ds32); eval = copyout(&semid_ds32, user_arg.buf, sizeof(semid_ds32)); } @@ -853,12 +856,27 @@ semctl(struct proc *p, struct semctl_args *uap, int32_t *retval) eval = EINVAL; goto semctlout; } + /* * Cast down a pointer instead of using 'val' member directly * to avoid introducing endieness and a pad field into the * header file. Ugly, but it works. */ - semakptr->u.sem_base[semnum].semval = CAST_DOWN_EXPLICIT(int,user_arg.buf); + u_int newsemval = CAST_DOWN_EXPLICIT(u_int, user_arg.buf); + + /* + * The check is being performed as unsigned values to match + * eventual destination + */ + if (newsemval > (u_int)seminfo.semvmx) + { +#ifdef SEM_DEBUG + printf("Out of range sem value for set\n"); +#endif + eval = ERANGE; + goto semctlout; + } + semakptr->u.sem_base[semnum].semval = newsemval; semakptr->u.sem_base[semnum].sempid = p->p_pid; /* XXX scottl Should there be a MAC call here? */ semundo_clear(semid, semnum); @@ -1635,9 +1653,11 @@ IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, * descriptor to a 32 bit user one. */ if (!IS_64BIT_PROCESS(p)) { + bzero(&semid_ds32, sizeof(semid_ds32)); semid_ds_kernelto32(semid_dsp, &semid_ds32); semid_dsp = &semid_ds32; } else { + bzero(&semid_ds64, sizeof(semid_ds64)); semid_ds_kernelto64(semid_dsp, &semid_ds64); semid_dsp = &semid_ds64; } diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes index 7bfcacf18..a31308ad2 100644 --- a/bsd/kern/trace.codes +++ b/bsd/kern/trace.codes @@ -42,11 +42,20 @@ 0x107004c UTrap_SIMD_FP 0x1090000 DecrTrap 0x1090004 DecrSet -0x1090008 TimerCallIntr -0x109000c pmsStep -0x1090010 TimerMigration -0x1090014 rdHPET -0x1090018 set_tsc_deadline +0x1090008 TMR_TimerCallIntr +0x109000c TMR_pmsStep +0x1090010 TMR_TimerMigration +0x1090014 TMR_rdHPET +0x1090018 TMR_set_tsc_deadline +0x109001c TMR_TimerCallEnter +0x1090020 TMR_TimerCallCancel +0x1090024 TMR_TimerQueue +0x1090028 TMR_TimerCallExpire +0x109002c TMR_AsyncDequeue +0x1090030 TMR_TimerUpdate +0x1090034 TMR_TimerEscalate +0x1090038 TMR_TimerOverdue +0x109003C TMR_Rescan 0x10c0000 MACH_SysCall 0x10c0004 MSC_kern_invalid_#1 0x10c0008 MSC_kern_invalid_#2 @@ -172,6 +181,8 @@ 0x10c01f4 MSC_kern_invalid_#125 0x10c01f8 MSC_kern_invalid_#126 0x10c01fc MSC_kern_invalid_#127 +0x1200000 MACH_task_suspend +0x1200004 MACH_task_resume 0x1300004 MACH_Pageout 0x1300008 MACH_vmfault 0x1300100 MACH_purgable_token_add @@ -185,14 +196,18 @@ 0x1300400 MACH_vm_check_zf_delay 0x1300404 MACH_vm_cow_delay 0x1300408 MACH_vm_zf_delay +0x130040c MACH_vm_compressor_delay 0x1300410 MACH_vm_pageout_scan 0x1300414 MACH_vm_pageout_balanceQ 0x1300418 MACH_vm_pageout_freelist 0x130041c MACH_vm_pageout_purge_one 0x1300420 MACH_vm_pageout_cache_evict 0x1300424 MACH_vm_pageout_thread_block +0x1300428 MACH_vm_pageout_jetsam +0x130042C MACH_vm_pageout_token 0x1300480 MACH_vm_upl_page_wait 0x1300484 MACH_vm_iopl_page_wait +0x1300488 MACH_vm_page_wait_block 0x1400000 MACH_SCHED 0x1400004 MACH_STKATTACH 0x1400008 MACH_STKHANDOFF @@ -216,8 +231,12 @@ 0x1400054 MACH_SCHED_URGENCY 0x1400058 MACH_SCHED_REDISPATCH 0x140005C MACH_SCHED_REMOTE_AST -0x1400060 MACH_SCHED_LPA_BROKEN +0x1400060 MACH_SCHED_CHOOSE_PROCESSOR 0x1400064 MACH_DEEP_IDLE +0x1400068 MACH_SCHED_DECAY_PRIORITY +0x140006C MACH_CPU_THROTTLE_DISABLE +0x1400070 MACH_RW_PROMOTE +0x1400074 MACH_RW_DEMOTE 0x1500000 MACH_MSGID_INVALID 0x1600000 MTX_SLEEP 0x1600004 MTX_SLEEP_DEADLINE @@ -258,6 +277,10 @@ 0x1700020 PMAP_flush_TLBS 0x1700024 PMAP_update_interrupt 0x1700028 
PMAP_attribute_clear +0x170002c PMAP_reusable +0x1700030 PMAP_query_resident +0x1700034 PMAP_flush_kernel_TLBS +0x1700038 PMAP_flush_delayed_TLBS 0x1900000 MP_TLB_FLUSH 0x1900004 MP_CPUS_CALL 0x1900008 MP_CPUS_CALL_LOCAL @@ -266,6 +289,8 @@ 0x1900014 MP_CPU_FAST_START 0x1900018 MP_CPU_START 0x190001c MP_CPU_DEACTIVATE +0x1a10000 MICROSTACKSHOT_RECORD +0x1a10004 MICROSTACKSHOT_GATHER 0x2010000 L_IP_In_Beg 0x2010004 L_IP_Out_Beg 0x2010008 L_IP_In_End @@ -487,6 +512,7 @@ 0x3011024 UPL_map_enter_upl 0x3011028 UPL_map_remove_upl 0x301102c UPL_commit_range_speculative +0x3018000 hfs_update 0x3020000 P_WrData 0x3020004 P_WrDataDone 0x3020008 P_RdData @@ -511,30 +537,78 @@ 0x3020054 P_PgOutAsyncDone 0x3020058 P_PgInAsync 0x302005C P_PgInAsyncDone -0x3020080 P_WrDataT -0x3020084 P_WrDataTDone -0x3020088 P_RdDataT -0x302008C P_RdDataTDone -0x3020090 P_WrDataAsyncT -0x3020094 P_WrDataAsyncTDone -0x3020098 P_RdDataAsyncT -0x302009C P_RdDataAsyncTDone -0x30200a0 P_WrMetaT -0x30200A4 P_WrMetaTDone -0x30200a8 P_RdMetaT -0x30200AC P_RdMetaTDone -0x30200b0 P_WrMetaAsyncT -0x30200B4 P_WrMetaAsyncTDone -0x30200b8 P_RdMetaAsyncT -0x30200BC P_RdMetaAsyncTDone -0x30200c0 P_PgOutT -0x30200C4 P_PgOutTDone -0x30200c8 P_PgInT -0x30200CC P_PgInTDone -0x30200d0 P_PgOutAsyncT -0x30200D4 P_PgOutAsyncTDone -0x30200d8 P_PgInAsyncT -0x30200DC P_PgInAsyncTDone +0x3020480 P_WrDataT1 +0x3020880 P_WrDataT2 +0x3020C80 P_WrDataT3 +0x3020484 P_WrDataT1Done +0x3020884 P_WrDataT2Done +0x3020C84 P_WrDataT3Done +0x3020488 P_RdDataT1 +0x3020888 P_RdDataT2 +0x3020C88 P_RdDataT3 +0x302048C P_RdDataT1Done +0x302088C P_RdDataT2Done +0x3020C8C P_RdDataT3Done +0x3020490 P_WrDataAsyncT1 +0x3020890 P_WrDataAsyncT2 +0x3020C90 P_WrDataAsyncT3 +0x3020494 P_WrDataAsyncT1Done +0x3020894 P_WrDataAsyncT2Done +0x3020C94 P_WrDataAsyncT3Done +0x3020498 P_RdDataAsyncT1 +0x3020898 P_RdDataAsyncT2 +0x3020C98 P_RdDataAsyncT3 +0x302049C P_RdDataAsyncT1Done +0x302089C P_RdDataAsyncT2Done +0x3020C9C P_RdDataAsyncT3Done +0x30204a0 P_WrMetaT1 +0x30208a0 P_WrMetaT2 +0x3020Ca0 P_WrMetaT3 +0x30204A4 P_WrMetaT1Done +0x30208A4 P_WrMetaT2Done +0x3020CA4 P_WrMetaT3Done +0x30204a8 P_RdMetaT1 +0x30208a8 P_RdMetaT2 +0x3020Ca8 P_RdMetaT3 +0x30204AC P_RdMetaT1Done +0x30208AC P_RdMetaT2Done +0x3020CAC P_RdMetaT3Done +0x30204b0 P_WrMetaAsyncT1 +0x30208b0 P_WrMetaAsyncT2 +0x3020Cb0 P_WrMetaAsyncT3 +0x30204B4 P_WrMetaAsyncT1Done +0x30208B4 P_WrMetaAsyncT2Done +0x3020CB4 P_WrMetaAsyncT3Done +0x30204b8 P_RdMetaAsyncT1 +0x30208b8 P_RdMetaAsyncT2 +0x3020Cb8 P_RdMetaAsyncT3 +0x30204BC P_RdMetaAsyncT1Done +0x30208BC P_RdMetaAsyncT2Done +0x3020CBC P_RdMetaAsyncT3Done +0x30204c0 P_PgOutT1 +0x30208c0 P_PgOutT2 +0x3020Cc0 P_PgOutT3 +0x30204C4 P_PgOutT1Done +0x30208C4 P_PgOutT2Done +0x3020CC4 P_PgOutT3Done +0x30204c8 P_PgInT1 +0x30208c8 P_PgInT2 +0x3020Cc8 P_PgInT3 +0x30204CC P_PgInT1Done +0x30208CC P_PgInT2Done +0x3020CCC P_PgInT3Done +0x30204d0 P_PgOutAsyncT1 +0x30208d0 P_PgOutAsyncT2 +0x3020Cd0 P_PgOutAsyncT3 +0x30204D4 P_PgOutAsyncT1Done +0x30208D4 P_PgOutAsyncT2Done +0x3020CD4 P_PgOutAsyncT3Done +0x30204d8 P_PgInAsyncT1 +0x30208d8 P_PgInAsyncT2 +0x3020Cd8 P_PgInAsyncT3 +0x30204DC P_PgInAsyncT1Done +0x30208DC P_PgInAsyncT2Done +0x3020CDC P_PgInAsyncT3Done 0x3020100 P_WrDataP 0x3020104 P_WrDataPDone 0x3020108 P_RdDataP @@ -559,37 +633,64 @@ 0x3020154 P_PgOutAsyncPDone 0x3020158 P_PgInAsyncP 0x302015C P_PgInAsyncPDone -0x3020200 P_WrDataN -0x3020208 P_RdDataN -0x3020210 P_WrDataAsyncN -0x3020218 P_RdDataAsyncN -0x3020204 P_WrDataNDone -0x302020C P_RdDataNDone -0x3020214 
P_WrDataAsyncNDone -0x302021C P_RdDataAsyncNDone -0x3020280 P_WrDataNT -0x3020288 P_RdDataNT -0x3020290 P_WrDataAsyncNT -0x3020298 P_RdDataAsyncNT -0x3020284 P_WrDataNTDone -0x302028C P_RdDataNTDone -0x3020294 P_WrDataAsyncNTDone -0x302029C P_RdDataAsyncNTDone -0x3020300 P_WrDataNP -0x3020308 P_RdDataNP -0x3020310 P_WrDataAsyncNP -0x3020318 P_RdDataAsyncNP -0x3020304 P_WrDataNPDone -0x302030C P_RdDataNPDone -0x3020314 P_WrDataAsyncNPDone -0x302031C P_RdDataAsyncNPDone +0x3020200 P_WrDataN +0x3020208 P_RdDataN +0x3020210 P_WrDataAsyncN +0x3020218 P_RdDataAsyncN +0x3020204 P_WrDataNDone +0x302020C P_RdDataNDone +0x3020214 P_WrDataAsyncNDone +0x302021C P_RdDataAsyncNDone +0x3020680 P_WrDataNT1 +0x3020A80 P_WrDataNT2 +0x3020E80 P_WrDataNT3 +0x3020688 P_RdDataNT1 +0x3020A88 P_RdDataNT2 +0x3020E88 P_RdDataNT3 +0x3020690 P_WrDataAsyncNT1 +0x3020A90 P_WrDataAsyncNT2 +0x3020E90 P_WrDataAsyncNT3 +0x3020698 P_RdDataAsyncNT1 +0x3020A98 P_RdDataAsyncNT2 +0x3020E98 P_RdDataAsyncNT3 +0x3020684 P_WrDataNT1Done +0x3020A84 P_WrDataNT2Done +0x3020E84 P_WrDataNT3Done +0x302068C P_RdDataNT1Done +0x3020A8C P_RdDataNT2Done +0x3020E8C P_RdDataNT3Done +0x3020694 P_WrDataAsyncNT1Done +0x3020A94 P_WrDataAsyncNT2Done +0x3020E94 P_WrDataAsyncNT3Done +0x302069C P_RdDataAsyncNT1Done +0x3020A9C P_RdDataAsyncNT2Done +0x3020E9C P_RdDataAsyncNT3Done +0x3020300 P_WrDataNP +0x3020308 P_RdDataNP +0x3020310 P_WrDataAsyncNP +0x3020318 P_RdDataAsyncNP +0x3020304 P_WrDataNPDone +0x302030C P_RdDataNPDone +0x3020314 P_WrDataAsyncNPDone +0x302031C P_RdDataAsyncNPDone 0x3050004 journal_flush 0x3060000 SPEC_ioctl 0x3060004 SPEC_trim_extent 0x3070004 BootCache_tag 0x3070008 BootCache_batch +0x3110004 OpenThrottleWindow +0x3110008 CauseIOThrottle +0x311000C IO_THROTTLE_DISABLE 0x4010004 proc_exit 0x4010008 force_exit +0x4020004 MEMSTAT_scan +0x4020008 MEMSTAT_jetsam +0x402000C MEMSTAT_jetsam_hiwat +0x4020010 MEMSTAT_freeze +0x4020014 MEMSTAT_latency_coalesce +0x4020018 MEMSTAT_update +0x402001C MEMSTAT_idle_demote +0x4020020 MEMSTAT_clear_errors 0x40c0000 BSC_SysCall 0x40c0004 BSC_exit 0x40c0008 BSC_fork @@ -783,7 +884,7 @@ 0x40c02f8 BSC_lstat 0x40c02fc BSC_pathconf 0x40c0300 BSC_fpathconf -0x40c0304 BSC_#193 +0x40c0304 BSC_obs_getfsstat 0x40c0308 BSC_getrlimit 0x40c030c BSC_setrlimit 0x40c0310 BSC_getdirentries @@ -806,7 +907,7 @@ 0x40c0354 BSC_#213 0x40c0358 BSC_#214 0x40c035c BSC_#215 -0x40c0360 BSC_obs_mkcomplex +0x40c0360 BSC_open_dprotected_np 0x40c0364 BSC_obs_statv 0x40c0368 BSC_obs_lstatv 0x40c036c BSC_obs_fstatv @@ -814,7 +915,7 @@ 0x40c0374 BSC_setattrlist 0x40c0378 BSC_getdirentriesattr 0x40c037c BSC_exchangedata -0x40c0380 BSC_#224 +0x40c0380 BSC_checkuseraccess 0x40c0384 BSC_searchfs 0x40c0388 BSC_delete_Carbon 0x40c038c BSC_copyfile @@ -847,7 +948,7 @@ 0x40c03f8 BSC_semctl 0x40c03fc BSC_semget 0x40c0400 BSC_semop -0x40c0404 BSC_#257 +0x40c0404 BSC_semconfig 0x40c0408 BSC_msgctl 0x40c040c BSC_msgget 0x40c0410 BSC_msgsnd @@ -1029,6 +1130,19 @@ 0x40c06d4 BSC_shared_region_slide_np 0x40c06d8 BSC_shared_region_map_and_slide_np 0x40c06dc BSC_kas_info +0x40c06e0 BSC_memorystatus_control +0x40c06e4 BSC_guarded_open_np +0x40c06e8 BSC_guarded_close_np +0x40c06ec BSC_guarded_kqueue_np +0x40c06f0 BSC_change_fdguard_np +0x40c06f4 BSC___proc_suppress +0x40c06f8 BSC_proc_rlimit_control +0x40c06fc BSC_connectx +0x40c0700 BSC_disconnectx +0x40c0704 BSC_peeloff +0x40c0708 BSC_socket_delegate +0x40c070c BSC_telemetry +0x40c0710 BSC_proc_uuid_policy 0x40e0104 BSC_msync_extended_info 0x40e0264 BSC_pread_extended_info 0x40e0268 
BSC_pwrite_extended_info @@ -1041,6 +1155,7 @@ 0x5020008 IES_latency 0x502000c IES_sema 0x5020010 IES_intctxt +0x5020014 IES_intfltr 0x5020018 IES_action 0x502001c IES_filter 0x5030004 TES_client @@ -1052,7 +1167,7 @@ 0x504000c CQ_sema 0x5040010 CQ_psema 0x5040014 CQ_plock -0x5040018 CG_action +0x5040018 CQ_action 0x5070004 PM_SetParent 0x5070008 PM_AddChild 0x507000c PM_RemoveChild @@ -1079,6 +1194,8 @@ 0x5070060 PM_CriticalTemp 0x5070064 PM_OverrideOn 0x5070068 PM_OverrideOff +0x507006c PM_ChangeStateForRoot +0x5070070 PM_SynchronizeTree 0x5070074 PM_ChangeDone 0x5070078 PM_CtrlDriverTardy 0x507007c PM_InterestDriverTardy @@ -1118,9 +1235,9 @@ 0x508003c IOSERVICE_TERM_STOP_NOP 0x5080040 IOSERVICE_TERM_STOP_DEFER 0x5080044 IOSERVICE_TERM_DONE -0x5080048 IOSERVICE_KEXTD_ALIVE -0x508004C IOSERVICE_KEXTD_READY -0x5080050 IOSERVICE_REGISTRY_QUIET +0x5080048 IOSERVICE_KEXTD_ALIVE +0x508004C IOSERVICE_KEXTD_READY +0x5080050 IOSERVICE_REGISTRY_QUIET 0x5230000 HID_Unexpected 0x5230004 HID_KeyboardLEDThreadTrigger 0x5230008 HID_KeyboardLEDThreadActive @@ -1252,6 +1369,21 @@ 0x53101b4 CPUPM_PSTATE_CHOOSE 0x53101b8 CPUPM_PSTATE_COMMIT 0x53101bc CPUPM_PSTATE_CHECK +0x5310200 CPUPM_PST_RESOLVE +0x5310204 CPUPM_PST_LOAD_TXFR +0x5310208 CPUPM_PST_IDLE_EXIT +0x531020C CPUPM_PST_IDLE_ENTRY +0x5310210 CPUPM_PST_TIMER +0x5310214 CPUPM_PST_MAXBUS +0x5310218 CPUPM_PST_MAXINT +0x531021C CPUPM_PST_PLIMIT +0x5310220 CPUPM_PST_SELFSEL +0x5310224 CPUPM_PST_RATELIMIT +0x5310228 CPUPM_PST_RATEUNLIMIT +0x531022C CPUPM_DVFS_PAUSE +0x5310230 CPUPM_DVFS_RESUME +0x5310234 CPUPM_DVFS_ADVANCE +0x5310238 CPUPM_DVFS_TRANSIT 0x531023C CPUPM_TQM 0x5310240 CPUPM_QUIESCE 0x5310244 CPUPM_MBD @@ -1259,28 +1391,6 @@ 0x531024C CPUPM_PST_QOS_RATEUNLIMIT 0x5310250 CPUPM_PST_QOS_SWITCH 0x5310254 CPUPM_FORCED_IDLE -0x531023C CPUPM_TQM -0x5310240 CPUPM_QUIESCE -0x5310244 CPUPM_MBD -0x5310248 CPUPM_PST_RATELIMIT_QOS -0x531024C CPUPM_PST_QOS_RATEUNLIMIT -0x5310250 CPUPM_PST_QOS_SWITCH -0x5310254 CPUPM_FORCED_IDLE -0x5320000 CPUPM_PST_RESOLVE -0x5320004 CPUPM_PST_LOAD_TXFR -0x5320008 CPUPM_PST_IDLE_EXIT -0x532000C CPUPM_PST_IDLE_ENTRY -0x5320010 CPUPM_PST_TIMER -0x5320014 CPUPM_PST_MAXBUS -0x5320018 CPUPM_PST_MAXINT -0x532001C CPUPM_PST_PLIMIT -0x5320020 CPUPM_PST_SELFSEL -0x5320024 CPUPM_PST_RATELIMIT -0x5320028 CPUPM_PST_RATEUNLIMIT -0x532002C CPUPM_DVFS_PAUSE -0x5320030 CPUPM_DVFS_RESUME -0x5320034 CPUPM_DVFS_ADVANCE -0x5320038 CPUPM_DVFS_TRANSIT 0x5330000 HIBERNATE 0x5330004 HIBERNATE_WRITE_IMAGE 0x5330008 HIBERNATE_MACHINE_INIT @@ -1291,6 +1401,8 @@ 0x533001c HIBERNATE_flush_bufs 0x5330020 HIBERNATE_page_list_setall 0x5330024 HIBERNATE_aes_decrypt_cbc +0x5330028 HIBERNATE_flush_compressor +0x533002c HIBERNATE_fastwake_warmup 0x7000004 TRACE_DATA_NEWTHREAD 0x7000008 TRACE_DATA_EXEC 0x7010004 TRACE_STRING_NEWTHREAD @@ -1667,6 +1779,9 @@ 0x210b000c TAL_APP_LAUNCH_VISIBLE 0x210b0010 TAL_APP_LAUNCH_READY 0x210b0014 TAL_ALL_LAUNCH_READY +0x210c0000 NSAPPLICATION_RECEIVED_KEYEVENT +0x210c0004 NSWINDOW_FLUSHED +0x210c0008 NSTEXTVIEW_PROCESSED_KEYEVENT 0x21800000 SMB_smbd_idle 0x21800004 SMB_syscall_opendir 0x21800008 SMB_syscall_readdir @@ -1852,11 +1967,14 @@ 0x25010008 PERF_THD_XSample 0x2501000c PERF_THD_XPend 0x25010010 PERF_THD_XData +0x25010014 PERF_THD_CSwitch 0x25020000 PERF_STK_KSample 0x25020004 PERF_STK_USched 0x25020008 PERF_STK_USample 0x2502000c PERF_STK_KData 0x25020010 PERF_STK_UData +0x25020014 PERF_STK_KHdr +0x25020018 PERF_STK_UHdr 0x25030000 PERF_TMR_AllSched 0x25030004 PERF_TMR_Schedule 0x25030008 PERF_TMR_Handler @@ 
-1868,6 +1986,92 @@ 0x25040014 PERF_ATS_Sample 0x25050000 PERF_AST_Handler 0x25050004 PERF_AST_Error +0x25060000 PERF_KPC_Handler +0x25060004 PERF_KPC_FCounter +0x25060008 PERF_KPC_Counter +0x2506000c PERF_KPC_Data +0x25060010 PERF_KPC_Config +0x25060014 PERF_KPC_ConfReg +0x25060018 PERF_KPC_Data32 +0x2506001c PERF_KPC_ConfReg32 +0x26100008 imp_assertion_hold +0x2610000c imp_assertion_hold_ext +0x26100020 imp_assertion_externalize +0x26100010 imp_assertion_drop +0x26100014 imp_assertion_drop_ext +0x26110004 imp_boost_task +0x26110008 imp_unboost_task +0x26120004 imp_msg_send +0x26120008 imp_msg_delv +0x26130000 imp_watchport +0x26170000 imp_suppression_inactive +0x26170004 imp_suppression_active +0x26180000 imp_apptype_none +0x26180004 imp_apptype_int_daemon +0x26180008 imp_apptype_std_daemon +0x2618000c imp_apptype_adapt_daemon +0x26180010 imp_apptype_bg_daemon +0x26180014 imp_apptype_default_app +0x26180018 imp_apptype_tal_app +0x26190010 imp_update_task +0x26190020 imp_update_thread +0x26210010 imp_task_int_bg +0x26210014 imp_task_ext_bg +0x26210020 imp_thread_int_bg +0x26210024 imp_thread_ext_bg +0x26220010 imp_task_int_iopol +0x26220014 imp_task_ext_iopol +0x26220020 imp_thread_int_iopol +0x26220024 imp_thread_ext_iopol +0x26230010 imp_task_int_io +0x26230014 imp_task_ext_io +0x26230020 imp_thread_int_io +0x26230024 imp_thread_ext_io +0x26240010 imp_task_int_passive_io +0x26240014 imp_task_ext_passive_io +0x26240020 imp_thread_int_passive_io +0x26240024 imp_thread_ext_passive_io +0x26250010 imp_task_int_bg_plus_gpu +0x26250014 imp_task_ext_bg_plus_gpu +0x26260010 imp_task_int_gpu_deny +0x26260014 imp_task_ext_gpu_deny +0x26270018 imp_task_dbg_iopol +0x26280018 imp_task_tal +0x26290018 imp_task_boost +0x262a0018 imp_task_role +0x262b0018 imp_task_suppressed_cpu +0x262c0018 imp_task_terminated +0x262d0018 imp_task_new_sockets_bg +0x262e0018 imp_task_lowpri_cpu +0x262f0018 imp_task_latency_qos +0x26300018 imp_task_through_qos +0x26310018 imp_task_watchers_bg +0x26320028 imp_thread_pidbind_bg +0x26330028 imp_thread_workq_bg +0x27000000 PERF_PCEVENT +0x27001000 PERF_CPU_IDLE +0x27001100 PERF_CPU_IDLE_TIMER +0x27002000 PERF_VOLT_CHG_SOC +0x27002010 PERF_VOLT_CHG_CPU +0x27002020 PERF_VOLT_CHG_DOM2 +0x27002030 PERF_VOLT_CHG_DOM3 +0x27003000 PERF_PERF_CHG_SOC +0x27003010 PERF_PERF_CHG_CPU +0x27003020 PERF_PERF_CHG_DOM2 +0x27003030 PERF_PERF_CHG_DOM3 +0x2700a000 PERF_ARBITER_EVENT +0x2700a100 PERF_ARBITER_NOTIFY +0x2700a200 PERF_ARBITER_PERF_SET +0x27010100 PERF_CPU_IDL_ACT_TIME +0x2710a500 PERF_FB_CONTROLLER +0x2720a500 PERF_PMC_CONTROLLER +0x2740a500 PERF_STP_CONTROLLER +0x2730a500 PERF_GPU_CONTROLLER +0x2700C000 PERF_CLOCK_GATE +0x2700E000 PERF_SRAMEMA_DOM0 +0x2700E010 PERF_SRAMEMA_DOM1 +0x2700E020 PERF_SRAMEMA_DOM2 +0x2700E030 PERF_SRAMEMA_DOM3 0xff000104 MSG_mach_notify_port_deleted 0xff000114 MSG_mach_notify_port_destroyed 0xff000118 MSG_mach_notify_no_senders @@ -1925,11 +2129,8 @@ 0xff000964 MSG_host_security_set_task_token 0xff000f9c MSG_mach_gss_init_sec_context 0xff000fa0 MSG_clock_get_time -0xff000fa0 MSG_mach_gss_accept_sec_context 0xff000fa4 MSG_clock_get_attributes -0xff000fa4 MSG_mach_gss_log_error 0xff000fa8 MSG_clock_alarm -0xff000fa8 MSG_mach_gss_init_sec_context_v2 0xff000fac MSG_mach_gss_accept_sec_context_v2 0xff000fb0 MSG_mach_gss_hold_cred 0xff000fb4 MSG_mach_gss_unhold_cred diff --git a/bsd/kern/tty.c b/bsd/kern/tty.c index 4d7c5b9fa..01ea986ea 100644 --- a/bsd/kern/tty.c +++ b/bsd/kern/tty.c @@ -2145,7 +2145,7 @@ read: char ibuf[IBUFSIZ]; int icc; - icc = 
min(uio_resid(uio), IBUFSIZ); + icc = MIN(uio_resid(uio), IBUFSIZ); icc = q_to_b(qp, (u_char *)ibuf, icc); if (icc <= 0) { if (first) @@ -2186,8 +2186,8 @@ slowcase: tty_pgsignal(tp, SIGTSTP, 1); tty_lock(tp); if (first) { - error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, - "ttybg3", 0); + error = ttysleep(tp, &ttread, TTIPRI | PCATCH, + "ttybg3", hz); if (error) break; goto loop; @@ -2366,7 +2366,7 @@ loop: * leftover from last time. */ if (cc == 0) { - cc = min(uio_resid(uio), OBUFSIZ); + cc = MIN(uio_resid(uio), OBUFSIZ); cp = obuf; error = uiomove(cp, cc, uio); if (error) { diff --git a/bsd/kern/tty_ptmx.c b/bsd/kern/tty_ptmx.c index a0be4feb5..0f2133634 100644 --- a/bsd/kern/tty_ptmx.c +++ b/bsd/kern/tty_ptmx.c @@ -85,6 +85,10 @@ #include #include /* DEVFS_LOCK()/DEVFS_UNLOCK() */ +#if CONFIG_MACF +#include +#endif + /* XXX belongs in devfs somewhere - LATER */ int _devfs_setattr(void *, unsigned short, uid_t, gid_t); @@ -98,22 +102,23 @@ int _devfs_setattr(void *, unsigned short, uid_t, gid_t); int ptmx_init(int n_ptys); static void ptsd_start(struct tty *tp); static void ptmx_wakeup(struct tty *tp, int flag); -FREE_BSDSTATIC d_open_t ptsd_open; -FREE_BSDSTATIC d_close_t ptsd_close; -FREE_BSDSTATIC d_read_t ptsd_read; -FREE_BSDSTATIC d_write_t ptsd_write; -FREE_BSDSTATIC d_ioctl_t cptyioctl; /* common ioctl */ -FREE_BSDSTATIC d_stop_t ptsd_stop; -FREE_BSDSTATIC d_reset_t ptsd_reset; -FREE_BSDSTATIC d_devtotty_t ptydevtotty; -FREE_BSDSTATIC d_open_t ptmx_open; -FREE_BSDSTATIC d_close_t ptmx_close; -FREE_BSDSTATIC d_read_t ptmx_read; -FREE_BSDSTATIC d_write_t ptmx_write; -FREE_BSDSTATIC d_stop_t ptmx_stop; /* NO-OP */ -FREE_BSDSTATIC d_reset_t ptmx_reset; -FREE_BSDSTATIC d_select_t ptmx_select; -FREE_BSDSTATIC d_select_t ptsd_select; +__XNU_PRIVATE_EXTERN d_open_t ptsd_open; +__XNU_PRIVATE_EXTERN d_close_t ptsd_close; +__XNU_PRIVATE_EXTERN d_read_t ptsd_read; +__XNU_PRIVATE_EXTERN d_write_t ptsd_write; +__XNU_PRIVATE_EXTERN d_ioctl_t cptyioctl; /* common ioctl */ +__XNU_PRIVATE_EXTERN d_stop_t ptsd_stop; +__XNU_PRIVATE_EXTERN d_reset_t ptsd_reset; +__XNU_PRIVATE_EXTERN d_open_t ptmx_open; +__XNU_PRIVATE_EXTERN d_close_t ptmx_close; +__XNU_PRIVATE_EXTERN d_read_t ptmx_read; +__XNU_PRIVATE_EXTERN d_write_t ptmx_write; +__XNU_PRIVATE_EXTERN d_stop_t ptmx_stop; /* NO-OP */ +__XNU_PRIVATE_EXTERN d_reset_t ptmx_reset; +__XNU_PRIVATE_EXTERN d_select_t ptmx_select; +__XNU_PRIVATE_EXTERN d_select_t ptsd_select; + +extern d_devtotty_t ptydevtotty; static int ptmx_major; /* dynamically assigned major number */ static struct cdevsw ptmx_cdev = { @@ -693,8 +698,8 @@ again: pg_rele(pg); tty_lock(tp); - error = ttysleep(tp, &lbolt, TTIPRI | PCATCH | PTTYBLOCK, "ptsd_bg", - 0); + error = ttysleep(tp, &ptsd_read, TTIPRI | PCATCH | PTTYBLOCK, "ptsd_bg", + hz); if (error) goto out; } @@ -713,9 +718,9 @@ again: int cc; char buf[BUFSIZ]; - cc = min(uio_resid(uio), BUFSIZ); + cc = MIN(uio_resid(uio), BUFSIZ); // Don't copy the very last byte - cc = min(cc, tp->t_canq.c_cc - 1); + cc = MIN(cc, tp->t_canq.c_cc - 1); cc = q_to_b(&tp->t_canq, (u_char *)buf, cc); error = uiomove(buf, cc, uio); if (error) @@ -882,6 +887,10 @@ ptmx_close(dev_t dev, __unused int flags, __unused int fmt, __unused proc_t p) ptmx_free_ioctl(minor(dev), PF_OPEN_M); +#if CONFIG_MACF + mac_pty_notify_close(p, tp, dev, NULL); +#endif + return (0); } @@ -911,8 +920,8 @@ ptmx_read(dev_t dev, struct uio *uio, int flag) if (error) goto out; if (pti->pt_send & TIOCPKT_IOCTL) { - cc = min(uio_resid(uio), - sizeof(tp->t_termios)); + cc = 
MIN(uio_resid(uio), + (user_ssize_t)sizeof(tp->t_termios)); uiomove((caddr_t)&tp->t_termios, cc, uio); } @@ -942,7 +951,7 @@ ptmx_read(dev_t dev, struct uio *uio, int flag) if (pti->pt_flags & (PF_PKT|PF_UCNTL)) error = ureadc(0, uio); while (uio_resid(uio) > 0 && error == 0) { - cc = q_to_b(&tp->t_outq, (u_char *)buf, min(uio_resid(uio), BUFSIZ)); + cc = q_to_b(&tp->t_outq, (u_char *)buf, MIN(uio_resid(uio), BUFSIZ)); if (cc <= 0) break; error = uiomove(buf, cc, uio); @@ -1187,8 +1196,8 @@ again: while ((uio_resid(uio) > 0 || cc > 0) && tp->t_canq.c_cc < TTYHOG - 1) { if (cc == 0) { - cc = min(uio_resid(uio), BUFSIZ); - cc = min(cc, TTYHOG - 1 - tp->t_canq.c_cc); + cc = MIN(uio_resid(uio), BUFSIZ); + cc = MIN(cc, TTYHOG - 1 - tp->t_canq.c_cc); cp = locbuf; error = uiomove((caddr_t)cp, cc, uio); if (error) @@ -1224,7 +1233,7 @@ again: } while (uio_resid(uio) > 0 || cc > 0) { if (cc == 0) { - cc = min(uio_resid(uio), BUFSIZ); + cc = MIN(uio_resid(uio), BUFSIZ); cp = locbuf; error = uiomove((caddr_t)cp, cc, uio); if (error) @@ -1414,6 +1423,13 @@ cptyioctl(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) */ { error = _devfs_setattr(pti->pt_devhandle, 0620, kauth_getuid(), GID_TTY); +#if CONFIG_MACF + if (!error) { + tty_unlock(tp); + mac_pty_notify_grant(p, tp, dev, NULL); + tty_lock(tp); + } +#endif goto out; } diff --git a/bsd/kern/tty_pty.c b/bsd/kern/tty_pty.c index 8fb35c04a..05931c126 100644 --- a/bsd/kern/tty_pty.c +++ b/bsd/kern/tty_pty.c @@ -96,18 +96,18 @@ int _devfs_setattr(void * handle, unsigned short mode, uid_t uid, gid_t gid); static void ptsstart(struct tty *tp); static void ptcwakeup(struct tty *tp, int flag); -__private_extern__ d_open_t ptsopen; -__private_extern__ d_close_t ptsclose; -__private_extern__ d_read_t ptsread; -__private_extern__ d_write_t ptswrite; -__private_extern__ d_ioctl_t ptyioctl; -__private_extern__ d_stop_t ptsstop; -__private_extern__ d_devtotty_t ptydevtotty; -__private_extern__ d_open_t ptcopen; -__private_extern__ d_close_t ptcclose; -__private_extern__ d_read_t ptcread; -__private_extern__ d_write_t ptcwrite; -__private_extern__ d_select_t ptcselect; +__XNU_PRIVATE_EXTERN d_open_t ptsopen; +__XNU_PRIVATE_EXTERN d_close_t ptsclose; +__XNU_PRIVATE_EXTERN d_read_t ptsread; +__XNU_PRIVATE_EXTERN d_write_t ptswrite; +__XNU_PRIVATE_EXTERN d_ioctl_t ptyioctl; +__XNU_PRIVATE_EXTERN d_stop_t ptsstop; +__XNU_PRIVATE_EXTERN d_devtotty_t ptydevtotty; +__XNU_PRIVATE_EXTERN d_open_t ptcopen; +__XNU_PRIVATE_EXTERN d_close_t ptcclose; +__XNU_PRIVATE_EXTERN d_read_t ptcread; +__XNU_PRIVATE_EXTERN d_write_t ptcwrite; +__XNU_PRIVATE_EXTERN d_select_t ptcselect; #if NPTY == 1 #undef NPTY @@ -310,8 +310,8 @@ again: pg_rele(pg); tty_lock(tp); - error = ttysleep(tp, &lbolt, TTIPRI | PCATCH | PTTYBLOCK, "ptsbg", - 0); + error = ttysleep(tp, &ptsread, TTIPRI | PCATCH | PTTYBLOCK, "ptsbg", + hz); if (error) goto out; } diff --git a/bsd/kern/tty_tty.c b/bsd/kern/tty_tty.c index 9cb8339bf..84960728d 100644 --- a/bsd/kern/tty_tty.c +++ b/bsd/kern/tty_tty.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997-2012 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1997-2013 Apple Computer, Inc. All rights reserved. 
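
The min() to MIN() conversions in the tty read/write hunks above are not cosmetic: uio_resid() returns a 64-bit user_ssize_t, while the classic BSD min() takes unsigned 32-bit arguments, so a large residual could be silently truncated before the comparison (the same hunks also retire the lbolt wakeup channel in favor of a real wait channel plus an hz-tick timeout). A minimal stand-alone sketch of the truncation hazard, assuming the 32-bit-unsigned definition of the old min():

#include <stdint.h>
#include <stdio.h>

/* old-style helper: unsigned 32-bit arguments, as min() was classically defined */
static uint32_t old_min(uint32_t a, uint32_t b) { return a < b ? a : b; }

/* type-generic replacement: compares at the operands' full width */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

int main(void)
{
	int64_t resid = (1LL << 32) + 16;	/* a 64-bit residual, e.g. uio_resid() */
	int64_t bufsz = 1024;			/* e.g. IBUFSIZ */

	/* the old call site narrows resid to 16 before comparing: chunk = 16 */
	printf("old min: %u\n", old_min((uint32_t)resid, (uint32_t)bufsz));

	/* MIN() keeps 64 bits: chunk = 1024, the intended transfer size */
	printf("MIN:     %lld\n", (long long)MIN(resid, bufsz));
	return 0;
}
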
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,7 +76,6 @@ /* Forward declarations for cdevsw[] entry */ /* XXX we should consider making these static */ int cttyopen(dev_t dev, int flag, int mode, proc_t p); -int cttyclose(dev_t dev, int flag, int mode, proc_t p); int cttyread(dev_t dev, struct uio *uio, int flag); int cttywrite(dev_t dev, struct uio *uio, int flag); int cttyioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, proc_t p); @@ -86,65 +85,76 @@ static vnode_t cttyvp(proc_t p); int cttyopen(dev_t dev, int flag, __unused int mode, proc_t p) { - vnode_t ttyvp; - int error; + vnode_t ttyvp = cttyvp(p); + struct vfs_context context; + int error = 0; + int cttyflag, doclose = 0; + struct session *sessp; - /* - * A little hack--this device, used by many processes, - * does an open on another device, which can cause unhappiness - * if the second-level open blocks indefinitely (e.g. if the - * master side has hung up). This driver doesn't care - * about serializing opens and closes, so drop the lock. - */ - devsw_unlock(dev, S_IFCHR); + if (ttyvp == NULL) + return (ENXIO); - if ((ttyvp = cttyvp(p)) == NULL) { - error = ENXIO; - } else { - struct vfs_context context; + context.vc_thread = current_thread(); + context.vc_ucred = kauth_cred_proc_ref(p); - context.vc_thread = current_thread(); - context.vc_ucred = kauth_cred_proc_ref(p); + sessp = proc_session(p); + session_lock(sessp); + cttyflag = sessp->s_flags & S_CTTYREF; + session_unlock(sessp); + /* + * A little hack--this device, used by many processes, + * happens to do an open on another device, which can + * cause unhappiness if the second-level open blocks indefinitely + * (as could be the case if the master side has hung up). Since + * we know that this driver doesn't care about the serializing + * opens and closes, we can drop the lock. To avoid opencount leak, + * open the vnode only for the first time. + */ + if (cttyflag == 0) { + devsw_unlock(dev, S_IFCHR); error = VNOP_OPEN(ttyvp, flag, &context); + devsw_lock(dev, S_IFCHR); - kauth_cred_unref(&context.vc_ucred); - vnode_put(ttyvp); + if (error) + goto out; + + /* + * If S_CTTYREF is set, some other thread did an open + * and was able to set the flag, now perform a close, else + * set the flag. + */ + session_lock(sessp); + if (cttyflag == (sessp->s_flags & S_CTTYREF)) + sessp->s_flags |= S_CTTYREF; + else + doclose = 1; + session_unlock(sessp); + + /* + * We have to take a reference here to make sure a close + * gets called during revoke. Note that once a controlling + * tty gets opened by this driver, the only way close will + * get called is when the session leader , whose controlling + * tty is ttyvp, exits and vnode is revoked. We cannot + * redirect close from this driver because underlying controlling + * terminal might change and close may get redirected to a + * wrong vnode causing panic. + */ + if (doclose) { + devsw_unlock(dev, S_IFCHR); + VNOP_CLOSE(ttyvp, flag, &context); + devsw_lock(dev, S_IFCHR); + } else { + error = vnode_ref(ttyvp); + } } +out: + session_rele(sessp); - devsw_lock(dev, S_IFCHR); - return (error); -} - -/* - * This driver is marked D_TRACKCLOSE and so gets a close - * for every open so that ttyvp->v_specinfo->si_count can be kept sane. - */ -int -cttyclose(dev_t dev, int flag, __unused int mode, proc_t p) -{ - vnode_t ttyvp; - int error; - - /* See locking commentary above. 
*/ - - devsw_unlock(dev, S_IFCHR); - - if ((ttyvp = cttyvp(p)) == NULL) { - error = ENXIO; - } else { - struct vfs_context context; - - context.vc_thread = current_thread(); - context.vc_ucred = kauth_cred_proc_ref(p); - - error = VNOP_CLOSE(ttyvp, flag, &context); - - kauth_cred_unref(&context.vc_ucred); - vnode_put(ttyvp); - } + vnode_put(ttyvp); + kauth_cred_unref(&context.vc_ucred); - devsw_lock(dev, S_IFCHR); return (error); } diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index c89ea82ab..9d3276570 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -66,6 +66,10 @@ #include /* last */ #include +#include + +#include +#include #include @@ -75,6 +79,7 @@ extern kern_return_t memory_object_pages_resident(memory_object_control_t, extern kern_return_t memory_object_signed(memory_object_control_t control, boolean_t is_signed); extern boolean_t memory_object_is_slid(memory_object_control_t control); +extern boolean_t memory_object_is_signed(memory_object_control_t); extern void Debugger(const char *message); @@ -133,75 +138,16 @@ cs_valid_range( return TRUE; } -/* - * Magic numbers used by Code Signing - */ -enum { - CSMAGIC_REQUIREMENT = 0xfade0c00, /* single Requirement blob */ - CSMAGIC_REQUIREMENTS = 0xfade0c01, /* Requirements vector (internal requirements) */ - CSMAGIC_CODEDIRECTORY = 0xfade0c02, /* CodeDirectory blob */ - CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0, /* embedded form of signature data */ - CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02, /* XXX */ - CSMAGIC_EMBEDDED_ENTITLEMENTS = 0xfade7171, /* embedded entitlements */ - CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1, /* multi-arch collection of embedded signatures */ - - CSSLOT_CODEDIRECTORY = 0, /* slot index for CodeDirectory */ - CSSLOT_ENTITLEMENTS = 5 -}; - -static const uint32_t supportsScatter = 0x20100; // first version to support scatter option - -/* - * Structure of an embedded-signature SuperBlob - */ -typedef struct __BlobIndex { - uint32_t type; /* type of entry */ - uint32_t offset; /* offset of entry */ -} CS_BlobIndex; - -typedef struct __SuperBlob { - uint32_t magic; /* magic number */ - uint32_t length; /* total length of SuperBlob */ - uint32_t count; /* number of index entries following */ - CS_BlobIndex index[]; /* (count) entries */ - /* followed by Blobs in no particular order as indicated by offsets in index */ -} CS_SuperBlob; - -typedef struct __GenericBlob { - uint32_t magic; /* magic number */ - uint32_t length; /* total length of blob */ - char data[]; -} CS_GenericBlob; - -struct Scatter { - uint32_t count; // number of pages; zero for sentinel (only) - uint32_t base; // first page number - uint64_t targetOffset; // offset in target - uint64_t spare; // reserved -}; - -/* - * C form of a CodeDirectory. 
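
The rewritten cttyopen() above performs VNOP_OPEN() on the controlling tty only for the first open in a session, using the S_CTTYREF session flag to detect whether another thread raced in between the unlocked open and the re-check; the loser undoes its open with a matching VNOP_CLOSE(). A reduced sketch of that claim step, with a hypothetical session type and a pthread mutex standing in for session_lock()/session_unlock():

#include <pthread.h>
#include <stdbool.h>

/* hypothetical stand-ins for the session state cttyopen() consults */
#define S_CTTYREF 0x4			/* flag value is illustrative */
struct session {
	pthread_mutex_t	s_mtx;		/* stands in for session_lock() */
	unsigned	s_flags;
};

/*
 * Called after the unlocked VNOP_OPEN(), with cttyflag_seen holding the
 * S_CTTYREF bit as sampled before the open (0 on this path).  Returns
 * true if this opener won and should keep its reference, false if a
 * racing opener already recorded one and we must VNOP_CLOSE() ours.
 */
static bool
ctty_claim(struct session *sessp, unsigned cttyflag_seen)
{
	bool keep;

	pthread_mutex_lock(&sessp->s_mtx);
	if (cttyflag_seen == (sessp->s_flags & S_CTTYREF)) {
		sessp->s_flags |= S_CTTYREF;	/* first open: remember it */
		keep = true;
	} else {
		keep = false;			/* lost the race: undo our open */
	}
	pthread_mutex_unlock(&sessp->s_mtx);
	return keep;
}
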
- */ -typedef struct __CodeDirectory { - uint32_t magic; /* magic number (CSMAGIC_CODEDIRECTORY) */ - uint32_t length; /* total length of CodeDirectory blob */ - uint32_t version; /* compatibility version */ - uint32_t flags; /* setup and mode flags */ - uint32_t hashOffset; /* offset of hash slot element at index zero */ - uint32_t identOffset; /* offset of identifier string */ - uint32_t nSpecialSlots; /* number of special hash slots */ - uint32_t nCodeSlots; /* number of ordinary (code) hash slots */ - uint32_t codeLimit; /* limit to main image signature range */ - uint8_t hashSize; /* size of each hash in bytes */ - uint8_t hashType; /* type of hash (cdHashType* constants) */ - uint8_t spare1; /* unused (must be zero) */ - uint8_t pageSize; /* log2(page size in bytes); 0 => infinite */ - uint32_t spare2; /* unused (must be zero) */ - /* Version 0x20100 */ - uint32_t scatterOffset; /* offset of optional scatter vector */ - /* followed by dynamic content as located by offset fields above */ -} CS_CodeDirectory; +static void +hex_str( + const unsigned char *hash, + size_t len, + char *buf) +{ + unsigned int n; + for (n = 0; n < len; n++) + snprintf(buf + 2*n, 3, "%02.2x", hash[n]); +} /* @@ -278,9 +224,9 @@ hashes( assert(cs_valid_range(cd, cd + 1, lower_bound, upper_bound)); - if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) { + if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) { /* Get first scatter struct */ - const struct Scatter *scatter = (const struct Scatter*) + const SC_Scatter *scatter = (const SC_Scatter*) ((const char*)cd + ntohl(cd->scatterOffset)); uint32_t hashindex=0, scount, sbase=0; /* iterate all scatter structs */ @@ -358,6 +304,253 @@ hashes( return hash; } + +/* + * cs_validate_codedirectory + * + * Validate that pointers inside the code directory to make sure that + * all offsets and lengths are constrained within the buffer. + * + * Parameters: cd Pointer to code directory buffer + * length Length of buffer + * + * Returns: 0 Success + * EBADEXEC Invalid code signature + */ + +static int +cs_validate_codedirectory(const CS_CodeDirectory *cd, size_t length) +{ + + if (length < sizeof(*cd)) + return EBADEXEC; + if (ntohl(cd->magic) != CSMAGIC_CODEDIRECTORY) + return EBADEXEC; + if (cd->hashSize != SHA1_RESULTLEN) + return EBADEXEC; + if (cd->pageSize != PAGE_SHIFT) + return EBADEXEC; + if (cd->hashType != CS_HASHTYPE_SHA1) + return EBADEXEC; + + if (length < ntohl(cd->hashOffset)) + return EBADEXEC; + + /* check that nSpecialSlots fits in the buffer in front of hashOffset */ + if (ntohl(cd->hashOffset) / SHA1_RESULTLEN < ntohl(cd->nSpecialSlots)) + return EBADEXEC; + + /* check that codeslots fits in the buffer */ + if ((length - ntohl(cd->hashOffset)) / SHA1_RESULTLEN < ntohl(cd->nCodeSlots)) + return EBADEXEC; + + if (ntohl(cd->version) >= CS_SUPPORTSSCATTER && cd->scatterOffset) { + + if (length < ntohl(cd->scatterOffset)) + return EBADEXEC; + + SC_Scatter *scatter = (SC_Scatter *) + (((uint8_t *)cd) + ntohl(cd->scatterOffset)); + uint32_t nPages = 0; + + /* + * Check each scatter buffer, since we don't know the + * length of the scatter buffer array, we have to + * check each entry. 
+	 */
+	while(1) {
+		/* check that the end of each scatter buffer is within the length */
+		if (((const uint8_t *)scatter) + sizeof(scatter[0]) > (const uint8_t *)cd + length)
+			return EBADEXEC;
+		uint32_t scount = ntohl(scatter->count);
+		if (scount == 0)
+			break;
+		if (nPages + scount < nPages)
+			return EBADEXEC;
+		nPages += scount;
+		scatter++;
+
+		/* XXX check that bases don't overlap */
+		/* XXX check that targetOffsets don't overlap */
+	}
+#if 0 /* rdar://12579439 */
+	if (nPages != ntohl(cd->nCodeSlots))
+		return EBADEXEC;
+#endif
+	}
+
+	if (length < ntohl(cd->identOffset))
+		return EBADEXEC;
+
+	/* identifier is a NUL-terminated string */
+	if (cd->identOffset) {
+		uint8_t *ptr = (uint8_t *)cd + ntohl(cd->identOffset);
+		if (memchr(ptr, 0, length - ntohl(cd->identOffset)) == NULL)
+			return EBADEXEC;
+	}
+
+	return 0;
+}
+
+/*
+ *
+ */
+
+static int
+cs_validate_blob(const CS_GenericBlob *blob, size_t length)
+{
+	if (length < sizeof(CS_GenericBlob) || length < ntohl(blob->length))
+		return EBADEXEC;
+	return 0;
+}
+
+/*
+ * cs_validate_csblob
+ *
+ * Validate the superblob/embedded code directory to make sure that
+ * all internal pointers are valid.
+ *
+ * Will validate both a superblob csblob and a "raw" code directory.
+ *
+ * Parameters:	buffer		Pointer to code signature
+ *		length		Length of buffer
+ *		rcd		returns pointer to code directory
+ *
+ * Returns:	0		Success
+ *		EBADEXEC	Invalid code signature
+ */
+
+static int
+cs_validate_csblob(const uint8_t *addr, size_t length,
+    const CS_CodeDirectory **rcd)
+{
+	const CS_GenericBlob *blob = (const CS_GenericBlob *)(void *)addr;
+	int error;
+
+	*rcd = NULL;
+
+	error = cs_validate_blob(blob, length);
+	if (error)
+		return error;
+
+	length = ntohl(blob->length);
+
+	if (ntohl(blob->magic) == CSMAGIC_EMBEDDED_SIGNATURE) {
+		const CS_SuperBlob *sb = (const CS_SuperBlob *)blob;
+		uint32_t n, count = ntohl(sb->count);
+
+		if (length < sizeof(CS_SuperBlob))
+			return EBADEXEC;
+
+		/* check that the array of BlobIndex fits in the rest of the data */
+		if ((length - sizeof(CS_SuperBlob)) / sizeof(CS_BlobIndex) < count)
+			return EBADEXEC;
+
+		/* now check each BlobIndex */
+		for (n = 0; n < count; n++) {
+			const CS_BlobIndex *blobIndex = &sb->index[n];
+			if (length < ntohl(blobIndex->offset))
+				return EBADEXEC;
+
+			const CS_GenericBlob *subBlob =
+				(const CS_GenericBlob *)(void *)(addr + ntohl(blobIndex->offset));
+
+			size_t subLength = length - ntohl(blobIndex->offset);
+
+			if ((error = cs_validate_blob(subBlob, subLength)) != 0)
+				return error;
+			subLength = ntohl(subBlob->length);
+
+			/* extra validation for CDs; a valid CD is also returned */
+			if (ntohl(blobIndex->type) == CSSLOT_CODEDIRECTORY) {
+				const CS_CodeDirectory *cd = (const CS_CodeDirectory *)subBlob;
+				if ((error = cs_validate_codedirectory(cd, subLength)) != 0)
+					return error;
+				*rcd = cd;
+			}
+		}
+
+	} else if (ntohl(blob->magic) == CSMAGIC_CODEDIRECTORY) {
+
+		if ((error = cs_validate_codedirectory((const CS_CodeDirectory *)(void *)addr, length)) != 0)
+			return error;
+		*rcd = (const CS_CodeDirectory *)blob;
+	} else {
+		return EBADEXEC;
+	}
+
+	if (*rcd == NULL)
+		return EBADEXEC;
+
+	return 0;
+}
+
+/*
+ * cs_find_blob_bytes
+ *
+ * Find a blob in the superblob/code directory. The blob must have
+ * been validated by cs_validate_csblob() before calling
+ * this. Use cs_find_blob() instead.
+ *
+ * Will also find a "raw" code directory if one is stored directly, in
+ * addition to searching the superblob.
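
cs_validate_csblob() above accepts either a bare CodeDirectory or an embedded-signature SuperBlob, and for the latter refuses to follow any BlobIndex whose offset or length escapes the buffer. A user-space sketch of the same bounds-checked walk, condensed into a single finder (structures re-declared locally from the CSMAGIC definitions earlier in this file; the kernel splits this work across cs_validate_blob(), cs_validate_codedirectory() and cs_find_blob_bytes()):

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>		/* ntohl() */

#define CSMAGIC_EMBEDDED_SIGNATURE	0xfade0cc0
#define CSMAGIC_CODEDIRECTORY		0xfade0c02

typedef struct { uint32_t type, offset; } CS_BlobIndex;
typedef struct { uint32_t magic, length, count; CS_BlobIndex index[]; } CS_SuperBlob;
typedef struct { uint32_t magic, length; } CS_GenericBlob;

/*
 * Return the CodeDirectory blob inside a signature buffer, refusing to
 * follow any index entry whose offset or length escapes the buffer.
 */
static const CS_GenericBlob *
find_code_directory(const uint8_t *addr, size_t length)
{
	const CS_GenericBlob *blob = (const CS_GenericBlob *)addr;

	if (length < sizeof(*blob) || length < ntohl(blob->length))
		return NULL;
	length = ntohl(blob->length);		/* trust only the declared size */

	if (ntohl(blob->magic) == CSMAGIC_CODEDIRECTORY)
		return blob;			/* "raw" code directory */
	if (ntohl(blob->magic) != CSMAGIC_EMBEDDED_SIGNATURE)
		return NULL;

	const CS_SuperBlob *sb = (const CS_SuperBlob *)blob;
	uint32_t count = ntohl(sb->count);

	/* the BlobIndex array itself must fit in the buffer */
	if (length < sizeof(*sb) ||
	    (length - sizeof(*sb)) / sizeof(CS_BlobIndex) < count)
		return NULL;

	for (uint32_t n = 0; n < count; n++) {
		uint32_t off = ntohl(sb->index[n].offset);

		if (length < off || length - off < sizeof(CS_GenericBlob))
			return NULL;		/* index points outside buffer */
		const CS_GenericBlob *sub = (const CS_GenericBlob *)(addr + off);
		if (ntohl(sub->length) > length - off)
			return NULL;		/* sub-blob overruns buffer */
		if (ntohl(sub->magic) == CSMAGIC_CODEDIRECTORY)
			return sub;
	}
	return NULL;
}
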
+ * + * Parameters: buffer Pointer to code signature + * length Length of buffer + * type type of blob to find + * magic the magic number for that blob + * + * Returns: pointer Success + * NULL Buffer not found + */ + +static const CS_GenericBlob * +cs_find_blob_bytes(const uint8_t *addr, size_t length, uint32_t type, uint32_t magic) +{ + const CS_GenericBlob *blob = (const CS_GenericBlob *)(void *)addr; + + if (ntohl(blob->magic) == CSMAGIC_EMBEDDED_SIGNATURE) { + const CS_SuperBlob *sb = (const CS_SuperBlob *)blob; + size_t n, count = ntohl(sb->count); + + for (n = 0; n < count; n++) { + if (ntohl(sb->index[n].type) != type) + continue; + uint32_t offset = ntohl(sb->index[n].offset); + if (length - sizeof(const CS_GenericBlob) < offset) + return NULL; + blob = (const CS_GenericBlob *)(void *)(addr + offset); + if (ntohl(blob->magic) != magic) + continue; + return blob; + } + } else if (type == CSSLOT_CODEDIRECTORY + && ntohl(blob->magic) == CSMAGIC_CODEDIRECTORY + && magic == CSMAGIC_CODEDIRECTORY) + return blob; + return NULL; +} + + +static const CS_GenericBlob * +cs_find_blob(struct cs_blob *csblob, uint32_t type, uint32_t magic) +{ + if ((csblob->csb_flags & CS_VALID) == 0) + return NULL; + return cs_find_blob_bytes((const uint8_t *)csblob->csb_mem_kaddr, csblob->csb_mem_size, type, magic); +} + +static const uint8_t * +cs_find_special_slot(const CS_CodeDirectory *cd, uint32_t slot) +{ + /* there is no zero special slot since that is the first code slot */ + if (ntohl(cd->nSpecialSlots) < slot || slot == 0) + return NULL; + + return ((const uint8_t *)cd + ntohl(cd->hashOffset) - (SHA1_RESULTLEN * slot)); +} + /* * CODESIGNING * End of routines to navigate code signing data structures in the kernel. @@ -372,96 +565,131 @@ hashes( * Returns: * EINVAL no text vnode associated with the process * EBADEXEC invalid code signing data - * ENOMEM you should reboot * 0 no error occurred * * On success, out_start and out_length will point to the * entitlements blob if found; or will be set to NULL/zero * if there were no entitlements. 
*/ + +static uint8_t sha1_zero[SHA1_RESULTLEN] = { 0 }; + int cs_entitlements_blob_get(proc_t p, void **out_start, size_t *out_length) { - SHA1_CTX context; /* XXX hash agility */ - int error = 0; - struct cs_blob *blob_list_entry; - CS_SuperBlob *super_blob; - CS_BlobIndex *blob_index; - CS_GenericBlob *blob; - CS_CodeDirectory *code_dir; - unsigned char *computed_hash = NULL; - unsigned char *embedded_hash = NULL; - void *start = NULL; - size_t length = 0; - size_t hash_size = 0; - unsigned int i, count; - - if (NULL == p->p_textvp) { - error = EINVAL; - goto out; - } - if (NULL == (blob_list_entry = ubc_cs_blob_get(p->p_textvp, -1, - p->p_textoff))) - goto out; - super_blob = (void *)blob_list_entry->csb_mem_kaddr; - if (CSMAGIC_EMBEDDED_SIGNATURE != ntohl(super_blob->magic)) { - error = EBADEXEC; - goto out; - } - count = ntohl(super_blob->count); - for (i = 0; i < count; ++i) { - blob_index = &super_blob->index[i]; - blob = (void *)((char *)super_blob + ntohl(blob_index->offset)); - switch (ntohl(blob_index->type)) { - case CSSLOT_CODEDIRECTORY: - if (CSMAGIC_CODEDIRECTORY != ntohl(blob->magic)) - break; - code_dir = (void *)blob; - hash_size = code_dir->hashSize; - if (CSSLOT_ENTITLEMENTS <= - ntohl(code_dir->nSpecialSlots)) { - embedded_hash = (void *)((char *)code_dir + - ntohl(code_dir->hashOffset) - - (hash_size * CSSLOT_ENTITLEMENTS)); - } - break; - case CSSLOT_ENTITLEMENTS: - if (CSMAGIC_EMBEDDED_ENTITLEMENTS != ntohl(blob->magic)) - break; - start = (void *)blob; - length = ntohl(blob->length); - break; - default: - break; - } - } - if (NULL == start && NULL == embedded_hash) { - error = 0; - goto out; - } else if (NULL == start || NULL == embedded_hash) { - error = EBADEXEC; - goto out; - } - if (NULL == (computed_hash = kalloc(hash_size))) { - error = ENOMEM; - goto out; + uint8_t computed_hash[SHA1_RESULTLEN]; + const CS_GenericBlob *entitlements; + const CS_CodeDirectory *code_dir; + struct cs_blob *csblob; + const uint8_t *embedded_hash; + SHA1_CTX context; + + *out_start = NULL; + *out_length = 0; + + if (NULL == p->p_textvp) + return EINVAL; + + if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + return 0; + + if ((code_dir = (const CS_CodeDirectory *)cs_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL) + return 0; + + entitlements = cs_find_blob(csblob, CSSLOT_ENTITLEMENTS, CSMAGIC_EMBEDDED_ENTITLEMENTS); + embedded_hash = cs_find_special_slot(code_dir, CSSLOT_ENTITLEMENTS); + + if (embedded_hash == NULL) { + if (entitlements) + return EBADEXEC; + return 0; + } else if (entitlements == NULL && memcmp(embedded_hash, sha1_zero, SHA1_RESULTLEN) != 0) { + return EBADEXEC; } + SHA1Init(&context); - SHA1Update(&context, start, length); + SHA1Update(&context, entitlements, ntohl(entitlements->length)); SHA1Final(computed_hash, &context); - if (0 != memcmp(computed_hash, embedded_hash, hash_size)) { - error = EBADEXEC; - goto out; - } - error = 0; -out: - if (NULL != computed_hash) - kfree(computed_hash, hash_size); - if (0 == error) { - *out_start = start; - *out_length = length; - } - return error; + if (memcmp(computed_hash, embedded_hash, SHA1_RESULTLEN) != 0) + return EBADEXEC; + + *out_start = (void *)entitlements; + *out_length = ntohl(entitlements->length); + + return 0; +} + +/* Retrieve the codesign identity for a process. 
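
The new cs_entitlements_blob_get() above reduces entitlement validation to one invariant: the SHA-1 of the entire entitlements blob, header included, must match the CodeDirectory's special slot 5 hash, and when the blob is absent that slot must be all zeroes. A sketch of just that cross-check, using CommonCrypto's CC_SHA1 in place of the in-kernel SHA1 routines (an assumption; any SHA-1 implementation works):

#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <CommonCrypto/CommonDigest.h>	/* CC_SHA1() */

#define SHA1_RESULTLEN 20

/*
 * blob/blob_len cover the full entitlements CS_GenericBlob, header
 * included, exactly as the kernel hashes it; slot_hash is special
 * slot 5 of the CodeDirectory, or NULL if there is no such slot.
 * Returns 0 when consistent, -1 for an EBADEXEC-style mismatch.
 */
static int
check_entitlements_hash(const void *blob, size_t blob_len,
    const uint8_t *slot_hash)
{
	static const uint8_t sha1_zero[SHA1_RESULTLEN];
	uint8_t computed[CC_SHA1_DIGEST_LENGTH];

	if (slot_hash == NULL)		/* no slot: a blob would be unaccounted for */
		return blob != NULL ? -1 : 0;
	if (blob == NULL)		/* slot but no blob: hash must be all zeroes */
		return memcmp(slot_hash, sha1_zero, SHA1_RESULTLEN) ? -1 : 0;

	CC_SHA1(blob, (CC_LONG)blob_len, computed);
	return memcmp(computed, slot_hash, SHA1_RESULTLEN) ? -1 : 0;
}
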
+ * Returns: + * NULL an error occured + * string the cs_identity + */ + +const char * +cs_identity_get(proc_t p) +{ + const CS_CodeDirectory *code_dir; + struct cs_blob *csblob; + + if (NULL == p->p_textvp) + return NULL; + + if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + return NULL; + + if ((code_dir = (const CS_CodeDirectory *)cs_find_blob(csblob, CSSLOT_CODEDIRECTORY, CSMAGIC_CODEDIRECTORY)) == NULL) + return NULL; + + if (code_dir->identOffset == 0) + return NULL; + + return ((const char *)code_dir) + ntohl(code_dir->identOffset); +} + + + +/* Retrieve the codesign blob for a process. + * Returns: + * EINVAL no text vnode associated with the process + * 0 no error occurred + * + * On success, out_start and out_length will point to the + * cms blob if found; or will be set to NULL/zero + * if there were no blob. + */ + +int +cs_blob_get(proc_t p, void **out_start, size_t *out_length) +{ + struct cs_blob *csblob; + + *out_start = NULL; + *out_length = 0; + + if (NULL == p->p_textvp) + return EINVAL; + + if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + return 0; + + *out_start = (void *)csblob->csb_mem_kaddr; + *out_length = csblob->csb_mem_size; + + return 0; +} + +uint8_t * +cs_get_cdhash(struct proc *p) +{ + struct cs_blob *csblob; + + if (NULL == p->p_textvp) + return NULL; + + if ((csblob = ubc_cs_blob_get(p->p_textvp, -1, p->p_textoff)) == NULL) + return NULL; + + return csblob->csb_sha1; } /* @@ -2386,6 +2614,7 @@ SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_b SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_peak, 0, "Peak size of code signature blobs"); SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD | CTLFLAG_LOCKED, &cs_blob_size_max, 0, "Size of biggest code signature blob"); + kern_return_t ubc_cs_blob_allocate( vm_offset_t *blob_addr_p, @@ -2419,12 +2648,125 @@ ubc_cs_blob_deallocate( #endif /* CS_BLOB_PAGEABLE */ } +int +ubc_cs_sigpup_add( + struct vnode *vp, + vm_address_t address, + vm_size_t size) +{ + kern_return_t kr; + struct ubc_info *uip; + struct cs_blob *blob; + memory_object_control_t control; + const CS_CodeDirectory *cd; + int error; + + control = ubc_getobject(vp, UBC_FLAGS_NONE); + if (control == MEMORY_OBJECT_CONTROL_NULL) + return KERN_INVALID_ARGUMENT; + + if (memory_object_is_signed(control)) + return 0; + + blob = (struct cs_blob *) kalloc(sizeof (struct cs_blob)); + if (blob == NULL) + return ENOMEM; + + /* fill in the new blob */ + blob->csb_cpu_type = CPU_TYPE_ANY; + blob->csb_base_offset = 0; + blob->csb_mem_size = size; + blob->csb_mem_offset = 0; + blob->csb_mem_handle = IPC_PORT_NULL; + blob->csb_mem_kaddr = address; + blob->csb_sigpup = 1; + + /* + * Validate the blob's contents + */ + cd = findCodeDirectory( + (const CS_SuperBlob *) address, + (char *) address, + (char *) address + blob->csb_mem_size); + if (cd == NULL) { + /* no code directory => useless blob ! 
*/ + error = EINVAL; + goto out; + } + + blob->csb_flags = ntohl(cd->flags) | CS_VALID; + blob->csb_end_offset = round_page(ntohl(cd->codeLimit)); + if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) { + const SC_Scatter *scatter = (const SC_Scatter*) + ((const char*)cd + ntohl(cd->scatterOffset)); + blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE; + } else { + blob->csb_start_offset = (blob->csb_end_offset - (ntohl(cd->nCodeSlots) * PAGE_SIZE)); + } + + /* + * We don't need to check with the policy module, since the input data is supposed to be already checked + */ + + vnode_lock(vp); + if (! UBCINFOEXISTS(vp)) { + vnode_unlock(vp); + if (cs_debug) + printf("out ubc object\n"); + error = ENOENT; + goto out; + } + uip = vp->v_ubcinfo; + + /* someone raced us to adding the code directory */ + if (uip->cs_blobs != NULL) { + if (cs_debug) + printf("sigpup: vnode already have CD ?\n"); + vnode_unlock(vp); + error = EEXIST; + goto out; + } + + blob->csb_next = uip->cs_blobs; + uip->cs_blobs = blob; + + OSAddAtomic(+1, &cs_blob_count); + OSAddAtomic((SInt32) +blob->csb_mem_size, &cs_blob_size); + + /* mark this vnode's VM object as having "signed pages" */ + kr = memory_object_signed(uip->ui_control, TRUE); + if (kr != KERN_SUCCESS) { + vnode_unlock(vp); + if (cs_debug) + printf("sigpup: not signable ?\n"); + error = ENOENT; + goto out; + } + + vnode_unlock(vp); + + error = 0; +out: + if (error) { + if (cs_debug) + printf("sigpup: not signable ?\n"); + /* we failed; release what we allocated */ + if (blob) { + kfree(blob, sizeof (*blob)); + blob = NULL; + } + } + + return error; +} + int ubc_cs_blob_add( struct vnode *vp, cpu_type_t cputype, off_t base_offset, vm_address_t addr, + off_t blob_offset, vm_size_t size) { kern_return_t kr; @@ -2472,32 +2814,36 @@ ubc_cs_blob_add( /* fill in the new blob */ blob->csb_cpu_type = cputype; + blob->csb_sigpup = 0; blob->csb_base_offset = base_offset; + blob->csb_blob_offset = blob_offset; blob->csb_mem_size = size; blob->csb_mem_offset = 0; blob->csb_mem_handle = blob_handle; blob->csb_mem_kaddr = addr; + blob->csb_flags = 0; /* * Validate the blob's contents */ - cd = findCodeDirectory( - (const CS_SuperBlob *) addr, - (char *) addr, - (char *) addr + blob->csb_mem_size); - if (cd == NULL) { - /* no code directory => useless blob ! */ + + error = cs_validate_csblob((const uint8_t *)addr, size, &cd); + if (error) { + if (cs_debug) + printf("CODESIGNING: csblob invalid: %d\n", error); blob->csb_flags = 0; blob->csb_start_offset = 0; blob->csb_end_offset = 0; + memset(blob->csb_sha1, 0, SHA1_RESULTLEN); + /* let the vnode checker determine if the signature is valid or not */ } else { const unsigned char *sha1_base; int sha1_size; - blob->csb_flags = ntohl(cd->flags) | CS_VALID; + blob->csb_flags = (ntohl(cd->flags) & CS_ALLOWED_MACHO) | CS_VALID; blob->csb_end_offset = round_page(ntohl(cd->codeLimit)); - if((ntohl(cd->version) >= supportsScatter) && (ntohl(cd->scatterOffset))) { - const struct Scatter *scatter = (const struct Scatter*) + if((ntohl(cd->version) >= CS_SUPPORTSSCATTER) && (ntohl(cd->scatterOffset))) { + const SC_Scatter *scatter = (const SC_Scatter*) ((const char*)cd + ntohl(cd->scatterOffset)); blob->csb_start_offset = ntohl(scatter->base) * PAGE_SIZE; } else { @@ -2516,7 +2862,7 @@ ubc_cs_blob_add( * Let policy module check whether the blob's signature is accepted. 
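
ubc_cs_sigpup_add() above and ubc_cs_blob_add() below derive the blob's covered byte range identically: the end is codeLimit rounded up to a page, and the start is either the first scatter entry's base page (when the CodeDirectory version supports scatter vectors) or nCodeSlots pages back from the end. A condensed sketch, with a pared-down, hypothetical stand-in for the CodeDirectory fields involved and a 4 KB page assumed:

#include <stdint.h>
#include <arpa/inet.h>

#define PAGE_SIZE		4096u		/* assumed page size */
#define CS_SUPPORTSSCATTER	0x20100		/* first CD version with scatter */

typedef struct {
	uint32_t count;		/* pages; zero terminates the vector */
	uint32_t base;		/* first page number */
	uint64_t targetOffset;
	uint64_t spare;
} SC_Scatter;

/* pared-down stand-in for the CodeDirectory fields used here (big-endian) */
struct cd_fields {
	uint32_t version, codeLimit, nCodeSlots, scatterOffset;
};

static uint64_t
round_page(uint64_t x)
{
	return (x + PAGE_SIZE - 1) & ~(uint64_t)(PAGE_SIZE - 1);
}

static void
covered_range(const struct cd_fields *cd, const SC_Scatter *scatter,
    uint64_t *start, uint64_t *end)
{
	*end = round_page(ntohl(cd->codeLimit));
	if (ntohl(cd->version) >= CS_SUPPORTSSCATTER &&
	    ntohl(cd->scatterOffset) != 0 && scatter != NULL)
		*start = (uint64_t)ntohl(scatter->base) * PAGE_SIZE;
	else
		*start = *end - (uint64_t)ntohl(cd->nCodeSlots) * PAGE_SIZE;
}
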
*/ #if CONFIG_MACF - error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size); + error = mac_vnode_check_signature(vp, base_offset, blob->csb_sha1, (void*)addr, size); if (error) goto out; #endif @@ -2582,11 +2928,43 @@ ubc_cs_blob_add( */ oblob->csb_cpu_type = cputype; } + /* + * If the same blob moved around in the Mach-O, we + * want to remember the new blob offset to avoid + * coming back here again and again. + */ + oblob->csb_blob_offset = blob_offset; + vnode_unlock(vp); error = EAGAIN; goto out; } else { /* different blob: reject the new one */ + char pathbuf[MAXPATHLEN]; + char new_sha1_str[2*SHA1_RESULTLEN+1]; + char old_sha1_str[2*SHA1_RESULTLEN+1]; + char arch_str[20]; + const char *pathp = "?unknown"; + int pblen = sizeof(pathbuf); + if (vn_getpath(vp, pathbuf, &pblen) == 0) { + /* pblen == strlen(pathbuf) + 1. Assume strlen(pathbuf) > 0 */ + for (pathp = pathbuf + pblen - 2; pathp > pathbuf && pathp[-1] != '/'; pathp--) ; + } + snprintf(arch_str, sizeof(arch_str), "%x", cputype); + hex_str(oblob->csb_sha1, SHA1_RESULTLEN, old_sha1_str); + hex_str(blob->csb_sha1, SHA1_RESULTLEN, new_sha1_str); + kern_asl_msg(LOG_NOTICE, "messagetracer", + 6, + "com.apple.message.domain", "com.apple.kernel.cs.replace", + "com.apple.message.signature", pathp, + "com.apple.message.signature2", arch_str, + "com.apple.message.signature3", old_sha1_str, + "com.apple.message.result", new_sha1_str, + "com.apple.message.summarize", "YES", + NULL + ); + printf("CODESIGNING: rejected new signature for architecture %d of file %s\n", + cputype, pathbuf); vnode_unlock(vp); error = EALREADY; goto out; @@ -2595,7 +2973,6 @@ ubc_cs_blob_add( } - /* mark this vnode's VM object as having "signed pages" */ kr = memory_object_signed(uip->ui_control, TRUE); if (kr != KERN_SUCCESS) { @@ -2628,17 +3005,18 @@ ubc_cs_blob_add( if (cs_debug > 1) { proc_t p; - + const char *name = vnode_getname_printable(vp); p = current_proc(); printf("CODE SIGNING: proc %d(%s) " "loaded %s signatures for file (%s) " "range 0x%llx:0x%llx flags 0x%x\n", p->p_pid, p->p_comm, blob->csb_cpu_type == -1 ? "detached" : "embedded", - vnode_name(vp), + name, blob->csb_base_offset + blob->csb_start_offset, blob->csb_base_offset + blob->csb_end_offset, blob->csb_flags); + vnode_putname_printable(name); } vnode_unlock(vp); @@ -2709,6 +3087,8 @@ ubc_cs_blob_get( } } + if (cs_debug && blob != NULL && blob->csb_sigpup) + printf("found sig pup blob\n"); out: vnode_unlock(vp); @@ -2725,7 +3105,7 @@ ubc_cs_free( blob != NULL; blob = next_blob) { next_blob = blob->csb_next; - if (blob->csb_mem_kaddr != 0) { + if (blob->csb_mem_kaddr != 0 && !blob->csb_sigpup) { ubc_cs_blob_deallocate(blob->csb_mem_kaddr, blob->csb_mem_size); blob->csb_mem_kaddr = 0; @@ -2845,6 +3225,9 @@ cs_validate_page( break; } } + if (blob->csb_sigpup && cs_debug) + printf("checking for a sigpup CD\n"); + blob_addr = kaddr + blob->csb_mem_offset; lower_bound = CAST_DOWN(char *, blob_addr); @@ -2854,9 +3237,11 @@ cs_validate_page( cd = findCodeDirectory(embedded, lower_bound, upper_bound); if (cd != NULL) { if (cd->pageSize != PAGE_SHIFT || - cd->hashType != 0x1 || + cd->hashType != CS_HASHTYPE_SHA1 || cd->hashSize != SHA1_RESULTLEN) { /* bogus blob ? 
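
Both failure paths that feed kern_asl_msg() above render digests with the small hex_str() helper added earlier in this file: two lowercase hex digits per byte, each written through a 3-byte snprintf() window so that the final call leaves the terminating NUL in place. Its contract is easy to check in isolation:

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define SHA1_RESULTLEN 20

/* same shape as the kernel helper ("%02.2x" and "%02x" print identically) */
static void
hex_str(const unsigned char *hash, size_t len, char *buf)
{
	for (size_t n = 0; n < len; n++)
		snprintf(buf + 2 * n, 3, "%02x", hash[n]);
}

int
main(void)
{
	unsigned char digest[SHA1_RESULTLEN] = { 0xde, 0xad, 0xbe, 0xef };
	char str[2 * SHA1_RESULTLEN + 1];	/* callers size the buffer this way */

	hex_str(digest, sizeof(digest), str);
	assert(strlen(str) == 2 * SHA1_RESULTLEN);	/* final snprintf leaves the NUL */
	assert(strncmp(str, "deadbeef", 8) == 0);
	printf("%s\n", str);
	return 0;
}
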
*/ + if (blob->csb_sigpup && cs_debug) + printf("page foo bogus sigpup CD\n"); continue; } @@ -2864,19 +3249,30 @@ cs_validate_page( if (offset < blob->csb_start_offset || offset >= blob->csb_end_offset) { /* our page is not covered by this blob */ + if (blob->csb_sigpup && cs_debug) + printf("OOB sigpup CD\n"); continue; } codeLimit = ntohl(cd->codeLimit); - hash = hashes(cd, atop(offset), + if (blob->csb_sigpup && cs_debug) + printf("sigpup codesize %d\n", (int)codeLimit); + + hash = hashes(cd, (unsigned)atop(offset), lower_bound, upper_bound); if (hash != NULL) { bcopy(hash, expected_hash, sizeof (expected_hash)); found_hash = TRUE; + if (blob->csb_sigpup && cs_debug) + printf("sigpup hash\n"); } break; + } else { + if (blob->csb_sigpup && cs_debug) + printf("sig pup had no valid CD\n"); + } } @@ -2900,7 +3296,6 @@ cs_validate_page( } else { size = PAGE_SIZE; - const uint32_t *asha1, *esha1; if ((off_t)(offset + size) > codeLimit) { /* partial page at end of segment */ assert(offset < codeLimit); @@ -2911,25 +3306,33 @@ cs_validate_page( SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size); SHA1Final(actual_hash, &sha1ctxt); - asha1 = (const uint32_t *) actual_hash; - esha1 = (const uint32_t *) expected_hash; - if (bcmp(expected_hash, actual_hash, SHA1_RESULTLEN) != 0) { + char asha1_str[2*SHA1_RESULTLEN+1]; + char esha1_str[2*SHA1_RESULTLEN+1]; + hex_str(actual_hash, SHA1_RESULTLEN, asha1_str); + hex_str(expected_hash, SHA1_RESULTLEN, esha1_str); if (cs_debug) { printf("CODE SIGNING: cs_validate_page: " - "mobj %p off 0x%llx size 0x%lx: " - "actual [0x%x 0x%x 0x%x 0x%x 0x%x] != " - "expected [0x%x 0x%x 0x%x 0x%x 0x%x]\n", - pager, page_offset, size, - asha1[0], asha1[1], asha1[2], - asha1[3], asha1[4], - esha1[0], esha1[1], esha1[2], - esha1[3], esha1[4]); + "mobj %p off 0x%llx size 0x%lx: actual %s expected %s\n", + pager, page_offset, size, asha1_str, esha1_str); } cs_validate_page_bad_hash++; + if (!*tainted) { + char page_offset_str[20]; + snprintf(page_offset_str, sizeof(page_offset_str), "%llx", page_offset); + kern_asl_msg(LOG_NOTICE, "messagetracer", + 5, + "com.apple.message.domain", "com.apple.kernel.cs.mismatch", + "com.apple.message.signature", page_offset_str, + "com.apple.message.signature2", asha1_str, + "com.apple.message.signature3", esha1_str, + "com.apple.message.summarize", "YES", + NULL + ); + } *tainted = TRUE; } else { - if (cs_debug > 1) { + if (cs_debug > 10) { printf("CODE SIGNING: cs_validate_page: " "mobj %p off 0x%llx size 0x%lx: " "SHA1 OK\n", diff --git a/bsd/kern/uipc_domain.c b/bsd/kern/uipc_domain.c index a015bddae..5a1c62a79 100644 --- a/bsd/kern/uipc_domain.c +++ b/bsd/kern/uipc_domain.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 1998-2012 Apple Inc. All rights reserved. + * Copyright (c) 1998-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ @@ -77,78 +77,113 @@ #include +#include #include -void init_domain(struct domain *dp) __attribute__((section("__TEXT, initcode"))); -void prepend_domain(struct domain *dp) __attribute__((section("__TEXT, initcode"))); - -void pfslowtimo(void *); - -struct protosw *pffindprotonotype(int, int); -struct protosw *pffindprotonotype_locked(int , int , int); -struct domain *pffinddomain(int); -static void net_update_uptime(void); - -/* - * Add/delete 'domain': Link structure into system list, - * invoke the domain init, and then the proto inits. - * To delete, just remove from the list (dom_refs must be zero) - */ - -lck_grp_t *domain_proto_mtx_grp; +static void pr_init_old(struct protosw *, struct domain *); +static void init_proto(struct protosw *, struct domain *); +static void attach_proto(struct protosw *, struct domain *); +static void detach_proto(struct protosw *, struct domain *); +static void dom_init_old(struct domain *); +static void init_domain(struct domain *); +static void attach_domain(struct domain *); +static void detach_domain(struct domain *); +static struct protosw *pffindprotonotype_locked(int, int, int); +static struct domain *pffinddomain_locked(int); + +static boolean_t domain_timeout_run; /* domain timer is scheduled to run */ +static boolean_t domain_draining; +static void domain_sched_timeout(void); +static void domain_timeout(void *); + +lck_grp_t *domain_proto_mtx_grp; lck_attr_t *domain_proto_mtx_attr; static lck_grp_attr_t *domain_proto_mtx_grp_attr; decl_lck_mtx_data(static, domain_proto_mtx); -extern int do_reclaim; +decl_lck_mtx_data(static, domain_timeout_mtx); extern sysctlfn net_sysctl; static u_int64_t _net_uptime; static void -init_proto(struct protosw *pr) +pr_init_old(struct protosw *pp, struct domain *dp) { - TAILQ_INIT(&pr->pr_filter_head); - if (pr->pr_init) - (*pr->pr_init)(); +#pragma unused(dp) + VERIFY(pp->pr_flags & PR_OLD); + VERIFY(pp->pr_old != NULL); - /* Make sure pr_init isn't called again!! */ - pr->pr_init = 0; + if (pp->pr_old->pr_init != NULL) + pp->pr_old->pr_init(); } -void -init_domain(struct domain *dp) +static void +init_proto(struct protosw *pp, struct domain *dp) { - struct protosw *pr; - - if ((dp->dom_mtx = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr)) == NULL) { - printf("init_domain: can't init domain mtx for domain=%s\n", dp->dom_name); - return; /* we have a problem... 
*/ - } + VERIFY(pp->pr_flags & PR_ATTACHED); - if (dp->dom_init) - (*dp->dom_init)(); + if (!(pp->pr_flags & PR_INITIALIZED)) { + TAILQ_INIT(&pp->pr_filter_head); + if (pp->pr_init != NULL) + pp->pr_init(pp, dp); + pp->pr_flags |= PR_INITIALIZED; + } +} - /* and then init the currently installed protos in this domain */ +static void +attach_proto(struct protosw *pp, struct domain *dp) +{ + domain_proto_mtx_lock_assert_held(); + VERIFY(!(pp->pr_flags & PR_ATTACHED)); + VERIFY(pp->pr_domain == NULL); + VERIFY(pp->pr_protosw == NULL); + + TAILQ_INSERT_TAIL(&dp->dom_protosw, pp, pr_entry); + pp->pr_flags |= PR_ATTACHED; + pp->pr_domain = dp; + pp->pr_protosw = pp; + + /* do some cleaning up on user request callbacks */ + pru_sanitize(pp->pr_usrreqs); +} - for (pr = dp->dom_protosw; pr; pr = pr->pr_next) { - if (pr->pr_usrreqs == 0) - panic("domaininit: %ssw[%d] has no usrreqs!", - dp->dom_name, - (int)(pr - dp->dom_protosw)); +static void +detach_proto(struct protosw *pp, struct domain *dp) +{ + domain_proto_mtx_lock_assert_held(); + VERIFY(pp->pr_flags & PR_ATTACHED); + VERIFY(pp->pr_domain == dp); + VERIFY(pp->pr_protosw == pp); + + TAILQ_REMOVE(&dp->dom_protosw, pp, pr_entry); + pp->pr_flags &= ~PR_ATTACHED; + pp->pr_domain = NULL; + pp->pr_protosw = NULL; +} -#if __APPLE__ - /* - * Warn that pr_fasttimo (now pr_unused) is deprecated since rdar://7617868 - */ - if (pr->pr_unused != NULL) { - printf("init_domain: warning %s, proto %d: pr_fasttimo is deprecated and won't be called\n", - dp->dom_name, pr->pr_protocol); - } -#endif +static void +dom_init_old(struct domain *dp) +{ + VERIFY(dp->dom_flags & DOM_OLD); + VERIFY(dp->dom_old != NULL); - init_proto(pr); + if (dp->dom_old->dom_init != NULL) + dp->dom_old->dom_init(); +} +static void +init_domain(struct domain *dp) +{ + VERIFY(dp->dom_flags & DOM_ATTACHED); + + if (!(dp->dom_flags & DOM_INITIALIZED)) { + lck_mtx_init(&dp->dom_mtx_s, domain_proto_mtx_grp, + domain_proto_mtx_attr); + dp->dom_mtx = &dp->dom_mtx_s; + TAILQ_INIT(&dp->dom_protosw); + if (dp->dom_init != NULL) + dp->dom_init(dp); + dp->dom_flags |= DOM_INITIALIZED; } /* Recompute for new protocol */ @@ -164,200 +199,550 @@ init_domain(struct domain *dp) max_datalen = MHLEN - max_hdr; } -void -prepend_domain(struct domain *dp) -{ - lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED); - dp->dom_next = domains; - domains = dp; +static void +attach_domain(struct domain *dp) +{ + domain_proto_mtx_lock_assert_held(); + VERIFY(!(dp->dom_flags & DOM_ATTACHED)); + + TAILQ_INSERT_TAIL(&domains, dp, dom_entry); + dp->dom_flags |= DOM_ATTACHED; +} + +static void +detach_domain(struct domain *dp) +{ + domain_proto_mtx_lock_assert_held(); + VERIFY(dp->dom_flags & DOM_ATTACHED); + + TAILQ_REMOVE(&domains, dp, dom_entry); + dp->dom_flags &= ~DOM_ATTACHED; + + if (dp->dom_flags & DOM_OLD) { + struct domain_old *odp = dp->dom_old; + + VERIFY(odp != NULL); + odp->dom_next = NULL; + odp->dom_mtx = NULL; + } } +/* + * Exported (private) routine, indirection of net_add_domain. + */ void -net_add_domain(struct domain *dp) +net_add_domain_old(struct domain_old *odp) { - int do_unlock; + struct domain *dp; + domain_guard_t guard; + + VERIFY(odp != NULL); + + guard = domain_guard_deploy(); + if ((dp = pffinddomain_locked(odp->dom_family)) != NULL) { + /* + * There is really nothing better than to panic here, + * as the caller would not have been able to handle + * any failures otherwise. 
+ */ + panic("%s: domain (%d,%s) already exists for %s\n", __func__, + dp->dom_family, dp->dom_name, odp->dom_name); + /* NOTREACHED */ + } - kprintf("Adding domain %s (family %d)\n", dp->dom_name, - dp->dom_family); - /* First, link in the domain */ + /* Make sure nothing is currently pointing to the odp. */ + TAILQ_FOREACH(dp, &domains, dom_entry) { + if (dp->dom_old == odp) { + panic("%s: domain %p (%d,%s) is already " + "associated with %p (%d,%s)\n", __func__, + odp, odp->dom_family, odp->dom_name, dp, + dp->dom_family, dp->dom_name); + /* NOTREACHED */ + } + } - do_unlock = domain_proto_mtx_lock(); - prepend_domain(dp); + if (odp->dom_protosw != NULL) { + panic("%s: domain (%d,%s) protocols need to added " + "via net_add_proto\n", __func__, odp->dom_family, + odp->dom_name); + /* NOTREACHED */ + } + dp = _MALLOC(sizeof (*dp), M_TEMP, M_WAITOK | M_ZERO); + if (dp == NULL) { + /* + * There is really nothing better than to panic here, + * as the caller would not have been able to handle + * any failures otherwise. + */ + panic("%s: unable to allocate memory for domain family " + "%d (%s)\n", __func__, odp->dom_family, odp->dom_name); + /* NOTREACHED */ + } + + /* Copy everything but dom_init, dom_mtx, dom_next and dom_refs */ + dp->dom_family = odp->dom_family; + dp->dom_flags = (odp->dom_flags & DOMF_USERFLAGS) | DOM_OLD; + dp->dom_name = odp->dom_name; + dp->dom_init = dom_init_old; + dp->dom_externalize = odp->dom_externalize; + dp->dom_dispose = odp->dom_dispose; + dp->dom_rtattach = odp->dom_rtattach; + dp->dom_rtoffset = odp->dom_rtoffset; + dp->dom_maxrtkey = odp->dom_maxrtkey; + dp->dom_protohdrlen = odp->dom_protohdrlen; + dp->dom_old = odp; + + attach_domain(dp); init_domain(dp); - domain_proto_mtx_unlock(do_unlock); + /* Point the mutex back to the internal structure's */ + odp->dom_mtx = dp->dom_mtx; + domain_guard_release(guard); } +/* + * Exported (private) routine, indirection of net_del_domain. + */ int -net_del_domain(struct domain *dp) -{ register struct domain *dp1, *dp2; - register int retval = 0; - int do_unlock; - - do_unlock = domain_proto_mtx_lock(); - - if (dp->dom_refs) { - domain_proto_mtx_unlock(do_unlock); - return(EBUSY); - } - - for (dp2 = NULL, dp1 = domains; dp1; dp2 = dp1, dp1 = dp1->dom_next) - { if (dp == dp1) +net_del_domain_old(struct domain_old *odp) +{ + struct domain *dp1, *dp2; + int error = 0; + domain_guard_t guard; + + VERIFY(odp != NULL); + + guard = domain_guard_deploy(); + if (odp->dom_refs != 0) { + error = EBUSY; + goto done; + } + + TAILQ_FOREACH_SAFE(dp1, &domains, dom_entry, dp2) { + if (!(dp1->dom_flags & DOM_OLD)) + continue; + VERIFY(dp1->dom_old != NULL); + if (odp == dp1->dom_old) break; } - if (dp1) - { if (dp2) - dp2->dom_next = dp1->dom_next; - else - domains = dp1->dom_next; - } else - retval = EPFNOSUPPORT; - domain_proto_mtx_unlock(do_unlock); + if (dp1 != NULL) { + struct protosw *pp1, *pp2; + + VERIFY(dp1->dom_flags & DOM_OLD); + VERIFY(dp1->dom_old == odp); + + /* Remove all protocols attached to this domain */ + TAILQ_FOREACH_SAFE(pp1, &dp1->dom_protosw, pr_entry, pp2) { + detach_proto(pp1, dp1); + if (pp1->pr_usrreqs->pru_flags & PRUF_OLD) + FREE(pp1->pr_usrreqs, M_TEMP); + if (pp1->pr_flags & PR_OLD) + FREE(pp1, M_TEMP); + } - return(retval); + detach_domain(dp1); + FREE(dp1, M_TEMP); + } else { + error = EPFNOSUPPORT; + } +done: + domain_guard_release(guard); + return (error); } /* + * Internal routine, not exported. 
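
attach_proto()/detach_proto() and attach_domain()/detach_domain() above are exact inverses over a TAILQ link plus a state flag, which is what lets net_del_domain_old() tear down a whole domain with TAILQ_FOREACH_SAFE while freeing entries mid-walk. A generic sketch of the pattern, using the same BSD <sys/queue.h> macros (glibc's copy lacks the _SAFE variants; assertions stand in for VERIFY()):

#include <assert.h>
#include <stdbool.h>
#include <sys/queue.h>		/* BSD queue macros, incl. TAILQ_FOREACH_SAFE */

struct proto {
	TAILQ_ENTRY(proto)	pr_entry;
	bool			pr_attached;	/* stands in for PR_ATTACHED */
};
TAILQ_HEAD(proto_head, proto);

static void
attach_proto(struct proto_head *dp, struct proto *pp)
{
	assert(!pp->pr_attached);	/* mirrors VERIFY(!(... & PR_ATTACHED)) */
	TAILQ_INSERT_TAIL(dp, pp, pr_entry);
	pp->pr_attached = true;
}

static void
detach_proto(struct proto_head *dp, struct proto *pp)
{
	assert(pp->pr_attached);
	TAILQ_REMOVE(dp, pp, pr_entry);
	pp->pr_attached = false;
}

/* teardown detaches entries mid-walk, as net_del_domain_old() does */
static void
detach_all(struct proto_head *dp)
{
	struct proto *pp, *tmp;

	TAILQ_FOREACH_SAFE(pp, dp, pr_entry, tmp)
		detach_proto(dp, pp);
}

int
main(void)
{
	struct proto_head dp = TAILQ_HEAD_INITIALIZER(dp);
	struct proto a = { .pr_attached = false };
	struct proto b = { .pr_attached = false };

	attach_proto(&dp, &a);
	attach_proto(&dp, &b);
	detach_all(&dp);
	assert(TAILQ_EMPTY(&dp));
	return 0;
}
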
+ * * net_add_proto - link a protosw into a domain's protosw chain - * - * note: protocols must use their own domain lock before calling net_add_proto + * + * NOTE: Caller must have acquired domain_proto_mtx + */ +int +net_add_proto(struct protosw *pp, struct domain *dp, int doinit) +{ + struct protosw *pp1; + + /* + * This could be called as part of initializing the domain, + * and thus DOM_INITIALIZED may not be set (yet). + */ + domain_proto_mtx_lock_assert_held(); + VERIFY(!(pp->pr_flags & PR_ATTACHED)); + + /* pr_domain is set only after the protocol is attached */ + if (pp->pr_domain != NULL) { + panic("%s: domain (%d,%s), proto %d has non-NULL pr_domain!\n", + __func__, dp->dom_family, dp->dom_name, pp->pr_protocol); + /* NOTREACHED */ + } + + if (pp->pr_usrreqs == NULL) { + panic("%s: domain (%d,%s), proto %d has no usrreqs!\n", + __func__, dp->dom_family, dp->dom_name, pp->pr_protocol); + /* NOTREACHED */ + } + + TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) { + if (pp1->pr_type == pp->pr_type && + pp1->pr_protocol == pp->pr_protocol) + return (EEXIST); + } + + attach_proto(pp, dp); + if (doinit) + net_init_proto(pp, dp); + + return (0); +} + +void +net_init_proto(struct protosw *pp, struct domain *dp) +{ + /* + * This could be called as part of initializing the domain, + * and thus DOM_INITIALIZED may not be set (yet). The protocol + * must have been attached via net_addr_protosw() by now. + */ + domain_proto_mtx_lock_assert_held(); + VERIFY(pp->pr_flags & PR_ATTACHED); + + init_proto(pp, dp); +} + +/* + * Exported (private) routine, indirection of net_add_proto. */ int -net_add_proto(struct protosw *pp, struct domain *dp) -{ register struct protosw *pp1, *pp2; +net_add_proto_old(struct protosw_old *opp, struct domain_old *odp) +{ + struct pr_usrreqs_old *opru; + struct pr_usrreqs *pru = NULL; + struct protosw *pp = NULL, *pp1; + int error = 0; + struct domain *dp; + domain_guard_t guard; + + /* + * This could be called as part of initializing the domain, + * and thus DOM_INITIALIZED may not be set (yet). 
+ */ + guard = domain_guard_deploy(); + + /* Make sure the domain has been added via net_add_domain */ + TAILQ_FOREACH(dp, &domains, dom_entry) { + if (!(dp->dom_flags & DOM_OLD)) + continue; + if (dp->dom_old == odp) + break; + } + if (dp == NULL) { + error = EINVAL; + goto done; + } - for (pp2 = NULL, pp1 = dp->dom_protosw; pp1; pp1 = pp1->pr_next) - { if (pp1->pr_type == pp->pr_type && - pp1->pr_protocol == pp->pr_protocol) { - return(EEXIST); + TAILQ_FOREACH(pp1, &dp->dom_protosw, pr_entry) { + if (pp1->pr_type == opp->pr_type && + pp1->pr_protocol == opp->pr_protocol) { + error = EEXIST; + goto done; } - pp2 = pp1; } - if (pp2 == NULL) - dp->dom_protosw = pp; - else - pp2->pr_next = pp; - init_proto(pp); + if ((opru = opp->pr_usrreqs) == NULL) { + panic("%s: domain (%d,%s), proto %d has no usrreqs!\n", + __func__, odp->dom_family, odp->dom_name, opp->pr_protocol); + /* NOTREACHED */ + } - return(0); + pru = _MALLOC(sizeof (*pru), M_TEMP, M_WAITOK | M_ZERO); + if (pru == NULL) { + error = ENOMEM; + goto done; + } + + pru->pru_flags = PRUF_OLD; + pru->pru_abort = opru->pru_abort; + pru->pru_accept = opru->pru_accept; + pru->pru_attach = opru->pru_attach; + pru->pru_bind = opru->pru_bind; + pru->pru_connect = opru->pru_connect; + pru->pru_connect2 = opru->pru_connect2; + pru->pru_control = opru->pru_control; + pru->pru_detach = opru->pru_detach; + pru->pru_disconnect = opru->pru_disconnect; + pru->pru_listen = opru->pru_listen; + pru->pru_peeraddr = opru->pru_peeraddr; + pru->pru_rcvd = opru->pru_rcvd; + pru->pru_rcvoob = opru->pru_rcvoob; + pru->pru_send = opru->pru_send; + pru->pru_sense = opru->pru_sense; + pru->pru_shutdown = opru->pru_shutdown; + pru->pru_sockaddr = opru->pru_sockaddr; + pru->pru_sosend = opru->pru_sosend; + pru->pru_soreceive = opru->pru_soreceive; + pru->pru_sopoll = opru->pru_sopoll; + + pp = _MALLOC(sizeof (*pp), M_TEMP, M_WAITOK | M_ZERO); + if (pp == NULL) { + error = ENOMEM; + goto done; + } + + /* + * Protocol fast and slow timers are now deprecated. + */ + if (opp->pr_unused != NULL) { + printf("%s: domain (%d,%s), proto %d: pr_fasttimo is " + "deprecated and won't be called\n", __func__, + odp->dom_family, odp->dom_name, opp->pr_protocol); + } + if (opp->pr_unused2 != NULL) { + printf("%s: domain (%d,%s), proto %d: pr_slowtimo is " + "deprecated and won't be called\n", __func__, + odp->dom_family, odp->dom_name, opp->pr_protocol); + } + + /* Copy everything but pr_init, pr_next, pr_domain, pr_protosw */ + pp->pr_type = opp->pr_type; + pp->pr_protocol = opp->pr_protocol; + pp->pr_flags = (opp->pr_flags & PRF_USERFLAGS) | PR_OLD; + pp->pr_input = opp->pr_input; + pp->pr_output = opp->pr_output; + pp->pr_ctlinput = opp->pr_ctlinput; + pp->pr_ctloutput = opp->pr_ctloutput; + pp->pr_usrreqs = pru; + pp->pr_init = pr_init_old; + pp->pr_drain = opp->pr_drain; + pp->pr_sysctl = opp->pr_sysctl; + pp->pr_lock = opp->pr_lock; + pp->pr_unlock = opp->pr_unlock; + pp->pr_getlock = opp->pr_getlock; + pp->pr_old = opp; + + /* attach as well as initialize */ + attach_proto(pp, dp); + net_init_proto(pp, dp); +done: + if (error != 0) { + printf("%s: domain (%d,%s), proto %d: failed to attach, " + "error %d\n", __func__, odp->dom_family, + odp->dom_name, opp->pr_protocol, error); + + if (pru != NULL) + FREE(pru, M_TEMP); + if (pp != NULL) + FREE(pp, M_TEMP); + } + + domain_guard_release(guard); + return (error); } /* + * Internal routine, not exported. + * * net_del_proto - remove a protosw from a domain's protosw chain. 
* Search the protosw chain for the element with matching data. * Then unlink and return. * - * note: protocols must use their own domain lock before calling net_del_proto + * NOTE: Caller must have acquired domain_proto_mtx */ int net_del_proto(int type, int protocol, struct domain *dp) { - register struct protosw *pp1, *pp2; + struct protosw *pp; + + /* + * This could be called as part of initializing the domain, + * and thus DOM_INITIALIZED may not be set (yet). + */ + domain_proto_mtx_lock_assert_held(); - for (pp2 = NULL, pp1 = dp->dom_protosw; pp1; pp1 = pp1->pr_next) - { if (pp1->pr_type == type && - pp1->pr_protocol == protocol) + TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { + if (pp->pr_type == type && pp->pr_protocol == protocol) break; - pp2 = pp1; } - if (pp1 == NULL) { - return(ENXIO); - } - if (pp2) - pp2->pr_next = pp1->pr_next; - else - dp->dom_protosw = pp1->pr_next; - return(0); + if (pp == NULL) + return (ENXIO); + + detach_proto(pp, dp); + if (pp->pr_usrreqs->pru_flags & PRUF_OLD) + FREE(pp->pr_usrreqs, M_TEMP); + if (pp->pr_flags & PR_OLD) + FREE(pp, M_TEMP); + + return (0); } +/* + * Exported (private) routine, indirection of net_del_proto. + */ +int +net_del_proto_old(int type, int protocol, struct domain_old *odp) +{ + int error = 0; + struct protosw *pp; + struct domain *dp; + domain_guard_t guard; -#if NS -extern struct domain nsdomain; -#endif -#if ISO -extern struct domain isodomain; -#endif -#if CCITT -extern struct domain ccittdomain; -#endif + /* + * This could be called as part of initializing the domain, + * and thus DOM_INITIALIZED may not be set (yet). + */ + guard = domain_guard_deploy(); + + /* Make sure the domain has been added via net_add_domain */ + TAILQ_FOREACH(dp, &domains, dom_entry) { + if (!(dp->dom_flags & DOM_OLD)) + continue; + if (dp->dom_old == odp) + break; + } + if (dp == NULL) { + error = ENXIO; + goto done; + } + + TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { + if (pp->pr_type == type && pp->pr_protocol == protocol) + break; + } + if (pp == NULL) { + error = ENXIO; + goto done; + } + detach_proto(pp, dp); + if (pp->pr_usrreqs->pru_flags & PRUF_OLD) + FREE(pp->pr_usrreqs, M_TEMP); + if (pp->pr_flags & PR_OLD) + FREE(pp, M_TEMP); + +done: + domain_guard_release(guard); + return (error); +} + +static void +domain_sched_timeout(void) +{ + lck_mtx_assert(&domain_timeout_mtx, LCK_MTX_ASSERT_OWNED); + + if (!domain_timeout_run && domain_draining) { + domain_timeout_run = TRUE; + timeout(domain_timeout, NULL, hz); + } +} + +void +net_drain_domains(void) +{ + lck_mtx_lock(&domain_timeout_mtx); + domain_draining = TRUE; + domain_sched_timeout(); + lck_mtx_unlock(&domain_timeout_mtx); +} -#if NETAT -extern struct domain atalkdomain; -#endif #if INET6 -extern struct domain inet6domain; +extern struct domain inet6domain_s; #endif #if IPSEC -extern struct domain keydomain; +extern struct domain keydomain_s; #endif -extern struct domain routedomain, ndrvdomain, inetdomain; -extern struct domain systemdomain; +extern struct domain routedomain_s, ndrvdomain_s, inetdomain_s; +extern struct domain systemdomain_s, localdomain_s; + +#if MULTIPATH +extern struct domain mpdomain_s; +#endif /* MULTIPATH */ + +static void +domain_timeout(void *arg) +{ +#pragma unused(arg) + struct protosw *pp; + struct domain *dp; + domain_guard_t guard; + + lck_mtx_lock(&domain_timeout_mtx); + if (domain_draining) { + domain_draining = FALSE; + lck_mtx_unlock(&domain_timeout_mtx); + + guard = domain_guard_deploy(); + TAILQ_FOREACH(dp, &domains, dom_entry) { + 
TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { + if (pp->pr_drain != NULL) + (*pp->pr_drain)(); + } + } + domain_guard_release(guard); + + lck_mtx_lock(&domain_timeout_mtx); + } + + /* re-arm the timer if there's work to do */ + domain_timeout_run = FALSE; + domain_sched_timeout(); + lck_mtx_unlock(&domain_timeout_mtx); +} void domaininit(void) { - register struct domain *dp; - int do_unlock; + struct domain *dp; + domain_guard_t guard; /* * allocate lock group attribute and group for domain mutexes */ domain_proto_mtx_grp_attr = lck_grp_attr_alloc_init(); - domain_proto_mtx_grp = lck_grp_alloc_init("domain", domain_proto_mtx_grp_attr); - + domain_proto_mtx_grp = lck_grp_alloc_init("domain", + domain_proto_mtx_grp_attr); + /* * allocate the lock attribute for per domain mutexes */ domain_proto_mtx_attr = lck_attr_alloc_init(); lck_mtx_init(&domain_proto_mtx, domain_proto_mtx_grp, - domain_proto_mtx_attr); + domain_proto_mtx_attr); + lck_mtx_init(&domain_timeout_mtx, domain_proto_mtx_grp, + domain_proto_mtx_attr); + + guard = domain_guard_deploy(); /* - * Add all the static domains to the domains list + * Add all the static domains to the domains list. route domain + * gets added and initialized last, since we need it to attach + * rt_tables[] to everything that's already there. This also + * means that domains added after this point won't get their + * dom_rtattach() called on rt_tables[]. */ - - do_unlock = domain_proto_mtx_lock(); - - prepend_domain(&localdomain); - prepend_domain(&inetdomain); -#if NETAT - prepend_domain(&atalkdomain); -#endif + attach_domain(&inetdomain_s); #if INET6 - prepend_domain(&inet6domain); -#endif - prepend_domain(&routedomain); - + attach_domain(&inet6domain_s); +#endif /* INET6 */ +#if MULTIPATH + attach_domain(&mpdomain_s); +#endif /* MULTIPATH */ + attach_domain(&systemdomain_s); + attach_domain(&localdomain_s); #if IPSEC - prepend_domain(&keydomain); -#endif - -#if NS - prepend_domain(&nsdomain); -#endif -#if ISO - prepend_domain(&isodomain); -#endif -#if CCITT - prepend_domain(&ccittdomain); -#endif - prepend_domain(&ndrvdomain); - - prepend_domain(&systemdomain); + attach_domain(&keydomain_s); +#endif /* IPSEC */ + attach_domain(&ndrvdomain_s); + attach_domain(&routedomain_s); /* must be last domain */ /* * Now ask them all to init (XXX including the routing domain, * see above) */ - for (dp = domains; dp; dp = dp->dom_next) + TAILQ_FOREACH(dp, &domains, dom_entry) init_domain(dp); - domain_proto_mtx_unlock(do_unlock); - timeout(pfslowtimo, NULL, 1); + domain_guard_release(guard); } static __inline__ struct domain * @@ -365,12 +750,11 @@ pffinddomain_locked(int pf) { struct domain *dp; - dp = domains; - while (dp != NULL) - { if (dp->dom_family == pf) { + domain_proto_mtx_lock_assert_held(); + + TAILQ_FOREACH(dp, &domains, dom_entry) { + if (dp->dom_family == pf) break; - } - dp = dp->dom_next; } return (dp); } @@ -378,88 +762,134 @@ pffinddomain_locked(int pf) struct protosw * pffindtype(int family, int type) { - register struct domain *dp; - register struct protosw *pr; - int do_unlock; + struct protosw *pp = NULL; + struct domain *dp; + domain_guard_t guard; - do_unlock = domain_proto_mtx_lock(); - dp = pffinddomain_locked(family); - if (dp == NULL) { - domain_proto_mtx_unlock(do_unlock); - return (NULL); + guard = domain_guard_deploy(); + if ((dp = pffinddomain_locked(family)) == NULL) + goto done; + + TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { + if (pp->pr_type != 0 && pp->pr_type == type) + goto done; } - for (pr = dp->dom_protosw; pr; pr = 
pr->pr_next) - if (pr->pr_type && pr->pr_type == type) { - domain_proto_mtx_unlock(do_unlock); - return (pr); - } - domain_proto_mtx_unlock(do_unlock); - return (0); +done: + domain_guard_release(guard); + return (pp); } +/* + * Internal routine, not exported. + */ struct domain * pffinddomain(int pf) { struct domain *dp; - int do_unlock; + domain_guard_t guard; - do_unlock = domain_proto_mtx_lock(); + guard = domain_guard_deploy(); dp = pffinddomain_locked(pf); - domain_proto_mtx_unlock(do_unlock); - return(dp); + domain_guard_release(guard); + return (dp); +} + +/* + * Exported (private) routine, indirection of pffinddomain. + */ +struct domain_old * +pffinddomain_old(int pf) +{ + struct domain_old *odp = NULL; + struct domain *dp; + domain_guard_t guard; + + guard = domain_guard_deploy(); + if ((dp = pffinddomain_locked(pf)) != NULL && (dp->dom_flags & DOM_OLD)) + odp = dp->dom_old; + domain_guard_release(guard); + return (odp); } +/* + * Internal routine, not exported. + */ struct protosw * pffindproto(int family, int protocol, int type) { - register struct protosw *pr; - int do_unlock; - do_unlock = domain_proto_mtx_lock(); - pr = pffindproto_locked(family, protocol, type); - domain_proto_mtx_unlock(do_unlock); - return (pr); + struct protosw *pp; + domain_guard_t guard; + + guard = domain_guard_deploy(); + pp = pffindproto_locked(family, protocol, type); + domain_guard_release(guard); + return (pp); } struct protosw * pffindproto_locked(int family, int protocol, int type) { - register struct domain *dp; - register struct protosw *pr; - struct protosw *maybe = 0; + struct protosw *maybe = NULL; + struct protosw *pp; + struct domain *dp; + + domain_proto_mtx_lock_assert_held(); if (family == 0) return (0); + dp = pffinddomain_locked(family); - if (dp == NULL) { + if (dp == NULL) return (NULL); - } - for (pr = dp->dom_protosw; pr; pr = pr->pr_next) { - if ((pr->pr_protocol == protocol) && (pr->pr_type == type)) - return (pr); - if (type == SOCK_RAW && pr->pr_type == SOCK_RAW && - pr->pr_protocol == 0 && maybe == (struct protosw *)0) - maybe = pr; + TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { + if ((pp->pr_protocol == protocol) && (pp->pr_type == type)) + return (pp); + + if (type == SOCK_RAW && pp->pr_type == SOCK_RAW && + pp->pr_protocol == 0 && maybe == NULL) + maybe = pp; } return (maybe); } -struct protosw * -pffindprotonotype_locked(int family, int protocol, __unused int type) +/* + * Exported (private) routine, indirection of pffindproto. 
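+ *
+ * Hypothetical caller, for illustration only (not part of this file):
+ *
+ *	struct protosw_old *opr;
+ *
+ *	opr = pffindproto_old(PF_INET, IPPROTO_UDP, SOCK_DGRAM);
+ *
+ * A NULL result means no legacy-style (PR_OLD) protosw is registered
+ * for that family/protocol/type tuple.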
+ */ +struct protosw_old * +pffindproto_old(int family, int protocol, int type) +{ + struct protosw_old *opr = NULL; + struct protosw *pp; + domain_guard_t guard; + + guard = domain_guard_deploy(); + if ((pp = pffindproto_locked(family, protocol, type)) != NULL && + (pp->pr_flags & PR_OLD)) + opr = pp->pr_old; + domain_guard_release(guard); + return (opr); +} + +static struct protosw * +pffindprotonotype_locked(int family, int protocol, int type) { - register struct domain *dp; - register struct protosw *pr; +#pragma unused(type) + struct domain *dp; + struct protosw *pp; + + domain_proto_mtx_lock_assert_held(); if (family == 0) return (0); + dp = pffinddomain_locked(family); - if (dp == NULL) { + if (dp == NULL) return (NULL); - } - for (pr = dp->dom_protosw; pr; pr = pr->pr_next) { - if (pr->pr_protocol == protocol) { - return (pr); - } + + TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { + if (pp->pr_protocol == protocol) + return (pp); } return (NULL); } @@ -467,25 +897,27 @@ pffindprotonotype_locked(int family, int protocol, __unused int type) struct protosw * pffindprotonotype(int family, int protocol) { - register struct protosw *pr; - int do_unlock; - if (protocol == 0) { + struct protosw *pp; + domain_guard_t guard; + + if (protocol == 0) return (NULL); - } - do_unlock = domain_proto_mtx_lock(); - pr = pffindprotonotype_locked(family, protocol, 0); - domain_proto_mtx_unlock(do_unlock); - return (pr); + + guard = domain_guard_deploy(); + pp = pffindprotonotype_locked(family, protocol, 0); + domain_guard_release(guard); + return (pp); } int -net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, - user_addr_t newp, size_t newlen, __unused struct proc *p) +net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, + user_addr_t newp, size_t newlen, struct proc *p) { - register struct domain *dp; - register struct protosw *pr; - int family, protocol, error; - int do_unlock; +#pragma unused(p) + int family, protocol, error = 0; + struct domain *dp; + struct protosw *pp; + domain_guard_t guard; /* * All sysctl names at this level are nonterminal; @@ -499,83 +931,89 @@ net_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, if (family == 0) return (0); - do_unlock = domain_proto_mtx_lock(); - for (dp = domains; dp; dp = dp->dom_next) + + guard = domain_guard_deploy(); + TAILQ_FOREACH(dp, &domains, dom_entry) { if (dp->dom_family == family) - goto found; - domain_proto_mtx_unlock(do_unlock); - return (ENOPROTOOPT); -found: - for (pr = dp->dom_protosw; pr; pr = pr->pr_next) - if (pr->pr_protocol == protocol && pr->pr_sysctl) { - error = (*pr->pr_sysctl)(name + 2, namelen - 2, - (void *)(uintptr_t)oldp, oldlenp, (void *)(uintptr_t)newp, newlen); - domain_proto_mtx_unlock(do_unlock); - return (error); + break; + } + if (dp == NULL) { + error = ENOPROTOOPT; + goto done; + } + + TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { + if (pp->pr_protocol == protocol && pp->pr_sysctl != NULL) { + error = (*pp->pr_sysctl)(name + 2, namelen - 2, + (void *)(uintptr_t)oldp, oldlenp, + (void *)(uintptr_t)newp, newlen); + goto done; } - domain_proto_mtx_unlock(do_unlock); - return (ENOPROTOOPT); + } + error = ENOPROTOOPT; +done: + domain_guard_release(guard); + return (error); } void pfctlinput(int cmd, struct sockaddr *sa) { - pfctlinput2(cmd, sa, (void*)0); + pfctlinput2(cmd, sa, NULL); } void pfctlinput2(int cmd, struct sockaddr *sa, void *ctlparam) { struct domain *dp; - struct protosw *pr; - int do_unlock; + struct protosw *pp; + domain_guard_t guard; - if 
(!sa) + if (sa == NULL) return; - do_unlock = domain_proto_mtx_lock(); - for (dp = domains; dp; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr; pr = pr->pr_next) - if (pr->pr_ctlinput) - (*pr->pr_ctlinput)(cmd, sa, ctlparam); - domain_proto_mtx_unlock(do_unlock); + guard = domain_guard_deploy(); + TAILQ_FOREACH(dp, &domains, dom_entry) { + TAILQ_FOREACH(pp, &dp->dom_protosw, pr_entry) { + if (pp->pr_ctlinput != NULL) + (*pp->pr_ctlinput)(cmd, sa, ctlparam); + } + } + domain_guard_release(guard); } void -pfslowtimo(__unused void *arg) +net_update_uptime(void) { - register struct domain *dp; - register struct protosw *pr; - int do_unlock; + struct timeval tv; + microuptime(&tv); + _net_uptime = tv.tv_sec; /* - * Update coarse-grained networking timestamp (in sec.); the idea - * is to piggy-back on the periodic slow timeout callout to update - * the counter returnable via net_uptime(). + * Round up the timer to the nearest integer value because otherwise + * we might setup networking timers that are off by almost 1 second. */ - net_update_uptime(); - - do_unlock = domain_proto_mtx_lock(); - for (dp = domains; dp; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr; pr = pr->pr_next) { - if (pr->pr_slowtimo) - (*pr->pr_slowtimo)(); - if ((do_reclaim || (pr->pr_flags & PR_AGGDRAIN)) && - pr->pr_drain) - (*pr->pr_drain)(); - } - do_reclaim = 0; - domain_proto_mtx_unlock(do_unlock); - timeout(pfslowtimo, NULL, hz/PR_SLOWHZ); + if (tv.tv_usec > 500000) + _net_uptime++; } -static void -net_update_uptime(void) +void +net_update_uptime_secs(uint64_t secs) { - struct timeval tv; + _net_uptime = secs; +} - microuptime(&tv); - _net_uptime = tv.tv_sec; +/* + * Convert our uint64_t net_uptime to a struct timeval. + */ +void +net_uptime2timeval(struct timeval *tv) +{ + if (tv == NULL) + return; + + tv->tv_usec = 0; + tv->tv_sec = net_uptime(); } /* @@ -586,31 +1024,80 @@ net_update_uptime(void) u_int64_t net_uptime(void) { - /* If we get here before pfslowtimo() fires for the first time */ if (_net_uptime == 0) net_update_uptime(); return (_net_uptime); } -int -domain_proto_mtx_lock(void) +void +domain_proto_mtx_lock_assert_held(void) +{ + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED); +} + +void +domain_proto_mtx_lock_assert_notheld(void) +{ + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); +} + +domain_guard_t +domain_guard_deploy(void) { - int held = net_thread_check_lock(NET_THREAD_HELD_DOMAIN); - if (!held) { + net_thread_marks_t marks; + + marks = net_thread_marks_push(NET_THREAD_HELD_DOMAIN); + if (marks != net_thread_marks_none) { + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(&domain_proto_mtx); - net_thread_set_lock(NET_THREAD_HELD_DOMAIN); } - lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED); - return !held; + else + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED); + + return ((domain_guard_t)(const void*)marks); } void -domain_proto_mtx_unlock(int do_unlock) +domain_guard_release(domain_guard_t guard) { - if (do_unlock) { - net_thread_unset_lock(NET_THREAD_HELD_DOMAIN); + net_thread_marks_t marks = (net_thread_marks_t)(const void*)guard; + + if (marks != net_thread_marks_none) { + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED); lck_mtx_unlock(&domain_proto_mtx); + net_thread_marks_pop(marks); + } + else lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); +} + +domain_unguard_t +domain_unguard_deploy(void) +{ + net_thread_marks_t marks; + + marks = net_thread_unmarks_push(NET_THREAD_HELD_DOMAIN); + if (marks != 
net_thread_marks_none) { + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_unlock(&domain_proto_mtx); } + else + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); + + return ((domain_unguard_t)(const void*)marks); +} + +void +domain_unguard_release(domain_unguard_t unguard) +{ + net_thread_marks_t marks = (net_thread_marks_t)(const void*)unguard; + + if (marks != net_thread_marks_none) { + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(&domain_proto_mtx); + net_thread_unmarks_pop(marks); + } + else + lck_mtx_assert(&domain_proto_mtx, LCK_MTX_ASSERT_OWNED); } diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index 3edca9510..91d1ce4ca 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2012 Apple Inc. All rights reserved. + * Copyright (c) 1998-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,6 +79,8 @@ #include #include +#include + #include #include #include @@ -87,6 +89,7 @@ #include #include +#include #include #include @@ -416,17 +419,41 @@ typedef struct { mcache_audit_t *cl_audit[NMBPBG]; /* array of audits */ } mcl_audit_t; -/* - * Size of data from the beginning of an mbuf that covers m_hdr, pkthdr - * and m_ext structures. If auditing is enabled, we allocate a shadow - * mbuf structure of this size inside each audit structure, and the - * contents of the real mbuf gets copied into it when the mbuf is freed. - * This allows us to pattern-fill the mbuf for integrity check, and to - * preserve any constructed mbuf fields (e.g. mbuf + cluster cache case). - * Note that we don't save the contents of clusters when they are freed; - * we simply pattern-fill them. - */ -#define AUDIT_CONTENTS_SIZE ((MSIZE - MHLEN) + sizeof (_m_ext_t)) +typedef struct { + struct thread *msa_thread; /* thread doing transaction */ + struct thread *msa_pthread; /* previous transaction thread */ + uint32_t msa_tstamp; /* transaction timestamp (ms) */ + uint32_t msa_ptstamp; /* prev transaction timestamp (ms) */ + uint16_t msa_depth; /* pc stack depth */ + uint16_t msa_pdepth; /* previous transaction pc stack */ + void *msa_stack[MCACHE_STACK_DEPTH]; + void *msa_pstack[MCACHE_STACK_DEPTH]; +} mcl_scratch_audit_t; + +typedef struct { + /* + * Size of data from the beginning of an mbuf that covers m_hdr, + * pkthdr and m_ext structures. If auditing is enabled, we allocate + * a shadow mbuf structure of this size inside each audit structure, + * and the contents of the real mbuf gets copied into it when the mbuf + * is freed. This allows us to pattern-fill the mbuf for integrity + * check, and to preserve any constructed mbuf fields (e.g. mbuf + + * cluster cache case). Note that we don't save the contents of + * clusters when they are freed; we simply pattern-fill them. 
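+ *
+ * Roughly, each audit slot is laid out as
+ *
+ *	[ shadow mbuf bytes ][ scratch transaction record ]
+ *
+ * which is what the mcl_saved_contents_t structure below encodes.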
+ */ + u_int8_t sc_mbuf[(MSIZE - _MHLEN) + sizeof (_m_ext_t)]; + mcl_scratch_audit_t sc_scratch __attribute__((aligned(8))); +} mcl_saved_contents_t; + +#define AUDIT_CONTENTS_SIZE (sizeof (mcl_saved_contents_t)) + +#define MCA_SAVED_MBUF_PTR(_mca) \ + ((struct mbuf *)(void *)((mcl_saved_contents_t *) \ + (_mca)->mca_contents)->sc_mbuf) +#define MCA_SAVED_MBUF_SIZE \ + (sizeof (((mcl_saved_contents_t *)0)->sc_mbuf)) +#define MCA_SAVED_SCRATCH_PTR(_mca) \ + (&((mcl_saved_contents_t *)(_mca)->mca_contents)->sc_scratch) /* * mbuf specific mcache audit flags @@ -460,6 +487,8 @@ static boolean_t mcltrace; /* debug: stack tracing */ static boolean_t mclfindleak; /* debug: leak detection */ static boolean_t mclexpleak; /* debug: expose leak info to user space */ +static struct timeval mb_start; /* beginning of time */ + /* mbuf leak detection variables */ static struct mleak_table mleak_table; static mleak_stat_t *mleak_stat; @@ -530,9 +559,6 @@ static lck_grp_attr_t *mleak_lock_grp_attr; extern u_int32_t high_sb_max; -/* TODO: should be in header file */ -int do_reclaim = 0; - /* The minimum number of objects that are allocated, to start. */ #define MINCL 32 #define MINBIGCL (MINCL >> 1) @@ -615,11 +641,12 @@ static char *mbuf_dump_buf; * mbuf watchdog is enabled by default on embedded platforms. It is * also toggleable via the kern.ipc.mb_watchdog sysctl. */ -#if CONFIG_EMBEDDED -static unsigned int mb_watchdog = 1; -#else static unsigned int mb_watchdog = 0; -#endif /* CONFIG_EMBEDDED */ + +/* Red zone */ +static u_int32_t mb_redzone_cookie; +static void m_redzone_init(struct mbuf *); +static void m_redzone_verify(struct mbuf *m); /* The following are used to serialize m_clalloc() */ static boolean_t mb_clalloc_busy; @@ -670,6 +697,7 @@ static void mcl_audit_cluster(mcache_audit_t *, void *, size_t, boolean_t, boolean_t); static void mcl_audit_restore_mbuf(struct mbuf *, mcache_audit_t *, boolean_t); static void mcl_audit_save_mbuf(struct mbuf *, mcache_audit_t *); +static void mcl_audit_scratch(mcache_audit_t *); static void mcl_audit_mcheck_panic(struct mbuf *); static void mcl_audit_verify_nextptr(void *, mcache_audit_t *); @@ -772,6 +800,19 @@ static struct mbuf *m_split0(struct mbuf *, int, int, int); /* * Macros used during mbuf and cluster initialization. 
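 * (MBUF_INIT_PKTHDR below factors the packet-header setup out of
 * MBUF_INIT so that m_reinit() can reuse it when converting a plain
 * mbuf into a packet-header mbuf.)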
*/ +#define MBUF_INIT_PKTHDR(m) { \ + (m)->m_pkthdr.rcvif = NULL; \ + (m)->m_pkthdr.pkt_hdr = NULL; \ + (m)->m_pkthdr.len = 0; \ + (m)->m_pkthdr.csum_flags = 0; \ + (m)->m_pkthdr.csum_data = 0; \ + (m)->m_pkthdr.vlan_tag = 0; \ + m_classifier_init(m, 0); \ + m_tag_init(m, 1); \ + m_scratch_init(m); \ + m_redzone_init(m); \ +} + #define MBUF_INIT(m, pkthdr, type) { \ _MCHECK(m); \ (m)->m_next = (m)->m_nextpkt = NULL; \ @@ -783,18 +824,7 @@ static struct mbuf *m_split0(struct mbuf *, int, int, int); } else { \ (m)->m_data = (m)->m_pktdat; \ (m)->m_flags = M_PKTHDR; \ - (m)->m_pkthdr.rcvif = NULL; \ - (m)->m_pkthdr.len = 0; \ - (m)->m_pkthdr.header = NULL; \ - (m)->m_pkthdr.csum_flags = 0; \ - (m)->m_pkthdr.csum_data = 0; \ - (m)->m_pkthdr.tso_segsz = 0; \ - (m)->m_pkthdr.vlan_tag = 0; \ - (m)->m_pkthdr.socket_id = 0; \ - (m)->m_pkthdr.vt_nrecs = 0; \ - (m)->m_pkthdr.aux_flags = 0; \ - m_tag_init(m); \ - m_service_class_init(m); \ + MBUF_INIT_PKTHDR(m); \ } \ } @@ -868,7 +898,7 @@ struct mbstat mbstat; */ typedef struct { unsigned int cpu_mtypes[MT_MAX]; -} __attribute__((aligned(CPU_CACHE_SIZE), packed)) mtypes_cpu_t; +} __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE), packed)) mtypes_cpu_t; typedef struct { mtypes_cpu_t mbs_cpu[1]; @@ -1314,6 +1344,8 @@ mbinit(void) void *buf; thread_t thread = THREAD_NULL; + microuptime(&mb_start); + /* * These MBUF_ values must be equal to their private counterparts. */ @@ -1347,7 +1379,7 @@ mbinit(void) _CASSERT(MBUF_TSO_IPV4 == CSUM_TSO_IPV4); _CASSERT(MBUF_TSO_IPV6 == CSUM_TSO_IPV6); - _CASSERT(MBUF_CSUM_REQ_SUM16 == CSUM_TCP_SUM16); + _CASSERT(MBUF_CSUM_REQ_SUM16 == CSUM_PARTIAL); _CASSERT(MBUF_CSUM_TCP_SUM16 == MBUF_CSUM_REQ_SUM16); _CASSERT(MBUF_CSUM_REQ_IP == CSUM_IP); _CASSERT(MBUF_CSUM_REQ_TCP == CSUM_TCP); @@ -1363,9 +1395,6 @@ mbinit(void) _CASSERT(MBUF_DONTWAIT == M_DONTWAIT); _CASSERT(MBUF_COPYALL == M_COPYALL); - _CASSERT(MBUF_PKTAUXF_INET_RESOLVE_RTR == MAUXF_INET_RESOLVE_RTR); - _CASSERT(MBUF_PKTAUXF_INET6_RESOLVE_RTR == MAUXF_INET6_RESOLVE_RTR); - _CASSERT(MBUF_SC2TC(MBUF_SC_BK_SYS) == MBUF_TC_BK); _CASSERT(MBUF_SC2TC(MBUF_SC_BK) == MBUF_TC_BK); _CASSERT(MBUF_SC2TC(MBUF_SC_BE) == MBUF_TC_BE); @@ -1382,6 +1411,18 @@ mbinit(void) _CASSERT(MBUF_TC2SCVAL(MBUF_TC_VI) == SCVAL_VI); _CASSERT(MBUF_TC2SCVAL(MBUF_TC_VO) == SCVAL_VO); + /* Module specific scratch space (32-bit alignment requirement) */ + _CASSERT(!(offsetof(struct mbuf, m_pkthdr.pkt_mpriv) % + sizeof (uint32_t))); + + /* Initialize random red zone cookie value */ + _CASSERT(sizeof (mb_redzone_cookie) == + sizeof (((struct pkthdr *)0)->redzone)); + read_random(&mb_redzone_cookie, sizeof (mb_redzone_cookie)); + + /* Make sure we don't save more than we should */ + _CASSERT(MCA_SAVED_MBUF_SIZE <= sizeof (struct mbuf)); + if (nmbclusters == 0) nmbclusters = NMBCLUSTERS; @@ -1427,7 +1468,7 @@ mbinit(void) VERIFY(mclaudit != NULL); mcl_audit_con_cache = mcache_create("mcl_audit_contents", - AUDIT_CONTENTS_SIZE, 0, 0, MCR_SLEEP); + AUDIT_CONTENTS_SIZE, sizeof (u_int64_t), 0, MCR_SLEEP); VERIFY(mcl_audit_con_cache != NULL); } mclverify = (mbuf_debug & MCF_VERIFY); @@ -1538,11 +1579,12 @@ mbinit(void) * before alignment is not saved. 
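 *
 * The alignment idiom, as a sketch (names 'size' and 'p' are
 * illustrative): over-allocate by one cache line and round up,
 *
 *	MALLOC(buf, void *, size + CPU_CACHE_LINE_SIZE, M_TEMP, M_WAITOK);
 *	p = (void *)P2ROUNDUP((intptr_t)buf, CPU_CACHE_LINE_SIZE);
 *
 * at the cost of never being able to free the unaligned pointer.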
*/ ncpu = ml_get_max_cpus(); - MALLOC(buf, void *, MBUF_MTYPES_SIZE(ncpu) + CPU_CACHE_SIZE, + MALLOC(buf, void *, MBUF_MTYPES_SIZE(ncpu) + CPU_CACHE_LINE_SIZE, M_TEMP, M_WAITOK); VERIFY(buf != NULL); - mbuf_mtypes = (mbuf_mtypes_t *)P2ROUNDUP((intptr_t)buf, CPU_CACHE_SIZE); + mbuf_mtypes = (mbuf_mtypes_t *)P2ROUNDUP((intptr_t)buf, + CPU_CACHE_LINE_SIZE); bzero(mbuf_mtypes, MBUF_MTYPES_SIZE(ncpu)); /* @@ -1570,7 +1612,12 @@ mbinit(void) MALLOC(mbuf_dump_buf, char *, MBUF_DUMP_BUF_SIZE, M_TEMP, M_WAITOK); VERIFY(mbuf_dump_buf != NULL); - printf("mbinit: done [%d MB total pool size, (%d/%d) split]\n", + if (mbuf_debug & MCF_DEBUG) { + printf("%s: MLEN %d, MHLEN %d\n", __func__, + (int)_MLEN, (int)_MHLEN); + } + + printf("%s: done [%d MB total pool size, (%d/%d) split]\n", __func__, (nmbclusters << MCLSHIFT) >> MBSHIFT, (nclusters << MCLSHIFT) >> MBSHIFT, (njcl << MCLSHIFT) >> MBSHIFT); @@ -2060,7 +2107,7 @@ mbuf_slab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) } /* Record this transaction */ if (mcltrace) - mcache_buffer_log(mca, list, m_cache(class)); + mcache_buffer_log(mca, list, m_cache(class), &mb_start); if (alloc) mca->mca_uflags |= MB_INUSE; @@ -2202,7 +2249,7 @@ cslab_free(mbuf_class_t class, mcache_obj_t *list, int purged) mcache_audit_free_verify(mca, m, 0, m_maxsize(MC_MBUF)); } - ms = (struct mbuf *)mca->mca_contents; + ms = MCA_SAVED_MBUF_PTR(mca); } /* Do the cluster sanity checks */ @@ -2413,7 +2460,7 @@ mbuf_cslab_alloc(void *arg, mcache_obj_t ***plist, unsigned int needed, lck_mtx_lock(mbuf_mlock); mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m); - ms = ((struct mbuf *)mca->mca_contents); + ms = MCA_SAVED_MBUF_PTR(mca); cl_mca = mcl_audit_buf2mca(MC_CL, (mcache_obj_t *)cl); /* @@ -2544,7 +2591,7 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m); mcl_audit_mbuf(mca, m, TRUE, alloc); if (mcltrace) - mcache_buffer_log(mca, m, m_cache(class)); + mcache_buffer_log(mca, m, m_cache(class), &mb_start); if (alloc) mca->mca_uflags |= MB_COMP_INUSE; @@ -2557,7 +2604,7 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) * pattern-filled by the above call to mcl_audit_mbuf(). */ if (!alloc && mclverify) - ms = (struct mbuf *)mca->mca_contents; + ms = MCA_SAVED_MBUF_PTR(mca); /* Do the cluster sanity checks and record its transaction */ cl = ms->m_ext.ext_buf; @@ -2589,7 +2636,7 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) size = m_maxsize(MC_16KCL); mcl_audit_cluster(mca, cl, size, alloc, FALSE); if (mcltrace) - mcache_buffer_log(mca, cl, m_cache(class)); + mcache_buffer_log(mca, cl, m_cache(class), &mb_start); if (alloc) mca->mca_uflags |= MB_COMP_INUSE; @@ -2711,16 +2758,17 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) ppnum_t new_page = pmap_find_phys(kernel_pmap, page); /* - * In the case of no mapper being available the following - * code noops and returns the input page; if there is a - * mapper the appropriate I/O page is returned. + * If there is a mapper the appropriate I/O page is returned; + * zero out the page to discard its past contents to prevent + * exposing leftover kernel memory. 
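+	 * (Hence the bzero below runs before IOMapperInsertPage()
+	 * publishes the page.)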
*/ VERIFY(offset < mcl_pages); - if (mcl_paddr_base) { - bzero((void *)(uintptr_t) page, page_size); - new_page = IOMapperInsertPage(mcl_paddr_base, offset, new_page); + if (mcl_paddr_base != 0) { + bzero((void *)(uintptr_t) page, page_size); + new_page = IOMapperInsertPage(mcl_paddr_base, + offset, new_page); } - mcl_paddr[offset] = new_page << PGSHIFT; + mcl_paddr[offset] = new_page; /* Pattern-fill this fresh page */ if (mclverify) { @@ -2957,8 +3005,7 @@ freelist_populate(mbuf_class_t class, unsigned int num, int wait) struct mbuf *ms; mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m); - ms = ((struct mbuf *) - mca->mca_contents); + ms = MCA_SAVED_MBUF_PTR(mca); ms->m_type = MT_FREE; } else { m->m_type = MT_FREE; @@ -3161,8 +3208,8 @@ m_reclaim(mbuf_class_t class, unsigned int num, boolean_t comp) lck_mtx_unlock(mbuf_mlock); if (bmap != 0) { - /* drain is performed in pfslowtimo(), to avoid deadlocks */ - do_reclaim = 1; + /* signal the domains to drain */ + net_drain_domains(); /* Sigh; we have no other choices but to ask mcache to purge */ for (m = 0; m < NELEM(mbuf_table); m++) { @@ -3264,8 +3311,10 @@ m_free(struct mbuf *m) if (m->m_type == MT_FREE) panic("m_free: freeing an already freed mbuf"); - /* Free the aux data and tags if there is any */ if (m->m_flags & M_PKTHDR) { + /* Check for scratch area overflow */ + m_redzone_verify(m); + /* Free the aux data and tags if there is any */ m_tag_delete_chain(m, NULL); } @@ -3580,16 +3629,25 @@ m_m16kget(struct mbuf *m, int wait) void m_copy_pkthdr(struct mbuf *to, struct mbuf *from) { - /* We will be taking over the tags of 'to' */ - if (to->m_flags & M_PKTHDR) + VERIFY(from->m_flags & M_PKTHDR); + + /* Check for scratch area overflow */ + m_redzone_verify(from); + + if (to->m_flags & M_PKTHDR) { + /* Check for scratch area overflow */ + m_redzone_verify(to); + /* We will be taking over the tags of 'to' */ m_tag_delete_chain(to, NULL); + } to->m_pkthdr = from->m_pkthdr; /* especially tags */ - m_tag_init(from); /* purge tags from src */ - m_service_class_init(from); /* reset svc class from src */ - from->m_pkthdr.aux_flags = 0; /* clear aux flags from src */ + m_classifier_init(from, 0); /* purge classifier info */ + m_tag_init(from, 1); /* purge all tags from src */ + m_scratch_init(from); /* clear src scratch area */ to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; + m_redzone_init(to); /* setup red zone on dst */ } /* @@ -3600,13 +3658,23 @@ m_copy_pkthdr(struct mbuf *to, struct mbuf *from) static int m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) { - if (to->m_flags & M_PKTHDR) + VERIFY(from->m_flags & M_PKTHDR); + + /* Check for scratch area overflow */ + m_redzone_verify(from); + + if (to->m_flags & M_PKTHDR) { + /* Check for scratch area overflow */ + m_redzone_verify(to); + /* We will be taking over the tags of 'to' */ m_tag_delete_chain(to, NULL); + } to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; - m_tag_init(to); + m_redzone_init(to); /* setup red zone on dst */ + m_tag_init(to, 0); /* preserve dst static tags */ return (m_tag_copy_chain(to, from, how)); } @@ -3614,8 +3682,47 @@ void m_copy_pftag(struct mbuf *to, struct mbuf *from) { to->m_pkthdr.pf_mtag = from->m_pkthdr.pf_mtag; +#if PF_ECN to->m_pkthdr.pf_mtag.pftag_hdr = NULL; to->m_pkthdr.pf_mtag.pftag_flags &= ~(PF_TAG_HDR_INET|PF_TAG_HDR_INET6); +#endif /* PF_ECN */ 
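+	/*
+	 * pftag_hdr points into the source mbuf's data, so it is
+	 * cleared above (under PF_ECN) rather than carried across.
+	 */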
+} + +void +m_classifier_init(struct mbuf *m, uint32_t pktf_mask) +{ + VERIFY(m->m_flags & M_PKTHDR); + + m->m_pkthdr.pkt_proto = 0; + m->m_pkthdr.pkt_flowsrc = 0; + m->m_pkthdr.pkt_flowid = 0; + m->m_pkthdr.pkt_flags &= pktf_mask; /* caller-defined mask */ + /* preserve service class and interface info for loopback packets */ + if (!(m->m_pkthdr.pkt_flags & PKTF_LOOP)) + (void) m_set_service_class(m, MBUF_SC_BE); + if (!(m->m_pkthdr.pkt_flags & PKTF_IFAINFO)) + m->m_pkthdr.pkt_ifainfo = 0; +#if MEASURE_BW + m->m_pkthdr.pkt_bwseq = 0; +#endif /* MEASURE_BW */ +} + +void +m_copy_classifier(struct mbuf *to, struct mbuf *from) +{ + VERIFY(to->m_flags & M_PKTHDR); + VERIFY(from->m_flags & M_PKTHDR); + + to->m_pkthdr.pkt_proto = from->m_pkthdr.pkt_proto; + to->m_pkthdr.pkt_flowsrc = from->m_pkthdr.pkt_flowsrc; + to->m_pkthdr.pkt_flowid = from->m_pkthdr.pkt_flowid; + to->m_pkthdr.pkt_flags = from->m_pkthdr.pkt_flags; + (void) m_set_service_class(to, from->m_pkthdr.pkt_svc); + to->m_pkthdr.pkt_ifainfo = from->m_pkthdr.pkt_ifainfo; + to->m_pkthdr.ipsec_policy = from->m_pkthdr.ipsec_policy; +#if MEASURE_BW + to->m_pkthdr.pkt_bwseq = from->m_pkthdr.pkt_bwseq; +#endif /* MEASURE_BW */ } /* @@ -4166,6 +4273,9 @@ m_freem_list(struct mbuf *m) mt_free++; if (m->m_flags & M_PKTHDR) { + /* Check for scratch area overflow */ + m_redzone_verify(m); + /* Free the aux data and tags if there is any */ m_tag_delete_chain(m, NULL); } @@ -4389,7 +4499,7 @@ m_prepend_2(struct mbuf *m, int len, int how) int MCFail; struct mbuf * -m_copym(struct mbuf *m, int off0, int len, int wait) +m_copym_mode(struct mbuf *m, int off0, int len, int wait, uint32_t mode) { struct mbuf *n, *mhdr = NULL, **np; int off = off0; @@ -4427,7 +4537,12 @@ m_copym(struct mbuf *m, int off0, int len, int wait) goto nospace; if (copyhdr != 0) { - M_COPY_PKTHDR(n, mhdr); + if (mode == M_COPYM_MOVE_HDR) { + M_COPY_PKTHDR(n, mhdr); + } else if (mode == M_COPYM_COPY_HDR) { + if (m_dup_pkthdr(n, mhdr, wait) == 0) + goto nospace; + } if (len == M_COPYALL) n->m_pkthdr.len -= off0; else @@ -4473,6 +4588,13 @@ nospace: return (NULL); } + +struct mbuf * +m_copym(struct mbuf *m, int off0, int len, int wait) +{ + return (m_copym_mode(m, off0, len, wait, M_COPYM_MOVE_HDR)); +} + /* * Equivalent to m_copym except that all necessary mbuf hdrs are allocated * within this routine also, the last mbuf and offset accessed are passed @@ -4481,7 +4603,7 @@ nospace: */ struct mbuf * m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait, - struct mbuf **m_lastm, int *m_off) + struct mbuf **m_lastm, int *m_off, uint32_t mode) { struct mbuf *n, **np = NULL; int off = off0, len = len0; @@ -4554,7 +4676,12 @@ m_copym_with_hdrs(struct mbuf *m, int off0, int len0, int wait, } if (copyhdr) { - M_COPY_PKTHDR(n, m); + if (mode == M_COPYM_MOVE_HDR) { + M_COPY_PKTHDR(n, m); + } else if (mode == M_COPYM_COPY_HDR) { + if (m_dup_pkthdr(n, m, wait) == 0) + goto nospace; + } n->m_pkthdr.len = len; copyhdr = 0; } @@ -5304,10 +5431,10 @@ extend: * allocate a new writable mbuf and try again. 
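 * (Copy-on-write in effect: shared or read-only storage is never
 * modified in place.)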
*/ -#if defined(DIAGNOSTIC) +#if DIAGNOSTIC if (!(flags & M_COPYBACK0_COW)) panic("m_copyback0: read-only"); -#endif /* defined(DIAGNOSTIC) */ +#endif /* DIAGNOSTIC */ /* * if we're going to write into the middle of @@ -5417,18 +5544,18 @@ enobufs: return (ENOBUFS); } -char * +uint64_t mcl_to_paddr(char *addr) { vm_offset_t base_phys; if (!MBUF_IN_MAP(addr)) - return (NULL); - base_phys = mcl_paddr[(addr - (char *)mbutl) >> PGSHIFT]; + return (0); + base_phys = mcl_paddr[atop_64(addr - (char *)mbutl)]; if (base_phys == 0) - return (NULL); - return ((char *)((uintptr_t)base_phys | ((uintptr_t)addr & PGOFSET))); + return (0); + return ((uint64_t)(ptoa_64(base_phys) | ((uint64_t)addr & PAGE_MASK))); } /* @@ -5688,6 +5815,8 @@ m_fixhdr(struct mbuf *m0) { u_int len; + VERIFY(m0->m_flags & M_PKTHDR); + len = m_length2(m0, NULL); m0->m_pkthdr.len = len; return (len); @@ -5748,7 +5877,6 @@ m_defrag_offset(struct mbuf *m0, u_int32_t off, int how) if (off > 0) { pktlen -= off; - m_final->m_len -= off; m_final->m_data += off; } @@ -5757,7 +5885,7 @@ m_defrag_offset(struct mbuf *m0, u_int32_t off, int how) * pointer before coming here, as otherwise it will point to * the original mbuf which will get freed upon success. */ - VERIFY(m0->m_pkthdr.header == NULL); + VERIFY(m0->m_pkthdr.pkt_hdr == NULL); if (m_dup_pkthdr(m_final, m0, how) == 0) goto nospace; @@ -5768,6 +5896,7 @@ m_defrag_offset(struct mbuf *m0, u_int32_t off, int how) length = pktlen - progress; if (length > MCLBYTES) length = MCLBYTES; + length -= ((m_new == m_final) ? off : 0); if (m_new == NULL) { if (length > MLEN) @@ -6269,9 +6398,14 @@ mcl_audit_init(void *buf, mcache_audit_t **mca_list, /* Attach the contents buffer if requested */ if (save_contents) { - VERIFY(con != NULL); + mcl_saved_contents_t *msc = + (mcl_saved_contents_t *)(void *)con; + + VERIFY(msc != NULL); + VERIFY(IS_P2ALIGNED(msc, sizeof (u_int64_t))); + VERIFY(con_size == sizeof (*msc)); mca->mca_contents_size = con_size; - mca->mca_contents = con; + mca->mca_contents = msc; con = con->obj_next; bzero(mca->mca_contents, mca->mca_contents_size); } @@ -6372,19 +6506,20 @@ mcl_audit_mbuf(mcache_audit_t *mca, void *addr, boolean_t composite, static void mcl_audit_restore_mbuf(struct mbuf *m, mcache_audit_t *mca, boolean_t composite) { - struct mbuf *ms = (struct mbuf *)mca->mca_contents; + struct mbuf *ms = MCA_SAVED_MBUF_PTR(mca); if (composite) { struct mbuf *next = m->m_next; VERIFY(ms->m_flags == M_EXT && MEXT_RFA(ms) != NULL && MBUF_IS_COMPOSITE(ms)); + VERIFY(mca->mca_contents_size == AUDIT_CONTENTS_SIZE); /* * We could have hand-picked the mbuf fields and restore * them individually, but that will be a maintenance * headache. Instead, restore everything that was saved; * the mbuf layer will recheck and reinitialize anyway. 
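 * Only the saved shadow-mbuf bytes are copied back below; the
 * scratch record sharing the audit slot is left untouched.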
*/ - bcopy(ms, m, mca->mca_contents_size); + bcopy(ms, m, MCA_SAVED_MBUF_SIZE); m->m_next = next; } else { /* @@ -6400,8 +6535,9 @@ mcl_audit_restore_mbuf(struct mbuf *m, mcache_audit_t *mca, boolean_t composite) static void mcl_audit_save_mbuf(struct mbuf *m, mcache_audit_t *mca) { + VERIFY(mca->mca_contents_size == AUDIT_CONTENTS_SIZE); _MCHECK(m); - bcopy(m, mca->mca_contents, mca->mca_contents_size); + bcopy(m, MCA_SAVED_MBUF_PTR(mca), MCA_SAVED_MBUF_SIZE); } static void @@ -6425,6 +6561,32 @@ mcl_audit_cluster(mcache_audit_t *mca, void *addr, size_t size, boolean_t alloc, } } +static void +mcl_audit_scratch(mcache_audit_t *mca) +{ + void *stack[MCACHE_STACK_DEPTH + 1]; + mcl_scratch_audit_t *msa; + struct timeval now; + + VERIFY(mca->mca_contents != NULL); + msa = MCA_SAVED_SCRATCH_PTR(mca); + + msa->msa_pthread = msa->msa_thread; + msa->msa_thread = current_thread(); + bcopy(msa->msa_stack, msa->msa_pstack, sizeof (msa->msa_pstack)); + msa->msa_pdepth = msa->msa_depth; + bzero(stack, sizeof (stack)); + msa->msa_depth = OSBacktrace(stack, MCACHE_STACK_DEPTH + 1) - 1; + bcopy(&stack[1], msa->msa_stack, sizeof (mca->mca_pstack)); + + msa->msa_ptstamp = msa->msa_tstamp; + microuptime(&now); + /* tstamp is in ms relative to base_ts */ + msa->msa_tstamp = ((now.tv_usec - mb_start.tv_usec) / 1000); + if ((now.tv_sec - mb_start.tv_sec) > 0) + msa->msa_tstamp += ((now.tv_sec - mb_start.tv_sec) * 1000); +} + static void mcl_audit_mcheck_panic(struct mbuf *m) { @@ -6925,6 +7087,100 @@ done: #undef MBUF_DUMP_BUF_CHK +/* + * Convert between a regular and a packet header mbuf. Caller is responsible + * for setting or clearing M_PKTHDR; this routine does the rest of the work. + */ +int +m_reinit(struct mbuf *m, int hdr) +{ + int ret = 0; + + if (hdr) { + VERIFY(!(m->m_flags & M_PKTHDR)); + if (!(m->m_flags & M_EXT) && + (m->m_data != m->m_dat || m->m_len > 0)) { + /* + * If there's no external cluster attached and the + * mbuf appears to contain user data, we cannot + * safely convert this to a packet header mbuf, + * as the packet header structure might overlap + * with the data. + */ + printf("%s: cannot set M_PKTHDR on altered mbuf %p, " + "m_data %p (expected %p), m_len %d (expected 0)\n", + __func__, m, m->m_data, m->m_dat, m->m_len); + ret = EBUSY; + } else { + VERIFY((m->m_flags & M_EXT) || m->m_data == m->m_dat); + m->m_flags |= M_PKTHDR; + MBUF_INIT_PKTHDR(m); + } + } else { + /* Check for scratch area overflow */ + m_redzone_verify(m); + /* Free the aux data and tags if there is any */ + m_tag_delete_chain(m, NULL); + m->m_flags &= ~M_PKTHDR; + } + + return (ret); +} + +void +m_scratch_init(struct mbuf *m) +{ + VERIFY(m->m_flags & M_PKTHDR); + + bzero(&m->m_pkthdr.pkt_mpriv, sizeof (m->m_pkthdr.pkt_mpriv)); +} + +u_int32_t +m_scratch_get(struct mbuf *m, u_int8_t **p) +{ + VERIFY(m->m_flags & M_PKTHDR); + + if (mcltrace) { + mcache_audit_t *mca; + + lck_mtx_lock(mbuf_mlock); + mca = mcl_audit_buf2mca(MC_MBUF, (mcache_obj_t *)m); + if (mca->mca_uflags & MB_SCVALID) + mcl_audit_scratch(mca); + lck_mtx_unlock(mbuf_mlock); + } + + *p = (u_int8_t *)&m->m_pkthdr.pkt_mpriv; + return (sizeof (m->m_pkthdr.pkt_mpriv)); +} + +static void +m_redzone_init(struct mbuf *m) +{ + VERIFY(m->m_flags & M_PKTHDR); + /* + * Each mbuf has a unique red zone pattern, which is a XOR + * of the red zone cookie and the address of the mbuf. 
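+ *
+ * Verification recomputes the pattern and compares; in sketch form:
+ *
+ *	expected = ((u_int32_t)(uintptr_t)m) ^ mb_redzone_cookie;
+ *	if (m->m_pkthdr.redzone != expected)
+ *		panic("redzone violation");
+ *
+ * so a value blindly copied from another mbuf will fail the check.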
+ */ + m->m_pkthdr.redzone = ((u_int32_t)(uintptr_t)m) ^ mb_redzone_cookie; +} + +static void +m_redzone_verify(struct mbuf *m) +{ + u_int32_t mb_redzone; + + VERIFY(m->m_flags & M_PKTHDR); + + mb_redzone = ((u_int32_t)(uintptr_t)m) ^ mb_redzone_cookie; + if (m->m_pkthdr.redzone != mb_redzone) { + panic("mbuf %p redzone violation with value 0x%x " + "(instead of 0x%x, using cookie 0x%x)\n", + m, m->m_pkthdr.redzone, mb_redzone, mb_redzone_cookie); + /* NOTREACHED */ + } +} + SYSCTL_DECL(_kern_ipc); SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD | CTLFLAG_LOCKED, diff --git a/bsd/kern/uipc_mbuf2.c b/bsd/kern/uipc_mbuf2.c index 565d005f9..fa97af53c 100644 --- a/bsd/kern/uipc_mbuf2.c +++ b/bsd/kern/uipc_mbuf2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,8 +106,9 @@ #include #include #include -#if INET6 #include +#include +#if INET6 #include #include #endif /* INET6 */ @@ -485,13 +486,6 @@ m_tag_free(struct m_tag *t) t->m_tag_type == KERNEL_TAG_TYPE_MACLABEL) mac_mbuf_tag_destroy(t); #endif -#if INET6 - if (t != NULL && - t->m_tag_id == KERNEL_MODULE_TAG_ID && - t->m_tag_type == KERNEL_TAG_TYPE_INET6 && - t->m_tag_len == sizeof (struct ip6aux)) - ip6_destroyaux((struct ip6aux *)(t + 1)); -#endif /* INET6 */ if (t == NULL) return; @@ -535,8 +529,8 @@ m_tag_prepend(struct mbuf *m, struct m_tag *t) void m_tag_unlink(struct mbuf *m, struct m_tag *t) { - VERIFY(m != NULL && t != NULL); - VERIFY(t->m_tag_cookie == M_TAG_VALID_PATTERN); + VERIFY(m->m_flags & M_PKTHDR); + VERIFY(t != NULL && t->m_tag_cookie == M_TAG_VALID_PATTERN); SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link); } @@ -545,8 +539,6 @@ m_tag_unlink(struct mbuf *m, struct m_tag *t) void m_tag_delete(struct mbuf *m, struct m_tag *t) { - VERIFY(m != NULL && t != NULL); - m_tag_unlink(m, t); m_tag_free(t); } @@ -557,7 +549,7 @@ m_tag_delete_chain(struct mbuf *m, struct m_tag *t) { struct m_tag *p, *q; - VERIFY(m != NULL); + VERIFY(m->m_flags & M_PKTHDR); if (t != NULL) { p = t; @@ -581,7 +573,7 @@ m_tag_locate(struct mbuf *m, u_int32_t id, u_int16_t type, struct m_tag *t) { struct m_tag *p; - VERIFY(m != NULL); + VERIFY(m->m_flags & M_PKTHDR); if (t == NULL) { p = SLIST_FIRST(&m->m_pkthdr.tags); @@ -625,14 +617,6 @@ m_tag_copy(struct m_tag *t, int how) mac_mbuf_tag_copy(t, p); } else #endif -#if INET6 - if (t != NULL && - t->m_tag_id == KERNEL_MODULE_TAG_ID && - t->m_tag_type == KERNEL_TAG_TYPE_INET6 && - t->m_tag_len == sizeof (struct ip6aux)) { - ip6_copyaux((struct ip6aux *)(t + 1), (struct ip6aux *)(p + 1)); - } else -#endif /* INET6 */ bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */ return (p); } @@ -648,7 +632,7 @@ m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how) { struct m_tag *p, *t, *tprev = NULL; - VERIFY(to != NULL && from != NULL); + VERIFY((to->m_flags & M_PKTHDR) && (from->m_flags & M_PKTHDR)); m_tag_delete_chain(to, NULL); SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { @@ -668,22 +652,28 @@ m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how) return (1); } -/* Initialize tags on an mbuf. */ +/* Initialize dynamic and static tags on an mbuf. 
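+ * When 'all' is zero (the m_dup_pkthdr() case), the static pf_mtag
+ * and proto_mtag slots are preserved and only the dynamic tag list
+ * is reinitialized.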
*/ void -m_tag_init(struct mbuf *m) +m_tag_init(struct mbuf *m, int all) { - VERIFY(m != NULL); + VERIFY(m->m_flags & M_PKTHDR); SLIST_INIT(&m->m_pkthdr.tags); - bzero(&m->m_pkthdr.pf_mtag, sizeof (m->m_pkthdr.pf_mtag)); - bzero(&m->m_pkthdr.tcp_mtag, sizeof (m->m_pkthdr.tcp_mtag)); + /* + * If the caller wants to preserve static mbuf tags + * (e.g. m_dup_pkthdr), don't zero them out. + */ + if (all) { + bzero(&m->m_pkthdr.pf_mtag, sizeof (m->m_pkthdr.pf_mtag)); + bzero(&m->m_pkthdr.proto_mtag, sizeof (m->m_pkthdr.proto_mtag)); + } } /* Get first tag in chain. */ struct m_tag * m_tag_first(struct mbuf *m) { - VERIFY(m != NULL); + VERIFY(m->m_flags & M_PKTHDR); return (SLIST_FIRST(&m->m_pkthdr.tags)); } @@ -713,13 +703,6 @@ m_get_traffic_class(struct mbuf *m) return (MBUF_SC2TC(m_get_service_class(m))); } -void -m_service_class_init(struct mbuf *m) -{ - if (m->m_flags & M_PKTHDR) - (void) m_set_service_class(m, MBUF_SC_BE); -} - int m_set_service_class(struct mbuf *m, mbuf_svc_class_t sc) { @@ -728,7 +711,7 @@ m_set_service_class(struct mbuf *m, mbuf_svc_class_t sc) VERIFY(m->m_flags & M_PKTHDR); if (MBUF_VALID_SC(sc)) - m->m_pkthdr.svc = sc; + m->m_pkthdr.pkt_svc = sc; else error = EINVAL; @@ -742,8 +725,8 @@ m_get_service_class(struct mbuf *m) VERIFY(m->m_flags & M_PKTHDR); - if (MBUF_VALID_SC(m->m_pkthdr.svc)) - sc = m->m_pkthdr.svc; + if (MBUF_VALID_SC(m->m_pkthdr.pkt_svc)) + sc = m->m_pkthdr.pkt_svc; else sc = MBUF_SC_BE; @@ -839,3 +822,45 @@ m_service_class_from_val(u_int32_t v) /* NOTREACHED */ return (sc); } + +uint16_t +m_adj_sum16(struct mbuf *m, uint32_t start, uint32_t ulpoff, uint32_t sum) +{ + int len = (ulpoff - start); + + if (len > 0) { + uint32_t adj = m_sum16(m, start, len); + if (adj >= sum) + sum = ~(adj - sum) & 0xffff; + else + sum -= adj; + } else if (len < 0) { + sum += m_sum16(m, ulpoff, -len); + } + + ADDCARRY(sum); + + return (sum); +} + +extern int cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum); + +uint16_t +m_sum16(struct mbuf *m, uint32_t off, uint32_t len) +{ + int mlen; + + /* + * Sanity check + * + * Use m_length2() instead of m_length(), as we cannot rely on + * the caller setting m_pkthdr.len correctly, if the mbuf is + * a M_PKTHDR one. + */ + if ((mlen = m_length2(m, NULL)) < (off + len)) { + panic("%s: mbuf len (%d) < off+len (%d+%d)\n", __func__, + mlen, off, len); + } + + return (~cpu_in_cksum(m, len, off, 0) & 0xffff); +} diff --git a/bsd/kern/uipc_proto.c b/bsd/kern/uipc_proto.c index f04b308bf..f8eb5e418 100644 --- a/bsd/kern/uipc_proto.c +++ b/bsd/kern/uipc_proto.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -74,60 +75,66 @@ /* * Definitions of protocols supported in the UNIX domain. 
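 * Three entries follow: SOCK_STREAM, SOCK_DGRAM and a raw
 * placeholder; the pr_domain back-pointers are no longer hard-coded
 * here, but filled in when the protocols are attached.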
*/ +struct domain *localdomain = NULL; +static void pre_unp_init(struct domain *); -int raw_usrreq(void); -static void pre_unp_init(void) __attribute__((section("__TEXT, initcode"))); +extern struct domain localdomain_s; static struct protosw localsw[] = { - { - .pr_type = SOCK_STREAM, - .pr_domain = &localdomain, - .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|PR_PCBLOCK, - .pr_ctloutput = uipc_ctloutput, - .pr_usrreqs = &uipc_usrreqs, - .pr_lock = unp_lock, - .pr_unlock = unp_unlock, - .pr_getlock = unp_getlock - }, - { - .pr_type = SOCK_DGRAM, - .pr_domain = &localdomain, - .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, - .pr_ctloutput = uipc_ctloutput, - .pr_usrreqs = &uipc_usrreqs, - .pr_lock = unp_lock, - .pr_unlock = unp_unlock, - .pr_getlock = unp_getlock - }, - { - .pr_ctlinput = raw_ctlinput, - .pr_usrreqs = &raw_usrreqs, - }, +{ + .pr_type = SOCK_STREAM, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|PR_PCBLOCK, + .pr_ctloutput = uipc_ctloutput, + .pr_usrreqs = &uipc_usrreqs, + .pr_lock = unp_lock, + .pr_unlock = unp_unlock, + .pr_getlock = unp_getlock +}, +{ + .pr_type = SOCK_DGRAM, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, + .pr_ctloutput = uipc_ctloutput, + .pr_usrreqs = &uipc_usrreqs, + .pr_lock = unp_lock, + .pr_unlock = unp_unlock, + .pr_getlock = unp_getlock +}, +{ + .pr_ctlinput = raw_ctlinput, + .pr_usrreqs = &raw_usrreqs, +}, }; -int local_proto_count = (sizeof (localsw) / sizeof (struct protosw)); +static int local_proto_count = (sizeof (localsw) / sizeof (struct protosw)); static void -pre_unp_init(void) +pre_unp_init(struct domain *dp) { - int i; struct protosw *pr; - struct domain *dp = &localdomain; + int i; + + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(localdomain == NULL); + + localdomain = dp; + + for (i = 0, pr = &localsw[0]; i < local_proto_count; i++, pr++) + net_add_proto(pr, dp, 1); - for (i=0, pr = &localsw[0]; i #include #include +#include #include #include #include #include #include #include +#include #include +#include #include #include #include #include #include +#include #include #include #include @@ -115,22 +118,27 @@ #include #endif /* MAC */ - -int so_cache_hw = 0; -int so_cache_timeouts = 0; -int so_cache_max_freed = 0; -int cached_sock_count = 0; -__private_extern__ int max_cached_sock_count = MAX_CACHED_SOCKETS; -struct socket *socket_cache_head = 0; -struct socket *socket_cache_tail = 0; -u_int32_t so_cache_time = 0; -int so_cache_init_done = 0; -struct zone *so_cache_zone; - -static lck_grp_t *so_cache_mtx_grp; -static lck_attr_t *so_cache_mtx_attr; +#if MULTIPATH +#include +#endif /* MULTIPATH */ + +/* TODO: this should be in a header file somewhere */ +extern char *proc_name_address(void *p); + +static u_int32_t so_cache_hw; /* High water mark for socache */ +static u_int32_t so_cache_timeouts; /* number of timeouts */ +static u_int32_t so_cache_max_freed; /* max freed per timeout */ +static u_int32_t cached_sock_count = 0; +STAILQ_HEAD(, socket) so_cache_head; +int max_cached_sock_count = MAX_CACHED_SOCKETS; +static u_int32_t so_cache_time; +static int socketinit_done; +static struct zone *so_cache_zone; + +static lck_grp_t *so_cache_mtx_grp; +static lck_attr_t *so_cache_mtx_attr; static lck_grp_attr_t *so_cache_mtx_grp_attr; -lck_mtx_t *so_cache_mtx; +static lck_mtx_t *so_cache_mtx; #include @@ -141,22 +149,21 @@ static int filt_sowrite(struct knote *kn, long hint); static void filt_sockdetach(struct knote *kn); static int filt_sockev(struct knote *kn, long hint); -static int -sooptcopyin_timeval(struct sockopt *sopt, 
struct timeval * tv_p); - -static int -sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p); +static int sooptcopyin_timeval(struct sockopt *, struct timeval *); +static int sooptcopyout_timeval(struct sockopt *, const struct timeval *); static struct filterops soread_filtops = { - .f_isfd = 1, - .f_detach = filt_sordetach, - .f_event = filt_soread, + .f_isfd = 1, + .f_detach = filt_sordetach, + .f_event = filt_soread, }; + static struct filterops sowrite_filtops = { - .f_isfd = 1, - .f_detach = filt_sowdetach, - .f_event = filt_sowrite, + .f_isfd = 1, + .f_detach = filt_sowdetach, + .f_event = filt_sowrite, }; + static struct filterops sock_filtops = { .f_isfd = 1, .f_detach = filt_sockdetach, @@ -165,7 +172,7 @@ static struct filterops sock_filtops = { #define EVEN_MORE_LOCKING_DEBUG 0 int socket_debug = 0; -int socket_zone = M_SOCKET; +static int socket_zone = M_SOCKET; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); @@ -181,27 +188,28 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES) - SYSCTL_DECL(_kern_ipc); int somaxconn = SOMAXCONN; -SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, ""); +SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, + CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, ""); /* Should we get a maximum also ??? */ static int sosendmaxchain = 65536; static int sosendminchain = 16384; static int sorecvmincopy = 16384; -SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, - 0, ""); -SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, - 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, + CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, + CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy, 0, ""); /* * Set to enable jumbo clusters (if available) for large writes when * the socket is marked with SOF_MULTIPAGES; see below. */ int sosendjcl = 1; -SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, + CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, ""); /* * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large @@ -215,16 +223,20 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendj * capable. Set this to 1 only for testing/debugging purposes. */ int sosendjcl_ignore_capab = 0; -SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW | CTLFLAG_LOCKED, - &sosendjcl_ignore_capab, 0, ""); +SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, + CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl_ignore_capab, 0, ""); int sodefunctlog = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED, - &sodefunctlog, 0, ""); + &sodefunctlog, 0, ""); int sothrottlelog = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED, - &sothrottlelog, 0, ""); + &sothrottlelog, 0, ""); + +int sorestrictrecv = 1; +SYSCTL_INT(_kern_ipc, OID_AUTO, sorestrictrecv, CTLFLAG_RW | CTLFLAG_LOCKED, + &sorestrictrecv, 0, "Enable inbound interface restrictions"); /* * Socket operation routines. 
@@ -238,26 +250,23 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED, extern void postevent(struct socket *, struct sockbuf *, int); extern void evsofree(struct socket *); extern int tcp_notsent_lowat_check(struct socket *so); +extern struct inpcbinfo tcbinfo; /* TODO: these should be in header file */ extern int get_inpcb_str_size(void); extern int get_tcp_str_size(void); -extern struct domain *pffinddomain(int); -extern struct protosw *pffindprotonotype(int, int); -extern int soclose_locked(struct socket *); -extern int soo_kqfilter(struct fileproc *, struct knote *, struct proc *); -#ifdef __APPLE__ +static unsigned int sl_zone_size; /* size of sockaddr_list */ +static struct zone *sl_zone; /* zone for sockaddr_list */ + +static unsigned int se_zone_size; /* size of sockaddr_entry */ +static struct zone *se_zone; /* zone for sockaddr_entry */ vm_size_t so_cache_zone_element_size; -static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **, int *); +static int sodelayed_copy(struct socket *, struct uio *, struct mbuf **, user_ssize_t *); static void cached_sock_alloc(struct socket **, int); static void cached_sock_free(struct socket *); -static void so_cache_timer(void *); - -void soclose_wait_locked(struct socket *so); -int so_isdstlocal(struct socket *so); /* * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from @@ -266,25 +275,24 @@ int so_isdstlocal(struct socket *so); */ __private_extern__ u_int32_t sotcdb = SOTCDB_NO_DSCP; SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED, - &sotcdb, 0, ""); + &sotcdb, 0, ""); void socketinit(void) { - vm_size_t str_size; - - if (so_cache_init_done) { + if (socketinit_done) { printf("socketinit: already called...\n"); return; } + socketinit_done = 1; - PE_parse_boot_argn("socket_debug", &socket_debug, sizeof (socket_debug)); + PE_parse_boot_argn("socket_debug", &socket_debug, + sizeof (socket_debug)); /* * allocate lock group attribute and group for socket cache mutex */ so_cache_mtx_grp_attr = lck_grp_attr_alloc_init(); - so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr); @@ -293,70 +301,72 @@ socketinit(void) */ so_cache_mtx_attr = lck_attr_alloc_init(); - so_cache_init_done = 1; - /* cached sockets mutex */ so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); + if (so_cache_mtx == NULL) { + panic("%s: unable to allocate so_cache_mtx\n", __func__); + /* NOTREACHED */ + } + STAILQ_INIT(&so_cache_head); - if (so_cache_mtx == NULL) - return; /* we're hosed... 
*/ - - str_size = (vm_size_t)(sizeof (struct socket) + 4 + - get_inpcb_str_size() + 4 + get_tcp_str_size()); + so_cache_zone_element_size = (vm_size_t)(sizeof (struct socket) + 4 + + get_inpcb_str_size() + 4 + get_tcp_str_size()); - so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone"); + so_cache_zone = zinit(so_cache_zone_element_size, + (120000 * so_cache_zone_element_size), 8192, "socache zone"); zone_change(so_cache_zone, Z_CALLERACCT, FALSE); zone_change(so_cache_zone, Z_NOENCRYPT, TRUE); -#if TEMPDEBUG - printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size); -#endif - timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); - - so_cache_zone_element_size = str_size; - sflt_init(); + sl_zone_size = sizeof (struct sockaddr_list); + if ((sl_zone = zinit(sl_zone_size, 1024 * sl_zone_size, 1024, + "sockaddr_list")) == NULL) { + panic("%s: unable to allocate sockaddr_list zone\n", __func__); + /* NOTREACHED */ + } + zone_change(sl_zone, Z_CALLERACCT, FALSE); + zone_change(sl_zone, Z_EXPAND, TRUE); + + se_zone_size = sizeof (struct sockaddr_entry); + if ((se_zone = zinit(se_zone_size, 1024 * se_zone_size, 1024, + "sockaddr_entry")) == NULL) { + panic("%s: unable to allocate sockaddr_entry zone\n", __func__); + /* NOTREACHED */ + } + zone_change(se_zone, Z_CALLERACCT, FALSE); + zone_change(se_zone, Z_EXPAND, TRUE); - _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX); + in_pcbinit(); + sflt_init(); socket_tclass_init(); - - socket_flowadv_init(); +#if MULTIPATH + mp_pcbinit(); +#endif /* MULTIPATH */ } static void cached_sock_alloc(struct socket **so, int waitok) { caddr_t temp; - register uintptr_t offset; + uintptr_t offset; lck_mtx_lock(so_cache_mtx); - if (cached_sock_count) { - cached_sock_count--; - *so = socket_cache_head; - if (*so == 0) - panic("cached_sock_alloc: cached sock is null"); + if (!STAILQ_EMPTY(&so_cache_head)) { + VERIFY(cached_sock_count > 0); - socket_cache_head = socket_cache_head->cache_next; - if (socket_cache_head) - socket_cache_head->cache_prev = 0; - else - socket_cache_tail = 0; + *so = STAILQ_FIRST(&so_cache_head); + STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent); + STAILQ_NEXT((*so), so_cache_ent) = NULL; + cached_sock_count--; lck_mtx_unlock(so_cache_mtx); temp = (*so)->so_saved_pcb; bzero((caddr_t)*so, sizeof (struct socket)); -#if TEMPDEBUG - kprintf("cached_sock_alloc - retreiving cached sock %p - " - "count == %d\n", *so, cached_sock_count); -#endif + (*so)->so_saved_pcb = temp; - (*so)->cached_in_sock_layer = 1; } else { -#if TEMPDEBUG - kprintf("Allocating cached sock %p from memory\n", *so); -#endif lck_mtx_unlock(so_cache_mtx); @@ -365,17 +375,18 @@ cached_sock_alloc(struct socket **so, int waitok) else *so = (struct socket *)zalloc_noblock(so_cache_zone); - if (*so == 0) + if (*so == NULL) return; bzero((caddr_t)*so, sizeof (struct socket)); /* - * Define offsets for extra structures into our single block of - * memory. Align extra structures on longword boundaries. + * Define offsets for extra structures into our + * single block of memory. Align extra structures + * on longword boundaries. 
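+	 *
+	 * Resulting layout of one so_cache_zone element (sketch):
+	 *
+	 *	[ struct socket ][ pad ][ inpcb ][ pad ][ tcpcb ]
+	 *
+	 * with so_saved_pcb and inp_saved_ppcb pointing into the
+	 * embedded pcb storage.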
*/ - offset = (uintptr_t) *so; + offset = (uintptr_t)*so; offset += sizeof (struct socket); offset = ALIGN(offset); @@ -387,14 +398,9 @@ cached_sock_alloc(struct socket **so, int waitok) ((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t)offset; -#if TEMPDEBUG - kprintf("Allocating cached socket - %p, pcb=%p tcpcb=%p\n", - *so, (*so)->so_saved_pcb, - ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb); -#endif } - (*so)->cached_in_sock_layer = 1; + (*so)->cached_in_sock_layer = true; } static void @@ -403,76 +409,70 @@ cached_sock_free(struct socket *so) lck_mtx_lock(so_cache_mtx); + so_cache_time = net_uptime(); if (++cached_sock_count > max_cached_sock_count) { --cached_sock_count; lck_mtx_unlock(so_cache_mtx); -#if TEMPDEBUG - kprintf("Freeing overflowed cached socket %p\n", so); -#endif zfree(so_cache_zone, so); } else { -#if TEMPDEBUG - kprintf("Freeing socket %p into cache\n", so); -#endif if (so_cache_hw < cached_sock_count) so_cache_hw = cached_sock_count; - so->cache_next = socket_cache_head; - so->cache_prev = 0; - if (socket_cache_head) - socket_cache_head->cache_prev = so; - else - socket_cache_tail = so; + STAILQ_INSERT_TAIL(&so_cache_head, so, so_cache_ent); so->cache_timestamp = so_cache_time; - socket_cache_head = so; lck_mtx_unlock(so_cache_mtx); } - -#if TEMPDEBUG - kprintf("Freed cached sock %p into cache - count is %d\n", - so, cached_sock_count); -#endif } -static void -so_update_last_owner_locked( - struct socket *so, - proc_t self) +void +so_update_last_owner_locked(struct socket *so, proc_t self) { - if (so->last_pid != 0) - { - if (self == NULL) + if (so->last_pid != 0) { + /* + * last_pid and last_upid should remain zero for sockets + * created using sock_socket. The check above achieves that + */ + if (self == PROC_NULL) self = current_proc(); - - if (self) - { + + if (so->last_upid != proc_uniqueid(self) || + so->last_pid != proc_pid(self)) { so->last_upid = proc_uniqueid(self); so->last_pid = proc_pid(self); + proc_getexecutableuuid(self, so->last_uuid, + sizeof (so->last_uuid)); } } } -static void -so_cache_timer(__unused void *dummy) +void +so_update_policy(struct socket *so) { - register struct socket *p; - register int n_freed = 0; - - lck_mtx_lock(so_cache_mtx); + if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) + (void) inp_update_policy(sotoinpcb(so)); +} - ++so_cache_time; +boolean_t +so_cache_timer(void) +{ + struct socket *p; + int n_freed = 0; + boolean_t rc = FALSE; - while ((p = socket_cache_tail)) { - if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT) + lck_mtx_lock(so_cache_mtx); + so_cache_timeouts++; + so_cache_time = net_uptime(); + + while (!STAILQ_EMPTY(&so_cache_head)) { + VERIFY(cached_sock_count > 0); + p = STAILQ_FIRST(&so_cache_head); + if ((so_cache_time - p->cache_timestamp) < + SO_CACHE_TIME_LIMIT) break; - so_cache_timeouts++; - - if ((socket_cache_tail = p->cache_prev)) - p->cache_prev->cache_next = 0; - if (--cached_sock_count == 0) - socket_cache_head = 0; + STAILQ_REMOVE_HEAD(&so_cache_head, so_cache_ent); + --cached_sock_count; zfree(so_cache_zone, p); @@ -481,11 +481,14 @@ so_cache_timer(__unused void *dummy) break; } } - lck_mtx_unlock(so_cache_mtx); - timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); + /* Schedule again if there is more to cleanup */ + if (!STAILQ_EMPTY(&so_cache_head)) + rc = TRUE; + + lck_mtx_unlock(so_cache_mtx); + return (rc); } -#endif /* __APPLE__ */ /* * Get a socket structure from our zone, and initialize it. 
@@ -507,14 +510,12 @@ soalloc(int waitok, int dom, int type)
 		if (so != NULL)
 			bzero(so, sizeof (*so));
 	}
-	/* XXX race condition for reentrant kernel */
-//###LD Atomic add for so_gencnt
 	if (so != NULL) {
 		so->so_gencnt = ++so_gencnt;
 		so->so_zone = socket_zone;
 #if CONFIG_MACF_SOCKET
-		/* Convert waitok to  M_WAITOK/M_NOWAIT for MAC Framework. */
-		if (mac_socket_label_init(so, !waitok) != 0) {
+		/* Convert waitok to  M_WAITOK/M_NOWAIT for MAC Framework. */
+		if (mac_socket_label_init(so, !waitok) != 0) {
 			sodealloc(so);
 			return (NULL);
 		}
@@ -524,66 +525,76 @@ soalloc(int waitok, int dom, int type)
 	return (so);
 }
 
-/*
- * Returns:	0			Success
- *		EAFNOSUPPORT
- *		EPROTOTYPE
- *		EPROTONOSUPPORT
- *		ENOBUFS
- *	:ENOBUFS[AF_UNIX]
- *	:ENOBUFS[TCP]
- *	:ENOMEM[TCP]
- *	:EISCONN[TCP]
- *	:??? [other protocol families, IPSEC]
- */
 int
-socreate(int dom, struct socket **aso, int type, int proto)
+socreate_internal(int dom, struct socket **aso, int type, int proto,
+    struct proc *p, uint32_t flags, struct proc *ep)
 {
-	struct proc *p = current_proc();
-	register struct protosw *prp;
-	register struct socket *so;
-	register int error = 0;
+	struct protosw *prp;
+	struct socket *so;
+	int error = 0;
 #if TCPDEBUG
 	extern int tcpconsdebug;
 #endif
-	if (proto)
+
+	VERIFY(aso != NULL);
+	*aso = NULL;
+
+	if (proto != 0)
 		prp = pffindproto(dom, proto, type);
 	else
 		prp = pffindtype(dom, type);
 
-	if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) {
-		if (pffinddomain(dom) == NULL) {
+	if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL) {
+		if (pffinddomain(dom) == NULL)
 			return (EAFNOSUPPORT);
-		}
 		if (proto != 0) {
-			if (pffindprotonotype(dom, proto) != NULL) {
+			if (pffindprotonotype(dom, proto) != NULL)
 				return (EPROTOTYPE);
-			}
 		}
 		return (EPROTONOSUPPORT);
 	}
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
 	so = soalloc(1, dom, type);
-	if (so == 0)
+	if (so == NULL)
 		return (ENOBUFS);
 
+	if (flags & SOCF_ASYNC)
+		so->so_state |= SS_NBIO;
+#if MULTIPATH
+	if (flags & SOCF_MP_SUBFLOW) {
+		/*
+		 * A multipath subflow socket is used internally in the kernel,
+		 * therefore it does not have a file descriptor associated by
+		 * default.
+		 */
+		so->so_state |= SS_NOFDREF;
+		so->so_flags |= SOF_MP_SUBFLOW;
+	}
+#endif /* MULTIPATH */
+
 	TAILQ_INIT(&so->so_incomp);
 	TAILQ_INIT(&so->so_comp);
 	so->so_type = type;
 	so->last_upid = proc_uniqueid(p);
 	so->last_pid = proc_pid(p);
+	proc_getexecutableuuid(p, so->last_uuid, sizeof (so->last_uuid));
+
+	if (ep != PROC_NULL && ep != p) {
+		so->e_upid = proc_uniqueid(ep);
+		so->e_pid = proc_pid(ep);
+		proc_getexecutableuuid(ep, so->e_uuid, sizeof (so->e_uuid));
+		so->so_flags |= SOF_DELEGATED;
+	}
 
 	so->so_cred = kauth_cred_proc_ref(p);
 	if (!suser(kauth_cred_get(), NULL))
-		so->so_state = SS_PRIV;
+		so->so_state |= SS_PRIV;
 
 	so->so_proto = prp;
-#ifdef __APPLE__
-	so->so_rcv.sb_flags |= SB_RECV;	/* XXX */
+	so->so_rcv.sb_flags |= SB_RECV;
 	so->so_rcv.sb_so = so->so_snd.sb_so = so;
-#endif
 	so->next_lock_lr = 0;
 	so->next_unlock_lr = 0;
 
@@ -591,15 +602,15 @@ socreate(int dom, struct socket **aso, int type, int proto)
 	mac_socket_label_associate(kauth_cred_get(), so);
 #endif /* MAC_SOCKET */
 
-//### Attachement will create the per pcb lock if necessary and increase refcount
 	/*
-	 * for creation, make sure it's done before
-	 * socket is inserted in lists
+	 * Attachment will create the per pcb lock if necessary and
+	 * increase refcount for creation, make sure it's done before
+	 * socket is inserted in lists.
*/ so->so_usecount++; error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); - if (error) { + if (error != 0) { /* * Warning: * If so_pcb is not zero, the socket will be leaked, @@ -610,8 +621,8 @@ socreate(int dom, struct socket **aso, int type, int proto) sofreelastref(so, 1); /* will deallocate the socket */ return (error); } -#ifdef __APPLE__ - prp->pr_domain->dom_refs++; + + atomic_add_32(&prp->pr_domain->dom_refs, 1); TAILQ_INIT(&so->so_evlist); /* Attach socket filters for this protocol */ @@ -619,23 +630,26 @@ socreate(int dom, struct socket **aso, int type, int proto) #if TCPDEBUG if (tcpconsdebug == 2) so->so_options |= SO_DEBUG; -#endif #endif so_set_default_traffic_class(so); + /* - * If this is a background thread/task, mark the socket as such. + * If this thread or task is marked to create backgrounded sockets, + * mark the socket as background. */ - if (proc_get_self_isbackground() != 0) { + if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_NEW_SOCKETS_BG)) { socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND); so->so_background_thread = current_thread(); } switch (dom) { /* - * Don't mark Unix domain or system sockets as eligible for defunct by default. - */ + * Don't mark Unix domain, system or multipath sockets as + * eligible for defunct by default. + */ case PF_LOCAL: case PF_SYSTEM: + case PF_MULTIPATH: so->so_flags |= SOF_NODEFUNCT; break; default: @@ -643,9 +657,52 @@ socreate(int dom, struct socket **aso, int type, int proto) } *aso = so; + return (0); } +/* + * Returns: 0 Success + * EAFNOSUPPORT + * EPROTOTYPE + * EPROTONOSUPPORT + * ENOBUFS + * :ENOBUFS[AF_UNIX] + * :ENOBUFS[TCP] + * :ENOMEM[TCP] + * :??? [other protocol families, IPSEC] + */ +int +socreate(int dom, struct socket **aso, int type, int proto) +{ + return (socreate_internal(dom, aso, type, proto, current_proc(), 0, + PROC_NULL)); +} + +int +socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid) +{ + int error = 0; + struct proc *ep = PROC_NULL; + + if ((proc_selfpid() != epid) && ((ep = proc_find(epid)) == PROC_NULL)) { + error = ESRCH; + goto done; + } + + error = socreate_internal(dom, aso, type, proto, current_proc(), 0, ep); + + /* + * It might not be wise to hold the proc reference when calling + * socreate_internal since it calls soalloc with M_WAITOK + */ +done: + if (ep != PROC_NULL) + proc_rele(ep); + + return (error); +} + /* * Returns: 0 Success * :EINVAL Invalid argument [COMMON_START] @@ -669,14 +726,17 @@ socreate(int dom, struct socket **aso, int type, int proto) * be returned by the tcp_usr_bind function supplied. 
*/ int -sobind(struct socket *so, struct sockaddr *nam) +sobindlock(struct socket *so, struct sockaddr *nam, int dolock) { struct proc *p = current_proc(); int error = 0; - socket_lock(so, 1); - VERIFY(so->so_usecount > 1); + if (dolock) + socket_lock(so, 1); + VERIFY(so->so_usecount > 1); + so_update_last_owner_locked(so, p); + so_update_policy(so); /* * If this is a bind request on a socket that has been marked @@ -684,9 +744,9 @@ sobind(struct socket *so, struct sockaddr *nam) */ if (so->so_flags & SOF_DEFUNCT) { error = EINVAL; - SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", - __func__, proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so), - error)); + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", + __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); goto out; } @@ -696,7 +756,8 @@ sobind(struct socket *so, struct sockaddr *nam) if (error == 0) error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); out: - socket_unlock(so, 1); + if (dolock) + socket_unlock(so, 1); if (error == EJUSTRETURN) error = 0; @@ -712,17 +773,22 @@ sodealloc(struct socket *so) /* Remove any filters */ sflt_termsock(so); + /* Delete the state allocated for msg queues on a socket */ + if (so->so_flags & SOF_ENABLE_MSGS) { + FREE(so->so_msg_state, M_TEMP); + so->so_msg_state = NULL; + } + VERIFY(so->so_msg_state == NULL); + so->so_gencnt = ++so_gencnt; #if CONFIG_MACF_SOCKET mac_socket_label_destroy(so); #endif /* MAC_SOCKET */ - if (so->cached_in_sock_layer == 1) { + + if (so->cached_in_sock_layer) { cached_sock_free(so); } else { - if (so->cached_in_sock_layer == -1) - panic("sodealloc: double dealloc: so=%p\n", so); - so->cached_in_sock_layer = -1; FREE_ZONE(so, sizeof (*so), so->so_zone); } } @@ -753,7 +819,10 @@ solisten(struct socket *so, int backlog) int error = 0; socket_lock(so, 1); - + + so_update_last_owner_locked(so, p); + so_update_policy(so); + if (so->so_proto == NULL) { error = EINVAL; goto out; @@ -773,23 +842,22 @@ solisten(struct socket *so, int backlog) (so->so_flags & SOF_DEFUNCT)) { error = EINVAL; if (so->so_flags & SOF_DEFUNCT) { - SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", - __func__, proc_pid(p), so, INP_SOCKAF(so), - INP_SOCKTYPE(so), error)); + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] " + "(%d)\n", __func__, proc_pid(p), + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); } goto out; } - if ((so->so_restrictions & SO_RESTRICT_DENYIN) != 0) { + if ((so->so_restrictions & SO_RESTRICT_DENY_IN) != 0) { error = EPERM; goto out; } error = sflt_listen(so); - - if (error == 0) { + if (error == 0) error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); - } if (error) { if (error == EJUSTRETURN) @@ -829,14 +897,12 @@ sofreelastref(struct socket *so, int dealloc) /* Assume socket is locked */ - if ((!(so->so_flags & SOF_PCBCLEARING)) || - ((so->so_state & SS_NOFDREF) == 0)) { -#ifdef __APPLE__ + if (!(so->so_flags & SOF_PCBCLEARING) || !(so->so_state & SS_NOFDREF)) { selthreadclear(&so->so_snd.sb_sel); selthreadclear(&so->so_rcv.sb_sel); - so->so_rcv.sb_flags &= ~SB_UPCALL; - so->so_snd.sb_flags &= ~SB_UPCALL; -#endif + so->so_rcv.sb_flags &= ~(SB_SEL|SB_UPCALL); + so->so_snd.sb_flags &= ~(SB_SEL|SB_UPCALL); + so->so_event = NULL; return; } if (head != NULL) { @@ -851,12 +917,11 @@ sofreelastref(struct socket *so, int dealloc) * accept(2) may hang after select(2) indicated * that the listening socket was ready. 
 			 */
-#ifdef __APPLE__
 			selthreadclear(&so->so_snd.sb_sel);
 			selthreadclear(&so->so_rcv.sb_sel);
-			so->so_rcv.sb_flags &= ~SB_UPCALL;
-			so->so_snd.sb_flags &= ~SB_UPCALL;
-#endif
+			so->so_rcv.sb_flags &= ~(SB_SEL|SB_UPCALL);
+			so->so_snd.sb_flags &= ~(SB_SEL|SB_UPCALL);
+			so->so_event = NULL;
 			socket_unlock(head, 1);
 			return;
 		} else {
@@ -867,15 +932,19 @@ sofreelastref(struct socket *so, int dealloc)
 		so->so_head = NULL;
 		socket_unlock(head, 1);
 	}
-#ifdef __APPLE__
-	selthreadclear(&so->so_snd.sb_sel);
-	sbrelease(&so->so_snd);
-#endif
+	sowflush(so);
 	sorflush(so);
+
+#if FLOW_DIVERT
+	if (so->so_flags & SOF_FLOW_DIVERT) {
+		flow_divert_detach(so);
+	}
+#endif	/* FLOW_DIVERT */
+
 	/* 3932268: disable upcall */
 	so->so_rcv.sb_flags &= ~SB_UPCALL;
 	so->so_snd.sb_flags &= ~SB_UPCALL;
+	so->so_event = NULL;
 
 	if (dealloc)
 		sodealloc(so);
@@ -901,7 +970,7 @@ soclose_wait_locked(struct socket *so)
 	so->so_rcv.sb_flags &= ~SB_UPCALL;
 	so->so_snd.sb_flags &= ~SB_UPCALL;
 	so->so_flags |= SOF_CLOSEWAIT;
-	(void) msleep((caddr_t)&so->so_upcall, mutex_held, (PZERO - 1),
+	(void) msleep((caddr_t)&so->so_upcallusecount, mutex_held, (PZERO - 1),
 	    "soclose_wait_locked", NULL);
 	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
 	so->so_flags &= ~SOF_CLOSEWAIT;
@@ -921,10 +990,14 @@ soclose_locked(struct socket *so)
 
 	if (so->so_usecount == 0) {
 		panic("soclose: so=%p refcount=0\n", so);
+		/* NOTREACHED */
 	}
 
 	sflt_notify(so, sock_evt_closing, NULL);
 
+	if (so->so_upcallusecount)
+		soclose_wait_locked(so);
+
 	if ((so->so_options & SO_ACCEPTCONN)) {
 		struct socket *sp, *sonext;
 		int socklock = 0;
@@ -935,26 +1008,30 @@ soclose_locked(struct socket *so)
 		 */
 		so->so_options &= ~SO_ACCEPTCONN;
 
-		for (sp = TAILQ_FIRST(&so->so_incomp); sp != NULL; sp = sonext) {
+		for (sp = TAILQ_FIRST(&so->so_incomp);
+		    sp != NULL; sp = sonext) {
 			sonext = TAILQ_NEXT(sp, so_list);
 
-			/* Radar 5350314
+			/*
+			 * Radar 5350314
 			 * skip sockets thrown away by tcpdropdropblreq
 			 * they will get cleanup by the garbage collection.
 			 * otherwise, remove the incomp socket from the queue
 			 * and let soabort trigger the appropriate cleanup.
 			 */
-			if (sp->so_flags & SOF_OVERFLOW)
+			if (sp->so_flags & SOF_OVERFLOW)
 				continue;
 
 			if (so->so_proto->pr_getlock != NULL) {
-				/* lock ordering for consistency with the rest of the stack,
-				 * we lock the socket first and then grabb the head.
+				/*
+				 * Lock ordering for consistency with the
+				 * rest of the stack, we lock the socket
+				 * first and then grab the head.
 				 */
 				socket_unlock(so, 0);
 				socket_lock(sp, 1);
 				socket_lock(so, 0);
-				socklock = 1;
+				socklock = 1;
 			}
 
 			TAILQ_REMOVE(&so->so_incomp, sp, so_list);
@@ -967,7 +1044,7 @@ soclose_locked(struct socket *so)
 				(void) soabort(sp);
 			}
 
-			if (socklock)
+			if (socklock)
 				socket_unlock(sp, 1);
 		}
 
@@ -994,7 +1071,7 @@ soclose_locked(struct socket *so)
 			}
 		}
 	}
-	if (so->so_pcb == 0) {
+	if (so->so_pcb == NULL) {
 		/* 3915887: mark the socket as ready for dealloc */
 		so->so_flags |= SOF_PCBCLEARING;
 		goto discard;
@@ -1032,35 +1109,44 @@ soclose_locked(struct socket *so)
 		}
 	}
 drop:
-	if (so->so_usecount == 0)
+	if (so->so_usecount == 0) {
 		panic("soclose: usecount is zero so=%p\n", so);
-	if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
+		/* NOTREACHED */
+	}
+	if (so->so_pcb != NULL && !(so->so_flags & SOF_PCBCLEARING)) {
		/*
		 * Let NetworkStatistics know this PCB is going away
		 * before we detach it.
*/ if (nstat_collect && - (so->so_proto->pr_domain->dom_family == AF_INET || - so->so_proto->pr_domain->dom_family == AF_INET6)) + (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) nstat_pcb_detach(so->so_pcb); int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); if (error == 0) error = error2; } - if (so->so_usecount <= 0) + if (so->so_usecount <= 0) { panic("soclose: usecount is zero so=%p\n", so); + /* NOTREACHED */ + } discard: - if (so->so_pcb && so->so_state & SS_NOFDREF) + if (so->so_pcb != NULL && !(so->so_flags & SOF_MP_SUBFLOW) && + (so->so_state & SS_NOFDREF)) { panic("soclose: NOFDREF"); + /* NOTREACHED */ + } so->so_state |= SS_NOFDREF; - + + if (so->so_flags & SOF_MP_SUBFLOW) + so->so_flags &= ~SOF_MP_SUBFLOW; + if ((so->so_flags & SOF_KNOTE) != 0) KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED); -#ifdef __APPLE__ - so->so_proto->pr_domain->dom_refs--; + + atomic_add_32(&so->so_proto->pr_domain->dom_refs, -1); evsofree(so); -#endif + so->so_usecount--; sofree(so); return (error); @@ -1072,9 +1158,6 @@ soclose(struct socket *so) int error = 0; socket_lock(so, 1); - if (so->so_upcallusecount) - soclose_wait_locked(so); - if (so->so_retaincnt == 0) { error = soclose_locked(so); } else { @@ -1129,6 +1212,9 @@ soacceptlock(struct socket *so, struct sockaddr **nam, int dolock) if (dolock) socket_lock(so, 1); + so_update_last_owner_locked(so, PROC_NULL); + so_update_policy(so); + if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; @@ -1153,10 +1239,9 @@ soacceptfilter(struct socket *so) struct socket *head = so->so_head; /* - * Hold the lock even if this socket - * has not been made visible to the filter(s). - * For sockets with global locks, this protect against the - * head or peer going away + * Hold the lock even if this socket has not been made visible + * to the filter(s). For sockets with global locks, this protects + * against the head or peer going away */ socket_lock(so, 1); if (sogetaddr_locked(so, &remote, 1) != 0 || @@ -1226,7 +1311,10 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock) if (dolock) socket_lock(so, 1); - + + so_update_last_owner_locked(so, p); + so_update_policy(so); + /* * If this is a listening socket or if this is a previously-accepted * socket that has been marked as inactive, reject the connect request. 
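
soacceptlock() and soconnectlock() above, like sobindlock() earlier in this patch, fold the locked and unlocked entry points into one body driven by a dolock argument, so callers that already hold the socket lock can reuse the same code path; the shared body is also the single place where per-call bookkeeping such as so_update_last_owner_locked() and so_update_policy() now happens. Below is a small sketch of that convention under stated assumptions: struct object and the object_op names are hypothetical, and a pthread mutex takes the place of the socket lock.

	#include <pthread.h>
	#include <errno.h>

	struct object {
		pthread_mutex_t	mtx;
		int		state;
	};

	/*
	 * One body serves both flavors: with dolock nonzero the function
	 * takes and drops the mutex itself; with dolock zero the caller
	 * must already hold it.
	 */
	static int
	object_op_common(struct object *o, int dolock)
	{
		int error = 0;

		if (dolock)
			pthread_mutex_lock(&o->mtx);

		if (o->state < 0)
			error = EINVAL;		/* validate under the lock */
		else
			o->state++;		/* the real work */

		if (dolock)
			pthread_mutex_unlock(&o->mtx);
		return (error);
	}

	/* Public entry point: acquires the lock itself. */
	static int
	object_op(struct object *o)
	{
		return (object_op_common(o, 1));
	}

	/* For callers that already hold o->mtx. */
	static int
	object_op_locked(struct object *o)
	{
		return (object_op_common(o, 0));
	}

A caller initializes the object with struct object o = { PTHREAD_MUTEX_INITIALIZER, 0 } and picks the variant matching its locking state; the benefit, as in the hunks above, is that validation and bookkeeping live in exactly one place instead of being duplicated per entry point.
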
@@ -1234,16 +1322,17 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock) if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) { error = EOPNOTSUPP; if (so->so_flags & SOF_DEFUNCT) { - SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", - __func__, proc_pid(p), so, INP_SOCKAF(so), - INP_SOCKTYPE(so), error)); + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] " + "(%d)\n", __func__, proc_pid(p), + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); } if (dolock) socket_unlock(so, 1); return (error); } - if ((so->so_restrictions & SO_RESTRICT_DENYOUT) != 0) { + if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) { if (dolock) socket_unlock(so, 1); return (EPERM); @@ -1265,12 +1354,12 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock) * - non-blocking connect returns before completion; */ error = sflt_connectout(so, nam); - - if (error) { + if (error != 0) { if (error == EJUSTRETURN) error = 0; } else { - error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); + error = (*so->so_proto->pr_usrreqs->pru_connect) + (so, nam, p); } } if (dolock) @@ -1309,6 +1398,62 @@ soconnect2(struct socket *so1, struct socket *so2) return (error); } +int +soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ + int error; + + /* + * If this is a listening socket or if this is a previously-accepted + * socket that has been marked as inactive, reject the connect request. + */ + if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) { + error = EOPNOTSUPP; + if (so->so_flags & SOF_DEFUNCT) { + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] " + "(%d)\n", __func__, proc_pid(p), + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); + } + return (error); + } + + if ((so->so_restrictions & SO_RESTRICT_DENY_OUT) != 0) + return (EPERM); + + /* + * If protocol is connection-based, can only connect once + * unless PR_MULTICONN is set. Otherwise, if connected, + * try to disconnect first. This allows user to disconnect + * by connecting to, e.g., a null address. + */ + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) && + !(so->so_proto->pr_flags & PR_MULTICONN) && + ((so->so_proto->pr_flags & PR_CONNREQUIRED) || + (error = sodisconnectlocked(so)) != 0)) { + error = EISCONN; + } else { + /* + * Run connect filter before calling protocol: + * - non-blocking connect returns before completion; + */ + error = sflt_connectxout(so, dst_sl); + if (error != 0) { + if (error == EJUSTRETURN) + error = 0; + } else { + error = (*so->so_proto->pr_usrreqs->pru_connectx) + (so, src_sl, dst_sl, p, ifscope, aid, pcid, + flags, arg, arglen); + } + } + + return (error); +} + int sodisconnectlocked(struct socket *so) { @@ -1324,10 +1469,9 @@ sodisconnectlocked(struct socket *so) } error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); - - if (error == 0) { + if (error == 0) sflt_notify(so, sock_evt_disconnected, NULL); - } + bad: return (error); } @@ -1344,7 +1488,45 @@ sodisconnect(struct socket *so) return (error); } -#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT) +int +sodisconnectxlocked(struct socket *so, associd_t aid, connid_t cid) +{ + int error; + + /* + * Call the protocol disconnectx handler; let it handle all + * matters related to the connection state of this session. 
+ */ + error = (*so->so_proto->pr_usrreqs->pru_disconnectx)(so, aid, cid); + if (error == 0) { + /* + * The event applies only for the session, not for + * the disconnection of individual subflows. + */ + if (so->so_state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) + sflt_notify(so, sock_evt_disconnected, NULL); + } + return (error); +} + +int +sodisconnectx(struct socket *so, associd_t aid, connid_t cid) +{ + int error; + + socket_lock(so, 1); + error = sodisconnectxlocked(so, aid, cid); + socket_unlock(so, 1); + return (error); +} + +int +sopeelofflocked(struct socket *so, associd_t aid, struct socket **psop) +{ + return ((*so->so_proto->pr_usrreqs->pru_peeloff)(so, aid, psop)); +} + +#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) /* * sosendcheck will lock the socket buffer if it isn't locked and @@ -1358,11 +1540,12 @@ sodisconnect(struct socket *so) * sbwait:EINTR * [so_error]:??? */ -static int -sosendcheck(struct socket *so, struct sockaddr *addr, int32_t resid, int32_t clen, - int32_t atomic, int flags, int *sblocked) +int +sosendcheck(struct socket *so, struct sockaddr *addr, user_ssize_t resid, + int32_t clen, int32_t atomic, int flags, int *sblocked, + struct mbuf *control) { - int error = 0; + int error = 0; int32_t space; int assumelock = 0; @@ -1396,9 +1579,9 @@ restart: if (so->so_flags & SOF_DEFUNCT) { defunct: error = EPIPE; - SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__, - proc_selfpid(), so, INP_SOCKAF(so), INP_SOCKTYPE(so), - error)); + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", + __func__, proc_selfpid(), (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); return (error); } @@ -1421,12 +1604,17 @@ defunct: ENOTCONN : EDESTADDRREQ); } } - space = sbspace(&so->so_snd); + if (so->so_flags & SOF_ENABLE_MSGS) + space = msgq_sbspace(so, control); + else + space = sbspace(&so->so_snd); + if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) return (EMSGSIZE); + if ((space < resid + clen && (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) || (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) { @@ -1434,7 +1622,7 @@ defunct: assumelock) { return (EWOULDBLOCK); } - sbunlock(&so->so_snd, 1); + sbunlock(&so->so_snd, TRUE); /* keep socket locked */ *sblocked = 0; error = sbwait(&so->so_snd); if (error) { @@ -1444,7 +1632,6 @@ defunct: } goto restart; } - return (0); } @@ -1513,25 +1700,26 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags) { struct mbuf **mp; - register struct mbuf *m, *freelist = NULL; - register int32_t space, len, resid; + struct mbuf *m, *freelist = NULL; + user_ssize_t space, len, resid; int clen = 0, error, dontroute, mlen, sendflags; int atomic = sosendallatonce(so) || top; int sblocked = 0; struct proc *p = current_proc(); + struct mbuf *control_copy = NULL; - if (uio) { - // LP64todo - fix this! + if (uio != NULL) resid = uio_resid(uio); - } else { + else resid = top->m_pkthdr.len; - } + KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), so, resid, so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat); socket_lock(so, 1); so_update_last_owner_locked(so, p); - + so_update_policy(so); + if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) { error = EOPNOTSUPP; socket_unlock(so, 1); @@ -1545,31 +1733,39 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * of space and resid. 
On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. * - * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM - * type sockets since that's an error. + * Usually, MSG_EOR isn't used on SOCK_STREAM type sockets. + * But it will be used by sockets doing message delivery. + * + * Note: We limit resid to be a positive 32 bits value as we use + * imin() to set bytes_to_copy -- radr://14558484 */ - if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { + if ((int32_t)resid < 0 || (so->so_type == SOCK_STREAM && + !(so->so_flags & SOF_ENABLE_MSGS) && (flags & MSG_EOR))) { error = EINVAL; socket_unlock(so, 1); goto out; } - dontroute = - (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && + dontroute = (flags & MSG_DONTROUTE) && + (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd); - if (control) + + if (control != NULL) clen = control->m_len; do { error = sosendcheck(so, addr, resid, clen, atomic, flags, - &sblocked); - if (error) { + &sblocked, control); + if (error) goto release; - } + mp = ⊤ - space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? - 1024 : 0); + if (so->so_flags & SOF_ENABLE_MSGS) + space = msgq_sbspace(so, control); + else + space = sbspace(&so->so_snd) - clen; + space += ((flags & MSG_OOB) ? 1024 : 0); do { if (uio == NULL) { @@ -1586,11 +1782,10 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, bytes_to_copy = imin(resid, space); - if (sosendminchain > 0) { + if (sosendminchain > 0) chainlength = 0; - } else { + else chainlength = sosendmaxchain; - } /* * Attempt to use larger than system page-size @@ -1606,7 +1801,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, do { int num_needed; - int hdrs_needed = (top == 0) ? 1 : 0; + int hdrs_needed = (top == NULL) ? 1 : 0; /* * try to maintain a local cache of mbuf @@ -1688,7 +1883,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, } if (freelist == NULL) { - if (top == 0) + if (top == NULL) MGETHDR(freelist, M_WAIT, MT_DATA); else @@ -1705,7 +1900,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * leave room for protocol * headers in first mbuf. */ - if (atomic && top == 0 && + if (atomic && top == NULL && bytes_to_copy < MHLEN) { MH_ALIGN(freelist, bytes_to_copy); @@ -1758,7 +1953,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, if (flags & (MSG_HOLD|MSG_SEND)) { /* Enqueue for later, go away if HOLD */ - register struct mbuf *mb1; + struct mbuf *mb1; if (so->so_temp && (flags & MSG_FLUSH)) { m_freem(so->so_temp); so->so_temp = NULL; @@ -1789,22 +1984,22 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && - (resid <= 0)) ? - PRUS_EOF : - /* If there is more to send set PRUS_MORETOCOME */ - (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; + (resid <= 0)) ? PRUS_EOF : + /* If there is more to send set PRUS_MORETOCOME */ + (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; /* * Socket filter processing */ - error = sflt_data_out(so, addr, &top, &control, - (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0); + error = sflt_data_out(so, addr, &top, + &control, (sendflags & MSG_OOB) ? 
+ sock_data_filt_flag_oob : 0); if (error) { if (error == EJUSTRETURN) { error = 0; clen = 0; - control = 0; - top = 0; + control = NULL; + top = NULL; } goto release; @@ -1813,18 +2008,27 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, * End Socket filter processing */ + if (so->so_flags & SOF_ENABLE_MSGS) { + /* + * Make a copy of control mbuf, + * so that msg priority can be + * passed to subsequent mbufs. + */ + control_copy = m_dup(control, M_NOWAIT); + } error = (*so->so_proto->pr_usrreqs->pru_send) - (so, sendflags, top, addr, control, p); -#ifdef __APPLE__ + (so, sendflags, top, addr, control, p); + if (flags & MSG_SEND) so->so_temp = NULL; -#endif + if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; - control = 0; - top = 0; + control = control_copy; + control_copy = NULL; + top = NULL; mp = ⊤ if (error) goto release; @@ -1833,16 +2037,18 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, release: if (sblocked) - sbunlock(&so->so_snd, 0); /* will unlock socket */ + sbunlock(&so->so_snd, FALSE); /* will unlock socket */ else socket_unlock(so, 1); out: - if (top) + if (top != NULL) m_freem(top); - if (control) + if (control != NULL) m_freem(control); - if (freelist) + if (freelist != NULL) m_freem_list(freelist); + if (control_copy != NULL) + m_freem(control_copy); KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, resid, so->so_snd.sb_cc, space, error); @@ -1891,35 +2097,37 @@ int soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { - register struct mbuf *m, **mp, *ml = NULL; - register int flags, len, error, offset; + struct mbuf *m, **mp, *ml = NULL; + struct mbuf *nextrecord, *free_list; + int flags, error, offset; + user_ssize_t len; struct protosw *pr = so->so_proto; - struct mbuf *nextrecord; - int moff, type = 0; - int orig_resid = uio_resid(uio); - struct mbuf *free_list; - int delayed_copy_len; + int moff, type =0; + user_ssize_t orig_resid = uio_resid(uio); + user_ssize_t delayed_copy_len; int can_delay; int need_event; struct proc *p = current_proc(); - // LP64todo - fix this! 
KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, so, uio_resid(uio), so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat); socket_lock(so, 1); so_update_last_owner_locked(so, p); + so_update_policy(so); #ifdef MORE_LOCKING_DEBUG - if (so->so_usecount == 1) - panic("soreceive: so=%x no other reference on socket\n", so); + if (so->so_usecount == 1) { + panic("%s: so=%x no other reference on socket\n", __func__, so); + /* NOTREACHED */ + } #endif mp = mp0; - if (psa) - *psa = 0; - if (controlp) - *controlp = 0; - if (flagsp) + if (psa != NULL) + *psa = NULL; + if (controlp != NULL) + *controlp = NULL; + if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; @@ -1933,8 +2141,9 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct sockbuf *sb = &so->so_rcv; error = ENOTCONN; - SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__, - proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so), error)); + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", + __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); /* * This socket should have been disconnected and flushed * prior to being returned from sodefunct(); there should @@ -1970,12 +2179,12 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, error = uiomove(mtod(m, caddr_t), imin(uio_resid(uio), m->m_len), uio); m = m_free(m); - } while (uio_resid(uio) && error == 0 && m); + } while (uio_resid(uio) && error == 0 && m != NULL); socket_lock(so, 0); bad: - if (m) + if (m != NULL) m_freem(m); -#ifdef __APPLE__ + if ((so->so_options & SO_WANTOOBFLAG) != 0) { if (error == EWOULDBLOCK || error == EINVAL) { /* @@ -1986,24 +2195,23 @@ bad: */ error = 0; goto nooob; - } else if (error == 0 && flagsp) { + } else if (error == 0 && flagsp != NULL) { *flagsp |= MSG_OOB; } } socket_unlock(so, 1); KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, 0, 0, 0, 0); -#endif + return (error); } nooob: - if (mp) - *mp = (struct mbuf *)0; + if (mp != NULL) + *mp = NULL; if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) (*pr->pr_usrreqs->pru_rcvd)(so, 0); - - free_list = (struct mbuf *)0; + free_list = NULL; delayed_copy_len = 0; restart: #ifdef MORE_LOCKING_DEBUG @@ -2022,9 +2230,13 @@ restart: * only be released when the upcall routine returns to its caller. * Because the socket has been officially closed, there can be * no further read on it. + * + * A multipath subflow socket would have its SS_NOFDREF set by + * default, so check for SOF_MP_SUBFLOW socket flag; when the + * socket is closed for real, SOF_MP_SUBFLOW would be cleared. */ if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) == - (SS_NOFDREF | SS_CANTRCVMORE)) { + (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) { socket_unlock(so, 1); return (0); } @@ -2049,11 +2261,11 @@ restart: * we have to do the receive in sections, and thus risk returning * a short count if a timeout or signal occurs after we start. 
*/ - if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && + if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio_resid(uio)) && (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) && - m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { + m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { /* * Panic if we notice inconsistencies in the socket's * receive list; both sb_mb and sb_cc should correctly @@ -2064,7 +2276,7 @@ restart: SB_MB_CHECK(&so->so_rcv); if (so->so_error) { - if (m) + if (m != NULL) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) @@ -2072,12 +2284,12 @@ restart: goto release; } if (so->so_state & SS_CANTRCVMORE) { - if (m) + if (m != NULL) goto dontblock; else goto release; } - for (; m; m = m->m_next) + for (; m != NULL; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; @@ -2096,7 +2308,7 @@ restart: } SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); - sbunlock(&so->so_rcv, 1); + sbunlock(&so->so_rcv, TRUE); /* keep socket locked */ #if EVEN_MORE_LOCKING_DEBUG if (socket_debug) printf("Waiting for socket data\n"); @@ -2107,9 +2319,11 @@ restart: if (socket_debug) printf("SORECEIVE - sbwait returned %d\n", error); #endif - if (so->so_usecount < 1) - panic("soreceive: after 2nd sblock so=%p ref=%d on " - "socket\n", so, so->so_usecount); + if (so->so_usecount < 1) { + panic("%s: after 2nd sblock so=%p ref=%d on socket\n", + __func__, so, so->so_usecount); + /* NOTREACHED */ + } if (error) { socket_unlock(so, 1); KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error, @@ -2169,7 +2383,7 @@ dontblock: * Process the next record or wait for one. */ socket_lock(so, 0); - sbunlock(&so->so_rcv, 1); + sbunlock(&so->so_rcv, TRUE); /* stay locked */ goto restart; } socket_lock(so, 0); @@ -2201,10 +2415,10 @@ dontblock: } #endif /* CONFIG_MACF_SOCKET_SUBSET */ orig_resid = 0; - if (psa) { + if (psa != NULL) { *psa = dup_sockaddr(mtod(m, struct sockaddr *), - mp0 == 0); - if ((*psa == 0) && (flags & MSG_NEEDSA)) { + mp0 == NULL); + if ((*psa == NULL) && (flags & MSG_NEEDSA)) { error = EWOULDBLOCK; goto release; } @@ -2213,9 +2427,11 @@ dontblock: m = m->m_next; } else { sbfree(&so->so_rcv, m); - if (m->m_next == 0 && so->so_rcv.sb_cc != 0) - panic("soreceive: about to create invalid " - "socketbuf"); + if (m->m_next == NULL && so->so_rcv.sb_cc != 0) { + panic("%s: about to create invalid socketbuf", + __func__); + /* NOTREACHED */ + } MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; if (m != NULL) { @@ -2255,11 +2471,12 @@ dontblock: } *controlp = m_copy(m, 0, m->m_len); - /* If we failed to allocate an mbuf, + /* + * If we failed to allocate an mbuf, * release any previously allocated - * mbufs for control data. Return + * mbufs for control data. Return * an error. Keep the mbufs in the - * socket as this is using + * socket as this is using * MSG_PEEK flag. */ if (*controlp == NULL) { @@ -2334,9 +2551,9 @@ dontblock: } cm = cmn; } - /* + /* * Update the value of nextrecord in case we received new - * records when the socket was unlocked above for + * records when the socket was unlocked above for * externalizing SCM_RIGHTS. */ if (m != NULL) @@ -2346,6 +2563,28 @@ dontblock: orig_resid = 0; } + /* + * If the socket is a TCP socket with message delivery + * enabled, then create a control msg to deliver the + * relative TCP sequence number for this data. 
Waiting + * until this point will protect against failures to + * allocate an mbuf for control msgs. + */ + if (so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP && + (so->so_flags & SOF_ENABLE_MSGS) && controlp != NULL) { + struct mbuf *seq_cm; + + seq_cm = sbcreatecontrol((caddr_t)&m->m_pkthdr.msg_seq, + sizeof (uint32_t), SCM_SEQNUM, SOL_SOCKET); + if (seq_cm == NULL) { + /* unable to allocate a control mbuf */ + error = ENOBUFS; + goto release; + } + *controlp = seq_cm; + controlp = &seq_cm->m_next; + } + if (m != NULL) { if (!(flags & MSG_PEEK)) { /* @@ -2356,10 +2595,13 @@ dontblock: * should be either NULL or equal to m->m_nextpkt. * See comments above about SB_LOCK. */ - if (m != so->so_rcv.sb_mb || m->m_nextpkt != nextrecord) - panic("soreceive: post-control !sync so=%p " - "m=%p nextrecord=%p\n", so, m, nextrecord); - + if (m != so->so_rcv.sb_mb || + m->m_nextpkt != nextrecord) { + panic("%s: post-control !sync so=%p m=%p " + "nextrecord=%p\n", __func__, so, m, + nextrecord); + /* NOTREACHED */ + } if (nextrecord == NULL) so->so_rcv.sb_lastrecord = m; } @@ -2384,7 +2626,8 @@ dontblock: need_event = 0; - while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) { + while (m != NULL && + (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; @@ -2414,7 +2657,7 @@ dontblock: * we must note any additions to the sockbuf when we * block interrupts again. */ - if (mp == 0) { + if (mp == NULL) { SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); if (can_delay && len == m->m_len) { @@ -2475,11 +2718,33 @@ dontblock: sbfree(&so->so_rcv, m); m->m_nextpkt = NULL; - if (mp) { + /* + * If this packet is an unordered packet + * (indicated by M_UNORDERED_DATA flag), remove + * the additional bytes added to the + * receive socket buffer size. + */ + if ((so->so_flags & SOF_ENABLE_MSGS) && + m->m_len && + (m->m_flags & M_UNORDERED_DATA) && + sbreserve(&so->so_rcv, + so->so_rcv.sb_hiwat - m->m_len)) { + if (so->so_msg_state->msg_uno_bytes > + m->m_len) { + so->so_msg_state-> + msg_uno_bytes -= m->m_len; + } else { + so->so_msg_state-> + msg_uno_bytes = 0; + } + m->m_flags &= ~M_UNORDERED_DATA; + } + + if (mp != NULL) { *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; - *mp = (struct mbuf *)0; + *mp = NULL; } else { if (free_list == NULL) free_list = m; @@ -2487,7 +2752,7 @@ dontblock: ml->m_next = m; ml = m; so->so_rcv.sb_mb = m = m->m_next; - ml->m_next = 0; + ml->m_next = NULL; } if (m != NULL) { m->m_nextpkt = nextrecord; @@ -2512,14 +2777,15 @@ dontblock: else copy_flag = M_WAIT; *mp = m_copym(m, 0, len, copy_flag); + /* + * Failed to allocate an mbuf? + * Adjust uio_resid back, it was + * adjusted down by len bytes which + * we didn't copy over. + */ if (*mp == NULL) { - /* - * Failed to allocate an mbuf. - * Adjust uio_resid back, it was - * adjusted down by len bytes which - * we didn't copy over - */ - uio_setresid(uio, (uio_resid(uio) + len)); + uio_setresid(uio, + (uio_resid(uio) + len)); break; } } @@ -2557,7 +2823,7 @@ dontblock: * count but without error. Keep sockbuf locked * against other readers. 
*/ - while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && + while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == NULL && (uio_resid(uio) - delayed_copy_len) > 0 && !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) @@ -2611,30 +2877,28 @@ dontblock: goto release; } m = so->so_rcv.sb_mb; - if (m) { + if (m != NULL) { nextrecord = m->m_nextpkt; } SB_MB_CHECK(&so->so_rcv); } } #ifdef MORE_LOCKING_DEBUG - if (so->so_usecount <= 1) - panic("soreceive: after big while so=%p ref=%d on socket\n", - so, so->so_usecount); + if (so->so_usecount <= 1) { + panic("%s: after big while so=%p ref=%d on socket\n", + __func__, so, so->so_usecount); + /* NOTREACHED */ + } #endif - if (m && pr->pr_flags & PR_ATOMIC) { -#ifdef __APPLE__ + if (m != NULL && pr->pr_flags & PR_ATOMIC) { if (so->so_options & SO_DONTTRUNC) { flags |= MSG_RCVMORE; } else { -#endif flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord(&so->so_rcv); -#ifdef __APPLE__ } -#endif } /* @@ -2649,7 +2913,7 @@ dontblock: flags |= MSG_HAVEMORE; if ((flags & MSG_PEEK) == 0) { - if (m == 0) { + if (m == NULL) { so->so_rcv.sb_mb = nextrecord; /* * First part is an inline SB_EMPTY_FIXUP(). Second @@ -2669,43 +2933,43 @@ dontblock: if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); } -#ifdef __APPLE__ + if (delayed_copy_len) { error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); - if (error) goto release; } - if (free_list) { - m_freem_list((struct mbuf *)free_list); - free_list = (struct mbuf *)0; + if (free_list != NULL) { + m_freem_list(free_list); + free_list = NULL; } if (need_event) postevent(so, 0, EV_OOB); -#endif + if (orig_resid == uio_resid(uio) && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { - sbunlock(&so->so_rcv, 1); + sbunlock(&so->so_rcv, TRUE); /* keep socket locked */ goto restart; } - if (flagsp) + if (flagsp != NULL) *flagsp |= flags; release: #ifdef MORE_LOCKING_DEBUG - if (so->so_usecount <= 1) - panic("soreceive: release so=%p ref=%d on socket\n", + if (so->so_usecount <= 1) { + panic("%s: release so=%p ref=%d on socket\n", __func__, so, so->so_usecount); + /* NOTREACHED */ + } #endif - if (delayed_copy_len) { + if (delayed_copy_len) error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); - } - if (free_list) { - m_freem_list((struct mbuf *)free_list); - } - sbunlock(&so->so_rcv, 0); /* will unlock socket */ - // LP64todo - fix this! 
+ if (free_list != NULL) + m_freem_list(free_list); + + sbunlock(&so->so_rcv, FALSE); /* will unlock socket */ + KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, so, uio_resid(uio), so->so_rcv.sb_cc, 0, error); @@ -2718,7 +2982,7 @@ release: */ static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, - int *resid) + user_ssize_t *resid) { int error = 0; struct mbuf *m; @@ -2727,15 +2991,13 @@ sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, socket_unlock(so, 0); - while (m && error == 0) { - + while (m != NULL && error == 0) { error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio); - m = m->m_next; } m_freem_list(*free_list); - *free_list = (struct mbuf *)NULL; + *free_list = NULL; *resid = 0; socket_lock(so, 0); @@ -2743,7 +3005,6 @@ sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, return (error); } - /* * Returns: 0 Success * EINVAL @@ -2819,46 +3080,142 @@ done: return (error); } +void +sowflush(struct socket *so) +{ + struct sockbuf *sb = &so->so_snd; +#ifdef notyet + lck_mtx_t *mutex_held; + /* + * XXX: This code is currently commented out, because we may get here + * as part of sofreelastref(), and at that time, pr_getlock() may no + * longer be able to return us the lock; this will be fixed in future. + */ + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); +#endif /* notyet */ + + /* + * Obtain lock on the socket buffer (SB_LOCK). This is required + * to prevent the socket buffer from being unexpectedly altered + * while it is used by another thread in socket send/receive. + * + * sblock() must not fail here, hence the assertion. + */ + (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT); + VERIFY(sb->sb_flags & SB_LOCK); + + sb->sb_flags &= ~(SB_SEL|SB_UPCALL); + sb->sb_flags |= SB_DROP; + sb->sb_upcall = NULL; + sb->sb_upcallarg = NULL; + + sbunlock(sb, TRUE); /* keep socket locked */ + + selthreadclear(&sb->sb_sel); + sbrelease(sb); +} + void sorflush(struct socket *so) { - register struct sockbuf *sb = &so->so_rcv; - register struct protosw *pr = so->so_proto; + struct sockbuf *sb = &so->so_rcv; + struct protosw *pr = so->so_proto; struct sockbuf asb; - -#ifdef MORE_LOCKING_DEBUG +#ifdef notyet lck_mtx_t *mutex_held; - + /* + * XXX: This code is currently commented out, because we may get here + * as part of sofreelastref(), and at that time, pr_getlock() may no + * longer be able to return us the lock; this will be fixed in future. + */ if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else mutex_held = so->so_proto->pr_domain->dom_mtx; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); -#endif +#endif /* notyet */ sflt_notify(so, sock_evt_flush_read, NULL); - sb->sb_flags |= SB_NOINTR; - (void) sblock(sb, M_WAIT); socantrcvmore(so); - sbunlock(sb, 1); -#ifdef __APPLE__ + + /* + * Obtain lock on the socket buffer (SB_LOCK). This is required + * to prevent the socket buffer from being unexpectedly altered + * while it is used by another thread in socket send/receive. + * + * sblock() must not fail here, hence the assertion. + */ + (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT); + VERIFY(sb->sb_flags & SB_LOCK); + + /* + * Copy only the relevant fields from "sb" to "asb" which we + * need for sbrelease() to function. 
In particular, skip + * sb_sel as it contains the wait queue linkage, which would + * wreak havoc if we were to issue selthreadclear() on "asb". + * Make sure to not carry over SB_LOCK in "asb", as we need + * to acquire it later as part of sbrelease(). + */ + bzero(&asb, sizeof (asb)); + asb.sb_cc = sb->sb_cc; + asb.sb_hiwat = sb->sb_hiwat; + asb.sb_mbcnt = sb->sb_mbcnt; + asb.sb_mbmax = sb->sb_mbmax; + asb.sb_ctl = sb->sb_ctl; + asb.sb_lowat = sb->sb_lowat; + asb.sb_mb = sb->sb_mb; + asb.sb_mbtail = sb->sb_mbtail; + asb.sb_lastrecord = sb->sb_lastrecord; + asb.sb_so = sb->sb_so; + asb.sb_flags = sb->sb_flags; + asb.sb_flags &= ~(SB_LOCK|SB_SEL|SB_KNOTE|SB_UPCALL); + asb.sb_flags |= SB_DROP; + + /* + * Ideally we'd bzero() these and preserve the ones we need; + * but to do that we'd need to shuffle things around in the + * sockbuf, and we can't do it now because there are KEXTS + * that are directly referring to the socket structure. + * + * Setting SB_DROP acts as a barrier to prevent further appends. + * Clearing SB_SEL is done for selthreadclear() below. + */ + sb->sb_cc = 0; + sb->sb_hiwat = 0; + sb->sb_mbcnt = 0; + sb->sb_mbmax = 0; + sb->sb_ctl = 0; + sb->sb_lowat = 0; + sb->sb_mb = NULL; + sb->sb_mbtail = NULL; + sb->sb_lastrecord = NULL; + sb->sb_timeo.tv_sec = 0; + sb->sb_timeo.tv_usec = 0; + sb->sb_upcall = NULL; + sb->sb_upcallarg = NULL; + sb->sb_flags &= ~(SB_SEL|SB_UPCALL); + sb->sb_flags |= SB_DROP; + + sbunlock(sb, TRUE); /* keep socket locked */ + + /* + * Note that selthreadclear() is called on the original "sb" and + * not the local "asb" because of the way wait queue linkage is + * implemented. Given that selwakeup() may be triggered, SB_SEL + * should no longer be set (cleared above.) + */ selthreadclear(&sb->sb_sel); -#endif - asb = *sb; - bzero((caddr_t)sb, sizeof (*sb)); - sb->sb_so = so; /* reestablish link to socket */ - if (asb.sb_flags & SB_KNOTE) { - sb->sb_sel.si_note = asb.sb_sel.si_note; - sb->sb_flags = SB_KNOTE; - } - if (asb.sb_flags & SB_DROP) - sb->sb_flags |= SB_DROP; - if (asb.sb_flags & SB_UNIX) - sb->sb_flags |= SB_UNIX; - if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) { + + if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); - } + sbrelease(&asb); } @@ -2904,52 +3261,55 @@ sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) * the top 32-bits assigning tv64.tv_sec to tv_p->tv_sec. 
*/ static int -sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p) +sooptcopyin_timeval(struct sockopt *sopt, struct timeval *tv_p) { int error; if (proc_is64bit(sopt->sopt_p)) { struct user64_timeval tv64; - if (sopt->sopt_valsize < sizeof(tv64)) { + if (sopt->sopt_valsize < sizeof (tv64)) return (EINVAL); - } - sopt->sopt_valsize = sizeof(tv64); + + sopt->sopt_valsize = sizeof (tv64); if (sopt->sopt_p != kernproc) { - error = copyin(sopt->sopt_val, &tv64, sizeof(tv64)); + error = copyin(sopt->sopt_val, &tv64, sizeof (tv64)); if (error != 0) return (error); } else { bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64, - sizeof(tv64)); + sizeof (tv64)); } - if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX - || tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) { + if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX || + tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) return (EDOM); - } + tv_p->tv_sec = tv64.tv_sec; tv_p->tv_usec = tv64.tv_usec; } else { struct user32_timeval tv32; - if (sopt->sopt_valsize < sizeof(tv32)) { + if (sopt->sopt_valsize < sizeof (tv32)) return (EINVAL); - } - sopt->sopt_valsize = sizeof(tv32); + + sopt->sopt_valsize = sizeof (tv32); if (sopt->sopt_p != kernproc) { - error = copyin(sopt->sopt_val, &tv32, sizeof(tv32)); + error = copyin(sopt->sopt_val, &tv32, sizeof (tv32)); if (error != 0) { return (error); } } else { bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32, - sizeof(tv32)); + sizeof (tv32)); } -#ifndef __LP64__ // K64todo "comparison is always false due to limited range of data type" - if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX - || tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) { +#ifndef __LP64__ + /* + * K64todo "comparison is always false due to + * limited range of data type" + */ + if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX || + tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) return (EDOM); - } #endif tv_p->tv_sec = tv32.tv_sec; tv_p->tv_usec = tv32.tv_usec; @@ -2978,7 +3338,7 @@ sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p) * their filter to return. */ int -sosetopt(struct socket *so, struct sockopt *sopt) +sosetoptlock(struct socket *so, struct sockopt *sopt, int dolock) { int error, optval; struct linger l; @@ -2987,46 +3347,57 @@ sosetopt(struct socket *so, struct sockopt *sopt) struct mac extmac; #endif /* MAC_SOCKET */ - socket_lock(so, 1); - - if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) - == (SS_CANTRCVMORE | SS_CANTSENDMORE) && + if (sopt->sopt_dir != SOPT_SET) + sopt->sopt_dir = SOPT_SET; + + if (dolock) + socket_lock(so, 1); + + if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) == + (SS_CANTRCVMORE | SS_CANTSENDMORE) && (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) { /* the socket has been shutdown, no more sockopt's */ error = EINVAL; - goto bad; - } - - if (sopt->sopt_dir != SOPT_SET) { - sopt->sopt_dir = SOPT_SET; + goto out; } error = sflt_setsockopt(so, sopt); - if (error) { + if (error != 0) { if (error == EJUSTRETURN) error = 0; - goto bad; + goto out; } - error = 0; if (sopt->sopt_level != SOL_SOCKET) { - if (so->so_proto && so->so_proto->pr_ctloutput) { + if (so->so_proto != NULL && + so->so_proto->pr_ctloutput != NULL) { error = (*so->so_proto->pr_ctloutput)(so, sopt); - socket_unlock(so, 1); - return (error); + goto out; } error = ENOPROTOOPT; } else { + /* + * Allow socket-level (SOL_SOCKET) options to be filtered by + * the protocol layer, if needed. A zero value returned from + * the handler means use default socket-level processing as + * done by the rest of this routine. 
Otherwise, any other + * return value indicates that the option is unsupported. + */ + if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs-> + pru_socheckopt(so, sopt)) != 0) + goto out; + + error = 0; switch (sopt->sopt_name) { case SO_LINGER: case SO_LINGER_SEC: error = sooptcopyin(sopt, &l, sizeof (l), sizeof (l)); - if (error) - goto bad; + if (error != 0) + goto out; so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz; - if (l.l_onoff) + if (l.l_onoff != 0) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; @@ -3042,15 +3413,13 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_OOBINLINE: case SO_TIMESTAMP: case SO_TIMESTAMP_MONOTONIC: -#ifdef __APPLE__ case SO_DONTTRUNC: case SO_WANTMORE: case SO_WANTOOBFLAG: -#endif error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; + if (error != 0) + goto out; if (optval) so->so_options |= sopt->sopt_name; else @@ -3063,8 +3432,8 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_RCVLOWAT: error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; + if (error != 0) + goto out; /* * Values < 1 make no sense for any of these @@ -3072,25 +3441,24 @@ sosetopt(struct socket *so, struct sockopt *sopt) */ if (optval < 1) { error = EINVAL; - goto bad; + goto out; } switch (sopt->sopt_name) { case SO_SNDBUF: - case SO_RCVBUF: - { - struct sockbuf *sb = (sopt->sopt_name == SO_SNDBUF) ? - &so->so_snd : &so->so_rcv; - if (sbreserve(sb, (u_int32_t) optval) == 0) { + case SO_RCVBUF: { + struct sockbuf *sb = + (sopt->sopt_name == SO_SNDBUF) ? + &so->so_snd : &so->so_rcv; + if (sbreserve(sb, (u_int32_t)optval) == 0) { error = ENOBUFS; - goto bad; + goto out; } sb->sb_flags |= SB_USRSIZE; sb->sb_flags &= ~SB_AUTOSIZE; sb->sb_idealsize = (u_int32_t)optval; break; } - /* * Make sure the low-water is never greater than * the high-water. 
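
Nearly every case in the switch above funnels through sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)), a bounded copy-in of the user's option value. The patch does not show sooptcopyin()'s body, so treat the following as an assumption about its style of contract: clamp the supplied value size to the destination buffer, reject anything shorter than a required minimum, then copy. A minimal userland sketch, with the hypothetical name optcopyin and memcpy standing in for the kernel's copyin():

	#include <errno.h>
	#include <stddef.h>
	#include <string.h>

	/*
	 * Copy a user-supplied option value into buf.  valsize may exceed
	 * len (the extra bytes are ignored) but must be at least minlen,
	 * mirroring how the SO_* cases above size-check their arguments.
	 */
	static int
	optcopyin(const void *val, size_t valsize, void *buf,
	    size_t len, size_t minlen)
	{
		size_t n = valsize;

		if (n > len)
			n = len;		/* never overrun the buffer */
		if (n < minlen)
			return (EINVAL);	/* option value too short */
		memcpy(buf, val, n);		/* copyin() in the kernel */
		return (0);
	}

Passing sizeof (optval) for both len and minlen, as the cases above do, makes the option effectively exactly int-sized: anything shorter is rejected with EINVAL and anything longer is truncated to the first sizeof (int) bytes.
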
@@ -3111,8 +3479,8 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_SNDTIMEO: case SO_RCVTIMEO: error = sooptcopyin_timeval(sopt, &tv); - if (error) - goto bad; + if (error != 0) + goto out; switch (sopt->sopt_name) { case SO_SNDTIMEO: @@ -3124,14 +3492,13 @@ sosetopt(struct socket *so, struct sockopt *sopt) } break; - case SO_NKE: - { + case SO_NKE: { struct so_nke nke; error = sooptcopyin(sopt, &nke, sizeof (nke), sizeof (nke)); - if (error) - goto bad; + if (error != 0) + goto out; error = sflt_attach_internal(so, nke.nke_handle); break; @@ -3140,71 +3507,65 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_NOSIGPIPE: error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; - if (optval) + if (error != 0) + goto out; + if (optval != 0) so->so_flags |= SOF_NOSIGPIPE; else so->so_flags &= ~SOF_NOSIGPIPE; - break; case SO_NOADDRERR: error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; - if (optval) + if (error != 0) + goto out; + if (optval != 0) so->so_flags |= SOF_NOADDRAVAIL; else so->so_flags &= ~SOF_NOADDRAVAIL; - break; case SO_REUSESHAREUID: error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; - if (optval) + if (error != 0) + goto out; + if (optval != 0) so->so_flags |= SOF_REUSESHAREUID; else so->so_flags &= ~SOF_REUSESHAREUID; break; -#ifdef __APPLE_API_PRIVATE + case SO_NOTIFYCONFLICT: if (kauth_cred_issuser(kauth_cred_get()) == 0) { error = EPERM; - goto bad; + goto out; } error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; - if (optval) + if (error != 0) + goto out; + if (optval != 0) so->so_flags |= SOF_NOTIFYCONFLICT; else so->so_flags &= ~SOF_NOTIFYCONFLICT; break; -#endif + case SO_RESTRICTIONS: - if (kauth_cred_issuser(kauth_cred_get()) == 0) { - error = EPERM; - goto bad; - } error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; - so->so_restrictions = (optval & (SO_RESTRICT_DENYIN | - SO_RESTRICT_DENYOUT | SO_RESTRICT_DENYSET)); + if (error != 0) + goto out; + + error = so_set_restrictions(so, optval); break; case SO_LABEL: #if CONFIG_MACF_SOCKET if ((error = sooptcopyin(sopt, &extmac, sizeof (extmac), sizeof (extmac))) != 0) - goto bad; + goto out; error = mac_setsockopt_label(proc_ucred(sopt->sopt_p), so, &extmac); @@ -3213,25 +3574,23 @@ sosetopt(struct socket *so, struct sockopt *sopt) #endif /* MAC_SOCKET */ break; -#ifdef __APPLE_API_PRIVATE case SO_UPCALLCLOSEWAIT: error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; - if (optval) + if (error != 0) + goto out; + if (optval != 0) so->so_flags |= SOF_UPCALLCLOSEWAIT; else so->so_flags &= ~SOF_UPCALLCLOSEWAIT; break; -#endif case SO_RANDOMPORT: error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval)); - if (error) - goto bad; - if (optval) + if (error != 0) + goto out; + if (optval != 0) so->so_flags |= SOF_BINDRANDOMPORT; else so->so_flags &= ~SOF_BINDRANDOMPORT; @@ -3240,12 +3599,13 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_NP_EXTENSIONS: { struct so_np_extensions sonpx; - error = sooptcopyin(sopt, &sonpx, sizeof(sonpx), sizeof(sonpx)); - if (error) - goto bad; + error = sooptcopyin(sopt, &sonpx, sizeof (sonpx), + sizeof (sonpx)); + if (error != 0) + goto out; if (sonpx.npx_mask & ~SONPX_MASK_VALID) { error = EINVAL; - goto bad; + goto out; } /* * Only one bit defined for now @@ -3261,20 +3621,20 @@ sosetopt(struct 
socket *so, struct sockopt *sopt) case SO_TRAFFIC_CLASS: { error = sooptcopyin(sopt, &optval, sizeof (optval), - sizeof (optval)); - if (error) - goto bad; + sizeof (optval)); + if (error != 0) + goto out; error = so_set_traffic_class(so, optval); - if (error) - goto bad; + if (error != 0) + goto out; break; } case SO_RECV_TRAFFIC_CLASS: { error = sooptcopyin(sopt, &optval, sizeof (optval), - sizeof (optval)); - if (error) - goto bad; + sizeof (optval)); + if (error != 0) + goto out; if (optval == 0) so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS; else @@ -3287,23 +3647,23 @@ sosetopt(struct socket *so, struct sockopt *sopt) error = sooptcopyin(sopt, &so_tcdbg, sizeof (struct so_tcdbg), sizeof (struct so_tcdbg)); - if (error) - goto bad; + if (error != 0) + goto out; error = so_set_tcdbg(so, &so_tcdbg); - if (error) - goto bad; + if (error != 0) + goto out; break; } case SO_PRIVILEGED_TRAFFIC_CLASS: error = priv_check_cred(kauth_cred_get(), PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0); - if (error) - goto bad; + if (error != 0) + goto out; error = sooptcopyin(sopt, &optval, sizeof (optval), - sizeof (optval)); - if (error) - goto bad; + sizeof (optval)); + if (error != 0) + goto out; if (optval == 0) so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS; else @@ -3316,7 +3676,7 @@ sosetopt(struct socket *so, struct sockopt *sopt) if (error != 0 || (so->so_flags & SOF_DEFUNCT)) { if (error == 0) error = EBADF; - goto bad; + goto out; } /* * Any process can set SO_DEFUNCTOK (clear @@ -3326,18 +3686,45 @@ sosetopt(struct socket *so, struct sockopt *sopt) if (optval == 0 && kauth_cred_issuser(kauth_cred_get()) == 0) { error = EPERM; - goto bad; + goto out; } if (optval) so->so_flags &= ~SOF_NODEFUNCT; else so->so_flags |= SOF_NODEFUNCT; - SODEFUNCTLOG(("%s[%d]: so %p [%d,%d] is now marked as " - "%seligible for defunct\n", __func__, - proc_selfpid(), so, INP_SOCKAF(so), - INP_SOCKTYPE(so), - (so->so_flags & SOF_NODEFUNCT) ? "not " : "")); + if (SOCK_DOM(so) == PF_INET || + SOCK_DOM(so) == PF_INET6) { + char s[MAX_IPv6_STR_LEN]; + char d[MAX_IPv6_STR_LEN]; + struct inpcb *inp = sotoinpcb(so); + + SODEFUNCTLOG(("%s[%d]: so 0x%llx [%s %s:%d -> " + "%s:%d] is now marked as %seligible for " + "defunct\n", __func__, proc_selfpid(), + (uint64_t)VM_KERNEL_ADDRPERM(so), + (SOCK_TYPE(so) == SOCK_STREAM) ? + "TCP" : "UDP", inet_ntop(SOCK_DOM(so), + ((SOCK_DOM(so) == PF_INET) ? + (void *)&inp->inp_laddr.s_addr : + (void *)&inp->in6p_laddr), s, sizeof (s)), + ntohs(inp->in6p_lport), + inet_ntop(SOCK_DOM(so), + (SOCK_DOM(so) == PF_INET) ? + (void *)&inp->inp_faddr.s_addr : + (void *)&inp->in6p_faddr, d, sizeof (d)), + ntohs(inp->in6p_fport), + (so->so_flags & SOF_NODEFUNCT) ? + "not " : "")); + } else { + SODEFUNCTLOG(("%s[%d]: so 0x%llx [%d,%d] is " + "now marked as %seligible for defunct\n", + __func__, proc_selfpid(), + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), + (so->so_flags & SOF_NODEFUNCT) ? 
+ "not " : "")); + } break; case SO_ISDEFUNCT: @@ -3364,16 +3751,50 @@ sosetopt(struct socket *so, struct sockopt *sopt) error = so_set_recv_anyif(so, optval); break; + case SO_TRAFFIC_MGT_BACKGROUND: { + /* This option is handled by lower layer(s) */ + error = 0; + break; + } + +#if FLOW_DIVERT + case SO_FLOW_DIVERT_TOKEN: + error = flow_divert_token_set(so, sopt); + break; +#endif /* FLOW_DIVERT */ + + + case SO_DELEGATED: + if ((error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval))) != 0) + break; + + error = so_set_effective_pid(so, optval, sopt->sopt_p); + break; + + case SO_DELEGATED_UUID: { + uuid_t euuid; + + if ((error = sooptcopyin(sopt, &euuid, sizeof (euuid), + sizeof (euuid))) != 0) + break; + + error = so_set_effective_uuid(so, euuid, sopt->sopt_p); + break; + } + default: error = ENOPROTOOPT; break; } - if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { - (void) ((*so->so_proto->pr_ctloutput)(so, sopt)); + if (error == 0 && so->so_proto != NULL && + so->so_proto->pr_ctloutput != NULL) { + (void) so->so_proto->pr_ctloutput(so, sopt); } } -bad: - socket_unlock(so, 1); +out: + if (dolock) + socket_unlock(so, 1); return (error); } @@ -3407,7 +3828,7 @@ sooptcopyout(struct sockopt *sopt, void *buf, size_t len) } static int -sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p) +sooptcopyout_timeval(struct sockopt *sopt, const struct timeval *tv_p) { int error; size_t len; @@ -3418,12 +3839,12 @@ sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p) error = 0; if (proc_is64bit(sopt->sopt_p)) { - len = sizeof(tv64); + len = sizeof (tv64); tv64.tv_sec = tv_p->tv_sec; tv64.tv_usec = tv_p->tv_usec; val = &tv64; } else { - len = sizeof(tv32); + len = sizeof (tv32); tv32.tv_sec = tv_p->tv_sec; tv32.tv_usec = tv_p->tv_usec; val = &tv32; @@ -3447,7 +3868,7 @@ sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p) * :??? */ int -sogetopt(struct socket *so, struct sockopt *sopt) +sogetoptlock(struct socket *so, struct sockopt *sopt, int dolock) { int error, optval; struct linger l; @@ -3456,35 +3877,43 @@ sogetopt(struct socket *so, struct sockopt *sopt) struct mac extmac; #endif /* MAC_SOCKET */ - if (sopt->sopt_dir != SOPT_GET) { + if (sopt->sopt_dir != SOPT_GET) sopt->sopt_dir = SOPT_GET; - } - socket_lock(so, 1); + if (dolock) + socket_lock(so, 1); error = sflt_getsockopt(so, sopt); - if (error) { + if (error != 0) { if (error == EJUSTRETURN) error = 0; - socket_unlock(so, 1); - return (error); + goto out; } - - error = 0; + if (sopt->sopt_level != SOL_SOCKET) { - if (so->so_proto && so->so_proto->pr_ctloutput) { + if (so->so_proto != NULL && + so->so_proto->pr_ctloutput != NULL) { error = (*so->so_proto->pr_ctloutput)(so, sopt); - socket_unlock(so, 1); - return (error); - } else { - socket_unlock(so, 1); - return (ENOPROTOOPT); + goto out; } + error = ENOPROTOOPT; } else { + /* + * Allow socket-level (SOL_SOCKET) options to be filtered by + * the protocol layer, if needed. A zero value returned from + * the handler means use default socket-level processing as + * done by the rest of this routine. Otherwise, any other + * return value indicates that the option is unsupported. + */ + if (so->so_proto != NULL && (error = so->so_proto->pr_usrreqs-> + pru_socheckopt(so, sopt)) != 0) + goto out; + + error = 0; switch (sopt->sopt_name) { case SO_LINGER: case SO_LINGER_SEC: - l.l_onoff = so->so_options & SO_LINGER; + l.l_onoff = ((so->so_options & SO_LINGER) ? 
1 : 0); l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger : so->so_linger / hz; error = sooptcopyout(sopt, &l, sizeof (l)); @@ -3500,11 +3929,9 @@ sogetopt(struct socket *so, struct sockopt *sopt) case SO_OOBINLINE: case SO_TIMESTAMP: case SO_TIMESTAMP_MONOTONIC: -#ifdef __APPLE__ case SO_DONTTRUNC: case SO_WANTMORE: case SO_WANTOOBFLAG: -#endif optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof (optval)); @@ -3514,7 +3941,6 @@ integer: optval = so->so_type; goto integer; -#ifdef __APPLE__ case SO_NREAD: if (so->so_proto->pr_flags & PR_ATOMIC) { int pkt_total; @@ -3522,9 +3948,10 @@ integer: pkt_total = 0; m1 = so->so_rcv.sb_mb; - while (m1) { - if (m1->m_type == MT_DATA || m1->m_type == MT_HEADER || - m1->m_type == MT_OOBDATA) + while (m1 != NULL) { + if (m1->m_type == MT_DATA || + m1->m_type == MT_HEADER || + m1->m_type == MT_OOBDATA) pkt_total += m1->m_len; m1 = m1->m_next; } @@ -3533,11 +3960,11 @@ integer: optval = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; } goto integer; - + case SO_NWRITE: optval = so->so_snd.sb_cc; goto integer; -#endif + case SO_ERROR: optval = so->so_error; so->so_error = 0; @@ -3579,14 +4006,13 @@ integer: optval = (so->so_flags & SOF_REUSESHAREUID); goto integer; -#ifdef __APPLE_API_PRIVATE + case SO_NOTIFYCONFLICT: optval = (so->so_flags & SOF_NOTIFYCONFLICT); goto integer; -#endif + case SO_RESTRICTIONS: - optval = so->so_restrictions & (SO_RESTRICT_DENYIN | - SO_RESTRICT_DENYOUT | SO_RESTRICT_DENYSET); + optval = so_get_restrictions(so); goto integer; case SO_LABEL: @@ -3629,11 +4055,13 @@ integer: case SO_NP_EXTENSIONS: { struct so_np_extensions sonpx; - sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ? SONPX_SETOPTSHUT : 0; + sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ? + SONPX_SETOPTSHUT : 0; sonpx.npx_mask = SONPX_MASK_VALID; - error = sooptcopyout(sopt, &sonpx, sizeof(struct so_np_extensions)); - break; + error = sooptcopyout(sopt, &sonpx, + sizeof (struct so_np_extensions)); + break; } case SO_TRAFFIC_CLASS: @@ -3645,10 +4073,11 @@ integer: goto integer; case SO_TRAFFIC_CLASS_STATS: - error = sooptcopyout(sopt, &so->so_tc_stats, sizeof(so->so_tc_stats)); + error = sooptcopyout(sopt, &so->so_tc_stats, + sizeof (so->so_tc_stats)); break; - case SO_TRAFFIC_CLASS_DBG: + case SO_TRAFFIC_CLASS_DBG: error = sogetopt_tcdbg(so, sopt); break; @@ -3677,15 +4106,33 @@ integer: optval = so_get_recv_anyif(so); goto integer; + case SO_TRAFFIC_MGT_BACKGROUND: + /* This option is handled by lower layer(s) */ + if (so->so_proto != NULL && + so->so_proto->pr_ctloutput != NULL) { + (void) so->so_proto->pr_ctloutput(so, sopt); + } + break; + +#if FLOW_DIVERT + case SO_FLOW_DIVERT_TOKEN: + error = flow_divert_token_get(so, sopt); + break; +#endif /* FLOW_DIVERT */ + default: error = ENOPROTOOPT; break; } - socket_unlock(so, 1); - return (error); } +out: + if (dolock) + socket_unlock(so, 1); + return (error); } -/* The size limits on our soopt_getm is different from that on FreeBSD. + +/* + * The size limits on our soopt_getm is different from that on FreeBSD. * We limit the size of options to MCLBYTES. This will have to change * if we need to define options that need more space than MCLBYTES. */ @@ -3701,7 +4148,7 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp) how = sopt->sopt_p != kernproc ? 
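/*
 * Illustrative aside (not part of this patch): SO_NREAD above reports the
 * first record's size for atomic (datagram) protocols and sb_cc - sb_ctl
 * for streams. Userland sketch (SO_NREAD is an Apple extension):
 */
#include <sys/socket.h>

int
pending_bytes(int s)
{
	int nread = 0;
	socklen_t len = sizeof (nread);

	if (getsockopt(s, SOL_SOCKET, SO_NREAD, &nread, &len) == -1)
		return (-1);
	/* datagram: bytes in the first packet; stream: total queued data */
	return (nread);
}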
M_WAIT : M_DONTWAIT; MGET(m, how, MT_DATA); - if (m == 0) + if (m == NULL) return (ENOBUFS); if (sopt_size > MLEN) { MCLGET(m, how); if ((m->m_flags & M_EXT) == 0) { m_free(m); return (ENOBUFS); } m->m_len = MLEN; } else { m->m_len = sopt_size; } sopt_size -= m->m_len; *mp = m; m_prev = m; while (sopt_size > 0) { MGET(m, how, MT_DATA); - if (m == 0) { + if (m == NULL) { m_freem(*mp); return (ENOBUFS); } @@ -3767,8 +4214,11 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) sopt->sopt_val += m->m_len; m = m->m_next; } - if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ + /* should be allocated enough at ip6_sooptmcopyin() */ + if (m != NULL) { panic("soopt_mcopyin"); + /* NOTREACHED */ + } return (0); } @@ -3812,7 +4262,6 @@ soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) void sohasoutofband(struct socket *so) { - if (so->so_pgid < 0) gsignal(-so->so_pgid, SIGURG); else if (so->so_pgid > 0) @@ -3821,12 +4270,15 @@ sohasoutofband(struct socket *so) } int -sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql) +sopoll(struct socket *so, int events, kauth_cred_t cred, void * wql) { +#pragma unused(cred) struct proc *p = current_proc(); int revents = 0; socket_lock(so, 1); + so_update_last_owner_locked(so, PROC_NULL); + so_update_policy(so); if (events & (POLLIN | POLLRDNORM)) if (soreadable(so)) @@ -3865,16 +4317,22 @@ sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql) } int -soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, - __unused struct proc *p) +soo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) { +#pragma unused(fp) +#if !CONFIG_MACF_SOCKET +#pragma unused(ctx) +#endif /* MAC_SOCKET */ struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; struct klist *skl; socket_lock(so, 1); + so_update_last_owner_locked(so, PROC_NULL); + so_update_policy(so); #if CONFIG_MACF_SOCKET - if (mac_socket_check_kqfilter(proc_ucred(p), kn, so) != 0) { + if (mac_socket_check_kqfilter(proc_ucred(vfs_context_proc(ctx)), + kn, so) != 0) { socket_unlock(so, 1); return (1); } @@ -3899,7 +4357,7 @@ soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, } if (KNOTE_ATTACH(skl, kn)) { - switch(kn->kn_filter) { + switch (kn->kn_filter) { case EVFILT_READ: so->so_rcv.sb_flags |= SB_KNOTE; break; @@ -3942,9 +4400,10 @@ filt_soread(struct knote *kn, long hint) if (so->so_options & SO_ACCEPTCONN) { int isempty; - /* Radar 6615193 handle the listen case dynamically * for kqueue read filter. This allows to call listen() after registering * the kqueue EVFILT_READ. + /* + * Radar 6615193 handle the listen case dynamically + * for kqueue read filter. This allows listen() to be called + * after registering the kqueue EVFILT_READ. 
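/*
 * Illustrative aside (not part of this patch): sopoll() above now refreshes
 * the socket's owner and policy before testing readiness; the userland
 * contract is unchanged. Minimal poll() sketch:
 */
#include <poll.h>

int
wait_ready(int s)
{
	struct pollfd pfd = { .fd = s, .events = POLLIN | POLLOUT };

	/* sopoll() maps POLLIN/POLLOUT to soreadable()/sowriteable() */
	return (poll(&pfd, 1, 1000));
}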
*/ kn->kn_data = so->so_qlen; @@ -4000,17 +4459,16 @@ filt_soread(struct knote *kn, long hint) } int64_t lowwat = so->so_rcv.sb_lowat; - if (kn->kn_sfflags & NOTE_LOWAT) - { + if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_sdata > so->so_rcv.sb_hiwat) lowwat = so->so_rcv.sb_hiwat; else if (kn->kn_sdata > lowwat) lowwat = kn->kn_sdata; } - + if ((hint & SO_FILT_HINT_LOCKED) == 0) socket_unlock(so, 1); - + return ((kn->kn_flags & EV_OOBAND) || kn->kn_data >= lowwat); } @@ -4029,8 +4487,7 @@ filt_sowdetach(struct knote *kn) int so_wait_for_if_feedback(struct socket *so) { - if ((so->so_proto->pr_domain->dom_family == AF_INET || - so->so_proto->pr_domain->dom_family == AF_INET6) && + if ((SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) && (so->so_state & SS_ISCONNECTED)) { struct inpcb *inp = sotoinpcb(so); if (INP_WAIT_FOR_IF_FEEDBACK(inp)) @@ -4066,8 +4523,7 @@ filt_sowrite(struct knote *kn, long hint) goto out; } int64_t lowwat = so->so_snd.sb_lowat; - if (kn->kn_sfflags & NOTE_LOWAT) - { + if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_sdata > so->so_snd.sb_hiwat) lowwat = so->so_snd.sb_hiwat; else if (kn->kn_sdata > lowwat) @@ -4085,7 +4541,7 @@ filt_sowrite(struct knote *kn, long hint) out: if ((hint & SO_FILT_HINT_LOCKED) == 0) socket_unlock(so, 1); - return(ret); + return (ret); } static void @@ -4093,7 +4549,7 @@ filt_sockdetach(struct knote *kn) { struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; socket_lock(so, 1); - + if ((so->so_flags & SOF_KNOTE) != 0) if (KNOTE_DETACH(&so->so_klist, kn)) so->so_flags &= ~SOF_KNOTE; @@ -4105,55 +4561,75 @@ filt_sockev(struct knote *kn, long hint) { int ret = 0, locked = 0; struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; + long ev_hint = (hint & SO_FILT_HINT_EV); if ((hint & SO_FILT_HINT_LOCKED) == 0) { socket_lock(so, 1); locked = 1; } - switch (hint & SO_FILT_HINT_EV) { - case SO_FILT_HINT_CONNRESET: + if (ev_hint & SO_FILT_HINT_CONNRESET) { if (kn->kn_sfflags & NOTE_CONNRESET) kn->kn_fflags |= NOTE_CONNRESET; - break; - case SO_FILT_HINT_TIMEOUT: + } + if (ev_hint & SO_FILT_HINT_TIMEOUT) { if (kn->kn_sfflags & NOTE_TIMEOUT) kn->kn_fflags |= NOTE_TIMEOUT; - break; - case SO_FILT_HINT_NOSRCADDR: + } + if (ev_hint & SO_FILT_HINT_NOSRCADDR) { if (kn->kn_sfflags & NOTE_NOSRCADDR) kn->kn_fflags |= NOTE_NOSRCADDR; - break; - case SO_FILT_HINT_IFDENIED: + } + if (ev_hint & SO_FILT_HINT_IFDENIED) { if ((kn->kn_sfflags & NOTE_IFDENIED)) kn->kn_fflags |= NOTE_IFDENIED; - break; - case SO_FILT_HINT_KEEPALIVE: + } + if (ev_hint & SO_FILT_HINT_KEEPALIVE) { if (kn->kn_sfflags & NOTE_KEEPALIVE) kn->kn_fflags |= NOTE_KEEPALIVE; } + if (ev_hint & SO_FILT_HINT_ADAPTIVE_WTIMO) { + if (kn->kn_sfflags & NOTE_ADAPTIVE_WTIMO) + kn->kn_fflags |= NOTE_ADAPTIVE_WTIMO; + } + if (ev_hint & SO_FILT_HINT_ADAPTIVE_RTIMO) { + if (kn->kn_sfflags & NOTE_ADAPTIVE_RTIMO) + kn->kn_fflags |= NOTE_ADAPTIVE_RTIMO; + } + if (ev_hint & SO_FILT_HINT_CONNECTED) { + if (kn->kn_sfflags & NOTE_CONNECTED) + kn->kn_fflags |= NOTE_CONNECTED; + } + if (ev_hint & SO_FILT_HINT_DISCONNECTED) { + if (kn->kn_sfflags & NOTE_DISCONNECTED) + kn->kn_fflags |= NOTE_DISCONNECTED; + } + if (ev_hint & SO_FILT_HINT_CONNINFO_UPDATED) { + if (so->so_proto != NULL && + (so->so_proto->pr_flags & PR_EVCONNINFO) && + (kn->kn_sfflags & NOTE_CONNINFO_UPDATED)) + kn->kn_fflags |= NOTE_CONNINFO_UPDATED; + } if ((kn->kn_sfflags & NOTE_READCLOSED) && - (so->so_state & SS_CANTRCVMORE)) + (so->so_state & SS_CANTRCVMORE)) kn->kn_fflags |= NOTE_READCLOSED; if ((kn->kn_sfflags & 
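/*
 * Illustrative aside (not part of this patch): filt_soread() above clamps a
 * NOTE_LOWAT request to sb_hiwat, and for listening sockets reports the
 * accept-queue depth in kn_data (the Radar 6615193 case). Userland sketch
 * registering such a filter:
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

int
watch_socket(int kq, int s)
{
	struct kevent kev;

	/* fire once at least 128 bytes (or a pending connection) are queued */
	EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 128, NULL);
	return (kevent(kq, &kev, 1, NULL, 0, NULL));
}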
NOTE_WRITECLOSED) && - (so->so_state & SS_CANTSENDMORE)) + (so->so_state & SS_CANTSENDMORE)) kn->kn_fflags |= NOTE_WRITECLOSED; if ((kn->kn_sfflags & NOTE_SUSPEND) && - ((hint & SO_FILT_HINT_SUSPEND) || + ((ev_hint & SO_FILT_HINT_SUSPEND) || (so->so_flags & SOF_SUSPENDED))) { - kn->kn_fflags &= - ~(NOTE_SUSPEND | NOTE_RESUME); + kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME); kn->kn_fflags |= NOTE_SUSPEND; } if ((kn->kn_sfflags & NOTE_RESUME) && - ((hint & SO_FILT_HINT_RESUME) || + ((ev_hint & SO_FILT_HINT_RESUME) || (so->so_flags & SOF_SUSPENDED) == 0)) { - kn->kn_fflags &= - ~(NOTE_SUSPEND | NOTE_RESUME); + kn->kn_fflags &= ~(NOTE_SUSPEND | NOTE_RESUME); kn->kn_fflags |= NOTE_RESUME; } @@ -4171,38 +4647,40 @@ filt_sockev(struct knote *kn, long hint) if (locked) socket_unlock(so, 1); - return(ret); + return (ret); } void -get_sockev_state(struct socket *so, u_int32_t *statep) { +get_sockev_state(struct socket *so, u_int32_t *statep) +{ u_int32_t state = *(statep); - if (so->so_state & SS_ISCONNECTED) + if (so->so_state & SS_ISCONNECTED) state |= SOCKEV_CONNECTED; - else + else state &= ~(SOCKEV_CONNECTED); - state |= ((so->so_state & SS_ISDISCONNECTED) ? - SOCKEV_DISCONNECTED : 0); + state |= ((so->so_state & SS_ISDISCONNECTED) ? SOCKEV_DISCONNECTED : 0); *(statep) = state; - return; } -#define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof(void *)) + 1) + 1) +#define SO_LOCK_HISTORY_STR_LEN \ + (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof (void *)) + 1) + 1) -__private_extern__ const char * solockhistory_nr(struct socket *so) +__private_extern__ const char * +solockhistory_nr(struct socket *so) { - size_t n = 0; - int i; - static char lock_history_str[SO_LOCK_HISTORY_STR_LEN]; - - bzero(lock_history_str, sizeof(lock_history_str)); - for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) { - n += snprintf(lock_history_str + n, SO_LOCK_HISTORY_STR_LEN - n, "%lx:%lx ", - (uintptr_t) so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX], - (uintptr_t) so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]); + size_t n = 0; + int i; + static char lock_history_str[SO_LOCK_HISTORY_STR_LEN]; + + bzero(lock_history_str, sizeof (lock_history_str)); + for (i = SO_LCKDBG_MAX - 1; i >= 0; i--) { + n += snprintf(lock_history_str + n, + SO_LOCK_HISTORY_STR_LEN - n, "%p:%p ", + so->lock_lr[(so->next_lock_lr + i) % SO_LCKDBG_MAX], + so->unlock_lr[(so->next_unlock_lr + i) % SO_LCKDBG_MAX]); } - return lock_history_str; + return (lock_history_str); } int @@ -4239,8 +4717,10 @@ socket_unlock(struct socket *so, int refcount) lr_saved = __builtin_return_address(0); - if (so->so_proto == NULL) - panic("socket_unlock null so_proto so=%p\n", so); + if (so->so_proto == NULL) { + panic("%s: null so_proto so=%p\n", __func__, so); + /* NOTREACHED */ + } if (so && so->so_proto->pr_unlock) { error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved); @@ -4253,16 +4733,17 @@ socket_unlock(struct socket *so, int refcount) so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; if (refcount) { - if (so->so_usecount <= 0) - panic("socket_unlock: bad refcount=%d so=%p (%d, %d, %d) lrh=%s", - so->so_usecount, so, so->so_proto->pr_domain->dom_family, - so->so_type, so->so_proto->pr_protocol, - solockhistory_nr(so)); - + if (so->so_usecount <= 0) { + panic("%s: bad refcount=%d so=%p (%d, %d, %d) " + "lrh=%s", __func__, so->so_usecount, so, + SOCK_DOM(so), so->so_type, + SOCK_PROTO(so), solockhistory_nr(so)); + /* NOTREACHED */ + } + so->so_usecount--; - if (so->so_usecount == 0) { + if (so->so_usecount == 0) sofreelastref(so, 
1); - } } lck_mtx_unlock(mutex_held); } @@ -4274,8 +4755,8 @@ socket_unlock(struct socket *so, int refcount) void sofree(struct socket *so) { - lck_mtx_t *mutex_held; + if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else @@ -4318,50 +4799,71 @@ so_isdstlocal(struct socket *so) { struct inpcb *inp = (struct inpcb *)so->so_pcb; - if (so->so_proto->pr_domain->dom_family == AF_INET) { - return inaddr_local(inp->inp_faddr); - } else if (so->so_proto->pr_domain->dom_family == AF_INET6) { - return in6addr_local(&inp->in6p_faddr); - } - return 0; + if (SOCK_DOM(so) == PF_INET) + return (inaddr_local(inp->inp_faddr)); + else if (SOCK_DOM(so) == PF_INET6) + return (in6addr_local(&inp->in6p_faddr)); + + return (0); } int sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce) { + struct sockbuf *rcv, *snd; int err = 0, defunct; + rcv = &so->so_rcv; + snd = &so->so_snd; + defunct = (so->so_flags & SOF_DEFUNCT); if (defunct) { - if (!(so->so_snd.sb_flags & so->so_rcv.sb_flags & SB_DROP)) + if (!(snd->sb_flags & rcv->sb_flags & SB_DROP)) { panic("%s: SB_DROP not set", __func__); + /* NOTREACHED */ + } goto done; } if (so->so_flags & SOF_NODEFUNCT) { if (noforce) { err = EOPNOTSUPP; - SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p " - "[%d,%d] is not eligible for defunct (%d)\n", - __func__, proc_selfpid(), proc_pid(p), level, so, - INP_SOCKAF(so), INP_SOCKTYPE(so), err)); + SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) " + "so 0x%llx [%d,%d] is not eligible for defunct " + "(%d)\n", __func__, proc_selfpid(), proc_pid(p), + level, (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), err)); return (err); } so->so_flags &= ~SOF_NODEFUNCT; - SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] " - "defunct by force\n", __func__, proc_selfpid(), proc_pid(p), - level, so, INP_SOCKAF(so), INP_SOCKTYPE(so))); + SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx " + "[%d,%d] defunct by force\n", __func__, proc_selfpid(), + proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so))); } so->so_flags |= SOF_DEFUNCT; + /* Prevent further data from being appended to the socket buffers */ - so->so_snd.sb_flags |= SB_DROP; - so->so_rcv.sb_flags |= SB_DROP; + snd->sb_flags |= SB_DROP; + rcv->sb_flags |= SB_DROP; + + /* Flush any existing data in the socket buffers */ + if (rcv->sb_cc != 0) { + rcv->sb_flags &= ~SB_SEL; + selthreadclear(&rcv->sb_sel); + sbrelease(rcv); + } + if (snd->sb_cc != 0) { + snd->sb_flags &= ~SB_SEL; + selthreadclear(&snd->sb_sel); + sbrelease(snd); + } done: - SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] %s " - "defunct\n", __func__, proc_selfpid(), proc_pid(p), level, so, - INP_SOCKAF(so), INP_SOCKTYPE(so), + SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%d,%d] %s " + "defunct\n", __func__, proc_selfpid(), proc_pid(p), level, + (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), SOCK_TYPE(so), defunct ? 
"is already" : "marked as")); return (err); @@ -4372,21 +4874,44 @@ sodefunct(struct proc *p, struct socket *so, int level) { struct sockbuf *rcv, *snd; - if (!(so->so_flags & SOF_DEFUNCT)) + if (!(so->so_flags & SOF_DEFUNCT)) { panic("%s improperly called", __func__); - + /* NOTREACHED */ + } if (so->so_state & SS_DEFUNCT) goto done; rcv = &so->so_rcv; snd = &so->so_snd; - SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] is now " - "defunct [rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n", - __func__, proc_selfpid(), proc_pid(p), level, so, - INP_SOCKAF(so), INP_SOCKTYPE(so), - (uint32_t)rcv->sb_sel.si_flags, (uint32_t)snd->sb_sel.si_flags, - (uint16_t)rcv->sb_flags, (uint16_t)snd->sb_flags)); + if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { + char s[MAX_IPv6_STR_LEN]; + char d[MAX_IPv6_STR_LEN]; + struct inpcb *inp = sotoinpcb(so); + + SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx [%s " + "%s:%d -> %s:%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, " + "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(), + proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so), + (SOCK_TYPE(so) == SOCK_STREAM) ? "TCP" : "UDP", + inet_ntop(SOCK_DOM(so), ((SOCK_DOM(so) == PF_INET) ? + (void *)&inp->inp_laddr.s_addr : (void *)&inp->in6p_laddr), + s, sizeof (s)), ntohs(inp->in6p_lport), + inet_ntop(SOCK_DOM(so), (SOCK_DOM(so) == PF_INET) ? + (void *)&inp->inp_faddr.s_addr : (void *)&inp->in6p_faddr, + d, sizeof (d)), ntohs(inp->in6p_fport), + (uint32_t)rcv->sb_sel.si_flags, + (uint32_t)snd->sb_sel.si_flags, + rcv->sb_flags, snd->sb_flags)); + } else { + SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so 0x%llx " + "[%d,%d] is now defunct [rcv_si 0x%x, snd_si 0x%x, " + "rcv_fl 0x%x, snd_fl 0x%x]\n", __func__, proc_selfpid(), + proc_pid(p), level, (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), (uint32_t)rcv->sb_sel.si_flags, + (uint32_t)snd->sb_sel.si_flags, rcv->sb_flags, + snd->sb_flags)); + } /* * Unwedge threads blocked on sbwait() and sb_lock(). @@ -4395,9 +4920,9 @@ sodefunct(struct proc *p, struct socket *so, int level) sbwakeup(snd); if (rcv->sb_flags & SB_LOCK) - sbunlock(rcv, 1); + sbunlock(rcv, TRUE); /* keep socket locked */ if (snd->sb_flags & SB_LOCK) - sbunlock(snd, 1); + sbunlock(snd, TRUE); /* keep socket locked */ /* * Flush the buffers and disconnect. 
We explicitly call shutdown @@ -4419,11 +4944,16 @@ sodefunct(struct proc *p, struct socket *so, int level) if (so->so_error == 0) so->so_error = EBADF; - if (rcv->sb_cc != 0) + if (rcv->sb_cc != 0) { + rcv->sb_flags &= ~SB_SEL; + selthreadclear(&rcv->sb_sel); sbrelease(rcv); - if (snd->sb_cc != 0) + } + if (snd->sb_cc != 0) { + snd->sb_flags &= ~SB_SEL; + selthreadclear(&snd->sb_sel); sbrelease(snd); - + } so->so_state |= SS_DEFUNCT; done: @@ -4436,16 +4966,14 @@ so_set_recv_anyif(struct socket *so, int optval) int ret = 0; #if INET6 - if (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6) { + if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { #else - if (INP_SOCKAF(so) == AF_INET) { + if (SOCK_DOM(so) == PF_INET) { #endif /* !INET6 */ if (optval) sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF; else sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF; - } else { - ret = EPROTONOSUPPORT; } return (ret); @@ -4457,12 +4985,382 @@ so_get_recv_anyif(struct socket *so) int ret = 0; #if INET6 - if (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6) { + if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { #else - if (INP_SOCKAF(so) == AF_INET) { + if (SOCK_DOM(so) == PF_INET) { #endif /* !INET6 */ ret = (sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0; } return (ret); } + +int +so_set_restrictions(struct socket *so, uint32_t vals) +{ + int nocell_old, nocell_new; + int ret = 0; + + /* + * Deny-type restrictions are trapdoors; once set they cannot be + * unset for the lifetime of the socket. This allows them to be + * issued by a framework on behalf of the application without + * having to worry that they can be undone. + * + * Note here that socket-level restrictions overrides any protocol + * level restrictions. For instance, SO_RESTRICT_DENY_CELLULAR + * socket restriction issued on the socket has a higher precendence + * than INP_NO_IFT_CELLULAR. The latter is affected by the UUID + * policy PROC_UUID_NO_CELLULAR for unrestricted sockets only, + * i.e. when SO_RESTRICT_DENY_CELLULAR has not been issued. + */ + nocell_old = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR); + so->so_restrictions |= (vals & (SO_RESTRICT_DENY_IN | + SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR)); + nocell_new = (so->so_restrictions & SO_RESTRICT_DENY_CELLULAR); + + /* other than deny cellular, there's nothing more to do */ + if ((nocell_new - nocell_old) == 0) + return (ret); + + /* we can only set, not clear restrictions */ + VERIFY((nocell_new - nocell_old) > 0); + +#if INET6 + if (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6) { +#else + if (SOCK_DOM(so) == PF_INET) { +#endif /* !INET6 */ + /* if deny cellular is now set, do what's needed for INPCB */ + inp_set_nocellular(sotoinpcb(so)); + } + + return (ret); +} + +uint32_t +so_get_restrictions(struct socket *so) +{ + return (so->so_restrictions & (SO_RESTRICT_DENY_IN | + SO_RESTRICT_DENY_OUT | SO_RESTRICT_DENY_CELLULAR)); +} + +struct sockaddr_entry * +sockaddrentry_alloc(int how) +{ + struct sockaddr_entry *se; + + se = (how == M_WAITOK) ? 
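/*
 * Self-contained model (not part of this patch): so_set_restrictions()
 * above treats the deny bits as a trapdoor -- they may be ORed in but never
 * cleared for the socket's lifetime, with VERIFY() guarding the invariant.
 * The same idiom in plain C, assert() standing in for VERIFY():
 */
#include <assert.h>
#include <stdint.h>

#define DENY_IN		0x1	/* stands in for SO_RESTRICT_DENY_IN */
#define DENY_OUT	0x2	/* stands in for SO_RESTRICT_DENY_OUT */
#define DENY_CELLULAR	0x4	/* stands in for SO_RESTRICT_DENY_CELLULAR */
#define DENY_MASK	(DENY_IN | DENY_OUT | DENY_CELLULAR)

static uint32_t
set_restrictions(uint32_t cur, uint32_t vals)
{
	uint32_t next = cur | (vals & DENY_MASK);

	assert((next & cur) == cur);	/* bits only accumulate, never clear */
	return (next);
}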
zalloc(se_zone) : zalloc_noblock(se_zone); + if (se != NULL) + bzero(se, se_zone_size); + + return (se); +} + +void +sockaddrentry_free(struct sockaddr_entry *se) +{ + if (se->se_addr != NULL) { + FREE(se->se_addr, M_SONAME); + se->se_addr = NULL; + } + zfree(se_zone, se); +} + +struct sockaddr_entry * +sockaddrentry_dup(const struct sockaddr_entry *src_se, int how) +{ + struct sockaddr_entry *dst_se; + + dst_se = sockaddrentry_alloc(how); + if (dst_se != NULL) { + int len = src_se->se_addr->sa_len; + + MALLOC(dst_se->se_addr, struct sockaddr *, + len, M_SONAME, how | M_ZERO); + if (dst_se->se_addr != NULL) { + bcopy(src_se->se_addr, dst_se->se_addr, len); + } else { + sockaddrentry_free(dst_se); + dst_se = NULL; + } + } + + return (dst_se); +} + +struct sockaddr_list * +sockaddrlist_alloc(int how) +{ + struct sockaddr_list *sl; + + sl = (how == M_WAITOK) ? zalloc(sl_zone) : zalloc_noblock(sl_zone); + if (sl != NULL) { + bzero(sl, sl_zone_size); + TAILQ_INIT(&sl->sl_head); + } + return (sl); +} + +void +sockaddrlist_free(struct sockaddr_list *sl) +{ + struct sockaddr_entry *se, *tse; + + TAILQ_FOREACH_SAFE(se, &sl->sl_head, se_link, tse) { + sockaddrlist_remove(sl, se); + sockaddrentry_free(se); + } + VERIFY(sl->sl_cnt == 0 && TAILQ_EMPTY(&sl->sl_head)); + zfree(sl_zone, sl); +} + +void +sockaddrlist_insert(struct sockaddr_list *sl, struct sockaddr_entry *se) +{ + VERIFY(!(se->se_flags & SEF_ATTACHED)); + se->se_flags |= SEF_ATTACHED; + TAILQ_INSERT_TAIL(&sl->sl_head, se, se_link); + sl->sl_cnt++; + VERIFY(sl->sl_cnt != 0); +} + +void +sockaddrlist_remove(struct sockaddr_list *sl, struct sockaddr_entry *se) +{ + VERIFY(se->se_flags & SEF_ATTACHED); + se->se_flags &= ~SEF_ATTACHED; + VERIFY(sl->sl_cnt != 0); + sl->sl_cnt--; + TAILQ_REMOVE(&sl->sl_head, se, se_link); +} + +struct sockaddr_list * +sockaddrlist_dup(const struct sockaddr_list *src_sl, int how) +{ + struct sockaddr_entry *src_se, *tse; + struct sockaddr_list *dst_sl; + + dst_sl = sockaddrlist_alloc(how); + if (dst_sl == NULL) + return (NULL); + + TAILQ_FOREACH_SAFE(src_se, &src_sl->sl_head, se_link, tse) { + struct sockaddr_entry *dst_se; + + if (src_se->se_addr == NULL) + continue; + + dst_se = sockaddrentry_dup(src_se, how); + if (dst_se == NULL) { + sockaddrlist_free(dst_sl); + return (NULL); + } + + sockaddrlist_insert(dst_sl, dst_se); + } + VERIFY(src_sl->sl_cnt == dst_sl->sl_cnt); + + return (dst_sl); +} + +int +so_set_effective_pid(struct socket *so, int epid, struct proc *p) +{ + struct proc *ep = PROC_NULL; + int error = 0; + + /* pid 0 is reserved for kernel */ + if (epid == 0) { + error = EINVAL; + goto done; + } + + /* + * If this is an in-kernel socket, prevent its delegate + * association from changing unless the socket option is + * coming from within the kernel itself. + */ + if (so->last_pid == 0 && p != kernproc) { + error = EACCES; + goto done; + } + + /* + * If this is issued by a process that's recorded as the + * real owner of the socket, or if the pid is the same as + * the process's own pid, then proceed. Otherwise ensure + * that the issuing process has the necessary privileges. 
+ */ + if (epid != so->last_pid || epid != proc_pid(p)) { + if ((error = priv_check_cred(kauth_cred_get(), + PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) { + error = EACCES; + goto done; + } + } + + /* Find the process that corresponds to the effective pid */ + if ((ep = proc_find(epid)) == PROC_NULL) { + error = ESRCH; + goto done; + } + + /* + * If a process tries to delegate the socket to itself, then + * there's really nothing to do; treat it as a way for the + * delegate association to be cleared. Note that we check + * the passed-in proc rather than calling proc_selfpid(), + * as we need to check the process issuing the socket option + * which could be kernproc. Given that we don't allow 0 for + * effective pid, it means that a delegated in-kernel socket + * stays delegated during its lifetime (which is probably OK.) + */ + if (epid == proc_pid(p)) { + so->so_flags &= ~SOF_DELEGATED; + so->e_upid = 0; + so->e_pid = 0; + uuid_clear(so->e_uuid); + } else { + so->so_flags |= SOF_DELEGATED; + so->e_upid = proc_uniqueid(ep); + so->e_pid = proc_pid(ep); + proc_getexecutableuuid(ep, so->e_uuid, sizeof (so->e_uuid)); + } + +done: + if (error == 0 && net_io_policy_log) { + uuid_string_t buf; + + uuid_unparse(so->e_uuid, buf); + log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) " + "euuid %s%s\n", __func__, proc_name_address(p), + proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + SOCK_TYPE(so), so->e_pid, proc_name_address(ep), buf, + ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : "")); + } else if (error != 0 && net_io_policy_log) { + log(LOG_ERR, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d (%s) " + "ERROR (%d)\n", __func__, proc_name_address(p), + proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + SOCK_TYPE(so), epid, (ep == PROC_NULL) ? "PROC_NULL" : + proc_name_address(ep), error); + } + + if (ep != PROC_NULL) + proc_rele(ep); + + return (error); +} + +int +so_set_effective_uuid(struct socket *so, uuid_t euuid, struct proc *p) +{ + uuid_string_t buf; + uuid_t uuid; + int error = 0; + + /* UUID must not be all-zeroes (reserved for kernel) */ + if (uuid_is_null(euuid)) { + error = EINVAL; + goto done;; + } + + /* + * If this is an in-kernel socket, prevent its delegate + * association from changing unless the socket option is + * coming from within the kernel itself. + */ + if (so->last_pid == 0 && p != kernproc) { + error = EACCES; + goto done; + } + + /* Get the UUID of the issuing process */ + proc_getexecutableuuid(p, uuid, sizeof (uuid)); + + /* + * If this is issued by a process that's recorded as the + * real owner of the socket, or if the uuid is the same as + * the process's own uuid, then proceed. Otherwise ensure + * that the issuing process has the necessary privileges. + */ + if (uuid_compare(euuid, so->last_uuid) != 0 || + uuid_compare(euuid, uuid) != 0) { + if ((error = priv_check_cred(kauth_cred_get(), + PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0))) { + error = EACCES; + goto done; + } + } + + /* + * If a process tries to delegate the socket to itself, then + * there's really nothing to do; treat it as a way for the + * delegate association to be cleared. Note that we check + * the uuid of the passed-in proc rather than that of the + * current process, as we need to check the process issuing + * the socket option which could be kernproc itself. Given + * that we don't allow 0 for effective uuid, it means that + * a delegated in-kernel socket stays delegated during its + * lifetime (which is okay.) 
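/*
 * Sketch (hedged, private interface): so_set_effective_pid() above backs
 * the new SO_DELEGATED option. The option is not in the public SDK; a
 * caller is expected to need the PRIV_NET_PRIVILEGED_SOCKET_DELEGATE
 * privilege unless it names itself, in which case the delegation is
 * cleared. Availability of the SO_DELEGATED constant is an assumption:
 */
#include <sys/types.h>
#include <sys/socket.h>

int
delegate_socket(int s, pid_t epid)
{
	/* kernel copies in an int-sized pid and resolves it via proc_find() */
	return (setsockopt(s, SOL_SOCKET, SO_DELEGATED, &epid, sizeof (epid)));
}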
+ */ + if (uuid_compare(euuid, uuid) == 0) { + so->so_flags &= ~SOF_DELEGATED; + so->e_upid = 0; + so->e_pid = 0; + uuid_clear(so->e_uuid); + } else { + so->so_flags |= SOF_DELEGATED; + /* + * Unlike so_set_effective_pid(), we only have the UUID + * here and the process ID is not known. Inherit the + * real {pid,upid} of the socket. + */ + so->e_upid = so->last_upid; + so->e_pid = so->last_pid; + uuid_copy(so->e_uuid, euuid); + } + +done: + if (error == 0 && net_io_policy_log) { + uuid_unparse(so->e_uuid, buf); + log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] epid %d " + "euuid %s%s\n", __func__, proc_name_address(p), proc_pid(p), + (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + SOCK_TYPE(so), so->e_pid, buf, + ((so->so_flags & SOF_DELEGATED) ? " [delegated]" : "")); + } else if (error != 0 && net_io_policy_log) { + uuid_unparse(euuid, buf); + log(LOG_DEBUG, "%s[%s,%d]: so 0x%llx [%d,%d] euuid %s " + "ERROR (%d)\n", __func__, proc_name_address(p), proc_pid(p), + (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + SOCK_TYPE(so), buf, error); + } + + return (error); +} + +void +netpolicy_post_msg(uint32_t ev_code, struct netpolicy_event_data *ev_data, + uint32_t ev_datalen) +{ + struct kev_msg ev_msg; + + /* + * A netpolicy event always starts with a netpolicy_event_data + * structure, but the caller can provide for a longer event + * structure to post, depending on the event code. + */ + VERIFY(ev_data != NULL && ev_datalen >= sizeof (*ev_data)); + + bzero(&ev_msg, sizeof (ev_msg)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_NETPOLICY_SUBCLASS; + ev_msg.event_code = ev_code; + + ev_msg.dv[0].data_ptr = ev_data; + ev_msg.dv[0].data_length = ev_datalen; + + kev_post_msg(&ev_msg); +} diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c index 32b896ee8..dae44827b 100644 --- a/bsd/kern/uipc_socket2.c +++ b/bsd/kern/uipc_socket2.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 1998-2012 Apple Inc. All rights reserved. + * Copyright (c) 1998-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ @@ -59,7 +59,6 @@ * SUCH DAMAGE. * * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 - * $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.9 2001/07/26 18:53:02 peter Exp $ */ /* * NOTICE: This file was modified by SPARTA, Inc. 
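/*
 * Illustrative aside (not part of this patch): netpolicy_post_msg() above
 * publishes through kev_post_msg(), so userland can observe these events on
 * a kernel-event socket. The sketch below filters on the whole network
 * class because KEV_NETPOLICY_SUBCLASS lives in private headers:
 */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <sys/kern_event.h>

int
listen_net_events(void)
{
	struct kev_request req = {
		.vendor_code = KEV_VENDOR_APPLE,
		.kev_class = KEV_NETWORK_CLASS,
		.kev_subclass = KEV_ANY_SUBCLASS,
	};
	int s = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);

	if (s == -1 || ioctl(s, SIOCSKEVFILT, &req) == -1)
		return (-1);
	/* read(2) on s now yields struct kern_event_msg records */
	return (s);
}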
in 2005 to introduce @@ -83,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -100,8 +100,8 @@ /* TODO: this should be in a header file somewhere */ extern void postevent(struct socket *, struct sockbuf *, int); -#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4) -#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5) +#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4) +#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5) static inline void sbcompress(struct sockbuf *, struct mbuf *, struct mbuf *); static struct socket *sonewconn_internal(struct socket *, int); @@ -109,6 +109,7 @@ static int sbappendaddr_internal(struct sockbuf *, struct sockaddr *, struct mbuf *, struct mbuf *); static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *, struct mbuf *); +static void soevent_ifdenied(struct socket *); /* * Primitive routines for operating on sockets and socket buffers @@ -116,9 +117,10 @@ static int sbappendcontrol_internal(struct sockbuf *, struct mbuf *, static int soqlimitcompat = 1; static int soqlencomp = 0; -/* Based on the number of mbuf clusters configured, high_sb_max and sb_max can get - * scaled up or down to suit that memory configuration. high_sb_max is a higher - * limit on sb_max that is checked when sb_max gets set through sysctl. +/* + * Based on the number of mbuf clusters configured, high_sb_max and sb_max can + * get scaled up or down to suit that memory configuration. high_sb_max is a + * higher limit on sb_max that is checked when sb_max gets set through sysctl. */ u_int32_t sb_max = SB_MAX; /* XXX should be static */ @@ -131,6 +133,11 @@ __private_extern__ int32_t total_sbmb_cnt = 0; __private_extern__ u_int32_t net_io_policy_throttled = 0; static int sysctl_io_policy_throttled SYSCTL_HANDLER_ARGS; +u_int32_t net_io_policy_log = 0; /* log socket policy changes */ +#if CONFIG_PROC_UUID_POLICY +u_int32_t net_io_policy_uuid = 1; /* enable UUID socket policy */ +#endif /* CONFIG_PROC_UUID_POLICY */ + /* * Procedures to manipulate state flags of socket * and do appropriate wakeups. Normal sequence from the @@ -202,7 +209,8 @@ soisconnected(struct socket *so) wakeup((caddr_t)&so->so_timeo); sorwakeup(so); sowwakeup(so); - soevent(so, SO_FILT_HINT_LOCKED); + soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNECTED | + SO_FILT_HINT_CONNINFO_UPDATED); } } @@ -223,14 +231,16 @@ soisdisconnected(struct socket *so) { so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); - soevent(so, SO_FILT_HINT_LOCKED); + soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED | + SO_FILT_HINT_CONNINFO_UPDATED); sflt_notify(so, sock_evt_disconnected, NULL); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } -/* This function will issue a wakeup like soisdisconnected but it will not +/* + * This function will issue a wakeup like soisdisconnected but it will not * notify the socket filters. This will avoid unlocking the socket * in the midst of closing it. */ @@ -239,7 +249,8 @@ sodisconnectwakeup(struct socket *so) { so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED); - soevent(so, SO_FILT_HINT_LOCKED); + soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED | + SO_FILT_HINT_CONNINFO_UPDATED); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); @@ -289,8 +300,7 @@ sonewconn_internal(struct socket *head, int connstatus) if (so_qlen >= (soqlimitcompat ? 
head->so_qlimit : (3 * head->so_qlimit / 2))) return ((struct socket *)0); - so = soalloc(1, head->so_proto->pr_domain->dom_family, - head->so_type); + so = soalloc(1, SOCK_DOM(head), head->so_type); if (so == NULL) return ((struct socket *)0); /* check if head was closed during the soalloc */ @@ -310,33 +320,33 @@ sonewconn_internal(struct socket *head, int connstatus) so->so_cred = head->so_cred; so->last_pid = head->last_pid; so->last_upid = head->last_upid; + memcpy(so->last_uuid, head->last_uuid, sizeof (so->last_uuid)); + if (head->so_flags & SOF_DELEGATED) { + so->e_pid = head->e_pid; + so->e_upid = head->e_upid; + memcpy(so->e_uuid, head->e_uuid, sizeof (so->e_uuid)); + } /* inherit socket options stored in so_flags */ - so->so_flags = head->so_flags & (SOF_NOSIGPIPE | - SOF_NOADDRAVAIL | - SOF_REUSESHAREUID | - SOF_NOTIFYCONFLICT | - SOF_BINDRANDOMPORT | - SOF_NPX_SETOPTSHUT | - SOF_NODEFUNCT | - SOF_PRIVILEGED_TRAFFIC_CLASS| - SOF_NOTSENT_LOWAT | - SOF_USELRO); + so->so_flags = head->so_flags & + (SOF_NOSIGPIPE | SOF_NOADDRAVAIL | SOF_REUSESHAREUID | + SOF_NOTIFYCONFLICT | SOF_BINDRANDOMPORT | SOF_NPX_SETOPTSHUT | + SOF_NODEFUNCT | SOF_PRIVILEGED_TRAFFIC_CLASS| SOF_NOTSENT_LOWAT | + SOF_USELRO | SOF_DELEGATED); so->so_usecount = 1; so->next_lock_lr = 0; so->next_unlock_lr = 0; -#ifdef __APPLE__ so->so_rcv.sb_flags |= SB_RECV; /* XXX */ so->so_rcv.sb_so = so->so_snd.sb_so = so; TAILQ_INIT(&so->so_evlist); -#endif #if CONFIG_MACF_SOCKET mac_socket_label_associate_accept(head, so); #endif /* inherit traffic management properties of listener */ - so->so_traffic_mgt_flags = head->so_traffic_mgt_flags & (TRAFFIC_MGT_SO_BACKGROUND); + so->so_traffic_mgt_flags = + head->so_traffic_mgt_flags & (TRAFFIC_MGT_SO_BACKGROUND); so->so_background_thread = head->so_background_thread; so->so_traffic_class = head->so_traffic_class; @@ -362,7 +372,8 @@ sonewconn_internal(struct socket *head, int connstatus) } if (head->so_proto->pr_unlock) { socket_lock(head, 0); - /* Radar 7385998 Recheck that the head is still accepting + /* + * Radar 7385998 Recheck that the head is still accepting * to avoid race condition when head is getting closed. */ if ((head->so_options & SO_ACCEPTCONN) == 0) { @@ -372,15 +383,15 @@ sonewconn_internal(struct socket *head, int connstatus) } } -#ifdef __APPLE__ - so->so_proto->pr_domain->dom_refs++; -#endif + atomic_add_32(&so->so_proto->pr_domain->dom_refs, 1); + /* Insert in head appropriate lists */ so->so_head = head; - /* Since this socket is going to be inserted into the incomp - * queue, it can be picked up by another thread in - * tcp_dropdropablreq to get dropped before it is setup.. + /* + * Since this socket is going to be inserted into the incomp + * queue, it can be picked up by another thread in + * tcp_dropdropablreq to get dropped before it is setup.. 
* To prevent this race, set in-progress flag which can be * cleared later */ @@ -396,10 +407,8 @@ sonewconn_internal(struct socket *head, int connstatus) } head->so_qlen++; -#ifdef __APPLE__ /* Attach socket filters for this protocol */ sflt_initsock(so); -#endif if (connstatus) { so->so_state |= connstatus; @@ -435,7 +444,7 @@ void socantsendmore(struct socket *so) { so->so_state |= SS_CANTSENDMORE; - soevent(so, SO_FILT_HINT_LOCKED); + soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTSENDMORE); sflt_notify(so, sock_evt_cantsendmore, NULL); sowwakeup(so); } @@ -444,119 +453,80 @@ void socantrcvmore(struct socket *so) { so->so_state |= SS_CANTRCVMORE; - soevent(so, SO_FILT_HINT_LOCKED); + soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE); sflt_notify(so, sock_evt_cantrecvmore, NULL); sorwakeup(so); } /* * Wait for data to arrive at/drain from a socket buffer. - * - * Returns: 0 Success - * EBADF - * msleep:EINTR */ int sbwait(struct sockbuf *sb) { - int error = 0; - uintptr_t lr_saved; + boolean_t nointr = (sb->sb_flags & SB_NOINTR); + void *lr_saved = __builtin_return_address(0); struct socket *so = sb->sb_so; lck_mtx_t *mutex_held; struct timespec ts; + int error = 0; - lr_saved = (uintptr_t) __builtin_return_address(0); + if (so == NULL) { + panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n", + __func__, sb, sb->sb_flags, lr_saved); + /* NOTREACHED */ + } else if (so->so_usecount < 1) { + panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p " + "lrh= %s\n", __func__, sb, sb->sb_flags, so, + so->so_usecount, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ + } if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else mutex_held = so->so_proto->pr_domain->dom_mtx; - lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); - sb->sb_flags |= SB_WAIT; + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); - if (so->so_usecount < 1) - panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount); ts.tv_sec = sb->sb_timeo.tv_sec; ts.tv_nsec = sb->sb_timeo.tv_usec * 1000; + + sb->sb_waiters++; + VERIFY(sb->sb_waiters != 0); + error = msleep((caddr_t)&sb->sb_cc, mutex_held, - (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", &ts); + nointr ? PSOCK : PSOCK | PCATCH, + nointr ? "sbwait_nointr" : "sbwait", &ts); - lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + VERIFY(sb->sb_waiters != 0); + sb->sb_waiters--; - if (so->so_usecount < 1) - panic("sbwait: so=%p refcount=%d\n", so, so->so_usecount); + if (so->so_usecount < 1) { + panic("%s: 2 sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p " + "lrh= %s\n", __func__, sb, sb->sb_flags, so, + so->so_usecount, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ + } if ((so->so_state & SS_DRAINING) || (so->so_flags & SOF_DEFUNCT)) { error = EBADF; if (so->so_flags & SOF_DEFUNCT) { - SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", - __func__, proc_selfpid(), so, INP_SOCKAF(so), - INP_SOCKTYPE(so), error)); + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] " + "(%d)\n", __func__, proc_selfpid(), + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); } } return (error); } -/* - * Lock a sockbuf already known to be locked; - * return any error returned from sleep (EINTR). 
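/*
 * Self-contained model (not part of this patch): sbwait() above replaces
 * the old SB_WAIT flag with an sb_waiters count bracketing the msleep(), so
 * sbwakeup() just below only issues a wakeup while a thread is really
 * asleep. The same counter idiom with pthreads:
 */
#include <pthread.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static unsigned waiters;	/* plays the role of sb_waiters */
static unsigned bytes;		/* plays the role of sb_cc */

void
wait_for_data(void)
{
	pthread_mutex_lock(&mtx);
	waiters++;			/* bracket the sleep, like sbwait() */
	while (bytes == 0)
		pthread_cond_wait(&cv, &mtx);
	waiters--;
	pthread_mutex_unlock(&mtx);
}

void
post_data(unsigned n)
{
	pthread_mutex_lock(&mtx);
	bytes += n;
	if (waiters > 0)		/* the sbwakeup() test */
		pthread_cond_signal(&cv);
	pthread_mutex_unlock(&mtx);
}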
- * - * Returns: 0 Success - * EINTR - */ -int -sb_lock(struct sockbuf *sb) -{ - struct socket *so = sb->sb_so; - lck_mtx_t *mutex_held; - int error = 0; - - if (so == NULL) - panic("sb_lock: null so back pointer sb=%p\n", sb); - - while (sb->sb_flags & SB_LOCK) { - sb->sb_flags |= SB_WANT; - - if (so->so_proto->pr_getlock != NULL) - mutex_held = (*so->so_proto->pr_getlock)(so, 0); - else - mutex_held = so->so_proto->pr_domain->dom_mtx; - lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); - - if (so->so_usecount < 1) - panic("sb_lock: so=%p refcount=%d\n", so, - so->so_usecount); - - error = msleep((caddr_t)&sb->sb_flags, mutex_held, - (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, - "sb_lock", 0); - if (so->so_usecount < 1) - panic("sb_lock: 2 so=%p refcount=%d\n", so, - so->so_usecount); - - if (error == 0 && (so->so_flags & SOF_DEFUNCT)) { - error = EBADF; - SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", - __func__, proc_selfpid(), so, INP_SOCKAF(so), - INP_SOCKTYPE(so), error)); - } - - if (error) - return (error); - } - sb->sb_flags |= SB_LOCK; - return (0); -} - void sbwakeup(struct sockbuf *sb) { - if (sb->sb_flags & SB_WAIT) { - sb->sb_flags &= ~SB_WAIT; + if (sb->sb_waiters > 0) wakeup((caddr_t)&sb->sb_cc); - } } /* @@ -568,10 +538,10 @@ void sowakeup(struct socket *so, struct sockbuf *sb) { if (so->so_flags & SOF_DEFUNCT) { - SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] si 0x%x, " - "fl 0x%x [%s]\n", __func__, proc_selfpid(), so, - INP_SOCKAF(so), INP_SOCKTYPE(so), - (uint32_t)sb->sb_sel.si_flags, (uint16_t)sb->sb_flags, + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] si 0x%x, " + "fl 0x%x [%s]\n", __func__, proc_selfpid(), + (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + SOCK_TYPE(so), (uint32_t)sb->sb_sel.si_flags, sb->sb_flags, (sb->sb_flags & SB_RECV) ? 
"rcv" : "snd")); } @@ -588,22 +558,23 @@ sowakeup(struct socket *so, struct sockbuf *sb) KNOTE(&sb->sb_sel.si_note, SO_FILT_HINT_LOCKED); } if (sb->sb_flags & SB_UPCALL) { - void (*so_upcall)(struct socket *, caddr_t, int); - caddr_t so_upcallarg; + void (*sb_upcall)(struct socket *, void *, int); + caddr_t sb_upcallarg; - so_upcall = so->so_upcall; - so_upcallarg = so->so_upcallarg; + sb_upcall = sb->sb_upcall; + sb_upcallarg = sb->sb_upcallarg; /* Let close know that we're about to do an upcall */ so->so_upcallusecount++; socket_unlock(so, 0); - (*so_upcall)(so, so_upcallarg, M_DONTWAIT); + (*sb_upcall)(so, sb_upcallarg, M_DONTWAIT); socket_lock(so, 0); so->so_upcallusecount--; /* Tell close that it's safe to proceed */ - if (so->so_flags & SOF_CLOSEWAIT && so->so_upcallusecount == 0) - wakeup((caddr_t)&so->so_upcall); + if ((so->so_flags & SOF_CLOSEWAIT) && + so->so_upcallusecount == 0) + wakeup((caddr_t)&so->so_upcallusecount); } } @@ -665,9 +636,8 @@ soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; return (0); bad2: -#ifdef __APPLE__ + so->so_snd.sb_flags &= ~SB_SEL; selthreadclear(&so->so_snd.sb_sel); -#endif sbrelease(&so->so_snd); bad: return (ENOBUFS); @@ -776,18 +746,20 @@ sbappendstream(struct sockbuf *sb, struct mbuf *m) { struct socket *so = sb->sb_so; - if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) - panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n", - m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord); - - SBLASTMBUFCHK(sb, __func__); - if (m == NULL || (sb->sb_flags & SB_DROP)) { if (m != NULL) m_freem(m); return (0); } + if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) { + panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n", + m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord); + /* NOTREACHED */ + } + + SBLASTMBUFCHK(sb, __func__); + if (sb->sb_flags & SB_RECV) { int error = sflt_data_in(so, NULL, &m, NULL, 0); SBLASTRECORDCHK(sb, "sbappendstream 1"); @@ -925,7 +897,7 @@ sbappendrecord(struct sockbuf *sb, struct mbuf *m0) SBLASTRECORDCHK(sb, "sbappendrecord 2"); if (sb->sb_lastrecord != NULL) { sb->sb_lastrecord->m_nextpkt = m0; - } else { + } else { sb->sb_mb = m0; } sb->sb_lastrecord = m0; @@ -1154,7 +1126,6 @@ sbappendcontrol_internal(struct sockbuf *sb, struct mbuf *m0, if (space > sbspace(sb) && !(sb->sb_flags & SB_UNIX)) return (0); n->m_next = m0; /* concatenate data to control */ - SBLASTRECORDCHK(sb, "sbappendcontrol 1"); for (m = control; m->m_next != NULL; m = m->m_next) @@ -1228,6 +1199,336 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, return (result); } +/* + * Append a contiguous TCP data blob with TCP sequence number as control data + * as a new msg to the receive socket buffer. + */ +int +sbappendmsgstream_rcv(struct sockbuf *sb, struct mbuf *m, uint32_t seqnum, + int unordered) +{ + struct mbuf *m_eor = NULL; + u_int32_t data_len = 0; + int ret = 0; + struct socket *so = sb->sb_so; + + VERIFY((m->m_flags & M_PKTHDR) && m_pktlen(m) > 0); + VERIFY(so->so_msg_state != NULL); + VERIFY(sb->sb_flags & SB_RECV); + + /* Keep the TCP sequence number in the mbuf pkthdr */ + m->m_pkthdr.msg_seq = seqnum; + + /* find last mbuf and set M_EOR */ + for (m_eor = m; ; m_eor = m_eor->m_next) { + /* + * If the msg is unordered, we need to account for + * these bytes in receive socket buffer size. Otherwise, + * the receive window advertised will shrink because + * of the additional unordered bytes added to the + * receive buffer. 
+ */ + if (unordered) { + m_eor->m_flags |= M_UNORDERED_DATA; + data_len += m_eor->m_len; + so->so_msg_state->msg_uno_bytes += m_eor->m_len; + } else { + m_eor->m_flags &= ~M_UNORDERED_DATA; + } + + if (m_eor->m_next == NULL) + break; + } + + /* set EOR flag at end of byte blob */ + m_eor->m_flags |= M_EOR; + + /* expand the receive socket buffer to allow unordered data */ + if (unordered && !sbreserve(sb, sb->sb_hiwat + data_len)) { + /* + * Could not allocate memory for unordered data, it + * means this packet will have to be delivered in order + */ + printf("%s: could not reserve space for unordered data\n", + __func__); + } + + ret = sbappendrecord(sb, m); + return (ret); +} + +/* + * TCP streams have message based out of order delivery support, or have + * Multipath TCP support, or are regular TCP sockets + */ +int +sbappendstream_rcvdemux(struct socket *so, struct mbuf *m, uint32_t seqnum, + int unordered) +{ + int ret = 0; + + if ((m != NULL) && (m_pktlen(m) <= 0)) { + m_freem(m); + return (ret); + } + + if (so->so_flags & SOF_ENABLE_MSGS) { + ret = sbappendmsgstream_rcv(&so->so_rcv, m, seqnum, unordered); + } +#if MPTCP + else if (so->so_flags & SOF_MPTCP_TRUE) { + ret = sbappendmptcpstream_rcv(&so->so_rcv, m); + } +#endif /* MPTCP */ + else { + ret = sbappendstream(&so->so_rcv, m); + } + return (ret); +} + +#if MPTCP +int +sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m) +{ + struct socket *so = sb->sb_so; + + VERIFY(m == NULL || (m->m_flags & M_PKTHDR)); + /* SB_NOCOMPRESS must be set prevent loss of M_PKTHDR data */ + VERIFY((sb->sb_flags & (SB_RECV|SB_NOCOMPRESS)) == + (SB_RECV|SB_NOCOMPRESS)); + + if (m == NULL || m_pktlen(m) == 0 || (sb->sb_flags & SB_DROP) || + (so->so_state & SS_CANTRCVMORE)) { + if (m != NULL) + m_freem(m); + return (0); + } + /* the socket is not closed, so SOF_MP_SUBFLOW must be set */ + VERIFY(so->so_flags & SOF_MP_SUBFLOW); + + if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) { + panic("%s: nexpkt %p || mb %p != lastrecord %p\n", __func__, + m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord); + /* NOTREACHED */ + } + + SBLASTMBUFCHK(sb, __func__); + + mptcp_adj_rmap(so, m); + + /* No filter support (SB_RECV) on mptcp subflow sockets */ + + sbcompress(sb, m, sb->sb_mbtail); + sb->sb_lastrecord = sb->sb_mb; + SBLASTRECORDCHK(sb, __func__); + return (1); +} +#endif /* MPTCP */ + +/* + * Append message to send socket buffer based on priority. 
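/*
 * Self-contained model (not part of this patch): sbappendmsgstream_rcv()
 * above walks to the final mbuf of the blob and sets M_EOR there, making
 * each append a self-delimiting message. The tail-marking pattern on a
 * plain linked list:
 */
#include <assert.h>
#include <stddef.h>

#define EOR_FLAG 0x1		/* stands in for M_EOR */

struct node {
	struct node *next;
	int flags;
};

static void
mark_eor(struct node *m)
{
	struct node *p;

	assert(m != NULL);
	for (p = m; p->next != NULL; p = p->next)
		p->flags &= ~EOR_FLAG;	/* interior nodes carry no EOR */
	p->flags |= EOR_FLAG;		/* only the tail ends the record */
}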
+ */ +int +sbappendmsg_snd(struct sockbuf *sb, struct mbuf *m) +{ + struct socket *so = sb->sb_so; + struct msg_priq *priq; + int set_eor = 0; + + VERIFY(so->so_msg_state != NULL); + + if (m->m_nextpkt != NULL || (sb->sb_mb != sb->sb_lastrecord)) + panic("sbappendstream: nexpkt %p || mb %p != lastrecord %p\n", + m->m_nextpkt, sb->sb_mb, sb->sb_lastrecord); + + SBLASTMBUFCHK(sb, __func__); + + if (m == NULL || (sb->sb_flags & SB_DROP) || so->so_msg_state == NULL) { + if (m != NULL) + m_freem(m); + return (0); + } + + priq = &so->so_msg_state->msg_priq[m->m_pkthdr.msg_pri]; + + /* note if we need to propogate M_EOR to the last mbuf */ + if (m->m_flags & M_EOR) { + set_eor = 1; + + /* Reset M_EOR from the first mbuf */ + m->m_flags &= ~(M_EOR); + } + + if (priq->msgq_head == NULL) { + VERIFY(priq->msgq_tail == NULL && priq->msgq_lastmsg == NULL); + priq->msgq_head = priq->msgq_lastmsg = m; + } else { + VERIFY(priq->msgq_tail->m_next == NULL); + + /* Check if the last message has M_EOR flag set */ + if (priq->msgq_tail->m_flags & M_EOR) { + /* Insert as a new message */ + priq->msgq_lastmsg->m_nextpkt = m; + + /* move the lastmsg pointer */ + priq->msgq_lastmsg = m; + } else { + /* Append to the existing message */ + priq->msgq_tail->m_next = m; + } + } + + /* Update accounting and the queue tail pointer */ + + while (m->m_next != NULL) { + sballoc(sb, m); + priq->msgq_bytes += m->m_len; + m = m->m_next; + } + sballoc(sb, m); + priq->msgq_bytes += m->m_len; + + if (set_eor) { + m->m_flags |= M_EOR; + + /* + * Since the user space can not write a new msg + * without completing the previous one, we can + * reset this flag to start sending again. + */ + priq->msgq_flags &= ~(MSGQ_MSG_NOTDONE); + } + + priq->msgq_tail = m; + + SBLASTRECORDCHK(sb, "sbappendstream 2"); + postevent(0, sb, EV_RWBYTES); + return (1); +} + +/* + * Pull data from priority queues to the serial snd queue + * right before sending. + */ +void +sbpull_unordered_data(struct socket *so, int32_t off, int32_t len) +{ + int32_t topull, i; + struct msg_priq *priq = NULL; + + VERIFY(so->so_msg_state != NULL); + + topull = (off + len) - so->so_msg_state->msg_serial_bytes; + + i = MSG_PRI_MAX; + while (i >= MSG_PRI_MIN && topull > 0) { + struct mbuf *m = NULL, *mqhead = NULL, *mend = NULL; + priq = &so->so_msg_state->msg_priq[i]; + if ((priq->msgq_flags & MSGQ_MSG_NOTDONE) && + priq->msgq_head == NULL) { + /* + * We were in the middle of sending + * a message and we have not seen the + * end of it. + */ + VERIFY(priq->msgq_lastmsg == NULL && + priq->msgq_tail == NULL); + return; + } + if (priq->msgq_head != NULL) { + int32_t bytes = 0, topull_tmp = topull; + /* + * We found a msg while scanning the priority + * queue from high to low priority. + */ + m = priq->msgq_head; + mqhead = m; + mend = m; + + /* + * Move bytes from the priority queue to the + * serial queue. Compute the number of bytes + * being added. + */ + while (mqhead->m_next != NULL && topull_tmp > 0) { + bytes += mqhead->m_len; + topull_tmp -= mqhead->m_len; + mend = mqhead; + mqhead = mqhead->m_next; + } + + if (mqhead->m_next == NULL) { + /* + * If we have only one more mbuf left, + * move the last mbuf of this message to + * serial queue and set the head of the + * queue to be the next message. + */ + bytes += mqhead->m_len; + mend = mqhead; + mqhead = m->m_nextpkt; + if (!(mend->m_flags & M_EOR)) { + /* + * We have not seen the end of + * this message, so we can not + * pull anymore. 
+ */ + priq->msgq_flags |= MSGQ_MSG_NOTDONE; + } else { + /* Reset M_EOR */ + mend->m_flags &= ~(M_EOR); + } + } else { + /* propogate the next msg pointer */ + mqhead->m_nextpkt = m->m_nextpkt; + } + priq->msgq_head = mqhead; + + /* + * if the lastmsg pointer points to + * the mbuf that is being dequeued, update + * it to point to the new head. + */ + if (priq->msgq_lastmsg == m) + priq->msgq_lastmsg = priq->msgq_head; + + m->m_nextpkt = NULL; + mend->m_next = NULL; + + if (priq->msgq_head == NULL) { + /* Moved all messages, update tail */ + priq->msgq_tail = NULL; + VERIFY(priq->msgq_lastmsg == NULL); + } + + /* Move it to serial sb_mb queue */ + if (so->so_snd.sb_mb == NULL) { + so->so_snd.sb_mb = m; + } else { + so->so_snd.sb_mbtail->m_next = m; + } + + priq->msgq_bytes -= bytes; + VERIFY(priq->msgq_bytes >= 0); + sbwakeup(&so->so_snd); + + so->so_msg_state->msg_serial_bytes += bytes; + so->so_snd.sb_mbtail = mend; + so->so_snd.sb_lastrecord = so->so_snd.sb_mb; + + topull = + (off + len) - so->so_msg_state->msg_serial_bytes; + + if (priq->msgq_flags & MSGQ_MSG_NOTDONE) + break; + } else { + --i; + } + } + sblastrecordchk(&so->so_snd, "sbpull_unordered_data"); + sblastmbufchk(&so->so_snd, "sbpull_unordered_data"); +} + /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n @@ -1236,7 +1537,7 @@ sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, static inline void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) { - int eor = 0; + int eor = 0, compress = (!(sb->sb_flags & SB_NOCOMPRESS)); struct mbuf *o; if (m == NULL) { @@ -1247,16 +1548,16 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) goto done; } - while (m) { + while (m != NULL) { eor |= m->m_flags & M_EOR; - if (m->m_len == 0 && (eor == 0 || + if (compress && m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { if (sb->sb_lastrecord == m) sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } - if (n && (n->m_flags & M_EOR) == 0 && + if (compress && n != NULL && (n->m_flags & M_EOR) == 0 && #ifndef __APPLE__ M_WRITABLE(n) && #endif @@ -1268,13 +1569,14 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) n->m_len += m->m_len; sb->sb_cc += m->m_len; if (m->m_type != MT_DATA && m->m_type != MT_HEADER && - m->m_type != MT_OOBDATA) - /* XXX: Probably don't need.*/ + m->m_type != MT_OOBDATA) { + /* XXX: Probably don't need */ sb->sb_ctl += m->m_len; + } m = m_free(m); continue; } - if (n) + if (n != NULL) n->m_next = m; else sb->sb_mb = m; @@ -1283,10 +1585,10 @@ sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) n = m; m->m_flags &= ~M_EOR; m = m->m_next; - n->m_next = 0; + n->m_next = NULL; } - if (eor) { - if (n) + if (eor != 0) { + if (n != NULL) n->m_flags |= eor; else printf("semi-panic: sbcompress\n"); @@ -1303,11 +1605,23 @@ sb_empty_assert(struct sockbuf *sb, const char *where) sb->sb_mbtail == NULL && sb->sb_lastrecord == NULL)) { panic("%s: sb %p so %p cc %d mbcnt %d mb %p mbtail %p " "lastrecord %p\n", where, sb, sb->sb_so, sb->sb_cc, - sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail, sb->sb_lastrecord); + sb->sb_mbcnt, sb->sb_mb, sb->sb_mbtail, + sb->sb_lastrecord); /* NOTREACHED */ } } +static void +sbflush_priq(struct msg_priq *priq) +{ + struct mbuf *m; + m = priq->msgq_head; + if (m != NULL) + m_freem_list(m); + priq->msgq_head = priq->msgq_tail = priq->msgq_lastmsg = NULL; + priq->msgq_bytes = priq->msgq_flags = 0; +} + /* * Free all mbufs in a sockbuf. 
* Check that all resources are reclaimed. @@ -1315,10 +1629,49 @@ sb_empty_assert(struct sockbuf *sb, const char *where) void sbflush(struct sockbuf *sb) { - if (sb->sb_so == NULL) - panic("sbflush sb->sb_so already null sb=%p\n", sb); - (void) sblock(sb, M_WAIT); - while (sb->sb_mbcnt) { + void *lr_saved = __builtin_return_address(0); + struct socket *so = sb->sb_so; +#ifdef notyet + lck_mtx_t *mutex_held; +#endif + u_int32_t i; + + /* so_usecount may be 0 if we get here from sofreelastref() */ + if (so == NULL) { + panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n", + __func__, sb, sb->sb_flags, lr_saved); + /* NOTREACHED */ + } else if (so->so_usecount < 0) { + panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p " + "lrh= %s\n", __func__, sb, sb->sb_flags, so, + so->so_usecount, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ + } +#ifdef notyet + /* + * XXX: This code is currently commented out, because we may get here + * as part of sofreelastref(), and at that time, pr_getlock() may no + * longer be able to return us the lock; this will be fixed in future. + */ + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); +#endif + + /* + * Obtain lock on the socket buffer (SB_LOCK). This is required + * to prevent the socket buffer from being unexpectedly altered + * while it is used by another thread in socket send/receive. + * + * sblock() must not fail here, hence the assertion. + */ + (void) sblock(sb, SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT); + VERIFY(sb->sb_flags & SB_LOCK); + + while (sb->sb_mbcnt > 0) { /* * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty: * we would loop forever. Panic instead. @@ -1327,10 +1680,20 @@ sbflush(struct sockbuf *sb) break; sbdrop(sb, (int)sb->sb_cc); } + + if (!(sb->sb_flags & SB_RECV) && (so->so_flags & SOF_ENABLE_MSGS)) { + VERIFY(so->so_msg_state != NULL); + for (i = MSG_PRI_MIN; i <= MSG_PRI_MAX; ++i) { + sbflush_priq(&so->so_msg_state->msg_priq[i]); + } + so->so_msg_state->msg_serial_bytes = 0; + so->so_msg_state->msg_uno_bytes = 0; + } + sb_empty_assert(sb, __func__); postevent(0, sb, EV_RWBYTES); - sbunlock(sb, 1); /* keep socket locked */ + sbunlock(sb, TRUE); /* keep socket locked */ } /* @@ -1350,9 +1713,18 @@ sbdrop(struct sockbuf *sb, int len) struct mbuf *m, *free_list, *ml; struct mbuf *next, *last; + next = (m = sb->sb_mb) ? m->m_nextpkt : 0; +#if MPTCP + if ((m != NULL) && (len > 0) && + (!(sb->sb_flags & SB_RECV)) && + ((sb->sb_so->so_flags & SOF_MP_SUBFLOW) || + ((SOCK_CHECK_DOM(sb->sb_so, PF_MULTIPATH)) && + (SOCK_CHECK_PROTO(sb->sb_so, IPPROTO_TCP))))) { + mptcp_preproc_sbdrop(m, (unsigned int)len); + } +#endif /* MPTCP */ KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0); - next = (m = sb->sb_mb) ? 
m->m_nextpkt : 0; free_list = last = m; ml = (struct mbuf *)0; @@ -1375,6 +1747,11 @@ sbdrop(struct sockbuf *sb, int len) */ sb->sb_cc = 0; sb->sb_mbcnt = 0; + if (!(sb->sb_flags & SB_RECV) && + (sb->sb_so->so_flags & SOF_ENABLE_MSGS)) { + sb->sb_so->so_msg_state-> + msg_serial_bytes = 0; + } break; } m = last = next; @@ -1386,7 +1763,7 @@ sbdrop(struct sockbuf *sb, int len) m->m_data += len; sb->sb_cc -= len; if (m->m_type != MT_DATA && m->m_type != MT_HEADER && - m->m_type != MT_OOBDATA) + m->m_type != MT_OOBDATA) sb->sb_ctl -= len; break; } @@ -1479,21 +1856,21 @@ sbcreatecontrol(caddr_t p, int size, int type, int level) return (m); } -struct mbuf** -sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf** mp) +struct mbuf ** +sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf **mp) { - struct mbuf* m; + struct mbuf *m; struct cmsghdr *cp; - if (*mp == NULL){ + if (*mp == NULL) { *mp = sbcreatecontrol(p, size, type, level); - return mp; + return (mp); } - if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN){ + if (CMSG_SPACE((u_int)size) + (*mp)->m_len > MLEN) { mp = &(*mp)->m_next; *mp = sbcreatecontrol(p, size, type, level); - return mp; + return (mp); } m = *mp; @@ -1509,7 +1886,7 @@ sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf** mp) cp->cmsg_level = level; cp->cmsg_type = type; - return mp; + return (mp); } @@ -1518,97 +1895,128 @@ sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf** mp) * supported by a protocol. Fill in as needed. */ int -pru_abort_notsupp(__unused struct socket *so) +pru_abort_notsupp(struct socket *so) +{ +#pragma unused(so) + return (EOPNOTSUPP); +} + +int +pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { +#pragma unused(so, nam) return (EOPNOTSUPP); } int -pru_accept_notsupp(__unused struct socket *so, __unused struct sockaddr **nam) +pru_attach_notsupp(struct socket *so, int proto, struct proc *p) { +#pragma unused(so, proto, p) return (EOPNOTSUPP); } int -pru_attach_notsupp(__unused struct socket *so, __unused int proto, - __unused struct proc *p) +pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p) { +#pragma unused(so, nam, p) return (EOPNOTSUPP); } int -pru_bind_notsupp(__unused struct socket *so, __unused struct sockaddr *nam, - __unused struct proc *p) +pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p) { +#pragma unused(so, nam, p) return (EOPNOTSUPP); } int -pru_connect_notsupp(__unused struct socket *so, __unused struct sockaddr *nam, - __unused struct proc *p) +pru_connect2_notsupp(struct socket *so1, struct socket *so2) { +#pragma unused(so1, so2) return (EOPNOTSUPP); } int -pru_connect2_notsupp(__unused struct socket *so1, __unused struct socket *so2) +pru_connectx_notsupp(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) { +#pragma unused(so, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen) return (EOPNOTSUPP); } int -pru_control_notsupp(__unused struct socket *so, __unused u_long cmd, - __unused caddr_t data, __unused struct ifnet *ifp, __unused struct proc *p) +pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct proc *p) { +#pragma unused(so, cmd, data, ifp, p) return (EOPNOTSUPP); } int -pru_detach_notsupp(__unused struct socket *so) +pru_detach_notsupp(struct socket *so) { +#pragma unused(so) 
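+ /* + * pru_sanitize() below installs this family of + * _notsupp stubs for any pr_usrreqs entry that a + * protocol leaves unset. + */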
return (EOPNOTSUPP); } int -pru_disconnect_notsupp(__unused struct socket *so) +pru_disconnect_notsupp(struct socket *so) { +#pragma unused(so) return (EOPNOTSUPP); } int -pru_listen_notsupp(__unused struct socket *so, __unused struct proc *p) +pru_disconnectx_notsupp(struct socket *so, associd_t aid, connid_t cid) { +#pragma unused(so, aid, cid) return (EOPNOTSUPP); } int -pru_peeraddr_notsupp(__unused struct socket *so, __unused struct sockaddr **nam) +pru_listen_notsupp(struct socket *so, struct proc *p) { +#pragma unused(so, p) return (EOPNOTSUPP); } int -pru_rcvd_notsupp(__unused struct socket *so, __unused int flags) +pru_peeloff_notsupp(struct socket *so, associd_t aid, struct socket **psop) { +#pragma unused(so, aid, psop) return (EOPNOTSUPP); } int -pru_rcvoob_notsupp(__unused struct socket *so, __unused struct mbuf *m, - __unused int flags) +pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) { +#pragma unused(so, nam) return (EOPNOTSUPP); } int -pru_send_notsupp(__unused struct socket *so, __unused int flags, - __unused struct mbuf *m, __unused struct sockaddr *addr, - __unused struct mbuf *control, __unused struct proc *p) +pru_rcvd_notsupp(struct socket *so, int flags) +{ +#pragma unused(so, flags) + return (EOPNOTSUPP); +} +int +pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { +#pragma unused(so, m, flags) return (EOPNOTSUPP); } +int +pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, struct proc *p) +{ +#pragma unused(so, flags, m, addr, control, p) + return (EOPNOTSUPP); +} /* * This isn't really a ``null'' operation, but it's the default one @@ -1634,44 +2042,83 @@ pru_sense_null(struct socket *so, void *ub, int isstat64) int -pru_sosend_notsupp(__unused struct socket *so, __unused struct sockaddr *addr, - __unused struct uio *uio, __unused struct mbuf *top, - __unused struct mbuf *control, __unused int flags) - +pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags) { +#pragma unused(so, addr, uio, top, control, flags) return (EOPNOTSUPP); } int -pru_soreceive_notsupp(__unused struct socket *so, - __unused struct sockaddr **paddr, - __unused struct uio *uio, __unused struct mbuf **mp0, - __unused struct mbuf **controlp, __unused int *flagsp) +pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, + struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { +#pragma unused(so, paddr, uio, mp0, controlp, flagsp) return (EOPNOTSUPP); } int -pru_shutdown_notsupp(__unused struct socket *so) +pru_shutdown_notsupp(struct socket *so) { +#pragma unused(so) return (EOPNOTSUPP); } int -pru_sockaddr_notsupp(__unused struct socket *so, __unused struct sockaddr **nam) +pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) { +#pragma unused(so, nam) return (EOPNOTSUPP); } int -pru_sopoll_notsupp(__unused struct socket *so, __unused int events, - __unused kauth_cred_t cred, __unused void *wql) +pru_sopoll_notsupp(struct socket *so, int events, kauth_cred_t cred, void *wql) { +#pragma unused(so, events, cred, wql) return (EOPNOTSUPP); } +int +pru_socheckopt_null(struct socket *so, struct sockopt *sopt) +{ +#pragma unused(so, sopt) + /* + * Allow all options for set/get by default. 
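+ * A protocol that needs to screen particular + * options can install its own pru_socheckopt; + * pru_sanitize() applies this default only when + * none is provided.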
+ */ + return (0); +} + +void +pru_sanitize(struct pr_usrreqs *pru) +{ +#define DEFAULT(foo, bar) if ((foo) == NULL) (foo) = (bar) + DEFAULT(pru->pru_abort, pru_abort_notsupp); + DEFAULT(pru->pru_accept, pru_accept_notsupp); + DEFAULT(pru->pru_attach, pru_attach_notsupp); + DEFAULT(pru->pru_bind, pru_bind_notsupp); + DEFAULT(pru->pru_connect, pru_connect_notsupp); + DEFAULT(pru->pru_connect2, pru_connect2_notsupp); + DEFAULT(pru->pru_connectx, pru_connectx_notsupp); + DEFAULT(pru->pru_control, pru_control_notsupp); + DEFAULT(pru->pru_detach, pru_detach_notsupp); + DEFAULT(pru->pru_disconnect, pru_disconnect_notsupp); + DEFAULT(pru->pru_disconnectx, pru_disconnectx_notsupp); + DEFAULT(pru->pru_listen, pru_listen_notsupp); + DEFAULT(pru->pru_peeloff, pru_peeloff_notsupp); + DEFAULT(pru->pru_peeraddr, pru_peeraddr_notsupp); + DEFAULT(pru->pru_rcvd, pru_rcvd_notsupp); + DEFAULT(pru->pru_rcvoob, pru_rcvoob_notsupp); + DEFAULT(pru->pru_send, pru_send_notsupp); + DEFAULT(pru->pru_sense, pru_sense_null); + DEFAULT(pru->pru_shutdown, pru_shutdown_notsupp); + DEFAULT(pru->pru_sockaddr, pru_sockaddr_notsupp); + DEFAULT(pru->pru_sopoll, pru_sopoll_notsupp); + DEFAULT(pru->pru_soreceive, pru_soreceive_notsupp); + DEFAULT(pru->pru_sosend, pru_sosend_notsupp); + DEFAULT(pru->pru_socheckopt, pru_socheckopt_null); +#undef DEFAULT +} -#ifdef __APPLE__ /* * The following are macros on BSD and functions on Darwin */ @@ -1683,26 +2130,46 @@ pru_sopoll_notsupp(__unused struct socket *so, __unused int events, int sb_notify(struct sockbuf *sb) { - return ((sb->sb_flags & - (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE)) != 0); + return (sb->sb_waiters > 0 || + (sb->sb_flags & (SB_SEL|SB_ASYNC|SB_UPCALL|SB_KNOTE))); } /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? * This is problematical if the fields are unsigned, as the space might * still be negative (cc > hiwat or mbcnt > mbmax). Should detect - * overflow and return 0. + * overflow and return 0. */ int sbspace(struct sockbuf *sb) { - int space = - imin((int)(sb->sb_hiwat - sb->sb_cc), - (int)(sb->sb_mbmax - sb->sb_mbcnt)); + int space = imin((int)(sb->sb_hiwat - sb->sb_cc), + (int)(sb->sb_mbmax - sb->sb_mbcnt)); if (space < 0) space = 0; - return space; + return (space); +} + +/* + * If this socket has priority queues, check if there is enough + * space in the priority queue for this msg. + */ +int +msgq_sbspace(struct socket *so, struct mbuf *control) +{ + int space = 0, error; + u_int32_t msgpri; + VERIFY(so->so_type == SOCK_STREAM && SOCK_PROTO(so) == IPPROTO_TCP && + control != NULL); + error = tcp_get_msg_priority(control, &msgpri); + if (error) + return (0); + space = (so->so_snd.sb_idealsize / MSG_PRI_COUNT) - + so->so_msg_state->msg_priq[msgpri].msgq_bytes; + if (space < 0) + space = 0; + return (space); } /* do we have to send all at once on a socket? 
*/ @@ -1740,15 +2207,15 @@ void sballoc(struct sockbuf *sb, struct mbuf *m) { u_int32_t cnt = 1; - sb->sb_cc += m->m_len; - if (m->m_type != MT_DATA && m->m_type != MT_HEADER && - m->m_type != MT_OOBDATA) + sb->sb_cc += m->m_len; + if (m->m_type != MT_DATA && m->m_type != MT_HEADER && + m->m_type != MT_OOBDATA) sb->sb_ctl += m->m_len; - sb->sb_mbcnt += MSIZE; - + sb->sb_mbcnt += MSIZE; + if (m->m_flags & M_EXT) { - sb->sb_mbcnt += m->m_ext.ext_size; - cnt += (m->m_ext.ext_size >> MSIZESHIFT) ; + sb->sb_mbcnt += m->m_ext.ext_size; + cnt += (m->m_ext.ext_size >> MSIZESHIFT); } OSAddAtomic(cnt, &total_sbmb_cnt); VERIFY(total_sbmb_cnt > 0); @@ -1761,13 +2228,13 @@ sbfree(struct sockbuf *sb, struct mbuf *m) int cnt = -1; sb->sb_cc -= m->m_len; - if (m->m_type != MT_DATA && m->m_type != MT_HEADER && - m->m_type != MT_OOBDATA) + if (m->m_type != MT_DATA && m->m_type != MT_HEADER && + m->m_type != MT_OOBDATA) sb->sb_ctl -= m->m_len; - sb->sb_mbcnt -= MSIZE; + sb->sb_mbcnt -= MSIZE; if (m->m_flags & M_EXT) { - sb->sb_mbcnt -= m->m_ext.ext_size; - cnt -= (m->m_ext.ext_size >> MSIZESHIFT) ; + sb->sb_mbcnt -= m->m_ext.ext_size; + cnt -= (m->m_ext.ext_size >> MSIZESHIFT); } OSAddAtomic(cnt, &total_sbmb_cnt); VERIFY(total_sbmb_cnt >= 0); @@ -1777,47 +2244,120 @@ sbfree(struct sockbuf *sb, struct mbuf *m) * Set lock on sockbuf sb; sleep if lock is already held. * Unless SB_NOINTR is set on sockbuf, sleep is interruptible. * Returns error without lock if sleep is interrupted. - * - * Returns: 0 Success - * EWOULDBLOCK - * sb_lock:EINTR */ int -sblock(struct sockbuf *sb, int wf) +sblock(struct sockbuf *sb, uint32_t flags) { + boolean_t nointr = ((sb->sb_flags & SB_NOINTR) || (flags & SBL_NOINTR)); + void *lr_saved = __builtin_return_address(0); + struct socket *so = sb->sb_so; + void * wchan; int error = 0; - if (sb->sb_flags & SB_LOCK) - error = (wf == M_WAIT) ? sb_lock(sb) : EWOULDBLOCK; - else - sb->sb_flags |= SB_LOCK; + VERIFY((flags & SBL_VALID) == flags); - return (error); + /* so_usecount may be 0 if we get here from sofreelastref() */ + if (so == NULL) { + panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n", + __func__, sb, sb->sb_flags, lr_saved); + /* NOTREACHED */ + } else if (so->so_usecount < 0) { + panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p " + "lrh= %s\n", __func__, sb, sb->sb_flags, so, + so->so_usecount, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ + } + + if ((sb->sb_flags & SB_LOCK) && !(flags & SBL_WAIT)) + return (EWOULDBLOCK); + + /* + * We may get here from sorflush(), in which case "sb" may not + * point to the real socket buffer. Use the actual socket buffer + * address from the socket instead. + */ + wchan = (sb->sb_flags & SB_RECV) ? + &so->so_rcv.sb_flags : &so->so_snd.sb_flags; + + while (sb->sb_flags & SB_LOCK) { + lck_mtx_t *mutex_held; + + /* + * XXX: This code should be moved up above outside of this loop; + * however, we may get here as part of sofreelastref(), and + * at that time pr_getlock() may no longer be able to return + * us the lock. This will be fixed in future. + */ + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + + lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + + sb->sb_wantlock++; + VERIFY(sb->sb_wantlock != 0); + + error = msleep(wchan, mutex_held, + nointr ? PSOCK : PSOCK | PCATCH, + nointr ? 
"sb_lock_nointr" : "sb_lock", NULL); + + VERIFY(sb->sb_wantlock != 0); + sb->sb_wantlock--; + + if (error == 0 && (so->so_flags & SOF_DEFUNCT) && + !(flags & SBL_IGNDEFUNCT)) { + error = EBADF; + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] " + "(%d)\n", __func__, proc_selfpid(), + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); + } + + if (error != 0) + return (error); + } + sb->sb_flags |= SB_LOCK; + return (0); } -/* release lock on sockbuf sb */ +/* + * Release lock on sockbuf sb + */ void -sbunlock(struct sockbuf *sb, int keeplocked) +sbunlock(struct sockbuf *sb, boolean_t keeplocked) { + void *lr_saved = __builtin_return_address(0); struct socket *so = sb->sb_so; - void *lr_saved; - lck_mtx_t *mutex_held; - lr_saved = __builtin_return_address(0); + /* so_usecount may be 0 if we get here from sofreelastref() */ + if (so == NULL) { + panic("%s: null so, sb=%p sb_flags=0x%x lr=%p\n", + __func__, sb, sb->sb_flags, lr_saved); + /* NOTREACHED */ + } else if (so->so_usecount < 0) { + panic("%s: sb=%p sb_flags=0x%x sb_so=%p usecount=%d lr=%p " + "lrh= %s\n", __func__, sb, sb->sb_flags, so, + so->so_usecount, lr_saved, solockhistory_nr(so)); + /* NOTREACHED */ + } + VERIFY(sb->sb_flags & SB_LOCK); sb->sb_flags &= ~SB_LOCK; - if (sb->sb_flags & SB_WANT) { - sb->sb_flags &= ~SB_WANT; - if (so->so_usecount < 0) { - panic("sbunlock: b4 wakeup so=%p ref=%d lr=%p " - "sb_flags=%x lrh= %s\n", sb->sb_so, so->so_usecount, - lr_saved, sb->sb_flags, solockhistory_nr(so)); - /* NOTREACHED */ - } - wakeup((caddr_t)&(sb)->sb_flags); + if (sb->sb_wantlock > 0) { + /* + * We may get here from sorflush(), in which case "sb" may not + * point to the real socket buffer. Use the actual socket + * buffer address from the socket instead. + */ + wakeup((sb->sb_flags & SB_RECV) ? 
&so->so_rcv.sb_flags : + &so->so_snd.sb_flags); } - if (keeplocked == 0) { /* unlock on exit */ + + if (!keeplocked) { /* unlock on exit */ + lck_mtx_t *mutex_held; + if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else @@ -1825,13 +2365,10 @@ sbunlock(struct sockbuf *sb, int keeplocked) lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); + VERIFY(so->so_usecount != 0); so->so_usecount--; - if (so->so_usecount < 0) - panic("sbunlock: unlock on exit so=%p ref=%d lr=%p " - "sb_flags=%x lrh= %s\n", so, so->so_usecount, lr_saved, - sb->sb_flags, solockhistory_nr(so)); so->unlock_lr[so->next_unlock_lr] = lr_saved; - so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; + so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX; lck_mtx_unlock(mutex_held); } } @@ -1855,9 +2392,88 @@ soevent(struct socket *so, long hint) { if (so->so_flags & SOF_KNOTE) KNOTE(&so->so_klist, hint); + + soevupcall(so, hint); + + /* Don't post an event if this is a subflow socket */ + if ((hint & SO_FILT_HINT_IFDENIED) && !(so->so_flags & SOF_MP_SUBFLOW)) + soevent_ifdenied(so); } -#endif /* __APPLE__ */ +void +soevupcall(struct socket *so, u_int32_t hint) +{ + void (*so_event)(struct socket *, void *, uint32_t); + + if ((so_event = so->so_event) != NULL) { + caddr_t so_eventarg = so->so_eventarg; + + hint &= so->so_eventmask; + if (hint != 0) { + socket_unlock(so, 0); + so->so_event(so, so_eventarg, hint); + socket_lock(so, 0); + } + } +} + +static void +soevent_ifdenied(struct socket *so) +{ + struct kev_netpolicy_ifdenied ev_ifdenied; + + bzero(&ev_ifdenied, sizeof (ev_ifdenied)); + /* + * The event consumer is interested in the effective {upid,pid,uuid} + * info, which can be different from those related to the process + * that recently performed a system call on the socket, i.e. when the + * socket is delegated. + */ + if (so->so_flags & SOF_DELEGATED) { + ev_ifdenied.ev_data.eupid = so->e_upid; + ev_ifdenied.ev_data.epid = so->e_pid; + uuid_copy(ev_ifdenied.ev_data.euuid, so->e_uuid); + } else { + ev_ifdenied.ev_data.eupid = so->last_upid; + ev_ifdenied.ev_data.epid = so->last_pid; + uuid_copy(ev_ifdenied.ev_data.euuid, so->last_uuid); + } + + if (++so->so_ifdenied_notifies > 1) { + /* + * Allow for at most one kernel event to be generated per + * socket; so_ifdenied_notifies is reset upon changes in + * the UUID policy. See comments in inp_update_policy. + */ + if (net_io_policy_log) { + uuid_string_t buf; + + uuid_unparse(ev_ifdenied.ev_data.euuid, buf); + log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d " + "euuid %s%s has %d redundant events suppressed\n", + __func__, so->last_pid, + (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + SOCK_TYPE(so), ev_ifdenied.ev_data.epid, buf, + ((so->so_flags & SOF_DELEGATED) ? + " [delegated]" : ""), so->so_ifdenied_notifies); + } + } else { + if (net_io_policy_log) { + uuid_string_t buf; + + uuid_unparse(ev_ifdenied.ev_data.euuid, buf); + log(LOG_DEBUG, "%s[%d]: so 0x%llx [%d,%d] epid %d " + "euuid %s%s event posted\n", __func__, + so->last_pid, (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), + ev_ifdenied.ev_data.epid, buf, + ((so->so_flags & SOF_DELEGATED) ? + " [delegated]" : "")); + } + netpolicy_post_msg(KEV_NETPOLICY_IFDENIED, &ev_ifdenied.ev_data, + sizeof (ev_ifdenied)); + } +} /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
@@ -1893,8 +2509,8 @@ sotoxsocket(struct socket *so, struct xsocket *xso) xso->so_state = so->so_state; xso->so_pcb = (_XSOCKET_PTR(caddr_t))VM_KERNEL_ADDRPERM(so->so_pcb); if (so->so_proto) { - xso->xso_protocol = so->so_proto->pr_protocol; - xso->xso_family = so->so_proto->pr_domain->dom_family; + xso->xso_protocol = SOCK_PROTO(so); + xso->xso_family = SOCK_DOM(so); } else { xso->xso_protocol = xso->xso_family = 0; } @@ -1911,37 +2527,35 @@ } -#if !CONFIG_EMBEDDED void sotoxsocket64(struct socket *so, struct xsocket64 *xso) { - xso->xso_len = sizeof (*xso); - xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so); - xso->so_type = so->so_type; - xso->so_options = (short)(so->so_options & 0xffff); - xso->so_linger = so->so_linger; - xso->so_state = so->so_state; - xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb); - if (so->so_proto) { - xso->xso_protocol = so->so_proto->pr_protocol; - xso->xso_family = so->so_proto->pr_domain->dom_family; - } else { - xso->xso_protocol = xso->xso_family = 0; - } - xso->so_qlen = so->so_qlen; - xso->so_incqlen = so->so_incqlen; - xso->so_qlimit = so->so_qlimit; - xso->so_timeo = so->so_timeo; - xso->so_error = so->so_error; - xso->so_pgid = so->so_pgid; - xso->so_oobmark = so->so_oobmark; - sbtoxsockbuf(&so->so_snd, &xso->so_snd); - sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); - xso->so_uid = kauth_cred_getuid(so->so_cred); -} - -#endif /* !CONFIG_EMBEDDED */ + xso->xso_len = sizeof (*xso); + xso->xso_so = (u_int64_t)VM_KERNEL_ADDRPERM(so); + xso->so_type = so->so_type; + xso->so_options = (short)(so->so_options & 0xffff); + xso->so_linger = so->so_linger; + xso->so_state = so->so_state; + xso->so_pcb = (u_int64_t)VM_KERNEL_ADDRPERM(so->so_pcb); + if (so->so_proto) { + xso->xso_protocol = SOCK_PROTO(so); + xso->xso_family = SOCK_DOM(so); + } else { + xso->xso_protocol = xso->xso_family = 0; + } + xso->so_qlen = so->so_qlen; + xso->so_incqlen = so->so_incqlen; + xso->so_qlimit = so->so_qlimit; + xso->so_timeo = so->so_timeo; + xso->so_error = so->so_error; + xso->so_pgid = so->so_pgid; + xso->so_oobmark = so->so_oobmark; + sbtoxsockbuf(&so->so_snd, &xso->so_snd); + sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); + xso->so_uid = kauth_cred_getuid(so->so_cred); +} + /* * This does the same for sockbufs. Note that the xsockbuf structure, @@ -1972,47 +2586,51 @@ int soisthrottled(struct socket *so) { /* - * On non-embedded, we rely on implicit throttling by the application, - * as we're missing the system-wide "decision maker". + * On non-embedded, we rely on implicit throttling by the + * application, as we're missing the system-wide "decision maker". */ return ( -#if CONFIG_EMBEDDED - net_io_policy_throttled && -#endif /* CONFIG_EMBEDDED */ - (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND)); + (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND)); } int soisprivilegedtraffic(struct socket *so) { - return (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS); + return ((so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS) ? 1 : 0); +} + +int +soissrcbackground(struct socket *so) +{ + return ((so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND) || + IS_SO_TC_BACKGROUND(so->so_traffic_class)); } /* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB.
*/ -SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY, 0, "IPC"); +SYSCTL_NODE(_kern, KERN_IPC, ipc, + CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY, 0, "IPC"); /* Check that the maximum socket buffer size is within a range */ static int -sysctl_sb_max(__unused struct sysctl_oid *oidp, __unused void *arg1, - __unused int arg2, struct sysctl_req *req) +sysctl_sb_max SYSCTL_HANDLER_ARGS { +#pragma unused(oidp, arg1, arg2) u_int32_t new_value; int changed = 0; - int error = sysctl_io_number(req, sb_max, sizeof(u_int32_t), &new_value, - &changed); + int error = sysctl_io_number(req, sb_max, sizeof (u_int32_t), + &new_value, &changed); if (!error && changed) { - if (new_value > LOW_SB_MAX && - new_value <= high_sb_max ) { + if (new_value > LOW_SB_MAX && new_value <= high_sb_max) { sb_max = new_value; } else { error = ERANGE; } } - return error; + return (error); } static int @@ -2036,24 +2654,43 @@ sysctl_io_policy_throttled SYSCTL_HANDLER_ARGS return (err); } -SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, - &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size"); +SYSCTL_PROC(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &sb_max, 0, &sysctl_sb_max, "IU", "Maximum socket buffer size"); + +SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, + CTLFLAG_RD | CTLFLAG_LOCKED, &maxsockets, 0, + "Maximum number of sockets available"); + +SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, + CTLFLAG_RW | CTLFLAG_LOCKED, &sb_efficiency, 0, ""); + +SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, + CTLFLAG_RD | CTLFLAG_LOCKED, &nmbclusters, 0, ""); + +SYSCTL_INT(_kern_ipc, OID_AUTO, njcl, + CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, ""); + +SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes, + CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, ""); + +SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat, + CTLFLAG_RW | CTLFLAG_LOCKED, &soqlimitcompat, 1, + "Enable socket queue limit compatibility"); -SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD | CTLFLAG_LOCKED, - &maxsockets, 0, "Maximum number of sockets avaliable"); -SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW | CTLFLAG_LOCKED, - &sb_efficiency, 0, ""); -SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD | CTLFLAG_LOCKED, - &nmbclusters, 0, ""); -SYSCTL_INT(_kern_ipc, OID_AUTO, njcl, CTLFLAG_RD | CTLFLAG_LOCKED, &njcl, 0, ""); -SYSCTL_INT(_kern_ipc, OID_AUTO, njclbytes, CTLFLAG_RD | CTLFLAG_LOCKED, &njclbytes, 0, ""); -SYSCTL_INT(_kern_ipc, KIPC_SOQLIMITCOMPAT, soqlimitcompat, CTLFLAG_RW | CTLFLAG_LOCKED, - &soqlimitcompat, 1, "Enable socket queue limit compatibility"); SYSCTL_INT(_kern_ipc, OID_AUTO, soqlencomp, CTLFLAG_RW | CTLFLAG_LOCKED, - &soqlencomp, 0, "Listen backlog represents only complete queue"); + &soqlencomp, 0, "Listen backlog represents only complete queue"); SYSCTL_NODE(_kern_ipc, OID_AUTO, io_policy, CTLFLAG_RW, 0, "network IO policy"); SYSCTL_PROC(_kern_ipc_io_policy, OID_AUTO, throttled, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttled, 0, - sysctl_io_policy_throttled, "I", ""); + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &net_io_policy_throttled, 0, + sysctl_io_policy_throttled, "I", ""); + +SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED, + &net_io_policy_log, 0, ""); + +#if CONFIG_PROC_UUID_POLICY +SYSCTL_INT(_kern_ipc_io_policy, OID_AUTO, uuid, CTLFLAG_RW | CTLFLAG_LOCKED, + &net_io_policy_uuid, 0, ""); +#endif /*
CONFIG_PROC_UUID_POLICY */ diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index bd2bd5dd3..09f0e4f4a 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +88,7 @@ #include #include #include +#include #include @@ -101,7 +103,7 @@ #endif /* MAC_SOCKET_SUBSET */ #define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type +#define f_type f_fglob->fg_ops->fo_type #define f_msgcount f_fglob->fg_msgcount #define f_cred f_fglob->fg_cred #define f_ops f_fglob->fg_ops @@ -125,11 +127,6 @@ #define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3)) -#define HACK_FOR_4056224 1 -#if HACK_FOR_4056224 -static pid_t last_pid_4056224 = 0; -#endif /* HACK_FOR_4056224 */ - /* TODO: should be in header file */ int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int); @@ -137,20 +134,30 @@ static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int, int32_t *); static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t, int32_t *); +static int connectit(struct socket *, struct sockaddr *); static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t, size_t, boolean_t); static int getsockaddr_s(struct socket *, struct sockaddr_storage *, user_addr_t, size_t, boolean_t); +static int getsockaddrlist(struct socket *, struct sockaddr_list **, + user_addr_t, socklen_t, boolean_t); #if SENDFILE static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **, boolean_t); #endif /* SENDFILE */ +static int connectx_nocancel(struct proc *, struct connectx_args *, int *); +static int connectitx(struct socket *, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *); +static int peeloff_nocancel(struct proc *, struct peeloff_args *, int *); +static int disconnectx_nocancel(struct proc *, struct disconnectx_args *, + int *); +static int socket_common(struct proc *, int, int, int, pid_t, int32_t *, int); /* * System call interface to the socket abstraction. 
*/ -extern struct fileops socketops; +extern const struct fileops socketops; /* * Returns: 0 Success @@ -163,46 +170,68 @@ extern struct fileops socketops; * socreate:EPROTONOSUPPORT * socreate:ENOBUFS * socreate:ENOMEM - * socreate:EISCONN * socreate:??? [other protocol families, IPSEC] */ int -socket(struct proc *p, struct socket_args *uap, int32_t *retval) +socket(struct proc *p, + struct socket_args *uap, + int32_t *retval) +{ + return (socket_common(p, uap->domain, uap->type, uap->protocol, + proc_selfpid(), retval, 0)); +} + +int +socket_delegate(struct proc *p, + struct socket_delegate_args *uap, + int32_t *retval) +{ + return socket_common(p, uap->domain, uap->type, uap->protocol, + uap->epid, retval, 1); +} + +static int +socket_common(struct proc *p, + int domain, + int type, + int protocol, + pid_t epid, + int32_t *retval, + int delegate) { struct socket *so; struct fileproc *fp; int fd, error; - AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol); + AUDIT_ARG(socket, domain, type, protocol); #if CONFIG_MACF_SOCKET_SUBSET - if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain, - uap->type, uap->protocol)) != 0) + if ((error = mac_socket_check_create(kauth_cred_get(), domain, + type, protocol)) != 0) return (error); #endif /* MAC_SOCKET_SUBSET */ + if (delegate) { + error = priv_check_cred(kauth_cred_get(), + PRIV_NET_PRIVILEGED_SOCKET_DELEGATE, 0); + if (error) + return (EACCES); + } + error = falloc(p, &fp, &fd, vfs_context_current()); if (error) { return (error); } fp->f_flag = FREAD|FWRITE; - fp->f_type = DTYPE_SOCKET; fp->f_ops = &socketops; - error = socreate(uap->domain, &so, uap->type, uap->protocol); + if (delegate) + error = socreate_delegate(domain, &so, type, protocol, epid); + else + error = socreate(domain, &so, type, protocol); + if (error) { fp_free(p, fd, fp); } else { - thread_t thread; - struct uthread *ut; - - thread = current_thread(); - ut = get_bsdthread_info(thread); - - /* if this is a backgrounded thread then throttle all new sockets */ - if (proc_get_selfthread_isbackground() != 0) { - so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND; - so->so_background_thread = thread; - } fp->f_data = (caddr_t)so; proc_fdlock(p); @@ -227,7 +256,7 @@ socket(struct proc *p, struct socket_args *uap, int32_t *retval) * getsockaddr:EINVAL Invalid argument * getsockaddr:ENOMEM Not enough space * getsockaddr:EFAULT Bad address - * sobind:??? + * sobindlock:??? */ /* ARGSUSED */ int @@ -265,9 +294,9 @@ bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval) AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa); #if CONFIG_MACF_SOCKET_SUBSET if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) - error = sobind(so, sa); + error = sobindlock(so, sa, 1); /* will lock socket */ #else - error = sobind(so, sa); + error = sobindlock(so, sa, 1); /* will lock socket */ #endif /* MAC_SOCKET_SUBSET */ if (want_free) FREE(sa, M_SONAME); @@ -447,8 +476,10 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, * the file descriptor should the MAC check fails. 
*/ if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) { + socket_lock(so, 1); so->so_state &= ~(SS_NOFDREF | SS_COMP); so->so_head = NULL; + socket_unlock(so, 1); soclose(so); /* Drop reference on listening socket */ sodereference(head); @@ -470,21 +501,22 @@ accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, fflag = fp->f_flag; error = falloc(p, &fp, &newfd, vfs_context_current()); if (error) { - /* + /* * Probably ran out of file descriptors. * * * Don't put this back on the socket like we used to, that * just causes the client to spin. Drop the socket. */ + socket_lock(so, 1); so->so_state &= ~(SS_NOFDREF | SS_COMP); so->so_head = NULL; + socket_unlock(so, 1); soclose(so); sodereference(head); goto out; } *retval = newfd; - fp->f_type = DTYPE_SOCKET; fp->f_flag = fflag; fp->f_ops = &socketops; fp->f_data = (caddr_t)so; @@ -581,13 +613,12 @@ connect(struct proc *p, struct connect_args *uap, int32_t *retval) } int -connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused int32_t *retval) +connect_nocancel(proc_t p, struct connect_nocancel_args *uap, int32_t *retval) { +#pragma unused(p, retval) struct socket *so; struct sockaddr_storage ss; struct sockaddr *sa = NULL; - lck_mtx_t *mutex_held; - boolean_t want_free = TRUE; int error; int fd = uap->s; boolean_t dgram; @@ -612,65 +643,308 @@ connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram); } else { error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram); - if (error == 0) { + if (error == 0) sa = (struct sockaddr *)&ss; - want_free = FALSE; - } } if (error != 0) goto out; + error = connectit(so, sa); + + if (sa != NULL && sa != SA(&ss)) + FREE(sa, M_SONAME); + if (error == ERESTART) + error = EINTR; +out: + file_drop(fd); + return (error); +} + +static int +connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) +{ +#pragma unused(p, retval) + struct sockaddr_list *src_sl = NULL, *dst_sl = NULL; + struct socket *so; + int error, fd = uap->s; + boolean_t dgram; + connid_t cid = CONNID_ANY; + + AUDIT_ARG(fd, uap->s); + error = file_socket(fd, &so); + if (error != 0) + return (error); + if (so == NULL) { + error = EBADF; + goto out; + } + + /* + * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET + * if this is a datagram socket; translate for other types. + */ + dgram = (so->so_type == SOCK_DGRAM); + + /* + * Get socket address(es) now before we obtain socket lock; use + * sockaddr_list for src address for convenience, if present, + * even though it won't hold more than one. + */ + if (uap->src != USER_ADDR_NULL && (error = getsockaddrlist(so, + &src_sl, uap->src, uap->srclen, dgram)) != 0) + goto out; + + error = getsockaddrlist(so, &dst_sl, uap->dsts, uap->dstlen, dgram); + if (error != 0) + goto out; + + VERIFY(dst_sl != NULL && + !TAILQ_EMPTY(&dst_sl->sl_head) && dst_sl->sl_cnt > 0); + + error = connectitx(so, &src_sl, &dst_sl, p, uap->ifscope, + uap->aid, &cid); + if (error == ERESTART) + error = EINTR; + + if (uap->cid != USER_ADDR_NULL) + (void) copyout(&cid, uap->cid, sizeof (cid)); + +out: + file_drop(fd); + if (src_sl != NULL) + sockaddrlist_free(src_sl); + if (dst_sl != NULL) + sockaddrlist_free(dst_sl); + return (error); +} + +int +connectx(struct proc *p, struct connectx_args *uap, int *retval) +{ + /* + * Due to similarity with a POSIX interface, define as + * an unofficial cancellation point.
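+ * (__pthread_testcancel() lets a pending pthread + * cancellation fire before the work begins, just + * as in the cancellable POSIX socket syscalls.)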
+ */ + __pthread_testcancel(1); + return (connectx_nocancel(p, uap, retval)); +} + +static int +connectit(struct socket *so, struct sockaddr *sa) +{ + int error; + AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa); #if CONFIG_MACF_SOCKET_SUBSET - if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) { - if (want_free) - FREE(sa, M_SONAME); + if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) + return (error); +#endif /* MAC_SOCKET_SUBSET */ + + socket_lock(so, 1); + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + error = EALREADY; + goto out; + } + error = soconnectlock(so, sa, 0); + if (error != 0) { + so->so_state &= ~SS_ISCONNECTING; goto out; } + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + error = EINPROGRESS; + goto out; + } + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + lck_mtx_t *mutex_held; + + if (so->so_proto->pr_getlock != NULL) + mutex_held = (*so->so_proto->pr_getlock)(so, 0); + else + mutex_held = so->so_proto->pr_domain->dom_mtx; + error = msleep((caddr_t)&so->so_timeo, mutex_held, + PSOCK | PCATCH, __func__, 0); + if (so->so_state & SS_DRAINING) { + error = ECONNABORTED; + } + if (error != 0) + break; + } + if (error == 0) { + error = so->so_error; + so->so_error = 0; + } +out: + socket_unlock(so, 1); + return (error); +} + +static int +connectitx(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid) +{ + struct sockaddr_entry *se; + int error; + + VERIFY(dst_sl != NULL && *dst_sl != NULL); + + TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) { + VERIFY(se->se_addr != NULL); + AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), + se->se_addr); +#if CONFIG_MACF_SOCKET_SUBSET + if ((error = mac_socket_check_connect(kauth_cred_get(), + so, se->se_addr)) != 0) + return (error); #endif /* MAC_SOCKET_SUBSET */ - socket_lock(so, 1); + } + socket_lock(so, 1); if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { - if (want_free) - FREE(sa, M_SONAME); - socket_unlock(so, 1); error = EALREADY; goto out; } - error = soconnectlock(so, sa, 0); - if (error) - goto bad; + error = soconnectxlocked(so, src_sl, dst_sl, p, ifscope, + aid, pcid, 0, NULL, 0); + if (error != 0) { + so->so_state &= ~SS_ISCONNECTING; + goto out; + } if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { - if (want_free) - FREE(sa, M_SONAME); - socket_unlock(so, 1); error = EINPROGRESS; goto out; } while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + lck_mtx_t *mutex_held; + if (so->so_proto->pr_getlock != NULL) mutex_held = (*so->so_proto->pr_getlock)(so, 0); else mutex_held = so->so_proto->pr_domain->dom_mtx; error = msleep((caddr_t)&so->so_timeo, mutex_held, - PSOCK | PCATCH, "connect", 0); - if ((so->so_state & SS_DRAINING)) { + PSOCK | PCATCH, __func__, 0); + if (so->so_state & SS_DRAINING) { error = ECONNABORTED; } - if (error) + if (error != 0) break; } if (error == 0) { error = so->so_error; so->so_error = 0; } -bad: - so->so_state &= ~SS_ISCONNECTING; +out: socket_unlock(so, 1); - if (want_free) - FREE(sa, M_SONAME); - if (error == ERESTART) - error = EINTR; + return (error); +} + +int +peeloff(struct proc *p, struct peeloff_args *uap, int *retval) +{ + /* + * Due to similarity with a POSIX interface, define as + * an unofficial cancellation point.
+ */ + __pthread_testcancel(1); + return (peeloff_nocancel(p, uap, retval)); +} + +static int +peeloff_nocancel(struct proc *p, struct peeloff_args *uap, int *retval) +{ + struct fileproc *fp; + struct socket *mp_so, *so = NULL; + int newfd, fd = uap->s; + short fflag; /* type must match fp->f_flag */ + int error; + + *retval = -1; + + error = fp_getfsock(p, fd, &fp, &mp_so); + if (error != 0) { + if (error == EOPNOTSUPP) + error = ENOTSOCK; + goto out_nofile; + } + if (mp_so == NULL) { + error = EBADF; + goto out; + } + + socket_lock(mp_so, 1); + error = sopeelofflocked(mp_so, uap->aid, &so); + if (error != 0) { + socket_unlock(mp_so, 1); + goto out; + } + VERIFY(so != NULL); + socket_unlock(mp_so, 0); /* keep ref on mp_so for us */ + + fflag = fp->f_flag; + error = falloc(p, &fp, &newfd, vfs_context_current()); + if (error != 0) { + /* drop this socket (probably ran out of file descriptors) */ + soclose(so); + sodereference(mp_so); /* our mp_so ref */ + goto out; + } + + fp->f_flag = fflag; + fp->f_ops = &socketops; + fp->f_data = (caddr_t)so; + + /* + * If the socket has been marked as inactive by sosetdefunct(), + * disallow further operations on it. + */ + if (so->so_flags & SOF_DEFUNCT) { + sodefunct(current_proc(), so, + SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL); + } + + proc_fdlock(p); + procfdtbl_releasefd(p, newfd, NULL); + fp_drop(p, newfd, fp, 1); + proc_fdunlock(p); + + sodereference(mp_so); /* our mp_so ref */ + *retval = newfd; + +out: + file_drop(fd); + +out_nofile: + return (error); +} + +int +disconnectx(struct proc *p, struct disconnectx_args *uap, int *retval) +{ + /* + * Due to similarity with a POSIX interface, define as + * an unofficial cancellation point. + */ + __pthread_testcancel(1); + return (disconnectx_nocancel(p, uap, retval)); +} + +static int +disconnectx_nocancel(struct proc *p, struct disconnectx_args *uap, int *retval) +{ +#pragma unused(p, retval) + struct socket *so; + int fd = uap->s; + int error; + + error = file_socket(fd, &so); + if (error != 0) + return (error); + if (so == NULL) { + error = EBADF; + goto out; + } + + error = sodisconnectx(so, uap->aid, uap->cid); out: file_drop(fd); return (error); @@ -714,7 +988,6 @@ socketpair(struct proc *p, struct socketpair_args *uap, goto free2; } fp1->f_flag = FREAD|FWRITE; - fp1->f_type = DTYPE_SOCKET; fp1->f_ops = &socketops; fp1->f_data = (caddr_t)so1; sv[0] = fd; @@ -724,7 +997,6 @@ socketpair(struct proc *p, struct socketpair_args *uap, goto free3; } fp2->f_flag = FREAD|FWRITE; - fp2->f_type = DTYPE_SOCKET; fp2->f_ops = &socketops; fp2->f_data = (caddr_t)so2; sv[1] = fd; @@ -868,8 +1140,8 @@ sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, #endif /* MAC_SOCKET_SUBSET */ len = uio_resid(uiop); - error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control, - flags); + error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, + control, flags); if (error != 0) { if (uio_resid(uiop) != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) @@ -899,11 +1171,13 @@ int sendto(struct proc *p, struct sendto_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval)); + return (sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval)); } int -sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, int32_t *retval) +sendto_nocancel(struct proc *p, + struct sendto_nocancel_args *uap, + int32_t *retval) { struct user_msghdr msg; int error; @@ -934,42 +1208,6 @@ sendto_nocancel(struct
proc *p, struct sendto_nocancel_args *uap, int32_t *retva uio_free(auio); } -#if HACK_FOR_4056224 - /* - * Radar 4056224 - * Temporary workaround to let send() and recv() work over - * a pipe for binary compatibility - * This will be removed in the release following Tiger - */ - if (error == ENOTSOCK) { - struct fileproc *fp; - - if (fp_lookup(p, uap->s, &fp, 0) == 0) { - (void) fp_drop(p, uap->s, fp, 0); - - if (fp->f_type == DTYPE_PIPE) { - struct write_args write_uap; - user_ssize_t write_retval; - - if (p->p_pid > last_pid_4056224) { - last_pid_4056224 = p->p_pid; - - printf("%s[%d] uses send/recv " - "on a pipe\n", p->p_comm, p->p_pid); - } - - bzero(&write_uap, sizeof (struct write_args)); - write_uap.fd = uap->s; - write_uap.cbuf = uap->buf; - write_uap.nbyte = uap->len; - - error = write(p, &write_uap, &write_retval); - *retval = (int)write_retval; - } - } - } -#endif /* HACK_FOR_4056224 */ - KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0); return (error); @@ -985,7 +1223,7 @@ int sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval) { __pthread_testcancel(1); - return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval)); + return (sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval)); } int @@ -1066,7 +1304,10 @@ sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *ret user_msg.msg_iov = CAST_USER_ADDR_T(iovp); /* finish setup of uio_t */ - uio_calculateresid(auio); + error = uio_calculateresid(auio); + if (error) { + goto done; + } } else { user_msg.msg_iov = 0; } @@ -1110,7 +1351,8 @@ static int recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, user_addr_t namelenp, int32_t *retval) { - int len, error; + ssize_t len; + int error; struct mbuf *m, *control = 0; user_addr_t ctlbuf; struct socket *so; @@ -1147,6 +1389,7 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, */ if (!(so->so_state & SS_DEFUNCT) && !(so->so_state & SS_ISCONNECTED) && + !(so->so_proto->pr_flags & PR_CONNREQUIRED) && (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) goto out1; #endif /* MAC_SOCKET_SUBSET */ @@ -1208,9 +1451,9 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, struct cmsghdr *cp = mtod(m, struct cmsghdr *); int cp_size = CMSG_ALIGN(cp->cmsg_len); int buflen = m->m_len; - + while (buflen > 0 && len > 0) { - + /* SCM_TIMESTAMP hack because struct timeval has a * different size for 32 bits and 64 bits processes @@ -1220,24 +1463,24 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer; int tmp_space; struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp); - + tmp_cp->cmsg_level = SOL_SOCKET; tmp_cp->cmsg_type = SCM_TIMESTAMP; - + if (proc_is64bit(p)) { struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp); - + tv64->tv_sec = tv->tv_sec; tv64->tv_usec = tv->tv_usec; - + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval)); tmp_space = CMSG_SPACE(sizeof(struct user64_timeval)); } else { struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp); - + tv32->tv_sec = tv->tv_sec; tv32->tv_usec = tv->tv_usec; - + tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval)); tmp_space = CMSG_SPACE(sizeof(struct user32_timeval)); } @@ -1250,35 +1493,35 @@ recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, error = copyout(tmp_buffer, ctlbuf, tocopy); if (error) goto out; - + } else { - + if (cp_size > buflen) { 
panic("cp_size > buflen, something wrong with alignment!"); } - + if (len >= cp_size) { tocopy = cp_size; } else { mp->msg_flags |= MSG_CTRUNC; tocopy = len; } - + error = copyout((caddr_t) cp, ctlbuf, tocopy); if (error) goto out; } - - + + ctlbuf += tocopy; len -= tocopy; - + buflen -= cp_size; cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size); cp_size = CMSG_ALIGN(cp->cmsg_len); } - + m = m->m_next; } mp->msg_controllen = ctlbuf - mp->msg_control; @@ -1354,42 +1597,6 @@ recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *r uio_free(auio); } -#if HACK_FOR_4056224 - /* - * Radar 4056224 - * Temporary workaround to let send() and recv() work over - * a pipe for binary compatibility - * This will be removed in the release following Tiger - */ - if (error == ENOTSOCK && proc_is64bit(p) == 0) { - struct fileproc *fp; - - if (fp_lookup(p, uap->s, &fp, 0) == 0) { - (void) fp_drop(p, uap->s, fp, 0); - - if (fp->f_type == DTYPE_PIPE) { - struct read_args read_uap; - user_ssize_t read_retval; - - if (p->p_pid > last_pid_4056224) { - last_pid_4056224 = p->p_pid; - - printf("%s[%d] uses send/recv on " - "a pipe\n", p->p_comm, p->p_pid); - } - - bzero(&read_uap, sizeof (struct read_args)); - read_uap.fd = uap->s; - read_uap.cbuf = uap->buf; - read_uap.nbyte = uap->len; - - error = read(p, &read_uap, &read_retval); - *retval = (int)read_retval; - } - } - } -#endif /* HACK_FOR_4056224 */ - KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0); return (error); @@ -1495,7 +1702,10 @@ recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *ret goto done; /* finish setup of uio_t */ - uio_calculateresid(auio); + error = uio_calculateresid(auio); + if (error) { + goto done; + } error = recvit(p, uap->s, &user_msg, auio, 0, retval); if (!error) { @@ -1618,7 +1828,7 @@ setsockopt(struct proc *p, struct setsockopt_args *uap, &sopt)) != 0) goto out; #endif /* MAC_SOCKET_SUBSET */ - error = sosetopt(so, &sopt); + error = sosetoptlock(so, &sopt, 1); /* will lock socket */ out: file_drop(uap->s); return (error); @@ -1674,7 +1884,7 @@ getsockopt(struct proc *p, struct getsockopt_args *uap, &sopt)) != 0) goto out; #endif /* MAC_SOCKET_SUBSET */ - error = sogetopt((struct socket *)so, &sopt); + error = sogetoptlock((struct socket *)so, &sopt, 1); /* will lock */ if (error == 0) { valsize = sopt.sopt_valsize; error = copyout((caddr_t)&valsize, uap->avalsize, @@ -1845,7 +2055,7 @@ sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) int error; size_t alloc_buflen = (size_t)buflen; - + if(alloc_buflen > INT_MAX/2) return (EINVAL); #ifdef __LP64__ @@ -1923,7 +2133,7 @@ getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr, * handle it. */ if (translate_unspec && sa->sa_family == AF_UNSPEC && - INP_CHECK_SOCKAF(so, AF_INET) && + SOCK_CHECK_DOM(so, PF_INET) && len == sizeof (struct sockaddr_in)) sa->sa_family = AF_INET; @@ -1960,7 +2170,7 @@ getsockaddr_s(struct socket *so, struct sockaddr_storage *ss, * handle it. */ if (translate_unspec && ss->ss_family == AF_UNSPEC && - INP_CHECK_SOCKAF(so, AF_INET) && + SOCK_CHECK_DOM(so, PF_INET) && len == sizeof (struct sockaddr_in)) ss->ss_family = AF_INET; @@ -1969,21 +2179,95 @@ getsockaddr_s(struct socket *so, struct sockaddr_storage *ss, return (error); } -#if SENDFILE +/* + * Hard limit on the number of source and/or destination addresses + * that can be specified by an application. 
+ */ +#define SOCKADDRLIST_MAX_ENTRIES 64 + +static int +getsockaddrlist(struct socket *so, struct sockaddr_list **slp, + user_addr_t uaddr, socklen_t uaddrlen, boolean_t xlate_unspec) +{ + struct sockaddr_list *sl; + int error = 0; -SYSCTL_DECL(_kern_ipc); + *slp = NULL; + + if (uaddr == USER_ADDR_NULL || uaddrlen == 0) + return (EINVAL); + + sl = sockaddrlist_alloc(M_WAITOK); + if (sl == NULL) + return (ENOMEM); + + VERIFY(sl->sl_cnt == 0); + while (uaddrlen > 0 && sl->sl_cnt < SOCKADDRLIST_MAX_ENTRIES) { + struct sockaddr_storage ss; + struct sockaddr_entry *se; + struct sockaddr *sa; + + if (uaddrlen < sizeof (struct sockaddr)) { + error = EINVAL; + break; + } + + bzero(&ss, sizeof (ss)); + error = copyin(uaddr, (caddr_t)&ss, sizeof (struct sockaddr)); + if (error != 0) + break; + + /* getsockaddr does the same but we need them now */ + if (uaddrlen < ss.ss_len || + ss.ss_len < offsetof(struct sockaddr, sa_data[0])) { + error = EINVAL; + break; + } else if (ss.ss_len > sizeof (ss)) { + /* + * sockaddr_storage size is less than SOCK_MAXADDRLEN, + * so the check here is inclusive. We could use the + * latter instead, but that seems like overkill for now. + */ + error = ENAMETOOLONG; + break; + } + + se = sockaddrentry_alloc(M_WAITOK); + if (se == NULL) + break; + + sockaddrlist_insert(sl, se); + + error = getsockaddr(so, &sa, uaddr, ss.ss_len, xlate_unspec); + if (error != 0) + break; + + VERIFY(sa != NULL && sa->sa_len == ss.ss_len); + se->se_addr = sa; + + uaddr += ss.ss_len; + VERIFY(((signed)uaddrlen - ss.ss_len) >= 0); + uaddrlen -= ss.ss_len; + } + + if (error != 0) + sockaddrlist_free(sl); + else + *slp = sl; + + return (error); +} + +#if SENDFILE #define SFUIOBUFS 64 -static int sendfileuiobufs = SFUIOBUFS; -SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW | CTLFLAG_LOCKED, &sendfileuiobufs, - 0, ""); /* Macros to compute the number of mbufs needed depending on cluster size */ #define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1) #define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1) -/* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */ -#define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT) +/* Upper send limit in bytes (SFUIOBUFS * PAGESIZE) */ +#define SENDFILE_MAX_BYTES (SFUIOBUFS << PGSHIFT) /* Upper send limit in the number of mbuf clusters */ #define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES) @@ -2208,16 +2492,16 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) * mbufs that point to the file pages. */ socket_lock(so, 1); - error = sblock(&so->so_snd, M_WAIT); + error = sblock(&so->so_snd, SBL_WAIT); if (error) { socket_unlock(so, 1); goto done2; } for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { mbuf_t m0 = NULL, m; - unsigned int nbufs = sendfileuiobufs, i; + unsigned int nbufs = SFUIOBUFS, i; uio_t auio; - char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */ + char uio_buf[UIO_SIZEOF(SFUIOBUFS)]; /* 1 KB !!!
*/ size_t uiolen; user_ssize_t rlen; off_t pgoff; @@ -2273,7 +2557,7 @@ sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) pktlen = mbuf_pkt_maxlen(m0); if (pktlen < (size_t)xfsize) xfsize = pktlen; - + auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof (uio_buf)); if (auio == NULL) { @@ -2392,7 +2676,7 @@ retry_space: } goto retry_space; } - + struct mbuf *control = NULL; { /* @@ -2423,7 +2707,7 @@ retry_space: goto done3; } } - sbunlock(&so->so_snd, 0); /* will unlock socket */ + sbunlock(&so->so_snd, FALSE); /* will unlock socket */ /* * Send trailers. Wimp out and use writev(2). */ @@ -2454,7 +2738,7 @@ done: (unsigned int)(sbytes & 0x0ffffffff), error, 0); return (error); done3: - sbunlock(&so->so_snd, 0); /* will unlock socket */ + sbunlock(&so->so_snd, FALSE); /* will unlock socket */ goto done2; } diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c index 2368c19bd..a01ac5eb2 100644 --- a/bsd/kern/uipc_usrreq.c +++ b/bsd/kern/uipc_usrreq.c @@ -73,6 +73,7 @@ #include #include /* XXX must be before */ #include +#include #include #include #include @@ -674,18 +675,32 @@ uipc_sockaddr(struct socket *so, struct sockaddr **nam) } struct pr_usrreqs uipc_usrreqs = { - uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, - uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, - uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, - uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, - sosend, soreceive, pru_sopoll_notsupp + .pru_abort = uipc_abort, + .pru_accept = uipc_accept, + .pru_attach = uipc_attach, + .pru_bind = uipc_bind, + .pru_connect = uipc_connect, + .pru_connect2 = uipc_connect2, + .pru_detach = uipc_detach, + .pru_disconnect = uipc_disconnect, + .pru_listen = uipc_listen, + .pru_peeraddr = uipc_peeraddr, + .pru_rcvd = uipc_rcvd, + .pru_send = uipc_send, + .pru_sense = uipc_sense, + .pru_shutdown = uipc_shutdown, + .pru_sockaddr = uipc_sockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; int uipc_ctloutput(struct socket *so, struct sockopt *sopt) { struct unpcb *unp = sotounpcb(so); - int error; + int error = 0; + pid_t peerpid; + struct socket *peerso; switch (sopt->sopt_dir) { case SOPT_GET: @@ -702,16 +717,41 @@ uipc_ctloutput(struct socket *so, struct sockopt *sopt) } break; case LOCAL_PEERPID: - if (unp->unp_conn != NULL) { - if (unp->unp_conn->unp_socket != NULL) { - pid_t peerpid = unp->unp_conn->unp_socket->last_pid; - error = sooptcopyout(sopt, &peerpid, sizeof (peerpid)); - } else { - panic("peer is connected but has no socket?"); - } - } else { + case LOCAL_PEEREPID: + if (unp->unp_conn == NULL) { error = ENOTCONN; + break; } + peerso = unp->unp_conn->unp_socket; + if (peerso == NULL) + panic("peer is connected but has no socket?"); + unp_get_locks_in_order(so, peerso); + if (sopt->sopt_name == LOCAL_PEEREPID && + peerso->so_flags & SOF_DELEGATED) + peerpid = peerso->e_pid; + else + peerpid = peerso->last_pid; + socket_unlock(peerso, 1); + error = sooptcopyout(sopt, &peerpid, sizeof (peerpid)); + break; + case LOCAL_PEERUUID: + case LOCAL_PEEREUUID: + if (unp->unp_conn == NULL) { + error = ENOTCONN; + break; + } + peerso = unp->unp_conn->unp_socket; + if (peerso == NULL) + panic("peer is connected but has no socket?"); + unp_get_locks_in_order(so, peerso); + if (sopt->sopt_name == LOCAL_PEEREUUID && + peerso->so_flags & SOF_DELEGATED) + error = sooptcopyout(sopt, &peerso->e_uuid, + sizeof (peerso->e_uuid)); + else + error = sooptcopyout(sopt, &peerso->last_uuid, 
+ sizeof (peerso->last_uuid)); + socket_unlock(peerso, 1); break; default: error = EOPNOTSUPP; @@ -723,6 +763,7 @@ uipc_ctloutput(struct socket *so, struct sockopt *sopt) error = EOPNOTSUPP; break; } + return (error); } @@ -1637,7 +1678,6 @@ SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", "List of active local stream sockets"); -#if !CONFIG_EMBEDDED static int unp_pcblist64 SYSCTL_HANDLER_ARGS @@ -1784,7 +1824,6 @@ SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64", "List of active local stream sockets 64 bit"); -#endif /* !CONFIG_EMBEDDED */ static void unp_shutdown(struct unpcb *unp) @@ -1865,11 +1904,9 @@ unp_externalize(struct mbuf *rights) if (fdalloc(p, 0, &f)) panic("unp_externalize:fdalloc"); fg = rp[i]; - MALLOC_ZONE(fp, struct fileproc *, sizeof (struct fileproc), - M_FILEPROC, M_WAITOK); + fp = fileproc_alloc_init(NULL); if (fp == NULL) panic("unp_externalize: MALLOC_ZONE"); - bzero(fp, sizeof (struct fileproc)); fp->f_iocount = 0; fp->f_fglob = fg; fg_removeuipc(fg); @@ -1949,9 +1986,14 @@ unp_internalize(struct mbuf *control, proc_t p) if (((error = fdgetf_noref(p, fds[i], &tmpfp)) != 0)) { proc_fdunlock(p); return (error); - } else if (!filetype_issendable(tmpfp->f_fglob->fg_type)) { + } else if (!filetype_issendable(FILEGLOB_DTYPE(tmpfp->f_fglob))) { proc_fdunlock(p); return (EINVAL); + } else if (FP_ISGUARDED(tmpfp, GUARD_SOCKET_IPC)) { + error = fp_guard_exception(p, + fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC); + proc_fdunlock(p); + return (error); } } rp = (struct fileglob **)(cm + 1); @@ -2063,12 +2105,12 @@ unp_gc(void) * accessible and not already marked so. * Now check if it is possibly one of OUR sockets. 
*/ - if (fg->fg_type != DTYPE_SOCKET || + if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET || (so = (struct socket *)fg->fg_data) == 0) { lck_mtx_unlock(&fg->fg_lock); continue; } - if (so->so_proto->pr_domain != &localdomain || + if (so->so_proto->pr_domain != localdomain || (so->so_proto->pr_flags&PR_RIGHTS) == 0) { lck_mtx_unlock(&fg->fg_lock); continue; @@ -2184,7 +2226,8 @@ unp_gc(void) tfg = *fpp; - if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) { + if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET && + tfg->fg_data != NULL) { so = (struct socket *)(tfg->fg_data); socket_lock(so, 0); diff --git a/bsd/kern/vm_pressure.c b/bsd/kern/vm_pressure.c index f2ae46185..dfb50afbd 100644 --- a/bsd/kern/vm_pressure.c +++ b/bsd/kern/vm_pressure.c @@ -41,8 +41,11 @@ #include #include #include +#include #include +#include + #if CONFIG_MEMORYSTATUS #include #endif @@ -53,12 +56,15 @@ #define VM_PRESSURE_MINIMUM_RSIZE 10 /* MB */ #define VM_PRESSURE_NOTIFY_WAIT_PERIOD 10000 /* milliseconds */ -static void vm_pressure_klist_lock(void); -static void vm_pressure_klist_unlock(void); +void vm_pressure_klist_lock(void); +void vm_pressure_klist_unlock(void); static void vm_dispatch_memory_pressure(void); +void vm_reset_active_list(void); + +#if !(CONFIG_MEMORYSTATUS && CONFIG_JETSAM) static kern_return_t vm_try_pressure_candidates(void); -static void vm_reset_active_list(void); +#endif static lck_mtx_t vm_pressure_klist_mutex; @@ -78,11 +84,11 @@ void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr) { lck_mtx_init(&vm_pressure_klist_mutex, grp, attr); } -static void vm_pressure_klist_lock(void) { +void vm_pressure_klist_lock(void) { lck_mtx_lock(&vm_pressure_klist_mutex); } -static void vm_pressure_klist_unlock(void) { +void vm_pressure_klist_unlock(void) { lck_mtx_unlock(&vm_pressure_klist_mutex); } @@ -155,41 +161,29 @@ void vm_pressure_proc_cleanup(proc_t p) vm_pressure_klist_unlock(); } +/* + * Used by the vm_pressure_thread which is + * signalled from within vm_pageout_scan(). + */ void consider_vm_pressure_events(void) { vm_dispatch_memory_pressure(); } +#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM + static void vm_dispatch_memory_pressure(void) { - vm_pressure_klist_lock(); - - if (!SLIST_EMPTY(&vm_pressure_klist)) { - - VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n"); - - if (vm_try_pressure_candidates() == KERN_SUCCESS) { - vm_pressure_klist_unlock(); - return; - } - - } - - VM_PRESSURE_DEBUG(1, "[vm_pressure] could not find suitable event candidate\n"); - - vm_reset_active_list(); - - vm_pressure_klist_unlock(); + /* Update the pressure level and target the foreground or next-largest process as appropriate */ + memorystatus_update_vm_pressure(FALSE); } -#if CONFIG_JETSAM - /* Jetsam aware version. 
Called with lock held */ -static struct knote * vm_find_knote_from_pid(pid_t pid) { +static struct knote *vm_find_knote_from_pid(pid_t pid, struct klist *list) { struct knote *kn = NULL; - SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) { + SLIST_FOREACH(kn, list, kn_selnext) { struct proc *p; pid_t current_pid; @@ -204,83 +198,288 @@ static struct knote * vm_find_knote_from_pid(pid_t pid) { return kn; } -static kern_return_t vm_try_pressure_candidates(void) +int vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked) { + int ret = EINVAL; + struct knote *kn; + + VM_PRESSURE_DEBUG(1, "vm_dispatch_pressure_note_to_pid(): pid %d\n", pid); + + if (!locked) { + vm_pressure_klist_lock(); + } + + /* + * Because we're specifically targeting a process here, we don't care + * if a warning has already been sent and it's moved to the dormant + * list; check that too. + */ + kn = vm_find_knote_from_pid(pid, &vm_pressure_klist); + if (kn) { + KNOTE(&vm_pressure_klist, pid); + ret = 0; + } else { + kn = vm_find_knote_from_pid(pid, &vm_pressure_klist_dormant); + if (!kn) { + KNOTE(&vm_pressure_klist_dormant, pid); + } + } + + if (!locked) { + vm_pressure_klist_unlock(); + } + + return ret; +} + +void vm_find_pressure_foreground_candidates(void) { - struct knote *kn = NULL; - pid_t target_pid = (pid_t)-1; - - /* If memory is low, and there's a pid to target... */ - target_pid = memorystatus_request_vm_pressure_candidate(); - while (target_pid != -1) { - /* ...look it up in the list, and break if found... */ - if ((kn = vm_find_knote_from_pid(target_pid))) { - break; - } + struct knote *kn, *kn_tmp; + struct klist dispatch_klist = { NULL }; - /* ...otherwise, go round again. */ - target_pid = memorystatus_request_vm_pressure_candidate(); - } + vm_pressure_klist_lock(); + proc_list_lock(); + + /* Find the foreground processes. 
*/ + SLIST_FOREACH_SAFE(kn, &vm_pressure_klist, kn_selnext, kn_tmp) { + proc_t p = kn->kn_kq->kq_p; - if (NULL == kn) { - VM_PRESSURE_DEBUG(0, "[vm_pressure] can't find candidate pid\n"); - return KERN_FAILURE; - } + if (memorystatus_is_foreground_locked(p)) { + KNOTE_DETACH(&vm_pressure_klist, kn); + KNOTE_ATTACH(&dispatch_klist, kn); + } + } - /* ...and dispatch the note */ - VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d, free pages %d\n", kn->kn_kq->kq_p->p_pid, memorystatus_available_pages); + SLIST_FOREACH_SAFE(kn, &vm_pressure_klist_dormant, kn_selnext, kn_tmp) { + proc_t p = kn->kn_kq->kq_p; - KNOTE(&vm_pressure_klist, target_pid); - - memorystatus_send_pressure_note(target_pid); + if (memorystatus_is_foreground_locked(p)) { + KNOTE_DETACH(&vm_pressure_klist_dormant, kn); + KNOTE_ATTACH(&dispatch_klist, kn); + } + } - return KERN_SUCCESS; + proc_list_unlock(); + + /* Dispatch pressure notifications accordingly */ + SLIST_FOREACH_SAFE(kn, &dispatch_klist, kn_selnext, kn_tmp) { + proc_t p = kn->kn_kq->kq_p; + + proc_list_lock(); + if (p != proc_ref_locked(p)) { + proc_list_unlock(); + KNOTE_DETACH(&dispatch_klist, kn); + KNOTE_ATTACH(&vm_pressure_klist_dormant, kn); + continue; + } + proc_list_unlock(); + + VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d\n", kn->kn_kq->kq_p->p_pid); + KNOTE(&dispatch_klist, p->p_pid); + KNOTE_DETACH(&dispatch_klist, kn); + KNOTE_ATTACH(&vm_pressure_klist_dormant, kn); + microuptime(&p->vm_pressure_last_notify_tstamp); + memorystatus_send_pressure_note(p->p_pid); + proc_rele(p); + } + + vm_pressure_klist_unlock(); } -static void vm_reset_active_list(void) { - /* No-op */ +void vm_find_pressure_candidate(void) +{ + struct knote *kn = NULL, *kn_max = NULL; + unsigned int resident_max = 0; + pid_t target_pid = -1; + struct klist dispatch_klist = { NULL }; + struct timeval curr_tstamp = {0, 0}; + int elapsed_msecs = 0; + proc_t target_proc = PROC_NULL; + kern_return_t kr = KERN_SUCCESS; + + microuptime(&curr_tstamp); + + vm_pressure_klist_lock(); + + SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {\ + struct mach_task_basic_info basic_info; + mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT; + unsigned int resident_size = 0; + proc_t p = PROC_NULL; + struct task* t = TASK_NULL; + + p = kn->kn_kq->kq_p; + proc_list_lock(); + if (p != proc_ref_locked(p)) { + p = PROC_NULL; + proc_list_unlock(); + continue; + } + proc_list_unlock(); + + t = (struct task *)(p->task); + + timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp); + elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000; + + if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) { + proc_rele(p); + continue; + } + + if (!memorystatus_bg_pressure_eligible(p)) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid); + proc_rele(p); + continue; + } + + if( ( kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS ) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed\n", p->p_pid); + proc_rele(p); + continue; + } + + /* + * We don't want a small process to block large processes from + * being notified again. 
+ */ + resident_size = (basic_info.resident_size)/(1024 * 1024); + if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) { + if (resident_size > resident_max) { + resident_max = resident_size; + kn_max = kn; + target_pid = p->p_pid; + target_proc = p; + } + } else { + /* There was no candidate with enough resident memory to scavenge */ + VM_PRESSURE_DEBUG(1, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size); + } + proc_rele(p); + } + + if (kn_max == NULL || target_pid == -1) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] - no target found!\n"); + goto exit; + } + + VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0); + VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); + + KNOTE_DETACH(&vm_pressure_klist, kn_max); + + target_proc = proc_find(target_pid); + if (target_proc != PROC_NULL) { + KNOTE_ATTACH(&dispatch_klist, kn_max); + KNOTE(&dispatch_klist, target_pid); + KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max); + memorystatus_send_pressure_note(target_pid); + microuptime(&target_proc->vm_pressure_last_notify_tstamp); + proc_rele(target_proc); + } + +exit: + vm_pressure_klist_unlock(); } -#if DEVELOPMENT || DEBUG +#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ -/* Test purposes only */ -boolean_t vm_dispatch_pressure_note_to_pid(pid_t pid) { - struct knote *kn; - +struct knote * +vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level); + +kern_return_t vm_pressure_notification_without_levels(void); +kern_return_t vm_pressure_notify_dispatch_vm_clients(void); + +kern_return_t +vm_pressure_notify_dispatch_vm_clients(void) +{ vm_pressure_klist_lock(); - - kn = vm_find_knote_from_pid(pid); - if (kn) { - KNOTE(&vm_pressure_klist, pid); + + if (SLIST_EMPTY(&vm_pressure_klist)) { + vm_reset_active_list(); } - + + if (!SLIST_EMPTY(&vm_pressure_klist)) { + + VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n"); + + if (KERN_SUCCESS == vm_try_pressure_candidates()) { + vm_pressure_klist_unlock(); + return KERN_SUCCESS; + } + } + + VM_PRESSURE_DEBUG(1, "[vm_pressure] could not find suitable event candidate\n"); + vm_pressure_klist_unlock(); - - return kn ? TRUE : FALSE; + + return KERN_FAILURE; } -#endif /* DEVELOPMENT || DEBUG */ +static void vm_dispatch_memory_pressure(void) +{ + memorystatus_update_vm_pressure(FALSE); +} -#else /* CONFIG_MEMORYSTATUS */ +extern vm_pressure_level_t +convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t); -static kern_return_t vm_try_pressure_candidates(void) +struct knote * +vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level) { - struct knote *kn = NULL, *kn_max = NULL; - unsigned int resident_max = 0; - pid_t target_pid = -1; - struct klist dispatch_klist = { NULL }; - kern_return_t kr = KERN_SUCCESS; - struct timeval curr_tstamp = {0, 0}; - int elapsed_msecs = 0; - proc_t target_proc = PROC_NULL; + struct knote *kn = NULL, *kn_max = NULL; + unsigned int resident_max = 0; + kern_return_t kr = KERN_SUCCESS; + struct timeval curr_tstamp = {0, 0}; + int elapsed_msecs = 0; + int selected_task_importance = 0; + static int pressure_snapshot = -1; + boolean_t pressure_increase = FALSE; + + if (level != -1) { + + if (pressure_snapshot == -1) { + /* + * Initial snapshot. 
+ */ + pressure_snapshot = level; + pressure_increase = TRUE; + } else { + + if (level >= pressure_snapshot) { + pressure_increase = TRUE; + } else { + pressure_increase = FALSE; + } + + pressure_snapshot = level; + } + } + + if ((level > 0) && (pressure_increase) == TRUE) { + /* + * We'll start by considering the largest + * unimportant task in our list. + */ + selected_task_importance = INT_MAX; + } else { + /* + * We'll start by considering the largest + * important task in our list. + */ + selected_task_importance = 0; + } microuptime(&curr_tstamp); - - SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) { + + SLIST_FOREACH(kn, candidate_list, kn_selnext) { + struct mach_task_basic_info basic_info; mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT; unsigned int resident_size = 0; proc_t p = PROC_NULL; struct task* t = TASK_NULL; + int curr_task_importance = 0; + boolean_t consider_knote = FALSE; p = kn->kn_kq->kq_p; proc_list_lock(); @@ -296,29 +495,84 @@ static kern_return_t vm_try_pressure_candidates(void) timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp); elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000; - if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) { + if ((level == -1) && (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD)) { proc_rele(p); continue; } + if (level != -1) { + /* + * For the level based notifications, check and see if this knote is + * registered for the current level. + */ + vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level); + + if ((kn->kn_sfflags & dispatch_level) == 0) { + proc_rele(p); + continue; + } + } + if( ( kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS ) { VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr); proc_rele(p); continue; } + curr_task_importance = task_importance_estimate(t); + /* * We don't want a small process to block large processes from * being notified again. */ resident_size = (basic_info.resident_size)/(MB); + if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) { - if (resident_size > resident_max) { - resident_max = resident_size; - kn_max = kn; - target_pid = p->p_pid; - target_proc = p; - } + + if (level > 0) { + /* + * Warning or Critical Pressure. + */ + if (pressure_increase) { + if ((curr_task_importance <= selected_task_importance) && (resident_size > resident_max)) { + if (task_has_been_notified(t, level) == FALSE) { + consider_knote = TRUE; + } + } + } else { + if ((curr_task_importance >= selected_task_importance) && (resident_size > resident_max)) { + if (task_has_been_notified(t, level) == FALSE) { + consider_knote = TRUE; + } + } + } + } else if (level == 0) { + /* + * Pressure back to normal. + */ + if ((curr_task_importance >= selected_task_importance) && (resident_size > resident_max)) { + + if ((task_has_been_notified(t, kVMPressureWarning) == TRUE) || (task_has_been_notified(t, kVMPressureCritical) == TRUE)) { + consider_knote = TRUE; + } + } + } else if (level == -1) { + + /* + * Simple (importance and level)-free behavior based solely on RSIZE. 
+ */ + if (resident_size > resident_max) { + consider_knote = TRUE; + } + } + + + if (consider_knote) { + resident_max = resident_size; + kn_max = kn; + selected_task_importance = curr_task_importance; + consider_knote = FALSE; /* reset for the next candidate */ + } } else { /* There was no candidate with enough resident memory to scavenge */ VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size); @@ -326,33 +580,66 @@ static kern_return_t vm_try_pressure_candidates(void) proc_rele(p); } - if (kn_max == NULL || target_pid == -1) { - return KERN_FAILURE; + if (kn_max) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); } - VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0); - VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); + return kn_max; +} + +/* + * vm_pressure_klist_lock is held for this routine. + */ +kern_return_t vm_pressure_notification_without_levels(void) +{ + struct knote *kn_max = NULL; + pid_t target_pid = -1; + struct klist dispatch_klist = { NULL }; + proc_t target_proc = PROC_NULL; + + kn_max = vm_pressure_select_optimal_candidate_to_notify(&vm_pressure_klist, -1); + if (kn_max == NULL) { + return KERN_FAILURE; + } + + target_proc = kn_max->kn_kq->kq_p; + KNOTE_DETACH(&vm_pressure_klist, kn_max); - target_proc = proc_find(target_pid); if (target_proc != PROC_NULL) { + + target_pid = target_proc->p_pid; + + memoryshot(VM_PRESSURE_EVENT, DBG_FUNC_NONE); + KNOTE_ATTACH(&dispatch_klist, kn_max); KNOTE(&dispatch_klist, target_pid); KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max); microuptime(&target_proc->vm_pressure_last_notify_tstamp); - proc_rele(target_proc); } return KERN_SUCCESS; } +static kern_return_t vm_try_pressure_candidates(void) +{ + /* + * This takes care of candidates that use NOTE_VM_PRESSURE. + * It's a notification without indication of the level + * of memory pressure. + */ + return (vm_pressure_notification_without_levels()); +} + +#endif /* !(CONFIG_MEMORYSTATUS && CONFIG_JETSAM) */ + /* * Remove all elements from the dormant list and place them on the active list. * Called with klist lock held. 
*/ -static void vm_reset_active_list(void) { +void vm_reset_active_list(void) { /* Re-charge the main list from the dormant list if possible */ if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) { struct knote *kn; @@ -366,5 +653,3 @@ static void vm_reset_active_list(void) { } } } - -#endif /* CONFIG_MEMORYSTATUS */ diff --git a/bsd/kern/vm_pressure.h b/bsd/kern/vm_pressure.h index 059e9c23c..5386ddd1a 100644 --- a/bsd/kern/vm_pressure.h +++ b/bsd/kern/vm_pressure.h @@ -30,6 +30,7 @@ #define VM_PRESSURE_H #include +#include void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr); @@ -39,8 +40,10 @@ void vm_knote_unregister(struct knote *); void consider_vm_pressure_events(void); void vm_pressure_proc_cleanup(proc_t); -#if CONFIG_MEMORYSTATUS && (DEVELOPMENT || DEBUG) -boolean_t vm_dispatch_pressure_note_to_pid(pid_t pid); -#endif +#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM +void vm_find_pressure_foreground_candidates(void); +void vm_find_pressure_candidate(void); +boolean_t vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked); +#endif /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ #endif /* VM_PRESSURE_H */ diff --git a/bsd/libkern/bcmp.c b/bsd/libkern/bcmp.c deleted file mode 100644 index ea4d1d54f..000000000 --- a/bsd/libkern/bcmp.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1987, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#warning bcmp() not optimized for ppc -#if defined(ppc) -/* HP, i386 has its own tuned version of bcmp, so should ppc! */ - -#if defined(LIBC_SCCS) && !defined(lint) -/* static char sccsid[] = "@(#)bcmp.c 8.1 (Berkeley) 6/4/93"; */ -#endif /* LIBC_SCCS and not lint */ - -#include - -/* - * bcmp -- vax cmpc3 instruction - */ -int -bcmp(b1, b2, length) - const void *b1, *b2; - register size_t length; -{ - register char *p1, *p2; - - if (length == 0) - return(0); - p1 = (char *)b1; - p2 = (char *)b2; - do - if (*p1++ != *p2++) - break; - while (--length); - return(length); -} - -#endif /* ppc */ diff --git a/bsd/libkern/inet_ntop.c b/bsd/libkern/inet_ntop.c deleted file mode 100644 index dc0727318..000000000 --- a/bsd/libkern/inet_ntop.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Copyright 1994, 1995 Massachusetts Institute of Technology - * - * Permission to use, copy, modify, and distribute this software and - * its documentation for any purpose and without fee is hereby - * granted, provided that both the above copyright notice and this - * permission notice appear in all copies, that both the above - * copyright notice and this permission notice appear in all - * supporting documentation, and that the name of M.I.T. not be used - * in advertising or publicity pertaining to distribution of the - * software without specific, written prior permission. M.I.T. makes - * no representations about the suitability of this software for any - * purpose. It is provided "as is" without express or implied - * warranty. - * - * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS - * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT - * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -#include - -#include - -static const char *hexchars = "0123456789abcdef"; - -static const char * -inet_ntop4(const struct in_addr *addr, char *buf, socklen_t len) -{ - const u_int8_t *ap = (const u_int8_t *)&addr->s_addr; - char tmp[MAX_IPv4_STR_LEN]; /* max length of ipv4 addr string */ - int fulllen; - - /* - * snprintf returns number of bytes printed (not including NULL) or - * number of bytes that would have been printed if more than would - * fit - */ - fulllen = snprintf(tmp, sizeof(tmp), "%d.%d.%d.%d", - ap[0], ap[1], ap[2], ap[3]); - if (fulllen >= (int)len) { - return NULL; - } - - bcopy(tmp, buf, fulllen + 1); - - return buf; -} - -static const char * -inet_ntop6(const struct in6_addr *addr, char *dst, socklen_t size) -{ - char hexa[8][5], tmp[MAX_IPv6_STR_LEN]; - int zr[8]; - size_t len; - int32_t i, j, k, skip; - uint8_t x8, hx8; - uint16_t x16; - struct in_addr a4; - - if (addr == NULL) return NULL; - - bzero(tmp, sizeof(tmp)); - - /* check for mapped or compat addresses */ - i = IN6_IS_ADDR_V4MAPPED(addr); - j = IN6_IS_ADDR_V4COMPAT(addr); - if ((i != 0) || (j != 0)) - { - char tmp2[16]; /* max length of ipv4 addr string */ - a4.s_addr = addr->__u6_addr.__u6_addr32[3]; - len = snprintf(tmp, sizeof(tmp), "::%s%s", (i != 0) ? 
"ffff:" : "", - inet_ntop4(&a4, tmp2, sizeof(tmp2))); - if (len >= size) return NULL; - bcopy(tmp, dst, len + 1); - return dst; - } - - k = 0; - for (i = 0; i < 16; i += 2) - { - j = 0; - skip = 1; - - bzero(hexa[k], 5); - - x8 = addr->__u6_addr.__u6_addr8[i]; - - hx8 = x8 >> 4; - if (hx8 != 0) - { - skip = 0; - hexa[k][j++] = hexchars[hx8]; - } - - hx8 = x8 & 0x0f; - if ((skip == 0) || ((skip == 1) && (hx8 != 0))) - { - skip = 0; - hexa[k][j++] = hexchars[hx8]; - } - - x8 = addr->__u6_addr.__u6_addr8[i + 1]; - - hx8 = x8 >> 4; - if ((skip == 0) || ((skip == 1) && (hx8 != 0))) - { - hexa[k][j++] = hexchars[hx8]; - } - - hx8 = x8 & 0x0f; - hexa[k][j++] = hexchars[hx8]; - - k++; - } - - /* find runs of zeros for :: convention */ - j = 0; - for (i = 7; i >= 0; i--) - { - zr[i] = j; - x16 = addr->__u6_addr.__u6_addr16[i]; - if (x16 == 0) j++; - else j = 0; - zr[i] = j; - } - - /* find longest run of zeros */ - k = -1; - j = 0; - for(i = 0; i < 8; i++) - { - if (zr[i] > j) - { - k = i; - j = zr[i]; - } - } - - for(i = 0; i < 8; i++) - { - if (i != k) zr[i] = 0; - } - - len = 0; - for (i = 0; i < 8; i++) - { - if (zr[i] != 0) - { - /* check for leading zero */ - if (i == 0) tmp[len++] = ':'; - tmp[len++] = ':'; - i += (zr[i] - 1); - continue; - } - for (j = 0; hexa[i][j] != '\0'; j++) tmp[len++] = hexa[i][j]; - if (i != 7) tmp[len++] = ':'; - } - - /* trailing NULL */ - len++; - - if (len > size) return NULL; - bcopy(tmp, dst, len); - return dst; -} - -const char * -inet_ntop(int af, const void *addr, char *buf, socklen_t len) -{ - if(af==AF_INET6) - return inet_ntop6(addr, buf, len); - if(af==AF_INET) - return inet_ntop4(addr, buf, len); - return NULL; -} diff --git a/bsd/libkern/libkern.h b/bsd/libkern/libkern.h index 223f3a526..651a01390 100644 --- a/bsd/libkern/libkern.h +++ b/bsd/libkern/libkern.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -139,16 +139,15 @@ ulmin(u_int32_t a, u_int32_t b) /* Prototypes for non-quad routines. */ extern int ffs(int); -extern int locc(int, char *, u_int); extern u_int32_t random(void); -extern char *rindex(const char *, int); extern int scanc(u_int, u_char *, const u_char *, int); extern int skpc(int, int, char *); extern long strtol(const char*, char **, int); extern u_long strtoul(const char *, char **, int); extern quad_t strtoq(const char *, char **, int); extern u_quad_t strtouq(const char *, char **, int); -extern char *strsep(char **stringp, const char *delim); +extern char *strsep(char **, const char *); +extern void *memchr(const void *, int, size_t); int snprintf(char *, size_t, const char *, ...) __printflike(3,4); @@ -204,19 +203,7 @@ extern void flush_dcache64(addr64_t, unsigned, int); static __inline__ unsigned int clz(unsigned int num) { -#if __i386__ - unsigned int result; - __asm__ volatile( - "bsrl %1, %0\n\t" - "cmovel %2, %0" - : "=r" (result) - : "rm" (num), "r" (63) - ); - return 31 ^ result; - -#else return num?__builtin_clz(num):__builtin_clz(0); -#endif } diff --git a/bsd/libkern/memchr.c b/bsd/libkern/memchr.c new file mode 100644 index 000000000..fced31c67 --- /dev/null +++ b/bsd/libkern/memchr.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include + +void * +memchr(const void *bigptr, int ch, size_t length) +{ + const char *big = (const char *)bigptr; + size_t n; + for (n = 0; n < length; n++) + if (big[n] == ch) + return (void *)&big[n]; + return NULL; +} + diff --git a/bsd/libkern/rindex.c b/bsd/libkern/rindex.c deleted file mode 100644 index 47a881199..000000000 --- a/bsd/libkern/rindex.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#if defined(LIBC_SCCS) && !defined(lint) -/* static char sccsid[] = "@(#)rindex.c 8.1 (Berkeley) 6/4/93"; */ -#endif /* LIBC_SCCS and not lint */ - -#include - -char * -#ifdef STRRCHR -strrchr(p, ch) -#else -rindex(p, ch) -#endif - register const char *p; - register int ch; -{ - register char *save; - - for (save = NULL;; ++p) { - if (*p == ch) - save = (char *)p; - if (!*p) - return(save); - } - /* NOTREACHED */ -} diff --git a/bsd/libkern/strtol.c b/bsd/libkern/strtol.c deleted file mode 100644 index 7fc24e1aa..000000000 --- a/bsd/libkern/strtol.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All rights reserved. 
- * - * strol.c - The functions strtol() & strtoul() are exported as public API - * via the header file ~driverkit/generalFuncs.h - * - * HISTORY - * 25-Oct-1995 Dean Reece at NeXT - * Created based on BSD4.4's strtol.c & strtoul.c. - * Removed dependency on _ctype_ by static versions of isupper()... - * Added support for "0b101..." binary constants. - * Commented out references to errno. - */ - -/*- - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -static inline int -isupper(char c) -{ - return (c >= 'A' && c <= 'Z'); -} - -static inline int -isalpha(char c) -{ - return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')); -} - - -static inline int -isspace(char c) -{ - return (c == ' ' || c == '\t' || c == '\n' || c == '\12'); -} - -static inline int -isdigit(char c) -{ - return (c >= '0' && c <= '9'); -} - -/* - * Convert a string to a long integer. - * - * Ignores `locale' stuff. Assumes that the upper and lower case - * alphabets and digits are each contiguous. - */ -long -strtol(nptr, endptr, base) - const char *nptr; - char **endptr; - register int base; -{ - register const char *s = nptr; - register unsigned long acc; - register int c; - register unsigned long cutoff; - register int neg = 0, any, cutlim; - - /* - * Skip white space and pick up leading +/- sign if any. - * If base is 0, allow 0x for hex and 0 for octal, else - * assume decimal; if base is already 16, allow 0x. 
- */ - do { - c = *s++; - } while (isspace(c)); - if (c == '-') { - neg = 1; - c = *s++; - } else if (c == '+') - c = *s++; - if ((base == 0 || base == 16) && - c == '0' && (*s == 'x' || *s == 'X')) { - c = s[1]; - s += 2; - base = 16; - } else if ((base == 0 || base == 2) && - c == '0' && (*s == 'b' || *s == 'B')) { - c = s[1]; - s += 2; - base = 2; - } - if (base == 0) - base = c == '0' ? 8 : 10; - - /* - * Compute the cutoff value between legal numbers and illegal - * numbers. That is the largest legal value, divided by the - * base. An input number that is greater than this value, if - * followed by a legal input character, is too big. One that - * is equal to this value may be valid or not; the limit - * between valid and invalid numbers is then based on the last - * digit. For instance, if the range for longs is - * [-2147483648..2147483647] and the input base is 10, - * cutoff will be set to 214748364 and cutlim to either - * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated - * a value > 214748364, or equal but the next digit is > 7 (or 8), - * the number is too big, and we will return a range error. - * - * Set any if any `digits' consumed; make it negative to indicate - * overflow. - */ - cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX; - cutlim = cutoff % (unsigned long)base; - cutoff /= (unsigned long)base; - for (acc = 0, any = 0;; c = *s++) { - if (isdigit(c)) - c -= '0'; - else if (isalpha(c)) - c -= isupper(c) ? 'A' - 10 : 'a' - 10; - else - break; - if (c >= base) - break; - if (any < 0 || acc > cutoff || acc == cutoff && c > cutlim) - any = -1; - else { - any = 1; - acc *= base; - acc += c; - } - } - if (any < 0) { - acc = neg ? LONG_MIN : LONG_MAX; -// errno = ERANGE; - } else if (neg) - acc = -acc; - if (endptr != 0) - *endptr = (char *)(any ? s - 1 : nptr); - return (acc); -} - -/* - * Convert a string to an unsigned long integer. - * - * Ignores `locale' stuff. Assumes that the upper and lower case - * alphabets and digits are each contiguous. - */ -unsigned long -strtoul(nptr, endptr, base) - const char *nptr; - char **endptr; - register int base; -{ - register const char *s = nptr; - register unsigned long acc; - register int c; - register unsigned long cutoff; - register int neg = 0, any, cutlim; - - /* - * See strtol for comments as to the logic used. - */ - do { - c = *s++; - } while (isspace(c)); - if (c == '-') { - neg = 1; - c = *s++; - } else if (c == '+') - c = *s++; - if ((base == 0 || base == 16) && - c == '0' && (*s == 'x' || *s == 'X')) { - c = s[1]; - s += 2; - base = 16; - } else if ((base == 0 || base == 2) && - c == '0' && (*s == 'b' || *s == 'B')) { - c = s[1]; - s += 2; - base = 2; - } - if (base == 0) - base = c == '0' ? 8 : 10; - cutoff = (unsigned long)ULONG_MAX / (unsigned long)base; - cutlim = (unsigned long)ULONG_MAX % (unsigned long)base; - for (acc = 0, any = 0;; c = *s++) { - if (isdigit(c)) - c -= '0'; - else if (isalpha(c)) - c -= isupper(c) ? 'A' - 10 : 'a' - 10; - else - break; - if (c >= base) - break; - if (any < 0 || acc > cutoff || acc == cutoff && c > cutlim) - any = -1; - else { - any = 1; - acc *= base; - acc += c; - } - } - if (any < 0) { - acc = ULONG_MAX; -// errno = ERANGE; - } else if (neg) - acc = -acc; - if (endptr != 0) - *endptr = (char *)(any ? 
s - 1 : nptr); - return (acc); -} diff --git a/bsd/machine/Makefile b/bsd/machine/Makefile index 83f5ebda9..929228907 100644 --- a/bsd/machine/Makefile +++ b/bsd/machine/Makefile @@ -12,14 +12,16 @@ DATAFILES = \ byte_order.h endian.h fasttrap_isa.h \ limits.h param.h profile.h \ setjmp.h signal.h types.h\ - vmparam.h _structs.h _types.h _limits.h _param.h + vmparam.h _structs.h _types.h _limits.h _param.h \ + _mcontext.h KERNELFILES = \ disklabel.h \ byte_order.h endian.h \ limits.h param.h profile.h \ signal.h spl.h types.h \ - vmparam.h _structs.h _types.h _limits.h _param.h + vmparam.h _structs.h _types.h _limits.h _param.h \ + _mcontext.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/osfmk/machine/timer.h b/bsd/machine/_mcontext.h similarity index 89% rename from osfmk/machine/timer.h rename to bsd/machine/_mcontext.h index ba8e5b5ff..ee9b1d943 100644 --- a/osfmk/machine/timer.h +++ b/bsd/machine/_mcontext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,13 +25,8 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#ifndef _MACHINE_TIMER_H -#define _MACHINE_TIMER_H - #if defined (__i386__) || defined (__x86_64__) -#include "i386/timer.h" +#include "i386/_mcontext.h" #else #error architecture not supported #endif - -#endif /* _MACHINE_TIMER_H */ diff --git a/bsd/man/man2/connectx.2 b/bsd/man/man2/connectx.2 new file mode 100644 index 000000000..96e85c7ab --- /dev/null +++ b/bsd/man/man2/connectx.2 @@ -0,0 +1,288 @@ +.\" +.\" Copyright (c) 2012 Apple Inc. All rights reserved. +.\" +.\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. The rights granted to you under the License +.\" may not be used to create, or enable the creation or redistribution of, +.\" unlawful or unlicensed copies of an Apple operating system, or to +.\" circumvent, violate, or enable the circumvention or violation of, any +.\" terms of an Apple operating system software license agreement. +.\" +.\" Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@ +.\" +.Dd November 14, 2012 +.Dt CONNECTX 2 +.Os Darwin +.Sh NAME +.Nm connectx +.Nd initiate one or more connections on a socket +.Sh SYNOPSIS +.Fd #include +.Ft int +.Fo connectx +.Fa "int socket" +.Fa "const struct sockaddr *saddress" +.Fa "socklen_t saddress_len" +.Fa "const struct sockaddr *daddress" +.Fa "socklen_t daddress_len" +.Fa "unsigned int ifscope" +.Fa "associd_t associd" +.Fa "connid_t *connid" +.Fc +.Sh DESCRIPTION +The parameter +.Fa socket +is a socket. The communication domain of the socket determines the +availability and behavior of +.Fn connectx . 
+In general, +.Fn connectx +may be used as a substitute for cases when +.Xr bind 2 +and +.Xr connect 2 +are issued in succession. +.Pp +When the source address +.Fa saddress +parameter is specified, +.Fn connectx +binds the connection to one of the addresses, as if +.Xr bind 2 +is used. The length of +.Fa saddress +buffer is specified by +.Fa saddress_len . +This buffer may hold more than one address, where each successive address +immediately follows the previous one. The parameter +.Fa ifscope +may also be specified instead of +.Fa saddress , +in order to bind the connection to the interface whose interface index +equals +.Fa ifscope . +Both +.Fa saddress +and +.Fa ifscope +parameters may be specified in order to add more constraints to the connection. +.Pp +At least one destination address must be specified in the +.Fa daddress +parameter. The +.Fa daddress_len +specifies the length of that buffer. When more than one address +is specified, each successive address immediately follows the previous one. +.Pp +Each communications domain interprets the +.Fa saddress +and +.Fa daddress +parameters in its own way. When multiple addresses are specified, one +of the addresses will be chosen. The rules used in selecting the +address vary between communication domains. +.Pp +Changes related to the connection state may be monitored by registering for the +.Dv NOTE_CONNINFO_UPDATED +.Xr kqueue 2 +event, using the predefined system filter +.Dv EVFILT_SOCK . +Details regarding the event may be retrieved by calling +.Xr getconninfo 3 . +.Sh MULTIPATH +On a multipath socket, +.Fn connectx +may be used multiple times, in order to establish the initial session +association with the peer socket upon the first connection, and to further +establish additional connections related to that association on subsequent +ones. +.Pp +The parameter +.Fa associd +specifies the association identifier. When +.Fn connectx +is initially called to establish an association, the association identifier +is not yet known, and +.Dv ASSOCID_ANY +must be specified. After the initial connection is established, the +association identifier may be retrieved using +.Xr getassocids 3 , +and the value may then be used on subsequent +.Fn connectx +calls. +.Pp +If the initial connection is established without any protocol-level +multipath association, the error +.Er EPROTO +will be returned, and the connection can be extracted to a new socket with +the same properties as +.Fa socket , +by calling +.Xr peeloff 2 . +.Pp +An association representing one or more connections, or a single connection, +may be dissolved by calling +.Xr disconnectx 2 . +.Sh NON-MULTIPATH +On a non-multipath socket, +.Fn connectx +behaves much like a combination of +.Xr bind 2 +and +.Xr connect 2 . +The parameter +.Fa associd +must always be set to +.Dv ASSOCID_ANY . +.Pp +Generally, non-multipath stream sockets may successfully +.Fn connectx +only once; datagram sockets may use +.Fn connectx +multiple times to change their association, after first dissolving the +existing association by calling +.Xr disconnectx 2 . +.Sh RETURN VALUES +Upon successful completion, a value of 0 is returned and the connection +identifier is returned through the +.Fa connid +parameter. If the initial connection establishes an association with +a peer socket, the association identifier may be retrieved by calling +.Xr getassocids 3 . +Both of these identifiers are unique +on a per +.Fa socket +basis.
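As a rough usage sketch of the bind-plus-connect substitution described above: the prototype is taken from the SYNOPSIS, the header elided in the SYNOPSIS is assumed to be <sys/socket.h>, and passing a NULL saddress with zero length is assumed to mean "no source constraint". This is an illustrative sketch, not part of the patch.

    /*
     * Hedged sketch: connectx() in place of bind(2) + connect(2) on a
     * plain (non-multipath) TCP socket. ASSOCID_ANY is mandatory on
     * non-multipath sockets per the NON-MULTIPATH section above.
     */
    #include <sys/socket.h>     /* assumed to declare connectx() */
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <string.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
        struct sockaddr_in dst;
        connid_t cid;
        int s;

        if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
            return (1);

        memset(&dst, 0, sizeof (dst));
        dst.sin_len = sizeof (dst);
        dst.sin_family = AF_INET;
        dst.sin_port = htons(80);
        dst.sin_addr.s_addr = inet_addr("127.0.0.1");

        /* No source address, no ifscope; one destination address. */
        if (connectx(s, NULL, 0, (const struct sockaddr *)&dst,
            sizeof (dst), 0, ASSOCID_ANY, &cid) == -1) {
            perror("connectx");
            close(s);
            return (1);
        }

        /* The connid may later be handed to disconnectx(2). */
        printf("connected, connid %u\n", (unsigned int)cid);
        close(s);
        return (0);
    }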
Upon failure, a value of -1 is returned and the global integer +variable +.Va errno +is set to indicate the error. +.Sh ERRORS +The +.Fn connectx +system call will fail if: +.Bl -tag -width Er +.\" ========== +.It Bq Er EACCES +The destination address is a broadcast address and the +socket option +.Dv SO_BROADCAST +is not set. +.\" ========== +.It Bq Er EADDRINUSE +The address is already in use. +.\" ========== +.It Bq Er EADDRNOTAVAIL +The specified address is not available on this machine. +.\" ========== +.It Bq Er EAFNOSUPPORT +Addresses in the specified address family cannot be used with this socket. +.\" ========== +.It Bq Er EALREADY +The socket is non-blocking +and a previous connection attempt +has not yet been completed. +.\" ========== +.It Bq Er EBADF +.Fa socket +is not a valid descriptor. +.\" ========== +.It Bq Er ECONNREFUSED +The attempt to connect was ignored +(because the target is not listening for connections) +or explicitly rejected. +.\" ========== +.It Bq Er EFAULT +The +.Fa address +parameter specifies an area outside +the process address space. +.\" ========== +.It Bq Er EHOSTUNREACH +The target host cannot be reached (e.g., down, disconnected). +.\" ========== +.It Bq Er EINPROGRESS +The socket is non-blocking +and the connection cannot +be completed immediately. +It is possible to +.Xr select 2 +for completion by selecting the socket for writing. +.\" ========== +.It Bq Er EINTR +Its execution was interrupted by a signal. +.\" ========== +.It Bq Er EINVAL +An invalid argument was detected +(e.g., +.Fa address_len +is not valid for the address family, +the specified address family is invalid). +.\" ========== +.It Bq Er EISCONN +The socket is already connected. +.\" ========== +.It Bq Er ENETDOWN +The local network interface is not functioning. +.\" ========== +.It Bq Er ENETUNREACH +The network isn't reachable from this host. +.\" ========== +.It Bq Er ENOBUFS +The system call was unable to allocate a needed memory buffer. +.\" ========== +.It Bq Er ENOTSOCK +.Fa socket +is not a file descriptor for a socket. +.\" ========== +.It Bq Er EOPNOTSUPP +Because +.Fa socket +is listening, no connection is allowed. +.\" ========== +.It Bq Er EPROTO +The connection was successfully established without any protocol-level +association. The connection can be extracted to a new socket using +.Xr peeloff 2 . +.\" ========== +.It Bq Er EPROTOTYPE +.Fa address +has a different type than the socket +that is bound to the specified peer address. +.\" ========== +.It Bq Er ETIMEDOUT +Connection establishment timed out without establishing a connection. +.\" ========== +.It Bq Er ECONNRESET +Remote host reset the connection request. +.Sh SEE ALSO +.Xr accept 2 , +.Xr bind 2 , +.Xr connect 2 , +.Xr disconnectx 2 , +.Xr kqueue 2 , +.Xr peeloff 2 , +.Xr select 2 , +.Xr socket 2 , +.Xr getassocids 3 , +.Xr getconnids 3 , +.Xr getconninfo 3 , +.Xr compat 5 +.Sh HISTORY +The +.Fn connectx +function call appeared in Darwin 13.0.0 diff --git a/bsd/man/man2/disconnectx.2 b/bsd/man/man2/disconnectx.2 new file mode 100644 index 000000000..eed45a0de --- /dev/null +++ b/bsd/man/man2/disconnectx.2 @@ -0,0 +1,133 @@ +.\" +.\" Copyright (c) 2012 Apple Inc. All rights reserved. +.\" +.\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. 
The rights granted to you under the License +.\" may not be used to create, or enable the creation or redistribution of, +.\" unlawful or unlicensed copies of an Apple operating system, or to +.\" circumvent, violate, or enable the circumvention or violation of, any +.\" terms of an Apple operating system software license agreement. +.\" +.\" Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this file. +.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@ +.\" +.Dd November 14, 2012 +.Dt DISCONNECTX 2 +.Os Darwin +.Sh NAME +.Nm disconnectx +.Nd disconnect a connection on a socket +.Sh SYNOPSIS +.Fd #include +.Ft int +.Fo disconnectx +.Fa "int socket" +.Fa "associd_t associd" +.Fa "connid_t connid" +.Fc +.Sh DESCRIPTION +The parameter +.Fa socket +is a socket. The communication domain of the socket determines the +availability and behavior of +.Fn disconnectx . +In general, +.Fn disconnectx +is analogous to +.Xr shutdown 2 +with +.Dv SHUT_RDWR +issued on the connection identified by +.Fa connid , +or on all connections associated with the +.Fa associd +association. +.Pp +The parameter +.Fa associd +specifies the association identifier. It may be set to +.Dv ASSOCID_ANY +when there is only one association present; +.Dv ASSOCID_ALL +to specify all existing associations; or one of the identifiers returned from +.Xr getassocids 3 . +.Pp +The parameter +.Fa connid +specifies the connection identifier. It may be set to +.Dv CONNID_ANY +or +.Dv CONNID_ALL , +in which case the association represented by +.Fa associd +will be dissolved; or the value returned from +.Xr connectx 2 +or +.Xr getconnids 3 , +which indicates that the disconnection occurs only on that connection +while keeping the session association intact. For the latter, the connection +associated with +.Fa connid +will no longer be valid upon success. +.Sh RETURN VALUES +The +.Fn disconnectx +function returns the value 0 if successful; otherwise the value of -1 is +returned and the global integer variable +.Va errno +is set to indicate the error. +.Sh ERRORS +The +.Fn disconnectx +system call succeeds unless: +.Bl -tag -width Er +.\" =========== +.It Bq Er EALREADY +Operation already in progress for the session association represented by +.Fa associd . +.\" =========== +.It Bq Er EBADF +.Fa Socket +is not a valid descriptor. +.\" =========== +.It Bq Er EINVAL +The +.Fa associd +or +.Fa connid +argument is invalid or the underlying protocol is no longer attached to +.Fa socket . +.\" =========== +.It Bq Er ENOTCONN +The session association represented by +.Fa associd +is not connected. +.\" =========== +.It Bq Er ENOTSOCK +.Fa Socket +is a file, not a socket.
+.El +.Sh SEE ALSO +.Xr connectx 2 , +.Xr socket 2 , +.Xr getassocids 3 , +.Xr getconnids 3 , +.Xr getconninfo 3 , +.Xr compat 5 +.Sh HISTORY +The +.Fn disconnectx +function call appeared in Darwin 13.0.0 diff --git a/bsd/man/man2/fcntl.2 b/bsd/man/man2/fcntl.2 index b53a38be1..ce2daefe5 100644 --- a/bsd/man/man2/fcntl.2 +++ b/bsd/man/man2/fcntl.2 @@ -184,7 +184,10 @@ turns data caching on. Get disk device information. Currently this only includes the disk device address that corresponds -to the current file offset. +to the current file offset. Note that if the +file offset is not backed by physical blocks, +-1 may be returned as the offset. This is subject +to change. .It Dv F_LOG2PHYS_EXT Variant of F_LOG2PHYS that uses the passed in file offset and length. diff --git a/bsd/man/man2/getdtablesize.2 b/bsd/man/man2/getdtablesize.2 index 7465f9aeb..bfb2b8604 100644 --- a/bsd/man/man2/getdtablesize.2 +++ b/bsd/man/man2/getdtablesize.2 @@ -51,9 +51,21 @@ the descriptor table are numbered with small integers starting at 0. The call .Fn getdtablesize returns the size of this table. +.Sh LEGACY +The +.Xr getrlimit 2 +function returns a value of type +.Ft rlim_t . +This interface, returning an +.Ft int , +may have problems representing appropriate values in the future. +Applications should use the +.Xr getrlimit 2 +function instead. .Sh SEE ALSO .Xr close 2 , .Xr dup 2 , +.Xr getrlimit 2 , .Xr open 2 , .Xr select 2 .Sh HISTORY diff --git a/bsd/man/man2/getpriority.2 b/bsd/man/man2/getpriority.2 index 07ed5b6d8..6e6d89635 100644 --- a/bsd/man/man2/getpriority.2 +++ b/bsd/man/man2/getpriority.2 @@ -54,8 +54,7 @@ .Fa "int prio" .Fc .Sh DESCRIPTION -The scheduling -priority of the process, process group, or user as indicated by +The scheduling priority of the process, process group, or user as indicated by .Fa which and .Fa who @@ -63,15 +62,13 @@ is obtained with the .Fn getpriority call and set with the .Fn setpriority -call. Additionally, the current thread can be set to background -state. +call. .Fa Which is one of .Dv PRIO_PROCESS , .Dv PRIO_PGRP , -.Dv PRIO_USER , or -.Dv PRIO_DARWIN_THREAD , +.Dv PRIO_USER , and .Fa who is interpreted relative to @@ -85,30 +82,39 @@ and a user ID for .Dv PRIO_USER ) . A zero value of .Fa who -denotes the current process, process group, user, thread (for PRIO_DARWIN_THREAD). +denotes the current process, process group, or user. .Fa prio is a value in the range -20 to 20. The default priority is 0; lower priorities cause more favorable scheduling. -For PRIO_DARWIN_THREAD, -.Fa prio -is either 0 (to remove current thread from background status) or PRIO_DARWIN_BG -(to set current thread into background state). .Pp The .Fn getpriority call returns the highest priority (lowest numerical value) -enjoyed by any of the specified processes or for PRIO_DARWIN_THREAD -returns 0 when current thread is not in background state or 1 -when the current thread is in background state. +enjoyed by any of the specified processes. The .Fn setpriority call sets the priorities of all of the specified processes -to the specified value. When setting a thread into background -state the scheduling priority is set to lowest value, disk and -network IO are throttled. Network IO will be throttled for any -sockets the thread opens after going into background state. Any -previously opened sockets are not affected. -Only the super-user may lower priorities, but any thread can set +to the specified value. Only the super-user may lower priorities.
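The LEGACY section added to getdtablesize.2 above recommends getrlimit(2); a minimal sketch of the replacement it suggests:

#include <sys/resource.h>

/* Returns the descriptor-table size as an rlim_t, avoiding the int
 * truncation the LEGACY section warns about. Error handling elided. */
rlim_t descriptor_table_size(void)
{
    struct rlimit rl;

    if (getrlimit(RLIMIT_NOFILE, &rl) != 0)
        return 0;
    return rl.rlim_cur;     /* current (soft) limit, cf. getdtablesize() */
}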
+.Pp +Additionally, the current thread or process can be placed in a background state +by specifying PRIO_DARWIN_THREAD or PRIO_DARWIN_PROCESS for +.Fa which . +Only a value of zero (the current thread or process) is supported for +.Fa who +when setting or getting background state. +.Fa prio +is either 0 (to remove current thread from background status) or PRIO_DARWIN_BG +(to set current thread into background state). +When a thread or process is in a background state, the scheduling priority is set +to the lowest value, disk IO is throttled (with behavior similar to using +.Xr setiopolicy_np 3 +to set a throttleable policy), and network IO is throttled for +any sockets opened after going into background state. Any previously opened +sockets are not affected. +The +.Fn getpriority +call returns 0 when the current thread or process is not in background state, or 1 +when the current thread or process is in background state. Any thread or process can set itself into background state. .Sh RETURN VALUES Since @@ -136,8 +142,9 @@ is not one of .Dv PRIO_PROCESS , .Dv PRIO_PGRP , .Dv PRIO_USER , +.Dv PRIO_DARWIN_THREAD , or -.Dv PRIO_DARWIN_THREAD . +.Dv PRIO_DARWIN_PROCESS . .\" ========== .It Bq Er EINVAL .Fa Who is not a valid process, process group, or user ID. @@ -147,7 +154,10 @@ is not a valid process, process group, or user ID. .Fa Who is not 0 when .Fa which -is PRIO_DARWIN_THREAD. +is +.Dv PRIO_DARWIN_THREAD +or +.Dv PRIO_DARWIN_PROCESS . .\" ========== .It Bq Er ESRCH No process can be located using the @@ -199,6 +209,7 @@ has changed. .Sh SEE ALSO .Xr nice 1 , .Xr fork 2 , +.Xr setiopolicy_np 3 , .Xr compat 5 , .Xr renice 8 .Sh HISTORY diff --git a/bsd/man/man2/kqueue.2 b/bsd/man/man2/kqueue.2 index 6ab998c5a..201ad5de3 100644 --- a/bsd/man/man2/kqueue.2 +++ b/bsd/man/man2/kqueue.2 @@ -450,7 +450,7 @@ or similar call. .It NOTE_REAP The process was reaped by the parent via .Xr wait 2 -or similar call. +or similar call. Deprecated, use NOTE_EXIT. .El .Pp On return, @@ -499,6 +499,12 @@ data is in microseconds data is in nanoseconds .It NOTE_ABSOLUTE data is an absolute timeout +.It NOTE_CRITICAL +system makes a best effort to fire this timer as scheduled. +.It NOTE_BACKGROUND +system has extra leeway to coalesce this timer. +.It NOTE_LEEWAY +ext[1] holds user-supplied slop in deadline for timer coalescing. .El .Pp If fflags is not set, the default is milliseconds. The timer will be periodic unless EV_ONESHOT is specified. diff --git a/bsd/man/man2/peeloff.2 b/bsd/man/man2/peeloff.2 new file mode 100644 index 000000000..11e795ccc --- /dev/null +++ b/bsd/man/man2/peeloff.2 @@ -0,0 +1,99 @@ +.\" +.\" Copyright (c) 2012 Apple Inc. All rights reserved. +.\" +.\" @APPLE_OSREFERENCE_LICENSE_HEADER_START@ +.\" +.\" This file contains Original Code and/or Modifications of Original Code +.\" as defined in and that are subject to the Apple Public Source License +.\" Version 2.0 (the 'License'). You may not use this file except in +.\" compliance with the License. The rights granted to you under the License +.\" may not be used to create, or enable the creation or redistribution of, +.\" unlawful or unlicensed copies of an Apple operating system, or to +.\" circumvent, violate, or enable the circumvention or violation of, any +.\" terms of an Apple operating system software license agreement. +.\" +.\" Please obtain a copy of the License at +.\" http://www.opensource.apple.com/apsl/ and read it before using this file.
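A hedged sketch of the background-state mechanism added to getpriority.2 above: place the whole process in the background band around a batch of work, then restore normal state. Error checking is abbreviated.

#include <sys/resource.h>

/* Run 'work' with PRIO_DARWIN_BG in effect for the whole process.
 * 'who' must be 0, per the ERRORS section above. */
void run_batch_in_background(void (*work)(void))
{
    if (setpriority(PRIO_DARWIN_PROCESS, 0, PRIO_DARWIN_BG) != 0)
        return;

    work();     /* disk IO throttled; new sockets throttled as described */

    /* getpriority() reports 1 while the background state is in effect. */
    if (getpriority(PRIO_DARWIN_PROCESS, 0) == 1)
        (void)setpriority(PRIO_DARWIN_PROCESS, 0, 0);   /* leave background */
}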
+.\" +.\" The Original Code and all software distributed under the License are +.\" distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER +.\" EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, +.\" INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. +.\" Please see the License for the specific language governing rights and +.\" limitations under the License. +.\" +.\" @APPLE_OSREFERENCE_LICENSE_HEADER_END@ +.\" +.Dd November 14, 2012 +.Dt PEELOFF 2 +.Os Darwin +.Sh NAME +.Nm peeloff +.Nd extracts an association from a socket +.Sh SYNOPSIS +.Fd #include +.Ft int +.Fo peeloff +.Fa "int socket" +.Fa "associd_t associd" +.Fc +.Sh DESCRIPTION +The parameter +.Fa socket +is a socket. The communication domain of the socket determines the +availability and behavior of +.Fn peeloff . +In general, +.Fn peeloff +attempts to extract the association specified by +.Fa associd +into its own separate socket. +.Pp +The parameter +.Fa associd +specifies the association identifier. It may be set to +.Dv ASSOCID_ANY +when there is only one association present; or one of the identifiers +returned from +.Xr getassocids 3 . +.Sh RETURN VALUES +The +.Fn peeloff +function returns -1 on error and the global variable +.Va errno +is set to indicate the error. If it succeeds, it returns a non-negative +integer that is a descriptor for the extracted association. +.Sh ERRORS +The +.Fn peeloff +system call succeeds unless: +.Bl -tag -width Er +.\" =========== +.It Bq Er EBADF +.Fa Socket +is not a valid descriptor. +.\" =========== +.It Bq Er EINVAL +The +.Fa associd +argument is invalid; cannot be extracted; or the underlying protocol +is no longer attached to +.Fa socket . +.\" =========== +.It Bq Er ENOTSOCK +.Fa Socket +is a file, not a socket. +.El +.Sh SEE ALSO +.Xr connectx 2 , +.Xr disconnectx 2 , +.Xr socket 2 , +.Xr getassocids 3 , +.Xr getconnids 3 , +.Xr getconninfo 3 , +.Xr compat 5 +.Sh HISTORY +The +.Fn peeloff +function call appeared in Darwin 13.0.0 diff --git a/bsd/man/man2/searchfs.2 b/bsd/man/man2/searchfs.2 index 511776635..456b8dc7b 100644 --- a/bsd/man/man2/searchfs.2 +++ b/bsd/man/man2/searchfs.2 @@ -608,7 +608,18 @@ An I/O error occurred while reading from or writing to the file system. .Pp . .Sh CAVEATS -The list of searchable attributes currently includes: + +The list of attributes that are valid as search criteria currently includes the +following list of attributes. Note that this list is substantially smaller than +what is available via +.Xr getattrlist 2 +for a particular filesystem object. In general, a filesystem that supports +.Fn searchfs +will typically supply per-item attributes for matched objects that are also +supported by the +.Xr getdirentries 2 +system call. This varies from filesystem to filesystem. + .Pp . .Bl -item -compact diff --git a/bsd/man/man2/setxattr.2 b/bsd/man/man2/setxattr.2 index 240e8298d..3d5f3c5e6 100644 --- a/bsd/man/man2/setxattr.2 +++ b/bsd/man/man2/setxattr.2 @@ -91,13 +91,26 @@ is identical to except that it sets an extended attribute on an open file referenced by file descriptor .Fa fd . -.Sh NOTE +.Sh NOTES On some filesystems, such as .Dv HFS+ , setting the extended attribute .Dv com.apple.ResourceFork will update the modification time (``mtime'') of the file. 
+.Pp +Due to historical reasons, the +.Dv XATTR_FINDERINFO_NAME +(defined to be +.Dq com.apple.FinderInfo ) +extended attribute must be 32 bytes; see the +.Dv ATTR_CMN_FNDRINFO +section in +.Xr getattrlist 2 . +.Pp +The maximum supported size of an extended attribute can be determined using +.Xr pathconf 2 +with the _PC_XATTR_SIZE_BITS option. .Sh RETURN VALUES On success, 0 is returned. On failure, -1 is returned and the global variable @@ -174,16 +187,8 @@ Not enough space left on the file system. .Sh SEE ALSO .Xr getxattr 2 , .Xr listxattr 2 , -.Xr removexattr 2 -.Sh NOTES -Due to historical reasons, the -.Dv XATTR_FINDERINFO_NAME -(defined to be -.Dq com.apple.FinderInfo ) -extended attribute must be 32 bytes; see the -.Dv ATTR_CMN_FNDRINFO -section in -.Xr getattrlist 2 . +.Xr removexattr 2 , +.Xr pathconf 2 .Sh HISTORY .Fn setxattr and diff --git a/bsd/man/man2/shm_open.2 b/bsd/man/man2/shm_open.2 index 1b4bfc685..b9e63e86d 100644 --- a/bsd/man/man2/shm_open.2 +++ b/bsd/man/man2/shm_open.2 @@ -28,6 +28,7 @@ .Nd open a shared memory object .Sh SYNOPSIS .Fd #include +.Fd #include .Ft int .Fo shm_open .Fa "const char *name" @@ -122,7 +123,7 @@ not persist across a system reboot. .Pp The system imposes a limit on the number of file descriptors open simultaneously by one process. -.Xr Getdtablesize 2 +.Xr getdtablesize 2 returns the current system limit. .Sh ERRORS The named object is opened unless: diff --git a/bsd/man/man2/sigaltstack.2 b/bsd/man/man2/sigaltstack.2 index c84d99eaf..3ca8c96f3 100644 --- a/bsd/man/man2/sigaltstack.2 +++ b/bsd/man/man2/sigaltstack.2 @@ -68,7 +68,7 @@ the system arranges a switch to the signal stack for the duration of the signal handler's execution. .Pp If -.Dv SA_DISABLE +.Dv SS_DISABLE is set in .Fa ss_flags , .Fa ss_sp @@ -94,7 +94,7 @@ The field will contain the value .Dv SA_ONSTACK if the process is currently on a signal stack and -.Dv SA_DISABLE +.Dv SS_DISABLE if the signal stack is currently disabled. .Sh NOTES The value diff --git a/bsd/man/man2/socket.2 b/bsd/man/man2/socket.2 index 8652a2a21..c19161723 100644 --- a/bsd/man/man2/socket.2 +++ b/bsd/man/man2/socket.2 @@ -228,9 +228,6 @@ is denied. .It Bq Er EAFNOSUPPORT The specified address family is not supported. .\" =========== -.It Bq Er EISCONN -The per-process descriptor table is full. -.\" =========== .It Bq Er EMFILE The per-process descriptor table is full.
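Two details from the setxattr NOTES above lend themselves to a short, hedged sketch: the fixed 32-byte size of com.apple.FinderInfo, and querying the volume's extended-attribute size limit through pathconf(2). The returned value is left uninterpreted here, since the man pages describe it as a width in bits rather than a byte count.

#include <sys/xattr.h>
#include <unistd.h>
#include <string.h>

/* XATTR_FINDERINFO_NAME must be written as exactly 32 bytes. */
int zero_finder_info(const char *path)
{
    char finder_info[32];

    memset(finder_info, 0, sizeof(finder_info));
    return setxattr(path, XATTR_FINDERINFO_NAME,
                    finder_info, sizeof(finder_info), 0, 0);
}

/* Number of bits used to represent the maximum xattr size on the volume
 * holding 'path'; -1 if the filesystem does not report it. */
long xattr_size_bits(const char *path)
{
    return pathconf(path, _PC_XATTR_SIZE_BITS);
}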
.\" =========== diff --git a/bsd/man/man3/Makefile b/bsd/man/man3/Makefile index 605cee606..d2efa1a7b 100644 --- a/bsd/man/man3/Makefile +++ b/bsd/man/man3/Makefile @@ -16,7 +16,8 @@ DATAFILES = \ posix_spawnattr_setpgroup.3 \ posix_spawnattr_setsigdefault.3 \ posix_spawnattr_setsigmask.3 \ - posix_spawnattr_setspecialport_np.3 + posix_spawnattr_setspecialport_np.3 \ + getiopolicy_np.3 INSTALL_MAN_LIST = ${DATAFILES} @@ -53,7 +54,8 @@ INSTALL_MAN_LINKS = \ posix_spawnattr_setpgroup.3 posix_spawnattr_getpgroup.3 \ posix_spawnattr_setsigdefault.3 posix_spawnattr_getsigdefault.3 \ posix_spawnattr_setsigmask.3 posix_spawnattr_getsigmask.3 \ - posix_spawnattr_setspecialport_np.3 posix_spawnattr_setexceptionports_np.3 + posix_spawnattr_setspecialport_np.3 posix_spawnattr_setexceptionports_np.3 \ + getiopolicy_np.3 setiopolicy_np.3 INSTALL_MAN_DIR = man3 diff --git a/bsd/man/man3/getiopolicy_np.3 b/bsd/man/man3/getiopolicy_np.3 new file mode 100644 index 000000000..087072411 --- /dev/null +++ b/bsd/man/man3/getiopolicy_np.3 @@ -0,0 +1,179 @@ +.Dd April 30, 2013 +.Dt getiopolicy_np 3 +.Os +.Sh NAME +.Nm getiopolicy_np, setiopolicy_np +.Nd manipulate the I/O policy of a process or thread +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/resource.h +.Ft int +.Fn getiopolicy_np "int iotype" "int scope" +.Ft int +.Fn setiopolicy_np "int iotype" "int scope" "int policy" +.Sh DESCRIPTION +The +.Fn getiopolicy_np +and +.Fn setiopolicy_np +functions are provided to get or set the I/O policies of the current process +or the current thread. The policy of the I/O of the given type +.Fa iotype +can be get or set for the given +.Fa scope . +.Pp +The I/O type is specified in the argument +.Fa iotype . +The currently supported I/O type is +.Dv IOPOL_TYPE_DISK , +which means the I/O policy for I/Os to local disks can be get or set. I/Os to +local disks are I/Os sent to the media without going through a network, +including I/Os to internal and external hard drives, optical media in internal +and external drives, flash drives, floppy disks, ram disks, and mounted disk +images which reside on these media, but not including remote volumes mounted +through networks (AFP, SMB, NFS, etc) or disk images residing on remote volumes. +.Pp +The scope that the I/O policy takes effect is specified in the argument +.Fa scope +as follows: +.Bl -tag -width IOPOL_SCOPE_PROCESS +.It IOPOL_SCOPE_PROCESS +The I/O policy of all I/Os issued by the current process is get or set. +.It IOPOL_SCOPE_THREAD +The I/O policy of all I/Os issued by the current thread is get or set. +.El +.Pp +In +.Fn getiopolicy_np , +the I/O policy of the given I/O type and scope is returned. In +.Fn setiopolicy_np , +the argument +.Fa policy +is an integer which contains the new I/O policy to be set for the given I/O +type and scope. +.Fa Policy +can have the following values: +.Bl -tag -width IOPOL_PASSIVEXXX +.It IOPOL_IMPORTANT +I/Os with the IMPORTANT policy are unrestricted. This policy should only be +used for I/Os that are critical to system responsiveness. +This is the default I/O policy for new threads. +.It IOPOL_STANDARD +The STANDARD policy is for work requested by the user, but that is not the +user's current focus. I/Os with this policy may be delayed slightly to allow +IMPORTANT I/Os to complete quickly. +.It IOPOL_UTILITY +The UTILITY policy is for short-running background work. I/Os with this policy +are throttled to prevent a significant impact on the latency of IMPORTANT and +STANDARD I/Os. 
+.It IOPOL_THROTTLE +The THROTTLE policy is for long-running I/O intensive background work, such as +backups, search indexing, or file synchronization. I/Os with this policy will +be throttled to avoid impacting performance of higher priority I/Os. +.It IOPOL_PASSIVE +PASSIVE I/Os are a special type of I/O that are ignored by the other +policies, so that the threads issuing lower priority I/Os are not slowed down by +PASSIVE I/Os. The PASSIVE I/O policy is useful for server type applications. +The I/Os generated by these applications are called passive I/Os because these +I/Os are caused directly or indirectly by the I/O requests they receive from +client applications. For example, when an image file is mounted by DiskImages, +DiskImages generates passive I/Os. DiskImages should mark these I/Os using the +PASSIVE I/O policy so that client applications that access the volume +managed by DiskImages are not slowed down by the +I/Os generated by DiskImages. +.El +.Pp +I/Os with the STANDARD, UTILITY, and THROTTLE policies are called throttleable +I/Os and are of decreasing priority. If a throttleable request occurs within a +small time window of a request of higher priority, the thread that issued the +throttleable I/O is forced to sleep for a short period. (Both this window and +the sleep period are dependent on the policy of the throttleable I/O.) This +slows down the thread that issues the throttleable I/O so that higher-priority +I/Os can complete with low latency and receive a greater share of the disk +bandwidth. Furthermore, an IMPORTANT I/O request may bypass a previously issued +throttleable I/O request in kernel or driver queues and be sent to the device +first. In some circumstances, very large throttleable I/O requests will be +broken into smaller requests which are then issued serially. +.Pp +The I/O policy of a newly created process is inherited from its parent +process. The I/O policy of an I/O request is the lowest priority +policy of the current thread and the current process. +.Sh RETURN VALUES +The +.Fn getiopolicy_np +call returns the I/O policy of the given I/O type and scope. If an error +occurs, -1 is returned. The +.Fn setiopolicy_np +call returns 0 if there is no error, or -1 if there is an error. When an error +occurs, the error code is stored in the external variable +.Va errno . +.Sh ERRORS +.Fn Getiopolicy_np +and +.Fn setiopolicy_np +will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +Io_type or scope is not one of the values defined in this manual. +.El +.Pp +In addition to the errors indicated above, +.Fn setiopolicy_np +will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +Policy is not one of the values defined in this manual. +.El +.Sh NOTES +A thread or process with a throttleable I/O policy enabled is generally +prevented from having an adverse effect on the throughput or latency of higher +priority I/Os of other processes. +However, there are a few considerations that users of the throttleable I/O +policies should keep in mind: +.Pp +Consider using the +.Dv F_NOCACHE +.Xr fcntl 2 +command to prevent caching when using a throttleable I/O policy. +This will reduce contention for available caches with IMPORTANT I/O. +.Pp +Large read requests will automatically be broken up into smaller requests +to avoid stalling IMPORTANT I/O requests. +However, due to the consistency guarantees provided to contiguous writes, +this cannot be done automatically for large writes.
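Tying the policy values above to the NOTES that follow, here is a hedged sketch of a background-maintenance thread marking its disk IO as THROTTLE and opening its data file uncached with F_NOCACHE, as the NOTES recommend; error handling is abbreviated.

#include <sys/resource.h>
#include <fcntl.h>

/* Prepare the calling thread for long-running background IO and open the
 * file it will scan, bypassing the buffer cache per the NOTES. */
int open_for_background_scan(const char *path)
{
    int fd;

    if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_THREAD,
                       IOPOL_THROTTLE) != 0)
        return -1;

    fd = open(path, O_RDONLY);
    if (fd >= 0)
        (void)fcntl(fd, F_NOCACHE, 1);  /* reduce cache contention */
    return fd;
}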
+If a thread or process with a throttleable I/O policy enabled will be issuing +large writes, consider the use of the +.Dv F_SINGLE_WRITER +.Xr fcntl 2 +command. +This will indicate to the system that there is only one thread writing to +the file and allow automatic division of large writes. +.Pp +Write-heavy throttleable I/O workloads may fill a drive's track (write) cache. +Subsequent higher priority writes must then wait for enough of the track cache +to be flushed before they can continue. +If the writes issued as throttleable I/O are small and not contiguous, many +seeks may be incurred before space is available for a subsequent higher +priority write. +Issuers of throttleable I/O should attempt to issue their writes sequentially +or to locations in a single small area of the drive (i.e., different +positions in the same file) to ensure good spatial locality. +.Pp +The +.Dv F_FULLFSYNC +.Xr fcntl 2 +command can cause very long system-wide IO stalls; use this command only if absolutely necessary. +.Sh SEE ALSO +.Xr nice 3 , +.Xr getpriority 2 , +.Xr setpriority 2 , +.Xr fcntl 2 , +.Xr open 2 , +.Xr renice 8 +.Sh HISTORY +The +.Fn getiopolicy_np +and +.Fn setiopolicy_np +function calls first appeared in Mac OS X 10.5 (Leopard). diff --git a/bsd/man/man4/bpf.4 b/bsd/man/man4/bpf.4 index 289a59071..75d455bf8 100644 --- a/bsd/man/man4/bpf.4 +++ b/bsd/man/man4/bpf.4 @@ -45,7 +45,7 @@ After opening the device, the file descriptor must be bound to a specific network interface with the .Dv BIOCSETIF ioctl. -A given interface can be shared be multiple listeners, and the filter +A given interface can be shared by multiple listeners, and the filter underlying each descriptor will see an identical packet stream. .Pp A separate device file is required for each minor device. @@ -74,14 +74,6 @@ can be set with Note that an individual packet larger than this size is necessarily truncated. .Pp -The packet filter will support any link level protocol that has fixed length -headers. Currently, only Ethernet, -.Tn SLIP , -and -.Tn PPP -drivers have been modified to interact with -.Nm . -.Pp A packet can be sent out on the network by writing to a .Nm file descriptor. The writes are unbuffered, meaning only one @@ -116,11 +108,6 @@ require and .Aq Pa net/if.h . .Pp -In addition to -.Dv FIONREAD -the following commands may be applied to any open -.Nm -file. The (third) argument to .Xr ioctl 2 should be a pointer to the type indicated. @@ -140,7 +127,7 @@ with If the requested buffer size cannot be accommodated, the closest allowable size will be set and returned in the argument. A read call will result in -.Er EIO +.Er EINVAL if it is passed a buffer that is not this size. .It Dv BIOCGDLT .Pq Li u_int @@ -151,32 +138,43 @@ The device types, prefixed with .Dq Li DLT_ , are defined in .Aq Pa net/bpf.h . +.It Dv BIOCGDLTLIST +.Pq Li "struct bpf_dltlist" +Returns an array of the available types of the data link layer +underlying the attached interface: +.Bd -literal -offset indent +struct bpf_dltlist { + u_int bfl_len; + u_int *bfl_list; +}; +.Ed +.Pp +The available types are returned in the array pointed to by the +.Va bfl_list +field while the length of that array, in units of u_int, is supplied in the +.Va bfl_len +field. +.Er ENOMEM +is returned if there is not enough buffer space and +.Er EFAULT +is returned if a bad address is encountered. +The +.Va bfl_len +field is modified on return to indicate the actual length in u_int +of the array returned.
+If +.Va bfl_list +is +.Dv NULL , +the +.Va bfl_len +field is set to indicate the required length of an array in u_int. .It Dv BIOCSDLT .Pq Li u_int -Used to specify the type of data link layer of the interface -attached to the bpf descriptor. If the current interface is -not of the given type then the descriptor will be reattached -to an interface of the given type. If the descriptor has -promiscuous mode set, the new interface will be moved to -promiscuous mode. -.Er EINVAL -is returned if no interface has been specified. -The device types, prefixed with -.Dq Li DLT_ , -are defined in -.Aq Pa net/bpf.h . -.It Dv BIOCGDLTLIST -.Pq Li struct bpf_dltlist -Returns a list of data link types of the the given interface. -A user allocated buffer to hold the list and length of expected -list should be provided in struct bpf_dltlist, defined in -.Aq Pa net/bpf.h . +Changes the type of the data link layer underlying the attached interface. .Er EINVAL -is returned if no interface has been specified. -The device types, prefixed with -.Dq Li DLT_ , -are defined in -.Aq Pa net/bpf.h . +is returned if no interface has been specified or the specified +type is not available for the interface. .It Dv BIOCPROMISC Forces the interface into promiscuous mode. All packets, not just those destined for the local host, are processed. @@ -184,6 +182,9 @@ Since more than one file can be listening on a given interface, a listener that opened its interface non-promiscuously may receive packets promiscuously. This problem can be remedied with an appropriate filter. +.Pp +The interface remains in promiscuous mode until all files listening +promiscuously are closed. .It Dv BIOCFLUSH Flushes the buffer of incoming packets, and resets the statistics that are returned by BIOCGSTATS. @@ -197,7 +198,7 @@ structure. All other fields are undefined. .It Dv BIOCSETIF .Pq Li "struct ifreq" -Sets the hardware interface associate with the file. This +Sets the hardware interface associated with the file. This command must be performed before any packets can be read. The device is indicated by name using the .Li ifr_name @@ -209,7 +210,7 @@ Additionally, performs the actions of .It Dv BIOCSRTIMEOUT .It Dv BIOCGRTIMEOUT .Pq Li "struct timeval" -Set or get the read timeout parameter. +Sets or gets the read timeout parameter. The argument specifies the length of time to wait before timing out on a read request. @@ -239,7 +240,7 @@ kernel because of buffer overflows .El .It Dv BIOCIMMEDIATE .Pq Li u_int -Enable or disable +Enables or disables .Dq immediate mode , based on the truth value of the argument. When immediate mode is enabled, reads return immediately upon packet @@ -250,13 +251,14 @@ This is useful for programs like which must respond to messages in real time. The default for a new file is off. .It Dv BIOCSETF +.It Dv BIOCSETFNR .Pq Li "struct bpf_program" Sets the filter program used by the kernel to discard uninteresting packets. An array of instructions and its length is passed in using the following structure: .Bd -literal struct bpf_program { - int bf_len; + u_int bf_len; struct bpf_insn *bf_insns; }; .Ed @@ -274,6 +276,17 @@ are performed. See section .Sx "FILTER MACHINE" for an explanation of the filter language. +The only difference between +.Dv BIOCSETF +and +.Dv BIOCSETFNR +is +.Dv BIOCSETF +performs the actions of +.Dv BIOCFLUSH +while +.Dv BIOCSETFNR +does not. 
.It Dv BIOCVERSION .Pq Li "struct bpf_version" Returns the major and minor version numbers of the filter language currently @@ -284,8 +297,8 @@ is less than or equal to the kernel minor. The kernel version number is returned in the following structure: .Bd -literal struct bpf_version { - u_short bv_major; - u_short bv_minor; + u_short bv_major; + u_short bv_minor; }; .Ed .Pp @@ -302,7 +315,7 @@ or haphazard packet matching). .It Dv BIOCSHDRCMPLT .It Dv BIOCGHDRCMPLT .Pq Li u_int -Set or get the status of the +Sets or gets the status of the .Dq header complete flag. Set to zero if the link level source address should be filled in automatically @@ -312,7 +325,7 @@ to zero by default. .It Dv BIOCSSEESENT .It Dv BIOCGSEESENT .Pq Li u_int -Set or get the flag determining whether locally generated packets on the +Sets or gets the flag determining whether locally generated packets on the interface should be returned by BPF. Set to zero to see only incoming packets on the interface. Set to one to see packets originating locally and remotely on the interface. This flag is initialized to one by @@ -324,20 +337,35 @@ Returns the signal that will be sent to a process waiting on the bpf descriptor .Pq Li u_int Sets the signal that should be sent to a process waiting on bpf descriptor upon packet reception. The default is SIGIO. .El +.Sh STANDARD IOCTLS +.Nm +now supports several standard +.Xr ioctl 2 Ns 's +which allow the user to do non-blocking I/O to an open +.Nm +file descriptor. +.Bl -tag -width SIOCGIFADDR +.It Dv FIONREAD +.Pq Li int +Returns the number of bytes that are immediately available for reading. +.It Dv SIOCGIFADDR +.Pq Li "struct ifreq" +Returns the address associated with the interface. +.El .Sh BPF HEADER The following structure is prepended to each packet returned by .Xr read 2 : .Bd -literal struct bpf_hdr { - struct timeval bh_tstamp; /* time stamp */ - u_long bh_caplen; /* length of captured portion */ - u_long bh_datalen; /* original length of packet */ - u_short bh_hdrlen; /* length of bpf header (this struct + struct BPF_TIMEVAL bh_tstamp; /* time stamp */ + bpf_u_int32 bh_caplen; /* length of captured portion */ + bpf_u_int32 bh_datalen; /* original length of packet */ + u_short bh_hdrlen; /* length of bpf header (this struct plus alignment padding */ }; .Ed .Pp -The fields, whose values are stored in host order, and are: +The fields, whose values are stored in host order, are: .Pp .Bl -tag -compact -width bh_datalen .It Li bh_tstamp @@ -411,10 +439,10 @@ and implicit program counter. The following structure defines the instruction format: .Bd -literal struct bpf_insn { - u_short code; - u_char jt; - u_char jf; - u_long k; + u_short code; + u_char jt; + u_char jf; + bpf_u_int32 k; }; .Ed .Pp @@ -744,8 +772,6 @@ mode on the same hardware interface. This could be fixed in the kernel with additional processing overhead. However, we favor the model where all files must assume that the interface is promiscuous, and if so desired, must utilize a filter to reject foreign packets. -.Pp -Data link protocols with variable length headers are not currently supported. .Sh HISTORY The Enet packet filter was created in 1980 by Mike Accetta and Rick Rashid at Carnegie-Mellon University. Jeffrey Mogul, at diff --git a/bsd/man/man4/netintro.4 b/bsd/man/man4/netintro.4 index ab2b1b277..050aea324 100644 --- a/bsd/man/man4/netintro.4 +++ b/bsd/man/man4/netintro.4 @@ -137,7 +137,7 @@ page alluded to above.
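The bpf.4 ioctl and header descriptions above combine into a fairly standard capture loop. The following hedged sketch binds a descriptor to an interface, installs a trivial IPv4-only filter with BIOCSETF, and walks the packets in a read buffer using BPF_WORDALIGN. The ethertype offset assumes an Ethernet data link (DLT_EN10MB), the device path and interface name are illustrative, and error handling is abbreviated.

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/bpf.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>

int capture_ipv4(const char *ifname)    /* e.g. "en0" (hypothetical) */
{
    struct ifreq ifr;
    u_int buflen, on = 1;
    char *buf;
    ssize_t n;

    /* Filter: load the Ethernet ethertype, accept IPv4, drop the rest. */
    struct bpf_insn insns[] = {
        BPF_STMT(BPF_LD + BPF_H + BPF_ABS, 12),
        BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x0800, 0, 1),
        BPF_STMT(BPF_RET + BPF_K, (u_int)-1),   /* capture whole packet */
        BPF_STMT(BPF_RET + BPF_K, 0),           /* discard */
    };
    struct bpf_program prog = { 4, insns };

    int fd = open("/dev/bpf0", O_RDONLY);   /* one minor device per consumer */
    if (fd < 0)
        return -1;

    memset(&ifr, 0, sizeof(ifr));
    strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
    ioctl(fd, BIOCSETIF, &ifr);        /* bind to the interface */
    ioctl(fd, BIOCIMMEDIATE, &on);     /* deliver packets as they arrive */
    ioctl(fd, BIOCSETF, &prog);        /* install filter (flushes, see above) */
    ioctl(fd, BIOCGBLEN, &buflen);     /* reads must use exactly this size */

    buf = malloc(buflen);
    while ((n = read(fd, buf, buflen)) > 0) {
        char *p = buf;
        while (p < buf + n) {
            struct bpf_hdr *bh = (struct bpf_hdr *)p;
            /* packet bytes start at p + bh->bh_hdrlen, bh->bh_caplen long */
            p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
        }
    }
    free(buf);
    close(fd);
    return 0;
}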
.Pp The field .Ar sa_len -contains the total length of the of the structure, +contains the total length of the structure, which may exceed 16 bytes. The following address values for .Ar sa_family diff --git a/bsd/man/man4/tcp.4 b/bsd/man/man4/tcp.4 index f7ecb4087..8d5b79c4f 100644 --- a/bsd/man/man4/tcp.4 +++ b/bsd/man/man4/tcp.4 @@ -210,7 +210,7 @@ and the reverse source route is used in responding. .Pp When a .Tn TCP -socket is set non-blocking, and the connection cannot be established immediatly, +socket is set non-blocking, and the connection cannot be established immediately, .Xr connect 2 returns with the error .Dv EINPROGRESS , @@ -229,7 +229,7 @@ can be retrieved via the socket option .Dv SO_ERROR . .Pp Note that even if the socket is non-blocking, it is possible for the connection -to be established immediatly. In that case +to be established immediately. In that case .Xr connect 2 does not return with .Dv EINPROGRESS . diff --git a/bsd/man/man4/tun.4 b/bsd/man/man4/tun.4 deleted file mode 100644 index 47608b2cd..000000000 --- a/bsd/man/man4/tun.4 +++ /dev/null @@ -1,361 +0,0 @@ -.\" $OpenBSD: tun.4,v 1.2 1996/12/16 16:08:52 deraadt Exp $ -.Dd March 10, 1996 -.Dt TUN 4 -.Os OpenBSD 1.2 -.Sh NAME -.Nm tun -.Nd Tunnel Network Interface -.Sh SYNOPSIS -.Cd "pseudo-device tun 4" -.Sh DESCRIPTION -The -.Nm tun -interface is a software loopback mechanism that can be loosely -described as the network interface analog of the -.Xr pty 4 , -that is, -.Nm tun -does for network interfaces what the -.Nm pty -driver does for terminals. -.Pp -The -.Nm tun -driver, like the -.Nm pty -driver, provides two interfaces: an interface like the usual facility -it is simulating (a network interface in the case of -.Nm tun , -or a terinal for -.Nm pty ) , -and a character-special device -.Dq control -interface. -.Pp -The network interfaces are named -.Sy tun Ns Ar 0 , -.Sy tun Ns Ar 1 , -etc, as many in all as the -.Ar count -figure given on the -.Sy pseudo-device -line. Each one supports the usual network-interface -.Xr ioctl 2 Ns s , -such as -.Dv SIOCSIFADDR -and -.Dv SIOCSIFNETMASK , -and thus can be used with -.Xr ifconfig 8 -like any other interface. At boot time, they are -.Dv POINTOPOINT -interfaces, but this can be changed; see the description of the control -device, below. When the system chooses to transmit a packet on the -network interface, the packet can be read from the control device (it -appears as -.Dq input -there); writing a packet to the control device generates an input -packet on the network interface, as if the (nonexistent) hardware had -just received it. -.Pp -There are two control interfaces. The -.Em data -interface, normally -.Pa /dev/tun Ns Sy N , -is exclusive-open (it cannot be opened if it is already open), is -normally restricted to the super-user, and can -.Dq transmit -and -.Dq receive -packets. The -.Em control -interface, normally -.Pa /dev/tunc Ns Sy N , -cannot send and receive packets, but can be opened by many processes at -once; it is intended for status queries and changes (many of which can -also be implemented with -.Fn ioctl -calls on the data interface). There are a number of status bits that -can be set or cleared via the control interfaces; they are mentioned -below where applicable, and they are all summarized in the discussions -of the control interfaces. -.\" Why isn't .Ss documented in mdoc(7) and mdoc.samples(7)? 
-.Ss The data interface -The data interface supports -.Xr read 2 , -.Xr write 2 , -and -.Xr ioctl 2 -calls to, respectively, collect -.Dq output -packets, generate -.Dq input -packets, and perform control functions. As mentioned above, this -interface is exclusive-open; if the -.Dv SUONLY -bit is set (which it is by default), it cannot be opened at all except -by the super-user. By default, a -.Fn read -call will return an error -.Pf ( Er EHOSTDOWN ) -if the interface is not -.Dq ready -(which means that the control device is open and the interface's -address has been set); if preferred, the -.Dv RRWAIT -bit can be set, in which case a -.Fn read -call will block (even if non-blocking I/O has been enabled) until the -interface is ready. Once the interface is ready, -.Fn read -will return a packet if one is available; if not, it will either block -until one is or return -.Er EWOULDBLOCK , -depending on whether non-blocking I/O has been enabled. If the packet -is longer than is allowed for in the buffer passed to -.Fn read , -the extra data will be silently dropped. -.Pp -The first byte of data will always be the address family (eg, -.Dv AF_INET ) -of the packet. By default, the packet data follows immediately, but if -the -.Dv PREPADDR -bit is set, the address to which the packet is to be sent is placed -after the address family byte and before the packet data. The size and -layout of the address depends on the address family; for -.Dv AF_INET , -for example, it is a -.Va struct in_addr . -A -.Xr write 2 -call passes a packet in to be -.Dq received -on the pseudo-interface. Each -.Fn write -call supplies exactly one packet; the packet length is taken from the -amount of data provided to -.Fn write . -The first byte must be the address family of the packet, much as in -packets returned by -.Fn read ; -the packet data always follows immediately. -A large number of -.Xr ioctl 2 -calls are also supported. They are defined in -.Aq Pa net/if_tun.h Ns . -.Bl -tag -width TUN_PREPADDR -.It Dv TUNSDEBUG -The argument should be a pointer to an -.Va int ; -this sets the internal debugging variable to that value. What, if -anything, this variable controls is not documented here; see the source -code. -.It Dv TUNGDEBUG -The argument should be a pointer to an -.Va int ; -this stores the internal debugging variable's value into it. -.It Dv TUNSMODE -The argument should be a pointer to an -.Va int ; -its value must be -.Dv IFF_POINTOPOINT -or -.Dv IFF_BROADCAST . -The type of the corresponding -.Em tun Ns Sy n -interface is set to the supplied type. If the value is anything else, -an -.Er EINVAL -error occurs. The interface must be down at the time; if it is up, an -.Er EBUSY -error occurs. -.\" X .It Dv TUNSFLAG -.\" X The interface's flag bits are set as specified in the -.\" X .Va int -.\" X argument. Only some of the bits can be modified; the rest are -.\" X read-only. The bits are defined in -.\" X .Aq Pa net/if_tun.h -.\" X with a -.\" X .Dv TUN_ -.\" X prefix; for example, the bit called -.\" X .Dv RRWAIT -.\" X in this document would be referred to in source code as -.\" X .Dv TUN_RRWAIT . -.\" X The bits are: -.\" X .\" Why isn't the way to create a table like this documented in mdoc(7) -.\" X .\" or mdoc.samples(7)?! -.\" X .Bl -column "TUN_PREPADDR" "RO/RW" -compact -indent-two -.\" X .It Name Ta RO/RW Ta Meaning -.\" X .It Dv TUN_OPEN Ta RO Ta "Data control device is open." -.\" X .It Dv TUN_INITED Ta RO Ta "Initialized." -.\" X .It Dv TUN_RCOLL Ta RO Ta "Select-for-read collision." 
-.\" X .It Dv TUN_IASET Ta RO Ta "Address has been set." -.\" X .It Dv TUN_DSTADDR Ta RO Ta "Destination address has been set." -.\" X .It Dv TUN_RWAIT Ta RO Ta "A process is blocked in Fn read Ns ." -.\" X .It Dv TUN_ASYNC Ta RO Ta "Generate Dv SIGIO No for readers." -.\" X .It Dv TUN_NBIO Ta RO Ta "Non-blocking I/O for reads." -.\" X .It Dv TUN_BRDADDR Ta RO Ta "Broadcast address has been set." -.\" X .It Dv TUN_PREPADDR Ta RW Ta "Prepend sent-to address for reads." -.\" X .It Dv TUN_STAYUP Ta RW Ta "Don't take interface down on close." -.\" X .It Dv TUN_SUONLY Ta RW Ta "Data control device is super-user only." -.\" X .It Dv TUN_RRWAIT Ta RW Ta "Wait for ready when reading." -.\" X .El -.\" X .It Dv TUNGFLAG -.\" X The interface's flag bits are fetched into the argument -.\" X .Va int . -.\" X The flags and their meanings are as for -.\" X .Dv TUNSFLAG . -.\" X .It Dv FIONBIO -.\" X Turn non-blocking I/O for reads off or on, according as the argument -.\" X .Va int Ns 's -.\" X value is or isn't zero. (Writes are always nonblocking.) -.\" X .It Dv FIOASYNC -.\" X Turn asynchronous I/O for reads (ie, generation of -.\" X .Dv SIGIO -.\" X when data is available to be read) off or on, according as the argument -.\" X .Va int Ns 's -.\" X value is or isn't zero. -.\" X .It Dv FIONREAD -.\" X If any packets are queued to be read, store the size of the first one -.\" X into the argument -.\" X .Va int ; -.\" X otherwise, store zero. -.\" X .It Dv TIOCSPGRP -.\" X Set the process group to receive -.\" X .Dv SIGIO -.\" X signals, when asynchronous I/O is enabled, to the argument -.\" X .Va int -.\" X value. -.\" X .It Dv TIOCGPGRP -.\" X Retrieve the process group value for -.\" X .Dv SIGIO -.\" X signals into the argument -.\" X .Va int -.\" X value. -.El -The data control device also supports -.Xr select 2 -for read; selecting for write is pointless, and always succeeds, since -writes are always nonblocking (if the packet cannot be accepted for a -transient reason (eg, no buffer space available), it is silently -dropped; if the reason is not transient (eg, packet too large), an -error is returned). -.Pp -On the last close of the data device, by default, the interface is -brought down (as if with -.Dq ifconfig tun Ns Sy n down ) ; -if the -.Dv STAYUP -bit is set, this is not done. In either case, all queued packets are -thrown away. (If the interface is up when the data device is not open, -either because of -.Dv STAYUP -or because it was explicitly brought up, output packets are always -thrown away rather than letting them pile up.) -.Ss The control interface -The alternative control interface is a text-based interface designed -for shell-script or human use; it allows control of many of the things -that can be done with -.Fn ioctl -calls on the data interface, and a few more as well. -.Pp -.Fn read Ns s -on the control interface always return a single line of text (or just -the beginning of the line, if the buffer passed to -.Xr read 2 -was too small to take the whole line). The line contains items in the -general format -.Do -.Li item=value -.Dc , -where -.Li item -is a keyword and -.Li value -is a value appropriate to the keyword. This line is intended for human -use; programs should use the -.Fn ioctl -interface. 
Here is an actual example (broken because of width -restrictions): -.Bd -literal -unit=0 flags=(open,inited,!rcoll,iaset,!dstaddr,!rwait,!async, -!nbio,!brdaddr,prepaddr,stayup,suonly,rrwait) type=broadcast -mtu=1500 coll=0 ipkts=0/0 opkts=0/0 pgrp=0 -.Ed -.Pp -Note that the current file offset is ignored for reads, so using a tool like -.Xr cat 1 -will result in infinite output. Use something more like -.Dq head\ \&-1 -for command-line use. It is possible to -.Xr select 2 -for reading on this device, which will indicate that the device is -readable whenever the state is changed. -.Pp -Writes to the control interface are interpreted as modifications to the -state. Each -.Fn write -call is treated separately. The data written is broken at whitespace -(blanks, tabs, newlines); each resulting fragment has its first -character examined. If this character is a -.Ql \&+ -or -.Ql \&\- , -the rest of the fragment is taken as a flag name, and the flag is -turned on (for -.Ql \&+ ) -or off (for -.Ql \&\- ) . -(Flag names are as generated on reads; they are the same as the -.Dv TUN_ Ns Em xxx -constants, with the leading -.Dv TUN_ -removed and the rest lowercased.) If the first character is -.Ql t , -the second character must be -.Ql b -or -.Ql p , -and the interface type is set to -.Dv IFF_BROADCAST -or -.Dv IFF_POINTOPOINT , -respectively. If the first character is -.Ql g -or -.Ql m , -the rest of the fragment is taken as a number in decimal (possibly with -a leading \&\- sign) and the result is taken as a new process group, -for -.Ql g -or MTU, for -.Ql m . -(The MTU must not be less than 1; attempts to set it so return -.Er EIO . ) -.Pp -This interface is useful for command-line reconfiguration, such as -setting the interface type at boot time, with -.Sh SEE ALSO -.Xr inet 4 , -.Xr intro 4 -.Sh BUGS -The -.Dv SUONLY -bit is a botch, especially since the control interface, which is never -restricted by the kernel, can change it. Access control really should -be handled by the permission bits on the -.Pa /dev -entries for the data and control devices; this bit is a historical -artifact. -.Pp -The process-group values for -.Dv SIGIO -signals should be checked; as it stands, the driver can be used (by -anyone who can open the control or data device) to send any desired -signal to an arbitrary process or process group. (Until this is fixed, -you should be careful to set the permisison bits to allow only root to -open the control device, and either do the same for the data device or -leave the -.Dv SUONLY -bit set.) 
diff --git a/bsd/miscfs/Makefile b/bsd/miscfs/Makefile index ece064108..1d54c5b95 100644 --- a/bsd/miscfs/Makefile +++ b/bsd/miscfs/Makefile @@ -13,21 +13,11 @@ INSTINC_SUBDIRS = \ specfs \ union -INSTINC_SUBDIRS_I386 = \ - EXPINC_SUBDIRS = \ devfs \ fifofs \ - specfs - -EXPINC_SUBDIRS_I386 = \ - -SETUP_SUBDIRS = \ - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - + specfs \ + union include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/miscfs/devfs/Makefile b/bsd/miscfs/devfs/Makefile index 9d29f42e1..46c7c695b 100644 --- a/bsd/miscfs/devfs/Makefile +++ b/bsd/miscfs/devfs/Makefile @@ -7,14 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ devfs.h diff --git a/bsd/miscfs/devfs/devfs_fdesc_support.c b/bsd/miscfs/devfs/devfs_fdesc_support.c index c15f3df53..bf4e3bb06 100644 --- a/bsd/miscfs/devfs/devfs_fdesc_support.c +++ b/bsd/miscfs/devfs/devfs_fdesc_support.c @@ -95,9 +95,6 @@ #include #include -/* XXX should be prototyped in header for here, kern_descrip.c */ -extern int soo_stat(struct socket *so, void *ub, int isstat64); - #define FDL_WANT 0x01 #define FDL_LOCKED 0x02 static int fdcache_lock; @@ -222,8 +219,10 @@ loop: vid = vnode_vid(fd->fd_vnode); fdesc_unlock(); - if (vnode_getwithvid(fd->fd_vnode, vid)) + if (vnode_getwithvid(fd->fd_vnode, vid)) { + fdesc_lock(); goto loop; + } *vpp = fd->fd_vnode; (*vpp)->v_type = vtype; @@ -391,7 +390,7 @@ fdesc_attr(int fd, struct vnode_attr *vap, vfs_context_t a_context) if ((error = fp_lookup(p, fd, &fp, 0))) return (error); - switch (fp->f_fglob->fg_type) { + switch (FILEGLOB_DTYPE(fp->f_fglob)) { case DTYPE_VNODE: if((error = vnode_getwithref((struct vnode *) fp->f_fglob->fg_data)) != 0) { break; @@ -416,14 +415,14 @@ fdesc_attr(int fd, struct vnode_attr *vap, vfs_context_t a_context) case DTYPE_SOCKET: case DTYPE_PIPE: #if SOCKETS - if (fp->f_fglob->fg_type == DTYPE_SOCKET) + if (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_SOCKET) error = soo_stat((struct socket *)fp->f_fglob->fg_data, (void *)&stb, 0); else #endif /* SOCKETS */ error = pipe_stat((struct pipe *)fp->f_fglob->fg_data, (void *)&stb, 0); if (error == 0) { - if (fp->f_fglob->fg_type == DTYPE_SOCKET) + if (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_SOCKET) VATTR_RETURN(vap, va_type, VSOCK); else VATTR_RETURN(vap, va_type, VFIFO); @@ -513,7 +512,7 @@ fdesc_setattr(struct vnop_setattr_args *ap) /* * Can setattr the underlying vnode, but not sockets! 
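The devfs_fdesc_support.c hunk above is a lock-discipline fix worth spelling out: fdesc samples the vnode id under its lock, drops the lock around vnode_getwithvid(), and retries when the id no longer matches. Since the top of the loop expects the lock held, the failure path must re-take it, which is exactly the line the patch adds. A hedged sketch of the corrected pattern (kernel-internal KPIs; not buildable outside xnu, and the helper name is illustrative):

/* Sketch only: revalidation loop for a cached vnode pointer. */
static int fdesc_get_cached_vnode(struct fdescnode *fd, vnode_t *vpp)
{
    uint32_t vid;

    fdesc_lock();
loop:
    vid = vnode_vid(fd->fd_vnode);      /* sample the id under the lock */
    fdesc_unlock();
    if (vnode_getwithvid(fd->fd_vnode, vid)) {
        fdesc_lock();                   /* re-take before retrying: the fix */
        goto loop;
    }
    *vpp = fd->fd_vnode;                /* iocount held; proceed unlocked */
    return 0;
}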
*/ - switch (fp->f_fglob->fg_type) { + switch (FILEGLOB_DTYPE(fp->f_fglob)) { case DTYPE_VNODE: { if ((error = vnode_getwithref((struct vnode *) fp->f_fglob->fg_data)) != 0) @@ -529,8 +528,7 @@ fdesc_setattr(struct vnop_setattr_args *ap) break; default: - kprintf("fp->f_fglob->fg_type = %d\n", fp->f_fglob->fg_type); - error = EBADF; + error = EBADF; break; } diff --git a/bsd/miscfs/devfs/devfs_vfsops.c b/bsd/miscfs/devfs/devfs_vfsops.c index f34edc2c5..b40778e88 100644 --- a/bsd/miscfs/devfs/devfs_vfsops.c +++ b/bsd/miscfs/devfs/devfs_vfsops.c @@ -503,7 +503,7 @@ devfs_kernel_mount(char * mntname) NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | LOCKLEAF, UIO_SYSSPACE, CAST_USER_ADDR_T(mntname), ctx); if ((error = namei(&nd))) { - printf("devfs_kernel_mount: failed to find directory '%s', %d", + printf("devfs_kernel_mount: failed to find directory '%s', %d\n", mntname, error); return (error); } @@ -567,7 +567,7 @@ devfs_kernel_mount(char * mntname) error = devfs_mount(mp, NULL, USER_ADDR_NULL, ctx); if (error) { - printf("devfs_kernel_mount: mount %s failed: %d", mntname, error); + printf("devfs_kernel_mount: mount %s failed: %d\n", mntname, error); mp->mnt_vtable->vfc_refcount--; vfs_unbusy(mp); diff --git a/bsd/miscfs/devfs/reproto.sh b/bsd/miscfs/devfs/reproto.sh old mode 100644 new mode 100755 diff --git a/bsd/miscfs/fifofs/Makefile b/bsd/miscfs/fifofs/Makefile index d70a3ab16..2694ee502 100644 --- a/bsd/miscfs/fifofs/Makefile +++ b/bsd/miscfs/fifofs/Makefile @@ -7,14 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ fifo.h diff --git a/bsd/miscfs/fifofs/fifo_vnops.c b/bsd/miscfs/fifofs/fifo_vnops.c index c1af000e6..23c596618 100644 --- a/bsd/miscfs/fifofs/fifo_vnops.c +++ b/bsd/miscfs/fifofs/fifo_vnops.c @@ -79,9 +79,6 @@ #define VOPFUNC int (*)(void *) -extern int soo_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, vfs_context_t ctx); -extern int soo_select(struct fileproc *fp, int which, void * wql, vfs_context_t ctx); - int (**fifo_vnodeop_p)(void *); struct vnodeopv_entry_desc fifo_vnodeop_entries[] = { { &vnop_default_desc, (VOPFUNC)vn_default_error }, diff --git a/bsd/miscfs/mockfs/mockfs.h b/bsd/miscfs/mockfs/mockfs.h new file mode 100644 index 000000000..3662af4bf --- /dev/null +++ b/bsd/miscfs/mockfs/mockfs.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef MOCKFS_H +#define MOCKFS_H + +#if MOCKFS + +#include +#include +#include + +/* + * mockfs is effectively a "fake" filesystem; the primary motivation for it being that we may have cases + * where our userspace needs are extremely simple/consistent and can be provided by a single binary. mockfs + * uses an in-memory tree to define the structure for an extremely simple filesystem, which makes the + * assumption that our root device is in fact a mach-o file, and provides a minimal filesystem to support + * this: the root directory, a mountpoint for devfs (given that very basic userspace code may assume the + * existence of /dev/), and an executable representing the root device. + * + * The functionality supported by mockfs is minimal: it is read-only, and does not support user-initiated IO, + * but it supports lookup (so it should be possible for the user to access /dev/). + * + * mockfs is primarily targeted towards memory-backed devices, and will (when possible) attempt to inform the + * VM that we are using a memory-backed device, so that we can eschew IO to the backing device completely, + * and avoid having an extra copy of the data in the UBC (as well as the overhead associated with creating + * that copy). + * + * For the moment, mockfs is not marked in vfs_conf.c as being threadsafe. + */ + +extern lck_attr_t * mockfs_mtx_attr; +extern lck_grp_attr_t * mockfs_grp_attr; +extern lck_grp_t * mockfs_mtx_grp; + +struct mockfs_mount { + lck_mtx_t mockfs_mnt_mtx; /* Mount-wide (and tree-wide) mutex */ + mockfs_fsnode_t mockfs_root; /* Root of the node tree */ + boolean_t mockfs_memory_backed; /* Does the backing store reside in memory */ + boolean_t mockfs_physical_memory; /* (valid if memory backed) */ + uint32_t mockfs_memdev_base; /* Base page of the backing store (valid if memory backed) */ + uint64_t mockfs_memdev_size; /* Size of the backing store (valid if memory backed) */ +}; + +typedef struct mockfs_mount * mockfs_mount_t; + +#endif /* MOCKFS */ + +#endif /* MOCKFS_H */ + diff --git a/bsd/miscfs/mockfs/mockfs_fsnode.c b/bsd/miscfs/mockfs/mockfs_fsnode.c new file mode 100644 index 000000000..c6a2582f7 --- /dev/null +++ b/bsd/miscfs/mockfs/mockfs_fsnode.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement.
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * For the moment, most operations that change the fsnode will be called only in the context of + * mockfs_mountroot, so they should not need to use a mutex. The exceptions are mockfs_fsnode_vnode, + * and mockfs_fsnode_drop_vnode, which will use a tree-wide mutex (that lives in the mockfs_mount_t + * for the mount). + * + * mockfs_fsnode_child_by_type doesn't require locking right now (we're only looking at the structure of + * the node tree, which should not change during VNOP operations). + */ + +/* mockfs_fsnode_create: + * Given a mount (mp) and mockfs node type (type), creates a new fsnode for that mountpoint (*fsnpp). + * For the moment (while type == fileid) we should have at most one node of any given type. + * + * Returns 0 on success, or an error. + */ +int mockfs_fsnode_create(mount_t mp, uint8_t type, mockfs_fsnode_t * fsnpp) +{ + int rvalue; + uint64_t new_size; + + rvalue = 0; + new_size = 0; + + if (!fsnpp || !mp) { + rvalue = EINVAL; + goto done; + } + + switch (type) { + case MOCKFS_ROOT: + break; + case MOCKFS_DEV: + break; + case MOCKFS_FILE: + /* + * For a regular file, size is meaningful, but it will always be equal to the + * size of the backing device. + */ + new_size = mp->mnt_devvp->v_specinfo->si_devsize; + break; + default: + rvalue = EINVAL; + goto done; + } + + MALLOC(*fsnpp, typeof(*fsnpp), sizeof(**fsnpp), M_TEMP, M_WAITOK | M_ZERO); + + if (!*fsnpp) { + rvalue = ENOMEM; + goto done; + } + + (*fsnpp)->size = new_size; + (*fsnpp)->type = type; + (*fsnpp)->mnt = mp; + +done: + return rvalue; +} + +/* + * mockfs_fsnode_destroy: + * Given a node (fsnp), tears down and deallocates that node and the entire subtree that it is the + * root of (deallocates you, and your children, and your children's children! ...for three months). + * + * Returns 0 on success, or an error. + */ +int mockfs_fsnode_destroy(mockfs_fsnode_t fsnp) +{ + int rvalue; + + rvalue = 0; + + /* + * We will not destroy a root node that is actively pointed to by the mount structure; the + * mount must drop the reference to the mockfs tree before we can deallocate it. + */ + if (!fsnp || (((mockfs_mount_t)fsnp->mnt->mnt_data)->mockfs_root == fsnp)) { + rvalue = EINVAL; + goto done; + } + + /* + * For now, panic in this case; I don't expect anyone to ask us to destroy a node with a live + * vfs reference, but this will tell me if that assumption is untrue. + */ + if (fsnp->vp) + panic("mockfs_fsnode_destroy called on node with live vnode; fsnp = %p (in case gdb is screwing with you)", fsnp); + + /* + * If this node has children, we need to destroy them.
+ * + * At least for now, we aren't guaranteeing destroy will be clean; we may get partway through + * and encounter an error, in which case we will panic (we may still have a sane tree, but + * we've failed to destroy the subtree, which means someone called destroy when they should + * not have done so). + */ + if (fsnp->child_a) + if ((rvalue = mockfs_fsnode_destroy(fsnp->child_a))) + panic("mockfs_fsnode_destroy failed on child_a; fsnp = %p (in case gdb is screwing with you), rvalue = %d", fsnp, rvalue); + + if (fsnp->child_b) + if ((rvalue = mockfs_fsnode_destroy(fsnp->child_b))) + panic("mockfs_fsnode_destroy failed on child_b; fsnp = %p (in case gdb is screwing with you), rvalue = %d", fsnp, rvalue); + + /* + * We need to orphan this node before we destroy it. + */ + if (fsnp->parent) + if ((rvalue = mockfs_fsnode_orphan(fsnp))) + panic("mockfs_fsnode_orphan failed during destroy; fsnp = %p (in case gdb is screwing with you), rvalue = %d", fsnp, rvalue); + + FREE(fsnp, M_TEMP); +done: + return rvalue; +} + +/* + * mockfs_fsnode_adopt: + * Given two nodes (parent, child), makes one node the child of the other node. + * + * Returns 0 on success, or an error. + */ +int mockfs_fsnode_adopt(mockfs_fsnode_t parent, mockfs_fsnode_t child) +{ + int rvalue; + + rvalue = 0; + + /* + * The child must be an orphan, and the parent cannot be the child. + */ + if (!parent || !child || child->parent || (parent == child)) { + rvalue = EINVAL; + goto done; + } + + /* + * Nodes are actually tied to a specific mount, so assert that both nodes belong to the same mount. + */ + if (parent->mnt != child->mnt) { + rvalue = EINVAL; + goto done; + } + + /* + * TODO: Get rid of this check if I ever get around to making the tree non-binary. + * TODO: Enforce that the parent cannot have two children of the same type (for the moment, this is + * implicit in the structure of the tree constructed by mockfs_mountroot, so we don't need to + * worry about it). + * + * Can the parent support another child (food, shelter, unused pointers)? + */ + if (!parent->child_a) { + parent->child_a = child; + child->parent = parent; + } + else if (!parent->child_b) { + parent->child_b = child; + child->parent = parent; + } + else { + rvalue = ENOMEM; + } + +done: + return rvalue; +} + +/* + * mockfs_fsnode_orphan: + * Given a node (fsnp), detaches it from its parent node. + * + * Returns 0 on success, or an error. + */ +int mockfs_fsnode_orphan(mockfs_fsnode_t fsnp) +{ + int rvalue; + mockfs_fsnode_t parent; + + rvalue = 0; + + if (!fsnp || !fsnp->parent) { + rvalue = EINVAL; + goto done; + } + + /* + * Disallow orphaning a node with a live vnode for now. + */ + if (fsnp->vp) + panic("mockfs_fsnode_orphan called on node with live vnode; fsnp = %p (in case gdb is screwing with you)", fsnp); + + parent = fsnp->parent; + + if (parent->child_a == fsnp) { + parent->child_a = NULL; + fsnp->parent = NULL; + } + else if (parent->child_b == fsnp) { + parent->child_b = NULL; + fsnp->parent = NULL; + } + else + panic("mockfs_fsnode_orphan insanity, fsnp->parent != parent->child; fsnp = %p (in case gdb is screwing with you)", fsnp); + +done: + return rvalue; +} + +/* + * mockfs_fsnode_child_by_type: + * Given a node (parent) and a type (type), returns the first child (*child) found corresponding to the + * requested type. This method exists to support lookup (which is responsible for mapping names, which + * we have no conception of currently, onto vnodes). + * + * This should be safe, as we are walking the read-only parts of the filesystem structure (not touching + * the vnode).
+ * + * Returns 0 on success, or an error. + */ +int mockfs_fsnode_child_by_type(mockfs_fsnode_t parent, uint8_t type, mockfs_fsnode_t * child) +{ + int rvalue; + + rvalue = 0; + + if (!parent || !child) { + rvalue = EINVAL; + goto done; + } + + if ((parent->child_a) && (parent->child_a->type == type)) + *child = parent->child_a; + else if ((parent->child_b) && (parent->child_b->type == type)) + *child = parent->child_b; + else + rvalue = ENOENT; + +done: + return rvalue; +} + +/* + * mockfs_fsnode_vnode: + * Given a mockfs node (fsnp), returns a vnode (*vpp) corresponding to the mockfs node; the vnode will + * have an iocount on it. + * + * Returns 0 on success, or an error. + */ +int mockfs_fsnode_vnode(mockfs_fsnode_t fsnp, vnode_t * vpp) +{ + int rvalue; + memory_object_control_t ubc_mem_object; + mockfs_mount_t mockfs_mnt; + struct vnode_fsparam vnfs_param; + + if ((!fsnp) || (!vpp)) { + rvalue = EINVAL; + goto done; + } + + mockfs_mnt = ((mockfs_mount_t) fsnp->mnt->mnt_data); + lck_mtx_lock(&mockfs_mnt->mockfs_mnt_mtx); + + if (fsnp->vp) { + /* + * The vnode already exists; this should be easy. + */ + rvalue = vnode_get(fsnp->vp); + if (!rvalue) { + *vpp = fsnp->vp; + } + } + else { + /* + * We need to create the vnode; this will be unpleasant. + */ + vnfs_param.vnfs_mp = fsnp->mnt; + vnfs_param.vnfs_vtype = (fsnp->type == MOCKFS_FILE) ? VREG : VDIR; + vnfs_param.vnfs_str = "mockfs"; + vnfs_param.vnfs_dvp = (fsnp->type == MOCKFS_ROOT) ? NULL : fsnp->parent->vp; + vnfs_param.vnfs_fsnode = fsnp; + vnfs_param.vnfs_vops = mockfs_vnodeop_p; + vnfs_param.vnfs_markroot = (fsnp->type == MOCKFS_ROOT); + vnfs_param.vnfs_marksystem = 0; + vnfs_param.vnfs_rdev = 0; + vnfs_param.vnfs_filesize = fsnp->size; + vnfs_param.vnfs_cnp = NULL; + vnfs_param.vnfs_flags = VNFS_CANTCACHE | VNFS_NOCACHE; + rvalue = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vnfs_param, &fsnp->vp); + + if ((!rvalue) && (fsnp->type == MOCKFS_FILE) && (mockfs_mnt->mockfs_memory_backed)) { + /* + * We're memory backed; point the pager towards the backing store of the device. + */ + ubc_mem_object = ubc_getobject(fsnp->vp, 0); + + if (!ubc_mem_object) + panic("mockfs_fsnode_vnode failed to get ubc_mem_object for a new vnode"); + + rvalue = pager_map_to_phys_contiguous(ubc_mem_object, 0, (mockfs_mnt->mockfs_memdev_base << PAGE_SHIFT), fsnp->size); + + if (rvalue) + panic("mockfs_fsnode_vnode failed to create fictitious pages for a memory-backed device; rvalue = %d", rvalue); + } + + if (!rvalue) + *vpp = fsnp->vp; + } + + lck_mtx_unlock(&mockfs_mnt->mockfs_mnt_mtx); + +done: + return rvalue; +} + +/* + * mockfs_fsnode_drop_vnode: + * Given a mockfs node (fsnp) that has a vnode associated with it, causes them to drop their + * references to each other. This exists to support mockfs_reclaim. This method will grab the tree + * mutex, as this will mutate the tree. + * + * Returns 0 on success, or an error.
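+ *
+ * Sketch of the assumed call sequence (mirroring the reclaim path in
+ * mockfs_vnops.c): VFS drives VNOP_RECLAIM, and mockfs_reclaim simply hands the
+ * node to this function:
+ *
+ *	fsnode = (mockfs_fsnode_t) vnode_fsnode(vp);
+ *	rvalue = mockfs_fsnode_drop_vnode(fsnode);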
+ */ +int mockfs_fsnode_drop_vnode(mockfs_fsnode_t fsnp) +{ + int rvalue; + mockfs_mount_t mockfs_mnt; + vnode_t vp; + + rvalue = 0; + + if (!fsnp) { + rvalue = EINVAL; + goto done; + } + + mockfs_mnt = ((mockfs_mount_t) fsnp->mnt->mnt_data); + lck_mtx_lock(&mockfs_mnt->mockfs_mnt_mtx); + + if (!(fsnp->vp)) { + panic("mockfs_fsnode_drop_vnode: target fsnode does not have an associated vnode"); + } + + vp = fsnp->vp; + fsnp->vp = NULL; + vnode_clearfsnode(vp); + + lck_mtx_unlock(&mockfs_mnt->mockfs_mnt_mtx); +done: + return rvalue; +} + diff --git a/bsd/miscfs/mockfs/mockfs_fsnode.h b/bsd/miscfs/mockfs/mockfs_fsnode.h new file mode 100644 index 000000000..0d9a2b9f9 --- /dev/null +++ b/bsd/miscfs/mockfs/mockfs_fsnode.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef MOCKFS_FSNODE_H +#define MOCKFS_FSNODE_H + +#if MOCKFS + +#include + +/* + * Types for the filesystem nodes; for the moment, these will effectively serve as unique + * identifiers. This can be generalized later, but at least for the moment, the read-only + * nature of the filesystem and the terse semantics (you have VDIR and VREG, and VREG + * always represents the entire backing device) make this sufficient for now. + * + * TODO: Should this include MOCKFS_SBIN? Right now we tell lookup that when looking in + * MOCKFS_ROOT, "sbin" resolves back onto MOCKFS_ROOT; this is a handy hack for aliasing, + * but may not mesh well with VFS. + */ +enum mockfs_fsnode_type { + MOCKFS_ROOT, + MOCKFS_DEV, + MOCKFS_FILE +}; + +/* + * For the moment, pretend everything is a directory with support for two entries; the + * executable binary is a one-to-one mapping with the backing devnode, so this may + * actually be all we're interested in. + * + * Stash the filesize in here too (this is easier than looking at the devnode for every + * VREG access).
+ */ +struct mockfs_fsnode { + uint64_t size; /* Bytes of data; 0 unless type is MOCKFS_FILE */ + uint8_t type; /* Serves as a unique identifier for now */ + mount_t mnt; /* The mount that this node belongs to */ + vnode_t vp; /* vnode for this node (if one exists) */ + struct mockfs_fsnode * parent; /* Parent of this node (NULL for root) */ + /* TODO: Replace child_a/child_b with something more flexible */ + struct mockfs_fsnode * child_a; /* TEMPORARY */ + struct mockfs_fsnode * child_b; /* TEMPORARY */ +}; + +typedef struct mockfs_fsnode * mockfs_fsnode_t; + +/* + * See mockfs_fsnode.c for function details. + */ +int mockfs_fsnode_create(mount_t mp, uint8_t type, mockfs_fsnode_t * fsnpp); +int mockfs_fsnode_destroy(mockfs_fsnode_t fsnp); +int mockfs_fsnode_adopt(mockfs_fsnode_t parent, mockfs_fsnode_t child); +int mockfs_fsnode_orphan(mockfs_fsnode_t fsnp); +int mockfs_fsnode_child_by_type(mockfs_fsnode_t parent, uint8_t type, mockfs_fsnode_t * child); +int mockfs_fsnode_vnode(mockfs_fsnode_t fsnp, vnode_t * vpp); +int mockfs_fsnode_drop_vnode(mockfs_fsnode_t fsnp); + +#endif /* MOCKFS */ + +#endif /* MOCKFS_FSNODE_H */ + diff --git a/bsd/miscfs/mockfs/mockfs_vfsops.c b/bsd/miscfs/mockfs/mockfs_vfsops.c new file mode 100644 index 000000000..3aefc8ad6 --- /dev/null +++ b/bsd/miscfs/mockfs/mockfs_vfsops.c @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +lck_attr_t * mockfs_mtx_attr = (lck_attr_t *) 0; +lck_grp_attr_t * mockfs_grp_attr = (lck_grp_attr_t *) 0; +lck_grp_t * mockfs_mtx_grp = (lck_grp_t *) 0; + +int mockfs_mountroot(mount_t mp, vnode_t rvp, __unused vfs_context_t ctx); + +/* + * Functions that are part of the mockfs_vfsops structure. 
+ */ +int mockfs_unmount(__unused struct mount *mp, __unused int mntflags, __unused vfs_context_t ctx); +int mockfs_root(mount_t mp, vnode_t * vpp, __unused vfs_context_t ctx); +int mockfs_sync(__unused struct mount *mp, __unused int waitfor, __unused vfs_context_t ctx); +int mockfs_init(__unused struct vfsconf * vfsc); + +/* + * mockfs_mountroot: + * Given a mount (mp) and a vnode for the root device (rvp), builds a fake filesystem for rvp. This consists + * of three nodes; a directory node (to serve as a mountpoint for devfs), a file node meant to serve as an + * executable frontend for rootvp (we will assume that rootvp is an executable that the kernel can subsequently + * run), and the root node for the mockfs filesystem. The structure of mockfs is memory-backed; only the + * contents of the file node refer to the backing device. + * + * Returns 0 on success, or an error. + */ +int mockfs_mountroot(mount_t mp, vnode_t rvp, __unused vfs_context_t ctx) +{ + int rvalue = 0; + mockfs_fsnode_t root_fsnode = NULL; + mockfs_fsnode_t dev_fsnode = NULL; + mockfs_fsnode_t file_fsnode = NULL; + mockfs_mount_t mockfs_mount_data = NULL; + dk_memdev_info_t memdev_info; + + /* + * TODO: Validate that the device at least LOOKS like a mach-o (has a sane header); this would prevent us + * from causing EBADMACHO panics further along the boot path. + */ + + /* + * There are no M_MOCKFS* definitions at the moment, just use M_TEMP. + */ + + MALLOC(mockfs_mount_data, mockfs_mount_t, sizeof(*mockfs_mount_data), M_TEMP, M_WAITOK | M_ZERO); + mockfs_fsnode_create(mp, MOCKFS_ROOT, &root_fsnode); + mockfs_fsnode_create(mp, MOCKFS_DEV, &dev_fsnode); + mockfs_fsnode_create(mp, MOCKFS_FILE, &file_fsnode); + + if (!mockfs_mount_data || !root_fsnode || !dev_fsnode || !file_fsnode) { + rvalue = ENOMEM; + goto done; + } + + /* + * If rvp is a memory device (with a few caveats), we can point to the same physical memory as the device + * and avoid pointless paging/copying; query the device node for the information we need to determine + * if we can do this. + */ + bzero(&memdev_info, sizeof(memdev_info)); + + if (!VNOP_IOCTL(rvp, DKIOCGETMEMDEVINFO, (caddr_t)&memdev_info, 0, NULL)) { + /* + * For the moment, we won't try to optimize when mi_phys is true. + */ + if (!memdev_info.mi_phys) { + mockfs_mount_data->mockfs_memory_backed = memdev_info.mi_mdev; + mockfs_mount_data->mockfs_physical_memory = memdev_info.mi_phys; + mockfs_mount_data->mockfs_memdev_base = memdev_info.mi_base; + mockfs_mount_data->mockfs_memdev_size = memdev_info.mi_size; + } + } + + lck_mtx_init(&mockfs_mount_data->mockfs_mnt_mtx, mockfs_mtx_grp, mockfs_mtx_attr); + + /* + * All of the needed nodes/structures have been set up; now we just need to establish the relationships + * between the various mockfs nodes.
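+	 *
+	 * The intended shape of the tree (the names are the ones mockfs_lookup in
+	 * mockfs_vnops.c maps onto these nodes) is:
+	 *
+	 *	MOCKFS_ROOT   ("/", also aliased as "sbin")
+	 *	    |-- MOCKFS_DEV    ("dev", mountpoint for devfs)
+	 *	    |-- MOCKFS_FILE   ("launchd", backed by the root device)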
+ */ + if ((rvalue = mockfs_fsnode_adopt(root_fsnode, dev_fsnode))) + goto done; + + if ((rvalue = mockfs_fsnode_adopt(root_fsnode, file_fsnode))) + goto done; + + mockfs_mount_data->mockfs_root = root_fsnode; + mp->mnt_data = (typeof(mp->mnt_data)) mockfs_mount_data; + +done: + if (rvalue) { + if (file_fsnode) + mockfs_fsnode_destroy(file_fsnode); + if (dev_fsnode) + mockfs_fsnode_destroy(dev_fsnode); + if (root_fsnode) + mockfs_fsnode_destroy(root_fsnode); + if (mockfs_mount_data) { + lck_mtx_destroy(&mockfs_mount_data->mockfs_mnt_mtx, mockfs_mtx_grp); + FREE(mockfs_mount_data, M_TEMP); + } + } + + return rvalue; +} + +/* + * mockfs_unmount: + * Given a mount (mp), and associated flags (mntflags), performs the necessary teardown to destroy the mount. + * + * Returns 0 on success, or an error. + */ +int mockfs_unmount(struct mount *mp, int mntflags, __unused vfs_context_t ctx) +{ + int rvalue; + int vflush_flags; + mockfs_fsnode_t root_fsnode; + mockfs_mount_t mockfs_mnt; + + vflush_flags = 0; + mockfs_mnt = (mockfs_mount_t) mp->mnt_data; + + /* + * Reclaim the vnodes for the mount (forcibly, if requested; given that mockfs only supports mountroot + * at the moment, this should ALWAYS be forced). + */ + if (mntflags & MNT_FORCE) { + vflush_flags |= FORCECLOSE; + } + + rvalue = vflush(mp, NULL, vflush_flags); + + if (rvalue) + return rvalue; + + /* + * Past this point, errors are likely to be unrecoverable, so panic if we're given any excuse; we + * need to tear down the mockfs_mnt data now, so that VFS can clean up the mount structure. Note + * that clearing mockfs_root before destroying the fsnode tree is related to an implementation + * detail of mockfs_fsnode_destroy (which will refuse to destroy the root node). + */ + root_fsnode = mockfs_mnt->mockfs_root; + mockfs_mnt->mockfs_root = NULL; + rvalue = mockfs_fsnode_destroy(root_fsnode); + + if (rvalue) + panic("mockfs_unmount: Failed to destroy the fsnode tree"); + + lck_mtx_destroy(&mockfs_mnt->mockfs_mnt_mtx, mockfs_mtx_grp); + FREE(mockfs_mnt, M_TEMP); + mp->mnt_data = NULL; + + return rvalue; +} + +/* + * mockfs_root: + * Given a mount (mp), returns the root vnode (*vpp) for that mount with an iocount. + * + * Returns 0 on success, or an error. + */ +int mockfs_root(mount_t mp, vnode_t * vpp, __unused vfs_context_t ctx) +{ + int rvalue; + + rvalue = mockfs_fsnode_vnode(((mockfs_mount_t) mp->mnt_data)->mockfs_root, vpp); + return rvalue; +} + +/* + * mockfs_sync: + * Returns success because we're a read-only filesystem. + * + * Returns 0. + */ +int mockfs_sync(__unused struct mount *mp, __unused int waitfor, __unused vfs_context_t ctx) +{ + return (0); +} + +/* + * mockfs_init: + * Run once (during VFS initialization); takes care of generic mockfs initialization (which for now, means + * global lock information). + * + * Returns 0 on success, or an error. + */ +int mockfs_init(__unused struct vfsconf * vfsc) +{ + mockfs_mtx_attr = lck_attr_alloc_init(); + mockfs_grp_attr = lck_grp_attr_alloc_init(); + mockfs_mtx_grp = lck_grp_alloc_init("mockfs-mutex", mockfs_grp_attr); + + /* + * If we've failed to allocate this early in boot, something is horrendously wrong; it should be fine to + * panic (for now).
+ */ + if (!mockfs_mtx_attr || !mockfs_grp_attr || !mockfs_mtx_grp) { + panic("mockfs_init failed to allocate lock information"); + } + + return (0); +} + +struct vfsops mockfs_vfsops = { + NULL, /* mount */ + NULL, /* start */ + mockfs_unmount, /* unmount */ + mockfs_root, /* root */ + NULL, /* quotactl */ + NULL, /* getattr */ + mockfs_sync, /* sync */ + NULL, /* vget */ + NULL, /* fhtovp */ + NULL, /* vptofh */ + mockfs_init, /* init */ + NULL, /* sysctl */ + NULL, /* setattr */ + {NULL} +}; + diff --git a/bsd/miscfs/mockfs/mockfs_vnops.c b/bsd/miscfs/mockfs/mockfs_vnops.c new file mode 100644 index 000000000..7df942594 --- /dev/null +++ b/bsd/miscfs/mockfs/mockfs_vnops.c @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * VOPFUNC macro; why do we have so many distinct definitions of this? + */ +#define VOPFUNC int (*)(void *) + +/* + * VNOP functions that mockfs implements. See xnu/bsd/sys/vnode_if.h for information on what + * each function does in generic terms. + */ +int mockfs_lookup(struct vnop_lookup_args * ap); +int mockfs_getattr(struct vnop_getattr_args * ap); +int mockfs_read(struct vnop_read_args * ap); +int mockfs_strategy(struct vnop_strategy_args * ap); +int mockfs_pagein(struct vnop_pagein_args * ap); +int mockfs_reclaim(__unused struct vnop_reclaim_args * ap); +int mockfs_blockmap(struct vnop_blockmap_args * ap); + +/* + * struct vnop_lookup_args { + * struct vnodeop_desc *a_desc; // We don't care about this (for now) + * vnode_t a_dvp; // vnode for the directory we are performing the lookup in + * vnode_t *a_vpp; // Return parameter: the vnode we matched the lookup to + * struct componentname *a_cnp; // Description of the file we are looking for + * vfs_context_t a_context; // We don't care about this (for now) + * }; + * + * mockfs_lookup: + * Given a vnode for a directory (a_dvp) and a file description (a_cnp), looks for a file matching + * the description in the directory, and gives a vnode with an iocount for the file (*a_vpp), if the + * file was found.
For mockfs, because we realistically have 3 vnodes, the filesystem information + * is extremely sparse, so the details on naming are all implemented in mockfs_lookup; the generic VFS + * information is enough for us to distinguish between all 3 files. Any lookup not done in the root + * vnode fails, by definition. Each vnode has the following names in relation to the root vnode: + * + * The root vnode: + * "sbin" + * + * The devfs vnode: + * "dev" + * + * The executable vnode: + * "launchd" + * + * Returns 0 on success, or an error. + */ +int mockfs_lookup(struct vnop_lookup_args * ap) +{ + char held_char; + int rvalue; + int op; + mockfs_fsnode_t fsnode; + mockfs_fsnode_t target_fsnode; + vnode_t dvp; + vnode_t * vpp; + vfs_context_t ctx; + struct componentname * cnp; + + rvalue = 0; + dvp = ap->a_dvp; + vpp = ap->a_vpp; + cnp = ap->a_cnp; + ctx = ap->a_context; + op = cnp->cn_nameiop; + fsnode = (mockfs_fsnode_t) dvp->v_data; + target_fsnode = NULL; + + if ((op == LOOKUP) && (fsnode->type == MOCKFS_ROOT)) { + /* + * Okay, we're looking in the root directory, so we aren't necessarily + * going to fail. What are we looking for? + */ + + held_char = cnp->cn_nameptr[cnp->cn_namelen]; + cnp->cn_nameptr[cnp->cn_namelen] = '\0'; + + /* + * We'll resolve sbin to /, and launchd to the executable for the moment, so that I don't + * accidentally commit a change to the init_process pathname. We map from name to node type + * here, as mockfs doesn't currently use names, just unique types. + */ + if (!strncmp(cnp->cn_nameptr, "sbin", 5)) + target_fsnode = fsnode; + else if (!strncmp(cnp->cn_nameptr, "dev", 4)) + mockfs_fsnode_child_by_type(fsnode, MOCKFS_DEV, &target_fsnode); + else if (!strncmp(cnp->cn_nameptr, "launchd", 8)) + mockfs_fsnode_child_by_type(fsnode, MOCKFS_FILE, &target_fsnode); + else + rvalue = ENOENT; + + cnp->cn_nameptr[cnp->cn_namelen] = held_char; + + if (target_fsnode) + rvalue = mockfs_fsnode_vnode(target_fsnode, vpp); + } + else { + /* + * We aren't looking in root; the query may actually be reasonable, but we're not + * going to support it. + */ + rvalue = ENOENT; + } + + return rvalue; +} + +/* + * struct vnop_getattr_args { + * struct vnodeop_desc *a_desc; // We don't care about this (for now) + * vnode_t a_vp; // Pointer to the vnode we are interested in + * struct vnode_attr *a_vap; // Details the requested attributes, and used to return attributes + * vfs_context_t a_context; // We don't care about this (for now) + * }; + * + * mockfs_getattr: + * Given a vnode (a_vp), returns the attributes requested for that vnode (*a_vap). For mockfs, we don't care + * about the majority of attributes (we are not a fully featured filesystem). We will return a minimal set of + * attributes for any request, regardless of which attributes were requested, to ensure that we look like a sane + * file, and so that permissions are set appropriately to allow execution of the executable vnode. + * + * Returns 0 on success, or an error. + */ +int mockfs_getattr(struct vnop_getattr_args * ap) +{ + /* + * For the moment, we don't actually care about most attributes. We'll + * deal with actually managing attributes as part of the general cleanup.
+ */ + vnode_t vp; + mockfs_fsnode_t fsnode; + struct vnode_attr * vap; + + vp = ap->a_vp; + fsnode = (mockfs_fsnode_t)vp->v_data; + vap = ap->a_vap; + bzero(vap, sizeof(*vap)); + VATTR_RETURN(vap, va_nlink, 1); /* Simply assert that someone has at least one link to us */ + VATTR_RETURN(vap, va_mode, VREAD | VWRITE | VEXEC); + VATTR_RETURN(vap, va_fileid, fsnode->type); + VATTR_RETURN(vap, va_total_size, fsnode->size); + VATTR_RETURN(vap, va_total_alloc, fsnode->size); + VATTR_RETURN(vap, va_data_size, fsnode->size); + VATTR_RETURN(vap, va_data_alloc, fsnode->size); + + return (0); +} + +/* + * struct vnop_read_args { + * struct vnodeop_desc *a_desc; // We don't care about this (for now) + * vnode_t a_vp; // Pointer to the vnode we are interested in + * struct uio *a_uio; // Description of the request + * int a_ioflag; // IO flags (we don't care about these) + * vfs_context_t a_context; // We don't care about this (for now) + * }; + * + * mockfs_read: + * Given a vnode (a_vp), a set of flags (a_ioflag), and a description of a read request (a_uio), executes the read + * request and returns the resulting data through the description (a_uio). mockfs has very little to do here; we + * merely mandate that any read attempt MUST be on VREG (our MOCKFS_FILE object), as it is the only vnode that has + * a backing store that can support a read (the other node types being purely in-memory hacks). Because we do not + * support VNOP_OPEN, we can probably assume that the kernel is the only entity that will ever issue a VNOP_READ + * (as part of the exec path) to a mockfs vnode. + * + * Returns 0 on success, or an error. + */ +int mockfs_read(struct vnop_read_args * ap) +{ + int rvalue; + vnode_t vp; + mockfs_fsnode_t fsnode; + + vp = ap->a_vp; + fsnode = (mockfs_fsnode_t) vp->v_data; + + /* + * We're just an ugly frontend for the devnode, so we shouldn't need to do much for reads; + * pass the work to cluster_read. + */ + if (vp->v_type == VREG) { + rvalue = cluster_read(vp, ap->a_uio, fsnode->size, ap->a_ioflag); + } + else { + /* + * You've tried to read from a nonregular file; I hate you. + */ + rvalue = ENOTSUP; + } + + return rvalue; +} + +/* + * struct vnop_reclaim_args { + * struct vnodeop_desc *a_desc; // We don't care about this (for now) + * vnode_t a_vp; // Pointer to the vnode we are reclaiming + * vfs_context_t a_context; // We don't care about this (for now) + * }; + * + * mockfs_reclaim: + * Given a vnode (a_vp), performs any cleanup needed to allow VFS to reclaim the vnode. Because the mockfs tree + * is always in memory, we have very little to do as part of reclaim, so we'll just zero a few pointers and let + * VFS reclaim the vnode. + */ +int mockfs_reclaim(struct vnop_reclaim_args * ap) +{ + int rvalue; + vnode_t vp; + mockfs_fsnode_t fsnode; + + vp = ap->a_vp; + fsnode = (mockfs_fsnode_t) vnode_fsnode(vp); + rvalue = mockfs_fsnode_drop_vnode(fsnode); + + return rvalue; +} + +/* + * struct vnop_strategy_args { + * struct vnodeop_desc *a_desc; // We don't care about this (for now) + * struct buf *a_bp; // Description of the desired IO + * }; + * + * mockfs_strategy: + * Given an IO description (a_bp), performs any preparation required by the filesystem, and then passes the IO off to + * the appropriate device. mockfs doesn't need to do anything to prepare for the IO, so we simply pass it off to + * our backing device. + * + * Returns 0 on success, or an error.
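+ *
+ * Sketch of the pass-through (taken from the implementation below): vfs_devvp()
+ * yields the backing device vnode for the mount, and buf_strategy() re-issues
+ * the buffer against that vnode, so mockfs itself never touches the data:
+ *
+ *	dvp = vfs_devvp(buf_vnode(ap->a_bp)->v_mount);
+ *	rvalue = buf_strategy(dvp, ap);
+ *	vnode_put(dvp);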
+ */ +int mockfs_strategy(struct vnop_strategy_args * ap) +{ + int rvalue; + vnode_t dvp; + + /* + * We'll avoid checking for a memory-backed device here; we already do this for blockmap, which will be + * called as part of the IO path. + */ + + dvp = vfs_devvp(buf_vnode(ap->a_bp)->v_mount); + + if (dvp) { + rvalue = buf_strategy(dvp, ap); + vnode_put(dvp); + } + else { + /* + * I'm not certain this is the BEST error to return for this case. + */ + rvalue = EIO; + } + + return rvalue; +} + +/* + * struct vnop_pagein_args { + * struct vnodeop_desc *a_desc; // We don't care about this (for now) + * vnode_t a_vp; // Pointer to the vnode we are interested in + * upl_t a_pl; // Describes the pages that need to be paged in + * upl_offset_t a_pl_offset; // Offset in the UPL to start placing data at + * off_t a_f_offset; // File offset to begin paging in at + * size_t a_size; // Bytes of data to page in + * int a_flags; // UPL flags (we don't care about these) + * vfs_context_t a_context; // We don't care about this (for now) + * }; + * + * mockfs_pagein: + * Given a vnode (a_vp), and a region, described by an offset (a_f_offset) and a size (a_size), pages the region + * into the given UPL (a_pl), starting at the UPL offset (a_pl_offset). For mockfs, we don't have anything significant + * to do for pagein, so we largely serve as a wrapper to the cluster_pagein routine. + * + * Returns 0 on success, or an error. + */ +int mockfs_pagein(struct vnop_pagein_args * ap) +{ + mockfs_fsnode_t fsnode; + mockfs_mount_t mockfs_mnt; + + /* + * Nothing special needed from us; just nab the filesize and kick the work over to cluster_pagein. + */ + fsnode = (mockfs_fsnode_t) ap->a_vp->v_data; + mockfs_mnt = ((mockfs_mount_t) fsnode->mnt->mnt_data); + + /* + * If we represent a memory backed device, we should be pointing directly to the backing store; we should never + * see a pagein in this case. + */ + if (mockfs_mnt->mockfs_memory_backed) + panic("mockfs_pagein called for a memory-backed device"); + + return cluster_pagein(ap->a_vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, fsnode->size, ap->a_flags); +} + +/* + * struct vnop_blockmap_args { + * struct vnodeop_desc *a_desc; // We don't care about this (for now) + * vnode_t a_vp; // Pointer to the vnode we are interested in + * off_t a_foffset; // File offset we are interested in + * size_t a_size; // Size of the region we are interested in + * daddr64_t *a_bpn; // Return parameter: physical block number the region we are interest in starts at + * size_t *a_run; // Return parameter: number of contiguous bytes of data + * void *a_poff; // Unused, as far as I know + * int a_flags; // Used to distinguish reads and writes; we don't care + * vfs_context_t a_context; // We don't care about this (for now) + * }; + * + * mockfs_blockmap: + * Given a vnode (a_vp), and a region, described by an offset (a_foffset), and a size (a_size), tells the caller + * which physical block (on the backing device) the region begins at (*a_bpn), and how many bytes can be read + * before the first discontinuity (*a_run). For mockfs, because only VREG files are eligible for IO, and because + * all VREG files are simply a frontend for the backing device, this mapping will always be one to one, and all we + * need to do is convert the physical offset to the physical block number. + * + * Returns 0 on success, or an error.
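+ *
+ * A worked example (hypothetical numbers): with a 4096-byte device block size, a
+ * request at file offset 8192 maps to physical block 2 (8192 / 4096), and the
+ * contiguous run is simply fsnode->size - 8192 bytes, since the file maps one to
+ * one onto the backing device.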
+ */ +int mockfs_blockmap(struct vnop_blockmap_args * ap) +{ + int rvalue; + off_t foffset; + size_t * run; + uint32_t blksize; + daddr64_t * bpn; + vnode_t vp; + mockfs_fsnode_t fsnode; + + rvalue = 0; + foffset = ap->a_foffset; + run = ap->a_run; + bpn = ap->a_bpn; + vp = ap->a_vp; + fsnode = (mockfs_fsnode_t) vp->v_data; + blksize = vp->v_mount->mnt_devblocksize; + + /* + * If we represent a memory backed device, we should be pointing directly to the backing store; all IO should + * be satisfied from the UBC, and any call to blockmap (indicating an attempted IO to the backing store) + * is therefore disallowed. + */ + if (((mockfs_mount_t) fsnode->mnt->mnt_data)->mockfs_memory_backed) + printf("mockfs_blockmap called for a memory-backed device\n"); + + /* + * This will ultimately be simple; the vnode must be VREG (init), and the mapping will be 1 to 1. + * This also means that the request should always be contiguous, so the run calculation is easy! + */ + if (vp->v_type == VREG) { + *bpn = foffset / blksize; + *run = fsnode->size - foffset; + + if (ap->a_size > *run) { + /* We've been asked for more data than the backing device can provide; we're done. */ + panic("mockfs_blockmap was asked for a region that extended past the end of the backing device"); + } + } + else { + rvalue = ENOTSUP; + } + + return rvalue; +} + +int (**mockfs_vnodeop_p)(void *); +struct vnodeopv_entry_desc mockfs_vnodeop_entries[] = { + { &vnop_default_desc, (VOPFUNC) vn_default_error }, /* default */ + { &vnop_lookup_desc, (VOPFUNC) mockfs_lookup }, /* lookup */ + { &vnop_create_desc, (VOPFUNC) err_create },/* create */ + { &vnop_open_desc, (VOPFUNC) err_open }, /* open */ + { &vnop_mknod_desc, (VOPFUNC) err_mknod }, /* mknod */ + { &vnop_close_desc, (VOPFUNC) err_close }, /* close */ + { &vnop_access_desc, (VOPFUNC) err_access }, /* access */ + { &vnop_getattr_desc, (VOPFUNC) mockfs_getattr }, /* getattr */ + { &vnop_setattr_desc, (VOPFUNC) err_setattr }, /* setattr */ + { &vnop_read_desc, (VOPFUNC) mockfs_read }, /* read */ + { &vnop_write_desc, (VOPFUNC) err_write }, /* write */ + { &vnop_ioctl_desc, (VOPFUNC) err_ioctl }, /* ioctl */ + { &vnop_select_desc, (VOPFUNC) err_select }, /* select */ + { &vnop_mmap_desc, (VOPFUNC) err_mmap }, /* mmap */ + { &vnop_fsync_desc, (VOPFUNC) nop_fsync }, /* fsync */ + { &vnop_remove_desc, (VOPFUNC) err_remove }, /* remove */ + { &vnop_link_desc, (VOPFUNC) err_link }, /* link */ + { &vnop_rename_desc, (VOPFUNC) err_rename }, /* rename */ + { &vnop_mkdir_desc, (VOPFUNC) err_mkdir }, /* mkdir */ + { &vnop_rmdir_desc, (VOPFUNC) err_rmdir }, /* rmdir */ + { &vnop_symlink_desc, (VOPFUNC) err_symlink }, /* symlink */ + { &vnop_readdir_desc, (VOPFUNC) err_readdir }, /* readdir */ + { &vnop_readlink_desc, (VOPFUNC) err_readlink }, /* readlink */ + { &vnop_inactive_desc, (VOPFUNC) err_inactive }, /* inactive */ + { &vnop_reclaim_desc, (VOPFUNC) mockfs_reclaim }, /* reclaim */ + { &vnop_strategy_desc, (VOPFUNC) mockfs_strategy }, /* strategy */ + { &vnop_pathconf_desc, (VOPFUNC) err_pathconf }, /* pathconf */ + { &vnop_advlock_desc, (VOPFUNC) err_advlock }, /* advlock */ + { &vnop_bwrite_desc, (VOPFUNC) err_bwrite }, /* bwrite */ + { &vnop_pagein_desc, (VOPFUNC) mockfs_pagein }, /* pagein */ + { &vnop_pageout_desc, (VOPFUNC) err_pageout }, /* pageout */ + { &vnop_copyfile_desc, (VOPFUNC) err_copyfile }, /* copyfile */ + { &vnop_blktooff_desc, (VOPFUNC) err_blktooff }, /* blktooff */ + { &vnop_offtoblk_desc, (VOPFUNC) err_offtoblk }, /* offtoblk */ + { &vnop_blockmap_desc,
(VOPFUNC) mockfs_blockmap }, /* blockmap */ + { (struct vnodeop_desc *) NULL, (VOPFUNC) NULL } +}; + +struct vnodeopv_desc mockfs_vnodeop_opv_desc = { + &mockfs_vnodeop_p, + mockfs_vnodeop_entries +}; + diff --git a/libsyscall/mach/mig_reply_port.h b/bsd/miscfs/mockfs/mockfs_vnops.h similarity index 85% rename from libsyscall/mach/mig_reply_port.h rename to bsd/miscfs/mockfs/mockfs_vnops.h index 54e27879c..4a5314f53 100644 --- a/libsyscall/mach/mig_reply_port.h +++ b/bsd/miscfs/mockfs/mockfs_vnops.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Apple Inc. All rights reserved. + * Copyright (c) 2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,9 +26,14 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include +#ifndef MOCKFS_VNOPS_H +#define MOCKFS_VNOPS_H -void _mig_fork_child(void); -void _mig_init(int init_done); +#if MOCKFS + +extern int (**mockfs_vnodeop_p)(void *); + +#endif /* MOCKFS */ + +#endif /* MOCKFS_VNOPS_H */ -void _mig_reply_port_callbacks(mach_port_t (*get)(void), void (*set)(mach_port_t)); diff --git a/bsd/miscfs/specfs/Makefile b/bsd/miscfs/specfs/Makefile index 7c6f583e4..2394edf46 100644 --- a/bsd/miscfs/specfs/Makefile +++ b/bsd/miscfs/specfs/Makefile @@ -7,14 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ specdev.h @@ -26,7 +18,6 @@ EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = miscfs/specfs - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index f3956c8b1..49d021787 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -85,6 +85,7 @@ #include #include #include +#include #include @@ -155,42 +156,78 @@ struct vnodeopv_desc spec_vnodeop_opv_desc = static void set_blocksize(vnode_t, dev_t); +#define LOWPRI_TIER1_WINDOW_MSECS 25 +#define LOWPRI_TIER2_WINDOW_MSECS 100 +#define LOWPRI_TIER3_WINDOW_MSECS 500 -#define THROTTLE_LEVEL_NONE -1 -#define THROTTLE_LEVEL_TIER0 0 +#define LOWPRI_TIER1_IO_PERIOD_MSECS 15 +#define LOWPRI_TIER2_IO_PERIOD_MSECS 50 +#define LOWPRI_TIER3_IO_PERIOD_MSECS 200 -#define THROTTLE_LEVEL_THROTTLED 1 -#define THROTTLE_LEVEL_TIER1 1 -#define THROTTLE_LEVEL_TIER2 2 +#define LOWPRI_TIER1_IO_PERIOD_SSD_MSECS 5 +#define LOWPRI_TIER2_IO_PERIOD_SSD_MSECS 15 +#define LOWPRI_TIER3_IO_PERIOD_SSD_MSECS 25 -#define THROTTLE_LEVEL_START 0 -#define THROTTLE_LEVEL_END 2 +int throttle_windows_msecs[THROTTLE_LEVEL_END + 1] = { + 0, + LOWPRI_TIER1_WINDOW_MSECS, + LOWPRI_TIER2_WINDOW_MSECS, + LOWPRI_TIER3_WINDOW_MSECS, +}; + +int throttle_io_period_msecs[THROTTLE_LEVEL_END + 1] = { + 0, + LOWPRI_TIER1_IO_PERIOD_MSECS, + LOWPRI_TIER2_IO_PERIOD_MSECS, + LOWPRI_TIER3_IO_PERIOD_MSECS, +}; + +int throttle_io_period_ssd_msecs[THROTTLE_LEVEL_END + 1] = { + 0, + LOWPRI_TIER1_IO_PERIOD_SSD_MSECS, + LOWPRI_TIER2_IO_PERIOD_SSD_MSECS, + LOWPRI_TIER3_IO_PERIOD_SSD_MSECS, +}; + + +int throttled_count[THROTTLE_LEVEL_END + 1]; struct _throttle_io_info_t { - struct timeval throttle_last_IO_timestamp[THROTTLE_LEVEL_END + 1]; + lck_mtx_t throttle_lock; + struct timeval throttle_last_write_timestamp; - struct timeval throttle_start_IO_period_timestamp; + struct timeval throttle_min_timer_deadline; + struct timeval throttle_window_start_timestamp[THROTTLE_LEVEL_END + 1]; + struct timeval throttle_last_IO_timestamp[THROTTLE_LEVEL_END + 1]; + pid_t 
throttle_last_IO_pid[THROTTLE_LEVEL_END + 1]; + struct timeval throttle_start_IO_period_timestamp[THROTTLE_LEVEL_END + 1]; - TAILQ_HEAD( , uthread) throttle_uthlist; /* List of throttled uthreads */ + TAILQ_HEAD( , uthread) throttle_uthlist[THROTTLE_LEVEL_END + 1]; /* Lists of throttled uthreads */ + int throttle_next_wake_level; - lck_mtx_t throttle_lock; thread_call_t throttle_timer_call; - int32_t throttle_timer_running; + int32_t throttle_timer_ref; + int32_t throttle_timer_active; + int32_t throttle_io_count; int32_t throttle_io_count_begin; - int32_t throttle_io_period; + int *throttle_io_periods; uint32_t throttle_io_period_num; + int32_t throttle_refcnt; int32_t throttle_alloc; }; struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV]; -static void throttle_info_update_internal(struct _throttle_io_info_t *info, uthread_t ut, int policy, int flags, boolean_t isssd); -static int throttle_get_thread_throttle_level(uthread_t ut, int policy); -__private_extern__ int32_t throttle_legacy_process_count = 0; +int lowpri_throttle_enabled = 1; + + + +static void throttle_info_update_internal(struct _throttle_io_info_t *info, uthread_t ut, int flags, boolean_t isssd); +static int throttle_get_thread_throttle_level(uthread_t ut); /* * Trivial lookup routine that always fails. @@ -293,7 +330,7 @@ spec_open(struct vnop_open_args *ap) devsw_unlock(dev, S_IFCHR); - if (error == 0 && (D_TYPEMASK & cdevsw[maj].d_type) == D_DISK && !vp->v_un.vu_specinfo->si_initted) { + if (error == 0 && cdevsw[maj].d_type == D_DISK && !vp->v_un.vu_specinfo->si_initted) { int isssd = 0; uint64_t throttle_mask = 0; uint32_t devbsdunit = 0; @@ -335,7 +372,7 @@ spec_open(struct vnop_open_args *ap) * opens for writing of any disk block devices. */ if (securelevel >= 2 && cred != FSCRED && - (ap->a_mode & FWRITE) && isdisk(dev, VBLK)) + (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are @@ -423,12 +460,12 @@ spec_read(struct vnop_read_args *ap) switch (vp->v_type) { case VCHR: - if ((D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type) == D_DISK && vp->v_un.vu_specinfo->si_throttleable) { + if (cdevsw[major(vp->v_rdev)].d_type == D_DISK && vp->v_un.vu_specinfo->si_throttleable) { struct _throttle_io_info_t *throttle_info; throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit]; - throttle_info_update_internal(throttle_info, NULL, -1, 0, vp->v_un.vu_specinfo->si_isssd); + throttle_info_update_internal(throttle_info, NULL, 0, vp->v_un.vu_specinfo->si_isssd); } error = (*cdevsw[major(vp->v_rdev)].d_read) (vp->v_rdev, uio, ap->a_ioflag); @@ -516,12 +553,12 @@ spec_write(struct vnop_write_args *ap) switch (vp->v_type) { case VCHR: - if ((D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type) == D_DISK && vp->v_un.vu_specinfo->si_throttleable) { + if (cdevsw[major(vp->v_rdev)].d_type == D_DISK && vp->v_un.vu_specinfo->si_throttleable) { struct _throttle_io_info_t *throttle_info; throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit]; - throttle_info_update_internal(throttle_info, NULL, -1, 0, vp->v_un.vu_specinfo->si_isssd); + throttle_info_update_internal(throttle_info, NULL, 0, vp->v_un.vu_specinfo->si_isssd); microuptime(&throttle_info->throttle_last_write_timestamp); } @@ -695,8 +732,10 @@ spec_kqfilter(vnode_t vp, struct knote *kn) /* We can hook into TTYs... 
*/ err = filt_specattach(kn); } else { +#if NETWORKING /* Try a bpf device, as defined in bsd/net/bpf.c */ err = bpfkqfilter(dev, kn); +#endif } return err; @@ -728,34 +767,9 @@ spec_fsync(struct vnop_fsync_args *ap) /* * Just call the device strategy routine */ -extern int hard_throttle_on_root; - void throttle_init(void); -#define LOWPRI_THROTTLE_WINDOW_MSECS 500 -#define LOWPRI_LEGACY_THROTTLE_WINDOW_MSECS 200 -#define LOWPRI_IO_PERIOD_MSECS 200 -#define LOWPRI_IO_PERIOD_SSD_MSECS 20 -#define LOWPRI_TIMER_PERIOD_MSECS 10 - - -int lowpri_throttle_window_msecs = LOWPRI_THROTTLE_WINDOW_MSECS; -int lowpri_legacy_throttle_window_msecs = LOWPRI_LEGACY_THROTTLE_WINDOW_MSECS; -int lowpri_io_period_msecs = LOWPRI_IO_PERIOD_MSECS; -int lowpri_io_period_ssd_msecs = LOWPRI_IO_PERIOD_SSD_MSECS; -int lowpri_timer_period_msecs = LOWPRI_TIMER_PERIOD_MSECS; - -/* - * If a process requiring legacy iothrottle behavior is running on the - * system, use legacy limits for throttle window and max IO size. - */ -#if CONFIG_EMBEDDED -#define THROTTLE_WINDOW (lowpri_throttle_window_msecs) -#else -#define THROTTLE_WINDOW (throttle_legacy_process_count == 0 ? lowpri_throttle_window_msecs : lowpri_legacy_throttle_window_msecs) -#endif - #if 0 #define DEBUG_ALLOC_THROTTLE_INFO(format, debug_info, args...) \ do { \ @@ -767,12 +781,21 @@ int lowpri_timer_period_msecs = LOWPRI_TIMER_PERIOD_MSECS; #define DEBUG_ALLOC_THROTTLE_INFO(format, debug_info, args...) #endif -SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_throttle_window_msecs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, lowpri_legacy_throttle_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_legacy_throttle_window_msecs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, lowpri_io_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_io_period_msecs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, lowpri_io_period_ssd_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_io_period_ssd_msecs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, lowpri_timer_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_timer_period_msecs, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, lowpri_legacy_process_count, CTLFLAG_RD | CTLFLAG_LOCKED, &throttle_legacy_process_count, 0, ""); + +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier1_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_windows_msecs[THROTTLE_LEVEL_TIER1], 0, ""); +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier2_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_windows_msecs[THROTTLE_LEVEL_TIER2], 0, ""); +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier3_window_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_windows_msecs[THROTTLE_LEVEL_TIER3], 0, ""); + +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier1_io_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_io_period_msecs[THROTTLE_LEVEL_TIER1], 0, ""); +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier2_io_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_io_period_msecs[THROTTLE_LEVEL_TIER2], 0, ""); +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier3_io_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_io_period_msecs[THROTTLE_LEVEL_TIER3], 0, ""); + +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier1_io_period_ssd_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_io_period_ssd_msecs[THROTTLE_LEVEL_TIER1], 0, ""); +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier2_io_period_ssd_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_io_period_ssd_msecs[THROTTLE_LEVEL_TIER2], 0, ""); +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_tier3_io_period_ssd_msecs, CTLFLAG_RW | 
CTLFLAG_LOCKED, &throttle_io_period_ssd_msecs[THROTTLE_LEVEL_TIER3], 0, ""); + +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &lowpri_throttle_enabled, 0, ""); + static lck_grp_t *throttle_mtx_grp; static lck_attr_t *throttle_mtx_attr; @@ -852,7 +875,6 @@ throttle_info_ref(struct _throttle_io_info_t *info) return oldValue; } - /* * on entry the throttle_lock is held... * this function is responsible for taking @@ -864,55 +886,146 @@ throttle_info_ref(struct _throttle_io_info_t *info) * to free it */ static uint32_t -throttle_timer_start(struct _throttle_io_info_t *info, boolean_t update_io_count) +throttle_timer_start(struct _throttle_io_info_t *info, boolean_t update_io_count, int wakelevel) { struct timeval elapsed; + struct timeval now; + struct timeval period; uint64_t elapsed_msecs; int throttle_level; - uint64_t deadline; + int level; + int msecs; + boolean_t throttled = FALSE; + boolean_t need_timer = FALSE; + + microuptime(&now); if (update_io_count == TRUE) { info->throttle_io_count_begin = info->throttle_io_count; info->throttle_io_period_num++; - microuptime(&info->throttle_start_IO_period_timestamp); + while (wakelevel >= THROTTLE_LEVEL_THROTTLED) + info->throttle_start_IO_period_timestamp[wakelevel--] = now; + + info->throttle_min_timer_deadline = now; + + msecs = info->throttle_io_periods[THROTTLE_LEVEL_THROTTLED]; + period.tv_sec = msecs / 1000; + period.tv_usec = (msecs % 1000) * 1000; + + timevaladd(&info->throttle_min_timer_deadline, &period); } for (throttle_level = THROTTLE_LEVEL_START; throttle_level < THROTTLE_LEVEL_END; throttle_level++) { - microuptime(&elapsed); - timevalsub(&elapsed, &info->throttle_last_IO_timestamp[throttle_level]); + elapsed = now; + timevalsub(&elapsed, &info->throttle_window_start_timestamp[throttle_level]); elapsed_msecs = (uint64_t)elapsed.tv_sec * (uint64_t)1000 + (elapsed.tv_usec / 1000); - if (elapsed_msecs < (uint64_t)THROTTLE_WINDOW) { - /* - * we had an I/O occur in this level within - * our throttle window, so we need to - * to make sure the timer continues to run - */ - break; + for (level = throttle_level + 1; level <= THROTTLE_LEVEL_END; level++) { + + if (!TAILQ_EMPTY(&info->throttle_uthlist[level])) { + + if (elapsed_msecs < (uint64_t)throttle_windows_msecs[level]) { + /* + * we had an I/O occur at a higher priority tier within + * this tier's throttle window + */ + throttled = TRUE; + } + /* + * we assume that the windows are the same or longer + * as we drop through the throttling tiers... thus + * we can stop looking once we run into a tier with + * threads to schedule regardless of whether it's + * still in its throttling window or not + */ + break; + } } + if (throttled == TRUE) + break; } - if (throttle_level >= THROTTLE_LEVEL_END) { - /* - * we're outside all of the throttle windows... - * don't start a new timer - */ - info->throttle_timer_running = 0; + if (throttled == TRUE) { + uint64_t deadline = 0; + struct timeval target; + struct timeval min_target; - return (THROTTLE_LEVEL_END); - } - if (info->throttle_timer_running == 0) { /* - * take a reference for the timer + * we've got at least one tier still in a throttled window + * so we need a timer running... 
compute the next deadline + * and schedule it */ - throttle_info_ref(info); + for (level = throttle_level+1; level <= THROTTLE_LEVEL_END; level++) { - info->throttle_timer_running = 1; - } - clock_interval_to_deadline(lowpri_timer_period_msecs, 1000000, &deadline); + if (TAILQ_EMPTY(&info->throttle_uthlist[level])) + continue; + + target = info->throttle_start_IO_period_timestamp[level]; + + msecs = info->throttle_io_periods[level]; + period.tv_sec = msecs / 1000; + period.tv_usec = (msecs % 1000) * 1000; + + timevaladd(&target, &period); + + if (need_timer == FALSE || timevalcmp(&target, &min_target, <)) { + min_target = target; + need_timer = TRUE; + } + } + if (timevalcmp(&info->throttle_min_timer_deadline, &now, >)) { + if (timevalcmp(&info->throttle_min_timer_deadline, &min_target, >)) + min_target = info->throttle_min_timer_deadline; + } + + if (info->throttle_timer_active) { + if (thread_call_cancel(info->throttle_timer_call) == FALSE) { + /* + * couldn't kill the timer because it's already + * been dispatched, so don't try to start a new + * one... once we drop the lock, the timer will + * proceed and eventually re-run this function + */ + need_timer = FALSE; + } else + info->throttle_timer_active = 0; + } + if (need_timer == TRUE) { + /* + * This is defined as an int (32-bit) rather than a 64-bit + * value because it would need a really big period in the + * order of ~500 days to overflow this. So, we let this be + * 32-bit which allows us to use the clock_interval_to_deadline() + * routine. + */ + int target_msecs; + + if (info->throttle_timer_ref == 0) { + /* + * take a reference for the timer + */ + throttle_info_ref(info); - thread_call_enter_delayed(info->throttle_timer_call, deadline); + info->throttle_timer_ref = 1; + } + elapsed = min_target; + timevalsub(&elapsed, &now); + target_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000; + + if (target_msecs <= 0) { + /* + * we may have computed a deadline slightly in the past + * due to various factors... if so, just set the timer + * to go off in the near future (we don't need to be precise) + */ + target_msecs = 1; + } + clock_interval_to_deadline(target_msecs, 1000000, &deadline); + thread_call_enter_delayed(info->throttle_timer_call, deadline); + info->throttle_timer_active = 1; + } + } return (throttle_level); } @@ -922,66 +1035,88 @@ throttle_timer(struct _throttle_io_info_t *info) { uthread_t ut, utlist; struct timeval elapsed; + struct timeval now; uint64_t elapsed_msecs; int throttle_level; + int level; + int wake_level; + caddr_t wake_address = NULL; boolean_t update_io_count = FALSE; boolean_t need_wakeup = FALSE; boolean_t need_release = FALSE; + ut = NULL; lck_mtx_lock(&info->throttle_lock); - - microuptime(&elapsed); - timevalsub(&elapsed, &info->throttle_start_IO_period_timestamp); + + info->throttle_timer_active = 0; + microuptime(&now); + + elapsed = now; + timevalsub(&elapsed, &info->throttle_start_IO_period_timestamp[THROTTLE_LEVEL_THROTTLED]); elapsed_msecs = (uint64_t)elapsed.tv_sec * (uint64_t)1000 + (elapsed.tv_usec / 1000); - if (elapsed_msecs >= (uint64_t)info->throttle_io_period) { - /* - * we're closing out the current IO period... - * if we have a waiting thread, wake it up - * after we have reset the I/O window info - */ - need_wakeup = TRUE; - update_io_count = TRUE; - } - if ((throttle_level = throttle_timer_start(info, update_io_count)) == THROTTLE_LEVEL_END) { - /* - * we are now outside of the throttle window - * for all throttle levels... 
- * - * the timer is not restarted in this case, so - * we need to get rid of the reference we took when - * we started up the timer... we can't do this - * until we are entirely done playing with 'info' - */ - need_release = TRUE; + if (elapsed_msecs >= (uint64_t)info->throttle_io_periods[THROTTLE_LEVEL_THROTTLED]) { + + wake_level = info->throttle_next_wake_level; + + for (level = THROTTLE_LEVEL_START; level < THROTTLE_LEVEL_END; level++) { + + elapsed = now; + timevalsub(&elapsed, &info->throttle_start_IO_period_timestamp[wake_level]); + elapsed_msecs = (uint64_t)elapsed.tv_sec * (uint64_t)1000 + (elapsed.tv_usec / 1000); + + if (elapsed_msecs >= (uint64_t)info->throttle_io_periods[wake_level] && !TAILQ_EMPTY(&info->throttle_uthlist[wake_level])) { + /* + * we're closing out the current IO period... + * if we have a waiting thread, wake it up + * after we have reset the I/O window info + */ + need_wakeup = TRUE; + update_io_count = TRUE; + + info->throttle_next_wake_level = wake_level - 1; + + if (info->throttle_next_wake_level == THROTTLE_LEVEL_START) + info->throttle_next_wake_level = THROTTLE_LEVEL_END; + + break; + } + wake_level--; + + if (wake_level == THROTTLE_LEVEL_START) + wake_level = THROTTLE_LEVEL_END; + } } + if (need_wakeup == TRUE) { + if (!TAILQ_EMPTY(&info->throttle_uthlist[wake_level])) { - TAILQ_FOREACH_SAFE(ut, &info->throttle_uthlist, uu_throttlelist, utlist) { - /* - * if we are now outside of the throttle window release - * all of the currently blocked threads, otherwise - * look for threads that have had their IO policy changed - * by someone else and are no longer throttleable, or are - * not at the current throttle level and unblock them - */ - if (throttle_level == THROTTLE_LEVEL_END || throttle_get_thread_throttle_level(ut, -1) <= throttle_level) { + ut = (uthread_t)TAILQ_FIRST(&info->throttle_uthlist[wake_level]); + TAILQ_REMOVE(&info->throttle_uthlist[wake_level], ut, uu_throttlelist); + ut->uu_on_throttlelist = THROTTLE_LEVEL_NONE; + + wake_address = (caddr_t)&ut->uu_on_throttlelist; + } + } else + wake_level = THROTTLE_LEVEL_START; + + throttle_level = throttle_timer_start(info, update_io_count, wake_level); + + if (wake_address != NULL) + wakeup(wake_address); + + for (level = THROTTLE_LEVEL_THROTTLED; level <= throttle_level; level++) { + + TAILQ_FOREACH_SAFE(ut, &info->throttle_uthlist[level], uu_throttlelist, utlist) { - TAILQ_REMOVE(&info->throttle_uthlist, ut, uu_throttlelist); - ut->uu_on_throttlelist = 0; + TAILQ_REMOVE(&info->throttle_uthlist[level], ut, uu_throttlelist); + ut->uu_on_throttlelist = THROTTLE_LEVEL_NONE; wakeup(&ut->uu_on_throttlelist); } } - if (need_wakeup && !TAILQ_EMPTY(&info->throttle_uthlist)) { - /* - * we've entered a new I/O period and we're still - * in the throttle window, so wakeup the next guy in line - */ - ut = (uthread_t)TAILQ_FIRST(&info->throttle_uthlist); - TAILQ_REMOVE(&info->throttle_uthlist, ut, uu_throttlelist); - ut->uu_on_throttlelist = 0; - - wakeup(&ut->uu_on_throttlelist); + if (info->throttle_timer_active == 0 && info->throttle_timer_ref) { + info->throttle_timer_ref = 0; + need_release = TRUE; } lck_mtx_unlock(&info->throttle_lock); @@ -990,11 +1125,120 @@ throttle_timer(struct _throttle_io_info_t *info) } +static int +throttle_add_to_list(struct _throttle_io_info_t *info, uthread_t ut, int mylevel, boolean_t insert_tail) +{ + boolean_t start_timer = FALSE; + int level = THROTTLE_LEVEL_START; + + if (TAILQ_EMPTY(&info->throttle_uthlist[mylevel])) { + info->throttle_start_IO_period_timestamp[mylevel] = 
info->throttle_last_IO_timestamp[mylevel]; + start_timer = TRUE; + } + + if (insert_tail == TRUE) + TAILQ_INSERT_TAIL(&info->throttle_uthlist[mylevel], ut, uu_throttlelist); + else + TAILQ_INSERT_HEAD(&info->throttle_uthlist[mylevel], ut, uu_throttlelist); + + ut->uu_on_throttlelist = mylevel; + + if (start_timer == TRUE) { + /* we may need to start or rearm the timer */ + level = throttle_timer_start(info, FALSE, THROTTLE_LEVEL_START); + + if (level == THROTTLE_LEVEL_END) { + if (ut->uu_on_throttlelist >= THROTTLE_LEVEL_THROTTLED) { + TAILQ_REMOVE(&info->throttle_uthlist[ut->uu_on_throttlelist], ut, uu_throttlelist); + + ut->uu_on_throttlelist = THROTTLE_LEVEL_NONE; + } + } + } + return (level); +} + +static void +throttle_init_throttle_window(void) +{ + int throttle_window_size; + + /* + * The hierarchy of throttle window values is as follows: + * - Global defaults + * - Device tree properties + * - Boot-args + * All values are specified in msecs. + */ + + /* Override global values with device-tree properties */ + if (PE_get_default("kern.io_throttle_window_tier1", &throttle_window_size, sizeof(throttle_window_size))) + throttle_windows_msecs[THROTTLE_LEVEL_TIER1] = throttle_window_size; + + if (PE_get_default("kern.io_throttle_window_tier2", &throttle_window_size, sizeof(throttle_window_size))) + throttle_windows_msecs[THROTTLE_LEVEL_TIER2] = throttle_window_size; + + if (PE_get_default("kern.io_throttle_window_tier3", &throttle_window_size, sizeof(throttle_window_size))) + throttle_windows_msecs[THROTTLE_LEVEL_TIER3] = throttle_window_size; + + /* Override with boot-args */ + if (PE_parse_boot_argn("io_throttle_window_tier1", &throttle_window_size, sizeof(throttle_window_size))) + throttle_windows_msecs[THROTTLE_LEVEL_TIER1] = throttle_window_size; + + if (PE_parse_boot_argn("io_throttle_window_tier2", &throttle_window_size, sizeof(throttle_window_size))) + throttle_windows_msecs[THROTTLE_LEVEL_TIER2] = throttle_window_size; + + if (PE_parse_boot_argn("io_throttle_window_tier3", &throttle_window_size, sizeof(throttle_window_size))) + throttle_windows_msecs[THROTTLE_LEVEL_TIER3] = throttle_window_size; +} + +static void +throttle_init_throttle_period(struct _throttle_io_info_t *info, boolean_t isssd) +{ + int throttle_period_size; + + /* + * The hierarchy of throttle period values is as follows: + * - Global defaults + * - Device tree properties + * - Boot-args + * All values are specified in msecs. 
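+	 *
+	 * For example (hypothetical values): booting with the boot-arg
+	 * "io_throttle_period_tier3=100" overrides both the global default and any
+	 * "kern.io_throttle_period_tier3" device-tree property for tier 3.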
+ */ + + /* Assign global defaults */ + if (isssd == TRUE) + info->throttle_io_periods = &throttle_io_period_ssd_msecs[0]; + else + info->throttle_io_periods = &throttle_io_period_msecs[0]; + + /* Override global values with device-tree properties */ + if (PE_get_default("kern.io_throttle_period_tier1", &throttle_period_size, sizeof(throttle_period_size))) + info->throttle_io_periods[THROTTLE_LEVEL_TIER1] = throttle_period_size; + + if (PE_get_default("kern.io_throttle_period_tier2", &throttle_period_size, sizeof(throttle_period_size))) + info->throttle_io_periods[THROTTLE_LEVEL_TIER2] = throttle_period_size; + + if (PE_get_default("kern.io_throttle_period_tier3", &throttle_period_size, sizeof(throttle_period_size))) + info->throttle_io_periods[THROTTLE_LEVEL_TIER3] = throttle_period_size; + + /* Override with boot-args */ + if (PE_parse_boot_argn("io_throttle_period_tier1", &throttle_period_size, sizeof(throttle_period_size))) + info->throttle_io_periods[THROTTLE_LEVEL_TIER1] = throttle_period_size; + + if (PE_parse_boot_argn("io_throttle_period_tier2", &throttle_period_size, sizeof(throttle_period_size))) + info->throttle_io_periods[THROTTLE_LEVEL_TIER2] = throttle_period_size; + + if (PE_parse_boot_argn("io_throttle_period_tier3", &throttle_period_size, sizeof(throttle_period_size))) + info->throttle_io_periods[THROTTLE_LEVEL_TIER3] = throttle_period_size; + +} + void throttle_init(void) { struct _throttle_io_info_t *info; int i; + int level; /* * allocate lock group attribute and group @@ -1002,6 +1246,9 @@ throttle_init(void) throttle_mtx_grp_attr = lck_grp_attr_alloc_init(); throttle_mtx_grp = lck_grp_alloc_init("throttle I/O", throttle_mtx_grp_attr); + /* Update throttle parameters based on device tree configuration */ + throttle_init_throttle_window(); + /* * allocate the lock attribute */ @@ -1013,36 +1260,72 @@ throttle_init(void) lck_mtx_init(&info->throttle_lock, throttle_mtx_grp, throttle_mtx_attr); info->throttle_timer_call = thread_call_allocate((thread_call_func_t)throttle_timer, (thread_call_param_t)info); - TAILQ_INIT(&info->throttle_uthlist); + for (level = 0; level <= THROTTLE_LEVEL_END; level++) { + TAILQ_INIT(&info->throttle_uthlist[level]); + info->throttle_last_IO_pid[level] = 0; + } + info->throttle_next_wake_level = THROTTLE_LEVEL_END; } } +void +sys_override_io_throttle(int flag) +{ + if (flag == THROTTLE_IO_ENABLE) + lowpri_throttle_enabled = 1; + if (flag == THROTTLE_IO_DISABLE) + lowpri_throttle_enabled = 0; +} + +int rethrottle_removed_from_list = 0; +int rethrottle_moved_to_new_list = 0; /* - * KPI routine - * - * wakeup and remove the specified thread from the throttle queue - * if it's no longer in a throttleable state... + * move a throttled thread to the appropriate state based + * on its new throttle level... throttle_add_to_list will + * reset the timer deadline if necessary... it may also + * leave the thread off of the queue if we're already outside + * the throttle window for the new level * takes a valid uthread (which may or may not be on the * throttle queue) as input + * + * NOTE: This is called with the task lock held.
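+ *
+ * Illustrative scenario (assumed from the logic below): a thread queued at
+ * TIER3 that is boosted to TIER1 is moved to the TIER1 queue; if TIER1's
+ * throttle window has already expired, throttle_add_to_list() leaves it off
+ * the queue, and the wakeup below releases it immediately.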
*/ + void -unthrottle_thread(uthread_t ut) +rethrottle_thread(uthread_t ut) { - struct _throttle_io_info_t *info; + struct _throttle_io_info_t *info; + int my_new_level; - if ((info = ut->uu_throttle_info) == NULL) - return; + if ((info = ut->uu_throttle_info) == NULL) + return; - lck_mtx_lock(&info->throttle_lock); + lck_mtx_lock(&info->throttle_lock); - if (ut->uu_on_throttlelist && throttle_get_thread_throttle_level(ut, -1) <= THROTTLE_LEVEL_THROTTLED) { - TAILQ_REMOVE(&info->throttle_uthlist, ut, uu_throttlelist); - ut->uu_on_throttlelist = 0; + if (ut->uu_on_throttlelist >= THROTTLE_LEVEL_THROTTLED) { - wakeup(&ut->uu_on_throttlelist); - } - lck_mtx_unlock(&info->throttle_lock); + my_new_level = throttle_get_thread_throttle_level(ut); + + if (my_new_level != ut->uu_on_throttlelist) { + + TAILQ_REMOVE(&info->throttle_uthlist[ut->uu_on_throttlelist], ut, uu_throttlelist); + ut->uu_on_throttlelist = THROTTLE_LEVEL_NONE; + + if (my_new_level >= THROTTLE_LEVEL_THROTTLED) { + throttle_add_to_list(info, ut, my_new_level, TRUE); + rethrottle_moved_to_new_list++; + } + + /* Thread no longer in window, need to wake it up */ + if (ut->uu_on_throttlelist == THROTTLE_LEVEL_NONE) { + wakeup(&ut->uu_on_throttlelist); + rethrottle_removed_from_list++; + } + } + } + + lck_mtx_unlock(&info->throttle_lock); } @@ -1057,6 +1340,7 @@ void * throttle_info_create(void) { struct _throttle_io_info_t *info; + int level; MALLOC(info, struct _throttle_io_info_t *, sizeof(*info), M_TEMP, M_ZERO | M_WAITOK); /* Should never happen but just in case */ @@ -1069,7 +1353,10 @@ throttle_info_create(void) lck_mtx_init(&info->throttle_lock, throttle_mtx_grp, throttle_mtx_attr); info->throttle_timer_call = thread_call_allocate((thread_call_func_t)throttle_timer, (thread_call_param_t)info); - TAILQ_INIT(&info->throttle_uthlist); + for (level = 0; level <= THROTTLE_LEVEL_END; level++) { + TAILQ_INIT(&info->throttle_uthlist[level]); + } + info->throttle_next_wake_level = THROTTLE_LEVEL_END; /* Take a reference */ OSIncrementAtomic(&info->throttle_refcnt); @@ -1194,55 +1481,50 @@ update_last_io_time(mount_t mp) info = mp->mnt_throttle_info; microuptime(&info->throttle_last_write_timestamp); + if (mp != NULL) + mp->mnt_last_write_completed_timestamp = info->throttle_last_write_timestamp; } int throttle_get_io_policy(uthread_t *ut) { - *ut = get_bsdthread_info(current_thread()); + if (ut != NULL) + *ut = get_bsdthread_info(current_thread()); - return (proc_get_task_selfdiskacc()); + return (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO)); } +int +throttle_get_passive_io_policy(uthread_t *ut) +{ + if (ut != NULL) + *ut = get_bsdthread_info(current_thread()); + + return (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_PASSIVE_IO)); +} static int -throttle_get_thread_throttle_level(uthread_t ut, int policy) -{ - int thread_throttle_level = THROTTLE_LEVEL_NONE; +throttle_get_thread_throttle_level(uthread_t ut) +{ + int thread_throttle_level; if (ut == NULL) ut = get_bsdthread_info(current_thread()); - if (policy == -1) - policy = proc_get_diskacc(ut->uu_thread); + thread_throttle_level = proc_get_effective_thread_policy(ut->uu_thread, TASK_POLICY_IO); - switch (policy) { + /* Bootcache misses should always be throttled */ + if (ut->uu_throttle_bc == TRUE) + thread_throttle_level = THROTTLE_LEVEL_TIER3; - case IOPOL_DEFAULT: - case IOPOL_NORMAL: - thread_throttle_level = THROTTLE_LEVEL_TIER0; - case IOPOL_PASSIVE: - if (ut->uu_throttle_bc == TRUE) - thread_throttle_level = THROTTLE_LEVEL_TIER2; - 
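rethrottle_thread(), the renamed unthrottle_thread(), now reacts to a policy change in either direction: it computes the thread's new tier, moves it to the matching queue, and wakes the thread only if it ended up off every queue. The level computation itself collapses the old IOPOL_* switch into a single effective-policy read, with one special case kept; condensed from the new throttle_get_thread_throttle_level() above:

static int
effective_throttle_level(uthread_t ut)
{
	int level = proc_get_effective_thread_policy(ut->uu_thread, TASK_POLICY_IO);

	/* Boot-cache misses are always treated as lowest-priority I/O. */
	if (ut->uu_throttle_bc == TRUE)
		level = THROTTLE_LEVEL_TIER3;

	return (level);
}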
break; - case IOPOL_THROTTLE: - thread_throttle_level = THROTTLE_LEVEL_TIER2; - break; - case IOPOL_UTILITY: - thread_throttle_level = THROTTLE_LEVEL_TIER1; - break; - default: - printf("unknown I/O policy %d", policy); - break; - } return (thread_throttle_level); } static int -throttle_io_will_be_throttled_internal(void * throttle_info) +throttle_io_will_be_throttled_internal(void * throttle_info, int * mylevel, int * throttling_level) { struct _throttle_io_info_t *info = throttle_info; struct timeval elapsed; @@ -1250,16 +1532,16 @@ throttle_io_will_be_throttled_internal(void * throttle_info) int thread_throttle_level; int throttle_level; - if ((thread_throttle_level = throttle_get_thread_throttle_level(NULL, -1)) < THROTTLE_LEVEL_THROTTLED) - return (0); + if ((thread_throttle_level = throttle_get_thread_throttle_level(NULL)) < THROTTLE_LEVEL_THROTTLED) + return (THROTTLE_DISENGAGED); for (throttle_level = THROTTLE_LEVEL_START; throttle_level < thread_throttle_level; throttle_level++) { microuptime(&elapsed); - timevalsub(&elapsed, &info->throttle_last_IO_timestamp[throttle_level]); + timevalsub(&elapsed, &info->throttle_window_start_timestamp[throttle_level]); elapsed_msecs = (uint64_t)elapsed.tv_sec * (uint64_t)1000 + (elapsed.tv_usec / 1000); - if (elapsed_msecs < (uint64_t)THROTTLE_WINDOW) + if (elapsed_msecs < (uint64_t)throttle_windows_msecs[thread_throttle_level]) break; } if (throttle_level >= thread_throttle_level) { @@ -1268,20 +1550,25 @@ throttle_io_will_be_throttled_internal(void * throttle_info) * that affect the throttle level of this thread, * so go ahead and treat as normal I/O */ - return (0); + return (THROTTLE_DISENGAGED); } + if (mylevel) + *mylevel = thread_throttle_level; + if (throttling_level) + *throttling_level = throttle_level; + if (info->throttle_io_count != info->throttle_io_count_begin) { /* * we've already issued at least one throttleable I/O * in the current I/O window, so avoid issuing another one */ - return (2); + return (THROTTLE_NOW); } /* * we're in the throttle window, so * cut the I/O size back */ - return (1); + return (THROTTLE_ENGAGED); } /* @@ -1304,18 +1591,44 @@ throttle_io_will_be_throttled(__unused int lowpri_window_msecs, mount_t mp) else info = mp->mnt_throttle_info; - return throttle_io_will_be_throttled_internal(info); + return throttle_io_will_be_throttled_internal(info, NULL, NULL); } +/* + * Routine to increment I/O throttling counters maintained in the proc + */ + +static void +throttle_update_proc_stats(pid_t throttling_pid) +{ + proc_t throttling_proc; + proc_t throttled_proc = current_proc(); + + /* The throttled_proc is always the current proc; so we are not concerned with refs */ + OSAddAtomic64(1, &(throttled_proc->was_throttled)); + + /* The throttling pid might have exited by now */ + throttling_proc = proc_find(throttling_pid); + if (throttling_proc != PROC_NULL) { + OSAddAtomic64(1, &(throttling_proc->did_throttle)); + proc_rele(throttling_proc); + } +} +/* + * Block until woken up by the throttle timer or by a rethrottle call. + * As long as we hold the throttle_lock while querying the throttle tier, we're + * safe against seeing an old throttle tier after a rethrottle. 
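Each tier records when its window was last opened; a thread is considered throttled while any higher-priority tier still has an open window. Note that the window length compared against is indexed by the requesting thread's tier (throttle_windows_msecs[thread_throttle_level]), not by the tier that opened the window. The elapsed-time test, extracted from the loop above:

static boolean_t
throttle_window_open(struct _throttle_io_info_t *info, int window_level,
    uint64_t window_msecs)
{
	struct timeval elapsed;
	uint64_t elapsed_msecs;

	microuptime(&elapsed);
	timevalsub(&elapsed, &info->throttle_window_start_timestamp[window_level]);
	elapsed_msecs = (uint64_t)elapsed.tv_sec * 1000 + (elapsed.tv_usec / 1000);

	return (elapsed_msecs < window_msecs);
}

The three return values then distinguish the cases: THROTTLE_DISENGAGED (no open window), THROTTLE_NOW (a throttleable I/O was already issued in this window, so the caller must sleep), and THROTTLE_ENGAGED (still inside the window, but the caller may proceed with a reduced I/O size).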
+ */ uint32_t throttle_lowpri_io(int sleep_amount) { uthread_t ut; struct _throttle_io_info_t *info; int throttle_type = 0; + int mylevel = 0; + int throttling_level = THROTTLE_LEVEL_NONE; int sleep_cnt = 0; - int locked = 0; uint32_t throttle_io_period_num = 0; boolean_t insert_tail = TRUE; @@ -1326,7 +1639,15 @@ throttle_lowpri_io(int sleep_amount) info = ut->uu_throttle_info; - if ((sleep_amount == 0) || (info == NULL)) + if (info == NULL) { + ut->uu_throttle_bc = FALSE; + ut->uu_lowpri_window = 0; + return (0); + } + + lck_mtx_lock(&info->throttle_lock); + + if (sleep_amount == 0) goto done; if (sleep_amount == 1 && ut->uu_throttle_bc == FALSE) @@ -1334,9 +1655,9 @@ throttle_lowpri_io(int sleep_amount) throttle_io_period_num = info->throttle_io_period_num; - while ( (throttle_type = throttle_io_will_be_throttled_internal(info)) ) { + while ( (throttle_type = throttle_io_will_be_throttled_internal(info, &mylevel, &throttling_level)) ) { - if (throttle_type == 1) { + if (throttle_type == THROTTLE_ENGAGED) { if (sleep_amount == 0) break; if (info->throttle_io_period_num < throttle_io_period_num) @@ -1344,30 +1665,20 @@ throttle_lowpri_io(int sleep_amount) if ((info->throttle_io_period_num - throttle_io_period_num) >= (uint32_t)sleep_amount) break; } - if (!locked) { - lck_mtx_lock(&info->throttle_lock); - locked = 1; - } - if (info->throttle_timer_running == 0) { - /* - * try to start the timer since it's - * currently not running. on failure, no - * timer reference to drop since it wasn't started - */ - if (throttle_timer_start(info, TRUE) == THROTTLE_LEVEL_END) + if (ut->uu_on_throttlelist < THROTTLE_LEVEL_THROTTLED) { + if (throttle_add_to_list(info, ut, mylevel, insert_tail) == THROTTLE_LEVEL_END) goto done; } + assert(throttling_level >= THROTTLE_LEVEL_START && throttling_level <= THROTTLE_LEVEL_END); + throttle_update_proc_stats(info->throttle_last_IO_pid[throttling_level]); + KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_THROTTLE, PROCESS_THROTTLED)) | DBG_FUNC_NONE, + info->throttle_last_IO_pid[throttling_level], throttling_level, proc_selfpid(), mylevel, 0); + + if (sleep_cnt == 0) { KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START, - ut->uu_lowpri_window, info->throttle_io_period, info->throttle_io_count, 0, 0); - } - if (ut->uu_on_throttlelist == 0) { - if (insert_tail == TRUE) - TAILQ_INSERT_TAIL(&info->throttle_uthlist, ut, uu_throttlelist); - else - TAILQ_INSERT_HEAD(&info->throttle_uthlist, ut, uu_throttlelist); - - ut->uu_on_throttlelist = 1; + throttle_windows_msecs[mylevel], info->throttle_io_periods[mylevel], info->throttle_io_count, 0, 0); + throttled_count[mylevel]++; } msleep((caddr_t)&ut->uu_on_throttlelist, &info->throttle_lock, PRIBIO + 1, "throttle_lowpri_io", NULL); @@ -1382,25 +1693,19 @@ throttle_lowpri_io(int sleep_amount) } } done: - if (ut->uu_on_throttlelist) { - if (!locked) { - lck_mtx_lock(&info->throttle_lock); - locked = 1; - } - if (ut->uu_on_throttlelist) { - TAILQ_REMOVE(&info->throttle_uthlist, ut, uu_throttlelist); - - ut->uu_on_throttlelist = 0; - } + if (ut->uu_on_throttlelist >= THROTTLE_LEVEL_THROTTLED) { + TAILQ_REMOVE(&info->throttle_uthlist[ut->uu_on_throttlelist], ut, uu_throttlelist); + ut->uu_on_throttlelist = THROTTLE_LEVEL_NONE; } - if (locked) - lck_mtx_unlock(&info->throttle_lock); - - if (sleep_cnt) + + lck_mtx_unlock(&info->throttle_lock); + + if (sleep_cnt) { KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END, - ut->uu_lowpri_window, info->throttle_io_period, info->throttle_io_count, 0, 0); - if (info) - 
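The sleep loop above in skeleton form (tracing, stats and locals elided; not compilable on its own): the thread queues itself at its own tier under the throttle lock and sleeps until the per-device timer or a rethrottle wakes it, giving up once sleep_amount I/O periods have gone by.

/* Skeleton of the wait loop in throttle_lowpri_io() above. */
while ((throttle_type = throttle_io_will_be_throttled_internal(info,
    &mylevel, &throttling_level))) {
	if (throttle_type == THROTTLE_ENGAGED &&
	    (info->throttle_io_period_num - throttle_io_period_num) >= (uint32_t)sleep_amount)
		break;		/* enough I/O periods have elapsed */

	if (ut->uu_on_throttlelist < THROTTLE_LEVEL_THROTTLED &&
	    throttle_add_to_list(info, ut, mylevel, insert_tail) == THROTTLE_LEVEL_END)
		break;		/* could not queue; no timer armed */

	msleep((caddr_t)&ut->uu_on_throttlelist, &info->throttle_lock,
	    PRIBIO + 1, "throttle_lowpri_io", NULL);
	sleep_cnt++;
}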
throttle_info_rel(info); + throttle_windows_msecs[mylevel], info->throttle_io_periods[mylevel], info->throttle_io_count, 0, 0); + } + + throttle_info_rel(info); ut->uu_throttle_info = NULL; ut->uu_throttle_bc = FALSE; @@ -1413,13 +1718,15 @@ done: * KPI routine * * set a kernel thread's IO policy. policy can be: - * IOPOL_NORMAL, IOPOL_THROTTLE, IOPOL_PASSIVE + * IOPOL_NORMAL, IOPOL_THROTTLE, IOPOL_PASSIVE, IOPOL_UTILITY, IOPOL_STANDARD * * explanations about these policies are in the man page of setiopolicy_np */ void throttle_set_thread_io_policy(int policy) { - proc_apply_thread_selfdiskacc(policy); + proc_set_task_policy(current_task(), current_thread(), + TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL, + policy); } @@ -1438,38 +1745,49 @@ void throttle_info_reset_window(uthread_t ut) } static -void throttle_info_set_initial_window(uthread_t ut, struct _throttle_io_info_t *info, boolean_t BC_throttle) +void throttle_info_set_initial_window(uthread_t ut, struct _throttle_io_info_t *info, boolean_t BC_throttle, boolean_t isssd) { + if (lowpri_throttle_enabled == 0) + return; + + if (info->throttle_io_periods == 0) { + throttle_init_throttle_period(info, isssd); + } if (ut->uu_throttle_info == NULL) { ut->uu_throttle_info = info; throttle_info_ref(info); DEBUG_ALLOC_THROTTLE_INFO("updating info = %p\n", info, info ); - ut->uu_lowpri_window = THROTTLE_WINDOW; + ut->uu_lowpri_window = 1; ut->uu_throttle_bc = BC_throttle; } } static -void throttle_info_update_internal(struct _throttle_io_info_t *info, uthread_t ut, int policy, int flags, boolean_t isssd) +void throttle_info_update_internal(struct _throttle_io_info_t *info, uthread_t ut, int flags, boolean_t isssd) { int thread_throttle_level; - if (THROTTLE_WINDOW == 0) + if (lowpri_throttle_enabled == 0) return; if (ut == NULL) ut = get_bsdthread_info(current_thread()); - thread_throttle_level = throttle_get_thread_throttle_level(ut, policy); - - if (thread_throttle_level == THROTTLE_LEVEL_TIER0 && ISSET(flags, B_PASSIVE)) - thread_throttle_level = THROTTLE_LEVEL_NONE; + thread_throttle_level = throttle_get_thread_throttle_level(ut); - if (thread_throttle_level != THROTTLE_LEVEL_NONE) + if (thread_throttle_level != THROTTLE_LEVEL_NONE) { + if(!ISSET(flags, B_PASSIVE)) { + microuptime(&info->throttle_window_start_timestamp[thread_throttle_level]); + info->throttle_last_IO_pid[thread_throttle_level] = proc_selfpid(); + KERNEL_DEBUG_CONSTANT((FSDBG_CODE(DBG_THROTTLE, OPEN_THROTTLE_WINDOW)) | DBG_FUNC_NONE, + current_proc()->p_pid, thread_throttle_level, 0, 0, 0); + } microuptime(&info->throttle_last_IO_timestamp[thread_throttle_level]); + } + if (thread_throttle_level >= THROTTLE_LEVEL_THROTTLED) { /* @@ -1482,23 +1800,13 @@ void throttle_info_update_internal(struct _throttle_io_info_t *info, uthread_t u * do the delay just before we return from the system * call that triggered this I/O or from vnode_pagein */ - if (info->throttle_io_period == 0) { - - if (isssd == TRUE) - info->throttle_io_period = lowpri_io_period_ssd_msecs; - else - info->throttle_io_period = lowpri_io_period_msecs; - - if (info->throttle_io_period < lowpri_timer_period_msecs) - info->throttle_io_period = lowpri_timer_period_msecs; - } OSAddAtomic(1, &info->throttle_io_count); - throttle_info_set_initial_window(ut, info, FALSE); + throttle_info_set_initial_window(ut, info, FALSE, isssd); } } -void throttle_info_update_by_mount(mount_t mp) +void *throttle_info_update_by_mount(mount_t mp) { struct _throttle_io_info_t *info; uthread_t ut; @@ -1506,9 +1814,6 @@ void 
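The issuing side of the scheme lives in throttle_info_update_internal() above: every throttleable, non-passive I/O re-opens its tier's window and records the issuing pid, which is exactly what throttle_update_proc_stats() and the PROCESS_THROTTLED tracepoint report back on the sleeping side. The window-opening step, reduced to its core:

static void
open_throttle_window(struct _throttle_io_info_t *info, int level)
{
	microuptime(&info->throttle_window_start_timestamp[level]);
	info->throttle_last_IO_pid[level] = proc_selfpid();
}

Passive I/Os still bump throttle_last_IO_timestamp[] but deliberately do not re-open the window, so they never extend another thread's penalty.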
throttle_info_update_by_mount(mount_t mp) ut = get_bsdthread_info(current_thread()); - if (ut->uu_lowpri_window) - return; - if (mp != NULL) { if ((mp->mnt_kern_flag & MNTK_SSD) && !ignore_is_ssd) isssd = TRUE; @@ -1516,17 +1821,10 @@ void throttle_info_update_by_mount(mount_t mp) } else info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1]; - if (info->throttle_io_period == 0) { - - if (isssd == TRUE) - info->throttle_io_period = lowpri_io_period_ssd_msecs; - else - info->throttle_io_period = lowpri_io_period_msecs; + if (!ut->uu_lowpri_window) + throttle_info_set_initial_window(ut, info, FALSE, isssd); - if (info->throttle_io_period < lowpri_timer_period_msecs) - info->throttle_io_period = lowpri_timer_period_msecs; - } - throttle_info_set_initial_window(ut, info, FALSE); + return info; } @@ -1539,7 +1837,7 @@ void throttle_info_update_by_mount(mount_t mp) void throttle_info_update(void *throttle_info, int flags) { if (throttle_info) - throttle_info_update_internal(throttle_info, NULL, -1, flags, FALSE); + throttle_info_update_internal(throttle_info, NULL, flags, FALSE); } /* @@ -1561,7 +1859,10 @@ void throttle_info_update_by_mask(void *throttle_info_handle, int flags) throttle_info_update(throttle_info, flags); } - +/* + * KPI routine (private) + * Called to determine if this IO is being throttled to this level so that it can be treated specially + */ int throttle_info_io_will_be_throttled(void * throttle_info, int policy) { struct _throttle_io_info_t *info = throttle_info; @@ -1573,9 +1874,12 @@ int throttle_info_io_will_be_throttled(void * throttle_info, int policy) switch (policy) { case IOPOL_THROTTLE: - thread_throttle_level = THROTTLE_LEVEL_TIER2; + thread_throttle_level = THROTTLE_LEVEL_TIER3; break; case IOPOL_UTILITY: + thread_throttle_level = THROTTLE_LEVEL_TIER2; + break; + case IOPOL_STANDARD: thread_throttle_level = THROTTLE_LEVEL_TIER1; break; default: @@ -1585,10 +1889,10 @@ int throttle_info_io_will_be_throttled(void * throttle_info, int policy) for (throttle_level = THROTTLE_LEVEL_START; throttle_level < thread_throttle_level; throttle_level++) { microuptime(&elapsed); - timevalsub(&elapsed, &info->throttle_last_IO_timestamp[throttle_level]); + timevalsub(&elapsed, &info->throttle_window_start_timestamp[throttle_level]); elapsed_msecs = (uint64_t)elapsed.tv_sec * (uint64_t)1000 + (elapsed.tv_usec / 1000); - if (elapsed_msecs < (uint64_t)THROTTLE_WINDOW) + if (elapsed_msecs < (uint64_t)throttle_windows_msecs[thread_throttle_level]) break; } if (throttle_level >= thread_throttle_level) { @@ -1596,66 +1900,54 @@ int throttle_info_io_will_be_throttled(void * throttle_info, int policy) * we're beyond all of the throttle windows * so go ahead and treat as normal I/O */ - return (0); + return (THROTTLE_DISENGAGED); } /* * we're in the throttle window */ - return (1); -} - -void -throttle_legacy_process_incr(void) -{ - OSIncrementAtomic(&throttle_legacy_process_count); -} - -void -throttle_legacy_process_decr(void) -{ - OSDecrementAtomic(&throttle_legacy_process_count); + return (THROTTLE_ENGAGED); } - int spec_strategy(struct vnop_strategy_args *ap) { buf_t bp; int bflags; - int policy; + int io_tier; + int passive; dev_t bdev; uthread_t ut; mount_t mp; + struct bufattr *bap; int strategy_ret; struct _throttle_io_info_t *throttle_info; boolean_t isssd = FALSE; -#if !CONFIG_EMBEDDED proc_t curproc = current_proc(); -#endif /* !CONFIG_EMBEDDED */ bp = ap->a_bp; bdev = buf_device(bp); mp = buf_vnode(bp)->v_mount; + bap = &bp->b_attr; - policy = throttle_get_io_policy(&ut); + io_tier 
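throttle_info_io_will_be_throttled() still takes an IOPOL_* value from its private-KPI caller, so the patch re-maps the policies onto the new tier numbering: IOPOL_THROTTLE is now TIER3, IOPOL_UTILITY TIER2, and the newly recognized IOPOL_STANDARD TIER1. The mapping as a plain function; the default arm here is an assumption, since the hunk elides its body:

static int
iopol_to_throttle_tier(int policy)
{
	switch (policy) {
	case IOPOL_THROTTLE:
		return (THROTTLE_LEVEL_TIER3);
	case IOPOL_UTILITY:
		return (THROTTLE_LEVEL_TIER2);
	case IOPOL_STANDARD:
		return (THROTTLE_LEVEL_TIER1);
	default:
		return (THROTTLE_LEVEL_NONE);	/* assumed fallback */
	}
}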
= throttle_get_io_policy(&ut); + passive = throttle_get_passive_io_policy(&ut); - if (bp->b_flags & B_META) - bp->b_attr.ba_flags |= BA_META; + if (bp->b_flags & B_META) + bap->ba_flags |= BA_META; - if (policy == IOPOL_THROTTLE || policy == IOPOL_UTILITY) { - bp->b_flags |= B_THROTTLED_IO; - bp->b_attr.ba_flags |= BA_THROTTLED_IO; - bp->b_flags &= ~B_PASSIVE; - } else if (policy == IOPOL_PASSIVE) + SET_BUFATTR_IO_TIER(bap, io_tier); + + if (passive) bp->b_flags |= B_PASSIVE; -#if !CONFIG_EMBEDDED if ((curproc != NULL) && ((curproc->p_flag & P_DELAYIDLESLEEP) == P_DELAYIDLESLEEP)) - bp->b_attr.ba_flags |= BA_DELAYIDLESLEEP; -#endif /* !CONFIG_EMBEDDED */ + bap->ba_flags |= BA_DELAYIDLESLEEP; bflags = bp->b_flags; + if (((bflags & B_READ) == 0) && ((bflags & B_ASYNC) == 0)) + bufattr_markquickcomplete(bap); + if (kdebug_enable) { int code = 0; @@ -1669,21 +1961,20 @@ spec_strategy(struct vnop_strategy_args *ap) else if (bflags & B_PAGEIO) code |= DKIO_PAGING; - if (bflags & B_THROTTLED_IO) + if (io_tier != 0) code |= DKIO_THROTTLE; - else if (bflags & B_PASSIVE) + + code |= ((io_tier << DKIO_TIER_SHIFT) & DKIO_TIER_MASK); + + if (bflags & B_PASSIVE) code |= DKIO_PASSIVE; - if (bp->b_attr.ba_flags & BA_NOCACHE) + if (bap->ba_flags & BA_NOCACHE) code |= DKIO_NOCACHE; KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON, FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, - bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0); + buf_kernel_addrperm_addr(bp), bdev, (int)buf_blkno(bp), buf_count(bp), 0); } - if (((bflags & (B_THROTTLED_IO | B_PASSIVE | B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) && - mp && (mp->mnt_kern_flag & MNTK_ROOTDEV)) - hard_throttle_on_root = 1; - if (mp != NULL) { if ((mp->mnt_kern_flag & MNTK_SSD) && !ignore_is_ssd) isssd = TRUE; @@ -1691,12 +1982,13 @@ spec_strategy(struct vnop_strategy_args *ap) } else throttle_info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1]; - throttle_info_update_internal(throttle_info, ut, policy, bflags, isssd); + throttle_info_update_internal(throttle_info, ut, bflags, isssd); if ((bflags & B_READ) == 0) { microuptime(&throttle_info->throttle_last_write_timestamp); if (mp) { + mp->mnt_last_write_issued_timestamp = throttle_info->throttle_last_write_timestamp; INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size); } } else if (mp) { @@ -1740,7 +2032,7 @@ spec_strategy(struct vnop_strategy_args *ap) * If the boot cache indicates this IO should be throttled, * delay the thread. 
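spec_strategy() now derives an I/O tier and a separate passive bit instead of the old B_THROTTLED_IO flag, and the kdebug trace code carries the exact tier: DKIO_THROTTLE is still set for any non-tier-0 I/O so older tools keep working, while the tier number is packed into its own field. A standalone sketch of the encoding; the DKIO_* values are written out here as assumptions, the real definitions live in sys/kdebug.h:

#include <stdbool.h>

#define DKIO_READ	0x02	/* assumed values; */
#define DKIO_ASYNC	0x04	/* see sys/kdebug.h */
#define DKIO_PAGING	0x10
#define DKIO_THROTTLE	0x20
#define DKIO_PASSIVE	0x40
#define DKIO_NOCACHE	0x80
#define DKIO_TIER_MASK	0xF00
#define DKIO_TIER_SHIFT	8

static int
dkio_trace_code(bool rd, bool async, bool paging, bool passive,
    bool nocache, int io_tier)
{
	int code = 0;

	if (rd)
		code |= DKIO_READ;
	if (async)
		code |= DKIO_ASYNC;
	if (paging)
		code |= DKIO_PAGING;
	if (io_tier != 0)
		code |= DKIO_THROTTLE;		/* legacy "was throttled" bit */
	code |= ((io_tier << DKIO_TIER_SHIFT) & DKIO_TIER_MASK);
	if (passive)
		code |= DKIO_PASSIVE;
	if (nocache)
		code |= DKIO_NOCACHE;

	return (code);
}

Note also that the tracepoint now logs buf_kernel_addrperm_addr(bp) rather than the raw buffer pointer.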
*/ - throttle_info_set_initial_window(ut, throttle_info, TRUE); + throttle_info_set_initial_window(ut, throttle_info, TRUE, isssd); } return (0); } @@ -1783,10 +2075,12 @@ spec_close(struct vnop_close_args *ap) * last close) */ sessp = proc_session(p); + devsw_lock(dev, S_IFCHR); if (sessp != SESSION_NULL) { if (vp == sessp->s_ttyvp && vcount(vp) == 1) { struct tty *tp; + devsw_unlock(dev, S_IFCHR); session_lock(sessp); if (vp == sessp->s_ttyvp) { tp = SESSION_TP(sessp); @@ -1803,20 +2097,18 @@ spec_close(struct vnop_close_args *ap) if (NULL != tp) ttyfree(tp); } + devsw_lock(dev, S_IFCHR); } session_rele(sessp); } - devsw_lock(dev, S_IFCHR); - if (--vp->v_specinfo->si_opencount < 0) panic("negative open count (c, %u, %u)", major(dev), minor(dev)); /* - * close always, or close on last reference, or close on revoke + * close on last reference or on vnode revoke call */ - if ((D_TRACKCLOSE & cdevsw[major(dev)].d_type) != 0 || - vcount(vp) == 0 || (flags & IO_REVOKE) != 0) + if (vcount(vp) == 0 || (flags & IO_REVOKE) != 0) error = cdevsw[major(dev)].d_close(dev, flags, S_IFCHR, p); devsw_unlock(dev, S_IFCHR); diff --git a/bsd/miscfs/union/Makefile b/bsd/miscfs/union/Makefile index 513e6bbb9..b0ef42982 100644 --- a/bsd/miscfs/union/Makefile +++ b/bsd/miscfs/union/Makefile @@ -7,14 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ union.h diff --git a/bsd/net/Makefile b/bsd/net/Makefile index 5a186e2b6..fba2de704 100644 --- a/bsd/net/Makefile +++ b/bsd/net/Makefile @@ -10,19 +10,12 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ altq classq pktsched -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - altq classq pktsched - -EXPINC_SUBDIRS_I386 = \ - DATAFILES= \ bpf.h dlil.h \ ethernet.h if.h if_arp.h \ if_dl.h if_llc.h if_media.h if_mib.h \ if_types.h if_var.h \ - if_utun.h if_utun_crypto.h if_utun_crypto_ipsec.h \ + if_utun.h \ kext_net.h ndrv.h pfkeyv2.h \ route.h @@ -33,13 +26,15 @@ KERNELFILES= \ PRIVATE_DATAFILES = \ if_vlan_var.h if_ppp.h firewire.h \ ppp_defs.h radix.h if_bond_var.h if_bond_internal.h lacp.h ndrv_var.h \ - netsrc.h raw_cb.h etherdefs.h iso88025.h if_pflog.h pfvar.h \ - if_bridgevar.h ntstat.h iptap.h if_llreach.h + netsrc.h raw_cb.h etherdefs.h if_pflog.h pfvar.h \ + if_bridgevar.h ntstat.h iptap.h if_llreach.h \ + if_utun_crypto.h if_utun_crypto_ipsec.h if_utun_crypto_dtls.h \ + pktap.h if_ipsec.h \ -PRIVATE_KERNELFILES = ${KERNELFILES} \ +PRIVATE_KERNELFILES = $(filter-out radix.h,${KERNELFILES}) \ bpfdesc.h ppp_comp.h \ zlib.h bpf_compat.h net_osdep.h \ - ntstat.h iptap.h if_llreach.h flowadv.h + flowadv.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/bsd/net/altq/Makefile b/bsd/net/altq/Makefile index b8ad37152..6a330882f 100644 --- a/bsd/net/altq/Makefile +++ b/bsd/net/altq/Makefile @@ -6,18 +6,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_PPC = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_PPC = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES= \ KERNELFILES= \ diff --git a/bsd/net/altq/altq_cbq.c b/bsd/net/altq/altq_cbq.c index 31b3573c9..8ced30eeb 100644 --- a/bsd/net/altq/altq_cbq.c +++ b/bsd/net/altq/altq_cbq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. 
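Returning to the spec_close() hunk above: it now takes devsw_lock up front and explicitly drops it across the controlling-tty teardown instead of acquiring it afterwards. The shape of the locking, with bodies elided and a hypothetical predicate standing in for the vp == sessp->s_ttyvp checks:

/* Shape of the spec_close() locking above; illustrative only. */
devsw_lock(dev, S_IFCHR);
if (is_controlling_tty(vp, sessp)) {	/* hypothetical predicate */
	devsw_unlock(dev, S_IFCHR);	/* do not hold it across tty teardown */
	session_lock(sessp);
	/* ... detach the tty from the session, ttyfree() ... */
	devsw_lock(dev, S_IFCHR);
}
if (--vp->v_specinfo->si_opencount < 0)
	panic("negative open count (c, %u, %u)", major(dev), minor(dev));
if (vcount(vp) == 0 || (flags & IO_REVOKE) != 0)
	error = cdevsw[major(dev)].d_close(dev, flags, S_IFCHR, p);
devsw_unlock(dev, S_IFCHR);

There is a behavioral change alongside the locking one: with the D_TRACKCLOSE case removed, d_close now fires only on the last reference or an explicit revoke.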
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -228,6 +228,10 @@ altq_cbq_request(struct ifaltq *altq, enum altrq req, void *arg) case ALTRQ_EVENT: cbq_event(cbqp, (cqev_t)arg); break; + + case ALTRQ_THROTTLE: + default: + break; } return (0); } diff --git a/bsd/net/altq/altq_fairq.c b/bsd/net/altq/altq_fairq.c index 284654761..69dcf2209 100644 --- a/bsd/net/altq/altq_fairq.c +++ b/bsd/net/altq/altq_fairq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Copyright (c) 2011-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -260,6 +260,10 @@ altq_fairq_request(struct ifaltq *altq, enum altrq req, void *arg) case ALTRQ_EVENT: fairq_event(fif, (cqev_t)arg); break; + + case ALTRQ_THROTTLE: + default: + break; } return (0); } diff --git a/bsd/net/altq/altq_hfsc.c b/bsd/net/altq/altq_hfsc.c index 1e58df421..adaf0d35c 100644 --- a/bsd/net/altq/altq_hfsc.c +++ b/bsd/net/altq/altq_hfsc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -246,6 +246,10 @@ altq_hfsc_request(struct ifaltq *altq, enum altrq req, void *arg) case ALTRQ_EVENT: hfsc_event(hif, (cqev_t)arg); break; + + case ALTRQ_THROTTLE: + default: + break; } return (0); } diff --git a/bsd/net/altq/altq_qfq.c b/bsd/net/altq/altq_qfq.c index d45437e25..0f7c52983 100644 --- a/bsd/net/altq/altq_qfq.c +++ b/bsd/net/altq/altq_qfq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Copyright (c) 2011-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -196,6 +196,10 @@ altq_qfq_request(struct ifaltq *altq, enum altrq req, void *arg) case ALTRQ_EVENT: qfq_event(qif, (cqev_t)arg); break; + + case ALTRQ_THROTTLE: + default: + break; } return (0); } diff --git a/bsd/net/bpf.c b/bsd/net/bpf.c index b1ac5f1e8..207ab752e 100644 --- a/bsd/net/bpf.c +++ b/bsd/net/bpf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -177,11 +177,7 @@ static lck_grp_t *bpf_mlock_grp; static lck_grp_attr_t *bpf_mlock_grp_attr; static lck_attr_t *bpf_mlock_attr; -/* - * Mark a descriptor free by making it point to itself. - * This is probably cheaper than marking with a constant since - * the address should be in a register anyway. 
- */ +static mbuf_tag_id_t bpf_mtag_id; #endif /* __APPLE__ */ static int bpf_allocbufs(struct bpf_d *); @@ -191,15 +187,15 @@ static void bpf_freed(struct bpf_d *); static void bpf_mcopy(const void *, void *, size_t); static int bpf_movein(struct uio *, int, struct mbuf **, struct sockaddr *, int *); -static int bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt); -static void bpf_timed_out(void *, void *); -static void bpf_wakeup(struct bpf_d *); +static int bpf_setif(struct bpf_d *, ifnet_t ifp, u_int32_t dlt, dev_t); +static void bpf_timed_out(void *, void *); +static void bpf_wakeup(struct bpf_d *); static void catchpacket(struct bpf_d *, u_char *, struct mbuf *, u_int, u_int, int, void (*)(const void *, void *, size_t)); static void reset_d(struct bpf_d *); -static int bpf_setf(struct bpf_d *, u_int bf_len, user_addr_t bf_insns); +static int bpf_setf(struct bpf_d *, u_int , user_addr_t , dev_t, u_long); static int bpf_getdltlist(struct bpf_d *, caddr_t, struct proc *); -static int bpf_setdlt(struct bpf_d *, u_int); +static int bpf_setdlt(struct bpf_d *, u_int, dev_t); static int bpf_set_traffic_class(struct bpf_d *, int); static void bpf_set_packet_service_class(struct mbuf *, int); @@ -405,8 +401,9 @@ bpf_movein(struct uio *uio, int linktype, struct mbuf **mp, struct sockaddr *soc #ifdef __APPLE__ /* - * The dynamic addition of a new device node must block all processes that are opening - * the last device so that no process will get an unexpected ENOENT + * The dynamic addition of a new device node must block all processes that + * are opening the last device so that no process will get an unexpected + * ENOENT */ static void bpf_make_dev_t(int maj) @@ -576,9 +573,9 @@ bpf_start_timer(struct bpf_d *d) tv.tv_sec = d->bd_rtout / hz; tv.tv_usec = (d->bd_rtout % hz) * tick; - clock_interval_to_deadline((uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec, - NSEC_PER_USEC, - &deadline); + clock_interval_to_deadline( + (uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec, + NSEC_PER_USEC, &deadline); /* * The state is BPF_IDLE, so the timer hasn't * been started yet, and hasn't gone off yet; @@ -780,6 +777,10 @@ bpfclose(dev_t dev, __unused int flags, __unused int fmt, mac_bpfdesc_label_destroy(d); #endif thread_call_free(d->bd_thread_call); + + while (d->bd_hbuf_read) + msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); + bpf_freed(d); /* Mark free in same context as bpfopen comes to check */ @@ -805,32 +806,14 @@ bpf_sleep(struct bpf_d *d, int pri, const char *wmesg, int timo) return msleep1((caddr_t)d, bpf_mlock, pri, wmesg, abstime); } -static struct inpcb * -bpf_findinpcb(struct inpcbinfo *pcbinfo, uint32_t flowhash) -{ - struct inpcb *inp = NULL; - - if (!flowhash) return (NULL); - - lck_rw_lock_shared(pcbinfo->mtx); - LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - if (inp->inp_flowhash == flowhash) - break; - in_pcb_checkstate(inp, WNT_RELEASE, 0); - } - } - lck_rw_done(pcbinfo->mtx); - - return (inp); -} - /* * Rotate the packet buffers in descriptor d. Move the store buffer * into the hold slot, and the free buffer into the store slot. * Zero the length of the new store buffer. 
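bpf_start_timer() above keeps bd_rtout in scheduler ticks and converts it to a microsecond interval before computing an absolute deadline. A standalone sketch of the arithmetic, assuming the traditional BSD values hz = 100 and tick = 10000 microseconds:

#include <stdint.h>

#define HZ_ASSUMED	100	/* assumed; the kernel's hz */
#define TICK_ASSUMED	10000	/* assumed; microseconds per tick */

static uint64_t
rtout_to_usecs(uint64_t bd_rtout)
{
	uint64_t secs  = bd_rtout / HZ_ASSUMED;
	uint64_t usecs = (bd_rtout % HZ_ASSUMED) * TICK_ASSUMED;

	/*
	 * This value, scaled by NSEC_PER_USEC, is what gets passed to
	 * clock_interval_to_deadline() above.
	 */
	return (secs * 1000000ULL + usecs);
}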
*/ #define ROTATE_BUFFERS(d) \ + if (d->bd_hbuf_read) \ + panic("rotating bpf buffers during read"); \ (d)->bd_hbuf = (d)->bd_sbuf; \ (d)->bd_hlen = (d)->bd_slen; \ (d)->bd_sbuf = (d)->bd_fbuf; \ @@ -843,7 +826,8 @@ int bpfread(dev_t dev, struct uio *uio, int ioflag) { struct bpf_d *d; - int timed_out; + caddr_t hbuf; + int timed_out, hbuf_len; int error; lck_mtx_lock(bpf_mlock); @@ -869,6 +853,14 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) timed_out = (d->bd_state == BPF_TIMED_OUT); d->bd_state = BPF_IDLE; + while (d->bd_hbuf_read) + msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); + + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) { + lck_mtx_unlock(bpf_mlock); + return (ENXIO); + } /* * If the hold buffer is empty, then do a timed sleep, which * ends when the timeout expires or when enough packets @@ -914,7 +906,28 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) lck_mtx_unlock(bpf_mlock); return (ENXIO); } + + while (d->bd_hbuf_read) + msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); + + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) { + lck_mtx_unlock(bpf_mlock); + return (ENXIO); + } + if (error == EINTR || error == ERESTART) { + if (d->bd_slen) { + /* + * Sometimes we may be interrupted often and + * the sleep above will not timeout. + * Regardless, we should rotate the buffers + * if there's any new data pending and + * return it. + */ + ROTATE_BUFFERS(d); + break; + } lck_mtx_unlock(bpf_mlock); return (error); } @@ -944,6 +957,7 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) * At this point, we know we have something in the hold slot. */ +#ifdef __APPLE__ /* * Before we move data to userland, we fill out the extended * header fields. @@ -954,41 +968,64 @@ bpfread(dev_t dev, struct uio *uio, int ioflag) p = d->bd_hbuf; while (p < d->bd_hbuf + d->bd_hlen) { struct bpf_hdr_ext *ehp; - struct inpcb *inp; - uint32_t flowhash; - pid_t pid; + uint32_t flowid; + struct so_procinfo soprocinfo; + int found = 0; ehp = (struct bpf_hdr_ext *)(void *)p; - if ((flowhash = ehp->bh_flowhash)) { - if (ehp->bh_flags & BPF_HDR_EXT_FLAGS_TCP) - inp = bpf_findinpcb(&tcbinfo, flowhash); - else - inp = bpf_findinpcb(&udbinfo, flowhash); - if (inp) { - socket_lock(inp->inp_socket, 0); - pid = inp->inp_socket->last_pid; - in_pcb_checkstate(inp, WNT_RELEASE, 1); - socket_unlock(inp->inp_socket, 0); - ehp->bh_pid = pid; - proc_name(pid, ehp->bh_comm, MAXCOMLEN); + if ((flowid = ehp->bh_flowid)) { + if (ehp->bh_proto == IPPROTO_TCP) + found = inp_findinpcb_procinfo(&tcbinfo, + flowid, &soprocinfo); + else if (ehp->bh_proto == IPPROTO_UDP) + found = inp_findinpcb_procinfo(&udbinfo, + flowid, &soprocinfo); + if (found != 0) { + ehp->bh_pid = soprocinfo.spi_pid; + proc_name(ehp->bh_pid, ehp->bh_comm, MAXCOMLEN); } - ehp->bh_flowhash = 0; + ehp->bh_flowid = 0; } p += BPF_WORDALIGN(ehp->bh_hdrlen + ehp->bh_caplen); } } +#endif + /* + * Set the hold buffer read. So we do not + * rotate the buffers until the hold buffer + * read is complete. Also to avoid issues resulting + * from page faults during disk sleep (). + */ + d->bd_hbuf_read = 1; + hbuf = d->bd_hbuf; + hbuf_len = d->bd_hlen; + lck_mtx_unlock(bpf_mlock); + /* * Move data from hold buffer into user space. * We know the entire buffer is transferred since * we checked above that the read buffer is bpf_bufsize bytes. 
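The central concurrency change in bpfread(): the hold buffer is pinned with the new bd_hbuf_read flag and bpf_mlock is dropped for the duration of the copy out, since UIOMOVE can fault and sleep; every other path that might rotate or free the buffers now waits on the flag first (and ROTATE_BUFFERS panics if that invariant breaks). The protocol, condensed from the surrounding code (error unwinding elided; not compilable on its own):

d->bd_hbuf_read = 1;			/* pin the hold buffer against rotation */
hbuf = d->bd_hbuf;
hbuf_len = d->bd_hlen;
lck_mtx_unlock(bpf_mlock);		/* the copy may fault; don't hold the lock */

error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio);

lck_mtx_lock(bpf_mlock);
d = bpf_dtab[minor(dev)];		/* descriptor may have closed meanwhile */
if (d == 0 || d == (void *)1) {
	lck_mtx_unlock(bpf_mlock);
	return (ENXIO);
}
d->bd_hbuf_read = 0;			/* unpin and recycle the hold buffer */
d->bd_fbuf = d->bd_hbuf;
d->bd_hbuf = NULL;
d->bd_hlen = 0;
wakeup((caddr_t)d);			/* release anyone sleeping in "bpf_reading" */
lck_mtx_unlock(bpf_mlock);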
*/ - error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio); - + error = UIOMOVE(hbuf, hbuf_len, UIO_READ, uio); + + lck_mtx_lock(bpf_mlock); + /* + * Make sure device is still opened + */ + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) { + lck_mtx_unlock(bpf_mlock); + return (ENXIO); + } + + d->bd_hbuf_read = 0; d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = NULL; d->bd_hlen = 0; + wakeup((caddr_t)d); lck_mtx_unlock(bpf_mlock); return (error); + } @@ -1056,8 +1093,8 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) int error; char dst_buf[SOCKADDR_HDR_LEN + MAX_DATALINK_HDR_LEN]; int datlen = 0; - int bif_dlt; - int bd_hdrcmplt; + int bif_dlt; + int bd_hdrcmplt; lck_mtx_lock(bpf_mlock); @@ -1154,6 +1191,9 @@ bpfwrite(dev_t dev, struct uio *uio, __unused int ioflag) static void reset_d(struct bpf_d *d) { + if (d->bd_hbuf_read) + panic("resetting buffers during read"); + if (d->bd_hbuf) { /* Free the hold buffer. */ d->bd_fbuf = d->bd_hbuf; @@ -1223,7 +1263,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, int n; n = d->bd_slen; - if (d->bd_hbuf) + if (d->bd_hbuf && d->bd_hbuf_read == 0) n += d->bd_hlen; bcopy(&n, addr, sizeof (n)); @@ -1273,20 +1313,22 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, /* * Set link layer read filter. */ - case BIOCSETF32: { /* struct bpf_program32 */ + case BIOCSETF32: + case BIOCSETFNR32: { /* struct bpf_program32 */ struct bpf_program32 prg32; bcopy(addr, &prg32, sizeof (prg32)); error = bpf_setf(d, prg32.bf_len, - CAST_USER_ADDR_T(prg32.bf_insns)); + CAST_USER_ADDR_T(prg32.bf_insns), dev, cmd); break; } - case BIOCSETF64: { /* struct bpf_program64 */ + case BIOCSETF64: + case BIOCSETFNR64: { /* struct bpf_program64 */ struct bpf_program64 prg64; bcopy(addr, &prg64, sizeof (prg64)); - error = bpf_setf(d, prg64.bf_len, prg64.bf_insns); + error = bpf_setf(d, prg64.bf_len, prg64.bf_insns, dev, cmd); break; } @@ -1294,6 +1336,14 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, * Flush read packet buffer. */ case BIOCFLUSH: + while (d->bd_hbuf_read) { + msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); + } + + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) + return (ENXIO); + reset_d(d); break; @@ -1348,7 +1398,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, u_int dlt; bcopy(addr, &dlt, sizeof (dlt)); - error = bpf_setdlt(d, dlt); + error = bpf_setdlt(d, dlt, dev); } break; @@ -1362,8 +1412,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, struct ifnet *const ifp = d->bd_bif->bif_ifp; snprintf(((struct ifreq *)(void *)addr)->ifr_name, - sizeof (ifr.ifr_name), "%s%d", ifp->if_name, - ifp->if_unit); + sizeof (ifr.ifr_name), "%s", if_name(ifp)); } break; @@ -1379,14 +1428,14 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, if (ifp == NULL) error = ENXIO; else - error = bpf_setif(d, ifp, 0); + error = bpf_setif(d, ifp, 0, dev); break; } /* * Set read timeout. */ - case BIOCSRTIMEOUT32: { /* struct user32_timeval */ + case BIOCSRTIMEOUT32: { /* struct user32_timeval */ struct user32_timeval _tv; struct timeval tv; @@ -1403,7 +1452,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, break; } - case BIOCSRTIMEOUT64: { /* struct user64_timeval */ + case BIOCSRTIMEOUT64: { /* struct user64_timeval */ struct user64_timeval _tv; struct timeval tv; @@ -1420,7 +1469,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, break; } - /* + /* * Get read timeout. 
*/ case BIOCGRTIMEOUT32: { /* struct user32_timeval */ @@ -1559,9 +1608,36 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, case BIOCGRSIG: /* u_int */ bcopy(&d->bd_sig, addr, sizeof (u_int)); break; +#ifdef __APPLE__ case BIOCSEXTHDR: bcopy(addr, &d->bd_extendedhdr, sizeof (u_int)); break; + + case BIOCGIFATTACHCOUNT: { /* struct ifreq */ + ifnet_t ifp; + struct bpf_if *bp; + + bcopy(addr, &ifr, sizeof (ifr)); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + ifp = ifunit(ifr.ifr_name); + if (ifp == NULL) { + error = ENXIO; + break; + } + ifr.ifr_intval = 0; + for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) { + struct bpf_d *bpf_d; + + if (bp->bif_ifp == NULL || bp->bif_ifp != ifp) + continue; + for (bpf_d = bp->bif_dlist; bpf_d; bpf_d = bpf_d->bd_next) { + ifr.ifr_intval += 1; + } + } + bcopy(&ifr, addr, sizeof (ifr)); + break; + } +#endif } lck_mtx_unlock(bpf_mlock); @@ -1574,11 +1650,18 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t addr, __unused int flags, * free it and replace it. Returns EINVAL for bogus requests. */ static int -bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns) +bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns, dev_t dev, u_long cmd) { struct bpf_insn *fcode, *old; u_int flen, size; + while (d->bd_hbuf_read) + msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); + + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) + return (ENXIO); + old = d->bd_filter; if (bf_insns == USER_ADDR_NULL) { if (bf_len != 0) @@ -1602,7 +1685,10 @@ bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns) if (copyin(bf_insns, (caddr_t)fcode, size) == 0 && bpf_validate(fcode, (int)flen)) { d->bd_filter = fcode; - reset_d(d); + + if (cmd == BIOCSETF32 || cmd == BIOCSETF64) + reset_d(d); + if (old != 0) FREE((caddr_t)old, M_DEVBUF); @@ -1618,11 +1704,18 @@ bpf_setf(struct bpf_d *d, u_int bf_len, user_addr_t bf_insns) * Return an errno or 0. */ static int -bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt) +bpf_setif(struct bpf_d *d, ifnet_t theywant, u_int32_t dlt, dev_t dev) { struct bpf_if *bp; int error; - + + while (d->bd_hbuf_read) + msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); + + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) + return (ENXIO); + /* * Look through attached interfaces for the named one. */ @@ -1710,9 +1803,7 @@ bpf_getdltlist(struct bpf_d *d, caddr_t addr, struct proc *p) * Set the data link type of a BPF instance. 
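bpf_setf() above now calls reset_d() only for the classic BIOCSETF commands, so the new "no reset" variants can swap filters without discarding packets already captured. A minimal user-space sketch using BIOCSETFNR as defined later in this patch; the file descriptor and the accept-all program are illustrative:

#include <sys/ioctl.h>
#include <net/bpf.h>

/* Accept-everything program: one BPF_RET returning "whole packet". */
static struct bpf_insn accept_all[] = {
	BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
};

static int
set_filter_without_reset(int bpf_fd)
{
	struct bpf_program prog;

	prog.bf_len = sizeof (accept_all) / sizeof (accept_all[0]);
	prog.bf_insns = accept_all;

	/* Same layout as BIOCSETF, but buffered packets survive. */
	return (ioctl(bpf_fd, BIOCSETFNR, &prog));
}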
*/ static int -bpf_setdlt(struct bpf_d *d, uint32_t dlt) - - +bpf_setdlt(struct bpf_d *d, uint32_t dlt, dev_t dev) { int error, opromisc; struct ifnet *ifp; @@ -1720,6 +1811,14 @@ bpf_setdlt(struct bpf_d *d, uint32_t dlt) if (d->bd_bif->bif_dlt == dlt) return (0); + + while (d->bd_hbuf_read) + msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); + + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) + return (ENXIO); + ifp = d->bd_bif->bif_ifp; for (bp = bpf_iflist; bp; bp = bp->bif_next) { if (bp->bif_ifp == ifp && bp->bif_dlt == dlt) @@ -1797,6 +1896,15 @@ bpfselect(dev_t dev, int which, void * wql, struct proc *p) return (ENXIO); } + while (d->bd_hbuf_read) + msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL); + + d = bpf_dtab[minor(dev)]; + if (d == 0 || d == (void *)1) { + lck_mtx_unlock(bpf_mlock); + return (ENXIO); + } + switch (which) { case FREAD: if (d->bd_hlen != 0 || @@ -1892,7 +2000,7 @@ filt_bpfread(struct knote *kn, long hint) if (hint == 0) lck_mtx_lock(bpf_mlock); - + if (d->bd_immediate) { /* * If there's data in the hold buffer, it's the @@ -1908,7 +2016,8 @@ filt_bpfread(struct knote *kn, long hint) * If there's no data in either buffer, we're not * ready to read. */ - kn->kn_data = (d->bd_hlen == 0 ? d->bd_slen : d->bd_hlen); + kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) + ? d->bd_slen : d->bd_hlen); int64_t lowwat = 1; if (kn->kn_sfflags & NOTE_LOWAT) { @@ -1934,7 +2043,7 @@ filt_bpfread(struct knote *kn, long hint) * no data in the hold buffer and the timer hasn't * expired, we're not ready to read. */ - kn->kn_data = (d->bd_hlen == 0 && d->bd_state == BPF_TIMED_OUT ? + kn->kn_data = ((d->bd_hlen == 0 || d->bd_hbuf_read) && d->bd_state == BPF_TIMED_OUT ? d->bd_slen : d->bd_hlen); ready = (kn->kn_data > 0); } @@ -2103,6 +2212,9 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen, int hdrlen, caplen; int do_wakeup = 0; u_char *payload; + struct timeval tv; + struct m_tag *mt = NULL; + struct bpf_mtag *bt = NULL; hdrlen = d->bd_extendedhdr ? d->bd_bif->bif_exthdrlen : d->bd_bif->bif_hdrlen; @@ -2149,7 +2261,6 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen, /* * Append the bpf header. 
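The same wait-and-revalidate preamble now guards bpf_setf(), bpf_setif(), bpf_setdlt(), bpfselect() and BIOCFLUSH: sleep until any in-flight hold-buffer read drains, then re-fetch the descriptor, which may have been closed (or left as the (void *)1 in-transition sentinel) while the lock was dropped. As a hypothetical helper, which the patch open-codes at each site:

static struct bpf_d *
bpf_wait_for_hold_read(struct bpf_d *d, dev_t dev)
{
	while (d->bd_hbuf_read)
		msleep((caddr_t)d, bpf_mlock, PRINET, "bpf_reading", NULL);

	d = bpf_dtab[minor(dev)];
	if (d == 0 || d == (void *)1)
		return (NULL);		/* closed while we slept */

	return (d);
}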
*/ - struct timeval tv; microtime(&tv); if (d->bd_extendedhdr) { ehp = (struct bpf_hdr_ext *)(void *)(d->bd_sbuf + curlen); @@ -2159,14 +2270,30 @@ catchpacket(struct bpf_d *d, u_char *pkt, struct mbuf *m, u_int pktlen, ehp->bh_datalen = pktlen; ehp->bh_hdrlen = hdrlen; ehp->bh_caplen = totlen - hdrlen; - if (outbound) { - if (m->m_pkthdr.m_fhflags & PF_TAG_FLOWHASH) - ehp->bh_flowhash = m->m_pkthdr.m_flowhash; - ehp->bh_svc = so_svc2tc(m->m_pkthdr.svc); + mt = m_tag_locate(m, bpf_mtag_id, 0, NULL); + if (mt && mt->m_tag_len >= sizeof(*bt)) { + bt = (struct bpf_mtag *)(mt + 1); + ehp->bh_pid = bt->bt_pid; + strlcpy(ehp->bh_comm, bt->bt_comm, + sizeof(ehp->bh_comm)); + ehp->bh_svc = so_svc2tc(bt->bt_svc); + if (bt->bt_direction == BPF_MTAG_DIR_OUT) + ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT; + else + ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN; + m_tag_delete(m, mt); + } else if (outbound) { + /* only do lookups on non-raw INPCB */ + if ((m->m_pkthdr.pkt_flags & (PKTF_FLOW_ID| + PKTF_FLOW_LOCALSRC|PKTF_FLOW_RAWSOCK)) == + (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) && + m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { + ehp->bh_flowid = m->m_pkthdr.pkt_flowid; + ehp->bh_proto = m->m_pkthdr.pkt_proto; + } + ehp->bh_svc = so_svc2tc(m->m_pkthdr.pkt_svc); ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_OUT; - if (m->m_pkthdr.m_fhflags & PF_TAG_TCP) - ehp->bh_flags |= BPF_HDR_EXT_FLAGS_TCP; - } else + } else ehp->bh_flags |= BPF_HDR_EXT_FLAGS_DIR_IN; payload = (u_char *)ehp + hdrlen; caplen = ehp->bh_caplen; @@ -2222,9 +2349,12 @@ bpf_freed(struct bpf_d *d) * been detached from its interface and it yet hasn't been marked * free. */ + if (d->bd_hbuf_read) + panic("bpf buffer freed during read"); + if (d->bd_sbuf != 0) { FREE(d->bd_sbuf, M_DEVBUF); - if (d->bd_hbuf != 0) + if (d->bd_hbuf != 0) FREE(d->bd_hbuf, M_DEVBUF); if (d->bd_fbuf != 0) FREE(d->bd_fbuf, M_DEVBUF); @@ -2273,8 +2403,8 @@ bpf_attach( } if (bp_temp != NULL) { - printf("bpfattach - %s%d with dlt %d is already attached\n", - ifp->if_name, ifp->if_unit, dlt); + printf("bpfattach - %s with dlt %d is already attached\n", + if_name(ifp), dlt); FREE(bp_new, M_DEVBUF); lck_mtx_unlock(bpf_mlock); return EEXIST; @@ -2314,7 +2444,7 @@ bpf_attach( #ifndef __APPLE__ if (bootverbose) - printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit); + printf("bpf: %s attached\n", if_name(ifp)); #endif return 0; @@ -2377,15 +2507,10 @@ bpf_init(__unused void *unused) if (bpf_devsw_installed == 0) { bpf_devsw_installed = 1; - - bpf_mlock_grp_attr = lck_grp_attr_alloc_init(); - - bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr); - - bpf_mlock_attr = lck_attr_alloc_init(); - - lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr); - + bpf_mlock_grp_attr = lck_grp_attr_alloc_init(); + bpf_mlock_grp = lck_grp_alloc_init("bpf", bpf_mlock_grp_attr); + bpf_mlock_attr = lck_attr_alloc_init(); + lck_mtx_init(bpf_mlock, bpf_mlock_grp, bpf_mlock_attr); maj = cdevsw_add(CDEV_MAJOR, &bpf_cdevsw); if (maj == -1) { if (bpf_mlock_attr) @@ -2406,6 +2531,8 @@ bpf_init(__unused void *unused) for (i = 0 ; i < NBPFILTER; i++) bpf_make_dev_t(maj); + + VERIFY(mbuf_tag_id_find(BPF_CONTROL_NAME, &bpf_mtag_id) == 0); } #else cdevsw_add(&bpf_cdevsw); diff --git a/bsd/net/bpf.h b/bsd/net/bpf.h index 3ed4d951f..00e3ac5d8 100644 --- a/bsd/net/bpf.h +++ b/bsd/net/bpf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -81,6 +81,7 @@ #include #include #include +#include #ifdef KERNEL #include @@ -151,8 +152,8 @@ struct bpf_version { u_short bv_minor; }; #if defined(__LP64__) -#define __need_struct_timeval32 -#include +#include + #define BPF_TIMEVAL timeval32 #else #define BPF_TIMEVAL timeval @@ -198,7 +199,13 @@ struct bpf_version { #define BIOCGETTC _IOR('B', 122, int) #define BIOCSETTC _IOW('B', 123, int) #define BIOCSEXTHDR _IOW('B', 124, u_int) +#define BIOCGIFATTACHCOUNT _IOWR('B', 125, struct ifreq) #endif /* PRIVATE */ +#define BIOCSETFNR _IOW('B', 126, struct bpf_program) +#ifdef KERNEL_PRIVATE +#define BIOCSETFNR64 _IOW('B',126, struct bpf_program64) +#define BIOCSETFNR32 _IOW('B',126, struct bpf_program32) +#endif /* KERNEL_PRIVATE */ /* * Structure prepended to each packet. @@ -232,12 +239,23 @@ struct bpf_hdr_ext { u_short bh_flags; #define BPF_HDR_EXT_FLAGS_DIR_IN 0x0000 #define BPF_HDR_EXT_FLAGS_DIR_OUT 0x0001 -#define BPF_HDR_EXT_FLAGS_TCP 0x0002 pid_t bh_pid; /* process PID */ char bh_comm[MAXCOMLEN+1]; /* process command */ - u_char _bh_pad2[3]; + u_char _bh_pad2[2]; + u_char bh_proto; /* kernel reserved; 0 in userland */ bpf_u_int32 bh_svc; /* service class */ - bpf_u_int32 bh_flowhash; /* kernel reserved; 0 in userland */ + bpf_u_int32 bh_flowid; /* kernel reserved; 0 in userland */ +}; + +#define BPF_CONTROL_NAME "com.apple.net.bpf" + +struct bpf_mtag { + char bt_comm[MAXCOMLEN]; + pid_t bt_pid; + bpf_u_int32 bt_svc; + unsigned char bt_direction; +#define BPF_MTAG_DIR_IN 0 +#define BPF_MTAG_DIR_OUT 1 }; #endif /* PRIVATE */ @@ -274,15 +292,73 @@ struct bpf_hdr_ext { #define DLT_SLIP_BSDOS 15 /* BSD/OS Serial Line IP */ #define DLT_PPP_BSDOS 16 /* BSD/OS Point-to-point Protocol */ +/* + * 17 was used for DLT_PFLOG in OpenBSD; it no longer is. + * + * It was DLT_LANE8023 in SuSE 6.3, so we defined LINKTYPE_PFLOG + * as 117 so that pflog captures would use a link-layer header type + * value that didn't collide with any other values. On all + * platforms other than OpenBSD, we defined DLT_PFLOG as 117, + * and we mapped between LINKTYPE_PFLOG and DLT_PFLOG. + * + * OpenBSD eventually switched to using 117 for DLT_PFLOG as well. + * + * Don't use 17 for anything else. + */ + +/* + * 18 is used for DLT_PFSYNC in OpenBSD, NetBSD, DragonFly BSD and + * Mac OS X; don't use it for anything else. (FreeBSD uses 121, + * which collides with DLT_HHDLC, even though it doesn't use 18 + * for anything and doesn't appear to have ever used it for anything.) + * + * We define it as 18 on those platforms; it is, unfortunately, used + * for DLT_CIP in Suse 6.3, so we don't define it as DLT_PFSYNC + * in general. As the packet format for it, like that for + * DLT_PFLOG, is not only OS-dependent but OS-version-dependent, + * we don't support printing it in tcpdump except on OSes that + * have the relevant header files, so it's not that useful on + * other platforms. + */ #define DLT_PFSYNC 18 /* Packet filter state syncing */ + #define DLT_ATM_CLIP 19 /* Linux Classical-IP over ATM */ /* - * This value is defined by NetBSD; other platforms should refrain from - * using it for other purposes, so that NetBSD savefiles with a link - * type of 50 can be read as this type on all platforms. + * These values are defined by NetBSD; other platforms should refrain from + * using them for other purposes, so that NetBSD savefiles with link + * types of 50 or 51 can be read as this type on all platforms. 
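The bpf_mtag defined above is the producer half of the attribution path: a layer that already knows the owning process can tag the mbuf, and catchpacket() will prefer that tag over the flow-id PCB lookup, then delete it. A sketch of the producer side, assuming m_tag_alloc() and m_tag_prepend() with their usual xnu signatures; bpf_mtag_id is the tag id registered from BPF_CONTROL_NAME in bpf_init():

static int
attach_bpf_mtag(struct mbuf *m, pid_t pid, const char *comm, u_int32_t svc)
{
	struct m_tag *mt;
	struct bpf_mtag *bt;

	mt = m_tag_alloc(bpf_mtag_id, 0, sizeof (*bt), M_WAITOK);
	if (mt == NULL)
		return (ENOMEM);

	bt = (struct bpf_mtag *)(mt + 1);
	bt->bt_pid = pid;
	strlcpy(bt->bt_comm, comm, sizeof (bt->bt_comm));
	bt->bt_svc = svc;
	bt->bt_direction = BPF_MTAG_DIR_OUT;

	m_tag_prepend(m, mt);
	return (0);
}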
*/ #define DLT_PPP_SERIAL 50 /* PPP over serial with HDLC encapsulation */ +#define DLT_PPP_ETHER 51 /* PPP over Ethernet */ + +/* + * The Axent Raptor firewall - now the Symantec Enterprise Firewall - uses + * a link-layer type of 99 for the tcpdump it supplies. The link-layer + * header has 6 bytes of unknown data, something that appears to be an + * Ethernet type, and 36 bytes that appear to be 0 in at least one capture + * I've seen. + */ +#define DLT_SYMANTEC_FIREWALL 99 + +/* + * Values between 100 and 103 are used in capture file headers as + * link-layer header type LINKTYPE_ values corresponding to DLT_ types + * that differ between platforms; don't use those values for new DLT_ + * new types. + */ + +/* + * Values starting with 104 are used for newly-assigned link-layer + * header type values; for those link-layer header types, the DLT_ + * value returned by pcap_datalink() and passed to pcap_open_dead(), + * and the LINKTYPE_ value that appears in capture files, are the + * same. + * + * DLT_MATCHING_MIN is the lowest such value; DLT_MATCHING_MAX is + * the highest such value. + */ +#define DLT_MATCHING_MIN 104 /* * This value was defined by libpcap 0.5; platforms that have defined @@ -301,16 +377,6 @@ struct bpf_hdr_ext { #define DLT_C_HDLC 104 /* Cisco HDLC */ #define DLT_CHDLC DLT_C_HDLC -/* - * Reserved for future use. - * Do not pick other numerical value for these unless you have also - * picked up the tcpdump.org top-of-CVS-tree version of "savefile.c", - * which will arrange that capture files for these DLT_ types have - * the same "network" value on all platforms, regardless of what - * value is chosen for their DLT_ type (thus allowing captures made - * on one platform to be read on other platforms, even if the two - * platforms don't use the same numerical values for all DLT_ types). - */ #define DLT_IEEE802_11 105 /* IEEE 802.11 wireless */ /* @@ -319,6 +385,14 @@ struct bpf_hdr_ext { * between platforms; don't use those values for new DLT_ new types. */ +/* + * Frame Relay; BSD/OS has a DLT_FR with a value of 11, but that collides + * with other values. + * DLT_FR and DLT_FRELAY packets start with the Q.922 Frame Relay header + * (DLCI, etc.). + */ +#define DLT_FRELAY 107 + /* * OpenBSD DLT_LOOP, for loopback devices; it's like DLT_NULL, except * that the AF_ type in the link-layer header is in network byte order. @@ -336,17 +410,80 @@ struct bpf_hdr_ext { * between platforms; don't use those values for new DLT_ new types. */ +/* + * Encapsulated packets for IPsec; DLT_ENC is 13 in OpenBSD, but that's + * DLT_SLIP_BSDOS in NetBSD, so we don't use 13 for it in OSes other + * than OpenBSD. + */ +#define DLT_ENC 109 + /* * This is for Linux cooked sockets. */ #define DLT_LINUX_SLL 113 +/* + * Apple LocalTalk hardware. + */ +#define DLT_LTALK 114 + +/* + * Acorn Econet. + */ +#define DLT_ECONET 115 + +/* + * Reserved for use with OpenBSD ipfilter. + */ +#define DLT_IPFILTER 116 + /* * For use in capture-file headers as a link-layer type corresponding * to OpenBSD PF (Packet Filter) log. */ #define DLT_PFLOG 117 +/* + * Registered for Cisco-internal use. + */ +#define DLT_CISCO_IOS 118 + +/* + * Reserved for 802.11 cards using the Prism II chips, with a link-layer + * header including Prism monitor mode information plus an 802.11 + * header. + */ +#define DLT_PRISM_HEADER 119 + +/* + * Reserved for Aironet 802.11 cards, with an Aironet link-layer header + * (see Doug Ambrisko's FreeBSD patches). 
+ */ +#define DLT_AIRONET_HEADER 120 + +/* + * Reserved for Siemens HiPath HDLC. XXX + */ +#define DLT_HHDLC 121 + +/* + * Reserved for RFC 2625 IP-over-Fibre Channel. + */ +#define DLT_IP_OVER_FC 122 + +/* + * Reserved for Full Frontal ATM on Solaris. + */ +#define DLT_SUNATM 123 + +/* + * Reserved as per request from Kent Dahlgren + * for private use. + */ +#define DLT_RIO 124 /* RapidIO */ +#define DLT_PCI_EXP 125 /* PCI Express */ +#define DLT_AURORA 126 /* Xilinx Aurora link layer */ + /* * BSD header for 802.11 plus a number of bits of link-layer information * including radio information. @@ -355,6 +492,28 @@ struct bpf_hdr_ext { #define DLT_IEEE802_11_RADIO 127 #endif +/* + * Reserved for TZSP encapsulation. + */ +#define DLT_TZSP 128 /* Tazmen Sniffer Protocol */ + +/* + * Reserved for Linux ARCNET. + */ +#define DLT_ARCNET_LINUX 129 + +/* + * Juniper-private data link types. + */ +#define DLT_JUNIPER_MLPPP 130 +#define DLT_JUNIPER_MLFR 131 +#define DLT_JUNIPER_ES 132 +#define DLT_JUNIPER_GGSN 133 +#define DLT_JUNIPER_MFR 134 +#define DLT_JUNIPER_ATM2 135 +#define DLT_JUNIPER_SERVICES 136 +#define DLT_JUNIPER_ATM1 137 + /* * Apple IP-over-IEEE 1394, as per a request from Dieter Siegmund * . The header that's presented is an Ethernet-like @@ -372,6 +531,80 @@ struct bpf_hdr_ext { */ #define DLT_APPLE_IP_OVER_IEEE1394 138 +/* + * Various SS7 encapsulations, as per a request from Jeff Morriss + * and subsequent discussions. + */ +#define DLT_MTP2_WITH_PHDR 139 /* pseudo-header with various info, followed by MTP2 */ +#define DLT_MTP2 140 /* MTP2, without pseudo-header */ +#define DLT_MTP3 141 /* MTP3, without pseudo-header or MTP2 */ +#define DLT_SCCP 142 /* SCCP, without pseudo-header or MTP2 or MTP3 */ + +/* + * Reserved for DOCSIS. + */ +#define DLT_DOCSIS 143 + +/* + * Reserved for Linux IrDA. + */ +#define DLT_LINUX_IRDA 144 + +/* + * Reserved for IBM SP switch and IBM Next Federation switch. + */ +#define DLT_IBM_SP 145 +#define DLT_IBM_SN 146 + +/* + * Reserved for private use. If you have some link-layer header type + * that you want to use within your organization, with the capture files + * using that link-layer header type not ever be sent outside your + * organization, you can use these values. + * + * No libpcap release will use these for any purpose, nor will any + * tcpdump release use them, either. + * + * Do *NOT* use these in capture files that you expect anybody not using + * your private versions of capture-file-reading tools to read; in + * particular, do *NOT* use them in products, otherwise you may find that + * people won't be able to use tcpdump, or snort, or Ethereal, or... to + * read capture files from your firewall/intrusion detection/traffic + * monitoring/etc. appliance, or whatever product uses that DLT_ value, + * and you may also find that the developers of those applications will + * not accept patches to let them read those files. + * + * Also, do not use them if somebody might send you a capture using them + * for *their* private type and tools using them for *your* private type + * would have to read them. + * + * Instead, ask "tcpdump-workers@tcpdump.org" for a new DLT_ value, + * as per the comment above, and use the type you're given. 
+ */ +#define DLT_USER0 147 +#define DLT_USER1 148 +#define DLT_USER2 149 +#define DLT_USER3 150 +#define DLT_USER4 151 +#define DLT_USER5 152 +#define DLT_USER6 153 +#define DLT_USER7 154 +#define DLT_USER8 155 +#define DLT_USER9 156 +#define DLT_USER10 157 +#define DLT_USER11 158 +#define DLT_USER12 159 +#define DLT_USER13 160 +#define DLT_USER14 161 +#define DLT_USER15 162 + +#ifdef PRIVATE +/* + * For Apple private usage + */ +#define DLT_PKTAP DLT_USER2 /* rdar://11779467 */ +#endif /* PRIVATE */ + /* * For future use with 802.11 captures - defined by AbsoluteValue * Systems to store a number of bits of link-layer information @@ -384,6 +617,579 @@ struct bpf_hdr_ext { */ #define DLT_IEEE802_11_RADIO_AVS 163 /* 802.11 plus AVS radio header */ +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . The DLT_s are used + * for passing on chassis-internal metainformation such as + * QOS profiles, etc.. + */ +#define DLT_JUNIPER_MONITOR 164 + +/* + * Reserved for BACnet MS/TP. + */ +#define DLT_BACNET_MS_TP 165 + +/* + * Another PPP variant as per request from Karsten Keil . + * + * This is used in some OSes to allow a kernel socket filter to distinguish + * between incoming and outgoing packets, on a socket intended to + * supply pppd with outgoing packets so it can do dial-on-demand and + * hangup-on-lack-of-demand; incoming packets are filtered out so they + * don't cause pppd to hold the connection up (you don't want random + * input packets such as port scans, packets from old lost connections, + * etc. to force the connection to stay up). + * + * The first byte of the PPP header (0xff03) is modified to accomodate + * the direction - 0x00 = IN, 0x01 = OUT. + */ +#define DLT_PPP_PPPD 166 + +/* + * Names for backwards compatibility with older versions of some PPP + * software; new software should use DLT_PPP_PPPD. + */ +#define DLT_PPP_WITH_DIRECTION DLT_PPP_PPPD +#define DLT_LINUX_PPP_WITHDIRECTION DLT_PPP_PPPD + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . The DLT_s are used + * for passing on chassis-internal metainformation such as + * QOS profiles, cookies, etc.. + */ +#define DLT_JUNIPER_PPPOE 167 +#define DLT_JUNIPER_PPPOE_ATM 168 + +#define DLT_GPRS_LLC 169 /* GPRS LLC */ +#define DLT_GPF_T 170 /* GPF-T (ITU-T G.7041/Y.1303) */ +#define DLT_GPF_F 171 /* GPF-F (ITU-T G.7041/Y.1303) */ + +/* + * Requested by Oolan Zimmer for use in Gcom's T1/E1 line + * monitoring equipment. + */ +#define DLT_GCOM_T1E1 172 +#define DLT_GCOM_SERIAL 173 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . The DLT_ is used + * for internal communication to Physical Interface Cards (PIC) + */ +#define DLT_JUNIPER_PIC_PEER 174 + +/* + * Link types requested by Gregor Maier of Endace + * Measurement Systems. They add an ERF header (see + * http://www.endace.com/support/EndaceRecordFormat.pdf) in front of + * the link-layer header. + */ +#define DLT_ERF_ETH 175 /* Ethernet */ +#define DLT_ERF_POS 176 /* Packet-over-SONET */ + +/* + * Requested by Daniele Orlandi for raw LAPD + * for vISDN (http://www.orlandi.com/visdn/). Its link-layer header + * includes additional information before the LAPD header, so it's + * not necessarily a generic LAPD header. + */ +#define DLT_LINUX_LAPD 177 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . 
+ * The DLT_s are used for prepending meta-information
+ * like interface index, interface name
+ * before standard Ethernet, PPP, Frelay & C-HDLC Frames
+ */
+#define DLT_JUNIPER_ETHER 178
+#define DLT_JUNIPER_PPP 179
+#define DLT_JUNIPER_FRELAY 180
+#define DLT_JUNIPER_CHDLC 181
+
+/*
+ * Multi Link Frame Relay (FRF.16)
+ */
+#define DLT_MFR 182
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler .
+ * The DLT_ is used for internal communication with a
+ * voice Adapter Card (PIC)
+ */
+#define DLT_JUNIPER_VP 183
+
+/*
+ * Arinc 429 frames.
+ * DLT_ requested by Gianluca Varenni .
+ * Every frame contains a 32bit A429 label.
+ * More documentation on Arinc 429 can be found at
+ * http://www.condoreng.com/support/downloads/tutorials/ARINCTutorial.pdf
+ */
+#define DLT_A429 184
+
+/*
+ * Arinc 653 Interpartition Communication messages.
+ * DLT_ requested by Gianluca Varenni .
+ * Please refer to the A653-1 standard for more information.
+ */
+#define DLT_A653_ICM 185
+
+/*
+ * USB packets, beginning with a USB setup header; requested by
+ * Paolo Abeni .
+ */
+#define DLT_USB 186
+
+/*
+ * Bluetooth HCI UART transport layer (part H:4); requested by
+ * Paolo Abeni.
+ */
+#define DLT_BLUETOOTH_HCI_H4 187
+
+/*
+ * IEEE 802.16 MAC Common Part Sublayer; requested by Maria Cruz
+ * .
+ */
+#define DLT_IEEE802_16_MAC_CPS 188
+
+/*
+ * USB packets, beginning with a Linux USB header; requested by
+ * Paolo Abeni .
+ */
+#define DLT_USB_LINUX 189
+
+/*
+ * Controller Area Network (CAN) v. 2.0B packets.
+ * DLT_ requested by Gianluca Varenni .
+ * Used to dump CAN packets coming from a CAN Vector board.
+ * More documentation on the CAN v2.0B frames can be found at
+ * http://www.can-cia.org/downloads/?269
+ */
+#define DLT_CAN20B 190
+
+/*
+ * IEEE 802.15.4, with address fields padded, as is done by Linux
+ * drivers; requested by Juergen Schimmer.
+ */
+#define DLT_IEEE802_15_4_LINUX 191
+
+/*
+ * Per Packet Information encapsulated packets.
+ * DLT_ requested by Gianluca Varenni .
+ */
+#define DLT_PPI 192
+
+/*
+ * Header for 802.16 MAC Common Part Sublayer plus a radiotap radio header;
+ * requested by Charles Clancy.
+ */
+#define DLT_IEEE802_16_MAC_CPS_RADIO 193
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler .
+ * The DLT_ is used for internal communication with an
+ * integrated service module (ISM).
+ */
+#define DLT_JUNIPER_ISM 194
+
+/*
+ * IEEE 802.15.4, exactly as it appears in the spec (no padding, no
+ * nothing); requested by Mikko Saarnivala .
+ */
+#define DLT_IEEE802_15_4 195
+
+/*
+ * Various link-layer types, with a pseudo-header, for SITA
+ * (http://www.sita.aero/); requested by Fulko Hew (fulko.hew@gmail.com).
+ */
+#define DLT_SITA 196
+
+/*
+ * Various link-layer types, with a pseudo-header, for Endace DAG cards;
+ * encapsulates Endace ERF records. Requested by Stephen Donnelly
+ * .
+ */
+#define DLT_ERF 197
+
+/*
+ * Special header prepended to Ethernet packets when capturing from a
+ * u10 Networks board. Requested by Phil Mulholland
+ * .
+ */
+#define DLT_RAIF1 198
+
+/*
+ * IPMB packet for IPMI, beginning with the I2C slave address, followed
+ * by the netFn and LUN, etc.. Requested by Chanthy Toeung
+ * .
+ */
+#define DLT_IPMB 199
+
+/*
+ * Juniper-private data link type, as per request from
+ * Hannes Gredler .
+ * The DLT_ is used for capturing data on a secure tunnel interface.
+ */ +#define DLT_JUNIPER_ST 200 + +/* + * Bluetooth HCI UART transport layer (part H:4), with pseudo-header + * that includes direction information; requested by Paolo Abeni. + */ +#define DLT_BLUETOOTH_HCI_H4_WITH_PHDR 201 + +/* + * AX.25 packet with a 1-byte KISS header; see + * + * http://www.ax25.net/kiss.htm + * + * as per Richard Stearn . + */ +#define DLT_AX25_KISS 202 + +/* + * LAPD packets from an ISDN channel, starting with the address field, + * with no pseudo-header. + * Requested by Varuna De Silva . + */ +#define DLT_LAPD 203 + +/* + * Variants of various link-layer headers, with a one-byte direction + * pseudo-header prepended - zero means "received by this host", + * non-zero (any non-zero value) means "sent by this host" - as per + * Will Barker . + */ +#define DLT_PPP_WITH_DIR 204 /* PPP - don't confuse with DLT_PPP_WITH_DIRECTION */ +#define DLT_C_HDLC_WITH_DIR 205 /* Cisco HDLC */ +#define DLT_FRELAY_WITH_DIR 206 /* Frame Relay */ +#define DLT_LAPB_WITH_DIR 207 /* LAPB */ + +/* + * 208 is reserved for an as-yet-unspecified proprietary link-layer + * type, as requested by Will Barker. + */ + +/* + * IPMB with a Linux-specific pseudo-header; as requested by Alexey Neyman + * . + */ +#define DLT_IPMB_LINUX 209 + +/* + * FlexRay automotive bus - http://www.flexray.com/ - as requested + * by Hannes Kaelber . + */ +#define DLT_FLEXRAY 210 + +/* + * Media Oriented Systems Transport (MOST) bus for multimedia + * transport - http://www.mostcooperation.com/ - as requested + * by Hannes Kaelber . + */ +#define DLT_MOST 211 + +/* + * Local Interconnect Network (LIN) bus for vehicle networks - + * http://www.lin-subbus.org/ - as requested by Hannes Kaelber + * . + */ +#define DLT_LIN 212 + +/* + * X2E-private data link type used for serial line capture, + * as requested by Hannes Kaelber . + */ +#define DLT_X2E_SERIAL 213 + +/* + * X2E-private data link type used for the Xoraya data logger + * family, as requested by Hannes Kaelber . + */ +#define DLT_X2E_XORAYA 214 + +/* + * IEEE 802.15.4, exactly as it appears in the spec (no padding, no + * nothing), but with the PHY-level data for non-ASK PHYs (4 octets + * of 0 as preamble, one octet of SFD, one octet of frame length+ + * reserved bit, and then the MAC-layer data, starting with the + * frame control field). + * + * Requested by Max Filippov . + */ +#define DLT_IEEE802_15_4_NONASK_PHY 215 + +/* + * David Gibson requested this for + * captures from the Linux kernel /dev/input/eventN devices. This + * is used to communicate keystrokes and mouse movements from the + * Linux kernel to display systems, such as Xorg. + */ +#define DLT_LINUX_EVDEV 216 + +/* + * GSM Um and Abis interfaces, preceded by a "gsmtap" header. + * + * Requested by Harald Welte . + */ +#define DLT_GSMTAP_UM 217 +#define DLT_GSMTAP_ABIS 218 + +/* + * MPLS, with an MPLS label as the link-layer header. + * Requested by Michele Marchetto on behalf + * of OpenBSD. + */ +#define DLT_MPLS 219 + +/* + * USB packets, beginning with a Linux USB header, with the USB header + * padded to 64 bytes; required for memory-mapped access. + */ +#define DLT_USB_LINUX_MMAPPED 220 + +/* + * DECT packets, with a pseudo-header; requested by + * Matthias Wenzel . + */ +#define DLT_DECT 221 + +/* + * From: "Lidwa, Eric (GSFC-582.0)[SGT INC]" + * Date: Mon, 11 May 2009 11:18:30 -0500 + * + * DLT_AOS. We need it for AOS Space Data Link Protocol. + * I have already written dissectors for but need an OK from + * legal before I can submit a patch. 
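For the DLT 204-207 family above, a reader consumes the one-byte direction pseudo-header before the real link-layer header. A minimal sketch in plain C (illustrative only, not part of this header):

#include <stdint.h>
#include <stddef.h>

/*
 * Strip the direction pseudo-header used by DLT_PPP_WITH_DIR,
 * DLT_C_HDLC_WITH_DIR, DLT_FRELAY_WITH_DIR and DLT_LAPB_WITH_DIR:
 * 0 = received by this host, non-zero = sent by this host.
 */
static const uint8_t *
strip_direction(const uint8_t *pkt, size_t caplen, int *outbound)
{
	if (caplen < 1)
		return NULL;		/* truncated capture */
	*outbound = (pkt[0] != 0);
	return pkt + 1;			/* link-layer header proper */
}
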
+ * + */ +#define DLT_AOS 222 + +/* + * Wireless HART (Highway Addressable Remote Transducer) + * From the HART Communication Foundation + * IES/PAS 62591 + * + * Requested by Sam Roberts . + */ +#define DLT_WIHART 223 + +/* + * Fibre Channel FC-2 frames, beginning with a Frame_Header. + * Requested by Kahou Lei . + */ +#define DLT_FC_2 224 + +/* + * Fibre Channel FC-2 frames, beginning with an encoding of the + * SOF, and ending with an encoding of the EOF. + * + * The encodings represent the frame delimiters as 4-byte sequences + * representing the corresponding ordered sets, with K28.5 + * represented as 0xBC, and the D symbols as the corresponding + * byte values; for example, SOFi2, which is K28.5 - D21.5 - D1.2 - D21.2, + * is represented as 0xBC 0xB5 0x55 0x55. + * + * Requested by Kahou Lei . + */ +#define DLT_FC_2_WITH_FRAME_DELIMS 225 + +/* + * Solaris ipnet pseudo-header; requested by Darren Reed . + * + * The pseudo-header starts with a one-byte version number; for version 2, + * the pseudo-header is: + * + * struct dl_ipnetinfo { + * u_int8_t dli_version; + * u_int8_t dli_family; + * u_int16_t dli_htype; + * u_int32_t dli_pktlen; + * u_int32_t dli_ifindex; + * u_int32_t dli_grifindex; + * u_int32_t dli_zsrc; + * u_int32_t dli_zdst; + * }; + * + * dli_version is 2 for the current version of the pseudo-header. + * + * dli_family is a Solaris address family value, so it's 2 for IPv4 + * and 26 for IPv6. + * + * dli_htype is a "hook type" - 0 for incoming packets, 1 for outgoing + * packets, and 2 for packets arriving from another zone on the same + * machine. + * + * dli_pktlen is the length of the packet data following the pseudo-header + * (so the captured length minus dli_pktlen is the length of the + * pseudo-header, assuming the entire pseudo-header was captured). + * + * dli_ifindex is the interface index of the interface on which the + * packet arrived. + * + * dli_grifindex is the group interface index number (for IPMP interfaces). + * + * dli_zsrc is the zone identifier for the source of the packet. + * + * dli_zdst is the zone identifier for the destination of the packet. + * + * A zone number of 0 is the global zone; a zone number of 0xffffffff + * means that the packet arrived from another host on the network, not + * from another zone on the same machine. + * + * An IPv4 or IPv6 datagram follows the pseudo-header; dli_family indicates + * which of those it is. + */ +#define DLT_IPNET 226 + +/* + * CAN (Controller Area Network) frames, with a pseudo-header as supplied + * by Linux SocketCAN. See Documentation/networking/can.txt in the Linux + * source. + * + * Requested by Felix Obenhuber . + */ +#define DLT_CAN_SOCKETCAN 227 + +/* + * Raw IPv4/IPv6; different from DLT_RAW in that the DLT_ value specifies + * whether it's v4 or v6. Requested by Darren Reed . + */ +#define DLT_IPV4 228 +#define DLT_IPV6 229 + +/* + * IEEE 802.15.4, exactly as it appears in the spec (no padding, no + * nothing), and with no FCS at the end of the frame; requested by + * Jon Smirl . + */ +#define DLT_IEEE802_15_4_NOFCS 230 + +/* + * Raw D-Bus: + * + * http://www.freedesktop.org/wiki/Software/dbus + * + * messages: + * + * http://dbus.freedesktop.org/doc/dbus-specification.html#message-protocol-messages + * + * starting with the endianness flag, followed by the message type, etc., + * but without the authentication handshake before the message sequence: + * + * http://dbus.freedesktop.org/doc/dbus-specification.html#auth-protocol + * + * Requested by Martin Vidner . 
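A reader applying the version-2 dl_ipnetinfo layout described above needs only the two one-byte fields to classify the datagram. A sketch (illustrative; byte-order handling for the multi-byte fields is deliberately omitted):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Mirror of the version-2 pseudo-header documented above. */
struct dl_ipnetinfo {
	uint8_t  dli_version;
	uint8_t  dli_family;
	uint16_t dli_htype;
	uint32_t dli_pktlen;
	uint32_t dli_ifindex;
	uint32_t dli_grifindex;
	uint32_t dli_zsrc;
	uint32_t dli_zdst;
};

/* Returns 4 or 6 for the IP version of the payload, -1 on anything odd. */
static int
ipnet_payload(const uint8_t *pkt, size_t caplen, const uint8_t **payload)
{
	struct dl_ipnetinfo hdr;

	if (caplen < sizeof (hdr))
		return -1;
	memcpy(&hdr, pkt, sizeof (hdr));
	if (hdr.dli_version != 2)
		return -1;
	*payload = pkt + sizeof (hdr);
	if (hdr.dli_family == 2)	/* Solaris AF_INET */
		return 4;
	if (hdr.dli_family == 26)	/* Solaris AF_INET6 */
		return 6;
	return -1;
}
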
+ */ +#define DLT_DBUS 231 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . + */ +#define DLT_JUNIPER_VS 232 +#define DLT_JUNIPER_SRX_E2E 233 +#define DLT_JUNIPER_FIBRECHANNEL 234 + +/* + * DVB-CI (DVB Common Interface for communication between a PC Card + * module and a DVB receiver). See + * + * http://www.kaiser.cx/pcap-dvbci.html + * + * for the specification. + * + * Requested by Martin Kaiser . + */ +#define DLT_DVB_CI 235 + +/* + * Variant of 3GPP TS 27.010 multiplexing protocol (similar to, but + * *not* the same as, 27.010). Requested by Hans-Christoph Schemmel + * . + */ +#define DLT_MUX27010 236 + +/* + * STANAG 5066 D_PDUs. Requested by M. Baris Demiray + * . + */ +#define DLT_STANAG_5066_D_PDU 237 + +/* + * Juniper-private data link type, as per request from + * Hannes Gredler . + */ +#define DLT_JUNIPER_ATM_CEMIC 238 + +/* + * NetFilter LOG messages + * (payload of netlink NFNL_SUBSYS_ULOG/NFULNL_MSG_PACKET packets) + * + * Requested by Jakub Zawadzki + */ +#define DLT_NFLOG 239 + +/* + * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type + * for Ethernet packets with a 4-byte pseudo-header and always + * with the payload including the FCS, as supplied by their + * netANALYZER hardware and software. + * + * Requested by Holger P. Frommer + */ +#define DLT_NETANALYZER 240 + +/* + * Hilscher Gesellschaft fuer Systemautomation mbH link-layer type + * for Ethernet packets with a 4-byte pseudo-header and FCS and + * with the Ethernet header preceded by 7 bytes of preamble and + * 1 byte of SFD, as supplied by their netANALYZER hardware and + * software. + * + * Requested by Holger P. Frommer + */ +#define DLT_NETANALYZER_TRANSPARENT 241 + +/* + * IP-over-Infiniband, as specified by RFC 4391. + * + * Requested by Petr Sumbera . + */ +#define DLT_IPOIB 242 + +/* + * MPEG-2 transport stream (ISO 13818-1/ITU-T H.222.0). + * + * Requested by Guy Martin . + */ +#define DLT_MPEG_2_TS 243 + +/* + * ng4T GmbH's UMTS Iub/Iur-over-ATM and Iub/Iur-over-IP format as + * used by their ng40 protocol tester. + * + * Requested by Jens Grimmer . + */ +#define DLT_NG40 244 + +/* + * Pseudo-header giving adapter number and flags, followed by an NFC + * (Near-Field Communications) Logical Link Control Protocol (LLCP) PDU, + * as specified by NFC Forum Logical Link Control Protocol Technical + * Specification LLCP 1.1. + * + * Requested by Mike Wakerly . + */ +#define DLT_NFC_LLCP 245 + + +#define DLT_MATCHING_MAX 245 /* highest value in the "matching" range */ /* * The instruction encodings. @@ -569,6 +1375,8 @@ extern void bpfattach(ifnet_t interface, u_int data_link_type, @param data_link_type The data link type of the interface. See the DLT_* defines in bpf.h. @param header_length The length, in bytes, of the data link header. + @param send See the bpf_send_func described above. + @param tap See the bpf_tap_func described above. 
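Together with the prototype just below, the parameter documentation implies an attach path along these lines. A sketch of a hypothetical driver (my_bpf_send and my_bpf_tap are assumed callbacks matching the bpf_send_func and bpf_tap_func typedefs declared earlier in this header; the comments restate that documentation, not verified driver code):

/*
 * Hypothetical Ethernet driver attach path (sketch only).  my_bpf_send
 * is called to transmit packets handed down through the tap; my_bpf_tap
 * is notified when capture is enabled or disabled on the interface.
 */
static errno_t
driver_attach_bpf(ifnet_t ifp)
{
	errno_t err;

	err = bpf_attach(ifp, DLT_EN10MB, sizeof (struct ether_header),
	    my_bpf_send, my_bpf_tap);
	if (err != 0)
		printf("%s: bpf_attach failed (%d)\n", __func__, err);
	return (err);
}
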
*/ extern errno_t bpf_attach(ifnet_t interface, u_int32_t data_link_type, u_int32_t header_length, bpf_send_func send, bpf_tap_func tap); diff --git a/bsd/net/bpf_filter.c b/bsd/net/bpf_filter.c index 3ec0f2866..47e26fc19 100644 --- a/bsd/net/bpf_filter.c +++ b/bsd/net/bpf_filter.c @@ -407,10 +407,14 @@ bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen) continue; case BPF_ST: + if (pc->k >= BPF_MEMWORDS) + return 0; mem[pc->k] = A; continue; case BPF_STX: + if (pc->k >= BPF_MEMWORDS) + return 0; mem[pc->k] = X; continue; diff --git a/bsd/net/bpfdesc.h b/bsd/net/bpfdesc.h index d96300bc6..8cf632799 100644 --- a/bsd/net/bpfdesc.h +++ b/bsd/net/bpfdesc.h @@ -99,6 +99,7 @@ struct bpf_d { int bd_hlen; /* current length of hold buffer */ int bd_bufsize; /* absolute length of buffers */ + int bd_hbuf_read; /* reading from hbuf */ struct bpf_if *bd_bif; /* interface descriptor */ u_int32_t bd_rtout; /* Read timeout in 'ticks' */ diff --git a/bsd/net/bridgestp.c b/bsd/net/bridgestp.c index c7fc659a7..10d86fa4a 100644 --- a/bsd/net/bridgestp.c +++ b/bsd/net/bridgestp.c @@ -1,7 +1,7 @@ /* $NetBSD: bridgestp.c,v 1.5 2003/11/28 08:56:48 keihan Exp $ */ /* - * Copyright (c) 2009-2010 Apple Inc. All rights reserved. + * Copyright (c) 2009-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -310,7 +310,7 @@ bstp_transmit_tcn(struct bstp_state *bs, struct bstp_port *bp) eh = mtod(m, struct ether_header *); - memcpy(eh->ether_shost, ifnet_lladdr(ifp), ETHER_ADDR_LEN); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN); eh->ether_type = htons(sizeof(bpdu)); @@ -424,7 +424,7 @@ bstp_send_bpdu(struct bstp_state *bs, struct bstp_port *bp, bpdu->cbu_ctl = LLC_UI; bpdu->cbu_protoid = htons(BSTP_PROTO_ID); - memcpy(eh->ether_shost, ifnet_lladdr(ifp), ETHER_ADDR_LEN); + memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); memcpy(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN); switch (bpdu->cbu_bpdutype) { @@ -2114,14 +2114,14 @@ bstp_reinit(struct bstp_state *bs) if (ifp->if_type != IFT_ETHER) continue; - if (bstp_addr_cmp(ifnet_lladdr(ifp), llzero) == 0) + if (bstp_addr_cmp(IF_LLADDR(ifp), llzero) == 0) continue; if (mif == NULL) { mif = ifp; continue; } - if (bstp_addr_cmp(ifnet_lladdr(ifp), ifnet_lladdr(mif)) < 0) { + if (bstp_addr_cmp(IF_LLADDR(ifp), IF_LLADDR(mif)) < 0) { mif = ifp; continue; } @@ -2143,7 +2143,7 @@ bstp_reinit(struct bstp_state *bs) return; } - e_addr = ifnet_lladdr(mif); + e_addr = IF_LLADDR(mif); bs->bs_bridge_pv.pv_dbridge_id = (((uint64_t)bs->bs_bridge_priority) << 48) | (((uint64_t)e_addr[0]) << 40) | diff --git a/bsd/net/bsd_comp.c b/bsd/net/bsd_comp.c deleted file mode 100644 index 3dd6734c0..000000000 --- a/bsd/net/bsd_comp.c +++ /dev/null @@ -1,1153 +0,0 @@ -/* - * Copyright (c) 2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
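The BPF_ST/BPF_STX checks added to bpf_filter() above close an out-of-bounds write: the interpreter's scratch memory mem[] has only BPF_MEMWORDS slots, and pc->k comes straight from the filter program, so a store with an out-of-range index now fails the filter (returns 0) instead of writing past the array. A program the new checks refuse at run time (sketch; BPF_STMT is the standard program-building macro from this header):

/*
 * Filter with an out-of-range scratch store.  Programs loaded through
 * the usual ioctl path should already be rejected by bpf_validate();
 * the runtime check covers any caller that bypasses validation.
 */
struct bpf_insn oob_store[] = {
	BPF_STMT(BPF_LD + BPF_IMM, 1),		/* A <- 1 */
	BPF_STMT(BPF_ST, BPF_MEMWORDS + 1),	/* mem[k] <- A, k out of range */
	BPF_STMT(BPF_RET + BPF_K, (u_int)-1),	/* accept the whole packet */
};
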
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Because this code is derived from the 4.3BSD compress source: - * - * - * Copyright (c) 1985, 1986 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * James A. Woods, derived from original work by Spencer Thomas - * and Joseph Orost. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ - -/* - * This version is for use with mbufs on BSD-derived systems. - * - */ - -#include -#include -#include -#include -#include - -#define PACKETPTR struct mbuf * -#include - -#if CONFIG_MACF_NET -#include -#endif /* MAC_NET */ - -#if DO_BSD_COMPRESS -/* - * PPP "BSD compress" compression - * The differences between this compression and the classic BSD LZW - * source are obvious from the requirement that the classic code worked - * with files while this handles arbitrarily long streams that - * are broken into packets. 
They are: - * - * When the code size expands, a block of junk is not emitted by - * the compressor and not expected by the decompressor. - * - * New codes are not necessarily assigned every time an old - * code is output by the compressor. This is because a packet - * end forces a code to be emitted, but does not imply that a - * new sequence has been seen. - * - * The compression ratio is checked at the first end of a packet - * after the appropriate gap. Besides simplifying and speeding - * things up, this makes it more likely that the transmitter - * and receiver will agree when the dictionary is cleared when - * compression is not going well. - */ - -/* - * A dictionary for doing BSD compress. - */ -struct bsd_db { - int totlen; /* length of this structure */ - u_int hsize; /* size of the hash table */ - u_char hshift; /* used in hash function */ - u_char n_bits; /* current bits/code */ - u_char maxbits; - u_char debug; - u_char unit; - u_int16_t seqno; /* sequence # of next packet */ - u_int hdrlen; /* header length to preallocate */ - u_int mru; - u_int maxmaxcode; /* largest valid code */ - u_int max_ent; /* largest code in use */ - u_int in_count; /* uncompressed bytes, aged */ - u_int bytes_out; /* compressed bytes, aged */ - u_int ratio; /* recent compression ratio */ - u_int checkpoint; /* when to next check the ratio */ - u_int clear_count; /* times dictionary cleared */ - u_int incomp_count; /* incompressible packets */ - u_int incomp_bytes; /* incompressible bytes */ - u_int uncomp_count; /* uncompressed packets */ - u_int uncomp_bytes; /* uncompressed bytes */ - u_int comp_count; /* compressed packets */ - u_int comp_bytes; /* compressed bytes */ - u_int16_t *lens; /* array of lengths of codes */ - struct bsd_dict { - union { /* hash value */ - u_int32_t fcode; - struct { -#if BYTE_ORDER == LITTLE_ENDIAN - u_int16_t prefix; /* preceding code */ - u_char suffix; /* last character of new code */ - u_char pad; -#else - u_char pad; - u_char suffix; /* last character of new code */ - u_int16_t prefix; /* preceding code */ -#endif - } hs; - } f; - u_int16_t codem1; /* output of hash table -1 */ - u_int16_t cptr; /* map code to hash table entry */ - } dict[1]; -}; - -#define BSD_OVHD 2 /* BSD compress overhead/packet */ -#define BSD_INIT_BITS BSD_MIN_BITS - -static void bsd_clear(struct bsd_db *db); -static int bsd_check(struct bsd_db *db); -static void *bsd_alloc(u_char *options, int opt_len, int decomp); -static int bsd_init_comp_db(struct bsd_db *db, u_char *options, - int opt_len, - int unit, int hdrlen, int mru, int debug, - int decomp); -static void *bsd_comp_alloc(u_char *options, int opt_len); -static void *bsd_decomp_alloc(u_char *options, int opt_len); -static void bsd_free(void *state); -static int bsd_comp_init(void *state, u_char *options, int opt_len, - int unit, int hdrlen, int debug); -static int bsd_decomp_init(void *state, u_char *options, int opt_len, - int unit, int hdrlen, int mru, int debug); -static int bsd_compress(void *state, struct mbuf **mret, - struct mbuf *mp, int slen, int maxolen); -static void bsd_incomp(void *state, struct mbuf *dmsg); -static int bsd_decompress(void *state, struct mbuf *cmp, - struct mbuf **dmpp); -static void bsd_reset(void *state); -static void bsd_comp_stats(void *state, struct compstat *stats); - -/* - * Procedures exported to if_ppp.c. 
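The ops table just below was the plug-in boundary between if_ppp-style consumers and this compressor. A sketch of how a caller would drive it (the compress member and its signature are taken from the table's comments and from bsd_compress() further down; the surrounding glue is illustrative, and struct compressor itself is declared in a PPP header not shown here):

/* Illustrative consumer of the ops table below. */
static struct mbuf *
ccp_compress_one(struct compressor *comp, void *state,
    struct mbuf *m, int slen)
{
	struct mbuf *mcomp = NULL;

	/* returns the new length; mcomp stays NULL if the packet was
	 * left uncompressed */
	(void) (*comp->compress)(state, &mcomp, m, slen, slen);
	return (mcomp);
}
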
- */
-struct compressor ppp_bsd_compress = {
-    CI_BSD_COMPRESS,		/* compress_proto */
-    bsd_comp_alloc,		/* comp_alloc */
-    bsd_free,			/* comp_free */
-    bsd_comp_init,		/* comp_init */
-    bsd_reset,			/* comp_reset */
-    bsd_compress,		/* compress */
-    bsd_comp_stats,		/* comp_stat */
-    bsd_decomp_alloc,		/* decomp_alloc */
-    bsd_free,			/* decomp_free */
-    bsd_decomp_init,		/* decomp_init */
-    bsd_reset,			/* decomp_reset */
-    bsd_decompress,		/* decompress */
-    bsd_incomp,			/* incomp */
-    bsd_comp_stats,		/* decomp_stat */
-};
-
-/*
- * the next two codes should not be changed lightly, as they must not
- * lie within the contiguous general code space.
- */
-#define CLEAR 256 /* table clear output code */
-#define FIRST 257 /* first free entry */
-#define LAST 255
-
-#define MAXCODE(b) ((1 << (b)) - 1)
-#define BADCODEM1 MAXCODE(BSD_MAX_BITS)
-
-#define BSD_HASH(prefix,suffix,hshift) ((((u_int32_t)(suffix)) << (hshift)) \
-    ^ (u_int32_t)(prefix))
-#define BSD_KEY(prefix,suffix) ((((u_int32_t)(suffix)) << 16) \
-    + (u_int32_t)(prefix))
-
-#define CHECK_GAP 10000 /* Ratio check interval */
-
-#define RATIO_SCALE_LOG 8
-#define RATIO_SCALE (1<<RATIO_SCALE_LOG)
-#define RATIO_MAX (0x7fffffff>>RATIO_SCALE_LOG)
-
-/*
- * clear the dictionary
- */
-static void
-bsd_clear(db)
-    struct bsd_db *db;
-{
-    db->clear_count++;
-    db->max_ent = FIRST-1;
-    db->n_bits = BSD_INIT_BITS;
-    db->ratio = 0;
-    db->bytes_out = 0;
-    db->in_count = 0;
-    db->checkpoint = CHECK_GAP;
-}
-
-/*
- * If the dictionary is full, then see if it is time to reset it.
- *
- * Compute the compression ratio using fixed-point arithmetic
- * with 8 fractional bits.
- *
- * Since we have an infinite stream instead of a single file,
- * watch only the local compression ratio.
- *
- * Since both peers must reset the dictionary at the same time even in
- * the absence of CLEAR codes (while packets are incompressible), they
- * must compute the same ratio.
- */
-static int /* 1=output CLEAR */
-bsd_check(db)
-    struct bsd_db *db;
-{
-    u_int new_ratio;
-
-    if (db->in_count >= db->checkpoint) {
-        /* age the ratio by limiting the size of the counts */
-        if (db->in_count >= RATIO_MAX
-            || db->bytes_out >= RATIO_MAX) {
-            db->in_count -= db->in_count/4;
-            db->bytes_out -= db->bytes_out/4;
-        }
-
-        db->checkpoint = db->in_count + CHECK_GAP;
-
-        if (db->max_ent >= db->maxmaxcode) {
-            /* Reset the dictionary only if the ratio is worse,
-             * or if it looks as if it has been poisoned
-             * by incompressible data.
-             *
-             * This does not overflow, because
-             * db->in_count <= RATIO_MAX.
-             */
-            new_ratio = db->in_count << RATIO_SCALE_LOG;
-            if (db->bytes_out != 0)
-                new_ratio /= db->bytes_out;
-
-            if (new_ratio < db->ratio || new_ratio < 1 * RATIO_SCALE) {
-                bsd_clear(db);
-                return 1;
-            }
-            db->ratio = new_ratio;
-        }
-    }
-    return 0;
-}
-
-/*
- * Return statistics.
- */
-static void
-bsd_comp_stats(state, stats)
-    void *state;
-    struct compstat *stats;
-{
-    struct bsd_db *db = (struct bsd_db *) state;
-    u_int out;
-
-    stats->unc_bytes = db->uncomp_bytes;
-    stats->unc_packets = db->uncomp_count;
-    stats->comp_bytes = db->comp_bytes;
-    stats->comp_packets = db->comp_count;
-    stats->inc_bytes = db->incomp_bytes;
-    stats->inc_packets = db->incomp_count;
-    stats->ratio = db->in_count;
-    out = db->bytes_out;
-    if (stats->ratio <= 0x7fffff)
-        stats->ratio <<= 8;
-    else
-        out >>= 8;
-    if (out != 0)
-        stats->ratio /= out;
-}
-
-/*
- * Reset state, as on a CCP ResetReq.
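With RATIO_SCALE_LOG == 8, the fixed-point test in bsd_check() above is easy to verify by hand; a standalone restatement of the same arithmetic (userland sketch):

#include <stdio.h>

/*
 * 10000 bytes in, 4000 bytes out: (10000 << 8) / 4000 == 640, i.e. a
 * 2.50:1 ratio in Q8 fixed point.  A value below RATIO_SCALE (256,
 * meaning 1.00:1) says the "compressed" stream is expanding, so the
 * dictionary gets cleared.
 */
int
main(void)
{
	unsigned in_count = 10000, bytes_out = 4000;
	unsigned new_ratio = (in_count << 8) / bytes_out;

	printf("ratio %u.%02u:1 (fixed point %u)\n",
	    new_ratio >> 8, (new_ratio & 0xff) * 100 / 256, new_ratio);
	return 0;
}
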
- */ -static void -bsd_reset(state) - void *state; -{ - struct bsd_db *db = (struct bsd_db *) state; - - db->seqno = 0; - bsd_clear(db); - db->clear_count = 0; -} - -/* - * Allocate space for a (de) compressor. - */ -static void * -bsd_alloc(options, opt_len, decomp) - u_char *options; - int opt_len, decomp; -{ - int bits; - u_int newlen, hsize, hshift, maxmaxcode; - struct bsd_db *db; - - if (opt_len < CILEN_BSD_COMPRESS || options[0] != CI_BSD_COMPRESS - || options[1] != CILEN_BSD_COMPRESS - || BSD_VERSION(options[2]) != BSD_CURRENT_VERSION) - return NULL; - bits = BSD_NBITS(options[2]); - switch (bits) { - case 9: /* needs 82152 for both directions */ - case 10: /* needs 84144 */ - case 11: /* needs 88240 */ - case 12: /* needs 96432 */ - hsize = 5003; - hshift = 4; - break; - case 13: /* needs 176784 */ - hsize = 9001; - hshift = 5; - break; - case 14: /* needs 353744 */ - hsize = 18013; - hshift = 6; - break; - case 15: /* needs 691440 */ - hsize = 35023; - hshift = 7; - break; - case 16: /* needs 1366160--far too much, */ - /* hsize = 69001; */ /* and 69001 is too big for cptr */ - /* hshift = 8; */ /* in struct bsd_db */ - /* break; */ - default: - return NULL; - } - - maxmaxcode = MAXCODE(bits); - newlen = sizeof(*db) + (hsize-1) * (sizeof(db->dict[0])); - MALLOC(db, struct bsd_db *, newlen, M_DEVBUF, M_NOWAIT); - if (!db) - return NULL; - bzero(db, sizeof(*db) - sizeof(db->dict)); - - if (!decomp) { - db->lens = NULL; - } else { - MALLOC(db->lens, u_int16_t *, (maxmaxcode+1) * sizeof(db->lens[0]), - M_DEVBUF, M_NOWAIT); - if (!db->lens) { - FREE(db, M_DEVBUF); - return NULL; - } - } - - db->totlen = newlen; - db->hsize = hsize; - db->hshift = hshift; - db->maxmaxcode = maxmaxcode; - db->maxbits = bits; - - return (void *) db; -} - -static void -bsd_free(state) - void *state; -{ - struct bsd_db *db = (struct bsd_db *) state; - - if (db->lens) - FREE(db->lens, M_DEVBUF); - FREE(db, M_DEVBUF); -} - -static void * -bsd_comp_alloc(options, opt_len) - u_char *options; - int opt_len; -{ - return bsd_alloc(options, opt_len, 0); -} - -static void * -bsd_decomp_alloc(options, opt_len) - u_char *options; - int opt_len; -{ - return bsd_alloc(options, opt_len, 1); -} - -/* - * Initialize the database. 
- */ -static int -bsd_init_comp_db(db, options, opt_len, unit, hdrlen, mru, debug, decomp) - struct bsd_db *db; - u_char *options; - int opt_len, unit, hdrlen, mru, debug, decomp; -{ - int i; - - if (opt_len < CILEN_BSD_COMPRESS || options[0] != CI_BSD_COMPRESS - || options[1] != CILEN_BSD_COMPRESS - || BSD_VERSION(options[2]) != BSD_CURRENT_VERSION - || BSD_NBITS(options[2]) != db->maxbits - || (decomp && db->lens == NULL)) - return 0; - - if (decomp) { - i = LAST+1; - while (i != 0) - db->lens[--i] = 1; - } - i = db->hsize; - while (i != 0) { - db->dict[--i].codem1 = BADCODEM1; - db->dict[i].cptr = 0; - } - - db->unit = unit; - db->hdrlen = hdrlen; - db->mru = mru; -#if !DEBUG - if (debug) -#endif - db->debug = 1; - - bsd_reset(db); - - return 1; -} - -static int -bsd_comp_init(state, options, opt_len, unit, hdrlen, debug) - void *state; - u_char *options; - int opt_len, unit, hdrlen, debug; -{ - return bsd_init_comp_db((struct bsd_db *) state, options, opt_len, - unit, hdrlen, 0, debug, 0); -} - -static int -bsd_decomp_init(state, options, opt_len, unit, hdrlen, mru, debug) - void *state; - u_char *options; - int opt_len, unit, hdrlen, mru, debug; -{ - return bsd_init_comp_db((struct bsd_db *) state, options, opt_len, - unit, hdrlen, mru, debug, 1); -} - - -/* - * compress a packet - * One change from the BSD compress command is that when the - * code size expands, we do not output a bunch of padding. - */ -int /* new slen */ -bsd_compress(state, mret, mp, slen, maxolen) - void *state; - struct mbuf **mret; /* return compressed mbuf chain here */ - struct mbuf *mp; /* from here */ - int slen; /* uncompressed length */ - int maxolen; /* max compressed length */ -{ - struct bsd_db *db = (struct bsd_db *) state; - int hshift = db->hshift; - u_int max_ent = db->max_ent; - u_int n_bits = db->n_bits; - u_int bitno = 32; - u_int32_t accm = 0, fcode; - struct bsd_dict *dictp; - u_char c; - int hval, disp, ent, ilen; - u_char *rptr, *wptr; - u_char *cp_end; - int olen; - struct mbuf *m; - -#define PUTBYTE(v) { \ - ++olen; \ - if (wptr) { \ - *wptr++ = (v); \ - if (wptr >= cp_end) { \ - m->m_len = wptr - mtod(m, u_char *); \ - MGET(m->m_next, M_DONTWAIT, MT_DATA); \ - m = m->m_next; \ - if (m) { \ - m->m_len = 0; \ - if (maxolen - olen > MLEN) \ - MCLGET(m, M_DONTWAIT); \ - wptr = mtod(m, u_char *); \ - cp_end = wptr + M_TRAILINGSPACE(m); \ - } else \ - wptr = NULL; \ - } \ - } \ -} - -#define OUTPUT(ent) { \ - bitno -= n_bits; \ - accm |= ((ent) << bitno); \ - do { \ - PUTBYTE(accm >> 24); \ - accm <<= 8; \ - bitno += 8; \ - } while (bitno <= 24); \ -} - - /* - * If the protocol is not in the range we're interested in, - * just return without compressing the packet. If it is, - * the protocol becomes the first byte to compress. - */ - rptr = mtod(mp, u_char *); - ent = PPP_PROTOCOL(rptr); - if (ent < 0x21 || ent > 0xf9) { - *mret = NULL; - return slen; - } - - /* Don't generate compressed packets which are larger than - the uncompressed packet. */ - if (maxolen > slen) - maxolen = slen; - - /* Allocate one mbuf to start with. */ - MGET(m, M_DONTWAIT, MT_DATA); - *mret = m; - if (m != NULL) { - m->m_len = 0; - if (maxolen + db->hdrlen > MLEN) - MCLGET(m, M_DONTWAIT); - m->m_data += db->hdrlen; - wptr = mtod(m, u_char *); - cp_end = wptr + M_TRAILINGSPACE(m); - } else - wptr = cp_end = NULL; - - /* - * Copy the PPP header over, changing the protocol, - * and install the 2-byte packet sequence number. 
- */ - if (wptr) { - *wptr++ = PPP_ADDRESS(rptr); /* assumes the ppp header is */ - *wptr++ = PPP_CONTROL(rptr); /* all in one mbuf */ - *wptr++ = 0; /* change the protocol */ - *wptr++ = PPP_COMP; - *wptr++ = db->seqno >> 8; - *wptr++ = db->seqno; - } - ++db->seqno; - - olen = 0; - rptr += PPP_HDRLEN; - slen = mp->m_len - PPP_HDRLEN; - ilen = slen + 1; - for (;;) { - if (slen <= 0) { - mp = mp->m_next; - if (!mp) - break; - rptr = mtod(mp, u_char *); - slen = mp->m_len; - if (!slen) - continue; /* handle 0-length buffers */ - ilen += slen; - } - - slen--; - c = *rptr++; - fcode = BSD_KEY(ent, c); - hval = BSD_HASH(ent, c, hshift); - dictp = &db->dict[hval]; - - /* Validate and then check the entry. */ - if (dictp->codem1 >= max_ent) - goto nomatch; - if (dictp->f.fcode == fcode) { - ent = dictp->codem1+1; - continue; /* found (prefix,suffix) */ - } - - /* continue probing until a match or invalid entry */ - disp = (hval == 0) ? 1 : hval; - do { - hval += disp; - if (hval >= db->hsize) - hval -= db->hsize; - dictp = &db->dict[hval]; - if (dictp->codem1 >= max_ent) - goto nomatch; - } while (dictp->f.fcode != fcode); - ent = dictp->codem1 + 1; /* finally found (prefix,suffix) */ - continue; - - nomatch: - OUTPUT(ent); /* output the prefix */ - - /* code -> hashtable */ - if (max_ent < db->maxmaxcode) { - struct bsd_dict *dictp2; - /* expand code size if needed */ - if (max_ent >= MAXCODE(n_bits)) - db->n_bits = ++n_bits; - - /* Invalidate old hash table entry using - * this code, and then take it over. - */ - dictp2 = &db->dict[max_ent+1]; - if (db->dict[dictp2->cptr].codem1 == max_ent) - db->dict[dictp2->cptr].codem1 = BADCODEM1; - dictp2->cptr = hval; - dictp->codem1 = max_ent; - dictp->f.fcode = fcode; - - db->max_ent = ++max_ent; - } - ent = c; - } - - OUTPUT(ent); /* output the last code */ - db->bytes_out += olen; - db->in_count += ilen; - if (bitno < 32) - ++db->bytes_out; /* count complete bytes */ - - if (bsd_check(db)) - OUTPUT(CLEAR); /* do not count the CLEAR */ - - /* - * Pad dribble bits of last code with ones. - * Do not emit a completely useless byte of ones. - */ - if (bitno != 32) - PUTBYTE((accm | (0xff << (bitno-8))) >> 24); - - if (m != NULL) { - m->m_len = wptr - mtod(m, u_char *); - m->m_next = NULL; - } - - /* - * Increase code size if we would have without the packet - * boundary and as the decompressor will. - */ - if (max_ent >= MAXCODE(n_bits) && max_ent < db->maxmaxcode) - db->n_bits++; - - db->uncomp_bytes += ilen; - ++db->uncomp_count; - if (olen + PPP_HDRLEN + BSD_OVHD > maxolen) { - /* throw away the compressed stuff if it is longer than uncompressed */ - if (*mret != NULL) { - m_freem(*mret); - *mret = NULL; - } - ++db->incomp_count; - db->incomp_bytes += ilen; - } else { - ++db->comp_count; - db->comp_bytes += olen + BSD_OVHD; - } - - return olen + PPP_HDRLEN + BSD_OVHD; -#undef OUTPUT -#undef PUTBYTE -} - - -/* - * Update the "BSD Compress" dictionary on the receiver for - * incompressible data by pretending to compress the incoming data. - */ -static void -bsd_incomp(state, dmsg) - void *state; - struct mbuf *dmsg; -{ - struct bsd_db *db = (struct bsd_db *) state; - u_int hshift = db->hshift; - u_int max_ent = db->max_ent; - u_int n_bits = db->n_bits; - struct bsd_dict *dictp; - u_int32_t fcode; - u_char c; - u_int32_t hval, disp; - int slen, ilen; - u_int bitno = 7; - u_char *rptr; - u_int ent; - - /* - * If the protocol is not in the range we're interested in, - * just return without looking at the packet. 
If it is, - * the protocol becomes the first byte to "compress". - */ - rptr = mtod(dmsg, u_char *); - ent = PPP_PROTOCOL(rptr); - if (ent < 0x21 || ent > 0xf9) - return; - - db->seqno++; - ilen = 1; /* count the protocol as 1 byte */ - rptr += PPP_HDRLEN; - slen = dmsg->m_len - PPP_HDRLEN; - for (;;) { - if (slen <= 0) { - dmsg = dmsg->m_next; - if (!dmsg) - break; - rptr = mtod(dmsg, u_char *); - slen = dmsg->m_len; - continue; - } - ilen += slen; - - do { - c = *rptr++; - fcode = BSD_KEY(ent, c); - hval = BSD_HASH(ent, c, hshift); - dictp = &db->dict[hval]; - - /* validate and then check the entry */ - if (dictp->codem1 >= max_ent) - goto nomatch; - if (dictp->f.fcode == fcode) { - ent = dictp->codem1+1; - continue; /* found (prefix,suffix) */ - } - - /* continue probing until a match or invalid entry */ - disp = (hval == 0) ? 1 : hval; - do { - hval += disp; - if (hval >= db->hsize) - hval -= db->hsize; - dictp = &db->dict[hval]; - if (dictp->codem1 >= max_ent) - goto nomatch; - } while (dictp->f.fcode != fcode); - ent = dictp->codem1+1; - continue; /* finally found (prefix,suffix) */ - - nomatch: /* output (count) the prefix */ - bitno += n_bits; - - /* code -> hashtable */ - if (max_ent < db->maxmaxcode) { - struct bsd_dict *dictp2; - /* expand code size if needed */ - if (max_ent >= MAXCODE(n_bits)) - db->n_bits = ++n_bits; - - /* Invalidate previous hash table entry - * assigned this code, and then take it over. - */ - dictp2 = &db->dict[max_ent+1]; - if (db->dict[dictp2->cptr].codem1 == max_ent) - db->dict[dictp2->cptr].codem1 = BADCODEM1; - dictp2->cptr = hval; - dictp->codem1 = max_ent; - dictp->f.fcode = fcode; - - db->max_ent = ++max_ent; - db->lens[max_ent] = db->lens[ent]+1; - } - ent = c; - } while (--slen != 0); - } - bitno += n_bits; /* output (count) the last code */ - db->bytes_out += bitno/8; - db->in_count += ilen; - (void)bsd_check(db); - - ++db->incomp_count; - db->incomp_bytes += ilen; - ++db->uncomp_count; - db->uncomp_bytes += ilen; - - /* Increase code size if we would have without the packet - * boundary and as the decompressor will. - */ - if (max_ent >= MAXCODE(n_bits) && max_ent < db->maxmaxcode) - db->n_bits++; -} - - -/* - * Decompress "BSD Compress". - * - * Because of patent problems, we return DECOMP_ERROR for errors - * found by inspecting the input data and for system problems, but - * DECOMP_FATALERROR for any errors which could possibly be said to - * be being detected "after" decompression. For DECOMP_ERROR, - * we can issue a CCP reset-request; for DECOMP_FATALERROR, we may be - * infringing a patent of Motorola's if we do, so we take CCP down - * instead. - * - * Given that the frame has the correct sequence number and a good FCS, - * errors such as invalid codes in the input most likely indicate a - * bug, so we return DECOMP_FATALERROR for them in order to turn off - * compression, even though they are detected by inspecting the input. - */ -int -bsd_decompress(state, cmp, dmpp) - void *state; - struct mbuf *cmp, **dmpp; -{ - struct bsd_db *db = (struct bsd_db *) state; - u_int max_ent = db->max_ent; - u_int32_t accm = 0; - u_int bitno = 32; /* 1st valid bit in accm */ - u_int n_bits = db->n_bits; - u_int tgtbitno = 32-n_bits; /* bitno when we have a code */ - struct bsd_dict *dictp; - int explen, i, seq, len; - u_int incode, oldcode, finchar; - u_char *p, *rptr, *wptr; - struct mbuf *m, *dmp, *mret; - int adrs, ctrl, ilen; - int space, codelen, extra; - - /* - * Save the address/control from the PPP header - * and then get the sequence number. 
- */ - *dmpp = NULL; - rptr = mtod(cmp, u_char *); - adrs = PPP_ADDRESS(rptr); - ctrl = PPP_CONTROL(rptr); - rptr += PPP_HDRLEN; - len = cmp->m_len - PPP_HDRLEN; - seq = 0; - for (i = 0; i < 2; ++i) { - while (len <= 0) { - cmp = cmp->m_next; - if (cmp == NULL) - return DECOMP_ERROR; - rptr = mtod(cmp, u_char *); - len = cmp->m_len; - } - seq = (seq << 8) + *rptr++; - --len; - } - - /* - * Check the sequence number and give up if it differs from - * the value we're expecting. - */ - if (seq != db->seqno) { - if (db->debug) - printf("bsd_decomp%d: bad sequence # %d, expected %d\n", - db->unit, seq, db->seqno - 1); - return DECOMP_ERROR; - } - ++db->seqno; - - /* - * Allocate one mbuf to start with. - */ - MGETHDR(dmp, M_DONTWAIT, MT_DATA); - if (dmp == NULL) - return DECOMP_ERROR; - mret = dmp; - dmp->m_len = 0; - dmp->m_next = NULL; - MCLGET(dmp, M_DONTWAIT); - dmp->m_data += db->hdrlen; - wptr = mtod(dmp, u_char *); - space = M_TRAILINGSPACE(dmp) - PPP_HDRLEN + 1; -#if CONFIG_MACF_NET - mac_mbuf_label_copy(cmp, dmp); -#endif - - /* - * Fill in the ppp header, but not the last byte of the protocol - * (that comes from the decompressed data). - */ - wptr[0] = adrs; - wptr[1] = ctrl; - wptr[2] = 0; - wptr += PPP_HDRLEN - 1; - - ilen = len; - oldcode = CLEAR; - explen = 0; - for (;;) { - if (len == 0) { - cmp = cmp->m_next; - if (!cmp) /* quit at end of message */ - break; - rptr = mtod(cmp, u_char *); - len = cmp->m_len; - ilen += len; - continue; /* handle 0-length buffers */ - } - - /* - * Accumulate bytes until we have a complete code. - * Then get the next code, relying on the 32-bit, - * unsigned accm to mask the result. - */ - bitno -= 8; - accm |= *rptr++ << bitno; - --len; - if (tgtbitno < bitno) - continue; - incode = accm >> tgtbitno; - accm <<= n_bits; - bitno += n_bits; - - if (incode == CLEAR) { - /* - * The dictionary must only be cleared at - * the end of a packet. But there could be an - * empty mbuf at the end. - */ - if (len > 0 || cmp->m_next != NULL) { - while ((cmp = cmp->m_next) != NULL) - len += cmp->m_len; - if (len > 0) { - m_freem(mret); - if (db->debug) - printf("bsd_decomp%d: bad CLEAR\n", db->unit); - return DECOMP_FATALERROR; /* probably a bug */ - } - } - bsd_clear(db); - explen = ilen = 0; - break; - } - - if (incode > max_ent + 2 || incode > db->maxmaxcode - || (incode > max_ent && oldcode == CLEAR)) { - m_freem(mret); - if (db->debug) { - printf("bsd_decomp%d: bad code 0x%x oldcode=0x%x ", - db->unit, incode, oldcode); - printf("max_ent=0x%x explen=%d seqno=%d\n", - max_ent, explen, db->seqno); - } - return DECOMP_FATALERROR; /* probably a bug */ - } - - /* Special case for KwKwK string. */ - if (incode > max_ent) { - finchar = oldcode; - extra = 1; - } else { - finchar = incode; - extra = 0; - } - - codelen = db->lens[finchar]; - explen += codelen + extra; - if (explen > db->mru + 1) { - m_freem(mret); - if (db->debug) { - printf("bsd_decomp%d: ran out of mru\n", db->unit); -#if DEBUG - while ((cmp = cmp->m_next) != NULL) - len += cmp->m_len; - printf(" len=%d, finchar=0x%x, codelen=%d, explen=%d\n", - len, finchar, codelen, explen); -#endif - } - return DECOMP_FATALERROR; - } - - /* - * For simplicity, the decoded characters go in a single mbuf, - * so we allocate a single extra cluster mbuf if necessary. 
- */ - if ((space -= codelen + extra) < 0) { - dmp->m_len = wptr - mtod(dmp, u_char *); - MGET(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { - m_freem(mret); - return DECOMP_ERROR; - } - m->m_len = 0; - m->m_next = NULL; - dmp->m_next = m; - MCLGET(m, M_DONTWAIT); - space = M_TRAILINGSPACE(m) - (codelen + extra); - if (space < 0) { - /* now that's what I call *compression*. */ - m_freem(mret); - return DECOMP_ERROR; - } - dmp = m; - wptr = mtod(dmp, u_char *); - } - - /* - * Decode this code and install it in the decompressed buffer. - */ - p = (wptr += codelen); - while (finchar > LAST) { - dictp = &db->dict[db->dict[finchar].cptr]; -#if DEBUG - if (--codelen <= 0 || dictp->codem1 != finchar-1) - goto bad; -#endif - *--p = dictp->f.hs.suffix; - finchar = dictp->f.hs.prefix; - } - *--p = finchar; - -#if DEBUG - if (--codelen != 0) - printf("bsd_decomp%d: short by %d after code 0x%x, max_ent=0x%x\n", - db->unit, codelen, incode, max_ent); -#endif - - if (extra) /* the KwKwK case again */ - *wptr++ = finchar; - - /* - * If not first code in a packet, and - * if not out of code space, then allocate a new code. - * - * Keep the hash table correct so it can be used - * with uncompressed packets. - */ - if (oldcode != CLEAR && max_ent < db->maxmaxcode) { - struct bsd_dict *dictp2; - u_int32_t fcode; - u_int32_t hval, disp; - - fcode = BSD_KEY(oldcode,finchar); - hval = BSD_HASH(oldcode,finchar,db->hshift); - dictp = &db->dict[hval]; - - /* look for a free hash table entry */ - if (dictp->codem1 < max_ent) { - disp = (hval == 0) ? 1 : hval; - do { - hval += disp; - if (hval >= db->hsize) - hval -= db->hsize; - dictp = &db->dict[hval]; - } while (dictp->codem1 < max_ent); - } - - /* - * Invalidate previous hash table entry - * assigned this code, and then take it over - */ - dictp2 = &db->dict[max_ent+1]; - if (db->dict[dictp2->cptr].codem1 == max_ent) { - db->dict[dictp2->cptr].codem1 = BADCODEM1; - } - dictp2->cptr = hval; - dictp->codem1 = max_ent; - dictp->f.fcode = fcode; - - db->max_ent = ++max_ent; - db->lens[max_ent] = db->lens[oldcode]+1; - - /* Expand code size if needed. */ - if (max_ent >= MAXCODE(n_bits) && max_ent < db->maxmaxcode) { - db->n_bits = ++n_bits; - tgtbitno = 32-n_bits; - } - } - oldcode = incode; - } - dmp->m_len = wptr - mtod(dmp, u_char *); - - /* - * Keep the checkpoint right so that incompressible packets - * clear the dictionary at the right times. 
- */ - db->bytes_out += ilen; - db->in_count += explen; - if (bsd_check(db) && db->debug) { - printf("bsd_decomp%d: peer should have cleared dictionary\n", - db->unit); - } - - ++db->comp_count; - db->comp_bytes += ilen + BSD_OVHD; - ++db->uncomp_count; - db->uncomp_bytes += explen; - - *dmpp = mret; - return DECOMP_OK; - -#if DEBUG - bad: - if (codelen <= 0) { - printf("bsd_decomp%d: fell off end of chain ", db->unit); - printf("0x%x at 0x%x by 0x%x, max_ent=0x%x\n", - incode, finchar, db->dict[finchar].cptr, max_ent); - } else if (dictp->codem1 != finchar-1) { - printf("bsd_decomp%d: bad code chain 0x%x finchar=0x%x ", - db->unit, incode, finchar); - printf("oldcode=0x%x cptr=0x%x codem1=0x%x\n", oldcode, - db->dict[finchar].cptr, dictp->codem1); - } - m_freem(mret); - return DECOMP_FATALERROR; -#endif /* DEBUG */ -} -#endif /* DO_BSD_COMPRESS */ diff --git a/bsd/net/classq/Makefile b/bsd/net/classq/Makefile index 9e99d6fbe..1aa7079e2 100644 --- a/bsd/net/classq/Makefile +++ b/bsd/net/classq/Makefile @@ -6,18 +6,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_PPC = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_PPC = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES= \ KERNELFILES= \ diff --git a/bsd/net/classq/classq.c b/bsd/net/classq/classq.c index 92b76007f..3d9d324ae 100644 --- a/bsd/net/classq/classq.c +++ b/bsd/net/classq/classq.c @@ -144,7 +144,7 @@ _getq_flow(class_queue_t *q, u_int32_t flow) MBUFQ_FOREACH_SAFE(m, &q->mbufq, m_tmp) { if (flow == 0 || ((m->m_flags & M_PKTHDR) && - m->m_pkthdr.m_flowhash == flow)) { + m->m_pkthdr.pkt_flowid == flow)) { /* remove it from the class queue */ MBUFQ_REMOVE(&q->mbufq, m); MBUFQ_NEXT(m) = NULL; @@ -331,7 +331,7 @@ _flushq_flow(class_queue_t *q, u_int32_t flow, u_int32_t *cnt, u_int32_t *len) MBUFQ_FOREACH_SAFE(m, &q->mbufq, m_tmp) { if (flow == 0 || ((m->m_flags & M_PKTHDR) && - m->m_pkthdr.m_flowhash == flow)) { + m->m_pkthdr.pkt_flowid == flow)) { /* remove it from the class queue */ MBUFQ_REMOVE(&q->mbufq, m); MBUFQ_NEXT(m) = NULL; diff --git a/bsd/net/classq/classq.h b/bsd/net/classq/classq.h index fa18ae4ca..b9705acb7 100644 --- a/bsd/net/classq/classq.h +++ b/bsd/net/classq/classq.h @@ -169,9 +169,11 @@ extern void _flushq_flow(class_queue_t *, u_int32_t, u_int32_t *, u_int32_t *); extern void classq_init(void); +#if PF_ECN extern u_int8_t read_dsfield(struct mbuf *, struct pf_mtag *); extern void write_dsfield(struct mbuf *, struct pf_mtag *, u_int8_t); extern int mark_ecn(struct mbuf *, struct pf_mtag *, int); +#endif /* PF_ECN */ #endif /* BSD_KERNEL_PRIVATE */ #ifdef __cplusplus diff --git a/bsd/net/classq/classq_blue.c b/bsd/net/classq/classq_blue.c index 6b67d94d1..fdf21b069 100644 --- a/bsd/net/classq/classq_blue.c +++ b/bsd/net/classq/classq_blue.c @@ -98,6 +98,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,7 @@ #include #include +#include /* * Blue is proposed and implemented by Wu-chang Feng . 
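The hunks that follow also replace random() with the kernel's RandomULong() wherever these queues roll the drop-or-mark dice, and the test itself is the same pattern throughout blue and red. A standalone restatement (sketch; rng stands in for RandomULong):

#include <stdbool.h>
#include <stdint.h>

/*
 * pmark/max_pmark is the current drop-or-mark probability; e.g. with
 * max_pmark == 1000 (blue_alloc's default below) and pmark == 250, one
 * packet in four is dropped or ECN-marked on average.
 */
static bool
should_drop(uint32_t pmark, uint32_t max_pmark, uint32_t (*rng)(void))
{
	return ((rng() % max_pmark) < pmark);
}
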
@@ -172,8 +174,16 @@ blue_alloc(struct ifnet *ifp, u_int32_t max_pmark, u_int32_t hold_time, bzero(bp, blue_size); bp->blue_idle = 1; - bp->blue_flags = (flags & BLUEF_USERFLAGS); bp->blue_ifp = ifp; + bp->blue_flags = (flags & BLUEF_USERFLAGS); +#if !PF_ECN + if (bp->blue_flags & BLUEF_ECN) { + bp->blue_flags &= ~BLUEF_ECN; + log(LOG_ERR, "%s: BLUE ECN not available; ignoring " + "BLUEF_ECN flag!\n", if_name(ifp)); + } +#endif /* !PF_ECN */ + if (max_pmark == 0) bp->blue_max_pmark = 1000; @@ -213,6 +223,9 @@ int blue_addq(struct blue *bp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag) { +#if !PF_ECN +#pragma unused(tag) +#endif /* !PF_ECN */ int droptype; /* @@ -244,12 +257,15 @@ blue_addq(struct blue *bp, class_queue_t *q, struct mbuf *m, droptype = DTYPE_NODROP; if (blue_drop_early(bp) && qlen(q) > 1) { /* mark or drop by blue */ +#if PF_ECN if ((bp->blue_flags & BLUEF_ECN) && - (tag->pftag_flags & PF_TAG_TCP) && /* only for TCP */ + (tag->pftag_proto == IPPROTO_TCP) && /* only for TCP */ mark_ecn(m, tag, bp->blue_flags)) { /* successfully marked. do not drop. */ bp->blue_stats.marked_packets++; - } else { + } else +#endif /* PF_ECN */ + { /* unforced drop by blue */ droptype = DTYPE_EARLY; } @@ -344,7 +360,7 @@ blue_purgeq(struct blue *bp, class_queue_t *q, u_int32_t flow, static int blue_drop_early(struct blue *bp) { - if ((random() % (unsigned)bp->blue_max_pmark) < + if ((RandomULong() % (unsigned)bp->blue_max_pmark) < (unsigned)bp->blue_pmark) { /* drop or mark */ return (1); diff --git a/bsd/net/classq/classq_red.c b/bsd/net/classq/classq_red.c index 825b62db8..63cbd8ede 100644 --- a/bsd/net/classq/classq_red.c +++ b/bsd/net/classq/classq_red.c @@ -97,9 +97,10 @@ #include #include #include +#include #include #include - +#include #include #include @@ -112,6 +113,7 @@ #endif #include +#include /* * ALTQ/RED (Random Early Detection) implementation using 32-bit @@ -246,8 +248,15 @@ red_alloc(struct ifnet *ifp, int weight, int inv_pmax, int th_min, else rp->red_thmax = th_max; - rp->red_flags = (flags & REDF_USERFLAGS); rp->red_ifp = ifp; + rp->red_flags = (flags & REDF_USERFLAGS); +#if !PF_ECN + if (rp->red_flags & REDF_ECN) { + rp->red_flags &= ~REDF_ECN; + log(LOG_ERR, "%s: RED ECN not available; ignoring " + "REDF_ECN flag!\n", if_name(ifp)); + } +#endif /* !PF_ECN */ if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ @@ -326,6 +335,9 @@ red_getstats(red_t *rp, struct red_stats *sp) int red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag) { +#if !PF_ECN +#pragma unused(tag) +#endif /* !PF_ECN */ int avg, droptype; int n; @@ -382,13 +394,16 @@ red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag) } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, rp->red_probd, rp->red_count)) { /* mark or drop by red */ +#if PF_ECN if ((rp->red_flags & REDF_ECN) && - (tag->pftag_flags & PF_TAG_TCP) && /* only TCP */ + (tag->pftag_proto == IPPROTO_TCP) && /* only TCP */ mark_ecn(m, tag, rp->red_flags)) { /* successfully marked. do not drop. 
*/ rp->red_count = 0; rp->red_stats.marked_packets++; - } else { + } else +#endif /* PF_ECN */ + { /* unforced drop by red */ droptype = DTYPE_EARLY; } @@ -458,7 +473,7 @@ drop_early(int fp_len, int fp_probd, int count) * drop probability = (avg - TH_MIN) / d */ - if ((random() % d) < (unsigned)fp_len) { + if ((RandomULong() % d) < (unsigned)fp_len) { /* drop or mark */ return (1); } diff --git a/bsd/net/classq/classq_rio.c b/bsd/net/classq/classq_rio.c index 20a44ee2e..91f7da00a 100644 --- a/bsd/net/classq/classq_rio.c +++ b/bsd/net/classq/classq_rio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -96,6 +96,7 @@ #include #include #include +#include #include #include #include @@ -113,6 +114,7 @@ #include #include +#include /* * RIO: RED with IN/OUT bit @@ -217,8 +219,20 @@ rio_alloc(struct ifnet *ifp, int weight, struct redparams *params, return (NULL); bzero(rp, rio_size); - rp->rio_flags = (flags & RIOF_USERFLAGS); rp->rio_ifp = ifp; + rp->rio_flags = (flags & RIOF_USERFLAGS); +#if !PF_ECN + if (rp->rio_flags & RIOF_ECN) { + rp->rio_flags &= ~RIOF_ECN; + log(LOG_ERR, "%s: RIO ECN not available; ignoring " + "RIOF_ECN flag!\n", if_name(ifp)); + } + if (rp->rio_flags & RIOF_CLEARDSCP) { + rp->rio_flags &= ~RIOF_CLEARDSCP; + log(LOG_ERR, "%s: RIO ECN not available; ignoring " + "RIOF_CLEARDSCP flag!\n", if_name(ifp)); + } +#endif /* !PF_ECN */ if (pkttime == 0) /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ @@ -341,17 +355,23 @@ dscp2index(u_int8_t dscp) } #endif -#define RIOM_SET_PRECINDEX(t, idx) do { \ - (t)->pftag_qpriv32 = (idx); \ +/* Store RIO precindex in the module private scratch space */ +#define pkt_precidx pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val32 + +#define RIOM_SET_PRECINDEX(pkt, idx) do { \ + (pkt)->pkt_precidx = (idx); \ } while (0) -#define RIOM_GET_PRECINDEX(t) \ - ({ u_int32_t idx; idx = (t)->pftag_qpriv32; \ - RIOM_SET_PRECINDEX(t, 0); idx; }) +#define RIOM_GET_PRECINDEX(pkt) \ + ({ u_int32_t idx; idx = (pkt)->pkt_precidx; \ + RIOM_SET_PRECINDEX(pkt, 0); idx; }) int rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag) { +#if !PF_ECN +#pragma unused(tag) +#endif /* !PF_ECN */ #define DSCP_MASK 0xfc int avg, droptype; u_int8_t dsfield, odsfield; @@ -359,7 +379,11 @@ rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag) struct timeval now; struct dropprec_state *prec; +#if PF_ECN dsfield = odsfield = read_dsfield(m, tag); +#else + dsfield = odsfield = 0; +#endif /* !PF_ECN */ dpindex = dscp2index(dsfield); /* @@ -447,13 +471,15 @@ rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, struct pf_mtag *tag) rp->rio_precstate[i].qlen++; /* save drop precedence index in mbuf hdr */ - RIOM_SET_PRECINDEX(tag, dpindex); + RIOM_SET_PRECINDEX(&m->m_pkthdr, dpindex); if (rp->rio_flags & RIOF_CLEARDSCP) dsfield &= ~DSCP_MASK; +#if PF_ECN if (dsfield != odsfield) write_dsfield(m, tag, dsfield); +#endif /* PF_ECN */ _addq(q, m); @@ -473,7 +499,7 @@ rio_getq_flow(struct rio *rp, class_queue_t *q, u_int32_t flow, boolean_t purge) VERIFY(m->m_flags & M_PKTHDR); - dpindex = RIOM_GET_PRECINDEX(m_pftag(m)); + dpindex = RIOM_GET_PRECINDEX(&m->m_pkthdr); for (i = dpindex; i < RIO_NDROPPREC; i++) { if (--rp->rio_precstate[i].qlen == 0) { if (rp->rio_precstate[i].idle == 0) { diff --git a/bsd/net/classq/classq_sfb.c b/bsd/net/classq/classq_sfb.c index c0f575a3e..014870ac7 100644 --- 
a/bsd/net/classq/classq_sfb.c +++ b/bsd/net/classq/classq_sfb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 Apple Inc. All rights reserved. + * Copyright (c) 2011-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -28,7 +28,6 @@ #include #include -#include #include #include #include @@ -46,6 +45,7 @@ #include #include #include +#include #include #include @@ -57,6 +57,7 @@ #include #include #include +#include /* * Stochastic Fair Blue @@ -131,7 +132,7 @@ #define SFB_RANDOM(sp, tmin, tmax) ((sfb_random(sp) % (tmax)) + (tmin)) -#define SFB_PKT_PBOX PF_TAG_QUEUE1 /* in penalty box */ +#define SFB_PKT_PBOX 0x1 /* in penalty box */ /* The following mantissa values are in SFB_FP_SHIFT Q format */ #define SFB_MAX_PMARK (1 << SFB_FP_SHIFT) /* Q14 representation of 1.00 */ @@ -176,15 +177,33 @@ #define ABS(v) (((v) > 0) ? (v) : -(v)) -#define SFB_ZONE_MAX 32 /* maximum elements in zone */ -#define SFB_ZONE_NAME "classq_sfb" /* zone name */ +#define SFB_ZONE_MAX 32 /* maximum elements in zone */ +#define SFB_ZONE_NAME "classq_sfb" /* zone name */ + +#define SFB_BINS_ZONE_MAX 32 /* maximum elements in zone */ +#define SFB_BINS_ZONE_NAME "classq_sfb_bins" /* zone name */ + +#define SFB_FCL_ZONE_MAX 32 /* maximum elements in zone */ +#define SFB_FCL_ZONE_NAME "classq_sfb_fcl" /* zone name */ /* Place the flow control entries in current bin on level 0 */ #define SFB_FC_LEVEL 0 +/* Store SFB hash and flags in the module private scratch space */ +#define pkt_sfb_hash8 pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val8 +#define pkt_sfb_hash16 pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16 +#define pkt_sfb_hash32 pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val32 +#define pkt_sfb_flags pkt_mpriv.__mpriv_u.__mpriv32[1].__mpriv32_u.__val32 + static unsigned int sfb_size; /* size of zone element */ static struct zone *sfb_zone; /* zone for sfb */ +static unsigned int sfb_bins_size; /* size of zone element */ +static struct zone *sfb_bins_zone; /* zone for sfb_bins */ + +static unsigned int sfb_fcl_size; /* size of zone element */ +static struct zone *sfb_fcl_zone; /* zone for sfb_fc_lists */ + /* internal function prototypes */ static u_int32_t sfb_random(struct sfb *); static struct mbuf *sfb_getq_flow(struct sfb *, class_queue_t *, u_int32_t, @@ -194,21 +213,21 @@ static void sfb_calc_holdtime(struct sfb *, u_int64_t); static void sfb_calc_pboxtime(struct sfb *, u_int64_t); static void sfb_calc_hinterval(struct sfb *, u_int64_t *); static void sfb_swap_bins(struct sfb *, u_int32_t); -static inline int sfb_pcheck(struct sfb *, struct pf_mtag *); -static int sfb_penalize(struct sfb *, struct pf_mtag *, struct timespec *); +static inline int sfb_pcheck(struct sfb *, struct pkthdr *); +static int sfb_penalize(struct sfb *, struct pkthdr *, struct timespec *); static void sfb_adjust_bin(struct sfb *, struct sfbbinstats *, struct timespec *, struct timespec *, boolean_t); static void sfb_decrement_bin(struct sfb *, struct sfbbinstats *, struct timespec *, struct timespec *); static void sfb_increment_bin(struct sfb *, struct sfbbinstats *, struct timespec *, struct timespec *); -static inline void sfb_dq_update_bins(struct sfb *, struct pf_mtag *, +static inline void sfb_dq_update_bins(struct sfb *, struct pkthdr *, struct timespec *); -static inline void sfb_eq_update_bins(struct sfb *, struct pf_mtag *); -static int sfb_drop_early(struct sfb *, struct pf_mtag *, u_int16_t *, +static inline void sfb_eq_update_bins(struct sfb *, struct pkthdr *); +static int 
 /* internal function prototypes */
 static u_int32_t sfb_random(struct sfb *);
 static struct mbuf *sfb_getq_flow(struct sfb *, class_queue_t *, u_int32_t,
@@ -194,21 +213,21 @@ static void sfb_calc_holdtime(struct sfb *, u_int64_t);
 static void sfb_calc_pboxtime(struct sfb *, u_int64_t);
 static void sfb_calc_hinterval(struct sfb *, u_int64_t *);
 static void sfb_swap_bins(struct sfb *, u_int32_t);
-static inline int sfb_pcheck(struct sfb *, struct pf_mtag *);
-static int sfb_penalize(struct sfb *, struct pf_mtag *, struct timespec *);
+static inline int sfb_pcheck(struct sfb *, struct pkthdr *);
+static int sfb_penalize(struct sfb *, struct pkthdr *, struct timespec *);
 static void sfb_adjust_bin(struct sfb *, struct sfbbinstats *,
     struct timespec *, struct timespec *, boolean_t);
 static void sfb_decrement_bin(struct sfb *, struct sfbbinstats *,
     struct timespec *, struct timespec *);
 static void sfb_increment_bin(struct sfb *, struct sfbbinstats *,
     struct timespec *, struct timespec *);
-static inline void sfb_dq_update_bins(struct sfb *, struct pf_mtag *,
+static inline void sfb_dq_update_bins(struct sfb *, struct pkthdr *,
     struct timespec *);
-static inline void sfb_eq_update_bins(struct sfb *, struct pf_mtag *);
-static int sfb_drop_early(struct sfb *, struct pf_mtag *, u_int16_t *,
+static inline void sfb_eq_update_bins(struct sfb *, struct pkthdr *);
+static int sfb_drop_early(struct sfb *, struct pkthdr *, u_int16_t *,
     struct timespec *);
-static boolean_t sfb_bin_addfcentry(struct sfb *, struct pf_mtag *);
-static void sfb_fclist_append(struct sfb *, struct sfb_fc_list *);
+static boolean_t sfb_bin_addfcentry(struct sfb *, struct pkthdr *);
+static void sfb_fclist_append(struct sfb *, struct sfb_fcl *);
 static void sfb_fclists_clean(struct sfb *sp);
 
 SYSCTL_NODE(_net_classq, OID_AUTO, sfb, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "SFB");
@@ -275,13 +294,33 @@ sfb_init(void)
 	}
 	zone_change(sfb_zone, Z_EXPAND, TRUE);
 	zone_change(sfb_zone, Z_CALLERACCT, TRUE);
+
+	sfb_bins_size = sizeof (*((struct sfb *)0)->sfb_bins);
+	sfb_bins_zone = zinit(sfb_bins_size, SFB_BINS_ZONE_MAX * sfb_bins_size,
+	    0, SFB_BINS_ZONE_NAME);
+	if (sfb_bins_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, SFB_BINS_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(sfb_bins_zone, Z_EXPAND, TRUE);
+	zone_change(sfb_bins_zone, Z_CALLERACCT, TRUE);
+
+	sfb_fcl_size = sizeof (*((struct sfb *)0)->sfb_fc_lists);
+	sfb_fcl_zone = zinit(sfb_fcl_size, SFB_FCL_ZONE_MAX * sfb_fcl_size,
+	    0, SFB_FCL_ZONE_NAME);
+	if (sfb_fcl_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, SFB_FCL_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(sfb_fcl_zone, Z_EXPAND, TRUE);
+	zone_change(sfb_fcl_zone, Z_CALLERACCT, TRUE);
 }
 
 static u_int32_t
 sfb_random(struct sfb *sp)
 {
 	IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
-	return (random());
+	return (RandomULong());
 }
 
 static void
@@ -362,6 +401,7 @@ struct sfb *
 sfb_alloc(struct ifnet *ifp, u_int32_t qid, u_int32_t qlim, u_int32_t flags)
 {
 	struct sfb *sp;
+	int i;
 
 	VERIFY(ifp != NULL && qlim > 0);
 
@@ -370,27 +410,37 @@ sfb_alloc(struct ifnet *ifp, u_int32_t qid, u_int32_t qlim, u_int32_t flags)
 		log(LOG_ERR, "%s: SFB unable to allocate\n", if_name(ifp));
 		return (NULL);
 	}
-
 	bzero(sp, sfb_size);
-	if ((sp->sfb_bins = _MALLOC(sizeof (*sp->sfb_bins), M_DEVBUF,
-	    M_WAITOK|M_ZERO)) == NULL) {
+
+	if ((sp->sfb_bins = zalloc(sfb_bins_zone)) == NULL) {
 		log(LOG_ERR, "%s: SFB unable to allocate bins\n", if_name(ifp));
 		sfb_destroy(sp);
 		return (NULL);
 	}
+	bzero(sp->sfb_bins, sfb_bins_size);
 
-	if ((sp->sfb_fc_lists = _MALLOC(sizeof (*sp->sfb_fc_lists), M_DEVBUF,
-	    M_WAITOK|M_ZERO)) == NULL) {
+	if ((sp->sfb_fc_lists = zalloc(sfb_fcl_zone)) == NULL) {
 		log(LOG_ERR, "%s: SFB unable to allocate flow control lists\n",
 		    if_name(ifp));
 		sfb_destroy(sp);
 		return(NULL);
 	}
+	bzero(sp->sfb_fc_lists, sfb_fcl_size);
+
+	for (i = 0; i < SFB_BINS; ++i)
+		STAILQ_INIT(&SFB_FC_LIST(sp, i)->fclist);
 
-	sp->sfb_flags = (flags & SFBF_USERFLAGS);
 	sp->sfb_ifp = ifp;
 	sp->sfb_qlim = qlim;
 	sp->sfb_qid = qid;
+	sp->sfb_flags = (flags & SFBF_USERFLAGS);
+#if !PF_ECN
+	if (sp->sfb_flags & SFBF_ECN) {
+		sp->sfb_flags &= ~SFBF_ECN;
+		log(LOG_ERR, "%s: SFB qid=%d, ECN not available; ignoring "
+		    "SFBF_ECN flag!\n", if_name(ifp), sp->sfb_qid);
+	}
+#endif /* !PF_ECN */
 
 	sfb_resetq(sp, -1);
 
@@ -398,10 +448,16 @@ sfb_alloc(struct ifnet *ifp, u_int32_t qid, u_int32_t qlim, u_int32_t flags)
 }
 
 static void
-sfb_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl)
+sfb_fclist_append(struct sfb *sp, struct sfb_fcl *fcl)
 {
 	IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
-	ifnet_fclist_append(sp, fcl);
+
+	VERIFY(STAILQ_EMPTY(&fcl->fclist) || fcl->cnt > 0);
+	sp->sfb_stats.flow_feedback += fcl->cnt;
+	fcl->cnt = 0;
+
+	flowadv_add(&fcl->fclist);
+	VERIFY(fcl->cnt == 0 && STAILQ_EMPTY(&fcl->fclist));
 }
 
 static void
@@ -409,10 +465,10 @@ sfb_fclists_clean(struct sfb *sp)
 {
 	int i;
 
-	/* Move all the flow control entries to the ifnet list */
+	/* Move all the flow control entries to the flowadv list */
 	for (i = 0; i < SFB_BINS; ++i) {
-		struct sfb_fc_list *fcl = SFB_FC_LIST(sp, i);
-		if (!SLIST_EMPTY(fcl))
+		struct sfb_fcl *fcl = SFB_FC_LIST(sp, i);
+		if (!STAILQ_EMPTY(&fcl->fclist))
 			sfb_fclist_append(sp, fcl);
 	}
 }
@@ -422,11 +478,11 @@ sfb_destroy(struct sfb *sp)
 {
 	sfb_fclists_clean(sp);
 	if (sp->sfb_bins != NULL) {
-		_FREE(sp->sfb_bins, M_DEVBUF);
+		zfree(sfb_bins_zone, sp->sfb_bins);
 		sp->sfb_bins = NULL;
 	}
 	if (sp->sfb_fc_lists != NULL) {
-		_FREE(sp->sfb_fc_lists, M_DEVBUF);
+		zfree(sfb_fcl_zone, sp->sfb_fc_lists);
 		sp->sfb_fc_lists = NULL;
 	}
 	zfree(sfb_zone, sp);
@@ -529,9 +585,9 @@ sfb_swap_bins(struct sfb *sp, u_int32_t len)
 
 	/* clear/adjust bin statistics and flow control lists */
 	for (i = 0; i < SFB_BINS; i++) {
-		struct sfb_fc_list *fcl = SFB_FC_LIST(sp, i);
+		struct sfb_fcl *fcl = SFB_FC_LIST(sp, i);
 
-		if (!SLIST_EMPTY(fcl))
+		if (!STAILQ_EMPTY(&fcl->fclist))
 			sfb_fclist_append(sp, fcl);
 
 		for (j = 0; j < SFB_LEVELS; j++) {
@@ -557,7 +613,7 @@ sfb_swap_bins(struct sfb *sp, u_int32_t len)
 }
 
 static inline int
-sfb_pcheck(struct sfb *sp, struct pf_mtag *t)
+sfb_pcheck(struct sfb *sp, struct pkthdr *pkt)
 {
 #if SFB_LEVELS != 2
 	int i, n;
@@ -576,17 +632,17 @@ sfb_pcheck(struct sfb *sp, struct pf_mtag *t)
 	 * Level 0: bin index at [0] for set 0; [2] for set 1
 	 * Level 1: bin index at [1] for set 0; [3] for set 1
 	 */
-	if (SFB_BINST(sp, 0, SFB_BINMASK(t->pftag_qpriv8[(s << 1)]),
+	if (SFB_BINST(sp, 0, SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1)]),
 	    s)->pmark < SFB_PMARK_TH ||
-	    SFB_BINST(sp, 1, SFB_BINMASK(t->pftag_qpriv8[(s << 1) + 1]),
+	    SFB_BINST(sp, 1, SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1) + 1]),
 	    s)->pmark < SFB_PMARK_TH)
 		return (0);
 #else /* SFB_LEVELS != 2 */
 	for (i = 0; i < SFB_LEVELS; i++) {
 		if (s == 0)		/* set 0, bin index [0,1] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i]);
 		else			/* set 1, bin index [2,3] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i + 2]);
 
 		if (SFB_BINST(sp, i, n, s)->pmark < SFB_PMARK_TH)
 			return (0);
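sfb_pcheck() above treats the bin levels like a small Bloom filter: a flow is only suspected of being non-responsive when every bin it hashes into has a marking probability at or above SFB_PMARK_TH. A simplified single-set sketch of that check (made-up sizes and threshold, not the kernel's):

#include <stdint.h>
#include <stdio.h>

#define LEVELS   2
#define BINS     32
#define PMARK_TH 512

static uint16_t pmark[LEVELS][BINS];

/* A flow is suspect only if every bin it hashes to is "hot". */
static int
flow_over_threshold(const uint8_t hash8[LEVELS])
{
	int i;

	for (i = 0; i < LEVELS; i++) {
		if (pmark[i][hash8[i] % BINS] < PMARK_TH)
			return (0);	/* one cool bin clears the flow */
	}
	return (1);
}

int
main(void)
{
	uint8_t h[LEVELS] = { 3, 17 };

	pmark[0][3] = 600;
	pmark[1][17] = 700;	/* both bins hot -> flagged */
	printf("flagged: %d\n", flow_over_threshold(h));
	pmark[1][17] = 100;	/* one bin cools off -> cleared */
	printf("flagged: %d\n", flow_over_threshold(h));
	return (0);
}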
@@ -596,12 +652,12 @@ sfb_pcheck(struct sfb *sp, struct pf_mtag *t)
 }
 
 static int
-sfb_penalize(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
+sfb_penalize(struct sfb *sp, struct pkthdr *pkt, struct timespec *now)
 {
 	struct timespec delta = { 0, 0 };
 
 	/* If minimum pmark of current bins is < SFB_PMARK_TH, we're done */
-	if (!sfb_ratelimit || !sfb_pcheck(sp, t))
+	if (!sfb_ratelimit || !sfb_pcheck(sp, pkt))
 		return (0);
 
 	net_timersub(now, &sp->sfb_pboxfreeze, &delta);
@@ -620,22 +676,22 @@ sfb_penalize(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
 	 */
 #if SFB_LEVELS == 2
 	/* Level 0: bin index at [0] for set 0; [2] for set 1 */
-	n = SFB_BINMASK(t->pftag_qpriv8[(w << 1)]);
+	n = SFB_BINMASK(pkt->pkt_sfb_hash8[(w << 1)]);
 	bin = SFB_BINST(sp, 0, n, w);
 	if (bin->pkts >= sp->sfb_allocation)
 		sfb_increment_bin(sp, bin, SFB_BINFT(sp, 0, n, w), now);
 
 	/* Level 1: bin index at [1] for set 0; [3] for set 1 */
-	n = SFB_BINMASK(t->pftag_qpriv8[(w << 1) + 1]);
+	n = SFB_BINMASK(pkt->pkt_sfb_hash8[(w << 1) + 1]);
 	bin = SFB_BINST(sp, 1, n, w);
 	if (bin->pkts >= sp->sfb_allocation)
 		sfb_increment_bin(sp, bin, SFB_BINFT(sp, 1, n, w), now);
 #else /* SFB_LEVELS != 2 */
 	for (i = 0; i < SFB_LEVELS; i++) {
 		if (w == 0)		/* set 0, bin index [0,1] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i]);
 		else			/* set 1, bin index [2,3] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i + 2]);
 
 		bin = SFB_BINST(sp, i, n, w);
 		if (bin->pkts >= sp->sfb_allocation) {
@@ -648,7 +704,7 @@ sfb_penalize(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
 	}
 
 	/* non-conformant or else misclassified flow; queue it anyway */
-	t->pftag_flags |= SFB_PKT_PBOX;
+	pkt->pkt_sfb_flags |= SFB_PKT_PBOX;
 	*(&sp->sfb_pboxfreeze) = *now;
 
 	return (0);
@@ -694,14 +750,14 @@ sfb_increment_bin(struct sfb *sp, struct sfbbinstats *bin, struct timespec *ft,
 }
 
 static inline void
-sfb_dq_update_bins(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
+sfb_dq_update_bins(struct sfb *sp, struct pkthdr *pkt, struct timespec *now)
 {
 #if SFB_LEVELS != 2 || SFB_FC_LEVEL != 0
 	int i;
 #endif /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */
 	struct sfbbinstats *bin;
 	int s, n;
-	struct sfb_fc_list *fcl = NULL;
+	struct sfb_fcl *fcl = NULL;
 
 	s = sp->sfb_current;
 	VERIFY((s + (s ^ 1)) == 1);
@@ -711,7 +767,7 @@ sfb_dq_update_bins(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
 	 */
 #if SFB_LEVELS == 2 && SFB_FC_LEVEL == 0
 	/* Level 0: bin index at [0] for set 0; [2] for set 1 */
-	n = SFB_BINMASK(t->pftag_qpriv8[(s << 1)]);
+	n = SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1)]);
 	bin = SFB_BINST(sp, 0, n, s);
 
 	VERIFY(bin->pkts > 0);
@@ -721,12 +777,12 @@ sfb_dq_update_bins(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
 	if (bin->pkts <= (sp->sfb_allocation >> 2)) {
 		/* deliver flow control feedback to the sockets */
 		fcl = SFB_FC_LIST(sp, n);
-		if (!SLIST_EMPTY(fcl))
+		if (!STAILQ_EMPTY(&fcl->fclist))
 			sfb_fclist_append(sp, fcl);
 	}
 
 	/* Level 1: bin index at [1] for set 0; [3] for set 1 */
-	n = SFB_BINMASK(t->pftag_qpriv8[(s << 1) + 1]);
+	n = SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1) + 1]);
 	bin = SFB_BINST(sp, 1, n, s);
 
 	VERIFY(bin->pkts > 0);
@@ -735,9 +791,9 @@ sfb_dq_update_bins(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
 #else /* SFB_LEVELS != 2 || SFB_FC_LEVEL != 0 */
 	for (i = 0; i < SFB_LEVELS; i++) {
 		if (s == 0)		/* set 0, bin index [0,1] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i]);
 		else			/* set 1, bin index [2,3] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i + 2]);
 
 		bin = SFB_BINST(sp, i, n, s);
 
@@ -750,7 +806,7 @@ sfb_dq_update_bins(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
 			/* deliver flow control feedback to the sockets */
 			if (i == SFB_FC_LEVEL) {
 				fcl = SFB_FC_LIST(sp, n);
-				if (!SLIST_EMPTY(fcl))
+				if (!STAILQ_EMPTY(&fcl->fclist))
 					sfb_fclist_append(sp, fcl);
 			}
 		}
@@ -759,7 +815,7 @@ sfb_dq_update_bins(struct sfb *sp, struct pf_mtag *t, struct timespec *now)
 }
 
 static inline void
-sfb_eq_update_bins(struct sfb *sp, struct pf_mtag *t)
+sfb_eq_update_bins(struct sfb *sp, struct pkthdr *pkt)
 {
 #if SFB_LEVELS != 2
 	int i, n;
@@ -774,16 +830,18 @@ sfb_eq_update_bins(struct sfb *sp, struct pf_mtag *t)
 	 */
 #if SFB_LEVELS == 2
 	/* Level 0: bin index at [0] for set 0; [2] for set 1 */
-	SFB_BINST(sp, 0, SFB_BINMASK(t->pftag_qpriv8[(s << 1)]), s)->pkts++;
+	SFB_BINST(sp, 0,
+	    SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1)]), s)->pkts++;
 
 	/* Level 1: bin index at [1] for set 0; [3] for set 1 */
-	SFB_BINST(sp, 1, SFB_BINMASK(t->pftag_qpriv8[(s << 1) + 1]), s)->pkts++;
+	SFB_BINST(sp, 1,
+	    SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1) + 1]), s)->pkts++;
 #else /* SFB_LEVELS != 2 */
 	for (i = 0; i < SFB_LEVELS; i++) {
 		if (s == 0)		/* set 0, bin index [0,1] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i]);
 		else			/* set 1, bin index [2,3] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i + 2]);
 
 		SFB_BINST(sp, i, n, s)->pkts++;
 	}
@@ -791,20 +849,21 @@ sfb_eq_update_bins(struct sfb *sp, struct pf_mtag *t)
 }
 
 static boolean_t
-sfb_bin_addfcentry(struct sfb *sp, struct pf_mtag *t)
+sfb_bin_addfcentry(struct sfb *sp, struct pkthdr *pkt)
 {
-	struct sfb_bin_fcentry *fce;
-	u_int32_t flowhash;
-	struct sfb_fc_list *fcl;
+	struct flowadv_fcentry *fce;
+	u_int32_t flowsrc, flowid;
+	struct sfb_fcl *fcl;
 	int s;
 
 	s = sp->sfb_current;
 	VERIFY((s + (s ^ 1)) == 1);
 
-	flowhash = t->pftag_flowhash;
+	flowsrc = pkt->pkt_flowsrc;
+	flowid = pkt->pkt_flowid;
 
-	if (flowhash == 0) {
-		sp->sfb_stats.null_flowhash++;
+	if (flowid == 0) {
+		sp->sfb_stats.null_flowid++;
 		return (FALSE);
 	}
 
@@ -812,19 +871,22 @@ sfb_bin_addfcentry(struct sfb *sp, struct pf_mtag *t)
 	 * Use value at index 0 for set 0 and
 	 * value at index 2 for set 1
 	 */
-	fcl = SFB_FC_LIST(sp, SFB_BINMASK(t->pftag_qpriv8[(s << 1)]));
-	SLIST_FOREACH(fce, fcl, fce_link) {
-		if (fce->fce_flowhash == flowhash) {
+	fcl = SFB_FC_LIST(sp, SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1)]));
+	STAILQ_FOREACH(fce, &fcl->fclist, fce_link) {
+		if (fce->fce_flowsrc == flowsrc &&
+		    fce->fce_flowid == flowid) {
 			/* Already on flow control list; just return */
 			return (TRUE);
 		}
 	}
 
 	IFCQ_CONVERT_LOCK(&sp->sfb_ifp->if_snd);
-	fce = ifnet_fce_alloc(M_WAITOK);
+	fce = flowadv_alloc_entry(M_WAITOK);
 	if (fce != NULL) {
-		fce->fce_flowhash = flowhash;
-		SLIST_INSERT_HEAD(fcl, fce, fce_link);
+		fce->fce_flowsrc = flowsrc;
+		fce->fce_flowid = flowid;
+		STAILQ_INSERT_TAIL(&fcl->fclist, fce, fce_link);
+		fcl->cnt++;
 		sp->sfb_stats.flow_controlled++;
 	}
 
@@ -835,7 +897,7 @@ sfb_bin_addfcentry(struct sfb *sp, struct pf_mtag *t)
  * early-drop probability is kept in pmark of each bin of the flow
 */
 static int
-sfb_drop_early(struct sfb *sp, struct pf_mtag *t, u_int16_t *pmin,
+sfb_drop_early(struct sfb *sp, struct pkthdr *pkt, u_int16_t *pmin,
     struct timespec *now)
 {
#if SFB_LEVELS != 2
@@ -854,7 +916,7 @@ sfb_drop_early(struct sfb *sp, struct pf_mtag *t, u_int16_t *pmin,
 	 */
 #if SFB_LEVELS == 2
 	/* Level 0: bin index at [0] for set 0; [2] for set 1 */
-	n = SFB_BINMASK(t->pftag_qpriv8[(s << 1)]);
+	n = SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1)]);
 	bin = SFB_BINST(sp, 0, n, s);
 	if (*pmin > (u_int16_t)bin->pmark)
 		*pmin = (u_int16_t)bin->pmark;
@@ -866,7 +928,7 @@ sfb_drop_early(struct sfb *sp, struct pf_mtag *t, u_int16_t *pmin,
 	}
 
 	/* Level 1: bin index at [1] for set 0; [3] for set 1 */
-	n = SFB_BINMASK(t->pftag_qpriv8[(s << 1) + 1]);
+	n = SFB_BINMASK(pkt->pkt_sfb_hash8[(s << 1) + 1]);
 	bin = SFB_BINST(sp, 1, n, s);
 	if (*pmin > (u_int16_t)bin->pmark)
 		*pmin = (u_int16_t)bin->pmark;
@@ -879,9 +941,9 @@ sfb_drop_early(struct sfb *sp, struct pf_mtag *t, u_int16_t *pmin,
 #else /* SFB_LEVELS != 2 */
 	for (i = 0; i < SFB_LEVELS; i++) {
 		if (s == 0)		/* set 0, bin index [0,1] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i]);
 		else			/* set 1, bin index [2,3] */
-			n = SFB_BINMASK(t->pftag_qpriv8[i + 2]);
+			n = SFB_BINMASK(pkt->pkt_sfb_hash8[i + 2]);
 
 		bin = SFB_BINST(sp, i, n, s);
 		if (*pmin > (u_int16_t)bin->pmark)
@@ -909,6 +971,10 @@ sfb_drop_early(struct sfb *sp, struct pf_mtag *t, u_int16_t *pmin,
 int
 sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
 {
+#if !PF_ECN
+#pragma unused(t)
+#endif /* !PF_ECN */
+	struct pkthdr *pkt = &m->m_pkthdr;
 	struct timespec now;
 	int droptype, s;
 	u_int16_t pmin;
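sfb_bin_addfcentry() now keys flow-control entries on the (flowsrc, flowid) pair and appends them to a per-bin STAILQ, skipping flows already queued. A user-space sketch of that append-if-absent pattern (hypothetical struct names; malloc stands in for the kernel's flowadv_alloc_entry()):

#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct fce {
	unsigned int flowsrc, flowid;
	STAILQ_ENTRY(fce) link;
};
STAILQ_HEAD(fcl, fce);

/* Append (flowsrc, flowid) unless an identical entry is already queued. */
static int
fcl_add_unique(struct fcl *head, unsigned int flowsrc, unsigned int flowid)
{
	struct fce *e;

	STAILQ_FOREACH(e, head, link) {
		if (e->flowsrc == flowsrc && e->flowid == flowid)
			return (1);	/* already flow-controlled */
	}
	if ((e = malloc(sizeof (*e))) == NULL)
		return (0);
	e->flowsrc = flowsrc;
	e->flowid = flowid;
	STAILQ_INSERT_TAIL(head, e, link);
	return (1);
}

int
main(void)
{
	struct fcl head = STAILQ_HEAD_INITIALIZER(head);
	struct fce *e;
	int n = 0;

	fcl_add_unique(&head, 1, 42);
	fcl_add_unique(&head, 1, 42);	/* duplicate is not queued twice */
	fcl_add_unique(&head, 2, 7);
	STAILQ_FOREACH(e, &head, link)
		n++;
	printf("%d entries\n", n);	/* prints 2 */
	return (0);
}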
@@ -928,35 +994,39 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
 		VERIFY((s + (s ^ 1)) == 1);
 	}
 
-	t->pftag_flags &= ~SFB_PKT_PBOX;
-	t->pftag_qpriv16[s] =
-	    (SFB_HASH(&t->pftag_flowhash, sizeof (t->pftag_flowhash),
+	pkt->pkt_sfb_flags = 0;
+	pkt->pkt_sfb_hash16[s] =
+	    (SFB_HASH(&pkt->pkt_flowid, sizeof (pkt->pkt_flowid),
 	    (*sp->sfb_bins)[s].fudge) & SFB_HASHMASK);
-	t->pftag_qpriv16[s ^ 1] =
-	    (SFB_HASH(&t->pftag_flowhash, sizeof (t->pftag_flowhash),
+	pkt->pkt_sfb_hash16[s ^ 1] =
+	    (SFB_HASH(&pkt->pkt_flowid, sizeof (pkt->pkt_flowid),
 	    (*sp->sfb_bins)[s ^ 1].fudge) & SFB_HASHMASK);
 
 	/* see if we drop early */
 	droptype = DTYPE_NODROP;
-	if (sfb_drop_early(sp, t, &pmin, &now)) {
+	if (sfb_drop_early(sp, pkt, &pmin, &now)) {
 		/* flow control, mark or drop by sfb */
 		if ((sp->sfb_flags & SFBF_FLOWCTL) &&
-		    (t->pftag_flags & PF_TAG_FLOWADV)) {
+		    (pkt->pkt_flags & PKTF_FLOW_ADV)) {
 			fc_adv = 1;
 			/* drop all during suspension or for non-TCP */
 			if ((sp->sfb_flags & SFBF_SUSPENDED) ||
-			    !(t->pftag_flags & PF_TAG_TCP)) {
+			    pkt->pkt_proto != IPPROTO_TCP) {
 				droptype = DTYPE_EARLY;
 				sp->sfb_stats.drop_early++;
 			}
-		} else if ((sp->sfb_flags & SFBF_ECN) &&
-		    (t->pftag_flags & PF_TAG_TCP) &&	/* only for TCP */
+		}
+#if PF_ECN
+		else if ((sp->sfb_flags & SFBF_ECN) &&
+		    (pkt->pkt_proto == IPPROTO_TCP) &&	/* only for TCP */
 		    ((sfb_random(sp) & SFB_MAX_PMARK) <= pmin) &&
 		    mark_ecn(m, t, sp->sfb_flags) &&
 		    !(sp->sfb_flags & SFBF_SUSPENDED)) {
 			/* successfully marked; do not drop. */
 			sp->sfb_stats.marked_packets++;
-		} else {
+		}
+#endif /* PF_ECN */
+		else {
 			/* unforced drop by sfb */
 			droptype = DTYPE_EARLY;
 			sp->sfb_stats.drop_early++;
@@ -964,7 +1034,7 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
 	}
 
 	/* non-responsive flow penalty? */
-	if (droptype == DTYPE_NODROP && sfb_penalize(sp, t, &now)) {
+	if (droptype == DTYPE_NODROP && sfb_penalize(sp, pkt, &now)) {
 		droptype = DTYPE_FORCED;
 		sp->sfb_stats.drop_pbox++;
 	}
@@ -976,7 +1046,7 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
 	}
 
 	if (fc_adv == 1 && droptype != DTYPE_FORCED &&
-	    sfb_bin_addfcentry(sp, t)) {
+	    sfb_bin_addfcentry(sp, pkt)) {
 		/* deliver flow control advisory error */
 		if (droptype == DTYPE_NODROP) {
 			ret = CLASSQEQ_SUCCESS_FC;
@@ -999,8 +1069,8 @@ sfb_addq(struct sfb *sp, class_queue_t *q, struct mbuf *m, struct pf_mtag *t)
 		return ((ret != CLASSQEQ_SUCCESS) ? ret : CLASSQEQ_DROPPED);
 	}
 
-	if (!(t->pftag_flags & SFB_PKT_PBOX))
-		sfb_eq_update_bins(sp, t);
+	if (!(pkt->pkt_sfb_flags & SFB_PKT_PBOX))
+		sfb_eq_update_bins(sp, pkt);
 	else
 		sp->sfb_stats.pbox_packets++;
 
@@ -1013,7 +1083,7 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge)
 {
 	struct timespec now;
 	struct mbuf *m;
-	struct pf_mtag *t;
+	struct pkthdr *pkt;
 
 	if (!purge && (sp->sfb_flags & SFBF_SUSPENDED))
 		return (NULL);
@@ -1029,7 +1099,7 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge)
 
 	VERIFY(m->m_flags & M_PKTHDR);
 
-	t = m_pftag(m);
+	pkt = &m->m_pkthdr;
 
 	if (!purge) {
 		/* calculate EWMA of dequeues */
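sfb_addq() above hashes pkt_flowid into both bin sets, each salted with its own per-set fudge value; rehashing then amounts to flipping sfb_current and re-seeding the stale set. A toy illustration of the salted double hashing (the mix function is invented, not SFB_HASH):

#include <stdint.h>
#include <stdio.h>

#define SFB_BINS     32
#define SFB_HASHMASK (SFB_BINS - 1)

/* Toy 32-bit mix; the kernel uses its own SFB_HASH routine. */
static uint32_t
mix32(uint32_t x, uint32_t fudge)
{
	x ^= fudge;
	x *= 0x9e3779b1U;
	x ^= x >> 16;
	return (x);
}

int
main(void)
{
	uint32_t flowid = 0xdeadbeef;
	uint32_t fudge[2] = { 0x12345678, 0x87654321 };
	int s;

	/*
	 * Each packet is hashed into both bin sets up front, so a
	 * rehash only has to flip the current set and pick a new
	 * fudge for the set that went stale.
	 */
	for (s = 0; s < 2; s++)
		printf("set %d -> bin %u\n", s,
		    mix32(flowid, fudge[s]) & SFB_HASHMASK);
	return (0);
}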
@@ -1068,14 +1138,14 @@ sfb_getq_flow(struct sfb *sp, class_queue_t *q, u_int32_t flow, boolean_t purge)
 	 * this reason.  A rule of thumb is to set it to K*D, where D is
 	 * the time taken to drain queue.
 	 */
-	if (t->pftag_flags & SFB_PKT_PBOX) {
-		t->pftag_flags &= ~SFB_PKT_PBOX;
+	if (pkt->pkt_sfb_flags & SFB_PKT_PBOX) {
+		pkt->pkt_sfb_flags &= ~SFB_PKT_PBOX;
 		if (sp->sfb_clearpkts > 0)
 			sp->sfb_clearpkts--;
 	} else if (sp->sfb_clearpkts > 0) {
 		sp->sfb_clearpkts--;
 	} else {
-		sfb_dq_update_bins(sp, t, &now);
+		sfb_dq_update_bins(sp, pkt, &now);
 	}
 
 	return (m);
@@ -1116,7 +1186,7 @@ sfb_updateq(struct sfb *sp, cqev_t ev)
 	VERIFY(ifp != NULL);
 
 	switch (ev) {
-	case CLASSQ_EV_LINK_SPEED: {
+	case CLASSQ_EV_LINK_BANDWIDTH: {
 		u_int64_t eff_rate = ifnet_output_linkrate(ifp);
 
 		/* update parameters only if rate has changed */
@@ -1143,6 +1213,7 @@ sfb_updateq(struct sfb *sp, cqev_t ev)
 		sfb_resetq(sp, ev);
 		break;
 
+	case CLASSQ_EV_LINK_LATENCY:
 	case CLASSQ_EV_LINK_MTU:
 	default:
 		break;
diff --git a/bsd/net/classq/classq_sfb.h b/bsd/net/classq/classq_sfb.h
index 911ad31ef..6fc8726a7 100644
--- a/bsd/net/classq/classq_sfb.h
+++ b/bsd/net/classq/classq_sfb.h
@@ -31,9 +31,10 @@
 #ifdef PRIVATE
 #ifdef BSD_KERNEL_PRIVATE
-#include
 #include
 #include
+#include
+#include
 #endif /* BSD_KERNEL_PRIVATE */
 
 #ifdef __cplusplus
@@ -56,7 +57,7 @@ struct sfbstats {
 	u_int64_t		dequeue_avg;
 	u_int64_t		rehash_intval;
 	u_int64_t		num_rehash;
-	u_int64_t		null_flowhash;
+	u_int64_t		null_flowid;
 	u_int64_t		flow_controlled;
 	u_int64_t		flow_feedback;
 };
@@ -78,19 +79,17 @@ struct sfb_stats {
 };
 
 #ifdef BSD_KERNEL_PRIVATE
-struct sfb_bin_fcentry {
-	SLIST_ENTRY(sfb_bin_fcentry) fce_link;
-	u_int32_t	fce_flowhash;
-};
-
-SLIST_HEAD(sfb_fc_list, sfb_bin_fcentry);
-
 struct sfb_bins {
 	u_int32_t	fudge;
 	struct sfbbinstats stats[SFB_LEVELS][SFB_BINS];
 	struct timespec	freezetime[SFB_LEVELS][SFB_BINS];
 };
 
+struct sfb_fcl {
+	u_int32_t	cnt;
+	struct flowadv_fclist	fclist;
+};
+
 /* SFB flags */
 #define	SFBF_ECN4	0x01	/* use packet marking for IPv4 packets */
 #define	SFBF_ECN6	0x02	/* use packet marking for IPv6 packets */
@@ -127,7 +126,7 @@ typedef struct sfb {
 	struct sfb_bins	(*sfb_bins)[2];
 
 	/* Flow control lists for current set */
-	struct sfb_fc_list	(*sfb_fc_lists)[SFB_BINS];
+	struct sfb_fcl	(*sfb_fc_lists)[SFB_BINS];
 
 	/* statistics */
 	struct sfbstats		sfb_stats __attribute__((aligned(8)));
diff --git a/bsd/net/classq/classq_subr.c b/bsd/net/classq/classq_subr.c
index 738c86e23..d62a1337b 100644
--- a/bsd/net/classq/classq_subr.c
+++ b/bsd/net/classq/classq_subr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -238,10 +238,24 @@ ifclassq_get_maxlen(struct ifclassq *ifq)
 	return (IFCQ_MAXLEN(ifq));
 }
 
-u_int32_t
-ifclassq_get_len(struct ifclassq *ifq)
+int
+ifclassq_get_len(struct ifclassq *ifq, mbuf_svc_class_t sc, u_int32_t *packets,
+    u_int32_t *bytes)
 {
-	return (IFCQ_LEN(ifq));
+	int err = 0;
+
+	IFCQ_LOCK(ifq);
+	if (sc == MBUF_SC_UNSPEC) {
+		VERIFY(packets != NULL);
+		*packets = IFCQ_LEN(ifq);
+	} else {
+		VERIFY(MBUF_VALID_SC(sc));
+		VERIFY(packets != NULL && bytes != NULL);
+		IFCQ_LEN_SC(ifq, sc, packets, bytes, err);
+	}
+	IFCQ_UNLOCK(ifq);
+
+	return (err);
 }
 
 errno_t
@@ -363,8 +377,10 @@ ifclassq_dequeue_common(struct ifclassq *ifq, mbuf_svc_class_t sc,
 		l += (*head)->m_pkthdr.len;
 		pktlen = (*head)->m_pkthdr.len;
 
-		(*head)->m_pkthdr.pf_mtag.pftag_pktseq =
+#if MEASURE_BW
+		(*head)->m_pkthdr.pkt_bwseq =
 		    atomic_add_64_ov(&(ifp->if_bw.cur_seq), pktlen);
+#endif /* MEASURE_BW */
 
 		head = &(*head)->m_nextpkt;
 		i++;
@@ -539,8 +555,12 @@ ifclassq_ev2str(cqev_t ev)
 	const char *c;
 
 	switch (ev) {
-	case CLASSQ_EV_LINK_SPEED:
-		c = "LINK_SPEED";
+	case CLASSQ_EV_LINK_BANDWIDTH:
+		c = "LINK_BANDWIDTH";
+		break;
+
+	case CLASSQ_EV_LINK_LATENCY:
+		c = "LINK_LATENCY";
 		break;
 
 	case CLASSQ_EV_LINK_MTU:
@@ -704,7 +724,7 @@ ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
 			bzero(tbr, sizeof (*tbr));
 			ifnet_set_start_cycle(ifp, NULL);
 			if (update)
-				ifclassq_update(ifq, CLASSQ_EV_LINK_SPEED);
+				ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
 			return (0);
 		}
@@ -788,7 +808,7 @@ ifclassq_tbr_set(struct ifclassq *ifq, struct tb_profile *profile,
 		ifnet_set_start_cycle(ifp, NULL);
 	}
 	if (update && tbr->tbr_rate_raw != old_rate)
-		ifclassq_update(ifq, CLASSQ_EV_LINK_SPEED);
+		ifclassq_update(ifq, CLASSQ_EV_LINK_BANDWIDTH);
 
 	return (0);
 }
diff --git a/bsd/net/classq/classq_util.c b/bsd/net/classq/classq_util.c
index e8bf3d5bd..9cc141265 100644
--- a/bsd/net/classq/classq_util.c
+++ b/bsd/net/classq/classq_util.c
@@ -82,6 +82,7 @@
 
 #include
 
+#if PF_ECN
 /*
  * read and write diffserv field in IPv4 or IPv6 header
 */
@@ -302,3 +303,4 @@ mark_ecn(struct mbuf *m, struct pf_mtag *t, int flags)
 	/* not marked */
 	return (0);
 }
+#endif /* PF_ECN */
diff --git a/bsd/net/classq/if_classq.h b/bsd/net/classq/if_classq.h
index 9eb32d8c5..2c0da5f5e 100644
--- a/bsd/net/classq/if_classq.h
+++ b/bsd/net/classq/if_classq.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -46,6 +46,7 @@ typedef enum cqrq {
 	CLASSQRQ_PURGE_SC =	2,	/* purge service class (and flow) */
 	CLASSQRQ_EVENT =	3,	/* interface events */
 	CLASSQRQ_THROTTLE =	4,	/* throttle packets */
+	CLASSQRQ_STAT_SC =	5,	/* get service class queue stats */
 } cqrq_t;
 
 /* classq purge_sc request argument */
@@ -62,6 +63,13 @@ typedef struct cqrq_throttle {
 	u_int32_t		level;	/* (in/out) throttling level */
 } cqrq_throttle_t;
 
+/* classq service class stats request argument */
+typedef struct cqrq_stat_sc {
+	mbuf_svc_class_t	sc;	/* (in) service class */
+	u_int32_t		packets; /* (out) packets enqueued */
+	u_int32_t		bytes;	/* (out) bytes enqueued */
+} cqrq_stat_sc_t;
+
 #if PF_ALTQ
 #include
 #endif /* PF_ALTQ */
@@ -185,10 +193,11 @@ struct ifclassq {
 
 /* interface event argument for CLASSQRQ_EVENT */
 typedef enum cqev {
-	CLASSQ_EV_LINK_SPEED =	1,	/* link speed has changed */
-	CLASSQ_EV_LINK_MTU =	2,	/* link MTU has changed */
-	CLASSQ_EV_LINK_UP =	3,	/* link is now up */
-	CLASSQ_EV_LINK_DOWN =	4,	/* link is now down */
+	CLASSQ_EV_LINK_BANDWIDTH = 1,	/* link bandwidth has changed */
+	CLASSQ_EV_LINK_LATENCY = 2,	/* link latency has changed */
+	CLASSQ_EV_LINK_MTU =	3,	/* link MTU has changed */
+	CLASSQ_EV_LINK_UP =	4,	/* link is now up */
+	CLASSQ_EV_LINK_DOWN =	5,	/* link is now down */
 } cqev_t;
 #endif /* BSD_KERNEL_PRIVATE */
 
@@ -315,6 +324,15 @@ struct if_ifclassq_stats {
 	(_level) = _req.level;						\
 } while (0)
 
+#define	IFCQ_LEN_SC(_ifq, _sc, _packets, _bytes, _err) do {		\
+	cqrq_stat_sc_t _req = { _sc, 0, 0 };				\
+	(_err) = (*(ifq)->ifcq_request)(_ifq, CLASSQRQ_STAT_SC, &_req);	\
+	if ((_packets) != NULL)						\
+		(*(_packets)) = _req.packets;				\
+	if ((_bytes) != NULL)						\
+		(*(_bytes)) = _req.bytes;				\
+} while (0)
+
 #define	IFCQ_LEN(_ifcq)		((_ifcq)->ifcq_len)
 #define	IFCQ_QFULL(_ifcq)	(IFCQ_LEN(_ifcq) >= (_ifcq)->ifcq_maxlen)
 #define	IFCQ_IS_EMPTY(_ifcq)	(IFCQ_LEN(_ifcq) == 0)
@@ -336,7 +354,8 @@ extern void ifclassq_teardown(struct ifnet *);
 extern int ifclassq_pktsched_setup(struct ifclassq *);
 extern void ifclassq_set_maxlen(struct ifclassq *, u_int32_t);
 extern u_int32_t ifclassq_get_maxlen(struct ifclassq *);
-extern u_int32_t ifclassq_get_len(struct ifclassq *);
+extern int ifclassq_get_len(struct ifclassq *, mbuf_svc_class_t,
+    u_int32_t *, u_int32_t *);
 extern errno_t ifclassq_enqueue(struct ifclassq *, struct mbuf *);
 extern errno_t ifclassq_dequeue(struct ifclassq *, u_int32_t,
     struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *);
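IFCQ_LEN_SC() is an instance of the classq request pattern: fill in a cqrq_stat_sc_t, dispatch it through the scheduler's ifcq_request hook, and copy the outputs back. A minimal sketch of that call shape (stand-in handler and canned numbers, not the kernel's types):

#include <stdio.h>

enum cqrq { RQ_STAT_SC = 5 };

struct stat_sc_req {
	int	sc;		/* in:  service class */
	unsigned int packets;	/* out: packets queued */
	unsigned int bytes;	/* out: bytes queued */
};

/* Stand-in for a scheduler's ifcq_request hook. */
static int
sched_request(enum cqrq op, void *arg)
{
	if (op == RQ_STAT_SC) {
		struct stat_sc_req *r = arg;
		r->packets = 10;	/* canned numbers for the demo */
		r->bytes = 15000;
		return (0);
	}
	return (-1);
}

int
main(void)
{
	struct stat_sc_req req = { 3, 0, 0 };

	/* same shape as IFCQ_LEN_SC(): fill request, call hook, copy out */
	if (sched_request(RQ_STAT_SC, &req) == 0)
		printf("sc %d: %u pkts, %u bytes\n",
		    req.sc, req.packets, req.bytes);
	return (0);
}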
diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c
index af80a4a40..ff2d31cf0 100644
--- a/bsd/net/dlil.c
+++ b/bsd/net/dlil.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 1999-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 1999-2013 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
- * 
+ *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
- * 
+ *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
@@ -31,6 +31,7 @@
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */
+#include
 #include
 #include
@@ -49,9 +50,13 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
@@ -59,13 +64,15 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include
 
 #if INET
 #include
@@ -83,22 +90,24 @@
 #include
 #include
 #include
+#include
 #endif /* INET6 */
 
-#if NETAT
-#include
-#endif /* NETAT */
-
 #include
+#include
+#include
 #include
 #include
 #include
-#if CONFIG_MACF_NET
+#if CONFIG_MACF
+#include
 #include
-#endif /* MAC_NET */
+#include
+#include
+#endif
 
 #if PF
 #include
@@ -114,7 +123,6 @@
 #define	DBG_FNC_DLIL_OUTPUT	DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8))
 #define	DBG_FNC_DLIL_IFOUT	DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8))
 
-
 #define	MAX_FRAME_TYPE_SIZE	4	/* LONGWORDS */
 #define	MAX_LINKADDR		4	/* LONGWORDS */
 #define	M_NKE			M_IFADDR
@@ -190,6 +198,7 @@ struct dlil_ifnet {
 	void	*dl_if_uniqueid;	/* unique interface id */
 	size_t	dl_if_uniqueid_len;	/* length of the unique id */
 	char	dl_if_namestorage[IFNAMSIZ]; /* interface name storage */
+	char	dl_if_xnamestorage[IFXNAMSIZ]; /* external name storage */
 	struct {
 		struct ifaddr	ifa;	/* lladdr ifa */
 		u_int8_t	asdl[DLIL_SDLMAXLEN]; /* addr storage */
@@ -228,6 +237,7 @@ struct dlil_ifnet_dbg {
 struct ifnet_filter {
 	TAILQ_ENTRY(ifnet_filter)	filt_next;
 	u_int32_t			filt_skip;
+	u_int32_t			filt_flags;
 	ifnet_t				filt_ifp;
 	const char			*filt_name;
 	void				*filt_cookie;
@@ -250,20 +260,7 @@ static lck_grp_t *ifnet_rcv_lock_group;
 lck_attr_t *ifnet_lock_attr;
 decl_lck_rw_data(static, ifnet_head_lock);
 decl_lck_mtx_data(static, dlil_ifnet_lock);
-u_int32_t dlil_filter_count = 0;
-extern u_int32_t ipv4_ll_arp_aware;
-
-struct sfb_fc_list ifnet_fclist;
-decl_lck_mtx_data(static, ifnet_fclist_lock);
-
-static unsigned int ifnet_fcezone_size;	/* size of ifnet_fce */
-static struct zone *ifnet_fcezone;	/* zone for ifnet_fce */
-
-#define	IFNET_FCEZONE_MAX	32	/* maximum elements in zone */
-#define	IFNET_FCEZONE_NAME	"ifnet_fcezone"	/* zone name */
-
-static void ifnet_fc_thread_func(void *, wait_result_t);
-static void ifnet_fc_init(void);
+u_int32_t dlil_filter_disable_tso_count = 0;
 
 #if DEBUG
 static unsigned int ifnet_debug = 1;	/* debugging (enabled) */
@@ -369,12 +366,10 @@ static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t,
 static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t);
 static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *);
 static errno_t ifp_if_framer(struct ifnet *, struct mbuf **,
-    const struct sockaddr *, const char *, const char *
-#if CONFIG_EMBEDDED
-    ,
-    u_int32_t *, u_int32_t *
-#endif /* CONFIG_EMBEDDED */
-    );
+    const struct sockaddr *, const char *, const char *);
+static errno_t ifp_if_framer_extended(struct ifnet *, struct mbuf **,
+    const struct sockaddr *, const char *, const char *,
+    u_int32_t *, u_int32_t *);
 static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func);
 static void ifp_if_free(struct ifnet *);
 static void ifp_if_event(struct ifnet *, const struct kev_msg *);
@@ -384,7 +379,6 @@ static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *);
 
 static void dlil_main_input_thread_func(void *, wait_result_t);
 static void dlil_input_thread_func(void *, wait_result_t);
 static void dlil_rxpoll_input_thread_func(void *, wait_result_t);
-static void dlil_rxpoll_calc_limits(struct dlil_threading_info *);
 static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *);
 static void dlil_terminate_input_thread(struct dlil_threading_info *);
 static void dlil_input_stats_add(const struct ifnet_stat_increment_param *,
@@ -395,6 +389,14 @@ static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *,
 static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *,
     const struct ifnet_stat_increment_param *, boolean_t, boolean_t);
 
+#if DEBUG
+static void dlil_verify_sum16(void);
+#endif /* DEBUG */
+static void dlil_output_cksum_dbg(struct ifnet *, struct mbuf *, uint32_t,
+    protocol_family_t);
+static void dlil_input_cksum_dbg(struct ifnet *, struct mbuf *, char *,
+    protocol_family_t);
+
 static void ifnet_detacher_thread_func(void *, wait_result_t);
 static int ifnet_detacher_thread_cont(int);
 static void ifnet_detach_final(struct ifnet *);
@@ -413,14 +415,63 @@ static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *);
 #endif /* INET6 */
 
 static int sysctl_rxpoll SYSCTL_HANDLER_ARGS;
+static int sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS;
+static int sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS;
+static int sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS;
+static int sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS;
+static int sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS;
 static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS;
 static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS;
+static int sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS;
+static int sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS;
+static int sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS;
 
 /* The following are protected by dlil_ifnet_lock */
 static TAILQ_HEAD(, ifnet) ifnet_detaching_head;
 static u_int32_t ifnet_detaching_cnt;
 static void *ifnet_delayed_run;	/* wait channel for detaching thread */
 
+decl_lck_mtx_data(static, ifnet_fc_lock);
+
+static uint32_t ifnet_flowhash_seed;
+
+struct ifnet_flowhash_key {
+	char		ifk_name[IFNAMSIZ];
+	uint32_t	ifk_unit;
+	uint32_t	ifk_flags;
+	uint32_t	ifk_eflags;
+	uint32_t	ifk_capabilities;
+	uint32_t	ifk_capenable;
+	uint32_t	ifk_output_sched_model;
+	uint32_t	ifk_rand1;
+	uint32_t	ifk_rand2;
+};
+
+/* Flow control entry per interface */
+struct ifnet_fc_entry {
+	RB_ENTRY(ifnet_fc_entry) ifce_entry;
+	u_int32_t	ifce_flowhash;
+	struct ifnet	*ifce_ifp;
+};
+
+static uint32_t ifnet_calc_flowhash(struct ifnet *);
+static int ifce_cmp(const struct ifnet_fc_entry *,
+    const struct ifnet_fc_entry *);
+static int ifnet_fc_add(struct ifnet *);
+static struct ifnet_fc_entry *ifnet_fc_get(u_int32_t);
+static void ifnet_fc_entry_free(struct ifnet_fc_entry *);
+
+/* protected by ifnet_fc_lock */
+RB_HEAD(ifnet_fc_tree, ifnet_fc_entry) ifnet_fc_tree;
+RB_PROTOTYPE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
+RB_GENERATE(ifnet_fc_tree, ifnet_fc_entry, ifce_entry, ifce_cmp);
+
+static unsigned int ifnet_fc_zone_size;	/* sizeof ifnet_fc_entry */
+static struct zone *ifnet_fc_zone;	/* ifnet_fc_entry zone */
+
+#define	IFNET_FC_ZONE_NAME	"ifnet_fc_zone"
+#define	IFNET_FC_ZONE_MAX	32
+
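ifnet_calc_flowhash() presumably folds the ifnet_flowhash_key fields, including the two random values, into a per-interface hash under ifnet_flowhash_seed. A rough user-space analogue (toy FNV-1a in place of the kernel's hash routine; field set trimmed down, all names here hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct key {
	char	 name[16];
	uint32_t unit, flags, rand1, rand2;
};

/* Toy FNV-1a; the kernel hashes its key with its own seeded routine. */
static uint32_t
fnv1a(const void *buf, size_t len, uint32_t seed)
{
	const uint8_t *p = buf;
	uint32_t h = 2166136261U ^ seed;
	size_t i;

	for (i = 0; i < len; i++) {
		h ^= p[i];
		h *= 16777619U;
	}
	return (h);
}

int
main(void)
{
	struct key k;
	uint32_t h;

	/* bzero first so struct padding can't leak into the hash */
	memset(&k, 0, sizeof (k));
	strncpy(k.name, "en0", sizeof (k.name) - 1);
	k.unit = 0;
	k.rand1 = 0x5a5a5a5a;	/* per-boot salts, like ifk_rand1/ifk_rand2 */
	k.rand2 = 0xa5a5a5a5;

	h = fnv1a(&k, sizeof (k), 0x1234);
	if (h == 0)		/* reserve 0 as "no flowhash" */
		h = 1;
	printf("flowhash: 0x%08x\n", h);
	return (0);
}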
 extern void bpfdetach(struct ifnet*);
 extern void proto_input_run(void);
 
@@ -431,10 +482,14 @@ extern uint32_t tcp_count_opportunistic(unsigned int ifindex,
 __private_extern__ void link_rtrequest(int, struct rtentry *,
     struct sockaddr *);
 
+#if CONFIG_MACF
+int dlil_lladdr_ckreq = 0;
+#endif
+
 #if DEBUG
-static int dlil_verbose = 1;
+int dlil_verbose = 1;
 #else
-static int dlil_verbose = 0;
+int dlil_verbose = 0;
 #endif /* DEBUG */
 #if IFNET_INPUT_SANITY_CHK
 /* sanity checking of input packet lists received */
@@ -445,6 +500,12 @@ struct timespec dlil_dbgrate = { 1, 0 };
 
 SYSCTL_DECL(_net_link_generic_system);
 
+#if CONFIG_MACF
+SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_lladdr_ckreq,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_lladdr_ckreq, 0,
+    "Require MACF system info check to expose link-layer address");
+#endif
+
 SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose,
     CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages");
 
@@ -461,47 +522,55 @@ SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN,
     sysctl_rcvq_maxlen, "I", "Default receive queue max length");
 
-#define	IF_RXPOLL_DECAY	2	/* ilog2 of EWMA decay rate (4) */
+#define	IF_RXPOLL_DECAY	2		/* ilog2 of EWMA decay rate (4) */
 static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY;
 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay,
     CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY,
     "ilog2 of EWMA decay rate of avg inbound packets");
 
-#define	IF_RXPOLL_MODE_HOLDTIME	(1000ULL * 1000 * 1000)	/* 1 sec */
+#define	IF_RXPOLL_MODE_HOLDTIME_MIN	(10ULL * 1000 * 1000)	/* 10 ms */
+#define	IF_RXPOLL_MODE_HOLDTIME		(1000ULL * 1000 * 1000)	/* 1 sec */
 static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME;
-SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
-    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
-    "input poll mode freeze time");
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time,
+    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime,
+    IF_RXPOLL_MODE_HOLDTIME, sysctl_rxpoll_mode_holdtime,
+    "Q", "input poll mode freeze time");
 
-#define	IF_RXPOLL_SAMPLETIME	(10ULL * 1000 * 1000)	/* 10 ms */
+#define	IF_RXPOLL_SAMPLETIME_MIN	(1ULL * 1000 * 1000)	/* 1 ms */
+#define	IF_RXPOLL_SAMPLETIME		(10ULL * 1000 * 1000)	/* 10 ms */
 static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME;
-SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
-    CTLFLAG_RD | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
-    "input poll sampling time");
-
-#define	IF_RXPOLL_INTERVAL_TIME	(1ULL * 1000 * 1000)	/* 1 ms */
-static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVAL_TIME;
-SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
-    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
-    "input poll interval (time)");
-
-#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_sample_time,
+    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime,
+    IF_RXPOLL_SAMPLETIME, sysctl_rxpoll_sample_holdtime,
+    "Q", "input poll sampling time");
+
+#define	IF_RXPOLL_INTERVALTIME_MIN	(1ULL * 1000)		/* 1 us */
+#define	IF_RXPOLL_INTERVALTIME		(1ULL * 1000 * 1000)	/* 1 ms */
+static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVALTIME;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_interval_time,
+    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time,
+    IF_RXPOLL_INTERVALTIME, sysctl_rxpoll_interval_time,
+    "Q", "input poll interval (time)");
+
+#define	IF_RXPOLL_INTERVAL_PKTS	0	/* 0 (disabled) */
 static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS;
 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts,
     CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts,
     IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)");
 
-#define	IF_RXPOLL_WLOWAT	5
+#define	IF_RXPOLL_WLOWAT	10
 static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT;
-SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
-    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat, IF_RXPOLL_WLOWAT,
-    "input poll wakeup low watermark");
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat,
+    IF_RXPOLL_WLOWAT, sysctl_rxpoll_wlowat,
+    "I", "input poll wakeup low watermark");
 
-#define	IF_RXPOLL_WHIWAT	100
+#define	IF_RXPOLL_WHIWAT	100
 static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT;
-SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
-    CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat, IF_RXPOLL_WHIWAT,
-    "input poll wakeup high watermark");
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat,
+    IF_RXPOLL_WHIWAT, sysctl_rxpoll_whiwat,
+    "I", "input poll wakeup high watermark");
 
 static u_int32_t if_rxpoll_max = 0;		/* 0 (automatic) */
 SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max,
@@ -537,6 +606,91 @@ SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory,
     CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1,
     "enable flow-advisory mechanism");
 
+static uint64_t hwcksum_in_invalidated = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_in_invalidated, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_in_invalidated, "inbound packets with invalidated hardware cksum");
+
+uint32_t hwcksum_dbg = 0;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_dbg,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg, 0,
+    "enable hardware cksum debugging");
+
+#define	HWCKSUM_DBG_PARTIAL_FORCED	0x1	/* forced partial checksum */
+#define	HWCKSUM_DBG_PARTIAL_RXOFF_ADJ	0x2	/* adjust start offset */
+#define	HWCKSUM_DBG_FINALIZE_FORCED	0x10	/* forced finalize */
+#define	HWCKSUM_DBG_MASK						\
+	(HWCKSUM_DBG_PARTIAL_FORCED | HWCKSUM_DBG_PARTIAL_RXOFF_ADJ |	\
+	HWCKSUM_DBG_FINALIZE_FORCED)
+
+static uint32_t hwcksum_dbg_mode = 0;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_mode,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_mode,
+    0, sysctl_hwcksum_dbg_mode, "I", "hardware cksum debugging mode");
+
+static uint64_t hwcksum_dbg_partial_forced = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_partial_forced, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_dbg_partial_forced, "packets forced using partial cksum");
+
+static uint64_t hwcksum_dbg_partial_forced_bytes = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_partial_forced_bytes, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_dbg_partial_forced_bytes, "bytes forced using partial cksum");
+
+static uint32_t hwcksum_dbg_partial_rxoff_forced = 0;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_partial_rxoff_forced, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+    &hwcksum_dbg_partial_rxoff_forced, 0,
+    sysctl_hwcksum_dbg_partial_rxoff_forced, "I",
+    "forced partial cksum rx offset");
+
+static uint32_t hwcksum_dbg_partial_rxoff_adj = 0;
+SYSCTL_PROC(_net_link_generic_system, OID_AUTO, hwcksum_dbg_partial_rxoff_adj,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_dbg_partial_rxoff_adj,
+    0, sysctl_hwcksum_dbg_partial_rxoff_adj, "I",
+    "adjusted partial cksum rx offset");
+
+static uint64_t hwcksum_dbg_verified = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_verified, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_dbg_verified, "packets verified for having good checksum");
+
+static uint64_t hwcksum_dbg_bad_cksum = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_bad_cksum, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_dbg_bad_cksum, "packets with bad hardware calculated checksum");
+
+static uint64_t hwcksum_dbg_bad_rxoff = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_bad_rxoff, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_dbg_bad_rxoff, "packets with invalid rxoff");
+
+static uint64_t hwcksum_dbg_adjusted = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_adjusted, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_dbg_adjusted, "packets with rxoff adjusted");
+
+static uint64_t hwcksum_dbg_finalized_hdr = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_finalized_hdr, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_dbg_finalized_hdr, "finalized headers");
+
+static uint64_t hwcksum_dbg_finalized_data = 0;
+SYSCTL_QUAD(_net_link_generic_system, OID_AUTO,
+    hwcksum_dbg_finalized_data, CTLFLAG_RD | CTLFLAG_LOCKED,
+    &hwcksum_dbg_finalized_data, "finalized payloads");
+
+uint32_t hwcksum_tx = 1;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_tx,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_tx, 0,
+    "enable transmit hardware checksum offload");
+
+uint32_t hwcksum_rx = 1;
+SYSCTL_UINT(_net_link_generic_system, OID_AUTO, hwcksum_rx,
+    CTLFLAG_RW | CTLFLAG_LOCKED, &hwcksum_rx, 0,
+    "enable receive hardware checksum offload");
+
 unsigned int net_rxpoll = 1;
 unsigned int net_affinity = 1;
 static kern_return_t dlil_affinity_set(struct thread *, u_int32_t);
@@ -546,7 +700,6 @@ extern u_int32_t inject_buckets;
 static lck_grp_attr_t *dlil_grp_attributes = NULL;
 static lck_attr_t *dlil_lck_attributes = NULL;
 
-#define	PROTO_HASH_SLOTS	0x5
 
 #define	DLIL_INPUT_CHECK(m, ifp) {					\
 	struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m);			\
@@ -586,11 +739,7 @@ static struct rxpoll_time_tbl rxpoll_tbl[] = {
 	{ 0, 0, 0, 0, 0 }
 };
 
-/*
- * Internal functions.
- */
-
-static int
+int
 proto_hash_value(u_int32_t protocol_family)
 {
 	/*
@@ -603,13 +752,11 @@ proto_hash_value(u_int32_t protocol_family)
 		return (0);
 	case PF_INET6:
 		return (1);
-	case PF_APPLETALK:
-		return (2);
 	case PF_VLAN:
-		return (3);
+		return (2);
 	case PF_UNSPEC:
 	default:
-		return (4);
+		return (3);
 	}
 }
 
@@ -740,6 +887,26 @@ ifnet_lock_done(struct ifnet *ifp)
 	lck_rw_done(&ifp->if_lock);
 }
 
+#if INET6
+__private_extern__ void
+if_inet6data_lock_shared(struct ifnet *ifp)
+{
+	lck_rw_lock_shared(&ifp->if_inet6data_lock);
+}
+
+__private_extern__ void
+if_inet6data_lock_exclusive(struct ifnet *ifp)
+{
+	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
+}
+
+__private_extern__ void
+if_inet6data_lock_done(struct ifnet *ifp)
+{
+	lck_rw_done(&ifp->if_inet6data_lock);
+}
+#endif
+
 __private_extern__ void
 ifnet_head_lock_shared(void)
 {
@@ -913,12 +1080,12 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
 		func = dlil_rxpoll_input_thread_func;
 		VERIFY(inp != dlil_main_input_thread);
 		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
-		    "%s%d_input_poll", ifp->if_name, ifp->if_unit);
+		    "%s_input_poll", if_name(ifp));
 	} else {
 		func = dlil_input_thread_func;
 		VERIFY(inp != dlil_main_input_thread);
 		(void) snprintf(inp->input_name, DLIL_THREADNAME_LEN,
-		    "%s%d_input", ifp->if_name, ifp->if_unit);
+		    "%s_input", if_name(ifp));
 	}
 
 	VERIFY(inp->input_thr == THREAD_NULL);
@@ -942,7 +1109,7 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
 	 */
 	if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) {
 		limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN);
-		dlil_rxpoll_calc_limits(inp);
+		(void) dlil_rxpoll_set_params(ifp, NULL, FALSE);
 	} else {
 		limit = (u_int32_t)-1;
 	}
@@ -981,8 +1148,8 @@ dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp)
 		panic_plain("%s: couldn't create main input thread", __func__);
 		/* NOTREACHED */
 	} else {
-		panic_plain("%s: couldn't create %s%d input thread", __func__,
-		    ifp->if_name, ifp->if_unit);
+		panic_plain("%s: couldn't create %s input thread", __func__,
+		    if_name(ifp));
 		/* NOTREACHED */
 	}
 	OSAddAtomic(1, &cur_dlil_input_threads);
@@ -1034,8 +1201,8 @@ dlil_terminate_input_thread(struct dlil_threading_info *inp)
 #endif /* IFNET_INPUT_SANITY_CHK */
 
 	if (dlil_verbose) {
-		printf("%s%d: input thread terminated\n",
-		    ifp->if_name, ifp->if_unit);
+		printf("%s: input thread terminated\n",
+		    if_name(ifp));
 	}
 
 	/* for the extra refcnt from kernel_thread_start() */
@@ -1077,6 +1244,9 @@ dlil_init(void)
 	IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
 	IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
 	IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
+	IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
 
 	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets);
 	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors)
@@ -1090,6 +1260,9 @@ dlil_init(void)
 	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops);
 	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto);
 	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_dt_bytes);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fpackets);
+	IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_fbytes);
 
 	/*
	 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts.
@@ -1099,18 +1272,70 @@ dlil_init(void)
 	_CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP);
 	_CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT);
 	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT);
-	_CASSERT(IF_HWASSIST_CSUM_TCP_SUM16 == IFNET_CSUM_SUM16);
+	_CASSERT(IF_HWASSIST_CSUM_TCPIPV6 == IFNET_CSUM_TCPIPV6);
+	_CASSERT(IF_HWASSIST_CSUM_UDPIPV6 == IFNET_CSUM_UDPIPV6);
+	_CASSERT(IF_HWASSIST_CSUM_FRAGMENT_IPV6 == IFNET_IPV6_FRAGMENT);
+	_CASSERT(IF_HWASSIST_CSUM_PARTIAL == IFNET_CSUM_PARTIAL);
 	_CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING);
 	_CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU);
 	_CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4);
 	_CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6);
 
+	/*
+	 * ... as well as the mbuf checksum flags counterparts.
+	 */
+	_CASSERT(CSUM_IP == IF_HWASSIST_CSUM_IP);
+	_CASSERT(CSUM_TCP == IF_HWASSIST_CSUM_TCP);
+	_CASSERT(CSUM_UDP == IF_HWASSIST_CSUM_UDP);
+	_CASSERT(CSUM_IP_FRAGS == IF_HWASSIST_CSUM_IP_FRAGS);
+	_CASSERT(CSUM_FRAGMENT == IF_HWASSIST_CSUM_FRAGMENT);
+	_CASSERT(CSUM_TCPIPV6 == IF_HWASSIST_CSUM_TCPIPV6);
+	_CASSERT(CSUM_UDPIPV6 == IF_HWASSIST_CSUM_UDPIPV6);
+	_CASSERT(CSUM_FRAGMENT_IPV6 == IF_HWASSIST_CSUM_FRAGMENT_IPV6);
+	_CASSERT(CSUM_PARTIAL == IF_HWASSIST_CSUM_PARTIAL);
+	_CASSERT(CSUM_VLAN_TAG_VALID == IF_HWASSIST_VLAN_TAGGING);
+
 	/*
 	 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info.
 	 */
 	_CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN);
 	_CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN);
 
+	_CASSERT(IFRLOGF_DLIL == IFNET_LOGF_DLIL);
+	_CASSERT(IFRLOGF_FAMILY == IFNET_LOGF_FAMILY);
+	_CASSERT(IFRLOGF_DRIVER == IFNET_LOGF_DRIVER);
+	_CASSERT(IFRLOGF_FIRMWARE == IFNET_LOGF_FIRMWARE);
+
+	_CASSERT(IFRLOGCAT_CONNECTIVITY == IFNET_LOGCAT_CONNECTIVITY);
+	_CASSERT(IFRLOGCAT_QUALITY == IFNET_LOGCAT_QUALITY);
+	_CASSERT(IFRLOGCAT_PERFORMANCE == IFNET_LOGCAT_PERFORMANCE);
+
+	_CASSERT(IFRTYPE_FAMILY_ANY == IFNET_FAMILY_ANY);
+	_CASSERT(IFRTYPE_FAMILY_LOOPBACK == IFNET_FAMILY_LOOPBACK);
+	_CASSERT(IFRTYPE_FAMILY_ETHERNET == IFNET_FAMILY_ETHERNET);
+	_CASSERT(IFRTYPE_FAMILY_SLIP == IFNET_FAMILY_SLIP);
+	_CASSERT(IFRTYPE_FAMILY_TUN == IFNET_FAMILY_TUN);
+	_CASSERT(IFRTYPE_FAMILY_VLAN == IFNET_FAMILY_VLAN);
+	_CASSERT(IFRTYPE_FAMILY_PPP == IFNET_FAMILY_PPP);
+	_CASSERT(IFRTYPE_FAMILY_PVC == IFNET_FAMILY_PVC);
+	_CASSERT(IFRTYPE_FAMILY_DISC == IFNET_FAMILY_DISC);
+	_CASSERT(IFRTYPE_FAMILY_MDECAP == IFNET_FAMILY_MDECAP);
+	_CASSERT(IFRTYPE_FAMILY_GIF == IFNET_FAMILY_GIF);
+	_CASSERT(IFRTYPE_FAMILY_FAITH == IFNET_FAMILY_FAITH);
+	_CASSERT(IFRTYPE_FAMILY_STF == IFNET_FAMILY_STF);
+	_CASSERT(IFRTYPE_FAMILY_FIREWIRE == IFNET_FAMILY_FIREWIRE);
+	_CASSERT(IFRTYPE_FAMILY_BOND == IFNET_FAMILY_BOND);
+	_CASSERT(IFRTYPE_FAMILY_CELLULAR == IFNET_FAMILY_CELLULAR);
+
+	_CASSERT(IFRTYPE_SUBFAMILY_ANY == IFNET_SUBFAMILY_ANY);
+	_CASSERT(IFRTYPE_SUBFAMILY_USB == IFNET_SUBFAMILY_USB);
+	_CASSERT(IFRTYPE_SUBFAMILY_BLUETOOTH == IFNET_SUBFAMILY_BLUETOOTH);
+	_CASSERT(IFRTYPE_SUBFAMILY_WIFI == IFNET_SUBFAMILY_WIFI);
+	_CASSERT(IFRTYPE_SUBFAMILY_THUNDERBOLT == IFNET_SUBFAMILY_THUNDERBOLT);
+
+	_CASSERT(DLIL_MODIDLEN == IFNET_MODIDLEN);
+	_CASSERT(DLIL_MODARGLEN == IFNET_MODARGLEN);
+
 	PE_parse_boot_argn("net_affinity", &net_affinity,
 	    sizeof (net_affinity));
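The _CASSERT() wall above pins the userland-visible IFR* constants to their kernel IFNET_* twins at compile time. The usual trick behind such macros is a typedef whose array size goes negative on mismatch; a self-contained sketch of the idiom (hypothetical macro name, not xnu's _CASSERT definition):

#include <stdio.h>

/* Classic negative-array-size trick behind compile-time asserts. */
#define MY_CASSERT(x) \
	typedef char my_cassert_failed[(x) ? 1 : -1]

#define FLAG_A_PUBLIC	0x04
#define FLAG_A_KERNEL	0x04	/* must stay in lock step */

MY_CASSERT(FLAG_A_PUBLIC == FLAG_A_KERNEL);	/* breaks the build if not */

int
main(void)
{
	printf("flag namespaces agree\n");
	return (0);
}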
@@ -1231,12 +1456,45 @@ dlil_init(void)
 	    dlil_lck_attributes);
 	lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes);
 
-	ifnet_fc_init();
+	/* Setup interface flow control related items */
+	lck_mtx_init(&ifnet_fc_lock, dlil_lock_group, dlil_lck_attributes);
 
-	lck_attr_free(dlil_lck_attributes);
-	dlil_lck_attributes = NULL;
+	ifnet_fc_zone_size = sizeof (struct ifnet_fc_entry);
+	ifnet_fc_zone = zinit(ifnet_fc_zone_size,
+	    IFNET_FC_ZONE_MAX * ifnet_fc_zone_size, 0, IFNET_FC_ZONE_NAME);
+	if (ifnet_fc_zone == NULL) {
+		panic_plain("%s: failed allocating %s", __func__,
+		    IFNET_FC_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(ifnet_fc_zone, Z_EXPAND, TRUE);
+	zone_change(ifnet_fc_zone, Z_CALLERACCT, FALSE);
 
+	/* Initialize interface address subsystem */
 	ifa_init();
+
+#if PF
+	/* Initialize the packet filter */
+	pfinit();
+#endif /* PF */
+
+	/* Initialize queue algorithms */
+	classq_init();
+
+	/* Initialize packet schedulers */
+	pktsched_init();
+
+	/* Initialize flow advisory subsystem */
+	flowadv_init();
+
+	/* Initialize the pktap virtual interface */
+	pktap_init();
+
+#if DEBUG
+	/* Run self-tests */
+	dlil_verify_sum16();
+#endif /* DEBUG */
+
 	/*
 	 * Create and start up the main DLIL input thread and the interface
 	 * detacher threads once everything is initialized.
@@ -1249,17 +1507,6 @@ dlil_init(void)
 		/* NOTREACHED */
 	}
 	thread_deallocate(thread);
-
-#if PF
-	/* Initialize the packet filter */
-	pfinit();
-#endif /* PF */
-
-	/* Initialize queue algorithms */
-	classq_init();
-
-	/* Initialize packet schedulers */
-	pktsched_init();
 }
 
 static void
@@ -1306,7 +1553,7 @@ if_flt_monitor_leave(struct ifnet *ifp)
 
 __private_extern__ int
 dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
-	interface_filter_t *filter_ref)
+	interface_filter_t *filter_ref, u_int32_t flags)
 {
 	int retval = 0;
 	struct ifnet_filter *filter = NULL;
@@ -1326,6 +1573,7 @@ dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
 	bzero(filter, dlif_filt_size);
 
 	/* refcnt held above during lookup */
+	filter->filt_flags = flags;
 	filter->filt_ifp = ifp;
 	filter->filt_cookie = if_filter->iff_cookie;
 	filter->filt_name = if_filter->iff_name;
@@ -1351,19 +1599,19 @@ dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter,
 	 * Bump filter count and route_generation ID to let TCP
 	 * know it shouldn't do TSO on this connection
 	 */
-	OSAddAtomic(1, &dlil_filter_count);
-	if (use_routegenid)
+	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
+		OSAddAtomic(1, &dlil_filter_disable_tso_count);
 		routegenid_update();
-
+	}
 	if (dlil_verbose) {
-		printf("%s%d: %s filter attached\n", ifp->if_name,
-		    ifp->if_unit, if_filter->iff_name);
+		printf("%s: %s filter attached\n", if_name(ifp),
+		    if_filter->iff_name);
 	}
 done:
 	ifnet_head_done();
 	if (retval != 0 && ifp != NULL) {
-		DLIL_PRINTF("%s%d: failed to attach %s (err=%d)\n",
-		    ifp->if_name, ifp->if_unit, if_filter->iff_name, retval);
+		DLIL_PRINTF("%s: failed to attach %s (err=%d)\n",
+		    if_name(ifp), if_filter->iff_name, retval);
 	}
 	if (retval != 0 && filter != NULL)
 		zfree(dlif_filt_zone, filter);
@@ -1410,9 +1658,8 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached)
 			if_flt_monitor_leave(ifp);
 			lck_mtx_unlock(&ifp->if_flt_lock);
 			if (dlil_verbose) {
-				printf("%s%d: %s filter detached\n",
-				    ifp->if_name, ifp->if_unit,
-				    filter->filt_name);
+				printf("%s: %s filter detached\n",
+				    if_name(ifp), filter->filt_name);
 			}
 			goto destroy;
 		}
@@ -1441,10 +1688,10 @@ destroy:
 	 * Decrease filter count and route_generation ID to let TCP
 	 * know it should reevaluate doing TSO or not
 	 */
-	OSAddAtomic(-1, &dlil_filter_count);
-	if (use_routegenid)
+	if ((filter->filt_flags & DLIL_IFF_TSO) == 0) {
+		OSAddAtomic(-1, &dlil_filter_disable_tso_count);
 		routegenid_update();
-
+	}
 done:
 	if (retval != 0) {
DLIL_PRINTF("failed to detach %s filter (err=%d)\n", @@ -1509,7 +1756,7 @@ dlil_main_input_thread_func(void *v, wait_result_t w) m_cnt = qlen(&inp->rcvq_pkts); m = _getq_all(&inp->rcvq_pkts); - /* Packets exclusive for lo0 */ + /* Packets exclusive to lo0 */ m_cnt_loop = qlen(&inpm->lo_rcvq_pkts); m_loop = _getq_all(&inpm->lo_rcvq_pkts); @@ -1634,21 +1881,24 @@ dlil_rxpoll_input_thread_func(void *v, wait_result_t w) u_int32_t m_cnt, m_size, poll_req = 0; ifnet_model_t mode; struct timespec now, delta; + u_int64_t ival; lck_mtx_lock_spin(&inp->input_lck); + if ((ival = inp->rxpoll_ival) < IF_RXPOLL_INTERVALTIME_MIN) + ival = IF_RXPOLL_INTERVALTIME_MIN; + /* Link parameters changed? */ if (ifp->if_poll_update != 0) { ifp->if_poll_update = 0; - dlil_rxpoll_calc_limits(inp); + (void) dlil_rxpoll_set_params(ifp, NULL, TRUE); } /* Current operating mode */ mode = inp->mode; /* Wait until there is work to be done */ - while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING) && - qempty(&inp->rcvq_pkts)) { + while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) { inp->input_waiting &= ~DLIL_INPUT_RUNNING; (void) msleep(&inp->input_waiting, &inp->input_lck, (PZERO - 1) | PSPIN, inp->input_name, NULL); @@ -1736,11 +1986,11 @@ dlil_rxpoll_input_thread_func(void *v, wait_result_t w) net_timersub(&now, &inp->dbg_lasttime, &delta); if (net_timercmp(&delta, &dlil_dbgrate, >=)) { *(&inp->dbg_lasttime) = *(&now); - printf("%s%d: [%s] pkts avg %d max %d " + printf("%s: [%s] pkts avg %d max %d " "limits [%d/%d], wreq avg %d " "limits [%d/%d], bytes avg %d " - "limits [%d/%d]\n", ifp->if_name, - ifp->if_unit, (inp->mode == + "limits [%d/%d]\n", if_name(ifp), + (inp->mode == IFNET_MODEL_INPUT_POLL_ON) ? "ON" : "OFF", inp->rxpoll_pavg, inp->rxpoll_pmax, @@ -1765,7 +2015,6 @@ dlil_rxpoll_input_thread_func(void *v, wait_result_t w) if (inp->rxpoll_pavg <= inp->rxpoll_plowat && inp->rxpoll_bavg <= inp->rxpoll_blowat && - inp->rxpoll_wavg <= inp->rxpoll_wlowat && inp->mode != IFNET_MODEL_INPUT_POLL_OFF) { mode = IFNET_MODEL_INPUT_POLL_OFF; } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat && @@ -1797,11 +2046,11 @@ skip: errno_t err; if (dlil_verbose) { - printf("%s%d: polling is now %s, " + printf("%s: polling is now %s, " "pkts avg %d max %d limits [%d/%d], " "wreq avg %d limits [%d/%d], " "bytes avg %d limits [%d/%d]\n", - ifp->if_name, ifp->if_unit, + if_name(ifp), (mode == IFNET_MODEL_INPUT_POLL_ON) ? "ON" : "OFF", inp->rxpoll_pavg, inp->rxpoll_pmax, inp->rxpoll_plowat, @@ -1813,8 +2062,8 @@ skip: if ((err = ((*ifp->if_input_ctl)(ifp, IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) { - printf("%s%d: error setting polling mode " - "to %s (%d)\n", ifp->if_name, ifp->if_unit, + printf("%s: error setting polling mode " + "to %s (%d)\n", if_name(ifp), (mode == IFNET_MODEL_INPUT_POLL_ON) ? "ON" : "OFF", err); } @@ -1828,7 +2077,7 @@ skip: break; case IFNET_MODEL_INPUT_POLL_ON: - net_nsectimer(&if_rxpoll_interval_time, &ts); + net_nsectimer(&ival, &ts); ifnet_set_poll_cycle(ifp, &ts); ifnet_poll(ifp); inp->rxpoll_onreq++; @@ -1858,50 +2107,145 @@ skip: VERIFY(0); /* we should never get here */ } -static void -dlil_rxpoll_calc_limits(struct dlil_threading_info *inp) +/* + * Must be called on an attached ifnet (caller is expected to check.) + * Caller may pass NULL for poll parameters to indicate "auto-tuning." 
+ */
+errno_t
+dlil_rxpoll_set_params(struct ifnet *ifp, struct ifnet_poll_params *p,
+    boolean_t locked)
 {
-	struct ifnet *ifp = inp->ifp;
+	struct dlil_threading_info *inp;
 	u_int64_t sample_holdtime, inbw;
 
-	VERIFY(inp != dlil_main_input_thread);
-	VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL));
+	VERIFY(ifp != NULL);
+	if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL)
+		return (ENXIO);
+
+	if (p != NULL) {
+		if ((p->packets_lowat == 0 && p->packets_hiwat != 0) ||
+		    (p->packets_lowat != 0 && p->packets_hiwat == 0))
+			return (EINVAL);
+		if (p->packets_lowat != 0 &&	/* hiwat must be non-zero */
+		    p->packets_lowat >= p->packets_hiwat)
+			return (EINVAL);
+		if ((p->bytes_lowat == 0 && p->bytes_hiwat != 0) ||
+		    (p->bytes_lowat != 0 && p->bytes_hiwat == 0))
+			return (EINVAL);
+		if (p->bytes_lowat != 0 &&	/* hiwat must be non-zero */
+		    p->bytes_lowat >= p->bytes_hiwat)
+			return (EINVAL);
+		if (p->interval_time != 0 &&
+		    p->interval_time < IF_RXPOLL_INTERVALTIME_MIN)
+			p->interval_time = IF_RXPOLL_INTERVALTIME_MIN;
+	}
+
+	if (!locked)
+		lck_mtx_lock(&inp->input_lck);
+
+	lck_mtx_assert(&inp->input_lck, LCK_MTX_ASSERT_OWNED);
+
+	/*
+	 * Normally, we'd reset the parameters to the auto-tuned values
+	 * if the input thread detects a change in link rate.  If the
+	 * driver provides its own parameters right after a link rate
+	 * changes, but before the input thread gets to run, we want to
+	 * make sure to keep the driver's values.  Clearing if_poll_update
+	 * will achieve that.
+	 */
+	if (p != NULL && !locked && ifp->if_poll_update != 0)
+		ifp->if_poll_update = 0;
 
-	if ((inbw = ifnet_input_linkrate(ifp)) == 0) {
+	if ((inbw = ifnet_input_linkrate(ifp)) == 0 && p == NULL) {
 		sample_holdtime = 0;	/* polling is disabled */
 		inp->rxpoll_wlowat = inp->rxpoll_plowat =
 		    inp->rxpoll_blowat = 0;
 		inp->rxpoll_whiwat = inp->rxpoll_phiwat =
 		    inp->rxpoll_bhiwat = (u_int32_t)-1;
+		inp->rxpoll_plim = 0;
+		inp->rxpoll_ival = IF_RXPOLL_INTERVALTIME_MIN;
 	} else {
+		u_int32_t plowat, phiwat, blowat, bhiwat, plim;
+		u_int64_t ival;
 		unsigned int n, i;
 
-		n = 0;
-		for (i = 0; rxpoll_tbl[i].speed != 0; i++) {
+		for (n = 0, i = 0; rxpoll_tbl[i].speed != 0; i++) {
 			if (inbw < rxpoll_tbl[i].speed)
 				break;
 			n = i;
 		}
 
+		/* auto-tune if caller didn't specify a value */
+		plowat = ((p == NULL || p->packets_lowat == 0) ?
+		    rxpoll_tbl[n].plowat : p->packets_lowat);
+		phiwat = ((p == NULL || p->packets_hiwat == 0) ?
+		    rxpoll_tbl[n].phiwat : p->packets_hiwat);
+		blowat = ((p == NULL || p->bytes_lowat == 0) ?
+		    rxpoll_tbl[n].blowat : p->bytes_lowat);
+		bhiwat = ((p == NULL || p->bytes_hiwat == 0) ?
+		    rxpoll_tbl[n].bhiwat : p->bytes_hiwat);
+		plim = ((p == NULL || p->packets_limit == 0) ?
+		    if_rxpoll_max : p->packets_limit);
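dlil_rxpoll_set_params() accepts a watermark pair only if both values are zero (meaning auto-tune) or both are nonzero with lowat strictly below hiwat. The same invariant in isolation (sketch with made-up caller values):

#include <stdio.h>

/*
 * Valid iff both watermarks are zero (auto-tune) or both are nonzero
 * with lowat strictly below hiwat.
 */
static int
valid_watermarks(unsigned int lowat, unsigned int hiwat)
{
	if ((lowat == 0) != (hiwat == 0))
		return (0);		/* half-specified pair */
	if (lowat != 0 && lowat >= hiwat)
		return (0);		/* inverted or degenerate range */
	return (1);
}

int
main(void)
{
	printf("%d %d %d %d\n",
	    valid_watermarks(0, 0),	/* 1: auto-tune */
	    valid_watermarks(10, 100),	/* 1: sane pair */
	    valid_watermarks(0, 100),	/* 0: half-specified */
	    valid_watermarks(100, 10));	/* 0: inverted */
	return (0);
}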
+ if_rxpoll_interval_time : p->interval_time); + + VERIFY(plowat != 0 && phiwat != 0); + VERIFY(blowat != 0 && bhiwat != 0); + VERIFY(ival >= IF_RXPOLL_INTERVALTIME_MIN); + sample_holdtime = if_rxpoll_sample_holdtime; inp->rxpoll_wlowat = if_rxpoll_wlowat; inp->rxpoll_whiwat = if_rxpoll_whiwat; - inp->rxpoll_plowat = rxpoll_tbl[n].plowat; - inp->rxpoll_phiwat = rxpoll_tbl[n].phiwat; - inp->rxpoll_blowat = rxpoll_tbl[n].blowat; - inp->rxpoll_bhiwat = rxpoll_tbl[n].bhiwat; + inp->rxpoll_plowat = plowat; + inp->rxpoll_phiwat = phiwat; + inp->rxpoll_blowat = blowat; + inp->rxpoll_bhiwat = bhiwat; + inp->rxpoll_plim = plim; + inp->rxpoll_ival = ival; } net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime); net_nsectimer(&sample_holdtime, &inp->sample_holdtime); if (dlil_verbose) { - printf("%s%d: speed %llu bps, sample per %llu nsec, " - "pkt limits [%d/%d], wreq limits [%d/%d], " - "bytes limits [%d/%d]\n", ifp->if_name, ifp->if_unit, - inbw, sample_holdtime, inp->rxpoll_plowat, - inp->rxpoll_phiwat, inp->rxpoll_wlowat, inp->rxpoll_whiwat, - inp->rxpoll_blowat, inp->rxpoll_bhiwat); + printf("%s: speed %llu bps, sample per %llu nsec, " + "poll interval %llu nsec, pkts per poll %u, " + "pkt limits [%u/%u], wreq limits [%u/%u], " + "bytes limits [%u/%u]\n", if_name(ifp), + inbw, sample_holdtime, inp->rxpoll_ival, inp->rxpoll_plim, + inp->rxpoll_plowat, inp->rxpoll_phiwat, inp->rxpoll_wlowat, + inp->rxpoll_whiwat, inp->rxpoll_blowat, inp->rxpoll_bhiwat); } + + if (!locked) + lck_mtx_unlock(&inp->input_lck); + + return (0); +} + +/* + * Must be called on an attached ifnet (caller is expected to check.) + */ +errno_t +dlil_rxpoll_get_params(struct ifnet *ifp, struct ifnet_poll_params *p) +{ + struct dlil_threading_info *inp; + + VERIFY(ifp != NULL && p != NULL); + if (!(ifp->if_eflags & IFEF_RXPOLL) || (inp = ifp->if_inp) == NULL) + return (ENXIO); + + bzero(p, sizeof (*p)); + + lck_mtx_lock(&inp->input_lck); + p->packets_limit = inp->rxpoll_plim; + p->packets_lowat = inp->rxpoll_plowat; + p->packets_hiwat = inp->rxpoll_phiwat; + p->bytes_lowat = inp->rxpoll_blowat; + p->bytes_hiwat = inp->rxpoll_bhiwat; + p->interval_time = inp->rxpoll_ival; + lck_mtx_unlock(&inp->input_lck); + + return (0); } errno_t @@ -1927,24 +2271,30 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, struct dlil_threading_info *inp; u_int32_t m_cnt = 0, m_size = 0; + if ((m_head == NULL && !poll) || (s == NULL && ext)) { + if (m_head != NULL) + mbuf_freem_list(m_head); + return (EINVAL); + } + + VERIFY(m_head != NULL || (s == NULL && m_tail == NULL && !ext && poll)); + VERIFY(m_tail == NULL || ext); + VERIFY(s != NULL || !ext); + /* * Drop the packet(s) if the parameters are invalid, or if the * interface is no longer attached; else hold an IO refcnt to * prevent it from being detached (will be released below.) 
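 */

/*
 * A sketch, under the same assumptions as the auto-tuning a few
 * lines up, of how a row is chosen from rxpoll_tbl: with the table
 * sorted by ascending link speed and terminated by a zero-speed
 * sentinel, the selected row is the last whose speed does not exceed
 * the measured input link rate.  The row layout is trimmed here; the
 * real table also carries byte watermarks.
 */
#include <stdint.h>

struct rx_row {
	uint64_t speed;			/* bits/sec; 0 ends the table */
	uint32_t plowat, phiwat;	/* packet watermarks for this rate */
};

static unsigned int
rx_row_for_rate(const struct rx_row *tbl, uint64_t inbw)
{
	unsigned int n = 0, i;

	for (i = 0; tbl[i].speed != 0; i++) {
		if (inbw < tbl[i].speed)
			break;	/* first row faster than the link: stop */
		n = i;		/* last row not exceeding the link rate */
	}
	return (n);
}

/*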
*/ - if (ifp == NULL || m_head == NULL || (s == NULL && ext) || - (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) { + if (ifp == NULL || (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) { if (m_head != NULL) mbuf_freem_list(m_head); return (EINVAL); } - VERIFY(m_tail == NULL || ext); - VERIFY(s != NULL || !ext); - if (m_tail == NULL) { last = m_head; - while (1) { + while (m_head != NULL) { #if IFNET_INPUT_SANITY_CHK if (dlil_input_sanity_check != 0) DLIL_INPUT_CHECK(last, ifp); @@ -1981,9 +2331,9 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, } if (last != m_tail) { - panic_plain("%s: invalid input packet chain for %s%d, " - "tail mbuf %p instead of %p\n", __func__, ifp->if_name, - ifp->if_unit, m_tail, last); + panic_plain("%s: invalid input packet chain for %s, " + "tail mbuf %p instead of %p\n", __func__, if_name(ifp), + m_tail, last); } /* @@ -1994,9 +2344,9 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, * m_size is just an approximation. */ if (ext && s->packets_in != m_cnt) { - panic_plain("%s: input packet count mismatch for %s%d, " - "%d instead of %d\n", __func__, ifp->if_name, - ifp->if_unit, s->packets_in, m_cnt); + panic_plain("%s: input packet count mismatch for %s, " + "%d instead of %d\n", __func__, if_name(ifp), + s->packets_in, m_cnt); } if ((inp = ifp->if_inp) == NULL) @@ -2034,6 +2384,8 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, lck_mtx_lock_spin(&inp->input_lck); } + VERIFY(m_head != NULL || (m_tail == NULL && m_cnt == 0)); + /* * Because of loopbacked multicast we cannot stuff the ifp in * the rcvif of the packet header: loopback (lo0) packets use a @@ -2041,12 +2393,16 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, * on their way up the stack. Packets for other interfaces without * dedicated input threads go to the regular list. */ - if (inp == dlil_main_input_thread && ifp == lo_ifp) { - struct dlil_main_threading_info *inpm = - (struct dlil_main_threading_info *)inp; - _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail, m_cnt, m_size); - } else { - _addq_multi(&inp->rcvq_pkts, m_head, m_tail, m_cnt, m_size); + if (m_head != NULL) { + if (inp == dlil_main_input_thread && ifp == lo_ifp) { + struct dlil_main_threading_info *inpm = + (struct dlil_main_threading_info *)inp; + _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail, + m_cnt, m_size); + } else { + _addq_multi(&inp->rcvq_pkts, m_head, m_tail, + m_cnt, m_size); + } } #if IFNET_INPUT_SANITY_CHK @@ -2058,8 +2414,8 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, count++; if (count != m_cnt) { - panic_plain("%s%d: invalid packet count %d " - "(expected %d)\n", ifp->if_name, ifp->if_unit, + panic_plain("%s: invalid packet count %d " + "(expected %d)\n", if_name(ifp), count, m_cnt); /* NOTREACHED */ } @@ -2095,13 +2451,24 @@ ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, return (0); } -void -ifnet_start(struct ifnet *ifp) +static void +ifnet_start_common(struct ifnet *ifp, int resetfc) { + if (!(ifp->if_eflags & IFEF_TXSTART)) + return; /* - * If the starter thread is inactive, signal it to do work. + * If the starter thread is inactive, signal it to do work, + * unless the interface is being flow controlled from below, + * e.g. a virtual interface being flow controlled by a real + * network interface beneath it. 
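 */

/*
 * The gate that ifnet_start_common() implements below, reduced to a
 * predicate; locking is elided and the flag value is a stand-in for
 * IFSF_FLOW_CONTROLLED.  A reset caller clears the flow-control bit
 * and always wakes the starter, while a plain ifnet_start() is
 * swallowed for as long as a lower interface keeps this one flow
 * controlled.
 */
#include <stdint.h>

#define SF_FLOW_CONTROLLED	0x1u

static int	/* nonzero when the starter thread should be kicked */
start_should_wake(uint32_t *start_flags, int resetfc)
{
	if (resetfc)
		*start_flags &= ~SF_FLOW_CONTROLLED;
	else if (*start_flags & SF_FLOW_CONTROLLED)
		return (0);
	return (1);
}

/*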
*/ lck_mtx_lock_spin(&ifp->if_start_lock); + if (resetfc) { + ifp->if_start_flags &= ~IFSF_FLOW_CONTROLLED; + } else if (ifp->if_start_flags & IFSF_FLOW_CONTROLLED) { + lck_mtx_unlock(&ifp->if_start_lock); + return; + } ifp->if_start_req++; if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) { wakeup_one((caddr_t)&ifp->if_start_thread); @@ -2109,6 +2476,12 @@ ifnet_start(struct ifnet *ifp) lck_mtx_unlock(&ifp->if_start_lock); } +void +ifnet_start(struct ifnet *ifp) +{ + ifnet_start_common(ifp, 0); +} + static void ifnet_start_thread_fn(void *v, wait_result_t w) { @@ -2145,8 +2518,8 @@ ifnet_start_thread_fn(void *v, wait_result_t w) } } - snprintf(ifname, sizeof (ifname), "%s%d_starter", - ifp->if_name, ifp->if_unit); + snprintf(ifname, sizeof (ifname), "%s_starter", + if_name(ifp)); lck_mtx_lock_spin(&ifp->if_start_lock); @@ -2161,8 +2534,8 @@ ifnet_start_thread_fn(void *v, wait_result_t w) ifnet_purge(ifp); if (dlil_verbose) { - printf("%s%d: starter thread terminated\n", - ifp->if_name, ifp->if_unit); + printf("%s: starter thread terminated\n", + if_name(ifp)); } /* for the extra refcnt from kernel_thread_start() */ @@ -2215,8 +2588,8 @@ ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts) *(&ifp->if_start_cycle) = *ts; if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) - printf("%s%d: restart interval set to %lu nsec\n", - ifp->if_name, ifp->if_unit, ts->tv_nsec); + printf("%s: restart interval set to %lu nsec\n", + if_name(ifp), ts->tv_nsec); } static void @@ -2243,8 +2616,8 @@ ifnet_poll_thread_fn(void *v, wait_result_t w) struct timespec *ts = NULL; struct ifnet_stat_increment_param s; - snprintf(ifname, sizeof (ifname), "%s%d_poller", - ifp->if_name, ifp->if_unit); + snprintf(ifname, sizeof (ifname), "%s_poller", + if_name(ifp)); bzero(&s, sizeof (s)); lck_mtx_lock_spin(&ifp->if_poll_lock); @@ -2264,8 +2637,8 @@ ifnet_poll_thread_fn(void *v, wait_result_t w) lck_mtx_unlock(&ifp->if_poll_lock); if (dlil_verbose) { - printf("%s%d: poller thread terminated\n", - ifp->if_name, ifp->if_unit); + printf("%s: poller thread terminated\n", + if_name(ifp)); } /* for the extra refcnt from kernel_thread_start() */ @@ -2294,15 +2667,15 @@ ifnet_poll_thread_fn(void *v, wait_result_t w) break; } - m_lim = (if_rxpoll_max != 0) ? if_rxpoll_max : + m_lim = (inp->rxpoll_plim != 0) ? 
inp->rxpoll_plim : MAX((qlimit(&inp->rcvq_pkts)), (inp->rxpoll_phiwat << 2)); if (dlil_verbose > 1) { - printf("%s%d: polling up to %d pkts, " + printf("%s: polling up to %d pkts, " "pkts avg %d max %d, wreq avg %d, " "bytes avg %d\n", - ifp->if_name, ifp->if_unit, m_lim, + if_name(ifp), m_lim, inp->rxpoll_pavg, inp->rxpoll_pmax, inp->rxpoll_wavg, inp->rxpoll_bavg); } @@ -2315,10 +2688,10 @@ ifnet_poll_thread_fn(void *v, wait_result_t w) VERIFY(m_tail != NULL && m_cnt > 0); if (dlil_verbose > 1) { - printf("%s%d: polled %d pkts, " + printf("%s: polled %d pkts, " "pkts avg %d max %d, wreq avg %d, " "bytes avg %d\n", - ifp->if_name, ifp->if_unit, m_cnt, + if_name(ifp), m_cnt, inp->rxpoll_pavg, inp->rxpoll_pmax, inp->rxpoll_wavg, inp->rxpoll_bavg); } @@ -2329,12 +2702,18 @@ ifnet_poll_thread_fn(void *v, wait_result_t w) (void) ifnet_input_common(ifp, m_head, m_tail, &s, TRUE, TRUE); - } else if (dlil_verbose > 1) { - printf("%s%d: no packets, pkts avg %d max %d, " - "wreq avg %d, bytes avg %d\n", ifp->if_name, - ifp->if_unit, inp->rxpoll_pavg, - inp->rxpoll_pmax, inp->rxpoll_wavg, - inp->rxpoll_bavg); + } else { + if (dlil_verbose > 1) { + printf("%s: no packets, " + "pkts avg %d max %d, wreq avg %d, " + "bytes avg %d\n", + if_name(ifp), inp->rxpoll_pavg, + inp->rxpoll_pmax, inp->rxpoll_wavg, + inp->rxpoll_bavg); + } + + (void) ifnet_input_common(ifp, NULL, NULL, + NULL, FALSE, TRUE); } /* Release the io ref count */ @@ -2372,8 +2751,8 @@ ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts) *(&ifp->if_poll_cycle) = *ts; if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) - printf("%s%d: poll interval set to %lu nsec\n", - ifp->if_name, ifp->if_unit, ts->tv_nsec); + printf("%s: poll interval set to %lu nsec\n", + if_name(ifp), ts->tv_nsec); } void @@ -2404,7 +2783,7 @@ void ifnet_update_rcv(struct ifnet *ifp, cqev_t ev) { switch (ev) { - case CLASSQ_EV_LINK_SPEED: + case CLASSQ_EV_LINK_BANDWIDTH: if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) ifp->if_poll_update++; break; @@ -2465,16 +2844,36 @@ ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen) } errno_t -ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *qlen) +ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *pkts) { - if (ifp == NULL || qlen == NULL) - return (EINVAL); + errno_t err; + + if (ifp == NULL || pkts == NULL) + err = EINVAL; else if (!(ifp->if_eflags & IFEF_TXSTART)) - return (ENXIO); + err = ENXIO; + else + err = ifclassq_get_len(&ifp->if_snd, MBUF_SC_UNSPEC, + pkts, NULL); - *qlen = ifclassq_get_len(&ifp->if_snd); + return (err); +} - return (0); +errno_t +ifnet_get_service_class_sndq_len(struct ifnet *ifp, mbuf_svc_class_t sc, + u_int32_t *pkts, u_int32_t *bytes) +{ + errno_t err; + + if (ifp == NULL || !MBUF_VALID_SC(sc) || + (pkts == NULL && bytes == NULL)) + err = EINVAL; + else if (!(ifp->if_eflags & IFEF_TXSTART)) + err = ENXIO; + else + err = ifclassq_get_len(&ifp->if_snd, sc, pkts, bytes); + + return (err); } errno_t @@ -2535,7 +2934,6 @@ ifnet_enqueue(struct ifnet *ifp, struct mbuf *m) } else if (!(ifp->if_flags & IFF_UP)) { m_freem(m); return (ENETDOWN); - } /* enqueue the packet */ @@ -2606,6 +3004,19 @@ ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc, tail, cnt, len)); } +errno_t +ifnet_framer_stub(struct ifnet *ifp, struct mbuf **m, + const struct sockaddr *dest, const char *dest_linkaddr, + const char *frame_type, u_int32_t *pre, u_int32_t *post) +{ + if (pre != NULL) + *pre = 0; + if (post != NULL) + *post = 0; + + return (ifp->if_framer_legacy(ifp, m, dest, 
dest_linkaddr, frame_type)); +} + static int dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p, char **frame_header_p, protocol_family_t protocol_family) @@ -2704,8 +3115,8 @@ dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m) next_packet = m->m_nextpkt; m->m_nextpkt = NULL; - frame_header = m->m_pkthdr.header; - m->m_pkthdr.header = NULL; + frame_header = m->m_pkthdr.pkt_hdr; + m->m_pkthdr.pkt_hdr = NULL; error = (*ifproto->kpi.v1.input)(ifproto->ifp, ifproto->protocol_family, m, frame_header); if (error != 0 && error != EJUSTRETURN) @@ -2794,7 +3205,15 @@ dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp) atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped); s->dropped = 0; } - + /* + * If we went over the threshold, notify NetworkStatistics. + */ + if (ifp->if_data_threshold && + (ifp->if_ibytes + ifp->if_obytes) - ifp->if_dt_bytes > + ifp->if_data_threshold) { + ifp->if_dt_bytes = ifp->if_ibytes + ifp->if_obytes; + nstat_ifnet_threshold_reached(ifp->if_index); + } /* * No need for atomic operations as they are modified here * only from within the DLIL input thread context. @@ -2846,6 +3265,7 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, while (m != NULL) { struct if_proto *ifproto = NULL; int iorefcnt = 0; + uint32_t pktf_mask; /* pkt flags to preserve */ if (ifp_param == NULL) ifp = m->m_pkthdr.rcvif; @@ -2859,8 +3279,8 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, next_packet = m->m_nextpkt; m->m_nextpkt = NULL; - frame_header = m->m_pkthdr.header; - m->m_pkthdr.header = NULL; + frame_header = m->m_pkthdr.pkt_hdr; + m->m_pkthdr.pkt_hdr = NULL; /* * Get an IO reference count if the interface is not @@ -2873,8 +3293,19 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, goto next; } iorefcnt = 1; + pktf_mask = 0; + } else { + /* + * If this arrived on lo0, preserve interface addr + * info to allow for connectivity between loopback + * and local interface addresses. + */ + pktf_mask = (PKTF_LOOP|PKTF_IFAINFO); } + /* make sure packet comes in clean */ + m_classifier_init(m, pktf_mask); + ifp_inc_traffic_class_in(ifp, m); /* find which protocol family this packet is for */ @@ -2888,9 +3319,41 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, protocol_family = 0; } -#if CONFIG_EMBEDDED - iptap_ipf_input(ifp, protocol_family, m, frame_header); -#endif /* CONFIG_EMBEDDED */ + if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) + dlil_input_cksum_dbg(ifp, m, frame_header, + protocol_family); + + /* + * For partial checksum offload, we expect the driver to + * set the start offset indicating the start of the span + * that is covered by the hardware-computed checksum; + * adjust this start offset accordingly because the data + * pointer has been advanced beyond the link-layer header. + * + * Don't adjust if the interface is a bridge member, as + * the adjustment will occur from the context of the + * bridge interface during input. 
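 */

/*
 * The receive-side fixup below as a freestanding helper, under the
 * same assumptions: the driver reported the checksum start relative
 * to the frame header, m_data has since been advanced past the
 * link-layer header, and inconsistent bookkeeping invalidates the
 * checksum rather than mis-adjusting it.  Parameter names are
 * illustrative, not kernel API.
 */
#include <stddef.h>
#include <stdint.h>

static int	/* 0: discard the partial checksum; 1: rx_start adjusted */
csum_rx_adjust(const char *datastart, const char *data,
    const char *frame_header, uint32_t *rx_start)
{
	ptrdiff_t adj;

	if (frame_header == NULL || frame_header < datastart ||
	    frame_header > data)
		return (0);		/* header pointer out of range */
	adj = data - frame_header;	/* link-layer header length */
	if ((uint32_t)adj > *rx_start)
		return (0);		/* sum would start inside the header */
	*rx_start -= (uint32_t)adj;
	return (1);
}

/*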
+ */ + if (ifp->if_bridge == NULL && (m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PARTIAL)) == + (CSUM_DATA_VALID | CSUM_PARTIAL)) { + int adj; + + if (frame_header == NULL || + frame_header < (char *)mbuf_datastart(m) || + frame_header > (char *)m->m_data || + (adj = (m->m_data - frame_header)) > + m->m_pkthdr.csum_rx_start) { + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + hwcksum_in_invalidated++; + } else { + m->m_pkthdr.csum_rx_start -= adj; + } + } + + pktap_input(ifp, protocol_family, m, frame_header); if (m->m_flags & (M_BCAST|M_MCAST)) atomic_add_64(&ifp->if_imcasts, 1); @@ -2941,7 +3404,7 @@ dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, if_proto_ref(ifproto); } /* extend the list */ - m->m_pkthdr.header = frame_header; + m->m_pkthdr.pkt_hdr = frame_header; if (pkt_first == NULL) { pkt_first = m; } else { @@ -2981,8 +3444,8 @@ if_mcasts_update(struct ifnet *ifp) err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL); if (err == EAFNOSUPPORT) err = 0; - printf("%s%d: %s %d suspended link-layer multicast membership(s) " - "(err=%d)\n", ifp->if_name, ifp->if_unit, + printf("%s: %s %d suspended link-layer multicast membership(s) " + "(err=%d)\n", if_name(ifp), (err == 0 ? "successfully restored" : "failed to restore"), ifp->if_updatemcasts, err); @@ -3213,7 +3676,7 @@ ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m) * is under the protocol lock. * * An advisory code will be returned if adv is not null. This - * can be used to provide feedback about interface queues to the + * can be used to provide feedback about interface queues to the * application. */ errno_t @@ -3230,11 +3693,11 @@ dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist, mbuf_t send_head = NULL; mbuf_t *send_tail = &send_head; int iorefcnt = 0; -#if CONFIG_EMBEDDED u_int32_t pre = 0, post = 0; -#endif /* CONFIG_EMBEDDED */ + u_int32_t fpkts = 0, fbytes = 0; + int32_t flen = 0; - KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); /* Get an io refcnt if the interface is attached to prevent ifnet_detach * from happening while this operation is in progress */ @@ -3291,7 +3754,7 @@ preout_again: #if CONFIG_MACF_NET retval = mac_ifnet_check_transmit(ifp, m, proto_family, dlil_get_socket_type(&m, proto_family, raw)); - if (retval) { + if (retval != 0) { m_freem(m); goto cleanup; } @@ -3313,7 +3776,7 @@ preout_again: } #endif /* CONFIG_DTRACE */ - if (raw == 0 && ifp->if_framer) { + if (raw == 0 && ifp->if_framer != NULL) { int rcvif_set = 0; /* @@ -3331,18 +3794,28 @@ preout_again: } retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, - frame_type -#if CONFIG_EMBEDDED - , - &pre, &post -#endif /* CONFIG_EMBEDDED */ - ); - if (retval) { + frame_type, &pre, &post); + if (retval != 0) { if (retval != EJUSTRETURN) m_freem(m); goto next; } + /* + * For partial checksum offload, adjust the start + * and stuff offsets based on the prepended header. 
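 */

/*
 * Transmit-side counterpart of the receive fixup: framing has just
 * prepended pre bytes of link-layer header, so both partial-checksum
 * offsets, measured from the start of the frame, move forward by the
 * same amount.  A two-line model with illustrative field names:
 */
#include <stdint.h>

struct csum_tx_span {
	uint32_t start;		/* offset of the first byte summed */
	uint32_t stuff;		/* offset where the result is stored */
};

static void
csum_tx_shift(struct csum_tx_span *c, uint32_t pre)
{
	c->start += pre;
	c->stuff += pre;
}

/*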
+ */ + if ((m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PARTIAL)) == + (CSUM_DATA_VALID | CSUM_PARTIAL)) { + m->m_pkthdr.csum_tx_stuff += pre; + m->m_pkthdr.csum_tx_start += pre; + } + + if (hwcksum_dbg != 0 && !(ifp->if_flags & IFF_LOOPBACK)) + dlil_output_cksum_dbg(ifp, m, pre, + proto_family); + /* * Clear the ifp if it was set above, and to be * safe, only if it is still the same as the @@ -3370,8 +3843,8 @@ preout_again: } } /* - * Strip away M_PROTO1 bit prior to sending packet to the driver - * as this field may be used by the driver + * Strip away M_PROTO1 bit prior to sending packet + * to the driver as this field may be used by the driver */ m->m_flags &= ~M_PROTO1; @@ -3392,35 +3865,44 @@ preout_again: * If this is a TSO packet, make sure the interface still * advertise TSO capability. */ - - if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) && - !(ifp->if_hwassist & IFNET_TSO_IPV4)) { + if (TSO_IPV4_NOTOK(ifp, m) || TSO_IPV6_NOTOK(ifp, m)) { retval = EMSGSIZE; m_freem(m); goto cleanup; } - if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && - !(ifp->if_hwassist & IFNET_TSO_IPV6)) { - retval = EMSGSIZE; - m_freem(m); - goto cleanup; - } + /* + * If the packet service class is not background, + * update the timestamp to indicate recent activity + * on a foreground socket. + */ + if (!(m->m_pkthdr.pkt_flags & PKTF_SO_BACKGROUND) && + (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) && + m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) + ifp->if_fg_sendts = net_uptime(); + + ifp_inc_traffic_class_out(ifp, m); + pktap_output(ifp, proto_family, m, pre, post); /* * Finally, call the driver. */ - if ((ifp->if_eflags & IFEF_SENDLIST) != 0) { + if (ifp->if_eflags & IFEF_SENDLIST) { + if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) { + flen += (m_pktlen(m) - (pre + post)); + m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED; + } *send_tail = m; send_tail = &m->m_nextpkt; } else { -#if CONFIG_EMBEDDED - iptap_ipf_output(ifp, proto_family, (struct mbuf *)m, - pre, post); -#endif /* CONFIG_EMBEDDED */ - ifp_inc_traffic_class_out(ifp, m); + if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) { + flen = (m_pktlen(m) - (pre + post)); + m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED; + } else { + flen = 0; + } KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, - 0,0,0,0,0); + 0, 0, 0, 0, 0); retval = (*ifp->if_output)(ifp, m); if (retval == EQFULL || retval == EQSUSPENDED) { if (adv != NULL && adv->code == FADV_SUCCESS) { @@ -3430,32 +3912,32 @@ preout_again: } retval = 0; } - if (retval && dlil_verbose) { - printf("%s: output error on %s%d retval = %d\n", - __func__, ifp->if_name, ifp->if_unit, + if (retval == 0 && flen > 0) { + fbytes += flen; + fpkts++; + } + if (retval != 0 && dlil_verbose) { + printf("%s: output error on %s retval = %d\n", + __func__, if_name(ifp), retval); } KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, - 0,0,0,0,0); + 0, 0, 0, 0, 0); } - KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); next: m = packetlist; - if (m) { + if (m != NULL) { packetlist = packetlist->m_nextpkt; m->m_nextpkt = NULL; } - } while (m); - - if (send_head) { -#if CONFIG_EMBEDDED - iptap_ipf_output(ifp, proto_family, (struct mbuf *)send_head, - pre, post); -#endif /* CONFIG_EMBEDDED */ - ifp_inc_traffic_class_out(ifp, send_head); + } while (m != NULL); - KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); + if (send_head != NULL) { + VERIFY(ifp->if_eflags & IFEF_SENDLIST); + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, + 0, 0, 0, 0, 0); retval = 
(*ifp->if_output)(ifp, send_head); if (retval == EQFULL || retval == EQSUSPENDED) { if (adv != NULL) { @@ -3464,16 +3946,24 @@ next: } retval = 0; } - if (retval && dlil_verbose) { - printf("%s: output error on %s%d retval = %d\n", - __func__, ifp->if_name, ifp->if_unit, retval); + if (retval == 0 && flen > 0) { + fbytes += flen; + fpkts++; } - KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); + if (retval != 0 && dlil_verbose) { + printf("%s: output error on %s retval = %d\n", + __func__, if_name(ifp), retval); + } + KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0, 0, 0, 0, 0); } - KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END, 0, 0, 0, 0, 0); cleanup: + if (fbytes > 0) + ifp->if_fbytes += fbytes; + if (fpkts > 0) + ifp->if_fpackets += fpkts; if (proto != NULL) if_proto_free(proto); if (packetlist) /* if any packets are left, clean up */ @@ -3668,40 +4158,120 @@ dlil_send_arp_internal(ifnet_t ifp, u_short arpop, proto_media_send_arp arpp; arpp = (proto->proto_kpi == kProtoKPI_v1 ? proto->kpi.v1.send_arp : proto->kpi.v2.send_arp); - if (arpp == NULL) + if (arpp == NULL) { result = ENOTSUP; - else + } else { + switch (arpop) { + case ARPOP_REQUEST: + arpstat.txrequests++; + if (target_hw != NULL) + arpstat.txurequests++; + break; + case ARPOP_REPLY: + arpstat.txreplies++; + break; + } result = arpp(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto); + } if_proto_free(proto); } return (result); } -__private_extern__ errno_t -net_thread_check_lock(u_int32_t flag) +struct net_thread_marks { }; +static const struct net_thread_marks net_thread_marks_base = { }; + +__private_extern__ const net_thread_marks_t net_thread_marks_none = + &net_thread_marks_base; + +__private_extern__ net_thread_marks_t +net_thread_marks_push(u_int32_t push) { - struct uthread *uth = get_bsdthread_info(current_thread()); - return ((uth->uu_network_lock_held & flag) == flag); + static const char *const base = (const void*)&net_thread_marks_base; + u_int32_t pop = 0; + + if (push != 0) { + struct uthread *uth = get_bsdthread_info(current_thread()); + + pop = push & ~uth->uu_network_marks; + if (pop != 0) + uth->uu_network_marks |= pop; + } + + return ((net_thread_marks_t)&base[pop]); } -__private_extern__ void -net_thread_set_lock(u_int32_t flag) +__private_extern__ net_thread_marks_t +net_thread_unmarks_push(u_int32_t unpush) { - struct uthread *uth = get_bsdthread_info(current_thread()); + static const char *const base = (const void*)&net_thread_marks_base; + u_int32_t unpop = 0; + + if (unpush != 0) { + struct uthread *uth = get_bsdthread_info(current_thread()); - VERIFY((uth->uu_network_lock_held & flag) != flag); - uth->uu_network_lock_held |= flag; + unpop = unpush & uth->uu_network_marks; + if (unpop != 0) + uth->uu_network_marks &= ~unpop; + } + + return ((net_thread_marks_t)&base[unpop]); } __private_extern__ void -net_thread_unset_lock(u_int32_t flag) +net_thread_marks_pop(net_thread_marks_t popx) { - struct uthread *uth = get_bsdthread_info(current_thread()); + static const char *const base = (const void*)&net_thread_marks_base; + ptrdiff_t pop = (caddr_t)popx - (caddr_t)base; - VERIFY((uth->uu_network_lock_held & flag) == flag); - uth->uu_network_lock_held &= (~flag); + if (pop != 0) { + static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U; + struct uthread *uth = get_bsdthread_info(current_thread()); + + VERIFY((pop & ones) == pop); + VERIFY((ptrdiff_t)(uth->uu_network_marks & pop) == pop); + uth->uu_network_marks &= 
~pop; + } +} + +__private_extern__ void +net_thread_unmarks_pop(net_thread_marks_t unpopx) +{ + static const char *const base = (const void*)&net_thread_marks_base; + ptrdiff_t unpop = (caddr_t)unpopx - (caddr_t)base; + + if (unpop != 0) { + static const ptrdiff_t ones = (ptrdiff_t)(u_int32_t)~0U; + struct uthread *uth = get_bsdthread_info(current_thread()); + + VERIFY((unpop & ones) == unpop); + VERIFY((ptrdiff_t)(uth->uu_network_marks & unpop) == 0); + uth->uu_network_marks |= unpop; + } +} + +__private_extern__ u_int32_t +net_thread_is_marked(u_int32_t check) +{ + if (check != 0) { + struct uthread *uth = get_bsdthread_info(current_thread()); + return (uth->uu_network_marks & check); + } + else + return (0); +} + +__private_extern__ u_int32_t +net_thread_is_unmarked(u_int32_t check) +{ + if (check != 0) { + struct uthread *uth = get_bsdthread_info(current_thread()); + return (~uth->uu_network_marks & check); + } + else + return (0); } static __inline__ int @@ -4046,14 +4616,14 @@ ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol, proto_details->demux_list, proto_details->demux_count); if (dlil_verbose) { - printf("%s%d: attached v1 protocol %d\n", ifp->if_name, - ifp->if_unit, protocol); + printf("%s: attached v1 protocol %d\n", if_name(ifp), + protocol); } end: if (retval != 0 && retval != EEXIST && ifp != NULL) { - DLIL_PRINTF("%s%d: failed to attach v1 protocol %d (err=%d)\n", - ifp->if_name, ifp->if_unit, protocol, retval); + DLIL_PRINTF("%s: failed to attach v1 protocol %d (err=%d)\n", + if_name(ifp), protocol, retval); } ifnet_head_done(); if (retval != 0 && ifproto != NULL) @@ -4102,14 +4672,14 @@ ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol, proto_details->demux_list, proto_details->demux_count); if (dlil_verbose) { - printf("%s%d: attached v2 protocol %d\n", ifp->if_name, - ifp->if_unit, protocol); + printf("%s: attached v2 protocol %d\n", if_name(ifp), + protocol); } end: if (retval != 0 && retval != EEXIST && ifp != NULL) { - DLIL_PRINTF("%s%d: failed to attach v2 protocol %d (err=%d)\n", - ifp->if_name, ifp->if_unit, protocol, retval); + DLIL_PRINTF("%s: failed to attach v2 protocol %d (err=%d)\n", + if_name(ifp), protocol, retval); } ifnet_head_done(); if (retval != 0 && ifproto != NULL) @@ -4163,8 +4733,8 @@ ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family) ifnet_lock_done(ifp); if (dlil_verbose) { - printf("%s%d: detached %s protocol %d\n", ifp->if_name, - ifp->if_unit, (proto->proto_kpi == kProtoKPI_v1) ? + printf("%s: detached %s protocol %d\n", if_name(ifp), + (proto->proto_kpi == kProtoKPI_v1) ? "v1" : "v2", proto_family); } @@ -4460,17 +5030,21 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) } /* - * If the driver supports the new transmit model, create a workloop - * starter thread to invoke the if_start callback where the packets - * may be dequeued and transmitted. + * If the driver supports the new transmit model, calculate flow hash + * and create a workloop starter thread to invoke the if_start callback + * where the packets may be dequeued and transmitted. 
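 */

/*
 * A self-contained model of the mark tokens introduced above.  The
 * kernel encodes the token as a byte offset from a static base
 * address; this sketch carries the bits in an integer token instead,
 * which keeps the property that matters: push returns exactly the
 * bits this caller newly set, so a nested push of an already-held
 * mark pops as a no-op and only the outermost holder clears it.
 */
#include <stdint.h>

typedef uintptr_t mark_token_t;

static uint32_t thread_marks;	/* stand-in for uth->uu_network_marks */

static mark_token_t
marks_push(uint32_t push)
{
	uint32_t newly = push & ~thread_marks;	/* bits not yet held */

	thread_marks |= newly;
	return ((mark_token_t)newly);
}

static void
marks_pop(mark_token_t t)
{
	thread_marks &= ~(uint32_t)t;	/* release only what we set */
}

/*
 * Usage mirrors a lock guard: t = marks_push(BIT); ...; marks_pop(t).
 * Re-entrant sections nest safely because the inner push returns 0.
 */

/*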
*/ if (ifp->if_eflags & IFEF_TXSTART) { + ifp->if_flowhash = ifnet_calc_flowhash(ifp); + VERIFY(ifp->if_flowhash != 0); + VERIFY(ifp->if_start != NULL); VERIFY(ifp->if_start_thread == THREAD_NULL); ifnet_set_start_cycle(ifp, NULL); ifp->if_start_active = 0; ifp->if_start_req = 0; + ifp->if_start_flags = 0; if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp, &ifp->if_start_thread)) != KERN_SUCCESS) { panic_plain("%s: ifp=%p couldn't get a start thread; " @@ -4479,6 +5053,8 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) } ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP, (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP)); + } else { + ifp->if_flowhash = 0; } /* @@ -4523,11 +5099,20 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) IFMA_UNLOCK(ifma); } - printf("%s%d: attached with %d suspended link-layer multicast " - "membership(s)\n", ifp->if_name, ifp->if_unit, + printf("%s: attached with %d suspended link-layer multicast " + "membership(s)\n", if_name(ifp), ifp->if_updatemcasts); } + /* Clear logging parameters */ + bzero(&ifp->if_log, sizeof (ifp->if_log)); + ifp->if_fg_sendts = 0; + + VERIFY(ifp->if_delegated.ifp == NULL); + VERIFY(ifp->if_delegated.type == 0); + VERIFY(ifp->if_delegated.family == 0); + VERIFY(ifp->if_delegated.subfamily == 0); + ifnet_lock_done(ifp); ifnet_head_done(); @@ -4535,14 +5120,11 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) /* Enable forwarding cached route */ ifp->if_fwd_cacheok = 1; /* Clean up any existing cached routes */ - if (ifp->if_fwd_route.ro_rt != NULL) - rtfree(ifp->if_fwd_route.ro_rt); + ROUTE_RELEASE(&ifp->if_fwd_route); bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route)); - if (ifp->if_src_route.ro_rt != NULL) - rtfree(ifp->if_src_route.ro_rt); + ROUTE_RELEASE(&ifp->if_src_route); bzero(&ifp->if_src_route, sizeof (ifp->if_src_route)); - if (ifp->if_src_route6.ro_rt != NULL) - rtfree(ifp->if_src_route6.ro_rt); + ROUTE_RELEASE(&ifp->if_src_route6); bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6)); lck_mtx_unlock(&ifp->if_cached_route_lock); @@ -4576,6 +5158,8 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) } #endif /* INET6 */ + VERIFY(ifp->if_data_threshold == 0); + /* * Finally, mark this ifnet as attached. */ @@ -4611,7 +5195,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0); if (dlil_verbose) { - printf("%s%d: attached%s\n", ifp->if_name, ifp->if_unit, + printf("%s: attached%s\n", if_name(ifp), (dl_if->dl_if_flags & DLIF_REUSE) ? 
" (recycled)" : ""); } @@ -4640,8 +5224,8 @@ dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr) ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE); VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen); - namelen = snprintf(workbuf, sizeof (workbuf), "%s%d", - ifp->if_name, ifp->if_unit); + namelen = snprintf(workbuf, sizeof (workbuf), "%s", + if_name(ifp)); masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; socksize = masklen + ifp->if_addrlen; #define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1))) @@ -4738,14 +5322,13 @@ if_purgeaddrs(struct ifnet *ifp) #if INET6 in6_purgeaddrs(ifp); #endif /* INET6 */ -#if NETAT - at_purgeaddrs(ifp); -#endif } errno_t ifnet_detach(ifnet_t ifp) { + struct ifnet *delegated_ifp; + if (ifp == NULL) return (EINVAL); @@ -4782,7 +5365,7 @@ ifnet_detach(ifnet_t ifp) lck_mtx_unlock(&ifp->if_ref_lock); if (dlil_verbose) - printf("%s%d: detaching\n", ifp->if_name, ifp->if_unit); + printf("%s: detaching\n", if_name(ifp)); /* * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will @@ -4797,10 +5380,21 @@ ifnet_detach(ifnet_t ifp) /* Record detach PC stacktrace */ ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach); + /* Clear logging parameters */ + bzero(&ifp->if_log, sizeof (ifp->if_log)); + + /* Clear delegated interface info (reference released below) */ + delegated_ifp = ifp->if_delegated.ifp; + bzero(&ifp->if_delegated, sizeof (ifp->if_delegated)); + ifnet_lock_done(ifp); ifnet_head_done(); lck_mtx_unlock(rnh_lock); + /* Release reference held on the delegated interface */ + if (delegated_ifp != NULL) + ifnet_release(delegated_ifp); + /* Reset Link Quality Metric (unless loopback [lo0]) */ if (ifp != lo_ifp) if_lqm_update(ifp, IFNET_LQM_THRESH_OFF); @@ -4827,6 +5421,7 @@ ifnet_detach(ifnet_t ifp) ifp->if_fwd_cacheok = 0; lck_mtx_unlock(&ifp->if_cached_route_lock); + ifp->if_data_threshold = 0; /* * Drain any deferred IGMPv3/MLDv2 query responses, but keep the * references to the info structures and leave them attached to @@ -4946,8 +5541,8 @@ ifnet_detach_final(struct ifnet *ifp) * common case, so block without using a continuation. 
*/ while (ifp->if_refio > 0) { - printf("%s: Waiting for IO references on %s%d interface " - "to be released\n", __func__, ifp->if_name, ifp->if_unit); + printf("%s: Waiting for IO references on %s interface " + "to be released\n", __func__, if_name(ifp)); (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock, (PZERO - 1), "ifnet_ioref_wait", NULL); } @@ -5025,6 +5620,7 @@ ifnet_detach_final(struct ifnet *ifp) */ if (ifp->if_start_thread != THREAD_NULL) { lck_mtx_lock_spin(&ifp->if_start_lock); + ifp->if_start_flags = 0; ifp->if_start_thread = THREAD_NULL; wakeup_one((caddr_t)&ifp->if_start_thread); lck_mtx_unlock(&ifp->if_start_lock); @@ -5106,7 +5702,8 @@ ifnet_detach_final(struct ifnet *ifp) ifp->if_free = ifp_if_free; ifp->if_demux = ifp_if_demux; ifp->if_event = ifp_if_event; - ifp->if_framer = ifp_if_framer; + ifp->if_framer_legacy = ifp_if_framer; + ifp->if_framer = ifp_if_framer_extended; ifp->if_add_proto = ifp_if_add_proto; ifp->if_del_proto = ifp_if_del_proto; ifp->if_check_multi = ifp_if_check_multi; @@ -5117,6 +5714,12 @@ ifnet_detach_final(struct ifnet *ifp) VERIFY(ifp->if_desc.ifd_desc != NULL); bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE); + /* there shouldn't be any delegation by now */ + VERIFY(ifp->if_delegated.ifp == NULL); + VERIFY(ifp->if_delegated.type == 0); + VERIFY(ifp->if_delegated.family == 0); + VERIFY(ifp->if_delegated.subfamily == 0); + ifnet_lock_done(ifp); #if PF @@ -5139,17 +5742,16 @@ ifnet_detach_final(struct ifnet *ifp) /* Last chance to cleanup any cached route */ lck_mtx_lock(&ifp->if_cached_route_lock); VERIFY(!ifp->if_fwd_cacheok); - if (ifp->if_fwd_route.ro_rt != NULL) - rtfree(ifp->if_fwd_route.ro_rt); + ROUTE_RELEASE(&ifp->if_fwd_route); bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route)); - if (ifp->if_src_route.ro_rt != NULL) - rtfree(ifp->if_src_route.ro_rt); + ROUTE_RELEASE(&ifp->if_src_route); bzero(&ifp->if_src_route, sizeof (ifp->if_src_route)); - if (ifp->if_src_route6.ro_rt != NULL) - rtfree(ifp->if_src_route6.ro_rt); + ROUTE_RELEASE(&ifp->if_src_route6); bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6)); lck_mtx_unlock(&ifp->if_cached_route_lock); + VERIFY(ifp->if_data_threshold == 0); + ifnet_llreach_ifdetach(ifp); dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0); @@ -5170,7 +5772,7 @@ ifnet_detach_final(struct ifnet *ifp) lck_mtx_unlock(&ifp->if_ref_lock); if (dlil_verbose) - printf("%s%d: detached\n", ifp->if_name, ifp->if_unit); + printf("%s: detached\n", if_name(ifp)); /* Release reference held during ifnet attach */ ifnet_release(ifp); @@ -5242,21 +5844,28 @@ ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa) return (EOPNOTSUPP); } -static errno_t ifp_if_framer(struct ifnet *ifp, struct mbuf **m, -const struct sockaddr *sa, const char *ll, const char *t -#if CONFIG_EMBEDDED - , - u_int32_t *pre, u_int32_t *post -#endif /* CONFIG_EMBEDDED */ - ) +static errno_t +ifp_if_framer(struct ifnet *ifp, struct mbuf **m, + const struct sockaddr *sa, const char *ll, const char *t) { #pragma unused(ifp, m, sa, ll, t) + return (ifp_if_framer_extended(ifp, m, sa, ll, t, NULL, NULL)); +} + +static errno_t +ifp_if_framer_extended(struct ifnet *ifp, struct mbuf **m, + const struct sockaddr *sa, const char *ll, const char *t, + u_int32_t *pre, u_int32_t *post) +{ +#pragma unused(ifp, sa, ll, t) m_freem(*m); *m = NULL; -#if CONFIG_EMBEDDED - *pre = 0; - *post = 0; -#endif /* CONFIG_EMBEDDED */ + + if (pre != NULL) + *pre = 0; + if (post != NULL) + *post = 0; + return (EJUSTRETURN); } @@ -5364,6 +5973,7 @@ int 
dlil_if_acquire(u_int32_t family, const void *uniqueid, dlifp1->dl_if_trace = dlil_if_trace; } ifp1->if_name = dlifp1->dl_if_namestorage; + ifp1->if_xname = dlifp1->dl_if_xnamestorage; /* initialize interface description */ ifp1->if_desc.ifd_maxlen = IF_DESCSIZE; @@ -5388,6 +5998,10 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid, lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group, ifnet_lock_attr); lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr); +#if INET6 + lck_rw_init(&ifp1->if_inet6data_lock, ifnet_lock_group, ifnet_lock_attr); + ifp1->if_inet6data = NULL; +#endif /* for send data paths */ lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group, @@ -5424,6 +6038,10 @@ dlil_if_release(ifnet_t ifp) dlifp->dl_if_flags &= ~DLIF_INUSE; strncpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ); ifp->if_name = dlifp->dl_if_namestorage; + /* Reset external name (name + unit) */ + ifp->if_xname = dlifp->dl_if_xnamestorage; + snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ, + "%s?", ifp->if_name); lck_mtx_unlock(&dlifp->dl_if_lock); #if CONFIG_MACF_NET /* @@ -5459,11 +6077,11 @@ __private_extern__ void dlil_proto_unplumb_all(struct ifnet *ifp) { /* - * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK - * and PF_VLAN, where each bucket contains exactly one entry; - * PF_VLAN does not need an explicit unplumb. + * if_proto_hash[0-2] are for PF_INET, PF_INET6 and PF_VLAN, where + * each bucket contains exactly one entry; PF_VLAN does not need an + * explicit unplumb. * - * if_proto_hash[4] is for other protocols; we expect anything + * if_proto_hash[3] is for other protocols; we expect anything * in this bucket to respond to the DETACHING event (which would * have happened by now) and do the unplumb then. 
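 */

/*
 * The "%s%d"-to-if_name(ifp) conversions running through this patch
 * rest on the if_xname storage wired up above: the external name is
 * composed once from driver name plus unit (and reset to "name?" on
 * release, as in dlil_if_release() above), after which callers print
 * it with a plain %s.  A minimal model of that assumption; IFXNAMSIZ
 * and the struct are stand-ins, not the kernel definitions.
 */
#include <stdio.h>

#define IFXNAMSIZ	32

struct ifnet_model {
	const char *if_name;		/* driver name, e.g. "en" */
	int if_unit;			/* unit number, e.g. 0 */
	char if_xname[IFXNAMSIZ];	/* composed external name */
};

static const char *
if_name_model(struct ifnet_model *ifp)
{
	snprintf(ifp->if_xname, sizeof (ifp->if_xname), "%s%d",
	    ifp->if_name, ifp->if_unit);
	return (ifp->if_xname);
}

/*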
*/ @@ -5471,9 +6089,6 @@ dlil_proto_unplumb_all(struct ifnet *ifp) #if INET6 (void) proto_unplumb(PF_INET6, ifp); #endif /* INET6 */ -#if NETAT - (void) proto_unplumb(PF_APPLETALK, ifp); -#endif /* NETAT */ } static void @@ -5496,8 +6111,7 @@ ifp_src_route_copyin(struct ifnet *ifp, struct route *src) if (ifp->if_fwd_cacheok) { route_copyin(src, &ifp->if_src_route, sizeof (*src)); } else { - rtfree(src->ro_rt); - src->ro_rt = NULL; + ROUTE_RELEASE(src); } lck_mtx_unlock(&ifp->if_cached_route_lock); } @@ -5525,8 +6139,7 @@ ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src) route_copyin((struct route *)src, (struct route *)&ifp->if_src_route6, sizeof (*src)); } else { - rtfree(src->ro_rt); - src->ro_rt = NULL; + ROUTE_RELEASE(src); } lck_mtx_unlock(&ifp->if_cached_route_lock); } @@ -5542,13 +6155,9 @@ ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip) ifp_src_route_copyout(ifp, &src_rt); - if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) || - src_ip.s_addr != dst->sin_addr.s_addr || - src_rt.ro_rt->generation_id != route_generation) { - if (src_rt.ro_rt != NULL) { - rtfree(src_rt.ro_rt); - src_rt.ro_rt = NULL; - } else if (dst->sin_family != AF_INET) { + if (ROUTE_UNUSABLE(&src_rt) || src_ip.s_addr != dst->sin_addr.s_addr) { + ROUTE_RELEASE(&src_rt); + if (dst->sin_family != AF_INET) { bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst)); dst->sin_len = sizeof (src_rt.ro_dst); dst->sin_family = AF_INET; @@ -5580,13 +6189,10 @@ ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6) ifp_src_route6_copyout(ifp, &src_rt); - if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) || - !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr) || - src_rt.ro_rt->generation_id != route_generation) { - if (src_rt.ro_rt != NULL) { - rtfree(src_rt.ro_rt); - src_rt.ro_rt = NULL; - } else if (src_rt.ro_dst.sin6_family != AF_INET6) { + if (ROUTE_UNUSABLE(&src_rt) || + !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr)) { + ROUTE_RELEASE(&src_rt); + if (src_rt.ro_dst.sin6_family != AF_INET6) { bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst)); src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst); src_rt.ro_dst.sin6_family = AF_INET6; @@ -5673,7 +6279,8 @@ static int sysctl_rxpoll SYSCTL_HANDLER_ARGS { #pragma unused(arg1, arg2) - int i, err; + uint32_t i; + int err; i = if_rxpoll; @@ -5689,156 +6296,144 @@ sysctl_rxpoll SYSCTL_HANDLER_ARGS } static int -sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS +sysctl_rxpoll_mode_holdtime SYSCTL_HANDLER_ARGS { #pragma unused(arg1, arg2) - int i, err; + uint64_t q; + int err; - i = if_sndq_maxlen; + q = if_rxpoll_mode_holdtime; - err = sysctl_handle_int(oidp, &i, 0, req); + err = sysctl_handle_quad(oidp, &q, 0, req); if (err != 0 || req->newptr == USER_ADDR_NULL) return (err); - if (i < IF_SNDQ_MINLEN) - i = IF_SNDQ_MINLEN; + if (q < IF_RXPOLL_MODE_HOLDTIME_MIN) + q = IF_RXPOLL_MODE_HOLDTIME_MIN; + + if_rxpoll_mode_holdtime = q; - if_sndq_maxlen = i; return (err); } static int -sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS +sysctl_rxpoll_sample_holdtime SYSCTL_HANDLER_ARGS { #pragma unused(arg1, arg2) - int i, err; + uint64_t q; + int err; - i = if_rcvq_maxlen; + q = if_rxpoll_sample_holdtime; - err = sysctl_handle_int(oidp, &i, 0, req); + err = sysctl_handle_quad(oidp, &q, 0, req); if (err != 0 || req->newptr == USER_ADDR_NULL) return (err); - if (i < IF_RCVQ_MINLEN) - i = IF_RCVQ_MINLEN; + if (q < IF_RXPOLL_SAMPLETIME_MIN) + q = IF_RXPOLL_SAMPLETIME_MIN; + + if_rxpoll_sample_holdtime = q; - if_rcvq_maxlen = i; return (err); } 
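
/*
 * The holdtime handlers above (and the interval handler that follows)
 * share one shape: copy the tunable, let the sysctl machinery run its
 * old-out/new-in exchange, and publish only after clamping a written
 * value to its floor.  A sketch of that shape with the handler call
 * abstracted away; the names below are illustrative, not kernel API.
 */
#include <stdint.h>

static uint64_t tunable_ns;			/* the published tunable */
#define TUNABLE_FLOOR_NS	1000000ULL	/* example floor: 1 ms */

static int
tunable_sysctl_model(const uint64_t *newp)	/* NULL means read-only */
{
	uint64_t q = tunable_ns;		/* old value handed out */

	if (newp == NULL)
		return (0);
	q = *newp;
	if (q < TUNABLE_FLOOR_NS)		/* clamp rather than reject */
		q = TUNABLE_FLOOR_NS;
	tunable_ns = q;
	return (0);
}

/*
 * The wreq watermark handlers that follow deliberately differ: a low
 * watermark written at or above the high one returns EINVAL instead
 * of being clamped, since clamping could silently invert the pair.
 */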
-void -ifnet_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl) +static int +sysctl_rxpoll_interval_time SYSCTL_HANDLER_ARGS { - struct sfb_bin_fcentry *fce, *tfce; +#pragma unused(arg1, arg2) + uint64_t q; + int err; - lck_mtx_lock_spin(&ifnet_fclist_lock); + q = if_rxpoll_interval_time; - SLIST_FOREACH_SAFE(fce, fcl, fce_link, tfce) { - SLIST_REMOVE(fcl, fce, sfb_bin_fcentry, fce_link); - SLIST_INSERT_HEAD(&ifnet_fclist, fce, fce_link); - sp->sfb_stats.flow_feedback++; - } - VERIFY(SLIST_EMPTY(fcl) && !SLIST_EMPTY(&ifnet_fclist)); + err = sysctl_handle_quad(oidp, &q, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); + + if (q < IF_RXPOLL_INTERVALTIME_MIN) + q = IF_RXPOLL_INTERVALTIME_MIN; - wakeup(&ifnet_fclist); + if_rxpoll_interval_time = q; - lck_mtx_unlock(&ifnet_fclist_lock); + return (err); } -struct sfb_bin_fcentry * -ifnet_fce_alloc(int how) +static int +sysctl_rxpoll_wlowat SYSCTL_HANDLER_ARGS { - struct sfb_bin_fcentry *fce; +#pragma unused(arg1, arg2) + uint32_t i; + int err; - fce = (how == M_WAITOK) ? zalloc(ifnet_fcezone) : - zalloc_noblock(ifnet_fcezone); - if (fce != NULL) - bzero(fce, ifnet_fcezone_size); + i = if_rxpoll_wlowat; - return (fce); -} + err = sysctl_handle_int(oidp, &i, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); -void -ifnet_fce_free(struct sfb_bin_fcentry *fce) -{ - zfree(ifnet_fcezone, fce); + if (i == 0 || i >= if_rxpoll_whiwat) + return (EINVAL); + + if_rxpoll_wlowat = i; + return (err); } -static void -ifnet_fc_init(void) +static int +sysctl_rxpoll_whiwat SYSCTL_HANDLER_ARGS { - thread_t thread = THREAD_NULL; +#pragma unused(arg1, arg2) + uint32_t i; + int err; - SLIST_INIT(&ifnet_fclist); - lck_mtx_init(&ifnet_fclist_lock, ifnet_snd_lock_group, NULL); + i = if_rxpoll_whiwat; - ifnet_fcezone_size = P2ROUNDUP(sizeof (struct sfb_bin_fcentry), - sizeof (u_int64_t)); - ifnet_fcezone = zinit(ifnet_fcezone_size, - IFNET_FCEZONE_MAX * ifnet_fcezone_size, 0, IFNET_FCEZONE_NAME); - if (ifnet_fcezone == NULL) { - panic("%s: failed allocating %s", __func__, IFNET_FCEZONE_NAME); - /* NOTREACHED */ - } - zone_change(ifnet_fcezone, Z_EXPAND, TRUE); - zone_change(ifnet_fcezone, Z_CALLERACCT, FALSE); + err = sysctl_handle_int(oidp, &i, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); - if (kernel_thread_start(ifnet_fc_thread_func, - NULL, &thread) != KERN_SUCCESS) { - panic("%s: couldn't create flow event advisory thread", - __func__); - /* NOTREACHED */ - } - thread_deallocate(thread); + if (i <= if_rxpoll_wlowat) + return (EINVAL); + + if_rxpoll_whiwat = i; + return (err); } static int -ifnet_fc_thread_cont(int err) +sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS { -#pragma unused(err) - struct sfb_bin_fcentry *fce; - struct inpcb *inp; +#pragma unused(arg1, arg2) + int i, err; - for (;;) { - lck_mtx_assert(&ifnet_fclist_lock, LCK_MTX_ASSERT_OWNED); - while (SLIST_EMPTY(&ifnet_fclist)) { - (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock, - (PSOCK | PSPIN), "ifnet_fc_cont", 0, - ifnet_fc_thread_cont); - /* NOTREACHED */ - } + i = if_sndq_maxlen; - fce = SLIST_FIRST(&ifnet_fclist); - SLIST_REMOVE(&ifnet_fclist, fce, sfb_bin_fcentry, fce_link); - SLIST_NEXT(fce, fce_link) = NULL; - lck_mtx_unlock(&ifnet_fclist_lock); + err = sysctl_handle_int(oidp, &i, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); - inp = inp_fc_getinp(fce->fce_flowhash, 0); - if (inp == NULL) { - ifnet_fce_free(fce); - lck_mtx_lock_spin(&ifnet_fclist_lock); - continue; - } - inp_fc_feedback(inp); + 
if (i < IF_SNDQ_MINLEN) + i = IF_SNDQ_MINLEN; - ifnet_fce_free(fce); - lck_mtx_lock_spin(&ifnet_fclist_lock); - } + if_sndq_maxlen = i; + return (err); } -static void -ifnet_fc_thread_func(void *v, wait_result_t w) +static int +sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS { -#pragma unused(v, w) - lck_mtx_lock(&ifnet_fclist_lock); - (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock, - (PSOCK | PSPIN), "ifnet_fc", 0, ifnet_fc_thread_cont); - /* - * msleep0() shouldn't have returned as PCATCH was not set; - * therefore assert in this case. - */ - lck_mtx_unlock(&ifnet_fclist_lock); - VERIFY(0); +#pragma unused(arg1, arg2) + int i, err; + + i = if_rcvq_maxlen; + + err = sysctl_handle_int(oidp, &i, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); + + if (i < IF_RCVQ_MINLEN) + i = IF_RCVQ_MINLEN; + + if_rcvq_maxlen = i; + return (err); } void @@ -5888,6 +6483,85 @@ dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa) &kev.link_data, sizeof (kev)); } +const void * +dlil_ifaddr_bytes(const struct sockaddr_dl *sdl, size_t *sizep, + kauth_cred_t *credp) +{ + const u_int8_t *bytes; + size_t size; + + bytes = CONST_LLADDR(sdl); + size = sdl->sdl_alen; + +#if CONFIG_MACF + if (dlil_lladdr_ckreq) { + switch (sdl->sdl_type) { + case IFT_ETHER: + case IFT_BRIDGE: + case IFT_IEEE1394: + case IFT_IEEE8023ADLAG: + case IFT_L2VLAN: + break; + default: + credp = NULL; + break; + }; + + if (credp && mac_system_check_info(*credp, "net.link.addr")) { + static const u_int8_t unspec[FIREWIRE_EUI64_LEN] = { + [0] = 2 + }; + + switch (sdl->sdl_type) { + case IFT_ETHER: + case IFT_BRIDGE: + case IFT_IEEE8023ADLAG: + case IFT_L2VLAN: + VERIFY(size == ETHER_ADDR_LEN); + bytes = unspec; + break; + case IFT_IEEE1394: + VERIFY(size == FIREWIRE_EUI64_LEN); + bytes = unspec; + break; + default: + VERIFY(FALSE); + break; + }; + } + } +#else +#pragma unused(credp) +#endif + + if (sizep != NULL) *sizep = size; + return (bytes); +} + +void +dlil_report_issues(struct ifnet *ifp, u_int8_t modid[DLIL_MODIDLEN], + u_int8_t info[DLIL_MODARGLEN]) +{ + struct kev_dl_issues kev; + struct timeval tv; + + VERIFY(ifp != NULL); + VERIFY(modid != NULL); + _CASSERT(sizeof (kev.modid) == DLIL_MODIDLEN); + _CASSERT(sizeof (kev.info) == DLIL_MODARGLEN); + + bzero(&kev, sizeof (&kev)); + + microtime(&tv); + kev.timestamp = tv.tv_sec; + bcopy(modid, &kev.modid, DLIL_MODIDLEN); + if (info != NULL) + bcopy(info, &kev.info, DLIL_MODARGLEN); + + dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_ISSUES, + &kev.link_data, sizeof (kev)); +} + errno_t ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr, struct proc *p) @@ -5973,6 +6647,8 @@ ifnet_set_throttle(struct ifnet *ifp, u_int32_t level) if (!(ifp->if_eflags & IFEF_TXSTART)) return (ENXIO); + ifq = &ifp->if_snd; + switch (level) { case IFNET_THROTTLE_OFF: case IFNET_THROTTLE_OPPORTUNISTIC: @@ -5986,18 +6662,755 @@ ifnet_set_throttle(struct ifnet *ifp, u_int32_t level) return (EINVAL); } - ifq = &ifp->if_snd; IFCQ_LOCK(ifq); if (IFCQ_IS_ENABLED(ifq)) IFCQ_SET_THROTTLE(ifq, level, err); IFCQ_UNLOCK(ifq); if (err == 0) { - printf("%s%d: throttling level set to %d\n", ifp->if_name, - ifp->if_unit, level); + printf("%s: throttling level set to %d\n", if_name(ifp), + level); if (level == IFNET_THROTTLE_OFF) ifnet_start(ifp); } return (err); } + +errno_t +ifnet_getset_log(ifnet_t ifp, u_long cmd, struct ifreq *ifr, + struct proc *p) +{ +#pragma unused(p) + errno_t result = 0; + uint32_t flags; + int level, category, subcategory; + + VERIFY(cmd == SIOCSIFLOG || cmd == 
cmd == SIOCGIFLOG); + + if (cmd == SIOCSIFLOG) { + if ((result = priv_check_cred(kauth_cred_get(), + PRIV_NET_INTERFACE_CONTROL, 0)) != 0) + return (result); + + level = ifr->ifr_log.ifl_level; + if (level < IFNET_LOG_MIN || level > IFNET_LOG_MAX) + result = EINVAL; + + flags = ifr->ifr_log.ifl_flags; + if ((flags &= IFNET_LOGF_MASK) == 0) + result = EINVAL; + + category = ifr->ifr_log.ifl_category; + subcategory = ifr->ifr_log.ifl_subcategory; + + if (result == 0) + result = ifnet_set_log(ifp, level, flags, + category, subcategory); + } else { + result = ifnet_get_log(ifp, &level, &flags, &category, + &subcategory); + if (result == 0) { + ifr->ifr_log.ifl_level = level; + ifr->ifr_log.ifl_flags = flags; + ifr->ifr_log.ifl_category = category; + ifr->ifr_log.ifl_subcategory = subcategory; + } + } + + return (result); +} + +int +ifnet_set_log(struct ifnet *ifp, int32_t level, uint32_t flags, + int32_t category, int32_t subcategory) +{ + int err = 0; + + VERIFY(level >= IFNET_LOG_MIN && level <= IFNET_LOG_MAX); + VERIFY(flags & IFNET_LOGF_MASK); + + /* + * The logging level applies to all facilities; make sure to + * update them all with the most current level. + */ + flags |= ifp->if_log.flags; + + if (ifp->if_output_ctl != NULL) { + struct ifnet_log_params l; + + bzero(&l, sizeof (l)); + l.level = level; + l.flags = flags; + l.flags &= ~IFNET_LOGF_DLIL; + l.category = category; + l.subcategory = subcategory; + + /* Send this request to lower layers */ + if (l.flags != 0) { + err = ifp->if_output_ctl(ifp, IFNET_CTL_SET_LOG, + sizeof (l), &l); + } + } else if ((flags & ~IFNET_LOGF_DLIL) && ifp->if_output_ctl == NULL) { + /* + * If targeted to the lower layers without an output + * control callback registered on the interface, just + * silently ignore facilities other than ours.
+ */ + flags &= IFNET_LOGF_DLIL; + if (flags == 0 && !(ifp->if_log.flags & IFNET_LOGF_DLIL)) + level = 0; + } + + if (err == 0) { + if ((ifp->if_log.level = level) == IFNET_LOG_DEFAULT) + ifp->if_log.flags = 0; + else + ifp->if_log.flags |= flags; + + log(LOG_INFO, "%s: logging level set to %d flags=%b " + "arg=%b, category=%d subcategory=%d\n", if_name(ifp), + ifp->if_log.level, ifp->if_log.flags, + IFNET_LOGF_BITS, flags, IFNET_LOGF_BITS, + category, subcategory); + } + + return (err); +} + +int +ifnet_get_log(struct ifnet *ifp, int32_t *level, uint32_t *flags, + int32_t *category, int32_t *subcategory) +{ + if (level != NULL) + *level = ifp->if_log.level; + if (flags != NULL) + *flags = ifp->if_log.flags; + if (category != NULL) + *category = ifp->if_log.category; + if (subcategory != NULL) + *subcategory = ifp->if_log.subcategory; + + return (0); +} + +int +ifnet_notify_address(struct ifnet *ifp, int af) +{ + struct ifnet_notify_address_params na; + +#if PF + (void) pf_ifaddr_hook(ifp); +#endif /* PF */ + + if (ifp->if_output_ctl == NULL) + return (EOPNOTSUPP); + + bzero(&na, sizeof (na)); + na.address_family = af; + + return (ifp->if_output_ctl(ifp, IFNET_CTL_NOTIFY_ADDRESS, + sizeof (na), &na)); +} + +errno_t +ifnet_flowid(struct ifnet *ifp, uint32_t *flowid) +{ + if (ifp == NULL || flowid == NULL) { + return (EINVAL); + } else if (!(ifp->if_eflags & IFEF_TXSTART) || + !(ifp->if_refflags & IFRF_ATTACHED)) { + return (ENXIO); + } + + *flowid = ifp->if_flowhash; + + return (0); +} + +errno_t +ifnet_disable_output(struct ifnet *ifp) +{ + int err; + + if (ifp == NULL) { + return (EINVAL); + } else if (!(ifp->if_eflags & IFEF_TXSTART) || + !(ifp->if_refflags & IFRF_ATTACHED)) { + return (ENXIO); + } + + if ((err = ifnet_fc_add(ifp)) == 0) { + lck_mtx_lock_spin(&ifp->if_start_lock); + ifp->if_start_flags |= IFSF_FLOW_CONTROLLED; + lck_mtx_unlock(&ifp->if_start_lock); + } + return (err); +} + +errno_t +ifnet_enable_output(struct ifnet *ifp) +{ + if (ifp == NULL) { + return (EINVAL); + } else if (!(ifp->if_eflags & IFEF_TXSTART) || + !(ifp->if_refflags & IFRF_ATTACHED)) { + return (ENXIO); + } + + ifnet_start_common(ifp, 1); + return (0); +} + +void +ifnet_flowadv(uint32_t flowhash) +{ + struct ifnet_fc_entry *ifce; + struct ifnet *ifp; + + ifce = ifnet_fc_get(flowhash); + if (ifce == NULL) + return; + + VERIFY(ifce->ifce_ifp != NULL); + ifp = ifce->ifce_ifp; + + /* flow hash gets recalculated per attach, so check */ + if (ifnet_is_attached(ifp, 1)) { + if (ifp->if_flowhash == flowhash) + (void) ifnet_enable_output(ifp); + ifnet_decr_iorefcnt(ifp); + } + ifnet_fc_entry_free(ifce); +} + +/* + * Function to compare ifnet_fc_entries in ifnet flow control tree + */ +static inline int +ifce_cmp(const struct ifnet_fc_entry *fc1, const struct ifnet_fc_entry *fc2) +{ + return (fc1->ifce_flowhash - fc2->ifce_flowhash); +} + +static int +ifnet_fc_add(struct ifnet *ifp) +{ + struct ifnet_fc_entry keyfc, *ifce; + uint32_t flowhash; + + VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)); + VERIFY(ifp->if_flowhash != 0); + flowhash = ifp->if_flowhash; + + bzero(&keyfc, sizeof (keyfc)); + keyfc.ifce_flowhash = flowhash; + + lck_mtx_lock_spin(&ifnet_fc_lock); + ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc); + if (ifce != NULL && ifce->ifce_ifp == ifp) { + /* Entry is already in ifnet_fc_tree, return */ + lck_mtx_unlock(&ifnet_fc_lock); + return (0); + } + + if (ifce != NULL) { + /* + * There is a different fc entry with the same flow hash + * but different ifp pointer.
There can be a collision + * on flow hash but the probability is low. Let's just + * avoid adding a second one when there is a collision. + */ + lck_mtx_unlock(&ifnet_fc_lock); + return (EAGAIN); + } + + /* become regular mutex */ + lck_mtx_convert_spin(&ifnet_fc_lock); + + ifce = zalloc_noblock(ifnet_fc_zone); + if (ifce == NULL) { + /* memory allocation failed */ + lck_mtx_unlock(&ifnet_fc_lock); + return (ENOMEM); + } + bzero(ifce, ifnet_fc_zone_size); + + ifce->ifce_flowhash = flowhash; + ifce->ifce_ifp = ifp; + + RB_INSERT(ifnet_fc_tree, &ifnet_fc_tree, ifce); + lck_mtx_unlock(&ifnet_fc_lock); + return (0); +} + +static struct ifnet_fc_entry * +ifnet_fc_get(uint32_t flowhash) +{ + struct ifnet_fc_entry keyfc, *ifce; + struct ifnet *ifp; + + bzero(&keyfc, sizeof (keyfc)); + keyfc.ifce_flowhash = flowhash; + + lck_mtx_lock_spin(&ifnet_fc_lock); + ifce = RB_FIND(ifnet_fc_tree, &ifnet_fc_tree, &keyfc); + if (ifce == NULL) { + /* Entry is not present in ifnet_fc_tree, return */ + lck_mtx_unlock(&ifnet_fc_lock); + return (NULL); + } + + RB_REMOVE(ifnet_fc_tree, &ifnet_fc_tree, ifce); + + VERIFY(ifce->ifce_ifp != NULL); + ifp = ifce->ifce_ifp; + + /* become regular mutex */ + lck_mtx_convert_spin(&ifnet_fc_lock); + + if (!ifnet_is_attached(ifp, 0)) { + /* + * This ifp is not attached or in the process of being + * detached; just don't process it. + */ + ifnet_fc_entry_free(ifce); + ifce = NULL; + } + lck_mtx_unlock(&ifnet_fc_lock); + + return (ifce); +} + +static void +ifnet_fc_entry_free(struct ifnet_fc_entry *ifce) +{ + zfree(ifnet_fc_zone, ifce); +} + +static uint32_t +ifnet_calc_flowhash(struct ifnet *ifp) +{ + struct ifnet_flowhash_key fh __attribute__((aligned(8))); + uint32_t flowhash = 0; + + if (ifnet_flowhash_seed == 0) + ifnet_flowhash_seed = RandomULong(); + + bzero(&fh, sizeof (fh)); + + (void) snprintf(fh.ifk_name, sizeof (fh.ifk_name), "%s", ifp->if_name); + fh.ifk_unit = ifp->if_unit; + fh.ifk_flags = ifp->if_flags; + fh.ifk_eflags = ifp->if_eflags; + fh.ifk_capabilities = ifp->if_capabilities; + fh.ifk_capenable = ifp->if_capenable; + fh.ifk_output_sched_model = ifp->if_output_sched_model; + fh.ifk_rand1 = RandomULong(); + fh.ifk_rand2 = RandomULong(); + +try_again: + flowhash = net_flowhash(&fh, sizeof (fh), ifnet_flowhash_seed); + if (flowhash == 0) { + /* try to get a non-zero flowhash */ + ifnet_flowhash_seed = RandomULong(); + goto try_again; + } + + return (flowhash); +} + +static void +dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff, + protocol_family_t pf) +{ +#pragma unused(ifp) + uint32_t did_sw; + + if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) || + (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6))) + return; + + switch (pf) { + case PF_INET: + did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags); + if (did_sw & CSUM_DELAY_IP) + hwcksum_dbg_finalized_hdr++; + if (did_sw & CSUM_DELAY_DATA) + hwcksum_dbg_finalized_data++; + break; +#if INET6 + case PF_INET6: + /* + * Checksum offload should not have been enabled when + * extension headers exist; that also means that we + * cannot force-finalize packets with extension headers. + * Indicate to the callee should it skip such case by + * setting optlen to -1. 
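 */

/*
 * The try_again loop in ifnet_calc_flowhash() above exists because a
 * flow hash of 0 doubles as "no hash": on a zero result the seed is
 * regenerated and the hash recomputed.  The same idea with the hash
 * primitive abstracted out; the function-pointer signature and the
 * LCG reseed are illustrative, not the kernel's RandomULong() path.
 */
#include <stddef.h>
#include <stdint.h>

static uint32_t
nonzero_hash(uint32_t (*hash)(const void *, size_t, uint32_t),
    const void *key, size_t len, uint32_t *seed)
{
	uint32_t h;

	while ((h = hash(key, len, *seed)) == 0)
		*seed = *seed * 1664525u + 1013904223u;	/* pick a new seed */
	return (h);
}

/*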
+
+static void
+dlil_output_cksum_dbg(struct ifnet *ifp, struct mbuf *m, uint32_t hoff,
+    protocol_family_t pf)
+{
+#pragma unused(ifp)
+	uint32_t did_sw;
+
+	if (!(hwcksum_dbg_mode & HWCKSUM_DBG_FINALIZE_FORCED) ||
+	    (m->m_pkthdr.csum_flags & (CSUM_TSO_IPV4|CSUM_TSO_IPV6)))
+		return;
+
+	switch (pf) {
+	case PF_INET:
+		did_sw = in_finalize_cksum(m, hoff, m->m_pkthdr.csum_flags);
+		if (did_sw & CSUM_DELAY_IP)
+			hwcksum_dbg_finalized_hdr++;
+		if (did_sw & CSUM_DELAY_DATA)
+			hwcksum_dbg_finalized_data++;
+		break;
+#if INET6
+	case PF_INET6:
+		/*
+		 * Checksum offload should not have been enabled when
+		 * extension headers exist; that also means that we
+		 * cannot force-finalize packets with extension headers.
+		 * Indicate to the callee that it should skip such cases
+		 * by setting optlen to -1.
+		 */
+		did_sw = in6_finalize_cksum(m, hoff, -1, -1,
+		    m->m_pkthdr.csum_flags);
+		if (did_sw & CSUM_DELAY_IPV6_DATA)
+			hwcksum_dbg_finalized_data++;
+		break;
+#endif /* INET6 */
+	default:
+		return;
+	}
+}
+
+static void
+dlil_input_cksum_dbg(struct ifnet *ifp, struct mbuf *m, char *frame_header,
+    protocol_family_t pf)
+{
+	uint16_t sum;
+	uint32_t hlen;
+
+	if (frame_header == NULL ||
+	    frame_header < (char *)mbuf_datastart(m) ||
+	    frame_header > (char *)m->m_data) {
+		printf("%s: frame header pointer 0x%llx out of range "
+		    "[0x%llx,0x%llx] for mbuf 0x%llx\n", if_name(ifp),
+		    (uint64_t)VM_KERNEL_ADDRPERM(frame_header),
+		    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)),
+		    (uint64_t)VM_KERNEL_ADDRPERM(m->m_data),
+		    (uint64_t)VM_KERNEL_ADDRPERM(m));
+		return;
+	}
+	hlen = (m->m_data - frame_header);
+
+	switch (pf) {
+	case PF_INET:
+#if INET6
+	case PF_INET6:
+#endif /* INET6 */
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * Force partial checksum offload; useful to simulate cases
+	 * where the hardware does not support partial checksum offload,
+	 * in order to validate correctness throughout the layers above.
+	 */
+	if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED) {
+		uint32_t foff = hwcksum_dbg_partial_rxoff_forced;
+
+		if (foff > (uint32_t)m->m_pkthdr.len)
+			return;
+
+		m->m_pkthdr.csum_flags &= ~CSUM_RX_FLAGS;
+
+		/* Compute 16-bit 1's complement sum from forced offset */
+		sum = m_sum16(m, foff, (m->m_pkthdr.len - foff));
+
+		m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PARTIAL);
+		m->m_pkthdr.csum_rx_val = sum;
+		m->m_pkthdr.csum_rx_start = (foff + hlen);
+
+		hwcksum_dbg_partial_forced++;
+		hwcksum_dbg_partial_forced_bytes += m->m_pkthdr.len;
+	}
+
+	/*
+	 * Partial checksum offload verification (and adjustment);
+	 * useful to validate and test cases where the hardware
+	 * supports partial checksum offload.
+	 */
+	if ((m->m_pkthdr.csum_flags &
+	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
+	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
+		uint32_t rxoff;
+
+		/* Start offset must begin after frame header */
+		rxoff = m->m_pkthdr.csum_rx_start;
+		if (hlen > rxoff) {
+			hwcksum_dbg_bad_rxoff++;
+			if (dlil_verbose) {
+				printf("%s: partial cksum start offset %d "
+				    "is less than frame header length %d for "
+				    "mbuf 0x%llx\n", if_name(ifp), rxoff, hlen,
+				    (uint64_t)VM_KERNEL_ADDRPERM(m));
+			}
+			return;
+		}
+		rxoff -= hlen;
+
+		if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) {
+			/*
+			 * Compute the expected 16-bit 1's complement sum;
+			 * skip this if we've already computed it above
+			 * when partial checksum offload is forced.
+			 */
+			sum = m_sum16(m, rxoff, (m->m_pkthdr.len - rxoff));
+
+			/* Hardware or driver is buggy */
+			if (sum != m->m_pkthdr.csum_rx_val) {
+				hwcksum_dbg_bad_cksum++;
+				if (dlil_verbose) {
+					printf("%s: bad partial cksum value "
+					    "0x%x (expected 0x%x) for mbuf "
+					    "0x%llx [rx_start %d]\n",
+					    if_name(ifp),
+					    m->m_pkthdr.csum_rx_val, sum,
+					    (uint64_t)VM_KERNEL_ADDRPERM(m),
+					    m->m_pkthdr.csum_rx_start);
+				}
+				return;
+			}
+		}
+		hwcksum_dbg_verified++;
+
+		/*
+		 * This code allows us to emulate various hardware that
+		 * performs a 16-bit 1's complement sum beginning at
+		 * various start offset values.
+ */ + if (hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ) { + uint32_t aoff = hwcksum_dbg_partial_rxoff_adj; + + if (aoff == rxoff || aoff > (uint32_t)m->m_pkthdr.len) + return; + + sum = m_adj_sum16(m, rxoff, aoff, sum); + + m->m_pkthdr.csum_rx_val = sum; + m->m_pkthdr.csum_rx_start = (aoff + hlen); + + hwcksum_dbg_adjusted++; + } + } +} + +static int +sysctl_hwcksum_dbg_mode SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + u_int32_t i; + int err; + + i = hwcksum_dbg_mode; + + err = sysctl_handle_int(oidp, &i, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); + + if (hwcksum_dbg == 0) + return (ENODEV); + + if ((i & ~HWCKSUM_DBG_MASK) != 0) + return (EINVAL); + + hwcksum_dbg_mode = (i & HWCKSUM_DBG_MASK); + + return (err); +} + +static int +sysctl_hwcksum_dbg_partial_rxoff_forced SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + u_int32_t i; + int err; + + i = hwcksum_dbg_partial_rxoff_forced; + + err = sysctl_handle_int(oidp, &i, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); + + if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_FORCED)) + return (ENODEV); + + hwcksum_dbg_partial_rxoff_forced = i; + + return (err); +} + +static int +sysctl_hwcksum_dbg_partial_rxoff_adj SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + u_int32_t i; + int err; + + i = hwcksum_dbg_partial_rxoff_adj; + + err = sysctl_handle_int(oidp, &i, 0, req); + if (err != 0 || req->newptr == USER_ADDR_NULL) + return (err); + + if (!(hwcksum_dbg_mode & HWCKSUM_DBG_PARTIAL_RXOFF_ADJ)) + return (ENODEV); + + hwcksum_dbg_partial_rxoff_adj = i; + + return (err); +} + +#if DEBUG +/* Blob for sum16 verification */ +static uint8_t sumdata[] = { + 0x1f, 0x8b, 0x08, 0x08, 0x4c, 0xe5, 0x9a, 0x4f, 0x00, 0x03, + 0x5f, 0x00, 0x5d, 0x91, 0x41, 0x4e, 0xc4, 0x30, 0x0c, 0x45, + 0xf7, 0x9c, 0xc2, 0x07, 0x18, 0xf5, 0x0e, 0xb0, 0xe2, 0x00, + 0x48, 0x88, 0xa5, 0xdb, 0xba, 0x49, 0x34, 0x69, 0xdc, 0x71, + 0x92, 0xa9, 0xc2, 0x8a, 0x6b, 0x70, 0x3d, 0x4e, 0x82, 0x93, + 0xb4, 0x08, 0xd8, 0xc5, 0xb1, 0xfd, 0xff, 0xb3, 0xfd, 0x4c, + 0x42, 0x5f, 0x1f, 0x9f, 0x11, 0x12, 0x43, 0xb2, 0x04, 0x93, + 0xe0, 0x7b, 0x01, 0x0e, 0x14, 0x07, 0x78, 0xd1, 0x78, 0x75, + 0x71, 0x71, 0xe9, 0x08, 0x84, 0x46, 0xf2, 0xc7, 0x3b, 0x09, + 0xe7, 0xd1, 0xd3, 0x8a, 0x57, 0x92, 0x33, 0xcd, 0x39, 0xcc, + 0xb0, 0x91, 0x89, 0xe0, 0x42, 0x53, 0x8b, 0xb7, 0x8c, 0x42, + 0x60, 0xd9, 0x9f, 0x7a, 0x55, 0x19, 0x76, 0xcb, 0x10, 0x49, + 0x35, 0xac, 0x0b, 0x5a, 0x3c, 0xbb, 0x65, 0x51, 0x8c, 0x90, + 0x7c, 0x69, 0x45, 0x45, 0x81, 0xb4, 0x2b, 0x70, 0x82, 0x85, + 0x55, 0x91, 0x17, 0x90, 0xdc, 0x14, 0x1e, 0x35, 0x52, 0xdd, + 0x02, 0x16, 0xef, 0xb5, 0x40, 0x89, 0xe2, 0x46, 0x53, 0xad, + 0x93, 0x6e, 0x98, 0x30, 0xe5, 0x08, 0xb7, 0xcc, 0x03, 0xbc, + 0x71, 0x86, 0x09, 0x43, 0x0d, 0x52, 0xf5, 0xa2, 0xf5, 0xa2, + 0x56, 0x11, 0x8d, 0xa8, 0xf5, 0xee, 0x92, 0x3d, 0xfe, 0x8c, + 0x67, 0x71, 0x8b, 0x0e, 0x2d, 0x70, 0x77, 0xbe, 0xbe, 0xea, + 0xbf, 0x9a, 0x8d, 0x9c, 0x53, 0x53, 0xe5, 0xe0, 0x4b, 0x87, + 0x85, 0xd2, 0x45, 0x95, 0x30, 0xc1, 0xcc, 0xe0, 0x74, 0x54, + 0x13, 0x58, 0xe8, 0xe8, 0x79, 0xa2, 0x09, 0x73, 0xa4, 0x0e, + 0x39, 0x59, 0x0c, 0xe6, 0x9c, 0xb2, 0x4f, 0x06, 0x5b, 0x8e, + 0xcd, 0x17, 0x6c, 0x5e, 0x95, 0x4d, 0x70, 0xa2, 0x0a, 0xbf, + 0xa3, 0xcc, 0x03, 0xbc, 0x5a, 0xe7, 0x75, 0x06, 0x5e, 0x75, + 0xef, 0x58, 0x8e, 0x15, 0xd1, 0x0a, 0x18, 0xff, 0xdd, 0xe6, + 0x02, 0x3b, 0xb5, 0xb4, 0xa1, 0xe0, 0x72, 0xfc, 0xe3, 0xab, + 0x07, 0xe0, 0x4d, 0x65, 0xea, 0x92, 0xeb, 0xf2, 0x7b, 0x17, + 0x05, 0xce, 0xc6, 0xf6, 0x2b, 0xbb, 0x70, 0x3d, 0x00, 0x95, + 
+	0xe0, 0x07, 0x52, 0x3b, 0x58, 0xfc, 0x7c, 0x69, 0x4d, 0xe9,
+	0xf7, 0xa9, 0x66, 0x1e, 0x1e, 0xbe, 0x01, 0x69, 0x98, 0xfe,
+	0xc8, 0x28, 0x02, 0x00, 0x00
+};
+
+/* Precomputed 16-bit 1's complement sums for various spans of the above data */
+static struct {
+	int len;
+	uint16_t sum;
+} sumtbl[] = {
+	{ 11, 0xcb6d },
+	{ 20, 0x20dd },
+	{ 27, 0xbabd },
+	{ 32, 0xf3e8 },
+	{ 37, 0x197d },
+	{ 43, 0x9eae },
+	{ 64, 0x4678 },
+	{ 127, 0x9399 },
+	{ 256, 0xd147 },
+	{ 325, 0x0358 }
+};
+#define	SUMTBL_MAX	((int)sizeof (sumtbl) / (int)sizeof (sumtbl[0]))
+
+static void
+dlil_verify_sum16(void)
+{
+	struct mbuf *m;
+	uint8_t *buf;
+	int n;
+
+	/* Make sure test data plus extra room for alignment fits in cluster */
+	_CASSERT((sizeof (sumdata) + (sizeof (uint64_t) * 2)) <= MCLBYTES);
+
+	m = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
+	MH_ALIGN(m, sizeof (uint32_t));		/* 32-bit starting alignment */
+	buf = mtod(m, uint8_t *);		/* base address */
+
+	for (n = 0; n < SUMTBL_MAX; n++) {
+		uint16_t len = sumtbl[n].len;
+		int i;
+
+		/* Verify for all possible alignments */
+		for (i = 0; i < (int)sizeof (uint64_t); i++) {
+			uint16_t sum;
+			uint8_t *c;
+
+			/* Copy over test data to mbuf */
+			VERIFY(len <= sizeof (sumdata));
+			c = buf + i;
+			bcopy(sumdata, c, len);
+
+			/* Zero-offset test (align by data pointer) */
+			m->m_data = (caddr_t)c;
+			m->m_len = len;
+			sum = m_sum16(m, 0, len);
+
+			/* Something is horribly broken; stop now */
+			if (sum != sumtbl[n].sum) {
+				panic("%s: broken m_sum16 for len=%d align=%d "
+				    "sum=0x%04x [expected=0x%04x]\n", __func__,
+				    len, i, sum, sumtbl[n].sum);
+				/* NOTREACHED */
+			}
+
+			/* Alignment test by offset (fixed data pointer) */
+			m->m_data = (caddr_t)buf;
+			m->m_len = i + len;
+			sum = m_sum16(m, i, len);
+
+			/* Something is horribly broken; stop now */
+			if (sum != sumtbl[n].sum) {
+				panic("%s: broken m_sum16 for len=%d offset=%d "
+				    "sum=0x%04x [expected=0x%04x]\n", __func__,
+				    len, i, sum, sumtbl[n].sum);
+				/* NOTREACHED */
+			}
+#if INET
+			/* Simple sum16 contiguous buffer test by alignment */
+			sum = b_sum16(c, len);
+
+			/* Something is horribly broken; stop now */
+			if (sum != sumtbl[n].sum) {
+				panic("%s: broken b_sum16 for len=%d align=%d "
+				    "sum=0x%04x [expected=0x%04x]\n", __func__,
+				    len, i, sum, sumtbl[n].sum);
+				/* NOTREACHED */
+			}
+#endif /* INET */
+		}
+	}
+	m_freem(m);
+
+	printf("DLIL: SUM16 self-tests PASSED\n");
+}
+#endif /* DEBUG */
+
+#define	CASE_STRINGIFY(x)	case x: return #x
+
+__private_extern__ const char *
+dlil_kev_dl_code_str(u_int32_t event_code)
+{
+	switch (event_code) {
+	CASE_STRINGIFY(KEV_DL_SIFFLAGS);
+	CASE_STRINGIFY(KEV_DL_SIFMETRICS);
+	CASE_STRINGIFY(KEV_DL_SIFMTU);
+	CASE_STRINGIFY(KEV_DL_SIFPHYS);
+	CASE_STRINGIFY(KEV_DL_SIFMEDIA);
+	CASE_STRINGIFY(KEV_DL_SIFGENERIC);
+	CASE_STRINGIFY(KEV_DL_ADDMULTI);
+	CASE_STRINGIFY(KEV_DL_DELMULTI);
+	CASE_STRINGIFY(KEV_DL_IF_ATTACHED);
+	CASE_STRINGIFY(KEV_DL_IF_DETACHING);
+	CASE_STRINGIFY(KEV_DL_IF_DETACHED);
+	CASE_STRINGIFY(KEV_DL_LINK_OFF);
+	CASE_STRINGIFY(KEV_DL_LINK_ON);
+	CASE_STRINGIFY(KEV_DL_PROTO_ATTACHED);
+	CASE_STRINGIFY(KEV_DL_PROTO_DETACHED);
+	CASE_STRINGIFY(KEV_DL_LINK_ADDRESS_CHANGED);
+	CASE_STRINGIFY(KEV_DL_WAKEFLAGS_CHANGED);
+	CASE_STRINGIFY(KEV_DL_IF_IDLE_ROUTE_REFCNT);
+	CASE_STRINGIFY(KEV_DL_IFCAP_CHANGED);
+	CASE_STRINGIFY(KEV_DL_LINK_QUALITY_METRIC_CHANGED);
+	CASE_STRINGIFY(KEV_DL_NODE_PRESENCE);
+	CASE_STRINGIFY(KEV_DL_NODE_ABSENCE);
+	CASE_STRINGIFY(KEV_DL_MASTER_ELECTED);
+	CASE_STRINGIFY(KEV_DL_ISSUES);
+	CASE_STRINGIFY(KEV_DL_IFDELEGATE_CHANGED);
+	default:
break; + } + return (""); +} diff --git a/bsd/net/dlil.h b/bsd/net/dlil.h index 98ca8e878..da72b75f3 100644 --- a/bsd/net/dlil.h +++ b/bsd/net/dlil.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 1999-2012 Apple Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,11 +22,11 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #ifndef DLIL_H -#define DLIL_H +#define DLIL_H #ifdef KERNEL #include @@ -55,9 +55,9 @@ enum { * variants.native_type_length. */ /* Ethernet specific types */ -#define DLIL_DESC_ETYPE2 4 -#define DLIL_DESC_SAP 5 -#define DLIL_DESC_SNAP 6 +#define DLIL_DESC_ETYPE2 4 +#define DLIL_DESC_SAP 5 +#define DLIL_DESC_SNAP 6 #ifdef KERNEL_PRIVATE #include @@ -76,13 +76,13 @@ enum { #define net_timercmp(tvp, uvp, cmp) \ (((tvp)->tv_sec == (uvp)->tv_sec) ? 
\ - ((tvp)->tv_nsec cmp (uvp)->tv_nsec) : \ - ((tvp)->tv_sec cmp (uvp)->tv_sec)) + ((tvp)->tv_nsec cmp (uvp)->tv_nsec) : \ + ((tvp)->tv_sec cmp (uvp)->tv_sec)) #define net_timeradd(tvp, uvp, vvp) do { \ (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec; \ (vvp)->tv_nsec = (tvp)->tv_nsec + (uvp)->tv_nsec; \ - if ((vvp)->tv_nsec >= NSEC_PER_SEC) { \ + if ((vvp)->tv_nsec >= (long)NSEC_PER_SEC) { \ (vvp)->tv_sec++; \ (vvp)->tv_nsec -= NSEC_PER_SEC; \ } \ @@ -100,7 +100,7 @@ enum { #define net_timernsec(tvp, nsp) do { \ *(nsp) = (tvp)->tv_nsec; \ if ((tvp)->tv_sec > 0) \ - *(nsp) += ((tvp)->tv_sec * NSEC_PER_SEC); \ + *(nsp) += ((tvp)->tv_sec * (integer_t)NSEC_PER_SEC); \ } while (0) #define net_nsectimer(nsp, tvp) do { \ @@ -164,6 +164,8 @@ struct dlil_threading_info { #define rxpoll_bmax pstats.ifi_poll_bytes_max #define rxpoll_blowat pstats.ifi_poll_bytes_lowat #define rxpoll_bhiwat pstats.ifi_poll_bytes_hiwat +#define rxpoll_plim pstats.ifi_poll_packets_limit +#define rxpoll_ival pstats.ifi_poll_interval_time struct pktcntr sstats; /* packets and bytes per sampling */ struct timespec mode_holdtime; /* mode holdtime in nsec */ struct timespec mode_lasttime; /* last mode change time in nsec */ @@ -196,7 +198,16 @@ struct dlil_main_threading_info { #define DLIL_PROTO_WAITING 0x10000000 #define DLIL_INPUT_TERMINATE 0x08000000 -__private_extern__ struct dlil_threading_info *dlil_main_input_thread; +/* + * Flags for dlil_attach_filter() + */ +#define DLIL_IFF_TSO 0x01 /* Interface filter supports TSO */ + +extern int dlil_verbose; +extern uint32_t hwcksum_dbg; +extern uint32_t hwcksum_tx; +extern uint32_t hwcksum_rx; +extern struct dlil_threading_info *dlil_main_input_thread; extern void dlil_init(void); @@ -212,15 +223,56 @@ extern errno_t dlil_send_arp_internal(ifnet_t, u_int16_t, const struct sockaddr_dl *, const struct sockaddr *); /* - * The following flags used to check if a network thread already - * owns the lock + * The following constants are used with the net_thread_mark_apply and + * net_thread_is_unmarked functions to control the bits in the uu_network_marks + * field of the uthread structure. */ #define NET_THREAD_HELD_PF 0x1 /* thread is holding PF lock */ -#define NET_THREAD_HELD_DOMAIN 0x2 /* thread is holding domain_proto_mtx */ +#define NET_THREAD_HELD_DOMAIN 0x2 /* thread is holding domain_proto_mtx */ +#define NET_THREAD_CKREQ_LLADDR 0x4 /* thread reqs MACF check for LLADDR */ + +/* + * net_thread_marks_t is a pointer to a phantom structure type used for + * manipulating the uthread:uu_network_marks field. As an example... + * + * static const u_int32_t bits = NET_THREAD_CKREQ_LLADDR; + * struct uthread *uth = get_bsdthread_info(current_thread()); + * + * net_thread_marks_t marks = net_thread_marks_push(bits); + * VERIFY((uth->uu_network_marks & NET_THREAD_CKREQ_LLADDR) != 0); + * net_thread_marks_pop(marks); + * + * The net_thread_marks_push() function returns an encoding of the bits + * that were changed from zero to one in the uu_network_marks field. When + * the net_thread_marks_pop() function later processes that value, it + * resets the bits to their previous value. + * + * The net_thread_unmarks_push() and net_thread_unmarks_pop() functions + * are similar to net_thread_marks_push() and net_thread_marks_pop() except + * they clear the marks bits in the guarded section rather than set them. 
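+ *
+ * For example, a hypothetical sketch of the unmarks variants guarding
+ * a section in which the PF mark must be treated as clear, mirroring
+ * the push/pop example above:
+ *
+ *	net_thread_marks_t marks;
+ *
+ *	marks = net_thread_unmarks_push(NET_THREAD_HELD_PF);
+ *	VERIFY(net_thread_is_unmarked(NET_THREAD_HELD_PF) != 0);
+ *	net_thread_unmarks_pop(marks);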
+ * + * The net_thread_is_marked() and net_thread_is_unmarked() functions return + * the subset of the bits that are currently set or cleared (respectively) + * in the uthread:uu_network_marks field. + * + * Finally, the value of the net_thread_marks_none constant is provided for + * comparing for equality with the value returned when no bits in the marks + * field are changed by the push. + * + * It is not significant that a value of type net_thread_marks_t may + * compare as equal to the NULL pointer. + */ +struct net_thread_marks; +typedef const struct net_thread_marks *net_thread_marks_t; + +extern const net_thread_marks_t net_thread_marks_none; -extern errno_t net_thread_check_lock(u_int32_t); -extern void net_thread_set_lock(u_int32_t); -extern void net_thread_unset_lock(u_int32_t); +extern net_thread_marks_t net_thread_marks_push(u_int32_t); +extern net_thread_marks_t net_thread_unmarks_push(u_int32_t); +extern void net_thread_marks_pop(net_thread_marks_t); +extern void net_thread_unmarks_pop(net_thread_marks_t); +extern u_int32_t net_thread_is_marked(u_int32_t); +extern u_int32_t net_thread_is_unmarked(u_int32_t); extern int dlil_output(ifnet_t, protocol_family_t, mbuf_t, void *, const struct sockaddr *, int, struct flowadv *); @@ -237,7 +289,7 @@ extern errno_t dlil_send_arp(ifnet_t, u_int16_t, const struct sockaddr_dl *, const struct sockaddr *, u_int32_t); extern int dlil_attach_filter(ifnet_t, const struct iff_filter *, - interface_filter_t *); + interface_filter_t *, u_int32_t); extern void dlil_detach_filter(interface_filter_t); extern void dlil_proto_unplumb_all(ifnet_t); @@ -266,6 +318,23 @@ extern void dlil_node_present(struct ifnet *, struct sockaddr *, int32_t, int, int, u_int8_t[48]); extern void dlil_node_absent(struct ifnet *, struct sockaddr *); +extern const void *dlil_ifaddr_bytes(const struct sockaddr_dl *, size_t *, + kauth_cred_t *); + +extern void dlil_report_issues(struct ifnet *, u_int8_t[DLIL_MODIDLEN], + u_int8_t[DLIL_MODARGLEN]); + +#define PROTO_HASH_SLOTS 4 + +extern int proto_hash_value(u_int32_t); + +extern const char *dlil_kev_dl_code_str(u_int32_t); + +extern errno_t dlil_rxpoll_set_params(struct ifnet *, + struct ifnet_poll_params *, boolean_t); +extern errno_t dlil_rxpoll_get_params(struct ifnet *, + struct ifnet_poll_params *); + #endif /* BSD_KERNEL_PRIVATE */ #endif /* KERNEL_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/net/ether_at_pr_module.c b/bsd/net/ether_at_pr_module.c deleted file mode 100644 index e7daa051a..000000000 --- a/bsd/net/ether_at_pr_module.c +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1982, 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ - - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#if NETAT -extern struct ifqueue atalkintrq; -#endif - - -/* #include "vlan.h" */ -#if NVLAN > 0 -#include -#endif /* NVLAN > 0 */ - -#include - -/* - * Process a received Ethernet packet; - * the packet is in the mbuf chain m without - * the ether header, which is provided separately. - */ -static errno_t -ether_at_input( - __unused ifnet_t ifp, - __unused protocol_family_t protocol_family, - mbuf_t m, - __unused char *frame_header) -{ - errno_t error; - /* - * note: for AppleTalk we need to pass the enet header of the - * packet up stack. To do so, we made sure in that the FULL packet - * is copied in the mbuf by the driver, and only the m_data and - * length have been shifted to make IP and the other guys happy. 
- */ - - m->m_data -= sizeof(struct ether_header); - m->m_len += sizeof(struct ether_header); - m->m_pkthdr.len += sizeof(struct ether_header); - - error = proto_input(PF_APPLETALK, m); - - if (error) - m_freem(m); - - return error; -} - - - -static errno_t -ether_at_pre_output( - ifnet_t ifp, - __unused protocol_family_t protocol_family, - mbuf_t *m0, - const struct sockaddr *dst_netaddr, - __unused void *route, - char *type, - char *edst) -{ - struct mbuf *m = *m0; - const struct ether_header *eh; - int hlen; /* link layer header length */ - - if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) - return ENETDOWN; - - hlen = ETHER_HDR_LEN; - - /* - * Tell ether_frameout it's ok to loop packet unless negated below. - */ - m->m_flags |= M_LOOP; - - switch (dst_netaddr->sa_family) { - case AF_UNSPEC: - m->m_flags &= ~M_LOOP; - eh = (const struct ether_header *)dst_netaddr->sa_data; - (void)memcpy(edst, eh->ether_dhost, 6); - *(u_short *)type = eh->ether_type; - break; - - case AF_APPLETALK: - eh = (const struct ether_header *)dst_netaddr->sa_data; - (void)memcpy(edst, eh->ether_dhost, 6); - *(u_short *)type = htons(m->m_pkthdr.len); - break; - - default: - printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit, - dst_netaddr->sa_family); - return EAFNOSUPPORT; - } - - return (0); -} - - - - -static errno_t -ether_at_prmod_ioctl( - ifnet_t ifp, - __unused protocol_family_t protocol_family, - u_int32_t command, - void *data) -{ - int error = 0; - - switch (command) { - - case SIOCSIFADDR: /* struct ifaddr pointer */ - /* - * Note: caller of ifnet_ioctl() passes in pointer to - * struct ifaddr as parameter to SIOCSIFADDR, for legacy - * reasons. - */ - if ((ifp->if_flags & IFF_RUNNING) == 0) { - ifnet_set_flags(ifp, IFF_UP, IFF_UP); - ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL); - } - - break; - - case SIOCGIFADDR: { /* struct ifreq */ - struct ifreq *ifr = data; - - ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, ETHER_ADDR_LEN); - break; - } - - default: - error = EOPNOTSUPP; - break; - } - return (error); -} - - - -__private_extern__ errno_t -ether_attach_at( - ifnet_t ifp, - __unused protocol_family_t proto_family) -{ - struct ifnet_attach_proto_param proto; - struct ifnet_demux_desc demux[2]; - u_int8_t atalk_snap[5] = {0x08, 0x00, 0x07, 0x80, 0x9b}; - u_int8_t aarp_snap[5] = {0x00, 0x00, 0x00, 0x80, 0xf3}; - int error; - - bzero(demux, sizeof(demux)); - demux[0].type = DLIL_DESC_SNAP; - demux[0].data = atalk_snap; - demux[0].datalen = sizeof(atalk_snap); - demux[1].type = DLIL_DESC_SNAP; - demux[1].data = aarp_snap; - demux[1].datalen = sizeof(aarp_snap); - - bzero(&proto, sizeof(proto)); - proto.demux_list = demux; - proto.demux_count = sizeof(demux) / sizeof(demux[0]); - proto.input = ether_at_input; - proto.pre_output = ether_at_pre_output; - proto.ioctl = ether_at_prmod_ioctl; - - error = ifnet_attach_protocol(ifp, PF_APPLETALK, &proto); - if (error && error != EEXIST) { - printf("WARNING: ether_attach_at failed to attach" - " AppleTalk to %s%d\n", ifp->if_name, ifp->if_unit); - } - return (error); -} - -__private_extern__ void -ether_detach_at( - ifnet_t ifp, - __unused protocol_family_t proto_family) -{ - (void)ifnet_detach_protocol(ifp, PF_APPLETALK); -} diff --git a/bsd/net/ether_if_module.c b/bsd/net/ether_if_module.c index 60b6846d2..3a86d2674 100644 --- a/bsd/net/ether_if_module.c +++ b/bsd/net/ether_if_module.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -59,8 +59,6 @@ * */ - - #include #include #include @@ -79,6 +77,7 @@ #include #include #include +#include #include #include /* For M_LOOP */ #include @@ -106,52 +105,37 @@ #include -#if LLC && CCITT -extern struct ifqueue pkintrq; -#endif - -/* General stuff from if_ethersubr.c - may not need some of it */ - -#include -#if NETAT -extern struct ifqueue atalkintrq; -#endif - - #define memcpy(x,y,z) bcopy(y, x, z) - SYSCTL_DECL(_net_link); -SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Ethernet"); +SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "Ethernet"); struct en_desc { - u_int16_t type; /* Type of protocol stored in data */ - u_int32_t protocol_family; /* Protocol family */ - u_int32_t data[2]; /* Protocol data */ + u_int16_t type; /* Type of protocol stored in data */ + u_int32_t protocol_family; /* Protocol family */ + u_int32_t data[2]; /* Protocol data */ }; /* descriptors are allocated in blocks of ETHER_DESC_BLK_SIZE */ -#if CONFIG_EMBEDDED -#define ETHER_DESC_BLK_SIZE (2) /* IP, ARP */ -#else #define ETHER_DESC_BLK_SIZE (10) -#endif /* * Header for the demux list, hangs off of IFP at if_family_cookie */ - struct ether_desc_blk_str { u_int32_t n_max_used; u_int32_t n_count; u_int32_t n_used; struct en_desc block_ptr[1]; }; + /* Size of the above struct before the array of struct en_desc */ -#define ETHER_DESC_HEADER_SIZE ((size_t)offsetof(struct ether_desc_blk_str, block_ptr)) -__private_extern__ u_char etherbroadcastaddr[ETHER_ADDR_LEN] = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; +#define ETHER_DESC_HEADER_SIZE \ + ((size_t) offsetof(struct ether_desc_blk_str, block_ptr)) +__private_extern__ u_char etherbroadcastaddr[ETHER_ADDR_LEN] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* * Release all descriptor entries owned by this protocol (there may be several). @@ -159,122 +143,122 @@ __private_extern__ u_char etherbroadcastaddr[ETHER_ADDR_LEN] = * the unused entries. 
*/ int -ether_del_proto( - ifnet_t ifp, - protocol_family_t protocol_family) +ether_del_proto(ifnet_t ifp, protocol_family_t protocol_family) { - struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->if_family_cookie; - u_int32_t current = 0; + struct ether_desc_blk_str *desc_blk = + (struct ether_desc_blk_str *)ifp->if_family_cookie; + u_int32_t current = 0; int found = 0; - + if (desc_blk == NULL) - return 0; - + return (0); + for (current = desc_blk->n_max_used; current > 0; current--) { - if (desc_blk->block_ptr[current - 1].protocol_family == protocol_family) { + if (desc_blk->block_ptr[current - 1].protocol_family == + protocol_family) { found = 1; desc_blk->block_ptr[current - 1].type = 0; desc_blk->n_used--; } } - + if (desc_blk->n_used == 0) { FREE(ifp->if_family_cookie, M_IFADDR); ifp->if_family_cookie = 0; - } - else { + } else { /* Decrement n_max_used */ - for (; desc_blk->n_max_used > 0 && desc_blk->block_ptr[desc_blk->n_max_used - 1].type == 0; desc_blk->n_max_used--) + for (; desc_blk->n_max_used > 0 && + desc_blk->block_ptr[desc_blk->n_max_used - 1].type == 0; + desc_blk->n_max_used--) ; } - - return 0; - } + return (0); +} static int -ether_add_proto_internal( - struct ifnet *ifp, - protocol_family_t protocol, - const struct ifnet_demux_desc *demux) +ether_add_proto_internal(struct ifnet *ifp, protocol_family_t protocol, + const struct ifnet_demux_desc *demux) { struct en_desc *ed; - struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->if_family_cookie; + struct ether_desc_blk_str *desc_blk = + (struct ether_desc_blk_str *)ifp->if_family_cookie; u_int32_t i; - + switch (demux->type) { - /* These types are supported */ - /* Top three are preferred */ + /* These types are supported */ + /* Top three are preferred */ + case DLIL_DESC_ETYPE2: + if (demux->datalen != 2) + return (EINVAL); + break; + + case DLIL_DESC_SAP: + if (demux->datalen != 3) + return (EINVAL); + break; + + case DLIL_DESC_SNAP: + if (demux->datalen != 5) + return (EINVAL); + break; + + default: + return (ENOTSUP); + } + + /* Verify a matching descriptor does not exist */ + if (desc_blk != NULL) { + switch (demux->type) { case DLIL_DESC_ETYPE2: - if (demux->datalen != 2) { - return EINVAL; + for (i = 0; i < desc_blk->n_max_used; i++) { + if (desc_blk->block_ptr[i].type == + DLIL_DESC_ETYPE2 && + desc_blk->block_ptr[i].data[0] == + *(u_int16_t*)demux->data) { + return (EADDRINUSE); + } } break; - case DLIL_DESC_SAP: - if (demux->datalen != 3) { - return EINVAL; - } - break; - case DLIL_DESC_SNAP: - if (demux->datalen != 5) { - return EINVAL; + for (i = 0; i < desc_blk->n_max_used; i++) { + if (desc_blk->block_ptr[i].type == + demux->type && + bcmp(desc_blk->block_ptr[i].data, + demux->data, demux->datalen) == 0) { + return (EADDRINUSE); + } } break; - - default: - return ENOTSUP; - } - - // Verify a matching descriptor does not exist. 
-	if (desc_blk != NULL) {
-		switch (demux->type) {
-			case DLIL_DESC_ETYPE2:
-				for (i = 0; i < desc_blk->n_max_used; i++) {
-					if (desc_blk->block_ptr[i].type == DLIL_DESC_ETYPE2 &&
-						desc_blk->block_ptr[i].data[0] ==
-						*(u_int16_t*)demux->data) {
-						return EADDRINUSE;
-					}
-				}
-				break;
-			case DLIL_DESC_SAP:
-			case DLIL_DESC_SNAP:
-				for (i = 0; i < desc_blk->n_max_used; i++) {
-					if (desc_blk->block_ptr[i].type == demux->type &&
-						bcmp(desc_blk->block_ptr[i].data, demux->data,
-						demux->datalen) == 0) {
-						return EADDRINUSE;
-					}
-				}
-				break;
 		}
 	}
-
-	// Check for case where all of the descriptor blocks are in use
+
+	/* Check for case where all of the descriptor blocks are in use */
 	if (desc_blk == NULL || desc_blk->n_used == desc_blk->n_count) {
 		struct ether_desc_blk_str *tmp;
-		u_int32_t new_count = ETHER_DESC_BLK_SIZE;
-		u_int32_t new_size;
-		u_int32_t old_size = 0;
-
+		u_int32_t new_count = ETHER_DESC_BLK_SIZE;
+		u_int32_t new_size;
+		u_int32_t old_size = 0;
+
 		i = 0;
-
+
 		if (desc_blk) {
 			new_count += desc_blk->n_count;
-			old_size = desc_blk->n_count * sizeof(struct en_desc) + ETHER_DESC_HEADER_SIZE;
+			old_size = desc_blk->n_count * sizeof (struct en_desc) +
+			    ETHER_DESC_HEADER_SIZE;
 			i = desc_blk->n_used;
 		}
-
-		new_size = new_count * sizeof(struct en_desc) + ETHER_DESC_HEADER_SIZE;
-
+
+		new_size = new_count * sizeof (struct en_desc) +
+		    ETHER_DESC_HEADER_SIZE;
+
 		tmp = _MALLOC(new_size, M_IFADDR, M_WAITOK);
-		if (tmp == 0) {
+		if (tmp == NULL) {
 			/*
 			 * Remove any previous descriptors set in the call.
 			 */
-			return ENOMEM;
+			return (ENOMEM);
 		}
 		bzero(((char *)tmp) + old_size, new_size - old_size);
@@ -285,8 +269,7 @@
 		desc_blk = tmp;
 		ifp->if_family_cookie = (uintptr_t)desc_blk;
 		desc_blk->n_count = new_count;
-	}
-	else {
+	} else {
 		/* Find a free entry */
 		for (i = 0; i < desc_blk->n_count; i++) {
 			if (desc_blk->block_ptr[i].type == 0) {
@@ -294,53 +277,50 @@
 				break;
 			}
 		}
 	}
-
+
 	/* Bump n_max_used if appropriate */
 	if (i + 1 > desc_blk->n_max_used) {
 		desc_blk->n_max_used = i + 1;
 	}
-
+
 	ed = &desc_blk->block_ptr[i];
 	ed->protocol_family = protocol;
 	ed->data[0] = 0;
 	ed->data[1] = 0;
-
+
 	switch (demux->type) {
-		case DLIL_DESC_ETYPE2:
-			/* 2 byte ethernet raw protocol type is at native_type */
-			/* prtocol must be in network byte order */
-			ed->type = DLIL_DESC_ETYPE2;
-			ed->data[0] = *(u_int16_t*)demux->data;
-			break;
-
-		case DLIL_DESC_SAP:
-			ed->type = DLIL_DESC_SAP;
-			bcopy(demux->data, &ed->data[0], 3);
-			break;
-
-		case DLIL_DESC_SNAP: {
-			u_int8_t* pDest = ((u_int8_t*)&ed->data[0]) + 3;
-			ed->type = DLIL_DESC_SNAP;
-			bcopy(demux->data, pDest, 5);
-		}
-		break;
+	case DLIL_DESC_ETYPE2:
+		/* 2 byte ethernet raw protocol type is at native_type */
+		/* protocol must be in network byte order */
+		ed->type = DLIL_DESC_ETYPE2;
+		ed->data[0] = *(u_int16_t*)demux->data;
+		break;
+
+	case DLIL_DESC_SAP:
+		ed->type = DLIL_DESC_SAP;
+		bcopy(demux->data, &ed->data[0], 3);
+		break;
+
+	case DLIL_DESC_SNAP: {
+		u_int8_t* pDest = ((u_int8_t*)&ed->data[0]) + 3;
+		ed->type = DLIL_DESC_SNAP;
+		bcopy(demux->data, pDest, 5);
+		break;
 	}
-
+	}
+
 	desc_blk->n_used++;
-
-	return 0;
+
+	return (0);
 }
 
 int
-ether_add_proto(
-	ifnet_t ifp,
-	protocol_family_t protocol,
-	const struct ifnet_demux_desc *demux_list,
-	u_int32_t demux_count)
+ether_add_proto(ifnet_t ifp, protocol_family_t protocol,
+    const struct ifnet_demux_desc *demux_list, u_int32_t demux_count)
 {
-	int error = 0;
-	u_int32_t i;
-
+	int error = 0;
+	u_int32_t i;
+
 	for (i = 0; i < demux_count; i++) {
 		error =
ether_add_proto_internal(ifp, protocol, &demux_list[i]); if (error) { @@ -348,27 +328,25 @@ ether_add_proto( break; } } - - return error; + + return (error); } int -ether_demux( - ifnet_t ifp, - mbuf_t m, - char *frame_header, - protocol_family_t *protocol_family) +ether_demux(ifnet_t ifp, mbuf_t m, char *frame_header, + protocol_family_t *protocol_family) { struct ether_header *eh = (struct ether_header *)(void *)frame_header; - u_short ether_type = eh->ether_type; - u_int16_t type; - u_int8_t *data; - u_int32_t i = 0; - struct ether_desc_blk_str *desc_blk = (struct ether_desc_blk_str *)ifp->if_family_cookie; - u_int32_t maxd = desc_blk ? desc_blk->n_max_used : 0; + u_short ether_type = eh->ether_type; + u_int16_t type; + u_int8_t *data; + u_int32_t i = 0; + struct ether_desc_blk_str *desc_blk = + (struct ether_desc_blk_str *)ifp->if_family_cookie; + u_int32_t maxd = desc_blk ? desc_blk->n_max_used : 0; struct en_desc *ed = desc_blk ? desc_blk->block_ptr : NULL; - u_int32_t extProto1 = 0; - u_int32_t extProto2 = 0; + u_int32_t extProto1 = 0; + u_int32_t extProto2 = 0; if (eh->ether_dhost[0] & 1) { /* Check for broadcast */ @@ -396,16 +374,17 @@ ether_demux( if ((eh->ether_dhost[0] & 1) == 0) { /* - * When the driver is put into promiscuous mode we may receive unicast - * frames that are not intended for our interfaces. They are marked here - * as being promiscuous so the caller may dispose of them after passing - * the packets to any interface filters. + * When the driver is put into promiscuous mode we may receive + * unicast frames that are not intended for our interfaces. + * They are marked here as being promiscuous so the caller may + * dispose of them after passing the packets to any interface + * filters. */ - if (_ether_cmp(eh->ether_dhost, ifnet_lladdr(ifp))) { + if (_ether_cmp(eh->ether_dhost, IF_LLADDR(ifp))) { m->m_flags |= M_PROMISC; } } - + /* check for VLAN */ if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) { if (EVL_VLANOFTAG(m->m_pkthdr.vlan_tag) != 0) { @@ -414,16 +393,15 @@ ether_demux( } /* the packet is just priority-tagged, clear the bit */ m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID; - } - else if (ether_type == htons(ETHERTYPE_VLAN)) { + } else if (ether_type == htons(ETHERTYPE_VLAN)) { struct ether_vlan_header * evl; evl = (struct ether_vlan_header *)(void *)frame_header; - if (m->m_len < ETHER_VLAN_ENCAP_LEN - || ntohs(evl->evl_proto) == ETHERTYPE_VLAN - || EVL_VLANOFTAG(ntohs(evl->evl_tag)) != 0) { + if (m->m_len < ETHER_VLAN_ENCAP_LEN || + ntohs(evl->evl_proto) == ETHERTYPE_VLAN || + EVL_VLANOFTAG(ntohs(evl->evl_tag)) != 0) { *protocol_family = PF_VLAN; - return 0; + return (0); } /* the packet is just priority-tagged */ @@ -435,23 +413,24 @@ ether_demux( m->m_data += ETHER_VLAN_ENCAP_LEN; m->m_pkthdr.len -= ETHER_VLAN_ENCAP_LEN; m->m_pkthdr.csum_flags = 0; /* can't trust hardware checksum */ - } - + } else if (ether_type == htons(ETHERTYPE_ARP)) + m->m_pkthdr.pkt_flags |= PKTF_INET_RESOLVE; /* ARP packet */ + data = mtod(m, u_int8_t*); - + /* * Determine the packet's protocol type and stuff the protocol into * longs for quick compares. 
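+ *
+ * As a worked example: an AppleTalk SNAP frame begins with the LLC/SNAP
+ * bytes AA AA 03 08 00 07 80 9B.  The first three bytes match the
+ * AA AA 03 SNAP signature, extProto1 is left holding the leading OUI
+ * byte (0x08), and extProto2 picks up the remaining bytes 00 07 80 9B
+ * for the second compare below.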
 */
-
 	if (ntohs(ether_type) <= 1500) {
 		bcopy(data, &extProto1, sizeof (u_int32_t));
-
-		// SAP or SNAP
+
+		/* SAP or SNAP */
 		if ((extProto1 & htonl(0xFFFFFF00)) == htonl(0xAAAA0300)) {
-			// SNAP
+			/* SNAP */
 			type = DLIL_DESC_SNAP;
-			bcopy(data + sizeof(u_int32_t), &extProto2, sizeof (u_int32_t));
+			bcopy(data + sizeof (u_int32_t), &extProto2,
+			    sizeof (u_int32_t));
 			extProto1 &= htonl(0x000000FF);
 		} else {
 			type = DLIL_DESC_SAP;
@@ -460,43 +439,72 @@
 		}
 	} else {
 		type = DLIL_DESC_ETYPE2;
 	}
-
-	/*
-	 * Search through the connected protocols for a match.
+
+	/*
+	 * Search through the connected protocols for a match.
 	 */
-	switch (type) {
-		case DLIL_DESC_ETYPE2:
-			for (i = 0; i < maxd; i++) {
-				if ((ed[i].type == type) && (ed[i].data[0] == ether_type)) {
-					*protocol_family = ed[i].protocol_family;
-					return 0;
-				}
-			}
-			break;
-
-		case DLIL_DESC_SAP:
-			for (i = 0; i < maxd; i++) {
-				if ((ed[i].type == type) && (ed[i].data[0] == extProto1)) {
-					*protocol_family = ed[i].protocol_family;
-					return 0;
-				}
-			}
-			break;
-
-		case DLIL_DESC_SNAP:
-			for (i = 0; i < maxd; i++) {
-				if ((ed[i].type == type) && (ed[i].data[0] == extProto1) &&
-					(ed[i].data[1] == extProto2)) {
-					*protocol_family = ed[i].protocol_family;
-					return 0;
-				}
-			}
-			break;
+	switch (type) {
+	case DLIL_DESC_ETYPE2:
+		for (i = 0; i < maxd; i++) {
+			if ((ed[i].type == type) &&
+			    (ed[i].data[0] == ether_type)) {
+				*protocol_family = ed[i].protocol_family;
+				return (0);
+			}
+		}
+		break;
+
+	case DLIL_DESC_SAP:
+		for (i = 0; i < maxd; i++) {
+			if ((ed[i].type == type) &&
+			    (ed[i].data[0] == extProto1)) {
+				*protocol_family = ed[i].protocol_family;
+				return (0);
+			}
+		}
+		break;
+
+	case DLIL_DESC_SNAP:
+		for (i = 0; i < maxd; i++) {
+			if ((ed[i].type == type) &&
+			    (ed[i].data[0] == extProto1) &&
+			    (ed[i].data[1] == extProto2)) {
+				*protocol_family = ed[i].protocol_family;
+				return (0);
+			}
+		}
+		break;
 	}
-
-	return ENOENT;
-}
+
+	return (ENOENT);
+}
+
+/*
+ * On embedded, ether_frameout is practically ether_frameout_extended.
+ * On non-embedded, ether_frameout has long been exposed as a public KPI,
+ * and therefore its signature must remain the same (without the pre- and
+ * postpend length parameters.)
+ */
+#if KPI_INTERFACE_EMBEDDED
+int
+ether_frameout(struct ifnet *ifp, struct mbuf **m,
+    const struct sockaddr *ndest, const char *edst,
+    const char *ether_type, u_int32_t *prepend_len, u_int32_t *postpend_len)
+#else /* !KPI_INTERFACE_EMBEDDED */
+int
+ether_frameout(struct ifnet *ifp, struct mbuf **m,
+    const struct sockaddr *ndest, const char *edst,
+    const char *ether_type)
+#endif /* KPI_INTERFACE_EMBEDDED */
+{
+#if KPI_INTERFACE_EMBEDDED
+	return (ether_frameout_extended(ifp, m, ndest, edst, ether_type,
+	    prepend_len, postpend_len));
+#else /* !KPI_INTERFACE_EMBEDDED */
+	return (ether_frameout_extended(ifp, m, ndest, edst, ether_type,
+	    NULL, NULL));
+#endif /* !KPI_INTERFACE_EMBEDDED */
+}
 
 /*
  * Ethernet output routine.
@@ -505,18 +513,9 @@ ether_demux(
 * packet leaves a multiple of 512 bytes of data in remainder.
*/ int -ether_frameout( - struct ifnet *ifp, - struct mbuf **m, - const struct sockaddr *ndest, - const char *edst, - const char *ether_type -#if KPI_INTERFACE_EMBEDDED - , - u_int32_t *prepend_len, - u_int32_t *postpend_len -#endif /* KPI_INTERFACE_EMBEDDED */ - ) +ether_frameout_extended(struct ifnet *ifp, struct mbuf **m, + const struct sockaddr *ndest, const char *edst, + const char *ether_type, u_int32_t *prepend_len, u_int32_t *postpend_len) { struct ether_header *eh; int hlen; /* link layer header length */ @@ -533,112 +532,101 @@ ether_frameout( * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && - ((*m)->m_flags & M_LOOP)) { - if (lo_ifp) { - if ((*m)->m_flags & M_BCAST) { - struct mbuf *n = m_copy(*m, 0, (int)M_COPYALL); - if (n != NULL) - dlil_output(lo_ifp, ndest->sa_family, n, NULL, ndest, 0, NULL); - } - else { - if (_ether_cmp(edst, ifnet_lladdr(ifp)) == 0) { - dlil_output(lo_ifp, ndest->sa_family, *m, NULL, ndest, 0, NULL); - return EJUSTRETURN; - } - } - } + ((*m)->m_flags & M_LOOP) && lo_ifp != NULL) { + if ((*m)->m_flags & M_BCAST) { + struct mbuf *n = m_copy(*m, 0, (int)M_COPYALL); + if (n != NULL) { + dlil_output(lo_ifp, ndest->sa_family, + n, NULL, ndest, 0, NULL); + } + } else if (_ether_cmp(edst, IF_LLADDR(ifp)) == 0) { + dlil_output(lo_ifp, ndest->sa_family, *m, + NULL, ndest, 0, NULL); + return (EJUSTRETURN); + } } - + /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(*m, sizeof (struct ether_header), M_DONTWAIT); - if (*m == 0) { - return (EJUSTRETURN); - } + if (*m == NULL) + return (EJUSTRETURN); + + if (prepend_len != NULL) + *prepend_len = sizeof (struct ether_header); + if (postpend_len != NULL) + *postpend_len = 0; -#if KPI_INTERFACE_EMBEDDED - *prepend_len = sizeof (struct ether_header); - *postpend_len = 0; -#endif /* KPI_INTERFACE_EMBEDDED */ - eh = mtod(*m, struct ether_header *); - (void)memcpy(&eh->ether_type, ether_type, - sizeof(eh->ether_type)); - (void)memcpy(eh->ether_dhost, edst, ETHER_ADDR_LEN); - ifnet_lladdr_copy_bytes(ifp, eh->ether_shost, ETHER_ADDR_LEN); + (void) memcpy(&eh->ether_type, ether_type, sizeof(eh->ether_type)); + (void) memcpy(eh->ether_dhost, edst, ETHER_ADDR_LEN); + ifnet_lladdr_copy_bytes(ifp, eh->ether_shost, ETHER_ADDR_LEN); - return 0; + return (0); } errno_t -ether_check_multi( - __unused ifnet_t ifp, - const struct sockaddr *proto_addr) +ether_check_multi(ifnet_t ifp, const struct sockaddr *proto_addr) { +#pragma unused(ifp) errno_t result = EAFNOSUPPORT; const u_char *e_addr; - + /* * AF_SPEC and AF_LINK don't require translation. We do * want to verify that they specify a valid multicast. 
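 *
 * For example, 01:00:5e:00:00:01 (IPv4 all-hosts) and 33:33:00:00:00:01
 * (IPv6 all-nodes) are both accepted below, since a valid Ethernet
 * multicast address has the group bit, the least significant bit of the
 * first octet, set.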
*/ switch(proto_addr->sa_family) { - case AF_UNSPEC: - e_addr = (const u_char*)&proto_addr->sa_data[0]; - if ((e_addr[0] & 0x01) != 0x01) - result = EADDRNOTAVAIL; - else - result = 0; - break; - - case AF_LINK: - e_addr = CONST_LLADDR((const struct sockaddr_dl*) - (uintptr_t)(size_t)proto_addr); - if ((e_addr[0] & 0x01) != 0x01) - result = EADDRNOTAVAIL; - else - result = 0; - break; + case AF_UNSPEC: + e_addr = (const u_char*)&proto_addr->sa_data[0]; + if ((e_addr[0] & 0x01) != 0x01) + result = EADDRNOTAVAIL; + else + result = 0; + break; + + case AF_LINK: + e_addr = CONST_LLADDR((const struct sockaddr_dl*) + (uintptr_t)(size_t)proto_addr); + if ((e_addr[0] & 0x01) != 0x01) + result = EADDRNOTAVAIL; + else + result = 0; + break; } - - return result; + + return (result); } int -ether_ioctl( - __unused ifnet_t ifp, - __unused u_int32_t command, - __unused void* data) +ether_ioctl(ifnet_t ifp, u_int32_t command, void *data) { - return EOPNOTSUPP; +#pragma unused(ifp, command, data) + return (EOPNOTSUPP); } -__private_extern__ int ether_family_init(void) +__private_extern__ int +ether_family_init(void) { errno_t error = 0; - + /* Register protocol registration functions */ if ((error = proto_register_plumber(PF_INET, APPLE_IF_FAM_ETHERNET, - ether_attach_inet, ether_detach_inet)) != 0) { - printf("proto_register_plumber failed for PF_INET error=%d\n", error); + ether_attach_inet, ether_detach_inet)) != 0) { + printf("proto_register_plumber failed for PF_INET error=%d\n", + error); goto done; } #if INET6 if ((error = proto_register_plumber(PF_INET6, APPLE_IF_FAM_ETHERNET, - ether_attach_inet6, ether_detach_inet6)) != 0) { - printf("proto_register_plumber failed for PF_INET6 error=%d\n", error); + ether_attach_inet6, ether_detach_inet6)) != 0) { + printf("proto_register_plumber failed for PF_INET6 error=%d\n", + error); goto done; } #endif /* INET6 */ -#if NETAT - if ((error = proto_register_plumber(PF_APPLETALK, APPLE_IF_FAM_ETHERNET, - ether_attach_at, ether_detach_at)) != 0) { - printf("proto_register_plumber failed PF_APPLETALK error=%d\n", error); - goto done; - } -#endif /* NETAT */ #if VLAN vlan_family_init(); #endif /* VLAN */ @@ -648,8 +636,7 @@ __private_extern__ int ether_family_init(void) #if IF_BRIDGE bridgeattach(0); #endif /* IF_BRIDGE */ +done: - done: - - return (error); + return (error); } diff --git a/bsd/net/ether_inet6_pr_module.c b/bsd/net/ether_inet6_pr_module.c index 78a2b3f07..02a408921 100644 --- a/bsd/net/ether_inet6_pr_module.c +++ b/bsd/net/ether_inet6_pr_module.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -70,6 +70,7 @@ #include #include +#include #include #include #include @@ -88,10 +89,7 @@ #if INET6 #include #include -#endif - -#if LLC && CCITT -extern struct ifqueue pkintrq; +#include #endif /* #include "vlan.h" */ @@ -101,6 +99,9 @@ extern struct ifqueue pkintrq; #include +static const u_char etherip6allnodes[ETHER_ADDR_LEN] = + { 0x33, 0x33, 0, 0, 0, 1 }; + /* * Process a received Ethernet packet; * the packet is in the mbuf chain m without @@ -134,6 +135,18 @@ ether_inet6_input(ifnet_t ifp, protocol_family_t protocol, ETHER_ADDR_LEN); } + /* Save the Ethernet source address for all-nodes multicasts */ + if (!bcmp(eh->ether_dhost, etherip6allnodes, ETHER_ADDR_LEN)) { + struct ip6aux *ip6a; + + ip6a = ip6_addaux(packet); + if (ip6a) { + ip6a->ip6a_flags |= IP6A_HASEEN; + bcopy(eh->ether_shost, ip6a->ip6a_ehsrc, + ETHER_ADDR_LEN); + } + } + if (proto_input(protocol, packet) != 0) m_freem(packet); } else { @@ -224,9 +237,8 @@ ether_inet6_prmod_ioctl(ifnet_t ifp, protocol_family_t protocol_family, case SIOCGIFADDR: { /* struct ifreq */ struct ifreq *ifr = (struct ifreq *)(void *)data; - - (void) ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, - ETHER_ADDR_LEN); + (void) ifnet_guarded_lladdr_copy_bytes(ifp, + ifr->ifr_addr.sa_data, ETHER_ADDR_LEN); break; } @@ -258,8 +270,8 @@ ether_attach_inet6(struct ifnet *ifp, protocol_family_t protocol_family) proto.resolve = ether_inet6_resolve_multi; error = ifnet_attach_protocol(ifp, protocol_family, &proto); if (error && error != EEXIST) { - printf("WARNING: %s can't attach ipv6 to %s%d\n", __func__, - ifp->if_name, ifp->if_unit); + printf("WARNING: %s can't attach ipv6 to %s\n", __func__, + if_name(ifp)); } return (error); diff --git a/bsd/net/ether_inet_pr_module.c b/bsd/net/ether_inet_pr_module.c index b8820a5a5..94768329b 100644 --- a/bsd/net/ether_inet_pr_module.c +++ b/bsd/net/ether_inet_pr_module.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -76,13 +76,13 @@ #include #include +#include #include #include #include #include #include #include - #include #include #include @@ -268,7 +268,7 @@ ether_inet_pre_output(ifnet_t ifp, protocol_family_t protocol_family, break; default: - printf("%s%d: can't handle af%d\n", ifp->if_name, ifp->if_unit, + printf("%s: can't handle af%d\n", if_name(ifp), dst_netaddr->sa_family); result = EAFNOSUPPORT; @@ -351,14 +351,13 @@ ether_inet_prmod_ioctl(ifnet_t ifp, protocol_family_t protocol_family, kdp_get_interface() == ifp->if_softc) || (kdp_get_interface() == 0 && ifp->if_unit == 0)) kdp_set_ip_and_mac_addresses(&(IA_SIN(ifa)->sin_addr), - ifnet_lladdr(ifp)); + (struct ether_addr *)IF_LLADDR(ifp)); break; } case SIOCGIFADDR: { /* struct ifreq */ struct ifreq *ifr = data; - - ifnet_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, + ifnet_guarded_lladdr_copy_bytes(ifp, ifr->ifr_addr.sa_data, ETHER_ADDR_LEN); break; } @@ -496,28 +495,38 @@ ether_inet_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl *sender_hw, bzero(ea->arp_tha, sizeof (ea->arp_tha)); bcopy(etherbroadcastaddr, eh->ether_dhost, sizeof (eh->ether_dhost)); + m->m_flags |= M_BCAST; } else { bcopy(CONST_LLADDR(target_hw), ea->arp_tha, sizeof (ea->arp_tha)); bcopy(CONST_LLADDR(target_hw), eh->ether_dhost, sizeof (eh->ether_dhost)); + + if (bcmp(eh->ether_dhost, etherbroadcastaddr, + ETHER_ADDR_LEN) == 0) + m->m_flags |= M_BCAST; } /* Target IP */ bcopy(&target_ip->sin_addr, ea->arp_tpa, sizeof (ea->arp_tpa)); + /* + * PKTF_{INET,INET6}_RESOLVE_RTR are mutually exclusive, so make + * sure only one of them is set (just in case.) + */ + m->m_pkthdr.pkt_flags &= ~(PKTF_INET6_RESOLVE | PKTF_RESOLVE_RTR); + m->m_pkthdr.pkt_flags |= PKTF_INET_RESOLVE; /* * If this is an ARP request for a (default) router, mark * the packet accordingly so that the driver can find out, * in case it needs to perform driver-specific action(s). */ - if (arpop == ARPOP_REQUEST && (target_ip->sin_other & SIN_ROUTER)) { - m->m_pkthdr.aux_flags |= MAUXF_INET_RESOLVE_RTR; - VERIFY(!(m->m_pkthdr.aux_flags & MAUXF_INET6_RESOLVE_RTR)); - } + if (arpop == ARPOP_REQUEST && (target_ip->sin_other & SIN_ROUTER)) + m->m_pkthdr.pkt_flags |= PKTF_RESOLVE_RTR; if (ifp->if_eflags & IFEF_TXSTART) { - /* Use control service class if the interface + /* + * Use control service class if the interface * supports transmit-start model */ (void) m_set_service_class(m, MBUF_SC_CTL); @@ -558,8 +567,8 @@ ether_attach_inet(struct ifnet *ifp, protocol_family_t proto_family) error = ifnet_attach_protocol_v2(ifp, proto_family, &proto); if (error && error != EEXIST) { - printf("WARNING: %s can't attach ip to %s%d\n", __func__, - ifp->if_name, ifp->if_unit); + printf("WARNING: %s can't attach ip to %s\n", __func__, + if_name(ifp)); } return (error); } diff --git a/bsd/net/flowadv.c b/bsd/net/flowadv.c new file mode 100644 index 000000000..99e4d2cad --- /dev/null +++ b/bsd/net/flowadv.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * Flow Control and Feedback Advisory + * + * Each mbuf that is being sent out through an interface is tagged with a + * unique 32-bit ID which will help to identify all the packets that belong + * to a particular flow at the interface layer. Packets carrying such ID + * would need to be marked with PKTF_FLOW_ID. Normally, this ID is computed + * by the module that generates the flow. There are 3 kinds of flow sources + * that are currently recognized: + * + * a. INPCB (INET/INET6 Protocol Control Block). When a socket is + * connected, the flow hash for the socket is computed and stored in + * the PCB. Further transmissions on the socket will cause the hash + * value to be carried within the mbuf as the flow ID. + * + * b. Interface. When an interface is attached, the flow hash for the + * interface is computed and stored in the ifnet. This value is + * normally ignored for most network drivers, except for those that + * reside atop another driver, e.g. a virtual interface performing + * encapsulation/encryption on the original packet and sending the + * newly-generated packet to another interface. Such interface needs + * to associate all generated packets with the interface flow hash + * value as the flow ID. + * + * c. PF (Packet Filter). When a packet goes through PF and it is not + * already associated with a flow ID, PF will compute a flow hash and + * store it in the packet as flow ID. When the packet is associated + * with a PF state, the state record will have the flow ID stored + * within, in order to avoid recalculating the flow hash. Although PF + * is capable of generating flow IDs, it does not participate in flow + * advisory, and therefore packets whose IDs are computed by PF will + * not have their PKTF_FLOW_ADV packet flag set. + * + * Activation of flow advisory mechanism is done by setting the PKTF_FLOW_ADV + * packet flag; because a flow ID is required, the mechanism will not take + * place unless PKTF_FLOW_ID is set as well. The packet must also carry one + * of the flow source types FLOWSRC_{INPCB,IFNET} in order to identify where + * the flow advisory notification should be delivered to. As noted above, + * FLOWSRC_PF does not participate in this mechanism. + * + * The classq module configured on the interface is responsible for exerting + * flow control to the upper layers. This occurs when the number of packets + * queued for a flow reaches a limit. 
The module generating the flow will
+ * cease transmission until further flow advisory notice, and the flow will
+ * be inserted into the classq's flow control list.
+ *
+ * When packets are dequeued from the classq and the number of packets for
+ * a flow goes below a limit, the classq will transfer its flow control list
+ * to the global fadv_list.  This will then trigger the flow advisory thread
+ * to run, which will cause the flow source modules to be notified that data
+ * can now be generated for those previously flow-controlled flows.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+/* Lock group and attribute for fadv_lock */
+static lck_grp_t *fadv_lock_grp;
+static lck_grp_attr_t *fadv_lock_grp_attr;
+decl_lck_mtx_data(static, fadv_lock);
+
+/* protected by fadv_lock */
+static STAILQ_HEAD(fadv_head, flowadv_fcentry) fadv_list;
+static thread_t fadv_thread = THREAD_NULL;
+static uint32_t fadv_active;
+
+static unsigned int fadv_zone_size;	/* size of flowadv_fcentry */
+static struct zone *fadv_zone;		/* zone for flowadv_fcentry */
+
+#define	FADV_ZONE_MAX	32		/* maximum elements in zone */
+#define	FADV_ZONE_NAME	"fadv_zone"	/* zone name */
+
+static int flowadv_thread_cont(int);
+static void flowadv_thread_func(void *, wait_result_t);
+
+void
+flowadv_init(void)
+{
+	STAILQ_INIT(&fadv_list);
+
+	/* Set up the lock group and attribute for fadv_lock */
+	fadv_lock_grp_attr = lck_grp_attr_alloc_init();
+	fadv_lock_grp = lck_grp_alloc_init("fadv_lock", fadv_lock_grp_attr);
+	lck_mtx_init(&fadv_lock, fadv_lock_grp, NULL);
+
+	fadv_zone_size = P2ROUNDUP(sizeof (struct flowadv_fcentry),
+	    sizeof (u_int64_t));
+	fadv_zone = zinit(fadv_zone_size,
+	    FADV_ZONE_MAX * fadv_zone_size, 0, FADV_ZONE_NAME);
+	if (fadv_zone == NULL) {
+		panic("%s: failed allocating %s", __func__, FADV_ZONE_NAME);
+		/* NOTREACHED */
+	}
+	zone_change(fadv_zone, Z_EXPAND, TRUE);
+	zone_change(fadv_zone, Z_CALLERACCT, FALSE);
+
+	if (kernel_thread_start(flowadv_thread_func, NULL, &fadv_thread) !=
+	    KERN_SUCCESS) {
+		panic("%s: couldn't create flow event advisory thread",
+		    __func__);
+		/* NOTREACHED */
+	}
+	thread_deallocate(fadv_thread);
+}
+
+struct flowadv_fcentry *
+flowadv_alloc_entry(int how)
+{
+	struct flowadv_fcentry *fce;
+
+	fce = (how == M_WAITOK) ? zalloc(fadv_zone) :
+	    zalloc_noblock(fadv_zone);
+	if (fce != NULL)
+		bzero(fce, fadv_zone_size);
+
+	return (fce);
+}
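+
+/*
+ * A minimal usage sketch (the locals `flowsrc' and `flowid' are
+ * hypothetical; they would come from the packet that was queued): a
+ * classq deciding that a flow may resume would batch up an advisory
+ * entry and hand the list to the advisory thread as follows:
+ *
+ *	struct flowadv_fclist fcl;
+ *	struct flowadv_fcentry *fce;
+ *
+ *	STAILQ_INIT(&fcl);
+ *	if ((fce = flowadv_alloc_entry(M_DONTWAIT)) != NULL) {
+ *		fce->fce_flowsrc = flowsrc;
+ *		fce->fce_flowid = flowid;
+ *		STAILQ_INSERT_TAIL(&fcl, fce, fce_link);
+ *	}
+ *	flowadv_add(&fcl);
+ */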
zalloc(fadv_zone) : zalloc_noblock(fadv_zone); + if (fce != NULL) + bzero(fce, fadv_zone_size); + + return (fce); +} + +void +flowadv_free_entry(struct flowadv_fcentry *fce) +{ + zfree(fadv_zone, fce); +} + +void +flowadv_add(struct flowadv_fclist *fcl) +{ + if (STAILQ_EMPTY(fcl)) + return; + + lck_mtx_lock_spin(&fadv_lock); + + STAILQ_CONCAT(&fadv_list, fcl); + VERIFY(!STAILQ_EMPTY(&fadv_list)); + + if (!fadv_active && fadv_thread != THREAD_NULL) + wakeup_one((caddr_t)&fadv_list); + + lck_mtx_unlock(&fadv_lock); +} + +static int +flowadv_thread_cont(int err) +{ +#pragma unused(err) + for (;;) { + lck_mtx_assert(&fadv_lock, LCK_MTX_ASSERT_OWNED); + while (STAILQ_EMPTY(&fadv_list)) { + VERIFY(!fadv_active); + (void) msleep0(&fadv_list, &fadv_lock, (PSOCK | PSPIN), + "flowadv_cont", 0, flowadv_thread_cont); + /* NOTREACHED */ + } + + fadv_active = 1; + for (;;) { + struct flowadv_fcentry *fce; + + VERIFY(!STAILQ_EMPTY(&fadv_list)); + fce = STAILQ_FIRST(&fadv_list); + STAILQ_REMOVE(&fadv_list, fce, + flowadv_fcentry, fce_link); + STAILQ_NEXT(fce, fce_link) = NULL; + + lck_mtx_unlock(&fadv_lock); + switch (fce->fce_flowsrc) { + case FLOWSRC_INPCB: + inp_flowadv(fce->fce_flowid); + break; + + case FLOWSRC_IFNET: + ifnet_flowadv(fce->fce_flowid); + break; + + case FLOWSRC_PF: + default: + break; + } + flowadv_free_entry(fce); + lck_mtx_lock_spin(&fadv_lock); + + /* if there's no pending request, we're done */ + if (STAILQ_EMPTY(&fadv_list)) + break; + } + fadv_active = 0; + } +} + +static void +flowadv_thread_func(void *v, wait_result_t w) +{ +#pragma unused(v, w) + lck_mtx_lock(&fadv_lock); + (void) msleep0(&fadv_list, &fadv_lock, (PSOCK | PSPIN), + "flowadv", 0, flowadv_thread_cont); + /* + * msleep0() shouldn't have returned as PCATCH was not set; + * therefore assert in this case. + */ + lck_mtx_unlock(&fadv_lock); + VERIFY(0); +} diff --git a/bsd/net/flowadv.h b/bsd/net/flowadv.h index 96e6e9e9c..44c9e0868 100644 --- a/bsd/net/flowadv.h +++ b/bsd/net/flowadv.h @@ -29,11 +29,9 @@ #ifndef _NET_FLOWADV_H_ #define _NET_FLOWADV_H_ +#ifdef KERNEL_PRIVATE #include - -#ifdef __cplusplus -extern "C" { -#endif +#include #define FADV_SUCCESS 0 /* success */ #define FADV_FLOW_CONTROLLED 1 /* regular flow control */ @@ -43,8 +41,24 @@ struct flowadv { int32_t code; /* FADV advisory code */ }; -#ifdef __cplusplus -} -#endif +#ifdef BSD_KERNEL_PRIVATE +struct flowadv_fcentry { + STAILQ_ENTRY(flowadv_fcentry) fce_link; + u_int32_t fce_flowsrc; /* FLOWSRC values */ + u_int32_t fce_flowid; +}; + +STAILQ_HEAD(flowadv_fclist, flowadv_fcentry); + +__BEGIN_DECLS + +extern void flowadv_init(void); +extern struct flowadv_fcentry *flowadv_alloc_entry(int); +extern void flowadv_free_entry(struct flowadv_fcentry *); +extern void flowadv_add(struct flowadv_fclist *); + +__END_DECLS +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* _NET_FLOWADV_H_ */ diff --git a/bsd/net/flowhash.c b/bsd/net/flowhash.c index e63462423..a45796023 100644 --- a/bsd/net/flowhash.c +++ b/bsd/net/flowhash.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Copyright (c) 2011-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -68,9 +68,7 @@ static inline u_int64_t mh3_fmix64(u_int64_t); * Intel 64-bit: MurmurHash3_x64_128 * ARM, et al: JHash */ -#if defined(__i386__) -net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x86_32; -#elif defined(__x86_64__) +#if defined(__x86_64__) net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128; #else /* !__i386__ && !__x86_64__ */ net_flowhash_fn_t *net_flowhash = net_flowhash_jhash; diff --git a/bsd/net/if.c b/bsd/net/if.c index 1097ff570..280b6d49a 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -93,17 +93,14 @@ #include #include #include -#include +#include #include #include #include -#ifdef __APPLE__ #include -//#include #include #include -#endif #if INET || INET6 /*XXX*/ @@ -120,10 +117,11 @@ #include #include #include -#endif -#endif +#include +#endif /* INET6 */ +#endif /* INET || INET6 */ -#if CONFIG_MACF_NET +#if CONFIG_MACF_NET #include #endif @@ -135,6 +133,8 @@ * System initialization */ +extern char *proc_name_address(void *); + /* Lock group and attribute for ifaddr lock */ lck_attr_t *ifa_mtx_attr; lck_grp_t *ifa_mtx_grp; @@ -142,6 +142,12 @@ static lck_grp_attr_t *ifa_mtx_grp_attr; static int ifioctl_ifreq(struct socket *, u_long, struct ifreq *, struct proc *); +static int ifioctl_ifconf(u_long, caddr_t); +static int ifioctl_ifclone(u_long, caddr_t); +static int ifioctl_ifdesc(struct ifnet *, u_long, caddr_t, struct proc *); +static int ifioctl_linkparams(struct ifnet *, u_long, caddr_t, struct proc *); +static int ifioctl_qstats(struct ifnet *, u_long, caddr_t); +static int ifioctl_throttle(struct ifnet *, u_long, caddr_t, struct proc *); static int ifconf(u_long cmd, user_addr_t ifrp, int * ret_space); __private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *); void if_rtproto_del(struct ifnet *ifp, int protocol); @@ -154,9 +160,7 @@ static int if_delmulti_common(struct ifmultiaddr *, struct ifnet *, static int if_rtmtu(struct radix_node *, void *); static void if_rtmtu_update(struct ifnet *); -#if IF_CLONE_LIST -static int if_clone_list(int count, int * total, user_addr_t dst); -#endif /* IF_CLONE_LIST */ +static int if_clone_list(int, int *, user_addr_t); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); @@ -221,6 +225,15 @@ extern void nd6_setmtu(struct ifnet *); extern 
lck_mtx_t *nd6_mutex; #endif +SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Link layers"); +SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "Generic link-management"); + +SYSCTL_DECL(_net_link_generic_system); + +static uint32_t if_verbose = 0; +SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_verbose, + CTLFLAG_RW | CTLFLAG_LOCKED, &if_verbose, 0, ""); void ifa_init(void) @@ -554,7 +567,7 @@ if_clone_lookup(const char *name, u_int32_t *unitp) { struct if_clone *ifc; const char *cp; - size_t i; + u_int32_t i; for (ifc = LIST_FIRST(&if_cloners); ifc != NULL;) { for (cp = name, i = 0; i < ifc->ifc_namelen; i++, cp++) { @@ -571,7 +584,7 @@ if_clone_lookup(const char *name, u_int32_t *unitp) found_name: if (*cp == '\0') { - i = 0xffff; + i = UINT32_MAX; } else { for (i = 0; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') { @@ -639,24 +652,22 @@ if_clone_attach(struct if_clone *ifc) void if_clone_detach(struct if_clone *ifc) { - LIST_REMOVE(ifc, ifc_list); FREE(ifc->ifc_units, M_CLONE); if_cloners_count--; } -#if IF_CLONE_LIST /* * Provide list of interface cloners to userspace. */ static int -if_clone_list(int count, int * total, user_addr_t dst) +if_clone_list(int count, int *ret_total, user_addr_t dst) { char outbuf[IFNAMSIZ]; struct if_clone *ifc; int error = 0; - *total = if_cloners_count; + *ret_total = if_cloners_count; if (dst == USER_ADDR_NULL) { /* Just asking how many there are. */ return (0); @@ -677,7 +688,6 @@ if_clone_list(int count, int * total, user_addr_t dst) return (error); } -#endif /* IF_CLONE_LIST */ /* * Similar to ifa_ifwithaddr, except that this is IPv4 specific @@ -988,81 +998,52 @@ next: IFA_UNLOCK(ifa); continue; } -#ifndef __APPLE__ -/* This breaks tunneling application trying to install a route with - * a specific subnet and the local address as the destination - * It's breaks binary compatibility with previous version of MacOS X - */ - if ( -#if INET6 /* XXX: for maching gif tunnel dst as routing entry gateway */ - addr->sa_family != AF_INET6 && -#endif - ifp->if_flags & IFF_POINTOPOINT) { - /* - * This is a bit broken as it doesn't - * take into account that the remote end may - * be a single node in the network we are - * looking for. - * The trouble is that we don't know the - * netmask for the remote end. - */ - if (ifa->ifa_dstaddr != 0 && - equal(addr, ifa->ifa_dstaddr)) { - IFA_ADDREF_LOCKED(ifa); - IFA_UNLOCK(ifa); - break; - } + /* + * If we're looking up with a scope, + * find using a matching interface. + */ + if (ifscope != IFSCOPE_NONE && + ifp->if_index != ifscope) { IFA_UNLOCK(ifa); - } else -#endif /* __APPLE__*/ - { - /* - * If we're looking up with a scope, - * find using a matching interface. - */ - if (ifscope != IFSCOPE_NONE && - ifp->if_index != ifscope) { - IFA_UNLOCK(ifa); - continue; - } + continue; + } - /* - * Scan all the bits in the ifa's address. - * If a bit dissagrees with what we are - * looking for, mask it with the netmask - * to see if it really matters. - * (A byte at a time) - */ - if (ifa->ifa_netmask == 0) { - IFA_UNLOCK(ifa); - continue; - } - cp = addr_data; - cp2 = ifa->ifa_addr->sa_data; - cp3 = ifa->ifa_netmask->sa_data; - cplim = ifa->ifa_netmask->sa_len - + (char *)ifa->ifa_netmask; - while (cp3 < cplim) - if ((*cp++ ^ *cp2++) & *cp3++) - goto next; /* next address! */ - /* - * If the netmask of what we just found - * is more specific than what we had before - * (if we had one) then remember the new one - * before continuing to search - * for an even better one. 
- */ - if (ifa_maybe == NULL || - rn_refines((caddr_t)ifa->ifa_netmask, - (caddr_t)ifa_maybe->ifa_netmask)) { - IFA_ADDREF_LOCKED(ifa); /* ifa_maybe */ - IFA_UNLOCK(ifa); - if (ifa_maybe != NULL) - IFA_REMREF(ifa_maybe); - ifa_maybe = ifa; - } else { - IFA_UNLOCK(ifa); - } + /* + * Scan all the bits in the ifa's address. + * If a bit dissagrees with what we are + * looking for, mask it with the netmask + * to see if it really matters. + * (A byte at a time) + */ + if (ifa->ifa_netmask == 0) { + IFA_UNLOCK(ifa); + continue; + } + cp = addr_data; + cp2 = ifa->ifa_addr->sa_data; + cp3 = ifa->ifa_netmask->sa_data; + cplim = ifa->ifa_netmask->sa_len + + (char *)ifa->ifa_netmask; + while (cp3 < cplim) + if ((*cp++ ^ *cp2++) & *cp3++) + goto next; /* next address! */ + /* + * If the netmask of what we just found + * is more specific than what we had before + * (if we had one) then remember the new one + * before continuing to search + * for an even better one. + */ + if (ifa_maybe == NULL || + rn_refines((caddr_t)ifa->ifa_netmask, + (caddr_t)ifa_maybe->ifa_netmask)) { + IFA_ADDREF_LOCKED(ifa); /* ifa_maybe */ + IFA_UNLOCK(ifa); + if (ifa_maybe != NULL) + IFA_REMREF(ifa_maybe); + ifa_maybe = ifa; + } else { + IFA_UNLOCK(ifa); } IFA_LOCK_ASSERT_NOTHELD(ifa); } @@ -1445,23 +1426,11 @@ if_withname(struct sockaddr *sa) return (ifunit(ifname)); } - -/* - * Interface ioctls. - */ -int -ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) +static __attribute__((noinline)) int +ifioctl_ifconf(u_long cmd, caddr_t data) { - char ifname[IFNAMSIZ + 1]; - struct ifnet *ifp = NULL; - struct ifstat *ifs = NULL; int error = 0; - bzero(ifname, sizeof (ifname)); - - /* - * ioctls which don't require ifp, or ifreq ioctls - */ switch (cmd) { case OSIOCGIFCONF32: /* struct ifconf32 */ case SIOCGIFCONF32: { /* struct ifconf32 */ @@ -1470,7 +1439,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) error = ifconf(cmd, CAST_USER_ADDR_T(ifc.ifc_req), &ifc.ifc_len); bcopy(&ifc, data, sizeof (ifc)); - goto done; + break; } case SIOCGIFCONF64: /* struct ifconf64 */ @@ -1479,17 +1448,30 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) bcopy(data, &ifc, sizeof (ifc)); error = ifconf(cmd, ifc.ifc_req, &ifc.ifc_len); bcopy(&ifc, data, sizeof (ifc)); - goto done; + break; } -#if IF_CLONE_LIST + default: + VERIFY(0); + /* NOTREACHED */ + } + + return (error); +} + +static __attribute__((noinline)) int +ifioctl_ifclone(u_long cmd, caddr_t data) +{ + int error = 0; + + switch (cmd) { case SIOCIFGCLONERS32: { /* struct if_clonereq32 */ struct if_clonereq32 ifcr; bcopy(data, &ifcr, sizeof (ifcr)); error = if_clone_list(ifcr.ifcr_count, &ifcr.ifcr_total, CAST_USER_ADDR_T(ifcr.ifcru_buffer)); bcopy(&ifcr, data, sizeof (ifcr)); - goto done; + break; } case SIOCIFGCLONERS64: { /* struct if_clonereq64 */ @@ -1498,9 +1480,255 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) error = if_clone_list(ifcr.ifcr_count, &ifcr.ifcr_total, ifcr.ifcru_buffer); bcopy(&ifcr, data, sizeof (ifcr)); - goto done; + break; + } + + default: + VERIFY(0); + /* NOTREACHED */ + } + + return (error); +} + +static __attribute__((noinline)) int +ifioctl_ifdesc(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p) +{ + struct if_descreq *ifdr = (struct if_descreq *)(void *)data; + u_int32_t ifdr_len; + int error = 0; + + VERIFY(ifp != NULL); + + switch (cmd) { + case SIOCSIFDESC: { /* struct if_descreq */ + if ((error = proc_suser(p)) != 0) + break; + + 
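+		/*
+		 * The request length is staged into a local with bcopy()
+		 * rather than read through the pointer, most likely
+		 * because the ioctl argument buffer is not guaranteed
+		 * to be naturally aligned for u_int32_t access; the
+		 * same staging pattern recurs throughout these handlers.
+		 */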
ifnet_lock_exclusive(ifp); + bcopy(&ifdr->ifdr_len, &ifdr_len, sizeof (ifdr_len)); + if (ifdr_len > sizeof (ifdr->ifdr_desc) || + ifdr_len > ifp->if_desc.ifd_maxlen) { + error = EINVAL; + ifnet_lock_done(ifp); + break; + } + + bzero(ifp->if_desc.ifd_desc, ifp->if_desc.ifd_maxlen); + if ((ifp->if_desc.ifd_len = ifdr_len) > 0) { + bcopy(ifdr->ifdr_desc, ifp->if_desc.ifd_desc, + MIN(ifdr_len, ifp->if_desc.ifd_maxlen)); + } + ifnet_lock_done(ifp); + break; + } + + case SIOCGIFDESC: { /* struct if_descreq */ + ifnet_lock_shared(ifp); + ifdr_len = MIN(ifp->if_desc.ifd_len, sizeof (ifdr->ifdr_desc)); + bcopy(&ifdr_len, &ifdr->ifdr_len, sizeof (ifdr_len)); + bzero(&ifdr->ifdr_desc, sizeof (ifdr->ifdr_desc)); + if (ifdr_len > 0) { + bcopy(ifp->if_desc.ifd_desc, ifdr->ifdr_desc, ifdr_len); + } + ifnet_lock_done(ifp); + break; + } + + default: + VERIFY(0); + /* NOTREACHED */ + } + + return (error); +} + +static __attribute__((noinline)) int +ifioctl_linkparams(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p) +{ + struct if_linkparamsreq *iflpr = + (struct if_linkparamsreq *)(void *)data; + struct ifclassq *ifq; + int error = 0; + + VERIFY(ifp != NULL); + ifq = &ifp->if_snd; + + switch (cmd) { + case SIOCSIFLINKPARAMS: { /* struct if_linkparamsreq */ + struct tb_profile tb = { 0, 0, 0 }; + + if ((error = proc_suser(p)) != 0) + break; + + IFCQ_LOCK(ifq); + if (!IFCQ_IS_READY(ifq)) { + error = ENXIO; + IFCQ_UNLOCK(ifq); + break; + } + bcopy(&iflpr->iflpr_output_tbr_rate, &tb.rate, + sizeof (tb.rate)); + bcopy(&iflpr->iflpr_output_tbr_percent, &tb.percent, + sizeof (tb.percent)); + error = ifclassq_tbr_set(ifq, &tb, TRUE); + IFCQ_UNLOCK(ifq); + break; + } + + case SIOCGIFLINKPARAMS: { /* struct if_linkparamsreq */ + u_int32_t sched_type = PKTSCHEDT_NONE, flags = 0; + u_int64_t tbr_bw = 0, tbr_pct = 0; + + IFCQ_LOCK(ifq); +#if PF_ALTQ + if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) { + sched_type = IFCQ_ALTQ(ifq)->altq_type; + flags |= IFLPRF_ALTQ; + } else +#endif /* PF_ALTQ */ + { + if (IFCQ_IS_ENABLED(ifq)) + sched_type = ifq->ifcq_type; + } + bcopy(&sched_type, &iflpr->iflpr_output_sched, + sizeof (iflpr->iflpr_output_sched)); + + if (IFCQ_TBR_IS_ENABLED(ifq)) { + tbr_bw = ifq->ifcq_tbr.tbr_rate_raw; + tbr_pct = ifq->ifcq_tbr.tbr_percent; + } + bcopy(&tbr_bw, &iflpr->iflpr_output_tbr_rate, + sizeof (iflpr->iflpr_output_tbr_rate)); + bcopy(&tbr_pct, &iflpr->iflpr_output_tbr_percent, + sizeof (iflpr->iflpr_output_tbr_percent)); + IFCQ_UNLOCK(ifq); + + if (ifp->if_output_sched_model == + IFNET_SCHED_MODEL_DRIVER_MANAGED) + flags |= IFLPRF_DRVMANAGED; + bcopy(&flags, &iflpr->iflpr_flags, sizeof (iflpr->iflpr_flags)); + bcopy(&ifp->if_output_bw, &iflpr->iflpr_output_bw, + sizeof (iflpr->iflpr_output_bw)); + bcopy(&ifp->if_input_bw, &iflpr->iflpr_input_bw, + sizeof (iflpr->iflpr_input_bw)); + bcopy(&ifp->if_output_lt, &iflpr->iflpr_output_lt, + sizeof (iflpr->iflpr_output_lt)); + bcopy(&ifp->if_input_lt, &iflpr->iflpr_input_lt, + sizeof (iflpr->iflpr_input_lt)); + break; + } + + default: + VERIFY(0); + /* NOTREACHED */ } -#endif /* IF_CLONE_LIST */ + + return (error); +} + +static __attribute__((noinline)) int +ifioctl_qstats(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct if_qstatsreq *ifqr = (struct if_qstatsreq *)(void *)data; + u_int32_t ifqr_len, ifqr_slot; + int error = 0; + + VERIFY(ifp != NULL); + + switch (cmd) { + case SIOCGIFQUEUESTATS: { /* struct if_qstatsreq */ + bcopy(&ifqr->ifqr_slot, &ifqr_slot, sizeof (ifqr_slot)); + bcopy(&ifqr->ifqr_len, &ifqr_len, sizeof (ifqr_len)); + error 
= ifclassq_getqstats(&ifp->if_snd, ifqr_slot, + ifqr->ifqr_buf, &ifqr_len); + if (error != 0) + ifqr_len = 0; + bcopy(&ifqr_len, &ifqr->ifqr_len, sizeof (ifqr_len)); + break; + } + + default: + VERIFY(0); + /* NOTREACHED */ + } + + return (error); +} + +static __attribute__((noinline)) int +ifioctl_throttle(struct ifnet *ifp, u_long cmd, caddr_t data, struct proc *p) +{ + struct if_throttlereq *ifthr = (struct if_throttlereq *)(void *)data; + u_int32_t ifthr_level; + int error = 0; + + VERIFY(ifp != NULL); + + switch (cmd) { + case SIOCSIFTHROTTLE: { /* struct if_throttlereq */ + /* + * XXX: Use priv_check_cred() instead of root check? + */ + if ((error = proc_suser(p)) != 0) + break; + + bcopy(&ifthr->ifthr_level, &ifthr_level, sizeof (ifthr_level)); + error = ifnet_set_throttle(ifp, ifthr_level); + if (error == EALREADY) + error = 0; + break; + } + + case SIOCGIFTHROTTLE: { /* struct if_throttlereq */ + if ((error = ifnet_get_throttle(ifp, &ifthr_level)) == 0) { + bcopy(&ifthr_level, &ifthr->ifthr_level, + sizeof (ifthr_level)); + } + break; + } + + default: + VERIFY(0); + /* NOTREACHED */ + } + + return (error); +} + +/* + * Interface ioctls. + * + * Most of the routines called to handle the ioctls would end up being + * tail-call optimized, which unfortunately causes this routine to + * consume too much stack space; this is the reason for the "noinline" + * attribute used on those routines. + */ +int +ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) +{ + char ifname[IFNAMSIZ + 1]; + struct ifnet *ifp = NULL; + struct ifstat *ifs = NULL; + int error = 0; + + bzero(ifname, sizeof (ifname)); + + /* + * ioctls which don't require ifp, or ifreq ioctls + */ + switch (cmd) { + case OSIOCGIFCONF32: /* struct ifconf32 */ + case SIOCGIFCONF32: /* struct ifconf32 */ + case SIOCGIFCONF64: /* struct ifconf64 */ + case OSIOCGIFCONF64: /* struct ifconf64 */ + error = ifioctl_ifconf(cmd, data); + goto done; + + case SIOCIFGCLONERS32: /* struct if_clonereq32 */ + case SIOCIFGCLONERS64: /* struct if_clonereq64 */ + error = ifioctl_ifclone(cmd, data); + goto done; case SIOCSIFDSTADDR: /* struct ifreq */ case SIOCSIFADDR: /* struct ifreq */ @@ -1522,12 +1750,16 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCGIFFLAGS: /* struct ifreq */ case SIOCGIFEFLAGS: /* struct ifreq */ case SIOCGIFCAP: /* struct ifreq */ +#if CONFIG_MACF_NET case SIOCGIFMAC: /* struct ifreq */ + case SIOCSIFMAC: /* struct ifreq */ +#endif /* CONFIG_MACF_NET */ case SIOCGIFMETRIC: /* struct ifreq */ case SIOCGIFMTU: /* struct ifreq */ case SIOCGIFPHYS: /* struct ifreq */ case SIOCSIFFLAGS: /* struct ifreq */ case SIOCSIFCAP: /* struct ifreq */ + case SIOCSIFMETRIC: /* struct ifreq */ case SIOCSIFPHYS: /* struct ifreq */ case SIOCSIFMTU: /* struct ifreq */ case SIOCADDMULTI: /* struct ifreq */ @@ -1539,6 +1771,8 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCSIFALTMTU: /* struct ifreq */ case SIOCSIFVLAN: /* struct ifreq */ case SIOCSIFBOND: /* struct ifreq */ + case SIOCGIFLLADDR: /* struct ifreq */ + case SIOCGIFTYPE: /* struct ifreq */ case SIOCGIFPSRCADDR: /* struct ifreq */ case SIOCGIFPDSTADDR: /* struct ifreq */ case SIOCGIFGENERIC: /* struct ifreq */ @@ -1549,9 +1783,14 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) case SIOCGIFGETRTREFCNT: /* struct ifreq */ case SIOCSIFOPPORTUNISTIC: /* struct ifreq */ case SIOCGIFOPPORTUNISTIC: /* struct ifreq */ - case SIOCGIFLINKQUALITYMETRIC: { /* struct ifreq */ + case 
SIOCGIFLINKQUALITYMETRIC: /* struct ifreq */ + case SIOCSIFLOG: /* struct ifreq */ + case SIOCGIFLOG: /* struct ifreq */ + case SIOCGIFDELEGATE: { /* struct ifreq */ struct ifreq ifr; bcopy(data, &ifr, sizeof (ifr)); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + bcopy(&ifr.ifr_name, ifname, IFNAMSIZ); error = ifioctl_ifreq(so, cmd, &ifr, p); bcopy(&ifr, data, sizeof (ifr)); goto done; @@ -1564,38 +1803,34 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) */ dlil_if_lock(); switch (cmd) { - case SIOCSIFPHYADDR: { /* struct ifaliasreq */ - bcopy(((struct ifaliasreq *)(void *)data)->ifra_name, + case SIOCSIFPHYADDR: /* struct {if,in_}aliasreq */ + bcopy(((struct in_aliasreq *)(void *)data)->ifra_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } #if INET6 - case SIOCSIFPHYADDR_IN6_32: { /* struct in6_aliasreq_32 */ + case SIOCSIFPHYADDR_IN6_32: /* struct in6_aliasreq_32 */ bcopy(((struct in6_aliasreq_32 *)(void *)data)->ifra_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } - case SIOCSIFPHYADDR_IN6_64: { /* struct in6_aliasreq_64 */ + case SIOCSIFPHYADDR_IN6_64: /* struct in6_aliasreq_64 */ bcopy(((struct in6_aliasreq_64 *)(void *)data)->ifra_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } -#endif +#endif /* INET6 */ case SIOCSLIFPHYADDR: /* struct if_laddrreq */ - case SIOCGLIFPHYADDR: { /* struct if_laddrreq */ + case SIOCGLIFPHYADDR: /* struct if_laddrreq */ bcopy(((struct if_laddrreq *)(void *)data)->iflr_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } - case SIOCGIFSTATUS: { /* struct ifstat */ + case SIOCGIFSTATUS: /* struct ifstat */ ifs = _MALLOC(sizeof (*ifs), M_DEVBUF, M_WAITOK); if (ifs == NULL) { error = ENOMEM; @@ -1604,258 +1839,117 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) } bcopy(data, ifs, sizeof (*ifs)); ifs->ifs_name[IFNAMSIZ - 1] = '\0'; - ifp = ifunit(ifs->ifs_name); + bcopy(ifs->ifs_name, ifname, IFNAMSIZ); + ifp = ifunit(ifname); break; - } - case SIOCGIFMEDIA32: { /* struct ifmediareq32 */ + case SIOCGIFMEDIA32: /* struct ifmediareq32 */ bcopy(((struct ifmediareq32 *)(void *)data)->ifm_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } - case SIOCGIFMEDIA64: { /* struct ifmediareq64 */ + case SIOCGIFMEDIA64: /* struct ifmediareq64 */ bcopy(((struct ifmediareq64 *)(void *)data)->ifm_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } case SIOCSIFDESC: /* struct if_descreq */ - case SIOCGIFDESC: { /* struct if_descreq */ + case SIOCGIFDESC: /* struct if_descreq */ bcopy(((struct if_descreq *)(void *)data)->ifdr_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } case SIOCSIFLINKPARAMS: /* struct if_linkparamsreq */ - case SIOCGIFLINKPARAMS: { /* struct if_linkparamsreq */ + case SIOCGIFLINKPARAMS: /* struct if_linkparamsreq */ bcopy(((struct if_linkparamsreq *)(void *)data)->iflpr_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } - case SIOCGIFQUEUESTATS: { /* struct if_qstatsreq */ + case SIOCGIFQUEUESTATS: /* struct if_qstatsreq */ bcopy(((struct if_qstatsreq *)(void *)data)->ifqr_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } case SIOCSIFTHROTTLE: /* struct if_throttlereq */ - case SIOCGIFTHROTTLE: { /* struct if_throttlereq */ + case SIOCGIFTHROTTLE: /* struct if_throttlereq */ bcopy(((struct if_throttlereq *)(void *)data)->ifthr_name, ifname, IFNAMSIZ); ifp = ifunit(ifname); break; - } - default: { + default: /* * This is a bad assumption, but the code seems to * have been doing this in the past; caveat emptor. 
- */ - bcopy(((struct ifreq *)(void *)data)->ifr_name, - ifname, IFNAMSIZ); - ifp = ifunit(ifname); - break; - } - } - dlil_if_unlock(); - - if (ifp == NULL) { - error = ENXIO; - goto done; - } - - switch (cmd) { - case SIOCSIFPHYADDR: /* struct ifaliasreq */ -#if INET6 - case SIOCSIFPHYADDR_IN6_32: /* struct in6_aliasreq_32 */ - case SIOCSIFPHYADDR_IN6_64: /* struct in6_aliasreq_64 */ -#endif - case SIOCSLIFPHYADDR: /* struct if_laddrreq */ - error = proc_suser(p); - if (error != 0) - break; - - error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, data); - if (error != 0) - break; - - ifnet_touch_lastchange(ifp); - break; - - case SIOCGIFSTATUS: /* struct ifstat */ - VERIFY(ifs != NULL); - ifs->ascii[0] = '\0'; - - error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, (caddr_t)ifs); - - bcopy(ifs, data, sizeof (*ifs)); - break; - - case SIOCGLIFPHYADDR: /* struct if_laddrreq */ - case SIOCGIFMEDIA32: /* struct ifmediareq32 */ - case SIOCGIFMEDIA64: /* struct ifmediareq64 */ - error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, data); - break; - - case SIOCSIFDESC: { /* struct if_descreq */ - struct if_descreq *ifdr = (struct if_descreq *)(void *)data; - u_int32_t ifdr_len; - - if ((error = proc_suser(p)) != 0) - break; - - ifnet_lock_exclusive(ifp); - bcopy(&ifdr->ifdr_len, &ifdr_len, sizeof (ifdr_len)); - if (ifdr_len > sizeof (ifdr->ifdr_desc) || - ifdr_len > ifp->if_desc.ifd_maxlen) { - error = EINVAL; - ifnet_lock_done(ifp); - break; - } - - bzero(ifp->if_desc.ifd_desc, ifp->if_desc.ifd_maxlen); - if ((ifp->if_desc.ifd_len = ifdr_len) > 0) { - bcopy(ifdr->ifdr_desc, ifp->if_desc.ifd_desc, - MIN(ifdr_len, ifp->if_desc.ifd_maxlen)); - } - ifnet_lock_done(ifp); - break; - } - - case SIOCGIFDESC: { /* struct if_descreq */ - struct if_descreq *ifdr = (struct if_descreq *)(void *)data; - u_int32_t ifdr_len; - - ifnet_lock_shared(ifp); - ifdr_len = MIN(ifp->if_desc.ifd_len, sizeof (ifdr->ifdr_desc)); - bcopy(&ifdr_len, &ifdr->ifdr_len, sizeof (ifdr_len)); - bzero(&ifdr->ifdr_desc, sizeof (ifdr->ifdr_desc)); - if (ifdr_len > 0) { - bcopy(ifp->if_desc.ifd_desc, ifdr->ifdr_desc, ifdr_len); - } - ifnet_lock_done(ifp); - break; - } - - case SIOCSIFLINKPARAMS: { /* struct if_linkparamsreq */ - struct if_linkparamsreq *iflpr = - (struct if_linkparamsreq *)(void *)data; - struct ifclassq *ifq = &ifp->if_snd; - struct tb_profile tb = { 0, 0, 0 }; - - if ((error = proc_suser(p)) != 0) - break; - - IFCQ_LOCK(ifq); - if (!IFCQ_IS_READY(ifq)) { - error = ENXIO; - IFCQ_UNLOCK(ifq); - break; - } - bcopy(&iflpr->iflpr_output_tbr_rate, &tb.rate, - sizeof (tb.rate)); - bcopy(&iflpr->iflpr_output_tbr_percent, &tb.percent, - sizeof (tb.percent)); - error = ifclassq_tbr_set(ifq, &tb, TRUE); - IFCQ_UNLOCK(ifq); + */ + bcopy(((struct ifreq *)(void *)data)->ifr_name, + ifname, IFNAMSIZ); + ifp = ifunit(ifname); break; } + dlil_if_unlock(); - case SIOCGIFLINKPARAMS: { /* struct if_linkparamsreq */ - struct if_linkparamsreq *iflpr = - (struct if_linkparamsreq *)(void *)data; - struct ifclassq *ifq = &ifp->if_snd; - u_int32_t sched_type = PKTSCHEDT_NONE, flags = 0; - u_int64_t tbr_bw = 0, tbr_pct = 0; + if (ifp == NULL) { + error = ENXIO; + goto done; + } - IFCQ_LOCK(ifq); -#if PF_ALTQ - if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) { - sched_type = IFCQ_ALTQ(ifq)->altq_type; - flags |= IFLPRF_ALTQ; - } else -#endif /* PF_ALTQ */ - { - if (IFCQ_IS_ENABLED(ifq)) - sched_type = ifq->ifcq_type; - } - bcopy(&sched_type, &iflpr->iflpr_output_sched, - sizeof 
(iflpr->iflpr_output_sched)); + switch (cmd) { + case SIOCSIFPHYADDR: /* struct {if,in_}aliasreq */ +#if INET6 + case SIOCSIFPHYADDR_IN6_32: /* struct in6_aliasreq_32 */ + case SIOCSIFPHYADDR_IN6_64: /* struct in6_aliasreq_64 */ +#endif /* INET6 */ + case SIOCSLIFPHYADDR: /* struct if_laddrreq */ + error = proc_suser(p); + if (error != 0) + break; - if (IFCQ_TBR_IS_ENABLED(ifq)) { - tbr_bw = ifq->ifcq_tbr.tbr_rate_raw; - tbr_pct = ifq->ifcq_tbr.tbr_percent; - } - bcopy(&tbr_bw, &iflpr->iflpr_output_tbr_rate, - sizeof (iflpr->iflpr_output_tbr_rate)); - bcopy(&tbr_pct, &iflpr->iflpr_output_tbr_percent, - sizeof (iflpr->iflpr_output_tbr_percent)); - IFCQ_UNLOCK(ifq); + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, data); + if (error != 0) + break; - if (ifp->if_output_sched_model == - IFNET_SCHED_MODEL_DRIVER_MANAGED) - flags |= IFLPRF_DRVMANAGED; - bcopy(&flags, &iflpr->iflpr_flags, sizeof (iflpr->iflpr_flags)); - bcopy(&ifp->if_output_bw, &iflpr->iflpr_output_bw, - sizeof (iflpr->iflpr_output_bw)); - bcopy(&ifp->if_input_bw, &iflpr->iflpr_input_bw, - sizeof (iflpr->iflpr_input_bw)); + ifnet_touch_lastchange(ifp); break; - } - case SIOCGIFQUEUESTATS: { /* struct if_qstatsreq */ - struct if_qstatsreq *ifqr = (struct if_qstatsreq *)(void *)data; - u_int32_t ifqr_len, ifqr_slot; + case SIOCGIFSTATUS: /* struct ifstat */ + VERIFY(ifs != NULL); + ifs->ascii[0] = '\0'; - bcopy(&ifqr->ifqr_slot, &ifqr_slot, sizeof (ifqr_slot)); - bcopy(&ifqr->ifqr_len, &ifqr_len, sizeof (ifqr_len)); - error = ifclassq_getqstats(&ifp->if_snd, ifqr_slot, - ifqr->ifqr_buf, &ifqr_len); - if (error != 0) - ifqr_len = 0; - bcopy(&ifqr_len, &ifqr->ifqr_len, sizeof (ifqr_len)); + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifs); + + bcopy(ifs, data, sizeof (*ifs)); break; - } - case SIOCSIFTHROTTLE: { /* struct if_throttlereq */ - struct if_throttlereq *ifthr = - (struct if_throttlereq *)(void *)data; - u_int32_t ifthr_level; + case SIOCGLIFPHYADDR: /* struct if_laddrreq */ + case SIOCGIFMEDIA32: /* struct ifmediareq32 */ + case SIOCGIFMEDIA64: /* struct ifmediareq64 */ + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, data); + break; - /* - * XXX: Use priv_check_cred() instead of root check? 
- */ - if ((error = proc_suser(p)) != 0) - break; + case SIOCSIFDESC: /* struct if_descreq */ + case SIOCGIFDESC: /* struct if_descreq */ + error = ifioctl_ifdesc(ifp, cmd, data, p); + break; - bcopy(&ifthr->ifthr_level, &ifthr_level, sizeof (ifthr_level)); - error = ifnet_set_throttle(ifp, ifthr_level); - if (error == EALREADY) - error = 0; + case SIOCSIFLINKPARAMS: /* struct if_linkparamsreq */ + case SIOCGIFLINKPARAMS: /* struct if_linkparamsreq */ + error = ifioctl_linkparams(ifp, cmd, data, p); break; - } - case SIOCGIFTHROTTLE: { /* struct if_throttlereq */ - struct if_throttlereq *ifthr = - (struct if_throttlereq *)(void *)data; - u_int32_t ifthr_level; + case SIOCGIFQUEUESTATS: /* struct if_qstatsreq */ + error = ifioctl_qstats(ifp, cmd, data); + break; - if ((error = ifnet_get_throttle(ifp, &ifthr_level)) == 0) { - bcopy(&ifthr_level, &ifthr->ifthr_level, - sizeof (ifthr_level)); - } + case SIOCSIFTHROTTLE: /* struct if_throttlereq */ + case SIOCGIFTHROTTLE: /* struct if_throttlereq */ + error = ifioctl_throttle(ifp, cmd, data, p); break; - } default: if (so->so_proto == NULL) { @@ -1869,8 +1963,7 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) socket_unlock(so, 1); if (error == EOPNOTSUPP || error == ENOTSUP) { - error = ifnet_ioctl(ifp, - so->so_proto->pr_domain->dom_family, cmd, data); + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, data); } break; } @@ -1879,10 +1972,35 @@ done: if (ifs != NULL) _FREE(ifs, M_DEVBUF); + if (if_verbose) { + if (ifname[0] == '\0') + (void) snprintf(ifname, sizeof (ifname), "%s", + "NULL"); + else if (ifp != NULL) + (void) snprintf(ifname, sizeof (ifname), "%s", + if_name(ifp)); + + if (error != 0) { + printf("%s[%s,%d]: ifp %s cmd 0x%08lx (%c%c [%lu] " + "%c %lu) error %d\n", __func__, + proc_name_address(p), proc_pid(p), + ifname, cmd, (cmd & IOC_IN) ? 'I' : ' ', + (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd), + (char)IOCGROUP(cmd), cmd & 0xff, error); + } else if (if_verbose > 1) { + printf("%s[%s,%d]: ifp %s cmd 0x%08lx (%c%c [%lu] " + "%c %lu) OK\n", __func__, + proc_name_address(p), proc_pid(p), + ifname, cmd, (cmd & IOC_IN) ? 'I' : ' ', + (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd), + (char)IOCGROUP(cmd), cmd & 0xff); + } + } + return (error); } -static int +static __attribute__((noinline)) int ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) { struct ifnet *ifp; @@ -1894,8 +2012,6 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) bzero(&ev_data, sizeof (struct net_event_data)); bzero(&ev_msg, sizeof (struct kev_msg)); - ifr->ifr_name[IFNAMSIZ - 1] = '\0'; - switch (cmd) { case SIOCIFCREATE: case SIOCIFCREATE2: @@ -1911,7 +2027,14 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) return (if_clone_destroy(ifr->ifr_name)); } + /* + * ioctls which require ifp. Note that we acquire dlil_ifnet_lock + * here to ensure that the ifnet, if found, has been fully attached. 
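+ * (Otherwise a lookup racing with interface attach could, at least
+ * in principle, return an ifnet that is still being initialized;
+ * holding dlil_if_lock across ifunit() closes that window.)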
+ */ + dlil_if_lock(); ifp = ifunit(ifr->ifr_name); + dlil_if_unlock(); + if (ifp == NULL) return (ENXIO); @@ -1939,7 +2062,12 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) case SIOCGIFMAC: error = mac_ifnet_label_get(kauth_cred_get(), ifr, ifp); break; -#endif + + case SIOCSIFMAC: + error = mac_ifnet_label_set(kauth_cred_get(), ifr, ifp); + break; +#endif /* CONFIG_MACF_NET */ + case SIOCGIFMETRIC: ifnet_lock_shared(ifp); ifr->ifr_metric = ifp->if_metric; @@ -1958,24 +2086,6 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) ifnet_lock_done(ifp); break; - case SIOCGIFWAKEFLAGS: - ifnet_lock_shared(ifp); - ifr->ifr_wake_flags = ifnet_get_wake_flags(ifp); - ifnet_lock_done(ifp); - break; - - case SIOCGIFGETRTREFCNT: - ifnet_lock_shared(ifp); - ifr->ifr_route_refcnt = ifp->if_route_refcnt; - ifnet_lock_done(ifp); - break; - - case SIOCGIFLINKQUALITYMETRIC: - ifnet_lock_shared(ifp); - ifr->ifr_link_quality_metric = ifp->if_lqm; - ifnet_lock_done(ifp); - break; - case SIOCSIFFLAGS: error = proc_suser(p); if (error != 0) @@ -1988,8 +2098,7 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) * Note that we intentionally ignore any error from below * for the SIOCSIFFLAGS case. */ - (void) ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, (caddr_t)ifr); + (void) ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr); /* * Send the event even upon error from the driver because @@ -2020,17 +2129,11 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) error = EINVAL; break; } - error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, (caddr_t)ifr); + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr); ifnet_touch_lastchange(ifp); break; -#if CONFIG_MACF_NET - case SIOCSIFMAC: - error = mac_ifnet_label_set(kauth_cred_get(), ifr, ifp); - break; -#endif case SIOCSIFMETRIC: error = proc_suser(p); if (error != 0) @@ -2060,8 +2163,7 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) if (error != 0) break; - error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, (caddr_t)ifr); + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr); if (error != 0) break; @@ -2097,8 +2199,7 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) error = EINVAL; break; } - error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, (caddr_t)ifr); + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr); if (error != 0) break; @@ -2127,7 +2228,7 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) if_rtmtu_update(ifp); #if INET6 nd6_setmtu(ifp); -#endif +#endif /* INET6 */ /* Inform all transmit queues about the new MTU */ IFCQ_LOCK(ifq); ifnet_update_sndq(ifq, CLASSQ_EV_LINK_MTU); @@ -2202,22 +2303,59 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) if (error != 0) break; - error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, (caddr_t)ifr); + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr); if (error != 0) break; ifnet_touch_lastchange(ifp); break; + case SIOCGIFLLADDR: { + struct sockaddr_dl *sdl = SDL(ifp->if_lladdr->ifa_addr); + + if (sdl->sdl_alen == 0) { + error = EADDRNOTAVAIL; + break; + } + /* If larger than 14-bytes we'll need another mechanism */ + if (sdl->sdl_alen > sizeof (ifr->ifr_addr.sa_data)) { + error = EMSGSIZE; + break; + } + /* Follow the same convention used by SIOCSIFLLADDR 
*/ + bzero(&ifr->ifr_addr, sizeof (ifr->ifr_addr)); + ifr->ifr_addr.sa_family = AF_LINK; + ifr->ifr_addr.sa_len = sdl->sdl_alen; + error = ifnet_guarded_lladdr_copy_bytes(ifp, + &ifr->ifr_addr.sa_data, sdl->sdl_alen); + break; + } + + case SIOCGIFTYPE: + ifr->ifr_type.ift_type = ifp->if_type; + ifr->ifr_type.ift_family = ifp->if_family; + ifr->ifr_type.ift_subfamily = ifp->if_subfamily; + break; + case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFGENERIC: case SIOCGIFDEVMTU: case SIOCGIFVLAN: case SIOCGIFBOND: - error = ifnet_ioctl(ifp, so->so_proto->pr_domain->dom_family, - cmd, (caddr_t)ifr); + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr); + break; + + case SIOCGIFWAKEFLAGS: + ifnet_lock_shared(ifp); + ifr->ifr_wake_flags = ifnet_get_wake_flags(ifp); + ifnet_lock_done(ifp); + break; + + case SIOCGIFGETRTREFCNT: + ifnet_lock_shared(ifp); + ifr->ifr_route_refcnt = ifp->if_route_refcnt; + ifnet_lock_done(ifp); break; case SIOCSIFOPPORTUNISTIC: @@ -2225,6 +2363,24 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) error = ifnet_getset_opportunistic(ifp, cmd, ifr, p); break; + case SIOCGIFLINKQUALITYMETRIC: + ifnet_lock_shared(ifp); + ifr->ifr_link_quality_metric = ifp->if_lqm; + ifnet_lock_done(ifp); + break; + + case SIOCSIFLOG: + case SIOCGIFLOG: + error = ifnet_getset_log(ifp, cmd, ifr, p); + break; + + case SIOCGIFDELEGATE: + ifnet_lock_shared(ifp); + ifr->ifr_delegated = ((ifp->if_delegated.ifp != NULL) ? + ifp->if_delegated.ifp->if_index : 0); + ifnet_lock_done(ifp); + break; + case SIOCSIFDSTADDR: case SIOCSIFADDR: case SIOCSIFBRDADDR: @@ -2279,8 +2435,7 @@ ifioctl_ifreq(struct socket *so, u_long cmd, struct ifreq *ifr, struct proc *p) } if (error == EOPNOTSUPP || error == ENOTSUP) { - error = ifnet_ioctl(ifp, - so->so_proto->pr_domain->dom_family, cmd, + error = ifnet_ioctl(ifp, SOCK_DOM(so), cmd, (caddr_t)ifr); } break; @@ -2348,8 +2503,8 @@ ifnet_set_promiscuous( } if (newflags != oldflags) { - log(LOG_INFO, "%s%d: promiscuous mode %s%s\n", - ifp->if_name, ifp->if_unit, + log(LOG_INFO, "%s: promiscuous mode %s%s\n", + if_name(ifp), (newflags & IFF_PROMISC) != 0 ? "enable" : "disable", error != 0 ? " failed" : " succeeded"); } @@ -2364,30 +2519,33 @@ ifnet_set_promiscuous( */ /*ARGSUSED*/ static int -ifconf(u_long cmd, user_addr_t ifrp, int * ret_space) +ifconf(u_long cmd, user_addr_t ifrp, int *ret_space) { struct ifnet *ifp = NULL; struct ifaddr *ifa; struct ifreq ifr; int error = 0; size_t space; + net_thread_marks_t marks; + + marks = net_thread_marks_push(NET_THREAD_CKREQ_LLADDR); /* * Zero the ifr buffer to make sure we don't * disclose the contents of the stack. 
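 *
 * For orientation, a hypothetical user-space consumer of this handler
 * (s is assumed to be an open socket; the buffer size is arbitrary):
 *
 *	char buf[4096];
 *	struct ifconf ifc;
 *	ifc.ifc_len = sizeof (buf);
 *	ifc.ifc_buf = buf;
 *	if (ioctl(s, SIOCGIFCONF, &ifc) == 0) {
 *		walk the first ifc.ifc_len bytes as struct ifreq
 *		records, each possibly carrying a variable-length
 *		sockaddr
 *	}
 *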
*/ - bzero(&ifr, sizeof(struct ifreq)); + bzero(&ifr, sizeof (struct ifreq)); space = *ret_space; ifnet_head_lock_shared(); - for (ifp = ifnet_head.tqh_first; space > sizeof(ifr) && + for (ifp = ifnet_head.tqh_first; space > sizeof (ifr) && ifp; ifp = ifp->if_link.tqe_next) { char workbuf[64]; size_t ifnlen, addrs; - ifnlen = snprintf(workbuf, sizeof(workbuf), - "%s%d", ifp->if_name, ifp->if_unit); - if(ifnlen + 1 > sizeof ifr.ifr_name) { + ifnlen = snprintf(workbuf, sizeof (workbuf), + "%s", if_name(ifp)); + if (ifnlen + 1 > sizeof (ifr.ifr_name)) { error = ENAMETOOLONG; break; } else { @@ -2398,19 +2556,35 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space) addrs = 0; ifa = ifp->if_addrhead.tqh_first; - for ( ; space > sizeof (ifr) && ifa; + for (; space > sizeof (ifr) && ifa; ifa = ifa->ifa_link.tqe_next) { struct sockaddr *sa; + union { + struct sockaddr sa; + struct sockaddr_dl sdl; + uint8_t buf[SOCK_MAXADDRLEN + 1]; + } u; + + /* + * Make sure to accomodate the largest possible + * size of SA(if_lladdr)->sa_len. + */ + _CASSERT(sizeof (u) == (SOCK_MAXADDRLEN + 1)); IFA_LOCK(ifa); sa = ifa->ifa_addr; -#ifndef __APPLE__ - if (curproc->p_prison && prison_if(curproc, sa)) { + addrs++; + + if (ifa == ifp->if_lladdr) { + VERIFY(sa->sa_family == AF_LINK); + bcopy(sa, &u, sa->sa_len); IFA_UNLOCK(ifa); - continue; + ifnet_guarded_lladdr_copy_bytes(ifp, + LLADDR(&u.sdl), u.sdl.sdl_alen); + IFA_LOCK(ifa); + sa = &u.sa; } -#endif - addrs++; + if (cmd == OSIOCGIFCONF32 || cmd == OSIOCGIFCONF64) { struct osockaddr *osa = (struct osockaddr *)(void *)&ifr.ifr_addr; @@ -2418,25 +2592,25 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space) osa->sa_family = sa->sa_family; error = copyout((caddr_t)&ifr, ifrp, sizeof (ifr)); - ifrp += sizeof(struct ifreq); - } else if (sa->sa_len <= sizeof(*sa)) { + ifrp += sizeof (struct ifreq); + } else if (sa->sa_len <= sizeof (*sa)) { ifr.ifr_addr = *sa; error = copyout((caddr_t)&ifr, ifrp, sizeof (ifr)); - ifrp += sizeof(struct ifreq); + ifrp += sizeof (struct ifreq); } else { if (space < - sizeof (ifr) + sa->sa_len - sizeof(*sa)) { + sizeof (ifr) + sa->sa_len - sizeof (*sa)) { IFA_UNLOCK(ifa); break; } - space -= sa->sa_len - sizeof(*sa); + space -= sa->sa_len - sizeof (*sa); error = copyout((caddr_t)&ifr, ifrp, sizeof (ifr.ifr_name)); if (error == 0) { - error = copyout((caddr_t)sa, (ifrp + - offsetof(struct ifreq, ifr_addr)), - sa->sa_len); + error = copyout((caddr_t)sa, (ifrp + + offsetof(struct ifreq, ifr_addr)), + sa->sa_len); } ifrp += (sa->sa_len + offsetof(struct ifreq, ifr_addr)); @@ -2451,16 +2625,17 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space) if (error) break; if (!addrs) { - bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); + bzero((caddr_t)&ifr.ifr_addr, sizeof (ifr.ifr_addr)); error = copyout((caddr_t)&ifr, ifrp, sizeof (ifr)); if (error) break; space -= sizeof (ifr); - ifrp += sizeof(struct ifreq); + ifrp += sizeof (struct ifreq); } } ifnet_head_done(); *ret_space -= space; + net_thread_marks_pop(marks); return (error); } @@ -2876,7 +3051,7 @@ if_addmulti(struct ifnet *ifp, const struct sockaddr *sa, /* * Anything other than network-layer protocol domains which hold references * to the underlying link-layer record must use this routine: SIOCADDMULTI - * ioctl, ifnet_add_multicast(), AppleTalk, if_bond. + * ioctl, ifnet_add_multicast(), if_bond. 
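 * (Such "anonymous" memberships are accounted separately from the
 * protocol-held references on the link-layer record, so an unmatched
 * SIOCDELMULTI cannot tear down a membership that a protocol such as
 * IP still relies on.)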
*/ int if_addmulti_anon(struct ifnet *ifp, const struct sockaddr *sa, @@ -3086,7 +3261,7 @@ if_delmulti_ifma(struct ifmultiaddr *ifma) /* * Anything other than network-layer protocol domains which hold references * to the underlying link-layer record must use this routine: SIOCDELMULTI - * ioctl, ifnet_remove_multicast(), AppleTalk, if_bond. + * ioctl, ifnet_remove_multicast(), if_bond. */ int if_delmulti_anon(struct ifnet *ifp, const struct sockaddr *sa) @@ -3187,22 +3362,6 @@ if_delmulti_common(struct ifmultiaddr *ifma, struct ifnet *ifp, return (0); } -/* - * We don't use if_setlladdr, our interfaces are responsible for - * handling the SIOCSIFLLADDR ioctl. - */ -#ifndef __APPLE__ -int -if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) -{ - ... -} -#endif - -SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Link layers"); -SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Generic link-management"); - - /* * Shutdown all network activity. Used boot() when halting * system. @@ -3281,8 +3440,6 @@ if_rtproto_del(struct ifnet *ifp, int protocol) { struct radix_node_head *rnh; - if (use_routegenid) - routegenid_update(); if ((protocol <= AF_MAX) && (protocol >= 0) && ((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) { lck_mtx_lock(rnh_lock); @@ -3331,9 +3488,7 @@ void if_rtmtu_update(struct ifnet *ifp) (void) rnh->rnh_walktree(rnh, if_rtmtu, ifp); lck_mtx_unlock(rnh_lock); } - - if (use_routegenid) - routegenid_update(); + routegenid_update(); } __private_extern__ void @@ -3383,9 +3538,7 @@ if_data_internal_to_if_data(struct ifnet *ifp, if_data->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec; if_data->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec; -#if IF_LASTCHANGEUPTIME if_data->ifi_lastchange.tv_sec += boottime_sec(); -#endif if_data->ifi_unused2 = 0; COPYFIELD(ifi_hwassist); @@ -3441,9 +3594,7 @@ if_data_internal_to_if_data64(struct ifnet *ifp, if_data64->ifi_lastchange.tv_sec = if_data_int->ifi_lastchange.tv_sec; if_data64->ifi_lastchange.tv_usec = if_data_int->ifi_lastchange.tv_usec; -#if IF_LASTCHANGEUPTIME if_data64->ifi_lastchange.tv_sec += boottime_sec(); -#endif #undef COPYFIELD64 } @@ -3492,6 +3643,9 @@ if_copy_data_extended(struct ifnet *ifp, struct if_data_extended *if_de) bzero(if_de, sizeof (*if_de)); COPY_IF_DE_FIELD64_ATOMIC(ifi_alignerrs); + COPY_IF_DE_FIELD64_ATOMIC(ifi_dt_bytes); + COPY_IF_DE_FIELD64_ATOMIC(ifi_fpackets); + COPY_IF_DE_FIELD64_ATOMIC(ifi_fbytes); #undef COPY_IF_DE_FIELD64_ATOMIC } @@ -3569,10 +3723,9 @@ ifa_remref(struct ifaddr *ifa, int locked) panic("ifa %p attached to ifp is being freed\n", ifa); /* * Some interface addresses are allocated either statically - * or carved out of a larger block; e.g. AppleTalk addresses. - * Only free it if it was allocated via MALLOC or via the - * corresponding per-address family allocator. Otherwise, - * leave it alone. + * or carved out of a larger block. Only free it if it was + * allocated via MALLOC or via the corresponding per-address + * family allocator. Otherwise, leave it alone. */ if (ifa->ifa_debug & IFD_ALLOC) { if (ifa->ifa_free == NULL) { @@ -3626,3 +3779,186 @@ ifa_lock_destroy(struct ifaddr *ifa) IFA_LOCK_ASSERT_NOTHELD(ifa); lck_mtx_destroy(&ifa->ifa_lock, ifa_mtx_grp); } + +/* + * 'i' group ioctls. 
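+ *
+ * The idiom relies on the C requirement that case labels within one
+ * switch be distinct integer constant expressions, so any collision
+ * is a hard compile error. A minimal illustration, with hypothetical
+ * ioctl names:
+ *
+ *	switch ((u_long)0) {
+ *	case 0:
+ *	case SIOCFOO:	<- fails to compile if SIOCFOO == SIOCBAR
+ *	case SIOCBAR:
+ *		;
+ *	}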
+ * + * The switch statement below does nothing at runtime, as it serves as a + * compile time check to ensure that all of the socket 'i' ioctls (those + * in the 'i' group going thru soo_ioctl) that are made available by the + * networking stack is unique. This works as long as this routine gets + * updated each time a new interface ioctl gets added. + * + * Any failures at compile time indicates duplicated ioctl values. + */ +static __attribute__((unused)) void +ifioctl_cassert(void) +{ + /* + * This is equivalent to _CASSERT() and the compiler wouldn't + * generate any instructions, thus for compile time only. + */ + switch ((u_long)0) { + case 0: + + /* bsd/net/if_ppp.h */ + case SIOCGPPPSTATS: + case SIOCGPPPCSTATS: + +#if INET6 + /* bsd/netinet6/in6_var.h */ + case SIOCSIFADDR_IN6: + case SIOCGIFADDR_IN6: + case SIOCSIFDSTADDR_IN6: + case SIOCSIFNETMASK_IN6: + case SIOCGIFDSTADDR_IN6: + case SIOCGIFNETMASK_IN6: + case SIOCDIFADDR_IN6: + case SIOCAIFADDR_IN6_32: + case SIOCAIFADDR_IN6_64: + case SIOCSIFPHYADDR_IN6_32: + case SIOCSIFPHYADDR_IN6_64: + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + case SIOCGIFAFLAG_IN6: + case SIOCGDRLST_IN6_32: + case SIOCGDRLST_IN6_64: + case SIOCGPRLST_IN6_32: + case SIOCGPRLST_IN6_64: + case OSIOCGIFINFO_IN6: + case SIOCGIFINFO_IN6: + case SIOCSNDFLUSH_IN6: + case SIOCGNBRINFO_IN6_32: + case SIOCGNBRINFO_IN6_64: + case SIOCSPFXFLUSH_IN6: + case SIOCSRTRFLUSH_IN6: + case SIOCGIFALIFETIME_IN6: + case SIOCSIFALIFETIME_IN6: + case SIOCGIFSTAT_IN6: + case SIOCGIFSTAT_ICMP6: + case SIOCSDEFIFACE_IN6_32: + case SIOCSDEFIFACE_IN6_64: + case SIOCGDEFIFACE_IN6_32: + case SIOCGDEFIFACE_IN6_64: + case SIOCSIFINFO_FLAGS: + case SIOCSSCOPE6: + case SIOCGSCOPE6: + case SIOCGSCOPE6DEF: + case SIOCSIFPREFIX_IN6: + case SIOCGIFPREFIX_IN6: + case SIOCDIFPREFIX_IN6: + case SIOCAIFPREFIX_IN6: + case SIOCCIFPREFIX_IN6: + case SIOCSGIFPREFIX_IN6: + case SIOCPROTOATTACH_IN6_32: + case SIOCPROTOATTACH_IN6_64: + case SIOCPROTODETACH_IN6: + case SIOCLL_START_32: + case SIOCLL_START_64: + case SIOCLL_STOP: + case SIOCAUTOCONF_START: + case SIOCAUTOCONF_STOP: + case SIOCSETROUTERMODE_IN6: + case SIOCLL_CGASTART_32: + case SIOCLL_CGASTART_64: +#endif /* INET6 */ + + /* bsd/sys/sockio.h */ + case SIOCSIFADDR: + case OSIOCGIFADDR: + case SIOCSIFDSTADDR: + case OSIOCGIFDSTADDR: + case SIOCSIFFLAGS: + case SIOCGIFFLAGS: + case OSIOCGIFBRDADDR: + case SIOCSIFBRDADDR: + case OSIOCGIFCONF32: + case OSIOCGIFCONF64: + case OSIOCGIFNETMASK: + case SIOCSIFNETMASK: + case SIOCGIFMETRIC: + case SIOCSIFMETRIC: + case SIOCDIFADDR: + case SIOCAIFADDR: + case SIOCALIFADDR: + case SIOCGLIFADDR: + case SIOCDLIFADDR: + case SIOCGIFADDR: + case SIOCGIFDSTADDR: + case SIOCGIFBRDADDR: + case SIOCGIFCONF32: + case SIOCGIFCONF64: + case SIOCGIFNETMASK: + case SIOCAUTOADDR: + case SIOCAUTONETMASK: + case SIOCARPIPLL: + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCGIFMTU: + case SIOCSIFMTU: + case SIOCGIFPHYS: + case SIOCSIFPHYS: + case SIOCSIFMEDIA: + case SIOCGIFMEDIA32: + case SIOCGIFMEDIA64: + case SIOCSIFGENERIC: + case SIOCGIFGENERIC: + case SIOCRSLVMULTI: + case SIOCSIFLLADDR: + case SIOCGIFSTATUS: + case SIOCSIFPHYADDR: + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + case SIOCDIFPHYADDR: + case SIOCSLIFPHYADDR: + case SIOCGLIFPHYADDR: + case SIOCGIFDEVMTU: + case SIOCSIFALTMTU: + case SIOCGIFALTMTU: + case SIOCSIFBOND: + case SIOCGIFBOND: + case SIOCPROTOATTACH: + case SIOCPROTODETACH: + case SIOCSIFCAP: + case SIOCGIFCAP: + case SIOCIFCREATE: + case SIOCIFDESTROY: + case 
SIOCIFCREATE2: + case SIOCSDRVSPEC32: + case SIOCGDRVSPEC32: + case SIOCSDRVSPEC64: + case SIOCGDRVSPEC64: + case SIOCSIFVLAN: + case SIOCGIFVLAN: + case SIOCIFGCLONERS32: + case SIOCIFGCLONERS64: + case SIOCGIFASYNCMAP: + case SIOCSIFASYNCMAP: +#if CONFIG_MACF_NET + case SIOCGIFMAC: + case SIOCSIFMAC: +#endif /* CONFIG_MACF_NET */ + case SIOCSIFKPI: + case SIOCGIFKPI: + case SIOCGIFWAKEFLAGS: + case SIOCGIFGETRTREFCNT: + case SIOCGIFLINKQUALITYMETRIC: + case SIOCSIFOPPORTUNISTIC: + case SIOCGIFOPPORTUNISTIC: + case SIOCSETROUTERMODE: + case SIOCGIFEFLAGS: + case SIOCSIFDESC: + case SIOCGIFDESC: + case SIOCSIFLINKPARAMS: + case SIOCGIFLINKPARAMS: + case SIOCGIFQUEUESTATS: + case SIOCSIFTHROTTLE: + case SIOCGIFTHROTTLE: + case SIOCSIFLOG: + case SIOCGIFLOG: + case SIOCGIFDELEGATE: + case SIOCGIFLLADDR: + case SIOCGIFTYPE: + ; + } +} diff --git a/bsd/net/if.h b/bsd/net/if.h index 8f80f7b2f..0de578479 100644 --- a/bsd/net/if.h +++ b/bsd/net/if.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -58,7 +58,6 @@ * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 - * $FreeBSD: src/sys/net/if.h,v 1.58.2.2 2001/07/24 19:10:18 brooks Exp $ */ #ifndef _NET_IF_H_ @@ -76,34 +75,37 @@ * events. 
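 *
 * A hypothetical user-space sketch of watching for these data-link
 * events over a kernel event socket (interfaces assumed from
 * sys/kern_event.h; error handling omitted):
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = { KEV_VENDOR_APPLE, KEV_NETWORK_CLASS,
 *	    KEV_DL_SUBCLASS };
 *	ioctl(fd, SIOCSKEVFILT, &req);
 *	followed by recv() of struct kern_event_msg records whose
 *	event_code is one of the KEV_DL_* values below
 *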
*/ -#define KEV_DL_SUBCLASS 2 - -#define KEV_DL_SIFFLAGS 1 -#define KEV_DL_SIFMETRICS 2 -#define KEV_DL_SIFMTU 3 -#define KEV_DL_SIFPHYS 4 -#define KEV_DL_SIFMEDIA 5 -#define KEV_DL_SIFGENERIC 6 -#define KEV_DL_ADDMULTI 7 -#define KEV_DL_DELMULTI 8 -#define KEV_DL_IF_ATTACHED 9 -#define KEV_DL_IF_DETACHING 10 -#define KEV_DL_IF_DETACHED 11 -#define KEV_DL_LINK_OFF 12 -#define KEV_DL_LINK_ON 13 -#define KEV_DL_PROTO_ATTACHED 14 -#define KEV_DL_PROTO_DETACHED 15 -#define KEV_DL_LINK_ADDRESS_CHANGED 16 -#define KEV_DL_WAKEFLAGS_CHANGED 17 -#define KEV_DL_IF_IDLE_ROUTE_REFCNT 18 -#define KEV_DL_IFCAP_CHANGED 19 -#define KEV_DL_LINK_QUALITY_METRIC_CHANGED 20 -#define KEV_DL_NODE_PRESENCE 21 -#define KEV_DL_NODE_ABSENCE 22 -#define KEV_DL_MASTER_ELECTED 23 +#define KEV_DL_SUBCLASS 2 + +#define KEV_DL_SIFFLAGS 1 +#define KEV_DL_SIFMETRICS 2 +#define KEV_DL_SIFMTU 3 +#define KEV_DL_SIFPHYS 4 +#define KEV_DL_SIFMEDIA 5 +#define KEV_DL_SIFGENERIC 6 +#define KEV_DL_ADDMULTI 7 +#define KEV_DL_DELMULTI 8 +#define KEV_DL_IF_ATTACHED 9 +#define KEV_DL_IF_DETACHING 10 +#define KEV_DL_IF_DETACHED 11 +#define KEV_DL_LINK_OFF 12 +#define KEV_DL_LINK_ON 13 +#define KEV_DL_PROTO_ATTACHED 14 +#define KEV_DL_PROTO_DETACHED 15 +#define KEV_DL_LINK_ADDRESS_CHANGED 16 +#define KEV_DL_WAKEFLAGS_CHANGED 17 +#define KEV_DL_IF_IDLE_ROUTE_REFCNT 18 +#define KEV_DL_IFCAP_CHANGED 19 +#define KEV_DL_LINK_QUALITY_METRIC_CHANGED 20 +#define KEV_DL_NODE_PRESENCE 21 +#define KEV_DL_NODE_ABSENCE 22 +#define KEV_DL_MASTER_ELECTED 23 +#define KEV_DL_ISSUES 24 +#define KEV_DL_IFDELEGATE_CHANGED 25 #include #include +#include #ifdef PRIVATE #include @@ -111,15 +113,15 @@ #endif #endif -#ifdef KERNEL_PRIVATE -#define IF_MAXUNIT 0x7fff /* historical value */ - struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; +#ifdef KERNEL_PRIVATE +#define IF_MAXUNIT 0x7fff /* historical value */ + struct if_clonereq64 { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ @@ -138,7 +140,7 @@ struct if_clonereq32 { #define IFF_DEBUG 0x4 /* turn on debugging */ #define IFF_LOOPBACK 0x8 /* is a loopback net */ #define IFF_POINTOPOINT 0x10 /* interface is point-to-point link */ -#define IFF_NOTRAILERS 0x20 /* obsolete: avoid use of trailers */ +#define IFF_NOTRAILERS 0x20 /* obsolete: avoid use of trailers */ #define IFF_RUNNING 0x40 /* resources allocated */ #define IFF_NOARP 0x80 /* no address resolution protocol */ #define IFF_PROMISC 0x100 /* receive all packets */ @@ -152,29 +154,29 @@ struct if_clonereq32 { #define IFF_MULTICAST 0x8000 /* supports multicast */ #ifdef PRIVATE -/* extended flags definitions: (all bits are reserved for internal/future use) */ -#define IFEF_AUTOCONFIGURING 0x1 /* allow BOOTP/DHCP replies to enter */ -#define _IFEF_DVR_REENTRY_OK 0x20 /* deprecated */ -#define IFEF_ACCEPT_RTADV 0x40 /* set to accept IPv6 Router Advertisement on the interface */ -#define IFEF_TXSTART 0x80 /* interface has start callback */ -#define IFEF_RXPOLL 0x100 /* interface supports opportunistic input polling */ -#define IFEF_VLAN 0x200 /* interface has one or more vlans */ -#define IFEF_BOND 0x400 /* interface is part of bond */ -#define IFEF_ARPLL 0x800 /* ARP for IPv4LL addresses on this port */ +/* extended flags definitions: (all bits reserved for internal/future use) */ +#define IFEF_AUTOCONFIGURING 0x1 /* allow BOOTP/DHCP replies to enter */ +#define IFEF_IPV6_DISABLED 0x20 /* 
coupled to ND6_IFF_IFDISABLED */ +#define IFEF_ACCEPT_RTADV 0x40 /* accepts IPv6 RA on the interface */ +#define IFEF_TXSTART 0x80 /* has start callback */ +#define IFEF_RXPOLL 0x100 /* supports opportunistic input poll */ +#define IFEF_VLAN 0x200 /* interface has one or more vlans */ +#define IFEF_BOND 0x400 /* interface is part of bond */ +#define IFEF_ARPLL 0x800 /* ARP for IPv4LL addresses */ #define IFEF_NOWINDOWSCALE 0x1000 /* Don't scale TCP window on iface */ -#define IFEF_NOAUTOIPV6LL 0x2000 /* Interface IPv6 LinkLocal address not provided by kernel */ -#define IFEF_IPV4_ROUTER 0x8000 /* set on internal-network-facing interface when in IPv4 router mode */ -#define IFEF_IPV6_ROUTER 0x10000 /* set on internal-network-facing interface when in IPv6 router mode */ -#define IFEF_LOCALNET_PRIVATE 0x20000 /* local private network */ -#define IFEF_IPV6_ND6ALT 0x40000 /* alternative KPI for IPv6 neighbor discovery */ -#define IFEF_SERVICE_TRIGGERED IFEF_LOCALNET_PRIVATE -#define IFEF_RESTRICTED_RECV 0x80000 /* interface restricts inbound pkts */ -#define IFEF_AWDL 0x100000 /* Apple Wireless Direct Link */ -#define IFEF_NOACKPRI 0x200000 /* Don't use TCP ACK prioritization on interface */ -#define IFEF_SENDLIST 0x10000000 /* Interface supports sending a list of packets */ -#define _IFEF_REUSE 0x20000000 /* deprecated */ -#define _IFEF_INUSE 0x40000000 /* deprecated */ -#define IFEF_UPDOWNCHANGE 0x80000000 /* Interface's up/down state is changing */ +#define IFEF_NOAUTOIPV6LL 0x2000 /* Need explicit IPv6 LL address */ +#define IFEF_IPV4_ROUTER 0x8000 /* interior when in IPv4 router mode */ +#define IFEF_IPV6_ROUTER 0x10000 /* interior when in IPv6 router mode */ +#define IFEF_LOCALNET_PRIVATE 0x20000 /* local private network */ +#define IFEF_SERVICE_TRIGGERED IFEF_LOCALNET_PRIVATE +#define IFEF_IPV6_ND6ALT 0x40000 /* alternative. KPI for ND6 */ +#define IFEF_RESTRICTED_RECV 0x80000 /* interface restricts inbound pkts */ +#define IFEF_AWDL 0x100000 /* Apple Wireless Direct Link */ +#define IFEF_NOACKPRI 0x200000 /* No TCP ACK prioritization */ +#define IFEF_SENDLIST 0x10000000 /* Supports tx packet lists */ +#define IFEF_DIRECTLINK 0x20000000 /* point-to-point topology */ +#define _IFEF_INUSE 0x40000000 /* deprecated */ +#define IFEF_UPDOWNCHANGE 0x80000000 /* up/down state is changing */ #ifdef XNU_KERNEL_PRIVATE /* * Current requirements for an AWDL interface. Setting/clearing IFEF_AWDL @@ -198,7 +200,7 @@ struct if_clonereq32 { * the if_idle_flags field to a non-zero value will cause the networking * stack to aggressively purge expired objects (routes, etc.) */ -#define IFRF_IDLE_NOTIFY 0x1 /* Generate notifications on idle */ +#define IFRF_IDLE_NOTIFY 0x1 /* Generate notifications on idle */ /* flags set internally only: */ #define IFF_CANTCHANGE \ @@ -223,22 +225,23 @@ struct if_clonereq32 { * field. IFCAP_* and IFNET_* do not match one to one and IFNET_* may be * more detailed or differenciated than IFCAP_*. 
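 * (For instance, the single IFCAP_TSO4 capability bit would map to
 * the more specific IFNET_TSO_IPV4 hwassist flag; these IFNET_* names
 * are assumed from net/kpi_interface.h.)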
* IFNET_* hwassist flags have corresponding CSUM_* in sys/mbuf.h - */ -#define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ -#define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ -#define IFCAP_VLAN_MTU 0x00004 /* VLAN-compatible MTU */ -#define IFCAP_VLAN_HWTAGGING 0x00008 /* hardware VLAN tag support */ -#define IFCAP_JUMBO_MTU 0x00010 /* 9000 byte MTU supported */ -#define IFCAP_TSO4 0x00020 /* can do TCP Segmentation Offload */ -#define IFCAP_TSO6 0x00040 /* can do TCP6 Segmentation Offload */ -#define IFCAP_LRO 0x00080 /* can do Large Receive Offload */ -#define IFCAP_AV 0x00100 /* can do 802.1 AV Bridging */ - -#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) -#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) - -#define IFCAP_VALID (IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO | IFCAP_VLAN_MTU | \ - IFCAP_VLAN_HWTAGGING | IFCAP_JUMBO_MTU | IFCAP_AV) + */ +#define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ +#define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ +#define IFCAP_VLAN_MTU 0x00004 /* VLAN-compatible MTU */ +#define IFCAP_VLAN_HWTAGGING 0x00008 /* hardware VLAN tag support */ +#define IFCAP_JUMBO_MTU 0x00010 /* 9000 byte MTU supported */ +#define IFCAP_TSO4 0x00020 /* can do TCP Segmentation Offload */ +#define IFCAP_TSO6 0x00040 /* can do TCP6 Segmentation Offload */ +#define IFCAP_LRO 0x00080 /* can do Large Receive Offload */ +#define IFCAP_AV 0x00100 /* can do 802.1 AV Bridging */ +#define IFCAP_TXSTATUS 0x00200 /* can return linklevel xmit status */ + +#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) +#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) + +#define IFCAP_VALID (IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO | IFCAP_VLAN_MTU | \ + IFCAP_VLAN_HWTAGGING | IFCAP_JUMBO_MTU | IFCAP_AV | IFCAP_TXSTATUS) #define IFQ_MAXLEN 128 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ @@ -248,7 +251,7 @@ struct if_clonereq32 { * from sysctl and the routing socket */ struct if_msghdr { - unsigned short ifm_msglen; /* to skip over non-understood messages */ + unsigned short ifm_msglen; /* to skip non-understood messages */ unsigned char ifm_version; /* future binary compatability */ unsigned char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ @@ -262,7 +265,7 @@ struct if_msghdr { * from sysctl and the routing socket */ struct ifa_msghdr { - unsigned short ifam_msglen; /* to skip over non-understood messages */ + unsigned short ifam_msglen; /* to skip non-understood messages */ unsigned char ifam_version; /* future binary compatability */ unsigned char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ @@ -276,7 +279,7 @@ struct ifa_msghdr { * from the routing socket */ struct ifma_msghdr { - unsigned short ifmam_msglen; /* to skip over non-understood messages */ + unsigned short ifmam_msglen; /* to skip non-understood messages */ unsigned char ifmam_version; /* future binary compatability */ unsigned char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ @@ -286,7 +289,7 @@ struct ifma_msghdr { /* * Message format for use in obtaining information about interfaces - * from sysctl + * from sysctl */ struct if_msghdr2 { u_short ifm_msglen; /* to skip over non-understood messages */ @@ -299,7 +302,7 @@ struct if_msghdr2 { int ifm_snd_maxlen; /* maximum length of send queue */ int ifm_snd_drops; /* number of drops in send queue */ int ifm_timer; /* time until if_watchdog called */ - struct if_data64 ifm_data; /* statistics and other data about if */ + struct if_data64 ifm_data; /* statistics and other data */ }; /* 
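[Note: the msglen-first layout of if_msghdr/ifa_msghdr/ifma_msghdr above is what makes the "skip non-understood messages" contract work: consumers advance record by record without having to recognize every message type. A minimal userland sketch of the standard NET_RT_IFLIST walk, with error handling trimmed; this is illustrative only, not part of this patch:

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/route.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	int mib[6] = { CTL_NET, PF_ROUTE, 0, 0, NET_RT_IFLIST, 0 };
	size_t len;
	char *buf, *next;

	/* Size the buffer, then fetch the interface list records. */
	if (sysctl(mib, 6, NULL, &len, NULL, 0) == -1)
		return (1);
	if ((buf = malloc(len)) == NULL ||
	    sysctl(mib, 6, buf, &len, NULL, 0) == -1)
		return (1);
	for (next = buf; next < buf + len; ) {
		struct if_msghdr *ifm = (struct if_msghdr *)(void *)next;

		if (ifm->ifm_type == RTM_IFINFO)
			printf("ifindex %u flags 0x%x\n",
			    (unsigned)ifm->ifm_index, ifm->ifm_flags);
		/* ifm_msglen lets us hop over record types we don't parse */
		next += ifm->ifm_msglen;
	}
	free(buf);
	return (0);
}
]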
@@ -337,13 +340,13 @@ struct ifdevmtu {
    user space, a value from SIOCGKEVVENDOR ioctl on a kernel event socket.
    ifk_type - The type. Types are specific to each module id.
    ifk_data - The data. ifk_ptr may be a 64bit pointer for 64 bit processes.
-
+
    Copying data between user space and kernel space is done using copyin
    and copyout. A process may be running in 64bit mode. In such a case,
    the pointer will be a 64bit pointer, not a 32bit pointer. The following
    sample is a safe way to copy the data in to the kernel from either a
    32bit or 64bit process:
-
+
    user_addr_t tmp_ptr;
    if (IS_64BIT_PROCESS(current_proc())) {
        tmp_ptr = CAST_USER_ADDR_T(ifkpi.ifk_data.ifk_ptr64);
@@ -366,10 +369,10 @@ struct ifkpi {
 	} ifk_data;
 };
-/* Wake capabilities of a interface */
-#define IF_WAKE_ON_MAGIC_PACKET 0x01
+/* Wake capabilities of an interface */
+#define IF_WAKE_ON_MAGIC_PACKET 0x01
 #ifdef KERNEL_PRIVATE
-#define IF_WAKE_VALID_FLAGS IF_WAKE_ON_MAGIC_PACKET
+#define IF_WAKE_VALID_FLAGS IF_WAKE_ON_MAGIC_PACKET
 #endif /* KERNEL_PRIVATE */
@@ -407,14 +410,54 @@ struct ifreq {
 #ifdef PRIVATE
 int ifru_link_quality_metric;
 #endif /* PRIVATE */
-	int ifru_cap[2];
+	int ifru_cap[2];
 #ifdef PRIVATE
 	struct {
 		uint32_t ifo_flags;
-#define IFRIFOF_BLOCK_OPPORTUNISTIC 0x00000001
+#define IFRIFOF_BLOCK_OPPORTUNISTIC 0x00000001
 		uint32_t ifo_inuse;
 	} ifru_opportunistic;
 	u_int64_t ifru_eflags;
+	struct {
+		int32_t ifl_level;
+		uint32_t ifl_flags;
+#define IFRLOGF_DLIL 0x00000001
+#define IFRLOGF_FAMILY 0x00010000
+#define IFRLOGF_DRIVER 0x01000000
+#define IFRLOGF_FIRMWARE 0x10000000
+		int32_t ifl_category;
+#define IFRLOGCAT_CONNECTIVITY 1
+#define IFRLOGCAT_QUALITY 2
+#define IFRLOGCAT_PERFORMANCE 3
+		int32_t ifl_subcategory;
+	} ifru_log;
+	u_int32_t ifru_delegated;
+	struct {
+		uint32_t ift_type;
+		uint32_t ift_family;
+#define IFRTYPE_FAMILY_ANY 0
+#define IFRTYPE_FAMILY_LOOPBACK 1
+#define IFRTYPE_FAMILY_ETHERNET 2
+#define IFRTYPE_FAMILY_SLIP 3
+#define IFRTYPE_FAMILY_TUN 4
+#define IFRTYPE_FAMILY_VLAN 5
+#define IFRTYPE_FAMILY_PPP 6
+#define IFRTYPE_FAMILY_PVC 7
+#define IFRTYPE_FAMILY_DISC 8
+#define IFRTYPE_FAMILY_MDECAP 9
+#define IFRTYPE_FAMILY_GIF 10
+#define IFRTYPE_FAMILY_FAITH 11
+#define IFRTYPE_FAMILY_STF 12
+#define IFRTYPE_FAMILY_FIREWIRE 13
+#define IFRTYPE_FAMILY_BOND 14
+#define IFRTYPE_FAMILY_CELLULAR 15
+		uint32_t ift_subfamily;
+#define IFRTYPE_SUBFAMILY_ANY 0
+#define IFRTYPE_SUBFAMILY_USB 1
+#define IFRTYPE_SUBFAMILY_BLUETOOTH 2
+#define IFRTYPE_SUBFAMILY_WIFI 3
+#define IFRTYPE_SUBFAMILY_THUNDERBOLT 4
+	} ifru_type;
 #endif /* PRIVATE */
 	} ifr_ifru;
 #define ifr_addr ifr_ifru.ifru_addr /* address */
@@ -428,32 +471,35 @@ struct ifreq {
 #endif /* __APPLE__ */
 #define ifr_metric ifr_ifru.ifru_metric /* metric */
 #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */
-#define ifr_phys ifr_ifru.ifru_phys /* physical wire */
-#define ifr_media ifr_ifru.ifru_media /* physical media */
+#define ifr_phys ifr_ifru.ifru_phys /* physical wire */
+#define ifr_media ifr_ifru.ifru_media /* physical media */
 #define ifr_data ifr_ifru.ifru_data /* for use by interface */
-#define ifr_devmtu ifr_ifru.ifru_devmtu
-#define ifr_intval ifr_ifru.ifru_intval /* integer value */
+#define ifr_devmtu ifr_ifru.ifru_devmtu
+#define ifr_intval ifr_ifru.ifru_intval /* integer value */
 #ifdef KERNEL_PRIVATE
-#define ifr_data64 ifr_ifru.ifru_data64 /* 64-bit pointer */
+#define ifr_data64 ifr_ifru.ifru_data64 /* 64-bit pointer */
 #endif /* KERNEL_PRIVATE */
-#define ifr_kpi ifr_ifru.ifru_kpi
-#define ifr_wake_flags ifr_ifru.ifru_wake_flags
/* wake capabilities of devive */ -#define ifr_route_refcnt ifr_ifru.ifru_route_refcnt /* route references on interface */ +#define ifr_kpi ifr_ifru.ifru_kpi +#define ifr_wake_flags ifr_ifru.ifru_wake_flags /* wake capabilities */ +#define ifr_route_refcnt ifr_ifru.ifru_route_refcnt /* route references count */ #ifdef PRIVATE -#define ifr_link_quality_metric ifr_ifru.ifru_link_quality_metric /* LQM */ +#define ifr_link_quality_metric ifr_ifru.ifru_link_quality_metric /* LQM */ #endif /* PRIVATE */ -#define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ -#define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ +#define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ +#define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #ifdef PRIVATE -#define ifr_opportunistic ifr_ifru.ifru_opportunistic /* current capabilities */ +#define ifr_opportunistic ifr_ifru.ifru_opportunistic #define ifr_eflags ifr_ifru.ifru_eflags /* extended flags */ -#endif +#define ifr_log ifr_ifru.ifru_log /* logging level/flags */ +#define ifr_delegated ifr_ifru.ifru_delegated /* delegated interface index */ +#define ifr_type ifr_ifru.ifru_type /* interface type */ +#endif /* PRIVATE */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ - ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ - (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ - (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) + ((ifr).ifr_addr.sa_len > sizeof (struct sockaddr) ? \ + (sizeof (struct ifreq) - sizeof (struct sockaddr) + \ + (ifr).ifr_addr.sa_len) : sizeof (struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ @@ -463,8 +509,8 @@ struct ifaliasreq { }; struct rslvmulti_req { - struct sockaddr *sa; - struct sockaddr **llsa; + struct sockaddr *sa; + struct sockaddr **llsa; }; #if !defined(KERNEL) || defined(KERNEL_PRIVATE) @@ -510,9 +556,9 @@ struct ifmediareq32 { #pragma pack(4) struct ifdrv { - char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; - size_t ifd_len; + size_t ifd_len; /* length of ifd_data buffer */ void *ifd_data; }; #pragma pack() @@ -520,14 +566,14 @@ struct ifdrv { #ifdef KERNEL_PRIVATE #pragma pack(4) struct ifdrv32 { - char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_int32_t ifd_cmd; u_int32_t ifd_len; user32_addr_t ifd_data; }; struct ifdrv64 { - char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_int64_t ifd_cmd; u_int64_t ifd_len; user64_addr_t ifd_data; @@ -535,11 +581,11 @@ struct ifdrv64 { #pragma pack() #endif /* KERNEL_PRIVATE */ -/* +/* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with - * a newline. The canonical example to copy and paste is in if_tun.c. + * a newline. 
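[Note: _SIZEOF_ADDR_IFREQ() above exists because an ifreq returned by SIOCGIFCONF is variable length: when sa_len exceeds sizeof (struct sockaddr), the sockaddr spills past the embedded union. A minimal userland sketch of the intended iteration; list_interfaces is an illustrative helper (call it with s = socket(AF_INET, SOCK_DGRAM, 0)), not API from this patch:

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>

static void
list_interfaces(int s)
{
	char buf[4096];
	struct ifconf ifc;
	char *ptr;

	memset(&ifc, 0, sizeof (ifc));
	ifc.ifc_len = sizeof (buf);
	ifc.ifc_buf = buf;
	if (ioctl(s, SIOCGIFCONF, &ifc) == -1)
		return;
	for (ptr = buf; ptr < buf + ifc.ifc_len; ) {
		struct ifreq *ifr = (struct ifreq *)(void *)ptr;

		printf("%s\n", ifr->ifr_name);
		/* variable stride: sa_len may exceed sizeof (struct sockaddr) */
		ptr += _SIZEOF_ADDR_IFREQ(*ifr);
	}
}
]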
*/ #define IFSTATMAX 800 /* 10 lines of text */ @@ -571,14 +617,14 @@ struct ifconf { #if defined(KERNEL_PRIVATE) #pragma pack(4) struct ifconf32 { - int ifc_len; /* size of associated buffer */ + int ifc_len; /* size of associated buffer */ struct { user32_addr_t ifcu_req; } ifc_ifcu; }; struct ifconf64 { - int ifc_len; /* size of associated buffer */ + int ifc_len; /* size of associated buffer */ struct { user64_addr_t ifcu_req __attribute__((aligned(8))); } ifc_ifcu; @@ -590,9 +636,9 @@ struct ifconf64 { * DLIL KEV_DL_PROTO_ATTACHED/DETACHED structure */ struct kev_dl_proto_data { - struct net_event_data link_data; - u_int32_t proto_family; - u_int32_t proto_remaining_count; + struct net_event_data link_data; + u_int32_t proto_family; + u_int32_t proto_remaining_count; }; /* @@ -602,19 +648,19 @@ struct if_laddrreq { char iflr_name[IFNAMSIZ]; unsigned int flags; #define IFLR_PREFIX 0x8000 /* in: prefix given out: kernel fills id */ - unsigned int prefixlen; /* in/out */ - struct sockaddr_storage addr; /* in/out */ - struct sockaddr_storage dstaddr; /* out */ + unsigned int prefixlen; /* in/out */ + struct sockaddr_storage addr; /* in/out */ + struct sockaddr_storage dstaddr; /* out */ }; #ifdef PRIVATE /* - * Link Quality Metrics + * Link Quality Metrics * - * IFNET_LQM_THRESH_OFF Metric is not available; device is off. - * IFNET_LQM_THRESH_UNKNOWN Metric is not (yet) known. - * IFNET_LQM_THRESH_POOR Link quality is considered poor by driver. - * IFNET_LQM_THRESH_GOOD Link quality is considered good by driver. + * IFNET_LQM_THRESH_OFF Metric is not available; device is off. + * IFNET_LQM_THRESH_UNKNOWN Metric is not (yet) known. + * IFNET_LQM_THRESH_POOR Link quality is considered poor by driver. + * IFNET_LQM_THRESH_GOOD Link quality is considered good by driver. */ enum { IFNET_LQM_THRESH_OFF = (-2), @@ -685,6 +731,8 @@ struct if_linkparamsreq { u_int32_t iflpr_output_tbr_percent; struct if_bandwidths iflpr_output_bw; struct if_bandwidths iflpr_input_bw; + struct if_latencies iflpr_output_lt; + struct if_latencies iflpr_input_lt; }; /* @@ -765,6 +813,19 @@ enum { IFNET_THROTTLE_MAX = 2, #endif /* XNU_KERNEL_PRIVATE */ }; + +#define DLIL_MODIDLEN 20 /* same as IFNET_MODIDLEN */ +#define DLIL_MODARGLEN 12 /* same as IFNET_MODARGLEN */ + +/* + * DLIL KEV_DL_ISSUES event structure + */ +struct kev_dl_issues { + struct net_event_data link_data; + u_int8_t modid[DLIL_MODIDLEN]; + u_int64_t timestamp; + u_int8_t info[DLIL_MODARGLEN]; +}; #endif /* PRIVATE */ #ifdef KERNEL diff --git a/bsd/net/if_arp.h b/bsd/net/if_arp.h index 0a6c2e874..5ea113253 100644 --- a/bsd/net/if_arp.h +++ b/bsd/net/if_arp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -63,6 +63,7 @@ #ifndef _NET_IF_ARP_H_ #define _NET_IF_ARP_H_ +#include #include #include @@ -120,4 +121,32 @@ struct arpreq { #define ATF_PUBL 0x08 /* publish entry (respond for other host) */ #define ATF_USETRAILERS 0x10 /* has requested trailers */ +struct arpstat { + /* Normal things that happen: */ + uint32_t txrequests; /* # of ARP requests sent by this host. */ + uint32_t txreplies; /* # of ARP replies sent by this host. */ + uint32_t txannounces; /* # of ARP announcements sent by this host. */ + uint32_t rxrequests; /* # of ARP requests received by this host. */ + uint32_t rxreplies; /* # of ARP replies received by this host. */ + uint32_t received; /* # of ARP packets received by this host. 
*/ + + /* Abnormal event and error counting: */ + uint32_t txconflicts; /* # of ARP conflict probes sent */ + uint32_t invalidreqs; /* # of invalid ARP resolve requests */ + uint32_t reqnobufs; /* # of failed requests due to no memory */ + uint32_t dropped; /* # of packets dropped waiting for a reply. */ + uint32_t purged; /* # of packets purged while removing entries */ + uint32_t timeouts; /* # of times with entries removed */ + /* due to timeout. */ + uint32_t dupips; /* # of duplicate IPs detected. */ + + /* General statistics */ + uint32_t inuse; /* # of ARP entries in routing table */ + uint32_t txurequests; /* # of ARP requests sent (unicast) */ +}; + +#ifdef BSD_KERNEL_PRIVATE +extern struct arpstat arpstat; +#endif /* BSD_KERNEL_PRIVATE */ + #endif /* !_NET_IF_ARP_H_ */ diff --git a/bsd/net/if_bond.c b/bsd/net/if_bond.c index 1964a5524..271a07668 100644 --- a/bsd/net/if_bond.c +++ b/bsd/net/if_bond.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -939,7 +939,7 @@ bond_globals_init(void) } b = NULL; if (ifp != NULL) { - b = bond_globals_create(0x8000, (lacp_system_ref)ifnet_lladdr(ifp)); + b = bond_globals_create(0x8000, (lacp_system_ref)IF_LLADDR(ifp)); } bond_lock(); if (g_bond != NULL) { @@ -1116,7 +1116,7 @@ bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params) int error; ifbond_ref ifb; ifnet_t ifp; - struct ifnet_init_params bond_init; + struct ifnet_init_eparams bond_init; error = bond_globals_init(); if (error != 0) { @@ -1142,6 +1142,9 @@ bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params) } bzero(&bond_init, sizeof(bond_init)); + bond_init.ver = IFNET_INIT_CURRENT_VERSION; + bond_init.len = sizeof (bond_init); + bond_init.flags = IFNET_INIT_LEGACY; bond_init.uniqueid = ifb->ifb_name; bond_init.uniqueid_len = strlen(ifb->ifb_name); bond_init.name = ifc->ifc_name; @@ -1153,14 +1156,14 @@ bond_clone_create(struct if_clone * ifc, u_int32_t unit, __unused void *params) bond_init.add_proto = ether_add_proto; bond_init.del_proto = ether_del_proto; bond_init.check_multi = ether_check_multi; - bond_init.framer = ether_frameout; + bond_init.framer_extended = ether_frameout_extended; bond_init.ioctl = bond_ioctl; bond_init.set_bpf_tap = bond_set_bpf_tap; bond_init.detach = bond_if_free; bond_init.broadcast_addr = etherbroadcastaddr; bond_init.broadcast_len = ETHER_ADDR_LEN; bond_init.softc = ifb; - error = ifnet_allocate(&bond_init, &ifp); + error = ifnet_allocate_extended(&bond_init, &ifp); if (error) { ifbond_release(ifb); @@ -1484,8 +1487,8 @@ bond_output(struct ifnet * ifp, struct mbuf * m) m_freem(m); return (0); } - if (m->m_pkthdr.socket_id != 0) { - h = m->m_pkthdr.socket_id; + if (m->m_pkthdr.pkt_flowid != 0) { + h = m->m_pkthdr.pkt_flowid; } else { struct ether_header * eh_p; @@ -1743,7 +1746,7 @@ bond_input(ifnet_t port_ifp, __unused protocol_family_t protocol, mbuf_t m, } m->m_pkthdr.rcvif = ifp; bond_bpf_input(ifp, m, eh_p, bpf_func); - m->m_pkthdr.header = frame_header; + m->m_pkthdr.pkt_hdr = frame_header; dlil_input_packet_list(ifp, m); return 0; @@ -2071,7 +2074,7 @@ bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp) p->po_bond = ifb; /* remember the port's ethernet address so it can be restored */ - ether_addr_copy(&p->po_saved_addr, ifnet_lladdr(port_ifp)); + ether_addr_copy(&p->po_saved_addr, IF_LLADDR(port_ifp)); /* add it to the list of ports */ 
TAILQ_INSERT_TAIL(&ifb->ifb_port_list, p, po_port_list); @@ -2086,7 +2089,7 @@ bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp) /* first port added to bond determines bond's ethernet address */ if (first) { - ifnet_set_lladdr_and_type(ifp, ifnet_lladdr(port_ifp), ETHER_ADDR_LEN, + ifnet_set_lladdr_and_type(ifp, IF_LLADDR(port_ifp), ETHER_ADDR_LEN, IFT_ETHER); } @@ -2139,7 +2142,7 @@ bond_add_interface(struct ifnet * ifp, struct ifnet * port_ifp) /* re-program the port's ethernet address */ error = if_siflladdr(port_ifp, - (const struct ether_addr *)ifnet_lladdr(ifp)); + (const struct ether_addr *)IF_LLADDR(ifp)); if (error != 0) { /* port doesn't support setting the link address */ printf("bond_add_interface(%s, %s): if_siflladdr failed %d\n", @@ -2307,7 +2310,7 @@ bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp) ifnet_set_mtu(ifp, 0); ifb->ifb_altmtu = 0; } else if (ifbond_flags_lladdr(ifb) == FALSE - && bcmp(&p->po_saved_addr, ifnet_lladdr(ifp), + && bcmp(&p->po_saved_addr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) { new_link_address = TRUE; } @@ -2350,7 +2353,7 @@ bond_remove_interface(ifbond_ref ifb, struct ifnet * port_ifp) scan_ifp = scan_port->po_ifp; error = if_siflladdr(scan_ifp, - (const struct ether_addr *) ifnet_lladdr(ifp)); + (const struct ether_addr *) IF_LLADDR(ifp)); if (error != 0) { printf("bond_remove_interface(%s, %s): " "if_siflladdr (%s) failed %d\n", @@ -3154,16 +3157,6 @@ bond_family_init(void) error); goto done; } -#endif -#if NETAT - error = proto_register_plumber(PF_APPLETALK, APPLE_IF_FAM_BOND, - ether_attach_at, - ether_detach_at); - if (error != 0) { - printf("bond: proto_register_plumber failed for AppleTalk error=%d\n", - error); - goto done; - } #endif error = bond_clone_attach(); if (error != 0) { diff --git a/bsd/net/if_bond_internal.h b/bsd/net/if_bond_internal.h index 99e6058dc..68e8901b7 100644 --- a/bsd/net/if_bond_internal.h +++ b/bsd/net/if_bond_internal.h @@ -28,7 +28,7 @@ #ifndef _NET_IF_BOND_INTERNAL_H_ #ifdef KERNEL_PRIVATE -int bond_family_init(void) __attribute__((section("__TEXT, initcode"))); +int bond_family_init(void); #endif /* KERNEL_PRIVATE */ #endif /* _NET_IF_BOND_INTERNAL_H_ */ diff --git a/bsd/net/if_bridge.c b/bsd/net/if_bridge.c index db581d740..bb2ced374 100644 --- a/bsd/net/if_bridge.c +++ b/bsd/net/if_bridge.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -99,14 +99,13 @@ * 802.11, VLANs on Ethernet, etc.) Figure out a nice way * to bridge other types of interfaces (FDDI-FDDI, and maybe * consider heterogenous bridges). + * + * - GIF isn't handled due to the lack of IPPROTO_ETHERIP support. 
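[Note: both if_bond (above) and if_bridge (below) are converted from ifnet_allocate() to ifnet_allocate_extended() in this patch. The extended params are versioned so the KPI can grow without breaking callers. A minimal kernel-side sketch of the pattern, assuming the net/kpi_interface.h KPI; example_allocate is a hypothetical helper, and a real driver must also fill the demux/add_proto/del_proto/framer_extended/ioctl/detach callbacks the way bond_clone_create does:

#include <net/kpi_interface.h>
#include <net/if_types.h>

static errno_t
example_allocate(void *softc, ifnet_t *ifpp)
{
	struct ifnet_init_eparams ep;

	bzero(&ep, sizeof (ep));
	ep.ver = IFNET_INIT_CURRENT_VERSION;	/* stamp the KPI version */
	ep.len = sizeof (ep);			/* lets the struct grow later */
	ep.flags = IFNET_INIT_LEGACY;		/* keep the legacy output model */
	ep.name = "example";
	ep.unit = 0;
	ep.family = IFNET_FAMILY_ETHERNET;
	ep.type = IFT_ETHER;
	ep.softc = softc;
	/* ... demux, add_proto, framer_extended, ioctl, detach, ... */
	return (ifnet_allocate_extended(&ep, ifpp));
}
]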
*/ #include -#define BRIDGE_DEBUG 1 -#ifndef BRIDGE_DEBUG -#define BRIDGE_DEBUG 0 -#endif /* BRIDGE_DEBUG */ +#define BRIDGE_DEBUG 1 #include #include @@ -126,6 +125,8 @@ #include +#include + #include #include @@ -137,13 +138,14 @@ #include #include #include +#include #include /* for struct arpcom */ #include #include #include #include -#ifdef INET6 +#if INET6 #include #include #endif @@ -167,86 +169,93 @@ #include #include #endif /* PFIL_HOOKS */ +#include + +#if BRIDGE_DEBUG +#define BR_DBGF_LIFECYCLE 0x0001 +#define BR_DBGF_INPUT 0x0002 +#define BR_DBGF_OUTPPUT 0x0004 +#define BR_DBGF_RT_TABLE 0x0008 +#define BR_DBGF_DELAYED_CALL 0x0010 +#define BR_DBGF_IOCTL 0x0020 +#define BR_DBGF_MBUF 0x0040 +#define BR_DBGF_MCAST 0x0080 +#endif /* BRIDGE_DEBUG */ + +#define _BRIDGE_LOCK(_sc) lck_mtx_lock(&(_sc)->sc_mtx) +#define _BRIDGE_UNLOCK(_sc) lck_mtx_unlock(&(_sc)->sc_mtx) +#define BRIDGE_LOCK_ASSERT_HELD(_sc) \ + lck_mtx_assert(&(_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED) +#define BRIDGE_LOCK_ASSERT_NOTHELD(_sc) \ + lck_mtx_assert(&(_sc)->sc_mtx, LCK_MTX_ASSERT_NOTOWNED) #if BRIDGE_DEBUG -#define BR_LCKDBG_MAX 4 +#define BR_LCKDBG_MAX 4 -#define BRIDGE_LOCK(_sc) bridge_lock(_sc) -#define BRIDGE_UNLOCK(_sc) bridge_unlock(_sc) -#define BRIDGE_LOCK_ASSERT(_sc) \ - lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED) +#define BRIDGE_LOCK(_sc) bridge_lock(_sc) +#define BRIDGE_UNLOCK(_sc) bridge_unlock(_sc) #define BRIDGE_LOCK2REF(_sc, _err) _err = bridge_lock2ref(_sc) #define BRIDGE_UNREF(_sc) bridge_unref(_sc) #define BRIDGE_XLOCK(_sc) bridge_xlock(_sc) #define BRIDGE_XDROP(_sc) bridge_xdrop(_sc) -#else /* BRIDGE_DEBUG */ +#else /* !BRIDGE_DEBUG */ -#define BRIDGE_LOCK(_sc) lck_mtx_lock((_sc)->sc_mtx) -#define BRIDGE_UNLOCK(_sc) lck_mtx_unlock((_sc)->sc_mtx) -#define BRIDGE_LOCK_ASSERT(_sc) \ - lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED) +#define BRIDGE_LOCK(_sc) _BRIDGE_LOCK(_sc) +#define BRIDGE_UNLOCK(_sc) _BRIDGE_UNLOCK(_sc) #define BRIDGE_LOCK2REF(_sc, _err) do { \ - lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED); \ + BRIDGE_LOCK_ASSERT_HELD(_sc); \ if ((_sc)->sc_iflist_xcnt > 0) \ (_err) = EBUSY; \ else \ (_sc)->sc_iflist_ref++; \ - lck_mtx_unlock((_sc)->sc_mtx); \ + _BRIDGE_UNLOCK(_sc); \ } while (0) #define BRIDGE_UNREF(_sc) do { \ - lck_mtx_lock((_sc)->sc_mtx); \ + _BRIDGE_LOCK(_sc); \ (_sc)->sc_iflist_ref--; \ if (((_sc)->sc_iflist_xcnt > 0) && ((_sc)->sc_iflist_ref == 0)) { \ - lck_mtx_unlock((_sc)->sc_mtx); \ + _BRIDGE_UNLOCK(_sc); \ wakeup(&(_sc)->sc_cv); \ } else \ - lck_mtx_unlock((_sc)->sc_mtx); \ + _BRIDGE_UNLOCK(_sc); \ } while (0) #define BRIDGE_XLOCK(_sc) do { \ - lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED); \ + BRIDGE_LOCK_ASSERT_HELD(_sc); \ (_sc)->sc_iflist_xcnt++; \ while ((_sc)->sc_iflist_ref > 0) \ - msleep(&(_sc)->sc_cv, (_sc)->sc_mtx, PZERO, \ + msleep(&(_sc)->sc_cv, &(_sc)->sc_mtx, PZERO, \ "BRIDGE_XLOCK", NULL); \ } while (0) #define BRIDGE_XDROP(_sc) do { \ - lck_mtx_assert((_sc)->sc_mtx, LCK_MTX_ASSERT_OWNED); \ + BRIDGE_LOCK_ASSERT_HELD(_sc); \ (_sc)->sc_iflist_xcnt--; \ } while (0) #endif /* BRIDGE_DEBUG */ #if NBPFILTER > 0 -#define BRIDGE_BPF_MTAP_INPUT(sc, m) \ +#define BRIDGE_BPF_MTAP_INPUT(sc, m) \ if (sc->sc_bpf_input) \ bridge_bpf_input(sc->sc_ifp, m) #else /* NBPFILTER */ -#define BRIDGE_BPF_MTAP_INPUT(ifp, m) +#define BRIDGE_BPF_MTAP_INPUT(ifp, m) #endif /* NBPFILTER */ /* - * Size of the route hash table. Must be a power of two. + * Initial size of the route hash table. Must be a power of two. 
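[Note: the power-of-two requirement stated above is what lets BRIDGE_RTHASH_MASK() reduce a hash to a bucket index with a single AND instead of a modulo, and the resize path this patch adds has to preserve it for the mask to stay valid. The identity, as a standalone sketch:

#include <stdint.h>

/* size must be a power of two; then (hash & (size - 1)) == (hash % size) */
static inline uint32_t
rthash_bucket(uint32_t hash, uint32_t size)
{
	return (hash & (size - 1));
}
]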
*/ -/* APPLE MODIFICATION - per Wasabi performance improvement, change the hash table size */ -#if 0 -#ifndef BRIDGE_RTHASH_SIZE -#define BRIDGE_RTHASH_SIZE 1024 -#endif -#else #ifndef BRIDGE_RTHASH_SIZE -#define BRIDGE_RTHASH_SIZE 256 -#endif +#define BRIDGE_RTHASH_SIZE 16 #endif -/* APPLE MODIFICATION - support for HW checksums */ -#if APPLE_BRIDGE_HWCKSUM_SUPPORT -#include -#include -#endif +/* + * Maximum size of the routing hash table + */ +#define BRIDGE_RTHASH_SIZE_MAX 2048 -#define BRIDGE_RTHASH_MASK (BRIDGE_RTHASH_SIZE - 1) +#define BRIDGE_RTHASH_MASK(sc) ((sc)->sc_rthash_size - 1) /* * Maximum number of addresses to cache. @@ -286,19 +295,22 @@ struct bridge_iflist { TAILQ_ENTRY(bridge_iflist) bif_next; struct ifnet *bif_ifp; /* member if */ struct bstp_port bif_stp; /* STP state */ - uint32_t bif_flags; /* member if flags */ + uint32_t bif_ifflags; /* member if flags */ int bif_savedcaps; /* saved capabilities */ uint32_t bif_addrmax; /* max # of addresses */ uint32_t bif_addrcnt; /* cur. # of addresses */ - uint32_t bif_addrexceeded;/* # of address violations */ + uint32_t bif_addrexceeded; /* # of address violations */ interface_filter_t bif_iff_ref; struct bridge_softc *bif_sc; - char bif_promisc; /* promiscuous mode set */ - char bif_proto_attached; /* protocol attached */ - char bif_filter_attached; /* interface filter attached */ + uint32_t bif_flags; }; +#define BIFF_PROMISC 0x01 /* promiscuous mode set */ +#define BIFF_PROTO_ATTACHED 0x02 /* protocol attached */ +#define BIFF_FILTER_ATTACHED 0x04 /* interface filter attached */ +#define BIFF_MEDIA_ACTIVE 0x08 /* interface media active */ + /* * Bridge route node. */ @@ -314,13 +326,31 @@ struct bridge_rtnode { }; #define brt_ifp brt_dst->bif_ifp +/* + * Bridge delayed function call context + */ +typedef void (*bridge_delayed_func_t)(struct bridge_softc *); + +struct bridge_delayed_call { + struct bridge_softc *bdc_sc; + bridge_delayed_func_t bdc_func; /* Function to call */ + struct timespec bdc_ts; /* Time to call */ + u_int32_t bdc_flags; +}; + +#define BDCF_OUTSTANDING 0x01 /* Delayed call has been scheduled */ +#define BDCF_CANCELLING 0x02 /* May be waiting for call completion */ + /* * Software state for each bridge. */ + +LIST_HEAD(_bridge_rtnode_list, bridge_rtnode); + struct bridge_softc { struct ifnet *sc_ifp; /* make this an interface */ LIST_ENTRY(bridge_softc) sc_list; - lck_mtx_t *sc_mtx; + decl_lck_mtx_data(, sc_mtx); void *sc_cv; uint32_t sc_brtmax; /* max # of addresses */ uint32_t sc_brtcnt; /* cur. 
# of addresses */ @@ -328,35 +358,44 @@ struct bridge_softc { uint32_t sc_iflist_ref; /* refcount for sc_iflist */ uint32_t sc_iflist_xcnt; /* refcount for sc_iflist */ TAILQ_HEAD(, bridge_iflist) sc_iflist; /* member interface list */ - LIST_HEAD(, bridge_rtnode) *sc_rthash; /* our forwarding table */ - LIST_HEAD(, bridge_rtnode) sc_rtlist; /* list version of above */ + struct _bridge_rtnode_list *sc_rthash; /* our forwarding table */ + struct _bridge_rtnode_list sc_rtlist; /* list version of above */ uint32_t sc_rthash_key; /* key for hash */ + uint32_t sc_rthash_size; /* size of the hash table */ TAILQ_HEAD(, bridge_iflist) sc_spanlist; /* span ports list */ struct bstp_state sc_stp; /* STP state */ uint32_t sc_brtexceeded; /* # of cache drops */ uint32_t sc_filter_flags; /* ipf and flags */ + struct ifnet *sc_ifaddr; /* member mac copied from */ + u_char sc_defaddr[6]; /* Default MAC address */ char sc_if_xname[IFNAMSIZ]; bpf_packet_func sc_bpf_input; bpf_packet_func sc_bpf_output; u_int32_t sc_flags; + struct bridge_delayed_call sc_aging_timer; + struct bridge_delayed_call sc_resize_call; #if BRIDGE_DEBUG - void *lock_lr[BR_LCKDBG_MAX]; /* locking calling history */ + /* + * Locking and unlocking calling history + */ + void *lock_lr[BR_LCKDBG_MAX]; int next_lock_lr; - void *unlock_lr[BR_LCKDBG_MAX]; /* unlocking caller history */ + void *unlock_lr[BR_LCKDBG_MAX]; int next_unlock_lr; #endif /* BRIDGE_DEBUG */ }; -#define SCF_DETACHING 0x1 +#define SCF_DETACHING 0x01 +#define SCF_RESIZING 0x02 +#define SCF_MEDIA_ACTIVE 0x04 -decl_lck_mtx_data(static, bridge_list_mtx_data); -static lck_mtx_t *bridge_list_mtx = &bridge_list_mtx_data; +decl_lck_mtx_data(static, bridge_list_mtx); -int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; +static int bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD; -static zone_t bridge_rtnode_pool = NULL; +static zone_t bridge_rtnode_pool = NULL; static int bridge_clone_create(struct if_clone *, uint32_t, void *); static int bridge_clone_destroy(struct ifnet *); @@ -367,6 +406,7 @@ static void bridge_mutecaps(struct bridge_softc *); static void bridge_set_ifcap(struct bridge_softc *, struct bridge_iflist *, int); #endif +static errno_t bridge_set_tso(struct bridge_softc *); __private_extern__ void bridge_ifdetach(struct bridge_iflist *, struct ifnet *); static int bridge_init(struct ifnet *); #if HAS_BRIDGE_DUMMYNET @@ -374,11 +414,12 @@ static void bridge_dummynet(struct mbuf *, struct ifnet *); #endif static void bridge_ifstop(struct ifnet *, int); static int bridge_output(struct ifnet *, struct mbuf *); +static void bridge_finalize_cksum(struct ifnet *, struct mbuf *); static void bridge_start(struct ifnet *); __private_extern__ errno_t bridge_input(struct ifnet *, struct mbuf *, void *); #if BRIDGE_MEMBER_OUT_FILTER -static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t , - mbuf_t *); +static errno_t bridge_iff_output(void *, ifnet_t, protocol_family_t, + mbuf_t *); static int bridge_member_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); #endif @@ -387,9 +428,9 @@ static int bridge_enqueue(struct bridge_softc *, struct ifnet *, static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, - struct mbuf *m); + struct mbuf *); -static void bridge_timer(void *); +static void bridge_aging_timer(struct bridge_softc *sc); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, struct mbuf *, int); @@ -408,9 
+449,13 @@ static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, static int bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); +static void bridge_rthash_resize(struct bridge_softc *); + static int bridge_rtnode_addr_cmp(const uint8_t *, const uint8_t *); static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *, const uint8_t *, uint16_t); +static int bridge_rtnode_hash(struct bridge_softc *, + struct bridge_rtnode *); static int bridge_rtnode_insert(struct bridge_softc *, struct bridge_rtnode *); static void bridge_rtnode_destroy(struct bridge_softc *, @@ -466,33 +511,39 @@ static int bridge_ioctl_gifsstp32(struct bridge_softc *, void *); static int bridge_ioctl_gifsstp64(struct bridge_softc *, void *); static int bridge_ioctl_sproto(struct bridge_softc *, void *); static int bridge_ioctl_stxhc(struct bridge_softc *, void *); -static int bridge_ioctl_purge(struct bridge_softc *sc, void *arg); +static int bridge_ioctl_purge(struct bridge_softc *sc, void *); static int bridge_ioctl_gfilt(struct bridge_softc *, void *); static int bridge_ioctl_sfilt(struct bridge_softc *, void *); #ifdef PFIL_HOOKS static int bridge_pfil(struct mbuf **, struct ifnet *, struct ifnet *, int); -static int bridge_ip_checkbasic(struct mbuf **mp); +static int bridge_ip_checkbasic(struct mbuf **); #ifdef INET6 -static int bridge_ip6_checkbasic(struct mbuf **mp); +static int bridge_ip6_checkbasic(struct mbuf **); #endif /* INET6 */ static int bridge_fragment(struct ifnet *, struct mbuf *, struct ether_header *, int, struct llc *); #endif /* PFIL_HOOKS */ -static errno_t bridge_set_bpf_tap(ifnet_t ifn, bpf_tap_mode mode, bpf_packet_func bpf_callback); -__private_extern__ errno_t bridge_bpf_input(ifnet_t ifp, struct mbuf *m); -__private_extern__ errno_t bridge_bpf_output(ifnet_t ifp, struct mbuf *m); +static errno_t bridge_set_bpf_tap(ifnet_t, bpf_tap_mode, bpf_packet_func); +__private_extern__ errno_t bridge_bpf_input(ifnet_t, struct mbuf *); +__private_extern__ errno_t bridge_bpf_output(ifnet_t, struct mbuf *); -static void bridge_detach(ifnet_t ifp); +static void bridge_detach(ifnet_t); +static void bridge_link_event(struct ifnet *, u_int32_t); +static void bridge_iflinkevent(struct ifnet *); +static u_int32_t bridge_updatelinkstatus(struct bridge_softc *); +static int interface_media_active(struct ifnet *); +static void bridge_schedule_delayed_call(struct bridge_delayed_call *); +static void bridge_cancel_delayed_call(struct bridge_delayed_call *); -#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how) +#define m_copypacket(m, how) m_copym(m, 0, M_COPYALL, how) /* The default bridge vlan is 1 (IEEE 802.1Q-2003 Table 9-2) */ #define VLANTAGOF(_m) 0 u_int8_t bstp_etheraddr[ETHER_ADDR_LEN] = - { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; + { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; #if BRIDGESTP static struct bstp_cb_ops bridge_ops = { @@ -503,7 +554,32 @@ static struct bstp_cb_ops bridge_ops = { SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_BRIDGE, bridge, CTLFLAG_RW|CTLFLAG_LOCKED, 0, - "Bridge"); + "Bridge"); + +static int bridge_inherit_mac = 0; /* share MAC with first bridge member */ +SYSCTL_INT(_net_link_bridge, OID_AUTO, inherit_mac, + CTLFLAG_RW|CTLFLAG_LOCKED, + &bridge_inherit_mac, 0, + "Inherit MAC address from the first bridge member"); + +SYSCTL_INT(_net_link_bridge, OID_AUTO, rtable_prune_period, + CTLFLAG_RW|CTLFLAG_LOCKED, + &bridge_rtable_prune_period, 0, + "Interval between pruning of routing table"); + +static unsigned int 
bridge_rtable_hash_size_max = BRIDGE_RTHASH_SIZE_MAX; +SYSCTL_UINT(_net_link_bridge, OID_AUTO, rtable_hash_size_max, + CTLFLAG_RW|CTLFLAG_LOCKED, + &bridge_rtable_hash_size_max, 0, + "Maximum size of the routing hash table"); + +#if BRIDGE_DEBUG_DELAYED_CALLBACK +static int bridge_delayed_callback_delay = 0; +SYSCTL_INT(_net_link_bridge, OID_AUTO, delayed_callback_delay, + CTLFLAG_RW|CTLFLAG_LOCKED, + &bridge_delayed_callback_delay, 0, + "Delay before calling delayed function"); +#endif #if defined(PFIL_HOOKS) static int pfil_onlyip = 1; /* only pass IP[46] packets when pfil is enabled */ @@ -511,25 +587,25 @@ static int pfil_bridge = 1; /* run pfil hooks on the bridge interface */ static int pfil_member = 1; /* run pfil hooks on the member interface */ static int pfil_ipfw = 0; /* layer2 filter with ipfw */ static int pfil_ipfw_arp = 0; /* layer2 filter with ipfw */ -static int pfil_local_phys = 0; /* run pfil hooks on the physical interface for - locally destined packets */ +static int pfil_local_phys = 0; /* run pfil hooks on the physical interface */ + /* for locally destined packets */ SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_onlyip, CTLFLAG_RW|CTLFLAG_LOCKED, - &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled"); + &pfil_onlyip, 0, "Only pass IP packets when pfil is enabled"); SYSCTL_INT(_net_link_bridge, OID_AUTO, ipfw_arp, CTLFLAG_RW|CTLFLAG_LOCKED, - &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2"); + &pfil_ipfw_arp, 0, "Filter ARP packets through IPFW layer2"); SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_bridge, CTLFLAG_RW|CTLFLAG_LOCKED, - &pfil_bridge, 0, "Packet filter on the bridge interface"); + &pfil_bridge, 0, "Packet filter on the bridge interface"); SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_member, CTLFLAG_RW|CTLFLAG_LOCKED, - &pfil_member, 0, "Packet filter on the member interface"); + &pfil_member, 0, "Packet filter on the member interface"); SYSCTL_INT(_net_link_bridge, OID_AUTO, pfil_local_phys, - CTLFLAG_RW|CTLFLAG_LOCKED, &pfil_local_phys, 0, - "Packet filter on the physical interface for locally destined packets"); + CTLFLAG_RW|CTLFLAG_LOCKED, &pfil_local_phys, 0, + "Packet filter on the physical interface for locally destined packets"); #endif /* PFIL_HOOKS */ #if BRIDGESTP static int log_stp = 0; /* log STP state changes */ SYSCTL_INT(_net_link_bridge, OID_AUTO, log_stp, CTLFLAG_RW, - &log_stp, 0, "Log STP state changes"); + &log_stp, 0, "Log STP state changes"); #endif /* BRIDGESTP */ struct bridge_control { @@ -729,10 +805,10 @@ static const struct bridge_control bridge_control_table64[] = { }; static const unsigned int bridge_control_table_size = - sizeof (bridge_control_table32) / sizeof (bridge_control_table32[0]); + sizeof (bridge_control_table32) / sizeof (bridge_control_table32[0]); static LIST_HEAD(, bridge_softc) bridge_list = - LIST_HEAD_INITIALIZER(bridge_list); + LIST_HEAD_INITIALIZER(bridge_list); static lck_grp_t *bridge_lock_grp = NULL; static lck_attr_t *bridge_lock_attr = NULL; @@ -741,18 +817,18 @@ static if_clone_t bridge_cloner = NULL; static int if_bridge_txstart = 0; SYSCTL_INT(_net_link_bridge, OID_AUTO, txstart, CTLFLAG_RW | CTLFLAG_LOCKED, - &if_bridge_txstart, 0, "Bridge interface uses TXSTART model"); + &if_bridge_txstart, 0, "Bridge interface uses TXSTART model"); #if BRIDGE_DEBUG static int if_bridge_debug = 0; SYSCTL_INT(_net_link_bridge, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, - &if_bridge_debug, 0, "Bridge debug"); + &if_bridge_debug, 0, "Bridge debug"); -static void printf_ether_header(struct 
ether_header *eh); -static void printf_mbuf_data(mbuf_t m, size_t offset, size_t len); -static void printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix); -static void printf_mbuf(mbuf_t m, const char *prefix, const char *suffix); -static void link_print(struct sockaddr_dl *dl_p); +static void printf_ether_header(struct ether_header *); +static void printf_mbuf_data(mbuf_t, size_t, size_t); +static void printf_mbuf_pkthdr(mbuf_t, const char *, const char *); +static void printf_mbuf(mbuf_t, const char *, const char *); +static void link_print(struct sockaddr_dl *); static void bridge_lock(struct bridge_softc *); static void bridge_unlock(struct bridge_softc *); @@ -766,9 +842,9 @@ bridge_lock(struct bridge_softc *sc) { void *lr_saved = __builtin_return_address(0); - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED); + BRIDGE_LOCK_ASSERT_NOTHELD(sc); - lck_mtx_lock(sc->sc_mtx); + _BRIDGE_LOCK(sc); sc->lock_lr[sc->next_lock_lr] = lr_saved; sc->next_lock_lr = (sc->next_lock_lr+1) % SO_LCKDBG_MAX; @@ -779,12 +855,12 @@ bridge_unlock(struct bridge_softc *sc) { void *lr_saved = __builtin_return_address(0); - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED); + BRIDGE_LOCK_ASSERT_HELD(sc); sc->unlock_lr[sc->next_unlock_lr] = lr_saved; sc->next_unlock_lr = (sc->next_unlock_lr+1) % SO_LCKDBG_MAX; - lck_mtx_unlock(sc->sc_mtx); + _BRIDGE_UNLOCK(sc); } static int @@ -793,7 +869,7 @@ bridge_lock2ref(struct bridge_softc *sc) int error = 0; void *lr_saved = __builtin_return_address(0); - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED); + BRIDGE_LOCK_ASSERT_HELD(sc); if (sc->sc_iflist_xcnt > 0) error = EBUSY; @@ -802,7 +878,8 @@ bridge_lock2ref(struct bridge_softc *sc) sc->unlock_lr[sc->next_unlock_lr] = lr_saved; sc->next_unlock_lr = (sc->next_unlock_lr+1) % SO_LCKDBG_MAX; - lck_mtx_unlock(sc->sc_mtx); + + _BRIDGE_UNLOCK(sc); return (error); } @@ -812,9 +889,9 @@ bridge_unref(struct bridge_softc *sc) { void *lr_saved = __builtin_return_address(0); - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED); + BRIDGE_LOCK_ASSERT_NOTHELD(sc); - lck_mtx_lock(sc->sc_mtx); + _BRIDGE_LOCK(sc); sc->lock_lr[sc->next_lock_lr] = lr_saved; sc->next_lock_lr = (sc->next_lock_lr+1) % SO_LCKDBG_MAX; @@ -823,10 +900,10 @@ bridge_unref(struct bridge_softc *sc) sc->unlock_lr[sc->next_unlock_lr] = lr_saved; sc->next_unlock_lr = (sc->next_unlock_lr+1) % SO_LCKDBG_MAX; if ((sc->sc_iflist_xcnt > 0) && (sc->sc_iflist_ref == 0)) { - lck_mtx_unlock(sc->sc_mtx); + _BRIDGE_UNLOCK(sc); wakeup(&sc->sc_cv); } else - lck_mtx_unlock(sc->sc_mtx); + _BRIDGE_UNLOCK(sc); } static void @@ -834,14 +911,14 @@ bridge_xlock(struct bridge_softc *sc) { void *lr_saved = __builtin_return_address(0); - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED); + BRIDGE_LOCK_ASSERT_HELD(sc); sc->sc_iflist_xcnt++; while (sc->sc_iflist_ref > 0) { sc->unlock_lr[sc->next_unlock_lr] = lr_saved; sc->next_unlock_lr = (sc->next_unlock_lr+1) % SO_LCKDBG_MAX; - msleep(&sc->sc_cv, sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL); + msleep(&sc->sc_cv, &sc->sc_mtx, PZERO, "BRIDGE_XLOCK", NULL); sc->lock_lr[sc->next_lock_lr] = lr_saved; sc->next_lock_lr = (sc->next_lock_lr+1) % SO_LCKDBG_MAX; @@ -851,7 +928,7 @@ bridge_xlock(struct bridge_softc *sc) static void bridge_xdrop(struct bridge_softc *sc) { - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED); + BRIDGE_LOCK_ASSERT_HELD(sc); sc->sc_iflist_xcnt--; } @@ -860,10 +937,13 @@ void printf_mbuf_pkthdr(mbuf_t m, const char *prefix, const char *suffix) { if (m) - printf("%spktlen: %u rcvif: %p header: %p nextpkt: %p%s", + 
printf("%spktlen: %u rcvif: 0x%llx header: 0x%llx " + "nextpkt: 0x%llx%s", prefix ? prefix : "", (unsigned int)mbuf_pkthdr_len(m), - mbuf_pkthdr_rcvif(m), mbuf_pkthdr_header(m), - mbuf_nextpkt(m), suffix ? suffix : ""); + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)), + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_header(m)), + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_nextpkt(m)), + suffix ? suffix : ""); else printf("%s%s\n", prefix, suffix); } @@ -872,11 +952,14 @@ void printf_mbuf(mbuf_t m, const char *prefix, const char *suffix) { if (m) { - printf("%s%p type: %u flags: 0x%x len: %u data: %p maxlen: %u " - "datastart: %p next: %p%s", prefix ? prefix : "", - m, mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m), - mbuf_data(m), (unsigned int)mbuf_maxlen(m), - mbuf_datastart(m), mbuf_next(m), + printf("%s0x%llx type: %u flags: 0x%x len: %u data: 0x%llx " + "maxlen: %u datastart: 0x%llx next: 0x%llx%s", + prefix ? prefix : "", (uint64_t)VM_KERNEL_ADDRPERM(m), + mbuf_type(m), mbuf_flags(m), (unsigned int)mbuf_len(m), + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)), + (unsigned int)mbuf_maxlen(m), + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_datastart(m)), + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_next(m)), !suffix || (mbuf_flags(m) & MBUF_PKTHDR) ? "" : suffix); if ((mbuf_flags(m) & MBUF_PKTHDR)) printf_mbuf_pkthdr(m, " ", suffix); @@ -925,7 +1008,7 @@ printf_ether_header(struct ether_header *eh) eh->ether_shost[3], eh->ether_shost[4], eh->ether_shost[5], eh->ether_dhost[0], eh->ether_dhost[1], eh->ether_dhost[2], eh->ether_dhost[3], eh->ether_dhost[4], eh->ether_dhost[5], - eh->ether_type); + ntohs(eh->ether_type)); } static void @@ -935,12 +1018,12 @@ link_print(struct sockaddr_dl *dl_p) #if 1 printf("sdl len %d index %d family %d type 0x%x nlen %d alen %d" - " slen %d addr ", dl_p->sdl_len, - dl_p->sdl_index, dl_p->sdl_family, dl_p->sdl_type, - dl_p->sdl_nlen, dl_p->sdl_alen, dl_p->sdl_slen); + " slen %d addr ", dl_p->sdl_len, dl_p->sdl_index, + dl_p->sdl_family, dl_p->sdl_type, dl_p->sdl_nlen, + dl_p->sdl_alen, dl_p->sdl_slen); #endif for (i = 0; i < dl_p->sdl_alen; i++) - printf("%s%x", i ? ":" : "", (CONST_LLADDR(dl_p))[i]); + printf("%s%x", i ? ":" : "", (CONST_LLADDR(dl_p))[i]); printf("\n"); } @@ -952,8 +1035,9 @@ link_print(struct sockaddr_dl *dl_p) * Pseudo-device attach routine. */ __private_extern__ int -bridgeattach(__unused int n) +bridgeattach(int n) { +#pragma unused(n) int error; lck_grp_attr_t *lck_grp_attr = NULL; struct ifnet_clone_params ifnet_clone_params; @@ -972,7 +1056,7 @@ bridgeattach(__unused int n) lck_attr_setdebug(bridge_lock_attr); #endif - lck_mtx_init(bridge_list_mtx, bridge_lock_grp, bridge_lock_attr); + lck_mtx_init(&bridge_list_mtx, bridge_lock_grp, bridge_lock_attr); /* can free the attributes once we've allocated the group lock */ lck_grp_attr_free(lck_grp_attr); @@ -1037,21 +1121,22 @@ SYSCTL_PROC(_net_link_bridge, OID_AUTO, ipfw, CTLTYPE_INT|CTLFLAG_RW, * Create a new bridge instance. 
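[Note: the printf churn in the hunks above is one pattern applied consistently: debug output stops printing raw kernel pointers with %p and prints VM_KERNEL_ADDRPERM-permuted values instead, so logs do not leak kernel addresses. A condensed sketch of the pattern; log_mbuf is a hypothetical helper, not code from this patch:

#include <sys/kpi_mbuf.h>
#include <mach/vm_param.h>

static void
log_mbuf(mbuf_t m)
{
	/* never log raw kernel pointers; permute them first */
	printf("mbuf 0x%llx data 0x%llx\n",
	    (uint64_t)VM_KERNEL_ADDRPERM(m),
	    (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)));
}
]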
*/ static int -bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) +bridge_clone_create(struct if_clone *ifc, uint32_t unit, void *params) { +#pragma unused(params) struct ifnet *ifp = NULL; - struct bridge_softc *sc; - u_char eaddr[6]; + struct bridge_softc *sc, *sc2; struct ifnet_init_eparams init_params; errno_t error = 0; uint32_t sdl_buffer[offsetof(struct sockaddr_dl, sdl_data) + IFNAMSIZ + ETHER_ADDR_LEN]; struct sockaddr_dl *sdl = (struct sockaddr_dl *)sdl_buffer; + uint8_t eth_hostid[ETHER_ADDR_LEN]; + int fb, retry, has_hostid; - sc = _MALLOC(sizeof (*sc), M_DEVBUF, M_WAITOK); - memset(sc, 0, sizeof (*sc)); + sc = _MALLOC(sizeof (*sc), M_DEVBUF, M_WAITOK | M_ZERO); - sc->sc_mtx = lck_mtx_alloc_init(bridge_lock_grp, bridge_lock_attr); + lck_mtx_init(&sc->sc_mtx, bridge_lock_grp, bridge_lock_attr); sc->sc_brtmax = BRIDGE_RTABLE_MAX; sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT; sc->sc_filter_flags = IFBF_FILT_DEFAULT; @@ -1076,7 +1161,7 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) /* use the interface name as the unique id for ifp recycle */ snprintf(sc->sc_if_xname, sizeof (sc->sc_if_xname), "%s%d", - ifc->ifc_name, unit); + ifc->ifc_name, unit); bzero(&init_params, sizeof (init_params)); init_params.ver = IFNET_INIT_CURRENT_VERSION; init_params.len = sizeof (init_params); @@ -1097,7 +1182,7 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) init_params.add_proto = ether_add_proto; init_params.del_proto = ether_del_proto; init_params.check_multi = ether_check_multi; - init_params.framer = ether_frameout; + init_params.framer_extended = ether_frameout_extended; init_params.softc = sc; init_params.ioctl = bridge_ioctl; init_params.set_bpf_tap = bridge_set_bpf_tap; @@ -1134,53 +1219,52 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) goto done; } -#if 0 /* - * Generate a random ethernet address with a locally administered - * address. + * Generate an ethernet address with a locally administered address. * * Since we are using random ethernet addresses for the bridge, it is * possible that we might have address collisions, so make sure that * this hardware address isn't already in use on another bridge. + * The first try uses the "hostid" and falls back to read_random(); + * for "hostid", we use the MAC address of the first-encountered + * Ethernet-type interface that is currently configured. 
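[Note: the generation logic that follows normalizes byte 0 the same way in both the hostid-derived branch and the random fallback: clear the multicast bit and set the locally administered bit, yielding a valid LAA unicast address. As a standalone sketch (make_laa_unicast is illustrative):

#include <stdint.h>

static void
make_laa_unicast(uint8_t mac[6])
{
	mac[0] &= ~0x01;	/* clear I/G bit: unicast, not multicast */
	mac[0] |= 0x02;		/* set U/L bit: locally administered */
}
]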
*/ - { - int retry; - - for (retry = 1; retry != 0;) { - struct ifnet *bifp; - struct bridge_softc *sc2; - - read_random(eaddr, ETHER_ADDR_LEN); - eaddr[0] &= ~1; /* clear multicast bit */ - eaddr[0] |= 2; /* set the LAA bit */ - retry = 0; - lck_mtx_lock(bridge_list_mtx); - LIST_FOREACH(sc2, &bridge_list, sc_list) { - bifp = sc2->sc_ifp; - if (memcmp(eaddr, ifnet_lladdr(bifp), - ETHER_ADDR_LEN) == 0) - retry = 1; - } - lck_mtx_unlock(bridge_list_mtx); + fb = 0; + has_hostid = (uuid_get_ethernet(ð_hostid[0]) == 0); + for (retry = 1; retry != 0; ) { + if (fb || has_hostid == 0) { + read_random(&sc->sc_defaddr, ETHER_ADDR_LEN); + sc->sc_defaddr[0] &= ~1; /* clear multicast bit */ + sc->sc_defaddr[0] |= 2; /* set the LAA bit */ + } else { + bcopy(ð_hostid[0], &sc->sc_defaddr, + ETHER_ADDR_LEN); + sc->sc_defaddr[0] &= ~1; /* clear multicast bit */ + sc->sc_defaddr[0] |= 2; /* set the LAA bit */ + sc->sc_defaddr[3] = /* stir it up a bit */ + ((sc->sc_defaddr[3] & 0x0f) << 4) | + ((sc->sc_defaddr[3] & 0xf0) >> 4); + /* + * Mix in the LSB as it's actually pretty significant, + * see rdar://14076061 + */ + sc->sc_defaddr[4] = + (((sc->sc_defaddr[4] & 0x0f) << 4) | + ((sc->sc_defaddr[4] & 0xf0) >> 4)) ^ + sc->sc_defaddr[5]; + sc->sc_defaddr[5] = ifp->if_unit & 0xff; } - } -#else - /* - * Generate a random ethernet address and use the private AC:DE:48 - * OUI code. - */ - { - uint32_t r; - read_random(&r, sizeof (r)); - eaddr[0] = 0xAC; - eaddr[1] = 0xDE; - eaddr[2] = 0x48; - eaddr[3] = (r >> 0) & 0xffu; - eaddr[4] = (r >> 8) & 0xffu; - eaddr[5] = (r >> 16) & 0xffu; + fb = 1; + retry = 0; + lck_mtx_lock(&bridge_list_mtx); + LIST_FOREACH(sc2, &bridge_list, sc_list) { + if (memcmp(sc->sc_defaddr, + IF_LLADDR(sc2->sc_ifp), ETHER_ADDR_LEN) == 0) + retry = 1; + } + lck_mtx_unlock(&bridge_list_mtx); } -#endif memset(sdl, 0, sizeof (sdl_buffer)); sdl->sdl_family = AF_LINK; @@ -1188,10 +1272,12 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) sdl->sdl_alen = ETHER_ADDR_LEN; sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data); memcpy(sdl->sdl_data, sc->sc_if_xname, sdl->sdl_nlen); - memcpy(LLADDR(sdl), eaddr, ETHER_ADDR_LEN); + memcpy(LLADDR(sdl), sc->sc_defaddr, ETHER_ADDR_LEN); + + sc->sc_flags &= ~SCF_MEDIA_ACTIVE; #if BRIDGE_DEBUG - if (if_bridge_debug) + if (if_bridge_debug & BR_DBGF_LIFECYCLE) link_print(sdl); #endif @@ -1201,7 +1287,7 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) goto done; } - error = ifnet_set_lladdr_and_type(ifp, eaddr, ETHER_ADDR_LEN, + error = ifnet_set_lladdr_and_type(ifp, sc->sc_defaddr, ETHER_ADDR_LEN, IFT_ETHER); if (error != 0) { printf("%s: ifnet_set_lladdr_and_type failed %d\n", __func__, @@ -1209,26 +1295,23 @@ bridge_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) goto done; } -#if APPLE_BRIDGE_HWCKSUM_SUPPORT - /* - * APPLE MODIFICATION - our bridge can support HW checksums - * (useful if underlying interfaces support them) on TX, - * RX is not that interesting, since the stack just looks to - * see if the packet has been checksummed already (I think) - * but we might as well indicate we support it - */ - ifp->if_capabilities = - IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx | - IFCAP_CSUM_IPv4_Rx | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_UDPv4_Rx; -#endif + ifnet_set_offload(ifp, + IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP | + IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_MULTIPAGES); + + error = bridge_set_tso(sc); + if (error != 0) { + printf("%s: 
bridge_set_tso failed %d\n", __func__, error); + goto done; + } #if BRIDGESTP bstp_attach(&sc->sc_stp, &bridge_ops); #endif /* BRIDGESTP */ - lck_mtx_lock(bridge_list_mtx); + lck_mtx_lock(&bridge_list_mtx); LIST_INSERT_HEAD(&bridge_list, sc, sc_list); - lck_mtx_unlock(bridge_list_mtx); + lck_mtx_unlock(&bridge_list_mtx); /* attach as ethernet */ error = bpf_attach(ifp, DLT_EN10MB, sizeof (struct ether_header), @@ -1264,6 +1347,8 @@ bridge_clone_destroy(struct ifnet *ifp) bridge_ifstop(ifp, 1); + bridge_cancel_delayed_call(&sc->sc_resize_call); + error = ifnet_set_flags(ifp, 0, IFF_UP); if (error != 0) { printf("%s: ifnet_set_flags failed %d\n", __func__, error); @@ -1293,7 +1378,7 @@ bridge_clone_destroy(struct ifnet *ifp) return (0); } -#define DRVSPEC do { \ +#define DRVSPEC do { \ if (ifd->ifd_cmd >= bridge_control_table_size) { \ error = EINVAL; \ break; \ @@ -1340,7 +1425,6 @@ bridge_clone_destroy(struct ifnet *ifp) error = copyout(&args, ifd->ifd_data, ifd->ifd_len); \ } while (0) - /* * bridge_ioctl: * @@ -1351,17 +1435,18 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data) { struct bridge_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; + struct bridge_iflist *bif; int error = 0; - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED); + BRIDGE_LOCK_ASSERT_NOTHELD(sc); #if BRIDGE_DEBUG - if (if_bridge_debug) - printf("%s: ifp %p cmd 0x%08lx (%c%c [%lu] %c %lu)\n", - __func__, ifp, cmd, (cmd & IOC_IN) ? 'I' : ' ', + if (if_bridge_debug & BR_DBGF_IOCTL) + printf("%s: ifp %s cmd 0x%08lx (%c%c [%lu] %c %lu)\n", + __func__, ifp->if_xname, cmd, (cmd & IOC_IN) ? 'I' : ' ', (cmd & IOC_OUT) ? 'O' : ' ', IOCPARM_LEN(cmd), (char)IOCGROUP(cmd), cmd & 0xff); -#endif +#endif /* BRIDGE_DEBUG */ switch (cmd) { @@ -1371,9 +1456,35 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data) break; case SIOCGIFMEDIA32: - case SIOCGIFMEDIA64: - error = EINVAL; + case SIOCGIFMEDIA64: { + struct ifmediareq *ifmr = (struct ifmediareq *)data; + user_addr_t user_addr; + + user_addr = (cmd == SIOCGIFMEDIA64) ? 
+ ((struct ifmediareq64 *)ifmr)->ifmu_ulist : + CAST_USER_ADDR_T(((struct ifmediareq32 *)ifmr)->ifmu_ulist); + + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_mask = 0; + ifmr->ifm_count = 1; + + BRIDGE_LOCK(sc); + if (!(sc->sc_flags & SCF_DETACHING) && + (sc->sc_flags & SCF_MEDIA_ACTIVE)) { + ifmr->ifm_status |= IFM_ACTIVE; + ifmr->ifm_active = ifmr->ifm_current = + IFM_ETHER | IFM_AUTO; + } else { + ifmr->ifm_active = ifmr->ifm_current = IFM_NONE; + } + BRIDGE_UNLOCK(sc); + + if (user_addr != USER_ADDR_NULL) { + error = copyout(&ifmr->ifm_current, user_addr, + sizeof (int)); + } break; + } case SIOCADDMULTI: case SIOCDELMULTI: @@ -1442,29 +1553,51 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data) error = ifnet_set_lladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); if (error != 0) - printf("%s: ifnet_set_lladdr failed %d\n", __func__, + printf("%s: SIOCSIFLLADDR error %d\n", ifp->if_xname, error); break; case SIOCSIFMTU: - /* Do not allow the MTU to be changed on the bridge */ - error = EINVAL; + if (ifr->ifr_mtu < 576) { + error = EINVAL; + break; + } + BRIDGE_LOCK(sc); + if (TAILQ_EMPTY(&sc->sc_iflist)) { + sc->sc_ifp->if_mtu = ifr->ifr_mtu; + BRIDGE_UNLOCK(sc); + break; + } + TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { + if (bif->bif_ifp->if_mtu != (unsigned)ifr->ifr_mtu) { + printf("%s: invalid MTU: %u(%s) != %d\n", + sc->sc_ifp->if_xname, + bif->bif_ifp->if_mtu, + bif->bif_ifp->if_xname, ifr->ifr_mtu); + error = EINVAL; + break; + } + } + if (!error) + sc->sc_ifp->if_mtu = ifr->ifr_mtu; + BRIDGE_UNLOCK(sc); break; default: error = ether_ioctl(ifp, cmd, data); #if BRIDGE_DEBUG if (error != 0 && error != EOPNOTSUPP) - printf("%s: ether_ioctl ifp %p cmd 0x%08lx " + printf("%s: ifp %s cmd 0x%08lx " "(%c%c [%lu] %c %lu) failed error: %d\n", - __func__, ifp, cmd, (cmd & IOC_IN) ? 'I' : ' ', + __func__, ifp->if_xname, cmd, + (cmd & IOC_IN) ? 'I' : ' ', (cmd & IOC_OUT) ? 
'O' : ' ', IOCPARM_LEN(cmd), (char)IOCGROUP(cmd), cmd & 0xff, error); #endif /* BRIDGE_DEBUG */ break; } - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED); + BRIDGE_LOCK_ASSERT_NOTHELD(sc); return (error); } @@ -1516,13 +1649,106 @@ bridge_set_ifcap(struct bridge_softc *sc, struct bridge_iflist *bif, int set) error = (*ifp->if_ioctl)(ifp, SIOCSIFCAP, (caddr_t)&ifr); IFF_UNLOCKGIANT(ifp); if (error) - printf("%s: error setting interface capabilities " - "on %s\n", __func__, ifnet_name(sc->sc_ifp), - ifnet_unit(sc->sc_ifp), ifp->if_xname); + printf("%s: %s error setting interface capabilities " + "on %s\n", __func__, sc->sc_ifp->if_xname, + ifp->if_xname); } } #endif /* HAS_IF_CAP */ +static errno_t +bridge_set_tso(struct bridge_softc *sc) +{ + struct bridge_iflist *bif; + u_int32_t tso_v4_mtu; + u_int32_t tso_v6_mtu; + ifnet_offload_t offload; + errno_t error = 0; + + /* By default, support TSO */ + offload = sc->sc_ifp->if_hwassist | IFNET_TSO_IPV4 | IFNET_TSO_IPV6; + tso_v4_mtu = IP_MAXPACKET; + tso_v6_mtu = IP_MAXPACKET; + + /* Use the lowest common denominator of the members */ + TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { + ifnet_t ifp = bif->bif_ifp; + + if (ifp == NULL) + continue; + + if (offload & IFNET_TSO_IPV4) { + if (ifp->if_hwassist & IFNET_TSO_IPV4) { + if (tso_v4_mtu > ifp->if_tso_v4_mtu) + tso_v4_mtu = ifp->if_tso_v4_mtu; + } else { + offload &= ~IFNET_TSO_IPV4; + tso_v4_mtu = 0; + } + } + if (offload & IFNET_TSO_IPV6) { + if (ifp->if_hwassist & IFNET_TSO_IPV6) { + if (tso_v6_mtu > ifp->if_tso_v6_mtu) + tso_v6_mtu = ifp->if_tso_v6_mtu; + } else { + offload &= ~IFNET_TSO_IPV6; + tso_v6_mtu = 0; + } + } + } + + if (offload != sc->sc_ifp->if_hwassist) { + error = ifnet_set_offload(sc->sc_ifp, offload); + if (error != 0) { +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: ifnet_set_offload(%s, 0x%x) " + "failed %d\n", __func__, + sc->sc_ifp->if_xname, offload, error); +#endif /* BRIDGE_DEBUG */ + goto done; + } + /* + * For ifnet_set_tso_mtu() sake, the TSO MTU must be at least + * as large as the interface MTU + */ + if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV4) { + if (tso_v4_mtu < sc->sc_ifp->if_mtu) + tso_v4_mtu = sc->sc_ifp->if_mtu; + error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET, + tso_v4_mtu); + if (error != 0) { +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: ifnet_set_tso_mtu(%s, " + "AF_INET, %u) failed %d\n", + __func__, sc->sc_ifp->if_xname, + tso_v4_mtu, error); +#endif /* BRIDGE_DEBUG */ + goto done; + } + } + if (sc->sc_ifp->if_hwassist & IFNET_TSO_IPV6) { + if (tso_v6_mtu < sc->sc_ifp->if_mtu) + tso_v6_mtu = sc->sc_ifp->if_mtu; + error = ifnet_set_tso_mtu(sc->sc_ifp, AF_INET6, + tso_v6_mtu); + if (error != 0) { +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: ifnet_set_tso_mtu(%s, " + "AF_INET6, %u) failed %d\n", + __func__, sc->sc_ifp->if_xname, + tso_v6_mtu, error); +#endif /* BRIDGE_DEBUG */ + goto done; + } + } + } +done: + return (error); +} + /* * bridge_lookup_member: * @@ -1533,15 +1759,12 @@ bridge_lookup_member(struct bridge_softc *sc, const char *name) { struct bridge_iflist *bif; struct ifnet *ifp; - char if_xname[IFNAMSIZ]; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { ifp = bif->bif_ifp; - snprintf(if_xname, sizeof (if_xname), "%s%d", - ifnet_name(ifp), ifnet_unit(ifp)); - if (strncmp(if_xname, name, sizeof (if_xname)) == 0) + if (strcmp(ifp->if_xname, name) == 0) return (bif); } @@ -1558,7 +1781,7 @@ 
bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp) { struct bridge_iflist *bif; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { if (bif->bif_ifp == member_ifp) @@ -1569,9 +1792,10 @@ bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp) } static errno_t -bridge_iff_input(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, - mbuf_t *data, char **frame_ptr) +bridge_iff_input(void *cookie, ifnet_t ifp, protocol_family_t protocol, + mbuf_t *data, char **frame_ptr) { +#pragma unused(protocol) errno_t error = 0; struct bridge_iflist *bif = (struct bridge_iflist *)cookie; struct bridge_softc *sc = bif->bif_sc; @@ -1588,14 +1812,15 @@ bridge_iff_input(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, frmlen = (char *)mbuf_data(m) - *frame_ptr; } #if BRIDGE_DEBUG - if (if_bridge_debug) { - printf("%s: %s%d from %s%d m %p data %p frame %p %s " - "frmlen %lu\n", __func__, ifnet_name(sc->sc_ifp), - ifnet_unit(sc->sc_ifp), ifnet_name(ifp), ifnet_unit(ifp), - m, mbuf_data(m), *frame_ptr, + if (if_bridge_debug & BR_DBGF_INPUT) { + printf("%s: %s from %s m 0x%llx data 0x%llx frame 0x%llx %s " + "frmlen %lu\n", __func__, sc->sc_ifp->if_xname, + ifp->if_xname, (uint64_t)VM_KERNEL_ADDRPERM(m), + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m)), + (uint64_t)VM_KERNEL_ADDRPERM(*frame_ptr), included ? "inside" : "outside", frmlen); - if (if_bridge_debug > 1) { + if (if_bridge_debug & BR_DBGF_MBUF) { printf_mbuf(m, "bridge_iff_input[", "\n"); printf_ether_header((struct ether_header *) (void *)*frame_ptr); @@ -1624,23 +1849,25 @@ bridge_iff_input(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, (void) mbuf_pkthdr_adjustlen(m, -frmlen); } #if BRIDGE_DEBUG - if (if_bridge_debug > 1) { + if ((if_bridge_debug & BR_DBGF_INPUT) && + (if_bridge_debug & BR_DBGF_MBUF)) { printf("\n"); printf_mbuf(m, "bridge_iff_input]", "\n"); } #endif /* BRIDGE_DEBUG */ out: - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED); + BRIDGE_LOCK_ASSERT_NOTHELD(sc); return (error); } - #if BRIDGE_MEMBER_OUT_FILTER static errno_t -bridge_iff_output(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, mbuf_t *data) +bridge_iff_output(void *cookie, ifnet_t ifp, protocol_family_t protocol, + mbuf_t *data) { +#pragma unused(protocol) errno_t error = 0; struct bridge_iflist *bif = (struct bridge_iflist *)cookie; struct bridge_softc *sc = bif->bif_sc; @@ -1650,10 +1877,11 @@ bridge_iff_output(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol goto out; #if BRIDGE_DEBUG - if (if_bridge_debug) { - printf("%s: %s%d from %s%d m %p data %p\n", __func__, - ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp), - ifnet_name(ifp), ifnet_unit(ifp), m, mbuf_data(m)); + if (if_bridge_debug & BR_DBGF_OUTPPUT) { + printf("%s: %s from %s m 0x%llx data 0x%llx\n", __func__, + sc->sc_ifp->if_xname, ifp->if_xname, + (uint64_t)VM_KERNEL_ADDRPERM(m), + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m))); } #endif /* BRIDGE_DEBUG */ @@ -1664,54 +1892,68 @@ bridge_iff_output(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol } out: - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_NOTOWNED); + BRIDGE_LOCK_ASSERT_NOTHELD(sc); return (error); } #endif /* BRIDGE_MEMBER_OUT_FILTER */ - static void -bridge_iff_event(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, - const struct kev_msg *event_msg) +bridge_iff_event(void *cookie, ifnet_t ifp, protocol_family_t protocol, + const struct kev_msg *event_msg) { 
+#pragma unused(protocol) struct bridge_iflist *bif = (struct bridge_iflist *)cookie; + struct bridge_softc *sc = bif->bif_sc; if (event_msg->vendor_code == KEV_VENDOR_APPLE && - event_msg->kev_class == KEV_NETWORK_CLASS && - event_msg->kev_subclass == KEV_DL_SUBCLASS) { + event_msg->kev_class == KEV_NETWORK_CLASS && + event_msg->kev_subclass == KEV_DL_SUBCLASS) { +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: %s event_code %u - %s\n", __func__, + ifp->if_xname, event_msg->event_code, + dlil_kev_dl_code_str(event_msg->event_code)); +#endif /* BRIDGE_DEBUG */ + switch (event_msg->event_code) { case KEV_DL_IF_DETACHING: - case KEV_DL_IF_DETACHED: + case KEV_DL_IF_DETACHED: { bridge_ifdetach(bif, ifp); break; - + } case KEV_DL_LINK_OFF: case KEV_DL_LINK_ON: { + bridge_iflinkevent(ifp); #if BRIDGESTP bstp_linkstate(ifp, event_msg->event_code); #endif /* BRIDGESTP */ break; } - case KEV_DL_SIFFLAGS: { - if (bif->bif_promisc == 0 && + if ((bif->bif_flags & BIFF_PROMISC) == 0 && (ifp->if_flags & IFF_UP)) { - errno_t error = - ifnet_set_promiscuous(ifp, 1); + errno_t error; + + error = ifnet_set_promiscuous(ifp, 1); if (error != 0) { printf("%s: " - "ifnet_set_promiscuous" - "(%s%d) failed %d\n", - __func__, ifnet_name(ifp), - ifnet_unit(ifp), error); + "ifnet_set_promiscuous (%s)" + " failed %d\n", + __func__, ifp->if_xname, + error); } else { - bif->bif_promisc = 1; + bif->bif_flags |= BIFF_PROMISC; } } break; } - + case KEV_DL_IFCAP_CHANGED: { + BRIDGE_LOCK(sc); + bridge_set_tso(sc); + BRIDGE_UNLOCK(sc); + break; + } default: break; } @@ -1725,13 +1967,14 @@ bridge_iff_event(void *cookie, ifnet_t ifp, __unused protocol_family_t protocol, * interface is detaching. */ static void -bridge_iff_detached(void *cookie, __unused ifnet_t ifp) +bridge_iff_detached(void *cookie, ifnet_t ifp) { struct bridge_iflist *bif = (struct bridge_iflist *)cookie; #if BRIDGE_DEBUG - printf("%s: %s%d\n", __func__, ifnet_name(ifp), ifnet_unit(ifp)); -#endif + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: %s\n", __func__, ifp->if_xname); +#endif /* BRIDGE_DEBUG */ bridge_ifdetach(bif, ifp); @@ -1739,11 +1982,14 @@ bridge_iff_detached(void *cookie, __unused ifnet_t ifp) } static errno_t -bridge_proto_input(ifnet_t ifp, __unused protocol_family_t protocol, - __unused mbuf_t packet, __unused char *header) +bridge_proto_input(ifnet_t ifp, protocol_family_t protocol, mbuf_t packet, + char *header) { - printf("%s: unexpected packet from %s%d\n", __func__, - ifnet_name(ifp), ifnet_unit(ifp)); +#pragma unused(protocol, packet, header) +#if BRIDGE_DEBUG + printf("%s: unexpected packet from %s\n", __func__, + ifp->if_xname); +#endif /* BRIDGE_DEBUG */ return (0); } @@ -1753,15 +1999,18 @@ bridge_attach_protocol(struct ifnet *ifp) int error; struct ifnet_attach_proto_param reg; - printf("%s: %s%d\n", __func__, ifnet_name(ifp), ifnet_unit(ifp)); +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: %s\n", __func__, ifp->if_xname); +#endif /* BRIDGE_DEBUG */ bzero(®, sizeof (reg)); reg.input = bridge_proto_input; error = ifnet_attach_protocol(ifp, PF_BRIDGE, ®); if (error) - printf("%s: ifnet_attach_protocol(%s%d) failed, %d\n", - __func__, ifnet_name(ifp), ifnet_unit(ifp), error); + printf("%s: ifnet_attach_protocol(%s) failed, %d\n", + __func__, ifp->if_xname, error); return (error); } @@ -1769,14 +2018,16 @@ bridge_attach_protocol(struct ifnet *ifp) static int bridge_detach_protocol(struct ifnet *ifp) { - int error; - - printf("%s: %s%d\n", __func__, ifnet_name(ifp), 
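/*
 * [Context note] bridge_attach_protocol() above registers a placeholder
 * PF_BRIDGE protocol on each member; its only handler,
 * bridge_proto_input(), just logs, since member traffic is normally
 * diverted by the bridge before protocol demux would reach it. The KPI
 * usage, condensed from the code above (error handling elided):
 *
 *	struct ifnet_attach_proto_param reg;
 *
 *	bzero(&reg, sizeof (reg));
 *	reg.input = bridge_proto_input;		// "unexpected packet" logger
 *	error = ifnet_attach_protocol(ifp, PF_BRIDGE, &reg);
 *
 * with a symmetric ifnet_detach_protocol(ifp, PF_BRIDGE) on teardown, as
 * in bridge_detach_protocol() below.
 */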
ifnet_unit(ifp)); + int error; +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: %s\n", __func__, ifp->if_xname); +#endif /* BRIDGE_DEBUG */ error = ifnet_detach_protocol(ifp, PF_BRIDGE); if (error) - printf("%s: ifnet_detach_protocol(%s%d) failed, %d\n", - __func__, ifnet_name(ifp), ifnet_unit(ifp), error); + printf("%s: ifnet_detach_protocol(%s) failed, %d\n", + __func__, ifp->if_xname, error); return (error); } @@ -1788,11 +2039,15 @@ bridge_detach_protocol(struct ifnet *ifp) */ static void bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, - int gone) + int gone) { - struct ifnet *ifs = bif->bif_ifp; + struct ifnet *ifs = bif->bif_ifp, *bifp = sc->sc_ifp; + int lladdr_changed = 0, error, filt_attached; + uint8_t eaddr[ETHER_ADDR_LEN]; + u_int32_t event_code = 0; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); + VERIFY(ifs != NULL); if (!gone) { switch (ifs->if_type) { @@ -1801,18 +2056,16 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, /* * Take the interface out of promiscuous mode. */ - if (bif->bif_promisc) + if (bif->bif_flags & BIFF_PROMISC) (void) ifnet_set_promiscuous(ifs, 0); break; case IFT_GIF: - break; - + /* currently not supported */ + /* FALLTHRU */ default: -#ifdef DIAGNOSTIC - panic("bridge_delete_member: impossible"); -#endif - break; + VERIFY(0); + /* NOTREACHED */ } #if HAS_IF_CAP @@ -1821,45 +2074,83 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, #endif } - if (bif->bif_proto_attached) { + if (bif->bif_flags & BIFF_PROTO_ATTACHED) { /* Respect lock ordering with DLIL lock */ BRIDGE_UNLOCK(sc); (void) bridge_detach_protocol(ifs); BRIDGE_LOCK(sc); } #if BRIDGESTP - if (bif->bif_flags & IFBIF_STP) + if (bif->bif_ifflags & IFBIF_STP) bstp_disable(&bif->bif_stp); #endif /* BRIDGESTP */ - ifs->if_bridge = NULL; BRIDGE_XLOCK(sc); TAILQ_REMOVE(&sc->sc_iflist, bif, bif_next); BRIDGE_XDROP(sc); - ifnet_release(ifs); + /* + * If removing the interface that gave the bridge its mac address, set + * the mac address of the bridge to the address of the next member, or + * to its default address if no members are left. 
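 * For instance (illustrative interface names): if bridge0 inherited its
 * address from member en0 and en1 remains in the list, deleting en0
 * re-points sc_ifaddr at en1 and copies en1's link-layer address into
 * eaddr; with no members left, eaddr falls back to the randomly generated
 * sc_defaddr. Note below that the actual ifnet_set_lladdr() call is
 * deferred until after BRIDGE_UNLOCK(), respecting lock ordering.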
+ */ + if (bridge_inherit_mac && sc->sc_ifaddr == ifs) { + ifnet_release(sc->sc_ifaddr); + if (TAILQ_EMPTY(&sc->sc_iflist)) { + bcopy(sc->sc_defaddr, eaddr, ETHER_ADDR_LEN); + sc->sc_ifaddr = NULL; + } else { + struct ifnet *fif = + TAILQ_FIRST(&sc->sc_iflist)->bif_ifp; + bcopy(IF_LLADDR(fif), eaddr, ETHER_ADDR_LEN); + sc->sc_ifaddr = fif; + ifnet_reference(fif); /* for sc_ifaddr */ + } + lladdr_changed = 1; + } #if HAS_IF_CAP - bridge_mutecaps(sc); /* recalcuate now this interface is removed */ + bridge_mutecaps(sc); /* recalculate now this interface is removed */ #endif /* HAS_IF_CAP */ + + error = bridge_set_tso(sc); + if (error != 0) { + printf("%s: bridge_set_tso failed %d\n", __func__, error); + } + bridge_rtdelete(sc, ifs, IFBF_FLUSHALL); KASSERT(bif->bif_addrcnt == 0, ("%s: %d bridge routes referenced", __func__, bif->bif_addrcnt)); -#if BRIDGESTP + filt_attached = bif->bif_flags & BIFF_FILTER_ATTACHED; + + /* + * Update link status of the bridge based on its remaining members + */ + event_code = bridge_updatelinkstatus(sc); + BRIDGE_UNLOCK(sc); + + if (lladdr_changed && + (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) + printf("%s: ifnet_set_lladdr failed %d\n", __func__, error); + + if (event_code != 0) + bridge_link_event(bifp, event_code); + +#if BRIDGESTP bstp_destroy(&bif->bif_stp); /* prepare to free */ - BRIDGE_LOCK(sc); #endif /* BRIDGESTP */ - if (bif->bif_filter_attached) { - /* Respect lock ordering with DLIL lock */ - BRIDGE_UNLOCK(sc); + if (filt_attached) iflt_detach(bif->bif_iff_ref); - BRIDGE_LOCK(sc); - } else { + else _FREE(bif, M_DEVBUF); - } + + ifs->if_bridge = NULL; + ifnet_release(ifs); + + BRIDGE_LOCK(sc); } /* @@ -1870,7 +2161,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, static void bridge_delete_span(struct bridge_softc *sc, struct bridge_iflist *bif) { - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); KASSERT(bif->bif_ifp->if_bridge == NULL, ("%s: not a span interface", __func__)); @@ -1886,9 +2177,11 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif = NULL; - struct ifnet *ifs; - int error = 0; + struct ifnet *ifs, *bifp = sc->sc_ifp; + int error = 0, lladdr_changed = 0; + uint8_t eaddr[ETHER_ADDR_LEN]; struct iff_filter iff; + u_int32_t event_code = 0; ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) @@ -1901,41 +2194,64 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) if (ifs == bif->bif_ifp) return (EBUSY); - /* Allow the first Ethernet member to define the MTU */ - if (ifs->if_type != IFT_GIF) { - if (TAILQ_EMPTY(&sc->sc_iflist)) - sc->sc_ifp->if_mtu = ifs->if_mtu; - else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { - printf("%s: %s%d: invalid MTU for %s%d", __func__, - ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp), - ifnet_name(ifs), ifnet_unit(ifs)); - return (EINVAL); - } - } - if (ifs->if_bridge == sc) return (EEXIST); if (ifs->if_bridge != NULL) return (EBUSY); - bif = _MALLOC(sizeof (*bif), M_DEVBUF, M_NOWAIT|M_ZERO); + switch (ifs->if_type) { + case IFT_ETHER: + case IFT_L2VLAN: + /* permitted interface types */ + break; + case IFT_GIF: + /* currently not supported */ + /* FALLTHRU */ + default: + return (EINVAL); + } + + bif = _MALLOC(sizeof (*bif), M_DEVBUF, M_NOWAIT | M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; - bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER; + ifnet_reference(ifs); + bif->bif_ifflags = IFBIF_LEARNING | IFBIF_DISCOVER; #if HAS_IF_CAP bif->bif_savedcaps = ifs->if_capenable; #endif /* 
HAS_IF_CAP */ bif->bif_sc = sc; - ifnet_reference(ifs); + /* Allow the first Ethernet member to define the MTU */ + if (TAILQ_EMPTY(&sc->sc_iflist)) + sc->sc_ifp->if_mtu = ifs->if_mtu; + else if (sc->sc_ifp->if_mtu != ifs->if_mtu) { + printf("%s: %s: invalid MTU for %s", __func__, + sc->sc_ifp->if_xname, + ifs->if_xname); + return (EINVAL); + } + + /* + * Assign the interface's MAC address to the bridge if it's the first + * member and the MAC address of the bridge has not been changed from + * the default (randomly) generated one. + */ + if (bridge_inherit_mac && TAILQ_EMPTY(&sc->sc_iflist) && + !memcmp(IF_LLADDR(sc->sc_ifp), sc->sc_defaddr, ETHER_ADDR_LEN)) { + bcopy(IF_LLADDR(ifs), eaddr, ETHER_ADDR_LEN); + sc->sc_ifaddr = ifs; + ifnet_reference(ifs); /* for sc_ifaddr */ + lladdr_changed = 1; + } ifs->if_bridge = sc; #if BRIDGESTP bstp_create(&sc->sc_stp, &bif->bif_stp, bif->bif_ifp); #endif /* BRIDGESTP */ + /* * XXX: XLOCK HERE!?! */ @@ -1946,13 +2262,15 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) bridge_mutecaps(sc); #endif /* HAS_IF_CAP */ + bridge_set_tso(sc); + + /* + * Place the interface into promiscuous mode. + */ switch (ifs->if_type) { case IFT_ETHER: case IFT_L2VLAN: - /* - * Place the interface into promiscuous mode. - */ error = ifnet_set_promiscuous(ifs, 1); if (error) { /* Ignore error when device is not up */ @@ -1960,19 +2278,25 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) goto out; error = 0; } else { - bif->bif_promisc = 1; + bif->bif_flags |= BIFF_PROMISC; } break; - case IFT_GIF: - break; - default: - error = EINVAL; - goto out; + break; } - /* + /* + * The new member may change the link status of the bridge interface + */ + if (interface_media_active(ifs)) + bif->bif_flags |= BIFF_MEDIA_ACTIVE; + else + bif->bif_flags &= ~BIFF_MEDIA_ACTIVE; + + event_code = bridge_updatelinkstatus(sc); + + /* * Respect lock ordering with DLIL lock for the following operations */ BRIDGE_UNLOCK(sc); @@ -1989,13 +2313,13 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) #endif /* BRIDGE_MEMBER_OUT_FILTER */ iff.iff_event = bridge_iff_event; iff.iff_detached = bridge_iff_detached; - error = iflt_attach(ifs, &iff, &bif->bif_iff_ref); + error = dlil_attach_filter(ifs, &iff, &bif->bif_iff_ref, DLIL_IFF_TSO); if (error != 0) { printf("%s: iflt_attach failed %d\n", __func__, error); BRIDGE_LOCK(sc); goto out; } - bif->bif_filter_attached = 1; + bif->bif_flags |= BIFF_FILTER_ATTACHED; /* * install an dummy "bridge" protocol @@ -2008,7 +2332,14 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) goto out; } } - bif->bif_proto_attached = 1; + bif->bif_flags |= BIFF_PROTO_ATTACHED; + + if (lladdr_changed && + (error = ifnet_set_lladdr(bifp, eaddr, ETHER_ADDR_LEN)) != 0) + printf("%s: ifnet_set_lladdr failed %d\n", __func__, error); + + if (event_code != 0) + bridge_link_event(bifp, event_code); BRIDGE_LOCK(sc); @@ -2035,8 +2366,9 @@ bridge_ioctl_del(struct bridge_softc *sc, void *arg) } static int -bridge_ioctl_purge(__unused struct bridge_softc *sc, __unused void *arg) +bridge_ioctl_purge(struct bridge_softc *sc, void *arg) { +#pragma unused(sc, arg) return (0); } @@ -2052,7 +2384,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) return (ENOENT); bp = &bif->bif_stp; - req->ifbr_ifsflags = bif->bif_flags; + req->ifbr_ifsflags = bif->bif_ifflags; req->ifbr_state = bp->bp_state; req->ifbr_priority = bp->bp_priority; req->ifbr_path_cost = bp->bp_path_cost; @@ -2101,13 +2433,13 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) #if BRIDGESTP 
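/*
 * [Review note] In bridge_ioctl_add() above, the bif allocation and
 * ifnet_reference(ifs) are taken before the MTU-mismatch check, so the
 * early return (EINVAL) appears to leak both the allocation and the
 * reference. A defensively ordered sketch would validate before committing
 * resources (illustrative re-ordering, not what the patch does):
 *
 *	if (!TAILQ_EMPTY(&sc->sc_iflist) &&
 *	    sc->sc_ifp->if_mtu != ifs->if_mtu)
 *		return (EINVAL);		// nothing acquired yet
 *
 *	bif = _MALLOC(sizeof (*bif), M_DEVBUF, M_NOWAIT | M_ZERO);
 *	if (bif == NULL)
 *		return (ENOMEM);
 *	ifnet_reference(ifs);			// released on any later error
 */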
if (req->ifbr_ifsflags & IFBIF_STP) { - if ((bif->bif_flags & IFBIF_STP) == 0) { + if ((bif->bif_ifflags & IFBIF_STP) == 0) { error = bstp_enable(&bif->bif_stp); if (error) return (error); } } else { - if ((bif->bif_flags & IFBIF_STP) != 0) + if ((bif->bif_ifflags & IFBIF_STP) != 0) bstp_disable(&bif->bif_stp); } @@ -2123,7 +2455,7 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) #endif /* !BRIDGESTP */ /* Save the bits relating to the bridge */ - bif->bif_flags = req->ifbr_ifsflags & IFBIFMASK; + bif->bif_ifflags = req->ifbr_ifsflags & IFBIFMASK; return (0); @@ -2150,8 +2482,7 @@ bridge_ioctl_gcache(struct bridge_softc *sc, void *arg) return (0); } - -#define BRIDGE_IOCTL_GIFS do { \ +#define BRIDGE_IOCTL_GIFS do { \ struct bridge_iflist *bif; \ struct ifbreq breq; \ char *buf, *outbuf; \ @@ -2181,8 +2512,7 @@ bridge_ioctl_gcache(struct bridge_softc *sc, void *arg) break; \ \ snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \ - "%s%d", ifnet_name(bif->bif_ifp), \ - ifnet_unit(bif->bif_ifp)); \ + "%s", bif->bif_ifp->if_xname); \ /* Fill in the ifbreq structure */ \ error = bridge_ioctl_gifflags(sc, &breq); \ if (error) \ @@ -2197,9 +2527,8 @@ bridge_ioctl_gcache(struct bridge_softc *sc, void *arg) break; \ \ snprintf(breq.ifbr_ifsname, sizeof (breq.ifbr_ifsname), \ - "%s%d", ifnet_name(bif->bif_ifp), \ - ifnet_unit(bif->bif_ifp)); \ - breq.ifbr_ifsflags = bif->bif_flags; \ + "%s", bif->bif_ifp->if_xname); \ + breq.ifbr_ifsflags = bif->bif_ifflags; \ breq.ifbr_portno = bif->bif_ifp->if_index & 0xfff; \ memcpy(buf, &breq, sizeof (breq)); \ count++; \ @@ -2236,12 +2565,11 @@ bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg) return (error); } - -#define BRIDGE_IOCTL_RTS do { \ +#define BRIDGE_IOCTL_RTS do { \ struct bridge_rtnode *brt; \ char *buf, *outbuf; \ unsigned int count, buflen, len; \ - struct timespec now; \ + unsigned long now; \ \ if (bac->ifbac_len == 0) \ return (0); \ @@ -2262,16 +2590,15 @@ bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg) LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { \ if (len < sizeof (bareq)) \ goto out; \ - snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \ - "%s%d", ifnet_name(brt->brt_ifp), \ - ifnet_unit(brt->brt_ifp)); \ + snprintf(bareq.ifba_ifsname, sizeof (bareq.ifba_ifsname), \ + "%s", brt->brt_ifp->if_xname); \ memcpy(bareq.ifba_dst, brt->brt_addr, sizeof (brt->brt_addr)); \ bareq.ifba_vlan = brt->brt_vlan; \ if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { \ - nanouptime(&now); \ - if ((unsigned long)now.tv_sec < brt->brt_expire) \ + now = (unsigned long) net_uptime(); \ + if (now < brt->brt_expire) \ bareq.ifba_expire = \ - brt->brt_expire - now.tv_sec; \ + brt->brt_expire - now; \ } else \ bareq.ifba_expire = 0; \ bareq.ifba_flags = brt->brt_flags; \ @@ -2283,7 +2610,7 @@ bridge_ioctl_gifs32(struct bridge_softc *sc, void *arg) } \ out: \ BRIDGE_UNLOCK(sc); \ - bac->ifbac_len = sizeof (bareq) * count; \ + bac->ifbac_len = sizeof (bareq) * count; \ error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len); \ BRIDGE_LOCK(sc); \ _FREE(outbuf, M_TEMP); \ @@ -2581,19 +2908,21 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) switch (ifs->if_type) { case IFT_ETHER: - case IFT_GIF: case IFT_L2VLAN: break; + case IFT_GIF: + /* currently not supported */ + /* FALLTHRU */ default: return (EINVAL); } - bif = _MALLOC(sizeof (*bif), M_DEVBUF, M_NOWAIT|M_ZERO); + bif = _MALLOC(sizeof (*bif), M_DEVBUF, M_NOWAIT | M_ZERO); if (bif == NULL) return (ENOMEM); bif->bif_ifp = ifs; - bif->bif_flags = IFBIF_SPAN; 
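/*
 * [Illustrative sketch] The BRIDGE_IOCTL_GIFS/BRIDGE_IOCTL_RTS macros
 * above follow the usual list-export shape: count the entries, build a
 * kernel-side snapshot under the bridge lock, then copyout() with the lock
 * dropped. Condensed from the RTS tail visible above (buffer sizing and
 * error paths trimmed):
 *
 *	// fill outbuf while still holding BRIDGE_LOCK, then:
 *	BRIDGE_UNLOCK(sc);
 *	bac->ifbac_len = sizeof (bareq) * count;
 *	error = copyout(outbuf, bac->ifbac_req, bac->ifbac_len);
 *	BRIDGE_LOCK(sc);
 *	_FREE(outbuf, M_TEMP);
 *
 * Copying out of a private snapshot keeps the fault-able copyout() outside
 * the window where the list could change underneath it.
 */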
+ bif->bif_ifflags = IFBIF_SPAN; ifnet_reference(bif->bif_ifp); @@ -2625,7 +2954,7 @@ bridge_ioctl_delspan(struct bridge_softc *sc, void *arg) return (0); } -#define BRIDGE_IOCTL_GBPARAM do { \ +#define BRIDGE_IOCTL_GBPARAM do { \ struct bstp_state *bs = &sc->sc_stp; \ struct bstp_port *root_port; \ \ @@ -2679,7 +3008,7 @@ bridge_ioctl_grte(struct bridge_softc *sc, void *arg) return (0); } -#define BRIDGE_IOCTL_GIFSSTP do { \ +#define BRIDGE_IOCTL_GIFSSTP do { \ struct bridge_iflist *bif; \ struct bstp_port *bp; \ struct ifbpstpreq bpreq; \ @@ -2688,7 +3017,7 @@ bridge_ioctl_grte(struct bridge_softc *sc, void *arg) \ count = 0; \ TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { \ - if ((bif->bif_flags & IFBIF_STP) != 0) \ + if ((bif->bif_ifflags & IFBIF_STP) != 0) \ count++; \ } \ \ @@ -2710,7 +3039,7 @@ bridge_ioctl_grte(struct bridge_softc *sc, void *arg) if (len < sizeof (bpreq)) \ break; \ \ - if ((bif->bif_flags & IFBIF_STP) == 0) \ + if ((bif->bif_ifflags & IFBIF_STP) == 0) \ continue; \ \ bp = &bif->bif_stp; \ @@ -2795,23 +3124,22 @@ bridge_ifdetach(struct bridge_iflist *bif, struct ifnet *ifp) struct bridge_softc *sc = ifp->if_bridge; #if BRIDGE_DEBUG - printf("%s: %s%d\n", __func__, ifnet_name(ifp), ifnet_unit(ifp)); -#endif + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: %s\n", __func__, ifp->if_xname); +#endif /* BRIDGE_DEBUG */ /* Check if the interface is a bridge member */ if (sc != NULL) { BRIDGE_LOCK(sc); - bif = bridge_lookup_member_if(sc, ifp); if (bif != NULL) bridge_delete_member(sc, bif, 1); - BRIDGE_UNLOCK(sc); return; } /* Check if the interface is a span port */ - lck_mtx_lock(bridge_list_mtx); + lck_mtx_lock(&bridge_list_mtx); LIST_FOREACH(sc, &bridge_list, sc_list) { BRIDGE_LOCK(sc); TAILQ_FOREACH(bif, &sc->sc_spanlist, bif_next) @@ -2819,60 +3147,288 @@ bridge_ifdetach(struct bridge_iflist *bif, struct ifnet *ifp) bridge_delete_span(sc, bif); break; } - BRIDGE_UNLOCK(sc); } - lck_mtx_unlock(bridge_list_mtx); + lck_mtx_unlock(&bridge_list_mtx); } /* - * bridge_init: + * interface_media_active: * - * Initialize a bridge interface. + * Tells whether an interface's media is active. */ static int -bridge_init(struct ifnet *ifp) +interface_media_active(struct ifnet *ifp) { - struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc; - struct timespec ts; - errno_t error; - - BRIDGE_LOCK_ASSERT(sc); - - if ((ifnet_flags(ifp) & IFF_RUNNING)) - return (0); + struct ifmediareq ifmr; + int status = 0; - ts.tv_sec = bridge_rtable_prune_period; - ts.tv_nsec = 0; - bsd_timeout(bridge_timer, sc, &ts); - - error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING); -#if BRIDGESTP - if (error == 0) - bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */ -#endif /* BRIDGESTP */ + bzero(&ifmr, sizeof(ifmr)); + if (ifnet_ioctl(ifp, 0, SIOCGIFMEDIA, &ifmr) == 0) { + if ((ifmr.ifm_status & IFM_AVALID) && ifmr.ifm_count > 0) + status = ifmr.ifm_status & IFM_ACTIVE ? 1 : 0; + } - return (error); + return (status); } /* - * bridge_ifstop: + * bridge_updatelinkstatus: * - * Stop the bridge interface. + * Update the media active status of the bridge based on the + * media active status of its members. + * If changed, return the corresponding on/off link event. 
*/ -static void -bridge_ifstop(struct ifnet *ifp, __unused int disable) +static u_int32_t +bridge_updatelinkstatus(struct bridge_softc *sc) { - struct bridge_softc *sc = ifp->if_softc; - - BRIDGE_LOCK_ASSERT(sc); + struct bridge_iflist *bif; + int active_member = 0; + u_int32_t event_code = 0; - if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) - return; + BRIDGE_LOCK_ASSERT_HELD(sc); - bsd_untimeout(bridge_timer, sc); -#if BRIDGESTP - bstp_stop(&sc->sc_stp); + /* + * Find out if we have an active interface + */ + TAILQ_FOREACH(bif, &sc->sc_iflist, bif_next) { + if (bif->bif_flags & BIFF_MEDIA_ACTIVE) { + active_member = 1; + break; + } + } + + if (active_member && !(sc->sc_flags & SCF_MEDIA_ACTIVE)) { + sc->sc_flags |= SCF_MEDIA_ACTIVE; + event_code = KEV_DL_LINK_ON; + } else if (!active_member && (sc->sc_flags & SCF_MEDIA_ACTIVE)) { + sc->sc_flags &= ~SCF_MEDIA_ACTIVE; + event_code = KEV_DL_LINK_OFF; + } + + return (event_code); +} + +/* + * bridge_iflinkevent: + */ +static void +bridge_iflinkevent(struct ifnet *ifp) +{ + struct bridge_softc *sc = ifp->if_bridge; + struct bridge_iflist *bif; + u_int32_t event_code = 0; + +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: %s\n", __func__, ifp->if_xname); +#endif /* BRIDGE_DEBUG */ + + /* Check if the interface is a bridge member */ + if (sc == NULL) + return; + + BRIDGE_LOCK(sc); + bif = bridge_lookup_member_if(sc, ifp); + if (bif != NULL) { + if (interface_media_active(ifp)) + bif->bif_flags |= BIFF_MEDIA_ACTIVE; + else + bif->bif_flags &= ~BIFF_MEDIA_ACTIVE; + + event_code = bridge_updatelinkstatus(sc); + } + BRIDGE_UNLOCK(sc); + + if (event_code != 0) + bridge_link_event(sc->sc_ifp, event_code); +} + +/* + * bridge_delayed_callback: + * + * Makes a delayed call + */ +static void +bridge_delayed_callback(void *param) +{ + struct bridge_delayed_call *call = (struct bridge_delayed_call *)param; + struct bridge_softc *sc = call->bdc_sc; + +#if BRIDGE_DEBUG_DELAYED_CALLBACK + if (bridge_delayed_callback_delay > 0) { + struct timespec ts; + + ts.tv_sec = bridge_delayed_callback_delay; + ts.tv_nsec = 0; + + printf("%s: sleeping for %d seconds\n", + __func__, bridge_delayed_callback_delay); + + msleep(&bridge_delayed_callback_delay, NULL, PZERO, + __func__, &ts); + + printf("%s: awoken\n", __func__); + } +#endif /* BRIDGE_DEBUG_DELAYED_CALLBACK */ + + BRIDGE_LOCK(sc); + +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_DELAYED_CALL) + printf("%s: %s call 0x%llx flags 0x%x\n", __func__, + sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call), + call->bdc_flags); +#endif /* BRIDGE_DEBUG */ + + if (call->bdc_flags & BDCF_CANCELLING) { + wakeup(call); + } else { + if ((sc->sc_flags & SCF_DETACHING) == 0) + (*call->bdc_func)(sc); + } + call->bdc_flags &= ~BDCF_OUTSTANDING; + BRIDGE_UNLOCK(sc); +} + +/* + * bridge_schedule_delayed_call: + * + * Schedule a function to be called on a separate thread + * The actual call may be scheduled to run at a given time or ASAP. 
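 * The "given time" is carried as a relative timespec in bdc_ts and
 * converted to an absolute deadline for thread_call_func_delayed(), as the
 * body below does; condensed:
 *
 *	uint64_t ival, deadline;
 *
 *	nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC +
 *	    ts.tv_nsec, &ival);
 *	clock_absolutetime_interval_to_deadline(ival, &deadline);
 *
 * A zero timespec leaves the deadline at 0, which effectively schedules
 * the call to run as soon as possible.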
+ */ +static void +bridge_schedule_delayed_call(struct bridge_delayed_call *call) +{ + uint64_t deadline = 0; + struct bridge_softc *sc = call->bdc_sc; + + BRIDGE_LOCK_ASSERT_HELD(sc); + + if ((sc->sc_flags & SCF_DETACHING) || + (call->bdc_flags & (BDCF_OUTSTANDING | BDCF_CANCELLING))) + return; + + if (call->bdc_ts.tv_sec || call->bdc_ts.tv_nsec) { + nanoseconds_to_absolutetime( + (uint64_t)call->bdc_ts.tv_sec * NSEC_PER_SEC + + call->bdc_ts.tv_nsec, &deadline); + clock_absolutetime_interval_to_deadline(deadline, &deadline); + } + + call->bdc_flags = BDCF_OUTSTANDING; + +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_DELAYED_CALL) + printf("%s: %s call 0x%llx flags 0x%x\n", __func__, + sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call), + call->bdc_flags); +#endif /* BRIDGE_DEBUG */ + + thread_call_func_delayed((thread_call_func_t)bridge_delayed_callback, + call, deadline); +} + +/* + * bridge_cancel_delayed_call: + * + * Cancel a queued or running delayed call. + * If call is running, does not return until the call is done to + * prevent a race condition with the bridge interface getting destroyed + */ +static void +bridge_cancel_delayed_call(struct bridge_delayed_call *call) +{ + boolean_t result; + struct bridge_softc *sc = call->bdc_sc; + + /* + * The call was never scheduled + */ + if (sc == NULL) + return; + + BRIDGE_LOCK_ASSERT_HELD(sc); + + call->bdc_flags |= BDCF_CANCELLING; + + while (call->bdc_flags & BDCF_OUTSTANDING) { +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_DELAYED_CALL) + printf("%s: %s call 0x%llx flags 0x%x\n", __func__, + sc->sc_if_xname, (uint64_t)VM_KERNEL_ADDRPERM(call), + call->bdc_flags); +#endif /* BRIDGE_DEBUG */ + result = thread_call_func_cancel( + (thread_call_func_t)bridge_delayed_callback, call, FALSE); + + if (result) { + /* + * We managed to dequeue the delayed call + */ + call->bdc_flags &= ~BDCF_OUTSTANDING; + } else { + /* + * Wait for the delayed call to be done running + */ + msleep(call, &sc->sc_mtx, PZERO, __func__, NULL); + } + } + call->bdc_flags &= ~BDCF_CANCELLING; +} + +/* + * bridge_init: + * + * Initialize a bridge interface. + */ +static int +bridge_init(struct ifnet *ifp) +{ + struct bridge_softc *sc = (struct bridge_softc *)ifp->if_softc; + errno_t error; + + BRIDGE_LOCK_ASSERT_HELD(sc); + + if ((ifnet_flags(ifp) & IFF_RUNNING)) + return (0); + + error = ifnet_set_flags(ifp, IFF_RUNNING, IFF_RUNNING); + + /* + * Calling bridge_aging_timer() is OK as there are no entries to + * age so we're just going to arm the timer + */ + bridge_aging_timer(sc); + +#if BRIDGESTP + if (error == 0) + bstp_init(&sc->sc_stp); /* Initialize Spanning Tree */ +#endif /* BRIDGESTP */ + + return (error); +} + +/* + * bridge_ifstop: + * + * Stop the bridge interface. 
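 * Stopping also tears down the aging timer through
 * bridge_cancel_delayed_call() above, whose cancel/wait handshake is worth
 * spelling out; condensed (the bridge lock sc_mtx is held on entry):
 *
 *	call->bdc_flags |= BDCF_CANCELLING;
 *	while (call->bdc_flags & BDCF_OUTSTANDING) {
 *		if (thread_call_func_cancel(fn, call, FALSE))
 *			call->bdc_flags &= ~BDCF_OUTSTANDING;  // dequeued
 *		else
 *			msleep(call, &sc->sc_mtx, PZERO, __func__, NULL);
 *	}
 *	call->bdc_flags &= ~BDCF_CANCELLING;
 *
 * A callback that is already running sees BDCF_CANCELLING, skips its work,
 * and does wakeup(call), so the msleep() above always terminates.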
+ */ +static void +bridge_ifstop(struct ifnet *ifp, int disable) +{ +#pragma unused(disable) + struct bridge_softc *sc = ifp->if_softc; + + BRIDGE_LOCK_ASSERT_HELD(sc); + + if ((ifnet_flags(ifp) & IFF_RUNNING) == 0) + return; + + bridge_cancel_delayed_call(&sc->sc_aging_timer); + +#if BRIDGESTP + bstp_stop(&sc->sc_stp); #endif /* BRIDGESTP */ bridge_rtflush(sc, IFBF_FLUSHDYN); @@ -2911,6 +3467,8 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) mflags = m->m_flags; m->m_flags |= M_PROTO1; /* set to avoid loops */ + bridge_finalize_cksum(dst_ifp, m); + #if HAS_IF_CAP /* * If underlying interface can not do VLAN tag insertion itself @@ -2920,9 +3478,8 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) (dst_ifp->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) { m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); if (m == NULL) { - printf("%s: %s%d: unable to prepend VLAN " - "header\n", __func__, ifnet_name(dst_ifp), - ifnet_unit(dst_ifp)); + printf("%s: %s: unable to prepend VLAN " + "header\n", __func__, dst_ifp->if_xname); (void) ifnet_stat_increment_out(dst_ifp, 0, 0, 1); continue; @@ -2979,11 +3536,7 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp) return; } - if (PFIL_HOOKED(&inet_pfil_hook) -#ifdef INET6 - || PFIL_HOOKED(&inet6_pfil_hook) -#endif - ) { + if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) { if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) return; if (m == NULL) @@ -3006,18 +3559,18 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp) * enqueue or free the mbuf before returning. */ static int -bridge_member_output(struct ifnet *ifp, struct mbuf *m, - __unused struct sockaddr *sa, __unused struct rtentry *rt) +bridge_member_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, + struct rtentry *rt) { +#pragma unused(sa, rt) struct ether_header *eh; struct ifnet *dst_if; struct bridge_softc *sc; uint16_t vlan; #if BRIDGE_DEBUG - if (if_bridge_debug) - printf("%s: ifp %p %s%d\n", __func__, ifp, ifnet_name(ifp), - ifnet_unit(ifp)); + if (if_bridge_debug & BR_DBGF_OUTPPUT) + printf("%s: ifp %s\n", __func__, ifp->if_xname); #endif /* BRIDGE_DEBUG */ if (m->m_len < ETHER_HDR_LEN) { @@ -3087,7 +3640,7 @@ bridge_member_output(struct ifnet *ifp, struct mbuf *m, * tree, make sure the port is in a state that * allows forwarding. 
*/ - if (dst_if != ifp && (bif->bif_flags & IFBIF_STP) && + if (dst_if != ifp && (bif->bif_ifflags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; @@ -3129,301 +3682,6 @@ sendunicast: } #endif /* BRIDGE_MEMBER_OUT_FILTER */ -#if APPLE_BRIDGE_HWCKSUM_SUPPORT -static struct mbuf * -bridge_fix_txcsum(struct mbuf *m) -{ - /* - * basic tests indicate that the vast majority of packets being - * processed here have an Ethernet header mbuf pre-pended to them - * (the first case below) - * - * the second highest are those where the Ethernet and IP/TCP/UDP - * headers are all in one mbuf (second case below) - * - * the third case has, in fact, never hit for me -- although if I - * comment out the first two cases, that code works for them, so I - * consider it a decent general solution - */ - int amt = ETHER_HDR_LEN; - int hlen = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data); - int off = M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data); - - /* - * NOTE we should never get vlan-attached packets here; - * support for those COULD be added, but we don't use them - * and it really kinda slows things down to worry about them - */ - -#ifdef DIAGNOSTIC - if (m_tag_find(m, PACKET_TAG_VLAN, NULL) != NULL) { - printf("%s: transmitting packet tagged with VLAN?\n", __func__); - KASSERT(0); - m_freem(m); - return (NULL); - } -#endif - - if (m->m_pkthdr.csum_flags & M_CSUM_IPv4) { - amt += hlen; - } - if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4) { - amt += off + sizeof (uint16_t); - } - - if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) { - amt += off + sizeof (uint16_t); - } - - if (m->m_len == ETHER_HDR_LEN) { - /* - * this is the case where there's an Ethernet header in an - * mbuf the first mbuf is the Ethernet header -- just strip - * it off and do the checksum - */ - /* set up m_ip so the cksum operations work */ - struct mbuf *m_ip = m->m_next; - - /* APPLE MODIFICATION 22 Apr 2008 - * Clear the m_tag list before setting - * M_PKTHDR. - * - * If this m_buf chain was extended via M_PREPEND(), then - * m_ip->m_pkthdr is identical to m->m_pkthdr (see - * M_MOVE_PKTHDR()). The only thing preventing access to this - * invalid packet header data is the fact that the M_PKTHDR - * flag is clear, i.e., m_ip->m_flag & M_PKTHDR == 0, but we're - * about to set the M_PKTHDR flag, so to be safe we initialize, - * more accurately, we clear, m_ip->m_pkthdr.tags via - * m_tag_init(). - * - * Suppose that we do not do this; if m_pullup(), below, fails, - * then m_ip will be freed along with m_ip->m_pkthdr.tags, but - * we will also free m soon after, via m_freem(), and - * consequently attempt to free m->m_pkthdr.tags in the - * process. The problem is that m->m_pkthdr.tags will have - * already been freed by virtue of being equal to - * m_ip->m_pkthdr.tags. Attempts to dereference - * m->m_pkthdr.tags in m_tag_delete_chain() will result in a - * panic. 
- */ - m_tag_init(m_ip); - /* END MODIFICATION */ - m_ip->m_flags |= M_PKTHDR; - m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags; - m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data; - m_ip->m_pkthdr.len = m->m_pkthdr.len - ETHER_HDR_LEN; - - /* - * set up the header mbuf so we can prepend it - * back on again later - */ - m->m_pkthdr.csum_flags = 0; - m->m_pkthdr.csum_data = 0; - m->m_pkthdr.len = ETHER_HDR_LEN; - m->m_next = NULL; - - /* now do the checksums we need -- first IP */ - if (m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4) { - /* - * make sure the IP header (or at least the part with - * the cksum) is there - */ - m_ip = m_pullup(m_ip, sizeof (struct ip)); - if (m_ip == NULL) { - printf("%s: failed to flatten header\n", - __func__); - m_freem(m); - return (NULL); - } - - /* now do the checksum */ - { - struct ip *ip = mtod(m_ip, struct ip *); - ip->ip_sum = in_cksum(m_ip, hlen); - -#ifdef VERY_VERY_VERY_DIAGNOSTIC - printf("%s: performed IPv4 checksum\n", - __func__); -#endif - } - } - - /* now do a TCP or UDP delayed checksum */ - if (m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { - in_delayed_cksum(m_ip); - -#ifdef VERY_VERY_VERY_DIAGNOSTIC - printf("%s: performed TCPv4/UDPv4 checksum\n", - __func__); -#endif - } - - /* now attach the ethernet header back onto the IP packet */ - m->m_next = m_ip; - m->m_pkthdr.len += m_length(m_ip); - - /* - * clear the M_PKTHDR flags on the ip packet (again, - * we re-attach later) - */ - m_ip->m_flags &= ~M_PKTHDR; - - /* and clear any csum flags */ - m->m_pkthdr.csum_flags &= - ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4); - } else if (m->m_len >= amt) { - /* - * everything fits in the first mbuf, so futz with - * m->m_data, m->m_len and m->m_pkthdr.len to make it work - */ - m->m_len -= ETHER_HDR_LEN; - m->m_data += ETHER_HDR_LEN; - m->m_pkthdr.len -= ETHER_HDR_LEN; - - /* now do the checksums we need -- first IP */ - if (m->m_pkthdr.csum_flags & M_CSUM_IPv4) { - struct ip *ip = mtod(m, struct ip *); - ip->ip_sum = in_cksum(m, hlen); - -#ifdef VERY_VERY_VERY_DIAGNOSTIC - printf("%s: performed IPv4 checksum\n", __func__); -#endif - } - - // now do a TCP or UDP delayed checksum - if (m->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { - in_delayed_cksum(m); - -#ifdef VERY_VERY_VERY_DIAGNOSTIC - printf("%s: performed TCPv4/UDPv4 checksum\n", - __func__); -#endif - } - - /* now stick the ethernet header back on */ - m->m_len += ETHER_HDR_LEN; - m->m_data -= ETHER_HDR_LEN; - m->m_pkthdr.len += ETHER_HDR_LEN; - - /* and clear any csum flags */ - m->m_pkthdr.csum_flags &= - ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4); - } else { - struct mbuf *m_ip; - - /* - * general case -- need to simply split it off and deal - * first, calculate how much needs to be made writable - * (we may have a read-only mbuf here) - */ - hlen = M_CSUM_DATA_IPv4_IPHL(m->m_pkthdr.csum_data); -#if PARANOID - off = M_CSUM_DATA_IPv4_OFFSET(m->m_pkthdr.csum_data); - - if (m->m_pkthdr.csum_flags & M_CSUM_IPv4) { - amt += hlen; - } - - if (m->m_pkthdr.csum_flags & M_CSUM_TCPv4) { - amt += sizeof (struct tcphdr *); - amt += off; - } - - if (m->m_pkthdr.csum_flags & M_CSUM_UDPv4) { - amt += sizeof (struct udphdr *); - amt += off; - } -#endif - - /* - * now split the ethernet header off of the IP packet - * (we'll re-attach later) - */ - m_ip = m_split(m, ETHER_HDR_LEN, M_NOWAIT); - if (m_ip == NULL) { - printf("%s: could not split ether header\n", __func__); - - m_freem(m); - return (NULL); - } - -#if PARANOID - /* - * make sure that the IP packet is writable - * for the 
portion we need - */ - if (m_makewritable(&m_ip, 0, amt, M_DONTWAIT) != 0) { - printf("%s: could not make %d bytes writable\n", - __func__, amt); - - m_freem(m); - m_freem(m_ip); - return (NULL); - } -#endif - - m_ip->m_pkthdr.csum_flags = m->m_pkthdr.csum_flags; - m_ip->m_pkthdr.csum_data = m->m_pkthdr.csum_data; - - m->m_pkthdr.csum_flags = 0; - m->m_pkthdr.csum_data = 0; - - /* now do the checksums we need -- first IP */ - if (m_ip->m_pkthdr.csum_flags & M_CSUM_IPv4) { - /* - * make sure the IP header (or at least the part - * with the cksum) is there - */ - m_ip = m_pullup(m_ip, sizeof (struct ip)); - if (m_ip == NULL) { - printf("%s: failed to flatten header\n", - __func__); - m_freem(m); - return (NULL); - } - - /* now do the checksum */ - { - struct ip *ip = mtod(m_ip, struct ip *); - ip->ip_sum = in_cksum(m_ip, hlen); - -#ifdef VERY_VERY_VERY_DIAGNOSTIC - printf("%s: performed IPv4 checksum\n", - __func__); -#endif - } - } - - /* now do a TCP or UDP delayed checksum */ - if (m_ip->m_pkthdr.csum_flags & (M_CSUM_TCPv4|M_CSUM_UDPv4)) { - in_delayed_cksum(m_ip); - -#ifdef VERY_VERY_VERY_DIAGNOSTIC - printf("%s: performed TCPv4/UDPv4 checksum\n", - __func__); -#endif - } - - // now attach the ethernet header back onto the IP packet - m->m_next = m_ip; - m->m_pkthdr.len += m_length(m_ip); - - /* - * clear the M_PKTHDR flags on the ip packet - * (again, we re-attach later) - */ - m_ip->m_flags &= ~M_PKTHDR; - - /* and clear any csum flags */ - m->m_pkthdr.csum_flags &= - ~(M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4); - } - - return (m); -} -#endif - /* * Output callback. * @@ -3442,37 +3700,10 @@ bridge_output(struct ifnet *ifp, struct mbuf *m) dst_if = NULL; BRIDGE_LOCK(sc); - if (!(m->m_flags & (M_BCAST|M_MCAST))) { + if (!(m->m_flags & (M_BCAST|M_MCAST))) dst_if = bridge_rtlookup(sc, eh->ether_dhost, 0); - } - -#if APPLE_BRIDGE_HWCKSUM_SUPPORT - /* - * APPLE MODIFICATION - if the packet needs a checksum - * (i.e., checksum has been deferred for HW support) - * AND the destination interface doesn't support HW - * checksums, then we need to fix-up the checksum here - */ - if ((m->m_pkthdr.csum_flags & - (M_CSUM_TCPv4|M_CSUM_UDPv4|M_CSUM_IPv4)) && - (dst_if == NULL || - (dst_if->if_csum_flags_tx & m->m_pkthdr.csum_flags) != - m->m_pkthdr.csum_flags)) { - m = bridge_fix_txcsum(m); - if (m == NULL) { - BRIDGE_UNLOCK(sc); - return (0); - } - } -#else - if (eh->ether_type == htons(ETHERTYPE_IP)) - mbuf_outbound_finalize(m, PF_INET, sizeof (*eh)); - else - m->m_pkthdr.csum_flags = 0; -#endif /* APPLE_BRIDGE_HWCKSUM_SUPPORT */ - atomic_add_64(&ifp->if_obytes, m->m_pkthdr.len); - atomic_add_64(&ifp->if_opackets, 1); + (void) ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0); #if NBPFILTER > 0 if (sc->sc_bpf_output) @@ -3490,6 +3721,67 @@ bridge_output(struct ifnet *ifp, struct mbuf *m) return (error); } +static void +bridge_finalize_cksum(struct ifnet *ifp, struct mbuf *m) +{ + struct ether_header *eh = mtod(m, struct ether_header *); + uint32_t sw_csum, hwcap; + + if (ifp != NULL) + hwcap = (ifp->if_hwassist | CSUM_DATA_VALID); + else + hwcap = 0; + + /* do in software what the hardware cannot */ + sw_csum = m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_FLAGS(hwcap); + sw_csum &= IF_HWASSIST_CSUM_MASK; + + switch (ntohs(eh->ether_type)) { + case ETHERTYPE_IP: + if ((hwcap & CSUM_PARTIAL) && !(sw_csum & CSUM_DELAY_DATA) && + (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) { + if (m->m_pkthdr.csum_flags & CSUM_TCP) { + uint16_t start = + sizeof (*eh) + sizeof (struct ip); + uint16_t ulpoff = + 
m->m_pkthdr.csum_data & 0xffff; + m->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PARTIAL); + m->m_pkthdr.csum_tx_stuff = (ulpoff + start); + m->m_pkthdr.csum_tx_start = start; + } else { + sw_csum |= (CSUM_DELAY_DATA & + m->m_pkthdr.csum_flags); + } + } + (void) in_finalize_cksum(m, sizeof (*eh), sw_csum); + break; + +#if INET6 + case ETHERTYPE_IPV6: + if ((hwcap & CSUM_PARTIAL) && + !(sw_csum & CSUM_DELAY_IPV6_DATA) && + (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) { + if (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) { + uint16_t start = + sizeof (*eh) + sizeof (struct ip6_hdr); + uint16_t ulpoff = + m->m_pkthdr.csum_data & 0xffff; + m->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PARTIAL); + m->m_pkthdr.csum_tx_stuff = (ulpoff + start); + m->m_pkthdr.csum_tx_start = start; + } else { + sw_csum |= (CSUM_DELAY_IPV6_DATA & + m->m_pkthdr.csum_flags); + } + } + (void) in6_finalize_cksum(m, sizeof (*eh), -1, -1, sw_csum); + break; +#endif /* INET6 */ + } +} + /* * bridge_start: * @@ -3524,7 +3816,7 @@ bridge_start(struct ifnet *ifp) */ static void bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, - struct mbuf *m) + struct mbuf *m) { struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; @@ -3533,12 +3825,12 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, uint8_t *dst; int error; - lck_mtx_assert(sc->sc_mtx, LCK_MTX_ASSERT_OWNED); + BRIDGE_LOCK_ASSERT_HELD(sc); #if BRIDGE_DEBUG - if (if_bridge_debug) - printf("%s: %s%d m%p\n", __func__, ifnet_name(sc->sc_ifp), - ifnet_unit(sc->sc_ifp), m); + if (if_bridge_debug & BR_DBGF_OUTPPUT) + printf("%s: %s m 0x%llx\n", __func__, sc->sc_ifp->if_xname, + (uint64_t)VM_KERNEL_ADDRPERM(m)); #endif /* BRIDGE_DEBUG */ src_if = m->m_pkthdr.rcvif; @@ -3548,7 +3840,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, vlan = VLANTAGOF(m); - if ((sbif->bif_flags & IFBIF_STP) && + if ((sbif->bif_ifflags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; @@ -3556,7 +3848,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, dst = eh->ether_dhost; /* If the interface is learning, record the address. */ - if (sbif->bif_flags & IFBIF_LEARNING) { + if (sbif->bif_ifflags & IFBIF_LEARNING) { error = bridge_rtupdate(sc, eh->ether_shost, vlan, sbif, 0, IFBAF_DYNAMIC); /* @@ -3567,7 +3859,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, goto drop; } - if ((sbif->bif_flags & IFBIF_STP) != 0 && + if ((sbif->bif_ifflags & IFBIF_STP) != 0 && sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING) goto drop; @@ -3622,11 +3914,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, #if defined(PFIL_HOOKS) /* run the packet filter */ - if (PFIL_HOOKED(&inet_pfil_hook) -#ifdef INET6 - || PFIL_HOOKED(&inet6_pfil_hook) -#endif /* INET6 */ - ) { + if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) { BRIDGE_UNLOCK(sc); if (bridge_pfil(&m, ifp, src_if, PFIL_IN) != 0) return; @@ -3637,13 +3925,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, #endif /* PFIL_HOOKS */ if (dst_if == NULL) { - /* - * Clear any in-bound checksum flags for this packet. 
- */ - mbuf_inbound_modified(m); - bridge_broadcast(sc, src_if, m, 1); - return; } @@ -3660,15 +3942,15 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, goto drop; /* Private segments can not talk to each other */ - if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) + if (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE) goto drop; - if ((dbif->bif_flags & IFBIF_STP) && + if ((dbif->bif_ifflags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; #if HAS_DHCPRA_MASK - /* APPLE MODIFICATION */ + /* APPLE MODIFICATION */ if ((dst_if->if_extflags & IFEXTF_DHCPRA_MASK) != 0) { m = ip_xdhcpra_output(dst_if, m); if (!m) { @@ -3681,11 +3963,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, BRIDGE_UNLOCK(sc); #if defined(PFIL_HOOKS) - if (PFIL_HOOKED(&inet_pfil_hook) -#ifdef INET6 - || PFIL_HOOKED(&inet6_pfil_hook) -#endif - ) { + if (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6) { if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) return; if (m == NULL) @@ -3693,11 +3971,6 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, } #endif /* PFIL_HOOKS */ - /* - * Clear any in-bound checksum flags for this packet. - */ - mbuf_inbound_modified(m); - (void) bridge_enqueue(sc, dst_if, m); return; @@ -3728,7 +4001,7 @@ ether_ntop(char *buf, size_t len, const u_char *ap) * bridging if it is not for us. */ __private_extern__ errno_t -bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) +bridge_input(struct ifnet *ifp, struct mbuf *m, void *frame_header) { struct bridge_softc *sc = ifp->if_bridge; struct bridge_iflist *bif, *bif2; @@ -3739,18 +4012,18 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) int error; #if BRIDGE_DEBUG - if (if_bridge_debug) - printf("%s: %s%d from %s%d m %p data %p\n", __func__, - ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp), - ifnet_name(ifp), ifnet_unit(ifp), m, mbuf_data(m)); + if (if_bridge_debug & BR_DBGF_INPUT) + printf("%s: %s from %s m 0x%llx data 0x%llx\n", __func__, + sc->sc_ifp->if_xname, ifp->if_xname, + (uint64_t)VM_KERNEL_ADDRPERM(m), + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_data(m))); #endif /* BRIDGE_DEBUG */ if ((sc->sc_ifp->if_flags & IFF_RUNNING) == 0) { #if BRIDGE_DEBUG - if (if_bridge_debug) - printf("%s: %s%d not running passing along\n", - __func__, ifnet_name(sc->sc_ifp), - ifnet_unit(sc->sc_ifp)); + if (if_bridge_debug & BR_DBGF_INPUT) + printf("%s: %s not running passing along\n", + __func__, sc->sc_ifp->if_xname); #endif /* BRIDGE_DEBUG */ return (0); } @@ -3786,10 +4059,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) if (bif == NULL) { BRIDGE_UNLOCK(sc); #if BRIDGE_DEBUG - if (if_bridge_debug) - printf("%s: %s%d bridge_lookup_member_if failed\n", - __func__, ifnet_name(sc->sc_ifp), - ifnet_unit(sc->sc_ifp)); + if (if_bridge_debug & BR_DBGF_INPUT) + printf("%s: %s bridge_lookup_member_if failed\n", + __func__, sc->sc_ifp->if_xname); #endif /* BRIDGE_DEBUG */ return (0); } @@ -3801,9 +4073,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) if (m->m_flags & (M_BCAST|M_MCAST)) { #if BRIDGE_DEBUG - if (if_bridge_debug) + if (if_bridge_debug & BR_DBGF_MCAST) if ((m->m_flags & M_MCAST)) - printf("%s: mulicast: " + printf("%s: multicast: " "%02x:%02x:%02x:%02x:%02x:%02x\n", __func__, eh->ether_dhost[0], eh->ether_dhost[1], @@ -3826,7 +4098,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) } } - if ((bif->bif_flags & 
IFBIF_STP) && + if ((bif->bif_ifflags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { BRIDGE_UNLOCK(sc); return (0); @@ -3856,8 +4128,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) * here from ether_input as a bridge is never a member of a * bridge. */ - KASSERT(bifp->if_bridge == NULL, - ("loop created in bridge_input")); + VERIFY(bifp->if_bridge == NULL); mc2 = m_dup(m, M_DONTWAIT); if (mc2 != NULL) { /* Keep the layer3 header aligned */ @@ -3865,9 +4136,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) mc2 = m_copyup(mc2, i, ETHER_ALIGN); } if (mc2 != NULL) { - // mark packet as arriving on the bridge + /* mark packet as arriving on the bridge */ mc2->m_pkthdr.rcvif = bifp; - mc2->m_pkthdr.header = mbuf_data(mc2); + mc2->m_pkthdr.pkt_hdr = mbuf_data(mc2); #if NBPFILTER > 0 if (sc->sc_bpf_input) @@ -3882,10 +4153,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) mbuf_pkthdr_len(mc2), 0); #if BRIDGE_DEBUG - if (if_bridge_debug) - printf("%s: %s%d mcast for us\n", __func__, - ifnet_name(sc->sc_ifp), - ifnet_unit(sc->sc_ifp)); + if (if_bridge_debug & BR_DBGF_MCAST) + printf("%s: %s mcast for us\n", __func__, + sc->sc_ifp->if_xname); #endif /* BRIDGE_DEBUG */ dlil_input_packet_list(bifp, mc2); @@ -3895,35 +4165,34 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) return (0); } - if ((bif->bif_flags & IFBIF_STP) && + if ((bif->bif_ifflags & IFBIF_STP) && bif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) { BRIDGE_UNLOCK(sc); return (0); } #ifdef DEV_CARP -# define OR_CARP_CHECK_WE_ARE_DST(iface) \ - || ((iface)->if_carp \ - && carp_forus((iface)->if_carp, eh->ether_dhost)) -# define OR_CARP_CHECK_WE_ARE_SRC(iface) \ - || ((iface)->if_carp \ - && carp_forus((iface)->if_carp, eh->ether_shost)) +#define CARP_CHECK_WE_ARE_DST(iface) \ + ((iface)->if_carp &&\ + carp_forus((iface)->if_carp, eh->ether_dhost)) +#define CARP_CHECK_WE_ARE_SRC(iface) \ + ((iface)->if_carp &&\ + carp_forus((iface)->if_carp, eh->ether_shost)) #else -# define OR_CARP_CHECK_WE_ARE_DST(iface) -# define OR_CARP_CHECK_WE_ARE_SRC(iface) +#define CARP_CHECK_WE_ARE_DST(iface) 0 +#define CARP_CHECK_WE_ARE_SRC(iface) 0 #endif #ifdef INET6 -# define OR_PFIL_HOOKED_INET6 \ - || PFIL_HOOKED(&inet6_pfil_hook) +#define PFIL_HOOKED_INET6 PFIL_HOOKED(&inet6_pfil_hook) #else -# define OR_PFIL_HOOKED_INET6 +#define PFIL_HOOKED_INET6 0 #endif #if defined(PFIL_HOOKS) #define PFIL_PHYS(sc, ifp, m) do { \ if (pfil_local_phys && \ - (PFIL_HOOKED(&inet_pfil_hook) OR_PFIL_HOOKED_INET6)) { \ + (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6)) { \ if (bridge_pfil(&m, NULL, ifp, \ PFIL_IN) != 0 || m == NULL) { \ BRIDGE_UNLOCK(sc); \ @@ -3939,14 +4208,14 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) if ((iface)->if_type == IFT_GIF) \ continue; \ /* It is destined for us. */ \ - if (memcmp(ifnet_lladdr((iface)), eh->ether_dhost, \ - ETHER_ADDR_LEN) == 0 OR_CARP_CHECK_WE_ARE_DST((iface))) { \ + if (memcmp(IF_LLADDR((iface)), eh->ether_dhost, \ + ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST((iface))) { \ if ((iface)->if_type == IFT_BRIDGE) { \ BRIDGE_BPF_MTAP_INPUT(sc, m); \ /* Filter on the physical interface. 
*/ \ PFIL_PHYS(sc, iface, m); \ } \ - if (bif->bif_flags & IFBIF_LEARNING) { \ + if (bif->bif_ifflags & IFBIF_LEARNING) { \ error = bridge_rtupdate(sc, eh->ether_shost, \ vlan, bif, 0, IFBAF_DYNAMIC); \ if (error && bif->bif_addrmax) { \ @@ -3960,8 +4229,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) } \ \ /* We just received a packet that we sent out. */ \ - if (memcmp(ifnet_lladdr((iface)), eh->ether_shost, \ - ETHER_ADDR_LEN) == 0 OR_CARP_CHECK_WE_ARE_SRC((iface))) { \ + if (memcmp(IF_LLADDR((iface)), eh->ether_shost, \ + ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_SRC((iface))) { \ BRIDGE_UNLOCK(sc); \ return (EJUSTRETURN); \ } @@ -3974,8 +4243,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) * bridge, and return the packet back to ether_input for * local processing. */ - if (memcmp(eh->ether_dhost, ifnet_lladdr(bifp), - ETHER_ADDR_LEN) == 0 OR_CARP_CHECK_WE_ARE_DST(bifp)) { + if (memcmp(eh->ether_dhost, IF_LLADDR(bifp), + ETHER_ADDR_LEN) == 0 || CARP_CHECK_WE_ARE_DST(bifp)) { /* Mark the packet as arriving on the bridge interface */ (void) mbuf_pkthdr_setrcvif(m, bifp); @@ -3986,14 +4255,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) * address is valid and not multicast, record * the address. */ - if ((bif->bif_flags & IFBIF_LEARNING) != 0 && - ETHER_IS_MULTICAST(eh->ether_shost) == 0 && - (eh->ether_shost[0] | eh->ether_shost[1] | - eh->ether_shost[2] | eh->ether_shost[3] | - eh->ether_shost[4] | eh->ether_shost[5]) != 0) { + if (bif->bif_ifflags & IFBIF_LEARNING) (void) bridge_rtupdate(sc, eh->ether_shost, vlan, bif, 0, IFBAF_DYNAMIC); - } BRIDGE_BPF_MTAP_INPUT(sc, m); @@ -4006,9 +4270,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) BRIDGE_UNLOCK(sc); #if BRIDGE_DEBUG - if (if_bridge_debug) - printf("%s: %s%d packet for bridge\n", __func__, - ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp)); + if (if_bridge_debug & BR_DBGF_INPUT) + printf("%s: %s packet for bridge\n", __func__, + sc->sc_ifp->if_xname); #endif /* BRIDGE_DEBUG */ dlil_input_packet_list(bifp, m); @@ -4029,7 +4293,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) * bridge's own MAC address, because the bridge may be * using the SAME MAC address as one of its interfaces */ - if (memcmp(eh->ether_dhost, ifnet_lladdr(ifp), ETHER_ADDR_LEN) == 0) { + if (memcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) { #ifdef VERY_VERY_VERY_DIAGNOSTIC printf("%s: not forwarding packet bound for member " @@ -4044,9 +4308,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) GRAB_OUR_PACKETS(bif2->bif_ifp) } -#undef OR_CARP_CHECK_WE_ARE_DST -#undef OR_CARP_CHECK_WE_ARE_SRC -#undef OR_PFIL_HOOKED_INET6 +#undef CARP_CHECK_WE_ARE_DST +#undef CARP_CHECK_WE_ARE_SRC #undef GRAB_OUR_PACKETS /* @@ -4070,7 +4333,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m, __unused void *frame_header) */ static void bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, - struct mbuf *m, int runfilt) + struct mbuf *m, int runfilt) { #ifndef PFIL_HOOKS #pragma unused(runfilt) @@ -4090,11 +4353,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, #ifdef PFIL_HOOKS /* Filter on the bridge interface before broadcasting */ - if (runfilt && (PFIL_HOOKED(&inet_pfil_hook) -#ifdef INET6 - || PFIL_HOOKED(&inet6_pfil_hook) -#endif /* INET6 */ - )) { + if (runfilt && (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6)) { if (bridge_pfil(&m, 
sc->sc_ifp, NULL, PFIL_OUT) != 0) goto out; if (m == NULL) @@ -4108,14 +4367,15 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, continue; /* Private segments can not talk to each other */ - if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) + if (sbif && + (sbif->bif_ifflags & dbif->bif_ifflags & IFBIF_PRIVATE)) continue; - if ((dbif->bif_flags & IFBIF_STP) && + if ((dbif->bif_ifflags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; - if ((dbif->bif_flags & IFBIF_DISCOVER) == 0 && + if ((dbif->bif_ifflags & IFBIF_DISCOVER) == 0 && (m->m_flags & (M_BCAST|M_MCAST)) == 0) continue; @@ -4140,11 +4400,8 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, * pointer so we do not redundantly filter on the bridge for * each interface we broadcast on. */ - if (runfilt && (PFIL_HOOKED(&inet_pfil_hook) -#ifdef INET6 - || PFIL_HOOKED(&inet6_pfil_hook) -#endif - )) { + if (runfilt && + (PFIL_HOOKED(&inet_pfil_hook) || PFIL_HOOKED_INET6)) { if (used == 0) { /* Keep the layer3 header aligned */ int i = min(mc->m_pkthdr.len, max_protohdr); @@ -4207,7 +4464,6 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m) } - /* * bridge_rtupdate: * @@ -4215,12 +4471,12 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m) */ static int bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, - struct bridge_iflist *bif, int setflags, uint8_t flags) + struct bridge_iflist *bif, int setflags, uint8_t flags) { struct bridge_rtnode *brt; int error; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); /* Check the source address is valid and not multicast. */ if (ETHER_IS_MULTICAST(dst) || @@ -4256,8 +4512,9 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, brt = zalloc_noblock(bridge_rtnode_pool); if (brt == NULL) return (ENOMEM); + bzero(brt, sizeof(struct bridge_rtnode)); - if (bif->bif_flags & IFBIF_STICKY) + if (bif->bif_ifflags & IFBIF_STICKY) brt->brt_flags = IFBAF_STICKY; else brt->brt_flags = IFBAF_DYNAMIC; @@ -4272,6 +4529,14 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, } brt->brt_dst = bif; bif->bif_addrcnt++; +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_RT_TABLE) + printf("%s: added %02x:%02x:%02x:%02x:%02x:%02x " + "on %s count %u hashsize %u\n", __func__, + dst[0], dst[1], dst[2], dst[3], dst[4], dst[5], + sc->sc_ifp->if_xname, sc->sc_brtcnt, + sc->sc_rthash_size); +#endif } if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC && @@ -4282,10 +4547,10 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, uint16_t vlan, } if ((flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { - struct timespec now; + unsigned long now; - nanouptime(&now); - brt->brt_expire = now.tv_sec + sc->sc_brttimeout; + now = (unsigned long) net_uptime(); + brt->brt_expire = now + sc->sc_brttimeout; } if (setflags) brt->brt_flags = flags; @@ -4304,7 +4569,7 @@ bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) { struct bridge_rtnode *brt; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); if ((brt = bridge_rtnode_lookup(sc, addr, vlan)) == NULL) return (NULL); @@ -4324,7 +4589,7 @@ bridge_rttrim(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); /* Make sure we actually need to do this. */ if (sc->sc_brtcnt <= sc->sc_brtmax) @@ -4345,27 +4610,23 @@ bridge_rttrim(struct bridge_softc *sc) } /* - * bridge_timer: + * bridge_aging_timer: * - * Aging timer for the bridge. 
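 * (The expiry bookkeeping here also moved from nanouptime() timespecs to
 * net_uptime() seconds, a coarse, cheaply sampled uptime, so aging reduces
 * to plain unsigned arithmetic, as in bridge_rtupdate() above and
 * bridge_rtage() below:
 *
 *	now = (unsigned long) net_uptime();
 *	brt->brt_expire = now + sc->sc_brttimeout;	// on update
 *	if (now >= brt->brt_expire)			// on aging
 *		bridge_rtnode_destroy(sc, brt);
 *
 * which is cheap enough for per-packet paths.)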
+ * Aging periodic timer for the bridge routing table. */ static void -bridge_timer(void *arg) +bridge_aging_timer(struct bridge_softc *sc) { - struct bridge_softc *sc = arg; - - BRIDGE_LOCK(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); bridge_rtage(sc); - BRIDGE_UNLOCK(sc); - - if (sc->sc_ifp->if_flags & IFF_RUNNING) { - struct timespec ts; - - ts.tv_sec = bridge_rtable_prune_period; - ts.tv_nsec = 0; - bsd_timeout(bridge_timer, sc, &ts); + if ((sc->sc_ifp->if_flags & IFF_RUNNING) && + (sc->sc_flags & SCF_DETACHING) == 0) { + sc->sc_aging_timer.bdc_sc = sc; + sc->sc_aging_timer.bdc_func = bridge_aging_timer; + sc->sc_aging_timer.bdc_ts.tv_sec = bridge_rtable_prune_period; + bridge_schedule_delayed_call(&sc->sc_aging_timer); } } @@ -4378,15 +4639,15 @@ static void bridge_rtage(struct bridge_softc *sc) { struct bridge_rtnode *brt, *nbrt; + unsigned long now; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); + + now = (unsigned long) net_uptime(); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { - struct timespec now; - - nanouptime(&now); - if ((unsigned long)now.tv_sec >= brt->brt_expire) + if (now >= brt->brt_expire) bridge_rtnode_destroy(sc, brt); } } @@ -4402,7 +4663,7 @@ bridge_rtflush(struct bridge_softc *sc, int full) { struct bridge_rtnode *brt, *nbrt; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) @@ -4421,7 +4682,7 @@ bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr, uint16_t vlan) struct bridge_rtnode *brt; int found = 0; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); /* * If vlan is zero then we want to delete for all vlans so the lookup @@ -4445,7 +4706,7 @@ bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full) { struct bridge_rtnode *brt, *nbrt; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) { if (brt->brt_ifp == ifp && (full || @@ -4462,23 +4723,140 @@ bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp, int full) static int bridge_rtable_init(struct bridge_softc *sc) { - int i; + u_int32_t i; sc->sc_rthash = _MALLOC(sizeof (*sc->sc_rthash) * BRIDGE_RTHASH_SIZE, - M_DEVBUF, M_NOWAIT); - if (sc->sc_rthash == NULL) + M_DEVBUF, M_WAITOK | M_ZERO); + if (sc->sc_rthash == NULL) { + printf("%s: no memory\n", __func__); return (ENOMEM); + } + sc->sc_rthash_size = BRIDGE_RTHASH_SIZE; - for (i = 0; i < BRIDGE_RTHASH_SIZE; i++) + for (i = 0; i < sc->sc_rthash_size; i++) LIST_INIT(&sc->sc_rthash[i]); - sc->sc_rthash_key = random(); + sc->sc_rthash_key = RandomULong(); LIST_INIT(&sc->sc_rtlist); return (0); } +/* + * bridge_rthash_delayed_resize: + * + * Resize the routing table hash on a delayed thread call. 
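 * The growth policy below keeps an ideal load factor of four entries per
 * hash bucket and doubles the bucket count when it is exceeded; as a
 * predicate (ex_rthash_should_grow is an illustrative name):
 *
 *	static int
 *	ex_rthash_should_grow(const struct bridge_softc *sc)
 *	{
 *		return (sc->sc_brtcnt >= sc->sc_rthash_size * 4 &&
 *		    sc->sc_rthash_size < bridge_rtable_hash_size_max);
 *	}
 *
 * Growth is best effort: the new table is allocated with the bridge lock
 * dropped (SCF_RESIZING guards against re-entry), and on failure the old
 * table simply stays in place.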
+ */ +static void +bridge_rthash_delayed_resize(struct bridge_softc *sc) +{ + u_int32_t new_rthash_size; + struct _bridge_rtnode_list *new_rthash = NULL; + struct _bridge_rtnode_list *old_rthash = NULL; + u_int32_t i; + struct bridge_rtnode *brt; + int error = 0; + + BRIDGE_LOCK_ASSERT_HELD(sc); + + /* + * Four entries per hash bucket is our ideal load factor + */ + if (sc->sc_brtcnt < sc->sc_rthash_size * 4) + goto out; + + /* + * Doubling the number of hash buckets may be too simplistic + * especially when facing a spike of new entries + */ + new_rthash_size = sc->sc_rthash_size * 2; + + sc->sc_flags |= SCF_RESIZING; + BRIDGE_UNLOCK(sc); + + new_rthash = _MALLOC(sizeof (*sc->sc_rthash) * new_rthash_size, + M_DEVBUF, M_WAITOK | M_ZERO); + + BRIDGE_LOCK(sc); + sc->sc_flags &= ~SCF_RESIZING; + + if (new_rthash == NULL) { + error = ENOMEM; + goto out; + } + if ((sc->sc_flags & SCF_DETACHING)) { + error = ENODEV; + goto out; + } + /* + * Fail safe from here on + */ + old_rthash = sc->sc_rthash; + sc->sc_rthash = new_rthash; + sc->sc_rthash_size = new_rthash_size; + + /* + * Get a new key to force entries to be shuffled around to reduce + * the likelihood they will land in the same buckets + */ + sc->sc_rthash_key = RandomULong(); + + for (i = 0; i < sc->sc_rthash_size; i++) + LIST_INIT(&sc->sc_rthash[i]); + + LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { + LIST_REMOVE(brt, brt_hash); + (void) bridge_rtnode_hash(sc, brt); + } +out: + if (error == 0) { +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_RT_TABLE) + printf("%s: %s new size %u\n", __func__, + sc->sc_ifp->if_xname, sc->sc_rthash_size); +#endif /* BRIDGE_DEBUG */ + if (old_rthash) + _FREE(old_rthash, M_DEVBUF); + } else { +#if BRIDGE_DEBUG + printf("%s: %s failed %d\n", __func__, + sc->sc_ifp->if_xname, error); +#endif /* BRIDGE_DEBUG */ + if (new_rthash != NULL) + _FREE(new_rthash, M_DEVBUF); + } +} + +/* + * Resize the number of hash buckets based on the load factor + * Currently only grow + * Failing to resize the hash table is not fatal + */ +static void +bridge_rthash_resize(struct bridge_softc *sc) +{ + BRIDGE_LOCK_ASSERT_HELD(sc); + + if ((sc->sc_flags & SCF_DETACHING) || (sc->sc_flags & SCF_RESIZING)) + return; + + /* + * Four entries per hash bucket is our ideal load factor + */ + if (sc->sc_brtcnt < sc->sc_rthash_size * 4) + return; + /* + * Hard limit on the size of the routing hash table + */ + if (sc->sc_rthash_size >= bridge_rtable_hash_size_max) + return; + + sc->sc_resize_call.bdc_sc = sc; + sc->sc_resize_call.bdc_func = bridge_rthash_delayed_resize; + bridge_schedule_delayed_call(&sc->sc_resize_call); +} + /* * bridge_rtable_fini: * @@ -4487,10 +4865,12 @@ bridge_rtable_init(struct bridge_softc *sc) static void bridge_rtable_fini(struct bridge_softc *sc) { - KASSERT(sc->sc_brtcnt == 0, ("%s: %d bridge routes referenced", __func__, sc->sc_brtcnt)); - _FREE(sc->sc_rthash, M_DEVBUF); + if (sc->sc_rthash) { + _FREE(sc->sc_rthash, M_DEVBUF); + sc->sc_rthash = NULL; + } } /* @@ -4524,7 +4904,7 @@ bridge_rthash(struct bridge_softc *sc, const uint8_t *addr) mix(a, b, c); - return (c & BRIDGE_RTHASH_MASK); + return (c & BRIDGE_RTHASH_MASK(sc)); } #undef mix @@ -4549,13 +4929,13 @@ bridge_rtnode_addr_cmp(const uint8_t *a, const uint8_t *b) */ static struct bridge_rtnode * bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, - uint16_t vlan) + uint16_t vlan) { struct bridge_rtnode *brt; uint32_t hash; int dir; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); hash = bridge_rthash(sc, addr); 
LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) { @@ -4570,19 +4950,20 @@ bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr, } /* - * bridge_rtnode_insert: + * bridge_rtnode_hash: * - * Insert the specified bridge node into the route table. We - * assume the entry is not already in the table. + * Insert the specified bridge node into the route hash table. + * This is used when adding a new node or to rehash when resizing + * the hash table */ static int -bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) +bridge_rtnode_hash(struct bridge_softc *sc, struct bridge_rtnode *brt) { struct bridge_rtnode *lbrt; uint32_t hash; int dir; - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); hash = bridge_rthash(sc, brt->brt_addr); @@ -4594,8 +4975,18 @@ bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) do { dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr); - if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) + if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) { +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_RT_TABLE) + printf("%s: %s EEXIST " + "%02x:%02x:%02x:%02x:%02x:%02x\n", + __func__, sc->sc_ifp->if_xname, + brt->brt_addr[0], brt->brt_addr[1], + brt->brt_addr[2], brt->brt_addr[3], + brt->brt_addr[4], brt->brt_addr[5]); +#endif return (EEXIST); + } if (dir > 0) { LIST_INSERT_BEFORE(lbrt, brt, brt_hash); goto out; @@ -4607,14 +4998,38 @@ bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) lbrt = LIST_NEXT(lbrt, brt_hash); } while (lbrt != NULL); -#ifdef DIAGNOSTIC - panic("bridge_rtnode_insert: impossible"); +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_RT_TABLE) + printf("%s: %s impossible %02x:%02x:%02x:%02x:%02x:%02x\n", + __func__, sc->sc_ifp->if_xname, + brt->brt_addr[0], brt->brt_addr[1], brt->brt_addr[2], + brt->brt_addr[3], brt->brt_addr[4], brt->brt_addr[5]); #endif out: + return (0); +} + +/* + * bridge_rtnode_insert: + * + * Insert the specified bridge node into the route table. We + * assume the entry is not already in the table. 
+ */ +static int +bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) +{ + int error; + + error = bridge_rtnode_hash(sc, brt); + if (error != 0) + return (error); + LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list); sc->sc_brtcnt++; + bridge_rthash_resize(sc); + return (0); } @@ -4626,7 +5041,7 @@ out: static void bridge_rtnode_destroy(struct bridge_softc *sc, struct bridge_rtnode *brt) { - BRIDGE_LOCK_ASSERT(sc); + BRIDGE_LOCK_ASSERT_HELD(sc); LIST_REMOVE(brt, brt_hash); @@ -4657,16 +5072,16 @@ bridge_rtable_expire(struct ifnet *ifp, int age) if (age == 0) { bridge_rtdelete(sc, ifp, IFBF_FLUSHDYN); } else { - LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { - struct timespec now; + unsigned long now; - nanouptime(&now); + now = (unsigned long) net_uptime(); + + LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) { /* Cap the expiry time to 'age' */ if (brt->brt_ifp == ifp && - brt->brt_expire > (unsigned long)now.tv_sec + age && + brt->brt_expire > now + age && (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) - brt->brt_expire = - (unsigned long)now.tv_sec + age; + brt->brt_expire = now + age; } } BRIDGE_UNLOCK(sc); @@ -4691,9 +5106,9 @@ bridge_state_change(struct ifnet *ifp, int state) }; if (log_stp) - log(LOG_NOTICE, "%s%d: state changed to %s on %s%d\n", - ifnet_name(sc->sc_ifp), ifnet_unit(sc->sc_ifp), - stpstates[state], ifnet_name(ifp), ifnet_unit(ifp)); + log(LOG_NOTICE, "%s: state changed to %s on %s\n", + sc->sc_ifp->if_xname, + stpstates[state], ifp->if_xname); } #endif /* BRIDGESTP */ @@ -4769,7 +5184,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) break; case ETHERTYPE_IP: -#ifdef INET6 +#if INET6 case ETHERTYPE_IPV6: #endif /* INET6 */ break; @@ -4801,7 +5216,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) case ETHERTYPE_IP: error = bridge_ip_checkbasic(mp); break; -#ifdef INET6 +#if INET6 case ETHERTYPE_IPV6: error = bridge_ip6_checkbasic(mp); break; @@ -4927,7 +5342,7 @@ ipfwpass: ip->ip_sum = in_cksum(*mp, hlen); break; -#ifdef INET6 +#if INET6 case ETHERTYPE_IPV6: if (pfil_bridge && dir == PFIL_OUT && bifp != NULL) error = pfil_run_hooks(&inet6_pfil_hook, mp, bifp, @@ -4983,7 +5398,6 @@ bad: return (error); } - /* * Perform basic checks on header size since * pfil assumes ip_input has already processed @@ -5086,7 +5500,7 @@ bad: return (-1); } -#ifdef INET6 +#if INET6 /* * Same as above, but for IPv6. * Cut-and-pasted from ip6_input.c. @@ -5148,7 +5562,7 @@ bad: */ static int bridge_fragment(struct ifnet *ifp, struct mbuf *m, struct ether_header *eh, - int snap, struct llc *llc) + int snap, struct llc *llc) { struct mbuf *m0; struct ip *ip; @@ -5199,6 +5613,11 @@ out: } #endif /* PFIL_HOOKS */ +/* + * bridge_set_bpf_tap: + * + * Sets ups the BPF callbacks. + */ static errno_t bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback) { @@ -5233,6 +5652,11 @@ bridge_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func bpf_callback) return (0); } +/* + * bridge_detach: + * + * Callback when interface has been detached. + */ static void bridge_detach(ifnet_t ifp) { @@ -5245,17 +5669,22 @@ bridge_detach(ifnet_t ifp) /* Tear down the routing table. 
*/ bridge_rtable_fini(sc); - lck_mtx_lock(bridge_list_mtx); + lck_mtx_lock(&bridge_list_mtx); LIST_REMOVE(sc, sc_list); - lck_mtx_unlock(bridge_list_mtx); + lck_mtx_unlock(&bridge_list_mtx); ifnet_release(ifp); - lck_mtx_free(sc->sc_mtx, bridge_lock_grp); + lck_mtx_destroy(&sc->sc_mtx, bridge_lock_grp); _FREE(sc, M_DEVBUF); } +/* + * bridge_bpf_input: + * + * Invoke the input BPF callback if enabled + */ __private_extern__ errno_t bridge_bpf_input(ifnet_t ifp, struct mbuf *m) { @@ -5263,14 +5692,20 @@ bridge_bpf_input(ifnet_t ifp, struct mbuf *m) if (sc->sc_bpf_input) { if (mbuf_pkthdr_rcvif(m) != ifp) { - printf("%s: rcvif: %p != ifp %p\n", __func__, - mbuf_pkthdr_rcvif(m), ifp); + printf("%s: rcvif: 0x%llx != ifp 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(mbuf_pkthdr_rcvif(m)), + (uint64_t)VM_KERNEL_ADDRPERM(ifp)); } (*sc->sc_bpf_input)(ifp, m); } return (0); } +/* + * bridge_bpf_output: + * + * Invoke the output BPF callback if enabled + */ __private_extern__ errno_t bridge_bpf_output(ifnet_t ifp, struct mbuf *m) { @@ -5281,3 +5716,35 @@ bridge_bpf_output(ifnet_t ifp, struct mbuf *m) } return (0); } + +/* + * bridge_link_event: + * + * Report a data link event on an interface + */ +static void +bridge_link_event(struct ifnet *ifp, u_int32_t event_code) +{ + struct { + struct kern_event_msg header; + u_int32_t unit; + char if_name[IFNAMSIZ]; + } event; + +#if BRIDGE_DEBUG + if (if_bridge_debug & BR_DBGF_LIFECYCLE) + printf("%s: %s event_code %u - %s\n", __func__, ifp->if_xname, + event_code, dlil_kev_dl_code_str(event_code)); +#endif /* BRIDGE_DEBUG */ + + bzero(&event, sizeof (event)); + event.header.total_size = sizeof (event); + event.header.vendor_code = KEV_VENDOR_APPLE; + event.header.kev_class = KEV_NETWORK_CLASS; + event.header.kev_subclass = KEV_DL_SUBCLASS; + event.header.event_code = event_code; + event.header.event_data[0] = ifnet_family(ifp); + event.unit = (u_int32_t)ifnet_unit(ifp); + strncpy(event.if_name, ifnet_name(ifp), IFNAMSIZ); + ifnet_event(ifp, &event.header); +} diff --git a/bsd/net/if_ether.h b/bsd/net/if_ether.h index a0235a74d..4cdc5d28b 100644 --- a/bsd/net/if_ether.h +++ b/bsd/net/if_ether.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -34,7 +34,7 @@ __BEGIN_DECLS /* Not exported */ -extern int ether_family_init(void) __attribute__((section("__TEXT, initcode"))); +extern int ether_family_init(void); /* * These functions may be used for an interface emulating an ethernet @@ -43,19 +43,24 @@ extern int ether_family_init(void) __attribute__((section("__TEXT, initcode"))); * functions when filling out the ifnet_init_params structure. 
*/ errno_t ether_demux(ifnet_t interface, mbuf_t packet, char* header, - protocol_family_t *protocol); + protocol_family_t *protocol); errno_t ether_add_proto(ifnet_t interface, protocol_family_t protocol, - const struct ifnet_demux_desc *demux_list, - u_int32_t demux_count); + const struct ifnet_demux_desc *demux_list, u_int32_t demux_count); errno_t ether_del_proto(ifnet_t interface, protocol_family_t protocol); -errno_t ether_frameout(ifnet_t interface, mbuf_t *packet, - const struct sockaddr *dest, const char *dest_lladdr, - const char *frame_type #if KPI_INTERFACE_EMBEDDED - , - u_int32_t *prepend_len, u_int32_t *postpend_len -#endif /* KPI_INTERFACE_EMBEDDED */ - ); +errno_t ether_frameout(ifnet_t interface, mbuf_t *packet, + const struct sockaddr *dest, const char *dest_lladdr, + const char *frame_type, u_int32_t *prepend_len, u_int32_t *postpend_len); +#else /* !KPI_INTERFACE_EMBEDDED */ +errno_t ether_frameout(ifnet_t interface, mbuf_t *packet, + const struct sockaddr *dest, const char *dest_lladdr, + const char *frame_type); +#endif /* !KPI_INTERFACE_EMBEDDED */ +#ifdef KERNEL_PRIVATE +errno_t ether_frameout_extended(ifnet_t interface, mbuf_t *packet, + const struct sockaddr *dest, const char *dest_lladdr, + const char *frame_type, u_int32_t *prepend_len, u_int32_t *postpend_len); +#endif /* KERNEL_PRIVATE */ errno_t ether_ioctl(ifnet_t interface, u_int32_t command, void* data); errno_t ether_check_multi(ifnet_t ifp, const struct sockaddr *multicast); diff --git a/bsd/net/if_gif.c b/bsd/net/if_gif.c index c638758a2..d6608b77c 100644 --- a/bsd/net/if_gif.c +++ b/bsd/net/if_gif.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,11 +22,11 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/net/if_gif.c,v 1.4.2.6 2001/07/24 19:10:18 brooks Exp $ */ -/* $KAME: if_gif.c,v 1.47 2001/05/01 05:28:42 itojun Exp $ */ +/* $FreeBSD: src/sys/net/if_gif.c,v 1.4.2.6 2001/07/24 19:10:18 brooks Exp $ */ +/* $KAME: if_gif.c,v 1.47 2001/05/01 05:28:42 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 
@@ -82,6 +82,7 @@ #include #include #include +#include #include #include @@ -110,48 +111,47 @@ #include #endif -#define GIFNAME "gif" -#define GIFDEV "if_gif" -#define GIF_MAXUNIT 0x7fff /* ifp->if_unit is only 15 bits */ +#define GIFNAME "gif" +#define GIFDEV "if_gif" +#define GIF_MAXUNIT 0x7fff /* ifp->if_unit is only 15 bits */ -#ifndef __APPLE__ -static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); -#endif +/* gif lock variables */ +static lck_grp_t *gif_mtx_grp; +static lck_grp_attr_t *gif_mtx_grp_attr; +static lck_attr_t *gif_mtx_attr; +decl_lck_mtx_data(static, gif_mtx_data); +static lck_mtx_t *gif_mtx = &gif_mtx_data; TAILQ_HEAD(gifhead, gif_softc) gifs = TAILQ_HEAD_INITIALIZER(gifs); -#ifdef __APPLE__ -void gifattach(void); -static int gif_encapcheck(const struct mbuf*, int, int, void*); +static int gif_encapcheck(const struct mbuf *, int, int, void *); static errno_t gif_output(ifnet_t ifp, mbuf_t m); static errno_t gif_input(ifnet_t ifp, protocol_family_t protocol_family, - mbuf_t m, char *frame_header); + mbuf_t m, char *frame_header); static errno_t gif_ioctl(ifnet_t ifp, u_long cmd, void *data); -int ngif = 0; /* number of interfaces */ -#endif +static int ngif = 0; /* number of interfaces */ #if INET -struct protosw in_gif_protosw = -{ SOCK_RAW, 0, 0/*IPPROTO_IPV[46]*/, PR_ATOMIC|PR_ADDR, - in_gif_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &rip_usrreqs, - 0, rip_unlock, 0, {0, 0}, 0, {0} +static struct protosw in_gif_protosw = +{ + .pr_type = SOCK_RAW, + .pr_protocol = 0, /* IPPROTO_IPV[46] */ + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = in_gif_input, + .pr_usrreqs = &rip_usrreqs, + .pr_unlock = rip_unlock, }; #endif #if INET6 -struct ip6protosw in6_gif_protosw = -{ SOCK_RAW, 0, 0/*IPPROTO_IPV[46]*/, PR_ATOMIC|PR_ADDR, - in6_gif_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &rip6_usrreqs, - 0, rip_unlock, 0, {0, 0}, 0, {0} - +static struct ip6protosw in6_gif_protosw = +{ + .pr_type = SOCK_RAW, + .pr_protocol = 0, /* IPPROTO_IPV[46] */ + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = in6_gif_input, + .pr_usrreqs = &rip6_usrreqs, + .pr_unlock = rip_unlock, }; #endif @@ -159,8 +159,8 @@ static if_clone_t gif_cloner = NULL; static int gif_clone_create(struct if_clone *, uint32_t, void *); static int gif_clone_destroy(struct ifnet *); static void gif_delete_tunnel(struct gif_softc *); +static void gif_detach(struct ifnet *); -#ifdef __APPLE__ /* * Theory of operation: initially, one gif interface is created. * Any time a gif interface is configured, if there are no other @@ -172,91 +172,109 @@ static void gif_delete_tunnel(struct gif_softc *); */ /* GIF interface module support */ -static int gif_demux( - ifnet_t ifp, - __unused mbuf_t m, - __unused char *frame_header, - protocol_family_t *protocol_family) +static int +gif_demux( + ifnet_t ifp, + __unused mbuf_t m, + __unused char *frame_header, + protocol_family_t *protocol_family) { + struct gif_softc *sc = ifnet_softc(ifp); + + GIF_LOCK(sc); /* Only one protocol may be attached to a gif interface. 
*/ - *protocol_family = ((struct gif_softc*)ifnet_softc(ifp))->gif_proto; - - return 0; + *protocol_family = sc->gif_proto; + GIF_UNLOCK(sc); + + return (0); } static errno_t gif_add_proto( - ifnet_t ifp, - protocol_family_t protocol_family, - __unused const struct ifnet_demux_desc *demux_array, - __unused u_int32_t demux_count) + ifnet_t ifp, + protocol_family_t protocol_family, + __unused const struct ifnet_demux_desc *demux_array, + __unused u_int32_t demux_count) { /* Only one protocol may be attached at a time */ - struct gif_softc* gif = ifnet_softc(ifp); + struct gif_softc *sc = ifnet_softc(ifp); - if (gif->gif_proto != 0) - printf("gif_add_proto: request add_proto for gif%d\n", ifnet_unit(ifp)); + GIF_LOCK(sc); + if (sc->gif_proto != 0) + printf("gif_add_proto: request add_proto for gif%d\n", + ifnet_unit(ifp)); - gif->gif_proto = protocol_family; + sc->gif_proto = protocol_family; + GIF_UNLOCK(sc); - return 0; + return (0); } static errno_t gif_del_proto( - ifnet_t ifp, - protocol_family_t protocol_family) + ifnet_t ifp, + protocol_family_t protocol_family) { - if (((struct gif_softc*)ifnet_softc(ifp))->gif_proto == protocol_family) - ((struct gif_softc*)ifnet_softc(ifp))->gif_proto = 0; - - return 0; + struct gif_softc *sc = ifnet_softc(ifp); + + GIF_LOCK(sc); + if (sc->gif_proto == protocol_family) + sc->gif_proto = 0; + GIF_UNLOCK(sc); + + return (0); } /* Glue code to attach inet to a gif interface through DLIL */ static errno_t gif_attach_proto_family( - ifnet_t ifp, - protocol_family_t protocol_family) + ifnet_t ifp, + protocol_family_t protocol_family) { - struct ifnet_attach_proto_param reg; - errno_t stat; + struct ifnet_attach_proto_param reg; + errno_t stat; - bzero(®, sizeof(reg)); - reg.input = gif_input; + bzero(®, sizeof (reg)); + reg.input = gif_input; - stat = ifnet_attach_protocol(ifp, protocol_family, ®); - if (stat && stat != EEXIST) { - printf("gif_attach_proto_family can't attach interface fam=%d\n", - protocol_family); - } + stat = ifnet_attach_protocol(ifp, protocol_family, ®); + if (stat && stat != EEXIST) { + printf("gif_attach_proto_family can't attach interface \ + fam=%d\n", protocol_family); + } - return stat; + return (stat); } -#endif - /* Function to setup the first gif interface */ -__private_extern__ void -gifattach(void) +void +gif_init(void) { errno_t result; struct ifnet_clone_params ifnet_clone_params; - struct if_clone *ifc = NULL; + struct if_clone *ifc = NULL; - /* Init the list of interfaces */ + /* Initialize the list of interfaces */ TAILQ_INIT(&gifs); + /* Initialize the gif global lock */ + gif_mtx_grp_attr = lck_grp_attr_alloc_init(); + gif_mtx_grp = lck_grp_alloc_init("gif", gif_mtx_grp_attr); + gif_mtx_attr = lck_attr_alloc_init(); + lck_mtx_init(gif_mtx, gif_mtx_grp, gif_mtx_attr); + /* Register protocol registration functions */ result = proto_register_plumber(PF_INET, APPLE_IF_FAM_GIF, - gif_attach_proto_family, NULL); + gif_attach_proto_family, NULL); if (result != 0) - printf("proto_register_plumber failed for AF_INET error=%d\n", result); - + printf("proto_register_plumber failed for AF_INET error=%d\n", + result); + result = proto_register_plumber(PF_INET6, APPLE_IF_FAM_GIF, - gif_attach_proto_family, NULL); + gif_attach_proto_family, NULL); if (result != 0) - printf("proto_register_plumber failed for AF_INET6 error=%d\n", result); + printf("proto_register_plumber failed for AF_INET6 error=%d\n", + result); ifnet_clone_params.ifc_name = "gif"; ifnet_clone_params.ifc_create = gif_clone_create; @@ -273,61 +291,82 @@ 
gifattach(void) static errno_t gif_set_bpf_tap( - ifnet_t ifp, - bpf_tap_mode mode, - bpf_packet_func callback) + ifnet_t ifp, + bpf_tap_mode mode, + bpf_packet_func callback) { - struct gif_softc *sc = ifnet_softc(ifp); - + struct gif_softc *sc = ifnet_softc(ifp); + + GIF_LOCK(sc); sc->tap_mode = mode; sc->tap_callback = callback; - - return 0; + GIF_UNLOCK(sc); + + return (0); } +static void +gif_detach(struct ifnet *ifp) +{ + struct gif_softc *sc = ifp->if_softc; + lck_mtx_destroy(&sc->gif_lock, gif_mtx_grp); + _FREE(ifp->if_softc, M_DEVBUF); + ifp->if_softc = NULL; + (void) ifnet_release(ifp); +} static int gif_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) { - struct gif_softc *sc = NULL; - struct ifnet_init_params gif_init; - errno_t result = 0; + struct gif_softc *sc = NULL; + struct ifnet_init_params gif_init_params; + errno_t error = 0; + + lck_mtx_lock(gif_mtx); /* Can't create more than GIF_MAXUNIT */ - if (ngif >= GIF_MAXUNIT) - return (ENXIO); + if (ngif >= GIF_MAXUNIT) { + error = ENXIO; + goto done; + } - sc = _MALLOC(sizeof(struct gif_softc), M_DEVBUF, M_WAITOK); + sc = _MALLOC(sizeof (struct gif_softc), M_DEVBUF, M_WAITOK); if (sc == NULL) { - log(LOG_ERR, "gif_clone_create: failed to allocate gif%d\n", unit); - return ENOBUFS; + log(LOG_ERR, "gif_clone_create: failed to allocate gif%d\n", + unit); + error = ENOBUFS; + goto done; } - bzero(sc, sizeof(struct gif_softc)); + bzero(sc, sizeof (struct gif_softc)); /* use the interface name as the unique id for ifp recycle */ - snprintf(sc->gif_ifname, sizeof(sc->gif_ifname), "%s%d", - ifc->ifc_name, unit); - - bzero(&gif_init, sizeof(gif_init)); - gif_init.uniqueid = sc->gif_ifname; - gif_init.uniqueid_len = strlen(sc->gif_ifname); - gif_init.name = GIFNAME; - gif_init.unit = unit; - gif_init.type = IFT_GIF; - gif_init.family = IFNET_FAMILY_GIF; - gif_init.output = gif_output; - gif_init.demux = gif_demux; - gif_init.add_proto = gif_add_proto; - gif_init.del_proto = gif_del_proto; - gif_init.softc = sc; - gif_init.ioctl = gif_ioctl; - gif_init.set_bpf_tap = gif_set_bpf_tap; - - result = ifnet_allocate(&gif_init, &sc->gif_if); - if (result != 0) { - printf("gif_clone_create, ifnet_allocate failed - %d\n", result); + snprintf(sc->gif_ifname, sizeof (sc->gif_ifname), "%s%d", + ifc->ifc_name, unit); + + lck_mtx_init(&sc->gif_lock, gif_mtx_grp, gif_mtx_attr); + + bzero(&gif_init_params, sizeof (gif_init_params)); + gif_init_params.uniqueid = sc->gif_ifname; + gif_init_params.uniqueid_len = strlen(sc->gif_ifname); + gif_init_params.name = GIFNAME; + gif_init_params.unit = unit; + gif_init_params.type = IFT_GIF; + gif_init_params.family = IFNET_FAMILY_GIF; + gif_init_params.output = gif_output; + gif_init_params.demux = gif_demux; + gif_init_params.add_proto = gif_add_proto; + gif_init_params.del_proto = gif_del_proto; + gif_init_params.softc = sc; + gif_init_params.ioctl = gif_ioctl; + gif_init_params.set_bpf_tap = gif_set_bpf_tap; + gif_init_params.detach = gif_detach; + + error = ifnet_allocate(&gif_init_params, &sc->gif_if); + if (error != 0) { + printf("gif_clone_create, ifnet_allocate failed - %d\n", error); _FREE(sc, M_DEVBUF); - return ENOBUFS; + error = ENOBUFS; + goto done; } sc->encap_cookie4 = sc->encap_cookie6 = NULL; @@ -338,12 +377,13 @@ gif_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) printf("%s: unable to attach encap4\n", if_name(sc->gif_if)); ifnet_release(sc->gif_if); FREE(sc, M_DEVBUF); - return ENOBUFS; + error = ENOBUFS; + goto done; } #endif #if INET6 
sc->encap_cookie6 = encap_attach_func(AF_INET6, -1, - gif_encapcheck, (struct protosw*)&in6_gif_protosw, sc); + gif_encapcheck, (struct protosw *)&in6_gif_protosw, sc); if (sc->encap_cookie6 == NULL) { if (sc->encap_cookie4) { encap_detach(sc->encap_cookie4); @@ -352,7 +392,8 @@ gif_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) printf("%s: unable to attach encap6\n", if_name(sc->gif_if)); ifnet_release(sc->gif_if); FREE(sc, M_DEVBUF); - return ENOBUFS; + error = ENOBUFS; + goto done; } #endif sc->gif_called = 0; @@ -362,9 +403,9 @@ gif_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) /* turn off ingress filter */ sc->gif_if.if_flags |= IFF_LINK2; #endif - result = ifnet_attach(sc->gif_if, NULL); - if (result != 0) { - printf("gif_clone_create - ifnet_attach failed - %d\n", result); + error = ifnet_attach(sc->gif_if, NULL); + if (error != 0) { + printf("gif_clone_create - ifnet_attach failed - %d\n", error); ifnet_release(sc->gif_if); if (sc->encap_cookie4) { encap_detach(sc->encap_cookie4); @@ -375,51 +416,62 @@ gif_clone_create(struct if_clone *ifc, uint32_t unit, __unused void *params) sc->encap_cookie6 = NULL; } FREE(sc, M_DEVBUF); - return result; + goto done; } #if CONFIG_MACF_NET mac_ifnet_label_init(&sc->gif_if); #endif - bpfattach(sc->gif_if, DLT_NULL, sizeof(u_int)); + bpfattach(sc->gif_if, DLT_NULL, sizeof (u_int)); TAILQ_INSERT_TAIL(&gifs, sc, gif_link); ngif++; - return 0; +done: + lck_mtx_unlock(gif_mtx); + + return (error); } static int gif_clone_destroy(struct ifnet *ifp) { #if defined(INET) || defined(INET6) - int err = 0; + int error = 0; #endif struct gif_softc *sc = ifp->if_softc; + lck_mtx_lock(gif_mtx); TAILQ_REMOVE(&gifs, sc, gif_link); + ngif--; + GIF_LOCK(sc); gif_delete_tunnel(sc); #ifdef INET6 if (sc->encap_cookie6 != NULL) { - err = encap_detach(sc->encap_cookie6); - KASSERT(err == 0, ("gif_clone_destroy: Unexpected error detaching encap_cookie6")); + error = encap_detach(sc->encap_cookie6); + KASSERT(error == 0, ("gif_clone_destroy: Unexpected \ + error detaching encap_cookie6")); } #endif #ifdef INET if (sc->encap_cookie4 != NULL) { - err = encap_detach(sc->encap_cookie4); - KASSERT(err == 0, ("gif_clone_destroy: Unexpected error detaching encap_cookie4")); + error = encap_detach(sc->encap_cookie4); + KASSERT(error == 0, ("gif_clone_destroy: Unexpected \ + error detaching encap_cookie4")); } #endif - err = ifnet_set_flags(ifp, 0, IFF_UP); - if (err != 0) { - printf("gif_clone_destroy: ifnet_set_flags failed %d\n", err); + error = ifnet_set_flags(ifp, 0, IFF_UP); + if (error != 0) { + printf("gif_clone_destroy: ifnet_set_flags failed %d\n", error); } - err = ifnet_detach(ifp); - if (err != 0) - panic("gif_clone_destroy: ifnet_detach(%p) failed %d\n", ifp, err); - FREE(sc, M_DEVBUF); - ngif--; - return 0; + error = ifnet_detach(ifp); + if (error != 0) + panic("gif_clone_destroy: ifnet_detach(%p) failed %d\n", ifp, + error); + + GIF_UNLOCK(sc); + lck_mtx_unlock(gif_mtx); + + return (0); } static int @@ -429,19 +481,21 @@ gif_encapcheck( int proto, void *arg) { + int error = 0; struct ip ip; struct gif_softc *sc; sc = (struct gif_softc *)arg; if (sc == NULL) - return 0; + return (error); + GIF_LOCK(sc); if ((ifnet_flags(sc->gif_if) & IFF_UP) == 0) - return 0; + goto done; /* no physical address */ if (!sc->gif_psrc || !sc->gif_pdst) - return 0; + goto done; switch (proto) { #if INET @@ -453,56 +507,67 @@ gif_encapcheck( break; #endif default: - return 0; + goto done; } - mbuf_copydata((struct mbuf *)(size_t)m, 0, 
sizeof(ip), &ip); + mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof (ip), &ip); switch (ip.ip_v) { #if INET case 4: if (sc->gif_psrc->sa_family != AF_INET || sc->gif_pdst->sa_family != AF_INET) - return 0; - return gif_encapcheck4(m, off, proto, arg); + goto done; + error = gif_encapcheck4(m, off, proto, arg); #endif #if INET6 case 6: if (sc->gif_psrc->sa_family != AF_INET6 || sc->gif_pdst->sa_family != AF_INET6) - return 0; - return gif_encapcheck6(m, off, proto, arg); + goto done; + error = gif_encapcheck6(m, off, proto, arg); #endif default: - return 0; + goto done; } +done: + GIF_UNLOCK(sc); + return (error); } static errno_t gif_output( - ifnet_t ifp, - mbuf_t m) + ifnet_t ifp, + mbuf_t m) { struct gif_softc *sc = ifnet_softc(ifp); + struct sockaddr *gif_psrc; + struct sockaddr *gif_pdst; int error = 0; - + + GIF_LOCK(sc); + gif_psrc = sc->gif_psrc; + gif_pdst = sc->gif_pdst; + GIF_UNLOCK(sc); + /* - max_gif_nesting check used to live here. It doesn't anymore - because there is no guaruntee that we won't be called - concurrently from more than one thread. + * max_gif_nesting check used to live here. It doesn't anymore + * because there is no guaruntee that we won't be called + * concurrently from more than one thread. */ - m->m_flags &= ~(M_BCAST|M_MCAST); if (!(ifnet_flags(ifp) & IFF_UP) || - sc->gif_psrc == NULL || sc->gif_pdst == NULL) { + gif_psrc == NULL || gif_pdst == NULL) { ifnet_touch_lastchange(ifp); m_freem(m); /* free it here not in dlil_output */ error = ENETDOWN; goto end; } - bpf_tap_out(ifp, 0, m, &sc->gif_proto, sizeof(sc->gif_proto)); - + bpf_tap_out(ifp, 0, m, &sc->gif_proto, sizeof (sc->gif_proto)); + + GIF_LOCK(sc); + /* inner AF-specific encapsulation */ /* XXX should we check if our outer source is legal? */ @@ -524,17 +589,17 @@ gif_output( goto end; } - end: +end: + GIF_UNLOCK(sc); if (error) { /* the mbuf was freed either by in_gif_output or in here */ ifnet_stat_increment_out(ifp, 0, 0, 1); - } - else { + } else { ifnet_stat_increment_out(ifp, 1, m->m_pkthdr.len, 0); } - if (error == 0) + if (error == 0) error = EJUSTRETURN; /* if no error, packet got sent already */ - return error; + return (error); } /* @@ -542,14 +607,14 @@ gif_output( */ static errno_t gif_input( - ifnet_t ifp, - protocol_family_t protocol_family, - mbuf_t m, - __unused char *frame_header) + ifnet_t ifp, + protocol_family_t protocol_family, + mbuf_t m, + __unused char *frame_header) { struct gif_softc *sc = ifnet_softc(ifp); - - bpf_tap_in(ifp, 0, m, &sc->gif_proto, sizeof(sc->gif_proto)); + + bpf_tap_in(ifp, 0, m, &sc->gif_proto, sizeof (sc->gif_proto)); /* * Put the packet to the network layer input queue according to the @@ -580,7 +645,7 @@ gif_ioctl( void *data) { struct gif_softc *sc = ifnet_softc(ifp); - struct ifreq *ifr = (struct ifreq*)data; + struct ifreq *ifr = (struct ifreq *)data; int error = 0, size; struct sockaddr *dst = NULL, *src = NULL; struct sockaddr *sa; @@ -657,40 +722,40 @@ gif_ioctl( /* sa_family must be equal */ if (src->sa_family != dst->sa_family) - return EINVAL; + return (EINVAL); /* validate sa_len */ switch (src->sa_family) { #if INET case AF_INET: - if (src->sa_len != sizeof(struct sockaddr_in)) - return EINVAL; + if (src->sa_len != sizeof (struct sockaddr_in)) + return (EINVAL); break; #endif #if INET6 case AF_INET6: - if (src->sa_len != sizeof(struct sockaddr_in6)) - return EINVAL; + if (src->sa_len != sizeof (struct sockaddr_in6)) + return (EINVAL); break; #endif default: - return EAFNOSUPPORT; + return (EAFNOSUPPORT); } switch (dst->sa_family) { #if 
INET case AF_INET: - if (dst->sa_len != sizeof(struct sockaddr_in)) - return EINVAL; + if (dst->sa_len != sizeof (struct sockaddr_in)) + return (EINVAL); break; #endif #if INET6 case AF_INET6: - if (dst->sa_len != sizeof(struct sockaddr_in6)) - return EINVAL; + if (dst->sa_len != sizeof (struct sockaddr_in6)) + return (EINVAL); break; #endif default: - return EAFNOSUPPORT; + return (EAFNOSUPPORT); } /* check sa_family looks sane for the cmd */ @@ -698,19 +763,37 @@ gif_ioctl( case SIOCSIFPHYADDR: if (src->sa_family == AF_INET) break; - return EAFNOSUPPORT; + return (EAFNOSUPPORT); #if INET6 case SIOCSIFPHYADDR_IN6_32: case SIOCSIFPHYADDR_IN6_64: if (src->sa_family == AF_INET6) break; - return EAFNOSUPPORT; + return (EAFNOSUPPORT); #endif /* INET6 */ case SIOCSLIFPHYADDR: /* checks done in the above */ break; } +#define GIF_ORDERED_LOCK(sc, sc2) \ + if (sc < sc2) { \ + GIF_LOCK(sc); \ + GIF_LOCK(sc2); \ + } else { \ + GIF_LOCK(sc2); \ + GIF_LOCK(sc); \ + } + +#define GIF_ORDERED_UNLOCK(sc, sc2) \ + if (sc > sc2) { \ + GIF_UNLOCK(sc); \ + GIF_UNLOCK(sc2); \ + } else { \ + GIF_UNLOCK(sc2); \ + GIF_UNLOCK(sc); \ + } + ifnet_head_lock_shared(); TAILQ_FOREACH(ifp2, &ifnet_head, if_link) { if (strcmp(ifnet_name(ifp2), GIFNAME) != 0) @@ -718,17 +801,24 @@ gif_ioctl( sc2 = ifnet_softc(ifp2); if (sc2 == sc) continue; - if (!sc2->gif_pdst || !sc2->gif_psrc) + /* lock sc and sc2 in increasing order of ifnet index */ + GIF_ORDERED_LOCK(sc, sc2); + if (!sc2->gif_pdst || !sc2->gif_psrc) { + GIF_ORDERED_UNLOCK(sc, sc2); continue; + } if (sc2->gif_pdst->sa_family != dst->sa_family || sc2->gif_pdst->sa_len != dst->sa_len || sc2->gif_psrc->sa_family != src->sa_family || - sc2->gif_psrc->sa_len != src->sa_len) + sc2->gif_psrc->sa_len != src->sa_len) { + GIF_ORDERED_UNLOCK(sc, sc2); continue; + } #ifndef XBONEHACK /* can't configure same pair of address onto two gifs */ if (bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 && bcmp(sc2->gif_psrc, src, src->sa_len) == 0) { + GIF_ORDERED_UNLOCK(sc, sc2); error = EADDRNOTAVAIL; ifnet_head_done(); goto bad; @@ -736,14 +826,16 @@ gif_ioctl( #endif /* can't configure multiple multi-dest interfaces */ -#define multidest(x) \ +#define multidest(x) \ (((struct sockaddr_in *)(void *)(x))->sin_addr.s_addr == INADDR_ANY) #if INET6 -#define multidest6(x) \ - (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *)(void *)(x))->sin6_addr)) +#define multidest6(x) \ + (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6 *) \ + (void *)(x))->sin6_addr)) #endif if (dst->sa_family == AF_INET && multidest(dst) && multidest(sc2->gif_pdst)) { + GIF_ORDERED_UNLOCK(sc, sc2); error = EADDRNOTAVAIL; ifnet_head_done(); goto bad; @@ -751,37 +843,49 @@ gif_ioctl( #if INET6 if (dst->sa_family == AF_INET6 && multidest6(dst) && multidest6(sc2->gif_pdst)) { + GIF_ORDERED_UNLOCK(sc, sc2); error = EADDRNOTAVAIL; ifnet_head_done(); goto bad; } #endif + GIF_ORDERED_UNLOCK(sc, sc2); } ifnet_head_done(); + GIF_LOCK(sc); if (sc->gif_psrc) FREE((caddr_t)sc->gif_psrc, M_IFADDR); - sa = (struct sockaddr *)_MALLOC(src->sa_len, M_IFADDR, M_WAITOK); - if (sa == NULL) - return ENOBUFS; + sa = (struct sockaddr *)_MALLOC(src->sa_len, M_IFADDR, + M_WAITOK); + if (sa == NULL) { + GIF_UNLOCK(sc); + return (ENOBUFS); + } bcopy((caddr_t)src, (caddr_t)sa, src->sa_len); sc->gif_psrc = sa; if (sc->gif_pdst) FREE((caddr_t)sc->gif_pdst, M_IFADDR); - sa = (struct sockaddr *)_MALLOC(dst->sa_len, M_IFADDR, M_WAITOK); - if (sa == NULL) - return ENOBUFS; + sa = (struct sockaddr *)_MALLOC(dst->sa_len, M_IFADDR, + M_WAITOK); + if (sa == NULL) 
{ + GIF_UNLOCK(sc); + return (ENOBUFS); + } bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len); sc->gif_pdst = sa; + GIF_UNLOCK(sc); + + ifnet_set_flags(ifp, IFF_RUNNING | IFF_UP, IFF_RUNNING | + IFF_UP); - ifnet_set_flags(ifp, IFF_RUNNING | IFF_UP, IFF_RUNNING | IFF_UP); - error = 0; break; #ifdef SIOCDIFPHYADDR case SIOCDIFPHYADDR: + GIF_LOCK(sc); if (sc->gif_psrc) { FREE((caddr_t)sc->gif_psrc, M_IFADDR); sc->gif_psrc = NULL; @@ -790,15 +894,18 @@ gif_ioctl( FREE((caddr_t)sc->gif_pdst, M_IFADDR); sc->gif_pdst = NULL; } + GIF_UNLOCK(sc); /* change the IFF_{UP, RUNNING} flag as well? */ break; #endif - + case SIOCGIFPSRCADDR: #if INET6 case SIOCGIFPSRCADDR_IN6: #endif /* INET6 */ + GIF_LOCK(sc); if (sc->gif_psrc == NULL) { + GIF_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } @@ -807,30 +914,36 @@ gif_ioctl( #if INET case SIOCGIFPSRCADDR: dst = &ifr->ifr_addr; - size = sizeof(ifr->ifr_addr); + size = sizeof (ifr->ifr_addr); break; #endif /* INET */ #if INET6 case SIOCGIFPSRCADDR_IN6: dst = (struct sockaddr *) &(((struct in6_ifreq *)data)->ifr_addr); - size = sizeof(((struct in6_ifreq *)data)->ifr_addr); + size = sizeof (((struct in6_ifreq *)data)->ifr_addr); break; #endif /* INET6 */ default: + GIF_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } - if (src->sa_len > size) - return EINVAL; + if (src->sa_len > size) { + GIF_UNLOCK(sc); + return (EINVAL); + } bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); + GIF_UNLOCK(sc); break; - + case SIOCGIFPDSTADDR: #if INET6 case SIOCGIFPDSTADDR_IN6: #endif /* INET6 */ + GIF_LOCK(sc); if (sc->gif_pdst == NULL) { + GIF_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } @@ -839,27 +952,33 @@ gif_ioctl( #if INET case SIOCGIFPDSTADDR: dst = &ifr->ifr_addr; - size = sizeof(ifr->ifr_addr); + size = sizeof (ifr->ifr_addr); break; #endif /* INET */ #if INET6 case SIOCGIFPDSTADDR_IN6: dst = (struct sockaddr *) &(((struct in6_ifreq *)data)->ifr_addr); - size = sizeof(((struct in6_ifreq *)data)->ifr_addr); + size = sizeof (((struct in6_ifreq *)data)->ifr_addr); break; #endif /* INET6 */ default: error = EADDRNOTAVAIL; + GIF_UNLOCK(sc); goto bad; } - if (src->sa_len > size) - return EINVAL; + if (src->sa_len > size) { + GIF_UNLOCK(sc); + return (EINVAL); + } bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); + GIF_UNLOCK(sc); break; case SIOCGLIFPHYADDR: + GIF_LOCK(sc); if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) { + GIF_UNLOCK(sc); error = EADDRNOTAVAIL; goto bad; } @@ -868,19 +987,24 @@ gif_ioctl( src = sc->gif_psrc; dst = (struct sockaddr *) &(((struct if_laddrreq *)data)->addr); - size = sizeof(((struct if_laddrreq *)data)->addr); - if (src->sa_len > size) - return EINVAL; + size = sizeof (((struct if_laddrreq *)data)->addr); + if (src->sa_len > size) { + GIF_UNLOCK(sc); + return (EINVAL); + } bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); /* copy dst */ src = sc->gif_pdst; dst = (struct sockaddr *) &(((struct if_laddrreq *)data)->dstaddr); - size = sizeof(((struct if_laddrreq *)data)->dstaddr); - if (src->sa_len > size) - return EINVAL; + size = sizeof (((struct if_laddrreq *)data)->dstaddr); + if (src->sa_len > size) { + GIF_UNLOCK(sc); + return (EINVAL); + } bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); + GIF_UNLOCK(sc); break; case SIOCSIFFLAGS: @@ -891,17 +1015,14 @@ gif_ioctl( error = EOPNOTSUPP; break; } - bad: - return error; +bad: + return (error); } -/* This function is not used in our stack */ -void -gif_delete_tunnel(sc) - struct gif_softc *sc; +static void +gif_delete_tunnel(struct gif_softc *sc) { - /* XXX: NetBSD protects this function with 
splsoftnet() */ - + GIF_LOCK_ASSERT(sc); if (sc->gif_psrc) { FREE((caddr_t)sc->gif_psrc, M_IFADDR); sc->gif_psrc = NULL; @@ -910,5 +1031,6 @@ gif_delete_tunnel(sc) FREE((caddr_t)sc->gif_pdst, M_IFADDR); sc->gif_pdst = NULL; } + ROUTE_RELEASE(&sc->gif_ro); /* change the IFF_UP flag as well? */ } diff --git a/bsd/net/if_gif.h b/bsd/net/if_gif.h index dfba33645..7fe954f2f 100644 --- a/bsd/net/if_gif.h +++ b/bsd/net/if_gif.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* $KAME: if_gif.h,v 1.7 2000/02/22 14:01:46 itojun Exp $ */ @@ -61,13 +61,16 @@ */ #ifndef _NET_IF_GIF_H_ -#define _NET_IF_GIF_H_ +#define _NET_IF_GIF_H_ #include #include /* xxx sigh, why route have struct route instead of pointer? */ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE + +extern void gif_init(void); + struct encaptab; struct gif_softc { @@ -91,16 +94,22 @@ struct gif_softc { bpf_tap_mode tap_mode; bpf_packet_func tap_callback; char gif_ifname[IFNAMSIZ]; + decl_lck_mtx_data(, gif_lock); /* lock for gif softc structure */ }; -#define gif_ro gifsc_gifscr.gifscr_ro +#define GIF_LOCK(_sc) lck_mtx_lock(&(_sc)->gif_lock) +#define GIF_UNLOCK(_sc) lck_mtx_unlock(&(_sc)->gif_lock) +#define GIF_LOCK_ASSERT(_sc) lck_mtx_assert(&(_sc)->gif_lock, \ + LCK_MTX_ASSERT_OWNED) + +#define gif_ro gifsc_gifscr.gifscr_ro #if INET6 -#define gif_ro6 gifsc_gifscr.gifscr_ro6 +#define gif_ro6 gifsc_gifscr.gifscr_ro6 #endif -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ -#define GIF_MTU (1280) /* Default MTU */ +#define GIF_MTU (1280) /* Default MTU */ #define GIF_MTU_MIN (1280) /* Minimum MTU */ #define GIF_MTU_MAX (8192) /* Maximum MTU */ diff --git a/bsd/net/if_ipsec.c b/bsd/net/if_ipsec.c new file mode 100644 index 000000000..e1aac8335 --- /dev/null +++ b/bsd/net/if_ipsec.c @@ -0,0 +1,926 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Kernel Control functions */ +static errno_t ipsec_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, + void **unitinfo); +static errno_t ipsec_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, + void *unitinfo); +static errno_t ipsec_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, + void *unitinfo, mbuf_t m, int flags); +static errno_t ipsec_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int opt, void *data, size_t *len); +static errno_t ipsec_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int opt, void *data, size_t len); + +/* Network Interface functions */ +static void ipsec_start(ifnet_t interface); +static errno_t ipsec_output(ifnet_t interface, mbuf_t data); +static errno_t ipsec_demux(ifnet_t interface, mbuf_t data, char *frame_header, + protocol_family_t *protocol); +static errno_t ipsec_add_proto(ifnet_t interface, protocol_family_t protocol, + const struct ifnet_demux_desc *demux_array, + u_int32_t demux_count); +static errno_t ipsec_del_proto(ifnet_t interface, protocol_family_t protocol); +static errno_t ipsec_ioctl(ifnet_t interface, u_long cmd, void *data); +static void ipsec_detached(ifnet_t interface); + +/* Protocol handlers */ +static errno_t ipsec_attach_proto(ifnet_t interface, protocol_family_t proto); +static errno_t ipsec_proto_input(ifnet_t interface, protocol_family_t protocol, + mbuf_t m, char *frame_header); +static errno_t ipsec_proto_pre_output(ifnet_t interface, protocol_family_t protocol, + mbuf_t *packet, const struct sockaddr *dest, void *route, + char *frame_type, char *link_layer_dest); + +static kern_ctl_ref ipsec_kctlref; +static u_int32_t ipsec_family; +static OSMallocTag ipsec_malloc_tag; +static SInt32 ipsec_ifcount = 0; + +#define IPSECQ_MAXLEN 256 + +/* Prepend length */ +static void* +ipsec_alloc(size_t size) +{ + size_t *mem = OSMalloc(size + sizeof(size_t), ipsec_malloc_tag); + + if (mem) { + *mem = size + sizeof(size_t); + mem++; + } + + return (void*)mem; +} + +static void +ipsec_free(void *ptr) +{ + size_t *size = ptr; + size--; + OSFree(size, *size, ipsec_malloc_tag); +} + +errno_t +ipsec_register_control(void) +{ + struct kern_ctl_reg kern_ctl; + errno_t result = 0; + + /* Create a tag to allocate memory */ + ipsec_malloc_tag = OSMalloc_Tagalloc(IPSEC_CONTROL_NAME, OSMT_DEFAULT); + + /* Find a unique 
value for our interface family */ + result = mbuf_tag_id_find(IPSEC_CONTROL_NAME, &ipsec_family); + if (result != 0) { + printf("ipsec_register_control - mbuf_tag_id_find_internal failed: %d\n", result); + return result; + } + + bzero(&kern_ctl, sizeof(kern_ctl)); + strncpy(kern_ctl.ctl_name, IPSEC_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); + kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; + kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */ + kern_ctl.ctl_sendsize = 64 * 1024; + kern_ctl.ctl_recvsize = 64 * 1024; + kern_ctl.ctl_connect = ipsec_ctl_connect; + kern_ctl.ctl_disconnect = ipsec_ctl_disconnect; + kern_ctl.ctl_send = ipsec_ctl_send; + kern_ctl.ctl_setopt = ipsec_ctl_setopt; + kern_ctl.ctl_getopt = ipsec_ctl_getopt; + + result = ctl_register(&kern_ctl, &ipsec_kctlref); + if (result != 0) { + printf("ipsec_register_control - ctl_register failed: %d\n", result); + return result; + } + + /* Register the protocol plumbers */ + if ((result = proto_register_plumber(PF_INET, ipsec_family, + ipsec_attach_proto, NULL)) != 0) { + printf("ipsec_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n", + ipsec_family, result); + ctl_deregister(ipsec_kctlref); + return result; + } + + /* Register the protocol plumbers */ + if ((result = proto_register_plumber(PF_INET6, ipsec_family, + ipsec_attach_proto, NULL)) != 0) { + proto_unregister_plumber(PF_INET, ipsec_family); + ctl_deregister(ipsec_kctlref); + printf("ipsec_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n", + ipsec_family, result); + return result; + } + + return 0; +} + +/* Helpers */ +int +ipsec_interface_isvalid (ifnet_t interface) +{ + struct ipsec_pcb *pcb = NULL; + + if (interface == NULL) + return 0; + + pcb = ifnet_softc(interface); + + if (pcb == NULL) + return 0; + + /* When ctl disconnects, ipsec_unit is set to 0 */ + if (pcb->ipsec_unit == 0) + return 0; + + return 1; +} + +/* Kernel control functions */ + +static errno_t +ipsec_ctl_connect(kern_ctl_ref kctlref, + struct sockaddr_ctl *sac, + void **unitinfo) +{ + struct ifnet_init_eparams ipsec_init; + struct ipsec_pcb *pcb; + errno_t result; + struct ifnet_stats_param stats; + + /* kernel control allocates, interface frees */ + pcb = ipsec_alloc(sizeof(*pcb)); + if (pcb == NULL) + return ENOMEM; + + /* Setup the protocol control block */ + bzero(pcb, sizeof(*pcb)); + *unitinfo = pcb; + pcb->ipsec_ctlref = kctlref; + pcb->ipsec_unit = sac->sc_unit; + + printf("ipsec_ctl_connect: creating interface ipsec%d\n", pcb->ipsec_unit - 1); + + /* Create the interface */ + bzero(&ipsec_init, sizeof(ipsec_init)); + ipsec_init.ver = IFNET_INIT_CURRENT_VERSION; + ipsec_init.len = sizeof (ipsec_init); + ipsec_init.name = "ipsec"; + ipsec_init.start = ipsec_start; + ipsec_init.sndq_maxlen = IPSECQ_MAXLEN; + ipsec_init.unit = pcb->ipsec_unit - 1; + ipsec_init.family = ipsec_family; + ipsec_init.type = IFT_OTHER; + ipsec_init.demux = ipsec_demux; + ipsec_init.add_proto = ipsec_add_proto; + ipsec_init.del_proto = ipsec_del_proto; + ipsec_init.softc = pcb; + ipsec_init.ioctl = ipsec_ioctl; + ipsec_init.detach = ipsec_detached; + + result = ifnet_allocate_extended(&ipsec_init, &pcb->ipsec_ifp); + if (result != 0) { + printf("ipsec_ctl_connect - ifnet_allocate failed: %d\n", result); + ipsec_free(pcb); + return result; + } + OSIncrementAtomic(&ipsec_ifcount); + + /* Set flags and additional information. 
*/ + ifnet_set_mtu(pcb->ipsec_ifp, 1500); + ifnet_set_flags(pcb->ipsec_ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff); + + /* The interface must generate its own IPv6 LinkLocal address, + * if possible following the recommendation of RFC2472 to the 64bit interface ID + */ + ifnet_set_eflags(pcb->ipsec_ifp, IFEF_NOAUTOIPV6LL, IFEF_NOAUTOIPV6LL); + + /* Reset the stats in case as the interface may have been recycled */ + bzero(&stats, sizeof(struct ifnet_stats_param)); + ifnet_set_stat(pcb->ipsec_ifp, &stats); + + /* Attach the interface */ + result = ifnet_attach(pcb->ipsec_ifp, NULL); + if (result != 0) { + printf("ipsec_ctl_connect - ifnet_allocate failed: %d\n", result); + ifnet_release(pcb->ipsec_ifp); + ipsec_free(pcb); + } + + /* Attach to bpf */ + if (result == 0) + bpfattach(pcb->ipsec_ifp, DLT_NULL, 4); + + /* The interfaces resoures allocated, mark it as running */ + if (result == 0) + ifnet_set_flags(pcb->ipsec_ifp, IFF_RUNNING, IFF_RUNNING); + + return result; +} + +static errno_t +ipsec_detach_ip(ifnet_t interface, + protocol_family_t protocol, + socket_t pf_socket) +{ + errno_t result = EPROTONOSUPPORT; + + /* Attempt a detach */ + if (protocol == PF_INET) { + struct ifreq ifr; + + bzero(&ifr, sizeof(ifr)); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", + ifnet_name(interface), ifnet_unit(interface)); + + result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr); + } + else if (protocol == PF_INET6) { + struct in6_ifreq ifr6; + + bzero(&ifr6, sizeof(ifr6)); + snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d", + ifnet_name(interface), ifnet_unit(interface)); + + result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6); + } + + return result; +} + +static void +ipsec_remove_address(ifnet_t interface, + protocol_family_t protocol, + ifaddr_t address, + socket_t pf_socket) +{ + errno_t result = 0; + + /* Attempt a detach */ + if (protocol == PF_INET) { + struct ifreq ifr; + + bzero(&ifr, sizeof(ifr)); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d", + ifnet_name(interface), ifnet_unit(interface)); + result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr)); + if (result != 0) { + printf("ipsec_remove_address - ifaddr_address failed: %d", result); + } + else { + result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr); + if (result != 0) { + printf("ipsec_remove_address - SIOCDIFADDR failed: %d", result); + } + } + } + else if (protocol == PF_INET6) { + struct in6_ifreq ifr6; + + bzero(&ifr6, sizeof(ifr6)); + snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d", + ifnet_name(interface), ifnet_unit(interface)); + result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr, + sizeof(ifr6.ifr_addr)); + if (result != 0) { + printf("ipsec_remove_address - ifaddr_address failed (v6): %d", + result); + } + else { + result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6); + if (result != 0) { + printf("ipsec_remove_address - SIOCDIFADDR_IN6 failed: %d", + result); + } + } + } +} + +static void +ipsec_cleanup_family(ifnet_t interface, + protocol_family_t protocol) +{ + errno_t result = 0; + socket_t pf_socket = NULL; + ifaddr_t *addresses = NULL; + int i; + + if (protocol != PF_INET && protocol != PF_INET6) { + printf("ipsec_cleanup_family - invalid protocol family %d\n", protocol); + return; + } + + /* Create a socket for removing addresses and detaching the protocol */ + result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket); + if (result != 0) { + if (result != EAFNOSUPPORT) + printf("ipsec_cleanup_family - failed to create %s socket: 
%d\n", + protocol == PF_INET ? "IP" : "IPv6", result); + goto cleanup; + } + + /* always set SS_PRIV, we want to close and detach regardless */ + sock_setpriv(pf_socket, 1); + + result = ipsec_detach_ip(interface, protocol, pf_socket); + if (result == 0 || result == ENXIO) { + /* We are done! We either detached or weren't attached. */ + goto cleanup; + } + else if (result != EBUSY) { + /* Uh, not really sure what happened here... */ + printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result); + goto cleanup; + } + + /* + * At this point, we received an EBUSY error. This means there are + * addresses attached. We should detach them and then try again. + */ + result = ifnet_get_address_list_family(interface, &addresses, protocol); + if (result != 0) { + printf("fnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n", + ifnet_name(interface), ifnet_unit(interface), + protocol == PF_INET ? "PF_INET" : "PF_INET6", result); + goto cleanup; + } + + for (i = 0; addresses[i] != 0; i++) { + ipsec_remove_address(interface, protocol, addresses[i], pf_socket); + } + ifnet_free_address_list(addresses); + addresses = NULL; + + /* + * The addresses should be gone, we should try the remove again. + */ + result = ipsec_detach_ip(interface, protocol, pf_socket); + if (result != 0 && result != ENXIO) { + printf("ipsec_cleanup_family - ipsec_detach_ip failed: %d\n", result); + } + +cleanup: + if (pf_socket != NULL) + sock_close(pf_socket); + + if (addresses != NULL) + ifnet_free_address_list(addresses); +} + +static errno_t +ipsec_ctl_disconnect(__unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + void *unitinfo) +{ + struct ipsec_pcb *pcb = unitinfo; + ifnet_t ifp = pcb->ipsec_ifp; + errno_t result = 0; + + pcb->ipsec_ctlref = NULL; + pcb->ipsec_unit = 0; + + /* + * We want to do everything in our power to ensure that the interface + * really goes away when the socket is closed. We must remove IP/IPv6 + * addresses and detach the protocols. Finally, we can remove and + * release the interface. + */ + key_delsp_for_ipsec_if(ifp); + + ipsec_cleanup_family(ifp, AF_INET); + ipsec_cleanup_family(ifp, AF_INET6); + + if ((result = ifnet_detach(ifp)) != 0) { + printf("ipsec_ctl_disconnect - ifnet_detach failed: %d\n", result); + } + + return 0; +} + +static errno_t +ipsec_ctl_send(__unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + __unused void *unitinfo, + mbuf_t m, + __unused int flags) +{ + /* Receive messages from the control socket. Currently unused. */ + mbuf_freem(m); + return 0; +} + +static errno_t +ipsec_ctl_setopt(__unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + void *unitinfo, + int opt, + void *data, + size_t len) +{ + struct ipsec_pcb *pcb = unitinfo; + errno_t result = 0; + + /* check for privileges for privileged options */ + switch (opt) { + case IPSEC_OPT_FLAGS: + case IPSEC_OPT_EXT_IFDATA_STATS: + case IPSEC_OPT_SET_DELEGATE_INTERFACE: + if (kauth_cred_issuser(kauth_cred_get()) == 0) { + return EPERM; + } + break; + } + + switch (opt) { + case IPSEC_OPT_FLAGS: + if (len != sizeof(u_int32_t)) + result = EMSGSIZE; + else + pcb->ipsec_flags = *(u_int32_t *)data; + break; + + case IPSEC_OPT_EXT_IFDATA_STATS: + if (len != sizeof(int)) { + result = EMSGSIZE; + break; + } + pcb->ipsec_ext_ifdata_stats = (*(int *)data) ? 
1 : 0; + break; + + case IPSEC_OPT_INC_IFDATA_STATS_IN: + case IPSEC_OPT_INC_IFDATA_STATS_OUT: { + struct ipsec_stats_param *utsp = (struct ipsec_stats_param *)data; + + if (utsp == NULL || len < sizeof(struct ipsec_stats_param)) { + result = EINVAL; + break; + } + if (!pcb->ipsec_ext_ifdata_stats) { + result = EINVAL; + break; + } + if (opt == IPSEC_OPT_INC_IFDATA_STATS_IN) + ifnet_stat_increment_in(pcb->ipsec_ifp, utsp->utsp_packets, + utsp->utsp_bytes, utsp->utsp_errors); + else + ifnet_stat_increment_out(pcb->ipsec_ifp, utsp->utsp_packets, + utsp->utsp_bytes, utsp->utsp_errors); + break; + } + + case IPSEC_OPT_SET_DELEGATE_INTERFACE: { + ifnet_t del_ifp = NULL; + char name[IFNAMSIZ]; + + if (len > IFNAMSIZ - 1) { + result = EMSGSIZE; + break; + } + if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */ + bcopy(data, name, len); + name[len] = 0; + result = ifnet_find_by_name(name, &del_ifp); + } + if (result == 0) { + result = ifnet_set_delegate(pcb->ipsec_ifp, del_ifp); + if (del_ifp) + ifnet_release(del_ifp); + } + break; + } + + default: + result = ENOPROTOOPT; + break; + } + + return result; +} + +static errno_t +ipsec_ctl_getopt(__unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + void *unitinfo, + int opt, + void *data, + size_t *len) +{ + struct ipsec_pcb *pcb = unitinfo; + errno_t result = 0; + + switch (opt) { + case IPSEC_OPT_FLAGS: + if (*len != sizeof(u_int32_t)) + result = EMSGSIZE; + else + *(u_int32_t *)data = pcb->ipsec_flags; + break; + + case IPSEC_OPT_EXT_IFDATA_STATS: + if (*len != sizeof(int)) + result = EMSGSIZE; + else + *(int *)data = (pcb->ipsec_ext_ifdata_stats) ? 1 : 0; + break; + + case IPSEC_OPT_IFNAME: + *len = snprintf(data, *len, "%s%d", ifnet_name(pcb->ipsec_ifp), ifnet_unit(pcb->ipsec_ifp)) + 1; + break; + + default: + result = ENOPROTOOPT; + break; + } + + return result; +} + +/* Network Interface functions */ +static errno_t +ipsec_output(ifnet_t interface, + mbuf_t data) +{ + struct ipsec_pcb *pcb = ifnet_softc(interface); + struct ipsec_output_state ipsec_state; + struct route ro; + struct route_in6 ro6; + int length; + struct ip *ip; + struct ip6_hdr *ip6; + struct secpolicy *sp = NULL; + struct ip_out_args ipoa; + struct ip6_out_args ip6oa; + int error = 0; + u_int ip_version = 0; + uint32_t af; + int flags = 0;; + int out_interface_index = 0; + struct flowadv *adv = NULL; + + uint32_t policy_id = 0; + + /* Find policy using ID in mbuf */ + policy_id = data->m_pkthdr.ipsec_policy; + sp = key_getspbyid(policy_id); + + if (sp == NULL) { + printf("ipsec_output: No policy specified, dropping packet.\n"); + goto ipsec_output_err; + } + + /* Validate policy */ + if (sp->ipsec_if != pcb->ipsec_ifp) { + printf("ipsec_output: Selected policy does not match %s interface.\n", pcb->ipsec_ifp->if_xname); + goto ipsec_output_err; + } + + ip = mtod(data, struct ip *); + ip_version = ip->ip_v; + + switch (ip_version) { + case 4: + /* Tap */ + af = AF_INET; + bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af)); + + /* Apply encryption */ + bzero(&ipsec_state, sizeof(ipsec_state)); + ipsec_state.m = data; + ipsec_state.dst = (struct sockaddr *)&sp->spidx.dst; + bzero(&ipsec_state.ro, sizeof(ipsec_state.ro)); + + error = ipsec4_output(&ipsec_state, sp, 0); + data = ipsec_state.m; + if (error || data == NULL) { + printf("ipsec_output: ipsec4_output error.\n"); + goto ipsec_output_err; + } + + /* Set traffic class to OAM, set flow */ + m_set_service_class(data, MBUF_SC_OAM); + data->m_pkthdr.pkt_flowsrc = 
FLOWSRC_IFNET; + data->m_pkthdr.pkt_flowid = interface->if_flowhash; + data->m_pkthdr.pkt_proto = ip->ip_p; + data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC); + + /* Flip endian-ness for ip_output */ + ip = mtod(data, struct ip *); + NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + + /* Increment statistics */ + length = mbuf_pkthdr_len(data); + ifnet_stat_increment_out(interface, 1, length, 0); + + /* Send to ip_output */ + bzero(&ro, sizeof(ro)); + + flags = IP_OUTARGS | /* Passing out args to specify interface */ + IP_NOIPSEC; /* To ensure the packet doesn't go through ipsec twice */ + + if (sp->outgoing_if != NULL) { + out_interface_index = sp->outgoing_if->if_index; + } + + bzero(&ipoa, sizeof(ipoa)); + ipoa.ipoa_flowadv.code = 0; + ipoa.ipoa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR; + if (out_interface_index) { + ipoa.ipoa_boundif = out_interface_index; + ipoa.ipoa_flags |= IPOAF_BOUND_IF; + } + + adv = &ipoa.ipoa_flowadv; + + (void) ip_output(data, NULL, &ro, flags, NULL, &ipoa); + data = NULL; + + if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) { + error = ENOBUFS; + ifnet_disable_output(interface); + } + + goto done; + case 6: + af = AF_INET6; + bpf_tap_out(pcb->ipsec_ifp, DLT_NULL, data, &af, sizeof(af)); + + ip6 = mtod(data, struct ip6_hdr *); + + u_char *nexthdrp = &ip6->ip6_nxt; + struct mbuf *mprev = data; + + int needipsectun = 0; + error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev, sp, flags, &needipsectun); + if (needipsectun) { + error = ipsec6_output_tunnel(&ipsec_state, sp, flags); + if (ipsec_state.tunneled == 4) /* tunneled in IPv4 - packet is gone */ + goto done; + } + data = ipsec_state.m; + if (error || data == NULL) { + printf("ipsec_output: ipsec6_output error.\n"); + goto ipsec_output_err; + } + + /* Set traffic class to OAM, set flow */ + m_set_service_class(data, MBUF_SC_OAM); + data->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET; + data->m_pkthdr.pkt_flowid = interface->if_flowhash; + data->m_pkthdr.pkt_proto = ip->ip_p; + data->m_pkthdr.pkt_flags = (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC); + + /* Increment statistics */ + length = mbuf_pkthdr_len(data); + ifnet_stat_increment_out(interface, 1, length, 0); + + /* Send to ip6_output */ + bzero(&ro6, sizeof(ro6)); + + flags = IPV6_OUTARGS; + + if (sp->outgoing_if != NULL) { + out_interface_index = sp->outgoing_if->if_index; + } + + bzero(&ip6oa, sizeof(ip6oa)); + ip6oa.ip6oa_flowadv.code = 0; + ip6oa.ip6oa_flags = IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR; + if (out_interface_index) { + ip6oa.ip6oa_boundif = out_interface_index; + ip6oa.ip6oa_flags |= IPOAF_BOUND_IF; + } + + adv = &ip6oa.ip6oa_flowadv; + + (void) ip6_output(data, NULL, &ro6, flags, NULL, NULL, &ip6oa); + data = NULL; + + if (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED) { + error = ENOBUFS; + ifnet_disable_output(interface); + } + + goto done; + default: + printf("ipsec_output: Received unknown packet version %d.\n", ip_version); + error = -1; + goto ipsec_output_err; + } + +done: + if (sp != NULL) { + key_freesp(sp, KEY_SADB_UNLOCKED); + } + return error; + +ipsec_output_err: + if (data) + mbuf_freem(data); + goto done; +} + +static void +ipsec_start(ifnet_t interface) +{ + mbuf_t data; + + for (;;) { + if (ifnet_dequeue(interface, &data) != 0) + break; + (void) ipsec_output(interface, data); + } +} + +/* Network Interface functions */ +static errno_t +ipsec_demux(__unused ifnet_t interface, + mbuf_t data, + __unused char *frame_header, + protocol_family_t 
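/*
 * [Editor's note -- sketch, not part of the original patch.] ipsec_start()
 * above is the "new-model" output path: the stack enqueues on the
 * interface's own queue and pokes the start callback, which drains with
 * ifnet_dequeue() until it returns non-zero. A driver that also honors
 * the flow-advisory ifnet_disable_output() seen in ipsec_output() would
 * re-enable itself once it can transmit again, roughly:
 *
 *     static void
 *     drv_start(ifnet_t ifp)                    // hypothetical driver
 *     {
 *         mbuf_t m;
 *         while (ifnet_dequeue(ifp, &m) == 0) {
 *             if (drv_tx(ifp, m) == 0)          // hypothetical transmit
 *                 ifnet_enable_output(ifp);     // clear an earlier disable
 *         }
 *     }
 */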
*protocol) +{ + struct ip *ip; + u_int ip_version; + + while (data != NULL && mbuf_len(data) < 1) { + data = mbuf_next(data); + } + + if (data == NULL) + return ENOENT; + + ip = mtod(data, struct ip *); + ip_version = ip->ip_v; + + switch(ip_version) { + case 4: + *protocol = PF_INET; + return 0; + case 6: + *protocol = PF_INET6; + return 0; + default: + break; + } + + return 0; +} + +static errno_t +ipsec_add_proto(__unused ifnet_t interface, + protocol_family_t protocol, + __unused const struct ifnet_demux_desc *demux_array, + __unused u_int32_t demux_count) +{ + switch(protocol) { + case PF_INET: + return 0; + case PF_INET6: + return 0; + default: + break; + } + + return ENOPROTOOPT; +} + +static errno_t +ipsec_del_proto(__unused ifnet_t interface, + __unused protocol_family_t protocol) +{ + return 0; +} + +static errno_t +ipsec_ioctl(ifnet_t interface, + u_long command, + void *data) +{ + errno_t result = 0; + + switch(command) { + case SIOCSIFMTU: + ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu); + break; + + case SIOCSIFFLAGS: + /* ifioctl() takes care of it */ + break; + + default: + result = EOPNOTSUPP; + } + + return result; +} + +static void +ipsec_detached( + ifnet_t interface) +{ + struct ipsec_pcb *pcb = ifnet_softc(interface); + + ifnet_release(pcb->ipsec_ifp); + ipsec_free(pcb); + + OSDecrementAtomic(&ipsec_ifcount); +} + +/* Protocol Handlers */ + +static errno_t +ipsec_proto_input(__unused ifnet_t interface, + protocol_family_t protocol, + mbuf_t m, + __unused char *frame_header) +{ + if (proto_input(protocol, m) != 0) + m_freem(m); + + return 0; +} + +static errno_t +ipsec_proto_pre_output(__unused ifnet_t interface, + protocol_family_t protocol, + __unused mbuf_t *packet, + __unused const struct sockaddr *dest, + __unused void *route, + __unused char *frame_type, + __unused char *link_layer_dest) +{ + + *(protocol_family_t *)(void *)frame_type = protocol; + return 0; +} + +static errno_t +ipsec_attach_proto(ifnet_t interface, + protocol_family_t protocol) +{ + struct ifnet_attach_proto_param proto; + errno_t result; + + bzero(&proto, sizeof(proto)); + proto.input = ipsec_proto_input; + proto.pre_output = ipsec_proto_pre_output; + + result = ifnet_attach_protocol(interface, protocol, &proto); + if (result != 0 && result != EEXIST) { + printf("ipsec_attach_inet - ifnet_attach_protocol %d failed: %d\n", + protocol, result); + } + + return result; +} diff --git a/osfmk/kern/etimer.h b/bsd/net/if_ipsec.h similarity index 50% rename from osfmk/kern/etimer.h rename to bsd/net/if_ipsec.h index 8c3674288..670e01f23 100644 --- a/osfmk/kern/etimer.h +++ b/bsd/net/if_ipsec.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2004-2008 Apple Inc. All rights reserved. + * Copyright (c) 2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,53 +22,56 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * @APPLE_FREE_COPYRIGHT@ - */ -/* - * File: etimer.h - * Purpose: Routines for handling the machine independent - * real-time clock. - */ -#ifdef KERNEL_PRIVATE -#ifndef _KERN_ETIMER_H_ -#define _KERN_ETIMER_H_ +#ifndef _NET_IF_IPSEC_H_ +#define _NET_IF_IPSEC_H_ -#define EndOfAllTime 0xFFFFFFFFFFFFFFFFULL +#ifdef BSD_KERNEL_PRIVATE -typedef void (*etimer_intr_t)(int, uint64_t); +#include +#include -extern int setTimerReq(void); -extern void etimer_intr(int inuser, uint64_t iaddr); +/* Control block allocated for each kernel control connection */ +struct ipsec_pcb { + kern_ctl_ref ipsec_ctlref; + ifnet_t ipsec_ifp; + u_int32_t ipsec_unit; + u_int32_t ipsec_flags; + int ipsec_ext_ifdata_stats; +}; -extern void etimer_set_deadline(uint64_t deadline); -#if defined(i386) || defined(x86_64) -extern uint64_t setPop(uint64_t time); -#else -extern int setPop(uint64_t time); -#endif +errno_t ipsec_register_control(void); -extern void etimer_resync_deadlines(void); +/* Helpers */ +int ipsec_interface_isvalid (ifnet_t interface); -extern uint32_t etimer_queue_migrate(int target_cpu); +#endif -/* Kernel trace events associated with timers */ -#define DECR_TRAP_LATENCY MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) -#define DECR_SET_DEADLINE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) -#define DECR_TIMER_CALLOUT MACHDBG_CODE(DBG_MACH_EXCP_DECI, 2) -#define DECR_PM_DEADLINE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) -#define DECR_TIMER_MIGRATE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 4) -#define DECR_RDHPET MACHDBG_CODE(DBG_MACH_EXCP_DECI, 5) -#define DECR_SET_TSC_DEADLINE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 6) +/* + * Name registered by the ipsec kernel control + */ +#define IPSEC_CONTROL_NAME "com.apple.net.ipsec_control" -#endif /* _KERN_ETIMER_H_ */ +/* + * Socket option names to manage ipsec + */ +#define IPSEC_OPT_FLAGS 1 +#define IPSEC_OPT_IFNAME 2 +#define IPSEC_OPT_EXT_IFDATA_STATS 3 /* get|set (type int) */ +#define IPSEC_OPT_INC_IFDATA_STATS_IN 4 /* set to increment stat counters (type struct ipsec_stats_param) */ +#define IPSEC_OPT_INC_IFDATA_STATS_OUT 5 /* set to increment stat counters (type struct ipsec_stats_param) */ +#define IPSEC_OPT_SET_DELEGATE_INTERFACE 6 /* set the delegate interface (char[]) */ +/* + * ipsec stats parameter structure + */ +struct ipsec_stats_param { + u_int64_t utsp_packets; + u_int64_t utsp_bytes; + u_int64_t utsp_errors; +}; -#endif /* KERNEL_PRIVATE */ +#endif diff --git a/bsd/net/if_llreach.c b/bsd/net/if_llreach.c index db81aa083..2012d8c05 100644 --- a/bsd/net/if_llreach.c +++ b/bsd/net/if_llreach.c @@ -117,6 +117,8 @@ #include #include +#include + #include #include #include @@ -165,7 +167,7 @@ SYSCTL_NODE(_net_link_generic_system, OID_AUTO, llreach_info, #define LL_MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */ #define LL_MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */ #define LL_COMPUTE_RTIME(x) \ - (((LL_MIN_RANDOM_FACTOR * (x >> 10)) + (random() & \ + (((LL_MIN_RANDOM_FACTOR * (x >> 10)) + (RandomULong() & \ ((LL_MAX_RANDOM_FACTOR - LL_MIN_RANDOM_FACTOR) * (x >> 10)))) / 1000) #endif /* !INET6 */ @@ -290,7 
+292,7 @@ ifnet_llreach_alloc(struct ifnet *ifp, u_int16_t llproto, void *addr, unsigned int alen, u_int64_t llreach_base) { struct if_llreach find, *lr; - struct timeval now; + struct timeval cnow; if (llreach_base == 0) return (NULL); @@ -337,8 +339,8 @@ found: IFLR_ADDREF_LOCKED(lr); /* for caller */ lr->lr_lastrcvd = net_uptime(); /* current approx. uptime */ lr->lr_baseup = lr->lr_lastrcvd; /* base uptime */ - microtime(&now); - lr->lr_basecal = now.tv_sec; /* base calendar time */ + getmicrotime(&cnow); + lr->lr_basecal = cnow.tv_sec; /* base calendar time */ lr->lr_basereachable = llreach_base; lr->lr_reachable = LL_COMPUTE_RTIME(lr->lr_basereachable * 1000); lr->lr_debug |= IFD_ATTACHED; diff --git a/bsd/net/if_loop.c b/bsd/net/if_loop.c index d051c7611..f659d3582 100644 --- a/bsd/net/if_loop.c +++ b/bsd/net/if_loop.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -120,8 +120,8 @@ #define LO_BPF_TAP_OUT(_m) { \ if (lo_statics[0].bpf_callback != NULL) { \ bpf_tap_out(lo_ifp, DLT_NULL, _m, \ - &((struct loopback_header *)_m->m_pkthdr.header)->protocol,\ - sizeof (u_int32_t)); \ + &((struct loopback_header *)_m->m_pkthdr.pkt_hdr)-> \ + protocol, sizeof (u_int32_t)); \ } \ } @@ -151,15 +151,9 @@ struct loopback_header { void loopattach(void); static errno_t lo_demux(struct ifnet *, struct mbuf *, char *, protocol_family_t *); -#if !KPI_INTERFACE_EMBEDDED -static errno_t lo_framer(struct ifnet *, struct mbuf **, - const struct sockaddr *, - const char *, const char *); -#else static errno_t lo_framer(struct ifnet *, struct mbuf **, const struct sockaddr *, const char *, const char *, u_int32_t *, u_int32_t *); -#endif static errno_t lo_add_proto(struct ifnet *, protocol_family_t, const struct ifnet_demux_desc *, u_int32_t); static errno_t lo_del_proto(struct ifnet *, protocol_family_t); @@ -221,16 +215,10 @@ lo_demux(struct ifnet *ifp, struct mbuf *m, char *frame_header, return (0); } -#if !KPI_INTERFACE_EMBEDDED -static errno_t -lo_framer(struct ifnet *ifp, struct mbuf **m, const struct sockaddr *dest, - const char *dest_linkaddr, const char *frame_type) -#else static errno_t lo_framer(struct ifnet *ifp, struct mbuf **m, const struct sockaddr *dest, const char *dest_linkaddr, const char *frame_type, u_int32_t *prepend_len, u_int32_t *postpend_len) -#endif { #pragma unused(ifp, dest, dest_linkaddr) struct loopback_header *header; @@ -241,10 +229,10 @@ lo_framer(struct ifnet *ifp, struct mbuf **m, const struct sockaddr *dest, return (EJUSTRETURN); } -#if KPI_INTERFACE_EMBEDDED - *prepend_len = sizeof (struct loopback_header); - *postpend_len = 0; -#endif /* KPI_INTERFACE_EMBEDDED */ + if (prepend_len != NULL) + *prepend_len = sizeof (struct loopback_header); + if (postpend_len != NULL) + *postpend_len = 0; header = mtod(*m, struct loopback_header *); bcopy(frame_type, &header->protocol, sizeof (u_int32_t)); @@ -281,8 +269,7 @@ lo_output(struct ifnet *ifp, struct mbuf *m_list) bzero(&s, sizeof(s)); for (m = m_list; m; m = m->m_nextpkt) { - if ((m->m_flags & M_PKTHDR) == 0) - panic("lo_output: no HDR"); + VERIFY(m->m_flags & M_PKTHDR); cnt++; len += m->m_pkthdr.len; @@ -294,14 +281,15 @@ lo_output(struct ifnet *ifp, struct mbuf *m_list) if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifp; - m->m_pkthdr.header = mtod(m, char *); - if (apple_hwcksum_tx != 0) { - /* loopback checksums are always OK */ - m->m_pkthdr.csum_data = 0xffff; - 
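/*
 * [Editor's note -- sketch, not part of the original patch.] The change
 * below drops the apple_hwcksum_tx knob: loopback traffic never crosses
 * real hardware, so the checksum fields are now unconditionally marked
 * pre-verified. A consumer honors those flags roughly as:
 *
 *     if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
 *         (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
 *         ;   // checksum already known-good; no in_cksum() pass needed
 *     }
 */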
m->m_pkthdr.csum_flags = - CSUM_DATA_VALID | CSUM_PSEUDO_HDR | - CSUM_IP_CHECKED | CSUM_IP_VALID; - } + m->m_pkthdr.pkt_flags |= PKTF_LOOP; + m->m_pkthdr.pkt_hdr = mtod(m, char *); + + /* loopback checksums are always OK */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags = + CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID; + m_adj(m, sizeof (struct loopback_header)); LO_BPF_TAP_OUT(m); @@ -313,7 +301,7 @@ lo_output(struct ifnet *ifp, struct mbuf *m_list) s.packets_in = cnt; s.packets_out = cnt; s.bytes_in = len; - s.bytes_out = len; + s.bytes_out = len; return (ifnet_input_extended(ifp, m_list, m_tail, &s)); } @@ -330,7 +318,7 @@ lo_pre_enqueue(struct ifnet *ifp, struct mbuf *m0) int error = 0; while (m != NULL) { - VERIFY((m->m_flags & M_PKTHDR)); + VERIFY(m->m_flags & M_PKTHDR); n = m->m_nextpkt; m->m_nextpkt = NULL; @@ -343,14 +331,15 @@ lo_pre_enqueue(struct ifnet *ifp, struct mbuf *m0) if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifp; - m->m_pkthdr.header = mtod(m, char *); - if (apple_hwcksum_tx != 0) { - /* loopback checksums are always OK */ - m->m_pkthdr.csum_data = 0xffff; - m->m_pkthdr.csum_flags = - CSUM_DATA_VALID | CSUM_PSEUDO_HDR | - CSUM_IP_CHECKED | CSUM_IP_VALID; - } + m->m_pkthdr.pkt_flags |= PKTF_LOOP; + m->m_pkthdr.pkt_hdr = mtod(m, char *); + + /* loopback checksums are always OK */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags = + CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID; + m_adj(m, sizeof (struct loopback_header)); /* @@ -438,10 +427,9 @@ lo_pre_output(struct ifnet *ifp, protocol_family_t protocol_family, #pragma unused(ifp, dst, dst_addr) struct rtentry *rt = route; - (*m)->m_flags |= M_LOOP; + VERIFY((*m)->m_flags & M_PKTHDR); - if (((*m)->m_flags & M_PKTHDR) == 0) - panic("looutput no HDR"); + (*m)->m_flags |= M_LOOP; if (rt != NULL) { u_int32_t rt_flags = rt->rt_flags; @@ -653,7 +641,7 @@ loopattach(void) lo_init.demux = lo_demux; lo_init.add_proto = lo_add_proto; lo_init.del_proto = lo_del_proto; - lo_init.framer = lo_framer; + lo_init.framer_extended = lo_framer; lo_init.softc = &lo_statics[0]; lo_init.ioctl = lo_ioctl; lo_init.set_bpf_tap = lo_set_bpf_tap; diff --git a/bsd/net/if_media.c b/bsd/net/if_media.c deleted file mode 100644 index 7c2e4be3f..000000000 --- a/bsd/net/if_media.c +++ /dev/null @@ -1,538 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* $NetBSD: if_media.c,v 1.1 1997/03/17 02:55:15 thorpej Exp $ */ -/* $FreeBSD: src/sys/net/if_media.c,v 1.9.2.4 2001/07/04 00:12:38 brooks Exp $ */ - -/* - * Copyright (c) 1997 - * Jonathan Stone and Jason R. Thorpe. All rights reserved. - * - * This software is derived from information provided by Matt Thomas. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Jonathan Stone - * and Jason R. Thorpe for the NetBSD Project. - * 4. The names of the authors may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * BSD/OS-compatible network interface media selection. - * - * Where it is safe to do so, this code strays slightly from the BSD/OS - * design. Software which uses the API (device drivers, basically) - * shouldn't notice any difference. - * - * Many thanks to Matt Thomas for providing the information necessary - * to implement this interface. - */ - -#include -#include -#include -#include -#include - -#include -#include - -/* - * Compile-time options: - * IFMEDIA_DEBUG: - * turn on implementation-level debug printfs. - * Useful for debugging newly-ported drivers. - */ - -static struct ifmedia_entry *ifmedia_match(struct ifmedia *ifm, - int flags, int mask); - -#ifdef IFMEDIA_DEBUG -int ifmedia_debug = 0; -static void ifmedia_printword(int); -#endif - -/* - * Initialize if_media struct for a specific interface instance. 
- */ -void -ifmedia_init(ifm, dontcare_mask, change_callback, status_callback) - struct ifmedia *ifm; - int dontcare_mask; - ifm_change_cb_t change_callback; - ifm_stat_cb_t status_callback; -{ - - LIST_INIT(&ifm->ifm_list); - ifm->ifm_cur = NULL; - ifm->ifm_media = 0; - ifm->ifm_mask = dontcare_mask; /* IF don't-care bits */ - ifm->ifm_change = change_callback; - ifm->ifm_status = status_callback; -} - -void -ifmedia_removeall(ifm) - struct ifmedia *ifm; -{ - struct ifmedia_entry *entry; - - for (entry = LIST_FIRST(&ifm->ifm_list); entry; - entry = LIST_FIRST(&ifm->ifm_list)) { - LIST_REMOVE(entry, ifm_list); - FREE(entry, M_IFADDR); - } -} - -/* - * Add a media configuration to the list of supported media - * for a specific interface instance. - */ -void -ifmedia_add(ifm, mword, data, aux) - struct ifmedia *ifm; - int mword; - int data; - void *aux; -{ - register struct ifmedia_entry *entry; - -#ifdef IFMEDIA_DEBUG - if (ifmedia_debug) { - if (ifm == NULL) { - printf("ifmedia_add: null ifm\n"); - return; - } - printf("Adding entry for "); - ifmedia_printword(mword); - } -#endif - - entry = _MALLOC(sizeof(*entry), M_IFADDR, M_NOWAIT); - if (entry == NULL) - panic("ifmedia_add: can't malloc entry"); - - entry->ifm_media = mword; - entry->ifm_data = data; - entry->ifm_aux = aux; - - LIST_INSERT_HEAD(&ifm->ifm_list, entry, ifm_list); -} - -/* - * Add an array of media configurations to the list of - * supported media for a specific interface instance. - */ -void -ifmedia_list_add(ifm, lp, count) - struct ifmedia *ifm; - struct ifmedia_entry *lp; - int count; -{ - int i; - - for (i = 0; i < count; i++) - ifmedia_add(ifm, lp[i].ifm_media, lp[i].ifm_data, - lp[i].ifm_aux); -} - -/* - * Set the default active media. - * - * Called by device-specific code which is assumed to have already - * selected the default media in hardware. We do _not_ call the - * media-change callback. - */ -void -ifmedia_set(ifm, target) - struct ifmedia *ifm; - int target; - -{ - struct ifmedia_entry *match; - - match = ifmedia_match(ifm, target, ifm->ifm_mask); - - if (match == NULL) { - printf("ifmedia_set: no match for 0x%x/0x%x\n", - target, ~ifm->ifm_mask); - panic("ifmedia_set"); - } - ifm->ifm_cur = match; - -#ifdef IFMEDIA_DEBUG - if (ifmedia_debug) { - printf("ifmedia_set: target "); - ifmedia_printword(target); - printf("ifmedia_set: setting to "); - ifmedia_printword(ifm->ifm_cur->ifm_media); - } -#endif -} - -/* - * Device-independent media ioctl support function. - */ -int -ifmedia_ioctl( - struct ifnet *ifp, - struct ifreq *ifr, - struct ifmedia *ifm, - u_long cmd) -{ - struct ifmedia_entry *match; - struct ifmediareq *ifmr = (struct ifmediareq *) ifr; - int error = 0, sticky; - - if (ifp == NULL || ifr == NULL || ifm == NULL) - return(EINVAL); - - switch (cmd) { - - /* - * Set the current media. - */ - case SIOCSIFMEDIA: - { - struct ifmedia_entry *oldentry; - int oldmedia; - int newmedia = ifr->ifr_media; - - match = ifmedia_match(ifm, newmedia, ifm->ifm_mask); - if (match == NULL) { -#ifdef IFMEDIA_DEBUG - if (ifmedia_debug) { - printf( - "ifmedia_ioctl: no media found for 0x%x\n", - newmedia); - } -#endif - return (ENXIO); - } - - /* - * If no change, we're done. - * XXX Automedia may invole software intervention. - * Keep going in case the the connected media changed. - * Similarly, if best match changed (kernel debugger?). 
- */ - if ((IFM_SUBTYPE(newmedia) != IFM_AUTO) && - (newmedia == ifm->ifm_media) && - (match == ifm->ifm_cur)) - return 0; - - /* - * We found a match, now make the driver switch to it. - * Make sure to preserve our old media type in case the - * driver can't switch. - */ -#ifdef IFMEDIA_DEBUG - if (ifmedia_debug) { - printf("ifmedia_ioctl: switching %s to ", - ifp->if_xname); - ifmedia_printword(match->ifm_media); - } -#endif - oldentry = ifm->ifm_cur; - oldmedia = ifm->ifm_media; - ifm->ifm_cur = match; - ifm->ifm_media = newmedia; - error = (*ifm->ifm_change)(ifp); - if (error) { - ifm->ifm_cur = oldentry; - ifm->ifm_media = oldmedia; - } - break; - } - - /* - * Get list of available media and current media on interface. - */ - case SIOCGIFMEDIA: - { - struct ifmedia_entry *ep; - int *kptr, count; - int usermax; /* user requested max */ - - kptr = NULL; /* XXX gcc */ - - ifmr->ifm_active = ifmr->ifm_current = ifm->ifm_cur ? - ifm->ifm_cur->ifm_media : IFM_NONE; - ifmr->ifm_mask = ifm->ifm_mask; - ifmr->ifm_status = 0; - (*ifm->ifm_status)(ifp, ifmr); - - count = 0; - usermax = 0; - - /* - * If there are more interfaces on the list, count - * them. This allows the caller to set ifmr->ifm_count - * to 0 on the first call to know how much space to - * allocate. - */ - LIST_FOREACH(ep, &ifm->ifm_list, ifm_list) - usermax++; - - /* - * Don't allow the user to ask for too many - * or a negative number. - */ - if (ifmr->ifm_count > usermax) - ifmr->ifm_count = usermax; - else if (ifmr->ifm_count < 0) - return (EINVAL); - - if (ifmr->ifm_count != 0) { - kptr = (int *) _MALLOC(ifmr->ifm_count * sizeof(int), - M_TEMP, M_WAITOK); - if (kptr == NULL) - return ENOBUFS; - - /* - * Get the media words from the interface's list. - */ - ep = LIST_FIRST(&ifm->ifm_list); - for (; ep != NULL && count < ifmr->ifm_count; - ep = LIST_NEXT(ep, ifm_list), count++) - kptr[count] = ep->ifm_media; - - if (ep != NULL) - error = E2BIG; /* oops! */ - } else { - count = usermax; - } - - /* - * We do the copyout on E2BIG, because that's - * just our way of telling userland that there - * are more. This is the behavior I've observed - * under BSD/OS 3.0 - */ - sticky = error; - if ((error == 0 || error == E2BIG) && ifmr->ifm_count != 0) { - error = copyout((caddr_t)kptr, - CAST_USER_ADDR_T(ifmr->ifm_ulist), - ifmr->ifm_count * sizeof(int)); - } - - if (error == 0) - error = sticky; - - if (ifmr->ifm_count != 0) - FREE(kptr, M_TEMP); - - ifmr->ifm_count = count; - break; - } - - default: - return (EINVAL); - } - - return (error); -} - -/* - * Find media entry matching a given ifm word. 
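 * [Editor's note -- clarifying sketch, not in the original file.] The
 * caller hands in the ifmedia "don't-care" mask; its complement selects
 * the significant bits, so two media words match when they agree on every
 * bit outside the mask:
 *
 *     match = (candidate & ~dontcare) == (target & ~dontcare);
 *
 * e.g. a caller that puts IFM_FDX in its don't-care mask treats
 * IFM_ETHER|IFM_100_TX and IFM_ETHER|IFM_100_TX|IFM_FDX as the same medium.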
- * - */ -static struct ifmedia_entry * -ifmedia_match(ifm, target, mask) - struct ifmedia *ifm; - int target; - int mask; -{ - struct ifmedia_entry *match, *next; - - match = NULL; - mask = ~mask; - - LIST_FOREACH(next, &ifm->ifm_list, ifm_list) { - if ((next->ifm_media & mask) == (target & mask)) { -#if defined(IFMEDIA_DEBUG) || defined(DIAGNOSTIC) - if (match) { - printf("ifmedia_match: multiple match for " - "0x%x/0x%x\n", target, mask); - } -#endif - match = next; - } - } - - return match; -} - -#ifdef IFMEDIA_DEBUG -struct ifmedia_description ifm_type_descriptions[] = - IFM_TYPE_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_ethernet_descriptions[] = - IFM_SUBTYPE_ETHERNET_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_ethernet_option_descriptions[] = - IFM_SUBTYPE_ETHERNET_OPTION_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_tokenring_descriptions[] = - IFM_SUBTYPE_TOKENRING_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_tokenring_option_descriptions[] = - IFM_SUBTYPE_TOKENRING_OPTION_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_fddi_descriptions[] = - IFM_SUBTYPE_FDDI_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_fddi_option_descriptions[] = - IFM_SUBTYPE_FDDI_OPTION_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_80211_descriptions[] = - IFM_SUBTYPE_IEEE80211_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_80211_option_descriptions[] = - IFM_SUBTYPE_IEEE80211_OPTION_DESCRIPTIONS; - -struct ifmedia_description ifm_subtype_shared_descriptions[] = - IFM_SUBTYPE_SHARED_DESCRIPTIONS; - -struct ifmedia_description ifm_shared_option_descriptions[] = - IFM_SHARED_OPTION_DESCRIPTIONS; - -struct ifmedia_type_to_subtype { - struct ifmedia_description *subtypes; - struct ifmedia_description *options; -}; - -/* must be in the same order as IFM_TYPE_DESCRIPTIONS */ -struct ifmedia_type_to_subtype ifmedia_types_to_subtypes[] = { - { - &ifm_subtype_ethernet_descriptions[0], - &ifm_subtype_ethernet_option_descriptions[0] - }, - { - &ifm_subtype_tokenring_descriptions[0], - &ifm_subtype_tokenring_option_descriptions[0] - }, - { - &ifm_subtype_fddi_descriptions[0], - &ifm_subtype_fddi_option_descriptions[0] - }, - { - &ifm_subtype_80211_descriptions[0], - &ifm_subtype_80211_option_descriptions[0] - }, -}; - -/* - * print a media word. - */ -static void -ifmedia_printword(ifmw) - int ifmw; -{ - struct ifmedia_description *desc; - struct ifmedia_type_to_subtype *ttos; - int seen_option = 0; - - /* Find the top-level interface type. */ - for (desc = ifm_type_descriptions, ttos = ifmedia_types_to_subtypes; - desc->ifmt_string != NULL; desc++, ttos++) - if (IFM_TYPE(ifmw) == desc->ifmt_word) - break; - if (desc->ifmt_string == NULL) { - printf("\n"); - return; - } - printf(desc->ifmt_string); - - /* - * Check for the shared subtype descriptions first, then the - * type-specific ones. - */ - for (desc = ifm_subtype_shared_descriptions; - desc->ifmt_string != NULL; desc++) - if (IFM_SUBTYPE(ifmw) == desc->ifmt_word) - goto got_subtype; - - for (desc = ttos->subtypes; desc->ifmt_string != NULL; desc++) - if (IFM_SUBTYPE(ifmw) == desc->ifmt_word) - break; - if (desc->ifmt_string == NULL) { - printf(" \n"); - return; - } - - got_subtype: - printf(" %s", desc->ifmt_string); - - /* - * Look for shared options. - */ - for (desc = ifm_shared_option_descriptions; - desc->ifmt_string != NULL; desc++) { - if (ifmw & desc->ifmt_word) { - if (seen_option == 0) - printf(" <"); - printf("%s%s", seen_option++ ? 
"," : "", - desc->ifmt_string); - } - } - - /* - * Look for subtype-specific options. - */ - for (desc = ttos->options; desc->ifmt_string != NULL; desc++) { - if (ifmw & desc->ifmt_word) { - if (seen_option == 0) - printf(" <"); - printf("%s%s", seen_option++ ? "," : "", - desc->ifmt_string); - } - } - printf("%s\n", seen_option ? ">" : ""); -} -#endif /* IFMEDIA_DEBUG */ diff --git a/bsd/net/if_media.h b/bsd/net/if_media.h index 12cbc871b..afba92a43 100644 --- a/bsd/net/if_media.h +++ b/bsd/net/if_media.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,55 +79,8 @@ */ #ifdef KERNEL_PRIVATE +/* sigh; some modules are lazy and thus rely on this */ #include - -/* - * Driver callbacks for media status and change requests. - */ -typedef int (*ifm_change_cb_t)(struct ifnet *ifp); -typedef void (*ifm_stat_cb_t)(struct ifnet *ifp, struct ifmediareq *req); - -/* - * In-kernel representation of a single supported media type. - */ -struct ifmedia_entry { - LIST_ENTRY(ifmedia_entry) ifm_list; - int ifm_media; /* description of this media attachment */ - int ifm_data; /* for driver-specific use */ - void *ifm_aux; /* for driver-specific use */ -}; - -/* - * One of these goes into a network interface's softc structure. - * It is used to keep general media state. - */ -struct ifmedia { - int ifm_mask; /* mask of changes we don't care about */ - int ifm_media; /* current user-set media word */ - struct ifmedia_entry *ifm_cur; /* currently selected media */ - LIST_HEAD(, ifmedia_entry) ifm_list; /* list of all supported media */ - ifm_change_cb_t ifm_change; /* media change driver callback */ - ifm_stat_cb_t ifm_status; /* media status driver callback */ -}; - -/* Initialize an interface's struct if_media field. */ -void ifmedia_init(struct ifmedia *ifm, int dontcare_mask, - ifm_change_cb_t change_callback, ifm_stat_cb_t status_callback); - -/* Add one supported medium to a struct ifmedia. */ -void ifmedia_add(struct ifmedia *ifm, int mword, int data, void *aux); - -/* Add an array (of ifmedia_entry) media to a struct ifmedia. */ -void ifmedia_list_add(struct ifmedia *mp, struct ifmedia_entry *lp, - int count); - -/* Set default media type on initialization. */ -void ifmedia_set(struct ifmedia *ifm, int mword); - -/* Common ioctl function for getting/setting media, called by driver. */ -int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr, - struct ifmedia *ifm, uint32_t cmd); - #endif /* KERNEL_PRIVATE */ /* @@ -242,6 +195,7 @@ int ifmedia_ioctl(struct ifnet *ifp, struct ifreq *ifr, */ #define IFM_AVALID 0x00000001 /* Active bit valid */ #define IFM_ACTIVE 0x00000002 /* Interface attached to working net */ +#define IFM_WAKESAMENET 0x00000004 /* No link transition while asleep */ /* * Macros to extract various bits of information from the media word. diff --git a/bsd/net/if_mib.c b/bsd/net/if_mib.c index a7fd2db5c..92544dd1e 100644 --- a/bsd/net/if_mib.c +++ b/bsd/net/if_mib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -125,8 +125,8 @@ make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req) * Make sure the interface is in use */ if (ifnet_is_attached(ifp, 0)) { - snprintf(ifmd.ifmd_name, sizeof(ifmd.ifmd_name), "%s%d", - ifp->if_name, ifp->if_unit); + snprintf(ifmd.ifmd_name, sizeof(ifmd.ifmd_name), "%s", + if_name(ifp)); #define COPY(fld) ifmd.ifmd_##fld = ifp->if_##fld COPY(pcount); @@ -189,7 +189,12 @@ make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req) if_copy_packet_stats(ifp, &ifmd_supp->ifmd_packet_stats); if_copy_rxpoll_stats(ifp, &ifmd_supp->ifmd_rxpoll_stats); - error = SYSCTL_OUT(req, ifmd_supp, sizeof (*ifmd_supp)); + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = sizeof (*ifmd_supp); + + error = SYSCTL_OUT(req, ifmd_supp, MIN(sizeof (*ifmd_supp), + req->oldlen)); + _FREE(ifmd_supp, M_TEMP); break; } diff --git a/bsd/net/if_pflog.c b/bsd/net/if_pflog.c index 18d6435e0..3bc86c820 100644 --- a/bsd/net/if_pflog.c +++ b/bsd/net/if_pflog.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2010 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -222,8 +222,7 @@ pflog_clone_destroy(struct ifnet *ifp) static errno_t pflogoutput(struct ifnet *ifp, struct mbuf *m) { - printf("%s: freeing data for %s%d\n", __func__, ifp->if_name, - ifp->if_unit); + printf("%s: freeing data for %s\n", __func__, if_name(ifp)); m_freem(m); return (ENOTSUP); } @@ -253,8 +252,7 @@ static errno_t pflogdemux(struct ifnet *ifp, struct mbuf *m, char *h, protocol_family_t *ppf) { #pragma unused(h, ppf) - printf("%s: freeing data for %s%d\n", __func__, ifp->if_name, - ifp->if_unit); + printf("%s: freeing data for %s\n", __func__, if_name(ifp)); m_freem(m); return (EJUSTRETURN); } diff --git a/bsd/net/if_pppvar.h b/bsd/net/if_pppvar.h deleted file mode 100644 index a48282763..000000000 --- a/bsd/net/if_pppvar.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * if_pppvar.h - private structures and declarations for PPP. - * - * Copyright (c) 1994 The Australian National University. - * All rights reserved. 
- * - * Permission to use, copy, modify, and distribute this software and its - * documentation is hereby granted, provided that the above copyright - * notice appears in all copies. This software is provided without any - * warranty, express or implied. The Australian National University - * makes no representations about the suitability of this software for - * any purpose. - * - * IN NO EVENT SHALL THE AUSTRALIAN NATIONAL UNIVERSITY BE LIABLE TO ANY - * PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF - * THE AUSTRALIAN NATIONAL UNIVERSITY HAVE BEEN ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. - * - * THE AUSTRALIAN NATIONAL UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE AUSTRALIAN NATIONAL UNIVERSITY HAS NO - * OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, - * OR MODIFICATIONS. - * - * Copyright (c) 1989 Carnegie Mellon University. - * All rights reserved. - * - * Redistribution and use in source and binary forms are permitted - * provided that the above copyright notice and this paragraph are - * duplicated in all such forms and that any documentation, - * advertising materials, and other materials related to such - * distribution and use acknowledge that the software was developed - * by Carnegie Mellon University. The name of the - * University may not be used to endorse or promote products derived - * from this software without specific prior written permission. - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - * - * $FreeBSD: src/sys/net/if_pppvar.h,v 1.15 2000/01/29 16:56:23 peter Exp $ - */ - -#include -#ifndef DONT_WARN_OBSOLETE -#warning if_pppvar.h is not used by the darwin kernel -#endif - -#ifdef KERNEL_PRIVATE - -/* - * Supported network protocols. These values are used for - * indexing sc_npmode. - */ -#define NP_IP 0 /* Internet Protocol */ -#define NUM_NP 1 /* Number of NPs. */ - -/* - * Structure describing each ppp unit. 
- */ -struct ppp_softc { - struct ifnet sc_if; /* network-visible interface */ -/*hi*/ u_int sc_flags; /* control/status bits; see if_ppp.h */ - struct callout_handle sc_ch; /* Used for scheduling timeouts */ - void *sc_devp; /* pointer to device-dep structure */ - void (*sc_start)(struct ppp_softc *); /* start output proc */ - void (*sc_ctlp)(struct ppp_softc *); /* rcvd control pkt */ - void (*sc_relinq)(struct ppp_softc *); /* relinquish ifunit */ - void (*sc_setmtu)(struct ppp_softc *); /* set mtu */ - short sc_mru; /* max receive unit */ - pid_t sc_xfer; /* used in transferring unit */ -/*hi*/ struct ifqueue sc_rawq; /* received packets */ -/*net*/ struct ifqueue sc_inq; /* queue of input packets for daemon */ -/*net*/ struct ifqueue sc_fastq; /* interactive output packet q */ - struct mbuf *sc_npqueue; /* output packets not to be sent yet */ - struct mbuf **sc_npqtail; /* ptr to last next ptr in npqueue */ - struct pppstat sc_stats; /* count of bytes/pkts sent/rcvd */ - enum NPmode sc_npmode[NUM_NP]; /* what to do with each NP */ - struct compressor *sc_xcomp; /* transmit compressor */ - void *sc_xc_state; /* transmit compressor state */ - struct compressor *sc_rcomp; /* receive decompressor */ - void *sc_rc_state; /* receive decompressor state */ - time_t sc_last_sent; /* time (secs) last NP pkt sent */ - time_t sc_last_recv; /* time (secs) last NP pkt rcvd */ -#if PPP_FILTER - struct bpf_program sc_pass_filt; /* filter for packets to pass */ - struct bpf_program sc_active_filt; /* filter for "non-idle" packets */ -#endif /* PPP_FILTER */ -#if VJC - struct slcompress *sc_comp; /* vjc control buffer */ -#endif - - /* Device-dependent part for async lines. */ - ext_accm sc_asyncmap; /* async control character map */ - u_int32_t sc_rasyncmap; /* receive async control char map */ - struct mbuf *sc_outm; /* mbuf chain currently being output */ - struct mbuf *sc_m; /* pointer to input mbuf chain */ - struct mbuf *sc_mc; /* pointer to current input mbuf */ - char *sc_mp; /* ptr to next char in input mbuf */ - short sc_ilen; /* length of input packet so far */ - u_short sc_fcs; /* FCS so far (input) */ - u_short sc_outfcs; /* FCS so far for output packet */ - u_char sc_rawin[16]; /* chars as received */ - int sc_rawin_count; /* # in sc_rawin */ -}; - -extern struct ppp_softc ppp_softc[]; - -struct ppp_softc *pppalloc(pid_t pid); -void pppdealloc(struct ppp_softc *sc); -int pppioctl(struct ppp_softc *sc, u_long cmd, caddr_t data, - int flag, struct proc *p); -int pppoutput(struct ifnet *ifp, struct mbuf *m0, - struct sockaddr *dst, struct rtentry *rtp); -void ppp_restart(struct ppp_softc *sc); -void ppppktin(struct ppp_softc *sc, struct mbuf *m, int lost); -struct mbuf *ppp_dequeue(struct ppp_softc *sc); -#endif /* __APPLE_API_PRIVATE */ diff --git a/bsd/net/if_stf.c b/bsd/net/if_stf.c index 41d1c15db..bf29b91f6 100644 --- a/bsd/net/if_stf.c +++ b/bsd/net/if_stf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -175,15 +175,16 @@ static int stf_init_done; static void in_stf_input(struct mbuf *, int); static void stfinit(void); -extern struct domain inetdomain; -struct protosw in_stf_protosw = -{ SOCK_RAW, &inetdomain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, - in_stf_input, NULL, NULL, rip_ctloutput, - NULL, - NULL, NULL, NULL, NULL, - NULL, - &rip_usrreqs, - NULL, rip_unlock, NULL, {NULL, NULL}, NULL, {0} + +static struct protosw in_stf_protosw = +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_IPV6, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = in_stf_input, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs, + .pr_unlock = rip_unlock, }; static int stf_encapcheck(const struct mbuf *, int, int, void *); @@ -524,7 +525,8 @@ stf_pre_output( struct ip6_hdr *ip6; struct in6_ifaddr *ia6; struct sockaddr_in *dst4; - struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF }; + struct ip_out_args ipoa = + { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 }; errno_t result = 0; sc = ifnet_softc(ifp); @@ -604,20 +606,16 @@ stf_pre_output( lck_mtx_lock(&sc->sc_ro_mtx); dst4 = (struct sockaddr_in *)(void *)&sc->sc_ro.ro_dst; - if (dst4->sin_family != AF_INET || + if (ROUTE_UNUSABLE(&sc->sc_ro) || dst4->sin_family != AF_INET || bcmp(&dst4->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)) != 0) { + ROUTE_RELEASE(&sc->sc_ro); /* cache route doesn't match: always the case during the first use */ dst4->sin_family = AF_INET; dst4->sin_len = sizeof(struct sockaddr_in); bcopy(&ip->ip_dst, &dst4->sin_addr, sizeof(dst4->sin_addr)); - if (sc->sc_ro.ro_rt) { - rtfree(sc->sc_ro.ro_rt); - sc->sc_ro.ro_rt = NULL; - } } - result = ip_output_list(m, 0, NULL, &sc->sc_ro, IP_OUTARGS, NULL, - &ipoa); + result = ip_output(m, NULL, &sc->sc_ro, IP_OUTARGS, NULL, &ipoa); lck_mtx_unlock(&sc->sc_ro_mtx); /* Assumption: ip_output will free mbuf on errors */ @@ -893,7 +891,7 @@ stf_ioctl( break; default: - error = EINVAL; + error = EOPNOTSUPP; break; } diff --git a/bsd/net/if_types.h b/bsd/net/if_types.h index b3f8e5b65..4d9697275 100644 --- a/bsd/net/if_types.h +++ b/bsd/net/if_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -127,27 +127,24 @@ #define IFT_SMDSICIP 0x34 /* SMDS InterCarrier Interface */ #define IFT_PROPVIRTUAL 0x35 /* Proprietary Virtual/internal */ #define IFT_PROPMUX 0x36 /* Proprietary Multiplexing */ +/* + * IFT_GIF, IFT_FAITH and IFT_FAITH are not based on IANA assignments. + * Note: IFT_STF has a defined ifType: 0xd7 (215), but we use 0x39. + */ #define IFT_GIF 0x37 /*0xf0*/ #define IFT_FAITH 0x38 /*0xf2*/ #define IFT_STF 0x39 /*0xf3*/ + #define IFT_L2VLAN 0x87 /* Layer 2 Virtual LAN using 802.1Q */ #define IFT_IEEE8023ADLAG 0x88 /* IEEE802.3ad Link Aggregate */ #define IFT_IEEE1394 0x90 /* IEEE1394 High Performance SerialBus*/ #define IFT_BRIDGE 0xd1 /* Transparent bridge interface */ -/* - * These are not based on IANA assignments: - * Note: IFT_STF has a defined ifType: 0xd7 (215), but we use 0x39. 
- */ -#define IFT_GIF 0x37 /*0xf0*/ -#define IFT_FAITH 0x38 /*0xf2*/ -#define IFT_STF 0x39 /*0xf3*/ - #define IFT_ENC 0xf4 /* Encapsulation */ #define IFT_PFLOG 0xf5 /* Packet filter logging */ #define IFT_PFSYNC 0xf6 /* Packet filter state syncing */ #define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */ - +#define IFT_PKTAP 0xfe /* Packet tap pseudo interface */ #define IFT_CELLULAR 0xff /* Packet Data over Cellular */ #define IFT_PDP IFT_CELLULAR /* deprecated; use IFT_CELLULAR */ diff --git a/bsd/net/if_utun.c b/bsd/net/if_utun.c index 8f766ba3c..c4fe86099 100644 --- a/bsd/net/if_utun.c +++ b/bsd/net/if_utun.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2011 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -71,14 +71,9 @@ static errno_t utun_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *uniti static errno_t utun_output(ifnet_t interface, mbuf_t data); static errno_t utun_demux(ifnet_t interface, mbuf_t data, char *frame_header, protocol_family_t *protocol); -static errno_t utun_framer(ifnet_t interface, mbuf_t *packet, - const struct sockaddr *dest, const char *desk_linkaddr, - const char *frame_type -#if KPI_INTERFACE_EMBEDDED - , - u_int32_t *prepend_len, u_int32_t *postpend_len -#endif /* KPI_INTERFACE_EMBEDDED */ - ); +static errno_t utun_framer(ifnet_t interface, mbuf_t *packet, + const struct sockaddr *dest, const char *desk_linkaddr, + const char *frame_type, u_int32_t *prepend_len, u_int32_t *postpend_len); static errno_t utun_add_proto(ifnet_t interface, protocol_family_t protocol, const struct ifnet_demux_desc *demux_array, u_int32_t demux_count); @@ -142,14 +137,16 @@ utun_register_control(void) strncpy(kern_ctl.ctl_name, UTUN_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */ - kern_ctl.ctl_sendsize = 64 * 1024; - kern_ctl.ctl_recvsize = 64 * 1024; + kern_ctl.ctl_sendsize = 512 * 1024; + kern_ctl.ctl_recvsize = 512 * 1024; kern_ctl.ctl_connect = utun_ctl_connect; kern_ctl.ctl_disconnect = utun_ctl_disconnect; kern_ctl.ctl_send = utun_ctl_send; kern_ctl.ctl_setopt = utun_ctl_setopt; kern_ctl.ctl_getopt = utun_ctl_getopt; - + + utun_ctl_init_crypto(); + result = ctl_register(&kern_ctl, &utun_kctlref); if (result != 0) { printf("utun_register_control - ctl_register failed: %d\n", result); @@ -186,7 +183,7 @@ utun_ctl_connect( struct sockaddr_ctl *sac, void **unitinfo) { - struct ifnet_init_params utun_init; + struct ifnet_init_eparams utun_init; struct utun_pcb *pcb; errno_t result; struct ifnet_stats_param stats; @@ -206,20 +203,23 @@ utun_ctl_connect( /* Create the interface */ bzero(&utun_init, sizeof(utun_init)); + utun_init.ver = IFNET_INIT_CURRENT_VERSION; + utun_init.len = sizeof (utun_init); + utun_init.flags = IFNET_INIT_LEGACY; utun_init.name = "utun"; utun_init.unit = pcb->utun_unit - 1; utun_init.family = utun_family; utun_init.type = IFT_OTHER; utun_init.output = utun_output; utun_init.demux = utun_demux; - utun_init.framer = utun_framer; + utun_init.framer_extended = utun_framer; utun_init.add_proto = utun_add_proto; utun_init.del_proto = utun_del_proto; utun_init.softc = pcb; utun_init.ioctl = utun_ioctl; utun_init.detach = utun_detached; - result = ifnet_allocate(&utun_init, &pcb->utun_ifp); + result = ifnet_allocate_extended(&utun_init, &pcb->utun_ifp); if (result != 0) { printf("utun_ctl_connect - ifnet_allocate failed: %d\n", result); 
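/*
 * [Editor's note -- illustrative sketch, not part of the original patch.]
 * The conversion above is the minimal ifnet_init_eparams usage: ver, len
 * and flags come first, and IFNET_INIT_LEGACY keeps the old direct-output
 * model (a start callback would be used instead under the new model). The
 * same skeleton for a hypothetical driver "myif" (the family value is
 * chosen only for illustration):
 *
 *     struct ifnet_init_eparams ep;
 *     bzero(&ep, sizeof(ep));
 *     ep.ver       = IFNET_INIT_CURRENT_VERSION;
 *     ep.len       = sizeof(ep);
 *     ep.flags     = IFNET_INIT_LEGACY;
 *     ep.name      = "myif";
 *     ep.unit      = 0;
 *     ep.family    = IFNET_FAMILY_TUN;
 *     ep.type      = IFT_OTHER;
 *     ep.output    = my_output;        // required with IFNET_INIT_LEGACY
 *     ep.demux     = my_demux;
 *     ep.add_proto = my_add_proto;
 *     ep.del_proto = my_del_proto;
 *     ep.softc     = my_softc;
 *     ep.detach    = my_detached;
 *
 *     ifnet_t ifp;
 *     errno_t err = ifnet_allocate_extended(&ep, &ifp);
 *     if (err == 0)
 *         err = ifnet_attach(ifp, NULL);
 */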
utun_free(pcb); @@ -454,6 +454,15 @@ utun_ctl_send( mbuf_t m, __unused int flags) { + /* + * The userland ABI requires the first four bytes have the protocol family + * in network byte order: swap them + */ + if (m_pktlen(m) >= 4) + *(protocol_family_t *)mbuf_data(m) = ntohl(*(protocol_family_t *)mbuf_data(m)); + else + printf("%s - unexpected short mbuf pkt len %d\n", __func__, m_pktlen(m) ); + return utun_pkt_input((struct utun_pcb *)unitinfo, m); } @@ -473,6 +482,7 @@ utun_ctl_setopt( switch (opt) { case UTUN_OPT_FLAGS: case UTUN_OPT_EXT_IFDATA_STATS: + case UTUN_OPT_SET_DELEGATE_INTERFACE: if (kauth_cred_issuser(kauth_cred_get()) == 0) { return EPERM; } @@ -511,6 +521,14 @@ utun_ctl_setopt( result = utun_ctl_start_crypto_data_traffic(kctlref, unit, unitinfo, opt, data, len); break; + case UTUN_OPT_CONFIG_CRYPTO_FRAMER: + result = utun_ctl_config_crypto_framer(kctlref, unit, unitinfo, opt, data, len); + break; + + case UTUN_OPT_UNCONFIG_CRYPTO_FRAMER: + result = utun_ctl_unconfig_crypto_framer(kctlref, unit, unitinfo, opt, data, len); + break; + case UTUN_OPT_EXT_IFDATA_STATS: if (len != sizeof(int)) { result = EMSGSIZE; @@ -539,8 +557,29 @@ utun_ctl_setopt( utsp->utsp_bytes, utsp->utsp_errors); break; } - - default: + + case UTUN_OPT_SET_DELEGATE_INTERFACE: { + ifnet_t del_ifp = NULL; + char name[IFNAMSIZ]; + + if (len > IFNAMSIZ - 1) { + result = EMSGSIZE; + break; + } + if (len != 0) { /* if len==0, del_ifp will be NULL causing the delegate to be removed */ + bcopy(data, name, len); + name[len] = 0; + result = ifnet_find_by_name(name, &del_ifp); + } + if (result == 0) { + result = ifnet_set_delegate(pcb->utun_ifp, del_ifp); + if (del_ifp) + ifnet_release(del_ifp); + } + break; + } + + default: result = ENOPROTOOPT; break; } @@ -600,7 +639,9 @@ utun_output( struct utun_pcb *pcb = ifnet_softc(interface); errno_t result; - bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0); + if (m_pktlen(data) >= 4) { + bpf_tap_out(pcb->utun_ifp, DLT_NULL, data, 0, 0); + } if (pcb->utun_flags & UTUN_FLAGS_NO_OUTPUT) { /* flush data */ @@ -619,6 +660,12 @@ utun_output( } } + /* + * The ABI requires the protocol in network byte order + */ + if (m_pktlen(data) >= 4) + *(u_int32_t *)mbuf_data(data) = htonl(*(u_int32_t *)mbuf_data(data)); + length = mbuf_pkthdr_len(data); result = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, data, CTL_DATA_EOR); if (result != 0) { @@ -654,7 +701,7 @@ utun_demux( if (data == NULL) return ENOENT; - *protocol = ntohl(*(u_int32_t *)mbuf_data(data)); + *protocol = *(u_int32_t *)mbuf_data(data); return 0; } @@ -664,13 +711,9 @@ utun_framer( mbuf_t *packet, __unused const struct sockaddr *dest, __unused const char *desk_linkaddr, - const char *frame_type -#if KPI_INTERFACE_EMBEDDED - , + const char *frame_type, u_int32_t *prepend_len, - u_int32_t *postpend_len -#endif /* KPI_INTERFACE_EMBEDDED */ - ) + u_int32_t *postpend_len) { if (mbuf_prepend(packet, sizeof(protocol_family_t), MBUF_DONTWAIT) != 0) { printf("utun_framer - ifnet_output prepend failed\n"); @@ -680,13 +723,13 @@ utun_framer( // just return, because the buffer was freed in mbuf_prepend return EJUSTRETURN; } -#if KPI_INTERFACE_EMBEDDED - *prepend_len = sizeof(protocol_family_t); - *postpend_len = 0; -#endif /* KPI_INTERFACE_EMBEDDED */ + if (prepend_len != NULL) + *prepend_len = sizeof(protocol_family_t); + if (postpend_len != NULL) + *postpend_len = 0; // place protocol number at the beginning of the mbuf - *(protocol_family_t *)mbuf_data(*packet) = htonl(*(protocol_family_t *)(uintptr_t)(size_t)frame_type); + 
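/*
 * [Editor's note -- sketch, not part of the original patch.] With the
 * utun_ctl_send()/utun_output() changes above, the 4-byte protocol-family
 * prefix stays in network byte order across the control socket (the
 * userland ABI) while the kernel now keeps it in host order internally.
 * A userland reader therefore does:
 *
 *     uint8_t buf[2048];
 *     ssize_t n = read(utun_fd, buf, sizeof(buf)); // fd from ctl connect()
 *     if (n >= 4) {
 *         uint32_t af = ntohl(*(uint32_t *)buf);   // AF_INET or AF_INET6
 *         handle_packet(af, buf + 4, n - 4);       // hypothetical handler
 *     }
 */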
*(protocol_family_t *)mbuf_data(*packet) = *(protocol_family_t *)(uintptr_t)(size_t)frame_type; return 0; } @@ -812,18 +855,20 @@ errno_t utun_pkt_input (struct utun_pcb *pcb, mbuf_t m) { errno_t result; - protocol_family_t protocol; + protocol_family_t protocol = 0; mbuf_pkthdr_setrcvif(m, pcb->utun_ifp); - bpf_tap_in(pcb->utun_ifp, DLT_NULL, m, 0, 0); - + if (m_pktlen(m) >= 4) { + protocol = *(u_int32_t *)mbuf_data(m); + + bpf_tap_in(pcb->utun_ifp, DLT_NULL, m, 0, 0); + } if (pcb->utun_flags & UTUN_FLAGS_NO_INPUT) { /* flush data */ mbuf_freem(m); return 0; } - protocol = ntohl(*(u_int32_t *)mbuf_data(m)); // quick exit for keepalive packets if (protocol == AF_UTUN && pcb->utun_flags & UTUN_FLAGS_CRYPTO) { diff --git a/bsd/net/if_utun.h b/bsd/net/if_utun.h index 32379a882..51122c980 100644 --- a/bsd/net/if_utun.h +++ b/bsd/net/if_utun.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2011 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,7 +30,9 @@ #ifndef _NET_IF_UTUN_H_ #define _NET_IF_UTUN_H_ +#ifdef PRIVATE #include +#endif /* PRIVATE */ #ifdef KERNEL_PRIVATE @@ -65,6 +67,8 @@ errno_t utun_register_control(void); #define UTUN_OPT_EXT_IFDATA_STATS 3 /* get|set (type int) */ #define UTUN_OPT_INC_IFDATA_STATS_IN 4 /* set to increment stat counters (type struct utun_stats_param) */ #define UTUN_OPT_INC_IFDATA_STATS_OUT 5 /* set to increment stat counters (type struct utun_stats_param) */ + +#ifdef PRIVATE #define UTUN_OPT_ENABLE_CRYPTO 6 #define UTUN_OPT_CONFIG_CRYPTO_KEYS 7 #define UTUN_OPT_UNCONFIG_CRYPTO_KEYS 8 @@ -72,22 +76,22 @@ errno_t utun_register_control(void); #define UTUN_OPT_DISABLE_CRYPTO 10 #define UTUN_OPT_STOP_CRYPTO_DATA_TRAFFIC 11 #define UTUN_OPT_START_CRYPTO_DATA_TRAFFIC 12 +#define UTUN_OPT_CONFIG_CRYPTO_FRAMER 13 +#define UTUN_OPT_UNCONFIG_CRYPTO_FRAMER 14 +#endif /* PRIVATE */ + +#define UTUN_OPT_SET_DELEGATE_INTERFACE 15 /* set the delegate interface (char[]) */ /* * Flags for by UTUN_OPT_FLAGS */ #define UTUN_FLAGS_NO_OUTPUT 0x0001 #define UTUN_FLAGS_NO_INPUT 0x0002 + +#ifdef PRIVATE #define UTUN_FLAGS_CRYPTO 0x0004 #define UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC 0x0008 - -/* - * utun packet type flags - */ -#define UTUN_PKT_TYPE_KEEPALIVE 0x0001 -#define UTUN_PKT_TYPE_IPSEC 0x0002 -#define UTUN_PKT_TYPE_DTLS 0x0004 - +#endif /* PRIVATE */ /* * utun stats parameter structure diff --git a/bsd/net/if_utun_crypto.c b/bsd/net/if_utun_crypto.c index 176f4cd66..553d4874c 100644 --- a/bsd/net/if_utun_crypto.c +++ b/bsd/net/if_utun_crypto.c @@ -35,12 +35,21 @@ #include #include #include +#include + +void +utun_ctl_init_crypto (void) +{ + utun_ctl_init_crypto_dtls(); +} void utun_cleanup_crypto (struct utun_pcb *pcb) { +#if IPSEC utun_cleanup_all_crypto_ipsec(pcb); - // utun_cleanup_all_crypto_dtls(pcb); +#endif + utun_cleanup_all_crypto_dtls(pcb); pcb->utun_flags &= ~UTUN_FLAGS_CRYPTO; } @@ -86,8 +95,14 @@ utun_ctl_enable_crypto (__unused kern_ctl_ref kctlref, if (crypto_args->args_ulen != sizeof(crypto_args->u)) { printf("%s: compatibility mode\n", __FUNCTION__); } + +#if IPSEC if (crypto_args->type == UTUN_CRYPTO_TYPE_IPSEC) { utun_ctl_enable_crypto_ipsec(pcb, crypto_args); + } else +#endif + if (crypto_args->type == UTUN_CRYPTO_TYPE_DTLS) { + utun_ctl_enable_crypto_dtls(pcb, crypto_args); } else { // unsupported return EPROTONOSUPPORT; @@ -100,7 +115,10 @@ utun_ctl_enable_crypto (__unused kern_ctl_ref kctlref, crypto_ctx->type = crypto_args->type; 
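/*
 * [Editor's note -- sketch, not part of the original patch.] The key and
 * framer lists initialized below are ordinary <sys/queue.h> LISTs; each
 * element carries a LIST_ENTRY link (named "chain" for the key material,
 * as utun_ctl_unconfig_crypto_keys() shows). The generic pattern:
 *
 *     struct entry {
 *         LIST_ENTRY(entry) chain;
 *         int payload;                     // hypothetical
 *     };
 *     LIST_HEAD(, entry) head = LIST_HEAD_INITIALIZER(head);
 *
 *     LIST_INSERT_HEAD(&head, e, chain);   // e: a populated struct entry *
 *     struct entry *it;
 *     LIST_FOREACH(it, &head, chain)
 *         visit(it);                       // hypothetical per-node work
 */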
LIST_INIT(&crypto_ctx->keys_listhead); + LIST_INIT(&crypto_ctx->framer_listheads[UTUN_CRYPTO_INNER_TYPE_TO_IDX(UTUN_CRYPTO_INNER_TYPE_IPv4)]); + LIST_INIT(&crypto_ctx->framer_listheads[UTUN_CRYPTO_INNER_TYPE_TO_IDX(UTUN_CRYPTO_INNER_TYPE_IPv6)]); crypto_ctx->valid = 1; + printf("%s: initialized framer lists\n", __FUNCTION__); } // data traffic is stopped by default pcb->utun_flags |= (UTUN_FLAGS_CRYPTO | UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC); @@ -150,8 +168,13 @@ utun_ctl_disable_crypto (__unused kern_ctl_ref kctlref, printf("%s: compatibility mode\n", __FUNCTION__); } +#if IPSEC if (crypto_args->type == UTUN_CRYPTO_TYPE_IPSEC) { utun_ctl_disable_crypto_ipsec(pcb); + } else +#endif + if (crypto_args->type == UTUN_CRYPTO_TYPE_DTLS) { + utun_ctl_disable_crypto_dtls(pcb); } else { // unsupported return EPROTONOSUPPORT; @@ -175,7 +198,7 @@ utun_ctl_config_crypto_keys (__unused kern_ctl_ref kctlref, * - verify the crypto material args passed from user-land. * - check the size of the argument buffer. * - check the direction (IN or OUT) - * - check the type (IPSec or DTLS) + * - check the type (IPSec only) * - crypto material direction and type must match the associated crypto context's. * - we can have a list of crypto materials per context. * - ensure that the crypto context is already valid (don't add crypto material to invalid context). @@ -225,13 +248,16 @@ utun_ctl_config_crypto_keys (__unused kern_ctl_ref kctlref, } // branch-off for ipsec vs. dtls +#if IPSEC if (crypto_keys_args->type == UTUN_CRYPTO_TYPE_IPSEC) { errno_t err; if ((err = utun_ctl_config_crypto_keys_ipsec(pcb, crypto_keys_args, crypto_keys))) { utun_free(crypto_keys); return err; } - } else { + } else +#endif + { // unsupported utun_free(crypto_keys); return EPROTONOSUPPORT; @@ -258,7 +284,7 @@ utun_ctl_unconfig_crypto_keys (__unused kern_ctl_ref kctlref, * - verify the crypto material args passed from user-land. * - check the size of the argument buffer. * - check the direction (IN or OUT) - * - check the type (IPSec or DTLS) + * - check the type (IPSec only) * - crypto material direction and type must match the associated crypto context's. * - we can have a list of crypto materials per context. * - ensure that the crypto context is already valid (don't add crypto material to invalid context). @@ -308,6 +334,7 @@ utun_ctl_unconfig_crypto_keys (__unused kern_ctl_ref kctlref, cur_crypto_keys = nxt_crypto_keys) { nxt_crypto_keys = (__typeof__(nxt_crypto_keys))LIST_NEXT(cur_crypto_keys, chain); // branch-off for ipsec vs. dtls +#if IPSEC if (crypto_keys_args->type == UTUN_CRYPTO_TYPE_IPSEC) { if (crypto_keys_args->u.ipsec_v1.spi == cur_crypto_keys->state.u.ipsec.spi) { errno_t err; @@ -319,7 +346,9 @@ utun_ctl_unconfig_crypto_keys (__unused kern_ctl_ref kctlref, utun_free(cur_crypto_keys); return 0; } - } else { + } else +#endif + { // unsupported return EPROTONOSUPPORT; } @@ -330,6 +359,151 @@ utun_ctl_unconfig_crypto_keys (__unused kern_ctl_ref kctlref, return 0; } +errno_t +utun_ctl_config_crypto_framer (__unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + __unused void *unitinfo, + __unused int opt, + void *data, + size_t len) +{ + struct utun_pcb *pcb = unitinfo; + + /* + * - verify the crypto material args passed from user-land. + * - check the size of the argument buffer. + * - check the direction (IN or OUT) + * - check the type (DTLS only) + * - crypto material direction and type must match the associated crypto context's. + * - we can have a list of crypto materials per context. 
+ * - ensure that the crypto context is already valid (don't add crypto material to invalid context). + * - any error should be equivalent to noop. + */ + if (len < UTUN_CRYPTO_FRAMER_ARGS_HDR_SIZE) { + return EMSGSIZE; + } else { + int idx; + utun_crypto_framer_args_t *framer_args = (__typeof__(framer_args))data; + utun_crypto_ctx_t *crypto_ctx; + + if (framer_args->ver == 0 || framer_args->ver >= UTUN_CRYPTO_FRAMER_ARGS_VER_MAX) { + printf("%s: ver check failed %d\n", __FUNCTION__, (int)framer_args->ver); + return EINVAL; + } + if (framer_args->dir == 0 || framer_args->dir >= UTUN_CRYPTO_DIR_MAX) { + printf("%s: dir check failed %d\n", __FUNCTION__, (int)framer_args->dir); + return EINVAL; + } + if (framer_args->type == 0 || framer_args->type >= UTUN_CRYPTO_TYPE_MAX) { + printf("%s: type check failed %d\n", __FUNCTION__, (int)framer_args->type); + return EINVAL; + } + if (len < UTUN_CRYPTO_FRAMER_ARGS_TOTAL_SIZE(framer_args)) { + printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__, + (int)len, (int)UTUN_CRYPTO_FRAMER_ARGS_TOTAL_SIZE(framer_args)); + return EINVAL; + } + idx = UTUN_CRYPTO_DIR_TO_IDX(framer_args->dir); + crypto_ctx = &pcb->utun_crypto_ctx[idx]; + if (!crypto_ctx->valid) { + return EBADF; + } + if (framer_args->type != crypto_ctx->type) { + // can't add keymat to context with different crypto type + return ENOENT; + } + if (framer_args->args_ulen != sizeof(framer_args->u)) { + printf("%s: compatibility mode\n", __FUNCTION__); + // TODO: + } + + // branch-off for ipsec vs. dtls + if (framer_args->type == UTUN_CRYPTO_TYPE_DTLS) { + errno_t err; + if ((err = utun_ctl_config_crypto_dtls_framer(crypto_ctx, framer_args))) { + return err; + } + } else { + // unsupported + return EPROTONOSUPPORT; + } + } + + return 0; +} + +errno_t +utun_ctl_unconfig_crypto_framer (__unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + __unused void *unitinfo, + __unused int opt, + void *data, + size_t len) +{ + struct utun_pcb *pcb = unitinfo; + + /* + * - verify the crypto material args passed from user-land. + * - check the size of the argument buffer. + * - check the direction (IN or OUT) + * - check the type (DTLS only) + * - crypto material direction and type must match the associated crypto context's. + * - we can have a list of crypto materials per context. + * - ensure that the crypto context is already valid (don't add crypto material to invalid context). + * - any error should be equivalent to noop. 
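The framer config/unconfig handlers share a two-stage length check with the keys path: first the fixed header must fit (else EMSGSIZE), and only then is the self-described total (header plus args_ulen plus varargs_buflen) compared against len (else EINVAL). A hypothetical userland helper that sizes the buffer so both checks pass; offsetof stands in for the kernel-private UTUN_CRYPTO_FRAMER_ARGS_HDR_SIZE macro:

```c
#include <stddef.h>
#include <stdlib.h>
#include <sys/types.h>
#include <net/if_utun_crypto.h>   /* PRIVATE: utun_crypto_framer_args_t */

/*
 * Allocate a framer-args blob whose self-described size matches what the
 * kernel recomputes with UTUN_CRYPTO_FRAMER_ARGS_TOTAL_SIZE(): fixed header
 * up to 'u', plus the versioned union, plus varargs_buflen tail bytes.
 */
static utun_crypto_framer_args_t *
framer_args_alloc(u_int32_t varargs_buflen, size_t *total_len)
{
    size_t hdr = offsetof(utun_crypto_framer_args_t, u);  /* == ..._HDR_SIZE */
    utun_crypto_framer_args_t *args;

    *total_len = hdr + sizeof(args->u) + varargs_buflen;  /* sizeof is unevaluated */
    if ((args = calloc(1, *total_len)) == NULL)
        return NULL;
    args->ver = UTUN_CRYPTO_VER_1;
    args->args_ulen = sizeof(args->u);   /* anything else logs "compatibility mode" */
    args->varargs_buflen = varargs_buflen;
    return args;
}
```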
+ */ + if (len < UTUN_CRYPTO_FRAMER_ARGS_HDR_SIZE) { + return EMSGSIZE; + } else { + int idx; + utun_crypto_framer_args_t *framer_args = (__typeof__(framer_args))data; + utun_crypto_ctx_t *crypto_ctx; + + if (framer_args->ver == 0 || framer_args->ver >= UTUN_CRYPTO_FRAMER_ARGS_VER_MAX) { + printf("%s: ver check failed %d\n", __FUNCTION__, (int)framer_args->ver); + return EINVAL; + } + if (framer_args->dir == 0 || framer_args->dir >= UTUN_CRYPTO_DIR_MAX) { + printf("%s: dir check failed %d\n", __FUNCTION__, (int)framer_args->dir); + return EINVAL; + } + if (framer_args->type == 0 || framer_args->type >= UTUN_CRYPTO_TYPE_MAX) { + printf("%s: type check failed %d\n", __FUNCTION__, (int)framer_args->type); + return EINVAL; + } + if (len < UTUN_CRYPTO_FRAMER_ARGS_TOTAL_SIZE(framer_args)) { + printf("%s: vlen check failed (%d,%d)\n", __FUNCTION__, + (int)len, (int)UTUN_CRYPTO_FRAMER_ARGS_TOTAL_SIZE(framer_args)); + return EINVAL; + } + idx = UTUN_CRYPTO_DIR_TO_IDX(framer_args->dir); + crypto_ctx = &pcb->utun_crypto_ctx[idx]; + if (!crypto_ctx->valid) { + return EBADF; + } + if (framer_args->type != crypto_ctx->type) { + // can't add keymat to context with different crypto type + return ENOENT; + } + if (framer_args->args_ulen != sizeof(framer_args->u)) { + printf("%s: compatibility mode\n", __FUNCTION__); + } + + // branch-off for ipsec vs. dtls + if (framer_args->type == UTUN_CRYPTO_TYPE_DTLS) { + errno_t err; + if ((err = utun_ctl_unconfig_crypto_dtls_framer(crypto_ctx, framer_args))) { + return err; + } + } else { + // unsupported + return EPROTONOSUPPORT; + } + } + + return 0; +} + errno_t utun_ctl_generate_crypto_keys_idx (__unused kern_ctl_ref kctlref, __unused u_int32_t unit, @@ -344,7 +518,7 @@ utun_ctl_generate_crypto_keys_idx (__unused kern_ctl_ref kctlref, * - verify the crypto material index args passed from user-land. * - check the size of the argument buffer. * - check the direction (IN or OUT) - * - check the type (IPSec or DTLS) + * - check the type (IPSec only) * - crypto material direction and type must match the associated crypto context's. * - we can have a list of crypto materials per context. * - any error should be equivalent to noop. @@ -388,12 +562,15 @@ utun_ctl_generate_crypto_keys_idx (__unused kern_ctl_ref kctlref, // traverse crypto materials looking for the right one // branch-off for ipsec vs. 
dtls +#if IPSEC if (crypto_keys_idx_args->type == UTUN_CRYPTO_TYPE_IPSEC) { errno_t err; if ((err = utun_ctl_generate_crypto_keys_idx_ipsec(crypto_keys_idx_args))) { return err; } - } else { + } else +#endif + { // unsupported return EPROTONOSUPPORT; } @@ -449,7 +626,11 @@ utun_ctl_stop_crypto_data_traffic (__unused kern_ctl_ref kctlref, return EINVAL; } - if (crypto_args->type != UTUN_CRYPTO_TYPE_IPSEC) { + if (crypto_args->type == UTUN_CRYPTO_TYPE_IPSEC) { + // nothing + } else if (crypto_args->type == UTUN_CRYPTO_TYPE_DTLS) { + utun_ctl_stop_datatraffic_crypto_dtls(pcb); + } else { // unsupported return EPROTONOSUPPORT; } @@ -505,7 +686,11 @@ utun_ctl_start_crypto_data_traffic (__unused kern_ctl_ref kctlref, return EINVAL; } - if (crypto_args->type != UTUN_CRYPTO_TYPE_IPSEC) { + if (crypto_args->type == UTUN_CRYPTO_TYPE_IPSEC) { + // nothing + } else if (crypto_args->type == UTUN_CRYPTO_TYPE_DTLS) { + utun_ctl_start_datatraffic_crypto_dtls(pcb); + } else { // unsupported return EPROTONOSUPPORT; } @@ -522,8 +707,13 @@ utun_pkt_crypto_output (struct utun_pcb *pcb, mbuf_t *m) printf("%s: context is invalid %d\n", __FUNCTION__, pcb->utun_crypto_ctx[idx].valid); return -1; } +#if IPSEC if (pcb->utun_crypto_ctx[idx].type == UTUN_CRYPTO_TYPE_IPSEC) { return(utun_pkt_ipsec_output(pcb, m)); + } else +#endif + if (pcb->utun_crypto_ctx[idx].type == UTUN_CRYPTO_TYPE_DTLS) { + return(utun_pkt_dtls_output(pcb, m)); } else { // unsupported printf("%s: type is invalid %d\n", __FUNCTION__, pcb->utun_crypto_ctx[idx].type); diff --git a/bsd/net/if_utun_crypto.h b/bsd/net/if_utun_crypto.h index 804ffa91e..14e92594d 100644 --- a/bsd/net/if_utun_crypto.h +++ b/bsd/net/if_utun_crypto.h @@ -37,9 +37,11 @@ typedef enum utun_crypto_ver { #define UTUN_CRYPTO_KEYS_IPSEC_VER_1 UTUN_CRYPTO_VER_1 #define UTUN_CRYPTO_IPSEC_VER_1 UTUN_CRYPTO_VER_1 +#define UTUN_CRYPTO_DTLS_VER_1 UTUN_CRYPTO_VER_1 #define UTUN_CRYPTO_ARGS_VER_MAX UTUN_CRYPTO_VER_MAX #define UTUN_CRYPTO_KEYS_ARGS_VER_MAX UTUN_CRYPTO_VER_MAX +#define UTUN_CRYPTO_FRAMER_ARGS_VER_MAX UTUN_CRYPTO_VER_MAX typedef enum utun_crypto_dir { UTUN_CRYPTO_DIR_IN = 1, @@ -170,7 +172,7 @@ typedef struct utun_crypto_keys_ipsec_args_v1 { // key_auth and key_enc will actually be stored in utun_crypto_KEYS_args_t.varargs_buf } __attribute__((packed)) utun_crypto_keys_ipsec_args_v1_t; -typedef struct utun_crypto_ctx_dtls_mat_args_v1 { +typedef struct utun_crypto_keys_dtls_args_v1 { // stub for DTLS keying material arguments u_int32_t unused; // place holder } __attribute__((packed)) utun_crypto_keys_dtls_args_v1_t; @@ -199,7 +201,7 @@ typedef struct utun_crypto_ipsec_args_v1 { typedef struct utun_crypto_dtls_args_v1 { // stub for DTLS crypto context arguments - u_int32_t unused; // place holder + int kpi_handle; } __attribute__((packed)) utun_crypto_dtls_args_v1_t; // App's parent structure for starting/stopping crypto @@ -218,6 +220,65 @@ typedef struct utun_crypto_args { u_int8_t varargs_buf[0]; // must be at the end of this struct } __attribute__((aligned(4), packed)) utun_crypto_args_t; +typedef enum { + UTUN_CRYPTO_INNER_TYPE_IPv4 = 1, + UTUN_CRYPTO_INNER_TYPE_IPv6, + UTUN_CRYPTO_INNER_TYPE_MAX, +} utun_crypto_framer_inner_type_t; + +typedef struct utun_crypto_framer_ipsec_args_v1 { + // stub for IPSec framer arguments + u_int32_t unused; // place holder +} __attribute__((packed)) utun_crypto_framer_ipsec_args_v1_t; + +typedef struct utun_crypto_framer_dtls_in_args_v1 { + int in_pattern_len; + int in_pattern_mask_len; + int in_data_offset; + // in_pattern, 
in_pattern_mask will actually be stored in utun_crypto_framer_args_t.varargs_buf +} __attribute__((packed)) utun_crypto_framer_dtls_in_args_v1_t; + +typedef struct utun_crypto_framer_dtls_out_args_v1 { + int out_pattern_len; + u_int32_t len_field_mask; // 0 means unconfigured + int len_field_offset; + int len_field_extra; + u_int32_t sequence_field; + u_int32_t sequence_field_mask; // 0 means unconfigured + int sequence_field_offset; + // out_pattern will actually be stored in utun_crypto_framer_args_t.varargs_buf +} __attribute__((packed)) utun_crypto_framer_dtls_out_args_v1_t; + +typedef struct utun_crypto_framer_dtls_args_v1 { + // the following depend on utun_crypto_framer_args_t.dir + union { + // don't change the order, number, or size of elements above this line (in this struct). otherwise UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE breaks backwards compatibility + utun_crypto_framer_dtls_in_args_v1_t in; + utun_crypto_framer_dtls_out_args_v1_t out; + // future (additional) versions of the arguments may be placed here + } u; +} __attribute__((packed)) utun_crypto_framer_dtls_args_v1_t; + +// App's parent structure for sending/storing framer arguments +typedef struct utun_crypto_framer_args { + utun_crypto_ver_t ver; + utun_crypto_type_t type; + utun_crypto_dir_t dir; + utun_crypto_framer_inner_type_t inner_type; + u_int32_t args_ulen; + u_int32_t varargs_buflen; + union { + // don't change the order, number, or size of elements above this line (in this struct). otherwise UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE breaks backwards compatibility + utun_crypto_framer_ipsec_args_v1_t ipsec_v1; + utun_crypto_framer_dtls_args_v1_t dtls_v1; + // future (additional) versions of the arguments may be placed here + } u; + u_int8_t varargs_buf[0]; +} __attribute__((aligned(4), packed)) utun_crypto_framer_args_t; + +#define utun_crypto_framer_args_dtls_in(framer) framer->u.dtls_v1.u.in +#define utun_crypto_framer_args_dtls_out(framer) framer->u.dtls_v1.u.out + #ifdef KERNEL_PRIVATE #include @@ -226,6 +287,7 @@ typedef struct utun_crypto_args { #include #include #include +#include struct utun_pcb; @@ -263,12 +325,83 @@ typedef struct utun_crypto_keys { LIST_ENTRY(utun_crypto_keys) chain; } __attribute__((aligned(4), packed)) utun_crypto_keys_t; +// structures used for storing kernel's framer runtime state +typedef struct utun_crypto_framer_ipsec_state { + // stub for kernel's IPSec framer state + u_int32_t unused; // place holder +} __attribute__((packed)) utun_crypto_framer_ipsec_state_t; + +typedef struct utun_crypto_framer_dtls_in_state { + u_int8_t *in_pattern; + int in_pattern_len; + u_int8_t *in_pattern_mask; + u_int8_t *in_pattern_masked; + int in_data_offset; + struct bpf_program in_pattern_filter; +} __attribute__((packed)) utun_crypto_framer_dtls_in_state_t; + +typedef struct utun_crypto_framer_dtls_out_state { + u_int8_t *out_pattern; + int out_pattern_len; + u_int32_t len_field_mask; // 0 means unconfigured + int len_field_offset; + int len_field_extra; + u_int32_t sequence_field; + u_int32_t sequence_field_initval; + u_int32_t sequence_field_mask; // 0 means unconfigured + int sequence_field_offset; +} __attribute__((packed)) utun_crypto_framer_dtls_out_state_t; + +typedef struct utun_crypto_framer_dtls_state { + union { + // don't change the order, number, or size of elements above this line (in this struct). 
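For the inbound case, the variable tail of utun_crypto_framer_args_t carries the match pattern immediately followed by its mask; that layout is exactly how utun_ctl_config_crypto_dtls_framer() later indexes varargs_buf. A hypothetical fill routine continuing the framer_args_alloc() sketch above (the caller is assumed to have allocated varargs_buflen == 2 * len):

```c
#include <string.h>
#include <sys/types.h>
#include <net/if_utun_crypto.h>   /* PRIVATE */

/* Build an ingress framer request: pattern bytes first, then mask bytes. */
static void
framer_args_fill_dtls_in(utun_crypto_framer_args_t *args,
                         const u_int8_t *pattern, const u_int8_t *mask,
                         int len, int data_offset)
{
    args->type = UTUN_CRYPTO_TYPE_DTLS;
    args->dir = UTUN_CRYPTO_DIR_IN;
    args->inner_type = UTUN_CRYPTO_INNER_TYPE_IPv4;     /* framer lists are per inner type */
    args->u.dtls_v1.u.in.in_pattern_len = len;
    args->u.dtls_v1.u.in.in_pattern_mask_len = len;     /* must equal in_pattern_len */
    args->u.dtls_v1.u.in.in_data_offset = data_offset;  /* header bytes stripped on match */
    memcpy(&args->varargs_buf[0], pattern, len);        /* pattern first...  */
    memcpy(&args->varargs_buf[len], mask, len);         /* ...then its mask  */
}
```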
otherwise UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE breaks backwards compatibility + utun_crypto_framer_dtls_in_state_t in; + utun_crypto_framer_dtls_out_state_t out; + // future (additional) versions of the arguments may be placed here + } u; +} __attribute__((packed)) utun_crypto_framer_dtls_state_t; + +// kernel's parent structure for framer state +typedef struct utun_crypto_framer_state { + union { + utun_crypto_framer_ipsec_state_t ipsec; + utun_crypto_framer_dtls_state_t dtls; + } u; +} __attribute__((aligned(4), packed)) utun_crypto_framer_state_t; + +// kernel's parent structure for the framer +typedef struct utun_crypto_framer { + int valid; // is valid? + utun_crypto_type_t type; + utun_crypto_dir_t dir; + utun_crypto_framer_inner_type_t inner_type; + protocol_family_t inner_protocol_family; + utun_crypto_framer_state_t state; // runtime state + LIST_ENTRY(utun_crypto_framer) framer_chain; +} __attribute__((aligned(4), packed)) utun_crypto_framer_t; + +#define UTUN_CRYPTO_INNER_TYPE_TO_IDX(type) (type - 1) +#define UTUN_CRYPTO_IDX_TO_INNER_TYPE(idx) (idx + 1) +#define UTUN_CRYPTO_INNER_TYPE_IDX_MAX UTUN_CRYPTO_INNER_TYPE_TO_IDX(UTUN_CRYPTO_INNER_TYPE_MAX) + +#define UTUN_CRYPTO_DIR_TO_IDX(dir) (dir - 1) +#define UTUN_CRYPTO_IDX_TO_DIR(idx) (idx + 1) +#define UTUN_CRYPTO_DIR_IDX_MAX UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_MAX) + +#define utun_crypto_framer_state_dtls_in(framer) framer->state.u.dtls.u.in +#define utun_crypto_framer_state_dtls_out(framer) framer->state.u.dtls.u.out + // kernel's parent structure for all crypto stuff typedef struct utun_crypto_ctx { int valid; utun_crypto_type_t type; u_int16_t unused; LIST_HEAD(chain, utun_crypto_keys) keys_listhead; + LIST_HEAD(framer_chain, utun_crypto_framer) framer_listheads[UTUN_CRYPTO_INNER_TYPE_IDX_MAX]; + int num_framers; + int kpi_handle; + caddr_t kpi_ref; + int kpi_refcnt; } __attribute__((aligned(4), packed)) utun_crypto_ctx_t; #define UTUN_CRYPTO_KEYS_IDX_ARGS_HDR_SIZE ((size_t)(&((utun_crypto_keys_idx_args_t *)0)->u)) @@ -279,12 +412,39 @@ typedef struct utun_crypto_ctx { #define UTUN_CRYPTO_KEYS_ARGS_VARARGS_BUF(args) ((u_int8_t *)args + UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE + args->args_ulen) #define UTUN_CRYPTO_KEYS_ARGS_TOTAL_SIZE(args) ((size_t)(UTUN_CRYPTO_KEYS_ARGS_HDR_SIZE + args->args_ulen + args->varargs_buflen)) +#define UTUN_CRYPTO_FRAMER_ARGS_HDR_SIZE ((size_t)(&((utun_crypto_framer_args_t *)0)->u)) +#define UTUN_CRYPTO_FRAMER_ARGS_VARARGS_BUF(args) ((u_int8_t *)args + UTUN_CRYPTO_FRAMER_ARGS_HDR_SIZE + args->args_ulen) +#define UTUN_CRYPTO_FRAMER_ARGS_TOTAL_SIZE(args) ((size_t)(UTUN_CRYPTO_FRAMER_ARGS_HDR_SIZE + args->args_ulen + args->varargs_buflen)) + #define UTUN_CRYPTO_ARGS_HDR_SIZE ((size_t)(&((utun_crypto_args_t *)0)->u)) #define UTUN_CRYPTO_ARGS_VARARGS_BUF(args) ((u_int8_t *)args + UTUN_CRYPTO_ARGS_HDR_SIZE + args->args_ulen) #define UTUN_CRYPTO_ARGS_TOTAL_SIZE(args) ((size_t)(UTUN_CRYPTO_ARGS_HDR_SIZE + args->args_ulen + args->varargs_buflen)) -#define UTUN_CRYPTO_DIR_TO_IDX(dir) (dir - 1) -#define UTUN_CRYPTO_IDX_TO_DIR(idx) (idx + 1) +typedef caddr_t (*utun_crypto_kpi_connect_func)(int kpi_handle, struct utun_pcb *utun_ref); + +typedef errno_t (*utun_crypto_kpi_send_func)(caddr_t ref, mbuf_t *pkt); + +typedef struct utun_crypto_kpi_reg { + /* Dispatch functions */ + utun_crypto_type_t crypto_kpi_type; + u_int32_t crypto_kpi_flags; + utun_crypto_kpi_connect_func crypto_kpi_connect; + utun_crypto_kpi_send_func crypto_kpi_send; +} utun_crypto_kpi_reg_t; + +typedef struct utun_crypto_kpi_reg_list { + 
utun_crypto_kpi_reg_t reg; + struct utun_crypto_kpi_reg_list *next; +} utun_crypto_kpi_reg_list_t; + +void +utun_ctl_init_crypto(void); + +/* + * Summary: registers the crypto KPI's Kext routines with UTUN... so that UTUN can make calls into it (e.g. DTLS) + */ +errno_t +utun_crypto_kpi_register(utun_crypto_kpi_reg_t *reg); void utun_cleanup_crypto(struct utun_pcb *pcb); @@ -321,6 +481,22 @@ utun_ctl_unconfig_crypto_keys(__unused kern_ctl_ref kctlref, void *data, size_t len); +errno_t +utun_ctl_config_crypto_framer(__unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + __unused void *unitinfo, + __unused int opt, + void *data, + size_t len); + +errno_t +utun_ctl_unconfig_crypto_framer(__unused kern_ctl_ref kctlref, + __unused u_int32_t unit, + __unused void *unitinfo, + __unused int opt, + void *data, + size_t len); + errno_t utun_ctl_generate_crypto_keys_idx(__unused kern_ctl_ref kctlref, __unused u_int32_t unit, diff --git a/bsd/net/if_utun_crypto_dtls.c b/bsd/net/if_utun_crypto_dtls.c new file mode 100644 index 000000000..966447af2 --- /dev/null +++ b/bsd/net/if_utun_crypto_dtls.c @@ -0,0 +1,1045 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
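The registration contract being declared here is deliberately small: a provider kext hands utun one connect callback (returning an opaque caddr_t session ref, or tearing down when the utun_ref argument is NULL) and one send callback. A hypothetical provider start routine under those assumptions; everything prefixed my_ is illustrative, not a real kext symbol:

```c
#include <mach/kmod.h>
#include <sys/types.h>
#include <sys/kpi_mbuf.h>
#include <net/if_utun_crypto.h>   /* PRIVATE: utun_crypto_kpi_reg_t */

/* hypothetical provider internals, defined elsewhere in the kext */
extern caddr_t my_dtls_session_bind(int kpi_handle, struct utun_pcb *utun_ref);
extern errno_t my_dtls_session_send(caddr_t ref, mbuf_t *pkt);

static caddr_t
my_dtls_connect(int kpi_handle, struct utun_pcb *utun_ref)
{
    /* NULL utun_ref means disconnect, as utun_ctl_disable_crypto_dtls() does */
    return my_dtls_session_bind(kpi_handle, utun_ref);
}

static errno_t
my_dtls_send(caddr_t ref, mbuf_t *pkt)
{
    return my_dtls_session_send(ref, pkt);
}

kern_return_t
my_dtls_kext_start(__unused kmod_info_t *ki, __unused void *d)
{
    utun_crypto_kpi_reg_t reg = {
        .crypto_kpi_type    = UTUN_CRYPTO_TYPE_DTLS,
        .crypto_kpi_flags   = 0,
        .crypto_kpi_connect = my_dtls_connect,
        .crypto_kpi_send    = my_dtls_send,
    };

    return (utun_crypto_kpi_register(&reg) == 0) ? KERN_SUCCESS : KERN_FAILURE;
}
```

Note that registration is latched exactly once (the OSCompareAndSwap on dtls_kpi_callbacks_inited below), so a second provider registering is accepted but ignored.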
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern errno_t utun_pkt_input (struct utun_pcb *pcb, mbuf_t m); + +static UInt32 dtls_kpi_callbacks_inited = FALSE; +static unsigned int dtls_kpi_flags = 0; +static utun_crypto_kpi_connect_func dtls_kpi_connect = (__typeof__(dtls_kpi_connect))NULL; +static utun_crypto_kpi_send_func dtls_kpi_send = (__typeof__(dtls_kpi_send))NULL; + +// convert this mutex to shared lock +static UInt32 dtls_ctl_mutex_inited = FALSE; +static lck_grp_t *dtls_ctl_mutex_grp = NULL; +static lck_grp_attr_t *dtls_ctl_mutex_grp_attr = NULL; +static lck_attr_t *dtls_ctl_mutex_attr = NULL; +static lck_mtx_t dtls_ctl_mutex; + +#define utun_ctl_get_first_framer(ctx, inner_type) (utun_crypto_framer_t *)LIST_FIRST(&ctx->framer_listheads[UTUN_CRYPTO_INNER_TYPE_TO_IDX(inner_type)]) +#define utun_get_framer_listhead(ctx, inner_type) &ctx->framer_listheads[UTUN_CRYPTO_INNER_TYPE_TO_IDX(inner_type)] + +static void +utun_ctl_clr_dtls_framer (utun_crypto_framer_t *rem_framer) +{ + if (!rem_framer) return; + + // TOFIX: switch to BPF + LIST_REMOVE(rem_framer, framer_chain); // unchain the framer + if (rem_framer->dir == UTUN_CRYPTO_DIR_IN) { + if (utun_crypto_framer_state_dtls_in(rem_framer).in_pattern) { + utun_free(utun_crypto_framer_state_dtls_in(rem_framer).in_pattern); + } + if (utun_crypto_framer_state_dtls_in(rem_framer).in_pattern_mask) { + utun_free(utun_crypto_framer_state_dtls_in(rem_framer).in_pattern_mask); + } + if (utun_crypto_framer_state_dtls_in(rem_framer).in_pattern_masked) { + utun_free(utun_crypto_framer_state_dtls_in(rem_framer).in_pattern_masked); + } + } else { + if (utun_crypto_framer_state_dtls_out(rem_framer).out_pattern) { + utun_free(utun_crypto_framer_state_dtls_out(rem_framer).out_pattern); + } + } + utun_free(rem_framer); + + return; +} + +static void +utun_ctl_clr_dtls_framers (utun_crypto_framer_t *first_framer) +{ + utun_crypto_framer_t *cur_framer, *nxt_framer; + + // check framer->state.u.dtls.u.in.listhead for duplicates; + for (cur_framer = first_framer; + cur_framer != NULL; + cur_framer = nxt_framer) { + nxt_framer = (__typeof__(nxt_framer))LIST_NEXT(cur_framer, framer_chain); + utun_ctl_clr_dtls_framer(cur_framer); + } + + return; +} + +static void +utun_ctl_clr_dtls_all_framers (utun_crypto_ctx_t *crypto_ctx) +{ + utun_ctl_clr_dtls_framers(utun_ctl_get_first_framer(crypto_ctx, UTUN_CRYPTO_INNER_TYPE_IPv4)); + utun_ctl_clr_dtls_framers(utun_ctl_get_first_framer(crypto_ctx, UTUN_CRYPTO_INNER_TYPE_IPv6)); + crypto_ctx->num_framers = 0; +} + +static void +utun_ctl_restart_dtls_framers (utun_crypto_framer_t *first_framer) +{ + utun_crypto_framer_t *cur_framer; + + // check framer->state.u.dtls.u.in.listhead for duplicates; + for (cur_framer = first_framer; + cur_framer != NULL; + cur_framer = (__typeof__(cur_framer))LIST_NEXT(cur_framer, framer_chain)) { + utun_crypto_framer_state_dtls_out(cur_framer).sequence_field = utun_crypto_framer_state_dtls_out(cur_framer).sequence_field_initval; + } + + return; +} + +static void +utun_ctl_restart_dtls_all_framers (utun_crypto_ctx_t *crypto_ctx) +{ + utun_ctl_restart_dtls_framers(utun_ctl_get_first_framer(crypto_ctx, UTUN_CRYPTO_INNER_TYPE_IPv4)); + utun_ctl_restart_dtls_framers(utun_ctl_get_first_framer(crypto_ctx, UTUN_CRYPTO_INNER_TYPE_IPv6)); +} + +static int +is_pattern_all_zeroes (u_int8_t *pattern, + int pattern_len) +{ + int i; + + if 
(!pattern || !pattern_len) return FALSE; // false if args are NULL + + for (i = 0; i < pattern_len; i++) { + if (pattern[i] != 0) return FALSE; + } + return TRUE; +} + +static int +is_pattern_masked_all_zeroes (u_int8_t *pattern, + u_int8_t *pattern_mask, + int pattern_len) +{ + int i; + + if (!pattern || !pattern_mask || !pattern_len) return FALSE; // false if args are NULL + + for (i = 0; i < pattern_len; i++) { + if ((pattern[i] & pattern_mask[i])) return FALSE; + } + return TRUE; +} + +static void +utun_ctl_calc_dtls_framer_pattern_and_mask (u_int8_t *pattern_masked, u_int8_t *pattern, u_int8_t *mask, int len) +{ + int i; + for (i = 0; i < len; i++) { + pattern_masked[i] = (pattern[i] & mask[i]); + } +} + +static Boolean +utun_ctl_did_dtls_framer_pattern_match (u_int8_t *input, u_int8_t *pattern_masked, int len) +{ + int i; + for (i = 0; i < len; i++) { + if ((input[i] & pattern_masked[i]) != pattern_masked[i]) return FALSE; + } + return TRUE; +} + +static Boolean +utun_pkt_dtls_input_frame_is_data(utun_crypto_ctx_t *crypto_ctx, + mbuf_t *pkt, + protocol_family_t family, + int *striplen) +{ + u_int8_t *p; + utun_crypto_framer_t *cur_framer; + + p = mtod(*pkt, __typeof__(p)); + for (cur_framer = utun_ctl_get_first_framer(crypto_ctx, utun_crypto_framer_protocol_family_to_inner_type(family)); + cur_framer != NULL; + cur_framer = (__typeof__(cur_framer))LIST_NEXT(cur_framer, framer_chain)) { + if (m_pktlen(*pkt) < utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_len) { + continue; + } + if ((*pkt)->m_len < utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_len) { + *pkt = m_pullup(*pkt, utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_len); + if (!*pkt || + (*pkt)->m_len < utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_len) { + return FALSE; + } + p = mtod(*pkt, __typeof__(p)); + } + // TOFIX: switch to BPF + if (utun_ctl_did_dtls_framer_pattern_match(p, + utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_masked, + utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_len)) { + *striplen = utun_crypto_framer_state_dtls_in(cur_framer).in_data_offset; + return TRUE; + } + } + return FALSE; +} + +#define GETLONG(l, cp) { \ + (l) = *(cp)++ << 8; \ + (l) |= *(cp)++; (l) <<= 8; \ + (l) |= *(cp)++; (l) <<= 8; \ + (l) |= *(cp)++; \ + } +#define PUTLONG(l, cp) { \ + *(cp)++ = (u_char) ((l) >> 24); \ + *(cp)++ = (u_char) ((l) >> 16); \ + *(cp)++ = (u_char) ((l) >> 8); \ + *(cp)++ = (u_char) (l); \ + } + +static int +utun_pkt_dtls_output_frame_encapsulate (utun_crypto_ctx_t *crypto_ctx, + mbuf_t *pkt, + protocol_family_t proto) +{ + u_int8_t *p; + utun_crypto_framer_t *cur_framer; + u_int32_t pkt_len; + + // TOFIX: switch to BPF + + if (!crypto_ctx->num_framers) { + return 0; + } + if (proto != AF_INET && proto != AF_INET6) { + printf("%s: unsupported proto %d\n", __FUNCTION__, proto); + return EINVAL; + } + + for (cur_framer = utun_ctl_get_first_framer(crypto_ctx, utun_crypto_framer_protocol_family_to_inner_type(proto)); + cur_framer != NULL && !utun_crypto_framer_state_dtls_out(cur_framer).out_pattern; + cur_framer = (__typeof__(cur_framer))LIST_NEXT(cur_framer, framer_chain)); + if (!cur_framer || + !utun_crypto_framer_state_dtls_out(cur_framer).out_pattern_len) { + return 0; + } + + pkt_len = m_pktlen(*pkt); + + // prepend/encapsulate the output pattern + if (mbuf_prepend(pkt, utun_crypto_framer_state_dtls_out(cur_framer).out_pattern_len, MBUF_DONTWAIT) != 0) { + printf("%s - ifnet_output prepend failed\n", __FUNCTION__); + return ENOBUFS; + } + + p = 
mtod(*pkt, __typeof__(p)); + memcpy(p, + utun_crypto_framer_state_dtls_out(cur_framer).out_pattern, + utun_crypto_framer_state_dtls_out(cur_framer).out_pattern_len); + // fill a "length" field... if configured + if (utun_crypto_framer_state_dtls_out(cur_framer).len_field_mask) { + u_int32_t tmp; + u_int8_t *q = p + utun_crypto_framer_state_dtls_out(cur_framer).len_field_offset; + GETLONG(tmp, q); + tmp &= ((pkt_len + utun_crypto_framer_state_dtls_out(cur_framer).len_field_extra) & utun_crypto_framer_state_dtls_out(cur_framer).len_field_mask); + q = p + utun_crypto_framer_state_dtls_out(cur_framer).len_field_offset; + PUTLONG(tmp, q); + } + // fill a "sequence" field... if configured + if (utun_crypto_framer_state_dtls_out(cur_framer).sequence_field_mask) { + u_int32_t tmp = (utun_crypto_framer_state_dtls_out(cur_framer).sequence_field & utun_crypto_framer_state_dtls_out(cur_framer).sequence_field_mask); + u_int8_t *q = p + utun_crypto_framer_state_dtls_out(cur_framer).sequence_field_offset; + GETLONG(tmp, q); + tmp &= (utun_crypto_framer_state_dtls_out(cur_framer).sequence_field & utun_crypto_framer_state_dtls_out(cur_framer).sequence_field_mask); + q = p + utun_crypto_framer_state_dtls_out(cur_framer).sequence_field_offset; + PUTLONG(tmp, q); + utun_crypto_framer_state_dtls_out(cur_framer).sequence_field++; + } + return 0; +} + +void +utun_ctl_init_crypto_dtls (void) +{ + if (OSCompareAndSwap(FALSE, TRUE, &dtls_ctl_mutex_inited)) { + if (!dtls_ctl_mutex_grp_attr) + dtls_ctl_mutex_grp_attr = lck_grp_attr_alloc_init(); + if (!dtls_ctl_mutex_grp) + dtls_ctl_mutex_grp = lck_grp_alloc_init("utun-crypto", dtls_ctl_mutex_grp_attr); + if (!dtls_ctl_mutex_attr) + dtls_ctl_mutex_attr = lck_attr_alloc_init(); + + lck_mtx_init(&dtls_ctl_mutex, dtls_ctl_mutex_grp, dtls_ctl_mutex_attr); + } +} + +/* + * Summary: registers the DTLS Kext routines with UTUN... so that UTUN can make calls into DTLS + */ +errno_t +utun_ctl_register_dtls (utun_crypto_kpi_reg_t *reg) +{ + //printf("%s: entering\n", __FUNCTION__); + if (!reg) return EINVAL; + + //printf("%s: type %d\n", __FUNCTION__, reg->crypto_kpi_type); + if (reg->crypto_kpi_type != UTUN_CRYPTO_TYPE_DTLS) { + return EINVAL; + } + + if (!reg->crypto_kpi_connect) { + return EINVAL; + } + + if (!reg->crypto_kpi_send) { + return EINVAL; + } + + // printf("%s: pre-value of dtls_kpi_callbacks_inited %lu\n", __FUNCTION__, + // dtls_kpi_callbacks_inited); + if (OSCompareAndSwap(FALSE, TRUE, &dtls_kpi_callbacks_inited)) { + dtls_kpi_flags = reg->crypto_kpi_flags; + dtls_kpi_connect = reg->crypto_kpi_connect; + dtls_kpi_send = reg->crypto_kpi_send; + } + //printf("%s: post-value of dtls_kpi_callbacks_inited %lu\n", __FUNCTION__, + // dtls_kpi_callbacks_inited); + return 0; +} + +/* + * Summary: enables dtls crypto info for the specified utun. dtls ref is passed into args. 
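Stepping back to the matcher above: utun_ctl_did_dtls_framer_pattern_match() is a subset test. Every bit set in the precomputed (pattern & mask) must also be set in the input byte; bits outside the mask are ignored. A standalone userland demo of that semantic, using DTLS record content types (0x17 application_data vs. 0x16 handshake) purely as an illustrative pattern:

```c
#include <stdio.h>
#include <sys/types.h>

/* mirrors utun_ctl_calc_dtls_framer_pattern_and_mask() */
static void
calc_masked(u_int8_t *masked, const u_int8_t *pat, const u_int8_t *mask, int len)
{
    for (int i = 0; i < len; i++)
        masked[i] = pat[i] & mask[i];
}

/* mirrors utun_ctl_did_dtls_framer_pattern_match() */
static int
matches(const u_int8_t *in, const u_int8_t *masked, int len)
{
    for (int i = 0; i < len; i++)
        if ((in[i] & masked[i]) != masked[i])
            return 0;
    return 1;
}

int
main(void)
{
    u_int8_t pattern[1] = { 0x17 }, mask[1] = { 0xff }, masked[1];
    u_int8_t data_rec = 0x17, handshake_rec = 0x16;

    calc_masked(masked, pattern, mask, 1);
    printf("application_data: %d\n", matches(&data_rec, masked, 1));      /* 1 */
    printf("handshake:        %d\n", matches(&handshake_rec, masked, 1)); /* 0 */
    return 0;
}
```

Because the test only demands that the masked bits be present, an input byte with extra bits set still matches; this is also why in_pattern_masked is precomputed once at configuration time rather than re-ANDing pattern and mask per packet.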
+ */ +void +utun_ctl_enable_crypto_dtls(struct utun_pcb *pcb, utun_crypto_args_t *args) +{ + utun_crypto_ctx_t *crypto_ctx; + + lck_mtx_lock(&dtls_ctl_mutex); + + //printf("%s: entering, flags %x, kpi-handle %x, kpi-ref %p, kpi-refcnt %d\n", __FUNCTION__, pcb->utun_flags, crypto_ctx->kpi_handle, crypto_ctx->kpi_ref, crypto_ctx->kpi_refcnt); + + crypto_ctx = &pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_IN)]; + if (crypto_ctx->valid) { + printf("%s: dtls already enabled (prev %u, now %u)\n", __FUNCTION__, + crypto_ctx->kpi_handle, args->u.dtls_v1.kpi_handle); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + + crypto_ctx = &pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_OUT)]; + if (!crypto_ctx->valid) { + crypto_ctx->kpi_handle = args->u.dtls_v1.kpi_handle; + } else { + printf("%s: dtls already enabled for egress (prev %u, now %u)\n", __FUNCTION__, + crypto_ctx->kpi_handle, args->u.dtls_v1.kpi_handle); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + // crypto_ctx->valid will be set in utun_ctl_enable_crypto + lck_mtx_unlock(&dtls_ctl_mutex); + return; +} + +/* + * Summary: disables dtls crypto info for the specified utun. + */ +void +utun_ctl_disable_crypto_dtls(struct utun_pcb *pcb) +{ + utun_crypto_ctx_t *crypto_ctx; + + lck_mtx_lock(&dtls_ctl_mutex); + + //printf("%s: entering, flags %x, kpi-handle %d, kpi-ref %p, kpi-refcnt %d\n", __FUNCTION__, pcb->utun_flags, crypto_ctx->kpi_handle, crypto_ctx->kpi_ref, crypto_ctx->kpi_refcnt); + + crypto_ctx = &pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_IN)]; + if (crypto_ctx->valid && + crypto_ctx->type == UTUN_CRYPTO_TYPE_DTLS) { + utun_ctl_clr_dtls_all_framers(crypto_ctx); + } + + crypto_ctx = &pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_OUT)]; + if (!crypto_ctx->valid || + crypto_ctx->type != UTUN_CRYPTO_TYPE_DTLS) { + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + if (crypto_ctx->kpi_ref) { + if (dtls_kpi_connect) { + (void)dtls_kpi_connect(crypto_ctx->kpi_handle, NULL); + if (--crypto_ctx->kpi_refcnt == 0) { + crypto_ctx->kpi_ref = (__typeof__(crypto_ctx->kpi_ref))NULL; + crypto_ctx->kpi_handle = UTUN_CRYPTO_DTLS_HANDLE_INVALID; + } else { + // printf("%s: ### dtls_kpi_refcnt %d not yet zero\n", + // __FUNCTION__, crypto_ctx->kpi_refcnt); + } + } else { + printf("%s: ### dtls_ctl_connect unavailable\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + } else { + if (crypto_ctx->kpi_handle < 0) { + printf("%s: dtls already disabled\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + crypto_ctx->kpi_handle = UTUN_CRYPTO_DTLS_HANDLE_INVALID; + } + utun_ctl_clr_dtls_all_framers(crypto_ctx); + lck_mtx_unlock(&dtls_ctl_mutex); + return; +} + +static utun_crypto_framer_t * +utun_ctl_get_dtls_in_framer (utun_crypto_framer_t *first_framer, + u_int8_t *in_pattern, + int in_pattern_len, + u_int8_t *in_pattern_mask, + int in_pattern_mask_len) +{ + utun_crypto_framer_t *cur_framer; + + // check framer->u.listhead for duplicates; + for (cur_framer = first_framer; + cur_framer != NULL; + cur_framer = (__typeof__(cur_framer))LIST_NEXT(cur_framer, framer_chain)) { + // TOFIX: use in_pattern_masked + if (utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_len == in_pattern_len && + memcmp(utun_crypto_framer_state_dtls_in(cur_framer).in_pattern, + in_pattern, + in_pattern_len) == 0 && + utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_len == in_pattern_mask_len && + memcmp(utun_crypto_framer_state_dtls_in(cur_framer).in_pattern_mask, + 
in_pattern_mask, + in_pattern_mask_len) == 0) { + // found + return cur_framer; + } + } + + return NULL; +} + +errno_t +utun_ctl_config_crypto_dtls_framer (utun_crypto_ctx_t *crypto_ctx, + utun_crypto_framer_args_t *args) +{ + utun_crypto_framer_t *framer, *new_framer = NULL, *dup_framer; + + if (args->ver != UTUN_CRYPTO_DTLS_VER_1) { + return EINVAL; + } + if (!args->type || args->type >= UTUN_CRYPTO_INNER_TYPE_MAX) { + return EINVAL; + } + + lck_mtx_lock(&dtls_ctl_mutex); + + if (args->dir == UTUN_CRYPTO_DIR_IN) { + // Input framer (for tunnel hdr detection and decapsulation). there can be several pattern that identify data (vs. control) packets. + + // First, the args need to be verified for errors/inconsistencies + // pattern and mask have to be configured + if (!utun_crypto_framer_args_dtls_in(args).in_pattern_len || + !utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len) { + lck_mtx_unlock(&dtls_ctl_mutex); + printf("%s: invalid dtls in-pattern %d mask %d\n", __FUNCTION__, + utun_crypto_framer_args_dtls_in(args).in_pattern_len, + utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len); + return EINVAL; + } + // pattern and mask lengths have to match + if (utun_crypto_framer_args_dtls_in(args).in_pattern_len != utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len) { + lck_mtx_unlock(&dtls_ctl_mutex); + printf("%s: inconsistent dtls in-pattern %d mask %d\n",__FUNCTION__, + utun_crypto_framer_args_dtls_in(args).in_pattern_len, + utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len); + return EINVAL; + } + // check for len inconsistencies + if ((u_int32_t)utun_crypto_framer_args_dtls_in(args).in_pattern_len + (u_int32_t)utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len != args->varargs_buflen) { + lck_mtx_unlock(&dtls_ctl_mutex); + printf("%s: inconsistent dtls in-pattern %d mask %d, total %d\n",__FUNCTION__, + utun_crypto_framer_args_dtls_in(args).in_pattern_len, + utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len, + args->varargs_buflen); + return EINVAL; + } + // utun_crypto_framer_args_dtls_in(args).in_pattern should not be all zeros + if (is_pattern_all_zeroes(&args->varargs_buf[0], + utun_crypto_framer_args_dtls_in(args).in_pattern_len)) { + lck_mtx_unlock(&dtls_ctl_mutex); + printf("%s: in-pattern is all zeros, len %d\n",__FUNCTION__, + utun_crypto_framer_args_dtls_in(args).in_pattern_len); + return EINVAL; + } + // utun_crypto_framer_args_dtls_in(args).in_pattern_mask should not be all zeros + if (is_pattern_all_zeroes(&args->varargs_buf[utun_crypto_framer_args_dtls_in(args).in_pattern_len], + utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len)) { + lck_mtx_unlock(&dtls_ctl_mutex); + printf("%s: in-pattern-mask is all zeros, len %d\n",__FUNCTION__, + utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len); + return EINVAL; + } + // utun_crypto_framer_args_dtls_in(args).in_pattern & utun_crypto_framer_args_dtls_in(args).in_pattern_mask should not be zeros + if (is_pattern_masked_all_zeroes(&args->varargs_buf[0], + &args->varargs_buf[utun_crypto_framer_args_dtls_in(args).in_pattern_len], + utun_crypto_framer_args_dtls_in(args).in_pattern_len)) { + lck_mtx_unlock(&dtls_ctl_mutex); + printf("%s: in-pattern-masked is all zeros, len %d\n",__FUNCTION__, + utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len); + return EINVAL; + } + + // Secondly, we need to be careful about existing framer configs + if (!(framer = utun_ctl_get_first_framer(crypto_ctx, args->inner_type))) { + // no framers configured + if (!(framer = 
utun_alloc(sizeof(*framer)))) {
+				lck_mtx_unlock(&dtls_ctl_mutex);
+				return ENOBUFS;
+			}
+			bzero(framer, sizeof(*framer));
+			// fall through to fill-in the 1st framer
+		} else {
+			// at least one framer configured... check framer->u.listhead for duplicates;
+			if ((dup_framer = utun_ctl_get_dtls_in_framer(framer /* could be a list */,
+								      &args->varargs_buf[0],
+								      utun_crypto_framer_args_dtls_in(args).in_pattern_len,
+								      &args->varargs_buf[utun_crypto_framer_args_dtls_in(args).in_pattern_len],
+								      utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len))) {
+				// duplicate
+				lck_mtx_unlock(&dtls_ctl_mutex);
+				printf("%s: ignoring duplicate framer for type %d\n", __FUNCTION__,
+				       args->inner_type);
+				return 0;
+			}
+
+			if (!(new_framer = utun_alloc(sizeof(*new_framer)))) {
+				lck_mtx_unlock(&dtls_ctl_mutex);
+				return ENOBUFS;
+			}
+			bzero(new_framer, sizeof(*new_framer));
+			framer = new_framer;
+			// fall through to fill-in additional framer
+		}
+		// chain the entry allocated above ('framer' aliases 'new_framer' on the duplicate-checked path)
+		LIST_INSERT_HEAD(utun_get_framer_listhead(crypto_ctx, args->inner_type),
+				 framer,
+				 framer_chain);
+
+		framer->inner_type = args->inner_type;
+		framer->inner_protocol_family = utun_crypto_framer_inner_type_to_protocol_family(args->inner_type);
+		// allocate and fill the pattern
+		if (!(utun_crypto_framer_state_dtls_in(framer).in_pattern = utun_alloc(utun_crypto_framer_args_dtls_in(args).in_pattern_len))) {
+			utun_ctl_clr_dtls_framer(framer);
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			return ENOBUFS;
+		}
+		memcpy(utun_crypto_framer_state_dtls_in(framer).in_pattern,
+		       &args->varargs_buf[0],
+		       utun_crypto_framer_args_dtls_in(args).in_pattern_len);
+		utun_crypto_framer_state_dtls_in(framer).in_pattern_len = utun_crypto_framer_args_dtls_in(args).in_pattern_len;
+
+		// allocate and fill the pattern-mask
+		if (!(utun_crypto_framer_state_dtls_in(framer).in_pattern_mask = utun_alloc(utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len))) {
+			utun_ctl_clr_dtls_framer(framer);
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			return ENOBUFS;
+		}
+		memcpy(utun_crypto_framer_state_dtls_in(framer).in_pattern_mask,
+		       &args->varargs_buf[utun_crypto_framer_args_dtls_in(args).in_pattern_len],
+		       utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len);
+		utun_crypto_framer_state_dtls_in(framer).in_data_offset = utun_crypto_framer_args_dtls_in(args).in_data_offset;
+
+		if (!(utun_crypto_framer_state_dtls_in(framer).in_pattern_masked = utun_alloc(utun_crypto_framer_args_dtls_in(args).in_pattern_len))) {
+			utun_ctl_clr_dtls_framer(framer);
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			return ENOBUFS;
+		}
+		utun_ctl_calc_dtls_framer_pattern_and_mask(utun_crypto_framer_state_dtls_in(framer).in_pattern_masked,
+							   utun_crypto_framer_state_dtls_in(framer).in_pattern,
+							   utun_crypto_framer_state_dtls_in(framer).in_pattern_mask,
+							   utun_crypto_framer_state_dtls_in(framer).in_pattern_len);
+		// TOFIX: switch to BPF
+		crypto_ctx->num_framers++;
+	} else {
+		// Output Framer (for tunnel hdr encapsulation)... there can only be one for each type of traffic (see caller of this function)
+
+		// pattern and mask have to be configured
+		if (!utun_crypto_framer_args_dtls_out(args).out_pattern_len) {
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			printf("%s: invalid output framer, len %d\n", __FUNCTION__,
+			       utun_crypto_framer_args_dtls_out(args).out_pattern_len);
+			return EINVAL;
+		}
+		// utun_crypto_framer_args_dtls_out(args).out_pattern should not be all zeros;
+		if (is_pattern_all_zeroes(&args->varargs_buf[0],
+					  utun_crypto_framer_args_dtls_out(args).out_pattern_len)) {
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			printf("%s: zeroed output framer, len %d\n", __FUNCTION__,
+			       utun_crypto_framer_args_dtls_out(args).out_pattern_len);
+			return EINVAL;
+		}
+
+		// can't have the offset/extra configured while the mask is cleared
+		if ((utun_crypto_framer_args_dtls_out(args).len_field_offset || utun_crypto_framer_args_dtls_out(args).len_field_extra) && !utun_crypto_framer_args_dtls_out(args).len_field_mask) {
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			printf("%s: output framer has invalid length-field %d,%d,%x\n", __FUNCTION__,
+			       (int)utun_crypto_framer_args_dtls_out(args).len_field_offset,
+			       (int)utun_crypto_framer_args_dtls_out(args).len_field_extra,
+			       utun_crypto_framer_args_dtls_out(args).len_field_mask);
+			return EINVAL;
+		}
+		// any length field should be within the bounds of the out-pattern
+		if (utun_crypto_framer_args_dtls_out(args).len_field_offset >= utun_crypto_framer_args_dtls_out(args).out_pattern_len) {
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			return EINVAL;
+		}
+
+		// can't have the offset configured while the mask is cleared
+		if ((utun_crypto_framer_args_dtls_out(args).sequence_field || utun_crypto_framer_args_dtls_out(args).sequence_field_offset) && !utun_crypto_framer_args_dtls_out(args).sequence_field_mask) {
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			printf("%s: output framer has invalid sequence-field %d,%d,%x\n", __FUNCTION__,
+			       (int)utun_crypto_framer_args_dtls_out(args).sequence_field,
+			       (int)utun_crypto_framer_args_dtls_out(args).sequence_field_offset,
+			       utun_crypto_framer_args_dtls_out(args).sequence_field_mask);
+			return EINVAL;
+		}
+		// any sequence field should be within the bounds of the out-pattern
+		if (utun_crypto_framer_args_dtls_out(args).sequence_field_offset >= utun_crypto_framer_args_dtls_out(args).out_pattern_len) {
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			return EINVAL;
+		}
+
+		// check for len inconsistencies
+		if ((u_int32_t)utun_crypto_framer_args_dtls_out(args).out_pattern_len != args->varargs_buflen) {
+			lck_mtx_unlock(&dtls_ctl_mutex);
+			return EINVAL;
+		}
+
+		if (!(framer = utun_ctl_get_first_framer(crypto_ctx, args->inner_type))) {
+			if (!(framer = utun_alloc(sizeof(*framer)))) {
+				lck_mtx_unlock(&dtls_ctl_mutex);
+				return ENOBUFS;
+			}
+			bzero(framer, sizeof(*framer));
+			// chain the entry just allocated above
+			LIST_INSERT_HEAD(utun_get_framer_listhead(crypto_ctx, args->inner_type),
+					 framer,
+					 framer_chain);
+			// fall through to fill-in 1st framer
+		} else {
+			// only one outbound framer may be configured... is it a dup?
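Worth pausing on the egress semantics being validated here: in utun_pkt_dtls_output_frame_encapsulate() the length (and sequence) stamp is ANDed into whatever bits the configured out_pattern already carries at the field offset, so a client that wants the real value stamped pre-sets those bytes to 0xff. A hypothetical egress framer request, continuing the earlier userland sketches (caller allocated varargs_buflen == sizeof(hdr)):

```c
#include <string.h>
#include <sys/types.h>
#include <net/if_utun_crypto.h>   /* PRIVATE */

/*
 * Hypothetical 8-byte egress header: 4 magic bytes, then a 4-byte
 * big-endian length field pre-set to 0xff so the kernel's masked AND
 * leaves exactly (pkt_len + len_field_extra).
 */
static void
framer_args_fill_dtls_out(utun_crypto_framer_args_t *args)
{
    static const u_int8_t hdr[8] = {
        0xde, 0xad, 0xbe, 0xef,     /* magic (assumed)          */
        0xff, 0xff, 0xff, 0xff      /* length field placeholder */
    };

    args->type = UTUN_CRYPTO_TYPE_DTLS;
    args->dir = UTUN_CRYPTO_DIR_OUT;
    args->inner_type = UTUN_CRYPTO_INNER_TYPE_IPv4;
    args->u.dtls_v1.u.out.out_pattern_len = (int)sizeof(hdr);
    args->u.dtls_v1.u.out.len_field_mask = 0xffffffff;  /* nonzero => stamp length   */
    args->u.dtls_v1.u.out.len_field_offset = 4;         /* must be < out_pattern_len */
    args->u.dtls_v1.u.out.len_field_extra = 0;
    /* sequence_field_mask left 0 => no sequence stamping */
    memcpy(args->varargs_buf, hdr, sizeof(hdr));
}
```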
+ if (framer->inner_type == args->inner_type && + utun_crypto_framer_state_dtls_out(framer).out_pattern_len == utun_crypto_framer_args_dtls_out(args).out_pattern_len && + utun_crypto_framer_state_dtls_out(framer).out_pattern && + memcmp(utun_crypto_framer_state_dtls_out(framer).out_pattern, + &args->varargs_buf[0], + utun_crypto_framer_args_dtls_out(args).out_pattern_len) == 0) { + // found + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; + } + + // overwrite the previous one + if (utun_crypto_framer_state_dtls_out(framer).out_pattern) { + utun_free(utun_crypto_framer_state_dtls_out(framer).out_pattern); + } + // fall through to fill-in additional framer + } + + framer->inner_type = args->inner_type; + framer->inner_protocol_family = utun_crypto_framer_inner_type_to_protocol_family(args->inner_type); + + // alloc and fill in the out-pattern + if (!(utun_crypto_framer_state_dtls_out(framer).out_pattern = utun_alloc(utun_crypto_framer_args_dtls_out(args).out_pattern_len))) { + utun_ctl_clr_dtls_framer(framer); + lck_mtx_unlock(&dtls_ctl_mutex); + return ENOBUFS; + } + memcpy(utun_crypto_framer_state_dtls_out(framer).out_pattern, + &args->varargs_buf[0], + utun_crypto_framer_args_dtls_out(args).out_pattern_len); + utun_crypto_framer_state_dtls_out(framer).out_pattern_len = utun_crypto_framer_args_dtls_out(args).out_pattern_len; + + utun_crypto_framer_state_dtls_out(framer).len_field_mask = utun_crypto_framer_args_dtls_out(args).len_field_mask; + utun_crypto_framer_state_dtls_out(framer).len_field_offset = utun_crypto_framer_args_dtls_out(args).len_field_offset; + utun_crypto_framer_state_dtls_out(framer).len_field_extra = utun_crypto_framer_args_dtls_out(args).len_field_extra; + utun_crypto_framer_state_dtls_out(framer).sequence_field_initval = utun_crypto_framer_args_dtls_out(args).sequence_field; + utun_crypto_framer_state_dtls_out(framer).sequence_field_mask = utun_crypto_framer_args_dtls_out(args).sequence_field_mask; + utun_crypto_framer_state_dtls_out(framer).sequence_field_offset = utun_crypto_framer_args_dtls_out(args).sequence_field_offset; + crypto_ctx->num_framers = 1; + } + framer->type = args->type; + framer->dir = args->dir; + framer->valid = 1; + + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; +} + +int +utun_ctl_unconfig_crypto_dtls_framer (utun_crypto_ctx_t *crypto_ctx, + utun_crypto_framer_args_t *args) +{ + utun_crypto_framer_t *framer, *rem_framer; + + if (args->ver != UTUN_CRYPTO_DTLS_VER_1) { + return EINVAL; + } + if (!args->type || args->type >= UTUN_CRYPTO_INNER_TYPE_MAX) { + return EINVAL; + } + + lck_mtx_lock(&dtls_ctl_mutex); + + if (args->dir == UTUN_CRYPTO_DIR_IN) { + if (!utun_crypto_framer_args_dtls_in(args).in_pattern_len) { + // no pattern means... 
clear all + utun_ctl_clr_dtls_framers(utun_ctl_get_first_framer(crypto_ctx, args->inner_type)); + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; + } + + // when both specified, pattern and mask lengths have to match + if (utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len && + utun_crypto_framer_args_dtls_in(args).in_pattern_len != utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len) { + lck_mtx_unlock(&dtls_ctl_mutex); + return EINVAL; + } + // check for len inconsistencies + if ((u_int32_t)utun_crypto_framer_args_dtls_in(args).in_pattern_len + (u_int32_t)utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len != args->varargs_buflen) { + lck_mtx_unlock(&dtls_ctl_mutex); + return EINVAL; + } + // utun_crypto_framer_args_dtls_in(args).in_pattern should not be all zeros + if (is_pattern_all_zeroes(&args->varargs_buf[0], + utun_crypto_framer_args_dtls_in(args).in_pattern_len)) { + lck_mtx_unlock(&dtls_ctl_mutex); + return EINVAL; + } + // when specified, utun_crypto_framer_args_dtls_in(args).in_pattern_mask should not be all zeros + if (utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len && + is_pattern_all_zeroes(&args->varargs_buf[utun_crypto_framer_args_dtls_in(args).in_pattern_len], + utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len)) { + lck_mtx_unlock(&dtls_ctl_mutex); + return EINVAL; + } + // utun_crypto_framer_args_dtls_in(args).in_pattern & utun_crypto_framer_args_dtls_in(args).in_pattern_mask should not be zeros + if (is_pattern_masked_all_zeroes(&args->varargs_buf[0], + &args->varargs_buf[utun_crypto_framer_args_dtls_in(args).in_pattern_len], + utun_crypto_framer_args_dtls_in(args).in_pattern_len)) { + lck_mtx_unlock(&dtls_ctl_mutex); + return EINVAL; + } + + if ((u_int32_t)utun_crypto_framer_args_dtls_in(args).in_pattern_len + (u_int32_t)utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len != args->varargs_buflen) { + lck_mtx_unlock(&dtls_ctl_mutex); + return EINVAL; + } + + if (!(framer = utun_ctl_get_first_framer(crypto_ctx, args->inner_type))) { + // no framers + printf("%s: no framers configured\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; + } else { + if ((rem_framer = utun_ctl_get_dtls_in_framer(framer, + &args->varargs_buf[0], + utun_crypto_framer_args_dtls_in(args).in_pattern_len, + &args->varargs_buf[utun_crypto_framer_args_dtls_in(args).in_pattern_len], + utun_crypto_framer_args_dtls_in(args).in_pattern_mask_len))) { + utun_ctl_clr_dtls_framer(rem_framer); + if (crypto_ctx->num_framers) crypto_ctx->num_framers--; + } else { + printf("%s: no matching ingress framer\n", __FUNCTION__); + } + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; + } + } else { + framer = utun_ctl_get_first_framer(crypto_ctx, args->inner_type); + // overwrite the previous one + if (framer) { + if (framer->inner_type != args->inner_type || + (utun_crypto_framer_args_dtls_out(args).out_pattern_len && + utun_crypto_framer_state_dtls_out(framer).out_pattern_len != utun_crypto_framer_args_dtls_out(args).out_pattern_len) || + (utun_crypto_framer_args_dtls_out(args).out_pattern_len && + memcmp(utun_crypto_framer_state_dtls_out(framer).out_pattern, + &args->varargs_buf[0], + utun_crypto_framer_args_dtls_out(args).out_pattern_len))) { + printf("%s: no matching egress framer\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return EBADF; + } + utun_ctl_clr_dtls_framer(framer); + if (crypto_ctx->num_framers) crypto_ctx->num_framers--; + } + } + + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; +} + +/* + * Summary: enables handling of data traffic + */ 
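Before the traffic functions, the intended control-plane order from userland is worth spelling out: enable crypto first (records the kpi_handle), configure framers and keys while data traffic is still stopped, then issue the start option, which is what actually stitches utun to the provider through dtls_kpi_connect() below. A condensed sketch reusing fd from utun_open_dtls() and the hypothetical framer_args_* helpers from the earlier examples:

```c
#include <err.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <net/if_utun.h>          /* PRIVATE */
#include <net/if_utun_crypto.h>   /* PRIVATE */

static void
utun_dtls_bringup(int fd, const u_int8_t *pattern, const u_int8_t *mask,
                  int pattern_len, int data_offset)
{
    size_t total;
    utun_crypto_framer_args_t *fargs;
    utun_crypto_args_t cargs;

    /* framers are configured while UTUN_FLAGS_CRYPTO_STOP_DATA_TRAFFIC is set */
    if ((fargs = framer_args_alloc((u_int32_t)(2 * pattern_len), &total)) == NULL)
        err(1, "framer_args_alloc");                /* helper from earlier sketch */
    framer_args_fill_dtls_in(fargs, pattern, mask, pattern_len, data_offset);
    if (setsockopt(fd, SYSPROTO_CONTROL, UTUN_OPT_CONFIG_CRYPTO_FRAMER,
                   fargs, (socklen_t)total) == -1)
        err(1, "UTUN_OPT_CONFIG_CRYPTO_FRAMER");
    free(fargs);

    /* this is the call that triggers dtls_kpi_connect() in the kernel */
    memset(&cargs, 0, sizeof(cargs));
    cargs.ver = UTUN_CRYPTO_VER_1;
    cargs.type = UTUN_CRYPTO_TYPE_DTLS;
    cargs.args_ulen = sizeof(cargs.u);
    if (setsockopt(fd, SYSPROTO_CONTROL, UTUN_OPT_START_CRYPTO_DATA_TRAFFIC,
                   &cargs, sizeof(cargs)) == -1)
        err(1, "UTUN_OPT_START_CRYPTO_DATA_TRAFFIC");
}
```

Each start also re-arms the egress sequence counters via utun_ctl_restart_dtls_all_framers(), which is what the "for dynamic egress hdrs" note below refers to.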
+void +utun_ctl_start_datatraffic_crypto_dtls(struct utun_pcb *pcb) +{ + utun_crypto_ctx_t *crypto_ctx; + + lck_mtx_lock(&dtls_ctl_mutex); + + //printf("%s: entering, flags %x, kpi-handle %d, kpi-ref %p, kpi-refcnt %d\n", __FUNCTION__, pcb->utun_flags, crypto_ctx->kpi_handle, crypto_ctx->kpi_ref, crypto_ctx->kpi_refcnt); + + crypto_ctx = &pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_OUT)]; + + if (crypto_ctx->kpi_handle < 0) { + printf("%s: dtls disabled\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + + if (!crypto_ctx->kpi_ref) { + if (dtls_kpi_connect) { + crypto_ctx->kpi_ref = dtls_kpi_connect(crypto_ctx->kpi_handle, pcb); + if (!crypto_ctx->kpi_ref) { + printf("%s: ### dtls_kpi_connect failed\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + crypto_ctx->kpi_refcnt++; + } else { + printf("%s: ### dtls_kpi_connect unavailable\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + } else { + printf("%s: dtls already stitched\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + utun_ctl_restart_dtls_all_framers(crypto_ctx); // for dynamic egress hdrs + + //printf("%s: leaving, flags %x, kpi-handle %d, kpi-ref %p, kpi-refcnt %d\n", __FUNCTION__, pcb->utun_flags, crypto_ctx->kpi_handle, crypto_ctx->kpi_ref, crypto_ctx->kpi_refcnt); + lck_mtx_unlock(&dtls_ctl_mutex); + return; +} + +/* + * Summary: disables handling of data traffic + */ +void +utun_ctl_stop_datatraffic_crypto_dtls(struct utun_pcb *pcb) +{ + utun_crypto_ctx_t *crypto_ctx; + + lck_mtx_lock(&dtls_ctl_mutex); + + //printf("%s: entering, flags %x, kpi-ref %p, kpi-refcnt %d\n", __FUNCTION__, pcb->utun_flags, crypto_ctx->kpi_ref, crypto_ctx->kpi_refcnt); + + crypto_ctx = &pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_OUT)]; + + if (crypto_ctx->kpi_ref) { + if (dtls_kpi_connect) { + (void)dtls_kpi_connect(crypto_ctx->kpi_handle, NULL); + if (--crypto_ctx->kpi_refcnt == 0) { + crypto_ctx->kpi_ref = (__typeof__(crypto_ctx->kpi_ref))NULL; + crypto_ctx->kpi_handle = UTUN_CRYPTO_DTLS_HANDLE_INVALID; + } else { + // printf("%s: ### dtls_kpi_refcnt %d not yet zero\n", + // __FUNCTION__, crypto_ctx->kpi_refcnt); + } + } else { + printf("%s: dtls_kpi_connect unavailable\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + } else { + printf("%s: dtls already not-stitched\n", __FUNCTION__); + lck_mtx_unlock(&dtls_ctl_mutex); + return; + } + lck_mtx_unlock(&dtls_ctl_mutex); + return; +} + +#define utun_pkt_dtls_prepend_proto(pkt, pf) do { \ + if (mbuf_prepend(pkt, sizeof(protocol_family_t), MBUF_DONTWAIT) != 0) { \ + printf("%s - ifnet_output prepend failed\n", __FUNCTION__); \ + lck_mtx_unlock(&dtls_ctl_mutex); \ + return EBADF; \ + } \ + *(protocol_family_t *)mbuf_data(*pkt) = pf; \ + } while(0); + +#define utun_pkt_dtls_puntup(pcb, pkt, errstr, rc) do { \ + *(protocol_family_t *)mbuf_data(*pkt) = htonl(*(protocol_family_t *)mbuf_data(*pkt)); \ + rc = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, *pkt, CTL_DATA_EOR); \ + if (rc != 0) { \ + printf("%s: - ctl_enqueuembuf failed (rc %d) for %s:\n", __FUNCTION__, rc, (char *)errstr); \ + mbuf_freem(*pkt); \ + ifnet_stat_increment_out(pcb->utun_ifp, 0, 0, 1); \ + lck_mtx_unlock(&dtls_ctl_mutex); \ + return 0; \ + } \ + *pkt = NULL; \ + } while(0); + +int +utun_pkt_dtls_output(struct utun_pcb *pcb, mbuf_t *pkt) +{ + errno_t rc = ENETUNREACH; + int len; + utun_crypto_ctx_t *crypto_ctx; + protocol_family_t proto; + + //printf("%s: entering, flags %x, ifp %p\n", __FUNCTION__, 
pcb->utun_flags, pcb->utun_ifp); + + if (!(pcb->utun_flags & UTUN_FLAGS_CRYPTO)) { + printf("%s - crypto disabled\n", __FUNCTION__); + return EINVAL; + } + + if (!pcb->utun_ifp) { + printf("%s - utun ifp cleared\n", __FUNCTION__); + return EINVAL; + } + + proto = *(mtod(*pkt, protocol_family_t *)); + + lck_mtx_lock(&dtls_ctl_mutex); + + len = mbuf_pkthdr_len(*pkt); + + crypto_ctx = &pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_OUT)]; + + //printf("%s: entering, kpi-handle %d, kpi-ref %p, kpi-refcnt %d\n", __FUNCTION__, crypto_ctx->kpi_handle, crypto_ctx->kpi_ref, crypto_ctx->kpi_refcnt); + + if (dtls_kpi_send && (crypto_ctx->kpi_handle >= 0) && crypto_ctx->kpi_ref) { + m_adj(*pkt, sizeof(protocol_family_t)); + + if (!(rc = utun_pkt_dtls_output_frame_encapsulate(crypto_ctx, pkt, proto))) { + rc = dtls_kpi_send(crypto_ctx->kpi_ref, pkt); + if (rc) { + printf("%s: DTLS failed to send pkt %d\n", __FUNCTION__, rc); + // + // dtls_kpi_send (by way of so_inject_data_out) frees mbuf during certain error cases, + ifnet_stat_increment_out(pcb->utun_ifp, 0, 0, 1); // increment errors + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; // and drop packet + } + } else if (rc == EINVAL) { + // unsupported proto... fall through and punt (but 1st undo the protocol strip) + utun_pkt_dtls_prepend_proto(pkt, proto); + utun_pkt_dtls_puntup(pcb, pkt, (char *)"unsupported proto", rc); + } else { + // mbuf_prepend failure... mbuf will be already freed + printf("%s: failed to encrypsulate and send pkt %d\n", __FUNCTION__,rc); + ifnet_stat_increment_out(pcb->utun_ifp, 0, 0, 1); // increment errors + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; // and drop packet + } + } else { + utun_pkt_dtls_puntup(pcb, pkt, (char *)"slowpath", rc); + } + + if (!rc) + ifnet_stat_increment_out(pcb->utun_ifp, 1, len, 0); + + lck_mtx_unlock(&dtls_ctl_mutex); + return rc; +} + +int +utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, __unused protocol_family_t family) +{ + utun_crypto_ctx_t *crypto_ctx; + int striplen = 0; + + //printf("%s: got pkt %d\n", __FUNCTION__,family); + if (!(pcb->utun_flags & UTUN_FLAGS_CRYPTO)) { + printf("%s - crypto disabled\n", __FUNCTION__); + return EINVAL; + } + + if (!pcb->utun_ifp) { + printf("%s - utun ifp cleared\n", __FUNCTION__); + return EINVAL; + } + + lck_mtx_lock(&dtls_ctl_mutex); + + /* + * make sure that family matches what the UTUN was configured for (punt those that don't... along with all that fail to match the data pattern. 
+ */ + crypto_ctx = &pcb->utun_crypto_ctx[UTUN_CRYPTO_DIR_TO_IDX(UTUN_CRYPTO_DIR_IN)]; + if (crypto_ctx->num_framers && + !utun_pkt_dtls_input_frame_is_data(crypto_ctx, pkt, AF_INET, &striplen) && + !utun_pkt_dtls_input_frame_is_data(crypto_ctx, pkt, AF_INET6, &striplen)) { + // control or unknown traffic, so punt up to the plugin + errno_t rc; + + utun_pkt_dtls_prepend_proto(pkt, family); + *(protocol_family_t *)mbuf_data(*pkt) = htonl(*(protocol_family_t *)mbuf_data(*pkt)); + rc = ctl_enqueuembuf(pcb->utun_ctlref, pcb->utun_unit, *pkt, CTL_DATA_EOR); + if (rc != 0) { + // drop packet + printf("%s: - ctl_enqueuembuf failed: %d\n", __FUNCTION__, rc); + mbuf_freem(*pkt); + lck_mtx_unlock(&dtls_ctl_mutex); + return rc; + } + printf("%s: - ctl_enqueuembuf punted a packet up to UTUN ctrl sock: %d\n", __FUNCTION__, rc); + ifnet_stat_increment_in(pcb->utun_ifp, 1, mbuf_pkthdr_len(*pkt), 0); + + *pkt = NULL; + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; + } + if (striplen) { + //printf("%s: - about to strip tunneled hdr of len %d\n", __FUNCTION__, striplen); + m_adj(*pkt, striplen); + } + + utun_pkt_dtls_prepend_proto(pkt, family); + + ifnet_stat_increment_in(pcb->utun_ifp, 1, mbuf_pkthdr_len(*pkt), 0); + + (void)utun_pkt_input(pcb, *pkt); + lck_mtx_unlock(&dtls_ctl_mutex); + return 0; +} diff --git a/bsd/net/if_utun_crypto_dtls.h b/bsd/net/if_utun_crypto_dtls.h new file mode 100644 index 000000000..f5de675f3 --- /dev/null +++ b/bsd/net/if_utun_crypto_dtls.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
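A practical consequence of the input path above for plugin authors: whatever utun_pkt_dtls_input() punts arrives on the control socket with a 4-byte protocol-family prefix already converted to network order with htonl(). A sketch of the receive side, again assuming the connected control-socket fd from the earlier examples:

```c
#include <arpa/inet.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Drain one punted (control or unmatched) packet from the utun control socket. */
static ssize_t
utun_read_punted(int fd, u_int8_t *payload, size_t payload_cap, u_int32_t *pf_out)
{
    u_int8_t buf[4096];
    ssize_t n = recv(fd, buf, sizeof(buf), 0);

    if (n < 4)
        return -1;                 /* too short to carry the AF prefix */
    memcpy(pf_out, buf, 4);
    *pf_out = ntohl(*pf_out);      /* AF_INET, AF_INET6, or AF_UTUN keepalive */
    n -= 4;
    if ((size_t)n > payload_cap)
        n = (ssize_t)payload_cap;
    memcpy(payload, buf + 4, (size_t)n);
    return n;                      /* inner bytes, still wearing any framer header */
}
```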
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NET_IF_UTUN_CRYPTO_DTLS_H_
+#define _NET_IF_UTUN_CRYPTO_DTLS_H_
+
+#define UTUN_CRYPTO_DTLS_HANDLE_INVALID -1
+
+#ifdef KERNEL_PRIVATE
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define utun_cleanup_all_crypto_dtls(pcb) utun_ctl_disable_crypto_dtls(pcb)
+
+/*
+ * Summary: initializes global vars needed for any utun crypto based on dtls
+ */
+void
+utun_ctl_init_crypto_dtls(void);
+
+errno_t
+utun_ctl_register_dtls (utun_crypto_kpi_reg_t *reg);
+
+/*
+ * Summary: disables all crypto DTLS in one shot
+ */
+void
+utun_cleanup_all_crypto_dtls (struct utun_pcb *pcb);
+
+/*
+ * Summary: enables dtls crypto info for the specified utun. dtls ref is passed into args.
+ */
+void
+utun_ctl_enable_crypto_dtls(struct utun_pcb *pcb, utun_crypto_args_t *args);
+
+/*
+ * Summary: disables dtls crypto info for the specified utun.
+ */
+void
+utun_ctl_disable_crypto_dtls(struct utun_pcb *pcb);
+
+int
+utun_ctl_config_crypto_dtls_framer(utun_crypto_ctx_t *crypto_ctx, utun_crypto_framer_args_t *args);
+
+int
+utun_ctl_unconfig_crypto_dtls_framer(utun_crypto_ctx_t *crypto_ctx, utun_crypto_framer_args_t *args);
+
+/*
+ * Summary: enables handling of data traffic
+ */
+void
+utun_ctl_start_datatraffic_crypto_dtls(struct utun_pcb *pcb);
+
+/*
+ * Summary: disables handling of data traffic
+ */
+void
+utun_ctl_stop_datatraffic_crypto_dtls(struct utun_pcb *pcb);
+
+int
+utun_pkt_dtls_output(struct utun_pcb *pcb, mbuf_t *pkt);
+
+int
+utun_pkt_dtls_input(struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t family);
+
+static inline protocol_family_t
+utun_crypto_framer_inner_type_to_protocol_family (utun_crypto_framer_inner_type_t type)
+{
+	if (type == UTUN_CRYPTO_INNER_TYPE_IPv4) {
+		return PF_INET;
+	} else {
+		return PF_INET6;
+	}
+}
+
+static inline utun_crypto_framer_inner_type_t
+utun_crypto_framer_protocol_family_to_inner_type (protocol_family_t family)
+{
+	if (family == PF_INET) {
+		return UTUN_CRYPTO_INNER_TYPE_IPv4;
+	} else {
+		return UTUN_CRYPTO_INNER_TYPE_IPv6;
+	}
+}
+
+#endif // KERNEL_PRIVATE
+
+#endif // _NET_IF_UTUN_CRYPTO_DTLS_H_
diff --git a/bsd/net/if_utun_crypto_ipsec.c b/bsd/net/if_utun_crypto_ipsec.c
index 0166ba13c..df1c4711c 100644
--- a/bsd/net/if_utun_crypto_ipsec.c
+++ b/bsd/net/if_utun_crypto_ipsec.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,7 +26,7 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ - +#if IPSEC #include #include @@ -703,7 +703,8 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) int err; struct route *ro = NULL; struct route ro_copy; - struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF }; + struct ip_out_args ipoa = + { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 }; if (crypto_keys && crypto_keys->state.u.ipsec.proto == IPPROTO_ESP && @@ -712,7 +713,7 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) // TODO: update stats to increment outgoing packets // TODO: allow empty packets thru - proto = ntohl(*(mtod(*pkt, protocol_family_t *))); + proto = *(mtod(*pkt, protocol_family_t *)); m_adj(*pkt, sizeof(protocol_family_t)); bzero(&ro_copy, sizeof(ro_copy)); @@ -733,9 +734,7 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) new = ipsec4_splithdr(*pkt); if (!new) { printf("%s: ipsec4_splithdr(1) failed\n", __FUNCTION__); - if (ro_copy.ro_rt != NULL) { - rtfree(ro_copy.ro_rt); - } + ROUTE_RELEASE(&ro_copy); *pkt = NULL; return 0; } @@ -766,13 +765,11 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) if (ro->ro_rt) { RT_LOCK(ro->ro_rt); } - if (ro->ro_rt != NULL && - (ro->ro_rt->generation_id != route_generation || - !(ro->ro_rt->rt_flags & RTF_UP) || - dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { - RT_UNLOCK(ro->ro_rt); - rtfree(ro->ro_rt); - ro->ro_rt = NULL; + if (ROUTE_UNUSABLE(ro) || + dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { + if (ro->ro_rt != NULL) + RT_UNLOCK(ro->ro_rt); + ROUTE_RELEASE(ro); } if (ro->ro_rt == NULL) { dst4->sin_family = AF_INET; @@ -800,9 +797,7 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) new = ipsec4_splithdr(*pkt); if (!new) { printf("%s: ipsec4_splithdr(2) failed\n", __FUNCTION__); - if (ro_copy.ro_rt != NULL) { - rtfree(ro_copy.ro_rt); - } + ROUTE_RELEASE(&ro_copy); *pkt = NULL; return 0; } @@ -810,9 +805,7 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) if ((err = esp4_output(new, sav))) { printf("%s: esp4_output failed (%d)\n", __FUNCTION__, err); - if (ro_copy.ro_rt != NULL) { - rtfree(ro_copy.ro_rt); - } + ROUTE_RELEASE(&ro_copy); *pkt = NULL; return 0; // drop } @@ -829,9 +822,7 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) new = ipsec6_splithdr(*pkt); if (!new) { printf("%s: ipsec6_splithdr(1) failed\n", __FUNCTION__); - if (ro_copy.ro_rt != NULL) { - rtfree(ro_copy.ro_rt); - } + ROUTE_RELEASE(&ro_copy); *pkt = NULL; return 0; } @@ -862,13 +853,11 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) if (ro->ro_rt) { RT_LOCK(ro->ro_rt); } - if (ro->ro_rt != NULL && - (ro->ro_rt->generation_id != route_generation || - !(ro->ro_rt->rt_flags & RTF_UP) || - !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { - RT_UNLOCK(ro->ro_rt); - rtfree(ro->ro_rt); - ro->ro_rt = NULL; + if (ROUTE_UNUSABLE(ro) || + !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst)) { + if (ro->ro_rt != NULL) + RT_UNLOCK(ro->ro_rt); + ROUTE_RELEASE(ro); } if (ro->ro_rt == NULL) { bzero(dst6, sizeof(*dst6)); @@ -897,9 +886,7 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) new = ipsec6_splithdr(*pkt); if (!new) { printf("%s: ipsec6_splithdr failed\n", __FUNCTION__); - if (ro_copy.ro_rt != NULL) { - rtfree(ro_copy.ro_rt); - } + ROUTE_RELEASE(&ro_copy); *pkt = NULL; return 0; } @@ -907,9 +894,7 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) if ((err = esp6_output(new, mtod(new, u_char *), new->m_next, sav))) { printf("%s: 
esp6_output failed (%d)\n", __FUNCTION__, err); - if (ro_copy.ro_rt != NULL) { - rtfree(ro_copy.ro_rt); - } + ROUTE_RELEASE(&ro_copy); *pkt = NULL; return 0; // drop } @@ -917,9 +902,7 @@ utun_pkt_ipsec_output (struct utun_pcb *pcb, mbuf_t *pkt) plen = new->m_pkthdr.len - sizeof(struct ip6_hdr); if (plen > IPV6_MAXPACKET) { printf("%s: esp6_output failed due to invalid len (%d)\n", __FUNCTION__, plen); - if (ro_copy.ro_rt != NULL) { - rtfree(ro_copy.ro_rt); - } + ROUTE_RELEASE(&ro_copy); mbuf_freem(new); *pkt = NULL; return 0; @@ -1081,8 +1064,10 @@ utun_pkt_ipsec_input (struct utun_pcb *pcb, mbuf_t *pkt, protocol_family_t famil printf("%s - ifnet_output prepend failed\n", __FUNCTION__); return ENOBUFS; } - *(protocol_family_t *)mbuf_data(*pkt) = htonl(family); + *(protocol_family_t *)mbuf_data(*pkt) = family; (void)utun_pkt_input(pcb, *pkt); return 0; } + +#endif /* IPSEC */ diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h index 426a78bb5..7b82a0fbe 100644 --- a/bsd/net/if_var.h +++ b/bsd/net/if_var.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
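[Annotation] Two mechanical changes repeat throughout utun_pkt_ipsec_output()/input() above: the prepended protocol_family_t now stays in host byte order (on this path the tag never leaves the kernel, unlike the DTLS punt to the userland control socket), and every open-coded rtfree()/NULL sequence becomes ROUTE_RELEASE, paired with ROUTE_UNUSABLE for the staleness test. A hedged sketch of the cached-route revalidation idiom follows (BSD_KERNEL_PRIVATE, <net/route.h> from this same tree; dst_changed stands in for the per-family destination comparison).

    #include <net/route.h>

    static void
    revalidate_cached_route(struct route *ro, boolean_t dst_changed)
    {
        if (ro->ro_rt != NULL)
            RT_LOCK(ro->ro_rt);

        if (ROUTE_UNUSABLE(ro) || dst_changed) {
            if (ro->ro_rt != NULL)
                RT_UNLOCK(ro->ro_rt);
            /* Drops the reference and NULLs ro->ro_rt in one macro. */
            ROUTE_RELEASE(ro);
        }

        if (ro->ro_rt != NULL)
            RT_UNLOCK(ro->ro_rt);
        /* ro->ro_rt == NULL on return means redo the route lookup. */
    }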
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -142,8 +142,7 @@ struct net_event_data { }; #if defined(__LP64__) -#define __need_struct_timeval32 -#include +#include #define IF_DATA_TIMEVAL timeval32 #else #define IF_DATA_TIMEVAL timeval @@ -249,6 +248,10 @@ struct if_traffic_class { struct if_data_extended { u_int64_t ifi_alignerrs; /* unaligned (32-bit) input pkts */ + u_int64_t ifi_dt_bytes; /* Data threshold counter */ + u_int64_t ifi_fpackets; /* forwarded packets on interface */ + u_int64_t ifi_fbytes; /* forwarded bytes on interface */ + u_int64_t reserved[12]; /* for future */ }; struct if_packet_stats { @@ -267,6 +270,7 @@ struct if_packet_stats { u_int64_t ifi_tcp_dospacket; u_int64_t ifi_tcp_cleanup; u_int64_t ifi_tcp_synwindow; + u_int64_t reserved[6]; /* UDP */ u_int64_t ifi_udp_port_unreach; u_int64_t ifi_udp_faithprefix; @@ -276,6 +280,7 @@ struct if_packet_stats { u_int64_t ifi_udp_badmcast; u_int64_t ifi_udp_cleanup; u_int64_t ifi_udp_badipsec; + u_int64_t _reserved[4]; }; struct if_description { @@ -289,6 +294,11 @@ struct if_bandwidths { u_int64_t max_bw; /* maximum theoretical bandwidth */ }; +struct if_latencies { + u_int64_t eff_lt; /* effective latency */ + u_int64_t max_lt; /* maximum theoretical latency */ +}; + struct if_rxpoll_stats { u_int32_t ifi_poll_off_req; /* total # of POLL_OFF reqs */ u_int32_t ifi_poll_off_err; /* total # of POLL_OFF errors */ @@ -312,6 +322,9 @@ struct if_rxpoll_stats { u_int32_t ifi_poll_bytes_max; /* largest polled bytes */ u_int32_t ifi_poll_bytes_lowat; /* bytes low watermark */ u_int32_t ifi_poll_bytes_hiwat; /* bytes high watermark */ + + u_int32_t ifi_poll_packets_limit; /* max packets per poll call */ + u_int64_t ifi_poll_interval_time; /* poll interval (nsec) */ }; #endif /* PRIVATE */ @@ -365,17 +378,19 @@ struct if_data_internal { u_int32_t ifi_recvtiming; /* usec spent receiving when timing */ u_int32_t ifi_xmittiming; /* usec spent xmitting when timing */ u_int64_t ifi_alignerrs; /* unaligned (32-bit) input pkts */ -#define IF_LASTCHANGEUPTIME 1 /* lastchange: 1-uptime 0-calendar time */ + u_int64_t ifi_dt_bytes; /* Data threshold counter */ + u_int64_t ifi_fpackets; /* forwarded packets on interface */ + u_int64_t ifi_fbytes; /* forwarded bytes on interface */ struct timeval ifi_lastchange; /* time of last administrative change */ u_int32_t ifi_hwassist; /* HW offload capabilities */ u_int32_t ifi_tso_v4_mtu; /* TCP Segment Offload IPv4 maximum segment size */ u_int32_t ifi_tso_v6_mtu; /* TCP Segment Offload IPv6 maximum segment size */ }; +#if MEASURE_BW /* * Fields per interface to measure perceived bandwidth. 
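[Annotation] The new counters in if_data_extended and if_packet_stats arrive together with explicit reserved arrays. These structures are copied out to userland wholesale, so padding them now lets later releases name new fields without changing the exported size. A compile-time illustration of that versioning pattern; the struct names are hypothetical.

    #include <stdint.h>

    /* Hypothetical exported stats struct, padded like if_data_extended. */
    struct my_if_stats_v1 {
        uint64_t fpackets;       /* forwarded packets */
        uint64_t fbytes;         /* forwarded bytes */
        uint64_t reserved[12];   /* claim space now, name fields later */
    };

    /* A later revision consumes reserved slots instead of growing. */
    struct my_if_stats_v2 {
        uint64_t fpackets;
        uint64_t fbytes;
        uint64_t dropped;        /* new in v2 */
        uint64_t reserved[11];
    };

    _Static_assert(sizeof(struct my_if_stats_v1) ==
        sizeof(struct my_if_stats_v2),
        "exported size must not change across versions");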
*/ - struct if_measured_bw { u_int64_t bw; /* measured bandwidth in bytes per ms */ u_int64_t bytes; /* XXX not needed */ @@ -389,7 +404,7 @@ struct if_measured_bw { #define IF_MEASURED_BW_INPROGRESS 0x1 #define IF_MEASURED_BW_CALCULATION 0x2 }; - +#endif /* MEASURE_BW */ #endif /* BSD_KERNEL_PRIVATE */ #ifdef PRIVATE @@ -421,6 +436,9 @@ struct if_measured_bw { #define if_tso_v4_mtu if_data.ifi_tso_v4_mtu #define if_tso_v6_mtu if_data.ifi_tso_v6_mtu #define if_alignerrs if_data.ifi_alignerrs +#define if_dt_bytes if_data.ifi_dt_bytes +#define if_fpackets if_data.ifi_fpackets +#define if_fbytes if_data.ifi_fbytes #endif /* BSD_KERNEL_PRIVATE */ #ifdef BSD_KERNEL_PRIVATE @@ -470,7 +488,7 @@ TAILQ_HEAD(ddesc_head_name, dlil_demux_desc); #define IF_HWASSIST_CSUM_TCPIPV6 0x0020 /* will csum TCPv6, IFNET_CSUM_TCPIPV6 */ #define IF_HWASSIST_CSUM_UDPIPV6 0x0040 /* will csum UDPv6, IFNET_CSUM_UDP */ #define IF_HWASSIST_CSUM_FRAGMENT_IPV6 0x0080 /* will do IPv6 fragmentation, IFNET_IPV6_FRAGMENT */ -#define IF_HWASSIST_CSUM_TCP_SUM16 0x1000 /* simple TCP Sum16 computation, IFNET_CSUM_SUM16 */ +#define IF_HWASSIST_CSUM_PARTIAL 0x1000 /* simple Sum16 computation, IFNET_CSUM_PARTIAL */ #define IF_HWASSIST_CSUM_MASK 0xffff #define IF_HWASSIST_CSUM_FLAGS(hwassist) ((hwassist) & IF_HWASSIST_CSUM_MASK) @@ -484,16 +502,24 @@ TAILQ_HEAD(ddesc_head_name, dlil_demux_desc); #define IF_HWASSIST_TSO_V6 0x00400000 /* will do TCP Segment offload for IPv6, IFNET_TSO_IPV6 */ #endif /* PRIVATE */ +#ifdef PRIVATE +#define IFXNAMSIZ (IFNAMSIZ + 8) /* external name (name + unit) */ +#endif + #ifdef BSD_KERNEL_PRIVATE /* * ifnet is private to BSD portion of kernel */ +#include #include #include +#include #include +#include RB_HEAD(ll_reach_tree, if_llreach); /* define struct ll_reach_tree */ +#define if_name(ifp) ifp->if_xname /* * Structure defining a network interface. * @@ -506,12 +532,13 @@ struct ifnet { decl_lck_rw_data(, if_lock); void *if_softc; /* pointer to driver state */ const char *if_name; /* name, e.g. 
``en'' or ``lo'' */ + const char *if_xname; /* external name (name + unit) */ struct if_description if_desc; /* extended description */ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ TAILQ_ENTRY(ifnet) if_detaching_link; /* list of detaching ifnets */ decl_lck_mtx_data(, if_ref_lock) - u_int32_t if_refflags; + u_int32_t if_refflags; /* see IFRF flags below */ u_int32_t if_refio; /* number of io ops to the underlying driver */ #define if_list if_link @@ -536,6 +563,7 @@ struct ifnet { struct if_data_internal if_data __attribute__((aligned(8))); ifnet_family_t if_family; /* value assigned by Apple */ + ifnet_subfamily_t if_subfamily; /* value assigned by Apple */ uintptr_t if_family_cookie; ifnet_output_func if_output; ifnet_pre_enqueue_func if_pre_enqueue; @@ -548,14 +576,18 @@ struct ifnet { ifnet_detached_func if_free; ifnet_demux_func if_demux; ifnet_event_func if_event; - ifnet_framer_func if_framer; + ifnet_framer_func if_framer_legacy; + ifnet_framer_extended_func if_framer; ifnet_add_proto_func if_add_proto; ifnet_del_proto_func if_del_proto; ifnet_check_multi if_check_multi; struct proto_hash_entry *if_proto_hash; void *if_kpi_storage; + u_int32_t if_flowhash; /* interface flow control ID */ + decl_lck_mtx_data(, if_start_lock); + u_int32_t if_start_flags; /* see IFSF flags below */ u_int32_t if_start_req; u_int32_t if_start_active; /* output is active */ struct timespec if_start_cycle; /* restart interval */ @@ -567,6 +599,9 @@ struct ifnet { struct if_bandwidths if_output_bw; struct if_bandwidths if_input_bw; + struct if_latencies if_output_lt; + struct if_latencies if_input_lt; + decl_lck_mtx_data(, if_flt_lock) u_int32_t if_flt_busy; u_int32_t if_flt_waiters; @@ -630,17 +665,56 @@ struct ifnet { #endif /* INET6 */ int if_lqm; /* link quality metric */ +#if MEASURE_BW struct if_measured_bw if_bw; +#endif /* MEASURE_BW */ struct tcpstat_local *if_tcp_stat; /* TCP specific stats */ struct udpstat_local *if_udp_stat; /* UDP specific stats */ + + struct { + int32_t level; /* cached logging level */ + u_int32_t flags; /* cached logging flags */ + int32_t category; /* cached category */ + int32_t subcategory; /* cached subcategory */ + } if_log; + + struct { + struct ifnet *ifp; /* delegated ifp */ + u_int32_t type; /* delegated i/f type */ + u_int32_t family; /* delegated i/f family */ + u_int32_t subfamily; /* delegated i/f sub-family */ + } if_delegated; + + u_int64_t if_data_threshold; + u_int32_t if_fg_sendts; /* last send on a fg socket in seconds */ + +#if INET6 + decl_lck_rw_data(, if_inet6data_lock); + void *if_inet6data; +#endif }; +#define IF_TCP_STATINC(_ifp, _s) do { \ + if ((_ifp)->if_tcp_stat != NULL) \ + atomic_add_64(&(_ifp)->if_tcp_stat->_s, 1); \ +} while (0); + +#define IF_UDP_STATINC(_ifp, _s) do { \ + if ((_ifp)->if_udp_stat != NULL) \ + atomic_add_64(&(_ifp)->if_udp_stat->_s, 1); \ +} while (0); + /* - * Valid values for if_useflags + * Valid values for if_refflags */ #define IFRF_ATTACHED 0x1 /* ifnet attach is completely done */ #define IFRF_DETACHING 0x2 /* detach has been requested */ +/* + * Valid values for if_start_flags + */ +#define IFSF_FLOW_CONTROLLED 0x1 /* flow controlled */ + /* * Structure describing a `cloning' interface. 
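[Annotation] IF_TCP_STATINC()/IF_UDP_STATINC() above encode a small recurring pattern: the per-interface stat blocks are optional, so each bump is NULL-guarded, and the increment itself is a lock-free 64-bit atomic. A standalone rendition with stand-in types, using a compiler builtin where the kernel uses atomic_add_64():

    #include <stdint.h>
    #include <stddef.h>

    struct tcp_stats_stub { volatile uint64_t rcv_total; };

    struct ifnet_stub {
        struct tcp_stats_stub *if_tcp_stat;  /* optional; may be NULL */
    };

    /* do/while makes the macro behave as one statement inside if/else. */
    #define STUB_TCP_STATINC(_ifp, _s) do {                          \
        if ((_ifp)->if_tcp_stat != NULL)                             \
            __sync_fetch_and_add(&(_ifp)->if_tcp_stat->_s, 1);       \
    } while (0)

    int
    main(void)
    {
        struct tcp_stats_stub st = { 0 };
        struct ifnet_stub ifp = { &st };
        struct ifnet_stub bare = { NULL };

        STUB_TCP_STATINC(&ifp, rcv_total);   /* increments */
        STUB_TCP_STATINC(&bare, rcv_total);  /* safely a no-op */
        return (st.rcv_total == 1 ? 0 : 1);
    }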
*/ @@ -881,16 +955,50 @@ struct ifmultiaddr { #define IFMA_REMREF(_ifma) \ ifma_remref(_ifma) -__private_extern__ struct ifnethead ifnet_head; -__private_extern__ struct ifnet **ifindex2ifnet; -__private_extern__ u_int32_t if_sndq_maxlen; -__private_extern__ u_int32_t if_rcvq_maxlen; -__private_extern__ int if_index; -__private_extern__ struct ifaddr **ifnet_addrs; -__private_extern__ lck_attr_t *ifa_mtx_attr; -__private_extern__ lck_grp_t *ifa_mtx_grp; -__private_extern__ lck_grp_t *ifnet_lock_group; -__private_extern__ lck_attr_t *ifnet_lock_attr; +/* + * Indicate whether or not the immediate interface, or the interface delegated + * by it, is a cellular interface (IFT_CELLULAR). Delegated interface type is + * set/cleared along with the delegated ifp; we cache the type for performance + * to avoid dereferencing delegated ifp each time. + * + * Note that this is meant to be used only for accounting and policy purposes; + * certain places need to explicitly know the immediate interface type, and + * this macro should not be used there. + * + * The test is done against IFT_CELLULAR instead of IFNET_FAMILY_CELLULAR to + * handle certain cases where the family isn't set to the latter. + */ +#define IFNET_IS_CELLULAR(_ifp) \ + ((_ifp)->if_type == IFT_CELLULAR || \ + (_ifp)->if_delegated.type == IFT_CELLULAR) + +/* + * Indicate whether or not the immediate interface, or the interface delegated + * by it, is a Wi-Fi interface (IFNET_SUBFAMILY_WIFI). Delegated interface + * subfamily is set/cleared along with the delegated ifp; we cache the subfamily + * for performance to avoid dereferencing delegated ifp each time. + * + * Note that this is meant to be used only for accounting and policy purposes; + * certain places need to explicitly know the immediate interface type, and + * this macro should not be used there. + * + * The test is done against IFNET_SUBFAMILY_WIFI as the family may be set to + * IFNET_FAMILY_ETHERNET (as well as type to IFT_ETHER) which is too generic. 
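[Annotation] IFNET_IS_CELLULAR() above reads a cached copy of the delegated interface's type rather than dereferencing if_delegated.ifp on every packet; the cache is updated whenever delegation is set or cleared, which keeps the hot-path test to two integer compares. A minimal stand-in for the shape of that check; the IFT value is a placeholder.

    #include <stdbool.h>

    #define IFT_CELLULAR_STUB 0xff   /* placeholder for IFT_CELLULAR */

    struct ifnet_lite {
        unsigned int if_type;
        struct {
            unsigned int type;  /* cached from the delegated ifp */
        } if_delegated;
    };

    /* Two compares, no pointer chase, mirroring IFNET_IS_CELLULAR. */
    static inline bool
    ifnet_is_cellular_lite(const struct ifnet_lite *ifp)
    {
        return (ifp->if_type == IFT_CELLULAR_STUB ||
            ifp->if_delegated.type == IFT_CELLULAR_STUB);
    }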
+ */ +#define IFNET_IS_WIFI(_ifp) \ + ((_ifp)->if_subfamily == IFNET_SUBFAMILY_WIFI || \ + (_ifp)->if_delegated.subfamily == IFNET_SUBFAMILY_WIFI) + +extern struct ifnethead ifnet_head; +extern struct ifnet **ifindex2ifnet; +extern u_int32_t if_sndq_maxlen; +extern u_int32_t if_rcvq_maxlen; +extern int if_index; +extern struct ifaddr **ifnet_addrs; +extern lck_attr_t *ifa_mtx_attr; +extern lck_grp_t *ifa_mtx_grp; +extern lck_grp_t *ifnet_lock_group; +extern lck_attr_t *ifnet_lock_attr; extern ifnet_t lo_ifp; extern uint32_t if_bw_measure_size; extern u_int32_t if_bw_smoothing_val; @@ -928,11 +1036,20 @@ typedef enum { IFNET_LCK_ASSERT_NOTOWNED /* not held */ } ifnet_lock_assert_t; +#define IF_LLADDR(_ifp) \ + (LLADDR(SDL(((_ifp)->if_lladdr)->ifa_addr))) + __private_extern__ void ifnet_lock_assert(struct ifnet *, ifnet_lock_assert_t); __private_extern__ void ifnet_lock_shared(struct ifnet *ifp); __private_extern__ void ifnet_lock_exclusive(struct ifnet *ifp); __private_extern__ void ifnet_lock_done(struct ifnet *ifp); +#if INET6 +__private_extern__ void if_inet6data_lock_shared(struct ifnet *ifp); +__private_extern__ void if_inet6data_lock_exclusive(struct ifnet *ifp); +__private_extern__ void if_inet6data_lock_done(struct ifnet *ifp); +#endif + __private_extern__ void ifnet_head_lock_shared(void); __private_extern__ void ifnet_head_lock_exclusive(void); __private_extern__ void ifnet_head_done(void); @@ -983,15 +1100,17 @@ __private_extern__ struct in_ifaddr *ifa_foraddr(unsigned int); __private_extern__ struct in_ifaddr *ifa_foraddr_scoped(unsigned int, unsigned int); -extern void ifnet_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl); -extern struct sfb_bin_fcentry* ifnet_fce_alloc(int how); -extern void ifnet_fce_free(struct sfb_bin_fcentry *); - struct ifreq; extern errno_t ifnet_getset_opportunistic(struct ifnet *, u_long, struct ifreq *, struct proc *); extern int ifnet_get_throttle(struct ifnet *, u_int32_t *); extern int ifnet_set_throttle(struct ifnet *, u_int32_t); +extern errno_t ifnet_getset_log(struct ifnet *, u_long, + struct ifreq *, struct proc *); +extern int ifnet_set_log(struct ifnet *, int32_t, uint32_t, int32_t, int32_t); +extern int ifnet_get_log(struct ifnet *, int32_t *, uint32_t *, int32_t *, + int32_t *); +extern int ifnet_notify_address(struct ifnet *, int); #if INET6 struct in6_addr; @@ -1024,12 +1143,23 @@ __private_extern__ void if_lqm_update(struct ifnet *, int32_t); __private_extern__ void ifnet_update_sndq(struct ifclassq *, cqev_t); __private_extern__ void ifnet_update_rcv(struct ifnet *, cqev_t); +__private_extern__ void ifnet_flowadv(uint32_t); + __private_extern__ errno_t ifnet_set_input_bandwidths(struct ifnet *, struct if_bandwidths *); __private_extern__ errno_t ifnet_set_output_bandwidths(struct ifnet *, struct if_bandwidths *, boolean_t); __private_extern__ u_int64_t ifnet_output_linkrate(struct ifnet *); __private_extern__ u_int64_t ifnet_input_linkrate(struct ifnet *); + +__private_extern__ errno_t ifnet_set_input_latencies(struct ifnet *, + struct if_latencies *); +__private_extern__ errno_t ifnet_set_output_latencies(struct ifnet *, + struct if_latencies *, boolean_t); + +__private_extern__ errno_t ifnet_framer_stub(struct ifnet *, struct mbuf **, + const struct sockaddr *, const char *, const char *, u_int32_t *, + u_int32_t *); #endif /* BSD_KERNEL_PRIVATE */ #ifdef XNU_KERNEL_PRIVATE /* for uuid.c */ diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c index 273feea93..1f98f6ac0 100644 --- a/bsd/net/if_vlan.c +++ b/bsd/net/if_vlan.c @@ 
-441,7 +441,7 @@ ifvlan_get_vlan_parent_retained(ifvlan_ref ifv) { vlan_parent_ref vlp = ifv->ifv_vlp; - if (vlan_parent_flags_detaching(vlp)) { + if (vlp == NULL || vlan_parent_flags_detaching(vlp)) { return (NULL); } vlan_parent_retain(vlp); @@ -942,7 +942,7 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params) int error; ifvlan_ref ifv; ifnet_t ifp; - struct ifnet_init_params vlan_init; + struct ifnet_init_eparams vlan_init; error = vlan_globals_init(); if (error != 0) { @@ -965,6 +965,9 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params) } bzero(&vlan_init, sizeof(vlan_init)); + vlan_init.ver = IFNET_INIT_CURRENT_VERSION; + vlan_init.len = sizeof (vlan_init); + vlan_init.flags = IFNET_INIT_LEGACY; vlan_init.uniqueid = ifv->ifv_name; vlan_init.uniqueid_len = strlen(ifv->ifv_name); vlan_init.name = ifc->ifc_name; @@ -976,14 +979,14 @@ vlan_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params) vlan_init.add_proto = ether_add_proto; vlan_init.del_proto = ether_del_proto; vlan_init.check_multi = ether_check_multi; - vlan_init.framer = ether_frameout; + vlan_init.framer_extended = ether_frameout_extended; vlan_init.softc = ifv; vlan_init.ioctl = vlan_ioctl; vlan_init.set_bpf_tap = vlan_set_bpf_tap; vlan_init.detach = vlan_if_free; vlan_init.broadcast_addr = etherbroadcastaddr; vlan_init.broadcast_len = ETHER_ADDR_LEN; - error = ifnet_allocate(&vlan_init, &ifp); + error = ifnet_allocate_extended(&vlan_init, &ifp); if (error) { ifvlan_release(ifv); @@ -1280,7 +1283,7 @@ vlan_input(ifnet_t p, __unused protocol_family_t protocol, } if (tag != 0) { m->m_pkthdr.rcvif = ifp; - m->m_pkthdr.header = frame_header; + m->m_pkthdr.pkt_hdr = frame_header; (void)ifnet_stat_increment_in(ifp, 1, m->m_pkthdr.len + ETHER_HDR_LEN, 0); vlan_bpf_input(ifp, m, bpf_func, frame_header, ETHER_HDR_LEN, @@ -1288,7 +1291,7 @@ vlan_input(ifnet_t p, __unused protocol_family_t protocol, /* We found a vlan interface, inject on that interface. 
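[Annotation] vlan_clone_create() above moves from the fixed-layout ifnet_init_params to the versioned ifnet_init_eparams: the ver/len pair tells ifnet_allocate_extended() which revision of the structure the caller was compiled against, and IFNET_INIT_LEGACY preserves the classic if_output model. A hedged sketch of the minimum bring-up (KERNEL_PRIVATE; a real caller also supplies demux, add_proto/del_proto, and a detach handler, as the vlan code does):

    #include <net/kpi_interface.h>
    #include <net/if_types.h>

    static errno_t
    alloc_legacy_style_ifp(const char *name, u_int32_t unit,
        ifnet_output_func output, void *softc, ifnet_t *ifp)
    {
        struct ifnet_init_eparams einit;

        bzero(&einit, sizeof (einit));
        einit.ver    = IFNET_INIT_CURRENT_VERSION;  /* struct revision */
        einit.len    = sizeof (einit);              /* caller's size */
        einit.flags  = IFNET_INIT_LEGACY;           /* if_output model */
        einit.name   = name;
        einit.unit   = unit;
        einit.family = IFNET_FAMILY_VLAN;
        einit.type   = IFT_L2VLAN;
        einit.output = output;
        einit.softc  = softc;

        return (ifnet_allocate_extended(&einit, ifp));
    }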
*/ dlil_input_packet_list(ifp, m); } else { - m->m_pkthdr.header = frame_header; + m->m_pkthdr.pkt_hdr = frame_header; /* Send priority-tagged packet up through the parent */ dlil_input_packet_list(p, m); } @@ -1414,7 +1417,7 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag) } /* set our ethernet address to that of the parent */ - ifnet_set_lladdr_and_type(ifp, ifnet_lladdr(p), ETHER_ADDR_LEN, IFT_ETHER); + ifnet_set_lladdr_and_type(ifp, IF_LLADDR(p), ETHER_ADDR_LEN, IFT_ETHER); /* no failures past this point */ vlan_lock(); @@ -2154,20 +2157,6 @@ vlan_detach_inet6(struct ifnet *ifp, protocol_family_t protocol_family) } #endif /* INET6 */ -#if NETAT -static errno_t -vlan_attach_at(struct ifnet *ifp, protocol_family_t protocol_family) -{ - return (ether_attach_at(ifp, protocol_family)); -} - -static void -vlan_detach_at(struct ifnet *ifp, protocol_family_t protocol_family) -{ - ether_detach_at(ifp, protocol_family); -} -#endif /* NETAT */ - __private_extern__ int vlan_family_init(void) { @@ -2189,15 +2178,6 @@ vlan_family_init(void) goto done; } #endif -#if NETAT - error = proto_register_plumber(PF_APPLETALK, IFNET_FAMILY_VLAN, - vlan_attach_at, vlan_detach_at); - if (error != 0) { - printf("proto_register_plumber failed for AF_APPLETALK error=%d\n", - error); - goto done; - } -#endif /* NETAT */ error = vlan_clone_attach(); if (error != 0) { printf("proto_register_plumber failed vlan_clone_attach error=%d\n", diff --git a/bsd/net/if_vlan_var.h b/bsd/net/if_vlan_var.h index 069a81d92..cbbec55a6 100644 --- a/bsd/net/if_vlan_var.h +++ b/bsd/net/if_vlan_var.h @@ -87,6 +87,6 @@ struct vlanreq { }; #ifdef KERNEL_PRIVATE -int vlan_family_init(void) __attribute__((section("__TEXT, initcode"))); +int vlan_family_init(void); #endif /* KERNEL_PRIVATE */ #endif /* _NET_IF_VLAN_VAR_H_ */ diff --git a/bsd/net/init.h b/bsd/net/init.h index 570fa12dc..b2545b48a 100644 --- a/bsd/net/init.h +++ b/bsd/net/init.h @@ -59,7 +59,7 @@ errno_t net_init_add(net_init_func_ptr init_func); #ifdef BSD_KERNEL_PRIVATE /* net_init_run is called from bsd_init */ -extern void net_init_run(void) __attribute__((section("__TEXT, initcode"))); +extern void net_init_run(void); #endif /* BSD_KERNEL_PRIVATE */ #endif /* _NET_INIT_H_ */ diff --git a/bsd/net/iptap.c b/bsd/net/iptap.c index c665af150..ead29d8f2 100644 --- a/bsd/net/iptap.c +++ b/bsd/net/iptap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2010 Apple Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. 
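[Annotation] The vlan_family_init() and net_init_run() declarations above also drop the __attribute__((section("__TEXT, initcode"))) annotation, a change repeated for other one-shot init routines in this patch (see sflt_init in kext_net.h below). The annotation grouped boot-only code into a dedicated Mach-O section so it could be managed separately after startup; with this release the functions live in ordinary __TEXT. Illustrative before/after declarations:

    /* Before: boot-only text grouped into a dedicated Mach-O section. */
    void example_init_old(void) __attribute__((section("__TEXT, initcode")));

    /* After: no special placement; the function stays in regular __TEXT. */
    void example_init_new(void);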
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,413 +25,604 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include -#include -#include -#include + #include -#include -#include -#include + +#include +#include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include + #include -#include +#include +#include #include +#include +#include #include -#include -#include -#include -#include -#include +#include +#include +#include +#define _IP_VHL +#include +#include +#include +#include -#define IPTAP_IF_NAME "iptap" -#define IPTAP_PRINTF printf -#define IP_TAP_NOT_USED 0 +#include +#include -#define VALID_PACKET(type, label)\ - if (iptap_clients == 0) \ - goto label; \ - \ - if (type != IFT_ETHER && \ - type != IFT_CELLULAR) \ - goto label +#include -static void *iptap_alloc(size_t); -static void iptap_free(void *); -static errno_t iptap_register_control(void); -static inline void iptap_lock_shared(void); -static inline void iptap_lock_exclusive(void); -static inline void iptap_lock_done(void); -static void iptap_alloc_lock(void); -static void iptap_free_lock(void); +#include -static void iptap_enqueue_mbuf(struct ifnet *, protocol_family_t, struct mbuf *, u_int32_t, u_int32_t, u_int8_t); +#include -/* kernctl callbacks */ -static errno_t iptap_ctl_connect(kern_ctl_ref, struct sockaddr_ctl *, void **); -static errno_t iptap_ctl_disconnect(kern_ctl_ref, u_int32_t, void *); +#include -#if IP_TAP_NOT_USED +#include -static errno_t iptap_deregister_control(void); +struct iptap_softc { + LIST_ENTRY(iptap_softc) iptap_link; + uint32_t iptap_unit; + uint32_t iptap_dlt_raw_count; + uint32_t iptap_dlt_pkttap_count; + struct ifnet *iptap_ifp; +}; -static errno_t iptap_ctl_send(kern_ctl_ref, u_int32_t, void *, mbuf_t, int); -static errno_t iptap_ctl_setopt(kern_ctl_ref, u_int32_t, void *, int, void *, size_t); -static errno_t iptap_ctl_getopt(kern_ctl_ref, u_int32_t, void *, int, void *, size_t *); +static LIST_HEAD(iptap_list, iptap_softc) iptap_list = LIST_HEAD_INITIALIZER(iptap_list); -#endif /* IP_TAP_NOT_USED */ +static void iptap_lock_shared(void); +static void iptap_lock_exclusive(void); +static void iptap_lock_done(void); +static void iptap_alloc_lock(void); -decl_lck_rw_data(static, iptap_mtx); +decl_lck_rw_data(static, iptap_lck_rw); static lck_grp_t *iptap_grp; -static kern_ctl_ref iptap_kernctl; -static unsigned int iptap_clients; -static OSMallocTag iptap_malloc_tag; - -struct iptap_client_t { - LIST_ENTRY(iptap_client_t) _cle; - u_int32_t _unit; -}; - -static LIST_HEAD(, iptap_client_t) _s_iptap_clients; +errno_t iptap_if_output(ifnet_t, mbuf_t); +errno_t iptap_demux(ifnet_t , mbuf_t, char *, protocol_family_t *); +errno_t iptap_add_proto(ifnet_t, protocol_family_t, const struct ifnet_demux_desc *, + u_int32_t); +errno_t iptap_del_proto(ifnet_t, protocol_family_t); +errno_t iptap_getdrvspec(ifnet_t , struct ifdrv64 *); +errno_t iptap_ioctl(ifnet_t, unsigned long, void *); +void iptap_detach(ifnet_t); +errno_t iptap_tap_callback(ifnet_t , u_int32_t , bpf_tap_mode ); +int iptap_clone_create(struct if_clone *, u_int32_t, void *); +int iptap_clone_destroy(struct ifnet *); + +static int iptap_ipf_register(void); +static int iptap_ipf_unregister(void); +static errno_t iptap_ipf_input(void *, mbuf_t *, int, u_int8_t); +static errno_t iptap_ipf_output(void *, mbuf_t *, ipf_pktopts_t); +static void iptap_ipf_detach(void *); + +static ipfilter_t iptap_ipf4, iptap_ipf6; + +void iptap_bpf_tap(struct mbuf *m, u_int32_t proto, int 
outgoing); + +static struct if_clone iptap_cloner = + IF_CLONE_INITIALIZER(IPTAP_IFNAME, + iptap_clone_create, + iptap_clone_destroy, + 0, + IF_MAXUNIT); + +SYSCTL_DECL(_net_link); +SYSCTL_NODE(_net_link, OID_AUTO, iptap, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "iptap virtual interface"); + +static int iptap_total_tap_count = 0; +SYSCTL_INT(_net_link_iptap, OID_AUTO, total_tap_count, CTLFLAG_RD | CTLFLAG_LOCKED, + &iptap_total_tap_count, 0, ""); + +static int iptap_log = 0; +SYSCTL_INT(_net_link_iptap, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED, + &iptap_log, 0, ""); + +#define IPTAP_LOG(fmt, ...) \ +do { \ + if ((iptap_log)) \ + printf("%s:%d " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ +} while(false) __private_extern__ void -iptap_init(void) { - +iptap_init(void) +{ + errno_t error; + iptap_alloc_lock(); - iptap_malloc_tag = OSMalloc_Tagalloc(IPTAP_CONTROL_NAME, OSMT_DEFAULT); - if (iptap_malloc_tag == NULL) { - iptap_free_lock(); - IPTAP_PRINTF("iptap_init failed: unable to allocate malloc tag.\n"); - return; - } + error = if_clone_attach(&iptap_cloner); + if (error != 0) + panic("%s: if_clone_attach() failed, error %d\n", __func__, error); +} + +static void +iptap_alloc_lock(void) +{ + lck_grp_attr_t *grp_attr; + lck_attr_t *attr; - if (iptap_register_control() != 0) { - iptap_free_lock(); - OSMalloc_Tagfree(iptap_malloc_tag); - IPTAP_PRINTF("iptap_init failed: iptap_register_control failure.\n"); - return; - } + grp_attr = lck_grp_attr_alloc_init(); + lck_grp_attr_setdefault(grp_attr); + iptap_grp = lck_grp_alloc_init(IPTAP_IFNAME, grp_attr); + lck_grp_attr_free(grp_attr); - iptap_clients = 0; + attr = lck_attr_alloc_init(); + lck_attr_setdefault(attr); + + lck_rw_init(&iptap_lck_rw, iptap_grp, attr); + lck_attr_free(attr); } -__private_extern__ void -iptap_ipf_input(struct ifnet *ifp, protocol_family_t proto, struct mbuf *mp, char *frame_header) -{ - VALID_PACKET(ifp->if_type, done); - - do { - char *hdr = (char *)mbuf_data(mp); - size_t start = (size_t)((char*)mbuf_datastart(mp)); - size_t o_len = mp->m_len; - - if (frame_header != NULL && (size_t)frame_header >= start && (size_t)frame_header <= (size_t)hdr) { - if (mbuf_setdata(mp, frame_header, o_len + ((size_t)hdr - (size_t)frame_header)) == 0) { - iptap_enqueue_mbuf(ifp, proto, mp, ((size_t)hdr - (size_t)frame_header), 0, IPTAP_INPUT_TAG); - mbuf_setdata(mp, hdr, o_len); - } - } else { - iptap_enqueue_mbuf(ifp, proto, mp, 0, 0, IPTAP_INPUT_TAG); - } - - } while (0); - -done: - return; +static void +iptap_lock_shared(void) +{ + lck_rw_lock_shared(&iptap_lck_rw); } -__private_extern__ void -iptap_ipf_output(struct ifnet *ifp, protocol_family_t proto, struct mbuf *mp, u_int32_t pre, u_int32_t post) -{ - VALID_PACKET(ifp->if_type, done); - - iptap_enqueue_mbuf(ifp, proto, mp, pre, post, IPTAP_OUTPUT_TAG); - -done: - return; +static void +iptap_lock_exclusive(void) +{ + lck_rw_lock_exclusive(&iptap_lck_rw); } static void -iptap_enqueue_mbuf(struct ifnet *ifp, protocol_family_t proto, struct mbuf *mp, u_int32_t pre, u_int32_t post, u_int8_t io) +iptap_lock_done(void) { - errno_t err = 0; - struct iptap_client_t *client = NULL; - mbuf_t copy, itr = (mbuf_t)mp; - iptap_hdr_t header; - u_int32_t len = 0; - - memset(&header, 0x0, sizeof(header)); - header.version = IPTAP_VERSION_1; - header.type = ifp->if_type; - header.unit = ifp->if_unit; - strlcpy(header.if_name, ifp->if_name, sizeof(header.if_name)); - header.hdr_length = sizeof(header); - header.protocol_family = proto; - header.frame_pre_length = pre; - header.frame_pst_length = post; 
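[Annotation] iptap_alloc_lock() above shows the standard locks-KPI bring-up: the attribute objects exist only to parameterize creation, so each is allocated, defaulted, applied, and freed immediately, leaving just the group and the lock alive. The same skeleton reduced to its steps (KERNEL_PRIVATE, <kern/locks.h>; names here are examples):

    #include <kern/locks.h>

    static lck_grp_t *example_grp;
    static lck_rw_t  example_rw_lock;

    static void
    example_alloc_lock(void)
    {
        lck_grp_attr_t *grp_attr;
        lck_attr_t     *attr;

        /* Group attributes: scratch state, freed right after use. */
        grp_attr = lck_grp_attr_alloc_init();
        lck_grp_attr_setdefault(grp_attr);
        example_grp = lck_grp_alloc_init("example", grp_attr);
        lck_grp_attr_free(grp_attr);

        /* Per-lock attributes follow the same init/use/free shape. */
        attr = lck_attr_alloc_init();
        lck_attr_setdefault(attr);
        lck_rw_init(&example_rw_lock, example_grp, attr);
        lck_attr_free(attr);
    }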
- header.io = io; + lck_rw_done(&iptap_lck_rw); +} + +__private_extern__ int +iptap_clone_create(struct if_clone *ifc, u_int32_t unit, void *params) +{ +#pragma unused(params) + + int error = 0; + struct iptap_softc *iptap = NULL; + struct ifnet_init_params if_init; - do { - len += mbuf_len(itr); - itr = mbuf_next(itr); - } while (itr != NULL); + iptap = _MALLOC(sizeof(struct iptap_softc), M_DEVBUF, M_WAITOK | M_ZERO); + if (iptap == NULL) { + printf("%s: _MALLOC failed\n", __func__); + error = ENOMEM; + goto done; + } + iptap->iptap_unit = unit; + + /* + * We do not use a set_bpf_tap() function as we rather rely on the more + * accurate callback passed to bpf_attach() + */ + bzero(&if_init, sizeof(struct ifnet_init_params)); + if_init.name = ifc->ifc_name; + if_init.unit = unit; + if_init.type = IFT_OTHER; + if_init.family = IFNET_FAMILY_LOOPBACK; + if_init.output = iptap_if_output; + if_init.demux = iptap_demux; + if_init.add_proto = iptap_add_proto; + if_init.del_proto = iptap_del_proto; + if_init.softc = iptap; + if_init.ioctl = iptap_ioctl; + if_init.detach = iptap_detach; + + error = ifnet_allocate(&if_init, &iptap->iptap_ifp); + if (error != 0) { + printf("%s: ifnet_allocate failed, error %d\n", __func__, error); + goto done; + } - iptap_lock_shared(); + ifnet_set_flags(iptap->iptap_ifp, IFF_UP, IFF_UP); - LIST_FOREACH(client, &_s_iptap_clients, _cle) { - - mbuf_dup((mbuf_t)mp, MBUF_DONTWAIT, ©); - if (copy == NULL) - continue; - - err = mbuf_prepend(©, sizeof(header), MBUF_DONTWAIT); - if (err != 0) { - if (copy != NULL) { - mbuf_freem(copy); - copy = NULL; - } - continue; - } - - HTONS(header.unit); - HTONL(header.hdr_length); - HTONL(header.protocol_family); - HTONL(header.frame_pre_length); - HTONL(header.frame_pst_length); - header.length = htonl(len); - - memcpy(mbuf_data(copy), &header, sizeof(header)); - - err = ctl_enqueuembuf(iptap_kernctl, client->_unit, copy, CTL_DATA_EOR); - if (err != 0) { - mbuf_freem(copy); - copy = NULL; - IPTAP_PRINTF("iptap_enqueue_mbuf failed: %d\n", (err)); - continue; - } + error = ifnet_attach(iptap->iptap_ifp, NULL); + if (error != 0) { + printf("%s: ifnet_attach failed - error %d\n", __func__, error); + ifnet_release(iptap->iptap_ifp); + goto done; } + /* + * Attach by default as DLT_PKTAP for packet metadata + * Provide DLT_RAW for legacy + */ + bpf_attach(iptap->iptap_ifp, DLT_PKTAP, sizeof(struct pktap_header), NULL, + iptap_tap_callback); + bpf_attach(iptap->iptap_ifp, DLT_RAW, 0, NULL, + iptap_tap_callback); + + /* Take a reference and add to the global list */ + ifnet_reference(iptap->iptap_ifp); + + iptap_lock_exclusive(); + + if (LIST_EMPTY(&iptap_list)) + iptap_ipf_register(); + LIST_INSERT_HEAD(&iptap_list, iptap, iptap_link); iptap_lock_done(); +done: + if (error != 0) { + if (iptap != NULL) + _FREE(iptap, M_DEVBUF); + } + return (error); } -static void* -iptap_alloc(size_t size) +__private_extern__ int +iptap_clone_destroy(struct ifnet *ifp) { - size_t *mem = OSMalloc(size + sizeof(size_t), iptap_malloc_tag); - - if (mem) { - *mem = size + sizeof(size_t); - mem++; - memset(mem, 0x0, size); - } + int error = 0; + + (void) ifnet_detach(ifp); - return (void*)mem; + return (error); } -static void -iptap_free(void *ptr) +/* + * This function is called whenever a DLT is set on the interface: + * - When interface is attached to a BPF device via BIOCSETIF for the default DLT + * - Whenever a new DLT is selected via BIOCSDLT + * - When the interface is detached from a BPF device (direction is zero) + */ +__private_extern__ errno_t 
+iptap_tap_callback(ifnet_t ifp, u_int32_t dlt, bpf_tap_mode direction) { - size_t *size = ptr; - size--; - OSFree(size, *size, iptap_malloc_tag); - ptr = NULL; -} + struct iptap_softc *iptap; -static void -iptap_alloc_lock(void) -{ - lck_grp_attr_t *grp_attr; - lck_attr_t *attr; - - grp_attr = lck_grp_attr_alloc_init(); - lck_grp_attr_setdefault(grp_attr); - iptap_grp = lck_grp_alloc_init(IPTAP_IF_NAME, grp_attr); - lck_grp_attr_free(grp_attr); - - attr = lck_attr_alloc_init(); - lck_attr_setdefault(attr); - - lck_rw_init(&iptap_mtx, iptap_grp, attr); - lck_attr_free(attr); + iptap = ifp->if_softc; + if (iptap == NULL) { + printf("%s: if_softc is NULL for ifp %s\n", __func__, + ifp->if_xname); + goto done; + } + switch (dlt) { + case DLT_RAW: + if (direction == 0) { + if (iptap->iptap_dlt_raw_count > 0) { + iptap->iptap_dlt_raw_count--; + OSAddAtomic(-1, &iptap_total_tap_count); + + } + } else { + iptap->iptap_dlt_raw_count++; + OSAddAtomic(1, &iptap_total_tap_count); + } + break; + case DLT_PKTAP: + if (direction == 0) { + if (iptap->iptap_dlt_pkttap_count > 0) { + iptap->iptap_dlt_pkttap_count--; + OSAddAtomic(-1, &iptap_total_tap_count); + } + } else { + iptap->iptap_dlt_pkttap_count++; + OSAddAtomic(1, &iptap_total_tap_count); + } + break; + } +done: + /* + * Attachements count must be positive and we're in trouble + * if we have more that 2**31 attachements + */ + VERIFY(iptap_total_tap_count >= 0); + + return (0); } -static void -iptap_free_lock(void) +__private_extern__ errno_t +iptap_if_output(ifnet_t ifp, mbuf_t m) { - lck_rw_destroy(&iptap_mtx, iptap_grp); - lck_grp_free(iptap_grp); - iptap_grp = NULL; +#pragma unused(ifp) + + mbuf_freem(m); + return (ENOTSUP); } -static inline void -iptap_lock_shared(void) +__private_extern__ errno_t +iptap_demux(ifnet_t ifp, mbuf_t m, char *header, + protocol_family_t *ppf) { - lck_rw_lock_shared(&iptap_mtx); +#pragma unused(ifp) +#pragma unused(m) +#pragma unused(header) +#pragma unused(ppf) + + return (ENOTSUP); } -static inline void -iptap_lock_exclusive(void) +__private_extern__ errno_t +iptap_add_proto(ifnet_t ifp, protocol_family_t pf, + const struct ifnet_demux_desc *dmx, u_int32_t cnt) { - lck_rw_lock_exclusive(&iptap_mtx); +#pragma unused(ifp) +#pragma unused(pf) +#pragma unused(dmx) +#pragma unused(cnt) + + return (0); } -static inline void -iptap_lock_done(void) +__private_extern__ errno_t +iptap_del_proto(ifnet_t ifp, protocol_family_t pf) { - lck_rw_done(&iptap_mtx); +#pragma unused(ifp) +#pragma unused(pf) + + return (0); } -static errno_t -iptap_register_control(void) +__private_extern__ errno_t +iptap_getdrvspec(ifnet_t ifp, struct ifdrv64 *ifd) { - errno_t err = 0; - struct kern_ctl_reg kern_ctl; - - bzero(&kern_ctl, sizeof(kern_ctl)); - strlcpy(kern_ctl.ctl_name, IPTAP_CONTROL_NAME, sizeof(kern_ctl.ctl_name)); - kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0; - kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; - kern_ctl.ctl_recvsize = IPTAP_BUFFERSZ; - kern_ctl.ctl_connect = iptap_ctl_connect; - kern_ctl.ctl_disconnect = iptap_ctl_disconnect; - kern_ctl.ctl_send = NULL; - kern_ctl.ctl_setopt = NULL; - kern_ctl.ctl_getopt = NULL; - - err = ctl_register(&kern_ctl, &iptap_kernctl); - - return (err); + errno_t error = 0; + struct iptap_softc *iptap; + + iptap = ifp->if_softc; + if (iptap == NULL) { + error = ENOENT; + printf("%s: iptap NULL - error %d\n", __func__, error); + goto done; + } + + switch (ifd->ifd_cmd) { + case PKTP_CMD_TAP_COUNT: { + uint32_t tap_count = iptap->iptap_dlt_raw_count + iptap->iptap_dlt_pkttap_count; + + 
if (ifd->ifd_len < sizeof(tap_count)) { + printf("%s: PKTP_CMD_TAP_COUNT ifd_len %llu too small - error %d\n", + __func__, ifd->ifd_len, error); + error = EINVAL; + break; + } + error = copyout(&tap_count, ifd->ifd_data, sizeof(tap_count)); + if (error) { + printf("%s: PKTP_CMD_TAP_COUNT copyout - error %d\n", __func__, error); + goto done; + } + break; + } + default: + error = EINVAL; + break; + } + +done: + return (error); } -static errno_t -iptap_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac, void **unitinfo) +__private_extern__ errno_t +iptap_ioctl(ifnet_t ifp, unsigned long cmd, void *data) { -#pragma unused(kctlref) -#pragma unused(unitinfo) - errno_t err = 0; - struct iptap_client_t *client = NULL; + errno_t error = 0; + + if ((cmd & IOC_IN)) { + error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER); + if (error) { + goto done; + } + } - client = (struct iptap_client_t *)iptap_alloc(sizeof(struct iptap_client_t)); - if (client != NULL) { - iptap_lock_exclusive(); + switch (cmd) { + case SIOCGDRVSPEC32: { + struct ifdrv64 ifd; + struct ifdrv32 *ifd32 = (struct ifdrv32 *)data; - iptap_clients++; - client->_unit = sac->sc_unit; - LIST_INSERT_HEAD(&_s_iptap_clients, client, _cle); + memcpy(ifd.ifd_name, ifd32->ifd_name, sizeof(ifd.ifd_name)); + ifd.ifd_cmd = ifd32->ifd_cmd; + ifd.ifd_len = ifd32->ifd_len; + ifd.ifd_data = ifd32->ifd_data; - iptap_lock_done(); - } else { - err = ENOMEM; + error = iptap_getdrvspec(ifp, &ifd); + + break; } - - return (err == 0) ? (0) : (err); + case SIOCGDRVSPEC64: { + struct ifdrv64 *ifd64 = (struct ifdrv64 *)data; + + error = iptap_getdrvspec(ifp, ifd64); + + break; + } + default: + error = ENOTSUP; + break; + } +done: + return (error); } -static errno_t -iptap_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo) +__private_extern__ void +iptap_detach(ifnet_t ifp) { -#pragma unused(kctlref) -#pragma unused(unitinfo) - errno_t err = 0; - struct iptap_client_t *client = NULL; + struct iptap_softc *iptap; iptap_lock_exclusive(); - - LIST_FOREACH(client, &_s_iptap_clients, _cle) { - if (client->_unit == unit) { - iptap_clients--; - LIST_REMOVE(client, _cle); - break; - } - } - + + iptap = ifp->if_softc; + ifp->if_softc = NULL; + LIST_REMOVE(iptap, iptap_link); + + if (LIST_EMPTY(&iptap_list)) + iptap_ipf_unregister(); + iptap_lock_done(); + + /* Drop reference as it's no more on the global list */ + ifnet_release(ifp); - /* get rid of all the interfaces before free'ing */ - iptap_free(client); - - if (client == NULL) - panic("iptap_ctl_disconnect: received a disconnect notification without a cache entry.\n"); - - return (err == 0) ? 
(0) : (err); -} + _FREE(iptap, M_DEVBUF); -#if IP_TAP_NOT_USED + /* This is for the reference taken by ifnet_attach() */ + (void) ifnet_release(ifp); +} -__private_extern__ void -iptap_destroy(void) { - - if (iptap_clients != 0) { - IPTAP_PRINTF("iptap_destroy failed: there are still outstanding clients.\n"); - return; +static int +iptap_ipf_register(void) +{ + struct ipf_filter iptap_ipfinit; + int err = 0; + + IPTAP_LOG("\n"); + + bzero(&iptap_ipfinit, sizeof (iptap_ipfinit)); + iptap_ipfinit.name = IPTAP_IFNAME; + iptap_ipfinit.cookie = &iptap_ipf4; + iptap_ipfinit.ipf_input = iptap_ipf_input; + iptap_ipfinit.ipf_output = iptap_ipf_output; + iptap_ipfinit.ipf_detach = iptap_ipf_detach; + + err = ipf_addv4(&iptap_ipfinit, &iptap_ipf4); + if (err != 0) { + printf("%s: ipf_addv4 for %s0 failed - %d\n", + __func__, IPTAP_IFNAME, err); + goto done; } - - if (iptap_deregister_control() != 0) { - IPTAP_PRINTF("iptap_destroy failed: iptap_deregister_control failed.\n"); + + iptap_ipfinit.cookie = &iptap_ipf6; + err = ipf_addv6(&iptap_ipfinit, &iptap_ipf6); + if (err != 0) { + printf("%s: ipf_addv6 for %s0 failed - %d\n", + __func__, IPTAP_IFNAME, err); + (void) ipf_remove(iptap_ipf4); + iptap_ipf4 = NULL; + goto done; } - - OSMalloc_Tagfree(iptap_malloc_tag); - - iptap_free_lock(); + +done: + return (err); } -static errno_t -iptap_deregister_control(void) +static int +iptap_ipf_unregister(void) { - errno_t err = 0; - - if (iptap_kernctl != NULL) { - err = ctl_deregister(iptap_kernctl); - } else { - err = EINVAL; + int err = 0; + + IPTAP_LOG("\n"); + + if (iptap_ipf4 != NULL) { + err = ipf_remove(iptap_ipf4); + if (err != 0) { + printf("%s: ipf_remove (ipv4) for %s0 failed - %d\n", + __func__, IPTAP_IFNAME, err); + goto done; + } + iptap_ipf4 = NULL; } - - return (err); + + if (iptap_ipf6 != NULL) { + err = ipf_remove(iptap_ipf6); + if (err != 0) { + printf("%s: ipf_remove (ipv6) for %s0 failed - %d\n", + __func__, IPTAP_IFNAME, err); + goto done; + } + iptap_ipf6 = NULL; + } +done: + return (err); } static errno_t -iptap_ctl_send(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, mbuf_t m, int flags) +iptap_ipf_input(void *arg, mbuf_t *mp, int off, u_int8_t proto) { -#pragma unused(kctlref) -#pragma unused(unit) -#pragma unused(unitinfo) -#pragma unused(m) -#pragma unused(flags) - return (KERN_SUCCESS); +#pragma unused(off) +#pragma unused(proto) + + if (arg == (void *)&iptap_ipf4) + iptap_bpf_tap(*mp, AF_INET, 0); + else if (arg == (void *)&iptap_ipf6) + iptap_bpf_tap(*mp, AF_INET6, 0); + else + IPTAP_LOG("%s:%d bad cookie 0x%llx &iptap_ipf4 0x%llx " + "&iptap_ipf6 0x%llx\n", __func__, __LINE__, + (uint64_t)VM_KERNEL_ADDRPERM(arg), + (uint64_t)VM_KERNEL_ADDRPERM(&iptap_ipf4), + (uint64_t)VM_KERNEL_ADDRPERM(&iptap_ipf6)); + + return (0); } static errno_t -iptap_ctl_setopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t len) -{ -#pragma unused(kctlref) -#pragma unused(unit) -#pragma unused(unitinfo) +iptap_ipf_output(void *arg, mbuf_t *mp, ipf_pktopts_t opt) +{ #pragma unused(opt) -#pragma unused(data) -#pragma unused(len) - return (KERN_SUCCESS); + + if (arg == (void *)&iptap_ipf4) + iptap_bpf_tap(*mp, AF_INET, 1); + else if (arg == (void *)&iptap_ipf6) + iptap_bpf_tap(*mp, AF_INET6, 1); + else + IPTAP_LOG("%s:%d bad cookie 0x%llx &iptap_ipf4 0x%llx " + "&iptap_ipf6 0x%llx\n", __func__, __LINE__, + (uint64_t)VM_KERNEL_ADDRPERM(arg), + (uint64_t)VM_KERNEL_ADDRPERM(&iptap_ipf4), + (uint64_t)VM_KERNEL_ADDRPERM(&iptap_ipf6)); + + return (0); } -static errno_t 
-iptap_ctl_getopt(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t *len) +static void +iptap_ipf_detach(void *arg) { -#pragma unused(kctlref) -#pragma unused(unit) -#pragma unused(unitinfo) -#pragma unused(opt) -#pragma unused(data) -#pragma unused(len) - return (KERN_SUCCESS); +#pragma unused(arg) } -#endif /* IP_TAP_NOT_USED */ +__private_extern__ void +iptap_bpf_tap(struct mbuf *m, u_int32_t proto, int outgoing) +{ + struct iptap_softc *iptap; + void (*bpf_tap_func)(ifnet_t , u_int32_t , mbuf_t , void * , size_t ) = + outgoing ? bpf_tap_out : bpf_tap_in; + + iptap_lock_shared(); + LIST_FOREACH(iptap, &iptap_list, iptap_link) { + if (iptap->iptap_dlt_raw_count > 0) { + bpf_tap_func(iptap->iptap_ifp, DLT_RAW, m, + NULL, 0); + } + if (iptap->iptap_dlt_pkttap_count > 0) { + struct { + struct pktap_header hdr; + u_int32_t proto; + } hdr_buffer; + struct pktap_header *hdr = &hdr_buffer.hdr; + size_t hdr_size = sizeof(hdr_buffer); + struct ifnet *ifp = outgoing ? NULL : m->m_pkthdr.rcvif; + + /* Verify the structure is packed */ + _CASSERT(sizeof(hdr_buffer) == sizeof(struct pktap_header) + sizeof(u_int32_t)); + + bzero(hdr, sizeof(hdr_buffer)); + hdr->pth_length = sizeof(struct pktap_header); + hdr->pth_type_next = PTH_TYPE_PACKET; + hdr->pth_dlt = DLT_NULL; + if (ifp != NULL) + snprintf(hdr->pth_ifname, sizeof(hdr->pth_ifname), "%s", + ifp->if_xname); + hdr_buffer.proto = proto; + hdr->pth_flags = outgoing ? PTH_FLAG_DIR_OUT : PTH_FLAG_DIR_IN; + hdr->pth_protocol_family = proto; + hdr->pth_frame_pre_length = 0; + hdr->pth_frame_post_length = 0; + hdr->pth_iftype = ifp != NULL ? ifp->if_type : 0; + hdr->pth_ifunit = ifp != NULL ? ifp->if_unit : 0; + + pktap_fill_proc_info(hdr, proto, m, 0, outgoing, ifp); + + hdr->pth_svc = so_svc2tc(m->m_pkthdr.pkt_svc); + + bpf_tap_func(iptap->iptap_ifp, DLT_PKTAP, m, hdr, hdr_size); + } + } + + iptap_lock_done(); +} diff --git a/bsd/net/iptap.h b/bsd/net/iptap.h index db8b0a22f..0619ca909 100644 --- a/bsd/net/iptap.h +++ b/bsd/net/iptap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2010 Apple Inc. All rights reserved. + * Copyright (c) 1999-2012 Apple Inc. All rights reserved. 
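[Annotation] iptap_bpf_tap() above hands each packet to every clone once per attached DLT: DLT_RAW listeners receive the bare packet, while DLT_PKTAP listeners also receive an out-of-band metadata header (struct pktap_header plus the protocol family) that BPF copies alongside the mbuf, leaving the datapath packet untouched. A reduced sketch of the direction dispatch; the pktap header construction is omitted here, and the code above shows the real layout.

    #include <sys/kpi_mbuf.h>
    #include <net/bpf.h>

    static void
    tap_one(ifnet_t tap_ifp, mbuf_t m, int outgoing)
    {
        /* Pick the direction once, exactly as iptap_bpf_tap() does. */
        void (*tap_func)(ifnet_t, u_int32_t, mbuf_t, void *, size_t) =
            outgoing ? bpf_tap_out : bpf_tap_in;

        /*
         * DLT_RAW listeners take no out-of-band header (NULL/0); a
         * DLT_PKTAP call passes a filled-in struct pktap_header and
         * its size instead.  BPF copies; it never consumes the mbuf.
         */
        tap_func(tap_ifp, DLT_RAW, m, NULL, 0);
    }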
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -28,45 +28,21 @@ #ifndef IPTAP_H #define IPTAP_H +#include #include -#define IPTAP_CONTROL_NAME "com.apple.net.iptap_control" +#ifdef PRIVATE -#define IPTAP_BUFFERSZ (128 * 1024) -#define IPTAP_VERSION_1 0x1 +#include -enum { - IPTAP_OUTPUT_TAG = 0x01, - IPTAP_INPUT_TAG = 0x10, - IPTAP_UNKNOWN_TAG = 0x11 -}; - -#pragma pack(push) -#pragma pack(1) - -typedef struct iptap_hdr_t { - uint32_t hdr_length; - uint8_t version; - uint32_t length; - uint8_t type; - uint16_t unit; - uint8_t io; - uint32_t protocol_family; - uint32_t frame_pre_length; - uint32_t frame_pst_length; - char if_name[IFNAMSIZ]; -} __attribute__ ((__packed__)) iptap_hdr_t; - -#pragma pack(pop) +#define IPTAP_IFNAME "iptap" #ifdef KERNEL_PRIVATE extern void iptap_init(void); -extern void iptap_ipf_input(struct ifnet *, protocol_family_t, struct mbuf *, char *); -extern void iptap_ipf_output(struct ifnet *, protocol_family_t, struct mbuf *, u_int32_t, u_int32_t); -#if 0 -extern void iptap_destroy(void); -#endif #endif /* KERNEL_PRIVATE */ -#endif /* IPTAP_H */ \ No newline at end of file + +#endif /* PRIVATE */ + +#endif /* IPTAP_H */ diff --git a/bsd/net/iso88025.h b/bsd/net/iso88025.h deleted file mode 100644 index 243499354..000000000 --- a/bsd/net/iso88025.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 1998, Larry Lile - * All rights reserved. - * - * For latest sources and information on this driver, please - * go to http://anarchy.stdio.com. - * - * Questions, comments or suggestions should be directed to - * Larry Lile . - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/net/iso88025.h,v 1.3.2.1 2000/10/14 20:01:06 lile Exp $ - * - * Information gathered from tokenring@freebsd, /sys/net/ethernet.h and - * the Mach token ring driver. 
- */ - -/* - * Fundamental constants relating to iso 802.5 - */ - -#ifndef _NET_ISO88025_H_ -#define _NET_ISO88025_H_ - -/* - * General ISO 802.5 definitions - */ -#define ISO88025_ADDR_LEN 6 -#define ISO88025_HDR_LEN (ISO88025_CF_LEN + (ISO88025_ADDR_LEN * 2)) -#define ISO88025_CF_LEN 2 -#define RCF_LEN 2 -#define RIF_MAX_RD 14 -#define RIF_MAX_LEN 16 - -#define TR_AC 0x10 -#define TR_LLC_FRAME 0x40 - -#define TR_4MBPS 4000000 -#define TR_16MBPS 16000000 -#define TR_100MBPS 100000000 - -/* - * Source routing - */ -#define TR_RII 0x80 -#define TR_RCF_BCST_MASK 0xe000 -#define TR_RCF_LEN_MASK 0x1f00 -#define TR_RCF_DIR 0x0080 -#define TR_RCF_LF_MASK 0x0070 - -#define TR_RCF_RIFLEN(x) ((ntohs(x) & TR_RCF_LEN_MASK) >> 8) - -/* - * Minimum and maximum packet payload lengths. - */ -#define ISO88025_MIN_LEN 0 -#define ISO88025_MAX_LEN 17960 - -/* - * A macro to validate a length with - */ -#define ISO88025_IS_VALID_LEN(foo) \ - ((foo) >= ISO88025_MIN_LEN && (foo) <= ISO88025_MAX_LEN) - -/* - * ISO 802.5 physical header - */ -struct iso88025_header { - u_char ac; /* access control field */ - u_char fc; /* frame control field */ - u_char iso88025_dhost[ISO88025_ADDR_LEN]; /* destination address */ - u_char iso88025_shost[ISO88025_ADDR_LEN]; /* source address */ - u_short rcf; /* route control field */ - u_short rd[RIF_MAX_RD]; /* routing designators */ -}; - -struct iso88025_sockaddr_data { - u_char ether_dhost[ISO88025_ADDR_LEN]; - u_char ether_shost[ISO88025_ADDR_LEN]; - u_char ac; - u_char fc; -}; - -/* - * Structure of a 48-bit iso 802.5 address. - * ( We could also add the 16 bit addresses as a union) - */ -struct iso88025_addr { - u_char octet[ISO88025_ADDR_LEN]; -}; - -#define ISO88025_MAX_MTU 18000 -#define ISO88025_DEFAULT_MTU 1500 -#define senderr(e) { error = (e); goto bad;} - -#endif diff --git a/bsd/net/kext_net.c b/bsd/net/kext_net.c deleted file mode 100644 index c9ba8eb83..000000000 --- a/bsd/net/kext_net.c +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (C) 1999 Apple Computer, Inc. */ - -/* - * Support for Network Kernel Extensions: Socket Filters - * - * Justin C. 
Walker, 990319 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "kext_net.h" - -/* List of kernel extensions (networking) known to kernel */ -struct nf_list nf_list; - -static int sockfilter_fix_symantec_bug(struct NFDescriptor* theirDesc); - -/* - * Register a global filter for the specified protocol - * Make a few checks and then insert the new descriptor in the - * filter list and, if global, in its protosw's chain. - */ -int -register_sockfilter(struct NFDescriptor *nfp, struct NFDescriptor *nfp1, - struct protosw *pr, int flags) -{ int s; - static int NF_initted = 0; - - if (nfp == NULL) - return(EINVAL); - - /* Fix Symantec's broken NPC kext */ - if (nfp->nf_handle == 0xf1ab02de) { - int err = sockfilter_fix_symantec_bug(nfp); - if (err != 0) - return err; - } - - s = splhigh(); - if (!NF_initted) - { NF_initted = 1; - TAILQ_INIT(&nf_list); - } - - /* - * Install the extension: - * First, put it in the global list of all filters - * Then, if global, install in the protosw's list - */ - TAILQ_INSERT_TAIL(&nf_list, nfp, nf_list); - if (nfp->nf_flags & NFD_GLOBAL) - { if (flags & NFF_BEFORE) - { if (nfp1 == NULL) - { TAILQ_INSERT_HEAD(&pr->pr_sfilter, - nfp, nf_next); - } else - TAILQ_INSERT_BEFORE(nfp1, nfp, nf_next); - } else /* Default: AFTER */ - { if (nfp1 == NULL) - { TAILQ_INSERT_TAIL(&pr->pr_sfilter, - nfp, nf_next); - } else - TAILQ_INSERT_AFTER(&pr->pr_sfilter, nfp1, - nfp, nf_next); - } - } - splx(s); - return(0); -} - -int -unregister_sockfilter(struct NFDescriptor *nfp, struct protosw *pr, __unused int flags) -{ int s; - - s = splhigh(); - TAILQ_REMOVE(&nf_list, nfp, nf_list); - /* Only globals are attached to the protosw entry */ - if (nfp->nf_flags & NFD_GLOBAL) - TAILQ_REMOVE(&pr->pr_sfilter, nfp, nf_next); - splx(s); - return(0); -} - -struct NFDescriptor * -find_nke(unsigned int handle) -{ struct NFDescriptor *nfp; - - nfp = nf_list.tqh_first; - while (nfp) - { if (nfp->nf_handle == handle) - return(nfp); - nfp = nfp->nf_list.tqe_next; - } - return(NULL); -} - -/* - * Insert a previously registered, non-global, NKE into the list of - * active NKEs for this socket. Then invoke its "attach/create" entry. - * Assumed called with protection in place (spl/mutex/whatever) - * XXX: How to which extension is not found, on error. - */ -int -nke_insert(struct socket *so, struct so_nke *np) -{ - struct kextcb *kp, *kp1; - struct NFDescriptor *nf1, *nf2 = NULL; - - if (np->nke_where != NULL) - { if ((nf2 = find_nke(np->nke_where)) == NULL) - { /* ??? */ - return(ENXIO);/* XXX */ - } - } - - if ((nf1 = find_nke(np->nke_handle)) == NULL) - { /* ??? */ - return(ENXIO);/* XXX */ - } - - kp = so->so_ext; - kp1 = NULL; - if (np->nke_flags & NFF_BEFORE) - { if (nf2) - { while (kp) - { if (kp->e_nfd == nf2) - break; - kp1 = kp; - kp = kp->e_next; - } - if (kp == NULL) - return(ENXIO);/* XXX */ - } - } else - { if (nf2) - { while (kp) - { if (kp->e_nfd == nf2) - break; - kp1 = kp; - kp = kp->e_next; - } - if (kp == NULL) - return(ENXIO);/* XXX */ - } - kp1 = kp; - } - /* - * Here with kp1 pointing to the insertion point. - * If null, this is first entry. - * Now, create and insert the descriptor. 
- */ - - MALLOC(kp, struct kextcb *, sizeof(*kp), M_TEMP, M_WAITOK); - if (kp == NULL) - return(ENOBUFS); /* so_free will clean up */ - bzero(kp, sizeof (*kp)); - if (kp1 == NULL) - { kp->e_next = so->so_ext; - so->so_ext = kp; - } else - { kp->e_next = kp1->e_next; - kp1->e_next = kp; - } - kp->e_fcb = NULL; - kp->e_nfd = nf1; - kp->e_soif = nf1->nf_soif; - kp->e_sout = nf1->nf_soutil; - /* - * Ignore return value for create - * Everyone gets a chance at startup - */ - if (kp->e_soif && kp->e_soif->sf_socreate) - (*kp->e_soif->sf_socreate)(so, so->so_proto, kp); - return(0); -} - -/* - * The following gunk is a fix for Symantec's broken NPC kext - * Symantec's NPC kext does not check that the kextcb->e_fcb - * is not NULL before derefing it. The result is a panic in - * the very few cases where the e_fcb is actually NULL. - * - * This gross chunk of code copies the old function ptrs - * supplied by the kext and wraps a few select ones in - * our own functions that just check for NULL before - * calling in to the kext. - */ - -static struct sockif* g_symantec_if_funcs = NULL; -static struct sockutil* g_symantec_util_funcs = NULL; -static int sym_fix_sbflush(struct sockbuf *, struct kextcb *); -static int sym_fix_sbappend(struct sockbuf *, struct mbuf *, struct kextcb *); -static int sym_fix_soclose(struct socket *, struct kextcb *); -static int sym_fix_sofree(struct socket *, struct kextcb *); -static int sym_fix_soconnect(struct socket *, struct sockaddr *, struct kextcb *); -static int sym_fix_soisconnected(struct socket *, struct kextcb *); -static int sym_fix_sosend(struct socket *, struct sockaddr **, struct uio **, struct mbuf **, - struct mbuf **, int *, struct kextcb *); -static int sym_fix_socantrcvmore(struct socket *, struct kextcb *); -static int sym_fix_socontrol(struct socket *, struct sockopt *, struct kextcb *); - -static int sockfilter_fix_symantec_bug(struct NFDescriptor* theirDesc) -{ - if (!g_symantec_if_funcs ) { - MALLOC(g_symantec_if_funcs, struct sockif*, sizeof(*g_symantec_if_funcs), M_TEMP, M_WAITOK); - - if (!g_symantec_if_funcs) - return ENOMEM; - - *g_symantec_if_funcs = *theirDesc->nf_soif; - } - - if (!g_symantec_util_funcs) { - MALLOC(g_symantec_util_funcs, struct sockutil*, sizeof(*g_symantec_util_funcs), M_TEMP, M_WAITOK); - - if (!g_symantec_util_funcs) - return ENOMEM; - - *g_symantec_util_funcs = *theirDesc->nf_soutil; - } - - if (theirDesc->nf_soutil->su_sbflush) - theirDesc->nf_soutil->su_sbflush = sym_fix_sbflush; - if (theirDesc->nf_soutil->su_sbappend) - theirDesc->nf_soutil->su_sbappend = sym_fix_sbappend; - if (theirDesc->nf_soif->sf_soclose) - theirDesc->nf_soif->sf_soclose = sym_fix_soclose; - if (theirDesc->nf_soif->sf_sofree) - theirDesc->nf_soif->sf_sofree = sym_fix_sofree; - if (theirDesc->nf_soif->sf_soconnect) - theirDesc->nf_soif->sf_soconnect = sym_fix_soconnect; - if (theirDesc->nf_soif->sf_soisconnected) - theirDesc->nf_soif->sf_soisconnected = sym_fix_soisconnected; - if (theirDesc->nf_soif->sf_sosend) - theirDesc->nf_soif->sf_sosend = sym_fix_sosend; - if (theirDesc->nf_soif->sf_socantrcvmore) - theirDesc->nf_soif->sf_socantrcvmore = sym_fix_socantrcvmore; - if (theirDesc->nf_soif->sf_socontrol) - theirDesc->nf_soif->sf_socontrol = sym_fix_socontrol; - - return 0; -} - -static int sym_fix_sbflush(struct sockbuf *p1, struct kextcb *p2) -{ - if (p2->e_fcb != NULL && g_symantec_util_funcs) - return g_symantec_util_funcs->su_sbflush(p1, p2); - else - return 0; -} - -static int sym_fix_sbappend(struct sockbuf *p1, struct mbuf *p2, struct 
kextcb *p3) -{ - if (p3->e_fcb != NULL && g_symantec_util_funcs) - return g_symantec_util_funcs->su_sbappend(p1, p2, p3); - else - return 0; -} - -static int sym_fix_soclose(struct socket *p1, struct kextcb *p2) -{ - if (p2->e_fcb != NULL && g_symantec_if_funcs) - return g_symantec_if_funcs->sf_soclose(p1, p2); - else - return 0; -} - -static int sym_fix_sofree(struct socket *p1, struct kextcb *p2) -{ - if (p2->e_fcb != NULL && g_symantec_if_funcs) - return g_symantec_if_funcs->sf_sofree(p1, p2); - else - return 0; -} - -static int sym_fix_soconnect(struct socket *p1, struct sockaddr *p2, struct kextcb *p3) -{ - if (p3->e_fcb != NULL && g_symantec_if_funcs) - return g_symantec_if_funcs->sf_soconnect(p1, p2, p3); - else - return 0; -} - -static int sym_fix_soisconnected(struct socket *p1, struct kextcb *p2) -{ - if (p2->e_fcb != NULL && g_symantec_if_funcs) - return g_symantec_if_funcs->sf_soisconnected(p1, p2); - else - return 0; -} - -static int sym_fix_sosend(struct socket *p1, struct sockaddr **p2, struct uio **p3, struct mbuf **p4, - struct mbuf **p5, int *p6, struct kextcb *p7) -{ - if (p7->e_fcb != NULL && g_symantec_if_funcs) - return g_symantec_if_funcs->sf_sosend(p1, p2, p3, p4, p5, p6, p7); - else - return 0; -} - -static int sym_fix_socantrcvmore(struct socket *p1, struct kextcb *p2) -{ - if (p2->e_fcb != NULL && g_symantec_if_funcs) - return g_symantec_if_funcs->sf_socantrcvmore(p1, p2); - else - return 0; -} - -static int sym_fix_socontrol(struct socket *p1, struct sockopt *p2, struct kextcb *p3) -{ - if (p3->e_fcb != NULL && g_symantec_if_funcs) - return g_symantec_if_funcs->sf_socontrol(p1, p2, p3); - else - return 0; -} diff --git a/bsd/net/kext_net.h b/bsd/net/kext_net.h index 48ade0710..32b5ab6a7 100644 --- a/bsd/net/kext_net.h +++ b/bsd/net/kext_net.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 1999-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -31,7 +31,7 @@ */ #ifndef NET_KEXT_NET_H -#define NET_KEXT_NET_H +#define NET_KEXT_NET_H #include @@ -39,15 +39,16 @@ #include #ifdef BSD_KERNEL_PRIVATE - -#include - /* * Internal implementation bits */ +#include + +struct socket; +struct sockopt; /* Private, internal implementation functions */ -extern void sflt_init(void) __attribute__((section("__TEXT, initcode"))); +extern void sflt_init(void); extern void sflt_initsock(struct socket *so); extern void sflt_termsock(struct socket *so); extern errno_t sflt_attach_internal(struct socket *so, sflt_handle handle); @@ -56,27 +57,29 @@ extern int sflt_ioctl(struct socket *so, u_long cmd, caddr_t data); extern int sflt_bind(struct socket *so, const struct sockaddr *nam); extern int sflt_listen(struct socket *so); extern int sflt_accept(struct socket *head, struct socket *so, - const struct sockaddr *local, - const struct sockaddr *remote); + const struct sockaddr *local, + const struct sockaddr *remote); extern int sflt_getsockname(struct socket *so, struct sockaddr **local); extern int sflt_getpeername(struct socket *so, struct sockaddr **remote); -extern int sflt_connectin(struct socket *head, const struct sockaddr *remote); +extern int sflt_connectin(struct socket *head, + const struct sockaddr *remote); extern int sflt_connectout(struct socket *so, const struct sockaddr *nam); +extern int sflt_connectxout(struct socket *so, + struct sockaddr_list **nam_sl); extern int sflt_setsockopt(struct socket *so, struct sockopt *sopt); extern int sflt_getsockopt(struct socket *so, struct sockopt *sopt); extern int sflt_data_out(struct socket *so, const struct sockaddr *to, - mbuf_t *data, mbuf_t *control, - sflt_data_flag_t flags); + mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags); extern int sflt_data_in(struct socket *so, const struct sockaddr *from, - mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags); + mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags); #endif /* BSD_KERNEL_PRIVATE */ -#define NFF_BEFORE 0x01 -#define NFF_AFTER 0x02 +#define NFF_BEFORE 0x01 +#define NFF_AFTER 0x02 -#define NKE_OK 0 -#define NKE_REMOVE -1 +#define NKE_OK 0 +#define NKE_REMOVE (-1) /* * Interface structure for inserting an installed socket NKE into an @@ -88,14 +91,12 @@ extern int sflt_data_in(struct socket *so, const struct sockaddr *from, */ #pragma pack(4) -struct so_nke -{ unsigned int nke_handle; +struct so_nke { + unsigned int nke_handle; unsigned int nke_where; int nke_flags; /* NFF_BEFORE, NFF_AFTER: net/kext_net.h */ u_int32_t reserved[4]; /* for future use */ }; #pragma pack() - #endif /* NET_KEXT_NET_H */ - diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c index e613eec82..7b64e747b 100644 --- a/bsd/net/kpi_interface.c +++ b/bsd/net/kpi_interface.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +57,11 @@ #include #include #include +#include +#include +#include +#include +#include #ifdef INET #include #endif @@ -64,16 +71,15 @@ #include "net/net_str_id.h" -#if IF_LASTCHANGEUPTIME +#if CONFIG_MACF +#include +#include +#endif + #define TOUCHLASTCHANGE(__if_lastchange) { \ (__if_lastchange)->tv_sec = net_uptime(); \ (__if_lastchange)->tv_usec = 0; \ } -#else -#define TOUCHLASTCHANGE(__if_lastchange) microtime(__if_lastchange) -#endif - -#define _cast_non_const(p) ((void *)(uintptr_t)(p)) static errno_t ifnet_defrouter_llreachinfo(ifnet_t, int, struct ifnet_llreach_info *); @@ -198,9 +204,10 @@ ifnet_allocate_extended(const struct ifnet_init_eparams *einit0, * to point to storage of at least IFNAMSIZ bytes. It is safe * to write to this. */ - strncpy(_cast_non_const(ifp->if_name), einit.name, IFNAMSIZ); + strncpy(__DECONST(char *, ifp->if_name), einit.name, IFNAMSIZ); ifp->if_type = einit.type; ifp->if_family = einit.family; + ifp->if_subfamily = einit.subfamily; ifp->if_unit = einit.unit; ifp->if_output = einit.output; ifp->if_pre_enqueue = einit.pre_enqueue; @@ -209,15 +216,20 @@ ifnet_allocate_extended(const struct ifnet_init_eparams *einit0, ifp->if_output_sched_model = einit.output_sched_model; ifp->if_output_bw.eff_bw = einit.output_bw; ifp->if_output_bw.max_bw = einit.output_bw_max; + ifp->if_output_lt.eff_lt = einit.output_lt; + ifp->if_output_lt.max_lt = einit.output_lt_max; ifp->if_input_poll = einit.input_poll; ifp->if_input_ctl = einit.input_ctl; ifp->if_input_bw.eff_bw = einit.input_bw; ifp->if_input_bw.max_bw = einit.input_bw_max; + ifp->if_input_lt.eff_lt = einit.input_lt; + ifp->if_input_lt.max_lt = einit.input_lt_max; ifp->if_demux = einit.demux; ifp->if_add_proto = einit.add_proto; ifp->if_del_proto = einit.del_proto; ifp->if_check_multi = einit.check_multi; - ifp->if_framer = einit.framer; + ifp->if_framer_legacy = einit.framer; + ifp->if_framer = einit.framer_extended; ifp->if_softc = einit.softc; ifp->if_ioctl = einit.ioctl; ifp->if_set_bpf_tap = einit.set_bpf_tap; @@ -225,6 +237,33 @@ ifnet_allocate_extended(const struct ifnet_init_eparams *einit0, ifp->if_event = einit.event; ifp->if_kpi_storage = einit.detach; + /* Initialize external name (name + unit) */ + snprintf(__DECONST(char *, ifp->if_xname), IFXNAMSIZ, + "%s%d", ifp->if_name, ifp->if_unit); + + /* + * On embedded, framer() is already in the extended form; + * we simply use it as is, unless the caller specifies + * framer_extended() which will then override it. + * + * On non-embedded, framer() has long been exposed as part + * of the public KPI, and therefore its signature must + * remain the same (without the pre- and postpend length + * parameters.) We special case ether_frameout, such that + * it gets mapped to its extended variant. All other cases + * utilize the stub routine which will simply return zeroes + * for those new parameters. + * + * Internally, DLIL will only use the extended callback + * variant which is represented by if_framer. 
+ */ + if (ifp->if_framer == NULL && ifp->if_framer_legacy != NULL) { + if (ifp->if_framer_legacy == ether_frameout) + ifp->if_framer = ether_frameout_extended; + else + ifp->if_framer = ifnet_framer_stub; + } + if (ifp->if_output_bw.eff_bw > ifp->if_output_bw.max_bw) ifp->if_output_bw.max_bw = ifp->if_output_bw.eff_bw; else if (ifp->if_output_bw.eff_bw == 0) @@ -240,14 +279,29 @@ ifnet_allocate_extended(const struct ifnet_init_eparams *einit0, else if (ifp->if_input_bw.max_bw == 0) ifp->if_input_bw = ifp->if_output_bw; - if (ifp->if_ioctl == NULL) - ifp->if_ioctl = ifp_if_ioctl; - /* Pin if_baudrate to 32 bits */ br = MAX(ifp->if_output_bw.max_bw, ifp->if_input_bw.max_bw); if (br != 0) ifp->if_baudrate = (br > 0xFFFFFFFF) ? 0xFFFFFFFF : br; + if (ifp->if_output_lt.eff_lt > ifp->if_output_lt.max_lt) + ifp->if_output_lt.max_lt = ifp->if_output_lt.eff_lt; + else if (ifp->if_output_lt.eff_lt == 0) + ifp->if_output_lt.eff_lt = ifp->if_output_lt.max_lt; + + if (ifp->if_input_lt.eff_lt > ifp->if_input_lt.max_lt) + ifp->if_input_lt.max_lt = ifp->if_input_lt.eff_lt; + else if (ifp->if_input_lt.eff_lt == 0) + ifp->if_input_lt.eff_lt = ifp->if_input_lt.max_lt; + + if (ifp->if_output_lt.max_lt == 0) + ifp->if_output_lt = ifp->if_input_lt; + else if (ifp->if_input_lt.max_lt == 0) + ifp->if_input_lt = ifp->if_output_lt; + + if (ifp->if_ioctl == NULL) + ifp->if_ioctl = ifp_if_ioctl; + if (ifp->if_start != NULL) { ifp->if_eflags |= IFEF_TXSTART; if (ifp->if_pre_enqueue == NULL) @@ -357,6 +411,12 @@ ifnet_family(ifnet_t interface) return ((interface == NULL) ? 0 : interface->if_family); } +ifnet_subfamily_t +ifnet_subfamily(ifnet_t interface) +{ + return ((interface == NULL) ? 0 : interface->if_subfamily); +} + u_int32_t ifnet_unit(ifnet_t interface) { @@ -511,14 +571,10 @@ ifnet_set_idle_flags_locked(ifnet_t ifp, u_int32_t new_flags, u_int32_t mask) ifp->if_want_aggressive_drain = 0; if (ifnet_aggressive_drainers == 0) panic("%s: ifp=%p negative aggdrain!", __func__, ifp); - if (--ifnet_aggressive_drainers == 0) - rt_aggdrain(0); } else if ((after - before) > 0 && ifp->if_want_aggressive_drain == 0) { ifp->if_want_aggressive_drain++; if (++ifnet_aggressive_drainers == 0) panic("%s: ifp=%p wraparound aggdrain!", __func__, ifp); - else if (ifnet_aggressive_drainers == 1) - rt_aggdrain(1); } return (0); @@ -680,12 +736,13 @@ ifnet_capabilities_enabled(ifnet_t ifp) static const ifnet_offload_t offload_mask = (IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT | IFNET_IP_FRAGMENT | IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | - IFNET_IPV6_FRAGMENT | IFNET_CSUM_SUM16 | IFNET_VLAN_TAGGING | - IFNET_VLAN_MTU | IFNET_MULTIPAGES | IFNET_TSO_IPV4 | IFNET_TSO_IPV6); + IFNET_IPV6_FRAGMENT | IFNET_CSUM_PARTIAL | IFNET_VLAN_TAGGING | + IFNET_VLAN_MTU | IFNET_MULTIPAGES | IFNET_TSO_IPV4 | IFNET_TSO_IPV6 | + IFNET_TX_STATUS); static const ifnet_offload_t any_offload_csum = (IFNET_CSUM_IP | IFNET_CSUM_TCP | IFNET_CSUM_UDP | IFNET_CSUM_FRAGMENT | - IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_CSUM_SUM16); + IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6 | IFNET_CSUM_PARTIAL); errno_t ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload) @@ -697,6 +754,21 @@ ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload) ifnet_lock_exclusive(interface); interface->if_hwassist = (offload & offload_mask); + /* + * Hardware capable of partial checksum offload is + * flexible enough to handle any transports utilizing + * Internet Checksumming. 
Include those transports + * here, and leave the final decision to IP. + */ + if (interface->if_hwassist & IFNET_CSUM_PARTIAL) { + interface->if_hwassist |= (IFNET_CSUM_TCP | IFNET_CSUM_UDP | + IFNET_CSUM_TCPIPV6 | IFNET_CSUM_UDPIPV6); + } + if (dlil_verbose) { + log(LOG_DEBUG, "%s: set offload flags=%b\n", + if_name(interface), + interface->if_hwassist, IFNET_OFFLOADF_BITS); + } ifnet_lock_done(interface); if ((offload & any_offload_csum)) @@ -709,6 +781,8 @@ ifnet_set_offload(ifnet_t interface, ifnet_offload_t offload) ifcaps |= IFCAP_VLAN_MTU; if ((offload & IFNET_VLAN_TAGGING)) ifcaps |= IFCAP_VLAN_HWTAGGING; + if ((offload & IFNET_TX_STATUS)) + ifcaps |= IFNET_TX_STATUS; if (ifcaps != 0) { (void) ifnet_set_capabilities_supported(interface, ifcaps, IFCAP_VALID); @@ -1002,6 +1076,7 @@ ifnet_set_bandwidths(struct ifnet *ifp, struct if_bandwidths *output_bw, if (ifp == NULL) return (EINVAL); + /* set input values first (if any), as output values depend on them */ if (input_bw != NULL) (void) ifnet_set_input_bandwidths(ifp, input_bw); @@ -1019,22 +1094,22 @@ ifnet_set_output_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw, struct ifclassq *ifq; u_int64_t br; + VERIFY(ifp != NULL && bw != NULL); + ifq = &ifp->if_snd; if (!locked) IFCQ_LOCK(ifq); IFCQ_LOCK_ASSERT_HELD(ifq); old_bw = ifp->if_output_bw; - if (bw != NULL) { - if (bw->eff_bw != 0) - ifp->if_output_bw.eff_bw = bw->eff_bw; - if (bw->max_bw != 0) - ifp->if_output_bw.max_bw = bw->max_bw; - if (ifp->if_output_bw.eff_bw > ifp->if_output_bw.max_bw) - ifp->if_output_bw.max_bw = ifp->if_output_bw.eff_bw; - else if (ifp->if_output_bw.eff_bw == 0) - ifp->if_output_bw.eff_bw = ifp->if_output_bw.max_bw; - } + if (bw->eff_bw != 0) + ifp->if_output_bw.eff_bw = bw->eff_bw; + if (bw->max_bw != 0) + ifp->if_output_bw.max_bw = bw->max_bw; + if (ifp->if_output_bw.eff_bw > ifp->if_output_bw.max_bw) + ifp->if_output_bw.max_bw = ifp->if_output_bw.eff_bw; + else if (ifp->if_output_bw.eff_bw == 0) + ifp->if_output_bw.eff_bw = ifp->if_output_bw.max_bw; /* Pin if_baudrate to 32 bits */ br = MAX(ifp->if_output_bw.max_bw, ifp->if_input_bw.max_bw); @@ -1044,7 +1119,7 @@ ifnet_set_output_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw, /* Adjust queue parameters if needed */ if (old_bw.eff_bw != ifp->if_output_bw.eff_bw || old_bw.max_bw != ifp->if_output_bw.max_bw) - ifnet_update_sndq(ifq, CLASSQ_EV_LINK_SPEED); + ifnet_update_sndq(ifq, CLASSQ_EV_LINK_BANDWIDTH); if (!locked) IFCQ_UNLOCK(ifq); @@ -1057,6 +1132,8 @@ ifnet_set_input_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw) { struct if_bandwidths old_bw; + VERIFY(ifp != NULL && bw != NULL); + old_bw = ifp->if_input_bw; if (bw->eff_bw != 0) ifp->if_input_bw.eff_bw = bw->eff_bw; @@ -1069,7 +1146,7 @@ ifnet_set_input_bandwidths(struct ifnet *ifp, struct if_bandwidths *bw) if (old_bw.eff_bw != ifp->if_input_bw.eff_bw || old_bw.max_bw != ifp->if_input_bw.max_bw) - ifnet_update_rcv(ifp, CLASSQ_EV_LINK_SPEED); + ifnet_update_rcv(ifp, CLASSQ_EV_LINK_BANDWIDTH); return (0); } @@ -1113,6 +1190,132 @@ ifnet_bandwidths(struct ifnet *ifp, struct if_bandwidths *output_bw, return (0); } +errno_t +ifnet_set_latencies(struct ifnet *ifp, struct if_latencies *output_lt, + struct if_latencies *input_lt) +{ + if (ifp == NULL) + return (EINVAL); + + if (output_lt != NULL) + (void) ifnet_set_output_latencies(ifp, output_lt, FALSE); + + if (input_lt != NULL) + (void) ifnet_set_input_latencies(ifp, input_lt); + + return (0); +} + +errno_t +ifnet_set_output_latencies(struct ifnet *ifp, struct if_latencies 
*lt, + boolean_t locked) +{ + struct if_latencies old_lt; + struct ifclassq *ifq; + + VERIFY(ifp != NULL && lt != NULL); + + ifq = &ifp->if_snd; + if (!locked) + IFCQ_LOCK(ifq); + IFCQ_LOCK_ASSERT_HELD(ifq); + + old_lt = ifp->if_output_lt; + if (lt->eff_lt != 0) + ifp->if_output_lt.eff_lt = lt->eff_lt; + if (lt->max_lt != 0) + ifp->if_output_lt.max_lt = lt->max_lt; + if (ifp->if_output_lt.eff_lt > ifp->if_output_lt.max_lt) + ifp->if_output_lt.max_lt = ifp->if_output_lt.eff_lt; + else if (ifp->if_output_lt.eff_lt == 0) + ifp->if_output_lt.eff_lt = ifp->if_output_lt.max_lt; + + /* Adjust queue parameters if needed */ + if (old_lt.eff_lt != ifp->if_output_lt.eff_lt || + old_lt.max_lt != ifp->if_output_lt.max_lt) + ifnet_update_sndq(ifq, CLASSQ_EV_LINK_LATENCY); + + if (!locked) + IFCQ_UNLOCK(ifq); + + return (0); +} + +errno_t +ifnet_set_input_latencies(struct ifnet *ifp, struct if_latencies *lt) +{ + struct if_latencies old_lt; + + VERIFY(ifp != NULL && lt != NULL); + + old_lt = ifp->if_input_lt; + if (lt->eff_lt != 0) + ifp->if_input_lt.eff_lt = lt->eff_lt; + if (lt->max_lt != 0) + ifp->if_input_lt.max_lt = lt->max_lt; + if (ifp->if_input_lt.eff_lt > ifp->if_input_lt.max_lt) + ifp->if_input_lt.max_lt = ifp->if_input_lt.eff_lt; + else if (ifp->if_input_lt.eff_lt == 0) + ifp->if_input_lt.eff_lt = ifp->if_input_lt.max_lt; + + if (old_lt.eff_lt != ifp->if_input_lt.eff_lt || + old_lt.max_lt != ifp->if_input_lt.max_lt) + ifnet_update_rcv(ifp, CLASSQ_EV_LINK_LATENCY); + + return (0); +} + +errno_t +ifnet_latencies(struct ifnet *ifp, struct if_latencies *output_lt, + struct if_latencies *input_lt) +{ + if (ifp == NULL) + return (EINVAL); + + if (output_lt != NULL) + *output_lt = ifp->if_output_lt; + if (input_lt != NULL) + *input_lt = ifp->if_input_lt; + + return (0); +} + +errno_t +ifnet_set_poll_params(struct ifnet *ifp, struct ifnet_poll_params *p) +{ + errno_t err; + + if (ifp == NULL) + return (EINVAL); + else if (!ifnet_is_attached(ifp, 1)) + return (ENXIO); + + err = dlil_rxpoll_set_params(ifp, p, FALSE); + + /* Release the io ref count */ + ifnet_decr_iorefcnt(ifp); + + return (err); +} + +errno_t +ifnet_poll_params(struct ifnet *ifp, struct ifnet_poll_params *p) +{ + errno_t err; + + if (ifp == NULL || p == NULL) + return (EINVAL); + else if (!ifnet_is_attached(ifp, 1)) + return (ENXIO); + + err = dlil_rxpoll_get_params(ifp, p); + + /* Release the io ref count */ + ifnet_decr_iorefcnt(ifp); + + return (err); +} + errno_t ifnet_stat_increment(struct ifnet *ifp, const struct ifnet_stat_increment_param *s) @@ -1250,10 +1453,9 @@ ifnet_lastchange(ifnet_t interface, struct timeval *last_change) return (EINVAL); *last_change = interface->if_data.ifi_lastchange; -#if IF_LASTCHANGEUPTIME /* Crude conversion from uptime to calendar time */ last_change->tv_sec += boottime_sec(); -#endif + return (0); } @@ -1274,18 +1476,32 @@ ifnet_get_address_list_family(ifnet_t interface, ifaddr_t **addresses, sa_family_t family) { return (ifnet_get_address_list_family_internal(interface, addresses, - family, 0, M_NOWAIT)); + family, 0, M_NOWAIT, 0)); } +errno_t +ifnet_get_inuse_address_list(ifnet_t interface, ifaddr_t **addresses) +{ + return (addresses == NULL ? 
EINVAL : + ifnet_get_address_list_family_internal(interface, addresses, + 0, 0, M_NOWAIT, 1)); +} + +extern uint32_t tcp_find_anypcb_byaddr(struct ifaddr *ifa); + +extern uint32_t udp_find_anypcb_byaddr(struct ifaddr *ifa); + __private_extern__ errno_t ifnet_get_address_list_family_internal(ifnet_t interface, ifaddr_t **addresses, - sa_family_t family, int detached, int how) + sa_family_t family, int detached, int how, int return_inuse_addrs) { SLIST_HEAD(, ifnet_addr_list) ifal_head; struct ifnet_addr_list *ifal, *ifal_tmp; struct ifnet *ifp; int count = 0; errno_t err = 0; + int usecount = 0; + int index = 0; SLIST_INIT(&ifal_head); @@ -1362,13 +1578,32 @@ one: done: SLIST_FOREACH_SAFE(ifal, &ifal_head, ifal_le, ifal_tmp) { SLIST_REMOVE(&ifal_head, ifal, ifnet_addr_list, ifal_le); - if (err == 0) - (*addresses)[--count] = ifal->ifal_ifa; - else + if (err == 0) { + if (return_inuse_addrs) { + usecount = tcp_find_anypcb_byaddr(ifal->ifal_ifa); + usecount += udp_find_anypcb_byaddr(ifal->ifal_ifa); + if (usecount) { + (*addresses)[index] = ifal->ifal_ifa; + index++; + } + else + IFA_REMREF(ifal->ifal_ifa); + } else { + (*addresses)[--count] = ifal->ifal_ifa; + } + } + else { + IFA_REMREF(ifal->ifal_ifa); + } FREE(ifal, M_TEMP); } + VERIFY(err == 0 || *addresses == NULL); + if ((err == 0) && (count) && ((*addresses)[0] == NULL)) { + VERIFY(return_inuse_addrs == 1); + FREE(*addresses, M_TEMP); + err = ENXIO; + } return (err); } @@ -1397,7 +1632,8 @@ ifnet_lladdr(ifnet_t interface) /* * if_lladdr points to the permanent link address of - * the interface; it never gets deallocated. + * the interface and it never gets deallocated; internal + * code should simply use IF_LLADDR() for performance. */ ifa = interface->if_lladdr; IFA_LOCK_SPIN(ifa); @@ -1434,31 +1670,76 @@ ifnet_llbroadcast_copy_bytes(ifnet_t interface, void *addr, size_t buffer_len, return (0); } -errno_t -ifnet_lladdr_copy_bytes(ifnet_t interface, void *lladdr, size_t lladdr_len) +static errno_t +ifnet_lladdr_copy_bytes_internal(ifnet_t interface, void *lladdr, + size_t lladdr_len, kauth_cred_t *credp) { - struct sockaddr_dl *sdl; + const u_int8_t *bytes; + size_t bytes_len; struct ifaddr *ifa; + uint8_t sdlbuf[SOCK_MAXADDRLEN + 1]; + errno_t error = 0; + + /* + * Make sure to accommodate the largest possible + * size of SA(if_lladdr)->sa_len. + */ + _CASSERT(sizeof (sdlbuf) == (SOCK_MAXADDRLEN + 1)); if (interface == NULL || lladdr == NULL) return (EINVAL); - /* - * if_lladdr points to the permanent link address of - * the interface; it never gets deallocated. 
- */ ifa = interface->if_lladdr; IFA_LOCK_SPIN(ifa); - sdl = SDL((void *)ifa->ifa_addr); - if (lladdr_len != sdl->sdl_alen) { + bcopy(ifa->ifa_addr, &sdlbuf, SDL(ifa->ifa_addr)->sdl_len); + IFA_UNLOCK(ifa); + + bytes = dlil_ifaddr_bytes(SDL(&sdlbuf), &bytes_len, credp); + if (bytes_len != lladdr_len) { bzero(lladdr, lladdr_len); - IFA_UNLOCK(ifa); - return (EMSGSIZE); + error = EMSGSIZE; + } else { + bcopy(bytes, lladdr, bytes_len); } - bcopy(LLADDR(sdl), lladdr, lladdr_len); - IFA_UNLOCK(ifa); - return (0); + return (error); +} + +errno_t +ifnet_lladdr_copy_bytes(ifnet_t interface, void *lladdr, size_t length) +{ + return (ifnet_lladdr_copy_bytes_internal(interface, lladdr, length, + NULL)); +} + +errno_t +ifnet_guarded_lladdr_copy_bytes(ifnet_t interface, void *lladdr, size_t length) +{ +#if CONFIG_MACF + kauth_cred_t cred; + net_thread_marks_t marks; +#endif + kauth_cred_t *credp; + errno_t error; + + credp = NULL; +#if CONFIG_MACF + marks = net_thread_marks_push(NET_THREAD_CKREQ_LLADDR); + cred = kauth_cred_proc_ref(current_proc()); + credp = &cred; +#else + credp = NULL; +#endif + + error = ifnet_lladdr_copy_bytes_internal(interface, lladdr, length, + credp); + +#if CONFIG_MACF + kauth_cred_unref(credp); + net_thread_marks_pop(marks); +#endif + + return (error); } static errno_t @@ -1742,6 +2023,7 @@ ifnet_list_free(ifnet_t *interfaces) void ifnet_transmit_burst_start(ifnet_t ifp, mbuf_t pkt) { +#if MEASURE_BW uint32_t orig_flags; if (ifp == NULL || !(pkt->m_flags & M_PKTHDR)) @@ -1754,13 +2036,17 @@ ifnet_transmit_burst_start(ifnet_t ifp, mbuf_t pkt) return; } - ifp->if_bw.start_seq = pkt->m_pkthdr.pf_mtag.pftag_pktseq; + ifp->if_bw.start_seq = pkt->m_pkthdr.pkt_bwseq; ifp->if_bw.start_ts = mach_absolute_time(); +#else /*!MEASURE_BW */ +#pragma unused(ifp, pkt) +#endif /* !MEASURE_BW */ } void ifnet_transmit_burst_end(ifnet_t ifp, mbuf_t pkt) { +#if MEASURE_BW uint64_t oseq, ots, bytes, ts, t; uint32_t flags; @@ -1782,7 +2068,7 @@ ifnet_transmit_burst_end(ifnet_t ifp, mbuf_t pkt) goto done; } - oseq = pkt->m_pkthdr.pf_mtag.pftag_pktseq; + oseq = pkt->m_pkthdr.pkt_bwseq; ots = mach_absolute_time(); if (ifp->if_bw.start_seq > 0 && oseq > ifp->if_bw.start_seq) { @@ -1820,6 +2106,9 @@ ifnet_transmit_burst_end(ifnet_t ifp, mbuf_t pkt) done: flags = ~(IF_MEASURED_BW_INPROGRESS | IF_MEASURED_BW_CALCULATION); OSBitAndAtomic(flags, &ifp->if_bw.flags); +#else /* !MEASURE_BW */ +#pragma unused(ifp, pkt) +#endif /* !MEASURE_BW */ } /****************************************************************************/ @@ -2152,23 +2441,41 @@ fail: /* misc */ /******************************************************************************/ -extern void udp_get_ports_used(unsigned int ifindex, uint8_t *bitfield); -extern void tcp_get_ports_used(unsigned int ifindex, uint8_t *bitfield); - errno_t -ifnet_get_local_ports(ifnet_t ifp, uint8_t *bitfield) +ifnet_get_local_ports_extended(ifnet_t ifp, protocol_family_t protocol, + u_int32_t wildcardok, u_int8_t *bitfield) { + u_int32_t ifindex; + if (bitfield == NULL) return (EINVAL); - bzero(bitfield, 8192); + switch (protocol) { + case PF_UNSPEC: + case PF_INET: + case PF_INET6: + break; + default: + return (EINVAL); + } + + /* bit string is long enough to hold 16-bit port values */ + bzero(bitfield, bitstr_size(65536)); - udp_get_ports_used(ifp ? ifp->if_index : 0, bitfield); - tcp_get_ports_used(ifp ? ifp->if_index : 0, bitfield); + ifindex = (ifp != NULL) ? 
ifp->if_index : 0; + + udp_get_ports_used(ifindex, protocol, wildcardok, bitfield); + tcp_get_ports_used(ifindex, protocol, wildcardok, bitfield); return (0); } +errno_t +ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield) +{ + return (ifnet_get_local_ports_extended(ifp, PF_UNSPEC, 1, bitfield)); +} + errno_t ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr* sa, int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48]) @@ -2179,7 +2486,7 @@ ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr* sa, int32_t rssi, return(EINVAL); if (sa->sa_family != AF_LINK && sa->sa_family != AF_INET6) return(EINVAL); - + dlil_node_present(ifp, sa, rssi, lqm, npm, srvinfo); return (0); } @@ -2193,7 +2500,7 @@ ifnet_notice_node_absence(ifnet_t ifp, struct sockaddr* sa) return(EINVAL); if (sa->sa_family != AF_LINK && sa->sa_family != AF_INET6) return(EINVAL); - + dlil_node_absent(ifp, sa); return (0); } @@ -2203,7 +2510,95 @@ ifnet_notice_master_elected(ifnet_t ifp) { if (ifp == NULL) return(EINVAL); - + dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_MASTER_ELECTED, NULL, 0); return (0); } + +errno_t +ifnet_tx_compl_status(ifnet_t ifp, mbuf_t m, tx_compl_val_t val) +{ +#pragma unused(ifp, m, val) + /* Dummy function to be implemented XXX */ + return (0); +} + +errno_t +ifnet_report_issues(ifnet_t ifp, u_int8_t modid[IFNET_MODIDLEN], + u_int8_t info[IFNET_MODARGLEN]) +{ + if (ifp == NULL || modid == NULL) + return (EINVAL); + + dlil_report_issues(ifp, modid, info); + return (0); +} + +extern errno_t +ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp) +{ + ifnet_t odifp = NULL; + + if (ifp == NULL) + return (EINVAL); + else if (!ifnet_is_attached(ifp, 1)) + return (ENXIO); + + ifnet_lock_exclusive(ifp); + odifp = ifp->if_delegated.ifp; + if (odifp != NULL && odifp == delegated_ifp) { + /* delegate info is unchanged; nothing more to do */ + ifnet_lock_done(ifp); + goto done; + } + bzero(&ifp->if_delegated, sizeof (ifp->if_delegated)); + if (delegated_ifp != NULL && ifp != delegated_ifp) { + ifp->if_delegated.ifp = delegated_ifp; + ifnet_reference(delegated_ifp); + ifp->if_delegated.type = delegated_ifp->if_type; + ifp->if_delegated.family = delegated_ifp->if_family; + ifp->if_delegated.subfamily = delegated_ifp->if_subfamily; + printf("%s: is now delegating %s (type 0x%x, family %u, " + "sub-family %u)\n", ifp->if_xname, delegated_ifp->if_xname, + delegated_ifp->if_type, delegated_ifp->if_family, + delegated_ifp->if_subfamily); + } + ifnet_lock_done(ifp); + + if (odifp != NULL) { + if (odifp != delegated_ifp) { + printf("%s: is no longer delegating %s\n", + ifp->if_xname, odifp->if_xname); + } + ifnet_release(odifp); + } + + /* Generate a kernel event */ + dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IFDELEGATE_CHANGED, NULL, 0); + +done: + /* Release the io ref count */ + ifnet_decr_iorefcnt(ifp); + + return (0); +} + +extern errno_t +ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp) +{ + if (ifp == NULL || pdelegated_ifp == NULL) + return (EINVAL); + else if (!ifnet_is_attached(ifp, 1)) + return (ENXIO); + + ifnet_lock_shared(ifp); + if (ifp->if_delegated.ifp != NULL) + ifnet_reference(ifp->if_delegated.ifp); + *pdelegated_ifp = ifp->if_delegated.ifp; + ifnet_lock_done(ifp); + + /* Release the io ref count */ + ifnet_decr_iorefcnt(ifp); + + return (0); +} diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h index d4df862da..10551a82c 100644 --- a/bsd/net/kpi_interface.h +++ b/bsd/net/kpi_interface.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. 
+ * Copyright (c) 2004-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /*! @@ -33,7 +33,7 @@ */ #ifndef __KPI_INTERFACE__ -#define __KPI_INTERFACE__ +#define __KPI_INTERFACE__ #ifndef XNU_KERNEL_PRIVATE #include @@ -45,22 +45,15 @@ #include #endif /* KERNEL_PRIVATE */ -#ifndef _SA_FAMILY_T -#define _SA_FAMILY_T -typedef __uint8_t sa_family_t; -#endif +#include #ifdef XNU_KERNEL_PRIVATE -#if CONFIG_EMBEDDED - #define KPI_INTERFACE_EMBEDDED 1 -#else - #define KPI_INTERFACE_EMBEDDED 0 -#endif +#define KPI_INTERFACE_EMBEDDED 0 #else #if TARGET_OS_EMBEDDED - #define KPI_INTERFACE_EMBEDDED 1 +#define KPI_INTERFACE_EMBEDDED 1 #else - #define KPI_INTERFACE_EMBEDDED 0 +#define KPI_INTERFACE_EMBEDDED 0 #endif #endif @@ -85,13 +78,12 @@ struct ifnet_demux_desc; @constant IFNET_FAMILY_DISC A DISC interface. @constant IFNET_FAMILY_MDECAP A MDECAP interface. @constant IFNET_FAMILY_GIF A generic tunnel interface. - @constant IFNET_FAMILY_FAITH A FAITH (IPv4/IPv6 translation) interface. + @constant IFNET_FAMILY_FAITH A FAITH [IPv4/IPv6 translation] interface. @constant IFNET_FAMILY_STF A 6to4 interface. - @constant IFNET_FAMILY_FIREWIRE An IEEE 1394 (firewire) interface. + @constant IFNET_FAMILY_FIREWIRE An IEEE 1394 [Firewire] interface. @constant IFNET_FAMILY_BOND A virtual bonded interface. @constant IFNET_FAMILY_CELLULAR A cellular interface. -*/ - + */ enum { IFNET_FAMILY_ANY = 0, IFNET_FAMILY_LOOPBACK = 1, @@ -110,14 +102,39 @@ enum { IFNET_FAMILY_BOND = 14, IFNET_FAMILY_CELLULAR = 15 }; + /*! @typedef ifnet_family_t @abstract Storage type for the interface family. -*/ + */ typedef u_int32_t ifnet_family_t; +#ifdef KERNEL_PRIVATE +/* + @enum Interface Sub-families + @abstract Constants defining interface sub-families (may also + be viewed as the underlying transport). Some families + (e.g. IFNET_FAMILY_ETHERNET) are often too generic. + These sub-families allow us to further refine the + interface family, e.g. Ethernet over Wi-Fi/USB, etc. + */ +enum { + IFNET_SUBFAMILY_ANY = 0, + IFNET_SUBFAMILY_USB = 1, + IFNET_SUBFAMILY_BLUETOOTH = 2, + IFNET_SUBFAMILY_WIFI = 3, + IFNET_SUBFAMILY_THUNDERBOLT = 4, +}; + +/* + @typedef ifnet_subfamily_t + @abstract Storage type for the interface sub-family. + */ +typedef u_int32_t ifnet_subfamily_t; +#endif /* KERNEL_PRIVATE */ + #ifndef BPF_TAP_MODE_T -#define BPF_TAP_MODE_T +#define BPF_TAP_MODE_T /*! @enum BPF tap mode @abstract Constants defining interface families. 
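The sub-family constants introduced above are carried in the new subfamily field of struct ifnet_init_eparams (see the ifnet_allocate_extended() hunk in kpi_interface.c earlier in this patch). As a rough sketch only — the driver name, unit, and softc are hypothetical, and the required demux/add_proto/del_proto/output callbacks are omitted — an Ethernet-over-USB driver built against this revision might fill in the new field like this:

/*
 * Hypothetical Ethernet-over-USB bring-up; only the family and
 * sub-family plumbing is of interest here.
 */
static errno_t
usbeth_allocate(void *softc, ifnet_t *ifpp)
{
	struct ifnet_init_eparams einit;

	bzero(&einit, sizeof (einit));
	einit.ver = IFNET_INIT_CURRENT_VERSION;
	einit.len = sizeof (einit);
	einit.name = "usbeth";			/* hypothetical name */
	einit.unit = 0;
	einit.type = IFT_ETHER;
	einit.family = IFNET_FAMILY_ETHERNET;
	einit.subfamily = IFNET_SUBFAMILY_USB;	/* new in this revision */
	einit.softc = softc;
	/*
	 * demux, add_proto, del_proto, output/start and the other
	 * required callbacks are omitted here for brevity; an
	 * eparams block without them would be rejected.
	 */
	return (ifnet_allocate_extended(&einit, ifpp));
}

Once the interface is attached, ifnet_subfamily() reports the value back, and ifnet_set_delegate() in kpi_interface.c propagates it into if_delegated.subfamily.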
@@ -183,6 +200,10 @@ typedef u_int32_t protocol_family_t; supported by the interface can be set with "ifnet_set_tso_mtu". To retreive the real MTU for the TCP IPv6 connection the function "mbuf_get_tso_requested" is used by the driver. Note that if TSO is active, all the packets will be flagged for TSO, not just large packets. + @constant IFNET_TX_STATUS Driver supports returning a per packet + transmission status (pass, fail or other errors) of whether + the packet was successfully transmitted on the link, or the + transmission was aborted, or transmission failed. */ @@ -196,13 +217,15 @@ enum { IFNET_CSUM_UDPIPV6 = 0x00000040, IFNET_IPV6_FRAGMENT = 0x00000080, #ifdef KERNEL_PRIVATE - IFNET_CSUM_SUM16 = 0x00001000, + IFNET_CSUM_PARTIAL = 0x00001000, + IFNET_CSUM_SUM16 = IFNET_CSUM_PARTIAL, #endif /* KERNEL_PRIVATE */ IFNET_VLAN_TAGGING = 0x00010000, IFNET_VLAN_MTU = 0x00020000, IFNET_MULTIPAGES = 0x00100000, IFNET_TSO_IPV4 = 0x00200000, IFNET_TSO_IPV6 = 0x00400000, + IFNET_TX_STATUS = 0x00800000 }; /*! @typedef ifnet_offload_t @@ -210,6 +233,14 @@ enum { */ typedef u_int32_t ifnet_offload_t; +#ifdef KERNEL_PRIVATE +#define IFNET_OFFLOADF_BITS \ + "\020\1CSUM_IP\2CSUM_TCP\3CSUM_UDP\4CSUM_IP_FRAGS\5IP_FRAGMENT" \ + "\6CSUM_TCPIPV6\7CSUM_UDPIPV6\10IPV6_FRAGMENT\15CSUM_PARTIAL" \ + "\20VLAN_TAGGING\21VLAN_MTU\25MULTIPAGES\26TSO_IPV4\27TSO_IPV6" \ + "\30TXSTATUS" +#endif /* KERNEL_PRIVATE */ + /* * Callbacks * @@ -333,7 +364,7 @@ typedef void (*ifnet_event_func)(ifnet_t interface, const struct kev_msg *msg); protocol's pre-output function. @param frame_type The frame type as determined by the protocol's pre-output function. - @param prepend_len The length of prepended bytes to the mbuf. + @param prepend_len The length of prepended bytes to the mbuf. (ONLY used if KPI_INTERFACE_EMBEDDED is defined to 1) @param postpend_len The length of the postpended bytes to the mbuf. (ONLY used if KPI_INTERFACE_EMBEDDED is defined to 1) @@ -345,11 +376,18 @@ typedef void (*ifnet_event_func)(ifnet_t interface, const struct kev_msg *msg); the packet will be freed. */ typedef errno_t (*ifnet_framer_func)(ifnet_t interface, mbuf_t *packet, - const struct sockaddr *dest, const char *desk_linkaddr, const char *frame_type + const struct sockaddr *dest, const char *dest_linkaddr, + const char *frame_type #if KPI_INTERFACE_EMBEDDED , u_int32_t *prepend_len, u_int32_t *postpend_len #endif /* KPI_INTERFACE_EMBEDDED */ ); +#ifdef KERNEL_PRIVATE +typedef errno_t (*ifnet_framer_extended_func)(ifnet_t interface, mbuf_t *packet, + const struct sockaddr *dest, const char *dest_linkaddr, + const char *frame_type, u_int32_t *prepend_len, + u_int32_t *postpend_len); +#endif /* KERNEL_PRIVATE */ /*! @typedef ifnet_add_proto_func @@ -708,10 +746,15 @@ typedef void (*ifnet_input_poll_func)(ifnet_t interface, u_int32_t flags, @abstract Constants defining control commands. @constant IFNET_CTL_SET_INPUT_MODEL Set input model. @constant IFNET_CTL_GET_INPUT_MODEL Get input model. + @constant IFNET_CTL_SET_LOG Set logging level. + @constant IFNET_CTL_GET_LOG Get logging level. */ enum { - IFNET_CTL_SET_INPUT_MODEL = 1, - IFNET_CTL_GET_INPUT_MODEL = 2, + IFNET_CTL_SET_INPUT_MODEL = 1, /* input ctl */ + IFNET_CTL_GET_INPUT_MODEL = 2, /* input ctl */ + IFNET_CTL_SET_LOG = 3, /* output ctl */ + IFNET_CTL_GET_LOG = 4, /* output ctl */ + IFNET_CTL_NOTIFY_ADDRESS = 5 /* output ctl */ }; /* @@ -762,6 +805,131 @@ struct ifnet_model_params { u_int32_t reserved[3]; }; +/* + @enum Interface logging sub-commands. 
+ @abstract Constants defining logging levels/priorities. A level + includes all other levels below it. It is expected that + verbosity increases along with the level. + @constant IFNET_LOG_DEFAULT Revert to default logging level. + @constant IFNET_LOG_ALERT Log actions that must be taken immediately. + @constant IFNET_LOG_CRITICAL Log critical conditions. + @constant IFNET_LOG_ERROR Log error conditions. + @constant IFNET_LOG_WARNING Log warning conditions. + @constant IFNET_LOG_NOTICE Log normal but significant conditions. + @constant IFNET_LOG_INFORMATIONAL Log informational messages. + @constant IFNET_LOG_DEBUG Log debug-level messages. + */ +enum { + IFNET_LOG_DEFAULT = 0, + IFNET_LOG_ALERT = 1, + IFNET_LOG_CRITICAL = 2, + IFNET_LOG_ERROR = 3, + IFNET_LOG_WARNING = 4, + IFNET_LOG_NOTICE = 5, + IFNET_LOG_INFORMATIONAL = 6, + IFNET_LOG_DEBUG = 7 +}; + +#ifdef BSD_KERNEL_PRIVATE +#define IFNET_LOG_MIN IFNET_LOG_DEFAULT +#define IFNET_LOG_MAX IFNET_LOG_DEBUG +#endif /* BSD_KERNEL_PRIVATE */ + +/* + @typedef ifnet_log_level_t + @abstract Storage type for log level/priority. + */ +typedef int32_t ifnet_log_level_t; + +/* + @enum Interface logging facilities + @abstract Constants defining the logging facilities which + are to be configured with the specified logging level. + @constant IFNET_LOGF_DLIL The DLIL layer. + @constant IFNET_LOGF_FAMILY The networking family layer. + @constant IFNET_LOGF_DRIVER The device driver layer. + @constant IFNET_LOGF_FIRMWARE The firmware layer. + */ +enum { + IFNET_LOGF_DLIL = 0x00000001, + IFNET_LOGF_FAMILY = 0x00010000, + IFNET_LOGF_DRIVER = 0x01000000, + IFNET_LOGF_FIRMWARE = 0x10000000 +}; + +#ifdef BSD_KERNEL_PRIVATE +#define IFNET_LOGF_MASK \ + (IFNET_LOGF_DLIL | IFNET_LOGF_FAMILY | IFNET_LOGF_DRIVER | \ + IFNET_LOGF_FIRMWARE) + +#define IFNET_LOGF_BITS \ + "\020\1DLIL\21FAMILY\31DRIVER\35FIRMWARE" + +#endif /* BSD_KERNEL_PRIVATE */ + +/* + @typedef ifnet_log_flags_t + @abstract Storage type for log flags/facilities. + */ +typedef u_int32_t ifnet_log_flags_t; + +/* + @enum Interface logging category + @abstract Constants defining categories for issues experienced. + @constant IFNET_LOGCAT_CONNECTIVITY Connectivity related issues. + @constant IFNET_LOGCAT_QUALITY Quality/fidelity related issues. + @constant IFNET_LOGCAT_PERFORMANCE Performance related issues. + */ +enum { + IFNET_LOGCAT_CONNECTIVITY = 1, + IFNET_LOGCAT_QUALITY = 2, + IFNET_LOGCAT_PERFORMANCE = 3 +}; + +/* + @typedef ifnet_log_category_t + @abstract Storage type for log category. + */ +typedef int32_t ifnet_log_category_t; + +/* + @typedef ifnet_log_subcategory_t + @abstract Storage type for log subcategory. This is largely opaque + and it can be used for IOReturn values, etc. + */ +typedef int32_t ifnet_log_subcategory_t; + +/* + @struct ifnet_log_params + @discussion This structure is used as a parameter to the ifnet + logging sub-commands. + @field level The logging level/priority. + @field flags The logging flags/facilities. + @field category The category of issue. + @field subcategory The subcategory of issue. + */ +struct ifnet_log_params { + ifnet_log_level_t level; + ifnet_log_flags_t flags; + ifnet_log_category_t category; + ifnet_log_subcategory_t subcategory; +}; + +/* + @struct ifnet_notify_address_params + @discussion This structure is used as a parameter to the ifnet + address notification sub-command. 
This is used to indicate + to the family/driver that one or more addresses of the given + address family has been added to, or removed from the list + of addresses on the interface. The driver may query for the + current address list by calling ifnet_get_address_list_family(). + @field address_family The address family of the interface address(es). + */ +struct ifnet_notify_address_params { + sa_family_t address_family; + u_int32_t reserved[3]; +}; + /* @typedef ifnet_ctl_func @discussion ifnet_ctl_func is called by the network stack to inform @@ -809,6 +977,9 @@ typedef errno_t (*ifnet_ctl_func)(ifnet_t interface, ifnet_ctl_cmd_t cmd, @field output_bw The effective output bandwidth (in bits per second.) @field output_bw_max The maximum theoretical output bandwidth (in bits per second.) + @field output_lt The effective output latency (in nanosecond.) + @field output_lt_max The maximum theoretical output latency + (in nanosecond.) @field input_poll The poll function for the interface, valid only if IFNET_INIT_LEGACY is not set and only if IFNET_INIT_INPUT_POLL is set. @@ -821,6 +992,9 @@ typedef errno_t (*ifnet_ctl_func)(ifnet_t interface, ifnet_ctl_cmd_t cmd, @field input_bw The effective input bandwidth (in bits per second.) @field input_bw_max The maximum theoretical input bandwidth (in bits per second.) + @field input_lt The effective input latency (in nanosecond.) + @field input_lt_max The maximum theoretical input latency + (in nanosecond.) @field demux The function used to determine the protocol family of an incoming packet. @field add_proto The function used to attach a protocol to this @@ -828,6 +1002,9 @@ typedef errno_t (*ifnet_ctl_func)(ifnet_t interface, ifnet_ctl_cmd_t cmd, @field del_proto The function used to remove a protocol from this interface. @field framer The function used to frame outbound packets, may be NULL. + @field framer_extended The function used to frame outbound packets, + in the newer form; may be NULL. If specified, it will override + the value set via framer. @field softc Driver specific storage. This value can be retrieved from the ifnet using the ifnet_softc function. @field ioctl The function used to handle ioctls. 
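To make the two new control paths concrete, here is a hedged sketch of a driver-side ifnet_ctl callback consuming IFNET_CTL_SET_LOG and IFNET_CTL_NOTIFY_ADDRESS with the parameter structures defined above. The mydrv_* helpers are invented for illustration; the signature is that of ifnet_ctl_func as declared in this header:

static errno_t
mydrv_ctl(ifnet_t ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg)
{
	switch (cmd) {
	case IFNET_CTL_SET_LOG: {
		struct ifnet_log_params *lp = arg;

		if (arg == NULL || arglen != sizeof (*lp))
			return (EINVAL);
		/* honor only the facility aimed at the driver */
		if (lp->flags & IFNET_LOGF_DRIVER)
			mydrv_set_log_level(ifp, lp->level);	/* hypothetical */
		return (0);
	}
	case IFNET_CTL_NOTIFY_ADDRESS: {
		struct ifnet_notify_address_params *np = arg;

		if (arg == NULL || arglen != sizeof (*np))
			return (EINVAL);
		/* e.g. re-program receive filters for this address family */
		mydrv_addresses_changed(ifp, np->address_family);	/* hypothetical */
		return (0);
	}
	default:
		return (EOPNOTSUPP);
	}
}

The enum above tags each sub-command as an input or output control; the sketch only assumes that the stack hands the driver the matching parameter structure with its size in arglen.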
@@ -863,14 +1040,18 @@ struct ifnet_init_eparams { u_int32_t reserved; /* for future use */ u_int64_t output_bw; /* optional */ u_int64_t output_bw_max; /* optional */ - u_int64_t _reserved[4]; /* for future use */ + u_int64_t output_lt; /* optional */ + u_int64_t output_lt_max; /* optional */ + u_int64_t _reserved[2]; /* for future use */ ifnet_input_poll_func input_poll; /* optional, ignored for legacy model */ ifnet_ctl_func input_ctl; /* required for opportunistic polling */ u_int32_t rcvq_maxlen; /* optional, only for opportunistic polling */ u_int32_t __reserved; /* for future use */ u_int64_t input_bw; /* optional */ u_int64_t input_bw_max; /* optional */ - u_int64_t ___reserved[4]; /* for future use */ + u_int64_t input_lt; /* optional */ + u_int64_t input_lt_max; /* optional */ + u_int64_t ___reserved[2]; /* for future use */ ifnet_demux_func demux; /* required */ ifnet_add_proto_func add_proto; /* required */ ifnet_del_proto_func del_proto; /* required */ @@ -883,7 +1064,14 @@ struct ifnet_init_eparams { ifnet_event_func event; /* optional */ const void *broadcast_addr; /* required for non point-to-point interfaces */ u_int32_t broadcast_len; /* required for non point-to-point interfaces */ - u_int64_t ____reserved[4]; /* for future use */ + ifnet_framer_extended_func framer_extended; /* optional */ + ifnet_subfamily_t subfamily; /* optional */ +#if !defined(__LP64__) + u_int64_t _____reserved[3]; /* for future use */ +#else + u_int32_t ____reserved; /* pad */ + u_int64_t _____reserved[2]; /* for future use */ +#endif /* __LP64__ */ }; #endif /* KERNEL_PRIVATE */ @@ -953,7 +1141,7 @@ struct ifnet_demux_desc { @field detached The function to be called for handling the detach. */ #ifdef KERNEL_PRIVATE -#define demux_list demux_array +#define demux_list demux_array #endif /* KERNEL_PRIVATE */ struct ifnet_attach_proto_param { @@ -1075,7 +1263,7 @@ extern errno_t ifnet_dequeue(ifnet_t interface, mbuf_t *packet); new driver output model, and that the output scheduling model is set to IFNET_SCHED_MODEL_DRIVER_MANAGED. @param interface The interface to dequeue the packet from. - @param tc The service class. + @param sc The service class. @param packet Pointer to the packet being dequeued. @result May return EINVAL if the parameters are invalid, ENXIO if the interface doesn't implement the new driver output model @@ -1084,7 +1272,7 @@ extern errno_t ifnet_dequeue(ifnet_t interface, mbuf_t *packet); is currently no packet available to be dequeued. */ extern errno_t ifnet_dequeue_service_class(ifnet_t interface, - mbuf_svc_class_t tc, mbuf_t *packet); + mbuf_svc_class_t sc, mbuf_t *packet); /* @function ifnet_dequeue_multi @@ -1093,6 +1281,9 @@ extern errno_t ifnet_dequeue_service_class(ifnet_t interface, the output scheduling model is set to IFNET_SCHED_MODEL_NORMAL. The returned packet chain is traversable with mbuf_nextpkt(). @param interface The interface to dequeue the packets from. + @param max The maximum number of packets in the packet chain that + may be returned to the caller; this needs to be a non-zero + value for any packet to be returned. @param first_packet Pointer to the first packet being dequeued. @param last_packet Pointer to the last packet being dequeued. Caller may supply NULL if not interested in value. @@ -1118,7 +1309,10 @@ extern errno_t ifnet_dequeue_multi(ifnet_t interface, u_int32_t max, scheduling model is set to IFNET_SCHED_MODEL_DRIVER_MANAGED. The returned packet chain is traversable with mbuf_nextpkt(). 
@param interface The interface to dequeue the packets from. - @param tc The service class. + @param sc The service class. + @param max The maximum number of packets in the packet chain that + may be returned to the caller; this needs to be a non-zero + value for any packet to be returned. @param first_packet Pointer to the first packet being dequeued. @param last_packet Pointer to the last packet being dequeued. Caller may supply NULL if not interested in value. @@ -1134,7 +1328,7 @@ extern errno_t ifnet_dequeue_multi(ifnet_t interface, u_int32_t max, is currently no packet available to be dequeued. */ extern errno_t ifnet_dequeue_service_class_multi(ifnet_t interface, - mbuf_svc_class_t tc, u_int32_t max, mbuf_t *first_packet, + mbuf_svc_class_t sc, u_int32_t max, mbuf_t *first_packet, mbuf_t *last_packet, u_int32_t *cnt, u_int32_t *len); /* @@ -1169,7 +1363,7 @@ extern errno_t ifnet_set_sndq_maxlen(ifnet_t interface, u_int32_t maxqlen); interface which implements the new driver output model. @param interface The interface to get the max queue length on. @param maxqlen Pointer to a storage for the maximum number of packets - in the output queue. + in the output queue for all service classes. @result May return EINVAL if the parameters are invalid or ENXIO if the interface doesn't implement the new driver output model. */ @@ -1180,12 +1374,36 @@ extern errno_t ifnet_get_sndq_maxlen(ifnet_t interface, u_int32_t *maxqlen); @discussion Get the current length of the output queue of an interface which implements the new driver output model. @param interface The interface to get the current queue length on. - @param qlen Pointer to a storage for the current number of packets - in the output queue. + @param packets Pointer to a storage for the current number of packets + in the aggregate output queue. This number represents all + enqueued packets regardless of their service classes. @result May return EINVAL if the parameters are invalid or ENXIO if the interface doesn't implement the new driver output model. */ -extern errno_t ifnet_get_sndq_len(ifnet_t interface, u_int32_t *qlen); +extern errno_t ifnet_get_sndq_len(ifnet_t interface, u_int32_t *packets); + +/* + @function ifnet_get_service_class_sndq_len + @discussion Get the current length of the output queue for a specific + service class of an interface which implements the new driver + output model. + @param interface The interface to get the current queue length on. + @param sc The service class. + @param packets Pointer to a storage for the current number of packets + of the specific service class in the output queue; may be + NULL if caller is not interested in getting the value. Note + that multiple service classes may be mapped to an output queue; + this routine reports the packet count of that output queue. + @param bytes Pointer to a storage for the current size (in bytes) of + the output queue specific to the service class; may be NULL if + caller is not interested in getting the value. Note that + multiple service classes may be mapped to an output queue; + this routine reports the length of that output queue. + @result May return EINVAL if the parameters are invalid or ENXIO if + the interface doesn't implement the new driver output model. 
+ */ +extern errno_t ifnet_get_service_class_sndq_len(ifnet_t interface, + mbuf_svc_class_t sc, u_int32_t *packets, u_int32_t *bytes); /* @function ifnet_set_rcvq_maxlen @@ -1215,6 +1433,80 @@ extern errno_t ifnet_set_rcvq_maxlen(ifnet_t interface, u_int32_t maxqlen); */ extern errno_t ifnet_get_rcvq_maxlen(ifnet_t interface, u_int32_t *maxqlen); +/* + @struct ifnet_poll_params + @discussion This structure is used to define various opportunistic + polling parameters for an interface. + @field flags Currently unused/ignored; must be set to zero. + @field packets_limit The maximum number of packets to be dequeued + each time the driver's input poll callback is invoked while + in polling mode; this corresponds to the max_count parameter + of ifnet_input_poll_func. A zero value indicates the use of + default maximum packets defined by the system. + @field packets_lowat Low watermark packet threshold. + @field packets_hiwat High watermark packet threshold. + @field bytes_lowat Low watermark byte threshold. + @field bytes_hiwat High watermark byte threshold. + The low and high watermark inbound packet and bytes thresholds; + these values may be link rate dependent. Polling is enabled + when the average inbound packets or bytes goes above the + corresponding high watermark value; it stays in that mode until + both of the average inbound packets and bytes go below their + corresponding low watermark values. Zero watermark values + indicate the use of default thresholds as defined by the + system. Both low and high watermark values must either be + zeroes, or both must be non-zeroes with low watermark value + being less than the high watermark value. + @field interval_time The interval time between each invocation of + the driver's input poll callback, in nanoseconds. A zero + value indicates the use of default poll interval time as + defined by the system. If a non-zero value is specified and + is less than the minimum interval time, the latter will be + chosen by the system. + */ +struct ifnet_poll_params { + u_int32_t flags; + u_int32_t packets_limit; + u_int32_t packets_lowat; + u_int32_t packets_hiwat; + u_int32_t bytes_lowat; + u_int32_t bytes_hiwat; + u_int64_t interval_time; + u_int64_t reserved[4]; +}; + +typedef struct ifnet_poll_params ifnet_poll_params_t; + +/* + @function ifnet_set_poll_params + @discussion Configures opportunistic input polling parameters on an + interface. This call may be issued post ifnet_attach in order + to modify the interface's polling parameters. The driver may + alter the default values chosen by the system to achieve the + optimal performance for a given link rate or driver dynamics. + @param interface The interface to configure opportunistic polling on. + @param poll_params Pointer to the polling parameters. If NULL, it + implies that the system should revert the interface's polling + parameters to their default values. + @result May return EINVAL if the parameters are invalid or ENXIO if + the interface doesn't implement the new driver input model. + */ +extern errno_t ifnet_set_poll_params(ifnet_t interface, + ifnet_poll_params_t *poll_params); + +/* + @function ifnet_poll_params + @discussion Retrieves opportunistic input polling parameters on an + interface. This call may be issued post ifnet_attach in order + to retrieve the interface's polling parameters. + @param interface The interface to retrieve the polling parameters from. + @param poll_params Pointer to the polling parameters. 
+ @result May return EINVAL if the parameters are invalid or ENXIO if + the interface doesn't implement the new driver input model. + */ +extern errno_t ifnet_poll_params(ifnet_t interface, + ifnet_poll_params_t *poll_params); + /* @function ifnet_start @discussion Trigger the transmission at the driver layer on an @@ -1255,6 +1547,55 @@ extern void ifnet_transmit_burst_start(ifnet_t interface, mbuf_t pkt); transmitted. */ extern void ifnet_transmit_burst_end(ifnet_t interface, mbuf_t pkt); + +/* + @function ifnet_flowid + @discussion Returns the interface flow ID value, which can be used + by a (virtual) network interface for participating in the + FLOWSRC_IFNET flow advisory mechanism. The flow ID value + is available after the interface is attached. + @param interface The interface to retrieve the flow ID from. + @param flowid Pointer to the flow ID value. + @result May return EINVAL if the parameters are invalid or ENXIO if + the interface doesn't implement the new driver input model. + */ +extern errno_t ifnet_flowid(ifnet_t interface, u_int32_t *flowid); + +/* + @function ifnet_enable_output + @discussion Enables output on a (virtual) interface if it has been + previously disabled via ifnet_disable_output(). This call + is used to override the flow advisory mechanism typically + used between a (virtual) network interface and a real network + interface beneath it. Under normal circumstances, the flow + advisory mechanism will automatically re-enable the (virtual) + interface's output mechanism when the real network interface + is able to transmit more data. Re-enabling output will cause + the (virtual) interface's start callback to be called again. + @param interface The interface to enable the transmission on. + @result May return EINVAL if the parameters are invalid or ENXIO if + the interface doesn't implement the new driver input model. + */ +extern errno_t ifnet_enable_output(ifnet_t interface); + +/* + @function ifnet_disable_output + @discussion Disables output on a (virtual) interface. Disabling + output will cause the (virtual) interface's start callback + to go idle. This call is typically used by a (virtual) + interface upon receiving flow control feedback from the + real network interface beneath it, in order to propagate the + flow control condition to the layers above. Under normal + circumstances, the flow advisory mechanism will automatically + re-enable the (virtual) interface's output mechanism when + the real network interface is able to transmit more data, + as long as the (virtual) interface participates in the + FLOWSRC_IFNET flow advisory for the data that it emits. + @param interface The interface to disable the transmission on. + @result May return EINVAL if the parameters are invalid or ENXIO if + the interface doesn't implement the new driver input model. + */ +extern errno_t ifnet_disable_output(ifnet_t interface); #endif /* KERNEL_PRIVATE */ /*! @@ -1326,14 +1667,14 @@ extern errno_t ifnet_detach(ifnet_t interface); /*! @function ifnet_interface_family_find - @discussion Look up the interface family identifier for a string. - If there is no interface family identifier assigned for this string + @discussion Look up the interface family identifier for a string. + If there is no interface family identifier assigned for this string a new interface family identifier is created and assigned. - It is recommended to use the bundle id of the KEXT as the string + It is recommended to use the bundle id of the KEXT as the string to avoid collisions with other KEXTs. 
The lookup operation is not optimized so a module should call this - function once during startup and cache the interface family identifier. - The interface family identifier for a string will not be re-assigned until + function once during startup and cache the interface family identifier. + The interface family identifier for a string will not be re-assigned until the system reboots. @param module_string A unique string identifying your interface family @param family_id Upon return, a unique interface family identifier for use with @@ -1342,7 +1683,7 @@ extern errno_t ifnet_detach(ifnet_t interface); @result 0 on success, otherwise errno error. */ extern errno_t ifnet_interface_family_find(const char *module_string, ifnet_family_t *family_id); - + /* * Interface manipulation. */ @@ -1366,11 +1707,21 @@ extern const char *ifnet_name(ifnet_t interface); /*! @function ifnet_family @discussion Returns the family of the interface. - @param interface Interface to retrieve the unit number from. + @param interface Interface to retrieve the family from. @result Interface family type. */ extern ifnet_family_t ifnet_family(ifnet_t interface); +#ifdef KERNEL_PRIVATE +/* + @function ifnet_subfamily + @discussion Returns the sub-family of the interface. + @param interface Interface to retrieve the sub-family from. + @result Interface sub-family type. + */ +extern ifnet_subfamily_t ifnet_subfamily(ifnet_t interface); +#endif /* KERNEL_PRIVATE */ + /*! @function ifnet_unit @discussion Returns the unit number of the interface. @@ -1568,8 +1919,8 @@ extern errno_t ifnet_inet6_defrouter_llreachinfo(ifnet_t interface, /*! @function ifnet_set_capabilities_supported @discussion Specify the capabilities supported by the interface. - @discussion This function lets you specify which capabilities are supported - by the interface. Typically this function is called by the driver when + @discussion This function lets you specify which capabilities are supported + by the interface. Typically this function is called by the driver when the interface gets attached to the system. The mask allows to control which capability to set or unset. The kernel will effectively take the lock, then set the @@ -1607,16 +1958,16 @@ extern u_int32_t ifnet_capabilities_supported(ifnet_t interface); This function is intended to be called by the driver. A kext must not call this function on an interface the kext does not own. - - Typically this function is called by the driver when the interface is - created to specify which of the supported capabilities are enabled by - default. This function is also meant to be called when the driver handles + + Typically this function is called by the driver when the interface is + created to specify which of the supported capabilities are enabled by + default. This function is also meant to be called when the driver handles the interface ioctl SIOCSIFCAP. - - The driver should call ifnet_set_offlad() to indicate the corresponding + + The driver should call ifnet_set_offload() to indicate the corresponding hardware offload bits that will be used by the networking stack. - - It is an error to enable a capability that is not marked as + + It is an error to enable a capability that is not marked as supported by the interface. @param interface Interface to set the capabilities on. @param new_caps The value of the capabilities that should be set or unset. These @@ -1644,9 +1995,9 @@ extern u_int32_t ifnet_capabilities_enabled(ifnet_t interface); VLAN. This replaces the if_hwassist flags field. 
@@ -1702,7 +2053,7 @@ enum {
 /*!
 @function ifnet_set_wake_flags
- @discussion Sets the wake properties of the underlying hardware. These are
+ @discussion Sets the wake properties of the underlying hardware. These are
 typically set by the driver.
 @param interface The interface.
 @param properties Properties to set or unset.
@@ -1769,7 +2120,7 @@ extern u_int32_t ifnet_get_link_mib_data_length(ifnet_t interface);
 @discussion Attaches a protocol to an interface.
 @param interface The interface.
 @param protocol_family The protocol family being attached
- (PF_INET/PF_APPLETALK/etc...).
+ (PF_INET/PF_INET6/etc...).
 @param proto_details Details of the protocol being attached.
 @result 0 on success otherwise the errno error.
 */
@@ -1784,7 +2135,7 @@ extern errno_t ifnet_attach_protocol(ifnet_t interface,
 for packet chains which improve performance.
 @param interface The interface.
 @param protocol_family The protocol family being attached
- (PF_INET/PF_APPLETALK/etc...).
+ (PF_INET/PF_INET6/etc...).
 @param proto_details Details of the protocol being attached.
 @result 0 on success otherwise the errno error.
 */
@@ -2027,6 +2378,10 @@ typedef struct if_bandwidths if_bandwidths_t;
 /*
 @function ifnet_set_bandwidths
+ @discussion This function allows a driver to indicate the output
+ and/or input bandwidth information to the system. Each set
+ comprises the effective and maximum theoretical values.
+ Each value must be greater than zero.
 @param interface The interface.
 @param output_bw The output bandwidth values (in bits per second).
 May be set to NULL if the caller does not want to alter the
@@ -2052,6 +2407,40 @@ extern errno_t ifnet_set_bandwidths(ifnet_t interface,
 */
 extern errno_t ifnet_bandwidths(ifnet_t interface, if_bandwidths_t *output_bw,
 if_bandwidths_t *input_bw);
+
+typedef struct if_latencies if_latencies_t;
+
+/*
+ @function ifnet_set_latencies
+ @discussion This function allows a driver to indicate the output
+ and/or input latency information to the system. Each set
+ comprises the effective and maximum theoretical values.
+ Each value must be greater than zero.
+ @param interface The interface.
+ @param output_lt The output latency values (in nanoseconds).
+ May be set to NULL if the caller does not want to alter the
+ existing output latency values.
+ @param input_lt The input latency values (in nanoseconds).
+ May be set to NULL if the caller does not want to alter the
+ existing input latency values.
+ @result 0 on success otherwise the errno error.
+ */
+extern errno_t ifnet_set_latencies(ifnet_t interface,
+ if_latencies_t *output_lt, if_latencies_t *input_lt);
+
+/*
+ @function ifnet_latencies
+ @param interface The interface.
+ @param output_lt The output latency values (in nanoseconds).
+ May be set to NULL if the caller does not want to retrieve the
+ output latency value.
+ @param input_lt The input latency values (in nanoseconds).
+ May be set to NULL if the caller does not want to retrieve the
+ input latency value.
+ @result 0 on success otherwise the errno error.
+ */
+extern errno_t ifnet_latencies(ifnet_t interface, if_latencies_t *output_lt,
+ if_latencies_t *input_lt);
 #endif /* KERNEL_PRIVATE */
 
 /*!
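A sketch of how a driver might publish these link parameters. It assumes the eff_bw/max_bw and eff_lt/max_lt member names for struct if_bandwidths and struct if_latencies (as defined alongside these typedefs in net/if_var.h); the function name and the numbers are made up:

#include <net/kpi_interface.h>

/*
 * Sketch only: publish output bandwidth and latency figures for a
 * link; passing NULL leaves the input direction untouched.
 */
static void
my_drv_publish_link_params(ifnet_t ifp)
{
    if_bandwidths_t obw;
    if_latencies_t olt;

    obw.eff_bw = 800000000ULL;     /* effective: 800 Mbit/s */
    obw.max_bw = 1000000000ULL;    /* theoretical maximum: 1 Gbit/s */
    (void) ifnet_set_bandwidths(ifp, &obw, NULL);

    olt.eff_lt = 50000ULL;         /* effective: 50 usec, in ns */
    olt.max_lt = 200000ULL;        /* worst case: 200 usec, in ns */
    (void) ifnet_set_latencies(ifp, &olt, NULL);
}
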
@@ -2200,8 +2589,22 @@ extern errno_t ifnet_get_address_list_family(ifnet_t interface,
 ifaddr_t **addresses, sa_family_t family);
 
 #ifdef KERNEL_PRIVATE
+/*!
+ @function ifnet_get_inuse_address_list
+ @discussion Get a list of addresses on the interface that are in
+ use by at least one TCP or UDP socket. The rest of the API is similar
+ to ifnet_get_address_list. Calling ifnet_free_address_list will
+ free the array of addresses. Note this only gives a point-in-time
+ snapshot of the addresses in use.
+ @param interface The interface
+ @param addresses A pointer to a NULL terminated array of ifaddr_ts
+ @result 0 on success otherwise the errno error.
+ */
+extern errno_t ifnet_get_inuse_address_list(ifnet_t interface,
+ ifaddr_t **addresses);
+
 __private_extern__ errno_t ifnet_get_address_list_family_internal(ifnet_t,
- ifaddr_t **, sa_family_t, int, int);
+ ifaddr_t **, sa_family_t, int, int, int);
 #endif /* KERNEL_PRIVATE */
 
 /*!
@@ -2227,7 +2630,7 @@ extern errno_t ifnet_set_lladdr(ifnet_t interface, const void *lladdr,
 /*!
 @function ifnet_lladdr_copy_bytes
- @discussion Copies the bytes of the link-layer address in to the
+ @discussion Copies the bytes of the link-layer address into the
 specified buffer.
 @param interface The interface to copy the link-layer address from.
 @param lladdr The buffer to copy the link-layer address in to.
@@ -2238,6 +2641,19 @@ extern errno_t ifnet_lladdr_copy_bytes(ifnet_t interface, void *lladdr,
 size_t length);
 
 #ifdef KERNEL_PRIVATE
+/*!
+ @function ifnet_guarded_lladdr_copy_bytes
+ @discussion Copies the bytes of the link-layer address into the
+ specified buffer unless the current process is a sandboxed
+ application without the net.link.addr system info privilege.
+ @param interface The interface to copy the link-layer address from.
+ @param lladdr The buffer to copy the link-layer address in to.
+ @param length The length of the buffer. This value must match the
+ length of the link-layer address.
+ */
+extern errno_t ifnet_guarded_lladdr_copy_bytes(ifnet_t interface, void *lladdr,
+ size_t length);
+
 /*!
 @function ifnet_lladdr
 @discussion Returns a pointer to the link-layer address.
@@ -2632,7 +3048,7 @@ typedef errno_t (*ifnet_clone_create_func)(if_clone_t ifcloner, u_int32_t unit,
 /*
 @typedef ifnet_clone_destroy_func
- @discussion ifnet_clone_create_func is called to destroy an interface created
+ @discussion ifnet_clone_destroy_func is called to destroy an interface created
 by an interface cloner.
 @param interface The interface to destroy.
 @result Return zero on success or an errno error value on failure.
@@ -2656,9 +3072,9 @@ struct ifnet_clone_params {
 @function ifnet_clone_attach
 @discussion Attaches a new interface cloner.
 @param cloner_params The structure that defines an interface cloner.
- @param interface A pointer to an opaque handle that represent the interface cloner
+ @param interface A pointer to an opaque handle that represents the interface cloner
 that is attached upon success.
- @result Returns 0 on success.
+ @result Returns 0 on success.
 May return ENOBUFS if there is insufficient memory.
 May return EEXIST if an interface cloner with the same name is
 already attached.
 */
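The in-use address list above follows the same walk-and-free pattern as ifnet_get_address_list. A sketch of a caller (the counting helper is hypothetical):

#include <net/kpi_interface.h>

/*
 * Sketch only: count the addresses on 'ifp' currently used by at
 * least one TCP or UDP socket.  The result is a point-in-time
 * snapshot, per the discussion above.
 */
static errno_t
my_count_inuse_addresses(ifnet_t ifp, int *count)
{
    ifaddr_t *addrs;
    errno_t err;
    int i;

    err = ifnet_get_inuse_address_list(ifp, &addrs);
    if (err != 0)
        return (err);
    for (i = 0; addrs[i] != NULL; i++)
        ;
    *count = i;
    ifnet_free_address_list(addrs);  /* frees the NULL-terminated array */
    return (0);
}
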
@@ -2668,7 +3084,7 @@ extern errno_t ifnet_clone_attach(struct ifnet_clone_params *cloner_params, if_c
 @function ifnet_clone_detach
 @discussion Detaches a previously attached interface cloner.
 @param ifcloner The opaque handle returned when the interface cloner was attached.
- @result Returns 0 on success.
+ @result Returns 0 on success.
 */
 extern errno_t ifnet_clone_detach(if_clone_t ifcloner);
 
@@ -2678,17 +3094,101 @@ extern errno_t ifnet_clone_detach(if_clone_t ifcloner);
 /*
 @function ifnet_get_local_ports
- @discussion Returns a bitfield indicating which ports have sockets
- open. An interface that supports waking the host on unicast traffic may
- use this information to discard incoming unicast packets that don't have
- a corresponding bit set instead of waking up the host. For port 0x0001,
- bit 1 of the first byte would be set. For port n, bit 1 << (n % 8) of
- the (n / 8)'th byte would be set.
- @param ifp The interface in question.
+ @discussion Returns a bitfield indicating which ports of PF_INET
+ and PF_INET6 protocol families have sockets in the usable
+ state. An interface that supports waking the host on unicast
+ traffic may use this information to discard incoming unicast
+ packets that don't have a corresponding bit set instead of
+ waking up the host. For port 0x0001, bit 1 of the first byte
+ would be set. For port n, bit 1 << (n % 8) of the (n / 8)'th
+ byte would be set.
+ @param ifp The interface in question. May be NULL, which means
+ all interfaces.
+ @param bitfield A pointer to 8192 bytes.
+ @result Returns 0 on success.
+ */
+extern errno_t ifnet_get_local_ports(ifnet_t ifp, u_int8_t *bitfield);
+
+/*
+ @function ifnet_get_local_ports_extended
+ @discussion Returns a bitfield indicating which local ports of the
+ specified protocol have sockets in the usable state. An
+ interface that supports waking the host on unicast traffic may
+ use this information to discard incoming unicast packets that
+ don't have a corresponding bit set instead of waking up the
+ host. For port 0x0001, bit 1 of the first byte would be set.
+ For port n, bit 1 << (n % 8) of the (n / 8)'th byte would be
+ set.
+ @param ifp The interface in question. May be NULL, which means
+ all interfaces.
+ @param protocol The protocol family of the sockets. PF_UNSPEC (0)
+ means all protocols, otherwise PF_INET or PF_INET6.
+ @param wildcardok A boolean value (0 or 1) indicating whether or not
+ the list of local ports should include those that are used
+ by sockets that aren't bound to any local address.
 @param bitfield A pointer to 8192 bytes.
 @result Returns 0 on success.
 */
-extern errno_t ifnet_get_local_ports(ifnet_t ifp, uint8_t *bitfield);
+extern errno_t ifnet_get_local_ports_extended(ifnet_t ifp,
+ protocol_family_t protocol, u_int32_t wildcardok, u_int8_t *bitfield);
+
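The layout described above maps each of the 65536 possible ports onto one bit of the 8192-byte array (8192 * 8 = 65536). A sketch of the corresponding lookup (the helper is hypothetical; the static buffer keeps the example short and is not thread-safe):

#include <net/kpi_interface.h>

/*
 * Sketch only: query whether local port 'port' (host byte order)
 * has a usable socket, per the bit layout documented above.
 */
static int
my_port_is_active(ifnet_t ifp, u_int16_t port)
{
    static u_int8_t bitfield[8192];

    if (ifnet_get_local_ports(ifp, bitfield) != 0)
        return (0);
    return ((bitfield[port / 8] & (1 << (port % 8))) != 0);
}
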
+/******************************************************************************/
+/* for reporting issues */
+/******************************************************************************/
+
+#define IFNET_MODIDLEN 20
+#define IFNET_MODARGLEN 12
+
+/*
+ @function ifnet_report_issues
+ @discussion Provided for network interface families and drivers to
+ notify the system of issues detected at their layers.
+ @param ifp The interface experiencing issues.
+ @param modid The ID of the module reporting issues. It may contain
+ any value that is unique enough to identify the module, such
+ as the SHA-1 hash of the bundle ID of the module, e.g.
+ "com.apple.iokit.IONetworkingFamily" or
+ "com.apple.iokit.IO80211Family".
+ @param info An optional, fixed-size array of octets containing opaque
+ information specific to the module/layer reporting
+ the issues. May be NULL.
+ @result Returns 0 on success, or EINVAL if arguments are invalid.
+ */
+extern errno_t ifnet_report_issues(ifnet_t ifp, u_int8_t modid[IFNET_MODIDLEN],
+ u_int8_t info[IFNET_MODARGLEN]);
+
+/******************************************************************************/
+/* for interfaces that support link level transmit completion status */
+/******************************************************************************/
+/*
+ @enum Per-packet PHY-level transmit completion status values
+ @abstract Constants defining possible completion status values.
+ A driver may support all or some of these values.
+ @constant IFNET_TX_COMPL_SUCCESS link transmission succeeded
+ @constant IFNET_TX_COMPL_FAIL link transmission failed
+ @constant IFNET_TX_COMPL_ABORTED link transmission aborted, may retry
+ @constant IFNET_TX_COMPL_QFULL link level secondary queue full
+*/
+enum {
+ IFNET_TX_COMPL_SUCCESS = 0, /* sent on link */
+ IFNET_TX_COMPL_FAIL = 1, /* failed to send on link */
+ IFNET_TX_COMPL_ABORTED = 2, /* aborted send, peer asleep */
+ IFNET_TX_COMPL_QFULL = 3 /* driver level queue full */
+};
+
+typedef u_int32_t tx_compl_val_t;
+
+/*
+ @function ifnet_tx_compl_status
+ @discussion Used as an upcall from the IONetworking family to the
+ stack that indicates the link-level completion status of a
+ transmitted packet.
+ @param ifp The interface to which the mbuf was sent
+ @param m The mbuf that was transmitted
+ @param val Indicates the status of the transmission
+*/
+extern errno_t ifnet_tx_compl_status(ifnet_t ifp, mbuf_t m, tx_compl_val_t val);
+
 /******************************************************************************/
 /* for interfaces that support dynamic node absence/presence events */
 /******************************************************************************/
@@ -2711,7 +3211,7 @@ extern errno_t ifnet_get_local_ports(ifnet_t ifp, uint8_t *bitfield);
 @result Returns 0 on success, or EINVAL if arguments are invalid.
 */
 extern errno_t
-ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr* sa, int32_t rssi,
+ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr *sa, int32_t rssi,
 int lqm, int npm, u_int8_t srvinfo[48]);
 
 /*
@@ -2724,7 +3224,7 @@ ifnet_notice_node_presence(ifnet_t ifp, struct sockaddr* sa, int32_t rssi,
 detected.
 @result Returns 0 on success, or EINVAL if arguments are invalid.
 */
-extern errno_t ifnet_notice_node_absence(ifnet_t ifp, struct sockaddr* sa);
+extern errno_t ifnet_notice_node_absence(ifnet_t ifp, struct sockaddr *sa);
 
 /*
 @function ifnet_notice_master_elected
@@ -2737,6 +3237,46 @@ extern errno_t ifnet_notice_node_absence(ifnet_t ifp, struct sockaddr* sa);
 */
 extern errno_t ifnet_notice_master_elected(ifnet_t ifp);
 
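A sketch of the completion upcall documented above, as it might appear in a driver's transmit-done path (the driver function is hypothetical; only ifnet_tx_compl_status and the IFNET_TX_COMPL_* values come from this header):

#include <net/kpi_interface.h>

/*
 * Sketch only: report per-packet link-level transmit status back
 * to the stack once the hardware signals completion.
 */
static void
my_drv_tx_done(ifnet_t ifp, mbuf_t m, int sent_ok)
{
    (void) ifnet_tx_compl_status(ifp, m,
        sent_ok ? IFNET_TX_COMPL_SUCCESS : IFNET_TX_COMPL_FAIL);
}
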
+/******************************************************************************/
+/* for interface delegation */
+/******************************************************************************/
+
+/*
+ @function ifnet_set_delegate
+ @discussion Indicate that an interface is delegating another interface
+ for accounting/restriction purposes. This could be used by a
+ virtual interface that is going over another interface, where
+ the virtual interface is to be treated as if it's the underlying
+ interface for certain operations/checks.
+ @param ifp The delegating interface.
+ @param delegated_ifp The delegated interface. If NULL or equal to
+ the delegating interface itself, any previously-established
+ delegation is removed. If non-NULL, a reference to the
+ delegated interface is held by the delegating interface;
+ this reference is released via a subsequent call to remove
+ the established association, or when the delegating interface
+ is detached.
+ @result Returns 0 on success, EINVAL if arguments are invalid, or
+ ENXIO if the delegating interface isn't currently attached.
+ */
+extern errno_t
+ifnet_set_delegate(ifnet_t ifp, ifnet_t delegated_ifp);
+
+/*
+ @function ifnet_get_delegate
+ @discussion Retrieve delegated interface information on an interface.
+ @param ifp The delegating interface.
+ @param pdelegated_ifp Pointer to the delegated interface. Upon
+ success, this will contain the delegated interface or
+ NULL if there is no delegation in place. If non-NULL,
+ the delegated interface will be returned with a reference
+ held for caller, and the caller is responsible for releasing
+ it via ifnet_release().
+ @result Returns 0 on success, EINVAL if arguments are invalid, or
+ ENXIO if the delegating interface isn't currently attached.
+ */
+extern errno_t
+ifnet_get_delegate(ifnet_t ifp, ifnet_t *pdelegated_ifp);
 #endif /* KERNEL_PRIVATE */
 
 __END_DECLS
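A sketch of the delegation KPI declared above, e.g. for a tunnel-style virtual interface tracking the physical interface it currently runs over (names are hypothetical):

#include <net/kpi_interface.h>

/*
 * Sketch only: update the delegate as the virtual interface moves
 * between physical interfaces.
 */
static errno_t
my_vif_set_underlying(ifnet_t vif, ifnet_t physical)
{
    /* Passing NULL (or vif itself) clears any existing delegation;
     * otherwise a reference to 'physical' is held until the
     * association is removed or 'vif' detaches. */
    return (ifnet_set_delegate(vif, physical));
}
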
diff --git a/bsd/net/kpi_interfacefilter.c b/bsd/net/kpi_interfacefilter.c
index 7bd9ea69f..82ccd2d3e 100644
--- a/bsd/net/kpi_interfacefilter.c
+++ b/bsd/net/kpi_interfacefilter.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003,2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -42,7 +42,7 @@ iflt_attach(
 {
 if (interface == NULL) return ENOENT;
 
- return dlil_attach_filter(interface, filter, filter_ref);
+ return dlil_attach_filter(interface, filter, filter_ref, 0);
 }
 
 void
diff --git a/bsd/net/kpi_protocol.c b/bsd/net/kpi_protocol.c
index 982dc905a..170dd25f0 100644
--- a/bsd/net/kpi_protocol.c
+++ b/bsd/net/kpi_protocol.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
- *
+ *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
- *
+ *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
@@ -71,8 +71,6 @@ struct proto_family_str {
 proto_unplumb_handler detach_proto;
 };
 
-#define PROTO_HASH_SLOTS 5
-
 static struct proto_input_entry *proto_hash[PROTO_HASH_SLOTS];
 static int proto_total_waiting = 0;
 static struct proto_input_entry *proto_input_add_list = NULL;
@@ -81,22 +79,6 @@ static lck_mtx_t *proto_family_mutex = &proto_family_mutex_data;
 static TAILQ_HEAD(, proto_family_str) proto_family_head =
 TAILQ_HEAD_INITIALIZER(proto_family_head);
 
-static int
-proto_hash_value(protocol_family_t protocol)
-{
- switch (protocol) {
- case PF_INET:
- return (0);
- case PF_INET6:
- return (1);
- case PF_APPLETALK:
- return (2);
- case PF_VLAN:
- return (3);
- }
- return (4);
-}
-
 __private_extern__ void
 proto_kpi_init(void)
 {
@@ -122,11 +104,10 @@ proto_register_input(protocol_family_t protocol, proto_input_handler input,
 {
 struct proto_input_entry *entry;
 struct dlil_threading_info *inp = dlil_main_input_thread;
- struct domain *dp = domains;
- int do_unlock;
+ struct domain *dp;
+ domain_guard_t guard;
 
 entry = _MALLOC(sizeof (*entry), M_IFADDR, M_WAITOK);
-
 if (entry == NULL)
 return (ENOMEM);
 
@@ -137,11 +118,16 @@ proto_register_input(protocol_family_t protocol, proto_input_handler input,
 entry->hash = proto_hash_value(protocol);
 entry->chain = chains;
 
- do_unlock = domain_proto_mtx_lock();
- while (dp && (protocol_family_t)dp->dom_family != protocol)
- dp = dp->dom_next;
+ guard = domain_guard_deploy();
+ TAILQ_FOREACH(dp, &domains, dom_entry) {
+ if (dp->dom_family == (int)protocol)
+ break;
+ }
+ domain_guard_release(guard);
+ if (dp == NULL)
+ return (EINVAL);
+
 entry->domain = dp;
- domain_proto_mtx_unlock(do_unlock);
 
 lck_mtx_lock(&inp->input_lck);
 entry->next = proto_input_add_list;
diff --git a/bsd/net/kpi_protocol.h b/bsd/net/kpi_protocol.h
index 46877b8be..222696502 100644
--- a/bsd/net/kpi_protocol.h
+++ b/bsd/net/kpi_protocol.h
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
- *
+ *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
- *
+ *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
 /*!
@@ -39,14 +39,13 @@
 #include
 #include
-
 __BEGIN_DECLS
 
 /******************************************************************************/
 /* Protocol input/inject */
 /******************************************************************************/
 
-#ifdef KERNEL_PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
 /*!
 @typedef protocol_input_handler
 @discussion protocol_input_handler is called to input a packet.
If @@ -97,7 +96,7 @@ extern errno_t proto_register_input(protocol_family_t protocol, @result A errno error on failure. */ extern void proto_unregister_input(protocol_family_t protocol); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ /*! @function proto_input @@ -181,7 +180,7 @@ extern errno_t proto_register_plumber(protocol_family_t proto_fam, extern void proto_unregister_plumber(protocol_family_t proto_fam, ifnet_family_t if_fam); -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* @function proto_plumb @discussion Plumbs a protocol to an actual interface. This will find @@ -212,9 +211,9 @@ extern errno_t proto_plumb(protocol_family_t protocol_family, ifnet_t ifp); extern errno_t proto_unplumb(protocol_family_t protocol_family, ifnet_t ifp); __private_extern__ void -proto_kpi_init(void) __attribute__((section("__TEXT, initcode"))); +proto_kpi_init(void); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ __END_DECLS #endif /* __KPI_PROTOCOL__ */ diff --git a/bsd/net/ndrv.c b/bsd/net/ndrv.c index dc49773ed..468cee375 100644 --- a/bsd/net/ndrv.c +++ b/bsd/net/ndrv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997-2008, 2012 Apple Inc. All rights reserved. + * Copyright (c) 1997-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,14 +95,14 @@ static int ndrv_do_add_multicast(struct ndrv_cb *np, struct sockopt *sopt); static int ndrv_do_remove_multicast(struct ndrv_cb *np, struct sockopt *sopt); static struct ndrv_multiaddr* ndrv_have_multicast(struct ndrv_cb *np, struct sockaddr* addr); static void ndrv_remove_all_multicast(struct ndrv_cb *np); -static void ndrv_dominit(void) __attribute__((section("__TEXT, initcode"))); +static void ndrv_dominit(struct domain *); u_int32_t ndrv_sendspace = NDRVSNDQ; u_int32_t ndrv_recvspace = NDRVRCVQ; TAILQ_HEAD(, ndrv_cb) ndrvl = TAILQ_HEAD_INITIALIZER(ndrvl); -extern struct domain ndrvdomain; -extern struct protosw ndrvsw; +static struct domain *ndrvdomain = NULL; +extern struct domain ndrvdomain_s; #define NDRV_PROTODEMUX_COUNT 10 @@ -194,13 +194,13 @@ ndrv_input( return EJUSTRETURN; bcopy(frame_header, m->m_data, ifnet_hdrlen(ifp)); - lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); + lck_mtx_assert(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(ndrvdomain->dom_mtx); if (sbappendaddr(&(so->so_rcv), (struct sockaddr *)&ndrvsrc, m, (struct mbuf *)0, &error) != 0) { sorwakeup(so); } - lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); + lck_mtx_unlock(ndrvdomain->dom_mtx); return 0; /* radar 4030377 - always return 0 */ } @@ -234,7 +234,7 @@ ndrv_attach(struct socket *so, int proto, __unused struct proc *p) TAILQ_INIT(&np->nd_dlist); np->nd_signature = NDRV_SIGNATURE; np->nd_socket = so; - np->nd_proto.sp_family = so->so_proto->pr_domain->dom_family; + np->nd_proto.sp_family = SOCK_DOM(so); np->nd_proto.sp_protocol = proto; np->nd_if = NULL; np->nd_proto_family = 0; @@ -299,10 +299,10 @@ ndrv_event(struct ifnet *ifp, __unused protocol_family_t protocol, event->kev_class == KEV_NETWORK_CLASS && event->kev_subclass == KEV_DL_SUBCLASS && event->event_code == KEV_DL_IF_DETACHING) { - lck_mtx_assert(ndrvdomain.dom_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(ndrvdomain.dom_mtx); + lck_mtx_assert(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(ndrvdomain->dom_mtx); ndrv_handle_ifp_detach(ifnet_family(ifp), ifnet_unit(ifp)); - lck_mtx_unlock(ndrvdomain.dom_mtx); + lck_mtx_unlock(ndrvdomain->dom_mtx); 
} } @@ -406,7 +406,7 @@ ndrv_disconnect(struct socket *so) static int ndrv_shutdown(struct socket *so) { - lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED); socantsendmore(so); return 0; } @@ -591,7 +591,12 @@ ndrv_do_disconnect(struct ndrv_cb *np) FREE(np->nd_faddr, M_IFADDR); np->nd_faddr = 0; } - if (so->so_state & SS_NOFDREF) + /* + * A multipath subflow socket would have its SS_NOFDREF set by default, + * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; + * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. + */ + if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) ndrv_do_detach(np); soisdisconnected(so); return(0); @@ -832,15 +837,6 @@ ndrv_find_inbound(struct ifnet *ifp, u_int32_t protocol) return NULL; } -static void ndrv_dominit(void) -{ - static int ndrv_dominited = 0; - - if (ndrv_dominited == 0 && - net_add_proto(&ndrvsw, &ndrvdomain) == 0) - ndrv_dominited = 1; -} - static void ndrv_handle_ifp_detach(u_int32_t family, short unit) { @@ -870,7 +866,7 @@ ndrv_handle_ifp_detach(u_int32_t family, short unit) so = np->nd_socket; /* Make sure sending returns an error */ /* Is this safe? Will we drop the funnel? */ - lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(ndrvdomain->dom_mtx, LCK_MTX_ASSERT_OWNED); socantsendmore(so); socantrcvmore(so); } @@ -1046,39 +1042,51 @@ ndrv_remove_all_multicast(struct ndrv_cb* np) } } -struct pr_usrreqs ndrv_usrreqs = { - ndrv_abort, pru_accept_notsupp, ndrv_attach, ndrv_bind, - ndrv_connect, pru_connect2_notsupp, pru_control_notsupp, ndrv_detach, - ndrv_disconnect, pru_listen_notsupp, ndrv_peeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, ndrv_send, pru_sense_null, ndrv_shutdown, - ndrv_sockaddr, sosend, soreceive, pru_sopoll_notsupp +static struct pr_usrreqs ndrv_usrreqs = { + .pru_abort = ndrv_abort, + .pru_attach = ndrv_attach, + .pru_bind = ndrv_bind, + .pru_connect = ndrv_connect, + .pru_detach = ndrv_detach, + .pru_disconnect = ndrv_disconnect, + .pru_peeraddr = ndrv_peeraddr, + .pru_send = ndrv_send, + .pru_shutdown = ndrv_shutdown, + .pru_sockaddr = ndrv_sockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; -struct protosw ndrvsw = -{ SOCK_RAW, &ndrvdomain, NDRVPROTO_NDRV, PR_ATOMIC|PR_ADDR, - NULL, ndrv_output, NULL, ndrv_ctloutput, - NULL, - NULL, NULL, NULL, NULL, NULL, - &ndrv_usrreqs, - NULL, NULL, NULL, - { NULL, NULL}, NULL, - { 0 } +static struct protosw ndrvsw[] = { +{ + .pr_type = SOCK_RAW, + .pr_protocol = NDRVPROTO_NDRV, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_output = ndrv_output, + .pr_ctloutput = ndrv_ctloutput, + .pr_usrreqs = &ndrv_usrreqs, +} }; -struct domain ndrvdomain = -{ AF_NDRV, - "NetDriver", - ndrv_dominit, - NULL, - NULL, - NULL, - NULL, - NULL, - 0, - 0, - 0, - 0, - NULL, - 0, - {0, 0} +static int ndrv_proto_count = (sizeof (ndrvsw) / sizeof (struct protosw)); + +struct domain ndrvdomain_s = { + .dom_family = PF_NDRV, + .dom_name = "NetDriver", + .dom_init = ndrv_dominit, }; + +static void +ndrv_dominit(struct domain *dp) +{ + struct protosw *pr; + int i; + + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(ndrvdomain == NULL); + + ndrvdomain = dp; + + for (i = 0, pr = &ndrvsw[0]; i < ndrv_proto_count; i++, pr++) + net_add_proto(pr, dp, 1); +} diff --git a/bsd/net/net_osdep.c b/bsd/net/net_osdep.c deleted file mode 100644 index dbe6f6692..000000000 --- a/bsd/net/net_osdep.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 
(c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -const char * -if_name(struct ifnet *ifp) -{ -#define MAXNUMBUF 8 - static char nam[MAXNUMBUF][IFNAMSIZ + 10]; /*enough?*/ - static int ifbufround = 0; - char *cp; - - ifbufround = (ifbufround + 1) % MAXNUMBUF; - cp = nam[ifbufround]; - - snprintf(cp, IFNAMSIZ + 10, "%s%d", ifp->if_name, ifp->if_unit); - return((const char *)cp); -#undef MAXNUMBUF -} diff --git a/bsd/net/net_osdep.h b/bsd/net/net_osdep.h index a17921f57..71d87d020 100644 --- a/bsd/net/net_osdep.h +++ b/bsd/net/net_osdep.h @@ -260,7 +260,6 @@ #ifdef KERNEL_PRIVATE struct ifnet; -extern const char *if_name(struct ifnet *); #define HAVE_OLD_BPF diff --git a/bsd/net/net_stubs.c b/bsd/net/net_stubs.c new file mode 100644 index 000000000..0fe96fe37 --- /dev/null +++ b/bsd/net/net_stubs.c @@ -0,0 +1,2692 @@ +#include + +#if !NETWORKING + +int bpf_attach(void); +int bpf_attach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int bpf_tap_in(void); +int bpf_tap_in(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int bpf_tap_out(void); +int bpf_tap_out(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int bpfattach(void); +int bpfattach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ctl_deregister(void); +int ctl_deregister(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ctl_enqueuedata(void); +int ctl_enqueuedata(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ctl_enqueuembuf(void); +int ctl_enqueuembuf(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ctl_getenqueuespace(void); +int ctl_getenqueuespace(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ctl_register(void); +int ctl_register(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ether_add_proto(void); +int ether_add_proto(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ether_check_multi(void); +int ether_check_multi(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ether_del_proto(void); +int ether_del_proto(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ether_demux(void); +int ether_demux(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ether_frameout(void); +int ether_frameout(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ether_ioctl(void); +int ether_ioctl(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_advlock(void); +int fifo_advlock(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_close(void); +int fifo_close(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_inactive(void); +int fifo_inactive(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_ioctl(void); +int fifo_ioctl(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_lookup(void); +int fifo_lookup(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_open(void); +int fifo_open(void) +{ + panic("stub called in a 
config with no networking"); + return 0; +} + +int fifo_pathconf(void); +int fifo_pathconf(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_read(void); +int fifo_read(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_select(void); +int fifo_select(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int fifo_write(void); +int fifo_write(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_address(void); +int ifaddr_address(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_address_family(void); +int ifaddr_address_family(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_dstaddress(void); +int ifaddr_dstaddress(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_findbestforaddr(void); +int ifaddr_findbestforaddr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_ifnet(void); +int ifaddr_ifnet(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_netmask(void); +int ifaddr_netmask(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_reference(void); +int ifaddr_reference(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_release(void); +int ifaddr_release(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_withaddr(void); +int ifaddr_withaddr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_withdstaddr(void); +int ifaddr_withdstaddr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_withnet(void); +int ifaddr_withnet(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifaddr_withroute(void); +int ifaddr_withroute(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int iflt_attach(void); +int iflt_attach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int iflt_detach(void); +int iflt_detach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifmaddr_address(void); +int ifmaddr_address(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifmaddr_ifnet(void); +int ifmaddr_ifnet(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifmaddr_lladdress(void); +int ifmaddr_lladdress(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifmaddr_reference(void); +int ifmaddr_reference(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifmaddr_release(void); +int ifmaddr_release(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_add_multicast(void); +int ifnet_add_multicast(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_addrlen(void); +int ifnet_addrlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_allocate(void); +int ifnet_allocate(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_attach(void); +int ifnet_attach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int 
ifnet_attach_protocol(void); +int ifnet_attach_protocol(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_baudrate(void); +int ifnet_baudrate(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_capabilities_enabled(void); +int ifnet_capabilities_enabled(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_capabilities_supported(void); +int ifnet_capabilities_supported(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_detach(void); +int ifnet_detach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_detach_protocol(void); +int ifnet_detach_protocol(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_eflags(void); +int ifnet_eflags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_event(void); +int ifnet_event(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_family(void); +int ifnet_family(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_subfamily(void); +int ifnet_subfamily(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_find_by_name(void); +int ifnet_find_by_name(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_flags(void); +int ifnet_flags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_free_address_list(void); +int ifnet_free_address_list(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_free_multicast_list(void); +int ifnet_free_multicast_list(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_address_list(void); +int ifnet_get_address_list(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_address_list_family(void); +int ifnet_get_address_list_family(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_link_mib_data(void); +int ifnet_get_link_mib_data(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_link_mib_data_length(void); +int ifnet_get_link_mib_data_length(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_multicast_list(void); +int ifnet_get_multicast_list(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_service_class_sndq_len(void); +int ifnet_get_service_class_sndq_len(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_tso_mtu(void); +int ifnet_get_tso_mtu(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_wake_flags(void); +int ifnet_get_wake_flags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_hdrlen(void); +int ifnet_hdrlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_index(void); +int ifnet_index(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_input(void); +int ifnet_input(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_interface_family_find(void); +int ifnet_interface_family_find(void) +{ + panic("stub 
called in a config with no networking"); + return 0; +} + +int ifnet_ioctl(void); +int ifnet_ioctl(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_lastchange(void); +int ifnet_lastchange(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_list_free(void); +int ifnet_list_free(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_list_get(void); +int ifnet_list_get(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_lladdr(void); +int ifnet_lladdr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_lladdr_copy_bytes(void); +int ifnet_lladdr_copy_bytes(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_llbroadcast_copy_bytes(void); +int ifnet_llbroadcast_copy_bytes(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_metric(void); +int ifnet_metric(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_mtu(void); +int ifnet_mtu(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_name(void); +int ifnet_name(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_offload(void); +int ifnet_offload(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_output(void); +int ifnet_output(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_output_raw(void); +int ifnet_output_raw(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_reference(void); +int ifnet_reference(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_release(void); +int ifnet_release(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_remove_multicast(void); +int ifnet_remove_multicast(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_resolve_multicast(void); +int ifnet_resolve_multicast(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_addrlen(void); +int ifnet_set_addrlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_baudrate(void); +int ifnet_set_baudrate(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_capabilities_enabled(void); +int ifnet_set_capabilities_enabled(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_capabilities_supported(void); +int ifnet_set_capabilities_supported(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_delegate(void); +int ifnet_set_delegate(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_eflags(void); +int ifnet_set_eflags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_flags(void); +int ifnet_set_flags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_hdrlen(void); +int ifnet_set_hdrlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_link_mib_data(void); +int ifnet_set_link_mib_data(void) +{ + panic("stub called in a config with no networking"); + return 0; +} 
+ +int ifnet_set_lladdr(void); +int ifnet_set_lladdr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_metric(void); +int ifnet_set_metric(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_mtu(void); +int ifnet_set_mtu(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_offload(void); +int ifnet_set_offload(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_promiscuous(void); +int ifnet_set_promiscuous(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_stat(void); +int ifnet_set_stat(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_tso_mtu(void); +int ifnet_set_tso_mtu(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_wake_flags(void); +int ifnet_set_wake_flags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_softc(void); +int ifnet_softc(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_stat(void); +int ifnet_stat(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_stat_increment(void); +int ifnet_stat_increment(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_stat_increment_in(void); +int ifnet_stat_increment_in(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_stat_increment_out(void); +int ifnet_stat_increment_out(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_touch_lastchange(void); +int ifnet_touch_lastchange(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_type(void); +int ifnet_type(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_unit(void); +int ifnet_unit(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int in_cksum(void); +int in_cksum(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int inet_arp_handle_input(void); +int inet_arp_handle_input(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int inet_arp_init_ifaddr(void); +int inet_arp_init_ifaddr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int inet_arp_lookup(void); +int inet_arp_lookup(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ipf_addv4(void); +int ipf_addv4(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ipf_addv6(void); +int ipf_addv6(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ipf_inject_input(void); +int ipf_inject_input(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ipf_inject_output(void); +int ipf_inject_output(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ipf_remove(void); +int ipf_remove(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int kev_msg_post(void); +int kev_msg_post(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int kev_vendor_code_find(void); +int kev_vendor_code_find(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int 
mbuf_adj(void); +int mbuf_adj(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_adjustlen(void); +int mbuf_adjustlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_align_32(void); +int mbuf_align_32(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_alloccluster(void); +int mbuf_alloccluster(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_allocpacket(void); +int mbuf_allocpacket(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_allocpacket_list(void); +int mbuf_allocpacket_list(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_attachcluster(void); +int mbuf_attachcluster(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_clear_csum_performed(void); +int mbuf_clear_csum_performed(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_clear_csum_requested(void); +int mbuf_clear_csum_requested(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_clear_vlan_tag(void); +int mbuf_clear_vlan_tag(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_concatenate(void); +int mbuf_concatenate(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_copy_pkthdr(void); +int mbuf_copy_pkthdr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_copyback(void); +int mbuf_copyback(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_copydata(void); +int mbuf_copydata(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_copym(void); +int mbuf_copym(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_data(void); +int mbuf_data(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_data_to_physical(void); +int mbuf_data_to_physical(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_datastart(void); +int mbuf_datastart(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_dup(void); +int mbuf_dup(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_flags(void); +int mbuf_flags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_free(void); +int mbuf_free(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_freecluster(void); +int mbuf_freecluster(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_freem(void); +int mbuf_freem(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_freem_list(void); +int mbuf_freem_list(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get(void); +int mbuf_get(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_csum_performed(void); +int mbuf_get_csum_performed(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_csum_requested(void); +int mbuf_get_csum_requested(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_mhlen(void); +int 
mbuf_get_mhlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_minclsize(void); +int mbuf_get_minclsize(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_mlen(void); +int mbuf_get_mlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_traffic_class(void); +int mbuf_get_traffic_class(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_tso_requested(void); +int mbuf_get_tso_requested(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_vlan_tag(void); +int mbuf_get_vlan_tag(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_getcluster(void); +int mbuf_getcluster(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_gethdr(void); +int mbuf_gethdr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_getpacket(void); +int mbuf_getpacket(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_inbound_modified(void); +int mbuf_inbound_modified(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_inet_cksum(void); +int mbuf_inet_cksum(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_is_traffic_class_privileged(void); +int mbuf_is_traffic_class_privileged(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_leadingspace(void); +int mbuf_leadingspace(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_len(void); +int mbuf_len(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_maxlen(void); +int mbuf_maxlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_mclget(void); +int mbuf_mclget(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_mclhasreference(void); +int mbuf_mclhasreference(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_next(void); +int mbuf_next(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_nextpkt(void); +int mbuf_nextpkt(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_outbound_finalize(void); +int mbuf_outbound_finalize(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pkthdr_adjustlen(void); +int mbuf_pkthdr_adjustlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pkthdr_header(void); +int mbuf_pkthdr_header(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pkthdr_len(void); +int mbuf_pkthdr_len(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pkthdr_rcvif(void); +int mbuf_pkthdr_rcvif(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pkthdr_setheader(void); +int mbuf_pkthdr_setheader(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pkthdr_setlen(void); +int mbuf_pkthdr_setlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pkthdr_setrcvif(void); +int mbuf_pkthdr_setrcvif(void) +{ + panic("stub called in a config with no 
networking"); + return 0; +} + +int mbuf_prepend(void); +int mbuf_prepend(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pulldown(void); +int mbuf_pulldown(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pullup(void); +int mbuf_pullup(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_set_csum_performed(void); +int mbuf_set_csum_performed(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_set_csum_requested(void); +int mbuf_set_csum_requested(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_set_traffic_class(void); +int mbuf_set_traffic_class(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_set_vlan_tag(void); +int mbuf_set_vlan_tag(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_setdata(void); +int mbuf_setdata(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_setflags(void); +int mbuf_setflags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_setflags_mask(void); +int mbuf_setflags_mask(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_setlen(void); +int mbuf_setlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_setnext(void); +int mbuf_setnext(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_setnextpkt(void); +int mbuf_setnextpkt(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_settype(void); +int mbuf_settype(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_split(void); +int mbuf_split(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_stats(void); +int mbuf_stats(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_tag_allocate(void); +int mbuf_tag_allocate(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_tag_find(void); +int mbuf_tag_find(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_tag_free(void); +int mbuf_tag_free(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_tag_id_find(void); +int mbuf_tag_id_find(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_add_drvaux(void); +int mbuf_add_drvaux(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_find_drvaux(void); +int mbuf_find_drvaux(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_del_drvaux(void); +int mbuf_del_drvaux(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_trailingspace(void); +int mbuf_trailingspace(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_type(void); +int mbuf_type(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_init_add(void); +int net_init_add(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int proto_inject(void); +int proto_inject(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int proto_input(void); +int 
proto_input(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int proto_register_plumber(void); +int proto_register_plumber(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int proto_unregister_plumber(void); +int proto_unregister_plumber(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sflt_attach(void); +int sflt_attach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sflt_detach(void); +int sflt_detach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sflt_register(void); +int sflt_register(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sflt_unregister(void); +int sflt_unregister(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_accept(void); +int sock_accept(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_bind(void); +int sock_bind(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_close(void); +int sock_close(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_connect(void); +int sock_connect(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_connectwait(void); +int sock_connectwait(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_getpeername(void); +int sock_getpeername(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_getsockname(void); +int sock_getsockname(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_getsockopt(void); +int sock_getsockopt(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_gettype(void); +int sock_gettype(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_inject_data_in(void); +int sock_inject_data_in(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_inject_data_out(void); +int sock_inject_data_out(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_ioctl(void); +int sock_ioctl(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_isconnected(void); +int sock_isconnected(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_isnonblocking(void); +int sock_isnonblocking(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_listen(void); +int sock_listen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_nointerrupt(void); +int sock_nointerrupt(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_receive(void); +int sock_receive(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_receivembuf(void); +int sock_receivembuf(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_send(void); +int sock_send(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_sendmbuf(void); +int sock_sendmbuf(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_setpriv(void); +int sock_setpriv(void) +{ + panic("stub called in a config with no networking"); + return 
0; +} + +int sock_setsockopt(void); +int sock_setsockopt(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_shutdown(void); +int sock_shutdown(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_socket(void); +int sock_socket(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sockopt_copyin(void); +int sockopt_copyin(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sockopt_copyout(void); +int sockopt_copyout(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sockopt_direction(void); +int sockopt_direction(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sockopt_level(void); +int sockopt_level(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sockopt_name(void); +int sockopt_name(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sockopt_valsize(void); +int sockopt_valsize(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int kev_post_msg(void); +int kev_post_msg(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ctl_id_by_name(void); +int ctl_id_by_name(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ctl_name_by_id(void); +int ctl_name_by_id(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_allocate_extended(void); +int ifnet_allocate_extended(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_bandwidths(void); +int ifnet_bandwidths(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_clone_attach(void); +int ifnet_clone_attach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_clone_detach(void); +int ifnet_clone_detach(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_dequeue(void); +int ifnet_dequeue(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_dequeue_multi(void); +int ifnet_dequeue_multi(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_dequeue_service_class(void); +int ifnet_dequeue_service_class(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_dequeue_service_class_multi(void); +int ifnet_dequeue_service_class_multi(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_enqueue(void); +int ifnet_enqueue(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_delegate(void); +int ifnet_get_delegate(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_inuse_address_list(void); +int ifnet_get_inuse_address_list(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_local_ports(void); +int ifnet_get_local_ports(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_local_ports_extended(void); +int ifnet_get_local_ports_extended(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_rcvq_maxlen(void); +int ifnet_get_rcvq_maxlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int 
ifnet_get_sndq_len(void); +int ifnet_get_sndq_len(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_get_sndq_maxlen(void); +int ifnet_get_sndq_maxlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_idle_flags(void); +int ifnet_idle_flags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_inet6_defrouter_llreachinfo(void); +int ifnet_inet6_defrouter_llreachinfo(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_inet_defrouter_llreachinfo(void); +int ifnet_inet_defrouter_llreachinfo(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_input_extended(void); +int ifnet_input_extended(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_latencies(void); +int ifnet_latencies(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_link_quality(void); +int ifnet_link_quality(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_notice_master_elected(void); +int ifnet_notice_master_elected(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_notice_node_absence(void); +int ifnet_notice_node_absence(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_notice_node_presence(void); +int ifnet_notice_node_presence(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_poll_params(void); +int ifnet_poll_params(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_purge(void); +int ifnet_purge(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_report_issues(void); +int ifnet_report_issues(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_bandwidths(void); +int ifnet_set_bandwidths(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_idle_flags(void); +int ifnet_set_idle_flags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_latencies(void); +int ifnet_set_latencies(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_link_quality(void); +int ifnet_set_link_quality(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_output_sched_model(void); +int ifnet_set_output_sched_model(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_poll_params(void); +int ifnet_set_poll_params(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_rcvq_maxlen(void); +int ifnet_set_rcvq_maxlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_set_sndq_maxlen(void); +int ifnet_set_sndq_maxlen(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_start(void); +int ifnet_start(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_transmit_burst_end(void); +int ifnet_transmit_burst_end(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_transmit_burst_start(void); +int ifnet_transmit_burst_start(void) +{ + panic("stub called in a config with no networking"); + return 0; +} 
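/*
 * Editor's sketch, not part of the original patch: every stub in this
 * file has the same declare/define/panic/return-0 shape, so the family
 * could be generated mechanically.  The NET_STUB() macro below is
 * hypothetical -- the patch spells each stub out longhand -- and is
 * shown only to make the repeated pattern explicit.
 */
#define NET_STUB(name)							\
	int name(void);							\
	int name(void)							\
	{								\
		panic("stub called in a config with no networking");	\
		return 0;						\
	}

NET_STUB(example_stub)	/* expands to exactly the shape used above */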
+ +int ifnet_tx_compl_status(void); +int ifnet_tx_compl_status(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_flowid(void); +int ifnet_flowid(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_enable_output(void); +int ifnet_enable_output(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ifnet_disable_output(void); +int ifnet_disable_output(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int in6_localaddr(void); +int in6_localaddr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int in_localaddr(void); +int in_localaddr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int in6addr_local(void); +int in6addr_local(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int inaddr_local(void); +int inaddr_local(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int inp_clear_INP_INADDR_ANY(void); +int inp_clear_INP_INADDR_ANY(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ip_gre_output(void); +int ip_gre_output(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_cat(void); +int m_cat(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_free(void); +int m_free(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_freem(void); +int m_freem(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_get(void); +int m_get(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_gethdr(void); +int m_gethdr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_mtod(void); +int m_mtod(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_prepend_2(void); +int m_prepend_2(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_pullup(void); +int m_pullup(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_split(void); +int m_split(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int m_trailingspace(void); +int m_trailingspace(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_driver_scratch(void); +int mbuf_get_driver_scratch(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_priority(void); +int mbuf_get_priority(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_service_class(void); +int mbuf_get_service_class(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_service_class_index(void); +int mbuf_get_service_class_index(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_service_class_max_count(void); +int mbuf_get_service_class_max_count(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_traffic_class_index(void); +int mbuf_get_traffic_class_index(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_get_traffic_class_max_count(void); +int mbuf_get_traffic_class_max_count(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int 
mbuf_is_service_class_privileged(void); +int mbuf_is_service_class_privileged(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mbuf_pkthdr_aux_flags(void); +int mbuf_pkthdr_aux_flags(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int mcl_to_paddr(void); +int mcl_to_paddr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_add_domain(void); +int net_add_domain(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_add_domain_old(void); +int net_add_domain_old(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_add_proto(void); +int net_add_proto(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_add_proto_old(void); +int net_add_proto_old(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_del_domain(void); +int net_del_domain(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_del_domain_old(void); +int net_del_domain_old(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_del_proto(void); +int net_del_proto(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int net_del_proto_old(void); +int net_del_proto_old(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pffinddomain(void); +int pffinddomain(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pffinddomain_old(void); +int pffinddomain_old(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pffindproto(void); +int pffindproto(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pffindproto_old(void); +int pffindproto_old(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_abort_notsupp(void); +int pru_abort_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_accept_notsupp(void); +int pru_accept_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_bind_notsupp(void); +int pru_bind_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_connect2_notsupp(void); +int pru_connect2_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_connect_notsupp(void); +int pru_connect_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_disconnect_notsupp(void); +int pru_disconnect_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_listen_notsupp(void); +int pru_listen_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_peeraddr_notsupp(void); +int pru_peeraddr_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_rcvd_notsupp(void); +int pru_rcvd_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_rcvoob_notsupp(void); +int pru_rcvoob_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_send_notsupp(void); +int pru_send_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_sense_null(void); +int pru_sense_null(void) +{ + panic("stub 
called in a config with no networking"); + return 0; +} + +int pru_shutdown_notsupp(void); +int pru_shutdown_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_sockaddr_notsupp(void); +int pru_sockaddr_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int pru_sopoll_notsupp(void); +int pru_sopoll_notsupp(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sbappendaddr(void); +int sbappendaddr(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sbappendrecord(void); +int sbappendrecord(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sbflush(void); +int sbflush(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sbspace(void); +int sbspace(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int soabort(void); +int soabort(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int socantrcvmore(void); +int socantrcvmore(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int socantsendmore(void); +int socantsendmore(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_getlistener(void); +int sock_getlistener(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_gettclassopt(void); +int sock_gettclassopt(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_release(void); +int sock_release(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_retain(void); +int sock_retain(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_settclassopt(void); +int sock_settclassopt(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_catchevents(void); +int sock_catchevents(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_setupcall(void); +int sock_setupcall(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sock_setupcalls(void); +int sock_setupcalls(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sodisconnect(void); +int sodisconnect(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sofree(void); +int sofree(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sofreelastref(void); +int sofreelastref(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int soisconnected(void); +int soisconnected(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int soisconnecting(void); +int soisconnecting(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int soisdisconnected(void); +int soisdisconnected(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int soisdisconnecting(void); +int soisdisconnecting(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sonewconn(void); +int sonewconn(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sooptcopyin(void); +int sooptcopyin(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sooptcopyout(void); +int sooptcopyout(void) +{ + panic("stub called 
in a config with no networking"); + return 0; +} + +int sopoll(void); +int sopoll(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int soreceive(void); +int soreceive(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int soreserve(void); +int soreserve(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sorwakeup(void); +int sorwakeup(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int sosend(void); +int sosend(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + + + +int utun_ctl_disable_crypto_dtls(void); +int utun_ctl_disable_crypto_dtls(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int utun_ctl_register_dtls(void); +int utun_ctl_register_dtls(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int utun_pkt_dtls_input(void); +int utun_pkt_dtls_input(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + + +int dlil_resolve_multi(void); +int dlil_resolve_multi(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + + +int inet_cksum_simple(void); +int inet_cksum_simple(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + + +int arp_ip_handle_input(void); +int arp_ip_handle_input(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int arp_ifinit(void); +int arp_ifinit(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int arp_lookup_ip(void); +int arp_lookup_ip(void) +{ + panic("stub called in a config with no networking"); + return 0; +} + +int ip_gre_register_input(void); +int ip_gre_register_input(void) +{ + panic("stub called in a config with no networking"); + return 0; + +} + +#endif diff --git a/bsd/net/netsrc.c b/bsd/net/netsrc.c index 7501053e3..052858985 100644 --- a/bsd/net/netsrc.c +++ b/bsd/net/netsrc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Copyright (c) 2011-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -207,8 +207,7 @@ netsrc_ipv6(kern_ctl_ref kctl, uint32_t unit, struct netsrc_req *nrq) return (EHOSTUNREACH); in6 = in6_selectsrc(dstsin6, NULL, NULL, &ro, NULL, &storage, nrq->nrq_ifscope, &error); - if (ro.ro_rt) - rtfree(ro.ro_rt); + ROUTE_RELEASE(&ro); if (!in6 || error) return (error); memset(&nrp, 0, sizeof(nrp)); @@ -229,6 +228,8 @@ netsrc_ipv6(kern_ctl_ref kctl, uint32_t unit, struct netsrc_req *nrq) nrp.nrp_flags |= NETSRC_IP6_FLAG_DEPRECATED; if (ia->ia6_flags & IN6_IFF_OPTIMISTIC) nrp.nrp_flags |= NETSRC_IP6_FLAG_OPTIMISTIC; + if (ia->ia6_flags & IN6_IFF_SECURED) + nrp.nrp_flags |= NETSRC_IP6_FLAG_SECURED; sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); memcpy(&sin6.sin6_addr, in6, sizeof(*in6)); diff --git a/bsd/net/netsrc.h b/bsd/net/netsrc.h index d93c4a014..13fcd456b 100644 --- a/bsd/net/netsrc.h +++ b/bsd/net/netsrc.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Copyright (c) 2011-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). 
You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -54,6 +54,7 @@ struct netsrc_rep { #define NETSRC_IP6_FLAG_TEMPORARY 0x0002 #define NETSRC_IP6_FLAG_DEPRECATED 0x0004 #define NETSRC_IP6_FLAG_OPTIMISTIC 0x0008 +#define NETSRC_IP6_FLAG_SECURED 0x0010 uint16_t nrp_flags; uint16_t nrp_label; uint16_t nrp_precedence; diff --git a/bsd/net/ntstat.c b/bsd/net/ntstat.c index eb83ac3c9..15ad48f34 100644 --- a/bsd/net/ntstat.c +++ b/bsd/net/ntstat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 Apple Inc. All rights reserved. + * Copyright (c) 2010-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include @@ -44,6 +46,8 @@ #include #include +#include +#include #include #include @@ -62,22 +66,27 @@ __private_extern__ int nstat_collect = 1; SYSCTL_INT(_net, OID_AUTO, statistics, CTLFLAG_RW | CTLFLAG_LOCKED, &nstat_collect, 0, "Collect detailed statistics"); +static int nstat_privcheck = 0; +SYSCTL_INT(_net, OID_AUTO, statistics_privcheck, CTLFLAG_RW | CTLFLAG_LOCKED, + &nstat_privcheck, 0, "Entitlement check"); + enum { - NSTAT_FLAG_CLEANUP = (0x1 << 0), - NSTAT_FLAG_REQCOUNTS = (0x1 << 1) + NSTAT_FLAG_CLEANUP = (1 << 0), + NSTAT_FLAG_REQCOUNTS = (1 << 1), + NSTAT_FLAG_REQDESCS = (1 << 2) }; typedef struct nstat_control_state { struct nstat_control_state *ncs_next; - u_int32_t ncs_watching; + u_int32_t ncs_watching; decl_lck_mtx_data(, mtx); - kern_ctl_ref ncs_kctl; - u_int32_t ncs_unit; - nstat_src_ref_t ncs_next_srcref; - struct nstat_src *ncs_srcs; - u_int32_t ncs_flags; + kern_ctl_ref ncs_kctl; + u_int32_t ncs_unit; + nstat_src_ref_t ncs_next_srcref; + struct nstat_src *ncs_srcs; + u_int32_t ncs_flags; } nstat_control_state; typedef struct nstat_provider @@ -100,7 +109,8 @@ typedef struct nstat_src struct nstat_src *next; nstat_src_ref_t srcref; nstat_provider *provider; - nstat_provider_cookie_t cookie; + nstat_provider_cookie_t cookie; + uint32_t filter; } nstat_src; static errno_t nstat_control_send_counts(nstat_control_state *, @@ -219,6 +229,7 @@ nstat_lookup_entry( static void nstat_init_route_provider(void); static void nstat_init_tcp_provider(void); static void nstat_init_udp_provider(void); +static void nstat_init_ifnet_provider(void); __private_extern__ void nstat_init(void) @@ -237,6 +248,7 @@ nstat_init(void) nstat_init_route_provider(); nstat_init_tcp_provider(); nstat_init_udp_provider(); + nstat_init_ifnet_provider(); nstat_control_register(); } } @@ -370,6 +382,7 @@ nstat_route_counts( out_counts->nstat_min_rtt = rt_stats->nstat_min_rtt; out_counts->nstat_avg_rtt = rt_stats->nstat_avg_rtt; out_counts->nstat_var_rtt = rt_stats->nstat_var_rtt; + out_counts->nstat_cell_rxbytes = 
out_counts->nstat_cell_txbytes = 0; } else bzero(out_counts, sizeof(*out_counts)); @@ -731,15 +744,60 @@ nstat_route_rtt( #pragma mark -- TCP Provider -- +/* + * Due to the way the kernel deallocates a process (the process structure + * might be gone by the time we get the PCB detach notification), + * we need to cache the process name. Without this, proc_name() would + * return null and the process name would never be sent to userland. + */ +struct nstat_tcpudp_cookie { + struct inpcb *inp; + char pname[MAXCOMLEN+1]; +}; + +static struct nstat_tcpudp_cookie * +nstat_tcpudp_cookie_alloc( + struct inpcb *inp, + bool ref) +{ + struct nstat_tcpudp_cookie *cookie; + + cookie = OSMalloc(sizeof(*cookie), nstat_malloc_tag); + if (cookie == NULL) + return NULL; + if (ref && in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + { + OSFree(cookie, sizeof(*cookie), nstat_malloc_tag); + return NULL; + } + bzero(cookie, sizeof(*cookie)); + cookie->inp = inp; + proc_name(inp->inp_socket->last_pid, cookie->pname, + sizeof(cookie->pname)); + + return cookie; +} + +static void +nstat_tcpudp_cookie_release( + struct nstat_tcpudp_cookie *cookie, + int inplock) +{ + in_pcb_checkstate(cookie->inp, WNT_RELEASE, inplock); + OSFree(cookie, sizeof(*cookie), nstat_malloc_tag); +} + static nstat_provider nstat_tcp_provider; static errno_t nstat_tcpudp_lookup( - struct inpcbinfo *inpinfo, - const void *data, - u_int32_t length, + struct inpcbinfo *inpinfo, + const void *data, + u_int32_t length, nstat_provider_cookie_t *out_cookie) { + struct inpcb *inp = NULL; + // parameter validation const nstat_tcp_add_param *param = (const nstat_tcp_add_param*)data; if (length < sizeof(*param)) @@ -754,7 +812,6 @@ nstat_tcpudp_lookup( return EINVAL; } - struct inpcb *inp = NULL; switch (param->local.v4.sin_family) { @@ -801,10 +858,14 @@ nstat_tcpudp_lookup( return EINVAL; } - if (inp == NULL) return ENOENT; + if (inp == NULL) + return ENOENT; // At this point we have a ref to the inpcb - *out_cookie = inp; + *out_cookie = nstat_tcpudp_cookie_alloc(inp, false); + if (*out_cookie == NULL) + in_pcb_checkstate(inp, WNT_RELEASE, 0); + return 0; } @@ -821,9 +882,15 @@ static int nstat_tcp_gone( nstat_provider_cookie_t cookie) { - struct inpcb *inp = (struct inpcb*)cookie; - struct tcpcb *tp = intotcpcb(inp); - return (inp->inp_state == INPCB_STATE_DEAD || tp->t_state == TCPS_TIME_WAIT) ? 1 : 0; + struct nstat_tcpudp_cookie *tucookie = + (struct nstat_tcpudp_cookie *)cookie; + struct inpcb *inp; + struct tcpcb *tp; + + return (!(inp = tucookie->inp) || + !(tp = intotcpcb(inp)) || + inp->inp_state == INPCB_STATE_DEAD || + tp->t_state == TCPS_TIME_WAIT) ? 
1 : 0; } static errno_t @@ -832,18 +899,23 @@ nstat_tcp_counts( struct nstat_counts *out_counts, int *out_gone) { - struct inpcb *inp = (struct inpcb*)cookie; - struct tcpcb *tp = intotcpcb(inp); - + struct nstat_tcpudp_cookie *tucookie = + (struct nstat_tcpudp_cookie *)cookie; + struct inpcb *inp; + bzero(out_counts, sizeof(*out_counts)); *out_gone = 0; // if the pcb is in the dead state, we should stop using it - if (inp->inp_state == INPCB_STATE_DEAD || tp->t_state == TCPS_TIME_WAIT) + if (nstat_tcp_gone(cookie)) { *out_gone = 1; - } + if (!(inp = tucookie->inp) || !intotcpcb(inp)) + return EINVAL; + } + inp = tucookie->inp; + struct tcpcb *tp = intotcpcb(inp); atomic_get_64(out_counts->nstat_rxpackets, &inp->inp_stat->rxpackets); atomic_get_64(out_counts->nstat_rxbytes, &inp->inp_stat->rxbytes); @@ -859,6 +931,10 @@ nstat_tcp_counts( out_counts->nstat_var_rtt = tp->t_rttvar; if (out_counts->nstat_avg_rtt < out_counts->nstat_min_rtt) out_counts->nstat_min_rtt = out_counts->nstat_avg_rtt; + atomic_get_64(out_counts->nstat_cell_rxbytes, &inp->inp_cstat->rxbytes); + atomic_get_64(out_counts->nstat_cell_txbytes, &inp->inp_cstat->txbytes); + atomic_get_64(out_counts->nstat_wifi_rxbytes, &inp->inp_wstat->rxbytes); + atomic_get_64(out_counts->nstat_wifi_txbytes, &inp->inp_wstat->txbytes); return 0; } @@ -868,8 +944,10 @@ nstat_tcp_release( nstat_provider_cookie_t cookie, int locked) { - struct inpcb *inp = (struct inpcb*)cookie; - in_pcb_checkstate(inp, WNT_RELEASE, locked); + struct nstat_tcpudp_cookie *tucookie = + (struct nstat_tcpudp_cookie *)cookie; + + nstat_tcpudp_cookie_release(tucookie, locked); } static errno_t @@ -878,23 +956,25 @@ nstat_tcp_add_watcher( { OSIncrementAtomic(&nstat_tcp_watchers); - lck_rw_lock_shared(tcbinfo.mtx); + lck_rw_lock_shared(tcbinfo.ipi_lock); // Add all current tcp inpcbs. 
Ignore those in timewait struct inpcb *inp; - for (inp = LIST_FIRST(tcbinfo.listhead); inp; inp = LIST_NEXT(inp, inp_list)) + struct nstat_tcpudp_cookie *cookie; + for (inp = LIST_FIRST(tcbinfo.ipi_listhead); inp; inp = LIST_NEXT(inp, inp_list)) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + cookie = nstat_tcpudp_cookie_alloc(inp, true); + if (cookie == NULL) continue; - - if (nstat_control_source_add(0, state, &nstat_tcp_provider, inp) != 0) + if (nstat_control_source_add(0, state, &nstat_tcp_provider, + cookie) != 0) { - in_pcb_checkstate(inp, WNT_RELEASE, 0); + nstat_tcpudp_cookie_release(cookie, false); break; } } - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return 0; } @@ -910,6 +990,8 @@ __private_extern__ void nstat_tcp_new_pcb( struct inpcb *inp) { + struct nstat_tcpudp_cookie *cookie; + if (nstat_tcp_watchers == 0) return; @@ -921,13 +1003,14 @@ nstat_tcp_new_pcb( { // this client is watching tcp // acquire a reference for it - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) - break; - + cookie = nstat_tcpudp_cookie_alloc(inp, true); + if (cookie == NULL) + continue; // add the source, if that fails, release the reference - if (nstat_control_source_add(0, state, &nstat_tcp_provider, inp) != 0) + if (nstat_control_source_add(0, state, + &nstat_tcp_provider, cookie) != 0) { - in_pcb_checkstate(inp, WNT_RELEASE, 0); + nstat_tcpudp_cookie_release(cookie, false); break; } } @@ -941,6 +1024,7 @@ nstat_pcb_detach(struct inpcb *inp) nstat_control_state *state; nstat_src *src, *prevsrc; nstat_src *dead_list = NULL; + struct nstat_tcpudp_cookie *tucookie; if (inp == NULL || (nstat_tcp_watchers == 0 && nstat_udp_watchers == 0)) return; @@ -949,9 +1033,12 @@ nstat_pcb_detach(struct inpcb *inp) for (state = nstat_controls; state; state = state->ncs_next) { lck_mtx_lock(&state->mtx); for (prevsrc = NULL, src = state->ncs_srcs; src; - prevsrc = src, src = src->next) - if (src->cookie == inp) + prevsrc = src, src = src->next) + { + tucookie = (struct nstat_tcpudp_cookie *)src->cookie; + if (tucookie->inp == inp) break; + } if (src) { // send one last counts notification @@ -986,21 +1073,22 @@ nstat_pcb_detach(struct inpcb *inp) static errno_t nstat_tcp_copy_descriptor( nstat_provider_cookie_t cookie, - void *data, - u_int32_t len) + void *data, + u_int32_t len) { if (len < sizeof(nstat_tcp_descriptor)) { return EINVAL; } - - nstat_tcp_descriptor *desc = (nstat_tcp_descriptor*)data; - struct inpcb *inp = (struct inpcb*)cookie; - struct tcpcb *tp = intotcpcb(inp); - if (inp->inp_state == INPCB_STATE_DEAD) + if (nstat_tcp_gone(cookie)) return EINVAL; + nstat_tcp_descriptor *desc = (nstat_tcp_descriptor*)data; + struct nstat_tcpudp_cookie *tucookie = + (struct nstat_tcpudp_cookie *)cookie; + struct inpcb *inp = tucookie->inp; + struct tcpcb *tp = intotcpcb(inp); bzero(desc, sizeof(*desc)); if (inp->inp_vflag & INP_IPV6) @@ -1035,10 +1123,28 @@ nstat_tcp_copy_descriptor( desc->upid = so->last_upid; desc->pid = so->last_pid; desc->traffic_class = so->so_traffic_class; - proc_name(desc->pid, desc->pname, sizeof(desc->pname)); - desc->pname[sizeof(desc->pname) - 1] = 0; - + if (desc->pname == NULL || desc->pname[0] == 0) + { + strlcpy(desc->pname, tucookie->pname, + sizeof(desc->pname)); + } + else + { + desc->pname[sizeof(desc->pname) - 1] = 0; + strlcpy(tucookie->pname, desc->pname, + sizeof(tucookie->pname)); + } + memcpy(desc->uuid, so->last_uuid, sizeof(so->last_uuid)); + if (so->so_flags & SOF_DELEGATED) { + desc->eupid = so->e_upid; + desc->epid = 
so->e_pid; + memcpy(desc->euuid, so->e_uuid, sizeof(so->e_uuid)); + } else { + desc->eupid = desc->upid; + desc->epid = desc->pid; + memcpy(desc->euuid, desc->uuid, sizeof(desc->uuid)); + } desc->sndbufsize = so->so_snd.sb_hiwat; desc->sndbufused = so->so_snd.sb_cc; desc->rcvbufsize = so->so_rcv.sb_hiwat; @@ -1082,30 +1188,42 @@ static int nstat_udp_gone( nstat_provider_cookie_t cookie) { - struct inpcb *inp = (struct inpcb*)cookie; - return (inp->inp_state == INPCB_STATE_DEAD) ? 1 : 0; + struct nstat_tcpudp_cookie *tucookie = + (struct nstat_tcpudp_cookie *)cookie; + struct inpcb *inp; + + return (!(inp = tucookie->inp) || + inp->inp_state == INPCB_STATE_DEAD) ? 1 : 0; } static errno_t nstat_udp_counts( nstat_provider_cookie_t cookie, - struct nstat_counts *out_counts, - int *out_gone) + struct nstat_counts *out_counts, + int *out_gone) { - struct inpcb *inp = (struct inpcb*)cookie; + struct nstat_tcpudp_cookie *tucookie = + (struct nstat_tcpudp_cookie *)cookie; *out_gone = 0; // if the pcb is in the dead state, we should stop using it - if (inp->inp_state == INPCB_STATE_DEAD) + if (nstat_udp_gone(cookie)) { *out_gone = 1; + if (!tucookie->inp) + return EINVAL; } + struct inpcb *inp = tucookie->inp; atomic_get_64(out_counts->nstat_rxpackets, &inp->inp_stat->rxpackets); atomic_get_64(out_counts->nstat_rxbytes, &inp->inp_stat->rxbytes); atomic_get_64(out_counts->nstat_txpackets, &inp->inp_stat->txpackets); atomic_get_64(out_counts->nstat_txbytes, &inp->inp_stat->txbytes); + atomic_get_64(out_counts->nstat_cell_rxbytes, &inp->inp_cstat->rxbytes); + atomic_get_64(out_counts->nstat_cell_txbytes, &inp->inp_cstat->txbytes); + atomic_get_64(out_counts->nstat_wifi_rxbytes, &inp->inp_wstat->rxbytes); + atomic_get_64(out_counts->nstat_wifi_txbytes, &inp->inp_wstat->txbytes); return 0; } @@ -1115,33 +1233,37 @@ nstat_udp_release( nstat_provider_cookie_t cookie, int locked) { - struct inpcb *inp = (struct inpcb*)cookie; - in_pcb_checkstate(inp, WNT_RELEASE, locked); + struct nstat_tcpudp_cookie *tucookie = + (struct nstat_tcpudp_cookie *)cookie; + + nstat_tcpudp_cookie_release(tucookie, locked); } static errno_t nstat_udp_add_watcher( nstat_control_state *state) { + struct inpcb *inp; + struct nstat_tcpudp_cookie *cookie; + OSIncrementAtomic(&nstat_udp_watchers); - lck_rw_lock_shared(tcbinfo.mtx); - - // Add all current tcp inpcbs. Ignore those in timewait - struct inpcb *inp; - for (inp = LIST_FIRST(udbinfo.listhead); inp; inp = LIST_NEXT(inp, inp_list)) + lck_rw_lock_shared(udbinfo.ipi_lock); + // Add all current UDP inpcbs. 
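// (Editor's addition, not part of the original patch: the cookie
// allocator used below takes the inpcb reference itself -- passing
// `true' makes it perform the in_pcb_checkstate(WNT_ACQUIRE) check --
// so a NULL return covers both allocation failure and a PCB that is
// already in WNT_STOPUSING, and the loop simply skips such entries.)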
+ for (inp = LIST_FIRST(udbinfo.ipi_listhead); inp; inp = LIST_NEXT(inp, inp_list)) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + cookie = nstat_tcpudp_cookie_alloc(inp, true); + if (cookie == NULL) continue; - - if (nstat_control_source_add(0, state, &nstat_udp_provider, inp) != 0) + if (nstat_control_source_add(0, state, &nstat_udp_provider, + cookie) != 0) { - in_pcb_checkstate(inp, WNT_RELEASE, 0); + nstat_tcpudp_cookie_release(cookie, false); break; } } - lck_rw_done(tcbinfo.mtx); + lck_rw_done(udbinfo.ipi_lock); return 0; } @@ -1157,6 +1279,8 @@ __private_extern__ void nstat_udp_new_pcb( struct inpcb *inp) { + struct nstat_tcpudp_cookie *cookie; + if (nstat_udp_watchers == 0) return; @@ -1168,13 +1292,14 @@ nstat_udp_new_pcb( { // this client is watching tcp // acquire a reference for it - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) - break; - + cookie = nstat_tcpudp_cookie_alloc(inp, true); + if (cookie == NULL) + continue; // add the source, if that fails, release the reference - if (nstat_control_source_add(0, state, &nstat_udp_provider, inp) != 0) + if (nstat_control_source_add(0, state, + &nstat_udp_provider, cookie) != 0) { - in_pcb_checkstate(inp, WNT_RELEASE, 0); + nstat_tcpudp_cookie_release(cookie, false); break; } } @@ -1193,12 +1318,14 @@ nstat_udp_copy_descriptor( return EINVAL; } - nstat_udp_descriptor *desc = (nstat_udp_descriptor*)data; - struct inpcb *inp = (struct inpcb*)cookie; - - if (inp->inp_state == INPCB_STATE_DEAD) + if (nstat_udp_gone(cookie)) return EINVAL; + struct nstat_tcpudp_cookie *tucookie = + (struct nstat_tcpudp_cookie *)cookie; + nstat_udp_descriptor *desc = (nstat_udp_descriptor*)data; + struct inpcb *inp = tucookie->inp; + bzero(desc, sizeof(*desc)); if (inp->inp_vflag & INP_IPV6) @@ -1226,13 +1353,31 @@ nstat_udp_copy_descriptor( // they're in sync? desc->upid = so->last_upid; desc->pid = so->last_pid; - + proc_name(desc->pid, desc->pname, sizeof(desc->pname)); + if (desc->pname == NULL || desc->pname[0] == 0) + { + strlcpy(desc->pname, tucookie->pname, + sizeof(desc->pname)); + } + else + { + desc->pname[sizeof(desc->pname) - 1] = 0; + strlcpy(tucookie->pname, desc->pname, + sizeof(tucookie->pname)); + } + memcpy(desc->uuid, so->last_uuid, sizeof(so->last_uuid)); + if (so->so_flags & SOF_DELEGATED) { + desc->eupid = so->e_upid; + desc->epid = so->e_pid; + memcpy(desc->euuid, so->e_uuid, sizeof(so->e_uuid)); + } else { + desc->eupid = desc->upid; + desc->epid = desc->pid; + memcpy(desc->euuid, desc->uuid, sizeof(desc->uuid)); + } desc->rcvbufsize = so->so_rcv.sb_hiwat; desc->rcvbufused = so->so_rcv.sb_cc; desc->traffic_class = so->so_traffic_class; - - proc_name(desc->pid, desc->pname, sizeof(desc->pname)); - desc->pname[sizeof(desc->pname) - 1] = 0; } return 0; @@ -1255,6 +1400,270 @@ nstat_init_udp_provider(void) nstat_providers = &nstat_udp_provider; } +#pragma mark -- ifnet Provider -- + +static nstat_provider nstat_ifnet_provider; + +/* + * We store a pointer to the ifnet and the original threshold + * requested by the client. 
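 * (Editor's addition, not part of the original patch: the threshold is
 * the byte count at which nstat_ifnet_threshold_reached() pushes a
 * counts message to interested clients.  Several clients may watch the
 * same interface, so the kernel programs if_data_threshold with the
 * smallest value requested so far and recomputes it when a client
 * detaches; see nstat_ifnet_release() below.)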
+ */ +struct nstat_ifnet_cookie +{ + struct ifnet *ifp; + uint64_t threshold; +}; + +static errno_t +nstat_ifnet_lookup( + const void *data, + u_int32_t length, + nstat_provider_cookie_t *out_cookie) +{ + const nstat_ifnet_add_param *param = (nstat_ifnet_add_param *)data; + struct ifnet *ifp; + boolean_t changed = FALSE; + nstat_control_state *state; + nstat_src *src; + struct nstat_ifnet_cookie *cookie; + + if (length < sizeof(*param) || param->threshold < 1024*1024) + return EINVAL; + if (nstat_privcheck != 0) { + errno_t result = priv_check_cred(kauth_cred_get(), + PRIV_NET_PRIVILEGED_NETWORK_STATISTICS, 0); + if (result != 0) + return result; + } + cookie = OSMalloc(sizeof(*cookie), nstat_malloc_tag); + if (cookie == NULL) + return ENOMEM; + bzero(cookie, sizeof(*cookie)); + + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) + { + ifnet_lock_exclusive(ifp); + if (ifp->if_index == param->ifindex) + { + cookie->ifp = ifp; + cookie->threshold = param->threshold; + *out_cookie = cookie; + if (!ifp->if_data_threshold || + ifp->if_data_threshold > param->threshold) + { + changed = TRUE; + ifp->if_data_threshold = param->threshold; + } + ifnet_lock_done(ifp); + ifnet_reference(ifp); + break; + } + ifnet_lock_done(ifp); + } + ifnet_head_done(); + + /* + * When we change the threshold to something smaller, we notify + * all of our clients with a description message. + * We won't send a message to the client we are currently serving + * because it has no `ifnet source' yet. + */ + if (changed) + { + lck_mtx_lock(&nstat_mtx); + for (state = nstat_controls; state; state = state->ncs_next) + { + lck_mtx_lock(&state->mtx); + for (src = state->ncs_srcs; src; src = src->next) + { + if (src->provider != &nstat_ifnet_provider) + continue; + nstat_control_send_description(state, src, 0); + } + lck_mtx_unlock(&state->mtx); + } + lck_mtx_unlock(&nstat_mtx); + } + if (cookie->ifp == NULL) + OSFree(cookie, sizeof(*cookie), nstat_malloc_tag); + + return ifp ? 0 : EINVAL; +} + +static int +nstat_ifnet_gone( + nstat_provider_cookie_t cookie) +{ + struct ifnet *ifp; + struct nstat_ifnet_cookie *ifcookie = + (struct nstat_ifnet_cookie *)cookie; + + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) + { + if (ifp == ifcookie->ifp) + break; + } + ifnet_head_done(); + + return ifp ? 0 : 1; +} + +static errno_t +nstat_ifnet_counts( + nstat_provider_cookie_t cookie, + struct nstat_counts *out_counts, + int *out_gone) +{ + struct nstat_ifnet_cookie *ifcookie = + (struct nstat_ifnet_cookie *)cookie; + struct ifnet *ifp = ifcookie->ifp; + + *out_gone = 0; + + // if the ifnet is gone, we should stop using it + if (nstat_ifnet_gone(cookie)) + { + *out_gone = 1; + return EINVAL; + } + + bzero(out_counts, sizeof(*out_counts)); + out_counts->nstat_rxpackets = ifp->if_ipackets; + out_counts->nstat_rxbytes = ifp->if_ibytes; + out_counts->nstat_txpackets = ifp->if_opackets; + out_counts->nstat_txbytes = ifp->if_obytes; + out_counts->nstat_cell_rxbytes = out_counts->nstat_cell_txbytes = 0; + + return 0; +} + +static void +nstat_ifnet_release( + nstat_provider_cookie_t cookie, + __unused int locked) +{ + struct nstat_ifnet_cookie *ifcookie; + struct ifnet *ifp; + nstat_control_state *state; + nstat_src *src; + uint64_t minthreshold = UINT64_MAX; + + /* + * Find all the clients that requested a threshold + * for this ifnet and re-calculate if_data_threshold. 
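 * (Editor's addition, not part of the original patch: the recomputed
 * value is the minimum threshold across all remaining ifnet sources.
 * When the last source goes away, minthreshold stays at UINT64_MAX and
 * if_data_threshold is reset to 0, which disables the notification.)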
+ */ + lck_mtx_lock(&nstat_mtx); + for (state = nstat_controls; state; state = state->ncs_next) + { + lck_mtx_lock(&state->mtx); + for (src = state->ncs_srcs; src; src = src->next) + { + /* Skip the provider we are about to detach. */ + if (src->provider != &nstat_ifnet_provider || + src->cookie == cookie) + continue; + ifcookie = (struct nstat_ifnet_cookie *)src->cookie; + if (ifcookie->threshold < minthreshold) + minthreshold = ifcookie->threshold; + } + lck_mtx_unlock(&state->mtx); + } + lck_mtx_unlock(&nstat_mtx); + /* + * Reset if_data_threshold or disable it. + */ + ifcookie = (struct nstat_ifnet_cookie *)cookie; + ifp = ifcookie->ifp; + if (ifnet_is_attached(ifp, 1)) { + ifnet_lock_exclusive(ifp); + if (minthreshold == UINT64_MAX) + ifp->if_data_threshold = 0; + else + ifp->if_data_threshold = minthreshold; + ifnet_lock_done(ifp); + ifnet_decr_iorefcnt(ifp); + } + ifnet_release(ifp); + OSFree(ifcookie, sizeof(*ifcookie), nstat_malloc_tag); +} + +static errno_t +nstat_ifnet_copy_descriptor( + nstat_provider_cookie_t cookie, + void *data, + u_int32_t len) +{ + nstat_ifnet_descriptor *desc = (nstat_ifnet_descriptor *)data; + struct nstat_ifnet_cookie *ifcookie = + (struct nstat_ifnet_cookie *)cookie; + struct ifnet *ifp = ifcookie->ifp; + + if (len < sizeof(nstat_ifnet_descriptor)) + return EINVAL; + + if (nstat_ifnet_gone(cookie)) + return EINVAL; + + bzero(desc, sizeof(*desc)); + ifnet_lock_shared(ifp); + strlcpy(desc->name, ifp->if_xname, sizeof(desc->name)); + desc->ifindex = ifp->if_index; + desc->threshold = ifp->if_data_threshold; + desc->type = ifp->if_type; + if (ifp->if_desc.ifd_len < sizeof(desc->description)) + memcpy(desc->description, ifp->if_desc.ifd_desc, + sizeof(desc->description)); + ifnet_lock_done(ifp); + + return 0; +} + +static void +nstat_init_ifnet_provider(void) +{ + bzero(&nstat_ifnet_provider, sizeof(nstat_ifnet_provider)); + nstat_ifnet_provider.nstat_provider_id = NSTAT_PROVIDER_IFNET; + nstat_ifnet_provider.nstat_descriptor_length = sizeof(nstat_ifnet_descriptor); + nstat_ifnet_provider.nstat_lookup = nstat_ifnet_lookup; + nstat_ifnet_provider.nstat_gone = nstat_ifnet_gone; + nstat_ifnet_provider.nstat_counts = nstat_ifnet_counts; + nstat_ifnet_provider.nstat_watcher_add = NULL; + nstat_ifnet_provider.nstat_watcher_remove = NULL; + nstat_ifnet_provider.nstat_copy_descriptor = nstat_ifnet_copy_descriptor; + nstat_ifnet_provider.nstat_release = nstat_ifnet_release; + nstat_ifnet_provider.next = nstat_providers; + nstat_providers = &nstat_ifnet_provider; +} + +__private_extern__ void +nstat_ifnet_threshold_reached(unsigned int ifindex) +{ + nstat_control_state *state; + nstat_src *src; + struct ifnet *ifp; + struct nstat_ifnet_cookie *ifcookie; + + lck_mtx_lock(&nstat_mtx); + for (state = nstat_controls; state; state = state->ncs_next) + { + lck_mtx_lock(&state->mtx); + for (src = state->ncs_srcs; src; src = src->next) + { + if (src->provider != &nstat_ifnet_provider) + continue; + ifcookie = (struct nstat_ifnet_cookie *)src->cookie; + ifp = ifcookie->ifp; + if (ifp->if_index != ifindex) + continue; + nstat_control_send_counts(state, src, 0, NULL); + } + lck_mtx_unlock(&state->mtx); + } + lck_mtx_unlock(&nstat_mtx); +} + #pragma mark -- Kernel Control Socket -- static kern_ctl_ref nstat_ctlref = NULL; @@ -1265,6 +1674,7 @@ static errno_t nstat_control_disconnect(kern_ctl_ref kctl, u_int32_t unit, void static errno_t nstat_control_send(kern_ctl_ref kctl, u_int32_t unit, void *uinfo, mbuf_t m, int flags); + static void* nstat_idle_check( __unused 
thread_call_param_t p0, @@ -1407,7 +1817,7 @@ static errno_t nstat_control_disconnect( __unused kern_ctl_ref kctl, __unused u_int32_t unit, - __unused void *uinfo) + void *uinfo) { u_int32_t watching; nstat_control_state *state = (nstat_control_state*)uinfo; @@ -1458,7 +1868,7 @@ nstat_control_disconnect( // clean it up nstat_control_cleanup_source(NULL, src, FALSE); } - + lck_mtx_destroy(&state->mtx, nstat_lck_grp); OSFree(state, sizeof(*state), nstat_malloc_tag); return 0; @@ -1510,8 +1920,14 @@ nstat_control_send_counts( bzero(&counts.counts, sizeof(counts.counts)); if (src->provider->nstat_counts(src->cookie, &counts.counts, &localgone) == 0) { - result = ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &counts, - sizeof(counts), CTL_DATA_EOR); + if ((src->filter & NSTAT_FILTER_NOZEROBYTES) && + counts.counts.nstat_rxbytes == 0 && + counts.counts.nstat_txbytes == 0) + result = EAGAIN; + else + result = ctl_enqueuedata(state->ncs_kctl, + state->ncs_unit, &counts, sizeof(counts), + CTL_DATA_EOR); } if (gone) *gone = localgone; @@ -1590,7 +2006,7 @@ nstat_control_handle_add_request( mbuf_t m) { errno_t result; - + // Verify the header fits in the first mbuf if (mbuf_len(m) < offsetof(nstat_msg_add_src_req, param)) { @@ -1654,6 +2070,13 @@ nstat_control_handle_add_all( if (!provider) return ENOENT; if (provider->nstat_watcher_add == NULL) return ENOTSUP; + if (nstat_privcheck != 0) { + result = priv_check_cred(kauth_cred_get(), + PRIV_NET_PRIVILEGED_NETWORK_STATISTICS, 0); + if (result != 0) + return result; + } + // Make sure we don't add the provider twice lck_mtx_lock(&state->mtx); if ((state->ncs_watching & (1 << provider->nstat_provider_id)) != 0) @@ -1661,7 +2084,7 @@ nstat_control_handle_add_all( state->ncs_watching |= (1 << provider->nstat_provider_id); lck_mtx_unlock(&state->mtx); if (result != 0) return result; - + result = provider->nstat_watcher_add(state); if (result != 0) { @@ -1726,6 +2149,7 @@ nstat_control_source_add( } src->provider = provider; src->cookie = cookie; + src->filter = 0; // send the source added message errno_t result = ctl_enqueuembuf(state->ncs_kctl, state->ncs_unit, msg, CTL_DATA_EOR); @@ -1814,6 +2238,7 @@ nstat_control_handle_query_request( int gone; gone = 0; + // XXX ignore IFACE types? if (req.srcref == NSTAT_SRC_REF_ALL || (*srcpp)->srcref == req.srcref) { @@ -1876,41 +2301,81 @@ nstat_control_handle_query_request( static errno_t nstat_control_handle_get_src_description( nstat_control_state *state, - mbuf_t m) + mbuf_t m) { nstat_msg_get_src_description req; + errno_t result; + nstat_src *src; + if (mbuf_copydata(m, 0, sizeof(req), &req) != 0) { return EINVAL; } - // Find the source lck_mtx_lock(&state->mtx); - nstat_src *src; + if (req.srcref == NSTAT_SRC_REF_ALL) + state->ncs_flags |= NSTAT_FLAG_REQDESCS; for (src = state->ncs_srcs; src; src = src->next) + if (req.srcref == NSTAT_SRC_REF_ALL || + src->srcref == req.srcref) + { + result = nstat_control_send_description(state, src, + req.hdr.context); + if (result != 0) + state->ncs_flags &= ~NSTAT_FLAG_REQDESCS; + if (req.srcref != NSTAT_SRC_REF_ALL) + break; + } + lck_mtx_unlock(&state->mtx); + if (req.srcref != NSTAT_SRC_REF_ALL && src == NULL) + result = ENOENT; + else if (req.srcref == NSTAT_SRC_REF_ALL) { - if (src->srcref == req.srcref) - break; - } - - // No source? Done. 
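/*
 * Editor's sketch, not part of the original patch: from userland, the
 * new NSTAT_MSG_TYPE_SET_FILTER request added below would be issued
 * over the com.apple.network.statistics kernel control socket.  This
 * assumes the private <net/ntstat.h> definitions introduced by this
 * patch; `fd' is assumed to be an already-connected control socket and
 * `srcref' to have arrived in a prior NSTAT_MSG_TYPE_SRC_ADDED message.
 */
#include <string.h>
#include <sys/socket.h>
#include <net/ntstat.h>	/* private header; assumption */

static int
nstat_filter_zero_byte_sources(int fd, nstat_src_ref_t srcref)
{
	nstat_msg_set_filter req;

	memset(&req, 0, sizeof(req));
	req.hdr.context = 42;	/* opaque; echoed back in replies */
	req.hdr.type = NSTAT_MSG_TYPE_SET_FILTER;
	req.srcref = srcref;
	req.filter = NSTAT_FILTER_NOZEROBYTES;

	/* With the filter set, sources whose rx and tx byte counts are
	 * both zero stop generating counts messages for this client. */
	return (send(fd, &req, sizeof(req), 0) == (ssize_t)sizeof(req)) ? 0 : -1;
}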
- if (!src) - { - lck_mtx_unlock(&state->mtx); - return ENOENT; + nstat_msg_hdr success; + success.context = req.hdr.context; + success.type = NSTAT_MSG_TYPE_SUCCESS; + success.pad = 0; + ctl_enqueuedata(state->ncs_kctl, state->ncs_unit, &success, sizeof(success), CTL_DATA_EOR); + result = 0; } - errno_t result = nstat_control_send_description(state, src, req.hdr.context); - lck_mtx_unlock(&state->mtx); - return result; } +static errno_t +nstat_control_handle_set_filter( + nstat_control_state *state, + mbuf_t m) +{ + nstat_msg_set_filter req; + nstat_src *src; + + if (mbuf_copydata(m, 0, sizeof(req), &req) != 0) + return EINVAL; + if (req.srcref == NSTAT_SRC_REF_ALL || + req.srcref == NSTAT_SRC_REF_INVALID) + return EINVAL; + + lck_mtx_lock(&state->mtx); + for (src = state->ncs_srcs; src; src = src->next) + if (req.srcref == src->srcref) + { + src->filter = req.filter; + break; + } + lck_mtx_unlock(&state->mtx); + if (src == NULL) + return ENOENT; + + return 0; + +} + static errno_t nstat_control_send( kern_ctl_ref kctl, u_int32_t unit, - __unused void *uinfo, + void *uinfo, mbuf_t m, __unused int flags) { @@ -1957,7 +2422,11 @@ nstat_control_send( case NSTAT_MSG_TYPE_GET_SRC_DESC: result = nstat_control_handle_get_src_description(state, m); break; - + + case NSTAT_MSG_TYPE_SET_FILTER: + result = nstat_control_handle_set_filter(state, m); + break; + default: result = EINVAL; break; diff --git a/bsd/net/ntstat.h b/bsd/net/ntstat.h index 4696d89ff..6acef925b 100644 --- a/bsd/net/ntstat.h +++ b/bsd/net/ntstat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011 Apple Inc. All rights reserved. + * Copyright (c) 2010-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -28,12 +28,14 @@ #ifndef __NTSTAT_H__ #define __NTSTAT_H__ #include +#include +#include #ifdef PRIVATE #pragma pack(push, 4) #pragma mark -- Common Data Structures -- -#define __NSTAT_REVISION__ 1 +#define __NSTAT_REVISION__ 4 typedef u_int32_t nstat_provider_id_t; typedef u_int32_t nstat_src_ref_t; @@ -56,6 +58,11 @@ typedef struct nstat_counts u_int32_t nstat_min_rtt; u_int32_t nstat_avg_rtt; u_int32_t nstat_var_rtt; + + u_int64_t nstat_cell_rxbytes __attribute__((aligned(8))); + u_int64_t nstat_cell_txbytes __attribute__((aligned(8))); + u_int64_t nstat_wifi_rxbytes __attribute__((aligned(8))); + u_int64_t nstat_wifi_txbytes __attribute__((aligned(8))); } nstat_counts; #pragma mark -- Network Statistics Providers -- @@ -63,8 +70,9 @@ typedef struct nstat_counts enum { NSTAT_PROVIDER_ROUTE = 1 - ,NSTAT_PROVIDER_TCP = 2 - ,NSTAT_PROVIDER_UDP = 3 + ,NSTAT_PROVIDER_TCP = 2 + ,NSTAT_PROVIDER_UDP = 3 + ,NSTAT_PROVIDER_IFNET = 4 }; typedef struct nstat_route_add_param @@ -126,6 +134,11 @@ typedef struct nstat_tcp_descriptor u_int64_t upid; u_int32_t pid; char pname[64]; + u_int64_t eupid; + u_int32_t epid; + + uint8_t uuid[16]; + uint8_t euuid[16]; } nstat_tcp_descriptor; typedef struct nstat_tcp_add_param nstat_udp_add_param; @@ -153,6 +166,11 @@ typedef struct nstat_udp_descriptor u_int64_t upid; u_int32_t pid; char pname[64]; + u_int64_t eupid; + u_int32_t epid; + + uint8_t uuid[16]; + uint8_t euuid[16]; } nstat_udp_descriptor; typedef struct nstat_route_descriptor @@ -187,6 +205,24 @@ typedef struct nstat_route_descriptor } nstat_route_descriptor; +typedef struct nstat_ifnet_add_param +{ + u_int32_t ifindex; + u_int64_t threshold; +} nstat_ifnet_add_param; + +#ifndef IF_DESCSIZE +#define IF_DESCSIZE 128 +#endif +typedef struct nstat_ifnet_descriptor +{ + char name[IFNAMSIZ+1]; + u_int32_t ifindex; + 
u_int64_t threshold; + unsigned int type; + char description[IF_DESCSIZE]; +} nstat_ifnet_descriptor; + #pragma mark -- Network Statistics User Client -- #define NET_STAT_CONTROL_NAME "com.apple.network.statistics" @@ -199,10 +235,11 @@ enum // Requests ,NSTAT_MSG_TYPE_ADD_SRC = 1001 - ,NSTAT_MSG_TYPE_ADD_ALL_SRCS = 1002 + ,NSTAT_MSG_TYPE_ADD_ALL_SRCS = 1002 ,NSTAT_MSG_TYPE_REM_SRC = 1003 ,NSTAT_MSG_TYPE_QUERY_SRC = 1004 - ,NSTAT_MSG_TYPE_GET_SRC_DESC = 1005 + ,NSTAT_MSG_TYPE_GET_SRC_DESC = 1005 + ,NSTAT_MSG_TYPE_SET_FILTER = 1006 // Responses/Notfications ,NSTAT_MSG_TYPE_SRC_ADDED = 10001 @@ -213,10 +250,15 @@ enum enum { - NSTAT_SRC_REF_ALL = 0xffffffff + NSTAT_SRC_REF_ALL = 0xffffffff ,NSTAT_SRC_REF_INVALID = 0 }; +enum +{ + NSTAT_FILTER_NOZEROBYTES = 0x01, +}; + typedef struct nstat_msg_hdr { u_int64_t context; @@ -262,6 +304,13 @@ typedef struct nstat_msg_get_src_description nstat_src_ref_t srcref; } nstat_msg_get_src_description; +typedef struct nstat_msg_set_filter +{ + nstat_msg_hdr hdr; + nstat_src_ref_t srcref; + u_int32_t filter; +} nstat_msg_set_filter; + typedef struct nstat_msg_src_description { nstat_msg_hdr hdr; @@ -334,6 +383,9 @@ void nstat_udp_new_pcb(struct inpcb *inp); void nstat_route_new_entry(struct rtentry *rt); void nstat_pcb_detach(struct inpcb *inp); + +void nstat_ifnet_threshold_reached(unsigned int ifindex); + // locked_add_64 uses atomic operations on 32bit so the 64bit // value can be properly read. The values are only ever incremented // while under the socket lock, so on 64bit we don't actually need diff --git a/bsd/net/pf.c b/bsd/net/pf.c index 50fc5bd03..55e1c27ef 100644 --- a/bsd/net/pf.c +++ b/bsd/net/pf.c @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2007-2012 Apple Inc. All rights reserved. +/* + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,6 +76,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,7 @@ #include #include #include +#include #include #include @@ -124,6 +126,12 @@ #include #endif /* DUMMYNET */ +/* + * For RandomULong(), to get a 32 bits random value + * Note that random() returns a 31 bits value, see rdar://11159750 + */ +#include + #define DPFPRINTF(n, x) (pf_status.debug >= (n) ? printf x : ((void)0)) /* @@ -398,9 +406,14 @@ pf_state_lookup_aux(struct pf_state **state, struct pfi_kif *kif, do { \ int action; \ *state = pf_find_state(kif, &key, direction); \ - if (*state != NULL && pd != NULL && \ - pd->flowhash == 0) { \ + if (*state != NULL && pd != NULL && \ + !(pd->pktflags & PKTF_FLOW_ID)) { \ + pd->flowsrc = (*state)->state_key->flowsrc; \ pd->flowhash = (*state)->state_key->flowhash; \ + if (pd->flowhash != 0) { \ + pd->pktflags |= PKTF_FLOW_ID; \ + pd->pktflags &= ~PKTF_FLOW_ADV; \ + } \ } \ if (pf_state_lookup_aux(state, kif, direction, &action)) \ return (action); \ @@ -1469,6 +1482,13 @@ pf_purge_thread_cont(int err) static u_int32_t nloops = 0; int t = 1; /* 1 second */ + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the periodic timeout callout to update + * the counter returnable via net_uptime(). 
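 * (Editor's addition, not part of the original patch: net_uptime()
 * returns a coarse, seconds-granularity uptime that networking code can
 * read in hot paths where a full microuptime() would cost too much;
 * refreshing it from this roughly once-per-second purge callout keeps
 * it close enough to wall-clock progression for timeout bookkeeping.)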
+ */ + net_update_uptime(); + lck_rw_lock_shared(pf_perim_lock); lck_mtx_lock(pf_lock); @@ -2012,6 +2032,7 @@ u_int32_t pf_calc_state_key_flowhash(struct pf_state_key *sk) { struct pf_flowhash_key fh __attribute__((aligned(8))); + uint32_t flowhash = 0; bzero(&fh, sizeof (fh)); if (PF_ALEQ(&sk->lan.addr, &sk->ext.addr, sk->af)) { @@ -2031,7 +2052,15 @@ pf_calc_state_key_flowhash(struct pf_state_key *sk) fh.af = sk->af; fh.proto = sk->proto; - return (net_flowhash(&fh, sizeof (fh), pf_hash_seed)); +try_again: + flowhash = net_flowhash(&fh, sizeof (fh), pf_hash_seed); + if (flowhash == 0) { + /* try to get a non-zero flowhash */ + pf_hash_seed = RandomULong(); + goto try_again; + } + + return (flowhash); } static int @@ -2048,7 +2077,8 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) return (1); return (0); case PF_ADDR_DYNIFTL: - return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); + return (aw1->p.dyn == NULL || aw2->p.dyn == NULL || + aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); case PF_ADDR_NOROUTE: case PF_ADDR_URPFFAILED: return (0); @@ -2407,6 +2437,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, pf_mtag->pftag_qid = r->qid; #endif /* PF_ALTQ */ +#if PF_ECN /* add hints for ecn */ pf_mtag->pftag_hdr = mtod(m, struct ip *); /* record address family */ @@ -2423,8 +2454,10 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, break; #endif /* INET6 */ } +#endif /* PF_ECN */ + /* indicate this is TCP */ - pf_mtag->pftag_flags |= PF_TAG_TCP; + m->m_pkthdr.pkt_proto = IPPROTO_TCP; /* Make sure headers are 32-bit aligned */ m->m_data += max_linkhdr; @@ -2502,8 +2535,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, bzero(&ro, sizeof (ro)); ip_output(m, NULL, &ro, 0, NULL, NULL); - if (ro.ro_rt != NULL) - rtfree(ro.ro_rt); + ROUTE_RELEASE(&ro); break; } #endif /* INET */ @@ -2520,8 +2552,7 @@ pf_send_tcp(const struct pf_rule *r, sa_family_t af, bzero(&ro6, sizeof (ro6)); ip6_output(m, NULL, &ro6, 0, NULL, NULL, NULL); - if (ro6.ro_rt != NULL) - rtfree(ro6.ro_rt); + ROUTE_RELEASE(&ro6); break; } #endif /* INET6 */ @@ -2552,23 +2583,26 @@ pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, pf_mtag->pftag_qid = r->qid; #endif /* PF_ALTQ */ +#if PF_ECN /* add hints for ecn */ pf_mtag->pftag_hdr = mtod(m0, struct ip *); /* record address family */ - pf_mtag->pftag_flags &= - ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6 | PF_TAG_TCP); + pf_mtag->pftag_flags &= ~(PF_TAG_HDR_INET | PF_TAG_HDR_INET6); switch (af) { #if INET case AF_INET: pf_mtag->pftag_flags |= PF_TAG_HDR_INET; + m0->m_pkthdr.pkt_proto = IPPROTO_ICMP; break; #endif /* INET */ #if INET6 case AF_INET6: pf_mtag->pftag_flags |= PF_TAG_HDR_INET6; + m0->m_pkthdr.pkt_proto = IPPROTO_ICMPV6; break; #endif /* INET6 */ } +#endif /* PF_ECN */ switch (af) { #if INET @@ -2772,8 +2806,8 @@ int pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, unsigned int rtableid, struct pf_pdesc *pd) { - if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) && - (pd == NULL || pd->flowhash == 0)) + if (tag <= 0 && !PF_RTABLEID_IS_VALID(rtableid) && + (pd == NULL || !(pd->pktflags & PKTF_FLOW_ID))) return (0); if (pf_mtag == NULL && (pf_mtag = pf_get_mtag(m)) == NULL) @@ -2783,11 +2817,11 @@ pf_tag_packet(struct mbuf *m, struct pf_mtag *pf_mtag, int tag, pf_mtag->pftag_tag = tag; if (PF_RTABLEID_IS_VALID(rtableid)) pf_mtag->pftag_rtableid = rtableid; - if (pd != NULL && pd->flowhash != 0) { - pf_mtag->pftag_flags |= PF_TAG_FLOWHASH; - pf_mtag->pftag_flowhash = pd->flowhash; - pf_mtag->pftag_flags |= 
(pd->flags & PFDESC_FLOW_ADV) ? - PF_TAG_FLOWADV : 0; + if (pd != NULL && (pd->pktflags & PKTF_FLOW_ID)) { + m->m_pkthdr.pkt_flowsrc = pd->flowsrc; + m->m_pkthdr.pkt_flowid = pd->flowhash; + m->m_pkthdr.pkt_flags |= pd->pktflags; + m->m_pkthdr.pkt_proto = pd->proto; } return (0); @@ -3019,6 +3053,8 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, if (rpool->cur->addr.type == PF_ADDR_NOROUTE) return (1); if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { + if (rpool->cur->addr.p.dyn == NULL) + return (1); switch (af) { #if INET case AF_INET: @@ -3068,22 +3104,22 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, case AF_INET6: if (rmask->addr32[3] != 0xffffffff) rpool->counter.addr32[3] = - htonl(random()); + RandomULong(); else break; if (rmask->addr32[2] != 0xffffffff) rpool->counter.addr32[2] = - htonl(random()); + RandomULong(); else break; if (rmask->addr32[1] != 0xffffffff) rpool->counter.addr32[1] = - htonl(random()); + RandomULong(); else break; if (rmask->addr32[0] != 0xffffffff) rpool->counter.addr32[0] = - htonl(random()); + RandomULong(); break; #endif /* INET6 */ } @@ -3108,7 +3144,8 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, &raddr, &rmask, af)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, + if (rpool->cur->addr.p.dyn != NULL && + !pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, &raddr, &rmask, af)) goto get_addr; @@ -3130,6 +3167,8 @@ pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, } } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { rpool->tblidx = -1; + if (rpool->cur->addr.p.dyn == NULL) + return (1); if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, &raddr, &rmask, af)) { @@ -3477,6 +3516,8 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, case PF_OUT: if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL) { + if (r->rpool.cur->addr.p.dyn == NULL) + return (NULL); switch (pd->af) { #if INET case AF_INET: @@ -3514,6 +3555,8 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, break; case PF_IN: if (r->src.addr.type == PF_ADDR_DYNIFTL) { + if (r->src.addr.p.dyn == NULL) + return (NULL); switch (pd->af) { #if INET case AF_INET: @@ -3554,6 +3597,8 @@ pf_get_translation_aux(struct pf_pdesc *pd, struct mbuf *m, int off, switch (direction) { case PF_OUT: if (r->dst.addr.type == PF_ADDR_DYNIFTL) { + if (r->dst.addr.p.dyn == NULL) + return (NULL); switch (pd->af) { #if INET case AF_INET: @@ -3873,7 +3918,7 @@ pf_calc_mss(struct pf_addr *addr, sa_family_t af, u_int16_t offer) if (rt && rt->rt_ifp) { mss = rt->rt_ifp->if_mtu - hlen - sizeof (struct tcphdr); mss = max(tcp_mssdflt, mss); - RTFREE(rt); + rtfree(rt); } mss = min(mss, offer); mss = max(mss, 64); /* sanity - at least max opt space */ @@ -3963,6 +4008,7 @@ pf_alloc_state_key(struct pf_state *s, struct pf_state_key *psk) sk->direction = psk->direction; sk->proto_variant = psk->proto_variant; VERIFY(psk->app_state == NULL); + sk->flowsrc = psk->flowsrc; sk->flowhash = psk->flowhash; /* don't touch tree entries, states and refcnt on sk */ } @@ -4361,7 +4407,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= (random() % (UINT_MAX - 1) + 1)) + else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 
1) + 1)) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); @@ -4699,12 +4745,18 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, psk.gwy.xport = psk.lan.xport; } } - if (pd->flowhash != 0) { - /* flowhash was already computed by upper layers */ + if (pd->pktflags & PKTF_FLOW_ID) { + /* flow hash was already computed outside of PF */ + psk.flowsrc = pd->flowsrc; psk.flowhash = pd->flowhash; } else { + /* compute flow hash and store it in state key */ + psk.flowsrc = FLOWSRC_PF; psk.flowhash = pf_calc_state_key_flowhash(&psk); + pd->flowsrc = psk.flowsrc; pd->flowhash = psk.flowhash; + pd->pktflags |= PKTF_FLOW_ID; + pd->pktflags &= ~PKTF_FLOW_ADV; } if (pf_tag_packet(m, pd->pf_mtag, tag, rtableid, pd)) { @@ -5216,6 +5268,8 @@ pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif, ((pd->flags & PFDESC_IP_FRAG) || (r->flagset & th->th_flags) != r->flags)) r = TAILQ_NEXT(r, entries); + else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) + r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); else { @@ -5399,7 +5453,7 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, pd->proto == IPPROTO_ICMPV6) && (r->type || r->code)) r = TAILQ_NEXT(r, entries); - else if (r->prob && r->prob <= (random() % (UINT_MAX - 1) + 1)) + else if (r->prob && r->prob <= (RandomULong() % (UINT_MAX - 1) + 1)) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, pd->pf_mtag, &tag)) r = TAILQ_NEXT(r, entries); @@ -5526,6 +5580,7 @@ pf_pptp_handler(struct pf_state *s, int direction, int off, gsk->lan.xport.call_id = 0; gsk->gwy.xport.call_id = 0; gsk->ext.xport.call_id = 0; + gsk->flowsrc = FLOWSRC_PF; gsk->flowhash = pf_calc_state_key_flowhash(gsk); memset(gas, 0, sizeof (*gas)); gas->u.grev1.pptp_state = s; @@ -5931,10 +5986,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, return (PF_DROP); } - if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { - sws = src->wscale & PF_WSCALE_MASK; - dws = dst->wscale & PF_WSCALE_MASK; - } else + if ((th->th_flags & TH_SYN) == 0) { + sws = (src->wscale & PF_WSCALE_FLAG) ? + (src->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT; + dws = (dst->wscale & PF_WSCALE_FLAG) ? + (dst->wscale & PF_WSCALE_MASK) : TCP_MAX_WINSHIFT; + } + else sws = dws = 0; /* @@ -6247,9 +6305,10 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, printf("pf: BAD state: "); pf_print_state(*state); pf_print_flags(th->th_flags); - printf(" seq=%u (%u) ack=%u len=%u ackskew=%d " - "pkts=%llu:%llu dir=%s,%s\n", + printf("\n seq=%u (%u) ack=%u len=%u ackskew=%d " + "sws=%u dws=%u pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, + (unsigned int)sws, (unsigned int)dws, (*state)->packets[0], (*state)->packets[1], direction == PF_IN ? "in" : "out", direction == (*state)->state_key->direction ? 
@@ -6383,9 +6442,15 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, *state = pf_find_state(kif, &key, dx); } - if ((*state) != NULL && pd != NULL && - pd->flowhash == 0) + /* similar to STATE_LOOKUP() */ + if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) { + pd->flowsrc = (*state)->state_key->flowsrc; pd->flowhash = (*state)->state_key->flowhash; + if (pd->flowhash != 0) { + pd->pktflags |= PKTF_FLOW_ID; + pd->pktflags &= ~PKTF_FLOW_ADV; + } + } if (pf_state_lookup_aux(state, kif, direction, &action)) return (action); @@ -6754,10 +6819,10 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, dst = &(*state)->dst; } - if (src->wscale && dst->wscale) + if (src->wscale && (dst->wscale & PF_WSCALE_FLAG)) dws = dst->wscale & PF_WSCALE_MASK; else - dws = 0; + dws = TCP_MAX_WINSHIFT; /* Demodulate sequence number */ seq = ntohl(th.th_seq) - src->seqdiff; @@ -6899,9 +6964,16 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, *state = pf_find_state(kif, &key, dx); } + /* similar to STATE_LOOKUP() */ if (*state != NULL && pd != NULL && - pd->flowhash == 0) + !(pd->pktflags & PKTF_FLOW_ID)) { + pd->flowsrc = (*state)->state_key->flowsrc; pd->flowhash = (*state)->state_key->flowhash; + if (pd->flowhash != 0) { + pd->pktflags |= PKTF_FLOW_ID; + pd->pktflags &= ~PKTF_FLOW_ADV; + } + } if (pf_state_lookup_aux(state, kif, direction, &action)) return (action); @@ -7310,9 +7382,14 @@ pf_test_state_esp(struct pf_state **state, int direction, struct pfi_kif *kif, } } - if (*state != NULL && pd != NULL && - pd->flowhash == 0) { + /* similar to STATE_LOOKUP() */ + if (*state != NULL && pd != NULL && !(pd->pktflags & PKTF_FLOW_ID)) { + pd->flowsrc = (*state)->state_key->flowsrc; pd->flowhash = (*state)->state_key->flowhash; + if (pd->flowhash != 0) { + pd->pktflags |= PKTF_FLOW_ID; + pd->pktflags &= ~PKTF_FLOW_ADV; + } } if (pf_state_lookup_aux(state, kif, direction, &action)) @@ -7556,11 +7633,11 @@ pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif) if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) goto out; + /* XXX: what is the point of this? */ rtalloc((struct route *)&ro); out: - if (ro.ro_rt != NULL) - RTFREE(ro.ro_rt); + ROUTE_RELEASE(&ro); return (ret); } @@ -7597,11 +7674,10 @@ pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw) return (0); } + /* XXX: what is the point of this? 
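A zero flow hash doubles as the "no flow ID" sentinel throughout these hunks: PKTF_FLOW_ID is only set once pd->flowhash is nonzero, which is why the try_again loop added to pf_calc_state_key_flowhash() earlier in this file reseeds and rehashes until it gets a nonzero value. A standalone sketch of that pattern, with an illustrative seeded FNV-1a standing in for net_flowhash():

	#include <stddef.h>
	#include <stdint.h>
	#include <stdlib.h>

	/* Toy keyed hash in place of net_flowhash(); any 32-bit hash
	 * that takes a seed has the same shape. */
	static uint32_t
	toy_flowhash(const void *key, size_t len, uint32_t seed)
	{
		const unsigned char *p = key;
		uint32_t h = 2166136261u ^ seed;	/* FNV-1a basis, seeded */
		size_t i;

		for (i = 0; i < len; i++) {
			h ^= p[i];
			h *= 16777619u;
		}
		return (h);
	}

	static uint32_t hash_seed;			/* cf. pf_hash_seed */

	/* 0 is reserved to mean "no flow ID", so a zero result forces a
	 * reseed and retry, as the try_again loop does with RandomULong().
	 * Reseeding only affects hashes computed from then on; values
	 * already stored in state keys stay valid because they are
	 * cached there, never recomputed. */
	uint32_t
	nonzero_flowhash(const void *key, size_t len)
	{
		uint32_t h;

		while ((h = toy_flowhash(key, len, hash_seed)) == 0)
			hash_seed = (uint32_t)arc4random();
		return (h);
	}
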
*/ rtalloc((struct route *)&ro); - if (ro.ro_rt != NULL) { - RTFREE(ro.ro_rt); - } + ROUTE_RELEASE(&ro); return (ret); } @@ -7614,14 +7690,16 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, #pragma unused(pd) struct mbuf *m0, *m1; struct route iproute; - struct route *ro = NULL; + struct route *ro = &iproute; struct sockaddr_in *dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; - int sw_csum = 0; + uint32_t sw_csum; + + bzero(&iproute, sizeof (iproute)); if (m == NULL || *m == NULL || r == NULL || (dir != PF_IN && dir != PF_OUT) || oifp == NULL) @@ -7650,8 +7728,6 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip = mtod(m0, struct ip *); - ro = &iproute; - bzero((caddr_t)ro, sizeof (*ro)); dst = satosin((void *)&ro->ro_dst); dst->sin_family = AF_INET; dst->sin_len = sizeof (*dst); @@ -7659,7 +7735,7 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (r->rt == PF_FASTROUTE) { rtalloc(ro); - if (ro->ro_rt == 0) { + if (ro->ro_rt == NULL) { ipstat.ips_noroute++; goto bad; } @@ -7707,62 +7783,19 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, ip = mtod(m0, struct ip *); } - /* Copied from ip_output. */ - /* Catch routing changes wrt. hardware checksumming for TCP or UDP. */ - m0->m_pkthdr.csum_flags |= CSUM_IP; - sw_csum = m0->m_pkthdr.csum_flags & - ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); - - if (ifp->if_hwassist & CSUM_TCP_SUM16) { - /* - * Special case code for GMACE - * frames that can be checksumed by GMACE SUM16 HW: - * frame >64, no fragments, no UDP - */ - if (apple_hwcksum_tx && (m0->m_pkthdr.csum_flags & CSUM_TCP) && - (ntohs(ip->ip_len) > 50) && - (ntohs(ip->ip_len) <= ifp->if_mtu)) { - /* - * Apple GMAC HW, expects: - * STUFF_OFFSET << 16 | START_OFFSET - */ - /* IP+Enet header length */ - u_short offset = ((ip->ip_hl) << 2) + 14; - u_short csumprev = m0->m_pkthdr.csum_data & 0xffff; - m0->m_pkthdr.csum_flags = CSUM_DATA_VALID | - CSUM_TCP_SUM16; /* for GMAC */ - m0->m_pkthdr.csum_data = (csumprev + offset) << 16 ; - m0->m_pkthdr.csum_data += offset; - /* do IP hdr chksum in software */ - sw_csum = CSUM_DELAY_IP; - } else { - /* let the software handle any UDP or TCP checksums */ - sw_csum |= (CSUM_DELAY_DATA & m0->m_pkthdr.csum_flags); - } - } else if (apple_hwcksum_tx == 0) { - sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) & - m0->m_pkthdr.csum_flags; - } + ip_output_checksum(ifp, m0, ((ip->ip_hl) << 2), ntohs(ip->ip_len), + &sw_csum); - if (sw_csum & CSUM_DELAY_DATA) { - in_delayed_cksum(m0); - sw_csum &= ~CSUM_DELAY_DATA; - m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } - - if (apple_hwcksum_tx != 0) { - m0->m_pkthdr.csum_flags &= - IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); - } else { - m0->m_pkthdr.csum_flags = 0; - } - - if (ntohs(ip->ip_len) <= ifp->if_mtu || - (ifp->if_hwassist & CSUM_FRAGMENT)) { + if (ntohs(ip->ip_len) <= ifp->if_mtu || TSO_IPV4_OK(ifp, m0) || + (!(ip->ip_off & htons(IP_DF)) && + (ifp->if_hwassist & CSUM_FRAGMENT))) { ip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) + if (sw_csum & CSUM_DELAY_IP) { ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); + sw_csum &= ~CSUM_DELAY_IP; + m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP; + } error = ifnet_output(ifp, PF_INET, m0, ro->ro_rt, sintosa(dst)); goto done; } @@ -7770,8 +7803,10 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, /* * Too large for interface; fragment if possible. 
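The pf_route() rewrite in progress here delegates transmit-checksum bookkeeping to ip_output_checksum() and then decides among handing the whole packet to the driver, software-fragmenting it, or bouncing it with ICMP "fragmentation needed". A condensed sketch of that decision, under the simplifying assumption that TSO_IPV4_OK() and the CSUM_FRAGMENT hardware capability are modeled as booleans:

	#include <stdbool.h>
	#include <stdint.h>

	#define DF_SET	0x4000		/* IP_DF, host byte order for clarity */

	typedef enum {
		TX_WHOLE,		/* enqueue as-is via ifnet_output() */
		TX_SW_FRAGMENT,		/* fragment in software, send pieces */
		TX_ICMP_NEEDFRAG	/* icmp_error(ICMP_UNREACH_NEEDFRAG) */
	} tx_action_t;

	/*
	 * Mirrors the ordering in pf_route(): send whole when the packet
	 * fits the MTU, when the interface will segment it (TSO), or when
	 * hardware can fragment and DF is clear; otherwise a DF or
	 * TSO-marked packet cannot be fragmented and is bounced, and
	 * everything else is fragmented in software.
	 */
	tx_action_t
	ipv4_tx_action(uint16_t ip_len, uint16_t ip_off, uint32_t if_mtu,
	    bool tso_ok, bool hw_can_frag, bool tso_marked)
	{
		bool df = (ip_off & DF_SET) != 0;

		if (ip_len <= if_mtu || tso_ok || (!df && hw_can_frag))
			return (TX_WHOLE);
		if (df || tso_marked)
			return (TX_ICMP_NEEDFRAG);
		return (TX_SW_FRAGMENT);
	}
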
* Must be able to put at least 8 bytes per fragment. + * Balk when DF bit is set or the interface didn't support TSO. */ - if (ip->ip_off & htons(IP_DF)) { + if ((ip->ip_off & htons(IP_DF)) || + (m0->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) { ipstat.ips_cantfrag++; if (r->rt != PF_DUPTO) { icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, @@ -7811,8 +7846,8 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, done: if (r->rt != PF_DUPTO) *m = NULL; - if (ro == &iproute && ro->ro_rt) - RTFREE(ro->ro_rt); + + ROUTE_RELEASE(&iproute); return; bad: @@ -7964,8 +7999,9 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, * is partially-computed (only 16-bit summation), do it in * software below. */ - if (apple_hwcksum_rx && (m->m_pkthdr.csum_flags & - (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) && + if ((m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) && (m->m_pkthdr.csum_data ^ 0xffff) == 0) { return (0); } @@ -8086,10 +8122,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); -#ifdef DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) - panic("non-M_PKTHDR is passed to pf_test"); -#endif /* DIAGNOSTIC */ + VERIFY(m->m_flags & M_PKTHDR); /* initialize enough of pd for the done label */ h = mtod(m, struct ip *); @@ -8155,10 +8188,11 @@ nonormalize: pd.sc = MBUF_SCIDX(mbuf_get_service_class(m)); pd.tot_len = ntohs(h->ip_len); pd.eh = eh; - if (pd.pf_mtag != NULL && pd.pf_mtag->pftag_flowhash != 0) { - pd.flowhash = pd.pf_mtag->pftag_flowhash; - pd.flags |= (m->m_pkthdr.m_fhflags & PF_TAG_FLOWADV) ? - PFDESC_FLOW_ADV : 0; + + if (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) { + pd.flowsrc = m->m_pkthdr.pkt_flowsrc; + pd.flowhash = m->m_pkthdr.pkt_flowid; + pd.pktflags = (m->m_pkthdr.pkt_flags & PKTF_FLOW_MASK); } /* handle fragments that didn't get reassembled by normalization */ @@ -8218,7 +8252,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -8260,7 +8294,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -8295,7 +8329,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -8329,7 +8363,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ipintrq); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -8371,7 +8405,7 @@ nonormalize: break; } else if (s == NULL) { action = pf_test_rule(&r, &s, dir, kif, m, off, - h, &pd, &a, &ruleset, &ipintrq); + h, &pd, &a, &ruleset, NULL); if (action == PF_PASS) break; } @@ -8402,7 +8436,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, &ipintrq); + &pd, &a, &ruleset, NULL); break; } @@ -8421,7 +8455,7 @@ done: } if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) || - pd.flowhash != 0) + (pd.pktflags & PKTF_FLOW_ID)) (void) pf_tag_packet(m, pd.pf_mtag, s ? 
s->tag : 0, r->rtableid, &pd); @@ -8434,16 +8468,15 @@ done: pd.pf_mtag->pftag_qid = r->qid; } #endif /* PF_ALTQ */ +#if PF_ECN /* add hints for ecn */ pd.pf_mtag->pftag_hdr = h; /* record address family */ pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET6; pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET; - /* record TCP vs. non-TCP */ - if (pd.proto == IPPROTO_TCP) - pd.pf_mtag->pftag_flags |= PF_TAG_TCP; - else - pd.pf_mtag->pftag_flags &= ~PF_TAG_TCP; +#endif /* PF_ECN */ + /* record protocol */ + m->m_pkthdr.pkt_proto = pd.proto; } /* @@ -8621,10 +8654,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, if (kif->pfik_flags & PFI_IFLAG_SKIP) return (PF_PASS); -#ifdef DIAGNOSTIC - if ((m->m_flags & M_PKTHDR) == 0) - panic("non-M_PKTHDR is passed to pf_test6"); -#endif /* DIAGNOSTIC */ + VERIFY(m->m_flags & M_PKTHDR); h = mtod(m, struct ip6_hdr *); @@ -8645,10 +8675,10 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); pd.eh = eh; - if (pd.pf_mtag->pftag_flowhash != 0) { - pd.flowhash = pd.pf_mtag->pftag_flowhash; - pd.flags |= (m->m_pkthdr.m_fhflags & PF_TAG_FLOWADV) ? - PFDESC_FLOW_ADV : 0; + if (m->m_pkthdr.pkt_flags & PKTF_FLOW_ID) { + pd.flowsrc = m->m_pkthdr.pkt_flowsrc; + pd.flowhash = m->m_pkthdr.pkt_flowid; + pd.pktflags = (m->m_pkthdr.pkt_flags & PKTF_FLOW_MASK); } if (m->m_pkthdr.len < (int)sizeof (*h)) { @@ -8708,7 +8738,7 @@ nonormalize: switch (nxt) { case IPPROTO_FRAGMENT: { struct ip6_frag ip6f; - + pd.flags |= PFDESC_IP_FRAG; if (!pf_pull_hdr(m, off, &ip6f, sizeof ip6f, NULL, &reason, pd.af)) { @@ -8812,7 +8842,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -8854,7 +8884,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -8889,7 +8919,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -8923,7 +8953,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, - m, off, h, &pd, &a, &ruleset, &ip6intrq); + m, off, h, &pd, &a, &ruleset, NULL); break; } @@ -8966,7 +8996,7 @@ nonormalize: break; } else if (s == NULL) { action = pf_test_rule(&r, &s, dir, kif, m, off, - h, &pd, &a, &ruleset, &ip6intrq); + h, &pd, &a, &ruleset, NULL); if (action == PF_PASS) break; } @@ -8997,7 +9027,7 @@ nonormalize: log = s->log; } else if (s == NULL) action = pf_test_rule(&r, &s, dir, kif, m, off, h, - &pd, &a, &ruleset, &ip6intrq); + &pd, &a, &ruleset, NULL); break; } @@ -9020,7 +9050,8 @@ done: ("pf: dropping packet with dangerous v6 headers\n")); } - if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) || pd.flowhash != 0) + if ((s && s->tag) || PF_RTABLEID_IS_VALID(r->rtableid) || + (pd.pktflags & PKTF_FLOW_ID)) (void) pf_tag_packet(m, pd.pf_mtag, s ? s->tag : 0, r->rtableid, &pd); @@ -9033,16 +9064,15 @@ done: pd.pf_mtag->pftag_qid = r->qid; } #endif /* PF_ALTQ */ +#if PF_ECN /* add hints for ecn */ pd.pf_mtag->pftag_hdr = h; /* record address family */ pd.pf_mtag->pftag_flags &= ~PF_TAG_HDR_INET; pd.pf_mtag->pftag_flags |= PF_TAG_HDR_INET6; - /* record TCP vs. 
non-TCP */ - if (pd.proto == IPPROTO_TCP) - pd.pf_mtag->pftag_flags |= PF_TAG_TCP; - else - pd.pf_mtag->pftag_flags &= ~PF_TAG_TCP; +#endif /* PF_ECN */ + /* record protocol */ + m->m_pkthdr.pkt_proto = pd.proto; } if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP || @@ -9279,7 +9309,7 @@ pf_calendar_time_second(void) { struct timeval t; - microtime(&t); + getmicrotime(&t); return (t.tv_sec); } diff --git a/bsd/net/pf_if.c b/bsd/net/pf_if.c index 66d939f92..9dec8f760 100644 --- a/bsd/net/pf_if.c +++ b/bsd/net/pf_if.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -230,14 +230,11 @@ void pfi_attach_ifnet(struct ifnet *ifp) { struct pfi_kif *kif; - char if_name[IFNAMSIZ]; lck_mtx_assert(pf_lock, LCK_MTX_ASSERT_OWNED); pfi_update++; - (void) snprintf(if_name, sizeof (if_name), "%s%d", - ifp->if_name, ifp->if_unit); - if ((kif = pfi_kif_get(if_name)) == NULL) + if ((kif = pfi_kif_get(if_name(ifp))) == NULL) panic("pfi_kif_get failed"); ifnet_lock_exclusive(ifp); diff --git a/bsd/net/pf_ioctl.c b/bsd/net/pf_ioctl.c index 6e76775a7..1346210be 100644 --- a/bsd/net/pf_ioctl.c +++ b/bsd/net/pf_ioctl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2011 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -132,6 +132,8 @@ struct ip_fw_args; #include #endif /* PF_ALTQ */ +#include + #if 0 static void pfdetach(void); #endif @@ -192,10 +194,11 @@ static void pf_pooladdr_copyout(struct pf_pooladdr *, struct pf_pooladdr *); static void pf_expire_states_and_src_nodes(struct pf_rule *); static void pf_delete_rule_from_ruleset(struct pf_ruleset *, int, struct pf_rule *); +static void pf_addrwrap_setup(struct pf_addr_wrap *); static int pf_rule_setup(struct pfioc_rule *, struct pf_rule *, struct pf_ruleset *); -static void pf_delete_rule_by_owner(char *); -static int pf_delete_rule_by_ticket(struct pfioc_rule *); +static void pf_delete_rule_by_owner(char *, u_int32_t); +static int pf_delete_rule_by_ticket(struct pfioc_rule *, u_int32_t); static void pf_ruleset_cleanup(struct pf_ruleset *, int); static void pf_deleterule_anchor_step_out(struct pf_ruleset **, int, struct pf_rule **); @@ -381,7 +384,7 @@ generate_token(struct proc *p) return (0); } - token_value = (u_int64_t)(uintptr_t)new_token; + token_value = VM_KERNEL_ADDRPERM((u_int64_t)(uintptr_t)new_token); new_token->token.token_value = token_value; new_token->token.pid = proc_pid(p); @@ -544,7 +547,7 @@ pfinit(void) pf_normalize_init(); bzero(&pf_status, sizeof (pf_status)); pf_status.debug = PF_DEBUG_URGENT; - pf_hash_seed = random(); + pf_hash_seed = RandomULong(); /* XXX do our best to avoid a conflict */ pf_status.hostid = random(); @@ -1292,7 +1295,7 @@ static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_ruleset *rs; - struct pf_rule *rule, **old_array; + struct pf_rule *rule, **old_array, *r; struct pf_rulequeue *old_rules; int error; u_int32_t old_rcount; @@ -1318,6 +1321,16 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) old_rcount = rs->rules[rs_num].active.rcount; old_array = rs->rules[rs_num].active.ptr_array; + if(old_rcount != 0) { + r = TAILQ_FIRST(rs->rules[rs_num].active.ptr); + while (r) { + if (r->rule_flag & PFRULE_PFM) + pffwrules--; + r = TAILQ_NEXT(r, entries); + } + } + + rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; 
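The generate_token() change above, and the rule-ticket change further down (rule->ticket = VM_KERNEL_ADDRPERM(...)), stop handing raw kernel pointers to user space and filter them through VM_KERNEL_ADDRPERM(), which offsets every exported address by a boot-time random value so the identifiers stay stable and comparable without disclosing kernel layout. A user-space-sized sketch of the idea; the secret's derivation here is illustrative, not the kernel's:

	#include <stdint.h>
	#include <stdlib.h>

	/* Boot-time secret; the kernel seeds its slide from the random
	 * pool early in startup. arc4random() is just a stand-in. */
	static uintptr_t addrperm_slide;

	static void
	addrperm_init(void)
	{
		while (addrperm_slide == 0)
			addrperm_slide = (uintptr_t)arc4random();
	}

	/* Export a stable opaque handle for an object instead of its raw
	 * address. Zero maps to zero so "no object" stays distinguishable,
	 * matching VM_KERNEL_ADDRPERM's behavior. Handles from the same
	 * boot compare equal iff the pointers do, which is all the PF
	 * token and ticket lookups need. */
	static uint64_t
	addr_perm(const void *p)
	{
		if (p == NULL)
			return (0);
		return ((uint64_t)((uintptr_t)p + addrperm_slide));
	}
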
rs->rules[rs_num].active.ptr_array = @@ -2810,79 +2823,103 @@ pf_ruleset_cleanup(struct pf_ruleset *ruleset, int rs) ++ruleset->rules[rs].inactive.ticket; } +/* + * req_dev encodes the PF interface. Currently, possible values are + * 0 or PFRULE_PFM + */ static int -pf_delete_rule_by_ticket(struct pfioc_rule *pr) +pf_delete_rule_by_ticket(struct pfioc_rule *pr, u_int32_t req_dev) { struct pf_ruleset *ruleset; - struct pf_rule *rule; - int rs_num; + struct pf_rule *rule = NULL; int is_anchor; int error; + int i; is_anchor = (pr->anchor_call[0] != '\0'); if ((ruleset = pf_find_ruleset_with_owner(pr->anchor, pr->rule.owner, is_anchor, &error)) == NULL) return (error); - rs_num = pf_get_ruleset_number(pr->rule.action); - if (rs_num >= PF_RULESET_MAX) { - return (EINVAL); - } - - if (pr->rule.ticket) { - rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + for (i = 0; i < PF_RULESET_MAX && rule == NULL; i++) { + rule = TAILQ_FIRST(ruleset->rules[i].active.ptr); while (rule && (rule->ticket != pr->rule.ticket)) rule = TAILQ_NEXT(rule, entries); - if (rule == NULL) - return (ENOENT); + } + if (rule == NULL) + return (ENOENT); + else + i--; - if (strcmp(rule->owner, pr->rule.owner)) - return (EACCES); + if (strcmp(rule->owner, pr->rule.owner)) + return (EACCES); delete_rule: - if (rule->anchor && (ruleset != &pf_main_ruleset) && - ((strcmp(ruleset->anchor->owner, "")) == 0) && - ((ruleset->rules[rs_num].active.rcount - 1) == 0)) { - /* set rule & ruleset to parent and repeat */ - struct pf_rule *delete_rule = rule; - struct pf_ruleset *delete_ruleset = ruleset; + if (rule->anchor && (ruleset != &pf_main_ruleset) && + ((strcmp(ruleset->anchor->owner, "")) == 0) && + ((ruleset->rules[i].active.rcount - 1) == 0)) { + /* set rule & ruleset to parent and repeat */ + struct pf_rule *delete_rule = rule; + struct pf_ruleset *delete_ruleset = ruleset; #define parent_ruleset ruleset->anchor->parent->ruleset - if (ruleset->anchor->parent == NULL) - ruleset = &pf_main_ruleset; - else - ruleset = &parent_ruleset; - - rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); - while (rule && - (rule->anchor != delete_ruleset->anchor)) - rule = TAILQ_NEXT(rule, entries); - if (rule == NULL) - panic("%s: rule not found!", __func__); + if (ruleset->anchor->parent == NULL) + ruleset = &pf_main_ruleset; + else + ruleset = &parent_ruleset; - if (delete_rule->rule_flag & PFRULE_PFM) - pffwrules--; + rule = TAILQ_FIRST(ruleset->rules[i].active.ptr); + while (rule && - (rule->anchor != delete_ruleset->anchor)) + rule = TAILQ_NEXT(rule, entries); + if (rule == NULL) + panic("%s: rule not found!", __func__); - pf_delete_rule_from_ruleset(delete_ruleset, - rs_num, delete_rule); - delete_ruleset->rules[rs_num].active.ticket = - ++delete_ruleset->rules[rs_num].inactive.ticket; + /* + * if request device != rule's device, bail: + * with error if ticket matches; + * without error if ticket doesn't match (i.e.
it's just cleanup) + */ + if ((rule->rule_flag & PFRULE_PFM) ^ req_dev) { + if (rule->ticket != pr->rule.ticket) { + return (0); + } else { + return EACCES; + } + } - goto delete_rule; - } else { - if (rule->rule_flag & PFRULE_PFM) - pffwrules--; - pf_delete_rule_from_ruleset(ruleset, rs_num, - rule); - pf_ruleset_cleanup(ruleset, rs_num); + if (delete_rule->rule_flag & PFRULE_PFM) { + pffwrules--; + } + + pf_delete_rule_from_ruleset(delete_ruleset, + i, delete_rule); + delete_ruleset->rules[i].active.ticket = + ++delete_ruleset->rules[i].inactive.ticket; + goto delete_rule; + } else { + /* + * process deleting rule only if device that added the + * rule matches device that issued the request + */ + if ((rule->rule_flag & PFRULE_PFM) ^ req_dev) + return EACCES; + if (rule->rule_flag & PFRULE_PFM) + pffwrules--; + pf_delete_rule_from_ruleset(ruleset, i, + rule); + pf_ruleset_cleanup(ruleset, i); } return (0); } +/* + * req_dev encodes the PF interface. Currently, possible values are + * 0 or PFRULE_PFM + */ static void -pf_delete_rule_by_owner(char *owner) +pf_delete_rule_by_owner(char *owner, u_int32_t req_dev) { struct pf_ruleset *ruleset; struct pf_rule *rule, *next; @@ -2893,6 +2930,14 @@ pf_delete_rule_by_owner(char *owner) ruleset = &pf_main_ruleset; while (rule) { next = TAILQ_NEXT(rule, entries); + /* + * process deleting rule only if device that added the + * rule matches device that issued the request + */ + if ((rule->rule_flag & PFRULE_PFM) ^ req_dev) { + rule = next; + continue; + } if (rule->anchor) { if (((strcmp(rule->owner, owner)) == 0) || ((strcmp(rule->owner, "")) == 0)) { @@ -2964,6 +3009,13 @@ pf_deleterule_anchor_step_out(struct pf_ruleset **ruleset_ptr, *rule_ptr = rule; } +static void +pf_addrwrap_setup(struct pf_addr_wrap *aw) +{ + VERIFY(aw); + bzero(&aw->p, sizeof aw->p); +} + static int pf_rule_setup(struct pfioc_rule *pr, struct pf_rule *rule, struct pf_ruleset *ruleset) { @@ -3006,6 +3058,8 @@ pf_rule_setup(struct pfioc_rule *pr, struct pf_rule *rule, if (rule->logif >= PFLOGIFS_MAX) error = EINVAL; #endif /* PFLOG */ + pf_addrwrap_setup(&rule->src.addr); + pf_addrwrap_setup(&rule->dst.addr); if (pf_rtlabel_add(&rule->src.addr) || pf_rtlabel_add(&rule->dst.addr)) error = EBUSY; @@ -3054,6 +3108,7 @@ static int pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p) { int error = 0; + u_int32_t req_dev = 0; switch (cmd) { case DIOCADDRULE: { @@ -3314,6 +3369,8 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p if (newrule->logif >= PFLOGIFS_MAX) error = EINVAL; #endif /* PFLOG */ + pf_addrwrap_setup(&newrule->src.addr); + pf_addrwrap_setup(&newrule->dst.addr); if (pf_rtlabel_add(&newrule->src.addr) || pf_rtlabel_add(&newrule->dst.addr)) error = EBUSY; @@ -3452,6 +3509,8 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p } r = TAILQ_NEXT(r, entries); } + if (error != 0) + return (error); } rule = pool_get(&pf_rule_pl, PR_WAITOK); @@ -3510,7 +3569,7 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p error = pf_setup_pfsync_matching(ruleset); pf_ruleset_cleanup(ruleset, rs_num); - rule->ticket = ruleset->rules[rs_num].active.ticket; + rule->ticket = VM_KERNEL_ADDRPERM((u_int64_t)(uintptr_t)rule); pr->rule.ticket = rule->ticket; pf_rule_copyout(rule, &pr->rule); @@ -3528,11 +3587,15 @@ pfioctl_ioc_rule(u_long cmd, int minordev, struct pfioc_rule *pr, struct proc *p break; } + /* get device through which request is made */ + if ((uint8_t)minordev
== PFDEV_PFM) + req_dev |= PFRULE_PFM; + if (pr->rule.ticket) { - if ((error = pf_delete_rule_by_ticket(pr))) + if ((error = pf_delete_rule_by_ticket(pr, req_dev))) break; } else - pf_delete_rule_by_owner(pr->rule.owner); + pf_delete_rule_by_owner(pr->rule.owner, req_dev); pr->nr = pffwrules; break; } @@ -4000,6 +4063,7 @@ pfioctl_ioc_pooladdr(u_long cmd, struct pfioc_pooladdr *pp, struct proc *p) } pfi_kif_ref(pa->kif, PFI_KIF_REF_RULE); } + pf_addrwrap_setup(&pa->addr); if (pfi_dynaddr_setup(&pa->addr, pp->af)) { pfi_dynaddr_remove(&pa->addr); pfi_kif_unref(pa->kif, PFI_KIF_REF_RULE); @@ -4111,6 +4175,7 @@ pfioctl_ioc_pooladdr(u_long cmd, struct pfioc_pooladdr *pp, struct proc *p) pfi_kif_ref(newpa->kif, PFI_KIF_REF_RULE); } else newpa->kif = NULL; + pf_addrwrap_setup(&newpa->addr); if (pfi_dynaddr_setup(&newpa->addr, pca->af) || pf_tbladdr_setup(ruleset, &newpa->addr)) { pfi_dynaddr_remove(&newpa->addr); @@ -4727,17 +4792,17 @@ int pf_af_hook(struct ifnet *ifp, struct mbuf **mppn, struct mbuf **mp, unsigned int af, int input, struct ip_fw_args *fwa) { - int error = 0, reentry; + int error = 0; struct mbuf *nextpkt; + net_thread_marks_t marks; + + marks = net_thread_marks_push(NET_THREAD_HELD_PF); - reentry = net_thread_check_lock(NET_THREAD_HELD_PF); - if (!reentry) { + if (marks != net_thread_marks_none) { lck_rw_lock_shared(pf_perim_lock); if (!pf_is_enabled) goto done; - lck_mtx_lock(pf_lock); - net_thread_set_lock(NET_THREAD_HELD_PF); } if (mppn != NULL && *mppn != NULL) @@ -4775,14 +4840,15 @@ pf_af_hook(struct ifnet *ifp, struct mbuf **mppn, struct mbuf **mp, else *mppn = nextpkt; } - if (!reentry) { - net_thread_unset_lock(NET_THREAD_HELD_PF); + + if (marks != net_thread_marks_none) lck_mtx_unlock(pf_lock); - } + done: - if (!reentry) + if (marks != net_thread_marks_none) lck_rw_done(pf_perim_lock); + net_thread_marks_pop(marks); return (error); } @@ -4862,7 +4928,11 @@ pf_inet6_hook(struct ifnet *ifp, struct mbuf **mp, int input, ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); if (flags & mask) { - in6_delayed_cksum(*mp, sizeof(struct ip6_hdr)); + /* + * Checksum offload should not have been enabled + * when extension headers exist, thus 0 for optlen. + */ + in6_delayed_cksum(*mp); (*mp)->m_pkthdr.csum_flags &= ~mask; } } @@ -4881,30 +4951,19 @@ pf_inet6_hook(struct ifnet *ifp, struct mbuf **mp, int input, #endif /* INET6 */ int -pf_ifaddr_hook(struct ifnet *ifp, unsigned long cmd) +pf_ifaddr_hook(struct ifnet *ifp) { - lck_rw_lock_shared(pf_perim_lock); - lck_mtx_lock(pf_lock); + struct pfi_kif *kif = ifp->if_pf_kif; - switch (cmd) { - case SIOCSIFADDR: - case SIOCAIFADDR: - case SIOCDIFADDR: -#if INET6 - case SIOCAIFADDR_IN6_32: - case SIOCAIFADDR_IN6_64: - case SIOCDIFADDR_IN6: -#endif /* INET6 */ - if (ifp->if_pf_kif != NULL) - pfi_kifaddr_update(ifp->if_pf_kif); - break; - default: - panic("%s: unexpected ioctl %lu", __func__, cmd); - /* NOTREACHED */ - } + if (kif != NULL) { + lck_rw_lock_shared(pf_perim_lock); + lck_mtx_lock(pf_lock); - pfi_kifaddr_update(kif); + + lck_mtx_unlock(pf_lock); + lck_rw_done(pf_perim_lock); + } return (0); } @@ -4965,3 +5024,99 @@ pf_detach_hooks(void) ifnet_head_done(); } #endif + +/* + * 'D' group ioctls. + * + * The switch statement below does nothing at runtime, as it serves as a + * compile time check to ensure that all of the socket 'D' ioctls (those + * in the 'D' group going thru soo_ioctl) that are made available by the + * networking stack are unique.
This works as long as this routine gets + * updated each time a new interface ioctl gets added. + * + * Any failures at compile time indicate duplicated ioctl values. + */ +static __attribute__((unused)) void +pfioctl_cassert(void) +{ + /* + * This is equivalent to _CASSERT() and the compiler wouldn't + * generate any instructions, thus for compile time only. + */ + switch ((u_long)0) { + case 0: + + /* bsd/net/pfvar.h */ + case DIOCSTART: + case DIOCSTOP: + case DIOCADDRULE: + case DIOCGETSTARTERS: + case DIOCGETRULES: + case DIOCGETRULE: + case DIOCSTARTREF: + case DIOCSTOPREF: + case DIOCCLRSTATES: + case DIOCGETSTATE: + case DIOCSETSTATUSIF: + case DIOCGETSTATUS: + case DIOCCLRSTATUS: + case DIOCNATLOOK: + case DIOCSETDEBUG: + case DIOCGETSTATES: + case DIOCCHANGERULE: + case DIOCINSERTRULE: + case DIOCDELETERULE: + case DIOCSETTIMEOUT: + case DIOCGETTIMEOUT: + case DIOCADDSTATE: + case DIOCCLRRULECTRS: + case DIOCGETLIMIT: + case DIOCSETLIMIT: + case DIOCKILLSTATES: + case DIOCSTARTALTQ: + case DIOCSTOPALTQ: + case DIOCADDALTQ: + case DIOCGETALTQS: + case DIOCGETALTQ: + case DIOCCHANGEALTQ: + case DIOCGETQSTATS: + case DIOCBEGINADDRS: + case DIOCADDADDR: + case DIOCGETADDRS: + case DIOCGETADDR: + case DIOCCHANGEADDR: + case DIOCGETRULESETS: + case DIOCGETRULESET: + case DIOCRCLRTABLES: + case DIOCRADDTABLES: + case DIOCRDELTABLES: + case DIOCRGETTABLES: + case DIOCRGETTSTATS: + case DIOCRCLRTSTATS: + case DIOCRCLRADDRS: + case DIOCRADDADDRS: + case DIOCRDELADDRS: + case DIOCRSETADDRS: + case DIOCRGETADDRS: + case DIOCRGETASTATS: + case DIOCRCLRASTATS: + case DIOCRTSTADDRS: + case DIOCRSETTFLAGS: + case DIOCRINADEFINE: + case DIOCOSFPFLUSH: + case DIOCOSFPADD: + case DIOCOSFPGET: + case DIOCXBEGIN: + case DIOCXCOMMIT: + case DIOCXROLLBACK: + case DIOCGETSRCNODES: + case DIOCCLRSRCNODES: + case DIOCSETHOSTID: + case DIOCIGETIFACES: + case DIOCSETIFFLAG: + case DIOCCLRIFFLAG: + case DIOCKILLSRCNODES: + case DIOCGIFSPEED: + ; + } +} diff --git a/bsd/net/pf_norm.c b/bsd/net/pf_norm.c index 69283ce6e..d6c5801f5 100644 --- a/bsd/net/pf_norm.c +++ b/bsd/net/pf_norm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
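pfioctl_cassert() above leans on a C constraint: two case labels in one switch may not carry the same constant value, so merely listing every 'D'-group ioctl as a case turns any future collision into a build break instead of a runtime surprise. The same trick in miniature, with made-up CMD_* values:

	/* Compile-time uniqueness check: the switch compiles only while
	 * all command codes are distinct. Change CMD_C to 0x20004402u
	 * (CMD_B's value) and the compiler rejects the duplicate case. */
	#define CMD_A	0x20004401u
	#define CMD_B	0x20004402u
	#define CMD_C	0x20004403u

	static __attribute__((unused)) void
	cmd_cassert(void)
	{
		switch ((unsigned long)0) {
		case 0:
		case CMD_A:
		case CMD_B:
		case CMD_C:
			;
		}
	}
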
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -316,12 +316,14 @@ pf_purge_expired_fragments(void) switch (frag->fr_af) { case AF_INET: - DPFPRINTF(("expiring IPv4 %d(%p) from queue.\n", - ntohs(frag->fr_id), frag)); + DPFPRINTF(("expiring IPv4 %d(0x%llx) from queue.\n", + ntohs(frag->fr_id), + (uint64_t)VM_KERNEL_ADDRPERM(frag))); break; case AF_INET6: - DPFPRINTF(("expiring IPv6 %d(%p) from queue.\n", - ntohl(frag->fr_id6), frag)); + DPFPRINTF(("expiring IPv6 %d(0x%llx) from queue.\n", + ntohl(frag->fr_id6), + (uint64_t)VM_KERNEL_ADDRPERM(frag))); break; default: VERIFY(0 && "only IPv4 and IPv6 supported"); @@ -337,12 +339,14 @@ pf_purge_expired_fragments(void) switch (frag->fr_af) { case AF_INET: - DPFPRINTF(("expiring IPv4 %d(%p) from cache.\n", - ntohs(frag->fr_id), frag)); + DPFPRINTF(("expiring IPv4 %d(0x%llx) from cache.\n", + ntohs(frag->fr_id), + (uint64_t)VM_KERNEL_ADDRPERM(frag))); break; case AF_INET6: - DPFPRINTF(("expiring IPv6 %d(%p) from cache.\n", - ntohl(frag->fr_id6), frag)); + DPFPRINTF(("expiring IPv6 %d(0x%llx) from cache.\n", + ntohl(frag->fr_id6), + (uint64_t)VM_KERNEL_ADDRPERM(frag))); break; default: VERIFY(0 && "only IPv4 and IPv6 supported"); @@ -682,7 +686,8 @@ insert: m->m_pkthdr.len = plen; } - DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len))); + DPFPRINTF(("complete: 0x%llx(%d)\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip->ip_len))); return (m); drop_fragment: @@ -1013,8 +1018,9 @@ pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag, plen = FR_IP6_PLEN(frent); fr_max = off + plen - (frent->fr_ip6f_hlen - sizeof *ip6); - DPFPRINTF(("%p IPv6 frag plen %u off %u fr_ip6f_hlen %u fr_max %u m_len %u\n", m, - plen, off, frent->fr_ip6f_hlen, fr_max, m->m_len)); + DPFPRINTF(("0x%llx IPv6 frag plen %u off %u fr_ip6f_hlen %u " + "fr_max %u m_len %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, + frent->fr_ip6f_hlen, fr_max, m->m_len)); /* strip off headers up to the fragment payload */ m->m_data += frent->fr_ip6f_hlen; @@ -1185,8 +1191,9 @@ pf_reassemble6(struct mbuf **m0, struct pf_fragment **frag, m->m_pkthdr.len = pktlen; } - DPFPRINTF(("complete: %p ip6_plen %d m_pkthdr.len %d\n", - m, ntohs(ip6->ip6_plen), m->m_pkthdr.len)); + DPFPRINTF(("complete: 0x%llx ip6_plen %d m_pkthdr.len %d\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), ntohs(ip6->ip6_plen), + m->m_pkthdr.len)); return m; @@ -1231,8 +1238,8 @@ pf_frag6cache(struct mbuf **m0, struct ip6_hdr *h, struct ip6_frag *fh, */ fr_max = off + plen; - DPFPRINTF(("%p plen %u off %u fr_max %u\n", m, - plen, off, fr_max)); + DPFPRINTF(("0x%llx plen %u off %u fr_max %u\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), plen, off, fr_max)); /* Create a new range queue for this packet */ if (*frag == NULL) { @@ -1650,9 +1657,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, if (m == NULL) return (PF_DROP); + VERIFY(m->m_flags & M_PKTHDR); + /* use mtag from concatenated mbuf chain */ pd->pf_mtag = pf_find_mtag(m); -#ifdef DIAGNOSTIC +#if DIAGNOSTIC if (pd->pf_mtag == NULL) { printf("%s: pf_find_mtag returned NULL(1)\n", __func__); if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { @@ -1697,9 +1706,11 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, goto drop; } + VERIFY(m->m_flags & M_PKTHDR); + /* use mtag from copied and trimmed mbuf chain */ pd->pf_mtag = pf_find_mtag(m); -#ifdef DIAGNOSTIC +#if DIAGNOSTIC if (pd->pf_mtag == NULL) { printf("%s: pf_find_mtag returned NULL(2)\n", __func__); if ((pd->pf_mtag = pf_get_mtag(m)) == NULL) { @@ -1733,14 
+1744,12 @@ no_fragment: h->ip_ttl = r->min_ttl; h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); } -#if RANDOM_IP_ID if (r->rule_flag & PFRULE_RANDOMID) { - u_int16_t ip_id = h->ip_id; + u_int16_t oip_id = h->ip_id; h->ip_id = ip_randomid(); - h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0); + h->ip_sum = pf_cksum_fixup(h->ip_sum, oip_id, h->ip_id, 0); } -#endif /* RANDOM_IP_ID */ if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) pd->flags |= PFDESC_IP_REAS; @@ -1980,8 +1989,9 @@ fragment: goto badfrag; fr_max = fragoff + plen - (off - sizeof(struct ip6_hdr)); - DPFPRINTF(("%p IPv6 frag plen %u mff %d off %u fragoff %u fr_max %u\n", m, - plen, mff, off, fragoff, fr_max)); + DPFPRINTF(("0x%llx IPv6 frag plen %u mff %d off %u fragoff %u " + "fr_max %u\n", (uint64_t)VM_KERNEL_ADDRPERM(m), plen, mff, off, + fragoff, fr_max)); if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) { /* Fully buffer all of the fragments */ diff --git a/bsd/net/pfkeyv2.h b/bsd/net/pfkeyv2.h index 3f8089c8f..1de6752ef 100644 --- a/bsd/net/pfkeyv2.h +++ b/bsd/net/pfkeyv2.h @@ -65,6 +65,7 @@ #ifndef _NET_PFKEYV2_H_ #define _NET_PFKEYV2_H_ #include +#include /* This file defines structures and symbols for the PF_KEY Version 2 @@ -103,7 +104,9 @@ you leave this credit intact on any copies of this file. #define SADB_X_SPDEXPIRE 21 #define SADB_X_SPDDELETE2 22 /* by policy id */ #define SADB_GETSASTAT 23 -#define SADB_MAX 23 +#define SADB_X_SPDENABLE 24 /* by policy id */ +#define SADB_X_SPDDISABLE 25 /* by policy id */ +#define SADB_MAX 25 struct sadb_msg { u_int8_t sadb_msg_version; @@ -248,7 +251,12 @@ struct sadb_x_sa2 { u_int16_t sadb_x_sa2_len; u_int16_t sadb_x_sa2_exttype; u_int8_t sadb_x_sa2_mode; - u_int8_t sadb_x_sa2_reserved1; + union { + u_int8_t sadb_x_sa2_reserved1; +#ifdef PRIVATE + u_int8_t sadb_x_sa2_alwaysexpire; +#endif + }; u_int16_t sadb_x_sa2_reserved2; u_int32_t sadb_x_sa2_sequence; u_int32_t sadb_x_sa2_reqid; @@ -271,7 +279,22 @@ struct sadb_x_policy { * [total length of ipsec policy requests] * = (sadb_x_policy_len * sizeof(uint64_t) - sizeof(struct sadb_x_policy)) */ - +#ifdef PRIVATE +/* IPSec Interface Extension: + * IPSec interface can be specified alone, or all three + * of internal, outgoing, and IPSec interfaces must be + * specified. + */ +struct sadb_x_ipsecif { + u_int16_t sadb_x_ipsecif_len; + u_int16_t sadb_x_ipsecif_exttype; + char sadb_x_ipsecif_internal_if[IFXNAMSIZ]; /* Steal packets from this interface */ + char sadb_x_ipsecif_outgoing_if[IFXNAMSIZ]; /* Send packets out on this interface */ + char sadb_x_ipsecif_ipsec_if[IFXNAMSIZ]; /* Direct packets through ipsec interface */ + u_int16_t sadb_x_ipsecif_init_disabled; /* 0 or 1, flag to ignore policy */ + u_int16_t reserved; +}; +#endif /* XXX IPsec Policy Request Extension */ /* * This structure is aligned 8 bytes. @@ -340,7 +363,12 @@ struct sadb_sastat { #define SADB_X_EXT_SA2 19 #define SADB_EXT_SESSION_ID 20 #define SADB_EXT_SASTAT 21 -#define SADB_EXT_MAX 21 +#define SADB_X_EXT_IPSECIF 22 +#define SADB_X_EXT_ADDR_RANGE_SRC_START 23 +#define SADB_X_EXT_ADDR_RANGE_SRC_END 24 +#define SADB_X_EXT_ADDR_RANGE_DST_START 25 +#define SADB_X_EXT_ADDR_RANGE_DST_END 26 +#define SADB_EXT_MAX 26 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 diff --git a/bsd/net/pfvar.h b/bsd/net/pfvar.h index a1f77f8a0..171e1c750 100644 --- a/bsd/net/pfvar.h +++ b/bsd/net/pfvar.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -99,8 +99,8 @@ extern "C" { #define be64toh(x) htobe64(x) -__private_extern__ lck_rw_t *pf_perim_lock; -__private_extern__ lck_mtx_t *pf_lock; +extern lck_rw_t *pf_perim_lock; +extern lck_mtx_t *pf_lock; struct pool { struct zone *pool_zone; /* pointer to backend zone */ @@ -694,7 +694,7 @@ struct pf_rule { u_int64_t packets[2]; u_int64_t bytes[2]; - u_int32_t ticket; + u_int64_t ticket; #define PF_OWNER_NAME_SIZE 64 char owner[PF_OWNER_NAME_SIZE]; u_int32_t priority; @@ -994,6 +994,7 @@ struct pf_state_key { u_int8_t direction; u_int8_t proto_variant; struct pf_app_state *app_state; + u_int32_t flowsrc; u_int32_t flowhash; RB_ENTRY(pf_state_key) entry_lan_ext; @@ -1346,8 +1347,8 @@ RB_PROTOTYPE_SC(__private_extern__, pf_state_tree_ext_gwy, pf_state_key, RB_HEAD(pfi_ifhead, pfi_kif); /* state tables */ -__private_extern__ struct pf_state_tree_lan_ext pf_statetbl_lan_ext; -__private_extern__ struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy; +extern struct pf_state_tree_lan_ext pf_statetbl_lan_ext; +extern struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy; /* keep synced with pfi_kif, used in RB_FIND */ struct pfi_kif_cmp { @@ -1428,13 +1429,14 @@ struct pf_pdesc { /* state code. Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ #define PFDESC_IP_REAS 0x0002 /* IP frags would've been reassembled */ -#define PFDESC_FLOW_ADV 0x0004 /* sender can use flow advisory */ -#define PFDESC_IP_FRAG 0x0008 /* This is a fragment */ +#define PFDESC_IP_FRAG 0x0004 /* This is a fragment */ sa_family_t af; u_int8_t proto; u_int8_t tos; u_int8_t proto_variant; - mbuf_svc_class_t sc; + mbuf_svc_class_t sc; /* mbuf service class (MBUF_SVC) */ + u_int32_t pktflags; /* mbuf packet flags (PKTF) */ + u_int32_t flowsrc; /* flow source (FLOWSRC) */ u_int32_t flowhash; /* flow hash to identify the sender */ }; #endif /* KERNEL */ @@ -2135,29 +2137,29 @@ struct pf_ifspeed { RB_HEAD(pf_src_tree, pf_src_node); RB_PROTOTYPE_SC(__private_extern__, pf_src_tree, pf_src_node, entry, pf_src_compare); -__private_extern__ struct pf_src_tree tree_src_tracking; +extern struct pf_src_tree tree_src_tracking; RB_HEAD(pf_state_tree_id, pf_state); RB_PROTOTYPE_SC(__private_extern__, pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); -__private_extern__ struct pf_state_tree_id tree_id; -__private_extern__ struct pf_state_queue state_list; +extern struct pf_state_tree_id tree_id; +extern struct pf_state_queue state_list; TAILQ_HEAD(pf_poolqueue, pf_pool); -__private_extern__ struct pf_poolqueue pf_pools[2]; -__private_extern__ struct pf_palist pf_pabuf; -__private_extern__ u_int32_t ticket_pabuf; +extern struct pf_poolqueue pf_pools[2]; +extern struct pf_palist pf_pabuf; +extern u_int32_t ticket_pabuf; #if PF_ALTQ TAILQ_HEAD(pf_altqqueue, pf_altq); -__private_extern__ struct pf_altqqueue pf_altqs[2]; -__private_extern__ u_int32_t ticket_altqs_active; -__private_extern__ u_int32_t ticket_altqs_inactive; -__private_extern__ int altqs_inactive_open; -__private_extern__ struct pf_altqqueue *pf_altqs_active; -__private_extern__ struct pf_altqqueue *pf_altqs_inactive; +extern struct pf_altqqueue pf_altqs[2]; +extern u_int32_t ticket_altqs_active; +extern u_int32_t ticket_altqs_inactive; +extern int altqs_inactive_open; +extern struct pf_altqqueue *pf_altqs_active; +extern struct pf_altqqueue *pf_altqs_inactive; #endif /* PF_ALTQ */ -__private_extern__ struct pf_poolqueue *pf_pools_active; -__private_extern__ struct 
pf_poolqueue *pf_pools_inactive; +extern struct pf_poolqueue *pf_pools_active; +extern struct pf_poolqueue *pf_pools_inactive; __private_extern__ int pf_tbladdr_setup(struct pf_ruleset *, struct pf_addr_wrap *); @@ -2166,15 +2168,15 @@ __private_extern__ void pf_tbladdr_copyout(struct pf_addr_wrap *); __private_extern__ void pf_calc_skip_steps(struct pf_rulequeue *); __private_extern__ u_int32_t pf_calc_state_key_flowhash(struct pf_state_key *); -__private_extern__ struct pool pf_src_tree_pl, pf_rule_pl; -__private_extern__ struct pool pf_state_pl, pf_state_key_pl, pf_pooladdr_pl; -__private_extern__ struct pool pf_state_scrub_pl; +extern struct pool pf_src_tree_pl, pf_rule_pl; +extern struct pool pf_state_pl, pf_state_key_pl, pf_pooladdr_pl; +extern struct pool pf_state_scrub_pl; #if PF_ALTQ -__private_extern__ struct pool pf_altq_pl; +extern struct pool pf_altq_pl; #endif /* PF_ALTQ */ -__private_extern__ struct pool pf_app_state_pl; +extern struct pool pf_app_state_pl; -__private_extern__ struct thread *pf_purge_thread; +extern struct thread *pf_purge_thread; __private_extern__ void pfinit(void); __private_extern__ void pf_purge_thread_fn(void *, wait_result_t); @@ -2194,8 +2196,8 @@ __private_extern__ void pf_print_flags(u_int8_t); __private_extern__ u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); -__private_extern__ struct ifnet *sync_ifp; -__private_extern__ struct pf_rule pf_default_rule; +extern struct ifnet *sync_ifp; +extern struct pf_rule pf_default_rule; __private_extern__ void pf_addrcpy(struct pf_addr *, struct pf_addr *, u_int8_t); __private_extern__ void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); @@ -2306,7 +2308,7 @@ __private_extern__ int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, __private_extern__ int pfr_ina_define(struct pfr_table *, user_addr_t, int, int *, int *, u_int32_t, int); -__private_extern__ struct pfi_kif *pfi_all; +extern struct pfi_kif *pfi_all; __private_extern__ void pfi_initialize(void); __private_extern__ struct pfi_kif *pfi_kif_get(const char *); @@ -2339,34 +2341,34 @@ __private_extern__ u_int32_t pf_qname2qid(char *); __private_extern__ void pf_qid2qname(u_int32_t, char *); __private_extern__ void pf_qid_unref(u_int32_t); -__private_extern__ struct pf_status pf_status; -__private_extern__ struct pool pf_frent_pl, pf_frag_pl; +extern struct pf_status pf_status; +extern struct pool pf_frent_pl, pf_frag_pl; struct pf_pool_limit { void *pp; unsigned limit; }; -__private_extern__ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; +extern struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX]; __private_extern__ int pf_af_hook(struct ifnet *, struct mbuf **, struct mbuf **, unsigned int, int, struct ip_fw_args *); -__private_extern__ int pf_ifaddr_hook(struct ifnet *, unsigned long); +__private_extern__ int pf_ifaddr_hook(struct ifnet *); __private_extern__ void pf_ifnet_hook(struct ifnet *, int); /* * The following are defined with "private extern" storage class for * kernel, and "extern" for user-space. 
*/ -__private_extern__ struct pf_anchor_global pf_anchors; -__private_extern__ struct pf_anchor pf_main_anchor; +extern struct pf_anchor_global pf_anchors; +extern struct pf_anchor pf_main_anchor; #define pf_main_ruleset pf_main_anchor.ruleset -__private_extern__ int pf_is_enabled; +extern int pf_is_enabled; #define PF_IS_ENABLED (pf_is_enabled != 0) -__private_extern__ u_int32_t pf_hash_seed; +extern u_int32_t pf_hash_seed; #if PF_ALTQ -__private_extern__ u_int32_t altq_allowed; +extern u_int32_t altq_allowed; #endif /* PF_ALTQ */ /* these ruleset functions can be linked into userland programs (pfctl) */ diff --git a/bsd/net/pktap.c b/bsd/net/pktap.c new file mode 100644 index 000000000..5c1be2939 --- /dev/null +++ b/bsd/net/pktap.c @@ -0,0 +1,1133 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
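The __private_extern__-to-extern sweep above widens the linkage of these pf globals so they are exported rather than hidden; in Apple's sys/cdefs.h, __private_extern__ expands to __attribute__((visibility("hidden"))), which links normally within one image but is never exported from it. A small user-space illustration of the difference:

	#include <stdio.h>

	/* Exported: appears as an external symbol of the image, so other
	 * images (or kexts, in the kernel analogy) can bind to it. */
	int pf_public_counter = 0;

	/* Hidden: usable by every object linked into *this* image, but
	 * invisible outside it -- the effect __private_extern__ had on
	 * these globals before the change. */
	__attribute__((visibility("hidden"))) int pf_hidden_counter = 0;

	int
	main(void)
	{
		pf_public_counter++;
		pf_hidden_counter++;
		printf("%d %d\n", pf_public_counter, pf_hidden_counter);
		return (0);
	}

Compiling the two globals into a shared library instead and inspecting it with nm shows only the public symbol as external; that export-surface difference is what the storage-class change removes here.
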
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#define _IP_VHL +#include +#include +#include +#include + +#include +#include + +#include + +#include + +#include + +#include + +extern struct inpcbinfo ripcbinfo; + +struct pktap_softc { + LIST_ENTRY(pktap_softc) pktp_link; + uint32_t pktp_unit; + uint32_t pktp_dlt_raw_count; + uint32_t pktp_dlt_pkttap_count; + struct ifnet *pktp_ifp; + struct pktap_filter pktp_filters[PKTAP_MAX_FILTERS]; +}; + +#ifndef PKTAP_DEBUG +#define PKTAP_DEBUG 1 +#endif /* PKTAP_DEBUG */ + +#define PKTAP_FILTER_OK 0 /* Packet passes filter checks */ +#define PKTAP_FILTER_SKIP 1 /* Do not tap this packet */ + +static int pktap_inited = 0; + +SYSCTL_DECL(_net_link); +SYSCTL_NODE(_net_link, IFT_PKTAP, pktap, CTLFLAG_RW|CTLFLAG_LOCKED, 0, + "pktap virtual interface"); + +static int pktap_total_tap_count = 0; +SYSCTL_INT(_net_link_pktap, OID_AUTO, total_tap_count, CTLFLAG_RD | CTLFLAG_LOCKED, + &pktap_total_tap_count, 0, ""); + +static u_int64_t pktap_count_unknown_if_type = 0; +SYSCTL_QUAD(_net_link_pktap, OID_AUTO, count_unknown_if_type, CTLFLAG_RD | CTLFLAG_LOCKED, + &pktap_count_unknown_if_type, ""); + +static int pktap_log = 0; +SYSCTL_INT(_net_link_pktap, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED, + &pktap_log, 0, ""); + +#define PKTAP_LOG(mask, fmt, ...) \ +do { \ + if ((pktap_log & mask)) \ + printf("%s:%d " fmt, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ +} while(false) + +#define PKTP_LOG_FUNC 0x01 +#define PKTP_LOG_FILTER 0x02 +#define PKTP_LOG_INPUT 0x04 +#define PKTP_LOG_OUTPUT 0x08 +#define PKTP_LOG_ERROR 0x10 +#define PKTP_LOG_NOPCB 0x20 + +/* + * pktap_lck_rw protects the global list of pktap interfaces + */ +decl_lck_rw_data(static, pktap_lck_rw_data); +static lck_rw_t *pktap_lck_rw = &pktap_lck_rw_data; +static lck_grp_t *pktap_lck_grp = NULL; +static lck_attr_t *pktap_lck_attr = NULL; + +static LIST_HEAD(pktap_list, pktap_softc) pktap_list = LIST_HEAD_INITIALIZER(pktap_list); + +int pktap_clone_create(struct if_clone *, u_int32_t, void *); +int pktap_clone_destroy(struct ifnet *); + +static struct if_clone pktap_cloner = + IF_CLONE_INITIALIZER(PKTAP_IFNAME, + pktap_clone_create, + pktap_clone_destroy, + 0, + IF_MAXUNIT); + +errno_t pktap_if_output(ifnet_t, mbuf_t); +errno_t pktap_demux(ifnet_t , mbuf_t, char *, protocol_family_t *); +errno_t pktap_add_proto(ifnet_t, protocol_family_t, const struct ifnet_demux_desc *, + u_int32_t); +errno_t pktap_del_proto(ifnet_t, protocol_family_t); +errno_t pktap_getdrvspec(ifnet_t, struct ifdrv64 *); +errno_t pktap_setdrvspec(ifnet_t, struct ifdrv64 *); +errno_t pktap_ioctl(ifnet_t, unsigned long, void *); +void pktap_detach(ifnet_t); +int pktap_filter_evaluate(struct pktap_softc *, struct ifnet *); +void pktap_bpf_tap(struct ifnet *, protocol_family_t , struct mbuf *, + u_int32_t , u_int32_t , int ); +errno_t pktap_tap_callback(ifnet_t , u_int32_t , bpf_tap_mode ); + +static void +pktap_hexdump(int mask, void *addr, size_t len) +{ + unsigned char *buf = addr; + size_t i; + + if (!(pktap_log & mask)) + return; + + for (i = 0; i < len; i++) { + unsigned char h = (buf[i] & 0xf0) >> 4; + unsigned char l = buf[i] & 0x0f; + + if (i != 0) { + if (i % 32 == 0) + printf("\n"); + else if (i % 4 == 0) + printf(" "); + } + printf("%c%c", + h < 10 ? h + '0' : h - 10 + 'a', + l < 10 ? 
l + '0' : l - 10 + 'a');
+	}
+	if (i % 32 != 0)
+		printf("\n");
+
+	return;
+}
+
+__private_extern__ void
+pktap_init(void)
+{
+	int error = 0;
+	lck_grp_attr_t *lck_grp_attr = NULL;
+
+	/* Make sure we're called only once */
+	VERIFY(pktap_inited == 0);
+
+	pktap_inited = 1;
+
+	lck_grp_attr = lck_grp_attr_alloc_init();
+	pktap_lck_grp = lck_grp_alloc_init("pktap", lck_grp_attr);
+	pktap_lck_attr = lck_attr_alloc_init();
+#if PKTAP_DEBUG
+	lck_attr_setdebug(pktap_lck_attr);
+#endif /* PKTAP_DEBUG */
+	lck_rw_init(pktap_lck_rw, pktap_lck_grp, pktap_lck_attr);
+	lck_grp_attr_free(lck_grp_attr);
+
+	LIST_INIT(&pktap_list);
+
+	error = if_clone_attach(&pktap_cloner);
+	if (error != 0)
+		panic("%s: if_clone_attach() failed, error %d\n", __func__, error);
+}
+
+__private_extern__ int
+pktap_clone_create(struct if_clone *ifc, u_int32_t unit, __unused void *params)
+{
+	int error = 0;
+	struct pktap_softc *pktap = NULL;
+	struct ifnet_init_params if_init;
+
+	PKTAP_LOG(PKTP_LOG_FUNC, "unit %u\n", unit);
+
+	pktap = _MALLOC(sizeof(struct pktap_softc), M_DEVBUF, M_WAITOK | M_ZERO);
+	if (pktap == NULL) {
+		printf("%s: _MALLOC failed\n", __func__);
+		error = ENOMEM;
+		goto done;
+	}
+	pktap->pktp_unit = unit;
+
+	/*
+	 * By default accept packets from physical interfaces
+	 */
+	pktap->pktp_filters[0].filter_op = PKTAP_FILTER_OP_PASS;
+	pktap->pktp_filters[0].filter_param = PKTAP_FILTER_PARAM_IF_TYPE;
+	pktap->pktp_filters[0].filter_param_if_type = IFT_ETHER;
+
+	pktap->pktp_filters[1].filter_op = PKTAP_FILTER_OP_PASS;
+	pktap->pktp_filters[1].filter_param = PKTAP_FILTER_PARAM_IF_TYPE;
+	pktap->pktp_filters[1].filter_param_if_type = IFT_IEEE1394;
+	/*
+	 * We do not use a set_bpf_tap() function as we'd rather rely on the more
+	 * accurate callback passed to bpf_attach()
+	 */
+	bzero(&if_init, sizeof(struct ifnet_init_params));
+	if_init.name = ifc->ifc_name;
+	if_init.unit = unit;
+	if_init.type = IFT_PKTAP;
+	if_init.family = IFNET_FAMILY_LOOPBACK;
+	if_init.output = pktap_if_output;
+	if_init.demux = pktap_demux;
+	if_init.add_proto = pktap_add_proto;
+	if_init.del_proto = pktap_del_proto;
+	if_init.softc = pktap;
+	if_init.ioctl = pktap_ioctl;
+	if_init.detach = pktap_detach;
+
+	error = ifnet_allocate(&if_init, &pktap->pktp_ifp);
+	if (error != 0) {
+		printf("%s: ifnet_allocate failed, error %d\n", __func__, error);
+		goto done;
+	}
+
+	ifnet_set_flags(pktap->pktp_ifp, IFF_UP, IFF_UP);
+
+	error = ifnet_attach(pktap->pktp_ifp, NULL);
+	if (error != 0) {
+		printf("%s: ifnet_attach failed - error %d\n", __func__, error);
+		ifnet_release(pktap->pktp_ifp);
+		goto done;
+	}
+
+	/* Attach DLT_PKTAP as the default DLT */
+	bpf_attach(pktap->pktp_ifp, DLT_PKTAP, sizeof(struct pktap_header), NULL,
+	    pktap_tap_callback);
+	bpf_attach(pktap->pktp_ifp, DLT_RAW, 0, NULL, pktap_tap_callback);
+
+	/* Take a reference and add to the global list */
+	ifnet_reference(pktap->pktp_ifp);
+	lck_rw_lock_exclusive(pktap_lck_rw);
+	LIST_INSERT_HEAD(&pktap_list, pktap, pktp_link);
+	lck_rw_done(pktap_lck_rw);
+done:
+	if (error != 0) {
+		if (pktap != NULL)
+			_FREE(pktap, M_DEVBUF);
+	}
+	return (error);
+}
+
+__private_extern__ int
+pktap_clone_destroy(struct ifnet *ifp)
+{
+	int error = 0;
+
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
+
+	(void) ifnet_detach(ifp);
+
+	return (error);
+}
+
+/*
+ * This function is called whenever a DLT is set on the interface:
+ * - When the interface is attached to a BPF device via BIOCSETIF for the default DLT
+ * - Whenever a new DLT is selected via BIOCSDLT
+ * - When the interface is detached from a BPF device (direction is zero)
+ */
+__private_extern__ errno_t
+pktap_tap_callback(ifnet_t ifp, u_int32_t dlt, bpf_tap_mode direction)
+{
+	struct pktap_softc *pktap;
+
+	pktap = ifp->if_softc;
+	if (pktap == NULL) {
+		printf("%s: if_softc is NULL for ifp %s\n", __func__,
+		    ifp->if_xname);
+		goto done;
+	}
+	switch (dlt) {
+	case DLT_RAW:
+		if (direction == 0) {
+			if (pktap->pktp_dlt_raw_count > 0) {
+				pktap->pktp_dlt_raw_count--;
+				OSAddAtomic(-1, &pktap_total_tap_count);
+
+			}
+		} else {
+			pktap->pktp_dlt_raw_count++;
+			OSAddAtomic(1, &pktap_total_tap_count);
+		}
+		break;
+	case DLT_PKTAP:
+		if (direction == 0) {
+			if (pktap->pktp_dlt_pkttap_count > 0) {
+				pktap->pktp_dlt_pkttap_count--;
+				OSAddAtomic(-1, &pktap_total_tap_count);
+			}
+		} else {
+			pktap->pktp_dlt_pkttap_count++;
+			OSAddAtomic(1, &pktap_total_tap_count);
+		}
+		break;
+	}
+done:
+	/*
+	 * Attachments count must be positive and we're in trouble
+	 * if we have more than 2**31 attachments
+	 */
+	VERIFY(pktap_total_tap_count >= 0);
+
+	return (0);
+}
+
+__private_extern__ errno_t
+pktap_if_output(ifnet_t ifp, mbuf_t m)
+{
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
+	mbuf_freem(m);
+	return (ENOTSUP);
+}
+
+__private_extern__ errno_t
+pktap_demux(ifnet_t ifp, __unused mbuf_t m, __unused char *header,
+    __unused protocol_family_t *ppf)
+{
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
+	return (ENOTSUP);
+}
+
+__private_extern__ errno_t
+pktap_add_proto(__unused ifnet_t ifp, protocol_family_t pf,
+    __unused const struct ifnet_demux_desc *dmx, __unused u_int32_t cnt)
+{
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s pf %u\n", ifp->if_xname, pf);
+	return (0);
+}
+
+__private_extern__ errno_t
+pktap_del_proto(__unused ifnet_t ifp, __unused protocol_family_t pf)
+{
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s pf %u\n", ifp->if_xname, pf);
+	return (0);
+}
+
+__private_extern__ errno_t
+pktap_getdrvspec(ifnet_t ifp, struct ifdrv64 *ifd)
+{
+	errno_t error = 0;
+	struct pktap_softc *pktap;
+	int i;
+
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
+
+	pktap = ifp->if_softc;
+	if (pktap == NULL) {
+		error = ENOENT;
+		printf("%s: pktap NULL - error %d\n", __func__, error);
+		goto done;
+	}
+
+	switch (ifd->ifd_cmd) {
+	case PKTP_CMD_FILTER_GET: {
+		struct x_pktap_filter x_filters[PKTAP_MAX_FILTERS];
+
+		bzero(&x_filters, sizeof(x_filters));
+
+		if (ifd->ifd_len < PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter)) {
+			printf("%s: PKTP_CMD_FILTER_GET ifd_len %llu too small - error %d\n",
+			    __func__, ifd->ifd_len, error);
+			error = EINVAL;
+			break;
+		}
+		for (i = 0; i < PKTAP_MAX_FILTERS; i++) {
+			struct pktap_filter *pktap_filter = pktap->pktp_filters + i;
+			struct x_pktap_filter *x_filter = x_filters + i;
+
+			x_filter->filter_op = pktap_filter->filter_op;
+			x_filter->filter_param = pktap_filter->filter_param;
+
+			if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE)
+				x_filter->filter_param_if_type = pktap_filter->filter_param_if_type;
+			else if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME)
+				strlcpy(x_filter->filter_param_if_name,
+				    pktap_filter->filter_param_if_name,
+				    sizeof(x_filter->filter_param_if_name));
+		}
+		error = copyout(x_filters, ifd->ifd_data,
+		    PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter));
+		if (error) {
+			printf("%s: PKTP_CMD_FILTER_GET copyout - error %d\n", __func__, error);
+			goto done;
+		}
+		break;
+	}
+	case PKTP_CMD_TAP_COUNT: {
+		uint32_t tap_count = pktap->pktp_dlt_raw_count + pktap->pktp_dlt_pkttap_count;
+
+		if (ifd->ifd_len < sizeof(tap_count)) {
+			printf("%s: PKTP_CMD_TAP_COUNT ifd_len %llu too small - error %d\n",
+			    __func__, ifd->ifd_len, error);
+			error = EINVAL;
+			break;
+		}
+		error = copyout(&tap_count, ifd->ifd_data, sizeof(tap_count));
+		if (error) {
+			printf("%s: PKTP_CMD_TAP_COUNT copyout - error %d\n", __func__, error);
+			goto done;
+		}
+		break;
+	}
+	default:
+		error = EINVAL;
+		break;
+	}
+
+done:
+	return (error);
+}
+
+__private_extern__ errno_t
+pktap_setdrvspec(ifnet_t ifp, struct ifdrv64 *ifd)
+{
+	errno_t error = 0;
+	struct pktap_softc *pktap;
+
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
+
+	pktap = ifp->if_softc;
+	if (pktap == NULL) {
+		error = ENOENT;
+		printf("%s: pktap NULL - error %d\n", __func__, error);
+		goto done;
+	}
+
+	switch (ifd->ifd_cmd) {
+	case PKTP_CMD_FILTER_SET: {
+		struct x_pktap_filter user_filters[PKTAP_MAX_FILTERS];
+		int i;
+		int got_op_none = 0;
+
+		if (ifd->ifd_len != PKTAP_MAX_FILTERS * sizeof(struct x_pktap_filter)) {
+			printf("%s: PKTP_CMD_FILTER_SET bad ifd_len %llu - error %d\n",
+			    __func__, ifd->ifd_len, error);
+			error = EINVAL;
+			break;
+		}
+		error = copyin(ifd->ifd_data, &user_filters, ifd->ifd_len);
+		if (error) {
+			printf("%s: copyin - error %d\n", __func__, error);
+			goto done;
+		}
+		/*
+		 * Validate user provided parameters
+		 */
+		for (i = 0; i < PKTAP_MAX_FILTERS; i++) {
+			struct x_pktap_filter *x_filter = user_filters + i;
+
+			switch (x_filter->filter_op) {
+			case PKTAP_FILTER_OP_NONE:
+				/* Following entries must be PKTAP_FILTER_OP_NONE */
+				got_op_none = 1;
+				break;
+			case PKTAP_FILTER_OP_PASS:
+			case PKTAP_FILTER_OP_SKIP:
+				/* Invalid after PKTAP_FILTER_OP_NONE */
+				if (got_op_none) {
+					error = EINVAL;
+					break;
+				}
+				break;
+			default:
+				error = EINVAL;
+				break;
+			}
+			if (error != 0)
+				break;
+
+			switch (x_filter->filter_param) {
+			case PKTAP_FILTER_PARAM_NONE:
+				if (x_filter->filter_op != PKTAP_FILTER_OP_NONE) {
+					error = EINVAL;
+					break;
+				}
+				break;
+
+			/*
+			 * Do not allow tapping a pktap from a pktap
+			 */
+			case PKTAP_FILTER_PARAM_IF_TYPE:
+				if (x_filter->filter_param_if_type == IFT_PKTAP ||
+				    x_filter->filter_param_if_type > 0xff) {
+					error = EINVAL;
+					break;
+				}
+				break;
+
+			case PKTAP_FILTER_PARAM_IF_NAME:
+				if (x_filter->filter_param_if_name[0] == 0 ||
+				    strncmp(x_filter->filter_param_if_name, PKTAP_IFNAME,
+				    strlen(PKTAP_IFNAME)) == 0) {
+					error = EINVAL;
+					break;
+				}
+				break;
+
+			default:
+				error = EINVAL;
+				break;
+			}
+			if (error != 0)
+				break;
+		}
+		if (error != 0)
+			break;
+		for (i = 0; i < PKTAP_MAX_FILTERS; i++) {
+			struct pktap_filter *pktap_filter = pktap->pktp_filters + i;
+			struct x_pktap_filter *x_filter = user_filters + i;
+
+			pktap_filter->filter_op = x_filter->filter_op;
+			pktap_filter->filter_param = x_filter->filter_param;
+
+			if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE)
+				pktap_filter->filter_param_if_type = x_filter->filter_param_if_type;
+			else if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) {
+				size_t len;
+
+				strlcpy(pktap_filter->filter_param_if_name,
+				    x_filter->filter_param_if_name,
+				    sizeof(pktap_filter->filter_param_if_name));
+				/*
+				 * If the name does not end with a number then it's a "wildcard"
+				 * match where we compare the prefix of the interface name
+				 */
+				len = strlen(pktap_filter->filter_param_if_name);
+				if (pktap_filter->filter_param_if_name[len - 1] < '0' ||
+				    pktap_filter->filter_param_if_name[len - 1] > '9')
+					pktap_filter->filter_ifname_prefix_len = len;
+			}
+		}
+		break;
+	}
+	default:
+		error = EINVAL;
+		break;
+	}
+
+done:
+	return (error);
+}
+
+__private_extern__ errno_t
+pktap_ioctl(ifnet_t ifp, unsigned long cmd, void *data)
+{
+	errno_t error = 0;
+
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
+
+	if ((cmd & IOC_IN)) {
+		error = kauth_authorize_generic(kauth_cred_get(), KAUTH_GENERIC_ISSUSER);
+		if (error) {
+			PKTAP_LOG(PKTP_LOG_ERROR,
+			    "%s: kauth_authorize_generic(KAUTH_GENERIC_ISSUSER) - error %d\n",
+			    __func__, error);
+			goto done;
+		}
+	}
+
+	switch (cmd) {
+	case SIOCGDRVSPEC32: {
+		struct ifdrv64 ifd;
+		struct ifdrv32 *ifd32 = (struct ifdrv32 *)data;
+
+		memcpy(ifd.ifd_name, ifd32->ifd_name, sizeof(ifd.ifd_name));
+		ifd.ifd_cmd = ifd32->ifd_cmd;
+		ifd.ifd_len = ifd32->ifd_len;
+		ifd.ifd_data = ifd32->ifd_data;
+
+		error = pktap_getdrvspec(ifp, &ifd);
+
+		break;
+	}
+	case SIOCGDRVSPEC64: {
+		struct ifdrv64 *ifd64 = (struct ifdrv64 *)data;
+
+		error = pktap_getdrvspec(ifp, ifd64);
+
+		break;
+	}
+	case SIOCSDRVSPEC32: {
+		struct ifdrv64 ifd;
+		struct ifdrv32 *ifd32 = (struct ifdrv32 *)data;
+
+		memcpy(ifd.ifd_name, ifd32->ifd_name, sizeof(ifd.ifd_name));
+		ifd.ifd_cmd = ifd32->ifd_cmd;
+		ifd.ifd_len = ifd32->ifd_len;
+		ifd.ifd_data = ifd32->ifd_data;
+
+		error = pktap_setdrvspec(ifp, &ifd);
+		break;
+	}
+	case SIOCSDRVSPEC64: {
+		struct ifdrv64 *ifd64 = (struct ifdrv64 *)data;
+
+		error = pktap_setdrvspec(ifp, ifd64);
+
+		break;
+	}
+	default:
+		error = ENOTSUP;
+		break;
+	}
+done:
+	return (error);
+}
+
+__private_extern__ void
+pktap_detach(ifnet_t ifp)
+{
+	struct pktap_softc *pktap;
+
+	PKTAP_LOG(PKTP_LOG_FUNC, "%s\n", ifp->if_xname);
+
+	lck_rw_lock_exclusive(pktap_lck_rw);
+
+	pktap = ifp->if_softc;
+	ifp->if_softc = NULL;
+	LIST_REMOVE(pktap, pktp_link);
+
+	lck_rw_done(pktap_lck_rw);
+
+	/* Drop the reference as it's no longer on the global list */
+	ifnet_release(ifp);
+
+	_FREE(pktap, M_DEVBUF);
+
+	/* This is for the reference taken by ifnet_attach() */
+	(void) ifnet_release(ifp);
+}
+
+__private_extern__ int
+pktap_filter_evaluate(struct pktap_softc *pktap, struct ifnet *ifp)
+{
+	int i;
+	int result = PKTAP_FILTER_SKIP; /* Need a positive matching rule to pass */
+	int match = 0;
+
+	for (i = 0; i < PKTAP_MAX_FILTERS; i++) {
+		struct pktap_filter *pktap_filter = pktap->pktp_filters + i;
+		size_t len = pktap_filter->filter_ifname_prefix_len != 0 ?
+ pktap_filter->filter_ifname_prefix_len : PKTAP_IFXNAMESIZE; + + switch (pktap_filter->filter_op) { + case PKTAP_FILTER_OP_NONE: + match = 1; + break; + + case PKTAP_FILTER_OP_PASS: + if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE) { + if (pktap_filter->filter_param_if_type == 0 || + ifp->if_type == pktap_filter->filter_param_if_type) { + result = PKTAP_FILTER_OK; + match = 1; + PKTAP_LOG(PKTP_LOG_FILTER, "pass %s match type %u\n", + ifp->if_xname, pktap_filter->filter_param_if_type); + break; + } + } + if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) { + if (strncmp(ifp->if_xname, pktap_filter->filter_param_if_name, + len) == 0) { + result = PKTAP_FILTER_OK; + match = 1; + PKTAP_LOG(PKTP_LOG_FILTER, "pass %s match name %s\n", + ifp->if_xname, pktap_filter->filter_param_if_name); + break; + } + } + break; + + case PKTAP_FILTER_OP_SKIP: + if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_TYPE) { + if (pktap_filter->filter_param_if_type == 0 || + ifp->if_type == pktap_filter->filter_param_if_type) { + result = PKTAP_FILTER_SKIP; + match = 1; + PKTAP_LOG(PKTP_LOG_FILTER, "skip %s match type %u\n", + ifp->if_xname, pktap_filter->filter_param_if_type); + break; + } + } + if (pktap_filter->filter_param == PKTAP_FILTER_PARAM_IF_NAME) { + if (strncmp(ifp->if_xname, pktap_filter->filter_param_if_name, + len) == 0) { + result = PKTAP_FILTER_SKIP; + match = 1; + PKTAP_LOG(PKTP_LOG_FILTER, "skip %s match name %s\n", + ifp->if_xname, pktap_filter->filter_param_if_name); + break; + } + } + break; + } + if (match) + break; + } + + if (match == 0) { + PKTAP_LOG(PKTP_LOG_FILTER, "%s no match\n", + ifp->if_xname); + } + return (result); +} + +__private_extern__ void +pktap_fill_proc_info(struct pktap_header *hdr, protocol_family_t proto, + struct mbuf *m, u_int32_t pre, int outgoing, struct ifnet *ifp) +{ + int found = 0; + struct so_procinfo soprocinfo; + + /* + * Getting the pid and procname is expensive + * For outgoing, do the lookup only if there's an + * associated socket as indicated by the flowhash + */ + if (outgoing != 0 && (m->m_pkthdr.pkt_flags & + (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC)) == (PKTF_FLOW_ID|PKTF_FLOW_LOCALSRC) && + m->m_pkthdr.pkt_flowsrc == FLOWSRC_INPCB) { + if (m->m_pkthdr.pkt_flags & PKTF_FLOW_RAWSOCK) + found = inp_findinpcb_procinfo(&ripcbinfo, m->m_pkthdr.pkt_flowid, &soprocinfo); + else if (m->m_pkthdr.pkt_proto == IPPROTO_TCP) + found = inp_findinpcb_procinfo(&tcbinfo, m->m_pkthdr.pkt_flowid, &soprocinfo); + else if (m->m_pkthdr.pkt_proto == IPPROTO_UDP) + found = inp_findinpcb_procinfo(&udbinfo, m->m_pkthdr.pkt_flowid, &soprocinfo); + } else if (outgoing == 0) { + struct inpcb *inp = NULL; + + if (proto == PF_INET) { + struct ip ip; + errno_t error; + size_t hlen; + struct in_addr faddr, laddr; + u_short fport, lport; + struct inpcbinfo *pcbinfo = NULL; + int wildcard = 0; + + error = mbuf_copydata(m, pre, sizeof(struct ip), &ip); + if (error != 0) { + PKTAP_LOG(PKTP_LOG_ERROR, "mbuf_copydata tcp v4 failed for %s\n", + hdr->pth_ifname); + goto done; + } + hlen = IP_VHL_HL(ip.ip_vhl) << 2; + + faddr = ip.ip_src; + laddr = ip.ip_dst; + + if (ip.ip_p == IPPROTO_TCP) { + struct tcphdr th; + + error = mbuf_copydata(m, pre + hlen, + sizeof(struct tcphdr), &th); + if (error != 0) + goto done; + + fport = th.th_sport; + lport = th.th_dport; + + pcbinfo = &tcbinfo; + } else if (ip.ip_p == IPPROTO_UDP) { + struct udphdr uh; + + error = mbuf_copydata(m, pre + hlen, + sizeof(struct udphdr), &uh); + if (error != 0) { + PKTAP_LOG(PKTP_LOG_ERROR, 
"mbuf_copydata udp v4 failed for %s\n", + hdr->pth_ifname); + goto done; + } + fport = uh.uh_sport; + lport = uh.uh_dport; + + pcbinfo = &udbinfo; + wildcard = 1; + } + if (pcbinfo != NULL) { + inp = in_pcblookup_hash(pcbinfo, faddr, fport, + laddr, lport, wildcard, outgoing ? NULL : ifp); + + if (inp == NULL && hdr->pth_iftype != IFT_LOOP) + PKTAP_LOG(PKTP_LOG_NOPCB, "in_pcblookup_hash no pcb %s\n", + hdr->pth_ifname); + } else { + PKTAP_LOG(PKTP_LOG_NOPCB, "unknown ip_p %u on %s\n", + ip.ip_p, + hdr->pth_ifname); + pktap_hexdump(PKTP_LOG_NOPCB, &ip, sizeof(struct ip)); + } + } else if (proto == PF_INET6) { + struct ip6_hdr ip6; + errno_t error; + struct in6_addr *faddr; + struct in6_addr *laddr; + u_short fport, lport; + struct inpcbinfo *pcbinfo = NULL; + int wildcard = 0; + + error = mbuf_copydata(m, pre, sizeof(struct ip6_hdr), &ip6); + if (error != 0) + goto done; + + faddr = &ip6.ip6_src; + laddr = &ip6.ip6_dst; + + if (ip6.ip6_nxt == IPPROTO_TCP) { + struct tcphdr th; + + error = mbuf_copydata(m, pre + sizeof(struct ip6_hdr), + sizeof(struct tcphdr), &th); + if (error != 0) { + PKTAP_LOG(PKTP_LOG_ERROR, "mbuf_copydata tcp v6 failed for %s\n", + hdr->pth_ifname); + goto done; + } + + fport = th.th_sport; + lport = th.th_dport; + + pcbinfo = &tcbinfo; + } else if (ip6.ip6_nxt == IPPROTO_UDP) { + struct udphdr uh; + + error = mbuf_copydata(m, pre + sizeof(struct ip6_hdr), + sizeof(struct udphdr), &uh); + if (error != 0) { + PKTAP_LOG(PKTP_LOG_ERROR, "mbuf_copydata udp v6 failed for %s\n", + hdr->pth_ifname); + goto done; + } + + fport = uh.uh_sport; + lport = uh.uh_dport; + + pcbinfo = &udbinfo; + wildcard = 1; + } + if (pcbinfo != NULL) { + inp = in6_pcblookup_hash(pcbinfo, faddr, fport, + laddr, lport, wildcard, outgoing ? NULL : ifp); + + if (inp == NULL && hdr->pth_iftype != IFT_LOOP) + PKTAP_LOG(PKTP_LOG_NOPCB, "in6_pcblookup_hash no pcb %s\n", + hdr->pth_ifname); + } else { + PKTAP_LOG(PKTP_LOG_NOPCB, "unknown ip6.ip6_nxt %u on %s\n", + ip6.ip6_nxt, + hdr->pth_ifname); + pktap_hexdump(PKTP_LOG_NOPCB, &ip6, sizeof(struct ip6_hdr)); + } + } + if (inp != NULL) { + if (inp->inp_state != INPCB_STATE_DEAD && inp->inp_socket != NULL) { + found = 1; + inp_get_soprocinfo(inp, &soprocinfo); + } + in_pcb_checkstate(inp, WNT_RELEASE, 0); + } + } + /* + * -1 means PID not found + */ + hdr->pth_pid = -1; + hdr->pth_epid = -1; + if (found != 0) { + hdr->pth_pid = soprocinfo.spi_pid; + if (soprocinfo.spi_pid == 0) + strlcpy(hdr->pth_comm, "mach_kernel", sizeof(hdr->pth_comm)); + else + proc_name(soprocinfo.spi_pid, hdr->pth_comm, MAXCOMLEN); + + /* + * When not delegated, the effective pid is the same as the real pid + */ + if (soprocinfo.spi_epid != soprocinfo.spi_pid) { + hdr->pth_flags |= PTH_FLAG_PROC_DELEGATED; + hdr->pth_epid = soprocinfo.spi_epid; + if (soprocinfo.spi_epid == 0) + strlcpy(hdr->pth_ecomm, "mach_kernel", sizeof(hdr->pth_ecomm)); + else + proc_name(soprocinfo.spi_epid, hdr->pth_ecomm, MAXCOMLEN); + } + } +done: + return; +} + +__private_extern__ void +pktap_bpf_tap(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m, + u_int32_t pre, u_int32_t post, int outgoing) +{ + struct pktap_softc *pktap; + void (*bpf_tap_func)(ifnet_t , u_int32_t , mbuf_t , void * , size_t ) = + outgoing ? 
bpf_tap_out : bpf_tap_in;
+
+	lck_rw_lock_shared(pktap_lck_rw);
+
+	/*
+	 * No need to take the ifnet_lock as the struct ifnet field if_bpf is
+	 * protected by the BPF subsystem
+	 */
+	LIST_FOREACH(pktap, &pktap_list, pktp_link) {
+		int filter_result;
+
+		filter_result = pktap_filter_evaluate(pktap, ifp);
+		if (filter_result == PKTAP_FILTER_SKIP)
+			continue;
+
+		if (pktap->pktp_dlt_raw_count > 0) {
+			/* We accept only IPv4 and IPv6 packets for the raw DLT */
+			if ((proto == AF_INET || proto == AF_INET6) &&
+			    !(m->m_pkthdr.pkt_flags & PKTF_INET_RESOLVE)) {
+				/*
+				 * We can play just with the length of the first mbuf in the
+				 * chain because bpf_tap_imp() disregards the packet length
+				 * of the mbuf packet header.
+				 */
+				if (mbuf_setdata(m, m->m_data + pre, m->m_len - pre) == 0) {
+					bpf_tap_func(pktap->pktp_ifp, DLT_RAW, m, NULL, 0);
+					mbuf_setdata(m, m->m_data - pre, m->m_len + pre);
+				}
+			}
+		}
+
+		if (pktap->pktp_dlt_pkttap_count > 0) {
+			struct {
+				struct pktap_header hdr;
+				u_int32_t proto;
+			} hdr_buffer;
+			struct pktap_header *hdr = &hdr_buffer.hdr;
+			size_t hdr_size = sizeof(struct pktap_header);
+			int unknown_if_type = 0;
+			size_t data_adjust = 0;
+			u_int32_t pre_adjust = 0;
+
+			/* Verify the structure is packed */
+			_CASSERT(sizeof(hdr_buffer) == sizeof(struct pktap_header) + sizeof(u_int32_t));
+
+			bzero(&hdr_buffer, sizeof(hdr_buffer));
+			hdr->pth_length = sizeof(struct pktap_header);
+			hdr->pth_type_next = PTH_TYPE_PACKET;
+
+			/*
+			 * Set DLT of packet based on interface type
+			 */
+			switch (ifp->if_type) {
+			case IFT_LOOP:
+			case IFT_GIF:
+			case IFT_STF:
+			case IFT_CELLULAR:
+				/*
+				 * Packets from pdp interfaces have no loopback
+				 * header that contains the protocol number.
+				 * As BPF just concatenates the header and the
+				 * packet content in a single buffer,
+				 * stash the protocol after the pktap header
+				 * and adjust the size of the header accordingly
+				 */
+				hdr->pth_dlt = DLT_NULL;
+				if (pre == 0) {
+					hdr_buffer.proto = proto;
+					hdr_size = sizeof(hdr_buffer);
+					pre_adjust = sizeof(hdr_buffer.proto);
+				}
+				break;
+			case IFT_ETHER:
+			case IFT_BRIDGE:
+			case IFT_L2VLAN:
+			case IFT_IEEE8023ADLAG:
+				hdr->pth_dlt = DLT_EN10MB;
+				break;
+			case IFT_PPP:
+				hdr->pth_dlt = DLT_PPP;
+				break;
+			case IFT_IEEE1394:
+				hdr->pth_dlt = DLT_APPLE_IP_OVER_IEEE1394;
+				break;
+			case IFT_OTHER:
+				if (strncmp(ifp->if_name, "utun", strlen("utun")) == 0) {
+					/*
+					 * For utun:
+					 * - incoming packets do not have the prefix set to four
+					 * - some packets are as small as two bytes!
+					 */
+					if (m_pktlen(m) < 4)
+						goto done;
+					if (proto != AF_INET && proto != AF_INET6)
+						goto done;
+					if (proto == AF_INET && (size_t) m_pktlen(m) - 4 < sizeof(struct ip))
+						goto done;
+					if (proto == AF_INET6 && (size_t) m_pktlen(m) - 4 < sizeof(struct ip6_hdr))
+						goto done;
+					/*
+					 * Skip the protocol in the mbuf as it's in network order
+					 */
+					pre = 4;
+					data_adjust = 4;
+					hdr->pth_dlt = DLT_NULL;
+					hdr_buffer.proto = proto;
+					hdr_size = sizeof(hdr_buffer);
+					break;
+				}
+			default:
+				if (pre == 0)
+					hdr->pth_dlt = DLT_RAW;
+				else
+					unknown_if_type = 1;
+				break;
+			}
+			if (unknown_if_type) {
+				PKTAP_LOG(PKTP_LOG_FUNC, "unknown if_type %u for %s\n",
+				    ifp->if_type, ifp->if_xname);
+				pktap_count_unknown_if_type += 1;
+			} else {
+				snprintf(hdr->pth_ifname, sizeof(hdr->pth_ifname), "%s",
+				    ifp->if_xname);
+				hdr->pth_flags |= outgoing ?
+				    PTH_FLAG_DIR_OUT : PTH_FLAG_DIR_IN;
+				hdr->pth_protocol_family = proto;
+				hdr->pth_frame_pre_length = pre + pre_adjust;
+				hdr->pth_frame_post_length = post;
+				hdr->pth_iftype = ifp->if_type;
+				hdr->pth_ifunit = ifp->if_unit;
+
+				pktap_fill_proc_info(hdr, proto, m, pre, outgoing, ifp);
+
+				hdr->pth_svc = so_svc2tc(m->m_pkthdr.pkt_svc);
+
+				if (data_adjust == 0) {
+					bpf_tap_func(pktap->pktp_ifp, DLT_PKTAP, m, hdr, hdr_size);
+				} else {
+					/*
+					 * We can play just with the length of the first mbuf in the
+					 * chain because bpf_tap_imp() disregards the packet length
+					 * of the mbuf packet header.
+					 */
+					if (mbuf_setdata(m, m->m_data + data_adjust, m->m_len - data_adjust) == 0) {
+						bpf_tap_func(pktap->pktp_ifp, DLT_PKTAP, m, hdr, hdr_size);
+						mbuf_setdata(m, m->m_data - data_adjust, m->m_len + data_adjust);
+					}
+				}
+			}
+		}
+	}
+done:
+	lck_rw_done(pktap_lck_rw);
+}
+
+__private_extern__ void
+pktap_input(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m,
+    char *frame_header)
+{
+	char *hdr = (char *)mbuf_data(m);
+	char *start = (char *)mbuf_datastart(m);
+
+	/* Fast path */
+	if (pktap_total_tap_count == 0)
+		return;
+
+	/* Make sure the frame header is fully contained in the mbuf */
+	if (frame_header != NULL && frame_header >= start && frame_header <= hdr) {
+		size_t o_len = m->m_len;
+		u_int32_t pre = hdr - frame_header;
+
+		if (mbuf_setdata(m, frame_header, o_len + pre) == 0) {
+			PKTAP_LOG(PKTP_LOG_INPUT, "ifp %s proto %u pre %u post %u\n",
+			    ifp->if_xname, proto, pre, 0);
+
+			pktap_bpf_tap(ifp, proto, m, pre, 0, 0);
+			mbuf_setdata(m, hdr, o_len);
+		}
+	} else {
+		PKTAP_LOG(PKTP_LOG_INPUT, "ifp %s proto %u pre %u post %u\n",
+		    ifp->if_xname, proto, 0, 0);
+
+		pktap_bpf_tap(ifp, proto, m, 0, 0, 0);
+	}
+}
+
+__private_extern__ void
+pktap_output(struct ifnet *ifp, protocol_family_t proto, struct mbuf *m,
+    u_int32_t pre, u_int32_t post)
+{
+	/* Fast path */
+	if (pktap_total_tap_count == 0)
+		return;
+
+	PKTAP_LOG(PKTP_LOG_OUTPUT, "ifp %s proto %u pre %u post %u\n",
+	    ifp->if_xname, proto, pre, post);
+
+	pktap_bpf_tap(ifp, proto, m, pre, post, 1);
+}
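The two entry points above complete the dataplane: pktap_input()/pktap_output() hand packets to pktap_bpf_tap(), while everything else is driven from userland through the standard BPF ioctls that pktap_tap_callback() reacts to. The following minimal userland reader is an illustrative sketch, not part of this patch; it assumes a pktap instance named "pktap0" has already been cloned (e.g. via SIOCIFCREATE or "ifconfig pktap0 create") and that DLT_PKTAP and <net/pktap.h> are visible to userland builds.

/*
 * Hypothetical DLT_PKTAP reader; illustrative only, not part of this patch.
 */
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <net/bpf.h>
#include <net/pktap.h>

int
main(void)
{
	struct ifreq ifr;
	u_int dlt = DLT_PKTAP, imm = 1, bufsize = 0;
	char *buf;
	ssize_t n;
	int fd;

	fd = open("/dev/bpf0", O_RDWR);		/* any free bpf device */
	if (fd < 0)
		return (1);

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "pktap0", sizeof(ifr.ifr_name));
	/* Attach to the interface: runs pktap_tap_callback() for the default DLT */
	if (ioctl(fd, BIOCSETIF, &ifr) == -1)
		return (1);
	/* Select the metadata-carrying DLT: runs pktap_tap_callback() again */
	if (ioctl(fd, BIOCSDLT, &dlt) == -1)
		return (1);
	(void) ioctl(fd, BIOCIMMEDIATE, &imm);

	/* bpf reads must be issued with the device's exact store-buffer size */
	if (ioctl(fd, BIOCGBLEN, &bufsize) == -1 ||
	    (buf = malloc(bufsize)) == NULL)
		return (1);

	while ((n = read(fd, buf, bufsize)) > 0) {
		char *p = buf;

		while (p < buf + n) {
			struct bpf_hdr *bh = (struct bpf_hdr *)(void *)p;
			struct pktap_header *pth =
			    (struct pktap_header *)(void *)(p + bh->bh_hdrlen);

			/* Raw packet bytes start pth_length bytes into the capture */
			printf("%s: pid %d (%s) dlt %u %s\n",
			    pth->pth_ifname, (int)pth->pth_pid, pth->pth_comm,
			    pth->pth_dlt,
			    (pth->pth_flags & PTH_FLAG_DIR_OUT) ? "out" : "in");
			p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen);
		}
	}
	free(buf);
	close(fd);
	return (0);
}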
diff --git a/bsd/net/pktap.h b/bsd/net/pktap.h
new file mode 100644
index 000000000..cecb5cb94
--- /dev/null
+++ b/bsd/net/pktap.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NET_PKTAP_H_
+#define _NET_PKTAP_H_
+
+#include
+#include
+
+#ifdef PRIVATE
+
+#define PKTAP_IFNAME "pktap"
+
+/* To store interface name + unit */
+#define PKTAP_IFXNAMESIZE (IF_NAMESIZE + 8)
+
+/*
+ * Commands via SIOCGDRVSPEC/SIOCSDRVSPEC
+ */
+#define PKTP_CMD_FILTER_GET	1	/* array of PKTAP_MAX_FILTERS * struct pktap_filter */
+#define PKTP_CMD_FILTER_SET	3	/* array of PKTAP_MAX_FILTERS * struct pktap_filter */
+#define PKTP_CMD_TAP_COUNT	4	/* uint32_t number of active bpf taps on the interface */
+
+/*
+ * Filtering is currently based on network interface properties --
+ * the interface type and the interface name -- and has two types of
+ * operations -- pass and skip.
+ * By default only interfaces of type IFT_ETHER and IFT_IEEE1394 pass
+ * the filter.
+ * It's possible to include other interfaces by type or by name.
+ * The interface type is evaluated before the interface name.
+ * The first matching rule stops the evaluation.
+ * A rule with interface type 0 (zero) matches any interface.
+ */
+#define PKTAP_FILTER_OP_NONE	0	/* For inactive entries at the end of the list */
+#define PKTAP_FILTER_OP_PASS	1
+#define PKTAP_FILTER_OP_SKIP	2
+
+#define PKTAP_FILTER_PARAM_NONE		0
+#define PKTAP_FILTER_PARAM_IF_TYPE	1
+#define PKTAP_FILTER_PARAM_IF_NAME	2
+
+#ifdef BSD_KERNEL_PRIVATE
+struct pktap_filter {
+	uint32_t	filter_op;
+	uint32_t	filter_param;
+	union {
+		uint32_t	_filter_if_type;
+		char		_filter_if_name[PKTAP_IFXNAMESIZE];
+	} param_;
+	size_t		filter_ifname_prefix_len;
+};
+
+struct x_pktap_filter {
+#else
+struct pktap_filter {
+#endif /* BSD_KERNEL_PRIVATE */
+	uint32_t	filter_op;
+	uint32_t	filter_param;
+	union {
+		uint32_t	_filter_if_type;
+		char		_filter_if_name[PKTAP_IFXNAMESIZE];
+	} param_;
+};
+#define filter_param_if_type param_._filter_if_type
+#define filter_param_if_name param_._filter_if_name
+
+#define PKTAP_MAX_FILTERS 8
+
+/*
+ * Header for DLT_PKTAP
+ *
+ * In theory, there could be several types of blocks in a chain before the actual packet
+ */
+struct pktap_header {
+	uint32_t	pth_length;			/* length of this header */
+	uint32_t	pth_type_next;			/* type of data following */
+	uint32_t	pth_dlt;			/* DLT of packet */
+	char		pth_ifname[PKTAP_IFXNAMESIZE];	/* interface name */
+	uint32_t	pth_flags;			/* flags */
+	uint32_t	pth_protocol_family;
+	uint32_t	pth_frame_pre_length;
+	uint32_t	pth_frame_post_length;
+	pid_t		pth_pid;			/* process ID */
+	char		pth_comm[MAXCOMLEN+1];		/* process command name */
+	uint32_t	pth_svc;			/* service class */
+	uint16_t	pth_iftype;
+	uint16_t	pth_ifunit;
+	pid_t		pth_epid;			/* effective process ID */
+	char		pth_ecomm[MAXCOMLEN+1];		/* effective command name */
+};
+
+/*
+ * Types for pth_type_next
+ */
+#define PTH_TYPE_NONE	0	/* No more data following */
+#define PTH_TYPE_PACKET	1	/* Actual captured packet data */
+
+#define PTH_FLAG_DIR_IN		0x0001	/* Incoming packet */
+#define PTH_FLAG_DIR_OUT	0x0002	/* Outgoing packet */
+#define PTH_FLAG_PROC_DELEGATED	0x0004	/* Process delegated */
+#define PTH_FLAG_IF_DELEGATED	0x0008	/* Interface delegated */
+
+
+#ifdef BSD_KERNEL_PRIVATE
+extern void pktap_init(void);
+extern void pktap_input(struct ifnet *, protocol_family_t, struct mbuf *, char *);
+extern void pktap_output(struct ifnet *, protocol_family_t, struct mbuf *,
+    u_int32_t, u_int32_t);
+extern void pktap_fill_proc_info(struct pktap_header *, protocol_family_t,
+    struct mbuf *, u_int32_t, int, struct ifnet *);
+
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* PRIVATE */
+
+#endif /* _NET_PKTAP_H_ */
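The filtering rules described above are programmed with PKTP_CMD_FILTER_SET through SIOCSDRVSPEC, which pktap_ioctl() funnels into pktap_setdrvspec(). The sketch below is hypothetical and not part of this patch: it assumes the userland-visible struct ifdrv from <net/if.h>, a privileged caller (pktap_ioctl() requires KAUTH_GENERIC_ISSUSER for set-side ioctls), and an already-created pktap instance.

/*
 * Hypothetical sketch: restrict a pktap instance to "en"-prefixed
 * interfaces plus any loopback interface. Illustrative only.
 */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/pktap.h>

int
pktap_set_filters(const char *pktap_if)
{
	struct pktap_filter filters[PKTAP_MAX_FILTERS];
	struct ifdrv ifd;
	int s, error;

	/* Zeroed tail entries act as PKTAP_FILTER_OP_NONE terminators */
	memset(filters, 0, sizeof(filters));

	/* Rule 0: pass interfaces whose name starts with "en" (prefix match) */
	filters[0].filter_op = PKTAP_FILTER_OP_PASS;
	filters[0].filter_param = PKTAP_FILTER_PARAM_IF_NAME;
	strlcpy(filters[0].filter_param_if_name, "en",
	    sizeof(filters[0].filter_param_if_name));

	/* Rule 1: pass any loopback interface by type */
	filters[1].filter_op = PKTAP_FILTER_OP_PASS;
	filters[1].filter_param = PKTAP_FILTER_PARAM_IF_TYPE;
	filters[1].filter_param_if_type = IFT_LOOP;

	memset(&ifd, 0, sizeof(ifd));
	strlcpy(ifd.ifd_name, pktap_if, sizeof(ifd.ifd_name));
	ifd.ifd_cmd = PKTP_CMD_FILTER_SET;
	/* pktap_setdrvspec() insists on the full array, unused slots included */
	ifd.ifd_len = sizeof(filters);
	ifd.ifd_data = filters;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	if (s < 0)
		return (-1);
	error = ioctl(s, SIOCSDRVSPEC, &ifd);
	close(s);
	return (error);
}

Because "en" does not end in a digit, pktap_setdrvspec() records it as a prefix ("wildcard") rule, so en0, en1, and so on all match in pktap_filter_evaluate().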
diff --git a/bsd/net/pktsched/Makefile b/bsd/net/pktsched/Makefile
index ad824436a..a5d9cba8e 100644
--- a/bsd/net/pktsched/Makefile
+++ b/bsd/net/pktsched/Makefile
@@ -6,18 +6,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-INSTINC_SUBDIRS = \
-
-INSTINC_SUBDIRS_PPC = \
-
-INSTINC_SUBDIRS_I386 = \
-
-EXPINC_SUBDIRS = \
-
-EXPINC_SUBDIRS_PPC = \
-
-EXPINC_SUBDIRS_I386 = \
-
 DATAFILES= \
 
 KERNELFILES= \
diff --git a/bsd/net/pktsched/pktsched_cbq.c b/bsd/net/pktsched/pktsched_cbq.c
index a923f6c87..0c8663899 100644
--- a/bsd/net/pktsched/pktsched_cbq.c
+++ b/bsd/net/pktsched/pktsched_cbq.c
@@ -526,7 +526,11 @@ cbq_enqueue(cbq_state_t *cbqp, struct rm_class *cl, struct mbuf *m,
 	}
 
 	if (cl == NULL) {
+#if PF_ALTQ
 		cl = cbq_clh_to_clp(cbqp, t->pftag_qid);
+#else /* !PF_ALTQ */
+		cl = cbq_clh_to_clp(cbqp, 0);
+#endif /* !PF_ALTQ */
 		if (cl == NULL) {
 			cl = cbqp->ifnp.default_;
 			if (cl == NULL) {
diff --git a/bsd/net/pktsched/pktsched_fairq.c b/bsd/net/pktsched/pktsched_fairq.c
index f5fc7356b..28d7363e8 100644
--- a/bsd/net/pktsched/pktsched_fairq.c
+++ b/bsd/net/pktsched/pktsched_fairq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -666,7 +666,11 @@ fairq_enqueue(struct fairq_if *fif, struct fairq_class *cl, struct mbuf *m,
 	VERIFY(cl == NULL || cl->cl_fif == fif);
 
 	if (cl == NULL) {
+#if PF_ALTQ
 		cl = fairq_clh_to_clp(fif, t->pftag_qid);
+#else /* !PF_ALTQ */
+		cl = fairq_clh_to_clp(fif, 0);
+#endif /* !PF_ALTQ */
 		if (cl == NULL) {
 			cl = fif->fif_default;
 			if (cl == NULL) {
@@ -809,7 +813,7 @@ fairq_addq(struct fairq_class *cl, struct mbuf *m, struct pf_mtag *t)
 {
 	struct ifclassq *ifq = cl->cl_fif->fif_ifq;
 	fairq_bucket_t *b;
-	u_int32_t hash = t->pftag_flowhash;
+	u_int32_t hash = m->m_pkthdr.pkt_flowid;
 	u_int32_t hindex;
 	u_int64_t bw;
 
@@ -901,8 +905,10 @@ fairq_addq(struct fairq_class *cl, struct mbuf *m, struct pf_mtag *t)
 		return (CLASSQEQ_DROPPED);
 	}
 
+#if PF_ECN
 	if (cl->cl_flags & FARF_CLEARDSCP)
 		write_dsfield(m, t, 0);
+#endif /* PF_ECN */
 
 	_addq(&b->queue, m);
 
@@ -1013,8 +1019,9 @@ fairq_pollq(struct fairq_class *cl, u_int64_t cur_time, int *hit_limit)
 			*hit_limit = 1;
 		cl->cl_bw_current = bw;
 #if 0
-		printf("BW %6lld relative to %6u %d queue %p\n",
-		    bw, cl->cl_bandwidth, *hit_limit, b);
+		printf("BW %6lld relative to %6u %d queue 0x%llx\n",
+		    bw, cl->cl_bandwidth, *hit_limit,
+		    (uint64_t)VM_KERNEL_ADDRPERM(b));
#endif
 	}
 	return (m);
diff --git a/bsd/net/pktsched/pktsched_hfsc.c b/bsd/net/pktsched/pktsched_hfsc.c
index c7b405380..894cf7e28 100644
--- a/bsd/net/pktsched/pktsched_hfsc.c
+++ b/bsd/net/pktsched/pktsched_hfsc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -767,7 +767,11 @@ hfsc_enqueue(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf *m, VERIFY(cl == NULL || cl->cl_hif == hif); if (cl == NULL) { +#if PF_ALTQ cl = hfsc_clh_to_clp(hif, t->pftag_qid); +#else /* !PF_ALTQ */ + cl = hfsc_clh_to_clp(hif, 0); +#endif /* !PF_ALTQ */ if (cl == NULL || HFSC_IS_A_PARENT_CLASS(cl)) { cl = hif->hif_defaultclass; if (cl == NULL) { @@ -971,8 +975,10 @@ hfsc_addq(struct hfsc_class *cl, struct mbuf *m, struct pf_mtag *t) return (CLASSQEQ_DROPPED); } +#if PF_ECN if (cl->cl_flags & HFCF_CLEARDSCP) write_dsfield(m, t, 0); +#endif /* PF_ECN */ _addq(&cl->cl_q, m); @@ -1149,7 +1155,7 @@ hfsc_updateq(struct hfsc_if *hif, struct hfsc_class *cl, cqev_t ev) cl->cl_handle, cl->cl_id, ifclassq_ev2str(ev)); } - if (ev == CLASSQ_EV_LINK_SPEED) + if (ev == CLASSQ_EV_LINK_BANDWIDTH) hfsc_updateq_linkrate(hif, cl); #if CLASSQ_RIO diff --git a/bsd/net/pktsched/pktsched_priq.c b/bsd/net/pktsched/pktsched_priq.c index c3a6f5e56..23fa87fc2 100644 --- a/bsd/net/pktsched/pktsched_priq.c +++ b/bsd/net/pktsched/pktsched_priq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2012 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -100,6 +100,7 @@ static void priq_updateq(struct priq_if *, struct priq_class *, cqev_t); static int priq_throttle(struct priq_if *, cqrq_throttle_t *); static int priq_resumeq(struct priq_if *, struct priq_class *); static int priq_suspendq(struct priq_if *, struct priq_class *); +static int priq_stat_sc(struct priq_if *, cqrq_stat_sc_t *); static inline struct priq_class *priq_clh_to_clp(struct priq_if *, u_int32_t); static const char *priq_style(struct priq_if *); @@ -569,7 +570,11 @@ priq_enqueue(struct priq_if *pif, struct priq_class *cl, struct mbuf *m, VERIFY(cl == NULL || cl->cl_pif == pif); if (cl == NULL) { +#if PF_ALTQ cl = priq_clh_to_clp(pif, t->pftag_qid); +#else /* !PF_ALTQ */ + cl = priq_clh_to_clp(pif, 0); +#endif /* !PF_ALTQ */ if (cl == NULL) { cl = pif->pif_default; if (cl == NULL) { @@ -728,8 +733,10 @@ priq_addq(struct priq_class *cl, struct mbuf *m, struct pf_mtag *t) return (CLASSQEQ_DROPPED); } +#if PF_ECN if (cl->cl_flags & PRCF_CLEARDSCP) write_dsfield(m, t, 0); +#endif /* PF_ECN */ _addq(&cl->cl_q, m); @@ -900,6 +907,27 @@ priq_get_class_stats(struct priq_if *pif, u_int32_t qid, return (0); } +static int +priq_stat_sc(struct priq_if *pif, cqrq_stat_sc_t *sr) +{ + struct ifclassq *ifq = pif->pif_ifq; + struct priq_class *cl; + u_int32_t i; + + IFCQ_LOCK_ASSERT_HELD(ifq); + + VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc)); + + i = MBUF_SCIDX(sr->sc); + VERIFY(i < IFCQ_SC_MAX); + + cl = ifq->ifcq_disc_slots[i].cl; + sr->packets = qlen(&cl->cl_q); + sr->bytes = qsize(&cl->cl_q); + + return (0); +} + /* convert a class handle to the corresponding class pointer */ static inline struct priq_class * priq_clh_to_clp(struct priq_if *pif, u_int32_t chandle) @@ -989,6 +1017,10 @@ priq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg) case CLASSQRQ_THROTTLE: err = priq_throttle(pif, (cqrq_throttle_t *)arg); break; + + case CLASSQRQ_STAT_SC: + err = priq_stat_sc(pif, (cqrq_stat_sc_t *)arg); + break; } return (err); } @@ -1152,7 +1184,7 @@ priq_throttle(struct priq_if *pif, cqrq_throttle_t *tr) { struct ifclassq *ifq = pif->pif_ifq; struct priq_class *cl; - int err; + int err = 0; IFCQ_LOCK_ASSERT_HELD(ifq); VERIFY(!(pif->pif_flags & PRIQIFF_ALTQ)); diff --git 
a/bsd/net/pktsched/pktsched_qfq.c b/bsd/net/pktsched/pktsched_qfq.c index d7cca36da..cc696f730 100644 --- a/bsd/net/pktsched/pktsched_qfq.c +++ b/bsd/net/pktsched/pktsched_qfq.c @@ -99,6 +99,7 @@ static void qfq_updateq(struct qfq_if *, struct qfq_class *, cqev_t); static int qfq_throttle(struct qfq_if *, cqrq_throttle_t *); static int qfq_resumeq(struct qfq_if *, struct qfq_class *); static int qfq_suspendq(struct qfq_if *, struct qfq_class *); +static int qfq_stat_sc(struct qfq_if *, cqrq_stat_sc_t *); static inline struct qfq_class *qfq_clh_to_clp(struct qfq_if *, u_int32_t); static const char *qfq_style(struct qfq_if *); @@ -981,9 +982,10 @@ qfq_dequeue(struct qfq_if *qif, cqdq_op_t op) qif->qif_V += (u_int64_t)len * QFQ_IWSUM; if (pktsched_verbose > 2) { - log(LOG_DEBUG, "%s: %s qid=%d dequeue m=%p F=0x%llx V=0x%llx", - if_name(QFQIF_IFP(qif)), qfq_style(qif), cl->cl_handle, - m, cl->cl_F, qif->qif_V); + log(LOG_DEBUG, "%s: %s qid=%d dequeue m=0x%llx F=0x%llx " + "V=0x%llx", if_name(QFQIF_IFP(qif)), qfq_style(qif), + cl->cl_handle, (uint64_t)VM_KERNEL_ADDRPERM(m), cl->cl_F, + qif->qif_V); } if (qfq_update_class(qif, grp, cl)) { @@ -1075,7 +1077,11 @@ qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m, VERIFY(cl == NULL || cl->cl_qif == qif); if (cl == NULL) { +#if PF_ALTQ cl = qfq_clh_to_clp(qif, t->pftag_qid); +#else /* !PF_ALTQ */ + cl = qfq_clh_to_clp(qif, 0); +#endif /* !PF_ALTQ */ if (cl == NULL) { cl = qif->qif_default; if (cl == NULL) { @@ -1159,9 +1165,10 @@ qfq_enqueue(struct qfq_if *qif, struct qfq_class *cl, struct mbuf *m, pktsched_bit_set(grp->qfg_index, &qif->qif_bitmaps[s]); if (pktsched_verbose > 2) { - log(LOG_DEBUG, "%s: %s qid=%d enqueue m=%p state=%s 0x%x " + log(LOG_DEBUG, "%s: %s qid=%d enqueue m=0x%llx state=%s 0x%x " "S=0x%llx F=0x%llx V=0x%llx\n", if_name(QFQIF_IFP(qif)), - qfq_style(qif), cl->cl_handle, m, qfq_state2str(s), + qfq_style(qif), cl->cl_handle, + (uint64_t)VM_KERNEL_ADDRPERM(m), qfq_state2str(s), qif->qif_bitmaps[s], cl->cl_S, cl->cl_F, qif->qif_V); } @@ -1362,8 +1369,10 @@ qfq_addq(struct qfq_class *cl, struct mbuf *m, struct pf_mtag *t) return (CLASSQEQ_DROPPED); } +#if PF_ECN if (cl->cl_flags & QFCF_CLEARDSCP) write_dsfield(m, t, 0); +#endif /* PF_ECN */ _addq(&cl->cl_q, m); @@ -1540,6 +1549,27 @@ qfq_get_class_stats(struct qfq_if *qif, u_int32_t qid, return (0); } +static int +qfq_stat_sc(struct qfq_if *qif, cqrq_stat_sc_t *sr) +{ + struct ifclassq *ifq = qif->qif_ifq; + struct qfq_class *cl; + u_int32_t i; + + IFCQ_LOCK_ASSERT_HELD(ifq); + + VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc)); + + i = MBUF_SCIDX(sr->sc); + VERIFY(i < IFCQ_SC_MAX); + + cl = ifq->ifcq_disc_slots[i].cl; + sr->packets = qlen(&cl->cl_q); + sr->bytes = qsize(&cl->cl_q); + + return (0); +} + /* convert a class handle to the corresponding class pointer */ static inline struct qfq_class * qfq_clh_to_clp(struct qfq_if *qif, u_int32_t chandle) @@ -1654,9 +1684,11 @@ qfq_dump_groups(struct qfq_if *qif, u_int32_t mask) for (j = 0; j < qif->qif_maxslots; j++) { if (g->qfg_slots[j]) { - log(LOG_DEBUG, "%s: %s bucket %d %p " + log(LOG_DEBUG, "%s: %s bucket %d 0x%llx " "qid %d\n", if_name(QFQIF_IFP(qif)), - qfq_style(qif), j, g->qfg_slots[j], + qfq_style(qif), j, + (uint64_t)VM_KERNEL_ADDRPERM( + g->qfg_slots[j]), g->qfg_slots[j]->cl_handle); } } @@ -1749,6 +1781,9 @@ qfq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg) case CLASSQRQ_THROTTLE: err = qfq_throttle(qif, (cqrq_throttle_t *)arg); break; + case CLASSQRQ_STAT_SC: + err = 
qfq_stat_sc(qif, (cqrq_stat_sc_t *)arg); + break; } return (err); } @@ -1912,7 +1947,7 @@ qfq_throttle(struct qfq_if *qif, cqrq_throttle_t *tr) { struct ifclassq *ifq = qif->qif_ifq; struct qfq_class *cl; - int err; + int err = 0; IFCQ_LOCK_ASSERT_HELD(ifq); VERIFY(!(qif->qif_flags & QFQIFF_ALTQ)); diff --git a/bsd/net/pktsched/pktsched_rmclass.c b/bsd/net/pktsched/pktsched_rmclass.c index a5f8e5a84..89b98e14b 100644 --- a/bsd/net/pktsched/pktsched_rmclass.c +++ b/bsd/net/pktsched/pktsched_rmclass.c @@ -1718,8 +1718,11 @@ _rmc_addq(rm_class_t *cl, struct mbuf *m, struct pf_mtag *t) } if (cl->sfb_ != NULL) return (sfb_addq(cl->sfb_, &cl->q_, m, t)); - } else if (cl->flags_ & RMCF_CLEARDSCP) + } +#if PF_ECN + else if (cl->flags_ & RMCF_CLEARDSCP) write_dsfield(m, t, 0); +#endif /* PF_ECN */ /* test for qlen > qlimit is done by caller */ _addq(&cl->q_, m); diff --git a/bsd/net/pktsched/pktsched_tcq.c b/bsd/net/pktsched/pktsched_tcq.c index fb66ee785..308199d3e 100644 --- a/bsd/net/pktsched/pktsched_tcq.c +++ b/bsd/net/pktsched/pktsched_tcq.c @@ -70,6 +70,7 @@ static void tcq_updateq(struct tcq_if *, struct tcq_class *, cqev_t); static int tcq_throttle(struct tcq_if *, cqrq_throttle_t *); static int tcq_resumeq(struct tcq_if *, struct tcq_class *); static int tcq_suspendq(struct tcq_if *, struct tcq_class *); +static int tcq_stat_sc(struct tcq_if *, cqrq_stat_sc_t *); static struct mbuf *tcq_dequeue_cl(struct tcq_if *, struct tcq_class *, mbuf_svc_class_t, cqdq_op_t); static inline struct tcq_class *tcq_clh_to_clp(struct tcq_if *, u_int32_t); @@ -536,7 +537,11 @@ tcq_enqueue(struct tcq_if *tif, struct tcq_class *cl, struct mbuf *m, VERIFY(cl == NULL || cl->cl_tif == tif); if (cl == NULL) { +#if PF_ALTQ cl = tcq_clh_to_clp(tif, t->pftag_qid); +#else /* !PF_ALTQ */ + cl = tcq_clh_to_clp(tif, 0); +#endif /* !PF_ALTQ */ if (cl == NULL) { cl = tif->tif_default; if (cl == NULL) { @@ -689,8 +694,10 @@ tcq_addq(struct tcq_class *cl, struct mbuf *m, struct pf_mtag *t) return (CLASSQEQ_DROPPED); } +#if PF_ECN if (cl->cl_flags & TQCF_CLEARDSCP) write_dsfield(m, t, 0); +#endif /* PF_ECN */ _addq(&cl->cl_q, m); @@ -856,6 +863,27 @@ tcq_get_class_stats(struct tcq_if *tif, u_int32_t qid, return (0); } +static int +tcq_stat_sc(struct tcq_if *tif, cqrq_stat_sc_t *sr) +{ + struct ifclassq *ifq = tif->tif_ifq; + struct tcq_class *cl; + u_int32_t i; + + IFCQ_LOCK_ASSERT_HELD(ifq); + + VERIFY(sr->sc == MBUF_SC_UNSPEC || MBUF_VALID_SC(sr->sc)); + + i = MBUF_SCIDX(sr->sc); + VERIFY(i < IFCQ_SC_MAX); + + cl = ifq->ifcq_disc_slots[i].cl; + sr->packets = qlen(&cl->cl_q); + sr->bytes = qsize(&cl->cl_q); + + return (0); +} + /* convert a class handle to the corresponding class pointer */ static inline struct tcq_class * tcq_clh_to_clp(struct tcq_if *tif, u_int32_t chandle) @@ -951,6 +979,10 @@ tcq_request_ifclassq(struct ifclassq *ifq, cqrq_t req, void *arg) case CLASSQRQ_THROTTLE: err = tcq_throttle(tif, (cqrq_throttle_t *)arg); break; + + case CLASSQRQ_STAT_SC: + err = tcq_stat_sc(tif, (cqrq_stat_sc_t *)arg); + break; } return (err); } @@ -1093,7 +1125,7 @@ tcq_throttle(struct tcq_if *tif, cqrq_throttle_t *tr) { struct ifclassq *ifq = tif->tif_ifq; struct tcq_class *cl; - int err; + int err = 0; IFCQ_LOCK_ASSERT_HELD(ifq); VERIFY(!(tif->tif_flags & TCQIFF_ALTQ)); diff --git a/bsd/net/ppp_deflate.c b/bsd/net/ppp_deflate.c deleted file mode 100644 index 4541def29..000000000 --- a/bsd/net/ppp_deflate.c +++ /dev/null @@ -1,717 +0,0 @@ -/* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * ppp_deflate.c - interface the zlib procedures for Deflate compression - * and decompression (as used by gzip) to the PPP code. - * This version is for use with mbufs on BSD-derived systems. - * - * Copyright (c) 1994 The Australian National University. - * All rights reserved. - * - * Permission to use, copy, modify, and distribute this software and its - * documentation is hereby granted, provided that the above copyright - * notice appears in all copies. This software is provided without any - * warranty, express or implied. The Australian National University - * makes no representations about the suitability of this software for - * any purpose. - * - * IN NO EVENT SHALL THE AUSTRALIAN NATIONAL UNIVERSITY BE LIABLE TO ANY - * PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF - * THE AUSTRALIAN NATIONAL UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY - * OF SUCH DAMAGE. - * - * THE AUSTRALIAN NATIONAL UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE AUSTRALIAN NATIONAL UNIVERSITY HAS NO - * OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, - * OR MODIFICATIONS. - */ -/* - * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce - * support for mandatory and extensible security protections. This notice - * is included in support of clause 2.2 (b) of the Apple Public License, - * Version 2.0. - */ - -#include -#include -#include -#include -#include -#include - -#define PACKETPTR struct mbuf * -#include - -#if CONFIG_MACF_NET -#include -#endif - -#if DO_DEFLATE - -#define DEFLATE_DEBUG 1 - -/* - * State for a Deflate (de)compressor. 
- */ -struct deflate_state { - int seqno; - int w_size; - int unit; - int hdrlen; - int mru; - int debug; - z_stream strm; - struct compstat stats; -}; - -#define DEFLATE_OVHD 2 /* Deflate overhead/packet */ - -static void *z_alloc(void *, u_int items, u_int size); -static void z_free(void *, void *ptr); -static void *z_comp_alloc(u_char *options, int opt_len); -static void *z_decomp_alloc(u_char *options, int opt_len); -static void z_comp_free(void *state); -static void z_decomp_free(void *state); -static int z_comp_init(void *state, u_char *options, int opt_len, - int unit, int hdrlen, int debug); -static int z_decomp_init(void *state, u_char *options, int opt_len, - int unit, int hdrlen, int mru, int debug); -static int z_compress(void *state, struct mbuf **mret, - struct mbuf *mp, int slen, int maxolen); -static void z_incomp(void *state, struct mbuf *dmsg); -static int z_decompress(void *state, struct mbuf *cmp, struct mbuf **dmpp); -static void z_comp_reset(void *state); -static void z_decomp_reset(void *state); -static void z_comp_stats(void *state, struct compstat *stats); - -/* - * Procedures exported to if_ppp.c. - */ -struct compressor ppp_deflate = { - CI_DEFLATE, /* compress_proto */ - z_comp_alloc, /* comp_alloc */ - z_comp_free, /* comp_free */ - z_comp_init, /* comp_init */ - z_comp_reset, /* comp_reset */ - z_compress, /* compress */ - z_comp_stats, /* comp_stat */ - z_decomp_alloc, /* decomp_alloc */ - z_decomp_free, /* decomp_free */ - z_decomp_init, /* decomp_init */ - z_decomp_reset, /* decomp_reset */ - z_decompress, /* decompress */ - z_incomp, /* incomp */ - z_comp_stats, /* decomp_stat */ -}; - -struct compressor ppp_deflate_draft = { - CI_DEFLATE_DRAFT, /* compress_proto */ - z_comp_alloc, /* comp_alloc */ - z_comp_free, /* comp_free */ - z_comp_init, /* comp_init */ - z_comp_reset, /* comp_reset */ - z_compress, /* compress */ - z_comp_stats, /* comp_stat */ - z_decomp_alloc, /* decomp_alloc */ - z_decomp_free, /* decomp_free */ - z_decomp_init, /* decomp_init */ - z_decomp_reset, /* decomp_reset */ - z_decompress, /* decompress */ - z_incomp, /* incomp */ - z_comp_stats, /* decomp_stat */ -}; - -/* - * Space allocation and freeing routines for use by zlib routines. - */ -void * -z_alloc(notused, items, size) - void *notused; - u_int items, size; -{ - void *ptr; - - MALLOC(ptr, void *, items * size, M_DEVBUF, M_NOWAIT); - return ptr; -} - -void -z_free(notused, ptr) - void *notused; - void *ptr; -{ - FREE(ptr, M_DEVBUF); -} - -/* - * Allocate space for a compressor. 
- */ -static void * -z_comp_alloc(options, opt_len) - u_char *options; - int opt_len; -{ - struct deflate_state *state; - int w_size; - - if (opt_len != CILEN_DEFLATE - || (options[0] != CI_DEFLATE && options[0] != CI_DEFLATE_DRAFT) - || options[1] != CILEN_DEFLATE - || DEFLATE_METHOD(options[2]) != DEFLATE_METHOD_VAL - || options[3] != DEFLATE_CHK_SEQUENCE) - return NULL; - w_size = DEFLATE_SIZE(options[2]); - if (w_size < DEFLATE_MIN_SIZE || w_size > DEFLATE_MAX_SIZE) - return NULL; - - MALLOC(state, struct deflate_state *, sizeof(struct deflate_state), - M_DEVBUF, M_NOWAIT); - if (state == NULL) - return NULL; - - state->strm.next_in = NULL; - state->strm.zalloc = z_alloc; - state->strm.zfree = z_free; - if (deflateInit2(&state->strm, Z_DEFAULT_COMPRESSION, DEFLATE_METHOD_VAL, - -w_size, 8, Z_DEFAULT_STRATEGY) != Z_OK) { - FREE(state, M_DEVBUF); - return NULL; - } - - state->w_size = w_size; - bzero(&state->stats, sizeof(state->stats)); - return (void *) state; -} - -static void -z_comp_free(arg) - void *arg; -{ - struct deflate_state *state = (struct deflate_state *) arg; - - deflateEnd(&state->strm); - FREE(state, M_DEVBUF); -} - -static int -z_comp_init(arg, options, opt_len, unit, hdrlen, debug) - void *arg; - u_char *options; - int opt_len, unit, hdrlen, debug; -{ - struct deflate_state *state = (struct deflate_state *) arg; - - if (opt_len < CILEN_DEFLATE - || (options[0] != CI_DEFLATE && options[0] != CI_DEFLATE_DRAFT) - || options[1] != CILEN_DEFLATE - || DEFLATE_METHOD(options[2]) != DEFLATE_METHOD_VAL - || DEFLATE_SIZE(options[2]) != state->w_size - || options[3] != DEFLATE_CHK_SEQUENCE) - return 0; - - state->seqno = 0; - state->unit = unit; - state->hdrlen = hdrlen; - state->debug = debug; - - deflateReset(&state->strm); - - return 1; -} - -static void -z_comp_reset(arg) - void *arg; -{ - struct deflate_state *state = (struct deflate_state *) arg; - - state->seqno = 0; - deflateReset(&state->strm); -} - -int -z_compress(arg, mret, mp, orig_len, maxolen) - void *arg; - struct mbuf **mret; /* compressed packet (out) */ - struct mbuf *mp; /* uncompressed packet (in) */ - int orig_len, maxolen; -{ - struct deflate_state *state = (struct deflate_state *) arg; - u_char *rptr, *wptr; - int proto, olen, wspace, r, flush; - struct mbuf *m; - - /* - * Check that the protocol is in the range we handle. - */ - rptr = mtod(mp, u_char *); - proto = PPP_PROTOCOL(rptr); - if (proto > 0x3fff || proto == 0xfd || proto == 0xfb) { - *mret = NULL; - return orig_len; - } - - /* Allocate one mbuf initially. */ - if (maxolen > orig_len) - maxolen = orig_len; - MGET(m, M_DONTWAIT, MT_DATA); - *mret = m; - if (m != NULL) { - m->m_len = 0; - if (maxolen + state->hdrlen > MLEN) - MCLGET(m, M_DONTWAIT); - wspace = M_TRAILINGSPACE(m); - if (state->hdrlen + PPP_HDRLEN + 2 < wspace) { - m->m_data += state->hdrlen; - wspace -= state->hdrlen; - } - wptr = mtod(m, u_char *); - - /* - * Copy over the PPP header and store the 2-byte sequence number. - */ - wptr[0] = PPP_ADDRESS(rptr); - wptr[1] = PPP_CONTROL(rptr); - wptr[2] = PPP_COMP >> 8; - wptr[3] = PPP_COMP; - wptr += PPP_HDRLEN; - wptr[0] = state->seqno >> 8; - wptr[1] = state->seqno; - wptr += 2; - state->strm.next_out = wptr; - state->strm.avail_out = wspace - (PPP_HDRLEN + 2); - } else { - state->strm.next_out = NULL; - state->strm.avail_out = 1000000; - wptr = NULL; - wspace = 0; - } - ++state->seqno; - - rptr += (proto > 0xff)? 
2: 3; /* skip 1st proto byte if 0 */ - state->strm.next_in = rptr; - state->strm.avail_in = mtod(mp, u_char *) + mp->m_len - rptr; - mp = mp->m_next; - flush = (mp == NULL)? Z_PACKET_FLUSH: Z_NO_FLUSH; - olen = 0; - for (;;) { - r = deflate(&state->strm, flush); - if (r != Z_OK) { - printf("z_compress: deflate returned %d (%s)\n", - r, (state->strm.msg? state->strm.msg: "")); - break; - } - if (flush != Z_NO_FLUSH && state->strm.avail_out != 0) - break; /* all done */ - if (state->strm.avail_in == 0 && mp != NULL) { - state->strm.next_in = mtod(mp, u_char *); - state->strm.avail_in = mp->m_len; - mp = mp->m_next; - if (mp == NULL) - flush = Z_PACKET_FLUSH; - } - if (state->strm.avail_out == 0) { - if (m != NULL) { - m->m_len = wspace; - olen += wspace; - MGET(m->m_next, M_DONTWAIT, MT_DATA); - m = m->m_next; - if (m != NULL) { - m->m_len = 0; - if (maxolen - olen > MLEN) - MCLGET(m, M_DONTWAIT); - state->strm.next_out = mtod(m, u_char *); - state->strm.avail_out = wspace = M_TRAILINGSPACE(m); - } - } - if (m == NULL) { - state->strm.next_out = NULL; - state->strm.avail_out = 1000000; - } - } - } - if (m != NULL) - olen += (m->m_len = wspace - state->strm.avail_out); - - /* - * See if we managed to reduce the size of the packet. - */ - if (m != NULL && olen < orig_len) { - state->stats.comp_bytes += olen; - state->stats.comp_packets++; - } else { - if (*mret != NULL) { - m_freem(*mret); - *mret = NULL; - } - state->stats.inc_bytes += orig_len; - state->stats.inc_packets++; - olen = orig_len; - } - state->stats.unc_bytes += orig_len; - state->stats.unc_packets++; - - return olen; -} - -static void -z_comp_stats(arg, stats) - void *arg; - struct compstat *stats; -{ - struct deflate_state *state = (struct deflate_state *) arg; - u_int out; - - *stats = state->stats; - stats->ratio = stats->unc_bytes; - out = stats->comp_bytes + stats->inc_bytes; - if (stats->ratio <= 0x7ffffff) - stats->ratio <<= 8; - else - out >>= 8; - if (out != 0) - stats->ratio /= out; -} - -/* - * Allocate space for a decompressor. 
- */ -static void * -z_decomp_alloc(options, opt_len) - u_char *options; - int opt_len; -{ - struct deflate_state *state; - int w_size; - - if (opt_len != CILEN_DEFLATE - || (options[0] != CI_DEFLATE && options[0] != CI_DEFLATE_DRAFT) - || options[1] != CILEN_DEFLATE - || DEFLATE_METHOD(options[2]) != DEFLATE_METHOD_VAL - || options[3] != DEFLATE_CHK_SEQUENCE) - return NULL; - w_size = DEFLATE_SIZE(options[2]); - if (w_size < DEFLATE_MIN_SIZE || w_size > DEFLATE_MAX_SIZE) - return NULL; - - MALLOC(state, struct deflate_state *, sizeof(struct deflate_state), - M_DEVBUF, M_NOWAIT); - if (state == NULL) - return NULL; - - state->strm.next_out = NULL; - state->strm.zalloc = z_alloc; - state->strm.zfree = z_free; - if (inflateInit2(&state->strm, -w_size) != Z_OK) { - FREE(state, M_DEVBUF); - return NULL; - } - - state->w_size = w_size; - bzero(&state->stats, sizeof(state->stats)); - return (void *) state; -} - -static void -z_decomp_free(arg) - void *arg; -{ - struct deflate_state *state = (struct deflate_state *) arg; - - inflateEnd(&state->strm); - FREE(state, M_DEVBUF); -} - -static int -z_decomp_init(arg, options, opt_len, unit, hdrlen, mru, debug) - void *arg; - u_char *options; - int opt_len, unit, hdrlen, mru, debug; -{ - struct deflate_state *state = (struct deflate_state *) arg; - - if (opt_len < CILEN_DEFLATE - || (options[0] != CI_DEFLATE && options[0] != CI_DEFLATE_DRAFT) - || options[1] != CILEN_DEFLATE - || DEFLATE_METHOD(options[2]) != DEFLATE_METHOD_VAL - || DEFLATE_SIZE(options[2]) != state->w_size - || options[3] != DEFLATE_CHK_SEQUENCE) - return 0; - - state->seqno = 0; - state->unit = unit; - state->hdrlen = hdrlen; - state->debug = debug; - state->mru = mru; - - inflateReset(&state->strm); - - return 1; -} - -static void -z_decomp_reset(arg) - void *arg; -{ - struct deflate_state *state = (struct deflate_state *) arg; - - state->seqno = 0; - inflateReset(&state->strm); -} - -/* - * Decompress a Deflate-compressed packet. - * - * Because of patent problems, we return DECOMP_ERROR for errors - * found by inspecting the input data and for system problems, but - * DECOMP_FATALERROR for any errors which could possibly be said to - * be being detected "after" decompression. For DECOMP_ERROR, - * we can issue a CCP reset-request; for DECOMP_FATALERROR, we may be - * infringing a patent of Motorola's if we do, so we take CCP down - * instead. - * - * Given that the frame has the correct sequence number and a good FCS, - * errors such as invalid codes in the input most likely indicate a - * bug, so we return DECOMP_FATALERROR for them in order to turn off - * compression, even though they are detected by inspecting the input. - */ -int -z_decompress(arg, mi, mop) - void *arg; - struct mbuf *mi, **mop; -{ - struct deflate_state *state = (struct deflate_state *) arg; - struct mbuf *mo, *mo_head; - u_char *rptr, *wptr; - int rlen, olen, ospace; - int seq, i, flush, r, decode_proto; - u_char hdr[PPP_HDRLEN + DEFLATE_OVHD]; - - *mop = NULL; - rptr = mtod(mi, u_char *); - rlen = mi->m_len; - for (i = 0; i < PPP_HDRLEN + DEFLATE_OVHD; ++i) { - while (rlen <= 0) { - mi = mi->m_next; - if (mi == NULL) - return DECOMP_ERROR; - rptr = mtod(mi, u_char *); - rlen = mi->m_len; - } - hdr[i] = *rptr++; - --rlen; - } - - /* Check the sequence number. 
*/ - seq = (hdr[PPP_HDRLEN] << 8) + hdr[PPP_HDRLEN+1]; - if (seq != state->seqno) { - if (state->debug) - printf("z_decompress%d: bad seq # %d, expected %d\n", - state->unit, seq, state->seqno); - return DECOMP_ERROR; - } - ++state->seqno; - - /* Allocate an output mbuf. */ - MGETHDR(mo, M_DONTWAIT, MT_DATA); - if (mo == NULL) - return DECOMP_ERROR; - mo_head = mo; - mo->m_len = 0; - mo->m_next = NULL; - MCLGET(mo, M_DONTWAIT); - ospace = M_TRAILINGSPACE(mo); - if (state->hdrlen + PPP_HDRLEN < ospace) { - mo->m_data += state->hdrlen; - ospace -= state->hdrlen; - } -#if CONFIG_MACF_NET - mac_mbuf_label_copy(mi, mo); -#endif - - /* - * Fill in the first part of the PPP header. The protocol field - * comes from the decompressed data. - */ - wptr = mtod(mo, u_char *); - wptr[0] = PPP_ADDRESS(hdr); - wptr[1] = PPP_CONTROL(hdr); - wptr[2] = 0; - - /* - * Set up to call inflate. We set avail_out to 1 initially so we can - * look at the first byte of the output and decide whether we have - * a 1-byte or 2-byte protocol field. - */ - state->strm.next_in = rptr; - state->strm.avail_in = rlen; - mi = mi->m_next; - flush = (mi == NULL)? Z_PACKET_FLUSH: Z_NO_FLUSH; - rlen += PPP_HDRLEN + DEFLATE_OVHD; - state->strm.next_out = wptr + 3; - state->strm.avail_out = 1; - decode_proto = 1; - olen = PPP_HDRLEN; - - /* - * Call inflate, supplying more input or output as needed. - */ - for (;;) { - r = inflate(&state->strm, flush); - if (r != Z_OK) { -#if !DEFLATE_DEBUG - if (state->debug) -#endif - printf("z_decompress%d: inflate returned %d (%s)\n", - state->unit, r, (state->strm.msg? state->strm.msg: "")); - m_freem(mo_head); - return DECOMP_FATALERROR; - } - if (flush != Z_NO_FLUSH && state->strm.avail_out != 0) - break; /* all done */ - if (state->strm.avail_in == 0 && mi != NULL) { - state->strm.next_in = mtod(mi, u_char *); - state->strm.avail_in = mi->m_len; - rlen += mi->m_len; - mi = mi->m_next; - if (mi == NULL) - flush = Z_PACKET_FLUSH; - } - if (state->strm.avail_out == 0) { - if (decode_proto) { - state->strm.avail_out = ospace - PPP_HDRLEN; - if ((wptr[3] & 1) == 0) { - /* 2-byte protocol field */ - wptr[2] = wptr[3]; - --state->strm.next_out; - ++state->strm.avail_out; - --olen; - } - decode_proto = 0; - } else { - mo->m_len = ospace; - olen += ospace; - MGET(mo->m_next, M_DONTWAIT, MT_DATA); - mo = mo->m_next; - if (mo == NULL) { - m_freem(mo_head); - return DECOMP_ERROR; - } - MCLGET(mo, M_DONTWAIT); - state->strm.next_out = mtod(mo, u_char *); - state->strm.avail_out = ospace = M_TRAILINGSPACE(mo); - } - } - } - if (decode_proto) { - m_freem(mo_head); - return DECOMP_ERROR; - } - olen += (mo->m_len = ospace - state->strm.avail_out); -#if DEFLATE_DEBUG - if (state->debug && olen > state->mru + PPP_HDRLEN) - printf("ppp_deflate%d: exceeded mru (%d > %d)\n", - state->unit, olen, state->mru + PPP_HDRLEN); -#endif - - state->stats.unc_bytes += olen; - state->stats.unc_packets++; - state->stats.comp_bytes += rlen; - state->stats.comp_packets++; - - *mop = mo_head; - return DECOMP_OK; -} - -/* - * Incompressible data has arrived - add it to the history. - */ -static void -z_incomp(arg, mi) - void *arg; - struct mbuf *mi; -{ - struct deflate_state *state = (struct deflate_state *) arg; - u_char *rptr; - int rlen, proto, r; - - /* - * Check that the protocol is one we handle. 
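
[Editorial note] Both the 1- versus 2-byte protocol decision in z_decompress above (the test on wptr[3] & 1) and the filter at the top of z_incomp below rely on how PPP assigns protocol numbers: the most significant octet is always even and the least significant octet always odd, so an odd leading octet can only be a compressed one-byte protocol field. A standalone sketch of the two predicates; the helper names are ours.

    #include <stdint.h>

    /*
     * With protocol-field compression, protocol values <= 0xFF travel
     * as a single (odd) octet; an even first octet therefore implies
     * a full 2-byte protocol field.
     */
    static int
    ppp_proto_bytes(uint8_t first_octet)
    {
        return (first_octet & 1) ? 1 : 2;
    }

    /*
     * Mirror of the z_incomp filter below: network-layer protocols
     * occupy 0x0000-0x3fff, and 0xfd/0xfb are the compressed-data
     * protocols themselves, which must never be fed back into the
     * compression history.
     */
    static int
    ccp_proto_in_history(uint16_t proto)
    {
        return (proto <= 0x3fff && proto != 0xfd && proto != 0xfb);
    }
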
- */ - rptr = mtod(mi, u_char *); - proto = PPP_PROTOCOL(rptr); - if (proto > 0x3fff || proto == 0xfd || proto == 0xfb) - return; - - ++state->seqno; - - /* - * Iterate through the mbufs, adding the characters in them - * to the decompressor's history. For the first mbuf, we start - * at the either the 1st or 2nd byte of the protocol field, - * depending on whether the protocol value is compressible. - */ - rlen = mi->m_len; - state->strm.next_in = rptr + 3; - state->strm.avail_in = rlen - 3; - if (proto > 0xff) { - --state->strm.next_in; - ++state->strm.avail_in; - } - for (;;) { - r = inflateIncomp(&state->strm); - if (r != Z_OK) { - /* gak! */ -#if !DEFLATE_DEBUG - if (state->debug) -#endif - printf("z_incomp%d: inflateIncomp returned %d (%s)\n", - state->unit, r, (state->strm.msg? state->strm.msg: "")); - return; - } - mi = mi->m_next; - if (mi == NULL) - break; - state->strm.next_in = mtod(mi, u_char *); - state->strm.avail_in = mi->m_len; - rlen += mi->m_len; - } - - /* - * Update stats. - */ - state->stats.inc_bytes += rlen; - state->stats.inc_packets++; - state->stats.unc_bytes += rlen; - state->stats.unc_packets++; -} - -#endif /* DO_DEFLATE */ diff --git a/bsd/net/radix.c b/bsd/net/radix.c index 51c90586a..fdc7058b3 100644 --- a/bsd/net/radix.c +++ b/bsd/net/radix.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -66,14 +66,10 @@ */ #ifndef _RADIX_H_ #include -#ifdef KERNEL #include #include #define M_DONTWAIT M_NOWAIT #include -#else -#include -#endif #include #include #include @@ -898,9 +894,10 @@ on1: m = mm; } if (m) - log(LOG_ERR, - "rn_delete: Orphaned Mask %p at %p\n", - (void *)m, (void *)x); + log(LOG_ERR, "rn_delete: Orphaned Mask " + "0x%llx at 0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), + (uint64_t)VM_KERNEL_ADDRPERM(x)); } } /* @@ -1149,14 +1146,13 @@ void rn_init(void) { char *cp, *cplim; -#ifdef KERNEL struct domain *dom; /* lock already held when rn_init is called */ - for (dom = domains; dom; dom = dom->dom_next) + TAILQ_FOREACH(dom, &domains, dom_entry) { if (dom->dom_maxrtkey > max_keylen) max_keylen = dom->dom_maxrtkey; -#endif + } if (max_keylen == 0) { log(LOG_ERR, "rn_init: radix functions require max_keylen be set\n"); diff --git a/bsd/net/radix.h b/bsd/net/radix.h index 88ce9732f..464ef2349 100644 --- a/bsd/net/radix.h +++ b/bsd/net/radix.h @@ -195,7 +195,7 @@ struct radix_node_head { #define R_Free(p) FREE((caddr_t)p, M_RTABLE); #endif /*KERNEL*/ -void rn_init(void) __attribute__((section("__TEXT, initcode"))); +void rn_init(void); int rn_inithead(void **, int); int rn_refines(void *, void *); struct radix_node diff --git a/bsd/net/raw_cb.c b/bsd/net/raw_cb.c index 64acd6719..2aaaeeb4b 100644 --- a/bsd/net/raw_cb.c +++ b/bsd/net/raw_cb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,7 +106,7 @@ raw_attach(struct socket *so, int proto) if (error) return (error); rp->rcb_socket = so; - rp->rcb_proto.sp_family = so->so_proto->pr_domain->dom_family; + rp->rcb_proto.sp_family = SOCK_DOM(so); rp->rcb_proto.sp_protocol = proto; lck_mtx_lock(raw_mtx); LIST_INSERT_HEAD(&rawcb_list, rp, list); @@ -148,12 +148,19 @@ raw_detach(struct rawcb *rp) void raw_disconnect(struct rawcb *rp) { + struct socket *so = rp->rcb_socket; + #ifdef notdef if (rp->rcb_faddr) m_freem(dtom(rp->rcb_faddr)); rp->rcb_faddr = 0; #endif - if (rp->rcb_socket->so_state & SS_NOFDREF) + /* + * A multipath subflow socket would have its SS_NOFDREF set by default, + * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; + * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. + */ + if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) raw_detach(rp); } diff --git a/bsd/net/raw_cb.h b/bsd/net/raw_cb.h index 90655ca98..c944ec005 100644 --- a/bsd/net/raw_cb.h +++ b/bsd/net/raw_cb.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,11 +62,9 @@ #ifndef _NET_RAW_CB_H_ #define _NET_RAW_CB_H_ -#include - +#ifdef BSD_KERNEL_PRIVATE #include -#ifdef PRIVATE /* * Raw protocol interface control block. Used * to tie a socket to the generic raw interface. 
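
[Editorial note] The raw_attach() hunk above replaces the pointer chase so->so_proto->pr_domain->dom_family with the SOCK_DOM() accessor. A toy model of what that accessor expands to, assuming the usual xnu definition in sys/socketvar.h; the types are stubbed out here so the sketch stands alone.

    /* Stand-in types; the real ones live in sys/socketvar.h et al. */
    struct domain  { int dom_family; };
    struct protosw { struct domain *pr_domain; int pr_protocol; };
    struct socket  { struct protosw *so_proto; };

    /* Shorthand this patch starts using (paraphrased, not copied): */
    #define SOCK_DOM(so)    ((so)->so_proto->pr_domain->dom_family)
    #define SOCK_PROTO(so)  ((so)->so_proto->pr_protocol)
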
@@ -87,20 +85,19 @@ struct rawcb { */ #define RAWSNDQ 8192 #define RAWRCVQ 8192 -#endif /* PRIVATE */ -#ifdef KERNEL_PRIVATE extern LIST_HEAD(rawcb_list_head, rawcb) rawcb_list; -int raw_attach(struct socket *, int); -void raw_ctlinput(int, struct sockaddr *, void *); -void raw_detach(struct rawcb *); -void raw_disconnect(struct rawcb *); -void raw_init(void); -void raw_input(struct mbuf *, - struct sockproto *, struct sockaddr *, struct sockaddr *); +__BEGIN_DECLS +extern int raw_attach(struct socket *, int); +extern void raw_ctlinput(int, struct sockaddr *, void *); +extern void raw_detach(struct rawcb *); +extern void raw_disconnect(struct rawcb *); +extern void raw_init(struct protosw *, struct domain *); +extern void raw_input(struct mbuf *, struct sockproto *, struct sockaddr *, + struct sockaddr *); +__END_DECLS extern struct pr_usrreqs raw_usrreqs; -#endif /* KERNEL_PRIVATE */ - -#endif +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* _NET_RAW_CB_H_ */ diff --git a/bsd/net/raw_usrreq.c b/bsd/net/raw_usrreq.c index 1284ca8c7..79dcda95a 100644 --- a/bsd/net/raw_usrreq.c +++ b/bsd/net/raw_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -82,16 +82,22 @@ lck_grp_attr_t *raw_mtx_grp_attr; * Initialize raw connection block q. */ void -raw_init(void) +raw_init(struct protosw *pp, struct domain *dp) { - raw_mtx_grp_attr = lck_grp_attr_alloc_init(); +#pragma unused(pp, dp) + static int raw_initialized = 0; - raw_mtx_grp = lck_grp_alloc_init("rawcb", raw_mtx_grp_attr); + /* This is called by key_init as well, so do it only once */ + if (!raw_initialized) { + raw_initialized = 1; - raw_mtx_attr = lck_attr_alloc_init(); + raw_mtx_grp_attr = lck_grp_attr_alloc_init(); + raw_mtx_grp = lck_grp_alloc_init("rawcb", raw_mtx_grp_attr); + raw_mtx_attr = lck_attr_alloc_init(); - lck_mtx_init(raw_mtx, raw_mtx_grp, raw_mtx_attr); - LIST_INIT(&rawcb_list); + lck_mtx_init(raw_mtx, raw_mtx_grp, raw_mtx_attr); + LIST_INIT(&rawcb_list); + } } @@ -364,9 +370,16 @@ raw_usockaddr(struct socket *so, struct sockaddr **nam) } struct pr_usrreqs raw_usrreqs = { - raw_uabort, pru_accept_notsupp, raw_uattach, raw_ubind, raw_uconnect, - pru_connect2_notsupp, pru_control_notsupp, raw_udetach, - raw_udisconnect, pru_listen_notsupp, raw_upeeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, raw_usend, pru_sense_null, raw_ushutdown, - raw_usockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = raw_uabort, + .pru_attach = raw_uattach, + .pru_bind = raw_ubind, + .pru_connect = raw_uconnect, + .pru_detach = raw_udetach, + .pru_disconnect = raw_udisconnect, + .pru_peeraddr = raw_upeeraddr, + .pru_send = raw_usend, + .pru_shutdown = raw_ushutdown, + .pru_sockaddr = raw_usockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; diff --git a/bsd/net/route.c b/bsd/net/route.c index 132768cfe..03c66249f 100644 --- a/bsd/net/route.c +++ b/bsd/net/route.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,9 +72,11 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -88,6 +90,7 @@ #if INET6 #include #include +#include #endif /* INET6 */ #include @@ -97,6 +100,10 @@ #include +#if CONFIG_MACF +#include +#endif + /* * Synchronization notes: * @@ -127,7 +134,7 @@ * * - Routing table lock (rnh_lock). 
* - * rt_parent, rt_mask, rt_llinfo_free + * rt_parent, rt_mask, rt_llinfo_free, rt_tree_genid * * - Set once during creation and never changes; no locks to read. * @@ -161,7 +168,7 @@ * single-threaded, thus exclusive. This flag will also prevent the * route from being looked up via rt_lookup(). * - * generation_id + * rt_genid * * - Assumes that 32-bit writes are atomic; no locks. * @@ -197,14 +204,12 @@ #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) -extern void kdp_set_gateway_mac (void *gatewaymac); +extern void kdp_set_gateway_mac(void *gatewaymac); -extern struct domain routedomain; -struct route_cb route_cb; __private_extern__ struct rtstat rtstat = { 0, 0, 0, 0, 0 }; struct radix_node_head *rt_tables[AF_MAX+1]; -decl_lck_mtx_data(,rnh_lock_data); /* global routing tables mutex */ +decl_lck_mtx_data(, rnh_lock_data); /* global routing tables mutex */ lck_mtx_t *rnh_lock = &rnh_lock_data; static lck_attr_t *rnh_lock_attr; static lck_grp_t *rnh_lock_grp; @@ -301,6 +306,8 @@ static struct sockaddr *sa_trim(struct sockaddr *, int); static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *, unsigned int); static struct radix_node *node_lookup_default(int); +static struct rtentry *rt_lookup_common(boolean_t, boolean_t, struct sockaddr *, + struct sockaddr *, struct radix_node_head *, unsigned int); static int rn_match_ifscope(struct radix_node *, void *); static struct ifaddr *ifa_ifwithroute_common_locked(int, const struct sockaddr *, const struct sockaddr *, unsigned int); @@ -308,37 +315,17 @@ static struct rtentry *rte_alloc(void); static void rte_free(struct rtentry *); static void rtfree_common(struct rtentry *, boolean_t); static void rte_if_ref(struct ifnet *, int); +static void rt_set_idleref(struct rtentry *); +static void rt_clear_idleref(struct rtentry *); +static void rt_str4(struct rtentry *, char *, uint32_t, char *, uint32_t); +#if INET6 +static void rt_str6(struct rtentry *, char *, uint32_t, char *, uint32_t); +#endif /* INET6 */ -uint32_t route_generation = 0; - -/* - * sockaddr_in with scope ID field; this is used internally to keep - * track of scoped route entries in the routing table. The fact that - * such a value is embedded in the structure is an artifact of the - * current implementation which could change in future. - */ -struct sockaddr_inifscope { - __uint8_t sin_len; - sa_family_t sin_family; - in_port_t sin_port; - struct in_addr sin_addr; - /* - * To avoid possible conflict with an overlaid sockaddr_inarp - * having sin_other set to SIN_PROXY, we use the first 4-bytes - * of sin_zero since sin_srcaddr is one of the unused fields - * in sockaddr_inarp. 
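
[Editorial note] The _CASSERT() calls in route_init() above pin down the common prefix of struct route and struct route_in6 at compile time, because later code casts one to the other. The same guard can be written portably with C11 _Static_assert; a minimal standalone sketch with stand-in structs (names are ours):

    #include <stddef.h>

    /* C11 spelling of xnu's _CASSERT(): fail the build, not the boot. */
    #define LAYOUT_ASSERT(x) _Static_assert((x), #x)

    struct route_v4 { void *ro_rt; void *ro_srcia; unsigned ro_flags; };
    struct route_v6 { void *ro_rt; void *ro_srcia; unsigned ro_flags; };

    /* Casting route_v6 * to route_v4 * is only safe if the prefix matches. */
    LAYOUT_ASSERT(offsetof(struct route_v4, ro_srcia) ==
        offsetof(struct route_v6, ro_srcia));
    LAYOUT_ASSERT(offsetof(struct route_v4, ro_flags) ==
        offsetof(struct route_v6, ro_flags));
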
- */ - union { - char sin_zero[8]; - struct { - __uint32_t ifscope; - } _in_index; - } un; -#define sin_scope_id un._in_index.ifscope -}; - -#define SINIFSCOPE(sa) ((struct sockaddr_inifscope *)(size_t)(sa)) -#define SIN6IFSCOPE(sa) SIN6(sa) +uint32_t route_genid_inet = 0; +#if INET6 +uint32_t route_genid_inet6 = 0; +#endif /* INET6 */ #define ASSERT_SINIFSCOPE(sa) { \ if ((sa)->sa_family != AF_INET || \ @@ -392,12 +379,79 @@ static unsigned int primary6_ifscope = IFSCOPE_NONE; #define RN(r) ((struct radix_node *)r) #define RT_HOST(r) (RT(r)->rt_flags & RTF_HOST) -SYSCTL_DECL(_net_idle_route); +SYSCTL_DECL(_net_route); + +unsigned int rt_verbose; /* verbosity level (0 to disable) */ +SYSCTL_UINT(_net_route, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED, + &rt_verbose, 0, ""); + +static void +rtable_init(void **table) +{ + struct domain *dom; + + domain_proto_mtx_lock_assert_held(); + + TAILQ_FOREACH(dom, &domains, dom_entry) { + if (dom->dom_rtattach != NULL) + dom->dom_rtattach(&table[dom->dom_family], + dom->dom_rtoffset); + } +} + +/* + * Called by route_dinit(). + */ +void +route_init(void) +{ + int size; + +#if INET6 + _CASSERT(offsetof(struct route, ro_rt) == + offsetof(struct route_in6, ro_rt)); + _CASSERT(offsetof(struct route, ro_srcia) == + offsetof(struct route_in6, ro_srcia)); + _CASSERT(offsetof(struct route, ro_flags) == + offsetof(struct route_in6, ro_flags)); + _CASSERT(offsetof(struct route, ro_dst) == + offsetof(struct route_in6, ro_dst)); +#endif /* INET6 */ + + PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug)); + if (rte_debug != 0) + rte_debug |= RTD_DEBUG; + + rnh_lock_grp_attr = lck_grp_attr_alloc_init(); + rnh_lock_grp = lck_grp_alloc_init("route", rnh_lock_grp_attr); + rnh_lock_attr = lck_attr_alloc_init(); + lck_mtx_init(rnh_lock, rnh_lock_grp, rnh_lock_attr); + + rte_mtx_grp_attr = lck_grp_attr_alloc_init(); + rte_mtx_grp = lck_grp_alloc_init(RTE_NAME, rte_mtx_grp_attr); + rte_mtx_attr = lck_attr_alloc_init(); + + lck_mtx_lock(rnh_lock); + rn_init(); /* initialize all zeroes, all ones, mask table */ + lck_mtx_unlock(rnh_lock); + rtable_init((void **)rt_tables); + + if (rte_debug & RTD_DEBUG) + size = sizeof (struct rtentry_dbg); + else + size = sizeof (struct rtentry); + + rte_zone = zinit(size, RTE_ZONE_MAX * size, 0, RTE_ZONE_NAME); + if (rte_zone == NULL) { + panic("%s: failed allocating rte_zone", __func__); + /* NOTREACHED */ + } + zone_change(rte_zone, Z_EXPAND, TRUE); + zone_change(rte_zone, Z_CALLERACCT, FALSE); + zone_change(rte_zone, Z_NOENCRYPT, TRUE); -static int rt_if_idle_expire_timeout = RT_IF_IDLE_EXPIRE_TIMEOUT; -SYSCTL_INT(_net_idle_route, OID_AUTO, expire_timeout, CTLFLAG_RW|CTLFLAG_LOCKED, - &rt_if_idle_expire_timeout, 0, "Default expiration time on routes for " - "interface idle reference counting"); + TAILQ_INIT(&rttrash_head); +} /* * Given a route, determine whether or not it is the non-scoped default @@ -603,7 +657,7 @@ sa_trim(struct sockaddr *sa, int skip) if (sa->sa_len <= skip) return (sa); - for (cp = base + (sa->sa_len - skip); cp > base && cp[-1] == 0;) + for (cp = base + (sa->sa_len - skip); cp > base && cp[-1] == 0; ) cp--; sa->sa_len = (cp - base) + skip; @@ -621,19 +675,29 @@ sa_trim(struct sockaddr *sa, int skip) } /* - * Called by rtm_msg{1,2} routines to "scrub" the scope ID field away from - * the socket address structure, so that clients of the routing socket will + * Called by rtm_msg{1,2} routines to "scrub" socket address structures of + * kernel private information, so that clients of the routing socket 
will * not be confused by the presence of the information, or the side effect of * the increased length due to that. The source sockaddr is not modified; * instead, the scrubbing happens on the destination sockaddr storage that * is passed in by the caller. + * + * Scrubbing entails: + * - removing embedded scope identifiers from network mask and destination + * IPv4 and IPv6 socket addresses + * - optionally removing global scope interface hardware addresses from + * link-layer interface addresses when the MAC framework check fails. */ struct sockaddr * -rtm_scrub_ifscope(int type, int idx, struct sockaddr *hint, struct sockaddr *sa, - struct sockaddr_storage *ss) +rtm_scrub(int type, int idx, struct sockaddr *hint, struct sockaddr *sa, + void *buf, uint32_t buflen, kauth_cred_t *credp) { + struct sockaddr_storage *ss = (struct sockaddr_storage *)buf; struct sockaddr *ret = sa; + VERIFY(buf != NULL && buflen >= sizeof (*ss)); + bzero(buf, buflen); + switch (idx) { case RTAX_DST: /* @@ -674,7 +738,6 @@ rtm_scrub_ifscope(int type, int idx, struct sockaddr *hint, struct sockaddr *sa, offsetof(struct sockaddr_in6, sin6_addr); if (sa->sa_len > skip && sa->sa_len <= sizeof (*ss)) { - bzero(ss, sizeof (*ss)); bcopy(sa, ss, sa->sa_len); /* * Don't use {sin,sin6}_set_ifscope() as sa_family @@ -698,6 +761,25 @@ rtm_scrub_ifscope(int type, int idx, struct sockaddr *hint, struct sockaddr *sa, } break; } + case RTAX_IFP: { + if (sa->sa_family == AF_LINK && credp) { + struct sockaddr_dl *sdl = SDL(buf); + const void *bytes; + size_t size; + + /* caller should handle worst case: SOCK_MAXADDRLEN */ + VERIFY(buflen >= sa->sa_len); + + bcopy(sa, sdl, sa->sa_len); + bytes = dlil_ifaddr_bytes(sdl, &size, credp); + if (bytes != CONST_LLADDR(sdl)) { + VERIFY(sdl->sdl_alen == size); + bcopy(bytes, LLADDR(sdl), size); + } + ret = (struct sockaddr *)sdl; + } + break; + } default: break; } @@ -724,63 +806,31 @@ rn_match_ifscope(struct radix_node *rn, void *arg) (SIN6IFSCOPE(rt_key(rt))->sin6_scope_id == ma->ifscope)); } -static void -rtable_init(void **table) +/* + * Atomically increment route generation counter + */ +void +routegenid_update(void) { - struct domain *dom; - for (dom = domains; dom; dom = dom->dom_next) - if (dom->dom_rtattach) - dom->dom_rtattach(&table[dom->dom_family], - dom->dom_rtoffset); + routegenid_inet_update(); +#if INET6 + routegenid_inet6_update(); +#endif /* INET6 */ } void -route_init(void) +routegenid_inet_update(void) { - int size; - - PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug)); - if (rte_debug != 0) - rte_debug |= RTD_DEBUG; - - rnh_lock_grp_attr = lck_grp_attr_alloc_init(); - rnh_lock_grp = lck_grp_alloc_init("route", rnh_lock_grp_attr); - rnh_lock_attr = lck_attr_alloc_init(); - lck_mtx_init(rnh_lock, rnh_lock_grp, rnh_lock_attr); - - rte_mtx_grp_attr = lck_grp_attr_alloc_init(); - rte_mtx_grp = lck_grp_alloc_init(RTE_NAME, rte_mtx_grp_attr); - rte_mtx_attr = lck_attr_alloc_init(); - - lck_mtx_lock(rnh_lock); - rn_init(); /* initialize all zeroes, all ones, mask table */ - lck_mtx_unlock(rnh_lock); - rtable_init((void **)rt_tables); - - if (rte_debug & RTD_DEBUG) - size = sizeof (struct rtentry_dbg); - else - size = sizeof (struct rtentry); - - rte_zone = zinit(size, RTE_ZONE_MAX * size, 0, RTE_ZONE_NAME); - if (rte_zone == NULL) - panic("route_init: failed allocating rte_zone"); - - zone_change(rte_zone, Z_EXPAND, TRUE); - zone_change(rte_zone, Z_CALLERACCT, FALSE); - zone_change(rte_zone, Z_NOENCRYPT, TRUE); - - TAILQ_INIT(&rttrash_head); + 
atomic_add_32(&route_genid_inet, 1); } -/* - * Atomically increment route generation counter - */ +#if INET6 void -routegenid_update(void) +routegenid_inet6_update(void) { - (void) atomic_add_32_ov(&route_generation, 1); + atomic_add_32(&route_genid_inet6, 1); } +#endif /* INET6 */ /* * Packet routing routines. @@ -805,18 +855,16 @@ rtalloc_ign_common_locked(struct route *ro, uint32_t ignore, if ((rt = ro->ro_rt) != NULL) { RT_LOCK_SPIN(rt); - if (rt->rt_ifp != NULL && (rt->rt_flags & RTF_UP) && - rt->generation_id == route_generation) { + if (rt->rt_ifp != NULL && !ROUTE_UNUSABLE(ro)) { RT_UNLOCK(rt); return; } RT_UNLOCK(rt); - rtfree_locked(rt); - ro->ro_rt = NULL; + ROUTE_RELEASE_LOCKED(ro); /* rnh_lock already held */ } ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope); if (ro->ro_rt != NULL) { - ro->ro_rt->generation_id = route_generation; + RT_GENID_SYNC(ro->ro_rt); RT_LOCK_ASSERT_NOTHELD(ro->ro_rt); } } @@ -940,7 +988,7 @@ done: struct rtentry * rtalloc1(struct sockaddr *dst, int report, uint32_t ignflags) { - struct rtentry * entry; + struct rtentry *entry; lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(rnh_lock); entry = rtalloc1_locked(dst, report, ignflags); @@ -952,7 +1000,7 @@ struct rtentry * rtalloc1_scoped(struct sockaddr *dst, int report, uint32_t ignflags, unsigned int ifscope) { - struct rtentry * entry; + struct rtentry *entry; lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(rnh_lock); entry = rtalloc1_scoped_locked(dst, report, ignflags, ifscope); @@ -975,6 +1023,9 @@ rtfree_common(struct rtentry *rt, boolean_t locked) { struct radix_node_head *rnh; + lck_mtx_assert(rnh_lock, locked ? + LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED); + /* * Atomically decrement the reference count and if it reaches 0, * and there is a close function defined, call the close function. @@ -993,20 +1044,19 @@ rtfree_common(struct rtentry *rt, boolean_t locked) */ if (!locked) { /* - * Note that we check it again below after grabbing rnh_lock, - * since it is possible that another thread doing a lookup wins - * the race, grabs the rnh_lock first, and bumps up the reference - * count in which case the route should be left alone as it is - * still in use. It's also possible that another thread frees - * the route after we drop rt_lock; to prevent the route from - * being freed, we hold an extra reference. - */ + * Note that we check it again below after grabbing rnh_lock, + * since it is possible that another thread doing a lookup wins + * the race, grabs the rnh_lock first, and bumps up reference + * count in which case the route should be left alone as it is + * still in use. It's also possible that another thread frees + * the route after we drop rt_lock; to prevent the route from + * being freed, we hold an extra reference. 
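
[Editorial note] The comment above describes a standard lock-ordering dance: rnh_lock must be taken before rt_lock, so a caller holding only rt_lock pins the entry with a temporary reference, drops rt_lock, takes both locks in the legal order, and re-checks whether it still holds the last reference. Schematically (error paths and the actual teardown elided; this is not a drop-in replacement for rtfree_common()):

    RT_ADDREF_LOCKED(rt);       /* pin rt so it cannot be freed ...  */
    RT_UNLOCK(rt);              /* ... while we give up its lock     */
    lck_mtx_lock(rnh_lock);     /* now take locks in the legal order */
    RT_LOCK_SPIN(rt);
    if (rtunref(rt) > 0) {
        /* lost the race: another thread revived the route; bail out */
        RT_UNLOCK(rt);
    } else {
        /* refcnt is 0 with rnh_lock held: safe to tear the entry down */
    }
    lck_mtx_unlock(rnh_lock);
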
+ */ RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_lock(rnh_lock); RT_LOCK_SPIN(rt); - RT_REMREF_LOCKED(rt); - if (rt->rt_refcnt > 0) { + if (rtunref(rt) > 0) { /* We've lost the race, so abort */ RT_UNLOCK(rt); goto done; @@ -1022,8 +1072,12 @@ rtfree_common(struct rtentry *rt, boolean_t locked) lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); /* Negative refcnt must never happen */ - if (rt->rt_refcnt != 0) + if (rt->rt_refcnt != 0) { panic("rt %p invalid refcnt %d", rt, rt->rt_refcnt); + /* NOTREACHED */ + } + /* Idle refcnt must have been dropped during rtunref() */ + VERIFY(!(rt->rt_flags & RTF_IFREF)); /* * find the tree for that address family @@ -1050,8 +1104,10 @@ rtfree_common(struct rtentry *rt, boolean_t locked) struct rtentry *rt_parent; struct ifaddr *rt_ifa; - if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) + if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) { panic("rt %p freed while in radix tree\n", rt); + /* NOTREACHED */ + } /* * the rtentry must have been removed from the routing table * so it is represented in rttrash; remove that now. @@ -1141,10 +1197,18 @@ rtunref(struct rtentry *p) { RT_LOCK_ASSERT_HELD(p); - if (p->rt_refcnt == 0) + if (p->rt_refcnt == 0) { panic("%s(%p) bad refcnt\n", __func__, p); - - --p->rt_refcnt; + /* NOTREACHED */ + } else if (--p->rt_refcnt == 0) { + /* + * Release any idle reference count held on the interface; + * if the route is eligible, still UP and the refcnt becomes + * non-zero at some point in future before it is purged from + * the routing table, rt_set_idleref() will undo this. + */ + rt_clear_idleref(p); + } if (rte_debug & RTD_DEBUG) rtunref_audit((struct rtentry_dbg *)p); @@ -1158,9 +1222,10 @@ rtunref_audit(struct rtentry_dbg *rte) { uint16_t idx; - if (rte->rtd_inuse != RTD_INUSE) + if (rte->rtd_inuse != RTD_INUSE) { panic("rtunref: on freed rte=%p\n", rte); - + /* NOTREACHED */ + } idx = atomic_add_16_ov(&rte->rtd_refrele_cnt, 1) % CTRACE_HIST_SIZE; if (rte_debug & RTD_TRACE) ctrace_record(&rte->rtd_refrele[idx]); @@ -1174,8 +1239,16 @@ rtref(struct rtentry *p) { RT_LOCK_ASSERT_HELD(p); - if (++p->rt_refcnt == 0) + if (++p->rt_refcnt == 0) { panic("%s(%p) bad refcnt\n", __func__, p); + /* NOTREACHED */ + } else if (p->rt_refcnt == 1) { + /* + * Hold an idle reference count on the interface, + * if the route is eligible for it. + */ + rt_set_idleref(p); + } if (rte_debug & RTD_DEBUG) rtref_audit((struct rtentry_dbg *)p); @@ -1186,16 +1259,17 @@ rtref_audit(struct rtentry_dbg *rte) { uint16_t idx; - if (rte->rtd_inuse != RTD_INUSE) + if (rte->rtd_inuse != RTD_INUSE) { panic("rtref_audit: on freed rte=%p\n", rte); - + /* NOTREACHED */ + } idx = atomic_add_16_ov(&rte->rtd_refhold_cnt, 1) % CTRACE_HIST_SIZE; if (rte_debug & RTD_TRACE) ctrace_record(&rte->rtd_refhold[idx]); } void -rtsetifa(struct rtentry *rt, struct ifaddr* ifa) +rtsetifa(struct rtentry *rt, struct ifaddr *ifa) { lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); @@ -1227,8 +1301,8 @@ rtsetifa(struct rtentry *rt, struct ifaddr* ifa) */ void rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, - struct sockaddr *netmask, int flags, struct sockaddr *src, - struct rtentry **rtp) + struct sockaddr *netmask, int flags, struct sockaddr *src, + struct rtentry **rtp) { struct rtentry *rt = NULL; int error = 0; @@ -1237,6 +1311,7 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, struct ifaddr *ifa = NULL; unsigned int ifscope = (ifp != NULL) ? 
ifp->if_index : IFSCOPE_NONE; struct sockaddr_storage ss; + int af = src->sa_family; lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(rnh_lock); @@ -1246,10 +1321,10 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, * comparison against rt_gateway below. */ #if INET6 - if ((src->sa_family == AF_INET && ip_doscopedroute) || - (src->sa_family == AF_INET6 && ip6_doscopedroute)) + if ((af == AF_INET && ip_doscopedroute) || + (af == AF_INET6 && ip6_doscopedroute)) #else - if (src->sa_family == AF_INET && ip_doscopedroute) + if (af == AF_INET && ip_doscopedroute) #endif /* !INET6 */ src = sa_copy(src, &ss, &ifscope); @@ -1277,8 +1352,8 @@ rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway, * in_ifinit), so okay to access ifa_addr without locking. */ if (!(flags & RTF_DONE) && rt != NULL && - (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr, - ifa->ifa_addr))) { + (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr, + ifa->ifa_addr))) { error = EINVAL; } else { IFA_REMREF(ifa); @@ -1360,8 +1435,13 @@ out: } else { if (stat != NULL) (*stat)++; - if (use_routegenid) - routegenid_update(); + + if (af == AF_INET) + routegenid_inet_update(); +#if INET6 + else if (af == AF_INET6) + routegenid_inet6_update(); +#endif /* INET6 */ } lck_mtx_unlock(rnh_lock); bzero((caddr_t)&info, sizeof(info)); @@ -1380,11 +1460,11 @@ rtioctl(unsigned long req, caddr_t data, struct proc *p) { #pragma unused(p) #if INET && MROUTING - return mrt_ioctl(req, data); + return (mrt_ioctl(req, data)); #else #pragma unused(req) #pragma unused(data) - return ENXIO; + return (ENXIO); #endif } @@ -1605,11 +1685,11 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, struct ifaddr *ifa = NULL; struct sockaddr *ndst, *dst = dst0; struct sockaddr_storage ss, mask; - struct timeval curr_calendartime; + struct timeval caltime; int af = dst->sa_family; void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *); -#define senderr(x) { error = x ; goto bad; } +#define senderr(x) { error = x; goto bad; } lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); /* @@ -1678,10 +1758,19 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, */ if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL) senderr(ESRCH); - if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) - panic ("rtrequest delete"); + if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) { + panic("rtrequest delete"); + /* NOTREACHED */ + } rt = (struct rtentry *)rn; + RT_LOCK(rt); + rt->rt_flags &= ~RTF_UP; + /* + * Release any idle reference count held on the interface + * as this route is no longer externally visible. + */ + rt_clear_idleref(rt); /* * Take an extra reference to handle the deletion of a route * entry whose reference count is already 0; e.g. an expiring @@ -1691,9 +1780,7 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, * will decrement the reference via rtfree_locked() and then * possibly deallocate it. */ - RT_LOCK(rt); RT_ADDREF_LOCKED(rt); - rt->rt_flags &= ~RTF_UP; /* * For consistency, in case the caller didn't set the flag. 
@@ -1716,7 +1803,7 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, rt_mask(rt)) { RT_UNLOCK(rt); rnh->rnh_walktree_from(rnh, dst, rt_mask(rt), - rt_fixdelete, rt); + rt_fixdelete, rt); RT_LOCK(rt); } @@ -1757,7 +1844,6 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, set_primary_ifscope(rt_key(rt)->sa_family, IFSCOPE_NONE); } - rt_clear_idleref(rt); RT_UNLOCK(rt); @@ -1782,6 +1868,12 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, /* Dereference or deallocate the route */ rtfree_locked(rt); } + if (af == AF_INET) + routegenid_inet_update(); +#if INET6 + else if (af == AF_INET6) + routegenid_inet6_update(); +#endif /* INET6 */ break; } case RTM_RESOLVE: @@ -1828,6 +1920,11 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, (rt->rt_flags & RTF_PROXY)) { ifscope = IFSCOPE_NONE; flags &= ~RTF_IFSCOPE; + /* + * These types of cloned routes aren't currently + * eligible for idle interface reference counting. + */ + flags |= RTF_NOIFREF; } else { if (flags & RTF_IFSCOPE) { ifscope = (af == AF_INET) ? @@ -1854,9 +1951,10 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, goto makeroute; case RTM_ADD: - if ((flags & RTF_GATEWAY) && !gateway) + if ((flags & RTF_GATEWAY) && !gateway) { panic("rtrequest: RTF_GATEWAY but no gateway"); - + /* NOTREACHED */ + } if (flags & RTF_IFSCOPE) { ifa = ifa_ifwithroute_scoped_locked(flags, dst0, gateway, ifscope); @@ -1866,16 +1964,32 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, if (ifa == NULL) senderr(ENETUNREACH); makeroute: - getmicrotime(&curr_calendartime); if ((rt = rte_alloc()) == NULL) senderr(ENOBUFS); Bzero(rt, sizeof(*rt)); rte_lock_init(rt); - rt->base_calendartime = curr_calendartime.tv_sec; + getmicrotime(&caltime); + rt->base_calendartime = caltime.tv_sec; rt->base_uptime = net_uptime(); RT_LOCK(rt); rt->rt_flags = RTF_UP | flags; + /* + * Point the generation ID to the tree's. + */ + switch (af) { + case AF_INET: + rt->rt_tree_genid = &route_genid_inet; + break; +#if INET6 + case AF_INET6: + rt->rt_tree_genid = &route_genid_inet6; + break; +#endif /* INET6 */ + default: + break; + } + /* * Add the gateway. Possibly re-malloc-ing the storage for it * also add the rt_gwroute if possible. @@ -1913,7 +2027,7 @@ makeroute: /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask, - rnh, rt->rt_nodes); + rnh, rt->rt_nodes); if (rn == 0) { struct rtentry *rt2; /* @@ -1940,8 +2054,7 @@ makeroute: rt2->rt_flags, 0); rtfree_locked(rt2); rn = rnh->rnh_addaddr((caddr_t)ndst, - (caddr_t)netmask, - rnh, rt->rt_nodes); + (caddr_t)netmask, rnh, rt->rt_nodes); } else if (rt2) { /* undo the extra ref we got */ rtfree_locked(rt2); @@ -1986,20 +2099,12 @@ makeroute: (*ret_nrt)->rt_rmx.rmx_expire == 0); rt->rt_rmx = (*ret_nrt)->rt_rmx; rt_setexpire(rt, (*ret_nrt)->rt_expire); - if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) { + if ((*ret_nrt)->rt_flags & + (RTF_CLONING | RTF_PRCLONING)) { rt->rt_parent = (*ret_nrt); RT_ADDREF_LOCKED(*ret_nrt); } RT_UNLOCK(*ret_nrt); - - /* - * Enable interface reference counting for unicast - * cloned routes and bump up the reference count. 
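
[Editorial note] The deleted block just below is the counterpart of the rtref()/rtunref() changes earlier in this patch: rather than taking the interface idle reference once when a cloned route is created, the route now acquires it on every 0-to-1 reference transition and releases it on 1-to-0, so a route that is merely cached no longer keeps its interface from idling. A schematic of the transition logic (the real functions also audit the counts and panic on over/underflow):

    /* Schematic only; see rtref()/rtunref() in this patch. */
    static void
    rtref_sketch(struct rtentry *p)
    {
        if (++p->rt_refcnt == 1)
            rt_set_idleref(p);   /* first holder: pin the interface */
    }

    static int
    rtunref_sketch(struct rtentry *p)
    {
        if (--p->rt_refcnt == 0)
            rt_clear_idleref(p); /* last holder gone: unpin it */
        return (p->rt_refcnt);
    }
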
- */ - if (rt->rt_parent != NULL && - !(rt->rt_flags & (RTF_BROADCAST | RTF_MULTICAST))) { - rt_set_idleref(rt); - } } /* @@ -2032,6 +2137,15 @@ makeroute: RT_ADDREF_LOCKED(rt); } + if (af == AF_INET) + routegenid_inet_update(); +#if INET6 + else if (af == AF_INET6) + routegenid_inet6_update(); +#endif /* INET6 */ + + RT_GENID_SYNC(rt); + /* * We repeat the same procedures from rt_setgate() here * because they weren't completed when we called it earlier, @@ -2047,7 +2161,7 @@ makeroute: arg.rt0 = rt; RT_UNLOCK(rt); rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt), - rt_fixchange, &arg); + rt_fixchange, &arg); } else { RT_UNLOCK(rt); } @@ -2105,7 +2219,7 @@ rt_fixdelete(struct radix_node *rn, void *vp) RT_LOCK(rt); if (rt->rt_parent == rt0 && - !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) { + !(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) { /* * Safe to drop rt_lock and use rt_key, since holding * rnh_lock here prevents another thread from calling @@ -2116,7 +2230,7 @@ rt_fixdelete(struct radix_node *rn, void *vp) rt_mask(rt), rt->rt_flags, NULL)); } RT_UNLOCK(rt); - return 0; + return (0); } /* @@ -2147,7 +2261,7 @@ rt_fixchange(struct radix_node *rn, void *vp) RT_LOCK(rt); if (!rt->rt_parent || - (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) { + (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) { RT_UNLOCK(rt); return (0); } @@ -2534,7 +2648,7 @@ rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt) static void rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, - struct sockaddr *netmask) + struct sockaddr *netmask) { u_char *cp1 = (u_char *)src; u_char *cp2 = (u_char *)dst; @@ -2631,16 +2745,25 @@ node_lookup_default(int af) * to the loopback interface). The search rule follows the longest matching * prefix with the additional interface constraint. */ -struct rtentry * -rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, - struct radix_node_head *rnh, unsigned int ifscope) +static struct rtentry * +rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst, + struct sockaddr *netmask, struct radix_node_head *rnh, unsigned int ifscope) { struct radix_node *rn0, *rn; boolean_t dontcare; int af = dst->sa_family; struct sockaddr_storage dst_ss, mask_ss; + VERIFY(!coarse || ifscope == IFSCOPE_NONE); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); +#if INET6 + /* + * While we have rnh_lock held, see if we need to schedule the timer. + */ + if (nd6_sched_timeout_want) + nd6_sched_timeout(NULL, NULL); +#endif /* INET6 */ if (!lookup_only) netmask = NULL; @@ -2717,7 +2840,7 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, */ if (rn != NULL) { struct rtentry *rt = RT(rn); - if (rt->rt_ifp != lo_ifp) { + if (!(rt->rt_ifp->if_flags & IFF_LOOPBACK)) { if (rt->rt_ifp->if_index != ifscope) { /* * Wrong interface; keep the original result @@ -2756,14 +2879,15 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, * Use the original result if either of the following is true: * * 1) The scoped search did not yield any result. - * 2) The result from the scoped search is a scoped default route, + * 2) The caller insists on performing a coarse-grained lookup. + * 3) The result from the scoped search is a scoped default route, * and the original (non-scoped) result is not a default route, * i.e. the original result is a more specific host/net route. 
- * 3) The scoped search yielded a net route but the original + * 4) The scoped search yielded a net route but the original * result is a host route, i.e. the original result is treated * as a more specific route. */ - if (rn == NULL || (rn0 != NULL && + if (rn == NULL || coarse || (rn0 != NULL && ((SA_DEFAULT(rt_key(RT(rn))) && !SA_DEFAULT(rt_key(RT(rn0)))) || (!RT_HOST(rn) && RT_HOST(rn0))))) rn = rn0; @@ -2797,6 +2921,22 @@ rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, return (RT(rn)); } +struct rtentry * +rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask, + struct radix_node_head *rnh, unsigned int ifscope) +{ + return (rt_lookup_common(lookup_only, FALSE, dst, netmask, + rnh, ifscope)); +} + +struct rtentry * +rt_lookup_coarse(boolean_t lookup_only, struct sockaddr *dst, + struct sockaddr *netmask, struct radix_node_head *rnh) +{ + return (rt_lookup_common(lookup_only, TRUE, dst, netmask, + rnh, IFSCOPE_NONE)); +} + boolean_t rt_validate(struct rtentry *rt) { @@ -2824,30 +2964,72 @@ int rtinit(struct ifaddr *ifa, int cmd, int flags) { int error; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_lock(rnh_lock); error = rtinit_locked(ifa, cmd, flags); lck_mtx_unlock(rnh_lock); + return (error); } int rtinit_locked(struct ifaddr *ifa, int cmd, int flags) { - struct rtentry *rt; + struct radix_node_head *rnh; + uint8_t nbuf[128]; /* long enough for IPv6 */ + char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; + char abuf[MAX_IPv6_STR_LEN]; + struct rtentry *rt = NULL; struct sockaddr *dst; - struct sockaddr *deldst; - struct mbuf *m = 0; - struct rtentry *nrt = 0; - u_int32_t ifa_flags; - int error; + struct sockaddr *netmask; + int error = 0; /* * Holding rnh_lock here prevents the possibility of ifa from * changing (e.g. in_ifinit), so it is safe to access its * ifa_{dst}addr (here and down below) without locking. */ - dst = flags & RTF_HOST ? 
ifa->ifa_dstaddr : ifa->ifa_addr; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + if (flags & RTF_HOST) { + dst = ifa->ifa_dstaddr; + netmask = NULL; + } else { + dst = ifa->ifa_addr; + netmask = ifa->ifa_netmask; + } + + if (dst->sa_len == 0) { + log(LOG_ERR, "%s: %s failed, invalid dst sa_len %d\n", + __func__, rtm2str(cmd), dst->sa_len); + error = EINVAL; + goto done; + } + if (netmask != NULL && netmask->sa_len > sizeof (nbuf)) { + log(LOG_ERR, "%s: %s failed, mask sa_len %d too large\n", + __func__, rtm2str(cmd), dst->sa_len); + error = EINVAL; + goto done; + } + + if (dst->sa_family == AF_INET) { + (void) inet_ntop(AF_INET, &SIN(dst)->sin_addr.s_addr, + abuf, sizeof (abuf)); + } +#if INET6 + else if (dst->sa_family == AF_INET6) { + (void) inet_ntop(AF_INET6, &SIN6(dst)->sin6_addr, + abuf, sizeof (abuf)); + } +#endif /* INET6 */ + + if ((rnh = rt_tables[dst->sa_family]) == NULL) { + error = EINVAL; + goto done; + } + /* * If it's a delete, check that if it exists, it's on the correct * interface or we might scrub a route to another ifa which would @@ -2859,116 +3041,158 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) * If it's a net, mask off the host bits * (Assuming we have a mask) */ - if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) { - m = m_get(M_DONTWAIT, MT_SONAME); - if (m == NULL) { - return(ENOBUFS); - } - deldst = mtod(m, struct sockaddr *); - rt_maskedcopy(dst, deldst, ifa->ifa_netmask); - dst = deldst; + if (netmask != NULL) { + rt_maskedcopy(dst, SA(nbuf), netmask); + dst = SA(nbuf); } /* - * Get an rtentry that is in the routing tree and - * contains the correct info. (if this fails, can't get there). - * We set "report" to FALSE so that if it doesn't exist, - * it doesn't report an error or clone a route, etc. etc. + * Get an rtentry that is in the routing tree and contains + * the correct info. Note that we perform a coarse-grained + * lookup here, in case there is a scoped variant of the + * subnet/prefix route which we should ignore, as we never + * add a scoped subnet/prefix route as part of adding an + * interface address. */ - rt = rtalloc1_locked(dst, 0, 0); - if (rt) { + rt = rt_lookup_coarse(TRUE, dst, NULL, rnh); + if (rt != NULL) { + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); /* * Ok so we found the rtentry. it has an extra reference * for us at this stage. we won't need that so * lop that off now. */ - RT_LOCK_SPIN(rt); + RT_LOCK(rt); if (rt->rt_ifa != ifa) { + /* + * If the interface address in the rtentry + * doesn't match the interface we are using, + * then we don't want to delete it, so return + * an error. This seems to be the only point + * of this whole RTM_DELETE clause. + */ + if (rt_verbose) { + log(LOG_DEBUG, "%s: not removing " + "route to %s->%s->%s, flags %b, " + "ifaddr %s, rt_ifa 0x%llx != " + "ifa 0x%llx\n", __func__, dbuf, + gbuf, ((rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : ""), + rt->rt_flags, RTF_BITS, abuf, + (uint64_t)VM_KERNEL_ADDRPERM( + rt->rt_ifa), + (uint64_t)VM_KERNEL_ADDRPERM(ifa)); + } RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); + rt = NULL; + error = ((flags & RTF_HOST) ? + EHOSTUNREACH : ENETUNREACH); + goto done; + } else if (rt->rt_flags & RTF_STATIC) { /* - * If the interface in the rtentry doesn't match - * the interface we are using, then we don't - * want to delete it, so return an error. - * This seems to be the only point of - * this whole RTM_DELETE clause. + * Don't remove the subnet/prefix route if + * this was manually added from above. 
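
[Editorial note] The lookup above deliberately uses the new rt_lookup_coarse() rather than a scoped rt_lookup(): interface-address code never adds scoped subnet/prefix routes, so a scoped twin must not be allowed to shadow the unscoped entry being removed. A sketch of the resulting delete-side pattern, rnh_lock assumed held as in this caller and the checks abridged:

    struct rtentry *rt;

    rt = rt_lookup_coarse(TRUE, dst, NULL, rnh);   /* lookup_only, unscoped */
    if (rt != NULL) {
        RT_LOCK(rt);
        if (rt->rt_ifa == ifa && !(rt->rt_flags & RTF_STATIC)) {
            /* safe to request deletion of this entry */
        }
        RT_REMREF_LOCKED(rt);   /* drop the lookup's reference */
        RT_UNLOCK(rt);
    }
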
*/ - if (m) - (void) m_free(m); - return (flags & RTF_HOST ? EHOSTUNREACH - : ENETUNREACH); - } else { + if (rt_verbose) { + log(LOG_DEBUG, "%s: not removing " + "static route to %s->%s->%s, " + "flags %b, ifaddr %s\n", __func__, + dbuf, gbuf, ((rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : ""), + rt->rt_flags, RTF_BITS, abuf); + } RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); + rt = NULL; + error = EBUSY; + goto done; } + if (rt_verbose) { + log(LOG_DEBUG, "%s: removing route to " + "%s->%s->%s, flags %b, ifaddr %s\n", + __func__, dbuf, gbuf, + ((rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : ""), + rt->rt_flags, RTF_BITS, abuf); + } + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + rt = NULL; } - /* XXX */ -#if 0 - else { - /* - * One would think that as we are deleting, and we know - * it doesn't exist, we could just return at this point - * with an "ELSE" clause, but apparently not.. - */ - lck_mtx_unlock(rnh_lock); - return (flags & RTF_HOST ? EHOSTUNREACH - : ENETUNREACH); - } -#endif } /* * Do the actual request */ - IFA_LOCK_SPIN(ifa); - ifa_flags = ifa->ifa_flags; - IFA_UNLOCK(ifa); - error = rtrequest_locked(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask, - flags | ifa_flags, &nrt); - if (m) - (void) m_free(m); - /* - * If we are deleting, and we found an entry, then - * it's been removed from the tree.. now throw it away. - */ - if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) { + if ((error = rtrequest_locked(cmd, dst, ifa->ifa_addr, netmask, + flags | ifa->ifa_flags, &rt)) != 0) + goto done; + + VERIFY(rt != NULL); + + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + + switch (cmd) { + case RTM_DELETE: /* - * notify any listening routing agents of the change + * If we are deleting, and we found an entry, then it's + * been removed from the tree. Notify any listening + * routing agents of the change and throw it away. */ RT_LOCK(rt); - rt_newaddrmsg(cmd, ifa, error, nrt); - if (use_routegenid) - routegenid_update(); + rt_newaddrmsg(cmd, ifa, error, rt); RT_UNLOCK(rt); + if (rt_verbose) { + log(LOG_DEBUG, "%s: removed route to %s->%s->%s, " + "flags %b, ifaddr %s\n", __func__, dbuf, gbuf, + ((rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : ""), + rt->rt_flags, RTF_BITS, abuf); + } rtfree_locked(rt); - } + break; - /* - * We are adding, and we have a returned routing entry. - * We need to sanity check the result. - */ - if (cmd == RTM_ADD && error == 0 && (rt = nrt)) { - RT_LOCK(rt); + case RTM_ADD: /* - * If it came back with an unexpected interface, then it must - * have already existed or something. (XXX) + * We are adding, and we have a returned routing entry. + * We need to sanity check the result. If it came back + * with an unexpected interface, then it must have already + * existed or something. */ + RT_LOCK(rt); if (rt->rt_ifa != ifa) { void (*ifa_rtrequest) (int, struct rtentry *, struct sockaddr *); if (!(rt->rt_ifa->ifa_ifp->if_flags & - (IFF_POINTOPOINT|IFF_LOOPBACK))) - printf("rtinit: wrong ifa (%p) was (%p)\n", - ifa, rt->rt_ifa); + (IFF_POINTOPOINT|IFF_LOOPBACK))) { + log(LOG_ERR, "%s: %s route to %s->%s->%s, " + "flags %b, ifaddr %s, rt_ifa 0x%llx != " + "ifa 0x%llx\n", __func__, rtm2str(cmd), + dbuf, gbuf, ((rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : ""), rt->rt_flags, + RTF_BITS, abuf, + (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa), + (uint64_t)VM_KERNEL_ADDRPERM(ifa)); + } + + if (rt_verbose) { + log(LOG_DEBUG, "%s: %s route to %s->%s->%s, " + "flags %b, ifaddr %s, rt_ifa was 0x%llx " + "now 0x%llx\n", __func__, rtm2str(cmd), + dbuf, gbuf, ((rt->rt_ifp != NULL) ? 
+ rt->rt_ifp->if_xname : ""), rt->rt_flags, + RTF_BITS, abuf, + (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa), + (uint64_t)VM_KERNEL_ADDRPERM(ifa)); + } + /* * Ask that the protocol in question * remove anything it has associated with * this route and ifaddr. */ - IFA_LOCK_SPIN(rt->rt_ifa); ifa_rtrequest = rt->rt_ifa->ifa_rtrequest; - IFA_UNLOCK(rt->rt_ifa); if (ifa_rtrequest != NULL) - ifa_rtrequest(RTM_DELETE, rt, SA(0)); + ifa_rtrequest(RTM_DELETE, rt, NULL); /* * Set the route's ifa. */ @@ -2994,23 +3218,33 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) * we are adding. */ rt->rt_ifp = ifa->ifa_ifp; - rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu; /*XXX*/ + /* + * If rmx_mtu is not locked, update it + * to the MTU used by the new interface. + */ + if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) + rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + /* * Now ask the protocol to check if it needs * any special processing in its new form. */ - IFA_LOCK_SPIN(ifa); ifa_rtrequest = ifa->ifa_rtrequest; - IFA_UNLOCK(ifa); if (ifa_rtrequest != NULL) - ifa_rtrequest(RTM_ADD, rt, SA(0)); + ifa_rtrequest(RTM_ADD, rt, NULL); + } else { + if (rt_verbose) { + log(LOG_DEBUG, "%s: added route to %s->%s->%s, " + "flags %b, ifaddr %s\n", __func__, dbuf, + gbuf, ((rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : ""), rt->rt_flags, + RTF_BITS, abuf); + } } /* * notify any listenning routing agents of the change */ - rt_newaddrmsg(cmd, ifa, error, nrt); - if (use_routegenid) - routegenid_update(); + rt_newaddrmsg(cmd, ifa, error, rt); /* * We just wanted to add it; we don't actually need a * reference. This will result in a route that's added @@ -3020,38 +3254,36 @@ rtinit_locked(struct ifaddr *ifa, int cmd, int flags) */ RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); + break; + + default: + VERIFY(0); + /* NOTREACHED */ } +done: return (error); } -u_int64_t -rt_expiry(struct rtentry *rt, u_int64_t base, u_int32_t delta) -{ - u_int64_t retval; - - /* - * If the interface of the route doesn't demand aggressive draining, - * return the expiration time based on the caller-supplied delta. - * Otherwise use the more aggressive route expiration delta (or - * the caller-supplied delta, whichever is less.) - */ - if (rt->rt_ifp == NULL || rt->rt_ifp->if_want_aggressive_drain == 0) - retval = base + delta; - else - retval = base + MIN(rt_if_idle_expire_timeout, delta); - - return (retval); -} - -void +static void rt_set_idleref(struct rtentry *rt) { RT_LOCK_ASSERT_HELD(rt); - rt_clear_idleref(rt); - rt->rt_if_ref_fn = rte_if_ref; - rt->rt_if_ref_fn(rt->rt_ifp, 1); - rt->rt_flags |= RTF_IFREF; + /* + * We currently keep idle refcnt only on unicast cloned routes + * that aren't marked with RTF_NOIFREF. 
+ */ + if (rt->rt_parent != NULL && !(rt->rt_flags & + (RTF_NOIFREF|RTF_BROADCAST | RTF_MULTICAST)) && + (rt->rt_flags & (RTF_UP|RTF_WASCLONED|RTF_IFREF)) == + (RTF_UP|RTF_WASCLONED)) { + rt_clear_idleref(rt); /* drop existing refcnt if any */ + rt->rt_if_ref_fn = rte_if_ref; + /* Become a regular mutex, just in case */ + RT_CONVERT_LOCK(rt); + rt->rt_if_ref_fn(rt->rt_ifp, 1); + rt->rt_flags |= RTF_IFREF; + } } void @@ -3060,6 +3292,9 @@ rt_clear_idleref(struct rtentry *rt) RT_LOCK_ASSERT_HELD(rt); if (rt->rt_if_ref_fn != NULL) { + VERIFY((rt->rt_flags & (RTF_NOIFREF | RTF_IFREF)) == RTF_IFREF); + /* Become a regular mutex, just in case */ + RT_CONVERT_LOCK(rt); rt->rt_if_ref_fn(rt->rt_ifp, -1); rt->rt_flags &= ~RTF_IFREF; rt->rt_if_ref_fn = NULL; @@ -3122,7 +3357,6 @@ rt_lock(struct rtentry *rt, boolean_t spin) void rt_unlock(struct rtentry *rt) { - RT_LOCK_ASSERT_HELD(rt); if (rte_debug & RTD_DEBUG) rte_unlock_debug((struct rtentry_dbg *)rt); lck_mtx_unlock(&rt->rt_lock); @@ -3134,6 +3368,7 @@ rte_lock_debug(struct rtentry_dbg *rte) { uint32_t idx; + RT_LOCK_ASSERT_HELD((struct rtentry *)rte); idx = atomic_add_32_ov(&rte->rtd_lock_cnt, 1) % CTRACE_HIST_SIZE; if (rte_debug & RTD_TRACE) ctrace_record(&rte->rtd_lock[idx]); @@ -3144,6 +3379,7 @@ rte_unlock_debug(struct rtentry_dbg *rte) { uint32_t idx; + RT_LOCK_ASSERT_HELD((struct rtentry *)rte); idx = atomic_add_32_ov(&rte->rtd_unlock_cnt, 1) % CTRACE_HIST_SIZE; if (rte_debug & RTD_TRACE) ctrace_record(&rte->rtd_unlock[idx]); @@ -3166,9 +3402,10 @@ rte_free(struct rtentry *p) return; } - if (p->rt_refcnt != 0) + if (p->rt_refcnt != 0) { panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt); - + /* NOTREACHED */ + } zfree(rte_zone, p); } @@ -3180,13 +3417,15 @@ rte_if_ref(struct ifnet *ifp, int cnt) uint32_t old; /* Force cnt to 1 increment/decrement */ - if (cnt < -1 || cnt > 1) + if (cnt < -1 || cnt > 1) { panic("%s: invalid count argument (%d)", __func__, cnt); - + /* NOTREACHED */ + } old = atomic_add_32_ov(&ifp->if_route_refcnt, cnt); - if (cnt < 0 && old == 0) + if (cnt < 0 && old == 0) { panic("%s: ifp=%p negative route refcnt!", __func__, ifp); - + /* NOTREACHED */ + } /* * The following is done without first holding the ifnet lock, * for performance reasons. 
The relevant ifnet fields, with @@ -3238,14 +3477,17 @@ rte_free_debug(struct rtentry *p) { struct rtentry_dbg *rte = (struct rtentry_dbg *)p; - if (p->rt_refcnt != 0) + if (p->rt_refcnt != 0) { panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt); - - if (rte->rtd_inuse == RTD_FREED) + /* NOTREACHED */ + } + if (rte->rtd_inuse == RTD_FREED) { panic("rte_free: double free rte=%p\n", rte); - else if (rte->rtd_inuse != RTD_INUSE) + /* NOTREACHED */ + } else if (rte->rtd_inuse != RTD_INUSE) { panic("rte_free: corrupted rte=%p\n", rte); - + /* NOTREACHED */ + } bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof (*p)); /* Preserve rt_lock to help catch use-after-free cases */ bzero((caddr_t)p, offsetof(struct rtentry, rt_lock)); @@ -3267,51 +3509,71 @@ ctrace_record(ctrace_t *tr) (void) OSBacktrace(tr->pc, CTRACE_STACK_SIZE); } -__private_extern__ void -route_copyout( - struct route *dst, - const struct route *src, - size_t length) +void +route_copyout(struct route *dst, const struct route *src, size_t length) { - /* Copy everything (rt, dst, flags) from ifnet */ + /* Copy everything (rt, srcif, flags, dst) from src */ bcopy(src, dst, length); /* Hold one reference for the local copy of struct route */ if (dst->ro_rt != NULL) RT_ADDREF(dst->ro_rt); + + /* Hold one reference for the local copy of struct ifaddr */ + if (dst->ro_srcia != NULL) + IFA_ADDREF(dst->ro_srcia); } -__private_extern__ void -route_copyin( - struct route *src, - struct route *dst, - size_t length) +void +route_copyin(struct route *src, struct route *dst, size_t length) { - /* No cached route in the ifnet? */ + /* No cached route at the destination? */ if (dst->ro_rt == NULL) { /* - * Copy everything (rt, dst, flags) from ip_forward(); - * the reference to the route was held at the time - * it was allocated and is kept intact. + * Ditch the address in the cached copy (dst) since + * we're about to take everything there is in src. + */ + if (dst->ro_srcia != NULL) + IFA_REMREF(dst->ro_srcia); + /* + * Copy everything (rt, srcia, flags, dst) from src; the + * references to rt and/or srcia were held at the time + * of storage and are kept intact. */ bcopy(src, dst, length); } else if (src->ro_rt != NULL) { /* - * If the same, update just the ro_flags and ditch the one + * If the same, update srcia and flags, and ditch the route * in the local copy. Else ditch the one that is currently * cached, and cache the new route. */ if (dst->ro_rt == src->ro_rt) { dst->ro_flags = src->ro_flags; + if (dst->ro_srcia != src->ro_srcia) { + if (dst->ro_srcia != NULL) + IFA_REMREF(dst->ro_srcia); + dst->ro_srcia = src->ro_srcia; + } else if (src->ro_srcia != NULL) { + IFA_REMREF(src->ro_srcia); + } rtfree(src->ro_rt); } else { rtfree(dst->ro_rt); + if (dst->ro_srcia != NULL) + IFA_REMREF(dst->ro_srcia); bcopy(src, dst, length); } + } else if (src->ro_srcia != NULL) { + /* + * Ditch src address in the local copy (src) since we're + * not caching the route entry anyway (ro_rt is NULL). + */ + IFA_REMREF(src->ro_srcia); } - /* This function consumes the reference */ + /* This function consumes the references on src */ src->ro_rt = NULL; + src->ro_srcia = NULL; } /* @@ -3325,10 +3587,10 @@ route_copyin( * If the returned route is non-NULL, the caller is responsible for * releasing the reference and unlocking the route. 
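
[Editorial note] Per the contract spelled out above, whatever route_to_gwroute() hands back is both referenced and locked. A hedged sketch of the consumer side, assuming the usual RT_UNLOCK()/rtfree() pairing; error handling is elided.

    struct rtentry *route = NULL;
    errno_t error;

    error = route_to_gwroute(net_dest, hint0, &route);
    if (error == 0 && route != NULL) {
        /* route is locked and referenced here; use it, then ... */
        RT_UNLOCK(route);   /* ... return the lock ...              */
        rtfree(route);      /* ... and the reference we were given  */
    }
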
*/ -#define senderr(e) { error = (e); goto bad; } +#define senderr(e) { error = (e); goto bad; } errno_t route_to_gwroute(const struct sockaddr *net_dest, struct rtentry *hint0, - struct rtentry **out_route) + struct rtentry **out_route) { uint64_t timenow; struct rtentry *rt = hint0, *hint = hint0; @@ -3565,3 +3827,67 @@ rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt) RT_UNLOCK(rt); } } + +static void +rt_str4(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) +{ + VERIFY(rt_key(rt)->sa_family == AF_INET); + + if (ds != NULL) + (void) inet_ntop(AF_INET, + &SIN(rt_key(rt))->sin_addr.s_addr, ds, dslen); + if (gs != NULL) { + if (rt->rt_flags & RTF_GATEWAY) { + (void) inet_ntop(AF_INET, + &SIN(rt->rt_gateway)->sin_addr.s_addr, gs, gslen); + } else if (rt->rt_ifp != NULL) { + snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit); + } else { + snprintf(gs, gslen, "%s", "link"); + } + } +} + +#if INET6 +static void +rt_str6(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) +{ + VERIFY(rt_key(rt)->sa_family == AF_INET6); + + if (ds != NULL) + (void) inet_ntop(AF_INET6, + &SIN6(rt_key(rt))->sin6_addr, ds, dslen); + if (gs != NULL) { + if (rt->rt_flags & RTF_GATEWAY) { + (void) inet_ntop(AF_INET6, + &SIN6(rt->rt_gateway)->sin6_addr, gs, gslen); + } else if (rt->rt_ifp != NULL) { + snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit); + } else { + snprintf(gs, gslen, "%s", "link"); + } + } +} +#endif /* INET6 */ + + +void +rt_str(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen) +{ + switch (rt_key(rt)->sa_family) { + case AF_INET: + rt_str4(rt, ds, dslen, gs, gslen); + break; +#if INET6 + case AF_INET6: + rt_str6(rt, ds, dslen, gs, gslen); + break; +#endif /* INET6 */ + default: + if (ds != NULL) + bzero(ds, dslen); + if (gs != NULL) + bzero(gs, gslen); + break; + } +} diff --git a/bsd/net/route.h b/bsd/net/route.h index c5fe155d6..458c28f66 100644 --- a/bsd/net/route.h +++ b/bsd/net/route.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -62,61 +62,18 @@ */ #ifndef _NET_ROUTE_H_ -#define _NET_ROUTE_H_ +#define _NET_ROUTE_H_ #include #include #include #include -/* - * Kernel resident routing tables. - * - * The routing tables are initialized when interface addresses - * are set by making entries for all directly connected interfaces. 
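
[Editorial note] The rt_str() family above supplies the human-readable destination and gateway strings behind the new rt_verbose logging throughout this patch. The calling convention, as rtinit_locked() uses it:

    char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN];

    rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
    log(LOG_DEBUG, "%s: route to %s->%s->%s\n", __func__, dbuf, gbuf,
        (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "");
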
- */ - -/* - * A route consists of a destination address and a reference - * to a routing entry. These are often held by protocols - * in their control blocks, e.g. inpcb. - */ -#ifdef PRIVATE -struct rtentry; -struct route { - /* - * N.B: struct route must begin with ro_rt and ro_flags - * because the code does some casts of a 'struct route_in6 *' - * to a 'struct route *'. - */ - struct rtentry *ro_rt; - uint32_t ro_flags; /* route flags (see below) */ - struct sockaddr ro_dst; -}; - -#define ROF_SRCIF_SELECTED 0x1 /* source interface was selected */ - -/* - * Route reachability info (private) - */ -struct rt_reach_info { - u_int32_t ri_refcnt; /* reference count */ - u_int32_t ri_probes; /* total # of probes */ - u_int64_t ri_snd_expire; /* transmit expiration (calendar) time */ - u_int64_t ri_rcv_expire; /* receive expiration (calendar) time */ - int32_t ri_rssi; /* received signal strength */ - int32_t ri_lqm; /* link quality metric */ - int32_t ri_npm; /* node proximity metric */ -}; -#else -struct route; -#endif /* PRIVATE */ - /* * These numbers are used by reliable protocols for determining * retransmission behavior and are included in the routing structure. */ struct rt_metrics { - u_int32_t rmx_locks; /* Kernel must leave these values alone */ + u_int32_t rmx_locks; /* Kernel leaves these values alone */ u_int32_t rmx_mtu; /* MTU for this path */ u_int32_t rmx_hopcount; /* max hops expected */ int32_t rmx_expire; /* lifetime for route, e.g. redirect */ @@ -134,15 +91,78 @@ struct rt_metrics { */ #define RTM_RTTUNIT 1000000 /* units for rtt, rttvar, as units per sec */ -#ifdef KERNEL_PRIVATE +#ifdef PRIVATE +#ifndef KERNEL +/* Private declaration for user-space (needed by ip_mroute.h) */ +struct route { +#else /* KERNEL */ +struct route_old { +#endif /* KERNEL */ + void *ro_rt; + uint32_t ro_flags; + struct sockaddr ro_dst; +}; +#endif /* PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE +#include +#include + /* - * New expiry value (in seconds) when dealing with interfaces which implement - * the if_want_aggressive_drain behavior. Otherwise the event mechanism wouldn't - * fire quick enough to cause any sort of significant gains in performance. + * Kernel resident routing tables. + * + * The routing tables are initialized when interface addresses + * are set by making entries for all directly connected interfaces. */ -#define RT_IF_IDLE_EXPIRE_TIMEOUT 30 -#define RT_IF_IDLE_DRAIN_INTERVAL 10 -#endif /* KERNEL_PRIVATE */ + +/* forward declarations */ +struct ifnet_llreach_info; +struct rt_reach_info; + +/* + * IP route structure + * + * A route consists of a destination address and a reference + * to a routing entry. These are often held by protocols + * in their control blocks, e.g. inpcb. + */ +struct route { + /* + * N.B: struct route must begin with ro_{rt,srcia,flags} + * because the code does some casts of a 'struct route_in6 *' + * to a 'struct route *'. 
+ */ + struct rtentry *ro_rt; + struct ifaddr *ro_srcia; + uint32_t ro_flags; /* route flags (see below) */ + struct sockaddr ro_dst; +}; + +#define ROF_SRCIF_SELECTED 0x1 /* source interface was selected */ + +#define ROUTE_UNUSABLE(_ro) \ + ((_ro)->ro_rt == NULL || \ + ((_ro)->ro_rt->rt_flags & (RTF_UP|RTF_CONDEMNED)) != RTF_UP || \ + RT_GENID_OUTOFSYNC((_ro)->ro_rt)) + +#define _ROUTE_RELEASE_COMMON(_ro, _rnh_locked) do { \ + if ((_ro)->ro_rt != NULL) { \ + RT_LOCK_ASSERT_NOTHELD((_ro)->ro_rt); \ + if (_rnh_locked) \ + rtfree_locked((_ro)->ro_rt); \ + else \ + rtfree((_ro)->ro_rt); \ + (_ro)->ro_rt = NULL; \ + } \ + if ((_ro)->ro_srcia != NULL) { \ + IFA_REMREF((_ro)->ro_srcia); \ + (_ro)->ro_srcia = NULL; \ + (_ro)->ro_flags &= ~ROF_SRCIF_SELECTED; \ + } \ +} while (0) + +#define ROUTE_RELEASE_LOCKED(_ro) _ROUTE_RELEASE_COMMON(_ro, TRUE) +#define ROUTE_RELEASE(_ro) _ROUTE_RELEASE_COMMON(_ro, FALSE) /* * We distinguish between routes to hosts and routes to networks, @@ -152,54 +172,61 @@ struct rt_metrics { * gateways are marked so that the output routines know to address the * gateway rather than the ultimate destination. */ -#ifdef KERNEL_PRIVATE -#include -#ifndef RNF_NORMAL -#include -#endif -struct ifnet_llreach_info; /* forward declaration */ + /* - * Kernel routing entry structure (private). + * Kernel routing entry structure. */ struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ -#define rt_key(r) ((struct sockaddr *)(void *)((r)->rt_nodes->rn_key)) -#define rt_mask(r) ((struct sockaddr *)(void *)((r)->rt_nodes->rn_mask)) - struct sockaddr *rt_gateway; /* value */ - int32_t rt_refcnt; /* # held references */ - uint32_t rt_flags; /* up/down?, host/net */ - struct ifnet *rt_ifp; /* the answer: interface to use */ - struct ifaddr *rt_ifa; /* the answer: interface addr to use */ - struct sockaddr *rt_genmask; /* for generation of cloned routes */ - void *rt_llinfo; /* pointer to link level info cache */ - void (*rt_llinfo_get_ri) /* llinfo get reachability info fn */ - (struct rtentry *, struct rt_reach_info *); - void (*rt_llinfo_get_iflri) /* ifnet llinfo get reach. info fn */ - (struct rtentry *, struct ifnet_llreach_info *); - void (*rt_llinfo_purge)(struct rtentry *); /* llinfo purge fn */ - void (*rt_llinfo_free)(void *); /* link level info free function */ - struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */ - struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ - struct rtentry *rt_parent; /* cloning parent of this route */ - uint32_t generation_id; /* route generation id */ +#define rt_key(r) (SA((r)->rt_nodes->rn_key)) +#define rt_mask(r) (SA((r)->rt_nodes->rn_mask)) /* * See bsd/net/route.c for synchronization notes. */ decl_lck_mtx_data(, rt_lock); /* lock for routing entry */ - struct nstat_counts *rt_stats; - void (*rt_if_ref_fn)(struct ifnet *, int); /* interface ref func */ - + uint32_t rt_refcnt; /* # held references */ + uint32_t rt_flags; /* up/down?, host/net */ + uint32_t rt_genid; /* route generation id */ + struct sockaddr *rt_gateway; /* value */ + struct ifnet *rt_ifp; /* the answer: interface to use */ + struct ifaddr *rt_ifa; /* the answer: interface addr to use */ + struct sockaddr *rt_genmask; /* for generation of cloned routes */ + void *rt_llinfo; /* pointer to link level info cache */ + void (*rt_llinfo_get_ri) /* llinfo get reachability info fn */ + (struct rtentry *, struct rt_reach_info *); + void (*rt_llinfo_get_iflri) /* ifnet llinfo get reach. 
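
/*
 * Sketch of the consumer pattern that ROUTE_UNUSABLE()/ROUTE_RELEASE()
 * above support (hypothetical caller, rnh_lock not held; the zero
 * rtalloc_ign() flags value is arbitrary):
 */
static void
route_cache_example(struct route *ro)
{
	/* Drop a missing, downed, condemned or out-of-sync entry... */
	if (ROUTE_UNUSABLE(ro))
		ROUTE_RELEASE(ro);

	/* ...then repopulate the cache before use. */
	if (ro->ro_rt == NULL)
		rtalloc_ign(ro, 0);

	if (ro->ro_rt != NULL) {
		/* transmit via ro->ro_rt */
	}
}
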
info fn */ + (struct rtentry *, struct ifnet_llreach_info *); + void (*rt_llinfo_purge)(struct rtentry *); /* llinfo purge fn */ + void (*rt_llinfo_free)(void *); /* link level info free function */ + struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */ +#define rt_use rt_rmx.rmx_pksent + struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ + struct rtentry *rt_parent; /* cloning parent of this route */ + struct nstat_counts *rt_stats; /* route stats */ + void (*rt_if_ref_fn)(struct ifnet *, int); /* interface ref func */ + + uint32_t *rt_tree_genid; /* ptr to per-tree route_genid */ uint64_t rt_expire; /* expiration time in uptime seconds */ uint64_t base_calendartime; /* calendar time upon entry creation */ - uint64_t base_uptime;/* uptime upon entry creation */ + uint64_t base_uptime; /* uptime upon entry creation */ }; -extern void rt_setexpire(struct rtentry *, uint64_t); -#endif /* KERNEL_PRIVATE */ +/* + * Synchronize route entry's generation ID with the tree's. + */ +#define RT_GENID_SYNC(_rt) do { \ + if ((_rt)->rt_tree_genid != NULL) \ + (_rt)->rt_genid = *(_rt)->rt_tree_genid; \ +} while (0) -#ifdef KERNEL_PRIVATE -#define rt_use rt_rmx.rmx_pksent -#endif /* KERNEL_PRIVATE */ +/* + * Indicates whether or not the route entry's generation ID is stale. + */ +#define RT_GENID_OUTOFSYNC(_rt) \ + ((_rt)->rt_tree_genid != NULL && \ + *(_rt)->rt_tree_genid != (_rt)->rt_genid) + +#endif /* BSD_KERNEL_PRIVATE */ #define RTF_UP 0x1 /* route usable */ #define RTF_GATEWAY 0x2 /* destination is a gateway */ @@ -207,31 +234,39 @@ extern void rt_setexpire(struct rtentry *, uint64_t); #define RTF_REJECT 0x8 /* host or net unreachable */ #define RTF_DYNAMIC 0x10 /* created dynamically (by redirect) */ #define RTF_MODIFIED 0x20 /* modified dynamically (by redirect) */ -#define RTF_DONE 0x40 /* message confirmed */ -#define RTF_DELCLONE 0x80 /* delete cloned route */ -#define RTF_CLONING 0x100 /* generate new routes on use */ -#define RTF_XRESOLVE 0x200 /* external daemon resolves name */ -#define RTF_LLINFO 0x400 /* generated by link layer (e.g. ARP) */ -#define RTF_STATIC 0x800 /* manually added */ -#define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ -#define RTF_PROTO2 0x4000 /* protocol specific routing flag */ -#define RTF_PROTO1 0x8000 /* protocol specific routing flag */ - -#define RTF_PRCLONING 0x10000 /* protocol requires cloning */ -#define RTF_WASCLONED 0x20000 /* route generated through cloning */ -#define RTF_PROTO3 0x40000 /* protocol specific routing flag */ +#define RTF_DONE 0x40 /* message confirmed */ +#define RTF_DELCLONE 0x80 /* delete cloned route */ +#define RTF_CLONING 0x100 /* generate new routes on use */ +#define RTF_XRESOLVE 0x200 /* external daemon resolves name */ +#define RTF_LLINFO 0x400 /* generated by link layer (e.g. 
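
/*
 * How the generation-count machinery above fits together (illustrative;
 * assumes rt is an AF_INET entry whose rt_tree_genid points at
 * route_genid_inet, and that no other thread intervenes):
 */
static void
genid_example(struct rtentry *rt)
{
	/* A mutation of the IPv4 tree bumps the per-tree counter... */
	routegenid_inet_update();

	/* ...lazily invalidating every entry cached against it... */
	VERIFY(RT_GENID_OUTOFSYNC(rt));

	/* ...until a revalidated entry adopts the current generation. */
	RT_GENID_SYNC(rt);
	VERIFY(!RT_GENID_OUTOFSYNC(rt));
}
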
ARP) */ +#define RTF_STATIC 0x800 /* manually added */ +#define RTF_BLACKHOLE 0x1000 /* just discard pkts (during updates) */ +#define RTF_NOIFREF 0x2000 /* not eligible for RTF_IFREF */ +#define RTF_PROTO2 0x4000 /* protocol specific routing flag */ +#define RTF_PROTO1 0x8000 /* protocol specific routing flag */ + +#define RTF_PRCLONING 0x10000 /* protocol requires cloning */ +#define RTF_WASCLONED 0x20000 /* route generated through cloning */ +#define RTF_PROTO3 0x40000 /* protocol specific routing flag */ /* 0x80000 unused */ -#define RTF_PINNED 0x100000 /* future use */ +#define RTF_PINNED 0x100000 /* future use */ #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ -#define RTF_IFSCOPE 0x1000000 /* has valid interface scope */ -#define RTF_CONDEMNED 0x2000000 /* defunct; no longer modifiable */ -#define RTF_IFREF 0x4000000 /* route holds a ref to interface */ +#define RTF_IFSCOPE 0x1000000 /* has valid interface scope */ +#define RTF_CONDEMNED 0x2000000 /* defunct; no longer modifiable */ +#define RTF_IFREF 0x4000000 /* route holds a ref to interface */ #define RTF_PROXY 0x8000000 /* proxying, no interface scope */ #define RTF_ROUTER 0x10000000 /* host is a router */ /* 0x20000000 and up unassigned */ +#define RTF_BITS \ + "\020\1UP\2GATEWAY\3HOST\4REJECT\5DYNAMIC\6MODIFIED\7DONE" \ + "\10DELCLONE\11CLONING\12XRESOLVE\13LLINFO\14STATIC\15BLACKHOLE" \ + "\16NOIFREF\17PROTO2\20PROTO1\21PRCLONING\22WASCLONED\23PROTO3" \ + "\25PINNED\26LOCAL\27BROADCAST\30MULTICAST\31IFSCOPE\32CONDEMNED" \ + "\33IFREF\34PROXY\35ROUTER" + /* * Routing statistics. */ @@ -247,36 +282,49 @@ struct rtstat { * Structures for routing messages. */ struct rt_msghdr { - u_short rtm_msglen; /* to skip over non-understood messages */ - u_char rtm_version; /* future binary compatibility */ - u_char rtm_type; /* message type */ - u_short rtm_index; /* index for associated ifp */ - int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ - int rtm_addrs; /* bitmask identifying sockaddrs in msg */ - pid_t rtm_pid; /* identify sender */ - int rtm_seq; /* for sender to identify action */ - int rtm_errno; /* why failed */ - int rtm_use; /* from rtentry */ - u_int32_t rtm_inits; /* which metrics we are initializing */ - struct rt_metrics rtm_rmx; /* metrics themselves */ + u_short rtm_msglen; /* to skip over non-understood messages */ + u_char rtm_version; /* future binary compatibility */ + u_char rtm_type; /* message type */ + u_short rtm_index; /* index for associated ifp */ + int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ + int rtm_addrs; /* bitmask identifying sockaddrs in msg */ + pid_t rtm_pid; /* identify sender */ + int rtm_seq; /* for sender to identify action */ + int rtm_errno; /* why failed */ + int rtm_use; /* from rtentry */ + u_int32_t rtm_inits; /* which metrics we are initializing */ + struct rt_metrics rtm_rmx; /* metrics themselves */ }; struct rt_msghdr2 { - u_short rtm_msglen; /* to skip over non-understood messages */ - u_char rtm_version; /* future binary compatibility */ - u_char rtm_type; /* message type */ - u_short rtm_index; /* index for associated ifp */ - int rtm_flags; /* flags, incl. kern & message, e.g. 
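
/*
 * RTF_BITS above is a printf %b format string: the leading \020 selects
 * hexadecimal output and each \N<name> pair labels bit N (1-based, in
 * octal).  A sketch, assuming the kernel printf's %b support; the
 * output shown is approximate:
 */
static void
rt_flags_example(struct rtentry *rt)
{
	/* For rt_flags == 0x805 this prints: flags=805<UP,HOST,STATIC> */
	printf("flags=%b\n", rt->rt_flags, RTF_BITS);
}
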
DONE */ - int rtm_addrs; /* bitmask identifying sockaddrs in msg */ - int32_t rtm_refcnt; /* reference count */ - int rtm_parentflags; /* flags of the parent route */ - int rtm_reserved; /* reserved field set to 0 */ - int rtm_use; /* from rtentry */ - u_int32_t rtm_inits; /* which metrics we are initializing */ - struct rt_metrics rtm_rmx; /* metrics themselves */ + u_short rtm_msglen; /* to skip over non-understood messages */ + u_char rtm_version; /* future binary compatibility */ + u_char rtm_type; /* message type */ + u_short rtm_index; /* index for associated ifp */ + int rtm_flags; /* flags, incl. kern & message, e.g. DONE */ + int rtm_addrs; /* bitmask identifying sockaddrs in msg */ + int32_t rtm_refcnt; /* reference count */ + int rtm_parentflags; /* flags of the parent route */ + int rtm_reserved; /* reserved field set to 0 */ + int rtm_use; /* from rtentry */ + u_int32_t rtm_inits; /* which metrics we are initializing */ + struct rt_metrics rtm_rmx; /* metrics themselves */ }; #ifdef PRIVATE +/* + * Route reachability info. + */ +struct rt_reach_info { + u_int32_t ri_refcnt; /* reference count */ + u_int32_t ri_probes; /* total # of probes */ + u_int64_t ri_snd_expire; /* tx expiration (calendar) time */ + u_int64_t ri_rcv_expire; /* rx expiration (calendar) time */ + int32_t ri_rssi; /* received signal strength */ + int32_t ri_lqm; /* link quality metric */ + int32_t ri_npm; /* node proximity metric */ +}; + /* * Extended routing message header (private). */ @@ -298,33 +346,33 @@ struct rt_msghdr_ext { }; #endif /* PRIVATE */ -#define RTM_VERSION 5 /* Up the ante and ignore older versions */ +#define RTM_VERSION 5 /* Up the ante and ignore older versions */ /* * Message types. */ -#define RTM_ADD 0x1 /* Add Route */ -#define RTM_DELETE 0x2 /* Delete Route */ -#define RTM_CHANGE 0x3 /* Change Metrics or flags */ -#define RTM_GET 0x4 /* Report Metrics */ -#define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ -#define RTM_REDIRECT 0x6 /* Told to use different route */ -#define RTM_MISS 0x7 /* Lookup failed on this address */ -#define RTM_LOCK 0x8 /* fix specified metrics */ -#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */ -#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */ -#define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ -#define RTM_NEWADDR 0xc /* address being added to iface */ -#define RTM_DELADDR 0xd /* address being removed from iface */ -#define RTM_IFINFO 0xe /* iface going up/down etc. */ +#define RTM_ADD 0x1 /* Add Route */ +#define RTM_DELETE 0x2 /* Delete Route */ +#define RTM_CHANGE 0x3 /* Change Metrics or flags */ +#define RTM_GET 0x4 /* Report Metrics */ +#define RTM_LOSING 0x5 /* Kernel Suspects Partitioning */ +#define RTM_REDIRECT 0x6 /* Told to use different route */ +#define RTM_MISS 0x7 /* Lookup failed on this address */ +#define RTM_LOCK 0x8 /* fix specified metrics */ +#define RTM_OLDADD 0x9 /* caused by SIOCADDRT */ +#define RTM_OLDDEL 0xa /* caused by SIOCDELRT */ +#define RTM_RESOLVE 0xb /* req to resolve dst to LL addr */ +#define RTM_NEWADDR 0xc /* address being added to iface */ +#define RTM_DELADDR 0xd /* address being removed from iface */ +#define RTM_IFINFO 0xe /* iface going up/down etc. 
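
/*
 * User-space sketch of the message types above in action: issuing an
 * RTM_GET over a PF_ROUTE socket, the classic BSD routing-socket
 * pattern (cf. route(8)).  Error handling is minimal and the kernel's
 * reply is left unparsed.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <net/route.h>
#include <netinet/in.h>
#include <string.h>
#include <unistd.h>

static int
rtm_get_example(struct in_addr dst)
{
	struct {
		struct rt_msghdr hdr;
		struct sockaddr_in sin;
	} m;
	int s;

	if ((s = socket(PF_ROUTE, SOCK_RAW, 0)) == -1)
		return (-1);
	memset(&m, 0, sizeof (m));
	m.hdr.rtm_msglen = sizeof (m);
	m.hdr.rtm_version = RTM_VERSION;
	m.hdr.rtm_type = RTM_GET;
	m.hdr.rtm_addrs = RTA_DST;	/* one sockaddr follows the header */
	m.hdr.rtm_seq = 1;
	m.sin.sin_len = sizeof (m.sin);
	m.sin.sin_family = AF_INET;
	m.sin.sin_addr = dst;
	if (write(s, &m, sizeof (m)) != (ssize_t)sizeof (m)) {
		(void) close(s);
		return (-1);
	}
	/* read(2) would return the reply, matched by rtm_seq/rtm_pid. */
	(void) close(s);
	return (0);
}
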
*/ #define RTM_NEWMADDR 0xf /* mcast group membership being added to if */ #define RTM_DELMADDR 0x10 /* mcast group membership being deleted */ #ifdef PRIVATE -#define RTM_GET_SILENT 0x11 +#define RTM_GET_SILENT 0x11 #endif /* PRIVATE */ -#define RTM_IFINFO2 0x12 /* */ -#define RTM_NEWMADDR2 0x13 /* */ -#define RTM_GET2 0x14 /* */ +#define RTM_IFINFO2 0x12 /* */ +#define RTM_NEWMADDR2 0x13 /* */ +#define RTM_GET2 0x14 /* */ #ifdef PRIVATE #define RTM_GET_EXT 0x15 #endif /* PRIVATE */ @@ -332,54 +380,45 @@ struct rt_msghdr_ext { /* * Bitmask values for rtm_inits and rmx_locks. */ -#define RTV_MTU 0x1 /* init or lock _mtu */ -#define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ -#define RTV_EXPIRE 0x4 /* init or lock _expire */ -#define RTV_RPIPE 0x8 /* init or lock _recvpipe */ -#define RTV_SPIPE 0x10 /* init or lock _sendpipe */ -#define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ -#define RTV_RTT 0x40 /* init or lock _rtt */ -#define RTV_RTTVAR 0x80 /* init or lock _rttvar */ +#define RTV_MTU 0x1 /* init or lock _mtu */ +#define RTV_HOPCOUNT 0x2 /* init or lock _hopcount */ +#define RTV_EXPIRE 0x4 /* init or lock _expire */ +#define RTV_RPIPE 0x8 /* init or lock _recvpipe */ +#define RTV_SPIPE 0x10 /* init or lock _sendpipe */ +#define RTV_SSTHRESH 0x20 /* init or lock _ssthresh */ +#define RTV_RTT 0x40 /* init or lock _rtt */ +#define RTV_RTTVAR 0x80 /* init or lock _rttvar */ /* * Bitmask values for rtm_addrs. */ -#define RTA_DST 0x1 /* destination sockaddr present */ -#define RTA_GATEWAY 0x2 /* gateway sockaddr present */ -#define RTA_NETMASK 0x4 /* netmask sockaddr present */ -#define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ -#define RTA_IFP 0x10 /* interface name sockaddr present */ -#define RTA_IFA 0x20 /* interface addr sockaddr present */ -#define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ -#define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ +#define RTA_DST 0x1 /* destination sockaddr present */ +#define RTA_GATEWAY 0x2 /* gateway sockaddr present */ +#define RTA_NETMASK 0x4 /* netmask sockaddr present */ +#define RTA_GENMASK 0x8 /* cloning mask sockaddr present */ +#define RTA_IFP 0x10 /* interface name sockaddr present */ +#define RTA_IFA 0x20 /* interface addr sockaddr present */ +#define RTA_AUTHOR 0x40 /* sockaddr for author of redirect */ +#define RTA_BRD 0x80 /* for NEWADDR, broadcast or p-p dest addr */ /* * Index offsets for sockaddr array for alternate internal encoding. 
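 *
 * For example, a message with rtm_addrs == (RTA_DST|RTA_GATEWAY|RTA_NETMASK)
 * carries exactly three sockaddrs after its header, packed in ascending
 * index order (RTAX_DST, then RTAX_GATEWAY, then RTAX_NETMASK), each
 * rounded up to a 4-byte boundary; rt_xaddrs() in rtsock.c recovers
 * them into rti_info[] by walking the bitmask.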
*/ -#define RTAX_DST 0 /* destination sockaddr present */ -#define RTAX_GATEWAY 1 /* gateway sockaddr present */ -#define RTAX_NETMASK 2 /* netmask sockaddr present */ -#define RTAX_GENMASK 3 /* cloning mask sockaddr present */ -#define RTAX_IFP 4 /* interface name sockaddr present */ -#define RTAX_IFA 5 /* interface addr sockaddr present */ -#define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ -#define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ -#define RTAX_MAX 8 /* size of array to allocate */ +#define RTAX_DST 0 /* destination sockaddr present */ +#define RTAX_GATEWAY 1 /* gateway sockaddr present */ +#define RTAX_NETMASK 2 /* netmask sockaddr present */ +#define RTAX_GENMASK 3 /* cloning mask sockaddr present */ +#define RTAX_IFP 4 /* interface name sockaddr present */ +#define RTAX_IFA 5 /* interface addr sockaddr present */ +#define RTAX_AUTHOR 6 /* sockaddr for author of redirect */ +#define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */ +#define RTAX_MAX 8 /* size of array to allocate */ struct rt_addrinfo { int rti_addrs; struct sockaddr *rti_info[RTAX_MAX]; }; -struct route_cb { - int ip_count; - int ip6_count; - int ipx_count; - int ns_count; - int iso_count; - int any_count; -}; - #ifdef PRIVATE /* * For scoped routing; a zero interface scope value means nil/no scope. @@ -387,7 +426,7 @@ struct route_cb { #define IFSCOPE_NONE 0 #endif /* PRIVATE */ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * Generic call trace used by some subsystems (e.g. route, ifaddr) */ @@ -407,17 +446,11 @@ extern void ctrace_record(ctrace_t *); lck_mtx_assert(&(_rt)->rt_lock, LCK_MTX_ASSERT_NOTOWNED) #define RT_LOCK(_rt) do { \ - if (!rte_debug) \ - lck_mtx_lock(&(_rt)->rt_lock); \ - else \ - rt_lock(_rt, FALSE); \ + rt_lock(_rt, FALSE); \ } while (0) #define RT_LOCK_SPIN(_rt) do { \ - if (!rte_debug) \ - lck_mtx_lock_spin(&(_rt)->rt_lock); \ - else \ - rt_lock(_rt, TRUE); \ + rt_lock(_rt, TRUE); \ } while (0) #define RT_CONVERT_LOCK(_rt) do { \ @@ -426,20 +459,11 @@ extern void ctrace_record(ctrace_t *); } while (0) #define RT_UNLOCK(_rt) do { \ - if (!rte_debug) \ - lck_mtx_unlock(&(_rt)->rt_lock); \ - else \ - rt_unlock(_rt); \ + rt_unlock(_rt); \ } while (0) #define RT_ADDREF_LOCKED(_rt) do { \ - if (!rte_debug) { \ - RT_LOCK_ASSERT_HELD(_rt); \ - if (++(_rt)->rt_refcnt == 0) \ - panic("RT_ADDREF(%p) bad refcnt\n", _rt); \ - } else { \ - rtref(_rt); \ - } \ + rtref(_rt); \ } while (0) /* @@ -453,14 +477,7 @@ extern void ctrace_record(ctrace_t *); } while (0) #define RT_REMREF_LOCKED(_rt) do { \ - if (!rte_debug) { \ - RT_LOCK_ASSERT_HELD(_rt); \ - if ((_rt)->rt_refcnt == 0) \ - panic("RT_REMREF(%p) bad refcnt\n", _rt); \ - --(_rt)->rt_refcnt; \ - } else { \ - (void) rtunref(_rt); \ - } \ + (void) rtunref(_rt); \ } while (0) /* @@ -473,22 +490,33 @@ extern void ctrace_record(ctrace_t *); RT_UNLOCK(_rt); \ } while (0) -#define RTFREE(_rt) rtfree(_rt) -#define RTFREE_LOCKED(_rt) rtfree_locked(_rt) +/* + * This macro calculates skew in wall clock, just in case the user changes the + * system time. This skew adjustment is required because we now keep the + * expiration times in uptime terms in the kernel, but the userland still + * expects expiration times in terms of calendar times. This is used when + * reporting rt_expire, ln_expire, etc. values to user space. 
+ */ +#define NET_CALCULATE_CLOCKSKEW(cc, ic, cu, iu) \ + ((cc.tv_sec - ic) - (cu - iu)) -extern struct route_cb route_cb; +extern unsigned int rt_verbose; extern struct radix_node_head *rt_tables[AF_MAX+1]; -__private_extern__ lck_mtx_t *rnh_lock; -__private_extern__ int use_routegenid; -__private_extern__ uint32_t route_generation; -__private_extern__ int rttrash; -__private_extern__ unsigned int rte_debug; +extern lck_mtx_t *rnh_lock; +extern uint32_t route_genid_inet; /* INET route generation count */ +#if INET6 +extern uint32_t route_genid_inet6; /* INET6 route generation count */ +#endif /* INET6 */ +extern int rttrash; +extern unsigned int rte_debug; struct ifmultiaddr; struct proc; -extern void route_init(void) __attribute__((section("__TEXT, initcode"))); +extern void route_init(void); extern void routegenid_update(void); +extern void routegenid_inet_update(void); +extern void routegenid_inet6_update(void); extern void rt_ifmsg(struct ifnet *); extern void rt_missmsg(int, struct rt_addrinfo *, int, int); extern void rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *); @@ -499,6 +527,8 @@ extern unsigned int get_primary_ifscope(int); extern boolean_t rt_primary_default(struct rtentry *, struct sockaddr *); extern struct rtentry *rt_lookup(boolean_t, struct sockaddr *, struct sockaddr *, struct radix_node_head *, unsigned int); +extern struct rtentry *rt_lookup_coarse(boolean_t, struct sockaddr *, + struct sockaddr *, struct radix_node_head *); extern void rtalloc(struct route *); extern void rtalloc_scoped(struct route *, unsigned int); extern void rtalloc_ign(struct route *, uint32_t); @@ -508,8 +538,8 @@ extern struct rtentry *rtalloc1_scoped(struct sockaddr *, int, uint32_t, unsigned int); extern struct rtentry *rtalloc1_scoped_locked(struct sockaddr *, int, uint32_t, unsigned int); -extern void rtfree(struct rtentry *); extern void rtfree_locked(struct rtentry *); +extern void rtfree(struct rtentry *); extern void rtref(struct rtentry *); /* * rtunref will decrement the refcount, rtfree will decrement and free if @@ -536,12 +566,8 @@ extern unsigned int sin_get_ifscope(struct sockaddr *); extern unsigned int sin6_get_ifscope(struct sockaddr *); extern void rt_lock(struct rtentry *, boolean_t); extern void rt_unlock(struct rtentry *); -extern struct sockaddr *rtm_scrub_ifscope(int, int, struct sockaddr *, - struct sockaddr *, struct sockaddr_storage *); -extern u_int64_t rt_expiry(struct rtentry *, u_int64_t, u_int32_t); -extern void rt_set_idleref(struct rtentry *); -extern void rt_clear_idleref(struct rtentry *); -extern void rt_aggdrain(int); +extern struct sockaddr *rtm_scrub(int, int, struct sockaddr *, + struct sockaddr *, void *, uint32_t, kauth_cred_t *); extern boolean_t rt_validate(struct rtentry *); extern void rt_set_proxy(struct rtentry *, boolean_t); extern void rt_set_gwroute(struct rtentry *, struct sockaddr *, @@ -549,12 +575,10 @@ extern void rt_set_gwroute(struct rtentry *, struct sockaddr *, extern void rt_revalidate_gwroute(struct rtentry *, struct rtentry *); extern errno_t route_to_gwroute(const struct sockaddr *, struct rtentry *, struct rtentry **); - -#ifdef XNU_KERNEL_PRIVATE -extern void route_copyin(struct route *src, struct route *dst, size_t length); -extern void route_copyout(struct route *dst, const struct route *src, size_t length); -#endif /* XNU_KERNEL_PRIVATE */ - -#endif /* KERNEL_PRIVATE */ - -#endif +extern void rt_setexpire(struct rtentry *, uint64_t); +extern void rt_str(struct rtentry *, char *, uint32_t, char *, uint32_t); 
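
/*
 * Worked example of NET_CALCULATE_CLOCKSKEW() above, with illustrative
 * numbers: an entry is created at calendar time 1000 with uptime 50;
 * later the wall clock reads 2060 while uptime is only 110.  Only 60
 * real seconds elapsed, so the clock was set forward by:
 *
 *	skew = (2060 - 1000) - (110 - 50) = 1060 - 60 = 1000
 *
 * Adding that skew to base_calendartime (as rt_setmetrics() and
 * rt_getmetrics() in rtsock.c do) keeps the calendar-based rmx_expire
 * reported to userland consistent with the uptime-based rt_expire the
 * kernel stores.
 */
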
+extern const char *rtm2str(int); +extern void route_copyin(struct route *, struct route *, size_t); +extern void route_copyout(struct route *, const struct route *, size_t); +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* _NET_ROUTE_H_ */ diff --git a/bsd/net/rtsock.c b/bsd/net/rtsock.c index d8a1b60b2..84db38b7a 100644 --- a/bsd/net/rtsock.c +++ b/bsd/net/rtsock.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -60,9 +60,9 @@ * @(#)rtsock.c 8.5 (Berkeley) 11/2/94 */ - #include #include +#include #include #include #include @@ -85,17 +85,23 @@ #include #include -#include - extern struct rtstat rtstat; -extern int check_routeselfref; -extern struct domain routedomain; +extern struct domain routedomain_s; +static struct domain *routedomain = NULL; MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables"); -static struct sockaddr route_dst = { 2, PF_ROUTE, { 0, } }; -static struct sockaddr route_src = { 2, PF_ROUTE, { 0, } }; -static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, { 0, } }; +static struct sockaddr route_dst = { 2, PF_ROUTE, { 0, } }; +static struct sockaddr route_src = { 2, PF_ROUTE, { 0, } }; +static struct sockaddr sa_zero = { sizeof (sa_zero), AF_INET, { 0, } }; + +struct route_cb { + u_int32_t ip_count; /* attached w/ AF_INET */ + u_int32_t ip6_count; /* attached w/ AF_INET6 */ + u_int32_t any_count; /* total attached */ +}; + +static struct route_cb route_cb; struct walkarg { int w_tmemsize; @@ -104,45 +110,47 @@ struct walkarg { struct sysctl_req *w_req; }; +static void route_dinit(struct domain *); +static int rts_abort(struct socket *); +static int rts_attach(struct socket *, int, struct proc *); +static int rts_bind(struct socket *, struct sockaddr *, struct proc *); +static int rts_connect(struct socket *, struct sockaddr *, struct proc *); +static int rts_detach(struct socket *); +static int rts_disconnect(struct socket *); +static int rts_peeraddr(struct socket *, struct sockaddr **); +static int rts_send(struct socket *, int, struct mbuf *, struct sockaddr *, + struct mbuf *, struct proc *); +static int rts_shutdown(struct socket *); +static int rts_sockaddr(struct socket *, struct sockaddr **); + +static int route_output(struct mbuf *, struct socket *); +static void rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *); +static void rt_getmetrics(struct rtentry *, struct rt_metrics *); +static 
void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *, + struct sockaddr *, unsigned int); +static int rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); static struct mbuf *rt_msg1(int, struct rt_addrinfo *); -static int rt_msg2(int, struct rt_addrinfo *, caddr_t, struct walkarg *); -static int rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *); -static int sysctl_dumpentry(struct radix_node *rn, void *vw); -static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw); -static int sysctl_iflist(int af, struct walkarg *w); -static int sysctl_iflist2(int af, struct walkarg *w); -static int route_output(struct mbuf *, struct socket *); -static void rt_setmetrics(u_int32_t, struct rt_metrics *, struct rtentry *); -static void rt_getmetrics(struct rtentry *, struct rt_metrics *); -static void rt_setif(struct rtentry *, struct sockaddr *, struct sockaddr *, - struct sockaddr *, unsigned int); -static void rt_drainall(void); - -#ifndef SIN -#define SIN(sa) ((struct sockaddr_in *)(size_t)(sa)) -#endif - -SYSCTL_NODE(_net, OID_AUTO, idle, CTLFLAG_RW|CTLFLAG_LOCKED, 0, - "idle network monitoring"); - -static struct timeval last_ts; - -SYSCTL_NODE(_net_idle, OID_AUTO, route, CTLFLAG_RW|CTLFLAG_LOCKED, 0, - "idle route monitoring"); - -static int rt_if_idle_drain_interval = RT_IF_IDLE_DRAIN_INTERVAL; -SYSCTL_INT(_net_idle_route, OID_AUTO, drain_interval, CTLFLAG_RW, - &rt_if_idle_drain_interval, 0, "Default interval for draining " - "routes when doing interface idle reference counting."); - -/* - * This macro calculates skew in wall clock, just in case the user changes the - * system time. This skew adjustment is required because we now keep the route - * expiration times in uptime terms in the kernel, but the userland still - * expects expiration times in terms of calendar times. - */ -#define CALCULATE_CLOCKSKEW(cc, ic, cu, iu)\ - ((cc.tv_sec - ic) - (cu - iu)) +static int rt_msg2(int, struct rt_addrinfo *, caddr_t, struct walkarg *, + kauth_cred_t *); +static int sysctl_dumpentry(struct radix_node *rn, void *vw); +static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw); +static int sysctl_iflist(int af, struct walkarg *w); +static int sysctl_iflist2(int af, struct walkarg *w); +static int sysctl_rtstat(struct sysctl_req *); +static int sysctl_rttrash(struct sysctl_req *); +static int sysctl_rtsock SYSCTL_HANDLER_ARGS; + +SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED, + sysctl_rtsock, ""); + +SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "routing"); + +#define ROUNDUP32(a) \ + ((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) : \ + sizeof (uint32_t)) + +#define ADVANCE32(x, n) \ + (x += ROUNDUP32((n)->sa_len)) /* * It really doesn't make any sense at all for this code to share much @@ -151,81 +159,61 @@ SYSCTL_INT(_net_idle_route, OID_AUTO, drain_interval, CTLFLAG_RW, static int rts_abort(struct socket *so) { - int error; - - error = raw_usrreqs.pru_abort(so); - return error; + return (raw_usrreqs.pru_abort(so)); } /* pru_accept is EOPNOTSUPP */ static int -rts_attach(struct socket *so, int proto, __unused struct proc *p) +rts_attach(struct socket *so, int proto, struct proc *p) { +#pragma unused(p) struct rawcb *rp; int error; - if (sotorawcb(so) != 0) - return EISCONN; /* XXX panic? 
*/ - MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK); /* XXX */ - if (rp == 0) - return ENOBUFS; - bzero(rp, sizeof *rp); + VERIFY(so->so_pcb == NULL); + + MALLOC(rp, struct rawcb *, sizeof (*rp), M_PCB, M_WAITOK | M_ZERO); + if (rp == NULL) + return (ENOBUFS); - /* - * The splnet() is necessary to block protocols from sending - * error notifications (like RTM_REDIRECT or RTM_LOSING) while - * this PCB is extant but incompletely initialized. - * Probably we should try to do more of this work beforehand and - * eliminate the spl. - */ so->so_pcb = (caddr_t)rp; - error = raw_attach(so, proto); /* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */ + /* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */ + error = raw_attach(so, proto); rp = sotorawcb(so); if (error) { FREE(rp, M_PCB); so->so_pcb = NULL; so->so_flags |= SOF_PCBCLEARING; - return error; + return (error); } - switch(rp->rcb_proto.sp_protocol) { -//####LD route_cb needs looking + switch (rp->rcb_proto.sp_protocol) { case AF_INET: - route_cb.ip_count++; + atomic_add_32(&route_cb.ip_count, 1); break; case AF_INET6: - route_cb.ip6_count++; - break; - case AF_IPX: - route_cb.ipx_count++; - break; - case AF_NS: - route_cb.ns_count++; + atomic_add_32(&route_cb.ip6_count, 1); break; } rp->rcb_faddr = &route_src; - route_cb.any_count++; - /* the socket is already locked when we enter rts_attach */ + atomic_add_32(&route_cb.any_count, 1); + /* the socket is already locked when we enter rts_attach */ soisconnected(so); so->so_options |= SO_USELOOPBACK; - return 0; + return (0); } static int rts_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { - int error; - error = raw_usrreqs.pru_bind(so, nam, p); /* xxx just EINVAL */ - return error; + return (raw_usrreqs.pru_bind(so, nam, p)); /* xxx just EINVAL */ } static int rts_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { - int error; - error = raw_usrreqs.pru_connect(so, nam, p); /* XXX just EINVAL */ - return error; + return (raw_usrreqs.pru_connect(so, nam, p)); /* XXX just EINVAL */ } /* pru_connect2 is EOPNOTSUPP */ @@ -235,35 +223,25 @@ static int rts_detach(struct socket *so) { struct rawcb *rp = sotorawcb(so); - int error; - if (rp != 0) { - switch(rp->rcb_proto.sp_protocol) { - case AF_INET: - route_cb.ip_count--; - break; - case AF_INET6: - route_cb.ip6_count--; - break; - case AF_IPX: - route_cb.ipx_count--; - break; - case AF_NS: - route_cb.ns_count--; - break; - } - route_cb.any_count--; + VERIFY(rp != NULL); + + switch (rp->rcb_proto.sp_protocol) { + case AF_INET: + atomic_add_32(&route_cb.ip_count, -1); + break; + case AF_INET6: + atomic_add_32(&route_cb.ip6_count, -1); + break; } - error = raw_usrreqs.pru_detach(so); - return error; + atomic_add_32(&route_cb.any_count, -1); + return (raw_usrreqs.pru_detach(so)); } static int rts_disconnect(struct socket *so) { - int error; - error = raw_usrreqs.pru_disconnect(so); - return error; + return (raw_usrreqs.pru_disconnect(so)); } /* pru_listen is EOPNOTSUPP */ @@ -271,9 +249,7 @@ rts_disconnect(struct socket *so) static int rts_peeraddr(struct socket *so, struct sockaddr **nam) { - int error; - error = raw_usrreqs.pru_peeraddr(so, nam); - return error; + return (raw_usrreqs.pru_peeraddr(so, nam)); } /* pru_rcvd is EOPNOTSUPP */ @@ -281,11 +257,9 @@ rts_peeraddr(struct socket *so, struct sockaddr **nam) static int rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, - struct mbuf *control, struct proc *p) + struct mbuf *control, struct proc *p) { - int 
error; - error = raw_usrreqs.pru_send(so, flags, m, nam, control, p); - return error; + return (raw_usrreqs.pru_send(so, flags, m, nam, control, p)); } /* pru_sense is null */ @@ -293,25 +267,28 @@ rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, static int rts_shutdown(struct socket *so) { - int error; - error = raw_usrreqs.pru_shutdown(so); - return error; + return (raw_usrreqs.pru_shutdown(so)); } static int rts_sockaddr(struct socket *so, struct sockaddr **nam) { - int error; - error = raw_usrreqs.pru_sockaddr(so, nam); - return error; + return (raw_usrreqs.pru_sockaddr(so, nam)); } static struct pr_usrreqs route_usrreqs = { - rts_abort, pru_accept_notsupp, rts_attach, rts_bind, - rts_connect, pru_connect2_notsupp, pru_control_notsupp, - rts_detach, rts_disconnect, pru_listen_notsupp, rts_peeraddr, - pru_rcvd_notsupp, pru_rcvoob_notsupp, rts_send, pru_sense_null, - rts_shutdown, rts_sockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = rts_abort, + .pru_attach = rts_attach, + .pru_bind = rts_bind, + .pru_connect = rts_connect, + .pru_detach = rts_detach, + .pru_disconnect = rts_disconnect, + .pru_peeraddr = rts_peeraddr, + .pru_send = rts_send, + .pru_shutdown = rts_shutdown, + .pru_sockaddr = rts_sockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; /*ARGSUSED*/ @@ -326,26 +303,26 @@ route_output(struct mbuf *m, struct socket *so) int len, error = 0; sa_family_t dst_sa_family = 0; struct ifnet *ifp = NULL; -#ifndef __APPLE__ - struct proc *curproc = current_proc(); -#endif struct sockaddr_in dst_in, gate_in; int sendonlytoself = 0; unsigned int ifscope = IFSCOPE_NONE; + struct rawcb *rp = NULL; -#define senderr(e) { error = (e); goto flush;} - if (m == NULL || - ((m->m_len < sizeof(intptr_t)) && (m = m_pullup(m, sizeof(intptr_t))) == 0)) +#define senderr(e) { error = (e); goto flush; } + if (m == NULL || ((m->m_len < sizeof (intptr_t)) && + (m = m_pullup(m, sizeof (intptr_t))) == NULL)) return (ENOBUFS); - if ((m->m_flags & M_PKTHDR) == 0) - panic("route_output"); + VERIFY(m->m_flags & M_PKTHDR); - /* unlock the socket (but keep a reference) it won't be accessed until raw_input appends to it. */ + /* + * Unlock the socket (but keep a reference) it won't be + * accessed until raw_input appends to it. + */ socket_unlock(so, 0); lck_mtx_lock(rnh_lock); len = m->m_pkthdr.len; - if (len < sizeof(*rtm) || + if (len < sizeof (*rtm) || len != mtod(m, struct rt_msghdr *)->rtm_msglen) { info.rti_info[RTAX_DST] = NULL; senderr(EINVAL); @@ -366,7 +343,7 @@ route_output(struct mbuf *m, struct socket *so) * all RTM_GETs to be silent in the future, so this is private for now. 
*/ if (rtm->rtm_type == RTM_GET_SILENT) { - if ((so->so_options & SO_USELOOPBACK) == 0) + if (!(so->so_options & SO_USELOOPBACK)) senderr(EINVAL); sendonlytoself = 1; rtm->rtm_type = RTM_GET; @@ -376,7 +353,7 @@ route_output(struct mbuf *m, struct socket *so) * Perform permission checking, only privileged sockets * may perform operations other than RTM_GET */ - if (rtm->rtm_type != RTM_GET && (so->so_state & SS_PRIV) == 0) { + if (rtm->rtm_type != RTM_GET && !(so->so_state & SS_PRIV)) { info.rti_info[RTAX_DST] = NULL; senderr(EPERM); } @@ -387,14 +364,17 @@ route_output(struct mbuf *m, struct socket *so) info.rti_info[RTAX_DST] = NULL; senderr(EINVAL); } - if (info.rti_info[RTAX_DST] == NULL || (info.rti_info[RTAX_DST]->sa_family >= AF_MAX) || - (info.rti_info[RTAX_GATEWAY] != NULL && (info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))) { + if (info.rti_info[RTAX_DST] == NULL || + info.rti_info[RTAX_DST]->sa_family >= AF_MAX || + (info.rti_info[RTAX_GATEWAY] != NULL && + info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX)) senderr(EINVAL); - } - if (info.rti_info[RTAX_DST]->sa_family == AF_INET && info.rti_info[RTAX_DST]->sa_len != sizeof (dst_in)) { + if (info.rti_info[RTAX_DST]->sa_family == AF_INET && + info.rti_info[RTAX_DST]->sa_len != sizeof (dst_in)) { /* At minimum, we need up to sin_addr */ - if (info.rti_info[RTAX_DST]->sa_len < offsetof(struct sockaddr_in, sin_zero)) + if (info.rti_info[RTAX_DST]->sa_len < + offsetof(struct sockaddr_in, sin_zero)) senderr(EINVAL); bzero(&dst_in, sizeof (dst_in)); dst_in.sin_len = sizeof (dst_in); @@ -406,9 +386,11 @@ route_output(struct mbuf *m, struct socket *so) } if (info.rti_info[RTAX_GATEWAY] != NULL && - info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET && info.rti_info[RTAX_GATEWAY]->sa_len != sizeof (gate_in)) { + info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET && + info.rti_info[RTAX_GATEWAY]->sa_len != sizeof (gate_in)) { /* At minimum, we need up to sin_addr */ - if (info.rti_info[RTAX_GATEWAY]->sa_len < offsetof(struct sockaddr_in, sin_zero)) + if (info.rti_info[RTAX_GATEWAY]->sa_len < + offsetof(struct sockaddr_in, sin_zero)) senderr(EINVAL); bzero(&gate_in, sizeof (gate_in)); gate_in.sin_len = sizeof (gate_in); @@ -421,8 +403,10 @@ route_output(struct mbuf *m, struct socket *so) if (info.rti_info[RTAX_GENMASK]) { struct radix_node *t; t = rn_addmask((caddr_t)info.rti_info[RTAX_GENMASK], 0, 1); - if (t && Bcmp(info.rti_info[RTAX_GENMASK], t->rn_key, *(u_char *)info.rti_info[RTAX_GENMASK]) == 0) - info.rti_info[RTAX_GENMASK] = (struct sockaddr *)(t->rn_key); + if (t != NULL && Bcmp(info.rti_info[RTAX_GENMASK], + t->rn_key, *(u_char *)info.rti_info[RTAX_GENMASK]) == 0) + info.rti_info[RTAX_GENMASK] = + (struct sockaddr *)(t->rn_key); else senderr(ENOBUFS); } @@ -431,7 +415,8 @@ route_output(struct mbuf *m, struct socket *so) * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope. 
*/ if (rtm->rtm_flags & RTF_IFSCOPE) { - if (info.rti_info[RTAX_DST]->sa_family != AF_INET && info.rti_info[RTAX_DST]->sa_family != AF_INET6) + if (info.rti_info[RTAX_DST]->sa_family != AF_INET && + info.rti_info[RTAX_DST]->sa_family != AF_INET6) senderr(EINVAL); ifscope = rtm->rtm_index; } @@ -451,234 +436,201 @@ route_output(struct mbuf *m, struct socket *so) */ if (info.rti_info[RTAX_DST]->sa_family == AF_INET) sin_set_ifscope(info.rti_info[RTAX_DST], IFSCOPE_NONE); - if (info.rti_info[RTAX_GATEWAY] != NULL && info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) + if (info.rti_info[RTAX_GATEWAY] != NULL && + info.rti_info[RTAX_GATEWAY]->sa_family == AF_INET) sin_set_ifscope(info.rti_info[RTAX_GATEWAY], IFSCOPE_NONE); switch (rtm->rtm_type) { + case RTM_ADD: + if (info.rti_info[RTAX_GATEWAY] == NULL) + senderr(EINVAL); - case RTM_ADD: - if (info.rti_info[RTAX_GATEWAY] == NULL) - senderr(EINVAL); - -#ifdef __APPLE__ -/* XXX LD11JUL02 Special case for AOL 5.1.2 connectivity issue to AirPort BS (Radar 2969954) - * AOL is adding a circular route ("10.0.1.1/32 10.0.1.1") when establishing its ppp tunnel - * to the AP BaseStation by removing the default gateway and replacing it with their tunnel entry point. - * There is no apparent reason to add this route as there is a valid 10.0.1.1/24 route to the BS. - * That circular route was ignored on previous version of MacOS X because of a routing bug - * corrected with the merge to FreeBSD4.4 (a route generated from an RTF_CLONING route had the RTF_WASCLONED - * flag set but did not have a reference to the parent route) and that entry was left in the RT. This workaround is - * made in order to provide binary compatibility with AOL. - * If we catch a process adding a circular route with a /32 from the routing socket, we error it out instead of - * confusing the routing table with a wrong route to the previous default gateway - */ -{ -#define satosinaddr(sa) (((struct sockaddr_in *)(void *)sa)->sin_addr.s_addr) - - if (check_routeselfref && (info.rti_info[RTAX_DST] && info.rti_info[RTAX_DST]->sa_family == AF_INET) && - (info.rti_info[RTAX_NETMASK] && satosinaddr(info.rti_info[RTAX_NETMASK]) == INADDR_BROADCAST) && - (info.rti_info[RTAX_GATEWAY] && satosinaddr(info.rti_info[RTAX_DST]) == satosinaddr(info.rti_info[RTAX_GATEWAY]))) { - log(LOG_WARNING, "route_output: circular route %ld.%ld.%ld.%ld/32 ignored\n", - (ntohl(satosinaddr(info.rti_info[RTAX_GATEWAY])>>24))&0xff, - (ntohl(satosinaddr(info.rti_info[RTAX_GATEWAY])>>16))&0xff, - (ntohl(satosinaddr(info.rti_info[RTAX_GATEWAY])>>8))&0xff, - (ntohl(satosinaddr(info.rti_info[RTAX_GATEWAY])))&0xff); - - senderr(EINVAL); - } -} -#endif - error = rtrequest_scoped_locked(RTM_ADD, info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY], - info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt, ifscope); - if (error == 0 && saved_nrt) { - RT_LOCK(saved_nrt); -#ifdef __APPLE__ - /* - * If the route request specified an interface with - * IFA and/or IFP, we set the requested interface on - * the route with rt_setif. It would be much better - * to do this inside rtrequest, but that would - * require passing the desired interface, in some - * form, to rtrequest. Since rtrequest is called in - * so many places (roughly 40 in our source), adding - * a parameter is to much for us to swallow; this is - * something for the FreeBSD developers to tackle. - * Instead, we let rtrequest compute whatever - * interface it wants, then come in behind it and - * stick in the interface that we really want. 
This - * works reasonably well except when rtrequest can't - * figure out what interface to use (with - * ifa_withroute) and returns ENETUNREACH. Ideally - * it shouldn't matter if rtrequest can't figure out - * the interface if we're going to explicitly set it - * ourselves anyway. But practically we can't - * recover here because rtrequest will not do any of - * the work necessary to add the route if it can't - * find an interface. As long as there is a default - * route that leads to some interface, rtrequest will - * find an interface, so this problem should be - * rarely encountered. - * dwiggins@bbn.com - */ - - rt_setif(saved_nrt, info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA], info.rti_info[RTAX_GATEWAY], - ifscope); -#endif - rt_setmetrics(rtm->rtm_inits, - &rtm->rtm_rmx, saved_nrt); - saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); - saved_nrt->rt_rmx.rmx_locks |= - (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); - saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK]; - RT_REMREF_LOCKED(saved_nrt); - RT_UNLOCK(saved_nrt); - } - break; + error = rtrequest_scoped_locked(RTM_ADD, + info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY], + info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt, + ifscope); + if (error == 0 && saved_nrt != NULL) { + RT_LOCK(saved_nrt); + /* + * If the route request specified an interface with + * IFA and/or IFP, we set the requested interface on + * the route with rt_setif. It would be much better + * to do this inside rtrequest, but that would + * require passing the desired interface, in some + * form, to rtrequest. Since rtrequest is called in + * so many places (roughly 40 in our source), adding + * a parameter is to much for us to swallow; this is + * something for the FreeBSD developers to tackle. + * Instead, we let rtrequest compute whatever + * interface it wants, then come in behind it and + * stick in the interface that we really want. This + * works reasonably well except when rtrequest can't + * figure out what interface to use (with + * ifa_withroute) and returns ENETUNREACH. Ideally + * it shouldn't matter if rtrequest can't figure out + * the interface if we're going to explicitly set it + * ourselves anyway. But practically we can't + * recover here because rtrequest will not do any of + * the work necessary to add the route if it can't + * find an interface. As long as there is a default + * route that leads to some interface, rtrequest will + * find an interface, so this problem should be + * rarely encountered. + * dwiggins@bbn.com + */ + rt_setif(saved_nrt, + info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA], + info.rti_info[RTAX_GATEWAY], ifscope); + rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, saved_nrt); + saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); + saved_nrt->rt_rmx.rmx_locks |= + (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); + saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK]; + RT_REMREF_LOCKED(saved_nrt); + RT_UNLOCK(saved_nrt); + } + break; + + case RTM_DELETE: + error = rtrequest_scoped_locked(RTM_DELETE, + info.rti_info[RTAX_DST], info.rti_info[RTAX_GATEWAY], + info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt, + ifscope); + if (error == 0) { + rt = saved_nrt; + RT_LOCK(rt); + goto report; + } + break; + + case RTM_GET: + case RTM_CHANGE: + case RTM_LOCK: + rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family]; + if (rnh == NULL) + senderr(EAFNOSUPPORT); + /* + * Lookup the best match based on the key-mask pair; + * callee adds a reference and checks for root node. 
+ */ + rt = rt_lookup(TRUE, info.rti_info[RTAX_DST], + info.rti_info[RTAX_NETMASK], rnh, ifscope); + if (rt == NULL) + senderr(ESRCH); + RT_LOCK(rt); - case RTM_DELETE: - error = rtrequest_scoped_locked(RTM_DELETE, info.rti_info[RTAX_DST], - info.rti_info[RTAX_GATEWAY], info.rti_info[RTAX_NETMASK], rtm->rtm_flags, &saved_nrt, ifscope); - if (error == 0) { - rt = saved_nrt; - RT_LOCK(rt); - goto report; + /* + * Holding rnh_lock here prevents the possibility of + * ifa from changing (e.g. in_ifinit), so it is safe + * to access its ifa_addr (down below) without locking. + */ + switch (rtm->rtm_type) { + case RTM_GET: { + struct ifaddr *ifa2; +report: + ifa2 = NULL; + RT_LOCK_ASSERT_HELD(rt); + info.rti_info[RTAX_DST] = rt_key(rt); + dst_sa_family = info.rti_info[RTAX_DST]->sa_family; + info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info.rti_info[RTAX_NETMASK] = rt_mask(rt); + info.rti_info[RTAX_GENMASK] = rt->rt_genmask; + if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { + ifp = rt->rt_ifp; + if (ifp != NULL) { + ifnet_lock_shared(ifp); + ifa2 = ifp->if_lladdr; + info.rti_info[RTAX_IFP] = + ifa2->ifa_addr; + IFA_ADDREF(ifa2); + ifnet_lock_done(ifp); + info.rti_info[RTAX_IFA] = + rt->rt_ifa->ifa_addr; + rtm->rtm_index = ifp->if_index; + } else { + info.rti_info[RTAX_IFP] = NULL; + info.rti_info[RTAX_IFA] = NULL; + } + } else if ((ifp = rt->rt_ifp) != NULL) { + rtm->rtm_index = ifp->if_index; } + if (ifa2 != NULL) + IFA_LOCK(ifa2); + len = rt_msg2(rtm->rtm_type, &info, NULL, NULL, NULL); + if (ifa2 != NULL) + IFA_UNLOCK(ifa2); + if (len > rtm->rtm_msglen) { + struct rt_msghdr *new_rtm; + R_Malloc(new_rtm, struct rt_msghdr *, len); + if (new_rtm == NULL) { + RT_UNLOCK(rt); + if (ifa2 != NULL) + IFA_REMREF(ifa2); + senderr(ENOBUFS); + } + Bcopy(rtm, new_rtm, rtm->rtm_msglen); + R_Free(rtm); rtm = new_rtm; + } + if (ifa2 != NULL) + IFA_LOCK(ifa2); + (void) rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, + NULL, NULL); + if (ifa2 != NULL) + IFA_UNLOCK(ifa2); + rtm->rtm_flags = rt->rt_flags; + rt_getmetrics(rt, &rtm->rtm_rmx); + rtm->rtm_addrs = info.rti_addrs; + if (ifa2 != NULL) + IFA_REMREF(ifa2); break; + } - case RTM_GET: case RTM_CHANGE: - case RTM_LOCK: - if ((rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family]) == NULL) - senderr(EAFNOSUPPORT); - + if (info.rti_info[RTAX_GATEWAY] != NULL && + (error = rt_setgate(rt, rt_key(rt), + info.rti_info[RTAX_GATEWAY]))) { + int tmp = error; + RT_UNLOCK(rt); + senderr(tmp); + } /* - * Lookup the best match based on the key-mask pair; - * callee adds a reference and checks for root node. + * If they tried to change things but didn't specify + * the required gateway, then just use the old one. + * This can happen if the user tries to change the + * flags on the default route without changing the + * default gateway. Changing flags still doesn't work. */ - rt = rt_lookup(TRUE, info.rti_info[RTAX_DST], info.rti_info[RTAX_NETMASK], rnh, ifscope); - if (rt == NULL) - senderr(ESRCH); - RT_LOCK(rt); + if ((rt->rt_flags & RTF_GATEWAY) && + info.rti_info[RTAX_GATEWAY] == NULL) + info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; /* - * Holding rnh_lock here prevents the possibility of - * ifa from changing (e.g. in_ifinit), so it is safe - * to access its ifa_addr (down below) without locking. + * On Darwin, we call rt_setif which contains the + * equivalent to the code found at this very spot + * in BSD. 
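
/*
 * The RTM_GET report path above relies on the two-pass rt_msg2() idiom;
 * distilled here as a sketch (hypothetical helper mirroring the code
 * above, with the ifa2 locking elided):
 */
static int
rt_report_example(int type, struct rt_addrinfo *info, struct rt_msghdr **rtmp)
{
	struct rt_msghdr *rtm = *rtmp, *new_rtm;
	int len;

	/* Pass 1: a NULL buffer only computes the space required. */
	len = rt_msg2(type, info, NULL, NULL, NULL);
	if (len > rtm->rtm_msglen) {
		/* Grow the message, preserving what the caller sent. */
		R_Malloc(new_rtm, struct rt_msghdr *, len);
		if (new_rtm == NULL)
			return (ENOBUFS);
		Bcopy(rtm, new_rtm, rtm->rtm_msglen);
		R_Free(rtm);
		*rtmp = rtm = new_rtm;
	}
	/* Pass 2: fill in the now sufficiently large buffer. */
	(void) rt_msg2(type, info, (caddr_t)rtm, NULL, NULL);
	return (0);
}
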
*/ - switch(rtm->rtm_type) { - - case RTM_GET: { - struct ifaddr *ifa2; - report: - ifa2 = NULL; - RT_LOCK_ASSERT_HELD(rt); - info.rti_info[RTAX_DST] = rt_key(rt); - dst_sa_family = info.rti_info[RTAX_DST]->sa_family; - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - info.rti_info[RTAX_GENMASK] = rt->rt_genmask; - if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { - ifp = rt->rt_ifp; - if (ifp) { - ifnet_lock_shared(ifp); - ifa2 = ifp->if_lladdr; - info.rti_info[RTAX_IFP] = ifa2->ifa_addr; - IFA_ADDREF(ifa2); - ifnet_lock_done(ifp); - info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; - rtm->rtm_index = ifp->if_index; - } else { - info.rti_info[RTAX_IFP] = NULL; - info.rti_info[RTAX_IFA] = NULL; - } - } else if ((ifp = rt->rt_ifp) != NULL) { - rtm->rtm_index = ifp->if_index; - } - if (ifa2 != NULL) - IFA_LOCK(ifa2); - len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0, - (struct walkarg *)0); - if (ifa2 != NULL) - IFA_UNLOCK(ifa2); - if (len > rtm->rtm_msglen) { - struct rt_msghdr *new_rtm; - R_Malloc(new_rtm, struct rt_msghdr *, len); - if (new_rtm == 0) { - RT_UNLOCK(rt); - if (ifa2 != NULL) - IFA_REMREF(ifa2); - senderr(ENOBUFS); - } - Bcopy(rtm, new_rtm, rtm->rtm_msglen); - R_Free(rtm); rtm = new_rtm; - } - if (ifa2 != NULL) - IFA_LOCK(ifa2); - (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, - (struct walkarg *)0); - if (ifa2 != NULL) - IFA_UNLOCK(ifa2); - rtm->rtm_flags = rt->rt_flags; - rt_getmetrics(rt, &rtm->rtm_rmx); - rtm->rtm_addrs = info.rti_addrs; - if (ifa2 != NULL) - IFA_REMREF(ifa2); - } - break; - - case RTM_CHANGE: - if (info.rti_info[RTAX_GATEWAY] && (error = rt_setgate(rt, - rt_key(rt), info.rti_info[RTAX_GATEWAY]))) { - int tmp = error; - RT_UNLOCK(rt); - senderr(tmp); - } - /* - * If they tried to change things but didn't specify - * the required gateway, then just use the old one. - * This can happen if the user tries to change the - * flags on the default route without changing the - * default gateway. Changing flags still doesn't work. - */ - if ((rt->rt_flags & RTF_GATEWAY) && !info.rti_info[RTAX_GATEWAY]) - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - -#ifdef __APPLE__ - /* - * On Darwin, we call rt_setif which contains the - * equivalent to the code found at this very spot - * in BSD. 
- */ - rt_setif(rt, info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA], info.rti_info[RTAX_GATEWAY], - ifscope); -#endif - - rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, - rt); -#ifndef __APPLE__ - /* rt_setif, called above does this for us on darwin */ - if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest) - rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info.rti_info[RTAX_GATEWAY]); -#endif - if (info.rti_info[RTAX_GENMASK]) - rt->rt_genmask = info.rti_info[RTAX_GENMASK]; - /* - * Fall into - */ - case RTM_LOCK: - rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); - rt->rt_rmx.rmx_locks |= - (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); - break; - } - RT_UNLOCK(rt); + rt_setif(rt, + info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA], + info.rti_info[RTAX_GATEWAY], ifscope); + + rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, rt); + if (info.rti_info[RTAX_GENMASK]) + rt->rt_genmask = info.rti_info[RTAX_GENMASK]; + /* FALLTHRU */ + case RTM_LOCK: + rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); + rt->rt_rmx.rmx_locks |= + (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); break; - - default: - senderr(EOPNOTSUPP); + } + RT_UNLOCK(rt); + break; + + default: + senderr(EOPNOTSUPP); } flush: - if (rtm) { + if (rtm != NULL) { if (error) rtm->rtm_errno = error; else @@ -689,15 +641,15 @@ flush: rtfree_locked(rt); } lck_mtx_unlock(rnh_lock); - socket_lock(so, 0); /* relock the socket now */ - { - struct rawcb *rp = 0; + + /* relock the socket now */ + socket_lock(so, 0); /* * Check to see if we don't want our own messages. */ - if ((so->so_options & SO_USELOOPBACK) == 0) { + if (!(so->so_options & SO_USELOOPBACK)) { if (route_cb.any_count <= 1) { - if (rtm) + if (rtm != NULL) R_Free(rtm); m_freem(m); return (error); @@ -705,36 +657,37 @@ flush: /* There is another listener, so construct message */ rp = sotorawcb(so); } - if (rtm) { + if (rtm != NULL) { m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm); if (m->m_pkthdr.len < rtm->rtm_msglen) { m_freem(m); m = NULL; - } else if (m->m_pkthdr.len > rtm->rtm_msglen) + } else if (m->m_pkthdr.len > rtm->rtm_msglen) { m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len); + } R_Free(rtm); } - if (sendonlytoself && m) { + if (sendonlytoself && m != NULL) { error = 0; - if (sbappendaddr(&so->so_rcv, &route_src, m, (struct mbuf*)0, &error) != 0) { + if (sbappendaddr(&so->so_rcv, &route_src, m, + NULL, &error) != 0) { sorwakeup(so); } if (error) - return error; + return (error); } else { - struct sockproto route_proto = {PF_ROUTE, 0}; - if (rp) + struct sockproto route_proto = { PF_ROUTE, 0 }; + if (rp != NULL) rp->rcb_proto.sp_family = 0; /* Avoid us */ if (dst_sa_family != 0) route_proto.sp_protocol = dst_sa_family; - if (m) { + if (m != NULL) { socket_unlock(so, 0); raw_input(m, &route_proto, &route_src, &route_dst); socket_lock(so, 0); } - if (rp) + if (rp != NULL) rp->rcb_proto.sp_family = PF_ROUTE; - } } return (error); } @@ -747,20 +700,19 @@ rt_setexpire(struct rtentry *rt, uint64_t expiry) if (expiry) { rt->rt_rmx.rmx_expire = expiry + rt->base_calendartime - rt->base_uptime; - } else + } else { rt->rt_rmx.rmx_expire = 0; + } } static void rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out) { - struct timeval curr_calendar_time; - uint64_t curr_uptime; - - getmicrotime(&curr_calendar_time); - curr_uptime = net_uptime(); + struct timeval caltime; + + getmicrotime(&caltime); -#define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e; +#define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e; metric(RTV_RPIPE, rmx_recvpipe); metric(RTV_SPIPE, rmx_sendpipe); metric(RTV_SSTHRESH, 
rmx_ssthresh); @@ -770,23 +722,22 @@ rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out) metric(RTV_MTU, rmx_mtu); metric(RTV_EXPIRE, rmx_expire); #undef metric - + if (out->rt_rmx.rmx_expire > 0) { /* account for system time change */ - curr_uptime = net_uptime(); - getmicrotime(&curr_calendar_time); + getmicrotime(&caltime); out->base_calendartime += - CALCULATE_CLOCKSKEW(curr_calendar_time, + NET_CALCULATE_CLOCKSKEW(caltime, out->base_calendartime, - curr_uptime, out->base_uptime); - rt_setexpire(out, - out->rt_rmx.rmx_expire - - out->base_calendartime + + net_uptime(), out->base_uptime); + rt_setexpire(out, + out->rt_rmx.rmx_expire - + out->base_calendartime + out->base_uptime); } else { rt_setexpire(out, 0); } - + VERIFY(out->rt_expire == 0 || out->rt_rmx.rmx_expire != 0); VERIFY(out->rt_expire != 0 || out->rt_rmx.rmx_expire == 0); } @@ -794,32 +745,31 @@ rt_setmetrics(u_int32_t which, struct rt_metrics *in, struct rtentry *out) static void rt_getmetrics(struct rtentry *in, struct rt_metrics *out) { - struct timeval curr_calendar_time; - uint64_t curr_uptime; + struct timeval caltime; VERIFY(in->rt_expire == 0 || in->rt_rmx.rmx_expire != 0); VERIFY(in->rt_expire != 0 || in->rt_rmx.rmx_expire == 0); - - *out = in->rt_rmx; - - if (in->rt_expire) { + + *out = in->rt_rmx; + + if (in->rt_expire != 0) { /* account for system time change */ - getmicrotime(&curr_calendar_time); - curr_uptime = net_uptime(); + getmicrotime(&caltime); in->base_calendartime += - CALCULATE_CLOCKSKEW(curr_calendar_time, - in->base_calendartime, - curr_uptime, in->base_uptime); - + NET_CALCULATE_CLOCKSKEW(caltime, + in->base_calendartime, net_uptime(), in->base_uptime); + out->rmx_expire = in->base_calendartime + in->rt_expire - in->base_uptime; - } else + } else { out->rmx_expire = 0; + } } /* - * Set route's interface given info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA], and gateway. + * Set route's interface given info.rti_info[RTAX_IFP], + * info.rti_info[RTAX_IFA], and gateway. */ static void rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, @@ -827,17 +777,12 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, { struct ifaddr *ifa = NULL; struct ifnet *ifp = NULL; - void (*ifa_rtrequest) - (int, struct rtentry *, struct sockaddr *); + void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); - /* trigger route cache reevaluation */ - if (use_routegenid) - routegenid_update(); - /* Don't update a defunct route */ if (rt->rt_flags & RTF_CONDEMNED) return; @@ -858,11 +803,11 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, IFA_REMREF(ifa); ifa = ifaof_ifpforaddr(Ifaaddr ? 
Ifaaddr : Gate, ifp); } else { - if (ifa) { + if (ifa != NULL) { IFA_REMREF(ifa); - ifa = 0; + ifa = NULL; } - if (Ifpaddr && (ifp = if_withname(Ifpaddr)) ) { + if (Ifpaddr && (ifp = if_withname(Ifpaddr))) { if (Gate) { ifa = ifaof_ifpforaddr(Gate, ifp); } else { @@ -898,7 +843,16 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, } } } - if (ifa) { + + /* trigger route cache reevaluation */ + if (rt_key(rt)->sa_family == AF_INET) + routegenid_inet_update(); +#if INET6 + else if (rt_key(rt)->sa_family == AF_INET6) + routegenid_inet6_update(); +#endif /* INET6 */ + + if (ifa != NULL) { struct ifaddr *oifa = rt->rt_ifa; if (oifa != ifa) { if (oifa != NULL) { @@ -934,7 +888,13 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, set_primary_ifscope(rt_key(rt)->sa_family, rt->rt_ifp->if_index); } - rt->rt_rmx.rmx_mtu = ifp->if_mtu; + /* + * If rmx_mtu is not locked, update it + * to the MTU used by the new interface. + */ + if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) + rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + if (rt->rt_ifa != NULL) { IFA_LOCK_SPIN(rt->rt_ifa); ifa_rtrequest = rt->rt_ifa->ifa_rtrequest; @@ -948,6 +908,7 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, return; } IFA_REMREF(ifa); + ifa = NULL; } /* XXX: to reset gateway to correct value, at RTM_CHANGE */ @@ -959,15 +920,24 @@ rt_setif(struct rtentry *rt, struct sockaddr *Ifpaddr, struct sockaddr *Ifaaddr, ifa_rtrequest(RTM_ADD, rt, Gate); } + /* + * Workaround for local address routes pointing to the loopback + * interface added by configd, until . + */ + if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && + (rt->rt_flags & RTF_HOST) && rt->rt_ifa->ifa_ifp == rt->rt_ifp) { + ifa = ifa_ifwithaddr(rt_key(rt)); + if (ifa != NULL) { + if (ifa != rt->rt_ifa) + rtsetifa(rt, ifa); + IFA_REMREF(ifa); + } + } + /* Release extra ref */ RT_REMREF_LOCKED(rt); } -#define ROUNDUP32(a) \ - ((a) > 0 ? (1 + (((a) - 1) | (sizeof(uint32_t) - 1))) : sizeof(uint32_t)) -#define ADVANCE32(x, n) (x += ROUNDUP32((n)->sa_len)) - - /* * Extract the addresses of the passed sockaddrs. * Do a little sanity checking so as to avoid bad memory references. @@ -979,7 +949,7 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) struct sockaddr *sa; int i; - bzero(rtinfo->rti_info, sizeof(rtinfo->rti_info)); + bzero(rtinfo->rti_info, sizeof (rtinfo->rti_info)); for (i = 0; (i < RTAX_MAX) && (cp < cplim); i++) { if ((rtinfo->rti_addrs & (1 << i)) == 0) continue; @@ -987,14 +957,12 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) /* * It won't fit. */ - if ( (cp + sa->sa_len) > cplim ) { + if ((cp + sa->sa_len) > cplim) return (EINVAL); - } - /* * there are no more.. quit now * If there are more bits, they are in error. - * I've seen this. route(1) can evidently generate these. + * I've seen this. route(1) can evidently generate these. * This causes kernel to core dump. * for compatibility, If we see this, point to a safe address. 
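The expiry plumbing in rt_setexpire()/rt_getmetrics() above keeps two clocks in play: rt_expire counts on the monotonic uptime clock, while the rmx_expire value exported to userland is on the calendar clock, with NET_CALCULATE_CLOCKSKEW re-basing base_calendartime whenever the wall clock steps. The conversion itself is a single addition; a worked sketch with assumed numbers:

    #include <stdint.h>

    /* Calendar-clock expiry from an uptime-based one, as rt_getmetrics() computes it. */
    static uint64_t
    expire_to_calendar(uint64_t rt_expire, uint64_t base_calendartime,
        uint64_t base_uptime)
    {
        return (base_calendartime + rt_expire - base_uptime);
    }

    /*
     * Example (assumed values): a route created at uptime 100, when the wall
     * clock read 1400000000, that expires at uptime 160 is reported to
     * userland as expiring at calendar time 1400000060.
     */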
*/ @@ -1002,7 +970,6 @@ rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo) rtinfo->rti_info[i] = &sa_zero; return (0); /* should be EINVAL but for compat */ } - /* accept it */ rtinfo->rti_info[i] = sa; ADVANCE32(cp, sa); @@ -1022,40 +989,45 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) case RTM_DELADDR: case RTM_NEWADDR: - len = sizeof(struct ifa_msghdr); + len = sizeof (struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: - len = sizeof(struct ifma_msghdr); + len = sizeof (struct ifma_msghdr); break; case RTM_IFINFO: - len = sizeof(struct if_msghdr); + len = sizeof (struct if_msghdr); break; default: - len = sizeof(struct rt_msghdr); + len = sizeof (struct rt_msghdr); } if (len > MCLBYTES) panic("rt_msg1"); m = m_gethdr(M_DONTWAIT, MT_DATA); if (m && len > MHLEN) { MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { + if (!(m->m_flags & M_EXT)) { m_free(m); m = NULL; } } - if (m == 0) - return (m); + if (m == NULL) - return (NULL); m->m_pkthdr.len = m->m_len = len; - m->m_pkthdr.rcvif = 0; + m->m_pkthdr.rcvif = NULL; rtm = mtod(m, struct rt_msghdr *); bzero((caddr_t)rtm, len); for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa, *hint; - struct sockaddr_storage ss; + uint8_t ssbuf[SOCK_MAXADDRLEN + 1]; + + /* + * Make sure to accommodate the largest possible size of sa_len. + */ + _CASSERT(sizeof (ssbuf) == (SOCK_MAXADDRLEN + 1)); if ((sa = rtinfo->rti_info[i]) == NULL) continue; @@ -1067,7 +1039,8 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) hint = rtinfo->rti_info[RTAX_IFA]; /* Scrub away any trace of embedded interface scope */ - sa = rtm_scrub_ifscope(type, i, hint, sa, &ss); + sa = rtm_scrub(type, i, hint, sa, &ssbuf, + sizeof (ssbuf), NULL); break; default: @@ -1090,7 +1063,8 @@ rt_msg1(int type, struct rt_addrinfo *rtinfo) } static int -rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w) +rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w, + kauth_cred_t* credp) { int i; int len, dlen, second_time = 0; @@ -1102,24 +1076,24 @@ again: case RTM_DELADDR: case RTM_NEWADDR: - len = sizeof(struct ifa_msghdr); + len = sizeof (struct ifa_msghdr); break; case RTM_DELMADDR: case RTM_NEWMADDR: - len = sizeof(struct ifma_msghdr); + len = sizeof (struct ifma_msghdr); break; case RTM_IFINFO: - len = sizeof(struct if_msghdr); + len = sizeof (struct if_msghdr); break; case RTM_IFINFO2: - len = sizeof(struct if_msghdr2); + len = sizeof (struct if_msghdr2); break; case RTM_NEWMADDR2: - len = sizeof(struct ifma_msghdr2); + len = sizeof (struct ifma_msghdr2); break; case RTM_GET_EXT: @@ -1127,20 +1101,25 @@ again: break; case RTM_GET2: - len = sizeof(struct rt_msghdr2); + len = sizeof (struct rt_msghdr2); break; default: - len = sizeof(struct rt_msghdr); + len = sizeof (struct rt_msghdr); } cp0 = cp; if (cp0) cp += len; for (i = 0; i < RTAX_MAX; i++) { struct sockaddr *sa, *hint; - struct sockaddr_storage ss; + uint8_t ssbuf[SOCK_MAXADDRLEN + 1]; - if ((sa = rtinfo->rti_info[i]) == 0) + /* + * Make sure to accommodate the largest possible size of sa_len. 
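rt_xaddrs() above is the kernel-side parser for the sockaddr list that trails a routing message; userland consumers perform the mirror-image walk using the same 4-byte roundup that the ROUNDUP32/ADVANCE32 macros removed here encode. A sketch of that consumer-side helper (an illustration, not from this patch):

    #include <stdint.h>
    #include <stddef.h>
    #include <sys/socket.h>
    #include <net/route.h>

    #define RT_ROUNDUP32(a) \
        ((a) > 0 ? (1 + (((a) - 1) | (sizeof (uint32_t) - 1))) : sizeof (uint32_t))

    /* Scatter the sockaddrs following a message header into rti_info[]. */
    static void
    get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr *rti_info[RTAX_MAX])
    {
        int i;

        for (i = 0; i < RTAX_MAX; i++) {
            if (addrs & (1 << i)) {
                rti_info[i] = sa;
                sa = (struct sockaddr *)(void *)
                    ((char *)sa + RT_ROUNDUP32(sa->sa_len));
            } else {
                rti_info[i] = NULL;
            }
        }
    }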
+ */ + _CASSERT(sizeof (ssbuf) == (SOCK_MAXADDRLEN + 1)); + + if ((sa = rtinfo->rti_info[i]) == NULL) continue; switch (i) { @@ -1150,7 +1129,13 @@ again: hint = rtinfo->rti_info[RTAX_IFA]; /* Scrub away any trace of embedded interface scope */ - sa = rtm_scrub_ifscope(type, i, hint, sa, &ss); + sa = rtm_scrub(type, i, hint, sa, &ssbuf, + sizeof (ssbuf), NULL); + break; + + case RTAX_IFP: + sa = rtm_scrub(type, i, NULL, sa, &ssbuf, + sizeof (ssbuf), credp); break; default: @@ -1165,18 +1150,18 @@ again: } len += dlen; } - if (cp == 0 && w != NULL && !second_time) { + if (cp == NULL && w != NULL && !second_time) { struct walkarg *rw = w; - if (rw->w_req) { + if (rw->w_req != NULL) { if (rw->w_tmemsize < len) { - if (rw->w_tmem) + if (rw->w_tmem != NULL) FREE(rw->w_tmem, M_RTABLE); rw->w_tmem = _MALLOC(len, M_RTABLE, M_WAITOK); - if (rw->w_tmem) + if (rw->w_tmem != NULL) rw->w_tmemsize = len; } - if (rw->w_tmem) { + if (rw->w_tmem != NULL) { cp = rw->w_tmem; second_time = 1; goto again; @@ -1205,12 +1190,12 @@ rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) struct rt_msghdr *rtm; struct mbuf *m; struct sockaddr *sa = rtinfo->rti_info[RTAX_DST]; - struct sockproto route_proto = {PF_ROUTE, 0}; + struct sockproto route_proto = { PF_ROUTE, 0 }; if (route_cb.any_count == 0) return; m = rt_msg1(type, rtinfo); - if (m == 0) + if (m == NULL) return; rtm = mtod(m, struct rt_msghdr *); rtm->rtm_flags = RTF_DONE | flags; @@ -1225,19 +1210,18 @@ rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error) * socket indicating that the status of a network interface has changed. */ void -rt_ifmsg( - struct ifnet *ifp) +rt_ifmsg(struct ifnet *ifp) { struct if_msghdr *ifm; struct mbuf *m; struct rt_addrinfo info; - struct sockproto route_proto = {PF_ROUTE, 0}; + struct sockproto route_proto = { PF_ROUTE, 0 }; if (route_cb.any_count == 0) return; - bzero((caddr_t)&info, sizeof(info)); + bzero((caddr_t)&info, sizeof (info)); m = rt_msg1(RTM_IFINFO, &info); - if (m == 0) + if (m == NULL) return; ifm = mtod(m, struct if_msghdr *); ifm->ifm_index = ifp->if_index; @@ -1266,7 +1250,7 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) int pass; struct mbuf *m = 0; struct ifnet *ifp = ifa->ifa_ifp; - struct sockproto route_proto = {PF_ROUTE, 0}; + struct sockproto route_proto = { PF_ROUTE, 0 }; lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); @@ -1277,7 +1261,7 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) /* Become a regular mutex, just in case */ RT_CONVERT_LOCK(rt); for (pass = 1; pass < 3; pass++) { - bzero((caddr_t)&info, sizeof(info)); + bzero((caddr_t)&info, sizeof (info)); if ((cmd == RTM_ADD && pass == 1) || (cmd == RTM_DELETE && pass == 2)) { struct ifa_msghdr *ifam; @@ -1315,7 +1299,7 @@ rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt) (cmd == RTM_DELETE && pass == 1)) { struct rt_msghdr *rtm; - if (rt == 0) + if (rt == NULL) continue; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_DST] = sa = rt_key(rt); @@ -1345,23 +1329,25 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) struct mbuf *m = 0; struct ifnet *ifp = ifma->ifma_ifp; struct ifma_msghdr *ifmam; - struct sockproto route_proto = {PF_ROUTE, 0}; + struct sockproto route_proto = { PF_ROUTE, 0 }; if (route_cb.any_count == 0) return; /* Lock ifp for if_lladdr */ ifnet_lock_shared(ifp); - bzero((caddr_t)&info, sizeof(info)); + bzero((caddr_t)&info, sizeof (info)); IFMA_LOCK(ifma); 
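Note the shape of rt_msg2() above: called with cp == NULL it only accumulates len, and when a walkarg is supplied it (re)allocates w_tmem and jumps back to again: to fill the very layout it just measured. The same measure-then-fill idiom in miniature (a generic userland sketch, not kernel code):

    #include <stdlib.h>
    #include <string.h>

    /* Serialize src into buf, or just report the needed size when buf is NULL. */
    static size_t
    emit(const char *src, char *buf)
    {
        size_t len = strlen(src) + 1;

        if (buf != NULL)
            memcpy(buf, src, len);
        return (len);
    }

    static char *
    build(const char *src, size_t *lenp)
    {
        size_t len = emit(src, NULL);       /* first pass: measure */
        char *buf = malloc(len);

        if (buf != NULL)
            *lenp = emit(src, buf);         /* second pass: fill */
        return (buf);
    }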
info.rti_info[RTAX_IFA] = ifma->ifma_addr; - info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr; /* lladdr doesn't need lock */ + /* lladdr doesn't need lock */ + info.rti_info[RTAX_IFP] = ifp->if_lladdr->ifa_addr; /* * If a link-layer address is present, present it as a ``gateway'' * (similarly to how ARP entries, e.g., are presented). */ - info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ? ifma->ifma_ll->ifma_addr : NULL; + info.rti_info[RTAX_GATEWAY] = (ifma->ifma_ll != NULL) ? + ifma->ifma_ll->ifma_addr : NULL; if ((m = rt_msg1(cmd, &info)) == NULL) { IFMA_UNLOCK(ifma); ifnet_lock_done(ifp); @@ -1376,31 +1362,106 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma) raw_input(m, &route_proto, &route_src, &route_dst); } +const char * +rtm2str(int cmd) +{ + const char *c = "RTM_?"; + + switch (cmd) { + case RTM_ADD: + c = "RTM_ADD"; + break; + case RTM_DELETE: + c = "RTM_DELETE"; + break; + case RTM_CHANGE: + c = "RTM_CHANGE"; + break; + case RTM_GET: + c = "RTM_GET"; + break; + case RTM_LOSING: + c = "RTM_LOSING"; + break; + case RTM_REDIRECT: + c = "RTM_REDIRECT"; + break; + case RTM_MISS: + c = "RTM_MISS"; + break; + case RTM_LOCK: + c = "RTM_LOCK"; + break; + case RTM_OLDADD: + c = "RTM_OLDADD"; + break; + case RTM_OLDDEL: + c = "RTM_OLDDEL"; + break; + case RTM_RESOLVE: + c = "RTM_RESOLVE"; + break; + case RTM_NEWADDR: + c = "RTM_NEWADDR"; + break; + case RTM_DELADDR: + c = "RTM_DELADDR"; + break; + case RTM_IFINFO: + c = "RTM_IFINFO"; + break; + case RTM_NEWMADDR: + c = "RTM_NEWMADDR"; + break; + case RTM_DELMADDR: + c = "RTM_DELMADDR"; + break; + case RTM_GET_SILENT: + c = "RTM_GET_SILENT"; + break; + case RTM_IFINFO2: + c = "RTM_IFINFO2"; + break; + case RTM_NEWMADDR2: + c = "RTM_NEWMADDR2"; + break; + case RTM_GET2: + c = "RTM_GET2"; + break; + case RTM_GET_EXT: + c = "RTM_GET_EXT"; + break; + } + + return (c); +} + /* * This is used in dumping the kernel table via sysctl(). 
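rtm2str() above gives kernel diagnostics a printable name for each message type; a userland process watching the routing socket decodes the same stream, since every rt_missmsg()/rt_ifmsg()/rt_newaddrmsg()/rt_newmaddrmsg() broadcast lands there. A minimal monitor sketch (assumed userland counterpart; a richer version would map rtm_type through a table like rtm2str's):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <net/route.h>

    int
    main(void)
    {
        char buf[2048];
        ssize_t n;
        int s = socket(PF_ROUTE, SOCK_RAW, 0);

        if (s < 0)
            return (1);
        /* Each read returns one complete routing message. */
        while ((n = read(s, buf, sizeof (buf))) > 0) {
            struct rt_msghdr *rtm = (struct rt_msghdr *)(void *)buf;
            printf("type %d len %d\n", rtm->rtm_type, rtm->rtm_msglen);
        }
        close(s);
        return (0);
    }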
*/ -int +static int sysctl_dumpentry(struct radix_node *rn, void *vw) { struct walkarg *w = vw; struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; + kauth_cred_t cred; + + cred = kauth_cred_proc_ref(current_proc()); RT_LOCK(rt); - if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) { - RT_UNLOCK(rt); - return 0; - } - bzero((caddr_t)&info, sizeof(info)); + if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg)) + goto done; + bzero((caddr_t)&info, sizeof (info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = rt->rt_genmask; if (w->w_op != NET_RT_DUMP2) { - size = rt_msg2(RTM_GET, &info, 0, w); - if (w->w_req && w->w_tmem) { + size = rt_msg2(RTM_GET, &info, NULL, w, &cred); + if (w->w_req != NULL && w->w_tmem != NULL) { struct rt_msghdr *rtm = (struct rt_msghdr *)(void *)w->w_tmem; @@ -1413,12 +1474,10 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) rtm->rtm_errno = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); - RT_UNLOCK(rt); - return (error); } } else { - size = rt_msg2(RTM_GET2, &info, 0, w); - if (w->w_req && w->w_tmem) { + size = rt_msg2(RTM_GET2, &info, NULL, w, &cred); + if (w->w_req != NULL && w->w_tmem != NULL) { struct rt_msghdr2 *rtm = (struct rt_msghdr2 *)(void *)w->w_tmem; @@ -1434,38 +1493,40 @@ sysctl_dumpentry(struct radix_node *rn, void *vw) rtm->rtm_reserved = 0; rtm->rtm_addrs = info.rti_addrs; error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size); - RT_UNLOCK(rt); - return (error); } } + +done: RT_UNLOCK(rt); + kauth_cred_unref(&cred); return (error); } /* * This is used for dumping extended information from route entries. */ -int +static int sysctl_dumpentry_ext(struct radix_node *rn, void *vw) { struct walkarg *w = vw; struct rtentry *rt = (struct rtentry *)rn; int error = 0, size; struct rt_addrinfo info; + kauth_cred_t cred; + + cred = kauth_cred_proc_ref(current_proc()); RT_LOCK(rt); - if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) { - RT_UNLOCK(rt); - return (0); - } + if (w->w_op == NET_RT_DUMPX_FLAGS && !(rt->rt_flags & w->w_arg)) + goto done; bzero(&info, sizeof (info)); info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); info.rti_info[RTAX_GENMASK] = rt->rt_genmask; - - size = rt_msg2(RTM_GET_EXT, &info, 0, w); - if (w->w_req && w->w_tmem) { + + size = rt_msg2(RTM_GET_EXT, &info, NULL, w, &cred); + if (w->w_req != NULL && w->w_tmem != NULL) { struct rt_msghdr_ext *ertm = (struct rt_msghdr_ext *)(void *)w->w_tmem; @@ -1482,33 +1543,32 @@ sysctl_dumpentry_ext(struct radix_node *rn, void *vw) ertm->rtm_ri.ri_rssi = IFNET_RSSI_UNKNOWN; ertm->rtm_ri.ri_lqm = IFNET_LQM_THRESH_OFF; ertm->rtm_ri.ri_npm = IFNET_NPM_THRESH_UNKNOWN; - } - else + } else { rt->rt_llinfo_get_ri(rt, &ertm->rtm_ri); - + } error = SYSCTL_OUT(w->w_req, (caddr_t)ertm, size); - RT_UNLOCK(rt); - return (error); } + +done: RT_UNLOCK(rt); + kauth_cred_unref(&cred); return (error); } /* * rdar://9307819 - * To avoid to call copyout() while holding locks and to cause problems - * in the paging path, sysctl_iflist() and sysctl_iflist2() contstruct + * To avoid calling copyout() while holding locks, which can cause problems + * in the paging path, sysctl_iflist() and sysctl_iflist2() construct * the list in two passes. 
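sysctl_dumpentry() above runs once per radix-tree leaf under the NET_RT_DUMP/NET_RT_DUMP2 sysctl, and its records are concatenated into the caller's buffer. The matching consumer, roughly what netstat -rn does (a sketch under that assumption, not from this patch):

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/sysctl.h>
    #include <net/route.h>

    int
    main(void)
    {
        int mib[6] = { CTL_NET, PF_ROUTE, 0, 0 /* all families */,
            NET_RT_DUMP2, 0 };
        size_t len;
        char *buf, *next;

        if (sysctl(mib, 6, NULL, &len, NULL, 0) < 0)    /* sizing call */
            return (1);
        if ((buf = malloc(len)) == NULL)
            return (1);
        if (sysctl(mib, 6, buf, &len, NULL, 0) < 0)     /* filling call */
            return (1);
        /* Walk the concatenated records by rtm_msglen. */
        for (next = buf; next < buf + len;) {
            struct rt_msghdr2 *rtm = (struct rt_msghdr2 *)(void *)next;
            printf("flags 0x%x refcnt %d\n", rtm->rtm_flags, rtm->rtm_refcnt);
            next += rtm->rtm_msglen;
        }
        free(buf);
        return (0);
    }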
In the first pass we compute the total * length of the data we are going to copyout, then we release - * all locks to allocate a temporary buffer that gets filled + * all locks to allocate a temporary buffer that gets filled * in the second pass. * - * Note that we are verifying the assumption that _MALLOC returns a buffer - * that is at least 32 bits aligned and that the messages and addresses are + * Note that we are verifying the assumption that _MALLOC returns a buffer + * that is at least 32 bits aligned and that the messages and addresses are * 32 bits aligned. */ - -int +static int sysctl_iflist(int af, struct walkarg *w) { struct ifnet *ifp; @@ -1518,12 +1578,15 @@ sysctl_iflist(int af, struct walkarg *w) int pass = 0; int total_len = 0, current_len = 0; char *total_buffer = NULL, *cp = NULL; + kauth_cred_t cred; + + cred = kauth_cred_proc_ref(current_proc()); + + bzero((caddr_t)&info, sizeof (info)); - bzero((caddr_t)&info, sizeof(info)); - for (pass = 0; pass < 2; pass++) { ifnet_head_lock_shared(); - + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { if (error) break; @@ -1531,13 +1594,14 @@ sysctl_iflist(int af, struct walkarg *w) continue; ifnet_lock_shared(ifp); /* - * Holding ifnet lock here prevents the link address from - * changing contents, so no need to hold the ifa lock. - * The link address is always present; it's never freed. + * Holding ifnet lock here prevents the link address + * from changing contents, so no need to hold the ifa + * lock. The link address is always present; it's + * never freed. */ ifa = ifp->if_lladdr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; - len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, NULL); + len = rt_msg2(RTM_IFINFO, &info, NULL, NULL, &cred); if (pass == 0) { total_len += len; } else { @@ -1545,27 +1609,26 @@ sysctl_iflist(int af, struct walkarg *w) if (current_len + len > total_len) { ifnet_lock_done(ifp); - printf("sysctl_iflist: current_len (%d) + len (%d) > total_len (%d)\n", - current_len, len, total_len); error = ENOBUFS; break; } info.rti_info[RTAX_IFP] = ifa->ifa_addr; - len = rt_msg2(RTM_IFINFO, &info, (caddr_t)cp, NULL); + len = rt_msg2(RTM_IFINFO, &info, + (caddr_t)cp, NULL, &cred); info.rti_info[RTAX_IFP] = NULL; - + ifm = (struct if_msghdr *)(void *)cp; ifm->ifm_index = ifp->if_index; ifm->ifm_flags = (u_short)ifp->if_flags; if_data_internal_to_if_data(ifp, &ifp->if_data, - &ifm->ifm_data); + &ifm->ifm_data); ifm->ifm_addrs = info.rti_addrs; cp += len; - VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t))); + VERIFY(IS_P2ALIGNED(cp, sizeof (u_int32_t))); current_len += len; } - while ((ifa = ifa->ifa_link.tqe_next) != 0) { + while ((ifa = ifa->ifa_link.tqe_next) != NULL) { IFA_LOCK(ifa); if (af && af != ifa->ifa_addr->sa_family) { IFA_UNLOCK(ifa); @@ -1574,7 +1637,8 @@ sysctl_iflist(int af, struct walkarg *w) info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; - len = rt_msg2(RTM_NEWADDR, &info, 0, 0); + len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL, + &cred); if (pass == 0) { total_len += len; } else { @@ -1582,62 +1646,71 @@ sysctl_iflist(int af, struct walkarg *w) if (current_len + len > total_len) { IFA_UNLOCK(ifa); - printf("sysctl_iflist: current_len (%d) + len (%d) > total_len (%d)\n", - current_len, len, total_len); error = ENOBUFS; break; } - len = rt_msg2(RTM_NEWADDR, &info, (caddr_t)cp, NULL); - + len = rt_msg2(RTM_NEWADDR, &info, + (caddr_t)cp, NULL, &cred); + ifam = (struct ifa_msghdr *)(void *)cp; - ifam->ifam_index = ifa->ifa_ifp->if_index; + 
ifam->ifam_index = + ifa->ifa_ifp->if_index; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_addrs = info.rti_addrs; cp += len; - VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t))); + VERIFY(IS_P2ALIGNED(cp, + sizeof (u_int32_t))); current_len += len; } IFA_UNLOCK(ifa); } ifnet_lock_done(ifp); - info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = - info.rti_info[RTAX_BRD] = NULL; + info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = + info.rti_info[RTAX_BRD] = NULL; } - + ifnet_head_done(); - - if (error) + + if (error != 0) { + if (error == ENOBUFS) + printf("%s: current_len (%d) + len (%d) > " + "total_len (%d)\n", __func__, current_len, + len, total_len); break; - + } + if (pass == 0) { /* Better to return zero length buffer than ENOBUFS */ if (total_len == 0) total_len = 1; total_len += total_len >> 3; - total_buffer = _MALLOC(total_len, M_RTABLE, M_ZERO | M_WAITOK); + total_buffer = _MALLOC(total_len, M_RTABLE, + M_ZERO | M_WAITOK); if (total_buffer == NULL) { - printf("sysctl_iflist: _MALLOC(%d) failed\n", total_len); + printf("%s: _MALLOC(%d) failed\n", __func__, + total_len); error = ENOBUFS; break; } cp = total_buffer; - VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t))); + VERIFY(IS_P2ALIGNED(cp, sizeof (u_int32_t))); } else { error = SYSCTL_OUT(w->w_req, total_buffer, current_len); if (error) break; } } - + if (total_buffer != NULL) _FREE(total_buffer, M_RTABLE); - - return error; + + kauth_cred_unref(&cred); + return (error); } -int +static int sysctl_iflist2(int af, struct walkarg *w) { struct ifnet *ifp; @@ -1647,12 +1720,17 @@ sysctl_iflist2(int af, struct walkarg *w) int pass = 0; int total_len = 0, current_len = 0; char *total_buffer = NULL, *cp = NULL; + kauth_cred_t cred; - bzero((caddr_t)&info, sizeof(info)); + cred = kauth_cred_proc_ref(current_proc()); + + bzero((caddr_t)&info, sizeof (info)); for (pass = 0; pass < 2; pass++) { + struct ifmultiaddr *ifma; + ifnet_head_lock_shared(); - + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { if (error) break; @@ -1660,13 +1738,14 @@ sysctl_iflist2(int af, struct walkarg *w) continue; ifnet_lock_shared(ifp); /* - * Holding ifnet lock here prevents the link address from - * changing contents, so no need to hold the ifa lock. - * The link address is always present; it's never freed. + * Holding ifnet lock here prevents the link address + * from changing contents, so no need to hold the ifa + * lock. The link address is always present; it's + * never freed. 
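The records built above stream out through the NET_RT_IFLIST sysctl as one RTM_IFINFO per interface, each followed by its RTM_NEWADDR records; getifaddrs(3) is essentially a wrapper around this walk. A consumer-side sketch (illustration only):

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/sysctl.h>
    #include <net/if.h>
    #include <net/route.h>

    int
    main(void)
    {
        int mib[6] = { CTL_NET, PF_ROUTE, 0, 0, NET_RT_IFLIST, 0 };
        size_t len;
        char *buf, *next;

        if (sysctl(mib, 6, NULL, &len, NULL, 0) < 0 ||
            (buf = malloc(len)) == NULL ||
            sysctl(mib, 6, buf, &len, NULL, 0) < 0)
            return (1);
        for (next = buf; next < buf + len;) {
            struct if_msghdr *ifm = (struct if_msghdr *)(void *)next;

            if (ifm->ifm_type == RTM_IFINFO)
                printf("interface %d flags 0x%x\n",
                    ifm->ifm_index, ifm->ifm_flags);
            else if (ifm->ifm_type == RTM_NEWADDR)
                printf("  address record, %d bytes\n", ifm->ifm_msglen);
            next += ifm->ifm_msglen;
        }
        free(buf);
        return (0);
    }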
*/ ifa = ifp->if_lladdr; info.rti_info[RTAX_IFP] = ifa->ifa_addr; - len = rt_msg2(RTM_IFINFO2, &info, (caddr_t)0, NULL); + len = rt_msg2(RTM_IFINFO2, &info, NULL, NULL, &cred); if (pass == 0) { total_len += len; } else { @@ -1674,15 +1753,14 @@ sysctl_iflist2(int af, struct walkarg *w) if (current_len + len > total_len) { ifnet_lock_done(ifp); - printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n", - current_len, len, total_len); error = ENOBUFS; break; } info.rti_info[RTAX_IFP] = ifa->ifa_addr; - len = rt_msg2(RTM_IFINFO2, &info, (caddr_t)cp, NULL); + len = rt_msg2(RTM_IFINFO2, &info, + (caddr_t)cp, NULL, &cred); info.rti_info[RTAX_IFP] = NULL; - + ifm = (struct if_msghdr2 *)(void *)cp; ifm->ifm_addrs = info.rti_addrs; ifm->ifm_flags = (u_short)ifp->if_flags; @@ -1692,14 +1770,14 @@ sysctl_iflist2(int af, struct walkarg *w) ifm->ifm_snd_drops = ifp->if_snd.ifcq_dropcnt.packets; ifm->ifm_timer = ifp->if_timer; - if_data_internal_to_if_data64(ifp, &ifp->if_data, - &ifm->ifm_data); + if_data_internal_to_if_data64(ifp, + &ifp->if_data, &ifm->ifm_data); cp += len; - VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t))); + VERIFY(IS_P2ALIGNED(cp, sizeof (u_int32_t))); current_len += len; } - while ((ifa = ifa->ifa_link.tqe_next) != 0) { + while ((ifa = ifa->ifa_link.tqe_next) != NULL) { IFA_LOCK(ifa); if (af && af != ifa->ifa_addr->sa_family) { IFA_UNLOCK(ifa); @@ -1708,29 +1786,31 @@ sysctl_iflist2(int af, struct walkarg *w) info.rti_info[RTAX_IFA] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr; - len = rt_msg2(RTM_NEWADDR, &info, 0, 0); + len = rt_msg2(RTM_NEWADDR, &info, NULL, NULL, + &cred); if (pass == 0) { total_len += len; } else { struct ifa_msghdr *ifam; - + if (current_len + len > total_len) { IFA_UNLOCK(ifa); - printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n", - current_len, len, total_len); error = ENOBUFS; break; } - len = rt_msg2(RTM_NEWADDR, &info, (caddr_t)cp, 0); + len = rt_msg2(RTM_NEWADDR, &info, + (caddr_t)cp, NULL, &cred); ifam = (struct ifa_msghdr *)(void *)cp; - ifam->ifam_index = ifa->ifa_ifp->if_index; + ifam->ifam_index = + ifa->ifa_ifp->if_index; ifam->ifam_flags = ifa->ifa_flags; ifam->ifam_metric = ifa->ifa_metric; ifam->ifam_addrs = info.rti_addrs; cp += len; - VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t))); + VERIFY(IS_P2ALIGNED(cp, + sizeof (u_int32_t))); current_len += len; } IFA_UNLOCK(ifa); @@ -1739,166 +1819,114 @@ sysctl_iflist2(int af, struct walkarg *w) ifnet_lock_done(ifp); break; } - { - struct ifmultiaddr *ifma; - - for (ifma = LIST_FIRST(&ifp->if_multiaddrs); - ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) { - struct ifaddr *ifa0; - - IFMA_LOCK(ifma); - if (af && af != ifma->ifma_addr->sa_family) { + + for (ifma = LIST_FIRST(&ifp->if_multiaddrs); + ifma != NULL; ifma = LIST_NEXT(ifma, ifma_link)) { + struct ifaddr *ifa0; + + IFMA_LOCK(ifma); + if (af && af != ifma->ifma_addr->sa_family) { + IFMA_UNLOCK(ifma); + continue; + } + bzero((caddr_t)&info, sizeof (info)); + info.rti_info[RTAX_IFA] = ifma->ifma_addr; + /* + * Holding ifnet lock here prevents the link + * address from changing contents, so no need + * to hold the ifa0 lock. The link address is + * always present; it's never freed. 
+ */ + ifa0 = ifp->if_lladdr; + info.rti_info[RTAX_IFP] = ifa0->ifa_addr; + if (ifma->ifma_ll != NULL) + info.rti_info[RTAX_GATEWAY] = + ifma->ifma_ll->ifma_addr; + len = rt_msg2(RTM_NEWMADDR2, &info, NULL, NULL, + &cred); + if (pass == 0) { + total_len += len; + } else { + struct ifma_msghdr2 *ifmam; + + if (current_len + len > total_len) { IFMA_UNLOCK(ifma); - continue; - } - bzero((caddr_t)&info, sizeof(info)); - info.rti_info[RTAX_IFA] = ifma->ifma_addr; - /* - * Holding ifnet lock here prevents the link - * address from changing contents, so no need - * to hold the ifa0 lock. The link address is - * always present; it's never freed. - */ - ifa0 = ifp->if_lladdr; - info.rti_info[RTAX_IFP] = ifa0->ifa_addr; - if (ifma->ifma_ll != NULL) - info.rti_info[RTAX_GATEWAY] = ifma->ifma_ll->ifma_addr; - len = rt_msg2(RTM_NEWMADDR2, &info, 0, 0); - if (pass == 0) { - total_len += len; - } else { - struct ifma_msghdr2 *ifmam; - - if (current_len + len > total_len) { - IFMA_UNLOCK(ifma); - printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n", - current_len, len, total_len); - error = ENOBUFS; - break; - } - len = rt_msg2(RTM_NEWMADDR2, &info, (caddr_t)cp, 0); - - ifmam = (struct ifma_msghdr2 *)(void *)cp; - ifmam->ifmam_addrs = info.rti_addrs; - ifmam->ifmam_flags = 0; - ifmam->ifmam_index = - ifma->ifma_ifp->if_index; - ifmam->ifmam_refcount = - ifma->ifma_reqcnt; - - cp += len; - VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t))); - current_len += len; + error = ENOBUFS; + break; } - IFMA_UNLOCK(ifma); + len = rt_msg2(RTM_NEWMADDR2, &info, + (caddr_t)cp, NULL, &cred); + + ifmam = + (struct ifma_msghdr2 *)(void *)cp; + ifmam->ifmam_addrs = info.rti_addrs; + ifmam->ifmam_flags = 0; + ifmam->ifmam_index = + ifma->ifma_ifp->if_index; + ifmam->ifmam_refcount = + ifma->ifma_reqcnt; + + cp += len; + VERIFY(IS_P2ALIGNED(cp, + sizeof (u_int32_t))); + current_len += len; } + IFMA_UNLOCK(ifma); } ifnet_lock_done(ifp); - info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = - info.rti_info[RTAX_BRD] = NULL; + info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] = + info.rti_info[RTAX_BRD] = NULL; } ifnet_head_done(); - - if (error) + + if (error) { + if (error == ENOBUFS) + printf("%s: current_len (%d) + len (%d) > " + "total_len (%d)\n", __func__, current_len, + len, total_len); break; - + } + if (pass == 0) { /* Better to return zero length buffer than ENOBUFS */ if (total_len == 0) total_len = 1; total_len += total_len >> 3; - total_buffer = _MALLOC(total_len, M_RTABLE, M_ZERO | M_WAITOK); + total_buffer = _MALLOC(total_len, M_RTABLE, + M_ZERO | M_WAITOK); if (total_buffer == NULL) { - printf("sysctl_iflist2: _MALLOC(%d) failed\n", total_len); + printf("%s: _MALLOC(%d) failed\n", __func__, + total_len); error = ENOBUFS; break; } cp = total_buffer; - VERIFY(IS_P2ALIGNED(cp, sizeof(u_int32_t))); + VERIFY(IS_P2ALIGNED(cp, sizeof (u_int32_t))); } else { error = SYSCTL_OUT(w->w_req, total_buffer, current_len); if (error) break; } } - + if (total_buffer != NULL) _FREE(total_buffer, M_RTABLE); - - return error; + + kauth_cred_unref(&cred); + return (error); } static int sysctl_rtstat(struct sysctl_req *req) { - int error; - - error = SYSCTL_OUT(req, &rtstat, sizeof(struct rtstat)); - if (error) - return (error); - - return 0; + return (SYSCTL_OUT(req, &rtstat, sizeof (struct rtstat))); } static int sysctl_rttrash(struct sysctl_req *req) { - int error; - - error = SYSCTL_OUT(req, &rttrash, sizeof(rttrash)); - if (error) - return (error); - - return 0; -} - -/* - * Called from pfslowtimo(), 
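One subtlety in both list builders above: all locks are dropped between the sizing pass and the fill pass, so interfaces or addresses may appear in the meantime. The extra-eighth headroom added to total_len absorbs modest growth, and anything larger now surfaces through the single consolidated ENOBUFS printf rather than as truncated output. Worked numbers for the headroom (assumed sizing result):

    total_len = 4096;               /* measured in pass one */
    total_len += total_len >> 3;    /* + 512 -> 4608 bytes allocated */

NET_RT_IFLIST2 is walked the same way as NET_RT_IFLIST, but with if_msghdr2/ifma_msghdr2 records and the additional RTM_NEWMADDR2 entries for multicast memberships.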
protected by domain_proto_mtx - */ -static void -rt_drainall(void) -{ - struct timeval delta_ts, current_ts; - - /* - * This test is done without holding rnh_lock; in the even that - * we read stale value, it will only cause an extra (or miss) - * drain and is therefore harmless. - */ - if (ifnet_aggressive_drainers == 0) { - if (timerisset(&last_ts)) - timerclear(&last_ts); - return; - } - - microuptime(¤t_ts); - timersub(¤t_ts, &last_ts, &delta_ts); - - if (delta_ts.tv_sec >= rt_if_idle_drain_interval) { - timerclear(&last_ts); - - in_rtqdrain(); /* protocol cloned routes: INET */ - in_arpdrain(NULL); /* cloned routes: ARP */ -#if INET6 - in6_rtqdrain(); /* protocol cloned routes: INET6 */ - nd6_drain(NULL); /* cloned routes: ND6 */ -#endif /* INET6 */ - - last_ts.tv_sec = current_ts.tv_sec; - last_ts.tv_usec = current_ts.tv_usec; - } -} - -void -rt_aggdrain(int on) -{ - lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); - - if (on) - routedomain.dom_protosw->pr_flags |= PR_AGGDRAIN; - else - routedomain.dom_protosw->pr_flags &= ~PR_AGGDRAIN; + return (SYSCTL_OUT(req, &rttrash, sizeof (rttrash))); } static int @@ -1919,7 +1947,7 @@ sysctl_rtsock SYSCTL_HANDLER_ARGS if (namelen != 3) return (EINVAL); af = name[0]; - Bzero(&w, sizeof(w)); + Bzero(&w, sizeof (w)); w.w_op = name[1]; w.w_arg = name[2]; w.w_req = req; @@ -1960,33 +1988,47 @@ sysctl_rtsock SYSCTL_HANDLER_ARGS error = sysctl_rttrash(req); break; } - if (w.w_tmem) + if (w.w_tmem != NULL) FREE(w.w_tmem, M_RTABLE); return (error); } -SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_rtsock, ""); - /* * Definitions of protocols supported in the ROUTE domain. */ static struct protosw routesw[] = { -{ SOCK_RAW, &routedomain, 0, PR_ATOMIC|PR_ADDR, - 0, route_output, raw_ctlinput, 0, - 0, - raw_init, 0, 0, rt_drainall, - 0, - &route_usrreqs, - 0, 0, 0, - { 0, 0 }, 0, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = 0, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_output = route_output, + .pr_ctlinput = raw_ctlinput, + .pr_init = raw_init, + .pr_usrreqs = &route_usrreqs, } }; -struct domain routedomain = - { PF_ROUTE, "route", route_init, 0, 0, - routesw, - NULL, NULL, 0, 0, 0, 0, NULL, 0, - { 0, 0 } }; +static int route_proto_count = (sizeof (routesw) / sizeof (struct protosw)); -DOMAIN_SET(route); +struct domain routedomain_s = { + .dom_family = PF_ROUTE, + .dom_name = "route", + .dom_init = route_dinit, +}; + +static void +route_dinit(struct domain *dp) +{ + struct protosw *pr; + int i; + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(routedomain == NULL); + + routedomain = dp; + + for (i = 0, pr = &routesw[0]; i < route_proto_count; i++, pr++) + net_add_proto(pr, dp, 1); + + route_init(); +} diff --git a/bsd/net/slip.h b/bsd/net/slip.h deleted file mode 100644 index c933d4402..000000000 --- a/bsd/net/slip.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*- - * Copyright (c) 1994 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)slip.h 8.1 (Berkeley) 2/12/94 - * $FreeBSD: src/sys/net/slip.h,v 1.9 1999/08/28 00:48:29 peter Exp $ - */ - -#ifndef _NET_SLIP_H_ -#define _NET_SLIP_H_ -#include - -/* Ioctls operating on SLIP ttys. 
*/ -#define SLIOCGUNIT _IOR('t', 88, int) /* get slip unit number */ -#define SLIOCSKEEPAL _IOW('t', 84, int) /* set keepalive */ -#define SLIOCSOUTFILL _IOW('t', 83, int) /* set out fill time */ -#define SLIOCGKEEPAL _IOR('t', 82, int) /* get keepalive time */ -#define SLIOCGOUTFILL _IOR('t', 81, int) /* get out fill time */ -#define SLIOCSUNIT _IOW('t', 80, int) /* set slip unit number */ - -/* - * Definitions of the pseudo-link-level header attached to slip - * packets grabbed by the packet filter (bpf) traffic monitor. - */ -#define SLIP_HDRLEN 16 /* BPF SLIP header length */ - -/* Offsets into BPF SLIP header. */ -#define SLX_DIR 0 /* direction; see below */ -#define SLX_CHDR 1 /* compressed header data */ -#define CHDR_LEN 15 /* length of compressed header data */ - -#define SLIPDIR_IN 0 /* incoming */ -#define SLIPDIR_OUT 1 /* outgoing */ - -#endif /* !_NET_SLIP_H */ diff --git a/bsd/netat/Makefile b/bsd/netat/Makefile deleted file mode 100644 index 3f307255c..000000000 --- a/bsd/netat/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - -CWARNFLAGS += -Wno-sign-compare - -include $(MakeInc_cmd) -include $(MakeInc_def) - -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -SETUP_SUBDIRS = \ - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - -DATAFILES = - -PRIVATE_DATAFILES = \ - appletalk.h atp.h asp.h aurp.h \ - ddp.h ep.h lap.h nbp.h pap.h zip.h \ - adsp.h at_pat.h at_snmp.h at_aarp.h \ - rtmp.h debug.h routing_tables.h sysglue.h at_var.h \ - adsp_internal.h at_ddp_brt.h at_pcb.h - -PRIVATE_KERNELFILES = - -INSTALL_MI_LIST = - -INSTALL_MI_DIR = - -EXPORT_MI_LIST = - -EXPORT_MI_DIR = netat - -INSTALL_MI_LCL_LIST = - -INSTALL_KF_MI_LCL_LIST = - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/bsd/netat/adsp.c b/bsd/netat/adsp.c deleted file mode 100644 index 65fe77206..000000000 --- a/bsd/netat/adsp.c +++ /dev/null @@ -1,395 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Change log: - * 06/29/95 - Modified to handle flow control for writing (Tuyen Nguyen) - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ -#define RESOLVE_DBG -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#ifdef notdefn -struct adsp_debug adsp_dtable[1025]; -int ad_entry = 0; -#endif - -int -adspAllocateCCB(gref) - register gref_t *gref; /* READ queue */ -{ - gbuf_t *ccb_mp; - register CCBPtr sp; - - if (!(ccb_mp = gbuf_alloc(sizeof(CCB), PRI_LO))) { - return (0); - } - bzero((caddr_t) gbuf_rptr(ccb_mp), sizeof(CCB)); - gbuf_wset(ccb_mp,sizeof(CCB)); - gref->info = (caddr_t) ccb_mp; - sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); - - sp->pid = gref->pid; /* save the caller process pointer */ - sp->gref = gref; /* save a back pointer to the WRITE queue */ - sp->sp_mp = ccb_mp; /* and its message block */ - return 1; -} - -int -adspRelease(gref) - register gref_t *gref; /* READ queue */ -{ - register CCBPtr sp; - - if (gref->info) { - sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); - /* Tells completion routine of close */ - /* packet to remove us. */ - - if (sp->state == sPassive || sp->state == sClosed || - sp->state == sOpening || sp->state == sListening) { - if (sp->state == sListening) - CompleteQueue(&sp->opb, errAborted); - sp->removing = 1; /* Prevent allowing another dspClose. */ - DoClose(sp, errAborted, 0); /* will remove CCB */ - return 0; - } else { /* sClosing & sOpen */ - sp->state = sClosing; - } - - if (CheckOkToClose(sp)) { /* going to close */ - sp->sendCtl = B_CTL_CLOSE; /* Send close advice */ - } else { - CheckSend(sp); /* try one more time to send out data */ - if (sp->state != sClosed) - sp->sendCtl = B_CTL_CLOSE; /* Setup to send close advice */ - } - CheckSend(sp); /* and force out the close */ - sp->removing = 1; /* Prevent allowing another dspClose. 
*/ - sp->state = sClosed; - DoClose(sp, errAborted, 0); /* to closed and remove CCB */ - } - return 0; -} - - - -int -adspWriteHandler(gref, mp) - gref_t *gref; /* WRITE queue */ - gbuf_t *mp; -{ - - register ioc_t *iocbp; - register struct adspcmd *ap; - int error, flag; - void *sp; - - switch(gbuf_type(mp)) { - case MSG_DATA: - if (gref->info == 0) { - gbuf_freem(mp); - return(STR_IGNORE); - } - /* - * Fill in the global stuff - */ - ap = (struct adspcmd *)gbuf_rptr(mp); - ap->gref = gref; - ap->ioc = 0; - ap->mp = mp; - sp = (void *)gbuf_rptr(((gbuf_t *)gref->info)); - switch(ap->csCode) { - case dspWrite: - if ((error = adspWrite(sp, ap))) - gbuf_freem(mp); - return(STR_IGNORE); - case dspAttention: - if ((error = adspAttention(sp, (CCBPtr)ap))) - gbuf_freem(mp); - return(STR_IGNORE); - } - case MSG_IOCTL: - if (gref->info == 0) { - adspioc_ack(EPROTOTYPE, mp, gref); - return(STR_IGNORE); - } - iocbp = (ioc_t *) gbuf_rptr(mp); - if (ADSP_IOCTL(iocbp->ioc_cmd)) { - iocbp->ioc_count = sizeof(*ap) - 1; - if (gbuf_cont(mp) == 0) { - adspioc_ack(EINVAL, mp, gref); - return(STR_IGNORE); - } - ap = (struct adspcmd *) gbuf_rptr(gbuf_cont(mp)); - ap->gref = gref; - ap->ioc = (caddr_t) mp; - ap->mp = gbuf_cont(mp); /* request head */ - ap->ioResult = 0; - - if ((gref->info == 0) && ((iocbp->ioc_cmd != ADSPOPEN) && - (iocbp->ioc_cmd != ADSPCLLISTEN))) { - ap->ioResult = errState; - - adspioc_ack(EINVAL, mp, gref); - return(STR_IGNORE); - } - } else - return(STR_PUTNEXT); /* pass it on down */ - sp = (void *)gbuf_rptr(((gbuf_t *)gref->info)); - switch(iocbp->ioc_cmd) { - case ADSPOPEN: - case ADSPCLLISTEN: - ap->socket = ((CCBPtr)sp)->localSocket; - flag = (adspMode(ap) == ocAccept) ? 1 : 0; - if (flag && ap->socket) { - if (adspDeassignSocket((CCBPtr)sp) >= 0) - ap->socket = 0; - } - if ((ap->socket == 0) && - ((ap->socket = - (at_socket)adspAssignSocket(gref, flag)) == 0)) { - adspioc_ack(EADDRNOTAVAIL, mp, gref); - return(STR_IGNORE); - } - ap->csCode = iocbp->ioc_cmd == ADSPOPEN ? 
dspInit : dspCLInit; - if ((error = adspInit(sp, ap)) == 0) { - switch(ap->csCode) { - case dspInit: - /* and open the connection */ - ap->csCode = dspOpen; - error = adspOpen(sp, ap); - break; - case dspCLInit: - /* ADSPCLLISTEN */ - ap->csCode = dspCLListen; - error = adspCLListen(sp, ap); - break; - } - } - if (error) - adspioc_ack(error, mp, gref); /* if this failed req complete */ - return(STR_IGNORE); - case ADSPCLOSE: - ap->csCode = dspClose; - if ((error = adspClose(sp, ap))) { - adspioc_ack(error, mp, gref); - break; - } - break; - case ADSPCLREMOVE: - ap->csCode = dspCLRemove; - error = adspClose(sp, ap); - adspioc_ack(error, mp, gref); - return(STR_IGNORE); - case ADSPCLDENY: - ap->csCode = dspCLDeny; - if ((error = adspCLDeny(sp, (CCBPtr)ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPSTATUS: - ap->csCode = dspStatus; - if ((error = adspStatus(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPREAD: - ap->csCode = dspRead; - if ((error = adspRead(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPATTENTION: - ap->csCode = dspAttention; - if ((error = adspReadAttention((CCBPtr)sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPOPTIONS: - ap->csCode = dspOptions; - if ((error = adspOptions(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPRESET: - ap->csCode = dspReset; - if ((error = adspReset(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - case ADSPNEWCID: - ap->csCode = dspNewCID; - if ((error = adspNewCID(sp, ap))) { - adspioc_ack(error, mp, gref); - } - return(STR_IGNORE); - default: - return(STR_PUTNEXT); /* pass it on down */ - } - return(STR_IGNORE); - case MSG_PROTO: - default: - gbuf_freem(mp); - } - return(STR_IGNORE); -} - -int -adspReadHandler(gref, mp) - gref_t *gref; - gbuf_t *mp; -{ - int error; - - switch(gbuf_type(mp)) { - case MSG_DATA: - if ((error = adspPacket(gref, mp))) { - gbuf_freem(mp); - } - break; - - case MSG_IOCTL: - default: - return(STR_PUTNEXT); - break; - } - return(STR_IGNORE); -} - -/* - * adsp_sendddp() - * - * Description: - * This procedure a formats a DDP datagram header and calls the - * DDP module to queue it for routing and transmission according to - * the DDP parameters. We always take control of the datagram; - * if there is an error we free it, otherwise we pass it to the next - * layer. We don't need to set the src address fileds because the - * DDP layer fills these in for us. - * - * Calling Sequence: - * ret_status = adsp_sendddp(q, sp, mp, length, dstnetaddr, ddptype); - * - * Formal Parameters: - * sp Caller stream pointer - * mp gbuf_t chain containing the datagram to transmit - * The first mblk contains the ADSP header and space - * for the DDP header. - * length size of data portion of datagram - * dstnetaddr address of 4-byte destination internet address - * ddptype DDP protocol to assign to the datagram - * - * Completion Status: - * 0 Procedure successful completed. - * EMSGSIZE Specified datagram length is too big. 
- * - * Side Effects: - * NONE - */ -int -adsp_sendddp(sp, mp, length, dstnetaddr, ddptype) - CCBPtr sp; - gbuf_t *mp; - int length; - AddrUnion *dstnetaddr; - int ddptype; -{ - DDPX_FRAME *ddp; - gbuf_t *mlist = mp; - - if (mp == 0) - return EINVAL; - - if (length > DDP_DATA_SIZE) { - gbuf_freel(mlist); - return EMSGSIZE; - } - - while (mp) { - - if (length == 0) - length = gbuf_msgsize(mp) - DDPL_FRAME_LEN; - /* Set up the DDP header */ - - ddp = (DDPX_FRAME *) gbuf_rptr(mp); - UAS_ASSIGN_HTON(ddp->ddpx_length, (length + DDPL_FRAME_LEN)); - UAS_ASSIGN(ddp->ddpx_cksm, 0); - if (sp) { - if (sp->useCheckSum) - UAS_ASSIGN_HTON(ddp->ddpx_cksm, 1); - } - - NET_ASSIGN(ddp->ddpx_dnet, dstnetaddr->a.net); - ddp->ddpx_dnode = dstnetaddr->a.node; - ddp->ddpx_source = sp ? sp->localSocket : ddp->ddpx_dest; - ddp->ddpx_dest = dstnetaddr->a.socket; - - ddp->ddpx_type = ddptype; - length = 0; - mp = gbuf_next(mp); - - } - - DDP_OUTPUT(mlist); - return 0; -} - -void NotifyUser( - __unused CCBPtr sp) - -{ -/* - pidsig(sp->pid, SIGIO); -*/ -} - -void UrgentUser( - __unused CCBPtr sp) -{ -/* - pidsig(sp->pid, SIGURG); -*/ -} diff --git a/bsd/netat/adsp.h b/bsd/netat/adsp.h deleted file mode 100644 index 7641f8d22..000000000 --- a/bsd/netat/adsp.h +++ /dev/null @@ -1,722 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * - * ORIGINS: 82 - * - * (C) COPYRIGHT Apple Computer, Inc. 1992-1996 - * All Rights Reserved - * - */ - -#ifndef _NETAT_ADSP_H_ -#define _NETAT_ADSP_H_ -#include -#include - -#ifdef __APPLE_API_OBSOLETE - -/* ADSP flags for read, write, and close routines */ - -#define ADSP_EOM 0x01 /* Sent or received EOM with data */ -#define ADSP_FLUSH 0x02 /* Send all data in send queue */ -#define ADSP_WAIT 0x04 /* Graceful close, wait 'til snd queue emptys */ - - -/* ADSP events to be fielded by the user event handler */ - -#define ADSP_EV_ATTN 0x02 /* Attention data recvd. */ -#define ADSP_EV_RESET 0x04 /* Forward reset recvd. */ -#define ADSP_EV_CLOSE 0x08 /* Close advice recvd. 
*/ - - -/* ADSP packet control codes */ - -#define ADSP_PROBEACK 0 /* Probe or acknowledgement */ -#define ADSP_OPENCONREQUEST 1 /* Open connection request */ -#define ADSP_OPENCONACK 2 /* Open connection acknowledgement */ -#define ADSP_OPENCONREQACK 3 /* Open connection request + ack */ -#define ADSP_OPENCONDENIAL 4 /* Open connection denial */ -#define ADSP_CLOSEADVICE 5 /* Close connection advice */ -#define ADSP_FORWARDRESET 6 /* Forward reset */ -#define ADSP_FORWARDRESETACK 7 /* Forward reset acknowledgement */ -#define ADSP_RETRANSADVICE 8 /* Retransmit advice */ - - -/* Miscellaneous constants */ - -#define ADSP_MAXDATA 572 /* Maximum data bytes in ADSP packet */ -#define ADSP_MAXATTNDATA 570 /* Maximum data bytes in attn msg */ -#define ADSP_DDPTYPE 7 /* DDP protocol type for ADSP */ -#define ADSP_VERSION 0x0100 /* ADSP version */ - - -/* Some additional ADSP error codes */ - -#define EQUEWASEMP 10001 -#define EONEENTQUE 10002 -#define EQUEBLOCKED 10003 -#define EFWDRESET 10004 -#define EENDOFMSG 10005 -#define EADDRNOTINUSE 10006 - - - -/* Tuning Parameter Block */ - -struct tpb { - unsigned Valid : 1; /* Tuning parameter block is valid */ - unsigned short TransThresh; /* Transmit threshold */ - unsigned TransTimerIntrvl; /* Transmit timer interval */ - unsigned short SndWdwCloThresh; /* Send window closing threshold */ - unsigned SndWdwCloIntrvl; /* Send window closed interval */ - unsigned char SndWdwCloBckoff; /* Send window closed backoff rate */ - unsigned ReTransIntrvl; /* Retransmit interval */ - unsigned char ReTransBckoff; /* Retransmit backoff rate */ - unsigned RestartIntrvl; /* Restart sender interval */ - unsigned char RestartBckoff; /* Restart sender backoff rate */ - unsigned SndQBufSize; /* Send queue buffer size */ - unsigned short RcvQMaxSize; /* Maximum size of the receive queue */ - unsigned short RcvQCpyThresh; /* Receive queue copy threshold */ - unsigned FwdRstIntrvl; /* Forward reset interval */ - unsigned char FwdRstBckoff; /* Forward reset backoff rate */ - unsigned AttnIntrvl; /* Retransmit attn msg interval */ - unsigned char AttnBckoff; /* Retransmit attn msg backoff rate */ - unsigned OpenIntrvl; /* Retransmit open request interval */ - unsigned char OpenMaxRetry; /* Open request maximum retrys */ - unsigned char RetransThresh; /* Retransmit advice threshold */ - unsigned ProbeRetryMax; /* Maximum number of probes */ - unsigned SndByteCntMax; /* Maximum number bytes in send queue */ -}; - - -/* Tuning Parameter Tags */ - -#define ADSP_TRANSTHRESH 1 /* Transmit threshold */ -#define ADSP_TRANSTIMERINTRVL 2 /* Transmit timer interval */ -#define ADSP_SNDWDWCLOTHRESH 3 /* Send window closing threshold */ -#define ADSP_SNDWDWCLOINTRVL 4 /* Send window closed interval */ -#define ADSP_SNDWDWCLOBCKOFF 5 /* Send window closed backoff rate */ -#define ADSP_RETRANSINTRVL 6 /* Retransmit interval */ -#define ADSP_RETRANSBCKOFF 7 /* Retransmit backoff rate */ -#define ADSP_RESTARTINTRVL 8 /* Restart sender interval */ -#define ADSP_RESTARTBCKOFF 9 /* Restart sender backoff rate */ -#define ADSP_SNDQBUFSIZE 10 /* Send queue buffer size */ -#define ADSP_RCVQMAXSIZE 11 /* Receive queue maximum size */ -#define ADSP_RCVQCPYTHRESH 12 /* Receive queue copy threshold */ -#define ADSP_FWDRSTINTRVL 13 /* Forward reset retransmit interval */ -#define ADSP_FWDRSTBCKOFF 14 /* Forward reset backoff rate */ -#define ADSP_ATTNINTRVL 15 /* Rexmit attention message interval */ -#define ADSP_ATTNBCKOFF 16 /* Attention message backoff rate */ -#define ADSP_OPENINTRVL 17 /* 
Retransmit open request interval */ -#define ADSP_OPENMAXRETRY 18 /* Open request max retrys */ -#define ADSP_RETRANSTHRESH 19 /* Retransmit advice threshold */ -#define ADSP_PROBERETRYMAX 20 -#define ADSP_SNDBYTECNTMAX 21 - -#define TuneParamCnt 21 /* The number of tuning parameters */ - -/* Connection Status Tags */ - -#define ADSP_STATE 1 /* The connection state */ -#define ADSP_SNDSEQ 2 /* Send sequence number */ -#define ADSP_FIRSTRTMTSEQ 3 /* First retransmit sequence number */ -#define ADSP_SNDWDWSEQ 4 /* Send window sequence number */ -#define ADSP_RCVSEQ 5 /* Receive sequence number */ -#define ADSP_ATTNSNDSEQ 6 /* Attn msg send sequence number */ -#define ADSP_ATTNRCVSEQ 7 /* Attn msg receive sequence number */ -#define ADSP_RCVWDW 8 /* Receive window size */ -#define ADSP_ATTNMSGWAIT 9 /* Attn msg is in the receive queue */ - -#define ConStatTagCnt 9 /* Number of connection status tags */ - -#define ADSP_INVALID 0 /* Invalid connection control block */ -#define ADSP_LISTEN 1 /* Waiting for an open con req */ -#define ADSP_OPENING 2 /* No state info, sending open req */ -#define ADSP_MYHALFOPEN 4 /* His state info, sending open req */ -#define ADSP_HISHALFOPEN 8 /* He has my state info, sndng op req */ -#define ADSP_OPEN 16 /* Connection is operational */ -#define ADSP_TORNDOWN 32 /* Probe timer has expired 4 times */ -#define ADSP_CLOSING 64 /* Client close, emptying send Queues */ -#define ADSP_CLOSED 128 /* Close adv rcvd, emptying rcv Queues */ - -/* Management Counters */ - -#define ADSP_ATTNACKRCVD 1 /* Attn msg ack received */ -#define ADSP_ATTNACKACPTD 2 /* Attn msg ack accepted */ -#define ADSP_PROBERCVD 3 /* Probe received */ -#define ADSP_ACKRCVD 4 /* Explicit ack msg received */ -#define ADSP_FWDRSTRCVD 5 /* Forward reset received */ -#define ADSP_FWDRSTACPTD 6 /* Forward reset accepted */ -#define ADSP_FWDRSTACKRCVD 7 /* Forward reset ack received */ -#define ADSP_FWDRSTACKACPTD 8 /* Forward reset ack accepted */ -#define ADSP_ATTNRCVD 9 /* Attn msg received */ -#define ADSP_ATTNACPTD 10 /* Attn msg accepted */ -#define ADSP_DATARCVD 11 /* Data msg received */ -#define ADSP_DATAACPTD 12 /* Data msg Accepted */ -#define ADSP_ACKFIELDCHKD 13 /* Ack field checked */ -#define ADSP_ACKNRSFIELDACPTD 14 /* Next receive seq field accepted */ -#define ADSP_ACKSWSFIELDACPTD 15 /* Send window seq field accepted */ -#define ADSP_ACKREQSTD 16 /* Ack requested */ -#define ADSP_LOWMEM 17 /* Low memory */ -#define ADSP_OPNREQEXP 18 /* Open request timer expired */ -#define ADSP_PROBEEXP 19 /* Probe timer expired */ -#define ADSP_FWDRSTEXP 20 /* Forward reset timer expired */ -#define ADSP_ATTNEXP 21 /* Attention timer expired */ -#define ADSP_TRANSEXP 22 /* Transmit timer expired */ -#define ADSP_RETRANSEXP 23 /* Retransmit timer expired */ -#define ADSP_SNDWDWCLOEXP 24 /* Send window closed timer expired */ -#define ADSP_RESTARTEXP 25 /* Restart sender timer expired */ -#define ADSP_RESLOWEXP 26 /* Resources are low timer expired */ -#define ADSP_RETRANSRCVD 27 /* Retransmit advice received */ - -#define InfoTagCnt 27 - -/* Length of the parameter and status lists */ - -#define ADSP_DEFLEN (TuneParamCnt * 6 + 1) -#define ADSP_STALEN (ConStatTagCnt * 6 + 1) -#define ADSP_INFOLEN (InfoTagCnt * 6 + 1) - -/* from h/ADSP.h */ - -/* result codes */ - -#define controlErr -17 /*I/O System Errors*/ - -#define errENOBUFS -1281 -#define errRefNum -1280 /* bad connection refNum */ -#define errAborted -1279 /* control call was aborted */ -#define errState -1278 /* bad connection state for this 
operation */ -#define errOpening -1277 /* open connection request failed */ -#define errAttention -1276 /* attention message too long */ -#define errFwdReset -1275 /* read terminated by forward reset */ -#define errDSPQueueSize -1274 /* DSP Read/Write Queue Too small */ -#define errOpenDenied -1273 /* open connection request was denied */ - -/* control codes */ - -#define dspInit 255 /* create a new connection end */ -#define dspRemove 254 /* remove a connection end */ -#define dspOpen 253 /* open a connection */ -#define dspClose 252 /* close a connection */ -#define dspCLInit 251 /* create a connection listener */ -#define dspCLRemove 250 /* remove a connection listener */ -#define dspCLListen 249 /* post a listener request */ -#define dspCLDeny 248 /* deny an open connection request */ -#define dspStatus 247 /* get status of connection end */ -#define dspRead 246 /* read data from the connection */ -#define dspWrite 245 /* write data on the connection */ -#define dspAttention 244 /* send an attention message */ -#define dspOptions 243 /* set connection end options */ -#define dspReset 242 /* forward reset the connection */ -#define dspNewCID 241 /* generate a cid for a connection end */ - - -/* connection opening modes */ - -#define ocRequest 1 /* request a connection with remote */ -#define ocPassive 2 /* wait for a connection request from remote */ -#define ocAccept 3 /* accept request as delivered by listener */ -#define ocEstablish 4 /* consider connection to be open */ - - -/* connection end states */ - -#define sListening 1 /* for connection listeners */ -#define sPassive 2 /* waiting for a connection request from remote */ -#define sOpening 3 /* requesting a connection with remote */ -#define sOpen 4 /* connection is open */ -#define sClosing 5 /* connection is being torn down */ -#define sClosed 6 /* connection end state is closed */ - - - -/* client event flags */ - -#define eClosed 0x80 /* received connection closed advice */ -#define eTearDown 0x40 /* connection closed due to broken connection */ -#define eAttention 0x20 /* received attention message */ -#define eFwdReset 0x10 /* received forward reset advice */ - -/* miscellaneous constants */ - -#define attnBufSize 570 /* size of client attention buffer */ -#define minDSPQueueSize 100 /* Minimum size of receive or send Queue */ -#define defaultDSPQS 16384 /* random guess */ -#define RecvQSize defaultDSPQS -#define SendQSize defaultDSPQS - -/* *** Seems to be a problem in Mac OS X too *** */ -/* Solaris defines u as (curproc->p_user) -#if defined(u) -#undef u -#endif -*/ - -typedef long (*ProcPtr)(void *, void *); /* XXX */ -typedef ProcPtr *ProcHandle; -typedef char *Ptr; -typedef Ptr *Handle; - -/* connection control block */ - -struct TRCCB { - u_char *ccbLink; /* link to next ccb */ - u_short refNum; /* user reference number */ - u_short state; /* state of the connection end */ - u_char userFlags; /* flags for unsolicited connection events */ - u_char localSocket; /* socket number of this connection end */ - at_inet_t remoteAddress; /* internet address of remote end */ - u_short attnCode; /* attention code received */ - u_short attnSize; /* size of received attention data */ - u_char *attnPtr; /* ptr to received attention data */ - u_char reserved[220]; /* for adsp internal use */ -}; - -typedef struct TRCCB TRCCB; -typedef TRCCB *TPCCB; - -/* init connection end parameters */ - -struct TRinitParams { - TPCCB ccbPtr; /* pointer to connection control block */ - ProcPtr userRoutine; /* client routine to call on event */ - 
u_char *sendQueue; /* client passed send queue buffer */ - u_char *recvQueue; /* client passed receive queue buffer */ - u_char *attnPtr; /* client passed receive attention buffer */ - u_short sendQSize; /* size of send queue (0..64K bytes) */ - u_short recvQSize; /* size of receive queue (0..64K bytes) */ - u_char localSocket; /* local socket number */ -}; - -typedef struct TRinitParams TRinitParams; - -/* open connection parameters */ - -struct TRopenParams { - u_short localCID; /* local connection id */ - u_short remoteCID; /* remote connection id */ - at_inet_t remoteAddress; /* address of remote end */ - at_inet_t filterAddress; /* address filter */ - unsigned long sendSeq; /* local send sequence number */ - u_long recvSeq; /* receive sequence number */ - u_long attnSendSeq; /* attention send sequence number */ - u_long attnRecvSeq; /* attention receive sequence number */ - u_short sendWindow; /* send window size */ - u_char ocMode; /* open connection mode */ - u_char ocInterval; /* open connection request retry interval */ - u_char ocMaximum; /* open connection request retry maximum */ -}; - -typedef struct TRopenParams TRopenParams; - -/* close connection parameters */ - -struct TRcloseParams { - u_char abort; /* abort connection immediately if non-zero */ -}; - -typedef struct TRcloseParams TRcloseParams; - -/* client status parameter block */ - -struct TRstatusParams { - TPCCB ccbPtr; /* pointer to ccb */ - u_short sendQPending; /* pending bytes in send queue */ - u_short sendQFree; /* available buffer space in send queue */ - u_short recvQPending; /* pending bytes in receive queue */ - u_short recvQFree; /* available buffer space in receive queue */ -}; - -typedef struct TRstatusParams TRstatusParams; - -/* read/write parameter block */ - -struct TRioParams { - u_short reqCount; /* requested number of bytes */ - u_short actCount; /* actual number of bytes */ - u_char *dataPtr; /* pointer to data buffer */ - u_char eom; /* indicates logical end of message */ - u_char flush; /* send data now */ - u_char dummy[2]; /*### LD */ -}; - -typedef struct TRioParams TRioParams; - -/* attention parameter block */ - -struct TRattnParams { - u_short attnCode; /* client attention code */ - u_short attnSize; /* size of attention data */ - u_char *attnData; /* pointer to attention data */ - u_char attnInterval; /* retransmit timer in 10-tick intervals */ - u_char dummy[3]; /* ### LD */ -}; - -typedef struct TRattnParams TRattnParams; - -/* client send option parameter block */ - -struct TRoptionParams { - u_short sendBlocking; /* quantum for data packets */ - u_char sendTimer; /* send timer in 10-tick intervals */ - u_char rtmtTimer; /* retransmit timer in 10-tick intervals */ - u_char badSeqMax; /* threshold for sending retransmit advice */ - u_char useCheckSum; /* use ddp packet checksum */ - u_short filler; /* ### LD */ - int newPID; /* ### Temp for backward compatibility 02/11/94 */ -}; - -typedef struct TRoptionParams TRoptionParams; - -/* new cid parameters */ - -struct TRnewcidParams { - u_short newcid; /* new connection id returned */ -}; - -typedef struct TRnewcidParams TRnewcidParams; - -union adsp_command { - TRinitParams initParams; /* dspInit, dspCLInit */ - TRopenParams openParams; /* dspOpen, dspCLListen, dspCLDeny */ - TRcloseParams closeParams; /* dspClose, dspRemove */ - TRioParams ioParams; /* dspRead, dspWrite, dspAttnRead */ - TRattnParams attnParams; /* dspAttention */ - TRstatusParams statusParams; /* dspStatus */ - TRoptionParams optionParams; /* dspOptions */ - 
TRnewcidParams newCIDParams; /* dspNewCID */ -}; - -/* ADSP CntrlParam ioQElement */ - -struct DSPParamBlock { - struct QElem *qLink; - short qType; - short ioTrap; - Ptr ioCmdAddr; - ProcPtr ioCompletion; - short ioResult; - char *ioNamePtr; - short ioVRefNum; - short ioCRefNum; /* adsp driver refNum */ - short csCode; /* adsp driver control code */ - long qStatus; /* adsp internal use */ - u_short ccbRefNum; /* connection end refNum */ - union adsp_command u; -}; - -typedef struct DSPParamBlock DSPParamBlock; -typedef DSPParamBlock *DSPPBPtr; - -struct adspcmd { - struct adspcmd *qLink; - u_int ccbRefNum; - caddr_t ioc; -#ifdef KERNEL - gref_t *gref; - gbuf_t *mp; -#else - void *gref; - void *mp; -#endif - short ioResult; - u_short ioDirection; - short csCode; - u_short socket; - union adsp_command u; -}; - -/* from h/adsp_frames.h */ - -#ifdef NOT_USED -/* - * LAP Frame Information - */ - -typedef struct { - u_char lap_dest; - u_char lap_src; - u_char lap_type; - u_char lap_data[1]; -} LAP_FRAME; - -#define LAP_FRAME_LEN 3 - -#define MAX_FRAME_SIZE 603 - -#define LAP_DDP 0x01 -#define LAP_DDPX 0x02 - -typedef struct { - ua_short ddp_length; /* length of ddp fields */ - u_char ddp_dest; /* destination socket */ - u_char ddp_source; /* source socket */ - u_char ddp_type; /* protocol type */ - u_char ddp_data[1]; /* data field */ -} DDP_FRAME; - -#define DDPS_FRAME_LEN 5 -#endif /* NOT_USED */ - -typedef struct { - ua_short ddpx_length; /* length and hop count */ - ua_short ddpx_cksm; /* checksum */ - at_net ddpx_dnet; /* destination network number */ - at_net ddpx_snet; /* source network number */ - u_char ddpx_dnode; /* destination node */ - u_char ddpx_snode; /* source node */ - u_char ddpx_dest; /* destination socket */ - u_char ddpx_source; /* source socket */ - u_char ddpx_type; /* protocol type */ - u_char ddpx_data[1]; /* data field */ -} DDPX_FRAME; - -#define DDPL_FRAME_LEN 13 - -#ifdef NOT_USED -typedef struct { - u_char nbp_ctrl_cnt; /* control and tuple count */ - u_char nbp_id; /* enquiry/reply id */ - u_char nbp_data[1]; /* tuple space */ -} NBP_FRAME; - -#define NBP_TYPE_MASK 0xf0 /* mask of ctrl_cnt field */ -#define NBP_CNT_MASK 0x0f /* mask for number of tuples */ -#define NBP_BROADCAST 0x10 /* internet lookup */ -#define NBP_LOOKUP 0x20 /* lookup request */ -#define NBP_REPLY 0x30 /* response to lookup */ - -typedef struct { - u_char atp_control; /* control field */ - u_char atp_map; /* bitmap for acknowledgement */ - ua_short atp_tid; /* transaction id */ - union - { - u_char b[4]; /* user u_chars */ - ua_long dw; - } atp_ub; - u_char atp_data[1]; /* data field */ -} ATP_FRAME; - -#define ATP_FRAME_LEN 8 - -#define ATP_TREQ 0x40 /* transaction request */ -#define ATP_TRESP 0x80 /* response packet */ -#define ATP_TREL 0xc0 /* transaction release packet */ -#define ATP_XO 0x20 /* exactly once flag */ -#define ATP_EOM 0x10 /* end of message flag */ -#define ATP_STS 0x08 /* send transaction status */ - -#define ATP_TYPE(x) ((x)->atp_control & 0xc0) - -typedef struct { - at_net net1; - u_char zonename[33]; -} ZIP_1; - -typedef struct { - at_net net1; - at_net net2; - u_char zonename[33]; -} ZIP_2; - -typedef struct { - u_char zip_command; /* zip command number */ - u_char flags; /* Bit-mapped */ - union - { - ZIP_1 o; /* Packet has one net number */ - ZIP_2 r; /* Packet has cable range */ - } u; -} ZIP_FRAME; - -/* Flags in the ZIP GetNetInfo & NetInfoReply buffer */ - -#define ZIPF_BROADCAST 0x80 -#define ZIPF_ZONE_INVALID 0x80 -#define ZIPF_USE_BROADCAST 0x40 -#define 
ZIPF_ONE_ZONE 0x20 - -#define ZIP_QUERY 1 /* ZIP Commands in zip frames */ -#define ZIP_REPLY 2 -#define ZIP_TAKEDOWN 3 -#define ZIP_BRINGUP 4 -#define ZIP_GETNETINFO 5 -#define ZIP_NETINFOREPLY 6 -#define ZIP_NOTIFY 7 - -#define ZIP_GETMYZONE 7 /* ZIP commands in atp user u_chars[0] */ -#define ZIP_GETZONELIST 8 -#define ZIP_GETLOCALZONES 9 -#define ZIP_GETYOURZONE 10 - -/* - * Response to Responder Request type #1. - * - * The first 4 u_chars are actually the 4 ATP user u_chars - * Following this structure are 4 PASCAL strings: - * System Version String. (max 127) - * Finder Version String. (max 127) - * LaserWriter Version String. (max 127) - * AppleShare Version String. (max 24) - */ -typedef struct -{ - u_char UserU_Chars[2]; - ua_short ResponderVersion; - ua_short AtalkVersion; - u_char ROMVersion; - u_char SystemType; - u_char SystemClass; - u_char HdwrConfig; - ua_short ROM85Version; - u_char ResponderLevel; - u_char ResponderLink; - u_char data[1]; -} RESPONDER_FRAME; -#endif /* NOT_USED */ - -/* - * ADSP Frame - */ -typedef struct { - ua_short CID; - ua_long pktFirstByteSeq; - ua_long pktNextRecvSeq; - ua_short pktRecvWdw; - u_char descriptor; /* Bit-Mapped */ - u_char data[1]; -} ADSP_FRAME, *ADSP_FRAMEPtr; - -#define ADSP_FRAME_LEN 13 - -#define ADSP_CONTROL_BIT 0x80 -#define ADSP_ACK_REQ_BIT 0x40 -#define ADSP_EOM_BIT 0x20 -#define ADSP_ATTENTION_BIT 0x10 -#define ADSP_CONTROL_MASK 0x0F - -#define ADSP_CTL_PROBE 0x00 /* Probe or acknowledgement */ -#define ADSP_CTL_OREQ 0x01 /* Open Connection Request */ -#define ADSP_CTL_OACK 0x02 /* Open Request acknowledgment */ -#define ADSP_CTL_OREQACK 0x03 /* Open Request and acknowledgement */ -#define ADSP_CTL_ODENY 0x04 /* Open Request denial */ -#define ADSP_CTL_CLOSE 0x05 /* Close connection advice */ -#define ADSP_CTL_FRESET 0x06 /* Forward Reset */ -#define ADSP_CTL_FRESET_ACK 0x07 /* Forward Reset Acknowledgement */ -#define ADSP_CTL_RETRANSMIT 0x08 /* Retransmit advice */ - -typedef struct { - ua_short version; /* Must be in network byte order */ - ua_short dstCID; /* */ - ua_long pktAttnRecvSeq; /* Must be in network byte order */ -} ADSP_OPEN_DATA, *ADSP_OPEN_DATAPtr; - -#define ADSP_OPEN_FRAME_LEN 8 - -#define ADSP_MAX_DATA_LEN 572 - -/* from h/adsp_ioctl.h */ - -/* - * Defines that correspond to atlog.h in the N & C Appletalk - * sources. 
- */ - -#define AT_MID_ADSP 212 - -/* Streams ioctl definitions */ - -#define ADSP_IOCTL(i) ((i>>8) == AT_MID_ADSP) -#define ADSPATTNREAD ((AT_MID_ADSP<<8) | 254) /* read attention data */ -#define ADSPOPEN ((AT_MID_ADSP<<8) | 253) /* open a connection */ -#define ADSPCLOSE ((AT_MID_ADSP<<8) | 252) /* close a connection */ -#define ADSPCLINIT ((AT_MID_ADSP<<8) | 251) /* create a conn listener */ -#define ADSPCLREMOVE ((AT_MID_ADSP<<8) | 250) /* remove a conn listener */ -#define ADSPCLLISTEN ((AT_MID_ADSP<<8) | 249) /* post a listener request */ -#define ADSPCLDENY ((AT_MID_ADSP<<8) | 248) /* deny an open connection request */ -#define ADSPSTATUS ((AT_MID_ADSP<<8) | 247) /* get status of conn end */ -#define ADSPREAD ((AT_MID_ADSP<<8) | 246) /* read data from conn */ -#define ADSPWRITE ((AT_MID_ADSP<<8) | 245) /* write data on the conn */ -#define ADSPATTENTION ((AT_MID_ADSP<<8) | 244) /* send attention message */ -#define ADSPOPTIONS ((AT_MID_ADSP<<8) | 243) /* set conn end options */ -#define ADSPRESET ((AT_MID_ADSP<<8) | 242) /* forward reset connection */ -#define ADSPNEWCID ((AT_MID_ADSP<<8) | 241) /* generate a cid conn end */ -#define ADSPBINDREQ ((AT_MID_ADSP<<8) | 240) -#define ADSPGETSOCK ((AT_MID_ADSP<<8) | 239) -#define ADSPGETPEER ((AT_MID_ADSP<<8) | 238) - -#ifdef KERNEL_PRIVATE - -/* from h/adsp_adsp.h */ - -/* Definitions from strgeneric.h (on AIX?) */ -#define STR_IGNORE 0 -#define STR_PUTNEXT 1 -#define STR_PUTBACK 2 -#define STR_QTIME (HZ >> 3) - -struct ccb; -#define CCBPtr struct ccb * -extern int adspInit(CCBPtr sp, struct adspcmd *ap); -extern int adspOpen(register CCBPtr sp, register struct adspcmd *pb); -extern int adspCLListen( register CCBPtr sp, register struct adspcmd *pb); -extern int adspClose(register CCBPtr sp, register struct adspcmd *pb); -extern int adspCLDeny(struct adspcmd *pb, CCBPtr sp); -extern int adspStatus(CCBPtr sp, register struct adspcmd *pb); -extern int adspRead(register CCBPtr sp, register struct adspcmd *pb); -extern int adspWrite(CCBPtr sp, struct adspcmd *pb); -extern int adspAttention(register struct adspcmd *pb, register CCBPtr sp); -extern int adspOptions(CCBPtr sp, struct adspcmd *pb); -extern int adspReset(CCBPtr sp, struct adspcmd *pb); -extern int adspNewCID(CCBPtr sp, struct adspcmd *pb); -extern int adspPacket(gref_t *gref, gbuf_t *mp); - -int adsp_open(gref_t *); -void adsp_input(gbuf_t *); - -#undef CCBPtr - - -struct adsp_debug { - int ad_time; - int ad_seq; - int ad_caller; - int ad_descriptor; - int ad_bits; - short ad_sendCnt; - short ad_sendMax; - int ad_maxSendSeq; - int ad_sendWdwSeq; -}; - -#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_ADSP_H_ */ diff --git a/bsd/netat/adsp_CLDeny.c b/bsd/netat/adsp_CLDeny.c deleted file mode 100644 index 3830fda0b..000000000 --- a/bsd/netat/adsp_CLDeny.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * CLDeny.c - * - * From Mike Shoemaker 9/6/90 - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* - * dspCLDeny - * - * INPUTS: - * --> ccbRefNum refnum of connection listener - * --> remoteCID connection identifier of remote connection end - * --> remoteAddress internet address of remote connection end - * - * OUTPUTS: - * none - * - * ERRORS: - * errRefNum bad connection refnum - * errState not a connection listener - * errAborted request aborted by a Remove call - */ -int adspCLDeny(struct adspcmd *pb, CCBPtr sp) -{ - gbuf_t *mp; - ADSP_FRAMEPtr adspp; - ADSP_OPEN_DATAPtr adspop; - - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - mp = gbuf_alloc(AT_WR_OFFSET + DDPL_FRAME_LEN + ADSP_FRAME_LEN + ADSP_OPEN_FRAME_LEN, - PRI_LO); - gbuf_rinc(mp,AT_WR_OFFSET); - gbuf_wset(mp,DDPL_FRAME_LEN); - adspp = (ADSP_FRAMEPtr)gbuf_wptr(mp); - gbuf_winc(mp,ADSP_FRAME_LEN); - bzero((caddr_t) gbuf_rptr(mp),DDPL_FRAME_LEN + ADSP_FRAME_LEN + ADSP_OPEN_FRAME_LEN); - adspp->descriptor = ADSP_CONTROL_BIT | ADSP_CTL_ODENY; - adspop = (ADSP_OPEN_DATAPtr)gbuf_wptr(mp); - gbuf_winc(mp,ADSP_OPEN_FRAME_LEN); - UAS_ASSIGN_HTON(adspop->dstCID, pb->u.openParams.remoteCID); - UAS_ASSIGN_HTON(adspop->version, 0x100); - adsp_sendddp(sp, mp, - DDPL_FRAME_LEN + ADSP_FRAME_LEN + ADSP_OPEN_FRAME_LEN, - (AddrUnion *)&pb->u.openParams.remoteAddress, DDP_ADSP); - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - return 0; -} diff --git a/bsd/netat/adsp_CLListen.c b/bsd/netat/adsp_CLListen.c deleted file mode 100644 index a7215f094..000000000 --- a/bsd/netat/adsp_CLListen.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* dspCLListen.c - * - * From Mike Shoemaker v01.02 04/19/90 mbs - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * dspCLListen - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * --> filterAddress filter for incoming open connection requests - * - * OUTPUTS: - * <-- remoteCID connection identifier of remote connection end - * <-- remoteAddress internet address of remote connection end - * <-- sendSeq initial send sequence number to use - * <-- sendWindow initial size of remote end's receive buffer - * <-- attnSendSeq initial attention send sequence number to use - * - * ERRORS: - * errRefNum bad connection refnum - * errState not a connection listener - * errAborted request aborted by a Remove call - */ -int adspCLListen(sp, pb) /* (DSPPBPtr pb) */ - register CCBPtr sp; - register struct adspcmd *pb; -{ - register struct adspcmd *clpb; - gbuf_t *mp; - - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - if (sp->state != sListening) { /* But this isn't a connection listener! */ - pb->ioResult = errState; - return EALREADY; - } - - if ((mp = gbuf_copym(pb->mp))) { /* keep a copy of the parameter block */ - pb->ioResult = 1; /* not done */ - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); /* release user ioctl block */ - clpb = (struct adspcmd *)gbuf_rptr(mp); - clpb->ioc = 0; - clpb->mp = mp; - if (qAddToEnd((struct qlink **)&sp->opb, (struct qlink *)clpb)) /* Add to list of listeners */ - return EFAULT; /* bogus, but discriminate from other errors */ - } else { - pb->ioResult = errDSPQueueSize; - return ENOBUFS; - } - return 0; - -} diff --git a/bsd/netat/adsp_Close.c b/bsd/netat/adsp_Close.c deleted file mode 100644 index d769015cd..000000000 --- a/bsd/netat/adsp_Close.c +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1990, 1995-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* dspClose.c - * From Mike Shoemaker v01.16 06/29/90 mbs - */ -/* - * Change log: - * 06/29/95 - Modified to handle flow control for writing (Tuyen Nguyen) - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -static void qRemove(CCBPtr, CCBPtr); - - -/* - * CheckOkToClose - * - * Check to see if it is OK to close this connection cleanly. - * - * INPUTS: - * Stream pointer - * OUTPUTS: - * True if no outstanding transactions and we can close cleanly - */ -int CheckOkToClose(sp) /* (CCBPtr sp) */ - CCBPtr sp; -{ - - if (sp->sData) /* Outstanding data ? */ - return 0; - - if (sp->sapb) /* Outstanding send attention ? */ - return 0; - - if (sp->frpb) /* Outstanding forward reset ? */ - return 0; - - if (sp->sendAttnAck) - return 0; - - if (sp->sendDataAck) - return 0; - - /* - * Must be OK to close - */ - sp->sendCtl |= B_CTL_CLOSE; /* So, need to send close advice */ - sp->callSend = 1; - - return 1; /* It's OK to close */ -} - - -/* - * CompleteQueue - * - * Given the address of the head of a queue of DSP parameter blocks, zero - * the queue, and complete each item on the queue with the given result - * code. - * - * INPUTS: - * qhead Address of ptr to first queue element - * code The result code - * OUTPUTS: - * none - */ -int CompleteQueue(qhead, code) /* (DSPPBPtr FPTR qhead, OSErr code) */ - struct adspcmd **qhead; - int code; -{ - register struct adspcmd *p; - register struct adspcmd *n; - register gref_t *gref; - register int _total = 0; - CCBPtr sp = 0; - - n = *qhead; /* Get first item */ - *qhead = 0; /* Zero out the queue */ - if (n) { - gref = n->gref; - if (gref->info) { - sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); - atalk_flush(sp->gref); - } - } - - while ((p = n)) { /* while items left */ - n = (struct adspcmd *)(p->qLink); /* Save next guy */ - p->ioResult = code; - if (sp) { - completepb(sp, p); /* complete the copy of the request */ - _total++; - } else - gbuf_freem(p->mp); - } /* while */ - return(_total); -} - -/* - * RemoveCCB - * - * Called from do close to free up the user's CCB. So, we remove the - * CCB from the list of CCB's. 
- * - * INPUTS: - * sp pointer to ccb - * pb a remove param block to complete when done - * OUTPUTS: - * none - */ -void RemoveCCB(CCBPtr, struct adspcmd *); - -void RemoveCCB(sp, pb) /* (CCBPtr sp, DSPPBPtr pb) */ - CCBPtr sp; - struct adspcmd *pb; -{ - gref_t *gref; - - if (sp->gref == 0) - return; - /* - * Unlink CCB from list - */ - qRemove((CCB *)AT_ADSP_STREAMS, sp); /* remove sp from active streams queue */ - - if (pb) { - pb->ioResult = 0; - if (pb->ioc) /* is this a current or queued request */ - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); /* current */ - else { - completepb(sp, pb); /* queued */ - } - - if (sp->opb && (pb != sp->opb)) { /* if the pb requested is not the */ - pb = sp->opb; /* waiting open pb, complete it too */ - sp->opb = 0; - pb->ioResult = 0; - completepb(sp, pb); - } else { - sp->opb = 0; - } - } - gref = sp->gref; - sp->gref = 0; - if (gref->info == (char *)sp->sp_mp) { /* queue head is still valid */ - unsigned char skt; - - if ((skt = sp->localSocket) != 0) { - if (adspDeassignSocket(sp) == 0) - ddp_notify_nbp(skt, sp->pid, DDP_ADSP); - } - - if (gref->info) { - gbuf_freem((gbuf_t *)gref->info); /* free the CCB */ - gref->info = 0; - } - } else - gbuf_freem(sp->sp_mp); /* our head is already gone, be sure - * to release our resources too */ -} - -int AbortIO(CCBPtr, short); - -int AbortIO(sp, err) - CCBPtr sp; - short err; -{ - register int _total; - - if (sp->gref == 0) - return 0; - /* - * Complete all outstanding transactions. - */ - _total = CompleteQueue(&sp->sapb, err); /* Abort outstanding send attentions */ - CompleteQueue(&sp->frpb, err); /* Abort outstanding forward resets */ - - if (sp->sbuf_mb) { /* clear the send queue */ - gbuf_freel(sp->sbuf_mb); - sp->sbuf_mb = 0; - } - - if (sp->csbuf_mb) { - gbuf_freem(sp->csbuf_mb); - sp->csbuf_mb = 0; - } - sp->sData = 0; - - return(_total); -} - -/* - * DoClose - * - * Called from several places (probe timeout, recv close advice, - * dspRemove, etc.) to change state of connection to closed and - * complete all outstanding I/O. - * - * Will also remove the CCB if there is a dsp remove pending. 
- * - * INPUTS: - * sp An ADSP stream - * OUTPUTS: - * none - */ -void DoClose(sp, err, force_abort) /* (CCBPtr sp, OSErr err) */ - register CCBPtr sp; - int err; - int force_abort; -{ - register struct adspcmd *pb, *np; - register gbuf_t *mp; - int aborted_count; - - dPrintf(D_M_ADSP, D_L_TRACE, ("DoClose: pid=%d,e=%d,a=%d,s=%d,r=%d\n", - sp->pid, err, force_abort, sp->localSocket, sp->removing)); - sp->userFlags |= eClosed; /* Set flag */ - sp->state = sClosed; - sp->openState = O_STATE_NOTHING; - - /* - * Clean up any timer elements - */ - RemoveTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer); - RemoveTimerElem(&adspGlobal.fastTimers, &sp->FlushTimer); - RemoveTimerElem(&adspGlobal.fastTimers, &sp->RetryTimer); - RemoveTimerElem(&adspGlobal.fastTimers, &sp->AttnTimer); - RemoveTimerElem(&adspGlobal.fastTimers, &sp->ResetTimer); - - aborted_count = AbortIO(sp, err); - np = sp->opb; /* Get list of close/removes to complete */ - sp->opb = 0; /* set this list null */ - - while ((pb = np)) { /* Handle all of the close/remove param blks */ - np = (struct adspcmd *)pb->qLink; /* Get next guy (if any) */ - pb->qLink = 0; - pb->ioResult = err; - completepb(sp, pb); - } - if (sp->removing && (force_abort >= 0)) { /* Abort outstanding receives */ - aborted_count += CompleteQueue(&sp->rpb, err); - - if (sp->deferred_mb) { - gbuf_freel(sp->deferred_mb); - sp->deferred_mb = 0; - } - if (sp->attn_mb) { - gbuf_freem(sp->attn_mb); - sp->attn_mb = 0; - } - if (sp->rbuf_mb) { /* clear the rcv queue */ - gbuf_freem(sp->rbuf_mb); - sp->rbuf_mb = 0; - } - if (sp->crbuf_mb) { - gbuf_freem(sp->crbuf_mb); - sp->crbuf_mb = 0; - } - sp->rData = 0; - - /* if our connection has been timed out */ - /* and the user wasn't notified of the TearDown */ - /* because of pending requests on this socket */ - /* then fake a read completion to force the notification */ - - if (force_abort && aborted_count == 0) { - if ((mp = gbuf_alloc(sizeof(struct adspcmd), PRI_HI))) { - pb = (struct adspcmd *)gbuf_rptr(mp); - gbuf_wset(mp,sizeof(struct adspcmd)); - - bzero((caddr_t) pb, sizeof(struct adspcmd)); - pb->mp = mp; - pb->csCode = dspRead; - pb->ioResult = errAborted; - completepb(sp, pb); /* send fake read completion */ - } - } - sp->removing = 0; - RemoveCCB(sp, 0); /* Will call completion routine */ - } - sp->userFlags &= ~eClosed; -} - - -/* - * dspClose - * - * Also called for dspRemove and dspCLRemove. - * Must handle case of multiple close calls being issued (without - * abort bit set) Can only allow one pending remove though. - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * --> abort abort the connection - * - * OUTPUTS: - * none - * - * ERRORS: - * errRefNum Bad connection Refnum - */ -int adspClose(sp, pb) /* (DSPPBPtr pb) */ - register CCBPtr sp; - register struct adspcmd *pb; -{ - register gbuf_t *mp; - - /* Must execute nearly all of this with ints off because user could - * be issuing a second dspRemove while the first is pending. Until - * we can detect this, we must not allow interrupts. - * Also, we can't handle the case where a close was issued earlier, - * and now this is the remove. If the write completion for the - * close advice packet occurs in the middle of this, we might - * foul up. - */ - - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - /* - * Handle dspCLRemove - */ - if (pb->csCode == (short)dspCLRemove) { /* Remove connection listener */ - if (sp->state != (short)sListening) { /* But it's not a listener! 
*/ - pb->ioResult = errState; - return EINVAL; - } - CompleteQueue(&sp->opb, errAborted); /* Complete all dspListens */ - RemoveCCB(sp, pb); /* Will call completion routine */ - return 0; - } - - - /* - * Either dspClose or dspRemove - */ - - if (sp->removing) { /* Don't allow dspRemove or dspClose */ - /* after one dspRemove has been issued. */ - pb->ioResult = errState; - return EINVAL; - } - - - /* - * The previous Macintosh ADSP allowed you to call close on a - * connection that was in the process of opening or passively - * waiting for an open request. It is also legal to close a - * connection that is already closed. No error will be generated. - * - * It is also legal to issue a second close call while the first - * is still pending. - */ - if (pb->csCode == (short)dspClose) { - if ((sp->state == (short)sPassive) || (sp->state == (short)sOpening)) { - sp->state = sClosed; - DoClose(sp, errAborted, 0); - pb->ioResult = 0; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - return 0; - } - - if (sp->state == (word)sClosed) { /* Ok to close a closed connection */ - pb->ioResult = 0; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - return 0; - } - if ((sp->state != (word)sOpen) && (sp->state != (word)sClosing)) { - pb->ioResult = errState; - return EINVAL; - } - - sp->state = sClosing; /* No matter what, we're closing */ - } /* dspClose */ - - else { /* dspRemove */ - sp->removing = 1; /* Prevent allowing another dspClose. */ - /* Tells completion routine of close */ - /* packet to remove us. */ - - if (sp->state == sPassive || sp->state == sClosed || - sp->state == sOpening) { - sp->state = sClosed; - DoClose(sp, errAborted, 0); /* Will remove CCB! */ - return 0; - } else /* sClosing & sOpen */ - sp->state = sClosing; - - } /* dspRemove */ - - if (pb->u.closeParams.abort || CheckOkToClose(sp)) /* going to close */ - { - AbortIO(sp, errAborted); - sp->sendCtl = B_CTL_CLOSE; /* Send close advice */ - } - - pb->ioResult = 1; - if ( (mp = gbuf_copym(pb->mp)) ) { /* duplicate user request */ - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); /* release user */ - pb = (struct adspcmd *)gbuf_rptr(mp); /* get new parameter block */ - pb->ioc = 0; - pb->mp = mp; - qAddToEnd((struct qlink **)&sp->opb, (struct qlink *)pb); /* and save it */ - } else { - pb->ioResult = 0; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); /* release user, and keep no copy - * for kernel bookkeeping, yetch! 
- */ - } - CheckSend(sp); - - return 0; -} - -static void qRemove(qptr, elem) - register CCBPtr qptr; - register CCBPtr elem; -{ - - while(qptr->ccbLink) { - if ((DSPPBPtr)(qptr->ccbLink) == (DSPPBPtr)elem) { - qptr->ccbLink = elem->ccbLink; - elem->ccbLink = 0; - return; - } - qptr = qptr->ccbLink; - } -} - -int RxClose(sp) - register CCBPtr sp; -{ - register gbuf_t *mp; - register struct adspcmd *pb; - - if ((sp->state == sClosing) || (sp->state == sClosed)) - return 0; - - sp->state = sClosed; - CheckReadQueue(sp); /* try to deliver all remaining data */ - - if ( (mp = gbuf_alloc(sizeof(struct adspcmd), PRI_HI)) ) { - pb = (struct adspcmd *)gbuf_rptr(mp); - gbuf_wset(mp,sizeof(struct adspcmd)); - pb->ioc = 0; - pb->mp = mp; - - pb->csCode = dspClose; - pb->ioResult = 0; - completepb(sp, pb); /* send close completion */ - } - -if ((sp->userFlags & eClosed) == 0) - DoClose(sp, errAborted, -1); /* abort send requests and timers */ - - return 0; -} diff --git a/bsd/netat/adsp_Control.c b/bsd/netat/adsp_Control.c deleted file mode 100644 index 040683e0b..000000000 --- a/bsd/netat/adsp_Control.c +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1990, 1995-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* Control.c - * From Mike Shoemaker v01.25 07/02/90 for MacOS - * 09/07/95 - Modified for performance (Tuyen Nguyen) - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* # of additional ticks to add to any timer that we're queuing up. 
For - * very short delays (1 and 2), the timer fires before the transmit - * even takes place */ -#define TX_DLY 2 - -int adsp_window = 1; - -int attachData(CCBPtr, gbuf_t *mp); - -/* - * CalcRecvWdw - * - * INPUTS: - * sp ADSP Stream - * OUTPUTS: - * # of bytes in avail in local receive queue - */ -int CalcRecvWdw(sp) /* (CCBPtr sp) */ - CCBPtr sp; -{ - int bytes; - - bytes = calcRecvQ(sp); - bytes = sp->rbuflen - bytes; /* get what is left */ - - if ((bytes <= 16)) { /* %%% this should be zero */ - sp->rbufFull = 1; /* Save flag that our recv buf is full */ - return 0; - } - else - return ((bytes+bytes+bytes) >> 2) + 1; /* %%% */ -} - -int -calcRecvQ(sp) - CCBPtr sp; -{ - int bytes = 0; -#ifdef AT_Socket - register struct mbuf *m, *p; - - if (((sp->gref)->so)->so_rcv.sb_mb) - for (p = ((sp->gref)->so)->so_rcv.sb_mb; p; p = p->m_nextpkt) - for (m = p; m; m = m->m_next) - bytes += m->m_len; -#else - register gbuf_t *mb; - - if (sp->rData) { /* There is data in buffer */ - if ((mb = sp->rbuf_mb)) { - do { - bytes += gbuf_msgsize(mb); - mb = gbuf_next(mb); - } while (mb); - } - if ((mb = sp->crbuf_mb)) - bytes += gbuf_msgsize(mb); - } -#endif - return bytes; -} - -/* - * CheckSend - * - * Check to see if the transmit PB is available and if there is anything - * to transmit. Start off any pending transmit. - * - * Normally called from the write completion routine - * - * INPUTS: - * sp Connection control block - * OUTPUTS: - * true if sent a packet - */ -void CheckSend(sp) /* (CCBPtr sp) */ - register CCBPtr sp; -{ - int i; - int attnMsg; /* True if attention message */ - register gbuf_t *mp; /* send message block */ -#ifdef notdef - register gbuf_t *tmp; - u_char current; -#endif - char *dp; /* a data pointer */ - int use_attention_code; - int len; /* length used in allocd mblk */ - int datalen; /* amount of data attached to mblk */ - gbuf_t *mprev = 0, *mlist = 0; - -top: - - if (sp->state == sClosed) - return; - - /* get a message block to hold DDP and - * ADSP headers + 2 bytes of attention - * code if necessary */ - if ((mp = gbuf_alloc(AT_WR_OFFSET + DDPL_FRAME_LEN + ADSP_FRAME_LEN + ADSP_OPEN_FRAME_LEN + 2, - PRI_LO)) == 0) { - if (mlist) - gbuf_freel(mlist); - return; /* can't get buffers... do nothing! */ - } - sp->callSend = 0; /* Clear flag */ - use_attention_code = 0; - len = 0; - datalen = 0; - - gbuf_rinc(mp,AT_WR_OFFSET); - gbuf_wset(mp,DDPL_FRAME_LEN); /* leave room for DDP header */ - - if (sp->sendCtl) { - short mask = 0; - - i = sp->sendCtl; /* get local copy bitmap of */ - /* which ctl packets to send. 
*/ - attnMsg = 0; - - if (i & 0x1E) /* One of the open ctrl packets */ - { - - /* point past ADSP header (no attention) */ - dp = ((char *) gbuf_wptr(mp)) + ADSP_FRAME_LEN; - UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->firstRtmtSeq); - - UAS_ASSIGN_HTON(sp->of.version, netw(0x0100)); /* Fill in open connection parms */ - UAS_ASSIGN_HTON(sp->of.dstCID, sp->remCID); /* Destination CID */ - UAL_ASSIGN_HTON(sp->of.pktAttnRecvSeq, sp->attnRecvSeq); - bcopy((caddr_t) &sp->of, (caddr_t) dp, ADSP_OPEN_FRAME_LEN); - len += ADSP_OPEN_FRAME_LEN; - - if (i & B_CTL_OREQ) { - UAS_ASSIGN_HTON(sp->f.CID, sp->locCID); - mask = B_CTL_OREQ; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_OREQ; - } else if (i & B_CTL_OACK) { - UAS_ASSIGN_HTON(sp->f.CID, sp->locCID); - mask = B_CTL_OACK; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_OACK; - } else if (i & B_CTL_OREQACK) { - UAS_ASSIGN_HTON(sp->f.CID, sp->locCID); - mask = B_CTL_OREQACK; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_OREQACK; - } else /* Deny */ - { - UAS_ASSIGN(sp->f.CID, 0); - mask = B_CTL_ODENY; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_ODENY; - UAL_ASSIGN(sp->f.pktFirstByteSeq, 0); - } - - if (i & (B_CTL_OREQ | B_CTL_OREQACK)) - /* Need to start up a timer for it */ - { - /* It's possible that we've received a duplicate - * open request. In this case, there will already be - * a timer queued up for the request+ack - * packet we sent the first time. So remove the timer - * and start another. - */ - RemoveTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer); - InsertTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer, - sp->openInterval+1); - } - } else { - /* seq # of next byte to send */ - UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->sendSeq); - - if (i & B_CTL_CLOSE) { - sp->state = sClosed; /* Now we're closed */ - mask = B_CTL_CLOSE; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_CLOSE; - } else if (i & B_CTL_PROBE) { - mask = B_CTL_PROBE; - sp->f.descriptor = - ADSP_CONTROL_BIT | ADSP_CTL_PROBE | ADSP_ACK_REQ_BIT; - } else if (i & B_CTL_FRESET) { - mask = B_CTL_FRESET; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_FRESET; - InsertTimerElem(&adspGlobal.fastTimers, - &sp->ResetTimer, sp->rtmtInterval+TX_DLY); - } else if (i & B_CTL_FRESETACK) { - mask = B_CTL_FRESETACK; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_FRESET_ACK; - } - else if (i & B_CTL_RETRANSMIT) { - mask = B_CTL_RETRANSMIT; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_CTL_RETRANSMIT; - } - else { - dPrintf(D_M_ADSP, D_L_ERROR, ("CheckSend: Control bit error\n")); - } - } /* non open control packet */ - - sp->sendCtl &= ~mask; - goto sendit; - } /* send control packet */ - - if (sp->sendAttnData) /* Send attn ready to go? */ - { - sp->sendAttnData = 0; /* Clear Flags */ - if (sp->sapb) { - sp->sendAttnAck = 0; /* This will also do an Attn Ack */ - - attnMsg = 1; - sp->f.descriptor = ADSP_ATTENTION_BIT | ADSP_ACK_REQ_BIT; - if (gbuf_cont(sp->sapb->mp)) { - gbuf_cont(mp) = gbuf_dupm(gbuf_cont(sp->sapb->mp)); - /* Major hack here. The ADSP Attn code is butted up against - * the end of the adsp packet header, and the length is - * increased by 2. (There is a pad field behind the adsp - * header in the CCB just for this purpose.) - */ - } - use_attention_code++; - - sp->f.data[0] = high(sp->sapb->u.attnParams.attnCode); - sp->f.data[1] = low(sp->sapb->u.attnParams.attnCode); - InsertTimerElem(&adspGlobal.fastTimers, &sp->AttnTimer, - sp->rtmtInterval+TX_DLY); - goto sendit; - } - } /* attn data */ - - if (sp->sendAttnAck) /* Send attn ack ready to go? 
*/ - { - attnMsg = 1; - sp->f.descriptor = ADSP_CONTROL_BIT | ADSP_ATTENTION_BIT; - sp->sendAttnAck = 0; - goto sendit; - } /* attn ack */ - - if ((sp->state == sOpen || sp->state == sClosing) && /* Correct state */ - (!sp->waitingAck) && /* not waiting for an ACK */ - (sp->sData) && /* have data to send */ - (GTE(sp->sendWdwSeq,sp->sendSeq)) && /* he has room to accept it */ - (sp->pktSendCnt < sp->pktSendMax)) /* haven't sent too many pkts - * in a row. */ - { - attnMsg = 0; - if ((datalen = attachData(sp, mp))) /* attach data to mp */ - goto sendit; /* if successful, sendit */ - } - - if (sp->sendDataAck) { - UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->sendSeq); /* seq # of next byte */ - attnMsg = 0; - sp->f.descriptor = ADSP_CONTROL_BIT; - goto sendit; - } - - /* - * Nothing left to do... - */ - if (mp) - gbuf_freem(mp); - if (mlist) - adsp_sendddp(sp, mlist, 0, &sp->remoteAddress, DDP_ADSP); - return; - -sendit: - - if (attnMsg) { - UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->attnSendSeq); - UAL_ASSIGN_HTON(sp->f.pktNextRecvSeq, sp->attnRecvSeq); - UAS_ASSIGN(sp->f.pktRecvWdw, 0); /* Always zero in attn pkt */ - } else { - sp->sendDataAck = 0; - UAL_ASSIGN_HTON(sp->f.pktNextRecvSeq, sp->recvSeq); - UAS_ASSIGN_HTON(sp->f.pktRecvWdw, CalcRecvWdw(sp)); - } - if (use_attention_code) { - bcopy((caddr_t) &sp->f, (caddr_t) gbuf_wptr(mp), ADSP_FRAME_LEN + 2); - len += ADSP_FRAME_LEN + 2; - } else { - bcopy((caddr_t) &sp->f, (caddr_t) gbuf_wptr(mp), ADSP_FRAME_LEN); - len += ADSP_FRAME_LEN; - } - gbuf_winc(mp,len); /* update mblk length */ - if (mlist) - gbuf_next(mprev) = mp; - else - mlist = mp; - mprev = mp; - - if (sp->state == sClosed) { /* must have sent a close advice */ - /* send header + data */ - adsp_sendddp(sp, mlist, 0, &sp->remoteAddress, DDP_ADSP); - DoClose(sp, 0, -1); /* complete close! */ - return; - } - if (sp->state == sClosing) /* See if we were waiting on this write */ - CheckOkToClose(sp); - goto top; -} - -/* - * completepb delivers a parameter block with all its appropriate fields - * set back to the user. - * - * The assumptions here are that the PB is not linked to any queue, - * that the fields including ioResult are set, and that the - * kernel is no longer interested in the mblks that may or - * may not be linked to this pb. - */ -void completepb(sp, pb) - register CCBPtr sp; - register struct adspcmd *pb; -{ - if (sp->gref && (sp->gref->info == (caddr_t)sp->sp_mp)) { - if (gbuf_len(pb->mp) > sizeof(struct adspcmd)) - gbuf_wset(pb->mp,sizeof(struct adspcmd)); - SndMsgUp(sp->gref, pb->mp); - NotifyUser(sp); - } else - gbuf_freem(pb->mp); -} - - -int -attachData(sp, mp) - register CCBPtr sp; - register gbuf_t *mp; -{ - int seq; - int cnt; - char eom = 0; - int bsize; - int diff; - char sendAckReq; - int partial = 0; /* flag for a partial send */ - int tcnt = 0; - register gbuf_t *smp; /* send data message block */ - register gbuf_t *psmp; /* previous message block */ - - sendAckReq = 0; - - if (LT(sp->sendSeq, sp->firstRtmtSeq)) /* Sanity check on send seq */ - sp->sendSeq = sp->firstRtmtSeq; /* seq must be oldest in buffer. */ - - /* This test and assignment was necessary because the retry VBL could - * have fired and reset send Seq to first Rtmt Seq, and then an - * expected ACK comes in that bumps first Rtmt Seq up. Then we - * have the problem that send Seq is less than first Rtmt Seq. - * The easiest fix to this timing dilemma seems to be to reset - * sendSeq to first Rtmt Seq if we're sending the first packet. 
- */ - UAL_ASSIGN_HTON(sp->f.pktFirstByteSeq, sp->sendSeq); - - if ((smp = sp->sbuf_mb)) /* Get oldest header */ - eom = 1; - else if ((smp = sp->csbuf_mb)) - eom = 0; - - if (smp == 0) { /* this shouldn't happen... */ - sp->sData = 0; - return 0; - } - /* - * Must find next byte to transmit - */ - seq = sp->firstRtmtSeq; /* Seq # of oldest in buffer */ - while ((diff = (sp->sendSeq - seq)) >= ((bsize = gbuf_msgsize(smp)) + eom)) { - seq += bsize + eom; /* update sequence # */ - if (gbuf_next(smp)) { /* if another send buffer */ - smp = gbuf_next(smp); - eom = 1; - } else if (smp == sp->csbuf_mb) { /* seen the current one? */ - smp = 0; - break; - } else if (sp->csbuf_mb) { /* look at it */ - smp = sp->csbuf_mb; - eom = 0; - } else { /* no more buffers */ - smp = 0; - break; - } - } /* while */ - - if (smp) { - if (gbuf_next(smp) == 0) /* last block */ - sendAckReq = 1; - cnt = bsize - diff; /* # of bytes in this block */ - } else - cnt = 0; - - /* - * Check to see if the number of bytes is less than the 'send - * Blocking' setting. If so, then we won't send this data unless - * we're flushing. So we set up a timer to force a flush later. - */ - if ((cnt < sp->sendBlocking) && !sp->writeFlush) { - InsertTimerElem(&adspGlobal.fastTimers, &sp->FlushTimer, - sp->sendInterval); - return 0; /* no data to send */ - } - - if (cnt > ADSP_MAX_DATA_LEN) { /* truncate to one packet */ - cnt = ADSP_MAX_DATA_LEN; - eom = 0; - sendAckReq = 0; /* Won't send ack because end of data */ - partial++; - } - - if (smp) { - /* trim extra bytes off the beginning of the "block" before the copy */ - while (diff) { - if (gbuf_len(smp) > diff) - break; - else - diff -= gbuf_len(smp); - smp = gbuf_cont(smp); - } - if((gbuf_cont(mp) = gbuf_dupm(smp)) == 0) /* copy the data */ - return 0; - smp = gbuf_cont(mp); /* use the new message blocks */ - gbuf_rinc(smp,diff); /* and get to the first byte of data to send */ - } - /* - * Check to see if this many bytes will close the other end's - * receive window. If so, we need to send an ack request along - * with this. sendWdwSeq is the seq # of the last byte that - * the remote has room for - */ - if ((diff = sp->sendWdwSeq + 1 - sp->sendSeq) <= cnt) { - if (diff < cnt) { /* Won't fit exactly */ - eom = 0; /* so can't send EOM */ - cnt = diff; - partial++; - } - sendAckReq = 1; /* Make him tell us new recv. window */ - sp->noXmitFlow = 1; /* Don't do flow control calc. 
*/ - } - - /* trim extra bytes off the tail of the "block" after the copy */ - if (partial && smp) { - psmp = smp; - tcnt = cnt; - while (tcnt && smp) { /* while there are message blocks and data */ - if (tcnt >= gbuf_len(smp)) { - tcnt -= gbuf_len(smp); - if (tcnt) { - psmp = smp; - smp = gbuf_cont(smp); - } else { - if (psmp != smp) { /* not the first item on the list */ - gbuf_cont(psmp) = 0; - gbuf_freem(smp); - smp = psmp; - } else { - gbuf_freem(gbuf_cont(smp)); - gbuf_cont(smp) = 0; - } - break; - } - } else { - gbuf_wset(smp,tcnt); - if (gbuf_cont(smp)) { - gbuf_freem(gbuf_cont(smp)); - gbuf_cont(smp) = 0; - } - break; - } - } - } - - sp->sendSeq += cnt + eom; /* Update sendSeq field */ - - if (GT(sp->sendSeq, sp->maxSendSeq)) /* Keep track of >st ever sent */ - sp->maxSendSeq = sp->sendSeq; - - if (eom) - sp->f.descriptor = ADSP_EOM_BIT; - else - sp->f.descriptor = 0; - - if (sendAckReq || (++sp->pktSendCnt >= sp->pktSendMax)) { - /* Last packet in a series */ - sp->f.descriptor |= ADSP_ACK_REQ_BIT; /* We want an ack to this */ - sp->waitingAck = 1; /* Flag that we're waiting */ - sp->sendStamp = SysTicks(); /* Save time we sent request */ - sp->timerSeq = sp->sendSeq; /* Save seq # we want acked */ - InsertTimerElem(&adspGlobal.fastTimers, &sp->RetryTimer, - sp->rtmtInterval+TX_DLY); - } - return cnt + eom; -} - - - diff --git a/bsd/netat/adsp_Init.c b/bsd/netat/adsp_Init.c deleted file mode 100644 index eb79cdf2b..000000000 --- a/bsd/netat/adsp_Init.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1990, 1996-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* dspInit.c - * - * From Mike Shoemaker v01.20 06/29/90 mbs - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static void InitContinue(CCBPtr, struct adspcmd *); - -/* - * InitContinue - * - * Handle 2nd half of code for dsp init. 
We could be called directly by - * the dsp Init routine, or if a socket has to be opened, we get called - * by the completion routine of the dsp open socket. - * - * INPUTS: - * sp The stream we're initing (not yet on list of streams) - * pb The user's dsp Init param block - * soc The socket we're going to use - * OUTPUTS: - * none -*/ -static void InitContinue(sp, pb) /* (CCBPtr sp, DSPPBPtr pb, int soc) */ - CCBPtr sp; - struct adspcmd *pb; -{ - - /* Save connection's socket # in CCB */ - sp->localSocket = pb->socket; - - /* - * Link the new ccb onto queue. Must be done with interrupts off. - */ - qAddToEnd((struct qlink **)AT_ADSP_STREAMS, (struct qlink *)sp); /* Put on linked list of connections */ - return; -} - -/* - * dspInit - * - * Create and initialize a connection end. return ccbRefNum so that client can - * reference this ccb in later calls. The caller provides a pointer to - * ccb which belongs to adsp until the connection end is removed. - * - * If we have to open a socket, we'll have to do an async open socket, and - * finish up in the completion routine - * - * INPUTS: - * --> ccbPtr Pointer to connection control block - * --> adspcmdPtr Pointer to user request block - * - * OUTPUTS: - * <-- ccbRefNum refnum assigned to this connection. - * - * ERRORS: - * EADDRINUSE or 0 - */ -int adspInit(sp, ap) /* (DSPPBPtr pb) */ - CCBPtr sp; - struct adspcmd *ap; -{ - /* - * Set connection end defaults - */ - sp->badSeqMax = 3; /* # of out-of-sequence packets received */ - /* until a retransmit advice packet is sent */ - sp->probeInterval = 6 * 30; /* 30 second probe interval */ - sp->rtmtInterval = 6 * 5; /* Just a guess --- 5 seconds */ - sp->sendBlocking = 16; - sp->sendInterval = 6; - sp->badSeqMax = 3; /* This is the default */ - - sp->ProbeTimer.type = kProbeTimerType; - sp->FlushTimer.type = kFlushTimerType; - sp->RetryTimer.type = kRetryTimerType; - sp->AttnTimer.type = kAttnTimerType; - sp->ResetTimer.type = kResetTimerType; - - if (ap->csCode == dspInit) { /* Only do this if not connection Listener */ - /* - * Initialize send and receive queue. Make sure they are the - * right size - */ - sp->rbuflen = RecvQSize; - sp->rbuf_mb = 0; - sp->sbuflen = SendQSize; - sp->sbuf_mb = 0; - sp->csbuf_mb = 0; - - /* - * Initialize send and receive defaults - */ - - sp->attn_mb = 0; - sp->state = sClosed; /* Set state for connection end */ - /* end dspInit */ - } else { - - /* dspCLInit */ - sp->state = sListening; /* Set state for conn end */ - } /* end dspCLInit */ - /* - * User opens the socket, so continue with the init stuff - */ - InitContinue(sp, ap); - return(0); -} - - -#if 0 -/* - * AdspBad - * - * - * INPUTS: - * --> ap Parameter block - * - */ -int AdspBad(ap) /* (DSPPBPtr pb) */ - struct adspcmd *ap; -{ - dPrintf(D_M_ADSP, D_L_ERROR, - ("Hey! Do you have the right AuthToolbox?")); - ap->ioResult = controlErr; /* Unknown csCode in the param block */ - return EINVAL; -} - -#endif diff --git a/bsd/netat/adsp_NewCID.c b/bsd/netat/adsp_NewCID.c deleted file mode 100644 index 422f9718a..000000000 --- a/bsd/netat/adsp_NewCID.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * dspNewCID.c - * - * From v01.04 04/20/90 mbs - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * dspNewCID - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * - * OUTPUTS: - * <-- newCID new connection identifier - * - * ERRORS: - * errRefNum bad connection refnum - * errState connection is not closed - */ -int adspNewCID(sp, pb) /* (DSPPBPtr pb) */ - CCBPtr sp; - struct adspcmd *pb; -{ - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - if (sp->state != sClosed) { /* Can only assign to a closed connection */ - pb->ioResult = errState; - return EINVAL; - } - - /* - * Assign a unique connection ID to this ccb - */ - sp->locCID = pb->u.newCIDParams.newcid = NextCID(); - - pb->ioResult = 0; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - return 0; -} diff --git a/bsd/netat/adsp_Open.c b/bsd/netat/adsp_Open.c deleted file mode 100644 index b41ee1198..000000000 --- a/bsd/netat/adsp_Open.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* adspOpen.c v01.20 - * - * From v01.20 08/23/90 Mike Shoemaker for MacOS - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -extern int *adsp_pidM; - - -/* - * NextCID - * - * Create a unique connection ID. - * - * INPUTS: - * none - * OUTPUTS: - * unique connection ID - */ - -unsigned short NextCID(void) -{ - unsigned short num; - register CCB *queue; - - while (1) { - num = ++adspGlobal.lastCID; - /* qfind_w below is in 68K assembly */ - /* point to the first element */ - queue = (CCB *)AT_ADSP_STREAMS; - while (queue) { - /* and scan .. */ - if (queue->locCID == num) - break; - queue = queue->ccbLink; - } - if (queue == (CCBPtr)NULL) - break; - } - return num; -} - -static byte xlateStateTbl[4] = /* The value to be given to the CCB's state. */ -{ /* indexed by ocMode */ - sOpening, /* ocRequest */ - sPassive, /* ocPassive */ - sOpening, /* ocAccept */ - sOpen /* ocEstablish */ -}; -static byte xlateOpenTbl[4] = /* Value to use for open state. */ -{ /* indexed by ocMode */ - O_STATE_OPENWAIT, /* ocRequest */ - O_STATE_LISTEN, /* ocPassive */ - O_STATE_ESTABLISHED, /* ocAccept */ - O_STATE_OPEN /* ocEstablish */ -}; - -/* - * adspOpen - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * --> remoteCID connection id of remote connection end - * --> remoteAddress internet address of remote connection end - * --> filterAddress filter for incoming open connection requests - * --> sendSeq initial send sequence number to use - * --> sendWindow initial size of remote end's receive buffer - * --> recvSeq initial receive sequence number to use - * --> attnSendSeq initial attention send sequence number - * --> attnRecvSeq initial receive sequence number - * --> ocMode connection opening mode - * --> ocMaximum maximum retries of open connection request - * - * OUTPUTS: - * <-- localCID connection identifier of this connection end - * <-- remoteCID connection id of remote connection end - * <-- remoteAddress - * <-- sendSeq - * <-- sendWindow - * <-- attnSendSeq - * - * ERRORS: - * errRefNum bad connection refnum - * errState connection end must be closed - * errOpening open connection attempt failed - * errAborted request aborted by a remove or close call - */ -int adspOpen(sp, pb) /* (DSPPBPtr pb) */ - register CCBPtr sp; - register struct adspcmd *pb; -{ - int ocMode; - register gbuf_t *mp; - - if (sp == 0) { - pb->ioResult = errRefNum; /* Unknown refnum */ - return EINVAL; - } - - if ((sp->state != sClosed) || - (sp->removing)) { /* The CCB must be closed */ - pb->ioResult = errState; - return EALREADY; - } - - ocMode = pb->u.openParams.ocMode; /* get a local copy of open mode */ - if (ocMode == ocRequest) - adsp_pidM[pb->socket] = 0; - - /* - * Save parameters. Fill in defaults if zero - */ - if (pb->u.openParams.ocInterval) - sp->openInterval = pb->u.openParams.ocInterval; - else - sp->openInterval = ocIntervalDefault; - - if (pb->u.openParams.ocMaximum) - sp->openRetrys = pb->u.openParams.ocMaximum; - else - sp->openRetrys = ocMaximumDefault; - - sp->remoteAddress = *((AddrUnionPtr)&pb->u.openParams.remoteAddress); - /* Not used for passive */ - /* - * Clear out send/receive buffers. 
- */ - if (sp->sbuf_mb) { /* clear the send queue */ - gbuf_freel(sp->sbuf_mb); - sp->sbuf_mb = 0; - } - if (sp->csbuf_mb) { - gbuf_freem(sp->csbuf_mb); - sp->csbuf_mb = 0; - } - if (sp->rbuf_mb) { /* clear the receive queue */ - gbuf_freel(sp->rbuf_mb); - sp->rbuf_mb = 0; - } - if (sp->crbuf_mb) { - gbuf_freem(sp->crbuf_mb); - sp->crbuf_mb = 0; - } - - sp->rData = 0; /* Flag both buffers as empty */ - sp->sData = 0; - sp->recvQPending = 0; /* No bytes in receive queue */ - - /* - * Clear all of those pesky flags - */ - sp->userFlags = 0; - sp->sendDataAck = 0; - sp->sendAttnAck = 0; - sp->sendAttnData = 0; - sp->callSend = 0; - sp->removing = 0; - sp->writeFlush = 0; - - /* - * Reset round-trip timers - */ - sp->roundTrip = sp->rtmtInterval; - sp->deviation = 0; - - /* - * Reset stuff for retransmit advice packet - */ - sp->badSeqCnt = 0; - /* - * Reset flow control variables - */ - sp->pktSendMax = 1; /* Slow start says we should set this to 1 */ - sp->pktSendCnt = 0; - sp->rbufFull = 0; - sp->resentData = 0; - sp->noXmitFlow = 0; - sp->waitingAck = 0; - - /* - * Copy required information out of parameter block - */ - if (ocMode == ocAccept || ocMode == ocEstablish) { - sp->remCID = pb->u.openParams.remoteCID; - sp->sendSeq = sp->firstRtmtSeq = pb->u.openParams.sendSeq; - sp->sendWdwSeq = sp->sendSeq + pb->u.openParams.sendWindow; - sp->attnSendSeq = pb->u.openParams.attnSendSeq; - } else { /* accept or establish */ - sp->remCID = 0; - sp->sendSeq = 0; - sp->sendWdwSeq = 0; - sp->attnSendSeq = 0; - } - - if (ocMode == ocEstablish) { /* Only set these if establish mode */ - sp->recvSeq = pb->u.openParams.recvSeq; - sp->attnRecvSeq = pb->u.openParams.attnRecvSeq; - UAS_ASSIGN_HTON(sp->f.CID, sp->locCID); /* Preset the CID in the ADSP header */ - /* This is done elsewhere for all other modes */ - InsertTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer, - sp->probeInterval); - } else { /* establish */ - /* All other modes need a CID assigned */ - sp->locCID = NextCID(); - sp->recvSeq = 0; - sp->attnRecvSeq = 0; - } - - /* - * Now set the state variables for this CCB. - */ - - sp->openState = xlateOpenTbl[ocMode-ocRequest]; - sp->state = xlateStateTbl[ocMode-ocRequest]; - - if (ocMode == ocEstablish) { /* For establish call, we're done */ - pb->ioResult = 0; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - return 0; - } - - pb->qLink = 0; /* Clear link field before putting on queue */ - mp = gbuf_copym(pb->mp); /* Save parameter block to match later */ - - if (mp == 0) { - pb->ioResult = errDSPQueueSize; - return ENOBUFS; - } - pb->ioResult = 1; /* not open -> not done */ - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); /* release user */ - sp->opb = (struct adspcmd *)gbuf_rptr(mp); - sp->opb->ioc = 0; /* unlink saved pb from ioctl block */ - sp->opb->mp = mp; - - /* - * For request & accept, need to send a packet - */ - if ((ocMode == ocRequest) || (ocMode == ocAccept)) { - sp->sendCtl |= (1 << (ocMode == ocRequest ? - ADSP_CTL_OREQ : ADSP_CTL_OREQACK)); - CheckSend(sp); - } - return 0; -} - -int adspMode(pb) - register struct adspcmd *pb; -{ - return pb->u.openParams.ocMode; -} diff --git a/bsd/netat/adsp_Options.c b/bsd/netat/adsp_Options.c deleted file mode 100644 index f40ce433b..000000000 --- a/bsd/netat/adsp_Options.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
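Annotation: adspOpen above fills in defaults for zero-valued ocInterval and ocMaximum, and only reads the remote sequencing fields for the two modes that already know the peer. A hypothetical caller, to make those conventions concrete (field names are from the patch, the values are illustrative):

    struct adspcmd cmd;
    bzero(&cmd, sizeof(cmd));
    cmd.u.openParams.ocMode     = ocRequest;  /* actively open */
    cmd.u.openParams.ocInterval = 0;          /* 0 -> ocIntervalDefault */
    cmd.u.openParams.ocMaximum  = 0;          /* 0 -> ocMaximumDefault */
    /* remoteCID, sendSeq, sendWindow and attnSendSeq are consumed only
     * for ocAccept/ocEstablish; adspOpen zeroes them for ocRequest and
     * ocPassive, and only ocEstablish skips the on-wire handshake. */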
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * dspOptions.c - * - * From v01.06 04/19/90 mbs - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * dspOptions - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * --> sendBlocking send blocking threshold - * --> sendTimer send timer interval - * --> rtmtTimer retransmit timer interval - * --> badSeqMax retransmit advice send threshold - * --> useCheckSum generate DDP checksum on internet packets - * - * OUTPUTS: - * none - * - * ERRORS: - * errRefNum bad connection refnum -*/ -int adspOptions(sp, pb) /* (DSPPBPtr pb) */ - CCBPtr sp; - struct adspcmd *pb; -{ - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - if (pb->u.optionParams.sendBlocking) - sp->sendBlocking = pb->u.optionParams.sendBlocking; - - if (pb->u.optionParams.sendTimer) - sp->sendInterval = pb->u.optionParams.sendTimer; - - /* No longer allowed to set retransmit timer as of ADSP 1.5 */ - /* Use it to specify a command blocking request specific to MacOS - * emulation. */ - if (pb->u.optionParams.rtmtTimer) - sp->delay = pb->u.optionParams.rtmtTimer; - KERNEL_DEBUG(DBG_ADSP_MISC, 0, sp, sp->delay, pb, pb->u.optionParams.rtmtTimer); - - if (pb->u.optionParams.badSeqMax) - sp->badSeqMax = pb->u.optionParams.badSeqMax; - - sp->useCheckSum = pb->u.optionParams.useCheckSum; - if (pb->u.optionParams.newPID) - sp->pid = pb->u.optionParams.newPID; - pb->ioResult = 0; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - return 0; - -} diff --git a/bsd/netat/adsp_Packet.c b/bsd/netat/adsp_Packet.c deleted file mode 100644 index c73d82e58..000000000 --- a/bsd/netat/adsp_Packet.c +++ /dev/null @@ -1,837 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
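Annotation: one asymmetry in adspOptions above is worth spelling out. Every knob uses the zero-means-leave-unchanged convention except useCheckSum, which is assigned unconditionally. A hypothetical caller that bumps a single threshold therefore also makes a checksum decision:

    struct adspcmd cmd;
    bzero(&cmd, sizeof(cmd));
    cmd.u.optionParams.sendBlocking = 24;  /* raise only this threshold */
    /* sendTimer, rtmtTimer and badSeqMax stay 0, so the current values
     * are kept; but useCheckSum is copied as-is, so this zeroed block
     * also turns DDP checksums off. */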
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Packet.c - * - * v01.23 All incoming packets come here first 06/21/90 mbs - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include - -extern at_ifaddr_t *ifID_home; - -/* - * GleanSession - * - * We just got a packet for this session, glean its address & - * reset probe timer - * - * INPUTS: - * Session - * OUTPUTS: - * none - */ -static void GleanSession(CCBPtr); - -static void GleanSession(sp) /* (CCBPtr sp) */ - CCBPtr sp; -{ - if (sp->openState == O_STATE_OPEN) { - /* This is true for both state = sOpen & sClosing */ - RemoveTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer); - InsertTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer, - sp->probeInterval); - sp->probeCntr = 4; - } - -} - - -/* - * The same code handles incoming Open Connection Request, - * Open Request + Ack, Open Connection Ack, Open Connection Denial - * - * We could be in four different states, LISTEN, OPENWAIT, ESTABLISHED, - * OPEN. - */ - -/* - * - * Ok, there are 16 combinations. 8 are do-nothings, 2 have to be - * special cased (Open Deny and Req+Ack on Open session) - * - * Build a table of actions: - * Ignore? - * What to match on (local socket, whole address, DestCID, SrcCID) - * What to send (Ack or Req+Ack) - * Next State (both the ccb state and the open state) - */ - -/* - * - */ -typedef struct { - u_char match; /* Characteristics that have to match - * (Bit-Mapped, see below) */ - char action; /* What to do if CCB matches */ - char send; /* What to send in response - * (Bit mapped, same as sendCtl field of - * CCB) */ - char openState; /* Next Open state */ - char state; /* Next ccb state. 
*/ - char pad; /* Too bad we need this to make structure - * even size */ -} TBL, *TBLPtr; - -#define M_LSOC 0x01 /* bit 0 - Match on local socket */ -#define M_ADDR 0x02 /* bit 1 - Match on whole address */ -#define M_DCID 0x04 /* bit 2 - Match on DestCID */ -#define M_SCID 0x08 /* bit 3 - Match SrcCID */ -#define M_DCIDZERO 0x10 /* bit 4 - Dest CID must be 0 */ -#define M_SCIDZERO 0x20 /* bit 5 - Src CID must be 0 */ -#define M_FILTER 0x40 /* bit 6 - Match address filter */ -#define M_IGNORE 0x80 /* bit 7 - Ignore */ - -#define A_COMPLETE 0x01 /* Complete open parameter block */ -#define A_SAVEPARMS 0x02 /* Save connection parameters */ -#define A_OREQACKOPEN 0x04 /* special case for open Req+Ack on - * OPEN session */ -#define A_GLEAN 0x08 /* We'll be talking back to this guy */ -#define A_DENY 0x10 /* We've been denied! */ - - -/* - * So here's our table - */ - -static TBL tbl[16] = { - -/* - * For Open Request ($81) - * - * LISTENING - * Match on destination socket - * Match on address filter - * Dest CID must be 0 - * Glean connection - * Save Open Connection parameters - * Send OREQACK - * Change state to ESTABLISHED - */ - { M_LSOC + M_DCIDZERO + M_FILTER, - A_SAVEPARMS + A_GLEAN, - B_CTL_OREQACK, - O_STATE_ESTABLISHED, - sOpening, - 0 - }, - -/* - * - * OPENWAIT - * Match on Remote Address & destination socket - * Dest CID must be 0 - * Save Open Connection parameters - * Send Ack - * Change state to ESTABLISHED - */ - { M_LSOC + M_ADDR + M_DCIDZERO, - A_SAVEPARMS + A_GLEAN, - B_CTL_OACK, - O_STATE_ESTABLISHED, - sOpening, - 0 - }, -/* - * - * ESTABLISHED - * Match on Remote Address & SrcCID - * Dest CID must be 0 - * Send Req + Ack - */ - { M_ADDR + M_SCID + M_DCIDZERO, - A_GLEAN, - B_CTL_OACK, - O_STATE_ESTABLISHED, - sOpening, - 0 - }, -/* - * OPEN - * Ignore - */ - { M_IGNORE, - 0, - 0, - 0, - 0, - 0 - }, - -/* - * - * For Open Ack ($82) - * - * LISTENING - * Ignore - */ - { M_IGNORE, - 0, - 0, - 0, - 0, - 0 - }, -/* - * - * OPENWAIT - * Ignore - */ - { M_IGNORE, - 0, - 0, - 0, - 0, - 0 - }, -/* - * - * ESTABLISHED - * Match on SrcCID & DestCID & Address & Local Socket - * Complete Listen or Connect PB - * OPEN - */ - { M_ADDR + M_DCID + M_SCID + M_LSOC, - A_COMPLETE + A_GLEAN, - 0, - O_STATE_OPEN, - sOpen, - 0 - }, -/* - * - * OPEN - * Ignore -*/ - { M_IGNORE, - 0, - 0, - 0, - 0, - 0 - }, - -/* - * - * For Open Request + Ack ($83) - * - * LISTENING - * Ignore -*/ - { M_IGNORE, - 0, - 0, - 0, - 0, - 0 - }, -/* - * - * OPENWAIT - * Match on DestCID & socket - * Do not test remote address -- our open req could have - * been passed to another address by a connection server - * Save Open Connection parameters - * Complete Connect parameter block - * Send Ack - * OPEN - */ - { M_DCID + M_LSOC, - A_COMPLETE + A_SAVEPARMS + A_GLEAN, - B_CTL_OACK, - O_STATE_OPEN, - sOpen, - 0 - }, -/* - * - * ESTABLISHED - * Ignore - */ - { M_IGNORE, - 0, - 0, - 0, - 0, - 0 - }, -/* - * - * OPEN - * Match on Remote Address & SrcCID & DestCID & Local Socket - * If we've never gotten any data - * Send Ack & Retransmit - */ - { M_ADDR + M_DCID + M_SCID + M_LSOC, - A_OREQACKOPEN + A_GLEAN, - B_CTL_OACK, - O_STATE_OPEN, - sOpen, - 0 - }, - -/* - * - * - * For Open Deny ($84) - * - * LISTENING - * Ignore - */ - { M_IGNORE, - 0, - 0, - 0, - 0, - 0 - }, -/* - * - * OPENWAIT - * Match on DestCID & Address - * Source CID must be 0 - * Complete with error - */ - { M_SCIDZERO + M_DCID + M_ADDR, - A_DENY, - 0, - O_STATE_NOTHING, - sClosed, - 0 - }, -/* - * - * ESTABLISHED - * Ignore - */ - { M_IGNORE, - 0, - 0, - 
0, - 0, - 0 - }, /* %%% No we probably don't want to ignore in this case */ -/* - * - * OPEN - * Ignore - */ - { M_IGNORE, - 0, - 0, - 0, - 0, - 0 - } -}; - -extern at_ifaddr_t *ifID_table[]; - -/* - * Used to search down queue of sessions for a session waiting for an - * open request. - */ -typedef struct { - AddrUnion addr; - word dstCID; - word srcCID; - byte socket; - byte descriptor; - byte idx; /* Index into state tables */ - TBLPtr t; /* Ptr to entry in table above */ -} MATCH, *MATCHPtr; - -/* - * MatchStream - * - * Called by Rx connection to find which stream (if any) should get this open - * request/ack/req+ack/deny packet. - * - */ -static boolean MatchStream(CCBPtr, MATCHPtr); - -static boolean -MatchStream(sp, m) /* (CCBPtr sp, MATCHPtr m) */ - CCBPtr sp; - MATCHPtr m; -{ - unsigned char match; - struct adspcmd *opb; - - if (sp->openState < O_STATE_LISTEN || - sp->openState > O_STATE_OPEN) - return 0; - - - m->t = &tbl[sp->openState - O_STATE_LISTEN + m->idx]; - - match = m->t->match; /* Get match criteria */ - - if (match & M_IGNORE) /* Ignore this combination */ - return 0; - - if (match & M_LSOC) { /* Match on Local socket */ - if (sp->localSocket != m->socket) - return 0; - } - - if (match & M_ADDR) { /* Match on Address */ - AddrUnion addr; - addr = m->addr; /* Make local copy for efficiency */ - if (sp->remoteAddress.a.node != addr.a.node) - return 0; - if (sp->remoteAddress.a.socket != addr.a.socket) - return 0; - if (sp->remoteAddress.a.net && addr.a.net && - (sp->remoteAddress.a.net != addr.a.net)) - return 0; - - /* - * Handle special case to reject self-sent open request - */ - if ((m->srcCID == sp->locCID) && - (addr.a.node == ifID_home->ifThisNode.s_node) && - /* *** was (addr.a.node == ddpcfg.node_addr.node) && *** */ - ((addr.a.net == 0) || - (ifID_home->ifThisNode.s_net == 0) || - (ifID_home->ifThisNode.s_net == addr.a.net)) ) - /* *** was - (NET_VALUE(ddpcfg.node_addr.net) == 0) || - (NET_VALUE(ddpcfg.node_addr.net) == NET_VALUE(addr.a.net))) ) - *** */ - /* CID's match, and */ - /* If nodeID matches, and */ - /* network matches, */ - return 0; /* then came from us! */ - } - - if (match & M_DCID) { /* Match on DestCID */ - if (sp->locCID != m->dstCID) - return 0; - } - - if (match & M_SCID) { /* Match on SourceCID */ - if (sp->remCID != m->srcCID) - return 0; - } - - if (match & M_DCIDZERO) { /* Destination CID must be 0 */ - if (m->dstCID != 0) - return 0; - } - - if (match & M_SCIDZERO) /* Source CID must be 0 */ - { - if (m->srcCID != 0) - return 0; - } - - if (match & M_FILTER) { /* Check address filter? */ - if ((opb = sp->opb)) /* There should be a param block... */ - { - AddrUnion addr; - addr = m->addr; /* Make local copy for efficiency */ - if ((opb->u.openParams.filterAddress.net && - addr.a.net && - opb->u.openParams.filterAddress.net != addr.a.net) || - (opb->u.openParams.filterAddress.node != 0 && - opb->u.openParams.filterAddress.node != addr.a.node)|| - (opb->u.openParams.filterAddress.socket != 0 && - opb->u.openParams.filterAddress.socket != addr.a.socket)) - return 0; - } - } - - return 1; -} - -/* - * MatchListener - * - * Called by rx connection to see which connection listener (if any) should - * get this incoming open connection request. 
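Annotation: the 16-entry tbl above is really a 4x4 matrix: packet type (Open Request, Open Ack, Open Request+Ack, Open Deny) against open state (LISTEN, OPENWAIT, ESTABLISHED, OPEN). RXConnection, below, derives the row from the control subtype, which the $81..$84 comments imply is 1..4, and MatchStream adds the column. The two lines of arithmetic, pulled out of context:

    /* Four entries per packet type: subtypes 1..4 give rows 0, 4, 8, 12. */
    m.idx = ((f->descriptor & ADSP_CONTROL_MASK) - 1) * 4;
    /* The candidate stream's open state, 0..3, selects the column. */
    m.t = &tbl[sp->openState - O_STATE_LISTEN + m.idx];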
- * - */ - -static boolean MatchListener(CCBPtr, MATCHPtr); - -static boolean MatchListener(sp, m) /* (CCBPtr sp, MATCHPtr m) */ - CCBPtr sp; - MATCHPtr m; -{ - - if ((sp->state == (word)sListening) && /* This CCB is a listener */ - (sp->localSocket == m->socket)) /* on the right socket */ - return 1; - - return 0; -} - -/* - * RXConnection - * - * We just received one of the 4 Open Connection packets - * Interrupts are masked OFF at this point - * - * INPUTS: - * spPtr Place to put ptr to stream (if we found one -- not - * for listeners) - * f Pointer to ADSP header for packet, data follows behind it - * len # of byte in ADSP header + data - * addr Who sent the packet - * dsoc Where they sent it to - * - * OUTPUTS: - * Returns 1 if packet was ignored - */ -static int RXConnection( - __unused gref_t *gref, /* READ queue */ - CCBPtr *spPtr, - ADSP_FRAMEPtr f, - int len, - AddrUnion addr, - unsigned char dsoc) -{ - CCBPtr sp; - ADSP_OPEN_DATAPtr op; - struct adspcmd *pb; - MATCH m; - gbuf_t *mp; - ADSP_FRAMEPtr adspp; - ADSP_OPEN_DATAPtr adspop; - - op = (ADSP_OPEN_DATAPtr)&f->data[0]; /* Point to Open-Connection parms */ - len -= ADSP_FRAME_LEN; - - if (len < (sizeof(ADSP_OPEN_DATA))) /* Packet too small */ - return 1; - - - if (UAS_VALUE(op->version) != netw(0x0100)) { /* Check version num (on even-byte) */ - /* - * The open request has been denied. Try to send him a denial. - */ - - mp = gbuf_alloc(AT_WR_OFFSET + DDPL_FRAME_LEN + ADSP_FRAME_LEN + ADSP_OPEN_FRAME_LEN, - PRI_LO); - gbuf_rinc(mp,AT_WR_OFFSET); - gbuf_wset(mp,DDPL_FRAME_LEN); - adspp = (ADSP_FRAMEPtr)gbuf_wptr(mp); - gbuf_winc(mp,ADSP_FRAME_LEN); - bzero((caddr_t) gbuf_rptr(mp),DDPL_FRAME_LEN + ADSP_FRAME_LEN + - ADSP_OPEN_FRAME_LEN); - adspp->descriptor = ADSP_CONTROL_BIT | ADSP_CTL_ODENY; - adspop = (ADSP_OPEN_DATAPtr)gbuf_wptr(mp); - gbuf_winc(mp,ADSP_OPEN_FRAME_LEN); - UAS_UAS(adspop->dstCID, f->CID); - UAS_ASSIGN_HTON(adspop->version, 0x100); - adsp_sendddp(0, mp, DDPL_FRAME_LEN + ADSP_FRAME_LEN + - ADSP_OPEN_FRAME_LEN, &addr, DDP_ADSP); - - return 0; - } - m.addr = addr; - m.socket = dsoc; - m.descriptor = f->descriptor; - m.srcCID = UAS_VALUE_NTOH(f->CID); - m.dstCID = UAS_VALUE_NTOH(op->dstCID); /* On even-byte boundry */ - m.idx = ((f->descriptor & ADSP_CONTROL_MASK) - 1) * 4; - - /* - * See if we can find a stream that knows what to do with this packet - */ - if ((sp = (CCBPtr)qfind_m((CCB *)AT_ADSP_STREAMS, &m, (ProcPtr)MatchStream)) == 0) - { - struct adspcmd *p; - struct adspcmd *n; - /* - * No match, so look for connection listeners if this is an - * open request - */ - if ((f->descriptor & ADSP_CONTROL_MASK) != (byte)ADSP_CTL_OREQ) - return 1; - - if ((sp = (CCBPtr)qfind_m((CCB *)AT_ADSP_STREAMS, &m, - (ProcPtr)MatchListener)) == 0) - return 1; - - p = (struct adspcmd *)&sp->opb; - while ((n = (struct adspcmd *)p->qLink)) /* Hunt down list of listens */ - { - /* Check address filter */ - if (((n->u.openParams.filterAddress.net == 0) || - (addr.a.net == 0) || - (n->u.openParams.filterAddress.net == addr.a.net)) && - - ((n->u.openParams.filterAddress.node == 0) || - (n->u.openParams.filterAddress.node == addr.a.node)) && - - ((n->u.openParams.filterAddress.socket == 0) || - (n->u.openParams.filterAddress.socket == addr.a.socket))) { - p->qLink = n->qLink; /* Unlink this param block */ - n->u.openParams.remoteCID = m.srcCID; - *((AddrUnionPtr)&n->u.openParams.remoteAddress) = addr; - n->u.openParams.sendSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq); - n->u.openParams.sendWindow = UAS_VALUE_NTOH(f->pktRecvWdw); - 
n->u.openParams.attnSendSeq = UAL_VALUE_NTOH(op->pktAttnRecvSeq); - n->ioResult = 0; - completepb(sp, n); /* complete copy of request */ - /* complete(n, 0); */ - return 0; - } /* found CLListen */ - - p = n; /* down the list we go... */ - - } /* while */ - - return 1; - } - - *spPtr = sp; /* Save ptr to stream we just found */ - - sp->openState = m.t->openState; /* Move to next state (may be same) */ - sp->state = m.t->state; /* Move to next state (may be same) */ - - if (m.t->action & A_SAVEPARMS) { /* Need to Save open-conn parms */ - sp->firstRtmtSeq = sp->sendSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq); - sp->sendWdwSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq) + UAS_VALUE_NTOH(f->pktRecvWdw) - 1; - sp->attnSendSeq = UAL_VALUE_NTOH(op->pktAttnRecvSeq); /* on even boundry */ - - - sp->remCID = UAS_VALUE_NTOH(f->CID); /* Save Source CID as RemCID */ - UAS_UAS(sp->of.dstCID, f->CID); /* Save CID in open ctl packet */ - - sp->remoteAddress = addr; /* Save his address */ - - } - - if (m.t->action & A_DENY) { /* We've been denied ! */ - DoClose(sp, errOpenDenied, -1); - } - - if (m.t->action & A_OREQACKOPEN) { - /* Special case for OREQACK */ - /* on an open session */ - RemoveTimerElem(&adspGlobal.fastTimers, &sp->RetryTimer); - sp->sendSeq = sp->firstRtmtSeq; - sp->pktSendCnt = 0; - sp->waitingAck = 0; - sp->callSend = 1; - } - - if (m.t->send) { /* Need to send a response */ - sp->sendCtl |= m.t->send; - sp->callSend = 1; - } - - if (m.t->action & A_COMPLETE) { /* Need to complete open param blk */ - RemoveTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer); - - if ((pb = sp->opb)) { - sp->opb = 0; - pb->u.openParams.localCID = sp->locCID; - pb->u.openParams.remoteCID = sp->remCID; - pb->u.openParams.remoteAddress = - *((at_inet_t *)&sp->remoteAddress); - pb->u.openParams.sendSeq = sp->sendSeq; - pb->u.openParams.sendWindow = sp->sendWdwSeq - sp->sendSeq; - pb->u.openParams.attnSendSeq = sp->attnSendSeq; - pb->ioResult = 0; - completepb(sp, pb); /* complete(pb, 0); */ - return 0; - } - /* Start probe timer */ - InsertTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer, - sp->probeInterval); - } - return 0; -} - -/* - * ADSPPacket - * - * When a packet is received by the protocol stack with DDP type equal - * to ADSP, then execution comes here - * - * DS is set to ATALK's DGROUP - * - * This routine, or one of its children MUST call glean packet - * - * INPUTS: - * Pointer to DDP header - * OUTPUTS: - * none - * - * Note that the incoming message block (mp) is usually discarded, either - * by the "ignored" path, or via the "checksend" path. The only case - * where the message is NOT freed is via the RxData case in the - * non control packet switch. I zero mp after the RxData case succeeds - * so that mp will not be freed. 
- */ -int adspPacket(gref, mp) - /* (bytePtr data, word len, AddrUnion a, byte dsoc) */ - gref_t *gref; - gbuf_t *mp; -{ - unsigned char *bp; - int len; - AddrUnion a; - int dsoc; - register DDPX_FRAME *ddp; /* DDP frame pointer */ - register ADSP_FRAMEPtr f; /* Frame */ - CCBPtr sp; - - sp = 0; /* No stream */ - bp = (unsigned char *)gbuf_rptr(mp); - ddp = (DDPX_FRAME *)bp; - if (ddp->ddpx_type != DDP_ADSP) - return -1; - f = (ADSP_FRAMEPtr)(bp + DDPL_FRAME_LEN); - - len = UAS_VALUE_NTOH(ddp->ddpx_length) & 0x3ff; /* (ten bits of length) */ - len -= DDPL_FRAME_LEN; - if (len < (sizeof(ADSP_FRAME) - 1)) /* Packet too small */ - return -1; /* mark the failure */ - - a.a.net = NET_VALUE(ddp->ddpx_snet); - a.a.node = ddp->ddpx_snode; - a.a.socket = ddp->ddpx_source; - - dsoc = ddp->ddpx_dest; - - if ((sp = (CCBPtr)FindSender(f, a))) - GleanSession(sp); - - if (f->descriptor & ADSP_ATTENTION_BIT) { /* ATTN packet */ - if (sp && RXAttention(sp, mp, f, len)) - goto ignore; - else - mp = 0; /* attention data is being held */ - } /* ATTENTION BIT */ - - else if (f->descriptor & ADSP_CONTROL_BIT) { /* Control packet */ - switch (f->descriptor & ADSP_CONTROL_MASK) { - case ADSP_CTL_PROBE: /* Probe or acknowledgement */ - if (sp) - CheckRecvSeq(sp, f); - break; - - case ADSP_CTL_OREQ: /* Open Connection Request */ - case ADSP_CTL_OREQACK: /* Open Request and acknowledgement */ - case ADSP_CTL_OACK: /* Open Request acknowledgment */ - case ADSP_CTL_ODENY: /* Open Request denial */ - if (RXConnection(gref, &sp, f, len, a, dsoc)) - goto ignore; - break; - - case ADSP_CTL_CLOSE: /* Close connection advice */ - if (sp) { - /* This pkt may also ack some data we sent */ - CheckRecvSeq(sp, f); - RxClose(sp); - sp = 0; - } else - goto ignore; - break; - - case ADSP_CTL_FRESET: /* Forward Reset */ - /* May I rot in hell for the code below... */ - if (sp && (CheckRecvSeq(sp, f), RXFReset(sp, f))) - goto ignore; - break; - - case ADSP_CTL_FRESET_ACK: /* Forward Reset Acknowledgement */ - if (sp && (CheckRecvSeq(sp, f), RXFResetAck(sp, f))) - goto ignore; - break; - - case ADSP_CTL_RETRANSMIT: /* Retransmit advice */ - if (sp) { - /* This pkt may also ack some data we sent */ - CheckRecvSeq(sp, f); - RemoveTimerElem(&adspGlobal.fastTimers, &sp->RetryTimer); - sp->sendSeq = sp->firstRtmtSeq; - sp->pktSendCnt = 0; - sp->waitingAck = 0; - sp->callSend = 1; - } else - goto ignore; - break; - - default: - goto ignore; - } /* switch */ - } /* Control packet */ - - else { /* Data Packet */ - if ((sp == 0) || RXData(sp, mp, f, len)) - goto ignore; - else - mp = 0; /* RXData used up the data, DONT free it! */ - } /* Data Packet */ - - if (mp) - gbuf_freem(mp); - - /* incoming data was not ignored */ - if (sp && sp->callSend) /* If we have a stream & we need to send */ - CheckSend(sp); - - return 0; - -ignore: - gbuf_freem(mp); - return 0; -} diff --git a/bsd/netat/adsp_Read.c b/bsd/netat/adsp_Read.c deleted file mode 100644 index 64f097b26..000000000 --- a/bsd/netat/adsp_Read.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
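Annotation: adspPacket above classifies every frame by two descriptor bits before it ever looks at a control subtype. Condensed into a sketch (cases abbreviated; this is not a drop-in replacement for the deleted dispatch):

    if (f->descriptor & ADSP_ATTENTION_BIT) {
        /* out-of-band attention data or its ack -> RXAttention() */
    } else if (f->descriptor & ADSP_CONTROL_BIT) {
        switch (f->descriptor & ADSP_CONTROL_MASK) {
        case ADSP_CTL_PROBE:    /* keepalive, may also ack sent data */
        case ADSP_CTL_OREQ:     /* the four open-handshake subtypes  */
        case ADSP_CTL_OREQACK:  /*   all route to RXConnection()     */
        case ADSP_CTL_OACK:
        case ADSP_CTL_ODENY:
        case ADSP_CTL_CLOSE:    /* peer is closing */
        default:
            break;
        }
    } else {
        /* plain data -> RXData(), which may keep the mbuf chain */
    }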
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * - * dspRead.c - * - * From v01.17 08/22/90 mbs - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * CheckReadQueue - * - * Checks to see if there is any data in the receive queue. If there - * is data, a pb and the data are queued to the user. - * - * - */ -extern int adsp_check; - -int CheckReadQueue(sp) /* (CCBPtr sp) */ - register CCBPtr sp; -{ - register struct adspcmd *pb; - unsigned short cnt; - char eom = 0; - register gbuf_t *mp; - register gbuf_t *tmp; - gref_t *gref; - - dPrintf(D_M_ADSP, D_L_TRACE, ("CheckReadQueue: sp=0x%x\n", (unsigned)sp)); - KERNEL_DEBUG(DBG_ADSP_READ, 0, sp, sp->rbuf_mb, sp->rpb, sp->delay); - trace_mbufs(D_M_ADSP_LOW, " bCQR m", sp->rbuf_mb); - - while (sp->rData && (pb = sp->rpb)) { /* have data */ - dPrintf(D_M_ADSP, D_L_TRACE, - (" pb=0x%p, gref=0x%p, ioc=0x%p, reqCount=%d (have data)\n", - pb, pb->gref, pb->ioc, pb->u.ioParams.reqCount)); - KERNEL_DEBUG(DBG_ADSP_READ, 1, pb, pb->gref, pb->ioc, pb->u.ioParams.reqCount); - if (pb->u.ioParams.reqCount == 0) { - pb->ioResult = 0; - sp->rpb = pb->qLink; - if (pb->ioc) { - KERNEL_DEBUG(DBG_ADSP_READ, 2, pb, pb->gref, pb->ioc, 0); - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - } else { - KERNEL_DEBUG(DBG_ADSP_READ, 3, pb, pb->gref, 0, 0); - completepb(sp, pb); - } - continue; - } - - /* take the first packet off of sp->rbuf_mb or sp->crbuf_mb */ - if ((mp = sp->rbuf_mb)) { /* Get header for oldest data */ - KERNEL_DEBUG(DBG_ADSP_READ, 4, pb, mp, gbuf_msgsize(mp), gbuf_next(mp)); - sp->rbuf_mb = gbuf_next(mp); - gbuf_next(mp) = 0; - eom = 1; - } else if ((mp = sp->crbuf_mb)) { - KERNEL_DEBUG(DBG_ADSP_READ, 5, pb, mp, gbuf_msgsize(mp), gbuf_next(mp)); - sp->crbuf_mb = 0; - eom = 0; - } - - /* Get the first (reqCount-actCount) bytes and tack them onto - the end of pb->mp. If eom is set, put the remainder of the - data onto the front of sp->rbuf_mb, otherwise sp->crbuf_mb. */ - cnt = gbuf_msgsize(mp); /* # of data bytes in it. 
*/ - if (cnt > (unsigned short)(pb->u.ioParams.reqCount - pb->u.ioParams.actCount)) { - cnt = pb->u.ioParams.reqCount - pb->u.ioParams.actCount; - /* m_split returns the tail */ - if (!(tmp = (gbuf_t *)m_split(mp, cnt, M_DONTWAIT))) { - cnt = 0; - tmp = mp; - } - if (eom) { - gbuf_next(tmp) = sp->rbuf_mb; - sp->rbuf_mb = tmp; - eom = 0; - } else - sp->crbuf_mb = tmp; - } - if (cnt) { - pb->u.ioParams.actCount += cnt; - gbuf_linkb(pb->mp, mp); - } - - pb->u.ioParams.eom = eom; - /* - * Now clean up receive buffer to remove all of the data - * we just copied - */ - if ((sp->rbuf_mb == 0) && - (sp->crbuf_mb == 0)) /* no more data blocks */ - sp->rData = 0; - /* - * If we've filled the parameter block, unlink it from read - * queue and complete it. We also need to do this if the connection - * is closed && there is no more stuff to read. - */ - if (eom || (pb->u.ioParams.actCount >= pb->u.ioParams.reqCount) || - ((sp->state == sClosed) && (!sp->rData)) ) { - /* end of message, message is full, connection - * is closed and all data has been delivered, - * or we are not to "delay" data delivery. - */ - pb->ioResult = 0; - sp->rpb = pb->qLink; /* dequeue request */ - if (pb->ioc) { /* data to be delivered at the time of the */ - mp = gbuf_cont(pb->mp); /* ioctl call */ - gbuf_cont(pb->mp) = 0; - gref = (gref_t *)pb->gref; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - dPrintf(D_M_ADSP, D_L_TRACE, (" (pb->ioc) mp=%p\n", mp)); - KERNEL_DEBUG(DBG_ADSP_READ, 0x0A, pb, mp, - gbuf_next(mp), gbuf_cont(mp)); - SndMsgUp(gref, mp); - dPrintf(D_M_ADSP, D_L_TRACE, - (" (data) size req=%d\n", pb->u.ioParams.actCount)); - KERNEL_DEBUG(DBG_ADSP_READ, 0x0B, pb, pb->ioc, - pb->u.ioParams.reqCount, pb->u.ioParams.actCount); - } else { /* complete an queued async request */ - KERNEL_DEBUG(DBG_ADSP_READ, 0x0C, pb, sp, - pb->u.ioParams.actCount, sp->delay); - completepb(sp, pb); - } - } - } /* while */ - - if ((pb = sp->rpb)) { /* if there is an outstanding request */ - dPrintf(D_M_ADSP, D_L_TRACE, - (" pb=0x%p, ioc=0x%p, reqCount=%d (no more data)\n", - pb, pb->ioc, pb->u.ioParams.reqCount)); - KERNEL_DEBUG(DBG_ADSP_READ, 0x0D, pb, pb->ioc, - pb->u.ioParams.reqCount, pb->u.ioParams.actCount); - - if (sp->state == sClosed) { - while (pb) { - KERNEL_DEBUG(DBG_ADSP_READ, 0x0E, pb, sp, pb->ioc, 0); - pb->ioResult = 0; - pb->u.ioParams.actCount = 0; - pb->u.ioParams.eom = 0; - sp->rpb = pb->qLink; - if (pb->ioc) { - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - } else { - completepb(sp, pb); - } - pb = sp->rpb; - } - } else if (pb->ioc) { /* if request not complete and this - * is an active ioctl, release user */ - sp->rpb = pb->qLink; - pb->ioResult = 1; - tmp = gbuf_cont(pb->mp); /* detatch perhaps delayed data */ - gbuf_cont(pb->mp) = 0; - if ((mp = gbuf_copym(pb->mp))) { /* otherwise, duplicate user request */ - KERNEL_DEBUG(DBG_ADSP_READ, 0x0F, pb, sp, pb->mp, 0); - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); /* release user */ - pb = (struct adspcmd *)gbuf_rptr(mp); /* get new parameter block */ - pb->ioc = 0; - pb->mp = mp; - gbuf_cont(pb->mp) = tmp; /* reattach data */ - pb->qLink = sp->rpb; /* requeue the duplicate at the head */ - sp->rpb = pb; - } else { /* there is no data left, but no space - * to duplicate the parameter block, so - * put what must be a non EOM message - * back on the current receive queue, and - * error out the user - */ - KERNEL_DEBUG(DBG_ADSP_READ, 0x10, pb, sp, pb->mp, 0); - if (tmp) { - sp->crbuf_mb = tmp; - sp->rData = 1; - } - pb->ioResult = errDSPQueueSize; - adspioc_ack(ENOBUFS, 
(gbuf_t *)pb->ioc, pb->gref); - } - } - } - /* - * The receive window has opened. If it was previously closed, then we - * need to notify the other guy that we now have room to receive more - * data. But, in order to cut down on lots of small data packets, - * we'll wait until the receive buffer is 1/4 empty before telling - * him that there's room in our receive buffer. - */ - if (sp->rbufFull && (CalcRecvWdw(sp) > (sp->rbuflen >> 2))) { - sp->rbufFull = 0; - sp->sendDataAck = 1; - sp->callSend = 1; - } - - KERNEL_DEBUG(DBG_ADSP_READ, 0x11, sp, 0, 0, 0); - trace_mbufs(D_M_ADSP_LOW, " eCQR m", sp->rbuf_mb); - return 0; -} - -/* - * CheckAttn - * - * Checks to see if there is any attention data and passes the data back - * in the passed in pb. - * - * INPUTS: - * sp - * pb - * - * OUTPUTS: - * - */ -int CheckAttn(CCBPtr, struct adspcmd *); - -int CheckAttn(sp, pb) /* (CCBPtr sp) */ - register CCBPtr sp; - register struct adspcmd *pb; -{ - gbuf_t *mp; - gref_t *gref = 0; - - dPrintf(D_M_ADSP, D_L_TRACE, - ("CheckAttn: sp=0x%x, pb=0x%x\n", (unsigned)sp, (unsigned)pb)); - - if ((mp = sp->attn_mb)) { - - /* - * Deliver the attention data to the user. - */ - gref = (gref_t *)pb->gref; - pb->u.attnParams.attnSize = sp->attnSize; - pb->u.attnParams.attnCode = sp->attnCode; - if (!sp->attnSize) { - gbuf_freem(mp); - mp = 0; - } - sp->userFlags &= ~eAttention; - /* - * Now clean up receive buffer to remove all of the data - * we just copied - */ - sp->attn_mb = 0; - pb->ioResult = 0; - } else { - /* - * No data... - */ - pb->u.attnParams.attnSize = 0; - pb->u.attnParams.attnCode = 0; - pb->ioResult = 1; /* not done */ - } - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - if (mp) { - SndMsgUp(gref, mp); - } - return 0; -} - -/* - * adspRead - * - * INPUTS: - * --> sp stream pointer - * --> pb user request parameter block - * - * OUTPUTS: - * <-- actCount actual number of bytes read - * <-- eom one if end-of-message, zero otherwise - * - * ERRORS: - * errRefNum bad connection refnum - * errState - * errFwdReset read terminated by forward reset - * errAborted request aborted by Remove or Close call - */ -int adspRead(sp, pb) /* (DSPPBPtr pb) */ - register CCBPtr sp; - register struct adspcmd *pb; -{ - register gbuf_t *mp; - - dPrintf(D_M_ADSP, D_L_TRACE, - ("adspRead: sp=0x%x, pb=0x%x\n", (unsigned)sp, (unsigned)pb)); - - KERNEL_DEBUG(DBG_ADSP_READ, 0x12, sp, pb, sp->state, sp->rData); - - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - /* - * It's OK to read on a closed, or closing session - */ - if (sp->state != sOpen && sp->state != sClosing && sp->state != sClosed) { - pb->ioResult = errState; - return EINVAL; - } - if (sp->rData && (sp->rpb == 0)) { /* if data, and no queue of pbs */ - qAddToEnd((struct qlink **)&sp->rpb, (struct qlink *)pb); /* deliver data to user directly */ - CheckReadQueue(sp); - } else if ((pb->u.ioParams.reqCount == 0) && (sp->rpb == 0)) { - /* empty read */ - pb->ioResult = 0; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - return 0; - } else { - pb->ioResult = 1; - if ((mp = gbuf_copym(pb->mp))) { /* otherwise, duplicate user request */ - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); /* release user */ - pb = (struct adspcmd *)gbuf_rptr(mp); /* get new parameter block */ - pb->ioc = 0; - pb->mp = mp; - qAddToEnd((struct qlink **)&sp->rpb, (struct qlink *)pb); /* and queue it for later */ - } else { - pb->ioResult = errDSPQueueSize; - return ENOBUFS; - } - } - - if (sp->callSend) { - CheckSend(sp); /* If recv window opened, we might */ - /* send an
unsolicited ACK. */ - } - return 0; -} - -/* - * dspReadAttention - * - * INPUTS: - * --> sp stream pointer - * --> pb user request parameter block - * - * OUTPUTS: - * <-- NONE - * - * ERRORS: - * errRefNum bad connection refnum - * errState connection is not in the right state - */ -int adspReadAttention(sp, pb) /* (DSPPBPtr pb) */ - register CCBPtr sp; - register struct adspcmd *pb; -{ - dPrintf(D_M_ADSP, D_L_TRACE, - ("adspReadAttention: sp=0x%x, pb=0x%x\n", (unsigned)sp, (unsigned)pb)); - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - /* - * It's OK to read on a closed, or closing session - */ - if (sp->state != sOpen && sp->state != sClosing && sp->state != sClosed) { - pb->ioResult = errState; - return EINVAL; - } - - CheckAttn(sp, pb); /* Anything in the attention queue */ - CheckReadQueue(sp); /* check to see if receive window has opened */ - if (sp->callSend) { - CheckSend(sp); /* If recv window opened, we might */ - /* send an unsolicited ACK. */ - } - return 0; -} /* adspReadAttention */ diff --git a/bsd/netat/adsp_RxAttn.c b/bsd/netat/adsp_RxAttn.c deleted file mode 100644 index d912492f3..000000000 --- a/bsd/netat/adsp_RxAttn.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * RxAttn.c - * - * From v01.12 06/12/90 mbs - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. 
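Annotation: CheckReadQueue, above, satisfies a partial read by cutting the oldest message at reqCount - actCount bytes; m_split(m, len, wait) hands back the tail chain starting at offset len, and that tail goes back on the receive queue. The same maneuver in isolation, following the deleted code's failure policy (if the split cannot allocate, take nothing and leave the whole chain queued); take_front is an invented name:

    #include <sys/mbuf.h>

    static struct mbuf *take_front(struct mbuf **queued, int want)
    {
        struct mbuf *m = *queued;
        struct mbuf *tail = m_split(m, want, M_DONTWAIT);

        if (tail == NULL)
            return NULL;   /* could not split: everything stays queued */
        *queued = tail;    /* remainder goes back on the queue */
        return m;          /* first 'want' bytes for the reader */
    }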
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * Used to search down queue of sessions for a session that matches - * sender and source connection ID -*/ -typedef struct -{ - AddrUnion addr; - word srcCID; -} MATCH_SENDER, *MATCH_SENDERPtr; - -/* - * MatchSender - * - */ -static boolean MatchSender(CCBPtr, MATCH_SENDERPtr); - -static boolean MatchSender(sp, m) /* (CCBPtr sp, MATCH_SENDERPtr m) */ - CCBPtr sp; - MATCH_SENDERPtr m; -{ - - if (sp->state != sOpen && sp->state != sClosing) - return 0; - - if (sp->remCID != m->srcCID) - return 0; - - if (sp->remoteAddress.a.node != m->addr.a.node) - return 0; - if (sp->remoteAddress.a.socket != m->addr.a.socket) - return 0; - if (sp->remoteAddress.a.net && m->addr.a.net && - (sp->remoteAddress.a.net != m->addr.a.net)) - return 0; - - return 1; -} - - -/* - * FindSender - * - * Given an ADSP Packet, find the stream it is associated with. - * - * This should only be used for ADSP Packets that could be received - * by an OPEN connection. - * - * INPUTS: - * Pointer to ADSP header & address of sender - * OUTPUTS: - * Pointer to stream if found, else 0 - */ -CCBPtr FindSender(f, a) /* (ADSP_FRAMEPtr f, AddrUnion a) */ - ADSP_FRAMEPtr f; - AddrUnion a; -{ - MATCH_SENDER m; - - m.addr = a; - m.srcCID = UAS_VALUE_NTOH(f->CID); - return (CCBPtr)qfind_m((CCB *)AT_ADSP_STREAMS, &m, (ProcPtr)MatchSender); -} - -/* - * RXAttention - * - * We just got an Attention Packet. - * See if it came from anybody we know. - * Then check to see if it is an attention data packet or acknowledgement - * - * Interrupts are masked OFF at this point. - * - * INPUTS: - * stream pointer - * Pointer to ADSP header, - * Length of header plus data - * OUTPUTS: - * Returns 1 if packet was ignored - */ -int RXAttention(sp, mp, f, len) /* (CCBPtr sp, ADSP_FRAMEPtr f, word len) */ - CCBPtr sp; - gbuf_t *mp; - ADSP_FRAMEPtr f; - int len; -{ - int offset; - struct adspcmd *pb; - long diff; - - if (UAS_VALUE(f->pktRecvWdw)) /* This field must be 0 in attn pkts */ - return 1; - - if ((f->descriptor == - (char)(ADSP_ATTENTION_BIT | ADSP_ACK_REQ_BIT)) && /* Attention Data */ - ((sp->userFlags & eAttention) == 0)) /* & he read the previous */ - { - diff = UAL_VALUE_NTOH(f->pktFirstByteSeq) - sp->attnRecvSeq; - if (diff > 0) /* Hey, he missed one */ - return 1; - - if (diff == 0) /* This is the one we expected */ - { - len -= ADSP_FRAME_LEN; /* remove adsp header */ - if (len < 2) /* Poorly formed attn packet */ - return 1; - sp->attnCode = (f->data[0] << 8) + f->data[1]; /* Save attn code */ - sp->attn_mb = mp; - offset = ((unsigned char *)&f->data[2]) - (unsigned char *)gbuf_rptr(mp); - gbuf_rinc(mp,offset); - sp->attnPtr = (unsigned char *)gbuf_rptr(mp); - mp = 0; /* mp has been queued don't free it */ - - /* Interrupts are off here, or otherwise we have to do - * these three operations automically. - */ - sp->attnSize = len - 2; /* Tell user how many bytes */ - ++sp->attnRecvSeq; - /* Set flag saying we got attn message */ - sp->userFlags |= eAttention; - UrgentUser(sp); /* Notify user */ - /* BEFORE sending acknowledge */ - } /* in sequence */ - - sp->sendAttnAck = 1; /* send attention ack for dupl. 
& - * expected data */ - sp->callSend = 1; - } /* Attn Data */ - - /* - * Interrupts are OFF here, otherwise we have to do this atomically - */ - /* Check to see if this acknowledges anything */ - if ((sp->attnSendSeq + 1) == UAL_VALUE_NTOH(f->pktNextRecvSeq)) { - sp->attnSendSeq++; - if ((pb = sp->sapb) == 0) { /* We never sent data ? !!! */ - if (mp) - gbuf_freem(mp); - return 0; - } - - sp->sapb = (struct adspcmd *)pb->qLink; /* Unlink from queue */ - - /* Remove timer */ - RemoveTimerElem(&adspGlobal.fastTimers, &sp->AttnTimer); - - pb->ioResult = 0; - if (gbuf_cont(pb->mp)) { - gbuf_freem(gbuf_cont(pb->mp)); /* free the data */ - gbuf_cont(pb->mp) = 0; - } - completepb(sp, pb); /* Done with the send attention */ - - if (sp->sapb) { /* Another send attention pending? */ - sp->sendAttnData = 1; - sp->callSend = 1; - } else { - if (sp->state == sClosing) /* this ack may allow us to close... */ - CheckOkToClose(sp); - } - } - if (mp) - gbuf_freem(mp); - return 0; -} diff --git a/bsd/netat/adsp_RxData.c b/bsd/netat/adsp_RxData.c deleted file mode 100644 index 267e8c313..000000000 --- a/bsd/netat/adsp_RxData.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * RxData.c - * - * From v01.28 Handle an incoming Data Packet 06/21/90 mbs - */ -/* - * Change log: - * 06/29/95 - Modified to handle flow control for writing (Tuyen Nguyen) - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. 
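Annotation: RXAttention above pins down the attention wire format: pktRecvWdw must be zero in an attention packet, and the payload is a two-byte attention code in network order followed by the attention data, so attnSize = len - 2 and a code-only attention is legal. A parsing sketch, where buf and len are invented names for the payload past the ADSP frame header:

    if (len >= 2) {
        unsigned short attnCode = (buf[0] << 8) | buf[1];  /* big-endian */
        unsigned char *attnData = &buf[2];
        int attnSize = len - 2;             /* may legitimately be zero */
        /* ... post attnCode/attnData to the user, then send the ack ... */
    }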
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -gbuf_t *releaseData(gbuf_t *, int); - -gbuf_t *releaseData(mp, len) - gbuf_t *mp; - int len; -{ - register gbuf_t *tmp; - register int cnt; - int freeit; - - dPrintf(D_M_ADSP, D_L_TRACE, - ("releaseData: mbuf=0x%x, len=%d\n", (unsigned)mp, len)); - - KERNEL_DEBUG(DBG_ADSP_RCV, 0, mp, len, 0, 0); - - do { - freeit = 1; /* assume we use the whole mblk */ - if ((cnt = gbuf_len(mp)) > len) { - freeit = 0; /* using only part of the mblk */ - cnt = len; - } - gbuf_rinc(mp,cnt); - len -= cnt; - tmp = mp; - mp = gbuf_cont(mp); - if (freeit) { - gbuf_freeb(tmp); - } else - return tmp; /* if we don't use the whole block */ - /* pass back the partial gbuf_t pointer */ - } while (len && mp); - return mp; -} - -/* - * CheckRecvSeq - * - * We just got a non-attention packet. Check the pktNextRecvSeq field - * to see if it acknowledges any of our sent data. - * - * If any data was acked, check to see if we have anything to fill the - * newly opened up remote receive window. Otherwise, if the ACK request - * bit was set, we need to send an Ack Packet - * - * Always called as the result of receiving a packet. Interrupts - * are completely masked when this routine is called. - * - * INPUTS: - * sp stream - * f pointer to ASDP header - * OUTPUTS: - * none - */ -void CheckRecvSeq(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ - register CCBPtr sp; - register ADSP_FRAMEPtr f; -{ - int pktNextRecvSeq; - int sendWdwSeq; - int eom; - int hlen; - register gbuf_t *mp; - - if (f->descriptor & ADSP_ACK_REQ_BIT) { /* He wants an Ack */ - sp->sendDataAck = 1; - sp->callSend = 1; - } - - pktNextRecvSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq); /* Local copy */ - - /* - * Make sure the sequence number corresponds to reality -- i.e. for - * unacknowledged data that we have sent - */ - - if (GT(pktNextRecvSeq, sp->maxSendSeq)) /* We've never sent this seq #! */ - goto noack; - - if (GTE(pktNextRecvSeq, sp->timerSeq) && sp->waitingAck) { - /* This acks our Ack Request */ - sp->waitingAck = 0; /* Allow sending more */ - sp->pktSendCnt = 0; /* Reset packet count */ - /* Remove retry timer */ - RemoveTimerElem(&adspGlobal.fastTimers, &sp->RetryTimer); - - if (!sp->resentData) { /* Data sent without retries */ - short diff; /* Signed!! */ - /* All timings done in 6th second base */ - /* The contortions here are to prevent C from promoting - * everything to longs and then using a library routine - * to do the division. As 16-bit words, a DIVU instruction - * is used. 
- */ - - diff = (((word)(SysTicks() - sp->sendStamp)) / (word)10) - - sp->roundTrip + 1; - - sp->roundTrip += diff >> 3; /* Update average */ - - if (diff < 0) /* Take absolute value */ - diff = -diff; - sp->deviation += (diff - sp->deviation) >> 2; /* Update deviation*/ - - sp->rtmtInterval = sp->roundTrip + - ((short)2 * (short)sp->deviation); - - if (!sp->noXmitFlow && - sp->pktSendMax < 50) /* Bump # of sequential */ - sp->pktSendMax++; /* Packets we'll send */ - - sp->noXmitFlow = 0; - } - else - sp->resentData = 0; - - } /* Acked our data */ - - if (LTE(pktNextRecvSeq, - sp->firstRtmtSeq)) /* Was duplicate ack, so ignore */ - goto noack; - - if (!sp->sData) /* If nothing in send queue, ignore */ - goto noack; - - - do { /* This acks bytes in our buffer */ - if ((mp = sp->sbuf_mb)) { /* Get ptr to oldest data header */ - sp->sbuf_mb = gbuf_next(mp); /* unlink it from send queue */ - eom = 1; - } else { - mp = sp->csbuf_mb; - sp->csbuf_mb = 0; - eom = 0; - } - - if (mp == 0) { /* shouldn't happen! */ - sp->sData = 0; - goto noack; - } - /* - * Does this ack the entire data block we're now pointing at? - */ - if (LTE((sp->firstRtmtSeq + eom + (hlen = gbuf_msgsize(mp))), - pktNextRecvSeq)) { - - gbuf_freem(mp); - - /* Update seq # of oldest byte in bfr */ - sp->firstRtmtSeq += eom + hlen; - - if ((sp->sbuf_mb == 0) && (sp->csbuf_mb == 0)) { - /* If this was only block, then ... */ - sp->sData = 0; /* ... no data in queue */ - sp->writeFlush = 0; - if (sp->state == sClosing) /* this may allow us to close... */ - CheckOkToClose(sp); - atalk_enablew(sp->gref); - break; - } - } /* whole data block acked */ - else /* Only some of the data was acked */ - { - short acked; - - acked = (pktNextRecvSeq - sp->firstRtmtSeq); - mp = releaseData(mp, acked); - if (eom) { - if (mp) { - gbuf_next(mp) = sp->sbuf_mb; - sp->sbuf_mb = mp; - } - } else - sp->csbuf_mb = mp; - - sp->firstRtmtSeq = pktNextRecvSeq; /* Update seq # oldest byte */ - break; - } - } while (LT(sp->firstRtmtSeq, pktNextRecvSeq)); - - if (sp->sData) /* We've got stuff to send */ - sp->callSend = 1; - -noack: - sendWdwSeq = UAS_VALUE_NTOH(f->pktRecvWdw) - 1 + pktNextRecvSeq; - - if (GT(sendWdwSeq, sp->sendWdwSeq)) /* Don't make send window smaller */ - { - sp->callSend = 1; /* His recv wdw opened, so see */ - /* if we can send more data */ - sp->sendWdwSeq = sendWdwSeq; - } -} - -/* - * RXData - * - * We just got a Data Packet - * See if it came from anybody we know. - * - * Called from ADSP Packet with interrupts masked completely OFF - * *** In MacOSX interrupts do not seem to be off! *** - * - * INPUTS: - * Stream pointer - * gbuf_t pointer - * Pointer to ADSP header, (part of the mblk pointer to by mp) - * Length of header plus data - * OUTPUTS: - * Returns 1 if packet was ignored - */ -int RXData(sp, mp, f, len) /* (CCBPtr sp, ADSP_FRAMEPtr f, word len) */ - CCBPtr sp; - register gbuf_t *mp; - ADSP_FRAMEPtr f; - int len; -{ - int offset; - int PktFirstByteSeq; - short cnt; - char eom; - - len -= ADSP_FRAME_LEN; - - /* Does packet have eom bit set? */ - eom = (f->descriptor & ADSP_EOM_BIT) ? 
1 : 0; - - dPrintf(D_M_ADSP, D_L_TRACE, - ("RXData: sp=0x%x, mbuf=0x%x, f=0x%x, len=%d, eom=%d\n", - (unsigned)sp, (unsigned)mp, (unsigned)f, len, eom)); - - KERNEL_DEBUG(DBG_ADSP_RCV, 1, sp, mp, len, eom); - - trace_mbufs(D_M_ADSP, " mp", mp); - - PktFirstByteSeq = UAL_VALUE_NTOH(f->pktFirstByteSeq); /* Local copy */ - - if (GT(PktFirstByteSeq, sp->recvSeq)) /* missed a packet (out of order) */ - { - if (sp->badSeqCnt++ > sp->badSeqMax) /* Need to send rexmit advice */ - sp->sendCtl |= B_CTL_RETRANSMIT; - CheckRecvSeq(sp, f); /* Will set send ACK flag if requested */ - CheckReadQueue(sp); - gbuf_freem(mp); - - KERNEL_DEBUG(DBG_ADSP_RCV, 2, sp, 0, 0, 0); - trace_mbufs(D_M_ADSP, " exRXD m", sp->rbuf_mb); - dPrintf(D_M_ADSP, D_L_TRACE, (" End RXData - missed a packet\n")); - - return 0; - } - - if (LTE(PktFirstByteSeq + len + eom, sp->recvSeq)) { /* duplicate data? */ - CheckRecvSeq(sp, f); /* Will set send ACK flag if requested */ - CheckReadQueue(sp); - gbuf_freem(mp); - - KERNEL_DEBUG(DBG_ADSP_RCV, 3, sp, 0, 0, 0); - trace_mbufs(D_M_ADSP, " exRXD m", sp->rbuf_mb); - dPrintf(D_M_ADSP, D_L_TRACE, (" End RXData - duplicate data\n")); - - return 0; - } - - sp->badSeqCnt = 0; /* reset out of sequence pckt counter */ - - cnt = sp->recvSeq - PktFirstByteSeq; /* # bytes we've seen already */ - - offset = ((unsigned char *)&f->data[cnt]) - (unsigned char *)gbuf_rptr(mp); - gbuf_rinc(mp,offset); - /* point recv mblk to data (past headers) */ - - len -= cnt; /* # of new data bytes */ - - cnt = len; /* # bytes left to deal with */ - - if (!sp->rData) /* Recv bfr is empty */ - { - sp->rData = 1; /* Not empty any more */ - - if ((sp->rpb)->ioc == (caddr_t)mp) { - dPrintf(D_M_ADSP, D_L_TRACE, - ("RXData: (pb->ioc == mp) no stored data\n")); - KERNEL_DEBUG(DBG_ADSP_RCV, 4, sp, sp->rpb, 0, 0); - } - if (eom) - sp->rbuf_mb = mp; - else - sp->crbuf_mb = mp; - } /* Recv queue is empty */ - - /* - * Else, there's already stored data. - */ - else { - gbuf_t *rmp; - /* - * Is this a new "message?" - */ - if (eom) { - if (sp->crbuf_mb) { - gbuf_linkb(sp->crbuf_mb, mp); - mp = sp->crbuf_mb; - sp->crbuf_mb = 0; - } - if ((rmp = sp->rbuf_mb)) { - /* - * Add it to the end - */ - while(gbuf_next(rmp)) - rmp = gbuf_next(rmp); - gbuf_next(rmp) = mp; - } else - sp->rbuf_mb = mp; - } else if (sp->crbuf_mb) - gbuf_linkb(sp->crbuf_mb, mp); - else - sp->crbuf_mb = mp; - } - sp->recvSeq += (cnt + eom); /* We've got these bytes */ - - /* %%% We really should call check recv seq first, but let's - * continue to do it down here. We really want to service the - * received packet first, and maybe reenable scc ints before - * doing anything that might take a long while - */ - - CheckRecvSeq(sp, f); /* Will set send ACK flag if requested */ - CheckReadQueue(sp); - KERNEL_DEBUG(DBG_ADSP_RCV, 5, sp, sp->rbuf_mb, 0, 0); - trace_mbufs(D_M_ADSP, " eRXD m", sp->rbuf_mb); - return 0; -} /* RXData */ diff --git a/bsd/netat/adsp_Status.c b/bsd/netat/adsp_Status.c deleted file mode 100644 index 4643504b9..000000000 --- a/bsd/netat/adsp_Status.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License.
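Annotation: CheckRecvSeq above is a Jacobson/Karels-style estimator in the 1/6-second tick units noted earlier. It takes samples only from flights that were never retransmitted (the resentData test, the same idea as Karn's rule), smooths with gain 1/8, tracks mean deviation with gain 1/4, and sets the retransmit interval to the smoothed round trip plus twice the deviation (TCP later settled on four times). Restated with invented names: sample for the measured trip, srtt for sp->roundTrip, dev for sp->deviation, rto for sp->rtmtInterval:

    short diff = sample - srtt;   /* signed error; the original adds +1 */
    srtt += diff >> 3;            /* srtt += error / 8 */
    if (diff < 0)
        diff = -diff;             /* |error| */
    dev += (diff - dev) >> 2;     /* dev += (|error| - dev) / 4 */
    rto = srtt + 2 * dev;         /* new retransmit interval */

The same branch grows pktSendMax by one per cleanly acked flight, capped at 50, the slow-start counterpart of the pktSendMax = 1 reset back in adspOpen.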
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * dspStatus.c - * - * From Mike Shoemaker v01.04 06/15/90 mbs - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -int calcSendQ(CCBPtr); - -/* - * calcSendFree - * - * INPUTS: - * sp ADSP Stream - * OUTPUTS: - * # of bytes avail in local send queue - */ -int CalcSendQFree(sp) /* (CCBPtr sp) */ - CCBPtr sp; -{ - int bytes; - - bytes = calcSendQ(sp); - bytes = sp->sbuflen - bytes; - - if (bytes < 0) - return 0; - return bytes; -} - -int -calcSendQ(sp) - CCBPtr sp; -{ - register gbuf_t *mp; - int bytes = 0; - - if (sp->sData) { /* There is data in buffer */ - if ((mp = sp->sbuf_mb)) { - do { - bytes += gbuf_msgsize(mp); - mp = gbuf_next(mp); - } while (mp); - } - if ((mp = sp->csbuf_mb)) - bytes += gbuf_msgsize(mp); - } - return bytes; -} - -/* - * dspStatus - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * - * OUTPUTS: - * <-- statusCCB Pointer to the connection control block - * <-- sendQPending bytes waiting to be sent or acknowledged - * <-- sendQFree available buffer in bytes of send queue - * <-- recvQPending bytes waiting to be read from queue - * <-- recvQFree available buffer in bytes of receive queue - * - * ERRORS: - * errRefNum bad connection refnum - * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -*/ -int adspStatus(sp, pb) /* (DSPPBPtr pb) */ - CCBPtr sp; - register struct adspcmd *pb; -{ - short bytes; - - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - pb->u.statusParams.ccbPtr = (TPCCB)sp; - - /* - * pending bytes in send queue - */ - if (sp->sData) - bytes = calcSendQ(sp); - else - bytes = 0; - pb->u.statusParams.sendQPending = bytes; - - /* available buffer space in send queue */ - pb->u.statusParams.sendQFree = CalcSendQFree(sp); - - /* - * pending bytes in recv queue - */ - if (sp->rData) - bytes = calcRecvQ(sp); - else - bytes = 0; - pb->u.statusParams.recvQPending = bytes; - - /* available buffer space in receive queue */ - pb->u.statusParams.recvQFree = CalcRecvWdw(sp); - - pb->ioResult = 0; - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - return 0; - -} diff --git a/bsd/netat/adsp_Timer.c b/bsd/netat/adsp_Timer.c deleted file mode 100644 index 455ab5e33..000000000 --- a/bsd/netat/adsp_Timer.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1990, 1996-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* - * Timer.c - * - * From v01.12 06/22/90 mbs - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - - -/* - * TrashSession - * - * Cleanly abort a session that might be open. Called if probe timer expires, - * or from AppleTalk event handler (close or network gone away) - * - * Only call if the session is active (I.e. not for closed or listeners) - * - * INPUTS: - * session pointer - * OUTPUTS: - * none - */ -void TrashSession(CCBPtr); - -void TrashSession(sp) /* (CCBPtr sp) */ - CCBPtr sp; -{ - - sp->userFlags |= eTearDown; - sp->removing = 1; - sp->state = sClosed; - - DoClose(sp, errAborted, 1); -} - - -/* - * DoTimerElem - * - * INPUTS: - * - * OUTPUTS: - * - */ -void DoTimerElem(TimerElemPtr); - -void DoTimerElem(t) /* (TimerElemPtr t) */ - TimerElemPtr t; -{ - CCBPtr sp; - - sp = (CCBPtr)((Ptr)t - t->type); /* Recover stream pointer for this guy */ - - if (t->type == kFlushTimerType) { /* flush write data time just fired */ - if (sp->sData) { /* If there's any data, flush it. */ - sp->writeFlush = 1; - goto send; - } - } else if (t->type == kRetryTimerType) { - if (sp->waitingAck) { - - sp->waitingAck = 0; - sp->sendSeq = sp->firstRtmtSeq; - sp->pktSendCnt = 0; - sp->resentData = 1; /* Had to resend data */ - sp->noXmitFlow = 1; /* Don't incr. max packets. 
*/ - - if ((sp->pktSendMax /= 2) == 0) /* Back off on max # packets - * sent */ - sp->pktSendMax = 1; - - if ((sp->roundTrip *= 2) > sp->probeInterval) - sp->roundTrip = sp->probeInterval; - sp->rtmtInterval = sp->roundTrip + ((short)2 * - (short)sp->deviation); - goto send; - } - } else if (t->type == kAttnTimerType) { - if (sp->sapb) { /* Unacknowledged attn pkt */ - sp->sendAttnData = 1; - goto send; - } - } else if (t->type == kResetTimerType) { - if (sp->frpb) { /* Unacknowledged forward reset */ - sp->sendCtl |= B_CTL_FRESET; - goto send; - } - } else if (t->type == kProbeTimerType) { - if (sp->state == sOpen || sp->state == sClosing) { - if (--sp->probeCntr == 0) { /* Connection died */ - TrashSession(sp); - return; - } else { - InsertTimerElem(&adspGlobal.slowTimers, &sp->ProbeTimer, - sp->probeInterval); - sp->sendCtl |= B_CTL_PROBE; - goto send; - } - } else if (sp->state == sOpening) { - if ((sp->openState == O_STATE_OPENWAIT) || - (sp->openState == O_STATE_ESTABLISHED)) - { - if (--sp->openRetrys == 0) { /* Oops, didn't open */ - sp->state = sClosed; - DoClose(sp, errOpening, 1); - return; - } /* open failed */ - else /* Send packet again */ - { - sp->sendCtl |= (sp->openState == O_STATE_OPENWAIT) ? - B_CTL_OREQ : B_CTL_OREQACK; - goto send; - } - } /* we're opening */ - } - } - - else { - dPrintf(D_M_ADSP, D_L_ERROR, ("DoTimerElem:Unknown timer type!\n")); - } - - return; - -send: - CheckSend(sp); -} - -void TimerTick_funnel(void *arg); - -void TimerTick_funnel(__unused void *arg) -{ - atalk_lock(); - TimerTick(); - atalk_unlock(); -} - -static int StopTimer; - -/* - * TimerTick - * - * Called 6 times per second - * INPUTS: - * - * OUTPUTS: - * - */ -void TimerTick() /* (void) */ -{ - - if (StopTimer) { - return; - } - TimerQueueTick(&adspGlobal.slowTimers); - TimerQueueTick(&adspGlobal.fastTimers); - timeout(TimerTick_funnel, (caddr_t)0, HZ/6); -} - -void TimerStop() -{ - StopTimer = 1; - untimeout(TimerTick_funnel, (caddr_t) 0); -} diff --git a/bsd/netat/adsp_TimerElem.c b/bsd/netat/adsp_TimerElem.c deleted file mode 100644 index dd04ee244..000000000 --- a/bsd/netat/adsp_TimerElem.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
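- */

Aside: the kRetryTimerType arm of the deleted DoTimerElem() halves pktSendMax and doubles roundTrip on a timeout, while the ack path (shown earlier, before RXData) smooths roundTrip and deviation with 1/8 and 1/4 gains — the ">> 3" and ">> 2" shifts. Both updates reduced to a standalone sketch under the same fixed-point arithmetic; the field names mirror the CCB, but this is not the kernel code:

    #include <stdio.h>

    struct rtt {
        short roundTrip;     /* smoothed RTT, in timer ticks   */
        short deviation;     /* smoothed mean deviation        */
        short rtmtInterval;  /* retransmit timeout             */
    };

    /* On a timed ack: fold in the new sample with 1/8 and 1/4 gains. */
    static void rtt_sample(struct rtt *r, short sample)
    {
        short diff = sample - r->roundTrip + 1;
        r->roundTrip += diff >> 3;
        if (diff < 0)                  /* take absolute value */
            diff = -diff;
        r->deviation += (diff - r->deviation) >> 2;
        r->rtmtInterval = r->roundTrip + 2 * r->deviation;
    }

    /* On a retransmit timeout: back off, clamped to the probe interval. */
    static void rtt_backoff(struct rtt *r, short probeInterval)
    {
        if ((r->roundTrip *= 2) > probeInterval)
            r->roundTrip = probeInterval;
        r->rtmtInterval = r->roundTrip + 2 * r->deviation;
    }

    int main(void)
    {
        struct rtt r = { 6, 0, 6 };
        rtt_sample(&r, 12);            /* one slow ack */
        printf("srtt=%d dev=%d rto=%d\n", r.roundTrip, r.deviation, r.rtmtInterval);
        rtt_backoff(&r, 180);
        printf("after timeout: rto=%d\n", r.rtmtInterval);
        return 0;
    }

-/*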
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * TimerElem.c - * - * From v01.00 04/15/90 mbs - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - - -extern void DoTimerElem(TimerElemPtr); /* (TimerElemPtr t); - * External routine called to - * process each one. */ - -/* - * InsertTimerElem - * - * INPUTS: - * qhead Address of ptr to first item in list - * t timer element to link in - * vbl timer value to use - * OUTPUTS: - * void - */ -void InsertTimerElem(qhead, t, val) - /* (TimerElemPtr *qhead, TimerElemPtr t, word val) */ - TimerElemPtr *qhead, t; - int val; -{ - TimerElemPtr p; /* parent pointer */ - TimerElemPtr n; /* current */ - - if (t->onQ) { - /* - * someone else beat us to the punch and put this - * element back on the queue, just return in this case - */ - return; - } - p = (TimerElemPtr)qhead; - - while ((n = p->link)) { - if (val <= n->timer) /* Do we go in front of this? */ - { - n->timer -= val; /* Yes, adjust his delta */ - break; /* and go link us in */ - } - val -= n->timer; /* No, subtract off delta from our value */ - p = n; - } /* while */ - - /* It must go after item pointed to by p and in front of item - * pointed to by n */ - - t->onQ = 1; /* we're linked in now */ - p->link = t; /* parent points to us */ - t->timer = val; /* this is our value */ - t->link = n; /* we point to n */ - -} - - -/* - * RemoveTimerElem - * - * INPUTS: - * qhead Address of ptr to first item in list - * t timer element to link in - * OUTPUTS: - * void - */ -void RemoveTimerElem(qhead, t) /* (TimerElemPtr *qhead, TimerElemPtr t) */ - TimerElemPtr *qhead, t; -{ - TimerElemPtr p; /* parent pointer */ - TimerElemPtr n; /* current */ - - if ( !t->onQ) { - /* - * someone else beat us to the punch and took this - * element off of the queue, just return in this case - */ - return; - } - p = (TimerElemPtr)qhead; - - while ((n = p->link)) /* Get next item in queue */ - { - if (n == t) /* Is it us? */ - { - if ((p->link = n->link)) /* Link our parent to our child */ - { - n->link->timer += t->timer; /* and update child's timer */ - } - n->onQ = 0; /* Not on linked list anymore */ - break; - } - p = n; - } /* while */ - -} - - -/* - * TimerQueueTick - * - * INPUTS: - * qhead Address of ptr to first item in list - * - * OUTPUTS: - * void - */ -void TimerQueueTick(qhead) /* (TimerElemPtr *qhead) */ - TimerElemPtr *qhead; -{ - TimerElemPtr p; /* parent pointer */ - TimerElemPtr n; /* current */ - - p = (TimerElemPtr)qhead; - if (p->link) { /* Is anything on queue? */ - p->link->timer--; /* Yes, decrement by a tick */ - while ((n = p->link) && - (n->timer == 0)) /* Next guy needs to be serviced */ - { - p->link = n->link; /* Unlink us */ - n->onQ = 0; - - DoTimerElem(n); - - p = (TimerElemPtr)qhead; - } /* while */ - } -} diff --git a/bsd/netat/adsp_Write.c b/bsd/netat/adsp_Write.c deleted file mode 100644 index acc5ad4e9..000000000 --- a/bsd/netat/adsp_Write.c +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
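- */

Aside: the TimerElem routines deleted above keep each entry's expiry as a delta from its predecessor, so TimerQueueTick() only ever decrements the list head. The same insert/tick discipline in a self-contained miniature (simplified: no onQ guard, printf in place of DoTimerElem; the cast of qhead relies on the link field being first, exactly as the original does):

    #include <stdio.h>

    struct tmr { struct tmr *link; int timer; const char *name; };

    static void tmr_insert(struct tmr **qhead, struct tmr *t, int val)
    {
        struct tmr *p = (struct tmr *)qhead, *n;  /* link is 1st member */
        while ((n = p->link)) {
            if (val <= n->timer) { n->timer -= val; break; }
            val -= n->timer;               /* consume predecessor delta */
            p = n;
        }
        p->link = t; t->timer = val; t->link = n;
    }

    static void tmr_tick(struct tmr **qhead)
    {
        struct tmr *n;
        if (!*qhead) return;
        (*qhead)->timer--;                 /* only the head is touched */
        while ((n = *qhead) && n->timer == 0) {
            *qhead = n->link;
            printf("fired: %s\n", n->name);
        }
    }

    int main(void)
    {
        struct tmr *q = NULL;
        struct tmr a = {0, 0, "probe"}, b = {0, 0, "retry"};
        tmr_insert(&q, &a, 3);
        tmr_insert(&q, &b, 1);             /* sorts ahead; a's delta -> 2 */
        for (int i = 0; i < 3; i++)
            tmr_tick(&q);                  /* retry fires @1, probe @3 */
        return 0;
    }

-/*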
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* dspWrite.c - * From Mike Shoemaker v01.13 06/21/90 mbs for MacOS - */ -/* - * Change log: - * 06/29/95 - Modified to handle flow control for writing (Tuyen Nguyen) - * 09/07/95 - Modified for performance (Tuyen Nguyen) - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - - -int FillSendQueue(CCBPtr, struct adspcmd *); - -/* - * FillSendQueue - * - * INPUTS: - * sp stream - * OUTPUTS: - * none - */ -int FillSendQueue( /* (CCBPtr sp) */ - register CCBPtr sp, - register struct adspcmd *pb) /* The write PB we're playing with */ -{ - gbuf_t *mb, *nmb; - int eom; /* True if should set eom in header */ - int cnt; /* # of bytes in this write */ - int err = 0; - - cnt = pb->u.ioParams.reqCount - pb->u.ioParams.actCount; - eom = pb->u.ioParams.eom ? F_EOM : 0; - - if (cnt == 0 && eom == 0) /* Nothing to do here, complete it */ - goto unlink; - - /* The 1st mbuf in the pb->mp chain (mb) is the adspcmd structure. - The 2nd mbuf (nmb) will be the beginning of the data. */ - mb = pb->mp; - nmb = gbuf_cont(mb); - if (gbuf_len(mb) > sizeof(struct adspcmd)) { - if ((nmb = gbuf_dupb(mb)) == 0) { - gbuf_wset(mb,sizeof(struct adspcmd)); - err = errDSPQueueSize; - goto unlink; - } - gbuf_wset(mb,sizeof(struct adspcmd)); - gbuf_rinc(nmb,sizeof(struct adspcmd)); - gbuf_cont(nmb) = gbuf_cont(mb); - } else if (nmb == 0) { - if ((nmb = gbuf_alloc(1, PRI_LO)) == 0) { - err = errENOBUFS; - goto unlink; - } - } - gbuf_cont(mb) = 0; - - sp->sData = 1; /* note that there is data to send */ - if ((mb = sp->csbuf_mb)) { /* add to the current message */ - gbuf_linkb(mb, nmb); - } else - sp->csbuf_mb = nmb; /* mark the buffer we are currently filling */ - if (eom) { - if ((mb = sp->sbuf_mb)) { - while (gbuf_next(mb)) - mb = gbuf_next(mb); - gbuf_next(mb) = sp->csbuf_mb; /* add the current item */ - } else - sp->sbuf_mb = sp->csbuf_mb; - sp->csbuf_mb = 0; /* if its done, no current buffer */ - } - pb->u.ioParams.actCount += cnt; /* Update count field in param blk */ - - if (pb->u.ioParams.actCount == pb->u.ioParams.reqCount) { - /* Write is complete */ -unlink: - if (pb->u.ioParams.flush) /* flush the send Q? 
*/ - sp->writeFlush = 1; - - pb->ioResult = err; - if (err) - atalk_notify(sp->gref, EIO); - gbuf_freem(pb->mp); - } - - return 0; -} /* FillSendQueue */ - -/* - * dspWrite - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * --> reqCount requested number of bytes to write - * --> dataPtr pointer to buffer for reading bytes into - * --> eom one if end-of-message, zero otherwise - * - * OUTPUTS: - * <-- actCount actual number of bytes written - * - * ERRORS: - * errRefNum bad connection refnum - * errState connection is not open - * errAborted request aborted by Remove or Close call - */ -int adspWrite(sp, pb) /* (DSPPBPtr pb) */ - CCBPtr sp; - struct adspcmd *pb; -{ - - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; /* no stream, so drop the message */ - } - - if (sp->state != sOpen) { /* Not allowed */ - pb->ioResult = errState; - atalk_notify(sp->gref, ENOTCONN); - gbuf_freem(pb->mp); - return 0; - } - - pb->u.ioParams.actCount = 0; /* Set # of bytes so far to zero */ - - FillSendQueue(sp, pb); /* Copy from write param block to send queue */ - - CheckSend(sp); /* See if we should send anything */ - return 0; -} - -#ifdef notdef -int adsp_check = 1; - -CheckQueue(sp) - CCBPtr sp; -{ - register gbuf_t *mp, *tmp; - unsigned char current; - int current_valid = 0; - - if (adsp_check == 0) - return; - if (mp = sp->sbuf_mb) { - current = *mp->b_rptr; - current_valid = 1; - while (mp) { - tmp = mp; - while (tmp) { - current = CheckData(tmp->b_rptr, tmp->b_wptr - tmp->b_rptr, - current); - tmp = tmp->b_cont; - } - mp = mp->b_next; - } - } - if (mp = sp->csbuf_mb) { - if (current_valid == 0) - current = *mp->b_rptr; - tmp = mp; - while (tmp) { - current = CheckData(tmp->b_rptr, tmp->b_wptr - tmp->b_rptr, - current); - tmp = tmp->b_cont; - } - } -} - - -int adsp_bad_block_count; -char *adsp_bad_block; - -CheckData(block, size, current) - char *block; - int size; - u_char current; -{ - register int anError = 0; - register int i; - - for (i = 0; i < size; i++) { - if ((block[i] & 0xff) != (current & 0xff)) { - if (!anError) { - adsp_bad_block = block; - } - anError++; - } - current++; - } - - if (anError) { - adsp_bad_block_count++; - } - return current; -} -#endif diff --git a/bsd/netat/adsp_attention.c b/bsd/netat/adsp_attention.c deleted file mode 100644 index 095780c1e..000000000 --- a/bsd/netat/adsp_attention.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
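- */

Aside: the deleted FillSendQueue()/adspWrite() pair maintains a two-level send queue: csbuf_mb accumulates the message currently being written, and an EOM write seals it onto sbuf_mb, a chain of completed messages linked through gbuf_next. The shape of that bookkeeping in plain C — a hypothetical msg type, not the gbuf API:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct msg { struct msg *next; char text[32]; };

    struct sendq {
        struct msg *complete;   /* finished messages (sbuf_mb)    */
        struct msg *current;    /* message being built (csbuf_mb) */
    };

    /* Append bytes; an eom write seals the current message. */
    static void sq_write(struct sendq *q, const char *data, int eom)
    {
        if (!q->current)
            q->current = calloc(1, sizeof *q->current);  /* unchecked, demo */
        strncat(q->current->text, data,
                sizeof q->current->text - strlen(q->current->text) - 1);
        if (eom) {
            struct msg **tail = &q->complete;
            while (*tail)
                tail = &(*tail)->next;     /* walk gbuf_next-style chain */
            *tail = q->current;
            q->current = NULL;
        }
    }

    int main(void)
    {
        struct sendq q = { NULL, NULL };
        sq_write(&q, "hel", 0);
        sq_write(&q, "lo", 1);             /* seals message 1 */
        sq_write(&q, "world", 1);          /* message 2 */
        for (struct msg *m = q.complete; m; m = m->next)
            printf("msg: %s\n", m->text);
        while (q.complete) {               /* tidy up */
            struct msg *m = q.complete;
            q.complete = m->next;
            free(m);
        }
        return 0;
    }

-/*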
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * dspAttention.c - * - * From Mike Shoemaker v01.05 03/16/90 mbs - */ -/* - * Change log: - * 06/29/95 - Modified to handle flow control for writing (Tuyen Nguyen) - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * dspAttention - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * --> attnCode client attention code - * --> attnSize size in bytes of attention data - * --> attnData pointer to attention data - * --> attnInterval attention retransmit interval - * (ignored by ADSP 1.5 & up) - * - * OUTPUTS: - * none - * - * ERRORS: - * errRefNum bad connection refnum - * errState connection is not open - * errAttention attention message too long - * errAborted request aborted by Remove or Close call - */ -int adspAttention(register struct adspcmd *pb, register CCBPtr sp) -{ - register gbuf_t *mp, *nmp; - unsigned char uerr; - - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - if (sp->state != sOpen) { /* If we're not open, tell user to go away */ - pb->ioResult = errState; - uerr = ENOTCONN; -l_err: - atalk_notify(sp->gref, uerr); - gbuf_freem(pb->mp); - return 0; - } - - if (pb->u.attnParams.attnSize > attnBufSize) /* If data too big, bye-bye */ - { - pb->ioResult = errAttention; - uerr = ERANGE; - goto l_err; - } - - /* The 1st mbuf in the pb->mp chain (mp) is the adspcmd structure. - The 2nd mbuf (nmp) will be the beginning of the data. */ - mp = pb->mp; - if (pb->u.attnParams.attnSize) { - nmp = gbuf_cont(mp); - if (gbuf_len(mp) > sizeof(struct adspcmd)) { - if ((nmp = gbuf_dupb(mp)) == 0) { - gbuf_wset(mp, sizeof(struct adspcmd)); - uerr = ENOBUFS; - goto l_err; - } - gbuf_wset(mp, sizeof(struct adspcmd)); - gbuf_rinc(nmp, sizeof(struct adspcmd)); - gbuf_cont(nmp) = gbuf_cont(mp); - gbuf_cont(mp) = nmp; - } - } - pb->ioDirection = 1; /* outgoing attention data */ - if (sp->sapb) { /* Pending attentions already? */ - qAddToEnd((struct qlink **)&sp->sapb, (struct qlink *)pb); /* Just add to end of queue */ - } else { - sp->sendAttnData = 1; /* Start off this attention */ - pb->qLink = 0; - sp->sapb = pb; - CheckSend(sp); - } - pb->ioResult = 1; /* indicate that the IO is not complete */ - return 0; -} diff --git a/bsd/netat/adsp_internal.h b/bsd/netat/adsp_internal.h deleted file mode 100644 index a26ff49af..000000000 --- a/bsd/netat/adsp_internal.h +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _NETAT_ADSP_INTERNAL_H_ -#define _NETAT_ADSP_INTERNAL_H_ - -#include - -#ifdef __APPLE_API_OBSOLETE -#ifdef KERNEL_PRIVATE - -/* from h/adsp_portab.h */ - -/* TypeDefs for the basic data bytes. */ - -typedef unsigned char byte, *bytePtr; - -#ifdef NOT_USED -typedef char int8; -typedef short int16; -typedef int int32; -#endif - -typedef unsigned char boolean; - -typedef unsigned short word; - -typedef unsigned int dword; - -#define BYTE_AT(x) (*((byte PTR)(x))) -#define WORD_AT(x) (*((word PTR)(x))) -#define DWORD_AT(x) (*((dword PTR)(x))) - -#define high(x) ((byte)((x) >> 8)) -#define low(x) ((byte)(x)) -#define hlword(h, l) (((byte)(l)) | (((byte)(h)) << 8)) - - -/* - * On a Mac, there is no need to byte-swap data on the network, so - * these macros do nothing - */ - -#define netw(x) x -#define netdw(x) x - -typedef struct -{ - at_net network; /* network number */ - byte nodeid; /* node number */ - byte socket; /* socket number */ -} AddrBlk, *AddrBlkPtr; - -typedef union -{ - at_inet_t a; -} AddrUnion, *AddrUnionPtr; - -/* End Portab.h */ - -/* from h/adsp_internal.h */ - -#undef T_IDLE - -/* -* Default Behavior for ADSP -*/ -#define ocIntervalDefault 6 -#define ocMaximumDefault 10 -#define probeIntervalDefault 180 - -/* -* MACROS for comparing 32-bit sequence numbers -*/ -#define GT(x,y) (((long)(x-y)) > (long) 0) -#define LT(x,y) (((long)(x-y)) < (long) 0) -#define GTE(x,y) (((long)(x-y)) >= (long) 0) -#define LTE(x,y) (((long)(x-y)) <= (long) 0) -#define BETWEEN(x,y,z) (LTE(x,y) && LTE(y,z)) - -/* - * Use the kernel tick counter for SysTicks. 
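- */

Aside: the GT/LT/GTE/LTE/BETWEEN macros above compare sequence numbers modulo 2^32 by testing the sign of the difference, so ordering survives wraparound; they assume a 32-bit long. A standalone demonstration using a fixed-width type, so it behaves the same where long is 64 bits:

    #include <stdio.h>
    #include <stdint.h>

    /* Signed-difference ("serial number") compares, per adsp_internal.h */
    #define GT(x,y)        ((int32_t)((x)-(y)) > 0)
    #define LTE(x,y)       ((int32_t)((x)-(y)) <= 0)
    #define BETWEEN(x,y,z) (LTE(x,y) && LTE(y,z))

    int main(void)
    {
        uint32_t a = 0xfffffff0u;   /* just before wrap */
        uint32_t b = 0x00000010u;   /* just after wrap  */

        printf("GT(b,a) = %d\n", GT(b, a));            /* 1: b is "later" */
        printf("BETWEEN = %d\n", BETWEEN(a, 0u, b));   /* 1: 0 in window  */
        return 0;
    }

-/*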
- */ - -#define SysTicks() lbolt - -/* - * Timer element used for handling timings - */ -typedef struct timerelem { - struct timerelem *link; - short timer; - char type; - unsigned onQ:1; /* Bit-fields are faster than booleans */ -} TimerElem; - -typedef TimerElem *TimerElemPtr; - -/* - * For AppleTalk Phase 2 event queue - */ -typedef struct { - Ptr qLink; - unsigned short qType; - ProcPtr callAddr; -} LAPEventElem; - -typedef LAPEventElem *LAPEventElemPtr; - -/* - * The Event types we're passed when an AppleTalk transition occurs - */ -#define AOpenTransition 0 -#define ACloseTransition 2 -#define ANetworkTransition 5 - -/* - * The element we're passed when a NetworkTransaction event occurs - */ -typedef struct TNetworkTransition { - Ptr private; /* pointer used internally by NetShare */ - ProcPtr netValidProc; /* pointer to the network valid procedure */ -} TNetworkTransition, *TPNetworkTransition; - -typedef long (*NetworkTransitionProcPtr)(TPNetworkTransition nettrans, - unsigned long thenet); -/* - * This is the connection control block - */ -typedef struct ccb { - /*---These fields may not change order or size-----------*/ - - struct ccb *ccbLink; /* link to next ccb */ - unsigned short state; /* state of the connection end */ - unsigned char userFlags; /* flags for unsolicited connection events */ - unsigned char localSocket; /* socket number of this connection end */ - AddrUnion remoteAddress; /* internet address of remote end */ - unsigned short attnCode; /* attention code received */ - unsigned short attnSize; /* size of received attention data */ - unsigned char *attnPtr; /* ptr to received attention data */ - unsigned short recvQPending; /* # bytes in receive queue %%% */ - /*------------------------------------------------------ */ - - struct adspcmd *opb; /* Outstanding open/close/remove/listens */ - struct adspcmd *spb; /* Outstanding Sends */ - struct adspcmd *sapb; /* Outstanding Send Attentions */ - struct adspcmd *frpb; /* Outstanding Forward Resets */ - struct adspcmd *rpb; /* Outstanding Read Requests */ - - struct ccb *otccbLink; /* link to next ccb */ - int pid; /* Process ID for CCB owner */ - - unsigned short remCID; /* Remote Connection ID */ - unsigned short locCID; /* Local Connection ID */ - int sendSeq; /* Seq number of next char to send to remote */ - int firstRtmtSeq; /* oldest seq # in local send queue */ - int sendWdwSeq; /* Seq # of last char remote has bfr for */ - int recvSeq; /* Seq of # of next char expected from rmte */ - int recvWdw; /* # of bytes local end has buffer space for */ - int attnSendSeq; /* Seq # of next attn pkt to send to remote */ - int attnRecvSeq; /* Seq # of next packet local end expects */ - int maxSendSeq; /* Highest seq # we ever sent on connection */ - - /* These must be in the first 255 bytes of the CCB */ - TimerElem ProbeTimer; /* Timer element for probes (and open) */ - TimerElem FlushTimer; /* Timer element for flushing data */ - TimerElem RetryTimer; /* Timer element for retransmissions */ - TimerElem AttnTimer; /* Timer element for attention packets */ - TimerElem ResetTimer; /* Timer element for forward resets */ - - short openInterval; /* Interval between open connection packets */ - short probeInterval; /* Interval between probes */ - short sendInterval; /* Interval before automatic flush */ - short rtmtInterval; /* Rexmit interval (dynamically determined) */ - - short sendCtl; /* Send control message bits */ - short sendBlocking; /* Flush unsent data if > than sendBlocking */ - short openRetrys; /* # of retrys for 
Connect & Accept */
-	short openRetrys;	/* (continued) */
-	short rbuflen;		/* Total size of receive buffer */
-	short sbuflen;		/* Total size of send buffer */
-	char pad;
-	char lockFlag;
-	char badSeqMax;		/* retransmit advice send threshold */
-	char badSeqCnt;		/* # of out-of-order packets received */
-	char useCheckSum;	/* true to use DDP checksums */
-	char openState;		/* Used for opening a connection (see below) */
-
-	gbuf_t *rbuf_mb;	/* message block for the recv buffer */
-	gbuf_t *crbuf_mb;
-	gbuf_t *sbuf_mb;	/* message block for the send buffer */
-	gbuf_t *csbuf_mb;
-	gbuf_t *attn_mb;	/* message block for the attention buffer */
-	gbuf_t *deferred_mb;	/* message block deferred for later processing */
-
-#ifdef NOT_USED
-	char ioDone;		/* flag for when the adsp header is busy */
-#endif
-	char probeCntr;		/* # of probes we can miss (counts down) */
-	char pktSendMax;	/* Max # of packets to send without an ack */
-	char pktSendCnt;	/* # of packets sent so far */
-
-	int sendStamp;		/* Time of last ackRequest */
-	int timerSeq;		/* Seq # of char corresponding to above time stamp */
-	short roundTrip;	/* Average Round-Trip time (in 6ths of a second) */
-	short deviation;	/* deviation from roundTrip time */
-
-	unsigned sData:1;		/* There's data in the send queue */
-	unsigned waitingAck:1;		/* We're waiting for an ack packet */
-	unsigned rData:1;		/* There's data in the receive queue */
-	unsigned resentData:1;		/* True when we resend data due to timeout */
-	unsigned sendDataAck:1;		/* True if he requested an ack */
-	unsigned sendAttnAck:1;		/* Must send attn acknowledge */
-	unsigned sendAttnData:1;	/* Must send attn data */
-	unsigned callSend:1;		/* Must call CheckSend() */
-	unsigned rbufFull:1;		/* We've closed our receive window. */
-	unsigned noXmitFlow:1;		/* True stops incrementing # of xmit
-					 * packets to send in a row after receiving
-					 * an ack packet. */
-	unsigned secureCCB:1;		/* True if this is a secure connection */
-	unsigned removing:1;		/* There is a dspRemove pending */
-	unsigned writeFlush:1;		/* Flush send queue even if # bytes to
-					 * send is less than send blocking. */
-	unsigned delay:1;		/* do not complete commands until user
-					 * *** NO LONGER USED IN KERNEL *** */
-	ADSP_FRAME f;			/* Used to send every packet */
-	ADSP_OPEN_DATA of;		/* Holds the data for the open exchange */
-	gref_t *gref;			/* The queue associated with the CCB */
-	gbuf_t *sp_mp;
-} CCB, *CCBPtr;
-
-
-/*
- * Change order and die !!! --- See the receive open packet code
- */
-#define O_STATE_NOTHING		0	/* Not opening */
-#define O_STATE_LISTEN		1	/* Listening for open request */
-#define O_STATE_OPENWAIT	2	/* Sent Req, waiting for Ack to open
-					 * request */
-#define O_STATE_ESTABLISHED	3	/* Got Req, send Req+Ack,waiting Ack */
-#define O_STATE_OPEN		4	/* Connection is open */
-
-/*
-* These bits are used in the sendCtl field to indicate what needs to be sent
-*/
-#define B_CTL_PROBE		0x0001
-#define B_CTL_OREQ		0x0002
-#define B_CTL_OACK		0x0004
-#define B_CTL_OREQACK		0x0008
-#define B_CTL_ODENY		0x0010
-#define B_CTL_CLOSE		0x0020
-#define B_CTL_FRESET		0x0040
-#define B_CTL_FRESETACK		0x0080
-#define B_CTL_RETRANSMIT	0x0100
-
-
-#define kProbeTimerType		offsetof(CCB, ProbeTimer)
-#define kFlushTimerType		offsetof(CCB, FlushTimer)
-#define kRetryTimerType		offsetof(CCB, RetryTimer)
-#define kAttnTimerType		offsetof(CCB, AttnTimer)
-#define kResetTimerType		offsetof(CCB, ResetTimer)
-
-/*
- * Used to manage the send receive queue
- */
-typedef struct {
-	short len;		/* # of bytes in this fragment */
-	char flags;		/* See #define's below */
-	char data[1];
-} HDR, *HDRPtr;
-
-#define HDR_LEN 3		/* Yes, I know it really is 4 bytes long... */
-
-#define F_GAP		0x03
-#define F_EOM		0x04
-#define F_WRAP		0x08
-#define F_VALID		0x10
-#define F_ENCRYPTED	0x20	/* %%% Needed ??? */
-#define F_LAST		0x40	/* This is last block in buffer */
-
-
-/* %%% Are these two used anymore? */
-#define sbufPtr(y) (&sp->sbuf[((y) < sp->sbuflen) ? (y) : ((y) - sp->sbuflen)])
-#define rbufPtr(y) (&sp->rbuf[((y) < sp->rbuflen) ? (y) : ((y) - sp->rbuflen)])

-/* End Internal.h */
-
-/* from h/adsp_supp.h */
-
-void CallUserRoutine(CCBPtr sp); /* (CCB FPTR sp); */
-
-
-/*
- * Add queue element to end of queue. Pass Address of ptr to
- * 1st element of queue
-int qAddToEnd(struct qlink **qhead, struct qlink *qelem);
- */
- /* (void FPTR FPTR qhead, void FPTR qelem); */
-
-/*
- * Hunt down a linked list of queue elements looking for an element with
- * 'data' at 'offset' bytes into the queue element.
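- */

Aside: the kProbeTimerType et al. values above are offsetof() results, which is what lets DoTimerElem() recover the owning CCB with ((Ptr)t - t->type). The same container-of idiom in miniature, with hypothetical structures in place of the CCB:

    #include <stdio.h>
    #include <stddef.h>

    struct timer { size_t type; };      /* type doubles as its own offset */

    struct conn {
        int id;
        struct timer probe;             /* embedded timer elements */
        struct timer retry;
    };

    #define kProbeType offsetof(struct conn, probe)
    #define kRetryType offsetof(struct conn, retry)

    /* Walk back from the embedded element to its container. */
    static struct conn *owner(struct timer *t)
    {
        return (struct conn *)((char *)t - t->type);
    }

    int main(void)
    {
        struct conn c = { 42, { kProbeType }, { kRetryType } };
        printf("probe owner id=%d\n", owner(&c.probe)->id);   /* 42 */
        printf("retry owner id=%d\n", owner(&c.retry)->id);   /* 42 */
        return 0;
    }

-/*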
- */ -void *qfind_b(void *qhead, word offset, word data); -void *qfind_w(void *qhead, word offset, word data); -void *qfind_p(void *qhead, word offset, void *ptr); -void *qfind_o(void *qhead, word offset, void *ptr); -void *qfind_m(CCBPtr qhead, void *match, ProcPtr compare_fnx); - - -/* - * Routines to handle sorted timer queues - */ -void InsertTimerElem(TimerElemPtr *qhead, TimerElemPtr t, int val); -void RemoveTimerElem(TimerElemPtr *qhead, TimerElemPtr t); -void TimerQueueTick(TimerElemPtr *qhead); - -/* from h/adsp_global.h */ - -typedef struct { - void *ccbList; /* Ptr to list of connection control blocks */ - - TimerElemPtr slowTimers; /* The probe timer list */ - TimerElemPtr fastTimers; /* The fast timer list */ - - unsigned short lastCID; /* Last connection ID assigned */ - char inTimer; /* We're inside timer routine */ -} GLOBAL; - -extern GLOBAL adspGlobal; - -/* Address of ptr to list of ccb's */ -#define AT_ADSP_STREAMS ((CCB **)&(adspGlobal.ccbList)) - -void CheckSend(CCBPtr); - -struct qlink { - struct qlink *qlinkp; -}; - -int qAddToEnd(struct qlink **, struct qlink *); - -void adspioc_ack(int, gbuf_t *, gref_t *); -int CalcRecvWdw(CCBPtr); -int calcRecvQ(CCBPtr); -int CalcSendQFree(CCBPtr); -int adsp_sendddp(CCBPtr, gbuf_t *, int, AddrUnion *, int); -int CheckReadQueue(CCBPtr); -int CheckOkToClose(CCBPtr); - -int RXData(CCBPtr, gbuf_t *, ADSP_FRAMEPtr, int); -int RXFResetAck(CCBPtr, ADSP_FRAMEPtr); -int RxClose(CCBPtr); -void CheckRecvSeq(CCBPtr, ADSP_FRAMEPtr); -int RXFReset(CCBPtr, ADSP_FRAMEPtr); -int RXAttention(CCBPtr, gbuf_t *, ADSP_FRAMEPtr, int); -CCBPtr FindSender(ADSP_FRAMEPtr, AddrUnion); -void DoClose(CCBPtr, int, int); -void completepb(CCBPtr, struct adspcmd *); -int adspReadAttention(CCBPtr, struct adspcmd *); -int adspMode(struct adspcmd *); -int CompleteQueue(struct adspcmd **, int); - -void CleanupGlobals(void); -void InitGlobals(void); -void TimerStop(void); -void TimerTick(void); - -void SndMsgUp(gref_t *, gbuf_t *); -int adspDeassignSocket(CCBPtr); -unsigned char adspAssignSocket(gref_t *gref, int); -int adspWriteHandler(gref_t *, gbuf_t *); -int adspReadHandler(gref_t *, gbuf_t *); - -int adsp_wput(gref_t *gref, gbuf_t *m); -int adspRelease(gref_t *); -int adsp_close(gref_t *); -int adspAllocateCCB(gref_t *); - -void NotifyUser(CCBPtr); -void UrgentUser(CCBPtr); - -unsigned short NextCID(void); - -#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* _NETAT_ADSP_INTERNAL_H_ */ diff --git a/bsd/netat/adsp_misc.c b/bsd/netat/adsp_misc.c deleted file mode 100644 index f546b3160..000000000 --- a/bsd/netat/adsp_misc.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-/*
- * These functions replace the Mk68 assembly routines found in qAddToEnd.s and
- * q????.s
- * Modified for MP, 1996 by Tuyen Nguyen
- * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX.
- */
-
-
-/* ----------------------------------------------------------------------
- * void qAddToEnd(void *qhead, void *qelem)
- *
- * INPUTS:
- *		Ptr to ptr to 1st item in queue
- *		Ptr to item to add to end of queue
- * OUTPUTS:
- *		none
- *
- * Assumptions: The link field is the FIRST field of the qelem structure.
- * ----------------------------------------------------------------------
- */
-int qAddToEnd(qhead, qelem)
-	struct qlink **qhead;
-	struct qlink *qelem;
-{
-	/* define our own type to access the next field. NOTE THAT THE "NEXT"
-	 * FIELD IS ASSUMED TO BE THE FIRST FIELD OF THE STRUCTURE
-	 */
-
-	register struct qlink *q;
-
-	/* Scan the linked list to the end and update the previous
-	 * element next field. (do that protected).
-	 */
-
-	q = *qhead;
-	if (q) {
-		while (q->qlinkp) {
-			/* are we about to link to ourself */
-			if (q == qelem)
-				goto breakit;
-			q = q->qlinkp;
-		}
-		q->qlinkp = qelem;
-	}
-	else {
-		*qhead = qelem;
-	}
-	qelem->qlinkp = (struct qlink *) 0;
-breakit:
-#ifdef NOTDEF
-	DPRINTF("%s: qhead=%x added elem=%x\n","qAddToEnd", qhead, qelem);
-#endif
-	return 0;
-}
-
-
-
-/* ----------------------------------------------------------------------
- * qfind_m
- * void* qfind_m(void *qhead, void NPTR match, ProcPtr compare_fnx)
- *
- * Hunt down a linked list of queue elements calling the compare
- * function on each item. When the compare function returns true,
- * return ptr to the queue element.
- *
- *
- * INPUTS:
- *		qhead		Address of ptr to first item in queue
- *		match
- *		compare_fnx
- * OUTPUTS:
- *		D0 & A0		Ptr to queue element or NIL
- * REGISTERS:
- *		D0,D1,A0,A1
- * ----------------------------------------------------------------------
- */
-void* qfind_m(qhead, match, compare_fnx)
-	CCBPtr qhead;
-	void *match;
-	ProcPtr compare_fnx;
-{
-	CCBPtr queue_item = qhead;
-
-	while (queue_item) {
-		if ((*compare_fnx)(queue_item,match))
-			break;
-
-		queue_item = queue_item->ccbLink;
-	}
-
-	return (queue_item);
-}
diff --git a/bsd/netat/adsp_reset.c b/bsd/netat/adsp_reset.c
deleted file mode 100644
index f52f42ab5..000000000
--- a/bsd/netat/adsp_reset.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License.
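- */

Aside: qAddToEnd() above works for any structure whose next pointer is its FIRST field, which is why CCB and adspcmd queues can share it. A self-contained rendering of that contract — a local re-implementation for illustration, keeping the self-link guard:

    #include <stdio.h>

    struct qlink { struct qlink *qlinkp; };

    /* Same contract as the deleted routine: the link field must be
     * first, so any queue element can be cast to struct qlink. */
    static void q_add_to_end(struct qlink **qhead, struct qlink *qelem)
    {
        struct qlink *q = *qhead;
        if (q) {
            while (q->qlinkp) {
                if (q == qelem)        /* already queued: don't self-link */
                    return;
                q = q->qlinkp;
            }
            q->qlinkp = qelem;
        } else {
            *qhead = qelem;
        }
        qelem->qlinkp = NULL;
    }

    struct cmd { struct cmd *qLink; int code; };  /* qLink first, as in adspcmd */

    int main(void)
    {
        struct cmd *head = NULL, a = {0, 1}, b = {0, 2};
        q_add_to_end((struct qlink **)&head, (struct qlink *)&a);
        q_add_to_end((struct qlink **)&head, (struct qlink *)&b);
        for (struct cmd *c = head; c; c = c->qLink)
            printf("cmd %d\n", c->code);           /* 1 then 2 */
        return 0;
    }

-/*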
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Reset.c - * - * From v01.15 07/11/90 mbs - */ -/* - * Change log: - * 06/29/95 - Modified to handle flow control for writing (Tuyen Nguyen) - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* - * RXFReset - * - * We just got a Forward Reset Packet. - * - * Called with interrupts OFF - * - * INPUTS: - * stream pointer - * Pointer to ADSP header, - * OUTPUTS: - * Returns 1 if packet was ignored - */ -int RXFReset(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ - CCBPtr sp; - ADSP_FRAMEPtr f; -{ - unsigned int pktFirstByteSeq; - unsigned int hi; - register gbuf_t *mp; - register struct adspcmd *pb; - - pktFirstByteSeq = UAL_VALUE_NTOH(f->pktFirstByteSeq); - - hi = sp->recvSeq + CalcRecvWdw(sp); - - /* - * Must do this with interrupts OFF - */ - if (BETWEEN(sp->recvSeq, pktFirstByteSeq, hi)) /* Is this acceptable? 
*/ - { - sp->recvSeq = pktFirstByteSeq; - while ((mp = sp->rbuf_mb)) { /* clear the receive queue */ - sp->rbuf_mb = gbuf_next(mp); - gbuf_freem(mp); - } - if (sp->crbuf_mb) { - gbuf_freem(sp->crbuf_mb); - sp->crbuf_mb = 0; - } - sp->rData = 0; - sp->rbufFull = 0; - sp->userFlags |= eFwdReset; /* Set forward reset received Flag */ - - mp = gbuf_alloc(sizeof(struct adspcmd), PRI_HI); - pb = (struct adspcmd *)gbuf_rptr(mp); - gbuf_winc(mp,sizeof(struct adspcmd)); - pb->ioc = 0; - pb->mp = mp; - - pb->csCode = dspReset; - pb->ioResult = 0; - completepb(sp, pb); - sp->userFlags &= ~eFwdReset; - } - - if (LTE(pktFirstByteSeq, hi)) { - sp->sendCtl |= B_CTL_FRESETACK; /* Ack it if it's OK, or a duplicate */ - sp->callSend = 1; - } - - return 0; -} - - -/* - * RXFResetAck - * - * We just got a Forward Reset Acknowledgement packet - * - * Called with interrupts OFF - * - * INPUTS: - * stream pointer - * Pointer to ADSP header, - * OUTPUTS: - * Returns 1 if packet was ignored - */ -int RXFResetAck(sp, f) /* (CCBPtr sp, ADSP_FRAMEPtr f) */ - CCBPtr sp; - ADSP_FRAMEPtr f; -{ - unsigned int PktNextRecvSeq; - - if (sp->frpb == 0) /* Not expecting frwd reset Ack packet */ - return 1; - - PktNextRecvSeq = UAL_VALUE_NTOH(f->pktNextRecvSeq); - - if (BETWEEN(sp->sendSeq, PktNextRecvSeq, sp->sendWdwSeq+1)) { - struct adspcmd *pb; - - RemoveTimerElem(&adspGlobal.fastTimers, &sp->ResetTimer); - /* Remove timer */ - - /* - * Interrupts are OFF here while we muck with the linked list - */ - pb = sp->frpb; /* Unlink copy of user's parameter block */ - sp->frpb = (struct adspcmd *)pb->qLink; - - pb->ioResult = 0; - completepb(sp, pb); /* complete(pb, 0); */ - - if (sp->state == sClosing) /* this ack may allow us to close... */ - CheckOkToClose(sp); - - if (sp->frpb) /* Another to send? */ - { - sp->callSend = 1; - sp->sendCtl |= B_CTL_FRESET; - } - } - - return 0; -} - - -/* - * dspReset - * - * INPUTS: - * --> ccbRefNum refnum of connection end - * - * OUTPUTS: - * none - * - * ERRORS: - * errRefNum bad connection refnum - * errState connection is not open - * errAborted request aborted by Remove or Close call - */ -int adspReset(sp, pb) /* (DSPPBPtr pb) */ - CCBPtr sp; - struct adspcmd *pb; -{ - register gbuf_t *mp; - register struct adspcmd *rpb; - - if (sp == 0) { - pb->ioResult = errRefNum; - return EINVAL; - } - - if (sp->state != sOpen) { - pb->ioResult = errState; - return EINVAL; - } - - - while ((mp = sp->sbuf_mb)) { /* clear the send queue */ - sp->sbuf_mb = gbuf_next(mp); - gbuf_freem(mp); - } - if (sp->csbuf_mb) { - gbuf_freem(sp->csbuf_mb); - sp->csbuf_mb = 0; - } - sp->sData = 0; - sp->writeFlush = 0; - sp->sendCtl |= B_CTL_FRESET; - - sp->firstRtmtSeq = sp->sendSeq; /* Reset sequence #'s */ - if ((mp = gbuf_copym(pb->mp))) { /* copy the parameter block */ - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); /* release user */ - rpb = (struct adspcmd *)gbuf_rptr(mp); - rpb->ioc = 0; /* unlink copy */ - rpb->mp = mp; - - qAddToEnd((struct qlink **)&sp->frpb, (struct qlink *)rpb); - /* Hold on to pb (will be completed when */ - /* forward reset ack is received). */ - } else { /* assume it will work... but keep no - * bookkeeping for it. yetch! */ - adspioc_ack(0, (gbuf_t *)pb->ioc, pb->gref); - } - - CheckSend(sp); - return STR_IGNORE; - -} diff --git a/bsd/netat/adsp_stream.c b/bsd/netat/adsp_stream.c deleted file mode 100644 index 26ad46569..000000000 --- a/bsd/netat/adsp_stream.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
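- */

Aside: the deleted RXFReset() accepts a forward reset only when its sequence lands in [recvSeq, recvSeq + receive window], flushing everything buffered, and acknowledges both fresh and duplicate resets. The decision logic on its own, with illustrative strings standing in for the real actions:

    #include <stdio.h>
    #include <stdint.h>

    #define LTE(x,y)       ((int32_t)((x)-(y)) <= 0)
    #define BETWEEN(x,y,z) (LTE(x,y) && LTE(y,z))

    /* Decide what RXFReset-style code should do with a forward reset
     * carrying sequence 'seq', given the receiver's state. */
    static const char *freset_action(uint32_t recvSeq, uint32_t window,
                                     uint32_t seq)
    {
        uint32_t hi = recvSeq + window;
        if (BETWEEN(recvSeq, seq, hi))
            return "accept: flush receive queue, jump recvSeq, ack";
        if (LTE(seq, hi))
            return "duplicate: ack again, keep data";
        return "ignore";
    }

    int main(void)
    {
        printf("%s\n", freset_action(1000, 500, 1200));  /* in window */
        printf("%s\n", freset_action(1000, 500,  900));  /* old/dup   */
        printf("%s\n", freset_action(1000, 500, 2000));  /* beyond    */
        return 0;
    }

-/*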
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1995-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* - * 09/07/95 - Modified for performance (Tuyen Nguyen) - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -void adsp_rput(gref_t *, gbuf_t *); -static void adsp_iocack(gref_t *, gbuf_t *); -static void adsp_iocnak(gref_t *, gbuf_t *, int err); -void adsp_dequeue_ccb(CCB *); -int adspInited = 0; - -GLOBAL adspGlobal; - -/**********/ - -int adsp_pidM[256]; -char adsp_inputC[256]; -CCB *adsp_inputQ[256]; - -extern at_ifaddr_t *ifID_home; - -CCB *ccb_used_list; - -void adsp_input(mp) - gbuf_t *mp; -{ - gref_t *gref; - CCBPtr sp; - at_ddp_t *p; - gbuf_t *mb; - - switch (gbuf_type(mp)) { - case MSG_DATA: - p = (at_ddp_t *)gbuf_rptr(mp); - sp = adsp_inputQ[p->dst_socket]; - if ((sp == 0) || (sp->gref==0) || (sp->state==sClosed)) - { - gbuf_freem(mp); - return; - } - else if (sp->otccbLink != 0) { - do { - if ((sp->remoteAddress.a.node == p->src_node) - && (sp->remoteAddress.a.socket == p->src_socket) - && (sp->remoteAddress.a.net == NET_VALUE(p->src_net))) - break; - } while ((sp = sp->otccbLink) != 0); - if (sp == 0) - { - gbuf_freem(mp); - return; - } - } - if (sp->lockFlag) { - gbuf_next(mp) = 0; - if (sp->deferred_mb) { - for (mb=sp->deferred_mb; gbuf_next(mb); mb=gbuf_next(mb)) ; - gbuf_next(mb) = mp; - } else - sp->deferred_mb = mp; - return; - } - sp->lockFlag = 1; - while (mp) { - adsp_rput(sp->gref, mp); - if ((mp = sp->deferred_mb) != 0) { - sp->deferred_mb = gbuf_next(mp); - gbuf_next(mp) = 0; - } - } - sp->lockFlag = 0; - return; - - case MSG_IOCACK: - case MSG_IOCNAK: - gref = (gref_t *)((ioc_t *)gbuf_rptr(mp))->ioc_private; - break; - - case MSG_IOCTL: -#ifdef APPLETALK_DEBUG - kprintf("unexpected MSG_IOCTL in adsp_input()"); -#endif - /* fall through */ - - default: - gbuf_freem(mp); - return; - } - - adsp_rput(gref, mp); -} - -/**********/ -int adsp_readable(gref_t *); - -int adsp_readable(gref) - gref_t *gref; -{ - int rc; - 
CCBPtr sp;
-
-	if (gref->info == 0)
-		/*
-		 * we don't have the structure we need to determine
-		 * if there's data available... we return readable in
-		 * this case to keep from hanging up in the select
-		 * a subsequent read will run into the same missing data
-		 * structure and return an error... the ATselect code does
-		 * this if it can't retrieve the 'gref' structure from the
-		 * file table for the fd specified
-		 */
-		return(1);
-
-	sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info));
-	rc = sp->rData;
-
-	return rc;
-}
-
-int adsp_writeable(gref_t *);
-int adsp_writeable(gref)
-	gref_t *gref;
-{
-	int rc;
-	CCBPtr sp;
-
-	if (gref->info == 0)
-		/*
-		 * we don't have the structure we need to determine
-		 * if there's room available... we return writeable in
-		 * this case to keep from hanging up in the select
-		 * a subsequent write will run into the same missing data
-		 * structure and return an error... the ATselect code does
-		 * this if it can't retrieve the 'gref' structure from the
-		 * file table for the fd specified
-		 */
-		return(1);
-
-	sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info));
-	rc = CalcSendQFree(sp);
-
-	return rc;
-}
-
-static void adsp_init(void);
-
-static void adsp_init(void)
-{
-	adspInited++;
-	InitGlobals();
-	ccb_used_list = 0;
-	bzero(adsp_pidM, sizeof(adsp_pidM));
-	bzero(adsp_inputC, sizeof(adsp_inputC));
-	bzero(adsp_inputQ, sizeof(adsp_inputQ));
-}
-
-/*
- * Description:
- *	ADSP open and close routines. These routines
- *	initialize and release the ADSP structures. They do not
- *	have anything to do with "connections"
- */
-
-int adsp_open(gref)
-	gref_t *gref;
-{
-	register CCBPtr sp;
-
-	if (!adspInited)
-		adsp_init();
-
-	if (!adspAllocateCCB(gref))
-		return(ENOBUFS);	/* can't get buffers */
-
-	sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info));
-	gref->readable = adsp_readable;
-	gref->writeable = adsp_writeable;
-	if ((sp->otccbLink = ccb_used_list) != 0)
-		sp->otccbLink->ccbLink = sp;
-	ccb_used_list = sp;
-	return 0;
-}
-
-int adsp_close(gref)
-	gref_t *gref;
-{
-	unsigned char localSocket;
-
-	/* make sure we've not yet removed the CCB (e.g., due to TrashSession) */
-	if (gref->info) {
-		CCBPtr sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info));
-		localSocket = sp->localSocket;
-		if (localSocket)
-			adspRelease(gref);
-		else
-		{
-			adsp_dequeue_ccb(sp);
-			gbuf_freeb((gbuf_t *)gref->info);
-		}
-	}
-	return 0;
-}
-
-
-/*
- * Name:
- * 	adsp_rput
- *
- * Description:
- *	ADSP streams read put and service routines.
- */
-
-void adsp_rput(gref, mp)
-	gref_t *gref;			/* READ queue */
-	gbuf_t *mp;
-{
-	switch (gbuf_type(mp)) {
-	case MSG_HANGUP:
-	case MSG_IOCACK:
-	case MSG_IOCNAK:
-		switch (adspReadHandler(gref, mp)) {
-		case STR_PUTNEXT:
-			atalk_putnext(gref, mp);
-			break;
-		case STR_IGNORE:
-			break;
-		}
-		break;
-	case MSG_ERROR:
-#ifdef APPLETALK_DEBUG
-		kprintf("adsp_rput received MSG_ERROR");
-#endif
-		/* fall through */
-	default:
-		CheckReadQueue((CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)));
-		CheckSend((CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)));
-
-		switch (gbuf_type(mp)) {
-		case MSG_IOCTL:
-		case MSG_DATA:
-		case MSG_PROTO:
-			if (adspReadHandler(gref, mp) == STR_PUTNEXT)
-				atalk_putnext(gref, mp);
-			break;
-		default:
-			atalk_putnext(gref, mp);
-			break;
-		}
-	}
-}
-
-/*
- * Name:
- * 	adsp_wput
- *
- * Description:
- *	ADSP streams write put and service routines.
- *
- */
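
Aside: the deleted adsp_input() demultiplexes inbound datagrams through adsp_inputQ[], a 256-entry table keyed by destination DDP socket, chaining CCBs that share a socket through otccbLink and matching on the remote address. The lookup skeleton in plain C — hypothetical types; the real code also special-cases a lone CCB on a socket:

    #include <stdio.h>

    struct at_addr { unsigned short net; unsigned char node, socket; };

    struct ccb {
        struct ccb    *otccbLink;   /* next CCB sharing this local socket */
        struct at_addr remote;
    };

    static struct ccb *inputQ[256]; /* keyed by local DDP socket */

    static int same_peer(const struct at_addr *x, const struct at_addr *y)
    {
        return x->net == y->net && x->node == y->node && x->socket == y->socket;
    }

    static struct ccb *demux(unsigned char dst_socket, const struct at_addr *src)
    {
        struct ccb *sp = inputQ[dst_socket];
        while (sp && !same_peer(&sp->remote, src))
            sp = sp->otccbLink;     /* several connections, one socket */
        return sp;                  /* NULL: no session, drop packet */
    }

    int main(void)
    {
        struct ccb a = { 0,  { 10, 1, 200 } };
        struct ccb b = { &a, { 10, 2, 200 } };
        inputQ[130] = &b;

        struct at_addr want = { 10, 1, 200 };
        printf("match=%p expected=%p\n",
               (void *)demux(130, &want), (void *)&a);
        return 0;
    }
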
-
-int adsp_wput(gref, mp)
-	gref_t *gref;			/* WRITE queue */
-	gbuf_t *mp;
-{
-	int rc;
-	gbuf_t *xm;
-	ioc_t *iocbp;
-	CCBPtr sp;
-
-	if (gref->info)
-		sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info));
-	else
-		sp = 0;
-
-	if (gbuf_type(mp) == MSG_IOCTL) {
-		iocbp = (ioc_t *)gbuf_rptr(mp);
-		switch (iocbp->ioc_cmd) {
-		case ADSPBINDREQ:
-		{
-			unsigned char v;
-
-			if (gbuf_cont(mp) == NULL) {
-				iocbp->ioc_rval = -1;
-				adsp_iocnak(gref, mp, EINVAL);
-				return 0;
-			}
-			v = *(unsigned char *)gbuf_rptr(gbuf_cont(mp));
-			if ( (v != 0)
-			     && ((v > DDP_SOCKET_LAST) || (v < 2)
-				 || ddp_socket_inuse(v, DDP_ADSP))) {
-				iocbp->ioc_rval = -1;
-				adsp_iocnak(gref, mp, EINVAL);
-			}
-			else {
-				if (v == 0) {
-					if ((v = adspAssignSocket(gref, 0)) == 0) {
-						iocbp->ioc_rval = -1;
-						adsp_iocnak(gref, mp, EINVAL);
-						return 0;
-					}
-				} else {
-					adsp_inputC[v] = 1;
-					adsp_inputQ[v] = sp;
-					adsp_pidM[v] = sp->pid;
-					adsp_dequeue_ccb(sp);
-				}
-				*(unsigned char *)gbuf_rptr(gbuf_cont(mp)) = v;
-				sp->localSocket = v;
-				iocbp->ioc_rval = 0;
-				adsp_iocack(gref, mp);
-			}
-			return 0;
-		}
-
-		case ADSPGETSOCK:
-		case ADSPGETPEER:
-		{
-			at_inet_t *addr;
-
-			if (((xm = gbuf_cont(mp)) == NULL)
-			    && ((xm = gbuf_alloc(sizeof(at_inet_t), PRI_MED)) == NULL)) {
-				iocbp->ioc_rval = -1;
-				adsp_iocnak(gref, mp, ENOBUFS);
-				return 0;
-			}
-			gbuf_cont(mp) = xm;
-			gbuf_wset(xm,sizeof(at_inet_t));
-			addr = (at_inet_t *)gbuf_rptr(xm);
-			if (iocbp->ioc_cmd == ADSPGETSOCK) {
-				/* Obtain Network and Node Id's from DDP */
-				/* *** was ddp_get_cfg() *** */
-				addr->net = ifID_home->ifThisNode.s_net;
-				addr->node = ifID_home->ifThisNode.s_node;
-				addr->socket = (sp)? sp->localSocket: 0;
-			} else
-				if (sp)
-					*addr = sp->remoteAddress.a;
-				else {
-					addr->net = 0;
-					addr->node = 0;
-					addr->socket = 0;
-				}
-			iocbp->ioc_rval = 0;
-			adsp_iocack(gref, mp);
-			return 0;
-		}
-		case DDP_IOC_GET_CFG:
-			/* respond to an DDP_IOC_GET_CFG sent on an adsp fd */
-			if (((xm = gbuf_cont(mp)) == NULL) &&
-			    (xm = gbuf_alloc(sizeof(ddp_addr_t), PRI_MED)) == NULL) {
-				iocbp->ioc_rval = -1;
-				adsp_iocnak(gref, mp, ENOBUFS);
-				return 0;
-			}
-			gbuf_cont(mp) = xm;
-			gbuf_wset(xm, sizeof(ddp_addr_t));
-			/* Obtain Network and Node Id's from DDP */
-			{
-				/* *** was ddp_get_cfg() *** */
-				ddp_addr_t *cfgp =
-					(ddp_addr_t *)gbuf_rptr(gbuf_cont(mp));
-				cfgp->inet.net = ifID_home->ifThisNode.s_net;
-				cfgp->inet.node = ifID_home->ifThisNode.s_node;
-				cfgp->inet.socket = (sp)?
sp->localSocket: 0; - cfgp->ddptype = DDP_ADSP; - } - iocbp->ioc_rval = 0; - adsp_iocack(gref, mp); - return 0; - } /* switch */ - } - - if (!gref->info) - gbuf_freem(mp); - else { - rc = adspWriteHandler(gref, mp); - - switch (rc) { - case STR_PUTNEXT: - if (gbuf_type(mp) == MSG_IOCTL) { - iocbp = (ioc_t *)gbuf_rptr(mp); - iocbp->ioc_private = (void *)gref; - } - DDP_OUTPUT(mp); - break; - case STR_IGNORE: - case STR_IGNORE+99: - break; - default: - gbuf_freem(mp); - break; - } - } - - return 0; -} /* adsp_wput */ - -void adspioc_ack(errno, m, gref) - int errno; - gbuf_t *m; - gref_t *gref; -{ - ioc_t *iocbp; - - if (m == NULL) - return; - iocbp = (ioc_t *) gbuf_rptr(m); - - iocbp->ioc_error = errno; /* set the errno */ - iocbp->ioc_count = gbuf_msgsize(gbuf_cont(m)); - if (gbuf_type(m) == MSG_IOCTL) /* if an ioctl, this is an ack */ - gbuf_set_type(m, MSG_IOCACK); /* and ALWAYS update the user */ - /* ioctl structure */ - trace_mbufs(D_M_ADSP,"A ", m); - SndMsgUp(gref, m); -} - -static void adsp_iocack(gref, m) - gref_t *gref; - register gbuf_t *m; -{ - if (gbuf_type(m) == MSG_IOCTL) - gbuf_set_type(m, MSG_IOCACK); - - if (gbuf_cont(m)) - ((ioc_t *)gbuf_rptr(m))->ioc_count = gbuf_msgsize(gbuf_cont(m)); - else - ((ioc_t *)gbuf_rptr(m))->ioc_count = 0; - - SndMsgUp(gref, m); -} - - -static void adsp_iocnak(gref, m, err) - gref_t *gref; - register gbuf_t *m; - register int err; -{ - if (gbuf_type(m) == MSG_IOCTL) - gbuf_set_type(m, MSG_IOCNAK); - ((ioc_t *)gbuf_rptr(m))->ioc_count = 0; - - if (err == 0) - err = ENXIO; - ((ioc_t *)gbuf_rptr(m))->ioc_error = err; - - if (gbuf_cont(m)) { - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - } - SndMsgUp(gref, m); -} - -unsigned char -adspAssignSocket(gref, flag) - gref_t *gref; - int flag; -{ - unsigned char sVal, sMax, sMin, sSav = 0, inputC; - CCBPtr sp; - - sMax = flag ? DDP_SOCKET_LAST-46 : DDP_SOCKET_LAST-6; - sMin = DDP_SOCKET_1st_DYNAMIC; - - for (inputC=255, sVal=sMax; sVal >= sMin; sVal--) { - if (!ddp_socket_inuse(sVal, DDP_ADSP)) - break; - else if (flag) { - if (adsp_inputC[sVal] && - /* meaning that raw DDP doesn't have it */ - (adsp_inputC[sVal] < inputC) - && (adsp_inputQ[sVal]->state == sOpen)) { - inputC = adsp_inputC[sVal]; - sSav = sVal; - } - } - } - if (sVal < sMin) { - if (!flag || (inputC == 255)) - return 0; - sVal = sSav; - } - sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); - adsp_dequeue_ccb(sp); - adsp_inputC[sVal]++; - sp->otccbLink = adsp_inputQ[sVal]; - adsp_inputQ[sVal] = sp; - if (!flag) - adsp_pidM[sVal] = sp->pid; - return sVal; -} - -int -adspDeassignSocket(sp) - CCBPtr sp; -{ - unsigned char sVal; - CCBPtr curr_sp; - CCBPtr prev_sp; - int pid = 0; - - dPrintf(D_M_ADSP, D_L_TRACE, ("adspDeassignSocket: pid=%d,s=%d\n", - sp->pid, sp->localSocket)); - sVal = sp->localSocket; - if ((curr_sp = adsp_inputQ[sVal]) != 0) { - prev_sp = 0; - while (curr_sp != sp) { - prev_sp = curr_sp; - curr_sp = curr_sp->otccbLink; - } - if (curr_sp) { - if (prev_sp) - prev_sp->otccbLink = sp->otccbLink; - else - adsp_inputQ[sVal] = sp->otccbLink; - if (adsp_inputQ[sVal]) - adsp_inputC[sVal]--; - else { - pid = adsp_pidM[sVal]; - adsp_inputC[sVal] = 0; - adsp_pidM[sVal] = 0; - } - sp->ccbLink = 0; - sp->otccbLink = 0; - sp->localSocket = 0; - return pid ? 
0 : 1; - } - } - - dPrintf(D_M_ADSP, D_L_ERROR, - ("adspDeassignSocket: closing, no CCB block, trouble ahead\n")); - return -1; -} /* adspDeassignSocket */ - -/* - * remove CCB from the use list - */ -void -adsp_dequeue_ccb(sp) - CCB *sp; -{ - - if (sp == ccb_used_list) { - if ((ccb_used_list = sp->otccbLink) != 0) - sp->otccbLink->ccbLink = 0; - } else if (sp->ccbLink) { - if ((sp->ccbLink->otccbLink = sp->otccbLink) != 0) - sp->otccbLink->ccbLink = sp->ccbLink; - } - - sp->otccbLink = 0; - sp->ccbLink = 0; -} - -void SndMsgUp(gref, mp) - gref_t *gref; /* WRITE queue */ - gbuf_t *mp; -{ -/* - dPrintf(D_M_ADSP, D_L_TRACE, - ("SndMsgUp: gref=0x%x, mbuf=0x%x\n", (unsigned)gref, (unsigned)mp)); - trace_mbufs(D_M_ADSP, " m", mp); -*/ - atalk_putnext(gref, mp); -} diff --git a/bsd/netat/appletalk.h b/bsd/netat/appletalk.h deleted file mode 100644 index db08c693d..000000000 --- a/bsd/netat/appletalk.h +++ /dev/null @@ -1,317 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * - * ORIGINS: 82 - * - * (C) COPYRIGHT Apple Computer, Inc. 1992-1996 - * All Rights Reserved - * - */ - -/* Miscellaneous definitions for AppleTalk used by all protocol - * modules. - */ - -#ifndef _NETAT_APPLETALK_H_ -#define _NETAT_APPLETALK_H_ -#include - -#include -#include - -#ifdef __APPLE_API_OBSOLETE - -/* - Non-aligned types are used in packet headers. 
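The typedefs and the UAS_/UAL_/NET_ macro families that follow exist precisely because of this: a DDP header can sit at any byte offset inside a packet buffer, so 16-bit network numbers are carried as two-byte arrays rather than as u_short fields. A rough user-space illustration of the same access pattern (all names here are invented for the demo, and memcpy() is used where the header itself casts through an unaligned pointer):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

typedef unsigned char ua_short_demo[2];   /* same shape as ua_short / at_net */

/* analogous to NET_ASSIGN(): store a host-order value in network byte order */
static void net_assign_demo(ua_short_demo x, unsigned short s)
{
    unsigned short be = htons(s);
    memcpy(x, &be, sizeof(be));
}

/* analogous to NET_VALUE(): read the two bytes back in host order */
static unsigned short net_value_demo(const ua_short_demo x)
{
    unsigned short be;
    memcpy(&be, x, sizeof(be));
    return ntohs(be);
}

int main(void)
{
    ua_short_demo net;
    net_assign_demo(net, 0x1234);
    printf("bytes %02x %02x, value 0x%04x\n",
        net[0], net[1], net_value_demo(net));   /* bytes 12 34, value 0x1234 */
    return 0;
}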
-*/
-
-/* New fundamental types: non-aligned variations of u_short and u_long */
-typedef u_char ua_short[2];		/* Unaligned short */
-typedef u_char ua_long[4];		/* Unaligned long */
-
-/* Two at_net typedefs; the first is aligned, the other isn't */
-typedef u_short at_net_al;		/* Aligned AppleTalk network number */
-typedef ua_short at_net_unal;		/* Unaligned AppleTalk network number */
-
-/* Miscellaneous types */
-typedef u_char at_node;			/* AppleTalk node number */
-typedef u_char at_socket;		/* AppleTalk socket number */
-
-typedef at_net_unal at_net;		/* Default: Unaligned AppleTalk network number */
-struct atalk_addr {
-	u_char atalk_unused;
-	at_net atalk_net;
-	at_node atalk_node;
-};
-
-/* Macros to manipulate unaligned fields */
-#define UAS_ASSIGN(x,s) *(unsigned short *) &(x[0]) = (unsigned short) (s)
-#define UAS_UAS(x,y) *(unsigned short *) &(x[0]) = *(unsigned short *) &(y[0])
-#define UAS_VALUE(x) (*(unsigned short *) &(x[0]))
-#define UAL_ASSIGN(x,l) *(unsigned long *) &(x[0]) = (unsigned long) (l)
-#define UAL_UAL(x,y) *(unsigned long *) &(x[0]) = *(unsigned long *) &(y[0])
-#define UAL_VALUE(x) (*(unsigned long *) &(x[0]))
-
-/* Macros to assign unaligned fields with byte swapping */
-#define UAS_ASSIGN_HTON(x,s) *(unsigned short *) &(x[0]) = htons((unsigned short) (s))
-#define UAS_ASSIGN_NTOH(x,s) *(unsigned short *) &(x[0]) = ntohs((unsigned short) (s))
-#define UAS_VALUE_HTON(x) htons((*(unsigned short *) &(x[0])))
-#define UAS_VALUE_NTOH(x) ntohs((*(unsigned short *) &(x[0])))
-#define UAL_ASSIGN_HTON(x,l) *(unsigned long *) &(x[0]) = htonl((unsigned long) (l))
-#define UAL_ASSIGN_NTOH(x,l) *(unsigned long *) &(x[0]) = ntohl((unsigned long) (l))
-#define UAL_VALUE_HTON(x) htonl((*(unsigned long *) &(x[0])))
-#define UAL_VALUE_NTOH(x) ntohl((*(unsigned long *) &(x[0])))
-
-/* Macros to manipulate at_net variables */
-#define NET_ASSIGN(x,s) *(unsigned short *)&(x[0]) = htons((unsigned short)(s))
-#define NET_ASSIGN_NOSWAP(x,s) *(unsigned short *)&(x[0]) = (unsigned short)(s)
-#define NET_NET(x, y) *(unsigned short *)&(x[0]) = *(unsigned short *)&(y[0])
-#define NET_VALUE(x) ntohs((*(unsigned short *) &(x[0])))
-#define NET_VALUE_NOSWAP(x) (*(unsigned short *) &(x[0]))
-#define ATALK_ASSIGN(a, net, node, unused ) \
-	a.atalk_unused = unused; a.atalk_node = node; NET_ASSIGN(a.atalk_net, net)
-
-#define NET_EQUAL(a, b) (NET_VALUE(a) == NET_VALUE(b))
-#define NET_NOTEQ(a, b) (NET_VALUE(a) != NET_VALUE(b))
-#define NET_EQUAL0(a) (NET_VALUE(a) == 0)
-#define NET_NOTEQ0(a) (NET_VALUE(a) != 0)
-
-
-/*
-   AppleTalk Internet Address
-*/
-
-typedef struct at_inet {
-	u_short net;			/* Network Address */
-	u_char node;			/* Node number */
-	u_char socket;			/* Socket number */
-} at_inet_t;
-
-/*
-   DDP Address for OT
-*/
-
-typedef struct ddp_addr {
-	at_inet_t inet;
-	u_short ddptype;
-} ddp_addr_t;
-
-/*
-   AppleTalk address
-*/
-
-struct at_addr {
-	u_short s_net;			/* 16-bit network address */
-	u_char s_node;			/* 8-bit node # (1-0xfd) */
-};
-
-/*
-   Appletalk sockaddr definition
-*/
-struct sockaddr_at {
-	u_char sat_len;			/* total length */
-	u_char sat_family;		/* address family (AF_APPLETALK) */
-	u_char sat_port;		/* 8-bit "socket number" */
-	struct at_addr sat_addr;	/* 16-bit "net" and 8-bit "node" */
-	char sat_zero[8];		/* used for netrange in netatalk */
-};
-
-#define ATADDR_ANYNET (u_short)0x0000
-#define ATADDR_ANYNODE (u_char)0x00
-#define ATADDR_ANYPORT (u_char)0x00
-
-#define ATADDR_BCASTNODE (u_char)0xff	/* There is no BCAST for NET */
-
-/* make sure the net, node and socket
numbers are in legal range : - * - * Net# 0 Local Net - * 1 - 0xfffe Legal net nos - * 0xffff Reserved by Apple for future use. - * Node# 0 Illegal - * 1 - 0x7f Legal (user node id's) - * 0x80 - 0xfe Legal (server node id's; 0xfe illegal in - * Phase II nodes) - * 0xff Broadcast - * Socket# 0 Illegal - * 1 - 0xfe Legal - * 0xff Illegal - */ -#define valid_at_addr(addr) \ - ((!(addr) || (addr)->net == 0xffff || (addr)->node == 0 || \ - (addr)->socket == 0 || (addr)->socket == 0xff)? 0: 1) - -/*** * ETHERTYPE_ definitions are in netinet/if_ether.h *** */ -#define ETHERTYPE_AT 0x809B /* AppleTalk protocol */ -#define ETHERTYPE_AARP 0x80F3 /* AppleTalk ARP */ - -/* - DDP protocol types -*/ - -#define DDP_RTMP 0x01 -#define DDP_NBP 0x02 -#define DDP_ATP 0x03 -#define DDP_ECHO 0x04 -#define DDP_RTMP_REQ 0x05 -#define DDP_ZIP 0x06 -#define DDP_ADSP 0x07 - -/* - Protocols for the socket API -*/ - -#define ATPROTO_NONE 0 /* no corresponding DDP type exists */ - -#define ATPROTO_ATP DDP_ATP /* must match DDP type */ -#define ATPROTO_ADSP DDP_ADSP /* must match DDP type */ - -#define ATPROTO_DDP 249 /* *** to be eliminated eventually *** */ -#define ATPROTO_LAP 250 /* *** to be eliminated eventually *** */ - -#define ATPROTO_AURP 251 /* no corresponding DDP type exists */ -#define ATPROTO_ASP 252 /* no corresponding DDP type exists */ -#define ATPROTO_AFP 253 /* no corresponding DDP type exists */ - -#define ATPROTO_RAW 255 /* no corresponding DDP type exists */ - -/* - Options for use with [gs]etsockopt at the DDP level. - First word of comment is data type; bool is stored in int. -*/ -#define DDP_CHKSUM_ON 1 /* int; default = FALSE; - DDP checksums should be used */ -#define DDP_HDRINCL 2 /* int; default = FALSE; - header is included with data */ -#define DDP_GETSOCKNAME 3 /* used to get ddp_addr_t */ -#define DDP_SLFSND_ON 4 /* int; default = FALSE; - packets sent to the cable-multicast address - on this socket will be looped back */ -#define DDP_STRIPHDR 5 /* int; default = FALSE; - drop DDP header on receive (raw) */ - -/* - AppleTalk protocol retry and timeout -*/ - -typedef struct at_retry { - short interval; /* Retry interval in seconds */ - short retries; /* Maximum number of retries */ - u_char backoff; /* Retry backoff, must be 1 through 4 */ -} at_retry_t; - -/* - Basic NBP Definitions needed for AppleTalk framework -*/ - -#define MAX_ZONES 50 - -#define NBP_NVE_STR_SIZE 32 /* Maximum NBP tuple string size */ -typedef struct at_nvestr { - u_char len; - u_char str[NBP_NVE_STR_SIZE]; -} at_nvestr_t; - -/* Entity Name */ -typedef struct at_entity { - at_nvestr_t object; - at_nvestr_t type; - at_nvestr_t zone; -} at_entity_t; - -#define NBP_TUPLE_SIZE ((3*NBP_NVE_STR_SIZE)+3) - /* 3 for field lengths + 3*32 for three names */ -typedef struct at_nbptuple { - at_inet_t enu_addr; - u_char enu_enum; - at_entity_t enu_entity; -} at_nbptuple_t; - -/* - Basic ATP Definitions needed for LibcAT -*/ - -#define ATP_TRESP_MAX 8 /* Maximum number of Tresp pkts */ - -/* Response buffer structure for atp_sendreq() and atp_sendrsp() */ -typedef struct at_resp { - u_char bitmap; /* Bitmap of responses */ - u_char filler[3]; /* Force 68K to RISC alignment */ - struct iovec resp[ATP_TRESP_MAX]; /* Buffer for response data */ - long userdata[ATP_TRESP_MAX]; /* Buffer for response user data */ -} at_resp_t; - -/* - Needed for ASP and ADSP -*/ - -typedef struct { - int maxlen; /* max buffer length */ - int len; /* length of data */ - char *buf; /* pointer to buffer */ -} strbuf_t; - -#define IFID_HOME 1 /* home 
port in ifID_table */
-
-#define ATALK_VALUE(a) ((*(u_long *) &(a))&0x00ffffff)
-
-#define VERSION_LENGTH 80	/* length of version string */
-
-/* structure containing general information regarding the state
- * of AppleTalk networking
- */
-typedef struct at_state {
-	unsigned int flags;	/* various init flags */
-} at_state_t;
-
-/* at_state_t 'flags' defines */
-#define AT_ST_STARTED		0x0001	/* set if protocol is fully enabled */
-#define AT_ST_STARTING		0x0002	/* set if interfaces are configured */
-#define AT_ST_MULTIHOME		0x0080	/* set if multihome mode */
-#define AT_ST_ROUTER		0x0100	/* set if we are a router */
-#define AT_ST_IF_CHANGED	0x0200	/* set when state of any I/F
-					   changes (for SNMP) */
-#define AT_ST_RT_CHANGED	0x0400	/* route table changed (for SNMP) */
-#define AT_ST_ZT_CHANGED	0x0800	/* zone table changed (for SNMP) */
-#define AT_ST_NBP_CHANGED	0x1000	/* if nbp table changed (for SNMP) */
-
-#ifdef KERNEL_PRIVATE
-extern at_state_t at_state;	/* global state of AT network */
-
-#define ROUTING_MODE (at_state.flags & AT_ST_ROUTER)
-#define MULTIHOME_MODE (at_state.flags & AT_ST_MULTIHOME)
-#define MULTIPORT_MODE (ROUTING_MODE || MULTIHOME_MODE)
-#endif /* KERNEL_PRIVATE */
-
-/* defines originally from h/at_elap.h */
-#define AT_ADDR 0
-#define ET_ADDR 1
-#define AT_ADDR_NO_LOOP 2	/* disables packets from looping back */
-
-#endif /* __APPLE_API_OBSOLETE */
-#endif /* _NETAT_APPLETALK_H_ */
diff --git a/bsd/netat/asp.h b/bsd/netat/asp.h
deleted file mode 100644
index ab0c32173..000000000
--- a/bsd/netat/asp.h
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *
- * ORIGINS: 82
- *
- * (C) COPYRIGHT Apple Computer, Inc.
1992-1996 - * All Rights Reserved - * - */ - -#ifndef _NETAT_ASP_H_ -#define _NETAT_ASP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -#define ASP_Version 0x100 - -#define ASPFUNC_CloseSess 1 -#define ASPFUNC_Command 2 -#define ASPFUNC_GetStatus 3 -#define ASPFUNC_OpenSess 4 -#define ASPFUNC_Tickle 5 -#define ASPFUNC_Write 6 -#define ASPFUNC_WriteContinue 7 -#define ASPFUNC_Attention 8 -#define ASPFUNC_CmdReply 9 - -#define ASPIOC 210 /* AT_MID_ASP */ -#define ASPIOC_ClientBind ((ASPIOC<<8) | 1) -#define ASPIOC_CloseSession ((ASPIOC<<8) | 2) -#define ASPIOC_GetLocEntity ((ASPIOC<<8) | 3) -#define ASPIOC_GetRemEntity ((ASPIOC<<8) | 4) -#define ASPIOC_GetSession ((ASPIOC<<8) | 5) -#define ASPIOC_GetStatus ((ASPIOC<<8) | 6) -#define ASPIOC_ListenerBind ((ASPIOC<<8) | 7) -#define ASPIOC_OpenSession ((ASPIOC<<8) | 8) -#define ASPIOC_StatusBlock ((ASPIOC<<8) | 9) -#define ASPIOC_SetPid ((ASPIOC<<8) |10) -#define ASPIOC_GetSessId ((ASPIOC<<8) |11) -#define ASPIOC_EnableSelect ((ASPIOC<<8) |12) /* not needed */ -#define ASPIOC_Look ((ASPIOC<<8) |13) - -#define MOREDATA 1 - -/* The following ASP error codes are defined in Inside AppleTalk: */ - -#define ASPERR_NoError 0 -#define ASPERR_BadVersNum -1066 -#define ASPERR_BufTooSmall -1067 -#define ASPERR_NoMoreSessions -1068 -#define ASPERR_NoServers -1069 -#define ASPERR_ParamErr -1070 -#define ASPERR_ServerBusy -1071 -#define ASPERR_SessClosed -1072 -#define ASPERR_SizeErr -1073 -#define ASPERR_TooManyClients -1074 -#define ASPERR_NoAck -1075 - -/* These ASP error codes were apparently defined later: */ - -#define ASPERR_NoSuchDevice -1058 -#define ASPERR_BindErr -1059 -#define ASPERR_CmdReply -1060 -#define ASPERR_CmdRequest -1061 -#define ASPERR_SystemErr -1062 -#define ASPERR_ProtoErr -1063 -#define ASPERR_NoSuchEntity -1064 -#define ASPERR_RegisterErr -1065 - -typedef struct { - at_inet_t SLSEntityIdentifier; - at_retry_t Retry; - int StatusBufferSize; -} asp_status_cmd_t; - -typedef struct { - at_inet_t SLSEntityIdentifier; - at_retry_t Retry; - unsigned short TickleInterval; - unsigned short SessionTimer; -} asp_open_cmd_t; - -typedef struct { - int Primitive; - int CmdResult; - unsigned short ReqRefNum; - unsigned short Filler; -} asp_cmdreply_req_t; - -typedef struct { - int Primitive; - int CmdResult; -} asp_cmdreply_ind_t; - -typedef struct { - int Primitive; - unsigned short ReqRefNum; - unsigned char ReqType; - unsigned char Filler; -} asp_command_ind_t; - -union asp_primitives { - int Primitive; - asp_cmdreply_ind_t CmdReplyInd; - asp_cmdreply_req_t CmdReplyReq; - asp_command_ind_t CommandInd; -}; - -#ifdef KERNEL_PRIVATE - -#define ASPSTATE_Close 0 -#define ASPSTATE_Idle 1 -#define ASPSTATE_WaitingForGetStatusRsp 2 -#define ASPSTATE_WaitingForOpenSessRsp 3 -#define ASPSTATE_WaitingForCommandRsp 4 -#define ASPSTATE_WaitingForWriteContinue 5 -#define ASPSTATE_WaitingForWriteRsp 6 -#define ASPSTATE_WaitingForWriteContinueRsp 7 -#define ASPSTATE_WaitingForCloseSessRsp 8 -#ifdef NOT_USED -#define ASPSTATE_WaitingForCfgAck 9 -#endif - -/* - * ATP state block - */ -typedef struct { - gref_t *atp_gref; /* gref must be the first entry */ - int pid; /* process id, must be the second entry */ - gbuf_t *atp_msgq; /* data msg, must be the third entry */ - unsigned char dflag; /* structure flag, must be the fourth entry */ - unsigned char filler[3]; -} atp_state_t; - -/* - * ASP word - */ -typedef struct { - unsigned char func; - unsigned char param1; - unsigned short param2; -} asp_word_t; - -struct asp_scb; -typedef void (*asp_tmo_func)(struct 
asp_scb *); - -/* - * ASP session control block - */ -typedef struct asp_scb { - gref_t *gref; /* read queue pointer, must be the first entry */ - int pid; /* process id, must be the second entry */ - atp_state_t *atp_state; /* atp state info, must be the third entry */ - unsigned char dflag; /* structure flag, must be the fourth entry */ - unsigned char state; - unsigned char sess_id; - unsigned char tmo_delta; - unsigned char tmo_cnt; - unsigned char rem_socket; - unsigned char rem_node; - unsigned char magic_num; - unsigned short snd_seq_num; - unsigned short rcv_seq_num; - unsigned short filler; - unsigned short tickle_tid; - unsigned short tickle_interval; - unsigned short session_timer; - unsigned short attn_tid; - unsigned char attn_flag; - unsigned char req_flag; - gbuf_t *req_msgq; - unsigned short wrt_seq_num; - unsigned char get_wait; - unsigned char ioc_wait; - at_retry_t cmd_retry; - at_inet_t loc_addr; - at_inet_t rem_addr; - at_inet_t svc_addr; - gbuf_t *sess_ioc; - gbuf_t *stat_msg; - asp_tmo_func tmo_func; - struct asp_scb *next_tmo; - struct asp_scb *prev_tmo; - struct asp_scb *sess_scb; - struct asp_scb *next_scb; - struct asp_scb *prev_scb; - unsigned char sel_on; /* not needed */ - unsigned char user; - unsigned char rcv_cnt; - unsigned char snd_stop; - unsigned char reply_socket; - unsigned char if_num; - unsigned char pad[2]; - atlock_t lock; - atlock_t delay_lock; - atevent_t event; - atevent_t delay_event; -} asp_scb_t; - - -int ASPgetmsg(gref_t *, strbuf_t *, strbuf_t *, gbuf_t **, int *, int *); -int ASPputmsg(gref_t *, strbuf_t *, strbuf_t *, gbuf_t *, int , int *); -void asp_init(void); -void asp_ack_reply(gref_t *, gbuf_t *); -void asp_nak_reply(gref_t *, gbuf_t *); -int asp_wput(gref_t *, gbuf_t *); -void asp_clock(void *); -void asp_clock_locked(void *); -int asp_open(gref_t *); -int asp_close(gref_t *); - - -#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_ASP_H_ */ diff --git a/bsd/netat/asp_proto.c b/bsd/netat/asp_proto.c deleted file mode 100644 index 8f1621b54..000000000 --- a/bsd/netat/asp_proto.c +++ /dev/null @@ -1,2313 +0,0 @@ -/* - * Copyright (c) 1995-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
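The "must be the first entry" / "second entry" / "third entry" / "fourth entry" comments on atp_state_t and asp_scb_t above encode a real constraint: gref->info may point at either structure, and code such as asp_wput() reads dflag through one type before it knows which one it actually has, so the first four members must line up. A compile-time sketch of that invariant, assuming the two definitions above are in scope (the check itself is illustrative, not part of the original source):

#include <stddef.h>

/* fails to compile if expr is false: the array size becomes -1 */
#define LAYOUT_CHECK(name, expr)  typedef char name[(expr) ? 1 : -1]

LAYOUT_CHECK(gref_is_first,
    offsetof(asp_scb_t, gref) == offsetof(atp_state_t, atp_gref));
LAYOUT_CHECK(pid_is_second,
    offsetof(asp_scb_t, pid) == offsetof(atp_state_t, pid));
LAYOUT_CHECK(dflag_is_fourth,
    offsetof(asp_scb_t, dflag) == offsetof(atp_state_t, dflag));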
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Change Log: - * Created February 20, 1995 by Tuyen Nguyen - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - - -#define atpBDSsize (sizeof(struct atpBDS)*ATP_TRESP_MAX) -#define aspCMDsize (atpBDSsize+sizeof(struct atp_set_default)+TOTAL_ATP_HDR_SIZE) -#define SCBS_PER_BLK 16 -#define TICKS_PER_SEC HZ -#define SESS_TMO_RES 2 -#define DEF_SESS_TMO 120 -#define NEXT_SEQ_NUM(x) (x = (x == 65535) ? 0 : (x + 1)) -#define MAX_RCV_CNT 5 -#define BAD_REMADDR(addr) \ - ( (*(long *)&scb->rem_addr != *(long *)&addr) \ - && ((scb->rem_addr.net != addr.net) \ - || (scb->rem_addr.node != addr.node)) ) - -StaticProc asp_scb_t *asp_find_scb(unsigned char, at_inet_t *); -StaticProc asp_scb_t *asp_scb_alloc(void); - -StaticProc void asp_putnext(gref_t *, gbuf_t *); -StaticProc void asp_iocack(gref_t *, gbuf_t *); -StaticProc void asp_iocnak(gref_t *, gbuf_t *, int); -StaticProc void asp_dequeue_scb(asp_scb_t *); -StaticProc void asp_scb_free(asp_scb_t *); -StaticProc void asp_timout(asp_tmo_func, asp_scb_t *, int); -StaticProc void asp_untimout(asp_tmo_func, asp_scb_t *); -StaticProc void asp_hangup(asp_scb_t *); -StaticProc void asp_send_tickle(asp_scb_t *); -StaticProc void asp_send_tickle_locked(void *); -StaticProc void asp_accept(asp_scb_t *scb, asp_scb_t *sess_scb, gbuf_t *m); -StaticProc int asp_send_req(gref_t *, gbuf_t *, at_inet_t *, at_retry_t *, asp_word_t *, - unsigned char , unsigned char, unsigned char); - -extern at_ifaddr_t *ifID_home; -extern int atp_pidM[]; -extern gref_t *atp_inputQ[]; -extern lck_mtx_t *atalk_mutex; -gbuf_t *scb_resource_m = 0; -unsigned char asp_inpC[256]; -asp_scb_t *asp_scbQ[256]; - -static at_retry_t asp_def_retry = {2, -1, 1}; -static unsigned char scb_tmo_cnt; -asp_scb_t *scb_used_list; -static asp_scb_t *scb_tmo_list; -asp_scb_t *scb_free_list; - -int asp_readable(gref_t *); - -int -asp_readable(gref) - gref_t *gref; -{ - return (((asp_scb_t *)gref->info)->sess_ioc ? 
1 : 0); -} - -void -asp_init() -{ - scb_tmo_cnt = 1; - scb_tmo_list = 0; - scb_used_list = 0; - scb_free_list = 0; - bzero(asp_inpC, sizeof(asp_inpC)); - bzero(asp_scbQ, sizeof(asp_scbQ)); -} - -/* - * the open routine allocates a state structure - */ -int asp_open(gref) - gref_t *gref; -{ - asp_scb_t *scb; - - /* - * if no asp structure available, return failure - */ - if ((scb = asp_scb_alloc()) == 0) - return ENOBUFS; - - /* - * initialize the gref data structure - */ - gref->info = (void *)scb; - gref->readable = asp_readable; - - /* - * initialize the scb data structure - */ - scb->dflag = 1; - scb->magic_num = 222; - scb->state = ASPSTATE_Idle; - scb->pid = gref->pid; - scb->gref = gref; - scb->session_timer = DEF_SESS_TMO; - scb->cmd_retry = asp_def_retry; - if ((scb->next_scb = scb_used_list) != 0) - scb->next_scb->prev_scb = scb; - scb_used_list = scb; - - /* - * return success - */ - dPrintf(D_M_ASP, D_L_INFO, ("asp_open: pid=%d\n", scb->pid)); - return 0; -} /* asp_open */ - -/* - * the close routine frees all the data structures - */ -int -asp_close(gref) - gref_t *gref; -{ - unsigned char sock_num; - asp_scb_t *scb, *new_scb; - gbuf_t *m; - - scb = (asp_scb_t *)gref->info; - dPrintf(D_M_ASP, D_L_INFO, ("asp_close: loc=%d\n", - scb->loc_addr.socket)); - - if (scb->pid && scb->sess_ioc && (scb->dflag != 1)) { - /* - * send the CloseSess response to peer - */ - if (gbuf_type(scb->sess_ioc) != MSG_PROTO) { - m = scb->sess_ioc; - scb->sess_ioc = gbuf_next(m); - atp_send_rsp(scb->gref, m, TRUE); - } - } - - if (scb->atp_state) { - sock_num = scb->loc_addr.socket; - if ((scb->dflag != 1) && scb->stat_msg) { - untimeout(atp_retry_req, scb->stat_msg); - gbuf_freem(scb->stat_msg); - scb->stat_msg = 0; - } - if (asp_scbQ[sock_num]->next_scb == 0) { - asp_scbQ[sock_num] = 0; - asp_inpC[sock_num] = 0; - dPrintf(D_M_ASP, D_L_INFO, - (" : atp_close(), loc=%d\n", scb->loc_addr.socket)); - atp_close(gref, 0); - } else { - asp_inpC[sock_num]--; - if (scb == asp_scbQ[sock_num]) { - new_scb = scb->next_scb; - new_scb->prev_scb = 0; - asp_scbQ[sock_num] = new_scb; - new_scb->atp_state->atp_gref = new_scb->gref; - new_scb->atp_state->pid = new_scb->pid; - atp_inputQ[sock_num] = new_scb->gref; - } else { - if ((scb->prev_scb->next_scb = scb->next_scb) != 0) - scb->next_scb->prev_scb = scb->prev_scb; - } - scb->next_scb = 0; - } - } else - asp_dequeue_scb(scb); - - /* - * free all allocated blocks if any - */ - if (scb->stat_msg) { - gbuf_freem(scb->stat_msg); - scb->stat_msg = 0; - } - if (scb->sess_ioc) { - gbuf_freel(scb->sess_ioc); - scb->sess_ioc = 0; - } - if (scb->req_msgq) { - gbuf_freel(scb->req_msgq); - scb->req_msgq = 0; - } - - scb->rem_addr.node = 0; - - /* - * stop all timers - */ - scb->tmo_cnt = 0; - asp_untimout(asp_hangup, scb); - untimeout(asp_send_tickle_locked, (void *)scb); /* added for 2225395 */ - - /* - * free the asp session control block - */ - scb->state = ASPSTATE_Close; - asp_scb_free(scb); - return 0; -} /* asp_close */ - -#if DEBUG - -static const char *aspStateStr(int); - -static const char *aspStateStr(state) - int state; -{ - return ((state==ASPSTATE_Close)? "Close": - (state==ASPSTATE_Idle)? "Idle": - (state==ASPSTATE_WaitingForGetStatusRsp)? "GetStatusRsp": - (state==ASPSTATE_WaitingForOpenSessRsp)? "OpenSessRsp": - (state==ASPSTATE_WaitingForCommandRsp)? "CmdRsp": - (state==ASPSTATE_WaitingForWriteContinue)? "WriteCont": - (state==ASPSTATE_WaitingForWriteRsp)? "WriteRsp": - (state==ASPSTATE_WaitingForWriteContinueRsp)? 
"WriteContRsp": - (state==ASPSTATE_WaitingForCloseSessRsp)? "CloseSessRsp": - "unknown"); -} - -static const char *aspCmdStr(int); - -static const char *aspCmdStr(aspCmd) - int aspCmd; -{ -return ((aspCmd==ASPFUNC_CloseSess)? "CloseSess": - (aspCmd==ASPFUNC_Command)? "Command": - (aspCmd==ASPFUNC_GetStatus)? "GetStatus": - (aspCmd==ASPFUNC_OpenSess)? "OpenSess": - (aspCmd==ASPFUNC_Tickle)? "Tickle": - (aspCmd==ASPFUNC_Write)? "Write": - (aspCmd==ASPFUNC_WriteContinue)? "WriteContinue": - (aspCmd==ASPFUNC_Attention)? "Attention": - (aspCmd==ASPFUNC_CmdReply)? "CmdReply": "unknown"); -} - -static const char *aspIOCStr(int); - -static const char *aspIOCStr(aspIOC) - int aspIOC; -{ -return ( - (aspIOC==ASPIOC_ClientBind)? "ClientBind": - (aspIOC==ASPIOC_CloseSession)? "CloseSession": - (aspIOC==ASPIOC_GetLocEntity)? "GetLocEntity": - (aspIOC==ASPIOC_GetRemEntity)? "GetRemEntity": - (aspIOC==ASPIOC_GetSession)? "GetSession": - (aspIOC==ASPIOC_GetStatus)? "GetStatus": - (aspIOC==ASPIOC_ListenerBind)? "ListenerBind": - (aspIOC==ASPIOC_OpenSession)? "OpenSession": - (aspIOC==ASPIOC_StatusBlock)? "StatusBlock": - (aspIOC==ASPIOC_SetPid)? "SetPid": - (aspIOC==ASPIOC_GetSessId)? "GetSessId": - (aspIOC==ASPIOC_EnableSelect)? "EnableSelect": - (aspIOC==ASPIOC_Look)? "Look": - "unknown" - ); -} -#endif /* DEBUG */ - -#ifdef AT_MBUF_TRACE - -static char mbuf_str[100]; -char *mbuf_totals() -{ - snprintf(mbuf_str, sizeof(mbuf_str), - /* - "dat = %d, prot = %d, ioc = %d, err = %d, hu = %d, ack = %d, nak = %d, ctl = %d", - */ - "dat = %d, prot = %d, ioc = %d, ctl = %d", - mbstat.m_mtypes[MSG_DATA], mbstat.m_mtypes[MSG_PROTO], mbstat.m_mtypes[MSG_IOCTL], - /* - mbstat.m_mtypes[MSG_ERROR], mbstat.m_mtypes[MSG_HANGUP], mbstat.m_mtypes[MSG_IOCACK], - mbstat.m_mtypes[MSG_IOCNAK], - */ - mbstat.m_mtypes[MSG_CTL]); - return(&mbuf_str[0]); -} - -void trace_beg(str, m) - char *str; - gbuf_t *m; -{ - int i = 0, j = 0; - gbuf_t *mdata, *mchain; - - if (m) - for (i = 0, j = 0, mdata = m, mchain = m; mdata; i++) { - mdata = gbuf_cont(mdata); - if (!mdata && mchain) { - mdata = gbuf_next(mchain); - mchain = mdata; - j++; - } - } - dPrintf(D_M_ASP, D_L_TRACE, - ("%s: %s, m# = %d, c# = %d\n", str, mbuf_totals(), i, j)); -} - -void trace_end(str) - char *str; -{ - dPrintf(D_M_ASP, D_L_TRACE, - (" %s: %s\n", str, mbuf_totals())); -} -#endif /* AT_MBUF_TRACE */ - -/* - * the write routine - */ -int asp_wput(gref, m) - gref_t *gref; - gbuf_t *m; -{ - int err; - unsigned char sockSav, sock_num; - gbuf_t *mioc, *mdata; - ioc_t *iocbp; - asp_scb_t *scb, *server_scb, *curr_scb; - at_inet_t *addr; - asp_word_t aw; - union asp_primitives *primitives; - asp_status_cmd_t *status_cmd; - asp_open_cmd_t *open_cmd; - at_retry_t Retry; - - scb = (asp_scb_t *)gref->info; - if (scb->dflag == 0) { - atp_wput(gref, m); - return 0; - } - - if (gbuf_type(m) != MSG_IOCTL) { - dPrintf(D_M_ASP, D_L_WARNING, - ("asp_wput: UNKNOWN message, type=%d\n", - gbuf_type(m))); - gbuf_freem(m); - return 0; - } - - mioc = m; - iocbp = (ioc_t *)gbuf_rptr(mioc); - - dPrintf(D_M_ASP_LOW, D_L_INFO, - ("asp_wput: %s, loc=%d, state=%s\n", - aspIOCStr(iocbp->ioc_cmd), scb->loc_addr.socket, - aspStateStr(scb->state))); - - switch (iocbp->ioc_cmd) { - case ASPIOC_CloseSession: - if ((scb->state == ASPSTATE_Close) || (scb->rem_addr.node == 0)) - break; - - Retry.retries = 3; - Retry.interval = 1; - aw.func = ASPFUNC_CloseSess; - aw.param1 = scb->sess_id; - aw.param2 = 0; - iocbp->ioc_private = (void *)scb; - scb->ioc_wait = (unsigned char)(iocbp->ioc_cmd & 0xff); - 
iocbp->ioc_cmd = AT_ATP_ISSUE_REQUEST; - asp_send_req(gref, mioc, &scb->rem_addr, &Retry, &aw, - 0, ASPSTATE_WaitingForCloseSessRsp, 0x01); - return 0; - - case ASPIOC_ClientBind: - /* - * open an ATP channel - */ - if ((err = atp_open(gref, 0)) != 0) { - asp_iocnak(gref, mioc, err); - return 0; - } - scb->atp_state = (atp_state_t *)gref->info; - scb->atp_state->pid = scb->pid; - /* - * bind to any available socket - */ - scb->dflag = 2; - sockSav = scb->dflag; - if ((sock_num = (at_socket)atp_bind(gref, 0, &sockSav)) == 0) { - scb->atp_state = (atp_state_t *)0; - atp_close(gref, 0); - gref->info = (void *)scb; - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - gref->info = (void *)scb; - asp_dequeue_scb(scb); - scb->atp_state->dflag = scb->dflag; - scb->loc_addr.socket = sock_num; - asp_scbQ[sock_num] = scb; - asp_inpC[sock_num]++; - atp_pidM[sock_num] = 0; - break; - - case ASPIOC_ListenerBind: - /* - * open an ATP channel - */ - if ((err = atp_open(gref, 0)) != 0) { - asp_iocnak(gref, mioc, err); - return 0; - } - scb->atp_state = (atp_state_t *)gref->info; - scb->atp_state->pid = scb->pid; - /* - * bind to any available socket - */ - if ((sock_num = (at_socket)atp_bind(gref, 0, 0)) == 0) { - scb->atp_state = (atp_state_t *)0; - atp_close(gref, 0); - gref->info = (void *)scb; - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - gref->info = (void *)scb; - asp_dequeue_scb(scb); - scb->atp_state->dflag = scb->dflag; - scb->loc_addr.socket = sock_num; - asp_scbQ[sock_num] = scb; - asp_inpC[sock_num]++; - if (gbuf_cont(mioc)) - *(at_inet_t *)gbuf_rptr(gbuf_cont(mioc)) = scb->loc_addr; - break; - - case ASPIOC_GetLocEntity: - if ((gbuf_cont(mioc) == 0) || (scb->atp_state == 0)) { - asp_iocnak(gref, mioc, EPROTOTYPE); - return 0; - } - *(at_inet_t *)gbuf_rptr(gbuf_cont(mioc)) = scb->loc_addr; - break; - - case ASPIOC_GetRemEntity: - if ((gbuf_cont(mioc) == 0) || (scb->atp_state == 0)) { - asp_iocnak(gref, mioc, EPROTOTYPE); - return 0; - } - *(at_inet_t *)gbuf_rptr(gbuf_cont(mioc)) = scb->rem_addr; - break; - - case ASPIOC_GetSession: - if ((mdata = gbuf_cont(mioc)) == 0) { - asp_iocnak(gref, mioc, EPROTOTYPE); - return 0; - } - addr = (at_inet_t *)gbuf_rptr(mdata); - scb->tickle_interval = (unsigned short)addr->node; - scb->session_timer = addr->net; - server_scb = asp_scbQ[addr->socket]; -/*### LD 10/28/97: changed to make sure we're not accessing a null server_scb */ - if (server_scb == 0) { - asp_iocnak(gref, mioc, EPROTOTYPE); - return 0; - } - if (server_scb->sess_ioc == 0) { - asp_iocnak(gref, mioc, EPROTOTYPE); - return 0; - } - - /* - * open an ATP channel - */ - if ((err = atp_open(gref, 0)) != 0) { - gref->info = (void *)scb; - asp_iocnak(gref, mioc, err); - return 0; - } - scb->atp_state = (atp_state_t *)gref->info; - scb->atp_state->pid = scb->pid; - /* - * bind to any available socket - */ - scb->dflag = 3; - sockSav = scb->dflag; - if ((sock_num = (at_socket)atp_bind(gref, 0, &sockSav)) == 0) { - atp_close(gref, 0); - asp_dequeue_scb(scb); - sock_num = sockSav; - scb->loc_addr.socket = sock_num; - for (curr_scb = asp_scbQ[sock_num]; - curr_scb->next_scb; curr_scb = curr_scb->next_scb) ; - scb->prev_scb = curr_scb; - curr_scb->next_scb = scb; - scb->atp_state = curr_scb->atp_state; - } else { - asp_dequeue_scb(scb); - scb->loc_addr.socket = sock_num; - asp_scbQ[sock_num] = scb; - scb->atp_state->dflag = scb->dflag; - } - gref->info = (void *)scb; - asp_inpC[sock_num]++; - gbuf_cont(mioc) = 0; - asp_accept(server_scb, scb, mdata); - break; - - case ASPIOC_GetStatus: - if ((mdata 
= gbuf_cont(mioc)) == 0) { - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - gbuf_cont(mioc) = 0; - status_cmd = (asp_status_cmd_t *)gbuf_rptr(mdata); - aw.func = ASPFUNC_GetStatus; - aw.param1 = 0; - aw.param2 = 0; - scb->ioc_wait = (unsigned char)(iocbp->ioc_cmd & 0xff); - iocbp->ioc_cmd = AT_ATP_ISSUE_REQUEST_DEF; - /* bms: make sure this is an ALO request */ - asp_send_req(gref, mioc, &status_cmd->SLSEntityIdentifier, - &status_cmd->Retry, &aw, 0, ASPSTATE_WaitingForGetStatusRsp, 0xff); - gbuf_freeb(mdata); - return 0; - - case ASPIOC_OpenSession: - if ((mdata = gbuf_cont(mioc)) == 0) { - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - gbuf_cont(mioc) = 0; - open_cmd = (asp_open_cmd_t *)gbuf_rptr(mdata); - scb->svc_addr = open_cmd->SLSEntityIdentifier; - scb->rem_addr = scb->svc_addr; - scb->rem_node = scb->rem_addr.node; - scb->rem_addr.node = 0; - scb->tickle_interval = open_cmd->TickleInterval; - scb->session_timer = open_cmd->SessionTimer; - aw.func = ASPFUNC_OpenSess; - aw.param1 = scb->loc_addr.socket; - aw.param2 = htons(ASP_Version); - scb->ioc_wait = (unsigned char)(iocbp->ioc_cmd & 0xff); - iocbp->ioc_cmd = AT_ATP_ISSUE_REQUEST_DEF; - asp_send_req(gref, mioc, &open_cmd->SLSEntityIdentifier, - &open_cmd->Retry, &aw, 1, ASPSTATE_WaitingForOpenSessRsp, 0x01); - gbuf_freeb(mdata); - return 0; - - case ASPIOC_StatusBlock: - /* - * save the server status block - */ - if (scb->stat_msg) - gbuf_freem(scb->stat_msg); - scb->stat_msg = gbuf_cont(mioc); - gbuf_cont(mioc) = 0; - break; - - /* *** Does scb->pid get used in a packet header, - and if so is it in ASP, or in ATP? - If not, do we need this call for anything? - (cap does currently use it in _ANS code.) - *** */ - case ASPIOC_SetPid: - if (gbuf_cont(mioc) == 0) { - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - scb->pid = *(int *)gbuf_rptr(gbuf_cont(mioc)); - break; - - case ASPIOC_GetSessId: - if (gbuf_cont(mioc) == 0) { - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - *(gref_t **)gbuf_rptr(gbuf_cont(mioc)) = gref; - break; - - case ASPIOC_Look: - if (gbuf_cont(mioc) == 0) { - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - if (scb->sess_ioc) { - primitives = (union asp_primitives *)gbuf_rptr(scb->sess_ioc); - if (primitives->Primitive == ASPFUNC_CmdReply) - *(int *)gbuf_rptr(gbuf_cont(mioc)) = 0; - else - *(int *)gbuf_rptr(gbuf_cont(mioc)) = 1; - } else - *(int *)gbuf_rptr(gbuf_cont(mioc)) = -1; - break; - - case DDP_IOC_GET_CFG: - { - struct atp_state *atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - - if (gbuf_cont(mioc) == 0) { - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - /* *** borrowed from ddp_proto.c to handle DDP_IOC_GET_CFG - on atp fd *** */ - scb->state = ASPSTATE_Idle; - { - /* *** was ddp_get_cfg() *** */ - ddp_addr_t *cfgp = - (ddp_addr_t *)gbuf_rptr(gbuf_cont(mioc)); - cfgp->inet.net = ifID_home->ifThisNode.s_net; - cfgp->inet.node = ifID_home->ifThisNode.s_node; - cfgp->inet.socket = atp->atp_socket_no; - cfgp->ddptype = DDP_ATP; - } - gbuf_wset(gbuf_cont(mioc), sizeof(at_inet_t)); - } - break; - - default: - asp_iocnak(gref, mioc, EINVAL); - return 0; - } - - asp_iocack(gref, mioc); - return 0; -} /* asp_wput */ - -/* - * send request routine - */ -StaticProc int -asp_send_req(gref, mioc, dest, retry, awp, xo, state, bitmap) - gref_t *gref; - gbuf_t *mioc; - at_inet_t *dest; - at_retry_t *retry; - asp_word_t *awp; - unsigned char xo; - unsigned char state; - unsigned char bitmap; -{ - int i; - gbuf_t *mdata; - ioc_t *iocbp; - struct 
atp_set_default *sd; - at_ddp_t *ddp; - at_atp_t *atp; - struct atpBDS *atpBDS; - asp_scb_t *scb = (asp_scb_t *)gref->info; - - /* - * allocate an ATP buffer for the request - */ - if ((gbuf_cont(mioc) = gbuf_alloc(aspCMDsize, PRI_MED)) == 0) { - if (awp->func == ASPFUNC_Tickle) - gbuf_freem(mioc); - else - asp_iocnak(gref, mioc, ENOBUFS); - dPrintf(D_M_ASP, D_L_WARNING, - ("asp_send_req: ENOBUFS, loc=%d\n", scb->loc_addr.socket)); - - return -1; - } - mdata = gbuf_cont(mioc); - iocbp = (ioc_t *)gbuf_rptr(mioc); - - /* - * build the request - */ - atpBDS = (struct atpBDS *)gbuf_rptr(mdata); - gbuf_wset(mdata,atpBDSsize); - for (i=0; i < ATP_TRESP_MAX; i++) { - *(unsigned long *)atpBDS[i].bdsBuffAddr = 1; - *(unsigned short *)atpBDS[i].bdsBuffSz = ATP_DATA_SIZE; - } - sd = (struct atp_set_default *)gbuf_wptr(mdata); - gbuf_winc(mdata,sizeof(struct atp_set_default)); - sd->def_retries = (retry->retries == -1) ? - ATP_INFINITE_RETRIES : retry->retries; - sd->def_rate = retry->interval*TICKS_PER_SEC; - sd->def_BDSlen = atpBDSsize; - ddp = (at_ddp_t *)gbuf_wptr(mdata); - NET_ASSIGN(ddp->src_net, scb->loc_addr.net); - ddp->src_node = scb->loc_addr.node; - NET_ASSIGN(ddp->dst_net, dest->net); - ddp->dst_node = dest->node; - ddp->dst_socket = dest->socket; - UAS_ASSIGN(ddp->checksum, 0); - atp = ATP_ATP_HDR(gbuf_wptr(mdata)); - atp->xo = xo; - atp->xo_relt = xo; - atp->bitmap = bitmap; - gbuf_winc(mdata,TOTAL_ATP_HDR_SIZE); - *(asp_word_t *)atp->user_bytes = *awp; - iocbp->ioc_count = gbuf_len(mdata); - iocbp->ioc_rval = 0; - - /* - * send the request - */ - scb->state = state; - dPrintf(D_M_ASP, D_L_INFO, - ("asp_send_req: %s, loc=%d, rem= %d, len=%d, state=%s\n", - aspCmdStr(awp->func), - scb->loc_addr.socket, ddp->dst_socket, iocbp->ioc_count, - aspStateStr(scb->state))); - - atp_send_req(gref, mioc); - return 0; -} - -/* - * send tickle routine - locked version - */ -StaticProc void -asp_send_tickle_locked(scb) - void *scb; -{ - atalk_lock(); - asp_send_tickle((asp_scb_t *)scb); - atalk_unlock(); -} - - -/* - * send tickle routine - */ -StaticProc void -asp_send_tickle(scb) - asp_scb_t *scb; -{ - gbuf_t *mioc; - at_retry_t retry; - asp_word_t aw; - at_inet_t *dest; - - - /* - * make sure the connection is still there - */ - if (scb->rem_addr.node == 0) { - return; - } - - if ((mioc = gbuf_alloc(sizeof(ioc_t), PRI_HI)) == 0) { - dPrintf(D_M_ASP, D_L_WARNING, - ("asp_send_tickle: ENOBUFS 0, loc=%d, rem=%d\n", - scb->loc_addr.socket,scb->rem_addr.socket)); - timeout(asp_send_tickle_locked, (void *)scb, 10); - return; - } - gbuf_wset(mioc,sizeof(ioc_t)); - gbuf_set_type(mioc, MSG_IOCTL); - - dest = scb->svc_addr.node ? 
- (at_inet_t *)&scb->svc_addr : (at_inet_t *)&scb->rem_addr; - retry.interval = scb->tickle_interval; - retry.retries = -1; - retry.backoff = 1; - aw.func = ASPFUNC_Tickle; - aw.param1 = scb->sess_id; - aw.param2 = 0; - ((ioc_t *)gbuf_rptr(mioc))->ioc_cr = (void *)scb; - ((ioc_t *)gbuf_rptr(mioc))->ioc_cmd = AT_ATP_ISSUE_REQUEST_TICKLE; - - if (asp_send_req(scb->gref, mioc, dest, &retry, &aw, 0, scb->state, 0)) { - dPrintf(D_M_ASP, D_L_WARNING, - ("asp_send_tickle: ENOBUFS 1, loc=%d, rem=%d\n", - scb->loc_addr.socket,scb->rem_addr.socket)); - - timeout(asp_send_tickle_locked, (void *)scb, 10); - return; - } -} - -/* - * accept connection routine - */ -StaticProc void -asp_accept(scb, sess_scb, m) - asp_scb_t *scb; - asp_scb_t *sess_scb; - gbuf_t *m; -{ - gbuf_t *mdata; - at_ddp_t *ddp; - at_atp_t *atp; - asp_word_t *awp; - at_inet_t rem_addr; - - mdata = scb->sess_ioc; - ddp = (at_ddp_t *)gbuf_rptr(mdata); - atp = (at_atp_t *)(gbuf_rptr(mdata) + DDP_X_HDR_SIZE); - rem_addr.net = NET_VALUE(ddp->src_net); - rem_addr.node = ddp->src_node; - rem_addr.socket = ddp->src_socket; - awp = (asp_word_t *)atp->user_bytes; - - sess_scb->loc_addr.net = NET_VALUE(ddp->dst_net); - sess_scb->loc_addr.node = ddp->dst_node; - NET_ASSIGN(ddp->src_net, sess_scb->loc_addr.net); - ddp->src_node = sess_scb->loc_addr.node; - NET_ASSIGN(ddp->dst_net, rem_addr.net); - ddp->dst_node = rem_addr.node; - ddp->dst_socket = rem_addr.socket; - - sess_scb->sess_id = sess_scb->loc_addr.socket; - sess_scb->rem_socket = rem_addr.socket; - sess_scb->rem_addr = rem_addr; - sess_scb->rem_addr.socket = awp->param1; - sess_scb->reply_socket = sess_scb->rem_addr.socket; - awp->func = sess_scb->loc_addr.socket; - awp->param1 = sess_scb->sess_id; - awp->param2 = 0; - gbuf_freeb(m); - scb->sess_ioc = gbuf_next(mdata); - gbuf_next(mdata) = 0; - asp_timout(asp_hangup, sess_scb, sess_scb->session_timer); - atp_send_rsp(scb->gref, mdata, TRUE); - asp_send_tickle(sess_scb); - dPrintf(D_M_ASP, D_L_INFO, - ("asp_accept: ACCEPT connect request, loc=%d, rem=%x.%x.%d\n", - sess_scb->loc_addr.socket, - sess_scb->rem_addr.net, - sess_scb->rem_addr.node,sess_scb->rem_addr.socket)); -} /* asp_accept */ - -/* - * timer routine - locked version - */ -void asp_clock_locked(arg) - void *arg; -{ - atalk_lock(); - asp_clock(arg); - atalk_unlock(); -} - -/* - * timer routine - */ -void asp_clock(arg) - void *arg; -{ - asp_scb_t *scb; - asp_tmo_func tmo_func; - - if (scb_tmo_list) - scb_tmo_list->tmo_delta--; - while (((scb = scb_tmo_list) != 0) && (scb_tmo_list->tmo_delta == 0)) { - if ((scb_tmo_list = scb->next_tmo) != 0) - scb_tmo_list->prev_tmo = 0; - if ((tmo_func = scb->tmo_func) != 0) { - scb->tmo_func = 0; - (*tmo_func)(scb); - } - } - - if (++scb_tmo_cnt == 0) scb_tmo_cnt++; - timeout(asp_clock_locked, (void *)arg, (1<ioc_cmd == AT_ATP_ISSUE_REQUEST_TICKLE) { - /* - * ignore the ack for the tickle request - */ - scb = (asp_scb_t *)iocbp->ioc_cr; - scb->tickle_tid = (unsigned short)iocbp->ioc_rval; - gbuf_freem(mioc); - return; - } - - scb = (asp_scb_t *)gref->info; - if (scb == 0) { - gbuf_freem(mioc); - return; - } - - if (iocbp->ioc_cmd == AT_ATP_GET_POLL) { - /* - * if no data, just drop the request - */ - if ((mdata = gbuf_cont(mioc)) == 0) { - gbuf_freeb(mioc); - return; - } - - gbuf_set_type(mioc, MSG_IOCTL); - ddp = (at_ddp_t *)gbuf_rptr(mdata); - gbuf_rinc(mdata,DDP_X_HDR_SIZE); - atp = (at_atp_t *)gbuf_rptr(mdata); - gbuf_rinc(mdata,ATP_HDR_SIZE); - rem_addr.net = NET_VALUE(ddp->src_net); - rem_addr.node = ddp->src_node; - rem_addr.socket 
= ddp->src_socket; - awp = (asp_word_t *)atp->user_bytes; - - if (scb->next_scb) { - /* - * find the responsible scb - */ - if ((scb = asp_find_scb(scb->loc_addr.socket, &rem_addr)) == 0) { - gbuf_freem(mioc); - return; - } - } - dPrintf(D_M_ASP, D_L_INFO, - ("asp_ack_reply: %s, loc=%d, rem=%x.%x.%d\n", - aspCmdStr(awp->func),scb->loc_addr.socket, - NET_VALUE(ddp->src_net) ,ddp->src_node,ddp->src_socket)); - - if (scb->rem_addr.node) - asp_untimout(asp_hangup, scb); - - switch (awp->func) { - case ASPFUNC_GetStatus: - /* - * ignore if this is not a server socket - */ - mx = 0; - if ((scb->dflag != 1) || (scb->stat_msg - && ((mx = gbuf_dupb(scb->stat_msg)) == 0))) - break; - gbuf_freeb(mioc); - - /* - * send the status block - */ - if (gbuf_cont(mdata)) { - gbuf_freem(gbuf_cont(mdata)); - gbuf_cont(mdata) = 0; - } - gbuf_rdec(mdata,TOTAL_ATP_HDR_SIZE); - if ((m = gbuf_alloc( (TOTAL_ATP_HDR_SIZE+atpBDSsize), PRI_MED)) == 0) { - gbuf_freem(mdata); - gbuf_freeb(mx); - goto l_done; - } - bcopy(gbuf_rptr(mdata), gbuf_rptr(m), TOTAL_ATP_HDR_SIZE); - gbuf_freeb(mdata); - mdata = m; - ddp = (at_ddp_t *)gbuf_rptr(mdata); - gbuf_wset(mdata,DDP_X_HDR_SIZE); - atp = (at_atp_t *)gbuf_wptr(mdata); - gbuf_winc(mdata,ATP_HDR_SIZE); - awp = (asp_word_t *)atp->user_bytes; - NET_NET(ddp->src_net, ddp->dst_net); - ddp->src_node = ddp->dst_node; - NET_ASSIGN(ddp->dst_net, rem_addr.net); - ddp->dst_node = rem_addr.node; - ddp->dst_socket = rem_addr.socket; - UAS_ASSIGN(ddp->checksum, 0); - atpBDS = (struct atpBDS *)gbuf_wptr(mdata); - msize = mx ? gbuf_msgsize(mx) : 0; - for (nbds=0; (nbds < ATP_TRESP_MAX) && (msize > 0); nbds++) { - len = msize < ATP_DATA_SIZE ? msize : ATP_DATA_SIZE; - msize -= ATP_DATA_SIZE; - *(long *)atpBDS[nbds].bdsUserData = 0; - UAL_ASSIGN(atpBDS[nbds].bdsBuffAddr, 1); - UAS_ASSIGN(atpBDS[nbds].bdsBuffSz, len); - } - UAS_ASSIGN(atpBDS[0].bdsDataSz, nbds); - gbuf_winc(mdata,atpBDSsize); - gbuf_cont(mdata) = mx; - atp_send_rsp(gref, mdata, FALSE); - goto l_done; - - case ASPFUNC_OpenSess: - /* - * ignore if server is not ready - */ - if ((scb->dflag != 1) || (scb->stat_msg == 0)) - break; - gbuf_freeb(mioc); - - if (gbuf_cont(mdata)) { - gbuf_freem(gbuf_cont(mdata)); - gbuf_cont(mdata) = 0; - } - gbuf_rdec(mdata,TOTAL_ATP_HDR_SIZE); - gbuf_wset(mdata,TOTAL_ATP_HDR_SIZE); - if (awp->param2 != ASP_Version) { - /* - * bad version number, send the OpenSession response - */ - awp->func = 0; - awp->param1 = 0; - awp->param2 = htons((unsigned short)ASPERR_BadVersNum); - dPrintf(D_M_ASP, D_L_INFO, - (" : version=%d\n", - ASPERR_BadVersNum)); - - NET_NET(ddp->src_net, ddp->dst_net); - ddp->src_node = ddp->dst_node; - NET_ASSIGN(ddp->dst_net, rem_addr.net); - ddp->dst_node = rem_addr.node; - ddp->dst_socket = rem_addr.socket; - atp_send_rsp(gref, mdata, FALSE); - return; - } - - /* - * queue the connection request - */ - gbuf_next(mdata) = 0; - if ((m = scb->sess_ioc) == 0) { - scb->sess_ioc = mdata; - if (scb->get_wait) - wakeup(&scb->event); - else - atalk_notify_sel(gref); - } else { - while (gbuf_next(m)) - m = gbuf_next(m); - gbuf_next(m) = mdata; - } - dPrintf(D_M_ASP, D_L_INFO, - (" : QUEUE connect request\n")); - - return; - - case ASPFUNC_Command: - case ASPFUNC_Write: - if ( (scb->sess_id != awp->param1) - || (scb->rcv_seq_num != ntohs(awp->param2)) - || BAD_REMADDR(rem_addr) ) { - char era[8], ra[8]; - snprintf(era, sizeof(era), "%d.%d", scb->rem_addr.node,scb->rem_addr.socket); - snprintf(ra, sizeof(ra), "%d.%d", rem_addr.node,rem_addr.socket); - dPrintf(D_M_ASP, D_L_WARNING, - (" : 
DROP, id=%d,esn=%d,sn=%d,erem=%s,rem=%s\n", - scb->sess_id,scb->rcv_seq_num,awp->param2,era,ra)); - gbuf_cont(mioc) = 0; - gbuf_rdec(mdata,TOTAL_ATP_HDR_SIZE); - atp_drop_req(gref, mdata); - break; - } - scb->reply_socket = rem_addr.socket; - if (awp->func == ASPFUNC_Write) - scb->wrt_seq_num = scb->rcv_seq_num; - NEXT_SEQ_NUM(scb->rcv_seq_num); - gbuf_set_type(mioc, MSG_PROTO); - gbuf_wset(mioc,sizeof(asp_command_ind_t)); - command_ind = (asp_command_ind_t *)gbuf_rptr(mioc); - command_ind->Primitive = (int)awp->func; - command_ind->ReqRefNum = - ntohs(*(unsigned short *)atp->tid); - command_ind->ReqType = awp->func; - - mdata = gbuf_strip(mdata); - gbuf_cont(mioc) = mdata; - if (scb->req_flag) { - if ((mx = scb->req_msgq) != 0) { - while (gbuf_next(mx)) - mx = gbuf_next(mx); - gbuf_next(mx) = mioc; - } else - scb->req_msgq = mioc; - } else { - scb->req_flag = 1; - asp_putnext(scb->gref, mioc); - } - goto l_done; - - case ASPFUNC_WriteContinue: - if ( (scb->sess_id != awp->param1) - || (scb->snd_seq_num != awp->param2) - || BAD_REMADDR(rem_addr) ) { - break; - } - scb->reply_socket = rem_addr.socket; - gbuf_set_type(mioc, MSG_PROTO); - gbuf_wset(mioc,sizeof(asp_command_ind_t)); - command_ind = (asp_command_ind_t *)gbuf_rptr(mioc); - command_ind->Primitive = (int)awp->func; - command_ind->ReqRefNum = - ntohs(*(unsigned short *)atp->tid); - command_ind->ReqType = awp->func; - - mdata = gbuf_strip(mdata); - gbuf_cont(mioc) = mdata; - asp_putnext(scb->gref, mioc); - goto l_done; - - case ASPFUNC_Tickle: - if (scb->stat_msg) { - sess_scb = asp_scbQ[awp->param1]; - if (sess_scb && sess_scb->next_scb) - sess_scb = asp_find_scb( - sess_scb->loc_addr.socket, &rem_addr); - if (sess_scb) { - if (sess_scb->rem_addr.node) - asp_untimout(asp_hangup, sess_scb); - if (sess_scb->rem_addr.node) - asp_timout(asp_hangup, sess_scb, sess_scb->session_timer); - } - } - dPrintf(D_M_ASP, D_L_INFO, - (" : Tickle, %d -> %d, id=%d\n", - ddp->src_socket,ddp->dst_socket,awp->param1)); - break; - - case ASPFUNC_CloseSess: - if ( (scb->sess_id != awp->param1) - || (scb->state == ASPSTATE_Close) - || (scb->state == ASPSTATE_WaitingForCloseSessRsp) - || (scb->rem_addr.net != rem_addr.net) - || (scb->rem_addr.node != rem_addr.node) ) { - dPrintf(D_M_ASP, D_L_INFO, - (" : CLOSE retry, loc=%d, rem=%x.%x.%d\n", - scb->loc_addr.socket, - scb->rem_addr.net, - scb->rem_addr.node, - scb->rem_addr.socket)); - - break; - } - gbuf_freeb(mioc); - - /* - * build the CloseSess response to be sent to peer - * when the session is closed by the user. 
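The sequence check in the Command/Write arm above relies on NEXT_SEQ_NUM(), defined near the top of this file: ASP sequence numbers are 16-bit counters that wrap explicitly from 65535 back to 0 rather than depending on integer overflow of a wider type. The macro updates its argument in place; a pure-function rendering of the same arithmetic, with an invented name:

static unsigned short next_seq_num_demo(unsigned short x)
{
    return (x == 65535) ? 0 : (x + 1);
}

/*
 * next_seq_num_demo(0)     == 1
 * next_seq_num_demo(65534) == 65535
 * next_seq_num_demo(65535) == 0    -- the explicit wrap, as in NEXT_SEQ_NUM()
 */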
- */ - if (gbuf_cont(mdata)) { - gbuf_freem(gbuf_cont(mdata)); - gbuf_cont(mdata) = 0; - } - gbuf_rdec(mdata,TOTAL_ATP_HDR_SIZE); - gbuf_wset(mdata,TOTAL_ATP_HDR_SIZE); - NET_NET(ddp->src_net, ddp->dst_net); - ddp->src_node = ddp->dst_node; - NET_ASSIGN(ddp->dst_net, rem_addr.net); - ddp->dst_node = rem_addr.node; - ddp->dst_socket = rem_addr.socket; - awp->func = 0; - awp->param1 = 0; - awp->param2 = 0; - dPrintf(D_M_ASP,D_L_INFO, - (" : CLOSE, loc=%d, rem=%x.%x.%d\n", - scb->loc_addr.socket, - scb->rem_addr.net, - scb->rem_addr.node, - scb->rem_addr.socket)); - - gbuf_next(mdata) = 0; - if (scb->sess_ioc) - gbuf_freel(scb->sess_ioc); - scb->sess_ioc = mdata; - scb->state = ASPSTATE_Close; - - /* - * notify upstream of the CloseSess from peer - */ - asp_hangup(scb); - return; - - case ASPFUNC_Attention: - if ( (scb->sess_id != awp->param1) - || (scb->rem_addr.net != rem_addr.net) - || (scb->rem_addr.node != rem_addr.node) ) { - break; - } - gbuf_set_type(mioc, MSG_PROTO); - gbuf_wset(mioc,sizeof(asp_command_ind_t)); - command_ind = (asp_command_ind_t *)gbuf_rptr(mioc); - command_ind->Primitive = (int)awp->func; - command_ind->ReqRefNum = - ntohs(*(unsigned short *)atp->tid); - command_ind->ReqType = awp->func; - scb->attn_tid = *(unsigned short *)atp->tid; - scb->attn_flag = 1; - gbuf_rdec(mdata,2); /* attention code */ - - mdata = gbuf_strip(mdata); - gbuf_cont(mioc) = mdata; - asp_putnext(scb->gref, mioc); - goto l_done; - - default: - dPrintf(D_M_ASP, D_L_WARNING, - (" : UNKNOWN func, func=%d\n", - awp->func)); - - break; - } - } - - else if (iocbp->ioc_cmd == AT_ATP_REQUEST_COMPLETE) { - if (scb->next_scb) { - /* - * find the responsible scb - */ - scb = (asp_scb_t *)iocbp->ioc_private; - if ((scb == 0) || (scb->magic_num != 222)) { - dPrintf(D_M_ASP, D_L_ERROR, - ("asp_ack_reply: CAN'T find scb 1\n")); - gbuf_freem(mioc); - return; - } - } - dPrintf(D_M_ASP, D_L_INFO, - ("asp_ack_reply: RSP, loc=%d, rem=%x.%x.%d, state=%s\n", - scb->loc_addr.socket, - scb->rem_addr.net, - scb->rem_addr.node, - scb->rem_addr.socket, - aspStateStr(scb->state))); - - switch (scb->state) { - case ASPSTATE_Close: - case ASPSTATE_Idle: - scb->rem_addr.node = 0; - gbuf_freem(mioc); - if (scb->get_wait) - wakeup(&scb->event); - else - atalk_notify_sel(gref); - return; - - case ASPSTATE_WaitingForGetStatusRsp: - scb->ioc_wait = 0; - scb->state = ASPSTATE_Idle; - mx = gbuf_cont(mioc); - gbuf_cont(mioc) = 0; - mdata = gbuf_cont(mx); - gbuf_cont(mx) = 0; - iocbp->ioc_cmd = ASPIOC_GetStatus; - iocbp->ioc_count = 0; - iocbp->ioc_rval = mdata ? gbuf_msgsize(mdata) : 0; - gbuf_freeb(mx); - atalk_putnext(gref, mioc); - atalk_putnext(gref, mdata); - return; - - case ASPSTATE_WaitingForOpenSessRsp: - scb->ioc_wait = 0; - scb->state = ASPSTATE_Idle; - mx = gbuf_cont(mioc); - gbuf_cont(mioc) = 0; - if (gbuf_cont(mx)) { - gbuf_freem(gbuf_cont(mx)); - gbuf_cont(mx) = 0; - } - iocbp->ioc_cmd = ASPIOC_OpenSession; - iocbp->ioc_rval = 0; - iocbp->ioc_count = 0; - atpBDS = (struct atpBDS *)gbuf_rptr(mx); - awp = (asp_word_t *)atpBDS->bdsUserData; - if (awp->param2) { - gbuf_freeb(mx); - asp_iocnak(gref, mioc, ECONNREFUSED); - } else { - scb->rem_addr.node = scb->rem_node; - scb->rem_addr.socket = awp->func; - /* bms: need to set the reply_socket for client side too. - This makes ALO atten replies sent by the client work. 
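The open-session handshake being parsed here rides entirely in ATP's four user bytes, overlaid by asp_word_t (func, param1, param2). On the way out, asp_wput() packs (ASPFUNC_OpenSess, workstation session socket, htons(ASP_Version)); in the reply handled above, the same three fields come back as the server's session socket, the session id, and an error code in param2. A sketch of the request-side packing, assuming the asp_word_t layout from asp.h (demo names are invented):

#include <string.h>
#include <arpa/inet.h>

typedef struct {
    unsigned char  func;     /* ASP function code */
    unsigned char  param1;
    unsigned short param2;
} asp_word_demo_t;           /* 4 bytes, same shape as asp_word_t */

static void pack_open_sess(unsigned char user_bytes[4], unsigned char wss_socket)
{
    asp_word_demo_t aw;

    aw.func   = 4;                /* ASPFUNC_OpenSess */
    aw.param1 = wss_socket;       /* client's session listening socket */
    aw.param2 = htons(0x100);     /* ASP_Version */
    memcpy(user_bytes, &aw, sizeof(aw));  /* the kernel assigns through a cast */
}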
*/ - scb->reply_socket = scb->rem_addr.socket; - scb->sess_id = awp->param1; - gbuf_freeb(mx); - atalk_putnext(gref, mioc); - asp_timout(asp_hangup, scb, scb->session_timer); - asp_send_tickle(scb); - dPrintf(D_M_ASP, D_L_INFO, - ("asp_ack_reply: CONNECT, loc=%d, rem=%x.%x.%d\n", - scb->loc_addr.socket, - scb->rem_addr.net, - scb->rem_addr.node, - scb->rem_addr.socket)); - } - return; - - case ASPSTATE_WaitingForCommandRsp: - case ASPSTATE_WaitingForWriteRsp: - case ASPSTATE_WaitingForWriteContinueRsp: - if (scb->rem_addr.node) - asp_untimout(asp_hangup, scb); - NEXT_SEQ_NUM(scb->snd_seq_num); - scb->state = ASPSTATE_Idle; - gbuf_set_type(mioc, MSG_PROTO); - mx = gbuf_cont(mioc); - mdata = gbuf_cont(mx); - gbuf_cont(mioc) = mdata; - atpBDS = (struct atpBDS *)gbuf_rptr(mx); - cmdreply_ind = (asp_cmdreply_ind_t *)gbuf_rptr(mioc); - cmdreply_ind->Primitive = ASPFUNC_CmdReply; - cmdreply_ind->CmdResult = ntohl(*(int *)atpBDS->bdsUserData); - gbuf_wset(mioc,sizeof(asp_cmdreply_ind_t)); - gbuf_freeb(mx); - asp_putnext(scb->gref, mioc); - goto l_done; - - case ASPSTATE_WaitingForCloseSessRsp: - scb->ioc_wait = 0; - scb->state = ASPSTATE_Close; - scb->rem_addr.node = 0; - iocbp->ioc_cmd = ASPIOC_CloseSession; - iocbp->ioc_rval = 0; - if (gbuf_cont(mioc)) { - gbuf_freem(gbuf_cont(mioc)); - gbuf_cont(mioc) = 0; - } - atalk_putnext(scb->gref, mioc); - atp_cancel_req(scb->gref, (unsigned int)scb->tickle_tid); - scb->tickle_tid = 0; - return; - - default: - dPrintf(D_M_ASP, D_L_WARNING, - (" : UNKNOWN state, state=%s\n", - aspStateStr(scb->state))); - break; - } - } - - else { - if (scb->next_scb) { - /* - * find the responsible scb - */ - scb = (asp_scb_t *)iocbp->ioc_cr; - if ((scb == 0) || (scb->magic_num != 222)) { - dPrintf(D_M_ASP, D_L_ERROR, - ("asp_ack_reply: CAN'T find scb 2\n")); - gbuf_freem(mioc); - return; - } - } - - switch (scb->state) { - case ASPSTATE_Close: - scb->rem_addr.node = 0; - break; - } - } - - if (mioc != 0) - gbuf_freem(mioc); - -l_done: - if (scb->rem_addr.node) - asp_timout(asp_hangup, scb, scb->session_timer); -} /* asp_ack_reply */ - -/* - * NAK reply routine - */ -void -asp_nak_reply(gref, mioc) - register gref_t *gref; - register gbuf_t *mioc; -{ - register asp_scb_t *scb; - register ioc_t *iocbp; - - iocbp = (ioc_t *)gbuf_rptr(mioc); - - if (iocbp->ioc_cmd == AT_ATP_ISSUE_REQUEST_TICKLE) { - /* - * no tickle, close session - */ - scb = (asp_scb_t *)iocbp->ioc_cr; - gbuf_freem(mioc); - asp_hangup(scb); - dPrintf(D_M_ASP, D_L_WARNING, - ("tickle_nak: loc=%d, rem=%x.%x.%d, state=%s\n", - scb->loc_addr.socket, - scb->rem_addr.net, - scb->rem_addr.node, - scb->rem_addr.socket, - aspStateStr(scb->state))); - - return; - } - - scb = (asp_scb_t *)gref->info; - if (scb == 0) { - gbuf_freem(mioc); - return; - } - - if (iocbp->ioc_cmd == AT_ATP_REQUEST_COMPLETE) { - if (scb->next_scb) { - /* - * find the responsible scb - */ - scb = (asp_scb_t *)iocbp->ioc_private; - if ((scb == 0) || (scb->magic_num != 222)) { - dPrintf(D_M_ASP, D_L_ERROR, - ("asp_nak_reply: CAN'T find scb 1\n")); - gbuf_freem(mioc); - return; - } - } - dPrintf(D_M_ASP, D_L_WARNING, - ("asp_nak_reply: RSP, loc=%d, rem=%x.%x.%d, state=%s\n", - scb->loc_addr.socket, - scb->rem_addr.net, - scb->rem_addr.node, - scb->rem_addr.socket, - aspStateStr(scb->state))); - - switch (scb->state) { - case ASPSTATE_WaitingForGetStatusRsp: - iocbp->ioc_cmd = ASPIOC_GetStatus; - break; - - case ASPSTATE_WaitingForOpenSessRsp: - iocbp->ioc_cmd = ASPIOC_OpenSession; - break; - - case ASPSTATE_WaitingForCommandRsp: - case 
ASPSTATE_WaitingForWriteRsp: - case ASPSTATE_WaitingForWriteContinueRsp: - scb->state = ASPSTATE_Idle; - - /* last remaining use of MSG_ERROR */ - gbuf_set_type(mioc, MSG_ERROR); - *gbuf_rptr(mioc) = (u_char)EPROTOTYPE; - gbuf_wset(mioc, 1); - if (gbuf_cont(mioc)) { - gbuf_freem(gbuf_cont(mioc)); - gbuf_cont(mioc) = 0; - } - - asp_putnext(scb->gref, mioc); - return; - - case ASPSTATE_WaitingForCloseSessRsp: - scb->state = ASPSTATE_Close; - /* fall through */ - case ASPSTATE_Close: /* new for PR-2296832 */ - scb->rem_addr.node = 0; - iocbp->ioc_cmd = ASPIOC_CloseSession; - iocbp->ioc_rval = 0; - if (gbuf_cont(mioc)) { - gbuf_freem(gbuf_cont(mioc)); - gbuf_cont(mioc) = 0; - } - gbuf_set_type(mioc, MSG_IOCACK); - atalk_putnext(scb->gref, mioc); - return; - - default: - gbuf_freem(mioc); - return; - } - scb->state = ASPSTATE_Idle; - atalk_putnext(gref, mioc); - } - - else { - if (scb->next_scb) { - /* - * find the responsible scb - */ - scb = (asp_scb_t *)iocbp->ioc_cr; - if ((scb == 0) || (scb->magic_num != 222)) { - dPrintf(D_M_ASP, D_L_ERROR, - ("asp_nak_reply: CAN'T find scb 2\n")); - gbuf_freem(mioc); - return; - } - } - - switch (scb->state) { - case ASPSTATE_Close: - scb->rem_addr.node = 0; - break; - } - - gbuf_freem(mioc); - } -} /* asp_nak_reply */ - -/* - * delete scb from the use list - */ -StaticProc void -asp_dequeue_scb(scb) - asp_scb_t *scb; -{ - - if (scb == scb_used_list) { - if ((scb_used_list = scb->next_scb) != 0) - scb->next_scb->prev_scb = 0; - } else { - if ((scb->prev_scb->next_scb = scb->next_scb) != 0) - scb->next_scb->prev_scb = scb->prev_scb; - } - - scb->next_scb = 0; - scb->prev_scb = 0; -} - -/* - * find scb routine - */ -StaticProc asp_scb_t * -asp_find_scb(sock_num, rem_addr) - unsigned char sock_num; - at_inet_t *rem_addr; -{ - asp_scb_t *scb; - asp_scb_t *alt_scb = 0; - - for (scb = asp_scbQ[sock_num]; scb; scb = scb->next_scb) { - if ((scb->rem_addr.net == rem_addr->net) - && (scb->rem_addr.node == rem_addr->node)) { - if ((scb->rem_addr.socket == rem_addr->socket) - || (scb->rem_socket == rem_addr->socket)) - break; - else if (alt_scb == 0) - alt_scb = scb; - } - } - - if ((scb == 0) && ((scb = alt_scb) == 0)) { - dPrintf(D_M_ASP, D_L_ERROR, - ("asp_find_scb: CAN'T find scb, loc=%d, rem=%x.%x.%d\n", - sock_num, - rem_addr->net, - rem_addr->node, - rem_addr->socket)); - } - - return scb; -} - -/* - * timout routine - */ -StaticProc void -asp_timout(func, scb, seconds) - asp_tmo_func func; - register asp_scb_t *scb; - int seconds; -{ - unsigned char sum; - register asp_scb_t *curr_scb, *prev_scb; - - if (scb->tmo_func) - return; - - scb->tmo_func = func; - scb->tmo_delta = (seconds>>SESS_TMO_RES); - scb->tmo_cnt = scb_tmo_cnt; - - if (scb_tmo_list == 0) { - scb->next_tmo = scb->prev_tmo = 0; - scb_tmo_list = scb; - return; - } - - prev_scb = 0; - curr_scb = scb_tmo_list; - sum = 0; - - while (1) { - sum += curr_scb->tmo_delta; - if (sum > scb->tmo_delta) { - sum -= curr_scb->tmo_delta; - scb->tmo_delta -= sum; - curr_scb->tmo_delta -= scb->tmo_delta; - break; - } - prev_scb = curr_scb; - if ((curr_scb = curr_scb->next_tmo) == 0) { - scb->tmo_delta -= sum; - break; - } - } - - if (prev_scb) { - scb->prev_tmo = prev_scb; - if ((scb->next_tmo = prev_scb->next_tmo) != 0) - prev_scb->next_tmo->prev_tmo = scb; - prev_scb->next_tmo = scb; - } else { - scb->prev_tmo = 0; - scb->next_tmo = scb_tmo_list; - scb_tmo_list->prev_tmo = scb; - scb_tmo_list = scb; - } -} - -/* - * untimout routine - */ -StaticProc void -asp_untimout( - __unused asp_tmo_func tmo_func, - 
register asp_scb_t *scb) -{ - - if ((scb->tmo_cnt == scb_tmo_cnt) || (scb->tmo_func == 0)) - return; - - if (scb_tmo_list == scb) { - if ((scb_tmo_list = scb->next_tmo) != 0) { - scb_tmo_list->prev_tmo = 0; - scb->next_tmo->tmo_delta += scb->tmo_delta; - } - } else if (scb->prev_tmo) { - if ((scb->prev_tmo->next_tmo = scb->next_tmo) != 0) { - scb->next_tmo->prev_tmo = scb->prev_tmo; - scb->next_tmo->tmo_delta += scb->tmo_delta; - } - scb->prev_tmo = 0; - } - scb->tmo_func = 0; -} - -/* - * hangup routine - */ -StaticProc void -asp_hangup(scb) - asp_scb_t *scb; -{ - /* - * set the state to Close - */ - scb->state = ASPSTATE_Close; - if (scb->tickle_tid) { - atp_cancel_req(scb->gref, (unsigned int)scb->tickle_tid); - scb->tickle_tid = 0; - } - - /* - * notify upstream of the hangup - */ - if (scb->rem_addr.node) { - if (scb->get_wait) - wakeup(&scb->event); - else - atalk_notify_sel(scb->gref); - } -} - -StaticProc void -asp_iocack(gref, mioc) - gref_t *gref; - gbuf_t *mioc; -{ - if (gbuf_cont(mioc)) - ((ioc_t *)gbuf_rptr(mioc))->ioc_count = gbuf_msgsize(gbuf_cont(mioc)); - else - ((ioc_t *)gbuf_rptr(mioc))->ioc_count = 0; - - gbuf_set_type(mioc, MSG_IOCACK); - atalk_putnext(gref, mioc); -} - -StaticProc void -asp_iocnak(gref, mioc, err) - gref_t *gref; - gbuf_t *mioc; - int err; -{ - ((ioc_t *)gbuf_rptr(mioc))->ioc_count = 0; - if (err == 0) - err = ENXIO; - ((ioc_t *)gbuf_rptr(mioc))->ioc_error = err; - ((ioc_t *)gbuf_rptr(mioc))->ioc_rval = -1; - if (gbuf_cont(mioc)) { - gbuf_freem(gbuf_cont(mioc)); - gbuf_cont(mioc) = 0; - } - - gbuf_set_type(mioc, MSG_IOCNAK); - atalk_putnext(gref, mioc); -} - -/* - * the alloc scb routine - */ -StaticProc asp_scb_t * -asp_scb_alloc() -{ - int i; - gbuf_t *m; - asp_scb_t *scb, *scb_array; - - if (scb_free_list == 0) { - if ((m = gbuf_alloc(SCBS_PER_BLK*sizeof(asp_scb_t), PRI_MED)) == 0) - return (asp_scb_t *)0; - bzero((char *)gbuf_rptr(m), SCBS_PER_BLK*sizeof(asp_scb_t)); - gbuf_cont(m) = scb_resource_m; - scb_resource_m = m; - scb_array = (asp_scb_t *)gbuf_rptr(m); - for (i=0; i < SCBS_PER_BLK-1; i++) - scb_array[i].next_scb = (asp_scb_t *)&scb_array[i+1]; - scb_array[i].next_scb = 0; - scb_free_list = (asp_scb_t *)&scb_array[0]; - } - - scb = scb_free_list; - scb_free_list = scb->next_scb; - ATEVENTINIT(scb->event); - ATEVENTINIT(scb->delay_event); - - return scb; -} - -/* - * the free scb routine - */ -StaticProc void -asp_scb_free(scb) - asp_scb_t *scb; -{ - - bzero((char *)scb, sizeof(asp_scb_t)); - scb->next_scb = scb_free_list; - scb_free_list = scb; -} - -/* - * routine to pass up receive data - */ -StaticProc void -asp_putnext(gref, mproto) - gref_t *gref; - gbuf_t *mproto; -{ - gbuf_t *m; - asp_scb_t *scb; - - scb = (asp_scb_t *)gref->info; - - /* - * queue the message. - */ - gbuf_next(mproto) = 0; - if ((m = scb->sess_ioc) == 0) - scb->sess_ioc = mproto; - else { - while (gbuf_next(m)) - m = gbuf_next(m); - gbuf_next(m) = mproto; - } - scb->rcv_cnt++; - if (scb->rcv_cnt >= MAX_RCV_CNT) - scb->snd_stop = 1; - - if (scb->get_wait) - wakeup(&scb->event); - else if (mproto == scb->sess_ioc) - atalk_notify_sel(gref); - -} /* asp_putnext */ - -/* - * The following two routines are direct entries from system - * calls to allow fast sending and recving of ASP data. - */ - -/* in ASPputmsg we expect: - - ASPFUNC_CmdReply - ASPFUNC_Attention - ASPFUNC_Command - ASPFUNC_Write - ASPFUNC_WriteContinue - - bms: Make this callable from the kernel. 
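[Annotation: asp_scb_alloc()/asp_scb_free() just above implement a classic carve-and-chain allocator: when the free list is empty, one large buffer is allocated, split into SCBS_PER_BLK control blocks, and threaded onto the list, so the common case is a constant-time pop. A userland sketch of the same scheme (batch size and names are illustrative; like the original, it never returns the backing blocks):

#include <stdlib.h>
#include <string.h>

#define SCBS_PER_BLK 16           /* hypothetical batch size */

struct scb {
    struct scb *next;             /* free-list linkage */
    /* ... per-session fields ... */
};

static struct scb *free_list;

static struct scb *scb_alloc(void)
{
    if (free_list == NULL) {      /* refill: carve one block into entries */
        struct scb *blk = calloc(SCBS_PER_BLK, sizeof(*blk));
        if (blk == NULL)
            return NULL;
        for (int i = 0; i < SCBS_PER_BLK - 1; i++)
            blk[i].next = &blk[i + 1];
        free_list = blk;          /* last entry's next is already NULL */
    }
    struct scb *scb = free_list;
    free_list = scb->next;
    return scb;
}

static void scb_free(struct scb *scb)
{
    memset(scb, 0, sizeof(*scb)); /* asp_scb_free() zeroes before reuse */
    scb->next = free_list;
    free_list = scb;
}
]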
- If mreq != NULL, then must be called from kernel space and the following apply: - 1) *mreq is data to be sent already in mbuf chains. - 2) datptr->len = size of data -*/ - -int ASPputmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t *mreq, __unused int flags, int *errp) -{ - int i, err, len, offset, remain, size, copy_len; - gbuf_t *mioc, *mdata, *mx; - ioc_t *iocbp; - strbuf_t ctlbuf; - strbuf_t datbuf; - asp_scb_t *scb; - int nbds, result, msize, Primitive; - unsigned char *wptr; - struct atp_set_default *sd; - at_ddp_t *ddp; - at_atp_t *atp; - struct atpBDS *atpBDS; - asp_word_t *awp; - union asp_primitives *primitives; - unsigned short tid; - caddr_t dataptr; - - if ((scb = (asp_scb_t *)gref->info) == 0) { - dPrintf(D_M_ASP, D_L_ERROR, - ("ASPputmsg: stale handle=0x%x, pid=%d\n", - (u_int) gref, gref->pid)); - - *errp = EINVAL; - return -1; - } - - if (scb->state == ASPSTATE_Close) - return 0; - if (scb->snd_stop) { - *errp = EAGAIN; - return -1; - } - - /* - * copy in the control and data info - */ - if (mreq != NULL) { - /* being called from kernel space */ - bcopy (ctlptr, &ctlbuf, sizeof (strbuf_t)); - bcopy (datptr, &datbuf, sizeof (strbuf_t)); - } else { - /* being called from user space */ - if ((err = copyin(CAST_USER_ADDR_T(ctlptr), (caddr_t)&ctlbuf, sizeof(ctlbuf))) != 0) - goto l_err; - if ((err = copyin(CAST_USER_ADDR_T(datptr), (caddr_t)&datbuf, sizeof(datbuf))) != 0) - goto l_err; - } - - /* Radar 5398072: check for bogus length - * Max ASP data is 8 ATP packets - */ - - if ((ctlbuf.len < 0) || (ctlbuf.len > (ATP_DATA_SIZE * 8))) { - err = EINVAL; - goto l_err; - } - if ((datbuf.len < 0) || (datbuf.len > (ATP_DATA_SIZE * 8))) { - err = EINVAL; - goto l_err; - } - - /* - * allocate buffer and copy in the control content - */ - if (!(mioc = gbuf_alloc_wait(ctlbuf.len, TRUE))) { - /* error return should not be possible */ - err = ENOBUFS; - goto l_err; - } - gbuf_set_type(mioc, MSG_IOCTL); /* for later, in ATP */ - gbuf_wset(mioc, ctlbuf.len); - - if (mreq != NULL) { - /* being called from kernel space */ - bcopy (ctlbuf.buf, gbuf_rptr(mioc), ctlbuf.len); - } else { - /* being called from user space */ - if ((err = copyin(CAST_USER_ADDR_T(ctlbuf.buf), (caddr_t)gbuf_rptr(mioc), ctlbuf.len)) != 0) { - gbuf_freem(mioc); - goto l_err; - } - } - - iocbp = (ioc_t *)gbuf_rptr(mioc); - primitives = (union asp_primitives *)gbuf_rptr(mioc); - Primitive = primitives->Primitive; - dPrintf(D_M_ASP, D_L_INFO, - ("ASPputmsg: %s\n", aspCmdStr(Primitive))); - - /* - * copy in the data content into multiple mbuf clusters if - * required. ATP now expects reply data to be placed in - * standard clusters, not the large external clusters that - * were used previously. - */ - - /* set offset for use by some commands */ - offset = (Primitive == ASPFUNC_CmdReply) ? 0 : aspCMDsize; - size = 0; - if (mreq != NULL) { - /* The data from the in-kernel call for use by AFP is passed - * in as one large external cluster. This needs to be copied - * to a chain of standard clusters. - */ - remain = gbuf_len(mreq); - dataptr = mtod(mreq, caddr_t); - } else { - /* copyin from user space */ - remain = datbuf.len; - dataptr = (caddr_t)datbuf.buf; - } - - /* allocate first buffer */ - if (!(mdata = gbuf_alloc_wait((remain + offset > MCLBYTES ? 
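[Annotation: the Radar 5398072 check above is the key line of defense in ASPputmsg: ctlbuf.len and datbuf.len come straight from user space and are about to size an allocation and a copyin(). The pattern in miniature (the ATP_DATA_SIZE value is reproduced from the netat headers; the helper name is made up):

#include <errno.h>

#define ATP_DATA_SIZE 578                  /* per-packet ATP payload */
#define ASP_MAX_DATA (ATP_DATA_SIZE * 8)   /* at most 8 ATP packets */

/* Reject a user-supplied length before using it. The field is signed,
 * so the negative case must be tested explicitly; a negative value cast
 * to an unsigned size would otherwise become enormous. */
static int check_user_len(int len)
{
    if (len < 0 || len > ASP_MAX_DATA)
        return EINVAL;
    return 0;
}
]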
MCLBYTES : remain + offset), TRUE))) { - /* error return should not be possible */ - err = ENOBUFS; - gbuf_freem(mioc); - goto l_err; - } - gbuf_wset(mdata, 0); /* init length to zero */ - gbuf_cont(mioc) = mdata; - - while (remain) { - if (remain + offset > MCLBYTES) - copy_len = MCLBYTES - offset; - else - copy_len = remain; - remain -= copy_len; - if (mreq != NULL) - bcopy (dataptr, (gbuf_rptr(mdata) + offset), copy_len); - else if ((err = copyin(CAST_USER_ADDR_T(dataptr), (caddr_t)(gbuf_rptr(mdata) + offset), copy_len)) != 0) { - gbuf_freem(mioc); - goto l_err; - } - gbuf_wset(mdata, (copy_len + offset)); - size += copy_len + offset; - dataptr += copy_len; - offset = 0; - if (remain) { - /* allocate the next mbuf */ - if ((gbuf_cont(mdata) = m_get((M_WAIT), MSG_DATA)) == 0) { - err = ENOBUFS; - gbuf_freem(mioc); - goto l_err; - } - mdata = gbuf_cont(mdata); - MCLGET(mdata, M_WAIT); - if (!(mdata->m_flags & M_EXT)) { - err = ENOBUFS; - gbuf_freem(mioc); - goto l_err; - } - } - } - mdata = gbuf_cont(mioc); /* code further on down expects this to b e set */ - mdata->m_pkthdr.len = size; /* set packet hdr len */ - - if (mreq != 0) - gbuf_freem(mreq); - - switch (Primitive) { - - case ASPFUNC_Command: - case ASPFUNC_Write: - case ASPFUNC_WriteContinue: - case ASPFUNC_Attention: - /* - * build the command/write/write_continue request - */ - wptr = (unsigned char *)gbuf_rptr(mdata); - atpBDS = (struct atpBDS *)wptr; - wptr += atpBDSsize; - for (i=0; i < ATP_TRESP_MAX; i++) { - *(unsigned long *)atpBDS[i].bdsBuffAddr = 1; - *(unsigned short *)atpBDS[i].bdsBuffSz = ATP_DATA_SIZE; - } - sd = (struct atp_set_default *)wptr; - wptr += sizeof(struct atp_set_default); - sd->def_retries = (scb->cmd_retry.retries == -1) ? - ATP_INFINITE_RETRIES : scb->cmd_retry.retries; - sd->def_rate = scb->cmd_retry.interval*TICKS_PER_SEC; - sd->def_BDSlen = atpBDSsize; - ddp = (at_ddp_t *)wptr; - NET_ASSIGN(ddp->src_net, scb->loc_addr.net); - ddp->src_node = scb->loc_addr.node; - NET_ASSIGN(ddp->dst_net, scb->rem_addr.net); - ddp->dst_node = scb->rem_addr.node; - ddp->dst_socket = scb->rem_addr.socket; - UAS_ASSIGN(ddp->checksum, 0); - atp = ATP_ATP_HDR(wptr); - wptr += TOTAL_ATP_HDR_SIZE; - atp->xo = 1; - atp->xo_relt = 1; - atp->bitmap = 0xff; - awp = (asp_word_t *)atp->user_bytes; - awp->func = (unsigned char)Primitive; - awp->param1 = scb->sess_id; - awp->param2 = htons(scb->snd_seq_num); - iocbp->ioc_private = (void *)scb; - iocbp->ioc_count = gbuf_len(mdata); - iocbp->ioc_rval = 0; - iocbp->ioc_cmd = AT_ATP_ISSUE_REQUEST_DEF; - - /* - * send the command/write/write_continue/attention request - */ - switch (awp->func) { - case ASPFUNC_Command: - scb->state = ASPSTATE_WaitingForCommandRsp; - break; - case ASPFUNC_Write: - scb->state = ASPSTATE_WaitingForWriteRsp; - break; - case ASPFUNC_WriteContinue: - scb->state = ASPSTATE_WaitingForWriteContinueRsp; - awp->param2 = htons(scb->wrt_seq_num); - break; - case ASPFUNC_Attention: - scb->state = ASPSTATE_WaitingForCommandRsp; - atp->xo = 0; - atp->xo_relt = 0; - atp->bitmap = 0x01; - gbuf_wdec(mdata,2); - awp->param2 = htons(*(unsigned short *)gbuf_wptr(mdata)); - break; - } - dPrintf(D_M_ASP,D_L_INFO, - ("ASPputmsg: %s, loc=%d, rem=%x.%x.%d\n", - (awp->func == ASPFUNC_Command ? "CommandReq" : - awp->func == ASPFUNC_Write ? "WriteReq" : - awp->func == ASPFUNC_WriteContinue ? 
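[Annotation: the copy loop above has one subtlety worth spelling out: the first cluster reserves offset bytes of headroom for the ASP command header, while every subsequent cluster starts at zero. A stripped-down model of the chunking arithmetic (CLBYTES is a stand-in for MCLBYTES):

#include <stddef.h>

#define CLBYTES 2048   /* stand-in for the mbuf cluster size */

/* Returns how many cluster-sized chunks a payload of 'remain' bytes
 * needs when the first chunk also carries 'offset' bytes of header,
 * following the same arithmetic as the while (remain) loop above. */
static int count_chunks(size_t remain, size_t offset)
{
    int n = 0;

    while (remain > 0) {
        size_t copy = (remain + offset > CLBYTES) ? CLBYTES - offset
                                                  : remain;
        remain -= copy;
        offset = 0;    /* only the first chunk carries the header */
        n++;
    }
    return n;
}
]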
"WriteContinue" : - "AttentionReq"),scb->loc_addr.socket, - NET_VALUE(ddp->dst_net),ddp->dst_node,ddp->dst_socket)); - atp_send_req(gref, mioc); - return 0; - - case ASPFUNC_CmdReply: - - if (scb->req_msgq) { - mx = scb->req_msgq; - scb->req_msgq = gbuf_next(mx); - gbuf_next(mx) = 0; - asp_putnext(scb->gref, mx); - } else - scb->req_flag = 0; - - result = primitives->CmdReplyReq.CmdResult; - tid = primitives->CmdReplyReq.ReqRefNum; - - /* Re-use the original mioc mbuf to send the response. */ - gbuf_rinc(mioc,sizeof(void *)); - gbuf_wset(mioc,0); - ddp = (at_ddp_t *)gbuf_wptr(mioc); - gbuf_winc(mioc,DDP_X_HDR_SIZE); - atp = (at_atp_t *)gbuf_wptr(mioc); - gbuf_winc(mioc,ATP_HDR_SIZE); - NET_ASSIGN(ddp->src_net, scb->loc_addr.net); - ddp->src_node = scb->loc_addr.node; - NET_ASSIGN(ddp->dst_net, scb->rem_addr.net); - ddp->dst_node = scb->rem_addr.node; - ddp->dst_socket = scb->reply_socket; - ddp->type = DDP_ATP; - UAS_ASSIGN(ddp->checksum, 0); - UAS_ASSIGN(atp->tid, htons(tid)); - if (scb->attn_flag && (tid == scb->attn_tid)) { - scb->attn_flag = 0; - atp->xo = 0; - atp->xo_relt = 0; - } else { - atp->xo = 1; - atp->xo_relt = 1; - } - /* setup the atpBDS struct - only the length field is used, - * except for the first one which contains the bds count in - * bdsDataSz. - */ - atpBDS = (struct atpBDS *)gbuf_wptr(mioc); - msize = mdata ? gbuf_msgsize(mdata) : 0; - for (nbds=0; (nbds < ATP_TRESP_MAX) && (msize > 0); nbds++) { - len = msize < ATP_DATA_SIZE ? msize : ATP_DATA_SIZE; - msize -= ATP_DATA_SIZE; - *(long *)atpBDS[nbds].bdsUserData = 0; - UAL_ASSIGN(atpBDS[nbds].bdsBuffAddr, 1); - UAS_ASSIGN(atpBDS[nbds].bdsBuffSz, len); - } - UAS_ASSIGN(atpBDS[0].bdsDataSz, nbds); - *(long *)atpBDS[0].bdsUserData = (long)result; - *(long *)atp->user_bytes = (long)result; - gbuf_winc(mioc,atpBDSsize); - dPrintf(D_M_ASP, D_L_INFO, - ("ASPputmsg: ATP CmdReplyReq, loc=%d, state=%s, msgsize = %d, result = %d, tid = %d\n", - scb->loc_addr.socket, aspStateStr(scb->state), - (mdata ? gbuf_msgsize(mdata) : 0), result, tid)); - atp_send_rsp(gref, mioc, TRUE); - return 0; - } - - /* Not an expected ASPFUNC */ - gbuf_freem(mioc); - err = EOPNOTSUPP; - -l_err: - *errp = err; - return -1; -} /* ASPputmsg */ - - -/* bms: make this callable from kernel. 
reply date is passed back as a mbuf chain in *mreply */ -int ASPgetmsg(gref_t *gref, strbuf_t *ctlptr, strbuf_t *datptr, gbuf_t **mreply, __unused int *flags, int *errp) -{ - int err, len, sum, rval; - gbuf_t *mproto, *mdata; - strbuf_t ctlbuf; - strbuf_t datbuf; - asp_scb_t *scb; - unsigned char get_wait; - - if ((scb = (asp_scb_t *)gref->info) == 0) { - dPrintf(D_M_ASP, D_L_ERROR, - ("ASPgetmsg: stale handle=0x%x, pid=%d\n", - (u_int) gref, gref->pid)); - - *errp = EINVAL; - return -1; - } - - if (scb->state == ASPSTATE_Close) - return 0; - - /* - * get receive data - */ - while ((mproto = scb->sess_ioc) == 0) { - scb->get_wait = 1; - lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); - err = msleep(&scb->event, atalk_mutex, PSOCK | PCATCH, "aspgetmsg", 0); - if (err != 0) { - scb->get_wait = 0; - *errp = err; - return -1; - } - if (scb->state == ASPSTATE_Close) { - scb->get_wait = 0; - return 0; - } - } - get_wait = scb->get_wait; - scb->get_wait = 0; - if ((ctlptr == 0) && (datptr == 0)) - return 0; - scb->sess_ioc = gbuf_next(mproto); - mdata = gbuf_cont(mproto); - - /* last remaining use of MSG_ERROR */ - if (gbuf_type(mproto) == MSG_ERROR) { - err = (int)gbuf_rptr(mproto)[0]; - goto l_err; - } - - /* - * copy in the control and data info - */ - if (mreply != NULL) { - /* called from kernel space */ - bcopy (ctlptr, &ctlbuf, sizeof(ctlbuf)); - bcopy (datptr, &datbuf, sizeof(datbuf)); - } else { - /* called from user space */ - if ((err = copyin(CAST_USER_ADDR_T(ctlptr), - (caddr_t)&ctlbuf, sizeof(ctlbuf))) != 0) - goto l_err; - if ((err = copyin(CAST_USER_ADDR_T(datptr), - (caddr_t)&datbuf, sizeof(datbuf))) != 0) - goto l_err; - } - if ((datbuf.maxlen < 0) || (datbuf.maxlen < gbuf_msgsize(mdata))) { - gbuf_next(mproto) = scb->sess_ioc; - scb->sess_ioc = mproto; - return MOREDATA; - } - - if (get_wait == 0) { - /* - * this is a hack to support the select() call. - * we're not supposed to dequeue messages in the Streams - * head's read queue this way; but there is no better way. - */ - if (scb->sess_ioc != 0) - atalk_notify_sel(gref); - - } - - /* - * copy out the control content and info - */ - ctlbuf.len = gbuf_len(mproto); - - if (mreply != NULL) { - /* called from kernel space */ - bcopy (gbuf_rptr(mproto), ctlbuf.buf, ctlbuf.len); - bcopy (&ctlbuf, ctlptr, sizeof(ctlbuf)); - } else { - /* called from user space */ - if ((err = copyout((caddr_t)gbuf_rptr(mproto), - CAST_USER_ADDR_T(ctlbuf.buf), ctlbuf.len)) != 0) - goto l_err; - if ((err = copyout((caddr_t)&ctlbuf, - CAST_USER_ADDR_T(ctlptr), sizeof(ctlbuf))) != 0) - goto l_err; - } - - /* - * copy out the data content and info - */ - for (rval = 0, sum = 0; mdata && (rval == 0); mdata = gbuf_cont(mdata)) - { - len = gbuf_len(mdata); - if (len) { - if ((len + sum) > datbuf.maxlen) { - len = datbuf.maxlen - sum; - rval = MOREDATA; - } - - if (mreply == NULL) { - /* called from user space */ - if ((err = copyout((caddr_t)gbuf_rptr(mdata), CAST_USER_ADDR_T(&datbuf.buf[sum]), len)) != 0) - goto l_err; - } - sum += len; - } - } - datbuf.len = sum; - if (mreply != NULL) { - /* called from kernel space */ - bcopy (&datbuf, datptr, sizeof(datbuf)); - } else { - /* called from user space */ - if ((err = copyout((caddr_t)&datbuf, CAST_USER_ADDR_T(datptr), sizeof(datbuf))) != 0) - goto l_err; - } - - if (mreply != NULL) { - /* called from kernel space */ - /* return the reply data in mbufs, so dont free them. 
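[Annotation: the receive side above blocks with msleep() on scb->event under atalk_mutex and is woken by the wakeup() calls in asp_putnext()/asp_hangup(); the predicate is re-tested in a loop because a wakeup can race a session close. The same shape in portable pthreads (a sketch, not the kernel primitive):

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  event = PTHREAD_COND_INITIALIZER;
static void *queue_head;        /* stand-in for scb->sess_ioc */
static int   session_closed;    /* stand-in for ASPSTATE_Close */

static void *dequeue_blocking(void)
{
    void *m = NULL;

    pthread_mutex_lock(&lock);
    while (queue_head == NULL && !session_closed)
        pthread_cond_wait(&event, &lock);  /* unlock + sleep atomically */
    if (!session_closed) {
        m = queue_head;
        queue_head = NULL;                 /* the real code pops one message */
    }
    pthread_mutex_unlock(&lock);
    return m;
}
]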
- Just free the proto info */ - mdata = gbuf_cont(mproto); - *mreply = mdata; - gbuf_cont(mproto) = NULL; - gbuf_freem(mproto); - } else { - /* called from user space */ - gbuf_freem(mproto); - } - - if (scb->sess_ioc) - scb->rcv_cnt--; - else { - scb->rcv_cnt = 0; - scb->snd_stop = 0; - } - return rval; - -l_err: - gbuf_next(mproto) = scb->sess_ioc; - scb->sess_ioc = mproto; - *errp = err; - return -1; -} diff --git a/bsd/netat/at.c b/bsd/netat/at.c deleted file mode 100644 index ae6120798..000000000 --- a/bsd/netat/at.c +++ /dev/null @@ -1,781 +0,0 @@ -/* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1998 Apple Computer, Inc. - */ - -/* at.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -int lap_online( at_ifaddr_t *, at_if_cfg_t *cfgp); - -extern int routerStart(at_kern_err_t *); -extern void elap_offline(at_ifaddr_t *); -extern at_ifaddr_t *find_ifID(char *); - -extern int xpatcnt; -extern at_ifaddr_t at_interfaces[]; -extern at_ifaddr_t *ifID_home; -extern TAILQ_HEAD(name_registry, _nve_) name_registry; -extern int nve_lock; - -struct etalk_addr etalk_multicast_addr = { - {0x09, 0x00, 0x07, 0xff, 0xff, 0xff}}; -struct etalk_addr ttalk_multicast_addr = { - {0xC0, 0x00, 0x40, 0x00, 0x00, 0x00}}; - -/* called only in router mode */ -static int set_zones(zone_usage_t *ifz) - -/* 1. adds zone to table - 2. looks up each route entry from zone list - 3. 
sets zone bit in each route entry - - returns 0 if successful - errno if error occurred -*/ -{ - int i; - at_ifaddr_t *ifID; - short zno; - RT_entry *rte; - - if (ifz->zone_name.len <= 0 || ifz->zone_name.len > NBP_NVE_STR_SIZE) - return(ENOSPC); - - zno = zt_add_zone((char *)ifz->zone_name.str, ifz->zone_name.len); - - if (zno == ZT_MAXEDOUT) { - dPrintf(D_M_ELAP, D_L_ERROR, ("set_zones: error: table full\n")); - return(ENOSPC); - } - if (ifz->zone_home) { - ifID_home->ifZoneName = ifz->zone_name; - ifID_home->ifDefZone = zno; - } - - for (i=0; izone_iflist.at_if[i][0]) { - if ((ifID = find_ifID(ifz->zone_iflist.at_if[i]))) { - rte = rt_blookup(ifID->ifThisCableEnd); - if (!rte) { - dPrintf(D_M_ELAP, D_L_ERROR, - ("set_zones: error: can't find route\n")); - } else { - zt_set_zmap(zno, rte->ZoneBitMap); - - /* if first zone for this I/F, - make default */ - if (!ifID->ifDefZone) - ifID->ifDefZone = zno; - } - } - } - } - - return(0); -} /* set_zones */ - -static int -at_domifattach(struct ifnet *ifp, at_ifaddr_t *ifID) -{ - int error; - - if ((error = proto_plumb(PF_APPLETALK, ifp))) { - if (error != EEXIST) - log(LOG_ERR, "%s: proto_plumb returned %d if=%s%d\n", - __func__, error, ifp->if_name, ifp->if_unit); - } else if (ifID) - ifID->at_was_attached = 1; - - return (error); -} - -/* - * Generic internet control operations (ioctl's). - * ifp is 0 if not an interface-specific ioctl. - */ - -int -at_control(so, cmd, data, ifp) - struct socket *so; - u_long cmd; - caddr_t data; - struct ifnet *ifp; -{ - struct ifreq *ifr = (struct ifreq *)data; - int pat_id = 0, error = 0; - at_ifaddr_t *ifID = 0; - struct ifaddr *ifa; - struct sockaddr_dl *sdl; - - if ((cmd & 0xffff) == 0xff99) { - u_long fixed_command; - /* *** this is a temporary hack to get at_send_to_dev() to - work with BSD-style sockets instead of the special purpose - system calls, ATsocket() and ATioctl(). - *** */ - fixed_command = _IOW(0, 0xff99, user_addr_t); - if ((error = at_ioctl((struct atpcb *)so->so_pcb, fixed_command, data, 0))) { - if (((struct atpcb *)so->so_pcb)->proto != ATPROTO_LAP) { - ((struct atpcb *)so->so_pcb)->proto = ATPROTO_LAP; - error = at_ioctl((struct atpcb *)so->so_pcb, fixed_command, data , 0); - } - } - return(error); - - /* *** processing should be - return(EINVAL); - *** */ - } - /* - * Find address for this interface, if it exists. 
- */ - if (ifp) - for (pat_id = 0; pat_id < xpatcnt; pat_id++) - if (at_interfaces[pat_id].aa_ifp == ifp) { - ifID = &at_interfaces[pat_id]; - break; - } - - switch (cmd) { - - case AIOCGETSTATE: - { - at_state_t *global_state = (at_state_t *)data; - - *global_state = at_state; - return(0); - break; - } - - case AIOCGETIFCFG: - { - at_if_cfg_t *cfgp = (at_if_cfg_t *)data; - - ifID = 0; - if ((at_state.flags & AT_ST_STARTED) && - ifID_home) { - if (strlen(cfgp->ifr_name)) { - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (!strncmp(ifID->ifName, cfgp->ifr_name, - strlen(ifID->ifName))) - break; - } - } else { - ifID = ifID_home; - strlcpy(cfgp->ifr_name, ifID->ifName, - sizeof(cfgp->ifr_name)); - } - if (ifID && ifID->ifState != LAP_OFFLINE) { - cfgp->flags = ifID->ifFlags; - /* put the IF state into the low order - bits of flags */ - cfgp->flags |= (ifID->ifState & LAP_STATE_MASK); - cfgp->node = ifID->ifThisNode; - cfgp->router = ifID->ifARouter; - cfgp->netStart = ifID->ifThisCableStart; - cfgp->netEnd = ifID->ifThisCableEnd; - cfgp->zonename = ifID->ifZoneName; - return(0); - } else - return(EINVAL); - } else - return(ENOTREADY); - break; - } - - case AIOCSETDEFZONE: - { - at_def_zone_t *defzonep = (at_def_zone_t *)data; - - /* check for root access */ - if ((error = suser(kauth_cred_get(), 0))) - return(EACCES); - - ifID = 0; - if ((at_state.flags & AT_ST_STARTED) && ifID_home) { - if (strlen(defzonep->ifr_name)) { - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (!strncmp(ifID->ifName, defzonep->ifr_name, - strlen(ifID->ifName))) - break; - } - } else { - ifID = ifID_home; - strlcpy(defzonep->ifr_name, ifID->ifName, - sizeof(defzonep->ifr_name)); - } - - /* In routing mode the default zone is only set for the - default interface. */ - if (ROUTING_MODE && (ifID != ifID_home)) - return(EINVAL); - - if (ifID && ifID->ifState != LAP_OFFLINE) { - if (zonename_equal(&ifID->ifZoneName, - &defzonep->zonename)) - return(0); - else { - /* check the zone name */ - if (MULTIPORT_MODE) { - short zno; - at_ifnames_t ifs_in_zone; - - if (!(zno = zt_find_zname(&defzonep->zonename))) - return(EINVAL); - - getIfUsage(zno-1, &ifs_in_zone); - if (!ifs_in_zone.at_if[ifID->ifPort]) - return(EINVAL); - ifID->ifDefZone = zno+1; - } else { - int i; - at_nvestr_t *zone; - - for (i = 0, zone = getSPLocalZone(i); - zone; - i++, zone = getSPLocalZone(i)) { - if (zonename_equal(zone, - &defzonep->zonename)) - break; - } - if (!zone) - return(EINVAL); - } - ifID->ifZoneName = defzonep->zonename; - (void)regDefaultZone(ifID); - - /* AppleTalk zone was changed. Send event with zone info. 
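[Annotation: both ioctl cases above use the same lookup idiom: an empty ifr_name selects the default ("home") interface, otherwise the configured list is scanned by name (the original uses a prefix strncmp; an exact match is shown here for clarity). A sketch with illustrative types:

#include <string.h>
#include <sys/queue.h>

struct at_if {
    TAILQ_ENTRY(at_if) link;
    char name[16];
};
TAILQ_HEAD(at_ifq, at_if);

/* Empty name -> default interface; otherwise scan the queue by name,
 * returning NULL when nothing matches (the callers map that to EINVAL). */
static struct at_if *at_if_lookup(struct at_ifq *q, struct at_if *home,
                                  const char *name)
{
    struct at_if *ifp;

    if (name[0] == '\0')
        return home;
    TAILQ_FOREACH(ifp, q, link) {
        if (strcmp(ifp->name, name) == 0)
            return ifp;
    }
    return NULL;
}
]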
*/ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ZONEUPDATED, 0, &(ifID->ifZoneName)); - - return(0); - } - } else - return(EINVAL); - } else - return(ENOTREADY); - break; - } - - case AIOCREGLOCALZN: - { - at_nvestr_t *zone = (at_nvestr_t *)data; - - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) - return(ENOTREADY); - - if (MULTIPORT_MODE) - return(EINVAL); - - return(setLocalZones(zone, zone->len)); - - break; - } - case AIOCSETZNUSAGE: - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) - return(ENOTREADY); - - if (!ROUTING_MODE) - return(EINVAL); - - return(set_zones((zone_usage_t *)data)); - - break; - - case AIOCGETZNUSAGE: - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) - return(ENOTREADY); - - if (!MULTIPORT_MODE) - return(EINVAL); - - if (getRTRLocalZone((zone_usage_t *)data)) - return(0); - else - return(ENOENT); - break; - - case AIOCNBPREG: - { - at_nbp_reg_t *nbpP = (at_nbp_reg_t *)data; - nve_entry_t nve; - int error2; - - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) - return(ENOTREADY); - - /* multihoming mode */ - if (MULTIHOME_MODE) { - return(nbp_mh_reg(nbpP)); - } - - /* single port mode or router mode */ - if (nbp_fillin_nve(&nbpP->name, &nve) != 0) { - /* bad tuple... */ - return(EINVAL); - } - - /* In routing mode when the zone is specified, we need to - find an interface on which the specified zone is seeded, so - that the zone multicast will be plausible. */ - if (ROUTING_MODE && !(DEFAULT_ZONE(&nve.zone))) { - /* find first segment (interface) which is seeded for - this zone */ - int finished = FALSE; - int zno; - at_ifnames_t ifs_in_zone; - if (!(zno = zt_find_zname(&nve.zone))) { - return(EINVAL); - } - getIfUsage(zno-1, &ifs_in_zone); - - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (!ifs_in_zone.at_if[ifID->ifPort]) - /* zone doesn't match */ - continue; - else { - finished = TRUE; - break; - } - } - if (!finished) - return(EINVAL); - } else - ifID = ifID_home; - - nve.address.net = ifID->ifThisNode.s_net; - nve.address.node = ifID->ifThisNode.s_node; - nve.address.socket = nbpP->addr.socket; - nve.ddptype = nbpP->ddptype; - - if (nbp_find_nve(&nve)) - return(EADDRNOTAVAIL); - - /* Normal case; no tuple found for this name, so insert - * this tuple in the registry and return ok response. - */ - if ((error2 = nbp_new_nve_entry(&nve, ifID)) == 0) { - nbpP->addr.net = ifID->ifThisNode.s_net; - nbpP->addr.node = ifID->ifThisNode.s_node; - nbpP->unique_nbp_id = nve.unique_nbp_id; - } - - return(error2); - break; - } - - case AIOCNBPREMOVE: - { - at_nbp_reg_t *nbpP = (at_nbp_reg_t *)data; - nve_entry_t *nve_entry, nve; - - if (!(at_state.flags & AT_ST_STARTED)) - return(ENOTREADY); - - /* delete by id */ - if (nbpP->unique_nbp_id) { - TAILQ_FOREACH(nve_entry, &name_registry, nve_link) { - if (nve_entry->unique_nbp_id == nbpP->unique_nbp_id) { - /* Found a match! */ - nbp_delete_entry(nve_entry); - return(0); - } - } - return(EADDRNOTAVAIL); - } - - /* delete by entity */ - if (nbp_fillin_nve(&nbpP->name, &nve) != 0) { - /* bad tuple... 
*/ - return(EINVAL); - } - - if (MULTIHOME_MODE && DEFAULT_ZONE(&nbpP->name.zone)) { - /* if mhome & *, remove nve from all default zones */ - int found = FALSE; /* if any found & deleted */ - - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - nve.zone = ifID->ifZoneName; - nve.zone_hash = nbp_strhash(&nve.zone); - if ((nve_entry = nbp_find_nve(&nve)) == NULL) - continue; - - nbp_delete_entry(nve_entry); - found = TRUE; - } - if (found) - return(0); - else - return(EADDRNOTAVAIL); - } - - if ((nve_entry = nbp_find_nve(&nve)) == NULL) - /* Can't find the tuple we're looking for, send error*/ - return(EADDRNOTAVAIL); - - /* Normal case; tuple found for this name, so delete - * the entry from the registry and return ok response. - */ - nbp_delete_entry(nve_entry); - return(0); - - break; - } - - case AIOCSETROUTER: - { - at_router_params_t *rt = (at_router_params_t *)data; - - /* check for root access */ - if ((error = suser(kauth_cred_get(), 0))) - return(EACCES); - - /* when in routing/multihome mode the AIOCSETROUTER IOCTL - is done first */ - if (at_state.flags & AT_ST_STARTED) - return(EALREADY); - - /* Setup the routing & zip table size for the router */ - if (rt->rtmp_table_sz >= RT_MIN && rt->rtmp_table_sz <= RT_MAX) - RT_maxentry = rt->rtmp_table_sz; - else - RT_maxentry = RT_DEFAULT; - - if (rt->zone_table_sz >= ZT_MIN && rt->zone_table_sz <= ZT_MAX) - ZT_maxentry = rt->zone_table_sz; - else - ZT_maxentry = ZT_DEFAULT; - - if (rt_table_init() == ENOBUFS) - return(ENOBUFS); - - if (rt->router_mix) - RouterMix = (int)rt->router_mix; - else - RouterMix = RT_MIX_DEFAULT; - - add_ddp_handler(RTMP_SOCKET, rtmp_router_input); - - if (rt->multihome) - at_state.flags |= AT_ST_MULTIHOME; - else - at_state.flags |= AT_ST_ROUTER; - break; - } - case AIOCSTARTROUTER: - { - at_kern_err_t *keP = (at_kern_err_t *)data; - - /* check for root access */ - if (suser(kauth_cred_get(), 0)) - return(EACCES); - - if (!(at_state.flags & AT_ST_STARTED)) - return(ENOTREADY); - - bzero(keP, sizeof(at_kern_err_t)); - error = routerStart(keP); - - break; - } - case AIOCGETROUTER: - { - at_router_params_t *rt = (at_router_params_t *)data; - - if (!(at_state.flags & AT_ST_STARTED)) - return(ENOTREADY); - - rt->multihome = (MULTIHOME_MODE)? 1: 0; - rt->rtmp_table_sz = RT_maxentry; - rt->zone_table_sz = ZT_maxentry; - rt->router_mix = RouterMix; - - break; - } - case AIOCSTOPATALK: - { - int *count_only = (int *)data, - ret; - - /* check for root access */ - if ((error = suser(kauth_cred_get(), 0))) - return(EACCES); - - ret = ddp_shutdown(*count_only); - - if (*count_only != 0) - { - *count_only = ret; - return(0); - } - else - { - if (ret == 0) - { - /* AppleTalk was successfully shut down. Send event. 
*/ - atalk_post_msg(0, KEV_ATALK_DISABLED, 0, 0); - return 0; - } - else - return EBUSY; - } - - break; - } - - case SIOCSIFADDR: - /* check for root access */ - if ((error = suser(kauth_cred_get(), 0))) - error = EACCES; - else if (ifID) - error = EEXIST; - else { - if (xpatcnt == 0) { - at_state.flags |= AT_ST_STARTING; - ddp_brt_init(); - } - - /* *** find an empty entry *** */ - ifID = &at_interfaces[xpatcnt]; - bzero((caddr_t)ifID, sizeof(at_ifaddr_t)); - strlcpy(ifID->ifName, ifr->ifr_name, sizeof(ifID->ifName)); - - ifID->aa_ifp = ifp; - ifa = &ifID->aa_ifa; - error = at_domifattach(ifp, ifID); - if (error == EEXIST) { - ifID->at_was_attached = 1; - error = 0; - } - if (error != 0) { - break; - } - /* XXX ethernet-specific */ - ifID->cable_multicast_addr = etalk_multicast_addr; - xpatcnt++; - ifnet_lock_exclusive(ifp); - /* - * Holding ifnet lock here prevents the link address - * from changing contents, so no need to hold the ifa - * lock. The link address is always present; it's - * never freed. - */ - sdl = (struct sockaddr_dl *)ifp->if_lladdr->ifa_addr; - bcopy(LLADDR(sdl), ifID->xaddr, sizeof(ifID->xaddr)); -#ifdef APPLETALK_DEBUG - kprintf("SIOCSIFADDR: local enet address is " - "%x.%x.%x.%x.%x.%x\n", - ifID->xaddr[0], ifID->xaddr[1], - ifID->xaddr[2], ifID->xaddr[3], - ifID->xaddr[4], ifID->xaddr[5]); -#endif - - /* attach the AppleTalk address to the ifnet structure */ - ifa = &ifID->aa_ifa; - ifa_lock_init(ifa); - VERIFY(!(ifa->ifa_debug & IFD_ALLOC)); - ifa->ifa_addr = (struct sockaddr *)&ifID->ifNodeAddress; - ifID->ifNodeAddress.sat_len = sizeof(struct sockaddr_at); - ifID->ifNodeAddress.sat_family = AF_APPLETALK; - /* the address itself will be filled in when ifThisNode - is set */ - IFA_LOCK(ifa); - if_attach_ifa(ifp, ifa); - /* add a reference for at_interfaces[] */ - IFA_ADDREF_LOCKED(ifa); - IFA_UNLOCK(ifa); - ifnet_lock_done(ifp); - } - break; - - /* complete the initialization started in SIOCSIFADDR */ - case AIOCSIFADDR: - { - at_if_cfg_t *cfgp = (at_if_cfg_t *)data; - - if (!(at_state.flags & AT_ST_STARTING)) - return(ENOTREADY); - - if (!(ifID = find_ifID(cfgp->ifr_name))) - return(EINVAL); - - return(lap_online(ifID, cfgp)); - break; - } - -#ifdef NOT_YET - /* *** this can't be added until AT can handle dynamic addition and - deletion of interfaces *** */ - case SIOCDIFADDR: - /* check for root access */ - if (error = suser(kauth_cred_get(), 0)) - error = EACCES; - else if (!ifID) - error = EINVAL; - else - elap_offline(ifID); - break; -#endif - - case SIOCSETOT: { - struct atpcb *at_pcb, *clonedat_pcb; - int cloned_fd = *(int *)data; - - at_pcb = sotoatpcb(so); - - /* let's make sure it's either -1 or a valid file descriptor */ - if (cloned_fd != -1) { - struct socket *cloned_so; - error = file_socket(cloned_fd, &cloned_so); - if (error) - break; - clonedat_pcb = sotoatpcb(cloned_so); - } else { - clonedat_pcb = NULL; - } - - if (clonedat_pcb == NULL) { - at_pcb->ddp_flags |= DDPFLG_STRIPHDR; - } else { - at_pcb->ddp_flags = clonedat_pcb->ddp_flags; - } - file_drop(cloned_fd); - break; - } - - case SIOCPROTOATTACH: - /* check for root access */ - if (suser(kauth_cred_get(), 0) != 0) { - error = EACCES; - break; - } - error = at_domifattach(ifp, ifID); - break; - - case SIOCPROTODETACH: - /* check for root access */ - if (suser(kauth_cred_get(), 0) != 0) { - error = EACCES; - break; - } - if (ifID != NULL) { - error = EBUSY; - break; - } - error = proto_unplumb(PF_APPLETALK, ifp); - break; - - default: - if (ifp == 0 || ifp->if_ioctl == 0) - return 
(EOPNOTSUPP); - return ifnet_ioctl(ifp, 0, cmd, data); - } - - return(error); -} - -/* From dlil_post_msg() */ -void atalk_post_msg(struct ifnet *ifp, u_long event_code, struct at_addr *address, at_nvestr_t *zone) -{ - struct kev_atalk_data at_event_data; - struct kev_msg ev_msg; - - bzero(&ev_msg, sizeof(struct kev_msg)); - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_ATALK_SUBCLASS; - ev_msg.event_code = event_code; - - bzero(&at_event_data, sizeof(struct kev_atalk_data)); - - if (ifp != 0) { - strlcpy(&at_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); - at_event_data.link_data.if_family = ifp->if_family; - at_event_data.link_data.if_unit = (unsigned long) ifp->if_unit; - } - - if (address != 0) { - at_event_data.node_data.address = *address; - } - else if (zone != 0) { - at_event_data.node_data.zone = *zone; - } - - ev_msg.dv[0].data_length = sizeof(struct kev_atalk_data); - ev_msg.dv[0].data_ptr = &at_event_data; - ev_msg.dv[1].data_length = 0; - - kev_post_msg(&ev_msg); -} - - -/* - * This is untested; the code is here only for completeness. - */ -void -at_purgeaddrs(struct ifnet *ifp) -{ - at_ifaddr_t *ifID = NULL; - int pat_id; - - /* Find address for this interface, if it exists */ - for (pat_id = 0; pat_id < xpatcnt; pat_id++) { - if (at_interfaces[pat_id].aa_ifp == ifp) { - ifID = &at_interfaces[pat_id]; - elap_offline(ifID); - } - } -} diff --git a/bsd/netat/at_aarp.h b/bsd/netat/at_aarp.h deleted file mode 100644 index e98b87ffc..000000000 --- a/bsd/netat/at_aarp.h +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _NETAT_AT_AARP_H_ -#define _NETAT_AT_AARP_H_ -#include -#ifdef KERNEL_PRIVATE -#include -#endif /* KERNEL_PRIVATE */ - -#ifdef __APPLE_API_OBSOLETE - -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. - */ - -/* "@(#)at_aarp.h: 2.0, 1.6; 10/4/93; Copyright 1988-89, Apple Computer, Inc." */ - -/* This is a header file for AARP. - * - * Author: R. C. 
Venkatraman - * Date : 3/2/88 - * - */ - -/* AARP packet */ - -typedef struct { - u_short hardware_type; - u_short stack_type; /* indicates appletalk or xns*/ - u_char hw_addr_len; /* len of hardware addr, e.g - * ethernet addr len, in bytes - */ - u_char stack_addr_len; /* protocol stack addr len, - * e.g., appletalk addr len - * in bytes - */ - u_short aarp_cmd; - struct etalk_addr src_addr; - struct atalk_addr src_at_addr; - struct etalk_addr dest_addr; - struct atalk_addr dest_at_addr; /* desired or dest. at addr */ -} aarp_pkt_t; - - -/* Constants currently defined in AARP */ - -#define AARP_AT_TYPE 0x80F3 /* indicates aarp packet */ -#define AARP_ETHER_HW_TYPE 0x1 -#define AARP_AT_PROTO 0x809B /* indicates stack type */ -#define AARP_ETHER_ADDR_LEN 6 /* in bytes */ -#define AARP_AT_ADDR_LEN 4 /* in bytes */ - -/* AARP cmd definitions */ - -#define AARP_REQ_CMD 0x1 /* address lookup request */ -#define AARP_RESP_CMD 0x2 /* address match response */ -#define AARP_PROBE_CMD 0x3 /* new kid probing... */ - -/* AARP timer and retry counts */ - -#define AARP_MAX_PROBE_RETRIES 20 -#define AARP_PROBE_TIMER_INT HZ/30 /* HZ defines in param.h */ -#define AARP_MAX_REQ_RETRIES 10 -#define AARP_REQ_TIMER_INT HZ/30 -#define AARP_MAX_NODES_TRIED 200 /* max no. of addresses tried */ - /* on the same net before */ - /* giving up on the net# */ -#define AARP_MAX_NETS_TRIED 10 /* max no. of net nos tried */ - /* before giving up on startup*/ - -/* Probe states */ - -#define PROBE_IDLE 0x1 /* There is no node addr */ -#define PROBE_TENTATIVE 0x2 /* probing */ -#define PROBE_DONE 0x3 /* an appletalk addr has been */ - /* assigned for the given node*/ -/* Errors returned by AARP routines */ -#define AARP_ERR_NOT_OURS 1 /* not our appletalk address */ - -#ifdef KERNEL_PRIVATE - -/*************************************************/ -/* Declarations for AARP Address Map Table (AMT) */ -/*************************************************/ - -typedef struct { - struct atalk_addr dest_at_addr; /* net# in network byte order */ - struct etalk_addr dest_addr; - char dummy[2]; /* pad out to struct size of 32 */ - time_t last_time; /* the last time that this addr - * was used. Read in lbolt - * whenever the addr is used. 
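[Annotation: the probe constants and states above encode AppleTalk's claim-by-probing address assignment: a node picks a tentative address, probes it up to AARP_MAX_PROBE_RETRIES times, and keeps it only if no defender answers. A hypothetical rendering of that cycle (send_probe() and conflict_seen() are invented stand-ins for the driver's transmit path and its AARP_RESP_CMD handling):

#include <stdbool.h>

#define AARP_MAX_PROBE_RETRIES 20

enum probe_state { PROBE_IDLE, PROBE_TENTATIVE, PROBE_DONE };

extern void send_probe(unsigned char node);
extern bool conflict_seen(void);  /* true once a defending response arrives */

static enum probe_state probe_address(unsigned char node)
{
    for (int i = 0; i < AARP_MAX_PROBE_RETRIES; i++) {
        send_probe(node);          /* broadcast AARP_PROBE_CMD */
        /* the driver waits AARP_PROBE_TIMER_INT ticks here */
        if (conflict_seen())
            return PROBE_IDLE;     /* address defended; pick another */
    }
    return PROBE_DONE;             /* unchallenged: the address is ours */
}
]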
- */ - int no_of_retries; /* number of times we've xmitted */ - gbuf_t *m; /* ptr to msg blk to be sent out */ - at_ifaddr_t *elapp; - int error; - int tmo; -} aarp_amt_t; - -#define AMT_BSIZ 4 /* bucket size */ -#define AMT_NB 64 /* number of buckets */ -#define AMTSIZE (AMT_BSIZ * AMT_NB) - -typedef struct { - aarp_amt_t et_aarp_amt[AMTSIZE]; -} aarp_amt_array; - -#define AMT_HASH(a) \ - ((NET_VALUE(((struct atalk_addr *)&a)->atalk_net) + ((struct atalk_addr *)&a)->atalk_node) % AMT_NB) - -/* at_addr - net # in network byte order */ -#define AMT_LOOK(at, at_addr, elapp) { \ - int n; \ - at = &aarp_table[elapp->ifPort]->et_aarp_amt[AMT_HASH(at_addr) * AMT_BSIZ]; \ - for (n = 0 ; ; at++) { \ - if (at->dest_at_addr.atalk_node == (at_addr).atalk_node && \ - NET_EQUAL(at->dest_at_addr.atalk_net, (at_addr).atalk_net)) \ - break; \ - if (++n >= AMT_BSIZ) { \ - at = NULL; \ - break; \ - } \ - } \ -} - -/* at_addr - net # in network byte order */ -#define NEW_AMT(at, at_addr, elapp) { \ - int n; \ - aarp_amt_t *myat; \ - myat = at = &aarp_table[elapp->ifPort]->et_aarp_amt[AMT_HASH(at_addr) * AMT_BSIZ]; \ - for (n = 0 ; ; at++) { \ - if (at->last_time == 0) \ - break; \ - if (++n >= AMT_BSIZ) { \ - at = aarp_lru_entry(myat); \ - break; \ - } \ - } \ -} - -#define AARP_NET_MCAST(p, elapp) \ - (NET_VALUE((p)->dst_net) == elapp->ifThisNode.s_net) \ - ) /* network-wide broadcast */ - -#define AARP_CABLE_MCAST(p) \ - (NET_VALUE((p)->dst_net) == 0x0000 \ - ) - -#define AARP_BROADCAST(p, elapp) \ - (((p)->dst_node == 0xff) && \ - ( \ - (NET_VALUE((p)->dst_net) == 0x0000) || \ - (NET_VALUE((p)->dst_net) == elapp->ifThisNode.s_net)) \ - ) /* is this some kind of a broadcast address (?) */ - - -#define ETHER_ADDR_EQUAL(addr1p, addr2p) \ - (( \ - ((addr1p)->etalk_addr_octet[0]==(addr2p)->etalk_addr_octet[0]) && \ - ((addr1p)->etalk_addr_octet[1]==(addr2p)->etalk_addr_octet[1]) && \ - ((addr1p)->etalk_addr_octet[2]==(addr2p)->etalk_addr_octet[2]) && \ - ((addr1p)->etalk_addr_octet[3]==(addr2p)->etalk_addr_octet[3]) && \ - ((addr1p)->etalk_addr_octet[4]==(addr2p)->etalk_addr_octet[4]) && \ - ((addr1p)->etalk_addr_octet[5]==(addr2p)->etalk_addr_octet[5]) \ - ) ? 1 : 0 \ - ) - -int aarp_chk_addr(at_ddp_t *, at_ifaddr_t *); -int aarp_rcv_pkt(aarp_pkt_t *, at_ifaddr_t *); -void AARPwakeup(aarp_amt_t *); -int aarp_send_data(gbuf_t *, at_ifaddr_t *, struct atalk_addr *, int); - -#endif /* KERNEL_PRIVATE */ - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_AT_AARP_H_ */ diff --git a/bsd/netat/at_config.h b/bsd/netat/at_config.h deleted file mode 100644 index 5601d3538..000000000 --- a/bsd/netat/at_config.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
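[Annotation: the AMT_LOOK/NEW_AMT macros above implement a small set-associative cache: AMT_HASH picks one of AMT_NB buckets, each holding AMT_BSIZ entries, and on a miss the least recently used entry in the bucket is evicted. The same logic as one function (a sketch; key folds the net/node pair the macros hash):

#include <time.h>

#define AMT_BSIZ 4    /* bucket size */
#define AMT_NB   64   /* number of buckets */

struct amt {
    unsigned key;     /* stand-in for the atalk net/node pair */
    time_t   last;    /* last use; 0 marks a free slot */
};

static struct amt table[AMT_BSIZ * AMT_NB];

/* One call covers both macros: return the matching entry, else a free
 * slot, else the bucket's least recently used entry for eviction. */
static struct amt *amt_slot(unsigned key)
{
    struct amt *b = &table[(key % AMT_NB) * AMT_BSIZ];
    struct amt *free_slot = NULL, *lru = &b[0];

    for (int n = 0; n < AMT_BSIZ; n++) {
        if (b[n].last != 0 && b[n].key == key)
            return &b[n];                    /* hit */
        if (b[n].last == 0 && free_slot == NULL)
            free_slot = &b[n];               /* first free slot */
        if (b[n].last < lru->last)
            lru = &b[n];                     /* oldest entry so far */
    }
    return free_slot != NULL ? free_slot : lru;
}
]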
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1988-1993 Apple Computer, Inc. - */ - -#ifndef _NETAT_AT_CONFIG_H_ -#define _NETAT_AT_CONFIG_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -/* originally from if_cnt.h - * - * defines for if_stat struct. - * note: set IF_TYPE_CNT to number of types supported and make sure - * that defines for those type are LESS than this value - */ - -#define IF_TYPENO_CNT 1 /* number of different types we support */ - /* *** this value was 5, but for now, let's - just start with ethernet *** */ - -/* maximum number of I/F's allowed */ -/* *** "17" corresponds to Shiner *** */ -#define IF_TOTAL_MAX 17 /* max count of any combination of I/F's */ - -typedef struct if_types { - int iftype, - max_interfaces; -} if_types_t; - - /* GET_ZONES defines */ -#define GET_ALL_ZONES 0 -#define GET_LOCAL_ZONES_ONLY 1 - -typedef struct if_zone_info { - at_nvestr_t zone_name; /* the zone name & len */ - unsigned zone_ifs[IF_TYPENO_CNT]; /* bitmapped I/F usage for zone */ - unsigned zone_home; /* TRUE for home zone */ -} if_zone_info_t; - -typedef union if_zone_nve { - at_nvestr_t ifnve; - int zone; -} if_zone_nve_t; - -/* this struct used to obtain local zones for specific - ifID's from the kernel and to set default zones for - specific ifID numbers */ -typedef struct if_zone { - if_zone_nve_t ifzn; - char usage[IF_TOTAL_MAX]; /* I/F usage (1 set if - I/F in this zone */ - int index; /* zone index in ZT_table */ -} if_zone_t; - - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_AT_CONFIG_H_ */ diff --git a/bsd/netat/at_ddp_brt.h b/bsd/netat/at_ddp_brt.h deleted file mode 100644 index 781311a8c..000000000 --- a/bsd/netat/at_ddp_brt.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. - */ - -#ifndef _NETAT_AT_DDP_BRT_H_ -#define _NETAT_AT_DDP_BRT_H_ -#include -#ifdef KERNEL_PRIVATE -#ifdef __APPLE_API_OBSOLETE - -typedef struct { - int age_flag; - at_ifaddr_t *ifID; - struct etalk_addr et_addr; - at_net_al net; -} ddp_brt_t; - -#define BRT_SWEEP_INT (10 * PR_SLOWHZ) -#define BRT_BSIZ 4 /* bucket size */ -#define BRT_NB 16 /* number of buckets */ -#define BRTSIZE (BRT_BSIZ * BRT_NB) - -/* age_flag values */ -#define BRT_EMPTY 0 /* the BRT entry is empty */ - /* (or aged out). */ -#define BRT_VALID 1 /* BRT entry contains valid */ - /* tuple */ -#define BRT_GETTING_OLD 2 /* BRT entry is a candidate */ - /* for aging */ - -#define BRT_HASH(a) ((a) % BRT_NB) - -#define BRT_LOOK(brt, dst_net) { \ - int n; \ - brt = &at_ddp_brt[BRT_HASH(dst_net) * BRT_BSIZ]; \ - for (n = 0 ; ; brt++) { \ - if (brt->net == dst_net) \ - break; \ - if (++n >= BRT_BSIZ) { \ - brt = NULL; \ - break; \ - } \ - } \ - } - -#define NEW_BRT(brt, net) { \ - int n; \ - brt = &at_ddp_brt[BRT_HASH(net) * BRT_BSIZ]; \ - for (n = 0 ; ; brt++) { \ - if (brt->age_flag == BRT_EMPTY) \ - break; \ - if (++n >= BRT_BSIZ) { \ - brt = NULL; \ - break; \ - } \ - } \ - } - -/* Best Router Cache */ -extern ddp_brt_t at_ddp_brt[BRTSIZE]; - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* KERNEL_PRIVATE */ -#endif /* _NETAT_AT_DDP_BRT_H_ */ - diff --git a/bsd/netat/at_pat.h b/bsd/netat/at_pat.h deleted file mode 100644 index a34bf693e..000000000 --- a/bsd/netat/at_pat.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. - */ - -#ifndef _NETAT_AT_PAT_H_ -#define _NETAT_AT_PAT_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -/* This is header for the PAT module. This contains a table of pointers that - * should get initialized with the BNET stuff and the ethernet driver. The - * number of interfaces supported should be communicated. Should include - * mbuf.h, if.h, socket.h - * - * Author: R. C. 
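[Annotation: the age_flag values in at_ddp_brt.h above describe a two-step (second-chance) aging scheme for the Best Router cache: a sweep demotes BRT_VALID entries to BRT_GETTING_OLD and clears entries that were already old, so an entry survives only if a lookup promotes it back to BRT_VALID between sweeps. The sweep itself lives in the DDP code, not this header; what follows is a plausible sketch consistent with those flag comments:

enum { BRT_EMPTY, BRT_VALID, BRT_GETTING_OLD };

#define BRTSIZE 64    /* BRT_BSIZ * BRT_NB from the header above */

struct brt_entry { int age_flag; /* plus the cached route itself */ };

static struct brt_entry brt[BRTSIZE];

/* Run every BRT_SWEEP_INT ticks: two quiet intervals expire an entry. */
static void brt_sweep(void)
{
    for (int i = 0; i < BRTSIZE; i++) {
        if (brt[i].age_flag == BRT_VALID)
            brt[i].age_flag = BRT_GETTING_OLD;
        else if (brt[i].age_flag == BRT_GETTING_OLD)
            brt[i].age_flag = BRT_EMPTY;
    }
}
]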
Venkatraman - * Date : 2/29/88 - */ - -typedef struct { - unsigned char dst[6]; - unsigned char src[6]; - unsigned short len; -} enet_header_t; - -typedef struct { - unsigned char dst_sap; - unsigned char src_sap; - unsigned char control; - unsigned char protocol[5]; -} llc_header_t; - -#define ENET_LLC_SIZE (sizeof(enet_header_t)+sizeof(llc_header_t)) -#define SNAP_UI 0x03 /* bits 11000000 reversed!! */ -#define SNAP_AT_SAP 0xaa -#define SNAP_PROTO_AT {0x08, 0x00, 0x07, 0x80, 0x9B} -#define SNAP_PROTO_AARP {0x00, 0x00, 0x00, 0x80, 0xF3} -#define SNAP_HDR_AT {SNAP_AT_SAP, SNAP_AT_SAP, SNAP_UI, SNAP_PROTO_AT} -#define SNAP_HDR_AARP {SNAP_AT_SAP, SNAP_AT_SAP, SNAP_UI, SNAP_PROTO_AARP} - -#define LLC_PROTO_EQUAL(a1, a2) \ - ((*((unsigned long *)(a1)) == *((unsigned long *)(a2))) && \ - (a1[4] == a2[4]) \ - ) -#endif /* __APPLE_API_OBSOLETE */ - -#endif /* _NETAT_AT_PAT_H_ */ diff --git a/bsd/netat/at_pcb.c b/bsd/netat/at_pcb.c deleted file mode 100644 index a1e542bdf..000000000 --- a/bsd/netat/at_pcb.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Copyright (c) 1997-1999 Apple Computer, Inc. - * All Rights Reserved. - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)at_pcb.c 8.2 (Berkeley) 1/4/94 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct atpcb ddp_head; -extern struct atpcb *atp_inputQ[]; -extern CCB *adsp_inputQ[]; -extern at_ifaddr_t *ifID_home; -extern struct { - ddp_handler_func func; - } ddp_handler[]; - -int DDP_chksum_on = FALSE; -int DDP_slfsnd_on = FALSE; - -zone_t atpcb_zone; - -void at_memzone_init(void) -{ - vm_size_t str_size; - - str_size = (vm_size_t)sizeof(struct atpcb); - atpcb_zone = (zone_t)zinit(str_size, 1000*str_size, 8192, "atpcb zone"); -} - -int at_pcballoc(so, head) - struct socket *so; - struct atpcb *head; -{ - register struct atpcb *pcb; - - pcb = (struct atpcb *)zalloc(atpcb_zone); - if (pcb == NULL) - return (ENOBUFS); - bzero((caddr_t)pcb, sizeof(*pcb)); - - /* set the flags to the system defaults */ - if (DDP_chksum_on) - pcb->ddp_flags |= DDPFLG_CHKSUM; - else - pcb->ddp_flags &= ~DDPFLG_CHKSUM; - if (DDP_slfsnd_on) - pcb->ddp_flags |= DDPFLG_SLFSND; - else - pcb->ddp_flags &= ~DDPFLG_SLFSND; - - pcb->atpcb_head = head; - pcb->atpcb_socket = so; - atalk_lock(); /* makes sure the list is locked while inserting atpcb */ - if (head) - insque((queue_t)pcb, (queue_t)head); - so->so_pcb = (caddr_t)pcb; - atalk_unlock(); - - return (0); -} - -int at_pcbdetach(pcb) - struct atpcb *pcb; -{ - struct socket *so = pcb->atpcb_socket; - - /* Notify NBP that we are closing this DDP socket */ - if (pcb->lport) { - ddp_notify_nbp(pcb->lport, pcb->pid, pcb->ddptype); - pcb->lport = 0; - } - - so->so_pcb = 0; - so->so_flags |= SOF_PCBCLEARING; - if ((pcb->atpcb_next) && (pcb->atpcb_prev)) - remque((queue_t)pcb); - zfree(atpcb_zone, pcb); - sofree(so); - return(0); -} - -int ddp_socket_inuse(ddpsock, proto) - u_char ddpsock, proto; -{ - struct atpcb *pcb; - - if ((!proto || (proto == DDP_ATP)) && atp_inputQ[ddpsock]) - return TRUE; - if ((!proto || (proto == DDP_ADSP)) && adsp_inputQ[ddpsock]) - return TRUE; - if (ddp_handler[ddpsock].func) - return TRUE; - for (pcb = ddp_head.atpcb_next; pcb != &ddp_head; - pcb = pcb->atpcb_next) { - if (pcb->lport == ddpsock && - (!pcb->ddptype || !proto || (pcb->ddptype == proto))) - return TRUE; - } - return FALSE; -} - -int at_pcbbind(pcb, nam) - register struct atpcb *pcb; - struct sockaddr *nam; -{ - register struct sockaddr_at *local = (struct sockaddr_at *) nam; - u_char ddpsock = local->sat_port; - - if 
((!ifID_home) || (local->sat_family != AF_APPLETALK)) - return(EADDRNOTAVAIL); - - if (pcb->lport != ATADDR_ANYPORT || - pcb->laddr.s_node != ATADDR_ANYNODE || - pcb->laddr.s_net != ATADDR_ANYNET) - return(EINVAL); - - /* Request for dynamic socket? */ - if (ddpsock == 0) { - /* Search table for free one */ - /* *** borrow IP algorithm, instead? *** */ - for (ddpsock = DDP_SOCKET_LAST; - ddpsock >= (DDP_SOCKET_1st_DYNAMIC + 1); - /* sip has 1st */ - ddpsock--) { - if (! ddp_socket_inuse(ddpsock, pcb->ddptype)) - break; - } - if (ddpsock < (DDP_SOCKET_1st_DYNAMIC + 1)) - return(EADDRNOTAVAIL); /* Error if no free sockets */ - } else { - /* Asking to open a socket by its number. - Check if its legal & free. */ - if (ddpsock > DDP_SOCKET_LAST) - return(EINVAL); - if (ddp_socket_inuse(ddpsock, pcb->ddptype)) - return(EADDRNOTAVAIL); - } - - pcb->lport = ddpsock; - /* if address is specified, make sure address matches one of the - interfaces configured for AppleTalk */ - if (local->sat_addr.s_net || local->sat_addr.s_node) { - if (MULTIHOME_MODE) { - at_ifaddr_t *ifID; - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (ifID->ifThisNode.s_net == local->sat_addr.s_net && - ifID->ifThisNode.s_node == local->sat_addr.s_node) { - pcb->laddr = local->sat_addr; - return(0); - } - } - return(EINVAL); - } else { - /* for single-port and router modes if the local address is - specified, it must match the default interface, which is - what will be put into packets' source address anyway */ - if (ifID_home->ifThisNode.s_net == local->sat_addr.s_net && - ifID_home->ifThisNode.s_node == local->sat_addr.s_node) { - pcb->laddr = local->sat_addr; - return(0); - } - return(EINVAL); - - } - } - return(0); -} diff --git a/bsd/netat/at_pcb.h b/bsd/netat/at_pcb.h deleted file mode 100644 index b936f6751..000000000 --- a/bsd/netat/at_pcb.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1997-1999 Apple Computer, Inc. - * All Rights Reserved. - */ -/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ -/* - * Copyright (c) 1982, 1986, 1990, 1993 - * The Regents of the University of California. All rights reserved. 
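[Annotation: the dynamic-socket branch of at_pcbbind() above is DDP's ephemeral-port allocator: it scans downward from the top of the socket range and takes the first number not already claimed by ATP, ADSP, a DDP handler, or another pcb. In isolation (the range constants here are illustrative, the real bounds come from the DDP headers, and socket_in_use() stands in for the ddp_socket_inuse() shown earlier):

#include <stdbool.h>

#define DDP_SOCKET_LAST        253   /* illustrative upper bound */
#define DDP_SOCKET_1st_DYNAMIC 64    /* illustrative start of dynamic range */

extern bool socket_in_use(unsigned char sock);

/* Scan downward and return the first free dynamic socket, or 0 when the
 * range is exhausted (the caller maps 0 to EADDRNOTAVAIL). */
static unsigned char ddp_socket_pick(void)
{
    for (unsigned char s = DDP_SOCKET_LAST;
         s >= DDP_SOCKET_1st_DYNAMIC + 1; s--) {
        if (!socket_in_use(s))
            return s;
    }
    return 0;
}
]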
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* at_pcb.h */ -#include - -#ifdef __APPLE_API_OBSOLETE -#ifdef KERNEL_PRIVATE -/* - * Common structure pcb for internet protocol implementation. - * Here are stored pointers to local and foreign host table - * entries, local and foreign socket numbers, and pointers - * up (to a socket structure) and down (to a protocol-specific) - * control block. 
- */ -struct atpcb; -typedef struct atpcb gref_t; -struct atpcb { - struct atpcb *atpcb_next, /* pointers to other pcb's */ - *atpcb_prev, - *atpcb_head; /* pointer back to chain of atpcb's - for this protocol */ - struct socket *atpcb_socket; /* back pointer to socket */ - u_char ddptype, /* DDP type */ - lport, /* local DDP socket */ - rport; /* remote DDP socket */ - struct at_addr laddr, /* local net and node */ - raddr; /* remote net and node */ - int ddp_flags; /* generic IP/datagram flags */ - caddr_t at_ppcb; /* pointer to per-protocol pcb */ - - /* from the gref structure */ - - void *info; - gbuf_t *ichead; - gbuf_t *ictail; - gbuf_t *rdhead; - gbuf_t *rdtail; - unsigned char proto; /* old-style ATPROTO_* */ - unsigned char errno; - unsigned short sevents; - int pid; - atlock_t lock; - atevent_t event; - atevent_t iocevent; - int (*writeable)(gref_t *gref); - int (*readable)(gref_t *gref); - struct selinfo si; /* BSD 4.4 selinfo structure for - selrecord/selwakeup */ -}; - -#define sotoatpcb(so)((struct atpcb *)(so)->so_pcb) - -/* ddp_flags */ -#define DDPFLG_CHKSUM 0x01 /* DDP checksums to be used on this connection */ -#define DDPFLG_SLFSND 0x02 /* packets sent to the cable-multicast address - on this socket should be looped back */ -#define DDPFLG_HDRINCL 0x08 /* user supplies entire DDP header */ -#define DDPFLG_STRIPHDR 0x200 /* drop DDP header on receive (raw) */ - -int at_pcballoc(struct socket *, struct atpcb *); -int at_pcbdetach(struct atpcb *); -int at_pcbbind(struct atpcb *, struct sockaddr *); - -int atalk_getref(struct fileproc *, int , gref_t ** , struct proc *, int); -int atalk_getref_locked(struct fileproc *, int , gref_t ** , struct proc *, int); - -void atalk_notify(gref_t *, int); -void atalk_notify_sel(gref_t *); - -int atalk_peek(gref_t *, unsigned char *); - -void ddp_putmsg(gref_t *gref, gbuf_t *mp); -int ddp_socket_inuse(u_char, u_char); -void ddp_notify_nbp(unsigned char, int, unsigned char); - -void atalk_putnext(gref_t *gref, gbuf_t *m); -void atalk_enablew(gref_t *gref); -void atalk_flush(gref_t *gref); - -void at_memzone_init(void); - -#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ diff --git a/bsd/netat/at_proto.c b/bsd/netat/at_proto.c deleted file mode 100644 index e6acc4660..000000000 --- a/bsd/netat/at_proto.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
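Each atpcb above carries a ddp_flags word whose bits (DDPFLG_CHKSUM, DDPFLG_SLFSND, DDPFLG_HDRINCL, DDPFLG_STRIPHDR) toggle per-socket DDP behavior; at_pcballoc() seeds the first two from the DDP_chksum_on and DDP_slfsnd_on globals. A small self-contained sketch of the set/clear/test idiom, reusing the flag values from the header:

#include <stdio.h>

/* Flag values as defined in at_pcb.h above. */
#define DDPFLG_CHKSUM   0x01
#define DDPFLG_SLFSND   0x02
#define DDPFLG_HDRINCL  0x08
#define DDPFLG_STRIPHDR 0x200

int main(void)
{
    int ddp_flags = 0;

    ddp_flags |= DDPFLG_CHKSUM;    /* enable DDP checksums */
    ddp_flags |= DDPFLG_HDRINCL;   /* caller supplies the DDP header */
    ddp_flags &= ~DDPFLG_CHKSUM;   /* turn checksums back off */

    printf("header-included: %s\n",
           (ddp_flags & DDPFLG_HDRINCL) ? "yes" : "no");
    return 0;
}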
- * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1998 Apple Computer, Inc. - */ - -/* at_proto.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - - -/* - * Dummy usrreqs struct created by Ted for FreeBSD 3.x integration. - * Fill in supported functions as appropriate. - */ -struct pr_usrreqs ddp_usrreqs = { - ddp_pru_abort, pru_accept_notsupp, ddp_pru_attach, ddp_pru_bind, - ddp_pru_connect, pru_connect2_notsupp, ddp_pru_control, ddp_pru_detach, - ddp_pru_disconnect, pru_listen_notsupp, ddp_pru_peeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, ddp_pru_send, pru_sense_null, ddp_pru_shutdown, - ddp_pru_sockaddr, sosend, soreceive, pru_sopoll_notsupp -}; - -extern struct domain atalkdomain; -extern void atalk_dominit(void); - -struct protosw atalksw[] = { - { SOCK_RAW, &atalkdomain, /*protocol*/ 0, PR_ATOMIC|PR_ADDR, - /*input*/ 0, /*output*/ 0, /*clinput*/ 0, ddp_ctloutput, - /*ousrreq*/ 0, - ddp_init, /*fastto*/ 0, /*slowto*/ 0, /*drain*/ 0, - /*sysctl*/ 0, &ddp_usrreqs, - 0, 0, 0, /*lock, unlock, getlock */ - {0, 0}, 0, {0} /* filters */ - } -}; - -struct domain atalkdomain = -{ AF_APPLETALK, - "appletalk", - atalk_dominit, - 0, - 0, - atalksw, - 0, - 0, /* dom_rtattach */ - 0, 0, /* dom_rtoffset, dom_maxrtkey */ - DDP_X_HDR_SIZE, 0, - 0, /* domain global mutex */ - 0, /* domain flags */ - {0, 0} /*reserved[2] */ -}; - -struct domain * atalkdom = &atalkdomain; -lck_mtx_t *atalk_mutex = NULL; - -lck_mtx_t *atalk_cluster_lock = NULL; -static lck_attr_t *atalk_lock_attr; -static lck_grp_t *atalk_lock_grp; -static lck_grp_attr_t *atalk_lock_grp_attr; - -static int at_saved_lock, at_saved_unlock; - -SYSCTL_NODE(_net, PF_APPLETALK, appletalk, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "AppleTalk Family"); - -void -atalk_dominit(void) -{ - atalk_mutex = atalkdom->dom_mtx; - - atalk_lock_grp_attr = lck_grp_attr_alloc_init(); - atalk_lock_grp = lck_grp_alloc_init("appletalk", atalk_lock_grp_attr); - atalk_lock_attr = lck_attr_alloc_init(); - atalk_cluster_lock = lck_mtx_alloc_init(atalk_lock_grp, - atalk_lock_attr); -} - -void -atalk_lock() -{ - int lr_saved; - lr_saved = (unsigned int) __builtin_return_address(0); - - lck_mtx_assert(atalkdom->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(atalkdom->dom_mtx); - at_saved_lock = lr_saved; -} - -void -atalk_unlock() -{ - int lr_saved; - lr_saved = (unsigned int) __builtin_return_address(0); - - lck_mtx_assert(atalkdom->dom_mtx, LCK_MTX_ASSERT_OWNED); - at_saved_unlock = lr_saved; - lck_mtx_unlock(atalkdom->dom_mtx); - -} - - - - - diff --git a/bsd/netat/at_snmp.h b/bsd/netat/at_snmp.h deleted file mode 100644 index da6249dfc..000000000 --- a/bsd/netat/at_snmp.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
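atalk_lock() and atalk_unlock() in at_proto.c above do more than take the domain mutex: each asserts the expected ownership state first, then records __builtin_return_address(0) in at_saved_lock / at_saved_unlock so a crash dump shows the last caller of each. (The original stores the pointer in an int, which truncates on 64-bit; the sketch keeps the full pointer.) A rough userland analog over pthreads, with hypothetical names, assuming a single domain-wide mutex:

#include <pthread.h>
#include <stdio.h>

/* One domain-wide mutex, plus the caller's return address saved on
 * each transition so a debugger can see who last took or dropped it. */
static pthread_mutex_t dom_mtx = PTHREAD_MUTEX_INITIALIZER;
static void *saved_lock_caller, *saved_unlock_caller;

static void domain_lock(void)
{
    void *lr = __builtin_return_address(0); /* gcc/clang builtin */
    pthread_mutex_lock(&dom_mtx);
    saved_lock_caller = lr;
}

static void domain_unlock(void)
{
    saved_unlock_caller = __builtin_return_address(0);
    pthread_mutex_unlock(&dom_mtx);
}

int main(void)
{
    domain_lock();
    domain_unlock();
    printf("last locker %p, last unlocker %p\n",
           saved_lock_caller, saved_unlock_caller);
    return 0;
}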
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#ifndef _NETAT_AT_SNMP_H_ -#define _NETAT_AT_SNMP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -#define MAX_PHYS_ADDR_SIZE 6 /* maximum physical addr size */ -#define MAX_IFS 25 /* max # interfaces */ -#define IF_NAME_SIZE 6 /* max name length of I/F name */ -#define DDP_ADDR_SIZE 3 -#define ZONE_SIZE NBP_NVE_STR_SIZE -#define OBJECT_SIZE NBP_NVE_STR_SIZE -#define TYPE_SIZE NBP_NVE_STR_SIZE -#define PORT_DESC_SIZE 50 -#define UPDATE_IF_CHANGED 1 /* for ioctls */ -#define UPDATE_UNCONDITIONAL 2 -#define SNMP_NBP_HEADER_SIZE (sizeof(snmpNbpTable_t) - sizeof(snmpNbpEntry_t)) - -typedef struct snmpIfCfg { - u_short ifc_aarpSize; /* AARP table size for this I/F */ - u_short ifc_addrSize; /* Mac address size in bytes */ - u_short ifc_type; /* port type */ - at_net_al ifc_start; /* net range start */ - at_net_al ifc_end; /* net range end */ - struct at_addr ifc_ddpAddr; /* ddp address of port */ - u_short ifc_status; /* port status */ - u_short ifc_netCfg; - u_short ifc_zoneCfg; - at_nvestr_t ifc_zoneName; - u_short ifc_index; - char ifc_name[IF_NAME_SIZE]; /* I/F name (e.g. 
ent0 */ -} snmpIfCfg_t; - - -typedef struct snmpCfg { - int pad; /* pad for UPDATE flag when ioctl issued */ - u_int cfg_flags; /* at_state flags */ - int cfg_ifCnt; /* # I/F's up */ - snmpIfCfg_t cfg_ifCfg[MAX_IFS]; -} snmpCfg_t; - -typedef struct snmpAarpEnt { - struct at_addr ap_ddpAddr; - u_char ap_physAddr[MAX_PHYS_ADDR_SIZE]; -}snmpAarpEnt_t; - -typedef struct snmpAarp { /* aarp info for 1 I/F */ - time_t at_time; /* the time() we got this table */ - int at_ifno; /* the (SNMP) I/F number of this table */ - int at_maxSize; /* allocated size of as_table in entries */ - int at_usedSize; /* size of at_table used portion */ - snmpAarpEnt_t *at_table; -} snmpAarp_t; - -typedef struct snmpFlags { - int lap_changed; /* set when any I/F state changes */ - int rtmpAdded; /* set when rtmp entry ADDED */ - int zipAdded; /* set when zip entry ADDED */ -} snmpFlags_t; - -typedef struct snmpNbpEntry { - at_nvestr_t nbpe_object; - at_nvestr_t nbpe_type; -}snmpNbpEntry_t; - -typedef struct snmpNbpTable { - int nbpt_entries; - at_nvestr_t nbpt_zone; - snmpNbpEntry_t nbpt_table[1]; -}snmpNbpTable_t; - - - - -typedef struct snmpStats { - - /* ddp group */ - u_int dd_outReq; - u_int dd_outShort; - u_int dd_outLong; - u_int dd_inTotal; - u_int dd_fwdReq; - u_int dd_inLocal; - u_int dd_noHandler; - u_int dd_noRoutes; - u_int dd_tooShort; - u_int dd_tooLong; - u_int dd_inBcastErr; - u_int dd_shortErr; - u_int dd_hopCount; - u_int dd_checkSum; - - /* ATEcho group */ - - u_int ec_echoReq; - u_int ec_echoReply; -} snmpStats_t; - -#define SNMP_TYPE_OTHER 1 -#define SNMP_TYPE_LOCAL 2 -#define SNMP_TYPE_ETHER1 3 -#define SNMP_TYPE_ETHER2 4 -#define SNMP_TYPE_TOKEN 5 -#define SNMP_TYPE_IP 6 -#define SNMP_TYPE_SERIALPPP 7 -#define SNMP_TYPE_SERIALNONSTD 8 -#define SNMP_TYPE_VIRTUAL 9 -#define SNMP_CFG_CONFIGURED 1 -#define SNMP_CFG_GARNERED 2 -#define SNMP_CFG_GUESSED 3 -#define SNMP_CFG_UNCONFIG 4 - -#define SNMP_OBJ_TYPE_AARP 0x0100 -#define SNMP_OBJ_TYPE_ECHO 0x0200 -#define SNMP_OBJ_TYPE_PORT 0x0300 -#define SNMP_OBJ_TYPE_DDP 0x0400 -#define SNMP_OBJ_TYPE_RTMP 0x0500 -#define SNMP_OBJ_TYPE_ZIP 0x0600 -#define SNMP_OBJ_TYPE_NBP 0x0700 -#define SNMP_OBJ_TYPE_MASK 0x0f00 - -#define AARPIFINDEX 2 + SNMP_OBJ_TYPE_AARP -#define AARPNETADDRESS 3 + SNMP_OBJ_TYPE_AARP -#define AARPPHYSADDRESS 4 + SNMP_OBJ_TYPE_AARP -#define ATECHOREPLIES 6 + SNMP_OBJ_TYPE_AARP -#define ATECHOREQUESTS 7 + SNMP_OBJ_TYPE_AARP -#define ATPORTDESCR 8 + SNMP_OBJ_TYPE_PORT -#define ATPORTIFINDEX 10 + SNMP_OBJ_TYPE_PORT -#define ATPORTINDEX 11 + SNMP_OBJ_TYPE_PORT -#define ATPORTNETADDRESS 12 + SNMP_OBJ_TYPE_PORT -#define ATPORTNETCONFIG 13 + SNMP_OBJ_TYPE_PORT -#define ATPORTNETEND 14 + SNMP_OBJ_TYPE_PORT -#define ATPORTNETSTART 15 + SNMP_OBJ_TYPE_PORT -#define ATPORTSTATUS 16 + SNMP_OBJ_TYPE_PORT -#define ATPORTTYPE 18 + SNMP_OBJ_TYPE_PORT -#define ATPORTZONE 19 + SNMP_OBJ_TYPE_PORT -#define ATPORTZONECONFIG 20 + SNMP_OBJ_TYPE_PORT -#define DDPBROADCASTERRORS 21 + SNMP_OBJ_TYPE_DDP -#define DDPCHECKSUMERRORS 22 + SNMP_OBJ_TYPE_DDP -#define DDPFORWREQUESTS 23 + SNMP_OBJ_TYPE_DDP -#define DDPHOPCOUNTERRORS 24 + SNMP_OBJ_TYPE_DDP -#define DDPINLOCALDATAGRAMS 25 + SNMP_OBJ_TYPE_DDP -#define DDPINRECEIVES 26 + SNMP_OBJ_TYPE_DDP -#define DDPNOPROTOCOLHANDLERS 27 + SNMP_OBJ_TYPE_DDP -#define DDPOUTLONGS 28 + SNMP_OBJ_TYPE_DDP -#define DDPOUTNOROUTES 29 + SNMP_OBJ_TYPE_DDP -#define DDPOUTREQUESTS 30 + SNMP_OBJ_TYPE_DDP -#define DDPOUTSHORTS 31 + SNMP_OBJ_TYPE_DDP -#define DDPSHORTDDPERRORS 32 + SNMP_OBJ_TYPE_DDP -#define DDPTOOLONGERRORS 33 + 
SNMP_OBJ_TYPE_DDP -#define DDPTOOSHORTERRORS 34 + SNMP_OBJ_TYPE_DDP -#define KIPBCASTADDR 35 -#define KIPCORE 36 -#define KIPENTRY 37 -#define KIPHOPCOUNT 38 -#define KIPNETEND 39 -#define KIPNETSTART 40 -#define KIPNEXTHOP 41 -#define KIPSHARE 42 -#define KIPSTATE 43 -#define KIPTABLE 44 -#define KIPTYPE 45 -#define LLAPCOLLISIONS 46 -#define LLAPDEFERS 47 -#define LLAPENTRY 48 -#define LLAPFCSERRORS 49 -#define LLAPIFINDEX 50 -#define LLAPINERRORS 51 -#define LLAPINLENGTHERRORS 52 -#define LLAPINNOHANDLERS 53 -#define LLAPINPKTS 54 -#define LLAPNODATAERRORS 55 -#define LLAPOUTPKTS 56 -#define LLAPRANDOMCTSERRORS 57 -#define NBPINDEX 60 + SNMP_OBJ_TYPE_NBP -#define NBPOBJECT 61 + SNMP_OBJ_TYPE_NBP -#define NBPSTATE 62 + SNMP_OBJ_TYPE_NBP -#define NBPTABLE 63 + SNMP_OBJ_TYPE_NBP -#define NBPTYPE 64 + SNMP_OBJ_TYPE_NBP -#define NBPZONE 65 + SNMP_OBJ_TYPE_NBP -#define RTMPHOPS 67 + SNMP_OBJ_TYPE_RTMP -#define RTMPNEXTHOP 68 + SNMP_OBJ_TYPE_RTMP -#define RTMPPORT 69 + SNMP_OBJ_TYPE_RTMP -#define RTMPRANGEEND 70 + SNMP_OBJ_TYPE_RTMP -#define RTMPRANGESTART 71 + SNMP_OBJ_TYPE_RTMP -#define RTMPSTATE 72 + SNMP_OBJ_TYPE_RTMP -#define RTMPTYPE 74 + SNMP_OBJ_TYPE_RTMP -#define ZIPZONEINDEX 77 + SNMP_OBJ_TYPE_ZIP -#define ZIPZONENAME 78 + SNMP_OBJ_TYPE_ZIP -#define ZIPZONENETEND 79 + SNMP_OBJ_TYPE_ZIP -#define ZIPZONENETSTART 80 + SNMP_OBJ_TYPE_ZIP -#define ZIPZONESTATE 81 + SNMP_OBJ_TYPE_ZIP - -#define SNMP_TYPE(var,type) ((var & SNMP_OBJ_TYPE_MASK) == type) - -#ifdef KERNEL_PRIVATE -int getNbpTable(snmpNbpEntry_t *, int, int); -int getNbpTableSize(void); -snmpAarpEnt_t *getAarp(int *); -#endif - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_AT_SNMP_H_ */ diff --git a/bsd/netat/at_var.h b/bsd/netat/at_var.h deleted file mode 100644 index 1513f9a82..000000000 --- a/bsd/netat/at_var.h +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1998 Apple Computer, Inc. 
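The MIB constants in at_snmp.h above encode an object's protocol group as a tag in bits 8-11 (the SNMP_OBJ_TYPE_* values) added to a small per-object index, and SNMP_TYPE(var,type) recovers the group by masking. Note the original defines are unparenthesized ("2 + SNMP_OBJ_TYPE_AARP"), a classic macro pitfall; this sketch of the scheme, on a hypothetical subset, parenthesizes:

#include <stdio.h>

/* Tag-in-the-high-byte encoding used by the at_snmp.h constants above. */
#define OBJ_TYPE_DDP  0x0400
#define OBJ_TYPE_RTMP 0x0500
#define OBJ_TYPE_MASK 0x0f00

#define OBJ_IS(var, type) (((var) & OBJ_TYPE_MASK) == (type))

#define DDPINRECEIVES (26 + OBJ_TYPE_DDP)
#define RTMPPORT      (69 + OBJ_TYPE_RTMP)

int main(void)
{
    printf("DDPINRECEIVES is a DDP object: %d\n",
           OBJ_IS(DDPINRECEIVES, OBJ_TYPE_DDP));  /* 1 */
    printf("RTMPPORT is a DDP object: %d\n",
           OBJ_IS(RTMPPORT, OBJ_TYPE_DDP));       /* 0 */
    return 0;
}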
- */ - -#ifndef _NETAT_AT_VAR_H_ -#define _NETAT_AT_VAR_H_ - -#include -#ifdef __APPLE_API_OBSOLETE -#include - -/* at_var.h */ - -/* at_var.h contains definitions formerly found in: at/at_lap.h & at/elap.h */ - -/* multicast tracking */ -#define MAX_MCASTS 25 /* #multicast addrs tracked per i/f */ -#define MCAST_TRACK_ADD 1 -#define MCAST_TRACK_DELETE 2 -#define MCAST_TRACK_CHECK 3 - -/* maximum number of I/F's allowed */ -#define IF_TOTAL_MAX 17 /* max count of any combination of I/F's */ - /* 17 == (1+(4*4)); 9 and 13 would also be - reasonable values */ - -#define FDDI_OR_TOKENRING(i) ((i == IFT_FDDI) || (i == IFT_ISO88025)) -#define ETHERNET_ADDR_LEN 6 -#define IFNAMESIZ 16 -typedef struct etalk_addr { - u_char etalk_addr_octet[ETHERNET_ADDR_LEN]; -} etalk_addr_t; -typedef char if_name_t[IFNAMESIZ]; -typedef struct at_ifname_list { - if_name_t at_if[IF_TOTAL_MAX]; -} at_ifnames_t; - -typedef struct at_if_statstics { - u_long fwdBytes; /* bytes received & forwarded */ - u_long fwdPkts; /* pkts received & forwarded */ - u_long droppedBytes; /* bytes received & dropped */ - u_long droppedPkts; /* pkts received & dropped */ - u_long outBytes; /* bytes sent */ - u_long outPkts; /* pkts sent */ - u_long routes; /* count of routes in rtmptable */ -} at_if_statistics_t; - -typedef struct { - u_int unknown_mblks; /* number of unknown streams msgs */ - u_int rcv_bytes; /* number of data bytes received */ - u_int rcv_packets; /* number of packets received */ - u_int xmit_bytes; /* number of data bytes xmited */ - u_int xmit_packets; /* number of packets xmited */ -} at_elap_stats_t; - -typedef struct { - char ifr_name[IFNAMESIZ]; - u_int flags; /* misc. port flags, - (ELAP_CFG_xxx on input - ifFlags on output) */ - struct at_addr node; /* Our node number. */ - struct at_addr router; /* Our router. */ - u_short netStart; /* network start range */ - u_short netEnd; /* network ending range */ - at_nvestr_t zonename; -} at_if_cfg_t; - -typedef struct { - at_entity_t name; - at_inet_t addr; /* net and node are ignored, except in - multihoming mode where "addr" is used - to specify the interface. */ - u_char ddptype; - long unique_nbp_id; -} at_nbp_reg_t; - -typedef struct { - char ifr_name[IFNAMESIZ]; - at_nvestr_t zonename; -} at_def_zone_t; - -typedef struct zone_usage { - int zone_index; /* index in local_zones */ - at_nvestr_t zone_name; /* the zone name & len */ - int zone_home; /* used only to set zones in - router mode */ - at_ifnames_t zone_iflist; /* list of interfaces for - this zone. */ - char usage[IF_TOTAL_MAX]; /* I/F usage (set if - I/F in this zone) */ -} zone_usage_t; - -typedef struct { - short multihome; - short rtmp_table_sz; - short zone_table_sz; - short router_mix; -} at_router_params_t; - - -typedef struct at_kern_err { - int error; /* kernel error # (KE_xxx) */ - int port1; - int port2; - char name1[IFNAMESIZ]; - char name2[IFNAMESIZ]; - u_short net; - u_char node; - u_short netr1b, netr1e; /* net range 1 begin & end */ - u_short netr2b, netr2e; /* net range 2 begin & end */ - u_char rtmp_id; -} at_kern_err_t; - -#define KE_CONF_RANGE 1 -#define KE_CONF_SEED_RNG 2 -#define KE_CONF_SEED1 3 -#define KE_CONF_SEED_NODE 4 -#define KE_NO_ZONES_FOUND 5 -#define KE_NO_SEED 6 -#define KE_INVAL_RANGE 7 -#define KE_SEED_STARTUP 8 -#define KE_BAD_VER 9 -#define KE_RTMP_OVERFLOW 10 -#define KE_ZIP_OVERFLOW 11 - -#ifdef KERNEL_PRIVATE -/* - * Interface address, AppleTalk version. One of these structures - * is allocated for each AppleTalk address on an interface. 
- * - * The ifaddr structure contains the protocol-independent part - * of the structure and is assumed to be first, as it is in - * "struct in_ifaddr", defined in bsd/netinet/in_var.h. - */ -typedef struct at_ifaddr { - struct ifaddr aa_ifa; -#define aa_ifp aa_ifa.ifa_ifp -#define aa_flags aa_ifa.ifa_flags - - TAILQ_ENTRY(at_ifaddr) aa_link; /* tailq macro glue */ - int at_was_attached; /* 1=attached, 0=detached */ - - /* from pat_unit_t */ - unsigned char mcast[MAX_MCASTS]; - char xaddr[ETHERNET_ADDR_LEN]; - - /* from elap_specifics_t */ - at_elap_stats_t stats; - - /* The DDP sets these values: */ - u_char ifState; /* State of the interface LAP_* */ - u_short ifThisCableStart; - u_short ifThisCableEnd; - struct at_addr ifARouter; - u_char ifRouterState; - u_int ifFlags; /* Flags, see AT_IFF_* */ - struct sockaddr_at ifNodeAddress; -#define ifThisNode ifNodeAddress.sat_addr - /* AppleTalk node ID is ifNodeAddress.sat_addr*/ - - /* for use by ZIP */ - u_char ifNumRetries; - u_char ifGNIScheduled; /* to keep getnetinfo from being scheduled more than once */ - at_nvestr_t ifZoneName; - - /* Added for routing support */ - int ifPort; /* the unique ddp logical port - number, also index into - at_interfaces[] and ifID_table[] */ - char ifName[IFNAMESIZ]; - /* added to support LAP_IOC_GET_IFID */ - u_short ifDefZone; /* Default Zone index in ZoneTable; used - only in routing/multihome modes to be - able to answer a ZIP GetNetInfo request */ - char ifZipNeedQueries; - /* ZIP/RTMP Query flag */ - char ifRoutingState; /* Port (as a router) state */ - at_if_statistics_t - ifStatistics; /* statistics */ - /* end of elap_if structure */ - - u_short flags; /* port specific flags */ - struct etalk_addr ZoneMcastAddr; - /* zone multicast addr */ - struct etalk_addr cable_multicast_addr; - /* AppleTalk broadcast addr */ - - struct at_addr initial_addr; /* temporary value used during startup */ - at_nvestr_t startup_zone; - int startup_error, /* to get error code back from - ZIPwakeup() / AARPwakeup() */ - startup_inprogress; /* to decide whether it's the - middle of an elap_online operation */ - -} at_ifaddr_t; -#endif /* KERNEL_PRIVATE */ - -#define LAP_OFFLINE 0 /* LAP_OFFLINE MUST be 0 */ -#define LAP_ONLINE 1 -#define LAP_ONLINE_FOR_ZIP 2 -#define LAP_ONLINE_ZONELESS 3 /* for non-home router ports */ - -#define NO_ROUTER 1 /* there's no router around */ -#define ROUTER_WARNING 2 /* there's a router around that */ - /* we are ignoring, warning has */ - /* been issued to the user */ -#define ROUTER_AROUND 3 /* A router is around and we've */ - /* noted its presence */ -#define ROUTER_UPDATED 4 /* for mh tracking of routers. 
Value decremented - with rtmp aging timer, a value of 4 allows a - minimum of 40 secs to laps before we decide - to revert to cable multicasts */ - -/* AppleTalk IOCTLs */ - - -#define AIOCSTOPATALK _IOWR('a', 1, int) /* stop AppleTalk */ -#define AIOCGETIFCFG _IOWR('a', 2, at_if_cfg_t) /* get AT interface cfg */ -#define AIOCNBPREG _IOWR('a', 3, at_nbp_reg_t) /* NBP register */ -#define AIOCNBPREMOVE _IOW('a', 4, at_nbp_reg_t) /* NBP remove */ -#define AIOCGETSTATE _IOR('a', 5, at_state_t) /* get AT global state */ -#define AIOCSETDEFZONE _IOW('a', 6, at_def_zone_t) - /* in single-port, router, and multihome modes, set default zone */ -#define AIOCSETROUTER _IOW('a', 7, at_router_params_t) -#define AIOCGETROUTER _IOR('a', 8, at_router_params_t) -#define AIOCSIFADDR _IOW('a', 9, at_if_cfg_t) /* init AT interface */ -#define AIOCSTARTROUTER _IOR('a',10, at_kern_err_t) /* start AT routing */ -#define AIOCREGLOCALZN _IOW('a',11, at_nvestr_t) - /* in single-port mode, register local zone in kernel table for - future use in error checking NBP registration */ -#define AIOCSETZNUSAGE _IOW('a',12, zone_usage_t) - /* in router mode, set up each zone for interfaces being seeded */ -#define AIOCGETZNUSAGE _IOWR('a',13, zone_usage_t) - /* in router and multihome modes, given a zone index, report zone name - and interfaces corresponding to that zone */ - -/* values for ifFlags */ -#define LAP_STATE_MASK 0xf /* low order bits used to report - IF state, by AIOCGETIFCFG */ -#define AT_IFF_DEFAULT 0x40000 -#define AT_IFF_AURP 0x20000 -#define RTR_NXNET_PORT 0x10000000 /* Non Extended net port */ -#define RTR_XNET_PORT 0x20000000 /* Extended net port */ -#define RTR_SEED_PORT 0x40000000 /* Seed port require config net values*/ - -/* elap_cfg 'flags' defines */ -#define ELAP_CFG_ZONELESS 0x01 /* true if we shouldn't set a zone - (to avoid generating a zip_getnetinfo - when routing) */ -#define ELAP_CFG_HOME 0x02 /* designate home port (one allowed) */ -#define ELAP_CFG_SEED 0x08 /* set if it's a seed port */ - -#ifdef KERNEL_PRIVATE -extern TAILQ_HEAD(at_ifQueueHd, at_ifaddr) at_ifQueueHd; - -int at_control(struct socket *, u_long, caddr_t, struct ifnet *); -int ddp_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, - struct mbuf *); -int ddp_ctloutput(struct socket *, struct sockopt *); -void ddp_init(void); -void ddp_slowtimo(void); -#endif /* KERNEL_PRIVATE */ - -/* - * Define AppleTalk event subclass and specific AppleTalk events. 
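Before the event definitions that follow, one note on the at_ifaddr list above: interfaces hang off at_ifQueueHd through the aa_link TAILQ glue, and at_pcbbind() (earlier, in at_pcb.c) validates a caller-supplied local address with a TAILQ_FOREACH compare against each interface's ifThisNode. A self-contained sketch of that walk; struct ifrec and its field names are simplified stand-ins for at_ifaddr_t:

#include <stdio.h>
#include <sys/queue.h>

struct at_addr { unsigned short s_net; unsigned char s_node; };

struct ifrec {
    struct at_addr node;          /* stands in for ifThisNode */
    TAILQ_ENTRY(ifrec) aa_link;
};
TAILQ_HEAD(ifq, ifrec);

/* Return nonzero if some interface owns the given net/node pair. */
static int addr_is_local(struct ifq *q, const struct at_addr *a)
{
    struct ifrec *ifp;
    TAILQ_FOREACH(ifp, q, aa_link)
        if (ifp->node.s_net == a->s_net && ifp->node.s_node == a->s_node)
            return 1;
    return 0;
}

int main(void)
{
    struct ifq q = TAILQ_HEAD_INITIALIZER(q);
    struct ifrec r;
    struct at_addr want;

    r.node.s_net = 10;
    r.node.s_node = 7;
    want = r.node;
    TAILQ_INSERT_TAIL(&q, &r, aa_link);
    printf("local: %d\n", addr_is_local(&q, &want)); /* 1 */
    return 0;
}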
- */ - -#define KEV_ATALK_SUBCLASS 5 - -#define KEV_ATALK_ENABLED 1 /* AppleTalk enabled from user space - node/net set and valid */ -#define KEV_ATALK_DISABLED 2 /* AppleTalk disabled from user space */ -#define KEV_ATALK_ZONEUPDATED 3 /* Zone for this node set/changed */ -#define KEV_ATALK_ROUTERUP 4 /* Seed router found with valid cable range */ -#define KEV_ATALK_ROUTERUP_INVALID 5 /* Seed router found with invalid cable range */ -#define KEV_ATALK_ROUTERDOWN 6 /* Seed router down */ -#define KEV_ATALK_ZONELISTCHANGED 7 /* Zone list changed by router */ - -struct kev_atalk_data { - struct net_event_data link_data; - union { - struct at_addr address; - at_nvestr_t zone; - } node_data; -}; - -#ifdef KERNEL_PRIVATE - -void atalk_post_msg(struct ifnet *ifp, u_long event_code, struct at_addr *address, at_nvestr_t *zone); -void aarp_sched_probe(void *); -void atalk_lock(void); -void atalk_unlock(void); -void appletalk_hack_start(void); -void ddp_input(gbuf_t *, at_ifaddr_t *); -struct etalk_addr; -void ddp_glean(gbuf_t *, at_ifaddr_t *, struct etalk_addr *); - -int pat_output(at_ifaddr_t *, struct mbuf *, unsigned char *, int); - -void ep_input(gbuf_t *, at_ifaddr_t *); -void zip_router_input(gbuf_t *, at_ifaddr_t *); -void nbp_input(gbuf_t *, at_ifaddr_t *); -void sip_input(gbuf_t *, at_ifaddr_t *); - -void ioc_ack(int, gbuf_t *, gref_t *); -int ddp_adjmsg(gbuf_t *, int ); -gbuf_t *ddp_growmsg(gbuf_t *, int ); - -struct proc; -int atalk_openref(gref_t *, int *, struct proc *); - -struct fileglob; -int atalk_closeref(struct fileglob *, gref_t **); - -int _ATputmsg(int, strbuf_t *, strbuf_t *, int , int *, void *); -int _ATgetmsg(int, strbuf_t *, strbuf_t *, int *, int *, void *); -int _ATsocket(int, int *, void *); - -void ddp_start(void); - -typedef void (*ddp_handler_func)(gbuf_t *, at_ifaddr_t *); -void add_ddp_handler(u_char, ddp_handler_func); -void init_ddp_handler(void); - -int elap_wput(gref_t *gref, gbuf_t *m); -int at_ioctl(struct atpcb *, u_long, caddr_t, int ); - -extern void at_purgeaddrs(struct ifnet *); - -#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_AT_VAR_H_ */ diff --git a/bsd/netat/atalk.exp b/bsd/netat/atalk.exp deleted file mode 100644 index 8bad4de70..000000000 --- a/bsd/netat/atalk.exp +++ /dev/null @@ -1,9 +0,0 @@ -#!/unix -* -ATsocket syscall -ATgetmsg syscall -ATputmsg syscall -ATPsndreq syscall -ATPsndrsp syscall -ATPgetreq syscall -ATPgetrsp syscall diff --git a/bsd/netat/atalk.imp b/bsd/netat/atalk.imp deleted file mode 100644 index d36d8e6e9..000000000 --- a/bsd/netat/atalk.imp +++ /dev/null @@ -1,9 +0,0 @@ -#!/unix -* -sys_ATsocket -sys_ATgetmsg -sys_ATputmsg -sys_ATPsndreq -sys_ATPsndrsp -sys_ATPgetreq -sys_ATPgetrsp diff --git a/bsd/netat/atp.h b/bsd/netat/atp.h deleted file mode 100644 index dda3669a9..000000000 --- a/bsd/netat/atp.h +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
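The kev_atalk_data payload above is a tagged union in disguise: the accompanying KEV_ATALK_* event code decides whether node_data carries a node address or a zone name. A sketch of filling and decoding such a payload; struct nvestr, struct ev_data, and post_event() are simplified, hypothetical stand-ins for the kernel types:

#include <stdio.h>
#include <string.h>

/* Simplified stand-ins for struct at_addr / at_nvestr_t. */
struct at_addr { unsigned short s_net; unsigned char s_node; };
struct nvestr  { unsigned char len; char str[32]; };

#define EV_ENABLED     1
#define EV_ZONEUPDATED 3

/* Same shape as kev_atalk_data above: one union, its meaning chosen
 * by the event code that travels with it. */
struct ev_data {
    union {
        struct at_addr address;
        struct nvestr  zone;
    } node_data;
};

static void post_event(int code, const struct ev_data *d)
{
    if (code == EV_ZONEUPDATED)
        printf("zone -> %.*s\n", d->node_data.zone.len, d->node_data.zone.str);
    else
        printf("addr -> %u.%u\n",
               d->node_data.address.s_net, d->node_data.address.s_node);
}

int main(void)
{
    struct ev_data d;
    memset(&d, 0, sizeof d);
    d.node_data.zone.len = 6;
    memcpy(d.node_data.zone.str, "Zone-A", 6);
    post_event(EV_ZONEUPDATED, &d);
    return 0;
}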
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * - * ORIGINS: 82 - * - * (C) COPYRIGHT Apple Computer, Inc. 1992-1996 - * All Rights Reserved - * - */ - -/* Definitions for ATP protocol and streams module, per - * AppleTalk Transaction Protocol documentation from - * `Inside AppleTalk', July 14, 1986. - */ - -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. - * - * The information contained herein is subject to change without - * notice and should not be construed as a commitment by Apple - * Computer, Inc. Apple Computer, Inc. assumes no responsibility - * for any errors that may appear. - * - * Confidential and Proprietary to Apple Computer, Inc. - */ - -#ifndef _NETAT_ATP_H_ -#define _NETAT_ATP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -/* ATP function codes */ - -#define ATP_CMD_TREQ 0x01 /* TRequest packet */ -#define ATP_CMD_TRESP 0x02 /* TResponse packet */ -#define ATP_CMD_TREL 0x03 /* TRelease packet */ - -/* Miscellaneous definitions */ - -#define ATP_DEF_RETRIES 8 /* Default for maximum retry count */ -#define ATP_DEF_INTERVAL 2 /* Default for retry interval in seconds */ - -#define ATP_TRESP_MAX 8 /* Maximum number of Tresp pkts */ - -#define ATP_HDR_SIZE 8 /* Size of the ATP header */ -#define ATP_DATA_SIZE 578 /* Maximum size of the ATP data area */ - -/* Consts for asynch support */ -#define ATP_ASYNCH_REQ 1 -#define ATP_ASYNCH_RESP 2 - -/* Timer values for XO release timers */ -#define ATP_XO_DEF_REL_TIME 0 -#define ATP_XO_30SEC 0 -#define ATP_XO_1MIN 1 -#define ATP_XO_2MIN 2 -#define ATP_XO_4MIN 3 -#define ATP_XO_8MIN 4 - -typedef struct { -#if BYTE_ORDER == BIG_ENDIAN - unsigned cmd : 2, - xo : 1, - eom : 1, - sts : 1, - xo_relt : 3; -#endif -#if BYTE_ORDER == LITTLE_ENDIAN - unsigned xo_relt : 3, - sts : 1, - eom : 1, - xo : 1, - cmd : 2; -#endif - u_char bitmap; - ua_short tid; - ua_long user_bytes; - u_char data[ATP_DATA_SIZE]; -} at_atp_t; - -#define ATP_ATP_HDR(c) ((at_atp_t *)(&((at_ddp_t *)(c))->data[0])) - -#define TOTAL_ATP_HDR_SIZE (ATP_HDR_SIZE+DDP_X_HDR_SIZE) -#define ATP_CLEAR_CONTROL(c) (*(char *)(c) = 0) - -/* ATP ioctl interface */ - -/* Structure for the atp_set_default call */ - -#define ATP_INFINITE_RETRIES 0xffffffff /* means retry forever - * in the def_retries field - */ - -struct atp_set_default { - u_int def_retries; /* number of retries for a request */ - u_int def_rate; /* retry rate (in seconds/100) NB: the - * system may not be able to resolve - * delays of 100th of a second but will - * instead make a 'best effort' - */ - struct atpBDS *def_bdsp; /* BDS structure associated with this req */ - u_int def_BDSlen; 
/* size of BDS structure */ -}; - - -/* Return header from requests */ - -struct atp_result { - u_short count; /* the number of packets */ - u_short hdr; /* offset to header in buffer */ - u_short offset[8]; /* offset to the Nth packet in the buffer */ - u_short len[8]; /* length of the Nth packet */ -}; - -struct atpBDS { - ua_short bdsBuffSz; - ua_long bdsBuffAddr; - ua_short bdsDataSz; - unsigned char bdsUserData[4]; -}; - - -typedef struct { - u_short at_atpreq_type; - at_inet_t at_atpreq_to; - u_char at_atpreq_treq_user_bytes[4]; - u_char *at_atpreq_treq_data; - u_short at_atpreq_treq_length; - u_char at_atpreq_treq_bitmap; - u_char at_atpreq_xo; - u_char at_atpreq_xo_relt; - u_short at_atpreq_retry_timeout; - u_short at_atpreq_maximum_retries; - u_char at_atpreq_tresp_user_bytes[ATP_TRESP_MAX][4]; - u_char *at_atpreq_tresp_data[ATP_TRESP_MAX]; - u_short at_atpreq_tresp_lengths[ATP_TRESP_MAX]; - u_long at_atpreq_debug[4]; - u_short at_atpreq_tid; - u_char at_atpreq_tresp_bitmap; - u_char at_atpreq_tresp_eom_seqno; - u_char at_atpreq_got_trel; -} at_atpreq; - - -/* The ATP module ioctl commands */ - -#define AT_ATP_CANCEL_REQUEST (('|'<<8)|1) -#define AT_ATP_ISSUE_REQUEST (('|'<<8)|2) /* ALO */ -#define AT_ATP_ISSUE_REQUEST_DEF (('|'<<8)|3) /* XO */ -#define AT_ATP_ISSUE_REQUEST_DEF_NOTE (('|'<<8)|4) /* XO & nowait -- not needed*/ -#define AT_ATP_ISSUE_REQUEST_NOTE (('|'<<8)|5) /* ALO & nowait */ -#define AT_ATP_GET_POLL (('|'<<8)|6) -#define AT_ATP_RELEASE_RESPONSE (('|'<<8)|7) -#define AT_ATP_REQUEST_COMPLETE (('|'<<8)|8) -#define AT_ATP_SEND_FULL_RESPONSE (('|'<<8)|9) /* not used */ -#define AT_ATP_BIND_REQ (('|'<<8)|10) -#define AT_ATP_GET_CHANID (('|'<<8)|11) -#define AT_ATP_PEEK (('|'<<8)|12) -#define AT_ATP_ISSUE_REQUEST_TICKLE (('|'<<8)|13) /* ALO & nowait */ - -/* These macros don't really depend here, but since they're used only by the - * old ATP and old PAP, they're put here. Unisoft PAP includes this file. 
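The at_atp_t header above maps the first ATP byte with compiler bitfields, declared in opposite order under BIG_ENDIAN and LITTLE_ENDIAN so both layouts match the wire. Shift-and-mask accessors avoid the duplicated declarations entirely; a sketch, assuming the big-endian ordering reflects the wire format (command code in the top two bits, as the ATP_CMD_* values suggest):

#include <stdio.h>

/* The ATP control byte, wire layout assumed from the bitfields above:
 *   bits 7-6 cmd, bit 5 xo, bit 4 eom, bit 3 sts, bits 2-0 xo_relt. */
#define ATP_CMD(b)     (((b) >> 6) & 0x3)
#define ATP_XO(b)      (((b) >> 5) & 0x1)
#define ATP_EOM(b)     (((b) >> 4) & 0x1)
#define ATP_STS(b)     (((b) >> 3) & 0x1)
#define ATP_XO_RELT(b) ((b) & 0x7)

static unsigned char atp_ctl(unsigned cmd, unsigned xo, unsigned eom,
                             unsigned sts, unsigned xo_relt)
{
    return (unsigned char)((cmd << 6) | (xo << 5) | (eom << 4) |
                           (sts << 3) | (xo_relt & 0x7));
}

int main(void)
{
    unsigned char b = atp_ctl(2 /* TResp */, 1, 1, 0, 1 /* ~1 min */);
    printf("cmd=%u xo=%u eom=%u relt=%u\n",
           ATP_CMD(b), ATP_XO(b), ATP_EOM(b), ATP_XO_RELT(b));
    return 0;
}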
- */ -#define R16(x) UAS_VALUE(x) -#define W16(x,v) UAS_ASSIGN(x, v) -#define C16(x,v) UAS_UAS(x, v) - -/* - * these are the dispatch codes for - * the new atp_control system call - */ -#define ATP_SENDREQUEST 0 -#define ATP_GETRESPONSE 1 -#define ATP_SENDRESPONSE 2 -#define ATP_GETREQUEST 3 - -#ifdef KERNEL_PRIVATE - - -/* - * Stuff for accessing protocol headers - */ -#define AT_DDP_HDR(m) ((at_ddp_t *)(gbuf_rptr(m))) -#define AT_ATP_HDR(m) ((at_atp_t *)(&((at_ddp_t *)(gbuf_rptr(m)))->data[0])) - -/* - * Masks for accessing/manipulating the bitmap field in atp headers - */ - -#ifdef ATP_DECLARE -unsigned char atp_mask [] = { - 0x01, 0x02, 0x04, 0x08, - 0x10, 0x20, 0x40, 0x80, -}; - -unsigned char atp_lomask [] = { - 0x00, 0x01, 0x03, 0x07, - 0x0f, 0x1f, 0x3f, 0x7f, - 0xff -}; -#else -extern unsigned char atp_mask []; -extern unsigned char atp_lomask []; -#endif /* ATP_DECLARE */ - -/* - * doubly linked queue types and primitives - */ - -#define ATP_Q_ENTER(hdr, object, entry) { \ - if ((hdr).head) { \ - (hdr).head->entry.prev = (object); \ - (object)->entry.next = (hdr).head; \ - } else { \ - (hdr).tail = (object); \ - (object)->entry.next = NULL; \ - } \ - (object)->entry.prev = NULL; \ - (hdr).head = (object); \ - } - -#define ATP_Q_APPEND(hdr, object, entry) { \ - if ((hdr).head) { \ - (hdr).tail->entry.next = (object); \ - (object)->entry.prev = (hdr).tail; \ - } else { \ - (hdr).head = (object); \ - (object)->entry.prev = NULL; \ - } \ - (object)->entry.next = NULL; \ - (hdr).tail = (object); \ - } - -#define ATP_Q_REMOVE(hdr, object, entry) { \ - if ((object)->entry.prev) { \ - (object)->entry.prev->entry.next = (object)->entry.next;\ - } else { \ - (hdr).head = (object)->entry.next; \ - } \ - if ((object)->entry.next) { \ - (object)->entry.next->entry.prev = (object)->entry.prev;\ - } else { \ - (hdr).tail = (object)->entry.prev; \ - } \ - } - -struct atp_rcb_qhead { - struct atp_rcb *head; - struct atp_rcb *tail; -}; - -struct atp_rcb_q { - struct atp_rcb *prev; - struct atp_rcb *next; -}; - -struct atp_trans_qhead { - struct atp_trans *head; - struct atp_trans *tail; -}; - -struct atp_trans_q { - struct atp_trans *prev; - struct atp_trans *next; -}; - -/* - * Locally saved remote node address - */ - -struct atp_socket { - u_short net; - at_node node; - at_socket socket; -}; - -/* - * transaction control block (local context at requester end) - */ - -struct atp_trans; -typedef void (*atp_tmo_func)(struct atp_trans *); - -struct atp_trans { - struct atp_trans_q tr_list; /* trans list */ - struct atp_state *tr_queue; /* state data structure */ - gbuf_t *tr_xmt; /* message being sent */ - gbuf_t *tr_rcv[8]; /* message being rcvd */ - unsigned int tr_retry; /* # retries left */ - unsigned int tr_timeout; /* timer interval */ - char tr_state; /* current state */ - char tr_rsp_wait; /* waiting for transaction response */ - char filler[2]; - unsigned char tr_xo; /* execute once transaction */ - unsigned char tr_bitmap; /* requested bitmask */ - unsigned short tr_tid; /* transaction id */ - struct atp_socket tr_socket; /* the remote socket id */ - struct atp_trans_q tr_snd_wait; /* list of transactions waiting - for space to send a msg */ - at_socket tr_local_socket; - at_node tr_local_node; - at_net tr_local_net; - gbuf_t *tr_bdsp; /* bds structure pointer */ - unsigned int tr_tmo_delta; - atp_tmo_func tr_tmo_func; - struct atp_trans *tr_tmo_next; - struct atp_trans *tr_tmo_prev; - atlock_t tr_lock; - atevent_t tr_event; -}; - -#define TRANS_TIMEOUT 0 /* waiting for a reply */ -#define 
TRANS_REQUEST 1 /* waiting to send a request */ -#define TRANS_RELEASE 2 /* waiting to send a release */ -#define TRANS_DONE 3 /* done - waiting for poll to complete */ -#define TRANS_FAILED 4 /* done - waiting for poll to report failure */ -#define TRANS_ABORTING 5 /* waiting on atp_trans_abort list for thread to wakeup */ - -/* - * reply control block (local context at replying end) - */ - -struct atp_rcb { - struct atp_rcb_q rc_list; /* rcb list */ - struct atp_rcb_q rc_tlist; - struct atp_state *rc_queue; /* state data structure */ - gbuf_t *rc_xmt; /* replies being sent */ - gbuf_t *rc_ioctl; /* waiting ioctl */ - char rc_snd[8]; /* replies actually to be sent */ - int rc_pktcnt; /* no of pkts in this trans */ - short rc_state; /* current state */ - unsigned char rc_xo; /* execute once transaction */ - at_node rc_local_node; - at_net rc_local_net; - short rc_rep_waiting; /* in the reply wait list */ - int rc_timestamp; /* reply timer */ - unsigned char rc_bitmap; /* replied bitmask */ - unsigned char rc_not_sent_bitmap; /* replies not yet sent */ - unsigned short rc_tid; /* transaction id */ - struct atp_socket rc_socket; /* the remote socket id */ -}; - -#define RCB_UNQUEUED 0 /* newly allocated, not q'd */ -#define RCB_RESPONDING 2 /* waiting all of response from process*/ -#define RCB_RESPONSE_FULL 3 /* got all of response */ -#define RCB_RELEASED 4 /* got our release */ -#define RCB_PENDING 5 /* a no wait rcb is full */ -#define RCB_NOTIFIED 6 -#define RCB_SENDING 7 /* we're currently xmitting this trans */ - -/* - * socket state (per module data structure) - */ - -struct atp_state { - gref_t *atp_gref; /* must be the first entry */ - int atp_pid; /* process id, must be the second entry */ - gbuf_t *atp_msgq; /* data msg, must be the third entry */ - unsigned char dflag; /* structure flag, must be the fourth entry */ - unsigned char filler; - short atp_socket_no; - short atp_flags; /* general flags */ - struct atp_trans_qhead atp_trans_wait; /* pending transaction list */ - struct atp_state *atp_trans_waiting; /* list of atps waiting for a - free transaction */ - unsigned int atp_retry; /* retry count */ - unsigned int atp_timeout; /* retry timeout */ - struct atp_state *atp_rcb_waiting; - struct atp_rcb_qhead atp_rcb; /* active rcbs */ - struct atp_rcb_qhead atp_attached; /* rcb's waiting to be read */ - atlock_t atp_lock; - atevent_t atp_event; - atlock_t atp_delay_lock; - atevent_t atp_delay_event; -}; - - -/* - * atp_state flag definitions - */ -#define ATP_CLOSING 0x08 /* atp stream in process of closing */ - - -/* - * tcb/rcb/state allocation queues - */ - -/* - * Size defines; must be outside following #ifdef to permit - * debugging code to reference independent of ATP_DECLARE - */ -#define NATP_RCB 512 /* the number of ATP RCBs at once */ -#define NATP_STATE 192 /* the number of ATP sockets open at once */ - /* note: I made NATP_STATE == NSOCKETS */ - -#ifdef ATP_DECLARE -struct atp_trans *atp_trans_free_list = NULL; /* free transactions */ -struct atp_rcb *atp_rcb_free_list = NULL; /* free rcbs */ -struct atp_state *atp_free_list = NULL; /* free atp states */ -struct atp_trans_qhead atp_trans_abort; /* aborted trans list */ -struct atp_rcb* atp_rcb_data = NULL; -struct atp_state* atp_state_data=NULL; - - -#else -extern struct atp_trans *atp_trans_free_list; /* free transactions */ -extern struct atp_rcb *atp_rcb_free_list; /* free rcbs */ -extern struct atp_state *atp_free_list; /* free atp states */ -extern struct atp_rcb* atp_rcb_data; -extern struct atp_state* atp_state_data;
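The ATP_Q_ENTER / ATP_Q_APPEND / ATP_Q_REMOVE macros above implement an intrusive doubly linked list with an explicit head and tail; the third argument names which embedded link field to use, so one object can sit on two queues at once (as atp_rcb does with rc_list and rc_tlist). A toy exercise of the same shape, with Q_ENTER/Q_REMOVE as local reproductions of the pattern:

#include <stdio.h>
#include <stddef.h>

/* Push at head, mirroring ATP_Q_ENTER above. */
#define Q_ENTER(hdr, obj, entry) { \
    if ((hdr).head) { (hdr).head->entry.prev = (obj); (obj)->entry.next = (hdr).head; } \
    else { (hdr).tail = (obj); (obj)->entry.next = NULL; } \
    (obj)->entry.prev = NULL; (hdr).head = (obj); }

/* Unlink from anywhere, mirroring ATP_Q_REMOVE above. */
#define Q_REMOVE(hdr, obj, entry) { \
    if ((obj)->entry.prev) (obj)->entry.prev->entry.next = (obj)->entry.next; \
    else (hdr).head = (obj)->entry.next; \
    if ((obj)->entry.next) (obj)->entry.next->entry.prev = (obj)->entry.prev; \
    else (hdr).tail = (obj)->entry.prev; }

struct node;
struct link  { struct node *prev, *next; };
struct node  { int id; struct link list; };
struct qhead { struct node *head, *tail; };

int main(void)
{
    struct qhead q = { NULL, NULL };
    struct node a = { 1, { NULL, NULL } }, b = { 2, { NULL, NULL } };

    Q_ENTER(q, &a, list);   /* queue is [a]    */
    Q_ENTER(q, &b, list);   /* queue is [b, a] */
    Q_REMOVE(q, &a, list);  /* queue is [b]    */
    printf("head=%d tail=%d\n", q.head->id, q.tail->id); /* 2 2 */
    return 0;
}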
-extern struct atp_trans_qhead atp_trans_abort; /* aborting trans list */ - -#endif /* ATP_DECLARE */ - -/* prototypes */ -void atp_rcb_timer(struct atp_trans *); -struct atp_rcb *atp_rcb_alloc(struct atp_state *); -struct atp_trans *atp_trans_alloc(struct atp_state *); - -void atp_send_req(gref_t *, gbuf_t *); -void atp_drop_req(gref_t *, gbuf_t *); -void atp_send_rsp(gref_t *, gbuf_t *, int); -void atp_wput(gref_t *, gbuf_t *); -void atp_rput(gref_t *, gbuf_t *); -void atp_retry_req(void *); -void atp_stop(gbuf_t *, int); -void atp_cancel_req(gref_t *, unsigned short); -int atp_open(gref_t *, int); -int atp_bind(gref_t *, unsigned int, unsigned char *); -int atp_close(gref_t *, int); -gbuf_t *atp_build_release(struct atp_trans *); -void atp_req_timeout(struct atp_trans *); -void atp_free(struct atp_trans *); -void atp_x_done(struct atp_trans *); -void atp_send(struct atp_trans *); -void atp_rsp_ind(struct atp_trans *, gbuf_t *); -void atp_trans_free(struct atp_trans *); -void atp_reply(struct atp_rcb *); -void atp_rcb_free(struct atp_rcb *); -void atp_send_replies(struct atp_state *, struct atp_rcb *); -void atp_dequeue_atp(struct atp_state *); -void atp_iocack(struct atp_state *, gbuf_t *); -void atp_req_ind(struct atp_state *, gbuf_t *); -void atp_iocnak(struct atp_state *, gbuf_t *, int); -void atp_trp_timer(void *, int); -void atp_timout(atp_tmo_func, struct atp_trans *, int); -void atp_untimout(atp_tmo_func, struct atp_trans *); -int atp_tid(struct atp_state *); -void atp_init(void); -void atp_link(void); -void atp_unlink(void); -int atp_input(gbuf_t *); -void atp_delete_free_clusters(void *); - -#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_ATP_H_ */ diff --git a/bsd/netat/atp_alloc.c b/bsd/netat/atp_alloc.c deleted file mode 100644 index cf9a4375c..000000000 --- a/bsd/netat/atp_alloc.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Modified for MP, 1996 by Tuyen Nguyen */ -/* - * tcb (transaction) allocation routine. If no transaction data structure - * is available then put the module on a queue of modules waiting - * for transaction structures. 
When a tcb is available it will be - * removed from this list and its write queue will be scheduled. - * Version 1.4 of atp_alloc.c on 89/02/09 17:53:01 - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/*### MacOSX MCLBYTE is 2048, not 4096 like AIX */ -#define TRPS_PER_BLK 16 - -gbuf_t *atp_resource_m = 0; -extern caddr_t atp_free_cluster_list; -extern struct atp_rcb_qhead atp_need_rel; - - -struct atp_trans *atp_trans_alloc(atp) -struct atp_state *atp; -{ - int i; - gbuf_t *m; - register struct atp_trans *trp, *trp_array; - - if (atp_trans_free_list == 0) { - if ((m = gbuf_alloc_wait(TRPS_PER_BLK*sizeof(struct atp_trans), - atp == NULL)) == 0) - return (struct atp_trans *)0; - bzero(gbuf_rptr(m), TRPS_PER_BLK*sizeof(struct atp_trans)); - trp_array = (struct atp_trans *)gbuf_rptr(m); - for (i=0; i < TRPS_PER_BLK-1; i++) - trp_array[i].tr_list.next = (struct atp_trans *)&trp_array[i+1]; - gbuf_cont(m) = atp_resource_m; - atp_resource_m = m; - trp_array[i].tr_list.next = atp_trans_free_list; - atp_trans_free_list = (struct atp_trans *)&trp_array[0]; - } - - trp = atp_trans_free_list; - atp_trans_free_list = trp->tr_list.next; - trp->tr_queue = atp; - trp->tr_state = TRANS_TIMEOUT; - trp->tr_local_node = 0; - ATEVENTINIT(trp->tr_event); - - dPrintf(D_M_ATP_LOW, D_L_TRACE, - ("atp_trans_alloc(0x%x): alloc'd trp 0x%x\n", - (u_int) atp, (u_int) trp)); - return trp; -} /* atp_trans_alloc */ - -/* - * tcb free routine - if modules are waiting schedule them - * always called at 'lock' - */ - -void atp_trans_free(trp) -register struct atp_trans *trp; -{ - - trp->tr_queue = 0; - trp->tr_list.next = atp_trans_free_list; - atp_trans_free_list = trp; -} - -/* - * This routine allocates a rcb, if none are available it makes sure the - * the write service routine will be called when one is - * always called at 'lock' - */ - -struct atp_rcb *atp_rcb_alloc(atp) -struct atp_state *atp; -{ - register struct atp_rcb *rcbp; - - if ((rcbp = atp_rcb_free_list) != NULL) { - atp_rcb_free_list = rcbp->rc_list.next; - rcbp->rc_queue = atp; - rcbp->rc_pktcnt = 0; - rcbp->rc_local_node = 0; - } - dPrintf(D_M_ATP_LOW, D_L_TRACE, - ("atp_rcb_alloc: allocated rcbp 0x%x\n", (u_int) rcbp)); - return(rcbp); -} - -/* - * Here we free rcbs, if required reschedule other people waiting for them - * always called at 'lock' - */ - -void atp_rcb_free(rcbp) -register struct atp_rcb *rcbp; -{ - register struct atp_state *atp; - register int i; - register int rc_state; - - dPrintf(D_M_ATP_LOW, D_L_TRACE, - ("atp_rcb_free: freeing rcbp 0x%x\n", (u_int) rcbp)); - atp = rcbp->rc_queue; - if ((rc_state = rcbp->rc_state) == -1) { - dPrintf(D_M_ATP, D_L_WARNING, - ("atp_rcb_free(%d): tid=%d,loc=%d,rem=%d\n", - 0, rcbp->rc_tid, - rcbp->rc_socket.socket, atp->atp_socket_no)); - return; - } - rcbp->rc_state = -1; - rcbp->rc_xo = 0; - rcbp->rc_queue = 0; - - if (rcbp->rc_timestamp) { - rcbp->rc_timestamp = 0; - ATP_Q_REMOVE(atp_need_rel, rcbp, rc_tlist); - rcbp->rc_tlist.prev = NULL; - rcbp->rc_tlist.next = NULL; - } - - if (rcbp->rc_xmt) { - gbuf_freem(rcbp->rc_xmt); /* *** bad free is the second mbuf in this chain *** */ - rcbp->rc_xmt = NULL; - for (i=0; i < rcbp->rc_pktcnt; i++) - rcbp->rc_snd[i] = 0; - } - if (atp_free_cluster_list) - atp_delete_free_clusters(NULL); - if (rc_state != RCB_UNQUEUED) { - if (rc_state == RCB_PENDING) { - 
ATP_Q_REMOVE(atp->atp_attached, rcbp, rc_list); - } else { - ATP_Q_REMOVE(atp->atp_rcb, rcbp, rc_list); - } - } - if (rcbp->rc_ioctl) { - gbuf_freem(rcbp->rc_ioctl); - rcbp->rc_ioctl = NULL; - } - rcbp->rc_list.next = atp_rcb_free_list; - atp_rcb_free_list = rcbp; -} diff --git a/bsd/netat/atp_misc.c b/bsd/netat/atp_misc.c deleted file mode 100644 index 730b55e5f..000000000 --- a/bsd/netat/atp_misc.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
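atp_trans_alloc() above refills its free list a block at a time: when the list is empty it grabs one buffer sized for TRPS_PER_BLK transactions, threads the entries together, and pops from the head; atp_trans_free() simply pushes back. A userland sketch of the same carving, using calloc in place of gbuf_alloc_wait and omitting the block bookkeeping the original keeps on atp_resource_m:

#include <stdio.h>
#include <stdlib.h>

#define PER_BLK 16 /* mirrors TRPS_PER_BLK above */

struct trans { struct trans *next; int id; };
static struct trans *free_list;

/* Pop one transaction, carving a fresh 16-entry block when empty. */
static struct trans *trans_alloc(void)
{
    if (!free_list) {
        struct trans *blk = calloc(PER_BLK, sizeof *blk);
        if (!blk)
            return NULL;
        for (int i = 0; i < PER_BLK - 1; i++)
            blk[i].next = &blk[i + 1];
        blk[PER_BLK - 1].next = free_list; /* NULL here */
        free_list = blk;
    }
    struct trans *t = free_list;
    free_list = t->next;
    return t;
}

/* Push back on the free list, as atp_trans_free() does. */
static void trans_free(struct trans *t)
{
    t->next = free_list;
    free_list = t;
}

int main(void)
{
    struct trans *t = trans_alloc();
    printf("got %p, free list now %p\n", (void *)t, (void *)free_list);
    trans_free(t);
    return 0;
}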
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -extern struct atp_rcb_qhead atp_need_rel; -extern struct atp_trans *trp_tmo_rcb; - -/* - * The request timer retries a request, if all retries are used up - * it returns a NAK - */ - -void -atp_req_timeout(trp) -register struct atp_trans *trp; -{ - register gbuf_t *m; - gref_t *gref; - struct atp_state *atp; - struct atp_trans *ctrp; - - if ((atp = trp->tr_queue) == 0) - return; - if (atp->atp_flags & ATP_CLOSING) - return; - - for (ctrp = atp->atp_trans_wait.head; ctrp; ctrp = ctrp->tr_list.next) { - if (ctrp == trp) - break; - } - if (ctrp != trp) - return; - - if ((m = gbuf_cont(trp->tr_xmt)) == NULL) - m = trp->tr_xmt; /* issued via the new interface */ - - if (trp->tr_retry == 0) { - trp->tr_state = TRANS_FAILED; - if (m == trp->tr_xmt) { - trp->tr_xmt = NULL; -l_notify: - gbuf_wset(m,1); - *gbuf_rptr(m) = 99; - gbuf_set_type(m, MSG_DATA); - gref = trp->tr_queue->atp_gref; - atalk_putnext(gref, m); - - return; - } - dPrintf(D_M_ATP_LOW,D_L_INFO, ("atp_req_timeout: skt=%d\n", - trp->tr_local_socket)); - m = trp->tr_xmt; - switch(((ioc_t *)(gbuf_rptr(trp->tr_xmt)))->ioc_cmd) { - case AT_ATP_ISSUE_REQUEST: - trp->tr_xmt = NULL; - if (trp->tr_queue->dflag) - ((ioc_t *)gbuf_rptr(m))->ioc_cmd = AT_ATP_REQUEST_COMPLETE; - else if (trp->tr_bdsp == NULL) { - gbuf_freem(m); - if (trp->tr_rsp_wait) - wakeup(&trp->tr_event); - break; - } - atp_iocnak(trp->tr_queue, m, ETIMEDOUT); - atp_free(trp); - return; - - case AT_ATP_ISSUE_REQUEST_NOTE: - case AT_ATP_ISSUE_REQUEST_TICKLE: - trp->tr_xmt = gbuf_cont(m); - gbuf_cont(m) = NULL; - goto l_notify; - } - } else { - (AT_ATP_HDR(m))->bitmap = trp->tr_bitmap; - - if (trp->tr_retry != (unsigned int) ATP_INFINITE_RETRIES) - trp->tr_retry--; - atp_send(trp); - } -} - - -/* - * atp_free frees up a request, cleaning up the queues and freeing - * the request packet - * always called at 'lock' - */ - -void atp_free(trp) -register struct atp_trans *trp; -{ - register struct atp_state *atp; - register int i; - - dPrintf(D_M_ATP_LOW, D_L_TRACE, - ("atp_free: freeing trp 0x%x\n", (u_int) trp)); - - - if (trp->tr_state == TRANS_ABORTING) { - ATP_Q_REMOVE(atp_trans_abort, trp, tr_list); - trp->tr_state = TRANS_DONE; - } - else { - if (trp->tr_tmo_func) - atp_untimout(atp_req_timeout, trp); - - atp = trp->tr_queue; - ATP_Q_REMOVE(atp->atp_trans_wait, trp, tr_list); - - if (trp->tr_xmt) { - gbuf_freem(trp->tr_xmt); - trp->tr_xmt = NULL; - } - for (i = 0; i < 8; i++) { - if (trp->tr_rcv[i]) { - gbuf_freem(trp->tr_rcv[i]); - trp->tr_rcv[i] = NULL; - } - } - if (trp->tr_bdsp) { - gbuf_freem(trp->tr_bdsp); - trp->tr_bdsp = NULL; - } - - if (trp->tr_rsp_wait) { - trp->tr_state = TRANS_ABORTING; - ATP_Q_APPEND(atp_trans_abort, trp, tr_list); - wakeup(&trp->tr_event); - return; - } - } - - atp_trans_free(trp); -} /* atp_free */ - - -/* - * atp_send transmits a request packet by queuing it (if it isn't already) and - * scheduling the queue - */ - -void atp_send(trp) -register struct atp_trans *trp; -{ - gbuf_t *m; - struct atp_state *atp; - - dPrintf(D_M_ATP_LOW, D_L_OUTPUT, ("atp_send: trp=0x%x, loc=%d\n", - (u_int) trp->tr_queue, trp->tr_local_socket)); - - if ((atp = trp->tr_queue) != 0) { - if (trp->tr_state == TRANS_TIMEOUT) { - if ((m = gbuf_cont(trp->tr_xmt)) == NULL) - m = trp->tr_xmt; - - /* - * Now either release the transaction or start the timer - */ - if 
(!trp->tr_retry && !trp->tr_bitmap && !trp->tr_xo) { - m = (gbuf_t *)gbuf_copym(m); - atp_x_done(trp); - } else { - m = (gbuf_t *)gbuf_dupm(m); - - atp_timout(atp_req_timeout, trp, trp->tr_timeout); - } - - if (m) { - trace_mbufs(D_M_ATP_LOW, " m", m); - DDP_OUTPUT(m); - } - } - } -} - - -/* - * atp_reply sends all the available messages in the bitmap again - * by queueing us to the write service routine - */ - -void atp_reply(rcbp) -register struct atp_rcb *rcbp; -{ - register struct atp_state *atp; - register int i; - - if ((atp = rcbp->rc_queue) != 0) { - for (i = 0; i < rcbp->rc_pktcnt; i++) { - if (rcbp->rc_bitmap&atp_mask[i]) - rcbp->rc_snd[i] = 1; - else - rcbp->rc_snd[i] = 0; - } - if (rcbp->rc_rep_waiting == 0) { - rcbp->rc_state = RCB_SENDING; - rcbp->rc_rep_waiting = 1; - atp_send_replies(atp, rcbp); - } - } -} - - -/* - * The rcb timer just frees the rcb, this happens when we missed a release for XO - */ - -void atp_rcb_timer(__unused struct atp_trans *junk) -{ - register struct atp_rcb *rcbp; - register struct atp_rcb *next_rcbp; - struct timeval timenow; - -l_again: - getmicrouptime(&timenow); - for (rcbp = atp_need_rel.head; rcbp; rcbp = next_rcbp) { - next_rcbp = rcbp->rc_tlist.next; - - if ((timenow.tv_sec - rcbp->rc_timestamp) > 30) { - atp_rcb_free(rcbp); - goto l_again; - } - } - atp_timout(atp_rcb_timer, trp_tmo_rcb, 10 * HZ); -} - -void atp_iocack(atp, m) -struct atp_state *atp; -register gbuf_t *m; -{ - if (gbuf_type(m) == MSG_IOCTL) - gbuf_set_type(m, MSG_IOCACK); - if (gbuf_cont(m)) - ((ioc_t *)gbuf_rptr(m))->ioc_count = gbuf_msgsize(gbuf_cont(m)); - else - ((ioc_t *)gbuf_rptr(m))->ioc_count = 0; - - if (atp->dflag) - asp_ack_reply(atp->atp_gref, m); - else - atalk_putnext(atp->atp_gref, m); -} - -void atp_iocnak(atp, m, err) -struct atp_state *atp; -register gbuf_t *m; -register int err; -{ - if (gbuf_type(m) == MSG_IOCTL) - gbuf_set_type(m, MSG_IOCNAK); - ((ioc_t *)gbuf_rptr(m))->ioc_count = 0; - ((ioc_t *)gbuf_rptr(m))->ioc_error = err ? err : ENXIO; - ((ioc_t *)gbuf_rptr(m))->ioc_rval = -1; - if (gbuf_cont(m)) { - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - } - - if (atp->dflag) - asp_nak_reply(atp->atp_gref, m); - else - atalk_putnext(atp->atp_gref, m); -} - -/* - * Generate a transaction id for a socket - */ -static int lasttid; -int atp_tid(atp) -register struct atp_state *atp; -{ - register int i; - register struct atp_trans *trp; - - for (i = lasttid;;) { - i = (i+1)&0xffff; - - for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { - if (trp->tr_tid == i) - break; - } - if (trp == NULL) { - lasttid = i; - return(i); - } - } -} diff --git a/bsd/netat/atp_open.c b/bsd/netat/atp_open.c deleted file mode 100644 index 28847f502..000000000 --- a/bsd/netat/atp_open.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
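atp_tid() above generates transaction ids by advancing a 16-bit counter from the last value handed out and rescanning the pending-transaction list until it finds an id nobody is using (and, like the original, it would spin forever if all 65536 were busy). A self-contained sketch; struct trans and next_tid() are hypothetical simplifications of atp_trans and atp_tid():

#include <stdio.h>

struct trans { unsigned short tid; struct trans *next; };

static int lasttid;

/* Pick the next 16-bit id not used by any pending transaction. */
static int next_tid(struct trans *pending)
{
    int i = lasttid;
    for (;;) {
        struct trans *t;
        i = (i + 1) & 0xffff;        /* wrap at 16 bits */
        for (t = pending; t; t = t->next)
            if (t->tid == i)
                break;
        if (t == NULL)               /* nobody owns i: take it */
            return lasttid = i;
    }
}

int main(void)
{
    struct trans t2 = { 2, NULL }, t1 = { 1, &t2 };
    lasttid = 0;
    printf("next tid: %d\n", next_tid(&t1)); /* ids 1 and 2 busy -> 3 */
    return 0;
}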
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ -#define ATP_DECLARE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for kernel_map */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * The init routine creates all the free lists - * Version 1.4 of atp_open.c on 89/02/09 17:53:11 - */ - -int atp_inited = 0; -struct atp_rcb_qhead atp_need_rel; - -/**********/ -int atp_pidM[256]; -gref_t *atp_inputQ[256]; -struct atp_state *atp_used_list; - -int atp_input(mp) - gbuf_t *mp; -{ - register gref_t *gref; - - switch (gbuf_type(mp)) { - case MSG_DATA: - gref = atp_inputQ[((at_ddp_t *)gbuf_rptr(mp))->dst_socket]; - if ((gref == 0) || (gref == (gref_t *)1)) { - dPrintf(D_M_ATP, D_L_WARNING, ("atp_input: no socket, skt=%d\n", - ((at_ddp_t *)gbuf_rptr(mp))->dst_socket)); - gbuf_freem(mp); - return 0; - } - break; - - case MSG_IOCACK: - case MSG_IOCNAK: - gref = (gref_t *)((ioc_t *)gbuf_rptr(mp))->ioc_private; - break; - - case MSG_IOCTL: - default: - dPrintf(D_M_ATP, D_L_WARNING, ("atp_input: unknown msg, type=%d\n", - gbuf_type(mp))); - gbuf_freem(mp); - return 0; - } - - atp_rput(gref, mp); - return 0; -} - -/**********/ -void atp_init(void) -{ - if (!atp_inited) { - atp_inited = 1; - atp_used_list = 0; - atp_trans_abort.head = NULL; - atp_trans_abort.tail = NULL; - - atp_need_rel.head = NULL; - atp_need_rel.tail = NULL; - - bzero(atp_inputQ, sizeof(atp_inputQ)); - bzero(atp_pidM, sizeof(atp_pidM)); - asp_init(); - } -} - -/* - * The open routine allocates a state structure - */ - -/*ARGSUSED*/ -int atp_open(gref, flag) - gref_t *gref; - int flag; -{ - register struct atp_state *atp; - register int i; - vm_offset_t temp; - - /* - * Allocate and init state and reply control block lists - * if this is the first open - */ - if (atp_rcb_data == NULL) { - if (kmem_alloc(kernel_map, &temp, sizeof(struct atp_rcb) * NATP_RCB) != KERN_SUCCESS) - return(ENOMEM); - if (atp_rcb_data == NULL) { - bzero((caddr_t)temp, sizeof(struct atp_rcb) * NATP_RCB); - atp_rcb_data = (struct atp_rcb*)temp; - for (i = 0; i < NATP_RCB; i++) { - atp_rcb_data[i].rc_list.next = atp_rcb_free_list; - atp_rcb_free_list = &atp_rcb_data[i]; - } - } else - kmem_free(kernel_map, temp, sizeof(struct atp_rcb) * NATP_RCB); /* already allocated by another process */ - } - - if (atp_state_data == NULL) { - if (kmem_alloc(kernel_map, &temp, sizeof(struct atp_state) * NATP_STATE) != KERN_SUCCESS) - return(ENOMEM); - if (atp_state_data == NULL) { - bzero((caddr_t)temp, sizeof(struct atp_state) * NATP_STATE); - atp_state_data = (struct atp_state*) temp; - for (i = 0; i < NATP_STATE; i++) { - atp_state_data[i].atp_trans_waiting = atp_free_list; 
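/* (editorial note: the line above links entry i to the old head of
 * the free list and the line below makes it the new head -- a LIFO
 * push repeated for every preallocated atp_state; atp_trans_waiting
 * doubles as the free-list link field) */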
- atp_free_list = &atp_state_data[i]; - } - } else - kmem_free(kernel_map, temp, sizeof(struct atp_state) * NATP_STATE); - } - - - /* - * If no atp structure available return failure - */ - - if ((atp = atp_free_list) == NULL) - return(EAGAIN); - - /* - * Update free list - */ - - atp_free_list = atp->atp_trans_waiting; - - /* - * Initialize the data structure - */ - - atp->dflag = 0; - atp->atp_trans_wait.head = NULL; - atp->atp_trans_waiting = NULL; - atp->atp_gref = gref; - atp->atp_retry = 10; - atp->atp_timeout = HZ/8; - atp->atp_rcb_waiting = NULL; - atp->atp_rcb.head = NULL; - atp->atp_attached.head = NULL; - atp->atp_flags = T_MPSAFE; - atp->atp_socket_no = -1; - atp->atp_pid = gref->pid; - atp->atp_msgq = 0; - ATEVENTINIT(atp->atp_event); - ATEVENTINIT(atp->atp_delay_event); - gref->info = (void *)atp; - - /* - * Return success - */ - - if (flag) { - if ((atp->atp_trans_waiting = atp_used_list) != 0) - atp->atp_trans_waiting->atp_rcb_waiting = atp; - atp_used_list = atp; - } - return(0); -} - -/* - * The close routine frees all the data structures - */ - -/*ARGSUSED*/ -int atp_close(gref, flag) - gref_t *gref; - int flag; -{ - register struct atp_state *atp; - register struct atp_trans *trp; - register struct atp_rcb *rcbp; - int socket; - pid_t pid; - - atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - if (atp->atp_msgq) { - gbuf_freem(atp->atp_msgq); - atp->atp_msgq = 0; - } - - atp->atp_flags |= ATP_CLOSING; - socket = atp->atp_socket_no; - if (socket != -1) - atp_inputQ[socket] = (gref_t *)1; - - /* - * blow away all pending timers - */ - for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) - atp_untimout(atp_req_timeout, trp); - - /* - * Release pending transactions + rcbs - */ - while ((trp = atp->atp_trans_wait.head)) - atp_free(trp); - while ((rcbp = atp->atp_rcb.head)) - atp_rcb_free(rcbp); - while ((rcbp = atp->atp_attached.head)) - atp_rcb_free(rcbp); - - if (flag && (socket == -1)) - atp_dequeue_atp(atp); - - /* - * free the state variable - */ - atp->atp_socket_no = -1; - atp->atp_trans_waiting = atp_free_list; - atp_free_list = atp; - - if (socket != -1) { - pid = (pid_t)atp_pidM[socket]; - atp_pidM[socket] = 0; - atp_inputQ[socket] = NULL; - if (pid) - ddp_notify_nbp(socket, pid, DDP_ATP); - } - - return 0; -} diff --git a/bsd/netat/atp_read.c b/bsd/netat/atp_read.c deleted file mode 100644 index 940a58ae3..000000000 --- a/bsd/netat/atp_read.c +++ /dev/null @@ -1,562 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -__private_extern__ int atp_resp_seqno2big = 0; - -static void atp_trans_complete(struct atp_trans *); -void atp_x_done_locked(void *); -void atp_treq_event(void *); - -/* - * Decide what to do about received messages - * Version 1.7 of atp_read.c on 89/02/09 17:53:16 - */ - -void atp_treq_event(void *arg) -{ - register gref_t *gref = (gref_t *)arg; - register gbuf_t *m; - register struct atp_state *atp; - - atalk_lock(); - atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - - if (atp->dflag) { - if ((m = gbuf_alloc(sizeof(ioc_t), PRI_HI)) != NULL) { - gbuf_set_type(m, MSG_IOCTL); - gbuf_wset(m,sizeof(ioc_t)); - ((ioc_t *)gbuf_rptr(m))->ioc_cmd = AT_ATP_GET_POLL; - atp_wput(gref, m); - } - } - else if ((m = gbuf_alloc(1, PRI_HI)) != NULL) { - *gbuf_rptr(m) = 0; - gbuf_wset(m,1); - atalk_putnext(gref, m); - } - - if (m == 0) - timeout(atp_treq_event, gref, 10); - atalk_unlock(); -} - -void atp_rput(gref, m) -gref_t *gref; -gbuf_t *m; -{ - register at_atp_t *athp; - register struct atp_state *atp; - gbuf_t *m_asp = NULL; - struct timeval timenow; - u_short temp_net; - - atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - - switch(gbuf_type(m)) { - case MSG_DATA: - /* - * Decode the message, make sure it is an atp - * message - */ - if (((AT_DDP_HDR(m))->type != DDP_ATP) || - (atp->atp_flags & ATP_CLOSING)) { - gbuf_freem(m); - dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR), - ("atp_rput: dropping MSG, not atp\n")); - break; - } - - athp = AT_ATP_HDR(m); - dPrintf(D_M_ATP_LOW, D_L_INPUT, - ("atp_rput MSG_DATA: %s (%d)\n", - (athp->cmd == ATP_CMD_TRESP)? "TRESP": - (athp->cmd == ATP_CMD_TREL)? "TREL": - (athp->cmd == ATP_CMD_TREQ)? 
"TREQ": "unknown", - athp->cmd)); - trace_mbufs(D_M_ATP_LOW, " r", m); - - switch (athp->cmd) { - - case ATP_CMD_TRESP: - { - register struct atp_trans *trp; - register unsigned int seqno; - register at_ddp_t *ddp; - - /* - * we just got a response, find the trans record - */ - - for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { - if (trp->tr_tid == UAS_VALUE_NTOH(athp->tid)) - break; - } - - /* - * If we can't find one then ignore the message - */ - seqno = athp->bitmap; - if (seqno > 7) { - atp_resp_seqno2big++; - ddp = AT_DDP_HDR(m); - dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR), - ("atp_rput: dropping TRESP seqno too big, tid=%d,loc=%d,rem=%d.%d,seqno=%u\n", - UAS_VALUE_NTOH(athp->tid), - ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno)); - gbuf_freem(m); - return; - } - if (trp == NULL) { - ddp = AT_DDP_HDR(m); - dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR), - ("atp_rput: dropping TRESP, no trp,tid=%d,loc=%d,rem=%d.%d,seqno=%u\n", - UAS_VALUE_NTOH(athp->tid), - ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno)); - gbuf_freem(m); - return; - } - - /* - * If no longer valid, drop it - */ - if (trp->tr_state == TRANS_FAILED) { - ddp = AT_DDP_HDR(m); - dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR), - ("atp_rput: dropping TRESP, failed trp,tid=%d,loc=%d,rem=%d.%d\n", - UAS_VALUE_NTOH(athp->tid), - ddp->dst_socket, ddp->src_node, ddp->src_socket)); - gbuf_freem(m); - return; - } - - /* - * If we have already received it, ignore it - */ - if (!(trp->tr_bitmap&atp_mask[seqno]) || trp->tr_rcv[seqno]) { - ddp = AT_DDP_HDR(m); - dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR), - ("atp_rput: dropping TRESP, duplicate,tid=%d,loc=%d,rem=%d.%d,seqno=%u\n", - UAS_VALUE_NTOH(athp->tid), - ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno)); - gbuf_freem(m); - return; - } - - /* - * Update the received packet bitmap - */ - if (athp->eom) - trp->tr_bitmap &= atp_lomask[seqno]; - else - trp->tr_bitmap &= ~atp_mask[seqno]; - - /* - * Save the message in the trans record - */ - trp->tr_rcv[seqno] = m; - - /* - * If it isn't the first message then - * can the header - */ - if (seqno) - gbuf_rinc(m,DDP_X_HDR_SIZE); - - /* - * If we now have all the responses then return - * the message to the user - */ - if (trp->tr_bitmap == 0) { - - /* - * Cancel the request timer and any - * pending transmits - */ - atp_untimout(atp_req_timeout, trp); - - /* - * Send the results back to the user - */ - atp_x_done(trp); - return; - } - if (athp->sts) { - /* - * If they want treq again, send them - */ - atp_untimout(atp_req_timeout, trp); - atp_send(trp); - return; - } - return; - } - - case ATP_CMD_TREL: - { register struct atp_rcb *rcbp; - register at_ddp_t *ddp; - - /* - * Search for a matching transaction - */ - ddp = AT_DDP_HDR(m); - - for (rcbp = atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid) && - rcbp->rc_socket.node == ddp->src_node && - rcbp->rc_socket.net == NET_VALUE(ddp->src_net) && - rcbp->rc_socket.socket == ddp->src_socket) { - /* - * Mark the rcb released - */ - rcbp->rc_not_sent_bitmap = 0; - if (rcbp->rc_state == RCB_SENDING) - rcbp->rc_state = RCB_RELEASED; - else - { - ddp = 0; - atp_rcb_free(rcbp); - } - break; - } - } - - gbuf_freem(m); - return; - } - - - case ATP_CMD_TREQ: - { register struct atp_rcb *rcbp; - register at_ddp_t *ddp; - gbuf_t *m2; - - /* - * If it is a request message, first - * check to see - * if matches something in our active - * request queue - */ - ddp = AT_DDP_HDR(m); - - for (rcbp = 
atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid) && - rcbp->rc_socket.node == ddp->src_node && - rcbp->rc_socket.net == NET_VALUE(ddp->src_net) && - rcbp->rc_socket.socket == ddp->src_socket) - break; - } - /* - * If this is a new req then do - * something with it - */ - if (rcbp == NULL) { - /* - * see if it matches something in the - * attached request queue - * if it does, just release the message - * and go on about our business - */ - /* we just did this, why do again? -jjs 4-10-95 */ - for (rcbp = atp->atp_attached.head; rcbp; rcbp = rcbp->rc_list.next) { - if (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid) && - rcbp->rc_socket.node == ddp->src_node && - rcbp->rc_socket.net == NET_VALUE(ddp->src_net) && - rcbp->rc_socket.socket == ddp->src_socket) { - gbuf_freem(m); - dPrintf(D_M_ATP_LOW, D_L_INPUT, - ("atp_rput: dropping TREQ, matches req queue\n")); - return; - } - } - - /* - * assume someone is interested in - * an asynchronous incoming request - */ - if ((rcbp = atp_rcb_alloc(atp)) == NULL) { - gbuf_freem(m); - return; - } - rcbp->rc_state = RCB_UNQUEUED; - - rcbp->rc_local_node = ddp->dst_node; - temp_net = NET_VALUE(ddp->dst_net); - NET_ASSIGN_NOSWAP(rcbp->rc_local_net, temp_net); - rcbp->rc_socket.socket = ddp->src_socket; - rcbp->rc_socket.node = ddp->src_node; - rcbp->rc_socket.net = NET_VALUE(ddp->src_net); - rcbp->rc_tid = UAS_VALUE_NTOH(athp->tid); - rcbp->rc_bitmap = athp->bitmap; - rcbp->rc_not_sent_bitmap = athp->bitmap; - rcbp->rc_xo = athp->xo; - /* - * if async then send it as - * data - * otherwise, it is a synchronous ioctl so - * complete it - */ - if (atp->dflag) { /* for ASP? */ - if ((m2 = gbuf_alloc(sizeof(ioc_t), PRI_HI))) { - gbuf_set_type(m2, MSG_DATA); - gbuf_wset(m2,sizeof(ioc_t)); - ((ioc_t *)gbuf_rptr(m2))->ioc_cmd = AT_ATP_GET_POLL; - m_asp = m2; - } - } else if ((m2 = gbuf_alloc(1, PRI_HI))) { - *gbuf_rptr(m2) = 0; - gbuf_wset(m2,1); - atalk_putnext(gref, m2); - } - if (m2 == 0) { - dPrintf(D_M_ATP,D_L_WARNING, - ("atp_rput: out of buffer for TREQ\n")); - timeout(atp_treq_event, gref, 10); - } - rcbp->rc_ioctl = m; - - /* - * move it to the attached list - */ - dPrintf(D_M_ATP_LOW, D_L_INPUT, - ("atp_rput: moving to attached list\n")); - rcbp->rc_state = RCB_PENDING; - ATP_Q_APPEND(atp->atp_attached, rcbp, rc_list); - if (m_asp != NULL) { - atp_req_ind(atp, m_asp); - return; - } - } else { - dPrintf(D_M_ATP_LOW, D_L_INPUT, - ("atp_rput: found match, state:%d\n", - rcbp->rc_state)); - - /* - * Otherwise we have found a matching request - * look for what to do - */ - switch (rcbp->rc_state) { - case RCB_RESPONDING: - case RCB_RESPONSE_FULL: - /* - * If it is one we have in progress - * (either have all the responses - * or are waiting for them) - * update the bitmap and resend - * the replies - */ - getmicrouptime(&timenow); - if (rcbp->rc_timestamp) { - rcbp->rc_timestamp = timenow.tv_sec; - if (rcbp->rc_timestamp == 0) - rcbp->rc_timestamp = 1; - } - rcbp->rc_bitmap = athp->bitmap; - rcbp->rc_not_sent_bitmap = athp->bitmap; - gbuf_freem(m); - atp_reply(rcbp); - return; - - case RCB_RELEASED: - default: - /* - * If we have a release or - * we haven't sent any data yet - * ignore the request - */ - gbuf_freem(m); - return; - } - } - return; - } - - default: - gbuf_freem(m); - break; - } - break; - - case MSG_IOCACK: - if (atp->dflag) - asp_ack_reply(gref, m); - else - atalk_putnext(gref, m); - break; - - case MSG_IOCNAK: - if (atp->dflag) - asp_nak_reply(gref, m); - else - atalk_putnext(gref, m); -
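/*
 * (editorial note: as with MSG_IOCACK just above, a NAK is consumed
 * by ASP via asp_nak_reply() when this state is ASP-owned (dflag set)
 * and is otherwise passed up the stream)
 */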
break; - - default: - gbuf_freem(m); - } -} /* atp_rput */ - -void -atp_x_done_locked(trp) -void *trp; -{ - atalk_lock(); - atp_x_done((struct atp_trans *)trp); - atalk_unlock(); - -} - -void -atp_x_done(trp) -register struct atp_trans *trp; -{ - struct atp_state *atp; - gbuf_t *m; - - - if ( !trp->tr_xo) - atp_trans_complete(trp); - else { - /* - * If execute once send a release - */ - if ((m = (gbuf_t *)atp_build_release(trp)) != NULL) { - AT_DDP_HDR(m)->src_socket = ((struct atp_state *) - trp->tr_queue)->atp_socket_no; - DDP_OUTPUT(m); - /* - * Now send back the transaction reply to the process - * or notify the process if required - */ - atp_trans_complete(trp); - } else { - - atp = trp->tr_queue; - trp->tr_state = TRANS_RELEASE; - timeout(atp_x_done_locked, trp, 10); - } - } -} - -static void -atp_trans_complete(trp) -register struct atp_trans *trp; -{ register gbuf_t *m; - register int type; - struct atp_state *atp; - - /* we could gbuf_freem(trp->tr_xmt) here if were not planning to - re-use the mbuf later */ - m = trp->tr_xmt; - trp->tr_xmt = NULL; - trp->tr_state = TRANS_DONE; - - if (gbuf_cont(m) == NULL) /* issued via the new interface */ - type = AT_ATP_ISSUE_REQUEST_NOTE; - else { - type = ((ioc_t *)(gbuf_rptr(m)))->ioc_cmd; - /* - * free any data following the ioctl blk - */ - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - } - dPrintf(D_M_ATP_LOW, D_L_INPUT, ("atp_trans_comp: trp=0x%x type = %s\n", - (u_int) trp, - (type==AT_ATP_ISSUE_REQUEST)? "AT_ATP_ISSUE_REQUEST": - (type==AT_ATP_ISSUE_REQUEST_NOTE)? "AT_ATP_ISSUE_REQUEST_NOTE" : - "unknown")); - - switch(type) { - case AT_ATP_ISSUE_REQUEST: - atp = trp->tr_queue; - if (atp->dflag) { - ((ioc_t *)gbuf_rptr(m))->ioc_count = 0; - ((ioc_t *)gbuf_rptr(m))->ioc_error = 0; - ((ioc_t *)gbuf_rptr(m))->ioc_rval = trp->tr_tid; - ((ioc_t *)gbuf_rptr(m))->ioc_cmd = AT_ATP_REQUEST_COMPLETE; - gbuf_set_type(m, MSG_IOCTL); - atp_rsp_ind(trp, m); - } else { - if (trp->tr_bdsp == NULL) { - gbuf_freem(m); - if (trp->tr_rsp_wait) - wakeup(&trp->tr_event); - } else { - gbuf_set_type(m, MSG_IOCACK); - ((ioc_t *)gbuf_rptr(m))->ioc_count = 0; - ((ioc_t *)gbuf_rptr(m))->ioc_error = 0; - ((ioc_t *)gbuf_rptr(m))->ioc_rval = 0; - atalk_putnext(trp->tr_queue->atp_gref, m); - } - } - break; - - case AT_ATP_ISSUE_REQUEST_NOTE: - gbuf_wset(m,1); - *gbuf_rptr(m) = 1; - gbuf_set_type(m, MSG_DATA); - atalk_putnext(trp->tr_queue->atp_gref, m); - break; - } -} /* atp_trans_complete */ diff --git a/bsd/netat/atp_write.c b/bsd/netat/atp_write.c deleted file mode 100644 index a46fa471f..000000000 --- a/bsd/netat/atp_write.c +++ /dev/null @@ -1,1915 +0,0 @@ -/* - * Copyright (c) 1996-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ -#define RESOLVE_DBG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -int asp_pack_bdsp(struct atp_trans *, gbuf_t **); - -static int atp_pack_bdsp(struct atp_trans *, struct atpBDS *); -static int atp_unpack_bdsp(struct atp_state *, gbuf_t *, struct atp_rcb *, - int, int); -void atp_trp_clock(void *arg), atp_trp_clock_locked(void *arg); - -extern struct atp_rcb_qhead atp_need_rel; -extern int atp_inited; -extern struct atp_state *atp_used_list; -extern asp_scb_t *scb_free_list; - -extern gbuf_t *scb_resource_m; -extern gbuf_t *atp_resource_m; -extern gref_t *atp_inputQ[]; -extern int atp_pidM[]; -extern at_ifaddr_t *ifID_home; -extern lck_mtx_t * atalk_mutex; - -static struct atp_trans *trp_tmo_list; -struct atp_trans *trp_tmo_rcb; - -/* first bds entry gives number of bds entries in total (hack) */ -#define get_bds_entries(m) \ - ((gbuf_len(m) > TOTAL_ATP_HDR_SIZE)? \ - (UAS_VALUE(((struct atpBDS *)(AT_ATP_HDR(m)->data))->bdsDataSz)): 0) - -#define atpBDSsize (sizeof(struct atpBDS)*ATP_TRESP_MAX) - -void atp_link(void) -{ - trp_tmo_list = 0; - trp_tmo_rcb = atp_trans_alloc(0); - atp_timout(atp_rcb_timer, trp_tmo_rcb, 10 * HZ); - atp_trp_clock((void *)&atp_inited); - asp_clock((void *)&atp_inited); -} - -void atp_unlink() -{ - untimeout(asp_clock_locked, (void *)&atp_inited); - untimeout(atp_trp_clock_locked, (void *)&atp_inited); - atp_untimout(atp_rcb_timer, trp_tmo_rcb); - trp_tmo_list = 0; - -#ifdef BAD_IDEA - /* allocated in asp_scb_alloc(), which is called - by asp_open() */ - if (scb_resource_m) { - gbuf_freem(scb_resource_m); - scb_resource_m = 0; - scb_free_list = 0; - } - /* allocated in atp_trans_alloc() */ - if (atp_resource_m) { - gbuf_freem(atp_resource_m); - atp_resource_m = 0; - atp_trans_free_list = 0; - } -#endif -} - -/* - * write queue put routine .... filter out other than IOCTLs - * Version 1.8 of atp_write.c on 89/02/09 17:53:26 - */ - -void -atp_wput(gref, m) - register gref_t *gref; - register gbuf_t *m; -{ - register ioc_t *iocbp; - int i, xcnt; - struct atp_state *atp; - struct atp_trans *trp; - struct atp_rcb *rcbp; - at_socket skt; - - atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - - switch(gbuf_type(m)) { - case MSG_DATA: - if (atp->atp_msgq) { - gbuf_freem(m); - dPrintf(D_M_ATP, D_L_WARNING, - ("atp_wput: atp_msgq discarded\n")); - } else - atp->atp_msgq = m; - break; - - case MSG_IOCTL: - /* Need to ensure that all copyin/copyout calls are made at - * put routine time which should be in the user context. (true when - * we are the stream head). 
The service routine can be called on an - * unpredictable context and copyin/copyout calls will get wrong results - * or even panic the kernel. - */ - iocbp = (ioc_t *)gbuf_rptr(m); - - switch (iocbp->ioc_cmd) { - case AT_ATP_BIND_REQ: - if (gbuf_cont(m) == NULL) { - iocbp->ioc_rval = -1; - atp_iocnak(atp, m, EINVAL); - return; - } - skt = *(at_socket *)gbuf_rptr(gbuf_cont(m)); - if ((skt = (at_socket)atp_bind(gref, (unsigned int)skt, 0)) == 0) - atp_iocnak(atp, m, EINVAL); - else { - *(at_socket *)gbuf_rptr(gbuf_cont(m)) = skt; - iocbp->ioc_rval = 0; - atp_iocack(atp, m); - atp_dequeue_atp(atp); - } - return; - - case AT_ATP_GET_CHANID: - if (gbuf_cont(m) == NULL) { - iocbp->ioc_rval = -1; - atp_iocnak(atp, m, EINVAL); - return; - } - *(gref_t **)gbuf_rptr(gbuf_cont(m)) = gref; - atp_iocack(atp, m); - return; - - /* not the close and not the tickle(?) */ - case AT_ATP_ISSUE_REQUEST_DEF: - case AT_ATP_ISSUE_REQUEST_DEF_NOTE: { - gbuf_t *bds, *tmp, *m2; - at_ddp_t *ddp; - at_atp_t *athp; - - if ((tmp = gbuf_cont(m)) != 0) { - if ((bds = gbuf_dupb(tmp)) == NULL) { - atp_iocnak(atp, m, ENOBUFS); - return; - } - gbuf_rinc(tmp,atpBDSsize); - gbuf_wset(bds,atpBDSsize); - iocbp->ioc_count -= atpBDSsize; - gbuf_cont(tmp) = bds; - } - - /* - * send a response to a transaction - * first check it out - */ - if (iocbp->ioc_count < TOTAL_ATP_HDR_SIZE) { - atp_iocnak(atp, m, EINVAL); - break; - } - - /* - * remove the response from the message - */ - m2 = gbuf_cont(m); - gbuf_cont(m) = NULL; - iocbp->ioc_count = 0; - ddp = AT_DDP_HDR(m2); - athp = AT_ATP_HDR(m2); - if (atp->atp_msgq) { - gbuf_cont(m2) = atp->atp_msgq; - atp->atp_msgq = 0; - } - - /* - * search for the corresponding rcb - */ - for (rcbp = atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid) && - rcbp->rc_socket.node == ddp->dst_node && - rcbp->rc_socket.net == NET_VALUE(ddp->dst_net) && - rcbp->rc_socket.socket == ddp->dst_socket) - break; - } - - /* - * If it has already been sent then return an error - */ - if ((rcbp && rcbp->rc_state != RCB_NOTIFIED) || - (rcbp == NULL && athp->xo)) { - atp_iocnak(atp, m, ENOENT); - gbuf_freem(m2); - return; - } - if (rcbp == NULL) { /* a response for an ALO transaction */ - if ((rcbp = atp_rcb_alloc(atp)) == NULL) { - atp_iocnak(atp, m, ENOBUFS); - gbuf_freem(m2); - return; - } - rcbp->rc_ioctl = 0; - rcbp->rc_socket.socket = ddp->dst_socket; - rcbp->rc_socket.node = ddp->dst_node; - rcbp->rc_socket.net = NET_VALUE(ddp->dst_net); - rcbp->rc_tid = UAS_VALUE_NTOH(athp->tid); - rcbp->rc_bitmap = 0xff; - rcbp->rc_xo = 0; - rcbp->rc_state = RCB_SENDING; - ATP_Q_APPEND(atp->atp_rcb, rcbp, rc_list); - } - xcnt = get_bds_entries(m2); - if ((i = atp_unpack_bdsp(atp, m2, rcbp, xcnt, FALSE))) { - if ( !rcbp->rc_xo) - atp_rcb_free(rcbp); - atp_iocnak(atp, m, i); - return; - } - atp_send_replies(atp, rcbp); - - /* - * send the ack back to the responder - */ - atp_iocack(atp, m); - return; - } - - case AT_ATP_GET_POLL: { - if (gbuf_cont(m)) { - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - iocbp->ioc_count = 0; - } - - /* - * search for a waiting request - */ - if ((rcbp = atp->atp_attached.head)) { - /* - * Got one, move it to the active response Q - */ - gbuf_cont(m) = rcbp->rc_ioctl; - rcbp->rc_ioctl = NULL; - if (rcbp->rc_xo) { - ATP_Q_REMOVE(atp->atp_attached, rcbp, rc_list); - rcbp->rc_state = RCB_NOTIFIED; - ATP_Q_APPEND(atp->atp_rcb, rcbp, rc_list); - } else { - /* detach rcbp from attached queue, - * and free any outstanding resources - */ - 
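/* (editorial: this is the at-least-once, non-XO path -- with no
 * TREL handshake to wait for, the rcb can be freed as soon as the
 * request is handed up; the XO branch above instead parks the rcb
 * on atp_rcb until it is released or times out) */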
atp_rcb_free(rcbp); - } - atp_iocack(atp, m); - } else { - /* - * None available - can out - */ - atp_iocnak(atp, m, EAGAIN); - } - break; - } - - case AT_ATP_CANCEL_REQUEST: { - /* - * Cancel a pending request - */ - if (iocbp->ioc_count != sizeof(int)) { - atp_iocnak(atp, m, EINVAL); - break; - } - i = *(int *)gbuf_rptr(gbuf_cont(m)); - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { - if (trp->tr_tid == i) - break; - } - if (trp == NULL) - atp_iocnak(atp, m, ENOENT); - else { - atp_free(trp); - atp_iocack(atp, m); - } - break; - } - - case AT_ATP_PEEK: { - unsigned char event; - if (atalk_peek(gref, &event) == -1) - atp_iocnak(atp, m, EAGAIN); - else { - *gbuf_rptr(gbuf_cont(m)) = event; - atp_iocack(atp, m); - } - break; - } - - case DDP_IOC_GET_CFG: -#ifdef APPLETALK_DEBUG - kprintf("atp_wput: DDP_IOC_GET_CFG\n"); -#endif - if (gbuf_cont(m) == 0) { - atp_iocnak(atp, m, EINVAL); - break; - } - { - /* *** was ddp_get_cfg() *** */ - ddp_addr_t *cfgp = - (ddp_addr_t *)gbuf_rptr(gbuf_cont(m)); - cfgp->inet.net = ifID_home->ifThisNode.s_net; - cfgp->inet.node = ifID_home->ifThisNode.s_node; - cfgp->inet.socket = atp->atp_socket_no; - cfgp->ddptype = DDP_ATP; -#ifdef NOT_YET - cfgp->inet.net = atp->atp_gref->laddr.s_net; - cfgp->inet.node = atp->atp_gref->laddr.s_node; - cfgp->inet.socket = atp->atp_gref->lport; - cfgp->ddptype = atp->atp_gref->ddptype; -#endif - } - gbuf_wset(gbuf_cont(m), sizeof(ddp_addr_t)); - atp_iocack(atp, m); - break; - - default: - /* - * Otherwise pass it on, if possible - */ - iocbp->ioc_private = (void *)gref; - DDP_OUTPUT(m); - break; - } - break; - - default: - gbuf_freem(m); - break; - } -} /* atp_wput */ - -gbuf_t *atp_build_release(trp) -register struct atp_trans *trp; -{ - register gbuf_t *m; - register at_ddp_t *ddp; - register at_atp_t *athp; - - /* - * Now try and allocate enough space to send the message - * if none is available the caller will schedule - * a timeout so we can retry for more space soon - */ - if ((m = (gbuf_t *)gbuf_alloc(AT_WR_OFFSET+ATP_HDR_SIZE, PRI_HI)) != NULL) { - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,TOTAL_ATP_HDR_SIZE); - ddp = AT_DDP_HDR(m); - ddp->type = DDP_ATP; - UAS_ASSIGN_HTON(ddp->checksum, 0); - ddp->dst_socket = trp->tr_socket.socket; - ddp->dst_node = trp->tr_socket.node; - NET_ASSIGN(ddp->dst_net, trp->tr_socket.net); - ddp->src_node = trp->tr_local_node; - NET_NET(ddp->src_net, trp->tr_local_net); - - /* - * clear the cmd/xo/eom/sts/unused fields - */ - athp = AT_ATP_HDR(m); - ATP_CLEAR_CONTROL(athp); - athp->cmd = ATP_CMD_TREL; - UAS_ASSIGN_HTON(athp->tid, trp->tr_tid); - } - - return (m); -} - -void atp_send_replies(atp, rcbp) - register struct atp_state *atp; - register struct atp_rcb *rcbp; -{ register gbuf_t *m; - register int i, len; - int cnt, offset, space; - register at_atp_t *athp; - register struct atpBDS *bdsp; - register gbuf_t *m1, *m0, *mhdr; -#if DEBUG - gbuf_t *m2 = NULL; -#endif /* DEBUG */ - gbuf_t *mprev = 0, *mlist = 0; - at_socket src_socket = (at_socket)atp->atp_socket_no; - gbuf_t *rc_xmt[ATP_TRESP_MAX]; - struct ddp_atp { - char ddp_atp_hdr[TOTAL_ATP_HDR_SIZE]; - }; - struct timeval timenow; - - if (rcbp->rc_queue != atp) - return; - if (rcbp->rc_not_sent_bitmap == 0) - goto nothing_to_send; - - dPrintf(D_M_ATP_LOW, D_L_OUTPUT, ("atp_send_replies\n")); - /* - * Do this for each message that hasn't been sent - */ - cnt = rcbp->rc_pktcnt; - for (i = 0; i < cnt; i++) { - rc_xmt[i] = 0; - if (rcbp->rc_snd[i]) { - if 
((rc_xmt[i] = - gbuf_alloc(AT_WR_OFFSET+TOTAL_ATP_HDR_SIZE,PRI_MED)) - == NULL) { - for (cnt = 0; cnt < i; cnt++) - if (rc_xmt[cnt]) - gbuf_freeb(rc_xmt[cnt]); - goto nothing_to_send; - } - } - } - - m = rcbp->rc_xmt; - m0 = gbuf_cont(m); - if (gbuf_len(m) > TOTAL_ATP_HDR_SIZE) - bdsp = (struct atpBDS *)(AT_ATP_HDR(m)->data); - else - bdsp = 0; - offset = 0; - if (m0) - space = gbuf_msgsize(m0); - else - space = 0; - for (i = 0; i < cnt; i++) { - if (rcbp->rc_snd[i] == 0) { - if ((len = UAS_VALUE(bdsp->bdsBuffSz))) { - offset += len; - space -= len; - } - } else { - mhdr = rc_xmt[i]; - /* setup header fields */ - gbuf_rinc(mhdr,AT_WR_OFFSET); - gbuf_wset(mhdr,TOTAL_ATP_HDR_SIZE); - *(struct ddp_atp *)(gbuf_rptr(mhdr))= *(struct ddp_atp *)(gbuf_rptr(m)); - athp = AT_ATP_HDR(mhdr); - ATP_CLEAR_CONTROL(athp); - athp->cmd = ATP_CMD_TRESP; - athp->bitmap = i; - if (i == (cnt - 1)) - athp->eom = 1; /* for the last fragment */ - if (bdsp) { - UAL_UAL(athp->user_bytes, bdsp->bdsUserData); - if ((len = UAS_VALUE(bdsp->bdsBuffSz)) && m0 != 0 && space > 0) { - if ((m1 = m_copym(m0, offset, len, M_DONTWAIT)) == 0) { - for (i = 0; i < cnt; i++) - if (rc_xmt[i]) - gbuf_freem(rc_xmt[i]); - goto nothing_to_send; - } - offset += len; - space -= len; - gbuf_cont(mhdr) = m1; - } - } - - AT_DDP_HDR(mhdr)->src_socket = src_socket; - dPrintf(D_M_ATP_LOW, D_L_OUTPUT, - ("atp_send_replies: %d, socket=%d, size=%d\n", - i, atp->atp_socket_no, gbuf_msgsize(gbuf_cont(m2)))); - - if (mlist) - gbuf_next(mprev) = mhdr; - else - mlist = mhdr; - mprev = mhdr; - - rcbp->rc_snd[i] = 0; - rcbp->rc_not_sent_bitmap &= ~atp_mask[i]; - if (rcbp->rc_not_sent_bitmap == 0) - break; - } - /* - * on to the next frag - */ - bdsp++; - } - if (mlist) - DDP_OUTPUT(mlist); - - -nothing_to_send: - /* - * If all replies from this reply block have been sent then - * remove it from the queue and mark it so - */ - if (rcbp->rc_queue != atp) - return; - rcbp->rc_rep_waiting = 0; - - /* - * If we are doing execute once re-set the rcb timeout - * each time we send back any part of the response. Note - * that this timer is started when an initial request is - * received. Each response reprimes the timer. Duplicate - * requests do not reprime the timer. - * - * We have sent all of a response so free the - * resources. 
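 *
 * (Editorial sketch, not in the original comment: as the code below
 * reads, rc_timestamp is primed only once -- when still zero -- and
 * the rcb then sits on atp_need_rel until it is released:
 *
 *   last reply sent    -> rc_timestamp set, rcb queued on atp_need_rel
 *   TREL arrives       -> atp_rput() frees the rcb, or marks it
 *                         RCB_RELEASED while a send is in flight
 *   no TREL in ~30 sec -> atp_rcb_timer()'s periodic sweep reaps it)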
- */ - if (rcbp->rc_xo && rcbp->rc_state != RCB_RELEASED) { - getmicrouptime(&timenow); - if (rcbp->rc_timestamp == 0) { - rcbp->rc_timestamp = timenow.tv_sec; - if (rcbp->rc_timestamp == 0) - rcbp->rc_timestamp = 1; - ATP_Q_APPEND(atp_need_rel, rcbp, rc_tlist); - } - rcbp->rc_state = RCB_RESPONSE_FULL; - } else - atp_rcb_free(rcbp); -} /* atp_send_replies */ - - -static int -atp_pack_bdsp(trp, bdsp) - register struct atp_trans *trp; - register struct atpBDS *bdsp; -{ - register gbuf_t *m = NULL; - register int i, datsize = 0; - struct atpBDS *bdsbase = bdsp; - int error = 0; - - dPrintf(D_M_ATP, D_L_INFO, ("atp_pack_bdsp: socket=%d\n", - trp->tr_queue->atp_socket_no)); - - for (i = 0; i < ATP_TRESP_MAX; i++, bdsp++) { - unsigned short bufsize = UAS_VALUE(bdsp->bdsBuffSz); - long bufaddr = UAL_VALUE(bdsp->bdsBuffAddr); - - if ((m = trp->tr_rcv[i]) == NULL) - break; - - /* discard ddp hdr on first packet */ - if (i == 0) - gbuf_rinc(m,DDP_X_HDR_SIZE); - - /* this field may contain control information even when - no data is present */ - UAL_UAL(bdsp->bdsUserData, - (((at_atp_t *)(gbuf_rptr(m)))->user_bytes)); - gbuf_rinc(m, ATP_HDR_SIZE); - - if ((bufsize != 0) && (bufaddr != 0)) { - /* user expects data back */ - short tmp = 0; - register char *buf = (char *)bufaddr; - - while (m) { - unsigned short len = (unsigned short)(gbuf_len(m)); - if (len) { - if (len > bufsize) - len = bufsize; - if ((error = copyout((caddr_t)gbuf_rptr(m), - CAST_USER_ADDR_T(&buf[tmp]), - len)) != 0) { - return error; - } - bufsize -= len; - tmp += len; - } - m = gbuf_cont(m); - } - - UAS_ASSIGN(bdsp->bdsDataSz, tmp); - datsize += (int)tmp; - } - gbuf_freem(trp->tr_rcv[i]); - trp->tr_rcv[i] = NULL; - } - - /* report the number of packets */ - UAS_ASSIGN(((struct atpBDS *)bdsbase)->bdsBuffSz, i); - - dPrintf(D_M_ATP, D_L_INFO, (" : size=%d\n", - datsize)); - - return 0; -} /* atp_pack_bdsp */ - - -/* create an mbuf chain with mbuf packet headers for each ATP response packet - * to be sent. m contains the DDP hdr, ATP hdr, and an array of atpBDS structs. - * chained to m is an mbuf that contains the actual data pointed to by the atpBDS - * structs. - */ -static int -atp_unpack_bdsp(atp, m, rcbp, cnt, wait) - struct atp_state *atp; - gbuf_t *m; /* ddp, atp and bdsp gbuf_t */ - register struct atp_rcb *rcbp; - register int cnt, wait; -{ - register struct atpBDS *bdsp; - register gbuf_t *m2, *m1, *m0, *mhdr; - at_atp_t *athp; - int i, len; - at_socket src_socket; - - struct ddp_atp { - char ddp_atp_hdr[TOTAL_ATP_HDR_SIZE]; - }; - gbuf_t *mprev = 0, *mlist = 0; - gbuf_t *rc_xmt[ATP_TRESP_MAX]; - int offset, space = 0; - struct timeval timenow; - - /* - * get the user data structure pointer - */ - bdsp = (struct atpBDS *)(AT_ATP_HDR(m)->data); - - /* - * Guard against bogus count argument.
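 * (Editorial note: the ATP bitmap is a single octet, so a transaction
 * carries at most ATP_TRESP_MAX response packets; cnt comes straight
 * from the caller-supplied BDS header, and an unchecked value would
 * index past the rc_xmt[ATP_TRESP_MAX] array declared above.)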
- */ - if ((unsigned) cnt > ATP_TRESP_MAX) { - dPrintf(D_M_ATP, D_L_ERROR, - ("atp_unpack_bdsp: bad bds count 0x%x\n", cnt)); - gbuf_freem(m); - return(EINVAL); - } - if ((src_socket = (at_socket)atp->atp_socket_no) == 0xFF) { - /* comparison was to -1, however src_socket is a u_char */ - gbuf_freem(m); - return EPIPE; - } - - m0 = gbuf_cont(m); - rcbp->rc_xmt = m; - rcbp->rc_pktcnt = cnt; - rcbp->rc_state = RCB_SENDING; - rcbp->rc_not_sent_bitmap = 0; - - if (cnt <= 1) { - /* - * special case this to - * improve AFP write transactions to the server - */ - rcbp->rc_pktcnt = 1; - if ((m2 = gbuf_alloc_wait(AT_WR_OFFSET+TOTAL_ATP_HDR_SIZE, - wait)) == NULL) - return 0; - gbuf_rinc(m2,AT_WR_OFFSET); - gbuf_wset(m2,TOTAL_ATP_HDR_SIZE); - *(struct ddp_atp *)(gbuf_rptr(m2))= *(struct ddp_atp *)(gbuf_rptr(m)); - athp = AT_ATP_HDR(m2); - ATP_CLEAR_CONTROL(athp); - athp->cmd = ATP_CMD_TRESP; - athp->bitmap = 0; - athp->eom = 1; /* there's only 1 fragment */ - - /* *** why only if cnt > 0? *** */ - if (cnt > 0) - UAL_UAL(athp->user_bytes, bdsp->bdsUserData); - if (m0) - if (!append_copy((struct mbuf *)m2, - (struct mbuf *)m0, wait)) { - gbuf_freeb(m2); - return 0; - } - /* - * send the message and mark it as sent - */ - AT_DDP_HDR(m2)->src_socket = src_socket; - dPrintf(D_M_ATP_LOW, D_L_INFO, - ("atp_unpack_bdsp %d, socket=%d, size=%d, cnt=%d\n", - 0,atp->atp_socket_no,gbuf_msgsize(gbuf_cont(m2)),cnt)); - mlist = m2; - goto l_send; - } - - /* create an array of mbuf packet headers for the packets to be sent - * to contain the atp and ddp headers with room at the front for the - * datalink header. - */ - for (i = 0; i < cnt; i++) { - /* all hdrs, packet data and dst addr storage */ - if ((rc_xmt[i] = - gbuf_alloc_wait(AT_WR_OFFSET+TOTAL_ATP_HDR_SIZE, wait)) == NULL) { - for (cnt = 0; cnt < i; cnt++) - if (rc_xmt[cnt]) - gbuf_freeb(rc_xmt[cnt]); - return 0; - } - } - - /* run through the atpBDS structs and create an mbuf for the data - * portion of each packet to be sent. these get chained to the mbufs - * containing the ATP and DDP headers. this code assumes that no ATP - * packet is contained in more than 2 mbufs (i.e. crosses an mbuf boundary - * no more than one time).
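 *
 * (Editorial illustration with assumed sizes: successive BDS entries
 * carve consecutive windows out of the single data chain m0,
 *
 *   bds[0].bdsBuffSz = 200  ->  m_copym(m0,   0, 200, wait)
 *   bds[1].bdsBuffSz = 578  ->  m_copym(m0, 200, 578, wait)
 *
 * with offset advancing and space shrinking by len on each pass.)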
- */ - offset = 0; - if (m0) - space = gbuf_msgsize(m0); - for (i = 0; i < cnt; i++) { /* for each hdr mbuf */ - mhdr = rc_xmt[i]; - /* setup header fields */ - gbuf_rinc(mhdr,AT_WR_OFFSET); - gbuf_wset(mhdr,TOTAL_ATP_HDR_SIZE); - *(struct ddp_atp *)(gbuf_rptr(mhdr))= *(struct ddp_atp *)(gbuf_rptr(m)); - athp = AT_ATP_HDR(mhdr); - ATP_CLEAR_CONTROL(athp); - athp->cmd = ATP_CMD_TRESP; - athp->bitmap = i; - if (i == (cnt - 1)) - athp->eom = 1; /* for the last fragment */ - UAL_UAL(athp->user_bytes, bdsp->bdsUserData); - - if ((len = UAS_VALUE(bdsp->bdsBuffSz)) != 0 && m0 != 0 && space > 0) { - if ((m1 = m_copym(m0, offset, len, wait)) == 0) { - for (i = 0; i < cnt; i++) - if (rc_xmt[i]) - gbuf_freem(rc_xmt[i]); - return 0; - } - gbuf_cont(mhdr) = m1; - space -= len; - offset += len; - } - - AT_DDP_HDR(mhdr)->src_socket = src_socket; - dPrintf(D_M_ATP_LOW,D_L_INFO, - ("atp_unpack_bdsp %d, socket=%d, size=%d, cnt=%d\n", - i,atp->atp_socket_no,gbuf_msgsize(gbuf_cont(mhdr)),cnt)); - if (mlist) - gbuf_next(mprev) = mhdr; - else - mlist = mhdr; - mprev = mhdr; - /* - * on to the next frag - */ - bdsp++; - } - /* - * send the message - */ -l_send: - if (rcbp->rc_xo) { - getmicrouptime(&timenow); - if (rcbp->rc_timestamp == 0) { - if ((rcbp->rc_timestamp = timenow.tv_sec) == 0) - rcbp->rc_timestamp = 1; - ATP_Q_APPEND(atp_need_rel, rcbp, rc_tlist); - } - } - - DDP_OUTPUT(mlist); - return 0; - -} /* atp_unpack_bdsp */ - -#define ATP_SOCKET_LAST (DDP_SOCKET_LAST-6) -#define ATP_SOCKET_FIRST (DDP_SOCKET_1st_DYNAMIC) -static unsigned int sNext = 0; -extern unsigned char asp_inpC[]; -extern asp_scb_t *asp_scbQ[]; - -int atp_bind(gref, sVal, flag) - gref_t *gref; - unsigned int sVal; - unsigned char *flag; -{ - unsigned char inpC, sNextUsed = 0; - unsigned int sMin, sMax, sSav = 0; - struct atp_state *atp; - - atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - - sMax = ATP_SOCKET_LAST; - sMin = ATP_SOCKET_FIRST; - if (flag && (*flag == 3)) { - sMin += 40; - if (sMin < sNext) { - sMin = sNext; - sNextUsed = 1; - } - } - - if ( (sVal != 0) && - ((sVal > sMax) || (sVal < 2) || (sVal == 6) || - (ddp_socket_inuse(sVal, DDP_ATP) && - (atp_inputQ[sVal] != (gref_t *)1)))) { - return 0; - } - - if (sVal == 0) { - inpC = 255; -again: - for (sVal=sMin; sVal <= sMax; sVal++) { - if (!ddp_socket_inuse(sVal, DDP_ATP) || - atp_inputQ[sVal] == (gref_t *)1) - break; - else if (flag && (*flag == 3) && asp_scbQ[sVal]) { - if ((asp_scbQ[sVal]->dflag == *flag) - && (asp_inpC[sVal] < inpC) ) { - inpC = asp_inpC[sVal]; - sSav = sVal; - } - } - } - if (sVal > sMax) { - if (flag && (*flag == 3)) { - if (sNextUsed) { - sNextUsed = 0; - sMax = sNext - 1; - sMin = ATP_SOCKET_FIRST+40; - goto again; - } - sNext = 0; - *flag = (unsigned char)sSav; - } - return 0; - } - } - atp->atp_socket_no = (short)sVal; - atp_inputQ[sVal] = gref; - if (flag == 0) - atp_pidM[sVal] = atp->atp_pid; - else if (*flag == 3) { - sNext = sVal + 1; - if (sNext > ATP_SOCKET_LAST) - sNext = 0; - } - - return (int)sVal; -} - -void atp_req_ind(atp, mioc) - register struct atp_state *atp; - register gbuf_t *mioc; -{ - register struct atp_rcb *rcbp; - - if ((rcbp = atp->atp_attached.head) != 0) { - gbuf_cont(mioc) = rcbp->rc_ioctl; - rcbp->rc_ioctl = NULL; - if (rcbp->rc_xo) { - ATP_Q_REMOVE(atp->atp_attached, rcbp, rc_list); - rcbp->rc_state = RCB_NOTIFIED; - ATP_Q_APPEND(atp->atp_rcb, rcbp, rc_list); - } else - atp_rcb_free(rcbp); - if (gbuf_cont(mioc)) - ((ioc_t *)gbuf_rptr(mioc))->ioc_count = 
gbuf_msgsize(gbuf_cont(mioc)); - else - ((ioc_t *)gbuf_rptr(mioc))->ioc_count = 0; - asp_ack_reply(atp->atp_gref, mioc); - } else - gbuf_freeb(mioc); -} - -void atp_rsp_ind(trp, mioc) - register struct atp_trans *trp; - register gbuf_t *mioc; -{ - register struct atp_state *atp = trp->tr_queue; - register int err; - gbuf_t *xm = 0; - - err = 0; - { - switch (trp->tr_state) { - case TRANS_DONE: - if (asp_pack_bdsp(trp, &xm) < 0) - err = EFAULT; - gbuf_cont(mioc) = trp->tr_xmt; - trp->tr_xmt = NULL; - break; - - case TRANS_FAILED: - err = ETIMEDOUT; - break; - - default: - err = ENOENT; - break; - } - atp_free(trp); - - if (err) { - dPrintf(D_M_ATP, D_L_ERROR, - ("atp_rsp_ind: TRANSACTION error\n")); - atp_iocnak(atp, mioc, err); - } else { - gbuf_cont(gbuf_cont(mioc)) = xm; - atp_iocack(atp, mioc); - } - } -} - -void atp_cancel_req(gref, tid) - gref_t *gref; - unsigned short tid; -{ - struct atp_state *atp; - struct atp_trans *trp; - - atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - - for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { - if (trp->tr_tid == tid) - break; - } - if (trp != NULL) - atp_free(trp); -} - -/* - * remove atp from the use list - */ -void -atp_dequeue_atp(atp) - struct atp_state *atp; -{ - - if (atp == atp_used_list) { - if ((atp_used_list = atp->atp_trans_waiting) != 0) - atp->atp_trans_waiting->atp_rcb_waiting = 0; - } else if (atp->atp_rcb_waiting) { - if ((atp->atp_rcb_waiting->atp_trans_waiting - = atp->atp_trans_waiting) != 0) - atp->atp_trans_waiting->atp_rcb_waiting = atp->atp_rcb_waiting; - } - - atp->atp_trans_waiting = 0; - atp->atp_rcb_waiting = 0; -} - -void -atp_timout(func, trp, ticks) - atp_tmo_func func; - struct atp_trans *trp; - int ticks; -{ - unsigned int sum; - struct atp_trans *curr_trp, *prev_trp; - - if (trp->tr_tmo_func) - return; - - trp->tr_tmo_func = func; - trp->tr_tmo_delta = 1+(ticks>>5); - - if (trp_tmo_list == 0) { - trp->tr_tmo_next = trp->tr_tmo_prev = 0; - trp_tmo_list = trp; - return; - } - - prev_trp = 0; - curr_trp = trp_tmo_list; - sum = 0; - - while (1) { - sum += curr_trp->tr_tmo_delta; - if (sum > trp->tr_tmo_delta) { - sum -= curr_trp->tr_tmo_delta; - trp->tr_tmo_delta -= sum; - curr_trp->tr_tmo_delta -= trp->tr_tmo_delta; - break; - } - prev_trp = curr_trp; - if ((curr_trp = curr_trp->tr_tmo_next) == 0) { - trp->tr_tmo_delta -= sum; - break; - } - } - - if (prev_trp) { - trp->tr_tmo_prev = prev_trp; - if ((trp->tr_tmo_next = prev_trp->tr_tmo_next) != 0) - prev_trp->tr_tmo_next->tr_tmo_prev = trp; - prev_trp->tr_tmo_next = trp; - } else { - trp->tr_tmo_prev = 0; - trp->tr_tmo_next = trp_tmo_list; - trp_tmo_list->tr_tmo_prev = trp; - trp_tmo_list = trp; - } -} - -void -atp_untimout( - __unused atp_tmo_func func, - struct atp_trans *trp) -{ - - if (trp->tr_tmo_func == 0) - return; - - if (trp_tmo_list == trp) { - if ((trp_tmo_list = trp->tr_tmo_next) != 0) { - trp_tmo_list->tr_tmo_prev = 0; - trp->tr_tmo_next->tr_tmo_delta += trp->tr_tmo_delta; - } - } else { - if ((trp->tr_tmo_prev->tr_tmo_next = trp->tr_tmo_next) != 0) { - trp->tr_tmo_next->tr_tmo_prev = trp->tr_tmo_prev; - trp->tr_tmo_next->tr_tmo_delta += trp->tr_tmo_delta; - } - } - trp->tr_tmo_func = 0; -} - -void -atp_trp_clock_locked(arg) - void *arg; -{ - atalk_lock(); - atp_trp_clock(arg); - atalk_unlock(); -} - -void -atp_trp_clock(arg) - void *arg; -{ - struct atp_trans *trp; - atp_tmo_func tr_tmo_func; - - if (trp_tmo_list) - trp_tmo_list->tr_tmo_delta--; - while (((trp = trp_tmo_list) != 0) && 
(trp_tmo_list->tr_tmo_delta == 0)) { - if ((trp_tmo_list = trp->tr_tmo_next) != 0) - trp_tmo_list->tr_tmo_prev = 0; - if ((tr_tmo_func = trp->tr_tmo_func) != 0) { - trp->tr_tmo_func = 0; - (*tr_tmo_func)(trp); - } - } - - timeout(atp_trp_clock_locked, (void *)arg, (1<<5)); -} - -void -atp_send_req(gref, mioc) - gref_t *gref; - gbuf_t *mioc; -{ - register struct atp_state *atp; - register struct atp_trans *trp; - register ioc_t *iocbp; - register at_atp_t *athp; - register at_ddp_t *ddp; - gbuf_t *m, *m2, *bds; - struct atp_set_default *sdb; - int old; - unsigned int timer; - u_short temp_net; - - atp = (struct atp_state *)((struct atp_state *)gref->info)->atp_msgq; - iocbp = (ioc_t *)gbuf_rptr(mioc); - - if ((trp = atp_trans_alloc(atp)) == NULL) { -l_retry: - ((asp_scb_t *)gref->info)->stat_msg = mioc; - iocbp->ioc_private = (void *)gref; - timeout(atp_retry_req, mioc, 10); - return; - } - - m2 = gbuf_cont(mioc); - if ((bds = gbuf_dupb(m2)) == NULL) { - atp_trans_free(trp); - goto l_retry; - } - gbuf_rinc(m2,atpBDSsize); - gbuf_wset(bds,atpBDSsize); - iocbp->ioc_count -= atpBDSsize; - gbuf_cont(m2) = NULL; - - old = iocbp->ioc_cmd; - iocbp->ioc_cmd = AT_ATP_ISSUE_REQUEST; - sdb = (struct atp_set_default *)gbuf_rptr(m2); - - /* - * The at_snd_req library routine multiplies seconds by 100. - * We need to divide by 100 in order to obtain the timer. - */ - if ((timer = (sdb->def_rate * HZ)/100) == 0) - timer = HZ; - iocbp->ioc_count -= sizeof(struct atp_set_default); - gbuf_rinc(m2,sizeof(struct atp_set_default)); - - trp->tr_retry = sdb->def_retries; - trp->tr_timeout = timer; - trp->tr_bdsp = bds; - trp->tr_tid = atp_tid(atp); - trp->tr_xmt = mioc; - - /* - * Now fill in the header (and remember the bits - * we need to know) - */ - athp = AT_ATP_HDR(m2); - athp->cmd = ATP_CMD_TREQ; - UAS_ASSIGN_HTON(athp->tid, trp->tr_tid); - athp->eom = 0; - athp->sts = 0; - trp->tr_xo = athp->xo; - trp->tr_bitmap = athp->bitmap; - ddp = AT_DDP_HDR(m2); - ddp->type = DDP_ATP; - ddp->src_socket = (at_socket)atp->atp_socket_no; - trp->tr_socket.socket = ddp->dst_socket; - trp->tr_socket.node = ddp->dst_node; - trp->tr_socket.net = NET_VALUE(ddp->dst_net); - trp->tr_local_socket = atp->atp_socket_no; - trp->tr_local_node = ddp->src_node; - temp_net = NET_VALUE(ddp->src_net); - NET_ASSIGN_NOSWAP(trp->tr_local_net, temp_net); - -#ifdef NOT_YET - /* save the local information in the gref */ - atp->atp_gref->laddr.s_net = NET_VALUE(ddp->src_net); - atp->atp_gref->laddr.s_node = ddp->src_node; - atp->atp_gref->lport = ddp->src_node; - atp->atp_gref->ddptype = DDP_ATP; -#endif - - /* - * Put us in the transaction waiting queue - */ - ATP_Q_APPEND(atp->atp_trans_wait, trp, tr_list); - - /* - * Send the message and set the timer - */ - m = (gbuf_t *)copy_pkt(m2, sizeof(llc_header_t)); - if (!trp->tr_retry && !trp->tr_bitmap && !trp->tr_xo) - atp_x_done(trp); /* no reason to tie up resources */ - else - atp_timout(atp_req_timeout, trp, trp->tr_timeout); - if (m) { - trace_mbufs(D_M_ATP_LOW, " s", m); - DDP_OUTPUT(m); - } -} /* atp_send_req */ - -void atp_retry_req(arg) - void *arg; -{ - gbuf_t *m = (gbuf_t *)arg; - gref_t *gref; - - atalk_lock(); - - gref = (gref_t *)((ioc_t *)gbuf_rptr(m))->ioc_private; - if (gref->info) { - ((asp_scb_t *)gref->info)->stat_msg = 0; - atp_send_req(gref, m); - } - atalk_unlock(); -} - -void atp_send_rsp(gref, m, wait) - gref_t *gref; - gbuf_t *m; - int wait; -{ - register struct atp_state *atp; - register struct atp_rcb *rcbp; - register at_atp_t *athp; - register at_ddp_t *ddp; - int 
s, xcnt; - u_short temp_net; - - atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - ddp = AT_DDP_HDR(m); - athp = AT_ATP_HDR(m); - - /* - * search for the corresponding rcb - */ - for (rcbp = atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if ( (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid)) && - (rcbp->rc_socket.node == ddp->dst_node) && - (rcbp->rc_socket.net == NET_VALUE(ddp->dst_net)) && - (rcbp->rc_socket.socket == ddp->dst_socket) ) - break; - } - - /* - * If it has already been sent then drop the request - */ - if ((rcbp && (rcbp->rc_state != RCB_NOTIFIED)) || - (rcbp == NULL && athp->xo) ) { - gbuf_freem(m); - return; - } - - if (rcbp == NULL) { /* a response is being sent for an ALO transaction */ - if ((rcbp = atp_rcb_alloc(atp)) == NULL) { - gbuf_freem(m); - return; - } - rcbp->rc_ioctl = 0; - rcbp->rc_socket.socket = ddp->dst_socket; - rcbp->rc_socket.node = ddp->dst_node; - rcbp->rc_socket.net = NET_VALUE(ddp->dst_net); - rcbp->rc_tid = UAS_VALUE_NTOH(athp->tid); - rcbp->rc_bitmap = 0xff; - rcbp->rc_xo = 0; - rcbp->rc_state = RCB_RESPONSE_FULL; - ATP_Q_APPEND(atp->atp_rcb, rcbp, rc_list); - } - else if (ddp->src_node == 0) { - temp_net = NET_VALUE_NOSWAP(rcbp->rc_local_net); - NET_ASSIGN(ddp->src_net, temp_net); - ddp->src_node = rcbp->rc_local_node; - } - - xcnt = get_bds_entries(m); - s = atp_unpack_bdsp(atp, m, rcbp, xcnt, wait); - if (s == 0) - atp_send_replies(atp, rcbp); -} /* atp_send_rsp */ - -int asp_pack_bdsp(trp, xm) - register struct atp_trans *trp; - gbuf_t **xm; -{ - register struct atpBDS *bdsp; - register gbuf_t *m, *m2; - register int i; - gbuf_t *m_prev = 0, *m_head = 0; - - dPrintf(D_M_ATP, D_L_INFO, ("asp_pack_bdsp: socket=%d\n", - trp->tr_queue->atp_socket_no)); - - if ((m2 = trp->tr_bdsp) == NULL) - return 0; - trp->tr_bdsp = NULL; - bdsp = (struct atpBDS *)gbuf_rptr(m2); - - for (i = 0; (i < ATP_TRESP_MAX && - bdsp < (struct atpBDS *)(gbuf_wptr(m2))); i++) { - if ((m = trp->tr_rcv[i]) == NULL) - break; - if (i == 0) { - /* discard ddp hdr on first packet */ - gbuf_rinc(m,DDP_X_HDR_SIZE); - } - - UAL_UAL(bdsp->bdsUserData, (((at_atp_t *)(gbuf_rptr(m)))->user_bytes)); - gbuf_rinc(m, ATP_HDR_SIZE); - - if (UAL_VALUE(bdsp->bdsBuffAddr)) { - short tmp = 0; - - /* user expects data back */ - m = gbuf_strip(m); - if (m_head == 0) - m_head = m; - else - gbuf_cont(m_prev) = m; - if (m) { - tmp = (short)gbuf_len(m); - while (gbuf_cont(m)) { - m = gbuf_cont(m); - tmp += (short)(gbuf_len(m)); - } - m_prev = m; - } - UAS_ASSIGN(bdsp->bdsDataSz, tmp); - } - trp->tr_rcv[i] = NULL; - bdsp++; - - } - /* - * report the number of packets - */ - UAS_ASSIGN(((struct atpBDS *)gbuf_rptr(m2))->bdsBuffSz, i); - - if (trp->tr_xmt) /* an ioctl block is still held? */ - gbuf_cont(trp->tr_xmt) = m2; - else - trp->tr_xmt = m2; - - if (m_head) - *xm = m_head; - else - *xm = 0; - - dPrintf(D_M_ATP, D_L_INFO, (" : size=%d\n", - gbuf_msgsize(*xm))); - - return 0; -} - -/* - * The following routines are direct entries from system - * calls to allow fast sending and recving of ATP data. 
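 *
 * (Editorial sketch of the intended user-level sequence; the library
 * wrappers are not part of this file, so the argument usage shown is
 * illustrative only:
 *
 *   tid = _ATPsndreq(fd, buf, len, 0, &err, p);    requester: TREQ, block
 *   n   = _ATPgetreq(fd, buf, buflen, &err, p);    responder: fetch a TREQ
 *   _ATPsndrsp(fd, respbuff, resplen, datalen, &err, p);   send TRESP set
 *   _ATPgetrsp(fd, bdsp, &err, p);                 requester, nowait mode)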
- */ -int -_ATPsndreq(fd, buf, len, nowait, err, proc) - int fd; - unsigned char *buf; - int len; - int nowait; - int *err; - void *proc; -{ - gref_t *gref; - int rc; - unsigned short tid; - unsigned int timer; - register struct atp_state *atp; - register struct atp_trans *trp; - register ioc_t *iocbp; - register at_atp_t *athp; - register at_ddp_t *ddp; - struct atp_set_default *sdb; - gbuf_t *m2, *m, *mioc; - char bds[atpBDSsize]; - - if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) - return -1; - - if ((gref == 0) || ((atp = (struct atp_state *)gref->info) == 0) - || (atp->atp_flags & ATP_CLOSING)) { - dPrintf(D_M_ATP, D_L_ERROR, ("ATPsndreq: stale handle=0x%x, pid=%d\n", - (u_int) gref, gref->pid)); - file_drop(fd); - *err = EINVAL; - return -1; - } - - if (len < atpBDSsize + sizeof(struct atp_set_default) + TOTAL_ATP_HDR_SIZE || - len > atpBDSsize + sizeof(struct atp_set_default) + TOTAL_ATP_HDR_SIZE + - ATP_DATA_SIZE) { - file_drop(fd); - *err = EINVAL; - return -1; - } - - while ((mioc = gbuf_alloc(sizeof(ioc_t), PRI_MED)) == 0) { - struct timespec ts; - /* the value of 10 in terms of hz is 100ms */ - ts.tv_sec = 0; - ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; - - rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atpmioc", &ts); - if (rc != 0) { - *err = rc; - file_drop(fd); - return -1; - } - - } - gbuf_wset(mioc,sizeof(ioc_t)); - len -= atpBDSsize; - while ((m2 = gbuf_alloc(len, PRI_MED)) == 0) { - struct timespec ts; - /* the value of 10 in terms of hz is 100ms */ - ts.tv_sec = 0; - ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; - - rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atpm2", &ts); - if (rc != 0) { - gbuf_freeb(mioc); - file_drop(fd); - *err = rc; - return -1; - } - } - gbuf_wset(m2, len); - gbuf_cont(mioc) = m2; - if (((*err = copyin(CAST_USER_ADDR_T(buf), (caddr_t)bds, atpBDSsize)) != 0) - || ((*err = copyin(CAST_USER_ADDR_T(&buf[atpBDSsize]), - (caddr_t)gbuf_rptr(m2), len)) != 0)) { - gbuf_freem(mioc); - file_drop(fd); - return -1; - } - gbuf_set_type(mioc, MSG_IOCTL); - iocbp = (ioc_t *)gbuf_rptr(mioc); - iocbp->ioc_count = len; - iocbp->ioc_cmd = nowait ? AT_ATP_ISSUE_REQUEST_NOTE : AT_ATP_ISSUE_REQUEST; - sdb = (struct atp_set_default *)gbuf_rptr(m2); - - /* - * The at_snd_req library routine multiplies seconds by 100. - * We need to divide by 100 in order to obtain the timer.
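 *
 * (Worked example, editorial, assuming HZ of 100 ticks/sec: a one
 * second rate arrives from the library as def_rate == 100, giving
 *
 *   timer = (100 * HZ) / 100 = HZ,  i.e. one second of ticks,
 *
 * and any rate small enough to truncate to zero is clamped back up
 * to a full HZ below.)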
- */ - if ((timer = (sdb->def_rate * HZ)/100) == 0) - timer = HZ; - iocbp->ioc_count -= sizeof(struct atp_set_default); - gbuf_rinc(m2,sizeof(struct atp_set_default)); - - /* - * allocate and set up the transaction record - */ - while ((trp = atp_trans_alloc(atp)) == 0) { - struct timespec ts; - /* the value of 10 in terms of hz is 100ms */ - ts.tv_sec = 0; - ts.tv_nsec = 100 *1000 * NSEC_PER_USEC; - - rc = msleep(&atp->atp_delay_event, atalk_mutex, PSOCK | PCATCH, "atptrp", &ts); - if (rc != 0) { - gbuf_freem(mioc); - file_drop(fd); - *err = rc; - return -1; - } - } - trp->tr_retry = sdb->def_retries; - trp->tr_timeout = timer; - trp->tr_bdsp = NULL; - trp->tr_tid = atp_tid(atp); - tid = trp->tr_tid; - - /* - * remember the IOCTL packet so we can ack it - * later - */ - trp->tr_xmt = mioc; - - /* - * Now fill in the header (and remember the bits - * we need to know) - */ - athp = AT_ATP_HDR(m2); - athp->cmd = ATP_CMD_TREQ; - UAS_ASSIGN_HTON(athp->tid, trp->tr_tid); - athp->eom = 0; - athp->sts = 0; - trp->tr_xo = athp->xo; - trp->tr_bitmap = athp->bitmap; - ddp = AT_DDP_HDR(m2); - ddp->type = DDP_ATP; - ddp->src_socket = (at_socket)atp->atp_socket_no; - ddp->src_node = 0; - trp->tr_socket.socket = ddp->dst_socket; - trp->tr_socket.node = ddp->dst_node; - trp->tr_socket.net = NET_VALUE(ddp->dst_net); - trp->tr_local_socket = atp->atp_socket_no; - -#ifdef NOT_YET - /* save the local information in the gref */ - atp->atp_gref->laddr.s_net = NET_VALUE(ddp->src_net); - atp->atp_gref->laddr.s_node = ddp->src_node; - atp->atp_gref->lport = ddp->src_node; - atp->atp_gref->ddptype = DDP_ATP; -#endif - - /* - * Put us in the transaction waiting queue - */ - ATP_Q_APPEND(atp->atp_trans_wait, trp, tr_list); - - /* - * Send the message and set the timer - */ - m = (gbuf_t *)copy_pkt(m2, sizeof(llc_header_t)); - if ( !trp->tr_retry && !trp->tr_bitmap && !trp->tr_xo) - atp_x_done(trp); /* no reason to tie up resources */ - else - atp_timout(atp_req_timeout, trp, trp->tr_timeout); - if (m) - DDP_OUTPUT(m); - - if (nowait) { - file_drop(fd); - return (int)tid; - } - - /* - * wait for the transaction to complete - */ - while ((trp->tr_state != TRANS_DONE) && (trp->tr_state != TRANS_FAILED) && - (trp->tr_state != TRANS_ABORTING)) { - trp->tr_rsp_wait = 1; - rc = msleep(&trp->tr_event, atalk_mutex, PSOCK | PCATCH, "atpsndreq", 0); - if (rc != 0) { - trp->tr_rsp_wait = 0; - file_drop(fd); - *err = rc; - return -1; - } - } - trp->tr_rsp_wait = 0; - - - if (trp->tr_state == TRANS_FAILED || trp->tr_state == TRANS_ABORTING) { - /* - * transaction timed out, return error - */ - atp_free(trp); - file_drop(fd); - *err = ETIMEDOUT; - return -1; - } - - /* - * copy out the recv data - */ - if ((*err = atp_pack_bdsp(trp, (struct atpBDS *)bds)) != 0) { - atp_free(trp); - file_drop(fd); - return -1; - } - - /* - * copyout the result info - */ - if ((*err = copyout((caddr_t)bds, CAST_USER_ADDR_T(buf), atpBDSsize)) != 0) { - atp_free(trp); - file_drop(fd); - return -1; - } - - atp_free(trp); - file_drop(fd); - - return (int)tid; -} /* _ATPsndreq */ - - -/* entry point for ATP send response. respbuf contains a DDP hdr, - * ATP hdr, and atpBDS array. The bdsDataSz field of the first atpBDS - * struct contains the number of atpBDS structs in the array. resplen - * contains the len of the data in respbuf and datalen contains the - * len of the data buffer holding the response packets which the atpBDS - * struct entries point to.
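 *
 * (Editorial sketch of that layout as consumed below:
 *
 *   respbuff: [ DDP hdr | ATP hdr | atpBDS[0] ... atpBDS[n-1] ]
 *             |<-- TOTAL_ATP_HDR_SIZE -->|
 *
 * where n is read out of atpBDS[0].bdsDataSz via get_bds_entries(),
 * and each entry's bdsBuffAddr/bdsBuffSz names one user-space
 * fragment to be copied into the mdata cluster chain.)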
- */ -int -_ATPsndrsp(fd, respbuff, resplen, datalen, err, proc) - int fd; - unsigned char *respbuff; - int resplen; - int datalen; - int *err; - void *proc; -{ - gref_t *gref; - long bufaddr; - gbuf_t *m, *mdata; - short space; - int size; - struct atp_state *atp; - struct atpBDS *bdsp; - int bds_cnt, count, len; - caddr_t dataptr; - - if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) - return -1; - - if ((gref == 0) || ((atp = (struct atp_state *)gref->info) == 0) - || (atp->atp_flags & ATP_CLOSING)) { - dPrintf(D_M_ATP, D_L_ERROR, ("ATPsndrsp: stale handle=0x%x, pid=%d\n", - (u_int) gref, gref->pid)); - - file_drop(fd); - *err = EINVAL; - return -1; - } - - /* - * allocate buffer and copy in the response info - */ - if (resplen < 0 || resplen > TOTAL_ATP_HDR_SIZE + sizeof(struct atpBDS)*ATP_TRESP_MAX) { - file_drop(fd); - *err = EINVAL; - return -1; - } - if ((m = gbuf_alloc_wait(resplen, TRUE)) == 0) { - *err = ENOMEM; - file_drop(fd); - return -1; - } - if ((*err = copyin(CAST_USER_ADDR_T(respbuff), (caddr_t)gbuf_rptr(m), resplen)) != 0) { - gbuf_freeb(m); - file_drop(fd); - return -1; - } - gbuf_wset(m,resplen); - ((at_ddp_t *)gbuf_rptr(m))->src_node = 0; - bdsp = (struct atpBDS *)(gbuf_rptr(m) + TOTAL_ATP_HDR_SIZE); - - /* - * allocate buffers and copy in the response data. - * note that only the size field of each atpBDS entry - * is used internally in the kernel. - */ - bds_cnt = get_bds_entries(m); /* count of # entries */ - /* check correctness of parameters */ - if (bds_cnt > ATP_TRESP_MAX) { - gbuf_freem(m); - *err = EINVAL; - file_drop(fd); - return -1; - } - - for (size = 0, count = 0; count < bds_cnt; count++) { - if (UAS_VALUE(bdsp[count].bdsBuffSz) > ATP_DATA_SIZE) { - gbuf_freem(m); - *err = EINVAL; - file_drop(fd); - return -1; - } - size += UAS_VALUE(bdsp[count].bdsBuffSz); - } - if (size > datalen) { - gbuf_freem(m); - *err = EINVAL; - file_drop(fd); - return -1; - } - - /* get the first mbuf */ - if ((mdata = gbuf_alloc_wait((space = (size > MCLBYTES ? MCLBYTES : size)), TRUE)) == 0) { - gbuf_freem(m); - file_drop(fd); - *err = ENOMEM; - return -1; - } - gbuf_cont(m) = mdata; - dataptr = mtod(mdata, caddr_t); - for (count = 0; count < bds_cnt; bdsp++, count++) { - if ((bufaddr = UAL_VALUE(bdsp->bdsBuffAddr)) != 0 && - (len = UAS_VALUE(bdsp->bdsBuffSz)) != 0) { - if (len > space) { /* enough room ? 
*/ - gbuf_wset(mdata, dataptr - mtod(mdata, caddr_t)); /* set len of last mbuf */ - /* allocate the next mbuf */ - if ((gbuf_cont(mdata) = m_get((M_WAIT), MSG_DATA)) == 0) { - gbuf_freem(m); - file_drop(fd); - *err = ENOMEM; - return -1; - } - mdata = gbuf_cont(mdata); - MCLGET(mdata, M_WAIT); - if (!(mdata->m_flags & M_EXT)) { - m_freem(m); - file_drop(fd); - return(0); - } - dataptr = mtod(mdata, caddr_t); - space = MCLBYTES; - } - /* do the copyin */ - if ((*err = copyin(CAST_USER_ADDR_T(bufaddr), dataptr, len)) != 0) { - gbuf_freem(m); - file_drop(fd); - return -1; - } - dataptr += len; - space -= len; - } - } - gbuf_wset(mdata, dataptr - mtod(mdata, caddr_t)); /* set len of last mbuf */ - gbuf_cont(m)->m_pkthdr.len = size; /* set packet hdr len */ - - atp_send_rsp(gref, m, TRUE); - file_drop(fd); - return 0; -} - -int -_ATPgetreq(fd, buf, buflen, err, proc) - int fd; - unsigned char *buf; - int buflen; - int *err; - void *proc; -{ - gref_t *gref; - register struct atp_state *atp; - register struct atp_rcb *rcbp; - register gbuf_t *m, *m_head; - int size, len; - - if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) - return -1; - - if ((gref == 0) || ((atp = (struct atp_state *)gref->info) == 0) - || (atp->atp_flags & ATP_CLOSING)) { - dPrintf(D_M_ATP, D_L_ERROR, ("ATPgetreq: stale handle=0x%x, pid=%d\n", - (u_int) gref, gref->pid)); - file_drop(fd); - *err = EINVAL; - return -1; - } - - if (buflen < DDP_X_HDR_SIZE + ATP_HDR_SIZE) { - file_drop(fd); - *err = EINVAL; - return -1; - } - - if ((rcbp = atp->atp_attached.head) != NULL) { - /* - * Got one, move it to the active response Q - */ - m_head = rcbp->rc_ioctl; - rcbp->rc_ioctl = NULL; - - if (rcbp->rc_xo) { - ATP_Q_REMOVE(atp->atp_attached, rcbp, rc_list); - rcbp->rc_state = RCB_NOTIFIED; - ATP_Q_APPEND(atp->atp_rcb, rcbp, rc_list); - } else { - /* detach rcbp from attached queue, - * and free any outstanding resources - */ - atp_rcb_free(rcbp); - } - - /* - * copyout the request data, including the protocol header - */ - for (size=0, m=m_head; m; m = gbuf_cont(m)) { - if ((len = gbuf_len(m)) > buflen) - len = buflen; - copyout((caddr_t)gbuf_rptr(m), CAST_USER_ADDR_T(&buf[size]), len); - size += len; - if ((buflen -= len) == 0) - break; - } - gbuf_freem(m_head); - - file_drop(fd); - return size; - } - - file_drop(fd); - return -1; -} - -int -_ATPgetrsp(fd, bdsp, err, proc) - int fd; - struct atpBDS *bdsp; - int *err; - void *proc; -{ - gref_t *gref; - register struct atp_state *atp; - register struct atp_trans *trp; - int tid; - char bds[atpBDSsize]; - - if ((*err = atalk_getref(0, fd, &gref, proc, 1)) != 0) - return -1; - - if ((gref == 0) || ((atp = (struct atp_state *)gref->info) == 0) - || (atp->atp_flags & ATP_CLOSING)) { - dPrintf(D_M_ATP, D_L_ERROR, ("ATPgetrsp: stale handle=0x%x, pid=%d\n", - (u_int) gref, gref->pid)); - file_drop(fd); - *err = EINVAL; - return -1; - } - - for (trp = atp->atp_trans_wait.head; trp; trp = trp->tr_list.next) { - dPrintf(D_M_ATP, D_L_INFO, - ("ATPgetrsp: atp:0x%x, trp:0x%x, state:%d\n", - (u_int) atp, (u_int) trp, trp->tr_state)); - - switch (trp->tr_state) { - case TRANS_DONE: - if ((*err = copyin(CAST_USER_ADDR_T(bdsp), - (caddr_t)bds, sizeof(bds))) != 0) { - atp_free(trp); - file_drop(fd); - return -1; - } - if ((*err = atp_pack_bdsp(trp, (struct atpBDS *)bds)) != 0) { - atp_free(trp); - file_drop(fd); - return -1; - } - tid = (int)trp->tr_tid; - atp_free(trp); - if ((*err = copyout((caddr_t)bds, CAST_USER_ADDR_T(bdsp), sizeof(bds))) != 0) { - file_drop(fd); - return -1; - } - 
file_drop(fd); - return tid; - - case TRANS_FAILED: - /* - * transaction timed out, return error - */ - atp_free(trp); - file_drop(fd); - *err = ETIMEDOUT; - return -1; - - default: - continue; - } - } - - file_drop(fd); - *err = EINVAL; - return -1; -} - -void -atp_drop_req(gref, m) - gref_t *gref; - gbuf_t *m; -{ - struct atp_state *atp; - struct atp_rcb *rcbp; - at_atp_t *athp; - at_ddp_t *ddp; - - atp = (struct atp_state *)gref->info; - if (atp->dflag) - atp = (struct atp_state *)atp->atp_msgq; - ddp = AT_DDP_HDR(m); - athp = AT_ATP_HDR(m); - - /* - * search for the corresponding rcb - */ - for (rcbp = atp->atp_rcb.head; rcbp; rcbp = rcbp->rc_list.next) { - if ( (rcbp->rc_tid == UAS_VALUE_NTOH(athp->tid)) && - (rcbp->rc_socket.node == ddp->src_node) && - (rcbp->rc_socket.net == NET_VALUE(ddp->src_net)) && - (rcbp->rc_socket.socket == ddp->src_socket) ) - break; - } - - /* - * drop the request - */ - if (rcbp) - atp_rcb_free(rcbp); - - gbuf_freem(m); -} diff --git a/bsd/netat/aurp.h b/bsd/netat/aurp.h deleted file mode 100644 index 9df123b1b..000000000 --- a/bsd/netat/aurp.h +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 23, 1996, by Justin C. 
Walker - * - * File: aurp.h - */ - -#ifdef AURP_SUPPORT - -#ifndef _NETAT_AURP_H_ -#define _NETAT_AURP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -/* - * AURP device ioctl (I_STR) 'subcommands' - */ -#define AUC_CFGTNL 0 /* Configure Tunnels */ -#define AUC_SHTDOWN 1 /* Shutdown AURP */ -#define AUC_EXPNET 2 /* Configure exported networks */ -#define AUC_HIDENET 3 /* Configure hidden networks */ -#define AUC_UDPPORT 4 /* UDP Port number */ -#define AUC_NETLIST 5 /* List of remote endpoints */ -#define AUC_TIMER 6 /* Configured update interval timer */ -#define AUC_ADDNET 7 /* Add remote endpoints */ -#define AUC_ACCEPTALL 8 /* Accept all nets */ -#define AUC_MAX 9 /* Maximum number of access nets */ - -/* Default AURP-over-UDP port */ -#define AURP_SOCKNUM 387 -#define AURP_MAXNETACCESS 64 - -#ifdef KERNEL_PRIVATE - -#define AURPCODE_REG 0 -#define AURPCODE_RTMPPKT 1 -#define AURPCODE_DATAPKT 2 -#define AURPCODE_AURPPROTO 3 -#define AURPCODE_DEBUGINFO 10 -#ifdef NOT_USED -#define AURPCODE_RTINFO 11 /* was used to set up pointers to the - routing table, the zone table, and - several functions */ -#endif -#define AURPCODE_RTUPDATE 12 - -#define AURPSTATE_Unconnected 0 -#define AURPSTATE_Connected 1 -#define AURPSTATE_WaitingForOpenRsp 2 -#define AURPSTATE_WaitingForRIRsp 3 -#define AURPSTATE_WaitingForTickleAck 4 -#define AURPSTATE_WaitingForRIAck1 5 -#define AURPSTATE_WaitingForRIAck2 6 -#define AURPSTATE_WaitingForRIAck3 7 - -#define AURPCMD_RIReq 1 -#define AURPCMD_RIRsp 2 -#define AURPCMD_RIAck 3 -#define AURPCMD_RIUpd 4 -#define AURPCMD_RDReq 5 -#define AURPCMD_ZReq 6 -#define AURPCMD_ZRsp 7 -#define AURPCMD_OpenReq 8 -#define AURPCMD_OpenRsp 9 -#define AURPCMD_Tickle 14 -#define AURPCMD_TickleAck 15 - -#define AURPSUBCODE_ZoneInfo1 1 -#define AURPSUBCODE_ZoneInfo2 2 -#define AURPSUBCODE_GetZoneNets 3 -#define AURPSUBCODE_GetDomainZoneList 4 - -#define AURPEV_Null 0 -#define AURPEV_NetAdded 1 -#define AURPEV_NetDeleted 2 -#define AURPEV_NetRouteChange 3 -#define AURPEV_NetDistChange 4 -#define AURPEV_NetZoneChange 5 - -#define AURP_Version 1 -#define AURP_ProbeRetryInterval 300 -#define AURP_MaxTickleRetry 4 -#define AURP_TickleRetryInterval 30 -#define AURP_MaxRetry 10 -#define AURP_RetryInterval 3 -#define AURP_UpdateRate 1 -#define AURP_UDType 0 -#define AURP_UDNode 1 -#define AURP_UDSize 2 -#define AURP_FirstSeqNum 1 -#define AURP_LastSeqNum 65535 -#define AURP_MaxPktSize 1400 -#define AURP_MaxNetAccess 64 -#define AURP_NetHiden 0x01 - -#define AURPERR_NormalConnectionClose -1 -#define AURPERR_RoutingLoopDetected -2 -#define AURPERR_ConnectionOutOfSync -3 -#define AURPERR_OptionNegotiationError -4 -#define AURPERR_InvalidVersionNumber -5 -#define AURPERR_InsufficientResources -6 -#define AURPERR_AuthenticationError -7 - -#define AURPFLG_NA 0x4000 -#define AURPFLG_ND 0x2000 -#define AURPFLG_NDC 0x1000 -#define AURPFLG_ZC 0x0800 -#define AURPFLG_RMA 0x4000 -#define AURPFLG_HCRA 0x2000 -#define AURPFLG_SZI 0x4000 -#define AURPFLG_LAST 0x8000 - -/* - * AURP state block - */ -typedef struct { - unsigned char get_zi; /* get zone info flag */ - unsigned char rem_node; /* node id of a tunnel peer */ - unsigned char tickle_retry; /* tickle retry count */ - unsigned char rcv_retry; /* data receiver retry count */ - unsigned char snd_state; /* data sender state */ - unsigned char rcv_state; /* data receiver state */ - unsigned char filler[2]; - unsigned short rcv_update_rate; - unsigned short snd_next_entry; /* next entry in RT */ - unsigned short rcv_env; - unsigned short snd_sui; -
unsigned short rcv_connection_id; /* data receiver connection id */ - unsigned short snd_connection_id; /* data sender connection id */ - unsigned short rcv_sequence_number; /* data receiver sequence number */ - unsigned short snd_sequence_number; /* data sender sequence number */ - int rcv_tmo; - int snd_tmo; - gbuf_t *rsp_m; - gbuf_t *upd_m; -} aurp_state_t; - -/* - * AURP protocol header - */ -typedef struct { - unsigned short connection_id; - unsigned short sequence_number; - unsigned short command_code; - unsigned short flags; -} aurp_hdr_t; - -extern gref_t *aurp_gref; -extern unsigned char dst_addr_cnt; -extern unsigned char net_access_cnt; -extern unsigned char net_export; -extern unsigned short rcv_connection_id; -extern int net_port; -extern int update_tmo; -extern aurp_state_t aurp_state[]; -extern unsigned short net_access[]; - -struct myq -{ struct mbuf *q_head; - struct mbuf *q_tail; - int q_cnt; -}; - - -#include - -/* - * Quandary: if we use a single socket, we have to rebind on each call. - * If we use separate sockets per tunnel endpoint, we have to examine - * each one on wakeup. What to do; what to do? - */ -struct aurp_global_t -{ int src_addr; /* What's our IP address? */ - int udp_port; /* Local UDP port */ - unsigned short net_access[AURP_MAXNETACCESS]; - long dst_addr[256]; /* Tunnel 'other ends', passed in from user */ - int pid; /* Who are we? */ - struct socket *tunnel; /* IP socket for all IP endpoints */ - int event; /* Sleep queue anchor */ - int event_anchor; /* Sleep queue anchor */ - atlock_t glock; /* aurp_global lock */ - struct uio auio; /* Dummy uio struct for soreceive() */ - /* Statistics */ - unsigned int toosmall; /* size less than domain header, from UDP */ - unsigned int no_mbufs; /* gbuf_to_mbuf failed */ - unsigned int no_gbufs; /* mbuf_to_gbuf failed */ - unsigned int shutdown; /* shutdown flag */ - unsigned int running; /* running flag */ -}; - -#define AE_ATALK 0x01 /* A/talk input event */ -#define AE_UDPIP 0x02 /* UDP/IP input event */ -#define AE_SHUTDOWN 0x04 /* Shutdown AURP process */ - -void aurp_wakeup(struct socket *, caddr_t, int); -struct mbuf *at_gbuf_to_mbuf(gbuf_t *); -gbuf_t *at_mbuf_to_gbuf(struct mbuf *, int); -int at_insert(gbuf_t *m, unsigned int type, unsigned int node); -int ddp_AURPfuncx(int code, void *param, unsigned char node); -int AURPinit(void); -int aurpd_start(void); -void atalk_to_ip(gbuf_t *m); -void AURPaccess(void); -void AURPshutdown(void); -void AURPiocack(gref_t *gref, gbuf_t *m); -void AURPiocnak(gref_t *gref, gbuf_t *m, int error); -void AURPsndZReq(aurp_state_t *state); -void AURPsndZRsp(aurp_state_t *state, gbuf_t *dat_m, int flag); -void AURPsndRIUpd(aurp_state_t *state); -void AURPsndRIReq(aurp_state_t *state); -void AURPsndRIAck(aurp_state_t *state, gbuf_t *m, unsigned short flags); -void AURPsndOpenReq(aurp_state_t *state); -void AURPsndRDReq(aurp_state_t *state); -void AURPrcvZReq(aurp_state_t *state, gbuf_t *m); -void AURPrcvZRsp(aurp_state_t *state, gbuf_t *m); -void AURPrcvRIUpd(aurp_state_t *state, gbuf_t *m); -void AURPrcvRIReq(aurp_state_t *state, gbuf_t *m); -void AURPrcvRIAck(aurp_state_t *state, gbuf_t *m); -void AURPrcvRIRsp(aurp_state_t *state, gbuf_t *m); -void AURPrcvOpenReq(aurp_state_t *state, gbuf_t *m); -void AURPrcvOpenRsp(aurp_state_t *state, gbuf_t *m); -void AURPrcvTickle(aurp_state_t *state, gbuf_t *m); -void AURPrcvTickleAck(aurp_state_t *state, gbuf_t *m); -void AURPrcvRDReq(aurp_state_t *state, gbuf_t *m); -void AURPfreemsg(gbuf_t *m); -void AURPrtupdate(RT_entry *entry, 
unsigned char ev); -void AURPsend(gbuf_t *mdata, int type, int node); -void AURPcleanup(aurp_state_t *state); -void AURPpurgeri(unsigned char node); -int AURPgetri(short next_entry, unsigned char *buf, short *len); -int AURPsetri(unsigned char node, gbuf_t *m); -int AURPupdateri(unsigned char node, gbuf_t *m); - -/* AURP header for IP tunneling */ -typedef struct aurp_domain -{ char dst_length; - char dst_authority; - short dst_distinguisher; - long dst_address; - char src_length; - char src_authority; - short src_distinguisher; - long src_address; - short version; - short reserved; - short type; -} aurp_domain_t; - -/* AURP/domain header constants */ -#define AUD_Version 0x1 -#define AUD_Atalk 0x2 -#define AUD_AURP 0x3 - -/* IP domain identifier constants */ -#define IP_LENGTH 7 -#define IP_AUTHORITY 1 -#define IP_DISTINGUISHER 0 -/* Need this because the )(*&^%$#@ compiler rounds up the size */ -#define IP_DOMAINSIZE 22 - -/****### LD 9/26/97*/ -extern struct aurp_global_t aurp_global; -#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_AURP_H_ */ - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_aurpd.c b/bsd/netat/aurp_aurpd.c deleted file mode 100644 index 1fed65a63..000000000 --- a/bsd/netat/aurp_aurpd.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (c) 2000-2008 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 25, 1996, by Justin C. Walker - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
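The IP_DOMAINSIZE constant above is pinned to 22 because sizeof(aurp_domain_t) includes compiler padding and cannot be used as the on-wire length; the exasperated comment next to it says as much. A small host-side C sketch that makes the mismatch visible; the declared fields total 22 bytes on the ILP32 targets this code assumed, while the padded sizeof varies by ABI:

#include <stdio.h>

/* Same field widths as the aurp_domain declaration above. */
typedef struct aurp_domain {
	char  dst_length;
	char  dst_authority;
	short dst_distinguisher;
	long  dst_address;
	char  src_length;
	char  src_authority;
	short src_distinguisher;
	long  src_address;
	short version;
	short reserved;
	short type;
} aurp_domain_t;

int main(void)
{
	/* With 4-byte longs the unpadded field total is 22, but the
	 * compiler rounds sizeof up for alignment, so the wire length
	 * must be carried as a separate constant (IP_DOMAINSIZE). */
	printf("sizeof(aurp_domain_t) = %zu, on-wire size = %d\n",
	    sizeof(aurp_domain_t), 22);
	return 0;
}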
- * - * File: aurpd.c - */ - -/* - * Kernel process to implement the AURP daemon: - * manage tunnels to remote AURP servers across IP networks - */ -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#define M_RCVBUF (64 * 1024) -#define M_SNDBUF (64 * 1024) - -extern lck_mtx_t * atalk_mutex; - -static int ip_to_atalk(struct sockaddr_in *fp, register gbuf_t *p_mbuf); -static int aurp_bindrp(struct socket *so); - -struct aurp_global_t aurp_global; - -/* - * Initialize the aurp pipe - - * -Create, initialize, and start the aurpd kernel process; we need - * a process to permit queueing between the socket and the stream, - * which is necessary for orderly access to the socket structure. - * -The user process (aurpd) is there to 'build' the AURP - * stream, act as a 'logging agent' (:-}), and hold open the stream - * during its use. - * -Data and AURP packets from the DDP stream will be fed into the - * UDP tunnel (AURPsend()) - * -Data and AURP packets from the UDP tunnel will be fed into the - * DDP stream (ip_to_atalk(), via the kernel process). - */ -int -aurpd_start() -{ - register int error; - register struct socket *so; - struct mbuf *m; - int maxbuf; - struct sockopt sopt; - - if (suser(kauth_cred_get(), 0) != 0 ) - return(EPERM); - - /* - * Set up state prior to starting kernel process so we can back out - * (error return) if something goes wrong. - */ - bzero((char *)&aurp_global.tunnel, sizeof(aurp_global.tunnel)); - /*lock_alloc(&aurp_global.glock, LOCK_ALLOC_PIN, AURP_EVNT_LOCK, -1);*/ - ATEVENTINIT(aurp_global.event_anchor); - - /* open udp socket */ - if (aurp_global.udp_port == 0) - aurp_global.udp_port = AURP_SOCKNUM; - error = socreate(AF_INET, &aurp_global.tunnel, SOCK_DGRAM, - IPPROTO_UDP); - if (error) - { dPrintf(D_M_AURP, D_L_FATAL, ("AURP: Can't get socket (%d)\n", - error)); - return(error); - } - - so = aurp_global.tunnel; - - if ((error = aurp_bindrp(so)) != 0) - { dPrintf(D_M_AURP, D_L_FATAL, - ("AURP: Can't bind to port %d (error %d)\n", - aurp_global.udp_port, error)); - soclose(so); - return(error); - } - - sblock(&so->so_rcv, M_WAIT); - sblock(&so->so_snd, M_WAIT); - - /* - * Set socket Receive buffer size - */ - m = m_get(M_WAIT, MT_SOOPTS); - if (m == NULL) { - error = ENOBUFS; - goto out; - } else { - maxbuf = M_RCVBUF; - sopt.sopt_val = CAST_USER_ADDR_T(&maxbuf); - sopt.sopt_valsize = sizeof(maxbuf); - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_RCVBUF; - sopt.sopt_dir = SOPT_SET; - sopt.sopt_p = kernproc; - if ((error = sosetopt(so, &sopt)) != 0) - goto out; - } - - /* - * Set socket Send buffer size - */ - m = m_get(M_WAIT, MT_SOOPTS); - if (m == NULL) { - error = ENOBUFS; - goto out; - } else { - - maxbuf = M_SNDBUF; - sopt.sopt_val = CAST_USER_ADDR_T(&maxbuf); - sopt.sopt_valsize = sizeof(maxbuf); - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_SNDBUF; - sopt.sopt_dir = SOPT_SET; - sopt.sopt_p = kernproc; - if ((error = sosetopt(so, &sopt)) != 0) - goto out; - } - - so->so_upcall = aurp_wakeup; - so->so_upcallarg = (caddr_t)AE_UDPIP; /* Yuck */ - so->so_state |= SS_NBIO; - so->so_rcv.sb_flags |=(SB_SEL|SB_NOINTR); - so->so_snd.sb_flags |=(SB_SEL|SB_NOINTR); - -out: - sbunlock(&so->so_snd, 0); - sbunlock(&so->so_rcv, 0); - - return(error); -} - -int -AURPgetmsg(err) - int *err; -{ register 
struct socket *so; - register int events; - - so = aurp_global.tunnel; - *err = 0; - - for (;;) - { gbuf_t *from, *p_mbuf; - int flags = MSG_DONTWAIT; - uio_t auio; - char uio_buf[ UIO_SIZEOF(0) ]; - - /* - * Wait for a packet to arrive. This will be from the - * IP side - sowakeup() calls aurp_wakeup() - * when a packet arrives - */ - - events = aurp_global.event; - if (((*err == 0) || (*err == EWOULDBLOCK)) && events == 0) - { - lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); - *err = msleep(&aurp_global.event_anchor, atalk_mutex, PSOCK | PCATCH, "AURPgetmsg", 0); - events = aurp_global.event; - aurp_global.event = 0; - } - - /* - * Shut down if we have the AE_SHUTDOWN event or if we got - * a system error other than EWOULDBLOCK, such as EINTR. - */ - if (((*err != EWOULDBLOCK) && (*err != 0)) || events & AE_SHUTDOWN) - { - dPrintf(D_M_AURP, D_L_SHUTDN_INFO, - ("AURPgetmsg: AE_SHUTDOWN detected--starting shutdown sequence\n")); - aurp_global.shutdown = 1; - while (aurp_global.running) - ; - /*lock_free(&aurp_global.glock);*/ - aurp_global.tunnel = 0; - aurp_global.event = 0; - aurp_global.shutdown = 0; - soclose(so); - if (*err == 0) - *err = ESHUTDOWN; - dPrintf(D_M_AURP, D_L_SHUTDN_INFO, - ("AURPgetmsg: shutdown completed\n")); - return -1; - } - - - - /* - * Set up the nominal uio structure - - * give it no iov's, point off to non-existent user space, - * but make sure the 'resid' count means something. - */ - auio = uio_createwithbuffer(0, 0, UIO_SYSSPACE, UIO_READ, - &uio_buf[0], sizeof(uio_buf)); - - /* Keep up an even flow... */ - for (;;) - { -/* - * This should be large enough to encompass a full DDP packet plus - * domain header. - */ -#define A_LARGE_SIZE 700 - - flags = MSG_DONTWAIT; - uio_setresid(auio, A_LARGE_SIZE); - *err = soreceive(so, (struct sockaddr **)&from, auio, &p_mbuf, 0, &flags); - dPrintf(D_M_AURP, D_L_VERBOSE, - ("AURPgetmsg: soreceive returned %d, aurp_global.event==0x%x\n", *err, events)); - /* soreceive() sets *mp to zero! at start */ - if (p_mbuf) - ip_to_atalk((struct sockaddr_in *)from, p_mbuf); - if (*err || (p_mbuf == NULL)) { - /* - * An error occurred in soreceive(), - * so clear the data input event flag - * and break out of this inner loop. - * - * XXX Note that clearing AE_UDPIP here could - * cause us to lose an AE_UDPIP event that - * was posted in aurp_global.event between - * the soreceive() above and the code here. - * The protocol should recover from this - * lost event, though, since the next - * request (a tickle, for example) from - * the other end of the tunnel will cause - * another AE_UDPIP event to be posted, - * which will wake us from the sleep at - * the top of the outer loop. - */ - aurp_global.event &= ~AE_UDPIP; - dPrintf(D_M_AURP, D_L_WARNING, ("AURPgetmsg: spurious soreceive, err==%d, p_mbuf==0x%x\n", *err, (unsigned int) p_mbuf)); - break; - } - } - } - return -1; -} - -/* - * Wakeup the sleeping giant - we've put a message on his queue(s). - * The arg indicates what queue has been updated. - * - * This conforms to the so_upcall function pointer member of struct sockbuf. - */ -void aurp_wakeup(__unused struct socket *so, register caddr_t p, __unused int state) -{ - register int bit; - - bit = (int) p; - aurp_global.event |= bit; - - dPrintf(D_M_AURP, D_L_STATE_CHG, - ("aurp_wakeup: bit 0x%x, aurp_global.event now 0x%x\n", - bit, aurp_global.event)); - - wakeup(&aurp_global.event_anchor); -} - -/* - * Try to bind to the specified reserved port. - * Sort of like sobind(), but no suser() check. 
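aurp_wakeup() and AURPgetmsg() above hand events across through a single word of bits: the socket upcall ORs a bit into aurp_global.event and wakes the sleeper, and the consumer snapshots the word and clears it before acting. A single-threaded C sketch of just that bit accounting, with the msleep()/wakeup() pairing left out so the flow can be read in isolation:

#include <stdio.h>

#define AE_UDPIP    0x02
#define AE_SHUTDOWN 0x04

static int event;	/* stands in for aurp_global.event */

/* aurp_wakeup() analog: producer side just ORs a bit in. */
static void wakeup_bit(int bit) { event |= bit; }

int main(void)
{
	wakeup_bit(AE_UDPIP);		/* a packet arrived */
	wakeup_bit(AE_SHUTDOWN);	/* shutdown was requested */

	int events = event;		/* consumer snapshots the word... */
	event = 0;			/* ...then clears it, as AURPgetmsg does */

	if (events & AE_SHUTDOWN)
		printf("start shutdown sequence\n");
	if (events & AE_UDPIP)
		printf("drain the UDP socket\n");
	return 0;
}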
- */ -static int -aurp_bindrp(struct socket *so) -{ - struct sockaddr_in sin; - struct proc *p = current_proc(); - int error; - - - bzero(&sin, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = htons(aurp_global.src_addr); - sin.sin_port = htons(aurp_global.udp_port); - sin.sin_len = sizeof(struct sockaddr_in); - - sblock(&so->so_rcv, M_WAIT); - sblock(&so->so_snd, M_WAIT); - so->so_state |= SS_PRIV; - error = (*so->so_proto->pr_usrreqs->pru_bind)(so, (struct sockaddr *) &sin, p); - sbunlock(&so->so_snd, 0); - sbunlock(&so->so_rcv, 0); - - return (error); -} - -/* - * receive from UDP - * rem_addr is the 'source address' sockaddr; p_mbuf is the data mbuf. - * Use the source address to find the 'node number' (index of the address), - * and pass that to the next stage. - */ -int ip_to_atalk(register struct sockaddr_in *rem_addr, register gbuf_t *p_mbuf) -{ - register aurp_domain_t *domain; - unsigned char node; - - - /* determine the node where the packet came from */ - for (node=1; node <= dst_addr_cnt; node++) { - if (aurp_global.dst_addr[node] == *(long *)&rem_addr->sin_addr) - break; - } - if (node > dst_addr_cnt) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrecv: invalid node, %d.%lx\n", - rem_addr->sin_port, - rem_addr->sin_addr.s_addr)); - - gbuf_freem(p_mbuf); - FREE(rem_addr, M_SONAME); - return -1; - } - - /* validate the domain */ - domain = (aurp_domain_t *)gbuf_rptr(p_mbuf); - if ( (domain->dst_length != IP_LENGTH) || - (domain->dst_authority != IP_AUTHORITY) || - (domain->version != AUD_Version) || - ((domain->type != AUD_Atalk) && (domain->type != AUD_AURP)) ) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrecv: invalid domain, %d.%lx\n", - rem_addr->sin_port, - rem_addr->sin_addr.s_addr)); - - gbuf_freem(p_mbuf); - FREE(rem_addr, M_SONAME); - return -1; - } - - /* Remove domain header */ - p_mbuf->m_pkthdr.len -= IP_DOMAINSIZE; - gbuf_rinc(p_mbuf,IP_DOMAINSIZE); - gbuf_set_type(p_mbuf, MSG_DATA); - - /* forward the packet to the local AppleTalk stack */ - - at_insert(p_mbuf, domain->type, node); - FREE(rem_addr, M_SONAME); - return 0; -} - -/* - * send to UDP - * The real work has been done already. Here, we just cobble together - * a sockaddr for the destination and call sosend(). - */ -void -atalk_to_ip(register gbuf_t *m) -{ register aurp_domain_t *domain; - int error; - int flags = MSG_DONTWAIT; - struct sockaddr_in rem_addr; - - m_mchtype(m, MT_HEADER); - m->m_pkthdr.len = gbuf_msgsize(m); - m->m_pkthdr.rcvif = 0; - - bzero((char *) &rem_addr, sizeof(rem_addr)); - rem_addr.sin_family = PF_INET; - rem_addr.sin_port = aurp_global.udp_port; - rem_addr.sin_len = sizeof (struct sockaddr_in); - domain = (aurp_domain_t *)gbuf_rptr(m); - *(long *) &rem_addr.sin_addr = domain->dst_address; - - aurp_global.running++; - if (aurp_global.shutdown) { - gbuf_freem(m); - aurp_global.running--; - dPrintf(D_M_AURP, D_L_SHUTDN_INFO, - ("atalk_to_ip: detected aurp_global.shutdown state\n")); - return; - } - dPrintf(D_M_AURP, D_L_VERBOSE, ("atalk_to_ip: calling sosend\n")); - error = sosend(aurp_global.tunnel, (struct sockaddr *) &rem_addr, NULL, m, NULL, flags); - if (error) - { /*log error*/ - dPrintf(D_M_AURP, D_L_ERROR, ("AURP: sosend error (%d)\n", - error)); - } - - aurp_global.running--; - return; -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_cfg.c b/bsd/netat/aurp_cfg.c deleted file mode 100644 index c8c0e003b..000000000 --- a/bsd/netat/aurp_cfg.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
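ip_to_atalk() above resolves the sending peer by a linear scan of aurp_global.dst_addr[], where node numbers are 1-based indexes into the table and a scan that runs past dst_addr_cnt means the address is not a configured tunnel endpoint. A standalone C sketch of that lookup; addr_to_node is a hypothetical wrapper that returns 0 for no match:

#include <stdio.h>

static long dst_addr[256];		/* tunnel peer addresses, 1-based */
static unsigned char dst_addr_cnt;

/* Returns the 1-based node number for addr, or 0 if it is not a known peer. */
static unsigned char addr_to_node(long addr)
{
	unsigned char node;

	for (node = 1; node <= dst_addr_cnt; node++)
		if (dst_addr[node] == addr)
			return node;
	return 0;	/* the kernel code detects this as node > dst_addr_cnt */
}

int main(void)
{
	dst_addr[1] = 0x0a000001;
	dst_addr[2] = 0x0a000002;
	dst_addr_cnt = 2;
	printf("%u %u\n", addr_to_node(0x0a000002), addr_to_node(0x7f000001));
	return 0;	/* prints "2 0" */
}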
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - * - * File: cfg.c - */ - -#ifdef AURP_SUPPORT - -#define RESOLVE_DBG -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -static int aurp_inited = 0; -static char aurp_minor_no[4]; - -int aurp_open(gref) - gref_t *gref; -{ - extern void AURPcmdx(); - int i; - - if (!aurp_inited) - aurp_inited = 1; - - for (i=1; i < sizeof(aurp_minor_no); i++) { - if (aurp_minor_no[i] == 0) { - aurp_minor_no[i] = (char )i; - break; - } - } - if (i == sizeof(aurp_minor_no)) - return EAGAIN; - if (i == 1) { - aurp_gref = gref; - if (ddp_AURPfuncx(AURPCODE_REG, AURPcmdx, 0)) { - aurp_gref = 0; - aurp_minor_no[i] = 0; - return EPROTOTYPE; - } - } - - gref->info = (void *)&aurp_minor_no[i]; - return 0; -} - -int aurp_close(gref) - gref_t *gref; -{ - if (*(char *)gref->info == 1) { - aurp_gref = 0; - aurp_inited = 0; - ddp_AURPfuncx(AURPCODE_REG, 0, 0); - } - - *(char *)gref->info = 0; - gref->info = 0; - return 0; -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_gdata.c b/bsd/netat/aurp_gdata.c deleted file mode 100644 index 8765cfa58..000000000 --- a/bsd/netat/aurp_gdata.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - * - * File: gdata.c - */ - -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -gref_t *aurp_gref; -unsigned char dst_addr_cnt; -unsigned char net_access_cnt; -unsigned char net_export; -unsigned short rcv_connection_id; -int net_port; -int update_tmo; -aurp_state_t aurp_state[256]; -unsigned short net_access[AURP_MaxNetAccess]; - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_misc.c b/bsd/netat/aurp_misc.c deleted file mode 100644 index e9fdf41e0..000000000 --- a/bsd/netat/aurp_misc.c +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
- * - * File: misc.c - */ - -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* */ -void AURPiocack(gref, m) - gref_t *gref; - gbuf_t *m; -{ - /* send ok reply to ioctl command */ - gbuf_set_type(m, MSG_IOCACK); - atalk_putnext(gref, m); -} - -void AURPiocnak(gref, m, error) - gref_t *gref; - gbuf_t *m; - int error; -{ - ioc_t *iocbp = (ioc_t *)gbuf_rptr(m); - - /* send error reply to ioctl command */ - if (gbuf_cont(m)) { - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = 0; - } - iocbp->ioc_error = error; - iocbp->ioc_count = 0; - iocbp->ioc_rval = -1; - gbuf_set_type(m, MSG_IOCNAK); - atalk_putnext(gref, m); -} - -/* */ -void AURPupdate(arg) - void *arg; -{ - unsigned char node; - aurp_state_t *state; - - atalk_lock(); - - state = (aurp_state_t *)&aurp_state[1]; - - if (aurp_gref == 0) { - atalk_unlock(); - return; - } - /* - * for every tunnel peer, do the following periodically: - * 1. send zone requests to determine zone names of networks - * that still do not have associated zone names. - * 2. send any RI updates that are pending - */ - for (node=1; node <= dst_addr_cnt; node++, state++) { - AURPsndZReq(state); - AURPsndRIUpd(state); - } - - /* restart the periodic update timer */ - timeout(AURPupdate, arg, AURP_UpdateRate*10*HZ); - update_tmo = 1; - - atalk_unlock(); -} - -/* */ -void AURPfreemsg(m) - gbuf_t *m; -{ - gbuf_t *tmp_m; - - while ((tmp_m = m) != 0) { - m = gbuf_next(m); - gbuf_next(tmp_m) = 0; - gbuf_freem(tmp_m); - } -} - -/* */ -int AURPinit() -{ - unsigned char node; - aurp_state_t *state = (aurp_state_t *)&aurp_state[1]; - short entry_num; - RT_entry *entry = (RT_entry *)RT_table; - - /* start the periodic update timer */ - timeout(AURPupdate, 0, AURP_UpdateRate*10*HZ); - update_tmo = 1; - - /* initialize AURP flags for entries in the RT table */ - for (entry_num=0; entry_num < RT_maxentry; entry_num++,entry++) - entry->AURPFlag = 0; - - /* initiate connections to peers */ - for (node=1; node <= dst_addr_cnt; node++, state++) { - bzero((char *)state, sizeof(*state)); - state->rem_node = node; - state->snd_state = AURPSTATE_Unconnected; - state->rcv_state = AURPSTATE_Unconnected; - dPrintf(D_M_AURP, D_L_STARTUP_INFO, - ("AURPinit: sending OpenReq to node %u\n", node)); - AURPsndOpenReq(state); - } - - return 0; -} - -/* */ -void AURPcleanup(state) - aurp_state_t *state; -{ - if (state->rsp_m) { - gbuf_freem(state->rsp_m); - state->rsp_m = 0; - } - - if (state->upd_m) { - gbuf_freem(state->upd_m); - state->upd_m = 0; - } -} - -/* - * - */ -void AURPshutdown() -{ - unsigned char node; - aurp_state_t *state = (aurp_state_t *)&aurp_state[1]; - - /* cancel the periodic update timer */ - untimeout(AURPupdate, 0); - update_tmo = 0; - - /* notify tunnel peers of router going-down */ - for (node=1; node <= dst_addr_cnt; node++, state++) { - AURPcleanup(state); - AURPsndRDReq(state); - } - - /* bring down the router */ - aurp_wakeup(NULL, (caddr_t) AE_SHUTDOWN, 0); -} - -void AURPaccess() -{ - unsigned char i; - short entry_num; - RT_entry *entry; - - entry = (RT_entry *)RT_table; - for (entry_num=0; entry_num < RT_maxentry; entry_num++,entry++) - entry->AURPFlag = net_export ? AURP_NetHiden : 0; - - for (i=0; i < net_access_cnt; i++) { - /* export or hide networks as configured */ - if ((entry = rt_blookup(net_access[i])) != 0) - entry->AURPFlag = net_export ? 
0 : AURP_NetHiden; - } -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_open.c b/bsd/netat/aurp_open.c deleted file mode 100644 index d3bec0441..000000000 --- a/bsd/netat/aurp_open.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - * - * File: open.c - */ - -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -/* locked version of AURPsndOpenReq */ -void AURPsndOpenReq_locked(state) - aurp_state_t *state; -{ - atalk_lock(); - AURPsndOpenReq(state); - atalk_unlock(); -} - -/* */ -void AURPsndOpenReq(state) - aurp_state_t *state; -{ - int msize; - gbuf_t *m; - aurp_hdr_t *hdrp; - - if (aurp_gref == 0) { - return; - } - if (state->rcv_retry && (state->rcv_state != AURPSTATE_WaitingForOpenRsp)) { - return; - } - - /* stop trying if the retry count exceeds the maximum value */ - if (++state->rcv_retry > AURP_MaxRetry) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPsndOpenReq: no response, node %u\n", - state->rem_node)); - state->rcv_state = AURPSTATE_Unconnected; - state->rcv_tmo = 0; - state->rcv_retry = 0; - return; - } - - msize = sizeof(aurp_hdr_t) + 3; - if ((m = (gbuf_t *)gbuf_alloc(msize, PRI_MED)) != 0) { - gbuf_wset(m,msize); - - /* construct the open request packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - if (state->rcv_retry > 1) - hdrp->connection_id = state->rcv_connection_id; - else { - if (++rcv_connection_id == 0) - rcv_connection_id = 1; - hdrp->connection_id = rcv_connection_id; - } - hdrp->sequence_number = 0; - hdrp->command_code = AURPCMD_OpenReq; - hdrp->flags = (AURPFLG_NA | AURPFLG_ND | AURPFLG_NDC | AURPFLG_ZC); - *(short *)(hdrp+1) = AURP_Version; - ((char *)(hdrp+1))[2] = 0; /* option count */ - - /* update state info */ - state->rcv_connection_id = hdrp->connection_id; - state->rcv_state = AURPSTATE_WaitingForOpenRsp; - - /* send the packet */ - dPrintf(D_M_AURP, D_L_TRACE, - ("AURPsndOpenReq: sending AURPCMD_OpenReq, 
node %u\n", - state->rem_node)); - AURPsend(m, AUD_AURP, state->rem_node); - } - - /* start the retry timer */ - timeout(AURPsndOpenReq_locked, state, AURP_RetryInterval*HZ); - state->rcv_tmo = 1; -} - -/* */ -void AURPrcvOpenReq(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - short rc, version; - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - unsigned short sui = hdrp->flags; - - /* make sure we're in a valid state to accept it */ - if ((update_tmo == 0) || ((state->snd_state != AURPSTATE_Unconnected) && - (state->snd_state != AURPSTATE_Connected))) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvOpenReq: unexpected request, update_tmo=0x%x, snd_state=%u\n", (unsigned int) update_tmo, state->snd_state)); - gbuf_freem(m); - return; - } - - /* check for the correct version number */ - version = *(short *)(hdrp+1); - if (version != AURP_Version) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvOpenReq: invalid version number %d, expected %d\n", version, AURP_Version)); - rc = AURPERR_InvalidVersionNumber; - } else - rc = (short)AURP_UpdateRate; - - /* construct the open response packet */ - gbuf_wset(m,sizeof(aurp_hdr_t)+sizeof(short)); - hdrp->command_code = AURPCMD_OpenRsp; - hdrp->flags = 0; - *(short *)(hdrp+1) = rc; - ((char *)(hdrp+1))[2] = 0; /* option count */ - - /* - * reset if we're in the Connected state and this is - * a completely new open request - */ - if ((state->snd_state == AURPSTATE_Connected) && - ((state->snd_connection_id != hdrp->connection_id) || - (state->snd_sequence_number != AURP_FirstSeqNum))) { - extern void AURPsndTickle(); - if (state->rcv_state == AURPSTATE_Connected) { - state->rcv_state = AURPSTATE_Unconnected; - untimeout(AURPsndTickle, state); - } - state->snd_state = AURPSTATE_Unconnected; - AURPcleanup(state); - AURPpurgeri(state->rem_node); - } - - /* update state info */ - if (state->snd_state == AURPSTATE_Unconnected) { - state->snd_state = AURPSTATE_Connected; - state->snd_sui = sui; - state->snd_connection_id = hdrp->connection_id; - state->snd_sequence_number = AURP_FirstSeqNum; - } - - /* send the packet */ - AURPsend(m, AUD_AURP, state->rem_node); - - /* open connection for the data receiver side if not yet connected */ - if (state->rcv_state == AURPSTATE_Unconnected) { - state->rcv_retry = 0; - state->tickle_retry = 0; - state->rcv_sequence_number = 0; - AURPsndOpenReq(state); - } -} - -/* */ -void AURPrcvOpenRsp(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - extern void AURPsndTickle(); - short rc; - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - - /* make sure we're in a valid state to accept it */ - if (state->rcv_state != AURPSTATE_WaitingForOpenRsp) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvOpenRsp: unexpected response\n")); - gbuf_freem(m); - return; - } - - /* check for the correct connection id */ - if (hdrp->connection_id != state->rcv_connection_id) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvOpenRsp: invalid connection id, r=%d, m=%d\n", - hdrp->connection_id, state->rcv_connection_id)); - gbuf_freem(m); - return; - } - - /* cancel the retry timer */ - untimeout(AURPsndOpenReq_locked, state); - state->rcv_tmo = 0; - state->rcv_retry = 0; - - /* update state info */ - state->rcv_sequence_number = AURP_FirstSeqNum; - state->rcv_env = hdrp->flags; - - /* check for error */ - rc = *(short *)(hdrp+1); - gbuf_freem(m); - if (rc < 0) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvOpenRsp: error=%d\n", rc)); - return; - } - - /* update state info */ - state->rcv_update_rate = (unsigned short)rc; - state->rcv_state = 
AURPSTATE_Connected; - dPrintf(D_M_AURP, D_L_TRACE, ("AURPrcvOpenRsp: moved rcv_state to AURPSTATE_Connected\n")); - - /* start tickle */ - timeout(AURPsndTickle, state, AURP_TickleRetryInterval*HZ); - - /* get routing info */ - AURPsndRIReq(state); -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_rd.c b/bsd/netat/aurp_rd.c deleted file mode 100644 index 30b15236b..000000000 --- a/bsd/netat/aurp_rd.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
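AURPsndOpenReq() above draws connection ids from a 16-bit counter that must never hand out 0, so the increment skips from 65535 back to 1. A minimal C sketch of that allocator:

#include <stdio.h>

/* Returns the next nonzero 16-bit connection id, updating *last in place. */
static unsigned short next_connection_id(unsigned short *last)
{
	if (++*last == 0)	/* wrapped past 65535 */
		*last = 1;	/* 0 is never a valid id */
	return *last;
}

int main(void)
{
	unsigned short id = 65534;
	printf("%u\n", next_connection_id(&id));	/* 65535 */
	printf("%u\n", next_connection_id(&id));	/* 1, not 0 */
	return 0;
}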
- * - * File: rd.c - */ - -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -/* */ -void AURPsndRDReq(state) - aurp_state_t *state; -{ - int msize; - gbuf_t *m; - aurp_hdr_t *hdrp; - - if ((state->rcv_state == AURPSTATE_Unconnected) || - (state->snd_state == AURPSTATE_Unconnected)) - return; - - /* update state info */ - state->rcv_state = AURPSTATE_Unconnected; - state->snd_state = AURPSTATE_Unconnected; - - /* notify tunnel peer of router going-down for the data receiver side */ - msize = sizeof(aurp_hdr_t) + sizeof(short); - if ((m = (gbuf_t *)gbuf_alloc(msize, PRI_MED)) != 0) { - gbuf_wset(m,msize); - - /* construct the router down packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->rcv_connection_id; - hdrp->sequence_number = 0; - hdrp->command_code = AURPCMD_RDReq; - hdrp->flags = 0; - *(short *)(hdrp+1) = AURPERR_NormalConnectionClose; - - /* send the packet */ - AURPsend(m, AUD_AURP, state->rem_node); - } - - /* notify tunnel peer of router going-down for the data sender side */ - msize = sizeof(aurp_hdr_t) + sizeof(short); - if ((m = (gbuf_t *)gbuf_alloc(msize, PRI_MED)) != 0) { - gbuf_wset(m,msize); - - /* construct the router down packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->snd_connection_id; - hdrp->sequence_number = state->snd_sequence_number; - hdrp->command_code = AURPCMD_RDReq; - hdrp->flags = 0; - *(short *)(hdrp+1) = AURPERR_NormalConnectionClose; - - /* send the packet */ - AURPsend(m, AUD_AURP, state->rem_node); - } -} - -/* */ -void AURPrcvRDReq(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - /* update state info */ - state->rcv_state = AURPSTATE_Unconnected; - state->snd_state = AURPSTATE_Unconnected; - AURPcleanup(state); - - /* purge all routes associated with the tunnel peer going-down */ - AURPpurgeri(state->rem_node); - - /* respond to the going-down peer with an RI Ack packet */ - AURPsndRIAck(state, m, 0); -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_ri.c b/bsd/netat/aurp_ri.c deleted file mode 100644 index c4f36f4a0..000000000 --- a/bsd/netat/aurp_ri.c +++ /dev/null @@ -1,850 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
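The receive paths in aurp_ri.c below (AURPrcvRIRsp and AURPrcvRIUpd) accept a packet only when its sequence number is the expected one, and re-ack without reprocessing when it is exactly one behind, treating 65535 as the predecessor of 1 since 0 is skipped. A standalone C sketch of that acceptance test:

#include <stdio.h>

#define AURP_FirstSeqNum 1
#define AURP_LastSeqNum  65535

enum seq_result { SEQ_OK, SEQ_DUP, SEQ_BAD };

/* Classify a received sequence number against the expected one. */
static enum seq_result check_seq(unsigned short got, unsigned short expected)
{
	if (got == expected)
		return SEQ_OK;
	if ((expected == AURP_FirstSeqNum && got == AURP_LastSeqNum) ||
	    got == (unsigned short)(expected - 1))
		return SEQ_DUP;		/* peer missed our ack: ack again, drop data */
	return SEQ_BAD;			/* out of window: drop silently */
}

int main(void)
{
	printf("%d\n", check_seq(7, 7));	/* 0: in order */
	printf("%d\n", check_seq(65535, 1));	/* 1: duplicate across the wrap */
	printf("%d\n", check_seq(5, 9));	/* 2: bad */
	return 0;
}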
- * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - * - * File: ri.c - */ - -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - - -static void AURPsndRIRsp(aurp_state_t *); - -/* */ -void AURPsndRIAck(state, m, flags) - aurp_state_t *state; - gbuf_t *m; - unsigned short flags; -{ - unsigned short sequence_number; - aurp_hdr_t *hdrp; - int msize = sizeof(aurp_hdr_t); - - if (m) { - sequence_number = ((aurp_hdr_t *)gbuf_rptr(m))->sequence_number; - gbuf_wset(m,sizeof(aurp_hdr_t)); - } else { - sequence_number = state->rcv_sequence_number; - if ((m = (gbuf_t *)gbuf_alloc(msize, PRI_MED)) == 0) - return; - gbuf_wset(m,msize); - } - - /* construct the RI Ack packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->rcv_connection_id; - hdrp->sequence_number = sequence_number; - hdrp->command_code = AURPCMD_RIAck; - hdrp->flags = flags; - - /* send the packet */ - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndRIAck: node=%d\n", - state->rem_node)); - AURPsend(m, AUD_AURP, state->rem_node); -} - -/* locked version of AURPsndRIReq */ -void AURPsndRIReq_locked(state) - aurp_state_t *state; -{ - atalk_lock(); - AURPsndRIReq(state); - atalk_unlock(); -} - -/* */ -void AURPsndRIReq(state) - aurp_state_t *state; -{ - int msize; - gbuf_t *m; - aurp_hdr_t *hdrp; - - - if (state->rcv_state == AURPSTATE_Unconnected) { - return; - } - if (state->rcv_tmo && (state->rcv_state != AURPSTATE_WaitingForRIRsp)) { - return; - } - - msize = sizeof(aurp_hdr_t); - if ((m = (gbuf_t *)gbuf_alloc(msize, PRI_MED)) != 0) { - gbuf_wset(m,msize); - - /* construct the RI request packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->rcv_connection_id; - hdrp->sequence_number = 0; - hdrp->command_code = AURPCMD_RIReq; - hdrp->flags = 0; - - /* update state info */ - state->rcv_state = AURPSTATE_WaitingForRIRsp; - - /* send the packet */ - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndRIReq: node=%d\n", - state->rem_node)); - AURPsend(m, AUD_AURP, state->rem_node); - } - - /* start the retry timer */ - timeout(AURPsndRIReq_locked, state, AURP_RetryInterval*HZ); - state->rcv_tmo = 1; -} - -/* locked version of AURPsndRIRsp */ -void AURPsndRIRsp_locked(state) - aurp_state_t *state; -{ - atalk_lock(); - AURPsndRIRsp(state); - atalk_unlock(); -} - -/* */ -void AURPsndRIRsp(state) - aurp_state_t *state; -{ - gbuf_t *m; - aurp_hdr_t *hdrp; - short len = 0; - int msize = 0; - - - /* make sure we're in a valid state to send RI response */ - if ((state->snd_state == AURPSTATE_Unconnected) || - (state->snd_state == AURPSTATE_WaitingForRIAck2)) { - return; - } - - /* update state info */ - state->snd_state = AURPSTATE_WaitingForRIAck1; - - if (state->rsp_m == 0) { - msize = sizeof(aurp_hdr_t); - if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_MED)) == 0) { - timeout(AURPsndRIRsp_locked, state, AURP_RetryInterval*HZ); - state->snd_tmo = 1; - return; - } - gbuf_wset(m,msize); - state->rsp_m = m; - - /* construct the RI response packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->snd_connection_id; - hdrp->sequence_number = 
state->snd_sequence_number; - hdrp->command_code = AURPCMD_RIRsp; - hdrp->flags = 0; - - /* get routing info of the local networks */ - state->snd_next_entry = AURPgetri( - state->snd_next_entry, gbuf_wptr(m), &len); - gbuf_winc(m,len); - - /* set the last flag if this is the last response packet */ - if (!state->snd_next_entry) - hdrp->flags = AURPFLG_LAST; - } - - /* keep a copy of the packet for retry */ - m = (gbuf_t *)gbuf_dupb(state->rsp_m); - - /* start the retry timer */ - timeout(AURPsndRIRsp_locked, state, AURP_RetryInterval*HZ); - state->snd_tmo = 1; - - - /* send the packet */ - if (m) { - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndRIRsp: len=%d\n", len)); - AURPsend(m, AUD_AURP, state->rem_node); - } - -} - -void AURPsndRIUpd_locked(state) - aurp_state_t *state; -{ - atalk_lock(); - AURPsndRIUpd(state); - atalk_unlock(); -} - -/* */ -void AURPsndRIUpd(state) - aurp_state_t *state; -{ - gbuf_t *m; - aurp_hdr_t *hdrp; - short len = 0; - int s, msize = 0; - - - /* make sure we're in a valid state to send update */ - if (state->snd_next_entry || (state->upd_m == 0) || - (state->snd_state == AURPSTATE_Unconnected) || - (state->snd_state == AURPSTATE_WaitingForRIAck1)) { - return; - } - - /* update state info */ - state->snd_state = AURPSTATE_WaitingForRIAck2; - - if (state->snd_tmo == 0) { - msize = sizeof(aurp_hdr_t); - m = state->upd_m; - len = gbuf_len(m); - gbuf_rdec(m,msize); - - /* construct the RI update packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->snd_connection_id; - hdrp->sequence_number = state->snd_sequence_number; - hdrp->command_code = AURPCMD_RIUpd; - hdrp->flags = 0; - } - - /* keep a copy of the packet for retry */ - m = (gbuf_t *)gbuf_dupb(state->upd_m); - - /* start the retry timer */ - timeout(AURPsndRIUpd_locked, state, AURP_RetryInterval*HZ); - state->snd_tmo = 1; - - - /* send the packet */ - if (m) { - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndRIUpd: len=%d\n", len)); - AURPsend(m, AUD_AURP, state->rem_node); - } - -} - -/* */ -void AURPrcvRIReq(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - int s; - - - /* make sure we're in a valid state to accept it */ - if ((state->snd_state == AURPSTATE_Unconnected) || - (state->snd_state == AURPSTATE_WaitingForRIAck2)) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvRIReq: unexpected request\n")); - gbuf_freem(m); - return; - } - - /* check for the correct connection id */ - if (hdrp->connection_id != state->snd_connection_id) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvRIReq: invalid connection id, r=%d, m=%d\n", - hdrp->connection_id, state->snd_connection_id)); - gbuf_freem(m); - return; - } - - if (state->snd_state != AURPSTATE_WaitingForRIAck1) { - state->snd_next_entry = 0; - if (state->rsp_m) { - gbuf_freem(state->rsp_m); - state->rsp_m = 0; - } - AURPsndRIRsp(state); - } - - gbuf_freem(m); -} - -/* */ -void AURPrcvRIRsp(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - - - /* make sure we're in a valid state to accept it */ - if (state->rcv_state != AURPSTATE_WaitingForRIRsp) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvRIRsp: unexpected response\n")); - gbuf_freem(m); - return; - } - - /* check for the correct connection id */ - if (hdrp->connection_id != state->rcv_connection_id) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvRIRsp: invalid connection id, r=%d, m=%d\n", - hdrp->connection_id, state->rcv_connection_id)); - gbuf_freem(m); - return; - } - - /* check for the correct sequence 
number */ - if (hdrp->sequence_number != state->rcv_sequence_number) { - if ( ((state->rcv_sequence_number == AURP_FirstSeqNum) && - (hdrp->sequence_number == AURP_LastSeqNum)) || - (hdrp->sequence_number == (state->rcv_sequence_number-1)) ) { - AURPsndRIAck(state, m, AURPFLG_SZI); - } else { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvRIRsp: invalid sequence number, r=%d, m=%d\n", - hdrp->sequence_number, state->rcv_sequence_number)); - gbuf_freem(m); - } - return; - } - gbuf_rinc(m,sizeof(*hdrp)); - if (hdrp->flags & AURPFLG_LAST) - state->rcv_state = AURPSTATE_Connected; - - dPrintf(D_M_AURP, D_L_INFO, ("AURPrcvRIRsp: len=%ld\n", gbuf_len(m))); - - /* cancel the retry timer */ - untimeout(AURPsndRIReq_locked, state); - state->rcv_tmo = 0; - - /* send RI ack */ - AURPsndRIAck(state, 0, AURPFLG_SZI); - - /* update state info */ - if (++state->rcv_sequence_number == 0) - state->rcv_sequence_number = AURP_FirstSeqNum; - - /* process routing info of the tunnel peer */ - if (AURPsetri(state->rem_node, m)) { - dPrintf(D_M_AURP, D_L_ERROR, ("AURPrcvRIRsp: AURPsetri() error\n")); - } - gbuf_freem(m); - - /* set the get zone flag to get zone info later if required */ - if (state->rcv_state == AURPSTATE_Connected) - state->get_zi = 1; -} - -/* */ -void AURPrcvRIUpd(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - - /* make sure we're in a valid state to accept it */ - if (state->rcv_state == AURPSTATE_Unconnected) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvRIUpd: unexpected response\n")); - gbuf_freem(m); - return; - } - - /* check for the correct connection id */ - if (hdrp->connection_id != state->rcv_connection_id) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvRIUpd: invalid connection id, r=%d, m=%d\n", - hdrp->connection_id, state->rcv_connection_id)); - gbuf_freem(m); - return; - } - - /* check for the correct sequence number */ - if (hdrp->sequence_number != state->rcv_sequence_number) { - if ( ((state->rcv_sequence_number == AURP_FirstSeqNum) && - (hdrp->sequence_number == AURP_LastSeqNum)) || - (hdrp->sequence_number == (state->rcv_sequence_number-1)) ) { - AURPsndRIAck(state, m, AURPFLG_SZI); - } else { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvRIUpd: invalid sequence number, r=%d, m=%d\n", - hdrp->sequence_number, state->rcv_sequence_number)); - gbuf_freem(m); - } - return; - } - gbuf_rinc(m,sizeof(*hdrp)); - - dPrintf(D_M_AURP, D_L_INFO, ("AURPrcvRIUpd: len=%ld\n", gbuf_len(m))); - - /* send RI ack */ - AURPsndRIAck(state, 0, AURPFLG_SZI); - - /* update state info */ - if (++state->rcv_sequence_number == 0) - state->rcv_sequence_number = AURP_FirstSeqNum; - - /* process update routing info of the tunnel peer */ - if (AURPupdateri(state->rem_node, m)) { - dPrintf(D_M_AURP, D_L_ERROR, ("AURPrcvRIUpd: AURPupdateri() error\n")); - } - - /* set the get zone flag to get zone info later if required */ - state->get_zi = 1; - - gbuf_freem(m); -} - -/* */ -void AURPrcvRIAck(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - gbuf_t *dat_m; - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - unsigned char snd_state; - int flag; - - dPrintf(D_M_AURP, D_L_INFO, ("AURPrcvRIAck: state=%d\n", - state->snd_state)); - - /* make sure we're in a valid state to accept it */ - snd_state = state->snd_state; - if (((snd_state == AURPSTATE_WaitingForRIAck1) || - (snd_state == AURPSTATE_WaitingForRIAck2)) && - (hdrp->sequence_number == state->snd_sequence_number)) { - - if (snd_state == AURPSTATE_WaitingForRIAck1) { - /* ack from the tunnel peer to our RI 
response */ - untimeout(AURPsndRIRsp_locked, state); - dat_m = state->rsp_m; - state->rsp_m = 0; - flag = 1; - } else { - /* ack from the tunnel peer to our RI update */ - untimeout(AURPsndRIUpd_locked, state); - dat_m = state->upd_m; - state->upd_m = 0; - flag = 2; - } - state->snd_tmo = 0; - gbuf_rinc(dat_m,sizeof(aurp_hdr_t)); - - /* increment the sequence number */ - if (++state->snd_sequence_number == 0) - state->snd_sequence_number = AURP_FirstSeqNum; - - /* update state info */ - state->snd_state = AURPSTATE_Connected; - - if (state->snd_next_entry) /* more RI responses to send? */ - AURPsndRIRsp(state); - - /* check to see if we need to send ZI responses */ - if (hdrp->flags & AURPFLG_SZI) - AURPsndZRsp(state, dat_m, flag); - else if (dat_m) - gbuf_freem(dat_m); - } - - gbuf_freem(m); -} - -/* */ -int AURPgetri(next_entry, buf, len) - short next_entry; - unsigned char *buf; - short *len; -{ - short entry_num = next_entry; - RT_entry *entry = (RT_entry *)&RT_table[next_entry]; - - for (*len=0; entry_num < RT_maxentry; entry_num++,entry++) { - if ((net_port != entry->NetPort) && - !(entry->AURPFlag & AURP_NetHiden)) { - if ((entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT) { - if (entry->NetStart) { - /* route info for extended network */ - *(short *)buf = entry->NetStart; - buf += sizeof(short); - *buf++ = 0x80 | (entry->NetDist & 0x1F); - *(short *)buf = entry->NetStop; - buf += sizeof(short); - *buf++ = 0; - *len += 6; - } else { - /* route info for non-extended network */ - *(short *)buf = entry->NetStop; - buf += sizeof(short); - *buf++ = (entry->NetDist & 0x1F); - *len += 3; - } - } - } - if (*len > AURP_MaxPktSize) - break; - } - - return (entry_num == RT_maxentry) ? 0 : entry_num; -} - -/* */ -int AURPsetri(node, m) - unsigned char node; - gbuf_t *m; -{ - int tuples_cnt; - unsigned char *tuples_ptr; - RT_entry new_rt, *curr_rt; - - new_rt.NextIRNet = 0; - new_rt.NextIRNode = node; - new_rt.NetPort = net_port; - - /* - * Process all the tuples against our routing table - */ - tuples_ptr = (char *)gbuf_rptr(m); - tuples_cnt = (gbuf_len(m))/3; - - while (tuples_cnt--) { - new_rt.NetDist = TUPLEDIST(tuples_ptr) + 1; - new_rt.EntryState = RTE_STATE_GOOD; - new_rt.NetStart = TUPLENET(tuples_ptr); - tuples_ptr += 3; - if (tuples_ptr[-1] & 0x80) { - new_rt.NetStop = TUPLENET((tuples_ptr)); - tuples_ptr += 3; - tuples_cnt--; - } else { - new_rt.NetStop = new_rt.NetStart; - new_rt.NetStart = 0; - } - if ((new_rt.NetStop == 0) || (new_rt.NetStop < new_rt.NetStart)) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPsetri: %d, invalid tuple received [%d-%d]\n", - net_port, new_rt.NetStart, new_rt.NetStop)); - continue; - } - - if ((curr_rt = rt_blookup(new_rt.NetStop)) != 0) { /* found? 
*/ - /* ignore loop if present */ - if (curr_rt->NetPort != net_port) - continue; - - if (new_rt.NetDist < 16) { - /* - * check if the definition of the route has changed - */ - if ((new_rt.NetStop != curr_rt->NetStop) || - (new_rt.NetStart != curr_rt->NetStart)) { - if ((new_rt.NetStop == curr_rt->NetStop) && - (new_rt.NetStop == curr_rt->NetStart) && - (new_rt.NetStart == 0)) { - new_rt.NetStart = new_rt.NetStop; - } else if ((new_rt.NetStop == curr_rt->NetStop) && - (new_rt.NetStart == new_rt.NetStop) && - (curr_rt->NetStart == 0)) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPsetri: [%d-%d] has changed to [%d-%d], Dist=%d\n", - curr_rt->NetStart, curr_rt->NetStop, - new_rt.NetStart, new_rt.NetStop, new_rt.NetDist)); - new_rt.NetStart = 0; - } else { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPsetri: Net Conflict, Curr=[%d-%d], New=[%d-%d]\n", - curr_rt->NetStart,curr_rt->NetStop, - new_rt.NetStart,new_rt.NetStop)); - zt_remove_zones(curr_rt->ZoneBitMap); - rt_delete(curr_rt->NetStop, curr_rt->NetStart); - continue; - } - } - } - - if ((new_rt.NetDist <= curr_rt->NetDist) && - (new_rt.NetDist < 16)) { - /* - * found a shorter or more recent route, - * replace with the new entry - */ - curr_rt->NetDist = new_rt.NetDist; - curr_rt->NextIRNode = new_rt.NextIRNode; - dPrintf(D_M_AURP_LOW,D_L_INFO, - ("AURPsetri: shorter route found [%d-%d], update\n", - new_rt.NetStart,new_rt.NetStop)); - } - - } else { /* no entry found */ - if (new_rt.NetDist < 16) { - new_rt.EntryState = RTE_STATE_GOOD; - dPrintf(D_M_AURP, D_L_INFO, - ("AURPsetri: new_rt [%d-%d], tuple #%d\n", - new_rt.NetStart, new_rt.NetStop, tuples_cnt)); - if (rt_insert(new_rt.NetStop, new_rt.NetStart, - new_rt.NextIRNet, new_rt.NextIRNode, - new_rt.NetDist, new_rt.NetPort, - new_rt.EntryState) == (RT_entry *)0) { - dPrintf(D_M_AURP,D_L_ERROR, - ("AURPsetri: RTMP table full [%d-%d]\n", - new_rt.NetStart,new_rt.NetStop)); - return -1; - } - } - } - } /* end of main while */ - - return 0; -} - -/* */ -int AURPupdateri(node, m) - unsigned char node; - gbuf_t *m; -{ - char ev, ev_len; - RT_entry new_rt, *old_rt; - - while (gbuf_len(m) > 0) { - ev = *gbuf_rptr(m); /* event code */ - gbuf_rinc(m,1); - if (gbuf_rptr(m)[2] & 0x80) { - /* event tuple for extended network */ - new_rt.NetStart = *(unsigned short *)gbuf_rptr(m); - new_rt.NetStop = *(unsigned short *)&gbuf_rptr(m)[3]; - new_rt.NetDist = gbuf_rptr(m)[2] & 0x7f; - ev_len = 5; - } else { - /* event tuple for non-extended network */ - new_rt.NetStart = 0; - new_rt.NetStop = *(unsigned short *)gbuf_rptr(m); - new_rt.NetDist = gbuf_rptr(m)[2]; - ev_len = 3; - } - - switch (ev) { - case AURPEV_Null: - break; - - case AURPEV_NetAdded: - gbuf_rinc(m,ev_len); - new_rt.NextIRNet = 0; - new_rt.NextIRNode = node; - new_rt.NetPort = net_port; - if ((new_rt.NetDist == 0) || (new_rt.NetStop == 0) || - (new_rt.NetStop < new_rt.NetStart)) { - dPrintf(D_M_AURP,D_L_WARNING, - ("AURPupdateri: %d, invalid NetAdded received [%d-%d]\n", - net_port, new_rt.NetStart, new_rt.NetStop)); - break; - } - - if ((old_rt = rt_blookup(new_rt.NetStop)) != 0) { /* found? 
*/ - if (old_rt->NetPort == net_port) { - /* - * process this event as if it was an NDC event; - * update the route's distance - */ - old_rt->NetDist = new_rt.NetDist; - } - } else { -l_add: if ((new_rt.NetDist < 16) && (new_rt.NetDist != NOTIFY_N_DIST)) { - new_rt.EntryState = RTE_STATE_GOOD; - dPrintf(D_M_AURP, D_L_INFO, - ("AURPupdateri: NetAdded [%d-%d]\n", - new_rt.NetStart, new_rt.NetStop)); - if (rt_insert(new_rt.NetStop, new_rt.NetStart, - new_rt.NextIRNet, new_rt.NextIRNode, - new_rt.NetDist, new_rt.NetPort, - new_rt.EntryState) == (RT_entry *)0) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPupdateri: RTMP table full [%d-%d]\n", - new_rt.NetStart,new_rt.NetStop)); - return 0; - } - } - } - break; - - case AURPEV_NetDeleted: - case AURPEV_NetRouteChange: - gbuf_rinc(m,ev_len); -l_delete: if ((old_rt = rt_blookup(new_rt.NetStop)) != 0) { /* found? */ - if (old_rt->NetPort == net_port) { - zt_remove_zones(old_rt->ZoneBitMap); - rt_delete(old_rt->NetStop, old_rt->NetStart); - } - } - break; - - case AURPEV_NetDistChange: - gbuf_rinc(m,ev_len); - if (new_rt.NetDist == 15) - goto l_delete; /* process this event as if was an ND event */ - if ((old_rt = rt_blookup(new_rt.NetStop)) != 0) { /* found? */ - if (old_rt->NetPort == net_port) { - /* - * update the route's distance - */ - old_rt->NetDist = new_rt.NetDist; - } - } else - goto l_add; /* process this event as if was an NA event */ - break; - - case AURPEV_NetZoneChange: - break; - } - } - - return 0; -} - -/* */ -void AURPpurgeri(node) - unsigned char node; -{ - short entry_num; - RT_entry *entry = (RT_entry *)RT_table; - - /* - * purge all routes associated with the tunnel peer - */ - for (entry_num=0; entry_num < RT_maxentry; entry_num++,entry++) { - if ((net_port == entry->NetPort) && (node == entry->NextIRNode)) { - zt_remove_zones(entry->ZoneBitMap); - rt_delete(entry->NetStop, entry->NetStart); - } - } -} - -/* */ -void AURPrtupdate(entry, ev) - RT_entry *entry; - unsigned char ev; -{ - unsigned char i, node, ev_len, ev_tuple[6]; - gbuf_t *m; - aurp_state_t *state = (aurp_state_t *)&aurp_state[1]; - int s, msize = sizeof(aurp_hdr_t); - - dPrintf(D_M_AURP, D_L_TRACE, ("AURPrtupdate: event=%d, net=[%d-%d]\n", - ev, entry->NetStart, entry->NetStop)); - - /* - * check that the network can be exported; if not, - * we must not make it visible beyond the local networks - */ - if (net_export) { - for (i=0; i < net_access_cnt; i++) { - if ((net_access[i] == entry->NetStart) || - (net_access[i] == entry->NetStop)) - break; - } - if (i == net_access_cnt) - return; - } else { - for (i=0; i < net_access_cnt; i++) { - if ((net_access[i] == entry->NetStart) || - (net_access[i] == entry->NetStop)) - return; - } - } - - /* - * create the update event tuple - */ - ev_tuple[0] = ev; /* event code */ - if (entry->NetStart) { - *(unsigned short *)&ev_tuple[1] = entry->NetStart; - ev_tuple[3] = 0x80 | (entry->NetDist & 0x1F); - *(unsigned short *)&ev_tuple[4] = entry->NetStop; - ev_len = 6; - } else { - *(unsigned short *)&ev_tuple[1] = entry->NetStop; - ev_tuple[3] = (entry->NetDist & 0x1F); - ev_len = 4; - } - - for (node=1; node <= dst_addr_cnt; node++, state++) { - if ((ev == AURPEV_NetAdded) && - (!(state->snd_sui & AURPFLG_NA))) continue; - if ((ev == AURPEV_NetDeleted) && - (!(state->snd_sui & AURPFLG_ND))) continue; - if ((ev == AURPEV_NetDistChange) && - (!(state->snd_sui & AURPFLG_NDC))) continue; - if ((state->snd_state != AURPSTATE_Unconnected) && - (state->snd_state != AURPSTATE_WaitingForRIAck2)) { - if ((m = state->upd_m) == 0) { - /* - 
* we don't have the RI update buffer yet, allocate one - */ - if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_HI)) == 0) - continue; - state->upd_m = m; - gbuf_rinc(m,msize); - gbuf_wset(m,0); - } - - /* - * add the update event tuple to the RI update buffer; - * the RI update buffer will be sent when the periodic update - * timer expires - */ - bcopy(ev_tuple, gbuf_wptr(m), ev_len); - gbuf_winc(m,ev_len); - - /* - * if the RI update buffer is full, send the RI update now - */ - if (gbuf_len(m) > (AURP_MaxPktSize-6)) { - AURPsndRIUpd(state); - continue; - } - } - } -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_rx.c b/bsd/netat/aurp_rx.c deleted file mode 100644 index 416345fac..000000000 --- a/bsd/netat/aurp_rx.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified for Kernel execution, May, 1996, Justin C. Walker - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - * - * File: rx.c - */ -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* - * Not using the stream queue for data; keep this around to handle - * requests from the user proc (mostly setup). 
- */ -int -aurp_wput(gref, m) - gref_t *gref; - gbuf_t *m; -{ - register ioc_t *iocbp; - register gbuf_t *mdata; - register int temp, error; - - switch (gbuf_type(m)) { - - case MSG_IOCTL: - iocbp = (ioc_t *)gbuf_rptr(m); - switch (iocbp->ioc_cmd) { - case AUC_CFGTNL: /* set up a tunnel, init the AURP daemon */ - mdata = gbuf_cont(m); - temp = (int)(*gbuf_rptr(mdata)); - if (temp != dst_addr_cnt) { - AURPiocnak(gref, m, ENOSPC); - return 0; - } - if ((error = aurpd_start()) != 0) { - AURPiocnak(gref, m, error); - return 0; - } - if (AURPinit()) { - AURPiocnak(gref, m, ENOMEM); - return 0; - } - ddp_AURPfuncx(AURPCODE_AURPPROTO, 0, 0); - AURPaccess(); - break; - - case AUC_SHTDOWN: /* shutdown AURP operation */ - AURPshutdown(); - break; - - case AUC_EXPNET: /* configure networks to be exported */ - case AUC_HIDENET: /* configure networks to be hiden */ - mdata = gbuf_cont(m); - net_access_cnt = (gbuf_len(mdata))/sizeof(short); - if ((net_access_cnt==0) || (net_access_cnt>AURP_MaxNetAccess)) { - AURPiocnak(gref, m, EINVAL); - return 0; - } - bcopy(gbuf_rptr(mdata), net_access, - gbuf_len(mdata)); - if (iocbp->ioc_cmd == AUC_EXPNET) - net_export = 1; - break; - - case AUC_UDPPORT: - mdata = gbuf_cont(m); - aurp_global.udp_port = *(char *)gbuf_rptr(mdata); - break; - - case AUC_NETLIST: - mdata = gbuf_cont(m); - /* - * Compute # addrs, Save for later check - * We cheat with a shift. - */ - dst_addr_cnt = ((gbuf_len(mdata)) >> 2)-1; - bcopy(gbuf_rptr(mdata), &aurp_global.dst_addr, - gbuf_len(mdata)); - aurp_global.src_addr = aurp_global.dst_addr[0]; - aurp_global.dst_addr[0] = 0; - break; - - default: - AURPiocnak(gref, m, EINVAL); - return 0; - } - AURPiocack(gref, m); - break; - - default: - dPrintf(D_M_AURP, D_L_WARNING, - ("aurp_wput: bad msg type=%d\n", gbuf_type(m))); - gbuf_freem(m); - break; - } - - return 0; -} - -/* - * Insert an appletalk packet into the appletalk stack. - * If it's an AURP data packet, just send it up; if it's AURP protocol, - * switch out here. - */ - -int -at_insert(m, type, node) - register gbuf_t *m; - register unsigned int type, node; -{ - register aurp_hdr_t *hdrp; - register aurp_state_t *state; - - if (type == AUD_Atalk) - /* non-AURP proto packet */ - ddp_AURPfuncx(AURPCODE_DATAPKT, m, node); - else - { /* AURP proto packet */ - state = (aurp_state_t *)&aurp_state[node]; - state->tickle_retry = 0; - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - - switch (hdrp->command_code) { - case AURPCMD_RIUpd: - AURPrcvRIUpd(state, m); break; - - case AURPCMD_RIReq: - AURPrcvRIReq(state, m); break; - - case AURPCMD_RIRsp: - AURPrcvRIRsp(state, m); break; - - case AURPCMD_RIAck: - AURPrcvRIAck(state, m); break; - - case AURPCMD_ZReq: - AURPrcvZReq(state, m); break; - - case AURPCMD_ZRsp: - AURPrcvZRsp(state, m); break; - - case AURPCMD_OpenReq: - AURPrcvOpenReq(state, m); break; - - case AURPCMD_OpenRsp: - AURPrcvOpenRsp(state, m); break; - - case AURPCMD_Tickle: - AURPrcvTickle(state, m); break; - - case AURPCMD_TickleAck: - AURPrcvTickleAck(state, m); break; - - case AURPCMD_RDReq: - AURPrcvRDReq(state, m); break; - - default: - dPrintf(D_M_AURP, D_L_WARNING, - ("at_insert: bad proto cmd=%d\n", - hdrp->command_code)); - gbuf_freem(m); - } - } - - return 0; -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_tickle.c b/bsd/netat/aurp_tickle.c deleted file mode 100644 index 9b68f9447..000000000 --- a/bsd/netat/aurp_tickle.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - * - * File: tickle.c - */ - -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* */ -void AURPsndTickle(state) - aurp_state_t *state; -{ - int msize; - gbuf_t *m; - aurp_hdr_t *hdrp; - - atalk_lock(); - - if (state->rcv_state == AURPSTATE_Unconnected) { - atalk_unlock(); - return; - } - /* stop trying if the retry count exceeds the maximum retry value */ - if (++state->tickle_retry > AURP_MaxTickleRetry) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPsndTickle: no response, %d\n", state->rem_node)); - /* - * the tunnel peer seems to have disappeared, update state info - */ - state->snd_state = AURPSTATE_Unconnected; - state->rcv_state = AURPSTATE_Unconnected; - state->tickle_retry = 0; - AURPcleanup(state); - - /* purge all routes associated with the tunnel peer */ - AURPpurgeri(state->rem_node); - atalk_unlock(); - return; - } - - if (state->tickle_retry > 1) { - msize = sizeof(aurp_hdr_t); - if ((m = (gbuf_t *)gbuf_alloc(msize, PRI_MED)) != 0) { - gbuf_wset(m,msize); - - /* construct the tickle packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->rcv_connection_id; - hdrp->sequence_number = 0; - hdrp->command_code = AURPCMD_Tickle; - hdrp->flags = 0; - - /* send the packet */ - AURPsend(m, AUD_AURP, state->rem_node); - } - } - - /* start the retry timer */ - timeout(AURPsndTickle, state, AURP_TickleRetryInterval*HZ); - - atalk_unlock(); -} - -/* */ -void AURPrcvTickle(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - - /* make sure we're in a valid state to accept it */ - if (state->snd_state == AURPSTATE_Unconnected) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvTickle: unexpected request\n")); - gbuf_freem(m); - return; - } - - /* construct the tickle ack packet */ - gbuf_wset(m,sizeof(aurp_hdr_t)); - hdrp->command_code = AURPCMD_TickleAck; - hdrp->flags = 0; - - /* send the packet */ - AURPsend(m, AUD_AURP, state->rem_node); -} 
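The tickle exchange in this file is a plain keep-alive: AURPsndTickle re-arms itself every AURP_TickleRetryInterval seconds and bumps tickle_retry on each pop; any inbound AURP packet (see at_insert above) or the TickleAck handler just below clears the counter; and once the counter passes AURP_MaxTickleRetry the peer is declared gone and its routes purged. A minimal user-space sketch of that counting logic follows — peer_t, tickle_timer, tickle_ack and MAX_TICKLE_RETRY are hypothetical stand-ins for aurp_state_t, the kernel timeout() machinery and the real constant, and the packet I/O is reduced to printf.

/*
 * Sketch of the AURP tickle keep-alive retry pattern (assumed names,
 * not the kernel API): each timer pop bumps a retry counter, an ack
 * from the peer resets it, and exceeding the maximum tears down.
 */
#include <stdio.h>

#define MAX_TICKLE_RETRY 4

typedef struct {
    int connected;      /* 1 while the tunnel peer is considered alive */
    int tickle_retry;   /* timer pops since the last answer from the peer */
} peer_t;

static void tickle_timer(peer_t *p)
{
    if (!p->connected)
        return;
    if (++p->tickle_retry > MAX_TICKLE_RETRY) {
        /* peer stopped answering: mark unconnected (the kernel code
         * also purges the peer's routes here via AURPpurgeri) */
        p->connected = 0;
        printf("peer silent for %d intervals, closing\n", MAX_TICKLE_RETRY);
        return;
    }
    /* mirrors the tickle_retry > 1 test above: the first pop after an
     * ack sends nothing */
    if (p->tickle_retry > 1)
        printf("send tickle (attempt %d)\n", p->tickle_retry);
    /* a real implementation re-arms the one-shot timer here, as
     * AURPsndTickle does with timeout() */
}

static void tickle_ack(peer_t *p)
{
    p->tickle_retry = 0;    /* any answer proves the peer is still there */
}

int main(void)
{
    peer_t p = { 1, 0 };
    tickle_timer(&p);           /* pop 1: counter=1, nothing sent      */
    tickle_timer(&p);           /* pop 2: counter=2, tickle sent       */
    tickle_ack(&p);             /* ack arrives, counter back to 0      */
    for (int i = 0; i < 5; i++)
        tickle_timer(&p);       /* five silent pops: closes on the 5th */
    return 0;
}

Note the consequence of the tickle_retry > 1 test: a session whose counter keeps being reset by normal AURP traffic never emits tickles at all; probes start only after a full quiet interval.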
- -/* */ -void AURPrcvTickleAck(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - - /* make sure we're in a valid state to accept it */ - if (state->rcv_state == AURPSTATE_Unconnected) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvTickleAck: unexpected response\n")); - gbuf_freem(m); - return; - } - - /* check for the correct connection id */ - if (hdrp->connection_id != state->rcv_connection_id) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvTickleAck: invalid connection id, r=%d, m=%d\n", - hdrp->connection_id, state->rcv_connection_id)); - gbuf_freem(m); - return; - } - gbuf_freem(m); - - /* update state info */ - state->tickle_retry = 0; -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_tx.c b/bsd/netat/aurp_tx.c deleted file mode 100644 index dcdc11c1f..000000000 --- a/bsd/netat/aurp_tx.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - * - * File: tx.c - */ - -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* - * Any AURP protocol or appletalk data (ddp) packets flowing through - * are inserted into the kernel aurpd process's (atalk) input queue. - * Assume here that we deal with single packets, i.e., someone earlier - * in the food chain has broken up packet chains. 
- */ -void AURPsend(mdata, type, node) - gbuf_t *mdata; - int type, node; -{ - struct aurp_domain *domain; - gbuf_t *m; - int msize = AT_WR_OFFSET+32+IP_DOMAINSIZE; - - /* Add the domain header */ - if ((m = gbuf_alloc(msize, PRI_MED)) == 0) { - gbuf_freem(mdata); - dPrintf(D_M_AURP, D_L_WARNING, ("AURPsend: gbuf_alloc failed\n")); - return; - } - gbuf_wset(m,msize); - gbuf_rinc(m,AT_WR_OFFSET+32); - gbuf_cont(m) = mdata; - domain = (struct aurp_domain *)gbuf_rptr(m); - domain->dst_length = IP_LENGTH; - domain->dst_authority = IP_AUTHORITY; - domain->dst_distinguisher = IP_DISTINGUISHER; - domain->src_length = IP_LENGTH; - domain->src_authority = IP_AUTHORITY; - domain->src_distinguisher = IP_DISTINGUISHER; - domain->src_address = aurp_global.src_addr; - domain->version = AUD_Version; - domain->reserved = 0; - domain->type = type; - domain->dst_address = aurp_global.dst_addr[node]; - atalk_to_ip(m); -} - -/* - * Called from within ddp (via ddp_AURPsendx) to handle data (DDP) packets - * sent from the AppleTalk stack, routing updates, and routing info - * initialization. - */ -void AURPcmdx(code, mdata, param) - int code; - gbuf_t *mdata; - int param; -{ - unsigned char node; - gbuf_t *mdata_next; - - if (mdata == 0) - return; - if (aurp_gref == 0) { - if (code != AURPCODE_DEBUGINFO) - AURPfreemsg(mdata); - return; - } - - switch (code) { - case AURPCODE_DATAPKT: /* data packet */ - node = (unsigned char)param; - if (gbuf_next(mdata)) { - mdata_next = gbuf_next(mdata); - gbuf_next(mdata) = 0; - AURPsend(mdata, AUD_Atalk, node); - do { - mdata = mdata_next; - mdata_next = gbuf_next(mdata); - gbuf_next(mdata) = 0; - /* Indicate non-AURP packet, node id of peer */ - AURPsend(mdata, AUD_Atalk, node); - } while (mdata_next); - } else - AURPsend(mdata, AUD_Atalk, node); - break; - - case AURPCODE_RTUPDATE: - AURPrtupdate((RT_entry *)mdata, param); - break; - - case AURPCODE_DEBUGINFO: /* debug info */ - dbgBits = *(dbgBits_t *)mdata; - net_port = param; - break; - - default: - dPrintf(D_M_AURP, D_L_ERROR, ("AURPcmdx: bad code, %d\n", code)); - } -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/aurp_zi.c b/bsd/netat/aurp_zi.c deleted file mode 100644 index b70b3ecf6..000000000 --- a/bsd/netat/aurp_zi.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1996 Apple Computer, Inc. - * - * Created April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - * - * File: zi.c - */ - -#ifdef AURP_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -static int AURPgetzi(int, unsigned char *, short *, gbuf_t *, int); -static void AURPsetzi(unsigned char, gbuf_t *, short, short); - -/* */ -void AURPsndZReq(state) - aurp_state_t *state; -{ - gbuf_t *m; - int msize; - aurp_hdr_t *hdrp; - short *net, nets_cnt, net_sent=0, entry_num=0; - RT_entry *entry = RT_table; - - if (!state->get_zi || (state->rcv_state == AURPSTATE_Unconnected)) - return; - -l_more: - msize = sizeof(aurp_hdr_t); - if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_MED)) == 0) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPsndZReq: node=%d, out of mblk\n", - state->rem_node)); - return; - } - gbuf_wset(m,msize); - - /* construct the ZI request packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->rcv_connection_id; - hdrp->sequence_number = 0; - hdrp->command_code = AURPCMD_ZReq; - hdrp->flags = 0; - *(short *)(hdrp+1) = AURPSUBCODE_ZoneInfo1; - gbuf_winc(m,sizeof(short)); - - net = (short *)gbuf_wptr(m); - nets_cnt = 0; - - while (entry_num < RT_maxentry) { - /* - * scan the router table, and build the ZI request packet - * with the right entries, i.e., - * - entry in use and not of the net_port - * - with no zones and in an active state - * - talking to the right router - */ - if ( (entry->NetPort == net_port) && entry->NetStop && - ((entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT) && - (!RT_ALL_ZONES_KNOWN(entry)) ) { - *net++ = (entry->NetStart) ? 
entry->NetStart : entry->NetStop; - nets_cnt++; - } - - if (nets_cnt >= 640) { - /* query only 640 networks per packet */ - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndZReq: node=%d\n", - state->rem_node)); - gbuf_winc(m,(nets_cnt * sizeof(short))); - AURPsend(m, AUD_AURP, state->rem_node); - net_sent = 1; - goto l_more; - } - - entry_num++; - entry++; - } - - if (nets_cnt) { - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndZReq: node=%d\n", - state->rem_node)); - gbuf_winc(m,(nets_cnt * sizeof(short))); - AURPsend(m, AUD_AURP, state->rem_node); - net_sent = 1; - } else - gbuf_freeb(m); - - if (!net_sent) - state->get_zi = 0; -} - -/* */ -void AURPsndZRsp(state, dat_m, flag) - aurp_state_t *state; - gbuf_t *dat_m; - int flag; -{ - short len; - int msize, next_entry = 0; - gbuf_t *m; - aurp_hdr_t *hdrp; - - if ((state->snd_state == AURPSTATE_Unconnected) || (dat_m == 0)) - return; - msize = sizeof(aurp_hdr_t); - - do { - if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_MED)) == 0) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPsndZRsp: node=%d, out of mblk\n", - state->rem_node)); - return; - } - gbuf_wset(m,msize); - - /* construct the ZI response packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->snd_connection_id; - hdrp->sequence_number = 0; - hdrp->command_code = AURPCMD_ZRsp; - hdrp->flags = 0; - - /* get zone info of the local networks */ - next_entry = AURPgetzi(next_entry, gbuf_wptr(m), &len, dat_m, flag); - gbuf_winc(m,len); - - /* send the packet */ - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndZRsp: len=%d\n", len)); - AURPsend(m, AUD_AURP, state->rem_node); - - } while (next_entry); - - gbuf_freem(dat_m); -} - -/* */ -void AURPsndGZN(state, dat_m) - aurp_state_t *state; - gbuf_t *dat_m; -{ - short zname_len; - int msize; - gbuf_t *m; - aurp_hdr_t *hdrp; - - if (state->snd_state == AURPSTATE_Unconnected) - return; - - msize = sizeof(aurp_hdr_t); - if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_MED)) == 0) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPsndGZN: node=%d, out of mblk\n", - state->rem_node)); - return; - } - gbuf_wset(m,msize); - - /* construct the GZN response packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->snd_connection_id; - hdrp->sequence_number = 0; - hdrp->command_code = AURPCMD_ZRsp; - hdrp->flags = 0; - *(short *)(gbuf_wptr(m)) = AURPSUBCODE_GetZoneNets; - gbuf_winc(m,sizeof(short)); - zname_len = gbuf_len(dat_m); - bcopy(gbuf_rptr(dat_m), gbuf_wptr(m), zname_len); - gbuf_winc(m,zname_len); - *(short *)(gbuf_wptr(m)) = -1; /* number of tuples - proto not supported */ - gbuf_winc(m,sizeof(short)); - - /* send the packet */ - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndGZN: count=%d\n", -1)); - AURPsend(m, AUD_AURP, state->rem_node); -} - -/* */ -void AURPsndGDZL(state, dat_m) - aurp_state_t *state; - gbuf_t *dat_m; -{ - int msize; - gbuf_t *m; - aurp_hdr_t *hdrp; - - if (state->snd_state == AURPSTATE_Unconnected) - return; - - msize = sizeof(aurp_hdr_t); - if ((m = (gbuf_t *)gbuf_alloc(msize+AURP_MaxPktSize, PRI_MED)) == 0) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPsndGDZL: node=%d, out of mblk\n", - state->rem_node)); - return; - } - gbuf_wset(m,msize); - - /* construct the GDZL response packet */ - hdrp = (aurp_hdr_t *)gbuf_rptr(m); - hdrp->connection_id = state->snd_connection_id; - hdrp->sequence_number = 0; - hdrp->command_code = AURPCMD_ZRsp; - hdrp->flags = 0; - *(short *)(gbuf_wptr(m)) = AURPSUBCODE_GetDomainZoneList; - gbuf_winc(m,sizeof(short)); - *(short *)(gbuf_wptr(m)) = -1; /* start index - proto not supported */ - 
gbuf_winc(m,sizeof(short)); - - /* send the packet */ - dPrintf(D_M_AURP, D_L_INFO, ("AURPsndGDZL: index=%d\n", -1)); - AURPsend(m, AUD_AURP, state->rem_node); -} - -/* */ -void AURPrcvZReq(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - short sub_code; - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - - /* make sure we're in a valid state to accept it */ - if (state->snd_state == AURPSTATE_Unconnected) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvZReq: unexpected response\n")); - gbuf_freem(m); - return; - } - - /* check for the correct connection id */ - if (hdrp->connection_id != state->snd_connection_id) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvZReq: invalid connection id, r=%d, m=%d\n", - hdrp->connection_id, state->snd_connection_id)); - gbuf_freem(m); - return; - } - - gbuf_rinc(m,sizeof(*hdrp)); - sub_code = *(short *)gbuf_rptr(m); - gbuf_rinc(m,sizeof(short)); - - dPrintf(D_M_AURP, D_L_INFO, ("AURPrcvZReq: len=%ld\n", gbuf_len(m))); - - switch (sub_code) { - case AURPSUBCODE_ZoneInfo1: - AURPsndZRsp(state, m, 0); - return; - - case AURPSUBCODE_GetZoneNets: - AURPsndGZN(state, m); - break; - - case AURPSUBCODE_GetDomainZoneList: - AURPsndGDZL(state, m); - break; - } - - gbuf_freem(m); -} - -/* */ -void AURPrcvZRsp(state, m) - aurp_state_t *state; - gbuf_t *m; -{ - short sub_code, tuples_cnt; - aurp_hdr_t *hdrp = (aurp_hdr_t *)gbuf_rptr(m); - - /* make sure we're in a valid state to accept it */ - if (state->rcv_state == AURPSTATE_Unconnected) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPrcvZRsp: unexpected response\n")); - gbuf_freem(m); - return; - } - - /* check for the correct connection id */ - if (hdrp->connection_id != state->rcv_connection_id) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPrcvZRsp: invalid connection id, r=%d, m=%d\n", - hdrp->connection_id, state->rcv_connection_id)); - gbuf_freem(m); - return; - } - - gbuf_rinc(m,sizeof(*hdrp)); - sub_code = *(short *)gbuf_rptr(m); - gbuf_rinc(m,sizeof(short)); - - dPrintf(D_M_AURP, D_L_INFO, ("AURPrcvZRsp: len=%ld\n", gbuf_len(m))); - - switch (sub_code) { - case AURPSUBCODE_ZoneInfo1: - case AURPSUBCODE_ZoneInfo2: - tuples_cnt = *(short *)gbuf_rptr(m); - gbuf_rinc(m,sizeof(short)); - AURPsetzi(state->rem_node, m, sub_code, tuples_cnt); - break; - - case AURPSUBCODE_GetZoneNets: - break; - - case AURPSUBCODE_GetDomainZoneList: - break; - } - - gbuf_freem(m); -} - -/* */ -static int -AURPgetzi(next_entry, buf, len, dat_m, flag) - int next_entry; - unsigned char *buf; - short *len; - gbuf_t *dat_m; - int flag; -{ - static int i_sav=ZT_BYTES-1, j_sav=0, idx_sav=-1; - unsigned char ev, zname_len, *zmap, *zname_base, *zname_sav, *tuples_ptr; - unsigned short net_num, *net, zname_offset; - short *sub_codep, *tuples_cntp, tuples_cnt, dat_len; - int i, j, idx, nets_cnt; - RT_entry *entry; - - /* - * XXX CHS June-98: The compiler complains that some of these - * XXX variables may be used before they're set. I don't think - * XXX that's actually the case, but to check, I'll assign them - * XXX with some test value, and add asserts to check them at - * XXX run-time. The asserts won't be compiled in for production. 
- */ - zname_sav = tuples_ptr = (unsigned char *) 0xdeadbeef; /* XXX */ - net = (unsigned short *) 0xdeadbeef; /* XXX */ - net_num = 0xdead; /* XXX */ - nets_cnt = 0xfeedface; /* XXX */ - - sub_codep = (short *)buf; - buf += sizeof(short); - tuples_cntp = (short *)buf; - buf += sizeof(short); - *len = sizeof(short) + sizeof(short); - zname_base = buf + sizeof(short); - dat_len = 0; - - /* set the subcode in the ZI response packet */ - *sub_codep = next_entry ? AURPSUBCODE_ZoneInfo2 : AURPSUBCODE_ZoneInfo1; - - switch (flag) { - case 0: /* zone info in response to ZI request */ - net = (unsigned short *)gbuf_rptr(dat_m); - nets_cnt = (gbuf_len(dat_m))/2; - break; - case 1: /* zone info in response to Ack of RI response */ - tuples_ptr = gbuf_rptr(dat_m); - nets_cnt = (gbuf_len(dat_m))/3; - next_entry = 0; - break; - case 2: /* zone info in response to Ack of RI update */ - tuples_ptr = gbuf_rptr(dat_m); - nets_cnt = (gbuf_len(dat_m))/4; - next_entry = 0; - break; - } - - /* - * for each network, find all the zones that it belongs to - */ - assert(nets_cnt != 0xfeedface); /* XXX */ - for (tuples_cnt=0; next_entry < nets_cnt; next_entry++) { - switch(flag) { - case 0: - assert(net != 0xdeadbeef); /* XXX */ - net_num = net[next_entry]; - break; - case 1: - assert(tuples_ptr != 0xdeadbeef); /* XXX */ - net_num = *(unsigned short *)tuples_ptr; - tuples_ptr += 3; - gbuf_rinc(dat_m,3); - if (tuples_ptr[-1] & 0x80) { - tuples_ptr += 3; - gbuf_rinc(dat_m,3); - next_entry++; - } - break; - case 2: - if (gbuf_len(dat_m) <= 0) { - next_entry = nets_cnt; - goto l_done; - } - assert(tuples_ptr != 0xdeadbeef); /* XXX */ - ev = *tuples_ptr++; - net_num = *(unsigned short *)tuples_ptr; - tuples_ptr += 3; - gbuf_rinc(dat_m,4); - if (tuples_ptr[-1] & 0x80) { - tuples_ptr += 2; - gbuf_rinc(dat_m,2); - } - if (ev != AURPEV_NetAdded) - continue; - break; - } - - /* - * find the RT entry associated with the network - */ - assert(net_num != 0xdead); /* XXX */ - if ((entry = rt_blookup(net_num)) == 0) { - dPrintf(D_M_AURP, D_L_WARNING, ("AURPgetzi: invalid net, %d\n", - net_num)); - continue; - } - if ( ((entry->EntryState & 0x0F) < RTE_STATE_SUSPECT) || - !RT_ALL_ZONES_KNOWN(entry) || - (entry->AURPFlag & AURP_NetHiden) ) { - dPrintf(D_M_AURP_LOW, D_L_INFO, ("AURPgetzi: zombie net, net=%d\n", - net_num)); - continue; - } - - if (entry->NetStart == 0) { - if ((idx = zt_ent_zindex(entry->ZoneBitMap)) == 0) - continue; - idx--; /* index in the zone table */ - zname_len = ZT_table[idx].Zone.len; - if (zname_len) { - assert(net_num != 0xdead); /* XXX */ - *(unsigned short *)buf = net_num; - buf += sizeof(short); - if (idx == idx_sav) { - /* use the optimized format */ - assert(zname_sav != 0xdeadbeef); /* XXX */ - zname_offset = zname_sav - zname_base; - *(unsigned short *)buf = (0x8000 | zname_offset); - buf += sizeof(short); - dat_len += 4; - } else { - /* use the long format */ - zname_sav = buf; - *buf++ = zname_len; - bcopy(ZT_table[idx].Zone.str, buf, zname_len); - buf += zname_len; - dat_len += (3 + zname_len); - } - tuples_cnt++; - idx_sav = idx; - } - - } else { - zmap = entry->ZoneBitMap; - for (i=i_sav; i >=0; i--) { - if (!zmap[i]) - continue; - - for (j=j_sav; j < 8; j++) { - if (!((zmap[i] << j) & 0x80)) - continue; - - idx = i*8 + j; /* index in the zone table */ - zname_len = ZT_table[idx].Zone.len; - if (zname_len) { - if ((dat_len+3+zname_len) > AURP_MaxPktSize) { - i_sav = i; - j_sav = j; - goto l_done; - } - - assert(net_num != 0xdead); /* XXX */ - *(unsigned short *)buf = net_num; - buf += 
sizeof(short); - if (idx == idx_sav) { - /* use the optimized format */ - assert(zname_sav != 0xdeadbeef);/*XXX*/ - zname_offset = zname_sav - zname_base; - *(unsigned short *)buf = (0x8000 | zname_offset); - buf += sizeof(short); - dat_len += 4; - } else { - /* use the long format */ - zname_sav = buf; - *buf++ = zname_len; - bcopy(ZT_table[idx].Zone.str, buf, zname_len); - buf += zname_len; - dat_len += (3 + zname_len); - } - tuples_cnt++; - idx_sav = idx; - } - } - } - } - if ((dat_len+3+32) > AURP_MaxPktSize) { - next_entry++; - break; - } - } - i_sav = ZT_BYTES-1; - j_sav = 0; - -l_done: - *len += dat_len; - if (next_entry == nets_cnt) - next_entry = 0; - - /* set the subcode in the ZI response packet */ - if (next_entry) - *sub_codep = AURPSUBCODE_ZoneInfo2; - - /* set the tuples count in the ZI response packet */ - *tuples_cntp = tuples_cnt; - - idx_sav = -1; - return next_entry; -} - -/* */ -static void -AURPsetzi(node, m, sub_code, tuples_cnt) - unsigned char node; - gbuf_t *m; - short sub_code; - short tuples_cnt; -{ - int rc, tuple_fmt; - unsigned short net_num, zname_offset; - unsigned char *buf = gbuf_rptr(m), *zname_base; - RT_entry *entry; - at_nvestr_t *zname; - - /* compute the base of the zone names of the optimized tuples */ - zname_base = buf + sizeof(short); - - /* process all tuples */ - while (tuples_cnt-- > 0) { - net_num = *(unsigned short *)buf; - buf += sizeof(short); - if (*buf & 0x80) { - /* optimized-format tuple */ - zname_offset = (*(unsigned short *)buf) & 0x7fff; - buf += sizeof(short); - zname = (at_nvestr_t *)(zname_base + zname_offset); - tuple_fmt = 0; - dPrintf(D_M_AURP_LOW, D_L_INFO, - ("AURPsetzi: optimized fmt, net=%d. zlen=%d, zoffset=%d\n ", - net_num, zname->len, zname_offset)); - } else { - /* long-format tuple */ - zname = (at_nvestr_t *)buf; - tuple_fmt = 1; - dPrintf(D_M_AURP_LOW, D_L_INFO, - ("AURPsetzi: long fmt, net=%d, zlen=%d\n ", - net_num, zname->len)); - } - - /* - * find the RT entry associated with the specified network - */ - if ((entry = rt_blookup(net_num)) == 0) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPsetzi: invalid net, net=%d\n", net_num)); - } else { /* entry found */ - if (entry->EntryState >= RTE_STATE_SUSPECT) { - if ((rc = zt_add_zonename(zname)) == ZT_MAXEDOUT) { - dPrintf(D_M_AURP, D_L_WARNING, - ("AURPsetzi: ZT_table full\n")); - } else { - zt_set_zmap(rc, entry->ZoneBitMap); - RT_SET_ZONE_KNOWN(entry); - } - } - } - if (tuple_fmt) - buf += zname->len+1; - } -} - -#endif /* AURP_SUPPORT */ diff --git a/bsd/netat/ddp.c b/bsd/netat/ddp.c deleted file mode 100644 index 0e2ebea4b..000000000 --- a/bsd/netat/ddp.c +++ /dev/null @@ -1,1420 +0,0 @@ -/* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Modified for MP, 1996 by Tuyen Nguyen - * Added AURP support, April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -#define RESOLVE_DBG /* define debug globals in debug.h */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* globals */ - -/* Queue of LAP interfaces which have registered themselves with DDP */ -struct at_ifQueueHd at_ifQueueHd; - -extern TAILQ_HEAD(name_registry, _nve_) name_registry; - -snmpStats_t snmpStats; /* snmp ddp & echo stats */ - -extern at_ddp_stats_t at_ddp_stats; /* DDP statistics */ -extern struct atpcb ddp_head; -extern at_ifaddr_t *ifID_home, *ifID_table[]; -extern aarp_amt_array *aarp_table[]; -extern at_ifaddr_t at_interfaces[]; - -/* routing mode special */ -void (*ddp_AURPsendx)(void) = NULL; -at_ifaddr_t *aurp_ifID = 0; - -int pktsIn = 0; -int pktsOut = 0; -int pktsDropped = 0; -int pktsHome = 0; - -extern int *atp_pidM; -extern int *adsp_pidM; -extern struct atpcb *atp_inputQ[]; -extern CCB *adsp_inputQ[]; - -static void fillin_pkt_chain(gbuf_t *); -static int ot_ddp_check_socket(unsigned char ,int pid); - - -struct { - ddp_handler_func func; -} ddp_handler[256]; - -void init_ddp_handler(void) -{ - bzero(ddp_handler, sizeof(ddp_handler)); -} - -void add_ddp_handler(ddp_socket, input_func) - u_char ddp_socket; - ddp_handler_func input_func; -{ - ddp_handler[ddp_socket].func = input_func; -} - -void -ddp_slowtimo() -{ - ddp_brt_sweep(); -} - -/* - * Raw DDP socket option processing. 
- */ -int ddp_ctloutput(so, sopt) - struct socket *so; - struct sockopt *sopt; -{ - struct atpcb *at_pcb = sotoatpcb(so); - int optval, error = 0; - - if (sopt->sopt_level != ATPROTO_NONE) - return (EINVAL); - - switch (sopt->sopt_dir) { - - case SOPT_GET: - switch (sopt->sopt_name) { - case DDP_HDRINCL: - optval = at_pcb->ddp_flags & DDPFLG_HDRINCL; - error = sooptcopyout(sopt, &optval, sizeof optval); - break; - case DDP_CHKSUM_ON: - optval = at_pcb->ddp_flags & DDPFLG_CHKSUM; - error = sooptcopyout(sopt, &optval, sizeof optval); - break; - case DDP_STRIPHDR: - optval = at_pcb->ddp_flags & DDPFLG_STRIPHDR; - error = sooptcopyout(sopt, &optval, sizeof optval); - break; - case DDP_SLFSND_ON: - optval = at_pcb->ddp_flags & DDPFLG_SLFSND; - error = sooptcopyout(sopt, &optval, sizeof optval); - break; - case DDP_GETSOCKNAME: - { - ddp_addr_t addr; - addr.inet.net = at_pcb->laddr.s_net; - addr.inet.node = at_pcb->laddr.s_node; - addr.inet.socket = at_pcb->lport; - addr.ddptype = at_pcb->ddptype; - error = sooptcopyout(sopt, &addr, sizeof addr); - } - break; - default: - error = ENOPROTOOPT; - break; - } - break; - case SOPT_SET: - switch (sopt->sopt_name) { - case DDP_HDRINCL: - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - if (optval) - at_pcb->ddp_flags |= DDPFLG_HDRINCL; - else - at_pcb->ddp_flags &= ~DDPFLG_HDRINCL; - break; - case DDP_CHKSUM_ON: - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - if (optval) - at_pcb->ddp_flags |= DDPFLG_CHKSUM; - else - at_pcb->ddp_flags &= ~DDPFLG_CHKSUM; - break; - case DDP_STRIPHDR: - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - if (optval) - at_pcb->ddp_flags |= DDPFLG_STRIPHDR; - else - at_pcb->ddp_flags &= ~DDPFLG_STRIPHDR; - break; - case DDP_SLFSND_ON: - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - if (optval) - at_pcb->ddp_flags |= DDPFLG_SLFSND; - else - at_pcb->ddp_flags &= ~DDPFLG_SLFSND; - break; - default: - error = ENOPROTOOPT; - break; - } - break; - } - - return(error); -} /* ddp_cloutput */ - -/****************************************************************/ -/* */ -/* */ -/* Support Routines */ -/* */ -/* */ -/****************************************************************/ - -/* - * Name: - * ddp_checksum - * - * Description: - * This procedure determines the checksum of an extended DDP datagram. - * Add the unsigned bytes into an unsigned 16-bit accumulator. - * After each add, rotate the sign bit into the low order bit of - * the accumulator. When done, if the checksum is 0, changed into 0xFFFF. - * - * Calling sequence: - * checksum = ddp_checksum(mp, offset) - * - * Parameters: - * mp pointer to the datagram gbuf_t - * offset offset to start at in first gbuf_t block - * - * Return value: - * The DDP checksum. - * - */ - -u_short ddp_checksum(mp, offset) - register gbuf_t *mp; - register int offset; -{ - register u_char *data; - register int length; - register u_short checksum; - - checksum = 0; - - do { - if (offset >= gbuf_len(mp)) - offset -= gbuf_len(mp); - else { - data = ((unsigned char *) gbuf_rptr(mp)) + offset; - length = gbuf_len(mp) - offset; - offset = 0; - /* Portable checksum from 3.0 */ - while (length--) { - checksum += *data++; - checksum = (checksum & 0x8000) ? 
- ((checksum << 1) | 1) : (checksum << 1); - } - } - } while ( (mp = gbuf_cont(mp)) ); - - if (checksum == 0) - checksum = 0xffff; - - return(checksum); -} - -/* - * ddp_add_if() - * - * Description: - * This procedure is called by each LAP interface when it wants to place - * itself online. The LAP interfaces passes in a pointer to its at_if - * struct, which is added to DDP's list of active interfaces (at_ifQueueHd). - * When DDP wants to transmit a packet, it searches this list for the - * interface to use. - * - * If AT_IFF_DEFAULT is set, then this interface is to be brought online - * as the interface DDP socket addresses are tied to. Of course there can - * be only one default interface; we return an error if it's already set. - * - * Calling Sequence: - * ret_status = ddp_add_if(ifID) - * - * Formal Parameters: - * ifID pointer to LAP interface's at_if struct. - * - * Completion Status: - * 0 Procedure successfully completed. - * EALREADY This interface is already online, or there is - * already a default interface. - * ENOBUFS Cannot allocate input queue - * - */ -int ddp_add_if(ifID) -register at_ifaddr_t *ifID; -{ - int port = -1; - - dPrintf(D_M_DDP, D_L_STARTUP, - ("ddp_add_if: called, ifID:0x%x\n", (u_int) ifID)); - - if (ifID->ifFlags & AT_IFF_DEFAULT) { - if (ifID_home) - return(EEXIST); /* home port already set */ - else { - port = IFID_HOME; - ifID_home = ifID; - } - } else { - for (port=IFID_HOME+1; portifPort = port; /* set ddp port # in ifID */ - - /* Add this interface to the list of online interfaces */ - TAILQ_INSERT_TAIL(&at_ifQueueHd, ifID, aa_link); - - return (0); -} /* ddp_add_if */ - -/* - * ddp_rem_if() - * - * Description: - * This procedure is called by each LAP interface when it wants to take - * itself offline. The LAP interfaces passes in a pointer to its at_if - * struct; DDP's list of active interfaces (at_ifQueueHd) is searched and - * this interface is removed from the list. DDP can still transmit - * packets as long as this interface is not the default interface; the - * sender will just get ENETUNREACH errors when it tries to send to an - * interface that went offline. However, if the default interface is - * taken offline, we no longer have a node ID to use as a source address - * and DDP must return ENETDOWN when a caller tries to send a packet. - * - * Formal Parameters: - * ifID pointer to LAP interface's at_if struct. - */ - -void ddp_rem_if(ifID) - register at_ifaddr_t *ifID; -{ - struct ifaddr *ifa = &ifID->aa_ifa; - - /* un-do processing done in SIOCSIFADDR */ - ifnet_lock_exclusive(ifID->aa_ifp); - IFA_LOCK(ifa); - if (ifa->ifa_debug & IFD_ATTACHED) { - if_detach_ifa(ifID->aa_ifp, ifa); - ifa->ifa_addr = NULL; - } - IFA_UNLOCK(ifa); - /* release reference held for at_interfaces[] */ - IFA_REMREF(ifa); - ifnet_lock_done(ifID->aa_ifp); - - if (ifID->at_was_attached == 0 && ifID->aa_ifp != NULL) { - (void)proto_unplumb(PF_APPLETALK, ifID->aa_ifp); - } - - /* un-do processing done in ddp_add_if() */ - if (ifID->ifPort) { - if (aarp_table[ifID->ifPort]) { - FREE(aarp_table[ifID->ifPort], M_RTABLE); - aarp_table[ifID->ifPort] = NULL; - } - - at_state.flags |= AT_ST_IF_CHANGED; - ifID->aa_ifp = NULL; - - trackrouter_rem_if(ifID); - TAILQ_REMOVE(&at_ifQueueHd, ifID, aa_link); - ifID_table[ifID->ifPort] = NULL; - ifID->ifName[0] = '\0'; - ifID->ifPort = 0; - } - - /* *** deallocate ifID, eventually *** */ -} /* ddp_rem_if */ - -/* - * The user may have registered an NVE with the NBP on a socket. 
When the - * socket is closed, the NVE should be deleted from NBP's name table. The - * user should delete the NVE before the socket is shut down, but there - * may be circumstances when he can't. So, whenever a DDP socket is closed, - * this routine is used to notify NBP of the socket closure. This would - * help NBP get rid of all NVE's registered on the socket. - */ - -/* *** Do we still need to do this? *** */ -static int ot_ddp_check_socket(socket, pid) - unsigned char socket; - int pid; -{ - int cnt = 0; - gref_t *gref; - - dPrintf(D_M_DDP, D_L_INFO, ("ot_ddp_check_socket: %d\n", socket)); - for (gref = ddp_head.atpcb_next; gref != &ddp_head; gref = gref->atpcb_next) - if (gref->lport == socket && gref->pid == pid) - cnt++; - if ((atp_inputQ[socket] != NULL) && (atp_inputQ[socket] != (gref_t *)1) - && (atp_pidM[socket] == pid)) - cnt++; - if ((adsp_inputQ[socket] != NULL) && (adsp_pidM[socket] == pid)) - cnt++; - - return(cnt); -} - -void ddp_notify_nbp( - unsigned char socket, - int pid, - __unused unsigned char ddptype) -{ - nve_entry_t *nve_entry, *nve_next; - - if (at_state.flags & AT_ST_STARTED) { - /* *** NBP_CLOSE_NOTE processing (from ddp_nbp.c) *** */ - for ((nve_entry = TAILQ_FIRST(&name_registry)); nve_entry; nve_entry = nve_next) { - nve_next = TAILQ_NEXT(nve_entry, nve_link); - if ((at_socket)socket == nve_entry->address.socket && - /* *** check complete address and ddptype here *** */ - pid == nve_entry->pid && - ot_ddp_check_socket(nve_entry->address.socket, - nve_entry->pid) < 2) { - /* NB: nbp_delete_entry calls TAILQ_REMOVE */ - nbp_delete_entry(nve_entry); - } - } - } -} /* ddp_notify_nbp */ - -static void fillin_pkt_chain(m) - gbuf_t *m; -{ - gbuf_t *tmp_m = m; - register at_ddp_t - *ddp = (at_ddp_t *)gbuf_rptr(m), - *tmp_ddp; - u_short tmp; - - if (UAS_VALUE(ddp->checksum)) { - tmp = ddp_checksum(m, 4); - UAS_ASSIGN_HTON(ddp->checksum, tmp); - } - - for (tmp_m=gbuf_next(tmp_m); tmp_m; tmp_m=gbuf_next(tmp_m)) { - tmp_ddp = (at_ddp_t *)gbuf_rptr(tmp_m); - DDPLEN_ASSIGN(tmp_ddp, gbuf_msgsize(tmp_m)); - tmp_ddp->hopcount = - tmp_ddp->unused = 0; - NET_NET(tmp_ddp->src_net, ddp->src_net); - tmp_ddp->src_node = ddp->src_node; - tmp_ddp->src_socket = ddp->src_socket; - if (UAS_VALUE(tmp_ddp->checksum)) { - tmp = ddp_checksum(tmp_m, 4); - UAS_ASSIGN_HTON(ddp->checksum, tmp); - } - } -} - -/* There are various ways a packet may go out.... it may be sent out - * directly to destination node, or sent to a random router or sent - * to a router whose entry exists in Best Router Cache. Following are - * constants used WITHIN this routine to keep track of choice of destination - */ -#define DIRECT_ADDR 1 -#define BRT_ENTRY 2 -#define BRIDGE_ADDR 3 - -/* - * ddp_output() - * - * Remarks : - * Called to queue a atp/ddp data packet on the network interface. - * It returns 0 normally, and an errno in case of error. - * The mbuf chain pointed to by *mp is consumed on success, and - * freed in case of error. 
- * - */ -int ddp_output(mp, src_socket, src_addr_included) - register gbuf_t **mp; - at_socket src_socket; - int src_addr_included; -{ - register at_ifaddr_t *ifID = ifID_home, *ifIDTmp = NULL; - register at_ddp_t *ddp; - register ddp_brt_t *brt = NULL; - register at_net_al dst_net; - register int len; - struct atalk_addr at_dest; - at_ifaddr_t *ARouterIf = NULL; - int loop = 0; - int error = 0; - int addr_type; - u_char addr_flag = 0; - char *addr = NULL; - register gbuf_t *m; - - KERNEL_DEBUG(DBG_AT_DDP_OUTPUT | DBG_FUNC_START, 0, - 0,0,0,0); - - snmpStats.dd_outReq++; - - m = *mp; - ddp = (at_ddp_t *)gbuf_rptr(m); - - if (!ifID) { - /* Device/Interface not configured */ - dPrintf(D_M_DDP, D_L_ERROR, ("Device/Interface not configured")); - error = ENXIO; - gbuf_freel(*mp); - goto exit_ddp_output; - } - - if ((ddp->dst_socket > (unsigned) (DDP_SOCKET_LAST + 1)) || - (ddp->dst_socket < DDP_SOCKET_1st_RESERVED)) { - dPrintf(D_M_DDP, D_L_ERROR, - ("Illegal destination socket on outgoing packet (0x%x)", - ddp->dst_socket)); - at_ddp_stats.xmit_bad_addr++; - error = ENOTSOCK; - gbuf_freel(*mp); - goto exit_ddp_output; - } - if ((len = gbuf_msgsize(*mp)) > DDP_DATAGRAM_SIZE) { - /* the packet is too large */ - dPrintf(D_M_DDP, D_L_ERROR, - ("Outgoing packet too long (len=%d bytes)", len)); - at_ddp_stats.xmit_bad_length++; - error = EMSGSIZE; - gbuf_freel(*mp); - goto exit_ddp_output; - } - at_ddp_stats.xmit_bytes += len; - at_ddp_stats.xmit_packets++; - - DDPLEN_ASSIGN(ddp, len); - ddp->hopcount = - ddp->unused = 0; - - /* If this packet is for the same node, loop it back - * up... Note that for LocalTalk, dst_net zero means "THIS_NET", so - * address 0.nn is eligible for loopback. For Extended EtherTalk, - * dst_net 0 can be used only for cable-wide or zone-wide - * broadcasts (0.ff) and as such, address of the form 0.nn is NOT - * eligible for loopback. - */ - dst_net = NET_VALUE(ddp->dst_net); - - /* If our packet is destined for the 'virtual' bridge - * address of NODE==0xFE, replace that address with a - * real bridge address. 
- */ - if ((ddp->dst_node == 0xfe) && - ((dst_net == ATADDR_ANYNET) || - (dst_net >= ifID_home->ifThisCableStart && - dst_net <= ifID_home->ifThisCableEnd))) { - /* if there's a router that's not us, it's in ifID_home */ - NET_ASSIGN(ddp->dst_net, ifID_home->ifARouter.s_net); - dst_net = ifID_home->ifARouter.s_net; - ddp->dst_node = ifID_home->ifARouter.s_node; - } - - if (MULTIHOME_MODE && (ifIDTmp = forUs(ddp))) { - ifID = ifIDTmp; - loop = TRUE; - dPrintf(D_M_DDP_LOW, D_L_USR1, - ("ddp_out: for us if:%s\n", ifIDTmp->ifName)); - } - - if (!loop) - loop = ((ddp->dst_node == ifID->ifThisNode.s_node) && - (dst_net == ifID->ifThisNode.s_net) - ); - if (loop) { - gbuf_t *mdata, *mdata_next; - - if (!MULTIHOME_MODE || !src_addr_included) { - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - ddp->src_node = ifID->ifThisNode.s_node; - } - ddp->src_socket = src_socket; - - dPrintf(D_M_DDP_LOW, D_L_OUTPUT, - ("ddp_output: loop to %d:%d port=%d\n", - NET_VALUE(ddp->dst_net), - ddp->dst_node, - ifID->ifPort)); - - fillin_pkt_chain(*mp); - - dPrintf(D_M_DDP, D_L_VERBOSE, - ("Looping back packet from skt 0x%x to skt 0x%x\n", - ddp->src_socket, ddp->dst_socket)); - - for (mdata = *mp; mdata; mdata = mdata_next) { - mdata_next = gbuf_next(mdata); - gbuf_next(mdata) = 0; - ddp_input(mdata, ifID); - } - goto exit_ddp_output; - } - if ((ddp->dst_socket == ZIP_SOCKET) && - (zip_type_packet(*mp) == ZIP_GETMYZONE)) { - ddp->src_socket = src_socket; - error = zip_handle_getmyzone(ifID, *mp); - gbuf_freel(*mp); - goto exit_ddp_output; - } - /* - * find out the interface on which the packet should go out - */ - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if ((ifID->ifThisNode.s_net == dst_net) || (dst_net == 0)) - /* the message is either going out (i) on the same - * NETWORK in case of LocalTalk, or (ii) on the same - * CABLE in case of Extended AppleTalk (EtherTalk). - */ - break; - - if ((ifID->ifThisCableStart <= dst_net) && - (ifID->ifThisCableEnd >= dst_net) - ) - /* We're on EtherTalk and the message is going out to - * some other network on the same cable. - */ - break; - - if (ARouterIf == NULL && ATALK_VALUE(ifID->ifARouter)) - ARouterIf = ifID; - } - dPrintf(D_M_DDP_LOW, D_L_USR1, - ("ddp_output: after search ifid:0x%x %s ifID_home:0x%x\n", - (u_int)ifID, ifID ? ifID->ifName : "", - (u_int)ifID_home)); - - if (ifID) { - /* located the interface where the packet should - * go.... the "first-hop" destination address - * must be the same as real destination address. - */ - addr_type = DIRECT_ADDR; - } else { - /* no, the destination network number does - * not match known network numbers. If we have - * heard from this network recently, BRT table - * may have address of a router we could use! - */ - if (!MULTIPORT_MODE) { - BRT_LOOK (brt, dst_net); - if (brt) { - /* Bingo... BRT has an entry for this network. - * Use the link address as is. - */ - dPrintf(D_M_DDP, D_L_VERBOSE, - ("Found BRT entry to send to net 0x%x", dst_net)); - at_ddp_stats.xmit_BRT_used++; - addr_type = BRT_ENTRY; - ifID = brt->ifID; - } else { - /* No BRT entry available for dest network... do we - * know of any router at all?? 
- */ - if ((ifID = ARouterIf) != NULL) - addr_type = BRIDGE_ADDR; - else { - dPrintf(D_M_DDP, D_L_WARNING, - ("Found no interface to send pkt")); - at_ddp_stats.xmit_bad_addr++; - error = ENETUNREACH; - gbuf_freel(*mp); - goto exit_ddp_output; - } - } - } - else { /* We are in multiport mode, so we can bypass all the rest - * and directly ask for the routing of the packet - */ - at_ddp_stats.xmit_BRT_used++; - - ifID = ifID_home; - if (!src_addr_included) { - ddp->src_node = ifID->ifThisNode.s_node; - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - } - ddp->src_socket = src_socket; - routing_needed(*mp, ifID, TRUE); - - goto exit_ddp_output; - } - } - /* by the time we land here, we know the interface on - * which this packet is going out.... ifID. - */ - if (ifID->ifState == LAP_OFFLINE) { - gbuf_freel(*mp); - goto exit_ddp_output; - } - - switch (addr_type) { - case DIRECT_ADDR : -/* - at_dest.atalk_unused = 0; -*/ - NET_ASSIGN(at_dest.atalk_net, dst_net); - at_dest.atalk_node = ddp->dst_node; - addr_flag = AT_ADDR; - addr = (char *)&at_dest; - break; - case BRT_ENTRY : - addr_flag = ET_ADDR; - addr = (char *)&brt->et_addr; - break; - case BRIDGE_ADDR : - NET_ASSIGN(at_dest.atalk_net, ifID->ifARouter.s_net); - at_dest.atalk_node = ifID->ifARouter.s_node; - addr_flag = AT_ADDR; - addr = (char *)&at_dest; - break; - - } - /* Irrespective of the interface on which - * the packet is going out, we always put the - * same source address on the packet (unless multihoming mode). - */ - if (MULTIHOME_MODE) { - if (!src_addr_included) { - ddp->src_node = ifID->ifThisNode.s_node; - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - } - } - else { - ddp->src_node = ifID_home->ifThisNode.s_node; - NET_ASSIGN(ddp->src_net, ifID_home->ifThisNode.s_net); - } - ddp->src_socket = src_socket; - - dPrintf(D_M_DDP_LOW, D_L_OUTPUT, - ("ddp_output: going out to %d:%d skt%d on %s\n", - dst_net, ddp->dst_node, ddp->dst_socket, ifID->ifName)); - - fillin_pkt_chain(*mp); - - { /* begin block */ - struct etalk_addr dest_addr; - struct atalk_addr dest_at_addr; - - loop = TRUE; /* flag to aarp to loopback (default) */ - - m = *mp; - - /* the incoming frame is of the form {flag, address, ddp...} - * where "flag" indicates whether the address is an 802.3 - * (link) address, or an appletalk address. If it's an - * 802.3 address, the packet can just go out to the network - * through PAT, if it's an appletalk address, AT->802.3 address - * resolution needs to be done. - * If 802.3 address is known, strip off the flag and 802.3 - * address, and prepend 802.2 and 802.3 headers. 
- */
-
-        if (addr == NULL) {
-            addr_flag = *(u_char *)gbuf_rptr(m);
-            gbuf_rinc(m,1);
-        }
-
-        switch (addr_flag) {
-        case AT_ADDR_NO_LOOP :
-            loop = FALSE;
-            /* pass thru */
-        case AT_ADDR :
-            if (addr == NULL) {
-                dest_at_addr = *(struct atalk_addr *)gbuf_rptr(m);
-                gbuf_rinc(m,sizeof(struct atalk_addr));
-            } else
-                dest_at_addr = *(struct atalk_addr *)addr;
-            break;
-        case ET_ADDR :
-            if (addr == NULL) {
-                dest_addr = *(struct etalk_addr *)gbuf_rptr(m);
-                gbuf_rinc(m,sizeof(struct etalk_addr));
-            } else
-                dest_addr = *(struct etalk_addr *)addr;
-            break;
-        default :
-            dPrintf(D_M_DDP_LOW,D_L_ERROR,
-                ("ddp_output: Unknown addr_flag = 0x%x\n", addr_flag));
-            gbuf_freel(m);    /* unknown address type, chuck it */
-            goto exit_ddp_output;
-        }
-
-        m = gbuf_strip(m);
-
-        /* At this point, rptr points to ddp header for sure */
-        if (ifID->ifState == LAP_ONLINE_FOR_ZIP) {
-            /* see if this is a ZIP packet that we need
-             * to let through even though network is
-             * not yet alive!!
-             */
-            if (zip_type_packet(m) == 0) {
-                gbuf_freel(m);
-                goto exit_ddp_output;
-            }
-        }
-
-        ifID->stats.xmit_packets++;
-        ifID->stats.xmit_bytes += gbuf_msgsize(m);
-        snmpStats.dd_outLong++;
-
-        switch (addr_flag) {
-        case AT_ADDR_NO_LOOP :
-        case AT_ADDR :
-            /*
-             * we don't want elap to be looking into ddp header, so
-             * it doesn't know net#, consequently can't do
-             * AMT_LOOKUP. That task left to aarp now.
-             */
-            aarp_send_data(m, ifID, &dest_at_addr, loop);
-            break;
-        case ET_ADDR :
-            pat_output(ifID, m, (unsigned char *)&dest_addr, 0);
-            break;
-        }
-    } /* end block */
- exit_ddp_output:
-    KERNEL_DEBUG(DBG_AT_DDP_OUTPUT | DBG_FUNC_END, 0,
-        error, 0, 0, 0);
-    return(error);
-} /* ddp_output */
-
-void ddp_input(mp, ifID)
-    register gbuf_t *mp;
-    register at_ifaddr_t *ifID;
-{
-    register at_ddp_t *ddp;    /* DDP header */
-    register int msgsize;
-    register at_socket socket;
-    register int len;
-    register at_net_al dst_net;
-
-    KERNEL_DEBUG(DBG_AT_DDP_INPUT | DBG_FUNC_START, 0,
-        ifID, mp, gbuf_len(mp),0);
-
-    /* Make sure we know the default interface before starting to
-     * accept incoming packets. If we don't, we may end up with a
-     * null ifID_table[0] and unpredictable results, especially
-     * in router mode. This is a transitory state, because we can
-     * begin to receive packets while we're not completely set up yet.
-     */
-
-    if (ifID_home == (at_ifaddr_t *)NULL) {
-        dPrintf(D_M_DDP, D_L_ERROR,
-            ("dropped incoming packet ifID_home not set yet\n"));
-        gbuf_freem(mp);
-        goto out; /* return */
-    }
-
-    /*
-     * if a DDP packet has been broadcast, we're going to get a copy of
-     * it here, even if it originated at user level via a write on a DDP
-     * socket; when it gets here, the first block in the chain will be
-     * empty since it only contained the lap level header which will be
-     * stripped in the lap level immediately below ddp
-     */
-
-    if ((mp = (gbuf_t *)ddp_compress_msg(mp)) == NULL) {
-        dPrintf(D_M_DDP, D_L_ERROR,
-            ("dropped short incoming ET packet (len %d)", 0));
-        snmpStats.dd_inTotal++;
-        at_ddp_stats.rcv_bad_length++;
-        goto out; /* return; */
-    }
-    msgsize = gbuf_msgsize(mp);
-
-    at_ddp_stats.rcv_bytes += msgsize;
-    at_ddp_stats.rcv_packets++;
-
-    /* if the interface pointer is 0, the packet has been
-     * looped back by 'write' half of DDP. It is of the
-     * form {extended ddp,...}. The packet is meant to go
-     * up to some socket on the same node.
-     */
-    if (!ifID)                /* if loop back is specified */
-        ifID = ifID_home;     /* that means the home port */
-
-    /* the incoming datagram has extended DDP header and is of
-     * the form {ddp,...}.
-     */
-    if (msgsize < DDP_X_HDR_SIZE) {
-        dPrintf(D_M_DDP, D_L_ERROR,
-            ("dropped short incoming ET packet (len %d)", msgsize));
-        at_ddp_stats.rcv_bad_length++;
-        gbuf_freem(mp);
-        goto out; /* return; */
-    }
-    /*
-     * At this point, the message is always of the form
-     * {extended ddp, ... }.
-     */
-    ddp = (at_ddp_t *)gbuf_rptr(mp);
-    len = DDPLEN_VALUE(ddp);
-
-    if (msgsize != len) {
-        if (msgsize > len) {
-            if (len < DDP_X_HDR_SIZE) {
-                dPrintf(D_M_DDP, D_L_ERROR,
-                    ("Length problems, ddp length %d, buffer length %d",
-                    len, msgsize));
-                snmpStats.dd_tooLong++;
-                at_ddp_stats.rcv_bad_length++;
-                gbuf_freem(mp);
-                goto out; /* return; */
-            }
-            /*
-             * shave off the extra bytes from the end of message
-             */
-            mp = ddp_adjmsg(mp, -(msgsize - len)) ? mp : 0;
-            if (mp == 0)
-                goto out; /* return; */
-        } else {
-            dPrintf(D_M_DDP, D_L_ERROR,
-                ("Length problems, ddp length %d, buffer length %d",
-                len, msgsize));
-            snmpStats.dd_tooShort++;
-            at_ddp_stats.rcv_bad_length++;
-            gbuf_freem(mp);
-            goto out; /* return; */
-        }
-    }
-    socket = ddp->dst_socket;
-
-    /*
-     * In router mode we want to see everything, especially socket 254
-     * for NBP, so we need to bypass this test when we are a router.
-     */
-
-    if (!MULTIPORT_MODE && (socket > DDP_SOCKET_LAST ||
-                socket < DDP_SOCKET_1st_RESERVED)) {
-        dPrintf(D_M_DDP, D_L_WARNING,
-            ("Bad dst socket on incoming packet (0x%x)",
-            ddp->dst_socket));
-        at_ddp_stats.rcv_bad_socket++;
-        gbuf_freem(mp);
-        goto out; /* return; */
-    }
-    /*
-     * if the checksum field is nonzero, the sender computed one and
-     * wants us to verify it
-     */
-    if (UAS_VALUE(ddp->checksum) &&
-        (UAS_VALUE_NTOH(ddp->checksum) != ddp_checksum(mp, 4))) {
-        dPrintf(D_M_DDP, D_L_WARNING,
-            ("Checksum error on incoming pkt, calc 0x%x, exp 0x%x",
-            ddp_checksum(mp, 4), UAS_VALUE_NTOH(ddp->checksum)));
-        snmpStats.dd_checkSum++;
-        at_ddp_stats.rcv_bad_checksum++;
-        gbuf_freem(mp);
-        goto out; /* return; */
-    }
-
-/*############### routing input checking */
-
-/* Router mode special: we send packets "up-stack" if they are for this node
- * or, coming from any other port, addressed to one of the reserved atalk
- * sockets (RTMP, ZIP, NBP [and EP]).
- * BTW, the way we know a packet is for the router and not the home port is
- * that the MAC (ethernet) address is always that of the interface we're on,
- * but the AppleTalk address must be that of the home port. If it's a
- * multicast or another AppleTalk address, it's the router's job to figure
- * out where it's going to go.
- */
-    /* *** a duplicate should be sent to any other client that is listening
-       for packets of this type on a raw DDP socket *** */
-    if (ddp_handler[socket].func) {
-        dPrintf(D_M_DDP,D_L_INPUT,
-            ("ddp_input: skt %u hdnlr:0x%p\n",
-            (u_int) socket, ddp_handler[socket].func));
-        pktsHome++;
-        snmpStats.dd_inLocal++;
-
-        (*ddp_handler[socket].func)(mp, ifID);
-        goto out; /* return; */
-    }
-    dst_net = NET_VALUE(ddp->dst_net);
-    if (
-        /* exact match */
-        forUs(ddp) ||
-        /* any node, wildcard or matching net */
-        ((ddp->dst_node == 255) &&
-         (((dst_net >= ifID_home->ifThisCableStart) &&
-           (dst_net <= ifID_home->ifThisCableEnd)) ||
-          dst_net == 0)) ||
-        /* this node is not online yet(?) */
-        (ifID->ifRoutingState < PORT_ONLINE)
-        ) {
-        gref_t *gref;
-        pktsHome++;
-        snmpStats.dd_inLocal++;
-
-        if (ddp->type == DDP_ATP) {
-            if (atp_inputQ[socket] && (atp_inputQ[socket] != (gref_t *)1)) {
-                /* if there's an ATP pcb */
-                atp_input(mp);
-                goto out; /* return; */
-            }
-        } else if (ddp->type == DDP_ADSP) {
-            if (adsp_inputQ[socket]) {
-                /* if there's an ADSP pcb */
-                adsp_input(mp);
-                goto out; /* return; */
-            }
-        }
-
-        /* otherwise look for a DDP pcb;
-           ATP / raw-DDP and ADSP / raw-DDP are possible */
-        for (gref = ddp_head.atpcb_next; gref != &ddp_head;
-             gref = gref->atpcb_next)
-            if (gref->lport == socket &&
-                (gref->ddptype == 0 || gref->ddptype == ddp->type)) {
-                dPrintf(D_M_DDP, D_L_INPUT,
-                    ("ddp_input: streamq, skt %d\n", socket));
-                if (gref->atpcb_socket) {
-                    struct sockaddr_at ddp_in;
-                    ddp_in.sat_len = sizeof(ddp_in);
-                    ddp_in.sat_family = AF_APPLETALK;
-                    ddp_in.sat_addr.s_net = NET_VALUE(ddp->src_net);
-                    ddp_in.sat_addr.s_node = ddp->src_node;
-                    ddp_in.sat_port = ddp->src_socket;
-
-                    /* strip off DDP header if so indicated by
-                       sockopt */
-                    if (gref->ddp_flags & DDPFLG_STRIPHDR) {
-                        mp = m_pullup((struct mbuf *)mp,
-                                      DDP_X_HDR_SIZE);
-                        if (mp) {
-                            gbuf_rinc(mp, DDP_X_HDR_SIZE);
-                        } else {
-                            /* this should never happen because
-                               msgsize was checked earlier */
-                            at_ddp_stats.rcv_bad_length++;
-                            goto out; /* return */
-                        }
-                    }
-
-                    if (sbappendaddr(&((gref->atpcb_socket)->so_rcv),
-                                     (struct sockaddr *)&ddp_in,
-                                     mp, 0, NULL) != 0) {
-                        sorwakeup(gref->atpcb_socket);
-                    }
-                } else {
-                    atalk_putnext(gref, mp);
-                }
-                goto out; /* return */
-            }
-
-        at_ddp_stats.rcv_bad_socket++;
-        gbuf_freem(mp);
-        snmpStats.dd_noHandler++;
-        dPrintf(D_M_DDP, D_L_WARNING,
-            ("ddp_input: dropped pkt for socket %d\n", socket));
-    } else {
-        dPrintf(D_M_DDP, D_L_ROUTING,
-            ("ddp_input: routing_needed from port=%d sock=%d\n",
-            ifID->ifPort, ddp->dst_socket));
-
-        snmpStats.dd_fwdReq++;
-        if (((pktsIn-pktsHome+200) >= RouterMix) && ((++pktsDropped % 5) == 0)) {
-            at_ddp_stats.rcv_dropped_nobuf++;
-            gbuf_freem(mp);
-        }
-        else {
-            routing_needed(mp, ifID, FALSE);
-        }
-    }
-out:
-    KERNEL_DEBUG(DBG_AT_DDP_INPUT | DBG_FUNC_END, 0,0,0,0,0);
-} /* ddp_input */
-
-
-/*
- * ddp_router_output()
- *
- * Remarks :
- * This is a modified version of ddp_output for router use.
- * The main difference is that the interface on which the packet needs
- * to be sent is specified, and a *destination* AppleTalk address is passed
- * as an argument; this address may or may not be the same as the destination
- * address found in the ddp packet... This is the trick about routing: the
- * AppleTalk destination of the packet may not be the same as the Enet address
- * we send the packet to (i.e., we may pass the baby to another router).
- *
- */
-int ddp_router_output(mp, ifID, addr_type, router_net, router_node, enet_addr)
-    gbuf_t *mp;
-    at_ifaddr_t *ifID;
-    int addr_type;
-    at_net_al router_net;
-    at_node router_node;
-    etalk_addr_t *enet_addr;
-{
-    register at_ddp_t *ddp;
-    struct atalk_addr at_dest;
-    int addr_flag = 0;
-    char *addr = NULL;
-    register gbuf_t *m;
-
-    if (!ifID) {
-        dPrintf(D_M_DDP, D_L_WARNING, ("BAD BAD ifID\n"));
-        gbuf_freel(mp);
-        return(EPROTOTYPE);
-    }
-    ddp = (at_ddp_t *)gbuf_rptr(mp);
-
-#ifdef AURP_SUPPORT
-    if (ifID->ifFlags & AT_IFF_AURP) { /* AURP link?
*/ - if (ddp_AURPsendx) { - fillin_pkt_chain(mp); - if (router_node == 255) - router_node = 0; - ddp_AURPsendx(AURPCODE_DATAPKT, mp, router_node); - return 0; - } else { - gbuf_freel(mp); - return EPROTOTYPE; - } - } -#endif - - /* keep some of the tests for now ####### */ - - if (gbuf_msgsize(mp) > DDP_DATAGRAM_SIZE) { - /* the packet is too large */ - dPrintf(D_M_DDP, D_L_WARNING, - ("ddp_router_output: Packet too large size=%d\n", - gbuf_msgsize(mp))); - gbuf_freel(mp); - return (EMSGSIZE); - } - - switch (addr_type) { - - case AT_ADDR : - - /* - * Check for packet destined to the home stack - */ - - if ((ddp->dst_node == ifID->ifThisNode.s_node) && - (NET_VALUE(ddp->dst_net) == ifID->ifThisNode.s_net)) { - dPrintf(D_M_DDP_LOW, D_L_ROUTING, - ("ddp_r_output: sending back home from port=%d socket=%d\n", - ifID->ifPort, ddp->dst_socket)); - - UAS_ASSIGN(ddp->checksum, 0); - ddp_input(mp, ifID); - return(0); - } - - NET_ASSIGN(at_dest.atalk_net, router_net); - at_dest.atalk_node = router_node; - - addr_flag = AT_ADDR_NO_LOOP; - addr = (char *)&at_dest; - dPrintf(D_M_DDP_LOW, D_L_ROUTING_AT, - ("ddp_r_output: AT_ADDR out port=%d net %d:%d via rte %d:%d", - ifID->ifPort, NET_VALUE(ddp->dst_net), ddp->dst_node, router_net, - router_node)); - break; - - case ET_ADDR : - addr_flag = ET_ADDR; - addr = (char *)enet_addr; - dPrintf(D_M_DDP_LOW, D_L_ROUTING, - ("ddp_r_output: ET_ADDR out port=%d net %d:%d\n", - ifID->ifPort, NET_VALUE(ddp->dst_net), ddp->dst_node)); - break; - } - - if (ifID->ifState == LAP_OFFLINE) { - gbuf_freel(mp); - return 0; - } - - fillin_pkt_chain(mp); - - { /* begin block */ - struct etalk_addr dest_addr; - struct atalk_addr dest_at_addr; - int loop = TRUE; /* flag to aarp to loopback (default) */ - - m = mp; - - /* the incoming frame is of the form {flag, address, ddp...} - * where "flag" indicates whether the address is an 802.3 - * (link) address, or an appletalk address. If it's an - * 802.3 address, the packet can just go out to the network - * through PAT, if it's an appletalk address, AT->802.3 address - * resolution needs to be done. - * If 802.3 address is known, strip off the flag and 802.3 - * address, and prepend 802.2 and 802.3 headers. - */ - - if (addr == NULL) { - addr_flag = *(u_char *)gbuf_rptr(m); - gbuf_rinc(m,1); - } - - switch (addr_flag) { - case AT_ADDR_NO_LOOP : - loop = FALSE; - /* pass thru */ - case AT_ADDR : - if (addr == NULL) { - dest_at_addr = *(struct atalk_addr *)gbuf_rptr(m); - gbuf_rinc(m,sizeof(struct atalk_addr)); - } else - dest_at_addr = *(struct atalk_addr *)addr; - break; - case ET_ADDR : - if (addr == NULL) { - dest_addr = *(struct etalk_addr *)gbuf_rptr(m); - gbuf_rinc(m,sizeof(struct etalk_addr)); - } else - dest_addr = *(struct etalk_addr *)addr; - break; - default : - dPrintf(D_M_DDP_LOW,D_L_ERROR, - ("ddp_router_output: Unknown addr_flag = 0x%x\n", addr_flag)); - - gbuf_freel(m); /* unknown address type, chuck it */ - return 0; - } - - m = gbuf_strip(m); - - /* At this point, rptr points to ddp header for sure */ - if (ifID->ifState == LAP_ONLINE_FOR_ZIP) { - /* see if this is a ZIP packet that we need - * to let through even though network is - * not yet alive!! 
- */
-        if (zip_type_packet(m) == 0) {
-            gbuf_freel(m);
-            return 0;
-        }
-    }
-
-    ifID->stats.xmit_packets++;
-    ifID->stats.xmit_bytes += gbuf_msgsize(m);
-    snmpStats.dd_outLong++;
-
-    switch (addr_flag) {
-    case AT_ADDR_NO_LOOP :
-    case AT_ADDR :
-        /*
-         * we don't want elap to be looking into ddp header, so
-         * it doesn't know net#, consequently can't do
-         * AMT_LOOKUP. That task left to aarp now.
-         */
-        aarp_send_data(m, ifID, &dest_at_addr, loop);
-        break;
-    case ET_ADDR :
-        pat_output(ifID, m, (unsigned char *)&dest_addr, 0);
-        break;
-    }
-    } /* end block */
-
-    return(0);
-} /* ddp_router_output */
-
-/*****************************************/
-
-#ifdef AURP_SUPPORT
-
-void rt_delete(NetStop, NetStart)
-    unsigned short NetStop;
-    unsigned short NetStart;
-{
-    RT_entry *found;
-
-    if ((found = rt_bdelete(NetStop, NetStart)) != 0) {
-        bzero(found, sizeof(RT_entry));
-        found->right = RT_table_freelist;
-        RT_table_freelist = found;
-    }
-}
-
-int ddp_AURPfuncx(code, param, node)
-    int code;
-    void *param;
-    unsigned char node;
-{
-    at_ifaddr_t *ifID;
-    int k;
-
-    switch (code) {
-    case AURPCODE_DATAPKT: /* data packet */
-        if (aurp_ifID) {
-            dPrintf(D_M_DDP, D_L_TRACE, ("ddp_AURPfuncx: data, 0x%x, %d\n",
-                (u_int) aurp_ifID, node));
-
-            ddp_input((gbuf_t *)param, aurp_ifID);
-        } else
-            gbuf_freem((gbuf_t *)param);
-        break;
-
-    case AURPCODE_REG: /* register/deregister */
-        if (!ROUTING_MODE)
-            return -1;
-        ddp_AURPsendx = (void(*)())param;
-
-        if (param) {
-            /* register AURP callback function */
-            if (aurp_ifID)
-                return 0;
-            for (k=(IFID_HOME+1); k < IF_TOTAL_MAX; k++) {
-                if (ifID_table[k] == 0) {
-                    aurp_ifID = &at_interfaces[k];
-                    aurp_ifID->ifFlags = RTR_XNET_PORT;
-                    ddp_add_if(aurp_ifID);
-                    aurp_ifID->ifState = LAP_ONLINE;
-                    aurp_ifID->ifRoutingState = PORT_ONLINE;
-                    dPrintf(D_M_DDP, D_L_TRACE,
-                        ("ddp_AURPfuncx: on, 0x%x\n",
-                        (u_int) aurp_ifID));
-
-                    ddp_AURPsendx(AURPCODE_DEBUGINFO,
-                                  &dbgBits, aurp_ifID->ifPort);
-                    return 0;
-                }
-            }
-            return -1;
-
-        } else {
-            /* deregister AURP callback function */
-            if (aurp_ifID) {
-                rtmp_purge(aurp_ifID);
-                ddp_rem_if(aurp_ifID);
-                aurp_ifID->ifState = LAP_OFFLINE;
-                aurp_ifID->ifRoutingState = PORT_OFFLINE;
-                dPrintf(D_M_DDP, D_L_TRACE,
-                    ("ddp_AURPfuncx: off, 0x%x\n", (u_int) aurp_ifID));
-                aurp_ifID = 0;
-            }
-        }
-        break;
-
-    case AURPCODE_AURPPROTO: /* proto type - AURP */
-        if (aurp_ifID) {
-            aurp_ifID->ifFlags |= AT_IFF_AURP;
-        }
-        break;
-    }
-
-    return 0;
-}
-#endif
-
-/* Checks whether the destination address of a packet matches one of our
-   interfaces; returns the matching ifID if it's for us, NULL if not.
-*/
-at_ifaddr_t *forUs(ddp)
-    register at_ddp_t *ddp;
-{
-    at_ifaddr_t *ifID;
-
-    TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) {
-        if ((ddp->dst_node == ifID->ifThisNode.s_node) &&
-            (NET_VALUE(ddp->dst_net) == ifID->ifThisNode.s_net)
-            ) {
-            dPrintf(D_M_DDP_LOW, D_L_ROUTING,
-                ("pkt was for port %d\n", ifID->ifPort));
-
-            return(ifID);
-        }
-    }
-
-    return((at_ifaddr_t *)NULL);
-} /* forUs */
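
ddp_AURPfuncx() uses the single exported function pointer ddp_AURPsendx as both the tunnel callback and the registration state: AURPCODE_REG with a non-NULL parameter registers the callback and brings a virtual port online, while NULL deregisters it and takes the port down. A hypothetical user-space model of that convention (names parallel the kernel's, but nothing here is from the sources):

    #include <stdio.h>

    typedef int (*aurp_send_t)(int code, void *pkt, unsigned char node);

    static aurp_send_t sendx;            /* models ddp_AURPsendx */

    /* Non-NULL registers the tunnel callback; NULL deregisters it. */
    static int aurp_reg(aurp_send_t fn)
    {
        if (fn != NULL) {
            if (sendx != NULL)
                return 0;                /* already registered */
            sendx = fn;                  /* bring tunnel port online */
        } else {
            sendx = NULL;                /* take tunnel port offline */
        }
        return 0;
    }

    static int fake_send(int code, void *pkt, unsigned char node)
    {
        (void)pkt;
        printf("send code=%d node=%u\n", code, node);
        return 0;
    }

    int main(void)
    {
        aurp_reg(fake_send);             /* register */
        if (sendx)
            sendx(1 /* data packet */, NULL, 0);
        aurp_reg(NULL);                  /* deregister */
        return 0;
    }

diff --git a/bsd/netat/ddp.h b/bsd/netat/ddp.h
deleted file mode 100644
index c270b6908..000000000
--- a/bsd/netat/ddp.h
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License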
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/*
- *
- * ORIGINS: 82
- *
- * (C) COPYRIGHT Apple Computer, Inc. 1992-1996
- * All Rights Reserved
- *
- */
-
-#ifndef _NETAT_DDP_H_
-#define _NETAT_DDP_H_
-#include <sys/appleapiopts.h>
-
-#ifdef __APPLE_API_OBSOLETE
-
-/* Header and data sizes */
-
-#define DDP_HDR_SIZE 5 /* DDP (short) header size */
-#define DDP_X_HDR_SIZE 13 /* DDP extended header size */
-#define DDP_DATA_SIZE 586 /* Maximum DataGram data size */
-#define DDP_DATAGRAM_SIZE 599 /* Maximum DataGram size */
-
-/* DDP socket definitions */
-
-#define DDP_SOCKET_1st_RESERVED 1 /* First in reserved range */
-#define DDP_SOCKET_1st_EXPERIMENTAL 64 /* First in experimental range */
-#define DDP_SOCKET_1st_DYNAMIC 128 /* First in dynamic range */
-#define DDP_SOCKET_LAST 253 /* Last socket in any range */
-
-/* DDP type used to replace "0" on packets sent out, for compatibility
-   with Open Transport */
-#define DEFAULT_OT_DDPTYPE 11
-
-/* DDP well-known sockets */
-
-#define RTMP_SOCKET 1 /* RTMP socket number */
-#define NBP_SOCKET 2 /* NIS socket number */
-#define EP_SOCKET 4 /* EP socket number */
-#define ZIP_SOCKET 6 /* ZIP socket number */
-
-/* DDP extended header packet format */
-
-typedef struct {
-#if BYTE_ORDER == BIG_ENDIAN
-    unsigned unused:2,
-             hopcount:4,    /* hop count/len high order */
-             length_H:2;
-#endif
-#if BYTE_ORDER == LITTLE_ENDIAN
-    unsigned length_H:2,
-             hopcount:4,
-             unused:2;
-#endif
-    u_char length_L;        /* len low order */
-    ua_short checksum;      /* Checksum */
-    at_net dst_net;         /* Destination network number */
-    at_net src_net;         /* Source network number */
-    at_node dst_node;       /* Destination node ID */
-    at_node src_node;       /* Source node ID */
-    at_socket dst_socket;   /* Destination socket number */
-    at_socket src_socket;   /* Source socket number */
-    u_char type;            /* Protocol type */
-    char data[DDP_DATA_SIZE];
-} at_ddp_t;
-
-
-#define DDPLEN_ASSIGN(ddp, len) \
-    ddp->length_H = 0x03 & (len >> 8); \
-    ddp->length_L = len & 0xff;
-
-#define DDPLEN_VALUE(ddp) \
-    (((u_short)ddp->length_H) << 8) + ddp->length_L
-
-/* DDP module statistics and configuration */
-
-typedef struct at_ddp_stats {
-    /* General */
-
-    /* Receive stats */
-    u_int rcv_bytes;
-    u_int rcv_packets;
-    u_int rcv_bad_length;
-    u_int rcv_unreg_socket;
-    u_int rcv_bad_socket;
-    u_int rcv_bad_checksum;
-    u_int rcv_dropped_nobuf;
-
-    /* Transmit stats */
-    u_int xmit_bytes;
-    u_int xmit_packets;
-    u_int xmit_BRT_used;
-    u_int xmit_bad_length;
-    u_int xmit_bad_addr;
-    u_int xmit_dropped_nobuf;
-} at_ddp_stats_t;
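
The extended header stores the 10-bit datagram length split across the bit-field length_H (two high bits, sharing a byte with hopcount and unused) and the byte length_L; DDPLEN_ASSIGN and DDPLEN_VALUE pack and unpack it. A minimal stand-alone check of that packing (hypothetical struct with the bit-fields replaced by plain bytes):

    #include <stdio.h>
    #include <assert.h>

    struct ddp_len {
        unsigned char length_H;  /* only 2 bits are significant */
        unsigned char length_L;
    };

    /* Mirrors DDPLEN_ASSIGN: high 2 bits into length_H, rest into length_L. */
    static void ddplen_assign(struct ddp_len *d, int len)
    {
        d->length_H = 0x03 & (len >> 8);
        d->length_L = len & 0xff;
    }

    /* Mirrors DDPLEN_VALUE: recombine the two fields. */
    static int ddplen_value(const struct ddp_len *d)
    {
        return ((int)d->length_H << 8) + d->length_L;
    }

    int main(void)
    {
        struct ddp_len d;
        ddplen_assign(&d, 599);              /* DDP_DATAGRAM_SIZE */
        assert(ddplen_value(&d) == 599);
        printf("len=%d\n", ddplen_value(&d));
        return 0;
    }

-
-
-/* DDP streams module ioctls */
-
-#define AT_MID_DDP 203
-
-#define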
DDP_IOC_MYIOCTL(i) ((i>>8) == AT_MID_DDP) -#define DDP_IOC_GET_CFG ((AT_MID_DDP<<8) | 1) - -#ifdef NOT_USED -#define DDP_IOC_BIND_SOCK ((AT_MID_DDP<<8) | 2) -#define DDP_IOC_GET_STATS ((AT_MID_DDP<<8) | 3) -#define DDP_IOC_LSTATUS_TABLE ((AT_MID_DDP<<8) | 4) -#define DDP_IOC_ULSTATUS_TABLE ((AT_MID_DDP<<8) | 5) -#define DDP_IOC_RSTATUS_TABLE ((AT_MID_DDP<<8) | 6) -#define DDP_IOC_SET_WROFF ((AT_MID_DDP<<8) | 7 ) -#define DDP_IOC_SET_OPTS ((AT_MID_DDP<<8) | 8 ) -#define DDP_IOC_GET_OPTS ((AT_MID_DDP<<8) | 9 ) -#define DDP_IOC_GET_SOCK ((AT_MID_DDP<<8) | 10) -#define DDP_IOC_GET_PEER ((AT_MID_DDP<<8) | 11) -#define DDP_IOC_SET_PEER ((AT_MID_DDP<<8) | 12) -#define DDP_IOC_SET_PROTO ((AT_MID_DDP<<8) | 13) -#endif - -#ifdef KERNEL_PRIVATE - -#define DDP_MIN_NETWORK 0x0001 -#define DDP_MAX_NETWORK 0xfffe -#define DDP_STARTUP_LOW 0xff00 -#define DDP_STARTUP_HIGH DDP_MAX_NETWORK - -typedef struct { - void **inputQ; - int *pidM; - char **socketM; - char *dbgBits; -} proto_reg_t; - -/* *** note: this counts on the src address always being that of the - home port - *** */ -#define FROM_US(ddp) ((NET_VALUE(ddp->src_net) ==\ - ifID_home->ifThisNode.s_net) && \ - ifID_home->ifThisNode.s_node == ddp->src_node) - -#define RT_LOOKUP_OKAY(ifID, ddp) \ - ((ROUTING_MODE && ifID->ifRoutingState==PORT_ONLINE) || \ - (MULTIHOME_MODE && FROM_US(ddp))) - -#ifdef NOT_YET -/* from sys_glue.c */ - -/* from ddp.c */ -int ddp_bind_socket(ddp_socket_t *socketp); -int ddp_close_socket(ddp_socket_t *socketp); - -/* from ddp_proto.c */ -int ddp_close(gref_t *gref); -void ddp_putmsg(gref_t *gref, gbuf_t *mp); -void ddp_stop(gbuf_t *mioc, gref_t *gref); - -/* in ddp_lap.c */ - -#endif /* NOT_YET */ - -void ddp_bit_reverse(unsigned char *); - -int ddp_pru_abort(struct socket *so); - -int ddp_pru_attach(struct socket *so, int proto, - struct proc *p); -int ddp_pru_bind(struct socket *so, struct sockaddr *nam, - struct proc *p); -int ddp_pru_connect(struct socket *so, struct sockaddr *nam, - struct proc *p); - -int ddp_pru_control(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, struct proc *p); -int ddp_pru_detach(struct socket *so); -int ddp_pru_disconnect(struct socket *so); - -int ddp_pru_peeraddr(struct socket *so, - struct sockaddr **nam); - -int ddp_pru_send(struct socket *so, int flags, struct mbuf *m, - struct sockaddr *addr, struct mbuf *control, - struct proc *p); - -int ddp_pru_shutdown(struct socket *so); -int ddp_pru_sockaddr(struct socket *so, - struct sockaddr **nam); - -int ddp_output(gbuf_t **, at_socket , int ); -u_short ddp_checksum(gbuf_t *, int); -gbuf_t *ddp_compress_msg(gbuf_t *); - -struct at_ifaddr; -struct etalk_addr; - -int ddp_router_output( - gbuf_t *mp, - struct at_ifaddr *ifID, - int addr_type, - at_net_al router_net, - at_node router_node, - struct etalk_addr *enet_addr); - -struct at_ifaddr *forUs(at_ddp_t *); - -void zip_send_queries(struct at_ifaddr *, at_net_al, at_node); -int zip_handle_getmyzone(struct at_ifaddr *, gbuf_t *); -int zip_type_packet(gbuf_t *); -void zip_sched_getnetinfo (void *); - -int at_unreg_mcast(struct at_ifaddr *, caddr_t); -int at_reg_mcast(struct at_ifaddr *, caddr_t); - -int ddp_shutdown(int); - -void routing_needed(gbuf_t *, struct at_ifaddr *, char); - -int getPhysAddrSize(int); -int getAarpTableSize(int); - -int aarp_init1(struct at_ifaddr *); -int aarp_init2(struct at_ifaddr *); - -int getRtmpTableSize(void); - -void sethzonehash(struct at_ifaddr *); - -int ddp_add_if(struct at_ifaddr *); -void ddp_rem_if(struct at_ifaddr *); - -void 
ddp_brt_init(void);
-void ddp_brt_shutdown(void);
-
-int setLocalZones(at_nvestr_t *, int);
-
-void ddp_brt_sweep(void);
-
-
-#endif /* KERNEL_PRIVATE */
-#endif /* __APPLE_API_OBSOLETE */
-#endif /* _NETAT_DDP_H_ */
diff --git a/bsd/netat/ddp.save b/bsd/netat/ddp.save
deleted file mode 100644
index 4fa9acea6..000000000
--- a/bsd/netat/ddp.save
+++ /dev/null
@@ -1,903 +0,0 @@
-/* From ddp.c:
-
-   ddp_shrink_hdr()
-   ddp_extend_hdr()
-
-   Saved from xnu/bsd/bsd/netat/ddp.c on 4/14/99.
-*/
-
-#ifdef NOT_USED
-/* This routine shrinks the ddp header from long to short.
- * It also prepends an ALAP header and fills up some of the
- * fields as appropriate.
- */
-static at_ddp_short_t *ddp_shrink_hdr (mp)
-register gbuf_t *mp;
-{
-    register at_ddp_t *ddp;
-    register at_ddp_short_t *ddp_short;
-    register at_llap_hdr_t *llap;
-    gbuf_t *newmp;
-
-    if ((newmp = (gbuf_t *)gbuf_copym((gbuf_t *) mp)) == (gbuf_t *)NULL)
-        return ((at_ddp_short_t *)NULL);
-    gbuf_freem(mp);
-    mp = newmp;
-
-    ddp = (at_ddp_t *)gbuf_rptr(mp);
-    gbuf_rinc(mp,((DDP_X_HDR_SIZE - DDP_HDR_SIZE) - LLAP_HDR_SIZE));
-    llap = (at_llap_hdr_t *)gbuf_rptr(mp);
-    ddp_short = (at_ddp_short_t *)(gbuf_rptr(mp) + LLAP_HDR_SIZE);
-
-    llap->destination = ddp->dst_node;
-    llap->type = LLAP_TYPE_DDP;
-    ddp_short->length = ddp->length - (DDP_X_HDR_SIZE - DDP_HDR_SIZE);
-    ddp_short->unused = 0;
-    return ((at_ddp_short_t *)mp);
-}
-
-
-/* mp points to message of the form {llap, short ddp, ...}.
- * Get rid of llap, extend ddp header to make it of the form
- * {extended ddp, ... }
- */
-static gbuf_t *ddp_extend_hdr(mp)
-register gbuf_t *mp;
-{
-    register at_llap_hdr_t *llap;
-    register at_ddp_short_t *ddp_short;
-    register at_ddp_t *ddp;
-    char buf[DDP_HDR_SIZE + LLAP_HDR_SIZE];
-    gbuf_t *m1, *m2;
-
-    /* We need to remove the llap header from the packet and extend the
-     * short DDP header into a long one. 5 bytes of additional space
-     * is required in effect, but we cannot afford to put these 5 bytes
-     * in a separate buffer, since the ddp buffer would end up being
-     * fragmented into two pieces, which is a no-no. So, we first get
-     * rid of the llap and ddp short headers and then add the extended
-     * header.
-     */
-
-    /* Assuming that the llap and ddp short headers are placed next
-     * to each other in the same buffer
-     */
-    bcopy(gbuf_rptr(mp), buf, LLAP_HDR_SIZE + DDP_HDR_SIZE);
-    m1 = ddp_adjmsg(mp, LLAP_HDR_SIZE+DDP_HDR_SIZE) ? mp : 0;
-
-    /* If the message did not contain any ddp data bytes, then m would
-     * be NULL at this point... and we can't just grow a NULL message,
-     * we need to ALLOC a new one.
-     */
-    if (m1) {
-        if ((m2 = (gbuf_t *)ddp_growmsg(m1, -DDP_X_HDR_SIZE)) == NULL) {
-            dPrintf(D_M_DDP, D_L_WARNING,
-                ("Dropping packet - no bufs to extend hdr"));
-            at_ddp_stats.rcv_dropped_nobuf++;
-            gbuf_freem(m1);
-            return(NULL);
-        }
-    } else
-        /* Original message mp has already been freed by ddp_adjmsg if we
-         * managed to arrive here... this case occurs only when the
-         * message mp did not contain any ddp data bytes, only lap and
-         * ddp headers
-         */
-        if ((m2 = gbuf_alloc(AT_WR_OFFSET+DDP_X_HDR_SIZE, PRI_MED)) == NULL) {
-            dPrintf(D_M_DDP,D_L_WARNING,
-                ("Packet (no data) dropped - no bufs to extend hdr"));
-            at_ddp_stats.rcv_dropped_nobuf++;
-            return(NULL);
-        } else {
-            gbuf_rinc(m2,AT_WR_OFFSET);
-            gbuf_wset(m2,DDP_X_HDR_SIZE);
-        }
-
-    /* By the time we arrive here, m2 points to message of the form
-     * {Extended DDP, ... }
-     * mp and m1 are either non-existent or irrelevant.
- */ - ddp = (at_ddp_t *)gbuf_rptr(m2); - llap = (at_llap_hdr_t *)buf; - ddp_short = (at_ddp_short_t *)(buf + LLAP_HDR_SIZE); - - ddp->unused = ddp->hopcount = 0; - ddp->length = ddp_short->length + DDP_X_HDR_SIZE - DDP_HDR_SIZE; - UAS_ASSIGN(ddp->checksum, 0); - NET_NET(ddp->dst_net, ifID_home->ifThisNode.atalk_net); - NET_NET(ddp->src_net, ifID_home->ifThisNode.atalk_net); - ddp->src_node = llap->source; - ddp->dst_node = llap->destination; - ddp->dst_socket = ddp_short->dst_socket; - ddp->src_socket = ddp_short->src_socket; - ddp->type = ddp_short->type; - return (m2); -} -#endif - -From sys_dep.c: - -#ifdef _AIX /* This AIX code (to the end of this file) is no longer supported. */ - -int ATsocket(proto) /* AIX version */ - int proto; -{ - int err, rc = -1; - - if (sys_ATsocket) - rc = (*sys_ATsocket)(proto, &err, 0); - else - err = ENXIO; - if (err) - setuerror(err); - return rc; -} - -int ATgetmsg(fd, ctlptr, datptr, flags) /* AIX version */ - int fd; - void *ctlptr; - void *datptr; - int *flags; -{ - int err, rc = -1; - - if (sys_ATgetmsg) - rc = (*sys_ATgetmsg)(fd, ctlptr, datptr, flags, &err, 0); - else - err = ENXIO; - if (err) - setuerror(err); - return rc; -} - -int ATputmsg(fd, ctlptr, datptr, flags) /* AIX version */ - int fd; - void *ctlptr; - void *datptr; - int flags; -{ - int err, rc = -1; - - if (sys_ATputmsg) - rc = (*sys_ATputmsg)(fd, ctlptr, datptr, flags, &err, 0); - else - err = ENXIO; - if (err) - setuerror(err); - return rc; -} - -int ATPsndreq(fd, buf, len, nowait) /* AIX version */ - int fd; - unsigned char *buf; - int len; - int nowait; -{ - int err, rc = -1; - - if (sys_ATPsndreq) - rc = (*sys_ATPsndreq)(fd, buf, len, nowait, &err, 0); - else - err = ENXIO; - if (err) - setuerror(err); - return rc; -} - -int ATPsndrsp(fd, respbuff, resplen, datalen) /* AIX version */ - int fd; - unsigned char *respbuff; - int resplen; - int datalen; -{ - int err, rc = -1; - - if (sys_ATPsndrsp) - rc = (*sys_ATPsndrsp)(fd, respbuff, resplen, datalen, &err, 0); - else - err = ENXIO; - if (err) - setuerror(err); - return rc; -} - -int ATPgetreq(fd, buf, buflen) /* AIX version */ - int fd; - unsigned char *buf; - int buflen; -{ - int err, rc = -1; - - if (sys_ATPgetreq) - rc = (*sys_ATPgetreq)(fd, buf, buflen, &err, 0); - else - err = ENXIO; - if (err) - setuerror(err); - return rc; -} - -int ATPgetrsp(fd, bdsp) /* AIX version */ - int fd; - unsigned char *bdsp; -{ - int err, rc = -1; - - if (sys_ATPgetrsp) - rc = (*sys_ATPgetrsp)(fd, bdsp, &err, 0); - else - err = ENXIO; - if (err) - setuerror(err); - return rc; -} - -void *atalk_kalloc(size) /* AIX version */ - int size; -{ - return (void *)xmalloc(size, 2, pinned_heap); -} - -void atalk_kfree(buf) /* AIX version */ - void *buf; -{ - xmfree(buf, pinned_heap); -} - -int atalk_closeref(fp, grefp) /* AIX version */ - struct file *fp; - gref_t **grefp; -{ - *grefp = (gref_t *)fp->f_data; - fp->f_data = 0; - return 0; -} - -int atalk_openref(gref, retfd, proc) /* AIX version */ - gref_t *gref; - int *retfd; - void *proc; -{ -extern int _ATrw(), _ATioctl(), _ATselect(), _ATclose(), _ATstat(); -static struct fileops fileops = {_ATrw, _ATioctl, _ATselect, _ATclose, _ATstat}; - int err, fd; - struct file *fp; - void *crp; - - crp = (void *)crref(); -#ifdef _AIX - if ((err = ufdcreate(FREAD|FWRITE, - &fileops, 0, DTYPE_OTHER, &fd, crp)) != 0) -#else - if ((err = ufdcreate(FREAD|FWRITE, - &fileops, 0, DTYPE_ATALK, &fd, crp)) != 0) -#endif - return err; - *retfd = fd; - fp = U.U_ufd[fd].fp; - fp->f_data = (void *)gref; - gref->next = (void 
*)fp; - return 0; -} - -int atalk_getref(fp, fd, grefp, proc) /* AIX version */ - struct file *fp; - int fd; - gref_t **grefp; - struct proc *proc; -{ - if (fp == 0) { - if ((fd < 0) || (fd > U.U_maxofile) || ((fp = U.U_ufd[fd].fp) == 0)) { - *grefp = (gref_t *)0; - return EBADF; - } - } - if ((*grefp = (gref_t *)fp->f_data) == 0) - return EBADF; - return 0; -} - -gbuf_t *gbuf_alloc(size, pri) /* AIX version */ - int size; - int pri; -{ - gbuf_t *m; - - m = (size > MHLEN) ? (gbuf_t *)m_getclustm(M_DONTWAIT, MSG_DATA, size) - : (gbuf_t *)m_gethdr(M_DONTWAIT, MSG_DATA); -#ifdef APPLETALK_DEBUG - kprintf("gbuf_alloc: for size = %d m=%x\n", size, m); -#endif - gbuf_next(m) = 0; - gbuf_cont(m) = 0; - gbuf_wset(m,0); - return m; -} - -void gbuf_freeb(m) /* AIX version */ - gbuf_t *m; -{ - if (m) - m_free(m); -} - -static struct trb *trb_freehead = 0; -static struct trb *trb_freetail = 0; -static struct trb *trb_pendhead = 0; -static int trb_cnt = 0; -static atlock_t trb_lock; - -static void atalk_rem_timeoutcf() /* AIX version */ -{ - register int s; - register struct trb *trb; - register struct trb *tmp_freehead, *tmp_pendhead; - - ATDISABLE(s, trb_lock); - tmp_freehead = trb_freehead; - trb_freehead = 0; - tmp_pendhead = trb_pendhead; - trb_pendhead = 0; - trb_cnt = 0; - ATENABLE(s, trb_lock); - while ((trb = tmp_pendhead) != 0) { - tmp_pendhead = trb->to_next; - while (tstop(trb)); - tfree(trb); - } - while ((trb = tmp_freehead) != 0) { - tmp_freehead = trb->to_next; - tfree(trb); - } - dPrintf(D_M_ATP,D_L_ERROR, "atalk: timer stopped!\n",0,0,0,0,0); -} - -static void atalk_timeoutcf(cnt) /* AIX version */ - int cnt; -{ - register int i; - register struct trb *trb; - - if (trb_freehead == 0) { - for (i=0; i < cnt-1; i++) { - trb = (struct trb *)talloc(); - trb->to_next = trb_freehead; - trb_freehead = trb; - if (!i) trb_freetail = trb; - trb_cnt++; - } - } - ATLOCKINIT(trb_lock); -} - -static void atalk_clock(trb) /* AIX version */ - register struct trb *trb; -{ - register int s; - register struct trb *next; - void (*tof)(); - void *arg; - - ATDISABLE(s, trb_lock); - if (trb_pendhead && trb->func) { - /* - * remove the timeout from the pending queue - */ - if (trb_pendhead == trb) - trb_pendhead = trb->to_next; - else { - for (next=trb_pendhead; next->to_next; next=next->to_next) { - if (next->to_next == trb) { - next->to_next = trb->to_next; - trb->func = 0; - break; - } - } - if (trb->func) { - dPrintf(D_M_ATP,D_L_WARNING, - "atalk_clock: %d,%x,%x\n", trb_cnt,trb,trb_pendhead,0,0); - /* - * we have not found the trb in the pending list - something - * has gone wrong here. maybe the trb has been returned to - * the free list; in which case, we should simply ignore - * this timeout event! - */ - for (next=trb_freehead; next; next=next->to_next) { - if (next == trb) - { - ATENABLE(s, trb_lock); - return; - } - } - /* - * the trb is not in the free list either - something has - * really gone wacky here! all we can do now is put the - * trb back into the free list and hope that it will be ok. 
- */ - trb->to_next = 0; - if (trb_freehead) - trb_freetail->to_next = trb; - else - trb_freehead = trb; - trb_freetail = trb; - trb_cnt++; - ATENABLE(s, trb_lock); - return; - } - } - - /* - * process the timeout - */ - trb->func = 0; - trb->to_next = 0; - tof = trb->tof; - trb->tof = 0; - arg = (void *)trb->func_data; - trb->func_data = 999; - if (trb_freehead) - trb_freetail->to_next = trb; - else - trb_freehead = trb; - trb_freetail = trb; - trb_cnt++; - ATENABLE(s, trb_lock); - if (tof) { - dPrintf(D_M_ATP,D_L_VERBOSE, "atalk_clock: func=%x, arg=%x, %d\n", - tof,arg,trb_cnt,0,0); - (*tof)(arg); - } else { - dPrintf(D_M_ATP,D_L_ERROR, "atalk_clock: func=%x, arg=%x, %d\n", - tof,arg,trb_cnt,0,0); - } - } else - ATENABLE(s, trb_lock); -} - -void *atalk_timeout(func, arg, ticks) /* AIX version */ - void (*func)(); - void *arg; - int ticks; -{ - register int s; - register struct trb *trb; - - dPrintf(D_M_ATP,D_L_VERBOSE, - "atalk_timeout: func=%x,arg=%x,time=%d, %d,%x\n", func,arg,ticks,trb_cnt,trb_pendhead); - /* - * set up the timeout request - */ - ATDISABLE(s, trb_lock); - if ((trb = trb_freehead) == 0) { - ATENABLE(s, trb_lock); - dPrintf(D_M_ATP,D_L_WARNING, - "atalk_timeout: NO TRB! time=%d, %d\n", ticks,trb_cnt,0,0,0); - return 0; - } - trb_freehead = trb->to_next; - trb->to_next = trb_pendhead; - trb_pendhead = trb; - trb_cnt--; - trb->timeout.it_value.tv_sec = ticks / HZ; - trb->timeout.it_value.tv_nsec = (ticks % HZ) * (NS_PER_SEC / HZ); - trb->knext = 0; - trb->kprev = 0; - trb->flags = 0; - trb->tof = func; - trb->func = (void (*)())atalk_clock; - trb->func_data = (ulong)arg; - trb->ipri = PL_IMP; - trb->id = -1; - - /* - * start the timeout - */ - ATENABLE(s, trb_lock); - tstart(trb); - return (void *)trb; -} - -void atalk_untimeout(func, arg, trb) /* AIX version */ - void (*func)(); - void *arg; - register struct trb *trb; -{ - register int s; - register struct trb *next; - - dPrintf(D_M_ATP,D_L_VERBOSE, - "atalk_untimeout: func=%x,arg=%x, %d\n", func,arg,trb_cnt,0,0); - - ATDISABLE(s, trb_lock); - if (trb == 0) { - for (trb=trb_pendhead; trb; trb=trb->to_next) { - if ((func == trb->tof) && (arg == (void *)trb->func_data)) - break; - } - } - if (trb && (trb->func == (void (*)())atalk_clock) - && (func == trb->tof) && (arg == (void *)trb->func_data)) { - trb->func_data = 999; - if (!(trb->flags & T_PENDING)) - { - trb->tof = 0; - ATENABLE(s, trb_lock); - return; - } - trb->func = 0; - while (tstop(trb)); - if (trb_pendhead == trb) - trb_pendhead = trb->to_next; - else { - for (next=trb_pendhead; next->to_next != trb; next=next->to_next) { - if (next->to_next == 0) { - ATENABLE(s, trb_lock); - dPrintf(D_M_ATP,D_L_WARNING, - "atalk_untimeout: UNKNOWN TRB %x...\n",trb,0,0,0,0); - return; - } - } - next->to_next = trb->to_next; - } - trb->to_next = 0; - trb_freetail->to_next = trb; - trb_freetail = trb; - trb_cnt++; - } - ATENABLE(s, trb_lock); -} - -int config_atalk(dev, cmd, uiop) /* AIX only */ -dev_t dev; -int cmd; -void *uiop; -{ - static int loaded = 0; - int err, nest; - - err = 0; - nest = lockl(&kernel_lock, LOCK_SHORT); - - if (cmd == CFG_INIT) { - if (loaded) - goto out; - vm_protect(0, 4096, 3); - atalk_timeoutcf(256); - atalk_load(); - loaded = 1; - - } else if (cmd == CFG_TERM) { - if (!loaded) - goto out; - atalk_rem_timeoutcf(); - atalk_unload(); - loaded = 0; - - } else - err = EINVAL; - -out: - if (nest != LOCK_NEST) - unlockl(&kernel_lock); - return(err); -} - -#endif - -From sys_glue.c: - -#ifdef _AIX /* AIX code, to the end of this file, is no longer 
supported. */ - -int _ATselect(fp, corl, reqevents, retevents, notify) /* AIX version */ - void *fp; - int corl; - unsigned short reqevents; - unsigned short *retevents; - void (*notify)(); -{ - int s, err, rc = 0; - gref_t *gref; - unsigned short sevents = 0; - - if ((err = atalk_getref(fp, 0, &gref, 0)) != 0) - return err; - - ATDISABLE(s, gref->lock); - if (reqevents & POLLIN) { - if (gref->rdhead || (gref->readable && (*gref->readable)(gref))) - sevents |= POLLIN; - } - - if (reqevents & POLLOUT) { - if (gref->writeable) { - if ((*gref->writeable)(gref)) - sevents |= POLLOUT; - } else - sevents |= POLLOUT; - } - - if ((sevents == 0) && ((reqevents & POLLSYNC) == 0)) { - if (rc = selreg(corl, 99, gref, reqevents, notify)) { - ATENABLE(s, gref->lock); - goto l_done; - } - - if (reqevents & POLLIN) { - if (gref->rdhead || (gref->readable && (*gref->readable)(gref))) - sevents |= POLLIN; - else - gref->sevents |= POLLIN; - } - - if (reqevents & POLLOUT) { - if (gref->writeable) { - if ((*gref->writeable)(gref)) - sevents |= POLLOUT; - else - gref->sevents |= POLLOUT; - } else - sevents |= POLLOUT; - } - } - ATENABLE(s, gref->lock); - *retevents = sevents; - -l_done: - return rc; -} -#endif /* end AIX section */ - -From drv_dep.c: - - - - -#ifdef _AIX -/* AIX section to end of file (not supported) */ - -/* from beginning of file ... */ -#include -#include -static struct ns_8022 elap_link; /* The SNAP header description */ -static struct ns_user elap_user; /* The interface to the demuxer */ - -int -pat_ifpresent(name) /* AIX */ - char *name; -{ - return (int)ifunit(name); -} - -int -pat_output(pat_id, mlist, dst_addr, type) /* AIX */ - int pat_id; - gbuf_t *mlist; - unsigned char *dst_addr; - int type; -{ - int len; - pat_unit_t *patp; - gbuf_t *m, *m_prev, *new_mlist, *m_temp; - struct ndd *nddp; - short size; - enet_header_t *enet_header; - llc_header_t *llc_header; - - patp = (pat_unit_t *)&pat_units[pat_id]; - if (patp->state != PAT_ONLINE) { - gbuf_freel(mlist); - return ENOTREADY; - } - - if (patp->xtype == IFTYPE_NULLTALK) { - gbuf_freel(mlist); - return 0; - } - - nddp = (void *)patp->nddp; - new_mlist = 0; - - for (m = mlist; m; m = mlist) { - mlist = gbuf_next(m); - gbuf_next(m) = 0; - - gbuf_prepend(m,ENET_LLC_SIZE); - if (m == 0) { - if (mlist) - gbuf_freel(mlist); - if (new_mlist) - gbuf_freel(new_mlist); - return 0; - } - - enet_header = (enet_header_t *)gbuf_rptr(m); - bcopy(dst_addr, enet_header->dst, sizeof(enet_header->dst)); - bcopy(patp->xaddr, enet_header->src, sizeof(enet_header->src)); - size = gbuf_msgsize(m); - enet_header->len = size - sizeof(enet_header_t); - llc_header = (llc_header_t *)(gbuf_rptr(m)+sizeof(enet_header_t)); - *llc_header = (type == AARP_AT_TYPE) ? 
snap_hdr_aarp : snap_hdr_at; - - m->m_pkthdr.len = size; - m->m_pkthdr.rcvif = 0; - - if (new_mlist) - gbuf_next(m_prev) = m; - else - new_mlist = m; - m_prev = m; - pktsOut++; - } - - if (new_mlist) - (*nddp->ndd_output)(nddp, new_mlist); - - return 0; -} - -int -pat_online (ifName, ifType) /* AIX */ - char *ifName; - char *ifType; -{ - void pat_input(); - int pat_id; - pat_unit_t *patp; - struct ndd *nddp; - char ns_name[8]; - - if ((pat_id = pat_ID(ifName)) == -1) - return (-1); - patp = &pat_units[pat_id]; - - if (patp->xtype == IFTYPE_ETHERTALK) { - ns_name[0] = ifName[0]; - ns_name[1] = 'n'; - strcpy(&ns_name[2], &ifName[1]); - } else if (patp->xtype == IFTYPE_NULLTALK) { - patp->xaddrlen = 6; - bzero(patp->xaddr, patp->xaddrlen); - if (ifType) - *ifType = patp->xtype; - patp->nddp = (void *)0; - patp->state = PAT_ONLINE; - at_statep->flags |= AT_ST_IF_CHANGED; - return (pat_id); - } else - return -1; - - if (ns_alloc(ns_name, &nddp)) - return -1; - - bzero(&elap_user, sizeof(elap_user)); - elap_user.isr = pat_input; - elap_user.pkt_format = NS_HANDLE_HEADERS|NS_INCLUDE_MAC; - - elap_link.filtertype = NS_8022_LLC_DSAP_SNAP; - elap_link.orgcode[0] = 0; - elap_link.orgcode[2] = 0; - elap_link.dsap = DSAP_SNAP; - elap_link.ethertype = 0x80f3; /* AARP SNAP code */ - if (ns_add_filter(nddp, &elap_link, sizeof(elap_link), &elap_user)) - return -1; - - elap_link.orgcode[0] = 0x08; - elap_link.orgcode[2] = 0x07; - elap_link.ethertype = 0x809b; /* DDP SNAP code */ - if (ns_add_filter(nddp, &elap_link, sizeof(elap_link), &elap_user)) { - elap_link.orgcode[0] = 0; - elap_link.orgcode[2] = 0; - elap_link.ethertype = 0x80f3; /* AARP SNAP code */ - (void)ns_del_filter(nddp, &elap_link, sizeof(elap_link)); - return -1; - } - - patp->xaddrlen = nddp->ndd_addrlen; - bcopy(nddp->ndd_physaddr, patp->xaddr, patp->xaddrlen); - - if (ifType) - *ifType = patp->xtype; - - patp->nddp = (void *)nddp; - patp->state = PAT_ONLINE; - at_statep->flags |= AT_ST_IF_CHANGED; - - return (pat_id); -} - -void -pat_offline(pat_id) /* AIX */ - int pat_id; -{ - pat_unit_t *patp = &pat_units[pat_id]; - - if (patp->state == PAT_ONLINE) { - if (patp->xtype != IFTYPE_NULLTALK) { - elap_link.filtertype = NS_8022_LLC_DSAP_SNAP; - elap_link.orgcode[0] = 0; - elap_link.orgcode[2] = 0; - elap_link.dsap = DSAP_SNAP; - elap_link.ethertype = 0x80f3; /* AARP SNAP code */ - (void)ns_del_filter(patp->nddp, &elap_link, sizeof(elap_link)); - elap_link.orgcode[0] = 0x08; - elap_link.orgcode[2] = 0x07; - elap_link.ethertype = 0x809b; /* DDP SNAP code */ - (void)ns_del_filter(patp->nddp, &elap_link, sizeof(elap_link)); - ns_free(patp->nddp); - } - at_statep->flags |= AT_ST_IF_CHANGED; - bzero(patp, sizeof(pat_unit_t)); - } -} - -int -pat_mcast(pat_id, control, data) /* AIX */ - int pat_id; - int control; - unsigned char *data; -{ - struct ndd *nddp; - - nddp = (struct ndd *)pat_units[pat_id].nddp; - return (*nddp->ndd_ctl)(nddp, (control == PAT_REG_MCAST) ? 
- NDD_ENABLE_ADDRESS : NDD_DISABLE_ADDRESS, - data, nddp->ndd_addrlen); -} - -void -pat_input(nddp, m, unused) /* AIX */ - struct ndd *nddp; - gbuf_t *m; - void *unused; -{ - extern int ddprunning_flag; - llc_header_t *llc_header; - int pat_id; - pat_unit_t *patp; - char src[6]; - enet_header_t *enet_header = (enet_header_t *)gbuf_rptr(m); - - for (pat_id=0, patp = &pat_units[pat_id]; - pat_id < xpatcnt; pat_id++, patp++) { - if ((patp->state == PAT_ONLINE) && (patp->nddp == nddp)) - break; - } - if (pat_id == xpatcnt) { - gbuf_freem(m); - return; - } - - /* Ignore multicast packets from local station */ - if (patp->xtype == IFTYPE_ETHERTALK) { - bcopy((char *)enet_header->src, src, sizeof(src)); - if ((enet_header->dst[0] & 1) && - (bcmp(src, patp->xaddr, sizeof(src)) == 0)) { - gbuf_freem(m); - return; - } - llc_header = (llc_header_t *)(enet_header+1); - } - - gbuf_rinc(m,(ENET_LLC_SIZE)); - (void)fetch_and_add((atomic_p)&ddprunning_flag, 1); - pktsIn++; - if (LLC_PROTO_EQUAL(llc_header->protocol,snap_proto_aarp)) { - patp->aarp_func(gbuf_rptr(m), patp->context); - gbuf_freem(m); - } else if (LLC_PROTO_EQUAL(llc_header->protocol,snap_proto_ddp)) { - /* if we're a router take all pkts */ - if (!ROUTING_MODE) { - if (patp->addr_check(gbuf_rptr(m), patp->context) - == AARP_ERR_NOT_OURS) { - gbuf_freem(m); - (void)fetch_and_add((atomic_p)&ddprunning_flag, -1); - return; - } - } - gbuf_set_type(m, MSG_DATA); - elap_input(m, patp->context, src); - } else - gbuf_freem(m); - (void)fetch_and_add((atomic_p)&ddprunning_flag, -1); -} -#endif /* AIX */ diff --git a/bsd/netat/ddp_aarp.c b/bsd/netat/ddp_aarp.c deleted file mode 100644 index 09e1fc460..000000000 --- a/bsd/netat/ddp_aarp.c +++ /dev/null @@ -1,991 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* Copyright (c) 1988, 1989, 1997, 1998 Apple Computer, Inc. - * - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -/* at_aarp.c: 2.0, 1.17; 10/4/93; Apple Computer, Inc. */; - -/* This file is at_aarp.c and it contains all the routines used by AARP. This - * is part of the LAP layer. 
- */
-
-#include <sys/errno.h>
-#include <sys/types.h>
-#include <sys/param.h>
-#include <machine/spl.h>
-#include <sys/systm.h>
-#include <sys/kernel.h>
-#include <sys/proc.h>
-#include <sys/filedesc.h>
-#include <sys/fcntl.h>
-#include <sys/mbuf.h>
-#include <sys/ioctl.h>
-#include <sys/malloc.h>
-#include <sys/socket.h>
-#include <sys/socketvar.h>
-
-#include <net/if.h>
-#include <net/if_types.h>
-
-#include <netat/sysglue.h>
-#include <netat/appletalk.h>
-#include <netat/at_var.h>
-#include <netat/ddp.h>
-#include <netat/at_pcb.h>
-#include <netat/at_aarp.h>
-#include <netat/asp.h>
-#include <netat/atp.h>
-#include <netat/debug.h>
-
-#include <sys/kern_event.h>
-
-static int probing;
-/* Following two variables are used to keep track of how many dynamic addresses
- * we have tried out at startup.
- */
-int no_of_nodes_tried;    /* no of node addresses we've tried
-                           * so far, within a network number
-                           */
-int no_of_nets_tried;     /* no. of network numbers tried
-                           */
-
-struct etalk_addr et_zeroaddr = {
-    {0, 0, 0, 0, 0, 0}};
-
-aarp_amt_t probe_cb;
-aarp_amt_array *aarp_table[IF_TOTAL_MAX];
-
-
-StaticProc int aarp_req_cmd_in(aarp_pkt_t *, at_ifaddr_t*);
-StaticProc int aarp_resp_cmd_in(aarp_pkt_t *, at_ifaddr_t*);
-StaticProc int aarp_probe_cmd_in(aarp_pkt_t *, at_ifaddr_t*);
-StaticProc int aarp_send_resp(at_ifaddr_t *, aarp_pkt_t *);
-StaticProc int aarp_send_req(aarp_amt_t *);
-StaticProc int aarp_send_probe(void);
-StaticProc aarp_amt_t *aarp_lru_entry(aarp_amt_t *);
-StaticProc int aarp_glean_info(aarp_pkt_t *, at_ifaddr_t*);
-StaticProc int aarp_delete_amt_info(aarp_amt_t *);
-StaticProc void aarp_build_pkt(aarp_pkt_t *, at_ifaddr_t*);
-StaticProc void aarp_sched_req(void *);
-StaticProc int aarp_get_rand_node(at_ifaddr_t *);
-StaticProc int aarp_get_next_node(at_ifaddr_t *);
-StaticProc int aarp_get_rand_net(at_ifaddr_t *);
-
-/****************************************************************************
- * aarp_init()
- *
- ****************************************************************************/
-
-int aarp_init1(elapp)
-    register at_ifaddr_t *elapp;
-{
-    elapp->ifThisNode.s_net = 0;
-    elapp->ifThisNode.s_node = 0;
-
-    if (probing != PROBE_TENTATIVE)    /* How do I set the initial probe */
-        probing = PROBE_IDLE;          /* state ???*/
-    else {
-        dPrintf(D_M_AARP,D_L_ERROR,
-            ("aarp_init: error :probing == PROBE_TENTATIVE\n"));
-        return(-1);
-    }
-
-    /* pick a random addr or start with what we have from initial_node addr */
-    if (elapp->initial_addr.s_net == 0 && elapp->initial_addr.s_node == 0) {
-        dPrintf(D_M_AARP, D_L_INFO,
-            ("aarp_init: pick up a new node number\n"));
-        aarp_get_rand_node(elapp);
-        aarp_get_rand_net(elapp);
-    }
-    probe_cb.elapp = elapp;
-    probe_cb.no_of_retries = 0;
-    probe_cb.error = 0;
-
-    no_of_nodes_tried = 0;    /* haven't tried any addresses yet */
-    no_of_nets_tried = 0;
-
-    if (aarp_send_probe() == -1) {
-        probing = PROBE_IDLE;    /* not probing any more */
-        dPrintf(D_M_AARP, D_L_ERROR,
-            ("aarp_init: aarp_send_probe returns error\n"));
-        return(-1);
-    }
-    return(ENOTREADY);
-}
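
aarp_init1() kicks off the probe cycle whose give-up policy the two counters above implement: step through node IDs within one network number, then pick a fresh random net, and abandon the search after enough nets. A hypothetical model of that budget (the constants' values and the in-use test are placeholders; the kernel drives this from AARP responses and timer callbacks rather than a loop):

    #include <stdio.h>
    #include <stdlib.h>

    #define AARP_MAX_NODES_TRIED 200    /* placeholder values */
    #define AARP_MAX_NETS_TRIED  10

    /* Stand-in for "someone answered our AARP probe for net.node". */
    static int address_in_use(int net, int node)
    {
        return (net == 1 && node < 5);
    }

    static int pick_address(int *net_out, int *node_out)
    {
        int nets_tried, nodes_tried;
        int net = 1, node = 1;          /* would be random in the kernel */

        for (nets_tried = 0; nets_tried < AARP_MAX_NETS_TRIED; nets_tried++) {
            for (nodes_tried = 0; nodes_tried < AARP_MAX_NODES_TRIED; nodes_tried++) {
                if (!address_in_use(net, node)) {
                    *net_out = net;
                    *node_out = node;
                    return 0;           /* claimed an address */
                }
                node++;                 /* models aarp_get_next_node() */
            }
            net = rand() % 0xfffe + 1;  /* models aarp_get_rand_net() */
            node = 1;
        }
        return -1;                      /* EADDRNOTAVAIL: give up */
    }

    int main(void)
    {
        int net, node;
        if (pick_address(&net, &node) == 0)
            printf("claimed %d.%d\n", net, node);
        return 0;
    }

-
-int aarp_init2(elapp)
-    register at_ifaddr_t *elapp;
-{
-    if (probe_cb.error != 0) {
-        probing = PROBE_IDLE;    /* not probing any more */
-        dPrintf(D_M_AARP, D_L_ERROR,
-            ("aarp_init: probe_cb.error creates error =%d\n",
-            probe_cb.error));
-        return(-1);
-    }
-
-    if (aarp_table[elapp->ifPort])
-        bzero ((caddr_t)&aarp_table[elapp->ifPort]->et_aarp_amt[0],
-               sizeof(aarp_amt_array));
-    else
-        return(-1);
-
-    elapp->ifThisNode = elapp->initial_addr;
-    probing = PROBE_DONE;
-
-    /* AppleTalk was successfully started up. Send event with node and net.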
 */ - atalk_post_msg(elapp->aa_ifp, KEV_ATALK_ENABLED, &(elapp->ifThisNode), 0); - - /* Set global flag */ - at_state.flags |= AT_ST_STARTED; - - return(0); -} - -/**************************************************************************** - * aarp_rcv_pkt() - * - * remarks : - * (1) The caller must take care of freeing the real storage (gbuf) - * (2) The incoming packet is of the form {802.3, 802.2, aarp}. - * - ****************************************************************************/ -int aarp_rcv_pkt(pkt, elapp) - aarp_pkt_t *pkt; - at_ifaddr_t *elapp; -{ - switch (ntohs(pkt->aarp_cmd)) { - case AARP_REQ_CMD: - return (aarp_req_cmd_in (pkt, elapp)); - case AARP_RESP_CMD: - return (aarp_resp_cmd_in (pkt, elapp)); - case AARP_PROBE_CMD: - return (aarp_probe_cmd_in (pkt, elapp)); - default: - return (-1); - }/* end of switch*/ -} - -/**************************************************************************** - * aarp_req_cmd_in() - * - ****************************************************************************/ -StaticProc int aarp_req_cmd_in (pkt, elapp) -aarp_pkt_t *pkt; -at_ifaddr_t *elapp; -{ -/* - kprintf("aarp_req_cmd_in: ifThisNode=%d:%d srcNode=%d:%d dstNode=%d:%d\n", - elapp->ifThisNode.s_net, - elapp->ifThisNode.s_node, - NET_VALUE(pkt->src_at_addr.atalk_net), - pkt->src_at_addr.atalk_node, - NET_VALUE(pkt->dest_at_addr.atalk_net), - pkt->dest_at_addr.atalk_node); -*/ - if ((probing == PROBE_DONE) && - (NET_VALUE(pkt->dest_at_addr.atalk_net) == elapp->ifThisNode.s_net) && - (pkt->dest_at_addr.atalk_node == elapp->ifThisNode.s_node)) { - if (aarp_send_resp(elapp, pkt) == -1) - return(-1); - } - /* now to glean some info */ - aarp_glean_info(pkt, elapp); - return (0); -} - - - -/**************************************************************************** - * aarp_resp_cmd_in() - * - ****************************************************************************/ -StaticProc int aarp_resp_cmd_in (pkt, elapp) - aarp_pkt_t *pkt; - at_ifaddr_t *elapp; -{ - register aarp_amt_t *amt_ptr; - gbuf_t *m; - - switch (probing) { - case PROBE_TENTATIVE : - if ((NET_VALUE(pkt->src_at_addr.atalk_net) == - probe_cb.elapp->initial_addr.s_net) && - (pkt->src_at_addr.atalk_node == - probe_cb.elapp->initial_addr.s_node)) { - - /* this is a response to AARP_PROBE_CMD. There's - * someone out there with the address we desire - * for ourselves. - */ - untimeout(aarp_sched_probe, 0); - probe_cb.no_of_retries = 0; - aarp_get_next_node(probe_cb.elapp); - no_of_nodes_tried++; - - if (no_of_nodes_tried == AARP_MAX_NODES_TRIED) { - aarp_get_rand_net(probe_cb.elapp); - aarp_get_rand_node(probe_cb.elapp); - no_of_nodes_tried = 0; - no_of_nets_tried++; - } - if (no_of_nets_tried == AARP_MAX_NETS_TRIED) { - /* We have tried enough nodes and nets, give up. - */ - probe_cb.error = EADDRNOTAVAIL; - AARPwakeup(&probe_cb); - return(0); - } - if (aarp_send_probe() == -1) { - /* expecting aarp_send_probe to fill in - * probe_cb.error - */ - AARPwakeup(&probe_cb); - return(-1); - } - } else { - /* hmmmm! got a response packet while still probing - * for an AT address and the AT dest address doesn't - * match!! - * What should I do here?? - */ - return(-1); - } - break; - - case PROBE_DONE : - AMT_LOOK(amt_ptr, pkt->src_at_addr, elapp) - if (amt_ptr == NULL) - return(-1); - if (amt_ptr->tmo) { - untimeout(aarp_sched_req, amt_ptr); - amt_ptr->tmo = 0; - } - - if (amt_ptr->m == NULL) { - /* this may be because of a belated response to - * an aarp request.
Based on an earlier response, we - * might have already sent the packet out, so - * there's nothing to send now. This is okay, no - * error. - */ - return(0); - } - amt_ptr->dest_addr = pkt->src_addr; - if (FDDI_OR_TOKENRING(elapp->aa_ifp->if_type)) - ddp_bit_reverse((unsigned char *)&amt_ptr->dest_addr); - m = amt_ptr->m; - amt_ptr->m = NULL; - pat_output(amt_ptr->elapp, m, - (unsigned char *)&amt_ptr->dest_addr, 0); - break; - default : - /* probing in a weird state?? */ - return(-1); - } - return(0); -} - - - -/**************************************************************************** - * aarp_probe_cmd_in() - * - ****************************************************************************/ -StaticProc int aarp_probe_cmd_in (pkt, elapp) -register aarp_pkt_t *pkt; -at_ifaddr_t *elapp; -{ - register aarp_amt_t *amt_ptr; - - switch (probing) { - case PROBE_TENTATIVE : - if ((elapp == probe_cb.elapp) && - (NET_VALUE(pkt->src_at_addr.atalk_net) == - probe_cb.elapp->initial_addr.s_net) && - (pkt->src_at_addr.atalk_node == - probe_cb.elapp->initial_addr.s_node)) { - /* some bozo is probing for address I want... and I - * can't tell him to shove off! - */ - untimeout(aarp_sched_probe, 0); - probe_cb.no_of_retries = 0; - aarp_get_next_node(probe_cb.elapp); - no_of_nodes_tried++; - - if (no_of_nodes_tried == AARP_MAX_NODES_TRIED) { - aarp_get_rand_net(probe_cb.elapp); - aarp_get_rand_node(probe_cb.elapp); - no_of_nodes_tried = 0; - no_of_nets_tried++; - } - if (no_of_nets_tried == AARP_MAX_NETS_TRIED) { - /* We have tried enough nodes and nets, give up. - */ - probe_cb.error = EADDRNOTAVAIL; - AARPwakeup(&probe_cb); - return(0); - } - if (aarp_send_probe() == -1) { - /* expecting aarp_send_probe to fill in - * probe_cb.error - */ - AARPwakeup(&probe_cb); - return(-1); - } - } else { - /* somebody's probing... none of my business yet, so - * just ignore the packet - */ - return (0); - } - break; - - case PROBE_DONE : - if ((NET_VALUE(pkt->src_at_addr.atalk_net) == elapp->ifThisNode.s_net) && - (pkt->src_at_addr.atalk_node == elapp->ifThisNode.s_node)) { - if (aarp_send_resp(elapp, pkt) == -1) - return (-1); - return (0); - } - AMT_LOOK(amt_ptr, pkt->src_at_addr, elapp); - - if (amt_ptr) - aarp_delete_amt_info(amt_ptr); - break; - default : - /* probing in a weird state?? */ - return (-1); - } - return (0); -} - - - -/**************************************************************************** - * aarp_chk_addr() - ****************************************************************************/ -int aarp_chk_addr(ddp_hdrp, elapp) - at_ddp_t *ddp_hdrp; - at_ifaddr_t *elapp; -{ - if ((ddp_hdrp->dst_node == elapp->ifThisNode.s_node) && - (NET_VALUE(ddp_hdrp->dst_net) == elapp->ifThisNode.s_net)) { - return(0); /* exact match in address */ - } - - if (AARP_BROADCAST(ddp_hdrp, elapp)) { - return(0); /* some kind of broadcast address */ - } - return (AARP_ERR_NOT_OURS); /* not for us */ -} - - - -/**************************************************************************** - * aarp_send_data() - * - * remarks : - * 1. The message coming in would be of the form {802.3, 802.2, ddp,...} - * - * 2. The message coming in would be freed here if transmission goes - * through okay. If an error is returned by aarp_send_data, the caller - * can assume that the message is not freed. The exception to - * this scenario is the prepended atalk_addr field. This field - * will ALWAYS be removed. If the message is dropped, - * it's not an "error". 
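 - *
 - * 3. A hypothetical caller (editor's illustration only, not code from
 - *    this file; m, elapp and dst stand in for the caller's own names)
 - *    would therefore look like:
 - *
 - *        int err = aarp_send_data(m, elapp, &dst, TRUE);
 - *        if (err)
 - *            gbuf_freel(m);   /- on error the chain is still the caller's -/
 - *        ... on success (including a silent drop) m belongs to AARP ...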
- * - * Parameter dest_at_addr must have the net # in network byte order - ****************************************************************************/ - -int aarp_send_data(m, elapp, dest_at_addr, loop) - register gbuf_t *m; - register at_ifaddr_t *elapp; - struct atalk_addr *dest_at_addr; /* net# in network byte order */ - int loop; /* if true, loopback broadcasts */ -{ - register aarp_amt_t *amt_ptr; - register at_ddp_t *ddp_hdrp; - int error; - struct timeval timenow; - getmicrouptime(&timenow); - - if (gbuf_len(m) <= 0) - ddp_hdrp = (at_ddp_t *)gbuf_rptr(gbuf_cont(m)); - else - ddp_hdrp = (at_ddp_t *)gbuf_rptr(m); - - if ((ddp_hdrp->dst_node == ddp_hdrp->src_node) && - (NET_VALUE(ddp_hdrp->dst_net) == NET_VALUE(ddp_hdrp->src_net))) { - /* - * we're sending to ourselves - * so loop it back upstream - */ - ddp_input(m, elapp); - return(0); - } - AMT_LOOK(amt_ptr, *dest_at_addr, elapp); - - - if (amt_ptr) { - if (amt_ptr->m) { - /* - * there's already a packet awaiting transmission, so - * drop this one and let the upper layer retransmit - * later. - */ - gbuf_freel(m); - return (0); - } - return (pat_output(elapp, m, - (unsigned char *)&amt_ptr->dest_addr, 0)); - } - /* - * either this is a packet to be broadcasted, or the address - * resolution needs to be done - */ - if (AARP_BROADCAST(ddp_hdrp, elapp)) { - gbuf_t *newm = 0; - struct etalk_addr *dest_addr; - - dest_addr = &elapp->cable_multicast_addr; - if (loop) - newm = (gbuf_t *)gbuf_dupm(m); - - if ( !(error = pat_output(elapp, m, - (unsigned char *)dest_addr, 0))) { - /* - * The message transmitted successfully; - * Also loop a copy back up since this - * is a broadcast message. - */ - if (loop) { - if (newm == NULL) - return (error); - ddp_input(newm, elapp); - } /* endif loop */ - } else { - if (newm) - gbuf_freem(newm); - } - return (error); - } - NEW_AMT(amt_ptr, *dest_at_addr, elapp) - - if (amt_ptr->m) { - /* - * no non-busy slots available in the cache, so - * drop this one and let the upper layer retransmit - * later. - */ - gbuf_freel(m); - return (0); - } - amt_ptr->dest_at_addr = *dest_at_addr; - amt_ptr->dest_at_addr.atalk_unused = 0; - - getmicrouptime(&timenow); - amt_ptr->last_time = timenow.tv_sec; - amt_ptr->m = m; - amt_ptr->elapp = elapp; - amt_ptr->no_of_retries = 0; - - if ((error = aarp_send_req(amt_ptr))) { - aarp_delete_amt_info(amt_ptr); - return(error); - } - return(0); -} - - - -/**************************************************************************** - * aarp_send_resp() - * - * remarks : - * The pkt being passed here is only to "look at". It should neither - * be used for transmission, nor freed. Its contents also must not be - * altered. 
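 - *
 - * (Editor's note, restating what the routine below actually does: the
 - * reply is built in a freshly allocated gbuf, and only the source
 - * addresses are copied out of *pkt, so the incoming packet is never
 - * reused, altered, or freed here.)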
- * - ****************************************************************************/ -StaticProc int aarp_send_resp(elapp, pkt) - register at_ifaddr_t *elapp; - aarp_pkt_t *pkt; -{ - register aarp_pkt_t *new_pkt; - register gbuf_t *m; - - if ((m = gbuf_alloc(AT_WR_OFFSET+sizeof(aarp_pkt_t), PRI_MED)) == NULL) { - return (-1); - } - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,0); - - new_pkt = (aarp_pkt_t *)gbuf_rptr(m); - aarp_build_pkt(new_pkt, elapp); - - new_pkt->aarp_cmd = htons(AARP_RESP_CMD); - new_pkt->dest_addr = pkt->src_addr; - - new_pkt->dest_at_addr = pkt->src_at_addr; - new_pkt->dest_at_addr.atalk_unused = 0; - - ATALK_ASSIGN(new_pkt->src_at_addr, elapp->ifThisNode.s_net, - elapp->ifThisNode.s_node, 0); - - gbuf_winc(m,sizeof(aarp_pkt_t)); - if (FDDI_OR_TOKENRING(elapp->aa_ifp->if_type)) - ddp_bit_reverse((unsigned char *)&new_pkt->dest_addr); - - if (pat_output(elapp, m, (unsigned char *)&new_pkt->dest_addr, - AARP_AT_TYPE)) - return(-1); - return(0); -} - - - -/**************************************************************************** - * aarp_send_req() - * - ****************************************************************************/ - -StaticProc int aarp_send_req (amt_ptr) -register aarp_amt_t *amt_ptr; -{ - register aarp_pkt_t *pkt; - register gbuf_t *m; - int error; - - if ((m = gbuf_alloc(AT_WR_OFFSET+sizeof(aarp_pkt_t), PRI_MED)) == NULL) { - return (ENOBUFS); - } - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,0); - - pkt = (aarp_pkt_t *)gbuf_rptr(m); - aarp_build_pkt(pkt, amt_ptr->elapp); - - pkt->aarp_cmd = htons(AARP_REQ_CMD); - pkt->dest_addr = et_zeroaddr; - pkt->dest_at_addr = amt_ptr->dest_at_addr; - pkt->dest_at_addr.atalk_unused = 0; - ATALK_ASSIGN(pkt->src_at_addr, amt_ptr->elapp->ifThisNode.s_net, - amt_ptr->elapp->ifThisNode.s_node, 0); - gbuf_winc(m,sizeof(aarp_pkt_t)); - - amt_ptr->no_of_retries++; - timeout(aarp_sched_req, amt_ptr, AARP_REQ_TIMER_INT); - amt_ptr->tmo = 1; - error = pat_output(amt_ptr->elapp, m, - (unsigned char *)&amt_ptr->elapp->cable_multicast_addr, AARP_AT_TYPE); - if (error) - { - untimeout(aarp_sched_req, amt_ptr); - amt_ptr->tmo = 0; - return(error); - } - - return(0); -} - - - -/**************************************************************************** - * aarp_send_probe() - * - ****************************************************************************/ -StaticProc int aarp_send_probe(void) -{ - register aarp_pkt_t *pkt; - register gbuf_t *m; - - if ((m = gbuf_alloc(AT_WR_OFFSET+sizeof(aarp_pkt_t), PRI_MED)) == NULL) { - probe_cb.error = ENOBUFS; - return (-1); - } - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,0); - pkt = (aarp_pkt_t *)gbuf_rptr(m); - aarp_build_pkt(pkt, probe_cb.elapp); - - pkt->aarp_cmd = htons(AARP_PROBE_CMD); - pkt->dest_addr = et_zeroaddr; - - ATALK_ASSIGN(pkt->src_at_addr, probe_cb.elapp->initial_addr.s_net, - probe_cb.elapp->initial_addr.s_node, 0); - - ATALK_ASSIGN(pkt->dest_at_addr, probe_cb.elapp->initial_addr.s_net, - probe_cb.elapp->initial_addr.s_node, 0); - - gbuf_winc(m,sizeof(aarp_pkt_t)); - - probe_cb.error = pat_output(probe_cb.elapp, m, - (unsigned char *)&probe_cb.elapp->cable_multicast_addr, AARP_AT_TYPE); - if (probe_cb.error) { - return(-1); - } - - probing = PROBE_TENTATIVE; - probe_cb.no_of_retries++; - timeout(aarp_sched_probe, 0, AARP_PROBE_TIMER_INT); - - return(0); -} - - - -/**************************************************************************** - * aarp_lru_entry() - * - ****************************************************************************/ - -StaticProc aarp_amt_t 
*aarp_lru_entry(at) -register aarp_amt_t *at; -{ - register aarp_amt_t *at_ret; - register int i; - - at_ret = at; - - for (i = 1, at++; i < AMT_BSIZ; i++, at++) { - if (at->last_time < at_ret->last_time && (at->m == NULL)) - at_ret = at; - } - return(at_ret); -} - - - -/**************************************************************************** - * aarp_glean_info() - * - ****************************************************************************/ - -StaticProc int aarp_glean_info(pkt, elapp) -register aarp_pkt_t *pkt; -at_ifaddr_t *elapp; -{ - register aarp_amt_t *amt_ptr; - - AMT_LOOK(amt_ptr, pkt->src_at_addr, elapp); - - if (amt_ptr == NULL) { - /* - * amt entry for this address doesn't exist, add it to the cache - */ - NEW_AMT(amt_ptr, pkt->src_at_addr,elapp); - - if (amt_ptr->m) - return(0); /* no non-busy slots available in the cache */ - amt_ptr->dest_at_addr = pkt->src_at_addr; - amt_ptr->dest_at_addr.atalk_unused = 0; - - amt_ptr->last_time = (int)random(); - } - /* - * update the ethernet address - * in either case - */ - amt_ptr->dest_addr = pkt->src_addr; - if (FDDI_OR_TOKENRING(elapp->aa_ifp->if_type)) - ddp_bit_reverse((unsigned char *)&amt_ptr->dest_addr); - return(1); -} - - -/**************************************************************************** - * aarp_delete_amt_info() - * - ****************************************************************************/ - -StaticProc int aarp_delete_amt_info(amt_ptr) -register aarp_amt_t *amt_ptr; -{ - register gbuf_t *m; - amt_ptr->last_time = 0; - ATALK_ASSIGN(amt_ptr->dest_at_addr, 0, 0, 0); - amt_ptr->no_of_retries = 0; - - if (amt_ptr->m) { - m = amt_ptr->m; - amt_ptr->m = NULL; - gbuf_freel(m); - } - return(0); -} - - - -/**************************************************************************** - * aarp_sched_probe() - * - ****************************************************************************/ - -void aarp_sched_probe(__unused void *arg) -{ - - atalk_lock(); - - if (probe_cb.elapp->aa_ifp != 0 && - probe_cb.no_of_retries != AARP_MAX_PROBE_RETRIES) { - if (aarp_send_probe() == -1) - AARPwakeup(&probe_cb); - } else { - probe_cb.error = 0; - AARPwakeup(&probe_cb); - } - - atalk_unlock(); -} - - - -/**************************************************************************** - * aarp_build_pkt() - * - ****************************************************************************/ - -StaticProc void aarp_build_pkt(pkt, elapp) - register aarp_pkt_t *pkt; - at_ifaddr_t *elapp; -{ - pkt->hardware_type = htons(AARP_ETHER_HW_TYPE); - pkt->stack_type = htons(AARP_AT_PROTO); - pkt->hw_addr_len = ETHERNET_ADDR_LEN; - pkt->stack_addr_len = AARP_AT_ADDR_LEN; - bcopy(elapp->xaddr, pkt->src_addr.etalk_addr_octet, sizeof(elapp->xaddr)); - if (FDDI_OR_TOKENRING(elapp->aa_ifp->if_type)) - ddp_bit_reverse(pkt->src_addr.etalk_addr_octet); -} - -/**************************************************************************** - * aarp_sched_req() - * - ****************************************************************************/ - -StaticProc void aarp_sched_req(arg) - void *arg; -{ - int i; - aarp_amt_t *amt_ptr = (aarp_amt_t *)arg; - - atalk_lock(); - - /* - * make sure pointer still valid in case interface removed - * while trying to acquire the funnel. make sure it points - * into one of the amt arrays. 
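 - *
 - * Editor's restatement of the containment test in the loop below: the
 - * pointer is accepted only if, for some i,
 - *     (void *)aarp_table[i] <= (void *)amt_ptr < (void *)(aarp_table[i] + 1)
 - * i.e. it lands inside exactly one aarp_amt_array object.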
- */ - for (i = 0; i < IF_TOTAL_MAX; i++) { - if (aarp_table[i] == NULL || (void *)amt_ptr < (void *)aarp_table[i] || - (void *)amt_ptr >= (void *)(aarp_table[i] + 1)) - continue; /* no match - try next entry */ - - /* - * found match - pointer is valid - */ - if (amt_ptr->tmo == 0) { - atalk_unlock(); - return; - } - if (amt_ptr->no_of_retries < AARP_MAX_REQ_RETRIES) { - if (aarp_send_req(amt_ptr) == 0) { - atalk_unlock(); - return; - } - } - aarp_delete_amt_info(amt_ptr); - break; - } - atalk_unlock(); - - return; -} - - - -/**************************************************************************** - * aarp_get_rand_node() - * - ****************************************************************************/ -StaticProc int aarp_get_rand_node(elapp) -at_ifaddr_t *elapp; -{ - register u_char node; - - /* - * generate a starting node number in the range 1 thru 0xfd. - * we use this as the starting probe point for a given net - * To generate a different node number each time we call - * aarp_get_next_node - */ - node = ((u_char)(random() & 0xff)) % 0xfd + 2; - - elapp->initial_addr.s_node = node; - return(0); -} - - - -StaticProc int aarp_get_next_node(elapp) -at_ifaddr_t *elapp; -{ - register u_char node = elapp->initial_addr.s_node; - - /* - * return the next node number in the range 1 thru 0xfd. - */ - node = (node == 0xfd) ? (1) : (node+1); - - elapp->initial_addr.s_node = node; - return(0); -} - - - - - -/**************************************************************************** - * aarp_get_rand_net() - * - ****************************************************************************/ -StaticProc int aarp_get_rand_net(elapp) -register at_ifaddr_t *elapp; -{ - register at_net_al last_net, new_net; - - if (elapp->ifThisCableStart) { - last_net = elapp->initial_addr.s_net; - /* - * the range of network numbers valid for this - * cable is known. Try to choose a number from - * this range only. - */ - new_net= ((at_net_al)random() & 0xffff); - /* two-byte random number generated... now fit it in - * the prescribed range - */ - new_net = new_net % (unsigned) (elapp->ifThisCableEnd - - elapp->ifThisCableStart + 1) - + elapp->ifThisCableStart; - - if (new_net == last_net) { - if (new_net == elapp->ifThisCableEnd) - new_net = elapp->ifThisCableStart; - else - new_net++; - } - elapp->initial_addr.s_net = new_net; - } else { - /* The range of valid network numbers for this cable - * is not known... choose a network number from - * startup range. - */ - last_net = (elapp->initial_addr.s_net & 0x00ff); - new_net = (at_net_al)random() & 0x00ff; - - if (new_net == last_net) - new_net++; - if (new_net == 0xff) - new_net = 0; - elapp->initial_addr.s_net = (DDP_STARTUP_LOW | new_net); - } - return(0); -} - - -int getAarpTableSize(__unused int elapId) - /* elap_specifics array index (should be - * changed when we add a non-ethernet type - * of I/F to the mix. Unused for now. - */ -{ - return(AMTSIZE); -} - -int getPhysAddrSize(__unused int elapId) - /* elap_specifics array index (should be - * changed when we add a non-ethernet type - * of I/F to the mix. Unused for now. - */ -{ - return(ETHERNET_ADDR_LEN); -} - -#define ENTRY_SIZE sizeof(struct atalk_addr) + sizeof(struct etalk_addr) - -snmpAarpEnt_t *getAarp(elapId) - int *elapId; /* I/F table to retrieve & table - size entries on return */ - -/* gets aarp table for specified interface and builds - a table in SNMP expected format. 
Returns pointer to said - table and sets elapId to byte size of used portion of table -*/ -{ - int i, cnt=0; - aarp_amt_t *amtp; - static snmpAarpEnt_t snmp[AMTSIZE]; - snmpAarpEnt_t *snmpp; - struct atalk_addr addr; - u_short tmp_net; - - - if (*elapId <0 || *elapId >= IF_TOTAL_MAX) - return NULL; - - - for (i=0, amtp = &(aarp_table[*elapId]->et_aarp_amt[0]), snmpp = snmp; - i < AMTSIZE; i++,amtp++) { - - /* last_time will be 0 if entry was never used */ - if (amtp->last_time) { - /* copy just network & mac address. - * For speed, we assume that the atalk_addr - * & etalk_addr positions in the aarp_amt_t struct - * has not changed and copy both at once - */ - addr.atalk_unused = 0; - tmp_net = UAS_VALUE(amtp->dest_at_addr.atalk_net); - NET_ASSIGN(addr.atalk_net, tmp_net); - addr.atalk_node = amtp->dest_at_addr.atalk_node; - bcopy(&addr, &snmpp->ap_ddpAddr, ENTRY_SIZE); - snmpp++; - cnt++; - - } - } - *elapId = cnt; - return(snmp); -} -/*#endif *//* COMMENTED_OUT */ - diff --git a/bsd/netat/ddp_aep.c b/bsd/netat/ddp_aep.c deleted file mode 100644 index 11beb6b96..000000000 --- a/bsd/netat/ddp_aep.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1997-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -extern snmpStats_t snmpStats; - -/****************************************************************/ -/* */ -/* */ -/* Echo Protocol */ -/* */ -/* */ -/****************************************************************/ - -void ep_input (mp, ifID) - gbuf_t *mp; - register at_ifaddr_t *ifID; -{ - register at_ddp_t *ddp; - - snmpStats.ec_echoReq++; - ddp = (at_ddp_t *)gbuf_rptr(mp); - - /* ep packets that have a source broadcast can cause - * possible broadcast storms, prevent that here - */ - if ( NET_VALUE(ddp->src_net) == 0 || ddp->src_node == 255) { - gbuf_freem(mp); - return; - } - - /* - * Check if this AEP message is for us or need to be forwarded - */ - if (!ROUTING_MODE || - ((ifID->ifThisNode.s_net == NET_VALUE(ddp->dst_net)) - && (ifID->ifThisNode.s_node == ddp->dst_node))) { - - dPrintf(D_M_AEP, D_L_INFO, ("aep_input: received for this port from %d:%d\n", - NET_VALUE(ddp->src_net), ddp->src_node)); - - if (ddp->type == DDP_ECHO && - ddp->data[0] == EP_REQUEST) { - ddp->data[0] = EP_REPLY; - NET_NET(ddp->dst_net, ddp->src_net); - ddp->dst_node = ddp->src_node; - ddp->dst_socket = ddp->src_socket; - /* send the packet out.... */ - snmpStats.ec_echoReply++; - (void)ddp_output(&mp, (at_socket)EP_SOCKET, FALSE); - } else - gbuf_freem(mp); - } - else { - dPrintf(D_M_AEP, D_L_INFO, - ("aep_input: calling routing needed from %d:%d to %d:%d\n", - NET_VALUE(ddp->src_net), ddp->src_node, NET_VALUE(ddp->dst_net), - ddp->dst_node)); - routing_needed(mp, ifID, TRUE); - } - - return; -} diff --git a/bsd/netat/ddp_brt.c b/bsd/netat/ddp_brt.c deleted file mode 100644 index ce35ae98a..000000000 --- a/bsd/netat/ddp_brt.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. - * - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
- */ - -#ifndef lint -/* static char sccsid[] = "@(#)ddp_brt.c: 2.0, 1.7; 10/4/93; Copyright 1988-89, Apple Computer, Inc."; */ -#endif /* lint */ - -/* - * Title: ddp_brt.c - * - * Facility: Best Router Caching. - * - * Author: Kumar Vora, Creation Date: June-15-1989 - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -/* Best Router Cache */ -ddp_brt_t at_ddp_brt[BRTSIZE]; -int ddp_brt_sweep_timer; - - -void ddp_glean(mp, ifID, src_addr) - register gbuf_t *mp; - register at_ifaddr_t *ifID; - struct etalk_addr *src_addr; -{ - register at_net_al src_net; - - /* NOT assuming that the incoming packet is in one contiguous - * buffer. - */ - - { - /* The interface is ethertalk, so the message is - * of the form {802.3, 802.2, ddp.... }. Extract the - * 802.3 source address if necessary. Assuming, - * however, that 802.3 and 802.2 headers are in - * one contiguous piece. - */ - { register at_ddp_t *dgp; - - dgp = (at_ddp_t *)(gbuf_rptr(mp)); - src_net = NET_VALUE(dgp->src_net); - } - if (src_net >= ifID->ifThisCableStart && src_net <= ifID->ifThisCableEnd) - /* the packet has come from a net on this cable, - * no need to glean router info. - */ - return; - - if (src_addr != NULL) - { register ddp_brt_t *brt; - - BRT_LOOK (brt, src_net); - if (brt == NULL) { - /* There's no BRT entry corresponding to this - * net. Allocate a new entry. - */ - NEW_BRT(brt, src_net); - if (brt == NULL) - /* No space available in the BRT; - * can't glean info. - */ - return; - brt->net = src_net; - } - /* - * update the router info in either case - */ - brt->et_addr = *src_addr; - brt->age_flag = BRT_VALID; - brt->ifID = ifID; - } - } -} - -void ddp_brt_init() -{ - bzero(at_ddp_brt, sizeof(at_ddp_brt)); - ddp_brt_sweep_timer = 1; -#ifdef NOT_USED - timeout(ddp_brt_sweep_locked, (long)0, BRT_SWEEP_INT * SYS_HZ); -#endif -} - -void ddp_brt_shutdown() -{ -#ifdef NOT_USED - bzero(at_ddp_brt, sizeof(at_ddp_brt)); - if (ddp_brt_sweep_timer) - untimeout(ddp_brt_sweep_locked, 0); -#endif - ddp_brt_sweep_timer = 0; -} - -/* locked version */ -#ifdef NOT_USED -void ddp_brt_sweep_locked() -{ - atalk_lock(); - ddp_brt_sweep(); - atalk_unlock(); -} -#endif - -void ddp_brt_sweep(void) -{ - register ddp_brt_t *brt; - register int i; - - if (ddp_brt_sweep_timer) - if (++ddp_brt_sweep_timer > BRT_SWEEP_INT) { - ddp_brt_sweep_timer = 1; - - brt = at_ddp_brt; - for (i = 0; i < BRTSIZE; i++, brt++) { - switch (brt->age_flag) { - case BRT_EMPTY : - break; - case BRT_VALID : - brt->age_flag = BRT_GETTING_OLD; - break; - case BRT_GETTING_OLD : - bzero(brt, sizeof(ddp_brt_t)); - break; - default : - ATTRACE(AT_MID_DDP,AT_SID_RESOURCE, AT_LV_ERROR, FALSE, - "ddp_brt_sweep : corrupt age flag %d", - brt->age_flag, 0,0); - break; - } - } - } -#ifdef NOT_USED - /* set up the next sweep... */ - timeout(ddp_brt_sweep_locked, (long)0, BRT_SWEEP_INT * SYS_HZ); -#endif - -} - - diff --git a/bsd/netat/ddp_lap.c b/bsd/netat/ddp_lap.c deleted file mode 100644 index 42f81cdad..000000000 --- a/bsd/netat/ddp_lap.c +++ /dev/null @@ -1,1734 +0,0 @@ -/* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). 
You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1988, 1989, 1993-1998 Apple Computer, Inc. - */ - -/* at_elap.c: 2.0, 1.29; 10/4/93; Apple Computer, Inc. */ - -/* This is the file which implements all the streams driver - * functionality required for EtherTalk. - */ - -/* revision history - - 03-14-94 jjs Changed all functions which assumed only one port would - ever be used. Added validate_msg_size, changed elap_online - to work with the h/w name only (e.g. 'et2'). - - Modified for MP, 1996 by Tuyen Nguyen - Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - -*/ - -#define RESOLVE_DBG /* for debug.h global resolution */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for kernel_map */ - - -#include -#include - -#include -#include -#include -#include -#include -#include -#include /* rtmp+zip table structs */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* globals */ - -at_ifaddr_t at_interfaces[IF_TOTAL_MAX]; - /* index for at_interfaces is not important */ -at_ifaddr_t *ifID_table[IF_TOTAL_MAX]; - /* the table of ifID structures, one per interface - (not just ethernet), - * NOTE: for MH, entry 0 in this table is - * now defined to be the default I/F - */ -at_ifaddr_t *ifID_home; - /* always ifID_table[IFID_HOME] for now, but will be used for - dynamic "home port" assignment, later */ - -at_state_t at_state; /* global state of AT network */ -snmpFlags_t snmpFlags; - -int xpatcnt = 0; - -/* snmp defines */ -#define MAX_BUFSIZE 8192 -#define MAX_RTMP (MAX_BUFSIZE/sizeof(RT_entry)-1) -#define MAX_NBP \ - ((MAX_BUFSIZE - SNMP_NBP_HEADER_SIZE)/sizeof(snmpNbpEntry_t)-1) -#define MAX_NBP_BYTES (MAX_NBP * sizeof(snmpNbpEntry_t)) -#define MAX_ZIP (MAX_BUFSIZE/sizeof(ZT_entry)-1) -#define MAX_RTMP_BYTES (MAX_RTMP * sizeof(RT_entry)) -#define MAX_ZIP_BYTES (MAX_ZIP * sizeof(ZT_entry)) - -/* externs */ -extern TAILQ_HEAD(name_registry, _nve_) name_registry; -extern snmpStats_t snmpStats; -extern short appletalk_inited; -extern int adspInited; -extern struct atpcb ddp_head; -extern gref_t *atp_inputQ[]; -extern struct atp_state *atp_used_list; -extern asp_scb_t *asp_scbQ[]; -extern asp_scb_t *scb_used_list; -extern CCB *adsp_inputQ[]; -extern CCB *ccb_used_list; -extern at_ddp_stats_t at_ddp_stats; -extern lck_mtx_t * atalk_mutex; - -/* protos */ -int 
rtmp_router_start(at_kern_err_t *); -static void add_route(RT_entry *); -void elap_offline(at_ifaddr_t *); -static int elap_online1(at_ifaddr_t *); -static void elap_online2(at_ifaddr_t *); - int elap_online3(at_ifaddr_t *); -static int re_aarp(at_ifaddr_t *); -static int getSnmpCfg(snmpCfg_t *); - -int routerStart(at_kern_err_t *); - -static int validate_msg_size(gbuf_t *, gref_t *, at_ifaddr_t **); -at_ifaddr_t *find_ifID(char *); -int lap_online( at_ifaddr_t *, at_if_cfg_t *cfgp); - - -at_ifaddr_t *find_ifID(if_name) - char *if_name; -{ - int pat_id; - - if (strlen(if_name)) - for (pat_id=0; pat_id < xpatcnt; pat_id++) { - if (!strcmp(at_interfaces[pat_id].ifName, if_name)) - return(&at_interfaces[pat_id]); - } - - return((at_ifaddr_t *)NULL); -} - -static int validate_msg_size(m, gref, elapp) - register gbuf_t *m; - gref_t *gref; - at_ifaddr_t **elapp; - -/* checks ioctl message type for minimum expected message size & - sends error back if size invalid -*/ -{ - register ioc_t *iocbp; - int i = 0, size = 1; - - *elapp = NULL; - iocbp = (ioc_t *) gbuf_rptr(m); - - dPrintf(D_M_ELAP, D_L_INFO, ("validate_msg_size: ioc_cmd = %d\n", - iocbp->ioc_cmd)); - switch (iocbp->ioc_cmd) { - case LAP_IOC_ADD_ROUTE: - size = sizeof(RT_entry); - break; - case LAP_IOC_GET_ROUTE: - size = sizeof(RT_entry); - break; - case LAP_IOC_GET_ZONE: - size = sizeof(ZT_entryno); - break; - case LAP_IOC_SNMP_GET_CFG: - case LAP_IOC_SNMP_GET_AARP: - case LAP_IOC_SNMP_GET_ZIP: - case LAP_IOC_SNMP_GET_RTMP: - case LAP_IOC_SNMP_GET_NBP: - size = sizeof(int); - break; - - case ELAP_IOC_GET_STATS: - case LAP_IOC_SNMP_GET_DDP: - size = 0; - break; - - default: - dPrintf(D_M_ELAP, D_L_ERROR, ("validate_msg_size: unknown ioctl\n")); - goto error; - } - - if (size == 0) { /* a non-data ioctl */ - return(0); - } - - if (gbuf_cont(m) != NULL) - i = gbuf_len(gbuf_cont(m)); - if (iocbp->ioc_count < size || (gbuf_cont(m) == NULL) || i < size) { - dPrintf(D_M_ELAP, D_L_ERROR, - ("ioctl msg error:s:%d c:%d bcont:%c delta:%d\n", - size, iocbp->ioc_count, - gbuf_cont(m)? 'Y' : 'N', i)); - goto error; - } - else - return(0); -error: - ioc_ack(EMSGSIZE, m, gref); - return (EMSGSIZE); -} /* validate_msg_size */ - -int lap_online(elapp, cfgp) - at_ifaddr_t *elapp; - at_if_cfg_t *cfgp; -{ - int error; - - if (elapp->ifState != LAP_OFFLINE) { - return(EALREADY); - } - - elapp->flags = 0; - if (cfgp->flags & ELAP_CFG_HOME) { - if (ifID_home) { - /* only 1 home allowed! */ - return(EEXIST); - } - dPrintf(D_M_ELAP, D_L_STARTUP, - ("elap_wput home I/F:%s\n", cfgp->ifr_name)); - elapp->flags |= ELAP_CFG_HOME; - } - - if (MULTIPORT_MODE) { - elapp->flags |= ELAP_CFG_ZONELESS; - if (ROUTING_MODE && cfgp->netStart) - elapp->flags |= ELAP_CFG_SEED; - } - - /* (VL) !? 
*/ - if ((!DEFAULT_ZONE(&cfgp->zonename) && - (elapp->flags & ELAP_CFG_HOME)) || MULTIHOME_MODE) { - elapp->startup_zone = cfgp->zonename; - } - - if (elapp->flags & ELAP_CFG_SEED) { - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, - ("elap_wput: found to be seed\n")); - elapp->ifThisCableStart = cfgp->netStart; - elapp->ifThisCableEnd = cfgp->netEnd; - } - else { - dPrintf(D_M_ELAP,D_L_ERROR, - ("elap_wput: we believe we're not seed\n")); - /* from ELAP_IOC_SET_CFG */ - if (ATALK_VALUE(cfgp->node)) { - u_short initial_net; - u_char initial_node; - - initial_node = cfgp->node.s_node; - initial_net = cfgp->node.s_net; - if ((initial_node<0xfe) && (initial_node>0) && - !((initial_net == 0) || - ((initial_net >= DDP_STARTUP_LOW)&& - (initial_net <= DDP_STARTUP_HIGH)))) { - - elapp->initial_addr = cfgp->node; - } - } - } - - elapp->startup_error = 0; - elapp->startup_inprogress = FALSE; - if ((error = elap_online1(elapp))) - ddp_rem_if(elapp); - else - if (!(MULTIPORT_MODE) && - elapp->ifZoneName.len == 1 && - elapp->ifZoneName.str[0] == '*' && - !DEFAULT_ZONE(&cfgp->zonename)) { - nbp_add_multicast(&cfgp->zonename, elapp); - } - return(error); -} /* lap_online */ - -/*********************************************************************** - * elap_wput() - * - **********************************************************************/ -int elap_wput(gref, m) - gref_t *gref; - register gbuf_t *m; -{ - at_ifaddr_t *elapp; - register ioc_t *iocbp; - register at_if_cfg_t *cfgp; - at_elap_stats_t *statsp; - int i,j; - int size, totalsize = 0, tabsize; - gbuf_t *mn; /* new gbuf */ - gbuf_t *mo; /* old gbuf */ - gbuf_t *mt = NULL; /* temp */ - snmpNbpTable_t *nbp; - - - switch (gbuf_type(m)) { - case MSG_DATA: - gbuf_freem(m); - dPrintf(D_M_ELAP,D_L_ERROR, - ("Output data to control channel is ignored\n")); - break; - - case MSG_IOCTL: - iocbp = (ioc_t *) gbuf_rptr(m); - - if (validate_msg_size(m, gref, &elapp)) - break; - - if (elapp) - cfgp = (at_if_cfg_t*) gbuf_rptr(gbuf_cont(m)); - - if (LAP_IOC_MYIOCTL(iocbp->ioc_cmd) || - ELAP_IOC_MYIOCTL(iocbp->ioc_cmd)) { - - switch (iocbp->ioc_cmd) { - case ELAP_IOC_GET_STATS: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_GET_STATS\n"); -#endif - if ( (gbuf_cont(m) == NULL) - || (elapp = find_ifID(gbuf_rptr(gbuf_cont(m)))) == NULL) { - ioc_ack(EINVAL, m, gref); - break; - } - gbuf_freem(gbuf_cont(m)); - if ((gbuf_cont(m) =gbuf_alloc(sizeof(at_elap_stats_t), - PRI_MED)) == NULL) { - ioc_ack(ENOBUFS, m, gref); - break; - } - statsp = ((at_elap_stats_t *)gbuf_rptr(gbuf_cont(m))); - *statsp = elapp->stats; - gbuf_wset(gbuf_cont(m),sizeof(at_elap_stats_t)); - iocbp->ioc_count = sizeof(at_elap_stats_t); - ioc_ack(0, m, gref); - break; - - case LAP_IOC_ADD_ROUTE: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_ADD_ROUTE\n"); -#endif - add_route((RT_entry *)gbuf_rptr(gbuf_cont(m))); - ioc_ack(0, m, gref); - break; - - case LAP_IOC_GET_ZONE: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_GET_ZONE\n"); -#endif - /* return next ZT_entryno from ZT_table - a pointer to the struct ZT_entryno is passed down from - user space and the first byte is cast to a int, if - this int is non-zero, then the first ZT_entry is - returned and subsequent calls with a zero value - will return the next entry in the table. 
The next - read after the last valid entry will return EINVAL - */ - { - ZT_entryno *pZTe; - - i = *(int *)gbuf_rptr(gbuf_cont(m)); - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - - pZTe = zt_getNextZone(i); - if (pZTe) { - if ((gbuf_cont(m) = gbuf_alloc(sizeof(ZT_entryno), PRI_MED)) == NULL) { - ioc_ack(ENOBUFS, m, gref); - break; - } - *(ZT_entryno *)gbuf_rptr(gbuf_cont(m)) = *pZTe; - gbuf_wset(gbuf_cont(m),sizeof(ZT_entryno)); - iocbp->ioc_count = sizeof(ZT_entryno); - ioc_ack(0, m, gref); - } - else - ioc_ack(EINVAL, m, gref); - } - break; - - case LAP_IOC_GET_ROUTE: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_GET_ROUTE\n"); -#endif - /* return next RT_entry from RT_table - * a pointer to the struct RT_entry is - * passed down from user space and the first - * byte is cast to a int, if this int is - * non-zero, then the first RT_entry is - * returned and subsequent calls with a - * zero value will return the next entry in - * the table. The next read after the last - * valid entry will return EINVAL - */ - { - RT_entry *pRT; - - i = *(int *)gbuf_rptr(gbuf_cont(m)); - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - - pRT = rt_getNextRoute(i); - if (pRT) { - if ((gbuf_cont(m) = gbuf_alloc(sizeof(RT_entry), PRI_MED)) == NULL) { - ioc_ack(ENOBUFS, m, gref); - break; - } - *(RT_entry *)gbuf_rptr(gbuf_cont(m)) = *pRT; - gbuf_wset(gbuf_cont(m),sizeof(RT_entry)); - iocbp->ioc_count = sizeof(RT_entry); - ioc_ack(0, m, gref); - } - else - ioc_ack(EINVAL, m, gref); - } - break; - - case LAP_IOC_SNMP_GET_DDP: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_SNMP_GET_DDP\n"); -#endif - if (!(at_state.flags & AT_ST_STARTED)) { - ioc_ack(ENOTREADY, m, gref); - break; - } - if ((gbuf_cont(m) = gbuf_alloc(sizeof(snmpStats_t), - PRI_MED)) == NULL) { - ioc_ack(ENOBUFS, m, gref); - break; - } - - *(snmpStats_t *)gbuf_rptr(gbuf_cont(m)) = snmpStats; - gbuf_wset(gbuf_cont(m),sizeof(snmpStats)); - iocbp->ioc_count = sizeof(snmpStats); - ioc_ack(0, m, gref); - break; - case LAP_IOC_SNMP_GET_CFG: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_SNMP_GET_CFG\n"); -#endif - { - snmpCfg_t snmp; - - i = *(int *)gbuf_rptr(gbuf_cont(m)); - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - if (!(at_state.flags & AT_ST_STARTED)) { - /* if stack down */ - iocbp->ioc_count = 0; - ioc_ack(ENOTREADY, m, gref); - dPrintf(D_M_ELAP_LOW, D_L_INFO, - ("elap_wput: cfg req, stack down\n")); - break; - } - if (i == UPDATE_IF_CHANGED && - !(at_state.flags & AT_ST_IF_CHANGED)) { - iocbp->ioc_count = 0; - ioc_ack(0, m, gref); - dPrintf(D_M_ELAP_LOW, D_L_INFO, - ("elap_wput: cfg req, unchanged\n")); - break; - } - dPrintf(D_M_ELAP_LOW, D_L_INFO, - ("elap_wput: cfg req, changed\n")); - - if (getSnmpCfg(&snmp)) { - dPrintf(D_M_ELAP,D_L_ERROR, - ("elap_wput:SNMP_GET_CFG error\n")); - ioc_ack(EOPNOTSUPP, m, gref); - break; - } - /* send up only used part of table */ - size = sizeof(snmp) - - sizeof(snmpIfCfg_t) * (MAX_IFS - snmp.cfg_ifCnt); - - if ((gbuf_cont(m) = gbuf_alloc(size, PRI_MED)) == NULL) { - ioc_ack(ENOBUFS, m, gref); - break; - } - bcopy(&snmp,gbuf_rptr(gbuf_cont(m)),size); - gbuf_wset(gbuf_cont(m),size); - iocbp->ioc_count = size; - at_state.flags &= ~AT_ST_IF_CHANGED; - ioc_ack(0, m, gref); - } - break; - - case LAP_IOC_SNMP_GET_AARP: - { - snmpAarpEnt_t *snmpp; - int bytes; -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_SNMP_GET_AARP\n"); -#endif - i = *(int *)gbuf_rptr(gbuf_cont(m)); - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - dPrintf(D_M_ELAP,D_L_INFO, - ("elap_wput:calling getarp,i=%d\n", i)); - snmpp = getAarp(&i); - 
bytes = i * sizeof(snmpAarpEnt_t); - dPrintf(D_M_ELAP,D_L_INFO, - ("elap_wput:getarp returned, i=%d,bytes=%d\n", - i, bytes)); - if (snmpp) { - if ((gbuf_cont(m) = gbuf_alloc(bytes, PRI_MED)) == NULL) { - ioc_ack(ENOBUFS, m, gref); - break; - } - bcopy(snmpp, gbuf_rptr(gbuf_cont(m)), bytes); - gbuf_wset(gbuf_cont(m),bytes); - iocbp->ioc_count = bytes; - ioc_ack(0, m, gref); - } - else - ioc_ack(EOPNOTSUPP, m, gref); - } - break; - - case LAP_IOC_SNMP_GET_ZIP: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_SNMP_GET_ZIP\n"); -#endif - { /* matching brace NOT in this case */ - - i = *(int *)gbuf_rptr(gbuf_cont(m)); - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - if (!(at_state.flags & AT_ST_STARTED)) { - ioc_ack(ENOTREADY, m, gref); - break; - } - if (i == UPDATE_IF_CHANGED && - !(at_state.flags & AT_ST_ZT_CHANGED)) { - iocbp->ioc_count = 0; - ioc_ack(0, m, gref); - break; - } - mo=(gbuf_t*)NULL; - tabsize = getZipTableSize(); - - /* retrieve table into multiple gbufs */ - for (i = 0; i < tabsize; i += j) { - j = tabsize - i > MAX_ZIP ? MAX_ZIP : tabsize - i; - size = j < MAX_ZIP ? sizeof(ZT_entry)*j : MAX_ZIP_BYTES; - if ((mn = gbuf_alloc(size, PRI_MED)) == NULL) { - if (gbuf_cont(m)) - gbuf_freem(gbuf_cont(m)); - ioc_ack(ENOBUFS, m, gref); - break; - } - if (!mo) { /* if first new one */ - mt = mn; - totalsize = size; - } - else { - gbuf_cont(mo) = mn; - totalsize += size; - } - mo = mn; - getZipTable((ZT_entry*)gbuf_rptr(mn),i,j); - gbuf_wset(mn,size); - } - if ((gbuf_cont(m) = gbuf_alloc(sizeof(int), PRI_MED)) == NULL) { - if (mt) - gbuf_freem(mt); - iocbp->ioc_count = 0; - ioc_ack(ENOBUFS, m, gref); - break; - } - if (!tabsize) { - dPrintf(D_M_ELAP,D_L_WARNING, - ("elap_wput:snmp: empty zip table\n")); - totalsize = 0; - } - *(int*)gbuf_rptr(gbuf_cont(m)) = totalsize; /* return table size */ - gbuf_wset(gbuf_cont(m),sizeof(int)); - iocbp->ioc_count = sizeof(int); - ioc_ack(0, m, gref); - if (tabsize) - atalk_putnext(gref,mt); /* send up table */ - at_state.flags &= ~AT_ST_ZT_CHANGED; - break; - - case LAP_IOC_SNMP_GET_RTMP: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_SNMP_GET_RTMP\n"); -#endif - i = *(int *)gbuf_rptr(gbuf_cont(m)); - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - if (!(at_state.flags & AT_ST_STARTED)) { - ioc_ack(ENOTREADY, m, gref); - break; - } - if (i == UPDATE_IF_CHANGED && - !(at_state.flags & AT_ST_RT_CHANGED)) { - iocbp->ioc_count = 0; - ioc_ack(0, m, gref); - break; - } - - mo=(gbuf_t*)NULL; - tabsize = getRtmpTableSize(); - - /* retrieve table into multiple gbufs */ - for (i = 0; i < tabsize; i += j) { - j = tabsize - i > MAX_RTMP ? MAX_RTMP : tabsize - i; - size = j < MAX_RTMP ?
sizeof(RT_entry)*j : MAX_RTMP_BYTES; - if ((mn = gbuf_alloc(size, PRI_MED)) == NULL) { - if (gbuf_cont(m)) - gbuf_freem(gbuf_cont(m)); - ioc_ack(ENOBUFS, m, gref); - break; - } - if (!mo) { /* if first new one */ - mt = mn; - totalsize = size; - } - else { - gbuf_cont(mo) = mn; - totalsize += size; - } - mo = mn; - getRtmpTable((RT_entry*)gbuf_rptr(mn),i,j); - gbuf_wset(mn,size); - } - if ((gbuf_cont(m) = gbuf_alloc(sizeof(int), PRI_MED)) == NULL) { - if (mt) - gbuf_freem(mt); - iocbp->ioc_count = 0; - ioc_ack(ENOBUFS, m, gref); - break; - } - if (!tabsize) - totalsize = 0; - *(int*)gbuf_rptr(gbuf_cont(m)) = totalsize; /* return table size */ - gbuf_wset(gbuf_cont(m),sizeof(int)); - iocbp->ioc_count = sizeof(int); - ioc_ack(0, m, gref); - if (tabsize) - atalk_putnext(gref,mt); /* send up table */ - at_state.flags &= ~AT_ST_RT_CHANGED; - break; - - case LAP_IOC_SNMP_GET_NBP: -#ifdef APPLETALK_DEBUG - kprintf("LAP_IOC_SNMP_GET_NBP\n"); -#endif - i = *(int *)gbuf_rptr(gbuf_cont(m)); - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = NULL; - if (!(at_state.flags & AT_ST_STARTED)) { - ioc_ack(ENOTREADY, m, gref); - break; - } - if (i == UPDATE_IF_CHANGED && - !(at_state.flags & AT_ST_NBP_CHANGED)) { - iocbp->ioc_count = 0; - ioc_ack(0, m, gref); - dPrintf(D_M_ELAP_LOW, D_L_INFO, - ("elap_wput: nbp req denied, no change\n")); - break; - } - - mo=(gbuf_t*)NULL; - tabsize = getNbpTableSize(); - - /* retrieve table into multiple gbufs */ - for (i = 0; i < tabsize; i += j) { - j = tabsize - i > MAX_NBP ? MAX_NBP : tabsize - i; - size = j < MAX_NBP ? sizeof(snmpNbpEntry_t)*j : MAX_NBP_BYTES; - if (!i) - size += SNMP_NBP_HEADER_SIZE; - if ((mn = gbuf_alloc(size, PRI_MED)) == NULL) { - if (gbuf_cont(m)) - gbuf_freem(gbuf_cont(m)); - ioc_ack(ENOBUFS, m, gref); - break; - } - if (!mo) { /* if first new one */ - mt = mn; - totalsize = size; - nbp = (snmpNbpTable_t*)gbuf_rptr(mn); - nbp->nbpt_entries = tabsize; - nbp->nbpt_zone = ifID_home->ifZoneName; - getNbpTable(nbp->nbpt_table,i,j); - } - else { - gbuf_cont(mo) = mn; - totalsize += size; - getNbpTable((snmpNbpEntry_t *)gbuf_rptr(mn),i,j); - } - mo = mn; - gbuf_wset(mn,size); - } - if ((gbuf_cont(m) = gbuf_alloc(sizeof(int), PRI_MED)) == NULL) { - if (mt) - gbuf_freem(mt); - iocbp->ioc_count = 0; - ioc_ack(ENOBUFS, m, gref); - break; - } - if (!tabsize) - totalsize = 0; - *(int*)gbuf_rptr(gbuf_cont(m)) = totalsize; /* return table size */ - gbuf_wset(gbuf_cont(m),sizeof(int)); - iocbp->ioc_count = sizeof(int); - ioc_ack(0, m, gref); - if (tabsize) - atalk_putnext(gref,mt); /* send up table */ - at_state.flags &= ~AT_ST_NBP_CHANGED; - break; - } - - default: -#ifdef APPLETALK_DEBUG - kprintf("unknown ioctl %d\n", iocbp->ioc_cmd); -#endif - ioc_ack(ENOTTY, m, gref); - dPrintf(D_M_ELAP, D_L_WARNING, - ("elap_wput: unknown ioctl (%d)\n", iocbp->ioc_cmd)); - - if (elapp) - elapp->stats.unknown_mblks++; - break; - } - } - break; - - default: - gbuf_freem(m); - break; - } - - return 0; -} /* elap_wput */ - - -/* Called directly by ddp/zip. - */ -int -elap_dataput(m, elapp, addr_flag, addr) - register gbuf_t *m; - register at_ifaddr_t *elapp; - u_char addr_flag; - char *addr; -{ - register int size; - int error = 0; - struct etalk_addr dest_addr; - struct atalk_addr dest_at_addr; - int loop = TRUE; - /* flag to aarp to loopback (default) */ - - /* the incoming frame is of the form {flag, address, ddp...} - * where "flag" indicates whether the address is an 802.3 - * (link) address, or an appletalk address.
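 - *
 - * Concretely (editor's sketch; the sizes follow the structs consumed
 - * below, assuming the usual 2-byte net / 1-byte node AppleTalk layout):
 - *
 - *   AT_ADDR / AT_ADDR_NO_LOOP:  { flag:1 | struct atalk_addr:4 | ddp... }
 - *   ET_ADDR:                    { flag:1 | struct etalk_addr:6 | ddp... }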
If it's an - * 802.3 address, the packet can just go out to the network - * through PAT, if it's an appletalk address, AT->802.3 address - * resolution needs to be done. - * If 802.3 address is known, strip off the flag and 802.3 - * address, and prepend 802.2 and 802.3 headers. - */ - - if (addr == NULL) { - addr_flag = *(u_char *)gbuf_rptr(m); - gbuf_rinc(m,1); - } - - switch (addr_flag) { - case AT_ADDR_NO_LOOP : - loop = FALSE; - /* pass thru */ - case AT_ADDR : - if (addr == NULL) { - dest_at_addr = *(struct atalk_addr *)gbuf_rptr(m); - gbuf_rinc(m,sizeof(struct atalk_addr)); - } else - dest_at_addr = *(struct atalk_addr *)addr; - break; - case ET_ADDR : - if (addr == NULL) { - dest_addr = *(struct etalk_addr *)gbuf_rptr(m); - gbuf_rinc(m,sizeof(struct etalk_addr)); - } else - dest_addr = *(struct etalk_addr *)addr; - break; - default : - gbuf_freel(m); /* unknown address type, chuck it */ - return(EINVAL); - } - - m = gbuf_strip(m); - - /* At this point, rptr points to ddp header for sure */ - if (elapp->ifState == LAP_OFFLINE) { - gbuf_freel(m); - return(ENETDOWN); - } - - if (elapp->ifState == LAP_ONLINE_FOR_ZIP) { - /* see if this is a ZIP packet that we need - * to let through even though network is - * not yet alive!! - */ - if (zip_type_packet(m) == 0) { - gbuf_freel(m); - return(ENETDOWN); - } - } - - elapp->stats.xmit_packets++; - size = gbuf_msgsize(m); - elapp->stats.xmit_bytes += size; - snmpStats.dd_outLong++; - - switch (addr_flag) { - case AT_ADDR_NO_LOOP : - case AT_ADDR : - /* - * we don't want elap to be looking into ddp header, so - * it doesn't know net#, consequently can't do - * AMT_LOOKUP. That task left to aarp now. - */ - error = aarp_send_data(m, elapp, &dest_at_addr, loop); - break; - case ET_ADDR : - error = pat_output(elapp, m, (unsigned char *)&dest_addr, 0); - break; - } - return (error); -} /* elap_dataput */ - -/************************************************************************ - * elap_online() - * - ************************************************************************/ - -static int elap_online1(elapp) - at_ifaddr_t *elapp; -{ - int errno; - - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, ("elap_online:%s elapp:0x%x\n", - (elapp->ifName) ? &elapp->ifName[0] : "NULL interface", (u_int) elapp)); - if (elapp->ifState != LAP_OFFLINE || elapp->startup_inprogress == TRUE) - return (EALREADY); - - at_state.flags |= AT_ST_IF_CHANGED; - - if (elapp->flags & ELAP_CFG_HOME) /* tell ddp_add_if if this is home */ - elapp->ifFlags |= AT_IFF_DEFAULT; - - /* Get DDP started */ - if ((errno = ddp_add_if(elapp))) - return(errno); - - // check if we still have an interface - can be lost when - // ddp_add_if calls malloc - // need to make check here after ddp_add_if completes because - // lap_online will call ddp_rem_if if we fail here - if (elapp->aa_ifp == 0) - return ENOENT; - - /* set up multicast address for cable-wide broadcasts */ - (void)at_reg_mcast(elapp, (caddr_t)&elapp->cable_multicast_addr); - - // need to check again if interface is present - // can be lost in at_reg_mcast - if (elapp->aa_ifp == 0) - return ENOENT; - - elapp->startup_inprogress = TRUE; - if (! 
(elapp->startup_error = re_aarp(elapp))) { - lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); - (void)msleep(&elapp->startup_inprogress, atalk_mutex, PSOCK | PCATCH, - "elap_online1", 0); - } - - /* then later, after some timeouts AARPwakeup() is called */ - - return(elapp->startup_error); -} /* elap_online1 */ - -static int re_aarp(elapp) - at_ifaddr_t *elapp; -{ - int errno; - - /* We now call aarp_init() to assign an appletalk node addr */ - errno = aarp_init1(elapp); - /* aarp_init1() returns either -1 or ENOTREADY */ - if (errno == ENOTREADY) - return(0); - else { - dPrintf(D_M_ELAP, D_L_STATE_CHG, - ("elap_online aarp_init for %s\n", elapp->ifName)); - (void)at_unreg_mcast(elapp, (caddr_t)&elapp->cable_multicast_addr); - ddp_rem_if(elapp); - elapp->ifState = LAP_OFFLINE; - return(EADDRNOTAVAIL); - } -} - -/* called from AARPwakeup */ -static void elap_online2(elapp) - at_ifaddr_t *elapp; -{ - if (MULTIPORT_MODE) { - dPrintf(D_M_ELAP,D_L_STARTUP_INFO, - ("elap_online: re_aarp, we know it's a router...\n")); - - if (elapp->flags & ELAP_CFG_SEED) { - /* add route table entry (zones to be added later) */ - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, - ("elap_online: rt_insert Cable %d-%d port =%d as SEED\n", - elapp->ifThisCableStart, elapp->ifThisCableEnd, elapp->ifPort)); - rt_insert(elapp->ifThisCableEnd, - elapp->ifThisCableStart, - 0,0,0, - elapp->ifPort, - RTE_STATE_PERMANENT | RTE_STATE_ZKNOWN | RTE_STATE_GOOD - ); - /* LD 081694: set the RTR_SEED_PORT flag for seed ports */ - elapp->ifFlags |= RTR_SEED_PORT; - } -#if DEBUG - else - dPrintf(D_M_ELAP,D_L_STARTUP_INFO, - ("elap_online: it's a router, but non seed\n")); -#endif - } - - if (elapp->flags & ELAP_CFG_ZONELESS) { - /* ELAP_CFG_ZONELESS tells us that it is a router or in - multihome mode, so we don't want to do the GetNetInfo - exchange with the router. */ - - elapp->ifState = LAP_ONLINE_ZONELESS; - elapp->startup_inprogress = FALSE; - wakeup(&elapp->startup_inprogress); - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, ("elap_online: ack 3\n")); - return; - } - - /* if we don't already have a zone and a multicast address */ - if (*(int *)&elapp->ZoneMcastAddr == 0 || elapp->ifZoneName.len == 0) { - /* hzonehash is a global containing the nbp hash for the startup_zone */ - sethzonehash(elapp); - - /* Get ZIP rolling to get zone multicast address, etc. */ - elapp->ifState = LAP_ONLINE_FOR_ZIP; - (void)zip_control(elapp, ZIP_ONLINE); - /* zip_control (w. control == ZIP_ONLINE) always returns ENOTREADY */ - - /* later, after some timeouts ZIPwakeup() is called. 
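 - *
 - * Editor's summary of the whole bring-up sequence implemented in this
 - * file, tying the pieces together:
 - *
 - *   elap_online1() -> re_aarp()/aarp_init1() probe ... AARPwakeup()
 - *     -> aarp_init2() -> elap_online2() -> zip_control(ZIP_ONLINE)
 - *     ... ZIPwakeup() -> ifState = LAP_ONLINE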
*/ - } else { - /* otherwise, we have the zone and the multicast already, - so don't bother with another ZIP GetNetInfo request */ - ZIPwakeup(elapp, 0); - } -} /* elap_online2 */ - -/* called from rtmp_router_start */ -int elap_online3(elapp) - at_ifaddr_t *elapp; -{ - elapp->startup_inprogress = TRUE; - - /* just reset the net range */ - elapp->initial_addr.s_net = 0; - elapp->initial_addr.s_node = 0; - dPrintf(D_M_ELAP_LOW, D_L_STARTUP_INFO, - ("elap_online: goto re_aarp port=%d\n", elapp->ifPort)); - - if ((elapp->startup_error = re_aarp(elapp))) - return(elapp->startup_error); - - /* then later, after some timeouts AARPwakeup() is called */ - - lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); - (void)msleep(&elapp->startup_inprogress, atalk_mutex, PSOCK | PCATCH, - "elap_online3", 0); - return(elapp->startup_error); -} /* elap_online3 */ - -/**************************************************************************** - * elap_offline() - * - ****************************************************************************/ - -void elap_offline(elapp) - register at_ifaddr_t *elapp; - -{ - dPrintf(D_M_ELAP, D_L_SHUTDN_INFO, ("elap_offline:%s\n", elapp->ifName)); - if (elapp->ifState != LAP_OFFLINE) { - - /* Since AppleTalk is going away, remove the cable - * multicast address and turn the interface off so that all - * AppleTalk packets are dropped in the driver itself. - * Get rid of the zone multicast address prior to going Offline. - */ - (void)at_unreg_mcast(elapp, (caddr_t)&elapp->ZoneMcastAddr); - (void)at_unreg_mcast(elapp, (caddr_t)&elapp->cable_multicast_addr); - elapp->ifState = LAP_OFFLINE; - - if (MULTIPORT_MODE) - RT_DELETE(elapp->ifThisCableEnd, - elapp->ifThisCableStart); - - /* make sure no zip timeouts are left running */ - elapp->ifGNIScheduled = 0; - untimeout(zip_sched_getnetinfo, elapp); - } - ddp_rem_if(elapp); -} /* elap_offline */ - - -static void add_route(rt) -RT_entry *rt; - -/* support ioctl to manually add routes to table. - this is really only for testing -*/ -{ - rt_insert( rt->NetStop, rt->NetStart, rt->NextIRNet, - rt->NextIRNode, rt->NetDist, rt->NetPort, - rt->EntryState); - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, ("adding route: %ud:%ud dist:%ud\n", - rt->NetStart, rt->NetStop,rt->NetDist)); -} - -/* - * ddp_start() - * - * Initialization that takes place each time AppleTalk is restarted. - * - */ -void ddp_start() -{ - TAILQ_INIT(&at_ifQueueHd); - TAILQ_INIT(&name_registry); - bzero(at_interfaces, sizeof(at_interfaces)); - bzero(ifID_table, sizeof(ifID_table)); - bzero(&at_ddp_stats, sizeof(at_ddp_stats_t)); - rtmp_init(); /* initialize trackedrouters */ - - add_ddp_handler(RTMP_SOCKET, rtmp_input); - ifID_home = (at_ifaddr_t *)NULL; - xpatcnt = 0; -} - -int ddp_shutdown(count_only) - int count_only; -{ - at_ifaddr_t *ifID; - asp_scb_t *scb, *scb_next; - struct atp_state *atp, *atp_next; - CCB *sp, *sp_next; - gref_t *gref; - int i, active_skts = 0; /* count of active pids for non-socketized - AppleTalk protocols */ - - /* Network is shutting down... send error messages up on each open - * socket. - *** For now, for ASP, ATP and ADSP, attempt to notify open - sockets, but return EBUSY and don't complete shutdown. 
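 - *
 - * A presumed caller pattern (editor's illustration; the actual caller
 - * sits outside this hunk):
 - *
 - *     if (ddp_shutdown(1) > 0)   ... count pass: anything still open?
 - *         return(EBUSY);
 - *     (void)ddp_shutdown(0);     ... notify and tear down for real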
*** - */ - - if (!count_only) - nbp_shutdown(); /* clear all known NVE */ - - /* ASP */ - for (scb = scb_used_list; scb; ) { - scb_next = scb->next_scb; - active_skts++; - if (!count_only) { - dPrintf(D_M_ASP, D_L_TRACE, ("asp pid=%d\n", scb->pid)); - atalk_notify(scb->gref, ESHUTDOWN); - } - scb = scb_next; - } - for (i = 0; i < 256 ; i++) { - if ((scb = asp_scbQ[i])) - do { - scb_next = scb->next_scb; - active_skts++; - if (!count_only) { - dPrintf(D_M_ASP, D_L_TRACE, - ("asp pid=%d\n", scb->pid)); - atalk_notify(scb->gref, ESHUTDOWN); - } - scb = scb_next; - } while (scb); - } - - /* ATP */ - for (atp = atp_used_list; atp; ) { - atp_next = atp->atp_trans_waiting; - active_skts++; - if (!count_only) { - dPrintf(D_M_ATP, D_L_TRACE, ("atp pid=%d\n", atp->atp_pid)); - atalk_notify(atp->atp_gref, ESHUTDOWN); - } - atp = atp_next; - } - for (i = 0; i < 256; i++) { - if ((gref = atp_inputQ[i]) && (gref != (gref_t *)1)) { - atp = (struct atp_state *)gref->info; - if (!atp->dflag) { - active_skts++; - if (!count_only) { - dPrintf(D_M_ATP, D_L_TRACE, - ("atp pid=%d\n", atp->atp_pid)); - atalk_notify(atp->atp_gref, ESHUTDOWN); - } - } - } - } - - /* ADSP */ - for (sp = ccb_used_list; sp ; ) { - sp_next = sp->otccbLink; - active_skts++; - if (!count_only) { - dPrintf(D_M_ADSP, D_L_TRACE, ("adsp pid=%d\n", sp->pid)); - atalk_notify(sp->gref, ESHUTDOWN); - } - sp = sp_next; - } - for (i = 0; i < 256 ; i++) { - if ((sp = adsp_inputQ[i])) - do { - sp_next = sp->otccbLink; - active_skts++; - if (!count_only) { - dPrintf(D_M_ADSP, D_L_TRACE, - ("adsp pid=%d\n", sp->pid)); - atalk_notify(sp->gref, ESHUTDOWN); - } - sp = sp_next; - } while (sp); - } - - /* DDP */ - for (gref = ddp_head.atpcb_next; gref != &ddp_head; - gref = gref->atpcb_next) { - if (count_only) { - active_skts++; - } else { - dPrintf(D_M_DDP,D_L_TRACE, ("ddp pid=%d\n", gref->pid)); - atalk_notify(gref, ESHUTDOWN); - } - } - if (count_only) - return(active_skts); - - /* if there are no interfaces in the process of going online, continue shutting down DDP */ - for (i = 0; i < IF_TOTAL_MAX; i++) { - if (at_interfaces[i].startup_inprogress == TRUE) - return(1); - } - if (MULTIPORT_MODE) { - rtmp_shutdown(); - /* free memory allocated for the rtmp/zip tables */ - if (ZT_table) { - FREE(ZT_table, M_RTABLE); - ZT_table = (ZT_entry *)NULL; - } - if (RT_table) { - FREE(RT_table, M_RTABLE); - RT_table = (RT_entry *)NULL; - } - } - - at_state.flags = 0; /* make sure inits are done on restart */ - - wakeup(&ifID_home->startup_inprogress); /* if rtmp_router_start still starting up */ - - /* from original ddp_shutdown() */ - routershutdown(); - ddp_brt_shutdown(); - - if (adspInited) { - CleanupGlobals(); - adspInited = 0; - } - - - dPrintf(D_M_DDP, D_L_VERBOSE, ("DDP shutdown completed")); - - /* - * make sure we don't have a probe timeout hanging around - * it's going to try and make use of an entry in at_interfaces - * which is going to be zero'd out by the call to ddp_start a - * little further down - */ - untimeout(aarp_sched_probe, 0); - - /* *** after an SIOCSIFADDR and before an AIOCSIFADDR, - this is the only place to find the ifID *** */ - for (i = 0; i < IF_TOTAL_MAX; i++) { - ifID = &at_interfaces[i]; - /* do LAP_IOC_OFFLINE processing */ - elap_offline(ifID); - } - ddp_start(); - - return(0); -} /* ddp_shutdown */ - -int routerStart(keP) - at_kern_err_t *keP; -{ - register at_ifaddr_t *ifID; - int error; - struct timespec ts; - - if (! 
ifID_home) - return(EINVAL); - - /* - * this will cause the ports to glean from the net the relevant - * information before forwarding - */ - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, - ("routerStart Port %d (%s) set to activating\n", - ifID->ifPort, ifID->ifName)); - ifID->ifRoutingState = PORT_ACTIVATING; - ifID->ifFlags |= RTR_XNET_PORT; - } - - /* - * The next step is to check the information for each port before - * declaring the ports up and forwarding - */ - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, - ("router_start: waiting 20 sec before starting up\n")); - - lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); - /* sleep for 20 seconds */ - - /* the value of 10 in terms of hz is 100ms */ - ts.tv_sec = 20; - ts.tv_nsec = 0; - - if ((error = - /* *** eventually this will be the ifID for the interface - being brought up in router mode *** */ - msleep(&ifID_home->startup_inprogress, atalk_mutex, - PSOCK | PCATCH, "routerStart", &ts)) - != EWOULDBLOCK) { -/* - if (!error) - panic("routerStart: spurious interrupt"); -*/ - return(error); - } - - return(rtmp_router_start(keP)); - /* was timeout(rtmp_router_start, 0, 20 * SYS_HZ); */ -} /* routerStart */ - -void ZIPwakeup(elapp, ZipError) - at_ifaddr_t *elapp; - int ZipError; -{ - int error = ZipError; - - if ( (elapp != NULL) && elapp->startup_inprogress) { - - /* was ZIPContinue */ - /* was elapp_online() with jump to ZIP_sleep */ - - /* instead of the goto ZIP_sleep ... */ - switch (ZipError) { - case 0 : /* success */ - elapp->ifState = LAP_ONLINE; - - /* Send event with zone info. */ - atalk_post_msg(elapp->aa_ifp, KEV_ATALK_ZONEUPDATED, 0, &(elapp->ifZoneName)); - - break; - case ZIP_RE_AARP : - /* instead of goto re_aarp; */ - /* We now call aarp_init() to assign an - appletalk node addr */ - if ((elapp->startup_error = re_aarp(elapp))) { - elapp->startup_inprogress = FALSE; - wakeup(&elapp->startup_inprogress); - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, - ("elap_online: ack 2\n")); - } - break; - default : - break; - } - if (ZipError != ZIP_RE_AARP) { - elapp->startup_error = error; - elapp->startup_inprogress = FALSE; - wakeup(&elapp->startup_inprogress); - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, - ("elap_online: ifZipError=%d\n", error)); - } - } -} /* ZIPwakeup */ - -void AARPwakeup(probe_cb) - aarp_amt_t *probe_cb; -{ - int errno; - at_ifaddr_t *elapp; - - elapp = probe_cb->elapp; - if ( (elapp != NULL) && elapp->startup_inprogress && elapp->aa_ifp != 0) { - - /* was AARPContinue */ - errno = aarp_init2(elapp); - /* aarp_init2() returns either -1 or 0 */ - if (errno != 0) { - dPrintf(D_M_ELAP, D_L_STATE_CHG, - ("elap_online aarp_init for %s\n", - elapp->ifName)); - (void)at_unreg_mcast(elapp, (caddr_t)&elapp->ZoneMcastAddr); - (void)at_unreg_mcast(elapp, (caddr_t)&elapp->cable_multicast_addr); - elapp->ifState = LAP_OFFLINE; - ddp_rem_if(elapp); - elapp->startup_error = EADDRNOTAVAIL; - elapp->startup_inprogress = FALSE; - wakeup(&elapp->startup_inprogress); - dPrintf(D_M_ELAP, D_L_STARTUP_INFO, ("elap_online: ack 2\n")); - } else { - dPrintf(D_M_ELAP,D_L_STARTUP_INFO, - ("elap_online: aarp_init returns zero\n")); - elap_online2(elapp); - } - } -} /* AARPwakeup */ - -void ddp_bit_reverse(addr) - unsigned char *addr; -{ -static unsigned char reverse_data[] = { - 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, - 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, - 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, - 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, - 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, -
0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, - 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, - 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, - 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, - 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, - 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, - 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, - 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, - 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, - 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, - 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, - 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, - 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, - 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, - 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, - 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, - 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, - 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, - 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, - 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, - 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, - 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, - 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, - 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, - 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, - 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, - 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff - }; - - unsigned char k; - - for (k=0; k < 6; k++) - addr[k] = reverse_data[addr[k]]; -} - -static int elap_trackMcast(at_ifaddr_t *, int, caddr_t); - -static int elap_trackMcast(patp, func, addr) - at_ifaddr_t *patp; - int func; - caddr_t addr; -{ - int i, loc=-1; - u_char c; - switch(patp->aa_ifp->if_type) { - case IFT_ETHER: - case IFT_FDDI: - case IFT_L2VLAN: - case IFT_IEEE8023ADLAG: /* bonded ethernet */ - /* set addr to point to unique part of addr */ - c = addr[5]; - - /* first try to find match */ - /* *** save just one byte of the multicast address? *** */ - for (i=0; i< MAX_MCASTS; i++) - if (c == patp->mcast[i]) { - loc = i; - break; - } - - switch (func) { - case MCAST_TRACK_DELETE: - if (loc >= 0) - patp->mcast[loc] = 0; - - break; - case MCAST_TRACK_ADD: - dPrintf(D_M_PAT_LOW, D_L_USR2, ("mctrack:add loc:%d\n", i)); - if (loc >= 0) { - dPrintf(D_M_PAT_LOW, D_L_USR2, ("mctrack:add, addr was there\n")); - return(1); - break; /* already there */ - } - for (i=0; i< MAX_MCASTS; i++) - if (patp->mcast[i] == 0) { - loc = i; - break; - } - dPrintf(D_M_PAT_LOW, D_L_USR2, ("mctrack:add1 loc:%d\n", i)); - if (loc >= 0) { - patp->mcast[loc] = c; - dPrintf(D_M_PAT_LOW, D_L_USR2, ("mctrack:add, adding(%x)\n", - (*(int*)addr)&0xffffff)); - } - else { - /*errno = ENOMEM; */ /*LD 5/7/97 nobody is using that */ - return(-1); - } - break; - case MCAST_TRACK_CHECK: - if (loc >= 0) { - dPrintf(D_M_PAT_LOW, D_L_USR2, ("mctrack:check, addr was there\n")); - return(0); - } - else { - dPrintf(D_M_PAT_LOW, D_L_USR2, ("mctrack:add, addr was NOT there\n")); - return(-1); - } - - default: - /*errno = EINVAL;*/ /*LD 5/7/97 nobody is using that */ - return(-1); - } - - case IFT_ISO88025: /* token ring */ - /* we would use the lowest byte of the addr argument as a value - to shift left a 1 to form the mcast mask for TR. 
We'll do this - when the time comes - */ - default: - ; - } - return(0); -} - - -static int getSnmpCfg(snmp) - snmpCfg_t *snmp; -{ - int i; - at_ifaddr_t *elapp; - snmpIfCfg_t *ifc; - - snmp->cfg_ifCnt = 0; - - bzero(snmp,sizeof(snmpCfg_t)); - for (i=0, elapp=at_interfaces,ifc=snmp->cfg_ifCfg; - i<IF_TOTAL_MAX; i++, elapp++, ifc++) { - if (elapp->ifState != LAP_OFFLINE) { - snmp->cfg_ifCnt++; - strlcpy(ifc->ifc_name,elapp->ifName, sizeof(ifc->ifc_name)); - ifc->ifc_aarpSize = getAarpTableSize(i); - ifc->ifc_addrSize = getPhysAddrSize(i); - switch (elapp->aa_ifp->if_type) { - case IFT_ETHER: - case IFT_L2VLAN: - case IFT_IEEE8023ADLAG: /* bonded ethernet */ - ifc->ifc_type = SNMP_TYPE_ETHER2; - break; - case IFT_ISO88025: /* token ring */ - ifc->ifc_type = SNMP_TYPE_TOKEN; - break; - case IFT_FDDI: - default: - ifc->ifc_type = SNMP_TYPE_OTHER; - break; - } - ifc->ifc_start = elapp->ifThisCableStart; - ifc->ifc_end = elapp->ifThisCableEnd; - ifc->ifc_ddpAddr= elapp->ifThisNode; - ifc->ifc_status = elapp->ifState == LAP_ONLINE ? 1 : 2; - ifc->ifc_zoneName.len = 0; - if (elapp->ifZoneName.len != 0) { - ifc->ifc_zoneName = elapp->ifZoneName; - } - else if (elapp->ifDefZone) { - ifc->ifc_zoneName = ZT_table[elapp->ifDefZone-1].Zone; - } - else /* temp, debug only */ - ifc->ifc_zoneName = ZT_table[0].Zone; - if (ROUTING_MODE) { - if (elapp->ifFlags & RTR_SEED_PORT) { - ifc->ifc_netCfg = SNMP_CFG_CONFIGURED; - ifc->ifc_zoneCfg = SNMP_CFG_CONFIGURED; - } - else { - ifc->ifc_netCfg = SNMP_CFG_GARNERED; - ifc->ifc_zoneCfg = SNMP_CFG_GARNERED; - } - } - else { /* single-port mode */ - if (elapp->ifRouterState == ROUTER_AROUND) { - ifc->ifc_netCfg = SNMP_CFG_GARNERED; - } - else { - ifc->ifc_netCfg = SNMP_CFG_GUESSED; - ifc->ifc_zoneCfg = SNMP_CFG_UNCONFIG; - } - } - } - } - snmp->cfg_flags = at_state.flags; - - - return(0); -} - -int at_reg_mcast(ifID, data) - at_ifaddr_t *ifID; - caddr_t data; -{ - struct ifnet *nddp = ifID->aa_ifp; - struct sockaddr_dl sdl; - - if (*(int *)data) { - if (!nddp) { - dPrintf(D_M_PAT, D_L_STARTUP, ("pat_mcast: BAD ndpp\n")); - return(-1); - } - - if (elap_trackMcast(ifID, MCAST_TRACK_ADD, data) == 1) - return(0); - - /* this is for ether_output */ - bzero(&sdl, sizeof(sdl)); - sdl.sdl_family = AF_LINK; - sdl.sdl_alen = sizeof(struct etalk_addr); - sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data) - + sizeof(struct etalk_addr); - bcopy(data, sdl.sdl_data, sizeof(struct etalk_addr)); - /* these next two lines should not really be needed XXX */ - sdl.sdl_index = nddp->if_index; - sdl.sdl_type = IFT_ETHER; - - dPrintf(D_M_PAT, D_L_STARTUP, - ("pat_mcast: adding multicast %08x%04x ifID:0x%x\n", - *(unsigned*)data, (*(unsigned *)(data+2))&0x0000ffff, - (unsigned)ifID)); - - if (if_addmulti_anon(nddp, (struct sockaddr *)&sdl, NULL)) - return -1; - } - return 0; - -} - -int at_unreg_mcast(ifID, data) - at_ifaddr_t *ifID; - caddr_t data; -{ - struct ifnet *nddp = ifID->aa_ifp; - struct sockaddr_dl sdl; - - if (*(int *)data) { - if (!nddp) { - dPrintf(D_M_PAT, D_L_STARTUP, ("pat_mcast: BAD ndpp\n")); - return(-1); - } - - elap_trackMcast(ifID, MCAST_TRACK_DELETE, data); - - /* this is for ether_output */ - bzero(&sdl, sizeof(sdl)); - sdl.sdl_family = AF_LINK; - sdl.sdl_alen = sizeof(struct etalk_addr); - sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data) - + sizeof(struct etalk_addr); - bcopy(data, sdl.sdl_data, sizeof(struct etalk_addr)); - /* these next two lines should not really be needed XXX */ - sdl.sdl_index = nddp->if_index; - sdl.sdl_type = IFT_ETHER; - - dPrintf(D_M_PAT, D_L_STARTUP, - ("pat_mcast: deleting
multicast %08x%04x ifID:0x%x\n", - *(unsigned*)data, (*(unsigned *)(data+2))&0x0000ffff, - (unsigned)ifID)); - bzero(data, sizeof(struct etalk_addr)); - - if (if_delmulti_anon(nddp, (struct sockaddr *)&sdl)) - return -1; - } - return 0; -} -#ifdef NOT_YET -/* *** at_reg_mcast() and at_unreg_mcast() should be replaced as soon as the - new code to allow an AF_LINK address family multicast to be (un)registered - using the SIOCADDMULTI / SIOCDELMULTI ioctls has been completed. - - The issue is that the "struct sockaddr_dl" needed for the AF_LINK does not - fit in the "struct ifreq" that is used for these ioctls, and we do not want - Blue/Classic, which currently uses AF_UNSPEC, to use a different address - family multicast address than Mac OS X uses. - *** */ - -int at_reg_mcast(ifID, data) - at_ifaddr_t *ifID; - caddr_t data; -{ - struct ifnet *nddp = ifID->aa_ifp; - struct sockaddr_dl sdl; - - if (*(int *)data) { - if (!nddp) { - dPrintf(D_M_PAT, D_L_STARTUP, ("pat_mcast: BAD ndpp\n")); - return(-1); - } - if (elap_trackMcast(ifID, MCAST_TRACK_ADD, data) == 1) - return(0); - - sdl.sdl_len = sizeof(struct sockaddr_dl); - sdl.sdl_family = AF_LINK; - sdl.sdl_index = 0; - sdl.sdl_type = nddp->if_type; - sdl.sdl_alen = nddp->if_addrlen; - sdl.sdl_slen = 0; - sdl.sdl_nlen = sprintf(sdl.sdl_data, "%s%d", - nddp->if_name , nddp->if_unit); - bcopy(data, LLADDR(&sdl), sdl.sdl_alen); - - dPrintf(D_M_PAT, D_L_STARTUP, - ("pat_mcast: adding multicast %08x%04x ifID:0x%x\n", - *(unsigned*)data, (*(unsigned *)(data+2))&0x0000ffff, - (unsigned)ifID)); - - if (if_addmulti_anon(nddp, (struct sockaddr *)&sdl, NULL)) - return -1; - } - - return 0; -} - -int at_unreg_mcast(ifID, data) - at_ifaddr_t *ifID; - caddr_t data; -{ - struct ifnet *nddp = ifID->aa_ifp; - struct sockaddr_dl sdl; - - if (*(int *)data) { - if (!nddp) { - dPrintf(D_M_PAT, D_L_STARTUP, ("pat_mcast: BAD ndpp\n")); - return(-1); - } - - elap_trackMcast(ifID, MCAST_TRACK_DELETE, data); - - sdl.sdl_len = sizeof(struct sockaddr_dl); - sdl.sdl_family = AF_LINK; - sdl.sdl_index = 0; - sdl.sdl_type = nddp->if_type; - sdl.sdl_alen = nddp->if_addrlen; - sdl.sdl_slen = 0; - sdl.sdl_nlen = sprintf(sdl.sdl_data, "%s%d", - nddp->if_name , nddp->if_unit); - - dPrintf(D_M_PAT, D_L_STARTUP, - ("pat_mcast: deleting multicast %08x%04x ifID:0x%x\n", - *(unsigned*)data, (*(unsigned *)(data+2))&0x0000ffff, - (unsigned)ifID)); - bzero(data, ETHERNET_ADDR_LEN); - - if (if_delmulti_anon(nddp, (struct sockaddr *)&sdl)) - return(-1); - } - - return 0; -} - -#endif diff --git a/bsd/netat/ddp_nbp.c b/bsd/netat/ddp_nbp.c deleted file mode 100644 index 698b1083e..000000000 --- a/bsd/netat/ddp_nbp.c +++ /dev/null @@ -1,1538 +0,0 @@ -/* - * Copyright (c) 1998-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* router */ -#include -#include - -/* reaching for DDP and NBP headers in the datagram */ -#define DATA_DDP(mp) ((at_ddp_t *)(gbuf_rptr(mp))) -#define DATA_NBP(mp) ((at_nbp_t *)((DATA_DDP(mp))->data)) - -/* Get to the nve_entry_t part of the buffer */ -#define NVE_ENTRY(mp) (nve_entry_t *)(gbuf_rptr(mp)) - -#ifndef MIN -#define MIN(a,b) ((a)>(b)?(b):(a)) -#endif - -#define errno nbperrno - - /* externs */ -extern at_ifaddr_t *ifID_table[]; -extern at_ifaddr_t *ifID_home; - -TAILQ_HEAD(name_registry, _nve_) name_registry; - - -/* statics */ -static int errno; -static gbuf_t *lzones=0; /* head of local zones list */ -static int lzonecnt=0; /* # zones stored in lzones */ -static u_int hzonehash=0; /* hash val of home zone */ - -static int nbp_lkup_reply(nbp_req_t *, nve_entry_t *); -static int nbp_strcmp(at_nvestr_t *, at_nvestr_t *, u_char); -static int nbp_setup_resp(nbp_req_t *, int); -static int nbp_send_resp(nbp_req_t *); -static int nbp_validate_n_hash(nbp_req_t *, int, int); -static nve_entry_t *nbp_search_nve(nbp_req_t *, at_ifaddr_t *); -static int isZoneLocal(at_nvestr_t *); -static int nbp_enum_gen (nve_entry_t *); -static void nbp_setup_hdr (nbp_req_t *); -static void nbp_upshift (u_char *, int); -static u_char *nbp2zone(at_nbp_t *, u_char *); - -/* macros */ -#define NVE_LOCK nve_lock - - -static long nbp_id_count = 0; - -void sethzonehash(elapp) - at_ifaddr_t *elapp; -{ - if (elapp->startup_zone.len) { - hzonehash = nbp_strhash(&elapp->startup_zone); - } -} - -void nbp_shutdown(void) -{ - /* delete all NVE's and release buffers */ - register nve_entry_t *nve_entry, *nve_next; - - for ((nve_entry = TAILQ_FIRST(&name_registry)); nve_entry; nve_entry = nve_next) { - nve_next = TAILQ_NEXT(nve_entry, nve_link); - - /* NB: nbp_delete_entry calls TAILQ_REMOVE */ - nbp_delete_entry(nve_entry); - } - - if (lzones) { - gbuf_freem(lzones); - lzonecnt = 0; - lzones = NULL; - } -} /* nbp_shutdown */ - -static -u_char *nbp2zone(nbp, maxp) - at_nbp_t *nbp; - u_char *maxp; -{ - - u_char *p; - - p = (u_char*)&nbp->tuple[0].enu_entity; /* p -> object */ - if (p >= maxp) return NULL; - p += (*p +1); /* p -> type */ - if (p >= maxp) return NULL; - p += (*p +1); /* p -> zone */ - if (p >= maxp) return NULL; - if ((p + *p) >= maxp) return NULL; - return(p); -} - -void nbp_input(m, ifID) - register gbuf_t *m; - register at_ifaddr_t *ifID; - -{ - register at_ddp_t *ddp = DATA_DDP(m); - register at_nbp_t *nbp = DATA_NBP(m); - register RT_entry *rt; - register int ddpSent = FALSE; /* true if we re-sent this pkt (don't free) */ - struct etalk_addr mcastAddr; - nbp_req_t nbp_req; - u_char *p; - - /* from original nbp_input() when this function was nbp_handler()
*/ - if ((gbuf_type(m) != MT_DATA && gbuf_type(m) != MSG_DATA) || - ddp->type != DDP_NBP) { - gbuf_freem(m); - return; - } - - /* Some initializations */ - nbp_req.response = NULL; - nbp_req.request = m; - nbp_req.space_unused = nbp_req.flags = 0; - - dPrintf(D_M_NBP_LOW, D_L_USR1, - ("nbp_input control:%d tuplecount:%d id:%d\n", - nbp->control, nbp->tuple_count, nbp->at_nbp_id)); - switch (nbp->control) { - case NBP_LKUP : - { - at_net_al dst_net; - - dst_net = NET_VALUE(ddp->dst_net); - dPrintf(D_M_NBP_LOW, D_L_USR2, (" LKUP %s\n", - ifID != ifID_home ? "non-home" : "home")); - if ( ROUTING_MODE && (NET_VALUE(ddp->dst_net) != 0) - && ((dst_net < ifID->ifThisCableStart) - || (dst_net > ifID->ifThisCableEnd)) ) { - routing_needed(m, ifID, TRUE); - ddpSent = TRUE; - break; - } - } - - if (nbp_validate_n_hash (&nbp_req, TRUE, FALSE) == 0) { - nbp_req.func = nbp_lkup_reply; - (void) nbp_search_nve(&nbp_req, ifID); - if (nbp_req.response) { - nbp_send_resp(&nbp_req); - } - } -#ifdef NBP_DEBUG - { - char zone[35],object[35],type[35]; - strlcpy(zone,nbp_req.nve.zone.str, sizeof(zone)); - strlcpy(object,nbp_req.nve.object.str, sizeof(object)); - strlcpy(type,nbp_req.nve.type.str, sizeof(type)); - if (ifID != ifID_home) - dPrintf(D_M_NBP_LOW,D_L_USR2, - ("nbp_LKUP for:%s:%s@%s", object, type, zone)); - } -#endif /* NBP_DEBUG */ - - break; - case NBP_FWDRQ: - { - register int zhome=0; - /* true if home zone == destination zone */ - register int zno, i; - register gbuf_t *m2; - register int error_found =0; - register at_ifaddr_t *ifIDorig; - - if (!ROUTING_MODE) /* for routers only! */ - break; - - ifIDorig = ifID; - ifID= NULL; - for (i = 0 ; i < RT_maxentry; i++) { - rt = &RT_table[i]; - if ((rt->EntryState & RTE_STATE_PERMANENT) && - NET_VALUE(ddp->dst_net) >= rt->NetStart && - NET_VALUE(ddp->dst_net) <= rt->NetStop - ) { - /* sanity check */ - if (rt->NetPort >= IF_TOTAL_MAX) { - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input:FWDREQ: bad port# from RT_table\n")); - error_found = TRUE; - break; - } - ifID = ifID_table[rt->NetPort]; - if (!ifID) { - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input:FWDREQ: ifID %s\n", - !ifID ? "not found" : "invalid")); - error_found = TRUE; - break; - } - if (ifID->ifState == LAP_OFFLINE) { - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input:FWDREQ: ifID offline (port %d)\n", - rt->NetPort)); - error_found = TRUE; - break; - } - break; - } - } - if (error_found) /* the port is not correct */ - break; - - if (!ifID) { /* this packet is not for us, let the routing engine handle it */ - routing_needed(m, ifIDorig, TRUE); - ddpSent= TRUE; - break; - } - - /* - * At this point, we have a valid Forward request for one of our - * directly connected port. Convert it to a NBP Lookup - */ - - nbp->control = NBP_LKUP; - NET_ASSIGN(ddp->dst_net, 0); - ddp->dst_node = 255; - - - /*### LD 01/18/94 Check if the dest is also the home zone. 
*/ - - p = nbp2zone(nbp, (u_char *)gbuf_wptr(m)); - if ((p == NULL) || !(zno = zt_find_zname((at_nvestr_t *)p))) { - dPrintf(D_M_NBP,D_L_WARNING, - ("nbp_input: FWDRQ:zone not found\n")); - break; - } - if (isZoneLocal((at_nvestr_t*)p)) - zhome = TRUE; /* one of our ports is in destination zone */ - if (!zt_get_zmcast(ifID, (at_nvestr_t*)p, (char *)&mcastAddr)) { - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input: FDWREQ:zt_get_zmcast error\n")); - break; - } - - - if (zhome) { /*### LD 01/18/95 In case our home is here, call back nbp */ - - if (!(m2 = (gbuf_t *)gbuf_copym((gbuf_t *)m))) { - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input: FWDRQ:gbuf_copym failed\n")); - break; - } - - ddp = DATA_DDP(m2); - nbp = DATA_NBP(m2); - nbp->control = NBP_LKUP; - NET_ASSIGN(ddp->dst_net, 0); - ddp->dst_node = 255; - dPrintf(D_M_NBP,D_L_INFO, - ("nbp_input: FWDRQ:loop back for us\n")); - nbp_input(m2, ifID_home); - } - - if (FDDI_OR_TOKENRING(ifID->aa_ifp->if_type)) - ddp_bit_reverse((unsigned char *)&mcastAddr); - ddp_router_output(m, ifID, ET_ADDR, 0, 0, &mcastAddr); - ddpSent = TRUE; - } - break; - - case NBP_BRRQ: - { - register int zno; /* zone table entry numb */ - register int ztind; /* zone bitmap index into RT_entry */ - register int ztbit; /* zone bit to check within above index */ - register int zhome=0; /* true if home zone == destination zone */ - register int i; - register gbuf_t *m2, *m3; - register int fromUs = FALSE; - register at_socket ourSkt = 0; /* originating skt */ - - /* for router & MH local only */ - if ((!(MULTIHOME_MODE && FROM_US(ddp))) && !ROUTING_MODE) { - dPrintf(D_M_NBP,D_L_USR2, - ("nbp_input: BRREQ:non router or MH local\n")); - - break; - } - p = nbp2zone(nbp, (u_char *)gbuf_wptr(m)); - if ((p == NULL) || !(zno = zt_find_zname((at_nvestr_t *)p))) { - break; - } - if (MULTIHOME_MODE && ifID->ifRouterState == NO_ROUTER) { - ((at_nvestr_t*)p)->len = 1; - ((at_nvestr_t*)p)->str[0] = '*'; - } - if (isZoneLocal((at_nvestr_t*)p)) { - zhome = TRUE; /* one of our ports is in destination zone */ - } - if (FROM_US(ddp)){ /* save, before we munge it */ - fromUs = TRUE; - ourSkt = ddp->src_socket; - dPrintf(D_M_NBP,D_L_USR2, - ("nbp_input:BRRQ from us net:%d\n", - (int)NET_VALUE(ddp->src_net))); - } - /* from ZT_CLR_ZMAP */ - i = zno - 1; - ztind = i >> 3; - ztbit = 0x80 >> (i % 8); - for (i=0,rt=RT_table; i<RT_maxentry; i++,rt++) { - if (!(rt->ZoneBitMap[ztind] & ztbit)) /* if zone not in route, skip*/ - continue; -/* dPrintf(D_M_NBP, D_L_USR3, - ("nbp_input: BRREQ: port:%d, entry %d\n", - rt->NetPort, i)); -*/ - - ifID = ifID_table[rt->NetPort]; - if (!ifID) { - dPrintf(D_M_NBP, D_L_ERROR, - ("nbp_input:BRRQ: ifID %s\n", - !ifID ?
"not found" : "invalid")); - break; - } - - ddp = DATA_DDP(m); - ddp->src_node = ifID->ifThisNode.s_node; - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - ddp->src_socket = NBP_SOCKET; - if (!(m2 = (gbuf_t *)gbuf_copym((gbuf_t *)m))) { - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input: BRREQ:gbuf_copym failed\n")); - break; - } - - ddp = DATA_DDP(m2); - nbp = DATA_NBP(m2); -/* nbp->tuple[0].enu_addr.socket = NBP_SOCKET; */ - if (MULTIHOME_MODE && fromUs ) { - /* set the return address of the lookup to that of the - interface it's going out on so that replies come back - on that net */ - dPrintf(D_M_NBP,D_L_USR3, - ("nbp_input: BRREQ: src changed to %d.%d.%d\n", - ifID->ifThisNode.s_net, - ifID->ifThisNode.s_node, ourSkt)); - nbp->tuple[0].enu_addr.net = htons(ifID->ifThisNode.s_net); - nbp->tuple[0].enu_addr.node = ifID->ifThisNode.s_node; - nbp->tuple[0].enu_addr.socket = ourSkt; - ddp->src_socket = NBP_SOCKET; - } -#if DEBUG - else - dPrintf(D_M_NBP, D_L_USR3, - ("nbp_input: BRREQ: not from us\n")); -#endif /* DEBUG */ - dPrintf(D_M_NBP, D_L_USR3, - ("nbp_input dist:%d\n", rt->NetDist)); - if (rt->NetDist == 0) { /* if direct connect, *we* do the LKUP */ - nbp->control = NBP_LKUP; - NET_ASSIGN(ddp->dst_net, 0); - ddp->dst_node = 255; - if (!zt_get_zmcast(ifID, (at_nvestr_t*)p, (char *)&mcastAddr)) { - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input: BRRQ:zt_get_zmcast error\n")); - break; - } - if (FDDI_OR_TOKENRING(ifID->aa_ifp->if_type)) - ddp_bit_reverse((unsigned char *)&mcastAddr); - ddp_router_output(m2, ifID, ET_ADDR, 0, 0, &mcastAddr); - } - else { /* else fwd to router */ - ddp->dst_node = 0; - if (rt->NetStart == 0) /* if Ltalk */ - NET_ASSIGN(ddp->dst_net, rt->NetStop); - else - NET_ASSIGN(ddp->dst_net, rt->NetStart); - nbp->control = NBP_FWDRQ; - ddp_router_output(m2, ifID, AT_ADDR, - rt->NextIRNet, rt->NextIRNode, - NULL); - } - } - if (!zhome) - break; - - if (!(m3 = (gbuf_t *)gbuf_copym((gbuf_t *)m))) { - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input: BRREQ:gbuf_copym failed\n")); - break; - } - - ddp = DATA_DDP(m3); - nbp = DATA_NBP(m3); - - nbp->control = NBP_LKUP; - NET_ASSIGN(ddp->dst_net, 0); - ddp->dst_node = 255; - dPrintf(D_M_NBP,D_L_INFO, ("nbp_input: BRRQ:loop back for us\n")); - nbp_input(m3, ifID_home); - break; - } - - case NBP_LKUP_REPLY: - - if (!ROUTING_MODE) /* for routers only! 
*/ - break; - - dPrintf(D_M_NBP,D_L_WARNING, - ("nbp_input: routing needed for LKUP_REPLY: from %d.%d\n", - NET_VALUE(ddp->src_net), ddp->src_node)); - routing_needed(m, ifID, TRUE); - ddpSent = TRUE; - break; - - default : - dPrintf(D_M_NBP,D_L_ERROR, - ("nbp_input: unhandled pkt: type:%d\n", nbp->control)); - - routing_needed(m, ifID, TRUE); - ddpSent = TRUE; - break; - } /* switch control */ - - if (!ddpSent) - gbuf_freem(m); - return; -} /* nbp_input */ - -static int nbp_validate_n_hash (nbp_req, wild_ok, checkLocal) - register nbp_req_t *nbp_req; - register int wild_ok; - register int checkLocal; /* if true check if local zone */ -{ - register at_nvestr_t *object, *type, *zone; - at_nbptuple_t *tuple; - register int i, part_wild; - - tuple = DATA_NBP(nbp_req->request)->tuple; - nbp_req->flags = 0; -#ifdef COMMENTED_OUT - { - int net,node,skt; - net = ntohs(tuple->enu_addr.net); - node = tuple->enu_addr.node; - skt = tuple->enu_addr.socket; - dPrintf(D_M_NBP_LOW,D_L_USR4, - ("nbp_validate: tuple addr:%d:%d:%d\n",net,node,skt)); - } -#endif /* COMMENTED_OUT */ - - /* tuple is in the compressed (no "filler") format */ - object = (at_nvestr_t *)&tuple->enu_entity; - type = (at_nvestr_t *)(&object->str[object->len]); - zone = (at_nvestr_t *)(&type->str[type->len]); - - if (object->len > NBP_NVE_STR_SIZE || type->len > NBP_NVE_STR_SIZE || - zone->len > NBP_NVE_STR_SIZE) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_val_n_hash: bad str len\n")); - errno = EINVAL; - return (-1); - } - -#ifdef NBP_DEBUG - { - char xzone[35],xobject[35],xtype[35]; - strlcpy(xzone,zone->str, sizeof(xzone)); - strlcpy(xobject,object->str, sizeof(xobject)); - strlcpy(xtype,type->str, sizeof(xtype)); - dPrintf(D_M_NBP_LOW, D_L_USR4, - ("nbp_validate: looking for %s:%s@%s\n", - xobject, xtype, xzone)); - } -#endif /* NBP_DEBUG */ - /* Is this request for our zone ?? 
*/ - nbp_req->nve.zone.len = zone->len; - nbp_req->nve.zone_hash = 0; - bcopy(zone->str,nbp_req->nve.zone.str, zone->len); - - if (checkLocal && !isZoneLocal(zone)) { - char str[35]; - strlcpy((char *)str,(char *)zone->str,sizeof(str)); - dPrintf(D_M_NBP_LOW,D_L_WARNING, - ("nbp_val_n_hash bad zone: %s\n", str)); - errno = EINVAL; - return(-1); - } - - if (!DEFAULT_ZONE(zone)) { - nbp_req->nve.zone_hash = nbp_strhash(& nbp_req->nve.zone); - } - - nbp_req->nve.address = tuple->enu_addr; - nbp_req->nve.object.len = object->len; - nbp_req->nve.object_hash = 0; - if (object->len == 1 && (object->str[0] == NBP_ORD_WILDCARD || - object->str[0] == NBP_SPL_WILDCARD)) { - if (wild_ok) - nbp_req->flags |= NBP_WILD_OBJECT; - else { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_val_n_hash: wild not okay\n")); - errno = EINVAL; - return (-1); - } - } else { - for (i = part_wild = 0; (unsigned) i < object->len; i++) { - if (object->str[i] == NBP_SPL_WILDCARD) { - if (wild_ok) { - if (part_wild) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_val_n_hash: too many parts wild\n")); - errno = EINVAL; - return (-1); - } else - part_wild++; - } else { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_val_n_hash: wild not okay2\n")); - errno = EINVAL; - return (-1); - } - } - nbp_req->nve.object.str[i] = object->str[i]; - } - if (!part_wild) - nbp_req->nve.object_hash = - nbp_strhash(&nbp_req->nve.object); - } - - nbp_req->nve.type.len = type->len; - nbp_req->nve.type_hash = 0; - if (type->len == 1 && (type->str[0] == NBP_ORD_WILDCARD || - type->str[0] == NBP_SPL_WILDCARD)) { - if (wild_ok) - nbp_req->flags |= NBP_WILD_TYPE; - else { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_val_n_hash: wild not okay3\n")); - errno = EINVAL; - return (-1); - } - } else { - for (i = part_wild = 0; (unsigned) i < type->len; i++) { - if (type->str[i] == NBP_SPL_WILDCARD) { - if (wild_ok) { - if (part_wild) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_val_n_hash: too many parts wild2\n")); - errno = EINVAL; - return (-1); - } else - part_wild++; - } else { - errno = EINVAL; - return (-1); - } - } - nbp_req->nve.type.str[i] = type->str[i]; - } - if (!part_wild) - nbp_req->nve.type_hash = - nbp_strhash(&nbp_req->nve.type); - } -#ifdef NBP_DEBUG - { - char zone[35],object[35],type[35]; - strlcpy(zone,nbp_req->nve.zone.str, sizeof(zone)); - strlcpy(object,nbp_req->nve.object.str, sizeof(object)); - strlcpy(type,nbp_req->nve.type.str, sizeof(type)); - dPrintf(D_M_NBP_LOW,D_L_USR4, - ("nbp_validate: after hash: %s:%s@%s\n", - object, type, zone)); - } -#endif /* NBP_DEBUG */ - return(0); -} /* nbp_validate_n_hash */ - - -/* Upshifts in place */ -static void nbp_upshift (str, count) -register u_char *str; -register int count; -{ - register int i, j; - register u_char ch; - static unsigned char lower_case[] = - {0x8a, 0x8c, 0x8d, 0x8e, 0x96, 0x9a, 0x9f, 0xbe, - 0xbf, 0xcf, 0x9b, 0x8b, 0x88, 0}; - static unsigned char upper_case[] = - {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0xae, - 0xaf, 0xce, 0xcd, 0xcc, 0xcb, 0}; - - for (j=0 ; j<count ; j++) { - ch = str[j]; - if (ch >= 'a' && ch <= 'z') - str[j] = ch + 'A' - 'a'; - else if (ch & 0x80) - for (i=0; lower_case[i]; i++) - if (ch == lower_case[i]) - str[j] = upper_case[i]; - } -} - - -u_int nbp_strhash (nvestr) - register at_nvestr_t *nvestr; -{ - /* upshift while hashing */ - register u_int hash = 0; - register int i, len; - union { - u_char h_4char[4]; - int h_int; - } un; - - for (i=0; (unsigned) i < nvestr->len; i+=sizeof(int)) { - len = MIN((size_t)(nvestr->len-i), sizeof(int)); - if (len == sizeof(int)) - bcopy(&(nvestr->str[i]), &un, sizeof(un)); - else { - un.h_int = -1; - for ( ; (unsigned) i < nvestr->len; i++) - un.h_4char[i % sizeof(int)] = nvestr->str[i]; - } - nbp_upshift (un.h_4char, len); - hash ^= un.h_int; - } - - return (hash); -} /* nbp_strhash */
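The hash above folds each NBP name into a word-sized accumulator: four bytes at a time are copied into a union, upshifted in place, and XORed in, with short tails padded by 0xff bytes, so case variants of a name hash identically. A minimal stand-alone rendering of the same scheme — user-space C with an ASCII-only upshift (the kernel's nbp_upshift() also folds Mac Roman accented characters) and a demo struct rather than the kernel's at_nvestr_t:

```c
#include <stdio.h>
#include <string.h>

typedef struct {
    unsigned char len;
    unsigned char str[32];
} demo_nvestr_t;

static unsigned int demo_strhash(const demo_nvestr_t *s)
{
    unsigned int hash = 0;
    int i, j, len;

    for (i = 0; i < s->len; i += (int)sizeof(unsigned int)) {
        union {
            unsigned char c[sizeof(unsigned int)];
            unsigned int w;
        } un;

        len = s->len - i;
        if (len > (int)sizeof(unsigned int))
            len = (int)sizeof(unsigned int);
        un.w = (unsigned int)-1;            /* short tails keep 0xff padding */
        memcpy(un.c, &s->str[i], (size_t)len);
        for (j = 0; j < len; j++)           /* ASCII-only upshift */
            if (un.c[j] >= 'a' && un.c[j] <= 'z')
                un.c[j] -= 'a' - 'A';
        hash ^= un.w;                       /* fold the word into the hash */
    }
    return hash;
}

int main(void)
{
    demo_nvestr_t a = { 6, "MyZone" };
    demo_nvestr_t b = { 6, "MYZONE" };

    /* Case variants hash identically, as NBP name matching requires. */
    printf("%08x %08x\n", demo_strhash(&a), demo_strhash(&b));
    return 0;
}
```

That case-insensitive equality of hashes is what lets the lookup routines below filter candidates by hash before paying for a full string compare.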
static nve_entry_t *nbp_search_nve (nbp_req, ifID) - register nbp_req_t *nbp_req; - register at_ifaddr_t *ifID; /* NULL ok */ -{ - register nve_entry_t *nve_entry; - -#ifdef NBP_DEBUG - { - char zone[35],object[35],type[35]; - strlcpy(zone,nbp_req->nve.zone.str, sizeof(zone)); - strlcpy(object,nbp_req->nve.object.str, sizeof(object)); - strlcpy(type,nbp_req->nve.type.str, sizeof(type)); - dPrintf(D_M_NBP_LOW, D_L_USR4, - ("nbp_search: looking for %s:%s@%s resp:0x%x\n",object,type,zone, - (u_int) nbp_req->response)); - } -#endif /* NBP_DEBUG */ - TAILQ_FOREACH(nve_entry, &name_registry, nve_link) { - if ((nbp_req->nve.zone_hash) && - ((nbp_req->nve.zone_hash != - nve_entry->zone_hash) && - (nbp_req->nve.zone_hash != hzonehash) - ) - ) { - dPrintf(D_M_NBP_LOW,D_L_USR4, - ("nbp_search: no match for zone, req hash:%x\n", - nbp_req->nve.zone_hash)); - continue; - } - else { /* for this entry's zone OR no zone in request or entry */ - /* only in singleport mode (!MULTIPORT_MODE) with - empty PRAM can an entry have '*' for its zone - */ - at_nvestr_t *ezone=&nve_entry->zone; - at_nvestr_t *rzone=&nbp_req->nve.zone; - if (!DEFAULT_ZONE(rzone) && !DEFAULT_ZONE(ezone)) { - if (nbp_strcmp (rzone, ezone, 0) != 0) - continue; - } - else { - if (MULTIHOME_MODE && ifID && - (nve_entry->address.net != - ifID->ifThisNode.s_net)) { - dPrintf(D_M_NBP, D_L_USR4, - ("nbp search ifID (%d) & req net (%d) not eq\n", - nve_entry->address.net, - ifID->ifThisNode.s_net)); - continue; - } -#if DEBUG - if (ifID) - dPrintf(D_M_NBP, D_L_USR4, - ("nbp search ifID (%d) & req net (%d) equal\n", - nve_entry->address.net, - ifID->ifThisNode.s_net)); -#endif /* DEBUG */ - } - - } - if (!(nbp_req->flags & NBP_WILD_OBJECT)) { - if ((nbp_req->nve.object_hash) && - (nbp_req->nve.object_hash != - nve_entry->object_hash)) - continue; - else { - if (nbp_strcmp (&nbp_req->nve.object, - &nve_entry->object, - NBP_SPL_WILDCARD) != 0) - continue; - } - } - - - if (!(nbp_req->flags & NBP_WILD_TYPE)) { - if ((nbp_req->nve.type_hash) && - (nbp_req->nve.type_hash !=nve_entry->type_hash)) - continue; - else { - if (nbp_strcmp (&nbp_req->nve.type, - &nve_entry->type, - NBP_SPL_WILDCARD) != 0) - continue; - } - } - - /* Found a match! */ -#ifdef NBP_DEBUG - { - char zone[35],object[35],type[35]; - - strlcpy(zone,nbp_req->nve.zone.str, sizeof(zone)); - strlcpy(object,nbp_req->nve.object.str, sizeof(object)); - strlcpy(type,nbp_req->nve.type.str, sizeof(type)); - dPrintf(D_M_NBP_LOW, D_L_USR2, - ("nbp_search: found %s:%s@%s net:%d\n", - object, type, zone, (int)nve_entry->address.net)); - } -#endif /* NBP_DEBUG */ - if (nbp_req->func != NULL) { - if ((*(nbp_req->func))(nbp_req, nve_entry) != 0) { - /* errno expected to be set by func */ - return (NULL); - } - } else - return (nve_entry); - } - - errno = 0; - return (NULL); -} /* nbp_search_nve */ - -static int nbp_lkup_reply (nbp_req, nve_entry) -register nbp_req_t *nbp_req; -register nve_entry_t *nve_entry; -{ - register at_nbptuple_t *tuple; - register int tuple_size, buf_len; - register int obj_len, type_len; - u_char *p; - - /* size of the current tuple we want to write... */ - tuple_size = nve_entry->object.len + 1 + /* object */ - nve_entry->type.len + 1 + /* type */ - 2 + /* zone */ - sizeof (at_inet_t) + 1; /* addr + enum */ - - buf_len = ((nbp_req->flags & NBP_WILD_MASK) ?
DDP_DATA_SIZE:tuple_size); - if (nbp_req->response == NULL) { - if (nbp_setup_resp (nbp_req, buf_len) != 0) - /* errno expected to be set by nbp_setup_resp() */ - return (-1); - } - - if ((nbp_req->space_unused < tuple_size) || - (DATA_NBP(nbp_req->response)->tuple_count == NBP_TUPLE_MAX)) { - if (nbp_send_resp (nbp_req) != 0) - return (-1); - if (nbp_setup_resp (nbp_req, buf_len) != 0) - return (-1); - } - - /* At this point, we have a response buffer that can accommodate the - * tuple we want to write. Write it! - */ - tuple = (at_nbptuple_t *)gbuf_wptr(nbp_req->response); - tuple->enu_addr.net = htons(nve_entry->address.net); - tuple->enu_addr.node = nve_entry->address.node; - tuple->enu_addr.socket = nve_entry->address.socket; - tuple->enu_enum = nve_entry->enumerator; - - /* tuple is in the compressed (no "filler") format */ - p = (u_char *)&tuple->enu_entity.object; - obj_len = nve_entry->object.len + 1; - bcopy(&nve_entry->object, p, obj_len); - p += obj_len; - type_len = nve_entry->type.len + 1; - bcopy(&nve_entry->type, p, type_len); - p += type_len; - p[0] = (u_char)1; - p[1] = '*'; - - nbp_req->space_unused -= tuple_size; - gbuf_winc(nbp_req->response, tuple_size); - - /* increment the tuple count in header by 1 */ - DATA_NBP(nbp_req->response)->tuple_count++; - - return (0); -} - - -static int nbp_strcmp (str1, str2, embedded_wildcard) -register at_nvestr_t *str1, *str2; -register u_char embedded_wildcard; /* If str1 may contain a character - * that's to be treated as an - * embedded wildcard, this character - * is it. Making this special case - * since for zone names, squiggly - * equal is not to be treated as a - * wildcard. - */ -{ - u_char ch1,ch2; - register int i1, i2; - register int reverse = 0; - register int left_index; - - /* Embedded wildcard, if any, could only be in the first string (str1). - * returns 0 if two strings are equal (modulo case), -1 otherwise - */ - - if (str1->len == 0 || str2->len == 0) { - return (-1); - } - - /* Wildcards are not allowed in str2. - * - * If str1 could potentially contain an embedded wildcard, since the - * embedded wildcard matches ZERO OR MORE characters, str1 can not be - * more than 1 character longer than str2. - * - * If str1 is not supposed to have embedded wildcards, the two strs - * must be of equal length. - */ - if ((embedded_wildcard && (str2->len < (unsigned) (str1->len-1))) || - (!embedded_wildcard && (str2->len != str1->len))) { - return (-1); - } - - for (i1 = i2 = left_index = 0; (unsigned) i1 < str1->len ;) { - ch1 = str1->str[i1]; - ch2 = str2->str[i2]; - - if (embedded_wildcard && (ch1==embedded_wildcard)) { - /* hit the embedded wild card... start comparing from - * the other end of the string. - */ - reverse++; - /* But, if embedded wildcard was the last character of - * the string, the two strings match, so return okay. 
- */ - if (i1 == str1->len-1) { - return (0); - } - - i1 = str1->len - 1; - i2 = str2->len - 1; - - continue; - } - - nbp_upshift(&ch1, 1); - nbp_upshift(&ch2, 1); - - if (ch1 != ch2) { - return (-1); - } - - if (reverse) { - i1--; i2--; - if (i1 == left_index) { - return (0); - } - } else { - i1++; i2++; left_index++; - } - } - return (0); -} - - -static void nbp_setup_hdr (nbp_req) -register nbp_req_t *nbp_req; -{ - register at_ddp_t *ddp; - register at_nbp_t *nbp; - - ddp = DATA_DDP(nbp_req->response); - nbp = DATA_NBP(nbp_req->response); - - ddp->type = DDP_NBP; - UAS_ASSIGN(ddp->checksum, 0); - ddp->unused = ddp->hopcount = 0; - - switch(DATA_NBP(nbp_req->request)->control) { - case NBP_LKUP : - ddp->dst_socket = nbp_req->nve.address.socket; - ddp->dst_node = nbp_req->nve.address.node; - NET_ASSIGN_NOSWAP(ddp->dst_net, nbp_req->nve.address.net); - nbp->control = NBP_LKUP_REPLY; - break; - } - nbp->at_nbp_id = DATA_NBP(nbp_req->request)->at_nbp_id; - return; -} - - -static int nbp_setup_resp (nbp_req, tuples_size) -register nbp_req_t *nbp_req; -register int tuples_size; -{ - int buf_size = tuples_size + DDP_X_HDR_SIZE + NBP_HDR_SIZE; - nbp_req->response = gbuf_alloc(AT_WR_OFFSET+buf_size, PRI_MED); - if (nbp_req->response == NULL) { - errno = ENOBUFS; - return(-1); - } - gbuf_rinc(nbp_req->response, AT_WR_OFFSET); - gbuf_wset(nbp_req->response, DDP_X_HDR_SIZE + NBP_HDR_SIZE); - nbp_setup_hdr(nbp_req); - - DATA_NBP(nbp_req->response)->tuple_count = 0; - nbp_req->space_unused = tuples_size; - - return (0); -} /* nbp_setup_resp */ - - -static int nbp_send_resp (nbp_req) -register nbp_req_t *nbp_req; -{ - int status; - - status = ddp_output(&nbp_req->response, (at_socket)NBP_SOCKET, FALSE); - nbp_req->response = NULL; - errno = status; - return(errno?-1:0); -} - -void nbp_add_multicast(zone, ifID) - at_nvestr_t *zone; - at_ifaddr_t *ifID; -{ - char data[ETHERNET_ADDR_LEN]; - - if (zone->str[0] == '*') - return; - - { - char str[35]; - strlcpy((char *)str,(char *)zone->str,sizeof(str)); - dPrintf(D_M_NBP_LOW, D_L_USR3, - ("nbp_add_multi getting mc for %s\n", str)); - } - zt_get_zmcast(ifID, zone, data); - if (FDDI_OR_TOKENRING(ifID->aa_ifp->if_type)) - ddp_bit_reverse((unsigned char *)data); - dPrintf(D_M_NBP_LOW,D_L_USR3, - ("nbp_add_multi adding 0x%x%x port:%d ifID:0x%x if:%s\n", - *(unsigned*)data, (*(unsigned *)(data+2))&0x0000ffff, - /*i*/0, (u_int) ifID, ifID->ifName)); - - bcopy((caddr_t)data, (caddr_t)&ifID->ZoneMcastAddr, ETHERNET_ADDR_LEN); - (void)at_reg_mcast(ifID, (caddr_t)&ifID->ZoneMcastAddr); -} - -int -getNbpTableSize(void) - -/* for SNMP, returns size in # of entries */ -{ - register nve_entry_t *nve; - register int i=0; - - for (nve = TAILQ_FIRST(&name_registry); nve; nve = TAILQ_NEXT(nve, nve_link)) - i++; - return(i); -} - -int -getNbpTable(p, s, c) - snmpNbpEntry_t *p; - int s; /* starting entry */ - int c; /* # entries to copy */ - -/* for SNMP, returns section of nbp table */ -{ - register nve_entry_t *nve; - register int i=0; - static int nextNo=0; /* entry that *next points to */ - static nve_entry_t *next = (nve_entry_t*)NULL; - - if (s && next && nextNo == s) { - nve = next; - i = nextNo; - } - else - nve = TAILQ_FIRST(&name_registry); - - for ( ; nve && c ; nve = TAILQ_NEXT(nve, nve_link), p++,i++) { - if (i>= s) { - p->nbpe_object = nve->object; - p->nbpe_type = nve->type; - c--; - } - } - if (nve) { - next = nve; - nextNo = i; - } else { - next = (nve_entry_t*)NULL; - nextNo = 0; - } - - return 0; -}
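nbp_strcmp() above implements NBP's embedded wildcard (the "approximately equal" character, 0xC5), which matches zero or more characters: it compares forward until it hits the wildcard, then restarts from the two string ends and walks backward. A rough user-space sketch of the same matching rule, assuming at most one wildcard per pattern (nbp_validate_n_hash() rejects more) and plain ASCII case folding, with '~' standing in for 0xC5:

```c
#include <stdio.h>
#include <string.h>
#include <strings.h>   /* strncasecmp */

static int demo_wild_match(const char *pat, const char *s, char wild)
{
    const char *w = strchr(pat, wild);
    size_t head, tail, slen = strlen(s);

    if (w == NULL)                    /* no wildcard: plain compare */
        return strlen(pat) == slen && strncasecmp(pat, s, slen) == 0;
    head = (size_t)(w - pat);         /* literal prefix length */
    tail = strlen(w + 1);             /* literal suffix length */
    if (slen < head + tail)           /* wildcard may match zero chars */
        return 0;
    return strncasecmp(pat, s, head) == 0 &&
           strncasecmp(w + 1, s + slen - tail, tail) == 0;
}

int main(void)
{
    printf("%d\n", demo_wild_match("laser~er", "laserwriter", '~')); /* 1 */
    printf("%d\n", demo_wild_match("laser~er", "laser", '~'));       /* 0 */
    return 0;
}
```

Comparing prefix and suffix separately is why the kernel routine may only permit one embedded wildcard: with two, the span matched by each becomes ambiguous.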
#define ZONES_PER_BLK 31 /* 31 fits within a 1k blk */ -#define ZONE_BLK_SIZE ZONES_PER_BLK * sizeof(at_nvestr_t) - -int setLocalZones(newzones, size) - at_nvestr_t *newzones; - int size; -/* updates list of zones which are local to all active ports - missing zones are not deleted, only missing zones are added. -*/ -{ - int bytesread=0; /* #bytes read from tuple */ - int i=0, dupe; - gbuf_t *m; - at_nvestr_t *pnve, *pnew = newzones; - - if (!lzones) { - if(!(lzones = gbuf_alloc(ZONE_BLK_SIZE, PRI_MED))) - return(ENOBUFS); - gbuf_wset(lzones,0); - } - while (bytesread < size) { /* for each new zone */ - { - char str[35]; - strlcpy((char *)str,(char *)pnew->str,sizeof(str)); - } - m = lzones; - pnve = (at_nvestr_t*)gbuf_rptr(m); - dupe = 0; - for (i=0; i<lzonecnt && !dupe; i++,pnve++) { - if (i && !(i%ZONES_PER_BLK)) - if (gbuf_cont(m)) { - m = gbuf_cont(m); - pnve = (at_nvestr_t*)gbuf_rptr(m); - } - else - break; - if (pnew->len != pnve->len) - continue; - if (pnew->len > NBP_NVE_STR_SIZE) { - return(0); - } - if (!strncmp((char *)pnew->str, (char *)pnve->str, pnew->len)) { - dupe=1; - continue; - } - } - if (!dupe) { - /* add new zone */ - if (lzonecnt && !(lzonecnt%ZONES_PER_BLK)) { - if(!(gbuf_cont(m) = gbuf_alloc(ZONE_BLK_SIZE, PRI_MED))) - return(ENOBUFS); - gbuf_wset(gbuf_cont(m),0); - pnve = (at_nvestr_t*)gbuf_rptr(gbuf_cont(m)); - } - strlcpy((char *)pnve->str,(char *)pnew->str,sizeof(pnve->str)); - pnve->len = pnew->len; - lzonecnt++; - } - bytesread += (pnew->len+1); - pnew = (at_nvestr_t*) (((char *)pnew) + pnew->len + 1); - } - /* showLocalZones1(); */ - return(0); -} - -/********** -showLocalZones1() -{ - int i; - at_nvestr_t *pnve; - gbuf_t *m; - char str[35]; - - for (i=0; ; i++) { - if (!(pnve = getLocalZone(i))) { - break; - } - strlcpy(str,pnve->str,sizeof(str)); - } -} - -*********/ - -int -isZoneLocal(zone) -at_nvestr_t *zone; -{ - at_nvestr_t *pnve; - int i; - if (DEFAULT_ZONE(zone)) - return(1); - for (i=0; ; i++) { - if (!(pnve = getLocalZone(i))) - break; - if (!nbp_strcmp(pnve,zone,0)) - return(1); - } - return(0); -}
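setLocalZones() above keeps the local-zone list in 1 KB buffers chained through gbuf_cont(), ZONES_PER_BLK entries per buffer: assuming NBP_NVE_STR_SIZE is 32, an at_nvestr_t is a length byte plus a 32-byte body, and 31 x 33 = 1023 bytes, which is what the "31 fits within a 1k blk" comment is counting. Both this walk and getSPLocalZone() below therefore hop to the next buffer every 31st entry. The shape of that walk, reduced to a hypothetical flat-C sketch (demo types, not the kernel's gbuf API):

```c
#include <stddef.h>

#define DEMO_ZONES_PER_BLK 31

typedef struct {
    unsigned char len;
    unsigned char str[32];            /* 33 bytes: 31 * 33 = 1023 <= 1024 */
} demo_nvestr_t;

typedef struct demo_blk {
    demo_nvestr_t z[DEMO_ZONES_PER_BLK];
    struct demo_blk *next;            /* stands in for gbuf_cont(m) */
} demo_blk_t;

/* Entry #idx of the chained list, or NULL when the chain runs out. */
demo_nvestr_t *demo_get_zone(demo_blk_t *blk, int idx, int count)
{
    if (idx < 0 || idx >= count)
        return NULL;
    while (idx >= DEMO_ZONES_PER_BLK) {   /* hop one block per 31 entries */
        blk = blk->next;
        if (blk == NULL)
            return NULL;
        idx -= DEMO_ZONES_PER_BLK;
    }
    return &blk->z[idx];
}
```

The chained fixed-size blocks avoid reallocating a growing array inside the kernel; the cost is the per-block hop logic that both kernel routines repeat inline.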
#define NULL_PNVESTR (at_nvestr_t *) 0 - -at_nvestr_t *getLocalZone(zno) - int zno; /* zone number in virtual list to - return, 0 for first zone */ -/* returns pointer to a new local zone number zno, - returns null when no zones left. -*/ -{ - zone_usage_t ifz; - ifz.zone_index = zno; - if (MULTIPORT_MODE) - return(getRTRLocalZone(&ifz)); - else - return(getSPLocalZone(zno)); -} - - -at_nvestr_t *getSPLocalZone(zno) - int zno; /* zone number in virtual list to - return, 0 for first zone */ -/* single port mode version */ -{ - int curz=0; /* current zone */ - gbuf_t *m; - at_nvestr_t *pnve; - - if (lzones) { - m = lzones; - pnve = (at_nvestr_t*)gbuf_rptr(m); - } - else - return(NULL_PNVESTR); - if ( zno>=lzonecnt ) - return(NULL_PNVESTR); - for (curz=0; curz<zno; curz++,pnve++) { - if ( curz<lzonecnt ) { - if (curz && !(curz%ZONES_PER_BLK)) { - if (gbuf_cont(m)) { - m = gbuf_cont(m); - pnve = (at_nvestr_t*)gbuf_rptr(m); - } - else - return(NULL_PNVESTR); - } - if (pnve->len > NBP_NVE_STR_SIZE) { - return(NULL_PNVESTR); - } - } - else - return(NULL_PNVESTR); - } - return(pnve); -} - -/* The following functions are used in name registration and removal */ - -int nbp_fillin_nve(entity, nve) - at_entity_t *entity; - nve_entry_t *nve; -{ - register int i; - - if (entity->object.len > NBP_NVE_STR_SIZE || - entity->type.len > NBP_NVE_STR_SIZE || - entity->zone.len > NBP_NVE_STR_SIZE) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_fillin_nve: bad str len\n")); - errno = EINVAL; - return (-1); - } - - nve->zone = entity->zone; - nve->zone_hash = 0; - if (!isZoneLocal(&entity->zone)) { - errno = EINVAL; - return(-1); - } - /* if there's no zone, '*' gets filled in when entry is created */ - if (!DEFAULT_ZONE(&entity->zone)) - nve->zone_hash = nbp_strhash(&nve->zone); - - nve->object = entity->object; - nve->object_hash = 0; - if (entity->object.len == 1 && - (entity->object.str[0] == NBP_ORD_WILDCARD || - entity->object.str[0] == NBP_SPL_WILDCARD)) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_fillin_nve: wildcard\n")); - errno = EINVAL; - return (-1); - } - for (i = 0; i < entity->object.len; i++) { - if (entity->object.str[i] == NBP_SPL_WILDCARD) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_fillin_nve: wildcard2\n")); - errno = EINVAL; - return (-1); - } - } - nve->object_hash = nbp_strhash(&nve->object); - - nve->type = entity->type; - nve->type_hash = 0; - if (entity->type.len == 1 && - (entity->type.str[0] == NBP_ORD_WILDCARD || - entity->type.str[0] == NBP_SPL_WILDCARD)) { - errno = EINVAL; - return (-1); - } - for (i = 0; i < entity->type.len; i++) { - if (entity->type.str[i] == NBP_SPL_WILDCARD) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_fillin_nve: wildcard3\n")); - errno = EINVAL; - return (-1); - } - } - nve->type_hash = nbp_strhash(&nve->type); - - return(0); -} /* nbp_fillin_nve */ - -nve_entry_t *nbp_find_nve(nve) - nve_entry_t *nve; -{ - register nve_entry_t *nve_entry; - - TAILQ_FOREACH(nve_entry, &name_registry, nve_link) { - if (nve->zone_hash && - ((nve->zone_hash != nve_entry->zone_hash) && - (nve->zone_hash != hzonehash))) { - dPrintf(D_M_NBP_LOW,D_L_USR4, - ("nbp_find_nve: no match for zone, req hash:%x\n", - nve->zone_hash)); - continue; - } - - if ((nve->object_hash) && - (nve->object_hash != nve_entry->object_hash)) - continue; - - if ((nve->type_hash) && - (nve->type_hash != nve_entry->type_hash)) - continue; - - /* Found a match! */ - return (nve_entry); - } - - return (NULL); -} /* nbp_find_nve */
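nbp_find_nve() above is a pure hash probe over the registry: a zero hash on any field means "unspecified, match anything", and a requested zone is accepted if it hashes either to the entry's own zone or to the cached home-zone hash hzonehash. The predicate in isolation (hypothetical demo struct, not the kernel's nve_entry_t):

```c
#include <stdbool.h>

typedef struct {
    unsigned int zone_hash;    /* 0 = unspecified */
    unsigned int object_hash;  /* 0 = unspecified */
    unsigned int type_hash;    /* 0 = unspecified */
} demo_nve_t;

bool demo_nve_match(const demo_nve_t *want, const demo_nve_t *have,
                    unsigned int home_zone_hash)
{
    if (want->zone_hash && want->zone_hash != have->zone_hash &&
        want->zone_hash != home_zone_hash)
        return false;          /* zone given and matches neither */
    if (want->object_hash && want->object_hash != have->object_hash)
        return false;
    if (want->type_hash && want->type_hash != have->type_hash)
        return false;
    return true;               /* every specified field matched */
}
```

Unlike nbp_search_nve(), which falls back to a full nbp_strcmp() after the hash filter, registration only needs to know whether an identical name already exists, so hash equality alone is treated as a match here.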
static int nbp_enum_gen (nve_entry) - register nve_entry_t *nve_entry; -{ - register int new_enum = 0; - register nve_entry_t *ne; - -re_do: - TAILQ_FOREACH(ne, &name_registry, nve_link) { - if ((*(int *)&ne->address == *(int *)&nve_entry->address) && - (ne->enumerator == new_enum)) { - if (new_enum == 255) - return(EADDRNOTAVAIL); - else { - new_enum++; - goto re_do; - } - } - } - - nve_entry->enumerator = new_enum; - return (0); -} - -int nbp_new_nve_entry(nve_entry, ifID) - nve_entry_t *nve_entry; - at_ifaddr_t *ifID; -{ - gbuf_t *tag; - nve_entry_t *new_entry; - at_nvestr_t *zone; - int error; - - if (!(valid_at_addr((at_inet_t *)&nve_entry->address))) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_new_nve_entry: valid_at_addr\n")); - return(EINVAL); - } - if ((error = nbp_enum_gen(nve_entry))) - return(error); - - nve_entry->unique_nbp_id = ++nbp_id_count; - - /* Got an nve entry on hand.... allocate a buffer, copy the entry - * on to it and stick it in the registry. - */ - if ((tag = gbuf_alloc(sizeof(nve_entry_t), PRI_HI)) == NULL){ - return(ENOBUFS); - } - gbuf_wset(tag, sizeof(nve_entry_t)); - new_entry = (nve_entry_t *)gbuf_rptr(tag); - bcopy(nve_entry, new_entry, sizeof(nve_entry_t)); - - if (DEFAULT_ZONE(&nve_entry->zone)) { - /* put actual zone name in entry instead of "*" */ - /* if single port mode and no zone name, then a router - is down, so use pram zone name hint from elap cfg */ - if (!MULTIPORT_MODE && ifID_home->ifZoneName.str[0] == '*') { - zone = &ifID_home->startup_zone; - } else { - zone = &ifID_home->ifZoneName; - } - new_entry->zone = *zone; - if ( new_entry->zone.len == 0 ) { - new_entry->zone.str[0] = '*'; - new_entry->zone.len = 1; - } - new_entry->zone_hash = nbp_strhash(&new_entry->zone); - } - new_entry->tag = tag; - new_entry->pid = proc_selfpid(); - - TAILQ_INSERT_TAIL(&name_registry, new_entry, nve_link); - at_state.flags |= AT_ST_NBP_CHANGED; - -#ifdef NBP_DEBUG - { - char zone[35],object[35],type[35]; - strlcpy(zone,new_entry->zone.str, sizeof(zone)); - strlcpy(object,new_entry->object.str, sizeof(object)); - strlcpy(type,new_entry->type.str, sizeof(type)); - dPrintf(D_M_NBP_LOW, D_L_USR4, - ("nbp_insert: adding %s:%s@%s addr:%d.%d ", - object, type, zone, - new_entry->address.net, new_entry->address.node)); - } -#endif /* NBP_DEBUG */ - - nbp_add_multicast(&new_entry->zone, ifID); - return (0); -} /* nbp_new_nve_entry */ - -void nbp_delete_entry (nve_entry) - nve_entry_t *nve_entry; -{ - TAILQ_REMOVE(&name_registry, nve_entry, nve_link); - gbuf_freem(nve_entry->tag); - at_state.flags |= AT_ST_NBP_CHANGED; -} - -/* Registration of an NBP entity in multihoming mode, from AIOCNBPREG - in at.c */ -int nbp_mh_reg(nbpP) - at_nbp_reg_t *nbpP; -{ - nve_entry_t nve; - at_ifaddr_t *ifID = 0; - int registered = 0; - int finished = FALSE; - - if (nbp_fillin_nve(&nbpP->name, &nve) != 0) { - /* bad tuple...
*/ - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_mh_reg: bad tuple\n")); - return(EINVAL); - } - nve.address = nbpP->addr; - nve.ddptype = nbpP->ddptype; - - if (DEFAULT_ZONE(&nbpP->name.zone)) { - /* multihoming mode with the default zone specified */ - - /* now find the matching interfaces */ - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (nbpP->addr.net || nbpP->addr.node) { - /* if address is specified */ - if ((nbpP->addr.net != ifID->ifThisNode.s_net || - nbpP->addr.node != ifID->ifThisNode.s_node)) - continue; - else - /* the address was specified, and - we found the matching interface */ - finished = TRUE; - } else { - /* address is not specified, so fill in - the address for the interface */ - nve.address.net = ifID->ifThisNode.s_net; - nve.address.node = ifID->ifThisNode.s_node; - } - nve.zone = ifID->ifZoneName; - nve.zone_hash = nbp_strhash(&nve.zone); - if (nbp_find_nve(&nve)) - continue; - if (nbp_new_nve_entry(&nve, ifID) == 0) - registered++; - } - if (registered && !nbpP->addr.net && !nbpP->addr.node) { - nbpP->addr.net = ifID_home->ifThisNode.s_net; - nbpP->addr.node = ifID_home->ifThisNode.s_node; - } - } else { - /* multihoming mode with a specific zone specified */ - /* see which segments (interfaces) are seeded for this zone */ - int zno; - at_ifnames_t ifs_in_zone; - if (!(zno = zt_find_zname(&nve.zone))) { - dPrintf(D_M_NBP_LOW, D_L_WARNING, - ("nbp_mh_reg: didn't find zone name\n")); - return(EINVAL); - } - getIfUsage(zno-1, &ifs_in_zone); - - /* now find the matching interfaces */ - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (!ifs_in_zone.at_if[ifID->ifPort]) - /* zone doesn't match */ - continue; - else - /* the zone matches, so unless the - address is specified and doesn't - match, we only need to do this once */ - finished = TRUE; - - if (nbpP->addr.net || nbpP->addr.node) { - /* address is specified */ - finished = FALSE; - if ((nbpP->addr.net != ifID->ifThisNode.s_net || - nbpP->addr.node != ifID->ifThisNode.s_node)) - continue; - else - /* the address was specified, and - we found the matching interface */ - finished = TRUE; - } else { - /* address is not specified, so fill in - the address for the interface */ - nve.address.net = ifID->ifThisNode.s_net; - nve.address.node = ifID->ifThisNode.s_node; - } - if (nbp_find_nve(&nve)) - continue; - if (nbp_new_nve_entry(&nve, ifID) == 0) - registered++; - if (registered && !nbpP->addr.net && !nbpP->addr.node) { - nbpP->addr.net = ifID->ifThisNode.s_net; - nbpP->addr.node = ifID->ifThisNode.s_node; - } - - } - } - nbpP->unique_nbp_id = (registered > 1)? 0: nve.unique_nbp_id; - - if (registered) - return(0); - else - return(EADDRNOTAVAIL); - -} /* nbp_mh_reg */ diff --git a/bsd/netat/ddp_proto.c b/bsd/netat/ddp_proto.c deleted file mode 100644 index b55d1968c..000000000 --- a/bsd/netat/ddp_proto.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1988, 1989, 1997, 1998 Apple Computer, Inc. - * - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -/* ddp_proto.c: 2.0, 1.23; 10/18/93; Apple Computer, Inc. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -extern at_ifaddr_t *ifID_home; - -void ddp_putmsg(gref, mp) - gref_t *gref; - gbuf_t *mp; -{ - register ioc_t *iocbp; - register int error; - at_ddp_t *ddp; - - switch(gbuf_type(mp)) { - case MSG_DATA : - /* If this message is going out on a socket that's not bound, - * nail it. - */ - ddp = (at_ddp_t *)gbuf_rptr(mp); - if ((ddp->type == DDP_ATP) || (ddp->type == DDP_ADSP)) { - if ((gref == 0) || (gref->lport == 0)) { - int src_addr_included = - ((ddp->type==DDP_ATP) && ddp->src_node)? 1 : 0; - (void)ddp_output(&mp, ddp->src_socket, - src_addr_included); - return; - } - } - - if ((gref == 0) || (gref->lport == 0)) { - gbuf_freel(mp); - if (gref) - atalk_notify(gref, ENOTCONN); - return; - } - if ((error = ddp_output(&mp, gref->lport, 0)) != 0) { - if (gref) - atalk_notify(gref, error); - } - return; - - case MSG_IOCTL : - iocbp = (ioc_t *)gbuf_rptr(mp); - if (DDP_IOC_MYIOCTL(iocbp->ioc_cmd)) { - switch(iocbp->ioc_cmd) { - case DDP_IOC_GET_CFG : - /* Note that DDP_IOC_GET_CFG / AppleTalk ddp_config() - fills in the net and node of the ddp_addr_t param - with the net and node of the default interface, - not the net and node that has been bound, as - getsockname() and sockopt DDP_GETSOCKNAME do. 
- */ -#ifdef APPLETALK_DEBUG - kprintf("ddp_putmsg: DDP_IOC_GET_CFG\n"); -#endif - if (gbuf_cont(mp)) - gbuf_freem(gbuf_cont(mp)); - if ((gbuf_cont(mp) = - gbuf_alloc(sizeof(ddp_addr_t), - PRI_MED)) == NULL) { - ioc_ack(ENOBUFS, mp, gref); - break; - } - { - /* *** was ddp_get_cfg() *** */ - ddp_addr_t *cfgp = - (ddp_addr_t *)gbuf_rptr(gbuf_cont(mp)); - cfgp->inet.net = ifID_home->ifThisNode.s_net; - cfgp->inet.node = ifID_home->ifThisNode.s_node; -#ifdef NOT_YET - cfgp->inet.net = gref->laddr.s_net; - cfgp->inet.node = gref->laddr.s_node; -#endif - cfgp->inet.socket = gref->lport; - cfgp->ddptype = gref->ddptype; - } - gbuf_wset(gbuf_cont(mp), sizeof(ddp_addr_t)); - iocbp->ioc_count = sizeof(ddp_addr_t); - ioc_ack(0, mp, gref); - break; - default: - ioc_ack(EINVAL, mp, gref); - break; - } - } else { - /* Unknown ioctl */ - ioc_ack(EINVAL, mp, gref); - } - break; - default : -#ifdef APPLETALK_DEBUG - kprintf("unexpected message type in ddp_putmsg: %d/n", - gbuf_type(mp)); -#endif - gbuf_freem(mp); - break; - } - return; -} /* ddp_putmsg */ - -gbuf_t *ddp_compress_msg(mp) -register gbuf_t *mp; -{ - register gbuf_t *tmp; - - while (gbuf_len(mp) == 0) { - tmp = mp; - mp = gbuf_cont(mp); - gbuf_freeb(tmp); - - if (mp == NULL) - break; - } - return (mp); -} diff --git a/bsd/netat/ddp_r_rtmp.c b/bsd/netat/ddp_r_rtmp.c deleted file mode 100644 index bc21d5b40..000000000 --- a/bsd/netat/ddp_r_rtmp.c +++ /dev/null @@ -1,1659 +0,0 @@ -/* - * Copyright (c) 1994, 1996-2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*-------------------------------------------------------------------------- - * Router RTMP protocol functions: - * - * This file contains Routing specifics to handle RTMP packets and - * the maintenance of the routing table through.... - * - * The entry point for the rtmp input in ddp is valid only when we're - * running in router mode. - * - * - * 0.01 03/22/94 Laurent Dumont Creation - * Modified for MP, 1996 by Tuyen Nguyen - * Added AURP support, April 8, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
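ddp_compress_msg() above does one thing: it strips zero-length blocks from the head of a message chain. A minimal user-space model of that loop, with struct blk as a hypothetical stand-in for gbuf_t and free() standing in for gbuf_freeb():

#include <stdlib.h>

struct blk {                 /* hypothetical stand-in for gbuf_t */
    struct blk *next;        /* gbuf_cont(): next block in the message */
    unsigned    len;         /* gbuf_len(): valid bytes in this block */
};

/* Drop empty leading blocks; return the first block carrying data, or NULL. */
static struct blk *compress_msg(struct blk *mp)
{
    while (mp != NULL && mp->len == 0) {
        struct blk *tmp = mp;
        mp = mp->next;
        free(tmp);           /* gbuf_freeb() releases a single block */
    }
    return mp;
}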
- * - *------------------------------------------------------------------------- - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -extern void (*ddp_AURPsendx)(void); -extern at_ifaddr_t *aurp_ifID; -extern at_ifaddr_t *ifID_table[]; -extern at_ifaddr_t *ifID_home; - - -int rtmp_router_start(at_kern_err_t *); -void rtmp_router_start_tmo(void *); - - - -static at_kern_err_t ke; - /* Used to record error discovered in rtmp_update() */ - -void rtmp_timeout(void *arg); -void rtmp_send_port(at_ifaddr_t *); -void rtmp_send_port_locked(void *); -void rtmp_dropper(void *); -static void rtmp_update(at_ifaddr_t *, at_rtmp *, short); -static void rtmp_request(at_ifaddr_t *, at_ddp_t *); -int elap_online3(at_ifaddr_t *); - -extern short ErrorRTMPoverflow, ErrorZIPoverflow; -extern lck_mtx_t * atalk_mutex; - -extern int pktsIn, pktsOut, pktsDropped, pktsHome; - - -/* - * rtmp_router_input: function called by DDP (in router mode) to handle - * all incoming RTMP packets. Listen to the RTMP socket - * for all the connected ports. - * Switch to the relevant rtmp functions. - */ - -void rtmp_router_input(mp, ifID) - register gbuf_t *mp; - register at_ifaddr_t *ifID; -{ - register at_ddp_t *ddp = (at_ddp_t *)gbuf_rptr(mp); - /* NOTE: there is an assumption here that the - * DATA follows the header. */ - - register at_net_al OurNet; - register at_node OurNode; - register at_net_al DstNet; - register at_node DstNode; - short tuples; - RT_entry *Entry; - - if (!ifID || (ifID->ifRoutingState < PORT_ACTIVATING)) { - gbuf_freem(mp); - return; - } - - - OurNet = ifID->ifThisNode.s_net; - OurNode = ifID->ifThisNode.s_node; - - - if (gbuf_type(mp) != MSG_DATA) { - - /* If this is a M_ERROR message, DDP is shutting down, - * nothing to do here...If it's something else, we don't - * understand what it is - */ - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_router_input: Not an M_DATA type\n")); - gbuf_freem(mp); - return; - } - - DstNet = NET_VALUE(ddp->dst_net); - DstNode = ddp->dst_node; - - /* check the kind of RTMP packet we received */ - - switch (ddp->type) { - - case DDP_RTMP: - - tuples = gbuf_len(mp) - DDP_X_HDR_SIZE - RTMP_IDLENGTH; - /* - * we need to make sure that the size of 'tuples' is - * not less than or equal to 0 due to a bad packet - */ - if (tuples <= 0) { - gbuf_freem(mp); - break; - } - - if (tuples % 3) {/* not a valid RTMP data packet */ - gbuf_freem(mp); - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_input: bad number of tuple in RTMP packet\n")); - return; - } - - tuples = tuples / 3; - - rtmp_update(ifID, (at_rtmp *)ddp->data, tuples); - gbuf_freem(mp); - - break; - - case DDP_RTMP_REQ: - - /* we should treat requests a bit differently. - * - if the request if not for the port, route it and also respond - * for this port if not locally connected. - * - if the request for this port, then just respond to it. 
- */ - - if (!ROUTING_MODE) { - gbuf_freem(mp); - return; - } - if (DstNode == 255) { - if (((DstNet >= CableStart) && (DstNet <= CableStop)) || - DstNet == 0) { - rtmp_request(ifID, ddp); - gbuf_freem(mp); - return; - } - else { - /* check if directly connected port */ - if ((Entry = rt_blookup(DstNet)) && - (Entry->NetDist == 0)) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_router_input: request for %d.%d, port %d\n", - DstNet, DstNode, Entry->NetPort)); - rtmp_request(ifID_table[Entry->NetPort], ddp); - gbuf_freem(mp); - return; - } - else { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_router_input: RTMP packet received for %d.%d, also forward\n", - NET_VALUE(ddp->dst_net),ddp->dst_node)); - routing_needed(mp, ifID, TRUE); - return; - } - } - } - else { - - if ((DstNode == OurNode) && (DstNet == OurNet)) { - rtmp_request(ifID, ddp); - gbuf_freem(mp); - return; - } - else { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_router_input: RTMP packet received for %d.%d, forward\n", - NET_VALUE(ddp->dst_net), ddp->dst_node)); - routing_needed(mp, ifID, TRUE); - } - } - - break; - - default: - - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_input: RTMP packet type=%d, route it\n", ddp->type)); - routing_needed(mp, ifID, TRUE); - break; - - } -} /* rtmp_router_input */ - -/* - * rtmp_update: - * - */ - -static void rtmp_update(ifID, rtmp, tuple_nb) - register at_ifaddr_t *ifID; - register at_rtmp *rtmp; - register short tuple_nb; -{ - register int PortFlags = ifID->ifFlags; - register at_rtmp_tuple *FirstTuple = (at_rtmp_tuple *)&rtmp->at_rtmp_id[1]; - register at_rtmp_tuple *SecondTuple = (at_rtmp_tuple *)&rtmp->at_rtmp_id[4]; - RT_entry NewRoute, *CurrentRoute; - register u_char SenderNodeID = rtmp->at_rtmp_id[0]; - char *TuplePtr; - short state; - - bzero(&NewRoute, sizeof(RT_entry)); - - /* Make sure this an AppleTalk node sending us the RTMP packet */ - - if (rtmp->at_rtmp_id_length != 8) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update : RTMP ID not as expected Net=%d L=x%x\n", - NET_VALUE(rtmp->at_rtmp_this_net), rtmp->at_rtmp_id_length)); - return; - } - - /* - * If the port is activating, only take the Network range from the - * the RTMP packet received. - * Check if there is a conflict with our seed infos. 
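The length test in rtmp_router_input() above is the entire sanity check for an RTMP data packet: the DDP payload minus the fixed RTMP ID must be a positive multiple of 3, because every tuple is a 3-byte triple (extended ranges use two consecutive triples). A standalone sketch of that arithmetic; the two size constants here are assumptions, not values taken from the real headers:

#define DDP_X_HDR_SIZE 13   /* extended DDP header (assumed value) */
#define RTMP_IDLENGTH   4   /* sender net (2) + ID length (1) + node (1) (assumed) */

/* Return the tuple count of an RTMP data packet, or -1 if malformed. */
static int rtmp_tuple_count(int pkt_len)
{
    int tuples = pkt_len - DDP_X_HDR_SIZE - RTMP_IDLENGTH;

    if (tuples <= 0)        /* truncated or empty packet */
        return -1;
    if (tuples % 3)         /* tuples come in 3-byte triples */
        return -1;
    return tuples / 3;
}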
- */ - - if (ifID->ifRoutingState == PORT_ACTIVATING) { - if (PortFlags & RTR_XNET_PORT) { - if ((PortFlags & RTR_SEED_PORT) && - ((CableStart != TUPLENET(FirstTuple)) || - (CableStop != TUPLENET(SecondTuple)))) { - ifID->ifRoutingState = PORT_ERR_SEED; - ke.error = KE_CONF_SEED_RNG; - ke.port1 = ifID->ifPort; - strlcpy(ke.name1, ifID->ifName, sizeof(ke.name1)); - ke.net = NET_VALUE(rtmp->at_rtmp_this_net); - ke.node = SenderNodeID; - ke.netr1b = TUPLENET(FirstTuple); - ke.netr1e = TUPLENET(SecondTuple); - ke.netr2b = CableStart; - ke.netr2e = CableStop; - RouterError(ifID->ifPort, ERTR_SEED_CONFLICT); - return; - } - CableStart = TUPLENET(FirstTuple); - CableStop = TUPLENET(SecondTuple); -/* - dPrintf(D_M_RTMP, D_L_INFO, - ("rtmp_update: Port #%d activating, set Cable %d-%d\n", - ifID->ifPort, CableStart, CableStop)); -*/ - } - else { /* non extended cable */ - if ((PortFlags & RTR_SEED_PORT) && - (ifID->ifThisCableEnd != NET_VALUE(rtmp->at_rtmp_this_net))) { - ke.error = KE_CONF_SEED1; - ke.port1 = ifID->ifPort; - strlcpy(ke.name1, ifID->ifName,sizeof(ke.name1)); - ke.net = NET_VALUE(rtmp->at_rtmp_this_net); - ke.node = SenderNodeID; - ke.netr1e = ifID->ifThisCableEnd; - ifID->ifRoutingState = PORT_ERR_SEED; - RouterError(ifID->ifPort, ERTR_SEED_CONFLICT); - return; - } - CableStop = NET_VALUE(rtmp->at_rtmp_this_net); - CableStart = 0; - dPrintf(D_M_RTMP, D_L_INFO, - ("rtmp_update: Port #%d NONX activating, set Cable %d-%d\n", - ifID->ifPort, CableStart, CableStop)); - } - } - - /* - * Perform a few sanity checks on the received RTMP data packet - */ - - if ((PortFlags & RTR_XNET_PORT) && (tuple_nb >= 2)) { - - /* The first tuple must be extended */ - - if (! TUPLERANGE(FirstTuple)) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: bad range value in 1st tuple =%d\n", - TUPLERANGE(FirstTuple))); - return; - } - - if (PortFlags & RTR_SEED_PORT) - if ((TUPLENET(FirstTuple) != CableStart) || - (TUPLENET(SecondTuple) != CableStop)) { - dPrintf(D_M_RTMP, D_L_WARNING, ("rtmp_update: conflict on Seed Port\n")); - ifID->ifRoutingState = PORT_ERR_CABLER; - ke.error = KE_CONF_SEED_NODE; - ke.port1 = ifID->ifPort; - strlcpy(ke.name1, ifID->ifName,sizeof(ke.name1)); - ke.net = NET_VALUE(rtmp->at_rtmp_this_net); - ke.node = SenderNodeID; - ke.netr1b = TUPLENET(FirstTuple); - ke.netr1e = TUPLENET(SecondTuple); - ke.netr2b = CableStart; - ke.netr2e = CableStop; - RouterError(ifID->ifPort, ERTR_CABLE_CONFLICT); - return; - } - - /* check that the tuple matches the range */ - - if ((TUPLENET(SecondTuple) < TUPLENET(FirstTuple)) || - (TUPLENET(FirstTuple) == 0) || - (TUPLENET(FirstTuple) >= DDP_STARTUP_LOW) || - (TUPLENET(SecondTuple) == 0) || - (TUPLENET(SecondTuple) >= DDP_STARTUP_LOW)) { - - /* - * IS THIS NON-FATAL????? - */ - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: STARTUP RANGE!!! 
1st %d-%d\n", - TUPLENET(FirstTuple), TUPLENET(SecondTuple))); - ifID->ifRoutingState = PORT_ERR_STARTUP; - ke.error = KE_SEED_STARTUP; - ke.port1 = ifID->ifPort; - strlcpy(ke.name1, ifID->ifName,sizeof(ke.name1)); - ke.net = NET_VALUE(rtmp->at_rtmp_this_net); - ke.node = SenderNodeID; - RouterError(ifID->ifPort, ERTR_CABLE_STARTUP); - return; - } - - if (TUPLEDIST(FirstTuple) != 0) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: Invalid distance in 1st tuple\n")); - return; - } - - if (rtmp->at_rtmp_id[6] != RTMP_VERSION_NUMBER) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: Invalid RTMP version = x%x\n", - rtmp->at_rtmp_id[6])); - return; - } - - } - else { /* non extended interface or problem in tuple*/ - - if (PortFlags & RTR_XNET_PORT) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: invalid number of tuple for X-net\n")); - return; - } - - if (TUPLENET(FirstTuple) == 0) { /* non extended RTMP data */ - - if (rtmp->at_rtmp_id[3] > RTMP_VERSION_NUMBER) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: Invalid non extended RTMP version\n")); - return; - } - - } - else { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: version 1.0 non Xtended net not supported\n")); - ifID->ifRoutingState = PORT_ERR_BADRTMP; - ke.error = KE_BAD_VER; - ke.rtmp_id = rtmp->at_rtmp_id[6]; - ke.net = NET_VALUE(rtmp->at_rtmp_this_net); - ke.node = SenderNodeID; - RouterError(ifID->ifPort, ERTR_RTMP_BAD_VERSION); - return; - } - } - - NewRoute.NextIRNet = NET_VALUE(rtmp->at_rtmp_this_net); - NewRoute.NextIRNode = SenderNodeID; - NewRoute.NetPort = ifID->ifPort; - - /* - * Process the case where a non-seed port needs to acquire the right - * information. - */ - - if (!(PortFlags & RTR_SEED_PORT) && (ifID->ifRoutingState == PORT_ACTIVATING)) { - dPrintf(D_M_RTMP_LOW, D_L_INFO, - ("rtmp_update: Port# %d, set non seed cable %d-%d\n", - ifID->ifPort, TUPLENET(FirstTuple), TUPLENET(SecondTuple))); - - if (PortFlags & RTR_XNET_PORT) { - NewRoute.NetStart = TUPLENET(FirstTuple); - NewRoute.NetStop = TUPLENET(SecondTuple); - ifID->ifThisCableStart = TUPLENET(FirstTuple); - ifID->ifThisCableEnd = TUPLENET(SecondTuple); - - } - else { - - NewRoute.NetStart = 0; - NewRoute.NetStop = NET_VALUE(rtmp->at_rtmp_this_net); - ifID->ifThisCableStart = NET_VALUE(rtmp->at_rtmp_this_net); - ifID->ifThisCableEnd = NET_VALUE(rtmp->at_rtmp_this_net); - } - /* - * Now, check if we already know this route, or we need to add it - * (or modify it in the table accordingly) - */ - - if ((CurrentRoute = rt_blookup(NewRoute.NetStop)) && - (CurrentRoute->NetStop == NewRoute.NetStop) && - (CurrentRoute->NetStart == NewRoute.NetStart)) { -/*LD 7/31/95 tempo########*/ - if (NewRoute.NetPort != CurrentRoute->NetPort) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: port# %d, not the port we waited for %d\n", - ifID->ifPort, CurrentRoute->NetPort)); - /* propose to age the entry we know... */ - - state = CurrentRoute->EntryState & 0x0F; - /* if entry has been updated recently, just clear the UPDATED - bit. 
if bit not set, then we can age the entry */ - if (state) { - if (CurrentRoute->EntryState & RTE_STATE_UPDATED) { - CurrentRoute->EntryState &= ~RTE_STATE_UPDATED; - } - else { - state = state >> 1 ; /* decrement state */ - } - } - CurrentRoute->EntryState = (CurrentRoute->EntryState & 0xF0) | state; - } - } - - else { /* add the new route */ - - dPrintf(D_M_RTMP, D_L_INFO, - ("rtmp_update: P# %d, 1st tuple route not known, add %d-%d\n", - ifID->ifPort, NewRoute.NetStart, NewRoute.NetStop)); - - NewRoute.EntryState = RTE_STATE_GOOD|RTE_STATE_UPDATED; - NewRoute.NetDist = 0; - - if (rt_insert(NewRoute.NetStop, NewRoute.NetStart, 0, - 0, NewRoute.NetDist, NewRoute.NetPort, - NewRoute.EntryState) == (RT_entry *)NULL) - - ErrorRTMPoverflow = 1; - } - - } - - if (ifID->ifRoutingState == PORT_ACTIVATING) { - dPrintf(D_M_RTMP, D_L_INFO, - ("rtmp_update: port activating, ignoring remaining tuples\n")); - return; - } - - /* - * Process all the tuples against our routing table - */ - - TuplePtr = (char *)FirstTuple; - - while (tuple_nb-- > 0) { - - if (TUPLEDIST(TuplePtr) == NOTIFY_N_DIST) { - dPrintf(D_M_RTMP, D_L_INFO, - ("rtmp_update: Port# %d, Tuple with Notify Neighbour\n", - ifID->ifPort)); - NewRoute.NetDist = NOTIFY_N_DIST; - NewRoute.EntryState = RTE_STATE_BAD; - } - else { - NewRoute.NetDist = TUPLEDIST(TuplePtr) + 1; - NewRoute.EntryState = RTE_STATE_GOOD; - NewRoute.EntryState = RTE_STATE_GOOD|RTE_STATE_UPDATED; - } - - - if (TUPLERANGE(TuplePtr)) { /* Extended Tuple */ - - - NewRoute.NetStart = TUPLENET(TuplePtr); - TuplePtr += 3; - NewRoute.NetStop = TUPLENET((TuplePtr)); - TuplePtr += 3; - tuple_nb--; - - if ((NewRoute.NetDist == 0) || - (NewRoute.NetStart == 0) || - (NewRoute.NetStop == 0) || - (NewRoute.NetStop < NewRoute.NetStart) || - (NewRoute.NetStart >= DDP_STARTUP_LOW) || - (NewRoute.NetStop >= DDP_STARTUP_LOW)) { - - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: P# %d, non valid xtuple received [%d-%d]\n", - ifID->ifPort, NewRoute.NetStart, NewRoute.NetStop)); - - continue; - } - - } - else { /* Non Extended Tuple */ - - NewRoute.NetStart = 0; - NewRoute.NetStop = TUPLENET(TuplePtr); - - TuplePtr += 3; - - if ((NewRoute.NetDist == 0) || - (NewRoute.NetStop == 0) || - (NewRoute.NetStop >= DDP_STARTUP_LOW)) { - - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: P# %d, non valid tuple received [%d]\n", - ifID->ifPort, NewRoute.NetStop)); - - continue; - } - } - - if ((CurrentRoute = rt_blookup(NewRoute.NetStop))) { - /* found something... 
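The aging arithmetic used here (and again in rtmp_timeout() below) keeps an entry's age in the low nibble of EntryState and the flags in the high nibble: a recently refreshed entry only loses its UPDATED flag, otherwise its age is shifted one step toward BAD/UNUSED. A sketch of that single step, with assumed constant values:

/* assumed encoding: age in the low nibble, flags in the high nibble */
#define RTE_STATE_UNUSED  0x00
#define RTE_STATE_BAD     0x01
#define RTE_STATE_SUSPECT 0x02
#define RTE_STATE_GOOD    0x04
#define RTE_STATE_UPDATED 0x10

/* One validity-timer tick: GOOD -> SUSPECT -> BAD -> UNUSED, unless the
 * entry was updated since the last tick, in which case it is spared once. */
static unsigned char age_entry(unsigned char entry_state)
{
    unsigned char state = entry_state & 0x0F;

    if (state) {
        if (entry_state & RTE_STATE_UPDATED)
            entry_state &= ~RTE_STATE_UPDATED;  /* spare it this round */
        else
            state >>= 1;                        /* one step older */
    }
    return (entry_state & 0xF0) | state;
}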
*/ - - if (NewRoute.NetDist < 16 || - NewRoute.NetDist == NOTIFY_N_DIST ) { - - /* - * Check if the definition of the route changed - */ - - if (NewRoute.NetStop != CurrentRoute->NetStop || - NewRoute.NetStart != CurrentRoute->NetStart) { - - if (NewRoute.NetStop == CurrentRoute->NetStop && - NewRoute.NetStop == CurrentRoute->NetStart && - NewRoute.NetStart == 0) - - NewRoute.NetStart = NewRoute.NetStop; - - else if (NewRoute.NetStop == CurrentRoute->NetStop && - NewRoute.NetStart == NewRoute.NetStop && - CurrentRoute->NetStart == 0) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: Range %d-%d has changed to %d-%d Dist=%d\n", - CurrentRoute->NetStart, CurrentRoute->NetStop, - NewRoute.NetStart, NewRoute.NetStop, NewRoute.NetDist)); - NewRoute.NetStart = 0; - } - - else { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_update: Net Conflict Cur=%d, New=%d\n", - CurrentRoute->NetStop, NewRoute.NetStop)); - CurrentRoute->EntryState = - (CurrentRoute->EntryState & 0xF0) | RTE_STATE_BAD; - continue; - - } - } - - /* - * If we don't know the associated zones - */ - - if (!RT_ALL_ZONES_KNOWN(CurrentRoute)) { - - dPrintf(D_M_RTMP_LOW, D_L_INFO, - ("rtmp_update: Zone unknown for %d-%d state=0x%x\n", - CurrentRoute->NetStart, CurrentRoute->NetStop, - CurrentRoute->EntryState)); - - /* set the flag in the ifID structure telling - * that a scheduling of Zip Query is needed. - */ - - ifID->ifZipNeedQueries = 1; - continue; - } - - if (((CurrentRoute->EntryState & 0x0F) <= RTE_STATE_SUSPECT) && - NewRoute.NetDist != NOTIFY_N_DIST) { - - dPrintf(D_M_RTMP, D_L_INFO, - ("rtmp_update: update suspect entry %d-%d State=%d\n", - NewRoute.NetStart, NewRoute.NetStop, - (CurrentRoute->EntryState & 0x0F))); - - if (NewRoute.NetDist <= CurrentRoute->NetDist) { - CurrentRoute->NetDist = NewRoute.NetDist; - CurrentRoute->NetPort = NewRoute.NetPort; - CurrentRoute->NextIRNode = NewRoute.NextIRNode; - CurrentRoute->NextIRNet = NewRoute.NextIRNet; - CurrentRoute->EntryState = - (CurrentRoute->EntryState & 0xF0) | - (RTE_STATE_GOOD|RTE_STATE_UPDATED); - } - continue; - } - else { - - if (NewRoute.NetDist == NOTIFY_N_DIST) { - - CurrentRoute->EntryState = - (CurrentRoute->EntryState & 0xF0) | RTE_STATE_SUSPECT; - CurrentRoute->NetDist = NOTIFY_N_DIST; - continue; - } - } - - } - - - if ((NewRoute.NetDist <= CurrentRoute->NetDist) && (NewRoute.NetDist <16)) { - - /* Found a shorter or more recent Route, - * Replace with the New entryi - */ - - CurrentRoute->NetDist = NewRoute.NetDist; - CurrentRoute->NetPort = NewRoute.NetPort; - CurrentRoute->NextIRNode = NewRoute.NextIRNode; - CurrentRoute->NextIRNet = NewRoute.NextIRNet; - CurrentRoute->EntryState |= RTE_STATE_UPDATED; - - /* Can we consider now that the entry is updated? 
*/ - dPrintf(D_M_RTMP_LOW, D_L_INFO, - ("rtmp_update: Shorter route found %d-%d, update\n", - NewRoute.NetStart, NewRoute.NetStop)); - -#ifdef AURP_SUPPORT - if (ddp_AURPsendx && (aurp_ifID->ifFlags & AT_IFF_AURP)) - ddp_AURPsendx(AURPCODE_RTUPDATE, - (void *)&NewRoute, AURPEV_NetDistChange); -#endif - } - } - else { /* no entry found */ - - if (NewRoute.NetDist < 16 && NewRoute.NetDist != NOTIFY_N_DIST && - NewRoute.NextIRNet >= ifID->ifThisCableStart && - NewRoute.NextIRNet <= ifID->ifThisCableEnd) { - - NewRoute.EntryState = (RTE_STATE_GOOD|RTE_STATE_UPDATED); - - dPrintf(D_M_RTMP_LOW, D_L_INFO, - ("rtmp_update: NewRoute %d-%d Tuple #%d\n", - NewRoute.NetStart, NewRoute.NetStop, tuple_nb)); - - ifID->ifZipNeedQueries = 1; - - if (rt_insert(NewRoute.NetStop, NewRoute.NetStart, NewRoute.NextIRNet, - NewRoute.NextIRNode, NewRoute.NetDist, NewRoute.NetPort, - NewRoute.EntryState) == (RT_entry *)NULL) - ErrorRTMPoverflow = 1; -#ifdef AURP_SUPPORT - else if (ddp_AURPsendx && (aurp_ifID->ifFlags & AT_IFF_AURP)) - ddp_AURPsendx(AURPCODE_RTUPDATE, - (void *)&NewRoute, AURPEV_NetAdded); -#endif - } - } - - } /* end of main while */ - ifID->ifRouterState = ROUTER_UPDATED; - if (ifID->ifZipNeedQueries) - zip_send_queries(ifID, 0, 0xFF); - -/* - timeout(rtmp_timeout, (caddr_t) ifID, 20*SYS_HZ); -*/ -} /* rtmp_update */ - -/* The RTMP validity timer expired, we need to update the - * state of each routing entry in the table - * because there is only one validity timer and it is always running, - * we can't just age all the entries automatically, as we might be - * aging entries that were just updated. So, when an entry is updated, - * the RTE_STATE_UPDATED bit is set and when the aging routine is called - * it just resets this bit if it is set, only if it is not set will the - * route actually be aged. - * Note there are 4 states for an entry, the state is decremented until - * it reaches the bad state. At this point, the entry is removed - * - * RTE_STATE_GOOD : The entry was valid (will be SUSPECT) - * RTE_STATE_SUSPECT: The entry was suspect (can still be used for routing) - * RTE_STATE_BAD : The entry was bad and is now deleted - * RTE_STATE_UNUSED : Unused or removed entry in the table - */ - -void rtmp_timeout(void *arg) -{ - at_ifaddr_t *ifID = (at_ifaddr_t *)arg; - register u_char state; - short i; - RT_entry *en = &RT_table[0]; - - atalk_lock(); - - if (ifID->ifRoutingState < PORT_ONLINE) { - atalk_unlock(); - return; - } - - /* for multihoming mode, we use ifRouterState to tell if there - is a router out there, so we know when to use cable multicast */ - if (ifID->ifRouterState > NO_ROUTER) - ifID->ifRouterState--; - - for (i = 0 ; i < RT_maxentry; i++,en++) { - - /* we want to age "learned" nets, not directly connected ones */ - state = en->EntryState & 0x0F; - - - if (state > RTE_STATE_UNUSED && - !(en->EntryState & RTE_STATE_PERMANENT) && en->NetStop && - en->NetDist && en->NetPort == ifID->ifPort) { - - /* if entry has been updated recently, just clear the UPDATED - bit. 
if bit not set, then we can age the entry */ - if (en->EntryState & RTE_STATE_UPDATED) { - en->EntryState &= ~RTE_STATE_UPDATED; - continue; - } - else - state = state >> 1 ; /* decrement state */ - - if (state == RTE_STATE_UNUSED) {/* was BAD, needs to delete */ - dPrintf(D_M_RTMP, D_L_INFO, - ("rtmp_timeout: Bad State for %d-%d (e#%d): remove\n", - en->NetStart, en->NetStop, i)); -#ifdef AURP_SUPPORT - if (ddp_AURPsendx && (aurp_ifID->ifFlags & AT_IFF_AURP)) - ddp_AURPsendx(AURPCODE_RTUPDATE, - (void *)en, AURPEV_NetDeleted); -#endif - - /* then clear the bit in the table concerning this entry. - If the zone Count reaches zero, remove the entry */ - - zt_remove_zones(en->ZoneBitMap); - - RT_DELETE(en->NetStop, en->NetStart); - } - else { - en->EntryState = (en->EntryState & 0xF0) | state; - dPrintf(D_M_RTMP, D_L_INFO, ("Change State for %d-%d to %d (e#%d)\n", - en->NetStart, en->NetStop, state, i)); - } - } - } - timeout(rtmp_timeout, (caddr_t) ifID, 20*SYS_HZ); - - atalk_unlock(); -} - -/* - * rtmp_prep_new_packet: allocate a ddp packet for RTMP use (reply to a RTMP request or - * Route Data Request, or generation of RTMP data packets. - * The ddp header is filled with relevant information, as well as - * the beginning of the rtmp packet with the following info: - * Router's net number (2bytes) - * ID Length = 8 (1byte) - * Router's node ID (1byte) - * Extended Range Start (2bytes) - * Range + dist (0x80) (1byte) - * Extended Range End (2bytes) - * Rtmp version (0x82) (1byte) - * - */ - -gbuf_t *rtmp_prep_new_packet (at_ifaddr_t *, at_net, u_char, char); - -gbuf_t *rtmp_prep_new_packet (ifID, DstNet, DstNode, socket) -register at_ifaddr_t *ifID; -register at_net DstNet; -register u_char DstNode; -register char socket; - -{ - gbuf_t *m; - register at_ddp_t *ddp; - register char * rtmp_data; - - if ((m = gbuf_alloc(AT_WR_OFFSET+1024, PRI_HI)) == NULL) { - dPrintf(D_M_RTMP, D_L_WARNING, ("rtmp_new_packet: Can't allocate mblock\n")); - return ((gbuf_t *)NULL); - } - - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,DDP_X_HDR_SIZE + 10); - ddp = (at_ddp_t *)(gbuf_rptr(m)); - - /* - * Prepare the DDP header of the new packet - */ - - - ddp->unused = ddp->hopcount = 0; - - UAS_ASSIGN(ddp->checksum, 0); - - NET_NET(ddp->dst_net, DstNet); - ddp->dst_node = DstNode; - ddp->dst_socket = socket; - - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - ddp->src_node = ifID->ifThisNode.s_node; - ddp->src_socket = RTMP_SOCKET; - ddp->type = DDP_RTMP; - - /* - * Prepare the RTMP header (Router Net, ID, Node and Net Tuple - * (this works only if we are on an extended net) - */ - - rtmp_data = ddp->data; - - *rtmp_data++ = (ifID->ifThisNode.s_net & 0xff00) >> 8; - *rtmp_data++ = ifID->ifThisNode.s_net & 0x00ff ; - *rtmp_data++ = 8; - *rtmp_data++ = (u_char)ifID->ifThisNode.s_node; - *rtmp_data++ = (CableStart & 0xff00) >> 8; - *rtmp_data++ = CableStart & 0x00ff ; - *rtmp_data++ = 0x80; /* first tuple, so distance is always zero */ - *rtmp_data++ = (CableStop & 0xff00) >> 8; - *rtmp_data++ = CableStop & 0x00ff ; - *rtmp_data++ = RTMP_VERSION_NUMBER; - - return (m); - - -} -int rtmp_r_find_bridge(at_ifaddr_t *, at_ddp_t *); - -int rtmp_r_find_bridge(ifID, orig_ddp) -register at_ifaddr_t *ifID; -register at_ddp_t *orig_ddp; - -{ - gbuf_t *m; - register int size, status; - register at_ddp_t *ddp; - register char * rtmp_data; - RT_entry *Entry; - - - /* find the bridge for the querried net */ - - Entry = rt_blookup(NET_VALUE(orig_ddp->dst_net)); - - if (Entry == NULL) { - dPrintf(D_M_RTMP, D_L_WARNING, 
("rtmp_r_find_bridge: no info for net %d\n", - NET_VALUE(orig_ddp->dst_net))); - return (1); - } - - - size = DDP_X_HDR_SIZE + 10 ; - if ((m = gbuf_alloc(AT_WR_OFFSET+size, PRI_HI)) == NULL) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_r_find_bridge: Can't allocate mblock\n")); - return (ENOBUFS); - } - - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,size); - ddp = (at_ddp_t *)(gbuf_rptr(m)); - - /* - * Prepare the DDP header of the new packet - */ - - ddp->unused = ddp->hopcount = 0; - - DDPLEN_ASSIGN(ddp, size); - UAS_ASSIGN(ddp->checksum, 0); - - NET_NET(ddp->dst_net, orig_ddp->src_net); - ddp->dst_node = orig_ddp->src_node; - ddp->dst_socket = orig_ddp->src_socket; - - NET_ASSIGN(ddp->src_net, Entry->NextIRNet); - ddp->src_node = Entry->NextIRNode; - ddp->src_socket = RTMP_SOCKET; - ddp->type = DDP_RTMP; - - /* - * Prepare the RTMP header (Router Net, ID, Node and Net Tuple - * (this works only if we are on an extended net) - */ - - rtmp_data = ddp->data; - - *rtmp_data++ = (Entry->NextIRNet & 0xff00) >> 8; - *rtmp_data++ = Entry->NextIRNet & 0x00ff ; - *rtmp_data++ = 8; - *rtmp_data++ = (u_char)Entry->NextIRNode; - *rtmp_data++ = (Entry->NetStart & 0xff00) >> 8; - *rtmp_data++ = Entry->NetStart & 0x00ff ; - *rtmp_data++ = 0x80; /* first tuple, so distance is always zero */ - *rtmp_data++ = (Entry->NetStop & 0xff00) >> 8; - *rtmp_data++ = Entry->NetStop & 0x00ff ; - *rtmp_data++ = RTMP_VERSION_NUMBER; - - - dPrintf(D_M_RTMP, D_L_INFO, ("rtmp_r_find_bridge: for net %d send back router %d.%d\n", - NET_VALUE(orig_ddp->dst_net), Entry->NextIRNet, Entry->NextIRNode)); - if ((status = ddp_router_output(m, ifID, AT_ADDR, NET_VALUE(orig_ddp->src_net), - orig_ddp->src_node, 0))){ - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_r_find_bridge: ddp_router_output failed status=%d\n", status)); - return (status); - } - return (0); -} - -/* - * rtmp_send_table: - * Send the routing table entries in RTMP data packets. - * Use split horizon if specified. The Data packets are sent - * as full DDP packets, if the last packet is full an empty - * packet is sent to tell the recipients that this is the end of - * the table... 
- * - */ -static int rtmp_send_table(at_ifaddr_t *, at_net, u_char, short, char, short); - -static int rtmp_send_table(ifID, DestNet, DestNode, split_hz, socket, - n_neighbors) - register at_ifaddr_t *ifID; /* interface/port params */ - register at_net DestNet; /* net where to send the table */ - register u_char DestNode; /* node where to send the table */ - short split_hz; /* use split horizon */ - char socket; /* the destination socket to send to */ - short n_neighbors; /* used to send packets telling we are going down */ -{ - - RT_entry *Entry; - char *Buff_ptr; - u_char NewDist; - gbuf_t *m; - short size,status ; - register at_ddp_t *ddp; - register short EntNb = 0, sent_tuple = 0; - - if (ifID->ifRoutingState < PORT_ONLINE) { - dPrintf(D_M_RTMP, D_L_INFO, - ("rtmp_send_table: port %d activating, we don't send anything!\n", - ifID->ifPort)); - return (0); - } - - /* prepare tuples and packets for DDP */ - /* if split horizon, do not send tuples we can reach on the port we - * want to send to - */ - - Entry = &RT_table[0]; - size = 0; - if (!(m = rtmp_prep_new_packet(ifID, DestNet, DestNode, socket))) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_send_table: rtmp_prep_new_packet failed\n")); - return(ENOBUFS); - } - - ddp = (at_ddp_t *)(gbuf_rptr(m)); - Buff_ptr = (char *)((char *)ddp + DDP_X_HDR_SIZE + 10); - - while (EntNb < RT_maxentry) { - - if (Entry->NetStop && ((Entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT)) { - if (!(split_hz && ifID->ifPort == Entry->NetPort)) { - sent_tuple++; - - if (((Entry->EntryState & 0x0F) < RTE_STATE_SUSPECT) || n_neighbors) - NewDist = NOTIFY_N_DIST; - else - NewDist = Entry->NetDist & 0x1F; - - if (Entry->NetStart) { /* Extended */ - *Buff_ptr++ = (Entry->NetStart & 0xFF00) >> 8; - *Buff_ptr++ = (Entry->NetStart & 0x00FF); - *Buff_ptr++ = 0x80 | NewDist; - *Buff_ptr++ = (Entry->NetStop & 0xFF00) >> 8; - *Buff_ptr++ = (Entry->NetStop & 0x00FF); - *Buff_ptr++ = RTMP_VERSION_NUMBER; - size += 6; - } - else { /* non extended tuple */ - *Buff_ptr++ = (Entry->NetStop & 0xFF00) >> 8; - *Buff_ptr++ = (Entry->NetStop & 0x00FF); - *Buff_ptr++ = NewDist; - size += 3; - } - } - } - - if (size > (DDP_DATA_SIZE-20)) { - DDPLEN_ASSIGN(ddp, (size + DDP_X_HDR_SIZE + 10)); - gbuf_winc(m,size); - if ((status = ddp_router_output(m, ifID, AT_ADDR, - NET_VALUE(DestNet),DestNode, 0))){ - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_send_table: ddp_router_output failed status=%d\n", - status)); - return (status); - } - if ((m = rtmp_prep_new_packet (ifID, DestNet, DestNode, socket)) == NULL){ - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_send_table: rtmp_prep_new_packet failed status=%d\n", - status)); - return (ENOBUFS); - } - ddp = (at_ddp_t *)(gbuf_rptr(m)); - Buff_ptr = (char *)((char *)ddp + DDP_X_HDR_SIZE + 10); - - dPrintf(D_M_RTMP_LOW, D_L_OUTPUT, - ("rtmp_s_tble: Send %d tuples on port %d\n", - sent_tuple, ifID->ifPort)); - sent_tuple = 0; - size = 0; - } - - Entry++; - EntNb++; - } - - /* - * If we have some remaining entries to send, send them now. 
- * otherwise, the last packet we sent was full and we need to send an empty one - */ - - DDPLEN_ASSIGN(ddp, (size + DDP_X_HDR_SIZE + 10)); - gbuf_winc(m,size); - if ((status = - ddp_router_output(m, ifID, AT_ADDR, NET_VALUE(DestNet),DestNode, 0))){ - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_send_table: ddp_router_output failed status=%d\n", status)); - return (status); - } - dPrintf(D_M_RTMP_LOW, D_L_OUTPUT, - ("rtmp_s_tble: LAST Packet split=%d with %d tuples sent on port %d\n", - split_hz, sent_tuple, ifID->ifPort)); - - return (0); -} - -/* - * rtmp_request: respond to the 3 types of requests RTMP may receive - * RTMP func =1 : respond with an RTMP Response Packet - * RTMP func =2 : respond with the routing table RTMP packet with split horizon - * RTMP func =3 : respond with the routing table RTMP packet no split horizon - * - * see Inside AppleTalk around page 5-18 for "details" - */ - -static void rtmp_request(ifID, ddp) - register at_ifaddr_t *ifID; - register at_ddp_t *ddp; -{ - - short split_horizon = FALSE; - short code; - short error; - - /* We ignore the request if we're activating on that port */ - - if (ifID->ifRoutingState < PORT_ONLINE) - return; - - /* check RTMP function code */ - - code = ddp->data[0]; - - switch (code) { - - case RTMP_REQ_FUNC1: /* RTMP Find Bridge */ - - /* RTMP Request Packet: we send a response with the next IRrange */ - dPrintf(D_M_RTMP, D_L_INPUT, - ( "rtmp_request: find bridge for net %d port %d node %d.%d\n", - NET_VALUE(ddp->dst_net), ifID->ifPort, - NET_VALUE(ddp->src_net), ddp->src_node)); - - if ((error = rtmp_r_find_bridge (ifID, ddp))) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_request: Code 1 ddp_r_output failed error=%d\n", - error)); - return; - } - - break; - - case RTMP_REQ_FUNC2: - - split_horizon = TRUE; - - case RTMP_REQ_FUNC3: - - /* RTMP Route Request Packet */ - - dPrintf(D_M_RTMP, D_L_INPUT, - ("rtmp_request: received code=%d from %d.%d for %d.%d\n", - code, NET_VALUE(ddp->src_net), ddp->src_node, - NET_VALUE(ddp->dst_net), ddp->dst_node)); - - rtmp_send_table(ifID, ddp->src_net, ddp->src_node, - split_horizon, ddp->src_socket, 0); - - break; - - default: - - /* unknown type of request */ - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_request : invalid type of request =%d\n", - code)); - break; - } - -} - -/* locked version of rtmp_send_port */ -void rtmp_send_port_locked(void *arg) -{ - at_ifaddr_t *ifID = (at_ifaddr_t *)arg; - atalk_lock(); - rtmp_send_port(ifID); - atalk_unlock(); -} - - -/* - * rtmp_send_port : send the routing table on one connected port - * check for the port status and if ok, send the - * rtmp tuples to the broadcast address for the port - * usually called on timeout every 10 seconds. 
- */ - -void rtmp_send_port(ifID) - register at_ifaddr_t *ifID; -{ - at_net DestNet; - - NET_ASSIGN(DestNet, 0); - - if (ifID && ifID->ifRoutingState == PORT_ONLINE) { - dPrintf(D_M_RTMP_LOW, D_L_OUTPUT, - ("rtmp_send_port: do stuff for port=%d\n", - ifID->ifPort)); - if (ifID->ifZipNeedQueries) - zip_send_queries(ifID, 0, 0xFF); - if (!ROUTING_MODE) { - return; - } - rtmp_send_table(ifID, DestNet, 0xFF, 1, RTMP_SOCKET, 0); - } - -#if DEBUG - if (ifID == ifID_home) - dPrintf(D_M_RTMP_LOW, D_L_VERBOSE, - ("I:%5d O:%5d H:%5d dropped:%d\n", - pktsIn, pktsOut, pktsHome, pktsDropped)); - - dPrintf(D_M_RTMP_LOW, D_L_TRACE, - ("rtmp_send_port: func=0x%x, ifID=0x%x\n", - (u_int) rtmp_send_port, (u_int) ifID)); -#endif - - timeout (rtmp_send_port_locked, (caddr_t)ifID, 10 * SYS_HZ); - -} - -/* rtmp_dropper: check the number of packets received every x seconds. - * the actual packet dropping is done in ddp_input - */ - -void rtmp_dropper(__unused void *arg) -{ - - atalk_lock(); - - pktsIn = pktsOut = pktsHome = pktsDropped = 0; - timeout(rtmp_dropper, NULL, 2*SYS_HZ); - - atalk_unlock(); -} - -/* - * rtmp_router_start: perform the sanity checks before declaring the router up - * and running. This function looks for discrepancies between the net infos - * for the different ports and for seed problems. - * If everything is fine, the state of each port is brought to PORT_ONLINE. - * ### LD 01/09/95 Changed to correct Zone problem on non seed ports. - */ - -int rtmp_router_start(at_kern_err_t *keP) -{ - int err = 0; - register at_ifaddr_t *ifID, *ifID2; - register short Index, router_starting_timer = 0; - register RT_entry *Entry; - register at_net_al netStart, netStop; - struct timespec ts; - - - /* clear the static structure used to record routing errors */ - bzero(&ke, sizeof(ke)); - - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - - /* if non seed, need to acquire the right node address */ - - if ((ifID->ifFlags & RTR_SEED_PORT) == 0) { - if ((ifID->ifThisCableStart == 0 && ifID->ifThisCableEnd == 0) || - (ifID->ifThisCableStart >= DDP_STARTUP_LOW && - ifID->ifThisCableEnd <= DDP_STARTUP_HIGH)) { - - if (ifID->ifThisCableEnd == 0) { - keP->error = KE_NO_SEED; - keP->port1 = ifID->ifPort; - strlcpy(keP->name1, ifID->ifName,sizeof(keP->name1)); - } - else { - keP->error = KE_INVAL_RANGE; - keP->port1 = ifID->ifPort; - strlcpy(keP->name1, ifID->ifName,sizeof(keP->name1)); - keP->netr1b = ifID->ifThisCableStart; - keP->netr1e = ifID->ifThisCableEnd; - } - ifID->ifRoutingState = PORT_ERR_STARTUP; - RouterError(ifID->ifPort, ERTR_CABLE_STARTUP); - - goto error; - } - - /* we are non seed, so try to acquire the zones for that guy */ - ifID->ifZipNeedQueries = 1; - - dPrintf(D_M_RTMP, D_L_STARTUP, - ("rtmp_router_start: call elap_online for Non Seed port #%d cable =%d-%d\n", - ifID->ifPort, CableStart, CableStop)); - if ((err = elap_online3(ifID))) - goto error; - } - } - - /* Check if we have a problem with the routing table size */ - - if (ErrorRTMPoverflow) { - keP->error = KE_RTMP_OVERFLOW; - goto error; - } - - - /* Now, check that we don't have a conflict in between our interfaces */ - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - - /* check if the RoutingState != PORT_ONERROR */ - if (ifID->ifRoutingState < PORT_ACTIVATING) { - goto error; - } - - if ((ifID->ifThisCableStart == 0 && ifID->ifThisCableEnd == 0) || - (ifID->ifThisCableStart >= DDP_STARTUP_LOW && - ifID->ifThisCableEnd <= DDP_STARTUP_HIGH)) { - - if (ifID->ifThisCableEnd == 0) { - keP->error = KE_NO_SEED; - keP->port1 = ifID->ifPort; - 
strlcpy(keP->name1, ifID->ifName,sizeof(keP->name1)); - } - else { - keP->error = KE_INVAL_RANGE; - keP->port1 = ifID->ifPort; - strlcpy(keP->name1, ifID->ifName,sizeof(keP->name1)); - keP->netr1b = ifID->ifThisCableStart; - keP->netr1e = ifID->ifThisCableEnd; - } - - ifID->ifRoutingState = PORT_ERR_STARTUP; - RouterError(ifID->ifPort, ERTR_CABLE_STARTUP); - - goto error; - } - - /* check the interface address against all other ifs */ - - netStart = ifID->ifThisCableStart; - netStop = ifID->ifThisCableEnd; - - for (ifID2 = TAILQ_NEXT(ifID, aa_link); ifID2; - ifID2 = TAILQ_NEXT(ifID2, aa_link)) { - - if (((netStart >= ifID2->ifThisCableStart) && - (netStart <= ifID2->ifThisCableEnd)) || - ((netStop >= ifID2->ifThisCableStart) && - (netStop <= ifID2->ifThisCableEnd)) || - ((ifID2->ifThisCableStart >= netStart) && - (ifID2->ifThisCableStart <= netStop)) || - ((ifID2->ifThisCableEnd >= netStart) && - (ifID2->ifThisCableEnd <= netStop)) ) { - - keP->error = KE_CONF_RANGE; - keP->port1 = ifID->ifPort; - strlcpy(keP->name1, ifID->ifName,sizeof(keP->name1)); - keP->port2 = ifID2->ifPort; - strlcpy(keP->name2, ifID2->ifName,sizeof(keP->name2)); - keP->netr1b = ifID->ifThisCableStart; - keP->netr1e = ifID->ifThisCableEnd; - ifID->ifRoutingState = PORT_ERR_CABLER; - RouterError(ifID->ifPort, ERTR_CABLE_CONFLICT); - goto error; - } - - } - - /* ### LD 01/04/94: We need to fill in the next IR info in the routing table */ - Entry = rt_blookup(ifID->ifThisCableEnd); - - if (Entry == NULL) { - dPrintf(D_M_RTMP, D_L_ERROR, - ("rtmp_router_start: we don't know our cable range port=%d\n", - ifID->ifPort)); - - goto error; - } - - /* - * Note: At this point, non seed ports may not be aware of their Default zone - */ - - if (!(ifID->ifFlags & RTR_SEED_PORT)) { - ifID->ifDefZone = 0; - Entry->EntryState |= (RTE_STATE_GOOD|RTE_STATE_UPDATED); - } - - ifID->ifRoutingState = PORT_ONLINE; - ifID->ifState = LAP_ONLINE; - - /* set the right net and node for each port */ - Entry->NextIRNet = ifID->ifThisNode.s_net; - Entry->NextIRNode= ifID->ifThisNode.s_node; - - dPrintf(D_M_RTMP, D_L_STARTUP, - ("rtmp_router_start: bring port=%d [%d.%d]... on line\n", - ifID->ifPort, ifID->ifThisNode.s_net, - ifID->ifThisNode.s_node)); - - } - - /* - * Everything is fine, we can begin to babble on the net... - */ - - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (ifID->ifRoutingState == PORT_ONLINE) { - rtmp_send_port(ifID); - timeout(rtmp_timeout, (caddr_t)ifID, (50+ifID->ifPort) * SYS_HZ); - if (ifID->ifRoutingState < PORT_ACTIVATING) { - goto error; - } - } - } - - /* Check if we have a problem with the routing or zip table size */ - - if (ErrorRTMPoverflow) { - keP->error = KE_RTMP_OVERFLOW; - goto error; - } - if (ErrorZIPoverflow) { - keP->error = KE_ZIP_OVERFLOW; - goto error; - } - - /* sleep for 11 seconds */ - ts.tv_sec = 11; - ts.tv_nsec = 0; - if ((err = - /* *** eventually this will be the ifID for the interface - being brought up in router mode *** */ - /* *** router sends rtmp packets every 10 seconds *** */ - msleep(&ifID_home->startup_inprogress, atalk_mutex, - PSOCK | PCATCH, "router_start1", &ts)) - != EWOULDBLOCK) { - goto error; - } - - /* Is the stack still up ? 
*/ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - err = ECONNABORTED; - goto error; - } - -startZoneInfo: - err = 0; - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - - if (ifID->ifRoutingState < PORT_ACTIVATING) { - goto error; - } - - if ((ifID->ifZipNeedQueries) - && (ifID->ifFlags & RTR_SEED_PORT) == 0) { - dPrintf(D_M_RTMP, D_L_STARTUP, - ("rtmp_router_start: send Zip Queries for Port %d\n", - ifID->ifPort)); - zip_send_queries(ifID, 0, 0xFF); - - if (router_starting_timer >= 10) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmp_router_start: no received response to ZipNeedQueries\n")); - keP->error = KE_NO_ZONES_FOUND; - keP->port1 = ifID->ifPort; - strlcpy(keP->name1, ifID->ifName,sizeof(keP->name1)); - keP->netr1b = ifID->ifThisCableStart; - keP->netr1e = ifID->ifThisCableEnd; - ifID->ifRoutingState = PORT_ERR_CABLER; - RouterError(ifID->ifPort, ERTR_CABLE_CONFLICT); - goto error; - } - - dPrintf(D_M_RTMP, D_L_STARTUP, - ("rtmp_router_start: waiting for zone info to complete\n")); - /* sleep for 10 seconds */ - ts.tv_sec = 10; - ts.tv_nsec = 0; - if ((err = - /* *** eventually this will be the ifID for the - interface being brought up in router mode *** */ - msleep(&ifID_home->startup_inprogress, atalk_mutex, - PSOCK | PCATCH, "router_start2", &ts)) - != EWOULDBLOCK) { - goto error; - } - - /* Is the stack still up ? */ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - err = ECONNABORTED; - goto error; - } - - err = 0; - router_starting_timer++; - goto startZoneInfo; - } - - } - - /* At This Point, check if we know the default zones for non seed port */ - - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - - if (ifID->ifRoutingState < PORT_ACTIVATING) - goto error; - - if (!(ifID->ifFlags & RTR_SEED_PORT)) { - Entry = rt_blookup(ifID->ifThisCableEnd); - - if (Entry == NULL) { - dPrintf(D_M_RTMP, D_L_ERROR, - ("rtmp_router_start: (2)we don't know our cable range port=%d\n", - ifID->ifPort)); - goto error; - } - - dPrintf(D_M_RTMP, D_L_STARTUP, - ("rtmp_router_start: if %s set to permanent\n", - ifID->ifName)); - Entry->NetDist = 0; /* added 4-29-96 jjs, prevent direct - nets from showing non-zero - distance */ - /* upgrade the non seed ports. 
*/ - Entry->EntryState |= RTE_STATE_PERMANENT; - - Index = zt_ent_zindex(Entry->ZoneBitMap); - if (Index <= 0) { - dPrintf(D_M_RTMP, D_L_ERROR, - ("rtmp_router_start: still don't know default zone for port %d\n", - ifID->ifPort)); - } else { - ifID->ifDefZone = Index; - if ((ifID == ifID_home) || MULTIHOME_MODE) { - ifID->ifZoneName = ZT_table[Index-1].Zone; - (void)regDefaultZone(ifID); - - /* Send zone change event */ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ZONEUPDATED, 0, &(ifID->ifZoneName)); - } - } - } - } - - /* Check if we have a problem with the routing or zip table size */ - - if (ErrorRTMPoverflow) { - keP->error = KE_RTMP_OVERFLOW; - goto error; - } - if (ErrorZIPoverflow) { - keP->error = KE_ZIP_OVERFLOW; - goto error; - } - - /* - * Handle the Home Port specifics - */ - - /* set the router address as being us no matter what */ - ifID_home->ifARouter = ifID_home->ifThisNode; - ifID_home->ifRouterState = ROUTER_UPDATED; - - /* prepare the packet dropper timer */ - timeout (rtmp_dropper, NULL, 1*SYS_HZ); - - return(0); - -error: - dPrintf(D_M_RTMP,D_L_ERROR, - ("rtmp_router_start: error type=%d occurred on port %d\n", - ifID->ifRoutingState, ifID->ifPort)); - - /* if there's no keP->error, copy the local ke structure, - since the error occurred asynchronously */ - if ((!keP->error) && ke.error) - bcopy(&ke, keP, sizeof(ke)); - rtmp_shutdown(); - - /* to return the error in keP, the ioctl has to return 0 */ - - return((keP->error)? 0: err); -} /* rtmp_router_start */ - -void rtmp_router_start_tmo(void *arg) -{ - (void)rtmp_router_start((at_kern_err_t *)arg); -} - -void rtmp_shutdown(void) -{ - register at_ifaddr_t *ifID; - at_net DestNet; - - NET_ASSIGN(DestNet, 0); - - dPrintf(D_M_RTMP, D_L_SHUTDN, - ("rtmp_shutdown: stop sending to all ports\n")); - - untimeout(rtmp_dropper, (void *)0); - untimeout(rtmp_router_start_tmo, (void *)1); /* added for 2225395 */ - untimeout(rtmp_router_start_tmo, (void *)3); /* added for 2225395 */ - - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (ifID->ifRoutingState > PORT_OFFLINE ) { - if (ifID->ifRoutingState == PORT_ONLINE) { - untimeout(rtmp_send_port_locked, (caddr_t)ifID); - untimeout(rtmp_timeout, (caddr_t) ifID); - } - /* - * it's better to notify the neighbour routers that we are going down - */ - if (ROUTING_MODE) - rtmp_send_table(ifID, DestNet, 0xFF, TRUE, - RTMP_SOCKET, TRUE); - - ifID->ifRoutingState = PORT_OFFLINE; - - dPrintf(D_M_RTMP, D_L_SHUTDN, - ("rtmp_shutdown: routing on port=%d... off line\nStats:\n", - ifID->ifPort)); - dPrintf(D_M_RTMP, D_L_SHUTDN, - ("fwdBytes : %ld\nfwdPackets : %ld\ndroppedBytes : %ld\ndroppedPkts : %ld\n", - ifID->ifStatistics.fwdBytes, ifID->ifStatistics.fwdPkts, - ifID->ifStatistics.droppedBytes, ifID->ifStatistics.droppedPkts)); - - } - } - -} - -/* - * Remove all entries associated with the specified port. - */ -void rtmp_purge(ifID) - at_ifaddr_t *ifID; -{ - u_char state; - int i; - RT_entry *en = &RT_table[0]; - - for (i=0; i < RT_maxentry; i++) { - state = en->EntryState & 0x0F; - if ((state > RTE_STATE_UNUSED) && (state != RTE_STATE_PERMANENT) - && en->NetStop && en->NetDist && (en->NetPort == ifID->ifPort)) { - zt_remove_zones(en->ZoneBitMap); - RT_DELETE(en->NetStop, en->NetStart); - } - en++; - } -} diff --git a/bsd/netat/ddp_r_zip.c b/bsd/netat/ddp_r_zip.c deleted file mode 100644 index 3c377d737..000000000 --- a/bsd/netat/ddp_r_zip.c +++ /dev/null @@ -1,2003 +0,0 @@ -/* - * Copyright (c) 1988-2007 Apple Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * 0.01 05/12/94 Laurent Dumont Creation - * - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ -/* - * - * Router ZIP protocol functions: - * - * This file contains Routing specifics to handle ZIP requests and responses - * sent and received by a router node. - * - * The entry point for the zip input in ddp is valid only when we're - * running in router mode. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* globals */ -extern at_ifaddr_t *ifID_table[], *ifID_home; -extern short ErrorZIPoverflow; - -/********************************************************************** - * Remarks : - * ZIP is implemented as a "peer" of DDP, so the packets coming in - * to ZIP have the same headers as those coming in to DDP {ddp...}. - * Same applies to outgoing packets. Also, unlike DDP, ZIP assumes - * that an incoming packet is in a contiguous gbuf_t. - * - **********************************************************************/ - -static int netinfo_reply_pending; -static void zip_netinfo_reply(at_x_zip_t *, at_ifaddr_t *); -static void zip_getnetinfo(at_ifaddr_t *); -static void zip_getnetinfo_locked(void *); -static void send_phony_reply(void *); - -int zip_reply_received(gbuf_t *, at_ifaddr_t *, int); -int zip_reply_to_getlocalzones(at_ifaddr_t *, gbuf_t *); -int zip_reply_to_getzonelist(at_ifaddr_t *, gbuf_t *); -static void zip_reply_to_getmyzone(at_ifaddr_t *, gbuf_t *); -gbuf_t *zip_prep_query_packet(at_ifaddr_t *, at_net_al, at_node); - -static void zip_send_reply_to_query(gbuf_t *, at_ifaddr_t *); -static void zip_send_ext_reply_to_query(gbuf_t *, at_ifaddr_t *, RT_entry *, u_short); -static gbuf_t *prep_ZIP_reply_packet(gbuf_t *, at_ifaddr_t *); -static void zip_send_getnetinfo_reply(gbuf_t *, at_ifaddr_t *); - -/* - * zip_send_getnetinfo_reply: we received a GetNetInfo packet, we need to reply - * with the right information for the port. 
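The reply assembled below is laid out as: function code, flag byte, cable range, the zone name echoed from the request, a zone-multicast address, and, when the requested zone was invalid, the default zone appended at the end. A minimal builder for the fixed front of that packet; the flag values follow the usual GetNetInfo bit assignments but are stated here as assumptions:

#include <stdint.h>
#include <string.h>
#include <stddef.h>

#define ZIP_NETINFO_REPLY    6      /* assumed ZIP function code */
#define ZIP_ZONENAME_INVALID 0x80   /* assumed flag bits */
#define ZIP_USE_BROADCAST    0x40
#define ZIP_ONE_ZONE         0x20

/* Fill the front of a GetNetInfo reply; return the bytes used so far
 * (the multicast-address length byte is written at this offset next). */
static size_t gni_front(uint8_t *p, uint8_t flags,
                        uint16_t cable_start, uint16_t cable_stop,
                        const uint8_t *zone, uint8_t zone_len)
{
    p[0] = ZIP_NETINFO_REPLY;
    p[1] = flags;                   /* invalid / use-broadcast / one-zone */
    p[2] = cable_start >> 8;
    p[3] = cable_start & 0xFF;
    p[4] = cable_stop >> 8;
    p[5] = cable_stop & 0xFF;
    p[6] = zone_len;                /* zone name echoed from the request */
    memcpy(&p[7], zone, zone_len);
    return 7 + (size_t)zone_len;
}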
- */ -static void zip_send_getnetinfo_reply(m, ifID) - register gbuf_t *m; - register at_ifaddr_t *ifID; -{ - at_nvestr_t *zname; - gbuf_t *m_sent; - at_ddp_t *ddp, *ddp_sent; - short ZoneNameProvided = FALSE; - short RequestIsBroadcasted = FALSE; - u_short znumber, len, packet_length = 0, size, status; - RT_entry *Entry; - char GNIReply[128]; - - ddp = (at_ddp_t *)gbuf_rptr(m); - - /* access the Zone Name info part of the GetNetInfo Request */ - - zname = (at_nvestr_t *)(gbuf_rptr(m) + DDP_X_HDR_SIZE + 6); - - if (zname->len > ZIP_MAX_ZONE_LENGTH) { - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_s_gni_r: zone len too long l=%d ddplen=%d\n", - zname->len, DDPLEN_VALUE(ddp))); - return; - } - - - if (zname->len) - ZoneNameProvided = TRUE; - - GNIReply[0] = ZIP_NETINFO_REPLY; - GNIReply[1] = ZIP_ZONENAME_INVALID; - - /* check if we are the originator is in the cable range for this interface */ - - if ((NET_VALUE(ddp->src_net) < CableStart || NET_VALUE(ddp->src_net) > CableStop) && - (NET_VALUE(ddp->dst_net) == 0 && ddp->dst_node == 0xff)) { - RequestIsBroadcasted = TRUE; - } - Entry = rt_blookup(CableStop); - - if (Entry != NULL && RT_ALL_ZONES_KNOWN(Entry)) { /* this net is well known... */ - - GNIReply[2] = (Entry->NetStart & 0xFF00) >> 8; - GNIReply[3] = (Entry->NetStart & 0x00FF); - GNIReply[4] = (Entry->NetStop & 0xFF00) >> 8; - GNIReply[5] = (Entry->NetStop & 0x00FF); - - /* copy the zone name found in the request */ - - GNIReply[6] = zname->len; - bcopy(&zname->str, &GNIReply[7], zname->len); - - - if ((znumber = zt_find_zname(zname))) { - - if (ZT_ISIN_ZMAP((znumber), Entry->ZoneBitMap)) { - - GNIReply[1] = 0; /* Zone Valid */ - - if ((len = zt_get_zmcast(ifID, zname, &GNIReply[8+zname->len]))) - GNIReply[7+zname->len] = len; - else { - GNIReply[1] |= ZIP_USE_BROADCAST; - GNIReply[7+zname->len] = 0; /* multicast address length */ - } - packet_length = 8 + zname->len + len; - } - } - - } - - else { /* should not happen, we are supposed to know our net */ - dPrintf(D_M_ZIP, D_L_WARNING, ("zip_s_gni_r: Don't know about our zone infos!!!\n")); - return; - } - - if (zt_ent_zcount(Entry) == 1) - GNIReply[1] |= ZIP_ONE_ZONE; - - if (GNIReply[1] & ZIP_ZONENAME_INVALID) { - - short Index = ifID->ifDefZone; - - if (Index <= 0 || Index >= ZT_MAXEDOUT) { - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_s_gni_r: Invalid starting index =%d port%d\n", - Index, ifID->ifPort)); - return; - } - - - Index--; - - if ((len = zt_get_zmcast(ifID, &ZT_table[Index].Zone, &GNIReply[8+zname->len]))) - GNIReply[7+zname->len] = len; - else { - GNIReply[1] |= ZIP_USE_BROADCAST; - GNIReply[7+zname->len] = 0; /* multicast address length */ - } - - packet_length = 7 + zname->len + len; - - /* in the case the zone name asked for in the request was invalid, we need - * to copy the good default zone for this net - */ - - GNIReply[packet_length + 1] = ZT_table[Index].Zone.len; - bcopy(&ZT_table[Index].Zone.str, &GNIReply[packet_length + 2], - ZT_table[Index].Zone.len); - packet_length = packet_length +2 + ZT_table[Index].Zone.len; - } - - - /* - * we're finally ready to send out the GetNetInfo Reply - * - */ - - - size = DDP_X_HDR_SIZE + packet_length; - if ((m_sent = gbuf_alloc(AT_WR_OFFSET+size, PRI_HI)) == NULL) { - return; /* was return(ENOBUFS); */ - } - - gbuf_rinc(m_sent,AT_WR_OFFSET); - gbuf_wset(m_sent,size); - ddp_sent = (at_ddp_t *)(gbuf_rptr(m_sent)); - - /* Prepare the DDP header */ - - ddp_sent->unused = ddp_sent->hopcount = 0; - UAS_ASSIGN(ddp->checksum, 0); - DDPLEN_ASSIGN(ddp_sent, size); - 
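/* Note: the UAS_ASSIGN() above clears the checksum field of the *received*
 * packet (ddp), not of the reply under construction (ddp_sent); as written,
 * the reply's checksum field is never explicitly zeroed on this path. */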
NET_ASSIGN(ddp_sent->src_net, ifID->ifThisNode.s_net); - ddp_sent->src_node = ifID->ifThisNode.s_node; - ddp_sent->src_socket = ZIP_SOCKET; - ddp_sent->dst_socket = ddp->src_socket; - - if (RequestIsBroadcasted) { /* if this was a broadcast, must respond from that */ - - NET_ASSIGN(ddp_sent->dst_net, 0); - ddp_sent->dst_node = 0xFF; - } - else { - - NET_NET(ddp_sent->dst_net, ddp->src_net); - ddp_sent->dst_node = ddp->src_node; - } - ddp_sent->type = DDP_ZIP; - - bcopy(&GNIReply, &ddp_sent->data, packet_length); - - dPrintf(D_M_ZIP_LOW, D_L_ROUTING, - ("zip_s_gni_r: send to %d:%d port#%d pack_len=%d\n", - NET_VALUE(ddp_sent->dst_net), ddp_sent->dst_node, - ifID->ifPort, packet_length)); - if ((status = - ddp_router_output(m_sent, ifID, AT_ADDR, - NET_VALUE(ddp_sent->dst_net), ddp_sent->dst_node, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_s_gni_r: ddp_router_output returns =%d\n", status)); - return; /* was return(status); */ - } -} /* zip_send_getnetinfo_reply */ - - -/* - * build_ZIP_reply_packet: is used to create and send a DDP packet and use the - * provided buffer as a ZIP reply. This is used by zip_send_ext_reply_to_query - * and zip_send_reply_to_query for sending their replies to ZIP queries. - */ -gbuf_t *prep_ZIP_reply_packet(m, ifID) - register gbuf_t *m; /* this is the original zip query */ - register at_ifaddr_t *ifID; -{ - register gbuf_t *m_sent; - register at_ddp_t *ddp, *src_ddp; - - /* access the source Net and Node informations */ - - src_ddp = (at_ddp_t *)gbuf_rptr(m); - - if ((m_sent = gbuf_alloc (AT_WR_OFFSET+1024, PRI_HI)) == NULL) { - return((gbuf_t *)NULL); - } - gbuf_rinc(m_sent,AT_WR_OFFSET); - gbuf_wset(m_sent,DDP_X_HDR_SIZE); - ddp = (at_ddp_t *)(gbuf_rptr(m_sent)); - - /* Prepare the DDP header */ - - ddp->unused = ddp->hopcount = 0; - UAS_ASSIGN(ddp->checksum, 0); - - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - ddp->src_node = ifID->ifThisNode.s_node; - ddp->src_socket = ZIP_SOCKET; - - ddp->dst_socket = src_ddp->src_socket; - NET_NET(ddp->dst_net, src_ddp->src_net); - ddp->dst_node = src_ddp->src_node; - - ddp->type = DDP_ZIP; - - return(m_sent); -} -/* - * zip_send_ext_reply_to_query: this function deals with ZIP Queries for extended nets. - * When we recognize an extended net (that might have several zone name associated with - * it), we send A SEPARATE ZIP reply for that network. This is called from the - * regular zip_send_reply_to_query, that just deals with non-ext nets. 
- */ - -static void zip_send_ext_reply_to_query(mreceived, ifID, Entry, NetAsked) - register gbuf_t *mreceived; - register at_ifaddr_t *ifID; - RT_entry *Entry; /* info about the network we're looking for */ - u_short NetAsked; -{ - register gbuf_t *m; - register at_ddp_t *ddp; - short i, j, reply_length, Index, zone_count, status; - u_char *zmap; - char *ReplyBuff, *ZonesInPacket; - - zone_count = zt_ent_zcount(Entry); - zmap = Entry->ZoneBitMap; - i = ZT_BYTES -1; - - -newPacket: - - if (!(m = prep_ZIP_reply_packet (mreceived, ifID))) { - return; /* was return(ENOBUFS); */ - } - - ddp = (at_ddp_t *)(gbuf_rptr(m)); - ReplyBuff = (char *)(ddp->data); - - - *ReplyBuff++ = 8; /* ZIP function = 8 [extended reply] */ - - ZonesInPacket= ReplyBuff; - *ZonesInPacket= 0; - ReplyBuff ++; - reply_length = 2; /* 1st byte is ZIP reply code, 2nd is network count */ - j= 0; - - /* For all zones, we check if they belong to the map for that Network */ - - for (; i >= 0; i--) { - - /* find the zones defined in this entry bitmap */ - - if (zmap[i]) { - for (; j < 8 ; j++) - if (zmap[i] << j & 0x80) { /* bingo */ - - Index = i*8 + j; /* zone index in zone table */ - - if (reply_length + 3 + ZT_table[Index].Zone.len > DDP_DATA_SIZE) { - - /* we need to send the packet before, this won't fit... */ - - zone_count -= *ZonesInPacket; - - DDPLEN_ASSIGN(ddp, (reply_length + DDP_X_HDR_SIZE)); - gbuf_winc(m,reply_length); - if ((status = - ddp_router_output(m, ifID, AT_ADDR, - NET_VALUE(ddp->dst_net), ddp->dst_node, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_s_ext_repl: ddp_router_output returns =%d\n", - status)); - return; /* was return (status); */ - } - - goto newPacket; - - } - /* this should fit in this packet, build the NetNumber, ZoneLen, - * ZoneName triple - */ - - if (ZT_table[Index].Zone.len) { - *ZonesInPacket += 1; /* bump NetCount field */ - *ReplyBuff++ = (NetAsked & 0xFF00) >> 8; - *ReplyBuff++ = (NetAsked & 0x00FF) ; - *ReplyBuff++ = ZT_table[Index].Zone.len; - - bcopy(&ZT_table[Index].Zone.str, ReplyBuff, - ZT_table[Index].Zone.len); - - ReplyBuff += ZT_table[Index].Zone.len; - reply_length += ZT_table[Index].Zone.len +3; - } - - } - } - j= 0; /* reset the bit count */ - } - - /* if we have some zone info in a half-empty packet, send it now. - * Remember, for extended nets we send *at least* one Reply - */ - - if (zone_count) { - DDPLEN_ASSIGN(ddp, (reply_length + DDP_X_HDR_SIZE)); - gbuf_winc(m,reply_length); - if ((status = - ddp_router_output(m, ifID, AT_ADDR, - NET_VALUE(ddp->dst_net), ddp->dst_node, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_s_ext_reply: ddp_router_output returns =%d\n", status)); - return; /* was return (status); */ - } - } - else /* free the buffer not used */ - - gbuf_freem(m); -} /* zip_send_ext_reply_to_query */ - -/* - * zip_send_reply_to_query: we received a ZIPQuery packet, we need to reply - * with the right information for the nets requested (if we have - * the right information. 
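zip_send_ext_reply_to_query() below scans the route's zone bitmap byte by byte and, within each byte, most-significant bit first: bit j of zmap[i] set means zone index i*8 + j is bound to the net. The traversal in isolation (ZT_BYTES is an assumed size, and the real loop resumes i and j across packet boundaries, which this sketch omits):

#include <stdio.h>
#include <stdint.h>

#define ZT_BYTES 96                 /* assumed bitmap size in bytes */

/* Visit every set bit, scanning each byte MSB-first as the reply code does. */
static void walk_zone_map(const uint8_t zmap[ZT_BYTES])
{
    for (int i = ZT_BYTES - 1; i >= 0; i--) {
        if (!zmap[i])
            continue;
        for (int j = 0; j < 8; j++)
            if ((uint8_t)(zmap[i] << j) & 0x80) /* bit j, MSB first */
                printf("zone index %d\n", i * 8 + j);
    }
}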
- */ -static void zip_send_reply_to_query(mreceived, ifID) - register gbuf_t *mreceived; - register at_ifaddr_t *ifID; -{ - register gbuf_t *m; - register at_ddp_t *ddp = NULL, *ddp_received; - RT_entry *Entry; - short i, reply_length, Index, status; - u_char network_count; - u_short *NetAsked; - char *ReplyBuff, *ZonesInPacket; - - ddp_received = (at_ddp_t *)gbuf_rptr(mreceived); - - /* access the number of nets requested in the Query */ - network_count = *((char *)(ddp_received->data) + 1); - NetAsked = (u_short *)(ddp_received->data + 2); - - /* check the validity of the Query packet */ - - if (DDPLEN_VALUE(ddp_received) != - (2 + network_count * 2 + DDP_X_HDR_SIZE)) { - - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_s_reply_to_q: bad length netcount=%d len=%d\n", - network_count, DDPLEN_VALUE(ddp))); - return; /* was return(1); */ - } - - /* walk the Query Network list */ - /* we want to build a response with the network number followed by the zone name - * length and the zone name. If there is more than one zone per network asked, - * we repeat the network number and stick the zone length and zone name. - * We need to be carefull with the max DDP size for data. If we see that a new - * NetNum, ZoneLen, ZoneName sequence won't fit, we send the previous packet and - * begin to build a new one. - */ - -newPacket: - - if (!(m = prep_ZIP_reply_packet (mreceived, ifID))) { - return; /* was return(ENOBUFS); */ - } - - ddp = (at_ddp_t *)(gbuf_rptr(m)); - ReplyBuff = (char *)(ddp->data); - - *ReplyBuff++ = 2; /* ZIP function = 2 [Non extended reply] */ - ZonesInPacket = ReplyBuff; - *ZonesInPacket = 0; - ReplyBuff++; - reply_length = 2; /* 1st byte is ZIP reply code, 2nd is network count */ - - for (i = 0 ; i < network_count ; i ++, NetAsked++) { - Entry = rt_blookup(ntohs(*NetAsked)); - - if (Entry != NULL && ((Entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT) && - RT_ALL_ZONES_KNOWN(Entry)) { /* this net is well known... */ - - if (Entry->NetStart == 0) { /* asking for a NON EXTENDED network */ - - if ( (Index = zt_ent_zindex(Entry->ZoneBitMap)) == 0) - continue; - - Index--; - - if (reply_length + 3 + ZT_table[Index].Zone.len > DDP_DATA_SIZE) { - - /* we need to send the packet before, this won't fit... 
*/ - - DDPLEN_ASSIGN(ddp, (reply_length + DDP_X_HDR_SIZE)); - gbuf_winc(m,reply_length); - - if ((status = - ddp_router_output(m, ifID, AT_ADDR, - NET_VALUE(ddp->dst_net), - ddp->dst_node, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_s_reply: ddp_router_output returns =%d\n", - status)); - return; /* was return (status); */ - } - - /* this is not nice, I know, but we reenter the loop with - * a packet is sent with the next network field in the Query - */ - - network_count -= i; - goto newPacket; - - } - - /* this should fit in this packet, build the NetNumber, ZoneLen, - * ZoneName triple - */ - - if (ZT_table[Index].Zone.len) { - ZonesInPacket += 1; /* bump NetCount field */ - *ReplyBuff++ = (*NetAsked & 0xFF00) >> 8; - *ReplyBuff++ = (*NetAsked & 0x00FF) ; - *ReplyBuff++ = ZT_table[Index].Zone.len; - bcopy(&ZT_table[Index].Zone.str, ReplyBuff, - ZT_table[Index].Zone.len); - - ReplyBuff += ZT_table[Index].Zone.len; - - reply_length += ZT_table[Index].Zone.len + 3; - - - } - - - } - else { /* extended network, check for multiple zone name attached - * and build a separate packet for each extended network requested - */ - - zip_send_ext_reply_to_query(mreceived, ifID, Entry, ntohs(*NetAsked)); - - } - } - } - - /* If we have a non extended packet (code 2) with some stuff in it, - * we need to send it now - */ - - if ( reply_length > 2) { - DDPLEN_ASSIGN(ddp, (reply_length + DDP_X_HDR_SIZE)); - gbuf_winc(m,reply_length); - if ((status = - ddp_router_output(m, ifID, AT_ADDR, - NET_VALUE(ddp->dst_net), - ddp->dst_node, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_send_reply: ddp_router_output returns =%d\n", status)); - return; /* was return (status); */ - } - } - else /* free the buffer not used */ - gbuf_freem(m); -} /* zip_send_reply_to_query */ - -/*********************************************************************** - * zip_router_input() - * - **********************************************************************/ - -void zip_router_input (m, ifID) - register gbuf_t *m; - register at_ifaddr_t *ifID; -{ - register at_ddp_t *ddp; - register at_atp_t *atp; - register at_zip_t *zip; - u_char user_bytes[4]; - register u_short user_byte; - - /* variables for ZipNotify processing */ - register char old_zone_len; - register char new_zone_len; - register char *old_zone; - char *new_zone; - - if (gbuf_type(m) != MSG_DATA) { - /* If this is a M_ERROR message, DDP is shutting down, - * nothing to do here...If it's something else, we don't - * understand what it is - */ - dPrintf(D_M_ZIP, D_L_WARNING, ("zip_router_input: not an M_DATA message\n")); - gbuf_freem(m); - return; - } - - if (!ifID) { - dPrintf(D_M_ZIP, D_L_WARNING, ("zip_router_input: BAD ifID\n")); - gbuf_freem(m); - return; - } - - /* - * The ZIP listener receives two types of requests: - * - * ATP requests: GetZoneList, GetLocalZone, or GetMyZone - * ZIP requests: Netinfo, Query, Reply, takedown, bringup - */ - - ddp = (at_ddp_t *)gbuf_rptr(m); - - if (ddp->type == DDP_ZIP) { - zip = (at_zip_t *)(gbuf_rptr(m) + DDP_X_HDR_SIZE); - dPrintf(D_M_ZIP_LOW, D_L_INPUT, - ("zip_input: received a ZIP_DDP command=%d\n", - zip->command)); - switch (zip->command) { - case ZIP_QUERY : /* we received a Zip Query request */ - dPrintf(D_M_ZIP, D_L_INPUT, - ("zip_input: Received a Zip Query in from %d.%d\n", - NET_VALUE(ddp->src_net), ddp->src_node)); - - if (!RT_LOOKUP_OKAY(ifID, ddp)) { - dPrintf(D_M_ZIP, D_L_INPUT, - ("zip_input:: refused ZIP_QUERY from %d:%d\n", - NET_VALUE(ddp->src_net), ddp->src_node)); - } - else - zip_send_reply_to_query(m, 
ifID); - gbuf_freem(m); - break; - - case ZIP_REPLY : /* we received a Zip Query Reply packet */ - case ZIP_EXTENDED_REPLY: - if (ifID->ifRoutingState == PORT_OFFLINE) { - dPrintf(D_M_ZIP, D_L_INPUT, - ("zip_input: Received a Zip Reply in user mode\n")); - } - else - zip_reply_received(m, ifID, zip->command); - gbuf_freem(m); - break; - - case ZIP_TAKEDOWN : - /* we received a Zip Takedown packet */ - dPrintf(D_M_ZIP, D_L_WARNING, ("zip_input: Received a Zip takedown!!!\n")); - gbuf_freem(m); - break; - - case ZIP_BRINGUP : - /* we received a Zip BringUp packet */ - dPrintf(D_M_ZIP, D_L_WARNING, ("zip_input: Received a Zip BringUp!!!\n")); - gbuf_freem(m); - break; - - case ZIP_GETNETINFO: /* we received a GetNetInfo request */ - dPrintf(D_M_ZIP, D_L_INPUT, - ("zip_input: Received a GetNetInfo Req in from %d.%d\n", - NET_VALUE(ddp->src_net), ddp->src_node)); - if (RT_LOOKUP_OKAY(ifID, ddp)) { - dPrintf(D_M_ZIP, D_L_OUTPUT, - ("zip_input: we, as node %d:%d send GNI reply to %d:%d\n", - ifID->ifThisNode.s_net, ifID->ifThisNode.s_node, - NET_VALUE(ddp->src_net), ddp->src_node)); - zip_send_getnetinfo_reply(m, ifID); - } - gbuf_freem(m); - break; - - - case ZIP_NETINFO_REPLY : - - /* If we are not waiting for a GetNetInfo reply - * to arrive, this must be a broadcast - * message for someone else on the zone, so - * no need to even look at it! - */ - if (!ROUTING_MODE && - ((NET_VALUE(ddp->src_net) != ifID->ifThisNode.s_net) || - (ddp->src_node != ifID->ifThisNode.s_node)) && netinfo_reply_pending) - { - dPrintf(D_M_ZIP, D_L_INPUT, - ("zip_input: Received a GetNetInfo Reply from %d.%d\n", - NET_VALUE(ddp->src_net), ddp->src_node)); - trackrouter(ifID, NET_VALUE(ddp->src_net), ddp->src_node); - zip_netinfo_reply((at_x_zip_t *)zip, ifID); - } - - gbuf_freem(m); - break; - - case ZIP_NOTIFY : - /* processing of ZipNotify message : first, change - * our zone name, then if NIS is open, let NBP demon - process know of this change...(just forward the - * Notify packet - */ - /* First, check if this is really a packet for us */ - old_zone = &zip->data[4]; - if (!zonename_equal(&ifID->ifZoneName, - (at_nvestr_t *)old_zone)) { - /* the old zone name in the packet is not the - * same as ours, so this packet couldn't be - * for us. - */ - gbuf_freem(m); - break; - - } - old_zone_len = *old_zone; - new_zone_len = zip->data[4 + old_zone_len + 1]; - new_zone = old_zone + old_zone_len; - - /* Reset the zone multicast address */ - (void)at_unreg_mcast(ifID, (caddr_t)&ifID->ZoneMcastAddr); - bzero((caddr_t)&ifID->ZoneMcastAddr, ETHERNET_ADDR_LEN); - - /* change the zone name - copy both the length and the string */ - bcopy((caddr_t)new_zone, (caddr_t)&ifID->ifZoneName, - new_zone_len+1); - - /* Send network zone change event and new zone for this interface. */ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ZONEUPDATED, 0, &(ifID->ifZoneName)); - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ZONELISTCHANGED, 0, 0); - - /* add the new zone to the list of local zones */ - if (!MULTIPORT_MODE && !DEFAULT_ZONE(&ifID->ifZoneName)) - (void)setLocalZones(&ifID->ifZoneName, - (ifID->ifZoneName.len+1)); - - /* Before trying to request our new multicast address, - * wait a while... someone might have alredy requested - * it, so we may see some broadcast messages flying - * by... Set up the structures so that it appears that - * we have already requested the NetInfo. 
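- *
- * (A sketch of the mechanism, per the code below and zip_netinfo_reply():
- * the timer is armed here, and cancelled if a broadcast reply shows up
- * first.)
- *
- *	timeout(zip_sched_getnetinfo, (caddr_t)ifID, 2*ZIP_TIMER_INT);
- *	// ...later, in zip_netinfo_reply(), on a matching reply:
- *	untimeout(zip_sched_getnetinfo, (caddr_t)ifID);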
- */ - ifID->ifNumRetries = ZIP_NETINFO_RETRIES; - netinfo_reply_pending = 1; - ifID->ifGNIScheduled = 1; - timeout(zip_sched_getnetinfo, (caddr_t) ifID, - 2*ZIP_TIMER_INT); - - gbuf_freem(m); - break; - default : - routing_needed(m, ifID, TRUE); - break; - } - } - else if (ddp->type == DDP_ATP && - RT_LOOKUP_OKAY(ifID, ddp)) { - if (gbuf_len(m) > DDP_X_HDR_SIZE) - atp = (at_atp_t *)(gbuf_rptr(m)+DDP_X_HDR_SIZE); - else - atp = (at_atp_t *)(gbuf_rptr(gbuf_cont(m))); - - /* Get the user bytes in network order */ - - *((u_long*)user_bytes) = UAL_VALUE(atp->user_bytes); - user_byte = user_bytes[0]; /* Get the zeroth byte */ - - dPrintf(D_M_ZIP, D_L_INPUT, - ("zip_input: received a ZIP_ATP command=%d\n", user_byte)); - - switch (user_byte) { - case ZIP_GETMYZONE: - zip_reply_to_getmyzone(ifID, m); - gbuf_freem(m); - break; - - case ZIP_GETZONELIST: - zip_reply_to_getzonelist(ifID, m); - gbuf_freem(m); - break; - - case ZIP_GETLOCALZONES: - zip_reply_to_getlocalzones(ifID, m); - gbuf_freem(m); - break; - - default: - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_input: received unknown ZIP_ATP command=%d\n", user_byte)); - routing_needed(m, ifID, TRUE); - break; - } - } else { - gbuf_freem(m); - } - return; -} /* zip_router_input */ - -/*********************************************************************** - * zonename_equal() - * - * Remarks : - * - **********************************************************************/ -int zonename_equal (zone1, zone2) - register at_nvestr_t *zone1, *zone2; -{ - register char c1, c2; - register int i; - - if (zone1->len != zone2->len) - return(0); - - for (i=0; i< (int) zone1->len; i++) { - c1 = zone1->str[i]; - c2 = zone2->str[i]; - if (c1 >= 'a' && c1 <= 'z') - c1 += 'A' - 'a'; - if (c2 >= 'a' && c2 <= 'z') - c2 += 'A' - 'a'; - if (c1 & 0x80) - c1 = upshift8(c1); - if (c2 & 0x80) - c2 = upshift8(c2); - if (c1 != c2) - return(0); - } - return(1); -} - - -char upshift8 (ch) - register char ch; -{ - register int i; - - static unsigned char lower_case[] = - {0x8a, 0x8c, 0x8d, 0x8e, 0x96, 0x9a, 0x9f, 0xbe, - 0xbf, 0xcf, 0x9b, 0x8b, 0x88, 0}; - static unsigned char upper_case[] = - {0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0xae, - 0xaf, 0xce, 0xcd, 0xcc, 0xcb, 0}; - - for (i=0; lower_case[i]; i++) - if (ch == lower_case[i]) - return (upper_case[i]); - - return(ch); -} - - -/*********************************************************************** - * zip_netinfo_reply () - * - * Remarks : - * - **********************************************************************/ -static void zip_netinfo_reply (netinfo, ifID) - register at_x_zip_t *netinfo; - register at_ifaddr_t *ifID; -{ - u_char mcast_len; - register at_net_al this_net; - char *default_zone; - register u_char zone_name_len; - - /* There may be multiple zones on the cable.... we need to - * worry about whether or not this packet is addressed - * to us. - */ - /* *** Do we really need to check this? 
*** */ - if (!zonename_equal((at_nvestr_t *)netinfo->data, &ifID->ifZoneName)) { - dPrintf(D_M_ZIP, D_L_INFO, ("zip_netinfo_reply, !zonename_equal!!!")); - return; - } - - ifID->ifThisCableStart = NET_VALUE(netinfo->cable_range_start); - ifID->ifThisCableEnd = NET_VALUE(netinfo->cable_range_end); - dPrintf(D_M_ZIP, D_L_OUTPUT, ("Zip_netinfo_reply: Set cable to %d-%d\n", - ifID->ifThisCableStart, ifID->ifThisCableEnd)); - - /* The packet is in response to our request */ - ifID->ifGNIScheduled = 0; - untimeout (zip_sched_getnetinfo, (caddr_t) ifID); - netinfo_reply_pending = 0; - zone_name_len = netinfo->data[0]; - mcast_len = netinfo->data[zone_name_len + 1]; - - if (netinfo->flags & ZIP_ZONENAME_INVALID) { - /* copy out the default zone name from packet */ - default_zone = (char *)&netinfo->data[zone_name_len+1+mcast_len+1]; - bcopy((caddr_t)default_zone, (caddr_t)&ifID->ifZoneName, - *default_zone + 1); - } - - /* add the new zone to the list of local zones */ - if (!MULTIPORT_MODE && !DEFAULT_ZONE(&ifID->ifZoneName)) - (void)setLocalZones(&ifID->ifZoneName, (ifID->ifZoneName.len+1)); - - /* get the multicast address out of the GetNetInfo reply, if there is one */ - if (!(netinfo->flags & ZIP_USE_BROADCAST)) { - /* If ZIP_USE_BROADCAST is set, we will use the cable - broadcast address as the multicast address, however - the cable multicast address has already been registered. - */ - /* This packet contains a multicast address, so - * send to elap to register it. - */ - if (FDDI_OR_TOKENRING(ifID->aa_ifp->if_type)) - ddp_bit_reverse(&netinfo->data[zone_name_len + 2]); - - bcopy((caddr_t)&netinfo->data[zone_name_len + 2], - (caddr_t)&ifID->ZoneMcastAddr, ETHERNET_ADDR_LEN); - (void)at_reg_mcast(ifID, (caddr_t)&ifID->ZoneMcastAddr); - } - - this_net = ifID->ifThisNode.s_net; - if ((this_net >= ifID->ifThisCableStart) && - (this_net <= ifID->ifThisCableEnd)) { - /* ThisNet is in the range of valid network numbers - * for the cable. Do nothing. - */ - } else { - /* ThisNet is not in the range of valid network - * numbers for the cable. This may be either because - * the chosen number was from start-up range, or - * because the user has a misconception of where the - * machine is!! Since ThisCableRange is set up, next - * time aarp is invoked, it would select address in - * the right range. - */ - - /* to reset initial_net and initial_node to zero, so - * that aarp is forced to choose new values - */ - ifID->initial_addr.s_net = 0; - ifID->initial_addr.s_node = 0; - - /* Wake up elap_online sleeping on this interface. */ - ZIPwakeup(ifID, ZIP_RE_AARP); - return; - } - - if (!ifID->startup_inprogress) { - /* Send event with zone info. This covers case where we get zone info - after startup. During startup this event is sent from ZIPwakeup. */ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ZONEUPDATED, 0, &(ifID->ifZoneName)); - } - - ZIPwakeup(ifID, 0); /* no error */ - return; -} /* zip_netinfo_reply */ - - -/********************************************************************** - * zip_control() - * - **********************************************************************/ -int zip_control (ifID, control) - register at_ifaddr_t *ifID; - int control; -{ - dPrintf(D_M_ZIP, D_L_INFO, ("zip_control called port=%d control=%d\n", - ifID->ifPort, control)); - switch (control) { - case ZIP_ONLINE : - case ZIP_LATE_ROUTER : - if (!ifID->ifGNIScheduled) { - ifID->ifNumRetries = 0; - /* Get the desired zone name from elap and put it in - * ifID for zip_getnetinfo() to use. 
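- *
- * (Usage sketch, from the callers elsewhere in this file: rtmp_input()
- * issues ZIP_LATE_ROUTER when a router appears, and ddp_age_router()
- * issues ZIP_NO_ROUTER when the last tracked router ages out.)
- *
- *	zip_control(ifID, ZIP_LATE_ROUTER); // kick off a GetNetInfo exchange
- *	zip_control(ifID, ZIP_NO_ROUTER);   // fall back to the "*" zone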
- */ - if (ifID->startup_zone.len) - ifID->ifZoneName = ifID->startup_zone; - zip_getnetinfo(ifID); - } - break; - case ZIP_NO_ROUTER : - ifID->ifZoneName.len = 1; - ifID->ifZoneName.str[0] = '*'; - ifID->ifZoneName.str[1] = '\0'; - - /* Send event with zone info. */ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ZONEUPDATED, 0, &(ifID->ifZoneName)); - - break; - default : - break; - } - return (0); -} - -/* locked version of zip_getnetinfo */ -static void -zip_getnetinfo_locked(void *arg) -{ - at_ifaddr_t *ifID; - - atalk_lock(); - if (arg != NULL) { // make sure it hasn't been closed - ifID = (at_ifaddr_t *)arg; - ifID->ifGNIScheduled = 0; - zip_getnetinfo(ifID); - } - atalk_unlock(); -} - - -/********************************************************************** - * zip_getnetinfo() - * - **********************************************************************/ -static void zip_getnetinfo (ifID) - register at_ifaddr_t *ifID; -{ - register at_x_zip_t *zip; - gbuf_t *m; - register at_ddp_t *ddp; - register struct atalk_addr *at_dest; - register int size; - - - size = DDP_X_HDR_SIZE + ZIP_X_HDR_SIZE + ifID->ifZoneName.len + 1 - + sizeof(struct atalk_addr) + 1; - if ((m = gbuf_alloc (AT_WR_OFFSET+size, PRI_HI)) == NULL) { - /* This time, we're unable to allocate buffer to - * send a packet out, so schedule to send a packet - * out later, and exit. - */ - dPrintf(D_M_ZIP, D_L_WARNING, ("zip_getnetinfo: no buffer, call later port=%d\n", - ifID->ifPort)); - ifID->ifGNIScheduled = 1; - timeout (zip_getnetinfo_locked, (caddr_t) ifID, ZIP_TIMER_INT/10); - return; - } - - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,0); - *(u_char *)gbuf_rptr(m) = AT_ADDR; - at_dest = (struct atalk_addr *)(gbuf_rptr(m) + 1); - ddp = (at_ddp_t *)(gbuf_rptr(m) + sizeof(struct atalk_addr) + 1); - zip = (at_x_zip_t *)ddp->data; - gbuf_winc(m,size); - - zip->command = ZIP_GETNETINFO; - zip->flags = 0; - NET_ASSIGN(zip->cable_range_start, 0); - NET_ASSIGN(zip->cable_range_end, 0); - if (ifID->ifZoneName.len) /* has to match reply exactly */ - bcopy((caddr_t)&ifID->ifZoneName, (caddr_t)zip->data, - ifID->ifZoneName.len + 1); - else - zip->data[0] = 0; /* No zone name is availbale */ - - /* let the lap fields be uninitialized, 'cause it doesn't - * matter. 
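- *
- * (For reference, how the request is addressed by the code below: a
- * cable-wide broadcast, ZIP socket to ZIP socket.)
- *
- *	NET_ASSIGN(ddp->dst_net, 0);    // net 0: "this cable"
- *	ddp->dst_node = 0xff;           // broadcast node
- *	ddp->dst_socket = ZIP_SOCKET;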
- */ - DDPLEN_ASSIGN(ddp, (size - (sizeof(struct atalk_addr) + 1))); - UAS_ASSIGN(ddp->checksum, 0); - ddp->hopcount = ddp->unused = 0; - NET_ASSIGN(ddp->dst_net, 0); /* cable-wide broadcast */ - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - /* By this time, AARP is done */ - - ddp->dst_node = 0xff; - ddp->src_node = ifID->ifThisNode.s_node; - ddp->dst_socket = ZIP_SOCKET; - ddp->src_socket = ZIP_SOCKET; - ddp->type = DDP_ZIP; - - at_dest->atalk_unused = 0; - NET_NET(at_dest->atalk_net, ddp->dst_net); - at_dest->atalk_node = ddp->dst_node; - - dPrintf(D_M_ZIP, D_L_INPUT, ("zip_getnetinfo: called for port=%d\n", - ifID->ifPort)); - - if (elap_dataput(m, ifID, 0, NULL)) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_getnetinfo: error sending zip_getnetinfo\n")); - return; - } - - ifID->ifNumRetries++; - netinfo_reply_pending = 1; - ifID->ifGNIScheduled = 1; - timeout (zip_sched_getnetinfo, (caddr_t) ifID, ZIP_TIMER_INT); -} /* zip_getnetinfo */ - - -/********************************************************************** - * zip_sched_getnetinfo() - * - **********************************************************************/ - -void zip_sched_getnetinfo(void *arg) -{ - register at_ifaddr_t *ifID = (at_ifaddr_t *)arg; - - atalk_lock(); - - ifID->ifGNIScheduled = 0; - - if (ifID->ifNumRetries >= ZIP_NETINFO_RETRIES) { - /* enough packets sent.... give up! */ - /* we didn't get any response from the net, so - * assume there's no router around and the given - * zone name, if any, is not valid. Change the - * zone name to "*". - */ - ifID->ifZoneName.len = 1; - ifID->ifZoneName.str[0] = '*'; - ifID->ifZoneName.str[1] = '\0'; - /* Should NBP be notified of this "new" zone name?? */ - netinfo_reply_pending = 0; - - ifID->ifRouterState = NO_ROUTER; - ifID->ifARouter.s_net = 0; - ifID->ifARouter.s_node = 0; - - dPrintf(D_M_ZIP, D_L_INFO, ("zip_sched_getnetinfo: Reset Cable Range\n")); - - ifID->ifThisCableStart = DDP_MIN_NETWORK; - ifID->ifThisCableEnd = DDP_MAX_NETWORK; - - if (ifID->ifState == LAP_ONLINE_FOR_ZIP) - ZIPwakeup (ifID, 0); /* no error */ - } else - zip_getnetinfo(ifID); - - atalk_unlock(); -} - - -/********************************************************************** - * zip_type_packet() - * - * Remarks: - * This routine checks whether or not the packet contained in "m" - * is an (outgoing) ZIP packet. If not, it returns 0. If it is a - * ZIP packet, it returns the ZIP packet type (ZIP command). "m" - * points to a packet with extended DDP header. The rest of the - * DDP data may or may not be in the first gbuf. 
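- *
- * (The idiom used below, and throughout this file, for that split case:)
- *
- *	if (gbuf_len(m) > DDP_X_HDR_SIZE)
- *		zip = (at_zip_t *)(gbuf_rptr(m) + DDP_X_HDR_SIZE);
- *	else	// first gbuf holds only the header; payload is in the next
- *		zip = (at_zip_t *)(gbuf_rptr(gbuf_cont(m)));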
- * - **********************************************************************/ -int zip_type_packet (m) - register gbuf_t *m; -{ - register at_atp_t *atp; - register at_ddp_t *ddp; - register at_zip_t *zip; - u_char user_bytes[4]; - register int user_byte; - - ddp = (at_ddp_t *)gbuf_rptr(m); - if (ddp->dst_socket == ZIP_SOCKET) { - switch (ddp->type) { - case DDP_ZIP : - if (gbuf_len(m) > DDP_X_HDR_SIZE) - zip = (at_zip_t *)(gbuf_rptr(m) - + DDP_X_HDR_SIZE); - else - zip=(at_zip_t *)(gbuf_rptr(gbuf_cont(m))); - return ((int)zip->command); - case DDP_ATP : - if (gbuf_len(m) > DDP_X_HDR_SIZE) - atp = (at_atp_t *)(gbuf_rptr(m)+DDP_X_HDR_SIZE); - else - atp = (at_atp_t *)(gbuf_rptr(gbuf_cont(m))); - /* Get the user bytes in network order */ - *((u_long*)user_bytes) = UAL_VALUE(atp->user_bytes); - user_byte = user_bytes[0]; /* Get the zeroth byte */ - if ((user_byte == ZIP_GETMYZONE) || - (user_byte == ZIP_GETZONELIST) || - (user_byte == ZIP_GETLOCALZONES)) - return (user_byte); - else - return (0); - default : - return (0); - } - } else - return (0); -} - -/********************************************************************** - * zip_handle_getmyzone() - * - * Remarks: - * Routine to handle ZIP GetMyZone request locally. It generates - * a phony response to the outgoing ATP request and sends it up. - * - * 07/12/94 : remark2 only called from ddp.c / ddp_output - * should only be called from the home port, but - * when we are a router we should know the infos for all - * anyway, so reply locally with what we have in stock... - * - **********************************************************************/ - -int zip_handle_getmyzone(ifID, m) - register at_ifaddr_t *ifID; - register gbuf_t *m; -{ - at_atp_t *atp; - register at_ddp_t *ddp; - register at_ddp_t *r_ddp; - register at_atp_t *r_atp; - gbuf_t *rm; /* reply message */ - register int size; - u_long ulongtmp; - - dPrintf(D_M_ZIP, D_L_INFO, - ("zip_handle_getmyzone: local reply for port=%d\n", - ifID->ifPort)); - - size = DDP_X_HDR_SIZE + ATP_HDR_SIZE + 1 + ifID->ifZoneName.len; - /* space for two headers and the zone name */ - if ((rm = gbuf_alloc(AT_WR_OFFSET+size, PRI_HI)) == NULL) { - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_handle_getmyzone: no buffer, port=%d\n", - ifID->ifPort)); - return (ENOBUFS); - } - - gbuf_rinc(rm,AT_WR_OFFSET); - gbuf_wset(rm,0); - r_ddp = (at_ddp_t *)(gbuf_rptr(rm)); - r_atp = (at_atp_t *)r_ddp->data; - gbuf_winc(rm,size); - - ddp = (at_ddp_t *)gbuf_rptr(m); - if (gbuf_len(m) > DDP_X_HDR_SIZE) - atp = (at_atp_t *)(gbuf_rptr(m) + DDP_X_HDR_SIZE); - else - atp = (at_atp_t *)(gbuf_rptr(gbuf_cont(m))); - - /* fill up the ddp header for reply */ - DDPLEN_ASSIGN(r_ddp, size); - r_ddp->hopcount = r_ddp->unused = 0; - UAS_ASSIGN(r_ddp->checksum, 0); - NET_ASSIGN(r_ddp->dst_net, ifID->ifThisNode.s_net); - NET_NET(r_ddp->src_net, ddp->dst_net); - r_ddp->dst_node = ifID->ifThisNode.s_node; - r_ddp->src_node = ddp->dst_node; - r_ddp->dst_socket = ddp->src_socket; - r_ddp->src_socket = ZIP_SOCKET; - r_ddp->type = DDP_ATP; - - /* fill up the atp header */ - r_atp->cmd = ATP_CMD_TRESP; - r_atp->xo = 0; - r_atp->eom = 1; - r_atp->sts = 0; - r_atp->xo_relt = 0; - r_atp->bitmap = 0; - UAS_UAS(r_atp->tid, atp->tid); - ulongtmp = 1; - UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp); /* no of zones */ - - /* fill up atp data part */ - bcopy((caddr_t) &ifID->ifZoneName, (caddr_t) r_atp->data, ifID->ifZoneName.len+1); - - /* all set to send the packet back up */ - - timeout(send_phony_reply, (caddr_t) rm, HZ/20); - return (0); -} - -static 
void -send_phony_reply(arg) - void *arg; -{ - gbuf_t *rm = (gbuf_t *)arg; - - atalk_lock(); - ddp_input(rm, ifID_home); - atalk_unlock(); - - return; -} - - -/* - * zip_prep_query_packet: build the actual ddp packet for the zip query - */ - -gbuf_t *zip_prep_query_packet(ifID, RouterNet, RouterNode) - at_ifaddr_t *ifID; - at_net_al RouterNet; /* we want to send the Zip Query to that router */ - at_node RouterNode; -{ - - register gbuf_t *m; - register at_ddp_t *ddp; - - if ((m = gbuf_alloc (AT_WR_OFFSET+1024, PRI_HI)) == NULL) { - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_send_query_packet: no buffer, port=%d\n", - ifID->ifPort)); - return((gbuf_t *)NULL); - } - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,0); - - ddp = (at_ddp_t *)(gbuf_rptr(m)); - - /* Prepare the DDP header */ - - ddp->unused = ddp->hopcount = 0; - UAS_ASSIGN(ddp->checksum, 0); - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - ddp->src_node = ifID->ifThisNode.s_node; - ddp->src_socket = ZIP_SOCKET; - - ddp->dst_socket = ZIP_SOCKET; - NET_ASSIGN(ddp->dst_net, RouterNet); - ddp->dst_node = RouterNode; - - ddp->type = DDP_ZIP; - - return (m); -} /* zip_prep_query_packet */ - - -/* - * zip_send_queries: this function sends queries for the routing table entries that - * need to know their zones. It scans the routing table for entries with unknown - * zones and builds Query packets accordingly. - * Note: this is called on a per-port basis. - */ - -void zip_send_queries(ifID, RouterNet, RouterNode) - register at_ifaddr_t *ifID; - at_net_al RouterNet; /* we want to send the Zip Query to that router */ - at_node RouterNode; -{ - RT_entry *Entry = &RT_table[0]; - register gbuf_t *m; - register at_ddp_t *ddp; - int status; - short Query_index, EntryNumber = 0 ; - register u_char port = ifID->ifPort; - char *QueryBuff, *ZoneCount; - short zip_sent = FALSE; - -newPacket: - - if (!(m = zip_prep_query_packet(ifID, RouterNet, RouterNode))) { - return; /* was return (ENOBUFS); */ - } - - ddp = (at_ddp_t *)(gbuf_rptr(m)); - QueryBuff = (char *)ddp->data; - - *QueryBuff++ = ZIP_QUERY; - ZoneCount = QueryBuff; /* network count */ - *ZoneCount = 0; - QueryBuff++; - Query_index = 2; - - - while (EntryNumber < RT_maxentry) { - - /* scan the table, and build the packet with the right entries: - * - entry in use and on the right Port - * - with unknown zones and in an active state - * - talking to the right router - */ - - if ((Query_index) > 2*254 +2) { - - /* we need to send the packet now; we can't have more than 256 - * requests for networks: the Netcount field is an 8-bit field in the zip query - * packet format as defined in Inside AppleTalk - */ - - dPrintf(D_M_ZIP_LOW, D_L_OUTPUT, - ("zip_send_query: FULL query for %d nets on port#%d.(len=%d)\n", - *ZoneCount, port, Query_index)); - zip_sent = TRUE; - - gbuf_winc(m,DDP_X_HDR_SIZE + Query_index); - DDPLEN_ASSIGN(ddp, (DDP_X_HDR_SIZE + Query_index)); - - if ((status = - ddp_router_output(m, ifID, AT_ADDR, - RouterNet, RouterNode, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_send_query: ddp_router_output returns =%d\n", status)); - return; /* was return (status); */ - } - - goto newPacket; - } - - - if (((Entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT) && - (Entry->NetStop) && (Entry->NetPort == port) && - (!RT_ALL_ZONES_KNOWN(Entry))){ - - /* we're ready to add that to our list of networks to query */ - - if (Entry->NetStart) { /* extended net */ - - *QueryBuff++ = (Entry->NetStart & 0xFF00) >> 8; - *QueryBuff++ = (Entry->NetStart & 0x00FF); - - } - else { - *QueryBuff++ = (Entry->NetStop & 0xFF00) >> 8; - 
*QueryBuff++ = (Entry->NetStop & 0x00FF); - } - - Query_index += 2; - *ZoneCount += 1; /* bump the number of networks requested */ - - } - - Entry++; - EntryNumber++; - - } - - dPrintf(D_M_ZIP_LOW, D_L_OUTPUT, - ("zip_send_query: query for %d nets on port#%d.(len=%d)\n", - *ZoneCount, port, Query_index)); - - if (*ZoneCount) { /* non-full Query needs to be sent */ - zip_sent = TRUE; - gbuf_winc(m,DDP_X_HDR_SIZE + Query_index); - DDPLEN_ASSIGN(ddp, (DDP_X_HDR_SIZE + Query_index)); - - if ((status = - ddp_router_output(m, ifID, AT_ADDR, - RouterNet, RouterNode, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_send_query: ddp_router_output returns =%d\n", - status)); - return; /* was return (status); */ - } - } - else - gbuf_freem(m); - - if (!zip_sent) /* we didn't need to send anything for that port */ - ifID->ifZipNeedQueries = 0; -} /* zip_send_queries */ - -/* zip_reply_received: we received the reply to one of our queries; update the - * zone bitmap and related state with what we received. - * We receive two types of replies: non-extended and extended. - * For extended replies, the network count is the total number of zones for that net. - */ -int -zip_reply_received(m, ifID, reply_type) - register gbuf_t *m; - register at_ifaddr_t *ifID; - int reply_type; -{ - register at_nvestr_t *zname; - RT_entry *Entry = &RT_table[0]; - register at_ddp_t *ddp; - at_net_al Network; - u_short payload_len, result; - u_char network_count; - char *PacketPtr; - - ddp = (at_ddp_t *)gbuf_rptr(m); - - /* access the number of nets provided in the ZIP Reply */ - - network_count = ntohs(*(u_char *)(gbuf_rptr(m) + DDP_X_HDR_SIZE + 1)); - - PacketPtr = (char *)(gbuf_rptr(m) + DDP_X_HDR_SIZE + 2); - - payload_len = DDPLEN_VALUE(ddp) - (DDP_X_HDR_SIZE + 2); - - dPrintf(D_M_ZIP_LOW, D_L_INPUT, ("zip_reply_received from %d:%d type=%d netcount=%d\n", - NET_VALUE(ddp->src_net), ddp->src_node, reply_type, network_count)); - - - while (payload_len > 0 && network_count >0) { - - Network = ntohs(*(at_net_al *)PacketPtr); - PacketPtr += 2; - zname = (at_nvestr_t *)PacketPtr; - if (payload_len) - payload_len = payload_len -(zname->len + 3); - - if (zname->len <= 0) { /* not valid, we got a problem here... 
*/ - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_reply_received: Problem zlen=0 for net=%d from %d:%d type=%d netcnt=%d\n", - Network, NET_VALUE(ddp->src_net), ddp->src_node, reply_type, network_count)); - payload_len =0; - continue; - } - - - Entry = rt_blookup(Network); - - if (Entry != NULL) { - - if (Entry->EntryState >= RTE_STATE_SUSPECT) { - - result = zt_add_zonename(zname); - - if (result == ZT_MAXEDOUT) { - - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_reply_received: ZTable full from %d:%d on zone '%s'\n", - NET_VALUE(ddp->src_net), ddp->src_node, zname->str)); - ErrorZIPoverflow = 1; - return(1); - } - - zt_set_zmap(result, Entry->ZoneBitMap); - - RT_SET_ZONE_KNOWN(Entry); - - } - else { - dPrintf(D_M_ZIP, D_L_INPUT, - ("zip_reply_received: entry %d-%d not updated, cause state=%d\n", - Entry->NetStart, Entry->NetStop, Entry->EntryState)); - } - } - else { - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_reply_received: network %d not found in RT\n", Network)); - } - - - /* now bump the PacketPtr pointer */ - PacketPtr += zname->len + 1; - network_count--; - } - - if ((reply_type == ZIP_REPLY) && network_count > 0) { -#if DEBUG - if (Entry) - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_reply_received: Problem decoding zone (after net:%d-%d)\n", - Entry->NetStart, Entry->NetStop)); -#endif - ifID->ifZipNeedQueries = 1; - } - else { - ifID->ifZipNeedQueries = 0; -#if DEBUG - if (Entry) - dPrintf(D_M_ZIP_LOW, D_L_INFO, - ("zip_reply_received: entry %d-%d all zones known\n", - Entry->NetStart, Entry->NetStop)); -#endif - } - - return 0; -} - -/* - * zip_reply_to_getmyzone: replies to ZIP GetMyZone received from the Net - */ - -static void zip_reply_to_getmyzone (ifID, m) - register at_ifaddr_t *ifID; - register gbuf_t *m; -{ - at_atp_t *atp; - register at_ddp_t *ddp; - register at_ddp_t *r_ddp; - register at_atp_t *r_atp; - register gbuf_t *rm; /* reply message */ - register int size, Index, status; - char *data_ptr; - RT_entry *Entry; - u_long ulongtmp; - - size = DDP_X_HDR_SIZE + ATP_HDR_SIZE + 1 + ifID->ifZoneName.len; - /* space for two headers and the zone name */ - if ((rm = gbuf_alloc(AT_WR_OFFSET+size, PRI_HI)) == NULL) { - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_reply_to_getmyzone: no buffer, port=%d\n", ifID->ifPort)); - return; /* was return (ENOBUFS); */ - } - gbuf_rinc(rm,AT_WR_OFFSET); - gbuf_wset(rm,size); - r_ddp = (at_ddp_t *)(gbuf_rptr(rm)); - r_atp = (at_atp_t *)r_ddp->data; - - ddp = (at_ddp_t *)gbuf_rptr(m); - if (gbuf_len(m) > DDP_X_HDR_SIZE) - atp = (at_atp_t *)(gbuf_rptr(m) + DDP_X_HDR_SIZE); - else - atp = (at_atp_t *)(gbuf_rptr(gbuf_cont(m))); - - /* fill up the ddp header for reply */ - DDPLEN_ASSIGN(r_ddp, size); - r_ddp->hopcount = r_ddp->unused = 0; - UAS_ASSIGN(r_ddp->checksum, 0); - - NET_ASSIGN(r_ddp->src_net, ifID->ifThisNode.s_net); - NET_NET(r_ddp->dst_net, ddp->src_net); - - r_ddp->src_node = ifID->ifThisNode.s_node; - r_ddp->dst_node = ddp->src_node; - - r_ddp->dst_socket = ddp->src_socket; - r_ddp->src_socket = ZIP_SOCKET; - r_ddp->type = DDP_ATP; - - /* fill up the atp header */ - r_atp->cmd = ATP_CMD_TRESP; - r_atp->xo = 0; - r_atp->eom = 1; - r_atp->sts = 0; - r_atp->xo_relt = 0; - r_atp->bitmap = 0; - UAS_UAS(r_atp->tid, atp->tid); - ulongtmp = 1; - UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp); /* no of zones */ - - data_ptr = (char *)r_atp->data; - - /* - * fill up atp data part with the zone name if we can find it... 
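- *
- * (A sketch of the lookup done below: map the requester's source net to
- * a routing entry, then take the first zone set in its bitmap; the index
- * returned by zt_ent_zindex() is 1-based.)
- *
- *	Entry = rt_blookup(NET_VALUE(ddp->src_net));
- *	Index = zt_ent_zindex(Entry->ZoneBitMap) - 1;
- *	// ZT_table[Index].Zone is then the requester's zone name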
- */ - - Entry = rt_blookup(NET_VALUE(ddp->src_net)); - if (Entry != NULL && ((Entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT) && - RT_ALL_ZONES_KNOWN(Entry)) { /* this net is well known... */ - - Index = zt_ent_zindex(Entry->ZoneBitMap) -1; - - *data_ptr = ZT_table[Index].Zone.len; - bcopy((caddr_t) &ZT_table[Index].Zone.str, (caddr_t) ++data_ptr, - ZT_table[Index].Zone.len); - - /* all set to send the packet back up */ - dPrintf(D_M_ZIP_LOW, D_L_OUTPUT, - ("zip_reply_to_GMZ: ddp_router_output to %d:%d port %d\n", - NET_VALUE(r_ddp->dst_net), r_ddp->dst_node, ifID->ifPort)); - - if ((status = - ddp_router_output(rm, ifID, AT_ADDR, - NET_VALUE(r_ddp->dst_net), r_ddp->dst_node, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_reply_to_GMZ: ddp_r_output returns =%d\n", status)); - return; /* was return (status); */ - } - } - else - gbuf_freem(rm); -} - -/* - * zip_reply_to_getzonelist: replies to ZIP GetZoneList requested from the Net - */ - -int -zip_reply_to_getzonelist (ifID, m) - register at_ifaddr_t *ifID; - register gbuf_t *m; -{ - at_atp_t *atp; - register at_ddp_t *ddp; - register at_ddp_t *r_ddp; - register at_atp_t *r_atp; - register gbuf_t *rm; /* reply message */ - register int size, status; - register short Index=0, StartPoint, ZLength, PacketLen=0; - u_long ulongtmp= 0; - char *Reply; - - ddp = (at_ddp_t *)gbuf_rptr(m); - if (gbuf_len(m) > DDP_X_HDR_SIZE) - atp = (at_atp_t *)(gbuf_rptr(m) + DDP_X_HDR_SIZE); - else - atp = (at_atp_t *)(gbuf_rptr(gbuf_cont(m))); - - - /* space for two headers and the zone name */ - - if ((rm = gbuf_alloc(AT_WR_OFFSET+1024, PRI_HI)) == NULL) { - return (ENOBUFS); - } - - gbuf_rinc(rm,AT_WR_OFFSET); - gbuf_wset(rm,0); - r_ddp = (at_ddp_t *)(gbuf_rptr(rm)); - r_atp = (at_atp_t *)r_ddp->data; - - /* fill up the ddp header for reply */ - - r_ddp->hopcount = r_ddp->unused = 0; - UAS_ASSIGN(r_ddp->checksum, 0); - NET_ASSIGN(r_ddp->src_net, ifID->ifThisNode.s_net); - NET_NET(r_ddp->dst_net, ddp->src_net); - r_ddp->src_node = ifID->ifThisNode.s_node; - r_ddp->dst_node = ddp->src_node; - r_ddp->dst_socket = ddp->src_socket; - r_ddp->src_socket = ZIP_SOCKET; - r_ddp->type = DDP_ATP; - - /* fill up the atp header */ - - r_atp->cmd = ATP_CMD_TRESP; - r_atp->xo = 0; - r_atp->eom = 1; - r_atp->sts = 0; - r_atp->xo_relt = 0; - r_atp->bitmap = 0; - UAS_UAS(r_atp->tid, atp->tid); - - Reply = (char *)r_atp->data; - - /* get the start index from the ATP request */ - - StartPoint = (UAL_VALUE_NTOH(atp->user_bytes) & 0xffff) -1; - - /* find the next zone to send */ - - while ((Index < ZT_maxentry) && StartPoint > 0) { - if (ZT_table[Index].Zone.len) - StartPoint--; - Index++; - } - - - dPrintf(D_M_ZIP_LOW, D_L_OUTPUT, ("zip_reply_to_GZL: Index=%d\n", Index)); - /* - * fill up atp data part with the zone name if we can find it... 
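- *
- * (GetZoneList paging, as implemented around this point: the request
- * carries a 1-based start index in the ATP user bytes, and the reply
- * returns a zone count with flag 0x01000000 set once the list is
- * exhausted.)
- *
- *	StartPoint = (UAL_VALUE_NTOH(atp->user_bytes) & 0xffff) - 1;
- *	// ...pack zone names from StartPoint on, then:
- *	if (Index >= ZT_maxentry)
- *		ulongtmp += 0x01000000;	// "last packet" flag
- *	UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp);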
- */ - - while (Index < ZT_maxentry) { - - ZLength = ZT_table[Index].Zone.len; - - if (ZT_table[Index].ZoneCount && ZLength) { - - - if (PacketLen + 8 + ZLength+1 > DDP_DATA_SIZE) /* packet full */ - break; - - *Reply++ = ZLength; - bcopy((caddr_t) &ZT_table[Index].Zone.str, - Reply, ZLength); - Reply += ZLength; - PacketLen += ZLength + 1; - ulongtmp++; - } - Index++; - } - - if (Index >= ZT_maxentry) /* this is the end of the list */ - - ulongtmp += 0x01000000; - - - UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp); /* # of zones and flag*/ - - size = DDP_X_HDR_SIZE + ATP_HDR_SIZE + PacketLen; - gbuf_winc(rm,size); - DDPLEN_ASSIGN(r_ddp, size); - - /* all set to send the packet back up */ - - dPrintf(D_M_ZIP_LOW, D_L_OUTPUT, - ("zip_r_GZL: send packet to %d:%d port %d atp_len =%d\n", - NET_VALUE(r_ddp->dst_net), r_ddp->dst_node, ifID->ifPort, PacketLen)); - - - if ((status= ddp_router_output(rm, ifID, AT_ADDR, - NET_VALUE(r_ddp->dst_net), r_ddp->dst_node, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, ("zip_reply_to_GZL: ddp_router_output returns=%d\n", - status)); - return (status); - } - return (0); - -} - -/* - * zip_reply_to_getlocalzones: replies to ZIP GetLocalZones requested from the Net - */ - -int zip_reply_to_getlocalzones (ifID, m) - register at_ifaddr_t *ifID; - register gbuf_t *m; -{ - at_atp_t *atp; - register at_ddp_t *ddp; - register at_ddp_t *r_ddp; - register at_atp_t *r_atp; - register gbuf_t *rm; /* reply message */ - int size, status; - short Index, Index_wanted, ZLength; - short i,j, packet_len; - short zCount, ZoneCount, ZonesInPacket; - unsigned char *zmap, last_flag = 0; - RT_entry *Entry; - char *Reply; - - u_long ulongtmp = 0; - - Index = Index_wanted = ZLength = i = j = packet_len = zCount = ZoneCount = - ZonesInPacket = 0; - - ddp = (at_ddp_t *)gbuf_rptr(m); - if (gbuf_len(m) > DDP_X_HDR_SIZE) - atp = (at_atp_t *)(gbuf_rptr(m) + DDP_X_HDR_SIZE); - else - atp = (at_atp_t *)(gbuf_rptr(gbuf_cont(m))); - - /* space for two headers and the zone name */ - - if ((rm = gbuf_alloc(AT_WR_OFFSET+1024, PRI_HI)) == NULL) { - return (ENOBUFS); - } - - gbuf_rinc(rm,AT_WR_OFFSET); - gbuf_wset(rm,0); - r_ddp = (at_ddp_t *)(gbuf_rptr(rm)); - r_atp = (at_atp_t *)r_ddp->data; - - Reply = (char *)r_atp->data; - - - /* get the start index from the ATP request */ - - Index_wanted = (UAL_VALUE_NTOH(atp->user_bytes) & 0xffff) -1; - - dPrintf(D_M_ZIP_LOW, D_L_INFO, - ("zip_r_GLZ: for station %d:%d Index_wanted = %d\n", - NET_VALUE(ddp->src_net), ddp->src_node, Index_wanted)); - - Entry = rt_blookup(NET_VALUE(ddp->src_net)); - - if (Entry != NULL && ((Entry->EntryState & 0x0F) >= RTE_STATE_SUSPECT) && - RT_ALL_ZONES_KNOWN(Entry)) { /* this net is well known... 
*/ - - ZoneCount = zt_ent_zcount(Entry) ; - - dPrintf(D_M_ZIP_LOW, D_L_INFO, - ("zip_reply_GLZ: for %d:%d ZoneCount=%d\n", - NET_VALUE(ddp->src_net), ddp->src_node, ZoneCount)); - - zmap = &Entry->ZoneBitMap[0]; - - /* - * first of all, we want to find the "first next zone" in the bitmap, - * to do so, we need to scan the bitmap and add the number of valid - * zones we find until we reach the next zone to be sent in the reply - */ - - if (ZoneCount > Index_wanted) { - - ZoneCount -= Index_wanted; - - /* find the starting point in the bitmap according to index */ - - for (i = 0; Index_wanted >= 0 && i < ZT_BYTES; i++) - if (zmap[i]) { - if (Index_wanted < 8) { - /* how many zones in the bitmap byte */ - for (j = 0, zCount =0; j < 8 ; j++) - if ((zmap[i] << j) & 0x80) - zCount++; - if (Index_wanted < zCount) { - for (j = 0 ; Index_wanted > 0 && j < 8 ; j++) - if ((zmap[i] << j) & 0x80) - Index_wanted--; - break; - } - else - Index_wanted -= zCount; - } - else - for (j = 0 ; j < 8 ; j++) - if ((zmap[i] << j) & 0x80) - Index_wanted--; - } - - /* - * now, we point to the begining of our next zones in the bitmap - */ - - while (i < ZT_BYTES) { - - if (zmap[i]) { - for (; j < 8 ; j++) - if ((zmap[i] << j) & 0x80) { - Index = i*8 + j; /* get the index in ZT */ - - ZLength = ZT_table[Index].Zone.len; - - if (ZT_table[Index].ZoneCount && ZLength) { - if (packet_len + ATP_HDR_SIZE + ZLength + 1 > - DDP_DATA_SIZE) - goto FullPacket; - - *Reply++ = ZLength; - bcopy((caddr_t) &ZT_table[Index].Zone.str, - Reply, ZLength); - Reply += ZLength; - packet_len += ZLength + 1; - ZonesInPacket ++; - dPrintf(D_M_ZIP_LOW, D_L_INFO, - ("zip_reply_GLZ: add z#%d to packet (l=%d)\n", - Index, packet_len)); - } - else { - dPrintf(D_M_ZIP, D_L_WARNING, - ("zip_reply_GLZ: no len for index=%d\n", - Index)); - } - } - } - i++; - j = 0; - } - } - else /* set the "last flag" bit in the reply */ - last_flag = 1; - } - else /* set the "last flag" bit in the reply */ - last_flag = 1; - -FullPacket: - - if (ZonesInPacket == ZoneCount) - last_flag = 1; - - - /* fill up the ddp header for reply */ - - r_ddp->hopcount = r_ddp->unused = 0; - UAS_ASSIGN(r_ddp->checksum, 0); - - NET_ASSIGN(r_ddp->src_net, ifID->ifThisNode.s_net); - NET_NET(r_ddp->dst_net, ddp->src_net); - - r_ddp->src_node = ifID->ifThisNode.s_node; - r_ddp->dst_node = ddp->src_node; - - r_ddp->dst_socket = ddp->src_socket; - r_ddp->src_socket = ZIP_SOCKET; - r_ddp->type = DDP_ATP; - - /* fill up the atp header */ - r_atp->cmd = ATP_CMD_TRESP; - r_atp->xo = 0; - r_atp->eom = 1; - r_atp->sts = 0; - r_atp->xo_relt = 0; - r_atp->bitmap = 0; - UAS_UAS(r_atp->tid, atp->tid); - ulongtmp = ((last_flag << 24) & 0xFF000000) + ZonesInPacket; /* # of zones and flag*/ - UAL_ASSIGN_HTON(r_atp->user_bytes, ulongtmp); - size = DDP_X_HDR_SIZE + ATP_HDR_SIZE + packet_len; - gbuf_winc(rm,size); - DDPLEN_ASSIGN(r_ddp, size); - - /* all set to send the packet back up */ - - dPrintf(D_M_ZIP_LOW, D_L_OUTPUT, - ("zip_r_GLZ: send packet to %d:%d port %d atp_len =%d\n", - NET_VALUE(r_ddp->dst_net), r_ddp->dst_node, ifID->ifPort, packet_len)); - - if ((status= ddp_router_output(rm, ifID, AT_ADDR, - NET_VALUE(r_ddp->dst_net), r_ddp->dst_node, 0))) { - dPrintf(D_M_ZIP, D_L_ERROR, - ("zip_reply_to_GLZ: ddp_router_output returns =%d\n", - status)); - return (status); - } - return (0); -} /* zip_reply_to_getlocalzones */ - -int regDefaultZone(ifID) - at_ifaddr_t *ifID; -{ - char data[ETHERNET_ADDR_LEN]; - - if (!ifID) - return(-1); - - zt_get_zmcast(ifID, &ifID->ifZoneName, data); - if 
(FDDI_OR_TOKENRING(ifID->aa_ifp->if_type)) - ddp_bit_reverse((unsigned char *)data); - bcopy((caddr_t)data, (caddr_t)&ifID->ZoneMcastAddr, ETHERNET_ADDR_LEN); - (void)at_reg_mcast(ifID, (caddr_t)&ifID->ZoneMcastAddr); - return(0); -} diff --git a/bsd/netat/ddp_rtmp.c b/bsd/netat/ddp_rtmp.c deleted file mode 100644 index cbbc254d9..000000000 --- a/bsd/netat/ddp_rtmp.c +++ /dev/null @@ -1,386 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1993-1998 Apple Computer, Inc. - * All Rights Reserved. - */ - -/* - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - - -/****************************************************************/ -/* */ -/* */ -/* RTMP Protocol */ -/* */ -/* */ -/****************************************************************/ - - -/* rtmp.c: , 1.6; 2/26/93; Apple Computer, Inc." 
*/ - - -#define NROUTERS2TRAK 8 -#define FIFTYSECS 10 -#define NODE(r) ((r)->ifARouter.s_node) -#define NET(r) ((r)->ifARouter.s_net) -#define INUSE(r) (NODE(r)) - -void ddp_age_router(void *arg); - -static struct routerinfo { - struct at_addr ifARouter; - at_ifaddr_t *ifID; - void *tmo; -} trackedrouters[NROUTERS2TRAK]; - -void trackrouter_rem_if(ifID) - register at_ifaddr_t *ifID; -{ - int i; - register struct routerinfo *router; - - for (i = NROUTERS2TRAK; --i >= 0;) { - router = &trackedrouters[i]; - if (trackedrouters[i].ifID == ifID) { - untimeout(ddp_age_router, (caddr_t)router); - break; - } - } -} - - -void routershutdown(void) -{ - int i; - - for (i = NROUTERS2TRAK; --i >= 0;) { - register struct routerinfo *router; - - router = &trackedrouters[i]; - if (INUSE(router)) { - untimeout(ddp_age_router, (caddr_t) router); - bzero((caddr_t) router, sizeof(struct routerinfo)); - } - } -} - -int router_added = 0; -int router_killed = 0; - - - -void trackrouter(ifID, net, node) - register at_ifaddr_t *ifID; - register unsigned short net; - register unsigned char node; -{ - register struct routerinfo *unused = NULL; - int i; - - for (i = NROUTERS2TRAK; --i >= 0;) { - register struct routerinfo *router; - - router = &trackedrouters[(i + node) & (NROUTERS2TRAK-1)]; - if ((NODE(router) == node) && (NET(router) == net)) { - untimeout(ddp_age_router, (caddr_t) router); - timeout(ddp_age_router, (caddr_t) router, 50*SYS_HZ); - unused = NULL; - break; - } - else if (!INUSE(router) && !unused) - unused = router; - } - if (unused) { - router_added++; - - if (ifID->ifARouter.s_net == 0) { - /* Send event that this interface just got a router. This does not - discriminate on whether this router is valid or not. If it is not - valid rtmp_input will send a KEV_ATALK_ROUTERUP_INVALID event. */ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ROUTERUP, 0, 0); - } - - unused->ifID = ifID; - NET(unused) = net; - NODE(unused) = node; - ifID->ifRouterState = ROUTER_AROUND; - timeout(ddp_age_router, (caddr_t) unused, 50*SYS_HZ); - - if (NET(ifID) == 0 && NODE(ifID) == 0) { - NET(ifID) = net; - NODE(ifID) = node; - } - } -} - -/* - * This is the timeout function that is called after 50 seconds, - * if no router packets come in. That way we won't send extended - * frames to something that is not there. Untimeout is called if - * an RTMP packet comes in so this routine will not be called. - */ -void ddp_age_router(void *arg) -{ - struct routerinfo *deadrouter = (struct routerinfo*)arg; - register at_ifaddr_t *ourrouter; - - atalk_lock(); - - ourrouter = deadrouter->ifID; - if (ourrouter == NULL) { - atalk_unlock(); - return; - } - - dPrintf(D_M_RTMP, D_L_INFO, - ("ddp_age_router called deadrouter=%d:%d\n", NODE(deadrouter), NET(deadrouter))); - - router_killed++; - - if (NODE(ourrouter) == NODE(deadrouter) && - NET(ourrouter) == NET(deadrouter)) { - register unsigned long atrandom = random(); - register struct routerinfo *newrouter = NULL; - int i; - - bzero((caddr_t) deadrouter, sizeof(struct routerinfo)); - for (i = NROUTERS2TRAK; --i >= 0;) { - newrouter = &trackedrouters[(i + atrandom) & (NROUTERS2TRAK-1)]; - if (INUSE(newrouter)) - break; - else - newrouter = NULL; - } - if (newrouter) { - /* Set our router to another on the list and go on with life */ - NET(ourrouter) = NET(newrouter); - NODE(ourrouter) = NODE(newrouter); - } - else { - /* from gorouterless() */ - /* We have no other routers. 
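- *
- * (For context, how trackrouter() probes the table above: a small
- * open-addressed scan keyed off the router's node number; a hit just
- * refreshes the 50-second timer, a miss takes the first free slot.)
- *
- *	for (i = NROUTERS2TRAK; --i >= 0;) {
- *		router = &trackedrouters[(i + node) & (NROUTERS2TRAK - 1)];
- *		// match: refresh timer; free and unclaimed: remember slot
- *	}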
*/ - ATTRACE(AT_MID_DDP, AT_SID_TIMERS, AT_LV_WARNING, FALSE, - "ddp_age_router entry : ARouter = 0x%x, RouterState = 0x%x", - ATALK_VALUE(ourrouter->ifARouter), ourrouter->ifRouterState, 0); - - switch (ourrouter->ifRouterState) { - case ROUTER_AROUND : - /* This is where we lose our cable. - Reset router fields and state accordingly. */ - ourrouter->ifARouter.s_net = 0; - ourrouter->ifARouter.s_node = 0; - ourrouter->ifThisCableStart = DDP_MIN_NETWORK; - ourrouter->ifThisCableEnd = DDP_MAX_NETWORK; - ourrouter->ifRouterState = NO_ROUTER; - - /* Send event to indicate that we've lost our seed router. */ - atalk_post_msg(ourrouter->aa_ifp, KEV_ATALK_ROUTERDOWN, 0, 0); - - zip_control(ourrouter, ZIP_NO_ROUTER); - break; - case ROUTER_WARNING : - /* there was a router that we were ignoring... - * now, even that's gone. But we want to tackle the - * case where another router may come up after all - * of them have died... - */ - ourrouter->ifRouterState = NO_ROUTER; - break; - } - } - } else - bzero((caddr_t) deadrouter, sizeof(struct routerinfo)); - - atalk_unlock(); - -} /* ddp_age_router */ - -void rtmp_input (mp, ifID) - register gbuf_t *mp; - register at_ifaddr_t *ifID; -{ - register at_net_al this_net; - register at_net_al range_start, range_end; - register at_ddp_t *ddp = (at_ddp_t *)gbuf_rptr(mp); - /* NOTE: there is an assumption here that the - * DATA follows the header. */ - register at_rtmp *rtmp = (at_rtmp *)ddp->data; - - if (gbuf_type(mp) != MSG_DATA) { - /* If this is a M_ERROR message, DDP is shutting down, - * nothing to do here...If it's something else, we don't - * understand what it is - */ - gbuf_freem(mp); - return; - } - - if (!ifID) { - gbuf_freem(mp); - return; - } - if (gbuf_len(mp) < (DDP_X_HDR_SIZE + sizeof(at_rtmp))) { - gbuf_freem(mp); - return; - } - this_net = ifID->ifThisNode.s_net; - if (rtmp->at_rtmp_id_length != 8) { - gbuf_freem(mp); - return; - } - - { - at_rtmp_tuple *tp; - tp = ((at_rtmp_tuple *)&rtmp->at_rtmp_id[1]); - range_start = NET_VALUE(tp->at_rtmp_net); - tp = ((at_rtmp_tuple *)&rtmp->at_rtmp_id[4]); - range_end = NET_VALUE(tp->at_rtmp_net); - - if (ifID->ifRouterState == ROUTER_AROUND) { - if ((ifID->ifThisCableStart == range_start) && - (ifID->ifThisCableEnd == range_end)) { - trackrouter(ifID, - NET_VALUE(rtmp->at_rtmp_this_net), - rtmp->at_rtmp_id[0] - ); - } - } else { - /* There was no router around earlier, one - * probably just came up. - */ - if ((this_net >= DDP_STARTUP_LOW) && - (this_net <= DDP_STARTUP_HIGH)) { - /* we're operating in the startup range, - * ignore the presence of router - */ - if (ifID->ifRouterState == NO_ROUTER) { - dPrintf(D_M_RTMP, D_L_INFO, ("rtmp_input: new router came up, INVALID: net \ - in startup range.\n")); - /* trackrouter sends a KEV_ATALK_ROUTERUP event to note that - a new router has come up when we had none before. */ - trackrouter(ifID, - NET_VALUE(rtmp->at_rtmp_this_net), - rtmp->at_rtmp_id[0] - ); - ifID->ifRouterState = ROUTER_WARNING; - - /* This router is invalid. Send event. */ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ROUTERUP_INVALID, 0, 0); - } - } else { - /* our address - * is not in startup range; Is our - * address good for the cable?? - */ - if ((this_net >= range_start) && - (this_net <= range_end)) { - /* Our address is in the range - * valid for this cable... Note - * the router address and then - * get ZIP rolling to get the - * zone info. - */ - ifID->ifThisCableStart = range_start; - ifID->ifThisCableEnd = range_end; - - /* A seed router that gives us back our cable range came up. 
- It's a valid router and gives us our network back. */ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ROUTERUP, 0, 0); - - trackrouter(ifID, - NET_VALUE(rtmp->at_rtmp_this_net), - rtmp->at_rtmp_id[0] - ); - zip_control(ifID, ZIP_LATE_ROUTER); - } else { - /* Our address is not in the - * range valid for this cable.. - * ignore presence of the - * router - */ - if (ifID->ifRouterState == NO_ROUTER) { - /* trackrouter sends a KEV_ATALK_ROUTERUP event to note that - a new router has come up when we had none before. */ - trackrouter(ifID, - NET_VALUE(rtmp->at_rtmp_this_net), - rtmp->at_rtmp_id[0] - ); - ifID->ifRouterState = ROUTER_WARNING; - - /* A new seed router came up, but the cable range is different - than what we had before. */ - atalk_post_msg(ifID->aa_ifp, KEV_ATALK_ROUTERUP_INVALID, 0, 0); - } - } - } - } - } - - gbuf_freem(mp); - return; -} - - -void rtmp_init() -{ - bzero((caddr_t)trackedrouters, sizeof(struct routerinfo)*NROUTERS2TRAK); -} - - diff --git a/bsd/netat/ddp_rtmptable.c b/bsd/netat/ddp_rtmptable.c deleted file mode 100644 index 95acab081..000000000 --- a/bsd/netat/ddp_rtmptable.c +++ /dev/null @@ -1,1144 +0,0 @@ -/* - * Copyright (c) 1994-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/*---------------------------------------------------------------------------- - * - * RTMP & ZIP routing tables access routines - * - * This code implement b-tree search and manipulation of - * of the RTMP routing table and ZIP zone table. - * - * The RTMP routing table is a data block divided in several routing - * entries sorted during insertion in a b-tree form. We use a table and - * not dynamically allocated entries because it allow us to scan the whole - * table when RTMP packets are generated. The routing table entries are sorted - * by there NetStop value (because non extended nets have a NetStart value of - * zero. From any point in the tree, the left side contains Network ranges - * smaller or equal to the current Node, and the right tree points to higher - * values network ranges. - * - * - * 0.01 3/16/94 LD Creation - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
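- *
- * A sketch of the resulting lookup invariant (as coded in rt_blookup()
- * below): an entry matches NetNumber exactly when
- *
- *	LowEnd = Entry->NetStart ? Entry->NetStart : Entry->NetStop;
- *	LowEnd <= NetNumber && NetNumber <= Entry->NetStop
- *
- * otherwise the search goes right when NetNumber > NetStop and left
- * when NetNumber < LowEnd.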
- * - *---------------------------------------------------------------------------- - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -RT_entry *RT_table_freelist; /* start of free entry list */ -RT_entry RT_table_start; /* start of the actual entry table */ -RT_entry *RT_table; /* the routing table */ -ZT_entry *ZT_table; /* the Zone Information Protocol table */ -short RT_maxentry; /* Number of entries in the RTMP table */ -short ZT_maxentry; /* Number of entries in the ZIP table */ - -char errstr[512]; /* used to display meaningful router errors */ - -extern at_ifaddr_t *ifID_table[]; -extern at_ifaddr_t *ifID_home; -extern snmpStats_t snmpStats; - -short ErrorRTMPoverflow = 0; /* flag if RTMP table is too small for this net */ -short ErrorZIPoverflow = 0; /* flag if ZIP table is too small for this net */ - - -/* - * This is a temporary function: just to display the router error - */ - -void RouterError(__unused short port, short err_number) -{ - switch (err_number) { - - case ERTR_SEED_CONFLICT: - dPrintf(D_M_RTMP, D_L_ERROR, - ("**** RTR Error on port# %d SEED_CONFLICT\n", port)); - break; - - case ERTR_CABLE_CONFLICT: - dPrintf(D_M_RTMP, D_L_ERROR, - ("**** RTR Error on port# %d CABLE_CONFLICT\n", port)); - break; - - case ERTR_RTMP_BAD_VERSION: - dPrintf(D_M_RTMP, D_L_ERROR, - ("**** RTR Error on port# %d RTMP_BAD_VERSION\n", port)); - break; - - case ERTR_CABLE_STARTUP: - dPrintf(D_M_RTMP, D_L_ERROR, - ("**** RTR Error on port# %d RTMP_CABLE_STARTUP\n", - port)); - break; - - default: - dPrintf(D_M_RTMP, D_L_ERROR, - ("**** RTR Error on port# %d WHAT IN THE WORLD IS THIS ONE? code=%d\n", - port, err_number)); - break; - } - dPrintf(D_M_RTMP, D_L_ERROR, ("Explanation: %s\n", errstr)); -} - - -/* - * this function just looks for a NetNumber in the routing table; - * no check is done for the validity of the entry - */ - -RT_entry *rt_blookup (NetNumber) -at_net_al NetNumber; -{ - - RT_entry *ptree = &RT_table_start; - at_net_al LowEnd; -/* - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("%s : Lookup for Net=%d\n", - "rt_blookup", NetNumber)); -*/ - while (ptree) { - - if (NetNumber > ptree->NetStop) { -/* - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("%s : Go Right from #%d\n", - "rt_blookup", ptree->NextIRNet)); -*/ - ptree = ptree->right; - continue; - } - else { - if (ptree->NetStart) - LowEnd = ptree->NetStart; - else - LowEnd = ptree->NetStop; - - if (NetNumber < LowEnd ) { -/* - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("%s : Go Left from #%d\n", - "rt_blookup", ptree->NextIRNet)); -*/ - ptree = ptree->left; - continue; - } - - /* we're in the range (either extended or not) - * return the entry found. - */ - -/* dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("%s : found %04d-%04d Port=%d State=0x%x\n", - "rt_blookup", ptree->NetStart, ptree->NetStop, ptree->NetPort, - ptree->EntryState)); -*/ - - return (ptree); - } - } - - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("%s : %04d : NOT FOUND\n", - "rt_blookup", NetNumber)); - return ((RT_entry *)NULL); -} - - -/* Routing table btree insert routine - * Uses a RT_entry parameter as the input; the insert is sorted in - * the tree on the NetStop field. Provision is made for non-extended - * nets (i.e., NetStart = 0). - * The function returns the element where the new entry was inserted, or - * NULL if the insert didn't work. 
(In this case there is a problem with - * the tree coherency...) - * - */ - - -RT_entry *rt_binsert (NewEntry) -RT_entry *NewEntry; -{ - RT_entry *ptree = &RT_table_start; - -#if DEBUG - register at_net_al NetStart = NewEntry->NetStart; -#endif - register at_net_al NetStop = NewEntry->NetStop; - - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("rt_binsert: for Net %d-%d state=x%x NextIR %d:%d\n", - NetStart, NetStop, NewEntry->EntryState,NewEntry->NextIRNet, NewEntry->NextIRNode)); - - if (ptree == (RT_entry *)NULL) { - *ptree = *NewEntry; - at_state.flags |= AT_ST_RT_CHANGED; - return (NewEntry); - } - - - while (ptree) { - - if (NetStop > ptree->NetStop) { /* walk the right sub-tree */ - if (ptree->right) - ptree = ptree->right; - else { - ptree->right = NewEntry; - at_state.flags |= AT_ST_RT_CHANGED; - return (ptree); - } - } - else { /* walk the left sub-tree */ - if (ptree->left) - ptree = ptree->left; - else { - ptree->left = NewEntry; - at_state.flags |= AT_ST_RT_CHANGED; - return (ptree); - } - } - - } - - dPrintf(D_M_RTMP, D_L_WARNING, ("%s : ERROR NOT INSERTED Net %d-%d\n", - "rt_binsert", NetStart, NetStop)); - return ((RT_entry *)NULL); -} - -RT_entry *rt_insert(NStop, NStart, NxNet, NxNode, NtDist, NtPort, EntS) - at_net_al NStop, NStart, NxNet; - at_node NxNode; - u_char NtDist, NtPort, EntS; -{ - RT_entry *New; - if ((New = RT_table_freelist)) { - RT_table_freelist = RT_table_freelist->right; - } else - return ((RT_entry *)NULL); - New->right = NULL; - New->NetStop = NStop; - New->NetStart = NStart; - New->NextIRNet = NxNet; - New->NextIRNode = NxNode; - New->NetDist = NtDist; - New->NetPort = NtPort; - New->EntryState = EntS; - bzero(New->ZoneBitMap, sizeof(New->ZoneBitMap)); - at_state.flags |= AT_ST_RT_CHANGED; - return(rt_binsert(New)); -} - -/* - * Routing table btree deletion routine - * - */ - -RT_entry *rt_bdelete (at_net_al NetStop, __unused at_net_al NetStart) -{ - - RT_entry *rt_found, *pprevious = NULL, *pnext, *pnextl, *psub; - at_net_al LowEnd; - - rt_found = &RT_table_start; - - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("%s : Delete %d-%d\n", - "rt_bdelete", NetStart, NetStop)); - - while (rt_found) { - - if (NetStop > rt_found->NetStop) { - pprevious = rt_found; - rt_found = rt_found->right; - continue; - } - else { - - /* non-extended nets cases */ - - if (rt_found->NetStart) - LowEnd = rt_found->NetStart; - else - LowEnd = rt_found->NetStop; - - if (NetStop < LowEnd) { - pprevious = rt_found; - rt_found = rt_found->left; - continue; - } - - /* we're in the range (either extended or not); - * return the entry found. - */ - - break; - } - } - - dPrintf(D_M_RTMP, D_L_ROUTING, ("%s : Delete %d-%d found to delete %d-%d\n", - "rt_bdelete", NetStart, NetStop, rt_found->NetStart,rt_found->NetStop)); - - if (rt_found) { - - - - /* we found the entry; now reorganize the sub-trees - * spanning from our node. - */ - - if ((pnext = rt_found->right)) { - - /* Tree pruning: take the left branch of the current - * node and place it at the lowest left branch - * of the current right branch - */ - - psub = pnext; - - /* walk the Right/Left sub-tree from the current node */ - - while ((pnextl = psub->left)) - psub = pnextl; - - /* plug the old left tree into the new ->right leftmost node */ - - psub->left = rt_found->left; - - - } else { /* only left sub-tree, simple case */ - - pnext = rt_found->left; - } - - /* Now, plug the current node's sub-tree into the proper pointer of - * our parent node.
- */ - - - if (pprevious->left == rt_found) - pprevious->left = pnext; - else - pprevious->right = pnext; - - /* clean-up entry and add to the free-list */ - - at_state.flags |= AT_ST_RT_CHANGED; - return(rt_found); - } - - else { /* Trying to delete something that doesn't exist? */ - - dPrintf(D_M_RTMP, D_L_WARNING, ("%s : %d NOT Removed\n", - "rt_bdelete", NetStop)); - - return ((RT_entry *)NULL); - } - - -} - - -#if DEBUG -RT_entry *rt_sortedshow(RT_entry *parent); -RT_entry *rt_sortedshow(RT_entry *parent) -{ - RT_entry *me; - - me = parent; - - if (parent == NULL) { - me = &RT_table_start; - while (me) - if (me->left) { - parent = me; - me = me->left; - } -/* parent = parent->parent; */ - } - return (parent); -} - -/* - * debug only: display the contents of the routing table - */ - -void rt_show(void); -void rt_show(void) -{ - RT_entry *ptree; - int i=0; - - ptree = &RT_table[0]; - - while (ptree && i < 600 ) { - if (ptree->NetStop) { - dPrintf(D_M_RTMP_LOW, D_L_VERBOSE, - ("%4d-%4d IR=%d:%d Dist=%d\n", - ptree->NetStop, ptree->NetStart, ptree->NextIRNet, - ptree->NextIRNode, (short)ptree->NetDist)); - } else { - dPrintf(D_M_RTMP_LOW, D_L_VERBOSE, - ("%04d : * FREE ENTRY\n", i)); - } - ptree++; - i++; - } -} -#endif /* DEBUG */ - -/* - * prepare the indexing of the free entries in the RTMP table - */ - -int -rt_table_init() -{ - short i; - - if ((RT_table = (RT_entry *)_MALLOC(sizeof(RT_entry)*RT_maxentry, - M_RTABLE, M_WAITOK)) == NULL) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmptable: Can't allocate RT_table\n")); - return (ENOMEM); - } - if ((ZT_table = (ZT_entry *)_MALLOC(sizeof(ZT_entry)*ZT_maxentry, - M_RTABLE, M_WAITOK)) == NULL) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("rtmptable: Can't allocate ZT_table\n")); - return (ENOMEM); - } - dPrintf(D_M_RTMP, D_L_STARTUP, ("rt_table_init called\n")); - bzero(&RT_table[0], sizeof(RT_entry)* RT_maxentry); - for (i= 1 ; i < RT_maxentry ; i++) { - (&RT_table[i-1])->right = &RT_table[i]; - } - RT_table_freelist = &RT_table[0]; - - at_state.flags |= AT_ST_RT_CHANGED; - at_state.flags |= AT_ST_ZT_CHANGED; - bzero(&RT_table_start, sizeof(RT_entry)); - - /* also clean up the ZIP table */ - - bzero(&ZT_table[0], sizeof(ZT_entry)* ZT_maxentry); - ErrorRTMPoverflow = 0; - ErrorZIPoverflow = 0; - return(0); -} - -/* - * zt_add_zone: add a zone name in the zone table. - */ - -int -zt_add_zone(name, length) -char *name; -short length; -{ - at_nvestr_t zname; - bcopy(name, &zname.str, length); - zname.len = length; - return (zt_add_zonename(&zname)); -} - -/* - * zt_add_zonename: add a zone name in the zone table. - */ - -int zt_add_zonename(zname) -at_nvestr_t *zname; -{ - register short res,i; - - if ((res = zt_find_zname(zname))) - return(res); - - for (i = 0; i < ZT_maxentry ; i++) { - if (ZT_table[i].ZoneCount == 0 && ZT_table[i].Zone.len == 0) {/* free entry */ - ZT_table[i].Zone = *zname; - dPrintf(D_M_RTMP, D_L_VERBOSE, ("zt_add_zonename: zone #%d %s len=%d\n", - i, ZT_table[i].Zone.str, ZT_table[i].Zone.len)); - at_state.flags |= AT_ST_ZT_CHANGED; - return(i+1); - } - } - /* table full... */ - return (ZT_MAXEDOUT); -} - -/* Adjust zone counts for a removed network entry. 
- * If the ZoneCount of a zone reaches zero, delete the zone from the zone table. - */ -void zt_remove_zones(zmap) -u_char *zmap; -{ - - register u_short i,j, Index; - - for (i=0; i< ZT_BYTES ; i++) { - - if (zmap[i]) { - for (j=0; j < 8 ; j++) - if ((zmap[i] << j) & 0x80) { - Index = i*8 + j; /* get the index in ZT */ - /* 1-23-97: this routine caused a crash once; presumably - zmap bits beyond the ZT_table size got set somehow. - Prevent that here. - */ - if (Index >= ZT_maxentry) { - dPrintf(D_M_RTMP, D_L_ERROR, - ("zt_remove_zones: index (%d) GT ZT_maxentry (%d) (zmap:%d)\n", - Index,ZT_maxentry,i)); - return; - } - dPrintf(D_M_RTMP, D_L_VERBOSE, - ("zt_remove_zones: zone #%d %s was=%d\n", Index, - ZT_table[Index].Zone.str, ZT_table[Index].ZoneCount)); - if (ZT_table[Index].ZoneCount > 0) - ZT_table[Index].ZoneCount--; - if (ZT_table[Index].ZoneCount == 0) - ZT_table[Index].Zone.len = 0; - at_state.flags |= AT_ST_ZT_CHANGED; - } - } - } -} - - - -/* - * zt_compute_hash: compute hash index from the zone name string - */ -static short zt_compute_hash(at_nvestr_t *); - -static short zt_compute_hash(zname) -at_nvestr_t *zname; -{ - register u_short checksum=0, i; - register char c1; - - /* apply the upper name + DDP checksum algorithm */ - - for (i= 0 ; i < zname->len; i++) { - - /* upperize the character */ - - c1 = zname->str[i]; - if (c1 >= 'a' && c1 <= 'z') - c1 += 'A' - 'a'; - if (c1 & 0x80) - c1 = upshift8(c1); - - /* DDP Checksum */ - - checksum += c1; - checksum = ((checksum & 0x8000) ? - (checksum << 1 | 1) : (checksum << 1)); - } - - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("zt_comphash: value computed for zone=%s h=%d\n", - zname->str, checksum)); - - if (checksum) - return (checksum); - else - return (0xffff); - -} - -/* - * zt_upper_zname: translate the name string into uppercase - */ - -#if 0 -void zt_upper_zname(zname) -at_nvestr_t *zname; -{ - register short i; - register char c1; - - for (i= 0 ; i < zname->len; i++) { - - c1 = zname->str[i]; - if (c1 >= 'a' && c1 <= 'z') - c1 += 'A' - 'a'; - if (c1 & 0x80) - c1 = upshift8(c1); - - zname->str[i] = c1; - } -} -#endif
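The hash above is just the DDP checksum (add, then rotate left one bit) run over the case-folded name. A standalone sketch of the same loop, assuming plain ASCII names so the kernel's upshift8() table for high characters can be omitted:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <ctype.h>

/* Same fold-and-rotate loop as zt_compute_hash(), minus upshift8(). */
static uint16_t zone_hash(const char *name)
{
    uint16_t checksum = 0;
    size_t i, len = strlen(name);

    for (i = 0; i < len; i++) {
        uint16_t c1 = (uint8_t)toupper((unsigned char)name[i]);
        checksum += c1;
        /* 16-bit rotate left by one: the DDP checksum step */
        checksum = (checksum & 0x8000) ? (uint16_t)((checksum << 1) | 1)
                                       : (uint16_t)(checksum << 1);
    }
    return checksum ? checksum : 0xffff;   /* 0 is reserved, as above */
}

int main(void)
{
    /* "Twilight" and "twilight" fold to the same hash value */
    printf("%u %u\n", (unsigned)zone_hash("Twilight"),
                      (unsigned)zone_hash("twilight"));
    return 0;
}

zt_get_zmcast() below folds this 16-bit value into the last byte of the Ethernet zone-multicast address (h % 0xFD) or into a token-ring functional-address bit.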
 - -/* - * zt_get_zmcast: calculate the zone multicast address for a - * given zone name. - * Returns the result in "buffer". - */ -int -zt_get_zmcast(ifID, zname, buffer) - at_ifaddr_t *ifID; /* we want to know the media type */ - at_nvestr_t *zname; /* source name for multicast address */ - char *buffer; /* resulting Zone Multicast address */ -{ - u_short h; - - h = zt_compute_hash(zname); - -/* - * Find a nice way to decide if it is TokenRing or Ethernet for - * the Multicast address computation.... - */ - - if (ifID->aa_ifp->if_type != IFT_ISO88025) { /* not token ring */ - - /* Ethernet case */ - - buffer[0] = 0x09; - buffer[1] = 0x00; - buffer[2] = 0x07; - /* no router, use cable multicast */ - if (MULTIHOME_MODE && ifID->ifRouterState == NO_ROUTER ) { - buffer[3] = buffer[4] = buffer[5] = 0xff; - } - else { - buffer[3] = 0x00; - buffer[4] = 0x00; - buffer[5] = h % 0xFD; - } - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, ("zt_get_multi: computed for h=%d %x %x\n", - h, *(u_int *)&buffer[0], *(u_short *)&buffer[4])); - - return(6); /* returns Multicast address length */ - - } - else { - /* assume it is token ring: note for the magic number computation, - * first see Inside Mac page 3-10; there are 20 multicast addresses - * for TLAP, and they range from 0xC000 0000 0008 00 to 0xC000 0200 0000 00 - */ - buffer[0] = 0xC0; - buffer[1] = 0x00; - *(u_int *)&buffer[2] = 1 << ((h % 19) + 11); - dPrintf(D_M_RTMP, D_L_WARNING,("zt_get_multi: BROAD not found for h=%d \n", - h)); - return(6); - } - - - -} - -/* - * zt_ent_zindex: return the first zone index found in the zone map; - * return the entry number+1 in the Zone Table, or zero if not found - */ - -int zt_ent_zindex(zmap) -u_char *zmap; -{ - u_short i,j; - - - for (i = 0 ; i < ZT_BYTES ; i++) - - if (zmap[i]) - for (j = 0 ; j < 8 ; j++) - if ((zmap[i] << j) & 0x80) - return (8*i + j +1); - - return (0); -} -/* - * zt_ent_zcount: count the number of active zones for a routing entry - */ -int -zt_ent_zcount(ent) -RT_entry *ent; -{ - register u_char *zmap; - register u_short i,j; - register int zone_count = 0 ; - - - if (!RT_ALL_ZONES_KNOWN(ent)) - return (0); - zmap = ent->ZoneBitMap; - - for (i = 0 ; i < ZT_BYTES ; i++) { - - if (*zmap) - - for (j = 0 ; j < 8 ; j++) - if ((*zmap << j) & 0x80) - zone_count++; - zmap++; - } - - return (zone_count); -} - -/* - * zt_find_zname: match a zone name in the zone table and return the entry number (1-based) if found - */ -int -zt_find_zname(zname) -at_nvestr_t *zname; -{ - register short i, j, found; - register char c1, c2; - - - if (!zname->len) - return(0); - - for (i = 0 ; i < ZT_maxentry ; i++) { - if (!ZT_table[i].ZoneCount || zname->len != ZT_table[i].Zone.len) - continue; - - found = 1; /* did we get the right one? */ - - for (j = 0 ; j < zname->len ; j++) { - c1 = zname->str[j]; - c2 = ZT_table[i].Zone.str[j]; - if (c1 >= 'a' && c1 <= 'z') - c1 += 'A' - 'a'; - if (c2 >= 'a' && c2 <= 'z') - c2 += 'A' - 'a'; - if (c1 & 0x80) - c1 = upshift8(c1); - if (c2 & 0x80) - c2 = upshift8(c2); - if (c1 != c2) { - found = 0; - break; - } - } - - if (found) - return (i+1); - } - - return(0); -} - - -/* - * zt_set_zmap: set a bit for the corresponding zone map in an entry bitmap - */ -void zt_set_zmap(znum, zmap) - u_short znum; - unsigned char *zmap; -{ - register u_short num = znum -1; - - if (!(zmap[num >> 3] & 0x80 >> (num % 8))) { - zmap[num >> 3] |= 0x80 >> (num % 8); - ZT_table[num].ZoneCount++; - } -} - - -/* - * zt_clr_zmap: clear a bit for the corresponding zone map in an entry bitmap - */ -#if 0 -void zt_clr_zmap(znum, zmap) - u_short znum; - char *zmap; -{ - register u_short num = znum -1; - - if (zmap[num >> 3] & 0x80 >> (num % 8)) { - zmap[num >> 3] ^= 0x80 >> (num % 8); - ZT_table[num].ZoneCount--; - } -} -#endif
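The bit arithmetic in zt_set_zmap()/zt_clr_zmap() — 1-based zone number, byte index num >> 3, mask 0x80 >> (num % 8) — can be exercised in isolation. A minimal sketch (MAP_BYTES is a stand-in; the kernel's ZT_BYTES constant differs):

#include <stdio.h>
#include <string.h>

#define MAP_BYTES 32   /* stand-in for ZT_BYTES */

/* znum is 1-based, exactly as in zt_set_zmap() above */
static void zmap_set(unsigned char *zmap, unsigned short znum)
{
    unsigned short num = znum - 1;
    zmap[num >> 3] |= 0x80 >> (num % 8);
}

static int zmap_test(const unsigned char *zmap, unsigned short znum)
{
    unsigned short num = znum - 1;
    return (zmap[num >> 3] & (0x80 >> (num % 8))) != 0;
}

int main(void)
{
    unsigned char zmap[MAP_BYTES];
    memset(zmap, 0, sizeof zmap);
    zmap_set(zmap, 1);   /* zone 1 -> top bit of byte 0 */
    zmap_set(zmap, 9);   /* zone 9 -> top bit of byte 1 */
    printf("zone 1: %d, zone 2: %d, zone 9: %d\n",
           zmap_test(zmap, 1), zmap_test(zmap, 2), zmap_test(zmap, 9));
    return 0;
}

zt_ent_zindex() above is simply the inverse mapping: the first set bit at byte i, bit j recovers zone number 8*i + j + 1.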
/* - * routing_needed: - * This function performs the actual lookup and forwarding of packets - * sent to the box for routing. - * - * The destination network is looked up in our tables, and if we - * know the next IR to send the packet to, we forward the packet - * on the right port. - * - * If the destination is unknown, we simply dump the packet. - */ - -void routing_needed(mp, ifID, bypass) - gbuf_t *mp; - at_ifaddr_t *ifID; - char bypass; /* set by special socket handlers */ -{ - - register at_ddp_t *ddp; - register int msgsize; - register RT_entry *Entry; - register gbuf_t *tmp_m; - - /* first check that the interface is up and forwarding */ - - if (!ifID) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("routing_needed: invalid ifID!\n")); - gbuf_freel(mp); - return; - } - if ((ifID->ifRoutingState < PORT_ONLINE)) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("routing_needed: port %d not online yet\n", - ifID->ifPort)); - gbuf_freel(mp); - return; - } - - ddp = (at_ddp_t *)gbuf_rptr(mp); - msgsize = DDPLEN_VALUE(ddp); - for (tmp_m = gbuf_next(mp); tmp_m; tmp_m = gbuf_next(tmp_m)) - msgsize += DDPLEN_VALUE(((at_ddp_t *)gbuf_rptr(tmp_m))); - - if (ddp->hopcount++ > 15) { - dPrintf(D_M_RTMP, D_L_WARNING, - ("routing_needed: drop packet for %d:%d, hopcount too high\n", - NET_VALUE(ddp->dst_net), ddp->dst_node)); - gbuf_freel(mp); - snmpStats.dd_hopCount++; - return; /* was return(1); */ - } - - if ((Entry = rt_blookup(NET_VALUE(ddp->dst_net)))) { - - dPrintf(D_M_RTMP_LOW, D_L_ROUTING, - ("routing_needed: FOUND for %d.%d p=%d to %d.%d \n", - NET_VALUE(ddp->dst_net), ddp->dst_node, ifID->ifPort, - Entry->NextIRNet, Entry->NextIRNode)); - - /* somehow, come to that point... */ - - /* if multihomed - need to set the source address to the interface - * the packet is being sent from. - */ - if (MULTIHOME_MODE) { - NET_ASSIGN(ddp->src_net, ifID_table[Entry->NetPort]->ifThisNode.s_net); - ddp->src_node = ifID_table[Entry->NetPort]->ifThisNode.s_node; - } - - ifID->ifStatistics.fwdPkts++; - ifID->ifStatistics.fwdBytes += msgsize; - - if (Entry->NetDist) /* net not directly connected */ - ddp_router_output(mp, ifID_table[Entry->NetPort], AT_ADDR, - Entry->NextIRNet, Entry->NextIRNode, 0); - else {/* we are directly on this net */ - - /* we want to avoid duplicating broadcast packets on the same net, - * but special socket handlers are ok to do that (mainly for - * loopback purposes). So, if the "bypass" flag is set, we don't - * perform that test... [Problem was "movietalk"]. - */ - - if (bypass || ifID_table[Entry->NetPort] != ifID) - ddp_router_output(mp, ifID_table[Entry->NetPort], AT_ADDR, - NET_VALUE(ddp->dst_net), ddp->dst_node, 0); - else { - dPrintf(D_M_RTMP, D_L_ROUTING, - ("routing_needed: bad loopback for add %d.%d from port %d (%d.%d)\n", - NET_VALUE(ddp->dst_net), ddp->dst_node, ifID->ifPort, - NET_VALUE(ddp->src_net), ddp->src_node)); - ifID->ifStatistics.droppedPkts++; - ifID->ifStatistics.droppedBytes += msgsize; - - gbuf_freel(mp); - return; /* was return (2); */ - } - - - } - } - else { - dPrintf(D_M_RTMP, D_L_ROUTING, - ("routing_needed: NOT FOUND for add %d.%d from port %d our %d.%d\n", - NET_VALUE(ddp->dst_net), ddp->dst_node, ifID->ifPort, - ifID_home->ifThisNode.s_net, - ifID_home->ifThisNode.s_node)); - - ifID->ifStatistics.droppedPkts++; - ifID->ifStatistics.droppedBytes += msgsize; - snmpStats.dd_noRoutes++; - - gbuf_freel(mp); - return; /* was return (2); */ - - } - /* return(0); */ -} /* routing_needed */
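The hopcount test above is DDP's loop breaker: the protocol caps the hop count at 15 (a 4-bit field on the wire), so anything past that is dropped rather than forwarded. A toy model of just that guard, using a plain unsigned char for the counter:

#include <stdio.h>

/* Mirrors the guard in routing_needed(): increment, and drop once the
 * pre-increment value exceeds 15. */
static int forward_ok(unsigned char *hopcount)
{
    if ((*hopcount)++ > 15)
        return 0;   /* drop: hop count exhausted */
    return 1;       /* forward out the port chosen via rt_blookup() */
}

int main(void)
{
    unsigned char hops = 0;
    int forwarded = 0;
    while (forward_ok(&hops))
        forwarded++;
    printf("forwarded %d times before dropping\n", forwarded);
    return 0;
}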
The next call after the last - valid entry was read returns NULL - */ -{ - int i; - static int idx=0; - static ZT_entryno zte; - - if (!ZT_table) - return NULL; - - if (first) - idx=0; - - for (i=idx; iEntryState != RTE_STATE_UNUSED) { - size = i; - return(i); - } - return(0); -} - -int -getZipTableSize(void) -{ - register int i; - register ZT_entry *zt; - static int size=0; - - if (!(at_state.flags & AT_ST_ZT_CHANGED)) - return(size); - - for (i=ZT_maxentry,zt = &ZT_table[ZT_maxentry-1]; i; i--,zt--) - if (zt->ZoneCount) { - size = i; - return(i); - } - return(0); -} - -void -getRtmpTable(d,s,c) - RT_entry *d; /* destination */ - int s; /* starting entry */ - int c; /* # entries to copy */ -{ - register int i,n=0; - register RT_entry *rt; - - for(i=s,rt=&RT_table[s]; iEntryState != RTE_STATE_UNUSED) { - *d++ = *rt; - n++; - } -} - -void -getZipTable(d,s,c) - ZT_entry *d; /* destination */ - int s; /* starting entry */ - int c; /* # entries to copy */ -{ - - bcopy(&ZT_table[s], d, c*sizeof(ZT_entry)); -} - -at_nvestr_t *getRTRLocalZone(ifz) - zone_usage_t *ifz; -{ - unsigned char *zmap = NULL; - RT_entry *route; - int i, j, index; - int zcnt=0; /* zone we're pointing to in the list */ - char zonesChecked[ZT_BYTES]; - at_ifaddr_t *ifID; - - if (ifz->zone_index < 0) { - return((at_nvestr_t*)NULL); - } - bzero(zonesChecked,sizeof(zonesChecked)); - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (!(route = rt_blookup(ifID->ifThisNode.s_net))) { - return((at_nvestr_t*)NULL); - } - zmap=route->ZoneBitMap; - dPrintf(D_M_RTMP_LOW, D_L_USR1, - ("getRTRLocal: i/f %s, net:%d\n",ifID->ifName, - ifID->ifThisNode.s_net)); - for (i = 0 ; i < ZT_BYTES; i++) { - if (zmap[i]) { - for (j = 0; j < 8 ; j++) - if ( (zmap[i] & (0x80 >> j)) && - !(zonesChecked[i] & (0x80 >> j)) - ) { - zonesChecked[i] |= (0x80 >> j); - if (ifz->zone_index == zcnt) { - index = i * 8 + j; - getIfUsage(index, &ifz->zone_iflist); - ifz->zone_name = ZT_table[index].Zone; - dPrintf(D_M_RTMP_LOW, D_L_USR1, - ("getRTRLocal:zmap:%8x zcnt:%d\n", - *(int*)zmap, zcnt)); - ifz->zone_index = index+1; - return(&ZT_table[index].Zone); - } - zcnt++; - } - } - } - } - dPrintf(D_M_RTMP_LOW, D_L_USR1, - ("getRTRLocal: returning NULL last ent:%d net:%d zmap:%08x\n", - (ifID ? ifID->ifPort : 0), - (ifID ? ifID->ifThisNode.s_net : 0),*(int*)zmap)); - ifz->zone_name.len = 0; - return((at_nvestr_t*)NULL); -} /* getRTRLocalZone */ - -void getIfUsage(zone, ifs_in_zone) - int zone; - at_ifnames_t *ifs_in_zone; - -/* sets the interface name in each element of the array for each I/F in the - requested zone. The array has a 1:1 correspondence with the - ifID_table. Zone is assumed to be valid and local, so if we're in - single port mode, we'll set the home port and thats it. 
-*/ -{ - u_int zmi; /* zone map index for zone */ - u_char zmb; /* zone map bit mask for zone */ - RT_entry *route; - int cnt=0; - at_ifaddr_t *ifID; - - if (!MULTIPORT_MODE) { - strlcpy(ifs_in_zone->at_if[cnt], ifID_home->ifName, - IFNAMESIZ); - return; - } - bzero(ifs_in_zone, sizeof(at_ifnames_t)); - zmi = zone>>3; - zmb = 0x80>>(zone % 8); - dPrintf(D_M_NBP_LOW, D_L_USR3, ("get_ifs znum:%d zmi%d zmb:%x\n", - zone, zmi, zmb)); - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (!(route = rt_blookup(ifID->ifThisNode.s_net))) - return; - if (route->ZoneBitMap[zmi] & zmb) { - dPrintf(D_M_NBP_LOW, D_L_USR3, ("zone in port %d \n", - route->NetPort)); - strlcpy(ifs_in_zone->at_if[cnt], - ifID_table[route->NetPort]->ifName, IFNAMESIZ); - cnt++; - } - } - return; -} /* getIfUsage */ diff --git a/bsd/netat/ddp_sip.c b/bsd/netat/ddp_sip.c deleted file mode 100644 index cc4b47495..000000000 --- a/bsd/netat/ddp_sip.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. - * - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -#ifndef lint -/* static char sccsid[] = "@(#)sip.c: 2.0, 1.3; 10/18/93; Copyright 1988-89, Apple Computer, Inc."; */ -#endif /* lint */ - -/****************************************************************/ -/* */ -/* */ -/* S I P */ -/* System Information Protocol */ -/* */ -/* */ -/****************************************************************/ - -/* System Information Protocol -- implemented to handle Responder - * Queries. The queries are ATP requests, but the ATP responses are faked - * here in a DDP level handler routine. The responder socket is always - * the 1st socket in the dynamic socket range (128) and it is assumed - * that the node will be registered on that socket. - * - * In A/UX implementation, this implies that /etc/appletalk program will - * register the node name on socket DDP_SOCKET_1st_DYNAMIC (128). 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include /* nbp.h needs the gbuf definition */ -#include -#include -#include -#include -#include -#include - -#define SIP_SYSINFO_CMD 1 -#define SIP_DATALINK_CMD 6 - -#define SIP_GOOD_RESPONSE 0x1 -#define SIP_BAD_RESPONSE 0xff - -#define SIP_DRIVER_VERSION 0x0001 -#define SIP_RESPONDER_VERSION 0x0001 - -typedef struct { - u_char response; - u_char unused; - u_short responder_version; -} sip_userbytes_t; - -void sip_input( - gbuf_t *mp, - __unused at_ifaddr_t *ifID) -{ - /* Packets arriving here are actually ATP packets, but since - * A/UX only sends dummy responses, we're implementing the responder as - * a DDP handler - */ - register at_ddp_t *ddp; - register at_atp_t *atp; - register gbuf_t *tmp; - u_char *resp; - sip_userbytes_t ubytes; - - ddp = (at_ddp_t *)gbuf_rptr(mp); - - /* Make sure the packet we got is an ATP packet */ - if (ddp->type != DDP_ATP) { - gbuf_freem(mp); - return; - } - - /* assuming that the whole packet is in one contiguous buffer */ - atp = (at_atp_t *)ddp->data; - - switch(UAL_VALUE_NTOH(atp->user_bytes)) { - case SIP_SYSINFO_CMD : - /* Sending a response with "AppleTalk driver version" (u_short) - * followed by 14 zeros will pacify the interpoll. - * What? You don't understand what it means to send 14 zeroes? - * Tsk, tsk, look up the SIP protocol specs for details!! - */ - if ((tmp = (gbuf_t *)ddp_growmsg(mp, 16)) == NULL) { - /* don't have buffers */ - gbuf_freem(mp); - return; - } - if (tmp == mp) - /* extra space allocated on the same buffer block */ - resp = atp->data; - else - resp = (u_char *)gbuf_rptr(tmp); - bzero(resp, 16); - *(u_short *)resp = htons(SIP_DRIVER_VERSION); - - ubytes.response = SIP_GOOD_RESPONSE; - ubytes.unused = 0; - ubytes.responder_version = htons(SIP_RESPONDER_VERSION); - break; - case SIP_DATALINK_CMD : - /* In this case, the magic spell is to send 2 zeroes after - * the "AppleTalk driver version". - */ - if ((tmp = (gbuf_t *)ddp_growmsg(mp, 4)) == NULL) { - /* don't have buffers */ - gbuf_freem(mp); - return; - } - if (tmp == mp) - /* extra space allocated on the same buffer block */ - resp = atp->data; - else - resp = (u_char *)gbuf_rptr(tmp); - bzero(resp, 16); - *(u_short *)resp = htons(SIP_DRIVER_VERSION); - - ubytes.response = SIP_GOOD_RESPONSE; - ubytes.unused = 0; - ubytes.responder_version = htons(SIP_RESPONDER_VERSION); - break; - default : - /* bad request, send a bad command response back */ - ubytes.response = SIP_BAD_RESPONSE; - ubytes.unused = 0; - ubytes.responder_version = htons(SIP_RESPONDER_VERSION); - } - - NET_NET(ddp->dst_net, ddp->src_net); - ddp->dst_node = ddp->src_node; - ddp->dst_socket = ddp->src_socket; - UAL_ASSIGN_HTON(atp->user_bytes, &ubytes); - atp->cmd = ATP_CMD_TRESP; - atp->eom = 1; - atp->sts = 0; - atp->bitmap = 0; - - (void)ddp_output(&mp, DDP_SOCKET_1st_DYNAMIC, FALSE); - return; -} /* sip_input */ diff --git a/bsd/netat/ddp_usrreq.c b/bsd/netat/ddp_usrreq.c deleted file mode 100644 index 17bddda8a..000000000 --- a/bsd/netat/ddp_usrreq.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License.
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1998 Apple Computer, Inc. - */ - -/* ddp_usrreq.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -extern at_ifaddr_t *ifID_home; -extern int xpatcnt; - -struct atpcb ddp_head; -u_long ddp_sendspace = 600, /* *** what should this value be? *** */ - ddp_recvspace = 50 * (600 + sizeof(struct sockaddr_at)); - -int ddp_pru_control(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, __unused struct proc *p) -{ - return(at_control(so, cmd, data, ifp)); -} - - -int ddp_pru_attach(struct socket *so, int proto, - __unused struct proc *p) -{ - int error = 0; - struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); - - error = soreserve(so, ddp_sendspace, ddp_recvspace); - if (error != 0) - return error; - - error = at_pcballoc(so, &ddp_head); - if (error) - return error; - pcb = (struct atpcb *)((so)->so_pcb); - pcb->pid = proc_selfpid(); - pcb->ddptype = (u_char) proto; /* set in socreate() */ - pcb->proto = ATPROTO_DDP; - - return error; -} - - -int ddp_pru_disconnect(struct socket *so) -{ - - int error = 0; - struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); - - if (pcb == NULL) - return (EINVAL); - - if ((so->so_state & SS_ISCONNECTED) == 0) - return ENOTCONN; - - soisdisconnected(so); - at_pcbdetach(pcb); - - return error; -} - - -int ddp_pru_abort(struct socket *so) -{ - struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); - - if (pcb == NULL) - return (EINVAL); - - soisdisconnected(so); - at_pcbdetach(pcb); - - return 0; -} - -int ddp_pru_detach(struct socket *so) -{ - struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); - - if (pcb == NULL) - return (EINVAL); - - at_pcbdetach(pcb); - return 0; -} - -int ddp_pru_shutdown(struct socket *so) -{ - struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); - - if (pcb == NULL) - return (EINVAL); - - socantsendmore(so); - return 0; -} - - -int ddp_pru_bind(struct socket *so, struct sockaddr *nam, - __unused struct proc *p) -{ - struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); - - if (pcb == NULL) - return (EINVAL); - - return (at_pcbbind(pcb, nam)); -} - - -int ddp_pru_send(struct socket *so, __unused int flags, struct mbuf *m, - struct sockaddr *addr, __unused struct mbuf *control, - __unused struct proc *p) -{ - at_ddp_t *ddp = NULL; - struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); - - if (pcb == NULL) - return (EINVAL); - - 
/* - * Set type to MSG_DATA. Otherwise looped back packet is not - * recognized by atp_input() and possibly other protocols. - */ - - MCHTYPE(m, MSG_DATA); - - if (!(pcb->ddp_flags & DDPFLG_HDRINCL)) { - /* prepend a DDP header */ - M_PREPEND(m, DDP_X_HDR_SIZE, M_WAIT); - if (m == NULL) - return ENOBUFS; - ddp = mtod(m, at_ddp_t *); - } - - if (so->so_state & SS_ISCONNECTED) { - if (addr) - return EISCONN; - - if (ddp) { - NET_ASSIGN(ddp->dst_net, pcb->raddr.s_net); - ddp->dst_node = pcb->raddr.s_node; - ddp->dst_socket = pcb->rport; - } - } else { - if (addr == NULL) - return ENOTCONN; - - if (ddp) { - struct sockaddr_at *dst = - (struct sockaddr_at *) addr; - NET_ASSIGN(ddp->dst_net, dst->sat_addr.s_net); - ddp->dst_node = dst->sat_addr.s_node; - ddp->dst_socket = dst->sat_port; - } - } - if (ddp) { - DDPLEN_ASSIGN(ddp, m->m_pkthdr.len); - UAS_ASSIGN_HTON(ddp->checksum, - (pcb->ddp_flags & DDPFLG_CHKSUM)? 1: 0); - ddp->type = (pcb->ddptype)? pcb->ddptype: DEFAULT_OT_DDPTYPE; -#ifdef NOT_YET - NET_ASSIGN(ddp->src_net, pcb->laddr.s_net); - ddp->src_node = pcb->laddr.s_node; - ddp->src_socket = pcb->lport; -#endif - } else { - ddp = mtod(m, at_ddp_t *); - } - if (NET_VALUE(ddp->dst_net) == ATADDR_ANYNET && - ddp->dst_node == ATADDR_BCASTNODE && - (pcb->ddp_flags & DDPFLG_SLFSND)) { - struct mbuf *n; - - if ((n = m_dup(m, M_DONTWAIT))) { - at_ifaddr_t - *ifID = ifID_home, - *ifIDTmp = (at_ifaddr_t *)NULL; - - /* as in ddp_output() loop processing, fill in the - rest of the header */ - ddp = mtod(n, at_ddp_t *); - if (MULTIHOME_MODE && (ifIDTmp = forUs(ddp))) - ifID = ifIDTmp; - NET_ASSIGN(ddp->src_net, ifID->ifThisNode.s_net); - ddp->src_node = ifID->ifThisNode.s_node; - ddp->src_socket = pcb->lport; - if (UAS_VALUE_NTOH(ddp->checksum)) - UAS_ASSIGN_HTON(ddp->checksum, ddp_checksum(m, 4)); - ddp_input(n, ifID); - } - } - return(ddp_output(&m, pcb->lport, FALSE)); -} /* ddp_pru_send */ - -int ddp_pru_sockaddr(struct socket *so, - struct sockaddr **nam) -{ - struct atpcb *pcb; - struct sockaddr_at *sat; - - MALLOC(sat, struct sockaddr_at *, sizeof *sat, M_SONAME, M_WAITOK); - if (sat == NULL) - return(ENOMEM); - bzero((caddr_t)sat, sizeof(*sat)); - - if ((pcb = sotoatpcb(so)) == NULL) { - FREE(sat, M_SONAME); - return(EINVAL); - } - - sat->sat_family = AF_APPLETALK; - sat->sat_len = sizeof(*sat); - sat->sat_port = pcb->lport; - sat->sat_addr = pcb->laddr; - - *nam = (struct sockaddr *)sat; - return(0); -} - - -int ddp_pru_peeraddr(struct socket *so, - struct sockaddr **nam) -{ - struct atpcb *pcb; - struct sockaddr_at *sat; - - MALLOC(sat, struct sockaddr_at *, sizeof *sat, M_SONAME, M_WAITOK); - if (sat == NULL) - return (ENOMEM); - bzero((caddr_t)sat, sizeof(*sat)); - - if ((pcb = sotoatpcb(so)) == NULL) { - FREE(sat, M_SONAME); - return(EINVAL); - } - - sat->sat_family = AF_APPLETALK; - sat->sat_len = sizeof(*sat); - sat->sat_port = pcb->rport; - sat->sat_addr = pcb->raddr; - - *nam = (struct sockaddr *)sat; - return(0); -} - - -int ddp_pru_connect(struct socket *so, struct sockaddr *nam, - __unused struct proc *p) -{ - struct atpcb *pcb = (struct atpcb *)((so)->so_pcb); - struct sockaddr_at *faddr = (struct sockaddr_at *) nam; - - if (pcb != NULL) - return (EINVAL); - - if (xpatcnt == 0) - return (EADDRNOTAVAIL); - - if (faddr->sat_family != AF_APPLETALK) - return (EAFNOSUPPORT); - - pcb->raddr = faddr->sat_addr; - soisconnected(so); - return 0; -} - - -/* - * One-time AppleTalk initialization - */ -void ddp_init() -{ - at_memzone_init(); - ddp_head.atpcb_next = ddp_head.atpcb_prev = 
&ddp_head; - init_ddp_handler(); - - /* Initialize protocols implemented in the kernel */ - add_ddp_handler(EP_SOCKET, ep_input); - add_ddp_handler(ZIP_SOCKET, zip_router_input); - add_ddp_handler(NBP_SOCKET, nbp_input); - add_ddp_handler(DDP_SOCKET_1st_DYNAMIC, sip_input); - - ddp_start(); - - appletalk_hack_start(); -} /* ddp_init */ diff --git a/bsd/netat/debug.h b/bsd/netat/debug.h deleted file mode 100644 index 475fee7b8..000000000 --- a/bsd/netat/debug.h +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (c) 1988-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* netat/debug.h */ - -#ifndef _NETAT_DEBUG_H_ -#define _NETAT_DEBUG_H_ -#include -#ifdef __APPLE_API_OBSOLETE -#ifdef PRIVATE - -#define D_L_FATAL 0x00000001 -#define D_L_ERROR 0x00000002 -#define D_L_WARNING 0x00000004 -#define D_L_INFO 0x00000008 -#define D_L_VERBOSE 0x00000010 -#define D_L_STARTUP 0x00000020 -#define D_L_STARTUP_LOW 0x00000040 -#define D_L_SHUTDN 0x00000080 -#define D_L_SHUTDN_LOW 0x00000100 -#define D_L_INPUT 0x00000200 -#define D_L_OUTPUT 0x00000400 -#define D_L_STATS 0x00000800 -#define D_L_STATE_CHG 0x00001000 /* re-aarp, ifState etc. 
*/ -#define D_L_ROUTING 0x00002000 -#define D_L_DNSTREAM 0x00004000 -#define D_L_UPSTREAM 0x00008000 -#define D_L_STARTUP_INFO 0x00010000 -#define D_L_SHUTDN_INFO 0x00020000 -#define D_L_ROUTING_AT 0x00040000 /* atalk address routing */ -#define D_L_USR1 0x01000000 -#define D_L_USR2 0x02000000 -#define D_L_USR3 0x04000000 -#define D_L_USR4 0x08000000 -#define D_L_TRACE 0x10000000 - - -#define D_M_PAT 0x00000001 -#define D_M_PAT_LOW 0x00000002 -#define D_M_ELAP 0x00000004 -#define D_M_ELAP_LOW 0x00000008 -#define D_M_DDP 0x00000010 -#define D_M_DDP_LOW 0x00000020 -#define D_M_NBP 0x00000040 -#define D_M_NBP_LOW 0x00000080 -#define D_M_ZIP 0x00000100 -#define D_M_ZIP_LOW 0x00000200 -#define D_M_RTMP 0x00000400 -#define D_M_RTMP_LOW 0x00000800 -#define D_M_ATP 0x00001000 -#define D_M_ATP_LOW 0x00002000 -#define D_M_ADSP 0x00004000 -#define D_M_ADSP_LOW 0x00008000 -#define D_M_AEP 0x00010000 -#define D_M_AARP 0x00020000 -#define D_M_ASP 0x00040000 -#define D_M_ASP_LOW 0x00080000 -#define D_M_AURP 0x00100000 -#define D_M_AURP_LOW 0x00200000 -#define D_M_TRACE 0x10000000 - - /* macros for working with atp data at the lap level. - * These are for tracehook performance measurements only!!! - * It is assumed that the ddp & atp headers are at the top of the - * mblk, occupy contiguous memory and the atp headers are of the - * extended type only. - */ - -typedef struct dbgBits { - unsigned long dbgMod; /* debug module bitmap (used in dPrintf) */ - unsigned long dbgLev; /* debug level bitmap */ -} dbgBits_t; - -extern dbgBits_t dbgBits; - - /* macros for debugging */ -#if DEBUG -#define dPrintf(mod, lev, p) \ - if (((mod) & dbgBits.dbgMod) && ((lev) & dbgBits.dbgLev)) {\ - printf p; \ - } -#else -#define dPrintf(mod, lev, p) -#endif - -/* 8/5/98 LD: Adds MacOSX kernel debugging facility */ -/* note: kdebug must be added to the "RELEASE" config in conf/MASTER.ppc */ - -#include -#if KDEBUG -/* - Strings for the "trace/codes" file: - -0x02650004 AT_DDPinput - -0x02680000 AT_ADSP_Misc -0x02680004 AT_ADSP_RxData -0x02680008 AT_ADSP_SndData -0x0268000C AT_ADSP_Read -0x02680010 AT_ADSP_Write -0x02680014 AT_ADSP_mbuf -0x02680018 AT_ADSP_putnext -0x0268001c AT_ADSP_ATrw - -*/ - -/* usage: - KERNEL_DEBUG(DBG_AT_DDP_INPUT | DBG_FUNC_START, 0,0,0,0,0); - KERNEL_DEBUG(DBG_AT_DDP_INPUT, 0,0,0,0,0); - KERNEL_DEBUG(DBG_AT_DDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); -*/ - -#define DBG_AT_DDP_INPUT NETDBG_CODE(DBG_NETDDP, 1) -#define DBG_AT_DDP_OUTPUT NETDBG_CODE(DBG_NETDDP, 2) - -#define DBG_ADSP_MISC NETDBG_CODE(DBG_NETADSP, 0) -#define DBG_ADSP_RCV NETDBG_CODE(DBG_NETADSP, 1) -#define DBG_ADSP_SND NETDBG_CODE(DBG_NETADSP, 2) -#define DBG_ADSP_READ NETDBG_CODE(DBG_NETADSP, 3) -#define DBG_ADSP_WRITE NETDBG_CODE(DBG_NETADSP, 4) -#define DBG_ADSP_MBUF NETDBG_CODE(DBG_NETADSP, 5) -#define DBG_ADSP_PNEXT NETDBG_CODE(DBG_NETADSP, 6) -#define DBG_ADSP_ATRW NETDBG_CODE(DBG_NETADSP, 7) -#endif - -#define trace_mbufs(pri, str, start)\ -{ if (start)\ -{ int _i; gbuf_t *_tmp;\ - for (_tmp=start, _i=0; _tmp && _i < 10; _tmp = gbuf_cont(_tmp), _i++) {\ - dPrintf(pri, D_L_TRACE, ("%s=0x%p, len=%ld %s\n",\ - str, _tmp, gbuf_len(_tmp),\ - (((struct mbuf *)_tmp)->m_flags & M_EXT)?"CL":""));\ - KERNEL_DEBUG(DBG_ADSP_MBUF, 0, _tmp, gbuf_len(_tmp), gbuf_next(_tmp), \ - ((struct mbuf *)_tmp)->m_flags & M_EXT);\ -}}} - -/* from h/atlog.h */ - -/* These pointers are non-NULL if logging or tracing are activated. */ -#ifndef LOG_DRIVER -extern char *log_errp; -extern char *log_trcp; -#endif /* LOG_DRIVER */ - -/* ATTRACE() macro. 
Use this routine for calling - * streams tracing and logging. If `log' is TRUE, then - * this event will also be logged if logging is on. - */ -#if !defined(lint) && defined(AT_DEBUG) -#define ATTRACE(mid,sid,level,log,fmt,arg1,arg2,arg3) \ - if (log_trcp || (log && log_errp)) { \ - strlog(mid,sid,level,SL_TRACE | \ - (log ? SL_ERROR : 0) | \ - (level <= AT_LV_FATAL ? SL_FATAL : 0), \ - fmt,arg1,arg2,arg3); \ - } -#else -#define ATTRACE(mid,sid,level,log,fmt,arg1,arg2,arg3) \ -/* printf(fmt, arg1, arg2, arg3); */ - -#endif - - -/* Levels for AppleTalk tracing */ - -#define AT_LV_FATAL 1 -#define AT_LV_ERROR 3 -#define AT_LV_WARNING 5 -#define AT_LV_INFO 7 -#define AT_LV_VERBOSE 9 - - -/* Sub-ids for AppleTalk tracing, add more if you can't figure - * out where your event belongs. - */ - -#define AT_SID_INPUT 1 /* Network incoming packets */ -#define AT_SID_OUTPUT 2 /* Network outgoing packets */ -#define AT_SID_TIMERS 3 /* Protocol timers */ -#define AT_SID_FLOWCTRL 4 /* Protocol flow control */ -#define AT_SID_USERREQ 5 /* User requests */ -#define AT_SID_RESOURCE 6 /* Resource limitations */ - - - -/* Module ID's for AppleTalk subsystems */ - -#define AT_MID(n) (200+n) - -/* -#define AT_MID_MISC AT_MID(0) not used -#define AT_MID_LLAP AT_MID(1) not_used -#define AT_MID_ELAP 202 moved to lap.h -#define AT_MID_DDP 203 moved to ddp.h -#define AT_MID_RTMP AT_MID(4) not used -#define AT_MID_NBP AT_MID(5) not used -#define AT_MID_EP AT_MID(6) not used -#define AT_MID_ATP AT_MID(7) not used -#define AT_MID_ZIP AT_MID(8) not needed -#define AT_MID_PAP AT_MID(9) not used -#define AT_MID_ASP AT_MID(10) redefined in adsp.h -#define AT_MID_AFP AT_MID(11) not used -#define AT_MID_ADSP 212 moved to adsp.h -#define AT_MID_NBPD AT_MID(13) not used -#define AT_MID_LAP 214 moved to lap.h -#define AT_MID_LAST 214 -*/ - -#ifdef AT_MID_STRINGS -static char *at_mid_strings[] = { - "misc", - "LLAP", - "ELAP", - "DDP", - "RTMP", - "NBP", - "EP", - "ATP", - "ZIP", - "PAP", - "ASP", - "AFP", - "ADSP", - "NBPD", - "LAP" -}; -#endif - - -#ifndef SL_FATAL -/* Don't define these if they're already defined */ - -/* Flags for log messages */ - -#define SL_FATAL 01 /* indicates fatal error */ -#define SL_NOTIFY 02 /* logger must notify administrator */ -#define SL_ERROR 04 /* include on the error log */ -#define SL_TRACE 010 /* include on the trace log */ - -#endif - -#endif /* PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_DEBUG_H_ */ - diff --git a/bsd/netat/drv_dep.c b/bsd/netat/drv_dep.c deleted file mode 100644 index 6add8260f..000000000 --- a/bsd/netat/drv_dep.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * Copyright (c) 2000-2012 Apple, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright 1994 Apple Computer, Inc. - * All Rights Reserved. - * - * Tuyen A. Nguyen. (December 5, 1994) - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DSAP_SNAP 0xaa - -extern int adspInited; - -static llc_header_t snap_hdr_at = SNAP_HDR_AT; -static llc_header_t snap_hdr_aarp = SNAP_HDR_AARP; -static unsigned char snap_proto_ddp[5] = SNAP_PROTO_AT; -static unsigned char snap_proto_aarp[5] = SNAP_PROTO_AARP; - -static void at_input_packet(protocol_family_t protocol, mbuf_t m); - -struct ifqueue atalkintrq; /* appletalk and aarp packet input queue */ - -short appletalk_inited = 0; - -void atalk_load(void); -void atalk_unload(void); - -extern int pktsIn, pktsOut; - - -void atalk_load() -{ - atp_init(); - atp_link(); - adspInited = 0; - -/* adsp_init(); - for 2225395 - this happens in adsp_open and is undone on ADSP_UNLINK -*/ - domain_proto_mtx_unlock(TRUE); - proto_register_input(PF_APPLETALK, at_input_packet, NULL, 0); - domain_proto_mtx_lock(); -} /* atalk_load */ - -/* Undo everything atalk_load() did. */ -void atalk_unload() /* not currently used */ -{ - atp_unlink(); - -#ifdef NOT_YET - { - extern gbuf_t *scb_resource_m; - extern gbuf_t *atp_resource_m; - if (scb_resource_m) { - gbuf_freem(scb_resource_m); - scb_resource_m = 0; - scb_free_list = 0; - } - /* allocated in atp_trans_alloc() */ - if (atp_resource_m) { - gbuf_freem(atp_resource_m); - atp_resource_m = 0; - atp_trans_free_list = 0; - } - } -#endif - - appletalk_inited = 0; -} /* atalk_unload */ - -void appletalk_hack_start(void) -{ - if (!appletalk_inited) { - atalk_load(); - atalkintrq.ifq_maxlen = IFQ_MAXLEN; - appletalk_inited = 1; - } -} /* appletalk_hack_start */ - -int pat_output(patp, mlist, dst_addr, type) - at_ifaddr_t *patp; - struct mbuf *mlist; /* packet chain */ - unsigned char *dst_addr; /* for atalk addr - net # must be in network byte order */ - int type; -{ - struct mbuf *m, *m1; - llc_header_t *llc_header; - struct sockaddr dst; - - if (! patp->aa_ifp) { - for (m = mlist; m; m = mlist) { - mlist = m->m_nextpkt; - m->m_nextpkt = 0; - m_freem(m); - } - return ENOTREADY; - } - - /* this is for ether_output */ - dst.sa_family = AF_APPLETALK; - dst.sa_len = 2 + sizeof(struct etalk_addr); - bcopy (dst_addr, &dst.sa_data[0], sizeof(struct etalk_addr)); - - /* packet chains are used on output and can be tested using aufs */ - for (m = mlist; m; m = mlist) { - mlist = m->m_nextpkt; - m->m_nextpkt = 0; - - M_PREPEND(m, sizeof(llc_header_t), M_DONTWAIT); - if (m == 0) { - continue; - } - - llc_header = mtod(m, llc_header_t *); - *llc_header = - (type == AARP_AT_TYPE) ? 
snap_hdr_aarp : snap_hdr_at; - - for (m->m_pkthdr.len = 0, m1 = m; m1; m1 = m1->m_next) - m->m_pkthdr.len += m1->m_len; - m->m_pkthdr.rcvif = 0; - - /* *** Note: AT is sending out mbufs of type MSG_DATA, - not MT_DATA. *** */ -#ifdef APPLETALK_DEBUG - if (m->m_next && - !((m->m_next)->m_flags & M_EXT)) - kprintf("po: mlen= %d, m2len= %d\n", m->m_len, - (m->m_next)->m_len); -#endif - atalk_unlock(); - dlil_output(patp->aa_ifp, PF_APPLETALK, m, NULL, &dst, 0, NULL); - atalk_lock(); - - pktsOut++; - } - - return 0; -} /* pat_output */ - -static void -at_input_packet( - __unused protocol_family_t protocol, - mbuf_t m) -{ - struct mbuf *m1; - struct ifnet *ifp; - llc_header_t *llc_header; - at_ifaddr_t *ifID; - char src[6]; - enet_header_t *enet_header; - - if (!appletalk_inited) { - m_freem(m); - return; - } - - if ((m->m_flags & M_PKTHDR) == 0) { -#ifdef APPLETALK_DEBUG - kprintf("atalkintr: no HDR on packet received"); -#endif - m_freem(m); - return; - } - - /* make sure the interface this packet was received on is configured - for AppleTalk */ - ifp = m->m_pkthdr.rcvif; - TAILQ_FOREACH(ifID, &at_ifQueueHd, aa_link) { - if (ifID->aa_ifp && (ifID->aa_ifp == ifp)) - break; - } - /* if we didn't find a matching interface */ - if (!ifID) { - m_freem(m); - return; /* was EAFNOSUPPORT */ - } - - /* make sure the entire packet header is in the current mbuf */ - if (m->m_len < ENET_LLC_SIZE && - (m = m_pullup(m, ENET_LLC_SIZE)) == 0) { -#ifdef APPLETALK_DEBUG - kprintf("atalkintr: packet too small\n"); -#endif - m_freem(m); - return; - } - enet_header = mtod(m, enet_header_t *); - - /* Ignore multicast packets from local station */ - /* *** Note: code for IFTYPE_TOKENTALK may be needed here. *** */ - if (ifID->aa_ifp->if_type == IFT_ETHER || - ifID->aa_ifp->if_type == IFT_L2VLAN || - ifID->aa_ifp->if_type == IFT_IEEE8023ADLAG) { - bcopy((char *)enet_header->src, src, sizeof(src)); - -#ifdef COMMENT /* In order to receive packets from the Blue Box, we cannot - reject packets whose source address matches our local address. - */ - if ((enet_header->dst[0] & 1) && - (bcmp(src, ifID->xaddr, sizeof(src)) == 0)) { - /* Packet rejected: think it's a local mcast. */ - m_freem(m); - return; /* was EAFNOSUPPORT */ - } -#endif /* COMMENT */ - - llc_header = (llc_header_t *)(enet_header+1); - - /* advance the mbuf pointers past the ethernet header */ - m->m_data += ENET_LLC_SIZE; - m->m_len -= ENET_LLC_SIZE; - - pktsIn++; - - if (LLC_PROTO_EQUAL(llc_header->protocol,snap_proto_aarp)) { - (void)aarp_rcv_pkt(mtod(m, aarp_pkt_t *), ifID); - m_freem(m); - } - else if (LLC_PROTO_EQUAL(llc_header->protocol, snap_proto_ddp)) { - /* if we're a router take all pkts */ - if (!ROUTING_MODE) { - if (aarp_chk_addr(mtod(m, at_ddp_t *), ifID) - == AARP_ERR_NOT_OURS) { -#ifdef APPLETALK_DEBUG - kprintf("pat_input: Packet Rejected: not for us? 
dest=%x.%x.%x.%x.%x.%x LLC_PROTO= %02x%02x\n", - enet_header->dst[0], enet_header->dst[1], - enet_header->dst[2], enet_header->dst[3], - enet_header->dst[4], enet_header->dst[5], - llc_header->protocol[3], - llc_header->protocol[4]); -#endif - m_freem(m); - return; /* was EAFNOSUPPORT */ - } - } - MCHTYPE(m, MSG_DATA); /* set the mbuf type */ - - ifID->stats.rcv_packets++; - for (m1 = m; m1; m1 = m1->m_next) - ifID->stats.rcv_bytes += m1->m_len; - - if (!MULTIPORT_MODE) - ddp_glean(m, ifID, (struct etalk_addr *)src); - - ddp_input(m, ifID); - } else { -#ifdef APPLETALK_DEBUG - kprintf("pat_input: Packet Rejected: wrong LLC_PROTO = %02x%02x\n", - llc_header->protocol[3], - llc_header->protocol[4]); -#endif - m_freem(m); - } - } -} diff --git a/bsd/netat/lap.h b/bsd/netat/lap.h deleted file mode 100644 index bcedbfbcc..000000000 --- a/bsd/netat/lap.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. - */ - -/* Definitions for generic access to AppleTalk link level protocols. - */ - -#ifndef _NETAT_LAP_H_ -#define _NETAT_LAP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -#define AT_MID_ELAP 202 - -/* elap ioctl's */ - -#define ELAP_IOC_MYIOCTL(i) ((i>>8) == AT_MID_ELAP) -#define ELAP_IOC_GET_STATS ((AT_MID_ELAP<<8) | 2) - -#define AT_MID_LAP 214 - -/* Generic LAP ioctl's. Each LAP may implement other ioctl's specific to - * its functionality. 
- */ -#define LAP_IOC_MYIOCTL(i) ((i>>8) == AT_MID_LAP) -#define LAP_IOC_ADD_ROUTE ((AT_MID_LAP<<8) | 9) -#define LAP_IOC_GET_ZONE ((AT_MID_LAP<<8) | 12) -#define LAP_IOC_GET_ROUTE ((AT_MID_LAP<<8) | 13) -#define LAP_IOC_SNMP_GET_CFG ((AT_MID_LAP<<8) | 21) -#define LAP_IOC_SNMP_GET_AARP ((AT_MID_LAP<<8) | 22) -#define LAP_IOC_SNMP_GET_RTMP ((AT_MID_LAP<<8) | 23) -#define LAP_IOC_SNMP_GET_ZIP ((AT_MID_LAP<<8) | 24) -#define LAP_IOC_SNMP_GET_DDP ((AT_MID_LAP<<8) | 25) -#define LAP_IOC_SNMP_GET_NBP ((AT_MID_LAP<<8) | 26) -#define LAP_IOC_SNMP_GET_PORTS ((AT_MID_LAP<<8) | 27) - -#ifdef NOT_USED - -#define ELAP_IOC_GET_CFG ((AT_MID_ELAP<<8) | 1) /* not used */ -#define ELAP_IOC_SET_CFG ((AT_MID_ELAP<<8) | 3) /* not used */ -#define ELAP_IOC_SET_ZONE ((AT_MID_ELAP<<8) | 4) /* not used */ -#define ELAP_IOC_SWITCHZONE ((AT_MID_ELAP<<8) | 5) /* not used */ - -#define LAP_IOC_ONLINE ((AT_MID_LAP<<8) | 1) /* not used */ -#define LAP_IOC_OFFLINE ((AT_MID_LAP<<8) | 2) /* not used */ -#define LAP_IOC_GET_IFS_STAT ((AT_MID_LAP<<8) | 3) /* not used */ -#define LAP_IOC_ADD_ZONE ((AT_MID_LAP<<8) | 4) /* not used */ -#define LAP_IOC_ROUTER_START ((AT_MID_LAP<<8) | 5) /* not used */ -#define LAP_IOC_ROUTER_SHUTDOWN ((AT_MID_LAP<<8) | 6) /* not used */ -#define LAP_IOC_ROUTER_INIT ((AT_MID_LAP<<8) | 7) /* not used */ -#define LAP_IOC_GET_IFID ((AT_MID_LAP<<8) | 8) /* not used */ -#define LAP_IOC_GET_DBG ((AT_MID_LAP<<8) | 10) /* not used */ -#define LAP_IOC_SET_DBG ((AT_MID_LAP<<8) | 11) /* not used */ -#define LAP_IOC_ADD_IFNAME ((AT_MID_LAP<<8) | 14) /* not used */ -#define LAP_IOC_DO_DEFER ((AT_MID_LAP<<8) | 15) /* not used */ -#define LAP_IOC_DO_DELAY ((AT_MID_LAP<<8) | 16) /* not used */ -#define LAP_IOC_SHUT_DOWN ((AT_MID_LAP<<8) | 17) /* not used */ -#define LAP_IOC_CHECK_STATE ((AT_MID_LAP<<8) | 18) /* not used */ -#define LAP_IOC_DEL_IFNAME ((AT_MID_LAP<<8) | 19) /* not used */ -#define LAP_IOC_SET_MIX ((AT_MID_LAP<<8) | 20) /* not used */ -#define LAP_IOC_SET_LOCAL_ZONES ((AT_MID_LAP<<8) | 28) /* not used */ -#define LAP_IOC_GET_LOCAL_ZONE ((AT_MID_LAP<<8) | 29) /* not used */ -#define LAP_IOC_IS_ZONE_LOCAL ((AT_MID_LAP<<8) | 30) /* not used */ -#define LAP_IOC_GET_MODE ((AT_MID_LAP<<8) | 31) /* not used */ -#define LAP_IOC_GET_IF_NAMES ((AT_MID_LAP<<8) | 32) /* not used */ -#define LAP_IOC_GET_DEFAULT_ZONE ((AT_MID_LAP<<8) | 33) /* not used */ -#define LAP_IOC_SET_DEFAULT_ZONES ((AT_MID_LAP<<8) | 34) /* not used */ - -#endif /* NOT_USED */ - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_LAP_H_ */ - diff --git a/bsd/netat/nbp.h b/bsd/netat/nbp.h deleted file mode 100644 index 39c49daef..000000000 --- a/bsd/netat/nbp.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * - * - * ORIGINS: 82 - * - * (C) COPYRIGHT Apple Computer, Inc. 1992-1996 - * All Rights Reserved - * - */ -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. - * - * The information contained herein is subject to change without - * notice and should not be construed as a commitment by Apple - * Computer, Inc. Apple Computer, Inc. assumes no responsibility - * for any errors that may appear. - * - * Confidential and Proprietary to Apple Computer, Inc. - */ -/* - * Title: nbp.h - * - * Facility: Include file for NBP kernel module. - * - * Author: Kumar Vora, Creation Date: May-1-1989 - * - * History: - * X01-001 Kumar Vora May-1-1989 - * Initial Creation. - */ - -#ifndef _NETAT_NBP_H_ -#define _NETAT_NBP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -/* NBP packet types */ - -#define NBP_BRRQ 0x01 /* Broadcast request */ -#define NBP_LKUP 0x02 /* Lookup */ -#define NBP_LKUP_REPLY 0x03 /* Lookup reply */ -#define NBP_FWDRQ 0x04 /* Forward Request (router only) */ - -/* *** the following may be discontinued in the near future *** */ - -#define NBP_CONFIRM 0x09 /* Confirm, not sent on wire */ - -#ifdef NOT_USED -#define NBP_REGISTER 0x07 /* Register a name */ -#define NBP_DELETE 0x08 /* Delete a name */ -#define NBP_STATUS_REPLY 0x0a /* Status on register/delete */ -#define NBP_CLOSE_NOTE 0x0b /* Close notification from DDP */ -#endif - -/* *** **************************************************** *** */ - -/* Protocol defaults */ - -#define NBP_RETRY_COUNT 8 /* Maximum repeats */ -#define NBP_RETRY_INTERVAL 1 /* Retry timeout */ - -/* Special (partial) wildcard character */ -#define NBP_SPL_WILDCARD 0xC5 -#define NBP_ORD_WILDCARD '=' - -/* Packet definitions */ - -#define NBP_TUPLE_MAX 15 /* Maximum number of tuples in one DDP packet */ -#define NBP_HDR_SIZE 2 - -typedef struct at_nbp { -#if BYTE_ORDER == BIG_ENDIAN - unsigned - control : 4, - tuple_count : 4; -#endif -#if BYTE_ORDER == LITTLE_ENDIAN - unsigned - tuple_count : 4, - control : 4; -#endif - u_char at_nbp_id; - at_nbptuple_t tuple[NBP_TUPLE_MAX]; -} at_nbp_t; - -#define DEFAULT_ZONE(zone) (!(zone)->len || ((zone)->len == 1 && (zone)->str[0] == '*')) - -#ifdef KERNEL_PRIVATE - -/* Struct for name registry */ -typedef struct _nve_ { - TAILQ_ENTRY(_nve_) nve_link; /* tailq macro glue */ - gbuf_t *tag; /*pointer to the parent gbuf_t*/ - /* *** there's no reason why tag has to - be an mbuf *** */ - at_nvestr_t zone; - u_int zone_hash; - at_nvestr_t object; - u_int object_hash; - at_nvestr_t type; - u_int type_hash; - at_inet_t address; - u_char ddptype; - u_char enumerator; - int pid; - long unique_nbp_id; /* long to be compatible with OT */ -} nve_entry_t; - -#define NBP_WILD_OBJECT 0x01 -#define NBP_WILD_TYPE 0x02 -#define NBP_WILD_MASK 0x03 - -struct nbp_req; -typedef struct nbp_req nbp_req_t; -struct nbp_req { - int (*func)(nbp_req_t *, nve_entry_t *); - gbuf_t *response; /* the response datagram */ - int space_unused; /* Space available in the resp */ - /* packet. 
*/ - gbuf_t *request; /* The request datagram */ - /* Saved for return address */ - nve_entry_t nve; - u_char flags; /* Flags to indicate whether or */ - /* not the request tuple has */ - /* wildcards in it */ -}; - -extern int nbp_insert_entry(nve_entry_t *); -extern u_int nbp_strhash (at_nvestr_t *); -extern nve_entry_t *nbp_find_nve(nve_entry_t *); -extern int nbp_fillin_nve(at_entity_t *, nve_entry_t *); - -extern at_nvestr_t *getSPLocalZone(int); -extern at_nvestr_t *getLocalZone(int); - -struct at_ifaddr; -void nbp_add_multicast( at_nvestr_t *, struct at_ifaddr *); -void nbp_shutdown(void ); - -int nbp_mh_reg(at_nbp_reg_t *); -int nbp_new_nve_entry(nve_entry_t *, struct at_ifaddr *); -void nbp_delete_entry(nve_entry_t *); - - - -#endif /* KERNEL_PRIVATE */ -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_NBP_H_ */ diff --git a/bsd/netat/pap.h b/bsd/netat/pap.h deleted file mode 100644 index a9ce98aff..000000000 --- a/bsd/netat/pap.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * - * ORIGINS: 82 - * - * (C) COPYRIGHT Apple Computer, Inc. 1992-1996 - * All Rights Reserved - * - */ - -/* Definitions for ATP protocol and streams module, per - * AppleTalk Transaction Protocol documentation from - * `Inside AppleTalk', July 14, 1986. 
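The name-registry entries above (nve_entry_t) cache one hash per component because NBP lookups compare zone, object, and type independently, each possibly wildcarded. The header only declares nbp_strhash(); its algorithm is not shown in this hunk, so the following is a toy stand-in that captures just the two requirements visible here: length-prefixed at_nvestr_t-style strings and case-insensitive matching.

    #include <stdint.h>
    #include <stddef.h>
    #include <ctype.h>

    /* Local model of at_nvestr_t: a Pascal-style length-prefixed name. */
    struct nvestr {
        uint8_t len;
        uint8_t str[32];
    };

    /* Illustrative only: NBP names match case-insensitively, so fold case
     * before hashing; the real nbp_strhash() may differ entirely. */
    static unsigned nve_hash(const struct nvestr *n)
    {
        unsigned h = 0;
        for (size_t i = 0; i < n->len && i < sizeof(n->str); i++)
            h = h * 31 + (unsigned)toupper(n->str[i]);
        return h;
    }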
- */ - -#ifndef _NETAT_PAP_H_ -#define _NETAT_PAP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -#define AT_PAP_DATA_SIZE 512 /* Maximum PAP data size */ -#define AT_PAP_STATUS_SIZE 255 /* Maximum PAP status length */ -#define PAP_TIMEOUT 120 - -/* PAP packet types */ - -#define AT_PAP_TYPE_OPEN_CONN 0x01 /* Open-Connection packet */ -#define AT_PAP_TYPE_OPEN_CONN_REPLY 0x02 /* Open-Connection-Reply packet */ -#define AT_PAP_TYPE_SEND_DATA 0x03 /* Send-Data packet */ -#define AT_PAP_TYPE_DATA 0x04 /* Data packet */ -#define AT_PAP_TYPE_TICKLE 0x05 /* Tickle packet */ -#define AT_PAP_TYPE_CLOSE_CONN 0x06 /* Close-Connection packet */ -#define AT_PAP_TYPE_CLOSE_CONN_REPLY 0x07 /* Close-Connection-Reply pkt */ -#define AT_PAP_TYPE_SEND_STATUS 0x08 /* Send-Status packet */ -#define AT_PAP_TYPE_SEND_STS_REPLY 0x09 /* Send-Status-Reply packet */ -#define AT_PAP_TYPE_READ_LW 0x0A /* Read LaserWriter Message */ - - -/* PAP packet structure */ - -typedef struct { - u_char at_pap_connection_id; - u_char at_pap_type; - u_char at_pap_sequence_number[2]; - u_char at_pap_responding_socket; - u_char at_pap_flow_quantum; - u_char at_pap_wait_time_or_result[2]; - u_char at_pap_buffer[AT_PAP_DATA_SIZE]; -} at_pap; - - -/* ioctl definitions */ - -#define AT_PAP_SETHDR (('~'<<8)|0) -#define AT_PAP_READ (('~'<<8)|1) -#define AT_PAP_WRITE (('~'<<8)|2) -#define AT_PAP_WRITE_EOF (('~'<<8)|3) -#define AT_PAP_WRITE_FLUSH (('~'<<8)|4) -#define AT_PAP_READ_IGNORE (('~'<<8)|5) -#define AT_PAPD_SET_STATUS (('~'<<8)|40) -#define AT_PAPD_GET_NEXT_JOB (('~'<<8)|41) - -extern char at_pap_status[]; -extern char *pap_status (); - -#define NPAPSERVERS 10 /* the number of active PAP servers/node */ -#define NPAPSESSIONS 40 /* the number of active PAP sockets/node */ - -#define AT_PAP_HDR_SIZE (DDP_X_HDR_SIZE + ATP_HDR_SIZE) - -#define ATP_DDP_HDR(c) ((at_ddp_t *)(c)) - -#define PAP_SOCKERR "Unable to open PAP socket" -#define P_NOEXIST "Printer not found" -#define P_UNREACH "Unable to establish PAP session" - -struct pap_state { - u_char pap_inuse; /* true if this one is allocated */ - u_char pap_tickle; /* true if we are tickling the other end */ - u_char pap_request; /* bitmap from a received request */ - u_char pap_eof; /* true if we have received an EOF */ - u_char pap_eof_sent; /* true if we have sent an EOF */ - u_char pap_sent; /* true if we have sent anything (and - therefore may have to send an eof - on close) */ - u_char pap_error; /* error message from read request */ - u_char pap_timer; /* a timeout is pending */ - u_char pap_closing; /* the link is closing and/or closed */ - u_char pap_request_count; /* number of outstanding requests */ - u_char pap_req_timer; /* the request timer is running */ - u_char pap_ending; /* we are waiting for atp to flush */ - u_char pap_read_ignore; /* we are in 'read with ignore' mode */ - - u_char pap_req_socket; - at_inet_t pap_to; - int pap_flow; - - u_short pap_send_count; /* the sequence number to send on the - next send data request */ - u_short pap_rcv_count; /* the sequence number expected to - receive on the next request */ - u_short pap_tid; /* ATP transaction ID for responses */ - u_char pap_connID; /* our connection ID */ - - int pap_ignore_id; /* the transaction ID for read ignore */ - int pap_tickle_id; /* the transaction ID for tickles */ -}; - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_PAP_H_ */ diff --git a/bsd/netat/routing_tables.h b/bsd/netat/routing_tables.h deleted file mode 100644 index 9d2985c07..000000000 --- a/bsd/netat/routing_tables.h +++ 
/dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * This include file defines the RTMP table and ZIP table - * for the AppleTalk AIX router - * - * - * 0.01 03/16/94 LD Creation - * 0.10 08/19/94 LD merged - * - */ - -#ifndef _NETAT_ROUTING_TABLES_H_ -#define _NETAT_ROUTING_TABLES_H_ -#include -#ifdef PRIVATE - -/* RTMP table entry state bitmap (EntryState) values */ - -#define RTE_STATE_UNUSED 0 /* this entry is not in used */ -#define RTE_STATE_BAD 2 /* The route is almost ready to be removed */ -#define RTE_STATE_SUSPECT 4 /* didn't received an update for route */ -#define RTE_STATE_GOOD 8 /* this route is 100% valid */ -#define RTE_STATE_ZKNOWN 16 /* we know the zones for this entry */ -#define RTE_STATE_UPDATED 32 /* set when updated from received rtmp table */ -#define RTE_STATE_BKUP 64 /* for future use : AURP */ -#define RTE_STATE_PERMANENT 128 /* This is a directly attached route */ - -#define PORT_ONLINE 32 /* router port in forwarding state */ -#define PORT_SEEDING 31 /* router port seeding */ -#define PORT_ACTIVATING 16 /* router port waiting for net infos */ -#define PORT_ERR_NOZONE 6 /* router no zones for non seed port*/ -#define PORT_ERR_BADRTMP 5 /* router problem bad rtmp version*/ -#define PORT_ERR_STARTUP 4 /* router problem cable in start range*/ -#define PORT_ERR_CABLER 3 /* router problem bad cable range*/ -#define PORT_ERR_SEED 2 /* router startup seeding problem */ -#define PORT_ONERROR 1 /* router port with generic problem*/ -#define PORT_OFFLINE 0 /* router port disabled/not ready */ - -#define ZT_MAX 1024 /* Don't allow more zones than that */ -#define ZT_MIN 32 /* Minimum for a good behaviour*/ -#define ZT_DEFAULT 512 /* Minimum for a good behaviour*/ -#define RT_MAX 4096 /* Don't allow more entries than that */ -#define RT_MIN 128 /* Minimum for a good behaviour*/ -#define RT_DEFAULT 1024 /* Minimum for a good behaviour*/ -#define ZT_BYTES (ZT_MAX/8) /* Bytes in Zone Bitmap */ -#define ZT_MAXEDOUT ZT_MAX+1 /* reached the entry limit.. 
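EntryState above mixes a mutually exclusive validity level (GOOD, SUSPECT, BAD) with independent flag bits (ZKNOWN, UPDATED, BKUP, PERMANENT) in one u_char. The comments imply the usual RTMP aging scheme: an entry that stops being refreshed decays toward removal. A sketch of such an ager under that assumption; the function is mine (the real validity timer lives in routing code not shown in this hunk), and a caller would skip entries marked RTE_STATE_PERMANENT:

    #include <stdint.h>

    #define RTE_STATE_UNUSED    0
    #define RTE_STATE_BAD       2
    #define RTE_STATE_SUSPECT   4
    #define RTE_STATE_GOOD      8
    #define RTE_STATE_UPDATED  32

    /* One validity tick: refreshed entries snap back to GOOD, stale ones
     * decay GOOD -> SUSPECT -> BAD -> UNUSED; other flag bits are kept. */
    static void rte_age(uint8_t *state)
    {
        uint8_t level = *state & (RTE_STATE_GOOD | RTE_STATE_SUSPECT | RTE_STATE_BAD);
        uint8_t flags = *state & (uint8_t)~(RTE_STATE_GOOD | RTE_STATE_SUSPECT |
                                            RTE_STATE_BAD | RTE_STATE_UPDATED);

        if (*state & RTE_STATE_UPDATED) {      /* an RTMP tuple refreshed it */
            *state = (uint8_t)(flags | RTE_STATE_GOOD);
        } else if (level == RTE_STATE_GOOD) {
            *state = (uint8_t)(flags | RTE_STATE_SUSPECT);
        } else if (level == RTE_STATE_SUSPECT) {
            *state = (uint8_t)(flags | RTE_STATE_BAD);
        } else {
            *state = RTE_STATE_UNUSED;         /* reclaimable */
        }
    }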
*/ -#define RT_MIX_DEFAULT 2000 /* default for nbr of ppsec */ - - -#define NOTIFY_N_DIST 31 /* Notify Neighbor distance (when shutdown or so) */ - -/* Useful macros to access the RTMP tuple fields */ - -#define TUPLENET(x) NET_VALUE(((at_rtmp_tuple *)(x))->at_rtmp_net) -#define TUPLEDIST(x) ((((at_rtmp_tuple *)(x))->at_rtmp_data) & RTMP_DISTANCE) -#define TUPLERANGE(x) ((((at_rtmp_tuple *)(x))->at_rtmp_data) & RTMP_RANGE_FLAG) - -#define CableStart ifID->ifThisCableStart -#define CableStop ifID->ifThisCableEnd - -#define RTMP_IDLENGTH 4 /* RTMP packet Node header length */ - - -#define RTMP_VERSION_NUMBER 0x82 /* V2 only version of RTMP supported */ - -#define ERTR_SEED_CONFLICT 0x101 /* Conflict between port information and net - * value received for the port (fatal for Rtr) - */ -#define ERTR_CABLE_CONFLICT 0x102 /* Conflict between port information and net - * information received in a RTMP packet - */ - -#define ERTR_RTMP_BAD_VERSION 0x103 /* We received a non phase-II RTMP packet - * that's bad... We can't deal with it - */ - -#define ERTR_CABLE_STARTUP 0x104 /* the cable range we're on happen to - * be in the startup range. Shouldn't - */ - -#define ERTR_CABLE_NOZONE 0x105 /* We haven't found any zones for that port - * after all the timeout expired - */ - - -/* RTMP table entry */ - -typedef struct rt_entry { - - struct rt_entry *left; /* btree left pointer */ - struct rt_entry *right; /* btree right pointer */ - - at_net_al NetStop; /* Last net # in the range, or network # if - non extended network */ - at_net_al NetStart; /* Starting network number in the range, 0 - non extended network */ - at_net_al NextIRNet; /* Network number of next Internet Router */ - at_node NextIRNode; /* Node ID of next Router */ - u_char ZoneBitMap[ZT_BYTES]; /* One bit per Zone defined for this entry */ - u_char NetDist; /* Distance in hops of the destination net */ - u_char NetPort; /* Physical port number to forward to */ - u_char EntryState; /* State of the entry bitmap field */ - u_char RTMPFlag; - u_char AURPFlag; - -} RT_entry; - - -/* ZIP Table entry */ - -typedef struct { - - u_short ZoneCount; /* Count of reference to zone entry */ - at_nvestr_t Zone; /* zone name as a Network Visible Entity */ - -} ZT_entry; - -/* for zone retrieval to user space only */ -typedef struct { - unsigned short entryno; /* zone table entry number (1st = 0) */ - ZT_entry zt; /* the zone table entry */ -} ZT_entryno; - -#ifdef KERNEL_PRIVATE - -/* Macros for Routing table B-tree easy access */ - -#define RT_DELETE(NetStop, NetStart) {\ - RT_entry *found; \ - if ((found = rt_bdelete(NetStop, NetStart))) { \ - memset(found, '\0', sizeof(RT_entry)); \ - found->right = RT_table_freelist; \ - RT_table_freelist = found; \ - } \ -} - -/* Set/Reset and test the All zones known bit in for the entry field */ - -#define RT_ALL_ZONES_KNOWN(entry) ((entry)->EntryState & RTE_STATE_ZKNOWN) -#define RT_SET_ZONE_KNOWN(entry) ((entry)->EntryState |= RTE_STATE_ZKNOWN) -#define RT_CLR_ZONE_KNOWN(entry) ((entry)->EntryState ^= RTE_STATE_ZKNOWN) - -/* - * check if a zone number is in a given zone map - */ -#define ZT_ISIN_ZMAP(znum, zmap) ((zmap)[(znum-1) >> 3] & 0x80 >> (znum-1) % 8) - -/* remove a zone from the zone bitmap, and check if the zone - * is still in use by someone else. 
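RT_DELETE above recycles a routing entry by zeroing it and pushing it onto RT_table_freelist, reusing the entry's own right pointer as the free-list link. The matching allocation step is not visible in this hunk (it sits inside rt_binsert()/rt_insert() in the routing code), but its free-list half would have to look like the sketch below, assuming the RT_entry and RT_table_freelist declarations from this header:

    /* Hypothetical pop matching RT_DELETE's push: reuse ->right as the link. */
    static RT_entry *rt_entry_pop(void)
    {
        RT_entry *e = RT_table_freelist;

        if (e != NULL) {
            RT_table_freelist = e->right;   /* next recycled entry */
            e->right = NULL;
        }
        return e;                           /* NULL when the table is exhausted */
    }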
- */ - -#define ZT_CLR_ZMAP(num, zmap) { \ - if ((zmap)[(num-1) >> 3] & 0x80 >> (num-1) % 8) { \ - (zmap)[(num-1) >> 3] ^= 0x80 >> (num-1) % 8; \ - ZT_table[(num-1)].ZoneCount-- ; \ - } \ -} - -/* set a bit in an entry bit map */ - -#define ZT_SET_ZMAP(num, zmap) { \ - if (!zmap[(num-1) >> 3] & 0x80 >> (num-1) % 8) { \ - zmap[(num-1) >> 3] |= 0x80 >> (num-1) % 8; \ - ZT_table[(num-1)].ZoneCount++ ; \ - } \ -} - -extern int regDefaultZone(at_ifaddr_t *); -extern int zonename_equal(at_nvestr_t *, at_nvestr_t *); - -extern RT_entry *RT_table_freelist; -extern RT_entry RT_table_start; -extern RT_entry *RT_table; -extern RT_entry *rt_binsert (RT_entry *); -extern RT_entry *rt_insert( at_net_al NStop, at_net_al NStart, at_net_al NxNet, - at_node NxNode, u_char NtDist, u_char NtPort, u_char EntS); -extern RT_entry *rt_bdelete (at_net_al NetStop, at_net_al NetStart); -extern RT_entry *rt_blookup(int); -extern RT_entry *rt_getNextRoute(int); - -extern ZT_entry *ZT_table; -extern short RT_maxentry; -extern short ZT_maxentry; - -extern int RouterMix; - -extern int zt_add_zone(char *, short); -extern int zt_add_zonename(at_nvestr_t *); -extern int zt_ent_zindex(u_char *); -extern ZT_entryno *zt_getNextZone(int); -extern void zt_remove_zones(u_char *); -extern void zt_set_zmap(u_short, unsigned char *); -extern void rtmp_router_input(gbuf_t *, at_ifaddr_t *); -void trackrouter(at_ifaddr_t *, unsigned short, unsigned char); -int zt_find_zname(at_nvestr_t *); -struct at_nvestr *getRTRLocalZone(struct zone_usage *); -int zt_ent_zcount(RT_entry *); -int zt_get_zmcast(at_ifaddr_t *, at_nvestr_t *, char *); - -void getRtmpTable(RT_entry *, int, int c); -void getZipTable(ZT_entry *, int, int c); -int getZipTableSize(void); - -int rt_table_init(void ); -void getIfUsage( int, at_ifnames_t *); - - -#endif /* KERNEL_PRIVATE */ - -#endif /* PRIVATE */ -#endif /* _NETAT_ROUTING_TABLES_H_ */ diff --git a/bsd/netat/rtmp.h b/bsd/netat/rtmp.h deleted file mode 100644 index 1404657da..000000000 --- a/bsd/netat/rtmp.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Copyright (c) 1988, 1989 Apple Computer, Inc. 
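The ZT_*_ZMAP macros treat each route's ZoneBitMap as a 1-based bit array, zone n at bit (n-1)%8 of byte (n-1)>>3, and keep ZT_table[n-1].ZoneCount as a reference count so a zone name can be reclaimed once no route maps it. ZT_ISIN_ZMAP works because >> and % bind tighter than &, but note that in ZT_SET_ZMAP's guard the ! applies to the whole byte before the mask does; the function forms below (names mine) use explicit parentheses to encode the evident intent, and the header also declares a zt_set_zmap() function:

    #include <stdint.h>

    /* Zone numbers are 1-based: zone n lives at bit (n-1)%8 of byte (n-1)/8. */
    static inline int zmap_test(const uint8_t *zmap, unsigned znum)
    {
        return (zmap[(znum - 1) >> 3] & (0x80u >> ((znum - 1) % 8))) != 0;
    }

    static inline void zmap_set(uint8_t *zmap, unsigned znum)
    {
        zmap[(znum - 1) >> 3] |= (uint8_t)(0x80u >> ((znum - 1) % 8));
    }

    static inline void zmap_clear(uint8_t *zmap, unsigned znum)
    {
        zmap[(znum - 1) >> 3] &= (uint8_t)~(0x80u >> ((znum - 1) % 8));
    }

The kernel macros additionally adjust ZoneCount; these helpers show only the bit arithmetic.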
- */ - -#ifndef _NETAT_RTMP_H_ -#define _NETAT_RTMP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -/* Changed 03-22-94 for router support LD */ - -/* RTMP function codes */ -#define RTMP_REQ_FUNC1 0x01 /* RTMP request function code=1 */ -#define RTMP_REQ_FUNC2 0x02 /* Route Data Req with Split Horizon */ -#define RTMP_REQ_FUNC3 0x03 /* Route Data Req no Split Horizon */ - - -#define RTMP_ROUTER_AGE 50 /* Number of seconds to age router */ - -/* RTMP response and data packet format */ - -typedef struct { - at_net at_rtmp_this_net; - u_char at_rtmp_id_length; - u_char at_rtmp_id[1]; -} at_rtmp; - -/* RTMP network/distance data tuples */ - -#define RTMP_TUPLE_SIZE 3 - -/* Extended AppleTalk tuple can be thought of as two of - * these tuples back to back. - */ - -#define RTMP_RANGE_FLAG 0x80 -#define RTMP_DISTANCE 0x0f - -typedef struct { - at_net at_rtmp_net; - unsigned char at_rtmp_data; -} at_rtmp_tuple; - -#ifdef KERNEL_PRIVATE - -void rtmp_purge(at_ifaddr_t *); -void rtmp_shutdown(void); -void rtmp_input (gbuf_t *, at_ifaddr_t *); -void RouterError(short, short); -void rtmp_init(void); -int zip_control (at_ifaddr_t *, int); -void routershutdown(void); -void trackrouter_rem_if(at_ifaddr_t *); -char upshift8(char); -void ZIPwakeup(at_ifaddr_t *, int); - -int elap_dataput(gbuf_t *, at_ifaddr_t *, u_char, char *); - -#endif /* KERNEL_PRIVATE */ - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_RTMP_H_ */ diff --git a/bsd/netat/sys_dep.c b/bsd/netat/sys_dep.c deleted file mode 100644 index 47a18d89c..000000000 --- a/bsd/netat/sys_dep.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (c) 1995-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Change Log: - * Created February 20, 1995 by Tuyen Nguyen - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, March 17, 1997 by Tuyen Nguyen for MacOSX. 
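rtmp.h above gives the whole wire grammar of an RTMP data packet: a fixed header (at_rtmp), then 3-byte network/distance tuples, where an extended-network entry is two tuples back to back, flagged by RTMP_RANGE_FLAG in the data byte and carrying the hop count in RTMP_DISTANCE. A user-space sketch of a tuple walk under those definitions (the kernel's rtmp_input() performs the real, fully validated parse):

    #include <stdint.h>
    #include <stdio.h>

    #define RTMP_TUPLE_SIZE 3
    #define RTMP_RANGE_FLAG 0x80
    #define RTMP_DISTANCE   0x0f

    /* buf points just past the fixed header (this_net, id_length, id). */
    static void rtmp_walk_tuples(const uint8_t *buf, size_t len)
    {
        size_t off = 0;

        while (off + RTMP_TUPLE_SIZE <= len) {
            uint16_t net  = (uint16_t)((buf[off] << 8) | buf[off + 1]);
            uint8_t  data = buf[off + 2];

            if (data & RTMP_RANGE_FLAG) {      /* extended net: a second tuple follows */
                if (off + 2 * RTMP_TUPLE_SIZE > len)
                    break;                     /* truncated packet */
                uint16_t stop = (uint16_t)((buf[off + 3] << 8) | buf[off + 4]);
                printf("net %u-%u distance %u\n", net, stop, data & RTMP_DISTANCE);
                off += 2 * RTMP_TUPLE_SIZE;
            } else {
                printf("net %u distance %u\n", net, data & RTMP_DISTANCE);
                off += RTMP_TUPLE_SIZE;
            }
        }
    }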
- */ -#include -#include -#include -#include -#include -#include -#include /* for p_fd in fdflags */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int); - -extern at_ifaddr_t *ifID_home; /* default interface */ -extern lck_mtx_t * atalk_mutex; - -#define f_flag f_fglob->fg_flag -#define f_type f_fglob->fg_type -#define f_msgcount f_fglob->fg_msgcount -#define f_cred f_fglob->fg_cred -#define f_ops f_fglob->fg_ops -#define f_offset f_fglob->fg_offset -#define f_data f_fglob->fg_data - - -int _ATkqfilter(struct fileproc *, struct knote *, vfs_context_t); -int _ATselect(struct fileproc *, int, void *, vfs_context_t); -int _ATioctl(struct fileproc *, u_long, caddr_t, vfs_context_t); -int _ATwrite(struct fileproc *, struct uio *, int, vfs_context_t); -int _ATread(struct fileproc *, struct uio *, int, vfs_context_t); -int _ATclose(struct fileglob *, vfs_context_t); - -int ATsocket(proc, uap, retval) - struct proc *proc; - struct ATsocket_args *uap; - int *retval; -{ - int err; - atalk_lock(); - if (1 /* _ATsocket*/) { - /* required check for all AppleTalk system calls */ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - *retval = -1; - err = ENOTREADY; - } else { - *retval = _ATsocket((int)uap->proto, (int *)&err, (void *)proc); - } - } else { - *retval = -1; - err = ENXIO; - } - atalk_unlock(); - return err; -} - -int ATgetmsg(proc, uap, retval) - struct proc *proc; - struct ATgetmsg_args *uap; - int *retval; -{ - int err; - - atalk_lock(); - if (1 /* _ATgetmsg */) { - /* required check for all AppleTalk system calls */ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - *retval = -1; - err = ENOTREADY; - } else { - *retval = - (*_ATgetmsg)(uap->fd, uap->ctlptr, uap->datptr, - uap->flags, &err, proc); - } - } else { - *retval = -1; - err = ENXIO; - } - atalk_unlock(); - return err; -} - -int ATputmsg(proc, uap, retval) - struct proc *proc; - struct ATputmsg_args *uap; - int *retval; -{ - int err; - - atalk_lock(); - if (1 /* _ATputmsg */) { - /* required check for all AppleTalk system calls */ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - *retval = -1; - err = ENOTREADY; - } else { - *retval = - _ATputmsg(uap->fd, uap->ctlptr, uap->datptr, - uap->flags, &err, proc); - } - } else { - *retval = -1; - err = ENXIO; - } - atalk_unlock(); - return err; -} - -int ATPsndreq(proc, uap, retval) - struct proc *proc; - struct ATPsndreq_args *uap; - int *retval; -{ - int err; - - atalk_lock(); - if (1 /* _ATPsndreq */) { - /* required check for all AppleTalk system calls */ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - *retval = -1; - err = ENOTREADY; - } else { - *retval = - _ATPsndreq(uap->fd, uap->buf, uap->len, - uap->nowait, &err, proc); - } - } else { - *retval = -1; - err= ENXIO; - } - atalk_unlock(); - return err; -} - -int ATPsndrsp(proc, uap, retval) - struct proc *proc; - struct ATPsndrsp_args *uap; - int *retval; -{ - int err; - - atalk_lock(); - if (1 /*_ATPsndrsp*/) { - /* required check for all AppleTalk system calls */ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - *retval = -1; - err = ENOTREADY; - } else { - *retval = - _ATPsndrsp(uap->fd, uap->respbuff, - uap->resplen, uap->datalen, &err, proc); - } - } else { - *retval = -1; - err = ENXIO; - } - atalk_unlock(); - return err; -} - -int ATPgetreq(proc, uap, retval) - struct proc *proc; - struct ATPgetreq_args 
*uap; - int *retval; -{ - int err; - - atalk_lock(); - if (1 /* _ATPgetreq */) { - /* required check for all AppleTalk system calls */ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - *retval = -1; - err = ENOTREADY; - } else { - *retval = - _ATPgetreq(uap->fd, uap->buf, uap->buflen, - &err, proc); - } - } else { - *retval = -1; - err = ENXIO; - } - atalk_unlock(); - return err; -} - -int ATPgetrsp(proc, uap, retval) - struct proc *proc; - struct ATPgetrsp_args *uap; - int *retval; -{ - int err = 0; - - atalk_lock(); - if (1 /*_ATPgetrsp*/) { - /* required check for all AppleTalk system calls */ - if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) { - *retval = -1; - err = ENOTREADY; - } else { - *retval = - _ATPgetrsp(uap->fd, (struct atpBDS *)uap->bdsp, &err, proc); - } - } else { - *retval = -1; - err = ENXIO; - } - atalk_unlock(); - return err; -} - -int atalk_closeref(fg, grefp) - struct fileglob *fg; - gref_t **grefp; -{ - if ((*grefp = (gref_t *)fg->fg_data)) { - fg->fg_data = 0; - return(0); - } - return(EBADF); -} - -int atalk_openref(gref, retfd, proc) - gref_t *gref; - int *retfd; - struct proc *proc; -{ - static struct fileops fileops = - {_ATread, _ATwrite, _ATioctl, _ATselect, _ATclose, _ATkqfilter, 0}; - int err, fd; - struct fileproc *fp; - - lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); - - proc_fdlock(proc); - if ((err = falloc_locked(proc, &fp, &fd, vfs_context_current(), 1)) != 0) { - proc_fdunlock(proc); - return err; - } - - fp->f_flag = FREAD|FWRITE; - /*##### LD 5/7/96 Warning: we don't have a "DTYPE_OTHER" for - * MacOSX, so defines DTYPE_ATALK as DTYPE_SOCKET... - */ - fp->f_type = DTYPE_ATALK+1; - fp->f_ops = &fileops; - fp->f_data = (void *)gref; - - procfdtbl_releasefd(proc, fd, NULL); - *retfd = fd; - fp_drop(proc, fd, fp, 1); - proc_fdunlock(proc); -/* - kprintf("atalk_openref: fp = 0x%x, gref = 0x%x\n", (u_int)fp, (u_int)gref); -*/ - return 0; -} - -/* - * go from file descriptor to gref, which has been saved in fp->f_data - * - * This routine returns with an iocount on the fileproc when the fp is null - * as it converts fd to fileproc. Callers of this api who pass fp as null - * need to drop the iocount when they are done with the fp - */ -int atalk_getref(fp, fd, grefp, proc, droponerr) -struct fileproc *fp; -int fd; -gref_t **grefp; -struct proc *proc; -int droponerr; -{ - int error; - - proc_fdlock(proc); - error = atalk_getref_locked(fp, fd, grefp, proc, droponerr); - proc_fdunlock(proc); - return error; -} - -int atalk_getref_locked(fp, fd, grefp, proc, droponerr) -struct fileproc *fp; -int fd; -gref_t **grefp; -struct proc *proc; -int droponerr; -{ - lck_mtx_assert(atalk_mutex, LCK_MTX_ASSERT_OWNED); - if (fp == 0) { - int error = fp_lookup(proc, fd, &fp, 1); - - if (error) { - - *grefp = (gref_t *) 0; - return EBADF; - } - } - *grefp = (gref_t *)fp->f_data; - if (fp->f_type != (DTYPE_ATALK+1) || *grefp == 0 || *grefp == (gref_t *)(-1)) { - if (droponerr) - fp_drop(proc, fd, fp, 1); - printf("atalk_getref_locked EBADF f_data: %p\n", fp->f_data); - return EBADF; - } - - if ((*grefp)->errno) { - if (droponerr) - fp_drop(proc, fd, fp, 1); - return (int)(*grefp)->errno; - } - return 0; -} diff --git a/bsd/netat/sys_glue.c b/bsd/netat/sys_glue.c deleted file mode 100644 index acb307fbc..000000000 --- a/bsd/netat/sys_glue.c +++ /dev/null @@ -1,1332 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 
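Every syscall wrapper above (ATsocket through ATPgetrsp) repeats the same prologue: take the single global AppleTalk lock, fail with ENOTREADY (aliased to ESHUTDOWN in sysglue.h further down) unless the stack has started and a home interface exists, run the worker, unlock. Factored into one hypothetical helper purely to make the pattern visible; at_state, ifID_home, and the lock come from the surrounding code:

    /* Illustrative refactor only: the original spells this out per syscall. */
    static int at_call_guarded(int (*worker)(void *, int *), void *arg, int *retval)
    {
        int err = 0;

        atalk_lock();
        if (!(at_state.flags & AT_ST_STARTED) || !ifID_home) {
            *retval = -1;
            err = ENOTREADY;              /* stack still starting up, or shut down */
        } else {
            *retval = worker(arg, &err);  /* worker reports failure through err */
        }
        atalk_unlock();
        return err;
    }

atalk_getref()/atalk_getref_locked() close the loop on the descriptor side, validating f_type == DTYPE_ATALK+1 before trusting the gref stashed in fg_data.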
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1995 Apple Computer, Inc. - * - * Change Log: - * Created, March 17, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -int _ATkqfilter(struct fileproc *, struct knote *, vfs_context_t); -int _ATselect(struct fileproc *, int, void *, vfs_context_t); -int _ATioctl(struct fileproc *, u_long, caddr_t, vfs_context_t); -int _ATwrite(struct fileproc *, struct uio *, int, vfs_context_t); -int _ATread(struct fileproc *, struct uio *, int, vfs_context_t); -int _ATclose(struct fileglob *, vfs_context_t); - -int _ATrw(struct fileproc *, enum uio_rw, struct uio *, vfs_context_t); - -extern struct atpcb ddp_head; -extern lck_mtx_t * atalk_mutex; - -int atp_free_cluster_timeout_set = 0; - -int gref_alloc(gref_t **); - - -/* bms: make gref_close non static so its callable from kernel */ -int gref_close(gref_t *gref); - -SYSCTL_DECL(_net_appletalk); -dbgBits_t dbgBits; -SYSCTL_STRUCT(_net_appletalk, OID_AUTO, debug, CTLFLAG_WR, - &dbgBits, dbgBits, "AppleTalk Debug Flags"); -int RouterMix = RT_MIX_DEFAULT; /* default for nbr of ppsec */ -SYSCTL_INT(_net_appletalk, OID_AUTO, routermix, CTLFLAG_WR, - &RouterMix, 0, "Appletalk RouterMix"); -at_ddp_stats_t at_ddp_stats; /* DDP statistics */ -SYSCTL_STRUCT(_net_appletalk, OID_AUTO, ddpstats, CTLFLAG_RD | CTLFLAG_LOCKED, - &at_ddp_stats, at_ddp_stats, "AppleTalk DDP Stats"); -extern int atp_resp_seqno2big; -SYSCTL_INT(_net_appletalk, OID_AUTO, atp_resp_seqno2big, CTLFLAG_RD | CTLFLAG_LOCKED, - &atp_resp_seqno2big, 0, "Appletalk ATP seqno too big count"); - -static void ioccmd_t_32_to_64( ioccmd_t *from_p, user_ioccmd_t *to_p ); -static void ioccmd_t_64_to_32( user_ioccmd_t *from_p, ioccmd_t *to_p ); - -extern lck_mtx_t *atalk_cluster_lock; -caddr_t atp_free_cluster_list = NULL; - -void gref_wput(gref_t *, gbuf_t *m); - -void gref_wput(gref, m) - gref_t *gref; - gbuf_t *m; -{ - switch (gref->proto) { - case ATPROTO_DDP: - ddp_putmsg(gref, m); break; - case 
ATPROTO_LAP: - elap_wput(gref, m); break; - case ATPROTO_ATP: - atp_wput(gref, m); break; - case ATPROTO_ASP: - asp_wput(gref, m); break; -#ifdef AURP_SUPPORT - case ATPROTO_AURP: - aurp_wput(gref, m); break; -#endif - case ATPROTO_ADSP: - adsp_wput(gref, m); break; - case ATPROTO_NONE: - if (gbuf_type(m) == MSG_IOCTL) { - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = 0; - ((ioc_t *)gbuf_rptr(m))->ioc_rval = -1; - ((ioc_t *)gbuf_rptr(m))->ioc_error = EPROTOTYPE; - gbuf_set_type(m, MSG_IOCNAK); - atalk_putnext(gref, m); - } else - gbuf_freem(m); - break; - default: - gbuf_freem(m); - break; - } -} - -int _ATsocket(proto, err, proc) - int proto; - int *err; - void *proc; -{ - int fd; - gref_t *gref; - - /* make sure the specified protocol id is valid */ - switch (proto) { - - /* ATPROTO_DDP and ATPROTO_LAP have been replaced with - BSD-style socket interface. */ - - case ATPROTO_ATP: - case ATPROTO_ASP: -#ifdef AURP_SUPPORT - case ATPROTO_AURP: -#endif - case ATPROTO_ADSP: - break; - default: - *err = EPROTOTYPE; -#ifdef APPLETALK_DEBUG - kprintf("_ATsocket: error EPROTOTYPE =%d\n", *err); -#endif - return -1; - } - - /* allocate a protocol channel */ - if ((*err = gref_alloc(&gref)) != 0) { -#ifdef APPLETALK_DEBUG - kprintf("_ATsocket: error gref_open =%d\n", *err); -#endif - return -1; - } - gref->proto = proto; - gref->pid = proc_pid((struct proc *)proc); - - /* open the specified protocol */ - switch (gref->proto) { - - /* ATPROTO_DDP and ATPROTO_LAP have been replaced with - BSD-style socket interface. */ - - case ATPROTO_ATP: - *err = atp_open(gref, 1); break; - case ATPROTO_ASP: - *err = asp_open(gref); break; -#ifdef AURP_SUPPORT - case ATPROTO_AURP: - *err = aurp_open(gref); break; -#endif - case ATPROTO_ADSP: - *err = adsp_open(gref); break; - } - - /* create the descriptor for the channel */ - if (*err) { -#ifdef APPLETALK_DEBUG - kprintf("_ATsocket: open failed for %d proto; err = %d\n", - gref->proto, *err); -#endif - gref->proto = ATPROTO_NONE; - } - if (*err || (*err = atalk_openref(gref, &fd, proc))) { -#ifdef APPLETALK_DEBUG - kprintf("_ATsocket: error atalk_openref =%d\n", *err); -#endif - (void)gref_close(gref); - return -1; - } -/* - kprintf("_ATsocket: proto=%d return=%d fd=%d\n", proto, *err, fd); -*/ - return fd; -} /* _ATsocket */ - -int _ATgetmsg(fd, ctlptr, datptr, flags, err, proc) - int fd; - strbuf_t *ctlptr; - strbuf_t *datptr; - int *flags; - int *err; - void *proc; -{ - int rc = -1; - gref_t *gref; - - if ((*err = atalk_getref(0, fd, &gref, proc, 1)) == 0) { - switch (gref->proto) { - case ATPROTO_ASP: - rc = ASPgetmsg(gref, ctlptr, datptr, NULL, flags, err); - break; - case ATPROTO_AURP: -#ifdef AURP_SUPPORT - rc = AURPgetmsg(err); - break; -#endif - default: - *err = EPROTONOSUPPORT; - break; - } - file_drop(fd); - } - -/* kprintf("_ATgetmsg: return=%d\n", *err);*/ - return rc; -} - -int _ATputmsg(fd, ctlptr, datptr, flags, err, proc) - int fd; - strbuf_t *ctlptr; - strbuf_t *datptr; - int flags; - int *err; - void *proc; -{ - int rc = -1; - gref_t *gref; - - if ((*err = atalk_getref(0, fd, &gref, proc, 1)) == 0) { - switch (gref->proto) { - case ATPROTO_ASP: - rc = ASPputmsg(gref, ctlptr, datptr, NULL, flags, err); break; - default: - *err = EPROTONOSUPPORT; break; - } - file_drop(fd); - } - -/* kprintf("_ATputmsg: return=%d\n", *err); */ - return rc; -} - -int _ATclose( - struct fileglob *fg, - __unused vfs_context_t ctx) -{ - int err; - gref_t *gref; - - if ((err = atalk_closeref(fg, &gref)) == 0) { - atalk_lock(); - (void)gref_close(gref); - atalk_unlock(); 
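gref_wput() above and the open switch in _ATsocket() dispatch on gref->proto to a per-protocol handler (DDP, LAP, ATP, ASP, ADSP, plus AURP when built in), with ATPROTO_NONE NAKing stray MSG_IOCTLs. The same routing could be table-driven; a sketch assuming the uniform (gref_t *, gbuf_t *) handler shape the switch suggests (any handler that returns a value would need a thin wrapper):

    struct at_proto_ops {
        int    proto;                        /* ATPROTO_* constant */
        void (*wput)(gref_t *, gbuf_t *);
    };

    static const struct at_proto_ops at_proto_table[] = {
        { ATPROTO_DDP,  ddp_putmsg },
        { ATPROTO_LAP,  elap_wput  },
        { ATPROTO_ATP,  atp_wput   },
        { ATPROTO_ASP,  asp_wput   },
        { ATPROTO_ADSP, adsp_wput  },
    };

    static void gref_wput_by_table(gref_t *gref, gbuf_t *m)
    {
        unsigned i;

        for (i = 0; i < sizeof(at_proto_table) / sizeof(at_proto_table[0]); i++)
            if (at_proto_table[i].proto == gref->proto) {
                at_proto_table[i].wput(gref, m);
                return;
            }
        gbuf_freem(m);                       /* unknown protocol: drop, as the switch does */
    }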
- } - - return err; -} - -int _ATrw(fp, rw, uio, ctx) - struct fileproc *fp; - enum uio_rw rw; - struct uio *uio; - vfs_context_t ctx; -{ - int err, len, clen = 0, res; - gref_t *gref; - gbuf_t *m, *mhead, *mprev; - proc_t p = vfs_context_proc(ctx); - - /* no need to get/drop iocount as the fp already has one */ - if ((err = atalk_getref_locked(fp, 0, &gref, p, 1)) != 0) - return err; - - // LP64todo - fix this! - if ((len = uio_resid(uio)) == 0) - return 0; - - - if (rw == UIO_READ) { - KERNEL_DEBUG(DBG_ADSP_ATRW, 0, gref, len, gref->rdhead, 0); - while ((gref->errno == 0) && ((mhead = gref->rdhead) == 0)) { - gref->sevents |= POLLMSG; - err = msleep(&gref->event, atalk_mutex, PSOCK | PCATCH, "AT read", 0); - gref->sevents &= ~POLLMSG; - if (err != 0) - return err; - KERNEL_DEBUG(DBG_ADSP_ATRW, 1, gref, gref->rdhead, mhead, gbuf_next(mhead)); - } - - if (gref->errno) - return EPIPE; - if ((gref->rdhead = gbuf_next(mhead)) == 0) - gref->rdtail = 0; - - KERNEL_DEBUG(DBG_ADSP_ATRW, 2, gref, gref->rdhead, mhead, gbuf_next(mhead)); - - -//##### LD TEST 08/05 -// simple_lock(&gref->lock); - - gbuf_next(mhead) = 0; - - for (mprev=0, m=mhead; m && len; len-=clen) { - if ((clen = gbuf_len(m)) > 0) { - if (clen > len) - clen = len; - uio->uio_rw = UIO_READ; - if ((res = uiomove((caddr_t)gbuf_rptr(m), - clen, uio))) { - KERNEL_DEBUG(DBG_ADSP_ATRW, 3, m, clen, - len, gbuf_cont(m)); - break; - } - if (gbuf_len(m) > len) { - gbuf_rinc(m,clen); - break; - } - } - mprev = m; - m = gbuf_cont(m); - } - if (m) { - KERNEL_DEBUG(DBG_ADSP_ATRW, 4, m, gbuf_len(m), mprev, gref->rdhead); - if (mprev) - gbuf_cont(mprev) = 0; - else - mhead = 0; - if (gref->rdhead == 0) - gref->rdtail = m; - gbuf_next(m) = gref->rdhead; - gref->rdhead = m; - } - if (mhead) - gbuf_freem(mhead); -//### LD TEST -// simple_unlock(&gref->lock); - } else { - if (gref->writeable) { - while (!(*gref->writeable)(gref)) { - /* flow control on, wait to be enabled to write */ - gref->sevents |= POLLSYNC; - err = msleep(&gref->event, atalk_mutex, PSOCK | PCATCH, "AT write", 0); - gref->sevents &= ~POLLSYNC; - if (err != 0) - return err; - } - } - - - /* allocate a buffer to copy in the write data */ - if ((m = gbuf_alloc(AT_WR_OFFSET+len, PRI_MED)) == 0) - return ENOBUFS; - gbuf_rinc(m,AT_WR_OFFSET); - gbuf_wset(m,len); - - /* copy in the write data */ - uio->uio_rw = UIO_WRITE; - if ((res = uiomove((caddr_t)gbuf_rptr(m), len, uio))) { -#ifdef APPLETALK_DEBUG - kprintf("_ATrw: UIO_WRITE: res=%d\n", res); -#endif - gbuf_freeb(m); - return EIO; - } - - /* forward the write data to the appropriate protocol module */ - gref_wput(gref, m); - } - - return 0; -} /* _ATrw */ - -int _ATread( - struct fileproc *fp, - struct uio *uio, - __unused int flags, - vfs_context_t ctx) -{ - int stat; - - atalk_lock(); - stat = _ATrw(fp, UIO_READ, uio, ctx); - atalk_unlock(); - return stat; -} - -int _ATwrite( - struct fileproc *fp, - struct uio *uio, - __unused int flags, - vfs_context_t ctx) -{ - int stat; - - atalk_lock(); - stat = _ATrw(fp, UIO_WRITE, uio, ctx); - atalk_unlock(); - - return stat; -} - -/* Most of the processing from _ATioctl, so that it can be called - from the new ioctl code */ -/* bms: update to be callable from kernel */ -int at_ioctl(gref_t *gref, u_long cmd, caddr_t arg, int fromKernel) -{ - int err = 0, len; - u_int size; - gbuf_t *m, *mdata; - ioc_t *ioc; - user_addr_t user_arg; - user_ioccmd_t user_ioccmd; - boolean_t is64bit; - - /* error if not for us */ - if ((cmd & 0xffff) != 0xff99) - return EOPNOTSUPP; - - size = IOCPARM_LEN(cmd); 
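The write half of _ATrw() above shows the headroom idiom used throughout this stack: allocate AT_WR_OFFSET (38, defined in sysglue.h below) extra bytes, gbuf_rinc() past them before copying in user data, and let the protocol layers underneath gbuf_rdec()/gbuf_prepend() their DDP and transport headers into that slack without another allocation or copy. Condensed from the code above:

    /* Reserve protocol headroom up front, then copy in only the user data. */
    if ((m = gbuf_alloc(AT_WR_OFFSET + len, PRI_MED)) == 0)
        return ENOBUFS;
    gbuf_rinc(m, AT_WR_OFFSET);          /* read pointer now hides the headroom */
    gbuf_wset(m, len);                   /* message length covers user data only */

    if ((res = uiomove((caddr_t)gbuf_rptr(m), len, uio))) {
        gbuf_freeb(m);
        return EIO;
    }
    gref_wput(gref, m);                  /* lower layers rdec() to prepend headers */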
- if (size != sizeof(user_addr_t)) - return EINVAL; - - user_arg = *((user_addr_t *)arg); - - /* copy in ioc command info */ - is64bit = proc_is64bit(current_proc()); - if (fromKernel) { - ioccmd_t tmp; - bcopy (CAST_DOWN(caddr_t, user_arg), &tmp, sizeof (tmp)); - ioccmd_t_32_to_64(&tmp, &user_ioccmd); - } - else { - if (is64bit) { - err = copyin(user_arg, (caddr_t)&user_ioccmd, sizeof(user_ioccmd)); - } - else { - ioccmd_t tmp; - err = copyin(user_arg, (caddr_t)&tmp, sizeof(tmp)); - ioccmd_t_32_to_64(&tmp, &user_ioccmd); - } - if (err != 0) { -#ifdef APPLETALK_DEBUG - kprintf("at_ioctl: err = %d, copyin(%llx, %x, %d)\n", err, - user_arg, (caddr_t)&user_ioccmd, sizeof(user_ioccmd)); -#endif - return err; - } - } - - /* allocate a buffer to create an ioc command - first mbuf contains ioc command */ - if ((m = gbuf_alloc(sizeof(ioc_t), PRI_HI)) == 0) - return ENOBUFS; - gbuf_wset(m, sizeof(ioc_t)); /* mbuf->m_len */ - gbuf_set_type(m, MSG_IOCTL); /* mbuf->m_type */ - - /* create the ioc command - second mbuf contains the actual ASP command */ - if (user_ioccmd.ic_len) { - if ((gbuf_cont(m) = gbuf_alloc(user_ioccmd.ic_len, PRI_HI)) == 0) { - gbuf_freem(m); -#ifdef APPLETALK_DEBUG - kprintf("at_ioctl: gbuf_alloc err=%d\n",ENOBUFS); -#endif - return ENOBUFS; - } - gbuf_wset(gbuf_cont(m), user_ioccmd.ic_len); /* mbuf->m_len */ - if (fromKernel) - bcopy (CAST_DOWN(caddr_t, user_ioccmd.ic_dp), gbuf_rptr(gbuf_cont(m)), user_ioccmd.ic_len); - else { - if ((err = copyin(user_ioccmd.ic_dp, (caddr_t)gbuf_rptr(gbuf_cont(m)), user_ioccmd.ic_len)) != 0) { - gbuf_freem(m); - return err; - } - } - } - ioc = (ioc_t *) gbuf_rptr(m); - ioc->ioc_cmd = user_ioccmd.ic_cmd; - ioc->ioc_count = user_ioccmd.ic_len; - ioc->ioc_error = 0; - ioc->ioc_rval = 0; - - /* send the ioc command to the appropriate recipient */ - gref_wput(gref, m); - - /* wait for the ioc ack */ - while ((m = gref->ichead) == 0) { - gref->sevents |= POLLPRI; -#ifdef APPLETALK_DEBUG - kprintf("sleep gref = 0x%x\n", (unsigned)gref); -#endif - err = msleep(&gref->iocevent, atalk_mutex, PSOCK | PCATCH, "AT ioctl", 0); - gref->sevents &= ~POLLPRI; - if (err != 0) { -#ifdef APPLETALK_DEBUG - kprintf("at_ioctl: EINTR\n"); -#endif - return err; - } - } - - /* PR-2224797 */ - if (gbuf_next(m) == m) /* error case */ - gbuf_next(m) = 0; - - gref->ichead = gbuf_next(m); - - -#ifdef APPLETALK_DEBUG - kprintf("at_ioctl: woke up from ioc sleep gref = 0x%x\n", - (unsigned)gref); -#endif - - /* process the ioc response */ - ioc = (ioc_t *) gbuf_rptr(m); - if ((err = ioc->ioc_error) == 0) { - user_ioccmd.ic_timout = ioc->ioc_rval; - user_ioccmd.ic_len = 0; - mdata = gbuf_cont(m); - if (mdata && user_ioccmd.ic_dp) { - user_ioccmd.ic_len = gbuf_msgsize(mdata); - for (len = 0; mdata; mdata = gbuf_cont(mdata)) { - if (fromKernel) - bcopy (gbuf_rptr(mdata), CAST_DOWN(caddr_t, (user_ioccmd.ic_dp + len)), gbuf_len(mdata)); - else { - if ((err = copyout((caddr_t)gbuf_rptr(mdata), (user_ioccmd.ic_dp + len), gbuf_len(mdata))) < 0) { -#ifdef APPLETALK_DEBUG - kprintf("at_ioctl: len=%d error copyout=%d from=%x to=%x gbuf_len=%x\n", - len, err, (caddr_t)gbuf_rptr(mdata), (caddr_t)&user_ioccmd.ic_dp[len], gbuf_len(mdata)); -#endif - goto l_done; - } - } - len += gbuf_len(mdata); - } - } - - if (fromKernel) { - ioccmd_t tmp; - ioccmd_t_64_to_32(&user_ioccmd, &tmp); - bcopy (&tmp, CAST_DOWN(caddr_t, user_arg), sizeof(tmp)); - } - else { - if (is64bit) { - err = copyout((caddr_t)&user_ioccmd, user_arg, sizeof(user_ioccmd)); - } - else { - ioccmd_t tmp; - 
ioccmd_t_64_to_32(&user_ioccmd, &tmp); - err = copyout((caddr_t)&tmp, user_arg, sizeof(tmp)); - } - if (err != 0) { - goto l_done; - } - } - } - -l_done: - gbuf_freem(m); - /*kprintf("at_ioctl: I_done=%d\n", err);*/ - return err; -} /* at_ioctl */ - -int _ATioctl( - struct fileproc *fp, - u_long cmd, - register caddr_t arg, - __unused vfs_context_t ctx) -{ - int err; - gref_t *gref; - - atalk_lock(); - /* No need to get a reference on fp as it already has one */ - if ((err = atalk_getref_locked(fp, 0, &gref, 0, 0)) != 0) { -#ifdef APPLETALK_DEBUG - kprintf("_ATioctl: atalk_getref err = %d\n", err); -#endif - } - else - err = at_ioctl(gref, cmd, arg, 0); - - atalk_unlock(); - - return err; -} - -int _ATselect(fp, which, wql, ctx) - struct fileproc *fp; - int which; - void * wql; - vfs_context_t ctx; -{ - int err, rc = 0; - gref_t *gref; - proc_t proc = vfs_context_proc(ctx); - - /* Radar 4128949: Drop the proc_fd lock here to avoid lock inversion issues with the other AT calls - * select() is already holding a reference on the fd, so it won't go away during the time it is unlocked. - */ - proc_fdunlock(proc); - - atalk_lock(); - /* no need to drop the iocount as select covers that */ - err = atalk_getref_locked(fp, 0, &gref, 0, 0); - atalk_unlock(); - - /* Safe to re-grab the proc_fdlock at that point */ - proc_fdlock(proc); - if (err != 0) - rc = 1; - else { - if (which == FREAD) { - if (gref->rdhead || (gref->readable && (*gref->readable)(gref))) - rc = 1; - else { - gref->sevents |= POLLIN; - selrecord(proc, &gref->si, wql); - } - } - else if (which == POLLOUT) { - if (gref->writeable) { - if ((*gref->writeable)(gref)) - rc = 1; - else { - gref->sevents |= POLLOUT; - selrecord(proc, &gref->si, wql); - } - } else - rc = 1; - } - } - - return rc; -} - -int _ATkqfilter( - __unused struct fileproc *fp, - __unused struct knote *kn, - __unused vfs_context_t ctx) -{ - return (EOPNOTSUPP); -} - -void atalk_putnext(gref, m) - gref_t *gref; - gbuf_t *m; -{ - - - /* *** potential leak? *** */ - gbuf_next(m) = 0; - - switch (gbuf_type(m)) { - case MSG_IOCACK: - case MSG_IOCNAK: - if (gref->ichead) - gbuf_next(gref->ichead) = m; - else { - gref->ichead = m; - if (gref->sevents & POLLPRI) { -#ifdef APPLETALK_DEBUG - kprintf("wakeup gref = 0x%x\n", (unsigned)gref); -#endif - wakeup(&gref->iocevent); - } - } - break; - case MSG_ERROR: - /* *** this processing was moved to atalk_notify *** */ - panic("atalk_putnext receved MSG_ERROR"); - break; - default: - if (gref->errno) - gbuf_freem(m); - else - if (gref->rdhead) { - gbuf_next(gref->rdtail) = m; - gref->rdtail = m; - } else { - gref->rdhead = m; - if (gref->sevents & POLLMSG) { - gref->sevents &= ~POLLMSG; - wakeup(&gref->event); - } - if (gref->sevents & POLLIN) { - gref->sevents &= ~POLLIN; - selwakeup(&gref->si); - } - gref->rdtail = m; - } - } /* switch gbuf_type(m) */ - -} /* atalk_putnext */ - -void atalk_enablew(gref) - gref_t *gref; -{ - if (gref->sevents & POLLSYNC) - wakeup(&gref->event); -} - -void atalk_flush(gref) - gref_t *gref; -{ - - if (gref->rdhead) { - gbuf_freel(gref->rdhead); - gref->rdhead = 0; - } - if (gref->ichead) { - gbuf_freel(gref->ichead); - gref->ichead = 0; - } -} - -/* - * Notify an appletalk user of an asynchronous error; - * just wake up so that he can collect error status. 
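The msleep() loop in at_ioctl() above and the MSG_IOCACK/MSG_IOCNAK arm of atalk_putnext() are the two halves of one handshake: the ioctl sender queues a MSG_IOCTL downstream, advertises itself with POLLPRI in gref->sevents, and sleeps on gref->iocevent; the protocol module's reply lands on gref->ichead and wakes it. Side by side, condensed from this file:

    /* Waiter (at_ioctl, atalk_mutex held): block until the ack arrives. */
    while ((m = gref->ichead) == 0) {
        gref->sevents |= POLLPRI;
        err = msleep(&gref->iocevent, atalk_mutex, PSOCK | PCATCH, "AT ioctl", 0);
        gref->sevents &= ~POLLPRI;
        if (err != 0)
            return err;                 /* interrupted by a signal */
    }

    /* Waker (atalk_putnext, on MSG_IOCACK/MSG_IOCNAK): */
    if (gref->ichead)
        gbuf_next(gref->ichead) = m;    /* chain behind a pending ack */
    else {
        gref->ichead = m;
        if (gref->sevents & POLLPRI)
            wakeup(&gref->iocevent);    /* rouse the sleeping ioctl */
    }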
- */ -void atalk_notify(gref, errno) - register gref_t *gref; - int errno; -{ - - if (gref->atpcb_socket) { - /* For DDP -- - This section is patterned after udp_notify() in - netinet/udp_usrreq.c - */ - gref->atpcb_socket->so_error = errno; - sorwakeup(gref->atpcb_socket); - sowwakeup(gref->atpcb_socket); - } else { - /* for ATP, ASP, and ADSP */ - if (gref->errno == 0) { - gref->errno = errno; - /* clear out data waiting to be read */ - if (gref->rdhead) { - gbuf_freel(gref->rdhead); - gref->rdhead = 0; - } - /* blocked read */ - if (gref->sevents & POLLMSG) { - gref->sevents &= ~POLLMSG; - wakeup(&gref->event); - } - /* select */ - if (gref->sevents & POLLIN) { - gref->sevents &= ~POLLIN; - selwakeup(&gref->si); - } - } - } -} /* atalk_notify */ - -void atalk_notify_sel(gref) - gref_t *gref; -{ - - if (gref->sevents & POLLIN) { - gref->sevents &= ~POLLIN; - selwakeup(&gref->si); - } -} - -int atalk_peek(gref, event) - gref_t *gref; - unsigned char *event; -{ - int rc; - - if (gref->rdhead) { - *event = *gbuf_rptr(gref->rdhead); - rc = 0; - } else - rc = -1; - - return rc; -} - -#if 0 -static gbuf_t *trace_msg; - -void atalk_settrace(char * str, p1, p2, p3, p4, p5) -{ - int len; - gbuf_t *m, *nextm; - char trace_buf[256]; - - sprintf(trace_buf, str, p1, p2, p3, p4, p5); - len = strlen(trace_buf); -#ifdef APPLETALK_DEBUG - kprintf("atalk_settrace: gbufalloc size=%d\n", len+1); -#endif - if ((m = gbuf_alloc(len+1, PRI_MED)) == 0) - return; - gbuf_wset(m,len); - strcpy(gbuf_rptr(m), trace_buf); - if (trace_msg) { - for (nextm=trace_msg; gbuf_cont(nextm); nextm=gbuf_cont(nextm)) ; - gbuf_cont(nextm) = m; - } else - trace_msg = m; -} - -void atalk_gettrace(m) - gbuf_t *m; -{ - if (trace_msg) { - gbuf_cont(m) = trace_msg; - trace_msg = 0; - } -} -#endif /* 0 */ - -#define GREF_PER_BLK 32 -static gref_t *gref_free_list = 0; -extern gbuf_t *atp_resource_m; - -int gref_alloc(grefp) - gref_t **grefp; -{ - int i; - gbuf_t *m; - gref_t *gref, *gref_array; - - *grefp = (gref_t *)NULL; - - if (gref_free_list == 0) { -#ifdef APPLETALK_DEBUG - kprintf("gref_alloc: gbufalloc size=%d\n", GREF_PER_BLK*sizeof(gref_t)); -#endif - if ((m = gbuf_alloc(GREF_PER_BLK*sizeof(gref_t),PRI_HI)) == 0) - return ENOBUFS; - bzero(gbuf_rptr(m), GREF_PER_BLK*sizeof(gref_t)); - gref_array = (gref_t *)gbuf_rptr(m); - for (i=0; i < GREF_PER_BLK-1; i++) - gref_array[i].atpcb_next = (gref_t *)&gref_array[i+1]; - gbuf_cont(m) = atp_resource_m; - atp_resource_m = m; - gref_array[i].atpcb_next = gref_free_list; - gref_free_list = (gref_t *)&gref_array[0]; - } - - gref = gref_free_list; - gref_free_list = gref->atpcb_next; - ATEVENTINIT(gref->event); - ATEVENTINIT(gref->iocevent); - - /* *** just for now *** */ - gref->atpcb_socket = (struct socket *)NULL; - - *grefp = gref; - return 0; -} /* gref_alloc */ - -/* bms: make gref_close callable from kernel */ -int gref_close(gref_t *gref) -{ - int rc; - - switch (gref->proto) { - - /* ATPROTO_DDP and ATPROTO_LAP have been replaced with - BSD-style socket interface. 
*/ - - case ATPROTO_ATP: - rc = atp_close(gref, 1); break; - case ATPROTO_ASP: - rc = asp_close(gref); break; -#ifdef AURP_SUPPORT - case ATPROTO_AURP: - rc = aurp_close(gref); break; - break; -#endif - case ATPROTO_ADSP: - rc = adsp_close(gref); break; - default: - rc = 0; - break; - } - - if (rc == 0) { - atalk_flush(gref); - selthreadclear(&gref->si); - - /* from original gref_free() */ - bzero((char *)gref, sizeof(gref_t)); - gref->atpcb_next = gref_free_list; - gref_free_list = gref; - } - - return rc; -} - -/* - temp fix for bug 2731148 - until this code is re-written to use standard clusters - Deletes any free clusters on the free list. -*/ -void atp_delete_free_clusters(__unused void *junk) -{ - caddr_t cluster; - caddr_t cluster_list; - - /* check for free clusters on the free_cluster_list to be deleted */ - - untimeout(&atp_delete_free_clusters, NULL); - - lck_mtx_lock(atalk_cluster_lock); - - atp_free_cluster_timeout_set = 0; - - cluster_list = atp_free_cluster_list; - atp_free_cluster_list = NULL; - - lck_mtx_unlock(atalk_cluster_lock); - - while ((cluster = cluster_list)) - { - cluster_list = *((caddr_t*)cluster); - FREE(cluster, M_MCLUST); - } -} - - -/* - Used as the "free" routine for over-size clusters allocated using - m_lgbuf_alloc(). -*/ - -void m_lgbuf_free(caddr_t, u_int, caddr_t); - -void m_lgbuf_free( - caddr_t buf, - __unused u_int size, - __unused caddr_t arg) /* not needed, but they're in m_free() */ -{ - int t; - - /* move to free_cluster_list to be deleted later */ - caddr_t cluster = (caddr_t)buf; - - lck_mtx_lock(atalk_cluster_lock); - - *((caddr_t*)cluster) = atp_free_cluster_list; - atp_free_cluster_list = cluster; - - if ((t = atp_free_cluster_timeout_set) == 0) - atp_free_cluster_timeout_set = 1; - - lck_mtx_unlock(atalk_cluster_lock); - - if (t == 0) - timeout(&atp_delete_free_clusters, NULL, (1 * HZ)); -} - -/* - Used to allocate an mbuf when there is the possibility that it may - need to be larger than the size of a standard cluster. -*/ - -struct mbuf *m_lgbuf_alloc(size, wait) - int size, wait; -{ - struct mbuf *m; - - if (atp_free_cluster_list) - atp_delete_free_clusters(NULL); /* delete any free clusters on the free list */ - - /* Radar 5398094 - * check that the passed size is within admissible boundaries - * The max data size being ASP of 4576 (8 * ATP_DATA_SIZE), - * allow for extra space for control data - */ - - if (size < 0 || size > (ATP_DATA_SIZE * 10)) - return(NULL); - - /* If size is too large, allocate a cluster, otherwise, use the - standard mbuf allocation routines.*/ - if (size > MCLBYTES) { - void *buf; - if (NULL == - (buf = (void *)_MALLOC(size, M_MCLUST, - (wait)? M_WAITOK: M_NOWAIT))) { - return(NULL); - } - if (NULL == - (m = m_clattach(NULL, MSG_DATA, buf, m_lgbuf_free, size, 0, - (wait)? M_WAIT: M_DONTWAIT))) { - m_lgbuf_free(buf, 0, 0); - return(NULL); - } - } else { - m = m_gethdr(((wait)? M_WAIT: M_DONTWAIT), MSG_DATA); - if (m && ((size_t)size > MHLEN)) { - MCLGET(m, ((wait)? M_WAIT: M_DONTWAIT)); - if (!(m->m_flags & M_EXT)) { - (void)m_free(m); - return(NULL); - } - } - } - - return(m); -} /* m_lgbuf_alloc */ - -/* - gbuf_alloc() is a wrapper for m_lgbuf_alloc(), which is used to - allocate an mbuf when there is the possibility that it may need - to be larger than the size of a standard cluster. - - gbuf_alloc() sets the mbuf lengths, unlike the standard mbuf routines. 
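m_lgbuf_free() runs in mbuf-free context where calling FREE() directly is not safe, so it parks each dead oversize cluster on atp_free_cluster_list, using the first pointer-sized bytes of the cluster itself as the link, and arms a one-second timeout() so atp_delete_free_clusters() can do the real FREE later. The intrusive-list discipline in isolation, as a plain user-space model (malloc/free stand in for _MALLOC/FREE, and the kernel version holds atalk_cluster_lock around both operations):

    #include <stdlib.h>

    static void *free_list = NULL;   /* the link lives inside each dead buffer */

    /* Defer: push the buffer itself onto the list; it needs at least
     * sizeof(void *) bytes of storage, which a cluster always has. */
    static void defer_free(void *buf)
    {
        *(void **)buf = free_list;
        free_list = buf;
    }

    /* Drain: detach the whole list at once, then free at leisure. */
    static void drain_deferred(void)
    {
        void *p = free_list;
        free_list = NULL;
        while (p != NULL) {
            void *next = *(void **)p;
            free(p);
            p = next;
        }
    }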
-*/ - -gbuf_t *gbuf_alloc_wait(size, wait) - int size, wait; -{ - gbuf_t *m = (gbuf_t *)m_lgbuf_alloc(size, wait); - - /* Standard mbuf allocation routines assume that the caller - will set the size. */ - if (m) { - m->m_pkthdr.len = size; - m->m_len = size; - } - - return(m); -} - -int gbuf_msgsize(m) - gbuf_t *m; -{ - int size; - - for (size=0; m; m=gbuf_cont(m)) - size += gbuf_len(m); - return size; -} - -int append_copy(m1, m2, wait) - struct mbuf *m1, *m2; - int wait; -{ - if ((!(m1->m_flags & M_EXT)) && (!(m2->m_flags & M_EXT)) && - (m_trailingspace(m1) >= m2->m_len)) { - /* splat the data from one into the other */ - bcopy(mtod(m2, caddr_t), mtod(m1, caddr_t) + m1->m_len, - (u_int)m2->m_len); - m1->m_len += m2->m_len; - if (m1->m_flags & M_PKTHDR) - m1->m_pkthdr.len += m2->m_len; - return 1; - } - if ((m1->m_next = m_copym(m2, 0, m2->m_len, - (wait)? M_WAIT: M_DONTWAIT)) == NULL) - return 0; - return 1; -} /* append_copy */ - -/* - Copy an mbuf chain, referencing existing external storage, if any. - Leave space for a header in the new chain, if the space has been - left in the origin chain. -*/ -struct mbuf *copy_pkt(mlist, pad) - struct mbuf *mlist; /* the mbuf chain to be copied */ - int pad; /* hint as to how long the header might be - If pad is < 0, leave the same amount of space - as there was in the original. */ -{ - struct mbuf *new_m; - int len; - - if (pad < 0) - len = m_leadingspace(mlist); - else - len = min(pad, m_leadingspace(mlist)); - - /* preserve space for the header at the beginning of the mbuf */ - if (len) { - mlist->m_data -= (len); - mlist->m_len += (len); - if (mlist->m_flags & M_PKTHDR) - mlist->m_pkthdr.len += (len); - new_m = m_copym(mlist, 0, M_COPYALL, M_DONTWAIT); - m_adj(mlist, len); - m_adj(new_m, len); - } else - new_m = m_copym(mlist, 0, M_COPYALL, M_DONTWAIT); - - return(new_m); -} - -void gbuf_linkb(m1, m2) - gbuf_t *m1; - gbuf_t *m2; -{ - while (gbuf_cont(m1) != 0) - m1 = gbuf_cont(m1); - gbuf_cont(m1) = m2; -} - -void gbuf_linkpkt(m1, m2) - gbuf_t *m1; - gbuf_t *m2; -{ - while (gbuf_next(m1) != 0) - m1 = gbuf_next(m1); - gbuf_next(m1) = m2; -} - -int gbuf_freel(m) - gbuf_t *m; -{ - gbuf_t *tmp_m; - - while ((tmp_m = m) != 0) { - m = gbuf_next(m); - gbuf_next(tmp_m) = 0; - gbuf_freem(tmp_m); - } - return (0); -} - -/* free empty mbufs at the front of the chain */ -gbuf_t *gbuf_strip(m) - gbuf_t *m; -{ - gbuf_t *tmp_m; - - while (m && gbuf_len(m) == 0) { - tmp_m = m; - m = gbuf_cont(m); - gbuf_freeb(tmp_m); - } - return(m); -} - -/**************************************/ - -int ddp_adjmsg(m, len) - gbuf_t *m; - int len; -{ - int buf_len; - gbuf_t *curr_m, *prev_m; - - if (m == (gbuf_t *)0) - return 0; - - if (len > 0) { - for (curr_m=m; curr_m;) { - buf_len = gbuf_len(curr_m); - if (len < buf_len) { - gbuf_rinc(curr_m,len); - return 1; - } - len -= buf_len; - gbuf_rinc(curr_m,buf_len); - if ((curr_m = gbuf_cont(curr_m)) == 0) { - gbuf_freem(m); - return 0; - } - } - - } else if (len < 0) { - len = -len; -l_cont: prev_m = 0; - for (curr_m=m; gbuf_cont(curr_m); - prev_m=curr_m, curr_m=gbuf_cont(curr_m)) ; - buf_len = gbuf_len(curr_m); - if (len < buf_len) { - gbuf_wdec(curr_m,len); - return 1; - } - if (prev_m == 0) - return 0; - gbuf_cont(prev_m) = 0; - gbuf_freeb(curr_m); - len -= buf_len; - goto l_cont; - - } - - return 1; -} - -/* - * The message chain, m is grown in size by len contiguous bytes. - * If len is non-negative, len bytes are added to the - * end of the gbuf_t chain. If len is negative, the - * bytes are added to the front. 
ddp_growmsg only adds bytes to - * message blocks of the same type. - * It returns a pointer to the new gbuf_t on sucess, 0 on failure. - */ - -gbuf_t *ddp_growmsg(mp, len) - gbuf_t *mp; - int len; -{ - gbuf_t *m, *d; - - if ((m = mp) == (gbuf_t *) 0) - return ((gbuf_t *) 0); - - if (len <= 0) { - len = -len; - if ((d = gbuf_alloc(len, PRI_MED)) == 0) - return ((gbuf_t *) 0); - gbuf_set_type(d, gbuf_type(m)); - gbuf_wset(d,len); - /* link in new gbuf_t */ - gbuf_cont(d) = m; - return (d); - - } else { - register int count; - /* - * Add to tail. - */ - if ((count = gbuf_msgsize(m)) < 0) - return ((gbuf_t *) 0); - /* find end of chain */ - for ( ; m; m = gbuf_cont(m)) { - if (gbuf_len(m) >= count) - break; - count -= gbuf_len(m); - } - /* m now points to gbuf_t to add to */ - if ((d = gbuf_alloc(len, PRI_MED)) == 0) - return ((gbuf_t *) 0); - gbuf_set_type(d, gbuf_type(m)); - /* link in new gbuf_t */ - gbuf_cont(d) = gbuf_cont(m); - gbuf_cont(m) = d; - gbuf_wset(d,len); - return (d); - } -} - -/* - * return the MSG_IOCACK/MSG_IOCNAK. Note that the same message - * block is used as the vehicle, and that if there is an error return, - * then linked blocks are lopped off. BEWARE of multiple references. - * Used by other appletalk modules, so it is not static! - */ - -void ioc_ack(errno, m, gref) - int errno; - register gbuf_t *m; - register gref_t *gref; -{ - ioc_t *iocbp = (ioc_t *)gbuf_rptr(m); - - /*kprintf("ioc_ack: m=%x gref=%x errno=%d\n", m, gref, errno);*/ - if ((iocbp->ioc_error = errno) != 0) - { /* errno != 0, then there is an error, get rid of linked blocks! */ - - if (gbuf_cont(m)) { - gbuf_freem(gbuf_cont(m)); - gbuf_cont(m) = 0; - } - gbuf_set_type(m, MSG_IOCNAK); - iocbp->ioc_count = 0; /* only make zero length if error */ - iocbp->ioc_rval = -1; - } else - gbuf_set_type(m, MSG_IOCACK); - - atalk_putnext(gref, m); -} - - -static void ioccmd_t_32_to_64( ioccmd_t *from_p, user_ioccmd_t *to_p ) -{ - to_p->ic_cmd = from_p->ic_cmd; - to_p->ic_timout = from_p->ic_timout; - to_p->ic_len = from_p->ic_len; - to_p->ic_dp = CAST_USER_ADDR_T(from_p->ic_dp); -} - - -static void ioccmd_t_64_to_32( user_ioccmd_t *from_p, ioccmd_t *to_p ) -{ - to_p->ic_cmd = from_p->ic_cmd; - to_p->ic_timout = from_p->ic_timout; - to_p->ic_len = from_p->ic_len; - to_p->ic_dp = CAST_DOWN(caddr_t, from_p->ic_dp); -} diff --git a/bsd/netat/sysglue.h b/bsd/netat/sysglue.h deleted file mode 100644 index 10d74f988..000000000 --- a/bsd/netat/sysglue.h +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
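ioccmd_t_32_to_64() and ioccmd_t_64_to_32() above are the LP64 half of at_ioctl(): every user-supplied ioccmd_t is normalized to user_ioccmd_t (whose ic_dp is a user_addr_t, see sysglue.h below) on the way in and converted back on the way out, so the body of at_ioctl() never cares about the caller's word size. The copy-in step, refactored into one hypothetical helper to show the flow:

    /* Sketch only: at_ioctl() writes this out inline. Kernel callers skip
     * copyin() and bcopy() an in-kernel ioccmd_t instead. */
    static int at_copyin_ioccmd(user_addr_t uaddr, boolean_t is64bit,
                                user_ioccmd_t *out)
    {
        ioccmd_t tmp;                        /* 32-bit layout from a 32-bit process */
        int err;

        if (is64bit)                         /* caller already uses the wide layout */
            return copyin(uaddr, (caddr_t)out, sizeof(*out));

        err = copyin(uaddr, (caddr_t)&tmp, sizeof(tmp));
        if (err == 0)
            ioccmd_t_32_to_64(&tmp, out);    /* widens ic_dp to user_addr_t */
        return err;
    }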
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Title: sysglue.h - AppleTalk protocol to Unix System V/streams interface - * - * Facility: AppleTalk Protocol Execution Environment - * - * Author: Gregory Burns, Creation Date: Jun-3-1988 - * - * History: - * X01-001 Gregory Burns 3-Jun-1988 - * Initial Creation. - * - */ - -#ifndef _NETAT_SYSGLUE_H_ -#define _NETAT_SYSGLUE_H_ -#include -#include - -#ifdef __APPLE_API_OBSOLETE - -/* - The following is originally from netat/h/localglue.h, which was - included in netat/h/sysglue.h: -*/ - -typedef struct { - int ic_cmd; - int ic_timout; - int ic_len; - char *ic_dp; -} ioccmd_t; - -#ifdef KERNEL -#ifdef KERNEL_PRIVATE - -/* LP64 version of ioccmd_t. all pointers - * grow when we're dealing with a 64-bit process. - * WARNING - keep in sync with ioccmd_t - */ -typedef struct { - int ic_cmd; - int ic_timout; - int ic_len; - user_addr_t ic_dp __attribute__((aligned(8))); -} user_ioccmd_t; - -#endif // KERNEL_PRIVATE -#endif // KERNEL - -typedef struct { - int ioc_cmd; - void *ioc_cr; - int ioc_id; - int ioc_count; - int ioc_error; - int ioc_rval; - void *ioc_private; - int ioc_filler[4]; -} ioc_t; - -/* - * Want these definitions outside the KERNEL define for admin - * program access. - */ -#ifdef _AIX -#define MSG_DATA 0x00 -#define MSG_PROTO 0x01 -#define MSG_IOCTL 0x0e -#define MSG_ERROR 0x8a -#define MSG_HANGUP 0x89 -#define MSG_IOCACK 0x81 -#define MSG_IOCNAK 0x82 -#define MSG_CTL 0x0d -#else -/* ### LD 5/3/97 MacOSX porting note: - * Cannot use MSG_DATA = 0, because MT_FREE is defined as 0 - * and the sanity check in m_free cause a panic. - */ - -#define MSG_DATA (MT_MAX - 1) -#define MSG_PROTO (MT_MAX - 2) -#define MSG_IOCTL (MT_MAX - 3) -#define MSG_ERROR (MT_MAX - 4) -#define MSG_HANGUP (MT_MAX - 5) -#define MSG_IOCACK (MT_MAX - 6) -#define MSG_IOCNAK (MT_MAX - 7) -#define MSG_CTL (MT_MAX - 8) -#endif - -#ifdef KERNEL -#ifdef KERNEL_PRIVATE - -#define SYS_HZ HZ /* Number of clock (SYS_SETTIMER) ticks per second */ -#define HZ hz /* HZ ticks definition used throughout AppleTalk */ - -/* returned when the operation is not possible at this - * time (ie when starting up or shutting down. - * right now, uses ESHUTDOWN because ENOTREADY is not defined - * in MacOSX. Need to find a better Error code ###LD - */ -#define ENOTREADY ESHUTDOWN - -/* T_MPSAFE is used only in atp_open. I suspect it's a - * trick to accelerate local atp transactions. - */ -#define T_MPSAFE 0 - -#define INTERRUPTIBLE 1 -#define POLLIN 0x0001 -#define POLLOUT 0x0002 -#define POLLPRI 0x0004 -#define POLLMSG 0x0080 -#define POLLSYNC 0x8000 -#define POLLMSG 0x0080 - -/* - * Define a new Data Type for file. 
it was DTYPE_OTHER for - * AIX, for MacOSX there is no such define so defines - * DTYPE_ATALK - */ - -#define DTYPE_ATALK -1 - -#define AT_WR_OFFSET 38 -#ifndef EVENT_NULL -#define EVENT_NULL -1 -#define LOCK_HANDLER 2 -#endif -typedef int atevent_t; - -typedef int atlock_t; -#define ATEVENTINIT(a) (a = (atevent_t) EVENT_NULL) -#define DDP_OUTPUT(m) ddp_putmsg(0,m) -#define StaticProc static - -#define PRI_LO 1 -#define PRI_MED 2 -#define PRI_HI 3 - -typedef struct mbuf gbuf_t; - -/* prototypes for the gbuf routines */ - -struct mbuf *m_lgbuf_alloc(int size, int wait); -gbuf_t *gbuf_alloc_wait(int size, int wait); -gbuf_t *gbuf_copym(gbuf_t *mlist); -gbuf_t *gbuf_strip(gbuf_t *m); -int gbuf_freel(gbuf_t *m); -void gbuf_linkb(gbuf_t *m1, gbuf_t *m2); -void gbuf_linkpkt(gbuf_t *m1, gbuf_t *m2); -int gbuf_msgsize(gbuf_t *m); -struct mbuf *copy_pkt(struct mbuf *, int); -int append_copy(struct mbuf *, struct mbuf *, int); - -#define gbuf_cont(m) m->m_next -#define gbuf_next(m) m->m_nextpkt -#define gbuf_rptr(m) m->m_data -#define gbuf_rinc(m,len) {m->m_data += len; m->m_len -= len;} -#define gbuf_rdec(m,len) {m->m_data -= len; m->m_len += len;} -#define gbuf_wptr(m) (m->m_data + m->m_len) -#define gbuf_winc(m,len) (m->m_len += len) -#define gbuf_wdec(m,len) (m->m_len -= len) -#define gbuf_wset(m,len) (m->m_len = len) -#define gbuf_type(m) m->m_type -#define gbuf_len(m) m->m_len - -#define gbuf_alloc(size, pri) (gbuf_alloc_wait(size, FALSE)) -#define gbuf_copym(mlist) ((gbuf_t *)copy_pkt(mlist, -1)) - -#define gbuf_prepend(m,len) M_PREPEND(m,len,M_DONTWAIT) -#define gbuf_freem(mlist) m_freem((struct mbuf *)mlist) -#define gbuf_freeb(m) (void)m_free((struct mbuf *)m) -#define gbuf_set_type(m, mtype) MCHTYPE(m, mtype) - -/* Duplicate a single mbuf, attaching existing external storage. */ -#define gbuf_dupb_wait(m, wait) ((gbuf_t *)m_copym(m, 0, gbuf_len(m), (wait)? M_WAIT: M_DONTWAIT)) -#define gbuf_dupb(m) (gbuf_dupb_wait(m, FALSE)) -/* Duplicate an mbuf chain, attaching existing external storage. */ -#define gbuf_dupm(mlist) ((gbuf_t *)copy_pkt(mlist, -1)) - /* *** was ((gbuf_t *)m_copym(mlist, 0, M_COPYALL, M_DONTWAIT)) *** */ - -#undef timeoutcf -#undef timeout -#undef untimeout - -struct atpBDS; -int _ATPgetrsp(int, struct atpBDS *, int *, void *); -int _ATPgetreq(int , unsigned char *, int , int *, void *); -int _ATPsndrsp(int , unsigned char *, int , int, int *, void *); -int _ATPsndreq(int , unsigned char *, int , int, int *, void *); - -#endif /* KERNEL_PRIVATE */ -#endif /* KERNEL */ -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_SYSGLUE_H_ */ diff --git a/bsd/netat/zip.h b/bsd/netat/zip.h deleted file mode 100644 index 175280c07..000000000 --- a/bsd/netat/zip.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
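
The gbuf pointer macros above keep an mbuf's data pointer and length in lock step: gbuf_rinc() consumes bytes from the front, gbuf_wdec() retracts the tail, and gbuf_wptr() is always the first byte past the valid data. The same arithmetic on a plain illustrative struct:

struct simple_buf {
	char *m_data;   /* start of valid data (cf. gbuf_rptr) */
	int   m_len;    /* count of valid bytes (cf. gbuf_len) */
};

/* Consume len bytes from the front (cf. gbuf_rinc). */
static void
buf_rinc(struct simple_buf *m, int len)
{
	m->m_data += len;
	m->m_len  -= len;
}

/* The write pointer is the end of the valid data (cf. gbuf_wptr). */
static char *
buf_wptr(struct simple_buf *m)
{
	return m->m_data + m->m_len;
}

/* Retract len bytes from the tail (cf. gbuf_wdec). */
static void
buf_wdec(struct simple_buf *m, int len)
{
	m->m_len -= len;
}
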
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * - * ORIGINS: 82 - * - * (C) COPYRIGHT Apple Computer, Inc. 1992-1996 - * All Rights Reserved - * - */ - -#ifndef _NETAT_ZIP_H_ -#define _NETAT_ZIP_H_ -#include - -#ifdef __APPLE_API_OBSOLETE - -/* Definitions for ZIP, per AppleTalk Zone Information Protocol - * documentation from `Inside AppleTalk', July 14, 1986. - */ - -/* ZIP packet types */ - -#define ZIP_QUERY 1 /* ZIP zone query packet */ -#define ZIP_REPLY 2 /* ZIP query reply packet */ -#define ZIP_TAKEDOWN 3 /* ZIP takedown packet */ -#define ZIP_BRINGUP 4 /* ZIP bringup packet */ -#define ZIP_GETNETINFO 5 /* ZIP DDP get net info packet */ -#define ZIP_NETINFO_REPLY 6 /* ZIP GetNetInfo Reply */ -#define ZIP_NOTIFY 7 /* Notification of zone name change */ -#define ZIP_EXTENDED_REPLY 8 /* ZIP extended query reply packet */ - -#define ZIP_GETMYZONE 7 /* ZIP ATP get my zone packet */ -#define ZIP_GETZONELIST 8 /* ZIP ATP get zone list packet */ -#define ZIP_GETLOCALZONES 9 /* ZIP ATP get cable list packet*/ - -#define ZIP_HDR_SIZE 2 -#define ZIP_DATA_SIZE 584 - - -#define ZIP_MAX_ZONE_LENGTH 32 /* Max length for a Zone Name */ - -typedef struct at_zip { - u_char command; - u_char flags; - char data[ZIP_DATA_SIZE]; -} at_zip_t; - -#define ZIP_ZIP(c) ((at_zip_t *)(&((at_ddp_t *)(c))->data[0])) - -typedef struct { - char command; - char flags; - at_net cable_range_start; - at_net cable_range_end; - u_char data[1]; -} at_x_zip_t; - -#define ZIP_X_HDR_SIZE 6 - -/* flags for ZipNetInfoReply packet */ -#define ZIP_ZONENAME_INVALID 0x80 -#define ZIP_USE_BROADCAST 0x40 -#define ZIP_ONE_ZONE 0x20 - -#define ZIP_NETINFO_RETRIES 3 -#define ZIP_TIMER_INT HZ /* HZ defined in param.h */ - -/* ZIP control codes */ -#define ZIP_ONLINE 1 -#define ZIP_LATE_ROUTER 2 -#define ZIP_NO_ROUTER 3 - -#define ZIP_RE_AARP -1 - -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_ZIP_H_ */ diff --git a/bsd/netinet/Makefile b/bsd/netinet/Makefile index 3a98cd8d6..dcd60c164 100644 --- a/bsd/netinet/Makefile +++ b/bsd/netinet/Makefile @@ -7,14 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ bootp.h icmp6.h if_ether.h icmp_var.h \ igmp.h igmp_var.h in.h in_pcb.h \ @@ -31,10 +23,15 @@ PRIVATE_DATAFILES = \ ip_flowid.h \ ip_fw.h ip_fw2.h \ tcp_debug.h \ - in_gif.h ip_compat.h + in_gif.h ip_compat.h \ + flow_divert_proto.h + +ifeq ($(PLATFORM),iPhoneOS) +PRIVATE_DATAFILES += mptcp_var.h +endif PRIVATE_KERNELFILES = ${KERNELFILES} \ - ip_ecn.h ip_encap.h + ip_ecn.h ip_encap.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/bsd/netinet/cpu_in_cksum.c b/bsd/netinet/cpu_in_cksum.c new file mode 100644 index 000000000..a579371a4 --- /dev/null +++ b/bsd/netinet/cpu_in_cksum.c @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. 
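
As a sketch of the wire format defined above: a ZIP Query carries the one-byte command, then the header byte that at_zip_t names "flags" (a Query uses it as the network count, per Inside AppleTalk), then 16-bit network numbers in big-endian order. The helper name and layout details here are illustrative:

#include <stddef.h>
#include <stdint.h>

static size_t
zip_build_query(uint8_t *pkt, const uint16_t *nets, uint8_t count)
{
	size_t off = 0;
	uint8_t i;

	pkt[off++] = 1;         /* ZIP_QUERY */
	pkt[off++] = count;     /* number of networks being asked about */
	for (i = 0; i < count; i++) {
		pkt[off++] = (uint8_t)(nets[i] >> 8);   /* network number, big-endian */
		pkt[off++] = (uint8_t)(nets[i] & 0xff);
	}
	return off;             /* ZIP_HDR_SIZE plus 2 bytes per network */
}
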
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/*- + * Copyright (c) 2008 Joerg Sonnenberger . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +int cpu_in_cksum(struct mbuf *, int, int, uint32_t); + +#define PREDICT_FALSE(_exp) __builtin_expect((_exp), 0) + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + * + * A discussion of different implementation techniques can be found in + * RFC 1071. + * + * The default implementation for 32-bit architectures is using + * a 32-bit accumulator and operating on 16-bit operands. + * + * The default implementation for 64-bit architectures is using + * a 64-bit accumulator and operating on 32-bit operands. 
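
For reference, the textbook RFC 1071 loop that both versions below implement, written over a flat buffer and without the unrolling, alignment, or byte-swap handling. This is an illustration, not the kernel entry point:

#include <stddef.h>
#include <stdint.h>

static uint16_t
in_cksum_simple(const uint8_t *data, size_t len, uint32_t initial)
{
	uint32_t sum = initial;

	while (len >= 2) {              /* sum 16-bit big-endian words */
		sum += (uint32_t)((data[0] << 8) | data[1]);
		data += 2;
		len -= 2;
	}
	if (len == 1)                   /* an odd trailing byte is zero-padded */
		sum += (uint32_t)(data[0] << 8);

	while (sum >> 16)               /* fold the carries back in */
		sum = (sum >> 16) + (sum & 0xffff);

	return (uint16_t)~sum;          /* one's complement of the sum */
}
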
+ * + * Both versions are unrolled to handle 32 Byte / 64 Byte fragments as core + * of the inner loop. After each iteration of the inner loop, a partial + * reduction is done to avoid carry in long packets. + */ + +#if ULONG_MAX == 0xffffffffUL +/* 32-bit version */ +int +cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum) +{ + int mlen; + uint32_t sum, partial; + unsigned int final_acc; + uint8_t *data; + boolean_t needs_swap, started_on_odd; + + VERIFY(len >= 0); + VERIFY(off >= 0); + + needs_swap = FALSE; + started_on_odd = FALSE; + sum = (initial_sum >> 16) + (initial_sum & 0xffff); + + for (;;) { + if (PREDICT_FALSE(m == NULL)) { + printf("%s: out of data\n", __func__); + return (-1); + } + mlen = m->m_len; + if (mlen > off) { + mlen -= off; + data = mtod(m, uint8_t *) + off; + goto post_initial_offset; + } + off -= mlen; + if (len == 0) + break; + m = m->m_next; + } + + for (; len > 0; m = m->m_next) { + if (PREDICT_FALSE(m == NULL)) { + printf("%s: out of data\n", __func__); + return (-1); + } + mlen = m->m_len; + data = mtod(m, uint8_t *); +post_initial_offset: + if (mlen == 0) + continue; + if (mlen > len) + mlen = len; + len -= mlen; + + partial = 0; + if ((uintptr_t)data & 1) { + /* Align on word boundary */ + started_on_odd = !started_on_odd; +#if BYTE_ORDER == LITTLE_ENDIAN + partial = *data << 8; +#else + partial = *data; +#endif + ++data; + --mlen; + } + needs_swap = started_on_odd; + while (mlen >= 32) { + __builtin_prefetch(data + 32); + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + partial += *(uint16_t *)(void *)(data + 8); + partial += *(uint16_t *)(void *)(data + 10); + partial += *(uint16_t *)(void *)(data + 12); + partial += *(uint16_t *)(void *)(data + 14); + partial += *(uint16_t *)(void *)(data + 16); + partial += *(uint16_t *)(void *)(data + 18); + partial += *(uint16_t *)(void *)(data + 20); + partial += *(uint16_t *)(void *)(data + 22); + partial += *(uint16_t *)(void *)(data + 24); + partial += *(uint16_t *)(void *)(data + 26); + partial += *(uint16_t *)(void *)(data + 28); + partial += *(uint16_t *)(void *)(data + 30); + data += 32; + mlen -= 32; + if (PREDICT_FALSE(partial & 0xc0000000)) { + if (needs_swap) + partial = (partial << 8) + + (partial >> 24); + sum += (partial >> 16); + sum += (partial & 0xffff); + partial = 0; + } + } + if (mlen & 16) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + partial += *(uint16_t *)(void *)(data + 8); + partial += *(uint16_t *)(void *)(data + 10); + partial += *(uint16_t *)(void *)(data + 12); + partial += *(uint16_t *)(void *)(data + 14); + data += 16; + mlen -= 16; + } + /* + * mlen is not updated below as the remaining tests + * are using bit masks, which are not affected. 
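
The PREDICT_FALSE(partial & 0xc0000000) test above is the partial reduction described in the header comment: each 16-bit add contributes at most 0xffff, so folding once the accumulator's top two bits fill guarantees the next unrolled block cannot overflow 32 bits, and it leaves headroom for the byte swap (partial << 8) applied when the data started on an odd boundary. The fold itself, as a standalone sketch:

#include <stdint.h>

static uint32_t
fold16(uint32_t sum)
{
	/*
	 * Add the high half into the low half, twice to absorb the carry:
	 * e.g. 0x1fffe -> 1 + 0xfffe = 0xffff.
	 */
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return sum;     /* now fits in 16 bits, same value mod 0xffff */
}
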
+ */ + if (mlen & 8) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + data += 8; + } + if (mlen & 4) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + data += 4; + } + if (mlen & 2) { + partial += *(uint16_t *)(void *)data; + data += 2; + } + if (mlen & 1) { +#if BYTE_ORDER == LITTLE_ENDIAN + partial += *data; +#else + partial += *data << 8; +#endif + started_on_odd = !started_on_odd; + } + + if (needs_swap) + partial = (partial << 8) + (partial >> 24); + sum += (partial >> 16) + (partial & 0xffff); + /* + * Reduce sum to allow potential byte swap + * in the next iteration without carry. + */ + sum = (sum >> 16) + (sum & 0xffff); + } + final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + return (~final_acc & 0xffff); +} + +#else +/* 64-bit version */ +int +cpu_in_cksum(struct mbuf *m, int len, int off, uint32_t initial_sum) +{ + int mlen; + uint64_t sum, partial; + unsigned int final_acc; + uint8_t *data; + boolean_t needs_swap, started_on_odd; + + VERIFY(len >= 0); + VERIFY(off >= 0); + + needs_swap = FALSE; + started_on_odd = FALSE; + sum = initial_sum; + + for (;;) { + if (PREDICT_FALSE(m == NULL)) { + printf("%s: out of data\n", __func__); + return (-1); + } + mlen = m->m_len; + if (mlen > off) { + mlen -= off; + data = mtod(m, uint8_t *) + off; + goto post_initial_offset; + } + off -= mlen; + if (len == 0) + break; + m = m->m_next; + } + + for (; len > 0; m = m->m_next) { + if (PREDICT_FALSE(m == NULL)) { + printf("%s: out of data\n", __func__); + return (-1); + } + mlen = m->m_len; + data = mtod(m, uint8_t *); +post_initial_offset: + if (mlen == 0) + continue; + if (mlen > len) + mlen = len; + len -= mlen; + + partial = 0; + if ((uintptr_t)data & 1) { + /* Align on word boundary */ + started_on_odd = !started_on_odd; +#if BYTE_ORDER == LITTLE_ENDIAN + partial = *data << 8; +#else + partial = *data; +#endif + ++data; + --mlen; + } + needs_swap = started_on_odd; + if ((uintptr_t)data & 2) { + if (mlen < 2) + goto trailing_bytes; + partial += *(uint16_t *)(void *)data; + data += 2; + mlen -= 2; + } + while (mlen >= 64) { + __builtin_prefetch(data + 32); + __builtin_prefetch(data + 64); + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + partial += *(uint32_t *)(void *)(data + 16); + partial += *(uint32_t *)(void *)(data + 20); + partial += *(uint32_t *)(void *)(data + 24); + partial += *(uint32_t *)(void *)(data + 28); + partial += *(uint32_t *)(void *)(data + 32); + partial += *(uint32_t *)(void *)(data + 36); + partial += *(uint32_t *)(void *)(data + 40); + partial += *(uint32_t *)(void *)(data + 44); + partial += *(uint32_t *)(void *)(data + 48); + partial += *(uint32_t *)(void *)(data + 52); + partial += *(uint32_t *)(void *)(data + 56); + partial += *(uint32_t *)(void *)(data + 60); + data += 64; + mlen -= 64; + if (PREDICT_FALSE(partial & (3ULL << 62))) { + if (needs_swap) + partial = (partial << 8) + + (partial >> 56); + sum += (partial >> 32); + sum += (partial & 0xffffffff); + partial = 0; + } + } + /* + * mlen is not updated below as the remaining tests + * are using bit masks, which are not affected. 
+ */ + if (mlen & 32) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + partial += *(uint32_t *)(void *)(data + 16); + partial += *(uint32_t *)(void *)(data + 20); + partial += *(uint32_t *)(void *)(data + 24); + partial += *(uint32_t *)(void *)(data + 28); + data += 32; + } + if (mlen & 16) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + data += 16; + } + if (mlen & 8) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + data += 8; + } + if (mlen & 4) { + partial += *(uint32_t *)(void *)data; + data += 4; + } + if (mlen & 2) { + partial += *(uint16_t *)(void *)data; + data += 2; + } +trailing_bytes: + if (mlen & 1) { +#if BYTE_ORDER == LITTLE_ENDIAN + partial += *data; +#else + partial += *data << 8; +#endif + started_on_odd = !started_on_odd; + } + + if (needs_swap) + partial = (partial << 8) + (partial >> 56); + sum += (partial >> 32) + (partial & 0xffffffff); + /* + * Reduce sum to allow potential byte swap + * in the next iteration without carry. + */ + sum = (sum >> 32) + (sum & 0xffffffff); + } + final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) + + ((sum >> 16) & 0xffff) + (sum & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + return (~final_acc & 0xffff); +} +#endif /* ULONG_MAX != 0xffffffffUL */ diff --git a/bsd/netinet/dhcp_options.h b/bsd/netinet/dhcp_options.h index 674416198..14e7eda8a 100644 --- a/bsd/netinet/dhcp_options.h +++ b/bsd/netinet/dhcp_options.h @@ -181,7 +181,7 @@ typedef struct { typedef ptrlist_t dhcpol_t; -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE void dhcpol_init(dhcpol_t * list); void dhcpol_free(dhcpol_t * list); @@ -274,5 +274,5 @@ dhcpoa_freespace(dhcpoa_t * oa_p); int dhcpoa_size(dhcpoa_t * oa_p); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET_DHCP_OPTIONS_H */ diff --git a/bsd/netinet/flow_divert.c b/bsd/netinet/flow_divert.c new file mode 100644 index 000000000..8fbe88a80 --- /dev/null +++ b/bsd/netinet/flow_divert.c @@ -0,0 +1,3266 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
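
The 64-bit path just above finishes by summing the accumulator's four 16-bit lanes and folding twice, exactly as in the final_acc computation; the caller then returns the one's complement. The same reduction as a standalone sketch:

#include <stdint.h>

static uint16_t
fold64(uint64_t sum)
{
	uint32_t acc;

	/* Sum the four 16-bit lanes (at most 4 * 0xffff = 0x3fffc)... */
	acc = (uint32_t)((sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff));
	/* ...then two folds absorb any remaining carries. */
	acc = (acc >> 16) + (acc & 0xffff);
	acc = (acc >> 16) + (acc & 0xffff);
	return (uint16_t)acc;
}
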
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if INET6 +#include +#endif /* INET6 */ +#include +#include +#include + +#define FLOW_DIVERT_CONNECT_STARTED 0x00000001 +#define FLOW_DIVERT_READ_CLOSED 0x00000002 +#define FLOW_DIVERT_WRITE_CLOSED 0x00000004 +#define FLOW_DIVERT_TUNNEL_RD_CLOSED 0x00000008 +#define FLOW_DIVERT_TUNNEL_WR_CLOSED 0x00000010 +#define FLOW_DIVERT_TRANSFERRED 0x00000020 + +#define FDLOG(level, pcb, format, ...) do { \ + if (level <= (pcb)->log_level) { \ + log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s (%u): " format "\n", __FUNCTION__, (pcb)->hash, __VA_ARGS__); \ + } \ +} while (0) + +#define FDLOG0(level, pcb, msg) do { \ + if (level <= (pcb)->log_level) { \ + log((level > LOG_NOTICE ? LOG_NOTICE : level), "%s (%u): %s\n", __FUNCTION__, (pcb)->hash, msg); \ + } \ +} while (0) + +#define FDRETAIN(pcb) if ((pcb) != NULL) OSIncrementAtomic(&(pcb)->ref_count) +#define FDRELEASE(pcb) \ + do { \ + if ((pcb) != NULL && 1 == OSDecrementAtomic(&(pcb)->ref_count)) { \ + flow_divert_pcb_destroy(pcb); \ + } \ + } while (0) + +#define FDLOCK(pcb) lck_mtx_lock(&(pcb)->mtx) +#define FDUNLOCK(pcb) lck_mtx_unlock(&(pcb)->mtx) + +#define FD_CTL_SENDBUFF_SIZE (2 * FLOW_DIVERT_CHUNK_SIZE) +#define FD_CTL_RCVBUFF_SIZE (128 * 1024) + +#define GROUP_BIT_CTL_ENQUEUE_BLOCKED 0 + +#define GROUP_COUNT_MAX 32 +#define FLOW_DIVERT_MAX_NAME_SIZE 4096 +#define FLOW_DIVERT_MAX_KEY_SIZE 1024 + +#define DNS_SERVICE_GROUP_UNIT (GROUP_COUNT_MAX + 1) + +struct flow_divert_trie_node +{ + uint16_t start; + uint16_t length; + uint16_t child_map; + uint32_t group_unit; +}; + +struct flow_divert_trie +{ + struct flow_divert_trie_node *nodes; + uint16_t *child_maps; + uint8_t *bytes; + void *memory; + size_t nodes_count; + size_t child_maps_count; + size_t bytes_count; + size_t nodes_free_next; + size_t child_maps_free_next; + size_t bytes_free_next; + uint16_t root; +}; + +#define CHILD_MAP_SIZE 256 +#define NULL_TRIE_IDX 0xffff +#define TRIE_NODE(t, i) ((t)->nodes[(i)]) +#define TRIE_CHILD(t, i, b) (((t)->child_maps + (CHILD_MAP_SIZE * TRIE_NODE(t, i).child_map))[(b)]) +#define TRIE_BYTE(t, i) ((t)->bytes[(i)]) + +static struct flow_divert_pcb nil_pcb; + +decl_lck_rw_data(static, g_flow_divert_group_lck); +static struct flow_divert_group **g_flow_divert_groups = NULL; +static uint32_t g_active_group_count = 0; +static struct flow_divert_trie g_signing_id_trie; + +static lck_grp_attr_t *flow_divert_grp_attr = NULL; +static lck_attr_t *flow_divert_mtx_attr = NULL; +static lck_grp_t *flow_divert_mtx_grp = NULL; +static errno_t g_init_result = 0; + +static kern_ctl_ref g_flow_divert_kctl_ref = NULL; + +static struct protosw g_flow_divert_in_protosw; +static struct pr_usrreqs g_flow_divert_in_usrreqs; +#if INET6 +static struct ip6protosw g_flow_divert_in6_protosw; +static struct pr_usrreqs g_flow_divert_in6_usrreqs; +#endif /* INET6 */ + +static struct protosw *g_tcp_protosw = NULL; +static struct ip6protosw *g_tcp6_protosw = NULL; + +static inline int +flow_divert_pcb_cmp(const struct flow_divert_pcb *pcb_a, const struct flow_divert_pcb *pcb_b) +{ + return memcmp(&pcb_a->hash, &pcb_b->hash, 
sizeof(pcb_a->hash)); +} + +RB_PROTOTYPE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp); +RB_GENERATE(fd_pcb_tree, flow_divert_pcb, rb_link, flow_divert_pcb_cmp); + +static const char * +flow_divert_packet_type2str(uint8_t packet_type) +{ + switch (packet_type) { + case FLOW_DIVERT_PKT_CONNECT: + return "connect"; + case FLOW_DIVERT_PKT_CONNECT_RESULT: + return "connect result"; + case FLOW_DIVERT_PKT_DATA: + return "data"; + case FLOW_DIVERT_PKT_CLOSE: + return "close"; + case FLOW_DIVERT_PKT_READ_NOTIFY: + return "read notification"; + case FLOW_DIVERT_PKT_PROPERTIES_UPDATE: + return "properties update"; + case FLOW_DIVERT_PKT_APP_MAP_UPDATE: + return "app map update"; + case FLOW_DIVERT_PKT_APP_MAP_CREATE: + return "app map create"; + default: + return "unknown"; + } +} + +static struct flow_divert_pcb * +flow_divert_pcb_lookup(uint32_t hash, struct flow_divert_group *group) +{ + struct flow_divert_pcb key_item; + struct flow_divert_pcb *fd_cb = NULL; + + key_item.hash = hash; + + lck_rw_lock_shared(&group->lck); + fd_cb = RB_FIND(fd_pcb_tree, &group->pcb_tree, &key_item); + FDRETAIN(fd_cb); + lck_rw_done(&group->lck); + + return fd_cb; +} + +static errno_t +flow_divert_pcb_insert(struct flow_divert_pcb *fd_cb, uint32_t ctl_unit) +{ + int error = 0; + struct flow_divert_pcb *exist = NULL; + struct flow_divert_group *group; + static uint32_t g_nextkey = 1; + static uint32_t g_hash_seed = 0; + errno_t result = 0; + int try_count = 0; + + if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) { + return EINVAL; + } + + socket_unlock(fd_cb->so, 0); + lck_rw_lock_shared(&g_flow_divert_group_lck); + + if (g_flow_divert_groups == NULL || g_active_group_count == 0) { + FDLOG0(LOG_ERR, &nil_pcb, "No active groups, flow divert cannot be used for this socket"); + error = ENETUNREACH; + goto done; + } + + group = g_flow_divert_groups[ctl_unit]; + if (group == NULL) { + FDLOG(LOG_ERR, &nil_pcb, "Group for control unit %u is NULL, flow divert cannot be used for this socket", ctl_unit); + error = ENETUNREACH; + goto done; + } + + socket_lock(fd_cb->so, 0); + + do { + uint32_t key[2]; + uint32_t idx; + + key[0] = g_nextkey++; + key[1] = RandomULong(); + + if (g_hash_seed == 0) { + g_hash_seed = RandomULong(); + } + + fd_cb->hash = net_flowhash(key, sizeof(key), g_hash_seed); + + for (idx = 1; idx < GROUP_COUNT_MAX; idx++) { + struct flow_divert_group *curr_group = g_flow_divert_groups[idx]; + if (curr_group != NULL && curr_group != group) { + lck_rw_lock_shared(&curr_group->lck); + exist = RB_FIND(fd_pcb_tree, &curr_group->pcb_tree, fd_cb); + lck_rw_done(&curr_group->lck); + if (exist != NULL) { + break; + } + } + } + + if (exist == NULL) { + lck_rw_lock_exclusive(&group->lck); + exist = RB_INSERT(fd_pcb_tree, &group->pcb_tree, fd_cb); + lck_rw_done(&group->lck); + } + } while (exist != NULL && try_count++ < 3); + + if (exist == NULL) { + fd_cb->group = group; + FDRETAIN(fd_cb); /* The group now has a reference */ + } else { + fd_cb->hash = 0; + result = EEXIST; + } + + socket_unlock(fd_cb->so, 0); + +done: + lck_rw_done(&g_flow_divert_group_lck); + socket_lock(fd_cb->so, 0); + + return result; +} + +static struct flow_divert_pcb * +flow_divert_pcb_create(socket_t so) +{ + struct flow_divert_pcb *new_pcb = NULL; + + MALLOC_ZONE(new_pcb, struct flow_divert_pcb *, sizeof(*new_pcb), M_FLOW_DIVERT_PCB, M_WAITOK); + if (new_pcb == NULL) { + FDLOG0(LOG_ERR, &nil_pcb, "failed to allocate a pcb"); + return NULL; + } + + memset(new_pcb, 0, sizeof(*new_pcb)); + + lck_mtx_init(&new_pcb->mtx, 
flow_divert_mtx_grp, flow_divert_mtx_attr); + new_pcb->so = so; + new_pcb->log_level = nil_pcb.log_level; + + FDRETAIN(new_pcb); /* Represents the socket's reference */ + + return new_pcb; +} + +static void +flow_divert_pcb_destroy(struct flow_divert_pcb *fd_cb) +{ + FDLOG(LOG_INFO, fd_cb, "Destroying, app tx %u, app rx %u, tunnel tx %u, tunnel rx %u", + fd_cb->bytes_written_by_app, fd_cb->bytes_read_by_app, fd_cb->bytes_sent, fd_cb->bytes_received); + + if (fd_cb->local_address != NULL) { + FREE(fd_cb->local_address, M_SONAME); + } + if (fd_cb->remote_address != NULL) { + FREE(fd_cb->remote_address, M_SONAME); + } + if (fd_cb->connect_token != NULL) { + mbuf_freem(fd_cb->connect_token); + } + FREE_ZONE(fd_cb, sizeof(*fd_cb), M_FLOW_DIVERT_PCB); +} + +static void +flow_divert_pcb_remove(struct flow_divert_pcb *fd_cb) +{ + if (fd_cb->group != NULL) { + struct flow_divert_group *group = fd_cb->group; + lck_rw_lock_exclusive(&group->lck); + FDLOG(LOG_INFO, fd_cb, "Removing from group %d, ref count = %d", group->ctl_unit, fd_cb->ref_count); + RB_REMOVE(fd_pcb_tree, &group->pcb_tree, fd_cb); + fd_cb->group = NULL; + FDRELEASE(fd_cb); /* Release the group's reference */ + lck_rw_done(&group->lck); + } +} + +static int +flow_divert_packet_init(struct flow_divert_pcb *fd_cb, uint8_t packet_type, mbuf_t *packet) +{ + struct flow_divert_packet_header hdr; + int error = 0; + + error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, packet); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error); + return error; + } + + hdr.packet_type = packet_type; + hdr.conn_id = htonl(fd_cb->hash); + + /* Lay down the header */ + error = mbuf_copyback(*packet, 0, sizeof(hdr), &hdr, MBUF_DONTWAIT); + if (error) { + FDLOG(LOG_ERR, fd_cb, "mbuf_copyback(hdr) failed: %d", error); + mbuf_freem(*packet); + *packet = NULL; + return error; + } + + return 0; +} + +static int +flow_divert_packet_append_tlv(mbuf_t packet, uint8_t type, size_t length, const void *value) +{ + size_t net_length = htonl(length); + int error = 0; + + error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(type), &type, MBUF_DONTWAIT); + if (error) { + FDLOG(LOG_ERR, &nil_pcb, "failed to append the type (%d)", type); + return error; + } + + error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), sizeof(net_length), &net_length, MBUF_DONTWAIT); + if (error) { + FDLOG(LOG_ERR, &nil_pcb, "failed to append the length (%lu)", length); + return error; + } + + error = mbuf_copyback(packet, mbuf_pkthdr_len(packet), length, value, MBUF_DONTWAIT); + if (error) { + FDLOG0(LOG_ERR, &nil_pcb, "failed to append the value"); + return error; + } + + return error; +} + +static int +flow_divert_packet_find_tlv(mbuf_t packet, int offset, uint8_t type, int *err, int next) +{ + size_t cursor = offset; + int error = 0; + size_t curr_length; + uint8_t curr_type; + + *err = 0; + + do { + if (!next) { + error = mbuf_copydata(packet, cursor, sizeof(curr_type), &curr_type); + if (error) { + *err = ENOENT; + return -1; + } + } else { + next = 0; + curr_type = FLOW_DIVERT_TLV_NIL; + } + + if (curr_type != type) { + cursor += sizeof(curr_type); + error = mbuf_copydata(packet, cursor, sizeof(curr_length), &curr_length); + if (error) { + *err = error; + return -1; + } + + cursor += (sizeof(curr_length) + ntohl(curr_length)); + } + } while (curr_type != type); + + return cursor; +} + +static int +flow_divert_packet_get_tlv(mbuf_t packet, int offset, uint8_t type, size_t buff_len, void *buff, size_t *val_size) +{ + int error = 0; + size_t length; 
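
flow_divert_packet_append_tlv() above frames every attribute as a type byte, a length in network byte order, and the raw value. A flat-buffer sketch of that framing, assuming a four-byte length field for simplicity and omitting bounds checks; the names are illustrative:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

static size_t
tlv_append(uint8_t *buf, size_t off, uint8_t type, uint32_t length,
    const void *value)
{
	uint32_t net_length = htonl(length);

	buf[off++] = type;                                   /* T */
	memcpy(buf + off, &net_length, sizeof(net_length));  /* L, big-endian */
	off += sizeof(net_length);
	memcpy(buf + off, value, length);                    /* V */
	return off + length;
}
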
+ int tlv_offset; + + tlv_offset = flow_divert_packet_find_tlv(packet, offset, type, &error, 0); + if (tlv_offset < 0) { + return error; + } + + error = mbuf_copydata(packet, tlv_offset + sizeof(type), sizeof(length), &length); + if (error) { + return error; + } + + length = ntohl(length); + + if (val_size != NULL) { + *val_size = length; + } + + if (buff != NULL && buff_len > 0) { + size_t to_copy = (length < buff_len) ? length : buff_len; + error = mbuf_copydata(packet, tlv_offset + sizeof(type) + sizeof(length), to_copy, buff); + if (error) { + return error; + } + } + + return 0; +} + +static int +flow_divert_packet_compute_hmac(mbuf_t packet, struct flow_divert_group *group, uint8_t *hmac) +{ + mbuf_t curr_mbuf = packet; + + if (g_crypto_funcs == NULL || group->token_key == NULL) { + return ENOPROTOOPT; + } + + cchmac_di_decl(g_crypto_funcs->ccsha1_di, hmac_ctx); + g_crypto_funcs->cchmac_init_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, group->token_key_size, group->token_key); + + while (curr_mbuf != NULL) { + g_crypto_funcs->cchmac_update_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, mbuf_len(curr_mbuf), mbuf_data(curr_mbuf)); + curr_mbuf = mbuf_next(curr_mbuf); + } + + g_crypto_funcs->cchmac_final_fn(g_crypto_funcs->ccsha1_di, hmac_ctx, hmac); + + return 0; +} + +static int +flow_divert_packet_verify_hmac(mbuf_t packet, uint32_t ctl_unit) +{ + int error = 0; + struct flow_divert_group *group = NULL; + int hmac_offset; + uint8_t packet_hmac[SHA_DIGEST_LENGTH]; + uint8_t computed_hmac[SHA_DIGEST_LENGTH]; + mbuf_t tail; + + lck_rw_lock_shared(&g_flow_divert_group_lck); + + if (g_flow_divert_groups != NULL && g_active_group_count > 0) { + group = g_flow_divert_groups[ctl_unit]; + } + + if (group == NULL) { + lck_rw_done(&g_flow_divert_group_lck); + return ENOPROTOOPT; + } + + lck_rw_lock_shared(&group->lck); + + if (group->token_key == NULL) { + error = ENOPROTOOPT; + goto done; + } + + hmac_offset = flow_divert_packet_find_tlv(packet, 0, FLOW_DIVERT_TLV_HMAC, &error, 0); + if (hmac_offset < 0) { + goto done; + } + + error = flow_divert_packet_get_tlv(packet, hmac_offset, FLOW_DIVERT_TLV_HMAC, sizeof(packet_hmac), packet_hmac, NULL); + if (error) { + goto done; + } + + /* Chop off the HMAC TLV */ + error = mbuf_split(packet, hmac_offset, MBUF_WAITOK, &tail); + if (error) { + goto done; + } + + mbuf_free(tail); + + error = flow_divert_packet_compute_hmac(packet, group, computed_hmac); + if (error) { + goto done; + } + + if (memcmp(packet_hmac, computed_hmac, sizeof(packet_hmac))) { + FDLOG0(LOG_WARNING, &nil_pcb, "HMAC in token does not match computed HMAC"); + error = EINVAL; + goto done; + } + +done: + lck_rw_done(&group->lck); + lck_rw_done(&g_flow_divert_group_lck); + return error; +} + +static void +flow_divert_add_data_statistics(struct flow_divert_pcb *fd_cb, int data_len, Boolean send) +{ + struct inpcb *inp = NULL; + struct ifnet *ifp = NULL; + Boolean cell = FALSE; + Boolean wifi = FALSE; + + inp = sotoinpcb(fd_cb->so); + if (inp == NULL) { + return; + } + + ifp = inp->inp_last_outifp; + if (ifp != NULL) { + cell = IFNET_IS_CELLULAR(ifp); + wifi = (!cell && IFNET_IS_WIFI(ifp)); + } + + if (send) { + INP_ADD_STAT(inp, cell, wifi, txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, txbytes, data_len); + } else { + INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, rxbytes, data_len); + } +} + +static errno_t +flow_divert_check_no_cellular(struct flow_divert_pcb *fd_cb) +{ + struct inpcb *inp = NULL; + struct ifnet *ifp = NULL; + + inp = sotoinpcb(fd_cb->so); + if ((inp 
!= NULL) && (inp->inp_flags & INP_NO_IFT_CELLULAR)) { + ifp = inp->inp_last_outifp; + if (ifp != NULL) { + if (IFNET_IS_CELLULAR(ifp)) { + return EHOSTUNREACH; + } + } + } + + return 0; +} + +static void +flow_divert_update_closed_state(struct flow_divert_pcb *fd_cb, int how, Boolean tunnel) +{ + if (how != SHUT_RD) { + fd_cb->flags |= FLOW_DIVERT_WRITE_CLOSED; + if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) { + fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED; + /* If the tunnel is not accepting writes any more, then flush the send buffer */ + sbflush(&fd_cb->so->so_snd); + } + } + if (how != SHUT_WR) { + fd_cb->flags |= FLOW_DIVERT_READ_CLOSED; + if (tunnel || !(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) { + fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED; + } + } +} + +static uint16_t +trie_node_alloc(struct flow_divert_trie *trie) +{ + if (trie->nodes_free_next < trie->nodes_count) { + uint16_t node_idx = trie->nodes_free_next++; + TRIE_NODE(trie, node_idx).child_map = NULL_TRIE_IDX; + return node_idx; + } else { + return NULL_TRIE_IDX; + } +} + +static uint16_t +trie_child_map_alloc(struct flow_divert_trie *trie) +{ + if (trie->child_maps_free_next < trie->child_maps_count) { + return trie->child_maps_free_next++; + } else { + return NULL_TRIE_IDX; + } +} + +static uint16_t +trie_bytes_move(struct flow_divert_trie *trie, uint16_t bytes_idx, size_t bytes_size) +{ + uint16_t start = trie->bytes_free_next; + if (start + bytes_size <= trie->bytes_count) { + if (start != bytes_idx) { + memmove(&TRIE_BYTE(trie, start), &TRIE_BYTE(trie, bytes_idx), bytes_size); + } + trie->bytes_free_next += bytes_size; + return start; + } else { + return NULL_TRIE_IDX; + } +} + +static uint16_t +flow_divert_trie_insert(struct flow_divert_trie *trie, uint16_t string_start, size_t string_len) +{ + uint16_t current = trie->root; + uint16_t child = trie->root; + uint16_t string_end = string_start + string_len; + uint16_t string_idx = string_start; + uint16_t string_remainder = string_len; + + while (child != NULL_TRIE_IDX) { + uint16_t parent = current; + uint16_t node_idx; + uint16_t current_end; + + current = child; + child = NULL_TRIE_IDX; + + current_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length; + + for (node_idx = TRIE_NODE(trie, current).start; + node_idx < current_end && + string_idx < string_end && + TRIE_BYTE(trie, node_idx) == TRIE_BYTE(trie, string_idx); + node_idx++, string_idx++); + + string_remainder = string_end - string_idx; + + if (node_idx < (TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length)) { + /* + * We did not reach the end of the current node's string. + * We need to split the current node into two: + * 1. A new node that contains the prefix of the node that matches + * the prefix of the string being inserted. + * 2. The current node modified to point to the remainder + * of the current node's string. + */ + uint16_t prefix = trie_node_alloc(trie); + if (prefix == NULL_TRIE_IDX) { + FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while splitting an existing node"); + return NULL_TRIE_IDX; + } + + /* + * Prefix points to the portion of the current nodes's string that has matched + * the input string thus far. + */ + TRIE_NODE(trie, prefix).start = TRIE_NODE(trie, current).start; + TRIE_NODE(trie, prefix).length = (node_idx - TRIE_NODE(trie, current).start); + + /* + * Prefix has the current node as the child corresponding to the first byte + * after the split. 
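
The split above is easiest to see with concrete strings. If the trie already holds the (hypothetical) signing ID "com.apple.Maps" as one node and "com.apple.Mail" is inserted, the bytes match through "com.apple.Ma"; that 12-byte prefix becomes the new prefix node, whose child map routes 'p' to the old remainder ("ps") and 'i' to a new leaf ("il"). The split point is simply the longest common prefix:

#include <stddef.h>
#include <stdint.h>

static size_t
common_prefix_len(const uint8_t *a, size_t a_len, const uint8_t *b, size_t b_len)
{
	size_t i;
	size_t n = (a_len < b_len) ? a_len : b_len;

	for (i = 0; i < n && a[i] == b[i]; i++)
		;
	return i;       /* 12 for "com.apple.Maps" vs. "com.apple.Mail" */
}
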
+			 */
+			TRIE_NODE(trie, prefix).child_map = trie_child_map_alloc(trie);
+			if (TRIE_NODE(trie, prefix).child_map == NULL_TRIE_IDX) {
+				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while splitting an existing node");
+				return NULL_TRIE_IDX;
+			}
+			TRIE_CHILD(trie, prefix, TRIE_BYTE(trie, node_idx)) = current;
+
+			/* Parent has the prefix as the child corresponding to the first byte in the prefix */
+			TRIE_CHILD(trie, parent, TRIE_BYTE(trie, TRIE_NODE(trie, prefix).start)) = prefix;
+
+			/* Current node is adjusted to point to the remainder */
+			TRIE_NODE(trie, current).start = node_idx;
+			TRIE_NODE(trie, current).length -= TRIE_NODE(trie, prefix).length;
+
+			/* We want to insert the new leaf (if any) as a child of the prefix */
+			current = prefix;
+		}
+
+		if (string_remainder > 0) {
+			/*
+			 * We still have bytes in the string that have not been matched yet.
+			 * If the current node has children, iterate to the child corresponding
+			 * to the next byte in the string.
+			 */
+			if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
+				child = TRIE_CHILD(trie, current, TRIE_BYTE(trie, string_idx));
+			}
+		}
+	} /* while (child != NULL_TRIE_IDX) */
+
+	if (string_remainder > 0) {
+		/* Add a new leaf containing the remainder of the string */
+		uint16_t leaf = trie_node_alloc(trie);
+		if (leaf == NULL_TRIE_IDX) {
+			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of trie nodes while inserting a new leaf");
+			return NULL_TRIE_IDX;
+		}
+
+		TRIE_NODE(trie, leaf).start = trie_bytes_move(trie, string_idx, string_remainder);
+		if (TRIE_NODE(trie, leaf).start == NULL_TRIE_IDX) {
+			FDLOG0(LOG_ERR, &nil_pcb, "Ran out of bytes while inserting a new leaf");
+			return NULL_TRIE_IDX;
+		}
+		TRIE_NODE(trie, leaf).length = string_remainder;
+
+		/* Set the new leaf as the child of the current node */
+		if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
+			TRIE_NODE(trie, current).child_map = trie_child_map_alloc(trie);
+			if (TRIE_NODE(trie, current).child_map == NULL_TRIE_IDX) {
+				FDLOG0(LOG_ERR, &nil_pcb, "Ran out of child maps while inserting a new leaf");
+				return NULL_TRIE_IDX;
+			}
+		}
+		TRIE_CHILD(trie, current, TRIE_BYTE(trie, TRIE_NODE(trie, leaf).start)) = leaf;
+		current = leaf;
+	} /* else duplicate or this string is a prefix of one of the existing strings */
+
+	return current;
+}
+
+static uint16_t
+flow_divert_trie_search(struct flow_divert_trie *trie, const uint8_t *string_bytes)
+{
+	uint16_t current = trie->root;
+	uint16_t string_idx = 0;
+
+	while (current != NULL_TRIE_IDX) {
+		uint16_t next = NULL_TRIE_IDX;
+		uint16_t node_end = TRIE_NODE(trie, current).start + TRIE_NODE(trie, current).length;
+		uint16_t node_idx;
+
+		for (node_idx = TRIE_NODE(trie, current).start;
+		     node_idx < node_end && string_bytes[string_idx] != '\0' && string_bytes[string_idx] == TRIE_BYTE(trie, node_idx);
+		     node_idx++, string_idx++);
+
+		if (node_idx == node_end) {
+			if (string_bytes[string_idx] == '\0') {
+				return current; /* Got an exact match */
+			} else if (TRIE_NODE(trie, current).child_map != NULL_TRIE_IDX) {
+				next = TRIE_CHILD(trie, current, string_bytes[string_idx]);
+			}
+		}
+		current = next;
+	}
+
+	return NULL_TRIE_IDX;
+}
+
+static int
+flow_divert_get_src_proc(struct socket *so, proc_t *proc, boolean_t match_delegate)
+{
+	int release = 0;
+
+	if (!match_delegate &&
+	    (so->so_flags & SOF_DELEGATED) &&
+	    (*proc == PROC_NULL || (*proc)->p_pid != so->e_pid))
+	{
+		*proc = proc_find(so->e_pid);
+		release = 1;
+	} else if (*proc == PROC_NULL) {
+		*proc = current_proc();
+	}
+
+	if (*proc != PROC_NULL) {
+		if ((*proc)->p_pid
== 0) { + if (release) { + proc_rele(*proc); + } + release = 0; + *proc = PROC_NULL; + } + } + + return release; +} + +static int +flow_divert_send_packet(struct flow_divert_pcb *fd_cb, mbuf_t packet, Boolean enqueue) +{ + int error; + + if (fd_cb->group == NULL) { + fd_cb->so->so_error = ECONNABORTED; + soisdisconnected(fd_cb->so); + return ECONNABORTED; + } + + lck_rw_lock_shared(&fd_cb->group->lck); + + if (MBUFQ_EMPTY(&fd_cb->group->send_queue)) { + error = ctl_enqueuembuf(g_flow_divert_kctl_ref, fd_cb->group->ctl_unit, packet, CTL_DATA_EOR); + } else { + error = ENOBUFS; + } + + if (error == ENOBUFS) { + if (enqueue) { + if (!lck_rw_lock_shared_to_exclusive(&fd_cb->group->lck)) { + lck_rw_lock_exclusive(&fd_cb->group->lck); + } + MBUFQ_ENQUEUE(&fd_cb->group->send_queue, packet); + error = 0; + } + OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &fd_cb->group->atomic_bits); + } + + lck_rw_done(&fd_cb->group->lck); + + return error; +} + +static int +flow_divert_send_connect(struct flow_divert_pcb *fd_cb, struct sockaddr *to, proc_t p) +{ + mbuf_t connect_packet = NULL; + int error = 0; + + error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT, &connect_packet); + if (error) { + goto done; + } + + error = flow_divert_packet_append_tlv(connect_packet, + FLOW_DIVERT_TLV_TRAFFIC_CLASS, + sizeof(fd_cb->so->so_traffic_class), + &fd_cb->so->so_traffic_class); + if (error) { + goto done; + } + + if (fd_cb->so->so_flags & SOF_DELEGATED) { + error = flow_divert_packet_append_tlv(connect_packet, + FLOW_DIVERT_TLV_PID, + sizeof(fd_cb->so->e_pid), + &fd_cb->so->e_pid); + if (error) { + goto done; + } + + error = flow_divert_packet_append_tlv(connect_packet, + FLOW_DIVERT_TLV_UUID, + sizeof(fd_cb->so->e_uuid), + &fd_cb->so->e_uuid); + if (error) { + goto done; + } + } else { + error = flow_divert_packet_append_tlv(connect_packet, + FLOW_DIVERT_TLV_PID, + sizeof(fd_cb->so->e_pid), + &fd_cb->so->last_pid); + if (error) { + goto done; + } + + error = flow_divert_packet_append_tlv(connect_packet, + FLOW_DIVERT_TLV_UUID, + sizeof(fd_cb->so->e_uuid), + &fd_cb->so->last_uuid); + if (error) { + goto done; + } + } + + if (fd_cb->connect_token != NULL) { + unsigned int token_len = m_length(fd_cb->connect_token); + mbuf_concatenate(connect_packet, fd_cb->connect_token); + mbuf_pkthdr_adjustlen(connect_packet, token_len); + fd_cb->connect_token = NULL; + } else { + uint32_t ctl_unit = htonl(fd_cb->control_group_unit); + int port; + int release_proc; + + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit); + if (error) { + goto done; + } + + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_ADDRESS, to->sa_len, to); + if (error) { + goto done; + } + + if (to->sa_family == AF_INET) { + port = ntohs((satosin(to))->sin_port); + } +#if INET6 + else { + port = ntohs((satosin6(to))->sin6_port); + } +#endif + + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_TARGET_PORT, sizeof(port), &port); + if (error) { + goto done; + } + + release_proc = flow_divert_get_src_proc(fd_cb->so, &p, FALSE); + if (p != PROC_NULL) { + proc_lock(p); + if (p->p_csflags & CS_VALID) { + const char *signing_id = cs_identity_get(p); + if (signing_id != NULL) { + error = flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_SIGNING_ID, strlen(signing_id), signing_id); + } + + if (error == 0) { + unsigned char cdhash[SHA1_RESULTLEN]; + error = proc_getcdhash(p, cdhash); + if (error == 0) { + error = 
flow_divert_packet_append_tlv(connect_packet, FLOW_DIVERT_TLV_CDHASH, sizeof(cdhash), cdhash);
+					}
+				}
+			}
+			proc_unlock(p);
+
+			if (release_proc) {
+				proc_rele(p);
+			}
+		}
+	}
+
+	error = flow_divert_send_packet(fd_cb, connect_packet, TRUE);
+	if (error) {
+		goto done;
+	}
+
+done:
+	if (error && connect_packet != NULL) {
+		mbuf_free(connect_packet);
+	}
+
+	return error;
+}
+
+static int
+flow_divert_send_connect_result(struct flow_divert_pcb *fd_cb)
+{
+	int error = 0;
+	mbuf_t packet = NULL;
+	int rbuff_space = 0;
+
+	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CONNECT_RESULT, &packet);
+	if (error) {
+		FDLOG(LOG_ERR, fd_cb, "failed to create a connect result packet: %d", error);
+		goto done;
+	}
+
+	rbuff_space = sbspace(&fd_cb->so->so_rcv);
+	if (rbuff_space < 0) {
+		rbuff_space = 0;
+	}
+	rbuff_space = htonl(rbuff_space);
+	error = flow_divert_packet_append_tlv(packet,
+	    FLOW_DIVERT_TLV_SPACE_AVAILABLE,
+	    sizeof(rbuff_space),
+	    &rbuff_space);
+	if (error) {
+		goto done;
+	}
+
+	error = flow_divert_send_packet(fd_cb, packet, TRUE);
+	if (error) {
+		goto done;
+	}
+
+done:
+	if (error && packet != NULL) {
+		mbuf_free(packet);
+	}
+
+	return error;
+}
+
+static int
+flow_divert_send_close(struct flow_divert_pcb *fd_cb, int how)
+{
+	int error = 0;
+	mbuf_t packet = NULL;
+	uint32_t zero = 0;
+
+	error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_CLOSE, &packet);
+	if (error) {
+		FDLOG(LOG_ERR, fd_cb, "failed to create a close packet: %d", error);
+		goto done;
+	}
+
+	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(zero), &zero);
+	if (error) {
+		FDLOG(LOG_ERR, fd_cb, "failed to add the error code TLV: %d", error);
+		goto done;
+	}
+
+	how = htonl(how);
+	error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_HOW, sizeof(how), &how);
+	if (error) {
+		FDLOG(LOG_ERR, fd_cb, "failed to add the how flag: %d", error);
+		goto done;
+	}
+
+	error = flow_divert_send_packet(fd_cb, packet, TRUE);
+	if (error) {
+		goto done;
+	}
+
+done:
+	if (error && packet != NULL) {
+		mbuf_free(packet);
+	}
+
+	return error;
+}
+
+static int
+flow_divert_tunnel_how_closed(struct flow_divert_pcb *fd_cb)
+{
+	if ((fd_cb->flags & (FLOW_DIVERT_TUNNEL_RD_CLOSED|FLOW_DIVERT_TUNNEL_WR_CLOSED)) ==
+	    (FLOW_DIVERT_TUNNEL_RD_CLOSED|FLOW_DIVERT_TUNNEL_WR_CLOSED))
+	{
+		return SHUT_RDWR;
+	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_RD_CLOSED) {
+		return SHUT_RD;
+	} else if (fd_cb->flags & FLOW_DIVERT_TUNNEL_WR_CLOSED) {
+		return SHUT_WR;
+	}
+
+	return -1;
+}
+
+/*
+ * Determine what close messages, if any, need to be sent to the tunnel and
+ * send them. Marks the socket as disconnected once the tunnel has been
+ * closed for both reads and writes.
+ */
+static void
+flow_divert_send_close_if_needed(struct flow_divert_pcb *fd_cb)
+{
+	int how = -1;
+
+	/* Do not send any close messages if there is still data in the send buffer */
+	if (fd_cb->so->so_snd.sb_cc == 0) {
+		if ((fd_cb->flags & (FLOW_DIVERT_READ_CLOSED|FLOW_DIVERT_TUNNEL_RD_CLOSED)) == FLOW_DIVERT_READ_CLOSED) {
+			/* Socket closed reads, but tunnel did not. Tell tunnel to close reads */
+			how = SHUT_RD;
+		}
+		if ((fd_cb->flags & (FLOW_DIVERT_WRITE_CLOSED|FLOW_DIVERT_TUNNEL_WR_CLOSED)) == FLOW_DIVERT_WRITE_CLOSED) {
+			/* Socket closed writes, but tunnel did not.
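
flow_divert_tunnel_how_closed() above reduces the two tunnel-direction flags to a SHUT_* value, and flow_divert_send_close_if_needed() uses it to decide when the socket is fully disconnected. A self-contained model of that mapping, with illustrative flag values rather than the kernel's:

#include <sys/socket.h>         /* SHUT_RD, SHUT_WR, SHUT_RDWR */

#define TUNNEL_RD_CLOSED        0x1
#define TUNNEL_WR_CLOSED        0x2

static int
tunnel_how_closed(int flags)
{
	if ((flags & (TUNNEL_RD_CLOSED | TUNNEL_WR_CLOSED)) ==
	    (TUNNEL_RD_CLOSED | TUNNEL_WR_CLOSED))
		return SHUT_RDWR;       /* closed in both directions */
	if (flags & TUNNEL_RD_CLOSED)
		return SHUT_RD;
	if (flags & TUNNEL_WR_CLOSED)
		return SHUT_WR;
	return -1;                      /* still open both ways */
}
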
Tell tunnel to close writes */ + if (how == SHUT_RD) { + how = SHUT_RDWR; + } else { + how = SHUT_WR; + } + } + } + + if (how != -1) { + FDLOG(LOG_INFO, fd_cb, "sending close, how = %d", how); + if (flow_divert_send_close(fd_cb, how) != ENOBUFS) { + /* Successfully sent the close packet. Record the ways in which the tunnel has been closed */ + if (how != SHUT_RD) { + fd_cb->flags |= FLOW_DIVERT_TUNNEL_WR_CLOSED; + } + if (how != SHUT_WR) { + fd_cb->flags |= FLOW_DIVERT_TUNNEL_RD_CLOSED; + } + } + } + + if (flow_divert_tunnel_how_closed(fd_cb) == SHUT_RDWR) { + soisdisconnected(fd_cb->so); + } +} + +static errno_t +flow_divert_send_data_packet(struct flow_divert_pcb *fd_cb, mbuf_t data, size_t data_len, Boolean force) +{ + mbuf_t packet; + mbuf_t last; + int error = 0; + + error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_DATA, &packet); + if (error) { + FDLOG(LOG_ERR, fd_cb, "flow_divert_packet_init failed: %d", error); + return error; + } + + last = m_last(packet); + mbuf_setnext(last, data); + mbuf_pkthdr_adjustlen(packet, data_len); + + error = flow_divert_send_packet(fd_cb, packet, force); + + if (error) { + mbuf_setnext(last, NULL); + mbuf_free(packet); + } else { + fd_cb->bytes_sent += data_len; + flow_divert_add_data_statistics(fd_cb, data_len, TRUE); + } + + return error; +} + +static void +flow_divert_send_buffered_data(struct flow_divert_pcb *fd_cb, Boolean force) +{ + size_t to_send; + size_t sent = 0; + int error = 0; + mbuf_t buffer; + + to_send = fd_cb->so->so_snd.sb_cc; + buffer = fd_cb->so->so_snd.sb_mb; + + if (buffer == NULL && to_send > 0) { + FDLOG(LOG_ERR, fd_cb, "Send buffer is NULL, but size is supposed to be %lu", to_send); + return; + } + + /* Ignore the send window if force is enabled */ + if (!force && (to_send > fd_cb->send_window)) { + to_send = fd_cb->send_window; + } + + while (sent < to_send) { + mbuf_t data; + size_t data_len; + + data_len = to_send - sent; + if (data_len > FLOW_DIVERT_CHUNK_SIZE) { + data_len = FLOW_DIVERT_CHUNK_SIZE; + } + + error = mbuf_copym(buffer, sent, data_len, MBUF_DONTWAIT, &data); + if (error) { + FDLOG(LOG_ERR, fd_cb, "mbuf_copym failed: %d", error); + break; + } + + error = flow_divert_send_data_packet(fd_cb, data, data_len, force); + if (error) { + mbuf_free(data); + break; + } + + sent += data_len; + } + + if (sent > 0) { + FDLOG(LOG_DEBUG, fd_cb, "sent %lu bytes of buffered data", sent); + if (fd_cb->send_window >= sent) { + fd_cb->send_window -= sent; + } else { + fd_cb->send_window = 0; + } + sbdrop(&fd_cb->so->so_snd, sent); + sowwakeup(fd_cb->so); + } +} + +static int +flow_divert_send_app_data(struct flow_divert_pcb *fd_cb, mbuf_t data) +{ + size_t to_send = mbuf_pkthdr_len(data); + size_t sent = 0; + int error = 0; + mbuf_t remaining_data = data; + mbuf_t pkt_data = NULL; + + if (to_send > fd_cb->send_window) { + to_send = fd_cb->send_window; + } + + if (fd_cb->so->so_snd.sb_cc > 0) { + to_send = 0; /* If the send buffer is non-empty, then we can't send anything */ + } + + while (sent < to_send) { + size_t pkt_data_len; + + pkt_data = remaining_data; + + if ((to_send - sent) > FLOW_DIVERT_CHUNK_SIZE) { + pkt_data_len = FLOW_DIVERT_CHUNK_SIZE; + error = mbuf_split(pkt_data, pkt_data_len, MBUF_DONTWAIT, &remaining_data); + if (error) { + FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error); + pkt_data = NULL; + break; + } + } else { + pkt_data_len = to_send - sent; + remaining_data = NULL; + } + + error = flow_divert_send_data_packet(fd_cb, pkt_data, pkt_data_len, FALSE); + + if (error) { + break; + } + + pkt_data 
= NULL; + sent += pkt_data_len; + } + + fd_cb->send_window -= sent; + + error = 0; + + if (pkt_data != NULL) { + if (sbspace(&fd_cb->so->so_snd) > 0) { + if (!sbappendstream(&fd_cb->so->so_snd, pkt_data)) { + FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with pkt_data, send buffer size = %u, send_window = %u\n", + fd_cb->so->so_snd.sb_cc, fd_cb->send_window); + } + } else { + error = ENOBUFS; + } + } + + if (remaining_data != NULL) { + if (sbspace(&fd_cb->so->so_snd) > 0) { + if (!sbappendstream(&fd_cb->so->so_snd, remaining_data)) { + FDLOG(LOG_ERR, fd_cb, "sbappendstream failed with remaining_data, send buffer size = %u, send_window = %u\n", + fd_cb->so->so_snd.sb_cc, fd_cb->send_window); + } + } else { + error = ENOBUFS; + } + } + + return error; +} + +static int +flow_divert_send_read_notification(struct flow_divert_pcb *fd_cb, uint32_t read_count) +{ + int error = 0; + mbuf_t packet = NULL; + uint32_t net_read_count = htonl(read_count); + + error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_READ_NOTIFY, &packet); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to create a read notification packet: %d", error); + goto done; + } + + error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_READ_COUNT, sizeof(net_read_count), &net_read_count); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to add the read count: %d", error); + goto done; + } + + error = flow_divert_send_packet(fd_cb, packet, TRUE); + if (error) { + goto done; + } + +done: + if (error && packet != NULL) { + mbuf_free(packet); + } + + return error; +} + +static int +flow_divert_send_traffic_class_update(struct flow_divert_pcb *fd_cb, int traffic_class) +{ + int error = 0; + mbuf_t packet = NULL; + + error = flow_divert_packet_init(fd_cb, FLOW_DIVERT_PKT_PROPERTIES_UPDATE, &packet); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to create a properties update packet: %d", error); + goto done; + } + + error = flow_divert_packet_append_tlv(packet, FLOW_DIVERT_TLV_TRAFFIC_CLASS, sizeof(traffic_class), &traffic_class); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to add the traffic class: %d", error); + goto done; + } + + error = flow_divert_send_packet(fd_cb, packet, TRUE); + if (error) { + goto done; + } + +done: + if (error && packet != NULL) { + mbuf_free(packet); + } + + return error; +} + +static void +flow_divert_handle_connect_result(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset) +{ + uint32_t connect_error; + uint32_t ctl_unit = 0; + int error = 0; + struct flow_divert_group *grp = NULL; + struct sockaddr_storage local_address; + int out_if_index = 0; + struct sockaddr_storage remote_address; + uint32_t send_window; + + memset(&local_address, 0, sizeof(local_address)); + memset(&remote_address, 0, sizeof(remote_address)); + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(connect_error), &connect_error, NULL); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to get the connect result: %d", error); + return; + } + + FDLOG(LOG_INFO, fd_cb, "received connect result %u", connect_error); + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_SPACE_AVAILABLE, sizeof(send_window), &send_window, NULL); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to get the send window: %d", error); + return; + } + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit, NULL); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to get the control unit: %d", error); + return; + } + + error = flow_divert_packet_get_tlv(packet, 
offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_address), &local_address, NULL); + if (error) { + FDLOG0(LOG_NOTICE, fd_cb, "No local address provided"); + } + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, NULL); + if (error) { + FDLOG0(LOG_NOTICE, fd_cb, "No remote address provided"); + } + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL); + if (error) { + FDLOG0(LOG_NOTICE, fd_cb, "No output if index provided"); + } + + connect_error = ntohl(connect_error); + ctl_unit = ntohl(ctl_unit); + + lck_rw_lock_shared(&g_flow_divert_group_lck); + + if (connect_error == 0) { + if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) { + FDLOG(LOG_ERR, fd_cb, "Connect result contains an invalid control unit: %u", ctl_unit); + error = EINVAL; + } else if (g_flow_divert_groups == NULL || g_active_group_count == 0) { + FDLOG0(LOG_ERR, fd_cb, "No active groups, dropping connection"); + error = EINVAL; + } else { + grp = g_flow_divert_groups[ctl_unit]; + if (grp == NULL) { + error = ECONNRESET; + } + } + } + + FDLOCK(fd_cb); + if (fd_cb->so != NULL) { + struct inpcb *inp = NULL; + struct ifnet *ifp = NULL; + struct flow_divert_group *old_group; + + socket_lock(fd_cb->so, 0); + + if (!(fd_cb->so->so_state & SS_ISCONNECTING)) { + goto done; + } + + inp = sotoinpcb(fd_cb->so); + + if (connect_error || error) { + goto set_socket_state; + } + + if (local_address.ss_family != 0) { + if (local_address.ss_len > sizeof(local_address)) { + local_address.ss_len = sizeof(local_address); + } + fd_cb->local_address = dup_sockaddr((struct sockaddr *)&local_address, 1); + } else { + error = EINVAL; + goto set_socket_state; + } + + if (remote_address.ss_family != 0) { + if (remote_address.ss_len > sizeof(remote_address)) { + remote_address.ss_len = sizeof(remote_address); + } + fd_cb->remote_address = dup_sockaddr((struct sockaddr *)&remote_address, 1); + } else { + error = EINVAL; + goto set_socket_state; + } + + ifnet_head_lock_shared(); + if (out_if_index > 0 && out_if_index <= if_index) { + ifp = ifindex2ifnet[out_if_index]; + } + + if (ifp != NULL) { + inp->inp_last_outifp = ifp; + } else { + error = EINVAL; + } + ifnet_head_done(); + + if (error) { + goto set_socket_state; + } + + if (fd_cb->group == NULL) { + error = EINVAL; + goto set_socket_state; + } + + old_group = fd_cb->group; + + lck_rw_lock_exclusive(&old_group->lck); + lck_rw_lock_exclusive(&grp->lck); + + RB_REMOVE(fd_pcb_tree, &old_group->pcb_tree, fd_cb); + if (RB_INSERT(fd_pcb_tree, &grp->pcb_tree, fd_cb) != NULL) { + panic("group with unit %u already contains a connection with hash %u", grp->ctl_unit, fd_cb->hash); + } + + fd_cb->group = grp; + + lck_rw_done(&grp->lck); + lck_rw_done(&old_group->lck); + + fd_cb->send_window = ntohl(send_window); + flow_divert_send_buffered_data(fd_cb, FALSE); + +set_socket_state: + if (!connect_error && !error) { + FDLOG0(LOG_INFO, fd_cb, "sending connect result"); + error = flow_divert_send_connect_result(fd_cb); + } + + if (connect_error || error) { + if (!connect_error) { + flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE); + fd_cb->so->so_error = error; + flow_divert_send_close_if_needed(fd_cb); + } else { + flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE); + fd_cb->so->so_error = connect_error; + } + soisdisconnected(fd_cb->so); + } else { + soisconnected(fd_cb->so); + } + +done: + socket_unlock(fd_cb->so, 0); + } + FDUNLOCK(fd_cb); + + 
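/* + * Note on lock ordering: g_flow_divert_group_lck was acquired shared + * before FDLOCK() and the socket lock above, and is dropped last here, + * so locks are released in the reverse of their acquisition order. + */ +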
lck_rw_done(&g_flow_divert_group_lck); +} + +static void +flow_divert_handle_close(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset) +{ + uint32_t close_error; + int error = 0; + int how; + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_ERROR_CODE, sizeof(close_error), &close_error, NULL); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to get the close error: %d", error); + return; + } + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_HOW, sizeof(how), &how, NULL); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to get the close how flag: %d", error); + return; + } + + how = ntohl(how); + + FDLOG(LOG_INFO, fd_cb, "close received, how = %d", how); + + FDLOCK(fd_cb); + if (fd_cb->so != NULL) { + socket_lock(fd_cb->so, 0); + + fd_cb->so->so_error = ntohl(close_error); + + flow_divert_update_closed_state(fd_cb, how, TRUE); + + how = flow_divert_tunnel_how_closed(fd_cb); + if (how == SHUT_RDWR) { + soisdisconnected(fd_cb->so); + } else if (how == SHUT_RD) { + socantrcvmore(fd_cb->so); + } else if (how == SHUT_WR) { + socantsendmore(fd_cb->so); + } + + socket_unlock(fd_cb->so, 0); + } + FDUNLOCK(fd_cb); +} + +static void +flow_divert_handle_data(struct flow_divert_pcb *fd_cb, mbuf_t packet, size_t offset) +{ + int error = 0; + mbuf_t data = NULL; + size_t data_size; + + data_size = (mbuf_pkthdr_len(packet) - offset); + + FDLOG(LOG_DEBUG, fd_cb, "received %lu bytes of data", data_size); + + error = mbuf_split(packet, offset, MBUF_DONTWAIT, &data); + if (error || data == NULL) { + FDLOG(LOG_ERR, fd_cb, "mbuf_split failed: %d", error); + return; + } + + FDLOCK(fd_cb); + if (fd_cb->so != NULL) { + socket_lock(fd_cb->so, 0); + if (flow_divert_check_no_cellular(fd_cb)) { + flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE); + flow_divert_send_close(fd_cb, SHUT_RDWR); + soisdisconnected(fd_cb->so); + } else if (!(fd_cb->so->so_state & SS_CANTRCVMORE)) { + if (sbappendstream(&fd_cb->so->so_rcv, data)) { + fd_cb->bytes_received += data_size; + flow_divert_add_data_statistics(fd_cb, data_size, FALSE); + fd_cb->sb_size = fd_cb->so->so_rcv.sb_cc; + sorwakeup(fd_cb->so); + data = NULL; + } else { + FDLOG0(LOG_ERR, fd_cb, "received data, but appendstream failed"); + } + } + socket_unlock(fd_cb->so, 0); + } + FDUNLOCK(fd_cb); + + if (data != NULL) { + mbuf_free(data); + } +} + +static void +flow_divert_handle_read_notification(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset) +{ + uint32_t read_count; + int error = 0; + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_READ_COUNT, sizeof(read_count), &read_count, NULL); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to get the read count: %d", error); + return; + } + + FDLOG(LOG_DEBUG, fd_cb, "received a read notification for %u bytes", read_count); + + FDLOCK(fd_cb); + if (fd_cb->so != NULL) { + socket_lock(fd_cb->so, 0); + fd_cb->send_window += ntohl(read_count); + flow_divert_send_buffered_data(fd_cb, FALSE); + socket_unlock(fd_cb->so, 0); + } + FDUNLOCK(fd_cb); +} + +static void +flow_divert_handle_group_init(struct flow_divert_group *group, mbuf_t packet, int offset) +{ + int error = 0; + size_t key_size = 0; + int log_level; + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, 0, NULL, &key_size); + if (error) { + FDLOG(LOG_ERR, &nil_pcb, "failed to get the key size: %d", error); + return; + } + + if (key_size == 0 || key_size > FLOW_DIVERT_MAX_KEY_SIZE) { + FDLOG(LOG_ERR, &nil_pcb, "Invalid key size: %lu", key_size); + return; + } + + error = 
flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOG_LEVEL, sizeof(log_level), &log_level, NULL); + if (!error) { + nil_pcb.log_level = log_level; + } + + lck_rw_lock_exclusive(&group->lck); + + MALLOC(group->token_key, uint8_t *, key_size, M_TEMP, M_WAITOK); + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_TOKEN_KEY, key_size, group->token_key, NULL); + if (error) { + FDLOG(LOG_ERR, &nil_pcb, "failed to get the token key: %d", error); + FREE(group->token_key, M_TEMP); + group->token_key = NULL; + lck_rw_done(&group->lck); + return; + } + + group->token_key_size = key_size; + + lck_rw_done(&group->lck); +} + +static void +flow_divert_handle_properties_update(struct flow_divert_pcb *fd_cb, mbuf_t packet, int offset) +{ + int error = 0; + struct sockaddr_storage local_address; + int out_if_index = 0; + struct sockaddr_storage remote_address; + + FDLOG0(LOG_INFO, fd_cb, "received a properties update"); + + memset(&local_address, 0, sizeof(local_address)); + memset(&remote_address, 0, sizeof(remote_address)); + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_LOCAL_ADDR, sizeof(local_address), &local_address, NULL); + if (error) { + FDLOG0(LOG_INFO, fd_cb, "No local address provided"); + } + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_REMOTE_ADDR, sizeof(remote_address), &remote_address, NULL); + if (error) { + FDLOG0(LOG_INFO, fd_cb, "No remote address provided"); + } + + error = flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_OUT_IF_INDEX, sizeof(out_if_index), &out_if_index, NULL); + if (error) { + FDLOG0(LOG_INFO, fd_cb, "No output if index provided"); + } + + FDLOCK(fd_cb); + if (fd_cb->so != NULL) { + struct inpcb *inp = NULL; + struct ifnet *ifp = NULL; + + socket_lock(fd_cb->so, 0); + + inp = sotoinpcb(fd_cb->so); + + if (local_address.ss_family != 0) { + if (local_address.ss_len > sizeof(local_address)) { + local_address.ss_len = sizeof(local_address); + } + fd_cb->local_address = dup_sockaddr((struct sockaddr *)&local_address, 1); + } + + if (remote_address.ss_family != 0) { + if (remote_address.ss_len > sizeof(remote_address)) { + remote_address.ss_len = sizeof(remote_address); + } + fd_cb->remote_address = dup_sockaddr((struct sockaddr *)&remote_address, 1); + } + + ifnet_head_lock_shared(); + if (out_if_index > 0 && out_if_index <= if_index) { + ifp = ifindex2ifnet[out_if_index]; + } + + if (ifp != NULL) { + inp->inp_last_outifp = ifp; + } + ifnet_head_done(); + + socket_unlock(fd_cb->so, 0); + } + FDUNLOCK(fd_cb); +} + +static void +flow_divert_handle_app_map_create(mbuf_t packet, int offset) +{ + size_t bytes_mem_size; + size_t child_maps_mem_size; + int cursor; + int error = 0; + struct flow_divert_trie new_trie; + int insert_error = 0; + size_t nodes_mem_size; + int prefix_count = 0; + int signing_id_count = 0; + + lck_rw_lock_exclusive(&g_flow_divert_group_lck); + + /* Re-set the current trie */ + if (g_signing_id_trie.memory != NULL) { + FREE(g_signing_id_trie.memory, M_TEMP); + } + memset(&g_signing_id_trie, 0, sizeof(g_signing_id_trie)); + g_signing_id_trie.root = NULL_TRIE_IDX; + + memset(&new_trie, 0, sizeof(new_trie)); + + /* Get the number of shared prefixes in the new set of signing ID strings */ + flow_divert_packet_get_tlv(packet, offset, FLOW_DIVERT_TLV_PREFIX_COUNT, sizeof(prefix_count), &prefix_count, NULL); + + /* Compute the number of signing IDs and the total amount of bytes needed to store them */ + for (cursor = flow_divert_packet_find_tlv(packet, offset, 
FLOW_DIVERT_TLV_SIGNING_ID, &error, 0); + cursor >= 0; + cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) + { + size_t sid_size = 0; + flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size); + new_trie.bytes_count += sid_size; + signing_id_count++; + } + + if (signing_id_count == 0) { + lck_rw_done(&g_flow_divert_group_lck); + return; + } + + new_trie.nodes_count = (prefix_count + signing_id_count + 1); /* + 1 for the root node */ + new_trie.child_maps_count = (prefix_count + 1); /* + 1 for the root node */ + + FDLOG(LOG_INFO, &nil_pcb, "Nodes count = %lu, child maps count = %lu, bytes_count = %lu", + new_trie.nodes_count, new_trie.child_maps_count, new_trie.bytes_count); + + nodes_mem_size = (sizeof(*new_trie.nodes) * new_trie.nodes_count); + child_maps_mem_size = (sizeof(*new_trie.child_maps) * CHILD_MAP_SIZE * new_trie.child_maps_count); + bytes_mem_size = (sizeof(*new_trie.bytes) * new_trie.bytes_count); + + MALLOC(new_trie.memory, void *, nodes_mem_size + child_maps_mem_size + bytes_mem_size, M_TEMP, M_WAITOK); + if (new_trie.memory == NULL) { + FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate %lu bytes of memory for the signing ID trie", + nodes_mem_size + child_maps_mem_size + bytes_mem_size); + lck_rw_done(&g_flow_divert_group_lck); + return; + } + + /* Initialize the free lists */ + new_trie.nodes = (struct flow_divert_trie_node *)new_trie.memory; + new_trie.nodes_free_next = 0; + memset(new_trie.nodes, 0, nodes_mem_size); + + new_trie.child_maps = (uint16_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size); + new_trie.child_maps_free_next = 0; + memset(new_trie.child_maps, 0xff, child_maps_mem_size); + + new_trie.bytes = (uint8_t *)(void *)((uint8_t *)new_trie.memory + nodes_mem_size + child_maps_mem_size); + new_trie.bytes_free_next = 0; + + /* The root is an empty node */ + new_trie.root = trie_node_alloc(&new_trie); + + /* Add each signing ID to the trie */ + for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0); + cursor >= 0; + cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) + { + size_t sid_size = 0; + flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size); + if (new_trie.bytes_free_next + sid_size <= new_trie.bytes_count) { + boolean_t is_dns; + uint16_t new_node_idx; + flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, sid_size, &TRIE_BYTE(&new_trie, new_trie.bytes_free_next), NULL); + is_dns = (sid_size == sizeof(FLOW_DIVERT_DNS_SERVICE_SIGNING_ID) - 1 && + !memcmp(&TRIE_BYTE(&new_trie, new_trie.bytes_free_next), + FLOW_DIVERT_DNS_SERVICE_SIGNING_ID, + sid_size)); + new_node_idx = flow_divert_trie_insert(&new_trie, new_trie.bytes_free_next, sid_size); + if (new_node_idx != NULL_TRIE_IDX) { + if (is_dns) { + FDLOG(LOG_NOTICE, &nil_pcb, "Setting group unit for %s to %d", FLOW_DIVERT_DNS_SERVICE_SIGNING_ID, DNS_SERVICE_GROUP_UNIT); + TRIE_NODE(&new_trie, new_node_idx).group_unit = DNS_SERVICE_GROUP_UNIT; + } + } else { + insert_error = EINVAL; + break; + } + } else { + FDLOG0(LOG_ERR, &nil_pcb, "No place to put signing ID for insertion"); + insert_error = ENOBUFS; + break; + } + } + + if (!insert_error) { + g_signing_id_trie = new_trie; + } else { + FREE(new_trie.memory, M_TEMP); + } + + lck_rw_done(&g_flow_divert_group_lck); +} + +static void +flow_divert_handle_app_map_update(struct flow_divert_group *group, mbuf_t packet, int offset) +{ + int error = 0; + int cursor; + size_t max_size = 0; 
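+ /* + * Two passes over the packet: the first finds the size of the largest + * FLOW_DIVERT_TLV_SIGNING_ID so that a single buffer can hold any of + * them; the second looks each signing ID up in the trie and points the + * matching node at this group's control unit. + */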
+ uint8_t *signing_id; + uint32_t ctl_unit; + + lck_rw_lock_shared(&group->lck); + ctl_unit = group->ctl_unit; + lck_rw_done(&group->lck); + + for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0); + cursor >= 0; + cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) + { + size_t sid_size = 0; + flow_divert_packet_get_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, 0, NULL, &sid_size); + if (sid_size > max_size) { + max_size = sid_size; + } + } + + MALLOC(signing_id, uint8_t *, max_size + 1, M_TEMP, M_WAITOK); + if (signing_id == NULL) { + FDLOG(LOG_ERR, &nil_pcb, "Failed to allocate a string to hold the signing ID (size %lu)", max_size); + return; + } + + for (cursor = flow_divert_packet_find_tlv(packet, offset, FLOW_DIVERT_TLV_SIGNING_ID, &error, 0); + cursor >= 0; + cursor = flow_divert_packet_find_tlv(packet, cursor, FLOW_DIVERT_TLV_SIGNING_ID, &error, 1)) + { + size_t signing_id_len = 0; + uint16_t node; + + flow_divert_packet_get_tlv(packet, + cursor, FLOW_DIVERT_TLV_SIGNING_ID, max_size, signing_id, &signing_id_len); + + signing_id[signing_id_len] = '\0'; + + lck_rw_lock_exclusive(&g_flow_divert_group_lck); + + node = flow_divert_trie_search(&g_signing_id_trie, signing_id); + if (node != NULL_TRIE_IDX) { + if (TRIE_NODE(&g_signing_id_trie, node).group_unit != DNS_SERVICE_GROUP_UNIT) { + FDLOG(LOG_INFO, &nil_pcb, "Setting %s to ctl unit %u", signing_id, group->ctl_unit); + TRIE_NODE(&g_signing_id_trie, node).group_unit = ctl_unit; + } + } else { + FDLOG(LOG_ERR, &nil_pcb, "Failed to find signing ID %s", signing_id); + } + + lck_rw_done(&g_flow_divert_group_lck); + } + + FREE(signing_id, M_TEMP); +} + +static int +flow_divert_input(mbuf_t packet, struct flow_divert_group *group) +{ + struct flow_divert_packet_header hdr; + int error = 0; + struct flow_divert_pcb *fd_cb; + + if (mbuf_pkthdr_len(packet) < sizeof(hdr)) { + FDLOG(LOG_ERR, &nil_pcb, "got a bad packet, length (%lu) < sizeof hdr (%lu)", mbuf_pkthdr_len(packet), sizeof(hdr)); + error = EINVAL; + goto done; + } + + error = mbuf_copydata(packet, 0, sizeof(hdr), &hdr); + if (error) { + FDLOG(LOG_ERR, &nil_pcb, "mbuf_copydata failed for the header: %d", error); + error = ENOBUFS; + goto done; + } + + hdr.conn_id = ntohl(hdr.conn_id); + + if (hdr.conn_id == 0) { + switch (hdr.packet_type) { + case FLOW_DIVERT_PKT_GROUP_INIT: + flow_divert_handle_group_init(group, packet, sizeof(hdr)); + break; + case FLOW_DIVERT_PKT_APP_MAP_CREATE: + flow_divert_handle_app_map_create(packet, sizeof(hdr)); + break; + case FLOW_DIVERT_PKT_APP_MAP_UPDATE: + flow_divert_handle_app_map_update(group, packet, sizeof(hdr)); + break; + default: + FDLOG(LOG_WARNING, &nil_pcb, "got an unknown message type: %d", hdr.packet_type); + break; + } + goto done; + } + + fd_cb = flow_divert_pcb_lookup(hdr.conn_id, group); /* This retains the PCB */ + if (fd_cb == NULL) { + if (hdr.packet_type != FLOW_DIVERT_PKT_CLOSE && hdr.packet_type != FLOW_DIVERT_PKT_READ_NOTIFY) { + FDLOG(LOG_NOTICE, &nil_pcb, "got a %s message from group %d for an unknown pcb: %u", flow_divert_packet_type2str(hdr.packet_type), group->ctl_unit, hdr.conn_id); + } + goto done; + } + + switch (hdr.packet_type) { + case FLOW_DIVERT_PKT_CONNECT_RESULT: + flow_divert_handle_connect_result(fd_cb, packet, sizeof(hdr)); + break; + case FLOW_DIVERT_PKT_CLOSE: + flow_divert_handle_close(fd_cb, packet, sizeof(hdr)); + break; + case FLOW_DIVERT_PKT_DATA: + flow_divert_handle_data(fd_cb, packet, sizeof(hdr)); + break; + case 
FLOW_DIVERT_PKT_READ_NOTIFY: + flow_divert_handle_read_notification(fd_cb, packet, sizeof(hdr)); + break; + case FLOW_DIVERT_PKT_PROPERTIES_UPDATE: + flow_divert_handle_properties_update(fd_cb, packet, sizeof(hdr)); + break; + default: + FDLOG(LOG_WARNING, fd_cb, "got an unknown message type: %d", hdr.packet_type); + break; + } + + FDRELEASE(fd_cb); + +done: + mbuf_free(packet); + return error; +} + +static void +flow_divert_close_all(struct flow_divert_group *group) +{ + struct flow_divert_pcb *fd_cb; + SLIST_HEAD(, flow_divert_pcb) tmp_list; + + SLIST_INIT(&tmp_list); + + lck_rw_lock_exclusive(&group->lck); + + MBUFQ_DRAIN(&group->send_queue); + + RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) { + FDRETAIN(fd_cb); + SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry); + } + + lck_rw_done(&group->lck); + + while (!SLIST_EMPTY(&tmp_list)) { + fd_cb = SLIST_FIRST(&tmp_list); + FDLOCK(fd_cb); + SLIST_REMOVE_HEAD(&tmp_list, tmp_list_entry); + if (fd_cb->so != NULL) { + socket_lock(fd_cb->so, 0); + flow_divert_pcb_remove(fd_cb); + flow_divert_update_closed_state(fd_cb, SHUT_RDWR, TRUE); + fd_cb->so->so_error = ECONNABORTED; + socket_unlock(fd_cb->so, 0); + } + FDUNLOCK(fd_cb); + FDRELEASE(fd_cb); + } +} + +void +flow_divert_detach(struct socket *so) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + so->so_flags &= ~SOF_FLOW_DIVERT; + so->so_fd_pcb = NULL; + + FDLOG(LOG_INFO, fd_cb, "Detaching, ref count = %d", fd_cb->ref_count); + + if (fd_cb->group != NULL) { + /* Last-ditch effort to send any buffered data */ + flow_divert_send_buffered_data(fd_cb, TRUE); + + /* Remove from the group */ + flow_divert_pcb_remove(fd_cb); + } + + socket_unlock(so, 0); + FDLOCK(fd_cb); + fd_cb->so = NULL; + FDUNLOCK(fd_cb); + socket_lock(so, 0); + + FDRELEASE(fd_cb); /* Release the socket's reference */ +} + +static int +flow_divert_close(struct socket *so) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + FDLOG0(LOG_INFO, fd_cb, "Closing"); + + soisdisconnecting(so); + sbflush(&so->so_rcv); + + flow_divert_send_buffered_data(fd_cb, TRUE); + flow_divert_update_closed_state(fd_cb, SHUT_RDWR, FALSE); + flow_divert_send_close_if_needed(fd_cb); + + /* Remove from the group */ + flow_divert_pcb_remove(fd_cb); + + return 0; +} + +static int +flow_divert_disconnectx(struct socket *so, associd_t aid, connid_t cid __unused) +{ + if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) { + return (EINVAL); + } + + return (flow_divert_close(so)); +} + +static int +flow_divert_shutdown(struct socket *so) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + FDLOG0(LOG_INFO, fd_cb, "Can't send more"); + + socantsendmore(so); + + flow_divert_update_closed_state(fd_cb, SHUT_WR, FALSE); + flow_divert_send_close_if_needed(fd_cb); + + return 0; +} + +static int +flow_divert_rcvd(struct socket *so, int flags __unused) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + uint32_t latest_sb_size; + uint32_t read_count; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + latest_sb_size = fd_cb->so->so_rcv.sb_cc; + + if (fd_cb->sb_size < latest_sb_size) { + panic("flow divert rcvd event handler (%u): saved rcv buffer size (%u) is less than latest rcv buffer size (%u)", + fd_cb->hash, fd_cb->sb_size, latest_sb_size); + } + + read_count = fd_cb->sb_size - latest_sb_size; + + FDLOG(LOG_DEBUG, fd_cb, 
"app read %u bytes", read_count); + + if (read_count > 0 && flow_divert_send_read_notification(fd_cb, read_count) == 0) { + fd_cb->bytes_read_by_app += read_count; + fd_cb->sb_size = latest_sb_size; + } + + return 0; +} + +static errno_t +flow_divert_dup_addr(sa_family_t family, struct sockaddr *addr, + struct sockaddr **dup) +{ + int error = 0; + struct sockaddr *result; + struct sockaddr_storage ss; + + if (addr != NULL) { + result = addr; + } else { + memset(&ss, 0, sizeof(ss)); + ss.ss_family = family; + if (ss.ss_family == AF_INET) { + ss.ss_len = sizeof(struct sockaddr_in); + } +#if INET6 + else if (ss.ss_family == AF_INET6) { + ss.ss_len = sizeof(struct sockaddr_in6); + } +#endif /* INET6 */ + else { + error = EINVAL; + } + result = (struct sockaddr *)&ss; + } + + if (!error) { + *dup = dup_sockaddr(result, 1); + if (*dup == NULL) { + error = ENOBUFS; + } + } + + return error; +} + +static errno_t +flow_divert_getpeername(struct socket *so, struct sockaddr **sa) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + return flow_divert_dup_addr(so->so_proto->pr_domain->dom_family, + fd_cb->remote_address, + sa); +} + +static errno_t +flow_divert_getsockaddr(struct socket *so, struct sockaddr **sa) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + return flow_divert_dup_addr(so->so_proto->pr_domain->dom_family, + fd_cb->local_address, + sa); +} + +static errno_t +flow_divert_ctloutput(struct socket *so, struct sockopt *sopt) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + if (sopt->sopt_name == SO_TRAFFIC_CLASS) { + if (sopt->sopt_dir == SOPT_SET && fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) { + flow_divert_send_traffic_class_update(fd_cb, so->so_traffic_class); + } + } + + if (SOCK_DOM(so) == PF_INET) { + return g_tcp_protosw->pr_ctloutput(so, sopt); + } +#if INET6 + else if (SOCK_DOM(so) == PF_INET6) { + return g_tcp6_protosw->pr_ctloutput(so, sopt); + } +#endif + return 0; +} + +errno_t +flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + int error = 0; + struct inpcb *inp = sotoinpcb(so); + struct sockaddr_in *sinp; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + if (fd_cb->group == NULL) { + error = ENETUNREACH; + goto done; + } + + if (inp == NULL) { + error = EINVAL; + goto done; + } else if (inp->inp_state == INPCB_STATE_DEAD) { + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + } else { + error = EINVAL; + } + goto done; + } + + sinp = (struct sockaddr_in *)(void *)to; + if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { + error = EAFNOSUPPORT; + goto done; + } + + if ((fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED) && !(fd_cb->flags & FLOW_DIVERT_TRANSFERRED)) { + error = EALREADY; + goto done; + } + + if (fd_cb->flags & FLOW_DIVERT_TRANSFERRED) { + FDLOG0(LOG_INFO, fd_cb, "fully transferred"); + fd_cb->flags &= ~FLOW_DIVERT_TRANSFERRED; + if (fd_cb->remote_address != NULL) { + soisconnected(fd_cb->so); + goto done; + } + } + + FDLOG0(LOG_INFO, fd_cb, "Connecting"); + + error = flow_divert_send_connect(fd_cb, to, p); + if (error) { + goto done; + } + + fd_cb->flags |= FLOW_DIVERT_CONNECT_STARTED; + + soisconnecting(so); + +done: + return error; +} + +static int +flow_divert_connectx_out_common(struct 
socket *so, int af, + struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, + struct proc *p, uint32_t ifscope __unused, associd_t aid __unused, + connid_t *pcid, uint32_t flags __unused, void *arg __unused, + uint32_t arglen __unused) +{ + struct sockaddr_entry *src_se = NULL, *dst_se = NULL; + struct inpcb *inp = sotoinpcb(so); + int error; + + if (inp == NULL) { + return (EINVAL); + } + + VERIFY(dst_sl != NULL); + + /* select source (if specified) and destination addresses */ + error = in_selectaddrs(af, src_sl, &src_se, dst_sl, &dst_se); + if (error != 0) { + return (error); + } + + VERIFY(*dst_sl != NULL && dst_se != NULL); + VERIFY(src_se == NULL || *src_sl != NULL); + VERIFY(dst_se->se_addr->sa_family == af); + VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); + + error = flow_divert_connect_out(so, dst_se->se_addr, p); + + if (error == 0 && pcid != NULL) { + *pcid = 1; /* there is only 1 connection for a TCP */ + } + + return (error); +} + +static int +flow_divert_connectx_out(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ + return (flow_divert_connectx_out_common(so, AF_INET, src_sl, dst_sl, + p, ifscope, aid, pcid, flags, arg, arglen)); +} + +#if INET6 +static int +flow_divert_connectx6_out(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ + return (flow_divert_connectx_out_common(so, AF_INET6, src_sl, dst_sl, + p, ifscope, aid, pcid, flags, arg, arglen)); +} +#endif /* INET6 */ + +static int +flow_divert_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, + uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, + user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, + user_addr_t aux_data __unused, uint32_t *aux_len) +{ + int error = 0; + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + struct ifnet *ifp = NULL; + struct inpcb *inp = sotoinpcb(so); + + VERIFY((so->so_flags & SOF_FLOW_DIVERT)); + + if (so->so_fd_pcb == NULL || inp == NULL) { + error = EINVAL; + goto out; + } + + if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) { + error = EINVAL; + goto out; + } + + ifp = inp->inp_last_outifp; + *ifindex = ((ifp != NULL) ? 
ifp->if_index : 0); + *soerror = so->so_error; + *flags = 0; + + if (so->so_state & SS_ISCONNECTED) { + *flags |= (CIF_CONNECTED | CIF_PREFERRED); + } + + if (fd_cb->local_address == NULL) { + struct sockaddr_in sin; + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + *src_len = sin.sin_len; + if (src != USER_ADDR_NULL) { + error = copyout(&sin, src, sin.sin_len); + if (error != 0) { + goto out; + } + } + } else { + *src_len = fd_cb->local_address->sa_len; + if (src != USER_ADDR_NULL) { + error = copyout(fd_cb->local_address, src, fd_cb->local_address->sa_len); + if (error != 0) { + goto out; + } + } + } + + if (fd_cb->remote_address == NULL) { + struct sockaddr_in sin; + bzero(&sin, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; + *dst_len = sin.sin_len; + if (dst != USER_ADDR_NULL) { + error = copyout(&sin, dst, sin.sin_len); + if (error != 0) { + goto out; + } + } + } else { + *dst_len = fd_cb->remote_address->sa_len; + if (dst != USER_ADDR_NULL) { + error = copyout(fd_cb->remote_address, dst, fd_cb->remote_address->sa_len); + if (error != 0) { + goto out; + } + } + } + + *aux_type = 0; + *aux_len = 0; + +out: + return error; +} + +static int +flow_divert_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp __unused, struct proc *p __unused) +{ + int error = 0; + + switch (cmd) { + case SIOCGCONNINFO32: { + struct so_cinforeq32 cifr; + bcopy(data, &cifr, sizeof (cifr)); + error = flow_divert_getconninfo(so, cifr.scir_cid, &cifr.scir_flags, + &cifr.scir_ifindex, &cifr.scir_error, cifr.scir_src, + &cifr.scir_src_len, cifr.scir_dst, &cifr.scir_dst_len, + &cifr.scir_aux_type, cifr.scir_aux_data, + &cifr.scir_aux_len); + if (error == 0) { + bcopy(&cifr, data, sizeof (cifr)); + } + break; + } + + case SIOCGCONNINFO64: { + struct so_cinforeq64 cifr; + bcopy(data, &cifr, sizeof (cifr)); + error = flow_divert_getconninfo(so, cifr.scir_cid, &cifr.scir_flags, + &cifr.scir_ifindex, &cifr.scir_error, cifr.scir_src, + &cifr.scir_src_len, cifr.scir_dst, &cifr.scir_dst_len, + &cifr.scir_aux_type, cifr.scir_aux_data, + &cifr.scir_aux_len); + if (error == 0) { + bcopy(&cifr, data, sizeof (cifr)); + } + break; + } + + default: + error = EOPNOTSUPP; + } + + return error; +} + +static int +flow_divert_in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) +{ + int error = flow_divert_control(so, cmd, data, ifp, p); + + if (error == EOPNOTSUPP) { + error = in_control(so, cmd, data, ifp, p); + } + + return error; +} + +static int +flow_divert_in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) +{ + int error = flow_divert_control(so, cmd, data, ifp, p); + + if (error == EOPNOTSUPP) { + error = in6_control(so, cmd, data, ifp, p); + } + + return error; +} + +static errno_t +flow_divert_data_out(struct socket *so, int flags, mbuf_t data, struct sockaddr *to, mbuf_t control, struct proc *p __unused) +{ + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + int error = 0; + struct inpcb *inp; + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + inp = sotoinpcb(so); + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) { + error = ECONNRESET; + goto done; + } + + if (control && mbuf_len(control) > 0) { + error = EINVAL; + goto done; + } + + if (flags & MSG_OOB) { + error = EINVAL; + goto done; /* We don't support OOB data */ + } + + error = flow_divert_check_no_cellular(fd_cb); + if (error) { + goto done; + } + + /* Implicit connect */ + if 
(!(fd_cb->flags & FLOW_DIVERT_CONNECT_STARTED)) { + FDLOG0(LOG_INFO, fd_cb, "implicit connect"); + error = flow_divert_connect_out(so, to, NULL); + if (error) { + goto done; + } + } + + FDLOG(LOG_DEBUG, fd_cb, "app wrote %lu bytes", mbuf_pkthdr_len(data)); + + fd_cb->bytes_written_by_app += mbuf_pkthdr_len(data); + error = flow_divert_send_app_data(fd_cb, data); + if (error) { + goto done; + } + + data = NULL; + + if (flags & PRUS_EOF) { + flow_divert_shutdown(so); + } + +done: + if (data) { + mbuf_free(data); + } + if (control) { + mbuf_free(control); + } + return error; +} + +boolean_t +flow_divert_is_dns_service(struct socket *so) +{ + uint32_t ctl_unit = 0; + flow_divert_check_policy(so, NULL, TRUE, &ctl_unit); + FDLOG(LOG_INFO, &nil_pcb, "Check for DNS resulted in %u", ctl_unit); + return (ctl_unit == DNS_SERVICE_GROUP_UNIT); +} + +errno_t +flow_divert_check_policy(struct socket *so, proc_t p, boolean_t match_delegate, uint32_t *ctl_unit) +{ + int error = EPROTOTYPE; + + if (ctl_unit != NULL) { + *ctl_unit = 0; + } + + if (SOCK_DOM(so) != PF_INET +#if INET6 + && SOCK_DOM(so) != PF_INET6 +#endif + ) + { + return error; + } + + if (g_signing_id_trie.root != NULL_TRIE_IDX) { + int release_proc = flow_divert_get_src_proc(so, &p, match_delegate); + if (p != PROC_NULL) { + proc_lock(p); + if (p->p_csflags & CS_VALID) { + const char *signing_id = cs_identity_get(p); + if (signing_id != NULL) { + uint16_t result = NULL_TRIE_IDX; + lck_rw_lock_shared(&g_flow_divert_group_lck); + result = flow_divert_trie_search(&g_signing_id_trie, (const uint8_t *)signing_id); + if (result != NULL_TRIE_IDX) { + uint32_t unit = TRIE_NODE(&g_signing_id_trie, result).group_unit; + + error = 0; + + FDLOG(LOG_INFO, &nil_pcb, "%s matched, ctl_unit = %u", signing_id, unit); + + if (ctl_unit != NULL) { + *ctl_unit = unit; + } + } + lck_rw_done(&g_flow_divert_group_lck); + } + } + proc_unlock(p); + + if (release_proc) { + proc_rele(p); + } + } + } + + return error; +} + +static void +flow_divert_set_protosw(struct socket *so) +{ + so->so_flags |= SOF_FLOW_DIVERT; + if (SOCK_DOM(so) == PF_INET) { + so->so_proto = &g_flow_divert_in_protosw; + } +#if INET6 + else { + so->so_proto = (struct protosw *)&g_flow_divert_in6_protosw; + } +#endif /* INET6 */ +} + +static errno_t +flow_divert_attach(struct socket *so, uint32_t flow_id, uint32_t ctl_unit) +{ + int error = 0; + struct flow_divert_pcb *fd_cb = NULL; + struct ifnet *ifp = NULL; + struct inpcb *inp = NULL; + struct socket *old_so; + mbuf_t recv_data = NULL; + + socket_unlock(so, 0); + + FDLOG(LOG_INFO, &nil_pcb, "Attaching socket to flow %u", flow_id); + + /* Find the flow divert control block */ + lck_rw_lock_shared(&g_flow_divert_group_lck); + if (g_flow_divert_groups != NULL && g_active_group_count > 0) { + struct flow_divert_group *group = g_flow_divert_groups[ctl_unit]; + if (group != NULL) { + fd_cb = flow_divert_pcb_lookup(flow_id, group); + } + } + lck_rw_done(&g_flow_divert_group_lck); + + if (fd_cb == NULL) { + error = ENOENT; + goto done; + } + + FDLOCK(fd_cb); + + /* Dis-associate the flow divert control block from its current socket */ + old_so = fd_cb->so; + + inp = sotoinpcb(old_so); + + VERIFY(inp != NULL); + + socket_lock(old_so, 0); + soisdisconnected(old_so); + old_so->so_flags &= ~SOF_FLOW_DIVERT; + old_so->so_fd_pcb = NULL; + old_so->so_proto = pffindproto(SOCK_DOM(old_so), IPPROTO_TCP, SOCK_STREAM); + fd_cb->so = NULL; + /* Save the output interface */ + ifp = inp->inp_last_outifp; + if (old_so->so_rcv.sb_cc > 0) { + error = 
mbuf_dup(old_so->so_rcv.sb_mb, MBUF_DONTWAIT, &recv_data); + sbflush(&old_so->so_rcv); + } + socket_unlock(old_so, 0); + + /* Associate the new socket with the flow divert control block */ + socket_lock(so, 0); + so->so_fd_pcb = fd_cb; + inp = sotoinpcb(so); + inp->inp_last_outifp = ifp; + if (recv_data != NULL) { + if (sbappendstream(&so->so_rcv, recv_data)) { + sorwakeup(so); + } + } + flow_divert_set_protosw(so); + socket_unlock(so, 0); + + fd_cb->so = so; + fd_cb->flags |= FLOW_DIVERT_TRANSFERRED; + + FDUNLOCK(fd_cb); + +done: + socket_lock(so, 0); + + if (fd_cb != NULL) { + FDRELEASE(fd_cb); /* Release the reference obtained via flow_divert_pcb_lookup */ + } + + return error; +} + +errno_t +flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit) +{ + errno_t error = 0; + struct flow_divert_pcb *fd_cb; + + if (so->so_flags & SOF_FLOW_DIVERT) { + return EALREADY; + } + + fd_cb = flow_divert_pcb_create(so); + if (fd_cb != NULL) { + error = flow_divert_pcb_insert(fd_cb, ctl_unit); + if (error) { + FDLOG(LOG_ERR, fd_cb, "pcb insert failed: %d", error); + FDRELEASE(fd_cb); + } else { + fd_cb->log_level = LOG_NOTICE; + fd_cb->control_group_unit = ctl_unit; + so->so_fd_pcb = fd_cb; + + flow_divert_set_protosw(so); + + FDLOG0(LOG_INFO, fd_cb, "Created"); + } + } else { + error = ENOMEM; + } + + return error; +} + +errno_t +flow_divert_token_set(struct socket *so, struct sockopt *sopt) +{ + uint32_t ctl_unit = 0; + uint32_t key_unit = 0; + uint32_t flow_id = 0; + int error = 0; + mbuf_t token = NULL; + + if (so->so_flags & SOF_FLOW_DIVERT) { + error = EALREADY; + goto done; + } + + if (g_init_result) { + FDLOG(LOG_ERR, &nil_pcb, "flow_divert_init failed (%d), cannot use flow divert", g_init_result); + error = ENOPROTOOPT; + goto done; + } + + if (SOCK_TYPE(so) != SOCK_STREAM || + SOCK_PROTO(so) != IPPROTO_TCP || + (SOCK_DOM(so) != PF_INET +#if INET6 + && SOCK_DOM(so) != PF_INET6 +#endif + )) + { + error = EINVAL; + goto done; + } else { + struct tcpcb *tp = sototcpcb(so); + if (tp == NULL || tp->t_state != TCPS_CLOSED) { + error = EINVAL; + goto done; + } + } + + error = soopt_getm(sopt, &token); + if (error) { + goto done; + } + + error = soopt_mcopyin(sopt, token); + if (error) { + goto done; + } + + error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(key_unit), (void *)&key_unit, NULL); + if (!error) { + key_unit = ntohl(key_unit); + } else if (error != ENOENT) { + FDLOG(LOG_ERR, &nil_pcb, "Failed to get the key unit from the token: %d", error); + goto done; + } else { + key_unit = 0; + } + + error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), (void *)&ctl_unit, NULL); + if (error) { + FDLOG(LOG_ERR, &nil_pcb, "Failed to get the control socket unit from the token: %d", error); + goto done; + } + + /* A valid kernel control unit is required */ + ctl_unit = ntohl(ctl_unit); + if (ctl_unit == 0 || ctl_unit >= GROUP_COUNT_MAX) { + FDLOG(LOG_ERR, &nil_pcb, "Got an invalid control socket unit: %u", ctl_unit); + error = EINVAL; + goto done; + } + + socket_unlock(so, 0); + error = flow_divert_packet_verify_hmac(token, (key_unit != 0 ? 
key_unit : ctl_unit)); + socket_lock(so, 0); + + if (error) { + FDLOG(LOG_ERR, &nil_pcb, "HMAC verification failed: %d", error); + goto done; + } + + error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_FLOW_ID, sizeof(flow_id), (void *)&flow_id, NULL); + if (error && error != ENOENT) { + FDLOG(LOG_ERR, &nil_pcb, "Failed to get the flow ID from the token: %d", error); + goto done; + } + + if (flow_id == 0) { + error = flow_divert_pcb_init(so, ctl_unit); + if (error == 0) { + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + int log_level = LOG_NOTICE; + + error = flow_divert_packet_get_tlv(token, 0, FLOW_DIVERT_TLV_LOG_LEVEL, + sizeof(log_level), &log_level, NULL); + if (error == 0) { + fd_cb->log_level = log_level; + } + error = 0; + + fd_cb->connect_token = token; + token = NULL; + } + } else { + error = flow_divert_attach(so, flow_id, ctl_unit); + } + +done: + if (token != NULL) { + mbuf_freem(token); + } + + return error; +} + +errno_t +flow_divert_token_get(struct socket *so, struct sockopt *sopt) +{ + uint32_t ctl_unit; + int error = 0; + uint8_t hmac[SHA_DIGEST_LENGTH]; + struct flow_divert_pcb *fd_cb = so->so_fd_pcb; + mbuf_t token = NULL; + struct flow_divert_group *control_group = NULL; + + if (!(so->so_flags & SOF_FLOW_DIVERT)) { + error = EINVAL; + goto done; + } + + VERIFY((so->so_flags & SOF_FLOW_DIVERT) && so->so_fd_pcb != NULL); + + if (fd_cb->group == NULL) { + error = EINVAL; + goto done; + } + + error = mbuf_gethdr(MBUF_DONTWAIT, MBUF_TYPE_HEADER, &token); + if (error) { + FDLOG(LOG_ERR, fd_cb, "failed to allocate the header mbuf: %d", error); + goto done; + } + + ctl_unit = htonl(fd_cb->group->ctl_unit); + + error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_CTL_UNIT, sizeof(ctl_unit), &ctl_unit); + if (error) { + goto done; + } + + error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_FLOW_ID, sizeof(fd_cb->hash), &fd_cb->hash); + if (error) { + goto done; + } + + socket_unlock(so, 0); + lck_rw_lock_shared(&g_flow_divert_group_lck); + + if (g_flow_divert_groups != NULL && g_active_group_count > 0 && + fd_cb->control_group_unit > 0 && fd_cb->control_group_unit < GROUP_COUNT_MAX) + { + control_group = g_flow_divert_groups[fd_cb->control_group_unit]; + } + + if (control_group != NULL) { + lck_rw_lock_shared(&control_group->lck); + ctl_unit = htonl(control_group->ctl_unit); + error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_KEY_UNIT, sizeof(ctl_unit), &ctl_unit); + if (!error) { + error = flow_divert_packet_compute_hmac(token, control_group, hmac); + } + lck_rw_done(&control_group->lck); + } else { + error = ENOPROTOOPT; + } + + lck_rw_done(&g_flow_divert_group_lck); + socket_lock(so, 0); + + if (error) { + goto done; + } + + error = flow_divert_packet_append_tlv(token, FLOW_DIVERT_TLV_HMAC, sizeof(hmac), hmac); + if (error) { + goto done; + } + + error = soopt_mcopyout(sopt, token); + if (error) { + token = NULL; /* For some reason, soopt_mcopyout() frees the mbuf if it fails */ + goto done; + } + +done: + if (token != NULL) { + mbuf_freem(token); + } + + return error; +} + +static errno_t +flow_divert_kctl_connect(kern_ctl_ref kctlref __unused, struct sockaddr_ctl *sac, void **unitinfo) +{ + struct flow_divert_group *new_group = NULL; + int error = 0; + + if (sac->sc_unit >= GROUP_COUNT_MAX) { + error = EINVAL; + goto done; + } + + *unitinfo = NULL; + + MALLOC_ZONE(new_group, struct flow_divert_group *, sizeof(*new_group), M_FLOW_DIVERT_GROUP, M_WAITOK); + if (new_group == NULL) { + error = ENOBUFS; + goto done; + } + + memset(new_group, 0, 
sizeof(*new_group)); + + lck_rw_init(&new_group->lck, flow_divert_mtx_grp, flow_divert_mtx_attr); + RB_INIT(&new_group->pcb_tree); + new_group->ctl_unit = sac->sc_unit; + MBUFQ_INIT(&new_group->send_queue); + + lck_rw_lock_exclusive(&g_flow_divert_group_lck); + + if (g_flow_divert_groups == NULL) { + MALLOC(g_flow_divert_groups, + struct flow_divert_group **, + GROUP_COUNT_MAX * sizeof(struct flow_divert_group *), + M_TEMP, + M_WAITOK | M_ZERO); + } + + if (g_flow_divert_groups == NULL) { + error = ENOBUFS; + } else if (g_flow_divert_groups[sac->sc_unit] != NULL) { + error = EALREADY; + } else { + g_flow_divert_groups[sac->sc_unit] = new_group; + g_active_group_count++; + } + + lck_rw_done(&g_flow_divert_group_lck); + + if (error == 0) { + *unitinfo = new_group; + } + +done: + if (error != 0 && new_group != NULL) { + FREE_ZONE(new_group, sizeof(*new_group), M_FLOW_DIVERT_GROUP); + } + return error; +} + +static errno_t +flow_divert_kctl_disconnect(kern_ctl_ref kctlref __unused, uint32_t unit, void *unitinfo) +{ + struct flow_divert_group *group = NULL; + errno_t error = 0; + uint16_t node = 0; + + if (unit >= GROUP_COUNT_MAX) { + return EINVAL; + } + + FDLOG(LOG_INFO, &nil_pcb, "disconnecting group %u", unit); + + lck_rw_lock_exclusive(&g_flow_divert_group_lck); + + if (g_flow_divert_groups == NULL || g_active_group_count == 0) { + panic("flow divert group %u is disconnecting, but no groups are active (groups = %p, active count = %u)", unit, + g_flow_divert_groups, g_active_group_count); + } + + group = g_flow_divert_groups[unit]; + + if (group != (struct flow_divert_group *)unitinfo) { + panic("group with unit %u (%p) != unit info (%p)", unit, group, unitinfo); + } + + if (group != NULL) { + flow_divert_close_all(group); + if (group->token_key != NULL) { + memset(group->token_key, 0, group->token_key_size); + FREE(group->token_key, M_TEMP); + group->token_key = NULL; + group->token_key_size = 0; + } + FREE_ZONE(group, sizeof(*group), M_FLOW_DIVERT_GROUP); + g_flow_divert_groups[unit] = NULL; + g_active_group_count--; + } else { + error = EINVAL; + } + + if (g_active_group_count == 0) { + FREE(g_flow_divert_groups, M_TEMP); + g_flow_divert_groups = NULL; + } + + /* Remove all signing IDs that point to this unit */ + for (node = 0; node < g_signing_id_trie.nodes_count; node++) { + if (TRIE_NODE(&g_signing_id_trie, node).group_unit == unit) { + TRIE_NODE(&g_signing_id_trie, node).group_unit = 0; + } + } + + lck_rw_done(&g_flow_divert_group_lck); + + return error; +} + +static errno_t +flow_divert_kctl_send(kern_ctl_ref kctlref __unused, uint32_t unit __unused, void *unitinfo, mbuf_t m, int flags __unused) +{ + return flow_divert_input(m, (struct flow_divert_group *)unitinfo); +} + +static void +flow_divert_kctl_rcvd(kern_ctl_ref kctlref __unused, uint32_t unit __unused, void *unitinfo, int flags __unused) +{ + struct flow_divert_group *group = (struct flow_divert_group *)unitinfo; + + if (!OSTestAndClear(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits)) { + struct flow_divert_pcb *fd_cb; + SLIST_HEAD(, flow_divert_pcb) tmp_list; + + lck_rw_lock_shared(&g_flow_divert_group_lck); + lck_rw_lock_exclusive(&group->lck); + + while (!MBUFQ_EMPTY(&group->send_queue)) { + mbuf_t next_packet; + FDLOG0(LOG_DEBUG, &nil_pcb, "trying ctl_enqueuembuf again"); + next_packet = MBUFQ_FIRST(&group->send_queue); + int error = ctl_enqueuembuf(g_flow_divert_kctl_ref, group->ctl_unit, next_packet, CTL_DATA_EOR); + if (error) { + FDLOG(LOG_DEBUG, &nil_pcb, "ctl_enqueuembuf returned an error: %d", error); + 
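/* + * The control socket's send buffer is still full. Leave the packet + * at the head of the queue (it is dequeued only after a successful + * enqueue), mark enqueues as blocked again, and retry on the next + * rcvd notification. + */ +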
OSTestAndSet(GROUP_BIT_CTL_ENQUEUE_BLOCKED, &group->atomic_bits); + lck_rw_done(&group->lck); + lck_rw_done(&g_flow_divert_group_lck); + return; + } + MBUFQ_DEQUEUE(&group->send_queue, next_packet); + } + + SLIST_INIT(&tmp_list); + + RB_FOREACH(fd_cb, fd_pcb_tree, &group->pcb_tree) { + FDRETAIN(fd_cb); + SLIST_INSERT_HEAD(&tmp_list, fd_cb, tmp_list_entry); + } + + lck_rw_done(&group->lck); + + SLIST_FOREACH(fd_cb, &tmp_list, tmp_list_entry) { + FDLOCK(fd_cb); + if (fd_cb->so != NULL) { + socket_lock(fd_cb->so, 0); + if (fd_cb->group != NULL) { + flow_divert_send_buffered_data(fd_cb, FALSE); + } + socket_unlock(fd_cb->so, 0); + } + FDUNLOCK(fd_cb); + FDRELEASE(fd_cb); + } + + lck_rw_done(&g_flow_divert_group_lck); + } +} + +static int +flow_divert_kctl_init(void) +{ + struct kern_ctl_reg ctl_reg; + int result; + + memset(&ctl_reg, 0, sizeof(ctl_reg)); + + strncpy(ctl_reg.ctl_name, FLOW_DIVERT_CONTROL_NAME, sizeof(ctl_reg.ctl_name)); + ctl_reg.ctl_name[sizeof(ctl_reg.ctl_name)-1] = '\0'; + ctl_reg.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED; + ctl_reg.ctl_sendsize = FD_CTL_SENDBUFF_SIZE; + ctl_reg.ctl_recvsize = FD_CTL_RCVBUFF_SIZE; + + ctl_reg.ctl_connect = flow_divert_kctl_connect; + ctl_reg.ctl_disconnect = flow_divert_kctl_disconnect; + ctl_reg.ctl_send = flow_divert_kctl_send; + ctl_reg.ctl_rcvd = flow_divert_kctl_rcvd; + + result = ctl_register(&ctl_reg, &g_flow_divert_kctl_ref); + + if (result) { + FDLOG(LOG_ERR, &nil_pcb, "flow_divert_kctl_init - ctl_register failed: %d\n", result); + return result; + } + + return 0; +} + +void +flow_divert_init(void) +{ + memset(&nil_pcb, 0, sizeof(nil_pcb)); + nil_pcb.log_level = LOG_INFO; + + g_tcp_protosw = pffindproto(AF_INET, IPPROTO_TCP, SOCK_STREAM); + + VERIFY(g_tcp_protosw != NULL); + + memcpy(&g_flow_divert_in_protosw, g_tcp_protosw, sizeof(g_flow_divert_in_protosw)); + memcpy(&g_flow_divert_in_usrreqs, g_tcp_protosw->pr_usrreqs, sizeof(g_flow_divert_in_usrreqs)); + + g_flow_divert_in_usrreqs.pru_connect = flow_divert_connect_out; + g_flow_divert_in_usrreqs.pru_connectx = flow_divert_connectx_out; + g_flow_divert_in_usrreqs.pru_control = flow_divert_in_control; + g_flow_divert_in_usrreqs.pru_disconnect = flow_divert_close; + g_flow_divert_in_usrreqs.pru_disconnectx = flow_divert_disconnectx; + g_flow_divert_in_usrreqs.pru_peeraddr = flow_divert_getpeername; + g_flow_divert_in_usrreqs.pru_rcvd = flow_divert_rcvd; + g_flow_divert_in_usrreqs.pru_send = flow_divert_data_out; + g_flow_divert_in_usrreqs.pru_shutdown = flow_divert_shutdown; + g_flow_divert_in_usrreqs.pru_sockaddr = flow_divert_getsockaddr; + + g_flow_divert_in_protosw.pr_usrreqs = &g_flow_divert_in_usrreqs; + g_flow_divert_in_protosw.pr_ctloutput = flow_divert_ctloutput; + + /* + * Socket filters shouldn't attach/detach to/from this protosw + * since pr_protosw is to be used instead, which points to the + * real protocol; if they do, it is a bug and we should panic. 
+ */ + g_flow_divert_in_protosw.pr_filter_head.tqh_first = + (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef; + g_flow_divert_in_protosw.pr_filter_head.tqh_last = + (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef; + +#if INET6 + g_tcp6_protosw = (struct ip6protosw *)pffindproto(AF_INET6, IPPROTO_TCP, SOCK_STREAM); + + VERIFY(g_tcp6_protosw != NULL); + + memcpy(&g_flow_divert_in6_protosw, g_tcp6_protosw, sizeof(g_flow_divert_in6_protosw)); + memcpy(&g_flow_divert_in6_usrreqs, g_tcp6_protosw->pr_usrreqs, sizeof(g_flow_divert_in6_usrreqs)); + + g_flow_divert_in6_usrreqs.pru_connect = flow_divert_connect_out; + g_flow_divert_in6_usrreqs.pru_connectx = flow_divert_connectx6_out; + g_flow_divert_in6_usrreqs.pru_control = flow_divert_in6_control; + g_flow_divert_in6_usrreqs.pru_disconnect = flow_divert_close; + g_flow_divert_in6_usrreqs.pru_disconnectx = flow_divert_disconnectx; + g_flow_divert_in6_usrreqs.pru_peeraddr = flow_divert_getpeername; + g_flow_divert_in6_usrreqs.pru_rcvd = flow_divert_rcvd; + g_flow_divert_in6_usrreqs.pru_send = flow_divert_data_out; + g_flow_divert_in6_usrreqs.pru_shutdown = flow_divert_shutdown; + g_flow_divert_in6_usrreqs.pru_sockaddr = flow_divert_getsockaddr; + + g_flow_divert_in6_protosw.pr_usrreqs = &g_flow_divert_in6_usrreqs; + g_flow_divert_in6_protosw.pr_ctloutput = flow_divert_ctloutput; + /* + * Socket filters shouldn't attach/detach to/from this protosw + * since pr_protosw is to be used instead, which points to the + * real protocol; if they do, it is a bug and we should panic. + */ + g_flow_divert_in6_protosw.pr_filter_head.tqh_first = + (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef; + g_flow_divert_in6_protosw.pr_filter_head.tqh_last = + (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef; +#endif /* INET6 */ + + flow_divert_grp_attr = lck_grp_attr_alloc_init(); + if (flow_divert_grp_attr == NULL) { + FDLOG0(LOG_ERR, &nil_pcb, "lck_grp_attr_alloc_init failed"); + g_init_result = ENOMEM; + goto done; + } + + flow_divert_mtx_grp = lck_grp_alloc_init(FLOW_DIVERT_CONTROL_NAME, flow_divert_grp_attr); + if (flow_divert_mtx_grp == NULL) { + FDLOG0(LOG_ERR, &nil_pcb, "lck_grp_alloc_init failed"); + g_init_result = ENOMEM; + goto done; + } + + flow_divert_mtx_attr = lck_attr_alloc_init(); + if (flow_divert_mtx_attr == NULL) { + FDLOG0(LOG_ERR, &nil_pcb, "lck_attr_alloc_init failed"); + g_init_result = ENOMEM; + goto done; + } + + g_init_result = flow_divert_kctl_init(); + if (g_init_result) { + goto done; + } + + lck_rw_init(&g_flow_divert_group_lck, flow_divert_mtx_grp, flow_divert_mtx_attr); + + memset(&g_signing_id_trie, 0, sizeof(g_signing_id_trie)); + g_signing_id_trie.root = NULL_TRIE_IDX; + +done: + if (g_init_result != 0) { + if (flow_divert_mtx_attr != NULL) { + lck_attr_free(flow_divert_mtx_attr); + flow_divert_mtx_attr = NULL; + } + if (flow_divert_mtx_grp != NULL) { + lck_grp_free(flow_divert_mtx_grp); + flow_divert_mtx_grp = NULL; + } + if (flow_divert_grp_attr != NULL) { + lck_grp_attr_free(flow_divert_grp_attr); + flow_divert_grp_attr = NULL; + } + + if (g_flow_divert_kctl_ref != NULL) { + ctl_deregister(g_flow_divert_kctl_ref); + g_flow_divert_kctl_ref = NULL; + } + } +} diff --git a/bsd/netinet/flow_divert.h b/bsd/netinet/flow_divert.h new file mode 100644 index 000000000..522deb241 --- /dev/null +++ b/bsd/netinet/flow_divert.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __FLOW_DIVERT_H__ +#define __FLOW_DIVERT_H__ + +#include + +struct flow_divert_group; + +struct flow_divert_pcb { + decl_lck_mtx_data(, mtx); + socket_t so; + RB_ENTRY(flow_divert_pcb) rb_link; + uint32_t hash; + mbuf_t connect_token; + struct sockaddr *local_address; + struct sockaddr *remote_address; + uint32_t flags; + uint32_t send_window; + uint32_t sb_size; + struct flow_divert_group *group; + uint32_t control_group_unit; + int32_t ref_count; + uint32_t bytes_written_by_app; + uint32_t bytes_read_by_app; + uint32_t bytes_sent; + uint32_t bytes_received; + uint8_t log_level; + SLIST_ENTRY(flow_divert_pcb) tmp_list_entry; +}; + +RB_HEAD(fd_pcb_tree, flow_divert_pcb); + +struct flow_divert_group { + decl_lck_rw_data(, lck); + struct fd_pcb_tree pcb_tree; + uint32_t ctl_unit; + uint8_t atomic_bits; + MBUFQ_HEAD(send_queue_head) send_queue; + uint8_t *token_key; + size_t token_key_size; +}; + +void flow_divert_init(void); +void flow_divert_detach(struct socket *so); +errno_t flow_divert_token_set(struct socket *so, struct sockopt *sopt); +errno_t flow_divert_token_get(struct socket *so, struct sockopt *sopt); +errno_t flow_divert_pcb_init(struct socket *so, uint32_t ctl_unit); +errno_t flow_divert_check_policy(struct socket *so, proc_t p, boolean_t match_delegate, uint32_t *ctl_unit); +errno_t flow_divert_connect_out(struct socket *so, struct sockaddr *to, proc_t p); +void flow_divert_so_init(struct socket *so, proc_t p); +boolean_t flow_divert_is_dns_service(struct socket *so); + +#endif /* __FLOW_DIVERT_H__ */ diff --git a/bsd/netinet/flow_divert_proto.h b/bsd/netinet/flow_divert_proto.h new file mode 100644 index 000000000..06c79d3e7 --- /dev/null +++ b/bsd/netinet/flow_divert_proto.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __FLOW_DIVERT_PROTO_H__ +#define __FLOW_DIVERT_PROTO_H__ + +#define FLOW_DIVERT_CONTROL_NAME "com.apple.flow-divert" + +#define FLOW_DIVERT_PKT_CONNECT 1 +#define FLOW_DIVERT_PKT_CONNECT_RESULT 2 +#define FLOW_DIVERT_PKT_DATA 3 +#define FLOW_DIVERT_PKT_CLOSE 4 +#define FLOW_DIVERT_PKT_READ_NOTIFY 5 +#define FLOW_DIVERT_PKT_GROUP_INIT 6 +#define FLOW_DIVERT_PKT_PROPERTIES_UPDATE 7 +#define FLOW_DIVERT_PKT_APP_MAP_UPDATE 8 +#define FLOW_DIVERT_PKT_APP_MAP_CREATE 9 + +#define FLOW_DIVERT_TLV_NIL 0 +#define FLOW_DIVERT_TLV_ERROR_CODE 5 +#define FLOW_DIVERT_TLV_HOW 7 +#define FLOW_DIVERT_TLV_READ_COUNT 8 +#define FLOW_DIVERT_TLV_SPACE_AVAILABLE 9 +#define FLOW_DIVERT_TLV_CTL_UNIT 10 +#define FLOW_DIVERT_TLV_LOCAL_ADDR 11 +#define FLOW_DIVERT_TLV_REMOTE_ADDR 12 +#define FLOW_DIVERT_TLV_OUT_IF_INDEX 13 +#define FLOW_DIVERT_TLV_TRAFFIC_CLASS 14 +#define FLOW_DIVERT_TLV_NO_CELLULAR 15 +#define FLOW_DIVERT_TLV_FLOW_ID 16 +#define FLOW_DIVERT_TLV_TOKEN_KEY 17 +#define FLOW_DIVERT_TLV_HMAC 18 +#define FLOW_DIVERT_TLV_KEY_UNIT 19 +#define FLOW_DIVERT_TLV_LOG_LEVEL 20 +#define FLOW_DIVERT_TLV_TARGET_HOSTNAME 21 +#define FLOW_DIVERT_TLV_TARGET_ADDRESS 22 +#define FLOW_DIVERT_TLV_TARGET_PORT 23 +#define FLOW_DIVERT_TLV_CDHASH 24 +#define FLOW_DIVERT_TLV_SIGNING_ID 25 +#define FLOW_DIVERT_TLV_PID 26 +#define FLOW_DIVERT_TLV_UUID 27 +#define FLOW_DIVERT_TLV_PREFIX_COUNT 28 + +#define FLOW_DIVERT_CHUNK_SIZE 4096 + +#define FLOW_DIVERT_TOKEN_GETOPT_MAX_SIZE 128 + +#define FLOW_DIVERT_DNS_SERVICE_SIGNING_ID "com.apple.mDNSResponder" + +struct flow_divert_packet_header { + uint8_t packet_type; + uint32_t conn_id; +}; + +#endif /* __FLOW_DIVERT_PROTO_H__ */ diff --git a/bsd/netinet/icmp6.h b/bsd/netinet/icmp6.h index 841d3b390..da271fa32 100644 --- a/bsd/netinet/icmp6.h +++ b/bsd/netinet/icmp6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000,2008-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -675,7 +675,7 @@ struct icmp6stat { #define ICMPV6CTL_ND6_OPTIMISTIC_DAD 26 /* RFC 4429 */ #define ICMPV6CTL_MAXID 27 -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define ICMPV6CTL_NAMES { \ { 0, 0 }, \ { 0, 0 }, \ @@ -706,14 +706,13 @@ struct icmp6stat { { "nd6_optimistic_dad", CTLTYPE_INT }, \ } -#define RTF_PROBEMTU RTF_PROTO1 - # ifdef __STDC__ struct rtentry; struct rttimer; struct in6_multi; # endif -void icmp6_init(void); +struct ip6protosw; +void icmp6_init(struct ip6protosw *, struct domain *); void icmp6_paramerror(struct mbuf *, int); void icmp6_error(struct mbuf *, int, int, int); void icmp6_error2(struct mbuf *, int, int, int, struct ifnet *); @@ -728,68 +727,63 @@ void icmp6_mtudisc_update(struct ip6ctlparam *, int); extern lck_rw_t icmp6_ifs_rwlock; /* XXX: is this the right place for these macros? */ -#define icmp6_ifstat_inc(ifp, tag) \ -do { \ - lck_rw_lock_shared(&icmp6_ifs_rwlock); \ - if ((ifp) && (ifp)->if_index <= if_index \ - && (ifp)->if_index < icmp6_ifstatmax \ - && icmp6_ifstat && icmp6_ifstat[(ifp)->if_index]) { \ - icmp6_ifstat[(ifp)->if_index]->tag++; \ +/* N.B.: if_inet6data is never freed once set, so we don't need to lock */ +#define icmp6_ifstat_inc(_ifp, _tag) do { \ + if (_ifp != NULL && IN6_IFEXTRA(_ifp) != NULL) { \ + IN6_IFEXTRA(_ifp)->icmp6_ifstat._tag++; \ } \ - lck_rw_done(&icmp6_ifs_rwlock); \ } while (0) -#define icmp6_ifoutstat_inc(ifp, type, code) \ -do { \ - icmp6_ifstat_inc(ifp, ifs6_out_msg); \ - if (type < ICMP6_INFOMSG_MASK) \ - icmp6_ifstat_inc(ifp, ifs6_out_error); \ - switch (type) { \ - case ICMP6_DST_UNREACH: \ - icmp6_ifstat_inc(ifp, ifs6_out_dstunreach); \ - if (code == ICMP6_DST_UNREACH_ADMIN) \ - icmp6_ifstat_inc(ifp, ifs6_out_adminprohib); \ - break; \ - case ICMP6_PACKET_TOO_BIG: \ - icmp6_ifstat_inc(ifp, ifs6_out_pkttoobig); \ - break; \ - case ICMP6_TIME_EXCEEDED: \ - icmp6_ifstat_inc(ifp, ifs6_out_timeexceed); \ - break; \ - case ICMP6_PARAM_PROB: \ - icmp6_ifstat_inc(ifp, ifs6_out_paramprob); \ - break; \ - case ICMP6_ECHO_REQUEST: \ - icmp6_ifstat_inc(ifp, ifs6_out_echo); \ - break; \ - case ICMP6_ECHO_REPLY: \ - icmp6_ifstat_inc(ifp, ifs6_out_echoreply); \ - break; \ - case MLD_LISTENER_QUERY: \ - icmp6_ifstat_inc(ifp, ifs6_out_mldquery); \ - break; \ - case MLD_LISTENER_REPORT: \ - icmp6_ifstat_inc(ifp, ifs6_out_mldreport); \ - break; \ - case MLD_LISTENER_DONE: \ - icmp6_ifstat_inc(ifp, ifs6_out_mlddone); \ - break; \ - case ND_ROUTER_SOLICIT: \ - icmp6_ifstat_inc(ifp, ifs6_out_routersolicit); \ - break; \ - case ND_ROUTER_ADVERT: \ - icmp6_ifstat_inc(ifp, ifs6_out_routeradvert); \ - break; \ - case ND_NEIGHBOR_SOLICIT: \ - icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); \ - break; \ - case ND_NEIGHBOR_ADVERT: \ - icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); \ - break; \ - case ND_REDIRECT: \ - icmp6_ifstat_inc(ifp, ifs6_out_redirect); \ - break; \ - } \ +#define icmp6_ifoutstat_inc(ifp, type, code) do { \ + icmp6_ifstat_inc(ifp, ifs6_out_msg); \ + if (type < ICMP6_INFOMSG_MASK) \ + icmp6_ifstat_inc(ifp, ifs6_out_error); \ + switch (type) { \ + case ICMP6_DST_UNREACH: \ + icmp6_ifstat_inc(ifp, ifs6_out_dstunreach); \ + if (code == ICMP6_DST_UNREACH_ADMIN) \ + icmp6_ifstat_inc(ifp, ifs6_out_adminprohib);\ + break; \ + case ICMP6_PACKET_TOO_BIG: \ + icmp6_ifstat_inc(ifp, ifs6_out_pkttoobig); \ + break; \ + case ICMP6_TIME_EXCEEDED: \ + icmp6_ifstat_inc(ifp, ifs6_out_timeexceed); \ + break; \ + case ICMP6_PARAM_PROB: \ + icmp6_ifstat_inc(ifp, ifs6_out_paramprob); \ 
+ break; \ + case ICMP6_ECHO_REQUEST: \ + icmp6_ifstat_inc(ifp, ifs6_out_echo); \ + break; \ + case ICMP6_ECHO_REPLY: \ + icmp6_ifstat_inc(ifp, ifs6_out_echoreply); \ + break; \ + case MLD_LISTENER_QUERY: \ + icmp6_ifstat_inc(ifp, ifs6_out_mldquery); \ + break; \ + case MLD_LISTENER_REPORT: \ + icmp6_ifstat_inc(ifp, ifs6_out_mldreport); \ + break; \ + case MLD_LISTENER_DONE: \ + icmp6_ifstat_inc(ifp, ifs6_out_mlddone); \ + break; \ + case ND_ROUTER_SOLICIT: \ + icmp6_ifstat_inc(ifp, ifs6_out_routersolicit); \ + break; \ + case ND_ROUTER_ADVERT: \ + icmp6_ifstat_inc(ifp, ifs6_out_routeradvert); \ + break; \ + case ND_NEIGHBOR_SOLICIT: \ + icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);\ + break; \ + case ND_NEIGHBOR_ADVERT: \ + icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); \ + break; \ + case ND_REDIRECT: \ + icmp6_ifstat_inc(ifp, ifs6_out_redirect); \ + break; \ + } \ } while (0) extern int icmp6_rediraccept; /* accept/process redirects */ @@ -800,6 +794,6 @@ extern int icmp6_redirtimeout; /* cache time for redirect routes */ #define ICMP6_NODEINFO_TMPADDROK 0x4 #define ICMP6_NODEINFO_GLOBALOK 0x8 -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_NETINET_ICMP6_H_ */ diff --git a/bsd/netinet/icmp_var.h b/bsd/netinet/icmp_var.h index d4332dcd3..bb3021d02 100644 --- a/bsd/netinet/icmp_var.h +++ b/bsd/netinet/icmp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,7 +95,7 @@ struct icmpstat { #define ICMPCTL_TIMESTAMP 4 /* allow replies to time stamp requests */ #define ICMPCTL_MAXID 5 -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define ICMPCTL_NAMES { \ { 0, 0 }, \ { "maskrepl", CTLTYPE_INT }, \ @@ -117,5 +117,5 @@ extern int badport_bandlim(int); #define BANDLIM_MAX 4 extern struct icmpstat icmpstat; -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET_ICMP_VAR_H_ */ diff --git a/bsd/netinet/if_ether.h b/bsd/netinet/if_ether.h index e796437dd..9ed719225 100644 --- a/bsd/netinet/if_ether.h +++ b/bsd/netinet/if_ether.h @@ -141,7 +141,7 @@ struct sockaddr_inarp { #define RTF_USETRAILERS RTF_PROTO1 /* use trailers */ #define RTF_ANNOUNCE RTF_PROTO2 /* announce new arp entry */ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern u_char ether_ipmulticast_min[ETHER_ADDR_LEN]; extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN]; extern struct ifqueue arpintrq; @@ -149,6 +149,6 @@ extern struct ifqueue arpintrq; int arpresolve(struct ifnet *, struct rtentry *, struct mbuf *, struct sockaddr *, u_char *, struct rtentry *); void arp_ifinit(struct ifnet *, struct ifaddr *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET_IF_ETHER_H_ */ diff --git a/bsd/netinet/igmp.c b/bsd/netinet/igmp.c index e8442d135..ceacb9611 100644 --- a/bsd/netinet/igmp.c +++ b/bsd/netinet/igmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -111,22 +111,6 @@ #include #include -#ifdef IGMP_DEBUG -__inline__ char * -inet_ntoa(struct in_addr ina) -{ - static char buf[4*sizeof "123"]; - unsigned char *ucp = (unsigned char *)&ina; - - snprintf(buf, sizeof(buf), "%d.%d.%d.%d", - ucp[0] & 0xff, - ucp[1] & 0xff, - ucp[2] & 0xff, - ucp[3] & 0xff); - return buf; -} -#endif - SLIST_HEAD(igmp_inm_relhead, in_multi); static void igi_initvar(struct igmp_ifinfo *, struct ifnet *, int); @@ -134,39 +118,40 @@ static struct igmp_ifinfo *igi_alloc(int); static void igi_free(struct igmp_ifinfo *); static void igi_delete(const struct ifnet *, struct igmp_inm_relhead *); static void igmp_dispatch_queue(struct igmp_ifinfo *, struct ifqueue *, - int, const int, struct ifnet *); -static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *); + int, const int); +static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *, + struct igmp_tparams *); static int igmp_handle_state_change(struct in_multi *, - struct igmp_ifinfo *); -static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *); + struct igmp_ifinfo *, struct igmp_tparams *); +static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *, + struct igmp_tparams *); static int igmp_input_v1_query(struct ifnet *, const struct ip *, const struct igmp *); static int igmp_input_v2_query(struct ifnet *, const struct ip *, const struct igmp *); static int igmp_input_v3_query(struct ifnet *, const struct ip *, /*const*/ struct igmpv3 *); -static int igmp_input_v3_group_query(struct in_multi *, +static int igmp_input_v3_group_query(struct in_multi *, int, /*const*/ struct igmpv3 *); -static int igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *, - /*const*/ struct igmp *); -static int igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *, - /*const*/ struct igmp *); -void igmp_sendpkt(struct mbuf *, struct ifnet *); +static int igmp_input_v1_report(struct ifnet *, struct mbuf *, + /*const*/ struct ip *, /*const*/ struct igmp *); +static int igmp_input_v2_report(struct ifnet *, struct mbuf *, + /*const*/ struct ip *, /*const*/ struct igmp *); +static void igmp_sendpkt(struct mbuf *); static __inline__ int igmp_isgroupreported(const struct in_addr); -static struct mbuf * - igmp_ra_alloc(void); +static struct mbuf *igmp_ra_alloc(void); #ifdef IGMP_DEBUG -static const char * igmp_rec_type_to_str(const int); +static const char *igmp_rec_type_to_str(const int); #endif -static void igmp_set_version(struct igmp_ifinfo *, const int); +static uint32_t igmp_set_version(struct igmp_ifinfo *, const int); static void igmp_flush_relq(struct igmp_ifinfo *, struct igmp_inm_relhead *); static int igmp_v1v2_queue_report(struct in_multi *, const int); static void igmp_v1v2_process_group_timer(struct in_multi *, const int); static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *); -static void igmp_v2_update_group(struct in_multi *, const int); +static uint32_t igmp_v2_update_group(struct in_multi *, const int); static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *); -static void igmp_v3_dispatch_general_query(struct igmp_ifinfo *); +static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *); static struct mbuf * igmp_v3_encap_report(struct ifnet *, struct mbuf *); static int igmp_v3_enqueue_group_record(struct ifqueue *, @@ -183,16 +168,34 @@ static int sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS; static int sysctl_igmp_gsr SYSCTL_HANDLER_ARGS; static int sysctl_igmp_default_version SYSCTL_HANDLER_ARGS; 
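[Editor's sketch.] The prototype churn above is the heart of this igmp.c rework: rather than having every handler flip the file-scope *_timers_running flags from assorted lock contexts, each path now reports which timer classes it armed through a caller-supplied struct igmp_tparams, and a single igmp_set_timeout() call at the end of the operation takes the IGMP global lock once and arms the callout. A minimal sketch of the pattern follows; struct igmp_tparams itself is declared in igmp_var.h (outside this hunk), so the field types below are assumptions inferred purely from the { 0, 0, 0, 0 } initializers and the .qpt/.it/.cst/.sct accesses later in the patch.

#include <stdint.h>

/* Reconstructed from usage; see igmp_var.h for the real definition. */
struct igmp_tparams {
	uint32_t	qpt;	/* IGMPv1/v2 querier-present timer to arm */
	uint32_t	it;	/* IGMPv3 general-query (interface) timer */
	uint32_t	cst;	/* current-state (report) timer */
	uint32_t	sct;	/* state-change (retransmit) timer */
};

void igmp_set_timeout(struct igmp_tparams *);	/* defined later in this patch */

/* Typical caller shape, paraphrasing igmp_input_v1_query() below. */
static void
example_query_handler(void)
{
	struct igmp_tparams itp = { 0, 0, 0, 0 };

	/* ... per-group processing records which timer classes it armed ... */
	itp.cst = 1;

	/* One locked rendezvous at the end arms the 1-second callout. */
	igmp_set_timeout(&itp);
}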
-struct mbuf *m_raopt; /* Router Alert option */ +static int igmp_timeout_run; /* IGMP timer is scheduled to run */ +static void igmp_timeout(void *); +static void igmp_sched_timeout(void); +static struct mbuf *m_raopt; /* Router Alert option */ + +static int querier_present_timers_running; /* IGMPv1/v2 older version + * querier present */ static int interface_timers_running; /* IGMPv3 general - * query response */ + * query response */ static int state_change_timers_running; /* IGMPv3 state-change - * retransmit */ + * retransmit */ static int current_state_timers_running; /* IGMPv1/v2 host * report; IGMPv3 g/sg * query response */ +/* + * Subsystem lock macros. + */ +#define IGMP_LOCK() \ + lck_mtx_lock(&igmp_mtx) +#define IGMP_LOCK_ASSERT_HELD() \ + lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_OWNED) +#define IGMP_LOCK_ASSERT_NOTHELD() \ + lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED) +#define IGMP_UNLOCK() \ + lck_mtx_unlock(&igmp_mtx) + static LIST_HEAD(, igmp_ifinfo) igi_head; static struct igmpstat_v3 igmpstat_v3 = { .igps_version = IGPS_VERSION_3, @@ -305,16 +308,42 @@ static int igmp_timers_are_running; static unsigned int igi_size; /* size of zone element */ static struct zone *igi_zone; /* zone for igmp_ifinfo */ +/* Store IGMPv3 record count in the module private scratch space */ +#define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0] + +static __inline void +igmp_save_context(struct mbuf *m, struct ifnet *ifp) +{ + m->m_pkthdr.rcvif = ifp; +} + +static __inline void +igmp_scrub_context(struct mbuf *m) +{ + m->m_pkthdr.rcvif = NULL; +} + #ifdef IGMP_DEBUG -static __inline char * -inet_ntoa_haddr(in_addr_t haddr) +static __inline const char * +inet_ntop_haddr(in_addr_t haddr, char *buf, socklen_t size) { struct in_addr ia; ia.s_addr = htonl(haddr); - return (inet_ntoa(ia)); + return (inet_ntop(AF_INET, &ia, buf, size)); } #endif + +/* + * Restore context from a queued IGMP output chain. + * Return saved ifp. + */ +static __inline struct ifnet * +igmp_restore_context(struct mbuf *m) +{ + return (m->m_pkthdr.rcvif); +} + /* * Retrieve or set default IGMP version. 
*/ @@ -325,7 +354,7 @@ sysctl_igmp_default_version SYSCTL_HANDLER_ARGS int error; int new; - lck_mtx_lock(&igmp_mtx); + IGMP_LOCK(); error = SYSCTL_OUT(req, arg1, sizeof(int)); if (error || !req->newptr) @@ -342,13 +371,13 @@ sysctl_igmp_default_version SYSCTL_HANDLER_ARGS goto out_locked; } - IGMP_PRINTF(("change igmp_default_version from %d to %d\n", - igmp_default_version, new)); + IGMP_PRINTF(("%s: change igmp_default_version from %d to %d\n", + __func__, igmp_default_version, new)); igmp_default_version = new; out_locked: - lck_mtx_unlock(&igmp_mtx); + IGMP_UNLOCK(); return (error); } @@ -363,7 +392,7 @@ sysctl_igmp_gsr SYSCTL_HANDLER_ARGS int error; int i; - lck_mtx_lock(&igmp_mtx); + IGMP_LOCK(); i = igmp_gsrdelay.tv_sec; @@ -379,7 +408,7 @@ sysctl_igmp_gsr SYSCTL_HANDLER_ARGS igmp_gsrdelay.tv_sec = i; out_locked: - lck_mtx_unlock(&igmp_mtx); + IGMP_UNLOCK(); return (error); } @@ -408,7 +437,7 @@ sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS if (namelen != 1) return (EINVAL); - lck_mtx_lock(&igmp_mtx); + IGMP_LOCK(); if (name[0] <= 0 || name[0] > (u_int)if_index) { error = ENOENT; @@ -448,7 +477,7 @@ sysctl_igmp_ifinfo SYSCTL_HANDLER_ARGS } out_locked: - lck_mtx_unlock(&igmp_mtx); + IGMP_UNLOCK(); return (error); } @@ -459,7 +488,7 @@ out_locked: */ static void igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit, - const int loop, struct ifnet *ifp) + const int loop) { struct mbuf *m; struct ip *ip; @@ -471,13 +500,15 @@ igmp_dispatch_queue(struct igmp_ifinfo *igi, struct ifqueue *ifq, int limit, IF_DEQUEUE(ifq, m); if (m == NULL) break; - IGMP_PRINTF(("%s: dispatch %p from %p\n", __func__, ifq, m)); + IGMP_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifq), + (uint64_t)VM_KERNEL_ADDRPERM(m))); ip = mtod(m, struct ip *); if (loop) m->m_flags |= M_IGMP_LOOP; if (igi != NULL) IGI_UNLOCK(igi); - igmp_sendpkt(m, ifp); + igmp_sendpkt(m); if (igi != NULL) IGI_LOCK(igi); if (--limit == 0) @@ -541,14 +572,14 @@ igmp_domifattach(struct ifnet *ifp, int how) { struct igmp_ifinfo *igi; - IGMP_PRINTF(("%s: called for ifp %p(%s)\n", - __func__, ifp, ifp->if_name)); + IGMP_PRINTF(("%s: called for ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name)); igi = igi_alloc(how); if (igi == NULL) return (NULL); - lck_mtx_lock(&igmp_mtx); + IGMP_LOCK(); IGI_LOCK(igi); igi_initvar(igi, ifp, 0); @@ -562,10 +593,10 @@ igmp_domifattach(struct ifnet *ifp, int how) LIST_INSERT_HEAD(&igi_head, igi, igi_link); - lck_mtx_unlock(&igmp_mtx); + IGMP_UNLOCK(); - IGMP_PRINTF(("allocate igmp_ifinfo for ifp %p(%s)\n", - ifp, ifp->if_name)); + IGMP_PRINTF(("%s: allocate igmp_ifinfo for ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name)); return (igi); } @@ -579,7 +610,7 @@ igmp_domifreattach(struct igmp_ifinfo *igi) { struct ifnet *ifp; - lck_mtx_lock(&igmp_mtx); + IGMP_LOCK(); IGI_LOCK(igi); VERIFY(!(igi->igi_debug & IFD_ATTACHED)); @@ -595,10 +626,10 @@ igmp_domifreattach(struct igmp_ifinfo *igi) LIST_INSERT_HEAD(&igi_head, igi, igi_link); - lck_mtx_unlock(&igmp_mtx); + IGMP_UNLOCK(); - IGMP_PRINTF(("reattached igmp_ifinfo for ifp %p(%s)\n", - ifp, ifp->if_name)); + IGMP_PRINTF(("%s: reattached igmp_ifinfo for ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name)); } /* @@ -611,12 +642,12 @@ igmp_domifdetach(struct ifnet *ifp) SLIST_INIT(&inm_dthead); - IGMP_PRINTF(("%s: called for ifp %p(%s%d)\n", - __func__, ifp, ifp->if_name, ifp->if_unit)); + IGMP_PRINTF(("%s: called for ifp 
0x%llx(%s%d)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), ifp->if_name, ifp->if_unit)); - lck_mtx_lock(&igmp_mtx); + IGMP_LOCK(); igi_delete(ifp, (struct igmp_inm_relhead *)&inm_dthead); - lck_mtx_unlock(&igmp_mtx); + IGMP_UNLOCK(); /* Now that we're dropped all locks, release detached records */ IGMP_REMOVE_DETACHED_INM(&inm_dthead); @@ -633,7 +664,7 @@ igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead) { struct igmp_ifinfo *igi, *tigi; - lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_OWNED); + IGMP_LOCK_ASSERT_HELD(); LIST_FOREACH_SAFE(igi, &igi_head, igi_link, tigi) { IGI_LOCK(igi); @@ -654,7 +685,8 @@ igi_delete(const struct ifnet *ifp, struct igmp_inm_relhead *inm_dthead) } IGI_UNLOCK(igi); } - panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp); + panic("%s: igmp_ifinfo not found for ifp %p(%s)\n", __func__, + ifp, ifp->if_xname); } __private_extern__ void @@ -779,8 +811,8 @@ igi_remref(struct igmp_ifinfo *igi) /* Now that we're dropped all locks, release detached records */ IGMP_REMOVE_DETACHED_INM(&inm_dthead); - IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp %p(%s%d)\n", - __func__, ifp, ifp->if_name, ifp->if_unit)); + IGMP_PRINTF(("%s: freeing igmp_ifinfo for ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); igi_free(igi); } @@ -796,6 +828,9 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, struct igmp_ifinfo *igi; struct in_multi *inm; struct in_multistep step; + struct igmp_tparams itp = { 0, 0, 0, 0 }; + + IGMP_LOCK_ASSERT_NOTHELD(); /* * IGMPv1 Host Membership Queries SHOULD always be addressed to @@ -806,7 +841,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, if (!in_allhosts(ip->ip_dst) || !in_nullhost(igmp->igmp_group)) { IGMPSTAT_INC(igps_rcv_badqueries); OIGMPSTAT_INC(igps_rcv_badqueries); - return (0); + goto done; } IGMPSTAT_INC(igps_rcv_gen_queries); @@ -815,19 +850,20 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, IGI_LOCK(igi); if (igi->igi_flags & IGIF_LOOPBACK) { - IGMP_PRINTF(("ignore v1 query on IGIF_LOOPBACK ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + IGMP_PRINTF(("%s: ignore v1 query on IGIF_LOOPBACK " + "ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); IGI_UNLOCK(igi); - return (0); + goto done; } /* * Switch to IGMPv1 host compatibility mode. 
*/ - igmp_set_version(igi, IGMP_VERSION_1); + itp.qpt = igmp_set_version(igi, IGMP_VERSION_1); IGI_UNLOCK(igi); - IGMP_PRINTF(("process v1 query on ifp %p(%s%d)\n", ifp, ifp->if_name, - ifp->if_unit)); + IGMP_PRINTF(("%s: process v1 query on ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); /* * Start the timers in all of our group records @@ -838,9 +874,7 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, IN_FIRST_MULTI(step, inm); while (inm != NULL) { INM_LOCK(inm); - if (inm->inm_ifp != ifp) - goto next; - if (inm->inm_timer != 0) + if (inm->inm_ifp != ifp || inm->inm_timer != 0) goto next; switch (inm->inm_state) { @@ -855,9 +889,8 @@ igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip, case IGMP_SLEEPING_MEMBER: case IGMP_AWAKENING_MEMBER: inm->inm_state = IGMP_REPORTING_MEMBER; - inm->inm_timer = IGMP_RANDOM_DELAY( - IGMP_V1V2_MAX_RI * PR_SLOWHZ); - current_state_timers_running = 1; + inm->inm_timer = IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI); + itp.cst = 1; break; case IGMP_LEAVING_MEMBER: break; @@ -867,6 +900,8 @@ next: IN_NEXT_MULTI(step, inm); } in_multihead_lock_done(); +done: + igmp_set_timeout(&itp); return (0); } @@ -882,6 +917,9 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, struct in_multi *inm; int is_general_query; uint16_t timer; + struct igmp_tparams itp = { 0, 0, 0, 0 }; + + IGMP_LOCK_ASSERT_NOTHELD(); is_general_query = 0; @@ -894,7 +932,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, * If this was not sent to the all-hosts group, ignore it. */ if (!in_allhosts(ip->ip_dst)) - return (0); + goto done; IGMPSTAT_INC(igps_rcv_gen_queries); is_general_query = 1; } else { @@ -907,30 +945,31 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, IGI_LOCK(igi); if (igi->igi_flags & IGIF_LOOPBACK) { - IGMP_PRINTF(("ignore v2 query on IGIF_LOOPBACK ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + IGMP_PRINTF(("%s: ignore v2 query on IGIF_LOOPBACK " + "ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); IGI_UNLOCK(igi); - return(0); + goto done; } /* * Ignore v2 query if in v1 Compatibility Mode. */ if (igi->igi_version == IGMP_VERSION_1) { IGI_UNLOCK(igi); - return (0); + goto done; } - igmp_set_version(igi, IGMP_VERSION_2); + itp.qpt = igmp_set_version(igi, IGMP_VERSION_2); IGI_UNLOCK(igi); - timer = igmp->igmp_code * PR_SLOWHZ / IGMP_TIMER_SCALE; + timer = igmp->igmp_code / IGMP_TIMER_SCALE; if (timer == 0) timer = 1; if (is_general_query) { struct in_multistep step; - IGMP_PRINTF(("process v2 general query on ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + IGMP_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); /* * For each reporting group joined on this * interface, kick the report timer. 
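[Editor's note on the query hunks above.] Every delay in them loses its PR_SLOWHZ factor: the removed igmp_slowtimo() path counted in slow-timeout ticks, while the replacement igmp_timeout() callout (rearmed further down with timeout(..., hz)) counts whole seconds, so inm_timer and friends are now plain second counts. A worked version of the new v2 conversion, under the assumption that IGMP_TIMER_SCALE carries its conventional value of 10 (the query code field is in tenths of a second):

#include <stdint.h>

#define IGMP_TIMER_SCALE	10	/* igmp_code is in 1/10ths of a second */

/*
 * Convert a v2 query's max-response code to whole seconds, clamped to 1
 * so a small nonzero code still fires on the next 1-second tick.
 */
static uint16_t
igmp_code_to_secs(uint8_t igmp_code)
{
	uint16_t timer = igmp_code / IGMP_TIMER_SCALE;

	return (timer != 0 ? timer : 1);
}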
@@ -940,7 +979,7 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, while (inm != NULL) { INM_LOCK(inm); if (inm->inm_ifp == ifp) - igmp_v2_update_group(inm, timer); + itp.cst += igmp_v2_update_group(inm, timer); INM_UNLOCK(inm); IN_NEXT_MULTI(step, inm); } @@ -955,14 +994,17 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, in_multihead_lock_done(); if (inm != NULL) { INM_LOCK(inm); - IGMP_PRINTF(("process v2 query %s on ifp %p(%s%d)\n", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, - ifp->if_unit)); - igmp_v2_update_group(inm, timer); + IGMP_INET_PRINTF(igmp->igmp_group, + ("process v2 query %s on ifp 0x%llx(%s)\n", + _igmp_inet_buf, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + itp.cst = igmp_v2_update_group(inm, timer); INM_UNLOCK(inm); INM_REMREF(inm); /* from IN_LOOKUP_MULTI */ } } +done: + igmp_set_timeout(&itp); return (0); } @@ -982,13 +1024,13 @@ igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, * Unlike IGMPv3, the delay per group should be jittered * to avoid bursts of IGMPv2 reports. */ -static void +static uint32_t igmp_v2_update_group(struct in_multi *inm, const int timer) { - IGMP_PRINTF(("%s: %s/%s%d timer=%d\n", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name, - inm->inm_ifp->if_unit, timer)); + IGMP_INET_PRINTF(inm->inm_addr, ("%s: %s/%s timer=%d\n", + __func__, _igmp_inet_buf, if_name(inm->inm_ifp), + timer)); INM_LOCK_ASSERT_HELD(inm); @@ -1012,7 +1054,6 @@ igmp_v2_update_group(struct in_multi *inm, const int timer) IGMP_PRINTF(("%s: ->REPORTING\n", __func__)); inm->inm_state = IGMP_REPORTING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); - current_state_timers_running = 1; break; case IGMP_SLEEPING_MEMBER: IGMP_PRINTF(("%s: ->AWAKENING\n", __func__)); @@ -1021,6 +1062,8 @@ igmp_v2_update_group(struct in_multi *inm, const int timer) case IGMP_LEAVING_MEMBER: break; } + + return (inm->inm_timer); } /* @@ -1039,11 +1082,14 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, uint32_t maxresp, nsrc, qqi; uint16_t timer; uint8_t qrv; + struct igmp_tparams itp = { 0, 0, 0, 0 }; + + IGMP_LOCK_ASSERT_NOTHELD(); is_general_query = 0; - IGMP_PRINTF(("process v3 query on ifp %p(%s%d)\n", ifp, ifp->if_name, - ifp->if_unit)); + IGMP_PRINTF(("%s: process v3 query on ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */ if (maxresp >= 128) { @@ -1070,7 +1116,7 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, (IGMP_EXP(igmpv3->igmp_qqi) + 3); } - timer = maxresp * PR_SLOWHZ / IGMP_TIMER_SCALE; + timer = maxresp / IGMP_TIMER_SCALE; if (timer == 0) timer = 1; @@ -1092,7 +1138,7 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, if (!in_allhosts(ip->ip_dst) || nsrc > 0) { IGMPSTAT_INC(igps_rcv_badqueries); OIGMPSTAT_INC(igps_rcv_badqueries); - return (0); + goto done; } is_general_query = 1; } else { @@ -1108,10 +1154,11 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, IGI_LOCK(igi); if (igi->igi_flags & IGIF_LOOPBACK) { - IGMP_PRINTF(("ignore v3 query on IGIF_LOOPBACK ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + IGMP_PRINTF(("%s: ignore v3 query on IGIF_LOOPBACK " + "ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); IGI_UNLOCK(igi); - return (0); + goto done; } /* @@ -1121,20 +1168,20 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, * timer expires. 
*/ if (igi->igi_version != IGMP_VERSION_3) { - IGMP_PRINTF(("ignore v3 query in v%d mode on ifp %p(%s%d)\n", - igi->igi_version, ifp, ifp->if_name, ifp->if_unit)); + IGMP_PRINTF(("%s: ignore v3 query in v%d mode on " + "ifp 0x%llx(%s)\n", __func__, igi->igi_version, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); IGI_UNLOCK(igi); - return (0); + goto done; } - igmp_set_version(igi, IGMP_VERSION_3); + itp.qpt = igmp_set_version(igi, IGMP_VERSION_3); igi->igi_rv = qrv; igi->igi_qi = qqi; - igi->igi_qri = maxresp; - + igi->igi_qri = MAX(timer, IGMP_QRI_MIN); - IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, qrv, qqi, - maxresp)); + IGMP_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, igi->igi_rv, + igi->igi_qi, igi->igi_qri)); if (is_general_query) { /* @@ -1145,11 +1192,10 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, * not schedule any other reports. * Otherwise, reset the interface timer. */ - IGMP_PRINTF(("process v3 general query on ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + IGMP_PRINTF(("%s: process v3 general query on ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) { - igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer); - interface_timers_running = 1; + itp.it = igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer); } IGI_UNLOCK(igi); } else { @@ -1164,11 +1210,9 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, IN_LOOKUP_MULTI(&igmpv3->igmp_group, ifp, inm); in_multihead_lock_done(); if (inm == NULL) - return (0); + goto done; INM_LOCK(inm); -#ifndef __APPLE__ - /* TODO: need ratecheck equivalent */ if (nsrc > 0) { if (!ratecheck(&inm->inm_lastgsrtv, &igmp_gsrdelay)) { @@ -1177,13 +1221,12 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, IGMPSTAT_INC(igps_drop_gsr_queries); INM_UNLOCK(inm); INM_REMREF(inm); /* from IN_LOOKUP_MULTI */ - return (0); + goto done; } } -#endif - IGMP_PRINTF(("process v3 %s query on ifp %p(%s%d)\n", - inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_name, - ifp->if_unit)); + IGMP_INET_PRINTF(igmpv3->igmp_group, + ("process v3 %s query on ifp 0x%llx(%s)\n", _igmp_inet_buf, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); /* * If there is a pending General Query response * scheduled sooner than the selected delay, no @@ -1192,15 +1235,22 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, * group-specific or group-and-source query. */ IGI_LOCK(igi); - if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) { - IGI_UNLOCK(igi); - igmp_input_v3_group_query(inm, timer, igmpv3); - } else { - IGI_UNLOCK(igi); + itp.it = igi->igi_v3_timer; + IGI_UNLOCK(igi); + if (itp.it == 0 || itp.it >= timer) { + (void) igmp_input_v3_group_query(inm, timer, igmpv3); + itp.cst = inm->inm_timer; } INM_UNLOCK(inm); INM_REMREF(inm); /* from IN_LOOKUP_MULTI */ } +done: + if (itp.it > 0) { + IGMP_PRINTF(("%s: v3 general query response scheduled in " + "T+%d seconds on ifp 0x%llx(%s)\n", __func__, itp.it, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + } + igmp_set_timeout(&itp); return (0); } @@ -1211,7 +1261,7 @@ igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, * Return <0 if any error occured. Currently this is ignored. 
*/ static int -igmp_input_v3_group_query(struct in_multi *inm, +igmp_input_v3_group_query(struct in_multi *inm, int timer, /*const*/ struct igmpv3 *igmpv3) { int retval; @@ -1252,7 +1302,6 @@ igmp_input_v3_group_query(struct in_multi *inm, } inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); - current_state_timers_running = 1; return (retval); } @@ -1263,7 +1312,6 @@ igmp_input_v3_group_query(struct in_multi *inm, if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) { timer = min(inm->inm_timer, timer); inm->inm_timer = IGMP_RANDOM_DELAY(timer); - current_state_timers_running = 1; return (retval); } @@ -1298,7 +1346,6 @@ igmp_input_v3_group_query(struct in_multi *inm, __func__)); inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY(timer); - current_state_timers_running = 1; } } @@ -1311,7 +1358,7 @@ igmp_input_v3_group_query(struct in_multi *inm, * NOTE: 0.0.0.0 workaround breaks const correctness. */ static int -igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, +igmp_input_v1_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip, /*const*/ struct igmp *igmp) { struct in_ifaddr *ia; @@ -1320,7 +1367,8 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, IGMPSTAT_INC(igps_rcv_reports); OIGMPSTAT_INC(igps_rcv_reports); - if (ifp->if_flags & IFF_LOOPBACK) + if ((ifp->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) return (0); if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) || @@ -1347,8 +1395,9 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, } } - IGMP_PRINTF(("process v1 report %s on ifp %p(%s%d)\n", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, ifp->if_unit)); + IGMP_INET_PRINTF(igmp->igmp_group, + ("process v1 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); /* * IGMPv1 report suppression. @@ -1394,16 +1443,18 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: - IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, - ifp->if_unit)); + IGMP_INET_PRINTF(igmp->igmp_group, + ("report suppressed for %s on ifp 0x%llx(%s)\n", + _igmp_inet_buf, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); case IGMP_SLEEPING_MEMBER: inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_REPORTING_MEMBER: - IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, - ifp->if_unit)); + IGMP_INET_PRINTF(igmp->igmp_group, + ("report suppressed for %s on ifp 0x%llx(%s)\n", + _igmp_inet_buf, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); if (igi->igi_version == IGMP_VERSION_1) inm->inm_state = IGMP_LAZY_MEMBER; else if (igi->igi_version == IGMP_VERSION_2) @@ -1428,7 +1479,7 @@ igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, * NOTE: 0.0.0.0 workaround breaks const correctness. 
*/ static int -igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, +igmp_input_v2_report(struct ifnet *ifp, struct mbuf *m, /*const*/ struct ip *ip, /*const*/ struct igmp *igmp) { struct in_ifaddr *ia; @@ -1453,7 +1504,8 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, IGMPSTAT_INC(igps_rcv_reports); OIGMPSTAT_INC(igps_rcv_reports); - if (ifp->if_flags & IFF_LOOPBACK) { + if ((ifp->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { if (ia != NULL) IFA_REMREF(&ia->ia_ifa); return (0); @@ -1485,8 +1537,9 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, if (ia != NULL) IFA_REMREF(&ia->ia_ifa); - IGMP_PRINTF(("process v2 report %s on ifp %p(%s%d)\n", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, ifp->if_unit)); + IGMP_INET_PRINTF(igmp->igmp_group, + ("process v2 report %s on ifp 0x%llx(%s)\n", _igmp_inet_buf, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); /* * IGMPv2 report suppression. @@ -1532,9 +1585,10 @@ igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, case IGMP_REPORTING_MEMBER: case IGMP_IDLE_MEMBER: case IGMP_AWAKENING_MEMBER: - IGMP_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n", - inet_ntoa(igmp->igmp_group), ifp, ifp->if_name, - ifp->if_unit)); + IGMP_INET_PRINTF(igmp->igmp_group, + ("report suppressed for %s on ifp 0x%llx(%s)\n", + _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp), + if_name(ifp))); case IGMP_LAZY_MEMBER: inm->inm_state = IGMP_LAZY_MEMBER; break; @@ -1562,7 +1616,8 @@ igmp_input(struct mbuf *m, int off) int minlen; int queryver; - IGMP_PRINTF(("%s: called w/mbuf (%p,%d)\n", __func__, m, off)); + IGMP_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m), off)); ifp = m->m_pkthdr.rcvif; @@ -1717,7 +1772,7 @@ igmp_input(struct mbuf *m, int off) case IGMP_v1_HOST_MEMBERSHIP_REPORT: if (!igmp_v1enable) break; - if (igmp_input_v1_report(ifp, ip, igmp) != 0) { + if (igmp_input_v1_report(ifp, m, ip, igmp) != 0) { m_freem(m); return; } @@ -1726,11 +1781,9 @@ igmp_input(struct mbuf *m, int off) case IGMP_v2_HOST_MEMBERSHIP_REPORT: if (!igmp_v2enable) break; -#ifndef __APPLE__ if (!ip_checkrouteralert(m)) IGMPSTAT_INC(igps_rcv_nora); -#endif - if (igmp_input_v2_report(ifp, ip, igmp) != 0) { + if (igmp_input_v2_report(ifp, m, ip, igmp) != 0) { m_freem(m); return; } @@ -1741,17 +1794,15 @@ igmp_input(struct mbuf *m, int off) * Hosts do not need to process IGMPv3 membership reports, * as report suppression is no longer required. */ -#ifndef __APPLE__ if (!ip_checkrouteralert(m)) IGMPSTAT_INC(igps_rcv_nora); -#endif break; default: break; } - lck_mtx_assert(&igmp_mtx, LCK_MTX_ASSERT_NOTOWNED); + IGMP_LOCK_ASSERT_NOTHELD(); /* * Pass all valid IGMP packets up to any process(es) listening on a * raw IGMP socket. @@ -1759,58 +1810,88 @@ igmp_input(struct mbuf *m, int off) rip_input(m, off); } - /* - * IGMP slowtimo handler. - * Combiles both the slow and fast timer into one. We loose some responsivness but - * allows the system to avoid having a pr_fasttimo, thus allowing for power savings. - * + * Schedule IGMP timer based on various parameters; caller must ensure that + * lock ordering is maintained as this routine acquires IGMP global lock. 
*/ void -igmp_slowtimo(void) +igmp_set_timeout(struct igmp_tparams *itp) +{ + IGMP_LOCK_ASSERT_NOTHELD(); + VERIFY(itp != NULL); + + if (itp->qpt != 0 || itp->it != 0 || itp->cst != 0 || itp->sct != 0) { + IGMP_LOCK(); + if (itp->qpt != 0) + querier_present_timers_running = 1; + if (itp->it != 0) + interface_timers_running = 1; + if (itp->cst != 0) + current_state_timers_running = 1; + if (itp->sct != 0) + state_change_timers_running = 1; + igmp_sched_timeout(); + IGMP_UNLOCK(); + } +} + +/* + * IGMP timer handler (per 1 second). + */ +static void +igmp_timeout(void *arg) { +#pragma unused(arg) struct ifqueue scq; /* State-change packets */ struct ifqueue qrq; /* Query response packets */ struct ifnet *ifp; struct igmp_ifinfo *igi; struct in_multi *inm; - int loop = 0, uri_fasthz = 0; + int loop = 0, uri_sec = 0; SLIST_HEAD(, in_multi) inm_dthead; SLIST_INIT(&inm_dthead); - lck_mtx_lock(&igmp_mtx); + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + + IGMP_LOCK(); - LIST_FOREACH(igi, &igi_head, igi_link) { - IGI_LOCK(igi); - igmp_v1v2_process_querier_timers(igi); - IGI_UNLOCK(igi); - } + IGMP_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d\n", __func__, + querier_present_timers_running, interface_timers_running, + current_state_timers_running, state_change_timers_running)); /* - * NOTE: previously handled by fasttimo - * - * Quick check to see if any work needs to be done, in order to - * minimize the overhead of fasttimo processing. + * IGMPv1/v2 querier present timer processing. */ - if (!current_state_timers_running && - !interface_timers_running && - !state_change_timers_running) { - lck_mtx_unlock(&igmp_mtx); - return; + if (querier_present_timers_running) { + querier_present_timers_running = 0; + LIST_FOREACH(igi, &igi_head, igi_link) { + IGI_LOCK(igi); + igmp_v1v2_process_querier_timers(igi); + if (igi->igi_v1_timer > 0 || igi->igi_v2_timer > 0) + querier_present_timers_running = 1; + IGI_UNLOCK(igi); + } } /* * IGMPv3 General Query response timer processing. */ if (interface_timers_running) { + IGMP_PRINTF(("%s: interface timers running\n", __func__)); interface_timers_running = 0; LIST_FOREACH(igi, &igi_head, igi_link) { IGI_LOCK(igi); if (igi->igi_v3_timer == 0) { /* Do nothing. */ } else if (--igi->igi_v3_timer == 0) { - igmp_v3_dispatch_general_query(igi); + if (igmp_v3_dispatch_general_query(igi) > 0) + interface_timers_running = 1; } else { interface_timers_running = 1; } @@ -1831,6 +1912,8 @@ igmp_slowtimo(void) memset(&scq, 0, sizeof(struct ifqueue)); scq.ifq_maxlen = IGMP_MAX_STATE_CHANGE_PACKETS; + IGMP_PRINTF(("%s: state change timers running\n", __func__)); + /* * IGMPv1/v2/v3 host report and state-change timer processing. * Note: Processing a v3 group timer may remove a node. @@ -1841,7 +1924,7 @@ igmp_slowtimo(void) IGI_LOCK(igi); ifp = igi->igi_ifp; loop = (igi->igi_flags & IGIF_LOOPBACK) ? 
1 : 0; - uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri * PR_SLOWHZ); + uri_sec = IGMP_RANDOM_DELAY(igi->igi_uri); IGI_UNLOCK(igi); in_multihead_lock_shared(); @@ -1860,7 +1943,7 @@ igmp_slowtimo(void) break; case IGMP_VERSION_3: igmp_v3_process_group_timers(igi, &qrq, - &scq, inm, uri_fasthz); + &scq, inm, uri_sec); break; } IGI_UNLOCK(igi); @@ -1873,11 +1956,11 @@ next: IGI_LOCK(igi); if (igi->igi_version == IGMP_VERSION_1 || igi->igi_version == IGMP_VERSION_2) { - igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop, ifp); + igmp_dispatch_queue(igi, &igi->igi_v2q, 0, loop); } else if (igi->igi_version == IGMP_VERSION_3) { IGI_UNLOCK(igi); - igmp_dispatch_queue(NULL, &qrq, 0, loop, ifp); - igmp_dispatch_queue(NULL, &scq, 0, loop, ifp); + igmp_dispatch_queue(NULL, &qrq, 0, loop); + igmp_dispatch_queue(NULL, &scq, 0, loop); VERIFY(qrq.ifq_len == 0); VERIFY(scq.ifq_len == 0); IGI_LOCK(igi); @@ -1902,12 +1985,28 @@ next: } out_locked: - lck_mtx_unlock(&igmp_mtx); + /* re-arm the timer if there's work to do */ + igmp_timeout_run = 0; + igmp_sched_timeout(); + IGMP_UNLOCK(); /* Now that we're dropped all locks, release detached records */ IGMP_REMOVE_DETACHED_INM(&inm_dthead); } +static void +igmp_sched_timeout(void) +{ + IGMP_LOCK_ASSERT_HELD(); + + if (!igmp_timeout_run && + (querier_present_timers_running || current_state_timers_running || + interface_timers_running || state_change_timers_running)) { + igmp_timeout_run = 1; + timeout(igmp_timeout, NULL, hz); + } +} + /* * Free the in_multi reference(s) for this IGMP lifecycle. * @@ -1964,6 +2063,7 @@ igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version) { int report_timer_expired; + IGMP_LOCK_ASSERT_HELD(); INM_LOCK_ASSERT_HELD(inm); IGI_LOCK_ASSERT_HELD(inm->inm_igi); @@ -1973,6 +2073,7 @@ igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version) report_timer_expired = 1; } else { current_state_timers_running = 1; + /* caller will schedule timer */ return; } @@ -2010,11 +2111,12 @@ igmp_v1v2_process_group_timer(struct in_multi *inm, const int igmp_version) static void igmp_v3_process_group_timers(struct igmp_ifinfo *igi, struct ifqueue *qrq, struct ifqueue *scq, - struct in_multi *inm, const int uri_fasthz) + struct in_multi *inm, const int uri_sec) { int query_response_timer_expired; int state_change_retransmit_timer_expired; + IGMP_LOCK_ASSERT_HELD(); INM_LOCK_ASSERT_HELD(inm); IGI_LOCK_ASSERT_HELD(igi); VERIFY(igi == inm->inm_igi); @@ -2026,7 +2128,7 @@ igmp_v3_process_group_timers(struct igmp_ifinfo *igi, * During a transition from v1/v2 compatibility mode back to v3, * a group record in REPORTING state may still have its group * timer active. This is a no-op in this function; it is easier - * to deal with it here than to complicate the slow-timeout path. + * to deal with it here than to complicate the timeout path. */ if (inm->inm_timer == 0) { query_response_timer_expired = 0; @@ -2034,6 +2136,7 @@ igmp_v3_process_group_timers(struct igmp_ifinfo *igi, query_response_timer_expired = 1; } else { current_state_timers_running = 1; + /* caller will schedule timer */ } if (inm->inm_sctimer == 0) { @@ -2042,9 +2145,10 @@ igmp_v3_process_group_timers(struct igmp_ifinfo *igi, state_change_retransmit_timer_expired = 1; } else { state_change_timers_running = 1; + /* caller will schedule timer */ } - /* We are in fasttimo, so be quick about it. */ + /* We are in timer callback, so be quick about it. 
*/ if (!state_change_retransmit_timer_expired && !query_response_timer_expired) return; @@ -2087,8 +2191,9 @@ igmp_v3_process_group_timers(struct igmp_ifinfo *igi, * reset the timer. */ if (--inm->inm_scrv > 0) { - inm->inm_sctimer = uri_fasthz; + inm->inm_sctimer = uri_sec; state_change_timers_running = 1; + /* caller will schedule timer */ } /* * Retransmit the previously computed state-change @@ -2100,9 +2205,9 @@ igmp_v3_process_group_timers(struct igmp_ifinfo *igi, (void) igmp_v3_merge_state_changes(inm, scq); inm_commit(inm); - IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name, - inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: T1 -> T0 for %s/%s\n", __func__, + _igmp_inet_buf, if_name(inm->inm_ifp))); /* * If we are leaving the group for good, make sure @@ -2165,24 +2270,23 @@ igmp_v3_suppress_group_record(struct in_multi *inm) * Switch to a different IGMP version on the given interface, * as per Section 7.2.1. */ -static void +static uint32_t igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version) { int old_version_timer; IGI_LOCK_ASSERT_HELD(igi); - IGMP_PRINTF(("%s: switching to v%d on ifp %p(%s%d)\n", __func__, - igmp_version, igi->igi_ifp, igi->igi_ifp->if_name, - igi->igi_ifp->if_unit)); + IGMP_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__, + igmp_version, (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), + if_name(igi->igi_ifp))); if (igmp_version == IGMP_VERSION_1 || igmp_version == IGMP_VERSION_2) { /* * Compute the "Older Version Querier Present" timer as per - * Section 8.12. + * Section 8.12, in seconds. */ old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri; - old_version_timer *= PR_SLOWHZ; if (igmp_version == IGMP_VERSION_1) { igi->igi_v1_timer = old_version_timer; @@ -2206,6 +2310,8 @@ igmp_set_version(struct igmp_ifinfo *igi, const int igmp_version) } IGI_LOCK_ASSERT_HELD(igi); + + return (MAX(igi->igi_v1_timer, igi->igi_v2_timer)); } /* @@ -2226,12 +2332,12 @@ igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi) IGI_LOCK_ASSERT_HELD(igi); - IGMP_PRINTF(("%s: cancel v3 timers on ifp %p(%s%d)\n", __func__, - igi->igi_ifp, igi->igi_ifp->if_name, igi->igi_ifp->if_unit)); + IGMP_PRINTF(("%s: cancel v3 timers on ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), if_name(igi->igi_ifp))); /* * Stop the v3 General Query Response on this link stone dead. - * If fasttimo is woken up due to interface_timers_running, + * If timer is woken up due to interface_timers_running, * the flag will be cleared if there are no pending link timers. */ igi->igi_v3_timer = 0; @@ -2320,10 +2426,11 @@ igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi) * Revert to IGMPv3. */ if (igi->igi_version != IGMP_VERSION_3) { - IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n", - __func__, igi->igi_version, IGMP_VERSION_3, - igi->igi_ifp, igi->igi_ifp->if_name, - igi->igi_ifp->if_unit)); + IGMP_PRINTF(("%s: transition from v%d -> v%d " + "on 0x%llx(%s)\n", __func__, + igi->igi_version, IGMP_VERSION_3, + (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), + if_name(igi->igi_ifp))); igi->igi_version = IGMP_VERSION_3; IF_DRAIN(&igi->igi_v2q); } @@ -2336,22 +2443,25 @@ igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi) * If IGMPv2 is enabled, revert to IGMPv2. 
*/ if (!igmp_v2enable) { - IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n", - __func__, igi->igi_version, IGMP_VERSION_3, - igi->igi_ifp, igi->igi_ifp->if_name, - igi->igi_ifp->if_unit)); + IGMP_PRINTF(("%s: transition from v%d -> v%d " + "on 0x%llx(%s%d)\n", __func__, + igi->igi_version, IGMP_VERSION_3, + (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), + igi->igi_ifp->if_name, igi->igi_ifp->if_unit)); igi->igi_v2_timer = 0; igi->igi_version = IGMP_VERSION_3; IF_DRAIN(&igi->igi_v2q); } else { --igi->igi_v2_timer; if (igi->igi_version != IGMP_VERSION_2) { - IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n", - __func__, igi->igi_version, IGMP_VERSION_2, - igi->igi_ifp, igi->igi_ifp->if_name, - igi->igi_ifp->if_unit)); + IGMP_PRINTF(("%s: transition from v%d -> v%d " + "on 0x%llx(%s)\n", __func__, + igi->igi_version, IGMP_VERSION_2, + (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), + if_name(igi->igi_ifp))); igi->igi_version = IGMP_VERSION_2; IF_DRAIN(&igi->igi_gq); + igmp_v3_cancel_link_timers(igi); } } } else if (igi->igi_v1_timer > 0) { @@ -2364,10 +2474,11 @@ igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi) * If IGMPv1 is enabled, reset IGMPv2 timer if running. */ if (!igmp_v1enable) { - IGMP_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n", - __func__, igi->igi_version, IGMP_VERSION_3, - igi->igi_ifp, igi->igi_ifp->if_name, - igi->igi_ifp->if_unit)); + IGMP_PRINTF(("%s: transition from v%d -> v%d " + "on 0x%llx(%s%d)\n", __func__, + igi->igi_version, IGMP_VERSION_3, + (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), + igi->igi_ifp->if_name, igi->igi_ifp->if_unit)); igi->igi_v1_timer = 0; igi->igi_version = IGMP_VERSION_3; IF_DRAIN(&igi->igi_v2q); @@ -2375,9 +2486,10 @@ igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi) --igi->igi_v1_timer; } if (igi->igi_v2_timer > 0) { - IGMP_PRINTF(("%s: cancel v2 timer on %p(%s%d)\n", - __func__, igi->igi_ifp, igi->igi_ifp->if_name, - igi->igi_ifp->if_unit)); + IGMP_PRINTF(("%s: cancel v2 timer on 0x%llx(%s%d)\n", + __func__, + (uint64_t)VM_KERNEL_ADDRPERM(igi->igi_ifp), + igi->igi_ifp->if_name, igi->igi_ifp->if_unit)); igi->igi_v2_timer = 0; } } @@ -2433,6 +2545,8 @@ igmp_v1v2_queue_report(struct in_multi *inm, const int type) else ip->ip_dst = inm->inm_addr; + igmp_save_context(m, ifp); + m->m_flags |= M_IGMPV2; if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) m->m_flags |= M_IGMP_LOOP; @@ -2442,16 +2556,17 @@ igmp_v1v2_queue_report(struct in_multi *inm, const int type) * in_multihead_lock in shared or exclusive mode, we can't call * igmp_sendpkt() here since that will eventually call ip_output(), * which will try to lock in_multihead_lock and cause a deadlock. - * Instead we defer the work to the igmp_slowtimo() thread, thus + * Instead we defer the work to the igmp_timeout() thread, thus * avoiding unlocking in_multihead_lock here. */ if (IF_QFULL(&inm->inm_igi->igi_v2q)) { IGMP_PRINTF(("%s: v1/v2 outbound queue full\n", __func__)); error = ENOMEM; m_freem(m); - } else + } else { IF_ENQUEUE(&inm->inm_igi->igi_v2q, m); - + VERIFY(error == 0); + } return (error); } @@ -2474,12 +2589,15 @@ igmp_v1v2_queue_report(struct in_multi *inm, const int type) * compute source filter lists. 
*/ int -igmp_change_state(struct in_multi *inm) +igmp_change_state(struct in_multi *inm, struct igmp_tparams *itp) { struct igmp_ifinfo *igi; struct ifnet *ifp; int error = 0; + VERIFY(itp != NULL); + bzero(itp, sizeof (*itp)); + INM_LOCK_ASSERT_HELD(inm); VERIFY(inm->inm_igi != NULL); IGI_LOCK_ASSERT_NOTHELD(inm->inm_igi); @@ -2508,18 +2626,18 @@ igmp_change_state(struct in_multi *inm) inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode)); if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) { IGMP_PRINTF(("%s: initial join\n", __func__)); - error = igmp_initial_join(inm, igi); + error = igmp_initial_join(inm, igi, itp); goto out; } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) { IGMP_PRINTF(("%s: final leave\n", __func__)); - igmp_final_leave(inm, igi); + igmp_final_leave(inm, igi, itp); goto out; } } else { IGMP_PRINTF(("%s: filter set change\n", __func__)); } - error = igmp_handle_state_change(inm, igi); + error = igmp_handle_state_change(inm, igi, itp); out: return (error); } @@ -2535,7 +2653,8 @@ out: * initial state of the membership. */ static int -igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) +igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi, + struct igmp_tparams *itp) { struct ifnet *ifp; struct ifqueue *ifq; @@ -2543,10 +2662,12 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) INM_LOCK_ASSERT_HELD(inm); IGI_LOCK_ASSERT_NOTHELD(igi); + VERIFY(itp != NULL); - IGMP_PRINTF(("%s: initial join %s on ifp %p(%s%d)\n", - __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, - inm->inm_ifp->if_name, inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: initial join %s on ifp 0x%llx(%s)\n", __func__, + _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp), + if_name(inm->inm_ifp))); error = 0; syncstates = 1; @@ -2602,9 +2723,9 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) IGI_LOCK_ASSERT_HELD(igi); if (error == 0) { - inm->inm_timer = IGMP_RANDOM_DELAY( - IGMP_V1V2_MAX_RI * PR_SLOWHZ); - current_state_timers_running = 1; + inm->inm_timer = + IGMP_RANDOM_DELAY(IGMP_V1V2_MAX_RI); + itp->cst = 1; } break; @@ -2625,6 +2746,7 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) IF_DRAIN(ifq); retval = igmp_v3_enqueue_group_record(ifq, inm, 1, 0, 0); + itp->cst = (ifq->ifq_len > 0); IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval)); if (retval <= 0) { @@ -2635,7 +2757,7 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) /* * Schedule transmission of pending state-change * report up to RV times for this link. The timer - * will fire at the next igmp_fasttimo (~200ms), + * will fire at the next igmp_timeout (1 second), * giving us an opportunity to merge the reports. */ if (igi->igi_flags & IGIF_LOOPBACK) { @@ -2645,7 +2767,7 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) inm->inm_scrv = igi->igi_rv; } inm->inm_sctimer = 1; - state_change_timers_running = 1; + itp->sct = 1; error = 0; break; @@ -2660,9 +2782,9 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) */ if (syncstates) { inm_commit(inm); - IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name, - inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: T1 -> T0 for %s/%s\n", __func__, + _igmp_inet_buf, if_name(inm->inm_ifp))); } return (error); @@ -2672,17 +2794,20 @@ igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) * Issue an intermediate state change during the IGMP life-cycle. 
 */ static int -igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi) +igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi, + struct igmp_tparams *itp) { struct ifnet *ifp; - int retval; + int retval = 0; INM_LOCK_ASSERT_HELD(inm); IGI_LOCK_ASSERT_NOTHELD(igi); + VERIFY(itp != NULL); - IGMP_PRINTF(("%s: state change for %s on ifp %p(%s%d)\n", - __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, - inm->inm_ifp->if_name, inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: state change for %s on ifp 0x%llx(%s)\n", __func__, + _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp), + if_name(inm->inm_ifp))); ifp = inm->inm_ifp; @@ -2700,18 +2825,21 @@ igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi) } IGMP_PRINTF(("%s: nothing to do\n", __func__)); inm_commit(inm); - IGMP_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name)); - return (0); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: T1 -> T0 for %s/%s\n", __func__, + _igmp_inet_buf, inm->inm_ifp->if_name)); + goto done; } IF_DRAIN(&inm->inm_scq); retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0); + itp->cst = (inm->inm_scq.ifq_len > 0); IGMP_PRINTF(("%s: enqueue record = %d\n", __func__, retval)); if (retval <= 0) { IGI_UNLOCK(igi); - return (-retval); + retval *= -1; + goto done; } /* * If record(s) were enqueued, start the state-change @@ -2719,10 +2847,10 @@ igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi) * report timer for this group. */ inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv); inm->inm_sctimer = 1; - state_change_timers_running = 1; + itp->sct = 1; IGI_UNLOCK(igi); - - return (0); +done: + return (retval); } /* @@ -2735,16 +2863,19 @@ igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi) * to INCLUDE {} for immediate transmission. */ static void -igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi) +igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi, + struct igmp_tparams *itp) { int syncstates = 1; INM_LOCK_ASSERT_HELD(inm); IGI_LOCK_ASSERT_NOTHELD(igi); + VERIFY(itp != NULL); - IGMP_PRINTF(("%s: final leave %s on ifp %p(%s%d)\n", - __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, - inm->inm_ifp->if_name, inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: final leave %s on ifp 0x%llx(%s)\n", __func__, + _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp), + if_name(inm->inm_ifp))); switch (inm->inm_state) { case IGMP_NOT_MEMBER: @@ -2766,7 +2897,9 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi) "mode\n", __func__); /* NOTREACHED */ } - igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE); + /* schedule timer if enqueue is successful */ + itp->cst = (igmp_v1v2_queue_report(inm, + IGMP_HOST_LEAVE_MESSAGE) == 0); INM_LOCK_ASSERT_HELD(inm); IGI_LOCK_ASSERT_HELD(igi); @@ -2776,7 +2909,7 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi) /* * Stop group timer and all pending reports. * Immediately enqueue a state-change report - * TO_IN {} to be sent on the next fast timeout, + * TO_IN {} to be sent on the next timeout, * giving us an opportunity to merge reports. 
*/ IF_DRAIN(&inm->inm_scq); @@ -2786,10 +2919,10 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi) } else { inm->inm_scrv = igi->igi_rv; } - IGMP_PRINTF(("%s: Leaving %s/%s%d with %d " + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: Leaving %s/%s with %d " "pending retransmissions.\n", __func__, - inet_ntoa(inm->inm_addr), - inm->inm_ifp->if_name, inm->inm_ifp->if_unit, + _igmp_inet_buf, if_name(inm->inm_ifp), inm->inm_scrv)); if (inm->inm_scrv == 0) { inm->inm_state = IGMP_NOT_MEMBER; @@ -2811,13 +2944,14 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi) retval = igmp_v3_enqueue_group_record( &inm->inm_scq, inm, 1, 0, 0); + itp->cst = (inm->inm_scq.ifq_len > 0); KASSERT(retval != 0, ("%s: enqueue record = %d\n", __func__, retval)); inm->inm_state = IGMP_LEAVING_MEMBER; inm->inm_sctimer = 1; - state_change_timers_running = 1; + itp->sct = 1; syncstates = 0; } } @@ -2832,13 +2966,13 @@ igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi) if (syncstates) { inm_commit(inm); - IGMP_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__, - inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name, - inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: T1 -> T0 for %s/%s\n", __func__, + _igmp_inet_buf, if_name(inm->inm_ifp))); inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; - IGMP_PRINTF(("%s: T1 now MCAST_UNDEFINED for %s/%s%d\n", - __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_name, - inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: T1 now MCAST_UNDEFINED for %s/%s\n", + __func__, _igmp_inet_buf, if_name(inm->inm_ifp))); } } @@ -2963,9 +3097,10 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, return (igmp_v3_enqueue_filter_change(ifq, inm)); if (type == IGMP_DO_NOTHING) { - IGMP_PRINTF(("%s: nothing to do for %s/%s%d\n", - __func__, inet_ntoa(inm->inm_addr), - inm->inm_ifp->if_name, inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: nothing to do for %s/%s\n", + __func__, _igmp_inet_buf, + if_name(inm->inm_ifp))); return (0); } @@ -2978,9 +3113,10 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, if (record_has_sources) minrec0len += sizeof(in_addr_t); - IGMP_PRINTF(("%s: queueing %s for %s/%s%d\n", __func__, - igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr), - inm->inm_ifp->if_name, inm->inm_ifp->if_unit)); + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: queueing %s for %s/%s\n", __func__, + igmp_rec_type_to_str(type), _igmp_inet_buf, + if_name(inm->inm_ifp))); /* * Check if we have a packet in the tail of the queue for this @@ -3021,6 +3157,8 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, if (m == NULL) return (-ENOMEM); + igmp_save_context(m, ifp); + IGMP_PRINTF(("%s: allocated first packet\n", __func__)); } @@ -3065,8 +3203,12 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, } msrcs = 0; RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) { - IGMP_PRINTF(("%s: visit node %s\n", __func__, - inet_ntoa_haddr(ims->ims_haddr))); +#ifdef IGMP_DEBUG + char buf[MAX_IPv4_STR_LEN]; + + inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf)); + IGMP_PRINTF(("%s: visit node %s\n", __func__, buf)); +#endif now = ims_get_mode(inm, ims, 1); IGMP_PRINTF(("%s: node is %d\n", __func__, now)); if ((now != mode) || @@ -3113,7 +3255,6 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, if (m != m0) { IGMP_PRINTF(("%s: enqueueing first packet\n", __func__)); m->m_pkthdr.vt_nrecs = 1; - m->m_pkthdr.rcvif = ifp; 
IF_ENQUEUE(ifq, m); } else { m->m_pkthdr.vt_nrecs++; @@ -3144,6 +3285,7 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, } if (m == NULL) return (-ENOMEM); + igmp_save_context(m, ifp); md = m_getptr(m, 0, &off); pig = (struct igmp_grouprec *)(void *) (mtod(md, uint8_t *) + off); @@ -3163,8 +3305,12 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, msrcs = 0; RB_FOREACH_FROM(ims, ip_msource_tree, nims) { - IGMP_PRINTF(("%s: visit node %s\n", __func__, - inet_ntoa_haddr(ims->ims_haddr))); +#ifdef IGMP_DEBUG + char buf[MAX_IPv4_STR_LEN]; + + inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf)); + IGMP_PRINTF(("%s: visit node %s\n", __func__, buf)); +#endif now = ims_get_mode(inm, ims, 1); if ((now != mode) || (now == mode && mode == MCAST_UNDEFINED)) { @@ -3194,7 +3340,6 @@ igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, nbytes += (msrcs * sizeof(in_addr_t)); IGMP_PRINTF(("%s: enqueueing next packet\n", __func__)); - m->m_pkthdr.rcvif = ifp; IF_ENQUEUE(ifq, m); } @@ -3307,6 +3452,7 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm) return (-ENOMEM); } m->m_pkthdr.vt_nrecs = 0; + igmp_save_context(m, ifp); m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); @@ -3357,8 +3503,12 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm) if (nims == NULL) nims = RB_MIN(ip_msource_tree, &inm->inm_srcs); RB_FOREACH_FROM(ims, ip_msource_tree, nims) { - IGMP_PRINTF(("%s: visit node %s\n", - __func__, inet_ntoa_haddr(ims->ims_haddr))); +#ifdef IGMP_DEBUG + char buf[MAX_IPv4_STR_LEN]; + + inet_ntop_haddr(ims->ims_haddr, buf, sizeof(buf)); + IGMP_PRINTF(("%s: visit node %s\n", __func__, buf)); +#endif now = ims_get_mode(inm, ims, 1); then = ims_get_mode(inm, ims, 0); IGMP_PRINTF(("%s: mode: t0 %d, t1 %d\n", @@ -3425,7 +3575,6 @@ igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm) * packet if it wasn't already queued. 
*/ m->m_pkthdr.vt_nrecs++; - m->m_pkthdr.rcvif = ifp; if (m != m0) IF_ENQUEUE(ifq, m); nbytes += npbytes; @@ -3467,8 +3616,8 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq) gq = &inm->inm_scq; #ifdef IGMP_DEBUG if (gq->ifq_head == NULL) { - IGMP_PRINTF(("%s: WARNING: queue for inm %p is empty\n", - __func__, inm)); + IGMP_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm))); } #endif @@ -3501,7 +3650,8 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq) if (!domerge && IF_QFULL(gq)) { IGMP_PRINTF(("%s: outbound queue full, skipping whole " - "packet %p\n", __func__, m)); + "packet 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); n = m->m_nextpkt; if (!docopy) { IF_REMQUEUE(gq, m); @@ -3512,13 +3662,15 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq) } if (!docopy) { - IGMP_PRINTF(("%s: dequeueing %p\n", __func__, m)); + IGMP_PRINTF(("%s: dequeueing 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); n = m->m_nextpkt; IF_REMQUEUE(gq, m); m0 = m; m = n; } else { - IGMP_PRINTF(("%s: copying %p\n", __func__, m)); + IGMP_PRINTF(("%s: copying 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); m0 = m_dup(m, M_NOWAIT); if (m0 == NULL) return (ENOMEM); @@ -3527,15 +3679,17 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq) } if (!domerge) { - IGMP_PRINTF(("%s: queueing %p to ifscq %p)\n", - __func__, m0, ifscq)); - m0->m_pkthdr.rcvif = inm->inm_ifp; + IGMP_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0), + (uint64_t)VM_KERNEL_ADDRPERM(ifscq))); IF_ENQUEUE(ifscq, m0); } else { struct mbuf *mtl; /* last mbuf of packet mt */ - IGMP_PRINTF(("%s: merging %p with ifscq tail %p)\n", - __func__, m0, mt)); + IGMP_PRINTF(("%s: merging 0x%llx with ifscq tail " + "0x%llx)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m0), + (uint64_t)VM_KERNEL_ADDRPERM(mt))); mtl = m_last(mt); m0->m_flags &= ~M_PKTHDR; @@ -3553,7 +3707,7 @@ igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq) /* * Respond to a pending IGMPv3 General Query. */ -static void +static uint32_t igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi) { struct ifnet *ifp; @@ -3606,16 +3760,17 @@ next: IGI_LOCK(igi); loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0; igmp_dispatch_queue(igi, &igi->igi_gq, IGMP_MAX_RESPONSE_BURST, - loop, ifp); + loop); IGI_LOCK_ASSERT_HELD(igi); /* - * Slew transmission of bursts over 500ms intervals. + * Slew transmission of bursts over 1 second intervals. */ if (igi->igi_gq.ifq_head != NULL) { igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY( IGMP_RESPONSE_BURST_INTERVAL); - interface_timers_running = 1; } + + return (igi->igi_v3_timer); } /* @@ -3623,22 +3778,25 @@ next: * * Must not be called with inm_lock or igi_lock held. */ -void -igmp_sendpkt(struct mbuf *m, struct ifnet *ifp) +static void +igmp_sendpkt(struct mbuf *m) { struct ip_moptions *imo; struct mbuf *ipopts, *m0; - int error; + int error; struct route ro; + struct ifnet *ifp; - IGMP_PRINTF(("%s: transmit %p\n", __func__, m)); + IGMP_PRINTF(("%s: transmit 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); + ifp = igmp_restore_context(m); /* * Check if the ifnet is still attached. 
*/ if (ifp == NULL || !ifnet_is_attached(ifp, 0)) { - IGMP_PRINTF(("%s: dropped %p as ifp u went away.\n", - __func__, m)); + IGMP_PRINTF(("%s: dropped 0x%llx as ifp went away.\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(m))); m_freem(m); OSAddAtomic(1, &ipstat.ips_noroute); return; @@ -3681,13 +3839,15 @@ igmp_sendpkt(struct mbuf *m, struct ifnet *ifp) * already freed the original mbuf chain. * This means that we don't have to m_freem(m) here. */ - IGMP_PRINTF(("%s: dropped %p\n", __func__, m)); + IGMP_PRINTF(("%s: dropped 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); IMO_REMREF(imo); atomic_add_32(&ipstat.ips_odropped, 1); return; } } + igmp_scrub_context(m0); m->m_flags &= ~(M_PROTOFLAGS | M_IGMP_LOOP); m0->m_pkthdr.rcvif = lo_ifp; #ifdef MAC @@ -3695,22 +3855,21 @@ igmp_sendpkt(struct mbuf *m, struct ifnet *ifp) #endif if (ifp->if_eflags & IFEF_TXSTART) { - /* Use control service class if the interface supports + /* + * Use control service class if the interface supports * transmit-start model. */ (void) m_set_service_class(m0, MBUF_SC_CTL); } bzero(&ro, sizeof (ro)); error = ip_output(m0, ipopts, &ro, 0, imo, NULL); - if (ro.ro_rt != NULL) { - rtfree(ro.ro_rt); - ro.ro_rt = NULL; - } + ROUTE_RELEASE(&ro); IMO_REMREF(imo); if (error) { - IGMP_PRINTF(("%s: ip_output(%p) = %d\n", __func__, m0, error)); + IGMP_PRINTF(("%s: ip_output(0x%llx) = %d\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m0), error)); return; } @@ -3823,8 +3982,16 @@ igmp_rec_type_to_str(const int type) #endif void -igmp_init(void) +igmp_init(struct protosw *pp, struct domain *dp) { +#pragma unused(dp) + static int igmp_initialized = 0; + + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); + + if (igmp_initialized) + return; + igmp_initialized = 1; IGMP_PRINTF(("%s: initializing\n", __func__)); diff --git a/bsd/netinet/igmp_var.h b/bsd/netinet/igmp_var.h index 30a0cacb3..0d299ef24 100644 --- a/bsd/netinet/igmp_var.h +++ b/bsd/netinet/igmp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -167,14 +167,21 @@ struct igmp_ifinfo_u { #define IGMP_VERSION_3 3 /* Default */ #endif /* PRIVATE */ -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #include #define IGMP_DEBUG 1 #ifdef IGMP_DEBUG -extern char * inet_ntoa(struct in_addr); extern int igmp_debug; #define IGMP_PRINTF(x) do { if (igmp_debug) printf x; } while (0) +#define IGMP_INET_PRINTF(addr, x) do { \ + if (igmp_debug) { \ + char _igmp_inet_buf[MAX_IPv4_STR_LEN]; \ + inet_ntop(AF_INET, &(addr), _igmp_inet_buf, \ + sizeof(_igmp_inet_buf)); \ + printf x; \ + } \ +} while (0) #else #define IGMP_PRINTF(x) #endif @@ -226,7 +233,7 @@ extern int igmp_debug; #define IGMP_MAX_STATE_CHANGE_PACKETS 8 /* # of packets per state change */ #define IGMP_MAX_RESPONSE_PACKETS 16 /* # of packets for general query */ #define IGMP_MAX_RESPONSE_BURST 4 /* # of responses to send at once */ -#define IGMP_RESPONSE_BURST_INTERVAL (PR_SLOWHZ) /* 500ms */ +#define IGMP_RESPONSE_BURST_INTERVAL 1 /* 1 second */ /* * IGMP-specific mbuf flags. 
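The igmp_sendpkt() rework above drops the explicit ifnet argument: the transmit interface now rides along with the mbuf via igmp_save_context()/igmp_restore_context()/igmp_scrub_context(), whose bodies are outside these hunks. A minimal sketch of what that pattern implies, assuming the helpers simply stash the ifp in the packet header until send time:

/*
 * Sketch only: the real helpers live elsewhere in this patch, and the
 * use of m_pkthdr.rcvif as the stash is an assumption here.
 */
static __inline__ void
igmp_save_context(struct mbuf *m, struct ifnet *ifp)
{
	/* remember the outbound interface for the deferred send */
	m->m_pkthdr.rcvif = ifp;
}

static __inline__ struct ifnet *
igmp_restore_context(struct mbuf *m)
{
	return (m->m_pkthdr.rcvif);
}

static __inline__ void
igmp_scrub_context(struct mbuf *m)
{
	/* clear the stash so a stale ifp never reaches ip_output() */
	m->m_pkthdr.rcvif = NULL;
}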
@@ -299,22 +306,32 @@ struct igmp_ifinfo { */ #define IGMP_IFINFO(ifp) ((ifp)->if_igi) -extern void igmp_init(void) __attribute__((section("__TEXT, initcode"))); -extern int igmp_change_state(struct in_multi *); +/* + * IGMP timer schedule parameters + */ +struct igmp_tparams { + int qpt; /* querier_present_timers_running */ + int it; /* interface_timers_running */ + int cst; /* current_state_timers_running */ + int sct; /* state_change_timers_running */ +}; + +extern void igmp_init(struct protosw *, struct domain *); +extern int igmp_change_state(struct in_multi *, struct igmp_tparams *); extern struct igmp_ifinfo *igmp_domifattach(struct ifnet *, int); extern void igmp_domifreattach(struct igmp_ifinfo *); extern void igmp_domifdetach(struct ifnet *); extern void igmp_input(struct mbuf *, int); extern int igmp_joingroup(struct in_multi *); extern void igmp_leavegroup(struct in_multi *); -extern void igmp_slowtimo(void); +extern void igmp_set_timeout(struct igmp_tparams *); extern void igi_addref(struct igmp_ifinfo *, int); extern void igi_remref(struct igmp_ifinfo *); __private_extern__ void igmp_initsilent(struct ifnet *, struct igmp_ifinfo *); SYSCTL_DECL(_net_inet_igmp); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ /* * Names for IGMP sysctl objects @@ -322,11 +339,11 @@ SYSCTL_DECL(_net_inet_igmp); #define IGMPCTL_STATS 1 /* statistics (read-only) */ #define IGMPCTL_MAXID 2 -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define IGMPCTL_NAMES { \ { 0, 0 }, \ { "stats", CTLTYPE_STRUCT }, \ } -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif diff --git a/bsd/netinet/in.c b/bsd/netinet/in.c index 1df980df6..6229060c5 100644 --- a/bsd/netinet/in.c +++ b/bsd/netinet/in.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -58,7 +58,6 @@ * SUCH DAMAGE. 
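The igmp_tparams structure introduced in the igmp_var.h hunk above replaces the four global *_timers_running flags the old code set as side effects: each handler records the timers it needs in a caller-supplied structure (the itp->cst and itp->sct stores in the igmp.c hunks), and the caller arms the timeout once. A minimal sketch of the intended call pattern, under the assumption that igmp_change_state() fills the structure the same way; example_mcast_update is illustrative only:

/* Sketch of the timer-aggregation pattern implied by the new API. */
static void
example_mcast_update(struct in_multi *inm)
{
	struct igmp_tparams itp;

	bzero(&itp, sizeof (itp));	/* no timers requested yet */
	(void) igmp_change_state(inm, &itp);

	/*
	 * One call arms whichever of the querier-present, interface,
	 * current-state and state-change timers were requested, instead
	 * of each handler poking a global flag.
	 */
	igmp_set_timeout(&itp);
}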
* * @(#)in.c 8.4 (Berkeley) 1/9/95 - * $FreeBSD: src/sys/netinet/in.c,v 1.44.2.5 2001/08/13 16:26:17 ume Exp $ */ #include @@ -73,43 +72,51 @@ #include #include #include -#include +#include +#include +#include #include #include #include #include #include +#include +#if PF +#include +#endif /* PF */ #include #include #include - #include -#include - #include - #include #include #include -#include - -#if PF -#include -#endif /* PF */ +static int inctl_associd(struct socket *, u_long, caddr_t); +static int inctl_connid(struct socket *, u_long, caddr_t); +static int inctl_conninfo(struct socket *, u_long, caddr_t); +static int inctl_autoaddr(struct ifnet *, struct ifreq *); +static int inctl_arpipll(struct ifnet *, struct ifreq *); +static int inctl_setrouter(struct ifnet *, struct ifreq *); +static int inctl_ifaddr(struct ifnet *, struct in_ifaddr *, u_long, + struct ifreq *); +static int inctl_lifaddr(struct ifnet *, u_long, struct if_laddrreq *); +static int inctl_ifdstaddr(struct ifnet *, struct in_ifaddr *, u_long, + struct ifreq *); +static int inctl_ifbrdaddr(struct ifnet *, struct in_ifaddr *, u_long, + struct ifreq *); +static int inctl_ifnetmask(struct ifnet *, struct in_ifaddr *, u_long, + struct ifreq *); static int in_mask2len(struct in_addr *); static void in_len2mask(struct in_addr *, int); -static int in_lifaddr_ioctl(struct socket *, u_long, struct if_laddrreq *, - struct ifnet *, struct proc *); -static int in_setrouter(struct ifnet *, int); - -static void in_socktrim(struct sockaddr_in *); -static int in_ifinit(struct ifnet *, - struct in_ifaddr *, struct sockaddr_in *, int); +static void in_socktrim(struct sockaddr_in *); +static int in_ifinit(struct ifnet *, struct in_ifaddr *, + struct sockaddr_in *, int); #define IA_HASH_INIT(ia) { \ (ia)->ia_hash.tqe_next = (void *)(uintptr_t)-1; \ @@ -129,12 +136,18 @@ static void in_ifaddr_detached(struct ifaddr *); static void in_ifaddr_free(struct ifaddr *); static void in_ifaddr_trace(struct ifaddr *, int); +static int in_getassocids(struct socket *, uint32_t *, user_addr_t); +static int in_getconnids(struct socket *, associd_t, uint32_t *, user_addr_t); +static int in_getconninfo(struct socket *, connid_t, uint32_t *, + uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *, + uint32_t *, user_addr_t, uint32_t *); + static int subnetsarelocal = 0; -SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW | CTLFLAG_LOCKED, - &subnetsarelocal, 0, ""); +SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, + CTLFLAG_RW | CTLFLAG_LOCKED, &subnetsarelocal, 0, ""); /* Track whether or not the SIOCARPIPLL ioctl has been called */ -__private_extern__ u_int32_t ipv4_ll_arp_aware = 0; +u_int32_t ipv4_ll_arp_aware = 0; #define INIFA_TRACE_HIST_SIZE 32 /* size of trace history */ @@ -191,10 +204,11 @@ inaddr_local(struct in_addr in) struct sockaddr_in sin; int local = 0; - if (ntohl(in.s_addr) == INADDR_LOOPBACK || IN_LINKLOCAL(ntohl(in.s_addr))) { + if (ntohl(in.s_addr) == INADDR_LOOPBACK || + IN_LINKLOCAL(ntohl(in.s_addr))) { local = 1; } else if (ntohl(in.s_addr) >= INADDR_UNSPEC_GROUP && - ntohl(in.s_addr) <= INADDR_MAX_LOCAL_GROUP) { + ntohl(in.s_addr) <= INADDR_MAX_LOCAL_GROUP) { local = 1; } else { sin.sin_family = AF_INET; @@ -230,8 +244,8 @@ in_localaddr(struct in_addr in) if (subnetsarelocal) { lck_rw_lock_shared(in_ifaddr_rwlock); - for (ia = in_ifaddrhead.tqh_first; ia; - ia = ia->ia_link.tqe_next) { + for (ia = in_ifaddrhead.tqh_first; ia != NULL; + ia = ia->ia_link.tqe_next) { IFA_LOCK(&ia->ia_ifa); if 
((i & ia->ia_netmask) == ia->ia_net) { IFA_UNLOCK(&ia->ia_ifa); @@ -243,8 +257,8 @@ in_localaddr(struct in_addr in) lck_rw_done(in_ifaddr_rwlock); } else { lck_rw_lock_shared(in_ifaddr_rwlock); - for (ia = in_ifaddrhead.tqh_first; ia; - ia = ia->ia_link.tqe_next) { + for (ia = in_ifaddrhead.tqh_first; ia != NULL; + ia = ia->ia_link.tqe_next) { IFA_LOCK(&ia->ia_ifa); if ((i & ia->ia_subnetmask) == ia->ia_subnet) { IFA_UNLOCK(&ia->ia_ifa); @@ -263,20 +277,20 @@ in_localaddr(struct in_addr in) * that may not be forwarded, or whether datagrams to that destination * may be forwarded. */ -int +boolean_t in_canforward(struct in_addr in) { u_int32_t i = ntohl(in.s_addr); u_int32_t net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i)) - return (0); + return (FALSE); if (IN_CLASSA(i)) { net = i & IN_CLASSA_NET; if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT)) - return (0); + return (FALSE); } - return (1); + return (TRUE); } /* @@ -285,15 +299,15 @@ in_canforward(struct in_addr in) static void in_socktrim(struct sockaddr_in *ap) { - char *cplim = (char *) &ap->sin_addr; - char *cp = (char *) (&ap->sin_addr + 1); + char *cplim = (char *)&ap->sin_addr; + char *cp = (char *)(&ap->sin_addr + 1); - ap->sin_len = 0; - while (--cp >= cplim) - if (*cp) { - (ap)->sin_len = cp - (char *) (ap) + 1; - break; - } + ap->sin_len = 0; + while (--cp >= cplim) + if (*cp) { + (ap)->sin_len = cp - (char *)(ap) + 1; + break; + } } static int @@ -303,18 +317,18 @@ in_mask2len(struct in_addr *mask) u_char *p; p = (u_char *)mask; - for (x = 0; x < sizeof(*mask); x++) { + for (x = 0; x < sizeof (*mask); x++) { if (p[x] != 0xff) break; } y = 0; - if (x < sizeof(*mask)) { + if (x < sizeof (*mask)) { for (y = 0; y < 8; y++) { if ((p[x] & (0x80 >> y)) == 0) break; } } - return x * 8 + y; + return (x * 8 + y); } static void @@ -338,890 +352,1139 @@ in_domifattach(struct ifnet *ifp) { int error; + VERIFY(ifp != NULL); + if ((error = proto_plumb(PF_INET, ifp)) && error != EEXIST) - log(LOG_ERR, "%s: proto_plumb returned %d if=%s%d\n", - __func__, error, ifp->if_name, ifp->if_unit); + log(LOG_ERR, "%s: proto_plumb returned %d if=%s\n", + __func__, error, if_name(ifp)); return (error); } -/* - * Generic internet control operations (ioctl's). - * Ifp is 0 if not an interface-specific ioctl. - * - * Returns: 0 Success - * EINVAL - * EADDRNOTAVAIL - * EDESTADDRREQ - * EPERM - * ENOBUFS - * EBUSY - * EOPNOTSUPP - * proc_suser:EPERM - * suser:EPERM - * in_lifaddr_ioctl:??? - * dlil_ioctl:??? - * in_ifinit:??? - * dlil_plumb_protocol:??? - * dlil_unplumb_protocol:??? 
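A worked example for in_mask2len() above, as a comment:

/*
 * in_mask2len() example: 255.255.252.0 == ff.ff.fc.00.
 * Byte loop: p[0] == p[1] == 0xff, p[2] == 0xfc stops it, so x = 2.
 * Bit loop: 0xfc == 11111100b has y = 6 leading ones.
 * Result: x * 8 + y == 2 * 8 + 6 == 22 (a /22 prefix).
 */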
- */ -/* ARGSUSED */ -int -in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, - struct proc *p) +static __attribute__((noinline)) int +inctl_associd(struct socket *so, u_long cmd, caddr_t data) { - struct in_ifaddr *ia = NULL; - struct ifaddr *ifa; - struct sockaddr_in oldaddr; int error = 0; - int hostIsNew, maskIsNew; - struct kev_msg ev_msg; - struct kev_in_data in_event_data; + union { + struct so_aidreq32 a32; + struct so_aidreq64 a64; + } u; - bzero(&in_event_data, sizeof (struct kev_in_data)); - bzero(&ev_msg, sizeof (struct kev_msg)); + VERIFY(so != NULL); switch (cmd) { - case SIOCALIFADDR: /* struct if_laddrreq */ - case SIOCDLIFADDR: /* struct if_laddrreq */ - if ((error = proc_suser(p)) != 0) - return (error); - /* FALLTHRU */ - case SIOCGLIFADDR: { /* struct if_laddrreq */ - struct if_laddrreq iflr; + case SIOCGASSOCIDS32: /* struct so_aidreq32 */ + bcopy(data, &u.a32, sizeof (u.a32)); + error = in_getassocids(so, &u.a32.sar_cnt, u.a32.sar_aidp); + if (error == 0) + bcopy(&u.a32, data, sizeof (u.a32)); + break; - if (ifp == NULL) - return (EINVAL); + case SIOCGASSOCIDS64: /* struct so_aidreq64 */ + bcopy(data, &u.a64, sizeof (u.a64)); + error = in_getassocids(so, &u.a64.sar_cnt, u.a64.sar_aidp); + if (error == 0) + bcopy(&u.a64, data, sizeof (u.a64)); + break; - bcopy(data, &iflr, sizeof (iflr)); - error = in_lifaddr_ioctl(so, cmd, &iflr, ifp, p); - bcopy(&iflr, data, sizeof (iflr)); - return (error); - } + default: + VERIFY(0); + /* NOTREACHED */ } - /* - * Find address for this interface, if it exists. - * - * If an alias address was specified, find that one instead of - * the first one on the interface. - */ - if (ifp != NULL) { - struct in_ifaddr *iap; - struct sockaddr_in sin; + return (error); +} - bcopy(&((struct ifreq *)(void *)data)->ifr_addr, - &sin, sizeof (sin)); +static __attribute__((noinline)) int +inctl_connid(struct socket *so, u_long cmd, caddr_t data) +{ + int error = 0; + union { + struct so_cidreq32 c32; + struct so_cidreq64 c64; + } u; - lck_rw_lock_shared(in_ifaddr_rwlock); - for (iap = in_ifaddrhead.tqh_first; iap != NULL; - iap = iap->ia_link.tqe_next) { - if (iap->ia_ifp != ifp) - continue; + VERIFY(so != NULL); - IFA_LOCK(&iap->ia_ifa); - if (sin.sin_addr.s_addr == - iap->ia_addr.sin_addr.s_addr) { - ia = iap; - IFA_UNLOCK(&iap->ia_ifa); - break; - } else if (ia == NULL) { - ia = iap; - if (sin.sin_family != AF_INET) { - IFA_UNLOCK(&iap->ia_ifa); - break; - } - } - IFA_UNLOCK(&iap->ia_ifa); - } - /* take a reference on ia before releasing lock */ - if (ia != NULL) - IFA_ADDREF(&ia->ia_ifa); - lck_rw_done(in_ifaddr_rwlock); + switch (cmd) { + case SIOCGCONNIDS32: /* struct so_cidreq32 */ + bcopy(data, &u.c32, sizeof (u.c32)); + error = in_getconnids(so, u.c32.scr_aid, &u.c32.scr_cnt, + u.c32.scr_cidp); + if (error == 0) + bcopy(&u.c32, data, sizeof (u.c32)); + break; + + case SIOCGCONNIDS64: /* struct so_cidreq64 */ + bcopy(data, &u.c64, sizeof (u.c64)); + error = in_getconnids(so, u.c64.scr_aid, &u.c64.scr_cnt, + u.c64.scr_cidp); + if (error == 0) + bcopy(&u.c64, data, sizeof (u.c64)); + break; + + default: + VERIFY(0); + /* NOTREACHED */ } + return (error); +} + +static __attribute__((noinline)) int +inctl_conninfo(struct socket *so, u_long cmd, caddr_t data) +{ + int error = 0; + union { + struct so_cinforeq32 ci32; + struct so_cinforeq64 ci64; + } u; + + VERIFY(so != NULL); + switch (cmd) { - case SIOCAUTOADDR: /* struct ifreq */ - case SIOCARPIPLL: /* struct ifreq */ - case SIOCSETROUTERMODE: /* struct ifreq */ - if 
((error = proc_suser(p)) != 0) { - goto done; - } - if (ifp == NULL) { - error = EADDRNOTAVAIL; - goto done; - } + case SIOCGCONNINFO32: /* struct so_cinforeq32 */ + bcopy(data, &u.ci32, sizeof (u.ci32)); + error = in_getconninfo(so, u.ci32.scir_cid, &u.ci32.scir_flags, + &u.ci32.scir_ifindex, &u.ci32.scir_error, u.ci32.scir_src, + &u.ci32.scir_src_len, u.ci32.scir_dst, &u.ci32.scir_dst_len, + &u.ci32.scir_aux_type, u.ci32.scir_aux_data, + &u.ci32.scir_aux_len); + if (error == 0) + bcopy(&u.ci32, data, sizeof (u.ci32)); break; - case SIOCAIFADDR: /* struct ifaliasreq */ - case SIOCDIFADDR: { /* struct ifreq */ - struct sockaddr_in addr, dstaddr; + case SIOCGCONNINFO64: /* struct so_cinforeq64 */ + bcopy(data, &u.ci64, sizeof (u.ci64)); + error = in_getconninfo(so, u.ci64.scir_cid, &u.ci64.scir_flags, + &u.ci64.scir_ifindex, &u.ci64.scir_error, u.ci64.scir_src, + &u.ci64.scir_src_len, u.ci64.scir_dst, &u.ci64.scir_dst_len, + &u.ci64.scir_aux_type, u.ci64.scir_aux_data, + &u.ci64.scir_aux_len); + if (error == 0) + bcopy(&u.ci64, data, sizeof (u.ci64)); + break; - if (ifp == NULL) { - error = EADDRNOTAVAIL; - goto done; - } + default: + VERIFY(0); + /* NOTREACHED */ + } - if (cmd == SIOCAIFADDR) { - bcopy(&((struct in_aliasreq *)(void *)data)-> - ifra_addr, &addr, sizeof (addr)); - bcopy(&((struct in_aliasreq *)(void *)data)-> - ifra_dstaddr, &dstaddr, sizeof (dstaddr)); - } else { - VERIFY(cmd == SIOCDIFADDR); - bcopy(&((struct ifreq *)(void *)data)->ifr_addr, - &addr, sizeof (addr)); - bzero(&dstaddr, sizeof (dstaddr)); - } + return (error); +} - if (addr.sin_family == AF_INET) { - struct in_ifaddr *oia; +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. + */ +static __attribute__((noinline)) int +inctl_autoaddr(struct ifnet *ifp, struct ifreq *ifr) +{ + int error = 0, intval; - lck_rw_lock_shared(in_ifaddr_rwlock); - for (oia = ia; ia; ia = ia->ia_link.tqe_next) { - IFA_LOCK(&ia->ia_ifa); - if (ia->ia_ifp == ifp && - ia->ia_addr.sin_addr.s_addr == - addr.sin_addr.s_addr) { - IFA_ADDREF_LOCKED(&ia->ia_ifa); - IFA_UNLOCK(&ia->ia_ifa); - break; - } - IFA_UNLOCK(&ia->ia_ifa); - } - lck_rw_done(in_ifaddr_rwlock); - if (oia != NULL) - IFA_REMREF(&oia->ia_ifa); - if ((ifp->if_flags & IFF_POINTOPOINT) && - (cmd == SIOCAIFADDR) && - (dstaddr.sin_addr.s_addr == INADDR_ANY)) { - error = EDESTADDRREQ; - goto done; - } - } else if (cmd == SIOCAIFADDR) { - error = EINVAL; - goto done; - } - if (cmd == SIOCDIFADDR && ia == NULL) { - error = EADDRNOTAVAIL; - goto done; + VERIFY(ifp != NULL); + + bcopy(&ifr->ifr_intval, &intval, sizeof (intval)); + + ifnet_lock_exclusive(ifp); + if (intval) { + /* + * An interface in IPv4 router mode implies that it + * is configured with a static IP address and should + * not act as a DHCP client; prevent SIOCAUTOADDR from + * being set in that mode. 
+ */ + if (ifp->if_eflags & IFEF_IPV4_ROUTER) { + intval = 0; /* be safe; clear flag if set */ + error = EBUSY; + } else { + ifp->if_eflags |= IFEF_AUTOCONFIGURING; } - /* FALLTHROUGH */ } - case SIOCSIFADDR: /* struct ifreq */ - case SIOCSIFNETMASK: /* struct ifreq */ - case SIOCSIFDSTADDR: { /* struct ifreq */ - struct sockaddr_in addr; + if (!intval) + ifp->if_eflags &= ~IFEF_AUTOCONFIGURING; + ifnet_lock_done(ifp); - if (cmd == SIOCAIFADDR) { - /* fell thru from above; just repeat it */ - bcopy(&((struct in_aliasreq *)(void *)data)-> - ifra_addr, &addr, sizeof (addr)); + return (error); +} + +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. + */ +static __attribute__((noinline)) int +inctl_arpipll(struct ifnet *ifp, struct ifreq *ifr) +{ + int error = 0, intval; + + VERIFY(ifp != NULL); + + bcopy(&ifr->ifr_intval, &intval, sizeof (intval)); + ipv4_ll_arp_aware = 1; + + ifnet_lock_exclusive(ifp); + if (intval) { + /* + * An interface in IPv4 router mode implies that it + * is configured with a static IP address and should + * not have to deal with IPv4 Link-Local Address; + * prevent SIOCARPIPLL from being set in that mode. + */ + if (ifp->if_eflags & IFEF_IPV4_ROUTER) { + intval = 0; /* be safe; clear flag if set */ + error = EBUSY; } else { - VERIFY(cmd == SIOCDIFADDR || cmd == SIOCSIFADDR || - cmd == SIOCSIFNETMASK || cmd == SIOCSIFDSTADDR); - bcopy(&((struct ifreq *)(void *)data)->ifr_addr, - &addr, sizeof (addr)); + ifp->if_eflags |= IFEF_ARPLL; } + } + if (!intval) + ifp->if_eflags &= ~IFEF_ARPLL; + ifnet_lock_done(ifp); - /* socket is NULL if called from in_purgeaddrs() */ - if (so != NULL && (so->so_state & SS_PRIV) == 0) { - error = EPERM; - goto done; - } - /* in case it's NULL, make sure it came from the kernel */ - if (so == NULL && p != kernproc) { - error = EPERM; - goto done; - } - if (ifp == NULL) { + return (error); +} + +/* + * Handle SIOCSETROUTERMODE to set or clear the IPv4 router mode flag on + * the interface. When in this mode, IPv4 Link-Local Address support is + * disabled in ARP, and DHCP client support is disabled in IP input; turning + * any of them on would cause an error to be returned. Entering or exiting + * this mode will result in the removal of IPv4 addresses currently configured + * on the interface. + * + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. + */ +static __attribute__((noinline)) int +inctl_setrouter(struct ifnet *ifp, struct ifreq *ifr) +{ + int error = 0, intval; + + VERIFY(ifp != NULL); + + /* Router mode isn't valid for loopback */ + if (ifp->if_flags & IFF_LOOPBACK) + return (ENODEV); + + bcopy(&ifr->ifr_intval, &intval, sizeof (intval)); + + ifnet_lock_exclusive(ifp); + if (intval) { + ifp->if_eflags |= IFEF_IPV4_ROUTER; + ifp->if_eflags &= ~(IFEF_ARPLL | IFEF_AUTOCONFIGURING); + } else { + ifp->if_eflags &= ~IFEF_IPV4_ROUTER; + } + ifnet_lock_done(ifp); + + /* purge all IPv4 addresses configured on this interface */ + in_purgeaddrs(ifp); + + return (error); +} + +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. 
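The "bcopy() or other byte-aligned memory accesses" convention repeated in the comments above exists because the ioctl data buffer carries no alignment guarantee, so dereferencing a structure pointer into it can fault on alignment-strict CPUs. A minimal illustration of the convention; example_get_sin is a hypothetical helper, not code from this patch:

/* Illustration of the byte-aligned access convention; example only. */
static int
example_get_sin(const struct ifreq *ifr, struct sockaddr_in *sin)
{
	/*
	 * Not: *sin = *(struct sockaddr_in *)(void *)&ifr->ifr_addr;
	 * "ifr" may sit at any byte offset within the ioctl buffer.
	 */
	bcopy(&ifr->ifr_addr, sin, sizeof (*sin));
	return ((sin->sin_family == AF_INET) ? 0 : EAFNOSUPPORT);
}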
+ */ +static __attribute__((noinline)) int +inctl_ifaddr(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, + struct ifreq *ifr) +{ + struct kev_in_data in_event_data; + struct kev_msg ev_msg; + struct sockaddr_in addr; + struct ifaddr *ifa; + int error = 0; + + VERIFY(ifp != NULL); + + bzero(&in_event_data, sizeof (struct kev_in_data)); + bzero(&ev_msg, sizeof (struct kev_msg)); + + switch (cmd) { + case SIOCGIFADDR: /* struct ifreq */ + if (ia == NULL) { error = EADDRNOTAVAIL; - goto done; - } - if (addr.sin_family != AF_INET && cmd == SIOCSIFADDR) { - error = EINVAL; - goto done; + break; } - if (ia == NULL) { - ia = in_ifaddr_alloc(M_WAITOK); - if (ia == NULL) { - error = ENOBUFS; - goto done; - } - ifnet_lock_exclusive(ifp); - ifa = &ia->ia_ifa; - IFA_LOCK(ifa); - /* Hold a reference for this routine */ - IFA_ADDREF_LOCKED(ifa); - IA_HASH_INIT(ia); - ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; - ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; - ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; - ia->ia_sockmask.sin_len = 8; - if (ifp->if_flags & IFF_BROADCAST) { - ia->ia_broadaddr.sin_len = sizeof (ia->ia_addr); - ia->ia_broadaddr.sin_family = AF_INET; - } - ia->ia_ifp = ifp; - if (!(ifp->if_flags & IFF_LOOPBACK)) - in_interfaces++; - /* if_attach_ifa() holds a reference for ifa_link */ - if_attach_ifa(ifp, ifa); - /* - * If we have to go through in_ifinit(), make sure - * to avoid installing route(s) based on this address - * via PFC_IFUP event, before the link resolver (ARP) - * initializes it. - */ - if (cmd == SIOCAIFADDR || cmd == SIOCSIFADDR) - ifa->ifa_debug |= IFD_NOTREADY; - IFA_UNLOCK(ifa); - ifnet_lock_done(ifp); - lck_rw_lock_exclusive(in_ifaddr_rwlock); - /* Hold a reference for ia_link */ - IFA_ADDREF(ifa); - TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link); - lck_rw_done(in_ifaddr_rwlock); - /* discard error */ - (void) in_domifattach(ifp); - error = 0; - } - break; - } - - case SIOCPROTOATTACH: /* struct ifreq */ - case SIOCPROTODETACH: /* struct ifreq */ - if ((error = proc_suser(p)) != 0) { - goto done; - } - if (ifp == NULL) { - error = EADDRNOTAVAIL; - goto done; - } - break; - - case SIOCSIFBRDADDR: /* struct ifreq */ - if ((so->so_state & SS_PRIV) == 0) { - error = EPERM; - goto done; - } - /* FALLTHROUGH */ - case SIOCGIFADDR: /* struct ifreq */ - case SIOCGIFNETMASK: /* struct ifreq */ - case SIOCGIFDSTADDR: /* struct ifreq */ - case SIOCGIFBRDADDR: /* struct ifreq */ - if (ia == NULL) { - error = EADDRNOTAVAIL; - goto done; - } - break; - } - - switch (cmd) { - case SIOCAUTOADDR: { /* struct ifreq */ - int intval; - - VERIFY(ifp != NULL); - bcopy(&((struct ifreq *)(void *)data)->ifr_intval, - &intval, sizeof (intval)); - - ifnet_lock_exclusive(ifp); - if (intval) { - /* - * An interface in IPv4 router mode implies that it - * is configured with a static IP address and should - * not act as a DHCP client; prevent SIOCAUTOADDR from - * being set in that mode. 
- */ - if (ifp->if_eflags & IFEF_IPV4_ROUTER) { - intval = 0; /* be safe; clear flag if set */ - error = EBUSY; - } else { - ifp->if_eflags |= IFEF_AUTOCONFIGURING; - } - } - if (!intval) - ifp->if_eflags &= ~IFEF_AUTOCONFIGURING; - ifnet_lock_done(ifp); - break; - } - - case SIOCARPIPLL: { /* struct ifreq */ - int intval; - - VERIFY(ifp != NULL); - bcopy(&((struct ifreq *)(void *)data)->ifr_intval, - &intval, sizeof (intval)); - ipv4_ll_arp_aware = 1; - - ifnet_lock_exclusive(ifp); - if (intval) { - /* - * An interface in IPv4 router mode implies that it - * is configured with a static IP address and should - * not have to deal with IPv4 Link-Local Address; - * prevent SIOCARPIPLL from being set in that mode. - */ - if (ifp->if_eflags & IFEF_IPV4_ROUTER) { - intval = 0; /* be safe; clear flag if set */ - error = EBUSY; - } else { - ifp->if_eflags |= IFEF_ARPLL; - } - } - if (!intval) - ifp->if_eflags &= ~IFEF_ARPLL; - ifnet_lock_done(ifp); - break; - } - - case SIOCGIFADDR: /* struct ifreq */ - VERIFY(ia != NULL); IFA_LOCK(&ia->ia_ifa); - bcopy(&ia->ia_addr, &((struct ifreq *)(void *)data)->ifr_addr, - sizeof (struct sockaddr_in)); + bcopy(&ia->ia_addr, &ifr->ifr_addr, sizeof (addr)); IFA_UNLOCK(&ia->ia_ifa); break; - case SIOCGIFBRDADDR: /* struct ifreq */ + case SIOCSIFADDR: /* struct ifreq */ VERIFY(ia != NULL); - if ((ifp->if_flags & IFF_BROADCAST) == 0) { - error = EINVAL; - break; + bcopy(&ifr->ifr_addr, &addr, sizeof (addr)); + /* + * If this is a new address, the reference count for the + * hash table has been taken at creation time above. + */ + error = in_ifinit(ifp, ia, &addr, 1); + if (error == 0) { + (void) ifnet_notify_address(ifp, AF_INET); } - IFA_LOCK(&ia->ia_ifa); - bcopy(&ia->ia_broadaddr, - &((struct ifreq *)(void *)data)->ifr_broadaddr, - sizeof (struct sockaddr_in)); - IFA_UNLOCK(&ia->ia_ifa); break; - case SIOCGIFDSTADDR: /* struct ifreq */ - VERIFY(ia != NULL); - if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { - error = EINVAL; - break; - } - IFA_LOCK(&ia->ia_ifa); - bcopy(&ia->ia_dstaddr, - &((struct ifreq *)(void *)data)->ifr_dstaddr, - sizeof (struct sockaddr_in)); - IFA_UNLOCK(&ia->ia_ifa); - break; + case SIOCAIFADDR: { /* struct {if,in_}aliasreq */ + struct in_aliasreq *ifra = (struct in_aliasreq *)ifr; + struct sockaddr_in broadaddr, mask; + int hostIsNew, maskIsNew; - case SIOCGIFNETMASK: /* struct ifreq */ VERIFY(ia != NULL); - IFA_LOCK(&ia->ia_ifa); - bcopy(&ia->ia_sockmask, - &((struct ifreq *)(void *)data)->ifr_addr, - sizeof (struct sockaddr_in)); - IFA_UNLOCK(&ia->ia_ifa); - break; + bcopy(&ifra->ifra_addr, &addr, sizeof (addr)); + bcopy(&ifra->ifra_broadaddr, &broadaddr, sizeof (broadaddr)); + bcopy(&ifra->ifra_mask, &mask, sizeof (mask)); + + maskIsNew = 0; + hostIsNew = 1; + error = 0; - case SIOCSIFDSTADDR: /* struct ifreq */ - VERIFY(ifp != NULL && ia != NULL); - if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { - error = EINVAL; - break; - } - IFA_LOCK(&ia->ia_ifa); - oldaddr = ia->ia_dstaddr; - bcopy(&((struct ifreq *)(void *)data)->ifr_dstaddr, - &ia->ia_dstaddr, sizeof (struct sockaddr_in)); - if (ia->ia_dstaddr.sin_family == AF_INET) - ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in); - IFA_UNLOCK(&ia->ia_ifa); - /* - * NOTE: SIOCSIFDSTADDR is defined with struct ifreq - * as parameter, but here we are sending it down - * to the interface with a pointer to struct ifaddr, - * for legacy reasons. 
- */ - error = ifnet_ioctl(ifp, PF_INET, SIOCSIFDSTADDR, ia); IFA_LOCK(&ia->ia_ifa); - if (error == EOPNOTSUPP) { - error = 0; + if (ia->ia_addr.sin_family == AF_INET) { + if (addr.sin_len == 0) { + addr = ia->ia_addr; + hostIsNew = 0; + } else if (addr.sin_addr.s_addr == + ia->ia_addr.sin_addr.s_addr) { + hostIsNew = 0; + } } - if (error) { - ia->ia_dstaddr = oldaddr; + if (mask.sin_len) { IFA_UNLOCK(&ia->ia_ifa); - break; + in_ifscrub(ifp, ia, 0); + IFA_LOCK(&ia->ia_ifa); + ia->ia_sockmask = mask; + ia->ia_subnetmask = + ntohl(ia->ia_sockmask.sin_addr.s_addr); + maskIsNew = 1; } - IFA_LOCK_ASSERT_HELD(&ia->ia_ifa); - - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_INET_SUBCLASS; - - ev_msg.event_code = KEV_INET_SIFDSTADDR; - - if (ia->ia_ifa.ifa_dstaddr) { - in_event_data.ia_dstaddr = ((struct sockaddr_in *) - (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr; + if ((ifp->if_flags & IFF_POINTOPOINT) && + (broadaddr.sin_family == AF_INET)) { + IFA_UNLOCK(&ia->ia_ifa); + in_ifscrub(ifp, ia, 0); + IFA_LOCK(&ia->ia_ifa); + ia->ia_dstaddr = broadaddr; + ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in); + maskIsNew = 1; /* We lie; but the effect's the same */ + } + if (addr.sin_family == AF_INET && (hostIsNew || maskIsNew)) { + IFA_UNLOCK(&ia->ia_ifa); + error = in_ifinit(ifp, ia, &addr, 0); } else { - in_event_data.ia_dstaddr.s_addr = INADDR_ANY; + IFA_UNLOCK(&ia->ia_ifa); } + if (error == 0) { + (void) ifnet_notify_address(ifp, AF_INET); + } + IFA_LOCK(&ia->ia_ifa); + if ((ifp->if_flags & IFF_BROADCAST) && + (broadaddr.sin_family == AF_INET)) + ia->ia_broadaddr = broadaddr; - in_event_data.ia_addr = ia->ia_addr.sin_addr; - in_event_data.ia_net = ia->ia_net; - in_event_data.ia_netmask = ia->ia_netmask; - in_event_data.ia_subnet = ia->ia_subnet; - in_event_data.ia_subnetmask = ia->ia_subnetmask; - in_event_data.ia_netbroadcast = ia->ia_netbroadcast; - IFA_UNLOCK(&ia->ia_ifa); - (void) strncpy(&in_event_data.link_data.if_name[0], - ifp->if_name, IFNAMSIZ); - in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; - - ev_msg.dv[0].data_ptr = &in_event_data; - ev_msg.dv[0].data_length = sizeof (struct kev_in_data); - ev_msg.dv[1].data_length = 0; + /* + * Report event. 
+ */ + if ((error == 0) || (error == EEXIST)) { + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET_SUBCLASS; - kev_post_msg(&ev_msg); + if (hostIsNew) + ev_msg.event_code = KEV_INET_NEW_ADDR; + else + ev_msg.event_code = KEV_INET_CHANGED_ADDR; - lck_mtx_lock(rnh_lock); - IFA_LOCK(&ia->ia_ifa); - if (ia->ia_flags & IFA_ROUTE) { - ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr; - IFA_UNLOCK(&ia->ia_ifa); - rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); - IFA_LOCK(&ia->ia_ifa); - ia->ia_ifa.ifa_dstaddr = - (struct sockaddr *)&ia->ia_dstaddr; + if (ia->ia_ifa.ifa_dstaddr) { + in_event_data.ia_dstaddr = + ((struct sockaddr_in *)(void *)ia-> + ia_ifa.ifa_dstaddr)->sin_addr; + } else { + in_event_data.ia_dstaddr.s_addr = INADDR_ANY; + } + in_event_data.ia_addr = ia->ia_addr.sin_addr; + in_event_data.ia_net = ia->ia_net; + in_event_data.ia_netmask = ia->ia_netmask; + in_event_data.ia_subnet = ia->ia_subnet; + in_event_data.ia_subnetmask = ia->ia_subnetmask; + in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); - rtinit_locked(&(ia->ia_ifa), (int)RTM_ADD, - RTF_HOST|RTF_UP); + (void) strncpy(&in_event_data.link_data.if_name[0], + ifp->if_name, IFNAMSIZ); + in_event_data.link_data.if_family = ifp->if_family; + in_event_data.link_data.if_unit = ifp->if_unit; + + ev_msg.dv[0].data_ptr = &in_event_data; + ev_msg.dv[0].data_length = sizeof (struct kev_in_data); + ev_msg.dv[1].data_length = 0; + + kev_post_msg(&ev_msg); } else { IFA_UNLOCK(&ia->ia_ifa); } - lck_mtx_unlock(rnh_lock); break; + } - case SIOCSIFBRDADDR: /* struct ifreq */ + case SIOCDIFADDR: /* struct ifreq */ VERIFY(ia != NULL); - if ((ifp->if_flags & IFF_BROADCAST) == 0) { - error = EINVAL; + error = ifnet_ioctl(ifp, PF_INET, SIOCDIFADDR, ia); + if (error == EOPNOTSUPP) + error = 0; + if (error != 0) break; - } - IFA_LOCK(&ia->ia_ifa); - bcopy(&((struct ifreq *)(void *)data)->ifr_broadaddr, - &ia->ia_broadaddr, sizeof (struct sockaddr_in)); - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_INET_SUBCLASS; + /* Fill out the kernel event information */ + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET_SUBCLASS; - ev_msg.event_code = KEV_INET_SIFBRDADDR; + ev_msg.event_code = KEV_INET_ADDR_DELETED; + IFA_LOCK(&ia->ia_ifa); if (ia->ia_ifa.ifa_dstaddr) { in_event_data.ia_dstaddr = ((struct sockaddr_in *) (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr; } else { in_event_data.ia_dstaddr.s_addr = INADDR_ANY; } - in_event_data.ia_addr = ia->ia_addr.sin_addr; - in_event_data.ia_net = ia->ia_net; - in_event_data.ia_netmask = ia->ia_netmask; - in_event_data.ia_subnet = ia->ia_subnet; - in_event_data.ia_subnetmask = ia->ia_subnetmask; - in_event_data.ia_netbroadcast = ia->ia_netbroadcast; + in_event_data.ia_addr = ia->ia_addr.sin_addr; + in_event_data.ia_net = ia->ia_net; + in_event_data.ia_netmask = ia->ia_netmask; + in_event_data.ia_subnet = ia->ia_subnet; + in_event_data.ia_subnetmask = ia->ia_subnetmask; + in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); (void) strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; + in_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; - ev_msg.dv[0].data_length = sizeof (struct kev_in_data); + 
ev_msg.dv[0].data_length = sizeof(struct kev_in_data); ev_msg.dv[1].data_length = 0; - kev_post_msg(&ev_msg); - break; - - case SIOCSIFADDR: { /* struct ifreq */ - struct sockaddr_in addr; + ifa = &ia->ia_ifa; + lck_rw_lock_exclusive(in_ifaddr_rwlock); + /* Release ia_link reference */ + IFA_REMREF(ifa); + TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link); + IFA_LOCK(ifa); + if (IA_IS_HASHED(ia)) + in_iahash_remove(ia); + IFA_UNLOCK(ifa); + lck_rw_done(in_ifaddr_rwlock); - VERIFY(ifp != NULL && ia != NULL); - bcopy(&((struct ifreq *)(void *)data)->ifr_addr, - &addr, sizeof (addr)); /* - * If this is a new address, the reference count for the - * hash table has been taken at creation time above. + * in_ifscrub kills the interface route. */ - error = in_ifinit(ifp, ia, &addr, 1); -#if PF - if (!error) - (void) pf_ifaddr_hook(ifp, cmd); -#endif /* PF */ - break; - } + in_ifscrub(ifp, ia, 0); + ifnet_lock_exclusive(ifp); + IFA_LOCK(ifa); + /* if_detach_ifa() releases ifa_link reference */ + if_detach_ifa(ifp, ifa); + /* Our reference to this address is dropped at the bottom */ + IFA_UNLOCK(ifa); - case SIOCPROTOATTACH: /* struct ifreq */ - VERIFY(ifp != NULL); - error = in_domifattach(ifp); - break; + /* invalidate route caches */ + routegenid_inet_update(); - case SIOCPROTODETACH: /* struct ifreq */ - VERIFY(ifp != NULL); - /* - * If an IPv4 address is still present, refuse to detach. + /* + * If the interface supports multicast, and no address is left, + * remove the "all hosts" multicast group from that interface. */ - ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - IFA_LOCK(ifa); - if (ifa->ifa_addr->sa_family == AF_INET) { + if ((ifp->if_flags & IFF_MULTICAST) || + ifp->if_allhostsinm != NULL) { + + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + IFA_LOCK(ifa); + if (ifa->ifa_addr->sa_family == AF_INET) { + IFA_UNLOCK(ifa); + break; + } IFA_UNLOCK(ifa); - break; } - IFA_UNLOCK(ifa); + ifnet_lock_done(ifp); + + lck_mtx_lock(&ifp->if_addrconfig_lock); + if (ifa == NULL && ifp->if_allhostsinm != NULL) { + struct in_multi *inm = ifp->if_allhostsinm; + ifp->if_allhostsinm = NULL; + + in_delmulti(inm); + /* release the reference for allhostsinm */ + INM_REMREF(inm); + } + lck_mtx_unlock(&ifp->if_addrconfig_lock); + } else { + ifnet_lock_done(ifp); } - ifnet_lock_done(ifp); + + /* Post the kernel event */ + kev_post_msg(&ev_msg); + + /* + * See if there is any IPV4 address left and if so, + * reconfigure KDP to use current primary address. + */ + ifa = ifa_ifpgetprimary(ifp, AF_INET); if (ifa != NULL) { - error = EBUSY; - break; - } + /* + * NOTE: SIOCSIFADDR is defined with struct ifreq + * as parameter, but here we are sending it down + * to the interface with a pointer to struct ifaddr, + * for legacy reasons. + */ + error = ifnet_ioctl(ifp, PF_INET, SIOCSIFADDR, ifa); + if (error == EOPNOTSUPP) + error = 0; - error = proto_unplumb(PF_INET, ifp); + /* Release reference from ifa_ifpgetprimary() */ + IFA_REMREF(ifa); + } + (void) ifnet_notify_address(ifp, AF_INET); break; - case SIOCSETROUTERMODE: { /* struct ifreq */ - int intval; + default: + VERIFY(0); + /* NOTREACHED */ + } - VERIFY(ifp != NULL); - bcopy(&((struct ifreq *)(void *)data)->ifr_intval, - &intval, sizeof (intval)); + return (error); +} - error = in_setrouter(ifp, intval); - break; - } +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. 
+ */ +static __attribute__((noinline)) int +inctl_ifdstaddr(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, + struct ifreq *ifr) +{ + struct kev_in_data in_event_data; + struct kev_msg ev_msg; + struct sockaddr_in dstaddr; + int error = 0; - case SIOCSIFNETMASK: { /* struct ifreq */ - struct sockaddr_in addr; - in_addr_t i; + VERIFY(ifp != NULL); + + if (!(ifp->if_flags & IFF_POINTOPOINT)) + return (EINVAL); - VERIFY(ifp != NULL && ia != NULL); - bcopy(&((struct ifreq *)(void *)data)->ifr_addr, - &addr, sizeof (addr)); - i = addr.sin_addr.s_addr; + bzero(&in_event_data, sizeof (struct kev_in_data)); + bzero(&ev_msg, sizeof (struct kev_msg)); + + switch (cmd) { + case SIOCGIFDSTADDR: /* struct ifreq */ + if (ia == NULL) { + error = EADDRNOTAVAIL; + break; + } + IFA_LOCK(&ia->ia_ifa); + bcopy(&ia->ia_dstaddr, &ifr->ifr_dstaddr, sizeof (dstaddr)); + IFA_UNLOCK(&ia->ia_ifa); + break; + case SIOCSIFDSTADDR: /* struct ifreq */ + VERIFY(ia != NULL); IFA_LOCK(&ia->ia_ifa); - ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr = i); - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_INET_SUBCLASS; + dstaddr = ia->ia_dstaddr; + bcopy(&ifr->ifr_dstaddr, &ia->ia_dstaddr, sizeof (dstaddr)); + if (ia->ia_dstaddr.sin_family == AF_INET) + ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in); + IFA_UNLOCK(&ia->ia_ifa); + /* + * NOTE: SIOCSIFDSTADDR is defined with struct ifreq + * as parameter, but here we are sending it down + * to the interface with a pointer to struct ifaddr, + * for legacy reasons. + */ + error = ifnet_ioctl(ifp, PF_INET, SIOCSIFDSTADDR, ia); + IFA_LOCK(&ia->ia_ifa); + if (error == EOPNOTSUPP) + error = 0; + if (error != 0) { + ia->ia_dstaddr = dstaddr; + IFA_UNLOCK(&ia->ia_ifa); + break; + } + IFA_LOCK_ASSERT_HELD(&ia->ia_ifa); - ev_msg.event_code = KEV_INET_SIFNETMASK; + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET_SUBCLASS; + + ev_msg.event_code = KEV_INET_SIFDSTADDR; if (ia->ia_ifa.ifa_dstaddr) { - in_event_data.ia_dstaddr = ((struct sockaddr_in *) - (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr; + in_event_data.ia_dstaddr = ((struct sockaddr_in *) + (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr; } else { in_event_data.ia_dstaddr.s_addr = INADDR_ANY; } - in_event_data.ia_addr = ia->ia_addr.sin_addr; - in_event_data.ia_net = ia->ia_net; - in_event_data.ia_netmask = ia->ia_netmask; - in_event_data.ia_subnet = ia->ia_subnet; - in_event_data.ia_subnetmask = ia->ia_subnetmask; - in_event_data.ia_netbroadcast = ia->ia_netbroadcast; + + in_event_data.ia_addr = ia->ia_addr.sin_addr; + in_event_data.ia_net = ia->ia_net; + in_event_data.ia_netmask = ia->ia_netmask; + in_event_data.ia_subnet = ia->ia_subnet; + in_event_data.ia_subnetmask = ia->ia_subnetmask; + in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); (void) strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; + in_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; ev_msg.dv[0].data_length = sizeof (struct kev_in_data); ev_msg.dv[1].data_length = 0; kev_post_msg(&ev_msg); - break; - } - - case SIOCAIFADDR: { /* struct ifaliasreq */ - struct sockaddr_in addr, broadaddr, mask; - - VERIFY(ifp != NULL && ia != NULL); - bcopy(&((struct ifaliasreq *)(void *)data)->ifra_addr, - &addr, sizeof (addr)); - 
bcopy(&((struct ifaliasreq *)(void *)data)->ifra_broadaddr, - &broadaddr, sizeof (broadaddr)); - bcopy(&((struct ifaliasreq *)(void *)data)->ifra_mask, - &mask, sizeof (mask)); - - maskIsNew = 0; - hostIsNew = 1; - error = 0; + lck_mtx_lock(rnh_lock); IFA_LOCK(&ia->ia_ifa); - if (ia->ia_addr.sin_family == AF_INET) { - if (addr.sin_len == 0) { - addr = ia->ia_addr; - hostIsNew = 0; - } else if (addr.sin_addr.s_addr == - ia->ia_addr.sin_addr.s_addr) { - hostIsNew = 0; - } - } - if (mask.sin_len) { - IFA_UNLOCK(&ia->ia_ifa); - in_ifscrub(ifp, ia, 0); - IFA_LOCK(&ia->ia_ifa); - ia->ia_sockmask = mask; - ia->ia_subnetmask = - ntohl(ia->ia_sockmask.sin_addr.s_addr); - maskIsNew = 1; - } - if ((ifp->if_flags & IFF_POINTOPOINT) && - (broadaddr.sin_family == AF_INET)) { + if (ia->ia_flags & IFA_ROUTE) { + ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&dstaddr; IFA_UNLOCK(&ia->ia_ifa); - in_ifscrub(ifp, ia, 0); + rtinit_locked(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); IFA_LOCK(&ia->ia_ifa); - ia->ia_dstaddr = broadaddr; - ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in); - maskIsNew = 1; /* We lie; but the effect's the same */ - } - if (addr.sin_family == AF_INET && (hostIsNew || maskIsNew)) { + ia->ia_ifa.ifa_dstaddr = + (struct sockaddr *)&ia->ia_dstaddr; IFA_UNLOCK(&ia->ia_ifa); - error = in_ifinit(ifp, ia, &addr, 0); + rtinit_locked(&(ia->ia_ifa), (int)RTM_ADD, + RTF_HOST|RTF_UP); } else { IFA_UNLOCK(&ia->ia_ifa); } -#if PF - if (!error) - (void) pf_ifaddr_hook(ifp, cmd); -#endif /* PF */ - IFA_LOCK(&ia->ia_ifa); - if ((ifp->if_flags & IFF_BROADCAST) && - (broadaddr.sin_family == AF_INET)) - ia->ia_broadaddr = broadaddr; + lck_mtx_unlock(rnh_lock); + break; - /* - * Report event. - */ - if ((error == 0) || (error == EEXIST)) { - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_INET_SUBCLASS; - if (hostIsNew) - ev_msg.event_code = KEV_INET_NEW_ADDR; - else - ev_msg.event_code = KEV_INET_CHANGED_ADDR; - if (ia->ia_ifa.ifa_dstaddr) { - in_event_data.ia_dstaddr = - ((struct sockaddr_in *)(void *)ia-> - ia_ifa.ifa_dstaddr)->sin_addr; - } else { - in_event_data.ia_dstaddr.s_addr = INADDR_ANY; - } - in_event_data.ia_addr = ia->ia_addr.sin_addr; - in_event_data.ia_net = ia->ia_net; - in_event_data.ia_netmask = ia->ia_netmask; - in_event_data.ia_subnet = ia->ia_subnet; - in_event_data.ia_subnetmask = ia->ia_subnetmask; - in_event_data.ia_netbroadcast = ia->ia_netbroadcast; - IFA_UNLOCK(&ia->ia_ifa); - (void) strncpy(&in_event_data.link_data.if_name[0], - ifp->if_name, IFNAMSIZ); - in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = ifp->if_unit; + default: + VERIFY(0); + /* NOTREACHED */ + } - ev_msg.dv[0].data_ptr = &in_event_data; - ev_msg.dv[0].data_length = sizeof (struct kev_in_data); - ev_msg.dv[1].data_length = 0; + return (error); +} - kev_post_msg(&ev_msg); +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. 
+ */ +static __attribute__((noinline)) int +inctl_ifbrdaddr(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, + struct ifreq *ifr) +{ + struct kev_in_data in_event_data; + struct kev_msg ev_msg; + int error = 0; + + VERIFY(ifp != NULL); + + if (ia == NULL) + return (EADDRNOTAVAIL); + + if (!(ifp->if_flags & IFF_BROADCAST)) + return (EINVAL); + + bzero(&in_event_data, sizeof (struct kev_in_data)); + bzero(&ev_msg, sizeof (struct kev_msg)); + + switch (cmd) { + case SIOCGIFBRDADDR: /* struct ifreq */ + IFA_LOCK(&ia->ia_ifa); + bcopy(&ia->ia_broadaddr, &ifr->ifr_broadaddr, + sizeof (struct sockaddr_in)); + IFA_UNLOCK(&ia->ia_ifa); + break; + + case SIOCSIFBRDADDR: /* struct ifreq */ + IFA_LOCK(&ia->ia_ifa); + bcopy(&ifr->ifr_broadaddr, &ia->ia_broadaddr, + sizeof (struct sockaddr_in)); + + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET_SUBCLASS; + + ev_msg.event_code = KEV_INET_SIFBRDADDR; + + if (ia->ia_ifa.ifa_dstaddr) { + in_event_data.ia_dstaddr = ((struct sockaddr_in *) + (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr; } else { - IFA_UNLOCK(&ia->ia_ifa); + in_event_data.ia_dstaddr.s_addr = INADDR_ANY; } + in_event_data.ia_addr = ia->ia_addr.sin_addr; + in_event_data.ia_net = ia->ia_net; + in_event_data.ia_netmask = ia->ia_netmask; + in_event_data.ia_subnet = ia->ia_subnet; + in_event_data.ia_subnetmask = ia->ia_subnetmask; + in_event_data.ia_netbroadcast = ia->ia_netbroadcast; + IFA_UNLOCK(&ia->ia_ifa); + (void) strncpy(&in_event_data.link_data.if_name[0], + ifp->if_name, IFNAMSIZ); + in_event_data.link_data.if_family = ifp->if_family; + in_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; + + ev_msg.dv[0].data_ptr = &in_event_data; + ev_msg.dv[0].data_length = sizeof (struct kev_in_data); + ev_msg.dv[1].data_length = 0; + + kev_post_msg(&ev_msg); break; + + default: + VERIFY(0); + /* NOTREACHED */ } - case SIOCDIFADDR: /* struct ifreq */ - VERIFY(ifp != NULL && ia != NULL); - error = ifnet_ioctl(ifp, PF_INET, SIOCDIFADDR, ia); - if (error == EOPNOTSUPP) - error = 0; - if (error != 0) { + return (error); +} + +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. 
+ */ +static __attribute__((noinline)) int +inctl_ifnetmask(struct ifnet *ifp, struct in_ifaddr *ia, u_long cmd, + struct ifreq *ifr) +{ + struct kev_in_data in_event_data; + struct kev_msg ev_msg; + struct sockaddr_in mask; + int error = 0; + + VERIFY(ifp != NULL); + + bzero(&in_event_data, sizeof (struct kev_in_data)); + bzero(&ev_msg, sizeof (struct kev_msg)); + + switch (cmd) { + case SIOCGIFNETMASK: /* struct ifreq */ + if (ia == NULL) { + error = EADDRNOTAVAIL; break; } + IFA_LOCK(&ia->ia_ifa); + bcopy(&ia->ia_sockmask, &ifr->ifr_addr, sizeof (mask)); + IFA_UNLOCK(&ia->ia_ifa); + break; - /* Fill out the kernel event information */ - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_INET_SUBCLASS; + case SIOCSIFNETMASK: { /* struct ifreq */ + in_addr_t i; - ev_msg.event_code = KEV_INET_ADDR_DELETED; + bcopy(&ifr->ifr_addr, &mask, sizeof (mask)); + i = mask.sin_addr.s_addr; + VERIFY(ia != NULL); IFA_LOCK(&ia->ia_ifa); + ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr = i); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET_SUBCLASS; + + ev_msg.event_code = KEV_INET_SIFNETMASK; + if (ia->ia_ifa.ifa_dstaddr) { - in_event_data.ia_dstaddr = ((struct sockaddr_in *) - (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr; + in_event_data.ia_dstaddr = ((struct sockaddr_in *) + (void *)ia->ia_ifa.ifa_dstaddr)->sin_addr; } else { in_event_data.ia_dstaddr.s_addr = INADDR_ANY; } - in_event_data.ia_addr = ia->ia_addr.sin_addr; - in_event_data.ia_net = ia->ia_net; - in_event_data.ia_netmask = ia->ia_netmask; - in_event_data.ia_subnet = ia->ia_subnet; - in_event_data.ia_subnetmask = ia->ia_subnetmask; - in_event_data.ia_netbroadcast = ia->ia_netbroadcast; + in_event_data.ia_addr = ia->ia_addr.sin_addr; + in_event_data.ia_net = ia->ia_net; + in_event_data.ia_netmask = ia->ia_netmask; + in_event_data.ia_subnet = ia->ia_subnet; + in_event_data.ia_subnetmask = ia->ia_subnetmask; + in_event_data.ia_netbroadcast = ia->ia_netbroadcast; IFA_UNLOCK(&ia->ia_ifa); (void) strncpy(&in_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in_event_data.link_data.if_family = ifp->if_family; - in_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; + in_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; ev_msg.dv[0].data_ptr = &in_event_data; - ev_msg.dv[0].data_length = sizeof(struct kev_in_data); + ev_msg.dv[0].data_length = sizeof (struct kev_in_data); ev_msg.dv[1].data_length = 0; - ifa = &ia->ia_ifa; - lck_rw_lock_exclusive(in_ifaddr_rwlock); - /* Release ia_link reference */ - IFA_REMREF(ifa); - TAILQ_REMOVE(&in_ifaddrhead, ia, ia_link); - IFA_LOCK(ifa); - if (IA_IS_HASHED(ia)) - in_iahash_remove(ia); - IFA_UNLOCK(ifa); - lck_rw_done(in_ifaddr_rwlock); + kev_post_msg(&ev_msg); + break; + } + + default: + VERIFY(0); + /* NOTREACHED */ + } + + return (error); +} + +/* + * Generic INET control operations (ioctl's). + * + * ifp is NULL if not an interface-specific ioctl. + * + * Most of the routines called to handle the ioctls would end up being + * tail-call optimized, which unfortunately causes this routine to + * consume too much stack space; this is the reason for the "noinline" + * attribute used on those routines. + * + * If called directly from within the networking stack (as opposed to via + * pru_control), the socket parameter may be NULL. 
+ */ +int +in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, + struct proc *p) +{ + struct ifreq *ifr = (struct ifreq *)(void *)data; + struct sockaddr_in addr, dstaddr; + struct sockaddr_in sin, *sa = NULL; + boolean_t privileged = (proc_suser(p) == 0); + boolean_t so_unlocked = FALSE; + struct in_ifaddr *ia = NULL; + struct ifaddr *ifa; + int error = 0; + + /* In case it's NULL, make sure it came from the kernel */ + VERIFY(so != NULL || p == kernproc); + + /* + * ioctls which don't require ifp, but require socket. + */ + switch (cmd) { + case SIOCGASSOCIDS32: /* struct so_aidreq32 */ + case SIOCGASSOCIDS64: /* struct so_aidreq64 */ + return (inctl_associd(so, cmd, data)); + /* NOTREACHED */ + + case SIOCGCONNIDS32: /* struct so_cidreq32 */ + case SIOCGCONNIDS64: /* struct so_cidreq64 */ + return (inctl_connid(so, cmd, data)); + /* NOTREACHED */ + + case SIOCGCONNINFO32: /* struct so_cinforeq32 */ + case SIOCGCONNINFO64: /* struct so_cinforeq64 */ + return (inctl_conninfo(so, cmd, data)); + /* NOTREACHED */ + } + + /* + * The rest of ioctls require ifp; reject if we don't have one; + * return ENXIO to be consistent with ifioctl(). + */ + if (ifp == NULL) + return (ENXIO); + + /* + * ioctls which require ifp but not interface address. + */ + switch (cmd) { + case SIOCAUTOADDR: /* struct ifreq */ + if (!privileged) + return (EPERM); + return (inctl_autoaddr(ifp, ifr)); + /* NOTREACHED */ + + case SIOCARPIPLL: /* struct ifreq */ + if (!privileged) + return (EPERM); + return (inctl_arpipll(ifp, ifr)); + /* NOTREACHED */ + + case SIOCSETROUTERMODE: /* struct ifreq */ + if (!privileged) + return (EPERM); + return (inctl_setrouter(ifp, ifr)); + /* NOTREACHED */ + + case SIOCPROTOATTACH: /* struct ifreq */ + if (!privileged) + return (EPERM); + return (in_domifattach(ifp)); + /* NOTREACHED */ + + case SIOCPROTODETACH: /* struct ifreq */ + if (!privileged) + return (EPERM); /* - * in_ifscrub kills the interface route. + * If an IPv4 address is still present, refuse to detach. */ - in_ifscrub(ifp, ia, 0); - ifnet_lock_exclusive(ifp); - IFA_LOCK(ifa); - /* if_detach_ifa() releases ifa_link reference */ - if_detach_ifa(ifp, ifa); - /* Our reference to this address is dropped at the bottom */ - IFA_UNLOCK(ifa); + ifnet_lock_shared(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + IFA_LOCK(ifa); + if (ifa->ifa_addr->sa_family == AF_INET) { + IFA_UNLOCK(ifa); + break; + } + IFA_UNLOCK(ifa); + } + ifnet_lock_done(ifp); + return ((ifa == NULL) ? proto_unplumb(PF_INET, ifp) : EBUSY); + /* NOTREACHED */ - /* - * If the interface supports multicast, and no address is left, - * remove the "all hosts" multicast group from that interface. - */ - if ((ifp->if_flags & IFF_MULTICAST) != 0 || - ifp->if_allhostsinm != NULL ) { + case SIOCALIFADDR: /* struct if_laddrreq */ + case SIOCDLIFADDR: /* struct if_laddrreq */ + if (!privileged) + return (EPERM); + /* FALLTHRU */ + case SIOCGLIFADDR: { /* struct if_laddrreq */ + struct if_laddrreq iflr; + + bcopy(data, &iflr, sizeof (iflr)); + error = inctl_lifaddr(ifp, cmd, &iflr); + bcopy(&iflr, data, sizeof (iflr)); + return (error); + /* NOTREACHED */ + } + } + + /* + * ioctls which require interface address; obtain sockaddr_in. 
+ */ + switch (cmd) { + case SIOCAIFADDR: /* struct {if,in_}aliasreq */ + if (!privileged) + return (EPERM); + bcopy(&((struct in_aliasreq *)(void *)data)->ifra_addr, + &sin, sizeof (sin)); + sa = &sin; + break; + + case SIOCDIFADDR: /* struct ifreq */ + case SIOCSIFADDR: /* struct ifreq */ + case SIOCSIFDSTADDR: /* struct ifreq */ + case SIOCSIFNETMASK: /* struct ifreq */ + case SIOCSIFBRDADDR: /* struct ifreq */ + if (!privileged) + return (EPERM); + /* FALLTHRU */ + case SIOCGIFADDR: /* struct ifreq */ + case SIOCGIFDSTADDR: /* struct ifreq */ + case SIOCGIFNETMASK: /* struct ifreq */ + case SIOCGIFBRDADDR: /* struct ifreq */ + bcopy(&ifr->ifr_addr, &sin, sizeof (sin)); + sa = &sin; + break; + } + + /* + * Find address for this interface, if it exists. + * + * If an alias address was specified, find that one instead of + * the first one on the interface, if possible. + */ + VERIFY(ia == NULL); + if (sa != NULL) { + struct in_ifaddr *iap; + /* + * Any failures from this point on must take into account + * a non-NULL "ia" with an outstanding reference count, and + * therefore requires IFA_REMREF. Jump to "done" label + * instead of calling return if "ia" is valid. + */ + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(iap, INADDR_HASH(sa->sin_addr.s_addr), ia_hash) { + IFA_LOCK(&iap->ia_ifa); + if (iap->ia_ifp == ifp && + iap->ia_addr.sin_addr.s_addr == + sa->sin_addr.s_addr) { + ia = iap; + IFA_UNLOCK(&iap->ia_ifa); + break; + } + IFA_UNLOCK(&iap->ia_ifa); + } + /* take a reference on ia before releasing lock */ + if (ia != NULL) + IFA_ADDREF(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + + if (ia == NULL) { + ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - IFA_LOCK(ifa); - if (ifa->ifa_addr->sa_family == AF_INET) { - IFA_UNLOCK(ifa); + iap = ifatoia(ifa); + IFA_LOCK(&iap->ia_ifa); + if (iap->ia_addr.sin_family == AF_INET) { + ia = iap; + IFA_UNLOCK(&iap->ia_ifa); break; } - IFA_UNLOCK(ifa); + IFA_UNLOCK(&iap->ia_ifa); } + /* take a reference on ia before releasing lock */ + if (ia != NULL) + IFA_ADDREF(&ia->ia_ifa); ifnet_lock_done(ifp); + } + } - lck_mtx_lock(&ifp->if_addrconfig_lock); - if (ifa == NULL && ifp->if_allhostsinm != NULL) { - struct in_multi *inm = ifp->if_allhostsinm; - ifp->if_allhostsinm = NULL; + /* + * Unlock the socket since ifnet_ioctl() may be invoked by + * one of the ioctl handlers below. Socket will be re-locked + * prior to returning. 
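The lookup above follows a strict discipline: search under the shared lock, take a reference on the hit while the lock is still held, and only then unlock. A generic userland sketch of that pattern (pthread-based; all names are assumptions, not kernel APIs):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stddef.h>

struct entry {
	struct entry *next;
	unsigned key;
	atomic_int refcnt;
};

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct entry *table_head;

/* cf. the IFA_ADDREF-before-lck_rw_done sequence above */
static struct entry *
entry_lookup(unsigned key)
{
	struct entry *e;

	pthread_rwlock_rdlock(&table_lock);
	for (e = table_head; e != NULL; e = e->next) {
		if (e->key == key)
			break;
	}
	/* take a reference on the entry before releasing the lock */
	if (e != NULL)
		atomic_fetch_add(&e->refcnt, 1);
	pthread_rwlock_unlock(&table_lock);
	return (e);	/* caller drops the reference when done */
}

int
main(void)
{
	struct entry one = { .next = NULL, .key = 42, .refcnt = 1 };

	table_head = &one;
	printf("%s\n", entry_lookup(42) ? "found" : "missing");
	return (0);
}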
+ */ + if (so != NULL) { + socket_unlock(so, 0); + so_unlocked = TRUE; + } - in_delmulti(inm); - /* release the reference for allhostsinm */ - INM_REMREF(inm); + switch (cmd) { + case SIOCAIFADDR: /* struct {if,in_}aliasreq */ + case SIOCDIFADDR: /* struct ifreq */ + if (cmd == SIOCAIFADDR) { + bcopy(&((struct in_aliasreq *)(void *)data)-> + ifra_addr, &addr, sizeof (addr)); + bcopy(&((struct in_aliasreq *)(void *)data)-> + ifra_dstaddr, &dstaddr, sizeof (dstaddr)); + } else { + VERIFY(cmd == SIOCDIFADDR); + bcopy(&((struct ifreq *)(void *)data)->ifr_addr, + &addr, sizeof (addr)); + bzero(&dstaddr, sizeof (dstaddr)); + } + + if (addr.sin_family == AF_INET) { + struct in_ifaddr *oia; + + lck_rw_lock_shared(in_ifaddr_rwlock); + for (oia = ia; ia; ia = ia->ia_link.tqe_next) { + IFA_LOCK(&ia->ia_ifa); + if (ia->ia_ifp == ifp && + ia->ia_addr.sin_addr.s_addr == + addr.sin_addr.s_addr) { + IFA_ADDREF_LOCKED(&ia->ia_ifa); + IFA_UNLOCK(&ia->ia_ifa); + break; + } + IFA_UNLOCK(&ia->ia_ifa); } - lck_mtx_unlock(&ifp->if_addrconfig_lock); + lck_rw_done(in_ifaddr_rwlock); + if (oia != NULL) + IFA_REMREF(&oia->ia_ifa); + if ((ifp->if_flags & IFF_POINTOPOINT) && + (cmd == SIOCAIFADDR) && + (dstaddr.sin_addr.s_addr == INADDR_ANY)) { + error = EDESTADDRREQ; + goto done; + } + } else if (cmd == SIOCAIFADDR) { + error = EINVAL; + goto done; + } + if (cmd == SIOCDIFADDR && ia == NULL) { + error = EADDRNOTAVAIL; + goto done; + } + /* FALLTHROUGH */ + case SIOCSIFADDR: /* struct ifreq */ + case SIOCSIFDSTADDR: /* struct ifreq */ + case SIOCSIFNETMASK: /* struct ifreq */ + if (cmd == SIOCAIFADDR) { + /* fell thru from above; just repeat it */ + bcopy(&((struct in_aliasreq *)(void *)data)-> + ifra_addr, &addr, sizeof (addr)); } else { - ifnet_lock_done(ifp); + VERIFY(cmd == SIOCDIFADDR || cmd == SIOCSIFADDR || + cmd == SIOCSIFNETMASK || cmd == SIOCSIFDSTADDR); + bcopy(&((struct ifreq *)(void *)data)->ifr_addr, + &addr, sizeof (addr)); } - /* Post the kernel event */ - kev_post_msg(&ev_msg); - - /* - * See if there is any IPV4 address left and if so, - * reconfigure KDP to use current primary address. - */ - ifa = ifa_ifpgetprimary(ifp, AF_INET); - if (ifa != NULL) { + if (addr.sin_family != AF_INET && cmd == SIOCSIFADDR) { + error = EINVAL; + goto done; + } + if (ia == NULL) { + ia = in_ifaddr_alloc(M_WAITOK); + if (ia == NULL) { + error = ENOBUFS; + goto done; + } + ifnet_lock_exclusive(ifp); + ifa = &ia->ia_ifa; + IFA_LOCK(ifa); + /* Hold a reference for this routine */ + IFA_ADDREF_LOCKED(ifa); + IA_HASH_INIT(ia); + ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; + ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; + ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; + ia->ia_sockmask.sin_len = 8; + if (ifp->if_flags & IFF_BROADCAST) { + ia->ia_broadaddr.sin_len = sizeof (ia->ia_addr); + ia->ia_broadaddr.sin_family = AF_INET; + } + ia->ia_ifp = ifp; + if (!(ifp->if_flags & IFF_LOOPBACK)) + in_interfaces++; + /* if_attach_ifa() holds a reference for ifa_link */ + if_attach_ifa(ifp, ifa); /* - * NOTE: SIOCSIFADDR is defined with struct ifreq - * as parameter, but here we are sending it down - * to the interface with a pointer to struct ifaddr, - * for legacy reasons. + * If we have to go through in_ifinit(), make sure + * to avoid installing route(s) based on this address + * via PFC_IFUP event, before the link resolver (ARP) + * initializes it. 
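Dropping the socket lock here and restoring it on the way out is the standard shape for calling a routine that may block without holding the caller's lock across it. A minimal pthread sketch of the same flag-guarded unlock/relock (names assumed):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t so_mtx = PTHREAD_MUTEX_INITIALIZER;

static int
blocking_callout(void)
{
	/* stand-in for ifnet_ioctl(): may sleep, must not hold so_mtx */
	return (0);
}

/* entered and left with so_mtx held whenever have_lock is true */
static int
do_ioctl(bool have_lock)
{
	bool unlocked = false;	/* cf. so_unlocked above */
	int error;

	if (have_lock) {
		pthread_mutex_unlock(&so_mtx);
		unlocked = true;
	}
	error = blocking_callout();
	if (unlocked)
		pthread_mutex_lock(&so_mtx);	/* re-lock before return */
	return (error);
}

int
main(void)
{
	pthread_mutex_lock(&so_mtx);
	(void) do_ioctl(true);
	pthread_mutex_unlock(&so_mtx);
	return (0);
}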
*/ - error = ifnet_ioctl(ifp, PF_INET, SIOCSIFADDR, ifa); - if (error == EOPNOTSUPP) - error = 0; - - /* Release reference from ifa_ifpgetprimary() */ - IFA_REMREF(ifa); + if (cmd == SIOCAIFADDR || cmd == SIOCSIFADDR) + ifa->ifa_debug |= IFD_NOTREADY; + IFA_UNLOCK(ifa); + ifnet_lock_done(ifp); + lck_rw_lock_exclusive(in_ifaddr_rwlock); + /* Hold a reference for ia_link */ + IFA_ADDREF(ifa); + TAILQ_INSERT_TAIL(&in_ifaddrhead, ia, ia_link); + lck_rw_done(in_ifaddr_rwlock); + /* discard error */ + (void) in_domifattach(ifp); + error = 0; } -#if PF - (void) pf_ifaddr_hook(ifp, cmd); -#endif /* PF */ break; + } -#ifdef __APPLE__ - case SIOCSETOT: { /* int */ - /* - * Inspiration from tcp_ctloutput() and ip_ctloutput() - * Special ioctl for OpenTransport sockets - */ - struct inpcb *inp, *cloned_inp; - int error2 = 0; - int cloned_fd; - - bcopy(data, &cloned_fd, sizeof (cloned_fd)); - - inp = sotoinpcb(so); - if (inp == NULL) { - break; - } + switch (cmd) { + case SIOCGIFDSTADDR: /* struct ifreq */ + case SIOCSIFDSTADDR: /* struct ifreq */ + error = inctl_ifdstaddr(ifp, ia, cmd, ifr); + break; - /* let's make sure it's either -1 or a valid file descriptor */ - if (cloned_fd != -1) { - struct socket *cloned_so; - error2 = file_socket(cloned_fd, &cloned_so); - if (error2) { - break; - } - cloned_inp = sotoinpcb(cloned_so); - file_drop(cloned_fd); - } else { - cloned_inp = NULL; - } + case SIOCGIFBRDADDR: /* struct ifreq */ + case SIOCSIFBRDADDR: /* struct ifreq */ + error = inctl_ifbrdaddr(ifp, ia, cmd, ifr); + break; - if (cloned_inp == NULL) { - /* OT always uses IP_PORTRANGE_HIGH */ - inp->inp_flags &= ~(INP_LOWPORT); - inp->inp_flags |= INP_HIGHPORT; - /* - * For UDP, OT allows broadcast by default; - * for TCP we want to see MSG_OOB when we - * receive urgent data. 
- */ - if (so->so_type == SOCK_DGRAM) - so->so_options |= SO_BROADCAST; - else if (so->so_type == SOCK_STREAM) - so->so_options |= SO_WANTOOBFLAG; - } else { - inp->inp_ip_tos = cloned_inp->inp_ip_tos; - inp->inp_ip_ttl = cloned_inp->inp_ip_ttl; - inp->inp_flags = cloned_inp->inp_flags; + case SIOCGIFNETMASK: /* struct ifreq */ + case SIOCSIFNETMASK: /* struct ifreq */ + error = inctl_ifnetmask(ifp, ia, cmd, ifr); + break; - /* Multicast options */ - if (cloned_inp->inp_moptions != NULL) - error2 = imo_clone(cloned_inp, inp); - } + case SIOCGIFADDR: /* struct ifreq */ + case SIOCSIFADDR: /* struct ifreq */ + case SIOCAIFADDR: /* struct {if,in_}aliasreq */ + case SIOCDIFADDR: /* struct ifreq */ + error = inctl_ifaddr(ifp, ia, cmd, ifr); break; - } -#endif /* __APPLE__ */ default: error = EOPNOTSUPP; + break; } - done: - if (ia != NULL) { +done: + if (ia != NULL) IFA_REMREF(&ia->ia_ifa); - } + if (so_unlocked) + socket_lock(so, 0); + return (error); } @@ -1241,9 +1504,8 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * EADDRNOTAVAIL on prefix match failed/specified address not found * other values may be returned from in_ioctl() */ -static int -in_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, - struct ifnet *ifp, struct proc *p) +static __attribute__((noinline)) int +inctl_lifaddr(struct ifnet *ifp, u_long cmd, struct if_laddrreq *iflr) { struct ifaddr *ifa; @@ -1252,59 +1514,57 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, switch (cmd) { case SIOCGLIFADDR: /* address must be specified on GET with IFLR_PREFIX */ - if ((iflr->flags & IFLR_PREFIX) == 0) + if (!(iflr->flags & IFLR_PREFIX)) break; - /*FALLTHROUGH*/ + /* FALLTHROUGH */ case SIOCALIFADDR: case SIOCDLIFADDR: /* address must be specified on ADD and DELETE */ if (iflr->addr.ss_family != AF_INET) - return EINVAL; - if (iflr->addr.ss_len != sizeof(struct sockaddr_in)) - return EINVAL; + return (EINVAL); + if (iflr->addr.ss_len != sizeof (struct sockaddr_in)) + return (EINVAL); /* XXX need improvement */ - if (iflr->dstaddr.ss_family - && iflr->dstaddr.ss_family != AF_INET) - return EINVAL; - if (iflr->dstaddr.ss_family - && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in)) - return EINVAL; + if (iflr->dstaddr.ss_family && + iflr->dstaddr.ss_family != AF_INET) + return (EINVAL); + if (iflr->dstaddr.ss_family && + iflr->dstaddr.ss_len != sizeof (struct sockaddr_in)) + return (EINVAL); break; - default: /*shouldn't happen*/ - return EOPNOTSUPP; + default: + /* shouldn't happen */ + VERIFY(0); + /* NOTREACHED */ } - if (sizeof(struct in_addr) * 8 < iflr->prefixlen) - return EINVAL; + if (sizeof (struct in_addr) * 8 < iflr->prefixlen) + return (EINVAL); switch (cmd) { - case SIOCALIFADDR: - { + case SIOCALIFADDR: { struct in_aliasreq ifra; if (iflr->flags & IFLR_PREFIX) - return EINVAL; - - /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */ - bzero(&ifra, sizeof(ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, - sizeof(ifra.ifra_name)); + return (EINVAL); + /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). 
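inctl_lifaddr converts the caller's prefix length into a sockaddr mask with in_len2mask(), and back with in_mask2len() on the GET path. A standalone sketch of that conversion; these are minimal re-implementations for illustration, not the kernel's versions:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static struct in_addr
len2mask(int len)	/* cf. in_len2mask(); len in [0, 32] */
{
	struct in_addr mask;

	mask.s_addr = (len == 0) ? 0 : htonl(~0u << (32 - len));
	return (mask);
}

static int
mask2len(struct in_addr mask)	/* cf. in_mask2len() */
{
	uint32_t m = ntohl(mask.s_addr);
	int len = 0;

	while (m & 0x80000000u) {	/* count leading one bits */
		len++;
		m <<= 1;
	}
	return (len);
}

int
main(void)
{
	char buf[INET_ADDRSTRLEN];
	struct in_addr m = len2mask(24);

	printf("/24 -> %s -> /%d\n",
	    inet_ntop(AF_INET, &m, buf, sizeof (buf)), mask2len(m));
	return (0);
}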
*/ + bzero(&ifra, sizeof (ifra)); + bcopy(iflr->iflr_name, ifra.ifra_name, sizeof (ifra.ifra_name)); bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len); - - if (iflr->dstaddr.ss_family) { /*XXX*/ + if (iflr->dstaddr.ss_family) { /* XXX */ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, - iflr->dstaddr.ss_len); + sizeof (struct sockaddr_in)); } - ifra.ifra_mask.sin_family = AF_INET; - ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in); + ifra.ifra_mask.sin_len = sizeof (struct sockaddr_in); in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen); - return in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, p); - } + return (in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, + ifp, kernproc)); + } + case SIOCGLIFADDR: - case SIOCDLIFADDR: - { + case SIOCDLIFADDR: { struct in_ifaddr *ia; struct in_addr mask, candidate; struct in_addr match = { 0 }; @@ -1322,13 +1582,13 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, /* if you set extra bits, that's wrong */ if (match.s_addr != sin->sin_addr.s_addr) - return EINVAL; + return (EINVAL); cmp = 1; } else { if (cmd == SIOCGLIFADDR) { /* on getting an address, take the 1st match */ - cmp = 0; /*XXX*/ + cmp = 0; /* XXX */ } else { /* on deleting an address, do exact match */ in_len2mask(&mask, 32); @@ -1350,7 +1610,7 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, IFA_UNLOCK(ifa); break; } - candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr; + candidate.s_addr = SIN(&ifa->ifa_addr)->sin_addr.s_addr; candidate.s_addr &= mask.s_addr; IFA_UNLOCK(ifa); if (candidate.s_addr == match.s_addr) @@ -1360,7 +1620,7 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, IFA_ADDREF(ifa); ifnet_lock_done(ifp); if (!ifa) - return EADDRNOTAVAIL; + return (EADDRNOTAVAIL); ia = (struct in_ifaddr *)ifa; if (cmd == SIOCGLIFADDR) { @@ -1370,73 +1630,36 @@ in_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &iflr->dstaddr, - ia->ia_dstaddr.sin_len); - } else + ia->ia_dstaddr.sin_len); + } else { bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); - + } iflr->prefixlen = - in_mask2len(&ia->ia_sockmask.sin_addr); - - iflr->flags = 0; /*XXX*/ + in_mask2len(&ia->ia_sockmask.sin_addr); + iflr->flags = 0; /* XXX */ IFA_UNLOCK(ifa); IFA_REMREF(ifa); - return 0; + return (0); } else { - struct in_aliasreq ifra; - - /* fill in_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ - bzero(&ifra, sizeof(ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, - sizeof(ifra.ifra_name)); + struct ifreq ifr; + /* fill ifreq and do ioctl(SIOCDIFADDR) */ + bzero(&ifr, sizeof (ifr)); + bcopy(iflr->iflr_name, ifr.ifr_name, + sizeof (ifr.ifr_name)); IFA_LOCK(ifa); - bcopy(&ia->ia_addr, &ifra.ifra_addr, - ia->ia_addr.sin_len); - if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { - bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, - ia->ia_dstaddr.sin_len); - } - bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr, - ia->ia_sockmask.sin_len); + bcopy(&ia->ia_addr, &ifr.ifr_addr, + sizeof (struct sockaddr_in)); IFA_UNLOCK(ifa); IFA_REMREF(ifa); - return in_control(so, SIOCDIFADDR, (caddr_t)&ifra, - ifp, p); + return (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, + ifp, kernproc)); } - } } - - return EOPNOTSUPP; /*just for safety*/ -} - -/* - * Handle SIOCSETROUTERMODE to set or clear the IPv4 router mode flag on - * the interface. 
When in this mode, IPv4 Link-Local Address support is - * disabled in ARP, and DHCP client support is disabled in IP input; turning - * any of them on would cause an error to be returned. Entering or exiting - * this mode will result in the removal of IPv4 addresses currently configured - * on the interface. - */ -static int -in_setrouter(struct ifnet *ifp, int enable) -{ - if (ifp->if_flags & IFF_LOOPBACK) - return (ENODEV); - - ifnet_lock_exclusive(ifp); - if (enable) { - ifp->if_eflags |= IFEF_IPV4_ROUTER; - ifp->if_eflags &= ~(IFEF_ARPLL | IFEF_AUTOCONFIGURING); - } else { - ifp->if_eflags &= ~IFEF_IPV4_ROUTER; } - ifnet_lock_done(ifp); - - /* purge all IPv4 addresses configured on this interface */ - in_purgeaddrs(ifp); - return (0); + return (EOPNOTSUPP); /* just for safety */ } /* @@ -1470,7 +1693,7 @@ in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, int locked) static void in_iahash_remove(struct in_ifaddr *ia) { - lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE); + lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE); IFA_LOCK_ASSERT_HELD(&ia->ia_ifa); if (!IA_IS_HASHED(ia)) { @@ -1492,7 +1715,7 @@ in_iahash_remove(struct in_ifaddr *ia) static void in_iahash_insert(struct in_ifaddr *ia) { - lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE); + lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE); IFA_LOCK_ASSERT_HELD(&ia->ia_ifa); if (ia->ia_addr.sin_family != AF_INET) { @@ -1502,15 +1725,15 @@ in_iahash_insert(struct in_ifaddr *ia) panic("attempt to double-insert ia %p into hash table\n", ia); /* NOTREACHED */ } - TAILQ_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); + TAILQ_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), + ia, ia_hash); IFA_ADDREF_LOCKED(&ia->ia_ifa); } /* - * Some point to point interfaces that are tunnels - * borrow the address from an underlying interface (e.g. - * VPN server). In order for source address selection logic to - * find the underlying interface first, we add the address + * Some point to point interfaces that are tunnels borrow the address from + * an underlying interface (e.g. VPN server). In order for source address + * selection logic to find the underlying interface first, we add the address * of borrowing point to point interfaces at the end of the list. * (see rdar://6733789) * @@ -1522,7 +1745,7 @@ in_iahash_insert_ptp(struct in_ifaddr *ia) struct in_ifaddr *tmp_ifa; struct ifnet *tmp_ifp; - lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE); + lck_rw_assert(in_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE); IFA_LOCK_ASSERT_HELD(&ia->ia_ifa); if (ia->ia_addr.sin_family != AF_INET) { @@ -1562,11 +1785,8 @@ in_iahash_insert_ptp(struct in_ifaddr *ia) * and routing table entry. */ static int -in_ifinit( - struct ifnet *ifp, - struct in_ifaddr *ia, - struct sockaddr_in *sin, - int scrub) +in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, + int scrub) { u_int32_t i = ntohl(sin->sin_addr.s_addr); struct sockaddr_in oldaddr; @@ -1586,7 +1806,13 @@ in_ifinit( in_iahash_remove(ia); } ia->ia_addr = *sin; - ia->ia_addr.sin_len = sizeof (*sin); + /* + * Interface addresses should not contain port or sin_zero information. 
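A small standalone equivalent of the normalization performed in in_ifinit() here, so that a stored interface address can never carry a stray port or dirty sin_zero bytes (the sin_len field is BSD-specific):

#include <string.h>
#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/* mirror of the sanitization above: fixed family/length, no port/sin_zero */
static void
sanitize_sin(struct sockaddr_in *sin)
{
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (struct sockaddr_in);	/* BSD-only field */
	sin->sin_port = 0;
	memset(&sin->sin_zero, 0, sizeof (sin->sin_zero));
}

int
main(void)
{
	struct sockaddr_in sin;

	memset(&sin, 0xff, sizeof (sin));	/* deliberately dirty */
	inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);
	sanitize_sin(&sin);
	printf("port after sanitize: %u\n", ntohs(sin.sin_port));
	return (0);
}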
+ */ + SIN(&ia->ia_addr)->sin_family = AF_INET; + SIN(&ia->ia_addr)->sin_len = sizeof (struct sockaddr_in); + SIN(&ia->ia_addr)->sin_port = 0; + bzero(&SIN(&ia->ia_addr)->sin_zero, sizeof (sin->sin_zero)); if ((ifp->if_flags & IFF_POINTOPOINT)) in_iahash_insert_ptp(ia); else @@ -1686,9 +1912,9 @@ in_ifinit( ia->ia_ifa.ifa_metric = ifp->if_metric; if (ifp->if_flags & IFF_BROADCAST) { ia->ia_broadaddr.sin_addr.s_addr = - htonl(ia->ia_subnet | ~ia->ia_subnetmask); + htonl(ia->ia_subnet | ~ia->ia_subnetmask); ia->ia_netbroadcast.s_addr = - htonl(ia->ia_net | ~ ia->ia_netmask); + htonl(ia->ia_net | ~ ia->ia_netmask); } else if (ifp->if_flags & IFF_LOOPBACK) { ia->ia_ifa.ifa_dstaddr = ia->ia_ifa.ifa_addr; flags |= RTF_HOST; @@ -1700,10 +1926,11 @@ in_ifinit( IFA_REMREF(&ia->ia_ifa); return (0); } - ia->ia_dstaddr.sin_len = sizeof (*sin); + ia->ia_dstaddr.sin_len = sizeof (struct sockaddr_in); flags |= RTF_HOST; } IFA_UNLOCK(&ia->ia_ifa); + if ((error = rtinit_locked(&(ia->ia_ifa), (int)RTM_ADD, flags)) == 0) { IFA_LOCK(&ia->ia_ifa); ia->ia_flags |= IFA_ROUTE; @@ -1729,13 +1956,16 @@ in_ifinit( inm = in_addmulti(&addr, ifp); if (inm != NULL) { - /* keep the reference on inm added by - * in_addmulti above for storing the - * pointer in allhostsinm + /* + * Keep the reference on inm added by + * in_addmulti above for storing the + * pointer in allhostsinm. */ ifp->if_allhostsinm = inm; } else { - printf("Failed to add membership to all-hosts multicast address on interface %s%d\n", ifp->if_name, ifp->if_unit); + printf("%s: failed to add membership to " + "all-hosts multicast address on %s\n", + __func__, if_name(ifp)); } } lck_mtx_unlock(&ifp->if_addrconfig_lock); @@ -1743,29 +1973,35 @@ in_ifinit( /* Release extra reference taken above */ IFA_REMREF(&ia->ia_ifa); + + if (error == 0) { + /* invalidate route caches */ + routegenid_inet_update(); + } + return (error); } - /* - * Return 1 if the address might be a local broadcast address. + * Return TRUE if the address might be a local broadcast address. */ -int +boolean_t in_broadcast(struct in_addr in, struct ifnet *ifp) { struct ifaddr *ifa; u_int32_t t; if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY) - return (1); - if ((ifp->if_flags & IFF_BROADCAST) == 0) - return (0); + return (TRUE); + if (!(ifp->if_flags & IFF_BROADCAST)) + return (FALSE); t = ntohl(in.s_addr); + /* * Look through the list of addresses for a match * with a broadcast address. */ -#define ia ((struct in_ifaddr *)ifa) +#define ia ((struct in_ifaddr *)ifa) ifnet_lock_shared(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { IFA_LOCK(ifa); @@ -1784,12 +2020,12 @@ in_broadcast(struct in_addr in, struct ifnet *ifp) ia->ia_subnetmask != (u_int32_t)0xffffffff) { IFA_UNLOCK(ifa); ifnet_lock_done(ifp); - return (1); + return (TRUE); } IFA_UNLOCK(ifa); } ifnet_lock_done(ifp); - return (0); + return (FALSE); #undef ia } @@ -1799,6 +2035,8 @@ in_purgeaddrs(struct ifnet *ifp) struct ifaddr **ifap; int err, i; + VERIFY(ifp != NULL); + /* * Be nice, and try the civilized way first. If we can't get * rid of them this way, then do it the rough way. We must @@ -1806,18 +2044,21 @@ in_purgeaddrs(struct ifnet *ifp) * removed from the global list and arrays. 
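The broadcast addresses computed in in_ifinit() above are plain mask arithmetic; the following sketch derives the network and directed-broadcast addresses for an address/mask pair the same way (example values assumed):

#include <stdio.h>
#include <arpa/inet.h>

int
main(void)
{
	struct in_addr addr, mask, net, bcast;
	char buf[INET_ADDRSTRLEN];

	(void) inet_pton(AF_INET, "192.0.2.42", &addr);	/* example values */
	(void) inet_pton(AF_INET, "255.255.255.0", &mask);

	/* same arithmetic as ia_broadaddr above, done in host byte order */
	net.s_addr = htonl(ntohl(addr.s_addr) & ntohl(mask.s_addr));
	bcast.s_addr = htonl((ntohl(addr.s_addr) & ntohl(mask.s_addr)) |
	    ~ntohl(mask.s_addr));

	printf("net %s", inet_ntop(AF_INET, &net, buf, sizeof (buf)));
	printf(" bcast %s\n", inet_ntop(AF_INET, &bcast, buf, sizeof (buf)));
	return (0);
}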
*/ err = ifnet_get_address_list_family_internal(ifp, &ifap, AF_INET, 1, - M_WAITOK); + M_WAITOK, 0); if (err == 0 && ifap != NULL) { + struct ifreq ifr; + + bzero(&ifr, sizeof (ifr)); + (void) snprintf(ifr.ifr_name, sizeof (ifr.ifr_name), + "%s", if_name(ifp)); + for (i = 0; ifap[i] != NULL; i++) { - struct ifaliasreq ifr; struct ifaddr *ifa; ifa = ifap[i]; - bzero(&ifr, sizeof (ifr)); IFA_LOCK(ifa); - ifr.ifra_addr = *ifa->ifa_addr; - if (ifa->ifa_dstaddr != NULL) - ifr.ifra_broadaddr = *ifa->ifa_dstaddr; + bcopy(ifa->ifa_addr, &ifr.ifr_addr, + sizeof (struct sockaddr_in)); IFA_UNLOCK(ifa); err = in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, kernproc); @@ -1840,50 +2081,129 @@ in_purgeaddrs(struct ifnet *ifp) sizeof (s_dstaddr)); IFA_UNLOCK(ifa); - printf("%s: SIOCDIFADDR ifp=%p ifa_addr=%s " - "ifa_dstaddr=%s (err=%d)\n", __func__, ifp, - s_addr, s_dstaddr, err); + printf("%s: SIOCDIFADDR ifp=%s ifa_addr=%s " + "ifa_dstaddr=%s (err=%d)\n", __func__, + ifp->if_xname, s_addr, s_dstaddr, err); } } ifnet_free_address_list(ifap); } else if (err != 0 && err != ENXIO) { printf("%s: error retrieving list of AF_INET addresses for " - "ifp=%p (err=%d)\n", __func__, ifp, err); + "ifp=%s (err=%d)\n", __func__, ifp->if_xname, err); } } -int inet_aton(char *cp, struct in_addr *pin); +/* + * Select endpoint address(es). For now just take the first matching + * address and discard the rest, if present. + */ int -inet_aton(char * cp, struct in_addr * pin) +in_selectaddrs(int af, struct sockaddr_list **src_sl, + struct sockaddr_entry **src_se, struct sockaddr_list **dst_sl, + struct sockaddr_entry **dst_se) { - u_char * b = (unsigned char *)pin; - int i; - char * p; - - for (p = cp, i = 0; i < 4; i++) { - u_int32_t l = strtoul(p, 0, 0); - if (l > 255) - return (FALSE); - b[i] = l; - p = strchr(p, '.'); - if (i < 3 && p == NULL) - return (FALSE); - p++; - } - return (TRUE); -} + struct sockaddr_entry *se; + int error = 0; -int inet_ntoa2(struct in_addr * pin, char * cp, const int len); -int inet_ntoa2(struct in_addr * pin, char * cp, const int len) -{ - int ret; + VERIFY(src_sl != NULL && dst_sl != NULL && *dst_sl != NULL); + VERIFY(src_se != NULL && dst_se != NULL); + + *src_se = *dst_se = NULL; + + /* pick a source address, if available */ + if (*src_sl != NULL) { + TAILQ_FOREACH(se, &(*src_sl)->sl_head, se_link) { + VERIFY(se->se_addr != NULL); + /* + * Take the first source address, or the first + * one with matching address family. + */ + if (af == AF_UNSPEC || se->se_addr->sa_family == af) { + sockaddrlist_remove(*src_sl, se); + *src_se = se; + break; + } + } + /* get rid of the rest */ + TAILQ_FOREACH(se, &(*src_sl)->sl_head, se_link) { + sockaddrlist_remove(*src_sl, se); + sockaddrentry_free(se); + } + if (*src_se != NULL) { + /* insert the first src address back in */ + sockaddrlist_insert(*src_sl, *src_se); + VERIFY((*src_sl)->sl_cnt == 1); + /* destination address must be of this family */ + af = (*src_se)->se_addr->sa_family; + } else { + /* no usable source address with matching family */ + VERIFY(af != AF_UNSPEC); + error = EAFNOSUPPORT; + goto out; + } + } + /* pick a (matching) destination address */ + TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) { + VERIFY(se->se_addr != NULL); + /* + * Take the first destination address; if source is specified, + * find one which uses the same address family. 
+ */ + if (af == AF_UNSPEC || se->se_addr->sa_family == af) { + sockaddrlist_remove(*dst_sl, se); + *dst_se = se; + break; + } + } + /* get rid of the rest */ + TAILQ_FOREACH(se, &(*dst_sl)->sl_head, se_link) { + sockaddrlist_remove(*dst_sl, se); + sockaddrentry_free(se); + } + if (*dst_se != NULL) { + /* insert the first dst address back in */ + sockaddrlist_insert(*dst_sl, *dst_se); + VERIFY((*dst_sl)->sl_cnt == 1); + } else { + /* source and destination address families don't match */ + error = EAFNOSUPPORT; + goto out; + } + + af = (*dst_se)->se_addr->sa_family; + VERIFY(*src_se == NULL || (*src_se)->se_addr->sa_family == af); - /* address is in network byte order */ - ret = snprintf(cp, len, "%u.%u.%u.%u", pin->s_addr & 0xFF, - (pin->s_addr >> 8) & 0xFF, (pin->s_addr >> 16) & 0xFF, - (pin->s_addr >> 24) & 0xFF); + /* verify address length */ + switch (af) { + case AF_INET: + if ((*dst_se)->se_addr->sa_len != + sizeof (struct sockaddr_in)) { + error = EAFNOSUPPORT; + goto out; + } + break; +#if INET6 + case AF_INET6: + if ((*dst_se)->se_addr->sa_len != + sizeof (struct sockaddr_in6)) { + error = EAFNOSUPPORT; + goto out; + } + break; +#endif /* INET6 */ + default: + error = EAFNOSUPPORT; + goto out; + } - return ret < len ? TRUE : FALSE; + /* if source address is specified, length must match destination */ + if (*src_se != NULL && (*src_se)->se_addr->sa_len != + (*dst_se)->se_addr->sa_len) { + error = EAFNOSUPPORT; + goto out; + } +out: + return (error); } /* @@ -2036,3 +2356,157 @@ in_ifaddr_trace(struct ifaddr *ifa, int refhold) idx = atomic_add_16_ov(cnt, 1) % INIFA_TRACE_HIST_SIZE; ctrace_record(&tr[idx]); } + +/* + * Handle SIOCGASSOCIDS ioctl for PF_INET domain. + */ +static int +in_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp) +{ + struct inpcb *inp = sotoinpcb(so); + associd_t aid; + + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) + return (EINVAL); + + /* INPCB has no concept of association */ + aid = ASSOCID_ANY; + *cnt = 0; + + /* just asking how many there are? */ + if (aidp == USER_ADDR_NULL) + return (0); + + return (copyout(&aid, aidp, sizeof (aid))); +} + +/* + * Handle SIOCGCONNIDS ioctl for PF_INET domain. + */ +static int +in_getconnids(struct socket *so, associd_t aid, uint32_t *cnt, + user_addr_t cidp) +{ + struct inpcb *inp = sotoinpcb(so); + connid_t cid; + + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) + return (EINVAL); + + if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + return (EINVAL); + + /* if connected, return 1 connection count */ + *cnt = ((so->so_state & SS_ISCONNECTED) ? 1 : 0); + + /* just asking how many there are? */ + if (cidp == USER_ADDR_NULL) + return (0); + + /* if INPCB is connected, assign it connid 1 */ + cid = ((*cnt != 0) ? 1 : CONNID_ANY); + + return (copyout(&cid, cidp, sizeof (cid))); +} + +/* + * Handle SIOCGCONNINFO ioctl for PF_INET domain. + */ +static int +in_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, + uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, + user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, + user_addr_t aux_data, uint32_t *aux_len) +{ +#pragma unused(aux_data) + struct inpcb *inp = sotoinpcb(so); + struct sockaddr_in sin; + struct ifnet *ifp = NULL; + int error = 0; + u_int32_t copy_len = 0; + + /* + * Don't test for INPCB_STATE_DEAD since this may be called + * after SOF_PCBCLEARING is set, e.g. after tcp_close(). 
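The conninfo handlers below use a two-call length convention: a caller passes *len == 0 to learn the required size, then calls again with a buffer and gets min(*len, size) bytes plus the copied length written back. A userland sketch of that convention, with memcpy standing in for copyout (names assumed):

#include <stdint.h>
#include <string.h>

#define MIN_LEN(a, b)	((a) < (b) ? (a) : (b))

/*
 * Sketch of the convention: *len == 0 means "report the required size",
 * otherwise copy at most *len bytes and report how many were copied.
 */
static int
get_blob(const void *src, size_t srclen, void *dst, uint32_t *len)
{
	uint32_t copy_len;

	if (*len == 0) {
		*len = (uint32_t)srclen;	/* size probe */
		return (0);
	}
	if (dst == NULL)
		return (0);
	copy_len = MIN_LEN(*len, (uint32_t)srclen);
	memcpy(dst, src, copy_len);	/* copyout() in the kernel */
	*len = copy_len;
	return (0);
}

int
main(void)
{
	const char src[] = "conninfo";
	char dst[4];
	uint32_t len = 0;

	(void) get_blob(src, sizeof (src), NULL, &len);	/* size probe */
	len = sizeof (dst);
	(void) get_blob(src, sizeof (src), dst, &len);	/* truncated copy */
	return ((int)(len != sizeof (dst)));
}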
+ */ + if (inp == NULL) { + error = EINVAL; + goto out; + } + + if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) { + error = EINVAL; + goto out; + } + + ifp = inp->inp_last_outifp; + *ifindex = ((ifp != NULL) ? ifp->if_index : 0); + *soerror = so->so_error; + *flags = 0; + if (so->so_state & SS_ISCONNECTED) + *flags |= (CIF_CONNECTED | CIF_PREFERRED); + if (inp->inp_flags & INP_BOUND_IF) + *flags |= CIF_BOUND_IF; + if (!(inp->inp_flags & INP_INADDR_ANY)) + *flags |= CIF_BOUND_IP; + if (!(inp->inp_flags & INP_ANONPORT)) + *flags |= CIF_BOUND_PORT; + + bzero(&sin, sizeof (sin)); + sin.sin_len = sizeof (sin); + sin.sin_family = AF_INET; + + /* source address and port */ + sin.sin_port = inp->inp_lport; + sin.sin_addr.s_addr = inp->inp_laddr.s_addr; + if (*src_len == 0) { + *src_len = sin.sin_len; + } else { + if (src != USER_ADDR_NULL) { + copy_len = min(*src_len, sizeof (sin)); + error = copyout(&sin, src, copy_len); + if (error != 0) + goto out; + *src_len = copy_len; + } + } + + /* destination address and port */ + sin.sin_port = inp->inp_fport; + sin.sin_addr.s_addr = inp->inp_faddr.s_addr; + if (*dst_len == 0) { + *dst_len = sin.sin_len; + } else { + if (dst != USER_ADDR_NULL) { + copy_len = min(*dst_len, sizeof (sin)); + error = copyout(&sin, dst, copy_len); + if (error != 0) + goto out; + *dst_len = copy_len; + } + } + + *aux_type = 0; + *aux_len = 0; + if (SOCK_PROTO(so) == IPPROTO_TCP) { + struct conninfo_tcp tcp_ci; + + *aux_type = CIAUX_TCP; + if (*aux_len == 0) { + *aux_len = sizeof (tcp_ci); + } else { + if (aux_data != USER_ADDR_NULL) { + copy_len = min(*aux_len, sizeof (tcp_ci)); + bzero(&tcp_ci, sizeof (tcp_ci)); + tcp_getconninfo(so, &tcp_ci); + error = copyout(&tcp_ci, aux_data, copy_len); + if (error != 0) + goto out; + *aux_len = copy_len; + } + } + } + +out: + return (error); +} diff --git a/bsd/netinet/in.h b/bsd/netinet/in.h index 4e0d49b9e..a3f565e57 100644 --- a/bsd/netinet/in.h +++ b/bsd/netinet/in.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -71,15 +71,9 @@ #include #endif -#ifndef _IN_ADDR_T -#define _IN_ADDR_T -typedef __uint32_t in_addr_t; /* base type for internet address */ -#endif +#include -#ifndef _IN_PORT_T -#define _IN_PORT_T -typedef __uint16_t in_port_t; -#endif +#include /* * POSIX 1003.1-2003 @@ -389,6 +383,35 @@ struct sockaddr_in { char sin_zero[8]; }; +#ifdef PRIVATE +/* + * sockaddr_in with scope ID field; this is used internally to keep + * track of scoped route entries in the routing table. The fact that + * such a value is embedded in the structure is an artifact of the + * current implementation which could change in future. + */ +struct sockaddr_inifscope { + __uint8_t sin_len; + sa_family_t sin_family; + in_port_t sin_port; + struct in_addr sin_addr; + /* + * To avoid possible conflict with an overlaid sockaddr_inarp + * having sin_other set to SIN_PROXY, we use the first 4-bytes + * of sin_zero since sin_srcaddr is one of the unused fields + * in sockaddr_inarp. 
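The sockaddr_inifscope overlay above hides a scope ID in the first four bytes of sin_zero. A simplified stand-in that verifies the layout assumptions behind that trick:

#include <assert.h>
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* simplified stand-in for sockaddr_inifscope's trailing union */
struct sin_tail {
	union {
		char sin_zero[8];
		struct {
			uint32_t ifscope;
		} _in_index;
	} un;
};
#define sin_scope_id un._in_index.ifscope

int
main(void)
{
	struct sin_tail t;

	memset(&t, 0, sizeof (t));
	t.sin_scope_id = 4;	/* e.g. an if_index; value assumed */

	/* overlay must sit at the start of, and within, sin_zero */
	assert(sizeof (t.un) == sizeof (t.un.sin_zero));
	assert(offsetof(struct sin_tail, un._in_index.ifscope) ==
	    offsetof(struct sin_tail, un.sin_zero));
	printf("scope id %u\n", t.sin_scope_id);
	return (0);
}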
+ */ + union { + char sin_zero[8]; + struct { + __uint32_t ifscope; + } _in_index; + } un; +#define sin_scope_id un._in_index.ifscope +}; + +#endif /* PRIVATE */ + #define INET_ADDRSTRLEN 16 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) @@ -570,7 +593,7 @@ struct __msfilterreq { struct sockaddr_storage *msfr_srcs; }; -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE struct __msfilterreq32 { uint32_t msfr_ifindex; /* interface index */ uint32_t msfr_fmode; /* filter mode for group */ @@ -588,7 +611,7 @@ struct __msfilterreq64 { struct sockaddr_storage msfr_group; /* group address */ user64_addr_t msfr_srcs; }; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* __MSFILTERREQ_DEFINED */ #pragma pack() @@ -659,8 +682,7 @@ struct in_pktinfo { */ #define IPPROTO_MAXID (IPPROTO_AH + 1) /* don't list to IPPROTO_MAX */ -#ifdef KERNEL_PRIVATE - +#ifdef BSD_KERNEL_PRIVATE #define CTL_IPPROTO_NAMES { \ { "ip", CTLTYPE_NODE }, \ { "icmp", CTLTYPE_NODE }, \ @@ -715,8 +737,7 @@ struct in_pktinfo { { 0, 0 }, \ { "ipsec", CTLTYPE_NODE }, \ } - -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ /* * Names for IP sysctl objects @@ -741,7 +762,7 @@ struct in_pktinfo { #define IPCTL_GIF_TTL 16 /* default TTL for gif encap packet */ #define IPCTL_MAXID 17 -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define IPCTL_NAMES { \ { 0, 0 }, \ @@ -762,35 +783,51 @@ struct in_pktinfo { { "keepfaith", CTLTYPE_INT }, \ { "gifttl", CTLTYPE_INT }, \ } -#endif /* KERNEL_PRIVATE */ - +#endif /* BSD_KERNEL_PRIVATE */ #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ - /* INET6 stuff */ #define __KAME_NETINET_IN_H_INCLUDED_ #include #undef __KAME_NETINET_IN_H_INCLUDED_ #ifdef KERNEL -#ifdef KERNEL_PRIVATE -struct ifnet; struct mbuf; /* forward declarations for Standard C */ +#ifdef BSD_KERNEL_PRIVATE +#include -extern int in_broadcast(struct in_addr, struct ifnet *); -extern int in_canforward(struct in_addr); +struct ip; +struct ifnet; +struct mbuf; -#define in_cksum(m, l) inet_cksum(m, 0, 0, l) -#define in_cksum_skip(m, l, o) inet_cksum(m, 0, o, (l) - (o)) - -extern u_int16_t inet_cksum(struct mbuf *m, unsigned int proto, - unsigned int offset, unsigned int transport_len); -extern u_short in_addword(u_short, u_short); -extern u_short in_pseudo(u_int, u_int, u_int); - -extern int in_localaddr(struct in_addr); +extern boolean_t in_broadcast(struct in_addr, struct ifnet *); +extern boolean_t in_canforward(struct in_addr); extern u_int32_t in_netof(struct in_addr); -extern int inaddr_local(struct in_addr); +extern uint16_t inet_cksum(struct mbuf *, uint32_t, uint32_t, uint32_t); +extern uint16_t in_addword(uint16_t, uint16_t); +extern uint16_t in_pseudo(uint32_t, uint32_t, uint32_t); +extern uint16_t in_pseudo64(uint64_t, uint64_t, uint64_t); +extern uint16_t in_cksum_hdr_opt(const struct ip *); +extern uint16_t ip_cksum_hdr_dir(struct mbuf *, uint32_t, int); +extern uint32_t in_finalize_cksum(struct mbuf *, uint32_t, uint32_t); +extern uint16_t b_sum16(const void *buf, int len); + +#define in_cksum(_m, _l) \ + inet_cksum(_m, 0, 0, _l) +#define ip_cksum_hdr_in(_m, _l) \ + ip_cksum_hdr_dir(_m, _l, 0) +#define ip_cksum_hdr_out(_m, _l) \ + ip_cksum_hdr_dir(_m, _l, 1) + +#define in_cksum_hdr(_ip) \ + (~b_sum16(_ip, sizeof (struct ip)) & 0xffff) + +#define in_cksum_offset(_m, _o) \ + ((void) in_finalize_cksum(_m, _o, CSUM_DELAY_IP)) +#define in_delayed_cksum(_m) \ + ((void) in_finalize_cksum(_m, 0, CSUM_DELAY_DATA)) +#define in_delayed_cksum_offset(_m, _o) \ + ((void) 
in_finalize_cksum(_m, _o, CSUM_DELAY_DATA)) #define in_hosteq(s, t) ((s).s_addr == (t).s_addr) #define in_nullhost(x) ((x).s_addr == INADDR_ANY) @@ -799,14 +836,27 @@ extern int inaddr_local(struct in_addr); #define SIN(s) ((struct sockaddr_in *)(void *)s) #define satosin(sa) SIN(sa) #define sintosa(sin) ((struct sockaddr *)(void *)(sin)) +#define SINIFSCOPE(s) ((struct sockaddr_inifscope *)(void *)(s)) +#endif /* BSD_KERNEL_PRIVATE */ + +#ifdef KERNEL_PRIVATE +/* exported for ApplicationFirewall */ +extern int in_localaddr(struct in_addr); +extern int inaddr_local(struct in_addr); #endif /* KERNEL_PRIVATE */ + #define MAX_IPv4_STR_LEN 16 #define MAX_IPv6_STR_LEN 64 -extern const char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern */ +extern int inet_aton(const char *, struct in_addr *); /* in libkern */ +extern char *inet_ntoa(struct in_addr); /* in libkern */ +extern char *inet_ntoa_r(struct in_addr ina, char *buf, + size_t buflen); /* in libkern */ +extern const char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern*/ +extern int inet_pton(int af, const char *, void *); /* in libkern */ #endif /* KERNEL */ -#ifndef KERNEL +#ifndef KERNEL #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) __BEGIN_DECLS int bindresvport(int, struct sockaddr_in *); @@ -814,6 +864,5 @@ struct sockaddr; int bindresvport_sa(int, struct sockaddr *); __END_DECLS #endif -#endif - +#endif /* !KERNEL */ #endif /* _NETINET_IN_H_ */ diff --git a/bsd/netinet/in_arp.c b/bsd/netinet/in_arp.c index 7dd09e904..a18147ce2 100644 --- a/bsd/netinet/in_arp.c +++ b/bsd/netinet/in_arp.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
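The in.h hunk above re-expresses in_cksum_hdr() as the complement of b_sum16(), a 16-bit ones' complement sum over the header. A minimal standalone version of that classic RFC 1071 fold (my own sketch; b_sum16() is assumed to have these semantics):

#include <stdio.h>
#include <stdint.h>

/* 16-bit ones' complement sum over buf, RFC 1071 style; cf. b_sum16() */
static uint16_t
sum16(const void *buf, int len)
{
	const uint8_t *p = buf;
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)(p[0] << 8 | p[1]);
		p += 2;
		len -= 2;
	}
	if (len == 1)
		sum += (uint32_t)(p[0] << 8);	/* pad odd byte with zero */
	while (sum >> 16)			/* fold carries back in */
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}

int
main(void)
{
	/* sample IPv4 header with its checksum field (bytes 10-11) zeroed */
	uint8_t ip[20] = {
		0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46, 0x40, 0x00,
		0x40, 0x06, 0x00, 0x00, 0xac, 0x10, 0x0a, 0x63,
		0xac, 0x10, 0x0a, 0x0c
	};

	/* complement of the sum, cf. in_cksum_hdr(); prints 0xb1e6 */
	printf("cksum: 0x%04x\n", (unsigned)(~sum16(ip, sizeof (ip)) & 0xffff));
	return (0);
}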
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -79,64 +79,15 @@ #include #include #include + #include #include #include -#define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen)) -#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) +#define CONST_LLADDR(s) ((const u_char*)((s)->sdl_data + (s)->sdl_nlen)) static const size_t MAX_HW_LEN = 10; -SYSCTL_DECL(_net_link_ether); -SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW|CTLFLAG_LOCKED, 0, ""); - -/* timer values */ -static int arpt_prune = (5*60*1); /* walk list every 5 minutes */ -static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */ -static int arpt_down = 20; /* once declared down, don't send for 20 sec */ - -/* Apple Hardware SUM16 checksuming */ -int apple_hwcksum_tx = 1; -int apple_hwcksum_rx = 1; - -static int arp_llreach_base = (LL_BASE_REACHABLE / 1000); /* seconds */ - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, - CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_prune, 0, ""); - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, - CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_keep, 0, ""); - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, - CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_down, 0, ""); - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_tx, - CTLFLAG_RW | CTLFLAG_LOCKED, &apple_hwcksum_tx, 0, ""); - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx, - CTLFLAG_RW | CTLFLAG_LOCKED, &apple_hwcksum_rx, 0, ""); - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_llreach_base, - CTLFLAG_RW | CTLFLAG_LOCKED, &arp_llreach_base, LL_BASE_REACHABLE, - "default ARP link-layer reachability max lifetime (in seconds)"); - -struct llinfo_arp { - /* - * The following are protected by rnh_lock - */ - LIST_ENTRY(llinfo_arp) la_le; - struct rtentry *la_rt; - /* - * The following are protected by rt_lock - */ - struct mbuf *la_hold; /* last packet until resolved/timeout */ - struct if_llreach *la_llreach; /* link-layer reachability record */ - u_int64_t la_lastused; /* last used timestamp */ - u_int32_t la_asked; /* # of requests sent */ - u_int32_t la_persist; /* expirable, but stays around */ -}; - /* * Synchronization notes: * @@ -162,26 +113,91 @@ struct llinfo_arp { * it is simply removed from the global list but the memory is not * freed until the route itself is freed. 
*/ +struct llinfo_arp { + /* + * The following are protected by rnh_lock + */ + LIST_ENTRY(llinfo_arp) la_le; + struct rtentry *la_rt; + /* + * The following are protected by rt_lock + */ + struct mbuf *la_hold; /* last packet until resolved/timeout */ + struct if_llreach *la_llreach; /* link-layer reachability record */ + u_int64_t la_lastused; /* last used timestamp */ + u_int32_t la_asked; /* # of requests sent */ + u_int32_t la_maxtries; /* retry limit */ +}; static LIST_HEAD(, llinfo_arp) llinfo_arp; -static int arp_inuse, arp_allocated; +static int arp_timeout_run; /* arp_timeout is scheduled to run */ +static void arp_timeout(void *); +static void arp_sched_timeout(struct timeval *); + +static void arptfree(struct llinfo_arp *, void *); +static errno_t arp_lookup_route(const struct in_addr *, int, + int, route_t *, unsigned int); +static int arp_getstat SYSCTL_HANDLER_ARGS; + +static struct llinfo_arp *arp_llinfo_alloc(int); +static void arp_llinfo_free(void *); +static void arp_llinfo_purge(struct rtentry *); +static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *); +static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *); + +static __inline void arp_llreach_use(struct llinfo_arp *); +static __inline int arp_llreach_reachable(struct llinfo_arp *); +static void arp_llreach_alloc(struct rtentry *, struct ifnet *, void *, + unsigned int, boolean_t); + +extern int tvtohz(struct timeval *); + +static int arpinit_done; + +SYSCTL_DECL(_net_link_ether); +SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW|CTLFLAG_LOCKED, 0, ""); + +/* timer values */ +static int arpt_prune = (5*60*1); /* walk list every 5 minutes */ +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, prune_intvl, + CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_prune, 0, ""); + +static int arpt_keep = (20*60); /* once resolved, good for 20 more minutes */ +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, + CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_keep, 0, ""); + +static int arpt_down = 20; /* once declared down, don't send for 20 sec */ +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, host_down_time, + CTLFLAG_RW | CTLFLAG_LOCKED, &arpt_down, 0, ""); + +static int arp_llreach_base = (LL_BASE_REACHABLE / 1000); /* seconds */ +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_llreach_base, + CTLFLAG_RW | CTLFLAG_LOCKED, &arp_llreach_base, LL_BASE_REACHABLE, + "default ARP link-layer reachability max lifetime (in seconds)"); + +#define ARP_UNICAST_LIMIT 5 /* # of probes until ARP refresh broadcast */ +static u_int32_t arp_unicast_lim = ARP_UNICAST_LIMIT; +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, arp_unicast_lim, + CTLFLAG_RW | CTLFLAG_LOCKED, &arp_unicast_lim, ARP_UNICAST_LIMIT, + "number of unicast ARP refresh probes before using broadcast"); static u_int32_t arp_maxtries = 5; -static int useloopback = 1; /* use loopback interface for local traffic */ -static int arp_proxyall = 0; -static int arp_sendllconflict = 0; - -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW | CTLFLAG_LOCKED, - &arp_maxtries, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW | CTLFLAG_LOCKED, - &useloopback, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW | CTLFLAG_LOCKED, - &arp_proxyall, 0, ""); -SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict, CTLFLAG_RW | CTLFLAG_LOCKED, - &arp_sendllconflict, 0, ""); +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, + CTLFLAG_RW | CTLFLAG_LOCKED, &arp_maxtries, 0, ""); -static int log_arp_warnings = 0; /* Thread safe: 
no accumulated state */ +static int useloopback = 1; /* use loopback interface for local traffic */ +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, useloopback, + CTLFLAG_RW | CTLFLAG_LOCKED, &useloopback, 0, ""); + +static int arp_proxyall = 0; +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, + CTLFLAG_RW | CTLFLAG_LOCKED, &arp_proxyall, 0, ""); + +static int arp_sendllconflict = 0; +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, sendllconflict, + CTLFLAG_RW | CTLFLAG_LOCKED, &arp_sendllconflict, 0, ""); +static int log_arp_warnings = 0; /* Thread safe: no accumulated state */ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_warnings, CTLFLAG_RW | CTLFLAG_LOCKED, &log_arp_warnings, 0, @@ -199,23 +215,21 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, send_conflicting_probes, &send_conflicting_probes, 0, "send conflicting link-local arp probes"); -static errno_t arp_lookup_route(const struct in_addr *, int, - int, route_t *, unsigned int); -static void arptimer(void *); -static struct llinfo_arp *arp_llinfo_alloc(void); -static void arp_llinfo_free(void *); -static void arp_llinfo_purge(struct rtentry *); -static void arp_llinfo_get_ri(struct rtentry *, struct rt_reach_info *); -static void arp_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *); +static int arp_verbose; +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, verbose, + CTLFLAG_RW | CTLFLAG_LOCKED, &arp_verbose, 0, ""); -static __inline void arp_llreach_use(struct llinfo_arp *); -static __inline int arp_llreach_reachable(struct llinfo_arp *); -static void arp_llreach_alloc(struct rtentry *, struct ifnet *, void *, - unsigned int, boolean_t); +struct arpstat arpstat; +SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, arp_getstat, "S,arpstat", + "ARP statistics (struct arpstat, net/if_arp.h)"); -extern u_int32_t ipv4_ll_arp_aware; +/* these are deprecated (read-only); use net.link.generic.system node instead */ +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_tx, + CTLFLAG_RD | CTLFLAG_LOCKED, &hwcksum_tx, 0, ""); -static int arpinit_done; +SYSCTL_INT(_net_link_ether_inet, OID_AUTO, apple_hwcksum_rx, + CTLFLAG_RD | CTLFLAG_LOCKED, &hwcksum_rx, 0, ""); static struct zone *llinfo_arp_zone; #define LLINFO_ARP_ZONE_MAX 256 /* maximum elements in zone */ @@ -224,10 +238,7 @@ static struct zone *llinfo_arp_zone; void arp_init(void) { - if (arpinit_done) { - log(LOG_NOTICE, "arp_init called more than once (ignored)\n"); - return; - } + VERIFY(!arpinit_done); LIST_INIT(&llinfo_arp); @@ -241,15 +252,19 @@ arp_init(void) zone_change(llinfo_arp_zone, Z_CALLERACCT, FALSE); arpinit_done = 1; - - /* start timer */ - timeout(arptimer, (caddr_t)0, hz); } static struct llinfo_arp * -arp_llinfo_alloc(void) +arp_llinfo_alloc(int how) { - return (zalloc(llinfo_arp_zone)); + struct llinfo_arp *la; + + la = (how == M_WAITOK) ? 
zalloc(llinfo_arp_zone) : + zalloc_noblock(llinfo_arp_zone); + if (la != NULL) + bzero(la, sizeof (*la)); + + return (la); } static void @@ -266,6 +281,7 @@ arp_llinfo_free(void *arg) if (la->la_hold != NULL) { m_freem(la->la_hold); la->la_hold = NULL; + arpstat.purged++; } /* Purge any link-layer info caching */ @@ -400,16 +416,16 @@ arp_llreach_reachable(struct llinfo_arp *la) why = "haven't heard from it in a while"; } - if (log_arp_warnings) { + if (arp_verbose > 1) { char tmp[MAX_IPv4_STR_LEN]; u_int64_t now = net_uptime(); - log(LOG_DEBUG, "%s%d: ARP probe(s) needed for %s; " + log(LOG_DEBUG, "%s: ARP probe(s) needed for %s; " "%s [lastused %lld, lastrcvd %lld] secs ago\n", - lr->lr_ifp->if_name, lr->lr_ifp->if_unit, inet_ntop(AF_INET, + if_name(lr->lr_ifp), inet_ntop(AF_INET, &SIN(rt_key(la->la_rt))->sin_addr, tmp, sizeof (tmp)), why, - (la->la_lastused ? (int64_t)(now - la->la_lastused) : -1), - (lr->lr_lastrcvd ? (int64_t)(now - lr->lr_lastrcvd) : -1)); + (la->la_lastused ? (int64_t)(now - la->la_lastused) : -1), + (lr->lr_lastrcvd ? (int64_t)(now - lr->lr_lastrcvd) : -1)); } return (0); @@ -426,8 +442,9 @@ arp_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr, { VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); - if (arp_llreach_base != 0 && - rt->rt_expire != 0 && rt->rt_ifp != lo_ifp && + + if (arp_llreach_base != 0 && rt->rt_expire != 0 && + !(rt->rt_ifp->if_flags & IFF_LOOPBACK) && ifp->if_addrlen == IF_LLREACH_MAXLEN && /* Ethernet */ alen == ifp->if_addrlen) { struct llinfo_arp *la = rt->rt_llinfo; @@ -472,147 +489,214 @@ arp_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr, } } - if (log_arp_warnings && lr != NULL && why != NULL) { + /* Bump up retry ceiling to accomodate unicast retries */ + if (lr != NULL) + la->la_maxtries = arp_maxtries + arp_unicast_lim; + + if (arp_verbose > 1 && lr != NULL && why != NULL) { char tmp[MAX_IPv4_STR_LEN]; - log(LOG_DEBUG, "%s%d: %s%s for %s\n", ifp->if_name, - ifp->if_unit, type, why, inet_ntop(AF_INET, + log(LOG_DEBUG, "%s: %s%s for %s\n", if_name(ifp), + type, why, inet_ntop(AF_INET, &SIN(rt_key(rt))->sin_addr, tmp, sizeof (tmp))); } } } +struct arptf_arg { + int draining; + uint32_t killed; + uint32_t aging; + uint32_t sticky; + uint32_t found; +}; + /* * Free an arp entry. */ static void -arptfree(struct llinfo_arp *la) +arptfree(struct llinfo_arp *la, void *arg) { + struct arptf_arg *ap = arg; struct rtentry *rt = la->la_rt; - struct sockaddr_dl *sdl; lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); - RT_LOCK_ASSERT_HELD(rt); - if (rt->rt_refcnt > 0 && (sdl = SDL(rt->rt_gateway)) && - sdl->sdl_family == AF_LINK) { - sdl->sdl_alen = 0; - la->la_asked = 0; - rt->rt_flags &= ~RTF_REJECT; + /* rnh_lock acquired by caller protects rt from going away */ + RT_LOCK(rt); + + VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); + VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); + + ap->found++; + if (rt->rt_expire == 0 || (rt->rt_flags & RTF_STATIC)) { + ap->sticky++; + /* ARP entry is permanent? */ + if (!(rt->rt_flags & RTF_STATIC)) { + RT_UNLOCK(rt); + return; + } + } + + /* ARP entry hasn't expired and we're not draining? 
*/ + if (!ap->draining && rt->rt_expire > net_uptime()) { RT_UNLOCK(rt); - } else if (la->la_persist) { + ap->aging++; + return; + } + + if (rt->rt_refcnt > 0) { /* - * Instead of issuing RTM_DELETE, stop this route entry - * from holding an interface idle reference count; if - * the route is later reused, arp_validate() will revert - * this action. + * ARP entry has expired, with outstanding refcnt. + * If we're not draining, force ARP query to be + * generated next time this entry is used. */ - if (rt->rt_refcnt == 0) - rt_clear_idleref(rt); + if (!ap->draining) { + struct sockaddr_dl *sdl = SDL(rt->rt_gateway); + if (sdl != NULL) + sdl->sdl_alen = 0; + la->la_asked = 0; + rt->rt_flags &= ~RTF_REJECT; + } RT_UNLOCK(rt); - } else { + } else if (!(rt->rt_flags & RTF_STATIC)) { /* - * Safe to drop rt_lock and use rt_key, since holding + * ARP entry has no outstanding refcnt, and we're either + * draining or it has expired; delete it from the routing + * table. Safe to drop rt_lock and use rt_key, since holding * rnh_lock here prevents another thread from calling * rt_setgate() on this route. */ RT_UNLOCK(rt); - rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), - 0, NULL); + rtrequest_locked(RTM_DELETE, rt_key(rt), NULL, + rt_mask(rt), 0, NULL); + arpstat.timeouts++; + ap->killed++; + } else { + /* ARP entry is static; let it linger */ + RT_UNLOCK(rt); } } void -in_arpdrain(void *ignored_arg) +in_arpdrain(void *arg) { -#pragma unused (ignored_arg) +#pragma unused(arg) struct llinfo_arp *la, *ola; - uint64_t timenow; + struct arptf_arg farg; + + if (arp_verbose) + log(LOG_DEBUG, "%s: draining ARP entries\n", __func__); lck_mtx_lock(rnh_lock); la = llinfo_arp.lh_first; - timenow = net_uptime(); - while ((ola = la) != 0) { - struct rtentry *rt = la->la_rt; + bzero(&farg, sizeof (farg)); + farg.draining = 1; + while ((ola = la) != NULL) { la = la->la_le.le_next; - RT_LOCK(rt); - VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); - VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); - if (rt->rt_expire && rt->rt_expire <= timenow) - arptfree(ola); /* timer has expired, clear */ - else - RT_UNLOCK(rt); + arptfree(ola, &farg); + } + if (arp_verbose) { + log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u\n", + __func__, farg.found, farg.aging, farg.sticky, farg.killed); } lck_mtx_unlock(rnh_lock); } -void -arp_validate(struct rtentry *rt) +/* + * Timeout routine. Age arp_tab entries periodically. + */ +static void +arp_timeout(void *arg) { - struct llinfo_arp *la = rt->rt_llinfo; +#pragma unused(arg) + struct llinfo_arp *la, *ola; + struct timeval atv; + struct arptf_arg farg; - RT_LOCK_ASSERT_HELD(rt); - /* - * If this is a persistent ARP entry, make it count towards the - * interface idleness just like before arptfree() was called. - */ - if (la->la_persist) - rt_set_idleref(rt); + lck_mtx_lock(rnh_lock); + la = llinfo_arp.lh_first; + bzero(&farg, sizeof (farg)); + while ((ola = la) != NULL) { + la = la->la_le.le_next; + arptfree(ola, &farg); + } + if (arp_verbose) { + log(LOG_DEBUG, "%s: found %u, aging %u, sticky %u, killed %u\n", + __func__, farg.found, farg.aging, farg.sticky, farg.killed); + } + atv.tv_usec = 0; + atv.tv_sec = arpt_prune; + /* re-arm the timer if there's work to do */ + arp_timeout_run = 0; + if (farg.aging > 0) + arp_sched_timeout(&atv); + else if (arp_verbose) + log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__); + lck_mtx_unlock(rnh_lock); } -/* - * Timeout routine. Age arp_tab entries periodically. 
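arp_timeout() clears arp_timeout_run and re-arms itself only while entries are still aging, so the timer goes quiet once the table does. A compact userland sketch of that schedule-once flag, with the timer plumbing stubbed out (all names assumed):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t rnh_mtx = PTHREAD_MUTEX_INITIALIZER;
static bool timeout_run;	/* cf. arp_timeout_run */

static void timeout_fn(void);

static int
aging_pass(void)
{
	/* stand-in for the arptfree() walk; pretend nothing is aging */
	return (0);
}

static void
arm_timer(void (*fn)(void), int secs)
{
	/* stand-in for timeout(fn, NULL, tvtohz(&atv)) */
	(void)fn;
	printf("timer armed for T+%ds\n", secs);
}

/* caller must hold rnh_mtx, as arp_sched_timeout() asserts */
static void
sched_timeout(int secs)
{
	if (!timeout_run) {	/* never double-arm the timer */
		timeout_run = true;
		arm_timer(timeout_fn, secs);
	}
}

/* runs when the timer fires */
static void
timeout_fn(void)
{
	pthread_mutex_lock(&rnh_mtx);
	timeout_run = false;	/* clear before deciding to re-arm */
	if (aging_pass() > 0)
		sched_timeout(300);	/* re-arm only while work remains */
	else
		printf("not rescheduling timer\n");
	pthread_mutex_unlock(&rnh_mtx);
}

int
main(void)
{
	pthread_mutex_lock(&rnh_mtx);
	sched_timeout(300);
	pthread_mutex_unlock(&rnh_mtx);
	timeout_fn();	/* simulate the timer firing */
	return (0);
}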
- */ -/* ARGSUSED */ static void -arptimer(void *ignored_arg) +arp_sched_timeout(struct timeval *atv) { -#pragma unused (ignored_arg) - in_arpdrain(NULL); - timeout(arptimer, (caddr_t)0, arpt_prune * hz); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + if (!arp_timeout_run) { + struct timeval tv; + + if (atv == NULL) { + tv.tv_usec = 0; + tv.tv_sec = MAX(arpt_prune / 5, 1); + atv = &tv; + } + if (arp_verbose) { + log(LOG_DEBUG, "%s: timer scheduled in " + "T+%llus.%lluu\n", __func__, + (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec); + } + arp_timeout_run = 1; + timeout(arp_timeout, NULL, tvtohz(atv)); + } } /* - * Parallel to llc_rtrequest. + * ifa_rtrequest() callback */ static void -arp_rtrequest( - int req, - struct rtentry *rt, - __unused struct sockaddr *sa) +arp_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa) { +#pragma unused(sa) struct sockaddr *gate = rt->rt_gateway; struct llinfo_arp *la = rt->rt_llinfo; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, {0}}; + static struct sockaddr_dl null_sdl = + { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK }; uint64_t timenow; + char buf[MAX_IPv4_STR_LEN]; - if (!arpinit_done) { - panic("%s: ARP has not been initialized", __func__); - /* NOTREACHED */ - } + VERIFY(arpinit_done); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); if (rt->rt_flags & RTF_GATEWAY) return; + timenow = net_uptime(); switch (req) { - case RTM_ADD: /* * XXX: If this is a manually added route to interface * such as older version of routed or gated might provide, * restore cloning bit. */ - if ((rt->rt_flags & RTF_HOST) == 0 && rt_mask(rt) != NULL && - SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) + if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL && + SIN(rt_mask(rt))->sin_addr.s_addr != INADDR_BROADCAST) rt->rt_flags |= RTF_CLONING; + if (rt->rt_flags & RTF_CLONING) { /* * Case 1: This route should come from a route to iface. */ - if (rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl) == 0) { + if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) { gate = rt->rt_gateway; SDL(gate)->sdl_type = rt->rt_ifp->if_type; SDL(gate)->sdl_index = rt->rt_ifp->if_index; @@ -632,39 +716,47 @@ arp_rtrequest( dlil_send_arp(rt->rt_ifp, ARPOP_REQUEST, SDL(gate), rt_key(rt), NULL, rt_key(rt), 0); RT_LOCK(rt); + arpstat.txannounces++; } - /*FALLTHROUGH*/ + /* FALLTHRU */ case RTM_RESOLVE: if (gate->sa_family != AF_LINK || - gate->sa_len < sizeof(null_sdl)) { - if (log_arp_warnings) - log(LOG_DEBUG, "arp_rtrequest: bad gateway value\n"); + gate->sa_len < sizeof (null_sdl)) { + arpstat.invalidreqs++; + log(LOG_ERR, "%s: route to %s has bad gateway address " + "(sa_family %u sa_len %u) on %s\n", + __func__, inet_ntop(AF_INET, + &SIN(rt_key(rt))->sin_addr.s_addr, buf, + sizeof (buf)), gate->sa_family, gate->sa_len, + if_name(rt->rt_ifp)); break; } SDL(gate)->sdl_type = rt->rt_ifp->if_type; SDL(gate)->sdl_index = rt->rt_ifp->if_index; - if (la != 0) + + if (la != NULL) break; /* This happens on a route change */ + /* * Case 2: This route may come from cloning, or a manual route * add with a LL address. 
*/ - rt->rt_llinfo = la = arp_llinfo_alloc(); + rt->rt_llinfo = la = arp_llinfo_alloc(M_WAITOK); if (la == NULL) { - if (log_arp_warnings) - log(LOG_DEBUG, "%s: malloc failed\n", __func__); + arpstat.reqnobufs++; break; } - rt->rt_llinfo_get_ri = arp_llinfo_get_ri; - rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri; - rt->rt_llinfo_purge = arp_llinfo_purge; - rt->rt_llinfo_free = arp_llinfo_free; - - arp_inuse++, arp_allocated++; - Bzero(la, sizeof(*la)); - la->la_rt = rt; + rt->rt_llinfo_get_ri = arp_llinfo_get_ri; + rt->rt_llinfo_get_iflri = arp_llinfo_get_iflri; + rt->rt_llinfo_purge = arp_llinfo_purge; + rt->rt_llinfo_free = arp_llinfo_free; rt->rt_flags |= RTF_LLINFO; + la->la_rt = rt; LIST_INSERT_HEAD(&llinfo_arp, la, la_le); + arpstat.inuse++; + + /* We have at least one entry; arm the timer if not already */ + arp_sched_timeout(NULL); /* * This keeps the multicast addresses from showing up @@ -676,33 +768,29 @@ arp_rtrequest( if (IN_MULTICAST(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) { RT_UNLOCK(rt); dlil_resolve_multi(rt->rt_ifp, rt_key(rt), gate, - sizeof(struct sockaddr_dl)); + sizeof (struct sockaddr_dl)); RT_LOCK(rt); rt_setexpire(rt, 0); - } - else if (in_broadcast(SIN(rt_key(rt))->sin_addr, rt->rt_ifp)) { - struct sockaddr_dl *gate_ll = SDL(gate); - size_t broadcast_len; + } else if (in_broadcast(SIN(rt_key(rt))->sin_addr, + rt->rt_ifp)) { + struct sockaddr_dl *gate_ll = SDL(gate); + size_t broadcast_len; ifnet_llbroadcast_copy_bytes(rt->rt_ifp, - LLADDR(gate_ll), sizeof(gate_ll->sdl_data), + LLADDR(gate_ll), sizeof (gate_ll->sdl_data), &broadcast_len); gate_ll->sdl_alen = broadcast_len; gate_ll->sdl_family = AF_LINK; - gate_ll->sdl_len = sizeof(struct sockaddr_dl); + gate_ll->sdl_len = sizeof (struct sockaddr_dl); /* In case we're called before 1.0 sec. has elapsed */ rt_setexpire(rt, MAX(timenow, 1)); - } else if (IN_LINKLOCAL(ntohl(SIN(rt_key(rt))->sin_addr.s_addr))) { - /* - * The persistent bit implies that once the ARP - * entry has reached it expiration time, the idle - * reference count to the interface will be released, - * but the ARP entry itself stays in the routing table - * until it is explicitly removed. - */ - la->la_persist = 1; + } else if (IN_LINKLOCAL(ntohl(SIN(rt_key(rt))-> + sin_addr.s_addr))) { rt->rt_flags |= RTF_STATIC; } + /* Set default maximum number of retries */ + la->la_maxtries = arp_maxtries; + /* Become a regular mutex, just in case */ RT_CONVERT_LOCK(rt); IFA_LOCK_SPIN(rt->rt_ifa); @@ -741,6 +829,12 @@ arp_rtrequest( } } rt->rt_ifp = lo_ifp; + /* + * If rmx_mtu is not locked, update it + * to the MTU used by the new interface. + */ + if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) + rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; } } else { IFA_UNLOCK(rt->rt_ifa); @@ -748,9 +842,8 @@ arp_rtrequest( break; case RTM_DELETE: - if (la == 0) + if (la == NULL) break; - arp_inuse--; /* * Unchain it but defer the actual freeing until the route * itself is to be freed. rt->rt_llinfo still points to @@ -760,6 +853,7 @@ arp_rtrequest( LIST_REMOVE(la, la_le); la->la_le.le_next = NULL; la->la_le.le_prev = NULL; + arpstat.inuse--; /* * Purge any link-layer info caching. @@ -771,6 +865,7 @@ arp_rtrequest( if (la->la_hold != NULL) { m_freem(la->la_hold); la->la_hold = NULL; + arpstat.purged++; } } } @@ -779,16 +874,16 @@ arp_rtrequest( * convert hardware address to hex string for logging errors. 
*/ static const char * -sdl_addr_to_hex(const struct sockaddr_dl *sdl, char * orig_buf, int buflen) +sdl_addr_to_hex(const struct sockaddr_dl *sdl, char *orig_buf, int buflen) { - char * buf = orig_buf; - int i; - const u_char * lladdr = (u_char *)(size_t)sdl->sdl_data; - int maxbytes = buflen / 3; - + char *buf = orig_buf; + int i; + const u_char *lladdr = (u_char *)(size_t)sdl->sdl_data; + int maxbytes = buflen / 3; + if (maxbytes > sdl->sdl_alen) { maxbytes = sdl->sdl_alen; - } + } *buf = '\0'; for (i = 0; i < maxbytes; i++) { snprintf(buf, 3, "%02x", lladdr[i]); @@ -810,7 +905,8 @@ static errno_t arp_lookup_route(const struct in_addr *addr, int create, int proxy, route_t *route, unsigned int ifscope) { - struct sockaddr_inarp sin = {sizeof(sin), AF_INET, 0, {0}, {0}, 0, 0}; + struct sockaddr_inarp sin = + { sizeof (sin), AF_INET, 0, { 0 }, { 0 }, 0, 0 }; const char *why = NULL; errno_t error = 0; route_t rt; @@ -827,7 +923,7 @@ arp_lookup_route(const struct in_addr *addr, int create, int proxy, if (IN_LINKLOCAL(ntohl(addr->s_addr))) ifscope = IFSCOPE_NONE; - rt = rtalloc1_scoped((struct sockaddr*)&sin, create, 0, ifscope); + rt = rtalloc1_scoped((struct sockaddr *)&sin, create, 0, ifscope); if (rt == NULL) return (ENETUNREACH); @@ -845,10 +941,10 @@ arp_lookup_route(const struct in_addr *addr, int create, int proxy, } if (error != 0) { - if (create && log_arp_warnings) { + if (create && (arp_verbose || log_arp_warnings)) { char tmp[MAX_IPv4_STR_LEN]; - log(LOG_DEBUG, "arplookup link#%d %s failed: %s\n", - ifscope, inet_ntop(AF_INET, addr, tmp, + log(LOG_DEBUG, "%s: link#%d %s failed: %s\n", + __func__, ifscope, inet_ntop(AF_INET, addr, tmp, sizeof (tmp)), why); } @@ -869,7 +965,7 @@ arp_lookup_route(const struct in_addr *addr, int create, int proxy, rt->rt_flags |= RTF_CONDEMNED; RT_UNLOCK(rt); rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, - rt_mask(rt), rt->rt_flags, 0); + rt_mask(rt), rt->rt_flags, NULL); rtfree(rt); } else { RT_REMREF_LOCKED(rt); @@ -898,8 +994,8 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, { route_t route = NULL; /* output route */ errno_t result = 0; - struct sockaddr_dl *gateway; - struct llinfo_arp *llinfo = NULL; + struct sockaddr_dl *gateway; + struct llinfo_arp *llinfo = NULL; uint64_t timenow; int unreachable = 0; @@ -926,7 +1022,7 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, } if (packet->m_flags & M_BCAST) { - size_t broadcast_len; + size_t broadcast_len; bzero(ll_dest, ll_dest_len); result = ifnet_llbroadcast_copy_bytes(ifp, LLADDR(ll_dest), ll_dest_len - offsetof(struct sockaddr_dl, sdl_data), @@ -934,7 +1030,7 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, if (result == 0) { ll_dest->sdl_alen = broadcast_len; ll_dest->sdl_family = AF_LINK; - ll_dest->sdl_len = sizeof(struct sockaddr_dl); + ll_dest->sdl_len = sizeof (struct sockaddr_dl); } goto release; } @@ -942,8 +1038,8 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, if (route != NULL) RT_UNLOCK(route); result = dlil_resolve_multi(ifp, - (const struct sockaddr*)net_dest, - (struct sockaddr*)ll_dest, ll_dest_len); + (const struct sockaddr *)net_dest, + (struct sockaddr *)ll_dest, ll_dest_len); if (route != NULL) RT_LOCK(route); goto release; @@ -976,17 +1072,16 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, } if (result || route == NULL || (llinfo = route->rt_llinfo) == NULL) { - char tmp[MAX_IPv4_STR_LEN]; - /* In case result is 0 but no route, return an error */ if (result == 0) result = 
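sdl_addr_to_hex() caps output at buflen/3 octets because each byte needs two hex digits plus a separator or terminator. A standalone version of the same formatting, with an explicit ':' separator (the kernel's exact separator handling is not shown in full in this hunk, so that detail is assumed):

        #include <stdio.h>

        static const char *
        lladdr_to_hex(const unsigned char *lladdr, int alen, char *buf, int buflen)
        {
                int i, maxbytes = buflen / 3;   /* "xx" + ':' or NUL per octet */
                char *p = buf;

                if (maxbytes > alen)
                        maxbytes = alen;
                *p = '\0';
                for (i = 0; i < maxbytes; i++) {
                        snprintf(p, 3, "%02x", lladdr[i]);
                        p += 2;
                        if (i + 1 < maxbytes)
                                *p++ = ':';
                }
                *p = '\0';
                return (buf);
        }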
EHOSTUNREACH; - if (log_arp_warnings && - route != NULL && route->rt_llinfo == NULL) - log(LOG_DEBUG, "arpresolve: can't allocate llinfo " - "for %s\n", inet_ntop(AF_INET, &net_dest->sin_addr, - tmp, sizeof(tmp))); + if (route != NULL && route->rt_llinfo == NULL) { + char tmp[MAX_IPv4_STR_LEN]; + log(LOG_ERR, "%s: can't allocate llinfo for %s\n", + __func__, inet_ntop(AF_INET, &net_dest->sin_addr, + tmp, sizeof (tmp))); + } goto release; } @@ -1021,47 +1116,60 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, * Route wasn't complete/valid. We need to arp. */ if (packet != NULL) { - if (llinfo->la_hold != NULL) + if (llinfo->la_hold != NULL) { m_freem(llinfo->la_hold); + arpstat.dropped++; + } llinfo->la_hold = packet; } if (route->rt_expire) { route->rt_flags &= ~RTF_REJECT; - if (llinfo->la_asked == 0 || - route->rt_expire != timenow) { + if (llinfo->la_asked == 0 || route->rt_expire != timenow) { rt_setexpire(route, timenow); - if (llinfo->la_asked++ < arp_maxtries) { + if (llinfo->la_asked++ < llinfo->la_maxtries) { + struct if_llreach *lr = llinfo->la_llreach; struct ifaddr *rt_ifa = route->rt_ifa; + struct sockaddr_dl *hw_dest = NULL, sdl; struct sockaddr *sa; - u_int32_t rtflags; + u_int32_t rtflags, alen; /* Become a regular mutex, just in case */ RT_CONVERT_LOCK(route); /* Update probe count, if applicable */ - if (llinfo->la_llreach != NULL) { - IFLR_LOCK_SPIN(llinfo->la_llreach); - llinfo->la_llreach->lr_probes++; - IFLR_UNLOCK(llinfo->la_llreach); + if (lr != NULL) { + IFLR_LOCK_SPIN(lr); + lr->lr_probes++; + alen = ifp->if_addrlen; + /* Ethernet only for now */ + if (alen == IF_LLREACH_MAXLEN && + lr->lr_probes <= arp_unicast_lim) { + bzero(&sdl, sizeof (sdl)); + sdl.sdl_alen = alen; + bcopy(&lr->lr_key.addr, + LLADDR(&sdl), alen); + hw_dest = &sdl; + } + IFLR_UNLOCK(lr); } IFA_LOCK_SPIN(rt_ifa); IFA_ADDREF_LOCKED(rt_ifa); sa = rt_ifa->ifa_addr; IFA_UNLOCK(rt_ifa); - arp_llreach_use(llinfo); /* Mark use timestamp */ + arp_llreach_use(llinfo); /* Mark use tstamp */ rtflags = route->rt_flags; RT_UNLOCK(route); - dlil_send_arp(ifp, ARPOP_REQUEST, NULL, - sa, NULL, (const struct sockaddr*)net_dest, - rtflags); + dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, + (const struct sockaddr_dl *)hw_dest, + (const struct sockaddr *)net_dest, rtflags); IFA_REMREF(rt_ifa); RT_LOCK(route); result = EJUSTRETURN; goto release; } else { route->rt_flags |= RTF_REJECT; - rt_setexpire(route, rt_expiry(route, - route->rt_expire, arpt_down)); + rt_setexpire(route, + route->rt_expire + arpt_down); llinfo->la_asked = 0; /* * Clear la_hold; don't free the packet since @@ -1079,6 +1187,9 @@ arp_lookup_ip(ifnet_t ifp, const struct sockaddr_in *net_dest, result = EJUSTRETURN; release: + if (result == EHOSTUNREACH) + arpstat.dropped++; + if (route != NULL) { if (route == hint) { RT_REMREF_LOCKED(route); @@ -1092,14 +1203,11 @@ release: } errno_t -arp_ip_handle_input( - ifnet_t ifp, - u_short arpop, - const struct sockaddr_dl *sender_hw, - const struct sockaddr_in *sender_ip, - const struct sockaddr_in *target_ip) +arp_ip_handle_input(ifnet_t ifp, u_short arpop, + const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip, + const struct sockaddr_in *target_ip) { - char ipv4str[MAX_IPv4_STR_LEN]; + char ipv4str[MAX_IPv4_STR_LEN]; struct sockaddr_dl proxied; struct sockaddr_dl *gateway, *target_hw = NULL; struct ifaddr *ifa; @@ -1107,14 +1215,16 @@ arp_ip_handle_input( struct in_ifaddr *best_ia = NULL; struct sockaddr_in best_ia_sin; route_t route = NULL; - char buf[3 * MAX_HW_LEN]; 
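The retry path now caps per-entry probes at la_maxtries and, while the cached if_llreach link-layer key is still fresh, directs the first arp_unicast_lim retries at the last known hardware address instead of broadcasting. A sketch of building that unicast sockaddr_dl destination (Ethernet-sized addresses assumed, as in the IF_LLREACH_MAXLEN check; lladdr/alen stand in for lr_key.addr):

        #include <string.h>
        #include <sys/socket.h>
        #include <net/if_dl.h>

        static struct sockaddr_dl *
        mk_unicast_dest(struct sockaddr_dl *sdl, const void *lladdr,
            unsigned char alen, unsigned int probes, unsigned int unicast_lim)
        {
                if (probes > unicast_lim)
                        return (NULL);          /* past the limit: broadcast instead */
                memset(sdl, 0, sizeof (*sdl));
                sdl->sdl_family = AF_LINK;
                sdl->sdl_len = sizeof (*sdl);
                sdl->sdl_alen = alen;
                memcpy(LLADDR(sdl), lladdr, alen);      /* cached hardware address */
                return (sdl);
        }

Returning NULL mirrors passing a NULL hw_dest to dlil_send_arp(), which falls back to a broadcast request.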
// enough for MAX_HW_LEN byte hw address + char buf[3 * MAX_HW_LEN]; /* enough for MAX_HW_LEN byte hw address */ struct llinfo_arp *llinfo; errno_t error; int created_announcement = 0; int bridged = 0, is_bridge = 0; + arpstat.received++; + /* Do not respond to requests for 0.0.0.0 */ - if (target_ip->sin_addr.s_addr == 0 && arpop == ARPOP_REQUEST) + if (target_ip->sin_addr.s_addr == INADDR_ANY && arpop == ARPOP_REQUEST) goto done; if (ifp->if_bridge) @@ -1122,6 +1232,9 @@ arp_ip_handle_input( if (ifp->if_type == IFT_BRIDGE) is_bridge = 1; + if (arpop == ARPOP_REPLY) + arpstat.rxreplies++; + /* * Determine if this ARP is for us * For a bridge, we want to check the address irrespective @@ -1158,9 +1271,9 @@ arp_ip_handle_input( IFA_UNLOCK(&ia->ia_ifa); } -#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \ - (ia->ia_ifp->if_bridge == ifp->if_softc && \ - !bcmp(ifnet_lladdr(ia->ia_ifp), ifnet_lladdr(ifp), ifp->if_addrlen) && \ +#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \ + (ia->ia_ifp->if_bridge == ifp->if_softc && \ + bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) == 0 && \ addr == ia->ia_addr.sin_addr.s_addr) /* * Check the case when bridge shares its MAC address with @@ -1185,6 +1298,7 @@ arp_ip_handle_input( IFA_UNLOCK(&ia->ia_ifa); } } +#undef BDG_MEMBER_MATCHES_ARP lck_rw_done(in_ifaddr_rwlock); /* @@ -1217,39 +1331,46 @@ arp_ip_handle_input( match: /* If the packet is from this interface, ignore the packet */ - if (!bcmp(CONST_LLADDR(sender_hw), ifnet_lladdr(ifp), sender_hw->sdl_alen)) { + if (bcmp(CONST_LLADDR(sender_hw), IF_LLADDR(ifp), + sender_hw->sdl_alen) == 0) goto done; - } /* Check for a conflict */ - if (!bridged && sender_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr) { - struct kev_msg ev_msg; + if (!bridged && + sender_ip->sin_addr.s_addr == best_ia_sin.sin_addr.s_addr) { + struct kev_msg ev_msg; struct kev_in_collision *in_collision; - u_char storage[sizeof(struct kev_in_collision) + MAX_HW_LEN]; - bzero(&ev_msg, sizeof(struct kev_msg)); - bzero(storage, (sizeof(struct kev_in_collision) + MAX_HW_LEN)); - in_collision = (struct kev_in_collision*)(void *)storage; - log(LOG_ERR, "%s%d duplicate IP address %s sent from address %s\n", - ifp->if_name, ifp->if_unit, - inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, sizeof(ipv4str)), - sdl_addr_to_hex(sender_hw, buf, sizeof(buf))); + u_char storage[sizeof (struct kev_in_collision) + MAX_HW_LEN]; + + bzero(&ev_msg, sizeof (struct kev_msg)); + bzero(storage, (sizeof (struct kev_in_collision) + MAX_HW_LEN)); + in_collision = (struct kev_in_collision *)(void *)storage; + log(LOG_ERR, "%s duplicate IP address %s sent from " + "address %s\n", if_name(ifp), + inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, + sizeof (ipv4str)), sdl_addr_to_hex(sender_hw, buf, + sizeof (buf))); /* Send a kernel event so anyone can learn of the conflict */ in_collision->link_data.if_family = ifp->if_family; in_collision->link_data.if_unit = ifp->if_unit; - strncpy(&in_collision->link_data.if_name[0], ifp->if_name, IFNAMSIZ); + strncpy(&in_collision->link_data.if_name[0], + ifp->if_name, IFNAMSIZ); in_collision->ia_ipaddr = sender_ip->sin_addr; - in_collision->hw_len = sender_hw->sdl_alen < MAX_HW_LEN ? sender_hw->sdl_alen : MAX_HW_LEN; - bcopy(CONST_LLADDR(sender_hw), (caddr_t)in_collision->hw_addr, in_collision->hw_len); + in_collision->hw_len = (sender_hw->sdl_alen < MAX_HW_LEN) ? 
+ sender_hw->sdl_alen : MAX_HW_LEN; + bcopy(CONST_LLADDR(sender_hw), (caddr_t)in_collision->hw_addr, + in_collision->hw_len); ev_msg.vendor_code = KEV_VENDOR_APPLE; ev_msg.kev_class = KEV_NETWORK_CLASS; ev_msg.kev_subclass = KEV_INET_SUBCLASS; ev_msg.event_code = KEV_INET_ARPCOLLISION; ev_msg.dv[0].data_ptr = in_collision; - ev_msg.dv[0].data_length = sizeof(struct kev_in_collision) + in_collision->hw_len; + ev_msg.dv[0].data_length = + sizeof (struct kev_in_collision) + in_collision->hw_len; ev_msg.dv[1].data_length = 0; kev_post_msg(&ev_msg); - + arpstat.dupips++; goto respond; } @@ -1266,18 +1387,17 @@ match: if (error == 0) RT_LOCK_ASSERT_HELD(route); - if (error || route == 0 || route->rt_gateway == 0) { - if (arpop != ARPOP_REQUEST) { + if (error || route == NULL || route->rt_gateway == NULL) { + if (arpop != ARPOP_REQUEST) goto respond; - } - if (arp_sendllconflict - && send_conflicting_probes != 0 - && (ifp->if_eflags & IFEF_ARPLL) != 0 - && IN_LINKLOCAL(ntohl(target_ip->sin_addr.s_addr)) - && sender_ip->sin_addr.s_addr == 0) { + + if (arp_sendllconflict && send_conflicting_probes != 0 && + (ifp->if_eflags & IFEF_ARPLL) && + IN_LINKLOCAL(ntohl(target_ip->sin_addr.s_addr)) && + sender_ip->sin_addr.s_addr == INADDR_ANY) { /* - * Verify this ARP probe doesn't conflict with an IPv4LL we know of - * on another interface. + * Verify this ARP probe doesn't conflict with + * an IPv4LL we know of on another interface. */ if (route != NULL) { RT_REMREF_LOCKED(route); @@ -1291,67 +1411,76 @@ match: error = arp_lookup_route(&target_ip->sin_addr, 0, 0, &route, ifp->if_index); - if (error == 0) - RT_LOCK_ASSERT_HELD(route); + if (error != 0 || route == NULL || + route->rt_gateway == NULL) + goto respond; - if (error == 0 && route && route->rt_gateway) { - gateway = SDL(route->rt_gateway); - if (route->rt_ifp != ifp && gateway->sdl_alen != 0 - && (gateway->sdl_alen != sender_hw->sdl_alen - || bcmp(CONST_LLADDR(gateway), CONST_LLADDR(sender_hw), - gateway->sdl_alen) != 0)) { - /* - * A node is probing for an IPv4LL we know exists on a - * different interface. We respond with a conflicting probe - * to force the new device to pick a different IPv4LL - * address. - */ - if (log_arp_warnings) { - log(LOG_INFO, - "arp: %s on %s%d sent probe for %s, already on %s%d\n", - sdl_addr_to_hex(sender_hw, buf, sizeof(buf)), - ifp->if_name, ifp->if_unit, - inet_ntop(AF_INET, &target_ip->sin_addr, ipv4str, - sizeof(ipv4str)), - route->rt_ifp->if_name, route->rt_ifp->if_unit); - log(LOG_INFO, - "arp: sending conflicting probe to %s on %s%d\n", - sdl_addr_to_hex(sender_hw, buf, sizeof(buf)), - ifp->if_name, ifp->if_unit); - } - /* Mark use timestamp */ - if (route->rt_llinfo != NULL) - arp_llreach_use(route->rt_llinfo); - /* We're done with the route */ - RT_REMREF_LOCKED(route); - RT_UNLOCK(route); - route = NULL; - /* - * Send a conservative unicast "ARP probe". - * This should force the other device to pick a new number. - * This will not force the device to pick a new number if the device - * has already assigned that number. - * This will not imply to the device that we own that address. - * The link address is always present; it's never freed. 
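The collision report is a fixed kev_in_collision header followed by a variable-length hardware address clamped to MAX_HW_LEN, and the event's data_length must account for the trailer. A generic sketch of that header-plus-trailer layout (hypothetical struct, not the kernel's):

        #include <string.h>

        #define MAX_HW  17

        struct collision_ev {
                unsigned int    ip;             /* offending IPv4 address */
                unsigned char   hw_len;
                unsigned char   hw_addr[];      /* variable-length trailer */
        };

        /* storage must hold sizeof (struct collision_ev) + MAX_HW bytes */
        static size_t
        fill_collision_ev(void *storage, unsigned int ip,
            const unsigned char *hw, size_t alen)
        {
                struct collision_ev *ev = storage;

                ev->ip = ip;
                ev->hw_len = (alen < MAX_HW) ? alen : MAX_HW;   /* clamp */
                memcpy(ev->hw_addr, hw, ev->hw_len);
                return (sizeof (*ev) + ev->hw_len);     /* -> dv[0].data_length */
        }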
- */ - ifnet_lock_shared(ifp); - ifa = ifp->if_lladdr; - IFA_ADDREF(ifa); - ifnet_lock_done(ifp); - dlil_send_arp_internal(ifp, ARPOP_REQUEST, - SDL(ifa->ifa_addr), - (const struct sockaddr*)sender_ip, sender_hw, - (const struct sockaddr*)target_ip); - IFA_REMREF(ifa); - ifa = NULL; + RT_LOCK_ASSERT_HELD(route); + + gateway = SDL(route->rt_gateway); + if (route->rt_ifp != ifp && gateway->sdl_alen != 0 && + (gateway->sdl_alen != sender_hw->sdl_alen || + bcmp(CONST_LLADDR(gateway), CONST_LLADDR(sender_hw), + gateway->sdl_alen) != 0)) { + /* + * A node is probing for an IPv4LL we know + * exists on a different interface. We respond + * with a conflicting probe to force the new + * device to pick a different IPv4LL address. + */ + if (arp_verbose || log_arp_warnings) { + log(LOG_INFO, "arp: %s on %s sent " + "probe for %s, already on %s\n", + sdl_addr_to_hex(sender_hw, buf, + sizeof (buf)), if_name(ifp), + inet_ntop(AF_INET, + &target_ip->sin_addr, ipv4str, + sizeof (ipv4str)), + if_name(route->rt_ifp)); + log(LOG_INFO, "arp: sending " + "conflicting probe to %s on %s\n", + sdl_addr_to_hex(sender_hw, buf, + sizeof (buf)), if_name(ifp)); } + /* Mark use timestamp */ + if (route->rt_llinfo != NULL) + arp_llreach_use(route->rt_llinfo); + /* We're done with the route */ + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + route = NULL; + /* + * Send a conservative unicast "ARP probe". + * This should force the other device to pick + * a new number. This will not force the + * device to pick a new number if the device + * has already assigned that number. This will + * not imply to the device that we own that + * address. The link address is always + * present; it's never freed. + */ + ifnet_lock_shared(ifp); + ifa = ifp->if_lladdr; + IFA_ADDREF(ifa); + ifnet_lock_done(ifp); + dlil_send_arp_internal(ifp, ARPOP_REQUEST, + SDL(ifa->ifa_addr), + (const struct sockaddr *)sender_ip, + sender_hw, + (const struct sockaddr *)target_ip); + IFA_REMREF(ifa); + ifa = NULL; + arpstat.txconflicts++; } goto respond; - } else if (keep_announcements != 0 - && target_ip->sin_addr.s_addr == sender_ip->sin_addr.s_addr) { - /* don't create entry if link-local address and link-local is disabled */ - if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) - || (ifp->if_eflags & IFEF_ARPLL) != 0) { + } else if (keep_announcements != 0 && + target_ip->sin_addr.s_addr == sender_ip->sin_addr.s_addr) { + /* + * Don't create entry if link-local address and + * link-local is disabled + */ + if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || + (ifp->if_eflags & IFEF_ARPLL)) { if (route != NULL) { RT_REMREF_LOCKED(route); RT_UNLOCK(route); @@ -1368,13 +1497,12 @@ match: if (error == 0) RT_LOCK_ASSERT_HELD(route); - if (error == 0 && route != NULL && route->rt_gateway != NULL) { + if (error == 0 && route != NULL && + route->rt_gateway != NULL) created_announcement = 1; - } } - if (created_announcement == 0) { + if (created_announcement == 0) goto respond; - } } else { goto respond; } @@ -1383,24 +1511,24 @@ match: RT_LOCK_ASSERT_HELD(route); VERIFY(route->rt_expire == 0 || route->rt_rmx.rmx_expire != 0); VERIFY(route->rt_expire != 0 || route->rt_rmx.rmx_expire == 0); + gateway = SDL(route->rt_gateway); if (!bridged && route->rt_ifp != ifp) { - if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || (ifp->if_eflags & IFEF_ARPLL) == 0) { - if (log_arp_warnings) - log(LOG_ERR, "arp: %s is on %s%d but got reply from %s on %s%d\n", - inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, - sizeof(ipv4str)), - route->rt_ifp->if_name, - 
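The IPv4LL defense only fires when the probe's sender hardware address differs from the one recorded on another interface; otherwise the probe is our own reflection or a legitimate refresh. The predicate, extracted into a standalone form that mirrors the gateway checks above:

        #include <string.h>

        static int
        probe_conflicts(int same_ifp, const unsigned char *known,
            size_t known_alen, const unsigned char *sender, size_t sender_alen)
        {
                if (same_ifp || known_alen == 0)
                        return (0);             /* same link, or nothing recorded */
                return (known_alen != sender_alen ||
                    memcmp(known, sender, known_alen) != 0);
        }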
route->rt_ifp->if_unit, - sdl_addr_to_hex(sender_hw, buf, sizeof(buf)), - ifp->if_name, ifp->if_unit); + if (!IN_LINKLOCAL(ntohl(sender_ip->sin_addr.s_addr)) || + !(ifp->if_eflags & IFEF_ARPLL)) { + if (arp_verbose || log_arp_warnings) + log(LOG_ERR, "arp: %s is on %s but got " + "reply from %s on %s\n", + inet_ntop(AF_INET, &sender_ip->sin_addr, + ipv4str, sizeof (ipv4str)), + if_name(route->rt_ifp), + sdl_addr_to_hex(sender_hw, buf, + sizeof (buf)), if_name(ifp)); goto respond; - } - else { + } else { /* Don't change a permanent address */ - if (route->rt_expire == 0) { + if (route->rt_expire == 0) goto respond; - } /* * We're about to check and/or change the route's ifp @@ -1420,7 +1548,8 @@ match: * accessed now that we have acquired rnh_lock. */ gateway = SDL(route->rt_gateway); - if ((gateway->sdl_alen != 0 && route->rt_parent && + if ((gateway->sdl_alen != 0 && + route->rt_parent != NULL && route->rt_parent->rt_ifp == route->rt_ifp) || (route->rt_flags & RTF_CONDEMNED)) { RT_REMREF_LOCKED(route); @@ -1444,6 +1573,13 @@ match: } /* Change the interface when the existing route is on */ route->rt_ifp = ifp; + /* + * If rmx_mtu is not locked, update it + * to the MTU used by the new interface. + */ + if (!(route->rt_rmx.rmx_locks & RTV_MTU)) + route->rt_rmx.rmx_mtu = route->rt_ifp->if_mtu; + rtsetifa(route, &best_ia->ia_ifa); gateway->sdl_index = ifp->if_index; RT_UNLOCK(route); @@ -1458,25 +1594,26 @@ match: RT_LOCK_ASSERT_HELD(route); } - if (gateway->sdl_alen && bcmp(LLADDR(gateway), CONST_LLADDR(sender_hw), gateway->sdl_alen)) { - if (route->rt_expire && log_arp_warnings) { + if (gateway->sdl_alen != 0 && bcmp(LLADDR(gateway), + CONST_LLADDR(sender_hw), gateway->sdl_alen) != 0) { + if (route->rt_expire != 0 && + (arp_verbose || log_arp_warnings)) { char buf2[3 * MAX_HW_LEN]; - log(LOG_INFO, "arp: %s moved from %s to %s on %s%d\n", + log(LOG_INFO, "arp: %s moved from %s to %s on %s\n", inet_ntop(AF_INET, &sender_ip->sin_addr, ipv4str, - sizeof(ipv4str)), - sdl_addr_to_hex(gateway, buf, sizeof(buf)), - sdl_addr_to_hex(sender_hw, buf2, sizeof(buf2)), - ifp->if_name, ifp->if_unit); - } - else if (route->rt_expire == 0) { - if (log_arp_warnings) { + sizeof (ipv4str)), + sdl_addr_to_hex(gateway, buf, sizeof (buf)), + sdl_addr_to_hex(sender_hw, buf2, sizeof (buf2)), + if_name(ifp)); + } else if (route->rt_expire == 0) { + if (arp_verbose || log_arp_warnings) { log(LOG_ERR, "arp: %s attempts to modify " - "permanent entry for %s on %s%d\n", + "permanent entry for %s on %s\n", sdl_addr_to_hex(sender_hw, buf, - sizeof(buf)), + sizeof (buf)), inet_ntop(AF_INET, &sender_ip->sin_addr, - ipv4str, sizeof(ipv4str)), - ifp->if_name, ifp->if_unit); + ipv4str, sizeof (ipv4str)), + if_name(ifp)); } goto respond; } @@ -1487,13 +1624,8 @@ match: bcopy(CONST_LLADDR(sender_hw), LLADDR(gateway), gateway->sdl_alen); /* Update the expire time for the route and clear the reject flag */ - if (route->rt_expire) { - uint64_t timenow; - - timenow = net_uptime(); - rt_setexpire(route, - rt_expiry(route, timenow, arpt_keep)); - } + if (route->rt_expire != 0) + rt_setexpire(route, net_uptime() + arpt_keep); route->rt_flags &= ~RTF_REJECT; /* cache the gateway (sender HW) address */ @@ -1504,12 +1636,11 @@ match: llinfo = route->rt_llinfo; llinfo->la_asked = 0; if (llinfo->la_hold) { - struct mbuf *m0; - m0 = llinfo->la_hold; + struct mbuf *m0 = llinfo->la_hold; llinfo->la_hold = NULL; - RT_UNLOCK(route); - dlil_output(ifp, PF_INET, m0, (caddr_t)route, rt_key(route), 0, NULL); + dlil_output(ifp, PF_INET, m0, 
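Two behavioral changes land here: a confirming reply now refreshes the expiry with a plain net_uptime() + arpt_keep (the old rt_expiry() clamping is gone), and the single held packet is detached from la_hold before being handed to dlil_output(). A compact sketch of that confirm path (generic types; transmit stands in for dlil_output):

        struct arp_ent {
                unsigned long long expire;      /* 0 means permanent */
                void *held;                     /* at most one queued packet */
        };

        static void
        on_confirm(struct arp_ent *e, unsigned long long uptime,
            unsigned long long keep, void (*transmit)(void *))
        {
                if (e->expire != 0)
                        e->expire = uptime + keep;      /* absolute uptime */
                if (e->held != NULL) {
                        void *m = e->held;

                        e->held = NULL;         /* detach before handing off */
                        transmit(m);
                }
        }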
(caddr_t)route, + rt_key(route), 0, NULL); RT_REMREF(route); route = NULL; } @@ -1527,6 +1658,8 @@ respond: if (arpop != ARPOP_REQUEST) goto done; + arpstat.rxrequests++; + /* If we are not the target, check if we should proxy */ if (target_ip->sin_addr.s_addr != best_ia_sin.sin_addr.s_addr) { /* @@ -1546,12 +1679,12 @@ respond: * proxying for. */ if (route->rt_ifp != ifp && - (route->rt_ifp->if_bridge != ifp->if_bridge || - ifp->if_bridge == NULL)) { - RT_REMREF_LOCKED(route); - RT_UNLOCK(route); - goto done; - } + (route->rt_ifp->if_bridge != ifp->if_bridge || + ifp->if_bridge == NULL)) { + RT_REMREF_LOCKED(route); + RT_UNLOCK(route); + goto done; + } proxied = *SDL(route->rt_gateway); target_hw = &proxied; } else { @@ -1590,13 +1723,13 @@ respond: } dlil_send_arp(ifp, ARPOP_REPLY, - target_hw, (const struct sockaddr*)target_ip, - sender_hw, (const struct sockaddr*)sender_ip, 0); + target_hw, (const struct sockaddr *)target_ip, + sender_hw, (const struct sockaddr *)sender_ip, 0); done: if (best_ia != NULL) IFA_REMREF(&best_ia->ia_ifa); - return 0; + return (0); } void @@ -1611,3 +1744,13 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) IFA_UNLOCK(ifa); dlil_send_arp(ifp, ARPOP_REQUEST, NULL, sa, NULL, sa, 0); } + +static int +arp_getstat SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = (size_t)sizeof (struct arpstat); + + return (SYSCTL_OUT(req, &arpstat, MIN(sizeof (arpstat), req->oldlen))); +} diff --git a/bsd/netinet/in_arp.h b/bsd/netinet/in_arp.h index 56573aad9..fdbc9e2d1 100644 --- a/bsd/netinet/in_arp.h +++ b/bsd/netinet/in_arp.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2009-2011 Apple Inc. All rights reserved. + * Copyright (c) 2009-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,12 +22,13 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #ifndef _NETINET_IN_ARP_H_ #define _NETINET_IN_ARP_H_ +#ifdef KERNEL #include struct sockaddr; @@ -35,109 +36,102 @@ struct sockaddr_dl; struct sockaddr_in; /*! - @function inet_arp_lookup - @discussion This function will check the routing table for a cached - arp entry or trigger an arp query to resolve the ip address to a - link-layer address. - - Arp entries are stored in the routing table. This function will - lookup the ip destination in the routing table. If the - destination requires forwarding to a gateway, the route of the - gateway will be looked up. The route entry is inspected to - determine if the link layer destination address is known. 
If - unknown, the arp generation function for IP attached to the - interface is called to create an arp request packet. - @param interface The interface the packet is being sent on. - @param ip_dest The ip destination of the packet. - @param ll_dest On output, the link-layer destination. - @param ll_dest_len The length of the buffer for ll_dest. - @param hint Any routing hint passed down from the protocol. - @param packet The packet being transmitted. - @result May return an error such as EHOSTDOWN or ENETUNREACH. If - this function returns EJUSTRETURN, the packet has been queued - and will be sent when an arp response is received. If any other - value is returned, the caller is responsible for disposing of - the packet. + * @function inet_arp_lookup + * @discussion This function will check the routing table for a cached + * arp entry or trigger an arp query to resolve the ip address to a + * link-layer address. + * + * Arp entries are stored in the routing table. This function will + * lookup the ip destination in the routing table. If the + * destination requires forwarding to a gateway, the route of the + * gateway will be looked up. The route entry is inspected to + * determine if the link layer destination address is known. If + * unknown, the arp generation function for IP attached to the + * interface is called to create an arp request packet. + * @param interface The interface the packet is being sent on. + * @param ip_dest The ip destination of the packet. + * @param ll_dest On output, the link-layer destination. + * @param ll_dest_len The length of the buffer for ll_dest. + * @param hint Any routing hint passed down from the protocol. + * @param packet The packet being transmitted. + * @result May return an error such as EHOSTDOWN or ENETUNREACH. If + * this function returns EJUSTRETURN, the packet has been queued + * and will be sent when an arp response is received. If any other + * value is returned, the caller is responsible for disposing of + * the packet. */ #ifdef BSD_KERNEL_PRIVATE +extern errno_t arp_lookup_ip(ifnet_t interface, + const struct sockaddr_in *ip_dest, struct sockaddr_dl *ll_dest, + size_t ll_dest_len, route_t hint, mbuf_t packet); #define inet_arp_lookup arp_lookup_ip #else extern errno_t inet_arp_lookup(ifnet_t interface, const struct sockaddr_in *ip_dest, struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, mbuf_t packet); -#endif /* BSD_KERNEL_PRIVATE */ -#ifdef KERNEL_PRIVATE -struct in_addr; -extern void arp_init(void); -extern void in_arpdrain(void *); -extern void arp_validate(struct rtentry *); -extern void arp_llreach_set_reachable(struct ifnet *, void *, unsigned int); -/* arp_lookup_ip is obsolete, use inet_arp_lookup */ -extern errno_t arp_lookup_ip(ifnet_t interface, - const struct sockaddr_in *ip_dest, struct sockaddr_dl *ll_dest, - size_t ll_dest_len, route_t hint, mbuf_t packet); -#endif /* KERNEL_PRIVATE */ +#endif /* !BSD_KERNEL_PRIVATE */ /*! - @function inet_arp_handle_input - @discussion This function should be called by code that handles - inbound arp packets. The caller should parse the ARP packet to - pull out the operation and the relevant addresses. If a response - is required, the proto_media_send_arp function will be called. - - This function will lookup the sender in the routing table and - add an arp entry if necessary. Any queued packets waiting for - the arp resolution will also be transmitted. - @param interface The interface the packet was received on. 
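Given the contract documented above, a hypothetical KPI caller looks like the following; the key point is that EJUSTRETURN means the packet was queued and must not be freed, while any other error leaves disposal to the caller (function and variable names here are invented for illustration):

        static errno_t
        tx_resolve(ifnet_t ifp, const struct sockaddr_in *dst,
            route_t hint, mbuf_t m)
        {
                struct sockaddr_dl sdl;
                errno_t err;

                err = inet_arp_lookup(ifp, dst, &sdl, sizeof (sdl), hint, m);
                if (err == EJUSTRETURN)
                        return (0);     /* queued; sent when the ARP reply arrives */
                if (err != 0)
                        mbuf_freem(m);  /* caller owns the packet on real errors */
                /* on success, sdl holds the link-layer destination for m */
                return (err);
        }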
- @param arp_op The arp operation, ARPOP_REQUEST or ARPOP_REPLY - @param sender_hw The sender hardware address from the arp payload. - @param sender_ip The sender IP address from the arp payload. - @param target_ip The target IP address from the arp payload. - @result 0 on success or an errno error value on failure. + * @function inet_arp_handle_input + * @discussion This function should be called by code that handles + * inbound arp packets. The caller should parse the ARP packet to + * pull out the operation and the relevant addresses. If a response + * is required, the proto_media_send_arp function will be called. + * + * This function will lookup the sender in the routing table and + * add an arp entry if necessary. Any queued packets waiting for + * the arp resolution will also be transmitted. + * @param interface The interface the packet was received on. + * @param arp_op The arp operation, ARPOP_REQUEST or ARPOP_REPLY + * @param sender_hw The sender hardware address from the arp payload. + * @param sender_ip The sender IP address from the arp payload. + * @param target_ip The target IP address from the arp payload. + * @result 0 on success or an errno error value on failure. */ #ifdef BSD_KERNEL_PRIVATE +extern errno_t arp_ip_handle_input(ifnet_t ifp, u_int16_t arpop, + const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip, + const struct sockaddr_in *target_ip); #define inet_arp_handle_input arp_ip_handle_input #else extern errno_t inet_arp_handle_input(ifnet_t ifp, u_int16_t arpop, - const struct sockaddr_dl *sender_hw, - const struct sockaddr_in *sender_ip, - const struct sockaddr_in *target_ip); -#endif /* KERNEL_PRIVATE */ -#ifdef KERNEL_PRIVATE -/* arp_ip_handle_input is obsolete, use inet_arp_handle_input */ -extern errno_t arp_ip_handle_input(ifnet_t ifp, u_int16_t arpop, - const struct sockaddr_dl *sender_hw, - const struct sockaddr_in *sender_ip, - const struct sockaddr_in *target_ip); -#endif /* BSD_KERNEL_PRIVATE */ + const struct sockaddr_dl *sender_hw, const struct sockaddr_in *sender_ip, + const struct sockaddr_in *target_ip); +#endif /* !BSD_KERNEL_PRIVATE */ /*! - @function inet_arp_init_ifaddr - @discussion This function should be called in two places, when an IP - address is added and when the hardware address changes. This - function will setup the ifaddr_t for use with the IP ARP - functions. This function will also trigger the transmission of a - gratuitous ARP packet. - - When the SIOCSIFADDR ioctl is handled, the data parameter will - be an ifaddr_t. If this is an IP address, inet_arp_init_ifaddr - should be called. This is usually performed in the protocol - attachment's ioctl handler. - - When the event handler for the protocol attachment receives a - KEV_DL_LINK_ADDRESS_CHANGED event, the event handler should call - inet_arp_init_ifaddr for each interface ip address. - - For an example, see bsd/net/ether_inet_pr_module.c in xnu. - Search for inet_arp_init_ifaddr. - @param interface The interface the packet was received on. - @param ipaddr The ip interface address. + * @function inet_arp_init_ifaddr + * @discussion This function should be called in two places, when an IP + * address is added and when the hardware address changes. This + * function will setup the ifaddr_t for use with the IP ARP + * functions. This function will also trigger the transmission of a + * gratuitous ARP packet. + * + * When the SIOCSIFADDR ioctl is handled, the data parameter will + * be an ifaddr_t. 
If this is an IP address, inet_arp_init_ifaddr + * should be called. This is usually performed in the protocol + * attachment's ioctl handler. + * + * When the event handler for the protocol attachment receives a + * KEV_DL_LINK_ADDRESS_CHANGED event, the event handler should call + * inet_arp_init_ifaddr for each interface ip address. + * + * For an example, see bsd/net/ether_inet_pr_module.c in xnu. + * Search for inet_arp_init_ifaddr. + * @param interface The interface the packet was received on. + * @param ipaddr The ip interface address. */ #ifdef BSD_KERNEL_PRIVATE -/* inet_arp_init_ifaddr is aliased to arp_ifinit */ +/* inet_arp_init_ifaddr is aliased to arp_ifinit (if_ether.h) */ #define inet_arp_init_ifaddr arp_ifinit #else extern void inet_arp_init_ifaddr(ifnet_t interface, ifaddr_t ipaddr); -#endif +#endif /* !BSD_KERNEL_PRIVATE */ +#ifdef BSD_KERNEL_PRIVATE +extern void arp_init(void); +extern void in_arpdrain(void *); +extern void arp_llreach_set_reachable(struct ifnet *, void *, unsigned int); +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* KERNEL */ #endif /* _NETINET_IN_ARP_H_ */ diff --git a/bsd/netinet/in_cksum.c b/bsd/netinet/in_cksum.c index 594e27577..4e6d453cf 100644 --- a/bsd/netinet/in_cksum.c +++ b/bsd/netinet/in_cksum.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -61,14 +61,14 @@ */ #include +#include #include -#include #include +#include #include +#define _IP_VHL #include -#include - -#define DBG_FNC_IN_CKSUM NETDBG_CODE(DBG_NETIP, (3 << 8)) +#include /* * Checksum routine for Internet Protocol family headers (Portable Version). @@ -76,199 +76,415 @@ * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. */ - -union s_util { - char c[2]; - u_short s; -}; +#define REDUCE16 { \ + q_util.q = sum; \ + l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ + sum = l_util.s[0] + l_util.s[1]; \ + ADDCARRY(sum); \ +} union l_util { - u_int16_t s[2]; - u_int32_t l; + uint16_t s[2]; + uint32_t l; }; union q_util { - u_int16_t s[4]; - u_int32_t l[2]; - u_int64_t q; + uint16_t s[4]; + uint32_t l[2]; + uint64_t q; }; -#define ADDCARRY(x) do { if (x > 65535) { x -= 65535; } } while (0) +#define PREDICT_FALSE(_exp) __builtin_expect((_exp), 0) + +static uint16_t in_cksumdata(const void *buf, int len); + +/* + * Portable version of 16-bit 1's complement sum function that works + * on a contiguous buffer. This is used mainly for instances where + * the caller is certain about the buffer requirements, e.g. for IP + * header checksum calculation, though it is capable of being used + * on any arbitrary data span. The platform-specific cpu_in_cksum() + * routine might be better-optmized, so use that instead for large + * data span. 
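REDUCE16 folds the 64-bit accumulator to 16 bits by summing its four 16-bit words through the q_util/l_util unions and then absorbing the final carry with ADDCARRY. The same fold written with shifts only, arithmetically congruent mod 0xffff:

        #include <stdint.h>

        static uint16_t
        fold64(uint64_t sum)
        {
                sum = (sum >> 32) + (sum & 0xffffffffULL);      /* 64 -> 33 bits */
                sum = (sum >> 16) + (sum & 0xffffULL);          /* 33 -> 18 bits */
                sum = (sum >> 16) + (sum & 0xffffULL);
                sum = (sum >> 16) + (sum & 0xffffULL);          /* last carry */
                return ((uint16_t)sum);
        }

The Internet checksum proper is the 1's complement of this fold, i.e. ~fold64(sum) & 0xffff.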
+ * + * The logic is borrowed from + */ + +#if ULONG_MAX == 0xffffffffUL +/* 32-bit version */ +static uint16_t +in_cksumdata(const void *buf, int mlen) +{ + uint32_t sum, partial; + unsigned int final_acc; + uint8_t *data = (void *)buf; + boolean_t needs_swap, started_on_odd; + + VERIFY(mlen >= 0); + + needs_swap = FALSE; + started_on_odd = FALSE; + + sum = 0; + partial = 0; + + if ((uintptr_t)data & 1) { + /* Align on word boundary */ + started_on_odd = !started_on_odd; +#if BYTE_ORDER == LITTLE_ENDIAN + partial = *data << 8; +#else + partial = *data; +#endif + ++data; + --mlen; + } + needs_swap = started_on_odd; + while (mlen >= 32) { + __builtin_prefetch(data + 32); + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + partial += *(uint16_t *)(void *)(data + 8); + partial += *(uint16_t *)(void *)(data + 10); + partial += *(uint16_t *)(void *)(data + 12); + partial += *(uint16_t *)(void *)(data + 14); + partial += *(uint16_t *)(void *)(data + 16); + partial += *(uint16_t *)(void *)(data + 18); + partial += *(uint16_t *)(void *)(data + 20); + partial += *(uint16_t *)(void *)(data + 22); + partial += *(uint16_t *)(void *)(data + 24); + partial += *(uint16_t *)(void *)(data + 26); + partial += *(uint16_t *)(void *)(data + 28); + partial += *(uint16_t *)(void *)(data + 30); + data += 32; + mlen -= 32; + if (PREDICT_FALSE(partial & 0xc0000000)) { + if (needs_swap) + partial = (partial << 8) + + (partial >> 24); + sum += (partial >> 16); + sum += (partial & 0xffff); + partial = 0; + } + } + if (mlen & 16) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + partial += *(uint16_t *)(void *)(data + 8); + partial += *(uint16_t *)(void *)(data + 10); + partial += *(uint16_t *)(void *)(data + 12); + partial += *(uint16_t *)(void *)(data + 14); + data += 16; + mlen -= 16; + } + /* + * mlen is not updated below as the remaining tests + * are using bit masks, which are not affected. 
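The unrolled 32-bit loop defers carry handling until partial could overflow (the 0xc0000000 guard) and compensates for odd starting alignment with the needs_swap rotation. When modifying code like this, a byte-at-a-time reference sum is handy for differential testing; here is one in the big-endian view (b_sum16()'s result is host-order, so comparisons must account for that):

        #include <stdint.h>
        #include <stddef.h>

        static uint16_t
        sum16_ref(const uint8_t *p, size_t len)
        {
                uint32_t sum = 0;

                while (len > 1) {
                        sum += ((uint32_t)p[0] << 8) | p[1];    /* one 16-bit word */
                        p += 2;
                        len -= 2;
                }
                if (len)                        /* odd trailing byte pads with zero */
                        sum += (uint32_t)p[0] << 8;
                while (sum >> 16)               /* fold all carries */
                        sum = (sum >> 16) + (sum & 0xffff);
                return ((uint16_t)sum);
        }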
+ */ + if (mlen & 8) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + data += 8; + } + if (mlen & 4) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + data += 4; + } + if (mlen & 2) { + partial += *(uint16_t *)(void *)data; + data += 2; + } + if (mlen & 1) { +#if BYTE_ORDER == LITTLE_ENDIAN + partial += *data; +#else + partial += *data << 8; +#endif + started_on_odd = !started_on_odd; + } -#define REDUCE32 \ - { \ - q_util.q = sum; \ - sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ - } -#define REDUCE16 \ - { \ - q_util.q = sum; \ - l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ - sum = l_util.s[0] + l_util.s[1]; \ - ADDCARRY(sum); \ - } + if (needs_swap) + partial = (partial << 8) + (partial >> 24); + sum += (partial >> 16) + (partial & 0xffff); + sum = (sum >> 16) + (sum & 0xffff); -#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} + final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + + return (final_acc); +} + +#else +/* 64-bit version */ +static uint16_t +in_cksumdata(const void *buf, int mlen) +{ + uint64_t sum, partial; + unsigned int final_acc; + uint8_t *data = (void *)buf; + boolean_t needs_swap, started_on_odd; + + VERIFY(mlen >= 0); + + needs_swap = FALSE; + started_on_odd = FALSE; + + sum = 0; + partial = 0; + + if ((uintptr_t)data & 1) { + /* Align on word boundary */ + started_on_odd = !started_on_odd; +#if BYTE_ORDER == LITTLE_ENDIAN + partial = *data << 8; +#else + partial = *data; +#endif + ++data; + --mlen; + } + needs_swap = started_on_odd; + if ((uintptr_t)data & 2) { + if (mlen < 2) + goto trailing_bytes; + partial += *(uint16_t *)(void *)data; + data += 2; + mlen -= 2; + } + while (mlen >= 64) { + __builtin_prefetch(data + 32); + __builtin_prefetch(data + 64); + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + partial += *(uint32_t *)(void *)(data + 16); + partial += *(uint32_t *)(void *)(data + 20); + partial += *(uint32_t *)(void *)(data + 24); + partial += *(uint32_t *)(void *)(data + 28); + partial += *(uint32_t *)(void *)(data + 32); + partial += *(uint32_t *)(void *)(data + 36); + partial += *(uint32_t *)(void *)(data + 40); + partial += *(uint32_t *)(void *)(data + 44); + partial += *(uint32_t *)(void *)(data + 48); + partial += *(uint32_t *)(void *)(data + 52); + partial += *(uint32_t *)(void *)(data + 56); + partial += *(uint32_t *)(void *)(data + 60); + data += 64; + mlen -= 64; + if (PREDICT_FALSE(partial & (3ULL << 62))) { + if (needs_swap) + partial = (partial << 8) + + (partial >> 56); + sum += (partial >> 32); + sum += (partial & 0xffffffff); + partial = 0; + } + } + /* + * mlen is not updated below as the remaining tests + * are using bit masks, which are not affected. 
+ */ + if (mlen & 32) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + partial += *(uint32_t *)(void *)(data + 16); + partial += *(uint32_t *)(void *)(data + 20); + partial += *(uint32_t *)(void *)(data + 24); + partial += *(uint32_t *)(void *)(data + 28); + data += 32; + } + if (mlen & 16) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + data += 16; + } + if (mlen & 8) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + data += 8; + } + if (mlen & 4) { + partial += *(uint32_t *)(void *)data; + data += 4; + } + if (mlen & 2) { + partial += *(uint16_t *)(void *)data; + data += 2; + } +trailing_bytes: + if (mlen & 1) { +#if BYTE_ORDER == LITTLE_ENDIAN + partial += *data; +#else + partial += *data << 8; +#endif + started_on_odd = !started_on_odd; + } + + if (needs_swap) + partial = (partial << 8) + (partial >> 56); + sum += (partial >> 32) + (partial & 0xffffffff); + sum = (sum >> 32) + (sum & 0xffffffff); + + final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) + + ((sum >> 16) & 0xffff) + (sum & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + + return (final_acc); +} +#endif /* ULONG_MAX != 0xffffffffUL */ -u_int16_t inet_cksum_simple(struct mbuf *, int); +/* + * Perform 16-bit 1's complement sum on a contiguous span. + */ +uint16_t +b_sum16(const void *buf, int len) +{ + return (in_cksumdata(buf, len)); +} -u_int16_t +uint16_t inet_cksum_simple(struct mbuf *, int); +/* + * For the exported _in_cksum symbol in BSDKernel symbol set. + */ +uint16_t inet_cksum_simple(struct mbuf *m, int len) { return (inet_cksum(m, 0, 0, len)); } -u_short -in_addword(u_short a, u_short b) +uint16_t +in_addword(uint16_t a, uint16_t b) { - union l_util l_util; - u_int32_t sum = a + b; + uint64_t sum = a + b; - REDUCE; + ADDCARRY(sum); return (sum); } -u_short -in_pseudo(u_int a, u_int b, u_int c) +uint16_t +in_pseudo(uint32_t a, uint32_t b, uint32_t c) { - u_int64_t sum; + uint64_t sum; union q_util q_util; union l_util l_util; - sum = (u_int64_t) a + b + c; + sum = (uint64_t)a + b + c; REDUCE16; return (sum); - } - -u_int16_t -inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip, - unsigned int len) +uint16_t +in_pseudo64(uint64_t a, uint64_t b, uint64_t c) { - u_short *w; - u_int32_t sum = 0; - int mlen = 0; - int byte_swapped = 0; - union s_util s_util; + uint64_t sum; + union q_util q_util; union l_util l_util; - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0); + sum = a + b + c; + REDUCE16; + return (sum); +} - /* sanity check */ - if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < skip + len) { - panic("inet_cksum: mbuf len (%d) < off+len (%d+%d)\n", - m->m_pkthdr.len, skip, len); +/* + * May be used on IP header with options. + */ +uint16_t +in_cksum_hdr_opt(const struct ip *ip) +{ + return (~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff); +} + +/* + * A wrapper around the simple in_cksum_hdr() and the more complicated + * inet_cksum(); the former is chosen if the IP header is simple, + * contiguous and 32-bit aligned. Also does some stats accounting. 
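in_pseudo() exploits the fact that the 1's complement sum is commutative and byte-order-insensitive: the pseudo header's zero:proto word and 16-bit length can be pre-added in host order and swapped with a single htonl(), exactly as inet_cksum() passes htonl(len + nxt). A standalone rendering of that computation:

        #include <stdint.h>
        #include <arpa/inet.h>

        /* 1's complement sum of the IPv4 pseudo header: src, dst, 0:proto, len.
         * Addresses are taken in network byte order, as on the wire. */
        static uint16_t
        pseudo_hdr_sum(uint32_t src_be, uint32_t dst_be, uint8_t proto,
            uint16_t len)
        {
                uint64_t sum = (uint64_t)src_be + dst_be +
                    htonl((uint32_t)len + proto);       /* htonl(len + nxt) trick */

                sum = (sum >> 32) + (sum & 0xffffffffULL);
                sum = (sum >> 16) + (sum & 0xffffULL);
                sum = (sum >> 16) + (sum & 0xffffULL);
                return ((uint16_t)sum);         /* a seed, not yet complemented */
        }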
+ */ +uint16_t +ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out) +{ + struct ip *ip = mtod(m, struct ip *); + + if (out) { + ipstat.ips_snd_swcsum++; + ipstat.ips_snd_swcsum_bytes += hlen; + } else { + ipstat.ips_rcv_swcsum++; + ipstat.ips_rcv_swcsum_bytes += hlen; } - /* include pseudo header checksum? */ - if (nxt != 0) { - struct ip *iph; + if (hlen == sizeof (*ip) && + m->m_len >= sizeof (*ip) && IP_HDR_ALIGNED_P(ip)) + return (in_cksum_hdr(ip)); - if (m->m_len < sizeof (struct ip)) - panic("inet_cksum: bad mbuf chain"); + return (inet_cksum(m, 0, 0, hlen)); +} - iph = mtod(m, struct ip *); - sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, - htonl(len + nxt)); - } +/* + * m MUST contain at least an IP header, if nxt is specified; + * nxt is the upper layer protocol number; + * off is an offset where TCP/UDP/ICMP header starts; + * len is a total length of a transport segment (e.g. TCP header + TCP payload) + */ +uint16_t +inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len) +{ + uint32_t sum; - if (skip != 0) { - for (; skip && m; m = m->m_next) { - if (m->m_len > skip) { - mlen = m->m_len - skip; - w = (u_short *)(void *)(m->m_data+skip); - goto skip_start; - } else { - skip -= m->m_len; - } - } - } - for (;m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - w = mtod(m, u_short *); - - if (mlen == -1) { - /* - * The first byte of this mbuf is the continuation - * of a word spanning between this mbuf and the - * last mbuf. - * - * s_util.c[0] is already saved when scanning previous - * mbuf. - */ - s_util.c[1] = *(char *)w; - sum += s_util.s; - w = (u_short *)(void *)((char *)w + 1); - mlen = m->m_len - 1; - len--; - } else { - mlen = m->m_len; - } -skip_start: - if (len < mlen) - mlen = len; + sum = m_sum16(m, off, len); + + /* include pseudo header checksum? */ + if (nxt != 0) { + struct ip *ip; + unsigned char buf[sizeof ((*ip))] __attribute__((aligned(8))); + uint32_t mlen; - len -= mlen; /* - * Force to even boundary. + * Sanity check + * + * Use m_length2() instead of m_length(), as we cannot rely on + * the caller setting m_pkthdr.len correctly, if the mbuf is + * a M_PKTHDR one. */ - if ((1 & (uintptr_t) w) && (mlen > 0)) { - REDUCE; - sum <<= 8; - s_util.c[0] = *(u_char *)w; - w = (u_short *)(void *)((char *)w + 1); - mlen--; - byte_swapped = 1; + if ((mlen = m_length2(m, NULL)) < sizeof (*ip)) { + panic("%s: mbuf %p too short (%d) for IPv4 header", + __func__, m, mlen); + /* NOTREACHED */ } + /* - * Unroll the loop to make overhead from - * branches &c small. + * In case the IP header is not contiguous, or not 32-bit + * aligned, copy it to a local buffer. Note here that we + * expect the data pointer to point to the IP header. 
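The comment above describes the general defensive pattern: read a header in place only when it is both contiguous and aligned, otherwise take an aligned local copy first. Stripped of mbuf machinery, the pattern looks like this (hypothetical 8-byte header; the kernel's actual check is IP_HDR_ALIGNED_P plus an m_len test):

        #include <assert.h>
        #include <stdint.h>
        #include <string.h>

        struct hdr { uint32_t src, dst; };

        static const struct hdr *
        get_aligned_hdr(const void *data, size_t len, struct hdr *scratch)
        {
                assert(len >= sizeof (struct hdr));     /* the kernel panics instead */
                if (((uintptr_t)data & (sizeof (uint32_t) - 1)) == 0)
                        return (data);                  /* safe to use in place */
                memcpy(scratch, data, sizeof (*scratch));
                return (scratch);                       /* aligned local copy */
        }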
*/ - while ((mlen -= 32) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; - sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; - sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; - w += 16; - } - mlen += 32; - while ((mlen -= 8) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - w += 4; - } - mlen += 8; - if (mlen == 0 && byte_swapped == 0) - continue; - REDUCE; - while ((mlen -= 2) >= 0) { - sum += *w++; + if ((sizeof (*ip) > m->m_len) || + !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) { + m_copydata(m, 0, sizeof (*ip), (caddr_t)buf); + ip = (struct ip *)(void *)buf; + } else { + ip = (struct ip *)(void *)(m->m_data); } - if (byte_swapped) { - REDUCE; - sum <<= 8; - byte_swapped = 0; - if (mlen == -1) { - s_util.c[1] = *(char *)w; - sum += s_util.s; - mlen = 0; - } else - mlen = -1; - } else if (mlen == -1) - s_util.c[0] = *(char *)w; - } - if (len) - printf("cksum: out of data by %d\n", len); - if (mlen == -1) { - /* The last mbuf has odd # of bytes. Follow the - standard (the odd byte may be shifted left by 8 bits - or not as determined by endian-ness of the machine) */ - s_util.c[1] = 0; - sum += s_util.s; + + /* add pseudo header checksum */ + sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl(len + nxt)); + + /* fold in carry bits */ + ADDCARRY(sum); } - REDUCE; - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0); + return (~sum & 0xffff); } - diff --git a/bsd/netinet/in_dhcp.c b/bsd/netinet/in_dhcp.c index b0bad24cb..001e1058f 100644 --- a/bsd/netinet/in_dhcp.c +++ b/bsd/netinet/in_dhcp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1988-2011 Apple Inc. All rights reserved. + * Copyright (c) 1988-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -536,11 +536,7 @@ dhcp_select(struct dhcp_context * context) = htons((u_short) (current_time.tv_sec - context->start_time.tv_sec)); request->dhcp.dp_xid = htonl(context->xid); -#ifdef RANDOM_IP_ID request->ip.ip_id = ip_randomid(); -#else - request->ip.ip_id = htons(ip_id++); -#endif error = send_packet(context->ifp, request, request_size); if (error != 0) { printf("dhcp: send_packet failed with %d\n", error); @@ -737,11 +733,7 @@ dhcp_init(struct dhcp_context * context) = htons((u_short)(current_time.tv_sec - context->start_time.tv_sec)); request->dhcp.dp_xid = htonl(context->xid); -#ifdef RANDOM_IP_ID request->ip.ip_id = ip_randomid(); -#else - request->ip.ip_id = htons(ip_id++); -#endif error = send_packet(context->ifp, request, request_size); if (error != 0) { printf("dhcp: send_packet failed with %d\n", error); @@ -862,8 +854,7 @@ dhcp_context_create(struct ifnet * ifp, int max_try, /* enable reception of DHCP packets before an address is assigned */ snprintf(context->ifr.ifr_name, - sizeof(context->ifr.ifr_name), "%s%d", ifp->if_name, - ifp->if_unit); + sizeof(context->ifr.ifr_name), "%s", if_name(ifp)); context->ifr.ifr_intval = 1; error = ifioctl(context->so, SIOCAUTOADDR, (caddr_t)&context->ifr, procp); @@ -886,7 +877,7 @@ dhcp_context_create(struct ifnet * ifp, int max_try, sin.sin_family = AF_INET; sin.sin_port = htons(IPPORT_BOOTPC); sin.sin_addr.s_addr = INADDR_ANY; - error = sobind(context->so, (struct sockaddr *)&sin); + error = sobindlock(context->so, (struct sockaddr *)&sin, 1); if (error) { printf("dhcp: sobind failed, %d\n", error); goto failed; diff --git a/bsd/netinet/in_gif.c b/bsd/netinet/in_gif.c index c65eceb68..cc700ae37 100644 --- a/bsd/netinet/in_gif.c +++ b/bsd/netinet/in_gif.c @@ -1,8 
+1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,10 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $ */ +/* $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -89,7 +89,7 @@ #include #endif /* MROUTING */ -#include +#include #include @@ -105,19 +105,25 @@ in_gif_output( __unused struct rtentry *rt) { struct gif_softc *sc = ifnet_softc(ifp); - struct sockaddr_in *dst = (struct sockaddr_in *)(void *)&sc->gif_ro.ro_dst; - struct sockaddr_in *sin_src = (struct sockaddr_in *)(void *)sc->gif_psrc; - struct sockaddr_in *sin_dst = (struct sockaddr_in *)(void *)sc->gif_pdst; + struct sockaddr_in *dst = (struct sockaddr_in *) + (void *)&sc->gif_ro.ro_dst; + struct sockaddr_in *sin_src = (struct sockaddr_in *) + (void *)sc->gif_psrc; + struct sockaddr_in *sin_dst = (struct sockaddr_in *) + (void *)sc->gif_pdst; struct ip iphdr; /* capsule IP header, host byte ordered */ int proto, error; u_int8_t tos; - struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF }; + struct ip_out_args ipoa = + { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 }; + + GIF_LOCK_ASSERT(sc); if (sin_src == NULL || sin_dst == NULL || sin_src->sin_family != AF_INET || sin_dst->sin_family != AF_INET) { m_freem(m); - return EAFNOSUPPORT; + return (EAFNOSUPPORT); } switch (family) { @@ -127,81 +133,77 @@ in_gif_output( struct ip *ip; proto = IPPROTO_IPV4; - if (mbuf_len(m) < sizeof(*ip)) { - m = m_pullup(m, sizeof(*ip)); + if (mbuf_len(m) < sizeof (*ip)) { + m = m_pullup(m, sizeof (*ip)); if (!m) - return ENOBUFS; + return (ENOBUFS); } ip = mtod(m, struct ip *); tos = ip->ip_tos; break; } -#endif /*INET*/ +#endif /* INET */ #if INET6 case AF_INET6: { struct ip6_hdr *ip6; proto = IPPROTO_IPV6; - if (mbuf_len(m) < sizeof(*ip6)) { - m = m_pullup(m, sizeof(*ip6)); + if (mbuf_len(m) < sizeof (*ip6)) { + m = m_pullup(m, sizeof (*ip6)); if (!m) - return ENOBUFS; + return (ENOBUFS); } ip6 = mtod(m, struct ip6_hdr *); tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; break; } -#endif /*INET6*/ +#endif /* INET6 */ default: #if DEBUG printf("in_gif_output: warning: unknown family %d passed\n", family); #endif m_freem(m); - return EAFNOSUPPORT; + return (EAFNOSUPPORT); } - bzero(&iphdr, sizeof(iphdr)); + bzero(&iphdr, sizeof (iphdr)); iphdr.ip_src = sin_src->sin_addr; /* bidirectional configured tunnel mode */ if 
(sin_dst->sin_addr.s_addr != INADDR_ANY) iphdr.ip_dst = sin_dst->sin_addr; else { m_freem(m); - return ENETUNREACH; + return (ENETUNREACH); } iphdr.ip_p = proto; /* version will be set in ip_output() */ iphdr.ip_ttl = ip_gif_ttl; - iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip); + iphdr.ip_len = m->m_pkthdr.len + sizeof (struct ip); if (ifp->if_flags & IFF_LINK1) ip_ecn_ingress(ECN_ALLOWED, &iphdr.ip_tos, &tos); else ip_ecn_ingress(ECN_NOCARE, &iphdr.ip_tos, &tos); /* prepend new IP header */ - M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); - if (m && mbuf_len(m) < sizeof(struct ip)) - m = m_pullup(m, sizeof(struct ip)); + M_PREPEND(m, sizeof (struct ip), M_DONTWAIT); + if (m && mbuf_len(m) < sizeof (struct ip)) + m = m_pullup(m, sizeof (struct ip)); if (m == NULL) { printf("ENOBUFS in in_gif_output %d\n", __LINE__); - return ENOBUFS; + return (ENOBUFS); } - bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip)); + bcopy(&iphdr, mtod(m, struct ip *), sizeof (struct ip)); - if (dst->sin_family != sin_dst->sin_family || + if (ROUTE_UNUSABLE(&sc->gif_ro) || + dst->sin_family != sin_dst->sin_family || dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr || - (sc->gif_ro.ro_rt != NULL && - (sc->gif_ro.ro_rt->generation_id != route_generation || - sc->gif_ro.ro_rt->rt_ifp == ifp))) { + (sc->gif_ro.ro_rt != NULL && sc->gif_ro.ro_rt->rt_ifp == ifp)) { /* cache route doesn't match or recursive route */ dst->sin_family = sin_dst->sin_family; - dst->sin_len = sizeof(struct sockaddr_in); + dst->sin_len = sizeof (struct sockaddr_in); dst->sin_addr = sin_dst->sin_addr; - if (sc->gif_ro.ro_rt) { - rtfree(sc->gif_ro.ro_rt); - sc->gif_ro.ro_rt = NULL; - } + ROUTE_RELEASE(&sc->gif_ro); #if 0 sc->gif_if.if_mtu = GIF_MTU; #endif @@ -211,7 +213,7 @@ in_gif_output( rtalloc(&sc->gif_ro); if (sc->gif_ro.ro_rt == NULL) { m_freem(m); - return ENETUNREACH; + return (ENETUNREACH); } /* if it constitutes infinite encapsulation, punt. */ @@ -219,17 +221,18 @@ in_gif_output( if (sc->gif_ro.ro_rt->rt_ifp == ifp) { RT_UNLOCK(sc->gif_ro.ro_rt); m_freem(m); - return ENETUNREACH; /*XXX*/ + return (ENETUNREACH); /* XXX */ } #if 0 ifp->if_mtu = sc->gif_ro.ro_rt->rt_ifp->if_mtu - - sizeof(struct ip); + - sizeof (struct ip); #endif RT_UNLOCK(sc->gif_ro.ro_rt); } error = ip_output(m, NULL, &sc->gif_ro, IP_OUTARGS, NULL, &ipoa); - return(error); + + return (error); } void @@ -246,7 +249,7 @@ in_gif_input(m, off) proto = ip->ip_p; - gifp = ((struct gif_softc*)encap_getarg(m))->gif_if; + gifp = ((struct gif_softc *)encap_getarg(m))->gif_if; if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) { m_freem(m); @@ -262,8 +265,8 @@ in_gif_input(m, off) case IPPROTO_IPV4: { af = AF_INET; - if (mbuf_len(m) < sizeof(*ip)) { - m = m_pullup(m, sizeof(*ip)); + if (mbuf_len(m) < sizeof (*ip)) { + m = m_pullup(m, sizeof (*ip)); if (!m) return; } @@ -281,8 +284,8 @@ in_gif_input(m, off) struct ip6_hdr *ip6; u_int8_t itos; af = AF_INET6; - if (mbuf_len(m) < sizeof(*ip6)) { - m = m_pullup(m, sizeof(*ip6)); + if (mbuf_len(m) < sizeof (*ip6)) { + m = m_pullup(m, sizeof (*ip6)); if (!m) return; } @@ -303,29 +306,17 @@ in_gif_input(m, off) return; } #ifdef __APPLE__ - /* Should we free m if dlil_input returns an error? 
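Encapsulation here is: compose the outer IPv4 header in a local struct, M_PREPEND space onto the mbuf chain, and bcopy the header in; the route-cache block afterwards rejects a cached route whose output interface is the tunnel itself, since that would encapsulate without bound. A flat-buffer sketch of the prepend step (a real mbuf chain may need allocation, which M_PREPEND handles):

        #include <assert.h>
        #include <stdint.h>
        #include <string.h>

        static uint8_t *
        prepend_hdr(uint8_t *payload, size_t headroom, const void *hdr,
            size_t hlen)
        {
                assert(headroom >= hlen);       /* M_PREPEND would grow the chain */
                memcpy(payload - hlen, hdr, hlen);
                return (payload - hlen);        /* new start of the packet */
        }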
*/ - if (m->m_pkthdr.rcvif) /* replace the rcvif by gifp for dlil to route it correctly */ + /* Replace the rcvif by gifp for dlil to route it correctly */ + if (m->m_pkthdr.rcvif) m->m_pkthdr.rcvif = gifp; ifnet_input(gifp, m, NULL); #else gif_input(m, af, gifp); #endif - return; -} - -static __inline__ void* -_cast_non_const(const void * ptr) { - union { - const void* cval; - void* val; - } ret; - - ret.cval = ptr; - return (ret.val); } /* - * we know that we are in IFF_UP, outer address available, and outer family + * We know that we are in IFF_UP, outer address available, and outer family * matched the physical addr family. see gif_encapcheck(). */ int @@ -346,7 +337,9 @@ gif_encapcheck4( src = (struct sockaddr_in *)(void *)sc->gif_psrc; dst = (struct sockaddr_in *)(void *)sc->gif_pdst; - mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof(ip), &ip); + GIF_LOCK_ASSERT(sc); + + mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof (ip), &ip); /* check for address match */ addrmatch = 0; @@ -355,27 +348,26 @@ gif_encapcheck4( if (dst->sin_addr.s_addr == ip.ip_src.s_addr) addrmatch |= 2; if (addrmatch != 3) - return 0; + return (0); /* martian filters on outer source - NOT done in ip_input! */ if (IN_MULTICAST(ntohl(ip.ip_src.s_addr))) - return 0; + return (0); switch ((ntohl(ip.ip_src.s_addr) & 0xff000000) >> 24) { case 0: case 127: case 255: - return 0; + return (0); } /* reject packets with broadcast on source */ lck_rw_lock_shared(in_ifaddr_rwlock); for (ia4 = TAILQ_FIRST(&in_ifaddrhead); ia4; - ia4 = TAILQ_NEXT(ia4, ia_link)) - { + ia4 = TAILQ_NEXT(ia4, ia_link)) { if ((ifnet_flags(ia4->ia_ifa.ifa_ifp) & IFF_BROADCAST) == 0) continue; IFA_LOCK(&ia4->ia_ifa); if (ip.ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { IFA_UNLOCK(&ia4->ia_ifa); lck_rw_done(in_ifaddr_rwlock); - return 0; + return (0); } IFA_UNLOCK(&ia4->ia_ifa); } @@ -387,9 +379,9 @@ gif_encapcheck4( struct sockaddr_in sin; struct rtentry *rt; - bzero(&sin, sizeof(sin)); + bzero(&sin, sizeof (sin)); sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_len = sizeof (struct sockaddr_in); sin.sin_addr = ip.ip_src; rt = rtalloc1_scoped((struct sockaddr *)&sin, 0, 0, m->m_pkthdr.rcvif->if_index); @@ -400,11 +392,11 @@ gif_encapcheck4( RT_UNLOCK(rt); rtfree(rt); } - return 0; + return (0); } RT_UNLOCK(rt); rtfree(rt); } - return 32 * 2; + return (32 * 2); } diff --git a/bsd/netinet/in_gif.h b/bsd/netinet/in_gif.h index c2dfdda44..69247eed8 100644 --- a/bsd/netinet/in_gif.h +++ b/bsd/netinet/in_gif.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
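gif_encapcheck4() scores a candidate tunnel by exact outer-address match (32 bits each for source and destination, hence the 32 * 2 return), but only after the martian filters reject implausible outer sources. The filter itself, lifted into a standalone predicate:

        #include <stdint.h>
        #include <arpa/inet.h>

        static int
        outer_src_ok(uint32_t src_be)
        {
                uint32_t h = ntohl(src_be);

                if ((h >> 28) == 0xe)           /* IN_MULTICAST: 224/4 */
                        return (0);
                switch (h >> 24) {
                case 0:                         /* "this network" */
                case 127:                       /* loopback */
                case 255:                       /* limited broadcast */
                        return (0);
                }
                return (1);
        }

The broadcast-on-source check that follows still requires walking the interface addresses, since directed broadcast addresses are configuration-dependent.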
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,10 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $KAME: in_gif.h,v 1.3 2000/02/22 14:01:59 itojun Exp $ */ +/* $KAME: in_gif.h,v 1.3 2000/02/22 14:01:59 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -57,12 +57,11 @@ */ #ifndef _NETINET_IN_GIF_H_ -#define _NETINET_IN_GIF_H_ +#define _NETINET_IN_GIF_H_ #include -#ifdef KERNEL -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE -#define GIF_TTL 30 +#define GIF_TTL 30 extern int ip_gif_ttl; @@ -70,6 +69,5 @@ void in_gif_input(struct mbuf *, int); int in_gif_output(struct ifnet *, int, struct mbuf *, struct rtentry *); int gif_encapcheck4(const struct mbuf *, int, int, void *); -#endif /* KERNEL_PRIVATE */ -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET_IN_GIF_H_ */ diff --git a/bsd/netinet/in_mcast.c b/bsd/netinet/in_mcast.c index deded6a55..13f0b9aa9 100644 --- a/bsd/netinet/in_mcast.c +++ b/bsd/netinet/in_mcast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 Apple Inc. All rights reserved. + * Copyright (c) 2010-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -243,9 +243,6 @@ static void ipms_free(struct ip_msource *); static struct in_msource *inms_alloc(int); static void inms_free(struct in_msource *); -#define IMO_CAST_TO_NONCONST(x) ((struct ip_moptions *)(void *)(uintptr_t)x) -#define INM_CAST_TO_NONCONST(x) ((struct in_multi *)(void *)(uintptr_t)x) - static __inline int ip_msource_cmp(const struct ip_msource *a, const struct ip_msource *b) { @@ -345,7 +342,7 @@ imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, int idx; int nmships; - IMO_LOCK_ASSERT_HELD(IMO_CAST_TO_NONCONST(imo)); + IMO_LOCK_ASSERT_HELD(__DECONST(struct ip_moptions *, imo)); gsin = (struct sockaddr_in *)(uintptr_t)(size_t)group; @@ -388,7 +385,7 @@ imo_match_source(const struct ip_moptions *imo, const size_t gidx, struct ip_msource *ims; const sockunion_t *psa; - IMO_LOCK_ASSERT_HELD(IMO_CAST_TO_NONCONST(imo)); + IMO_LOCK_ASSERT_HELD(__DECONST(struct ip_moptions *, imo)); VERIFY(src->sa_family == AF_INET); VERIFY(gidx != (size_t)-1 && gidx < imo->imo_num_memberships); @@ -420,7 +417,7 @@ imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, struct in_msource *ims; int mode; - IMO_LOCK_ASSERT_HELD(IMO_CAST_TO_NONCONST(imo)); + IMO_LOCK_ASSERT_HELD(__DECONST(struct ip_moptions *, imo)); VERIFY(ifp != NULL); gidx = imo_match_group(imo, ifp, group); @@ -861,7 +858,8 @@ imf_rollback(struct in_mfilter *imf) lims->imsl_st[1] = lims->imsl_st[0]; } else { /* revert source added t1 */ - IGMP_PRINTF(("%s: free inms %p\n", __func__, lims)); + IGMP_PRINTF(("%s: free inms 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(lims))); RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); inms_free(lims); imf->imf_nsrc--; @@ -921,7 +919,8 @@ imf_reap(struct in_mfilter *imf) lims = (struct in_msource *)ims; if ((lims->imsl_st[0] == MCAST_UNDEFINED) && (lims->imsl_st[1] == MCAST_UNDEFINED)) { - IGMP_PRINTF(("%s: free inms %p\n", __func__, lims)); + IGMP_PRINTF(("%s: free inms 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(lims))); 
RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); inms_free(lims); imf->imf_nsrc--; @@ -942,7 +941,8 @@ imf_purge(struct in_mfilter *imf) RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { lims = (struct in_msource *)ims; - IGMP_PRINTF(("%s: free inms %p\n", __func__, lims)); + IGMP_PRINTF(("%s: free inms 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(lims))); RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); inms_free(lims); imf->imf_nsrc--; @@ -969,6 +969,7 @@ inm_get_source(struct in_multi *inm, const in_addr_t haddr, struct ip_msource *ims, *nims; #ifdef IGMP_DEBUG struct in_addr ia; + char buf[MAX_IPv4_STR_LEN]; #endif INM_LOCK_ASSERT_HELD(inm); @@ -986,8 +987,9 @@ inm_get_source(struct in_multi *inm, const in_addr_t haddr, ims = nims; #ifdef IGMP_DEBUG ia.s_addr = htonl(haddr); - IGMP_PRINTF(("%s: allocated %s as %p\n", __func__, - inet_ntoa(ia), ims)); + inet_ntop(AF_INET, &ia, buf, sizeof(buf)); + IGMP_PRINTF(("%s: allocated %s as 0x%llx\n", __func__, + buf, (uint64_t)VM_KERNEL_ADDRPERM(ims))); #endif } @@ -1007,7 +1009,7 @@ uint8_t ims_get_mode(const struct in_multi *inm, const struct ip_msource *ims, uint8_t t) { - INM_LOCK_ASSERT_HELD(INM_CAST_TO_NONCONST(inm)); + INM_LOCK_ASSERT_HELD(__DECONST(struct in_multi *, inm)); t = !!t; if (inm->inm_st[t].iss_ex > 0 && @@ -1034,22 +1036,26 @@ ims_merge(struct ip_msource *ims, const struct in_msource *lims, #endif if (lims->imsl_st[0] == MCAST_EXCLUDE) { - IGMP_PRINTF(("%s: t1 ex -= %d on %s\n", - __func__, n, inet_ntoa(ia))); + IGMP_INET_PRINTF(ia, + ("%s: t1 ex -= %d on %s\n", + __func__, n, _igmp_inet_buf)); ims->ims_st[1].ex -= n; } else if (lims->imsl_st[0] == MCAST_INCLUDE) { - IGMP_PRINTF(("%s: t1 in -= %d on %s\n", - __func__, n, inet_ntoa(ia))); + IGMP_INET_PRINTF(ia, + ("%s: t1 in -= %d on %s\n", + __func__, n, _igmp_inet_buf)); ims->ims_st[1].in -= n; } if (lims->imsl_st[1] == MCAST_EXCLUDE) { - IGMP_PRINTF(("%s: t1 ex += %d on %s\n", - __func__, n, inet_ntoa(ia))); + IGMP_INET_PRINTF(ia, + ("%s: t1 ex += %d on %s\n", + __func__, n, _igmp_inet_buf)); ims->ims_st[1].ex += n; } else if (lims->imsl_st[1] == MCAST_INCLUDE) { - IGMP_PRINTF(("%s: t1 in += %d on %s\n", - __func__, n, inet_ntoa(ia))); + IGMP_INET_PRINTF(ia, + ("%s: t1 in += %d on %s\n", + __func__, n, _igmp_inet_buf)); ims->ims_st[1].in += n; } } @@ -1183,7 +1189,9 @@ inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) inm->inm_st[1].iss_asm++; } - IGMP_PRINTF(("%s: merged imf %p to inm %p\n", __func__, imf, inm)); + IGMP_PRINTF(("%s: merged imf 0x%llx to inm 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(imf), + (uint64_t)VM_KERNEL_ADDRPERM(inm))); inm_print(inm); out_reap: @@ -1205,7 +1213,8 @@ inm_commit(struct in_multi *inm) INM_LOCK_ASSERT_HELD(inm); - IGMP_PRINTF(("%s: commit inm %p\n", __func__, inm)); + IGMP_PRINTF(("%s: commit inm 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm))); IGMP_PRINTF(("%s: pre commit:\n", __func__)); inm_print(inm); @@ -1230,7 +1239,8 @@ inm_reap(struct in_multi *inm) ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || ims->ims_stp != 0) continue; - IGMP_PRINTF(("%s: free ims %p\n", __func__, ims)); + IGMP_PRINTF(("%s: free ims 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ims))); RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); ipms_free(ims); inm->inm_nsrc--; @@ -1248,7 +1258,8 @@ inm_purge(struct in_multi *inm) INM_LOCK_ASSERT_HELD(inm); RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { - IGMP_PRINTF(("%s: free ims %p\n", __func__, ims)); + IGMP_PRINTF(("%s: free 
ims 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ims))); RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); ipms_free(ims); inm->inm_nsrc--; @@ -1271,10 +1282,12 @@ in_joingroup(struct ifnet *ifp, const struct in_addr *gina, struct in_mfilter timf; struct in_multi *inm = NULL; int error = 0; + struct igmp_tparams itp; - IGMP_PRINTF(("%s: join %s on %p(%s%d))\n", __func__, - inet_ntoa(*gina), ifp, ifp->if_name, ifp->if_unit)); + IGMP_INET_PRINTF(*gina, ("%s: join %s on 0x%llx(%s))\n", __func__, + _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + bzero(&itp, sizeof (itp)); *pinm = NULL; /* @@ -1302,15 +1315,17 @@ in_joingroup(struct ifnet *ifp, const struct in_addr *gina, } IGMP_PRINTF(("%s: doing igmp downcall\n", __func__)); - error = igmp_change_state(inm); + error = igmp_change_state(inm, &itp); if (error) { IGMP_PRINTF(("%s: failed to update source\n", __func__)); + imf_rollback(imf); goto out_inm_release; } out_inm_release: if (error) { - IGMP_PRINTF(("%s: dropping ref on %p\n", __func__, inm)); + IGMP_PRINTF(("%s: dropping ref on 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm))); INM_UNLOCK(inm); INM_REMREF(inm); } else { @@ -1318,6 +1333,9 @@ out_inm_release: *pinm = inm; /* keep refcount from in_getmulti() */ } + /* schedule timer now that we've dropped the lock(s) */ + igmp_set_timeout(&itp); + return (error); } @@ -1335,7 +1353,9 @@ in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) { struct in_mfilter timf; int error, lastref; + struct igmp_tparams itp; + bzero(&itp, sizeof (itp)); error = 0; INM_LOCK_ASSERT_NOTHELD(inm); @@ -1343,10 +1363,11 @@ in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) in_multihead_lock_exclusive(); INM_LOCK(inm); - IGMP_PRINTF(("%s: leave inm %p, %s/%s%d, imf %p\n", __func__, - inm, inet_ntoa(inm->inm_addr), + IGMP_INET_PRINTF(inm->inm_addr, + ("%s: leave inm 0x%llx, %s/%s%d, imf 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm), _igmp_inet_buf, (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_name), - inm->inm_ifp->if_unit, imf)); + inm->inm_ifp->if_unit, (uint64_t)VM_KERNEL_ADDRPERM(imf))); /* * If no imf was specified (i.e. 
kernel consumer), @@ -1370,7 +1391,7 @@ in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) KASSERT(error == 0, ("%s: failed to merge inm state\n", __func__)); IGMP_PRINTF(("%s: doing igmp downcall\n", __func__)); - error = igmp_change_state(inm); + error = igmp_change_state(inm, &itp); #if IGMP_DEBUG if (error) IGMP_PRINTF(("%s: failed igmp downcall\n", __func__)); @@ -1384,6 +1405,9 @@ in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) if (lastref) INM_REMREF(inm); /* for in_multihead list */ + /* schedule timer now that we've dropped the lock(s) */ + igmp_set_timeout(&itp); + return (error); } @@ -1441,7 +1465,9 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) uint16_t fmode; int error, doblock; unsigned int ifindex = 0; + struct igmp_tparams itp; + bzero(&itp, sizeof (itp)); ifp = NULL; error = 0; doblock = 0; @@ -1475,8 +1501,9 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) if (sopt->sopt_name == IP_BLOCK_SOURCE) doblock = 1; - IGMP_PRINTF(("%s: imr_interface = %s, ifp = %p\n", - __func__, inet_ntoa(mreqs.imr_interface), ifp)); + IGMP_INET_PRINTF(mreqs.imr_interface, + ("%s: imr_interface = %s, ifp = 0x%llx\n", __func__, + _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp))); break; } @@ -1559,8 +1586,9 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) */ ims = imo_match_source(imo, idx, &ssa->sa); if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { - IGMP_PRINTF(("%s: source %s %spresent\n", __func__, - inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not ")); + IGMP_INET_PRINTF(ssa->sin.sin_addr, + ("%s: source %s %spresent\n", __func__, + _igmp_inet_buf, doblock ? "" : "not ")); error = EADDRNOTAVAIL; goto out_imo_locked; } @@ -1596,7 +1624,7 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) } IGMP_PRINTF(("%s: doing igmp downcall\n", __func__)); - error = igmp_change_state(inm); + error = igmp_change_state(inm, &itp); INM_UNLOCK(inm); #if IGMP_DEBUG if (error) @@ -1614,6 +1642,10 @@ out_imf_rollback: out_imo_locked: IMO_UNLOCK(imo); IMO_REMREF(imo); /* from inp_findmoptions() */ + + /* schedule timer now that we've dropped the lock(s) */ + igmp_set_timeout(&itp); + return (error); } @@ -1729,8 +1761,8 @@ inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) return (EADDRNOTAVAIL); if ((size_t) msfr.msfr_nsrcs > - SIZE_MAX / sizeof(struct sockaddr_storage)) - msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage); + UINT32_MAX / sizeof(struct sockaddr_storage)) + msfr.msfr_nsrcs = UINT32_MAX / sizeof(struct sockaddr_storage); if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) msfr.msfr_nsrcs = in_mcast_maxsocksrc; @@ -1854,9 +1886,9 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) * If socket is neither of type SOCK_RAW or SOCK_DGRAM, * or is a divert socket, reject it. 
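/*
 * The in_joingroup()/in_leavegroup()/inp_block_unblock_source() hunks
 * above thread a struct igmp_tparams through igmp_change_state(), which
 * now only records which timers need service; the caller arms them via
 * igmp_set_timeout() once every lock is dropped ("schedule timer now
 * that we've dropped the lock(s)").  A generic userspace sketch of the
 * pattern, with hypothetical names:
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct tparams { bool want_fast; bool want_lazy; };

static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

/* Runs under state_lock: mutate state, only *record* the timer need. */
static void
change_state_locked(struct tparams *tp)
{
    tp->want_fast = true;           /* pretend the state change needs it */
}

/*
 * Runs with no locks held, so it may safely take the timer subsystem's
 * own locks without creating a lock-order cycle with state_lock.
 */
static void
set_timeout(const struct tparams *tp)
{
    if (tp->want_fast)
        printf("arming fast timer\n");
    if (tp->want_lazy)
        printf("arming lazy timer\n");
}

int
main(void)
{
    struct tparams itp;

    memset(&itp, 0, sizeof (itp));
    pthread_mutex_lock(&state_lock);
    change_state_locked(&itp);
    pthread_mutex_unlock(&state_lock);

    /* schedule timer now that we've dropped the lock(s) */
    set_timeout(&itp);
    return (0);
}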
*/ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { + if (SOCK_PROTO(inp->inp_socket) == IPPROTO_DIVERT || + (SOCK_TYPE(inp->inp_socket) != SOCK_RAW && + SOCK_TYPE(inp->inp_socket) != SOCK_DGRAM)) { return (EOPNOTSUPP); } @@ -2009,7 +2041,6 @@ inp_lookup_mcast_ifp(const struct inpcb *inp, if (ro.ro_rt != NULL) { ifp = ro.ro_rt->rt_ifp; VERIFY(ifp != NULL); - rtfree(ro.ro_rt); } else { struct in_ifaddr *ia; struct ifnet *mifp; @@ -2028,6 +2059,7 @@ inp_lookup_mcast_ifp(const struct inpcb *inp, } lck_rw_done(in_ifaddr_rwlock); } + ROUTE_RELEASE(&ro); } return (ifp); @@ -2053,7 +2085,9 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) struct in_msource *lims; size_t idx; int error, is_new; + struct igmp_tparams itp; + bzero(&itp, sizeof (itp)); ifp = NULL; imf = NULL; error = 0; @@ -2107,8 +2141,9 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, mreqs.imr_interface); - IGMP_PRINTF(("%s: imr_interface = %s, ifp = %p\n", - __func__, inet_ntoa(mreqs.imr_interface), ifp)); + IGMP_INET_PRINTF(mreqs.imr_interface, + ("%s: imr_interface = %s, ifp = 0x%llx\n", __func__, + _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp))); break; } @@ -2306,7 +2341,7 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt) goto out_imf_rollback; } IGMP_PRINTF(("%s: doing igmp downcall\n", __func__)); - error = igmp_change_state(inm); + error = igmp_change_state(inm, &itp); INM_UNLOCK(inm); if (error) { IGMP_PRINTF(("%s: failed igmp downcall\n", @@ -2336,6 +2371,10 @@ out_imo_free: out_imo_locked: IMO_UNLOCK(imo); IMO_REMREF(imo); /* from inp_findmoptions() */ + + /* schedule timer now that we've dropped the lock(s) */ + igmp_set_timeout(&itp); + return (error); } @@ -2359,7 +2398,9 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt) size_t idx; int error, is_final; unsigned int ifindex = 0; + struct igmp_tparams itp; + bzero(&itp, sizeof (itp)); ifp = NULL; error = 0; is_final = 1; @@ -2411,8 +2452,9 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt) if (!in_nullhost(mreqs.imr_interface)) ifp = ip_multicast_if(&mreqs.imr_interface, &ifindex); - IGMP_PRINTF(("%s: imr_interface = %s, ifp = %p\n", - __func__, inet_ntoa(mreqs.imr_interface), ifp)); + IGMP_INET_PRINTF(mreqs.imr_interface, + ("%s: imr_interface = %s, ifp = 0x%llx\n", __func__, + _igmp_inet_buf, (uint64_t)VM_KERNEL_ADDRPERM(ifp))); break; @@ -2500,8 +2542,9 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt) } ims = imo_match_source(imo, idx, &ssa->sa); if (ims == NULL) { - IGMP_PRINTF(("%s: source %s %spresent\n", __func__, - inet_ntoa(ssa->sin.sin_addr), "not ")); + IGMP_INET_PRINTF(ssa->sin.sin_addr, + ("%s: source %s %spresent\n", __func__, + _igmp_inet_buf, "not ")); error = EADDRNOTAVAIL; goto out_locked; } @@ -2537,7 +2580,7 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt) } IGMP_PRINTF(("%s: doing igmp downcall\n", __func__)); - error = igmp_change_state(inm); + error = igmp_change_state(inm, &itp); if (error) { IGMP_PRINTF(("%s: failed igmp downcall\n", __func__)); } @@ -2567,6 +2610,10 @@ out_imf_rollback: out_locked: IMO_UNLOCK(imo); IMO_REMREF(imo); /* from inp_findmoptions() */ + + /* schedule timer now that we've dropped the lock(s) */ + igmp_set_timeout(&itp); + return (error); } @@ -2628,14 +2675,16 @@ inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) } else { ifp = ip_multicast_if(&addr, &ifindex); if (ifp 
== NULL) { - IGMP_PRINTF(("%s: can't find ifp for addr=%s\n", - __func__, inet_ntoa(addr))); + IGMP_INET_PRINTF(addr, + ("%s: can't find ifp for addr=%s\n", + __func__, _igmp_inet_buf)); return (EADDRNOTAVAIL); } } + /* XXX remove? */ #ifdef IGMP_DEBUG0 - IGMP_PRINTF(("%s: ifp = %p, addr = %s\n", __func__, ifp, - inet_ntoa(addr))); + IGMP_PRINTF(("%s: ifp = 0x%llx, addr = %s\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), inet_ntoa(addr))); #endif } @@ -2672,9 +2721,12 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) struct in_mfilter *imf; struct ip_moptions *imo; struct in_multi *inm; - size_t idx; + size_t idx; int error; - user_addr_t tmp_ptr; + user_addr_t tmp_ptr; + struct igmp_tparams itp; + + bzero(&itp, sizeof (itp)); if (IS_64BIT_PROCESS(current_proc())) { error = sooptcopyin(sopt, &msfr64, @@ -2695,8 +2747,8 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) } if ((size_t) msfr.msfr_nsrcs > - SIZE_MAX / sizeof(struct sockaddr_storage)) - msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage); + UINT32_MAX / sizeof(struct sockaddr_storage)) + msfr.msfr_nsrcs = UINT32_MAX / sizeof(struct sockaddr_storage); if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) return (ENOBUFS); @@ -2834,7 +2886,7 @@ inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) } IGMP_PRINTF(("%s: doing igmp downcall\n", __func__)); - error = igmp_change_state(inm); + error = igmp_change_state(inm, &itp); INM_UNLOCK(inm); #ifdef IGMP_DEBUG if (error) @@ -2853,6 +2905,9 @@ out_imo_locked: IMO_UNLOCK(imo); IMO_REMREF(imo); /* from inp_findmoptions() */ + /* schedule timer now that we've dropped the lock(s) */ + igmp_set_timeout(&itp); + return (error); } @@ -2882,9 +2937,9 @@ inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) * If socket is neither of type SOCK_RAW or SOCK_DGRAM, * or is a divert socket, reject it. */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) + if (SOCK_PROTO(inp->inp_socket) == IPPROTO_DIVERT || + (SOCK_TYPE(inp->inp_socket) != SOCK_RAW && + SOCK_TYPE(inp->inp_socket) != SOCK_DGRAM)) return (EOPNOTSUPP); switch (sopt->sopt_name) { @@ -3129,8 +3184,9 @@ sysctl_ip_mcast_filters SYSCTL_HANDLER_ARGS group.s_addr = name[1]; if (!IN_MULTICAST(ntohl(group.s_addr))) { - IGMP_PRINTF(("%s: group %s is not multicast\n", - __func__, inet_ntoa(group))); + IGMP_INET_PRINTF(group, + ("%s: group %s is not multicast\n", + __func__, _igmp_inet_buf)); ifnet_head_done(); return (EINVAL); } @@ -3162,8 +3218,8 @@ sysctl_ip_mcast_filters SYSCTL_HANDLER_ARGS #ifdef IGMP_DEBUG struct in_addr ina; ina.s_addr = htonl(ims->ims_haddr); - IGMP_PRINTF(("%s: visit node %s\n", __func__, - inet_ntoa(ina))); + IGMP_INET_PRINTF(ina, + ("%s: visit node %s\n", __func__, _igmp_inet_buf)); #endif /* * Only copy-out sources which are in-mode. 
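/*
 * Both inp_get_source_filters() and inp_set_source_filters() above now
 * clamp the user-supplied source count against
 * UINT32_MAX / sizeof (struct sockaddr_storage) rather than SIZE_MAX,
 * so the later size computation cannot overflow a 32-bit quantity
 * (a second, policy cap at in_mcast_maxsocksrc follows).  A userspace
 * sketch of the clamp with a stand-in element type:
 */
#include <stdint.h>
#include <stdio.h>

struct sockaddr_storage_like { char pad[128]; };

static uint32_t
clamp_nsrcs(uint64_t nsrcs)
{
    const uint64_t max = UINT32_MAX / sizeof (struct sockaddr_storage_like);

    return ((uint32_t)(nsrcs > max ? max : nsrcs));
}

int
main(void)
{
    /* A hostile count collapses to the largest multiply-safe value. */
    printf("clamped: %u\n", clamp_nsrcs(UINT64_MAX));
    printf("small passes through: %u\n", clamp_nsrcs(4));
    return (0);
}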
@@ -3597,6 +3653,7 @@ inm_mode_str(const int mode) static const char *inm_statestrs[] = { "not-member\n", "silent\n", + "reporting\n", "idle\n", "lazy\n", "sleeping\n", @@ -3621,26 +3678,28 @@ void inm_print(const struct in_multi *inm) { int t; + char buf[MAX_IPv4_STR_LEN]; - INM_LOCK_ASSERT_HELD(INM_CAST_TO_NONCONST(inm)); + INM_LOCK_ASSERT_HELD(__DECONST(struct in_multi *, inm)); if (igmp_debug == 0) return; - printf("%s: --- begin inm %p ---\n", __func__, inm); - printf("addr %s ifp %p(%s%d) ifma %p\n", - inet_ntoa(inm->inm_addr), - inm->inm_ifp, - inm->inm_ifp->if_name, - inm->inm_ifp->if_unit, - inm->inm_ifma); + inet_ntop(AF_INET, &inm->inm_addr, buf, sizeof(buf)); + printf("%s: --- begin inm 0x%llx ---\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm)); + printf("addr %s ifp 0x%llx(%s) ifma 0x%llx\n", + buf, + (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifp), + if_name(inm->inm_ifp), + (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_ifma)); printf("timer %u state %s refcount %u scq.len %u\n", inm->inm_timer, inm_state_str(inm->inm_state), inm->inm_refcount, inm->inm_scq.ifq_len); - printf("igi %p nsrc %lu sctimer %u scrv %u\n", - inm->inm_igi, + printf("igi 0x%llx nsrc %lu sctimer %u scrv %u\n", + (uint64_t)VM_KERNEL_ADDRPERM(inm->inm_igi), inm->inm_nsrc, inm->inm_sctimer, inm->inm_scrv); @@ -3652,7 +3711,8 @@ inm_print(const struct in_multi *inm) inm->inm_st[t].iss_in, inm->inm_st[t].iss_rec); } - printf("%s: --- end inm %p ---\n", __func__, inm); + printf("%s: --- end inm 0x%llx ---\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm)); } #else diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index 485e8dbcd..a3681b8b4 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
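/*
 * Two logging hardenings recur through the in_mcast.c hunks above:
 * inet_ntoa() (one shared static buffer) gives way to inet_ntop() into
 * caller-owned MAX_IPv4_STR_LEN buffers, and raw "%p" pointers give way
 * to (uint64_t)VM_KERNEL_ADDRPERM(ptr), which in XNU offsets non-NULL
 * addresses by a per-boot random value so log lines stay correlatable
 * without disclosing kernel layout.  A userspace sketch of both ideas;
 * the addr_perm() helper and its secret are illustrative only.
 */
#include <arpa/inet.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define MAX_IPv4_STR_LEN 16             /* "255.255.255.255" + NUL */

static uintptr_t addr_perm_secret;

static uint64_t
addr_perm(const void *p)
{
    if (p == NULL)                      /* keep NULL recognizable */
        return (0);
    return ((uint64_t)((uintptr_t)p + addr_perm_secret));
}

int
main(void)
{
    struct in_addr a, b;
    char buf_a[MAX_IPv4_STR_LEN], buf_b[MAX_IPv4_STR_LEN];

    srandom((unsigned)time(NULL));
    addr_perm_secret = (uintptr_t)random() << 12;

    inet_pton(AF_INET, "10.0.0.1", &a);
    inet_pton(AF_INET, "10.0.0.2", &b);

    /* Distinct buffers: inet_ntoa() would alias these two strings. */
    printf("join %s then %s\n",
        inet_ntop(AF_INET, &a, buf_a, sizeof (buf_a)),
        inet_ntop(AF_INET, &b, buf_b, sizeof (buf_b)));

    /* Same object, same cookie, but never the real address. */
    printf("inm 0x%" PRIx64 "\n", addr_perm(&a));
    printf("inm 0x%" PRIx64 "\n", addr_perm(&a));
    return (0);
}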
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -70,22 +70,20 @@ #include #include #include -#ifndef __APPLE__ -#include -#endif #include #include #include #include #include +#include +#include + #include #include #include -#ifdef __APPLE__ #include -#endif #include #include @@ -109,16 +107,44 @@ #include #include + #include +#include -#if IPSEC -extern int ipsec_bypass; +#if FLOW_DIVERT +#include #endif -#define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) -#define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) +static lck_grp_t *inpcb_lock_grp; +static lck_attr_t *inpcb_lock_attr; +static lck_grp_attr_t *inpcb_lock_grp_attr; +decl_lck_mtx_data(static, inpcb_lock); /* global INPCB lock */ +decl_lck_mtx_data(static, inpcb_timeout_lock); + +static TAILQ_HEAD(, inpcbinfo) inpcb_head = TAILQ_HEAD_INITIALIZER(inpcb_head); + +static u_int16_t inpcb_timeout_run = 0; /* INPCB timer is scheduled to run */ +static boolean_t inpcb_garbage_collecting = FALSE; /* gc timer is scheduled */ +static boolean_t inpcb_ticking = FALSE; /* "slow" timer is scheduled */ +static boolean_t inpcb_fast_timer_on = FALSE; +static void inpcb_sched_timeout(struct timeval *); +static void inpcb_timeout(void *); +int inpcb_timeout_lazy = 10; /* 10 seconds leeway for lazy timers */ +extern int tvtohz(struct timeval *); + +#if CONFIG_PROC_UUID_POLICY +static void inp_update_cellular_policy(struct inpcb *, boolean_t); +#if FLOW_DIVERT +static void inp_update_flow_divert_policy(struct inpcb *, boolean_t); +#endif /* FLOW_DIVERT */ +#endif /* !CONFIG_PROC_UUID_POLICY */ + +#if IPSEC +extern int ipsec_bypass; +#endif /* IPSEC */ -struct in_addr zeroin_addr; +#define DBG_FNC_PCB_LOOKUP NETDBG_CODE(DBG_NETTCP, (6 << 8)) +#define DBG_FNC_PCB_HLOOKUP NETDBG_CODE(DBG_NETTCP, ((6 << 8) | 1)) /* * These configure the range of local port addresses assigned to @@ -126,17 +152,12 @@ struct in_addr zeroin_addr; */ int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ -#ifndef __APPLE__ -int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ -int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ -#else -int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ -int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ -#endif +int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ +int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ -#define RANGECHK(var, min, max) \ +#define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } @@ -144,8 +165,9 @@ static int sysctl_net_ipport_check SYSCTL_HANDLER_ARGS { #pragma unused(arg1, arg2) - int error = sysctl_handle_int(oidp, - oidp->oid_arg1, oidp->oid_arg2, req); + int error; + + error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) { RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); @@ -154,25 +176,32 @@ sysctl_net_ipport_check SYSCTL_HANDLER_ARGS RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); } - return error; + return (error); } #undef RANGECHK -SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); - -SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, - &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 
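/*
 * sysctl_net_ipport_check() above re-clamps all six port-range
 * variables after any write, so a sysctl can never push, say,
 * ipport_lowfirstauto outside 1..IPPORT_RESERVED-1.  A userspace
 * sketch of the same RANGECHK clamp:
 */
#include <stdio.h>

#define IPPORT_RESERVED 1024

#define RANGECHK(var, min, max) \
    if ((var) < (min)) { (var) = (min); } \
    else if ((var) > (max)) { (var) = (max); }

static int ipport_lowfirstauto = IPPORT_RESERVED - 1;   /* 1023 */

int
main(void)
{
    ipport_lowfirstauto = 5000;         /* pretend a sysctl write did this */
    RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
    printf("lowfirst clamped to %d\n", ipport_lowfirstauto);
    return (0);
}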
-SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, - &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); -SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, - &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); -SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, - &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); -SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, - &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); -SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, - &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); +SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP Ports"); + +SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, + CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, + &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); +SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, + CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, + &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); +SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, + CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, + &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); +SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, + CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, + &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); +SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, + CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, + &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); +SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, + CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_LOCKED, + &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); extern int udp_use_randomport; extern int tcp_use_randomport; @@ -189,7 +218,7 @@ struct inp_flowhash_key_addr { }; struct inp_flowhash_key { - struct inp_flowhash_key_addr infh_laddr; + struct inp_flowhash_key_addr infh_laddr; struct inp_flowhash_key_addr infh_faddr; u_int32_t infh_lport; u_int32_t infh_fport; @@ -199,14 +228,19 @@ struct inp_flowhash_key { u_int32_t infh_rand2; }; -u_int32_t inp_hash_seed = 0; +static u_int32_t inp_hash_seed = 0; + +static int infc_cmp(const struct inpcb *, const struct inpcb *); + +/* Flags used by inp_fc_getinp */ +#define INPFC_SOLOCKED 0x1 +#define INPFC_REMOVE 0x2 +static struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t); + +static void inp_fc_feedback(struct inpcb *); +extern void tcp_remove_from_time_wait(struct inpcb *inp); -static __inline int infc_cmp(const struct inpcb *, - const struct inpcb *); -lck_grp_t *inp_lck_grp; -lck_grp_attr_t *inp_lck_grp_attr; -lck_attr_t *inp_lck_attr; -decl_lck_mtx_data(, inp_fc_lck); +decl_lck_mtx_data(static, inp_fc_lck); RB_HEAD(inp_fc_tree, inpcb) inp_fc_tree; RB_PROTOTYPE(inp_fc_tree, inpcb, infc_link, infc_cmp); @@ -222,25 +256,235 @@ struct inpcb key_inp; * in_pcb.c: manage the Protocol Control Blocks. */ -/* - * Initialize data structures required to deliver - * flow advisories. 
- */ void -socket_flowadv_init(void) +in_pcbinit(void) { - inp_lck_grp_attr = lck_grp_attr_alloc_init(); - inp_lck_grp = lck_grp_alloc_init("inp_lck_grp", inp_lck_grp_attr); + static int inpcb_initialized = 0; - inp_lck_attr = lck_attr_alloc_init(); - lck_mtx_init(&inp_fc_lck, inp_lck_grp, inp_lck_attr); + VERIFY(!inpcb_initialized); + inpcb_initialized = 1; + inpcb_lock_grp_attr = lck_grp_attr_alloc_init(); + inpcb_lock_grp = lck_grp_alloc_init("inpcb", inpcb_lock_grp_attr); + inpcb_lock_attr = lck_attr_alloc_init(); + lck_mtx_init(&inpcb_lock, inpcb_lock_grp, inpcb_lock_attr); + lck_mtx_init(&inpcb_timeout_lock, inpcb_lock_grp, inpcb_lock_attr); + + /* + * Initialize data structures required to deliver + * flow advisories. + */ + lck_mtx_init(&inp_fc_lck, inpcb_lock_grp, inpcb_lock_attr); lck_mtx_lock(&inp_fc_lck); RB_INIT(&inp_fc_tree); bzero(&key_inp, sizeof(key_inp)); lck_mtx_unlock(&inp_fc_lck); } +#define INPCB_HAVE_TIMER_REQ(req) (((req).intimer_lazy > 0) || \ + ((req).intimer_fast > 0) || ((req).intimer_nodelay > 0)) +static void +inpcb_timeout(void *arg) +{ +#pragma unused(arg) + struct inpcbinfo *ipi; + boolean_t t, gc; + struct intimercount gccnt, tmcnt; + struct timeval leeway; + + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + + lck_mtx_lock_spin(&inpcb_timeout_lock); + gc = inpcb_garbage_collecting; + inpcb_garbage_collecting = FALSE; + bzero(&gccnt, sizeof(gccnt)); + bzero(&tmcnt, sizeof(tmcnt)); + + t = inpcb_ticking; + inpcb_ticking = FALSE; + + if (gc || t) { + lck_mtx_unlock(&inpcb_timeout_lock); + + lck_mtx_lock(&inpcb_lock); + TAILQ_FOREACH(ipi, &inpcb_head, ipi_entry) { + if (INPCB_HAVE_TIMER_REQ(ipi->ipi_gc_req)) { + bzero(&ipi->ipi_gc_req, + sizeof(ipi->ipi_gc_req)); + if (gc && ipi->ipi_gc != NULL) { + ipi->ipi_gc(ipi); + gccnt.intimer_lazy += + ipi->ipi_gc_req.intimer_lazy; + gccnt.intimer_fast += + ipi->ipi_gc_req.intimer_fast; + gccnt.intimer_nodelay += + ipi->ipi_gc_req.intimer_nodelay; + } + } + if (INPCB_HAVE_TIMER_REQ(ipi->ipi_timer_req)) { + bzero(&ipi->ipi_timer_req, + sizeof(ipi->ipi_timer_req)); + if (t && ipi->ipi_timer != NULL) { + ipi->ipi_timer(ipi); + tmcnt.intimer_lazy += + ipi->ipi_timer_req.intimer_lazy; + tmcnt.intimer_lazy += + ipi->ipi_timer_req.intimer_fast; + tmcnt.intimer_nodelay += + ipi->ipi_timer_req.intimer_nodelay; + } + } + } + lck_mtx_unlock(&inpcb_lock); + lck_mtx_lock_spin(&inpcb_timeout_lock); + } + + /* lock was dropped above, so check first before overriding */ + if (!inpcb_garbage_collecting) + inpcb_garbage_collecting = INPCB_HAVE_TIMER_REQ(gccnt); + if (!inpcb_ticking) + inpcb_ticking = INPCB_HAVE_TIMER_REQ(tmcnt); + + /* re-arm the timer if there's work to do */ + inpcb_timeout_run--; + VERIFY(inpcb_timeout_run >= 0 && inpcb_timeout_run < 2); + + bzero(&leeway, sizeof(leeway)); + leeway.tv_sec = inpcb_timeout_lazy; + if (gccnt.intimer_nodelay > 0 || tmcnt.intimer_nodelay > 0) + inpcb_sched_timeout(NULL); + else if ((gccnt.intimer_fast + tmcnt.intimer_fast) <= 5) + /* be lazy when idle with little activity */ + inpcb_sched_timeout(&leeway); + else + inpcb_sched_timeout(NULL); + + lck_mtx_unlock(&inpcb_timeout_lock); +} + +static void +inpcb_sched_timeout(struct timeval *leeway) +{ + lck_mtx_assert(&inpcb_timeout_lock, LCK_MTX_ASSERT_OWNED); + + if (inpcb_timeout_run == 0 && + (inpcb_garbage_collecting || inpcb_ticking)) { + lck_mtx_convert_spin(&inpcb_timeout_lock); + 
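/*
 * The new inpcb_timeout() above drains the per-pcbinfo gc/timer
 * requests and then re-arms itself: immediately if any request was
 * "nodelay", with inpcb_timeout_lazy (10 s) of leeway when fast
 * activity is low, so a mostly idle system can coalesce wakeups.
 * A sketch of just that re-arm decision, with stand-in counters:
 */
#include <stdio.h>

struct intimercount_like { int lazy, fast, nodelay; };

static int inpcb_timeout_lazy = 10;     /* seconds of leeway */

static int
pick_leeway_sec(struct intimercount_like gc, struct intimercount_like tm)
{
    if (gc.nodelay > 0 || tm.nodelay > 0)
        return (0);                     /* someone needs service now */
    if (gc.fast + tm.fast <= 5)
        return (inpcb_timeout_lazy);    /* be lazy when nearly idle */
    return (0);
}

int
main(void)
{
    struct intimercount_like gc = { 3, 1, 0 }, tm = { 0, 2, 0 };

    printf("re-arm with %d s leeway\n", pick_leeway_sec(gc, tm));
    gc.nodelay = 1;
    printf("re-arm with %d s leeway\n", pick_leeway_sec(gc, tm));
    return (0);
}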
inpcb_timeout_run++; + if (leeway == NULL) { + inpcb_fast_timer_on = TRUE; + timeout(inpcb_timeout, NULL, hz); + } else { + inpcb_fast_timer_on = FALSE; + timeout_with_leeway(inpcb_timeout, NULL, hz, + tvtohz(leeway)); + } + } else if (inpcb_timeout_run == 1 && + leeway == NULL && !inpcb_fast_timer_on) { + /* + * Since the request was for a fast timer but the + * scheduled timer is a lazy timer, try to schedule + * another instance of fast timer also + */ + lck_mtx_convert_spin(&inpcb_timeout_lock); + inpcb_timeout_run++; + inpcb_fast_timer_on = TRUE; + timeout(inpcb_timeout, NULL, hz); + } +} + +void +inpcb_gc_sched(struct inpcbinfo *ipi, u_int32_t type) +{ + struct timeval leeway; + lck_mtx_lock_spin(&inpcb_timeout_lock); + inpcb_garbage_collecting = TRUE; + switch (type) { + case INPCB_TIMER_NODELAY: + atomic_add_32(&ipi->ipi_gc_req.intimer_nodelay, 1); + inpcb_sched_timeout(NULL); + break; + case INPCB_TIMER_FAST: + atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); + inpcb_sched_timeout(NULL); + break; + default: + atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); + leeway.tv_sec = inpcb_timeout_lazy; + leeway.tv_usec = 0; + inpcb_sched_timeout(&leeway); + break; + } + lck_mtx_unlock(&inpcb_timeout_lock); +} + +void +inpcb_timer_sched(struct inpcbinfo *ipi, u_int32_t type) +{ + struct timeval leeway; + lck_mtx_lock_spin(&inpcb_timeout_lock); + inpcb_ticking = TRUE; + switch (type) { + case INPCB_TIMER_NODELAY: + atomic_add_32(&ipi->ipi_timer_req.intimer_nodelay, 1); + inpcb_sched_timeout(NULL); + break; + case INPCB_TIMER_FAST: + atomic_add_32(&ipi->ipi_timer_req.intimer_fast, 1); + inpcb_sched_timeout(NULL); + break; + default: + atomic_add_32(&ipi->ipi_timer_req.intimer_lazy, 1); + leeway.tv_sec = inpcb_timeout_lazy; + leeway.tv_usec = 0; + inpcb_sched_timeout(&leeway); + break; + } + lck_mtx_unlock(&inpcb_timeout_lock); +} + +void +in_pcbinfo_attach(struct inpcbinfo *ipi) +{ + struct inpcbinfo *ipi0; + + lck_mtx_lock(&inpcb_lock); + TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { + if (ipi0 == ipi) { + panic("%s: ipi %p already in the list\n", + __func__, ipi); + /* NOTREACHED */ + } + } + TAILQ_INSERT_TAIL(&inpcb_head, ipi, ipi_entry); + lck_mtx_unlock(&inpcb_lock); +} + +int +in_pcbinfo_detach(struct inpcbinfo *ipi) +{ + struct inpcbinfo *ipi0; + int error = 0; + + lck_mtx_lock(&inpcb_lock); + TAILQ_FOREACH(ipi0, &inpcb_head, ipi_entry) { + if (ipi0 == ipi) + break; + } + if (ipi0 != NULL) + TAILQ_REMOVE(&inpcb_head, ipi0, ipi_entry); + else + error = ENXIO; + lck_mtx_unlock(&inpcb_lock); + + return (error); +} + /* * Allocate a PCB and associate it with the socket. * @@ -250,36 +494,25 @@ socket_flowadv_init(void) * ipsec_init_policy:??? 
[IPSEC] */ int -in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *p) +in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, struct proc *p) { +#pragma unused(p) struct inpcb *inp; - caddr_t temp; -#if IPSEC -#ifndef __APPLE__ - int error; -#endif -#endif + caddr_t temp; #if CONFIG_MACF_NET int mac_error; -#endif +#endif /* CONFIG_MACF_NET */ - if (so->cached_in_sock_layer == 0) { -#if TEMPDEBUG - printf("PCBALLOC calling zalloc for socket %x\n", so); -#endif - inp = (struct inpcb *) zalloc(pcbinfo->ipi_zone); - if (inp == NULL) - return (ENOBUFS); - bzero((caddr_t)inp, sizeof(*inp)); - } - else { -#if TEMPDEBUG - printf("PCBALLOC reusing PCB for socket %x\n", so); -#endif - inp = (struct inpcb *)(void *)so->so_saved_pcb; - temp = inp->inp_saved_ppcb; - bzero((caddr_t) inp, sizeof(*inp)); - inp->inp_saved_ppcb = temp; + if (!so->cached_in_sock_layer) { + inp = (struct inpcb *)zalloc(pcbinfo->ipi_zone); + if (inp == NULL) + return (ENOBUFS); + bzero((caddr_t)inp, sizeof (*inp)); + } else { + inp = (struct inpcb *)(void *)so->so_saved_pcb; + temp = inp->inp_saved_ppcb; + bzero((caddr_t)inp, sizeof (*inp)); + inp->inp_saved_ppcb = temp; } inp->inp_gencnt = ++pcbinfo->ipi_gencnt; @@ -288,108 +521,115 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc * #if CONFIG_MACF_NET mac_error = mac_inpcb_label_init(inp, M_WAITOK); if (mac_error != 0) { - if (so->cached_in_sock_layer == 0) + if (!so->cached_in_sock_layer) zfree(pcbinfo->ipi_zone, inp); return (mac_error); } mac_inpcb_label_associate(so, inp); -#endif - // make sure inp_stat is always 64bit aligned - inp->inp_stat = (struct inp_stat*)P2ROUNDUP(inp->inp_stat_store, sizeof(u_int64_t)); - if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) - + sizeof(*inp->inp_stat) > sizeof(inp->inp_stat_store)) { - panic("insufficient space to align inp_stat"); +#endif /* CONFIG_MACF_NET */ + /* make sure inp_stat is always 64-bit aligned */ + inp->inp_stat = (struct inp_stat *)P2ROUNDUP(inp->inp_stat_store, + sizeof (u_int64_t)); + if (((uintptr_t)inp->inp_stat - (uintptr_t)inp->inp_stat_store) + + sizeof (*inp->inp_stat) > sizeof (inp->inp_stat_store)) { + panic("%s: insufficient space to align inp_stat", __func__); + /* NOTREACHED */ + } + + /* make sure inp_cstat is always 64-bit aligned */ + inp->inp_cstat = (struct inp_stat *)P2ROUNDUP(inp->inp_cstat_store, + sizeof (u_int64_t)); + if (((uintptr_t)inp->inp_cstat - (uintptr_t)inp->inp_cstat_store) + + sizeof (*inp->inp_cstat) > sizeof (inp->inp_cstat_store)) { + panic("%s: insufficient space to align inp_cstat", __func__); + /* NOTREACHED */ + } + + /* make sure inp_wstat is always 64-bit aligned */ + inp->inp_wstat = (struct inp_stat *)P2ROUNDUP(inp->inp_wstat_store, + sizeof (u_int64_t)); + if (((uintptr_t)inp->inp_wstat - (uintptr_t)inp->inp_wstat_store) + + sizeof (*inp->inp_wstat) > sizeof (inp->inp_wstat_store)) { + panic("%s: insufficient space to align inp_wstat", __func__); + /* NOTREACHED */ } so->so_pcb = (caddr_t)inp; if (so->so_proto->pr_flags & PR_PCBLOCK) { - lck_mtx_init(&inp->inpcb_mtx, pcbinfo->mtx_grp, pcbinfo->mtx_attr); + lck_mtx_init(&inp->inpcb_mtx, pcbinfo->ipi_lock_grp, + pcbinfo->ipi_lock_attr); } -#if IPSEC -#ifndef __APPLE__ - if (ipsec_bypass == 0) { - error = ipsec_init_policy(so, &inp->inp_sp); - if (error != 0) { - zfree(pcbinfo->ipi_zone, inp); - return error; - } - } -#endif -#endif /*IPSEC*/ + #if INET6 - if (INP_SOCKAF(so) == AF_INET6 && !ip6_mapped_addr_on) + if (SOCK_DOM(so) == PF_INET6 && 
!ip6_mapped_addr_on) inp->inp_flags |= IN6P_IPV6_V6ONLY; -#endif - -#if INET6 + if (ip6_auto_flowlabel) inp->inp_flags |= IN6P_AUTOFLOWLABEL; -#endif - lck_rw_lock_exclusive(pcbinfo->mtx); +#endif /* INET6 */ + + (void) inp_update_policy(inp); + + lck_rw_lock_exclusive(pcbinfo->ipi_lock); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; - LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); + LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); pcbinfo->ipi_count++; - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (0); } - /* - in_pcblookup_local_and_cleanup does everything - in_pcblookup_local does but it checks for a socket - that's going away. Since we know that the lock is - held read+write when this funciton is called, we - can safely dispose of this socket like the slow - timer would usually do and return NULL. This is - great for bind. -*/ -struct inpcb* -in_pcblookup_local_and_cleanup( - struct inpcbinfo *pcbinfo, - struct in_addr laddr, - u_int lport_arg, - int wild_okay) + * in_pcblookup_local_and_cleanup does everything + * in_pcblookup_local does but it checks for a socket + * that's going away. Since we know that the lock is + * held read+write when this funciton is called, we + * can safely dispose of this socket like the slow + * timer would usually do and return NULL. This is + * great for bind. + */ +struct inpcb * +in_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, struct in_addr laddr, + u_int lport_arg, int wild_okay) { struct inpcb *inp; - + /* Perform normal lookup */ inp = in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); - + /* Check if we found a match but it's waiting to be disposed */ - if (inp && inp->inp_wantcnt == WNT_STOPUSING) { + if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { struct socket *so = inp->inp_socket; - + lck_mtx_lock(&inp->inpcb_mtx); - + if (so->so_usecount == 0) { if (inp->inp_state != INPCB_STATE_DEAD) in_pcbdetach(inp); - in_pcbdispose(inp); + in_pcbdispose(inp); /* will unlock & destroy */ inp = NULL; - } - else { + } else { lck_mtx_unlock(&inp->inpcb_mtx); } } - - return inp; + + return (inp); } -#ifdef __APPLE_API_PRIVATE static void in_pcb_conflict_post_msg(u_int16_t port) { - /* - * Radar 5523020 send a kernel event notification if a non-participating socket tries to bind - * the port a socket who has set SOF_NOTIFYCONFLICT owns. + /* + * Radar 5523020 send a kernel event notification if a + * non-participating socket tries to bind the port a socket + * who has set SOF_NOTIFYCONFLICT owns. */ - struct kev_msg ev_msg; + struct kev_msg ev_msg; struct kev_in_portinuse in_portinuse; - bzero(&in_portinuse, sizeof(struct kev_in_portinuse)); - bzero(&ev_msg, sizeof(struct kev_msg)); + bzero(&in_portinuse, sizeof (struct kev_in_portinuse)); + bzero(&ev_msg, sizeof (struct kev_msg)); in_portinuse.port = ntohs(port); /* port in host order */ in_portinuse.req_pid = proc_selfpid(); ev_msg.vendor_code = KEV_VENDOR_APPLE; @@ -397,12 +637,15 @@ in_pcb_conflict_post_msg(u_int16_t port) ev_msg.kev_subclass = KEV_INET_SUBCLASS; ev_msg.event_code = KEV_INET_PORTINUSE; ev_msg.dv[0].data_ptr = &in_portinuse; - ev_msg.dv[0].data_length = sizeof(struct kev_in_portinuse); + ev_msg.dv[0].data_length = sizeof (struct kev_in_portinuse); ev_msg.dv[1].data_length = 0; kev_post_msg(&ev_msg); } -#endif + /* + * Bind an INPCB to an address and/or port. This routine should not alter + * the caller-supplied local address "nam". + * * Returns: 0 Success * EADDRNOTAVAIL Address not available. 
* EINVAL Invalid argument @@ -417,7 +660,6 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) { struct socket *so = inp->inp_socket; unsigned short *lastport; - struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0, rand_port = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); @@ -426,34 +668,34 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); - if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) + if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) + if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) wild = 1; socket_unlock(so, 0); /* keep reference on socket */ - lck_rw_lock_exclusive(pcbinfo->mtx); - if (nam) { + lck_rw_lock_exclusive(pcbinfo->ipi_lock); + if (nam != NULL) { struct ifnet *outif = NULL; - sin = (struct sockaddr_in *)(void *)nam; - if (nam->sa_len != sizeof (*sin)) { - lck_rw_done(pcbinfo->mtx); + if (nam->sa_len != sizeof (struct sockaddr_in)) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EINVAL); } -#ifdef notdef +#if 0 /* * We should check the family, but old programs * incorrectly fail to initialize it. */ - if (sin->sin_family != AF_INET) { - lck_rw_done(pcbinfo->mtx); + if (nam->sa_family != AF_INET) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EAFNOSUPPORT); } -#endif - lport = sin->sin_port; - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { +#endif /* 0 */ + lport = SIN(nam)->sin_port; + + if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if @@ -463,136 +705,150 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; - } else if (sin->sin_addr.s_addr != INADDR_ANY) { + } else if (SIN(nam)->sin_addr.s_addr != INADDR_ANY) { + struct sockaddr_in sin; struct ifaddr *ifa; - sin->sin_port = 0; /* yech... */ - if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin)) == 0) { - lck_rw_done(pcbinfo->mtx); + + /* Sanitized for interface address searches */ + bzero(&sin, sizeof (sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof (struct sockaddr_in); + sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; + + ifa = ifa_ifwithaddr(SA(&sin)); + if (ifa == NULL) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EADDRNOTAVAIL); - } - else { + } else { + /* + * Opportunistically determine the outbound + * interface that may be used; this may not + * hold true if we end up using a route + * going over a different interface, e.g. + * when sending to a local address. This + * will get updated again after sending. 
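/*
 * The in_pcbbind() hunk above stops scribbling on the caller's
 * sockaddr (the old "sin->sin_port = 0; /+ yech... +/" trick) and
 * instead builds a zeroed local copy for the interface-address search,
 * matching the new contract that the routine "should not alter the
 * caller-supplied local address".  A sketch of the discipline; the
 * lookup is a hypothetical stand-in, and sin_len assumes a BSD/Darwin
 * style sockaddr_in (drop that assignment on platforms without it):
 */
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical lookup that expects a fully sanitized key. */
static int
ifa_lookup(const struct sockaddr_in *key)
{
    return (key->sin_port == 0);        /* pretend: only clean keys match */
}

static int
bind_check(const struct sockaddr_in *nam)
{
    struct sockaddr_in sin;

    /* Sanitized for interface address searches: zero pad and port
     * instead of mutating (and later restoring) the caller's struct. */
    bzero(&sin, sizeof (sin));
    sin.sin_family = AF_INET;
    sin.sin_len = sizeof (struct sockaddr_in);
    sin.sin_addr = nam->sin_addr;

    return (ifa_lookup(&sin));
}

int
main(void)
{
    struct sockaddr_in nam;

    bzero(&nam, sizeof (nam));
    nam.sin_family = AF_INET;
    nam.sin_port = htons(8080);
    nam.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

    printf("lookup ok: %d\n", bind_check(&nam));
    return (0);
}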
+ */ IFA_LOCK(ifa); outif = ifa->ifa_ifp; IFA_UNLOCK(ifa); IFA_REMREF(ifa); } } - if (lport) { + if (lport != 0) { struct inpcb *t; + uid_t u; /* GROSS */ -#if !CONFIG_EMBEDDED if (ntohs(lport) < IPPORT_RESERVED) { cred = kauth_cred_proc_ref(p); - error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); + error = priv_check_cred(cred, + PRIV_NETINET_RESERVEDPORT, 0); kauth_cred_unref(&cred); if (error != 0) { - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EACCES); } } -#endif - if (kauth_cred_getuid(so->so_cred) && - !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { - t = in_pcblookup_local_and_cleanup(inp->inp_pcbinfo, - sin->sin_addr, lport, INPLOOKUP_WILDCARD); - if (t && - (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || - ntohl(t->inp_laddr.s_addr) != INADDR_ANY || - (t->inp_socket->so_options & - SO_REUSEPORT) == 0) && - (kauth_cred_getuid(so->so_cred) != - kauth_cred_getuid(t->inp_socket->so_cred)) && - ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0) && - (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || - ntohl(t->inp_laddr.s_addr) != INADDR_ANY)) - { -#ifdef __APPLE_API_PRIVATE - - if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0)) - conflict = 1; - - lck_rw_done(pcbinfo->mtx); + if (!IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && + (u = kauth_cred_getuid(so->so_cred)) != 0 && + (t = in_pcblookup_local_and_cleanup( + inp->inp_pcbinfo, SIN(nam)->sin_addr, lport, + INPLOOKUP_WILDCARD)) != NULL && + (SIN(nam)->sin_addr.s_addr != INADDR_ANY || + t->inp_laddr.s_addr != INADDR_ANY || + !(t->inp_socket->so_options & SO_REUSEPORT)) && + (u != kauth_cred_getuid(t->inp_socket->so_cred)) && + !(t->inp_socket->so_flags & SOF_REUSESHAREUID) && + (SIN(nam)->sin_addr.s_addr != INADDR_ANY || + t->inp_laddr.s_addr != INADDR_ANY)) { + if ((t->inp_socket->so_flags & + SOF_NOTIFYCONFLICT) && + !(so->so_flags & SOF_NOTIFYCONFLICT)) + conflict = 1; + + lck_rw_done(pcbinfo->ipi_lock); + + if (conflict) + in_pcb_conflict_post_msg(lport); - if (conflict) - in_pcb_conflict_post_msg(lport); -#else - lck_rw_done(pcbinfo->mtx); -#endif /* __APPLE_API_PRIVATE */ - - socket_lock(so, 0); - return (EADDRINUSE); - } + socket_lock(so, 0); + return (EADDRINUSE); } - t = in_pcblookup_local_and_cleanup(pcbinfo, sin->sin_addr, - lport, wild); - if (t && + t = in_pcblookup_local_and_cleanup(pcbinfo, + SIN(nam)->sin_addr, lport, wild); + if (t != NULL && (reuseport & t->inp_socket->so_options) == 0) { #if INET6 - if (ntohl(sin->sin_addr.s_addr) != - INADDR_ANY || - ntohl(t->inp_laddr.s_addr) != - INADDR_ANY || - INP_SOCKAF(so) != AF_INET6 || - INP_SOCKAF(t->inp_socket) != AF_INET6) + if (SIN(nam)->sin_addr.s_addr != INADDR_ANY || + t->inp_laddr.s_addr != INADDR_ANY || + SOCK_DOM(so) != PF_INET6 || + SOCK_DOM(t->inp_socket) != PF_INET6) #endif /* INET6 */ { -#ifdef __APPLE_API_PRIVATE - if ((t->inp_socket->so_flags & SOF_NOTIFYCONFLICT) && ((so->so_flags & SOF_NOTIFYCONFLICT) == 0)) + if ((t->inp_socket->so_flags & + SOF_NOTIFYCONFLICT) && + !(so->so_flags & SOF_NOTIFYCONFLICT)) conflict = 1; - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); if (conflict) in_pcb_conflict_post_msg(lport); -#else - lck_rw_done(pcbinfo->mtx); -#endif /* __APPLE_API_PRIVATE */ socket_lock(so, 0); return (EADDRINUSE); } } } - inp->inp_laddr = sin->sin_addr; + inp->inp_laddr = SIN(nam)->sin_addr; inp->inp_last_outifp = outif; } if (lport == 0) { u_short first, last; int count; - randomport = (so->so_flags & SOF_BINDRANDOMPORT) || - 
(so->so_type == SOCK_STREAM ? tcp_use_randomport : udp_use_randomport); + randomport = (so->so_flags & SOF_BINDRANDOMPORT) || + (so->so_type == SOCK_STREAM ? tcp_use_randomport : + udp_use_randomport); + + /* + * TODO: + * + * The following should be moved into its own routine and + * thus can be shared with in6_pcbsetport(); the latter + * currently duplicates the logic. + */ inp->inp_flags |= INP_ANONPORT; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; - lastport = &pcbinfo->lasthi; + lastport = &pcbinfo->ipi_lasthi; } else if (inp->inp_flags & INP_LOWPORT) { cred = kauth_cred_proc_ref(p); - error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); + error = priv_check_cred(cred, + PRIV_NETINET_RESERVEDPORT, 0); kauth_cred_unref(&cred); if (error != 0) { - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return error; + return (error); } first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ - lastport = &pcbinfo->lastlow; + lastport = &pcbinfo->ipi_lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; - lastport = &pcbinfo->lastport; + lastport = &pcbinfo->ipi_lastport; } /* No point in randomizing if only one port is available */ if (first == last) - randomport = 0; + randomport = 0; /* * Simple check to ensure all ports are not used up causing * a deadlock here. @@ -605,14 +861,15 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) * counting down */ if (randomport) { - read_random(&rand_port, sizeof(rand_port)); - *lastport = first - (rand_port % (first - last)); + read_random(&rand_port, sizeof (rand_port)); + *lastport = + first - (rand_port % (first - last)); } count = first - last; do { if (count-- < 0) { /* completely used? */ - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_last_outifp = NULL; @@ -623,20 +880,21 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local_and_cleanup(pcbinfo, - inp->inp_laddr, lport, wild)); + inp->inp_laddr, lport, wild)); } else { /* * counting up */ if (randomport) { - read_random(&rand_port, sizeof(rand_port)); - *lastport = first + (rand_port % (first - last)); + read_random(&rand_port, sizeof (rand_port)); + *lastport = + first + (rand_port % (first - last)); } count = last - first; do { if (count-- < 0) { /* completely used? 
*/ - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_last_outifp = NULL; @@ -647,7 +905,7 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local_and_cleanup(pcbinfo, - inp->inp_laddr, lport, wild)); + inp->inp_laddr, lport, wild)); } } socket_lock(so, 0); @@ -656,24 +914,28 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; inp->inp_last_outifp = NULL; - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); sflt_notify(so, sock_evt_bound, NULL); return (0); } /* - * Transform old in_pcbconnect() into an inner subroutine for new - * in_pcbconnect(): Do some validity-checking on the remote - * address (in mbuf 'nam') and then determine local host address - * (i.e., which interface) to use to access that remote host. + * Transform old in_pcbconnect() into an inner subroutine for new + * in_pcbconnect(); do some validity-checking on the remote address + * (in "nam") and then determine local host address (i.e., which + * interface) to use to access that remote host. + * + * This routine may alter the caller-supplied remote address "nam". * - * This preserves definition of in_pcbconnect(), while supporting a - * slightly different version for T/TCP. (This is more than - * a bit of a kludge, but cleaning up the internal interfaces would - * have forced minor changes in every protocol). + * The caller may override the bound-to-interface setting of the socket + * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) + * + * This routine might return an ifp with a reference held if the caller + * provides a non-NULL outif, even in the error case. The caller is + * responsible for releasing its reference. * * Returns: 0 Success * EINVAL Invalid argument @@ -681,199 +943,237 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) * EADDRNOTAVAIL Address not available */ int -in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, - struct sockaddr_in *plocal_sin, struct ifnet **outif) +in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, + unsigned int ifscope, struct ifnet **outif) { - struct in_ifaddr *ia; - struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; - - if (nam->sa_len != sizeof (*sin)) + boolean_t nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR); + struct route *ro = &inp->inp_route; + struct in_ifaddr *ia = NULL; + struct sockaddr_in sin; + int error = 0; + + if (outif != NULL) + *outif = NULL; + if (nam->sa_len != sizeof (struct sockaddr_in)) return (EINVAL); - if (sin->sin_family != AF_INET) + if (SIN(nam)->sin_family != AF_INET) return (EAFNOSUPPORT); - if (sin->sin_port == 0) + if (SIN(nam)->sin_port == 0) return (EADDRNOTAVAIL); - lck_rw_lock_shared(in_ifaddr_rwlock); - if (!TAILQ_EMPTY(&in_ifaddrhead)) { - ia = TAILQ_FIRST(&in_ifaddrhead); - /* - * If the destination address is INADDR_ANY, - * use the primary local address. - * If the supplied address is INADDR_BROADCAST, - * and the primary interface supports broadcast, - * choose the broadcast address for that interface. 
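/*
 * The ephemeral-port loops above start from a random offset inside the
 * selected range (when SOF_BINDRANDOMPORT or the per-protocol sysctl
 * asks for it), walk down or up with wraparound, and fail with EAGAIN
 * once every port has been tried.  A userspace sketch of the
 * counting-down variant; port_in_use() stands in for
 * in_pcblookup_local_and_cleanup():
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Pretend-lookup: ports 49152..49160 are already bound. */
static bool
port_in_use(uint16_t port)
{
    return (port >= 49152 && port <= 49160);
}

static int
pick_port_down(uint16_t first, uint16_t last, uint16_t *out)
{
    uint16_t lastport = first - (uint16_t)(random() % (first - last));
    int count = first - last;

    do {
        if (count-- < 0)                /* completely used? */
            return (-1);                /* EAGAIN in the kernel */
        --lastport;
        if (lastport < last)            /* wrap back to the top */
            lastport = first;
    } while (port_in_use(lastport));

    *out = lastport;
    return (0);
}

int
main(void)
{
    uint16_t port;

    srandom((unsigned)time(NULL));
    if (pick_port_down(65535, 49152, &port) == 0)
        printf("chose ephemeral port %u\n", port);
    return (0);
}
/*
 * Randomizing the starting point (rather than always resuming from the
 * previous lastport) makes port prediction harder; when first == last
 * the hunk skips randomization since there is nothing to randomize.
 */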
- */ - IFA_LOCK_SPIN(&ia->ia_ifa); - if (sin->sin_addr.s_addr == INADDR_ANY) - sin->sin_addr = IA_SIN(ia)->sin_addr; - else if (sin->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST && - (ia->ia_ifp->if_flags & IFF_BROADCAST)) - sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; - IFA_UNLOCK(&ia->ia_ifa); - ia = NULL; + /* + * If the destination address is INADDR_ANY, + * use the primary local address. + * If the supplied address is INADDR_BROADCAST, + * and the primary interface supports broadcast, + * choose the broadcast address for that interface. + */ + if (SIN(nam)->sin_addr.s_addr == INADDR_ANY || + SIN(nam)->sin_addr.s_addr == (u_int32_t)INADDR_BROADCAST) { + lck_rw_lock_shared(in_ifaddr_rwlock); + if (!TAILQ_EMPTY(&in_ifaddrhead)) { + ia = TAILQ_FIRST(&in_ifaddrhead); + IFA_LOCK_SPIN(&ia->ia_ifa); + if (SIN(nam)->sin_addr.s_addr == INADDR_ANY) { + SIN(nam)->sin_addr = IA_SIN(ia)->sin_addr; + } else if (ia->ia_ifp->if_flags & IFF_BROADCAST) { + SIN(nam)->sin_addr = + SIN(&ia->ia_broadaddr)->sin_addr; + } + IFA_UNLOCK(&ia->ia_ifa); + ia = NULL; + } + lck_rw_done(in_ifaddr_rwlock); + } + /* + * Otherwise, if the socket has already bound the source, just use it. + */ + if (inp->inp_laddr.s_addr != INADDR_ANY) { + VERIFY(ia == NULL); + *laddr = inp->inp_laddr; + return (0); } - lck_rw_done(in_ifaddr_rwlock); - - if (inp->inp_laddr.s_addr == INADDR_ANY) { - struct route *ro; - unsigned int ifscope = IFSCOPE_NONE; - unsigned int nocell; - /* - * If the socket is bound to a specifc interface, the - * optional scoped takes precedence over that if it - * is set by the caller. - */ - ia = (struct in_ifaddr *)0; - if (outif != NULL && *outif != NULL) - ifscope = (*outif)->if_index; - else if (inp->inp_flags & INP_BOUND_IF) - ifscope = inp->inp_boundifp->if_index; + /* + * If the ifscope is specified by the caller (e.g. IP_PKTINFO) + * then it overrides the sticky ifscope set for the socket. + */ + if (ifscope == IFSCOPE_NONE && (inp->inp_flags & INP_BOUND_IF)) + ifscope = inp->inp_boundifp->if_index; - nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0; - /* - * If route is known or can be allocated now, - * our src addr is taken from the i/f, else punt. - * Note that we should check the address family of the cached - * destination, in case of sharing the cache with IPv6. - */ - ro = &inp->inp_route; + /* + * If route is known or can be allocated now, + * our src addr is taken from the i/f, else punt. + * Note that we should check the address family of the cached + * destination, in case of sharing the cache with IPv6. 
+ */ + if (ro->ro_rt != NULL) + RT_LOCK_SPIN(ro->ro_rt); + if (ROUTE_UNUSABLE(ro) || ro->ro_dst.sa_family != AF_INET || + SIN(&ro->ro_dst)->sin_addr.s_addr != SIN(nam)->sin_addr.s_addr || + (inp->inp_socket->so_options & SO_DONTROUTE)) { if (ro->ro_rt != NULL) - RT_LOCK_SPIN(ro->ro_rt); - if (ro->ro_rt && (ro->ro_dst.sa_family != AF_INET || - satosin(&ro->ro_dst)->sin_addr.s_addr != - sin->sin_addr.s_addr || - inp->inp_socket->so_options & SO_DONTROUTE || - ro->ro_rt->generation_id != route_generation)) { RT_UNLOCK(ro->ro_rt); - rtfree(ro->ro_rt); - ro->ro_rt = NULL; - } - if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ - (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { - if (ro->ro_rt != NULL) - RT_UNLOCK(ro->ro_rt); - /* No route yet, so try to acquire one */ - bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); - ro->ro_dst.sa_family = AF_INET; - ro->ro_dst.sa_len = sizeof(struct sockaddr_in); - ((struct sockaddr_in *)(void *)&ro->ro_dst)->sin_addr = - sin->sin_addr; - rtalloc_scoped(ro, ifscope); - if (ro->ro_rt != NULL) - RT_LOCK_SPIN(ro->ro_rt); - } + ROUTE_RELEASE(ro); + } + if (!(inp->inp_socket->so_options & SO_DONTROUTE) && + (ro->ro_rt == NULL || ro->ro_rt->rt_ifp == NULL)) { + if (ro->ro_rt != NULL) + RT_UNLOCK(ro->ro_rt); + ROUTE_RELEASE(ro); + /* No route yet, so try to acquire one */ + bzero(&ro->ro_dst, sizeof (struct sockaddr_in)); + ro->ro_dst.sa_family = AF_INET; + ro->ro_dst.sa_len = sizeof (struct sockaddr_in); + SIN(&ro->ro_dst)->sin_addr = SIN(nam)->sin_addr; + rtalloc_scoped(ro, ifscope); + if (ro->ro_rt != NULL) + RT_LOCK_SPIN(ro->ro_rt); + } + /* Sanitized local copy for interface address searches */ + bzero(&sin, sizeof (sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof (struct sockaddr_in); + sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; + /* + * If we did not find (or use) a route, assume dest is reachable + * on a directly connected network and try to find a corresponding + * interface to take the source address from. + */ + if (ro->ro_rt == NULL) { + VERIFY(ia == NULL); + ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); + if (ia == NULL) + ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); + error = ((ia == NULL) ? ENETUNREACH : 0); + goto done; + } + RT_LOCK_ASSERT_HELD(ro->ro_rt); + /* + * If the outgoing interface on the route found is not + * a loopback interface, use the address from that interface. + */ + if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { + VERIFY(ia == NULL); /* * If the route points to a cellular interface and the * caller forbids our using interfaces of such type, * pretend that there is no route. */ - if (nocell && ro->ro_rt != NULL) { - RT_LOCK_ASSERT_HELD(ro->ro_rt); - if (ro->ro_rt->rt_ifp->if_type == IFT_CELLULAR) { - RT_UNLOCK(ro->ro_rt); - rtfree(ro->ro_rt); - ro->ro_rt = NULL; - soevent(inp->inp_socket, - (SO_FILT_HINT_LOCKED | - SO_FILT_HINT_IFDENIED)); - } - } - /* - * If we found a route, use the address - * corresponding to the outgoing interface - * unless it is the loopback (in case a route - * to our address on another net goes to loopback). 
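/*
 * The rewritten in_pcbladdr() above selects the source address in
 * tiers: a socket already bound keeps its address; otherwise the
 * (re)validated, scope-aware route supplies its interface address
 * unless that interface is loopback, in which case the destination's
 * directly connected interface is searched.  A compressed userspace
 * sketch of the tier order, with every lookup stubbed out by a
 * hypothetical helper:
 */
#include <stdbool.h>
#include <stdio.h>

struct addr_like { unsigned s; };

static bool
have_bound(const struct addr_like *bound)
{
    return (bound->s != 0);             /* INADDR_ANY means "not bound" */
}

/* Stubs for rtalloc_scoped() + rt_ifa and the ifa_ifwith*() searches. */
static bool
route_src(struct addr_like *out, bool *loopback)
{
    *loopback = false;
    out->s = 0x0a000001;                /* pretend: 10.0.0.1 via route */
    return (true);
}

static bool
dst_if_src(struct addr_like *out)
{
    out->s = 0xc0a80101;                /* pretend: 192.168.1.1 */
    return (true);
}

static int
pick_laddr(const struct addr_like *bound, struct addr_like *out)
{
    bool loopback;

    if (have_bound(bound)) {            /* tier 1: explicit bind wins */
        *out = *bound;
        return (0);
    }
    if (route_src(out, &loopback) && !loopback)
        return (0);                     /* tier 2: route's interface */
    if (dst_if_src(out))
        return (0);                     /* tier 3: dst's connected if */
    return (-1);                        /* ENETUNREACH in the kernel */
}

int
main(void)
{
    struct addr_like bound = { 0 }, laddr;

    if (pick_laddr(&bound, &laddr) == 0)
        printf("laddr = 0x%08x\n", laddr.s);
    return (0);
}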
- */ - if (ro->ro_rt != NULL) { + if (nocell && IFNET_IS_CELLULAR(ro->ro_rt->rt_ifp)) { + RT_UNLOCK(ro->ro_rt); + ROUTE_RELEASE(ro); + error = EHOSTUNREACH; + } else { /* Become a regular mutex */ RT_CONVERT_LOCK(ro->ro_rt); - if (!(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { - ia = ifatoia(ro->ro_rt->rt_ifa); - if (ia) { - IFA_ADDREF(&ia->ia_ifa); - } - } + ia = ifatoia(ro->ro_rt->rt_ifa); + IFA_ADDREF(&ia->ia_ifa); RT_UNLOCK(ro->ro_rt); + error = 0; } - if (ia == 0) { - u_short fport = sin->sin_port; - - sin->sin_port = 0; - ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); - if (ia == 0) { - ia = ifatoia(ifa_ifwithnet_scoped(sintosa(sin), - ifscope)); - } - sin->sin_port = fport; - if (ia == 0) { - lck_rw_lock_shared(in_ifaddr_rwlock); - ia = TAILQ_FIRST(&in_ifaddrhead); - if (ia) - IFA_ADDREF(&ia->ia_ifa); - lck_rw_done(in_ifaddr_rwlock); - } - /* - * If the source address belongs to a cellular interface - * and the socket forbids our using interfaces of such - * type, pretend that there is no source address. - */ - if (nocell && ia != NULL && - ia->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) { + goto done; + } + VERIFY(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK); + RT_UNLOCK(ro->ro_rt); + /* + * The outgoing interface is marked with 'loopback net', so a route + * to ourselves is here. + * Try to find the interface of the destination address and then + * take the address from there. That interface is not necessarily + * a loopback interface. + */ + VERIFY(ia == NULL); + ia = ifatoia(ifa_ifwithdstaddr(SA(&sin))); + if (ia == NULL) + ia = ifatoia(ifa_ifwithaddr_scoped(SA(&sin), ifscope)); + if (ia == NULL) + ia = ifatoia(ifa_ifwithnet_scoped(SA(&sin), ifscope)); + if (ia == NULL) { + RT_LOCK(ro->ro_rt); + ia = ifatoia(ro->ro_rt->rt_ifa); + if (ia != NULL) + IFA_ADDREF(&ia->ia_ifa); + RT_UNLOCK(ro->ro_rt); + } + error = ((ia == NULL) ? ENETUNREACH : 0); + +done: + /* + * If the destination address is multicast and an outgoing + * interface has been set as a multicast option, use the + * address of that interface as our source address. + */ + if (error == 0 && IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && + inp->inp_moptions != NULL) { + struct ip_moptions *imo; + struct ifnet *ifp; + + imo = inp->inp_moptions; + IMO_LOCK(imo); + if (imo->imo_multicast_ifp != NULL && (ia == NULL || + ia->ia_ifp != imo->imo_multicast_ifp)) { + ifp = imo->imo_multicast_ifp; + if (ia != NULL) IFA_REMREF(&ia->ia_ifa); - ia = NULL; - soevent(inp->inp_socket, - (SO_FILT_HINT_LOCKED | - SO_FILT_HINT_IFDENIED)); + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { + if (ia->ia_ifp == ifp) + break; } - if (ia == 0) - return (EADDRNOTAVAIL); + if (ia != NULL) + IFA_ADDREF(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + if (ia == NULL) + error = EADDRNOTAVAIL; } + IMO_UNLOCK(imo); + } + /* + * Don't do pcblookup call here; return interface in laddr + * and exit to caller, that will do the lookup. + */ + if (ia != NULL) { /* - * If the destination address is multicast and an outgoing - * interface has been set as a multicast option, use the - * address of that interface as our source address. + * If the source address belongs to a cellular interface + * and the socket forbids our using interfaces of such + * type, pretend that there is no source address. 
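For the multicast case that begins here, source selection scans in_ifaddrhead for an address owned by the pinned outgoing interface. A simplified self-contained version of that scan, with addr_entry standing in for struct in_ifaddr and an index standing in for the ifnet pointer:

```c
#include <stdbool.h>
#include <stddef.h>
#include <netinet/in.h>

/* Illustrative address-list entry, not the kernel's struct in_ifaddr. */
struct addr_entry {
	struct addr_entry *ae_next;
	int		ae_ifindex;	/* owning interface */
	struct in_addr	ae_addr;	/* configured IPv4 address */
};

/*
 * Walk the address list for an address owned by exactly the pinned
 * interface; a false return corresponds to the EADDRNOTAVAIL case.
 */
static bool
source_for_ifindex(const struct addr_entry *head, int ifindex,
    struct in_addr *src)
{
	for (; head != NULL; head = head->ae_next) {
		if (head->ae_ifindex == ifindex) {
			*src = head->ae_addr;
			return (true);
		}
	}
	return (false);
}
```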
*/ - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && - inp->inp_moptions != NULL) { - struct ip_moptions *imo; - struct ifnet *ifp; - - imo = inp->inp_moptions; - IMO_LOCK(imo); - if (imo->imo_multicast_ifp != NULL && (ia == NULL || - ia->ia_ifp != imo->imo_multicast_ifp)) { - ifp = imo->imo_multicast_ifp; - if (ia) - IFA_REMREF(&ia->ia_ifa); - lck_rw_lock_shared(in_ifaddr_rwlock); - TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) { - if (ia->ia_ifp == ifp) - break; - } - if (ia) - IFA_ADDREF(&ia->ia_ifa); - lck_rw_done(in_ifaddr_rwlock); - if (ia == 0) { - IMO_UNLOCK(imo); - return (EADDRNOTAVAIL); - } + IFA_LOCK_SPIN(&ia->ia_ifa); + if (nocell && IFNET_IS_CELLULAR(ia->ia_ifa.ifa_ifp)) { + IFA_UNLOCK(&ia->ia_ifa); + error = EHOSTUNREACH; + } else if (error == 0) { + *laddr = ia->ia_addr.sin_addr; + if (outif != NULL) { + struct ifnet *ifp; + + if (ro->ro_rt != NULL) + ifp = ro->ro_rt->rt_ifp; + else + ifp = ia->ia_ifp; + + VERIFY(ifp != NULL); + IFA_CONVERT_LOCK(&ia->ia_ifa); + ifnet_reference(ifp); /* for caller */ + if (*outif != NULL) + ifnet_release(*outif); + *outif = ifp; } - IMO_UNLOCK(imo); + IFA_UNLOCK(&ia->ia_ifa); + } else { + IFA_UNLOCK(&ia->ia_ifa); } - /* - * Don't do pcblookup call here; return interface in plocal_sin - * and exit to caller, that will do the lookup. - */ - IFA_LOCK_SPIN(&ia->ia_ifa); - *plocal_sin = ia->ia_addr; - if (outif != NULL) - *outif = ia->ia_ifp; - IFA_UNLOCK(&ia->ia_ifa); IFA_REMREF(&ia->ia_ifa); + ia = NULL; + } + + if (nocell && error == EHOSTUNREACH) { + soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | + SO_FILT_HINT_IFDENIED)); } - return(0); + + return (error); } /* @@ -882,12 +1182,15 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. + * + * The caller may override the bound-to-interface setting of the socket + * by specifying the ifscope parameter (e.g. from IP_PKTINFO.) */ int in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, - struct ifnet **outif) + unsigned int ifscope, struct ifnet **outif) { - struct sockaddr_in ifaddr; + struct in_addr laddr; struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; struct inpcb *pcb; int error; @@ -895,22 +1198,22 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, /* * Call inner routine, to assign local interface address. */ - if ((error = in_pcbladdr(inp, nam, &ifaddr, outif)) != 0) - return(error); + if ((error = in_pcbladdr(inp, nam, &laddr, ifscope, outif)) != 0) + return (error); socket_unlock(inp->inp_socket, 0); pcb = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, - inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr.sin_addr, + inp->inp_laddr.s_addr ? inp->inp_laddr : laddr, inp->inp_lport, 0, NULL); socket_lock(inp->inp_socket, 0); - /* Check if the socket is still in a valid state. When we unlock this - * embryonic socket, it can get aborted if another thread is closing + /* + * Check if the socket is still in a valid state. When we unlock this + * embryonic socket, it can get aborted if another thread is closing * the listener (radar 7947600). */ - if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) { - return ECONNREFUSED; - } + if ((inp->inp_socket->so_flags & SOF_ABORTED) != 0) + return (ECONNREFUSED); if (pcb != NULL) { in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 
1 : 0); @@ -918,53 +1221,64 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p, } if (inp->inp_laddr.s_addr == INADDR_ANY) { if (inp->inp_lport == 0) { - error = in_pcbbind(inp, (struct sockaddr *)0, p); + error = in_pcbbind(inp, NULL, p); if (error) - return (error); + return (error); } - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { + /* + * Lock inversion issue, mostly with udp + * multicast packets. + */ socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); socket_lock(inp->inp_socket, 0); } - inp->inp_laddr = ifaddr.sin_addr; + inp->inp_laddr = laddr; + /* no reference needed */ inp->inp_last_outifp = (outif != NULL) ? *outif : NULL; inp->inp_flags |= INP_INADDR_ANY; - } - else { - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ + } else { + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { + /* + * Lock inversion issue, mostly with udp + * multicast packets. + */ socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); socket_lock(inp->inp_socket, 0); } } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; in_pcbrehash(inp); - lck_rw_done(inp->inp_pcbinfo->mtx); + lck_rw_done(inp->inp_pcbinfo->ipi_lock); return (0); } void in_pcbdisconnect(struct inpcb *inp) { + struct socket *so = inp->inp_socket; inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ - socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); - socket_lock(inp->inp_socket, 0); + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { + /* lock inversion issue, mostly with udp multicast packets */ + socket_unlock(so, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); + socket_lock(so, 0); } in_pcbrehash(inp); - lck_rw_done(inp->inp_pcbinfo->mtx); - - if (inp->inp_socket->so_state & SS_NOFDREF) + lck_rw_done(inp->inp_pcbinfo->ipi_lock); + /* + * A multipath subflow socket would have its SS_NOFDREF set by default, + * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; + * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. 
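Both branches above (and several functions below) repeat one recipe for taking the pcbinfo lock exclusively while a per-socket lock is held. A compact pthreads rendition of that recipe, with plain mutex/rwlock types standing in for the kernel's socket and lck_rw locks:

```c
#include <pthread.h>

/*
 * Try the global lock without blocking while the per-socket lock is
 * held; on failure, drop the socket lock, block on the global lock,
 * then retake the socket lock.
 */
static void
take_both(pthread_rwlock_t *global, pthread_mutex_t *sock)
{
	/* caller already holds *sock */
	if (pthread_rwlock_trywrlock(global) != 0) {
		pthread_mutex_unlock(sock);	/* avoid lock inversion */
		pthread_rwlock_wrlock(global);	/* may now block safely */
		pthread_mutex_lock(sock);	/* re-acquire */
	}
	/* both locks held, acquired in a deadlock-free order */
}
```

As in the kernel code, anything checked before the drop must be revalidated afterwards; that is why in_pcbinshash() below rechecks INPCB_STATE_DEAD after re-locking.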
+ */ + if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) in_pcbdetach(inp); } @@ -973,130 +1287,130 @@ in_pcbdetach(struct inpcb *inp) { struct socket *so = inp->inp_socket; - if (so->so_pcb == 0) { /* we've been called twice */ - panic("in_pcbdetach: inp=%p so=%p proto=%d so_pcb is null!\n", - inp, so, so->so_proto->pr_protocol); + if (so->so_pcb == NULL) { + /* PCB has been disposed */ + panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__, + inp, so, SOCK_PROTO(so)); + /* NOTREACHED */ } #if IPSEC - if (ipsec_bypass == 0) { - ipsec4_delete_pcbpolicy(inp); + if (inp->inp_sp != NULL) { + (void) ipsec4_delete_pcbpolicy(inp); } -#endif /*IPSEC*/ +#endif /* IPSEC */ /* mark socket state as dead */ - if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) - panic("in_pcbdetach so=%p prot=%x couldn't set to STOPUSING\n", so, so->so_proto->pr_protocol); + if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { + panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", + __func__, so, SOCK_PROTO(so)); + /* NOTREACHED */ + } -#if TEMPDEBUG - if (so->cached_in_sock_layer) - printf("in_pcbdetach for cached socket %x flags=%x\n", so, so->so_flags); - else - printf("in_pcbdetach for allocated socket %x flags=%x\n", so, so->so_flags); -#endif - if ((so->so_flags & SOF_PCBCLEARING) == 0) { - struct rtentry *rt; + if (!(so->so_flags & SOF_PCBCLEARING)) { struct ip_moptions *imo; inp->inp_vflag = 0; - if (inp->inp_options) - (void)m_free(inp->inp_options); - if ((rt = inp->inp_route.ro_rt) != NULL) { - inp->inp_route.ro_rt = NULL; - rtfree(rt); + if (inp->inp_options != NULL) { + (void) m_free(inp->inp_options); + inp->inp_options = NULL; } + ROUTE_RELEASE(&inp->inp_route); imo = inp->inp_moptions; inp->inp_moptions = NULL; if (imo != NULL) IMO_REMREF(imo); sofreelastref(so, 0); inp->inp_state = INPCB_STATE_DEAD; - so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */ + /* makes sure we're not called twice from so_close */ + so->so_flags |= SOF_PCBCLEARING; + + inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); } } -void -in_pcbdispose(struct inpcb *inp) +void +in_pcbdispose(struct inpcb *inp) { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; -#if TEMPDEBUG - if (inp->inp_state != INPCB_STATE_DEAD) { - printf("in_pcbdispose: not dead yet? so=%p\n", so); + if (so != NULL && so->so_usecount != 0) { + panic("%s: so %p [%d,%d] usecount %d lockhistory %s\n", + __func__, so, SOCK_DOM(so), SOCK_TYPE(so), so->so_usecount, + solockhistory_nr(so)); + /* NOTREACHED */ + } else if (inp->inp_wantcnt != WNT_STOPUSING) { + if (so != NULL) { + panic_plain("%s: inp %p invalid wantcnt %d, so %p " + "[%d,%d] usecount %d retaincnt %d state 0x%x " + "flags 0x%x lockhistory %s\n", __func__, inp, + inp->inp_wantcnt, so, SOCK_DOM(so), SOCK_TYPE(so), + so->so_usecount, so->so_retaincnt, so->so_state, + so->so_flags, solockhistory_nr(so)); + /* NOTREACHED */ + } else { + panic("%s: inp %p invalid wantcnt %d no socket\n", + __func__, inp, inp->inp_wantcnt); + /* NOTREACHED */ + } } -#endif - if (so && so->so_usecount != 0) - panic("%s: so %p so_usecount %d so_lockhistory %s\n", - __func__, so, so->so_usecount, - (so != NULL) ? 
solockhistory_nr(so) : "--"); - lck_rw_assert(ipi->mtx, LCK_RW_ASSERT_EXCLUSIVE); + lck_rw_assert(ipi->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE); inp->inp_gencnt = ++ipi->ipi_gencnt; /* access ipi in in_pcbremlists */ in_pcbremlists(inp); - if (so) { + if (so != NULL) { if (so->so_proto->pr_flags & PR_PCBLOCK) { sofreelastref(so, 0); - if (so->so_rcv.sb_cc || so->so_snd.sb_cc) { -#if TEMPDEBUG - printf("in_pcbdispose sb not cleaned up so=%p rc_cci=%x snd_cc=%x\n", - so, so->so_rcv.sb_cc, so->so_snd.sb_cc); -#endif + if (so->so_rcv.sb_cc > 0 || so->so_snd.sb_cc > 0) { + /* + * selthreadclear() already called + * during sofreelastref() above. + */ sbrelease(&so->so_rcv); sbrelease(&so->so_snd); } - if (so->so_head != NULL) - panic("in_pcbdispose, so=%p head still exist\n", so); - lck_mtx_unlock(&inp->inpcb_mtx); - lck_mtx_destroy(&inp->inpcb_mtx, ipi->mtx_grp); + if (so->so_head != NULL) { + panic("%s: so=%p head still exist\n", + __func__, so); + /* NOTREACHED */ + } + lck_mtx_unlock(&inp->inpcb_mtx); + lck_mtx_destroy(&inp->inpcb_mtx, ipi->ipi_lock_grp); } - so->so_flags |= SOF_PCBCLEARING; /* makes sure we're not called twice from so_close */ - so->so_saved_pcb = (caddr_t) inp; - so->so_pcb = 0; - inp->inp_socket = 0; + /* makes sure we're not called twice from so_close */ + so->so_flags |= SOF_PCBCLEARING; + so->so_saved_pcb = (caddr_t)inp; + so->so_pcb = NULL; + inp->inp_socket = NULL; #if CONFIG_MACF_NET mac_inpcb_label_destroy(inp); -#endif +#endif /* CONFIG_MACF_NET */ /* * In case there a route cached after a detach (possible * in the tcp case), make sure that it is freed before * we deallocate the structure. */ - if (inp->inp_route.ro_rt != NULL) { - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = NULL; - } - if (so->cached_in_sock_layer == 0) { + ROUTE_RELEASE(&inp->inp_route); + if (!so->cached_in_sock_layer) { zfree(ipi->ipi_zone, inp); } sodealloc(so); } -#if TEMPDEBUG - else - printf("in_pcbdispose: no socket for inp=%p\n", inp); -#endif } /* - * The calling convention of in_setsockaddr() and in_setpeeraddr() was + * The calling convention of in_getsockaddr() and in_getpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly - * without the need for a wrapper function. The socket must have a valid - * (i.e., non-nil) PCB, but it should be impossible to get an invalid one - * except through a kernel programming error, so it is acceptable to panic - * (or in this case trap) if the PCB is invalid. (Actually, we don't trap - * because there actually /is/ a programming error somewhere... XXX) - * - * Returns: 0 Success - * ENOBUFS No buffer space available - * ECONNRESET Connection reset + * without the need for a wrapper function. */ int -in_setsockaddr(struct socket *so, struct sockaddr **nam) +in_getsockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct sockaddr_in *sin; @@ -1104,77 +1418,118 @@ in_setsockaddr(struct socket *so, struct sockaddr **nam) /* * Do the malloc first in case it blocks. 
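in_getsockaddr() keeps the historical heap-allocating contract, while the new *_s variants below fill a caller-supplied sockaddr_storage and therefore cannot fail with ENOBUFS. A userland sketch of the two shapes (fill_sin() and the two wrappers are illustrative, not the kernel entry points):

```c
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>

static void
fill_sin(struct sockaddr_in *sin, in_addr_t addr, in_port_t port)
{
	memset(sin, 0, sizeof (*sin));
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof (*sin);	/* BSD length field; absent on Linux */
	sin->sin_port = port;
	sin->sin_addr.s_addr = addr;
}

/* Classic shape: heap-allocate; the caller frees the result. */
static int
getaddr_alloc(struct sockaddr **nam, in_addr_t addr, in_port_t port)
{
	struct sockaddr_in *sin = malloc(sizeof (*sin));

	if (sin == NULL)
		return (ENOBUFS);	/* only this path can fail */
	fill_sin(sin, addr, port);
	*nam = (struct sockaddr *)sin;
	return (0);
}

/* New *_s shape: fill caller storage; no allocation, no ENOBUFS. */
static int
getaddr_stack(struct sockaddr_storage *ss, in_addr_t addr, in_port_t port)
{
	fill_sin((struct sockaddr_in *)ss, addr, port);
	return (0);
}
```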
*/ - MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); + MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); if (sin == NULL) - return ENOBUFS; - bzero(sin, sizeof *sin); + return (ENOBUFS); + bzero(sin, sizeof (*sin)); sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); + sin->sin_len = sizeof (*sin); - inp = sotoinpcb(so); - if (!inp) { + if ((inp = sotoinpcb(so)) == NULL) { FREE(sin, M_SONAME); - return ECONNRESET; + return (EINVAL); } sin->sin_port = inp->inp_lport; sin->sin_addr = inp->inp_laddr; *nam = (struct sockaddr *)sin; - return 0; + return (0); } int -in_setpeeraddr(struct socket *so, struct sockaddr **nam) +in_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) { + struct sockaddr_in *sin = SIN(ss); struct inpcb *inp; - struct sockaddr_in *sin; - /* - * Do the malloc first in case it blocks. - */ - MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); - if (sin == NULL) - return ENOBUFS; - bzero((caddr_t)sin, sizeof (*sin)); + VERIFY(ss != NULL); + bzero(ss, sizeof (*ss)); + sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); + sin->sin_len = sizeof (*sin); - inp = sotoinpcb(so); - if (!inp) { + if ((inp = sotoinpcb(so)) == NULL || + (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? EINVAL : EPROTOTYPE); + + sin->sin_port = inp->inp_lport; + sin->sin_addr = inp->inp_laddr; + return (0); +} + +int +in_getpeeraddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + struct sockaddr_in *sin; + + /* + * Do the malloc first in case it blocks. + */ + MALLOC(sin, struct sockaddr_in *, sizeof (*sin), M_SONAME, M_WAITOK); + if (sin == NULL) + return (ENOBUFS); + bzero((caddr_t)sin, sizeof (*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof (*sin); + + if ((inp = sotoinpcb(so)) == NULL) { FREE(sin, M_SONAME); - return ECONNRESET; + return (EINVAL); } sin->sin_port = inp->inp_fport; sin->sin_addr = inp->inp_faddr; *nam = (struct sockaddr *)sin; - return 0; + return (0); +} + +int +in_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) +{ + struct sockaddr_in *sin = SIN(ss); + struct inpcb *inp; + + VERIFY(ss != NULL); + bzero(ss, sizeof (*ss)); + + sin->sin_family = AF_INET; + sin->sin_len = sizeof (*sin); + + if ((inp = sotoinpcb(so)) == NULL || + (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + return (inp == NULL ? 
EINVAL : EPROTOTYPE); + } + + sin->sin_port = inp->inp_fport; + sin->sin_addr = inp->inp_faddr; + return (0); } void in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, - int errno, void (*notify)(struct inpcb *, int)) + int errno, void (*notify)(struct inpcb *, int)) { struct inpcb *inp; - lck_rw_lock_shared(pcbinfo->mtx); + lck_rw_lock_shared(pcbinfo->ipi_lock); - LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { + LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { #if INET6 - if ((inp->inp_vflag & INP_IPV4) == 0) + if (!(inp->inp_vflag & INP_IPV4)) continue; -#endif +#endif /* INET6 */ if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == NULL) - continue; - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + continue; + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) continue; socket_lock(inp->inp_socket, 1); (*notify)(inp, errno); - (void)in_pcb_checkstate(inp, WNT_RELEASE, 1); + (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); socket_unlock(inp->inp_socket, 1); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); } /* @@ -1186,16 +1541,17 @@ in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, void in_losing(struct inpcb *inp) { + boolean_t release = FALSE; struct rtentry *rt; struct rt_addrinfo info; if ((rt = inp->inp_route.ro_rt) != NULL) { - struct in_ifaddr *ia; + struct in_ifaddr *ia = NULL; - bzero((caddr_t)&info, sizeof(info)); + bzero((caddr_t)&info, sizeof (info)); RT_LOCK(rt); info.rti_info[RTAX_DST] = - (struct sockaddr *)&inp->inp_route.ro_dst; + (struct sockaddr *)&inp->inp_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); @@ -1208,22 +1564,24 @@ in_losing(struct inpcb *inp) rt->rt_flags |= RTF_CONDEMNED; RT_UNLOCK(rt); (void) rtrequest(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, - (struct rtentry **)0); + rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); } else { RT_UNLOCK(rt); } /* if the address is gone keep the old route in the pcb */ - if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { - inp->inp_route.ro_rt = NULL; - rtfree(rt); - IFA_REMREF(&ia->ia_ifa); + if (inp->inp_laddr.s_addr != INADDR_ANY && + (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { + /* + * Address is around; ditch the route. A new route + * can be allocated the next time output is attempted. + */ + release = TRUE; } - /* - * A new route can be allocated - * the next time output is attempted. - */ + if (ia != NULL) + IFA_REMREF(&ia->ia_ifa); } + if (rt == NULL || release) + ROUTE_RELEASE(&inp->inp_route); } /* @@ -1231,24 +1589,29 @@ in_losing(struct inpcb *inp) * and allocate a (hopefully) better one. */ void -in_rtchange(struct inpcb *inp, __unused int errno) +in_rtchange(struct inpcb *inp, int errno) { +#pragma unused(errno) + boolean_t release = FALSE; struct rtentry *rt; if ((rt = inp->inp_route.ro_rt) != NULL) { - struct in_ifaddr *ia; + struct in_ifaddr *ia = NULL; - if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) { - return; /* we can't remove the route now. not sure if still ok to use src */ + /* if address is gone, keep the old route */ + if (inp->inp_laddr.s_addr != INADDR_ANY && + (ia = ifa_foraddr(inp->inp_laddr.s_addr)) != NULL) { + /* + * Address is around; ditch the route. A new route + * can be allocated the next time output is attempted. 
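in_losing() and in_rtchange() now share one release rule, distilled below as a boolean helper (the three flags stand in for the rt pointer, the INADDR_ANY test and the ifa_foraddr() lookup):

```c
#include <stdbool.h>

/*
 * Release the cached route only when there is no route at all (a
 * harmless no-op) or when the bound local address is still
 * configured; if the address is gone, the stale route is
 * deliberately kept.
 */
static bool
should_release_route(bool have_route, bool laddr_bound,
    bool laddr_still_configured)
{
	if (!have_route)
		return (true);
	return (laddr_bound && laddr_still_configured);
}
```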
+ */ + release = TRUE; } - IFA_REMREF(&ia->ia_ifa); - rtfree(rt); - inp->inp_route.ro_rt = NULL; - /* - * A new route can be allocated the next time - * output is attempted. - */ + if (ia != NULL) + IFA_REMREF(&ia->ia_ifa); } + if (rt == NULL || release) + ROUTE_RELEASE(&inp->inp_route); } /* @@ -1256,13 +1619,13 @@ in_rtchange(struct inpcb *inp, __unused int errno) */ struct inpcb * in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, - unsigned int lport_arg, int wild_okay) + unsigned int lport_arg, int wild_okay) { struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; - KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_START, 0, 0, 0, 0, 0); if (!wild_okay) { struct inpcbhead *head; @@ -1270,12 +1633,13 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ - head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #if INET6 - if ((inp->inp_vflag & INP_IPV4) == 0) + if (!(inp->inp_vflag & INP_IPV4)) continue; -#endif +#endif /* INET6 */ if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { @@ -1288,7 +1652,7 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, /* * Not found. */ - KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, 0, 0, 0, 0, 0); return (NULL); } else { struct inpcbporthead *porthash; @@ -1300,8 +1664,8 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, * First see if this local port is in use by looking on the * port hash list. */ - porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, - pcbinfo->porthashmask)]; + porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, + pcbinfo->ipi_porthashmask)]; LIST_FOREACH(phd, porthash, phd_hash) { if (phd->phd_port == lport) break; @@ -1314,15 +1678,16 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { wildcard = 0; #if INET6 - if ((inp->inp_vflag & INP_IPV4) == 0) + if (!(inp->inp_vflag & INP_IPV4)) continue; -#endif +#endif /* INET6 */ if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; - else if (inp->inp_laddr.s_addr != laddr.s_addr) + else if (inp->inp_laddr.s_addr != + laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) @@ -1337,7 +1702,8 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, } } } - KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_PCB_LOOKUP | DBG_FUNC_END, match, + 0, 0, 0, 0); return (match); } } @@ -1346,21 +1712,18 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, * Check if PCB exists in hash list. 
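The matchwild loop in in_pcblookup_local() ranks candidates by how many address fields had to be wildcarded, preferring the most specific PCB (a score of 0 is a perfect match and can end the search early). The scoring rule, distilled into a stand-alone helper with an illustrative tuple type:

```c
#include <netinet/in.h>

/* Illustrative PCB tuple for the scoring sketch. */
struct pcb_tuple {
	in_addr_t	laddr, faddr;
	unsigned short	lport;
};

/*
 * Each wildcard (unspecified) field costs one point; -1 means the
 * candidate cannot match at all.
 */
static int
wildcard_score(const struct pcb_tuple *pcb, in_addr_t laddr,
    unsigned short lport)
{
	int score = 0;

	if (pcb->lport != lport)
		return (-1);
	if (pcb->faddr != INADDR_ANY)
		score++;			/* connected PCB */
	if (pcb->laddr != INADDR_ANY) {
		if (laddr == INADDR_ANY)
			score++;
		else if (pcb->laddr != laddr)
			return (-1);		/* bound elsewhere */
	} else if (laddr != INADDR_ANY) {
		score++;
	}
	return (score);
}
```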
*/ int -in_pcblookup_hash_exists( - struct inpcbinfo *pcbinfo, - struct in_addr faddr, - u_int fport_arg, - struct in_addr laddr, - u_int lport_arg, - int wildcard, - uid_t *uid, - gid_t *gid, - struct ifnet *ifp) +in_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in_addr faddr, + u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, + uid_t *uid, gid_t *gid, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; - int found; + int found = 0; + struct inpcb *local_wild = NULL; +#if INET6 + struct inpcb *local_wild_mapped = NULL; +#endif /* INET6 */ *uid = UID_MAX; *gid = GID_MAX; @@ -1369,21 +1732,23 @@ in_pcblookup_hash_exists( * We may have found the pcb in the last lookup - check this first. */ - lck_rw_lock_shared(pcbinfo->mtx); + lck_rw_lock_shared(pcbinfo->ipi_lock); /* * First look for an exact match. */ - head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #if INET6 - if ((inp->inp_vflag & INP_IPV4) == 0) + if (!(inp->inp_vflag & INP_IPV4)) continue; -#endif - if (ip_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->inp_flags & INP_RECV_ANYIF)) +#endif /* INET6 */ + if (inp_restricted(inp, ifp)) + continue; + + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (inp->inp_flags & INP_NO_IFT_CELLULAR)) continue; if (inp->inp_faddr.s_addr == faddr.s_addr && @@ -1399,122 +1764,118 @@ in_pcblookup_hash_exists( *gid = kauth_cred_getgid( inp->inp_socket->so_cred); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (found); } } - if (wildcard) { - struct inpcb *local_wild = NULL; -#if INET6 - struct inpcb *local_wild_mapped = NULL; -#endif - head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, - pcbinfo->hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { -#if INET6 - if ((inp->inp_vflag & INP_IPV4) == 0) - continue; -#endif - if (ip_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->inp_flags & INP_RECV_ANYIF)) - continue; + if (!wildcard) { + /* + * Not found. 
+ */ + lck_rw_done(pcbinfo->ipi_lock); + return (0); + } - if (inp->inp_faddr.s_addr == INADDR_ANY && - inp->inp_lport == lport) { - if (inp->inp_laddr.s_addr == laddr.s_addr) { - if ((found = (inp->inp_socket != NULL))) { - *uid = kauth_cred_getuid( - inp->inp_socket->so_cred); - *gid = kauth_cred_getgid( - inp->inp_socket->so_cred); - } - lck_rw_done(pcbinfo->mtx); - return (found); - } - else if (inp->inp_laddr.s_addr == INADDR_ANY) { + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; + LIST_FOREACH(inp, head, inp_hash) { #if INET6 - if (inp->inp_socket && - INP_CHECK_SOCKAF(inp->inp_socket, - AF_INET6)) - local_wild_mapped = inp; - else + if (!(inp->inp_vflag & INP_IPV4)) + continue; #endif /* INET6 */ - local_wild = inp; - } - } - } - if (local_wild == NULL) { -#if INET6 - if (local_wild_mapped != NULL) { - if ((found = (local_wild_mapped->inp_socket != NULL))) { + if (inp_restricted(inp, ifp)) + continue; + + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (inp->inp_flags & INP_NO_IFT_CELLULAR)) + continue; + + if (inp->inp_faddr.s_addr == INADDR_ANY && + inp->inp_lport == lport) { + if (inp->inp_laddr.s_addr == laddr.s_addr) { + if ((found = (inp->inp_socket != NULL))) { *uid = kauth_cred_getuid( - local_wild_mapped->inp_socket->so_cred); + inp->inp_socket->so_cred); *gid = kauth_cred_getgid( - local_wild_mapped->inp_socket->so_cred); + inp->inp_socket->so_cred); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (found); - } + } else if (inp->inp_laddr.s_addr == INADDR_ANY) { +#if INET6 + if (inp->inp_socket && + SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) + local_wild_mapped = inp; + else #endif /* INET6 */ - lck_rw_done(pcbinfo->mtx); - return (0); + local_wild = inp; + } } - if (local_wild != NULL) { - if ((found = (local_wild->inp_socket != NULL))) { + } + if (local_wild == NULL) { +#if INET6 + if (local_wild_mapped != NULL) { + if ((found = (local_wild_mapped->inp_socket != NULL))) { *uid = kauth_cred_getuid( - local_wild->inp_socket->so_cred); + local_wild_mapped->inp_socket->so_cred); *gid = kauth_cred_getgid( - local_wild->inp_socket->so_cred); + local_wild_mapped->inp_socket->so_cred); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (found); } +#endif /* INET6 */ + lck_rw_done(pcbinfo->ipi_lock); + return (0); } - - /* - * Not found. - */ - lck_rw_done(pcbinfo->mtx); - return (0); + if ((found = (local_wild->inp_socket != NULL))) { + *uid = kauth_cred_getuid( + local_wild->inp_socket->so_cred); + *gid = kauth_cred_getgid( + local_wild->inp_socket->so_cred); + } + lck_rw_done(pcbinfo->ipi_lock); + return (found); } /* * Lookup PCB in hash list. */ struct inpcb * -in_pcblookup_hash( - struct inpcbinfo *pcbinfo, - struct in_addr faddr, - u_int fport_arg, - struct in_addr laddr, - u_int lport_arg, - int wildcard, - struct ifnet *ifp) +in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, + u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, + struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; + struct inpcb *local_wild = NULL; +#if INET6 + struct inpcb *local_wild_mapped = NULL; +#endif /* INET6 */ /* * We may have found the pcb in the last lookup - check this first. */ - lck_rw_lock_shared(pcbinfo->mtx); + lck_rw_lock_shared(pcbinfo->ipi_lock); /* * First look for an exact match. 
*/ - head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { #if INET6 - if ((inp->inp_vflag & INP_IPV4) == 0) + if (!(inp->inp_vflag & INP_IPV4)) continue; -#endif - if (ip_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->inp_flags & INP_RECV_ANYIF)) +#endif /* INET6 */ + if (inp_restricted(inp, ifp)) + continue; + + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (inp->inp_flags & INP_NO_IFT_CELLULAR)) continue; if (inp->inp_faddr.s_addr == faddr.s_addr && @@ -1524,86 +1885,87 @@ in_pcblookup_hash( /* * Found. */ - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != + WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); return (inp); - } - else { /* it's there but dead, say it isn't found */ - lck_rw_done(pcbinfo->mtx); + } else { + /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->ipi_lock); return (NULL); } } } - if (wildcard) { - struct inpcb *local_wild = NULL; -#if INET6 - struct inpcb *local_wild_mapped = NULL; -#endif - head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; - LIST_FOREACH(inp, head, inp_hash) { + if (!wildcard) { + /* + * Not found. + */ + lck_rw_done(pcbinfo->ipi_lock); + return (NULL); + } + + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; + LIST_FOREACH(inp, head, inp_hash) { #if INET6 - if ((inp->inp_vflag & INP_IPV4) == 0) - continue; -#endif - if (ip_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->inp_flags & INP_RECV_ANYIF)) - continue; + if (!(inp->inp_vflag & INP_IPV4)) + continue; +#endif /* INET6 */ + if (inp_restricted(inp, ifp)) + continue; - if (inp->inp_faddr.s_addr == INADDR_ANY && - inp->inp_lport == lport) { - if (inp->inp_laddr.s_addr == laddr.s_addr) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); - return (inp); - } - else { /* it's there but dead, say it isn't found */ - lck_rw_done(pcbinfo->mtx); - return (NULL); - } + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (inp->inp_flags & INP_NO_IFT_CELLULAR)) + continue; + + if (inp->inp_faddr.s_addr == INADDR_ANY && + inp->inp_lport == lport) { + if (inp->inp_laddr.s_addr == laddr.s_addr) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != + WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); + return (inp); + } else { + /* it's dead; say it isn't found */ + lck_rw_done(pcbinfo->ipi_lock); + return (NULL); } - else if (inp->inp_laddr.s_addr == INADDR_ANY) { + } else if (inp->inp_laddr.s_addr == INADDR_ANY) { #if INET6 - if (INP_CHECK_SOCKAF(inp->inp_socket, - AF_INET6)) - local_wild_mapped = inp; - else + if (SOCK_CHECK_DOM(inp->inp_socket, PF_INET6)) + local_wild_mapped = inp; + else #endif /* INET6 */ local_wild = inp; - } } } - if (local_wild == NULL) { + } + if (local_wild == NULL) { #if INET6 - if (local_wild_mapped != NULL) { - if (in_pcb_checkstate(local_wild_mapped, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); - return (local_wild_mapped); - } - else { /* it's there but dead, say it isn't found */ - lck_rw_done(pcbinfo->mtx); - return (NULL); - } + if (local_wild_mapped != NULL) { + if (in_pcb_checkstate(local_wild_mapped, + WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); + return 
(local_wild_mapped); + } else { + /* it's dead; say it isn't found */ + lck_rw_done(pcbinfo->ipi_lock); + return (NULL); } -#endif /* INET6 */ - lck_rw_done(pcbinfo->mtx); - return (NULL); - } - if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); - return (local_wild); - } - else { /* it's there but dead, say it isn't found */ - lck_rw_done(pcbinfo->mtx); - return (NULL); } +#endif /* INET6 */ + lck_rw_done(pcbinfo->ipi_lock); + return (NULL); + } + if (in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); + return (local_wild); } - /* - * Not found. + * It's either not found or is already dead. */ - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (NULL); } @@ -1619,33 +1981,40 @@ in_pcbinshash(struct inpcb *inp, int locked) struct inpcbport *phd; u_int32_t hashkey_faddr; - if (!locked) { - if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ - socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(pcbinfo->mtx); - socket_lock(inp->inp_socket, 0); + if (!locked) { + if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { + /* + * Lock inversion issue, mostly with udp + * multicast packets + */ + socket_unlock(inp->inp_socket, 0); + lck_rw_lock_exclusive(pcbinfo->ipi_lock); + socket_lock(inp->inp_socket, 0); if (inp->inp_state == INPCB_STATE_DEAD) { - /* The socket got dropped when it was unlocked */ - lck_rw_done(pcbinfo->mtx); - return(ECONNABORTED); + /* + * The socket got dropped when + * it was unlocked + */ + lck_rw_done(pcbinfo->ipi_lock); + return (ECONNABORTED); } - } - } + } + } #if INET6 if (inp->inp_vflag & INP_IPV6) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ - hashkey_faddr = inp->inp_faddr.s_addr; + hashkey_faddr = inp->inp_faddr.s_addr; - inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask); + inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, + inp->inp_fport, pcbinfo->ipi_hashmask); - pcbhash = &pcbinfo->hashbase[inp->hash_element]; + pcbhash = &pcbinfo->ipi_hashbase[inp->inp_hash_element]; - pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, - pcbinfo->porthashmask)]; + pcbporthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(inp->inp_lport, + pcbinfo->ipi_porthashmask)]; /* * Go through port list and look for a head for this lport. @@ -1661,10 +2030,11 @@ in_pcbinshash(struct inpcb *inp, int locked) * If none exists, malloc one and tack it on. 
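in_pcbinshash() keeps one inpcbport head per in-use local port and allocates it lazily, as the hunk above shows. A self-contained model of that allocate-on-first-use pattern (get_port_head() and struct port_head are illustrative, with malloc() standing in for MALLOC/M_PCB):

```c
#include <stdlib.h>

/* Illustrative per-port head; the kernel type is struct inpcbport. */
struct port_head {
	struct port_head *ph_next;
	unsigned short	ph_port;
	/* the list of PCBs sharing this local port would hang here */
};

/*
 * Find the head for a port in its chain, allocating it on first use;
 * a NULL return corresponds to the ENOBUFS case above.
 */
static struct port_head *
get_port_head(struct port_head **chain, unsigned short port)
{
	struct port_head *phd;

	for (phd = *chain; phd != NULL; phd = phd->ph_next)
		if (phd->ph_port == port)
			return (phd);
	if ((phd = malloc(sizeof (*phd))) == NULL)
		return (NULL);
	phd->ph_port = port;
	phd->ph_next = *chain;		/* tack it on */
	*chain = phd;
	return (phd);
}
```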
*/ if (phd == NULL) { - MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_WAITOK); + MALLOC(phd, struct inpcbport *, sizeof (struct inpcbport), + M_PCB, M_WAITOK); if (phd == NULL) { if (!locked) - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; @@ -1675,7 +2045,7 @@ in_pcbinshash(struct inpcb *inp, int locked) LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); if (!locked) - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (0); } @@ -1696,10 +2066,11 @@ in_pcbrehash(struct inpcb *inp) hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; else #endif /* INET6 */ - hashkey_faddr = inp->inp_faddr.s_addr; - inp->hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, - inp->inp_fport, inp->inp_pcbinfo->hashmask); - head = &inp->inp_pcbinfo->hashbase[inp->hash_element]; + hashkey_faddr = inp->inp_faddr.s_addr; + + inp->inp_hash_element = INP_PCBHASH(hashkey_faddr, inp->inp_lport, + inp->inp_fport, inp->inp_pcbinfo->ipi_hashmask); + head = &inp->inp_pcbinfo->ipi_hashbase[inp->inp_hash_element]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); @@ -1724,108 +2095,133 @@ in_pcbremlists(struct inpcb *inp) FREE(phd, M_PCB); } } - LIST_REMOVE(inp, inp_list); + + if (inp->inp_flags2 & INP2_TIMEWAIT) { + /* Remove from time-wait queue */ + tcp_remove_from_time_wait(inp); + inp->inp_flags2 &= ~INP2_TIMEWAIT; + VERIFY(inp->inp_pcbinfo->ipi_twcount != 0); + inp->inp_pcbinfo->ipi_twcount--; + } else { + /* Remove from global inp list if it is not time-wait */ + LIST_REMOVE(inp, inp_list); + } if (inp->inp_flags2 & INP2_IN_FCTREE) { - inp_fc_getinp(inp->inp_flowhash, - (INPFC_SOLOCKED|INPFC_REMOVE)); + inp_fc_getinp(inp->inp_flowhash, (INPFC_SOLOCKED|INPFC_REMOVE)); VERIFY(!(inp->inp_flags2 & INP2_IN_FCTREE)); } + inp->inp_pcbinfo->ipi_count--; } -/* Mechanism used to defer the memory release of PCBs - * The pcb list will contain the pcb until the ripper can clean it up if - * the following conditions are met: 1) state "DEAD", 2) wantcnt is STOPUSING - * 3) usecount is null +/* + * Mechanism used to defer the memory release of PCBs + * The pcb list will contain the pcb until the reaper can clean it up if + * the following conditions are met: + * 1) state "DEAD", + * 2) wantcnt is STOPUSING + * 3) usecount is 0 * This function will be called to either mark the pcb as -*/ + */ int in_pcb_checkstate(struct inpcb *pcb, int mode, int locked) { - - volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; + volatile UInt32 *wantcnt = (volatile UInt32 *)&pcb->inp_wantcnt; UInt32 origwant; UInt32 newwant; switch (mode) { + case WNT_STOPUSING: + /* + * Try to mark the pcb as ready for recycling. 
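in_pcb_checkstate(), reindented below, implements a lock-free want count whose low 16 bits saturate at the 0xffff STOPUSING sentinel. A C11-atomics sketch of the acquire and stop-using halves of that protocol; this is a deliberate simplification that omits the socket locking and GC scheduling the kernel version interleaves:

```c
#include <stdatomic.h>
#include <stdint.h>

#define WANT_STOPUSING	0xffffU		/* sentinel: no new references */

/*
 * Acquire spins on compare-and-swap and fails once the sentinel is
 * visible, so no long-held lock is needed to fence off teardown.
 */
static int
want_acquire(_Atomic uint32_t *wantcnt)
{
	uint32_t old, new;

	do {
		old = atomic_load(wantcnt);
		if ((uint16_t)old == WANT_STOPUSING)
			return (-1);	/* PCB is being recycled */
		new = old + 1;
	} while (!atomic_compare_exchange_weak(wantcnt, &old, new));
	return (0);
}

/*
 * Stop-using publishes the sentinel immediately only when the count
 * is zero; otherwise the kernel version lets the final release come
 * back through its stopusing label to publish it.
 */
static void
want_stopusing(_Atomic uint32_t *wantcnt)
{
	uint32_t old = atomic_load(wantcnt);

	if ((uint16_t)old == 0)
		atomic_compare_exchange_strong(wantcnt, &old,
		    (uint32_t)WANT_STOPUSING);
}
```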
CAS with + * STOPUSING, if success we're good, if it's in use, will + * be marked later + */ + if (locked == 0) + socket_lock(pcb->inp_socket, 1); + pcb->inp_state = INPCB_STATE_DEAD; - case WNT_STOPUSING: /* try to mark the pcb as ready for recycling */ - - /* compareswap with STOPUSING, if success we're good, if it's in use, will be marked later */ +stopusing: + if (pcb->inp_socket->so_usecount < 0) { + panic("%s: pcb=%p so=%p usecount is negative\n", + __func__, pcb, pcb->inp_socket); + /* NOTREACHED */ + } + if (locked == 0) + socket_unlock(pcb->inp_socket, 1); - if (locked == 0) - socket_lock(pcb->inp_socket, 1); - pcb->inp_state = INPCB_STATE_DEAD; + inpcb_gc_sched(pcb->inp_pcbinfo, INPCB_TIMER_FAST); -stopusing: - if (pcb->inp_socket->so_usecount < 0) - panic("in_pcb_checkstate STOP pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket); - if (locked == 0) - socket_unlock(pcb->inp_socket, 1); + origwant = *wantcnt; + if ((UInt16) origwant == 0xffff) /* should stop using */ + return (WNT_STOPUSING); + newwant = 0xffff; + if ((UInt16) origwant == 0) { + /* try to mark it as unusable now */ + OSCompareAndSwap(origwant, newwant, wantcnt); + } + return (WNT_STOPUSING); + break; + case WNT_ACQUIRE: + /* + * Try to increase the reference count on the pcb. If the + * count is already at WNT_STOPUSING, bail out. If the socket + * state is DEAD, try to set the count to STOPUSING and return + * failure; otherwise increase the count. + */ + do { origwant = *wantcnt; - if ((UInt16) origwant == 0xffff ) /* should stop using */ + if ((UInt16) origwant == 0xffff) { + /* should stop using */ return (WNT_STOPUSING); - newwant = 0xffff; - if ((UInt16) origwant == 0) {/* try to mark it as unsuable now */ - OSCompareAndSwap(origwant, newwant, wantcnt) ; } - return (WNT_STOPUSING); - break; + newwant = origwant + 1; + } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); + return (WNT_ACQUIRE); + break; - case WNT_ACQUIRE: /* try to increase reference to pcb */ - /* if WNT_STOPUSING should bail out */ - /* - * if socket state DEAD, try to set count to STOPUSING, return failed - * otherwise increase cnt - */ - do { - origwant = *wantcnt; - if ((UInt16) origwant == 0xffff ) {/* should stop using */ -// printf("in_pcb_checkstate: ACQ PCB was STOPUSING while release. odd pcb=%p\n", pcb); - return (WNT_STOPUSING); - } - newwant = origwant + 1; - } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); - return (WNT_ACQUIRE); - break; - - case WNT_RELEASE: /* release reference. if result is null and pcb state is DEAD, - set wanted bit to STOPUSING - */ - - if (locked == 0) - socket_lock(pcb->inp_socket, 1); + case WNT_RELEASE: + /* + * Release a reference. If the resulting count is zero and + * the pcb state is DEAD, set the wanted bit to STOPUSING. + */ + if (locked == 0) + socket_lock(pcb->inp_socket, 1); - do { - origwant = *wantcnt; - if ((UInt16) origwant == 0x0 ) - panic("in_pcb_checkstate pcb=%p release with zero count", pcb); - if ((UInt16) origwant == 0xffff ) {/* should stop using */ -#if TEMPDEBUG - printf("in_pcb_checkstate: REL PCB was STOPUSING while release.
odd pcb=%p\n", pcb); -#endif - if (locked == 0) - socket_unlock(pcb->inp_socket, 1); - return (WNT_STOPUSING); - } - newwant = origwant - 1; - } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); - - if (pcb->inp_state == INPCB_STATE_DEAD) - goto stopusing; - if (pcb->inp_socket->so_usecount < 0) - panic("in_pcb_checkstate RELEASE pcb=%p so=%p usecount is negative\n", pcb, pcb->inp_socket); - - if (locked == 0) - socket_unlock(pcb->inp_socket, 1); - return (WNT_RELEASE); - break; + do { + origwant = *wantcnt; + if ((UInt16) origwant == 0x0) { + panic("%s: pcb=%p release with zero count", + __func__, pcb); + /* NOTREACHED */ + } + if ((UInt16) origwant == 0xffff) { + /* should stop using */ + if (locked == 0) + socket_unlock(pcb->inp_socket, 1); + return (WNT_STOPUSING); + } + newwant = origwant - 1; + } while (!OSCompareAndSwap(origwant, newwant, wantcnt)); + + if (pcb->inp_state == INPCB_STATE_DEAD) + goto stopusing; + if (pcb->inp_socket->so_usecount < 0) { + panic("%s: RELEASE pcb=%p so=%p usecount is negative\n", + __func__, pcb, pcb->inp_socket); + /* NOTREACHED */ + } - default: + if (locked == 0) + socket_unlock(pcb->inp_socket, 1); + return (WNT_RELEASE); + break; - panic("in_pcb_checkstate: so=%p not a valid state =%x\n", pcb->inp_socket, mode); + default: + panic("%s: so=%p not a valid state =%x\n", __func__, + pcb->inp_socket, mode); + /* NOTREACHED */ } /* NOTREACHED */ @@ -1838,36 +2234,32 @@ stopusing: * not change. We intentionally avoid copying pointers. */ void -inpcb_to_compat( - struct inpcb *inp, - struct inpcb_compat *inp_compat) +inpcb_to_compat(struct inpcb *inp, struct inpcb_compat *inp_compat) { - bzero(inp_compat, sizeof(*inp_compat)); + bzero(inp_compat, sizeof (*inp_compat)); inp_compat->inp_fport = inp->inp_fport; inp_compat->inp_lport = inp->inp_lport; inp_compat->nat_owner = 0; - inp_compat->nat_cookie = inp->nat_cookie; + inp_compat->nat_cookie = 0; inp_compat->inp_gencnt = inp->inp_gencnt; inp_compat->inp_flags = inp->inp_flags; inp_compat->inp_flow = inp->inp_flow; inp_compat->inp_vflag = inp->inp_vflag; inp_compat->inp_ip_ttl = inp->inp_ip_ttl; inp_compat->inp_ip_p = inp->inp_ip_p; - inp_compat->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; - inp_compat->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; + inp_compat->inp_dependfaddr.inp6_foreign = + inp->inp_dependfaddr.inp6_foreign; + inp_compat->inp_dependladdr.inp6_local = + inp->inp_dependladdr.inp6_local; inp_compat->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; - inp_compat->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim; + inp_compat->inp_depend6.inp6_hlim = 0; inp_compat->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; - inp_compat->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex; + inp_compat->inp_depend6.inp6_ifindex = 0; inp_compat->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; } -#if !CONFIG_EMBEDDED - void -inpcb_to_xinpcb64( - struct inpcb *inp, - struct xinpcb64 *xinp) +inpcb_to_xinpcb64(struct inpcb *inp, struct xinpcb64 *xinp) { xinp->inp_fport = inp->inp_fport; xinp->inp_lport = inp->inp_lport; @@ -1880,15 +2272,12 @@ inpcb_to_xinpcb64( xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; - xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim; + xinp->inp_depend6.inp6_hlim = 0; xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; - 
xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex; + xinp->inp_depend6.inp6_ifindex = 0; xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; } -#endif /* !CONFIG_EMBEDDED */ - - /* * The following routines implement this scheme: * @@ -1923,15 +2312,13 @@ inp_route_copyout(struct inpcb *inp, struct route *dst) lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); /* - * If the route in the PCB is not for IPv4, blow it away; + * If the route in the PCB is stale or not for IPv4, blow it away; * this is possible in the case of IPv4-mapped address case. */ - if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) { - rtfree(src->ro_rt); - src->ro_rt = NULL; - } + if (ROUTE_UNUSABLE(src) || rt_key(src->ro_rt)->sa_family != AF_INET) + ROUTE_RELEASE(src); - route_copyout(dst, src, sizeof(*dst)); + route_copyout(dst, src, sizeof (*dst)); } void @@ -1945,14 +2332,14 @@ inp_route_copyin(struct inpcb *inp, struct route *src) if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) panic("%s: wrong or corrupted route: %p", __func__, src); - route_copyin(src, dst, sizeof(*src)); + route_copyin(src, dst, sizeof (*src)); } /* * Handler for setting IP_FORCE_OUT_IFP/IP_BOUND_IF/IPV6_BOUND_IF socket option. */ int -inp_bindif(struct inpcb *inp, unsigned int ifscope) +inp_bindif(struct inpcb *inp, unsigned int ifscope, struct ifnet **pifp) { struct ifnet *ifp = NULL; @@ -1982,35 +2369,68 @@ inp_bindif(struct inpcb *inp, unsigned int ifscope) inp->inp_flags |= INP_BOUND_IF; /* Blow away any cached route in the PCB */ - if (inp->inp_route.ro_rt != NULL) { - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = NULL; - } + ROUTE_RELEASE(&inp->inp_route); + + if (pifp != NULL) + *pifp = ifp; return (0); } /* - * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option. + * Handler for setting IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, + * as well as for setting PROC_UUID_NO_CELLULAR policy. */ -int -inp_nocellular(struct inpcb *inp, unsigned int val) +void +inp_set_nocellular(struct inpcb *inp) { - if (val) { - inp->inp_flags |= INP_NO_IFT_CELLULAR; - } else if (inp->inp_flags & INP_NO_IFT_CELLULAR) { - /* once set, it cannot be unset */ - return (EINVAL); - } + inp->inp_flags |= INP_NO_IFT_CELLULAR; /* Blow away any cached route in the PCB */ - if (inp->inp_route.ro_rt != NULL) { - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = NULL; + ROUTE_RELEASE(&inp->inp_route); +} + +/* + * Handler for clearing IP_NO_IFT_CELLULAR/IPV6_NO_IFT_CELLULAR socket option, + * as well as for clearing PROC_UUID_NO_CELLULAR policy. + */ +void +inp_clear_nocellular(struct inpcb *inp) +{ + struct socket *so = inp->inp_socket; + + /* + * SO_RESTRICT_DENY_CELLULAR socket restriction issued on the socket + * has a higher precedence than INP_NO_IFT_CELLULAR. Clear the flag + * if and only if the socket is unrestricted. + */ + if (so != NULL && !(so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) { + inp->inp_flags &= ~INP_NO_IFT_CELLULAR; + + /* Blow away any cached route in the PCB */ + ROUTE_RELEASE(&inp->inp_route); + } +} - return (0); +#if FLOW_DIVERT +/* + * Called when PROC_UUID_FLOW_DIVERT is set. + */ +void +inp_set_flow_divert(struct inpcb *inp) +{ + inp->inp_flags2 |= INP2_WANT_FLOW_DIVERT; +} + +/* + * Called when PROC_UUID_FLOW_DIVERT is cleared.
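The clearing side is deliberately asymmetric. A sketch of the precedence rule in inp_clear_nocellular() above, with plain flag words standing in for the inp and socket fields:

```c
#include <stdint.h>

#define F_NO_CELLULAR	0x1	/* stands in for INP_NO_IFT_CELLULAR */
#define R_DENY_CELLULAR	0x2	/* stands in for SO_RESTRICT_DENY_CELLULAR */

/*
 * The per-socket restriction outranks the per-PCB flag, so the flag
 * may only be cleared on an unrestricted socket; setting it has no
 * such guard.
 */
static void
clear_nocellular(uint32_t *inp_flags, uint32_t so_restrictions)
{
	if (!(so_restrictions & R_DENY_CELLULAR))
		*inp_flags &= ~F_NO_CELLULAR;
	/* a restricted socket keeps the flag set */
}
```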
+ */ +void +inp_clear_flow_divert(struct inpcb *inp) +{ + inp->inp_flags2 &= ~INP2_WANT_FLOW_DIVERT; } +#endif /* FLOW_DIVERT */ /* * Calculate flow hash for an inp, used by an interface to identify a @@ -2050,14 +2470,13 @@ try_again: inp->inp_flowhash = flowhash; /* Insert the inp into inp_fc_tree */ - - lck_mtx_lock(&inp_fc_lck); + lck_mtx_lock_spin(&inp_fc_lck); tmp_inp = RB_FIND(inp_fc_tree, &inp_fc_tree, inp); if (tmp_inp != NULL) { /* * There is a different inp with the same flowhash. * There can be a collision on flow hash but the - * probability is low. Let's recompute the + * probability is low. Let's recompute the * flowhash. */ lck_mtx_unlock(&inp_fc_lck); @@ -2065,11 +2484,24 @@ try_again: inp_hash_seed = RandomULong(); goto try_again; } + RB_INSERT(inp_fc_tree, &inp_fc_tree, inp); inp->inp_flags2 |= INP2_IN_FCTREE; lck_mtx_unlock(&inp_fc_lck); - return flowhash; + return (flowhash); +} + +void +inp_flowadv(uint32_t flowhash) +{ + struct inpcb *inp; + + inp = inp_fc_getinp(flowhash, 0); + + if (inp == NULL) + return; + inp_fc_feedback(inp); } /* @@ -2079,10 +2511,10 @@ static inline int infc_cmp(const struct inpcb *inp1, const struct inpcb *inp2) { return (memcmp(&(inp1->inp_flowhash), &(inp2->inp_flowhash), - sizeof(inp1->inp_flowhash))); + sizeof(inp1->inp_flowhash))); } -struct inpcb * +static struct inpcb * inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) { struct inpcb *inp = NULL; @@ -2105,6 +2537,7 @@ inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) inp->inp_flags2 &= ~INP2_IN_FCTREE; return (NULL); } + if (in_pcb_checkstate(inp, WNT_ACQUIRE, locked) == WNT_STOPUSING) inp = NULL; lck_mtx_unlock(&inp_fc_lck); @@ -2112,13 +2545,13 @@ inp_fc_getinp(u_int32_t flowhash, u_int32_t flags) return (inp); } -void +static void inp_fc_feedback(struct inpcb *inp) { struct socket *so = inp->inp_socket; /* we already hold a want_cnt on this inp, socket can't be null */ - VERIFY (so != NULL); + VERIFY(so != NULL); socket_lock(so, 1); if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { @@ -2137,7 +2570,7 @@ inp_fc_feedback(struct inpcb *inp) } inp_reset_fc_state(inp); - if (so->so_proto->pr_type == SOCK_STREAM) + if (SOCK_TYPE(so) == SOCK_STREAM) inp_fc_unthrottle_tcp(inp); socket_unlock(so, 1); @@ -2170,20 +2603,19 @@ inp_set_fc_state(struct inpcb *inp, int advcode) { struct inpcb *tmp_inp = NULL; /* - * If there was a feedback from the interface when + * If there was a feedback from the interface when * send operation was in progress, we should ignore * this flow advisory to avoid a race between setting * flow controlled state and receiving feedback from * the interface */ if (inp->inp_flags & INP_FC_FEEDBACK) - return(0); + return (0); inp->inp_flags &= ~(INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED); - if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, INPFC_SOLOCKED)) - != NULL) { - if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) - == WNT_STOPUSING) + if ((tmp_inp = inp_fc_getinp(inp->inp_flowhash, + INPFC_SOLOCKED)) != NULL) { + if (in_pcb_checkstate(tmp_inp, WNT_RELEASE, 1) == WNT_STOPUSING) return (0); VERIFY(tmp_inp == inp); switch (advcode) { @@ -2201,7 +2633,7 @@ inp_set_fc_state(struct inpcb *inp, int advcode) } return (1); } - return(0); + return (0); } /* @@ -2211,7 +2643,7 @@ int inp_flush(struct inpcb *inp, int optval) { u_int32_t flowhash = inp->inp_flowhash; - struct rtentry *rt; + struct ifnet *rtifp, *oifp; /* Either all classes or one of the valid ones */ if (optval != SO_TC_ALL && !SO_VALID_TC(optval)) @@ -2221,11 +2653,15 @@ inp_flush(struct inpcb *inp, int 
optval) if (flowhash == 0) return (0); - /* We need a cached route for the interface */ - if ((rt = inp->inp_route.ro_rt) != NULL) { - struct ifnet *ifp = rt->rt_ifp; - if_qflush_sc(ifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); - } + /* Grab the interfaces from the route and pcb */ + rtifp = ((inp->inp_route.ro_rt != NULL) ? + inp->inp_route.ro_rt->rt_ifp : NULL); + oifp = inp->inp_last_outifp; + + if (rtifp != NULL) + if_qflush_sc(rtifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); + if (oifp != NULL && oifp != rtifp) + if_qflush_sc(oifp, so_tc2msc(optval), flowhash, NULL, NULL, 0); return (0); } @@ -2233,7 +2669,8 @@ inp_flush(struct inpcb *inp, int optval) /* * Clear the INP_INADDR_ANY flag (special case for PPP only) */ -void inp_clear_INP_INADDR_ANY(struct socket *so) +void +inp_clear_INP_INADDR_ANY(struct socket *so) { struct inpcb *inp = NULL; @@ -2245,3 +2682,218 @@ void inp_clear_INP_INADDR_ANY(struct socket *so) socket_unlock(so, 1); } +void +inp_get_soprocinfo(struct inpcb *inp, struct so_procinfo *soprocinfo) +{ + struct socket *so = inp->inp_socket; + + soprocinfo->spi_pid = so->last_pid; + /* + * When not delegated, the effective pid is the same as the real pid + */ + if (so->so_flags & SOF_DELEGATED) + soprocinfo->spi_epid = so->e_pid; + else + soprocinfo->spi_epid = so->last_pid; +} + +int +inp_findinpcb_procinfo(struct inpcbinfo *pcbinfo, uint32_t flowhash, + struct so_procinfo *soprocinfo) +{ + struct inpcb *inp = NULL; + int found = 0; + + bzero(soprocinfo, sizeof (struct so_procinfo)); + + if (!flowhash) + return (-1); + + lck_rw_lock_shared(pcbinfo->ipi_lock); + LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { + if (inp->inp_state != INPCB_STATE_DEAD && + inp->inp_socket != NULL && + inp->inp_flowhash == flowhash) { + found = 1; + inp_get_soprocinfo(inp, soprocinfo); + break; + } + } + lck_rw_done(pcbinfo->ipi_lock); + + return (found); +} + +#if CONFIG_PROC_UUID_POLICY +static void +inp_update_cellular_policy(struct inpcb *inp, boolean_t set) +{ + struct socket *so = inp->inp_socket; + int before, after; + + VERIFY(so != NULL); + VERIFY(inp->inp_state != INPCB_STATE_DEAD); + + before = (inp->inp_flags & INP_NO_IFT_CELLULAR); + if (set) { + inp_set_nocellular(inp); + } else { + inp_clear_nocellular(inp); + } + after = (inp->inp_flags & INP_NO_IFT_CELLULAR); + if (net_io_policy_log && (before != after)) { + static const char *ok = "OK"; + static const char *nok = "NOACCESS"; + uuid_string_t euuid_buf; + pid_t epid; + + if (so->so_flags & SOF_DELEGATED) { + uuid_unparse(so->e_uuid, euuid_buf); + epid = so->e_pid; + } else { + uuid_unparse(so->last_uuid, euuid_buf); + epid = so->last_pid; + } + + /* allow this socket to generate another notification event */ + so->so_ifdenied_notifies = 0; + + log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " + "euuid %s%s %s->%s\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + SOCK_TYPE(so), epid, euuid_buf, + (so->so_flags & SOF_DELEGATED) ? + " [delegated]" : "", + ((before < after) ? ok : nok), + ((before < after) ? 
nok : ok)); + } +} + +#if FLOW_DIVERT +static void +inp_update_flow_divert_policy(struct inpcb *inp, boolean_t set) +{ + struct socket *so = inp->inp_socket; + int before, after; + + VERIFY(so != NULL); + VERIFY(inp->inp_state != INPCB_STATE_DEAD); + + if (set && !(inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + set = !flow_divert_is_dns_service(so); + } + + before = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT); + if (set) { + inp_set_flow_divert(inp); + } else { + inp_clear_flow_divert(inp); + } + after = (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT); + if (net_io_policy_log && (before != after)) { + static const char *wanted = "WANTED"; + static const char *unwanted = "UNWANTED"; + uuid_string_t euuid_buf; + pid_t epid; + + if (so->so_flags & SOF_DELEGATED) { + uuid_unparse(so->e_uuid, euuid_buf); + epid = so->e_pid; + } else { + uuid_unparse(so->last_uuid, euuid_buf); + epid = so->last_pid; + } + + log(LOG_DEBUG, "%s: so 0x%llx [%d,%d] epid %d " + "euuid %s%s %s->%s\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), SOCK_DOM(so), + SOCK_TYPE(so), epid, euuid_buf, + (so->so_flags & SOF_DELEGATED) ? + " [delegated]" : "", + ((before < after) ? unwanted : wanted), + ((before < after) ? wanted : unwanted)); + } +} +#endif /* FLOW_DIVERT */ +#endif /* !CONFIG_PROC_UUID_POLICY */ + +int +inp_update_policy(struct inpcb *inp) +{ +#if CONFIG_PROC_UUID_POLICY + struct socket *so = inp->inp_socket; + uint32_t pflags = 0; + int32_t ogencnt; + int err = 0; + + if (!net_io_policy_uuid || + so == NULL || inp->inp_state == INPCB_STATE_DEAD) + return (0); + + /* + * Kernel-created sockets that aren't delegating other sockets + * are currently exempted from UUID policy checks. + */ + if (so->last_pid == 0 && !(so->so_flags & SOF_DELEGATED)) + return (0); + + ogencnt = so->so_policy_gencnt; + err = proc_uuid_policy_lookup(((so->so_flags & SOF_DELEGATED) ? + so->e_uuid : so->last_uuid), &pflags, &so->so_policy_gencnt); + + /* + * Discard cached generation count if the entry is gone (ENOENT), + * so that we go thru the checks below. + */ + if (err == ENOENT && ogencnt != 0) + so->so_policy_gencnt = 0; + + /* + * If the generation count has changed, inspect the policy flags + * and act accordingly. If a policy flag was previously set and + * the UUID is no longer present in the table (ENOENT), treat it + * as if the flag has been cleared. + */ + if ((err == 0 || err == ENOENT) && ogencnt != so->so_policy_gencnt) { + /* update cellular policy for this socket */ + if (err == 0 && (pflags & PROC_UUID_NO_CELLULAR)) { + inp_update_cellular_policy(inp, TRUE); + } else if (!(pflags & PROC_UUID_NO_CELLULAR)) { + inp_update_cellular_policy(inp, FALSE); + } +#if FLOW_DIVERT + /* update flow divert policy for this socket */ + if (err == 0 && (pflags & PROC_UUID_FLOW_DIVERT)) { + inp_update_flow_divert_policy(inp, TRUE); + } else if (!(pflags & PROC_UUID_FLOW_DIVERT)) { + inp_update_flow_divert_policy(inp, FALSE); + } +#endif /* FLOW_DIVERT */ + } + + return ((err == ENOENT) ? 
0 : err);
+#else /* !CONFIG_PROC_UUID_POLICY */
+#pragma unused(inp)
+ return (0);
+#endif /* !CONFIG_PROC_UUID_POLICY */
+}
+
+boolean_t
+inp_restricted(struct inpcb *inp, struct ifnet *ifp)
+{
+ VERIFY(inp != NULL);
+
+ if (!sorestrictrecv)
+ return (FALSE);
+
+ if (ifp == NULL || !(ifp->if_eflags & IFEF_RESTRICTED_RECV))
+ return (FALSE);
+
+ if (inp->inp_flags & INP_RECV_ANYIF)
+ return (FALSE);
+
+ if ((inp->inp_flags & INP_BOUND_IF) && inp->inp_boundifp == ifp)
+ return (FALSE);
+
+ return (TRUE);
+}
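[Annotation: the in_pcb.c additions end with inp_restricted(), a side-effect-free
predicate that receive paths can consult before delivering an inbound packet to a
socket. A minimal caller sketch; the mbuf `m` and the drop policy here are
illustrative assumptions, not lines from this patch:

	if (inp_restricted(inp, ifp)) {
		/* receive-restricted interface and no explicit claim by this pcb */
		m_freem(m);
		return;
	}

The checks run cheapest-first: the global sorestrictrecv switch, then the
interface's IFEF_RESTRICTED_RECV marking, then the two per-pcb escape hatches
(INP_RECV_ANYIF, or an INP_BOUND_IF binding to that very interface).]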
diff --git a/bsd/netinet/in_pcb.h b/bsd/netinet/in_pcb.h
index acd942f63..a43b45a17 100644
--- a/bsd/netinet/in_pcb.h
+++ b/bsd/netinet/in_pcb.h
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
- *
+ *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
- *
+ *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
 /*
@@ -68,39 +68,34 @@
 */
 #ifndef _NETINET_IN_PCB_H_
-#define _NETINET_IN_PCB_H_
+#define _NETINET_IN_PCB_H_
 #include
 #include
 #include
-#ifdef KERNEL_PRIVATE
 #ifdef BSD_KERNEL_PRIVATE
+#include
 #include
-#endif /* BSD_KERNEL_PRIVATE */
 #include
-#endif /* KERNEL_PRIVATE */
+#include
+#endif /* BSD_KERNEL_PRIVATE */
 #include /* for IPSEC */
-#ifdef KERNEL_PRIVATE
-
-#define in6pcb inpcb /* for KAME src sync over BSD*'s */
-#define in6p_sp inp_sp /* for KAME src sync over BSD*'s */
-#endif /* KERNEL_PRIVATE */
-
 #ifdef BSD_KERNEL_PRIVATE
 /*
- * Common structure pcb for internet protocol implementation.
- * Here are stored pointers to local and foreign host table
- * entries, local and foreign socket numbers, and pointers
- * up (to a socket structure) and down (to a protocol-specific)
- * control block.
+ * struct inpcb is the common protocol control block structure used in most
+ * IP transport protocols.
+ *
+ * Pointers to local and foreign host table entries, local and foreign socket
+ * numbers, and pointers up (to a socket structure) and down (to a
+ * protocol-specific control block) are stored here.
 */
 LIST_HEAD(inpcbhead, inpcb);
 LIST_HEAD(inpcbporthead, inpcbport);
 #endif /* BSD_KERNEL_PRIVATE */
-
 typedef u_quad_t inp_gen_t;
+
 /*
 * PCB with AF_INET6 null bind'ed laddr can receive AF_INET input packet.
 * So, AF_INET6 null laddr is also used as AF_INET null laddr, by utilizing
@@ -111,7 +106,7 @@ struct in_addr_4in6 {
 struct in_addr ia46_addr4;
 };
-#ifdef KERNEL_PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
 /*
 * NB: the zone allocator is type-stable EXCEPT FOR THE FIRST TWO LONGS
 * of the structure. Therefore, it is important that the members in
@@ -131,46 +126,56 @@ struct inp_stat {
 u_int64_t txbytes;
 };
-
+/*
+ * struct inpcb captures the network layer state for TCP, UDP and raw IPv4
+ * and IPv6 sockets. In the case of TCP, further per-connection state is
+ * hung off of inp_ppcb most of the time.
+ */
 struct inpcb {
+ decl_lck_mtx_data(, inpcb_mtx); /* inpcb per-socket mutex */
 LIST_ENTRY(inpcb) inp_hash; /* hash list */
- int inp_wantcnt; /* pcb wanted count. protected by pcb list lock */
- int inp_state; /* state of this pcb, in use, recycled, ready for recycling... */
- u_short inp_fport; /* foreign port */
- u_short inp_lport; /* local port */
 LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
 void *inp_ppcb; /* pointer to per-protocol pcb */
- struct inpcbinfo *inp_pcbinfo; /* PCB list info */
- struct socket *inp_socket; /* back pointer to socket */
- u_int32_t nat_cookie; /* Cookie stored and returned to NAT */
+ struct inpcbinfo *inp_pcbinfo; /* PCB list info */
+ struct socket *inp_socket; /* back pointer to socket */
 LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */
 RB_ENTRY(inpcb) infc_link; /* link for flowhash RB tree */
- struct inpcbport *inp_phd; /* head of this list */
+ struct inpcbport *inp_phd; /* head of this list */
 inp_gen_t inp_gencnt; /* generation count of this instance */
+ int inp_hash_element; /* array index of pcb's hash list */
+ int inp_wantcnt; /* wanted count; atomically updated */
+ int inp_state; /* state (INUSE/CACHED/DEAD) */
+ u_short inp_fport; /* foreign port */
+ u_short inp_lport; /* local port */
 u_int32_t inp_flags; /* generic IP/datagram flags */
 u_int32_t inp_flags2; /* generic IP/datagram flags #2 */
- u_int32_t inp_flow;
+ u_int32_t inp_flow; /* IPv6 flow information */
 u_char inp_sndinprog_cnt; /* outstanding send operations */
 u_char inp_vflag; /* INP_IPV4 or INP_IPV6 */
 u_char inp_ip_ttl; /* time to live proto */
 u_char inp_ip_p; /* protocol proto */
- /* protocol dependent part */
+
+ struct ifnet *inp_boundifp; /* interface for INP_BOUND_IF */
+ struct ifnet *inp_last_outifp; /* last known outgoing interface */
+ u_int32_t inp_flowhash; /* flow hash */
+
+ /* Protocol-dependent part */
 union { /* foreign host table entry */
- struct in_addr_4in6 inp46_foreign;
- struct in6_addr inp6_foreign;
+ struct in_addr_4in6 inp46_foreign;
+ struct in6_addr inp6_foreign;
 } inp_dependfaddr;
 union { /* local host table entry */
- struct in_addr_4in6 inp46_local;
- struct in6_addr inp6_local;
+ struct in_addr_4in6 inp46_local;
+ struct in6_addr inp6_local;
 } inp_dependladdr;
 union { /* placeholder for routing entry */
- struct route inp4_route;
- struct route_in6 inp6_route;
+ struct route inp4_route;
+ struct route_in6 inp6_route;
 } inp_dependroute;
 struct { /* type of service proto */
@@ -183,9 +188,6 @@ struct inpcb {
 struct {
 /* IP options */
 struct mbuf *inp6_options;
- u_int8_t inp6_hlim;
- u_int8_t unused_uint8_1;
- ushort unused_uint16_1;
 /* IP6 options for outgoing packets */
 struct ip6_pktopts *inp6_outputopts;
 /* IP multicast options */
@@ -194,64 +196,60 @@ struct inpcb {
 struct icmp6_filter *inp6_icmp6filt;
 /* IPV6_CHECKSUM setsockopt */
 int inp6_cksum;
- u_short inp6_ifindex;
 short inp6_hops;
 } inp_depend6;
- int hash_element; /* Array index of pcb's hash list */
 caddr_t inp_saved_ppcb; /* place to save pointer while cached */
- struct inpcbpolicy *inp_sp;
- decl_lck_mtx_data( ,inpcb_mtx); /* inpcb per-socket mutex */
- struct ifnet *inp_boundifp; /* interface for INP_BOUND_IF */
- struct ifnet *inp_last_outifp; /* last known outgoing interface */
- u_int32_t inp_reserved[2]; /* reserved for future use */
- u_int32_t inp_flowhash; /* flow hash */
-
 #if CONFIG_MACF_NET
 struct label *inp_label; /* MAC label */
 #endif
+#if IPSEC
+ struct inpcbpolicy *inp_sp; /* for IPSec */
+#endif /* IPSEC */
 struct inp_stat *inp_stat;
- u_int8_t inp_stat_store[sizeof(struct inp_stat) + sizeof(u_int64_t)];
+ struct inp_stat *inp_cstat; /* cellular data */
+ struct inp_stat *inp_wstat; /* Wi-Fi data */
+ u_int8_t inp_stat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)];
+ u_int8_t inp_cstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)];
+ u_int8_t inp_wstat_store[sizeof (struct inp_stat) + sizeof (u_int64_t)];
 };
-#endif /* KERNEL_PRIVATE */
-
-/*
- * The range of the generation count, as used in this implementation,
- * is 9e19. We would have to create 300 billion connections per
- * second for this number to roll over in a year. This seems sufficiently
- * unlikely that we simply don't concern ourselves with that possibility.
- */
+#define INP_ADD_STAT(_inp, _cnt_cellular, _cnt_wifi, _a, _n) do { \
+ locked_add_64(&((_inp)->inp_stat->_a), (_n)); \
+ if (_cnt_cellular) \
+ locked_add_64(&((_inp)->inp_cstat->_a), (_n)); \
+ if (_cnt_wifi) \
+ locked_add_64(&((_inp)->inp_wstat->_a), (_n)); \
+} while (0)
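+/*
+ * Usage sketch for INP_ADD_STAT (editorial illustration; these call sites
+ * are not part of this header): a transport accounting an inbound packet
+ * of 'len' bytes, with 'cell' and 'wifi' derived from the input interface:
+ *
+ *	INP_ADD_STAT(inp, cell, wifi, rxpackets, 1);
+ *	INP_ADD_STAT(inp, cell, wifi, rxbytes, len);
+ *
+ * The overall counters are always bumped; the cellular and Wi-Fi copies
+ * only when the corresponding predicate is set.
+ */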
+#endif /* BSD_KERNEL_PRIVATE */
 /*
 * Interface exported to userland by various protocols which use
 * inpcbs. Hack alert -- only define if struct xsocket is in scope.
 */
+#pragma pack(4)
+#if defined(__LP64__)
+struct _inpcb_list_entry {
+ u_int32_t le_next;
+ u_int32_t le_prev;
+};
+#define _INPCB_PTR(x) u_int32_t
+#define _INPCB_LIST_ENTRY(x) struct _inpcb_list_entry
+#else /* !__LP64__ */
+#define _INPCB_PTR(x) x
+#define _INPCB_LIST_ENTRY(x) LIST_ENTRY(x)
+#endif /* !__LP64__ */
+
+#ifdef XNU_KERNEL_PRIVATE
 /*
 * This is a copy of the inpcb as it shipped in Panther. This structure
 * is filled out in a copy function. This allows the inpcb to change
 * without breaking userland tools.
- *
+ *
 * CAUTION: Many fields may not be filled out. Fewer may be filled out
 * in the future. Code defensively.
 */
-
-#pragma pack(4)
-
-#if defined(__LP64__)
-struct _inpcb_list_entry {
- u_int32_t le_next;
- u_int32_t le_prev;
-};
-#define _INPCB_PTR(x) u_int32_t
-#define _INPCB_LIST_ENTRY(x) struct _inpcb_list_entry
-#else
-#define _INPCB_PTR(x) x
-#define _INPCB_LIST_ENTRY(x) LIST_ENTRY(x)
-#endif
-
-#ifdef KERNEL_PRIVATE
 struct inpcb_compat {
 #else
 struct inpcbinfo;
@@ -265,41 +263,41 @@ struct inpcbpolicy;
 struct inpcb {
 #endif /* KERNEL_PRIVATE */
 _INPCB_LIST_ENTRY(inpcb) inp_hash; /* hash list */
- struct in_addr reserved1; /* APPLE reserved: inp_faddr defined in protcol indep. part */
- struct in_addr reserved2; /* APPLE reserved */
- u_short inp_fport; /* foreign port */
- u_short inp_lport; /* local port */
- _INPCB_LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */
- _INPCB_PTR(caddr_t) inp_ppcb; /* pointer to per-protocol pcb */
- _INPCB_PTR(struct inpcbinfo *) inp_pcbinfo; /* PCB list info */
- _INPCB_PTR(void *) inp_socket; /* back pointer to socket */
- u_char nat_owner; /* Used to NAT TCP/UDP traffic */
+ struct in_addr reserved1; /* reserved */
+ struct in_addr reserved2; /* reserved */
+ u_short inp_fport; /* foreign port */
+ u_short inp_lport; /* local port */
+ _INPCB_LIST_ENTRY(inpcb) inp_list; /* list for all peer PCBs */
+ _INPCB_PTR(caddr_t) inp_ppcb; /* per-protocol pcb */
+ _INPCB_PTR(struct inpcbinfo *) inp_pcbinfo; /* PCB list info */
+ _INPCB_PTR(void *) inp_socket; /* back pointer to socket */
+ u_char nat_owner; /* Used to NAT TCP/UDP traffic */
 u_int32_t nat_cookie; /* Cookie stored and returned to NAT */
- _INPCB_LIST_ENTRY(inpcb) inp_portlist; /* list for this PCB's local port */
- _INPCB_PTR(struct inpcbport *) inp_phd; /* head of this list */
+ _INPCB_LIST_ENTRY(inpcb) inp_portlist; /* this PCB's local port list */
+ _INPCB_PTR(struct inpcbport *) inp_phd; /* head of this list */
 inp_gen_t inp_gencnt; /* generation count of this instance */
- int inp_flags; /* generic IP/datagram flags */
+ int inp_flags; /* generic IP/datagram flags */
 u_int32_t inp_flow;
- u_char inp_vflag;
+ u_char inp_vflag;
 u_char inp_ip_ttl; /* time to live proto */
 u_char inp_ip_p; /* protocol proto */
 /* protocol dependent part */
 union { /* foreign host table entry */
- struct in_addr_4in6 inp46_foreign;
- struct in6_addr inp6_foreign;
+ struct in_addr_4in6 inp46_foreign;
+ struct in6_addr inp6_foreign;
 } inp_dependfaddr;
 union { /* local host table entry */
- struct in_addr_4in6 inp46_local;
- struct in6_addr inp6_local;
+ struct in_addr_4in6 inp46_local;
+ struct in6_addr inp6_local;
 } inp_dependladdr;
 union { /* placeholder for routing entry */
- u_char inp4_route[20];
- u_char inp6_route[32];
+ u_char inp4_route[20];
+ u_char inp6_route[32];
 } inp_dependroute;
 struct { /* type of service proto */
@@ -312,31 +310,31 @@ struct inpcb {
 struct {
 /* IP options */
- _INPCB_PTR(struct mbuf *) inp6_options;
- u_int8_t inp6_hlim;
- u_int8_t unused_uint8_1;
- ushort unused_uint16_1;
+ _INPCB_PTR(struct mbuf *) inp6_options;
+ u_int8_t inp6_hlim;
+ u_int8_t unused_uint8_1;
+ ushort unused_uint16_1;
 /* IP6 options for outgoing packets */
- _INPCB_PTR(struct ip6_pktopts *) inp6_outputopts;
+ _INPCB_PTR(struct ip6_pktopts *) inp6_outputopts;
 /* IP multicast options */
- _INPCB_PTR(struct ip6_moptions *) inp6_moptions;
+ _INPCB_PTR(struct ip6_moptions *) inp6_moptions;
 /* ICMPv6 code type filter */
- _INPCB_PTR(struct icmp6_filter *) inp6_icmp6filt;
+ _INPCB_PTR(struct icmp6_filter *) inp6_icmp6filt;
 /* IPV6_CHECKSUM setsockopt */
 int inp6_cksum;
 u_short inp6_ifindex;
 short inp6_hops;
 } inp_depend6;
- int hash_element; /* Array index of pcb's hash list */
- _INPCB_PTR(caddr_t) inp_saved_ppcb; /* place to save pointer while cached */
- _INPCB_PTR(struct inpcbpolicy *) inp_sp;
- u_int32_t reserved[3]; /* For future use */
+ int hash_element; /* Array index of pcb's hash list */
+ _INPCB_PTR(caddr_t) inp_saved_ppcb; /* pointer while cached */
+ _INPCB_PTR(struct inpcbpolicy *) inp_sp;
+ u_int32_t reserved[3]; /* reserved */
 };
 struct xinpcb {
 u_int32_t xi_len; /* length of this structure */
-#ifdef KERNEL_PRIVATE
+#ifdef XNU_KERNEL_PRIVATE
 struct
inpcb_compat xi_inp; #else struct inpcb xi_inp; @@ -345,101 +343,93 @@ struct xinpcb { u_quad_t xi_alignment_hack; }; -#if !CONFIG_EMBEDDED - struct inpcb64_list_entry { u_int64_t le_next; u_int64_t le_prev; }; struct xinpcb64 { - u_int64_t xi_len; /* length of this structure */ - u_int64_t xi_inpp; - u_short inp_fport; /* foreign port */ - u_short inp_lport; /* local port */ - struct inpcb64_list_entry - inp_list; /* list for all PCBs of this proto */ - u_int64_t inp_ppcb; /* pointer to per-protocol pcb */ - u_int64_t inp_pcbinfo; /* PCB list info */ - struct inpcb64_list_entry - inp_portlist; /* list for this PCB's local port */ - u_int64_t inp_phd; /* head of this list */ - inp_gen_t inp_gencnt; /* generation count of this instance */ - int inp_flags; /* generic IP/datagram flags */ - u_int32_t inp_flow; - u_char inp_vflag; - u_char inp_ip_ttl; /* time to live */ - u_char inp_ip_p; /* protocol */ - union { /* foreign host table entry */ - struct in_addr_4in6 inp46_foreign; - struct in6_addr inp6_foreign; - } inp_dependfaddr; - union { /* local host table entry */ - struct in_addr_4in6 inp46_local; - struct in6_addr inp6_local; - } inp_dependladdr; + u_int64_t xi_len; /* length of this structure */ + u_int64_t xi_inpp; + u_short inp_fport; /* foreign port */ + u_short inp_lport; /* local port */ + struct inpcb64_list_entry inp_list; /* list for all PCBs */ + u_int64_t inp_ppcb; /* ptr to per-protocol PCB */ + u_int64_t inp_pcbinfo; /* PCB list info */ + struct inpcb64_list_entry inp_portlist; /* this PCB's local port list */ + u_int64_t inp_phd; /* head of this list */ + inp_gen_t inp_gencnt; /* current generation count */ + int inp_flags; /* generic IP/datagram flags */ + u_int32_t inp_flow; + u_char inp_vflag; + u_char inp_ip_ttl; /* time to live */ + u_char inp_ip_p; /* protocol */ + union { /* foreign host table entry */ + struct in_addr_4in6 inp46_foreign; + struct in6_addr inp6_foreign; + } inp_dependfaddr; + union { /* local host table entry */ + struct in_addr_4in6 inp46_local; + struct in6_addr inp6_local; + } inp_dependladdr; struct { - u_char inp4_ip_tos; /* type of service */ - } inp_depend4; + u_char inp4_ip_tos; /* type of service */ + } inp_depend4; struct { - u_int8_t inp6_hlim; - int inp6_cksum; - u_short inp6_ifindex; - short inp6_hops; - } inp_depend6; - struct xsocket64 xi_socket; - u_quad_t xi_alignment_hack; + u_int8_t inp6_hlim; + int inp6_cksum; + u_short inp6_ifindex; + short inp6_hops; + } inp_depend6; + struct xsocket64 xi_socket; + u_quad_t xi_alignment_hack; }; -#endif /* !CONFIG_EMBEDDED */ - #ifdef PRIVATE - struct xinpcb_list_entry { u_int64_t le_next; u_int64_t le_prev; }; struct xinpcb_n { - u_int32_t xi_len; /* length of this structure */ - u_int32_t xi_kind; /* XSO_INPCB */ - u_int64_t xi_inpp; - u_short inp_fport; /* foreign port */ - u_short inp_lport; /* local port */ - u_int64_t inp_ppcb; /* pointer to per-protocol pcb */ - inp_gen_t inp_gencnt; /* generation count of this instance */ - int inp_flags; /* generic IP/datagram flags */ - u_int32_t inp_flow; - u_char inp_vflag; - u_char inp_ip_ttl; /* time to live */ - u_char inp_ip_p; /* protocol */ - union { /* foreign host table entry */ - struct in_addr_4in6 inp46_foreign; - struct in6_addr inp6_foreign; - } inp_dependfaddr; - union { /* local host table entry */ - struct in_addr_4in6 inp46_local; - struct in6_addr inp6_local; - } inp_dependladdr; + u_int32_t xi_len; /* length of this structure */ + u_int32_t xi_kind; /* XSO_INPCB */ + u_int64_t xi_inpp; + u_short inp_fport; /* foreign port */ + 
u_short inp_lport; /* local port */ + u_int64_t inp_ppcb; /* pointer to per-protocol pcb */ + inp_gen_t inp_gencnt; /* generation count of this instance */ + int inp_flags; /* generic IP/datagram flags */ + u_int32_t inp_flow; + u_char inp_vflag; + u_char inp_ip_ttl; /* time to live */ + u_char inp_ip_p; /* protocol */ + union { /* foreign host table entry */ + struct in_addr_4in6 inp46_foreign; + struct in6_addr inp6_foreign; + } inp_dependfaddr; + union { /* local host table entry */ + struct in_addr_4in6 inp46_local; + struct in6_addr inp6_local; + } inp_dependladdr; struct { - u_char inp4_ip_tos; /* type of service */ - } inp_depend4; + u_char inp4_ip_tos; /* type of service */ + } inp_depend4; struct { - u_int8_t inp6_hlim; - int inp6_cksum; - u_short inp6_ifindex; - short inp6_hops; - } inp_depend6; + u_int8_t inp6_hlim; + int inp6_cksum; + u_short inp6_ifindex; + short inp6_hops; + } inp_depend6; u_int32_t inp_flowhash; }; - #endif /* PRIVATE */ struct xinpgen { - u_int32_t xig_len; /* length of this structure */ - u_int xig_count; /* number of PCBs at this time */ - inp_gen_t xig_gen; /* generation count at this time */ - so_gen_t xig_sogen; /* socket generation count at this time */ + u_int32_t xig_len; /* length of this structure */ + u_int xig_count; /* number of PCBs at this time */ + inp_gen_t xig_gen; /* generation count at this time */ + so_gen_t xig_sogen; /* current socket generation count */ }; #pragma pack() @@ -447,8 +437,8 @@ struct xinpgen { /* * These defines are for use with the inpcb. */ -#define INP_IPV4 0x1 -#define INP_IPV6 0x2 +#define INP_IPV4 0x1 +#define INP_IPV6 0x2 #define inp_faddr inp_dependfaddr.inp46_foreign.ia46_addr4 #define inp_laddr inp_dependladdr.inp46_local.ia46_addr4 #define in6p_faddr inp_dependfaddr.inp6_foreign @@ -463,7 +453,6 @@ struct xinpgen { #define in6p_ip6_hlim inp_depend6.inp6_hlim #define in6p_hops inp_depend6.inp6_hops /* default hop limit */ #define in6p_ip6_nxt inp_ip_p -#define in6p_flowinfo inp_flow #define in6p_vflag inp_vflag #define in6p_options inp_depend6.inp6_options #define in6p_outputopts inp_depend6.inp6_outputopts @@ -471,247 +460,299 @@ struct xinpgen { #define in6p_icmp6filt inp_depend6.inp6_icmp6filt #define in6p_cksum inp_depend6.inp6_cksum #define in6p_ifindex inp_depend6.inp6_ifindex -#define in6p_flags inp_flags /* for KAME src sync over BSD*'s */ -#define in6p_socket inp_socket /* for KAME src sync over BSD*'s */ -#endif /* BSD_KERNEL_PRIVATE */ - -#define in6p_lport inp_lport /* for KAME src sync over BSD*'s */ -#define in6p_fport inp_fport /* for KAME src sync over BSD*'s */ -#define in6p_ppcb inp_ppcb /* for KAME src sync over BSD*'s */ - -#ifdef BSD_KERNEL_PRIVATE +#define in6p_flags inp_flags +#define in6p_flags2 inp_flags2 +#define in6p_socket inp_socket +#define in6p_lport inp_lport +#define in6p_fport inp_fport +#define in6p_ppcb inp_ppcb #define in6p_state inp_state #define in6p_wantcnt inp_wantcnt #define in6p_last_outifp inp_last_outifp -#endif /* BSD_KERNEL_PRIVATE */ +#define in6pcb inpcb +#if IPSEC +#define in6p_sp inp_sp +#endif /* IPSEC */ -#ifdef BSD_KERNEL_PRIVATE struct inpcbport { LIST_ENTRY(inpcbport) phd_hash; struct inpcbhead phd_pcblist; u_short phd_port; }; -struct inpcbinfo { /* XXX documentation, prefixes */ - struct inpcbhead *hashbase; -#ifdef __APPLE__ - u_int32_t hashsize; /* in elements */ -#endif - u_long hashmask; /* u_long as expected by hash functions */ - struct inpcbporthead *porthashbase; - u_long porthashmask; /* u_long as expected by hash functions */ - struct 
inpcbhead *listhead; - u_short lastport; - u_short lastlow; - u_short lasthi; - void *ipi_zone; /* zone to allocate pcbs from */ - u_int ipi_count; /* number of pcbs in this list */ - u_quad_t ipi_gencnt; /* current generation count */ -#ifdef __APPLE__ -#ifdef _KERN_LOCKS_H_ - lck_attr_t *mtx_attr; /* mutex attributes */ - lck_grp_t *mtx_grp; /* mutex group definition */ - lck_grp_attr_t *mtx_grp_attr; /* mutex group attributes */ - lck_rw_t *mtx; /* global mutex for the pcblist*/ -#else - void *mtx_attr; /* mutex attributes */ - void *mtx_grp; /* mutex group definition */ - void *mtx_grp_attr; /* mutex group attributes */ - void *mtx; /* global mutex for the pcblist*/ -#endif -#endif +struct intimercount { + u_int32_t intimer_lazy; /* lazy requests for timer scheduling */ + u_int32_t intimer_fast; /* fast requests, can be coalesced */ + u_int32_t intimer_nodelay; /* fast requests, never coalesced */ }; -#define INP_PCBHASH(faddr, lport, fport, mask) \ +typedef void (*inpcb_timer_func_t)(struct inpcbinfo *); + +/* + * Global data structure for each high-level protocol (UDP, TCP, ...) in both + * IPv4 and IPv6. Holds inpcb lists and information for managing them. Each + * pcbinfo is protected by a RW lock: ipi_lock. + * + * All INPCB pcbinfo entries are linked together via ipi_entry. + */ +struct inpcbinfo { + /* + * Glue to all PCB infos, as well as garbage collector and + * timer callbacks, protected by inpcb_lock. Callout request + * counts are atomically updated. + */ + TAILQ_ENTRY(inpcbinfo) ipi_entry; + inpcb_timer_func_t ipi_gc; + inpcb_timer_func_t ipi_timer; + struct intimercount ipi_gc_req; + struct intimercount ipi_timer_req; + + /* + * Per-protocol lock protecting pcb list, pcb count, etc. + */ + lck_rw_t *ipi_lock; + + /* + * List and count of pcbs on the protocol. + */ + struct inpcbhead *ipi_listhead; + uint32_t ipi_count; + + /* + * Count of pcbs marked with INP2_TIMEWAIT flag. + */ + uint32_t ipi_twcount; + + /* + * Generation count -- incremented each time a connection is + * allocated or freed. + */ + uint64_t ipi_gencnt; + + /* + * Fields associated with port lookup and allocation. + */ + uint16_t ipi_lastport; + uint16_t ipi_lastlow; + uint16_t ipi_lasthi; + + /* + * Zone from which inpcbs are allocated for this protocol. + */ + struct zone *ipi_zone; + + /* + * Per-protocol hash of pcbs, hashed by local and foreign + * addresses and port numbers. + */ + struct inpcbhead *ipi_hashbase; + u_long ipi_hashmask; + + /* + * Per-protocol hash of pcbs, hashed by only local port number. + */ + struct inpcbporthead *ipi_porthashbase; + u_long ipi_porthashmask; + + /* + * Misc. 
+ */
+ lck_attr_t *ipi_lock_attr;
+ lck_grp_t *ipi_lock_grp;
+ lck_grp_attr_t *ipi_lock_grp_attr;
+};
+
+#define INP_PCBHASH(faddr, lport, fport, mask) \
 (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
-#define INP_PCBPORTHASH(lport, mask) \
+#define INP_PCBPORTHASH(lport, mask) \
 (ntohs((lport)) & (mask))
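+/*
+ * Worked example (editorial illustration, not part of the original
+ * header): with ipi_porthashmask == 0x1ff, a pcb bound to local port 80
+ * lands in bucket INP_PCBPORTHASH(htons(80), 0x1ff) == (80 & 0x1ff) == 80;
+ * INP_PCBHASH additionally folds in the foreign address and both ports.
+ */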
-#define INP_IS_FLOW_CONTROLLED(_inp_) ((_inp_)->inp_flags & INP_FLOW_CONTROLLED)
-#define INP_IS_FLOW_SUSPENDED(_inp_) \
- (((_inp_)->inp_flags & INP_FLOW_SUSPENDED) || \
+#define INP_IS_FLOW_CONTROLLED(_inp_) \
+ ((_inp_)->inp_flags & INP_FLOW_CONTROLLED)
+#define INP_IS_FLOW_SUSPENDED(_inp_) \
+ (((_inp_)->inp_flags & INP_FLOW_SUSPENDED) || \
 ((_inp_)->inp_socket->so_flags & SOF_SUSPENDED))
-#define INP_WAIT_FOR_IF_FEEDBACK(_inp_) \
+#define INP_WAIT_FOR_IF_FEEDBACK(_inp_) \
 (((_inp_)->inp_flags & (INP_FLOW_CONTROLLED | INP_FLOW_SUSPENDED)) != 0)
 #endif /* BSD_KERNEL_PRIVATE */
-/* flags in inp_flags: */
-#ifdef BSD_KERNEL_PRIVATE
-#define INP_RECVOPTS 0x01 /* receive incoming IP options */
-#define INP_RECVRETOPTS 0x02 /* receive IP options for reply */
-#define INP_RECVDSTADDR 0x04 /* receive IP dst address */
-#define INP_HDRINCL 0x08 /* user supplies entire IP header */
-#define INP_HIGHPORT 0x10 /* user wants "high" port binding */
-#define INP_LOWPORT 0x20 /* user wants "low" port binding */
-#endif /* BSD_KERNEL_PRIVATE */
-#define INP_ANONPORT 0x40 /* port chosen for user */
-#ifdef BSD_KERNEL_PRIVATE
-#define INP_RECVIF 0x80 /* receive incoming interface */
-#define INP_MTUDISC 0x100 /* user can do MTU discovery */
-#ifdef __APPLE__
-#define INP_STRIPHDR 0x200 /* Strip headers in raw_ip, for OT support */
-#endif
-#define INP_RECV_ANYIF 0x400 /* don't restrict inbound interface */
-#endif /* BSD_KERNEL_PRIVATE */
-#define INP_INADDR_ANY 0x800 /* local address wasn't specified */
-
+/*
+ * Flags for inp_flags.
+ *
+ * Some of these are publicly defined for legacy reasons, as they are
+ * (unfortunately) used by certain applications to determine, at compile
+ * time, whether or not the OS supports certain features.
+ */
 #ifdef BSD_KERNEL_PRIVATE
-#define INP_RECVTTL 0x1000
-#define INP_UDP_NOCKSUM 0x2000 /* Turn off outbound UDP checksum */
-#define INP_BOUND_IF 0x4000 /* bind socket to an ifindex */
+#define INP_RECVOPTS 0x00000001 /* receive incoming IP options */
+#define INP_RECVRETOPTS 0x00000002 /* receive IP options for reply */
+#define INP_RECVDSTADDR 0x00000004 /* receive IP dst address */
+#define INP_HDRINCL 0x00000008 /* user supplies entire IP header */
+#define INP_HIGHPORT 0x00000010 /* user wants "high" port binding */
+#define INP_LOWPORT 0x00000020 /* user wants "low" port binding */
 #endif /* BSD_KERNEL_PRIVATE */
-#define IN6P_IPV6_V6ONLY 0x8000 /* restrict AF_INET6 socket for v6 */
+#define INP_ANONPORT 0x00000040 /* port chosen for user */
 #ifdef BSD_KERNEL_PRIVATE
-#define IN6P_PKTINFO 0x10000 /* receive IP6 dst and I/F */
-#define IN6P_HOPLIMIT 0x20000 /* receive hoplimit */
-#define IN6P_HOPOPTS 0x40000 /* receive hop-by-hop options */
-#define IN6P_DSTOPTS 0x80000 /* receive dst options after rthdr */
-#define IN6P_RTHDR 0x100000 /* receive routing header */
-#define IN6P_RTHDRDSTOPTS 0x200000 /* receive dstoptions before rthdr */
-#define IN6P_TCLASS 0x400000 /* receive traffic class value */
-#define IN6P_AUTOFLOWLABEL 0x800000 /* attach flowlabel automatically */
+#define INP_RECVIF 0x00000080 /* receive incoming interface */
+#define INP_MTUDISC 0x00000100 /* unused */
+#define INP_STRIPHDR 0x00000200 /* strip hdrs in raw_ip (for OT) */
+#define INP_RECV_ANYIF 0x00000400 /* don't restrict inbound iface */
+#define INP_INADDR_ANY 0x00000800 /* local address wasn't specified */
+#define INP_IN6ADDR_ANY INP_INADDR_ANY
+#define INP_RECVTTL 0x00001000 /* receive incoming IP TTL */
+#define INP_UDP_NOCKSUM 0x00002000 /* turn off outbound UDP checksum */
+#define INP_BOUND_IF 0x00004000 /* bind socket to an interface */
 #endif /* BSD_KERNEL_PRIVATE */
-#define IN6P_BINDV6ONLY 0x1000000 /* do not grab IPv4 traffic */
+#define IN6P_IPV6_V6ONLY 0x00008000 /* restrict AF_INET6 socket for v6 */
 #ifdef BSD_KERNEL_PRIVATE
-#define IN6P_RFC2292 0x2000000 /* used RFC2292 API on the socket */
-#define IN6P_MTU 0x4000000 /* receive path MTU */
-#define INP_PKTINFO 0x8000000 /* receive and send PKTINFO for IPv4 */
-#define INP_FLOW_SUSPENDED 0x10000000 /* flow suspended */
-#define INP_NO_IFT_CELLULAR 0x20000000 /* do not use IFT_CELLULAR route */
-#define INP_FLOW_CONTROLLED 0x40000000 /* flow controlled */
-#define INP_FC_FEEDBACK 0x80000000 /* got interface flow adv feedback */
-
-#define INP_CONTROLOPTS (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|\
- INP_RECVIF|INP_RECVTTL|INP_PKTINFO|\
- IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|\
- IN6P_DSTOPTS|IN6P_RTHDR|IN6P_RTHDRDSTOPTS|\
- IN6P_TCLASS|IN6P_RFC2292|IN6P_MTU)
-
-#define INP_UNMAPPABLEOPTS (IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR|\
- IN6P_TCLASS|IN6P_AUTOFLOWLABEL)
-
- /* for KAME src sync over BSD*'s */
-#define IN6P_HIGHPORT INP_HIGHPORT
-#define IN6P_LOWPORT INP_LOWPORT
-#define IN6P_ANONPORT INP_ANONPORT
-#define IN6P_RECVIF INP_RECVIF
-#define IN6P_MTUDISC INP_MTUDISC
-#define IN6P_RECV_ANYIF INP_RECV_ANYIF
-#define IN6P_CONTROLOPTS INP_CONTROLOPTS
-#define IN6P_NO_IFT_CELLULAR INP_NO_IFT_CELLULAR
-
-/* Overflowed INP flags; use INP2 prefix to avoid misuse */
-#define INP2_IN_FCTREE 0x2 /* in inp_fc_tree */
- /*
- * socket AF version is {newer than,or include}
- * actual datagram AF version
- */
-
-#define INPLOOKUP_WILDCARD 1
-#ifdef __APPLE__
-#define INPCB_ALL_OWNERS 0xff
-#define INPCB_NO_OWNER 0x0
-#define INPCB_OWNED_BY_X
0x80 -#define INPCB_MAX_IDS 7 -#endif /* __APPLE__ */ +#define IN6P_PKTINFO 0x00010000 /* receive IP6 dst and I/F */ +#define IN6P_HOPLIMIT 0x00020000 /* receive hoplimit */ +#define IN6P_HOPOPTS 0x00040000 /* receive hop-by-hop options */ +#define IN6P_DSTOPTS 0x00080000 /* receive dst options after rthdr */ +#define IN6P_RTHDR 0x00100000 /* receive routing header */ +#define IN6P_RTHDRDSTOPTS 0x00200000 /* receive dstoptions before rthdr */ +#define IN6P_TCLASS 0x00400000 /* receive traffic class value */ +#define IN6P_AUTOFLOWLABEL 0x00800000 /* attach flowlabel automatically */ #endif /* BSD_KERNEL_PRIVATE */ -#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) -#define sotoin6pcb(so) sotoinpcb(so) /* for KAME src sync over BSD*'s */ +#define IN6P_BINDV6ONLY 0x01000000 /* do not grab IPv4 traffic */ #ifdef BSD_KERNEL_PRIVATE -#define INP_SOCKAF(so) so->so_proto->pr_domain->dom_family -#define INP_SOCKTYPE(so) so->so_proto->pr_type +#define IN6P_RFC2292 0x02000000 /* used RFC2292 API on the socket */ +#define IN6P_MTU 0x04000000 /* receive path MTU */ +#define INP_PKTINFO 0x08000000 /* rcv and snd PKTINFO for IPv4 */ +#define INP_FLOW_SUSPENDED 0x10000000 /* flow suspended */ +#define INP_NO_IFT_CELLULAR 0x20000000 /* do not use cellular interface */ +#define INP_FLOW_CONTROLLED 0x40000000 /* flow controlled */ +#define INP_FC_FEEDBACK 0x80000000 /* got interface flow adv feedback */ + +#define INP_CONTROLOPTS \ + (INP_RECVOPTS|INP_RECVRETOPTS|INP_RECVDSTADDR|INP_RECVIF|INP_RECVTTL| \ + INP_PKTINFO|IN6P_PKTINFO|IN6P_HOPLIMIT|IN6P_HOPOPTS|IN6P_DSTOPTS| \ + IN6P_RTHDR|IN6P_RTHDRDSTOPTS|IN6P_TCLASS|IN6P_RFC2292|IN6P_MTU) + +#define INP_UNMAPPABLEOPTS \ + (IN6P_HOPOPTS|IN6P_DSTOPTS|IN6P_RTHDR| IN6P_TCLASS|IN6P_AUTOFLOWLABEL) -#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) -#define INP_CHECK_SOCKTYPE(so, type) (INP_SOCKTYPE(so) == type) +/* + * Flags for inp_flags2. + * + * Overflowed INP flags; use INP2 prefix to avoid misuse. + */ +#define INP2_TIMEWAIT 0x00000001 /* in TIMEWAIT */ +#define INP2_IN_FCTREE 0x00000002 /* in inp_fc_tree */ +#define INP2_WANT_FLOW_DIVERT 0x00000004 /* flow divert is desired */ -extern int ipport_lowfirstauto; -extern int ipport_lowlastauto; -extern int ipport_firstauto; -extern int ipport_lastauto; -#endif /* BSD_KERNEL_PRIVATE */ +/* + * Flags passed to in_pcblookup*() functions. 
+ */
+#define INPLOOKUP_WILDCARD 1
-extern int ipport_hifirstauto;
-extern int ipport_hilastauto;
+#define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb)
+#define sotoin6pcb(so) sotoinpcb(so)
 struct sysctl_req;
-#ifdef BSD_KERNEL_PRIVATE
-
-#define INPCB_STATE_INUSE 0x1 /* freshly allocated PCB, it's in use */
-#define INPCB_STATE_CACHED 0x2 /* this pcb is sitting in a a cache */
-#define INPCB_STATE_DEAD 0x3 /* should treat as gone, will be garbage collected and freed */
-
-#define WNT_STOPUSING 0xffff /* marked as ready to be garbaged collected, should be treated as not found */
-#define WNT_ACQUIRE 0x1 /* that pcb is being acquired, do not recycle this time */
-#define WNT_RELEASE 0x2 /* release acquired mode, can be garbage collected when wantcnt is null */
-
-extern void in_losing(struct inpcb *);
-extern void in_rtchange(struct inpcb *, int);
-extern int in_pcballoc(struct socket *, struct inpcbinfo *, struct proc *);
-extern int in_pcbbind(struct inpcb *, struct sockaddr *, struct proc *);
-extern int in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *,
- struct ifnet **);
-extern void in_pcbdetach(struct inpcb *);
-extern void in_pcbdispose (struct inpcb *);
-extern void in_pcbdisconnect(struct inpcb *);
-extern int in_pcbinshash(struct inpcb *, int);
-extern int in_pcbladdr(struct inpcb *, struct sockaddr *,
- struct sockaddr_in *, struct ifnet **);
+extern int ipport_lowfirstauto;
+extern int ipport_lowlastauto;
+extern int ipport_firstauto;
+extern int ipport_lastauto;
+extern int ipport_hifirstauto;
+extern int ipport_hilastauto;
+
+/* freshly allocated PCB, it's in use */
+#define INPCB_STATE_INUSE 0x1
+/* this pcb is sitting in a cache */
+#define INPCB_STATE_CACHED 0x2
+/* should treat as gone, will be garbage collected and freed */
+#define INPCB_STATE_DEAD 0x3
+
+/* marked as ready to be garbage collected, should be treated as not found */
+#define WNT_STOPUSING 0xffff
+/* that pcb is being acquired, do not recycle this time */
+#define WNT_ACQUIRE 0x1
+/* release acquired mode, can be garbage collected when wantcnt is null */
+#define WNT_RELEASE 0x2
+
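+/*
+ * Reference pattern (editorial sketch; the call sites live outside this
+ * header): code walking a pcb list takes a want reference before touching
+ * a pcb and drops it when done, skipping pcbs that are being torn down:
+ *
+ *	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
+ *		continue;
+ *	...use inp...
+ *	(void) in_pcb_checkstate(inp, WNT_RELEASE, 0);
+ */
+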
+extern void in_pcbinit(void);
+extern void in_pcbinfo_attach(struct inpcbinfo *);
+extern int in_pcbinfo_detach(struct inpcbinfo *);
+
+/* type of timer to be scheduled by inpcb_gc_sched and inpcb_timer_sched */
+enum {
+ INPCB_TIMER_LAZY = 0x1,
+ INPCB_TIMER_FAST,
+ INPCB_TIMER_NODELAY
+};
+extern void inpcb_gc_sched(struct inpcbinfo *, u_int32_t type);
+extern void inpcb_timer_sched(struct inpcbinfo *, u_int32_t type);
+
+extern void in_losing(struct inpcb *);
+extern void in_rtchange(struct inpcb *, int);
+extern int in_pcballoc(struct socket *, struct inpcbinfo *, struct proc *);
+extern int in_pcbbind(struct inpcb *, struct sockaddr *, struct proc *);
+extern int in_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *,
+ unsigned int, struct ifnet **);
+extern void in_pcbdetach(struct inpcb *);
+extern void in_pcbdispose(struct inpcb *);
+extern void in_pcbdisconnect(struct inpcb *);
+extern int in_pcbinshash(struct inpcb *, int);
+extern int in_pcbladdr(struct inpcb *, struct sockaddr *, struct in_addr *,
+ unsigned int, struct ifnet **);
 extern struct inpcb *in_pcblookup_local(struct inpcbinfo *, struct in_addr,
- u_int, int);
+ u_int, int);
 extern struct inpcb *in_pcblookup_local_and_cleanup(struct inpcbinfo *,
- struct in_addr, u_int, int);
+ struct in_addr, u_int, int);
 extern struct inpcb *in_pcblookup_hash(struct inpcbinfo *, struct in_addr,
- u_int, struct in_addr, u_int, int, struct ifnet *);
-extern int in_pcblookup_hash_exists(struct inpcbinfo *, struct in_addr,
- u_int, struct in_addr, u_int, int, uid_t *, gid_t *, struct ifnet *);
-extern void in_pcbnotifyall(struct inpcbinfo *, struct in_addr, int,
- void (*)(struct inpcb *, int));
-extern void in_pcbrehash(struct inpcb *);
-extern int in_setpeeraddr(struct socket *so, struct sockaddr **nam);
-extern int in_setsockaddr(struct socket *so, struct sockaddr **nam);
-extern int in_pcb_checkstate(struct inpcb *pcb, int mode, int locked);
-
-extern void in_pcbremlists(struct inpcb *inp);
-extern void inpcb_to_compat(struct inpcb *inp,
- struct inpcb_compat *inp_compat);
-#if !CONFIG_EMBEDDED
-extern void inpcb_to_xinpcb64(struct inpcb *inp,
- struct xinpcb64 *xinp);
-#endif
-extern int get_pcblist_n(short , struct sysctl_req *, struct inpcbinfo *);
-extern void inpcb_get_ports_used(unsigned int , uint8_t *, struct inpcbinfo *);
-
-#define INPCB_OPPORTUNISTIC_THROTTLEON 0x0001
-#define INPCB_OPPORTUNISTIC_SETCMD 0x0002
-extern uint32_t inpcb_count_opportunistic(unsigned int , struct inpcbinfo *, u_int32_t);
-extern void inp_route_copyout(struct inpcb *, struct route *);
-extern void inp_route_copyin(struct inpcb *, struct route *);
-extern int inp_bindif(struct inpcb *, unsigned int);
-extern int inp_nocellular(struct inpcb *, unsigned int);
+ u_int, struct in_addr, u_int, int, struct ifnet *);
+extern int in_pcblookup_hash_exists(struct inpcbinfo *, struct in_addr,
+ u_int, struct in_addr, u_int, int, uid_t *, gid_t *, struct ifnet *);
+extern void in_pcbnotifyall(struct inpcbinfo *, struct in_addr, int,
+ void (*)(struct inpcb *, int));
+extern void in_pcbrehash(struct inpcb *);
+extern int in_getpeeraddr(struct socket *, struct sockaddr **);
+extern int in_getpeeraddr_s(struct socket *, struct sockaddr_storage *);
+extern int in_getsockaddr(struct socket *, struct sockaddr **);
+extern int in_getsockaddr_s(struct socket *, struct sockaddr_storage *);
+extern int in_pcb_checkstate(struct inpcb *, int, int);
+extern void in_pcbremlists(struct inpcb *);
+extern void inpcb_to_compat(struct inpcb *, struct inpcb_compat *);
+extern void inpcb_to_xinpcb64(struct inpcb *, struct xinpcb64 *);
+extern int get_pcblist_n(short, struct sysctl_req *, struct inpcbinfo *);
+extern void inpcb_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *,
+ struct inpcbinfo *);
+#define INPCB_OPPORTUNISTIC_THROTTLEON 0x0001
+#define INPCB_OPPORTUNISTIC_SETCMD 0x0002
+extern uint32_t inpcb_count_opportunistic(unsigned int, struct inpcbinfo *,
+ u_int32_t);
+extern uint32_t inpcb_find_anypcb_byaddr(struct ifaddr *, struct inpcbinfo *);
+extern void inp_route_copyout(struct inpcb *, struct route *);
+extern void inp_route_copyin(struct inpcb *, struct route *);
+extern int inp_bindif(struct inpcb *, unsigned int, struct ifnet **);
+extern void inp_set_nocellular(struct inpcb *);
+extern void inp_clear_nocellular(struct inpcb *);
+#if FLOW_DIVERT
+extern void inp_set_flow_divert(struct inpcb *);
+extern void inp_clear_flow_divert(struct inpcb *);
+#endif /* FLOW_DIVERT */
 extern u_int32_t inp_calc_flowhash(struct inpcb *);
-extern void socket_flowadv_init(void);
-
-/* Flags used by inp_fc_getinp */
-#define INPFC_SOLOCKED 0x1
-#define INPFC_REMOVE 0x2
-extern struct inpcb *inp_fc_getinp(u_int32_t, u_int32_t);
-extern void inp_fc_feedback(struct inpcb *);
-extern void inp_reset_fc_state(struct inpcb *);
-extern int inp_set_fc_state(struct inpcb *, int advcode);
-extern void inp_fc_unthrottle_tcp(struct inpcb *);
-extern int inp_flush(struct inpcb *, int);
+extern void inp_reset_fc_state(struct inpcb *);
+extern int inp_set_fc_state(struct inpcb *, int advcode);
+extern void inp_fc_unthrottle_tcp(struct inpcb *);
+extern void inp_flowadv(uint32_t);
+extern int inp_flush(struct inpcb *, int);
+extern int inp_findinpcb_procinfo(struct inpcbinfo *, uint32_t, struct so_procinfo *);
+extern void inp_get_soprocinfo(struct inpcb *, struct so_procinfo *);
+extern int inp_update_policy(struct inpcb *);
+extern boolean_t inp_restricted(struct inpcb *, struct ifnet *);
 #endif /* BSD_KERNEL_PRIVATE */
-
 #ifdef KERNEL_PRIVATE
-extern void inp_clear_INP_INADDR_ANY(struct socket *so);
+/* exported for PPP */
+extern void inp_clear_INP_INADDR_ANY(struct socket *);
 #endif /* KERNEL_PRIVATE */
-
 #endif /* !_NETINET_IN_PCB_H_ */
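[Annotation: the header now exports the bitstr_t-based inpcb_get_ports_used()
(implemented in in_pcblist.c below) in place of the old uint8_t-array variant.
A caller sketch, assuming the usual <sys/bitstring.h> helpers (bit_decl,
bitstr_size, bit_test) and using tcbinfo purely as an example pcbinfo; a real
caller would heap-allocate the 8 KB bitmap rather than put it on the stack:

	bitstr_t bit_decl(ports, 65536);	/* one bit per local port */
	bzero(ports, bitstr_size(65536));
	inpcb_get_ports_used(0 /* any ifindex */, PF_UNSPEC,
	    1 /* wildcards ok */, ports, &tcbinfo);
	if (bit_test(ports, 80))
		printf("a TCP socket is bound to local port 80\n");]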
diff --git a/bsd/netinet/in_pcblist.c b/bsd/netinet/in_pcblist.c
index 00ef00324..09cc79674 100644
--- a/bsd/netinet/in_pcblist.c
+++ b/bsd/netinet/in_pcblist.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2010-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2010-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
+ *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
- *
+ *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
+ *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
- *
+ *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
 /*
@@ -74,6 +74,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -84,26 +85,26 @@
 #include
 #include
 #include
+#include
 #ifndef ROUNDUP64
-#define ROUNDUP64(x) P2ROUNDUP((x), sizeof(u_int64_t))
+#define ROUNDUP64(x) P2ROUNDUP((x), sizeof (u_int64_t))
 #endif
 #ifndef ADVANCE64
-#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
+#define ADVANCE64(p, n) (void*)((char *)(p) + ROUNDUP64(n))
 #endif
+static void sotoxsocket_n(struct socket *, struct xsocket_n *);
+static void sbtoxsockbuf_n(struct sockbuf *, struct xsockbuf_n *);
+static void sbtoxsockstat_n(struct socket *, struct xsockstat_n *);
+static void inpcb_to_xinpcb_n(struct inpcb *, struct xinpcb_n *);
+static void tcpcb_to_xtcpcb_n(struct tcpcb *, struct xtcpcb_n *);
-void sotoxsocket_n(struct socket *, struct xsocket_n *);
-void sbtoxsockbuf_n(struct sockbuf *, struct xsockbuf_n *);
-void sbtoxsockstat_n(struct socket *, struct xsockstat_n *);
-void inpcb_to_xinpcb_n(struct inpcb *, struct xinpcb_n *);
-void tcpcb_to_xtcpcb_n(struct tcpcb *, struct xtcpcb_n *);
-
-__private_extern__ void
+static void
 sotoxsocket_n(struct socket *so, struct xsocket_n *xso)
 {
- xso->xso_len = sizeof(struct xsocket_n);
+ xso->xso_len = sizeof (struct xsocket_n);
 xso->xso_kind = XSO_SOCKET;
 if (so != NULL) {
@@ -114,8 +115,8 @@ sotoxsocket_n(struct socket *so, struct xsocket_n *xso)
 xso->so_state = so->so_state;
 xso->so_pcb = (u_int64_t)(uintptr_t)so->so_pcb;
 if (so->so_proto) {
- xso->xso_protocol = so->so_proto->pr_protocol;
- xso->xso_family = so->so_proto->pr_domain->dom_family;
+ xso->xso_protocol = SOCK_PROTO(so);
+ xso->xso_family = SOCK_DOM(so);
 } else {
 xso->xso_protocol = xso->xso_family = 0;
 }
@@ -130,10 +131,10 @@ sotoxsocket_n(struct socket *so, struct xsocket_n *xso)
 }
 }
-__private_extern__ void
+static void
 sbtoxsockbuf_n(struct sockbuf *sb, struct xsockbuf_n *xsb)
 {
- xsb->xsb_len = sizeof(struct xsockbuf_n);
+ xsb->xsb_len = sizeof (struct xsockbuf_n);
 xsb->xsb_kind = (sb->sb_flags & SB_RECV) ?
XSO_RCVBUF : XSO_SNDBUF; if (sb != NULL) { @@ -143,21 +144,21 @@ sbtoxsockbuf_n(struct sockbuf *sb, struct xsockbuf_n *xsb) xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; - xsb->sb_timeo = (short) - (sb->sb_timeo.tv_sec * hz) + sb->sb_timeo.tv_usec / tick; + xsb->sb_timeo = (short)(sb->sb_timeo.tv_sec * hz) + + sb->sb_timeo.tv_usec / tick; if (xsb->sb_timeo == 0 && sb->sb_timeo.tv_usec != 0) xsb->sb_timeo = 1; } } -__private_extern__ void +static void sbtoxsockstat_n(struct socket *so, struct xsockstat_n *xst) { int i; - xst->xst_len = sizeof(struct xsockstat_n); + xst->xst_len = sizeof (struct xsockstat_n); xst->xst_kind = XSO_STATS; - + for (i = 0; i < SO_TC_STATS_MAX; i++) { xst->xst_tc_stats[i].rxpackets = so->so_tc_stats[i].rxpackets; xst->xst_tc_stats[i].rxbytes = so->so_tc_stats[i].rxbytes; @@ -166,10 +167,10 @@ sbtoxsockstat_n(struct socket *so, struct xsockstat_n *xst) } } -__private_extern__ void +static void inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp) { - xinp->xi_len = sizeof(struct xinpcb_n); + xinp->xi_len = sizeof (struct xinpcb_n); xinp->xi_kind = XSO_INPCB; xinp->xi_inpp = (u_int64_t)(uintptr_t)inp; xinp->inp_fport = inp->inp_fport; @@ -184,9 +185,9 @@ inpcb_to_xinpcb_n(struct inpcb *inp, struct xinpcb_n *xinp) xinp->inp_dependfaddr.inp6_foreign = inp->inp_dependfaddr.inp6_foreign; xinp->inp_dependladdr.inp6_local = inp->inp_dependladdr.inp6_local; xinp->inp_depend4.inp4_ip_tos = inp->inp_depend4.inp4_ip_tos; - xinp->inp_depend6.inp6_hlim = inp->inp_depend6.inp6_hlim; + xinp->inp_depend6.inp6_hlim = 0; xinp->inp_depend6.inp6_cksum = inp->inp_depend6.inp6_cksum; - xinp->inp_depend6.inp6_ifindex = inp->inp_depend6.inp6_ifindex; + xinp->inp_depend6.inp6_ifindex = 0; xinp->inp_depend6.inp6_hops = inp->inp_depend6.inp6_hops; xinp->inp_flowhash = inp->inp_flowhash; } @@ -196,7 +197,7 @@ tcpcb_to_xtcpcb_n(struct tcpcb *tp, struct xtcpcb_n *xt) { int i; - xt->xt_len = sizeof(struct xtcpcb_n); + xt->xt_len = sizeof (struct xtcpcb_n); xt->xt_kind = XSO_TCPCB; xt->t_segq = (u_int32_t)(uintptr_t)tp->t_segq.lh_first; @@ -249,7 +250,6 @@ tcpcb_to_xtcpcb_n(struct tcpcb *tp, struct xtcpcb_n *xt) xt->snd_recover = tp->snd_recover; xt->snd_cwnd_prev = tp->snd_cwnd_prev; xt->snd_ssthresh_prev = tp->snd_ssthresh_prev; - xt->t_badrxtwin = tp->t_badrxtwin; } __private_extern__ int @@ -261,29 +261,28 @@ get_pcblist_n(short proto, struct sysctl_req *req, struct inpcbinfo *pcbinfo) inp_gen_t gencnt; struct xinpgen xig; void *buf = NULL; - size_t item_size = ROUNDUP64(sizeof(struct xinpcb_n)) + - ROUNDUP64(sizeof(struct xsocket_n)) + - 2 * ROUNDUP64(sizeof(struct xsockbuf_n)) + - ROUNDUP64(sizeof(struct xsockstat_n)); + size_t item_size = ROUNDUP64(sizeof (struct xinpcb_n)) + + ROUNDUP64(sizeof (struct xsocket_n)) + + 2 * ROUNDUP64(sizeof (struct xsockbuf_n)) + + ROUNDUP64(sizeof (struct xsockstat_n)); if (proto == IPPROTO_TCP) - item_size += ROUNDUP64(sizeof(struct xtcpcb_n)); + item_size += ROUNDUP64(sizeof (struct xtcpcb_n)); /* * The process of preparing the PCB list is too time-consuming and * resource-intensive to repeat twice on every request. 
*/ - lck_rw_lock_exclusive(pcbinfo->mtx); + lck_rw_lock_exclusive(pcbinfo->ipi_lock); if (req->oldptr == USER_ADDR_NULL) { - n = pcbinfo->ipi_count; - req->oldidx = 2 * (sizeof xig) - + (n + n/8) * item_size; - goto done; + n = pcbinfo->ipi_count; + req->oldidx = 2 * (sizeof (xig)) + (n + n/8) * item_size; + goto done; } if (req->newptr != USER_ADDR_NULL) { - error = EPERM; - goto done; + error = EPERM; + goto done; } /* @@ -292,61 +291,70 @@ get_pcblist_n(short proto, struct sysctl_req *req, struct inpcbinfo *pcbinfo) gencnt = pcbinfo->ipi_gencnt; n = pcbinfo->ipi_count; - bzero(&xig, sizeof(xig)); - xig.xig_len = sizeof xig; + bzero(&xig, sizeof (xig)); + xig.xig_len = sizeof (xig); xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; - error = SYSCTL_OUT(req, &xig, sizeof xig); + error = SYSCTL_OUT(req, &xig, sizeof (xig)); if (error) { - goto done; + goto done; + } + /* + * We are done if there is no pcb + */ + if (n == 0) { + goto done; } - /* - * We are done if there is no pcb - */ - if (n == 0) { - goto done; - } buf = _MALLOC(item_size, M_TEMP, M_WAITOK); - if (buf == 0) { - error = ENOMEM; - goto done; + if (buf == NULL) { + error = ENOMEM; + goto done; } - inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); - if (inp_list == 0) { - error = ENOMEM; - goto done; + inp_list = _MALLOC(n * sizeof (*inp_list), M_TEMP, M_WAITOK); + if (inp_list == NULL) { + error = ENOMEM; + goto done; } - for (inp = pcbinfo->listhead->lh_first, i = 0; inp && i < n; - inp = inp->inp_list.le_next) { - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) - inp_list[i++] = inp; + for (inp = pcbinfo->ipi_listhead->lh_first, i = 0; inp && i < n; + inp = inp->inp_list.le_next) { + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) + inp_list[i++] = inp; } n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb_n *xi = (struct xinpcb_n *)buf; - struct xsocket_n *xso = (struct xsocket_n *)ADVANCE64(xi, sizeof(*xi)); - struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *)ADVANCE64(xso, sizeof(*xso)); - struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *)ADVANCE64(xsbrcv, sizeof(*xsbrcv)); - struct xsockstat_n *xsostats = (struct xsockstat_n *)ADVANCE64(xsbsnd, sizeof(*xsbsnd)); - + struct xsocket_n *xso = (struct xsocket_n *) + ADVANCE64(xi, sizeof (*xi)); + struct xsockbuf_n *xsbrcv = (struct xsockbuf_n *) + ADVANCE64(xso, sizeof (*xso)); + struct xsockbuf_n *xsbsnd = (struct xsockbuf_n *) + ADVANCE64(xsbrcv, sizeof (*xsbrcv)); + struct xsockstat_n *xsostats = (struct xsockstat_n *) + ADVANCE64(xsbsnd, sizeof (*xsbsnd)); + bzero(buf, item_size); inpcb_to_xinpcb_n(inp, xi); sotoxsocket_n(inp->inp_socket, xso); - sbtoxsockbuf_n(inp->inp_socket ? &inp->inp_socket->so_rcv : NULL, xsbrcv); - sbtoxsockbuf_n(inp->inp_socket ? &inp->inp_socket->so_snd : NULL, xsbsnd); + sbtoxsockbuf_n(inp->inp_socket ? + &inp->inp_socket->so_rcv : NULL, xsbrcv); + sbtoxsockbuf_n(inp->inp_socket ? + &inp->inp_socket->so_snd : NULL, xsbsnd); sbtoxsockstat_n(inp->inp_socket, xsostats); if (proto == IPPROTO_TCP) { - struct xtcpcb_n *xt = (struct xtcpcb_n *)ADVANCE64(xsostats, sizeof(*xsostats)); - + struct xtcpcb_n *xt = (struct xtcpcb_n *) + ADVANCE64(xsostats, sizeof (*xsostats)); + /* * inp->inp_ppcb, can only be NULL on * an initialization race window. 
@@ -354,8 +362,9 @@ get_pcblist_n(short proto, struct sysctl_req *req, struct inpcbinfo *pcbinfo) */ if (inp->inp_ppcb == NULL) continue; - - tcpcb_to_xtcpcb_n((struct tcpcb *)inp->inp_ppcb, xt); + + tcpcb_to_xtcpcb_n((struct tcpcb *) + inp->inp_ppcb, xt); } error = SYSCTL_OUT(req, buf, item_size); } @@ -368,38 +377,67 @@ get_pcblist_n(short proto, struct sysctl_req *req, struct inpcbinfo *pcbinfo) * while we were processing this request, and it * might be necessary to retry. */ - bzero(&xig, sizeof(xig)); - xig.xig_len = sizeof xig; + bzero(&xig, sizeof (xig)); + xig.xig_len = sizeof (xig); xig.xig_gen = pcbinfo->ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = pcbinfo->ipi_count; - error = SYSCTL_OUT(req, &xig, sizeof xig); + error = SYSCTL_OUT(req, &xig, sizeof (xig)); } done: - lck_rw_done(pcbinfo->mtx); - if (inp_list) + lck_rw_done(pcbinfo->ipi_lock); + if (inp_list != NULL) FREE(inp_list, M_TEMP); - if (buf) + if (buf != NULL) FREE(buf, M_TEMP); - return error; + return (error); } __private_extern__ void -inpcb_get_ports_used(unsigned int ifindex, uint8_t *bitfield, struct inpcbinfo *pcbinfo) +inpcb_get_ports_used(uint32_t ifindex, int protocol, uint32_t wildcardok, + bitstr_t *bitfield, struct inpcbinfo *pcbinfo) { - lck_rw_lock_shared(pcbinfo->mtx); - struct inpcb *inp; - inp_gen_t gencnt = pcbinfo->ipi_gencnt; - for (inp = LIST_FIRST(pcbinfo->listhead); inp; inp = LIST_NEXT(inp, inp_list)) { - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD && - (ifindex == 0 || inp->inp_last_outifp == NULL || ifindex == inp->inp_last_outifp->if_index)) { - uint16_t port = ntohs(inp->inp_lport); - bitfield[port / 8] |= 1 << (port & 0x7); - } + struct socket *so; + inp_gen_t gencnt; + uint32_t iswildcard; + + lck_rw_lock_shared(pcbinfo->ipi_lock); + gencnt = pcbinfo->ipi_gencnt; + for (inp = LIST_FIRST(pcbinfo->ipi_listhead); inp; + inp = LIST_NEXT(inp, inp_list)) { + uint16_t port; + + if (inp->inp_gencnt > gencnt || + inp->inp_state == INPCB_STATE_DEAD) + continue; + + if ((so = inp->inp_socket) == NULL || + (so->so_state & SS_DEFUNCT)) + continue; + + if (!(protocol == PF_UNSPEC || + (protocol == PF_INET && (inp->inp_vflag & INP_IPV4)) || + (protocol == PF_INET6 && (inp->inp_vflag & INP_IPV6)))) + continue; + + iswildcard = (((inp->inp_vflag & INP_IPV4) && + inp->inp_laddr.s_addr == INADDR_ANY) || + ((inp->inp_vflag & INP_IPV6) && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))); + + if (!wildcardok && iswildcard) + continue; + + if (!iswildcard && + !(ifindex == 0 || inp->inp_last_outifp == NULL || + ifindex == inp->inp_last_outifp->if_index)) + continue; + + port = ntohs(inp->inp_lport); + bit_set(bitfield, port); } - - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); } __private_extern__ uint32_t @@ -407,13 +445,13 @@ inpcb_count_opportunistic(unsigned int ifindex, struct inpcbinfo *pcbinfo, u_int32_t flags) { uint32_t opportunistic = 0; - - lck_rw_lock_shared(pcbinfo->mtx); - struct inpcb *inp; - inp_gen_t gencnt = pcbinfo->ipi_gencnt; - for (inp = LIST_FIRST(pcbinfo->listhead); - inp; inp = LIST_NEXT(inp, inp_list)) { + inp_gen_t gencnt; + + lck_rw_lock_shared(pcbinfo->ipi_lock); + gencnt = pcbinfo->ipi_gencnt; + for (inp = LIST_FIRST(pcbinfo->ipi_listhead); + inp != NULL; inp = LIST_NEXT(inp, inp_list)) { if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD && inp->inp_socket != NULL && @@ -436,9 +474,10 @@ inpcb_count_opportunistic(unsigned int ifindex, struct inpcbinfo *pcbinfo, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_RESUME)); } - 
SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] "
- "%s\n", so->last_pid, so, INP_SOCKAF(so),
- INP_SOCKTYPE(so),
+ SOTHROTTLELOG(("throttle[%d]: so 0x%llx "
+ "[%d,%d] %s\n", so->last_pid,
+ (uint64_t)VM_KERNEL_ADDRPERM(so),
+ SOCK_DOM(so), SOCK_TYPE(so),
 (so->so_flags & SOF_SUSPENDED) ?
 "SUSPENDED" : "RESUMED"));
 socket_unlock(so, 1);
@@ -446,7 +485,54 @@ inpcb_count_opportunistic(unsigned int ifindex, struct inpcbinfo *pcbinfo,
 }
 }
- lck_rw_done(pcbinfo->mtx);
+ lck_rw_done(pcbinfo->ipi_lock);
 return (opportunistic);
 }
+
+__private_extern__ uint32_t
+inpcb_find_anypcb_byaddr(struct ifaddr *ifa, struct inpcbinfo *pcbinfo)
+{
+ struct inpcb *inp;
+ inp_gen_t gencnt = pcbinfo->ipi_gencnt;
+ struct socket *so = NULL;
+ int af;
+
+ if ((ifa->ifa_addr->sa_family != AF_INET) &&
+ (ifa->ifa_addr->sa_family != AF_INET6)) {
+ return (0);
+ }
+
+ lck_rw_lock_shared(pcbinfo->ipi_lock);
+ for (inp = LIST_FIRST(pcbinfo->ipi_listhead);
+ inp != NULL; inp = LIST_NEXT(inp, inp_list)) {
+
+ if (inp->inp_gencnt <= gencnt &&
+ inp->inp_state != INPCB_STATE_DEAD &&
+ inp->inp_socket != NULL) {
+ so = inp->inp_socket;
+ af = SOCK_DOM(so);
+ if (af != ifa->ifa_addr->sa_family)
+ continue;
+ if (inp->inp_last_outifp != ifa->ifa_ifp)
+ continue;
+
+ if (af == AF_INET) {
+ if (inp->inp_laddr.s_addr ==
+ (satosin(ifa->ifa_addr))->sin_addr.s_addr) {
+ lck_rw_done(pcbinfo->ipi_lock);
+ return (1);
+ }
+ }
+ if (af == AF_INET6) {
+ if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa),
+ &inp->in6p_laddr)) {
+ lck_rw_done(pcbinfo->ipi_lock);
+ return (1);
+ }
+ }
+ }
+ }
+ lck_rw_done(pcbinfo->ipi_lock);
+ return (0);
+}
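[Annotation: the in_proto.c rework below is largely mechanical: every
struct protosw entry moves from positional initialization to C99 designated
initializers, so only meaningful callbacks are spelled out and every omitted
member is implicitly zero. Schematic contrast, an editorial illustration
rather than lines quoted from the patch:

	/* old style: meaning depends on field order, unused slots written as 0 */
	{ SOCK_RAW, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR|PR_LASTHDR,
	  icmp_input, 0, 0, rip_ctloutput, 0, 0, 0, 0, 0, 0, &rip_usrreqs,
	  0, rip_unlock, 0, { 0, 0 }, 0, { 0 } },

	/* new style: named fields, remaining members implicitly zeroed */
	{
		.pr_type = SOCK_RAW,
		.pr_protocol = IPPROTO_ICMP,
		.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
		.pr_input = icmp_input,
		.pr_ctloutput = rip_ctloutput,
		.pr_usrreqs = &rip_usrreqs,
		.pr_unlock = rip_unlock,
	},

Note also that the entries no longer name their domain: inetdomain becomes a
pointer that in_dinit() fills in when the domain is attached.]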
diff --git a/bsd/netinet/in_proto.c b/bsd/netinet/in_proto.c
index f3f3be8e7..6e3507bec 100644
--- a/bsd/netinet/in_proto.c
+++ b/bsd/netinet/in_proto.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -69,10 +69,13 @@
 #include
 #include
+#include
+
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -101,210 +104,270 @@
 #include
 #endif /* IPSEC */
-#if IPXIP
-#include
-#endif
+static void in_dinit(struct domain *);
+static void ip_proto_input(protocol_family_t, mbuf_t);
-extern struct domain inetdomain;
-static struct pr_usrreqs nousrreqs;
-extern struct pr_usrreqs icmp_dgram_usrreqs;
+extern struct domain inetdomain_s;
+static struct pr_usrreqs nousrreqs;
+extern struct pr_usrreqs icmp_dgram_usrreqs;
 extern int icmp_dgram_ctloutput(struct socket *, struct sockopt *);
+struct domain *inetdomain = NULL;
+
+/* Thanks to PPP, this still needs to be exported */
+lck_mtx_t *inet_domain_mutex;
-struct protosw inetsw[] = {
-{ 0, &inetdomain, 0, 0,
- 0, 0, 0, 0,
- 0,
- ip_init, 0, ip_slowtimo, ip_drain,
- 0,
- &nousrreqs,
- 0, 0, 0, { 0, 0 }, 0, { 0 }
+static struct protosw inetsw[] = {
+{
+ .pr_type = 0,
+ .pr_protocol = 0,
+ .pr_init = ip_init,
+ .pr_drain = ip_drain,
+ .pr_usrreqs = &nousrreqs,
 },
-{ SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK,
- udp_input, 0, udp_ctlinput, udp_ctloutput,
- 0,
- udp_init, 0, udp_slowtimo, 0,
- 0,
- &udp_usrreqs,
- udp_lock, udp_unlock, udp_getlock, { 0, 0 }, 0, { 0 }
+{
+ .pr_type = SOCK_DGRAM,
+ .pr_protocol = IPPROTO_UDP,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK|
+ PR_EVCONNINFO,
+ .pr_input = udp_input,
+ .pr_ctlinput = udp_ctlinput,
+ .pr_ctloutput = udp_ctloutput,
+ .pr_init = udp_init,
+ .pr_usrreqs = &udp_usrreqs,
+ .pr_lock = udp_lock,
+ .pr_unlock = udp_unlock,
+ .pr_getlock = udp_getlock,
 },
-{ SOCK_STREAM, &inetdomain, IPPROTO_TCP,
- PR_CONNREQUIRED|PR_WANTRCVD|PR_PCBLOCK|PR_PROTOLOCK|PR_DISPOSE,
- tcp_input, 0, tcp_ctlinput, tcp_ctloutput,
- 0,
- tcp_init, 0, tcp_slowtimo, tcp_drain,
- 0,
- &tcp_usrreqs,
- tcp_lock, tcp_unlock, tcp_getlock, { 0, 0 }, 0, { 0 }
+{
+ .pr_type = SOCK_STREAM,
+ .pr_protocol = IPPROTO_TCP,
+ .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_PCBLOCK|
+ PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO,
+ .pr_input = tcp_input,
+ .pr_ctlinput = tcp_ctlinput,
+ .pr_ctloutput = tcp_ctloutput,
+ .pr_init = tcp_init,
+ .pr_drain = tcp_drain,
+ .pr_usrreqs = &tcp_usrreqs,
+ .pr_lock = tcp_lock,
+ .pr_unlock = tcp_unlock,
+ .pr_getlock = tcp_getlock,
 },
-{ SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR,
- rip_input, 0, rip_ctlinput, rip_ctloutput,
- 0,
- 0, 0, 0, 0,
- 0,
- &rip_usrreqs,
- 0, rip_unlock, 0, { 0, 0 }, 0, { 0 }
+{
+ .pr_type = SOCK_RAW,
+ .pr_protocol = IPPROTO_RAW,
+ .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_input = rip_input,
+ .pr_ctlinput = rip_ctlinput,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs,
+ .pr_unlock = rip_unlock,
 },
-{ SOCK_RAW, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- icmp_input, 0, 0, rip_ctloutput,
- 0,
- 0, 0, 0, 0,
- 0,
- &rip_usrreqs,
- 0, rip_unlock, 0, { 0, 0 }, 0, { 0 }
+{
+ .pr_type = SOCK_RAW,
+ .pr_protocol = IPPROTO_ICMP,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = icmp_input,
+ .pr_ctloutput = rip_ctloutput,
+ .pr_usrreqs = &rip_usrreqs,
+ .pr_unlock = rip_unlock,
 },
-{ SOCK_DGRAM, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR|PR_LASTHDR,
- icmp_input, 0, 0, icmp_dgram_ctloutput,
- 0,
- 0, 0, 0, 0,
- 0,
- &icmp_dgram_usrreqs,
- 0, rip_unlock, 0, { 0, 0 }, 0, { 0 }
+{
+ .pr_type = SOCK_DGRAM,
+ .pr_protocol = IPPROTO_ICMP,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
+ .pr_input = icmp_input,
+ .pr_ctloutput = icmp_dgram_ctloutput,
+
.pr_usrreqs = &icmp_dgram_usrreqs, + .pr_unlock = rip_unlock, }, -{ SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - igmp_input, 0, 0, rip_ctloutput, - 0, - igmp_init, 0, igmp_slowtimo, 0, - 0, - &rip_usrreqs, - 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_IGMP, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = igmp_input, + .pr_ctloutput = rip_ctloutput, + .pr_init = igmp_init, + .pr_usrreqs = &rip_usrreqs, + .pr_unlock = rip_unlock, }, #if MROUTING -{ SOCK_RAW, &inetdomain, IPPROTO_RSVP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - rsvp_input, 0, 0, rip_ctloutput, - 0, - 0, 0, 0, 0, - 0, - &rip_usrreqs, - 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_RSVP, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = rsvp_input, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs, + .pr_unlock = rip_unlock, }, #endif /* MROUTING */ +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_GRE, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = gre_input, + .pr_ctlinput = rip_ctlinput, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs, + .pr_unlock = rip_unlock, +}, #if IPSEC -{ SOCK_RAW, &inetdomain, IPPROTO_AH, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, - ah4_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, { 0, 0 }, 0, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_AH, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, + .pr_input = ah4_input, + .pr_usrreqs = &nousrreqs, }, #if IPSEC_ESP -{ SOCK_RAW, &inetdomain, IPPROTO_ESP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, - esp4_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, { 0, 0 }, 0, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_ESP, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, + .pr_input = esp4_input, + .pr_usrreqs = &nousrreqs, }, -#endif -{ SOCK_RAW, &inetdomain, IPPROTO_IPCOMP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, - ipcomp4_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, { 0, 0 }, 0, { 0 } +#endif /* IPSEC_ESP */ +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_IPCOMP, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, + .pr_input = ipcomp4_input, + .pr_usrreqs = &nousrreqs, }, #endif /* IPSEC */ -{ SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - encap4_input, 0, 0, rip_ctloutput, - 0, - encap_init, 0, 0, 0, - 0, - &rip_usrreqs, - 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_IPV4, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap4_input, + .pr_ctloutput = rip_ctloutput, + .pr_init = encap4_init, + .pr_usrreqs = &rip_usrreqs, + .pr_unlock = rip_unlock, }, -# if INET6 -{ SOCK_RAW, &inetdomain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - encap4_input, 0, 0, rip_ctloutput, - 0, - encap_init, 0, 0, 0, - 0, - &rip_usrreqs, - 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } +#if INET6 +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_IPV6, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap4_input, + .pr_ctloutput = rip_ctloutput, + .pr_init = encap4_init, + .pr_usrreqs = &rip_usrreqs, + .pr_unlock = rip_unlock, }, -#endif +#endif /* INET6 */ #if IPDIVERT -{ SOCK_RAW, &inetdomain, IPPROTO_DIVERT, PR_ATOMIC|PR_ADDR|PR_PCBLOCK, - div_input, 0, 0, ip_ctloutput, - 0, - div_init, 0, 0, 0, - 0, - &div_usrreqs, - div_lock, div_unlock, div_getlock, { 0, 0 }, 0, { 0 } -}, -#endif -#if IPXIP -{ SOCK_RAW, &inetdomain, IPPROTO_IDP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - ipxip_input, 0, ipxip_ctlinput, 0, - 0, - 0, 0, 0, 0, - 0, - 
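Most of this file's churn is mechanical: the inetsw[] entries are converted from positional struct protosw initializers to C99 designated initializers, so unused callbacks no longer need placeholder zeros and the entries survive structure layout changes. A toy illustration of why the designated form is safer; struct proto here is a simplified stand-in, not the real struct protosw:

#include <stdio.h>

struct proto {
	int	type;
	int	protocol;
	unsigned flags;
	void	(*init)(void);
};

static void tcp_init_stub(void) { puts("init"); }

/*
 * Positional: every field up to the last used one must be spelled out
 * in declaration order; inserting or reordering fields silently shifts
 * the meaning of each value.
 */
static struct proto old_way = { 1, 6, 0x5, tcp_init_stub };

/*
 * Designated (C99): only meaningful fields appear, the rest are
 * zero-filled, and the entry survives layout changes.
 */
static struct proto new_way = {
	.type		= 1,
	.protocol	= 6,
	.flags		= 0x5,
	.init		= tcp_init_stub,
};

int
main(void)
{
	printf("%d %d\n", old_way.protocol, new_way.protocol);
	return (0);
}

Any field not named is zero-initialized, which matches the long runs of literal zeros the old entries carried.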
&rip_usrreqs, - 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } -}, -#endif -#if NSIP -{ SOCK_RAW, &inetdomain, IPPROTO_IDP, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - idpip_input, 0, nsip_ctlinput, 0, - 0, - 0, 0, 0, 0, - 0, - &rip_usrreqs, - 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_DIVERT, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PCBLOCK, + .pr_input = div_input, + .pr_ctloutput = ip_ctloutput, + .pr_init = div_init, + .pr_usrreqs = &div_usrreqs, + .pr_lock = div_lock, + .pr_unlock = div_unlock, + .pr_getlock = div_getlock, }, -#endif - /* raw wildcard */ -{ SOCK_RAW, &inetdomain, 0, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - rip_input, 0, 0, rip_ctloutput, - 0, - rip_init, 0, 0, 0, - 0, - &rip_usrreqs, - 0, rip_unlock, 0, { 0, 0 }, 0, { 0 } +#endif /* IPDIVERT */ +/* raw wildcard */ +{ + .pr_type = SOCK_RAW, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = rip_input, + .pr_ctloutput = rip_ctloutput, + .pr_init = rip_init, + .pr_usrreqs = &rip_usrreqs, + .pr_unlock = rip_unlock, }, }; -extern int in_inithead(void **, int); +static int in_proto_count = (sizeof (inetsw) / sizeof (struct protosw)); + +struct domain inetdomain_s = { + .dom_family = PF_INET, + .dom_flags = DOM_REENTRANT, + .dom_name = "internet", + .dom_init = in_dinit, + .dom_rtattach = in_inithead, + .dom_rtoffset = 32, + .dom_maxrtkey = sizeof (struct sockaddr_in), + .dom_protohdrlen = sizeof (struct tcpiphdr), +}; + +/* Initialize the PF_INET domain, and add in the pre-defined protos */ +void +in_dinit(struct domain *dp) +{ + struct protosw *pr; + int i; + domain_unguard_t unguard; + + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(inetdomain == NULL); -int in_proto_count = (sizeof (inetsw) / sizeof (struct protosw)); + inetdomain = dp; -extern void in_dinit(void) __attribute__((section("__TEXT, initcode"))); -/* A routing init function, and a header size */ -struct domain inetdomain = - { AF_INET, - "internet", - in_dinit, - 0, - 0, - inetsw, - 0, - in_inithead, - 32, - sizeof(struct sockaddr_in), - sizeof(struct tcpiphdr), - 0, - 0, - 0, - { 0, 0} - }; + /* + * Attach first, then initialize; ip_init() needs raw IP handler. 
+ */ + for (i = 0, pr = &inetsw[0]; i < in_proto_count; i++, pr++) + net_add_proto(pr, dp, 0); + for (i = 0, pr = &inetsw[0]; i < in_proto_count; i++, pr++) + net_init_proto(pr, dp); -DOMAIN_SET(inet); + inet_domain_mutex = dp->dom_mtx; -SYSCTL_NODE(_net, PF_INET, inet, CTLFLAG_RW|CTLFLAG_LOCKED, 0, - "Internet Family"); + unguard = domain_unguard_deploy(); + i = proto_register_input(PF_INET, ip_proto_input, NULL, 1); + if (i != 0) { + panic("%s: failed to register PF_INET protocol: %d\n", + __func__, i); + /* NOTREACHED */ + } + domain_unguard_release(unguard); +} -SYSCTL_NODE(_net_inet, IPPROTO_IP, ip, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP"); -SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "ICMP"); -SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "UDP"); -SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "TCP"); -SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IGMP"); +static void +ip_proto_input(protocol_family_t protocol, mbuf_t packet_list) +{ +#pragma unused(protocol) + mbuf_t packet; + int how_many = 0 ; + + /* ip_input should handle a list of packets but does not yet */ + for (packet = packet_list; packet; packet = packet_list) { + how_many++; + packet_list = mbuf_nextpkt(packet); + mbuf_setnextpkt(packet, NULL); + ip_input(packet); + } +} + +SYSCTL_NODE(_net, PF_INET, inet, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Internet Family"); + +SYSCTL_NODE(_net_inet, IPPROTO_IP, ip, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP"); +SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "ICMP"); +SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "UDP"); +SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "TCP"); +SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IGMP"); #if IPSEC -SYSCTL_NODE(_net_inet, IPPROTO_AH, ipsec, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPSEC"); +SYSCTL_NODE(_net_inet, IPPROTO_AH, ipsec, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPSEC"); #endif /* IPSEC */ -SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "RAW"); +SYSCTL_NODE(_net_inet, IPPROTO_RAW, raw, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "RAW"); #if IPDIVERT -SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, div, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "DIVERT"); -#endif - +SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, div, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "DIVERT"); +#endif /* IPDIVERT */ diff --git a/bsd/netinet/in_rmx.c b/bsd/netinet/in_rmx.c index ca9a4247e..aa43a3caa 100644 --- a/bsd/netinet/in_rmx.c +++ b/bsd/netinet/in_rmx.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
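Two details of the new PF_INET bring-up above are worth noting: in_dinit() attaches every protosw entry before initializing any of them, because ip_init() expects the raw-IP handler to be attached already, and ip_proto_input() unchains each packet before calling ip_input(), which still takes a single packet. The unchaining idiom, sketched with a hypothetical pkt type in place of mbuf_t:

#include <stdio.h>

struct pkt {
	struct pkt *nextpkt;
	int id;
};

static void
handle_one(struct pkt *p)	/* stands in for ip_input() */
{
	printf("packet %d\n", p->id);
}

static void
handle_list(struct pkt *list)
{
	struct pkt *p;

	while ((p = list) != NULL) {
		list = p->nextpkt;	/* advance first... */
		p->nextpkt = NULL;	/* ...then detach... */
		handle_one(p);		/* ...so the handler may free/requeue */
	}
}

int
main(void)
{
	struct pkt b = { NULL, 2 }, a = { &b, 1 };

	handle_list(&a);
	return (0);
}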
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -53,7 +53,6 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: src/sys/netinet/in_rmx.c,v 1.37.2.1 2001/05/14 08:23:49 ru Exp $ */ /* @@ -87,40 +86,53 @@ #include extern int tvtohz(struct timeval *); -extern int in_inithead(void **head, int off); -#ifdef __APPLE__ -static void in_rtqtimo(void *rock); -#endif +static int in_rtqtimo_run; /* in_rtqtimo is scheduled to run */ +static void in_rtqtimo(void *); +static void in_sched_rtqtimo(struct timeval *); +static struct radix_node *in_addroute(void *, void *, struct radix_node_head *, + struct radix_node *); +static struct radix_node *in_deleteroute(void *, void *, + struct radix_node_head *); +static struct radix_node *in_matroute(void *, struct radix_node_head *); static struct radix_node *in_matroute_args(void *, struct radix_node_head *, rn_matchf_t *f, void *); +static void in_clsroute(struct radix_node *, struct radix_node_head *); +static int in_rtqkill(struct radix_node *, void *); + +static int in_ifadownkill(struct radix_node *, void *); -#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ +#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ /* * Do what we need to do when inserting a route. */ static struct radix_node * in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, - struct radix_node *treenodes) + struct radix_node *treenodes) { struct rtentry *rt = (struct rtentry *)treenodes; struct sockaddr_in *sin = (struct sockaddr_in *)(void *)rt_key(rt); struct radix_node *ret; + char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN]; + uint32_t flags = rt->rt_flags; + boolean_t verbose = (rt_verbose > 1); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); + if (verbose) + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + /* * For IP, all unicast non-host routes are automatically cloning. */ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) rt->rt_flags |= RTF_MULTICAST; - if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { + if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) rt->rt_flags |= RTF_PRCLONING; - } /* * A little bit of help for both IP output and input: @@ -145,19 +157,19 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, /* Become a regular mutex */ RT_CONVERT_LOCK(rt); IFA_LOCK_SPIN(rt->rt_ifa); - if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr - == sin->sin_addr.s_addr) + if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr == + sin->sin_addr.s_addr) rt->rt_flags |= RTF_LOCAL; IFA_UNLOCK(rt->rt_ifa); } } - if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) - && rt->rt_ifp) + if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) && + rt->rt_ifp) rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; ret = rn_addroute(v_arg, n_arg, head, treenodes); - if (ret == NULL && rt->rt_flags & RTF_HOST) { + if (ret == NULL && (rt->rt_flags & RTF_HOST)) { struct rtentry *rt2; /* * We are trying to add a host route, but can't. 
@@ -166,12 +178,30 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, */ rt2 = rtalloc1_scoped_locked(rt_key(rt), 0, RTF_CLONING | RTF_PRCLONING, sin_get_ifscope(rt_key(rt))); - if (rt2) { + if (rt2 != NULL) { + char dbufc[MAX_IPv4_STR_LEN]; + RT_LOCK(rt2); + if (verbose) + rt_str(rt2, dbufc, sizeof (dbufc), NULL, 0); + if ((rt2->rt_flags & RTF_LLINFO) && (rt2->rt_flags & RTF_HOST) && rt2->rt_gateway != NULL && rt2->rt_gateway->sa_family == AF_LINK) { + if (verbose) { + log(LOG_DEBUG, "%s: unable to insert " + "route to %s;%s, flags=%b, due to " + "existing ARP route %s->%s " + "flags=%b, attempting to delete\n", + __func__, dbuf, + (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS, dbufc, + (rt2->rt_ifp != NULL) ? + rt2->rt_ifp->if_xname : "", + rt2->rt_flags, RTF_BITS); + } /* * Safe to drop rt_lock and use rt_key, * rt_gateway, since holding rnh_lock here @@ -179,18 +209,64 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, * rt_setgate() on this route. */ RT_UNLOCK(rt2); - rtrequest_locked(RTM_DELETE, rt_key(rt2), + (void) rtrequest_locked(RTM_DELETE, rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), - rt2->rt_flags, 0); + rt2->rt_flags, NULL); ret = rn_addroute(v_arg, n_arg, head, - treenodes); + treenodes); } else { RT_UNLOCK(rt2); } rtfree_locked(rt2); } } - return ret; + + if (!verbose) + goto done; + + if (ret != NULL) { + if (flags != rt->rt_flags) { + log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, " + "oflags=%b, flags=%b\n", __func__, + dbuf, gbuf, (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", flags, RTF_BITS, + rt->rt_flags, RTF_BITS); + } else { + log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, " + "flags=%b\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } + } else { + log(LOG_DEBUG, "%s: unable to insert route to %s->%s->%s, " + "flags=%b, already exists\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } +done: + return (ret); +} + +static struct radix_node * +in_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head) +{ + struct radix_node *rn; + + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + rn = rn_delete(v_arg, netmask_arg, head); + if (rt_verbose > 1 && rn != NULL) { + char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN]; + struct rtentry *rt = (struct rtentry *)rn; + + RT_LOCK(rt); + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + log(LOG_DEBUG, "%s: route to %s->%s->%s deleted, " + "flags=%b\n", __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", rt->rt_flags, RTF_BITS); + RT_UNLOCK(rt); + } + return (rn); } /* @@ -205,16 +281,24 @@ in_validate(struct radix_node *rn) /* This is first reference? */ if (rt->rt_refcnt == 0) { + if (rt_verbose > 2) { + char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN]; + + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + log(LOG_DEBUG, "%s: route to %s->%s->%s validated, " + "flags=%b\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } + + /* + * It's one of ours; unexpire it. If the timer is already + * scheduled, let it run later as it won't re-arm itself + * if there's nothing to do. 
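The conflict path added to in_addroute() above deserves a gloss: when inserting a host route fails, the code looks for a cloned ARP entry (RTF_LLINFO and RTF_HOST with an AF_LINK gateway) occupying the same key, deletes it, and retries the insert exactly once, now with verbose logging on both outcomes. The insert-evict-retry shape in a toy one-slot table; every name here is illustrative, none of it is the radix-tree API:

#include <stdio.h>

struct route_entry {
	char	key[32];
	int	is_arp_clone;	/* stands in for RTF_LLINFO|RTF_HOST */
	int	in_use;
};
static struct route_entry slot;	/* one-slot toy "routing table" */

static struct route_entry *
table_insert(const char *key, int is_arp_clone)
{
	if (slot.in_use)
		return (NULL);	/* key conflict, like rn_addroute() */
	snprintf(slot.key, sizeof (slot.key), "%s", key);
	slot.is_arp_clone = is_arp_clone;
	slot.in_use = 1;
	return (&slot);
}

static struct route_entry *
add_host_route(const char *key)
{
	struct route_entry *ret = table_insert(key, 0);

	if (ret == NULL && slot.is_arp_clone) {
		/*
		 * A cloned ARP entry shadows the host route we want:
		 * evict it and retry the insert once, as in_addroute()
		 * does after rtrequest_locked(RTM_DELETE, ...).
		 */
		slot.in_use = 0;
		ret = table_insert(key, 0);
	}
	return (ret);
}

int
main(void)
{
	(void) table_insert("10.0.0.2", 1);	/* pre-existing ARP clone */
	printf("retry %s\n", add_host_route("10.0.0.2") ? "won" : "lost");
	return (0);
}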
+ */ if (rt->rt_flags & RTPRF_OURS) { - /* It's one of ours; unexpire it */ rt->rt_flags &= ~RTPRF_OURS; rt_setexpire(rt, 0); - } else if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) == - (RTF_LLINFO | RTF_HOST) && rt->rt_llinfo != NULL && - rt->rt_gateway != NULL && - rt->rt_gateway->sa_family == AF_LINK) { - /* It's ARP; let it be handled there */ - arp_validate(rt); } } return (rn); @@ -248,53 +332,35 @@ in_matroute_args(void *v_arg, struct radix_node_head *head, return (rn); } -static int rtq_reallyold = 60*60; - /* one hour is ``really old'' */ -SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW | CTLFLAG_LOCKED, - &rtq_reallyold , 0, - "Default expiration time on dynamically learned routes"); - -static int rtq_minreallyold = 10; - /* never automatically crank down to less */ -SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW | CTLFLAG_LOCKED, - &rtq_minreallyold , 0, - "Minimum time to attempt to hold onto dynamically learned routes"); - -static int rtq_toomany = 128; - /* 128 cached routes is ``too many'' */ -SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW | CTLFLAG_LOCKED, - &rtq_toomany , 0, "Upper limit on dynamically learned routes"); - -#ifdef __APPLE__ -/* XXX LD11JUL02 Special case for AOL 5.1.2 connectivity issue to AirPort BS (Radar 2969954) - * AOL is adding a circular route ("10.0.1.1/32 10.0.1.1") when establishing its ppp tunnel - * to the AP BaseStation by removing the default gateway and replacing it with their tunnel entry point. - * There is no apparent reason to add this route as there is a valid 10.0.1.1/24 route to the BS. - * That circular route was ignored on previous version of MacOS X because of a routing bug - * corrected with the merge to FreeBSD4.4 (a route generated from an RTF_CLONING route had the RTF_WASCLONED - * flag set but did not have a reference to the parent route) and that entry was left in the RT. This workaround is - * made in order to provide binary compatibility with AOL. - * If we catch a process adding a circular route with a /32 from the routing socket, we error it out instead of - * confusing the routing table with a wrong route to the previous default gateway - * If for some reason a circular route is needed, turn this sysctl (net.inet.ip.check_route_selfref) to zero. - */ -int check_routeselfref = 1; -SYSCTL_INT(_net_inet_ip, OID_AUTO, check_route_selfref, CTLFLAG_RW | CTLFLAG_LOCKED, - &check_routeselfref , 0, ""); -#endif +/* one hour is ``really old'' */ +static uint32_t rtq_reallyold = 60*60; +SYSCTL_UINT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, + CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0, + "Default expiration time on dynamically learned routes"); -int use_routegenid = 1; -SYSCTL_INT(_net_inet_ip, OID_AUTO, use_route_genid, CTLFLAG_RW | CTLFLAG_LOCKED, - &use_routegenid , 0, ""); +/* never automatically crank down to less */ +static uint32_t rtq_minreallyold = 10; +SYSCTL_UINT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, + CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0, + "Minimum time to attempt to hold onto dynamically learned routes"); + +/* 128 cached routes is ``too many'' */ +static uint32_t rtq_toomany = 128; +SYSCTL_UINT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, + CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0, + "Upper limit on dynamically learned routes"); /* * On last reference drop, mark the route as belong to us so that it can be * timed out. 
*/ static void -in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) +in_clsroute(struct radix_node *rn, struct radix_node_head *head) { +#pragma unused(head) + char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN]; struct rtentry *rt = (struct rtentry *)rn; + boolean_t verbose = (rt_verbose > 1); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); @@ -305,15 +371,29 @@ in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST) return; - if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED) + if (rt->rt_flags & RTPRF_OURS) + return; + + if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC))) return; + if (verbose) + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + /* * Delete the route immediately if RTF_DELCLONE is set or * if route caching is disabled (rtq_reallyold set to 0). * Otherwise, let it expire and be deleted by in_rtqkill(). */ if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) { + int err; + + if (verbose) { + log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, " + "flags=%b\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } /* * Delete the route from the radix tree but since we are * called when the route's reference count is 0, don't @@ -324,30 +404,48 @@ in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) * calling rt_setgate() on this route. */ RT_UNLOCK(rt); - if (rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) { + err = rtrequest_locked(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt); + if (err == 0) { /* Now let the caller free it */ RT_LOCK(rt); RT_REMREF_LOCKED(rt); } else { RT_LOCK(rt); + if (!verbose) + rt_str(rt, dbuf, sizeof (dbuf), + gbuf, sizeof (gbuf)); + log(LOG_ERR, "%s: error deleting route to " + "%s->%s->%s, flags=%b, err=%d\n", __func__, + dbuf, gbuf, (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", rt->rt_flags, + RTF_BITS, err); } } else { uint64_t timenow; timenow = net_uptime(); rt->rt_flags |= RTPRF_OURS; - rt_setexpire(rt, - rt_expiry(rt, timenow, rtq_reallyold)); + rt_setexpire(rt, timenow + rtq_reallyold); + + if (verbose) { + log(LOG_DEBUG, "%s: route to %s->%s->%s invalidated, " + "flags=%b, expire=T+%u\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? 
rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS, rt->rt_expire - timenow); + } + + /* We have at least one entry; arm the timer if not already */ + in_sched_rtqtimo(NULL); } } struct rtqk_arg { struct radix_node_head *rnh; - int draining; - int killed; - int found; int updating; + int draining; + uint32_t killed; + uint32_t found; uint64_t nextstop; }; @@ -361,22 +459,38 @@ in_rtqkill(struct radix_node *rn, void *rock) { struct rtqk_arg *ap = rock; struct rtentry *rt = (struct rtentry *)rn; - int err; + boolean_t verbose = (rt_verbose > 1); uint64_t timenow; + int err; timenow = net_uptime(); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK(rt); if (rt->rt_flags & RTPRF_OURS) { - ap->found++; + char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN]; + + if (verbose) + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + ap->found++; VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); if (ap->draining || rt->rt_expire <= timenow) { - if (rt->rt_refcnt > 0) - panic("rtqkill route really not free"); - + if (rt->rt_refcnt > 0) { + panic("%s: route %p marked with RTPRF_OURS " + "with non-zero refcnt (%u)", __func__, + rt, rt->rt_refcnt); + /* NOTREACHED */ + } + if (verbose) { + log(LOG_DEBUG, "%s: deleting route to " + "%s->%s->%s, flags=%b, draining=%d\n", + __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", rt->rt_flags, + RTF_BITS, ap->draining); + } + RT_ADDREF_LOCKED(rt); /* for us to free below */ /* * Delete this route since we're done with it; * the route may be freed afterwards, so we @@ -388,58 +502,81 @@ in_rtqkill(struct radix_node *rn, void *rock) */ RT_UNLOCK(rt); err = rtrequest_locked(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); - if (err) { - log(LOG_WARNING, "in_rtqkill: error %d\n", err); + rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); + if (err != 0) { + RT_LOCK(rt); + if (!verbose) + rt_str(rt, dbuf, sizeof (dbuf), + gbuf, sizeof (gbuf)); + log(LOG_ERR, "%s: error deleting route to " + "%s->%s->%s, flags=%b, err=%d\n", __func__, + dbuf, gbuf, (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", rt->rt_flags, + RTF_BITS, err); + RT_UNLOCK(rt); } else { ap->killed++; } + rtfree_locked(rt); } else { - if (ap->updating && - (rt->rt_expire - timenow) > - rt_expiry(rt, 0, rtq_reallyold)) { - rt_setexpire(rt, rt_expiry(rt, - timenow, rtq_reallyold)); + uint64_t expire = (rt->rt_expire - timenow); + + if (ap->updating && expire > rtq_reallyold) { + rt_setexpire(rt, timenow + rtq_reallyold); + if (verbose) { + log(LOG_DEBUG, "%s: route to " + "%s->%s->%s, flags=%b, adjusted " + "expire=T+%u (was T+%u)\n", + __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? 
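in_rtqkill() is a textbook expiry sweep: count the entries this code manages (found), reap the expired or draining ones (killed), clamp over-long lifetimes when updating is set, and track the earliest remaining expiry in nextstop so the caller can re-arm its timer exactly when needed. The same sweep in miniature, with simplified stand-in types:

#include <stdint.h>

struct entry {
	uint64_t expire;
	int	ours;		/* analogue of RTPRF_OURS */
};

struct sweep {
	uint64_t now, nextstop, max_age;
	uint32_t found, killed;
	int	draining, updating;
};

static void
sweep_one(struct entry *e, struct sweep *s)
{
	if (!e->ours)
		return;
	s->found++;
	if (s->draining || e->expire <= s->now) {
		e->ours = 0;	/* stands in for rtrequest(RTM_DELETE) */
		s->killed++;
		return;
	}
	if (s->updating && e->expire - s->now > s->max_age)
		e->expire = s->now + s->max_age; /* clamp, like rtq_reallyold */
	if (e->expire < s->nextstop)
		s->nextstop = e->expire;
}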
+ rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS, + (rt->rt_expire - timenow), expire); + } } - ap->nextstop = lmin(ap->nextstop, - rt->rt_expire); + ap->nextstop = lmin(ap->nextstop, rt->rt_expire); RT_UNLOCK(rt); } } else { RT_UNLOCK(rt); } - return 0; + return (0); } -static void -in_rtqtimo_funnel(void *rock) -{ - in_rtqtimo(rock); - -} -#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ +#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ static int rtq_timeout = RTQ_TIMEOUT; static void -in_rtqtimo(void *rock) +in_rtqtimo(void *targ) { - struct radix_node_head *rnh = rock; +#pragma unused(targ) + struct radix_node_head *rnh; struct rtqk_arg arg; struct timeval atv; static uint64_t last_adjusted_timeout = 0; + boolean_t verbose = (rt_verbose > 1); uint64_t timenow; + uint32_t ours; lck_mtx_lock(rnh_lock); - /* Get the timestamp after we acquire the lock for better accuracy */ - timenow = net_uptime(); + rnh = rt_tables[AF_INET]; + VERIFY(rnh != NULL); - arg.found = arg.killed = 0; + /* Get the timestamp after we acquire the lock for better accuracy */ + timenow = net_uptime(); + if (verbose) { + log(LOG_DEBUG, "%s: initial nextstop is T+%u seconds\n", + __func__, rtq_timeout); + } + bzero(&arg, sizeof (arg)); arg.rnh = rnh; arg.nextstop = timenow + rtq_timeout; - arg.draining = arg.updating = 0; rnh->rnh_walktree(rnh, in_rtqkill, &arg); - + if (verbose) { + log(LOG_DEBUG, "%s: found %u, killed %u\n", __func__, + arg.found, arg.killed); + } /* * Attempt to be somewhat dynamic about this: * If there are ``too many'' routes sitting around taking up space, @@ -448,19 +585,19 @@ in_rtqtimo(void *rock) * than once in rtq_timeout seconds, to keep from cranking down too * hard. */ - if((arg.found - arg.killed > rtq_toomany) - && ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) - && rtq_reallyold > rtq_minreallyold) { - rtq_reallyold = 2*rtq_reallyold / 3; - if(rtq_reallyold < rtq_minreallyold) { + ours = (arg.found - arg.killed); + if (ours > rtq_toomany && + ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) && + rtq_reallyold > rtq_minreallyold) { + rtq_reallyold = 2 * rtq_reallyold / 3; + if (rtq_reallyold < rtq_minreallyold) rtq_reallyold = rtq_minreallyold; - } last_adjusted_timeout = timenow; -#if DIAGNOSTIC - log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n", - rtq_reallyold); -#endif + if (verbose) { + log(LOG_DEBUG, "%s: adjusted rtq_reallyold to %d " + "seconds\n", __func__, rtq_reallyold); + } arg.found = arg.killed = 0; arg.updating = 1; rnh->rnh_walktree(rnh, in_rtqkill, &arg); @@ -468,21 +605,53 @@ in_rtqtimo(void *rock) atv.tv_usec = 0; atv.tv_sec = arg.nextstop - timenow; + /* re-arm the timer only if there's work to do */ + in_rtqtimo_run = 0; + if (ours > 0) + in_sched_rtqtimo(&atv); + else if (verbose) + log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__); lck_mtx_unlock(rnh_lock); - timeout(in_rtqtimo_funnel, rock, tvtohz(&atv)); +} + +static void +in_sched_rtqtimo(struct timeval *atv) +{ + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + + if (!in_rtqtimo_run) { + struct timeval tv; + + if (atv == NULL) { + tv.tv_usec = 0; + tv.tv_sec = MAX(rtq_timeout / 10, 1); + atv = &tv; + } + if (rt_verbose > 1) { + log(LOG_DEBUG, "%s: timer scheduled in " + "T+%llus.%lluu\n", __func__, + (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec); + } + in_rtqtimo_run = 1; + timeout(in_rtqtimo, NULL, tvtohz(atv)); + } } void in_rtqdrain(void) { - struct radix_node_head *rnh = rt_tables[AF_INET]; + struct 
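Two mechanisms in the reworked in_rtqtimo() are easy to miss. First, when more than rtq_toomany managed routes survive a sweep, the route lifetime decays to two thirds of itself, clamped at rtq_minreallyold. Second, the timeout is now one-shot: in_sched_rtqtimo() arms it only when the in_rtqtimo_run flag is clear, and the timer body clears the flag and re-arms only while entries remain. A reduction of both, assuming a caller-supplied schedule() callback and an external lock; neither is a real kernel API:

#include <stdint.h>

static uint32_t reallyold = 3600;	/* like rtq_reallyold */
static uint32_t minreallyold = 10;	/* like rtq_minreallyold */
static uint32_t toomany = 128;		/* like rtq_toomany */
static int timo_running;		/* like in_rtqtimo_run */

static void
decay_lifetime(uint32_t surviving)
{
	if (surviving > toomany && reallyold > minreallyold) {
		reallyold = 2 * reallyold / 3;
		if (reallyold < minreallyold)
			reallyold = minreallyold;
	}
}

static void
sched_timo(void (*schedule)(void))
{
	/* caller holds the lock protecting timo_running */
	if (!timo_running) {
		timo_running = 1;
		schedule();		/* at most one callout pending */
	}
}

static void
timo_fired(uint32_t surviving, void (*schedule)(void))
{
	decay_lifetime(surviving);
	timo_running = 0;		/* allow re-arming */
	if (surviving > 0)
		sched_timo(schedule);	/* re-arm only if work remains */
}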
radix_node_head *rnh; struct rtqk_arg arg; - arg.found = arg.killed = 0; + + if (rt_verbose > 1) + log(LOG_DEBUG, "%s: draining routes\n", __func__); + + lck_mtx_lock(rnh_lock); + rnh = rt_tables[AF_INET]; + VERIFY(rnh != NULL); + bzero(&arg, sizeof (arg)); arg.rnh = rnh; - arg.nextstop = 0; arg.draining = 1; - arg.updating = 0; - lck_mtx_lock(rnh_lock); rnh->rnh_walktree(rnh, in_rtqkill, &arg); lck_mtx_unlock(rnh_lock); } @@ -495,27 +664,30 @@ in_inithead(void **head, int off) { struct radix_node_head *rnh; -#ifdef __APPLE__ - if (*head) - return 1; -#endif + /* If called from route_init(), make sure it is exactly once */ + VERIFY(head != (void **)&rt_tables[AF_INET] || *head == NULL); - if(!rn_inithead(head, off)) - return 0; + if (!rn_inithead(head, off)) + return (0); - if(head != (void **)&rt_tables[AF_INET]) /* BOGUS! */ - return 1; /* only do this for the real routing table */ + /* + * We can get here from nfs_subs.c as well, in which case this + * won't be for the real routing table and thus we're done; + * this also takes care of the case when we're called more than + * once from anywhere but route_init(). + */ + if (head != (void **)&rt_tables[AF_INET]) + return (1); /* only do this for the real routing table */ rnh = *head; rnh->rnh_addaddr = in_addroute; + rnh->rnh_deladdr = in_deleteroute; rnh->rnh_matchaddr = in_matroute; rnh->rnh_matchaddr_args = in_matroute_args; rnh->rnh_close = in_clsroute; - in_rtqtimo(rnh); /* kick off timeout first time */ - return 1; + return (1); } - /* * This zaps old routes when the interface goes down or interface * address is deleted. In the latter case, it deletes static routes @@ -534,13 +706,25 @@ struct in_ifadown_arg { static int in_ifadownkill(struct radix_node *rn, void *xap) { + char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN]; struct in_ifadown_arg *ap = xap; struct rtentry *rt = (struct rtentry *)rn; + boolean_t verbose = (rt_verbose != 0); int err; + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + RT_LOCK(rt); if (rt->rt_ifa == ap->ifa && (ap->del || !(rt->rt_flags & RTF_STATIC))) { + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + if (verbose) { + log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, " + "flags=%b\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } + RT_ADDREF_LOCKED(rt); /* for us to free below */ /* * We need to disable the automatic prune that happens * in this case in rtrequest() because it will blow @@ -555,14 +739,24 @@ in_ifadownkill(struct radix_node *rn, void *xap) rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING); RT_UNLOCK(rt); err = rtrequest_locked(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); - if (err) { - log(LOG_WARNING, "in_ifadownkill: error %d\n", err); + rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); + if (err != 0) { + RT_LOCK(rt); + if (!verbose) + rt_str(rt, dbuf, sizeof (dbuf), + gbuf, sizeof (gbuf)); + log(LOG_ERR, "%s: error deleting route to " + "%s->%s->%s, flags=%b, err=%d\n", __func__, + dbuf, gbuf, (rt->rt_ifp != NULL) ? 
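in_rtqkill() above and in_ifadownkill() here both follow the same reference discipline around deletion: take RT_ADDREF_LOCKED() before issuing RTM_DELETE, so the walker's entry cannot be freed underneath it (and can still be locked and logged on error), then drop that reference with rtfree_locked(). The discipline with a toy refcounted object:

#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcnt;
};

static void
obj_ref(struct obj *o)
{
	o->refcnt++;
}

static void
obj_rele(struct obj *o)
{
	if (--o->refcnt == 0) {
		puts("freed");
		free(o);
	}
}

static void
table_delete(struct obj *o)	/* like rtrequest_locked(RTM_DELETE, ...) */
{
	obj_rele(o);		/* the table drops its reference */
}

int
main(void)
{
	struct obj *o = calloc(1, sizeof (*o));

	o->refcnt = 1;		/* the table's reference */
	obj_ref(o);		/* walker's reference (RT_ADDREF_LOCKED) */
	table_delete(o);	/* table lets go; o is still valid here */
	obj_rele(o);		/* walker lets go (rtfree_locked); freed */
	return (0);
}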
+ rt->rt_ifp->if_xname : "", rt->rt_flags, + RTF_BITS, err); + RT_UNLOCK(rt); } + rtfree_locked(rt); } else { RT_UNLOCK(rt); } - return 0; + return (0); } int @@ -582,8 +776,7 @@ in_ifadown(struct ifaddr *ifa, int delete) return (1); /* trigger route cache reevaluation */ - if (use_routegenid) - routegenid_update(); + routegenid_inet_update(); arg.rnh = rnh = rt_tables[AF_INET]; arg.ifa = ifa; diff --git a/bsd/netinet/in_systm.h b/bsd/netinet/in_systm.h index 8a617b0a7..3ea2612a6 100644 --- a/bsd/netinet/in_systm.h +++ b/bsd/netinet/in_systm.h @@ -84,8 +84,7 @@ typedef __uint32_t n_long; /* long as received from the net */ typedef __uint32_t n_time; /* ms since 00:00 GMT, byte rev */ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE n_time iptime(void); -#endif /* KERNEL_PRIVATE */ - -#endif +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* _NETINET_IN_SYSTM_H_ */ diff --git a/bsd/netinet/in_tclass.c b/bsd/netinet/in_tclass.c index 02d9ccc86..45d86992c 100644 --- a/bsd/netinet/in_tclass.c +++ b/bsd/netinet/in_tclass.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2012 Apple Inc. All rights reserved. + * Copyright (c) 2009-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -55,6 +55,7 @@ #include #include #include +#include extern char *proc_name_address(void *p); @@ -78,7 +79,6 @@ static int set_pname_tclass(struct so_tcdbg *); static int flush_pid_tclass(struct so_tcdbg *); static int purge_tclass_for_proc(void); static int flush_tclass_for_proc(void); -static void so_set_lro(struct socket*, int); int get_tclass_for_curr_proc(int *); static lck_grp_attr_t *tclass_lck_grp_attr = NULL; /* mutex group attributes */ @@ -87,6 +87,13 @@ static lck_attr_t *tclass_lck_attr = NULL; /* mutex attributes */ decl_lck_mtx_data(static, tclass_lock_data); static lck_mtx_t *tclass_lock = &tclass_lock_data; +/* + * If there is no foreground activity on the interface for bg_switch_time + * seconds, the background connections can switch to foreground TCP + * congestion control. 
+ */ +#define TCP_BG_SWITCH_TIME 2 + /* * Must be called with tclass_lock held */ @@ -289,21 +296,21 @@ set_pid_tclass(struct so_tcdbg *so_tcdbg) fp = fdp->fd_ofiles[i]; if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || - fp->f_fglob->fg_type != DTYPE_SOCKET) + FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) continue; so = (struct socket *)fp->f_fglob->fg_data; - if (so->so_proto->pr_domain->dom_family != AF_INET && - so->so_proto->pr_domain->dom_family != AF_INET6) + if (SOCK_DOM(so) != PF_INET && SOCK_DOM(so) != PF_INET6) continue; socket_lock(so, 1); if (tclass != -1) { error = so_set_traffic_class(so, tclass); if (error != 0) { printf("%s: so_set_traffic_class" - "(so=%p, fd=%d, tclass=%d) " + "(so=0x%llx, fd=%d, tclass=%d) " "failed %d\n", __func__, - so, i, tclass, error); + (uint64_t)VM_KERNEL_ADDRPERM(so), + i, tclass, error); error = 0; } } @@ -373,15 +380,16 @@ flush_pid_tclass(struct so_tcdbg *so_tcdbg) fp = fdp->fd_ofiles[i]; if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || - fp->f_fglob->fg_type != DTYPE_SOCKET) + FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_SOCKET) continue; so = (struct socket *)fp->f_fglob->fg_data; error = sock_setsockopt(so, SOL_SOCKET, SO_FLUSH, &tclass, sizeof (tclass)); if (error != 0) { - printf("%s: setsockopt(SO_FLUSH) (so=%p, fd=%d, " - "tclass=%d) failed %d\n", __func__, so, i, tclass, + printf("%s: setsockopt(SO_FLUSH) (so=0x%llx, fd=%d, " + "tclass=%d) failed %d\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(so), i, tclass, error); error = 0; } @@ -652,17 +660,13 @@ so_set_traffic_class(struct socket *so, int optval) VERIFY(SO_VALID_TC(optval)); so->so_traffic_class = optval; - if ((INP_SOCKAF(so) == AF_INET || - INP_SOCKAF(so) == AF_INET6) && - INP_SOCKTYPE(so) == SOCK_STREAM) { + if ((SOCK_DOM(so) == PF_INET || + SOCK_DOM(so) == PF_INET6) && + SOCK_TYPE(so) == SOCK_STREAM) set_tcp_stream_priority(so); - /* Set/unset use of Large Receive Offload */ - so_set_lro(so, optval); - } - - if ((INP_SOCKAF(so) == AF_INET || - INP_SOCKAF(so) == AF_INET6) && + if ((SOCK_DOM(so) == PF_INET || + SOCK_DOM(so) == PF_INET6) && optval != oldval && (optval == SO_TC_BK_SYS || oldval == SO_TC_BK_SYS)) { /* @@ -672,9 +676,10 @@ so_set_traffic_class(struct socket *so, int optval) if (oldval == SO_TC_BK_SYS) inp_reset_fc_state(so->so_pcb); - SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] " - "opportunistic %s\n", so->last_pid, - so, INP_SOCKAF(so), INP_SOCKTYPE(so), + SOTHROTTLELOG(("throttle[%d]: so 0x%llx " + "[%d,%d] opportunistic %s\n", so->last_pid, + (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), (optval == SO_TC_BK_SYS) ? 
"ON" : "OFF")); } } @@ -688,7 +693,7 @@ so_set_default_traffic_class(struct socket *so) int sotc = -1; if (tfp_count > 0 && - (INP_SOCKAF(so) == AF_INET || INP_SOCKAF(so) == AF_INET6)) { + (SOCK_DOM(so) == PF_INET || SOCK_DOM(so) == PF_INET6)) { get_tclass_for_curr_proc(&sotc); } @@ -776,9 +781,21 @@ so_recv_data_stat(struct socket *so, struct mbuf *m, size_t off) __private_extern__ void set_tcp_stream_priority(struct socket *so) { - struct tcpcb *tp = intotcpcb(sotoinpcb(so)); - int old_cc = tp->tcp_cc_index; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); + struct ifnet *outifp; + u_char old_cc = tp->tcp_cc_index; int recvbg = IS_TCP_RECV_BG(so); + bool is_local, fg_active = false; + u_int32_t uptime; + + VERIFY((SOCK_CHECK_DOM(so, PF_INET) + || SOCK_CHECK_DOM(so, PF_INET6)) + && SOCK_CHECK_TYPE(so, SOCK_STREAM) + && SOCK_CHECK_PROTO(so, IPPROTO_TCP)); + + outifp = inp->inp_last_outifp; + uptime = net_uptime(); /* * If the socket was marked as a background socket or if the @@ -787,8 +804,34 @@ set_tcp_stream_priority(struct socket *so) * background. The variable sotcdb which can be set with sysctl * is used to disable these settings for testing. */ - if (soisthrottled(so) || IS_SO_TC_BACKGROUND(so->so_traffic_class)) { - if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0) { + if (soissrcbackground(so)) { + if (outifp == NULL || (outifp->if_flags & IFF_LOOPBACK)) + is_local = true; + else + is_local = false; + + /* Check if there has been recent foreground activity */ + if ((outifp != NULL && + outifp->if_fg_sendts > 0 && + (int)(uptime - outifp->if_fg_sendts) <= + TCP_BG_SWITCH_TIME) || + net_io_policy_throttled) + fg_active = true; + + /* + * If the interface that the connection is using is + * loopback, do not use background congestion + * control algorithm. + * + * If there has been recent foreground activity or if + * there was an indication that a foreground application + * is going to use networking (net_io_policy_throttled), + * switch the backgroung streams to use background + * congestion control algorithm. Otherwise, even background + * flows can move into foreground. + */ + if ((sotcdb & SOTCDB_NO_SENDTCPBG) != 0 || + is_local || !fg_active) { if (old_cc == TCP_CC_ALGO_BACKGROUND_INDEX) tcp_set_foreground_cc(so); } else { @@ -797,7 +840,8 @@ set_tcp_stream_priority(struct socket *so) } /* Set receive side background flags */ - if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0) + if ((sotcdb & SOTCDB_NO_RECVTCPBG) != 0 || + is_local || !fg_active) tcp_clear_recv_bg(so); else tcp_set_recv_bg(so); @@ -808,9 +852,9 @@ set_tcp_stream_priority(struct socket *so) } if (old_cc != tp->tcp_cc_index || recvbg != IS_TCP_RECV_BG(so)) { - SOTHROTTLELOG(("throttle[%d]: so %p [%d,%d] TCP %s send; " - "%s recv\n", so->last_pid, so, INP_SOCKAF(so), - INP_SOCKTYPE(so), + SOTHROTTLELOG(("throttle[%d]: so 0x%llx [%d,%d] TCP %s send; " + "%s recv\n", so->last_pid, (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) ? "background" : "foreground", IS_TCP_RECV_BG(so) ? 
"background" : "foreground")); @@ -859,6 +903,8 @@ set_packet_service_class(struct mbuf *m, struct socket *so, if (soisthrottled(so) && !IS_MBUF_SC_BACKGROUND(msc)) msc = MBUF_SC_BK; + if (soissrcbackground(so)) + m->m_pkthdr.pkt_flags |= PKTF_SO_BACKGROUND; /* * Set the traffic class in the mbuf packet header svc field */ @@ -876,13 +922,14 @@ set_packet_service_class(struct mbuf *m, struct socket *so, (void) m_set_service_class(m, msc); /* - * Set the privileged traffic auxiliary flag if applicable, or clear it. + * Set the privileged traffic auxiliary flag if applicable, + * or clear it. */ if (!(sotcdb & SOTCDB_NO_PRIVILEGED) && soisprivilegedtraffic(so) && msc != MBUF_SC_UNSPEC) - m->m_pkthdr.aux_flags |= MAUXF_PRIO_PRIVILEGED; + m->m_pkthdr.pkt_flags |= PKTF_PRIO_PRIVILEGED; else - m->m_pkthdr.aux_flags &= ~MAUXF_PRIO_PRIVILEGED; + m->m_pkthdr.pkt_flags &= ~PKTF_PRIO_PRIVILEGED; no_mbtc: /* @@ -987,6 +1034,8 @@ so_tc_update_stats(struct mbuf *m, struct socket *so, mbuf_svc_class_t msc) __private_extern__ void socket_tclass_init(void) { + _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX); + tclass_lck_grp_attr = lck_grp_attr_alloc_init(); tclass_lck_grp = lck_grp_alloc_init("tclass", tclass_lck_grp_attr); tclass_lck_attr = lck_attr_alloc_init(); @@ -1073,17 +1122,30 @@ so_svc2tc(mbuf_svc_class_t svc) } /* - * LRO is turned on for AV streaming and background classes. + * LRO is turned on for AV streaming class. */ -static void +void so_set_lro(struct socket *so, int optval) { - if ((optval == SO_TC_BK) || - (optval == SO_TC_BK_SYS) || - (optval == SO_TC_AV)) { + if (optval == SO_TC_AV) { so->so_flags |= SOF_USELRO; } else { - so->so_flags &= ~SOF_USELRO; + if (so->so_flags & SOF_USELRO) { + /* transition to non LRO class */ + so->so_flags &= ~SOF_USELRO; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = NULL; + if (inp) { + tp = intotcpcb(inp); + if (tp && (tp->t_flagsext & TF_LRO_OFFLOADED)) { + tcp_lro_remove_state(inp->inp_laddr, + inp->inp_faddr, + inp->inp_lport, + inp->inp_fport); + tp->t_flagsext &= ~TF_LRO_OFFLOADED; + } + } + } } } diff --git a/bsd/netinet/in_var.h b/bsd/netinet/in_var.h index 74100b4a3..9baa7ac54 100644 --- a/bsd/netinet/in_var.h +++ b/bsd/netinet/in_var.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -58,19 +58,16 @@ * SUCH DAMAGE. 
* * @(#)in_var.h 8.2 (Berkeley) 1/9/95 - * $FreeBSD: src/sys/netinet/in_var.h,v 1.33.2.2 2001/07/17 10:50:01 ru Exp $ */ #ifndef _NETINET_IN_VAR_H_ -#define _NETINET_IN_VAR_H_ +#define _NETINET_IN_VAR_H_ #include #include -#ifdef __APPLE__ #include -#endif -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #include /* @@ -80,84 +77,80 @@ * of the structure and is assumed to be first. */ struct in_ifaddr { - struct ifaddr ia_ifa; /* protocol-independent info */ -#define ia_ifp ia_ifa.ifa_ifp -#define ia_flags ia_ifa.ifa_flags - /* ia_{,sub}net{,mask} in host order */ - u_int32_t ia_net; /* network number of interface */ - u_int32_t ia_netmask; /* mask of net part */ - u_int32_t ia_subnet; /* subnet number, including net */ - u_int32_t ia_subnetmask; /* mask of subnet part */ - struct in_addr ia_netbroadcast; /* to recognize net broadcasts */ - TAILQ_ENTRY(in_ifaddr) ia_link; /* tailq macro glue */ - struct sockaddr_in ia_addr; /* reserve space for interface name */ - struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */ -#define ia_broadaddr ia_dstaddr - struct sockaddr_in ia_sockmask; /* reserve space for general netmask */ - TAILQ_ENTRY(in_ifaddr) ia_hash; /* hash bucket entry */ + struct ifaddr ia_ifa; /* protocol-independent info */ +#define ia_ifp ia_ifa.ifa_ifp +#define ia_flags ia_ifa.ifa_flags + /* ia_{,sub}net{,mask} in host order */ + u_int32_t ia_net; /* network number of interface */ + u_int32_t ia_netmask; /* mask of net part */ + u_int32_t ia_subnet; /* subnet number, including net */ + u_int32_t ia_subnetmask; /* mask of subnet part */ + struct in_addr ia_netbroadcast; /* to recognize net broadcasts */ + TAILQ_ENTRY(in_ifaddr) ia_link; /* tailq macro glue */ + struct sockaddr_in ia_addr; /* reserve space for interface name */ + struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */ +#define ia_broadaddr ia_dstaddr + struct sockaddr_in ia_sockmask; /* reserve space for general netmask */ + TAILQ_ENTRY(in_ifaddr) ia_hash; /* hash bucket entry */ }; #define ifatoia(ifa) ((struct in_ifaddr *)(void *)(ifa)) -#endif /* XNU_KERNEL_PRIVATE */ - -struct in_aliasreq { - char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ - struct sockaddr_in ifra_addr; - struct sockaddr_in ifra_broadaddr; -#define ifra_dstaddr ifra_broadaddr - struct sockaddr_in ifra_mask; - u_int32_t ifra_unused; /* not used: used to be 'dlt' */ +#endif /* BSD_KERNEL_PRIVATE */ + +struct in_aliasreq { + char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ + struct sockaddr_in ifra_addr; + struct sockaddr_in ifra_broadaddr; +#define ifra_dstaddr ifra_broadaddr + struct sockaddr_in ifra_mask; }; /* - * Event data, internet style. + * Event data, inet style. 
 */
 struct kev_in_data {
-	struct net_event_data link_data;
-	struct in_addr ia_addr;
-	u_int32_t ia_net;		/* network number of interface */
-	u_int32_t ia_netmask;		/* mask of net part */
-	u_int32_t ia_subnet;		/* subnet number, including net */
-	u_int32_t ia_subnetmask;	/* mask of subnet part */
-	struct in_addr ia_netbroadcast;/* to recognize net broadcasts */
-	struct in_addr ia_dstaddr;
+	struct net_event_data	link_data;
+	struct in_addr		ia_addr;	/* interface address */
+	u_int32_t		ia_net;		/* network number of interface */
+	u_int32_t		ia_netmask;	/* mask of net part */
+	u_int32_t		ia_subnet;	/* subnet number, including net */
+	u_int32_t		ia_subnetmask;	/* mask of subnet part */
+	struct in_addr		ia_netbroadcast; /* to recognize net broadcasts */
+	struct in_addr		ia_dstaddr;
 };
 
 struct kev_in_collision {
-	struct net_event_data link_data;	/* link colliding arp was received on */
-	struct in_addr ia_ipaddr;	/* IP address we and another node are using */
-	u_char hw_len;	/* length of hardware address */
-	u_char hw_addr[0];	/* variable length hardware address */
+	struct net_event_data	link_data;	/* link on which ARP was received */
+	struct in_addr		ia_ipaddr;	/* conflicting IP address */
+	u_char			hw_len;		/* length of hardware address */
+	u_char			hw_addr[0];	/* variable length hardware address */
 };
 
 #ifdef __APPLE_API_PRIVATE
 struct kev_in_portinuse {
-	u_int16_t port;		/* conflicting port number in host order */
-	u_int32_t req_pid;	/* PID port requestor */
-	u_int32_t reserved[2];
+	u_int16_t	port;		/* conflicting port number in host order */
+	u_int32_t	req_pid;	/* PID of port requestor */
+	u_int32_t	reserved[2];
 };
-#endif
-
+#endif /* __APPLE_API_PRIVATE */
 /*
  * Define inet event subclass and specific inet events.
  */
-
-#define KEV_INET_SUBCLASS 1
-
-#define KEV_INET_NEW_ADDR 1
-#define KEV_INET_CHANGED_ADDR 2
-#define KEV_INET_ADDR_DELETED 3
-#define KEV_INET_SIFDSTADDR 4
-#define KEV_INET_SIFBRDADDR 5
-#define KEV_INET_SIFNETMASK 6
-#define KEV_INET_ARPCOLLISION 7 /* use kev_in_collision */
-
+#define KEV_INET_SUBCLASS	1	/* inet subclass identifier */
+
+#define KEV_INET_NEW_ADDR	1	/* Userland configured IP address */
+#define KEV_INET_CHANGED_ADDR	2	/* Address changed event */
+#define KEV_INET_ADDR_DELETED	3	/* IP address was deleted */
+#define KEV_INET_SIFDSTADDR	4	/* Dest. address was set */
+#define KEV_INET_SIFBRDADDR	5	/* Broadcast address was set */
+#define KEV_INET_SIFNETMASK	6	/* Netmask was set */
+#define KEV_INET_ARPCOLLISION	7	/* ARP collision detected */
 #ifdef __APPLE_API_PRIVATE
-#define KEV_INET_PORTINUSE 8 /* use ken_in_portinuse */
+#define KEV_INET_PORTINUSE	8	/* use kev_in_portinuse */
 #endif
-#ifdef XNU_KERNEL_PRIVATE
+#ifdef BSD_KERNEL_PRIVATE
 #include
 #include
 #include
@@ -165,33 +158,28 @@ struct kev_in_portinuse {
  * Given a pointer to an in_ifaddr (ifaddr),
  * return a pointer to the addr as a sockaddr_in.
  */
-#define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr))
-#define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr))
+#define IA_SIN(ia)	(&(((struct in_ifaddr *)(ia))->ia_addr))
+#define IA_DSTSIN(ia)	(&(((struct in_ifaddr *)(ia))->ia_dstaddr))
 
-#define IN_LNAOF(in, ifa) \
+#define IN_LNAOF(in, ifa) \
 	((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask))
 
 /*
 * Hash table for IPv4 addresses.
*/ -__private_extern__ TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead; -__private_extern__ TAILQ_HEAD(in_ifaddrhashhead, in_ifaddr) *in_ifaddrhashtbl; -__private_extern__ lck_rw_t *in_ifaddr_rwlock; +extern TAILQ_HEAD(in_ifaddrhead, in_ifaddr) in_ifaddrhead; +extern TAILQ_HEAD(in_ifaddrhashhead, in_ifaddr) *in_ifaddrhashtbl; +extern lck_rw_t *in_ifaddr_rwlock; #define INADDR_HASH(x) (&in_ifaddrhashtbl[inaddr_hashval(x)]) -extern struct ifqueue ipintrq; /* ip packet input queue */ -extern struct in_addr zeroin_addr; extern u_char inetctlerrmap[]; -extern int apple_hwcksum_tx; -extern int apple_hwcksum_rx; - /* * Macro for finding the interface (ifnet structure) corresponding to one * of our IP addresses. */ -#define INADDR_TO_IFP(addr, ifp) \ +#define INADDR_TO_IFP(addr, ifp) \ /* struct in_addr addr; */ \ /* struct ifnet *ifp; */ \ { \ @@ -215,7 +203,7 @@ extern int apple_hwcksum_rx; * to a given interface (ifnet structure). Caller is responsible for freeing * the reference. */ -#define IFP_TO_IA(ifp, ia) \ +#define IFP_TO_IA(ifp, ia) \ /* struct ifnet *ifp; */ \ /* struct in_ifaddr *ia; */ \ { \ @@ -299,10 +287,11 @@ struct igmp_ifinfo; * for the group the state change record is generated and transmitted, * and kept if retransmissions are necessary. * + * The request count here is a count of requests for this address, not a + * count of pointers to this structure. + * * FUTURE: inm_link is now only used when groups are being purged - * on a detaching ifnet. It could be demoted to a SLIST_ENTRY, but - * because it is at the very start of the struct, we can't do this - * w/o breaking the ABI for ifmcstat. + * on a detaching ifnet. It could be demoted to a SLIST_ENTRY. */ struct in_multi { decl_lck_mtx_data(, inm_lock); @@ -411,7 +400,7 @@ struct in_multistep { * * Must be called with in_multihead_lock held. */ -#define IN_LOOKUP_MULTI(addr, ifp, inm) \ +#define IN_LOOKUP_MULTI(addr, ifp, inm) \ /* struct in_addr *addr; */ \ /* struct ifnet *ifp; */ \ /* struct in_multi *inm; */ \ @@ -440,7 +429,7 @@ do { \ * * Must be called with in_multihead_lock held. */ -#define IN_NEXT_MULTI(step, inm) \ +#define IN_NEXT_MULTI(step, inm) \ /* struct in_multistep step; */ \ /* struct in_multi *inm; */ \ do { \ @@ -449,7 +438,7 @@ do { \ (step).i_inm = LIST_NEXT((step).i_inm, inm_link); \ } while (0) -#define IN_FIRST_MULTI(step, inm) \ +#define IN_FIRST_MULTI(step, inm) \ /* struct in_multistep step; */ \ /* struct in_multi *inm; */ \ do { \ @@ -458,17 +447,21 @@ do { \ IN_NEXT_MULTI((step), (inm)); \ } while (0) -struct route; -struct ip_moptions; +extern lck_mtx_t *inet_domain_mutex; +extern struct domain *inetdomain; + +struct ip_moptions; struct inpcb; /* * Return values for imo_multi_filter(). 
*/ -#define MCAST_PASS 0 /* Pass */ -#define MCAST_NOTGMEMBER 1 /* This host not a member of group */ -#define MCAST_NOTSMEMBER 2 /* This host excluded source */ -#define MCAST_MUTED 3 /* [deprecated] */ +#define MCAST_PASS 0 /* Pass */ +#define MCAST_NOTGMEMBER 1 /* This host not a member of group */ +#define MCAST_NOTSMEMBER 2 /* This host excluded source */ +#define MCAST_MUTED 3 /* [deprecated] */ + +extern u_int32_t ipv4_ll_arp_aware; extern void in_ifaddr_init(void); extern int imo_multi_filter(const struct ip_moptions *, const struct ifnet *, @@ -482,7 +475,7 @@ extern void inm_release(struct in_multi *); extern void in_multi_init(void); extern struct in_multi *in_addmulti(struct in_addr *, struct ifnet *); extern void in_delmulti(struct in_multi *); -extern int in_leavegroup(struct in_multi *, /*const*/ struct in_mfilter *); +extern int in_leavegroup(struct in_multi *, struct in_mfilter *); extern int in_multi_detach(struct in_multi *); extern void inm_addref(struct in_multi *, int); extern void inm_remref(struct in_multi *, int); @@ -491,6 +484,7 @@ extern uint8_t ims_get_mode(const struct in_multi *, const struct ip_msource *, uint8_t); extern int in_control(struct socket *, u_long, caddr_t, struct ifnet *, struct proc *); +extern int in_inithead(void **, int); extern void in_rtqdrain(void); extern struct radix_node *in_validate(struct radix_node *); extern void ip_input(struct mbuf *); @@ -498,18 +492,19 @@ extern int in_ifadown(struct ifaddr *ifa, int); extern void in_ifscrub(struct ifnet *, struct in_ifaddr *, int); extern u_int32_t inaddr_hashval(u_int32_t); extern void in_purgeaddrs(struct ifnet *); -extern void imf_leave(struct in_mfilter *); -extern void imf_purge(struct in_mfilter *); - -__private_extern__ int inp_join_group(struct inpcb *, struct sockopt *); -__private_extern__ int inp_leave_group(struct inpcb *, struct sockopt *); -__private_extern__ void in_multihead_lock_exclusive(void); -__private_extern__ void in_multihead_lock_shared(void); -__private_extern__ void in_multihead_lock_assert(int); -__private_extern__ void in_multihead_lock_done(void); -#endif /* XNU_KERNEL_PRIVATE */ - +extern int in_selectaddrs(int af, struct sockaddr_list **, + struct sockaddr_entry **, struct sockaddr_list **, + struct sockaddr_entry **); +extern void gre_input(struct mbuf *, int); +extern void imf_leave(struct in_mfilter *); +extern void imf_purge(struct in_mfilter *); +extern int inp_join_group(struct inpcb *, struct sockopt *); +extern int inp_leave_group(struct inpcb *, struct sockopt *); +extern void in_multihead_lock_exclusive(void); +extern void in_multihead_lock_shared(void); +extern void in_multihead_lock_assert(int); +extern void in_multihead_lock_done(void); +#endif /* BSD_KERNEL_PRIVATE */ /* INET6 stuff */ #include - #endif /* _NETINET_IN_VAR_H_ */ diff --git a/bsd/netinet/ip.h b/bsd/netinet/ip.h index eea0a00f9..0f68a7080 100644 --- a/bsd/netinet/ip.h +++ b/bsd/netinet/ip.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -234,7 +234,7 @@ struct ip_timestamp { */ #define MAXTTL 255 /* maximum time to live (seconds) */ #define IPDEFTTL 64 /* default ttl, from RFC 1340 */ -#define IPFRAGTTL 60 /* time to live for frags, slowhz */ +#define IPFRAGTTL 30 /* time to live for frags (seconds) */ #define IPTTLDEC 1 /* subtracted when forwarding */ #define IP_MSS 576 /* default maximum segment size */ diff --git a/bsd/netinet/ip6.h b/bsd/netinet/ip6.h index a740ddc49..3982daf0d 100644 --- a/bsd/netinet/ip6.h +++ b/bsd/netinet/ip6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -284,14 +284,14 @@ struct ip6_frag { */ #define IPV6_MAXHLIM 255 /* maximum hoplimit */ #define IPV6_DEFHLIM 64 /* default hlim */ -#define IPV6_FRAGTTL 120 /* ttl for fragment packets, in slowtimo tick */ +#define IPV6_FRAGTTL 60 /* ttl for fragment packets (seconds) */ #define IPV6_HLIMDEC 1 /* subtracted when forwarding */ #define IPV6_MMTU 1280 /* minimal MTU and reassembly. 1024 + 256 */ #define IPV6_MAXPACKET 65535 /* ip6 max packet size without Jumbo payload*/ #define IPV6_MAXOPTHDR 2048 /* max option header size, 256 64-bit words */ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * IP6_EXTHDR_CHECK ensures that region between the IP6 header and the * target header (including IPv6 itself, extension headers and @@ -313,12 +313,14 @@ do { \ if ((m)->m_len < (off) + (hlen)) { \ ip6stat.ip6s_exthdrtoolong++; \ m_freem(m); \ + (m) = NULL; \ action; \ } \ } else { \ if ((m)->m_len < (off) + (hlen)) { \ ip6stat.ip6s_exthdrtoolong++; \ m_freem(m); \ + (m) = NULL; \ action; \ } \ } \ @@ -327,6 +329,7 @@ do { \ ip6stat.ip6s_tooshort++; \ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); \ m_freem(m); \ + (m) = NULL; \ action; \ } \ } \ @@ -346,5 +349,5 @@ do { \ #define IP6_EXTHDR_GET0(val, typ, m, off, len) \ M_STRUCT_GET0(val, typ, m, off, len) -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_NETINET_IP6_H_ */ diff --git a/bsd/netinet/ip_divert.c b/bsd/netinet/ip_divert.c index 1af24caec..16692077e 100644 --- a/bsd/netinet/ip_divert.c +++ b/bsd/netinet/ip_divert.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -140,42 +140,56 @@ static struct sockaddr_in divsrc = { sizeof(divsrc), AF_INET, 0, { 0 }, { 0,0,0, /* Internal functions */ static int div_output(struct socket *so, - struct mbuf *m, struct sockaddr *addr, struct mbuf *control); + struct mbuf *m, struct sockaddr_in *addr, struct mbuf *control); extern int load_ipfw(void); /* * Initialize divert connection block queue. */ void -div_init(void) +div_init(struct protosw *pp, struct domain *dp) { +#pragma unused(dp) + static int div_initialized = 0; struct inpcbinfo *pcbinfo; + + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); + + if (div_initialized) + return; + div_initialized = 1; + LIST_INIT(&divcb); - divcbinfo.listhead = &divcb; + divcbinfo.ipi_listhead = &divcb; /* * XXX We don't use the hash list for divert IP, but it's easier * to allocate a one entry hash list than it is to check all - * over the place for hashbase == NULL. + * over the place for ipi_hashbase == NULL. 
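One small but important ip6.h change above: IP6_EXTHDR_CHECK now assigns (m) = NULL after every m_freem(), so a caller that touches the mbuf after the failure action gets a clean NULL dereference, or a testable condition, instead of a use-after-free. The same defensive idiom in a generic validate-or-consume macro; CHECK_LEN and parse() are invented for illustration:

#include <stdlib.h>

#define CHECK_LEN(buf, len, need, action) do {			\
	if ((len) < (need)) {					\
		free(buf);					\
		(buf) = NULL;	/* the newly added line */	\
		action;						\
	}							\
} while (0)

static int
parse(char *buf, int len)
{
	CHECK_LEN(buf, len, 40, return (-1));
	/* buf is known valid, and non-NULL, past this point */
	return (buf[0]);
}

int
main(void)
{
	char *small = malloc(8);

	/* frees and nulls parse()'s copy of the pointer, returns -1 */
	return ((parse(small, 8) == -1) ? 0 : 1);
}

Because the macro operates on the caller's lvalue, the nulling protects the local variable in the function that invoked it, which is exactly the mbuf pointer the kernel code would otherwise reuse.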
*/ - divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask); - divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask); - divcbinfo.ipi_zone = (void *) zinit(sizeof(struct inpcb),(maxsockets * sizeof(struct inpcb)), + divcbinfo.ipi_hashbase = hashinit(1, M_PCB, &divcbinfo.ipi_hashmask); + divcbinfo.ipi_porthashbase = hashinit(1, M_PCB, &divcbinfo.ipi_porthashmask); + divcbinfo.ipi_zone = zinit(sizeof(struct inpcb),(maxsockets * sizeof(struct inpcb)), 4096, "divzone"); pcbinfo = &divcbinfo; /* * allocate lock group attribute and group for udp pcb mutexes */ - pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); + pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init(); + + pcbinfo->ipi_lock_grp = lck_grp_alloc_init("divcb", pcbinfo->ipi_lock_grp_attr); - pcbinfo->mtx_grp = lck_grp_alloc_init("divcb", pcbinfo->mtx_grp_attr); - /* * allocate the lock attribute for divert pcb mutexes */ - pcbinfo->mtx_attr = lck_attr_alloc_init(); + pcbinfo->ipi_lock_attr = lck_attr_alloc_init(); - if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) - return; /* pretty much dead if this fails... */ + if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp, + pcbinfo->ipi_lock_attr)) == NULL) { + panic("%s: unable to allocate PCB lock\n", __func__); + /* NOTREACHED */ + } + + in_pcbinfo_attach(&divcbinfo); #if IPFIREWALL if (!IPFW_LOADED) { @@ -200,7 +214,6 @@ div_input(struct mbuf *m, __unused int off) * * Setup generic address and protocol structures for div_input routine, * then pass them along with mbuf chain. - * ###LOCK called in ip_mutex from ip_output/ip_input */ void divert_packet(struct mbuf *m, int incoming, int port, int rule) @@ -272,14 +285,13 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule) * (see div_output for the other half of this.) */ snprintf(divsrc.sin_zero, sizeof(divsrc.sin_zero), - "%s%d", m->m_pkthdr.rcvif->if_name, - m->m_pkthdr.rcvif->if_unit); + "%s", if_name(m->m_pkthdr.rcvif)); } /* Put packet on socket queue, if any */ sa = NULL; nport = htons((u_int16_t)port); - lck_rw_lock_shared(divcbinfo.mtx); + lck_rw_lock_shared(divcbinfo.ipi_lock); LIST_FOREACH(inp, &divcb, inp_list) { if (inp->inp_lport == nport) sa = inp->inp_socket; @@ -297,7 +309,7 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule) OSAddAtomic(1, &ipstat.ips_noproto); OSAddAtomic(-1, &ipstat.ips_delivered); } - lck_rw_done(divcbinfo.mtx); + lck_rw_done(divcbinfo.ipi_lock); } /* @@ -309,12 +321,11 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule) * ###LOCK called in inet_proto mutex when from div_send. 
*/ static int -div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr, +div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, struct mbuf *control) { struct inpcb *const inp = sotoinpcb(so); struct ip *const ip = mtod(m, struct ip *); - struct sockaddr_in *sin = (struct sockaddr_in *)(void *)addr; int error = 0; mbuf_svc_class_t msc = MBUF_SC_UNSPEC; @@ -322,6 +333,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr, msc = mbuf_service_class_from_control(control); m_freem(control); /* XXX */ + control = NULL; } /* Loopback avoidance and state recovery */ if (sin) { @@ -358,7 +370,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr, /* Reinject packet into the system as incoming or outgoing */ if (!sin || sin->sin_addr.s_addr == 0) { struct ip_out_args ipoa = - { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF }; + { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 }; struct route ro; struct ip_moptions *imo; @@ -407,15 +419,16 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr, /* If no luck with the name above. check by IP address. */ if (m->m_pkthdr.rcvif == NULL) { + struct sockaddr_in _sin; /* - * Make sure there are no distractions - * for ifa_ifwithaddr. Clear the port and the ifname. - * Maybe zap all 8 bytes at once using a 64bit write? + * Make sure there are no distractions for + * ifa_ifwithaddr; use sanitized version. */ - bzero(sin->sin_zero, sizeof(sin->sin_zero)); - /* *((u_int64_t *)sin->sin_zero) = 0; */ /* XXX ?? */ - sin->sin_port = 0; - if (!(ifa = ifa_ifwithaddr((struct sockaddr *) sin))) { + bzero(&_sin, sizeof (_sin)); + _sin.sin_family = AF_INET; + _sin.sin_len = sizeof (struct sockaddr_in); + _sin.sin_addr.s_addr = sin->sin_addr.s_addr; + if (!(ifa = ifa_ifwithaddr(SA(&_sin)))) { error = EADDRNOTAVAIL; goto cantsend; } @@ -464,8 +477,11 @@ div_attach(struct socket *so, int proto, struct proc *p) so->so_state |= SS_ISCONNECTED; #ifdef MORE_DICVLOCK_DEBUG - printf("div_attach: so=%p sopcb=%p lock=%x ref=%x\n", - so, so->so_pcb, &(((struct inpcb *)so->so_pcb)->inpcb_mtx), so->so_usecount); + printf("div_attach: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x\n", + (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), + (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)), + so->so_usecount); #endif return 0; } @@ -476,8 +492,11 @@ div_detach(struct socket *so) struct inpcb *inp; #ifdef MORE_DICVLOCK_DEBUG - printf("div_detach: so=%p sopcb=%p lock=%x ref=%x\n", - so, so->so_pcb, &(((struct inpcb *)so->so_pcb)->inpcb_mtx), so->so_usecount); + printf("div_detach: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x\n", + (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), + (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)), + so->so_usecount); #endif inp = sotoinpcb(so); if (inp == 0) @@ -545,7 +564,7 @@ div_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr } /* Send packet */ - return div_output(so, m, nam, control); + return div_output(so, m, SIN(nam), control); } #if 0 @@ -562,17 +581,17 @@ div_pcblist SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. 
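The div_output() change above stops scrubbing the caller's sockaddr in place (the old code cleared sin_zero and sin_port directly) and instead builds a zeroed local _sin carrying only family, length, and address before calling ifa_ifwithaddr(). A hedged userland sketch of the copy-then-sanitize idea; lookup_by_addr() is a stand-in for ifa_ifwithaddr(), and the kernel code additionally sets the BSD-specific sin_len field, omitted here for portability:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

/* Stub standing in for ifa_ifwithaddr(): expects a "clean" address
 * with no port and no stale sin_zero bytes. */
static int
lookup_by_addr(const struct sockaddr_in *sin)
{
        char buf[INET_ADDRSTRLEN];
        printf("lookup %s port=%u\n",
            inet_ntop(AF_INET, &sin->sin_addr, buf, sizeof(buf)),
            ntohs(sin->sin_port));
        return 0;
}

static int
find_ifa(const struct sockaddr_in *user_sin)
{
        struct sockaddr_in sin;

        /* Copy only the field the lookup keys on; everything else stays
         * zeroed, and the caller's sockaddr is never mutated. */
        memset(&sin, 0, sizeof(sin));
        sin.sin_family = AF_INET;
        sin.sin_addr = user_sin->sin_addr;

        return lookup_by_addr(&sin);
}

int
main(void)
{
        struct sockaddr_in user = { .sin_family = AF_INET,
            .sin_port = htons(1234) };
        inet_pton(AF_INET, "192.0.2.1", &user.sin_addr);
        return find_ifa(&user);
}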
*/ - lck_rw_lock_exclusive(divcbinfo.mtx); + lck_rw_lock_exclusive(divcbinfo.ipi_lock); if (req->oldptr == USER_ADDR_NULL) { n = divcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); - lck_rw_done(divcbinfo.mtx); + lck_rw_done(divcbinfo.ipi_lock); return 0; } if (req->newptr != USER_ADDR_NULL) { - lck_rw_done(divcbinfo.mtx); + lck_rw_done(divcbinfo.ipi_lock); return EPERM; } @@ -589,17 +608,17 @@ div_pcblist SYSCTL_HANDLER_ARGS xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) { - lck_rw_done(divcbinfo.mtx); + lck_rw_done(divcbinfo.ipi_lock); return error; } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) { - lck_rw_done(divcbinfo.mtx); + lck_rw_done(divcbinfo.ipi_lock); return ENOMEM; } - for (inp = LIST_FIRST(divcbinfo.listhead), i = 0; inp && i < n; + for (inp = LIST_FIRST(divcbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { #ifdef __APPLE__ if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) @@ -641,7 +660,7 @@ div_pcblist SYSCTL_HANDLER_ARGS error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); - lck_rw_done(divcbinfo.mtx); + lck_rw_done(divcbinfo.ipi_lock); return error; } #endif @@ -657,10 +676,11 @@ div_lock(struct socket *so, int refcount, void *lr) lr_saved = lr; #ifdef MORE_DICVLOCK_DEBUG - printf("div_lock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n", - so, so->so_pcb, so->so_pcb ? - &(((struct inpcb *)so->so_pcb)->inpcb_mtx) : NULL, - so->so_usecount, lr_saved); + printf("div_lock: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x " + "lr=0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), so->so_pcb ? + (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)) : NULL, + so->so_usecount, (uint64_t)VM_KERNEL_ADDRPERM(lr_saved)); #endif if (so->so_pcb) { lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx); @@ -698,9 +718,10 @@ div_unlock(struct socket *so, int refcount, void *lr) lr_saved = lr; #ifdef MORE_DICVLOCK_DEBUG - printf("div_unlock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n", - so, so->so_pcb, so->so_pcb ? - &(((struct inpcb *)so->so_pcb)->inpcb_mtx) : NULL, + printf("div_unlock: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x " + "lr=0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), so->so_pcb ? 
+ (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)) : NULL, so->so_usecount, lr_saved); #endif if (refcount) @@ -719,11 +740,11 @@ div_unlock(struct socket *so, int refcount, void *lr) mutex_held = &((struct inpcb *)so->so_pcb)->inpcb_mtx; if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { - lck_rw_lock_exclusive(divcbinfo.mtx); + lck_rw_lock_exclusive(divcbinfo.ipi_lock); if (inp->inp_state != INPCB_STATE_DEAD) in_pcbdetach(inp); in_pcbdispose(inp); - lck_rw_done(divcbinfo.mtx); + lck_rw_done(divcbinfo.ipi_lock); return (0); } lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); @@ -750,12 +771,18 @@ div_getlock(struct socket *so, __unused int locktype) } } - struct pr_usrreqs div_usrreqs = { - div_abort, pru_accept_notsupp, div_attach, div_bind, - pru_connect_notsupp, pru_connect2_notsupp, in_control, div_detach, - div_disconnect, pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, div_send, pru_sense_null, div_shutdown, - in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = div_abort, + .pru_attach = div_attach, + .pru_bind = div_bind, + .pru_control = in_control, + .pru_detach = div_detach, + .pru_disconnect = div_disconnect, + .pru_peeraddr = in_getpeeraddr, + .pru_send = div_send, + .pru_shutdown = div_shutdown, + .pru_sockaddr = in_getsockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; diff --git a/bsd/netinet/ip_divert.h b/bsd/netinet/ip_divert.h index 80fe08e93..e4a89ef6f 100644 --- a/bsd/netinet/ip_divert.h +++ b/bsd/netinet/ip_divert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,6 +64,7 @@ #define _NETINET_IP_DIVERT_H_ #if IPDIVERT +#ifdef BSD_KERNEL_PRIVATE /* * Divert socket definitions. */ @@ -107,7 +108,7 @@ divert_find_info(struct mbuf *m) return mtag ? divert_info(mtag) : 0; } -extern void div_init(void) __attribute__((section("__TEXT, initcode"))); +extern void div_init(struct protosw *, struct domain *); extern void div_input(struct mbuf *, int); lck_mtx_t * div_getlock(struct socket *, int ); @@ -116,5 +117,6 @@ int div_lock(struct socket *, int , void *); extern void divert_packet(struct mbuf *m, int incoming, int port, int rule); extern struct pr_usrreqs div_usrreqs; +#endif /* BSD_KERNEL_PRIVATE */ #endif /* IPDIVERT */ #endif /* _NETINET_IP_DIVERT_H_ */ diff --git a/bsd/netinet/ip_dummynet.c b/bsd/netinet/ip_dummynet.c index 5ebcb2a51..be38174c0 100644 --- a/bsd/netinet/ip_dummynet.c +++ b/bsd/netinet/ip_dummynet.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
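div_usrreqs above (and icmp_dgram_usrreqs later in this patch) moves from a positional aggregate initializer to C99 designated initializers: each callback is named explicitly, and any slot left out is zero-initialized instead of silently shifting when the struct gains or reorders a member. A small illustration with invented struct and function names:

#include <stddef.h>

struct ops {
        int  (*attach)(void);
        int  (*detach)(void);
        int  (*send)(const void *, size_t);
        void (*shutdown)(void);
};

static int my_attach(void) { return 0; }
static int my_detach(void) { return 0; }
static int my_send(const void *p, size_t n) { (void)p; (void)n; return 0; }

/* Positional form: every slot must be filled in declaration order,
 * and inserting a new member into struct ops misassigns the rest. */
static struct ops legacy_ops = { my_attach, my_detach, my_send, NULL };

/* Designated form: order-independent, self-documenting, and any
 * member left out (here .shutdown) is zero-initialized. */
static struct ops modern_ops = {
        .attach = my_attach,
        .detach = my_detach,
        .send   = my_send,
};

int
main(void)
{
        /* Both forms behave identically today; the designated one stays
         * correct if struct ops ever changes shape. */
        return legacy_ops.attach() + modern_ops.attach();
}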
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -623,7 +623,8 @@ heap_extract(struct dn_heap *h, void *obj) int child, father, maxelt = h->elements - 1 ; if (maxelt < 0) { - printf("dummynet: warning, extract from empty heap 0x%p\n", h); + printf("dummynet: warning, extract from empty heap 0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(h)); return ; } father = 0 ; /* default: move up smallest child */ @@ -698,7 +699,8 @@ dn_tag_get(struct mbuf *m) if (!(mtag != NULL && mtag->m_tag_id == KERNEL_MODULE_TAG_ID && mtag->m_tag_type == KERNEL_TAG_TYPE_DUMMYNET)) - panic("packet on dummynet queue w/o dummynet tag: %p", m); + panic("packet on dummynet queue w/o dummynet tag: 0x%llx", + (uint64_t)VM_KERNEL_ADDRPERM(m)); return (struct dn_pkt_tag *)(mtag+1); } @@ -1104,20 +1106,22 @@ dummynet_send(struct mbuf *m) n = m->m_nextpkt; m->m_nextpkt = NULL; pkt = dn_tag_get(m); - - DPRINTF(("dummynet_send m: %p dn_dir: %d dn_flags: 0x%x\n", - m, pkt->dn_dir, pkt->dn_flags)); - + + DPRINTF(("dummynet_send m: 0x%llx dn_dir: %d dn_flags: 0x%x\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), pkt->dn_dir, + pkt->dn_flags)); + switch (pkt->dn_dir) { case DN_TO_IP_OUT: { - struct route tmp_rt = pkt->dn_ro; + struct route tmp_rt; + + /* route is already in the packet's dn_ro */ + bzero(&tmp_rt, sizeof (tmp_rt)); + /* Force IP_RAWOUTPUT as the IP header is fully formed */ pkt->dn_flags |= IP_RAWOUTPUT | IP_FORWARDING; (void)ip_output(m, NULL, &tmp_rt, pkt->dn_flags, NULL, NULL); - if (tmp_rt.ro_rt) { - rtfree(tmp_rt.ro_rt); - tmp_rt.ro_rt = NULL; - } + ROUTE_RELEASE(&tmp_rt); break ; } case DN_TO_IP_IN : @@ -1125,14 +1129,8 @@ dummynet_send(struct mbuf *m) break ; #ifdef INET6 case DN_TO_IP6_OUT: { - struct route_in6 ro6; - - ro6 = pkt->dn_ro6; - - ip6_output(m, NULL, &ro6, IPV6_FORWARDING, NULL, NULL, NULL); - - if (ro6.ro_rt) - rtfree(ro6.ro_rt); + /* routes already in the packet's dn_{ro6,pmtu} */ + ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); break; } case DN_TO_IP6_IN: @@ -1167,7 +1165,7 @@ if_tx_rdy(struct ifnet *ifp) break ; if (p == NULL) { char buf[32]; - snprintf(buf, sizeof(buf), "%s%d",ifp->if_name, ifp->if_unit); + snprintf(buf, sizeof(buf), "%s", if_name(ifp)); for (i = 0; i < HASHSIZE; i++) SLIST_FOREACH(p, &pipehash[i], next) if (!strcmp(p->if_name, buf) ) { @@ -1177,8 +1175,8 @@ if_tx_rdy(struct ifnet *ifp) } } if (p != NULL) { - DPRINTF(("dummynet: ++ tx rdy from %s%d - qlen %d\n", ifp->if_name, - ifp->if_unit, IFCQ_LEN(&ifp->if_snd))); + DPRINTF(("dummynet: ++ tx rdy from %s - qlen %d\n", if_name(ifp), + IFCQ_LEN(&ifp->if_snd))); p->numbytes = 0 ; /* mark ready for I/O */ ready_event_wfq(p, &head, &tail); } @@ -1532,12 +1530,12 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa, int cl struct dn_pipe *pipe ; u_int64_t len = m->m_pkthdr.len ; struct dn_flow_queue *q = NULL ; - int is_pipe; + int is_pipe = 0; struct timespec ts; struct timeval tv; - - DPRINTF(("dummynet_io m: %p pipe: %d dir: %d client: %d\n", - m, pipe_nr, dir, client)); + + DPRINTF(("dummynet_io m: 0x%llx pipe: %d dir: %d client: %d\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), pipe_nr, dir, client)); #if IPFIREWALL #if IPFW2 @@ -1646,9 +1644,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa, int cl * a pointer into *ro so it needs to be updated. 
*/ if (fwa->fwa_ro) { - pkt->dn_ro = *(fwa->fwa_ro); - if (fwa->fwa_ro->ro_rt) - RT_ADDREF(fwa->fwa_ro->ro_rt); + route_copyout(&pkt->dn_ro, fwa->fwa_ro, sizeof (pkt->dn_ro)); } if (fwa->fwa_dst) { if (fwa->fwa_dst == (struct sockaddr_in *)&fwa->fwa_ro->ro_dst) /* dst points into ro */ @@ -1658,14 +1654,12 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa, int cl } } else if (dir == DN_TO_IP6_OUT) { if (fwa->fwa_ro6) { - pkt->dn_ro6 = *(fwa->fwa_ro6); - if (fwa->fwa_ro6->ro_rt) - RT_ADDREF(fwa->fwa_ro6->ro_rt); + route_copyout((struct route *)&pkt->dn_ro6, + (struct route *)fwa->fwa_ro6, sizeof (pkt->dn_ro6)); } if (fwa->fwa_ro6_pmtu) { - pkt->dn_ro6_pmtu = *(fwa->fwa_ro6_pmtu); - if (fwa->fwa_ro6_pmtu->ro_rt) - RT_ADDREF(fwa->fwa_ro6_pmtu->ro_rt); + route_copyout((struct route *)&pkt->dn_ro6_pmtu, + (struct route *)fwa->fwa_ro6_pmtu, sizeof (pkt->dn_ro6_pmtu)); } if (fwa->fwa_dst6) { if (fwa->fwa_dst6 == (struct sockaddr_in6 *)&fwa->fwa_ro6->ro_dst) /* dst points into ro */ @@ -1796,17 +1790,14 @@ dropit: } /* - * Below, the rtfree is only needed when (pkt->dn_dir == DN_TO_IP_OUT) + * Below, the ROUTE_RELEASE is only needed when (pkt->dn_dir == DN_TO_IP_OUT) * Doing this would probably save us the initial bzero of dn_pkt */ #define DN_FREE_PKT(_m) do { \ struct m_tag *tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL); \ if (tag) { \ struct dn_pkt_tag *n = (struct dn_pkt_tag *)(tag+1); \ - if (n->dn_ro.ro_rt != NULL) { \ - rtfree(n->dn_ro.ro_rt); \ - n->dn_ro.ro_rt = NULL; \ - } \ + ROUTE_RELEASE(&n->dn_ro); \ } \ m_tag_delete(_m, tag); \ m_freem(_m); \ @@ -2365,8 +2356,10 @@ char* dn_copy_set_32(struct dn_flow_set *set, char *bp) printf("dummynet: ++ at %d: wrong slot (have %d, " "should be %d)\n", copied, q->hash_slot, i); if (q->fs != set) - printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", - i, q->fs, set); + printf("dummynet: ++ at %d: wrong fs ptr " + "(have 0x%llx, should be 0x%llx)\n", i, + (uint64_t)VM_KERNEL_ADDRPERM(q->fs), + (uint64_t)VM_KERNEL_ADDRPERM(set)); copied++ ; cp_queue_to_32_user( q, qp ); /* cleanup pointers */ @@ -2395,8 +2388,10 @@ char* dn_copy_set_64(struct dn_flow_set *set, char *bp) printf("dummynet: ++ at %d: wrong slot (have %d, " "should be %d)\n", copied, q->hash_slot, i); if (q->fs != set) - printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", - i, q->fs, set); + printf("dummynet: ++ at %d: wrong fs ptr " + "(have 0x%llx, should be 0x%llx)\n", i, + (uint64_t)VM_KERNEL_ADDRPERM(q->fs), + (uint64_t)VM_KERNEL_ADDRPERM(set)); copied++ ; //bcopy(q, qp, sizeof(*q)); cp_queue_to_64_user( q, qp ); diff --git a/bsd/netinet/ip_dummynet.h b/bsd/netinet/ip_dummynet.h index b55a36b93..dde647d0b 100644 --- a/bsd/netinet/ip_dummynet.h +++ b/bsd/netinet/ip_dummynet.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
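The dummynet hunks above replace open-coded route duplication (struct assignment plus RT_ADDREF, later rtfree guarded by a NULL check) with route_copyout() and ROUTE_RELEASE(), centralizing where the reference is taken and dropped. A userland analogue of the pairing, with invented types standing in for struct route and struct rtentry:

#include <assert.h>
#include <stddef.h>

struct rtentry { int refcnt; };
struct route   { struct rtentry *ro_rt; };

/* Analogue of route_copyout(): duplicate the route and take a
 * reference on the cached rtentry, if any. */
static void
route_copy(struct route *dst, const struct route *src)
{
        *dst = *src;
        if (dst->ro_rt != NULL)
                dst->ro_rt->refcnt++;
}

/* Analogue of ROUTE_RELEASE(): drop the reference and clear the
 * pointer so double releases are harmless. */
static void
route_release(struct route *ro)
{
        if (ro->ro_rt != NULL) {
                assert(ro->ro_rt->refcnt > 0);
                ro->ro_rt->refcnt--;
                ro->ro_rt = NULL;
        }
}

int
main(void)
{
        struct rtentry rt = { .refcnt = 1 };
        struct route   ro = { .ro_rt = &rt }, copy;

        route_copy(&copy, &ro);     /* refcnt == 2 */
        route_release(&copy);       /* refcnt == 1, copy.ro_rt == NULL */
        route_release(&copy);       /* safe no-op */
        return rt.refcnt == 1 ? 0 : 1;
}

Release helpers that clear the pointer make double releases harmless, which is why DN_FREE_PKT above could drop its explicit NULL checks.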
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -417,7 +417,7 @@ struct dn_pipe { /* a pipe */ SLIST_HEAD(dn_pipe_head, dn_pipe); -#ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE void ip_dn_init(void); /* called from raw_ip.c:load_ipfw() */ @@ -695,7 +695,6 @@ ip_dn_claim_rule(struct mbuf *m) } else return (NULL); } -#endif /* KERNEL */ - +#endif /* BSD_KERNEL_PRIVATE */ #endif /* PRIVATE */ #endif /* _IP_DUMMYNET_H */ diff --git a/bsd/netinet/ip_ecn.h b/bsd/netinet/ip_ecn.h index d2245197e..ae06c45c0 100644 --- a/bsd/netinet/ip_ecn.h +++ b/bsd/netinet/ip_ecn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -60,11 +60,11 @@ */ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define ECN_ALLOWED 1 /* ECN allowed */ #define ECN_FORBIDDEN 0 /* ECN forbidden */ #define ECN_NOCARE (-1) /* no consideration to ECN */ extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *); extern void ip_ecn_egress(int, const u_int8_t *, u_int8_t *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netinet/ip_encap.c b/bsd/netinet/ip_encap.c index db393938f..f0dfd14de 100644 --- a/bsd/netinet/ip_encap.c +++ b/bsd/netinet/ip_encap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -120,6 +121,7 @@ MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); #endif +static void encap_init(struct protosw *, struct domain *); static void encap_add(struct encaptab *); static int mask_match(const struct encaptab *, const struct sockaddr *, const struct sockaddr *); @@ -132,14 +134,18 @@ LIST_HEAD(, encaptab) encaptab; LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); #endif -void -encap_init() +static void +encap_init(struct protosw *pp, struct domain *dp) { - static int initialized = 0; +#pragma unused(dp) + static int encap_initialized = 0; + + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); - if (initialized) + /* This gets called by more than one protocols, so initialize once */ + if (encap_initialized) return; - initialized++; + encap_initialized = 1; #if 0 /* * we cannot use LIST_INIT() here, since drivers may want to call @@ -152,6 +158,18 @@ encap_init() #endif } +void +encap4_init(struct protosw *pp, struct domain *dp) +{ + encap_init(pp, dp); +} + +void +encap6_init(struct ip6protosw *pp, struct domain *dp) +{ + encap_init((struct protosw *)pp, dp); +} + #if INET void encap4_input(m, off) diff --git a/bsd/netinet/ip_encap.h b/bsd/netinet/ip_encap.h index 1c65ea956..34f39d29d 100644 --- a/bsd/netinet/ip_encap.h +++ b/bsd/netinet/ip_encap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
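encap_init() above, like div_init() earlier in this patch, now receives the (protosw, domain) pair and guards its body with a static flag because both the IPv4 and IPv6 attach paths reach it while the shared state must be set up exactly once. A minimal sketch of the one-shot init pattern, assuming attach calls are serialized (which the VERIFY in the patch implies):

#include <stdbool.h>
#include <stdio.h>

/* One-shot initializer: callable from several protocol attach paths,
 * but the body runs only on the first call.  A plain flag suffices
 * when the attach paths are serialized. */
static void
encap_like_init(void)
{
        static bool initialized = false;

        if (initialized)
                return;
        initialized = true;

        printf("shared state set up exactly once\n");
}

int
main(void)
{
        encap_like_init();   /* e.g. from the INET attach path */
        encap_like_init();   /* e.g. from the INET6 attach path: no-op */
        return 0;
}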
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -58,9 +58,7 @@ #ifndef _NETINET_IP_ENCAP_H_ #define _NETINET_IP_ENCAP_H_ -#include - -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE struct encaptab { LIST_ENTRY(encaptab) chain; @@ -75,7 +73,13 @@ struct encaptab { void *arg; /* passed via m->m_pkthdr.aux */ }; -void encap_init(void) __attribute__((section("__TEXT, initcode"))); +struct protosw; +struct ip6protosw; +struct domain; + +__BEGIN_DECLS +void encap4_init(struct protosw *, struct domain *); +void encap6_init(struct ip6protosw *, struct domain *); void encap4_input(struct mbuf *, int); int encap6_input(struct mbuf **, int *, int); const struct encaptab *encap_attach(int, int, const struct sockaddr *, @@ -86,6 +90,7 @@ const struct encaptab *encap_attach_func(int, int, const struct protosw *, void *); int encap_detach(const struct encaptab *); void *encap_getarg(struct mbuf *); -#endif /* KERNEL_PRIVATE */ +__END_DECLS +#endif /* BSD_KERNEL_PRIVATE */ #endif /*_NETINET_IP_ENCAP_H_*/ diff --git a/bsd/netinet/ip_flowid.h b/bsd/netinet/ip_flowid.h index 1fe210311..aa3ac23c9 100644 --- a/bsd/netinet/ip_flowid.h +++ b/bsd/netinet/ip_flowid.h @@ -78,7 +78,7 @@ struct ip_flow_id { #define IS_IP6_FLOW_ID(id) ((id)->addr_type == 6) -#ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE struct route_in6; struct sockaddr_in6; struct pf_rule; @@ -127,6 +127,6 @@ struct ip_fw_args { #define fwa_dst fwa_dst_._fwa_dst #define fwa_dst6 fwa_dst_._fwa_dst6 -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* __IP_FLOWID_H__ */ diff --git a/bsd/netinet/ip_fw.h b/bsd/netinet/ip_fw.h index 53ead3fa0..1eb861d3b 100644 --- a/bsd/netinet/ip_fw.h +++ b/bsd/netinet/ip_fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -299,7 +299,7 @@ struct ipfw_dyn_rule { /* * Main firewall chains definitions and global var's definitions. */ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define IP_FW_PORT_DYNT_FLAG 0x10000 #define IP_FW_PORT_TEE_FLAG 0x20000 @@ -322,7 +322,7 @@ extern int fw_one_pass; extern int fw_enable; #define IPFW_LOADED (ip_fw_chk_ptr != NULL) extern struct ipfw_flow_id last_pkt ; -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !IPFW2 */ #endif /* __APPLE_API_OBSOLETE */ diff --git a/bsd/netinet/ip_fw2.c b/bsd/netinet/ip_fw2.c index a8422fa31..be17daaa5 100644 --- a/bsd/netinet/ip_fw2.c +++ b/bsd/netinet/ip_fw2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -182,6 +182,37 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); +/* + * IP FW Stealth Logging: + */ +typedef enum ipfw_stealth_stats_type { + IPFW_STEALTH_STATS_UDP, + IPFW_STEALTH_STATS_TCP, + IPFW_STEALTH_STATS_UDPv6, + IPFW_STEALTH_STATS_TCPv6, + IPFW_STEALTH_STATS_MAX, +} ipfw_stealth_stats_type_t; + +#define IPFW_STEALTH_TIMEOUT_SEC 30 + +#define DYN_KEEPALIVE_LEEWAY 15 + +// Piggybagging Stealth stats with ipfw_tick(). 
+#define IPFW_STEALTH_TIMEOUT_FREQUENCY (30 / dyn_keepalive_period) + +static const char* ipfw_stealth_stats_str [IPFW_STEALTH_STATS_MAX] = { + "UDP", "TCP", "UDP v6", "TCP v6", +}; + +static uint32_t ipfw_stealth_stats_needs_flush = FALSE; +static uint32_t ipfw_stealth_stats[IPFW_STEALTH_STATS_MAX]; + +static void ipfw_stealth_flush_stats(void); +void ipfw_stealth_stats_incr_udp(void); +void ipfw_stealth_stats_incr_tcp(void); +void ipfw_stealth_stats_incr_udpv6(void); +void ipfw_stealth_stats_incr_tcpv6(void); + /* * Description of dynamic rules. * @@ -240,7 +271,7 @@ static u_int32_t dyn_short_lifetime = 5; * than dyn_keepalive_period. */ -static u_int32_t dyn_keepalive_interval = 20; +static u_int32_t dyn_keepalive_interval = 25; static u_int32_t dyn_keepalive_period = 5; static u_int32_t dyn_keepalive = 1; /* do send keepalives */ @@ -368,6 +399,52 @@ void ipfwsyslog( int level, const char *format,...) kev_post_msg(&ev_msg); } +static inline void ipfw_stealth_stats_incr(uint32_t type) +{ + if (type >= IPFW_STEALTH_STATS_MAX) + return; + + ipfw_stealth_stats[type]++; + + if (!ipfw_stealth_stats_needs_flush) { + ipfw_stealth_stats_needs_flush = TRUE; + } +} + +void ipfw_stealth_stats_incr_udp(void) +{ + ipfw_stealth_stats_incr(IPFW_STEALTH_STATS_UDP); +} + +void ipfw_stealth_stats_incr_tcp(void) +{ + ipfw_stealth_stats_incr(IPFW_STEALTH_STATS_TCP); +} + +void ipfw_stealth_stats_incr_udpv6(void) +{ + ipfw_stealth_stats_incr(IPFW_STEALTH_STATS_UDPv6); +} + +void ipfw_stealth_stats_incr_tcpv6(void) +{ + ipfw_stealth_stats_incr(IPFW_STEALTH_STATS_TCPv6); +} + +static void ipfw_stealth_flush_stats(void) +{ + int i; + + for (i = 0; i < IPFW_STEALTH_STATS_MAX; i++) { + if (ipfw_stealth_stats[i]) { + ipfwsyslog (LOG_INFO, "Stealth Mode connection attempt to %s %d times", + ipfw_stealth_stats_str[i], ipfw_stealth_stats[i]); + ipfw_stealth_stats[i] = 0; + } + } + ipfw_stealth_stats_needs_flush = FALSE; +} + /* * This macro maps an ip pointer into a layer3 header pointer of type T */ @@ -1018,29 +1095,31 @@ verify_rev_path(struct in_addr src, struct ifnet *ifp) static struct route ro; struct sockaddr_in *dst; + bzero(&ro, sizeof (ro)); dst = (struct sockaddr_in *)&(ro.ro_dst); /* Check if we've cached the route from the previous call. 
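The stealth-logging additions above keep syslog off the packet path: ipfw_stealth_stats_incr() only bumps a counter and raises a flush flag, and ipfw_tick() later emits one summary line per protocol roughly every IPFW_STEALTH_TIMEOUT_SEC seconds. A userland sketch of that accumulate-then-flush shape, with the periodic tick replaced by an explicit call and all names invented:

#include <stdbool.h>
#include <stdio.h>

enum { STAT_UDP, STAT_TCP, STAT_MAX };

static const char *stat_name[STAT_MAX] = { "UDP", "TCP" };
static unsigned    stat_cnt[STAT_MAX];
static bool        needs_flush;

/* Fast path: constant-time, no I/O. */
static void
stat_incr(int which)
{
        if (which < 0 || which >= STAT_MAX)
                return;
        stat_cnt[which]++;
        needs_flush = true;
}

/* Slow path, driven by a periodic tick: one log line per protocol
 * that saw any traffic since the last flush. */
static void
stat_flush(void)
{
        for (int i = 0; i < STAT_MAX; i++) {
                if (stat_cnt[i] != 0) {
                        printf("stealth: %u blocked %s attempts\n",
                            stat_cnt[i], stat_name[i]);
                        stat_cnt[i] = 0;
                }
        }
        needs_flush = false;
}

int
main(void)
{
        stat_incr(STAT_TCP);
        stat_incr(STAT_TCP);
        stat_incr(STAT_UDP);
        if (needs_flush)    /* in ipfw_tick() this fires periodically */
                stat_flush();
        return 0;
}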
*/ if (src.s_addr != dst->sin_addr.s_addr) { - ro.ro_rt = NULL; - - bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = src; rtalloc_ign(&ro, RTF_CLONING|RTF_PRCLONING); } - if (ro.ro_rt != NULL) + if (ro.ro_rt != NULL) { RT_LOCK_SPIN(ro.ro_rt); - else + } else { + ROUTE_RELEASE(&ro); return 0; /* No route */ + } if ((ifp == NULL) || - (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) { - RT_UNLOCK(ro.ro_rt); - return 0; + (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) { + RT_UNLOCK(ro.ro_rt); + ROUTE_RELEASE(&ro); + return 0; } RT_UNLOCK(ro.ro_rt); + ROUTE_RELEASE(&ro); return 1; } @@ -1617,7 +1696,8 @@ lookup_dyn_parent(struct ip_flow_id *pkt, struct ip_fw *rule) pkt->src_port == q->id.src_port && pkt->dst_port == q->id.dst_port) { q->expire = timenow.tv_sec + dyn_short_lifetime; - DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) + DEB(printf("ipfw: lookup_dyn_parent found " + "0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(q));) return q; } } @@ -1836,9 +1916,8 @@ send_reject(struct ip_fw_args *args, int code, int offset, __unused int ip_len) struct route sro; /* fake route */ bzero (&sro, sizeof (sro)); - ip_output_list(m, 0, NULL, &sro, 0, NULL, NULL); - if (sro.ro_rt) - RTFREE(sro.ro_rt); + ip_output(m, NULL, &sro, 0, NULL, NULL); + ROUTE_RELEASE(&sro); } } m_freem(args->fwa_m); @@ -3888,6 +3967,14 @@ ipfw_tick(__unused void * unused) int i; ipfw_dyn_rule *q; struct timeval timenow; + static int stealth_cnt = 0; + + if (ipfw_stealth_stats_needs_flush) { + stealth_cnt++; + if (!(stealth_cnt % IPFW_STEALTH_TIMEOUT_FREQUENCY)) { + ipfw_stealth_flush_stats(); + } + } if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0) goto done; @@ -3935,12 +4022,12 @@ ipfw_tick(__unused void * unused) mnext = m->m_nextpkt; m->m_nextpkt = NULL; bzero (&sro, sizeof (sro)); - ip_output_list(m, 0, NULL, &sro, 0, NULL, NULL); - if (sro.ro_rt) - RTFREE(sro.ro_rt); + ip_output(m, NULL, &sro, 0, NULL, NULL); + ROUTE_RELEASE(&sro); } done: - timeout(ipfw_tick, NULL, dyn_keepalive_period*hz); + timeout_with_leeway(ipfw_tick, NULL, dyn_keepalive_period*hz, + DYN_KEEPALIVE_LEEWAY*hz); } void diff --git a/bsd/netinet/ip_fw2.h b/bsd/netinet/ip_fw2.h index 10566531d..5e7a3ffdf 100644 --- a/bsd/netinet/ip_fw2.h +++ b/bsd/netinet/ip_fw2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2011 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -493,7 +493,7 @@ struct _ipfw_dyn_rule { /* * Main firewall chains definitions and global var's definitions. */ -#ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE #pragma pack(4) struct ip_fw_32{ @@ -617,10 +617,6 @@ typedef struct _ipfw_insn_pipe_32{ } ipfw_insn_pipe_32; -#endif /* KERNEL */ - -#ifdef KERNEL - #define IPFW_DEFAULT_RULE 65535 #if IPFIREWALL @@ -653,7 +649,7 @@ extern int fw_one_pass; extern int fw_enable; #define IPFW_LOADED (ip_fw_chk_ptr != NULL) #endif /* IPFIREWALL */ -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* __APPLE_API_OBSOLETE */ #endif /* _IPFW2_H */ diff --git a/bsd/netinet/ip_fw2_compat.h b/bsd/netinet/ip_fw2_compat.h index bd968d810..def315c4d 100644 --- a/bsd/netinet/ip_fw2_compat.h +++ b/bsd/netinet/ip_fw2_compat.h @@ -129,7 +129,7 @@ struct ipfw_dyn_rule_compat { /* combination of TCP flags) */ } ; -#ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE #pragma pack(4) struct ip_fw_compat_32 { @@ -248,7 +248,7 @@ struct ipfw_dyn_rule_compat_64 { u_int32_t state ; /* state of this rule (typ. 
a */ /* combination of TCP flags) */ } ; -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #define IP_FW_GETNSRCP_COMPAT(rule) ((rule)->fw_nports & 0x0f) diff --git a/bsd/netinet/ip_icmp.c b/bsd/netinet/ip_icmp.c index 576067247..160c3eaac 100644 --- a/bsd/netinet/ip_icmp.c +++ b/bsd/netinet/ip_icmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -143,11 +143,7 @@ SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW | CTLFLAG_LOCKED, /* Default values in case CONFIG_ICMP_BANDLIM is not defined in the MASTER file */ #ifndef CONFIG_ICMP_BANDLIM -#if !CONFIG_EMBEDDED #define CONFIG_ICMP_BANDLIM 250 -#else /* CONFIG_EMBEDDED */ -#define CONFIG_ICMP_BANDLIM 50 -#endif /* CONFIG_EMBEDDED */ #endif /* CONFIG_ICMP_BANDLIM */ /* @@ -183,8 +179,6 @@ int icmpprintfs = 0; static void icmp_reflect(struct mbuf *); static void icmp_send(struct mbuf *, struct mbuf *); -extern struct protosw inetsw[]; - /* * Generate an error packet of type error * in response to bad packet ip. @@ -210,7 +204,8 @@ icmp_error( #if ICMPPRINTFS if (icmpprintfs) - printf("icmp_error(%p, %x, %d)\n", oip, type, code); + printf("icmp_error(0x%llx, %x, %d)\n", + (uint64_t)VM_KERNEL_ADDRPERM(oip), type, code); #endif if (type != ICMP_REDIRECT) icmpstat.icps_error++; @@ -309,19 +304,13 @@ freeit: m_freem(n); } -static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET, - 0 , { 0 }, { 0,0,0,0,0,0,0,0 } }; -static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET, - 0 , { 0 }, { 0,0,0,0,0,0,0,0 } }; -static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET, - 0 , { 0 }, { 0,0,0,0,0,0,0,0 } }; - /* * Process a received ICMP message. 
*/ void icmp_input(struct mbuf *m, int hlen) { + struct sockaddr_in icmpsrc, icmpdst, icmpgw; struct icmp *icp; struct ip *ip = mtod(m, struct ip *); int icmplen; @@ -381,6 +370,18 @@ icmp_input(struct mbuf *m, int hlen) */ if (icp->icmp_type > ICMP_MAXTYPE) goto raw; + + /* Initialize */ + bzero(&icmpsrc, sizeof (icmpsrc)); + icmpsrc.sin_len = sizeof (struct sockaddr_in); + icmpsrc.sin_family = AF_INET; + bzero(&icmpdst, sizeof (icmpdst)); + icmpdst.sin_len = sizeof (struct sockaddr_in); + icmpdst.sin_family = AF_INET; + bzero(&icmpgw, sizeof (icmpgw)); + icmpgw.sin_len = sizeof (struct sockaddr_in); + icmpgw.sin_family = AF_INET; + icmpstat.icps_inhist[icp->icmp_type]++; code = icp->icmp_code; switch (icp->icmp_type) { @@ -645,6 +646,7 @@ static void icmp_reflect(struct mbuf *m) { struct ip *ip = mtod(m, struct ip *); + struct sockaddr_in icmpdst; struct in_ifaddr *ia; struct in_addr t; struct mbuf *opts = NULL; @@ -691,6 +693,11 @@ icmp_reflect(struct mbuf *m) } match: lck_rw_done(in_ifaddr_rwlock); + + /* Initialize */ + bzero(&icmpdst, sizeof (icmpdst)); + icmpdst.sin_len = sizeof (struct sockaddr_in); + icmpdst.sin_family = AF_INET; icmpdst.sin_addr = t; if ((ia == (struct in_ifaddr *)0) && m->m_pkthdr.rcvif) ia = (struct in_ifaddr *)ifaof_ifpforaddr( @@ -812,9 +819,9 @@ icmp_send(struct mbuf *m, struct mbuf *opts) struct icmp *icp; struct route ro; struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, - IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR }; + IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR, 0 }; - if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) { + if (!(m->m_pkthdr.pkt_flags & PKTF_LOOP) && m->m_pkthdr.rcvif != NULL) { ipoa.ipoa_boundif = m->m_pkthdr.rcvif->if_index; ipoa.ipoa_flags |= IPOAF_BOUND_IF; } @@ -842,8 +849,7 @@ icmp_send(struct mbuf *m, struct mbuf *opts) #endif bzero(&ro, sizeof ro); (void) ip_output(m, opts, &ro, IP_OUTARGS, NULL, &ipoa); - if (ro.ro_rt) - rtfree(ro.ro_rt); + ROUTE_RELEASE(&ro); } n_time @@ -852,7 +858,7 @@ iptime(void) struct timeval atv; u_int32_t t; - microtime(&atv); + getmicrotime(&atv); t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; return (htonl(t)); } @@ -919,9 +925,9 @@ ip_next_mtu(int mtu, int dir) int badport_bandlim(int which) { - static struct timeval lticks[BANDLIM_MAX + 1]; + static uint64_t lticks[BANDLIM_MAX + 1]; static int lpackets[BANDLIM_MAX + 1]; - struct timeval time; + uint64_t time = net_uptime(); int secs; const char *bandlimittype[] = { @@ -940,15 +946,13 @@ badport_bandlim(int which) if (icmplim <= 0 || which > BANDLIM_MAX || which < 0) return(0); - getmicrouptime(&time); + secs = time - lticks[which]; - secs = time.tv_sec - lticks[which].tv_sec ; - /* * reset stats when cumulative delta exceeds one second. 
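badport_bandlim() above drops the getmicrouptime() timeval arithmetic in favor of whole seconds from net_uptime(), so the window-reset test collapses into a single integer subtraction. A compact userland version of the same per-category rate limiter; time(NULL) stands in for net_uptime(), and the 250 limit mirrors the CONFIG_ICMP_BANDLIM default:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define CATEGORIES  4
#define LIMIT       250        /* max events per window per category */

static uint64_t last_sec[CATEGORIES];
static int      count[CATEGORIES];

/* Returns 1 when the caller should suppress the event. */
static int
bandlim(int which, uint64_t now_sec)
{
        if (which < 0 || which >= CATEGORIES)
                return 0;

        /* Reset the window once more than a full second has passed. */
        if (now_sec - last_sec[which] > 1) {
                last_sec[which] = now_sec;
                count[which] = 0;
        }
        return ++count[which] > LIMIT;
}

int
main(void)
{
        uint64_t now = (uint64_t)time(NULL);   /* net_uptime() stand-in */
        int dropped = 0;

        for (int i = 0; i < 300; i++)
                dropped += bandlim(0, now);
        printf("dropped %d of 300\n", dropped); /* 50 with LIMIT=250 */
        return 0;
}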
*/ - if ((secs > 1) || (secs == 1 && (lticks[which].tv_usec > time.tv_usec))) { + if (secs > 1) { if (lpackets[which] > icmplim) { printf("%s from %d to %d packets per second\n", bandlimittype[which], @@ -956,8 +960,7 @@ badport_bandlim(int which) icmplim ); } - lticks[which].tv_sec = time.tv_sec; - lticks[which].tv_usec = time.tv_usec; + lticks[which] = time; lpackets[which] = 0; } @@ -985,7 +988,6 @@ badport_bandlim(int which) #include #include -extern struct domain inetdomain; extern u_int32_t rip_sendspace; extern u_int32_t rip_recvspace; extern struct inpcbinfo ripcbinfo; @@ -1002,11 +1004,19 @@ __private_extern__ int icmp_dgram_attach(struct socket *so, int proto, struct pr __private_extern__ int icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt); __private_extern__ struct pr_usrreqs icmp_dgram_usrreqs = { - rip_abort, pru_accept_notsupp, icmp_dgram_attach, rip_bind, rip_connect, - pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, - pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, icmp_dgram_send, pru_sense_null, rip_shutdown, - in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = rip_abort, + .pru_attach = icmp_dgram_attach, + .pru_bind = rip_bind, + .pru_connect = rip_connect, + .pru_control = in_control, + .pru_detach = rip_detach, + .pru_disconnect = rip_disconnect, + .pru_peeraddr = in_getpeeraddr, + .pru_send = icmp_dgram_send, + .pru_shutdown = rip_shutdown, + .pru_sockaddr = in_getsockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; /* Like rip_attach but without root privilege enforcement */ @@ -1082,8 +1092,8 @@ icmp_dgram_ctloutput(struct socket *so, struct sockopt *sopt) } __private_extern__ int -icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, - struct mbuf *control, struct proc *p) +icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *nam, struct mbuf *control, struct proc *p) { struct ip *ip; struct inpcb *inp = sotoinpcb(so); @@ -1091,12 +1101,20 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n struct icmp *icp; struct in_ifaddr *ia = NULL; int icmplen; + int error = EINVAL; + + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if (inp != NULL) + error = EPROTOTYPE; + goto bad; + } if ((inp->inp_flags & INP_HDRINCL) != 0) { - /* Expect 32-bit aligned data pointer on strict-align platforms */ + /* Expect 32-bit aligned data ptr on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); /* - * This is not raw IP, we liberal only for fields TOS, id and TTL + * This is not raw IP, we liberal only for fields TOS, + * id and TTL. */ ip = mtod(m, struct ip *); @@ -1110,13 +1128,16 @@ icmp_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *n goto bad; if (hlen < 20 || hlen > 40 || ip->ip_len != m->m_pkthdr.len) goto bad; - /* Bogus fragments can tie up peer resources */ + /* Bogus fragments can tie up peer resources */ if ((ip->ip_off & ~IP_DF) != 0) goto bad; /* Allow only ICMP even for user provided IP header */ if (ip->ip_p != IPPROTO_ICMP) goto bad; - /* To prevent spoofing, specified source address must be one of ours */ + /* + * To prevent spoofing, specified source address must + * be one of ours. 
+ */ if (ip->ip_src.s_addr != INADDR_ANY) { socket_unlock(so, 0); lck_rw_lock_shared(in_ifaddr_rwlock); @@ -1172,10 +1193,16 @@ ours: default: goto bad; } - return rip_send(so, flags, m, nam, control, p); + return (rip_send(so, flags, m, nam, control, p)); bad: - m_freem(m); - return EINVAL; + VERIFY(error != 0); + + if (m != NULL) + m_freem(m); + if (control != NULL) + m_freem(control); + + return (error); } #endif /* __APPLE__ */ diff --git a/bsd/netinet/ip_icmp.h b/bsd/netinet/ip_icmp.h index 44ce3cbf9..3438a1bdb 100644 --- a/bsd/netinet/ip_icmp.h +++ b/bsd/netinet/ip_icmp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -227,10 +227,9 @@ struct icmp { (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE void icmp_error(struct mbuf *, int, int, n_long, u_int32_t); void icmp_input(struct mbuf *, int); int ip_next_mtu(int, int); -#endif /* KERNEL_PRIVATE */ - -#endif +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* _NETINET_IP_ICMP_H_ */ diff --git a/bsd/netinet/ip_id.c b/bsd/netinet/ip_id.c index 46c7fecd9..4feff3e9c 100644 --- a/bsd/netinet/ip_id.c +++ b/bsd/netinet/ip_id.c @@ -1,27 +1,44 @@ -/* $OpenBSD: ip_id.c,v 1.2 1999/08/26 13:37:01 provos Exp $ */ - /* - * Copyright 1998 Niels Provos - * All rights reserved. + * Copyright (c) 2002-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. * - * Theo de Raadt came up with the idea of using - * such a mathematical system to generate more random (yet non-repeating) - * ids to solve the resolver/named problem. But Niels designed the - * actual system based on the constraints. + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/*- + * Copyright (c) 2008 Michael J. Silbersack. + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice unmodified, this list of conditions, and the following + * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Niels Provos. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES @@ -33,187 +50,166 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD: src/sys/netinet/ip_id.c,v 1.1.2.1 2001/07/19 06:37:26 kris Exp $ */ -/* - * seed = random 15bit - * n = prime, g0 = generator to n, - * j = random so that gcd(j,n-1) == 1 - * g = g0^j mod n will be a generator again. +/* + * IP ID generation is a fascinating topic. + * + * In order to avoid ID collisions during packet reassembly, common sense + * dictates that the period between reuse of IDs be as large as possible. + * This leads to the classic implementation of a system-wide counter, thereby + * ensuring that IDs repeat only once every 2^16 packets. + * + * Subsequent security researchers have pointed out that using a global + * counter makes ID values predictable. This predictability allows traffic + * analysis, idle scanning, and even packet injection in specific cases. + * These results suggest that IP IDs should be as random as possible. + * + * The "searchable queues" algorithm used in this IP ID implementation was + * proposed by Amit Klein. It is a compromise between the above two + * viewpoints that has provable behavior that can be tuned to the user's + * requirements. + * + * The basic concept is that we supplement a standard random number generator + * with a queue of the last L IDs that we have handed out to ensure that all + * IDs have a period of at least L. * - * X[0] = random seed. - * X[n] = a*X[n-1]+b mod m is a Linear Congruential Generator - * with a = 7^(even random) mod m, - * b = random with gcd(b,m) == 1 - * m = 31104 and a maximal period of m-1. + * To efficiently implement this idea, we keep two data structures: a + * circular array of IDs of size L and a bitstring of 65536 bits. * - * The transaction id is determined by: - * id[n] = seed xor (g^X[n] mod n) + * To start, we ask the RNG for a new ID. A quick index into the bitstring + * is used to determine if this is a recently used value. The process is + * repeated until a value is returned that is not in the bitstring. * - * Effectivly the id is restricted to the lower 15 bits, thus - * yielding two different cycles by toggling the msb on and off. - * This avoids reuse issues caused by reseeding. + * Having found a usable ID, we remove the ID stored at the current position + * in the queue from the bitstring and replace it with our new ID. Our new + * ID is then added to the bitstring and the queue pointer is incremented. + * + * The lower limit of 512 was chosen because there doesn't seem to be much + * point to having a smaller value. The upper limit of 32768 was chosen for + * two reasons. First, every step above 32768 decreases the entropy. 
Taken + * to an extreme, 65533 would offer 1 bit of entropy. Second, the number of + * attempts it takes the algorithm to find an unused ID drastically + * increases, killing performance. The default value of 4096 was chosen + * because it provides a good tradeoff between randomness and non-repetition, + * while taking performance into account. + * + * With L=4096, the queue will use 8K of memory. The bitstring always uses + * 8K of memory (2^16/8). This yields to around 7% ID collisions. No memory + * is allocated until the use of random ids is enabled. */ #include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include -#if RANDOM_IP_ID -#define RU_OUT 180 /* Time after wich will be reseeded */ -#define RU_MAX 30000 /* Uniq cycle, avoid blackjack prediction */ -#define RU_GEN 2 /* Starting generator */ -#define RU_N 32749 /* RU_N-1 = 2*2*3*2729 */ -#define RU_AGEN 7 /* determine ru_a as RU_AGEN^(2*rand) */ -#define RU_M 31104 /* RU_M = 2^7*3^5 - don't change */ - -#define PFAC_N 3 -static u_int16_t pfacts[PFAC_N] = { - 2, - 3, - 2729 -}; - -static u_int16_t ru_x; -static u_int16_t ru_seed, ru_seed2; -static u_int16_t ru_a, ru_b; -static u_int16_t ru_g; -static u_int16_t ru_counter = 0; -static u_int16_t ru_msb = 0; -static time_t ru_reseed; -static u_int32_t tmp; /* Storage for unused random */ - -static u_int16_t pmod(u_int16_t, u_int16_t, u_int16_t); -static void ip_initid(void); -u_int16_t ip_randomid(void); - -extern u_short ip_id; -extern int ip_use_randomid; /* - * Do a fast modular exponation, returned value will be in the range - * of 0 - (mod-1) + * Size of L (see comments above on the lower and upper limits.) */ +#define ARRAY_SIZE (4096) + +static uint16_t *id_array = NULL; +static bitstr_t *id_bits = NULL; +static uint32_t array_ptr = 0; +static uint32_t random_id_statistics = 0; +static uint64_t random_id_collisions = 0; +static uint64_t random_id_total = 0; + +decl_lck_mtx_data(static, ipid_lock); +static lck_attr_t *ipid_lock_attr; +static lck_grp_t *ipid_lock_grp; +static lck_grp_attr_t *ipid_lock_grp_attr; + +SYSCTL_UINT(_net_inet_ip, OID_AUTO, random_id_statistics, + CTLFLAG_RW | CTLFLAG_LOCKED, &random_id_statistics, 0, + "Enable IP ID statistics"); +SYSCTL_QUAD(_net_inet_ip, OID_AUTO, random_id_collisions, + CTLFLAG_RD | CTLFLAG_LOCKED, &random_id_collisions, + "Count of IP ID collisions"); +SYSCTL_QUAD(_net_inet_ip, OID_AUTO, random_id_total, + CTLFLAG_RD | CTLFLAG_LOCKED, &random_id_total, + "Count of IP IDs created"); -#ifdef __STDC__ -static u_int16_t -pmod(u_int16_t gen, u_int16_t exp, u_int16_t mod) -#else -static u_int16_t -pmod(gen, exp, mod) - u_int16_t gen, exp, mod; -#endif -{ - u_int16_t s, t, u; - - s = 1; - t = gen; - u = exp; - - while (u) { - if (u & 1) - s = (s*t) % mod; - u >>= 1; - t = (t*t) % mod; - } - return (s); -} - -/* - * Initalizes the seed and chooses a suitable generator. Also toggles - * the msb flag. The msb flag is used to generate two distinct - * cycles of random numbers and thus avoiding reuse of ids. - * - * This function is called from id_randomid() when needed, an - * application does not have to worry about it. +/* + * Called once from ip_init(). 
*/ -static void +void ip_initid(void) { - u_int16_t j, i; - int noprime = 1; - struct timeval timenow; - - getmicrotime(&timenow); - read_random((void *) &tmp, sizeof(tmp)); - ru_x = (tmp & 0xFFFF) % RU_M; - - /* 15 bits of random seed */ - ru_seed = (tmp >> 16) & 0x7FFF; - read_random((void *) &tmp, sizeof(tmp)); - ru_seed2 = tmp & 0x7FFF; - - read_random((void *) &tmp, sizeof(tmp)); - - /* Determine the LCG we use */ - ru_b = (tmp & 0xfffe) | 1; - ru_a = pmod(RU_AGEN, (tmp >> 16) & 0xfffe, RU_M); - while (ru_b % 3 == 0) - ru_b += 2; - - read_random((void *) &tmp, sizeof(tmp)); - j = tmp % RU_N; - tmp = tmp >> 16; - - /* - * Do a fast gcd(j,RU_N-1), so we can find a j with - * gcd(j, RU_N-1) == 1, giving a new generator for - * RU_GEN^j mod RU_N - */ - - while (noprime) { - for (i=0; i=PFAC_N) - noprime = 0; - else - j = (j+1) % RU_N; + VERIFY(id_array == NULL); + VERIFY(id_bits == NULL); + + _CASSERT(ARRAY_SIZE >= 512 && ARRAY_SIZE <= 32768); + + ipid_lock_grp_attr = lck_grp_attr_alloc_init(); + ipid_lock_grp = lck_grp_alloc_init("ipid", ipid_lock_grp_attr); + ipid_lock_attr = lck_attr_alloc_init(); + lck_mtx_init(&ipid_lock, ipid_lock_grp, ipid_lock_attr); + + id_array = (uint16_t *)_MALLOC(ARRAY_SIZE * sizeof (uint16_t), + M_TEMP, M_WAITOK | M_ZERO); + id_bits = (bitstr_t *)_MALLOC(bitstr_size(65536), M_TEMP, + M_WAITOK | M_ZERO); + if (id_array == NULL || id_bits == NULL) { + /* Just in case; neither or both. */ + if (id_array != NULL) { + _FREE(id_array, M_TEMP); + id_array = NULL; + } + if (id_bits != NULL) { + _FREE(id_bits, M_TEMP); + id_bits = NULL; + } } - - ru_g = pmod(RU_GEN,j,RU_N); - ru_counter = 0; - - ru_reseed = timenow.tv_sec + RU_OUT; - ru_msb = ru_msb == 0x8000 ? 0 : 0x8000; } -u_int16_t +uint16_t ip_randomid(void) { - int i, n; - struct timeval timenow; + uint16_t new_id; - /* if net.inet.ip.random_id is disabled, - * reverts to the incrementing ip_id + /* + * If net.inet.ip.random_id is disabled, revert to incrementing ip_id. + * Given that we don't allow the size of the array to change, accessing + * id_array and id_bits prior to acquiring the lock below is safe. */ + if (id_array == NULL || ip_use_randomid == 0) + return (htons(ip_id++)); - if (ip_use_randomid == 0) - return htons(ip_id++); - - - getmicrotime(&timenow); - if (ru_counter >= RU_MAX || timenow.tv_sec > ru_reseed) - ip_initid(); - - if (!tmp) - read_random((void *) &tmp, sizeof(tmp)); - - /* Skip a random number of ids */ - n = tmp & 0x3; tmp = tmp >> 2; - if (ru_counter + n >= RU_MAX) - ip_initid(); - - for (i = 0; i <= n; i++) - /* Linear Congruential Generator */ - ru_x = (ru_a*ru_x + ru_b) % RU_M; - - ru_counter += i; - - return (ru_seed ^ pmod(ru_g,ru_seed2 ^ ru_x,RU_N)) | ru_msb; + /* + * To avoid a conflict with the zeros that the array is initially + * filled with, we never hand out an id of zero. bit_test() below + * uses single memory access, therefore no lock is needed. + */ + new_id = 0; + do { + if (random_id_statistics && new_id != 0) + random_id_collisions++; + read_random(&new_id, sizeof (new_id)); + } while (bit_test(id_bits, new_id) || new_id == 0); + + /* + * These require serialization to maintain correctness. 
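The ip_randomid() loop above, together with the serialized tail that follows it, is the whole of the searchable-queues generator described in the file comment: draw random 16-bit IDs until one misses the 64 Kbit recently-used bitmap, then retire the oldest of the last 4096 IDs from the bitmap and record the new one. A self-contained userland sketch of the core data structures; rand() and the bit helpers are simplified stand-ins for read_random() and the <sys/bitstring.h> macros, and no locking is shown:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define L 4096                          /* period floor, as in ARRAY_SIZE */

static uint16_t ring[L];                /* last L IDs handed out */
static uint8_t  used[65536 / 8];        /* 8 KB bitmap over all 16-bit IDs */
static uint32_t ptr;

static int  bit_test(uint16_t id) { return used[id >> 3] &   (1u << (id & 7)); }
static void bit_set(uint16_t id)  {        used[id >> 3] |=  (1u << (id & 7)); }
static void bit_clr(uint16_t id)  {        used[id >> 3] &= ~(1u << (id & 7)); }

static uint16_t
random_ip_id(void)
{
        uint16_t id;

        /* Retry until the draw misses the last-L window; zero is excluded
         * so the zero-filled ring never aliases a real entry. */
        do {
                id = (uint16_t)(rand() & 0xffff);  /* read_random() stand-in */
        } while (id == 0 || bit_test(id));

        bit_clr(ring[ptr]);     /* oldest ID becomes reusable again */
        bit_set(id);
        ring[ptr] = id;
        if (++ptr == L)
                ptr = 0;
        return id;
}

int
main(void)
{
        uint16_t first = random_ip_id();

        /* By construction, no ID can repeat within L draws. */
        for (int i = 1; i < L; i++) {
                if (random_ip_id() == first) {
                        printf("repeat inside window\n");
                        return 1;
                }
        }
        printf("no repeats in a window of %d\n", L);
        return 0;
}

The bitmap makes the membership test O(1) per retry, which is what keeps the expected cost low at the chosen L (the file comment puts collisions around 7% with L = 4096).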
+ */ + lck_mtx_lock_spin(&ipid_lock); + bit_clear(id_bits, id_array[array_ptr]); + bit_set(id_bits, new_id); + id_array[array_ptr] = new_id; + if (++array_ptr == ARRAY_SIZE) + array_ptr = 0; + lck_mtx_unlock(&ipid_lock); + + if (random_id_statistics) + random_id_total++; + + return (new_id); } - -#endif /* RANDOM_IP_ID */ diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c index 6953044bd..99d474d3c 100644 --- a/bsd/netinet/ip_input.c +++ b/bsd/netinet/ip_input.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -58,7 +58,6 @@ * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 - * $FreeBSD: src/sys/netinet/ip_input.c,v 1.130.2.25 2001/08/29 21:41:37 jesper Exp $ */ /* * NOTICE: This file was modified by SPARTA, Inc. 
in 2007 to introduce @@ -81,12 +80,17 @@ #include #include #include +#include +#include #include +#include #include +#include #include #include +#include #include @@ -96,6 +100,11 @@ #include #include #include +#include +#include +#if PF +#include +#endif /* PF */ #include #include @@ -105,114 +114,131 @@ #include #include #include -#include - #include #include - #include - -/* needed for AUTOCONFIGURING: */ #include #include #include -#include +#include + +#if DUMMYNET +#include +#endif /* DUMMYNET */ #if CONFIG_MACF_NET #include -#endif - -#include -#include - -#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 0) -#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 2) -#define DBG_FNC_IP_INPUT NETDBG_CODE(DBG_NETIP, (2 << 8)) - +#endif /* CONFIG_MACF_NET */ #if IPSEC #include #include -#endif +#endif /* IPSEC */ -#if DUMMYNET -#include -#endif - -#if PF -#include -#endif /* PF */ - -#include +#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 0) +#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 2) +#define DBG_FNC_IP_INPUT NETDBG_CODE(DBG_NETIP, (2 << 8)) #if IPSEC extern int ipsec_bypass; extern lck_mtx_t *sadb_mutex; -lck_grp_t *sadb_stat_mutex_grp; -lck_grp_attr_t *sadb_stat_mutex_grp_attr; -lck_attr_t *sadb_stat_mutex_attr; +lck_grp_t *sadb_stat_mutex_grp; +lck_grp_attr_t *sadb_stat_mutex_grp_attr; +lck_attr_t *sadb_stat_mutex_attr; decl_lck_mtx_data(, sadb_stat_mutex_data); -lck_mtx_t *sadb_stat_mutex = &sadb_stat_mutex_data; - -#endif +lck_mtx_t *sadb_stat_mutex = &sadb_stat_mutex_data; +#endif /* IPSEC */ +#if MROUTING int rsvp_on = 0; static int ip_rsvp_on; struct socket *ip_rsvpd; +#endif /* MROUTING */ + +MBUFQ_HEAD(fq_head); + +static int frag_timeout_run; /* frag timer is scheduled to run */ +static void frag_timeout(void *); +static void frag_sched_timeout(void); + +static struct ipq *ipq_alloc(int); +static void ipq_free(struct ipq *); +static void ipq_updateparams(void); + +decl_lck_mtx_data(static, ipqlock); +static lck_attr_t *ipqlock_attr; +static lck_grp_t *ipqlock_grp; +static lck_grp_attr_t *ipqlock_grp_attr; + +/* Packet reassembly stuff */ +#define IPREASS_NHASH_LOG2 6 +#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) +#define IPREASS_HMASK (IPREASS_NHASH - 1) +#define IPREASS_HASH(x, y) \ + (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) + +/* IP fragment reassembly queues (protected by ipqlock) */ +static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH]; /* ip reassembly queues */ +static int maxnipq; /* max packets in reass queues */ +static u_int32_t maxfragsperpacket; /* max frags/packet in reass queues */ +static u_int32_t nipq; /* # of packets in reass queues */ +static u_int32_t ipq_limit; /* ipq allocation limit */ +static u_int32_t ipq_count; /* current # of allocated ipq's */ static int sysctl_ipforwarding SYSCTL_HANDLER_ARGS; +static int sysctl_maxnipq SYSCTL_HANDLER_ARGS; +static int sysctl_maxfragsperpacket SYSCTL_HANDLER_ARGS; -int ipforwarding = 0; +int ipforwarding = 0; SYSCTL_PROC(_net_inet_ip, IPCTL_FORWARDING, forwarding, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ipforwarding, 0, - sysctl_ipforwarding, "I", "Enable IP forwarding between interfaces"); + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ipforwarding, 0, + sysctl_ipforwarding, "I", "Enable IP forwarding between interfaces"); -static int ipsendredirects = 1; /* XXX */ -SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW | CTLFLAG_LOCKED, - &ipsendredirects, 0, "Enable sending IP redirects"); +static int ipsendredirects = 1; /* XXX */ +SYSCTL_INT(_net_inet_ip, 
IPCTL_SENDREDIRECTS, redirect, + CTLFLAG_RW | CTLFLAG_LOCKED, &ipsendredirects, 0, + "Enable sending IP redirects"); -int ip_defttl = IPDEFTTL; +int ip_defttl = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_defttl, 0, "Maximum TTL on IP packets"); - -static int ip_dosourceroute = 0; -SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_dosourceroute, 0, "Enable forwarding source routed IP packets"); - -static int ip_acceptsourceroute = 0; -SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, - CTLFLAG_RW | CTLFLAG_LOCKED, &ip_acceptsourceroute, 0, - "Enable accepting source routed IP packets"); - -static int ip_keepfaith = 0; -SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RD | CTLFLAG_LOCKED, - &ip_keepfaith, 0, ""); - -static int nipq = 0; /* total # of reass queues */ -static int maxnipq; -SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW | CTLFLAG_LOCKED, - &maxnipq, 0, - "Maximum number of IPv4 fragment reassembly queue entries"); - -static int maxfragsperpacket; -SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW | CTLFLAG_LOCKED, - &maxfragsperpacket, 0, - "Maximum number of IPv4 fragments allowed per packet"); + &ip_defttl, 0, "Maximum TTL on IP packets"); + +static int ip_dosourceroute = 0; +SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip_dosourceroute, 0, + "Enable forwarding source routed IP packets"); + +static int ip_acceptsourceroute = 0; +SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip_acceptsourceroute, 0, + "Enable accepting source routed IP packets"); -static int maxfrags; -SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfrags, CTLFLAG_RW | CTLFLAG_LOCKED, - &maxfrags, 0, "Maximum number of IPv4 fragments allowed"); +static int ip_sendsourcequench = 0; +SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip_sendsourcequench, 0, + "Enable the transmission of source quench packets"); -static int currentfrags = 0; +SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &maxnipq, 0, sysctl_maxnipq, + "I", "Maximum number of IPv4 fragment reassembly queue entries"); -int ip_doscopedroute = 1; +SYSCTL_UINT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD | CTLFLAG_LOCKED, + &nipq, 0, "Current number of IPv4 fragment reassembly queue entries"); + +SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragsperpacket, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &maxfragsperpacket, 0, + sysctl_maxfragsperpacket, "I", + "Maximum number of IPv4 fragments allowed per packet"); + +int ip_doscopedroute = 1; SYSCTL_INT(_net_inet_ip, OID_AUTO, scopedroute, CTLFLAG_RD | CTLFLAG_LOCKED, - &ip_doscopedroute, 0, "Enable IPv4 scoped routing"); + &ip_doscopedroute, 0, "Enable IPv4 scoped routing"); -int ip_restrictrecvif = 1; -SYSCTL_INT(_net_inet_ip, OID_AUTO, restrictrecvif, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_restrictrecvif, 0, "Enable inbound interface restrictions"); +static uint32_t ip_adj_clear_hwcksum = 0; +SYSCTL_UINT(_net_inet_ip, OID_AUTO, adj_clear_hwcksum, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip_adj_clear_hwcksum, 0, + "Invalidate hwcksum info when adjusting length"); /* * XXX - Setting ip_checkinterface mostly implements the receive side of @@ -227,19 +253,15 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, restrictrecvif, CTLFLAG_RW | CTLFLAG_LOCKED, * to the loopback interface instead of the interface where the 
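maxfragpackets and maxfragsperpacket above turn from raw SYSCTL_INT knobs into SYSCTL_PROC entries backed by sysctl_maxnipq and sysctl_maxfragsperpacket, so a write can be validated and any derived state updated rather than the integer being rewritten blindly. A hedged sketch of the validate-then-commit shape with the sysctl plumbing abstracted into a plain callback; the field names, bounds, and derivation here are illustrative only:

#include <errno.h>
#include <stdio.h>

static int maxnipq   = 1024;   /* committed tunable */
static int ipq_limit = 1024;   /* state derived from the tunable */

/* Handler pattern: stage the new value, validate, then commit and
 * update anything derived from it.  Invalid input leaves both the
 * tunable and the derived state untouched. */
static int
set_maxnipq(int new_value)
{
        if (new_value < 0 || new_value > 65535)
                return EINVAL;

        maxnipq = new_value;
        ipq_limit = (new_value > 0) ? new_value : 1024;
        return 0;
}

int
main(void)
{
        printf("set 2048 -> %d (limit now %d)\n", set_maxnipq(2048), ipq_limit);
        printf("set -5   -> %d (limit still %d)\n", set_maxnipq(-5), ipq_limit);
        return 0;
}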
* packets for those addresses are received. */ -static int ip_checkinterface = 0; +static int ip_checkinterface = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_checkinterface, 0, "Verify packet arrives on correct interface"); + &ip_checkinterface, 0, "Verify packet arrives on correct interface"); #if DIAGNOSTIC -static int ipprintfs = 0; +static int ipprintfs = 0; #endif -extern int in_proto_count; -extern struct domain inetdomain; -extern struct protosw inetsw[]; struct protosw *ip_protox[IPPROTO_MAX]; -static int ipqmaxlen = IFQ_MAXLEN; static lck_grp_attr_t *in_ifaddr_rwlock_grp_attr; static lck_grp_t *in_ifaddr_rwlock_grp; @@ -255,44 +277,22 @@ struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ static u_int32_t inaddr_nhash; /* hash table size */ static u_int32_t inaddr_hashp; /* next largest prime */ -struct ifqueue ipintrq; -SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW | CTLFLAG_LOCKED, - &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); -SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD | CTLFLAG_LOCKED, - &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue"); - +static int ip_getstat SYSCTL_HANDLER_ARGS; struct ipstat ipstat; -SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, - &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); - -/* Packet reassembly stuff */ -#define IPREASS_NHASH_LOG2 6 -#define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) -#define IPREASS_HMASK (IPREASS_NHASH - 1) -#define IPREASS_HASH(x,y) \ - (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) - -static struct ipq ipq[IPREASS_NHASH]; -static TAILQ_HEAD(ipq_list, ipq) ipq_list = - TAILQ_HEAD_INITIALIZER(ipq_list); -const int ipintrq_present = 1; -lck_mtx_t *ip_mutex; -lck_attr_t *ip_mutex_attr; -lck_grp_t *ip_mutex_grp; -lck_grp_attr_t *ip_mutex_grp_attr; -lck_mtx_t *inet_domain_mutex; +SYSCTL_PROC(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, ip_getstat, "S,ipstat", + "IP statistics (struct ipstat, netinet/ip_var.h)"); #if IPCTL_DEFMTU SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_mtu, 0, "Default MTU"); -#endif + &ip_mtu, 0, "Default MTU"); +#endif /* IPCTL_DEFMTU */ #if IPSTEALTH static int ipstealth = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW | CTLFLAG_LOCKED, - &ipstealth, 0, ""); -#endif - + &ipstealth, 0, ""); +#endif /* IPSTEALTH */ /* Firewall hooks */ #if IPFIREWALL @@ -304,21 +304,23 @@ int fw_one_pass = 0; #if DUMMYNET ip_dn_io_t *ip_dn_io_ptr; -#endif +#endif /* DUMMYNET */ -SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local"); +SYSCTL_NODE(_net_inet_ip, OID_AUTO, linklocal, + CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local"); struct ip_linklocal_stat ip_linklocal_stat; -SYSCTL_STRUCT(_net_inet_ip_linklocal, OID_AUTO, stat, CTLFLAG_RD | CTLFLAG_LOCKED, - &ip_linklocal_stat, ip_linklocal_stat, - "Number of link local packets with TTL less than 255"); +SYSCTL_STRUCT(_net_inet_ip_linklocal, OID_AUTO, stat, + CTLFLAG_RD | CTLFLAG_LOCKED, &ip_linklocal_stat, ip_linklocal_stat, + "Number of link local packets with TTL less than 255"); -SYSCTL_NODE(_net_inet_ip_linklocal, OID_AUTO, in, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local input"); +SYSCTL_NODE(_net_inet_ip_linklocal, OID_AUTO, in, + CTLFLAG_RW | CTLFLAG_LOCKED, 0, "link local input"); int ip_linklocal_in_allowbadttl = 1; 
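/*
 * Aside: a minimal userland sketch (illustrative, not part of the
 * patch) of how the knobs declared here surface through sysctl(3).
 * The MIB string is just the SYSCTL node path, so the variable right
 * above is reachable as "net.inet.ip.linklocal.in.allowbadttl";
 * assumes a Darwin system exposing that OID.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	int val;
	size_t len = sizeof (val);

	if (sysctlbyname("net.inet.ip.linklocal.in.allowbadttl",
	    &val, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("allowbadttl = %d\n", val);
	return (0);
}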
-SYSCTL_INT(_net_inet_ip_linklocal_in, OID_AUTO, allowbadttl, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_linklocal_in_allowbadttl, 0, - "Allow incoming link local packets with TTL less than 255"); +SYSCTL_INT(_net_inet_ip_linklocal_in, OID_AUTO, allowbadttl, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip_linklocal_in_allowbadttl, 0, + "Allow incoming link local packets with TTL less than 255"); /* @@ -333,38 +335,30 @@ static struct ip_srcrt { struct in_addr dst; /* final destination */ char nop; /* one NOP to align */ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ - struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; + struct in_addr route[MAX_IPOPTLEN / sizeof (struct in_addr)]; } ip_srcrt; -static void in_ifaddrhashtbl_init(void); -static void save_rte(u_char *, struct in_addr); -static int ip_dooptions(struct mbuf *, int, struct sockaddr_in *); -static void ip_forward(struct mbuf *, int, struct sockaddr_in *); -static void ip_freef(struct ipq *); +static void in_ifaddrhashtbl_init(void); +static void save_rte(u_char *, struct in_addr); +static int ip_dooptions(struct mbuf *, int, struct sockaddr_in *); +static void ip_forward(struct mbuf *, int, struct sockaddr_in *); +static void frag_freef(struct ipqhead *, struct ipq *); #if IPDIVERT #ifdef IPDIVERT_44 -static struct mbuf *ip_reass(struct mbuf *, - struct ipq *, struct ipq *, u_int32_t *, u_int16_t *); -#else -static struct mbuf *ip_reass(struct mbuf *, - struct ipq *, struct ipq *, u_int16_t *, u_int16_t *); -#endif -#else -static struct mbuf *ip_reass(struct mbuf *, struct ipq *, struct ipq *); -#endif +static struct mbuf *ip_reass(struct mbuf *, u_int32_t *, u_int16_t *); +#else /* !IPDIVERT_44 */ +static struct mbuf *ip_reass(struct mbuf *, u_int16_t *, u_int16_t *); +#endif /* !IPDIVERT_44 */ +#else /* !IPDIVERT */ +static struct mbuf *ip_reass(struct mbuf *); +#endif /* !IPDIVERT */ static void ip_fwd_route_copyout(struct ifnet *, struct route *); static void ip_fwd_route_copyin(struct ifnet *, struct route *); -void ipintr(void); -void in_dinit(void); static inline u_short ip_cksum(struct mbuf *, int); -#if RANDOM_IP_ID -extern u_short ip_id; - -int ip_use_randomid = 1; +int ip_use_randomid = 1; SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_use_randomid, 0, "Randomize IP packets IDs"); -#endif + &ip_use_randomid, 0, "Randomize IP packets IDs"); /* * On platforms which require strict alignment (currently for anything but @@ -382,14 +376,14 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW | CTLFLAG_LOCKED, struct ifnet *__ifp = (_ifp); \ atomic_add_64(&(__ifp)->if_alignerrs, 1); \ if (((_m)->m_flags & M_PKTHDR) && \ - (_m)->m_pkthdr.header != NULL) \ - (_m)->m_pkthdr.header = NULL; \ + (_m)->m_pkthdr.pkt_hdr != NULL) \ + (_m)->m_pkthdr.pkt_hdr = NULL; \ _n = m_defrag_offset(_m, max_linkhdr, M_NOWAIT); \ if (_n == NULL) { \ atomic_add_32(&ipstat.ips_toosmall, 1); \ m_freem(_m); \ (_m) = NULL; \ - _action \ + _action; \ } else { \ VERIFY(_n != (_m)); \ (_m) = _n; \ @@ -398,90 +392,112 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW | CTLFLAG_LOCKED, } while (0) #endif /* !__i386__ && !__x86_64__ */ +/* + * GRE input handler function, settable via ip_gre_register_input() for PPTP. + */ +static gre_input_func_t gre_input_func; + /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. 
*/ void -ip_init(void) +ip_init(struct protosw *pp, struct domain *dp) { + static int ip_initialized = 0; struct protosw *pr; + struct timeval tv; int i; - static int ip_initialized = 0; - if (!ip_initialized) - { - PE_parse_boot_argn("net.inet.ip.scopedroute", - &ip_doscopedroute, sizeof (ip_doscopedroute)); - - in_ifaddr_init(); - - in_ifaddr_rwlock_grp_attr = lck_grp_attr_alloc_init(); - in_ifaddr_rwlock_grp = lck_grp_alloc_init("in_ifaddr_rwlock", - in_ifaddr_rwlock_grp_attr); - in_ifaddr_rwlock_attr = lck_attr_alloc_init(); - lck_rw_init(in_ifaddr_rwlock, in_ifaddr_rwlock_grp, - in_ifaddr_rwlock_attr); - - TAILQ_INIT(&in_ifaddrhead); - in_ifaddrhashtbl_init(); - - ip_moptions_init(); - - pr = pffindproto_locked(PF_INET, IPPROTO_RAW, SOCK_RAW); - if (pr == 0) - panic("ip_init"); - for (i = 0; i < IPPROTO_MAX; i++) - ip_protox[i] = pr; - for (pr = inetdomain.dom_protosw; pr; pr = pr->pr_next) { - if (pr->pr_domain == NULL) - continue; /* If uninitialized, skip */ - if (pr->pr_domain->dom_family == PF_INET && - pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) - ip_protox[pr->pr_protocol] = pr; - } - for (i = 0; i < IPREASS_NHASH; i++) - ipq[i].next = ipq[i].prev = &ipq[i]; + domain_proto_mtx_lock_assert_held(); + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); - maxnipq = nmbclusters / 32; - maxfrags = maxnipq * 2; - maxfragsperpacket = 128; /* enough for 64k in 512 byte fragments */ + /* ipq_alloc() uses mbufs for IP fragment queue structures */ + _CASSERT(sizeof (struct ipq) <= _MLEN); -#if RANDOM_IP_ID - { - struct timeval timenow; - getmicrotime(&timenow); - ip_id = timenow.tv_sec & 0xffff; - } -#endif - ipintrq.ifq_maxlen = ipqmaxlen; + /* + * Some ioctls (e.g. SIOCAIFADDR) use ifaliasreq struct, which is + * interchangeable with in_aliasreq; they must have the same size. + */ + _CASSERT(sizeof (struct ifaliasreq) == sizeof (struct in_aliasreq)); - ipf_init(); + if (ip_initialized) + return; + ip_initialized = 1; - ip_mutex_grp_attr = lck_grp_attr_alloc_init(); + PE_parse_boot_argn("net.inet.ip.scopedroute", + &ip_doscopedroute, sizeof (ip_doscopedroute)); - ip_mutex_grp = lck_grp_alloc_init("ip", ip_mutex_grp_attr); + in_ifaddr_init(); - ip_mutex_attr = lck_attr_alloc_init(); + in_ifaddr_rwlock_grp_attr = lck_grp_attr_alloc_init(); + in_ifaddr_rwlock_grp = lck_grp_alloc_init("in_ifaddr_rwlock", + in_ifaddr_rwlock_grp_attr); + in_ifaddr_rwlock_attr = lck_attr_alloc_init(); + lck_rw_init(in_ifaddr_rwlock, in_ifaddr_rwlock_grp, + in_ifaddr_rwlock_attr); - if ((ip_mutex = lck_mtx_alloc_init(ip_mutex_grp, ip_mutex_attr)) == NULL) { - printf("ip_init: can't alloc ip_mutex\n"); - return; + TAILQ_INIT(&in_ifaddrhead); + in_ifaddrhashtbl_init(); + + ip_moptions_init(); + + pr = pffindproto_locked(PF_INET, IPPROTO_RAW, SOCK_RAW); + if (pr == NULL) { + panic("%s: Unable to find [PF_INET,IPPROTO_RAW,SOCK_RAW]\n", + __func__); + /* NOTREACHED */ + } + + /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ + for (i = 0; i < IPPROTO_MAX; i++) + ip_protox[i] = pr; + /* + * Cycle through IP protocols and put them into the appropriate place + * in ip_protox[], skipping protocols IPPROTO_{IP,RAW}. + */ + VERIFY(dp == inetdomain && dp->dom_family == PF_INET); + TAILQ_FOREACH(pr, &dp->dom_protosw, pr_entry) { + VERIFY(pr->pr_domain == dp); + if (pr->pr_protocol != 0 && pr->pr_protocol != IPPROTO_RAW) { + /* Be careful to only index valid IP protocols. 
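 */

/*
 * Aside (illustrative, not part of the patch): the ip_protox[] setup
 * in miniature. Every slot defaults to the raw handler, then each
 * registered protocol overrides its own slot, with the same bounds
 * check as the kernel loop. Handler names here are hypothetical.
 */
#include <stdio.h>

#define PROTO_MAX	256	/* stand-in for IPPROTO_MAX */

static void raw_input(int p)  { printf("raw handler, proto %d\n", p); }
static void tcp_input_(int p) { printf("tcp handler, proto %d\n", p); }

static void (*protox[PROTO_MAX])(int);

int
main(void)
{
	int i, proto = 6;	/* IPPROTO_TCP */

	for (i = 0; i < PROTO_MAX; i++)	/* default everything to raw */
		protox[i] = raw_input;
	if (proto < PROTO_MAX)		/* only index valid protocols */
		protox[proto] = tcp_input_;

	protox[6](6);			/* dispatches to the tcp handler */
	protox[99](99);			/* unregistered: falls back to raw */
	return (0);
}

/*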
*/ + if (pr->pr_protocol < IPPROTO_MAX) + ip_protox[pr->pr_protocol] = pr; } + } -#if IPSEC - - sadb_stat_mutex_grp_attr = lck_grp_attr_alloc_init(); - sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat", sadb_stat_mutex_grp_attr); - sadb_stat_mutex_attr = lck_attr_alloc_init(); + /* IP fragment reassembly queue lock */ + ipqlock_grp_attr = lck_grp_attr_alloc_init(); + ipqlock_grp = lck_grp_alloc_init("ipqlock", ipqlock_grp_attr); + ipqlock_attr = lck_attr_alloc_init(); + lck_mtx_init(&ipqlock, ipqlock_grp, ipqlock_attr); - lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp, sadb_stat_mutex_attr); + lck_mtx_lock(&ipqlock); + /* Initialize IP reassembly queue. */ + for (i = 0; i < IPREASS_NHASH; i++) + TAILQ_INIT(&ipq[i]); -#endif - arp_init(); + maxnipq = nmbclusters / 32; + maxfragsperpacket = 128; /* enough for 64k in 512 byte fragments */ + ipq_updateparams(); + lck_mtx_unlock(&ipqlock); - ip_initialized = 1; - } + getmicrotime(&tv); + ip_id = RandomULong() ^ tv.tv_usec; + ip_initid(); + + ipf_init(); + +#if IPSEC + sadb_stat_mutex_grp_attr = lck_grp_attr_alloc_init(); + sadb_stat_mutex_grp = lck_grp_alloc_init("sadb_stat", + sadb_stat_mutex_grp_attr); + sadb_stat_mutex_attr = lck_attr_alloc_init(); + lck_mtx_init(sadb_stat_mutex, sadb_stat_mutex_grp, + sadb_stat_mutex_attr); + +#endif + arp_init(); } /* @@ -495,7 +511,8 @@ in_ifaddrhashtbl_init(void) if (in_ifaddrhashtbl != NULL) return; - PE_parse_boot_argn("inaddr_nhash", &inaddr_nhash, sizeof (inaddr_nhash)); + PE_parse_boot_argn("inaddr_nhash", &inaddr_nhash, + sizeof (inaddr_nhash)); if (inaddr_nhash == 0) inaddr_nhash = INADDR_NHASH; @@ -536,53 +553,10 @@ inaddr_hashval(u_int32_t key) return (0); } -static void -ip_proto_input( - protocol_family_t __unused protocol, - mbuf_t packet_list) -{ - mbuf_t packet; - int how_many = 0 ; - - /* ip_input should handle a list of packets but does not yet */ - - for (packet = packet_list; packet; packet = packet_list) { - how_many++; - packet_list = mbuf_nextpkt(packet); - mbuf_setnextpkt(packet, NULL); - ip_input(packet); - } -} - -/* Initialize the PF_INET domain, and add in the pre-defined protos */ -void -in_dinit(void) -{ - int i; - struct protosw *pr; - struct domain *dp; - static int inetdomain_initted = 0; - - if (!inetdomain_initted) - { - dp = &inetdomain; - dp->dom_flags = DOM_REENTRANT; - - for (i=0, pr = &inetsw[0]; idom_mtx; - inetdomain_initted = 1; - - domain_proto_mtx_unlock(TRUE); - proto_register_input(PF_INET, ip_proto_input, NULL, 1); - domain_proto_mtx_lock(); - } -} - void ip_proto_dispatch_in_wrapper(struct mbuf *m, int hlen, u_int8_t proto) { - ip_proto_dispatch_in(m, hlen, proto, 0); + ip_proto_dispatch_in(m, hlen, proto, 0); } __private_extern__ void @@ -590,7 +564,7 @@ ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto, ipfilter_t inject_ipfref) { struct ipfilter *filter; - int seen = (inject_ipfref == 0); + int seen = (inject_ipfref == NULL); int changed_header = 0; struct ip *ip; void (*pr_input)(struct mbuf *, int len); @@ -611,18 +585,21 @@ ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto, * into filter(s). 
*/ IP_HDR_ALIGNMENT_FIXUP(m, - m->m_pkthdr.rcvif, - ipf_unref(); return;); + m->m_pkthdr.rcvif, ipf_unref()); + + /* ipf_unref() already called */ + if (m == NULL) + return; changed_header = 1; ip = mtod(m, struct ip *); ip->ip_len = htons(ip->ip_len + hlen); ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, hlen); + ip->ip_sum = ip_cksum_hdr_in(m, hlen); } result = filter->ipf_filter.ipf_input( - filter->ipf_filter.cookie, (mbuf_t*)&m, + filter->ipf_filter.cookie, (mbuf_t *)&m, hlen, proto); if (result == EJUSTRETURN) { ipf_unref(); @@ -639,7 +616,7 @@ ip_proto_dispatch_in(struct mbuf *m, int hlen, u_int8_t proto, } /* Perform IP header alignment fixup (post-filters), if needed */ - IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return;); + IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return); /* * If there isn't a specific lock for the protocol @@ -672,9 +649,8 @@ void ip_input(struct mbuf *m) { struct ip *ip; - struct ipq *fp; struct in_ifaddr *ia = NULL; - unsigned int hlen, checkif; + unsigned int hlen, checkif; u_short sum = 0; struct in_addr pkt_dst; #if IPFIREWALL @@ -685,16 +661,21 @@ ip_input(struct mbuf *m) struct ip_fw_args args; struct m_tag *tag; #endif - ipfilter_t inject_filter_ref = 0; + ipfilter_t inject_filter_ref = NULL; + struct ifnet *inifp; /* Check if the mbuf is still valid after interface filter processing */ MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif); + inifp = m->m_pkthdr.rcvif; + VERIFY(inifp != NULL); /* Perform IP header alignment fixup, if needed */ - IP_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, goto bad;); + IP_HDR_ALIGNMENT_FIXUP(m, inifp, goto bad); + + m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED; #if IPFIREWALL || DUMMYNET - bzero(&args, sizeof(struct ip_fw_args)); + bzero(&args, sizeof (struct ip_fw_args)); /* * Don't bother searching for tag(s) if there's none. @@ -706,7 +687,7 @@ ip_input(struct mbuf *m) #if DUMMYNET if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { - struct dn_pkt_tag *dn_tag; + struct dn_pkt_tag *dn_tag; dn_tag = (struct dn_pkt_tag *)(tag+1); args.fwa_ipfw_rule = dn_tag->dn_ipfw_rule; @@ -719,7 +700,7 @@ ip_input(struct mbuf *m) #if IPDIVERT if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DIVERT, NULL)) != NULL) { - struct divert_tag *div_tag; + struct divert_tag *div_tag; div_tag = (struct divert_tag *)(tag+1); args.fwa_divert_rule = div_tag->cookie; @@ -730,7 +711,7 @@ ip_input(struct mbuf *m) if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, NULL)) != NULL) { - struct ip_fwd_tag *ipfwd_tag; + struct ip_fwd_tag *ipfwd_tag; ipfwd_tag = (struct ip_fwd_tag *)(tag+1); args.fwa_next_hop = ipfwd_tag->next_hop; @@ -739,12 +720,13 @@ ip_input(struct mbuf *m) } #if DIAGNOSTIC - if (m == NULL || (m->m_flags & M_PKTHDR) == 0) + if (m == NULL || !(m->m_flags & M_PKTHDR)) panic("ip_input no HDR"); #endif #if DUMMYNET - if (args.fwa_ipfw_rule || args.fwa_pf_rule) { /* dummynet already filtered us */ + if (args.fwa_ipfw_rule || args.fwa_pf_rule) { + /* dummynet already filtered us */ ip = mtod(m, struct ip *); hlen = IP_VHL_HL(ip->ip_vhl) << 2; inject_filter_ref = ipf_get_inject_filter(m); @@ -757,21 +739,21 @@ ip_input(struct mbuf *m) } #endif /* DUMMYNET */ ipfw_tags_done: -#endif /* IPFIREWALL || DUMMYNET*/ +#endif /* IPFIREWALL || DUMMYNET */ /* * No need to process packet twice if we've already seen it. 
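 */

/*
 * Aside (illustrative): the "(tag + 1)" casts used with m_tag_locate()
 * above rely on a tag's payload being laid out directly behind its
 * header. Same idiom with stand-in types (the kernel's struct m_tag
 * and struct ip_fwd_tag are richer):
 */
#include <stdio.h>
#include <stdlib.h>

struct tag_hdr { unsigned short type, len; };
struct fwd_payload { unsigned int next_hop; };

int
main(void)
{
	struct tag_hdr *tag = malloc(sizeof (*tag) +
	    sizeof (struct fwd_payload));
	struct fwd_payload *p;

	if (tag == NULL)
		return (1);
	tag->type = 1;
	tag->len = sizeof (*p);
	p = (struct fwd_payload *)(tag + 1);	/* payload follows header */
	p->next_hop = 0x0a000001;		/* 10.0.0.1, illustrative */
	printf("tag %u carries next hop %#x\n", tag->type, p->next_hop);
	free(tag);
	return (0);
}

/*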
*/ if (!SLIST_EMPTY(&m->m_pkthdr.tags)) inject_filter_ref = ipf_get_inject_filter(m); - if (inject_filter_ref != 0) { + if (inject_filter_ref != NULL) { ip = mtod(m, struct ip *); hlen = IP_VHL_HL(ip->ip_vhl) << 2; - DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, - struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif, - struct ip *, ip, struct ip6_hdr *, NULL); - + DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, + struct ip *, ip, struct ifnet *, inifp, + struct ip *, ip, struct ip6_hdr *, NULL); + ip->ip_len = ntohs(ip->ip_len) - hlen; ip->ip_off = ntohs(ip->ip_off); ip_proto_dispatch_in(m, hlen, ip->ip_p, inject_filter_ref); @@ -779,18 +761,18 @@ ipfw_tags_done: } OSAddAtomic(1, &ipstat.ips_total); - if (m->m_pkthdr.len < sizeof(struct ip)) + if (m->m_pkthdr.len < sizeof (struct ip)) goto tooshort; if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == 0) { + (m = m_pullup(m, sizeof (struct ip))) == NULL) { OSAddAtomic(1, &ipstat.ips_toosmall); return; } ip = mtod(m, struct ip *); - KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, - ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, ip->ip_src.s_addr, + ip->ip_p, ip->ip_off, ip->ip_len); if (IP_VHL_V(ip->ip_vhl) != IPVERSION) { OSAddAtomic(1, &ipstat.ips_badvers); @@ -798,12 +780,12 @@ ipfw_tags_done: } hlen = IP_VHL_HL(ip->ip_vhl) << 2; - if (hlen < sizeof(struct ip)) { /* minimum header length */ + if (hlen < sizeof (struct ip)) { /* minimum header length */ OSAddAtomic(1, &ipstat.ips_badhlen); goto bad; } if (hlen > m->m_len) { - if ((m = m_pullup(m, hlen)) == 0) { + if ((m = m_pullup(m, hlen)) == NULL) { OSAddAtomic(1, &ipstat.ips_badhlen); return; } @@ -813,14 +795,28 @@ ipfw_tags_done: /* 127/8 must not appear on wire - RFC1122 */ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { - if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { + /* + * Allow for the following exceptions: + * + * 1. If the packet was sent to loopback (i.e. rcvif + * would have been set earlier at output time.) + * + * 2. If the packet was sent out on loopback from a local + * source address which belongs to a non-loopback + * interface (i.e. rcvif may not necessarily be a + * loopback interface, hence the test for PKTF_LOOP.) + * Unlike IPv6, there is no interface scope ID, and + * therefore we don't care so much about PKTF_IFINFO. 
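 */

/*
 * Aside (illustrative): the RFC 1122 test above, isolated. An address
 * is on the loopback net when its class-A network byte equals
 * IN_LOOPBACKNET (127); local defines stand in for the kernel macros.
 */
#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

#define CLASSA_NSHIFT	24	/* IN_CLASSA_NSHIFT */
#define LOOPBACKNET	127	/* IN_LOOPBACKNET */

static int
on_loopback_net(const char *addr)
{
	struct in_addr a;

	if (inet_pton(AF_INET, addr, &a) != 1)
		return (-1);
	return ((ntohl(a.s_addr) >> CLASSA_NSHIFT) == LOOPBACKNET);
}

int
main(void)
{
	printf("127.1.2.3 -> %d\n", on_loopback_net("127.1.2.3"));	/* 1 */
	printf("10.0.0.1  -> %d\n", on_loopback_net("10.0.0.1"));	/* 0 */
	return (0);
}

/*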
+ */ + if (!(inifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { OSAddAtomic(1, &ipstat.ips_badaddr); goto bad; } } - /* IPv4 Link-Local Addresses as defined in */ - if ((IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || + /* IPv4 Link-Local Addresses as defined in RFC3927 */ + if ((IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)))) { ip_linklocal_stat.iplls_in_total++; if (ip->ip_ttl != MAXTTL) { @@ -836,9 +832,9 @@ ipfw_tags_done: goto bad; } - DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, - struct ip *, ip, struct ifnet *, m->m_pkthdr.rcvif, - struct ip *, ip, struct ip6_hdr *, NULL); + DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, + struct ip *, ip, struct ifnet *, inifp, + struct ip *, ip, struct ip6_hdr *, NULL); /* * Naively assume we can attribute inbound data to the route we would @@ -853,7 +849,7 @@ ipfw_tags_done: */ if (nstat_collect) { struct rtentry *rt = - ifnet_cached_rtlookup_inet(m->m_pkthdr.rcvif, ip->ip_src); + ifnet_cached_rtlookup_inet(inifp, ip->ip_src); if (rt != NULL) { nstat_route_rx(rt, 1, m->m_pkthdr.len, 0); rtfree(rt); @@ -866,7 +862,7 @@ ipfw_tags_done: #if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); #endif - + if (ip->ip_len < hlen) { OSAddAtomic(1, &ipstat.ips_badlen); goto bad; @@ -887,10 +883,22 @@ tooshort: goto bad; } if (m->m_pkthdr.len > ip->ip_len) { - /* Invalidate hwcksuming */ - m->m_pkthdr.csum_flags = 0; - m->m_pkthdr.csum_data = 0; + /* + * Invalidate hardware checksum info if ip_adj_clear_hwcksum + * is set; useful to handle buggy drivers. Note that this + * should not be enabled by default, as we may get here due + * to link-layer padding. + */ + if (ip_adj_clear_hwcksum && + (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) && + !(inifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + m->m_pkthdr.csum_data = 0; + ipstat.ips_adj_hwcsum_clr++; + } + ipstat.ips_adj++; if (m->m_len == m->m_pkthdr.len) { m->m_len = ip->ip_len; m->m_pkthdr.len = ip->ip_len; @@ -898,6 +906,8 @@ tooshort: m_adj(m, ip->ip_len - m->m_pkthdr.len); } + /* for consistency */ + m->m_pkthdr.pkt_proto = ip->ip_p; #if DUMMYNET check_with_pf: @@ -907,13 +917,14 @@ check_with_pf: if (PF_IS_ENABLED) { int error; #if DUMMYNET - error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE, &args); + error = pf_af_hook(inifp, NULL, &m, AF_INET, TRUE, &args); #else - error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET, TRUE, NULL); + error = pf_af_hook(inifp, NULL, &m, AF_INET, TRUE, NULL); #endif /* DUMMYNET */ if (error != 0 || m == NULL) { if (m != NULL) { - panic("%s: unexpected packet %p\n", __func__, m); + panic("%s: unexpected packet %p\n", + __func__, m); /* NOTREACHED */ } /* Already freed by callee */ @@ -952,20 +963,21 @@ iphack: i = ip_fw_chk_ptr(&args); m = args.fwa_m; - if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */ + if ((i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */ if (m) m_freem(m); return; } ip = mtod(m, struct ip *); /* just in case m changed */ - - if (i == 0 && args.fwa_next_hop == NULL) { /* common case */ + + if (i == 0 && args.fwa_next_hop == NULL) { /* common case */ goto pass; } #if DUMMYNET - if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) { + if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) { /* Send packet to the appropriate pipe */ - ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args, DN_CLIENT_IPFW); + ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args, + DN_CLIENT_IPFW); return; } 
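/*
 * Aside (illustrative): the ip_adj_clear_hwcksum policy above as a
 * pure predicate. Hardware checksum state is discarded only when the
 * knob is set, the driver claimed a valid checksum, and the packet is
 * not on a loopback path. Flag values below are stand-ins for
 * CSUM_DATA_VALID and PKTF_LOOP.
 */
#include <stdio.h>
#include <stdint.h>

#define CK_DATA_VALID	0x01
#define PK_LOOP		0x01

static int
should_clear_hwcksum(uint32_t knob, uint32_t csum_flags, int if_loopback,
    uint32_t pkt_flags)
{
	return (knob != 0 && (csum_flags & CK_DATA_VALID) &&
	    !if_loopback && !(pkt_flags & PK_LOOP));
}

int
main(void)
{
	printf("%d\n", should_clear_hwcksum(1, CK_DATA_VALID, 0, 0)); /* 1 */
	printf("%d\n", should_clear_hwcksum(1, CK_DATA_VALID, 0,
	    PK_LOOP));						      /* 0 */
	printf("%d\n", should_clear_hwcksum(0, CK_DATA_VALID, 0, 0)); /* 0 */
	return (0);
}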
#endif /* DUMMYNET */ @@ -988,8 +1000,9 @@ iphack: return; } #endif /* IPFIREWALL */ +#if IPSEC | IPFIREWALL pass: - +#endif /* * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an @@ -998,21 +1011,27 @@ pass: */ ip_nhops = 0; /* for source routed packets */ #if IPFIREWALL - if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.fwa_next_hop)) { -#else + if (hlen > sizeof (struct ip) && + ip_dooptions(m, 0, args.fwa_next_hop)) { +#else /* !IPFIREWALL */ if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, NULL)) { -#endif +#endif /* !IPFIREWALL */ return; } - /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no - * matter if it is destined to another node, or whether it is - * a multicast one, RSVP wants it! and prevents it from being forwarded - * anywhere else. Also checks if the rsvp daemon is running before +#if MROUTING + /* + * greedy RSVP, snatches any PATH packet of the RSVP protocol and no + * matter if it is destined to another node, or whether it is + * a multicast one, RSVP wants it! and prevents it from being forwarded + * anywhere else. Also checks if the rsvp daemon is running before * grabbing the packet. - */ - if (rsvp_on && ip->ip_p==IPPROTO_RSVP) + */ + if (rsvp_on && ip->ip_p == IPPROTO_RSVP) { + ip_setdstifaddr_info(m, inifp->if_index, NULL); goto ours; + } +#endif /* MROUTING */ /* * Check our list of addresses, to see if the packet is for us. @@ -1020,9 +1039,10 @@ pass: * we receive might be for us (and let the upper layers deal * with it). */ - if (TAILQ_EMPTY(&in_ifaddrhead) && - (m->m_flags & (M_MCAST|M_BCAST)) == 0) + if (TAILQ_EMPTY(&in_ifaddrhead) && !(m->m_flags & (M_MCAST|M_BCAST))) { + ip_setdstifaddr_info(m, inifp->if_index, NULL); goto ours; + } /* * Cache the destination address of the packet; this may be @@ -1031,9 +1051,9 @@ pass: #if IPFIREWALL pkt_dst = args.fwa_next_hop == NULL ? ip->ip_dst : args.fwa_next_hop->sin_addr; -#else +#else /* !IPFIREWALL */ pkt_dst = ip->ip_dst; -#endif +#endif /* !IPFIREWALL */ /* * Enable a consistency check between the destination address @@ -1049,13 +1069,14 @@ pass: * to the loopback interface instead of the interface where * the packets are received. */ - checkif = ip_checkinterface && (ipforwarding == 0) && - ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) + checkif = ip_checkinterface && (ipforwarding == 0) && + !(inifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP) #if IPFIREWALL && (args.fwa_next_hop == NULL); -#else +#else /* !IPFIREWALL */ ; -#endif +#endif /* !IPFIREWALL */ /* * Check for exact addresses in the hash bucket. @@ -1067,14 +1088,12 @@ pass: * arrived via the correct interface if checking is * enabled. */ - IFA_LOCK_SPIN(&ia->ia_ifa); - if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && - (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif)) { - IFA_UNLOCK(&ia->ia_ifa); + if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr && + (!checkif || ia->ia_ifp == inifp)) { + ip_setdstifaddr_info(m, 0, ia); lck_rw_done(in_ifaddr_rwlock); goto ours; } - IFA_UNLOCK(&ia->ia_ifa); } lck_rw_done(in_ifaddr_rwlock); @@ -1086,32 +1105,28 @@ pass: * handled via ip_forward() and ether_frameout() with the loopback * into the stack for SIMPLEX interfaces handled by ether_frameout(). 
*/ - if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { + if (inifp->if_flags & IFF_BROADCAST) { struct ifaddr *ifa; - struct ifnet *ifp = m->m_pkthdr.rcvif; - ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - IFA_LOCK_SPIN(ifa); + + ifnet_lock_shared(inifp); + TAILQ_FOREACH(ifa, &inifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) { - IFA_UNLOCK(ifa); continue; } ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == pkt_dst.s_addr || ia->ia_netbroadcast.s_addr == pkt_dst.s_addr) { - IFA_UNLOCK(ifa); - ifnet_lock_done(ifp); + ip_setdstifaddr_info(m, 0, ia); + ifnet_lock_done(inifp); goto ours; } - IFA_UNLOCK(ifa); } - ifnet_lock_done(ifp); + ifnet_lock_done(inifp); } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct in_multi *inm; - struct ifnet *ifp = m->m_pkthdr.rcvif; #if MROUTING if (ip_mrouter) { /* @@ -1122,11 +1137,9 @@ pass: * ip_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. */ - lck_mtx_lock(ip_mutex); - if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { + if (ip_mforward && ip_mforward(ip, inifp, m, 0) != 0) { OSAddAtomic(1, &ipstat.ips_cantforward); m_freem(m); - lck_mtx_unlock(ip_mutex); return; } @@ -1135,8 +1148,10 @@ pass: * all multicast IGMP packets, whether or not this * host belongs to their destination groups. */ - if (ip->ip_p == IPPROTO_IGMP) + if (ip->ip_p == IPPROTO_IGMP) { + ip_setdstifaddr_info(m, inifp->if_index, NULL); goto ours; + } OSAddAtomic(1, &ipstat.ips_forward); } #endif /* MROUTING */ @@ -1145,34 +1160,36 @@ pass: * arrival interface. */ in_multihead_lock_shared(); - IN_LOOKUP_MULTI(&ip->ip_dst, ifp, inm); + IN_LOOKUP_MULTI(&ip->ip_dst, inifp, inm); in_multihead_lock_done(); if (inm == NULL) { OSAddAtomic(1, &ipstat.ips_notmember); m_freem(m); return; } + ip_setdstifaddr_info(m, inifp->if_index, NULL); INM_REMREF(inm); goto ours; } - if (ip->ip_dst.s_addr == (u_int32_t)INADDR_BROADCAST) - goto ours; - if (ip->ip_dst.s_addr == INADDR_ANY) + if (ip->ip_dst.s_addr == (u_int32_t)INADDR_BROADCAST || + ip->ip_dst.s_addr == INADDR_ANY) { + ip_setdstifaddr_info(m, inifp->if_index, NULL); goto ours; + } /* Allow DHCP/BootP responses through */ - if (m->m_pkthdr.rcvif != NULL - && (m->m_pkthdr.rcvif->if_eflags & IFEF_AUTOCONFIGURING) - && hlen == sizeof(struct ip) - && ip->ip_p == IPPROTO_UDP) { + if ((inifp->if_eflags & IFEF_AUTOCONFIGURING) && + hlen == sizeof (struct ip) && ip->ip_p == IPPROTO_UDP) { struct udpiphdr *ui; - if (m->m_len < sizeof(struct udpiphdr) - && (m = m_pullup(m, sizeof(struct udpiphdr))) == 0) { + + if (m->m_len < sizeof (struct udpiphdr) && + (m = m_pullup(m, sizeof (struct udpiphdr))) == NULL) { OSAddAtomic(1, &udpstat.udps_hdrops); return; } ui = mtod(m, struct udpiphdr *); if (ntohs(ui->ui_dport) == IPPORT_BOOTPC) { + ip_setdstifaddr_info(m, inifp->if_index, NULL); goto ours; } ip = mtod(m, struct ip *); /* in case it changed */ @@ -1196,147 +1213,44 @@ pass: ours: /* * If offset or IP_MF are set, must reassemble. - * Otherwise, nothing need be done. - * (We could look in the reassembly queue to see - * if the packet was previously fragmented, - * but it's not worth the time; just let them time out.) */ - if (ip->ip_off & (IP_MF | IP_OFFMASK | IP_RF)) { - - /* If maxnipq is 0, never accept fragments. 
*/ - if (maxnipq == 0) { - - OSAddAtomic(1, &ipstat.ips_fragments); - OSAddAtomic(1, &ipstat.ips_fragdropped); - goto bad; - } - - /* - * If we will exceed the number of fragments in queues, timeout the - * oldest fragemented packet to make space. - */ - lck_mtx_lock(ip_mutex); - if (currentfrags >= maxfrags) { - fp = TAILQ_LAST(&ipq_list, ipq_list); - OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragtimeout); - - if (ip->ip_id == fp->ipq_id && - ip->ip_src.s_addr == fp->ipq_src.s_addr && - ip->ip_dst.s_addr == fp->ipq_dst.s_addr && - ip->ip_p == fp->ipq_p) { - /* - * If we match the fragment queue we were going to - * discard, drop this packet too. - */ - OSAddAtomic(1, &ipstat.ips_fragdropped); - ip_freef(fp); - lck_mtx_unlock(ip_mutex); - goto bad; - } - - ip_freef(fp); - } - - sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); - /* - * Look for queue of fragments - * of this datagram. - */ - for (fp = ipq[sum].next; fp != &ipq[sum]; fp = fp->next) - if (ip->ip_id == fp->ipq_id && - ip->ip_src.s_addr == fp->ipq_src.s_addr && - ip->ip_dst.s_addr == fp->ipq_dst.s_addr && -#if CONFIG_MACF_NET - mac_ipq_label_compare(m, fp) && -#endif - ip->ip_p == fp->ipq_p) - goto found; - - /* - * Enforce upper bound on number of fragmented packets - * for which we attempt reassembly; - * If maxnipq is -1, accept all fragments without limitation. - */ - if ((nipq > maxnipq) && (maxnipq > 0)) { - /* - * drop the oldest fragment before proceeding further - */ - fp = TAILQ_LAST(&ipq_list, ipq_list); - OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragtimeout); - ip_freef(fp); - } - - fp = NULL; - -found: - /* - * Adjust ip_len to not reflect header, - * convert offset of this to bytes. - */ - ip->ip_len -= hlen; - if (ip->ip_off & IP_MF) { - /* - * Make sure that fragments have a data length - * that's a non-zero multiple of 8 bytes. - */ - if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { - OSAddAtomic(1, &ipstat.ips_toosmall); - lck_mtx_unlock(ip_mutex); - goto bad; - } - m->m_flags |= M_FRAG; - } else { - /* Clear the flag in case packet comes from loopback */ - m->m_flags &= ~M_FRAG; - } - ip->ip_off <<= 3; - + if (ip->ip_off & ~(IP_DF | IP_RF)) { /* - * Attempt reassembly; if it succeeds, proceed. * ip_reass() will return a different mbuf, and update * the divert info in div_info and args.fwa_divert_rule. 
*/ - OSAddAtomic(1, &ipstat.ips_fragments); - m->m_pkthdr.header = ip; #if IPDIVERT - m = ip_reass(m, fp, &ipq[sum], - (u_int16_t *)&div_info, &args.fwa_divert_rule); + m = ip_reass(m, (u_int16_t *)&div_info, &args.fwa_divert_rule); #else - m = ip_reass(m, fp, &ipq[sum]); + m = ip_reass(m); #endif - if (m == 0) { - lck_mtx_unlock(ip_mutex); - return; - } - OSAddAtomic(1, &ipstat.ips_reassembled); - ip = mtod(m, struct ip *); - /* Get the header length of the reassembled packet */ - hlen = IP_VHL_HL(ip->ip_vhl) << 2; - + if (m == NULL) + return; + ip = mtod(m, struct ip *); + /* Get the header length of the reassembled packet */ + hlen = IP_VHL_HL(ip->ip_vhl) << 2; #if IPDIVERT - /* Restore original checksum before diverting packet */ - if (div_info != 0) { - ip->ip_len += hlen; - + /* Restore original checksum before diverting packet */ + if (div_info != 0) { #if BYTE_ORDER != BIG_ENDIAN - HTONS(ip->ip_len); - HTONS(ip->ip_off); + HTONS(ip->ip_len); + HTONS(ip->ip_off); #endif - - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, hlen); - + ip->ip_sum = 0; + ip->ip_sum = ip_cksum_hdr_in(m, hlen); #if BYTE_ORDER != BIG_ENDIAN - NTOHS(ip->ip_off); - NTOHS(ip->ip_len); + NTOHS(ip->ip_off); + NTOHS(ip->ip_len); #endif - - ip->ip_len -= hlen; - } + } #endif - lck_mtx_unlock(ip_mutex); - } else - ip->ip_len -= hlen; + } + + /* + * Further protocols expect the packet length to be w/o the + * IP header. + */ + ip->ip_len -= hlen; #if IPDIVERT /* @@ -1349,7 +1263,7 @@ found: struct mbuf *clone = NULL; /* Clone packet if we're doing a 'tee' */ - if ((div_info & IP_FW_PORT_TEE_FLAG) != 0) + if (div_info & IP_FW_PORT_TEE_FLAG) clone = m_dup(m, M_DONTWAIT); /* Restore packet header fields to original values */ @@ -1378,69 +1292,136 @@ found: * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ - if (ipsec_bypass == 0 && (ip_protox[ip->ip_p]->pr_flags & PR_LASTHDR) != 0) { + if (ipsec_bypass == 0 && (ip_protox[ip->ip_p]->pr_flags & PR_LASTHDR)) { if (ipsec4_in_reject(m, NULL)) { IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); - goto bad; + goto bad; } } -#endif +#endif /* IPSEC */ /* * Switch out to protocol's input routine. 
*/ OSAddAtomic(1, &ipstat.ips_delivered); - { + #if IPFIREWALL - if (args.fwa_next_hop && ip->ip_p == IPPROTO_TCP) { - /* TCP needs IPFORWARD info if available */ - struct m_tag *fwd_tag; - struct ip_fwd_tag *ipfwd_tag; - - fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID, - KERNEL_TAG_TYPE_IPFORWARD, sizeof (*ipfwd_tag), - M_NOWAIT, m); - if (fwd_tag == NULL) { - goto bad; - } - - ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); - ipfwd_tag->next_hop = args.fwa_next_hop; - - m_tag_prepend(m, fwd_tag); - - KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, - ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); - - if (sw_lro) { - m = tcp_lro(m, hlen); - if (m == NULL) - return; - } - /* TCP deals with its own locking */ - ip_proto_dispatch_in(m, hlen, ip->ip_p, 0); - } else { - KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, - ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); - - ip_proto_dispatch_in(m, hlen, ip->ip_p, 0); - } -#else + if (args.fwa_next_hop && ip->ip_p == IPPROTO_TCP) { + /* TCP needs IPFORWARD info if available */ + struct m_tag *fwd_tag; + struct ip_fwd_tag *ipfwd_tag; + + fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID, + KERNEL_TAG_TYPE_IPFORWARD, sizeof (*ipfwd_tag), + M_NOWAIT, m); + if (fwd_tag == NULL) + goto bad; + + ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); + ipfwd_tag->next_hop = args.fwa_next_hop; + + m_tag_prepend(m, fwd_tag); + + KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, + ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + + /* TCP deals with its own locking */ + ip_proto_dispatch_in(m, hlen, ip->ip_p, 0); + } else { + KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, + ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + if ((sw_lro) && (ip->ip_p == IPPROTO_TCP)) { m = tcp_lro(m, hlen); if (m == NULL) return; } + ip_proto_dispatch_in(m, hlen, ip->ip_p, 0); -#endif - - return; } +#else /* !IPFIREWALL */ + if ((sw_lro) && (ip->ip_p == IPPROTO_TCP)) { + m = tcp_lro(m, hlen); + if (m == NULL) + return; + } + ip_proto_dispatch_in(m, hlen, ip->ip_p, 0); +#endif /* !IPFIREWALL */ + return; + bad: - KERNEL_DEBUG(DBG_LAYER_END, 0,0,0,0,0); + KERNEL_DEBUG(DBG_LAYER_END, 0, 0, 0, 0, 0); m_freem(m); } +static void +ipq_updateparams(void) +{ + lck_mtx_assert(&ipqlock, LCK_MTX_ASSERT_OWNED); + /* + * -1 for unlimited allocation. + */ + if (maxnipq < 0) + ipq_limit = 0; + /* + * Positive number for specific bound. + */ + if (maxnipq > 0) + ipq_limit = maxnipq; + /* + * Zero specifies no further fragment queue allocation -- set the + * bound very low, but rely on implementation elsewhere to actually + * prevent allocation and reclaim current queues. 
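 */

/*
 * Aside (illustrative): the ipq_updateparams() tri-state around this
 * point, isolated into a pure function. maxnipq < 0 means unlimited
 * (ipq_limit 0 is "no cap" in ipq_alloc()), a positive value is a
 * literal cap, and 0 leaves a floor of 1 that allocation bounces off.
 */
#include <stdio.h>

static unsigned int
limit_from_maxnipq(int maxnipq)
{
	if (maxnipq < 0)
		return (0);			/* unlimited */
	if (maxnipq > 0)
		return ((unsigned int)maxnipq);	/* hard cap */
	return (1);				/* effectively disabled */
}

int
main(void)
{
	printf("-1 -> %u\n", limit_from_maxnipq(-1));	/* 0 */
	printf("64 -> %u\n", limit_from_maxnipq(64));	/* 64 */
	printf(" 0 -> %u\n", limit_from_maxnipq(0));	/* 1 */
	return (0);
}

/*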
+ */ + if (maxnipq == 0) + ipq_limit = 1; + /* + * Arm the purge timer if not already and if there's work to do + */ + frag_sched_timeout(); +} + +static int +sysctl_maxnipq SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + lck_mtx_lock(&ipqlock); + i = maxnipq; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < -1 || i > (nmbclusters / 4)) { + error = EINVAL; + goto done; + } + maxnipq = i; + ipq_updateparams(); +done: + lck_mtx_unlock(&ipqlock); + return (error); +} + +static int +sysctl_maxfragsperpacket SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + lck_mtx_lock(&ipqlock); + i = maxfragsperpacket; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + maxfragsperpacket = i; + ipq_updateparams(); /* see if we need to arm timer */ +done: + lck_mtx_unlock(&ipqlock); + return (error); +} + /* * Take incoming datagram fragment and try to reassemble it into * whole datagram. If a chain for reassembly of this datagram already @@ -1448,29 +1429,176 @@ bad: * * When IPDIVERT enabled, keep additional state with each packet that * tells us if we need to divert or tee the packet we're building. + * + * The IP header is *NOT* adjusted out of iplen. */ - static struct mbuf * #if IPDIVERT -ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where, +ip_reass(struct mbuf *m, #ifdef IPDIVERT_44 - u_int32_t *divinfo, + u_int32_t *divinfo, #else /* IPDIVERT_44 */ - u_int16_t *divinfo, + u_int16_t *divinfo, #endif /* IPDIVERT_44 */ - u_int16_t *divcookie) + u_int16_t *divcookie) #else /* IPDIVERT */ -ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) +ip_reass(struct mbuf *m) #endif /* IPDIVERT */ { - struct ip *ip = mtod(m, struct ip *); - struct mbuf *p = 0, *q, *nq; - struct mbuf *t; - int hlen = IP_VHL_HL(ip->ip_vhl) << 2; - int i, next; + struct ip *ip; + struct mbuf *p, *q, *nq, *t; + struct ipq *fp = NULL; + struct ipqhead *head; + int i, hlen, next; u_int8_t ecn, ecn0; + uint32_t csum, csum_flags; + uint16_t hash; + struct fq_head dfq; + + MBUFQ_INIT(&dfq); /* for deferred frees */ + + /* If maxnipq or maxfragsperpacket is 0, never accept fragments. */ + if (maxnipq == 0 || maxfragsperpacket == 0) { + ipstat.ips_fragments++; + ipstat.ips_fragdropped++; + m_freem(m); + if (nipq > 0) { + lck_mtx_lock(&ipqlock); + frag_sched_timeout(); /* purge stale fragments */ + lck_mtx_unlock(&ipqlock); + } + return (NULL); + } + + ip = mtod(m, struct ip *); + hlen = IP_VHL_HL(ip->ip_vhl) << 2; + + lck_mtx_lock(&ipqlock); + + hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); + head = &ipq[hash]; + + /* + * Look for queue of fragments + * of this datagram. + */ + TAILQ_FOREACH(fp, head, ipq_list) { + if (ip->ip_id == fp->ipq_id && + ip->ip_src.s_addr == fp->ipq_src.s_addr && + ip->ip_dst.s_addr == fp->ipq_dst.s_addr && +#if CONFIG_MACF_NET + mac_ipq_label_compare(m, fp) && +#endif + ip->ip_p == fp->ipq_p) + goto found; + } + + fp = NULL; + + /* + * Attempt to trim the number of allocated fragment queues if it + * exceeds the administrative limit. 
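 */

/*
 * Aside (illustrative): bucket selection for the lookup above, using
 * the IPREASS_HASH formula from the definition this patch removes
 * (the patched tree presumably keeps an equivalent macro in a header
 * outside this hunk). Low nibbles of the source address mix with the
 * IP ID, masked to one of 64 TAILQ buckets.
 */
#include <stdio.h>
#include <stdint.h>

#define NHASH		64		/* IPREASS_NHASH */
#define HMASK		(NHASH - 1)	/* IPREASS_HMASK */
#define REASS_HASH(x, y) \
	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & HMASK)

int
main(void)
{
	uint32_t src = 0x0a000001;	/* 10.0.0.1, host order */
	uint16_t id = 0x1234;

	printf("bucket: %u\n", (unsigned int)REASS_HASH(src, id)); /* 53 */
	return (0);
}

/*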
+ */ + if ((nipq > (unsigned)maxnipq) && (maxnipq > 0)) { + /* + * drop something from the tail of the current queue + * before proceeding further + */ + struct ipq *fq = TAILQ_LAST(head, ipqhead); + if (fq == NULL) { /* gak */ + for (i = 0; i < IPREASS_NHASH; i++) { + struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead); + if (r) { + ipstat.ips_fragtimeout += r->ipq_nfrags; + frag_freef(&ipq[i], r); + break; + } + } + } else { + ipstat.ips_fragtimeout += fq->ipq_nfrags; + frag_freef(head, fq); + } + } + +found: + /* + * Leverage partial checksum offload for IP fragments. Narrow down + * the scope to cover only UDP without IP options, as that is the + * most common case. + * + * Perform 1's complement adjustment of octets that got included/ + * excluded in the hardware-calculated checksum value. Ignore cases + * where the value includes or excludes the IP header span, as the + * sum for those octets would already be 0xffff and thus no-op. + */ + if (ip->ip_p == IPPROTO_UDP && hlen == sizeof (struct ip) && + (m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PARTIAL)) { + uint32_t start; + + start = m->m_pkthdr.csum_rx_start; + csum = m->m_pkthdr.csum_rx_val; - lck_mtx_assert(ip_mutex, LCK_MTX_ASSERT_OWNED); + if (start != 0 && start != hlen) { +#if BYTE_ORDER != BIG_ENDIAN + if (start < hlen) { + HTONS(ip->ip_len); + HTONS(ip->ip_off); + } +#endif + /* callee folds in sum */ + csum = m_adj_sum16(m, start, hlen, csum); +#if BYTE_ORDER != BIG_ENDIAN + if (start < hlen) { + NTOHS(ip->ip_off); + NTOHS(ip->ip_len); + } +#endif + } + csum_flags = m->m_pkthdr.csum_flags; + } else { + csum = 0; + csum_flags = 0; + } + + /* Invalidate checksum */ + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + + ipstat.ips_fragments++; + + /* + * Adjust ip_len to not reflect header, + * convert offset of this to bytes. + */ + ip->ip_len -= hlen; + if (ip->ip_off & IP_MF) { + /* + * Make sure that fragments have a data length + * that's a non-zero multiple of 8 bytes. + */ + if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { + OSAddAtomic(1, &ipstat.ips_toosmall); + /* + * Reassembly queue may have been found if previous + * fragments were valid; given that this one is bad, + * we need to drop it. Make sure to set fp to NULL + * if not already, since we don't want to decrement + * ipq_nfrags as it doesn't include this packet. + */ + fp = NULL; + goto dropfrag; + } + m->m_flags |= M_FRAG; + } else { + /* Clear the flag in case packet comes from loopback */ + m->m_flags &= ~M_FRAG; + } + ip->ip_off <<= 3; + + m->m_pkthdr.pkt_hdr = ip; + + /* Previous ip_reass() started here. */ /* * Presence of header sizes in mbufs * would confuse code below. @@ -1478,24 +1606,22 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) m->m_data += hlen; m->m_len -= hlen; - if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) - m->m_pkthdr.csum_flags = 0; /* * If first fragment to arrive, create a reassembly queue. 
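 */

/*
 * Aside (illustrative): the fragment arithmetic above. ip_off is a
 * 13-bit field counting 8-byte units (hence the "<< 3"), and every
 * non-final fragment (IP_MF set) must carry a non-zero multiple of
 * 8 data bytes.
 */
#include <stdio.h>

static int
frag_len_ok(unsigned int data_len, int more_fragments)
{
	if (more_fragments && (data_len == 0 || (data_len & 0x7) != 0))
		return (0);
	return (1);
}

int
main(void)
{
	unsigned int off_field = 185;	/* offset field, 8-byte units */

	printf("byte offset: %u\n", off_field << 3);		/* 1480 */
	printf("1480 bytes, MF=1: %d\n", frag_len_ok(1480, 1));	/* 1 */
	printf("1481 bytes, MF=1: %d\n", frag_len_ok(1481, 1));	/* 0 */
	printf("  21 bytes, MF=0: %d\n", frag_len_ok(21, 0));	/* 1 */
	return (0);
}

/*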
*/ - if (fp == 0) { - if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL) + if (fp == NULL) { + fp = ipq_alloc(M_DONTWAIT); + if (fp == NULL) goto dropfrag; - fp = mtod(t, struct ipq *); #if CONFIG_MACF_NET if (mac_ipq_label_init(fp, M_NOWAIT) != 0) { - m_free(t); + ipq_free(fp); fp = NULL; goto dropfrag; } mac_ipq_label_associate(m, fp); #endif - insque((void*)fp, (void*)where); + TAILQ_INSERT_HEAD(head, fp, ipq_list); nipq++; fp->ipq_nfrags = 1; fp->ipq_ttl = IPFRAGTTL; @@ -1505,16 +1631,32 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) fp->ipq_dst = ip->ip_dst; fp->ipq_frags = m; m->m_nextpkt = NULL; + /* + * If the first fragment has valid checksum offload + * info, the rest of fragments are eligible as well. + */ + if (csum_flags != 0) { + fp->ipq_csum = csum; + fp->ipq_csum_flags = csum_flags; + } #if IPDIVERT + /* + * Transfer firewall instructions to the fragment structure. + * Only trust info in the fragment at offset 0. + */ + if (ip->ip_off == 0) { #ifdef IPDIVERT_44 - fp->ipq_div_info = 0; + fp->ipq_div_info = *divinfo; #else - fp->ipq_divert = 0; + fp->ipq_divert = *divinfo; #endif - fp->ipq_div_cookie = 0; -#endif - TAILQ_INSERT_HEAD(&ipq_list, fp, ipq_list); - goto inserted; + fp->ipq_div_cookie = *divcookie; + } + *divinfo = 0; + *divcookie = 0; +#endif /* IPDIVERT */ + m = NULL; /* nothing to return */ + goto done; } else { fp->ipq_nfrags++; #if CONFIG_MACF_NET @@ -1522,7 +1664,7 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) #endif } -#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) +#define GETIP(m) ((struct ip *)((m)->m_pkthdr.pkt_hdr)) /* * Handle ECN by comparing this segment with the first one; @@ -1553,7 +1695,7 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) * segment. If it provides all of our data, drop us, otherwise * stick new segment in the proper place. * - * If some of the data is dropped from the the preceding + * If some of the data is dropped from the preceding * segment, then it's checksum is invalidated. */ if (p) { @@ -1562,7 +1704,7 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) if (i >= ip->ip_len) goto dropfrag; m_adj(m, i); - m->m_pkthdr.csum_flags = 0; + fp->ipq_csum_flags = 0; ip->ip_off += i; ip->ip_len -= i; } @@ -1578,25 +1720,32 @@ ip_reass(struct mbuf *m, struct ipq *fp, struct ipq *where) * if they are completely covered, dequeue them. */ for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off; - q = nq) { - i = (ip->ip_off + ip->ip_len) - - GETIP(q)->ip_off; + q = nq) { + i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off; if (i < GETIP(q)->ip_len) { GETIP(q)->ip_len -= i; GETIP(q)->ip_off += i; m_adj(q, i); - q->m_pkthdr.csum_flags = 0; + fp->ipq_csum_flags = 0; break; } nq = q->m_nextpkt; m->m_nextpkt = nq; - OSAddAtomic(1, &ipstat.ips_fragdropped); + ipstat.ips_fragdropped++; fp->ipq_nfrags--; - m_freem(q); + /* defer freeing until after lock is dropped */ + MBUFQ_ENQUEUE(&dfq, q); } -inserted: - currentfrags++; + /* + * If this fragment contains similar checksum offload info + * as that of the existing ones, accumulate checksum. Otherwise, + * invalidate checksum offload info for the entire datagram. 
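 */

/*
 * Aside (illustrative): the overlap handling in the insertion loop
 * above, reduced to its arithmetic. A new fragment that overhangs a
 * queued successor either shortens that successor (partial overlap)
 * or covers it entirely, in which case the successor is dropped.
 */
#include <stdio.h>

struct frag { unsigned int off, len; };

/* 0: no overlap, 1: successor trimmed, 2: successor fully covered */
static int
trim_successor(const struct frag *nf, struct frag *q)
{
	int i = (int)(nf->off + nf->len) - (int)q->off;

	if (i <= 0)
		return (0);
	if ((unsigned int)i < q->len) {
		q->off += i;	/* advance past the overlapping bytes */
		q->len -= i;
		return (1);
	}
	return (2);
}

int
main(void)
{
	struct frag nf = { 0, 1480 }, q = { 1000, 1480 };

	printf("result %d, q now off=%u len=%u\n",
	    trim_successor(&nf, &q), q.off, q.len);	/* 1, 1480, 1000 */
	return (0);
}

/*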
+ */ + if (csum_flags != 0 && csum_flags == fp->ipq_csum_flags) + fp->ipq_csum += csum; + else if (fp->ipq_csum_flags != 0) + fp->ipq_csum_flags = 0; #if IPDIVERT /* @@ -1605,15 +1754,15 @@ inserted: */ if (ip->ip_off == 0) { #ifdef IPDIVERT_44 - fp->ipq_div_info = *divinfo; + fp->ipq_div_info = *divinfo; #else - fp->ipq_divert = *divinfo; + fp->ipq_divert = *divinfo; #endif - fp->ipq_div_cookie = *divcookie; + fp->ipq_div_cookie = *divcookie; } *divinfo = 0; *divcookie = 0; -#endif +#endif /* IPDIVERT */ /* * Check for complete reassembly and perform frag per packet @@ -1629,20 +1778,22 @@ inserted: for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) { if (GETIP(q)->ip_off != next) { if (fp->ipq_nfrags > maxfragsperpacket) { - OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragdropped); - ip_freef(fp); + ipstat.ips_fragdropped += fp->ipq_nfrags; + frag_freef(head, fp); } - return (0); + m = NULL; /* nothing to return */ + goto done; } next += GETIP(q)->ip_len; } /* Make sure the last packet didn't have the IP_MF flag */ if (p->m_flags & M_FRAG) { if (fp->ipq_nfrags > maxfragsperpacket) { - OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragdropped); - ip_freef(fp); + ipstat.ips_fragdropped += fp->ipq_nfrags; + frag_freef(head, fp); } - return (0); + m = NULL; /* nothing to return */ + goto done; } /* @@ -1651,10 +1802,11 @@ inserted: q = fp->ipq_frags; ip = GETIP(q); if (next + (IP_VHL_HL(ip->ip_vhl) << 2) > IP_MAXPACKET) { - OSAddAtomic(1, &ipstat.ips_toolong); - OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragdropped); - ip_freef(fp); - return (0); + ipstat.ips_toolong++; + ipstat.ips_fragdropped += fp->ipq_nfrags; + frag_freef(head, fp); + m = NULL; /* nothing to return */ + goto done; } /* @@ -1662,22 +1814,39 @@ inserted: */ m = q; t = m->m_next; - m->m_next = 0; + m->m_next = NULL; m_cat(m, t); nq = q->m_nextpkt; - q->m_nextpkt = 0; + q->m_nextpkt = NULL; for (q = nq; q != NULL; q = nq) { nq = q->m_nextpkt; q->m_nextpkt = NULL; - if (q->m_pkthdr.csum_flags & CSUM_TCP_SUM16) - m->m_pkthdr.csum_flags = 0; - else { - m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags; - m->m_pkthdr.csum_data += q->m_pkthdr.csum_data; - } m_cat(m, q); } + /* + * Store partial hardware checksum info from the fragment queue; + * the receive start offset is set to 20 bytes (see code at the + * top of this routine.) + */ + if (fp->ipq_csum_flags != 0) { + csum = fp->ipq_csum; + + ADDCARRY(csum); + + m->m_pkthdr.csum_rx_val = csum; + m->m_pkthdr.csum_rx_start = sizeof (struct ip); + m->m_pkthdr.csum_flags = fp->ipq_csum_flags; + } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + /* loopback checksums are always OK */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL; + m->m_pkthdr.csum_flags = + CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID; + } + #if IPDIVERT /* * Extract firewall instructions from the fragment structure. @@ -1688,48 +1857,69 @@ inserted: *divinfo = fp->ipq_divert; #endif *divcookie = fp->ipq_div_cookie; -#endif +#endif /* IPDIVERT */ #if CONFIG_MACF_NET mac_mbuf_label_associate_ipq(fp, m); mac_ipq_label_destroy(fp); #endif /* - * Create header for new ip packet by - * modifying header of first packet; - * dequeue and discard fragment reassembly header. + * Create header for new ip packet by modifying header of first + * packet; dequeue and discard fragment reassembly header. * Make header visible. 
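 */

/*
 * Aside (illustrative): the completeness walk above, reduced. A
 * datagram reassembles only once its sorted fragments tile the byte
 * range with no gaps ahead of the final (MF clear) piece.
 */
#include <stdio.h>

static int
tiles_completely(const unsigned int off[], const unsigned int len[], int n)
{
	unsigned int next = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (off[i] != next)
			return (0);	/* gap: keep waiting */
		next += len[i];
	}
	return (1);
}

int
main(void)
{
	unsigned int off[] = { 0, 1480, 2960 };
	unsigned int len[] = { 1480, 1480, 100 };

	printf("complete: %d\n", tiles_completely(off, len, 3));  /* 1 */
	len[1] = 1000;	/* shrink the middle piece: a gap appears */
	printf("complete: %d\n", tiles_completely(off, len, 3));  /* 0 */
	return (0);
}

/*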
*/ - ip->ip_len = next; + ip->ip_len = (IP_VHL_HL(ip->ip_vhl) << 2) + next; ip->ip_src = fp->ipq_src; ip->ip_dst = fp->ipq_dst; - remque((void*)fp); - TAILQ_REMOVE(&ipq_list, fp, ipq_list); - currentfrags -= fp->ipq_nfrags; - nipq--; - (void) m_free(dtom(fp)); + + fp->ipq_frags = NULL; /* return to caller as 'm' */ + frag_freef(head, fp); + fp = NULL; + m->m_len += (IP_VHL_HL(ip->ip_vhl) << 2); m->m_data -= (IP_VHL_HL(ip->ip_vhl) << 2); /* some debugging cruft by sklower, below, will go away soon */ - if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */ - int plen = 0; - for (t = m; t; t = t->m_next) - plen += t->m_len; - m->m_pkthdr.len = plen; - } + if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */ + m_fixhdr(m); + ipstat.ips_reassembled++; + + /* arm the purge timer if not already and if there's work to do */ + frag_sched_timeout(); + lck_mtx_unlock(&ipqlock); + /* perform deferred free (if needed) now that lock is dropped */ + if (!MBUFQ_EMPTY(&dfq)) + MBUFQ_DRAIN(&dfq); + VERIFY(MBUFQ_EMPTY(&dfq)); return (m); +done: + VERIFY(m == NULL); + /* arm the purge timer if not already and if there's work to do */ + frag_sched_timeout(); + lck_mtx_unlock(&ipqlock); + /* perform deferred free (if needed) */ + if (!MBUFQ_EMPTY(&dfq)) + MBUFQ_DRAIN(&dfq); + VERIFY(MBUFQ_EMPTY(&dfq)); + return (NULL); + dropfrag: #if IPDIVERT *divinfo = 0; *divcookie = 0; -#endif - OSAddAtomic(1, &ipstat.ips_fragdropped); - if (fp != 0) +#endif /* IPDIVERT */ + ipstat.ips_fragdropped++; + if (fp != NULL) fp->ipq_nfrags--; + /* arm the purge timer if not already and if there's work to do */ + frag_sched_timeout(); + lck_mtx_unlock(&ipqlock); m_freem(m); - return (0); - + /* perform deferred free (if needed) */ + if (!MBUFQ_EMPTY(&dfq)) + MBUFQ_DRAIN(&dfq); + VERIFY(MBUFQ_EMPTY(&dfq)); + return (NULL); #undef GETIP } @@ -1738,38 +1928,47 @@ dropfrag: * associated datagrams. */ static void -ip_freef(struct ipq *fp) +frag_freef(struct ipqhead *fhp, struct ipq *fp) { - lck_mtx_assert(ip_mutex, LCK_MTX_ASSERT_OWNED); - currentfrags -= fp->ipq_nfrags; - m_freem_list(fp->ipq_frags); - remque((void*)fp); - TAILQ_REMOVE(&ipq_list, fp, ipq_list); - (void) m_free(dtom(fp)); + lck_mtx_assert(&ipqlock, LCK_MTX_ASSERT_OWNED); + + fp->ipq_nfrags = 0; + if (fp->ipq_frags != NULL) { + m_freem_list(fp->ipq_frags); + fp->ipq_frags = NULL; + } + TAILQ_REMOVE(fhp, fp, ipq_list); nipq--; + ipq_free(fp); } /* - * IP timer processing; - * if a timer expires on a reassembly - * queue, discard it. + * IP reassembly timer processing */ -void -ip_slowtimo(void) +static void +frag_timeout(void *arg) { +#pragma unused(arg) struct ipq *fp; int i; - lck_mtx_lock(ip_mutex); + + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + + lck_mtx_lock(&ipqlock); for (i = 0; i < IPREASS_NHASH; i++) { - fp = ipq[i].next; - if (fp == 0) - continue; - while (fp != &ipq[i]) { - --fp->ipq_ttl; - fp = fp->next; - if (fp->prev->ipq_ttl == 0) { - OSAddAtomic(fp->ipq_nfrags, &ipstat.ips_fragtimeout); - ip_freef(fp->prev); + for (fp = TAILQ_FIRST(&ipq[i]); fp; ) { + struct ipq *fpp; + + fpp = fp; + fp = TAILQ_NEXT(fp, ipq_list); + if (--fpp->ipq_ttl == 0) { + ipstat.ips_fragtimeout += fpp->ipq_nfrags; + frag_freef(&ipq[i], fpp); } } } @@ -1778,35 +1977,93 @@ ip_slowtimo(void) * (due to the limit being lowered), drain off * enough to get down to the new limit. 
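 */

/*
 * Aside (illustrative): the ADDCARRY() step above. The 32-bit
 * accumulator of 16-bit one's-complement terms (ipq_csum) must fold
 * its carries back in before the result lands in csum_rx_val.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t
addcarry(uint32_t sum)
{
	while (sum > 0xffff)
		sum = (sum >> 16) + (sum & 0xffff);	/* fold carries */
	return ((uint16_t)sum);
}

int
main(void)
{
	/* e.g. three fragments' partial sums accumulated in ipq_csum */
	uint32_t acc = 0xffffU + 0x1234U + 0xfedcU;

	printf("folded: 0x%04x\n", addcarry(acc));	/* 0x1111 */
	return (0);
}

/*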
*/ - if (maxnipq >= 0 && nipq > maxnipq) { - for (i = 0; i < IPREASS_NHASH; i++) { - while (nipq > maxnipq && - (ipq[i].next != &ipq[i])) { - OSAddAtomic(ipq[i].next->ipq_nfrags, &ipstat.ips_fragdropped); - ip_freef(ipq[i].next); + if (maxnipq >= 0 && nipq > (unsigned)maxnipq) { + for (i = 0; i < IPREASS_NHASH; i++) { + while (nipq > (unsigned)maxnipq && + !TAILQ_EMPTY(&ipq[i])) { + ipstat.ips_fragdropped += + TAILQ_FIRST(&ipq[i])->ipq_nfrags; + frag_freef(&ipq[i], TAILQ_FIRST(&ipq[i])); } } } - lck_mtx_unlock(ip_mutex); + /* re-arm the purge timer if there's work to do */ + frag_timeout_run = 0; + frag_sched_timeout(); + lck_mtx_unlock(&ipqlock); +} + +static void +frag_sched_timeout(void) +{ + lck_mtx_assert(&ipqlock, LCK_MTX_ASSERT_OWNED); + + if (!frag_timeout_run && nipq > 0) { + frag_timeout_run = 1; + timeout(frag_timeout, NULL, hz); + } } /* * Drain off all datagram fragments. */ -void -ip_drain(void) +static void +frag_drain(void) { - int i; + int i; - lck_mtx_lock(ip_mutex); + lck_mtx_lock(&ipqlock); for (i = 0; i < IPREASS_NHASH; i++) { - while (ipq[i].next != &ipq[i]) { - OSAddAtomic(ipq[i].next->ipq_nfrags, &ipstat.ips_fragdropped); - ip_freef(ipq[i].next); + while (!TAILQ_EMPTY(&ipq[i])) { + ipstat.ips_fragdropped += + TAILQ_FIRST(&ipq[i])->ipq_nfrags; + frag_freef(&ipq[i], TAILQ_FIRST(&ipq[i])); } } - lck_mtx_unlock(ip_mutex); - in_rtqdrain(); + lck_mtx_unlock(&ipqlock); +} + +static struct ipq * +ipq_alloc(int how) +{ + struct mbuf *t; + struct ipq *fp; + + /* + * See comments in ipq_updateparams(). Keep the count separate + * from nipq since the latter represents the elements already + * in the reassembly queues. + */ + if (ipq_limit > 0 && ipq_count > ipq_limit) + return (NULL); + + t = m_get(how, MT_FTABLE); + if (t != NULL) { + atomic_add_32(&ipq_count, 1); + fp = mtod(t, struct ipq *); + bzero(fp, sizeof (*fp)); + } else { + fp = NULL; + } + return (fp); +} + +static void +ipq_free(struct ipq *fp) +{ + (void) m_free(dtom(fp)); + atomic_add_32(&ipq_count, -1); +} + +/* + * Drain callback + */ +void +ip_drain(void) +{ + frag_drain(); /* fragments */ + in_rtqdrain(); /* protocol cloned routes */ + in_arpdrain(NULL); /* cloned routes: ARP */ } /* @@ -1822,8 +2079,9 @@ ip_drain(void) * 0 if the packet should be processed further. 
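 */

/*
 * Aside (illustrative): the frag_sched_timeout() pattern above -- a
 * one-shot callout re-armed only while entries remain, guarded by an
 * "already scheduled" flag that the kernel manipulates under ipqlock.
 * printf() stands in for timeout(frag_timeout, NULL, hz).
 */
#include <stdio.h>

static int timeout_run;	/* frag_timeout_run */
static int nentries;	/* nipq */

static void
sched_timeout(void)
{
	if (!timeout_run && nentries > 0) {
		timeout_run = 1;
		printf("armed one-shot purge timer\n");
	}
}

int
main(void)
{
	nentries = 3;
	sched_timeout();	/* arms */
	sched_timeout();	/* no-op: already armed */
	timeout_run = 0;	/* as frag_timeout() does before re-arming */
	nentries = 0;
	sched_timeout();	/* no-op: nothing left to purge */
	return (0);
}

/*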
*/ static int -ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop) +ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop) { +#pragma unused(pass) struct ip *ip = mtod(m, struct ip *); u_char *cp; struct ip_timestamp *ipt; @@ -1832,7 +2090,7 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop) struct in_addr *sin, dst; n_time ntime; struct sockaddr_in ipaddr = { - sizeof (ipaddr), AF_INET , 0 , { 0 }, { 0, } }; + sizeof (ipaddr), AF_INET, 0, { 0 }, { 0, } }; /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); @@ -1847,12 +2105,13 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop) if (opt == IPOPT_NOP) optlen = 1; else { - if (cnt < IPOPT_OLEN + sizeof(*cp)) { + if (cnt < IPOPT_OLEN + sizeof (*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; } optlen = cp[IPOPT_OLEN]; - if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) { + if (optlen < IPOPT_OLEN + sizeof (*cp) || + optlen > cnt) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; } @@ -1873,7 +2132,7 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop) */ case IPOPT_LSRR: case IPOPT_SSRR: - if (optlen < IPOPT_OFFSET + sizeof(*cp)) { + if (optlen < IPOPT_OFFSET + sizeof (*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; } @@ -1882,9 +2141,8 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop) goto bad; } ipaddr.sin_addr = ip->ip_dst; - ia = (struct in_ifaddr *) - ifa_ifwithaddr((struct sockaddr *)&ipaddr); - if (ia == 0) { + ia = (struct in_ifaddr *)ifa_ifwithaddr(SA(&ipaddr)); + if (ia == NULL) { if (opt == IPOPT_SSRR) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; @@ -1897,13 +2155,12 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop) * yet; nothing to do except forward. */ break; - } - else { + } else { IFA_REMREF(&ia->ia_ifa); ia = NULL; } off--; /* 0 origin */ - if (off > optlen - (int)sizeof(struct in_addr)) { + if (off > optlen - (int)sizeof (struct in_addr)) { /* * End of source route. Should be for us. */ @@ -1922,15 +2179,19 @@ ip_dooptions(struct mbuf *m, __unused int pass, struct sockaddr_in *next_hop) */ nosourcerouting: log(LOG_WARNING, - "attempted source route from %s to %s\n", - inet_ntop(AF_INET, &ip->ip_src, buf, sizeof(buf)), - inet_ntop(AF_INET, &ip->ip_dst, buf2, sizeof(buf2))); + "attempted source route from %s " + "to %s\n", + inet_ntop(AF_INET, &ip->ip_src, + buf, sizeof (buf)), + inet_ntop(AF_INET, &ip->ip_dst, + buf2, sizeof (buf2))); type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; goto bad; } else { /* - * Not acting as a router, so silently drop. + * Not acting as a router, + * so silently drop. 
*/ OSAddAtomic(1, &ipstat.ips_cantforward); m_freem(m); @@ -1941,32 +2202,31 @@ nosourcerouting: /* * locate outgoing interface */ - (void)memcpy(&ipaddr.sin_addr, cp + off, - sizeof(ipaddr.sin_addr)); + (void) memcpy(&ipaddr.sin_addr, cp + off, + sizeof (ipaddr.sin_addr)); if (opt == IPOPT_SSRR) { #define INA struct in_ifaddr * if ((ia = (INA)ifa_ifwithdstaddr( - (struct sockaddr *)&ipaddr)) == 0) { - ia = (INA)ifa_ifwithnet( - (struct sockaddr *)&ipaddr); + SA(&ipaddr))) == NULL) { + ia = (INA)ifa_ifwithnet(SA(&ipaddr)); } } else { ia = ip_rtaddr(ipaddr.sin_addr); } - if (ia == 0) { + if (ia == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; goto bad; } ip->ip_dst = ipaddr.sin_addr; IFA_LOCK(&ia->ia_ifa); - (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr), - sizeof(struct in_addr)); + (void) memcpy(cp + off, &(IA_SIN(ia)->sin_addr), + sizeof (struct in_addr)); IFA_UNLOCK(&ia->ia_ifa); IFA_REMREF(&ia->ia_ifa); ia = NULL; - cp[IPOPT_OFFSET] += sizeof(struct in_addr); + cp[IPOPT_OFFSET] += sizeof (struct in_addr); /* * Let ip_intr's mcast routing check handle mcast pkts */ @@ -1974,7 +2234,7 @@ nosourcerouting: break; case IPOPT_RR: - if (optlen < IPOPT_OFFSET + sizeof(*cp)) { + if (optlen < IPOPT_OFFSET + sizeof (*cp)) { code = &cp[IPOPT_OFFSET] - (u_char *)ip; goto bad; } @@ -1986,29 +2246,28 @@ nosourcerouting: * If no space remains, ignore. */ off--; /* 0 origin */ - if (off > optlen - (int)sizeof(struct in_addr)) + if (off > optlen - (int)sizeof (struct in_addr)) break; - (void)memcpy(&ipaddr.sin_addr, &ip->ip_dst, - sizeof(ipaddr.sin_addr)); + (void) memcpy(&ipaddr.sin_addr, &ip->ip_dst, + sizeof (ipaddr.sin_addr)); /* * locate outgoing interface; if we're the destination, * use the incoming interface (should be same). */ - if ((ia = (INA)ifa_ifwithaddr((struct sockaddr *) - &ipaddr)) == 0) { - if ((ia = ip_rtaddr(ipaddr.sin_addr)) == 0) { + if ((ia = (INA)ifa_ifwithaddr(SA(&ipaddr))) == NULL) { + if ((ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; goto bad; } } IFA_LOCK(&ia->ia_ifa); - (void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr), - sizeof(struct in_addr)); + (void) memcpy(cp + off, &(IA_SIN(ia)->sin_addr), + sizeof (struct in_addr)); IFA_UNLOCK(&ia->ia_ifa); IFA_REMREF(&ia->ia_ifa); ia = NULL; - cp[IPOPT_OFFSET] += sizeof(struct in_addr); + cp[IPOPT_OFFSET] += sizeof (struct in_addr); break; case IPOPT_TS: @@ -2023,7 +2282,7 @@ nosourcerouting: goto bad; } if (ipt->ipt_ptr > - ipt->ipt_len - (int)sizeof(int32_t)) { + ipt->ipt_len - (int)sizeof (int32_t)) { if (++ipt->ipt_oflw == 0) { code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; @@ -2038,41 +2297,41 @@ nosourcerouting: break; case IPOPT_TS_TSANDADDR: - if (ipt->ipt_ptr - 1 + sizeof(n_time) + - sizeof(struct in_addr) > ipt->ipt_len) { + if (ipt->ipt_ptr - 1 + sizeof (n_time) + + sizeof (struct in_addr) > ipt->ipt_len) { code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; goto bad; } ipaddr.sin_addr = dst; - ia = (INA)ifaof_ifpforaddr((struct sockaddr *) - &ipaddr, m->m_pkthdr.rcvif); - if (ia == 0) + ia = (INA)ifaof_ifpforaddr(SA(&ipaddr), + m->m_pkthdr.rcvif); + if (ia == NULL) continue; IFA_LOCK(&ia->ia_ifa); - (void)memcpy(sin, &IA_SIN(ia)->sin_addr, - sizeof(struct in_addr)); + (void) memcpy(sin, &IA_SIN(ia)->sin_addr, + sizeof (struct in_addr)); IFA_UNLOCK(&ia->ia_ifa); - ipt->ipt_ptr += sizeof(struct in_addr); + ipt->ipt_ptr += sizeof (struct in_addr); IFA_REMREF(&ia->ia_ifa); ia = NULL; break; case IPOPT_TS_PRESPEC: - if (ipt->ipt_ptr - 1 + sizeof(n_time) + - sizeof(struct 
in_addr) > ipt->ipt_len) { + if (ipt->ipt_ptr - 1 + sizeof (n_time) + + sizeof (struct in_addr) > ipt->ipt_len) { code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; goto bad; } - (void)memcpy(&ipaddr.sin_addr, sin, - sizeof(struct in_addr)); - if ((ia = (struct in_ifaddr*)ifa_ifwithaddr( - (struct sockaddr *)&ipaddr)) == 0) + (void) memcpy(&ipaddr.sin_addr, sin, + sizeof (struct in_addr)); + if ((ia = (struct in_ifaddr *)ifa_ifwithaddr( + SA(&ipaddr))) == NULL) continue; IFA_REMREF(&ia->ia_ifa); ia = NULL; - ipt->ipt_ptr += sizeof(struct in_addr); + ipt->ipt_ptr += sizeof (struct in_addr); break; default: @@ -2082,9 +2341,9 @@ nosourcerouting: goto bad; } ntime = iptime(); - (void)memcpy(cp + ipt->ipt_ptr - 1, &ntime, - sizeof(n_time)); - ipt->ipt_ptr += sizeof(n_time); + (void) memcpy(cp + ipt->ipt_ptr - 1, &ntime, + sizeof (n_time)); + ipt->ipt_ptr += sizeof (n_time); } } if (forward && ipforwarding) { @@ -2093,12 +2352,74 @@ nosourcerouting: } return (0); bad: - ip->ip_len -= IP_VHL_HL(ip->ip_vhl) << 2; /* XXX icmp_error adds in hdr length */ + /* XXX icmp_error adds in hdr length */ + ip->ip_len -= IP_VHL_HL(ip->ip_vhl) << 2; icmp_error(m, type, code, 0, 0); OSAddAtomic(1, &ipstat.ips_badoptions); return (1); } +/* + * Check for the presence of the IP Router Alert option [RFC2113] + * in the header of an IPv4 datagram. + * + * This call is not intended for use from the forwarding path; it is here + * so that protocol domains may check for the presence of the option. + * Given how FreeBSD's IPv4 stack is currently structured, the Router Alert + * option does not have much relevance to the implementation, though this + * may change in future. + * Router alert options SHOULD be passed if running in IPSTEALTH mode and + * we are not the endpoint. + * Length checks on individual options should already have been performed + * by ip_dooptions(); therefore they are folded under DIAGNOSTIC here. + * + * Return zero if not present or options are invalid, non-zero if present. + */ +int +ip_checkrouteralert(struct mbuf *m) +{ + struct ip *ip = mtod(m, struct ip *); + u_char *cp; + int opt, optlen, cnt, found_ra; + + found_ra = 0; + cp = (u_char *)(ip + 1); + cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[IPOPT_OPTVAL]; + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) + optlen = 1; + else { +#ifdef DIAGNOSTIC + if (cnt < IPOPT_OLEN + sizeof (*cp)) + break; +#endif + optlen = cp[IPOPT_OLEN]; +#ifdef DIAGNOSTIC + if (optlen < IPOPT_OLEN + sizeof (*cp) || optlen > cnt) + break; +#endif + } + switch (opt) { + case IPOPT_RA: +#ifdef DIAGNOSTIC + if (optlen != IPOPT_OFFSET + sizeof (uint16_t) || + (*((uint16_t *)(void *)&cp[IPOPT_OFFSET]) != 0)) + break; + else +#endif + found_ra = 1; + break; + default: + break; + } + } + + return (found_ra); +} + /* * Given address of next destination (final or next hop), * return internet address info of interface to be used to get there.
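ip_checkrouteralert() above is another instance of the standard IPv4 option walk: advance one octet for NOP, stop at EOL, and trust the per-option length octet only after bounds-checking it. A freestanding sketch of the same walk under those rules, assuming invented names (has_router_alert, the OPT_* constants) in place of the kernel's IPOPT_* macros and mbuf accessors:

    #include <stddef.h>
    #include <stdint.h>

    #define OPT_EOL 0       /* end of option list */
    #define OPT_NOP 1       /* no-operation padding */
    #define OPT_RA  148     /* Router Alert, RFC 2113 */

    /*
     * Walk the options area of an IPv4 header (the bytes between the
     * fixed 20-byte header and the end implied by IHL) and report
     * whether a well-formed Router Alert option is present.
     */
    int
    has_router_alert(const uint8_t *opts, size_t optbytes)
    {
        size_t off = 0;

        while (off < optbytes) {
            uint8_t type = opts[off];
            size_t len;

            if (type == OPT_EOL)
                break;
            if (type == OPT_NOP) {      /* single-octet option */
                off++;
                continue;
            }
            /* every other option carries a length octet */
            if (off + 2 > optbytes)
                return (0);             /* truncated */
            len = opts[off + 1];
            if (len < 2 || off + len > optbytes)
                return (0);             /* malformed length */
            /* RFC 2113: type 148, length 4, value zero */
            if (type == OPT_RA && len == 4 &&
                opts[off + 2] == 0 && opts[off + 3] == 0)
                return (1);
            off += len;
        }
        return (0);
    }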
@@ -2111,20 +2432,22 @@ ip_rtaddr(struct in_addr dst) struct route ro; bzero(&ro, sizeof (ro)); - sin = (struct sockaddr_in *)(void *)&ro.ro_dst; + sin = SIN(&ro.ro_dst); sin->sin_family = AF_INET; sin->sin_len = sizeof (*sin); sin->sin_addr = dst; rtalloc_ign(&ro, RTF_PRCLONING); - if (ro.ro_rt == NULL) + if (ro.ro_rt == NULL) { + ROUTE_RELEASE(&ro); return (NULL); + } RT_LOCK(ro.ro_rt); if ((rt_ifa = ro.ro_rt->rt_ifa) != NULL) IFA_ADDREF(rt_ifa); RT_UNLOCK(ro.ro_rt); - rtfree(ro.ro_rt); + ROUTE_RELEASE(&ro); return ((struct in_ifaddr *)rt_ifa); } @@ -2143,10 +2466,10 @@ save_rte(u_char *option, struct in_addr dst) if (ipprintfs) printf("save_rte: olen %d\n", olen); #endif - if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst))) + if (olen > sizeof (ip_srcrt) - (1 + sizeof (dst))) return; bcopy(option, ip_srcrt.srcopt, olen); - ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); + ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof (struct in_addr); ip_srcrt.dst = dst; } @@ -2162,16 +2485,17 @@ ip_srcroute(void) struct mbuf *m; if (ip_nhops == 0) - return ((struct mbuf *)0); + return (NULL); + m = m_get(M_DONTWAIT, MT_HEADER); - if (m == 0) - return ((struct mbuf *)0); + if (m == NULL) + return (NULL); -#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt)) +#define OPTSIZ (sizeof (ip_srcrt.nop) + sizeof (ip_srcrt.srcopt)) /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */ - m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) + - OPTSIZ; + m->m_len = ip_nhops * sizeof (struct in_addr) + + sizeof (struct in_addr) + OPTSIZ; #if DIAGNOSTIC if (ipprintfs) printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len); @@ -2184,7 +2508,8 @@ ip_srcroute(void) *(mtod(m, struct in_addr *)) = *p--; #if DIAGNOSTIC if (ipprintfs) - printf(" hops %lx", (u_int32_t)ntohl(mtod(m, struct in_addr *)->s_addr)); + printf(" hops %lx", + (u_int32_t)ntohl(mtod(m, struct in_addr *)->s_addr)); #endif /* @@ -2192,10 +2517,10 @@ ip_srcroute(void) */ ip_srcrt.nop = IPOPT_NOP; ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF; - (void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr), + (void) memcpy(mtod(m, caddr_t) + sizeof (struct in_addr), &ip_srcrt.nop, OPTSIZ); q = (struct in_addr *)(void *)(mtod(m, caddr_t) + - sizeof(struct in_addr) + OPTSIZ); + sizeof (struct in_addr) + OPTSIZ); #undef OPTSIZ /* * Record return path as an IP source route, @@ -2227,8 +2552,9 @@ ip_srcroute(void) * XXX should be deleted; last arg currently ignored. 
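The reply path here depends on ip_srcroute() emitting the recorded hops last-to-first (the *p-- copy loop), so a reply retraces the path the request took. The core transformation, sketched standalone with an invented name (reverse_route); the 9-hop bound follows from a 40-byte option area:

    #include <stddef.h>
    #include <netinet/in.h>

    #define MAX_HOPS 9      /* a 40-byte option area holds at most 9 hops */

    /*
     * Reverse a recorded source route so a reply retraces the path:
     * hops are emitted last-to-first, mirroring the *p-- copy loop in
     * ip_srcroute() above.
     */
    size_t
    reverse_route(const struct in_addr *hops, size_t nhops,
        struct in_addr *out, size_t outcap)
    {
        size_t i;

        if (nhops > MAX_HOPS || nhops > outcap)
            return (0);
        for (i = 0; i < nhops; i++)
            out[i] = hops[nhops - 1 - i];
        return (nhops);
    }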
*/ void -ip_stripoptions(struct mbuf *m, __unused struct mbuf *mopt) +ip_stripoptions(struct mbuf *m, struct mbuf *mopt) { +#pragma unused(mopt) int i; struct ip *ip = mtod(m, struct ip *); caddr_t opts; @@ -2244,7 +2570,7 @@ ip_stripoptions(struct mbuf *m, __unused struct mbuf *mopt) m->m_len -= olen; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len -= olen; - ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2); + ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof (struct ip) >> 2); } u_char inetctlerrmap[PRC_NCMDS] = { @@ -2273,8 +2599,7 @@ sysctl_ipforwarding SYSCTL_HANDLER_ARGS struct ifnet *ifp = ifindex2ifnet[i]; if (ifp != NULL) { lck_mtx_lock(&ifp->if_cached_route_lock); - if (ifp->if_fwd_route.ro_rt != NULL) - rtfree(ifp->if_fwd_route.ro_rt); + ROUTE_RELEASE(&ifp->if_fwd_route); bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route)); lck_mtx_unlock(&ifp->if_cached_route_lock); @@ -2303,7 +2628,7 @@ ip_fwd_route_copyout(struct ifnet *ifp, struct route *dst) if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) panic("%s: wrong or corrupted route: %p", __func__, src); - route_copyout(dst, src, sizeof(*dst)); + route_copyout(dst, src, sizeof (*dst)); lck_mtx_unlock(&ifp->if_cached_route_lock); } @@ -2321,7 +2646,7 @@ ip_fwd_route_copyin(struct ifnet *ifp, struct route *src) panic("%s: wrong or corrupted route: %p", __func__, src); if (ifp->if_fwd_cacheok) - route_copyin(src, dst, sizeof(*src)); + route_copyin(src, dst, sizeof (*src)); lck_mtx_unlock(&ifp->if_cached_route_lock); } @@ -2354,9 +2679,13 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) struct mbuf *mcopy; n_long dest; struct in_addr pkt_dst; - u_int32_t nextmtu = 0; - struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, 0 }; - struct ifnet *ifp = m->m_pkthdr.rcvif; + u_int32_t nextmtu = 0, len; + struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, 0, 0 }; + struct ifnet *rcvifp = m->m_pkthdr.rcvif; +#if IPSEC + struct secpolicy *sp = NULL; + int ipsecerror; +#endif /* IPSEC */ #if PF struct pf_mtag *pf_mtag; #endif /* PF */ @@ -2367,10 +2696,10 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) * Cache the destination address of the packet; this may be * changed by use of 'ipfw fwd'. */ - pkt_dst = next_hop ? next_hop->sin_addr : ip->ip_dst; -#else + pkt_dst = ((next_hop != NULL) ? 
next_hop->sin_addr : ip->ip_dst); +#else /* !IPFIREWALL */ pkt_dst = ip->ip_dst; -#endif +#endif /* !IPFIREWALL */ #if DIAGNOSTIC if (ipprintfs) @@ -2379,14 +2708,14 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) ip->ip_ttl); #endif - if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) { + if (m->m_flags & (M_BCAST|M_MCAST) || !in_canforward(pkt_dst)) { OSAddAtomic(1, &ipstat.ips_cantforward); m_freem(m); return; } #if IPSTEALTH if (!ipstealth) { -#endif +#endif /* IPSTEALTH */ if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); @@ -2394,7 +2723,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) } #if IPSTEALTH } -#endif +#endif /* IPSTEALTH */ #if PF pf_mtag = pf_find_mtag(m); @@ -2404,16 +2733,12 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) } #endif /* PF */ - ip_fwd_route_copyout(ifp, &fwd_rt); + ip_fwd_route_copyout(rcvifp, &fwd_rt); + + sin = SIN(&fwd_rt.ro_dst); + if (ROUTE_UNUSABLE(&fwd_rt) || pkt_dst.s_addr != sin->sin_addr.s_addr) { + ROUTE_RELEASE(&fwd_rt); - sin = (struct sockaddr_in *)(void *)&fwd_rt.ro_dst; - if (fwd_rt.ro_rt == NULL || - fwd_rt.ro_rt->generation_id != route_generation || - pkt_dst.s_addr != sin->sin_addr.s_addr) { - if (fwd_rt.ro_rt != NULL) { - rtfree(fwd_rt.ro_rt); - fwd_rt.ro_rt = NULL; - } sin->sin_family = AF_INET; sin->sin_len = sizeof (*sin); sin->sin_addr = pkt_dst; @@ -2445,11 +2770,11 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) #if IPSTEALTH if (!ipstealth) { -#endif +#endif /* IPSTEALTH */ ip->ip_ttl -= IPTTLDEC; #if IPSTEALTH } -#endif +#endif /* IPSTEALTH */ /* * If forwarding packet using same interface that it came in on, @@ -2461,8 +2786,8 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) */ RT_LOCK_SPIN(rt); if (rt->rt_ifp == m->m_pkthdr.rcvif && - (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && - satosin(rt_key(rt))->sin_addr.s_addr != 0 && + !(rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) && + satosin(rt_key(rt))->sin_addr.s_addr != INADDR_ANY && ipsendredirects && !srcrt && rt->rt_ifa != NULL) { struct in_ifaddr *ia = (struct in_ifaddr *)rt->rt_ifa; u_int32_t src = ntohl(ip->ip_src.s_addr); @@ -2475,12 +2800,16 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) dest = satosin(rt->rt_gateway)->sin_addr.s_addr; else dest = pkt_dst.s_addr; - /* Router requirements says to only send host redirects */ + /* + * Router requirements says to only send + * host redirects. 
+ */ type = ICMP_REDIRECT; code = ICMP_REDIRECT_HOST; #if DIAGNOSTIC if (ipprintfs) - printf("redirect (%d) to %lx\n", code, (u_int32_t)dest); + printf("redirect (%d) to %lx\n", code, + (u_int32_t)dest); #endif } IFA_UNLOCK(&ia->ia_ifa); @@ -2488,10 +2817,10 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) RT_UNLOCK(rt); #if IPFIREWALL - if (next_hop) { + if (next_hop != NULL) { /* Pass IPFORWARD info if available */ struct m_tag *tag; - struct ip_fwd_tag *ipfwd_tag; + struct ip_fwd_tag *ipfwd_tag; tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_IPFORWARD, @@ -2507,21 +2836,34 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) m_tag_prepend(m, tag); } -#endif - error = ip_output_list(m, 0, NULL, &fwd_rt, - IP_FORWARDING | IP_OUTARGS, 0, &ipoa); +#endif /* IPFIREWALL */ + + /* Mark this packet as being forwarded from another interface */ + m->m_pkthdr.pkt_flags |= PKTF_FORWARDED; + len = m_pktlen(m); + + error = ip_output(m, NULL, &fwd_rt, IP_FORWARDING | IP_OUTARGS, + NULL, &ipoa); /* Refresh rt since the route could have changed while in IP */ rt = fwd_rt.ro_rt; - if (error) { + if (error != 0) { OSAddAtomic(1, &ipstat.ips_cantforward); } else { + /* + * Increment stats on the source interface; the ones + * for destination interface has been taken care of + * during output above by virtue of PKTF_FORWARDED. + */ + rcvifp->if_fpackets++; + rcvifp->if_fbytes += len; + OSAddAtomic(1, &ipstat.ips_forward); - if (type) + if (type != 0) { OSAddAtomic(1, &ipstat.ips_redirectsent); - else { - if (mcopy) { + } else { + if (mcopy != NULL) { /* * If we didn't have to go thru ipflow and * the packet was successfully consumed by @@ -2537,7 +2879,6 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) goto done; switch (error) { - case 0: /* forwarded, but need redirect */ /* type, code set above */ break; @@ -2554,105 +2895,107 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) case EMSGSIZE: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; -#ifndef IPSEC - if (rt != NULL) { + + if (rt == NULL) { + break; + } else { RT_LOCK_SPIN(rt); if (rt->rt_ifp != NULL) nextmtu = rt->rt_ifp->if_mtu; RT_UNLOCK(rt); } -#else +#ifdef IPSEC + if (ipsec_bypass) + break; + /* * If the packet is routed over IPsec tunnel, tell the * originator the tunnel MTU. * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz * XXX quickhack!!! */ - if (rt != NULL) { - struct secpolicy *sp = NULL; - int ipsecerror; - int ipsechdr; - struct route *ro; + sp = ipsec4_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, + IP_FORWARDING, &ipsecerror); - RT_LOCK_SPIN(rt); - if (rt->rt_ifp != NULL) - nextmtu = rt->rt_ifp->if_mtu; - RT_UNLOCK(rt); + if (sp == NULL) + break; - if (ipsec_bypass) { - OSAddAtomic(1, &ipstat.ips_cantfrag); - break; - } - sp = ipsec4_getpolicybyaddr(mcopy, - IPSEC_DIR_OUTBOUND, - IP_FORWARDING, - &ipsecerror); + /* + * find the correct route for outer IPv4 + * header, compute tunnel MTU. + */ + nextmtu = 0; - if (sp != NULL) { - /* count IPsec header size */ - ipsechdr = ipsec_hdrsiz(sp); + if (sp->req != NULL && + sp->req->saidx.mode == IPSEC_MODE_TUNNEL) { + struct secasindex saidx; + struct secasvar *sav; + struct route *ro; + struct ip *ipm; + int ipsechdr; - /* - * find the correct route for outer IPv4 - * header, compute tunnel MTU. 
- */ - nextmtu = 0; - - if (sp->req != NULL) { - if (sp->req->saidx.mode == IPSEC_MODE_TUNNEL) { - struct secasindex saidx; - struct ip *ipm; - struct secasvar *sav; - - ipm = mtod(mcopy, struct ip *); - bcopy(&sp->req->saidx, &saidx, sizeof(saidx)); - saidx.mode = sp->req->saidx.mode; - saidx.reqid = sp->req->saidx.reqid; - sin = (struct sockaddr_in *)&saidx.src; - if (sin->sin_len == 0) { - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_port = IPSEC_PORT_ANY; - bcopy(&ipm->ip_src, &sin->sin_addr, - sizeof(sin->sin_addr)); - } - sin = (struct sockaddr_in *)&saidx.dst; - if (sin->sin_len == 0) { - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_port = IPSEC_PORT_ANY; - bcopy(&ipm->ip_dst, &sin->sin_addr, - sizeof(sin->sin_addr)); - } - sav = key_allocsa_policy(&saidx); - if (sav != NULL) { - lck_mtx_lock(sadb_mutex); - if (sav->sah != NULL) { - ro = &sav->sah->sa_route; - if (ro->ro_rt != NULL) { - RT_LOCK(ro->ro_rt); - if (ro->ro_rt->rt_ifp != NULL) { - nextmtu = ro->ro_rt->rt_ifp->if_mtu; - nextmtu -= ipsechdr; - } - RT_UNLOCK(ro->ro_rt); - } - } - key_freesav(sav, KEY_SADB_LOCKED); - lck_mtx_unlock(sadb_mutex); + /* count IPsec header size */ + ipsechdr = ipsec_hdrsiz(sp); + + ipm = mtod(mcopy, struct ip *); + bcopy(&sp->req->saidx, &saidx, sizeof (saidx)); + saidx.mode = sp->req->saidx.mode; + saidx.reqid = sp->req->saidx.reqid; + sin = SIN(&saidx.src); + if (sin->sin_len == 0) { + sin->sin_len = sizeof (*sin); + sin->sin_family = AF_INET; + sin->sin_port = IPSEC_PORT_ANY; + bcopy(&ipm->ip_src, &sin->sin_addr, + sizeof (sin->sin_addr)); + } + sin = SIN(&saidx.dst); + if (sin->sin_len == 0) { + sin->sin_len = sizeof (*sin); + sin->sin_family = AF_INET; + sin->sin_port = IPSEC_PORT_ANY; + bcopy(&ipm->ip_dst, &sin->sin_addr, + sizeof (sin->sin_addr)); + } + sav = key_allocsa_policy(&saidx); + if (sav != NULL) { + lck_mtx_lock(sadb_mutex); + if (sav->sah != NULL) { + ro = &sav->sah->sa_route; + if (ro->ro_rt != NULL) { + RT_LOCK(ro->ro_rt); + if (ro->ro_rt->rt_ifp != NULL) { + nextmtu = ro->ro_rt-> + rt_ifp->if_mtu; + nextmtu -= ipsechdr; } + RT_UNLOCK(ro->ro_rt); } } - key_freesp(sp, KEY_SADB_UNLOCKED); + key_freesav(sav, KEY_SADB_LOCKED); + lck_mtx_unlock(sadb_mutex); } } -#endif /*IPSEC*/ - OSAddAtomic(1, &ipstat.ips_cantfrag); + key_freesp(sp, KEY_SADB_UNLOCKED); +#endif /* IPSEC */ break; case ENOBUFS: - type = ICMP_SOURCEQUENCH; - code = 0; + /* + * A router should not generate ICMP_SOURCEQUENCH as + * required in RFC1812 Requirements for IP Version 4 Routers. + * Source quench could be a big problem under DoS attacks, + * or if the underlying interface is rate-limited. + * Those who need source quench packets may re-enable them + * via the net.inet.ip.sendsourcequench sysctl. 
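In the restructured EMSGSIZE case, the ICMP fragmentation-needed report advertises the MTU that will actually fit once tunnel encapsulation is added, hence the subtraction of ipsec_hdrsiz(sp) from the outer interface MTU. A sketch of just that arithmetic; tunnel_advertised_mtu is an invented name, and the RFC 791 floor is an illustrative safeguard rather than something the hunk above performs:

    #include <stdint.h>

    #define IPV4_MIN_MTU 68         /* RFC 791 minimum */

    /*
     * MTU to advertise in an ICMP "fragmentation needed" error when the
     * oversized packet would have been tunneled: the outer link MTU less
     * the encapsulation overhead, floored at the IPv4 minimum.
     */
    static uint32_t
    tunnel_advertised_mtu(uint32_t link_mtu, uint32_t encap_bytes)
    {
        if (link_mtu <= encap_bytes + IPV4_MIN_MTU)
            return (IPV4_MIN_MTU);
        return (link_mtu - encap_bytes);
    }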
+ */ + if (ip_sendsourcequench == 0) { + m_freem(mcopy); + goto done; + } else { + type = ICMP_SOURCEQUENCH; + code = 0; + } break; case EACCES: /* ipfw denied packet */ @@ -2660,125 +3003,128 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) goto done; } + if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) + OSAddAtomic(1, &ipstat.ips_cantfrag); + icmp_error(mcopy, type, code, dest, nextmtu); done: - ip_fwd_route_copyin(ifp, &fwd_rt); + ip_fwd_route_copyin(rcvifp, &fwd_rt); } int -ip_savecontrol( - struct inpcb *inp, - struct mbuf **mp, - struct ip *ip, - struct mbuf *m) +ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, + struct mbuf *m) { *mp = NULL; if (inp->inp_socket->so_options & SO_TIMESTAMP) { struct timeval tv; - microtime(&tv); - mp = sbcreatecontrol_mbuf((caddr_t) &tv, sizeof(tv), - SCM_TIMESTAMP, SOL_SOCKET, mp); + getmicrotime(&tv); + mp = sbcreatecontrol_mbuf((caddr_t)&tv, sizeof (tv), + SCM_TIMESTAMP, SOL_SOCKET, mp); if (*mp == NULL) { goto no_mbufs; } } - if ((inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { + if (inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) { uint64_t time; time = mach_absolute_time(); - mp = sbcreatecontrol_mbuf((caddr_t) &time, sizeof(time), - SCM_TIMESTAMP_MONOTONIC, SOL_SOCKET, mp); - + mp = sbcreatecontrol_mbuf((caddr_t)&time, sizeof (time), + SCM_TIMESTAMP_MONOTONIC, SOL_SOCKET, mp); if (*mp == NULL) { goto no_mbufs; } - } + } if (inp->inp_flags & INP_RECVDSTADDR) { - mp = sbcreatecontrol_mbuf((caddr_t) &ip->ip_dst, - sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP, mp); + mp = sbcreatecontrol_mbuf((caddr_t)&ip->ip_dst, + sizeof (struct in_addr), IP_RECVDSTADDR, IPPROTO_IP, mp); if (*mp == NULL) { goto no_mbufs; } } #ifdef notyet - /* XXX + /* + * XXX * Moving these out of udp_input() made them even more broken * than they already were. */ /* options were tossed already */ if (inp->inp_flags & INP_RECVOPTS) { - mp = sbcreatecontrol_mbuf((caddr_t) opts_deleted_above, - sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP, mp); + mp = sbcreatecontrol_mbuf((caddr_t)opts_deleted_above, + sizeof (struct in_addr), IP_RECVOPTS, IPPROTO_IP, mp); if (*mp == NULL) { goto no_mbufs; } } /* ip_srcroute doesn't do what we want here, need to fix */ if (inp->inp_flags & INP_RECVRETOPTS) { - mp = sbcreatecontrol_mbuf((caddr_t) ip_srcroute(), - sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP, mp); + mp = sbcreatecontrol_mbuf((caddr_t)ip_srcroute(), + sizeof (struct in_addr), IP_RECVRETOPTS, IPPROTO_IP, mp); if (*mp == NULL) { goto no_mbufs; } } -#endif +#endif /* notyet */ if (inp->inp_flags & INP_RECVIF) { struct ifnet *ifp; - struct sdlbuf { - struct sockaddr_dl sdl; - u_char pad[32]; - } sdlbuf; - struct sockaddr_dl *sdp; - struct sockaddr_dl *sdl2 = &sdlbuf.sdl; + uint8_t sdlbuf[SOCK_MAXADDRLEN + 1]; + struct sockaddr_dl *sdl2 = SDL(&sdlbuf); + + /* + * Make sure to accommodate the largest possible + * size of SA(if_lladdr)->sa_len. + */ + _CASSERT(sizeof (sdlbuf) == (SOCK_MAXADDRLEN + 1)); ifnet_head_lock_shared(); if ((ifp = m->m_pkthdr.rcvif) != NULL && ifp->if_index && (ifp->if_index <= if_index)) { struct ifaddr *ifa = ifnet_addrs[ifp->if_index - 1]; + struct sockaddr_dl *sdp; if (!ifa || !ifa->ifa_addr) goto makedummy; IFA_LOCK_SPIN(ifa); - sdp = (struct sockaddr_dl *)(void *)ifa->ifa_addr; + sdp = SDL(ifa->ifa_addr); /* * Change our mind and don't try copy.
*/ - if ((sdp->sdl_family != AF_LINK) || - (sdp->sdl_len > sizeof(sdlbuf))) { + if (sdp->sdl_family != AF_LINK) { IFA_UNLOCK(ifa); goto makedummy; } + /* the above _CASSERT ensures sdl_len fits in sdlbuf */ bcopy(sdp, sdl2, sdp->sdl_len); IFA_UNLOCK(ifa); } else { makedummy: - sdl2->sdl_len - = offsetof(struct sockaddr_dl, sdl_data[0]); + sdl2->sdl_len = + offsetof(struct sockaddr_dl, sdl_data[0]); sdl2->sdl_family = AF_LINK; sdl2->sdl_index = 0; sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; } ifnet_head_done(); - mp = sbcreatecontrol_mbuf((caddr_t) sdl2, sdl2->sdl_len, - IP_RECVIF, IPPROTO_IP, mp); + mp = sbcreatecontrol_mbuf((caddr_t)sdl2, sdl2->sdl_len, + IP_RECVIF, IPPROTO_IP, mp); if (*mp == NULL) { goto no_mbufs; } } if (inp->inp_flags & INP_RECVTTL) { - mp = sbcreatecontrol_mbuf((caddr_t)&ip->ip_ttl, sizeof(ip->ip_ttl), - IP_RECVTTL, IPPROTO_IP, mp); + mp = sbcreatecontrol_mbuf((caddr_t)&ip->ip_ttl, + sizeof (ip->ip_ttl), IP_RECVTTL, IPPROTO_IP, mp); if (*mp == NULL) { goto no_mbufs; } } - if ((inp->inp_socket->so_flags & SOF_RECV_TRAFFIC_CLASS) != 0) { + if (inp->inp_socket->so_flags & SOF_RECV_TRAFFIC_CLASS) { int tc = m_get_traffic_class(m); - mp = sbcreatecontrol_mbuf((caddr_t) &tc, sizeof(tc), - SO_TRAFFIC_CLASS, SOL_SOCKET, mp); + mp = sbcreatecontrol_mbuf((caddr_t)&tc, sizeof (tc), + SO_TRAFFIC_CLASS, SOL_SOCKET, mp); if (*mp == NULL) { goto no_mbufs; } @@ -2786,32 +3132,33 @@ makedummy: if (inp->inp_flags & INP_PKTINFO) { struct in_pktinfo pi; - bzero(&pi, sizeof(struct in_pktinfo)); - bcopy(&ip->ip_dst, &pi.ipi_addr, sizeof(struct in_addr)); - pi.ipi_ifindex = (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; - - mp = sbcreatecontrol_mbuf((caddr_t)&pi, sizeof(struct in_pktinfo), - IP_RECVPKTINFO, IPPROTO_IP, mp); + bzero(&pi, sizeof (struct in_pktinfo)); + bcopy(&ip->ip_dst, &pi.ipi_addr, sizeof (struct in_addr)); + pi.ipi_ifindex = (m != NULL && m->m_pkthdr.rcvif != NULL) ? 
+ m->m_pkthdr.rcvif->if_index : 0; + + mp = sbcreatecontrol_mbuf((caddr_t)&pi, + sizeof (struct in_pktinfo), IP_RECVPKTINFO, IPPROTO_IP, mp); if (*mp == NULL) { goto no_mbufs; } } - return 0; + return (0); no_mbufs: ipstat.ips_pktdropcntrl++; - return ENOBUFS; + return (ENOBUFS); } +#if MROUTING int ip_rsvp_init(struct socket *so) { - if (so->so_type != SOCK_RAW || - so->so_proto->pr_protocol != IPPROTO_RSVP) - return EOPNOTSUPP; + if (so->so_type != SOCK_RAW || SOCK_PROTO(so) != IPPROTO_RSVP) + return (EOPNOTSUPP); if (ip_rsvpd != NULL) - return EADDRINUSE; + return (EADDRINUSE); ip_rsvpd = so; /* @@ -2823,7 +3170,7 @@ ip_rsvp_init(struct socket *so) rsvp_on++; } - return 0; + return (0); } int @@ -2838,53 +3185,155 @@ ip_rsvp_done(void) ip_rsvp_on = 0; rsvp_on--; } - return 0; + return (0); } +#endif /* MROUTING */ static inline u_short ip_cksum(struct mbuf *m, int hlen) { - u_short sum; - struct ip *ip; - - ip = mtod(m, struct ip *); - - if ((IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) == 0) - || (apple_hwcksum_rx == 0) || - ((m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) && ip->ip_p != IPPROTO_TCP)) { - m->m_pkthdr.csum_flags = 0; /* invalidate HW generated checksum flags */ - - } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); - } else if (!(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || - apple_hwcksum_tx == 0) { + } else if (!(m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { /* - * Either this is not loopback packet coming from an interface - * that does not support checksum offloading, or it is loopback - * packet that has undergone software checksumming at the send - * side because apple_hwcksum_tx was set to 0. In this case, - * calculate the checksum in software to validate the packet. + * The packet arrived on an interface which isn't capable + * of performing IP header checksum; compute it now. */ - sum = in_cksum(m, hlen); + sum = ip_cksum_hdr_in(m, hlen); } else { - /* - * This is a loopback packet without any valid checksum since - * the send side has bypassed it (apple_hwcksum_tx set to 1). - * We get here because apple_hwcksum_rx was set to 0, and so - * we pretend that all is well. - */ sum = 0; - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR | - CSUM_IP_CHECKED | CSUM_IP_VALID; - m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR | + CSUM_IP_CHECKED | CSUM_IP_VALID); + m->m_pkthdr.csum_data = 0xffff; } - if (sum) { + if (sum != 0) OSAddAtomic(1, &ipstat.ips_badsum); + + return (sum); +} + +static int +ip_getstat SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = (size_t)sizeof (struct ipstat); + + return (SYSCTL_OUT(req, &ipstat, MIN(sizeof (ipstat), req->oldlen))); +} + +void +ip_setsrcifaddr_info(struct mbuf *m, uint32_t src_idx, struct in_ifaddr *ia) +{ + VERIFY(m->m_flags & M_PKTHDR); + + /* + * If the source ifaddr is specified, pick up the information + * from there; otherwise just grab the passed-in ifindex as the + * caller may not have the ifaddr available. 
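ip_setsrcifaddr_info() here and its three companions gate the stored interface indexes behind PKTF_IFAINFO, so a getter can fail closed instead of handing back a stale index. A sketch of that flag-gated accessor pattern, assuming an invented struct pkt_meta in place of the mbuf pkthdr fields; it mirrors the branch where only an index (possibly zero, meaning unknown) is available:

    #include <stdint.h>

    #define PKT_IFAINFO 0x1         /* ifindex fields below are valid */

    struct pkt_meta {
        uint32_t flags;
        uint16_t src_ifindex;
        uint16_t dst_ifindex;
    };

    /*
     * Record the source interface; index 0 means "unknown", so the
     * validity flag is only raised for a real index.
     */
    static void
    pkt_set_src_ifindex(struct pkt_meta *pm, uint16_t idx)
    {
        pm->src_ifindex = idx;
        if (idx != 0)
            pm->flags |= PKT_IFAINFO;
    }

    /* Fail closed: without the flag, the stored index is never trusted. */
    static int
    pkt_get_src_ifindex(const struct pkt_meta *pm, uint16_t *idx)
    {
        if (!(pm->flags & PKT_IFAINFO))
            return (-1);
        *idx = pm->src_ifindex;
        return (0);
    }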
+ */ + if (ia != NULL) { + m->m_pkthdr.pkt_flags |= PKTF_IFAINFO; + m->m_pkthdr.src_ifindex = ia->ia_ifp->if_index; + } else { + m->m_pkthdr.src_ifindex = src_idx; + if (src_idx != 0) + m->m_pkthdr.pkt_flags |= PKTF_IFAINFO; + } +} + +void +ip_setdstifaddr_info(struct mbuf *m, uint32_t dst_idx, struct in_ifaddr *ia) +{ + VERIFY(m->m_flags & M_PKTHDR); + + /* + * If the destination ifaddr is specified, pick up the information + * from there; otherwise just grab the passed-in ifindex as the + * caller may not have the ifaddr available. + */ + if (ia != NULL) { + m->m_pkthdr.pkt_flags |= PKTF_IFAINFO; + m->m_pkthdr.dst_ifindex = ia->ia_ifp->if_index; + } else { + m->m_pkthdr.dst_ifindex = dst_idx; + if (dst_idx != 0) + m->m_pkthdr.pkt_flags |= PKTF_IFAINFO; + } +} + +int +ip_getsrcifaddr_info(struct mbuf *m, uint32_t *src_idx, uint32_t *iaf) +{ + VERIFY(m->m_flags & M_PKTHDR); + + if (!(m->m_pkthdr.pkt_flags & PKTF_IFAINFO)) + return (-1); + + if (src_idx != NULL) + *src_idx = m->m_pkthdr.src_ifindex; + + if (iaf != NULL) + *iaf = 0; + + return (0); +} + +int +ip_getdstifaddr_info(struct mbuf *m, uint32_t *dst_idx, uint32_t *iaf) +{ + VERIFY(m->m_flags & M_PKTHDR); + + if (!(m->m_pkthdr.pkt_flags & PKTF_IFAINFO)) + return (-1); + + if (dst_idx != NULL) + *dst_idx = m->m_pkthdr.dst_ifindex; + + if (iaf != NULL) + *iaf = 0; + + return (0); +} + +/* + * Protocol input handler for IPPROTO_GRE. + */ +void +gre_input(struct mbuf *m, int off) +{ + gre_input_func_t fn = gre_input_func; + + /* + * If there is a registered GRE input handler, pass mbuf to it. + */ + if (fn != NULL) { + lck_mtx_unlock(inet_domain_mutex); + m = fn(m, off, (mtod(m, struct ip *))->ip_p); + lck_mtx_lock(inet_domain_mutex); } - return sum; + /* + * If no matching tunnel that is up is found, we inject + * the mbuf to raw ip socket to see if anyone picks it up. + */ + if (m != NULL) + rip_input(m, off); +} + +/* + * Private KPI for PPP/PPTP. + */ +int +ip_gre_register_input(gre_input_func_t fn) +{ + lck_mtx_lock(inet_domain_mutex); + gre_input_func = fn; + lck_mtx_unlock(inet_domain_mutex); + + return (0); } diff --git a/bsd/netinet/ip_mroute.c b/bsd/netinet/ip_mroute.c index f33537ef8..6f9fdee99 100644 --- a/bsd/netinet/ip_mroute.c +++ b/bsd/netinet/ip_mroute.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -716,10 +716,6 @@ add_vif(struct vifctl *vifcp) * Set interface to fake encapsulator interface */ ifp = &multicast_decap_if[vifcp->vifc_vifi]; - /* - * Prepare cached route entry - */ - bzero(&vifp->v_route, sizeof(vifp->v_route)); } else { log(LOG_ERR, "source routed tunnels not supported\n"); return EOPNOTSUPP; @@ -1107,10 +1103,10 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, ip->ip_ttl++; /* compensate for -1 in *_send routines */ if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { vifp = viftable + vifi; - printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n", + printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s)\n", ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi, (vifp->v_flags & VIFF_TUNNEL) ? 
"tunnel on " : "", - vifp->v_ifp->if_name, vifp->v_ifp->if_unit); + if_name(vifp->v_ifp)); } return (ip_mdq(m, ifp, NULL, vifi)); } else if (imo != NULL) { @@ -1538,11 +1534,7 @@ encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) */ ip_copy = mtod(mb_copy, struct ip *); *ip_copy = multicast_encap_iphdr; -#if RANDOM_IP_ID ip_copy->ip_id = ip_randomid(); -#else - ip_copy->ip_id = htons(ip_id++); -#endif ip_copy->ip_len += len; ip_copy->ip_src = vifp->v_lcl_addr; ip_copy->ip_dst = vifp->v_rmt_addr; @@ -1813,11 +1805,12 @@ static void tbf_send_packet(struct vif *vifp, struct mbuf *m) { int error; - static struct route ro; + struct route ro; + bzero(&ro, sizeof (ro)); if (vifp->v_flags & VIFF_TUNNEL) { /* If tunnel options */ - ip_output(m, (struct mbuf *)0, &vifp->v_route, + ip_output(m, (struct mbuf *)0, &ro, IP_FORWARDING, (struct ip_moptions *)0, NULL); } else { struct ip_moptions *imo; @@ -1848,6 +1841,7 @@ done: log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp - viftable, error); } + ROUTE_RELEASE(&ro); } /* determine the current time and then @@ -2124,7 +2118,6 @@ ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) static int (*old_mrt_ioctl)(); static void (*old_proto4_input)(); static int (*old_legal_vif_num)(); - extern struct protosw inetsw[]; case LKM_E_LOAD: if(lkmexists(lkmtp) || ip_mrtproto) diff --git a/bsd/netinet/ip_mroute.h b/bsd/netinet/ip_mroute.h index f234e20ab..41985f3fa 100644 --- a/bsd/netinet/ip_mroute.h +++ b/bsd/netinet/ip_mroute.h @@ -93,9 +93,9 @@ #define MRT_ASSERT 107 /* enable PIM assert processing */ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define GET_TIME(t) microtime(&t) -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #ifndef CONFIG_MAXVIFS #define CONFIG_MAXVIFS 32 /* 4635538 temp workaround */ @@ -189,8 +189,11 @@ struct sioc_vif_req { }; #ifdef PRIVATE +#ifndef KERNEL /* * The kernel's virtual-interface structure. + * + * XXX: This is unused and is currently exposed for netstat. */ struct tbf; struct ifnet; @@ -211,12 +214,13 @@ struct vif { u_int v_rsvp_on; /* RSVP listening on this vif */ struct socket *v_rsvpd; /* RSVP daemon socket */ }; -#endif /* * The kernel's multicast forwarding cache entry structure * (A field for the type of service (mfc_tos) is to be added * at a future point) + * + * XXX: This is unused and is currently exposed for netstat. */ struct mfc { struct in_addr mfc_origin; /* IP origin of mcasts */ @@ -231,6 +235,8 @@ struct mfc { struct rtdetq *mfc_stall; /* q of packets awaiting mfc */ struct mfc *mfc_next; /* next mfc entry */ }; +#endif /* !KERNEL */ +#endif /* PRIVATE */ /* * Struct used to communicate from kernel to multicast router @@ -250,7 +256,7 @@ struct igmpmsg { #define MFCTBLSIZ CONFIG_MFCTBLSIZ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * Argument structure used for pkt info. while upcall is made */ @@ -303,5 +309,5 @@ extern int (*mrt_ioctl)(u_long, caddr_t); extern int (*mrt_ioctl)(u_long, caddr_t, struct proc *); #endif -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET_IP_MROUTE_H_ */ diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index aece80368..3b0143b3c 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -58,7 +58,6 @@ * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 - * $FreeBSD: src/sys/netinet/ip_output.c,v 1.99.2.16 2001/07/19 06:37:26 kris Exp $ */ /* * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce @@ -67,7 +66,7 @@ * Version 2.0. */ -#define _IP_VHL +#define _IP_VHL #include #include @@ -80,9 +79,14 @@ #include #include #include +#include #include #include +#include + +#include +#include #include #include @@ -90,6 +94,7 @@ #include #include #include +#include #include #include @@ -97,23 +102,16 @@ #include #include #include - #include #if CONFIG_MACF_NET #include -#endif - -#include -#include -#include +#endif /* CONFIG_MACF_NET */ -#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1) -#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3) -#define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1) -#define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1) - -#define SWAP16(v) ((((v) & 0xff) << 8) | ((v) >> 8)) +#define DBG_LAYER_BEG NETDBG_CODE(DBG_NETIP, 1) +#define DBG_LAYER_END NETDBG_CODE(DBG_NETIP, 3) +#define DBG_FNC_IP_OUTPUT NETDBG_CODE(DBG_NETIP, (1 << 8) | 1) +#define DBG_FNC_IPSEC4_OUTPUT NETDBG_CODE(DBG_NETIP, (2 << 8) | 1) #if IPSEC #include @@ -121,13 +119,16 @@ #if IPSEC_DEBUG #include #else -#define KEYDEBUG(lev,arg) +#define KEYDEBUG(lev, arg) #endif -#endif /*IPSEC*/ +#endif /* IPSEC */ +#if IPFIREWALL #include +#if IPDIVERT #include -#include +#endif /* IPDIVERT */ +#endif /* IPFIREWALL */ #if DUMMYNET #include @@ -137,50 +138,47 @@ #include #endif /* PF */ -#if IPFIREWALL_FORWARD_DEBUG -#define print_ip(a) printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\ - (ntohl(a.s_addr)>>16)&0xFF,\ - (ntohl(a.s_addr)>>8)&0xFF,\ - (ntohl(a.s_addr))&0xFF); -#endif +#if IPFIREWALL_FORWARD && IPFIREWALL_FORWARD_DEBUG +#define print_ip(a) \ + printf("%ld.%ld.%ld.%ld", (ntohl(a.s_addr) >> 24) & 0xFF, \ + (ntohl(a.s_addr) >> 16) & 0xFF, \ + (ntohl(a.s_addr) >> 8) & 0xFF, \ + (ntohl(a.s_addr)) & 0xFF); +#endif /* IPFIREWALL_FORWARD && IPFIREWALL_FORWARD_DEBUG */ u_short ip_id; -static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); -static void ip_mloopback(struct ifnet *, struct mbuf *, - struct sockaddr_in *, int); -static int ip_pcbopts(int, struct mbuf **, struct mbuf *); -static void imo_trace(struct ip_moptions *, int); - static void ip_out_cksum_stats(int, u_int32_t); +static struct mbuf *ip_insertoptions(struct mbuf *, struct mbuf *, int *); +static 
int ip_optcopy(struct ip *, struct ip *); +static int ip_pcbopts(int, struct mbuf **, struct mbuf *); +static void imo_trace(struct ip_moptions *, int); +static void ip_mloopback(struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in *, int); static struct ifaddr *in_selectsrcif(struct ip *, struct route *, unsigned int); -int ip_optcopy(struct ip *, struct ip *); -void in_delayed_cksum_offset(struct mbuf *, int ); -void in_cksum_offset(struct mbuf* , size_t ); - -extern struct protosw inetsw[]; - extern struct ip_linklocal_stat ip_linklocal_stat; -extern lck_mtx_t *ip_mutex; /* temporary: for testing */ #if IPSEC extern int ipsec_bypass; #endif -static int ip_maxchainsent = 0; -SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_maxchainsent, 0, "use dlil_output_list"); +static int ip_maxchainsent = 0; +SYSCTL_INT(_net_inet_ip, OID_AUTO, maxchainsent, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip_maxchainsent, 0, + "use dlil_output_list"); #if DEBUG static int forge_ce = 0; -SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, CTLFLAG_RW | CTLFLAG_LOCKED, - &forge_ce, 0, "Forge ECN CE"); +SYSCTL_INT(_net_inet_ip, OID_AUTO, forge_ce, + CTLFLAG_RW | CTLFLAG_LOCKED, &forge_ce, 0, + "Forge ECN CE"); #endif /* DEBUG */ static int ip_select_srcif_debug = 0; -SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip_select_srcif_debug, 0, "log source interface selection debug info"); +SYSCTL_INT(_net_inet_ip, OID_AUTO, select_srcif_debug, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip_select_srcif_debug, 0, + "log source interface selection debug info"); #define IMO_TRACE_HIST_SIZE 32 /* size of trace history */ @@ -221,102 +219,103 @@ static struct zone *imo_zone; /* zone for ip_moptions */ * The mbuf opt, if present, will not be freed. */ int -ip_output( - struct mbuf *m0, - struct mbuf *opt, - struct route *ro, - int flags, - struct ip_moptions *imo, - struct ip_out_args *ipoa) +ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int flags, + struct ip_moptions *imo, struct ip_out_args *ipoa) { - int error; - error = ip_output_list(m0, 0, opt, ro, flags, imo, ipoa); - return error; + return (ip_output_list(m0, 0, opt, ro, flags, imo, ipoa)); } /* - * Returns: 0 Success - * ENOMEM - * EADDRNOTAVAIL - * ENETUNREACH - * EHOSTUNREACH - * EACCES - * EMSGSIZE - * ENOBUFS - * ipsec4_getpolicybyaddr:??? [IPSEC 4th argument, contents modified] - * ipsec4_getpolicybysock:??? [IPSEC 4th argument, contents modified] - * key_spdacquire:??? [IPSEC] - * ipsec4_output:??? [IPSEC] - * ip_dn_io_ptr:??? [dummynet] - * dlil_output:??? [DLIL] - * dlil_output_list:??? [DLIL] + * IP output. The packet in mbuf chain m contains a skeletal IP + * header (with len, off, ttl, proto, tos, src, dst). + * The mbuf chain containing the packet will be freed. + * The mbuf opt, if present, will not be freed. + * + * Route ro MUST be non-NULL; if ro->ro_rt is valid, route lookup would be + * skipped and ro->ro_rt would be used. Otherwise the result of route + * lookup is stored in ro->ro_rt. * - * Notes: The ipsec4_getpolicyby{addr|sock} function error returns are - * only used as the error return from this function where one of - * these functions fails to return a policy. + * In the IP forwarding case, the packet will arrive with options already + * inserted, so must have a NULL opt pointer. 
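The rewritten block comment above spells out the route-caching contract of ip_output(): the caller always supplies a struct route, a still-valid ro->ro_rt short-circuits the lookup, and a fresh lookup result is written back for reuse on the next send. A toy model of that contract, assuming invented types and a stub lookup (real code takes rtentry references under locks and frees them at refcount zero):

    #include <stdlib.h>

    struct route_entry {            /* stand-in for a refcounted rtentry */
        int re_valid;
        int re_refcnt;
    };

    struct route_cache {            /* stand-in for struct route */
        struct route_entry *rc_rt;
    };

    /* Stub lookup returning a fresh referenced entry (or NULL on ENOMEM). */
    static struct route_entry *
    route_lookup(void)
    {
        struct route_entry *re = calloc(1, sizeof (*re));

        if (re != NULL) {
            re->re_valid = 1;
            re->re_refcnt = 1;
        }
        return (re);
    }

    /*
     * The contract above: a still-valid cached entry is used as-is;
     * otherwise the stale reference is dropped and the result of a
     * fresh lookup is stored back for the caller's next send.
     */
    static struct route_entry *
    route_cache_get(struct route_cache *rc)
    {
        if (rc->rc_rt != NULL && !rc->rc_rt->re_valid) {
            rc->rc_rt->re_refcnt--;     /* real code frees at zero */
            rc->rc_rt = NULL;
        }
        if (rc->rc_rt == NULL)
            rc->rc_rt = route_lookup();
        return (rc->rc_rt);
    }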
*/ int -ip_output_list( - struct mbuf *m0, - int packetchain, - struct mbuf *opt, - struct route *ro, - int flags, - struct ip_moptions *imo, - struct ip_out_args *ipoa) +ip_output_list(struct mbuf *m0, int packetchain, struct mbuf *opt, + struct route *ro, int flags, struct ip_moptions *imo, + struct ip_out_args *ipoa) { struct ip *ip; - struct ifnet *ifp = NULL; + struct ifnet *ifp = NULL; /* not refcnt'd */ struct mbuf *m = m0, *prevnxt = NULL, **mppn = &prevnxt; int hlen = sizeof (struct ip); int len = 0, error = 0; struct sockaddr_in *dst = NULL; struct in_ifaddr *ia = NULL, *src_ia = NULL; - int isbroadcast, sw_csum; struct in_addr pkt_dst; - struct ipf_pktopts *ippo = NULL, ipf_pktopts; + struct ipf_pktopts *ippo = NULL; + ipfilter_t inject_filter_ref = NULL; + struct mbuf *packetlist; + uint32_t sw_csum, pktcnt = 0, scnt = 0, bytecnt = 0; + unsigned int ifscope = IFSCOPE_NONE; + struct flowadv *adv = NULL; #if IPSEC - struct ipsec_output_state ipsec_state; - struct route *ipsec_saved_route = NULL; struct socket *so = NULL; struct secpolicy *sp = NULL; -#endif -#if IPFIREWALL_FORWARD - int fwd_rewrite_src = 0; -#endif +#endif /* IPSEC */ #if IPFIREWALL - int off; + int ipfwoff; struct sockaddr_in *next_hop_from_ipfwd_tag = NULL; -#endif +#endif /* IPFIREWALL */ #if IPFIREWALL || DUMMYNET - struct ip_fw_args args; - struct m_tag *tag; -#endif - int didfilter = 0; - ipfilter_t inject_filter_ref = 0; + struct m_tag *tag; +#endif /* IPFIREWALL || DUMMYNET */ #if DUMMYNET - struct route saved_route; struct ip_out_args saved_ipoa; struct sockaddr_in dst_buf; #endif /* DUMMYNET */ - struct mbuf * packetlist; - int pktcnt = 0, tso = 0; - u_int32_t bytecnt = 0; - unsigned int ifscope = IFSCOPE_NONE; - unsigned int nocell = 0; - boolean_t select_srcif, srcbound; - struct flowadv *adv = NULL; - - KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); - + struct { #if IPSEC - bzero(&ipsec_state, sizeof(ipsec_state)); + struct ipsec_output_state ipsec_state; #endif /* IPSEC */ +#if IPFIREWALL || DUMMYNET + struct ip_fw_args args; +#endif /* IPFIREWALL || DUMMYNET */ +#if IPFIREWALL_FORWARD + struct route sro_fwd; +#endif /* IPFIREWALL_FORWARD */ +#if DUMMYNET + struct route saved_route; +#endif /* DUMMYNET */ + struct ipf_pktopts ipf_pktopts; + } ipobz; +#define ipsec_state ipobz.ipsec_state +#define args ipobz.args +#define sro_fwd ipobz.sro_fwd +#define saved_route ipobz.saved_route +#define ipf_pktopts ipobz.ipf_pktopts + union { + struct { + boolean_t select_srcif : 1; /* set once */ + boolean_t srcbound : 1; /* set once */ + boolean_t nocell : 1; /* set once */ + boolean_t isbroadcast : 1; + boolean_t didfilter : 1; +#if IPFIREWALL_FORWARD + boolean_t fwd_rewrite_src : 1; +#endif /* IPFIREWALL_FORWARD */ + }; + uint32_t raw; + } ipobf = { .raw = 0 }; + + KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); + VERIFY(m0->m_flags & M_PKTHDR); packetlist = m0; -#if IPFIREWALL || DUMMYNET - bzero(&args, sizeof(struct ip_fw_args)); + /* zero out {ipsec_state, args, sro_fwd, saved_route, ipf_pktops} */ + bzero(&ipobz, sizeof (ipobz)); + ippo = &ipf_pktopts; + +#if IPFIREWALL || DUMMYNET if (SLIST_EMPTY(&m0->m_pkthdr.tags)) goto ipfw_tags_done; @@ -334,7 +333,7 @@ ip_output_list( ro = &saved_route; imo = NULL; - bcopy(&dn_tag->dn_dst, &dst_buf, sizeof(dst_buf)); + bcopy(&dn_tag->dn_dst, &dst_buf, sizeof (dst_buf)); dst = &dst_buf; ifp = dn_tag->dn_ifp; flags = dn_tag->dn_flags; @@ -375,24 +374,29 @@ ipfw_tags_done: #endif /* IPFIREWALL || DUMMYNET */ m = m0; + 
m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO); -#if DIAGNOSTIC - if ( !m || (m->m_flags & M_PKTHDR) != 0) - panic("ip_output no HDR"); - if (!ro) - panic("ip_output no route, proto = %d", - mtod(m, struct ip *)->ip_p); -#endif +#if IPSEC + if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) { + /* If packet is bound to an interface, check bound policies */ + if ((flags & IP_OUTARGS) && (ipoa != NULL) && + (ipoa->ipoa_flags & IPOAF_BOUND_IF) && + ipoa->ipoa_boundif != IFSCOPE_NONE) { + if (ipsec4_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND, + &flags, ipoa, &sp) != 0) + goto bad; + } + } +#endif /* IPSEC */ - bzero(&ipf_pktopts, sizeof(struct ipf_pktopts)); - ippo = &ipf_pktopts; + VERIFY(ro != NULL); if (ip_doscopedroute && (flags & IP_OUTARGS)) { /* * In the forwarding case, only the ifscope value is used, * as source interface selection doesn't take place. */ - if ((select_srcif = (!(flags & IP_FORWARDING) && + if ((ipobf.select_srcif = (!(flags & IP_FORWARDING) && (ipoa->ipoa_flags & IPOAF_SELECT_SRCIF)))) { ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF; } @@ -404,22 +408,30 @@ ipfw_tags_done: (IPPOF_BOUND_IF | (ifscope << IPPOF_SHIFT_IFSCOPE)); } - if ((srcbound = (ipoa->ipoa_flags & IPOAF_BOUND_SRCADDR))) + /* double negation needed for bool bit field */ + ipobf.srcbound = !!(ipoa->ipoa_flags & IPOAF_BOUND_SRCADDR); + if (ipobf.srcbound) ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR; } else { - select_srcif = FALSE; - srcbound = FALSE; + ipobf.select_srcif = FALSE; + ipobf.srcbound = FALSE; ifscope = IFSCOPE_NONE; + if (flags & IP_OUTARGS) { + ipoa->ipoa_boundif = IFSCOPE_NONE; + ipoa->ipoa_flags &= ~(IPOAF_SELECT_SRCIF | + IPOAF_BOUND_IF | IPOAF_BOUND_SRCADDR); + } } if ((flags & IP_OUTARGS) && (ipoa->ipoa_flags & IPOAF_NO_CELLULAR)) { - nocell = 1; + ipobf.nocell = TRUE; ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR; } if (flags & IP_OUTARGS) { adv = &ipoa->ipoa_flowadv; adv->code = FADV_SUCCESS; + ipoa->ipoa_retflags = 0; } #if DUMMYNET @@ -439,35 +451,42 @@ ipfw_tags_done: RT_UNLOCK(ro->ro_rt); } #if IPSEC - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { + if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) { so = ipsec_getsocket(m); - (void)ipsec_setsocket(m, NULL); + (void) ipsec_setsocket(m, NULL); } #endif /* IPSEC */ -#if IPFIREWALL +#if IPFIREWALL if (args.fwa_ipfw_rule != NULL) goto skip_ipsec; -#endif /* #if IPFIREWALL */ +#endif /* IPFIREWALL */ if (args.fwa_pf_rule != NULL) goto sendit; } #endif /* DUMMYNET */ #if IPSEC - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { + if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) { so = ipsec_getsocket(m); - (void)ipsec_setsocket(m, NULL); + (void) ipsec_setsocket(m, NULL); } -#endif -loopit: +#endif /* IPSEC */ + +loopit: + ipobf.isbroadcast = FALSE; + ipobf.didfilter = FALSE; +#if IPFIREWALL_FORWARD + ipobf.fwd_rewrite_src = FALSE; +#endif /* IPFIREWALL_FORWARD */ + + VERIFY(m->m_flags & M_PKTHDR); /* - * No need to proccess packet twice if we've - * already seen it + * No need to proccess packet twice if we've already seen it. */ if (!SLIST_EMPTY(&m->m_pkthdr.tags)) inject_filter_ref = ipf_get_inject_filter(m); else - inject_filter_ref = 0; + inject_filter_ref = NULL; if (opt) { m = ip_insertoptions(m, opt, &len); @@ -480,20 +499,22 @@ loopit: } } ip = mtod(m, struct ip *); + #if IPFIREWALL /* * rdar://8542331 * - * When dealing with a packet chain, we need to reset "next_hop" because - * "dst" may have been changed to the gateway address below for the previous - * packet of the chain. 
This could cause the route to be inavertandly changed - * to the route to the gateway address (instead of the route to the destination). + * When dealing with a packet chain, we need to reset "next_hop" + * because "dst" may have been changed to the gateway address below + * for the previous packet of the chain. This could cause the route + * to be inavertandly changed to the route to the gateway address + * (instead of the route to the destination). */ args.fwa_next_hop = next_hop_from_ipfwd_tag; pkt_dst = args.fwa_next_hop ? args.fwa_next_hop->sin_addr : ip->ip_dst; -#else +#else /* !IPFIREWALL */ pkt_dst = ip->ip_dst; -#endif +#endif /* !IPFIREWALL */ /* * We must not send if the packet is destined to network zero. @@ -507,14 +528,10 @@ loopit: /* * Fill in IP header. */ - if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { + if (!(flags & (IP_FORWARDING|IP_RAWOUTPUT))) { ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2); ip->ip_off &= IP_DF; -#if RANDOM_IP_ID ip->ip_id = ip_randomid(); -#else - ip->ip_id = htons(ip_id++); -#endif OSAddAtomic(1, &ipstat.ips_localout); } else { hlen = IP_VHL_HL(ip->ip_vhl) << 2; @@ -523,17 +540,17 @@ loopit: #if DEBUG /* For debugging, we let the stack forge congestion */ if (forge_ce != 0 && - ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 || - (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) { + ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT1 || + (ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_ECT0)) { ip->ip_tos = (ip->ip_tos & ~IPTOS_ECN_MASK) | IPTOS_ECN_CE; forge_ce--; } #endif /* DEBUG */ - KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, - ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + KERNEL_DEBUG(DBG_LAYER_BEG, ip->ip_dst.s_addr, ip->ip_src.s_addr, + ip->ip_p, ip->ip_off, ip->ip_len); - dst = (struct sockaddr_in *)(void *)&ro->ro_dst; + dst = SIN(&ro->ro_dst); /* * If there is a cached route, @@ -544,15 +561,15 @@ loopit: */ if (ro->ro_rt != NULL) { - if (ro->ro_rt->generation_id != route_generation && - ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0) && - (ip->ip_src.s_addr != INADDR_ANY)) { + if (ROUTE_UNUSABLE(ro) && ip->ip_src.s_addr != INADDR_ANY && + !(flags & (IP_ROUTETOIF | IP_FORWARDING))) { src_ia = ifa_foraddr(ip->ip_src.s_addr); if (src_ia == NULL) { error = EADDRNOTAVAIL; goto bad; } IFA_REMREF(&src_ia->ia_ifa); + src_ia = NULL; } /* * Test rt_flags without holding rt_lock for performance @@ -560,25 +577,23 @@ loopit: * caught by the layer below (since it uses this route * as a hint) or during the next transmit. */ - if ((ro->ro_rt->rt_flags & RTF_UP) == 0 || - dst->sin_family != AF_INET || - dst->sin_addr.s_addr != pkt_dst.s_addr) { - rtfree(ro->ro_rt); - ro->ro_rt = NULL; - } + if (ROUTE_UNUSABLE(ro) || dst->sin_family != AF_INET || + dst->sin_addr.s_addr != pkt_dst.s_addr) + ROUTE_RELEASE(ro); + /* * If we're doing source interface selection, we may not * want to use this route; only synch up the generation * count otherwise. */ - if (!select_srcif && ro->ro_rt != NULL && - ro->ro_rt->generation_id != route_generation) - ro->ro_rt->generation_id = route_generation; + if (!ipobf.select_srcif && ro->ro_rt != NULL && + RT_GENID_OUTOFSYNC(ro->ro_rt)) + RT_GENID_SYNC(ro->ro_rt); } if (ro->ro_rt == NULL) { - bzero(dst, sizeof(*dst)); + bzero(dst, sizeof (*dst)); dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); + dst->sin_len = sizeof (*dst); dst->sin_addr = pkt_dst; } /* @@ -586,10 +601,11 @@ loopit: * short circuit routing lookup. 
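The conversion in this hunk from open-coded generation_id tests to ROUTE_UNUSABLE()/RT_GENID_OUTOFSYNC()/RT_GENID_SYNC() centralizes one idea: cached routes are validated lazily against a global generation counter rather than being torn down eagerly whenever the table changes. In sketch form, with invented names and no locking:

    #include <stdint.h>

    /*
     * One global generation number is bumped on any routing-table
     * change; each cached route remembers the generation it was filled
     * under, so staleness is detected at the next use instead of by
     * walking every cache at update time.
     */
    static uint32_t route_generation = 1;

    struct cached_route {
        int cr_up;              /* entry still usable at all */
        uint32_t cr_genid;      /* generation when cache was filled */
    };

    static void
    routing_table_changed(void)
    {
        route_generation++;     /* implicitly invalidates all caches */
    }

    static int
    route_unusable(const struct cached_route *cr)
    {
        return (!cr->cr_up || cr->cr_genid != route_generation);
    }

    /* Revalidate without a fresh lookup when only the genid is behind. */
    static void
    route_genid_sync(struct cached_route *cr)
    {
        cr->cr_genid = route_generation;
    }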
*/ if (flags & IP_ROUTETOIF) { - if (ia) + if (ia != NULL) IFA_REMREF(&ia->ia_ifa); - if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0) { - if ((ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { + if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) { + ia = ifatoia(ifa_ifwithnet(sintosa(dst))); + if (ia == NULL) { OSAddAtomic(1, &ipstat.ips_noroute); error = ENETUNREACH; goto bad; @@ -597,20 +613,31 @@ loopit: } ifp = ia->ia_ifp; ip->ip_ttl = 1; - isbroadcast = in_broadcast(dst->sin_addr, ifp); + ipobf.isbroadcast = in_broadcast(dst->sin_addr, ifp); + /* + * For consistency with other cases below. Loopback + * multicast case is handled separately by ip_mloopback(). + */ + if ((ifp->if_flags & IFF_LOOPBACK) && + !IN_MULTICAST(ntohl(pkt_dst.s_addr))) { + m->m_pkthdr.rcvif = ifp; + ip_setsrcifaddr_info(m, ifp->if_index, NULL); + ip_setdstifaddr_info(m, ifp->if_index, NULL); + } } else if (IN_MULTICAST(ntohl(pkt_dst.s_addr)) && imo != NULL && (ifp = imo->imo_multicast_ifp) != NULL) { /* * Bypass the normal routing lookup for multicast * packets if the interface is specified. */ - isbroadcast = 0; + ipobf.isbroadcast = FALSE; if (ia != NULL) IFA_REMREF(&ia->ia_ifa); /* Macro takes reference on ia */ IFP_TO_IA(ifp, ia); } else { + struct ifaddr *ia0 = NULL; boolean_t cloneok = FALSE; /* * Perform source interface selection; the source IP address @@ -620,24 +647,24 @@ loopit: * or if we haven't done source interface selection on this * route (for this PCB instance) before. */ - if (select_srcif && ip->ip_src.s_addr != INADDR_ANY && - (ro->ro_rt == NULL || !(ro->ro_rt->rt_flags & RTF_UP) || - ro->ro_rt->generation_id != route_generation || + if (ipobf.select_srcif && + ip->ip_src.s_addr != INADDR_ANY && (ROUTE_UNUSABLE(ro) || !(ro->ro_flags & ROF_SRCIF_SELECTED))) { - struct ifaddr *ifa; - /* Find the source interface */ - ifa = in_selectsrcif(ip, ro, ifscope); + ia0 = in_selectsrcif(ip, ro, ifscope); /* * If the source address belongs to a cellular interface * and the caller forbids our using interfaces of such - * type, pretend that there is no source address. + * type, pretend that there is no route. */ - if (nocell && ifa != NULL && - ifa->ifa_ifp->if_type == IFT_CELLULAR) { - IFA_REMREF(ifa); - error = EADDRNOTAVAIL; + if (ipobf.nocell && ia0 != NULL && + IFNET_IS_CELLULAR(ia0->ifa_ifp)) { + IFA_REMREF(ia0); + ia0 = NULL; + error = EHOSTUNREACH; + if (flags & IP_OUTARGS) + ipoa->ipoa_retflags |= IPOARF_IFDENIED; goto bad; } @@ -649,8 +676,8 @@ loopit: * there's no interface having such an address, * so bail out. */ - if (ifa == NULL && (!(flags & IP_RAWOUTPUT) || - srcbound) && ifscope != lo_ifp->if_index) { + if (ia0 == NULL && (!(flags & IP_RAWOUTPUT) || + ipobf.srcbound) && ifscope != lo_ifp->if_index) { error = EADDRNOTAVAIL; goto bad; } @@ -669,10 +696,9 @@ loopit: * gateway points to that of the default gateway on * the primary interface of the system. */ - if (ifa != NULL) { + if (ia0 != NULL) { if (ifscope == IFSCOPE_NONE) - ifscope = ifa->ifa_ifp->if_index; - IFA_REMREF(ifa); + ifscope = ia0->ifa_ifp->if_index; cloneok = (!(flags & IP_RAWOUTPUT) && !(IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)))); } @@ -729,13 +755,15 @@ loopit: * caller forbids our using interfaces of such type, * pretend that there is no route. 
*/ - if (nocell && ro->ro_rt != NULL) { + if (ipobf.nocell && ro->ro_rt != NULL) { RT_LOCK_SPIN(ro->ro_rt); - if (ro->ro_rt->rt_ifp->if_type == - IFT_CELLULAR) { + if (IFNET_IS_CELLULAR(ro->ro_rt->rt_ifp)) { RT_UNLOCK(ro->ro_rt); - rtfree(ro->ro_rt); - ro->ro_rt = NULL; + ROUTE_RELEASE(ro); + if (flags & IP_OUTARGS) { + ipoa->ipoa_retflags |= + IPOARF_IFDENIED; + } } else { RT_UNLOCK(ro->ro_rt); } @@ -745,35 +773,75 @@ loopit: if (ro->ro_rt == NULL) { OSAddAtomic(1, &ipstat.ips_noroute); error = EHOSTUNREACH; + if (ia0 != NULL) { + IFA_REMREF(ia0); + ia0 = NULL; + } goto bad; } - if (ia) + if (ia != NULL) IFA_REMREF(&ia->ia_ifa); RT_LOCK_SPIN(ro->ro_rt); ia = ifatoia(ro->ro_rt->rt_ifa); - if (ia) { + if (ia != NULL) { /* Become a regular mutex */ RT_CONVERT_LOCK(ro->ro_rt); IFA_ADDREF(&ia->ia_ifa); } + /* + * Note: ia_ifp may not be the same as rt_ifp; the latter + * is what we use for determining outbound i/f, mtu, etc. + */ ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) { - dst = (struct sockaddr_in *)(void *) - ro->ro_rt->rt_gateway; + dst = SIN(ro->ro_rt->rt_gateway); } if (ro->ro_rt->rt_flags & RTF_HOST) { - isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); + /* double negation needed for bool bit field */ + ipobf.isbroadcast = + !!(ro->ro_rt->rt_flags & RTF_BROADCAST); } else { /* Become a regular mutex */ RT_CONVERT_LOCK(ro->ro_rt); - isbroadcast = in_broadcast(dst->sin_addr, ifp); + ipobf.isbroadcast = in_broadcast(dst->sin_addr, ifp); + } + /* + * For consistency with IPv6, as well as to ensure that + * IP_RECVIF is set correctly for packets that are sent + * to one of the local addresses. ia (rt_ifa) would have + * been fixed up by rt_setif for local routes. This + * would make it appear as if the packet arrives on the + * interface which owns the local address. Loopback + * multicast case is handled separately by ip_mloopback(). + */ + if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK) && + !IN_MULTICAST(ntohl(pkt_dst.s_addr))) { + uint32_t srcidx; + + m->m_pkthdr.rcvif = ia->ia_ifa.ifa_ifp; + + if (ia0 != NULL) + srcidx = ia0->ifa_ifp->if_index; + else if ((ro->ro_flags & ROF_SRCIF_SELECTED) && + ro->ro_srcia != NULL) + srcidx = ro->ro_srcia->ifa_ifp->if_index; + else + srcidx = 0; + + ip_setsrcifaddr_info(m, srcidx, NULL); + ip_setdstifaddr_info(m, 0, ia); } RT_UNLOCK(ro->ro_rt); + if (ia0 != NULL) { + IFA_REMREF(ia0); + ia0 = NULL; + } } if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) { + struct ifnet *srcifp = NULL; struct in_multi *inm; u_int32_t vif; u_int8_t ttl = IP_DEFAULT_MULTICAST_TTL; @@ -785,7 +853,7 @@ loopit: * still points to the address in "ro". (It may have been * changed to point to a gateway address, above.) */ - dst = (struct sockaddr_in *)(void *)&ro->ro_dst; + dst = SIN(&ro->ro_dst); /* * See if the caller provided any multicast options */ @@ -794,17 +862,17 @@ loopit: vif = imo->imo_multicast_vif; ttl = imo->imo_multicast_ttl; loop = imo->imo_multicast_loop; - if ((flags & IP_RAWOUTPUT) == 0) + if (!(flags & IP_RAWOUTPUT)) ip->ip_ttl = ttl; if (imo->imo_multicast_ifp != NULL) ifp = imo->imo_multicast_ifp; IMO_UNLOCK(imo); #if MROUTING - if (vif != -1 && ((flags & IP_RAWOUTPUT) == 0 || + if (vif != -1 && (!(flags & IP_RAWOUTPUT) || ip->ip_src.s_addr == INADDR_ANY)) ip->ip_src.s_addr = ip_mcast_src(vif); #endif /* MROUTING */ - } else if ((flags & IP_RAWOUTPUT) == 0) { + } else if (!(flags & IP_RAWOUTPUT)) { vif = -1; ip->ip_ttl = ttl; } @@ -812,7 +880,7 @@ loopit: * Confirm that the outgoing interface supports multicast. 
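Two decisions in the hunk above deserve isolating: with RTF_GATEWAY set, the packet is physically addressed to rt_gateway rather than to its final destination, and for host routes the broadcast question is answered by the precomputed RTF_BROADCAST bit instead of an in_broadcast() scan. A compact sketch; rt_sketch is invented and the RTF_* values mirror the classic BSD ones:

    #include <stdbool.h>
    #include <netinet/in.h>

    #define RTF_GATEWAY   0x2       /* destination reached via a gateway */
    #define RTF_HOST      0x4       /* host route, not a network route */
    #define RTF_BROADCAST 0x400000  /* route represents a bcast address */

    struct rt_sketch {
        int rt_flags;
        struct sockaddr_in rt_gateway;
    };

    /*
     * Link-layer addressing decision: an indirect route hands the
     * driver the gateway's address; a direct route sends straight to
     * the final destination.
     */
    static const struct sockaddr_in *
    next_hop(const struct rt_sketch *rt, const struct sockaddr_in *dst)
    {
        return ((rt->rt_flags & RTF_GATEWAY) ? &rt->rt_gateway : dst);
    }

    /*
     * Host routes carry a precomputed broadcast bit; network routes
     * need the address-vs-interface scan that in_broadcast() performs
     * in the code above.
     */
    static bool
    dst_is_broadcast(const struct rt_sketch *rt)
    {
        return ((rt->rt_flags & RTF_HOST) &&
            (rt->rt_flags & RTF_BROADCAST));
    }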
*/ if (imo == NULL || vif == -1) { - if ((ifp->if_flags & IFF_MULTICAST) == 0) { + if (!(ifp->if_flags & IFF_MULTICAST)) { OSAddAtomic(1, &ipstat.ips_noroute); error = ENETUNREACH; goto bad; @@ -829,6 +897,7 @@ loopit: IFA_LOCK_SPIN(&ia1->ia_ifa); if (ia1->ia_ifp == ifp) { ip->ip_src = IA_SIN(ia1)->sin_addr; + srcifp = ifp; IFA_UNLOCK(&ia1->ia_ifa); break; } @@ -852,10 +921,11 @@ loopit: */ if (!TAILQ_EMPTY(&ipv4_filters)) { struct ipfilter *filter; - int seen = (inject_filter_ref == 0); + int seen = (inject_filter_ref == NULL); if (imo != NULL) { - ipf_pktopts.ippo_flags |= IPPOF_MCAST_OPTS; + ipf_pktopts.ippo_flags |= + IPPOF_MCAST_OPTS; ipf_pktopts.ippo_mcast_ifnet = ifp; ipf_pktopts.ippo_mcast_ttl = ttl; ipf_pktopts.ippo_mcast_loop = loop; @@ -863,20 +933,26 @@ loopit: ipf_ref(); - /* 4135317 - always pass network byte order to filter */ - + /* + * 4135317 - always pass network byte + * order to filter + */ #if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); #endif - TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (seen == 0) { - if ((struct ipfilter *)inject_filter_ref == filter) + if ((struct ipfilter *) + inject_filter_ref == filter) seen = 1; - } else if (filter->ipf_filter.ipf_output) { + } else if (filter->ipf_filter. + ipf_output != NULL) { errno_t result; - result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); + result = filter->ipf_filter. + ipf_output(filter-> + ipf_filter.cookie, + (mbuf_t *)&m, ippo); if (result == EJUSTRETURN) { ipf_unref(); INM_REMREF(inm); @@ -892,16 +968,14 @@ loopit: /* set back to host byte order */ ip = mtod(m, struct ip *); - #if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); #endif - ipf_unref(); - didfilter = 1; + ipobf.didfilter = TRUE; } - ip_mloopback(ifp, m, dst, hlen); + ip_mloopback(srcifp, ifp, m, dst, hlen); } #if MROUTING else { @@ -917,12 +991,12 @@ loopit: * above, will be forwarded by the ip_input() routine, * if necessary. */ - if (ip_mrouter && (flags & IP_FORWARDING) == 0) { + if (ip_mrouter && !(flags & IP_FORWARDING)) { /* - * Check if rsvp daemon is running. If not, don't - * set ip_moptions. This ensures that the packet - * is multicast and not just sent down one link - * as prescribed by rsvpd. + * Check if rsvp daemon is running. If not, + * don't set ip_moptions. This ensures that + * the packet is multicast and not just sent + * down one link as prescribed by rsvpd. */ if (!rsvp_on) imo = NULL; @@ -962,12 +1036,13 @@ loopit: ip->ip_src = IA_SIN(ia)->sin_addr; IFA_UNLOCK(&ia->ia_ifa); #if IPFIREWALL_FORWARD - /* Keep note that we did this - if the firewall changes + /* + * Keep note that we did this - if the firewall changes * the next-hop, our interface may change, changing the * default source IP. It's a shame so much effort happens - * twice. Oh well. + * twice. Oh well. */ - fwd_rewrite_src++; + ipobf.fwd_rewrite_src = TRUE; #endif /* IPFIREWALL_FORWARD */ } @@ -976,12 +1051,12 @@ loopit: * and verify user is allowed to send * such a packet. 
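[Annotation] The "seen = (inject_filter_ref == NULL)" dance in the filter loop above exists so that a packet re-injected by filter N resumes with filter N+1 instead of traversing the whole chain again. A userland sketch of the same resume logic, assuming a plain array in place of the ipv4_filters TAILQ (all names here are invented for the demo):

#include <stdio.h>
#include <stddef.h>

typedef int (*out_fn)(const char *pkt);

static int f1(const char *p) { printf("f1 sees %s\n", p); return (0); }
static int f2(const char *p) { printf("f2 sees %s\n", p); return (0); }
static int f3(const char *p) { printf("f3 sees %s\n", p); return (0); }

static out_fn filters[] = { f1, f2, f3 };

/*
 * Run the output filters, skipping up to and including the filter
 * that re-injected the packet (if any): a packet re-queued by f2
 * must not revisit f1 or f2.
 */
static void
run_filters(const char *pkt, out_fn inject_ref)
{
    size_t i;
    int seen = (inject_ref == NULL);

    for (i = 0; i < sizeof (filters) / sizeof (filters[0]); i++) {
        if (seen == 0) {
            if (filters[i] == inject_ref)
                seen = 1;       /* resume after this one */
        } else if (filters[i](pkt) != 0) {
            return;             /* filter consumed the packet */
        }
    }
}

int
main(void)
{
    run_filters("fresh packet", NULL);      /* f1, f2, f3 */
    run_filters("reinjected by f2", f2);    /* f3 only */
    return (0);
}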
*/ - if (isbroadcast) { - if ((ifp->if_flags & IFF_BROADCAST) == 0) { + if (ipobf.isbroadcast) { + if (!(ifp->if_flags & IFF_BROADCAST)) { error = EADDRNOTAVAIL; goto bad; } - if ((flags & IP_ALLOWBROADCAST) == 0) { + if (!(flags & IP_ALLOWBROADCAST)) { error = EACCES; goto bad; } @@ -1001,7 +1076,7 @@ sendit: if (PF_IS_ENABLED) { int rc; - m0 = m; /* Save for later */ + m0 = m; /* Save for later */ #if DUMMYNET args.fwa_m = m; args.fwa_next_hop = dst; @@ -1040,23 +1115,25 @@ sendit: hlen = IP_VHL_HL(ip->ip_vhl) << 2; } #endif /* PF */ - /* - * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt - */ - if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { + /* + * Force IP TTL to 255 following draft-ietf-zeroconf-ipv4-linklocal.txt + */ + if (IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || + IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { ip_linklocal_stat.iplls_out_total++; if (ip->ip_ttl != MAXTTL) { ip_linklocal_stat.iplls_out_badttl++; ip->ip_ttl = MAXTTL; } - } + } - if (!didfilter && !TAILQ_EMPTY(&ipv4_filters)) { + if (!ipobf.didfilter && !TAILQ_EMPTY(&ipv4_filters)) { struct ipfilter *filter; - int seen = (inject_filter_ref == 0); + int seen = (inject_filter_ref == NULL); ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS; - /* Check that a TSO frame isn't passed to a filter. + /* + * Check that a TSO frame isn't passed to a filter. * This could happen if a filter is inserted while * TCP is sending the TSO packet. */ @@ -1068,19 +1145,20 @@ sendit: ipf_ref(); /* 4135317 - always pass network byte order to filter */ - #if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); #endif - TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (seen == 0) { - if ((struct ipfilter *)inject_filter_ref == filter) + if ((struct ipfilter *)inject_filter_ref == + filter) seen = 1; } else if (filter->ipf_filter.ipf_output) { errno_t result; - result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); + result = filter->ipf_filter. + ipf_output(filter->ipf_filter.cookie, + (mbuf_t *)&m, ippo); if (result == EJUSTRETURN) { ipf_unref(); goto done; @@ -1091,37 +1169,39 @@ sendit: } } } - /* set back to host byte order */ ip = mtod(m, struct ip *); - #if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); #endif - ipf_unref(); } #if IPSEC /* temporary for testing only: bypass ipsec alltogether */ - if (ipsec_bypass != 0 || (flags & IP_NOIPSEC) != 0) + if (ipsec_bypass != 0 || (flags & IP_NOIPSEC)) goto skip_ipsec; - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); - - - /* get SP for this packet */ - if (so == NULL) - sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error); - else - sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_START, 0, 0, 0, 0, 0); + /* May have been set above if packet was bound */ if (sp == NULL) { - IPSEC_STAT_INCREMENT(ipsecstat.out_inval); - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); - goto bad; + /* get SP for this packet */ + if (so == NULL) + sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, + flags, &error); + else + sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, + so, &error); + + if (sp == NULL) { + IPSEC_STAT_INCREMENT(ipsecstat.out_inval); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, + 0, 0, 0, 0, 0); + goto bad; + } } error = 0; @@ -1134,20 +1214,32 @@ sendit: * This packet is just discarded. 
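[Annotation] On the TTL forcing above: the cited zeroconf draft wants 169.254/16 traffic to carry TTL 255 so that packets which have crossed a router can be rejected. A minimal sketch of the check, assuming constants that mirror <netinet/in.h>'s IN_LINKLOCAL and MAXTTL (the DEMO_* names are illustrative):

#include <assert.h>
#include <stdint.h>

#define DEMO_MAXTTL             255
/* 169.254/16 test in host byte order, as IN_LINKLOCAL does */
#define DEMO_IN_LINKLOCAL(a)    (((uint32_t)(a) & 0xffff0000U) == 0xa9fe0000U)

/* Returns the TTL the datagram must carry on the wire. */
static uint8_t
linklocal_ttl(uint32_t src_h, uint32_t dst_h, uint8_t ttl)
{
    if (DEMO_IN_LINKLOCAL(src_h) || DEMO_IN_LINKLOCAL(dst_h))
        return (DEMO_MAXTTL);   /* force 255 for link-local traffic */
    return (ttl);
}

int
main(void)
{
    uint32_t ll = 0xa9fe0102U;      /* 169.254.1.2 */
    uint32_t uni = 0x0a000001U;     /* 10.0.0.1 */

    assert(linklocal_ttl(ll, uni, 64) == 255);
    assert(linklocal_ttl(uni, uni, 64) == 64);
    return (0);
}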
*/ IPSEC_STAT_INCREMENT(ipsecstat.out_polvio); - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 1,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, + 1, 0, 0, 0, 0); goto bad; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 2,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, + 2, 0, 0, 0, 0); goto skip_ipsec; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 3,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, + 3, 0, 0, 0, 0); + goto bad; + } + if (sp->ipsec_if) { + /* Verify the redirect to ipsec interface */ + if (sp->ipsec_if == ifp) { + /* Set policy for mbuf */ + m->m_pkthdr.ipsec_policy = sp->id; + goto skip_ipsec; + } goto bad; } break; @@ -1156,13 +1248,14 @@ sendit: default: printf("ip_output: Invalid policy found. %d\n", sp->policy); } - { + { ipsec_state.m = m; if (flags & IP_ROUTETOIF) { - bzero(&ipsec_state.ro, sizeof(ipsec_state.ro)); - } else - route_copyout(&ipsec_state.ro, ro, sizeof(ipsec_state.ro)); - ipsec_state.dst = (struct sockaddr *)dst; + bzero(&ipsec_state.ro, sizeof (ipsec_state.ro)); + } else { + route_copyout(&ipsec_state.ro, ro, sizeof (ipsec_state.ro)); + } + ipsec_state.dst = SA(dst); ip->ip_sum = 0; @@ -1170,11 +1263,8 @@ sendit: * XXX * delayed checksums are not currently compatible with IPsec */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } - #if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); @@ -1182,13 +1272,23 @@ sendit: #endif DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL, - struct ip *, ip, struct ifnet *, ifp, - struct ip *, ip, struct ip6_hdr *, NULL); + struct ip *, ip, struct ifnet *, ifp, + struct ip *, ip, struct ip6_hdr *, NULL); error = ipsec4_output(&ipsec_state, sp, flags); m0 = m = ipsec_state.m; +#if DUMMYNET + /* + * If we're about to use the route in ipsec_state + * and this came from dummynet, cleaup now. + */ + if (ro == &saved_route && + (!(flags & IP_ROUTETOIF) || ipsec_state.tunneled)) + ROUTE_RELEASE(ro); +#endif /* DUMMYNET */ + if (flags & IP_ROUTETOIF) { /* * if we have tunnel mode SA, we may need to ignore @@ -1196,14 +1296,12 @@ sendit: */ if (ipsec_state.tunneled) { flags &= ~IP_ROUTETOIF; - ipsec_saved_route = ro; ro = &ipsec_state.ro; } } else { - ipsec_saved_route = ro; ro = &ipsec_state.ro; } - dst = (struct sockaddr_in *)(void *)ipsec_state.dst; + dst = SIN(ipsec_state.dst); if (error) { /* mbuf is already reclaimed in ipsec4_output. 
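[Annotation] in_delayed_cksum(), called above because delayed transport checksums don't mix with IPsec (the payload is transformed before any NIC could fill the field in), ultimately computes the standard Internet checksum. A flat-buffer version of that arithmetic; the kernel's routine walks an mbuf chain, but the math is the same (the test words are borrowed from RFC 1071's worked example):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* One's-complement sum over a buffer, RFC 1071 style. */
static uint16_t
cksum_rfc1071(const uint8_t *buf, size_t len)
{
    uint32_t sum = 0;

    while (len > 1) {
        sum += (uint32_t)buf[0] << 8 | buf[1];
        buf += 2;
        len -= 2;
    }
    if (len == 1)               /* pad an odd trailing byte with zero */
        sum += (uint32_t)buf[0] << 8;
    while (sum >> 16)           /* fold carries back in */
        sum = (sum & 0xffff) + (sum >> 16);
    return ((uint16_t)~sum);
}

int
main(void)
{
    uint8_t hdr[4] = { 0x00, 0x01, 0xf2, 0x03 };

    /* 0x0001 + 0xf203 = 0xf204; checksum is its complement. */
    assert(cksum_rfc1071(hdr, sizeof (hdr)) == (uint16_t)~0xf204);
    return (0);
}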
*/ m0 = NULL; @@ -1216,55 +1314,59 @@ sendit: break; default: printf("ip4_output (ipsec): error code %d\n", error); - /*fall through*/ + /* FALLTHRU */ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 4,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, + 4, 0, 0, 0, 0); goto bad; } - } + } /* be sure to update variables that are affected by ipsec4_output() */ ip = mtod(m, struct ip *); #ifdef _IP_VHL hlen = IP_VHL_HL(ip->ip_vhl) << 2; -#else +#else /* !_IP_VHL */ hlen = ip->ip_hl << 2; -#endif +#endif /* !_IP_VHL */ /* Check that there wasn't a route change and src is still valid */ - if (ro->ro_rt != NULL && ro->ro_rt->generation_id != route_generation) { - if ((src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL && - ((flags & (IP_ROUTETOIF | IP_FORWARDING)) == 0)) { + if (ROUTE_UNUSABLE(ro)) { + ROUTE_RELEASE(ro); + VERIFY(src_ia == NULL); + if (ip->ip_src.s_addr != INADDR_ANY && + !(flags & (IP_ROUTETOIF | IP_FORWARDING)) && + (src_ia = ifa_foraddr(ip->ip_src.s_addr)) == NULL) { error = EADDRNOTAVAIL; KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, - 5,0,0,0,0); + 5, 0, 0, 0, 0); goto bad; } - rtfree(ro->ro_rt); - ro->ro_rt = NULL; - if (src_ia != NULL) + if (src_ia != NULL) { IFA_REMREF(&src_ia->ia_ifa); + src_ia = NULL; + } } if (ro->ro_rt == NULL) { - if ((flags & IP_ROUTETOIF) == 0) { - printf("ip_output: can't update route after " - "IPsec processing\n"); - error = EHOSTUNREACH; /*XXX*/ + if (!(flags & IP_ROUTETOIF)) { + printf("%s: can't update route after " + "IPsec processing\n", __func__); + error = EHOSTUNREACH; /* XXX */ KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, - 6,0,0,0,0); + 6, 0, 0, 0, 0); goto bad; } } else { - if (ia) + if (ia != NULL) IFA_REMREF(&ia->ia_ifa); RT_LOCK_SPIN(ro->ro_rt); ia = ifatoia(ro->ro_rt->rt_ifa); - if (ia) { + if (ia != NULL) { /* Become a regular mutex */ RT_CONVERT_LOCK(ro->ro_rt); IFA_ADDREF(&ia->ia_ifa); @@ -1274,13 +1376,12 @@ sendit: } /* make it flipped, again. */ - #if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); #endif - - KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, 7,0xff,0xff,0xff,0xff); + KERNEL_DEBUG(DBG_FNC_IPSEC4_OUTPUT | DBG_FUNC_END, + 7, 0xff, 0xff, 0xff, 0xff); /* Pass to filters again */ if (!TAILQ_EMPTY(&ipv4_filters)) { @@ -1288,7 +1389,8 @@ sendit: ipf_pktopts.ippo_flags &= ~IPPOF_MCAST_OPTS; - /* Check that a TSO frame isn't passed to a filter. + /* + * Check that a TSO frame isn't passed to a filter. * This could happen if a filter is inserted while * TCP is sending the TSO packet. */ @@ -1300,16 +1402,16 @@ sendit: ipf_ref(); /* 4135317 - always pass network byte order to filter */ - #if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); #endif - TAILQ_FOREACH(filter, &ipv4_filters, ipf_link) { if (filter->ipf_filter.ipf_output) { errno_t result; - result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); + result = filter->ipf_filter. 
+ ipf_output(filter->ipf_filter.cookie, + (mbuf_t *)&m, ippo); if (result == EJUSTRETURN) { ipf_unref(); goto done; @@ -1320,19 +1422,16 @@ sendit: } } } - /* set back to host byte order */ ip = mtod(m, struct ip *); - #if BYTE_ORDER != BIG_ENDIAN NTOHS(ip->ip_len); NTOHS(ip->ip_off); #endif - ipf_unref(); } skip_ipsec: -#endif /*IPSEC*/ +#endif /* IPSEC */ #if IPFIREWALL /* @@ -1345,38 +1444,38 @@ skip_ipsec: args.fwa_m = m; args.fwa_next_hop = dst; args.fwa_oif = ifp; - off = ip_fw_chk_ptr(&args); + ipfwoff = ip_fw_chk_ptr(&args); m = args.fwa_m; dst = args.fwa_next_hop; - /* - * On return we must do the following: - * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new) - * 1<=off<= 0xffff -> DIVERT - * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe - * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet - * dst != old -> IPFIREWALL_FORWARD - * off==0, dst==old -> accept - * If some of the above modules is not compiled in, then - * we should't have to check the corresponding condition - * (because the ipfw control socket should not accept - * unsupported rules), but better play safe and drop - * packets in case of doubt. - */ + /* + * On return we must do the following: + * IP_FW_PORT_DENY_FLAG -> drop the pkt (XXX new) + * 1<=off<= 0xffff -> DIVERT + * (off & IP_FW_PORT_DYNT_FLAG) -> send to a DUMMYNET pipe + * (off & IP_FW_PORT_TEE_FLAG) -> TEE the packet + * dst != old -> IPFIREWALL_FORWARD + * off==0, dst==old -> accept + * If some of the above modules is not compiled in, then + * we should't have to check the corresponding condition + * (because the ipfw control socket should not accept + * unsupported rules), but better play safe and drop + * packets in case of doubt. + */ m0 = m; - if ( (off & IP_FW_PORT_DENY_FLAG) || m == NULL) { + if ((ipfwoff & IP_FW_PORT_DENY_FLAG) || m == NULL) { if (m) m_freem(m); - error = EACCES ; - goto done ; + error = EACCES; + goto done; } ip = mtod(m, struct ip *); - if (off == 0 && dst == old) {/* common case */ - goto pass ; + if (ipfwoff == 0 && dst == old) { /* common case */ + goto pass; } #if DUMMYNET - if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { + if (DUMMYNET_LOADED && (ipfwoff & IP_FW_PORT_DYNT_FLAG) != 0) { /* * pass the pkt to dummynet. Need to include * pipe number, m, ifp, ro, dst because these are @@ -1392,27 +1491,25 @@ skip_ipsec: if (flags & IP_OUTARGS) args.fwa_ipoa = ipoa; - error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, + error = ip_dn_io_ptr(m, ipfwoff & 0xffff, DN_TO_IP_OUT, &args, DN_CLIENT_IPFW); goto done; } #endif /* DUMMYNET */ #if IPDIVERT - if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { + if (ipfwoff != 0 && (ipfwoff & IP_FW_PORT_DYNT_FLAG) == 0) { struct mbuf *clone = NULL; /* Clone packet if we're doing a 'tee' */ - if ((off & IP_FW_PORT_TEE_FLAG) != 0) + if ((ipfwoff & IP_FW_PORT_TEE_FLAG) != 0) clone = m_dup(m, M_DONTWAIT); /* * XXX * delayed checksums are not currently compatible * with divert sockets. 
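[Annotation] The block comment above enumerates how ip_fw_chk_ptr()'s return value is packed: flag bits select drop, dummynet, or tee handling, and the low 16 bits carry a divert port or pipe number. A toy decoder showing the same dispatch order (the DEMO_* flag values are placeholders, not ip_fw.h's actual bits):

#include <stdio.h>

#define DEMO_PORT_DENY_FLAG 0x40000     /* illustrative values */
#define DEMO_PORT_TEE_FLAG  0x20000
#define DEMO_PORT_DYNT_FLAG 0x10000

static const char *
classify_ipfw(unsigned int off)
{
    if (off & DEMO_PORT_DENY_FLAG)
        return ("drop");
    if (off == 0)
        return ("accept (unless the next hop changed)");
    if (off & DEMO_PORT_DYNT_FLAG)
        return ("dummynet pipe (low 16 bits = pipe number)");
    if (off & DEMO_PORT_TEE_FLAG)
        return ("tee: divert a copy, continue with the original");
    return ("divert socket (low 16 bits = port)");
}

int
main(void)
{
    printf("%s\n", classify_ipfw(0));
    printf("%s\n", classify_ipfw(8668));
    printf("%s\n", classify_ipfw(DEMO_PORT_DYNT_FLAG | 5));
    return (0);
}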
*/ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } /* Restore packet header fields to original values */ @@ -1422,7 +1519,8 @@ skip_ipsec: #endif /* Deliver packet to divert input routine */ - divert_packet(m, 0, off & 0xffff, args.fwa_divert_rule); + divert_packet(m, 0, ipfwoff & 0xffff, + args.fwa_divert_rule); /* If 'tee', continue with original packet */ if (clone != NULL) { @@ -1432,11 +1530,11 @@ skip_ipsec: } goto done; } -#endif - +#endif /* IPDIVERT */ #if IPFIREWALL_FORWARD - /* Here we check dst to make sure it's directly reachable on the - * interface we previously thought it was. + /* + * Here we check dst to make sure it's directly reachable on + * the interface we previously thought it was. * If it isn't (which may be likely in some situations) we have * to re-route it (ie, find a route for the next-hop and the * associated interface) and set them here. This is nested @@ -1444,27 +1542,25 @@ skip_ipsec: * such control is nigh impossible. So we do it here. * And I'm babbling. */ - if (off == 0 && old != dst) { + if (ipfwoff == 0 && old != dst) { struct in_ifaddr *ia_fw; + struct route *ro_fwd = &sro_fwd; - /* It's changed... */ - /* There must be a better way to do this next line... */ - static struct route sro_fwd, *ro_fwd = &sro_fwd; #if IPFIREWALL_FORWARD_DEBUG printf("IPFIREWALL_FORWARD: New dst ip: "); print_ip(dst->sin_addr); printf("\n"); -#endif +#endif /* IPFIREWALL_FORWARD_DEBUG */ /* * We need to figure out if we have been forwarded - * to a local socket. If so then we should somehow + * to a local socket. If so then we should somehow * "loop back" to ip_input, and get directed to the * PCB as if we had received this packet. This is * because it may be dificult to identify the packets * you want to forward until they are being output * and have selected an interface. (e.g. locally * initiated packets) If we used the loopback inteface, - * we would not be able to control what happens + * we would not be able to control what happens * as the packet runs through ip_input() as * it is done through a ISR. */ @@ -1486,7 +1582,7 @@ skip_ipsec: lck_rw_done(in_ifaddr_rwlock); if (ia_fw) { /* tell ip_input "dont filter" */ - struct m_tag *fwd_tag; + struct m_tag *fwd_tag; struct ip_fwd_tag *ipfwd_tag; fwd_tag = m_tag_create(KERNEL_MODULE_TAG_ID, @@ -1504,49 +1600,35 @@ skip_ipsec: if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = lo_ifp; - if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) & - m->m_pkthdr.csum_flags) == 0) { - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; - } - m->m_pkthdr.csum_flags |= - CSUM_IP_CHECKED | CSUM_IP_VALID; - } - else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - ip->ip_sum = in_cksum(m, hlen); - } #if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); #endif + mbuf_outbound_finalize(m, PF_INET, 0); - /* we need to call dlil_output to run filters - * and resync to avoid recursion loops. + /* + * we need to call dlil_output to run filters + * and resync to avoid recursion loops. 
*/ if (lo_ifp) { - dlil_output(lo_ifp, PF_INET, m, 0, - (struct sockaddr *)dst, 0, adv); - } - else { - printf("ip_output: no loopback ifp for forwarding!!!\n"); + dlil_output(lo_ifp, PF_INET, m, NULL, + SA(dst), 0, adv); + } else { + printf("%s: no loopback ifp for " + "forwarding!!!\n", __func__); } goto done; } - /* Some of the logic for this was - * nicked from above. + /* + * Some of the logic for this was nicked from above. * * This rewrites the cached route in a local PCB. * Is this what we want to do? */ - bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst)); + ROUTE_RELEASE(ro_fwd); + bcopy(dst, &ro_fwd->ro_dst, sizeof (*dst)); - ro_fwd->ro_rt = NULL; rtalloc_ign(ro_fwd, RTF_PRCLONING); if (ro_fwd->ro_rt == NULL) { @@ -1565,19 +1647,22 @@ skip_ipsec: ifp = ro_fwd->ro_rt->rt_ifp; ro_fwd->ro_rt->rt_use++; if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY) - dst = (struct sockaddr_in *)(void *)ro_fwd->ro_rt->rt_gateway; + dst = SIN(ro_fwd->ro_rt->rt_gateway); if (ro_fwd->ro_rt->rt_flags & RTF_HOST) { - isbroadcast = - (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); + /* double negation needed for bool bit field */ + ipobf.isbroadcast = + !!(ro_fwd->ro_rt->rt_flags & RTF_BROADCAST); } else { /* Become a regular mutex */ RT_CONVERT_LOCK(ro_fwd->ro_rt); - isbroadcast = in_broadcast(dst->sin_addr, ifp); + ipobf.isbroadcast = + in_broadcast(dst->sin_addr, ifp); } RT_UNLOCK(ro_fwd->ro_rt); - rtfree(ro->ro_rt); + ROUTE_RELEASE(ro); ro->ro_rt = ro_fwd->ro_rt; - dst = (struct sockaddr_in *)(void *)&ro_fwd->ro_dst; + ro_fwd->ro_rt = NULL; + dst = SIN(&ro_fwd->ro_dst); /* * If we added a default src ip earlier, @@ -1585,20 +1670,20 @@ skip_ipsec: * interface, do it again, from the new one. */ if (ia_fw != NULL) { - if (fwd_rewrite_src) { + if (ipobf.fwd_rewrite_src) { IFA_LOCK_SPIN(&ia_fw->ia_ifa); ip->ip_src = IA_SIN(ia_fw)->sin_addr; IFA_UNLOCK(&ia_fw->ia_ifa); } IFA_REMREF(&ia_fw->ia_ifa); } - goto pass ; + goto pass; } #endif /* IPFIREWALL_FORWARD */ - /* - * if we get here, none of the above matches, and - * we have to drop the pkt - */ + /* + * if we get here, none of the above matches, and + * we have to drop the pkt + */ m_freem(m); error = EACCES; /* not sure this is the right error msg */ goto done; @@ -1606,77 +1691,26 @@ skip_ipsec: pass: #endif /* IPFIREWALL */ -#if __APPLE__ - /* Do not allow loopback address to wind up on a wire */ - if ((ifp->if_flags & IFF_LOOPBACK) == 0 && - ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || - (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { + + /* 127/8 must not appear on wire - RFC1122 */ + if (!(ifp->if_flags & IFF_LOOPBACK) && + ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || + (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) { OSAddAtomic(1, &ipstat.ips_badaddr); m_freem(m); - /* - * Do not simply drop the packet just like a firewall -- we want the - * the application to feel the pain. - * Return ENETUNREACH like ip6_output does in some similar cases. - * This can startle the otherwise clueless process that specifies - * loopback as the source address. 
- */ - error = ENETUNREACH; + error = EADDRNOTAVAIL; goto done; } -#endif - m->m_pkthdr.csum_flags |= CSUM_IP; - tso = (ifp->if_hwassist & IFNET_TSO_IPV4) && (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4); - sw_csum = m->m_pkthdr.csum_flags - & ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); - - if ((ifp->if_hwassist & CSUM_TCP_SUM16) != 0) { - /* - * Special case code for GMACE - * frames that can be checksumed by GMACE SUM16 HW: - * frame >64, no fragments, no UDP - */ - if (apple_hwcksum_tx && (m->m_pkthdr.csum_flags & CSUM_TCP) - && (ip->ip_len > 50) && (ip->ip_len <= ifp->if_mtu)) { - /* Apple GMAC HW, expects STUFF_OFFSET << 16 | START_OFFSET */ - u_short offset = (IP_VHL_HL(ip->ip_vhl) << 2) +14 ; /* IP+Enet header length */ - u_short csumprev= m->m_pkthdr.csum_data & 0xFFFF; - m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_TCP_SUM16; /* for GMAC */ - m->m_pkthdr.csum_data = (csumprev + offset) << 16 ; - m->m_pkthdr.csum_data += offset; - sw_csum = CSUM_DELAY_IP; /* do IP hdr chksum in software */ - } else { - /* let the software handle any UDP or TCP checksums */ - sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags); - } - } else if (apple_hwcksum_tx == 0) { - sw_csum |= (CSUM_DELAY_DATA | CSUM_DELAY_IP) & - m->m_pkthdr.csum_flags; - } - - if (sw_csum & CSUM_DELAY_DATA) { - in_delayed_cksum(m); - sw_csum &= ~CSUM_DELAY_DATA; - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } - - if (apple_hwcksum_tx != 0) { - m->m_pkthdr.csum_flags &= - IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); - } else { - m->m_pkthdr.csum_flags = 0; - } + ip_output_checksum(ifp, m, (IP_VHL_HL(ip->ip_vhl) << 2), + ip->ip_len, &sw_csum); /* * If small enough for interface, or the interface will take * care of the fragmentation for us, can just send directly. */ - if ((u_short)ip->ip_len <= ifp->if_mtu || tso || - ifp->if_hwassist & CSUM_FRAGMENT) { - if (tso) - m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4; - - + if ((u_short)ip->ip_len <= ifp->if_mtu || TSO_IPV4_OK(ifp, m) || + (!(ip->ip_off & IP_DF) && (ifp->if_hwassist & CSUM_FRAGMENT))) { #if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); @@ -1684,30 +1718,36 @@ pass: ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { - ip->ip_sum = in_cksum(m, hlen); + ip->ip_sum = ip_cksum_hdr_out(m, hlen); + sw_csum &= ~CSUM_DELAY_IP; + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP; } -#ifndef __APPLE__ - /* Record statistics for this interface address. 
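[Annotation] The rewritten check above enforces RFC 1122's rule that 127/8 must never appear on a non-loopback wire, now failing with EADDRNOTAVAIL instead of the earlier ENETUNREACH. The classful-shift trick it uses, standalone (the DEMO_* constants mirror IN_CLASSA_NSHIFT and IN_LOOPBACKNET):

#include <assert.h>
#include <stdint.h>

#define DEMO_CLASSA_NSHIFT  24
#define DEMO_LOOPBACKNET    127     /* 127.0.0.0/8 */

/* Non-zero if either address would leak the loopback net onto a wire. */
static int
leaks_loopback(uint32_t src_h, uint32_t dst_h, int if_is_loopback)
{
    if (if_is_loopback)
        return (0);     /* loopback traffic may of course use 127/8 */
    return ((src_h >> DEMO_CLASSA_NSHIFT) == DEMO_LOOPBACKNET ||
        (dst_h >> DEMO_CLASSA_NSHIFT) == DEMO_LOOPBACKNET);
}

int
main(void)
{
    assert(leaks_loopback(0x7f000001U, 0x0a000001U, 0));    /* 127.0.0.1 src */
    assert(!leaks_loopback(0x0a000001U, 0x0a000002U, 0));
    assert(!leaks_loopback(0x7f000001U, 0x7f000001U, 1));
    return (0);
}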
*/ - if (!(flags & IP_FORWARDING) && ia != NULL) { - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; - } -#endif - #if IPSEC /* clean ipsec history once it goes out of the node */ - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) + if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) ipsec_delaux(m); -#endif +#endif /* IPSEC */ + if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) && + (m->m_pkthdr.tso_segsz > 0)) + scnt += m->m_pkthdr.len / m->m_pkthdr.tso_segsz; + else + scnt++; + if (packetchain == 0) { - if (ro->ro_rt && nstat_collect) - nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0); + if (ro->ro_rt != NULL && nstat_collect) + nstat_route_tx(ro->ro_rt, scnt, + m->m_pkthdr.len, 0); + error = dlil_output(ifp, PF_INET, m, ro->ro_rt, - (struct sockaddr *)dst, 0, adv); + SA(dst), 0, adv); + scnt = 0; goto done; - } - else { /* packet chaining allows us to reuse the route for all packets */ + } else { + /* + * packet chaining allows us to reuse the + * route for all packets + */ bytecnt += m->m_pkthdr.len; mppn = &m->m_nextpkt; m = m->m_nextpkt; @@ -1717,12 +1757,14 @@ sendchain: #endif /* PF */ if (pktcnt > ip_maxchainsent) ip_maxchainsent = pktcnt; - if (ro->ro_rt && nstat_collect) - nstat_route_tx(ro->ro_rt, pktcnt, bytecnt, 0); - //send + if (ro->ro_rt != NULL && nstat_collect) + nstat_route_tx(ro->ro_rt, scnt, + bytecnt, 0); + error = dlil_output(ifp, PF_INET, packetlist, - ro->ro_rt, (struct sockaddr *)dst, 0, adv); + ro->ro_rt, SA(dst), 0, adv); pktcnt = 0; + scnt = 0; bytecnt = 0; goto done; @@ -1735,10 +1777,10 @@ sendchain: /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. + * Balk when DF bit is set or the interface didn't support TSO. */ - - if (ip->ip_off & IP_DF || (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) || - pktcnt > 0) { + if ((ip->ip_off & IP_DF) || pktcnt > 0 || + (m->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) { error = EMSGSIZE; /* * This case can happen if the user changed the MTU @@ -1749,9 +1791,9 @@ sendchain: */ if (ro->ro_rt) { RT_LOCK_SPIN(ro->ro_rt); - if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) - && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) - && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { + if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && + !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && + (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; } RT_UNLOCK(ro->ro_rt); @@ -1769,60 +1811,68 @@ sendchain: goto bad; } - KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, - ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); + KERNEL_DEBUG(DBG_LAYER_END, ip->ip_dst.s_addr, + ip->ip_src.s_addr, ip->ip_p, ip->ip_off, ip->ip_len); for (m = m0; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; #if IPSEC /* clean ipsec history once it goes out of the node */ - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) + if (ipsec_bypass == 0 && !(flags & IP_NOIPSEC)) ipsec_delaux(m); -#endif +#endif /* IPSEC */ if (error == 0) { -#ifndef __APPLE__ - /* Record statistics for this interface address. 
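[Annotation] The new scnt accounting above feeds nstat_route_tx() an estimate of wire packets rather than mbuf chains: a TSO burst counts as len / tso_segsz segments. That arithmetic in isolation; note it is floor division, so a trailing short segment is not counted, which keeps it an estimate just as in the code above (names are invented for the demo):

#include <assert.h>
#include <stdint.h>

/* Estimate how many wire packets a send represents, for statistics. */
static uint32_t
wire_pkt_estimate(uint32_t len, uint32_t tso_segsz, int is_tso)
{
    if (is_tso && tso_segsz > 0)
        return (len / tso_segsz);
    return (1);
}

int
main(void)
{
    assert(wire_pkt_estimate(1500, 0, 0) == 1);
    assert(wire_pkt_estimate(64000, 1448, 1) == 44);    /* 44.2 -> 44 */
    return (0);
}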
*/ - if (ia != NULL) { - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; + if ((packetchain != 0) && (pktcnt > 0)) { + panic("%s: mix of packet in packetlist is " + "wrong=%p", __func__, packetlist); + /* NOTREACHED */ + } + if (ro->ro_rt != NULL && nstat_collect) { + nstat_route_tx(ro->ro_rt, 1, + m->m_pkthdr.len, 0); } -#endif - if ((packetchain != 0) && (pktcnt > 0)) - panic("ip_output: mix of packet in packetlist is wrong=%p", packetlist); - if (ro->ro_rt && nstat_collect) - nstat_route_tx(ro->ro_rt, 1, m->m_pkthdr.len, 0); error = dlil_output(ifp, PF_INET, m, ro->ro_rt, - (struct sockaddr *)dst, 0, adv); - } else + SA(dst), 0, adv); + } else { m_freem(m); + } } if (error == 0) OSAddAtomic(1, &ipstat.ips_fragmented); done: - if (ia) { + if (ia != NULL) { IFA_REMREF(&ia->ia_ifa); ia = NULL; } #if IPSEC - if (ipsec_bypass == 0 && (flags & IP_NOIPSEC) == 0) { - if (ipsec_state.ro.ro_rt) - rtfree(ipsec_state.ro.ro_rt); + ROUTE_RELEASE(&ipsec_state.ro); if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ip_output call free SP:%x\n", sp)); + printf("DP ip_output call free SP:%x\n", sp)); key_freesp(sp, KEY_SADB_UNLOCKED); } - } #endif /* IPSEC */ +#if DUMMYNET + ROUTE_RELEASE(&saved_route); +#endif /* DUMMYNET */ +#if IPFIREWALL_FORWARD + ROUTE_RELEASE(&sro_fwd); +#endif /* IPFIREWALL_FORWARD */ - KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_IP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0); return (error); bad: m_freem(m0); goto done; + +#undef ipsec_state +#undef args +#undef sro_fwd +#undef saved_route +#undef ipf_pktopts } int @@ -1836,9 +1886,9 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum) ip = mtod(m, struct ip *); #ifdef _IP_VHL hlen = IP_VHL_HL(ip->ip_vhl) << 2; -#else +#else /* !_IP_VHL */ hlen = ip->ip_hl << 2; -#endif +#endif /* !_IP_VHL */ firstlen = len = (mtu - hlen) &~ 7; if (len < 8) { @@ -1850,11 +1900,9 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum) * if the interface will not calculate checksums on * fragmented packets, then do it here. */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && - (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) && + !(ifp->if_hwassist & CSUM_IP_FRAGS)) in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } /* * Loop through length of segment after first fragment, @@ -1864,7 +1912,7 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum) mhlen = sizeof (struct ip); for (off = hlen + len; off < (u_short)ip->ip_len; off += len) { MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ - if (m == 0) { + if (m == NULL) { error = ENOBUFS; OSAddAtomic(1, &ipstat.ips_odropped); goto sendorfree; @@ -1887,23 +1935,22 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum) mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); m->m_next = m_copy(m0, off, len); - if (m->m_next == 0) { + if (m->m_next == NULL) { (void) m_free(m); error = ENOBUFS; /* ??? 
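[Annotation] ip_fragment()'s carving above hinges on "(mtu - hlen) & ~7": fragment offsets travel in 8-byte units, so every fragment except the last must carry a payload that is a multiple of 8 bytes. A standalone sketch of the same plan; plan_fragments() is invented for the demo and only prints what the kernel would build:

#include <assert.h>
#include <stdio.h>

static int
plan_fragments(int ip_len, int hlen, int mtu)
{
    int len = (mtu - hlen) & ~7;    /* per-fragment payload, 8-aligned */
    int off, nfrags = 1;            /* original mbuf becomes fragment 0 */

    if (len < 8)
        return (-1);                /* cannot make progress */

    for (off = hlen + len; off < ip_len; off += len) {
        int payload = (ip_len - off < len) ? ip_len - off : len;
        int more = (off + payload < ip_len);

        printf("frag: ip_off=%d (8-byte units), payload=%d, MF=%d\n",
            (off - hlen) >> 3, payload, more);
        nfrags++;
    }
    return (nfrags);
}

int
main(void)
{
    /* A 4000-byte datagram, 20-byte header, 1500-byte MTU: 3 fragments. */
    assert(plan_fragments(4000, 20, 1500) == 3);
    return (0);
}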
*/ OSAddAtomic(1, &ipstat.ips_odropped); goto sendorfree; } m->m_pkthdr.len = mhlen + len; - m->m_pkthdr.rcvif = 0; + m->m_pkthdr.rcvif = NULL; m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags; - m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id; + M_COPY_CLASSIFIER(m, m0); M_COPY_PFTAG(m, m0); - m_set_service_class(m, m0->m_pkthdr.svc); #if CONFIG_MACF_NET mac_netinet_fragment(m0, m); -#endif +#endif /* CONFIG_MACF_NET */ #if BYTE_ORDER != BIG_ENDIAN HTONS(mhip->ip_off); @@ -1911,7 +1958,8 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum) mhip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { - mhip->ip_sum = in_cksum(m, mhlen); + mhip->ip_sum = ip_cksum_hdr_out(m, mhlen); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP; } *mnext = m; mnext = &m->m_nextpkt; @@ -1940,7 +1988,8 @@ ip_fragment(struct mbuf *m, struct ifnet *ifp, unsigned long mtu, int sw_csum) ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) { - ip->ip_sum = in_cksum(m, hlen); + ip->ip_sum = ip_cksum_hdr_out(m, hlen); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP; } sendorfree: if (error) @@ -1965,51 +2014,64 @@ ip_out_cksum_stats(int proto, u_int32_t len) } } -void -in_delayed_cksum_offset(struct mbuf *m0, int ip_offset) +/* + * Process a delayed payload checksum calculation (outbound path.) + * + * hoff is the number of bytes beyond the mbuf data pointer which + * points to the IP header. + * + * Returns a bitmask representing all the work done in software. + */ +uint32_t +in_finalize_cksum(struct mbuf *m, uint32_t hoff, uint32_t csum_flags) { + unsigned char buf[15 << 2] __attribute__((aligned(8))); struct ip *ip; - unsigned char buf[sizeof(struct ip)]; - u_short csum, offset, ip_len; + uint32_t offset, _hlen, mlen, hlen, len, sw_csum; + uint16_t csum, ip_len; - /* Save copy of first mbuf pointer and the ip_offset before modifying */ - struct mbuf *m = m0; - int ip_offset_copy = ip_offset; + _CASSERT(sizeof (csum) == sizeof (uint16_t)); + VERIFY(m->m_flags & M_PKTHDR); - while (ip_offset >= m->m_len) { - ip_offset -= m->m_len; - m = m->m_next; - if (m == NULL) { - printf("in_delayed_cksum_withoffset failed - " - "ip_offset wasn't in the packet\n"); - return; - } + sw_csum = (csum_flags & m->m_pkthdr.csum_flags); + + if ((sw_csum &= (CSUM_DELAY_IP | CSUM_DELAY_DATA)) == 0) + goto done; + + mlen = m->m_pkthdr.len; /* total mbuf len */ + + /* sanity check (need at least simple IP header) */ + if (mlen < (hoff + sizeof (*ip))) { + panic("%s: mbuf %p pkt len (%u) < hoff+ip_hdr " + "(%u+%u)\n", __func__, m, mlen, hoff, + (uint32_t)sizeof (*ip)); + /* NOTREACHED */ } /* - * In case the IP header is not contiguous, or not 32-bit - * aligned, copy it to a local buffer. + * In case the IP header is not contiguous, or not 32-bit aligned, + * or if we're computing the IP header checksum, copy it to a local + * buffer. Copy only the simple IP header here (IP options case + * is handled below.) 
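[Annotation] The "buf[15 << 2] __attribute__((aligned(8)))" staging area above guards against two hazards: the header straddling mbufs, and the header being misaligned, either of which makes direct struct access unsafe on strict-alignment CPUs (15 << 2 = 60 bytes, the largest legal IPv4 header). The same copy-to-aligned-local pattern on a flat buffer, with demo types standing in for struct ip:

#include <assert.h>
#include <stdint.h>
#include <string.h>

struct demo_iphdr {         /* stand-in for struct ip */
    uint8_t  vhl;
    uint8_t  tos;
    uint16_t len;
};

/*
 * Read a header that may sit at an odd offset inside a packet buffer:
 * copy into an aligned local first, then access it as a struct, rather
 * than dereferencing a possibly misaligned pointer.
 */
static struct demo_iphdr
read_hdr(const uint8_t *pkt, size_t hoff)
{
    uint8_t buf[sizeof (struct demo_iphdr)] __attribute__((aligned(8)));

    memcpy(buf, pkt + hoff, sizeof (buf));
    return (*(const struct demo_iphdr *)(const void *)buf);
}

int
main(void)
{
    uint8_t pkt[32] = { 0 };

    pkt[3] = 0x45;          /* header begins at odd offset 3 */
    assert(read_hdr(pkt, 3).vhl == 0x45);
    return (0);
}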
*/ - if ((ip_offset + sizeof(struct ip) > m->m_len) || - !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) { -#if DEBUG - printf("delayed m_pullup, m->len: %d off: %d\n", - m->m_len, ip_offset); -#endif - m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf); - + if ((sw_csum & CSUM_DELAY_IP) || (hoff + sizeof (*ip)) > m->m_len || + !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) { + m_copydata(m, hoff, sizeof (*ip), (caddr_t)buf); ip = (struct ip *)(void *)buf; + _hlen = sizeof (*ip); } else { - ip = (struct ip*)(void *)(m->m_data + ip_offset); + ip = (struct ip *)(void *)(m->m_data + hoff); + _hlen = 0; } - /* Gross */ - if (ip_offset) { - m->m_len -= ip_offset; - m->m_data += ip_offset; - } + hlen = IP_VHL_HL(ip->ip_vhl) << 2; /* IP header len */ - offset = IP_VHL_HL(ip->ip_vhl) << 2 ; + /* sanity check */ + if (mlen < (hoff + hlen)) { + panic("%s: mbuf %p pkt too short (%d) for IP header (%u), " + "hoff %u", __func__, m, mlen, hlen, hoff); + /* NOTREACHED */ + } /* * We could be in the context of an IP or interface filter; in the @@ -2017,172 +2079,116 @@ in_delayed_cksum_offset(struct mbuf *m0, int ip_offset) * the latter it would be in network order. Because of this, we * attempt to interpret the length field by comparing it against * the actual packet length. If the comparison fails, byte swap - * the length and check again. If it still fails, then the packet - * is bogus and we give up. + * the length and check again. If it still fails, use the actual + * packet length. This also covers the trailing bytes case. */ ip_len = ip->ip_len; - if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) { - ip_len = SWAP16(ip_len); - if (ip_len != (m0->m_pkthdr.len - ip_offset_copy)) { - printf("in_delayed_cksum_offset: ip_len %d (%d) " - "doesn't match actual length %d\n", ip->ip_len, - ip_len, (m0->m_pkthdr.len - ip_offset_copy)); - return; + if (ip_len != (mlen - hoff)) { + ip_len = OSSwapInt16(ip_len); + if (ip_len != (mlen - hoff)) { + printf("%s: mbuf 0x%llx proto %d IP len %d (%x) " + "[swapped %d (%x)] doesn't match actual packet " + "length; %d is used instead\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m), ip->ip_p, + ip->ip_len, ip->ip_len, ip_len, ip_len, + (mlen - hoff)); + ip_len = mlen - hoff; } } - csum = in_cksum_skip(m, ip_len, offset); + len = ip_len - hlen; /* csum span */ - /* Update stats */ - ip_out_cksum_stats(ip->ip_p, ip_len - offset); + if (sw_csum & CSUM_DELAY_DATA) { + uint16_t ulpoff; - if (m0->m_pkthdr.csum_flags & CSUM_UDP && csum == 0) - csum = 0xffff; - offset += m0->m_pkthdr.csum_data & 0xFFFF; /* checksum offset */ + /* + * offset is added to the lower 16-bit value of csum_data, + * which is expected to contain the ULP offset; therefore + * CSUM_PARTIAL offset adjustment must be undone. + */ + if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL|CSUM_DATA_VALID)) == + (CSUM_PARTIAL|CSUM_DATA_VALID)) { + /* + * Get back the original ULP offset (this will + * undo the CSUM_PARTIAL logic in ip_output.) 
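[Annotation] The host-vs-network byte order guessing described above can be exercised in isolation: compare the length field against the actual packet length, retry byte-swapped, and fall back to the actual length (which also absorbs trailing bytes). The heuristic is inherently ambiguous for byte-palindromic values, which the code accepts. A sketch, with invented names:

#include <assert.h>
#include <stdint.h>

static uint16_t
swap16(uint16_t v)
{
    return ((uint16_t)((v << 8) | (v >> 8)));
}

/* Decide what ip_len "really" is, as in_finalize_cksum() does above. */
static uint16_t
effective_ip_len(uint16_t ip_len_field, uint16_t actual)
{
    if (ip_len_field == actual)
        return (ip_len_field);          /* already host order */
    if (swap16(ip_len_field) == actual)
        return (swap16(ip_len_field));  /* was network order */
    return (actual);                    /* bogus; trust the packet */
}

int
main(void)
{
    assert(effective_ip_len(1500, 1500) == 1500);
    assert(effective_ip_len(swap16(1500), 1500) == 1500);
    assert(effective_ip_len(9999, 1500) == 1500);
    return (0);
}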
+ */ + m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff - + m->m_pkthdr.csum_tx_start); + } - /* Gross */ - if (ip_offset) { - if (M_LEADINGSPACE(m) < ip_offset) - panic("in_delayed_cksum_offset - chain modified!\n"); - m->m_len += ip_offset; - m->m_data -= ip_offset; - } + ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */ + offset = hoff + hlen; /* ULP header */ - if (offset > ip_len) /* bogus offset */ - return; + if (mlen < (ulpoff + sizeof (csum))) { + panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP " + "cksum offset (%u) cksum flags 0x%x\n", __func__, + m, mlen, ip->ip_p, ulpoff, m->m_pkthdr.csum_flags); + /* NOTREACHED */ + } - /* Insert the checksum in the existing chain */ - if (offset + ip_offset + sizeof(u_short) > m->m_len) { - char tmp[2]; + csum = inet_cksum(m, 0, offset, len); -#if DEBUG - printf("delayed m_copyback, m->len: %d off: %d p: %d\n", - m->m_len, offset + ip_offset, ip->ip_p); -#endif - *(u_short *)(void *)tmp = csum; - m_copyback(m, offset + ip_offset, 2, tmp); - } else if (IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) { - *(u_short *)(void *)(m->m_data + offset + ip_offset) = csum; - } else { - bcopy(&csum, (m->m_data + offset + ip_offset), sizeof (csum)); - } -} + /* Update stats */ + ip_out_cksum_stats(ip->ip_p, len); -void -in_delayed_cksum(struct mbuf *m) -{ - in_delayed_cksum_offset(m, 0); -} + /* RFC1122 4.1.3.4 */ + if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDP)) + csum = 0xffff; -void -in_cksum_offset(struct mbuf* m, size_t ip_offset) -{ - struct ip* ip = NULL; - int hlen = 0; - unsigned char buf[sizeof(struct ip)]; - int swapped = 0; - - /* Save copy of first mbuf pointer and the ip_offset before modifying */ - struct mbuf* m0 = m; - size_t ip_offset_copy = ip_offset; - - while (ip_offset >= m->m_len) { - ip_offset -= m->m_len; - m = m->m_next; - if (m == NULL) { - printf("in_cksum_offset failed - ip_offset wasn't " - "in the packet\n"); - return; + /* Insert the checksum in the ULP csum field */ + offset += ulpoff; + if (offset + sizeof (csum) > m->m_len) { + m_copyback(m, offset, sizeof (csum), &csum); + } else if (IP_HDR_ALIGNED_P(mtod(m, char *) + hoff)) { + *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum; + } else { + bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum)); } + m->m_pkthdr.csum_flags &= + ~(CSUM_DELAY_DATA | CSUM_DATA_VALID | CSUM_PARTIAL); } - /* - * In case the IP header is not contiguous, or not 32-bit - * aligned, copy it to a local buffer. - */ - if ((ip_offset + sizeof(struct ip) > m->m_len) || - !IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) { -#if DEBUG - printf("in_cksum_offset - delayed m_pullup, m->len: %d " - "off: %lu\n", m->m_len, ip_offset); -#endif - m_copydata(m, ip_offset, sizeof(struct ip), (caddr_t) buf); + if (sw_csum & CSUM_DELAY_IP) { + /* IP header must be in the local buffer */ + VERIFY(_hlen == sizeof (*ip)); + if (_hlen != hlen) { + VERIFY(hlen <= sizeof (buf)); + m_copydata(m, hoff, hlen, (caddr_t)buf); + ip = (struct ip *)(void *)buf; + _hlen = hlen; + } - ip = (struct ip *)(void *)buf; - ip->ip_sum = 0; - m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, - (caddr_t)&ip->ip_sum); - } else { - ip = (struct ip*)(void *)(m->m_data + ip_offset); + /* + * Compute the IP header checksum as if the IP length + * is the length which we believe is "correct"; see + * how ip_len gets calculated above. Note that this + * is done on the local copy and not on the real one. 
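[Annotation] The three-way checksum store above (m_copyback, aligned direct store, bcopy) reduces, on a flat buffer, to an alignment test plus a byte copy; the m_copyback arm exists only because the field can straddle mbufs, which a flat buffer cannot show. A sketch of the alignment split; note the kernel's IP_HDR_ALIGNED_P actually tests the header's 32-bit alignment, while this demo tests the field's address directly:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Store a 16-bit checksum at an arbitrary byte offset in a packet. */
static void
store_csum(uint8_t *pkt, size_t offset, uint16_t csum)
{
    if (((uintptr_t)(pkt + offset) & 1) == 0)
        *(uint16_t *)(void *)(pkt + offset) = csum;     /* aligned */
    else
        memcpy(pkt + offset, &csum, sizeof (csum));     /* byte copy */
}

int
main(void)
{
    uint8_t pkt[8] __attribute__((aligned(2))) = { 0 };
    uint16_t v;

    store_csum(pkt, 3, 0xbeef);     /* odd address: memcpy path */
    memcpy(&v, pkt + 3, sizeof (v));
    assert(v == 0xbeef);
    return (0);
}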
+ */ + ip->ip_len = htons(ip_len); ip->ip_sum = 0; - } + csum = in_cksum_hdr_opt(ip); - /* Gross */ - if (ip_offset) { - m->m_len -= ip_offset; - m->m_data += ip_offset; - } + /* Update stats */ + ipstat.ips_snd_swcsum++; + ipstat.ips_snd_swcsum_bytes += hlen; -#ifdef _IP_VHL - hlen = IP_VHL_HL(ip->ip_vhl) << 2; -#else - hlen = ip->ip_hl << 2; -#endif - /* - * We could be in the context of an IP or interface filter; in the - * former case, ip_len would be in host order while for the latter - * it would be in network (correct) order. Because of this, we - * attempt to interpret the length field by comparing it against - * the actual packet length. If the comparison fails, byte swap - * the length and check again. If it still fails, then the packet - * is bogus and we give up. - */ - if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) { - ip->ip_len = SWAP16(ip->ip_len); - swapped = 1; - if (ntohs(ip->ip_len) != (m0->m_pkthdr.len - ip_offset_copy)) { - ip->ip_len = SWAP16(ip->ip_len); - printf("in_cksum_offset: ip_len %d (%d) " - "doesn't match actual length %lu\n", - ip->ip_len, SWAP16(ip->ip_len), - (m0->m_pkthdr.len - ip_offset_copy)); - return; + /* + * Insert only the checksum in the existing IP header + * csum field; all other fields are left unchanged. + */ + offset = hoff + offsetof(struct ip, ip_sum); + if (offset + sizeof (csum) > m->m_len) { + m_copyback(m, offset, sizeof (csum), &csum); + } else if (IP_HDR_ALIGNED_P(mtod(m, char *) + hoff)) { + *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum; + } else { + bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum)); } + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IP; } - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, hlen); - if (swapped) - ip->ip_len = SWAP16(ip->ip_len); - - /* Gross */ - if (ip_offset) { - if (M_LEADINGSPACE(m) < ip_offset) - panic("in_cksum_offset - chain modified!\n"); - m->m_len += ip_offset; - m->m_data -= ip_offset; - } - - /* - * Insert the checksum in the existing chain if IP header not - * contiguous, or if it's not 32-bit aligned, i.e. all the cases - * where it was copied to a local buffer. - */ - if (ip_offset + sizeof(struct ip) > m->m_len) { - char tmp[2]; - -#if DEBUG - printf("in_cksum_offset m_copyback, m->len: %u off: %lu " - "p: %d\n", m->m_len, - ip_offset + offsetof(struct ip, ip_sum), ip->ip_p); -#endif - *(u_short *)(void *)tmp = ip->ip_sum; - m_copyback(m, ip_offset + offsetof(struct ip, ip_sum), 2, tmp); - } else if (!IP_HDR_ALIGNED_P(mtod(m, caddr_t) + ip_offset)) { - bcopy(&ip->ip_sum, - (m->m_data + ip_offset + offsetof(struct ip, ip_sum)), - sizeof (u_short)); - } +done: + return (sw_csum); } /* @@ -2193,46 +2199,43 @@ in_cksum_offset(struct mbuf* m, size_t ip_offset) * XXX This routine assumes that the packet has no options in place. 
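[Annotation] ip_optcopy(), shown a little further below, keys off the top bit of each option type: it is the "copied" flag, set on options (e.g. LSRR, 0x83) that must be replicated into every fragment and clear on ones (e.g. Record Route, 0x07) that travel only in the first. A flat-buffer mirror of that loop, minus the mbuf plumbing (the demo_* names are invented):

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define DEMO_IPOPT_COPIED(o)    ((o) & 0x80)

static int
demo_optcopy(const uint8_t *src, int cnt, uint8_t *dst)
{
    int opt, optlen, copied = 0;

    for (; cnt > 0; cnt -= optlen, src += optlen) {
        opt = src[0];
        if (opt == 0)           /* IPOPT_EOL */
            break;
        if (opt == 1) {         /* IPOPT_NOP, kept for alignment */
            dst[copied++] = 1;
            optlen = 1;
            continue;
        }
        optlen = src[1];
        if (optlen > cnt)       /* malformed length; clamp */
            optlen = cnt;
        if (DEMO_IPOPT_COPIED(opt)) {
            memcpy(dst + copied, src, optlen);
            copied += optlen;
        }
    }
    for (; copied & 0x3; copied++)  /* pad to a 32-bit boundary */
        dst[copied] = 0;            /* IPOPT_EOL */
    return (copied);
}

int
main(void)
{
    /* Record Route (not copied) followed by LSRR (copied). */
    uint8_t src[] = { 0x07, 7, 4, 0, 0, 0, 0,
                      0x83, 7, 4, 0, 0, 0, 0 };
    uint8_t dst[16];

    assert(demo_optcopy(src, sizeof (src), dst) == 8);
    assert(dst[0] == 0x83 && dst[7] == 0);
    return (0);
}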
*/ static struct mbuf * -ip_insertoptions(m, opt, phlen) - register struct mbuf *m; - struct mbuf *opt; - int *phlen; +ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) { - register struct ipoption *p = mtod(opt, struct ipoption *); + struct ipoption *p = mtod(opt, struct ipoption *); struct mbuf *n; - register struct ip *ip = mtod(m, struct ip *); + struct ip *ip = mtod(m, struct ip *); unsigned optlen; - optlen = opt->m_len - sizeof(p->ipopt_dst); + optlen = opt->m_len - sizeof (p->ipopt_dst); if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) return (m); /* XXX should fail */ if (p->ipopt_dst.s_addr) ip->ip_dst = p->ipopt_dst; if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */ - if (n == 0) + if (n == NULL) return (m); n->m_pkthdr.rcvif = 0; #if CONFIG_MACF_NET mac_mbuf_label_copy(m, n); -#endif +#endif /* CONFIG_MACF_NET */ n->m_pkthdr.len = m->m_pkthdr.len + optlen; - m->m_len -= sizeof(struct ip); - m->m_data += sizeof(struct ip); + m->m_len -= sizeof (struct ip); + m->m_data += sizeof (struct ip); n->m_next = m; m = n; - m->m_len = optlen + sizeof(struct ip); + m->m_len = optlen + sizeof (struct ip); m->m_data += max_linkhdr; - (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); + (void) memcpy(mtod(m, void *), ip, sizeof (struct ip)); } else { m->m_data -= optlen; m->m_len += optlen; m->m_pkthdr.len += optlen; - ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); + ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof (struct ip)); } ip = mtod(m, struct ip *); bcopy(p->ipopt_list, ip + 1, optlen); - *phlen = sizeof(struct ip) + optlen; + *phlen = sizeof (struct ip) + optlen; ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2); ip->ip_len += optlen; return (m); @@ -2242,11 +2245,10 @@ ip_insertoptions(m, opt, phlen) * Copy options from ip to jp, * omitting those not copied during fragmentation. */ -int -ip_optcopy(ip, jp) - struct ip *ip, *jp; +static int +ip_optcopy(struct ip *ip, struct ip *jp) { - register u_char *cp, *dp; + u_char *cp, *dp; int opt, optlen, cnt; cp = (u_char *)(ip + 1); @@ -2263,13 +2265,17 @@ ip_optcopy(ip, jp) continue; } #if DIAGNOSTIC - if (cnt < IPOPT_OLEN + sizeof(*cp)) + if (cnt < IPOPT_OLEN + sizeof (*cp)) { panic("malformed IPv4 option passed to ip_optcopy"); + /* NOTREACHED */ + } #endif optlen = cp[IPOPT_OLEN]; #if DIAGNOSTIC - if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) + if (optlen < IPOPT_OLEN + sizeof (*cp) || optlen > cnt) { panic("malformed IPv4 option passed to ip_optcopy"); + /* NOTREACHED */ + } #endif /* bogus lengths should have been caught by ip_dooptions */ if (optlen > cnt) @@ -2288,45 +2294,42 @@ ip_optcopy(ip, jp) * IP socket option processing. */ int -ip_ctloutput(so, sopt) - struct socket *so; - struct sockopt *sopt; +ip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; error = optval = 0; - if (sopt->sopt_level != IPPROTO_IP) { + if (sopt->sopt_level != IPPROTO_IP) return (EINVAL); - } switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { - case IP_OPTIONS: #ifdef notyet case IP_RETOPTS: #endif - { + case IP_OPTIONS: { struct mbuf *m; + if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } MGET(m, sopt->sopt_p != kernproc ? 
M_WAIT : M_DONTWAIT, MT_HEADER); - if (m == 0) { + if (m == NULL) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; - error = sooptcopyin(sopt, mtod(m, char *), m->m_len, - m->m_len); + error = sooptcopyin(sopt, mtod(m, char *), + m->m_len, m->m_len); if (error) break; - - return (ip_pcbopts(sopt->sopt_name, &inp->inp_options, - m)); + + return (ip_pcbopts(sopt->sopt_name, + &inp->inp_options, m)); } case IP_TOS: @@ -2337,8 +2340,8 @@ ip_ctloutput(so, sopt) case IP_RECVIF: case IP_RECVTTL: case IP_RECVPKTINFO: - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval)); if (error) break; @@ -2402,7 +2405,7 @@ ip_ctloutput(so, sopt) } /* Verify interface name parameter is sane */ - if (sopt->sopt_valsize > sizeof(ifname)) { + if (sopt->sopt_valsize > sizeof (ifname)) { error = EINVAL; break; } @@ -2442,10 +2445,10 @@ ip_ctloutput(so, sopt) */ ifnet_release(ifp); } - error = inp_bindif(inp, ifscope); + error = inp_bindif(inp, ifscope, NULL); } break; -#endif +#endif /* CONFIG_FORCE_OUT_IFP */ /* * Multicast socket options are processed by the in_mcast * module. @@ -2472,8 +2475,8 @@ ip_ctloutput(so, sopt) break; case IP_PORTRANGE: - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval)); if (error) break; @@ -2500,8 +2503,7 @@ ip_ctloutput(so, sopt) break; #if IPSEC - case IP_IPSEC_POLICY: - { + case IP_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; int priv; @@ -2522,14 +2524,15 @@ ip_ctloutput(so, sopt) m_freem(m); break; } -#endif /*IPSEC*/ +#endif /* IPSEC */ #if TRAFFIC_MGT - case IP_TRAFFIC_MGT_BACKGROUND: - { - unsigned background = 0; - error = sooptcopyin(sopt, &background, sizeof(background), sizeof(background)); - if (error) + case IP_TRAFFIC_MGT_BACKGROUND: { + unsigned background = 0; + + error = sooptcopyin(sopt, &background, + sizeof (background), sizeof (background)); + if (error) break; if (background) { @@ -2575,7 +2578,7 @@ ip_ctloutput(so, sopt) if (error) break; - error = inp_bindif(inp, optval); + error = inp_bindif(inp, optval, NULL); break; case IP_NO_IFT_CELLULAR: @@ -2591,7 +2594,14 @@ ip_ctloutput(so, sopt) if (error) break; - error = inp_nocellular(inp, optval); + /* once set, it cannot be unset */ + if (!optval && (inp->inp_flags & INP_NO_IFT_CELLULAR)) { + error = EINVAL; + break; + } + + error = so_set_restrictions(so, + SO_RESTRICT_DENY_CELLULAR); break; case IP_OUT_IF: @@ -2609,13 +2619,13 @@ ip_ctloutput(so, sopt) switch (sopt->sopt_name) { case IP_OPTIONS: case IP_RETOPTS: - if (inp->inp_options) - error = sooptcopyout(sopt, - mtod(inp->inp_options, - char *), - inp->inp_options->m_len); - else + if (inp->inp_options) { + error = sooptcopyout(sopt, + mtod(inp->inp_options, char *), + inp->inp_options->m_len); + } else { sopt->sopt_valsize = 0; + } break; case IP_TOS: @@ -2672,7 +2682,7 @@ ip_ctloutput(so, sopt) optval = OPTBIT(INP_PKTINFO); break; } - error = sooptcopyout(sopt, &optval, sizeof optval); + error = sooptcopyout(sopt, &optval, sizeof (optval)); break; case IP_MULTICAST_IF: @@ -2685,13 +2695,12 @@ ip_ctloutput(so, sopt) break; #if IPSEC - case IP_IPSEC_POLICY: - { + case IP_IPSEC_POLICY: { struct mbuf *m = NULL; caddr_t req = NULL; size_t len = 0; - if (m != 0) { + if (m != NULL) { req = mtod(m, caddr_t); len = m->m_len; } @@ -2702,13 +2711,14 @@ ip_ctloutput(so, sopt) m_freem(m); break; } -#endif /*IPSEC*/ +#endif /* IPSEC */ #if TRAFFIC_MGT - case 
IP_TRAFFIC_MGT_BACKGROUND: - { - unsigned background = (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BACKGROUND); - return (sooptcopyout(sopt, &background, sizeof(background))); + case IP_TRAFFIC_MGT_BACKGROUND: { + unsigned background = (so->so_traffic_mgt_flags & + TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0; + return (sooptcopyout(sopt, &background, + sizeof (background))); break; } #endif /* TRAFFIC_MGT */ @@ -2745,44 +2755,41 @@ ip_ctloutput(so, sopt) * with destination address if source routed. */ static int -ip_pcbopts( - __unused int optname, - struct mbuf **pcbopt, - register struct mbuf *m) +ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m) { - register int cnt, optlen; - register u_char *cp; +#pragma unused(optname) + int cnt, optlen; + u_char *cp; u_char opt; /* turn off any old options */ if (*pcbopt) - (void)m_free(*pcbopt); + (void) m_free(*pcbopt); *pcbopt = 0; if (m == (struct mbuf *)0 || m->m_len == 0) { /* * Only turning off any previous options. */ if (m) - (void)m_free(m); + (void) m_free(m); return (0); } -#ifndef vax - if (m->m_len % sizeof(int32_t)) + if (m->m_len % sizeof (int32_t)) goto bad; -#endif + /* * IP first-hop destination address will be stored before * actual options; move other options back * and clear it when none present. */ - if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) + if (m->m_data + m->m_len + sizeof (struct in_addr) >= &m->m_dat[MLEN]) goto bad; cnt = m->m_len; - m->m_len += sizeof(struct in_addr); - cp = mtod(m, u_char *) + sizeof(struct in_addr); + m->m_len += sizeof (struct in_addr); + cp = mtod(m, u_char *) + sizeof (struct in_addr); ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); - bzero(mtod(m, caddr_t), sizeof(struct in_addr)); + bzero(mtod(m, caddr_t), sizeof (struct in_addr)); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[IPOPT_OPTVAL]; @@ -2791,10 +2798,10 @@ ip_pcbopts( if (opt == IPOPT_NOP) optlen = 1; else { - if (cnt < IPOPT_OLEN + sizeof(*cp)) + if (cnt < IPOPT_OLEN + sizeof (*cp)) goto bad; optlen = cp[IPOPT_OLEN]; - if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) + if (optlen < IPOPT_OLEN + sizeof (*cp) || optlen > cnt) goto bad; } switch (opt) { @@ -2812,35 +2819,35 @@ ip_pcbopts( * A is first hop destination, which doesn't appear in * actual IP option, but is stored before the options. */ - if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) + if (optlen < IPOPT_MINOFF - 1 + sizeof (struct in_addr)) goto bad; - m->m_len -= sizeof(struct in_addr); - cnt -= sizeof(struct in_addr); - optlen -= sizeof(struct in_addr); + m->m_len -= sizeof (struct in_addr); + cnt -= sizeof (struct in_addr); + optlen -= sizeof (struct in_addr); cp[IPOPT_OLEN] = optlen; /* * Move first hop before start of options. */ bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), - sizeof(struct in_addr)); + sizeof (struct in_addr)); /* * Then copy rest of options back * to close up the deleted entry. */ ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + - sizeof(struct in_addr)), + sizeof (struct in_addr)), (caddr_t)&cp[IPOPT_OFFSET+1], - (unsigned)cnt + sizeof(struct in_addr)); + (unsigned)cnt + sizeof (struct in_addr)); break; } } - if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) + if (m->m_len > MAX_IPOPTLEN + sizeof (struct in_addr)) goto bad; *pcbopt = m; return (0); bad: - (void)m_free(m); + (void) m_free(m); return (EINVAL); } @@ -2986,18 +2993,23 @@ ip_allocmoptions(int how) * replicating that code here. 
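[Annotation] ip_pcbopts(), above, stores a source route's first hop ahead of the options and shrinks the option by one address, because that first address is really the datagram's immediate destination, not part of the route the option carries. The shuffle, reduced to a flat buffer (pop_first_hop() is invented for the demo; the layout is type, length, offset, then 4-byte hops):

#include <assert.h>
#include <stdint.h>
#include <string.h>

static uint32_t
pop_first_hop(uint8_t *opt)
{
    uint32_t hop;
    int optlen = opt[1];

    memcpy(&hop, &opt[3], sizeof (hop));        /* first address */
    /* slide the remaining addresses forward */
    memmove(&opt[3], &opt[3 + 4], optlen - 3 - 4);
    opt[1] = (uint8_t)(optlen - 4);             /* option shrinks */
    return (hop);
}

int
main(void)
{
    /* LSRR with two hops: 1.1.1.1 then 2.2.2.2 (big-endian bytes). */
    uint8_t opt[] = { 0x83, 11, 4,
        1, 1, 1, 1,
        2, 2, 2, 2 };
    uint32_t hop = pop_first_hop(opt);

    assert(memcmp(&hop, "\1\1\1\1", 4) == 0);
    assert(opt[1] == 7 && opt[3] == 2);
    return (0);
}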
*/ static void -ip_mloopback(ifp, m, dst, hlen) - struct ifnet *ifp; - register struct mbuf *m; - register struct sockaddr_in *dst; - int hlen; +ip_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m, + struct sockaddr_in *dst, int hlen) { - register struct ip *ip; struct mbuf *copym; - int sw_csum = (apple_hwcksum_tx == 0); + struct ip *ip; - copym = m_copy(m, 0, M_COPYALL); - if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) + if (lo_ifp == NULL) + return; + + /* + * Copy the packet header as it's needed for the checksum + * Make sure to deep-copy IP header portion in case the data + * is in an mbuf cluster, so that we can safely override the IP + * header portion later. + */ + copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR); + if (copym != NULL && ((copym->m_flags & M_EXT) || copym->m_len < hlen)) copym = m_pullup(copym, hlen); if (copym == NULL) @@ -3008,83 +3020,65 @@ ip_mloopback(ifp, m, dst, hlen) * than the interface's MTU. Can this possibly matter? */ ip = mtod(copym, struct ip *); - #if BYTE_ORDER != BIG_ENDIAN HTONS(ip->ip_len); HTONS(ip->ip_off); #endif - ip->ip_sum = 0; - ip->ip_sum = in_cksum(copym, hlen); + ip->ip_sum = ip_cksum_hdr_out(copym, hlen); + /* - * NB: - * It's not clear whether there are any lingering - * reentrancy problems in other areas which might - * be exposed by using ip_input directly (in - * particular, everything which modifies the packet - * in-place). Yet another option is using the - * protosw directly to deliver the looped back - * packet. For the moment, we'll err on the side - * of safety by using if_simloop(). + * Mark checksum as valid unless receive checksum offload is + * disabled; if so, compute checksum in software. If the + * interface itself is lo0, this will be overridden by if_loop. */ -#if 1 /* XXX */ - if (dst->sin_family != AF_INET) { - printf("ip_mloopback: bad address family %d\n", - dst->sin_family); - dst->sin_family = AF_INET; - } -#endif - - /* - * Mark checksum as valid or calculate checksum for loopback. - * - * This is done this way because we have to embed the ifp of - * the interface we will send the original copy of the packet - * out on in the mbuf. ip_input will check if_hwassist of the - * embedded ifp and ignore all csum_flags if if_hwassist is 0. - * The UDP checksum has not been calculated yet. - */ - if (sw_csum || (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) { - if (!sw_csum && IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist)) { - copym->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR | - CSUM_IP_CHECKED | CSUM_IP_VALID; - copym->m_pkthdr.csum_data = 0xffff; - } else { - + if (hwcksum_rx) { + copym->m_pkthdr.csum_flags &= ~CSUM_PARTIAL; + copym->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + copym->m_pkthdr.csum_data = 0xffff; + } else if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { #if BYTE_ORDER != BIG_ENDIAN - NTOHS(ip->ip_len); + NTOHS(ip->ip_len); #endif - - in_delayed_cksum(copym); - + in_delayed_cksum(copym); #if BYTE_ORDER != BIG_ENDIAN - HTONS(ip->ip_len); + HTONS(ip->ip_len); #endif - - } - } + } /* - * TedW: - * We need to send all loopback traffic down to dlil in case - * a filter has tapped-in. + * Stuff the 'real' ifp into the pkthdr, to be used in matching + * in ip_input(); we need the loopback ifp/dl_tag passed as args + * to make the loopback driver compliant with the data link + * requirements. 
*/ + copym->m_pkthdr.rcvif = origifp; /* - * Stuff the 'real' ifp into the pkthdr, to be used in matching - * in ip_input(); we need the loopback ifp/dl_tag passed as args - * to make the loopback driver compliant with the data link - * requirements. + * Also record the source interface (which owns the source address). + * This is basically a stripped down version of ifa_foraddr(). */ - if (lo_ifp) { - copym->m_pkthdr.rcvif = ifp; - dlil_output(lo_ifp, PF_INET, copym, 0, - (struct sockaddr *) dst, 0, NULL); - } else { - printf("Warning: ip_output call to dlil_find_dltag failed!\n"); - m_freem(copym); + if (srcifp == NULL) { + struct in_ifaddr *ia; + + lck_rw_lock_shared(in_ifaddr_rwlock); + TAILQ_FOREACH(ia, INADDR_HASH(ip->ip_src.s_addr), ia_hash) { + IFA_LOCK_SPIN(&ia->ia_ifa); + if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_src.s_addr) { + srcifp = ia->ia_ifp; + IFA_UNLOCK(&ia->ia_ifa); + break; + } + IFA_UNLOCK(&ia->ia_ifa); + } + lck_rw_done(in_ifaddr_rwlock); } + if (srcifp != NULL) + ip_setsrcifaddr_info(copym, srcifp->if_index, NULL); + ip_setdstifaddr_info(copym, origifp->if_index, NULL); + + dlil_output(lo_ifp, PF_INET, copym, NULL, SA(dst), 0, NULL); } /* @@ -3106,6 +3100,8 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) struct ifnet *rt_ifp; char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN]; + VERIFY(src.s_addr != INADDR_ANY); + if (ip_select_srcif_debug) { (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src)); (void) inet_ntop(AF_INET, &dst.s_addr, s_dst, sizeof (s_dst)); @@ -3138,7 +3134,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) if (scope == IFSCOPE_NONE) { scope = rt_ifp->if_index; if (scope != get_primary_ifscope(AF_INET) && - ro->ro_rt->generation_id != route_generation) + ROUTE_UNUSABLE(ro)) scope = get_primary_ifscope(AF_INET); } @@ -3204,7 +3200,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) sin.sin_addr = dst; lck_mtx_lock(rnh_lock); - if ((rt = rt_lookup(TRUE, (struct sockaddr *)&sin, NULL, + if ((rt = rt_lookup(TRUE, SA(&sin), NULL, rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) { RT_LOCK(rt); /* @@ -3315,9 +3311,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) } RT_UNLOCK(ro->ro_rt); - rtfree(ro->ro_rt); - ro->ro_rt = NULL; - ro->ro_flags &= ~ROF_SRCIF_SELECTED; + ROUTE_RELEASE(ro); /* * If the destination is IPv4 LLA and the route's interface @@ -3352,8 +3346,13 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) if (ro->ro_rt != NULL && (!IN_LINKLOCAL(ntohl(dst.s_addr)) || (ro->ro_rt->rt_gateway->sa_family == AF_LINK && SDL(ro->ro_rt->rt_gateway)->sdl_alen != 0))) { + if (ifa != NULL) + IFA_ADDREF(ifa); /* for route */ + if (ro->ro_srcia != NULL) + IFA_REMREF(ro->ro_srcia); + ro->ro_srcia = ifa; ro->ro_flags |= ROF_SRCIF_SELECTED; - ro->ro_rt->generation_id = route_generation; + RT_GENID_SYNC(ro->ro_rt); } if (ro->ro_rt != NULL) @@ -3361,3 +3360,85 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) return (ifa); } + +void +ip_output_checksum(struct ifnet *ifp, struct mbuf *m, int hlen, int ip_len, + uint32_t *sw_csum) +{ + int tso = TSO_IPV4_OK(ifp, m); + uint32_t hwcap = ifp->if_hwassist; + + m->m_pkthdr.csum_flags |= CSUM_IP; + + if (!hwcksum_tx) { + /* do all in software; hardware checksum offload is disabled */ + *sw_csum = (CSUM_DELAY_DATA | CSUM_DELAY_IP) & + m->m_pkthdr.csum_flags; + } else { + /* do in software what the hardware cannot */ + *sw_csum = m->m_pkthdr.csum_flags & + 
~IF_HWASSIST_CSUM_FLAGS(hwcap); + } + + if (hlen != sizeof (struct ip)) { + *sw_csum |= ((CSUM_DELAY_DATA | CSUM_DELAY_IP) & + m->m_pkthdr.csum_flags); + } else if (!(*sw_csum & CSUM_DELAY_DATA) && (hwcap & CSUM_PARTIAL)) { + /* + * Partial checksum offload, if non-IP fragment, and TCP only + * (no UDP support, as the hardware may not be able to convert + * +0 to -0 (0xffff) per RFC1122 4.1.3.4.) + */ + if (hwcksum_tx && !tso && + (m->m_pkthdr.csum_flags & CSUM_TCP) && + ip_len <= ifp->if_mtu) { + uint16_t start = sizeof (struct ip); + uint16_t ulpoff = m->m_pkthdr.csum_data & 0xffff; + m->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PARTIAL); + m->m_pkthdr.csum_tx_stuff = (ulpoff + start); + m->m_pkthdr.csum_tx_start = start; + /* do IP hdr chksum in software */ + *sw_csum = CSUM_DELAY_IP; + } else { + *sw_csum |= (CSUM_DELAY_DATA & m->m_pkthdr.csum_flags); + } + } + + if (*sw_csum & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + *sw_csum &= ~CSUM_DELAY_DATA; + } + + if (hwcksum_tx) { + /* + * Drop off bits that aren't supported by hardware; + * also make sure to preserve non-checksum related bits. + */ + m->m_pkthdr.csum_flags = + ((m->m_pkthdr.csum_flags & + (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) | + (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK)); + } else { + /* drop all bits; hardware checksum offload is disabled */ + m->m_pkthdr.csum_flags = 0; + } +} + +/* + * GRE protocol output for PPP/PPTP + */ +int +ip_gre_output(struct mbuf *m) +{ + struct route ro; + int error; + + bzero(&ro, sizeof (ro)); + + error = ip_output(m, NULL, &ro, 0, NULL, NULL); + + ROUTE_RELEASE(&ro); + + return (error); +} diff --git a/bsd/netinet/ip_var.h b/bsd/netinet/ip_var.h index 763c1e919..700fc6431 100644 --- a/bsd/netinet/ip_var.h +++ b/bsd/netinet/ip_var.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -81,10 +81,10 @@ struct ipovly { struct in_addr ih_dst; /* destination internet address */ }; -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #if CONFIG_MACF_NET struct label; -#endif +#endif /* CONFIG_MACF_NET */ /* * Ip reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. @@ -92,25 +92,26 @@ struct label; * be reclaimed if memory becomes tight. 
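ip_output_checksum() above splits the requested checksums into what the NIC can offload and what must stay in software. The core of that decision is a single mask operation; here is a slightly simplified sketch under assumed flag values (the real constants and IF_HWASSIST_CSUM_FLAGS() come from the xnu headers):

#include <stdint.h>

#define CSUM_IP		0x0001	/* IPv4 header checksum requested */
#define CSUM_TCP	0x0002	/* TCP payload checksum requested */
#define CSUM_UDP	0x0004	/* UDP payload checksum requested */

/*
 * Return the checksum work left for software: everything requested
 * that the interface cannot offload. With offload globally disabled,
 * all of it falls back to software.
 */
static uint32_t
sw_csum_needed(uint32_t requested, uint32_t hw_capable, int hwcksum_tx)
{
	if (!hwcksum_tx)
		return (requested);
	return (requested & ~hw_capable);
}

For example, sw_csum_needed(CSUM_IP | CSUM_TCP, CSUM_IP, 1) leaves only CSUM_TCP to be computed in software, the in_delayed_cksum() case above.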
*/ struct ipq { - struct ipq *next,*prev; /* to other reass headers */ - u_char ipq_ttl; /* time for reass q to live */ - u_char ipq_p; /* protocol of this fragment */ - u_short ipq_id; /* sequence id for reassembly */ + TAILQ_ENTRY(ipq) ipq_list; /* to other reass headers */ struct mbuf *ipq_frags; /* to ip headers of fragments */ - struct in_addr ipq_src,ipq_dst; - u_int32_t ipq_nfrags; - TAILQ_ENTRY(ipq) ipq_list; #if CONFIG_MACF_NET struct label *ipq_label; /* MAC label */ -#endif +#endif /* CONFIG_MACF_NET */ + u_char ipq_ttl; /* time for reass q to live */ + u_char ipq_p; /* protocol of this fragment */ + u_short ipq_id; /* sequence id for reassembly */ + struct in_addr ipq_src, ipq_dst; + u_int32_t ipq_nfrags; /* # frags in this packet */ + uint32_t ipq_csum_flags; /* checksum flags */ + uint32_t ipq_csum; /* partial checksum value */ #if IPDIVERT #ifdef IPDIVERT_44 u_int32_t ipq_div_info; /* ipfw divert port & flags */ -#else - u_int16_t ipq_divert; /* ipfw divert port (Maintain backward compat.) */ -#endif +#else /* !IPDIVERT_44 */ + u_int16_t ipq_divert; /* ipfw divert port (legacy) */ +#endif /* !IPDIVERT_44 */ u_int16_t ipq_div_cookie; /* ipfw divert cookie */ -#endif +#endif /* IPDIVERT */ }; /* @@ -119,10 +120,9 @@ struct ipq { * The actual length of the options (including ipopt_dst) * is in m_len. */ -#endif /* KERNEL_PRIVATE */ -#define MAX_IPOPTLEN 40 -#ifdef XNU_KERNEL_PRIVATE - +#endif /* BSD_KERNEL_PRIVATE */ +#define MAX_IPOPTLEN 40 +#ifdef BSD_KERNEL_PRIVATE struct ipoption { struct in_addr ipopt_dst; /* first-hop dst if source routed */ char ipopt_list[MAX_IPOPTLEN]; /* options proper */ @@ -182,42 +182,45 @@ struct ip_moptions { struct ip_fwd_tag { struct sockaddr_in *next_hop; /* next_hop */ }; - -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ struct ipstat { - u_int32_t ips_total; /* total packets received */ - u_int32_t ips_badsum; /* checksum bad */ - u_int32_t ips_tooshort; /* packet too short */ - u_int32_t ips_toosmall; /* not enough data */ - u_int32_t ips_badhlen; /* ip header length < data size */ - u_int32_t ips_badlen; /* ip length < ip header length */ - u_int32_t ips_fragments; /* fragments received */ - u_int32_t ips_fragdropped; /* frags dropped (dups, out of space) */ - u_int32_t ips_fragtimeout; /* fragments timed out */ - u_int32_t ips_forward; /* packets forwarded */ - u_int32_t ips_fastforward; /* packets fast forwarded */ - u_int32_t ips_cantforward; /* packets rcvd for unreachable dest */ - u_int32_t ips_redirectsent; /* packets forwarded on same net */ - u_int32_t ips_noproto; /* unknown or unsupported protocol */ - u_int32_t ips_delivered; /* datagrams delivered to upper level*/ - u_int32_t ips_localout; /* total ip packets generated here */ - u_int32_t ips_odropped; /* lost packets due to nobufs, etc. */ - u_int32_t ips_reassembled; /* total packets reassembled ok */ - u_int32_t ips_fragmented; /* datagrams successfully fragmented */ - u_int32_t ips_ofragments; /* output fragments created */ - u_int32_t ips_cantfrag; /* don't fragment flag was set, etc. 
*/ - u_int32_t ips_badoptions; /* error in option processing */ - u_int32_t ips_noroute; /* packets discarded due to no route */ - u_int32_t ips_badvers; /* ip version != 4 */ - u_int32_t ips_rawout; /* total raw ip packets generated */ - u_int32_t ips_toolong; /* ip length > max ip packet size */ - u_int32_t ips_notmember; /* multicasts for unregistered grps */ - u_int32_t ips_nogif; /* no match gif found */ - u_int32_t ips_badaddr; /* invalid address on header */ -#ifdef PRIVATE - u_int32_t ips_pktdropcntrl; /* pkt dropped, no mbufs for control data */ -#endif /* PRIVATE */ + u_int32_t ips_total; /* total packets received */ + u_int32_t ips_badsum; /* checksum bad */ + u_int32_t ips_tooshort; /* packet too short */ + u_int32_t ips_toosmall; /* not enough data */ + u_int32_t ips_badhlen; /* ip header length < data size */ + u_int32_t ips_badlen; /* ip length < ip header length */ + u_int32_t ips_fragments; /* fragments received */ + u_int32_t ips_fragdropped; /* frags dropped (dups, out of space) */ + u_int32_t ips_fragtimeout; /* fragments timed out */ + u_int32_t ips_forward; /* packets forwarded */ + u_int32_t ips_fastforward; /* packets fast forwarded */ + u_int32_t ips_cantforward; /* packets rcvd for unreachable dest */ + u_int32_t ips_redirectsent; /* packets forwarded on same net */ + u_int32_t ips_noproto; /* unknown or unsupported protocol */ + u_int32_t ips_delivered; /* datagrams delivered to upper level */ + u_int32_t ips_localout; /* total ip packets generated here */ + u_int32_t ips_odropped; /* lost packets due to nobufs, etc. */ + u_int32_t ips_reassembled; /* total packets reassembled ok */ + u_int32_t ips_fragmented; /* datagrams successfully fragmented */ + u_int32_t ips_ofragments; /* output fragments created */ + u_int32_t ips_cantfrag; /* don't fragment flag was set, etc. 
*/ + u_int32_t ips_badoptions; /* error in option processing */ + u_int32_t ips_noroute; /* packets discarded due to no route */ + u_int32_t ips_badvers; /* ip version != 4 */ + u_int32_t ips_rawout; /* total raw ip packets generated */ + u_int32_t ips_toolong; /* ip length > max ip packet size */ + u_int32_t ips_notmember; /* multicasts for unregistered grps */ + u_int32_t ips_nogif; /* no match gif found */ + u_int32_t ips_badaddr; /* invalid address on header */ + u_int32_t ips_pktdropcntrl; /* pkt dropped, no mbufs for ctl data */ + u_int32_t ips_rcv_swcsum; /* ip hdr swcksum (inbound), packets */ + u_int32_t ips_rcv_swcsum_bytes; /* ip hdr swcksum (inbound), bytes */ + u_int32_t ips_snd_swcsum; /* ip hdr swcksum (outbound), packets */ + u_int32_t ips_snd_swcsum_bytes; /* ip hdr swcksum (outbound), bytes */ + u_int32_t ips_adj; /* total packets trimmed/adjusted */ + u_int32_t ips_adj_hwcsum_clr; /* hwcksum discarded during adj */ }; struct ip_linklocal_stat { @@ -228,16 +231,21 @@ struct ip_linklocal_stat { }; #ifdef KERNEL_PRIVATE +/* forward declarations for ip_output() */ +struct ip_out_args; +struct ip_moptions; +#endif /* KERNEL_PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE /* flags passed to ip_output as last parameter */ -#define IP_FORWARDING 0x1 /* most of ip header exists */ -#define IP_RAWOUTPUT 0x2 /* raw ip header exists */ -#define IP_NOIPSEC 0x4 /* No IPSec processing */ -#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables (0x0010) */ -#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets (0x0020) */ -#define IP_OUTARGS 0x100 /* has ancillary output info */ +#define IP_FORWARDING 0x1 /* most of ip header exists */ +#define IP_RAWOUTPUT 0x2 /* raw ip header exists */ +#define IP_NOIPSEC 0x4 /* No IPSec processing */ +#define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables (0x0010) */ +#define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast pkts (0x0020) */ +#define IP_OUTARGS 0x100 /* has ancillary output info */ -#ifdef XNU_KERNEL_PRIVATE -#define IP_HDR_ALIGNED_P(_ip) ((((uintptr_t)(_ip)) & ((uintptr_t)3)) == 0) +#define IP_HDR_ALIGNED_P(_ip) ((((uintptr_t)(_ip)) & ((uintptr_t)3)) == 0) /* * On platforms which require strict alignment (currently for anything but @@ -254,7 +262,6 @@ struct ip_linklocal_stat { } \ } while (0) #endif /* !__i386__ && !__x86_64__ */ -#endif /* XNU_KERNEL_PRIVATE */ struct ip; struct inpcb; @@ -265,32 +272,48 @@ struct sockopt; /* * Extra information passed to ip_output when IP_OUTARGS is set. + * + * Upon returning an error to the caller, ip_output may indicate through + * ipoa_retflags any additional information regarding the error. 
*/ struct ip_out_args { unsigned int ipoa_boundif; /* boundif interface index */ struct flowadv ipoa_flowadv; /* flow advisory code */ - u_int32_t ipoa_flags; /* IPOAF flags (see below) */ + u_int32_t ipoa_flags; /* IPOAF output flags (see below) */ #define IPOAF_SELECT_SRCIF 0x00000001 /* src interface selection */ #define IPOAF_BOUND_IF 0x00000002 /* boundif value is valid */ #define IPOAF_BOUND_SRCADDR 0x00000004 /* bound to src address */ #define IPOAF_NO_CELLULAR 0x00000010 /* skip IFT_CELLULAR */ + u_int32_t ipoa_retflags; /* IPOARF return flags (see below) */ +#define IPOARF_IFDENIED 0x00000001 /* denied access to interface */ }; -extern struct ipstat ipstat; -#if !defined(RANDOM_IP_ID) || RANDOM_IP_ID == 0 -extern u_short ip_id; /* ip packet ctr, for ids */ -#endif -extern int ip_defttl; /* default IP ttl */ -extern int ipforwarding; /* ip forwarding */ +extern struct ipstat ipstat; +extern int ip_use_randomid; +extern u_short ip_id; /* ip packet ctr, for ids */ +extern int ip_defttl; /* default IP ttl */ +extern int ipforwarding; /* ip forwarding */ extern struct protosw *ip_protox[]; +extern struct pr_usrreqs rip_usrreqs; +extern int ip_doscopedroute; + +#if MROUTING +extern int (*legal_vif_num)(int); +extern u_int32_t (*ip_mcast_src)(int); +extern int rsvp_on; extern struct socket *ip_rsvpd; /* reservation protocol daemon */ extern struct socket *ip_mrouter; /* multicast routing daemon */ -extern int (*legal_vif_num)(int); -extern u_int32_t (*ip_mcast_src)(int); -extern int rsvp_on; -extern struct pr_usrreqs rip_usrreqs; -extern int ip_doscopedroute; -extern int ip_restrictrecvif; + +extern void rsvp_input(struct mbuf *, int); +extern int ip_rsvp_init(struct socket *); +extern int ip_rsvp_done(void); +extern int ip_rsvp_vif_init(struct socket *, struct sockopt *); +extern int ip_rsvp_vif_done(struct socket *, struct sockopt *); +extern void ip_rsvp_force_done(struct socket *); +extern void ipip_input(struct mbuf *, int); +extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, + struct ip_moptions *); +#endif /* MROUTING */ extern void ip_moptions_init(void); extern struct ip_moptions *ip_allocmoptions(int); @@ -299,41 +322,42 @@ extern int inp_setmoptions(struct inpcb *, struct sockopt *); extern void imo_addref(struct ip_moptions *, int); extern void imo_remref(struct ip_moptions *); -int ip_ctloutput(struct socket *, struct sockopt *sopt); -void ip_drain(void); -void ip_init(void) __attribute__((section("__TEXT, initcode"))); -extern int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, - struct ip_moptions *); +struct protosw; +struct domain; + +extern int ip_checkrouteralert(struct mbuf *); +extern int ip_ctloutput(struct socket *, struct sockopt *sopt); +extern void ip_drain(void); +extern void ip_init(struct protosw *, struct domain *); extern int ip_output(struct mbuf *, struct mbuf *, struct route *, int, struct ip_moptions *, struct ip_out_args *); extern int ip_output_list(struct mbuf *, int, struct mbuf *, struct route *, int, struct ip_moptions *, struct ip_out_args *); -struct in_ifaddr *ip_rtaddr(struct in_addr); -int ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, - struct mbuf *); -void ip_slowtimo(void); -struct mbuf * - ip_srcroute(void); -void ip_stripoptions(struct mbuf *, struct mbuf *); -#if RANDOM_IP_ID -u_int16_t - ip_randomid(void); -#endif -int rip_ctloutput(struct socket *, struct sockopt *); -void rip_ctlinput(int, struct sockaddr *, void *); -void rip_init(void) __attribute__((section("__TEXT, initcode"))); -void 
rip_input(struct mbuf *, int); -int rip_output(struct mbuf *, struct socket *, u_int32_t, struct mbuf *); -int rip_unlock(struct socket *, int, void *); -void ipip_input(struct mbuf *, int); -void rsvp_input(struct mbuf *, int); -int ip_rsvp_init(struct socket *); -int ip_rsvp_done(void); -int ip_rsvp_vif_init(struct socket *, struct sockopt *); -int ip_rsvp_vif_done(struct socket *, struct sockopt *); -void ip_rsvp_force_done(struct socket *); -void ip_proto_dispatch_in_wrapper(struct mbuf *, int, u_int8_t); -void in_delayed_cksum(struct mbuf *m); +extern void ip_output_checksum(struct ifnet *, struct mbuf *, int, int, + uint32_t *); +extern struct in_ifaddr *ip_rtaddr(struct in_addr); +extern int ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, + struct mbuf *); +extern struct mbuf *ip_srcroute(void); +extern void ip_stripoptions(struct mbuf *, struct mbuf *); +extern void ip_initid(void); +extern u_int16_t ip_randomid(void); +extern void ip_proto_dispatch_in_wrapper(struct mbuf *, int, u_int8_t); +extern int ip_fragment(struct mbuf *, struct ifnet *, unsigned long, int); + +extern void ip_setsrcifaddr_info(struct mbuf *, uint32_t, struct in_ifaddr *); +extern void ip_setdstifaddr_info(struct mbuf *, uint32_t, struct in_ifaddr *); +extern int ip_getsrcifaddr_info(struct mbuf *, uint32_t *, uint32_t *); +extern int ip_getdstifaddr_info(struct mbuf *, uint32_t *, uint32_t *); + +extern int rip_ctloutput(struct socket *, struct sockopt *); +extern void rip_ctlinput(int, struct sockaddr *, void *); +extern void rip_init(struct protosw *, struct domain *); +extern void rip_input(struct mbuf *, int); +extern int rip_output(struct mbuf *, struct socket *, u_int32_t, struct mbuf *); +extern int rip_unlock(struct socket *, int, void *); +extern int rip_send(struct socket *, int, struct mbuf *, struct sockaddr *, + struct mbuf *, struct proc *); extern void tcp_in_cksum_stats(u_int32_t); extern void tcp_out_cksum_stats(u_int32_t); @@ -341,10 +365,18 @@ extern void tcp_out_cksum_stats(u_int32_t); extern void udp_in_cksum_stats(u_int32_t); extern void udp_out_cksum_stats(u_int32_t); -int rip_send(struct socket *, int , struct mbuf *, struct sockaddr *, - struct mbuf *, struct proc *); - -extern int ip_fragment(struct mbuf *, struct ifnet *, unsigned long, int); +#if INET6 +extern void tcp_in6_cksum_stats(u_int32_t); +extern void tcp_out6_cksum_stats(u_int32_t); +extern void udp_in6_cksum_stats(u_int32_t); +extern void udp_out6_cksum_stats(u_int32_t); +#endif /* INET6 */ +#endif /* BSD_KERNEL_PRIVATE */ +#ifdef KERNEL_PRIVATE +/* for PPP/PPTP */ +extern int ip_gre_output(struct mbuf *); +typedef struct mbuf *(*gre_input_func_t)(struct mbuf *, int, int); +extern int ip_gre_register_input(gre_input_func_t); #endif /* KERNEL_PRIVATE */ #endif /* !_NETINET_IP_VAR_H_ */ diff --git a/bsd/netinet/kpi_ipfilter.c b/bsd/netinet/kpi_ipfilter.c index ecab6700a..edf1fd729 100644 --- a/bsd/netinet/kpi_ipfilter.c +++ b/bsd/netinet/kpi_ipfilter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2012 Apple Inc. All rights reserved. + * Copyright (c) 2004-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -136,8 +136,7 @@ ipf_add( /* This will force TCP to re-evaluate its use of TSO */ OSAddAtomic(1, &kipf_count); - if (use_routegenid) - routegenid_update(); + routegenid_update(); return 0; } @@ -194,8 +193,7 @@ ipf_remove( /* This will force TCP to re-evaluate its use of TSO */ OSAddAtomic(-1, &kipf_count); - if (use_routegenid) - routegenid_update(); + routegenid_update(); } return 0; @@ -271,7 +269,7 @@ ipf_injectv4_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options) errno_t error = 0; struct m_tag *mtag = NULL; struct ip_moptions *imo = NULL; - struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, 0 }; + struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, 0, 0 }; /* Make the IP header contiguous in the mbuf */ if ((size_t)m->m_len < sizeof (struct ip)) { @@ -327,8 +325,7 @@ ipf_injectv4_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options) IP_ALLOWBROADCAST | IP_RAWOUTPUT | IP_OUTARGS, imo, &ipoa); /* Release the route */ - if (ro.ro_rt) - rtfree(ro.ro_rt); + ROUTE_RELEASE(&ro); if (imo != NULL) IMO_REMREF(imo); @@ -346,7 +343,7 @@ ipf_injectv6_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options) errno_t error = 0; struct m_tag *mtag = NULL; struct ip6_moptions *im6o = NULL; - struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0 }; + struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0, 0 }; /* Make the IP header contiguous in the mbuf */ if ((size_t)m->m_len < sizeof(struct ip6_hdr)) { @@ -398,8 +395,7 @@ ipf_injectv6_out(mbuf_t data, ipfilter_t filter_ref, ipf_pktopts_t options) error = ip6_output(m, NULL, &ro, IPV6_OUTARGS, im6o, NULL, &ip6oa); /* Release the route */ - if (ro.ro_rt) - rtfree(ro.ro_rt); + ROUTE_RELEASE(&ro); if (im6o != NULL) IM6O_REMREF(im6o); diff --git a/bsd/netinet/lro_ext.h b/bsd/netinet/lro_ext.h index db2a4322e..1dbf135fd 100644 --- a/bsd/netinet/lro_ext.h +++ b/bsd/netinet/lro_ext.h @@ -35,6 +35,7 @@ extern int sw_lro; extern int lrodebug; +extern unsigned int coalesc_sz; /* flow return values */ #define TCP_LRO_NAN 0x00 /* No flow exists */ diff --git a/bsd/netinet/mp_pcb.c b/bsd/netinet/mp_pcb.c new file mode 100644 index 000000000..9dfc68ed3 --- /dev/null +++ b/bsd/netinet/mp_pcb.c @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +static lck_grp_t *mp_lock_grp; +static lck_attr_t *mp_lock_attr; +static lck_grp_attr_t *mp_lock_grp_attr; +decl_lck_mtx_data(static, mp_lock); /* global MULTIPATH lock */ +decl_lck_mtx_data(static, mp_timeout_lock); + +static TAILQ_HEAD(, mppcbinfo) mppi_head = TAILQ_HEAD_INITIALIZER(mppi_head); + +static boolean_t mp_timeout_run; /* MP timer is scheduled to run */ +static boolean_t mp_garbage_collecting; +static boolean_t mp_ticking; +static void mp_sched_timeout(void); +static void mp_timeout(void *); + +void +mp_pcbinit(void) +{ + static int mp_initialized = 0; + + VERIFY(!mp_initialized); + mp_initialized = 1; + + mp_lock_grp_attr = lck_grp_attr_alloc_init(); + mp_lock_grp = lck_grp_alloc_init("multipath", mp_lock_grp_attr); + mp_lock_attr = lck_attr_alloc_init(); + lck_mtx_init(&mp_lock, mp_lock_grp, mp_lock_attr); + lck_mtx_init(&mp_timeout_lock, mp_lock_grp, mp_lock_attr); +} + +static void +mp_timeout(void *arg) +{ +#pragma unused(arg) + struct mppcbinfo *mppi; + boolean_t t, gc; + uint32_t t_act = 0; + uint32_t gc_act = 0; + + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + + lck_mtx_lock_spin(&mp_timeout_lock); + gc = mp_garbage_collecting; + mp_garbage_collecting = FALSE; + + t = mp_ticking; + mp_ticking = FALSE; + + if (gc || t) { + lck_mtx_unlock(&mp_timeout_lock); + + lck_mtx_lock(&mp_lock); + TAILQ_FOREACH(mppi, &mppi_head, mppi_entry) { + if ((gc && mppi->mppi_gc != NULL) || + (t && mppi->mppi_timer != NULL)) { + lck_mtx_lock(&mppi->mppi_lock); + if (gc && mppi->mppi_gc != NULL) + gc_act += mppi->mppi_gc(mppi); + if (t && mppi->mppi_timer != NULL) + t_act += mppi->mppi_timer(mppi); + lck_mtx_unlock(&mppi->mppi_lock); + } + } + lck_mtx_unlock(&mp_lock); + + lck_mtx_lock_spin(&mp_timeout_lock); + } + + /* lock was dropped above, so check first before overriding */ + if (!mp_garbage_collecting) + mp_garbage_collecting = (gc_act != 0); + if (!mp_ticking) + mp_ticking = (t_act != 0); + + /* re-arm the timer if there's work to do */ + mp_timeout_run = FALSE; + mp_sched_timeout(); + lck_mtx_unlock(&mp_timeout_lock); +} + +static void +mp_sched_timeout(void) +{ + lck_mtx_assert(&mp_timeout_lock, LCK_MTX_ASSERT_OWNED); + + if (!mp_timeout_run && (mp_garbage_collecting || mp_ticking)) { + lck_mtx_convert_spin(&mp_timeout_lock); + mp_timeout_run = TRUE; + timeout(mp_timeout, NULL, hz); + } +} + +void +mp_gc_sched(void) +{ + lck_mtx_lock_spin(&mp_timeout_lock); + mp_garbage_collecting = TRUE; + mp_sched_timeout(); + lck_mtx_unlock(&mp_timeout_lock); +} + +void +mptcp_timer_sched(void) +{ + lck_mtx_lock_spin(&mp_timeout_lock); + mp_ticking = TRUE; + mp_sched_timeout(); + lck_mtx_unlock(&mp_timeout_lock); +} + +void +mp_pcbinfo_attach(struct mppcbinfo *mppi) +{ + struct mppcbinfo *mppi0; + + lck_mtx_lock(&mp_lock); + TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { + if (mppi0 == mppi) { + panic("%s: mppi %p already in the list\n", + __func__, mppi); + /* NOTREACHED */ + } + } + TAILQ_INSERT_TAIL(&mppi_head, mppi, mppi_entry); + lck_mtx_unlock(&mp_lock); +} + +int +mp_pcbinfo_detach(struct mppcbinfo *mppi) +{ + struct mppcbinfo *mppi0; + int error = 0; + + lck_mtx_lock(&mp_lock); + TAILQ_FOREACH(mppi0, &mppi_head, mppi_entry) { + if 
(mppi0 == mppi) + break; + } + if (mppi0 != NULL) + TAILQ_REMOVE(&mppi_head, mppi0, mppi_entry); + else + error = ENXIO; + lck_mtx_unlock(&mp_lock); + + return (error); +} + +int +mp_pcballoc(struct socket *so, struct mppcbinfo *mppi) +{ + struct mppcb *mpp; + + VERIFY(sotomppcb(so) == NULL); + + lck_mtx_lock(&mppi->mppi_lock); + if (mppi->mppi_count >= mptcp_socket_limit) { + lck_mtx_unlock(&mppi->mppi_lock); + mptcplog((LOG_ERR, "Reached MPTCP socket limit.")); + return (ENOBUFS); + } + lck_mtx_unlock(&mppi->mppi_lock); + + mpp = zalloc(mppi->mppi_zone); + if (mpp == NULL) + return (ENOBUFS); + + bzero(mpp, mppi->mppi_size); + lck_mtx_init(&mpp->mpp_lock, mppi->mppi_lock_grp, mppi->mppi_lock_attr); + mpp->mpp_pcbinfo = mppi; + mpp->mpp_state = MPPCB_STATE_INUSE; + mpp->mpp_socket = so; + so->so_pcb = mpp; + + lck_mtx_lock(&mppi->mppi_lock); + mpp->mpp_flags |= MPP_ATTACHED; + TAILQ_INSERT_TAIL(&mppi->mppi_pcbs, mpp, mpp_entry); + mppi->mppi_count++; + lck_mtx_unlock(&mppi->mppi_lock); + + return (0); +} + +void +mp_pcbdetach(struct mppcb *mpp) +{ + struct socket *so = mpp->mpp_socket; + + VERIFY(so->so_pcb == mpp); + + mpp->mpp_state = MPPCB_STATE_DEAD; + if (!(so->so_flags & SOF_PCBCLEARING)) + so->so_flags |= SOF_PCBCLEARING; + + mp_gc_sched(); +} + +void +mp_pcbdispose(struct mppcb *mpp) +{ + struct mppcbinfo *mppi = mpp->mpp_pcbinfo; + + VERIFY(mppi != NULL); + + lck_mtx_assert(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED); + lck_mtx_assert(&mpp->mpp_lock, LCK_MTX_ASSERT_OWNED); + + VERIFY(mpp->mpp_state == MPPCB_STATE_DEAD); + + VERIFY(mpp->mpp_flags & MPP_ATTACHED); + mpp->mpp_flags &= ~MPP_ATTACHED; + TAILQ_REMOVE(&mppi->mppi_pcbs, mpp, mpp_entry); + VERIFY(mppi->mppi_count != 0); + mppi->mppi_count--; + + VERIFY(mpp->mpp_socket != NULL); + VERIFY(mpp->mpp_socket->so_usecount == 0); + mpp->mpp_socket->so_pcb = NULL; + mpp->mpp_socket = NULL; + + lck_mtx_unlock(&mpp->mpp_lock); + lck_mtx_destroy(&mpp->mpp_lock, mppi->mppi_lock_grp); + zfree(mppi->mppi_zone, mpp); +} diff --git a/bsd/netinet/mp_pcb.h b/bsd/netinet/mp_pcb.h new file mode 100644 index 000000000..3c317a692 --- /dev/null +++ b/bsd/netinet/mp_pcb.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _NETINET_MP_PCB_H_ +#define _NETINET_MP_PCB_H_ + +#ifdef BSD_KERNEL_PRIVATE +#include +#include +#include + +/* Keep in sync with bsd/dev/dtrace/scripts/mptcp.d */ +typedef enum mppcb_state { + MPPCB_STATE_INUSE = 1, + MPPCB_STATE_DEAD = 2, +} mppcb_state_t; + +/* + * Multipath Protocol Control Block + */ +struct mppcb { + TAILQ_ENTRY(mppcb) mpp_entry; /* glue to all PCBs */ + decl_lck_mtx_data(, mpp_lock); /* per PCB lock */ + struct mppcbinfo *mpp_pcbinfo; /* PCB info */ + void *mpp_pcbe; /* ptr to per-protocol ext */ + struct socket *mpp_socket; /* back pointer to socket */ + uint32_t mpp_flags; /* PCB flags */ + mppcb_state_t mpp_state; /* PCB state */ +}; + +#define sotomppcb(so) ((struct mppcb *)((so)->so_pcb)) + +/* valid values for mpp_flags */ +#define MPP_ATTACHED 0x1 +#define MPP_DEFUNCT 0x2 + +/* + * Multipath PCB Information + */ +struct mppcbinfo { + TAILQ_ENTRY(mppcbinfo) mppi_entry; /* glue to all PCB info */ + TAILQ_HEAD(, mppcb) mppi_pcbs; /* list of PCBs */ + uint32_t mppi_count; /* # of PCBs in list */ + struct zone *mppi_zone; /* zone for this PCB */ + uint32_t mppi_size; /* size of PCB structure */ + lck_grp_t *mppi_lock_grp; /* lock grp */ + lck_attr_t *mppi_lock_attr; /* lock attr */ + lck_grp_attr_t *mppi_lock_grp_attr; /* lock grp attr */ + decl_lck_mtx_data(, mppi_lock); /* global PCB lock */ + uint32_t (*mppi_gc)(struct mppcbinfo *); /* garbage collector func */ + uint32_t (*mppi_timer)(struct mppcbinfo *); /* timer func */ +}; + +__BEGIN_DECLS +extern void mp_pcbinit(void); +extern void mp_pcbinfo_attach(struct mppcbinfo *); +extern int mp_pcbinfo_detach(struct mppcbinfo *); +extern int mp_pcballoc(struct socket *, struct mppcbinfo *); +extern void mp_pcbdetach(struct mppcb *); +extern void mp_pcbdispose(struct mppcb *); +extern void mp_gc_sched(void); +extern void mptcp_timer_sched(void); +__END_DECLS + +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* !_NETINET_MP_PCB_H_ */ diff --git a/bsd/netinet/mp_proto.c b/bsd/netinet/mp_proto.c new file mode 100644 index 000000000..08bb5102f --- /dev/null +++ b/bsd/netinet/mp_proto.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#if MPTCP +#include +#endif /* MPTCP */ + +extern struct domain mpdomain_s; +static struct domain *mpdomain = NULL; + +static void mp_dinit(struct domain *); +lck_mtx_t *mp_domain_mutex; + +static struct protosw mpsw[] = { +#if MPTCP +{ + .pr_type = SOCK_STREAM, + .pr_protocol = IPPROTO_TCP, + .pr_flags = PR_CONNREQUIRED|PR_MULTICONN|PR_EVCONNINFO| + PR_WANTRCVD|PR_PCBLOCK|PR_PROTOLOCK, + .pr_ctloutput = mptcp_ctloutput, + .pr_init = mptcp_init, + .pr_usrreqs = &mptcp_usrreqs, + .pr_lock = mptcp_lock, + .pr_unlock = mptcp_unlock, + .pr_getlock = mptcp_getlock, +}, +#endif /* MPTCP */ +}; + +static int mp_proto_count = (sizeof (mpsw) / sizeof (struct protosw)); + +struct domain mpdomain_s = { + .dom_family = PF_MULTIPATH, + .dom_flags = DOM_REENTRANT, + .dom_name = "multipath", + .dom_init = mp_dinit, +}; + +/* Initialize the PF_MULTIPATH domain, and add in the pre-defined protos */ +void +mp_dinit(struct domain *dp) +{ + struct protosw *pr; + int i; + + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(mpdomain == NULL); + + mpdomain = dp; + + for (i = 0, pr = &mpsw[0]; i < mp_proto_count; i++, pr++) + net_add_proto(pr, dp, 1); + + mp_domain_mutex = dp->dom_mtx; +} diff --git a/bsd/netinet/mptcp.c b/bsd/netinet/mptcp.c new file mode 100644 index 000000000..8487a1c3f --- /dev/null +++ b/bsd/netinet/mptcp.c @@ -0,0 +1,665 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int mptcp_enable = 1; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_enable, 0, "Enable Multipath TCP Support"); + +int mptcp_dbg = 0; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_dbg, 0, "Enable Multipath TCP Debugging"); + +/* Number of times to try negotiating MPTCP on SYN retransmissions */ +int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr, + CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries"); + +/* + * By default, DSS checksum is turned off; revisit if we ever do + * MPTCP for non-SSL traffic. + */ +int mptcp_dss_csum = 0; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_dss_csum, 0, "Enable DSS checksum"); + +/* + * When mptcp_fail_thresh number of retransmissions are sent, subflow failover + * is attempted on a different path. + */ +int mptcp_fail_thresh = 1; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_fail_thresh, 0, "Failover threshold"); + + +/* + * MPTCP subflows have TCP keepalives set to ON. + */ +int mptcp_subflow_keeptime = 60; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_subflow_keeptime, 0, "Keepalive in seconds"); + +/* + * MP_PRIO option. + */ +int mptcp_mpprio_enable = 1; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mpprio, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_mpprio_enable, 0, "Enable MP_PRIO option"); + +/* + * REMOVE_ADDR option. + */ +int mptcp_remaddr_enable = 1; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, remaddr, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_remaddr_enable, 0, "Enable REMOVE_ADDR option"); + +/* + * MPTCP input, called when data has been read from a subflow socket. + */ +void +mptcp_input(struct mptses *mpte, struct mbuf *m) +{ + struct socket *mp_so; + struct mptcb *mp_tp = NULL; + u_int64_t mb_dsn; + u_int32_t mb_datalen; + int count = 0; + struct mbuf *save = NULL; + struct mbuf *freelist = NULL, *tail = NULL; + + VERIFY(m->m_flags & M_PKTHDR); + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + + DTRACE_MPTCP(input); + + /* + * Each mbuf contains an MPTCP Data Sequence Map. + * Process the data for reassembly, delivery to MPTCP socket + * client, etc. 
+ * + */ + count = mp_so->so_rcv.sb_cc; + + VERIFY(m != NULL); + /* + * In the degraded fallback case, data is accepted without DSS map + */ + if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) { + /* XXX need a check that this is indeed degraded */ + if (sbappendstream(&mp_so->so_rcv, m)) + sorwakeup(mp_so); + DTRACE_MPTCP5(receive__degraded, struct mbuf *, m, + struct socket *, mp_so, + struct sockbuf *, &mp_so->so_rcv, + struct sockbuf *, &mp_so->so_snd, + struct mptses *, mpte); + count = mp_so->so_rcv.sb_cc - count; + mptcplog3((LOG_DEBUG, "%s: fread %d bytes\n", __func__, count)); + return; + } + + mp_tp = mpte->mpte_mptcb; + VERIFY(mp_tp != NULL); + + MPT_LOCK(mp_tp); + do { + save = m->m_next; + m->m_next = NULL; + + mb_dsn = m->m_pkthdr.mp_dsn; + mb_datalen = m->m_pkthdr.mp_rlen; + + if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvatmark)) { + tcpstat.tcps_mp_oodata++; + MPT_UNLOCK(mp_tp); + m_freem(m); + return; + /* + * Reassembly queue support here in future. Per spec, + * senders must implement retransmission timer to + * retransmit unacked data. Dropping out of order + * gives a slight hit on performance but allows us to + * deploy MPTCP and protects us against in-window DoS + * attacks that attempt to use up memory by sending + * out of order data. When doing load sharing across + * subflows, out of order support is a must. + */ + } + + if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvatmark)) { + VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP); + VERIFY(m->m_flags & M_PKTHDR); + VERIFY(m->m_len >= (int)mb_datalen); + VERIFY(m->m_pkthdr.len >= (int)mb_datalen); + if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen), + mp_tp->mpt_rcvatmark)) { + if (freelist == NULL) + freelist = tail = m; + else { + tail->m_next = m; + tail = m; + } + m = save; + continue; + } else { + m_adj(m, (mp_tp->mpt_rcvatmark - mb_dsn)); + } + mptcplog((LOG_INFO, "%s: %llu %d 2 \n", __func__, + mp_tp->mpt_rcvatmark, m->m_pkthdr.len)); + } + + MPT_UNLOCK(mp_tp); + if (sbappendstream(&mp_so->so_rcv, m)) { + sorwakeup(mp_so); + } + DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so, + struct sockbuf *, &mp_so->so_rcv, + struct sockbuf *, &mp_so->so_snd, + struct mptses *, mpte, + struct mptcb *, mp_tp); + MPT_LOCK(mp_tp); + count = mp_so->so_rcv.sb_cc - count; + tcpstat.tcps_mp_rcvtotal++; + tcpstat.tcps_mp_rcvbytes += count; + mptcplog3((LOG_DEBUG, "%s: read %d bytes\n", __func__, count)); + /* + * The data received at the MPTCP layer will never exceed the + * receive window because anything to the right of the + * receive window will be trimmed at the subflow level. + */ + mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp); + mp_tp->mpt_rcvatmark += count; + m = save; + count = mp_so->so_rcv.sb_cc; + } while (m); + MPT_UNLOCK(mp_tp); + + if (freelist) + m_freem(freelist); +} + +/* + * MPTCP output. 
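mptcp_input() above enforces in-order delivery at the data level: segments beyond the read point are dropped (no reassembly queue yet), fully stale ones are freed, and partially stale ones are trimmed with m_adj(). A flat-buffer sketch of that classification follows; note it uses plain comparisons, whereas the kernel's MPTCP_SEQ_* macros are wrap-aware over the 64-bit DSN space:

#include <stdint.h>

typedef enum { DELIVER, DELIVER_TRIMMED, DROP_DUP, DROP_OOO } dsn_verdict_t;

/*
 * Classify a segment mapped at [dsn, dsn + len) against the
 * connection-level read point rcvatmark. On DELIVER_TRIMMED,
 * *trim bytes must be removed from the front (the m_adj() case).
 */
static dsn_verdict_t
classify_dss(uint64_t rcvatmark, uint64_t dsn, uint32_t len, uint32_t *trim)
{
	*trim = 0;
	if (dsn > rcvatmark)			/* hole before it */
		return (DROP_OOO);
	if (dsn + len <= rcvatmark)		/* entirely duplicate */
		return (DROP_DUP);
	if (dsn < rcvatmark) {			/* partial overlap */
		*trim = (uint32_t)(rcvatmark - dsn);
		return (DELIVER_TRIMMED);
	}
	return (DELIVER);			/* exactly at the mark */
}

Dropping out-of-order data trades throughput for memory safety, as the in-function comment notes: the peer's retransmission timer recovers whatever was discarded.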
+ */ +int +mptcp_output(struct mptses *mpte) +{ + struct mptsub *mpts; + struct mptsub *mpts_tried = NULL; + struct socket *mp_so; + int error = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + if (mp_so->so_state & SS_CANTSENDMORE) { + return (EPIPE); + } + +try_again: + /* get the "best" subflow to be used for transmission */ + mpts = mptcp_get_subflow(mpte, NULL); + if (mpts == NULL) { + mptcplog((LOG_ERR, "%s: mp_so 0x%llx has no usable subflow\n", + __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + goto out; + } + + mptcplog3((LOG_INFO, "%s: mp_so 0x%llx cid %d \n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid)); + + /* In case there's just one flow, we reattempt later */ + MPTS_LOCK(mpts); + if ((mpts_tried != NULL) && ((mpts == mpts_tried) || + (mpts->mpts_flags & MPTSF_FAILINGOVER))) { + MPTS_UNLOCK(mpts); + MPTS_LOCK(mpts_tried); + mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER; + mpts_tried->mpts_flags |= MPTSF_ACTIVE; + MPTS_UNLOCK(mpts_tried); + MPT_LOCK(mpte->mpte_mptcb); + mptcp_start_timer(mpte->mpte_mptcb, MPTT_REXMT); + MPT_UNLOCK(mpte->mpte_mptcb); + mptcplog((LOG_INFO, "%s: mp_so 0x%llx retry later\n", + __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + goto out; + } + + DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts, + struct socket *, mp_so); + error = mptcp_subflow_output(mpte, mpts); + if (error) { + /* can be a temporary loss of source address or other error */ + mpts->mpts_flags |= MPTSF_FAILINGOVER; + mpts->mpts_flags &= ~MPTSF_ACTIVE; + mpts_tried = mpts; + MPTS_UNLOCK(mpts); + mptcplog((LOG_INFO, "%s: error = %d \n", __func__, error)); + goto try_again; + } + /* The model is to have only one active flow at a time */ + mpts->mpts_flags |= MPTSF_ACTIVE; + MPTS_UNLOCK(mpts); + if (mpte->mpte_active_sub == NULL) { + mpte->mpte_active_sub = mpts; + } else if (mpte->mpte_active_sub != mpts) { + MPTS_LOCK(mpte->mpte_active_sub); + mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE; + MPTS_UNLOCK(mpte->mpte_active_sub); + mpte->mpte_active_sub = mpts; + } +out: + /* subflow errors should not be percolated back up */ + return (0); +} + +/* + * Return the most eligible subflow to be used for sending data. + * This function also serves to check if any alternate subflow is available + * or not. 
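The retry logic in mptcp_output() is easy to miss among the locking: transmit on the best subflow, fail over once to a different flow, and on a repeat failure arm the retransmission timer rather than spinning. Reduced to its control flow (hypothetical callback types, not the kernel API, and simplified to a single failover attempt):

struct subflow;		/* opaque in this sketch */

/*
 * pick() returns the best subflow other than 'skip' (or NULL),
 * xmit() returns 0 on success. Errors are deliberately absorbed:
 * subflow failures must not percolate up to the MPTCP socket.
 */
static int
mp_output_sketch(void *session,
    struct subflow *(*pick)(void *, struct subflow *),
    int (*xmit)(void *, struct subflow *))
{
	struct subflow *tried = NULL, *sf;

	while ((sf = pick(session, tried)) != NULL) {
		if (xmit(session, sf) == 0)
			break;		/* sent; caller marks sf active */
		if (tried != NULL)
			break;		/* second failure: defer to timer */
		tried = sf;		/* fail over once */
	}
	return (0);		/* errors intentionally not propagated */
}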
+ */ +struct mptsub * +mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore) +{ + struct mptsub *mpts; + struct mptsub *fallback = NULL; + struct socket *so = NULL; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + MPTS_LOCK_SPIN(mpts); + + if ((ignore) && (mpts == ignore)) { + MPTS_UNLOCK(mpts); + continue; + } + + /* There can only be one subflow in degraded state */ + if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { + MPTS_UNLOCK(mpts); + break; + } + + if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) { + MPTS_UNLOCK(mpts); + continue; + } + + if (mpts->mpts_flags & MPTSF_SUSPENDED) { + MPTS_UNLOCK(mpts); + continue; + } + + if (mpts->mpts_flags & MPTSF_FAILINGOVER) { + so = mpts->mpts_socket; + if ((so) && (!(so->so_flags & SOF_PCBCLEARING))) { + socket_lock(so, 1); + if (so->so_snd.sb_cc == 0) { + mpts->mpts_flags &= ~MPTSF_FAILINGOVER; + so->so_flags &= ~SOF_MP_TRYFAILOVER; + fallback = mpts; + socket_unlock(so, 1); + } else { + fallback = mpts; + socket_unlock(so, 1); + MPTS_UNLOCK(mpts); + continue; + } + } else { + MPTS_UNLOCK(mpts); + continue; + } + } + + if (mpts->mpts_flags & MPTSF_PREFERRED) { + MPTS_UNLOCK(mpts); + break; + } + + /* When there are no preferred flows, use first one in list */ + if (fallback == NULL) + fallback = mpts; + + MPTS_UNLOCK(mpts); + } + /* + * If there is no preferred or backup subflow, and there is no active + * subflow use the last usable subflow. + */ + if (mpts == NULL) { + return (fallback); + } + + return (mpts); +} + +void +mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event) +{ + MPT_LOCK_ASSERT_HELD(mp_tp); + + DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, + uint32_t, event); + + switch (mp_tp->mpt_state) { + case MPTCPS_CLOSED: + case MPTCPS_LISTEN: + mp_tp->mpt_state = MPTCPS_CLOSED; + break; + + case MPTCPS_ESTABLISHED: + if (event == MPCE_CLOSE) + mp_tp->mpt_state = MPTCPS_FIN_WAIT_1; + else if (event == MPCE_RECV_DATA_FIN) + mp_tp->mpt_state = MPTCPS_CLOSE_WAIT; + break; + + case MPTCPS_CLOSE_WAIT: + if (event == MPCE_CLOSE) + mp_tp->mpt_state = MPTCPS_LAST_ACK; + break; + + case MPTCPS_FIN_WAIT_1: + if (event == MPCE_RECV_DATA_ACK) + mp_tp->mpt_state = MPTCPS_FIN_WAIT_2; + else if (event == MPCE_RECV_DATA_FIN) + mp_tp->mpt_state = MPTCPS_CLOSING; + break; + + case MPTCPS_CLOSING: + if (event == MPCE_RECV_DATA_ACK) + mp_tp->mpt_state = MPTCPS_TIME_WAIT; + break; + + case MPTCPS_LAST_ACK: + if (event == MPCE_RECV_DATA_ACK) + mp_tp->mpt_state = MPTCPS_CLOSED; + break; + + case MPTCPS_FIN_WAIT_2: + if (event == MPCE_RECV_DATA_FIN) + mp_tp->mpt_state = MPTCPS_TIME_WAIT; + break; + + case MPTCPS_TIME_WAIT: + break; + + case MPTCPS_FASTCLOSE_WAIT: + if (event == MPCE_CLOSE) + mp_tp->mpt_state = MPTCPS_CLOSED; + break; + + default: + VERIFY(0); + /* NOTREACHED */ + } + DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, + uint32_t, event); + mptcplog((LOG_INFO, "%s: state = %d\n", + __func__, mp_tp->mpt_state)); +} + +/* + * Update the mptcb send state variables, but the actual sbdrop occurs + * in MPTCP layer + */ +void +mptcp_data_ack_rcvd(struct mptcb *mp_tp, struct tcpcb *tp, u_int64_t full_dack) +{ + u_int64_t acked = 0; + + acked = full_dack - mp_tp->mpt_snduna; + + if (acked) { + mp_tp->mpt_snduna += acked; + } + if ((full_dack == mp_tp->mpt_sndmax) && + (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1)) { + mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_ACK); + tp->t_mpflags &= ~TMPF_SEND_DFIN; + } +} + +/* If you change this function, match up mptcp_update_rcv_state_f */ 
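mptcp_close_fsm() above is the data-level analogue of TCP's close diagram. The transition table is small enough to check by hand; here is a compilable sketch with illustrative enum values (the kernel's MPTCPS_*/MPCE_* constants differ, and LISTEN/FASTCLOSE_WAIT are omitted) driving one active-close walk:

#include <assert.h>

typedef enum {
	S_CLOSED, S_ESTABLISHED, S_FIN_WAIT_1, S_FIN_WAIT_2,
	S_CLOSE_WAIT, S_CLOSING, S_LAST_ACK, S_TIME_WAIT
} mp_state_t;
typedef enum { E_CLOSE, E_RECV_DATA_FIN, E_RECV_DATA_ACK } mp_event_t;

/* Transitions mirror mptcp_close_fsm() above. */
static mp_state_t
close_fsm(mp_state_t s, mp_event_t e)
{
	switch (s) {
	case S_ESTABLISHED:
		if (e == E_CLOSE)		return (S_FIN_WAIT_1);
		if (e == E_RECV_DATA_FIN)	return (S_CLOSE_WAIT);
		break;
	case S_FIN_WAIT_1:
		if (e == E_RECV_DATA_ACK)	return (S_FIN_WAIT_2);
		if (e == E_RECV_DATA_FIN)	return (S_CLOSING);
		break;
	case S_FIN_WAIT_2:
		if (e == E_RECV_DATA_FIN)	return (S_TIME_WAIT);
		break;
	case S_CLOSE_WAIT:
		if (e == E_CLOSE)		return (S_LAST_ACK);
		break;
	case S_CLOSING:
		if (e == E_RECV_DATA_ACK)	return (S_TIME_WAIT);
		break;
	case S_LAST_ACK:
		if (e == E_RECV_DATA_ACK)	return (S_CLOSED);
		break;
	default:
		break;
	}
	return (s);	/* unlisted events leave the state unchanged */
}

int
main(void)
{
	/* active close: send DATA_FIN, see it ACKed, get peer's DATA_FIN */
	mp_state_t s = close_fsm(S_ESTABLISHED, E_CLOSE);
	s = close_fsm(s, E_RECV_DATA_ACK);
	s = close_fsm(s, E_RECV_DATA_FIN);
	assert(s == S_TIME_WAIT);
	return (0);
}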
+void +mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp, + uint16_t csum) +{ + struct mptcb *mp_tp = tptomptp(tp); + u_int64_t full_dsn = 0; + + NTOHL(dss_info->mdss_dsn); + NTOHL(dss_info->mdss_subflow_seqn); + NTOHS(dss_info->mdss_data_len); + + /* XXX for autosndbuf grow sb here */ + MPT_LOCK(mp_tp); + MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn); + MPT_UNLOCK(mp_tp); + mptcp_update_rcv_state_meat(mp_tp, tp, + full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len, + csum); + +} + +void +mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp, + u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len, + uint16_t csum) +{ + if (mdss_data_len == 0) { + mptcplog((LOG_INFO, "%s: Received infinite mapping.", + __func__)); + if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) { + mptcplog((LOG_ERR, "%s: Bad checksum value %x \n", + __func__, csum)); + } + mptcp_notify_mpfail(tp->t_inpcb->inp_socket); + return; + } + MPT_LOCK(mp_tp); + if (mptcp_dbg >= MP_VERBOSE_DEBUG_1) + printf("%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n", + __func__, seqn, mdss_data_len, full_dsn, + mp_tp->mpt_rcvnxt); + + /* Process a Data FIN packet, handled in mptcp_do_fin_opt */ + if ((seqn == 0) && (mdss_data_len == 1)) { + mptcplog((LOG_INFO, "%s: Data FIN DSS opt state = %d \n", + __func__, mp_tp->mpt_state)); + MPT_UNLOCK(mp_tp); + return; + } + MPT_UNLOCK(mp_tp); + mptcp_notify_mpready(tp->t_inpcb->inp_socket); + tp->t_rcv_map.mpt_dsn = full_dsn; + tp->t_rcv_map.mpt_sseq = seqn; + tp->t_rcv_map.mpt_len = mdss_data_len; + tp->t_rcv_map.mpt_csum = csum; + tp->t_mpflags |= TMPF_EMBED_DSN; +} + + +void +mptcp_update_rcv_state_f(struct mptcp_dss_ack_opt *dss_info, struct tcpcb *tp, + uint16_t csum) +{ + u_int64_t full_dsn = 0; + struct mptcb *mp_tp = tptomptp(tp); + + NTOHL(dss_info->mdss_dsn); + NTOHL(dss_info->mdss_subflow_seqn); + NTOHS(dss_info->mdss_data_len); + MPT_LOCK(mp_tp); + MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn); + MPT_UNLOCK(mp_tp); + mptcp_update_rcv_state_meat(mp_tp, tp, + full_dsn, + dss_info->mdss_subflow_seqn, + dss_info->mdss_data_len, + csum); +} + +void +mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *dss_info, + struct tcpcb *tp, uint16_t csum) +{ + u_int64_t dsn = mptcp_ntoh64(dss_info->mdss_dsn); + struct mptcb *mp_tp = tptomptp(tp); + + NTOHL(dss_info->mdss_subflow_seqn); + NTOHS(dss_info->mdss_data_len); + mptcp_update_rcv_state_meat(mp_tp, tp, + dsn, + dss_info->mdss_subflow_seqn, + dss_info->mdss_data_len, + csum); +} + +/* + * MPTCP Checksum support + * The checksum is calculated whenever the MPTCP DSS option is included + * in the TCP packet. The checksum includes the sum of the MPTCP pseudo + * header and the actual data indicated by the length specified in the + * DSS option. + */ + +uint16_t +mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, int off) +{ + struct mptcb *mp_tp = tptomptp(tp); + uint32_t sum = 0; + uint64_t dsn; + uint32_t sseq; + uint16_t len; + uint16_t csum; + + if (mp_tp == NULL) + return (0); + + if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) + return (0); + + if (!(tp->t_mpflags & TMPF_EMBED_DSN)) + return (0); + + if (tp->t_mpflags & TMPF_TCP_FALLBACK) + return (0); + + /* + * The remote side may send a packet with fewer bytes than the + * claimed DSS checksum length. 
+ */ + if ((int)m_length2(m, NULL) < (off + tp->t_rcv_map.mpt_len)) + return (0xffff); + + if (tp->t_rcv_map.mpt_len != 0) + sum = m_sum16(m, off, tp->t_rcv_map.mpt_len); + + dsn = mptcp_hton64(tp->t_rcv_map.mpt_dsn); + sseq = htonl(tp->t_rcv_map.mpt_sseq); + len = htons(tp->t_rcv_map.mpt_len); + csum = tp->t_rcv_map.mpt_csum; + sum += in_pseudo64(dsn, sseq, (len + csum)); + ADDCARRY(sum); + DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, + uint32_t, sum); + mptcplog((LOG_INFO, "%s: sum = %x \n", __func__, sum)); + return (~sum & 0xffff); +} + +void +mptcp_output_csum(struct tcpcb *tp, struct mbuf *m, int32_t len, + unsigned hdrlen, u_int64_t dss_val, u_int32_t *sseqp) +{ + struct mptcb *mp_tp = tptomptp(tp); + u_int32_t sum = 0; + uint32_t sseq; + uint16_t dss_len; + uint16_t csum = 0; + uint16_t *csump = NULL; + + if (mp_tp == NULL) + return; + + if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) + return; + + if (sseqp == NULL) + return; + + if (len) + sum = m_sum16(m, hdrlen, len); + + dss_val = mptcp_hton64(dss_val); + sseq = *sseqp; + dss_len = *(uint16_t *)(void *)((u_char*)sseqp + sizeof (u_int32_t)); + sum += in_pseudo64(dss_val, sseq, (dss_len + csum)); + + ADDCARRY(sum); + sum = ~sum & 0xffff; + csump = (uint16_t *)(void *)((u_char*)sseqp + sizeof (u_int32_t) + + sizeof (uint16_t)); + DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m, + uint32_t, sum); + *csump = sum; + mptcplog3((LOG_INFO, "%s: sum = %x \n", __func__, sum)); +} diff --git a/bsd/netinet/mptcp.h b/bsd/netinet/mptcp.h new file mode 100644 index 000000000..0dc8c9c61 --- /dev/null +++ b/bsd/netinet/mptcp.h @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _NETINET_MPTCP_H_ +#define _NETINET_MPTCP_H_ + +#ifdef BSD_KERNEL_PRIVATE + +#include + +#if BYTE_ORDER == BIG_ENDIAN +#define mptcp_hton64(x) (x) +#define mptcp_ntoh64(x) (x) +#else /* LITTLE_ENDIAN */ +#define mptcp_hton64(x) __DARWIN_OSSwapInt64(x) +#define mptcp_ntoh64(x) __DARWIN_OSSwapInt64(x) +#endif + +/* + * MPTCP Option Subtype Field values + */ +#define MPO_CAPABLE 0x0 +#define MPO_JOIN 0x1 +#define MPO_DSS 0x2 +#define MPO_ADD_ADDR 0x3 +#define MPO_REMOVE_ADDR 0x4 +#define MPO_PRIO 0x5 +#define MPO_FAIL 0x6 +#define MPO_FASTCLOSE 0x7 + +/* MPTCP Protocol version */ +#define MP_DRAFT_VERSION_12 0x0 + +/* + * MPTCP MP_CAPABLE TCP Option definitions + * + * Used to establish an MPTCP connection and first subflow. + */ +struct mptcp_mpcapable_opt_common { + u_int8_t mmco_kind; + u_int8_t mmco_len; +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t mmco_version:4, + mmco_subtype:4; +#else /* BIG_ENDIAN */ + u_int8_t mmco_subtype:4, + mmco_version:4; +#endif +#define MPCAP_PROPOSAL_SBIT 0x01 /* SHA1 Algorithm */ +#define MPCAP_HBIT 0x01 /* alias of MPCAP_PROPOSAL_SBIT */ +#define MPCAP_GBIT 0x02 /* must be 0 */ +#define MPCAP_FBIT 0x04 /* must be 0 */ +#define MPCAP_EBIT 0x08 /* must be 0 */ +#define MPCAP_DBIT 0x10 /* must be 0 */ +#define MPCAP_CBIT 0x20 /* must be 0 */ +#define MPCAP_BBIT 0x40 /* Extensibility bit, must be 0 */ +#define MPCAP_ABIT 0x80 /* alias of MPCAP_CHECKSUM_CBIT */ +#define MPCAP_CHECKSUM_CBIT 0x80 /* DSS Checksum bit */ + u_int8_t mmco_flags; +} __attribute__((__packed__)); + +struct mptcp_mpcapable_opt_rsp { + struct mptcp_mpcapable_opt_common mmc_common; + mptcp_key_t mmc_localkey; +} __attribute__((__packed__)); + +struct mptcp_mpcapable_opt_rsp1 { + struct mptcp_mpcapable_opt_common mmc_common; + mptcp_key_t mmc_localkey; + mptcp_key_t mmc_remotekey; +} __attribute__((__packed__)); + +/* + * MPTCP MP_JOIN TCP Option definitions + * + * Used to add subflows to an existing MP_CAPABLE connection. 
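The bitfield layout in mptcp_mpcapable_opt_common resolves to a fixed wire image: kind, length, a subtype/version byte (subtype in the high nibble), a flags byte, then the 64-bit key(s) in network order. A hedged stand-alone encoder for the SYN form (TCPOPT_MULTIPATH is option kind 30; treat the function itself as a sketch of the layout, not kernel API):

#include <stddef.h>
#include <stdint.h>

#define TCPOPT_MULTIPATH	30
#define MPO_CAPABLE		0x0
#define MPCAP_PROPOSAL_SBIT	0x01	/* SHA-1 proposal */
#define MPCAP_CHECKSUM_CBIT	0x80	/* request DSS checksum */

/* Encode MP_CAPABLE for a SYN: common header plus the local key. */
static size_t
encode_mp_capable_syn(uint8_t opt[12], uint64_t local_key, int want_csum)
{
	int i;

	opt[0] = TCPOPT_MULTIPATH;
	opt[1] = 12;				/* total option length */
	opt[2] = (MPO_CAPABLE << 4) | 0x0;	/* subtype | version */
	opt[3] = MPCAP_PROPOSAL_SBIT |
	    (want_csum ? MPCAP_CHECKSUM_CBIT : 0);
	for (i = 0; i < 8; i++)			/* key, big-endian */
		opt[4 + i] = (uint8_t)(local_key >> (56 - 8 * i));
	return (12);
}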
+ */ + +/* MP_JOIN Option for SYN */ +struct mptcp_mpjoin_opt_req { + u_int8_t mmjo_kind; + u_int8_t mmjo_len; +#define MPTCP_BACKUP 0x1 + u_int8_t mmjo_subtype_bkp; + u_int8_t mmjo_addr_id; + u_int32_t mmjo_peer_token; + u_int32_t mmjo_rand; +} __attribute__((__packed__)); + +/* MP_JOIN Option for SYN/ACK */ +struct mptcp_mpjoin_opt_rsp { + u_int8_t mmjo_kind; + u_int8_t mmjo_len; +#define MPTCP_BACKUP 0x1 + u_int8_t mmjo_subtype_bkp; + u_int8_t mmjo_addr_id; + u_int64_t mmjo_mac; /* Truncated message auth code */ + u_int32_t mmjo_rand; +} __attribute__((__packed__)); + +/* MP_Join Option for ACK */ +struct mptcp_mpjoin_opt_rsp2 { + u_int8_t mmjo_kind; + u_int8_t mmjo_len; +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t mmjo_reserved1:4, + mmjo_subtype:4; +#else /* BIG_ENDIAN */ + u_int8_t mmjo_subtype:4, + mmjo_reserved1:4; +#endif + u_int8_t mmjo_reserved2; + u_int8_t mmjo_mac[20]; /* This is 160 bits HMAC SHA-1 per RFC */ +} __attribute__((__packed__)); + + +/* + * MPTCP ADD_ADDR and REMOVE_ADDR TCP Options + * + * ADD_ADDR option shall be ignored by this implementation + * REMOVE_ADDR option shall be sent to help flush dead subflows + */ + +/* Add Address Option */ +struct mptcp_addaddr_opt { + u_int8_t ma_kind; + u_int8_t ma_len; +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t ma_ipver:4, + ma_subtype:4; +#else /* BIG_ENDIAN */ + u_int8_t ma_subtype:4, + ma_ipver:4; +#endif +#define MA_IPVer_V4 4 /* IPv4 Address tagged to the option */ +#define MA_IPVer_V6 6 /* IPv6 Address tagged to the option */ + u_int8_t ma_addr_id; +} __attribute__((__packed__)); + +/* Address sent in the ADD_ADDR option */ +struct mptcp_addr_family_val { + union { + struct in_addr ma_v4_addr; + struct in6_addr ma_v6_addr; + } ma_addr; + /* u_int16_t ma_ports; */ /* optional field */ +} __attribute__((__packed__)); + +/* Remove Address Option */ +struct mptcp_remaddr_opt { + u_int8_t mr_kind; + u_int8_t mr_len; +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t mr_rest:4, + mr_subtype:4; +#else /* BIG_ENDIAN */ + u_int8_t mr_subtype:4, + mr_rest:4; +#endif + u_int8_t mr_addr_id; +} __attribute__((__packed__)); + +/* + * MPTCP Data Sequence Signal (DSS) TCP Options + * + * Used to map subflow sequence space to MPTCP data sequence space. 
+ * Used to send Data ACKs. + */ + +/* + * DSS Option variants coded as flags in the DSS option flags field + */ +#define MDSS_A 0x01 /* Data ACK present if set */ +#define MDSS_a 0x02 /* 64-bit Data ACK present if set */ +#define MDSS_M 0x04 /* Data Sequence Number present if set */ +#define MDSS_m 0x08 /* 64-bit Data Sequence Number present if set */ +#define MDSS_F 0x10 /* Data FIN present */ + +/* DSS fields common to all DSS option variants */ +struct mptcp_dss_copt { + u_int8_t mdss_kind; + u_int8_t mdss_len; +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t mdss_reserved1:4, + mdss_subtype:4; +#else /* BIG_ENDIAN */ + u_int8_t mdss_subtype:4, + mdss_reserved1:4; +#endif + u_int8_t mdss_flags; +}__attribute__((__packed__)); + +/* 32-bit DSS option */ +struct mptcp_dsn_opt { + struct mptcp_dss_copt mdss_copt; + u_int32_t mdss_dsn; /* Data Sequence Number */ + u_int32_t mdss_subflow_seqn; /* Relative Subflow Seq Num */ + u_int16_t mdss_data_len; /* Data Length */ + /* u_int16_t mdss_xsum; */ /* Data checksum - optional */ + +}__attribute__((__packed__)); + +/* 64-bit DSS option */ +struct mptcp_dsn64_opt { + struct mptcp_dss_copt mdss_copt; + u_int64_t mdss_dsn; /* Data Sequence Number */ + u_int32_t mdss_subflow_seqn; /* Relative Subflow Seq Num */ + u_int16_t mdss_data_len; /* Data Length */ + /* u_int16_t mdss_xsum; */ /* Data checksum - optional */ +}__attribute__((__packed__)); + +/* 32-bit DSS Data ACK option */ +struct mptcp_data_ack_opt { + struct mptcp_dss_copt mdss_copt; + u_int32_t mdss_ack; +}__attribute__((__packed__)); + +/* 64-bit DSS Data ACK option */ +struct mptcp_data_ack64_opt { + struct mptcp_dss_copt mdss_copt; + u_int64_t mdss_ack; +}__attribute__((__packed__)); + +/* 32-bit DSS+Data ACK option */ +struct mptcp_dss_ack_opt { + struct mptcp_dss_copt mdss_copt; + u_int32_t mdss_ack; /* Data ACK */ + u_int32_t mdss_dsn; /* Data Sequence Number */ + u_int32_t mdss_subflow_seqn; /* Relative Subflow Seq Num */ + u_int16_t mdss_data_len; /* Data Length */ + /* u_int16_t mdss_xsum; */ /* Data checksum - optional */ +}__attribute__((__packed__)); + +/* 64-bit DSS+Data ACK option */ +struct mptcp_dss64_ack64_opt { + struct mptcp_dss_copt mdss_copt; + u_int64_t mdss_ack; /* Data ACK */ + u_int64_t mdss_dsn; /* Data Sequence Number */ + u_int32_t mdss_subflow_seqn; /* Relative Subflow Seq Num */ + u_int16_t mdss_data_len; /* Data Length */ + /* u_int16_t mdss_xsum; */ /* Data checksum - optional */ +}__attribute__((__packed__)); + +/* DSS+Data ACK mixed option variants */ +struct mptcp_dss32_ack64_opt { + struct mptcp_dss_copt mdss_copt; + u_int64_t mdss_ack; /* Data ACK */ + u_int32_t mdss_dsn; /* Data Sequence Number */ + u_int32_t mdss_subflow_seqn; /* Relative Subflow Seq Num */ + u_int16_t mdss_data_len; /* Data Length */ + /* u_int16_t mdss_xsum; */ /* Data checksum - optional */ +}__attribute__((__packed__)); + +struct mptcp_dss64_ack32_opt { + struct mptcp_dss_copt mdss_copt; + u_int32_t mdss_ack; /* Data ACK */ + u_int64_t mdss_dsn; /* Data Sequence Number */ + u_int32_t mdss_subflow_seqn; /* Relative Subflow Seq Num */ + u_int16_t mdss_data_len; /* Data Length */ + /* u_int16_t mdss_xsum; */ /* Data checksum - optional */ +}__attribute__((__packed__)); + + +/* + * MPTCP Fast Close Option + * + * MPTCP connection is aborted if the FastClose option is received. + * In the future, we may send this option if an MPTCP socket-level abort + * API is supported. + */
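The optional mdss_xsum field in the DSS variants above is an Internet-style ones' complement checksum over a pseudo header (DSN, subflow sequence number, data-level length, zero checksum) plus the mapped payload. The kernel computes it over mbuf chains with m_sum16()/in_pseudo64(); a flat-buffer sketch:

#include <stddef.h>
#include <stdint.h>

/* Ones' complement checksum of the DSS pseudo header plus payload. */
static uint16_t
dss_checksum(uint64_t dsn, uint32_t ssn, uint16_t len,
    const uint8_t *data, size_t datalen)
{
	uint8_t ph[16];
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < 8; i++)		/* 64-bit DSN, network order */
		ph[i] = (uint8_t)(dsn >> (56 - 8 * i));
	for (i = 0; i < 4; i++)		/* subflow sequence number */
		ph[8 + i] = (uint8_t)(ssn >> (24 - 8 * i));
	ph[12] = (uint8_t)(len >> 8);	/* data-level length */
	ph[13] = (uint8_t)len;
	ph[14] = ph[15] = 0;		/* checksum field, zero while summing */

	for (i = 0; i < sizeof (ph); i += 2)
		sum += (uint32_t)((ph[i] << 8) | ph[i + 1]);
	for (i = 0; i + 1 < datalen; i += 2)
		sum += (uint32_t)((data[i] << 8) | data[i + 1]);
	if (datalen & 1)		/* odd trailing byte, zero-padded */
		sum += (uint32_t)(data[datalen - 1] << 8);
	while (sum >> 16)		/* fold the carries */
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)~sum);
}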
+ */
+struct mptcp_fastclose_opt {
+	u_int8_t	mfast_kind;
+	u_int8_t	mfast_len;
+#if BYTE_ORDER == LITTLE_ENDIAN
+	u_int8_t	mfast_reserved:4,
+			mfast_subtype:4;
+#else /* BIG_ENDIAN */
+	u_int8_t	mfast_subtype:4,
+			mfast_reserved:4;
+#endif
+	u_int8_t	mfast_reserved1;
+	u_int64_t	mfast_key;	/* Option receiver's key */
+} __attribute__((__packed__));
+
+/*
+ * MPTCP MP_FAIL Option
+ *
+ * When the DSS checksum is on and a checksum fails, the remote peer may
+ * send this option to indicate the failure; likewise, we may send it.
+ */
+struct mptcp_mpfail_opt {
+	u_int8_t	mfail_kind;
+	u_int8_t	mfail_len;
+#if BYTE_ORDER == LITTLE_ENDIAN
+	u_int8_t	mfail_reserved:4,
+			mfail_subtype:4;
+#else /* BIG_ENDIAN */
+	u_int8_t	mfail_subtype:4,
+			mfail_reserved:4;
+#endif
+	u_int8_t	mfail_reserved1:8;
+	u_int64_t	mfail_dsn;
+} __attribute__((__packed__));
+
+
+/*
+ * MPTCP MP_PRIO Option
+ *
+ * When a subflow becomes unusable (e.g. due to bad radio coverage), is
+ * the costlier path, or is not the preferred path, the receiver may use
+ * this option to let the sender know its path preference.
+ */
+
+/* Option to change priority of self */
+struct mptcp_mpprio_opt {
+	u_int8_t	mpprio_kind;
+	u_int8_t	mpprio_len;
+#define	MPTCP_MPPRIO_BKP	0x1
+#if BYTE_ORDER == LITTLE_ENDIAN
+	u_int8_t	mpprio_flags:4,
+			mpprio_subtype:4;
+#else /* BIG_ENDIAN */
+	u_int8_t	mpprio_subtype:4,
+			mpprio_flags:4;
+#endif
+} __attribute__((__packed__));
+
+/* Option to change priority of some other subflow(s) using addr_id */
+struct mptcp_mpprio_addr_opt {
+	u_int8_t	mpprio_kind;
+	u_int8_t	mpprio_len;
+#define	MPTCP_MPPRIO_BKP	0x1
+#if BYTE_ORDER == LITTLE_ENDIAN
+	u_int8_t	mpprio_flags:4,
+			mpprio_subtype:4;
+#else /* BIG_ENDIAN */
+	u_int8_t	mpprio_subtype:4,
+			mpprio_flags:4;
+#endif
+	u_int8_t	mpprio_addrid;
+} __attribute__((__packed__));
+
+/*
+ * MPTCP Checksum Pseudo Header
+ */
+struct mptcp_pseudohdr {
+	u_int64_t	mphdr_dsn;	/* Data Sequence Number */
+	u_int32_t	mphdr_ssn;	/* Subflow Sequence Number */
+	u_int16_t	mphdr_len;	/* Data-Level Length */
+	u_int16_t	mphdr_xsum;	/* MPTCP Level Checksum */
+} __attribute__((__packed__));
+
+#endif /* BSD_KERNEL_PRIVATE */
+
+#endif /* _NETINET_MPTCP_H_ */
diff --git a/bsd/netinet/mptcp_opt.c b/bsd/netinet/mptcp_opt.c
new file mode 100644
index 000000000..a4ea96ef6
--- /dev/null
+++ b/bsd/netinet/mptcp_opt.c
@@ -0,0 +1,1887 @@
+/*
+ * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +static int mptcp_validate_join_hmac(struct tcpcb *, u_char*, int); +static int mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen); + +/* + * MPTCP Options Output Processing + */ + +static unsigned +mptcp_setup_first_subflow_syn_opts(struct socket *so, int flags, u_char *opt, + unsigned optlen) +{ + struct tcpcb *tp = sototcpcb(so); + struct mptcb *mp_tp = NULL; + mp_tp = tptomptp(tp); + + if (!(so->so_flags & SOF_MP_SUBFLOW)) + return (optlen); + + /* + * Avoid retransmitting the MP_CAPABLE option. + */ + if (tp->t_rxtshift > mptcp_mpcap_retries) + return (optlen); + + if ((flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { + struct mptcp_mpcapable_opt_rsp mptcp_opt; + mptcp_key_t mp_localkey = 0; + + mp_localkey = mptcp_get_localkey(mp_tp); + if (mp_localkey == 0) { + /* an embryonic connection was closed from above */ + return (optlen); + } + bzero(&mptcp_opt, + sizeof (struct mptcp_mpcapable_opt_rsp)); + mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH; + mptcp_opt.mmc_common.mmco_len = + sizeof (struct mptcp_mpcapable_opt_rsp); + mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE; + MPT_LOCK_SPIN(mp_tp); + mptcp_opt.mmc_common.mmco_version = mp_tp->mpt_version; + mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT; + if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) + mptcp_opt.mmc_common.mmco_flags |= + MPCAP_CHECKSUM_CBIT; + MPT_UNLOCK(mp_tp); + mptcp_opt.mmc_localkey = mp_localkey; + memcpy(opt + optlen, &mptcp_opt, + mptcp_opt.mmc_common.mmco_len); + optlen += mptcp_opt.mmc_common.mmco_len; + if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) { + printf("%s: SYN_ACK localkey = %llx \n", + __func__, mp_localkey); + } + } else { + /* Only the SYN flag is set */ + struct mptcp_mpcapable_opt_common mptcp_opt; + mptcp_key_t mp_localkey = 0; + mp_localkey = mptcp_get_localkey(mp_tp); + so->so_flags |= SOF_MPTCP_CLIENT; + if (mp_localkey == 0) { + /* an embryonic connection was closed */ + return (optlen); + } + bzero(&mptcp_opt, + sizeof (struct mptcp_mpcapable_opt_common)); + mptcp_opt.mmco_kind = TCPOPT_MULTIPATH; + mptcp_opt.mmco_len = + sizeof (struct mptcp_mpcapable_opt_common) + + sizeof (mptcp_key_t); + mptcp_opt.mmco_subtype = MPO_CAPABLE; + MPT_LOCK_SPIN(mp_tp); + mptcp_opt.mmco_version = mp_tp->mpt_version; + mptcp_opt.mmco_flags |= MPCAP_PROPOSAL_SBIT; + if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) + mptcp_opt.mmco_flags |= MPCAP_CHECKSUM_CBIT; + MPT_UNLOCK(mp_tp); + (void) memcpy(opt + optlen, &mptcp_opt, + sizeof (struct mptcp_mpcapable_opt_common)); + optlen += sizeof (struct mptcp_mpcapable_opt_common); + (void) memcpy(opt + optlen, &mp_localkey, + sizeof (mptcp_key_t)); + optlen += sizeof (mptcp_key_t); + } + + return (optlen); +} + +static unsigned +mptcp_setup_join_subflow_syn_opts(struct socket *so, int flags, u_char *opt, + unsigned 
optlen) +{ + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = NULL; + + if (!inp) + return (optlen); + + tp = intotcpcb(inp); + if (!tp) + return (optlen); + + if (!tp->t_mptcb) + return (optlen); + + if ((flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { + struct mptcp_mpjoin_opt_rsp mpjoin_rsp; + bzero(&mpjoin_rsp, sizeof (mpjoin_rsp)); + mpjoin_rsp.mmjo_kind = TCPOPT_MULTIPATH; + mpjoin_rsp.mmjo_len = sizeof (mpjoin_rsp); + mpjoin_rsp.mmjo_subtype_bkp = MPO_JOIN << 4; + if (tp->t_mpflags & TMPF_BACKUP_PATH) + mpjoin_rsp.mmjo_subtype_bkp |= MPTCP_BACKUP; + mpjoin_rsp.mmjo_addr_id = tp->t_local_aid; + mptcp_get_rands(tp->t_local_aid, tptomptp(tp), + &mpjoin_rsp.mmjo_rand, NULL); + mpjoin_rsp.mmjo_mac = mptcp_get_trunced_hmac(tp->t_local_aid, + tptomptp(tp)); + memcpy(opt + optlen, &mpjoin_rsp, mpjoin_rsp.mmjo_len); + optlen += mpjoin_rsp.mmjo_len; + } else { + struct mptcp_mpjoin_opt_req mpjoin_req; + bzero(&mpjoin_req, sizeof (mpjoin_req)); + mpjoin_req.mmjo_kind = TCPOPT_MULTIPATH; + mpjoin_req.mmjo_len = sizeof (mpjoin_req); + mpjoin_req.mmjo_subtype_bkp = MPO_JOIN << 4; + /* A secondary subflow is started off as backup */ + mpjoin_req.mmjo_subtype_bkp |= MPTCP_BACKUP; + tp->t_mpflags |= TMPF_BACKUP_PATH; + mpjoin_req.mmjo_addr_id = tp->t_local_aid; + mpjoin_req.mmjo_peer_token = mptcp_get_remotetoken(tp->t_mptcb); + if (mpjoin_req.mmjo_peer_token == 0) { + if (mptcp_dbg >= MP_ERR_DEBUG) + printf("%s: zero peer token \n", __func__); + } + mptcp_get_rands(tp->t_local_aid, tptomptp(tp), + &mpjoin_req.mmjo_rand, NULL); + memcpy(opt + optlen, &mpjoin_req, mpjoin_req.mmjo_len); + optlen += mpjoin_req.mmjo_len; + } + return (optlen); +} + +unsigned +mptcp_setup_join_ack_opts(struct tcpcb *tp, u_char *opt, unsigned optlen) +{ + unsigned new_optlen; + struct mptcp_mpjoin_opt_rsp2 join_rsp2; + + if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpjoin_opt_rsp2)) { + printf("%s: no space left %d \n", __func__, optlen); + return (optlen); + } + + bzero(&join_rsp2, sizeof (struct mptcp_mpjoin_opt_rsp2)); + join_rsp2.mmjo_kind = TCPOPT_MULTIPATH; + join_rsp2.mmjo_len = sizeof (struct mptcp_mpjoin_opt_rsp2); + join_rsp2.mmjo_subtype = MPO_JOIN; + mptcp_get_hmac(tp->t_local_aid, tptomptp(tp), + (u_char*)&join_rsp2.mmjo_mac, + sizeof (join_rsp2.mmjo_mac)); + memcpy(opt + optlen, &join_rsp2, join_rsp2.mmjo_len); + new_optlen = optlen + join_rsp2.mmjo_len; + return (new_optlen); +} + +unsigned +mptcp_setup_syn_opts(struct socket *so, int flags, u_char *opt, unsigned optlen) +{ + unsigned new_optlen; + + if (mptcp_enable == 0) { + /* do nothing */ + return (optlen); + } + + if (!(so->so_flags & SOF_MP_SEC_SUBFLOW)) { + new_optlen = mptcp_setup_first_subflow_syn_opts(so, flags, opt, + optlen); + } else { + /* + * To simulate SYN_ACK with no join opt, comment this line on + * OS X server side. This serves as a testing hook. 
+ */ + new_optlen = mptcp_setup_join_subflow_syn_opts(so, flags, opt, + optlen); + } + return (new_optlen); +} + +static int +mptcp_send_mpfail(struct tcpcb *tp, u_char *opt, unsigned int optlen) +{ +#pragma unused(tp, opt, optlen) + + struct mptcb *mp_tp = NULL; + struct mptcp_mpfail_opt fail_opt; + uint64_t dsn; + int len = sizeof (struct mptcp_mpfail_opt); + + mp_tp = tptomptp(tp); + if (mp_tp == NULL) { + tp->t_mpflags &= ~TMPF_SND_MPFAIL; + return (optlen); + } + + /* if option space low give up */ + if ((MAX_TCPOPTLEN - optlen) < sizeof (struct mptcp_mpfail_opt)) { + tp->t_mpflags &= ~TMPF_SND_MPFAIL; + return (optlen); + } + + MPT_LOCK(mp_tp); + dsn = mp_tp->mpt_rcvnxt; + MPT_UNLOCK(mp_tp); + + bzero(&fail_opt, sizeof (fail_opt)); + fail_opt.mfail_kind = TCPOPT_MULTIPATH; + fail_opt.mfail_len = len; + fail_opt.mfail_subtype = MPO_FAIL; + fail_opt.mfail_dsn = mptcp_hton64(dsn); + memcpy(opt + optlen, &fail_opt, len); + optlen += len; + tp->t_mpflags &= ~TMPF_SND_MPFAIL; + if (mptcp_dbg >= MP_ERR_DEBUG) + printf("%s: %d \n", __func__, tp->t_local_aid); + return (optlen); +} + +static int +mptcp_send_infinite_mapping(struct tcpcb *tp, u_char *opt, unsigned int optlen) +{ + struct mptcp_dsn_opt infin_opt; + struct mptcb *mp_tp = NULL; + size_t len = sizeof (struct mptcp_dsn_opt); + struct socket *so = tp->t_inpcb->inp_socket; + int error = 0; + int csum_len = 0; + + if (!so) + return (optlen); + + mp_tp = tptomptp(tp); + if (mp_tp == NULL) + return (optlen); + + MPT_LOCK(mp_tp); + if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) + csum_len = 2; + + /* try later */ + if ((MAX_TCPOPTLEN - optlen) < (len + csum_len)) { + MPT_UNLOCK(mp_tp); + return (optlen); + } + bzero(&infin_opt, sizeof (infin_opt)); + infin_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; + infin_opt.mdss_copt.mdss_len = len + csum_len; + infin_opt.mdss_copt.mdss_subtype = MPO_DSS; + infin_opt.mdss_copt.mdss_flags |= MDSS_M; + if (mp_tp->mpt_flags & MPTCPF_RECVD_MPFAIL) { + infin_opt.mdss_dsn = (u_int32_t) + MPTCP_DATASEQ_LOW32(mp_tp->mpt_dsn_at_csum_fail); + error = mptcp_get_map_for_dsn(so, mp_tp->mpt_dsn_at_csum_fail, + &infin_opt.mdss_subflow_seqn); + } else { + infin_opt.mdss_dsn = (u_int32_t) + MPTCP_DATASEQ_LOW32(mp_tp->mpt_snduna); + infin_opt.mdss_subflow_seqn = tp->snd_una - tp->iss; + } + MPT_UNLOCK(mp_tp); + if (error != 0) + return (optlen); + if ((infin_opt.mdss_dsn == 0) || (infin_opt.mdss_subflow_seqn == 0)) { + return (optlen); + } + infin_opt.mdss_dsn = htonl(infin_opt.mdss_dsn); + infin_opt.mdss_subflow_seqn = htonl(infin_opt.mdss_subflow_seqn); + infin_opt.mdss_data_len = 0; + + memcpy(opt + optlen, &infin_opt, len); + optlen += len; + if (csum_len != 0) { + /* The checksum field is set to 0 for infinite mapping */ + uint16_t csum = 0; + memcpy(opt + optlen, &csum, csum_len); + optlen += csum_len; + } + + if (mptcp_dbg == MP_VERBOSE_DEBUG_1) { + printf("%s: dsn = %x, seq = %x len = %x\n", __func__, + ntohl(infin_opt.mdss_dsn), + ntohl(infin_opt.mdss_subflow_seqn), + ntohs(infin_opt.mdss_data_len)); + } + + /* so->so_flags &= ~SOF_MPTCP_CLIENT; */ + tp->t_mpflags |= TMPF_INFIN_SENT; + tcpstat.tcps_estab_fallback++; + return (optlen); +} + + +static int +mptcp_ok_to_fin(struct tcpcb *tp, u_int64_t dsn, u_int32_t datalen) +{ + struct mptcb *mp_tp = NULL; + mp_tp = tptomptp(tp); + + MPT_LOCK(mp_tp); + dsn = (mp_tp->mpt_sndmax & MPTCP_DATASEQ_LOW32_MASK) | dsn; + if ((dsn + datalen) == mp_tp->mpt_sndmax) { + MPT_UNLOCK(mp_tp); + return (1); + } + MPT_UNLOCK(mp_tp); + return (0); +} + + +/* Must be called from 
tcp_output to fill in the fast close option */ +static int +mptcp_send_fastclose(struct tcpcb *tp, u_char *opt, unsigned int optlen, + int flags) +{ + struct mptcp_fastclose_opt fastclose_opt; + struct mptcb *mp_tp = tptomptp(tp); + + /* Only ACK flag should be set */ + if (flags != TH_ACK) + return (optlen); + + if ((MAX_TCPOPTLEN - optlen) < + sizeof (struct mptcp_fastclose_opt)) { + return (optlen); + } + + bzero(&fastclose_opt, sizeof (struct mptcp_fastclose_opt)); + fastclose_opt.mfast_kind = TCPOPT_MULTIPATH; + fastclose_opt.mfast_len = sizeof (struct mptcp_fastclose_opt); + fastclose_opt.mfast_subtype = MPO_FASTCLOSE; + MPT_LOCK_SPIN(mp_tp); + fastclose_opt.mfast_key = mptcp_get_remotekey(mp_tp); + MPT_UNLOCK(mp_tp); + memcpy(opt + optlen, &fastclose_opt, fastclose_opt.mfast_len); + optlen += fastclose_opt.mfast_len; + + return (optlen); +} + +unsigned int +mptcp_setup_opts(struct tcpcb *tp, int32_t off, u_char *opt, + unsigned int optlen, int flags, int datalen, + unsigned int **dss_lenp, u_int8_t **finp, u_int64_t *dss_valp, + u_int32_t **sseqp) +{ + struct inpcb *inp = (struct inpcb *)tp->t_inpcb; + struct socket *so = inp->inp_socket; + struct mptcb *mp_tp = tptomptp(tp); + boolean_t do_csum = FALSE; + boolean_t send_64bit_dsn = FALSE; + boolean_t send_64bit_ack = FALSE; + + if (mptcp_enable == 0) { + /* do nothing */ + return (optlen); + } + + if (mp_tp == NULL) { + return (optlen); + } + + if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) + do_csum = TRUE; + + /* tcp_output handles the SYN path separately */ + if (flags & TH_SYN) + return (optlen); + + if ((MAX_TCPOPTLEN - optlen) < + sizeof (struct mptcp_mpcapable_opt_common)) { + if (mptcp_dbg >= MP_ERR_DEBUG) { + printf("MPTCP ERROR %s: no space left %d flags %x " + "tp->t_mpflags %x" + "len %d\n", __func__, optlen, flags, tp->t_mpflags, + datalen); + } + return (optlen); + } + + if (tp->t_mpflags & TMPF_FASTCLOSE) { + optlen = mptcp_send_fastclose(tp, opt, optlen, flags); + VERIFY(datalen == 0); + return (optlen); + } + + if (tp->t_mpflags & TMPF_TCP_FALLBACK) { + if (tp->t_mpflags & TMPF_SND_MPFAIL) + optlen = mptcp_send_mpfail(tp, opt, optlen); + else if (!(tp->t_mpflags & TMPF_INFIN_SENT)) + optlen = mptcp_send_infinite_mapping(tp, opt, optlen); + return (optlen); + } + + if (tp->t_mpflags & TMPF_SND_MPPRIO) { + optlen = mptcp_snd_mpprio(tp, opt, optlen); + return (optlen); + } + + if ((tp->t_mpflags & TMPF_PREESTABLISHED) && + (!(tp->t_mpflags & TMPF_SENT_KEYS)) && + (!(tp->t_mpflags & TMPF_JOINED_FLOW))) { + struct mptcp_mpcapable_opt_rsp1 mptcp_opt; + if ((MAX_TCPOPTLEN - optlen) < + sizeof (struct mptcp_mpcapable_opt_rsp1)) + return (optlen); + bzero(&mptcp_opt, sizeof (struct mptcp_mpcapable_opt_rsp1)); + mptcp_opt.mmc_common.mmco_kind = TCPOPT_MULTIPATH; + mptcp_opt.mmc_common.mmco_len = + sizeof (struct mptcp_mpcapable_opt_rsp1); + mptcp_opt.mmc_common.mmco_subtype = MPO_CAPABLE; + mptcp_opt.mmc_common.mmco_version = MP_DRAFT_VERSION_12; + /* HMAC-SHA1 is the proposal */ + mptcp_opt.mmc_common.mmco_flags |= MPCAP_PROPOSAL_SBIT; + MPT_LOCK(mp_tp); + if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) + mptcp_opt.mmc_common.mmco_flags |= MPCAP_CHECKSUM_CBIT; + mptcp_opt.mmc_localkey = mptcp_get_localkey(mp_tp); + mptcp_opt.mmc_remotekey = mptcp_get_remotekey(mp_tp); + MPT_UNLOCK(mp_tp); + memcpy(opt + optlen, &mptcp_opt, mptcp_opt.mmc_common.mmco_len); + optlen += mptcp_opt.mmc_common.mmco_len; + tp->t_mpflags |= TMPF_SENT_KEYS; + so->so_flags |= SOF_MPTCP_TRUE; + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + tp->t_mpflags |= 
TMPF_MPTCP_TRUE; + + if (!tp->t_mpuna) { + tp->t_mpuna = tp->snd_una; + } else { + /* its a retransmission of the MP_CAPABLE ACK */ + } + if (mptcp_dbg >= MP_ERR_DEBUG) { + printf("MPTCP SUCCESS %s: established.\n", __func__); + } + return (optlen); + } else if (tp->t_mpflags & TMPF_MPTCP_TRUE) { + if (tp->t_mpflags & TMPF_SND_REM_ADDR) { + int rem_opt_len = sizeof (struct mptcp_remaddr_opt); + if ((optlen + rem_opt_len) <= MAX_TCPOPTLEN) { + mptcp_send_remaddr_opt(tp, + (struct mptcp_remaddr_opt *)(opt + optlen)); + optlen += rem_opt_len; + return (optlen); + } else { + tp->t_mpflags &= ~TMPF_SND_REM_ADDR; + } + } + } + + if ((tp->t_mpflags & TMPF_JOINED_FLOW) && + (tp->t_mpflags & TMPF_PREESTABLISHED) && + (!(tp->t_mpflags & TMPF_RECVD_JOIN)) && + (tp->t_mpflags & TMPF_SENT_JOIN) && + (!(tp->t_mpflags & TMPF_MPTCP_TRUE))) { + /* Do the ACK part */ + optlen = mptcp_setup_join_ack_opts(tp, opt, optlen); + if (!tp->t_mpuna) { + tp->t_mpuna = tp->snd_una; + } + /* Start a timer to retransmit the ACK */ + tp->t_timer[TCPT_JACK_RXMT] = + OFFSET_FROM_START(tp, tcp_jack_rxmt); + return (optlen); + } + + if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) + return (optlen); + + /* From here on, all options are sent only if MPTCP_TRUE */ + + MPT_LOCK(mp_tp); + if (mp_tp->mpt_flags & MPTCPF_SND_64BITDSN) { + send_64bit_dsn = TRUE; + } + if (mp_tp->mpt_flags & MPTCPF_SND_64BITACK) { + send_64bit_ack = TRUE; + } + MPT_UNLOCK(mp_tp); + +#define CHECK_OPTLEN { \ + if ((MAX_TCPOPTLEN - optlen) < len) { \ + if (mptcp_dbg >= MP_ERR_DEBUG) { \ + printf("MPTCP ERROR %s: len %d optlen %d \n", \ + __func__, \ + len, optlen); \ + } \ + return (optlen); \ + } \ +} + +#define DO_FIN(dsn_opt) { \ + int sndfin = 0; \ + sndfin = mptcp_ok_to_fin(tp, dsn_opt.mdss_dsn, datalen); \ + if (sndfin) { \ + dsn_opt.mdss_copt.mdss_flags |= MDSS_F; \ + *finp = opt + optlen + offsetof(struct mptcp_dss_copt, \ + mdss_flags); \ + dsn_opt.mdss_data_len += 1; \ + } \ +} + +#define CHECK_DATALEN { \ + /* MPTCP socket does not support IP options */ \ + if ((datalen + optlen + len) > tp->t_maxopd) { \ + if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) \ + printf("%s: nosp %d len %d opt %d %d %d\n", \ + __func__, datalen, len, optlen, \ + tp->t_maxseg, tp->t_maxopd); \ + /* remove option length from payload len */ \ + datalen = tp->t_maxopd - optlen - len; \ + } \ +} + + if ((tp->t_mpflags & TMPF_SEND_DSN) && + (send_64bit_dsn)) { + /* + * If there was the need to send 64-bit Data ACK along + * with 64-bit DSN, then 26 or 28 bytes would be used. + * With timestamps and NOOP padding that will cause + * overflow. Hence, in the rare event that both 64-bit + * DSN and 64-bit ACK have to be sent, delay the send of + * 64-bit ACK until our 64-bit DSN is acked with a 64-bit ack. + * XXX If this delay causes issue, remove the 2-byte padding. 
+ */ + struct mptcp_dss64_ack32_opt dsn_ack_opt; + unsigned int len = sizeof (dsn_ack_opt); + + if (do_csum) { + len += 2; + } + + CHECK_OPTLEN; + + bzero(&dsn_ack_opt, sizeof (dsn_ack_opt)); + dsn_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; + dsn_ack_opt.mdss_copt.mdss_subtype = MPO_DSS; + dsn_ack_opt.mdss_copt.mdss_len = len; + dsn_ack_opt.mdss_copt.mdss_flags |= + MDSS_M | MDSS_m | MDSS_A; + + CHECK_DATALEN; + + mptcp_output_getm_dsnmap64(so, off, (u_int32_t)datalen, + &dsn_ack_opt.mdss_dsn, + &dsn_ack_opt.mdss_subflow_seqn, + &dsn_ack_opt.mdss_data_len); + + *dss_valp = dsn_ack_opt.mdss_dsn; + + if ((dsn_ack_opt.mdss_data_len == 0) || + (dsn_ack_opt.mdss_dsn == 0)) { + return (optlen); + } + + if (tp->t_mpflags & TMPF_SEND_DFIN) { + DO_FIN(dsn_ack_opt); + } + + MPT_LOCK(mp_tp); + dsn_ack_opt.mdss_ack = + htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt)); + MPT_UNLOCK(mp_tp); + + dsn_ack_opt.mdss_dsn = mptcp_hton64(dsn_ack_opt.mdss_dsn); + dsn_ack_opt.mdss_subflow_seqn = htonl( + dsn_ack_opt.mdss_subflow_seqn); + dsn_ack_opt.mdss_data_len = htons( + dsn_ack_opt.mdss_data_len); + *dss_lenp = (unsigned int *)(void *)(opt + optlen + + offsetof(struct mptcp_dss64_ack32_opt, mdss_data_len)); + + memcpy(opt + optlen, &dsn_ack_opt, sizeof (dsn_ack_opt)); + + if (do_csum) { + *sseqp = (u_int32_t *)(void *)(opt + optlen + + offsetof(struct mptcp_dss64_ack32_opt, + mdss_subflow_seqn)); + } + optlen += len; + if (mptcp_dbg == MP_VERBOSE_DEBUG_2) { + printf("%s: long DSS = %llx ACK = %llx \n", + __func__, + mptcp_ntoh64(dsn_ack_opt.mdss_dsn), + mptcp_ntoh64(dsn_ack_opt.mdss_ack)); + } + tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; + return (optlen); + } + + if ((tp->t_mpflags & TMPF_SEND_DSN) && + (!send_64bit_dsn) && + !(tp->t_mpflags & TMPF_MPTCP_ACKNOW)) { + struct mptcp_dsn_opt dsn_opt; + unsigned int len = sizeof (struct mptcp_dsn_opt); + + if (do_csum) { + len += 2; + } + + CHECK_OPTLEN; + + bzero(&dsn_opt, sizeof (dsn_opt)); + dsn_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; + dsn_opt.mdss_copt.mdss_subtype = MPO_DSS; + dsn_opt.mdss_copt.mdss_len = len; + dsn_opt.mdss_copt.mdss_flags |= MDSS_M; + + CHECK_DATALEN; + + mptcp_output_getm_dsnmap32(so, off, (u_int32_t)datalen, + &dsn_opt.mdss_dsn, + &dsn_opt.mdss_subflow_seqn, &dsn_opt.mdss_data_len, + dss_valp); + + if ((dsn_opt.mdss_data_len == 0) || + (dsn_opt.mdss_dsn == 0)) { + return (optlen); + } + + if (tp->t_mpflags & TMPF_SEND_DFIN) { + DO_FIN(dsn_opt); + } + + dsn_opt.mdss_dsn = htonl(dsn_opt.mdss_dsn); + dsn_opt.mdss_subflow_seqn = htonl(dsn_opt.mdss_subflow_seqn); + dsn_opt.mdss_data_len = htons(dsn_opt.mdss_data_len); + *dss_lenp = (unsigned int *)(void *)(opt + optlen + + offsetof(struct mptcp_dsn_opt, mdss_data_len)); + memcpy(opt + optlen, &dsn_opt, sizeof (dsn_opt)); + if (do_csum) { + *sseqp = (u_int32_t *)(void *)(opt + optlen + + offsetof(struct mptcp_dsn_opt, mdss_subflow_seqn)); + } + optlen += len; + if (mptcp_dbg == MP_VERBOSE_DEBUG_2) { + printf("%s: DSS option. 
dsn = %x, seq = %x len = %x\n", + __func__, + ntohl(dsn_opt.mdss_dsn), + ntohl(dsn_opt.mdss_subflow_seqn), + ntohs(dsn_opt.mdss_data_len)); + } + tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; + return (optlen); + } + + /* 32-bit Data ACK option */ + if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) && + (!send_64bit_ack) && + !(tp->t_mpflags & TMPF_SEND_DSN) && + !(tp->t_mpflags & TMPF_SEND_DFIN)) { + + struct mptcp_data_ack_opt dack_opt; + unsigned int len = 0; +do_ack32_only: + len = sizeof (dack_opt); + + CHECK_OPTLEN; + + bzero(&dack_opt, len); + dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; + dack_opt.mdss_copt.mdss_len = len; + dack_opt.mdss_copt.mdss_subtype = MPO_DSS; + dack_opt.mdss_copt.mdss_flags |= MDSS_A; + MPT_LOCK_SPIN(mp_tp); + dack_opt.mdss_ack = + htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt)); + MPT_UNLOCK(mp_tp); + memcpy(opt + optlen, &dack_opt, len); + optlen += len; + VERIFY(optlen <= MAX_TCPOPTLEN); + tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; + return (optlen); + } + + /* 64-bit Data ACK option */ + if ((tp->t_mpflags & TMPF_MPTCP_ACKNOW) && + (send_64bit_ack) && + !(tp->t_mpflags & TMPF_SEND_DSN) && + !(tp->t_mpflags & TMPF_SEND_DFIN)) { + struct mptcp_data_ack64_opt dack_opt; + unsigned int len = 0; +do_ack64_only: + len = sizeof (dack_opt); + + CHECK_OPTLEN; + + bzero(&dack_opt, len); + dack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; + dack_opt.mdss_copt.mdss_len = len; + dack_opt.mdss_copt.mdss_subtype = MPO_DSS; + dack_opt.mdss_copt.mdss_flags |= (MDSS_A | MDSS_a); + MPT_LOCK_SPIN(mp_tp); + dack_opt.mdss_ack = mptcp_hton64(mp_tp->mpt_rcvnxt); + /* + * The other end should retransmit 64-bit DSN until it + * receives a 64-bit ACK. + */ + mp_tp->mpt_flags &= ~MPTCPF_SND_64BITACK; + MPT_UNLOCK(mp_tp); + memcpy(opt + optlen, &dack_opt, len); + optlen += len; + VERIFY(optlen <= MAX_TCPOPTLEN); + tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; + return (optlen); + } + + /* 32-bit DSS+Data ACK option */ + if ((tp->t_mpflags & TMPF_SEND_DSN) && + (!send_64bit_dsn) && + (!send_64bit_ack) && + (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) { + struct mptcp_dss_ack_opt dss_ack_opt; + unsigned int len = sizeof (dss_ack_opt); + + if (do_csum) + len += 2; + + CHECK_OPTLEN; + + bzero(&dss_ack_opt, sizeof (dss_ack_opt)); + dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; + dss_ack_opt.mdss_copt.mdss_len = len; + dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS; + dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M; + MPT_LOCK_SPIN(mp_tp); + dss_ack_opt.mdss_ack = + htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt)); + MPT_UNLOCK(mp_tp); + + CHECK_DATALEN; + + mptcp_output_getm_dsnmap32(so, off, (u_int32_t)datalen, + &dss_ack_opt.mdss_dsn, + &dss_ack_opt.mdss_subflow_seqn, + &dss_ack_opt.mdss_data_len, + dss_valp); + + if ((dss_ack_opt.mdss_data_len == 0) || + (dss_ack_opt.mdss_dsn == 0)) { + goto do_ack32_only; + } + + if (tp->t_mpflags & TMPF_SEND_DFIN) { + DO_FIN(dss_ack_opt); + } + + dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn); + dss_ack_opt.mdss_subflow_seqn = + htonl(dss_ack_opt.mdss_subflow_seqn); + dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len); + *dss_lenp = (unsigned int *)(void *)(opt + optlen + + offsetof(struct mptcp_dss_ack_opt, mdss_data_len)); + memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt)); + if (do_csum) { + *sseqp = (u_int32_t *)(void *)(opt + optlen + + offsetof(struct mptcp_dss_ack_opt, + mdss_subflow_seqn)); + } + + optlen += len; + + if (optlen > MAX_TCPOPTLEN) + panic("optlen too large"); + tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; + return (optlen); + } + + /* 32-bit DSS + 
64-bit DACK option */ + if ((tp->t_mpflags & TMPF_SEND_DSN) && + (!send_64bit_dsn) && + (send_64bit_ack) && + (tp->t_mpflags & TMPF_MPTCP_ACKNOW)) { + struct mptcp_dss32_ack64_opt dss_ack_opt; + unsigned int len = sizeof (dss_ack_opt); + + if (do_csum) + len += 2; + + CHECK_OPTLEN; + + bzero(&dss_ack_opt, sizeof (dss_ack_opt)); + dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; + dss_ack_opt.mdss_copt.mdss_len = len; + dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS; + dss_ack_opt.mdss_copt.mdss_flags |= MDSS_M | MDSS_A | MDSS_a; + MPT_LOCK_SPIN(mp_tp); + dss_ack_opt.mdss_ack = + mptcp_hton64(mp_tp->mpt_rcvnxt); + MPT_UNLOCK(mp_tp); + + CHECK_DATALEN; + + mptcp_output_getm_dsnmap32(so, off, (u_int32_t)datalen, + &dss_ack_opt.mdss_dsn, &dss_ack_opt.mdss_subflow_seqn, + &dss_ack_opt.mdss_data_len, dss_valp); + + if ((dss_ack_opt.mdss_data_len == 0) || + (dss_ack_opt.mdss_dsn == 0)) { + goto do_ack64_only; + } + + if (tp->t_mpflags & TMPF_SEND_DFIN) { + DO_FIN(dss_ack_opt); + } + + dss_ack_opt.mdss_dsn = htonl(dss_ack_opt.mdss_dsn); + dss_ack_opt.mdss_subflow_seqn = + htonl(dss_ack_opt.mdss_subflow_seqn); + dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len); + *dss_lenp = (unsigned int *)(void *)(opt + optlen + + offsetof(struct mptcp_dss32_ack64_opt, mdss_data_len)); + memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt)); + if (do_csum) { + *sseqp = (u_int32_t *)(void *)(opt + optlen + + offsetof(struct mptcp_dss32_ack64_opt, + mdss_subflow_seqn)); + } + + optlen += len; + + if (optlen > MAX_TCPOPTLEN) + panic("optlen too large"); + tp->t_mpflags &= ~TMPF_MPTCP_ACKNOW; + return (optlen); + } + + if (tp->t_mpflags & TMPF_SEND_DFIN) { + struct mptcp_dss_ack_opt dss_ack_opt; + unsigned int len = sizeof (struct mptcp_dss_ack_opt); + + if (do_csum) + len += 2; + + CHECK_OPTLEN; + + bzero(&dss_ack_opt, sizeof (dss_ack_opt)); + + MPT_LOCK(mp_tp); + /* Data FIN occupies one sequence space */ + if ((mp_tp->mpt_sndnxt + 1) != mp_tp->mpt_sndmax) { + MPT_UNLOCK(mp_tp); + if (mptcp_dbg == MP_VERBOSE_DEBUG_2) + printf("%s: Fin state %d %llu %llu\n", __func__, + mp_tp->mpt_state, mp_tp->mpt_sndnxt, + mp_tp->mpt_sndmax); + return (optlen); + } + + dss_ack_opt.mdss_copt.mdss_kind = TCPOPT_MULTIPATH; + dss_ack_opt.mdss_copt.mdss_len = len; + dss_ack_opt.mdss_copt.mdss_subtype = MPO_DSS; + dss_ack_opt.mdss_copt.mdss_flags |= MDSS_A | MDSS_M | MDSS_F; + dss_ack_opt.mdss_ack = + htonl(MPTCP_DATAACK_LOW32(mp_tp->mpt_rcvnxt)); + dss_ack_opt.mdss_dsn = + htonl(MPTCP_DATASEQ_LOW32(mp_tp->mpt_sndnxt)); + MPT_UNLOCK(mp_tp); + dss_ack_opt.mdss_subflow_seqn = 0; + dss_ack_opt.mdss_data_len = 1; + dss_ack_opt.mdss_data_len = htons(dss_ack_opt.mdss_data_len); + memcpy(opt + optlen, &dss_ack_opt, sizeof (dss_ack_opt)); + if (do_csum) { + *dss_valp = mp_tp->mpt_sndnxt; + *sseqp = (u_int32_t *)(void *)(opt + optlen + + offsetof(struct mptcp_dss_ack_opt, + mdss_subflow_seqn)); + } + optlen += len; + } + + return (optlen); +} + +/* + * MPTCP Options Input Processing + */ + + +static int +mptcp_valid_mpcapable_common_opt(u_char *cp, u_int32_t mptcp_version) +{ + struct mptcp_mpcapable_opt_common *rsp = + (struct mptcp_mpcapable_opt_common *)cp; + + /* mmco_kind, mmco_len and mmco_subtype are validated before */ + + /* In future, there can be more than one version supported */ + if (rsp->mmco_version != mptcp_version) + return (0); + + if (!(rsp->mmco_flags & MPCAP_PROPOSAL_SBIT)) + return (0); + + if (rsp->mmco_flags & (MPCAP_BBIT | MPCAP_CBIT | MPCAP_DBIT | + MPCAP_EBIT | MPCAP_FBIT | MPCAP_GBIT)) + return (0); + 
+ return (1); +} + + +static void +mptcp_do_mpcapable_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, + int optlen) +{ + struct mptcp_mpcapable_opt_rsp1 *rsp1 = NULL; + struct mptcp_mpcapable_opt_rsp *rsp = NULL; + struct mptcb *mp_tp = tptomptp(tp); + +#define MPTCP_OPT_ERROR_PATH(tp) { \ + tp->t_mpflags |= TMPF_RESET; \ + tcpstat.tcps_invalid_mpcap++; \ + if (tp->t_inpcb->inp_socket != NULL) { \ + soevent(tp->t_inpcb->inp_socket, \ + SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST); \ + } \ +} + + if (mp_tp == NULL) { + if (mptcp_dbg == MP_ERR_DEBUG) + printf("MPTCP ERROR %s: NULL mpsocket \n", __func__); + tcpstat.tcps_invalid_mpcap++; + return; + } + + /* Validate the kind, len, flags */ + if (mptcp_valid_mpcapable_common_opt(cp, mp_tp->mpt_version) != 1) { + tcpstat.tcps_invalid_mpcap++; + return; + } + + /* A SYN contains only the MP_CAPABLE option */ + if ((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) { + /* XXX passive side not supported yet */ + return; + } else if ((th->th_flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { + + /* A SYN/ACK contains peer's key and flags */ + if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp)) { + /* complain */ + if (mptcp_dbg == MP_ERR_DEBUG) { + printf("%s: SYN_ACK optlen = %d, sizeof mp opt \ + = %lu \n", __func__, optlen, + sizeof (struct mptcp_mpcapable_opt_rsp)); + } + tcpstat.tcps_invalid_mpcap++; + return; + } + + /* + * If checksum flag is set, enable MPTCP checksum, even if + * it was not negotiated on the first SYN. + */ + if (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags & + MPCAP_CHECKSUM_CBIT) + mp_tp->mpt_flags |= MPTCPF_CHECKSUM; + + rsp = (struct mptcp_mpcapable_opt_rsp *)cp; + MPT_LOCK_SPIN(mp_tp); + mp_tp->mpt_remotekey = rsp->mmc_localkey; + MPT_UNLOCK(mp_tp); + tp->t_mpflags |= TMPF_PREESTABLISHED; + + if (mptcp_dbg > MP_VERBOSE_DEBUG_1) { + printf("SYN_ACK pre established, optlen = %d, tp \ + state = %d sport = %x dport = %x key = %llx \n", + optlen, tp->t_state, th->th_sport, th->th_dport, + mp_tp->mpt_remotekey); + } + + } else if ((th->th_flags & TH_ACK) && + (tp->t_mpflags & TMPF_PREESTABLISHED)) { + + /* + * Verify checksum flag is set, if we initially negotiated + * checksum. + */ + if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && + !(((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags & + MPCAP_CHECKSUM_CBIT)) { + if (mptcp_dbg == MP_ERR_DEBUG) { + printf("%s: checksum negotiation failure \n", + __func__); + } + MPTCP_OPT_ERROR_PATH(tp); + return; + } + + if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM) && + (((struct mptcp_mpcapable_opt_common *)cp)->mmco_flags & + MPCAP_CHECKSUM_CBIT)) { + if (mptcp_dbg == MP_ERR_DEBUG) { + printf("%s: checksum negotiation failure 2.\n", + __func__); + } + MPTCP_OPT_ERROR_PATH(tp); + return; + } + + /* + * The ACK of a three way handshake contains peer's key and + * flags. + */ + if (optlen != sizeof (struct mptcp_mpcapable_opt_rsp1)) { + /* complain */ + if (mptcp_dbg == MP_ERR_DEBUG) { + printf("%s: ACK optlen = %d , sizeof mp option \ + = %lu, state = %d \n", + __func__, + optlen, + sizeof (struct mptcp_mpcapable_opt_rsp1), + tp->t_state); + } + MPTCP_OPT_ERROR_PATH(tp); + return; + } + + rsp1 = (struct mptcp_mpcapable_opt_rsp1 *)cp; + /* Skipping MPT_LOCK for invariant key */ + if (rsp1->mmc_remotekey != *mp_tp->mpt_localkey) { + if (mptcp_dbg == MP_ERR_DEBUG) { + printf("MPTCP ERROR %s: key mismatch locally " + "stored key. 
rsp = %llx local = %llx \n", + __func__, rsp1->mmc_remotekey, + *mp_tp->mpt_localkey); + } + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + MPTCP_OPT_ERROR_PATH(tp); + return; + } else { + /* We received both keys. Almost an MPTCP connection */ + /* Skipping MPT_LOCK for invariant key */ + if (mp_tp->mpt_remotekey != rsp1->mmc_localkey) { + if (mptcp_dbg == MP_ERR_DEBUG) { + printf("MPTCP ERROR %s: keys don't" + " match\n", __func__); + } + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + MPTCP_OPT_ERROR_PATH(tp); + return; + } + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + tp->t_mpflags |= TMPF_MPTCP_RCVD_KEY; + tp->t_mpflags |= TMPF_MPTCP_TRUE; + tp->t_inpcb->inp_socket->so_flags |= SOF_MPTCP_TRUE; + MPT_LOCK(mp_tp); + DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, + uint32_t, 0 /* event */); + mp_tp->mpt_state = MPTCPS_ESTABLISHED; + MPT_UNLOCK(mp_tp); + if (mptcp_dbg >= MP_VERBOSE_DEBUG_2) { + printf("MPTCP SUCCESS %s: rem key = %llx local \ + key = %llx \n", + __func__, mp_tp->mpt_remotekey, + *mp_tp->mpt_localkey); + } + } + if (tp->t_mpuna) { + tp->t_mpuna = 0; + } + } +} + + +static void +mptcp_do_mpjoin_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) +{ +#define MPTCP_JOPT_ERROR_PATH(tp) { \ + tp->t_mpflags |= TMPF_RESET; \ + tcpstat.tcps_invalid_joins++; \ + if (tp->t_inpcb->inp_socket != NULL) { \ + soevent(tp->t_inpcb->inp_socket, \ + SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST); \ + } \ +} + int error = 0; + struct mptcb *mp_tp = tptomptp(tp); + + if ((th->th_flags & (TH_SYN | TH_ACK)) == TH_SYN) { + /* We won't accept join requests as an active opener */ + if (tp->t_inpcb->inp_socket->so_flags & SOF_MPTCP_CLIENT) { + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + + if (optlen != sizeof (struct mptcp_mpjoin_opt_req)) { + if (mptcp_dbg == MP_ERR_DEBUG) { + printf("SYN: unexpected optlen = %d, mp option" + "= %lu\n", + optlen, + sizeof (struct mptcp_mpjoin_opt_req)); + } + /* send RST and close */ + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + /* not supported yet */ + return; +#ifdef MPTCP_NOTYET + struct mptcp_mpjoin_opt_req *join_req = + (struct mptcp_mpjoin_opt_req *)cp; + mp_so = mptcp_find_mpso(join_req->mmjo_peer_token); + if (!mp_so) { + if (mptcp_dbg >= MP_ERR_DEBUG) + printf("%s: cannot find mp_so token = %x\n", + __func__, join_req->mmjo_peer_token); + /* send RST */ + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + if (tp->t_mpflags & TMPF_PREESTABLISHED) { + return; + } + mp_so->ms_remote_addr_id = join_req->mmjo_addr_id; + mp_so->ms_remote_rand = join_req->mmjo_rand; + tp->t_mpflags |= TMPF_PREESTABLISHED | TMPF_JOINED_FLOW; + tp->t_mpflags |= TMPF_RECVD_JOIN; + tp->t_inpcb->inp_socket->so_flags |= SOF_MP_SEC_SUBFLOW; + if (join_req->mmjo_subtype & MPTCP_BACKUP) { + tp->t_mpflags |= TMPF_BACKUP_PATH; + } +#endif + } else if ((th->th_flags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { + struct mptcp_mpjoin_opt_rsp *join_rsp = + (struct mptcp_mpjoin_opt_rsp *)cp; + + if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp)) { + if (mptcp_dbg >= MP_ERR_DEBUG) { + printf("SYN_ACK: unexpected optlen = %d mp " + "option = %lu\n", optlen, + sizeof (struct mptcp_mpjoin_opt_rsp)); + } + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + /* send RST and close */ + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + + if (mp_tp == NULL) { + if (mptcp_dbg >= MP_ERR_DEBUG) + printf("%s: cannot find mp_tp in SYN_ACK\n", + __func__); + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + /* send RST and close */ + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + + mptcp_set_raddr_rand(tp->t_local_aid, + tptomptp(tp), + 
join_rsp->mmjo_addr_id, join_rsp->mmjo_rand); + error = mptcp_validate_join_hmac(tp, + (u_char*)&join_rsp->mmjo_mac, SHA1_TRUNCATED); + if (error) { + if (mptcp_dbg >= MP_ERR_DEBUG) { + printf("%s: SYN_ACK error = %d \n", __func__, + error); + } + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + /* send RST and close */ + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + tp->t_mpflags |= TMPF_SENT_JOIN; + } else if ((th->th_flags & TH_ACK) && + (tp->t_mpflags & TMPF_PREESTABLISHED)) { + struct mptcp_mpjoin_opt_rsp2 *join_rsp2 = + (struct mptcp_mpjoin_opt_rsp2 *)cp; + + if (optlen != sizeof (struct mptcp_mpjoin_opt_rsp2)) { + if (mptcp_dbg >= MP_ERR_DEBUG) { + printf("ACK: unexpected optlen = %d mp option " + "= %lu \n", optlen, + sizeof (struct mptcp_mpjoin_opt_rsp2)); + } + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + /* send RST and close */ + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + + if (mp_tp == NULL) { + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + + error = mptcp_validate_join_hmac(tp, join_rsp2->mmjo_mac, + SHA1_RESULTLEN); + if (error) { + if (mptcp_dbg >= MP_ERR_DEBUG) { + printf("%s: ACK error = %d\n", __func__, + error); + } + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + MPTCP_JOPT_ERROR_PATH(tp); + return; + } + tp->t_mpflags |= TMPF_MPTCP_TRUE; + tp->t_mpflags &= ~TMPF_PREESTABLISHED; + tp->t_flags |= TF_ACKNOW; + tp->t_mpflags |= TMPF_MPTCP_ACKNOW; + tp->t_inpcb->inp_socket->so_flags |= SOF_MPTCP_TRUE; + if (mptcp_dbg >= MP_ERR_DEBUG) { + printf("MPTCP SUCCESS %s: join \n", __func__); + } + } +} + +static int +mptcp_validate_join_hmac(struct tcpcb *tp, u_char* hmac, int mac_len) +{ + u_char digest[SHA1_RESULTLEN] = {0}; + struct mptcb *mp_tp = NULL; + mptcp_key_t rem_key, loc_key; + u_int32_t rem_rand, loc_rand; + + mp_tp = tp->t_mptcb; + if (mp_tp == NULL) + return (-1); + + rem_rand = loc_rand = 0; + + MPT_LOCK(mp_tp); + rem_key = mp_tp->mpt_remotekey; + loc_key = *mp_tp->mpt_localkey; + MPT_UNLOCK(mp_tp); + + mptcp_get_rands(tp->t_local_aid, mp_tp, &loc_rand, &rem_rand); + if ((rem_rand == 0) || (loc_rand == 0)) + return (-1); + + mptcp_hmac_sha1(rem_key, loc_key, rem_rand, loc_rand, + digest, sizeof (digest)); + + if (bcmp(digest, hmac, mac_len) == 0) + return (0); /* matches */ + else { + printf("%s: remote key %llx local key %llx remote rand %x " + "local rand %x \n", __func__, rem_key, loc_key, + rem_rand, loc_rand); + return (-1); + } +} + +static void +mptcp_do_dss_opt_ack_meat(u_int64_t full_dack, struct tcpcb *tp) +{ + struct mptcb *mp_tp = tptomptp(tp); + int close_notify = 0; + + if (mp_tp == NULL) + return; + + MPT_LOCK(mp_tp); + if (MPTCP_SEQ_LEQ(full_dack, mp_tp->mpt_sndmax) && + MPTCP_SEQ_GEQ(full_dack, mp_tp->mpt_snduna)) { + mptcp_data_ack_rcvd(mp_tp, tp, full_dack); + if ((mp_tp->mpt_state == MPTCPS_CLOSED) || + (mp_tp->mpt_state > MPTCPS_FIN_WAIT_2)) + close_notify = 1; + MPT_UNLOCK(mp_tp); + mptcp_notify_mpready(tp->t_inpcb->inp_socket); + if (close_notify) + mptcp_notify_close(tp->t_inpcb->inp_socket); + if (mp_tp->mpt_flags & MPTCPF_RCVD_64BITACK) { + mp_tp->mpt_flags &= ~MPTCPF_RCVD_64BITACK; + mp_tp->mpt_flags &= ~MPTCPF_SND_64BITDSN; + } + } else { + MPT_UNLOCK(mp_tp); + if (mptcp_dbg == MP_VERBOSE_DEBUG_2) { + printf("%s: unexpected dack %llx snduna %llx " + "sndmax %llx\n", __func__, full_dack, + mp_tp->mpt_snduna, mp_tp->mpt_sndmax); + } + } + + if (mptcp_dbg == MP_VERBOSE_DEBUG_2) { + printf("%s: full_dack = %llu \n", __func__, full_dack); + } +} + +static void +mptcp_do_dss_opt_meat(u_char *cp, struct tcpcb *tp) +{ + struct 
mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp; + u_int64_t full_dack = 0; + struct mptcb *mp_tp = tptomptp(tp); + int csum_len = 0; + +#define MPTCP_DSS_OPT_SZ_CHK(len, expected_len) { \ + if (len != expected_len) { \ + if (mptcp_dbg >= MP_ERR_DEBUG) { \ + printf("MPTCP ERROR %s: bad len = %d" \ + "dss: %x \n", __func__, \ + len, \ + dss_rsp->mdss_flags); \ + } \ + return; \ + } \ +} + if (mp_tp == NULL) + return; + + if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) + csum_len = 2; + + dss_rsp->mdss_flags &= (MDSS_A|MDSS_a|MDSS_M|MDSS_m); + switch (dss_rsp->mdss_flags) { + case (MDSS_M): + { + /* 32-bit DSS, No Data ACK */ + struct mptcp_dsn_opt *dss_rsp1; + dss_rsp1 = (struct mptcp_dsn_opt *)cp; + + MPTCP_DSS_OPT_SZ_CHK(dss_rsp1->mdss_copt.mdss_len, + sizeof (struct mptcp_dsn_opt) + csum_len); + if (csum_len == 0) + mptcp_update_dss_rcv_state(dss_rsp1, tp, 0); + else + mptcp_update_dss_rcv_state(dss_rsp1, tp, + *(uint16_t *)(void *)(cp + + (dss_rsp1->mdss_copt.mdss_len - csum_len))); + break; + } + case (MDSS_A): + { + /* 32-bit Data ACK, no DSS */ + struct mptcp_data_ack_opt *dack_opt; + dack_opt = (struct mptcp_data_ack_opt *)cp; + + MPTCP_DSS_OPT_SZ_CHK(dack_opt->mdss_copt.mdss_len, + sizeof (struct mptcp_data_ack_opt)); + + u_int32_t dack = dack_opt->mdss_ack; + NTOHL(dack); + MPT_LOCK_SPIN(mp_tp); + MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack); + MPT_UNLOCK(mp_tp); + mptcp_do_dss_opt_ack_meat(full_dack, tp); + break; + } + case (MDSS_M | MDSS_A): + { + /* 32-bit Data ACK + 32-bit DSS */ + struct mptcp_dss_ack_opt *dss_ack_rsp; + dss_ack_rsp = (struct mptcp_dss_ack_opt *)cp; + + MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len, + sizeof (struct mptcp_dss_ack_opt) + csum_len); + + u_int32_t dack = dss_ack_rsp->mdss_ack; + NTOHL(dack); + MPT_LOCK_SPIN(mp_tp); + MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack); + MPT_UNLOCK(mp_tp); + mptcp_do_dss_opt_ack_meat(full_dack, tp); + if (csum_len == 0) + mptcp_update_rcv_state_f(dss_ack_rsp, tp, 0); + else + mptcp_update_rcv_state_f(dss_ack_rsp, tp, + *(uint16_t *)(void *)(cp + + (dss_ack_rsp->mdss_copt.mdss_len - + csum_len))); + break; + } + case (MDSS_M | MDSS_m): + { + /* 64-bit DSS , No Data ACK */ + struct mptcp_dsn64_opt *dsn64; + dsn64 = (struct mptcp_dsn64_opt *)cp; + u_int64_t full_dsn; + + MPTCP_DSS_OPT_SZ_CHK(dsn64->mdss_copt.mdss_len, + sizeof (struct mptcp_dsn64_opt) + csum_len); + + if (mptcp_dbg == MP_VERBOSE_DEBUG_4) { + printf("%s: 64-bit M present.\n", __func__); + } + + MPT_LOCK_SPIN(mp_tp); + mp_tp->mpt_flags |= MPTCPF_SND_64BITACK; + MPT_UNLOCK(mp_tp); + + full_dsn = mptcp_ntoh64(dsn64->mdss_dsn); + NTOHL(dsn64->mdss_subflow_seqn); + NTOHS(dsn64->mdss_data_len); + if (csum_len == 0) + mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn, + dsn64->mdss_subflow_seqn, + dsn64->mdss_data_len, + 0); + else + mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn, + dsn64->mdss_subflow_seqn, + dsn64->mdss_data_len, + *(uint16_t *)(void *)(cp + + dsn64->mdss_copt.mdss_len - csum_len)); + break; + } + case (MDSS_A | MDSS_a): + { + /* 64-bit Data ACK, no DSS */ + struct mptcp_data_ack64_opt *dack64; + dack64 = (struct mptcp_data_ack64_opt *)cp; + + MPTCP_DSS_OPT_SZ_CHK(dack64->mdss_copt.mdss_len, + sizeof (struct mptcp_data_ack64_opt)); + + + if (mptcp_dbg == MP_VERBOSE_DEBUG_4) { + printf("%s: 64-bit A present. 
\n", __func__);
+		}
+
+		MPT_LOCK_SPIN(mp_tp);
+		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
+		MPT_UNLOCK(mp_tp);
+
+		full_dack = mptcp_ntoh64(dack64->mdss_ack);
+		mptcp_do_dss_opt_ack_meat(full_dack, tp);
+		break;
+	}
+	case (MDSS_M | MDSS_m | MDSS_A):
+	{
+		/* 64-bit DSS + 32-bit Data ACK */
+		struct mptcp_dss64_ack32_opt *dss_ack_rsp;
+		dss_ack_rsp = (struct mptcp_dss64_ack32_opt *)cp;
+
+		MPTCP_DSS_OPT_SZ_CHK(dss_ack_rsp->mdss_copt.mdss_len,
+		    sizeof (struct mptcp_dss64_ack32_opt) + csum_len);
+
+		if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
+			printf("%s: 64-bit M and 32-bit A present.\n",
+			    __func__);
+		}
+
+		u_int32_t dack = dss_ack_rsp->mdss_ack;
+		NTOHL(dack);
+		MPT_LOCK_SPIN(mp_tp);
+		mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
+		MPTCP_EXTEND_DSN(mp_tp->mpt_snduna, dack, full_dack);
+		MPT_UNLOCK(mp_tp);
+		mptcp_do_dss_opt_ack_meat(full_dack, tp);
+		if (csum_len == 0)
+			mptcp_update_rcv_state_g(dss_ack_rsp, tp, 0);
+		else
+			mptcp_update_rcv_state_g(dss_ack_rsp, tp,
+			    *(uint16_t *)(void *)(cp +
+			    dss_ack_rsp->mdss_copt.mdss_len -
+			    csum_len));
+		break;
+	}
+	case (MDSS_M | MDSS_A | MDSS_a):
+	{
+		/* 32-bit DSS + 64-bit Data ACK */
+		struct mptcp_dss32_ack64_opt *dss32_ack64_opt;
+		dss32_ack64_opt = (struct mptcp_dss32_ack64_opt *)cp;
+		u_int64_t full_dsn;
+
+		MPTCP_DSS_OPT_SZ_CHK(
+		    dss32_ack64_opt->mdss_copt.mdss_len,
+		    sizeof (struct mptcp_dss32_ack64_opt) + csum_len);
+
+		if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
+			printf("%s: 32-bit M and 64-bit A present.\n",
+			    __func__);
+		}
+		full_dack = mptcp_ntoh64(dss32_ack64_opt->mdss_ack);
+		mptcp_do_dss_opt_ack_meat(full_dack, tp);
+		NTOHL(dss32_ack64_opt->mdss_dsn);
+		MPT_LOCK_SPIN(mp_tp);
+		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
+		MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt,
+		    dss32_ack64_opt->mdss_dsn, full_dsn);
+		MPT_UNLOCK(mp_tp);
+		NTOHL(dss32_ack64_opt->mdss_subflow_seqn);
+		NTOHS(dss32_ack64_opt->mdss_data_len);
+		if (csum_len == 0)
+			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
+			    dss32_ack64_opt->mdss_subflow_seqn,
+			    dss32_ack64_opt->mdss_data_len, 0);
+		else
+			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
+			    dss32_ack64_opt->mdss_subflow_seqn,
+			    dss32_ack64_opt->mdss_data_len,
+			    *(uint16_t *)(void *)(cp +
+			    dss32_ack64_opt->mdss_copt.mdss_len -
+			    csum_len));
+		break;
+	}
+	case (MDSS_M | MDSS_m | MDSS_A | MDSS_a):
+	{
+		/* 64-bit DSS + 64-bit Data ACK */
+		struct mptcp_dss64_ack64_opt *dss64_ack64;
+		dss64_ack64 = (struct mptcp_dss64_ack64_opt *)cp;
+		u_int64_t full_dsn;
+
+		MPTCP_DSS_OPT_SZ_CHK(dss64_ack64->mdss_copt.mdss_len,
+		    sizeof (struct mptcp_dss64_ack64_opt) + csum_len);
+
+		if (mptcp_dbg == MP_VERBOSE_DEBUG_4) {
+			printf("%s: 64-bit M and 64-bit A present.\n",
+			    __func__);
+		}
+		MPT_LOCK_SPIN(mp_tp);
+		mp_tp->mpt_flags |= MPTCPF_RCVD_64BITACK;
+		mp_tp->mpt_flags |= MPTCPF_SND_64BITACK;
+		MPT_UNLOCK(mp_tp);
+		full_dsn = mptcp_ntoh64(dss64_ack64->mdss_dsn);
+		full_dack = mptcp_ntoh64(dss64_ack64->mdss_ack);
+		mptcp_do_dss_opt_ack_meat(full_dack, tp);
+		NTOHL(dss64_ack64->mdss_subflow_seqn);
+		NTOHS(dss64_ack64->mdss_data_len);
+		if (csum_len == 0)
+			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
+			    dss64_ack64->mdss_subflow_seqn,
+			    dss64_ack64->mdss_data_len, 0);
+		else
+			mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
+			    dss64_ack64->mdss_subflow_seqn,
+			    dss64_ack64->mdss_data_len,
+			    *(uint16_t *)(void *)(cp +
+			    dss64_ack64->mdss_copt.mdss_len -
+			    csum_len));
+		break;
+	}
+	default:
+		if (mptcp_dbg >= MP_ERR_DEBUG) {
+			printf("MPTCP ERROR %s: file a bug, DSS flags = %x\n",
+			    __func__, dss_rsp->mdss_flags);
+		}
+		break;
+	}
+}
+
+
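+/*
+ * Illustrative sketch only, not part of this file: MPTCP_EXTEND_DSN above
+ * widens a 32-bit DSN or Data ACK from the wire against a 64-bit reference
+ * such as mpt_snduna or mpt_rcvnxt.  Assuming the macro follows the usual
+ * sequence-number extension scheme, the idea is to pick the 64-bit value
+ * congruent to the 32-bit one (mod 2^32) that lies closest to the
+ * reference, which transparently handles 32-bit wraparound:
+ *
+ *	static inline u_int64_t
+ *	mptcp_extend_seq_sketch(u_int64_t ref, u_int32_t low32)
+ *	{
+ *		// signed 32-bit distance from the low half of the reference
+ *		int32_t diff = (int32_t)(low32 - (u_int32_t)ref);
+ *
+ *		// moving by the signed distance lands on the nearest
+ *		// congruent 64-bit value
+ *		return (ref + (int64_t)diff);
+ *	}
+ *
+ * For example, ref 0x1fffffff0 and low32 0x10 extend to 0x200000010.
+ */
+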
+static void +mptcp_do_fin_opt(struct tcpcb *tp) +{ + struct mptcb *mp_tp = (struct mptcb *)tp->t_mptcb; + + if (!(tp->t_mpflags & TMPF_RECV_DFIN)) { + if (mp_tp != NULL) { + MPT_LOCK(mp_tp); + mp_tp->mpt_rcvnxt += 1; + mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN); + MPT_UNLOCK(mp_tp); + } + tp->t_mpflags |= TMPF_RECV_DFIN; + } + + tp->t_mpflags |= TMPF_MPTCP_ACKNOW; + /* + * Since this is a data level FIN, TCP needs to be explicitly told + * to send back an ACK on which the Data ACK is piggybacked. + */ + tp->t_flags |= TF_ACKNOW; +} + +static void +mptcp_do_dss_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th, int optlen) +{ +#pragma unused(th, optlen) + struct mptcb *mp_tp = (struct mptcb *)tp->t_mptcb; + + if (!mp_tp) + return; + + if (tp->t_mpflags & TMPF_MPTCP_TRUE) { + struct mptcp_dss_copt *dss_rsp = (struct mptcp_dss_copt *)cp; + + if (dss_rsp->mdss_subtype == MPO_DSS) { + if (mptcp_dbg > MP_VERBOSE_DEBUG_4) { + printf("%s: DSS option received: %d ", + __func__, dss_rsp->mdss_flags); + } + if (dss_rsp->mdss_flags & MDSS_F) { + if (mptcp_dbg >= MP_VERBOSE_DEBUG_1) + printf("%s: received FIN\n", __func__); + mptcp_do_fin_opt(tp); + } + + mptcp_do_dss_opt_meat(cp, tp); + } + } +} + +static void +mptcp_do_fastclose_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th) +{ + struct mptcb *mp_tp = NULL; + struct mptcp_fastclose_opt *fc_opt = (struct mptcp_fastclose_opt *)cp; + + if (th->th_flags != TH_ACK) + return; + + if (mptcp_dbg > MP_VERBOSE_DEBUG_2) + printf("%s: received \n", __func__); + + if (fc_opt->mfast_len != sizeof (struct mptcp_fastclose_opt)) { + tcpstat.tcps_invalid_opt++; + return; + } + + mp_tp = (struct mptcb *)tp->t_mptcb; + if (!mp_tp) + return; + + if (fc_opt->mfast_key != mptcp_get_localkey(mp_tp)) { + tcpstat.tcps_invalid_opt++; + return; + } + + /* + * fastclose could make us more vulnerable to attacks, hence + * accept only those that are at the next expected sequence number. 
+	 */
+	if (th->th_seq != tp->rcv_nxt) {
+		tcpstat.tcps_invalid_opt++;
+		return;
+	}
+
+	MPT_LOCK(mp_tp);
+	if (mp_tp->mpt_state != MPTCPS_FASTCLOSE_WAIT) {
+		mp_tp->mpt_state = MPTCPS_FASTCLOSE_WAIT;
+		DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
+		    uint32_t, 0 /* event */);
+		mptcp_start_timer(mp_tp, MPTT_FASTCLOSE);
+	}
+	MPT_UNLOCK(mp_tp);
+
+	/* Reset this flow */
+	tp->t_mpflags |= TMPF_RESET;
+
+	if (tp->t_inpcb->inp_socket != NULL) {
+		soevent(tp->t_inpcb->inp_socket,
+		    SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
+	}
+}
+
+
+static void
+mptcp_do_mpfail_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th)
+{
+	struct mptcb *mp_tp = NULL;
+	struct mptcp_mpfail_opt *fail_opt = (struct mptcp_mpfail_opt *)cp;
+
+	if ((th->th_flags != TH_ACK) && (th->th_flags != TH_RST))
+		return;
+
+	if (fail_opt->mfail_len != sizeof (struct mptcp_mpfail_opt))
+		return;
+
+	mp_tp = (struct mptcb *)tp->t_mptcb;
+	if (mp_tp == NULL)
+		return;
+
+	MPT_LOCK(mp_tp);
+	mp_tp->mpt_flags |= MPTCPF_RECVD_MPFAIL;
+	mp_tp->mpt_dsn_at_csum_fail = mptcp_ntoh64(fail_opt->mfail_dsn);
+	MPT_UNLOCK(mp_tp);
+
+	mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
+}
+
+int
+tcp_do_mptcp_options(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
+    struct tcpopt *to, int optlen)
+{
+	int mptcp_subtype;
+
+	/* All MPTCP options have at least 4 bytes */
+	if (optlen < 4)
+		return (0);
+
+	mptcp_subtype = (cp[2] >> 4);
+
+	switch (mptcp_subtype) {
+	case MPO_CAPABLE:
+		mptcp_do_mpcapable_opt(tp, cp, th, optlen);
+		break;
+	case MPO_JOIN:
+		mptcp_do_mpjoin_opt(tp, cp, th, optlen);
+		break;
+	case MPO_DSS:
+		mptcp_do_dss_opt(tp, cp, th, optlen);
+		break;
+	case MPO_FASTCLOSE:
+		mptcp_do_fastclose_opt(tp, cp, th);
+		break;
+	case MPO_FAIL:
+		mptcp_do_mpfail_opt(tp, cp, th);
+		break;
+	case MPO_ADD_ADDR:	/* fall through */
+	case MPO_REMOVE_ADDR:	/* fall through */
+	case MPO_PRIO:
+		to->to_flags |= TOF_MPTCP;
+		break;
+	default:
+		printf("%s: unknown subtype = %d\n", __func__, mptcp_subtype);
+		break;
+	}
+	return (0);
+}
+
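+/*
+ * Illustrative sketch only, not part of this file: the dispatch above
+ * relies on every MPTCP option carrying its kind in cp[0], its length in
+ * cp[1], and its subtype in the upper nibble of cp[2].  A raw MP_CAPABLE
+ * option on a SYN would therefore be laid out and classified like so (the
+ * buffer below is a hypothetical example, not taken from this code, and
+ * the version nibble shown as 0 is an assumption):
+ *
+ *	u_char syn_opt[12] = {
+ *		TCPOPT_MULTIPATH,		// cp[0]: option kind
+ *		12,				// cp[1]: total option length
+ *		(MPO_CAPABLE << 4) | 0,		// cp[2]: subtype | version
+ *		MPCAP_PROPOSAL_SBIT,		// cp[3]: flags
+ *		0, 0, 0, 0, 0, 0, 0, 0,		// sender's 64-bit key
+ *	};
+ *	int subtype = (syn_opt[2] >> 4);	// == MPO_CAPABLE
+ */
+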
+/*
+ * MPTCP ADD_ADDR and REMOVE_ADDR options
+ */
+
+/*
+ * ADD_ADDR is placeholder code only and is not sent on the wire, because
+ * of security issues around connection hijacking.
+ */
+void
+mptcp_send_addaddr_opt(struct tcpcb *tp, struct mptcp_addaddr_opt *opt)
+{
+
+	opt->ma_kind = TCPOPT_MULTIPATH;
+	opt->ma_len = sizeof (struct mptcp_addaddr_opt);
+	opt->ma_subtype = MPO_ADD_ADDR;
+	opt->ma_addr_id = tp->t_local_aid;
+#ifdef MPTCP_NOTYET
+	struct inpcb *inp = tp->t_inpcb;
+	if (inp->inp_vflag == AF_INET) {
+		opt->ma_ipver = MA_IPVer_V4;
+		bcopy((char *)&sin->sin_addr.s_addr, (char *)opt + opt->ma_len,
+		    sizeof (in_addr_t));
+		opt->ma_len += sizeof (in_addr_t);
+	} else if (inp->inp_vflag == AF_INET6) {
+		opt->ma_ipver = MA_IPVer_V6;
+		bcopy((char *)&sin6->sin6_addr, (char *)opt + opt->ma_len,
+		    sizeof (struct in6_addr));
+		opt->ma_len += sizeof (struct in6_addr);
+	}
+#if 0
+	if (tp->t_mp_port) {
+		/* add ports XXX */
+	}
+#endif
+#endif
+}
+
+/* The REMOVE_ADDR option is sent when a source address goes away */
+void
+mptcp_send_remaddr_opt(struct tcpcb *tp, struct mptcp_remaddr_opt *opt)
+{
+	if (mptcp_dbg >= MP_ERR_DEBUG)
+		printf("%s: local id %d remove id %d \n", __func__,
+		    tp->t_local_aid, tp->t_rem_aid);
+
+	bzero(opt, sizeof (*opt));
+	opt->mr_kind = TCPOPT_MULTIPATH;
+	opt->mr_len = sizeof (*opt);
+	opt->mr_subtype = MPO_REMOVE_ADDR;
+	opt->mr_addr_id = tp->t_rem_aid;
+	tp->t_mpflags &= ~TMPF_SND_REM_ADDR;
+}
+
+/*
+ * MPTCP MP_PRIO option
+ */
+
+#if 0
+/*
+ * The current implementation drops incoming MP_PRIO options; this code is
+ * just a placeholder.  The option is dropped because only the mobile client
+ * can decide which of the subflows is preferred (usually WiFi is preferred
+ * over cellular).
+ */
+void
+mptcp_do_mpprio_opt(struct tcpcb *tp, u_char *cp, struct tcphdr *th,
+    int optlen)
+{
+	int bkp = 0;
+	struct mptcp_mpprio_opt *mpprio = (struct mptcp_mpprio_opt *)cp;
+
+	if ((tp == NULL) || !(tp->t_mpflags & TMPF_MPTCP_TRUE))
+		return;
+
+	if ((mpprio->mpprio_len != sizeof (struct mptcp_mpprio_addr_opt)) &&
+	    (mpprio->mpprio_len != sizeof (struct mptcp_mpprio_opt)))
+		return;
+}
+#endif
+
+/* We send the MP_PRIO option based on values set by the SIOCSCONNORDER ioctl */
+static int
+mptcp_snd_mpprio(struct tcpcb *tp, u_char *cp, int optlen)
+{
+	struct mptcp_mpprio_addr_opt mpprio;
+
+	if (tp->t_state != TCPS_ESTABLISHED) {
+		tp->t_mpflags &= ~TMPF_SND_MPPRIO;
+		return (optlen);
+	}
+
+	if (mptcp_mpprio_enable != 1) {
+		tp->t_mpflags &= ~TMPF_SND_MPPRIO;
+		return (optlen);
+	}
+
+	if ((MAX_TCPOPTLEN - optlen) <
+	    (int)sizeof (mpprio))
+		return (optlen);
+
+	bzero(&mpprio, sizeof (mpprio));
+	mpprio.mpprio_kind = TCPOPT_MULTIPATH;
+	mpprio.mpprio_len = sizeof (mpprio);
+	mpprio.mpprio_subtype = MPO_PRIO;
+	if (tp->t_mpflags & TMPF_BACKUP_PATH)
+		mpprio.mpprio_flags |= MPTCP_MPPRIO_BKP;
+	mpprio.mpprio_addrid = tp->t_local_aid;
+	memcpy(cp + optlen, &mpprio, sizeof (mpprio));
+	optlen += sizeof (mpprio);
+	tp->t_mpflags &= ~TMPF_SND_MPPRIO;
+	if (mptcp_dbg >= MP_ERR_DEBUG)
+		printf("%s: aid = %d \n", __func__, tp->t_local_aid);
+	return (optlen);
+}
diff --git a/bsd/netinet/mptcp_opt.h b/bsd/netinet/mptcp_opt.h
new file mode 100644
index 000000000..cbf8fc3f0
--- /dev/null
+++ b/bsd/netinet/mptcp_opt.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NETINET_MPTCP_OPT_H_
+#define _NETINET_MPTCP_OPT_H_
+
+#ifdef BSD_KERNEL_PRIVATE
+
+/*
+ * Try to set up an MPTCP connection by making at least 3 attempts,
+ * that is 2 retransmissions - needed for weak WiFi and long-delay
+ * cellular links.  This number must be bumped higher once we are assured
+ * that middleboxes are not the reason for the retries.  Generally, on weak
+ * WiFi and on cold-start cellular, more than 2 retries are necessary.
+ */
+#define	MPTCP_CAPABLE_RETRIES	(2)
+
+__BEGIN_DECLS
+extern int tcp_do_mptcp_options(struct tcpcb *, u_char *, struct tcphdr *,
+    struct tcpopt *, int);
+extern unsigned mptcp_setup_syn_opts(struct socket *, int, u_char*, unsigned);
+extern unsigned mptcp_setup_join_ack_opts(struct tcpcb *, u_char*, unsigned);
+extern void mptcp_update_dss_send_state(struct mptcb *, u_int64_t);
+extern void mptcp_send_addaddr_opt(struct tcpcb *, struct mptcp_addaddr_opt *);
+extern void mptcp_send_remaddr_opt(struct tcpcb *, struct mptcp_remaddr_opt *);
+extern unsigned int mptcp_setup_opts(struct tcpcb *, int, u_char *,
+    unsigned int, int, int, unsigned int **, u_int8_t **, u_int64_t *,
+    u_int32_t **);
+extern void mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *, struct tcpcb *,
+    uint16_t);
+extern void mptcp_update_rcv_state_f(struct mptcp_dss_ack_opt *,
+    struct tcpcb *, uint16_t);
+extern void mptcp_update_rcv_state_g(struct mptcp_dss64_ack32_opt *,
+    struct tcpcb *, uint16_t);
+extern void mptcp_update_rcv_state_meat(struct mptcb *, struct tcpcb *,
+    u_int64_t, u_int32_t, u_int16_t, uint16_t);
+extern void mptcp_data_ack_rcvd(struct mptcb *, struct tcpcb *, u_int64_t);
+__END_DECLS
+
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* !_NETINET_MPTCP_OPT_H_ */
diff --git a/bsd/netinet/mptcp_seq.h b/bsd/netinet/mptcp_seq.h
new file mode 100644
index 000000000..a444baa08
--- /dev/null
+++ b/bsd/netinet/mptcp_seq.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License.
diff --git a/bsd/netinet/mptcp_seq.h b/bsd/netinet/mptcp_seq.h
new file mode 100644
index 000000000..a444baa08
--- /dev/null
+++ b/bsd/netinet/mptcp_seq.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NETINET_MPTCP_SEQ_H_
+#define _NETINET_MPTCP_SEQ_H_
+
+/*
+ * Use 64-bit modulo arithmetic for comparing
+ * Data Sequence Numbers and Data ACKs. Implies
+ * 2**63 space is available for sending data.
+ */
+#define MPTCP_SEQ_LT(a, b)	((int64_t)((a) - (b)) < 0)
+#define MPTCP_SEQ_LEQ(a, b)	((int64_t)((a) - (b)) <= 0)
+#define MPTCP_SEQ_GT(a, b)	((int64_t)((a) - (b)) > 0)
+#define MPTCP_SEQ_GEQ(a, b)	((int64_t)((a) - (b)) >= 0)
+
+#endif /* _NETINET_MPTCP_SEQ_H_ */
diff --git a/bsd/netinet/mptcp_subr.c b/bsd/netinet/mptcp_subr.c
new file mode 100644
index 000000000..83e1955f1
--- /dev/null
+++ b/bsd/netinet/mptcp_subr.c
@@ -0,0 +1,4648 @@
+/*
+ * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#if INET6
+#include
+#include
+#endif /* INET6 */
+#include
+
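The MPTCP_SEQ_* macros in mptcp_seq.h above are the 64-bit analogue of TCP's SEQ_LT()/SEQ_GEQ(): subtracting in unsigned arithmetic and reinterpreting the difference as signed keeps the ordering correct across wraparound, as long as the two values stay within 2^63 of each other. A small self-contained check of that property:

    #include <assert.h>
    #include <stdint.h>

    #define MPTCP_SEQ_LT(a, b)      ((int64_t)((a) - (b)) < 0)
    #define MPTCP_SEQ_GEQ(a, b)     ((int64_t)((a) - (b)) >= 0)

    int
    main(void)
    {
            uint64_t a = UINT64_MAX - 5;    /* DSN just before wraparound */
            uint64_t b = a + 10;            /* wraps around to 4 */

            assert(b == 4);
            /* Plain comparison gets the order wrong once the DSN wraps... */
            assert(b < a);
            /* ...but modulo-2^64 comparison still sees b as "later". */
            assert(MPTCP_SEQ_LT(a, b));
            assert(MPTCP_SEQ_GEQ(b, a));
            /* Only valid while |a - b| stays below 2^63, per the comment. */
            return (0);
    }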
+/*
+ * Notes on MPTCP implementation.
+ *
+ * MPTCP is implemented as a protocol in the PF_MULTIPATH
+ * communication domain. The structure mtcbinfo describes the MPTCP instance
+ * of a Multipath protocol in that domain. It is used to keep track of all
+ * MPTCP PCB instances in the system, and is protected by the global lock
+ * mppi_lock.
+ *
+ * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
+ * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with
+ * it comes an MPTCP Session and an MPTCP PCB. All three structures are
+ * allocated from the same memory block, and each structure has a pointer
+ * to the adjacent ones. The layout is defined by the mpp_mtp structure.
+ * The socket lock (mpp_lock) is used to protect accesses to the Multipath
+ * PCB (mppcb) as well as the MPTCP Session (mptses).
+ *
+ * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB;
+ * in particular, it holds the list of subflows as well as the MPTCP thread.
+ *
+ * A functioning MPTCP Session consists of one or more subflow sockets. Each
+ * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
+ * represented by the mptsub structure. Because each subflow requires access
+ * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
+ * subflow. This gets decremented prior to the subflow's destruction. The
+ * subflow lock (mpts_lock) is used to protect accesses to the subflow.
+ *
+ * To handle events (read, write, control) from the subflows, an MPTCP thread
+ * is created; currently, there is one thread per MPTCP Session. In order to
+ * prevent the MPTCP socket from being destroyed while being accessed by the
+ * MPTCP thread, we bump up the MPTCP socket's so_usecount for the thread,
+ * which will be decremented prior to the thread's termination. The thread
+ * lock (mpte_thread_lock) is used to synchronize its signalling.
+ *
+ * Lock ordering is defined as follows:
+ *
+ *	mtcbinfo (mppi_lock)
+ *		mp_so (mpp_lock)
+ *			mpts (mpts_lock)
+ *				so (inpcb_mtx)
+ *					mptcb (mpt_lock)
+ *
+ * It is not a requirement that all of the above locks need to be acquired
+ * in succession, but the correct lock ordering must be followed when more
+ * than one of them is held. The MPTCP thread lock is not constrained by
+ * this arrangement, because none of the other locks is ever acquired while
+ * holding mpte_thread_lock; therefore it may be acquired at any moment to
+ * signal the thread.
+ *
+ * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
+ * work is done by the MPTCP garbage collector which is invoked on demand by
+ * the PF_MULTIPATH garbage collector. This process will take place once all
+ * of the subflows have been destroyed, and the MPTCP thread has been
+ * instructed to self-terminate.
+ */
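The hierarchy above can be exercised in isolation. The sketch below models the top-down rule with pthread mutexes standing in for the kernel locks; the helper is hypothetical, not kernel code. Locks may be skipped, but a held lock must never be followed by one that sits above it in the hierarchy:

    #include <pthread.h>

    static pthread_mutex_t mppi_lock = PTHREAD_MUTEX_INITIALIZER; /* mtcbinfo */
    static pthread_mutex_t mpp_lock = PTHREAD_MUTEX_INITIALIZER;  /* mp_so */
    static pthread_mutex_t mpts_lock = PTHREAD_MUTEX_INITIALIZER; /* subflow */

    /* OK: honors mtcbinfo -> mp_so -> mpts ordering; upper locks may be
     * skipped entirely, as the comment above notes. */
    static void
    walk_subflow(void)
    {
            pthread_mutex_lock(&mpp_lock);  /* skipping mppi_lock is fine */
            pthread_mutex_lock(&mpts_lock);
            /* ... access subflow state ... */
            pthread_mutex_unlock(&mpts_lock);
            pthread_mutex_unlock(&mpp_lock);
    }

    /* NOT OK: taking mpp_lock while already holding mpts_lock inverts the
     * order and can deadlock against walk_subflow(); never do this. */

    int
    main(void)
    {
            /* Full top-down acquisition, mirroring the list in the comment. */
            pthread_mutex_lock(&mppi_lock);
            pthread_mutex_lock(&mpp_lock);
            pthread_mutex_lock(&mpts_lock);
            pthread_mutex_unlock(&mpts_lock);
            pthread_mutex_unlock(&mpp_lock);
            pthread_mutex_unlock(&mppi_lock);

            walk_subflow();
            return (0);
    }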
+
+static void mptcp_sesdestroy(struct mptses *);
+static void mptcp_thread_signal_locked(struct mptses *);
+static void mptcp_thread_terminate_signal(struct mptses *);
+static void mptcp_thread_dowork(struct mptses *);
+static void mptcp_thread_func(void *, wait_result_t);
+static void mptcp_thread_destroy(struct mptses *);
+static void mptcp_key_pool_init(void);
+static void mptcp_attach_to_subf(struct socket *, struct mptcb *, connid_t);
+static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);
+static void mptcp_conn_properties(struct mptcb *);
+static void mptcp_init_statevars(struct mptcb *);
+
+static uint32_t mptcp_gc(struct mppcbinfo *);
+static int mptcp_subflow_socreate(struct mptses *, struct mptsub *,
+    int, struct proc *, struct socket **);
+static int mptcp_subflow_soclose(struct mptsub *, struct socket *);
+static int mptcp_subflow_soconnectx(struct mptses *, struct mptsub *);
+static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
+    struct uio *, struct mbuf **, struct mbuf **, int *);
+static void mptcp_subflow_rupcall(struct socket *, void *, int);
+static void mptcp_subflow_input(struct mptses *, struct mptsub *);
+static void mptcp_subflow_wupcall(struct socket *, void *, int);
+static void mptcp_subflow_eupcall(struct socket *, void *, uint32_t);
+static void mptcp_update_last_owner(struct mptsub *, struct socket *);
+
+/*
+ * Possible return values for subflow event handlers. Note that success
+ * values must be greater than or equal to MPTS_EVRET_OK. Values less than
+ * that indicate errors or actions which require immediate attention; they
+ * will prevent the rest of the handlers from processing their respective
+ * events until the next round of events processing.
+ */
+typedef enum {
+	MPTS_EVRET_DELETE = 1,			/* delete this subflow */
+	MPTS_EVRET_OK = 2,			/* OK */
+	MPTS_EVRET_CONNECT_PENDING = 3,		/* resume pended connects */
+	MPTS_EVRET_DISCONNECT_FALLBACK = 4,	/* abort all but preferred */
+	MPTS_EVRET_OK_UPDATE = 5,		/* OK with conninfo update */
+} ev_ret_t;
+
+static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_connreset_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_cantrcvmore_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_cantsendmore_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_timeout_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_suspend_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_resume_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *);
+static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *);
+static const char *mptcp_evret2str(ev_ret_t);
+
+static mptcp_key_t *mptcp_reserve_key(void);
+static int mptcp_do_sha1(mptcp_key_t *, char *, int);
+static int mptcp_init_authparms(struct mptcb *);
+static int mptcp_delete_ok(struct mptses *mpte, struct mptsub *mpts);
+
+static unsigned int mptsub_zone_size;		/* size of mptsub */
+static struct zone *mptsub_zone;		/* zone for mptsub */
+
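The ev_ret_t convention above is applied by the event dispatcher further down (mptcp_subflow_events()): each handler's return value is folded into a running result, successes keep the largest value seen so far, and any value below MPTS_EVRET_OK wins outright and halts further processing. A minimal sketch of that fold:

    #include <assert.h>

    #define MAX(a, b)       ((a) > (b) ? (a) : (b))

    typedef enum {
            MPTS_EVRET_DELETE = 1,
            MPTS_EVRET_OK = 2,
            MPTS_EVRET_CONNECT_PENDING = 3,
            MPTS_EVRET_DISCONNECT_FALLBACK = 4,
            MPTS_EVRET_OK_UPDATE = 5,
    } ev_ret_t;

    /* Fold one handler's return into the running result, mirroring the
     * "(error >= MPTS_EVRET_OK) ? MAX(error, ret) : error" pattern used in
     * the dispatch loop of mptcp_subflow_events(). */
    static ev_ret_t
    fold(ev_ret_t ret, ev_ret_t error)
    {
            return ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
    }

    int
    main(void)
    {
            ev_ret_t ret = MPTS_EVRET_OK;

            ret = fold(ret, MPTS_EVRET_OK_UPDATE);  /* success: keep max */
            assert(ret == MPTS_EVRET_OK_UPDATE);
            ret = fold(ret, MPTS_EVRET_DELETE);     /* failure: wins outright */
            assert(ret == MPTS_EVRET_DELETE);
            return (0);
    }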
+static unsigned int mptopt_zone_size;		/* size of mptopt */
+static struct zone *mptopt_zone;		/* zone for mptopt */
+
+static unsigned int mpt_subauth_entry_size;	/* size of subf auth entry */
+static struct zone *mpt_subauth_zone;		/* zone of subf auth entry */
+
+struct mppcbinfo mtcbinfo;
+
+static struct mptcp_keys_pool_head mptcp_keys_pool;
+
+#define MPTCP_SUBFLOW_WRITELEN	(8 * 1024)	/* bytes to write each time */
+#define MPTCP_SUBFLOW_READLEN	(8 * 1024)	/* bytes to read each time */
+
+SYSCTL_DECL(_net_inet);
+
+SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "MPTCP");
+
+uint32_t mptcp_verbose = 0;		/* more noise if greater than 1 */
+SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, verbose, CTLFLAG_RW|CTLFLAG_LOCKED,
+	&mptcp_verbose, 0, "MPTCP verbosity level");
+
+SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD|CTLFLAG_LOCKED,
+	&mtcbinfo.mppi_count, 0, "Number of active PCBs");
+
+/*
+ * Since there is one kernel thread per MPTCP socket, we impose an
+ * artificial limit on the number of allowed MPTCP sockets.
+ */
+uint32_t mptcp_socket_limit = MPPCB_LIMIT;
+SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, sk_lim, CTLFLAG_RW|CTLFLAG_LOCKED,
+	&mptcp_socket_limit, 0, "MPTCP socket limit");
+
+static struct protosw mptcp_subflow_protosw;
+static struct pr_usrreqs mptcp_subflow_usrreqs;
+#if INET6
+static struct ip6protosw mptcp_subflow_protosw6;
+static struct pr_usrreqs mptcp_subflow_usrreqs6;
+#endif /* INET6 */
+
+/*
+ * Protocol pr_init callback.
+ */
+void
+mptcp_init(struct protosw *pp, struct domain *dp)
+{
+#pragma unused(dp)
+	static int mptcp_initialized = 0;
+	struct protosw *prp;
+#if INET6
+	struct ip6protosw *prp6;
+#endif /* INET6 */
+
+	VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);
+
+	/* do this only once */
+	if (mptcp_initialized)
+		return;
+	mptcp_initialized = 1;
+
+	/*
+	 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
+	 * we must be able to find IPPROTO_TCP entries for both.
+	 */
+	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
+	VERIFY(prp != NULL);
+	bcopy(prp, &mptcp_subflow_protosw, sizeof (*prp));
+	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
+	    sizeof (mptcp_subflow_usrreqs));
+	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
+	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
+	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
+	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
+	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
+	/*
+	 * Socket filters shouldn't attach/detach to/from this protosw
+	 * since pr_protosw is to be used instead, which points to the
+	 * real protocol; if they do, it is a bug and we should panic.
+ */ + mptcp_subflow_protosw.pr_filter_head.tqh_first = + (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef; + mptcp_subflow_protosw.pr_filter_head.tqh_last = + (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef; + +#if INET6 + prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6, + IPPROTO_TCP, SOCK_STREAM); + VERIFY(prp6 != NULL); + bcopy(prp6, &mptcp_subflow_protosw6, sizeof (*prp6)); + bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6, + sizeof (mptcp_subflow_usrreqs6)); + mptcp_subflow_protosw6.pr_entry.tqe_next = NULL; + mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL; + mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6; + mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive; + mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp; + /* + * Socket filters shouldn't attach/detach to/from this protosw + * since pr_protosw is to be used instead, which points to the + * real protocol; if they do, it is a bug and we should panic. + */ + mptcp_subflow_protosw6.pr_filter_head.tqh_first = + (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef; + mptcp_subflow_protosw6.pr_filter_head.tqh_last = + (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef; +#endif /* INET6 */ + + bzero(&mtcbinfo, sizeof (mtcbinfo)); + TAILQ_INIT(&mtcbinfo.mppi_pcbs); + mtcbinfo.mppi_size = sizeof (struct mpp_mtp); + if ((mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size, + 1024 * mtcbinfo.mppi_size, 8192, "mptcb")) == NULL) { + panic("%s: unable to allocate MPTCP PCB zone\n", __func__); + /* NOTREACHED */ + } + zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE); + zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE); + + mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init(); + mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb", + mtcbinfo.mppi_lock_grp_attr); + mtcbinfo.mppi_lock_attr = lck_attr_alloc_init(); + lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp, + mtcbinfo.mppi_lock_attr); + mtcbinfo.mppi_gc = mptcp_gc; + + mtcbinfo.mppi_timer = mptcp_timer; + + /* attach to MP domain for garbage collection to take place */ + mp_pcbinfo_attach(&mtcbinfo); + + mptsub_zone_size = sizeof (struct mptsub); + if ((mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size, + 8192, "mptsub")) == NULL) { + panic("%s: unable to allocate MPTCP subflow zone\n", __func__); + /* NOTREACHED */ + } + zone_change(mptsub_zone, Z_CALLERACCT, FALSE); + zone_change(mptsub_zone, Z_EXPAND, TRUE); + + mptopt_zone_size = sizeof (struct mptopt); + if ((mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size, + 1024, "mptopt")) == NULL) { + panic("%s: unable to allocate MPTCP option zone\n", __func__); + /* NOTREACHED */ + } + zone_change(mptopt_zone, Z_CALLERACCT, FALSE); + zone_change(mptopt_zone, Z_EXPAND, TRUE); + + mpt_subauth_entry_size = sizeof (struct mptcp_subf_auth_entry); + if ((mpt_subauth_zone = zinit(mpt_subauth_entry_size, + 1024 * mpt_subauth_entry_size, 8192, "mptauth")) == NULL) { + panic("%s: unable to allocate MPTCP address auth zone \n", + __func__); + /* NOTREACHED */ + } + zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE); + zone_change(mpt_subauth_zone, Z_EXPAND, TRUE); + + /* Set up a list of unique keys */ + mptcp_key_pool_init(); + +} + +/* + * Create an MPTCP session, called as a result of opening a MPTCP socket. 
+ */ +struct mptses * +mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp) +{ + struct mppcbinfo *mppi; + struct mptses *mpte; + struct mptcb *mp_tp; + int error = 0; + + VERIFY(mpp != NULL); + mppi = mpp->mpp_pcbinfo; + VERIFY(mppi != NULL); + + mpte = &((struct mpp_mtp *)mpp)->mpp_ses; + mp_tp = &((struct mpp_mtp *)mpp)->mtcb; + + /* MPTCP Multipath PCB Extension */ + bzero(mpte, sizeof (*mpte)); + VERIFY(mpp->mpp_pcbe == NULL); + mpp->mpp_pcbe = mpte; + mpte->mpte_mppcb = mpp; + mpte->mpte_mptcb = mp_tp; + + TAILQ_INIT(&mpte->mpte_sopts); + TAILQ_INIT(&mpte->mpte_subflows); + mpte->mpte_associd = ASSOCID_ANY; + mpte->mpte_connid_last = CONNID_ANY; + + lck_mtx_init(&mpte->mpte_thread_lock, mppi->mppi_lock_grp, + mppi->mppi_lock_attr); + + /* + * XXX: adi@apple.com + * + * This can be rather expensive if we have lots of MPTCP sockets, + * but we need a kernel thread for this model to work. Perhaps we + * could amortize the costs by having one worker thread per a group + * of MPTCP sockets. + */ + if (kernel_thread_start(mptcp_thread_func, mpte, + &mpte->mpte_thread) != KERN_SUCCESS) { + error = ENOBUFS; + goto out; + } + mp_so->so_usecount++; /* for thread */ + + /* MPTCP Protocol Control Block */ + bzero(mp_tp, sizeof (*mp_tp)); + lck_mtx_init(&mp_tp->mpt_lock, mppi->mppi_lock_grp, + mppi->mppi_lock_attr); + mp_tp->mpt_mpte = mpte; + +out: + if (error != 0) + lck_mtx_destroy(&mpte->mpte_thread_lock, mppi->mppi_lock_grp); + DTRACE_MPTCP5(session__create, struct socket *, mp_so, + struct sockbuf *, &mp_so->so_rcv, + struct sockbuf *, &mp_so->so_snd, + struct mppcb *, mpp, int, error); + + return ((error != 0) ? NULL : mpte); +} + +/* + * Destroy an MPTCP session. + */ +static void +mptcp_sesdestroy(struct mptses *mpte) +{ + struct mptcb *mp_tp; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + mp_tp = mpte->mpte_mptcb; + VERIFY(mp_tp != NULL); + + /* + * MPTCP Multipath PCB Extension section + */ + mptcp_flush_sopts(mpte); + VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0); + + lck_mtx_destroy(&mpte->mpte_thread_lock, + mpte->mpte_mppcb->mpp_pcbinfo->mppi_lock_grp); + + /* + * MPTCP Protocol Control Block section + */ + lck_mtx_destroy(&mp_tp->mpt_lock, + mpte->mpte_mppcb->mpp_pcbinfo->mppi_lock_grp); + + DTRACE_MPTCP2(session__destroy, struct mptses *, mpte, + struct mptcb *, mp_tp); +} + +/* + * Allocate an MPTCP socket option structure. + */ +struct mptopt * +mptcp_sopt_alloc(int how) +{ + struct mptopt *mpo; + + mpo = (how == M_WAITOK) ? zalloc(mptopt_zone) : + zalloc_noblock(mptopt_zone); + if (mpo != NULL) { + bzero(mpo, mptopt_zone_size); + } + + return (mpo); +} + +/* + * Free an MPTCP socket option structure. + */ +void +mptcp_sopt_free(struct mptopt *mpo) +{ + VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED)); + + zfree(mptopt_zone, mpo); +} + +/* + * Add a socket option to the MPTCP socket option list. + */ +void +mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo) +{ + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED)); + mpo->mpo_flags |= MPOF_ATTACHED; + TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry); +} + +/* + * Remove a socket option from the MPTCP socket option list. + */ +void +mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo) +{ + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + VERIFY(mpo->mpo_flags & MPOF_ATTACHED); + mpo->mpo_flags &= ~MPOF_ATTACHED; + TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry); +} + +/* + * Search for an existing socket option. 
+ */ +struct mptopt * +mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt) +{ + struct mptopt *mpo; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) { + if (mpo->mpo_level == sopt->sopt_level && + mpo->mpo_name == sopt->sopt_name) + break; + } + VERIFY(mpo == NULL || sopt->sopt_valsize == sizeof (int)); + + return (mpo); +} + +/* + * Flushes all recorded socket options from an MP socket. + */ +void +mptcp_flush_sopts(struct mptses *mpte) +{ + struct mptopt *mpo, *tmpo; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) { + mptcp_sopt_remove(mpte, mpo); + mptcp_sopt_free(mpo); + } + VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts)); +} + +/* + * Allocate a MPTCP subflow structure. + */ +struct mptsub * +mptcp_subflow_alloc(int how) +{ + struct mptsub *mpts; + + mpts = (how == M_WAITOK) ? zalloc(mptsub_zone) : + zalloc_noblock(mptsub_zone); + if (mpts != NULL) { + bzero(mpts, mptsub_zone_size); + lck_mtx_init(&mpts->mpts_lock, mtcbinfo.mppi_lock_grp, + mtcbinfo.mppi_lock_attr); + } + + return (mpts); +} + +/* + * Deallocate a subflow structure, called when all of the references held + * on it have been released. This implies that the subflow has been deleted. + */ +void +mptcp_subflow_free(struct mptsub *mpts) +{ + MPTS_LOCK_ASSERT_HELD(mpts); + + VERIFY(mpts->mpts_refcnt == 0); + VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED)); + VERIFY(mpts->mpts_mpte == NULL); + VERIFY(mpts->mpts_socket == NULL); + + if (mpts->mpts_src_sl != NULL) { + sockaddrlist_free(mpts->mpts_src_sl); + mpts->mpts_src_sl = NULL; + } + if (mpts->mpts_dst_sl != NULL) { + sockaddrlist_free(mpts->mpts_dst_sl); + mpts->mpts_dst_sl = NULL; + } + MPTS_UNLOCK(mpts); + lck_mtx_destroy(&mpts->mpts_lock, mtcbinfo.mppi_lock_grp); + + zfree(mptsub_zone, mpts); +} + +/* + * Create an MPTCP subflow socket. + */ +static int +mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom, + struct proc *p, struct socket **so) +{ + struct mptopt smpo, *mpo, *tmpo; + struct socket *mp_so; + int error; + + *so = NULL; + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + + /* + * Create the subflow socket (multipath subflow, non-blocking.) + * + * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow + * socket; it will be cleared when the socket is peeled off or closed. + * It also indicates to the underlying TCP to handle MPTCP options. + * A multipath subflow socket implies SS_NOFDREF state. 
+ */ + if ((error = socreate_internal(dom, so, SOCK_STREAM, + IPPROTO_TCP, p, SOCF_ASYNC | SOCF_MP_SUBFLOW, PROC_NULL)) != 0) { + mptcplog((LOG_ERR, "MPTCP ERROR %s: mp_so 0x%llx unable to " + "create subflow socket error %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error)); + return (error); + } + + socket_lock(*so, 0); + VERIFY((*so)->so_flags & SOF_MP_SUBFLOW); + VERIFY(((*so)->so_state & (SS_NBIO|SS_NOFDREF)) == + (SS_NBIO|SS_NOFDREF)); + + /* prevent the socket buffers from being compressed */ + (*so)->so_rcv.sb_flags |= SB_NOCOMPRESS; + (*so)->so_snd.sb_flags |= SB_NOCOMPRESS; + + bzero(&smpo, sizeof (smpo)); + smpo.mpo_flags |= MPOF_SUBFLOW_OK; + smpo.mpo_level = SOL_SOCKET; + smpo.mpo_intval = 1; + + /* disable SIGPIPE */ + smpo.mpo_name = SO_NOSIGPIPE; + if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0) + goto out; + + /* find out if the subflow's source address goes away */ + smpo.mpo_name = SO_NOADDRERR; + if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0) + goto out; + + /* enable keepalive */ + smpo.mpo_name = SO_KEEPALIVE; + if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0) + goto out; + + /* + * Limit the receive socket buffer size to 64k. + * + * We need to take into consideration the window scale option + * which could be negotiated in one subflow but disabled in + * another subflow. + * XXX This can be improved in the future. + */ + smpo.mpo_name = SO_RCVBUF; + smpo.mpo_intval = MPTCP_RWIN_MAX; + if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0) + goto out; + + /* N.B.: set by sosetopt */ + VERIFY(!((*so)->so_rcv.sb_flags & SB_AUTOSIZE)); + /* Prevent automatic socket buffer sizing. */ + (*so)->so_snd.sb_flags &= ~SB_AUTOSIZE; + + smpo.mpo_level = IPPROTO_TCP; + smpo.mpo_intval = mptcp_subflow_keeptime; + smpo.mpo_name = TCP_KEEPALIVE; + if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0) + goto out; + + /* replay setsockopt(2) on the subflow sockets for eligible options */ + TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) { + int interim; + + if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) + continue; + + /* + * Skip those that are handled internally; these options + * should not have been recorded and marked with the + * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case. + */ + if (mpo->mpo_level == SOL_SOCKET && + (mpo->mpo_name == SO_NOSIGPIPE || + mpo->mpo_name == SO_NOADDRERR || + mpo->mpo_name == SO_KEEPALIVE)) + continue; + + interim = (mpo->mpo_flags & MPOF_INTERIM); + if (mptcp_subflow_sosetopt(mpte, *so, mpo) != 0 && interim) { + char buf[32]; + mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d " + "interim record removed\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name, + buf, sizeof (buf)), mpo->mpo_intval)); + mptcp_sopt_remove(mpte, mpo); + mptcp_sopt_free(mpo); + continue; + } + } + + /* + * We need to receive everything that the subflow socket has, + * so use a customized socket receive function. We will undo + * this when the socket is peeled off or closed. 
+ */ + mpts->mpts_oprotosw = (*so)->so_proto; + switch (dom) { + case PF_INET: + (*so)->so_proto = &mptcp_subflow_protosw; + break; +#if INET6 + case PF_INET6: + (*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6; + break; +#endif /* INET6 */ + default: + VERIFY(0); + /* NOTREACHED */ + } + +out: + socket_unlock(*so, 0); + + DTRACE_MPTCP4(subflow__create, struct mptses *, mpte, + struct mptsub *, mpts, int, dom, int, error); + + return (error); +} + +/* + * Close an MPTCP subflow socket. + * + * Note that this may be called on an embryonic subflow, and the only + * thing that is guaranteed valid is the protocol-user request. + */ +static int +mptcp_subflow_soclose(struct mptsub *mpts, struct socket *so) +{ + MPTS_LOCK_ASSERT_HELD(mpts); + + socket_lock(so, 0); + VERIFY(so->so_flags & SOF_MP_SUBFLOW); + VERIFY((so->so_state & (SS_NBIO|SS_NOFDREF)) == (SS_NBIO|SS_NOFDREF)); + + /* restore protocol-user requests */ + VERIFY(mpts->mpts_oprotosw != NULL); + so->so_proto = mpts->mpts_oprotosw; + socket_unlock(so, 0); + + mpts->mpts_socket = NULL; /* may already be NULL */ + + DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts, + struct socket *, so, + struct sockbuf *, &so->so_rcv, + struct sockbuf *, &so->so_snd, + struct mptses *, mpts->mpts_mpte); + + return (soclose(so)); +} + +/* + * Connect an MPTCP subflow socket. + * + * This may be called inline as part of adding a subflow, or asynchronously + * by the thread (upon progressing to MPTCPF_JOIN_READY). Note that in the + * pending connect case, the subflow socket may have been bound to an interface + * and/or a source IP address which may no longer be around by the time this + * routine is called; in that case the connect attempt will most likely fail. + */ +static int +mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *so; + int af, error; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + + VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED)) == + MPTSF_CONNECTING); + VERIFY(mpts->mpts_socket != NULL); + so = mpts->mpts_socket; + af = mpts->mpts_family; + + if (af == AF_INET || af == AF_INET6) { + struct sockaddr_entry *dst_se; + char dbuf[MAX_IPv6_STR_LEN]; + + dst_se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head); + VERIFY(dst_se != NULL); + + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d " + "[pended %s]\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket), + inet_ntop(af, ((af == AF_INET) ? + (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr : + (void *)&SIN6(dst_se->se_addr)->sin6_addr), + dbuf, sizeof (dbuf)), ((af == AF_INET) ? + ntohs(SIN(dst_se->se_addr)->sin_port) : + ntohs(SIN6(dst_se->se_addr)->sin6_port)), + mpts->mpts_connid, + ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ? + "YES" : "NO"))); + } + + mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING; + + socket_lock(so, 0); + mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpts->mpts_connid); + /* connect the subflow socket */ + error = soconnectxlocked(so, &mpts->mpts_src_sl, &mpts->mpts_dst_sl, + mpts->mpts_mpcr.mpcr_proc, mpts->mpts_mpcr.mpcr_ifscope, + mpte->mpte_associd, NULL, TCP_CONNREQF_MPTCP, + &mpts->mpts_mpcr, sizeof (mpts->mpts_mpcr)); + socket_unlock(so, 0); + + DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte, + struct mptsub *, mpts, int, error); + + return (error); +} + +/* + * MPTCP subflow socket receive routine, derived from soreceive(). 
+ */ +static int +mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa, + struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +{ +#pragma unused(uio) + int flags, error = 0; + struct proc *p = current_proc(); + struct mbuf *m, **mp = mp0; + struct mbuf *nextrecord; + + socket_lock(so, 1); + VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED); + +#ifdef MORE_LOCKING_DEBUG + if (so->so_usecount == 1) { + panic("%s: so=%x no other reference on socket\n", __func__, so); + /* NOTREACHED */ + } +#endif + /* + * We return all that is there in the subflow's socket receive buffer + * to the MPTCP layer, so we require that the caller passes in the + * expected parameters. + */ + if (mp == NULL || controlp != NULL) { + socket_unlock(so, 1); + return (EINVAL); + } + *mp = NULL; + if (psa != NULL) + *psa = NULL; + if (flagsp != NULL) + flags = *flagsp &~ MSG_EOR; + else + flags = 0; + + if (flags & (MSG_PEEK|MSG_OOB|MSG_NEEDSA|MSG_WAITALL|MSG_WAITSTREAM)) { + socket_unlock(so, 1); + return (EOPNOTSUPP); + } + flags |= (MSG_DONTWAIT|MSG_NBIO); + + /* + * If a recv attempt is made on a previously-accepted socket + * that has been marked as inactive (disconnected), reject + * the request. + */ + if (so->so_flags & SOF_DEFUNCT) { + struct sockbuf *sb = &so->so_rcv; + + error = ENOTCONN; + SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n", + __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so), + SOCK_DOM(so), SOCK_TYPE(so), error)); + /* + * This socket should have been disconnected and flushed + * prior to being returned from sodefunct(); there should + * be no data on its receive list, so panic otherwise. + */ + if (so->so_state & SS_DEFUNCT) + sb_empty_assert(sb, __func__); + socket_unlock(so, 1); + return (error); + } + + /* + * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE) + * and if so just return to the caller. This could happen when + * soreceive() is called by a socket upcall function during the + * time the socket is freed. The socket buffer would have been + * locked across the upcall, therefore we cannot put this thread + * to sleep (else we will deadlock) or return EWOULDBLOCK (else + * we may livelock), because the lock on the socket buffer will + * only be released when the upcall routine returns to its caller. + * Because the socket has been officially closed, there can be + * no further read on it. + * + * A multipath subflow socket would have its SS_NOFDREF set by + * default, so check for SOF_MP_SUBFLOW socket flag; when the + * socket is closed for real, SOF_MP_SUBFLOW would be cleared. + */ + if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) == + (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) { + socket_unlock(so, 1); + return (0); + } + + /* + * For consistency with soreceive() semantics, we need to obey + * SB_LOCK in case some other code path has locked the buffer. + */ + error = sblock(&so->so_rcv, 0); + if (error != 0) { + socket_unlock(so, 1); + return (error); + } + + m = so->so_rcv.sb_mb; + if (m == NULL) { + /* + * Panic if we notice inconsistencies in the socket's + * receive list; both sb_mb and sb_cc should correctly + * reflect the contents of the list, otherwise we may + * end up with false positives during select() or poll() + * which could put the application in a bad state. 
+	 */
+	SB_MB_CHECK(&so->so_rcv);
+
+		if (so->so_error != 0) {
+			error = so->so_error;
+			so->so_error = 0;
+			goto release;
+		}
+
+		if (so->so_state & SS_CANTRCVMORE) {
+			goto release;
+		}
+
+		if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))) {
+			error = ENOTCONN;
+			goto release;
+		}
+
+		/*
+		 * MSG_DONTWAIT is implicitly set and this routine will
+		 * never block, so return EWOULDBLOCK when there is nothing.
+		 */
+		error = EWOULDBLOCK;
+		goto release;
+	}
+
+	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
+	SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
+	SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
+
+	while (m != NULL) {
+		nextrecord = m->m_nextpkt;
+		sbfree(&so->so_rcv, m);
+
+		if (mp != NULL) {
+			*mp = m;
+			mp = &m->m_next;
+			so->so_rcv.sb_mb = m = m->m_next;
+			*mp = NULL;
+		}
+
+		if (m != NULL) {
+			m->m_nextpkt = nextrecord;
+			if (nextrecord == NULL)
+				so->so_rcv.sb_lastrecord = m;
+		} else {
+			m = so->so_rcv.sb_mb = nextrecord;
+			SB_EMPTY_FIXUP(&so->so_rcv);
+		}
+		SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
+		SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
+	}
+
+	DTRACE_MPTCP3(subflow__receive, struct socket *, so,
+	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);
+	/* notify protocol that we drained all the data */
+	if ((so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL)
+		(*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
+
+	if (flagsp != NULL)
+		*flagsp |= flags;
+
+release:
+	sbunlock(&so->so_rcv, FALSE);	/* will unlock socket */
+	return (error);
+
+}
+
+
+/*
+ * Prepare an MPTCP subflow socket for peeloff(2); basically undo
+ * the work done earlier when the subflow socket was created.
+ */
+void
+mptcp_subflow_sopeeloff(struct mptses *mpte, struct mptsub *mpts,
+    struct socket *so)
+{
+	struct mptopt smpo;
+	struct socket *mp_so;
+	int p, c;
+
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+	mp_so = mpte->mpte_mppcb->mpp_socket;
+	MPTS_LOCK_ASSERT_HELD(mpts);
+
+	socket_lock(so, 0);
+	VERIFY(so->so_flags & SOF_MP_SUBFLOW);
+	VERIFY((so->so_state & (SS_NBIO|SS_NOFDREF)) == (SS_NBIO|SS_NOFDREF));
+
+	/* inherit MPTCP socket states */
+	if (!(mp_so->so_state & SS_NBIO))
+		so->so_state &= ~SS_NBIO;
+
+	/*
+	 * At this point, the socket is not yet closed, as there is at least
+	 * one outstanding usecount previously held by mpts_socket from
+	 * socreate(). Atomically clear SOF_MP_SUBFLOW and SS_NOFDREF here.
+	 */
+	so->so_flags &= ~SOF_MP_SUBFLOW;
+	so->so_state &= ~SS_NOFDREF;
+	so->so_flags &= ~SOF_MPTCP_TRUE;
+
+	/* allow socket buffers to be compressed */
+	so->so_rcv.sb_flags &= ~SB_NOCOMPRESS;
+	so->so_snd.sb_flags &= ~SB_NOCOMPRESS;
+
+	/*
+	 * Allow socket buffer auto sizing.
+	 *
+	 * This will increase the current 64k buffer size to whatever is best.
+	 */
+	so->so_rcv.sb_flags |= SB_AUTOSIZE;
+	so->so_snd.sb_flags |= SB_AUTOSIZE;
+
+	/* restore protocol-user requests */
+	VERIFY(mpts->mpts_oprotosw != NULL);
+	so->so_proto = mpts->mpts_oprotosw;
+
+	bzero(&smpo, sizeof (smpo));
+	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
+	smpo.mpo_level = SOL_SOCKET;
+
+	/* inherit SOF_NOSIGPIPE from parent MP socket */
+	p = (mp_so->so_flags & SOF_NOSIGPIPE);
+	c = (so->so_flags & SOF_NOSIGPIPE);
+	smpo.mpo_intval = ((p - c) > 0) ?
1 : 0; + smpo.mpo_name = SO_NOSIGPIPE; + if ((p - c) != 0) + (void) mptcp_subflow_sosetopt(mpte, so, &smpo); + + /* inherit SOF_NOADDRAVAIL from parent MP socket */ + p = (mp_so->so_flags & SOF_NOADDRAVAIL); + c = (so->so_flags & SOF_NOADDRAVAIL); + smpo.mpo_intval = ((p - c) > 0) ? 1 : 0; + smpo.mpo_name = SO_NOADDRERR; + if ((p - c) != 0) + (void) mptcp_subflow_sosetopt(mpte, so, &smpo); + + /* inherit SO_KEEPALIVE from parent MP socket */ + p = (mp_so->so_options & SO_KEEPALIVE); + c = (so->so_options & SO_KEEPALIVE); + smpo.mpo_intval = ((p - c) > 0) ? 1 : 0; + smpo.mpo_name = SO_KEEPALIVE; + if ((p - c) != 0) + (void) mptcp_subflow_sosetopt(mpte, so, &smpo); + + /* unset TCP level default keepalive option */ + p = (intotcpcb(sotoinpcb(mp_so)))->t_keepidle; + c = (intotcpcb(sotoinpcb(so)))->t_keepidle; + smpo.mpo_level = IPPROTO_TCP; + smpo.mpo_intval = 0; + smpo.mpo_name = TCP_KEEPALIVE; + if ((p - c) != 0) + (void) mptcp_subflow_sosetopt(mpte, so, &smpo); + socket_unlock(so, 0); + + DTRACE_MPTCP5(subflow__peeloff, struct mptses *, mpte, + struct mptsub *, mpts, struct socket *, so, + struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd); +} + +/* + * Establish an initial MPTCP connection (if first subflow and not yet + * connected), or add a subflow to an existing MPTCP connection. + */ +int +mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts, + struct proc *p, uint32_t ifscope) +{ + struct sockaddr_entry *se, *src_se = NULL, *dst_se = NULL; + struct socket *mp_so, *so = NULL; + struct mptsub_connreq mpcr; + struct mptcb *mp_tp; + int af, error = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + + MPTS_LOCK(mpts); + VERIFY(!(mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED))); + VERIFY(mpts->mpts_mpte == NULL); + VERIFY(mpts->mpts_socket == NULL); + VERIFY(mpts->mpts_dst_sl != NULL); + VERIFY(mpts->mpts_connid == CONNID_ANY); + + /* select source (if specified) and destination addresses */ + if ((error = in_selectaddrs(AF_UNSPEC, &mpts->mpts_src_sl, &src_se, + &mpts->mpts_dst_sl, &dst_se)) != 0) + goto out; + + VERIFY(mpts->mpts_dst_sl != NULL && dst_se != NULL); + VERIFY(src_se == NULL || mpts->mpts_src_sl != NULL); + af = mpts->mpts_family = dst_se->se_addr->sa_family; + VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); + VERIFY(af == AF_INET || af == AF_INET6); + + /* + * If the source address is not specified, allocate a storage for + * it, so that later on we can fill it in with the actual source + * IP address chosen by the underlying layer for the subflow after + * it is connected. + */ + if (mpts->mpts_src_sl == NULL) { + mpts->mpts_src_sl = + sockaddrlist_dup(mpts->mpts_dst_sl, M_WAITOK); + if (mpts->mpts_src_sl == NULL) { + error = ENOBUFS; + goto out; + } + se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head); + VERIFY(se != NULL && se->se_addr != NULL && + se->se_addr->sa_len == dst_se->se_addr->sa_len); + bzero(se->se_addr, se->se_addr->sa_len); + se->se_addr->sa_len = dst_se->se_addr->sa_len; + se->se_addr->sa_family = dst_se->se_addr->sa_family; + } + + /* create the subflow socket */ + if ((error = mptcp_subflow_socreate(mpte, mpts, af, p, &so)) != 0) + goto out; + + /* + * XXX: adi@apple.com + * + * This probably needs to be made smarter, but for now simply + * increment the counter, while avoiding 0 (CONNID_ANY) and + * -1 (CONNID_ALL). Assume that an MPTCP connection will not + * live too long with (2^32)-2 subflow connection attempts. 
+	 */
+	mpte->mpte_connid_last++;
+	if (mpte->mpte_connid_last == CONNID_ALL ||
+	    mpte->mpte_connid_last == CONNID_ANY)
+		mpte->mpte_connid_last++;
+
+	mpts->mpts_connid = mpte->mpte_connid_last;
+	VERIFY(mpts->mpts_connid != CONNID_ANY &&
+	    mpts->mpts_connid != CONNID_ALL);
+
+	/* bind subflow socket to the specified interface */
+	if (ifscope != IFSCOPE_NONE) {
+		socket_lock(so, 0);
+		error = inp_bindif(sotoinpcb(so), ifscope, &mpts->mpts_outif);
+		if (error != 0) {
+			socket_unlock(so, 0);
+			(void) mptcp_subflow_soclose(mpts, so);
+			goto out;
+		}
+		VERIFY(mpts->mpts_outif != NULL);
+		mpts->mpts_flags |= MPTSF_BOUND_IF;
+
+		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindif %s[%d] "
+		    "cid %d\n", __func__,
+		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+		    mpts->mpts_outif->if_xname,
+		    ifscope, mpts->mpts_connid));
+		socket_unlock(so, 0);
+	}
+
+	/* if source address and/or port is specified, bind to it */
+	if (src_se != NULL) {
+		struct sockaddr *sa = src_se->se_addr;
+		uint32_t mpts_flags = 0;
+		in_port_t lport;
+
+		switch (af) {
+		case AF_INET:
+			if (SIN(sa)->sin_addr.s_addr != INADDR_ANY)
+				mpts_flags |= MPTSF_BOUND_IP;
+			if ((lport = SIN(sa)->sin_port) != 0)
+				mpts_flags |= MPTSF_BOUND_PORT;
+			break;
+#if INET6
+		case AF_INET6:
+			VERIFY(af == AF_INET6);
+			if (!IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr))
+				mpts_flags |= MPTSF_BOUND_IP;
+			if ((lport = SIN6(sa)->sin6_port) != 0)
+				mpts_flags |= MPTSF_BOUND_PORT;
+			break;
+#endif /* INET6 */
+		}
+
+		error = sobindlock(so, sa, 1);	/* will lock/unlock socket */
+		if (error != 0) {
+			(void) mptcp_subflow_soclose(mpts, so);
+			goto out;
+		}
+		mpts->mpts_flags |= mpts_flags;
+
+		if (af == AF_INET || af == AF_INET6) {
+			char sbuf[MAX_IPv6_STR_LEN];
+
+			mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindip %s[%d] "
+			    "cid %d\n", __func__,
+			    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+			    inet_ntop(af, ((af == AF_INET) ?
+			    (void *)&SIN(sa)->sin_addr.s_addr :
+			    (void *)&SIN6(sa)->sin6_addr), sbuf, sizeof (sbuf)),
+			    ntohs(lport), mpts->mpts_connid));
+		}
+	}
+
+	/*
+	 * Insert the subflow into the list, and associate the MPTCP PCB
+	 * as well as the subflow socket. From this point on, removing
+	 * the subflow needs to be done via mptcp_subflow_del().
+	 */
+	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
+	mpte->mpte_numflows++;
+
+	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
+	mpts->mpts_mpte = mpte;
+	mpts->mpts_socket = so;
+	MPTS_ADDREF_LOCKED(mpts);	/* for being in MPTCP subflow list */
+	MPTS_ADDREF_LOCKED(mpts);	/* for subflow socket */
+	mp_so->so_usecount++;		/* for subflow socket */
+
+	/* register for subflow socket read/write events */
+	(void) sock_setupcalls(so, mptcp_subflow_rupcall, mpts,
+	    mptcp_subflow_wupcall, mpts);
+
+	/*
+	 * Register for subflow socket control events; ignore
+	 * SO_FILT_HINT_CONNINFO_UPDATED from below since we
+	 * will generate it here.
+ */ + (void) sock_catchevents(so, mptcp_subflow_eupcall, mpts, + SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE | + SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT | + SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED | + SO_FILT_HINT_SUSPEND | SO_FILT_HINT_RESUME | + SO_FILT_HINT_CONNECTED | SO_FILT_HINT_DISCONNECTED | + SO_FILT_HINT_MPFAILOVER | SO_FILT_HINT_MPSTATUS | + SO_FILT_HINT_MUSTRST); + + /* sanity check */ + VERIFY(!(mpts->mpts_flags & + (MPTSF_CONNECTING|MPTSF_CONNECTED|MPTSF_CONNECT_PENDING))); + + bzero(&mpcr, sizeof (mpcr)); + mpcr.mpcr_proc = p; + mpcr.mpcr_ifscope = ifscope; + /* + * Indicate to the TCP subflow whether or not it should establish + * the initial MPTCP connection, or join an existing one. Fill + * in the connection request structure with additional info needed + * by the underlying TCP (to be used in the TCP options, etc.) + */ + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) { + if (mp_tp->mpt_state == MPTCPS_CLOSED) { + mp_tp->mpt_localkey = mptcp_reserve_key(); + mptcp_conn_properties(mp_tp); + } + MPT_UNLOCK(mp_tp); + soisconnecting(mp_so); + mpcr.mpcr_type = MPTSUB_CONNREQ_MP_ENABLE; + } else { + if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)) + mpts->mpts_flags |= MPTSF_CONNECT_PENDING; + MPT_UNLOCK(mp_tp); + mpcr.mpcr_type = MPTSUB_CONNREQ_MP_ADD; + } + + mpts->mpts_mpcr = mpcr; + mpts->mpts_flags |= MPTSF_CONNECTING; + + if (af == AF_INET || af == AF_INET6) { + char dbuf[MAX_IPv6_STR_LEN]; + + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d " + "[pending %s]\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + inet_ntop(af, ((af == AF_INET) ? + (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr : + (void *)&SIN6(dst_se->se_addr)->sin6_addr), + dbuf, sizeof (dbuf)), ((af == AF_INET) ? + ntohs(SIN(dst_se->se_addr)->sin_port) : + ntohs(SIN6(dst_se->se_addr)->sin6_port)), + mpts->mpts_connid, + ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ? + "YES" : "NO"))); + } + + /* connect right away if first attempt, or if join can be done now */ + if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING)) + error = mptcp_subflow_soconnectx(mpte, mpts); + +out: + MPTS_UNLOCK(mpts); + if (error == 0) { + soevent(mp_so, SO_FILT_HINT_LOCKED | + SO_FILT_HINT_CONNINFO_UPDATED); + } + return (error); +} + +static int +mptcp_delete_ok(struct mptses *mpte, struct mptsub *mpts) +{ + int ret = 1; + struct mptcb *mp_tp = NULL; + + MPTE_LOCK_ASSERT_HELD(mpte); + mp_tp = mpte->mpte_mptcb; + VERIFY(mp_tp != NULL); + MPTS_LOCK(mpts); + MPT_LOCK(mp_tp); + if ((mpts->mpts_soerror == 0) && + (mpts->mpts_flags & MPTSF_ACTIVE) && + (mp_tp->mpt_state != MPTCPS_CLOSED) && + (mp_tp->mpt_state <= MPTCPS_TIME_WAIT)) + ret = 0; + MPT_UNLOCK(mp_tp); + MPTS_UNLOCK(mpts); + return (ret); +} + +/* + * Delete/remove a subflow from an MPTCP. The underlying subflow socket + * will no longer be accessible after a subflow is deleted, thus this + * should occur only after the subflow socket has been disconnected. + * If peeloff(2) is called, leave the socket open. + */ +void +mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts, boolean_t close) +{ + struct socket *mp_so, *so; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + + MPTS_LOCK(mpts); + so = mpts->mpts_socket; + VERIFY(so != NULL); + + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d " + "[close %s] %d %x\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mp_so->so_usecount, + mp_so->so_retaincnt, mpts->mpts_connid, + (close ? 
"YES" : "NO"), mpts->mpts_soerror, + mpts->mpts_flags)); + + VERIFY(mpts->mpts_mpte == mpte); + VERIFY(mpts->mpts_connid != CONNID_ANY && + mpts->mpts_connid != CONNID_ALL); + + VERIFY(mpts->mpts_flags & MPTSF_ATTACHED); + atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED); + TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry); + VERIFY(mpte->mpte_numflows != 0); + mpte->mpte_numflows--; + + /* + * Drop references held by this subflow socket; there + * will be no further upcalls made from this point. + */ + (void) sock_setupcalls(so, NULL, NULL, NULL, NULL); + (void) sock_catchevents(so, NULL, NULL, 0); + mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so); + if (close) + (void) mptcp_subflow_soclose(mpts, so); + + VERIFY(mp_so->so_usecount != 0); + mp_so->so_usecount--; /* for subflow socket */ + mpts->mpts_mpte = NULL; + mpts->mpts_socket = NULL; + MPTS_UNLOCK(mpts); + + MPTS_REMREF(mpts); /* for MPTCP subflow list */ + MPTS_REMREF(mpts); /* for subflow socket */ + + soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); +} + +/* + * Disconnect a subflow socket. + */ +void +mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts, + boolean_t deleteok) +{ + struct socket *so; + struct mptcb *mp_tp; + int send_dfin = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + + VERIFY(mpts->mpts_mpte == mpte); + VERIFY(mpts->mpts_socket != NULL); + VERIFY(mpts->mpts_connid != CONNID_ANY && + mpts->mpts_connid != CONNID_ALL); + + if (mpts->mpts_flags & (MPTSF_DISCONNECTING|MPTSF_DISCONNECTED)) + return; + + mpts->mpts_flags |= MPTSF_DISCONNECTING; + + /* + * If this is coming from disconnectx(2) or issued as part of + * closing the MPTCP socket, the subflow shouldn't stick around. + * Otherwise let it linger around in case the upper layers need + * to retrieve its conninfo. + */ + if (deleteok) + mpts->mpts_flags |= MPTSF_DELETEOK; + + so = mpts->mpts_socket; + mp_tp = mpte->mpte_mptcb; + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state > MPTCPS_ESTABLISHED) + send_dfin = 1; + MPT_UNLOCK(mp_tp); + + socket_lock(so, 0); + if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) && + (so->so_state & SS_ISCONNECTED)) { + mptcplog((LOG_DEBUG, "%s: cid %d fin %d [linger %s]\n", + __func__, mpts->mpts_connid, send_dfin, + (deleteok ? "NO" : "YES"))); + + if (send_dfin) + mptcp_send_dfin(so); + (void) soshutdownlock(so, SHUT_RD); + (void) soshutdownlock(so, SHUT_WR); + (void) sodisconnectlocked(so); + } + socket_unlock(so, 0); + /* + * Generate a disconnect event for this subflow socket, in case + * the lower layer doesn't do it; this is needed because the + * subflow socket deletion relies on it. This will also end up + * generating SO_FILT_HINT_CONNINFO_UPDATED on the MPTCP socket; + * we cannot do that here because subflow lock is currently held. + */ + mptcp_subflow_eupcall(so, mpts, SO_FILT_HINT_DISCONNECTED); +} + +/* + * Subflow socket read upcall. + * + * Called when the associated subflow socket posted a read event. The subflow + * socket lock has been released prior to invoking the callback. Note that the + * upcall may occur synchronously as a result of MPTCP performing an action on + * it, or asynchronously as a result of an event happening at the subflow layer. + * Therefore, to maintain lock ordering, the only lock that can be acquired + * here is the thread lock, for signalling purposes. 
+ */
+static void
+mptcp_subflow_rupcall(struct socket *so, void *arg, int waitf)
+{
+#pragma unused(so, waitf)
+	struct mptsub *mpts = arg;
+	struct mptses *mpte = mpts->mpts_mpte;
+
+	VERIFY(mpte != NULL);
+
+	lck_mtx_lock(&mpte->mpte_thread_lock);
+	mptcp_thread_signal_locked(mpte);
+	lck_mtx_unlock(&mpte->mpte_thread_lock);
+}
+
+/*
+ * Subflow socket input.
+ *
+ * Called in the context of the MPTCP thread, for reading data from the
+ * underlying subflow socket and delivering it to MPTCP.
+ */
+static void
+mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
+{
+	struct mbuf *m = NULL;
+	struct socket *so;
+	int error;
+	struct mptsub *mpts_alt = NULL;
+
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+	MPTS_LOCK_ASSERT_HELD(mpts);
+
+	DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
+	    struct mptsub *, mpts);
+
+	if (!(mpts->mpts_flags & MPTSF_CONNECTED))
+		return;
+
+	so = mpts->mpts_socket;
+
+	error = sock_receive_internal(so, NULL, &m, 0, NULL);
+	if (error != 0 && error != EWOULDBLOCK) {
+		mptcplog((LOG_ERR, "%s: cid %d error %d\n",
+		    __func__, mpts->mpts_connid, error));
+		MPTS_UNLOCK(mpts);
+		mpts_alt = mptcp_get_subflow(mpte, mpts);
+		if (mpts_alt == NULL) {
+			mptcplog((LOG_ERR, "%s: no alt path cid %d\n",
+			    __func__, mpts->mpts_connid));
+			mpte->mpte_mppcb->mpp_socket->so_error = error;
+		}
+		MPTS_LOCK(mpts);
+	} else if (error == 0) {
+		mptcplog3((LOG_DEBUG, "%s: cid %d \n",
+		    __func__, mpts->mpts_connid));
+	}
+
+	/* In fallback mode, accept data only on the active subflow */
+	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
+	    (!(mpts->mpts_flags & MPTSF_ACTIVE))) {
+		m_freem(m);
+		return;
+	}
+
+	if (m != NULL) {
+		/*
+		 * Release subflow lock since this may trigger MPTCP to send,
+		 * possibly on a different subflow. An extra reference has
+		 * been held on the subflow by the MPTCP thread before coming
+		 * here, so we can be sure that it won't go away, in the event
+		 * the MP socket lock gets released.
+		 */
+		MPTS_UNLOCK(mpts);
+		mptcp_input(mpte, m);
+		MPTS_LOCK(mpts);
+	}
+}
+
+/*
+ * Subflow socket write upcall.
+ *
+ * Called when the associated subflow socket posted a write event. The
+ * subflow socket lock has been released prior to invoking the callback.
+ * Note that the upcall may occur synchronously as a result of MPTCP
+ * performing an action on it, or asynchronously as a result of an event
+ * happening at the subflow layer. Therefore, to maintain lock ordering,
+ * the only lock that can be acquired here is the thread lock, for
+ * signalling purposes.
+ */
+static void
+mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
+{
+#pragma unused(so, waitf)
+	struct mptsub *mpts = arg;
+	struct mptses *mpte = mpts->mpts_mpte;
+
+	VERIFY(mpte != NULL);
+
+	lck_mtx_lock(&mpte->mpte_thread_lock);
+	mptcp_thread_signal_locked(mpte);
+	lck_mtx_unlock(&mpte->mpte_thread_lock);
+}
+
+/*
+ * Subflow socket output.
+ *
+ * Called for sending data from MPTCP to the underlying subflow socket.
+ */ +int +mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *mp_so, *so; + size_t sb_cc = 0, tot_sent = 0; + struct mbuf *sb_mb; + int error = 0; + u_int64_t mpt_dsn = 0; + struct mptcb *mp_tp = mpte->mpte_mptcb; + struct mbuf *mpt_mbuf = NULL; + unsigned int off = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + mp_so = mpte->mpte_mppcb->mpp_socket; + so = mpts->mpts_socket; + + DTRACE_MPTCP2(subflow__output, struct mptses *, mpte, + struct mptsub *, mpts); + + /* subflow socket is suspended? */ + if (mpts->mpts_flags & MPTSF_SUSPENDED) { + mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d is flow " + "controlled\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid)); + goto out; + } + + /* subflow socket is not MPTCP capable? */ + if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) && + !(mpts->mpts_flags & MPTSF_MP_DEGRADED)) { + mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d not " + "MPTCP capable\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid)); + goto out; + } + + /* Remove Addr Option is not sent reliably as per I-D */ + if (mpte->mpte_flags & MPTE_SND_REM_ADDR) { + struct tcpcb *tp = intotcpcb(sotoinpcb(so)); + tp->t_rem_aid = mpte->mpte_lost_aid; + if (mptcp_remaddr_enable) + tp->t_mpflags |= TMPF_SND_REM_ADDR; + mpte->mpte_flags &= ~MPTE_SND_REM_ADDR; + } + + /* + * The mbuf chains containing the metadata (as well as pointing to + * the user data sitting at the MPTCP output queue) would then be + * sent down to the subflow socket. + * + * Some notes on data sequencing: + * + * a. Each mbuf must be a M_PKTHDR. + * b. MPTCP metadata is stored in the mptcp_pktinfo structure + * in the mbuf pkthdr structure. + * c. Each mbuf containing the MPTCP metadata must have its + * pkt_flags marked with the PKTF_MPTCP flag. + */ + + /* First, drop acknowledged data */ + sb_mb = mp_so->so_snd.sb_mb; + if (sb_mb == NULL) { + goto out; + } + + VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP); + + mpt_mbuf = sb_mb; + while (mpt_mbuf && mpt_mbuf->m_pkthdr.mp_rlen == 0) { + mpt_mbuf = mpt_mbuf->m_next; + } + if (mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP)) { + mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn; + } else { + goto out; + } + + MPT_LOCK(mp_tp); + if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) { + int len = 0; + len = mp_tp->mpt_snduna - mpt_dsn; + sbdrop(&mp_so->so_snd, len); + + } + + /* + * In degraded mode, we don't receive data acks, so force free + * mbufs less than snd_nxt + */ + mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn; + if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) && + MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_sndnxt)) { + int len = 0; + len = mp_tp->mpt_sndnxt - mpt_dsn; + sbdrop(&mp_so->so_snd, len); + mp_tp->mpt_snduna = mp_tp->mpt_sndnxt; + } + + /* + * Adjust the subflow's notion of next byte to send based on + * the last unacknowledged byte + */ + if (MPTCP_SEQ_LT(mpts->mpts_sndnxt, mp_tp->mpt_snduna)) { + mpts->mpts_sndnxt = mp_tp->mpt_snduna; + } + + /* + * Adjust the top level notion of next byte used for retransmissions + * and sending FINs. 
+	 */
+	if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
+		mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
+	}
+
+
+	/* Now determine the offset from which to start transmitting data */
+	sb_mb = mp_so->so_snd.sb_mb;
+	sb_cc = mp_so->so_snd.sb_cc;
+	if (sb_mb == NULL) {
+		MPT_UNLOCK(mp_tp);
+		goto out;
+	}
+	if (MPTCP_SEQ_LT(mpts->mpts_sndnxt, mp_tp->mpt_sndmax)) {
+		off = mpts->mpts_sndnxt - mp_tp->mpt_snduna;
+		sb_cc -= off;
+	} else {
+		MPT_UNLOCK(mp_tp);
+		goto out;
+	}
+	MPT_UNLOCK(mp_tp);
+
+	mpt_mbuf = sb_mb;
+	mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
+
+	while (mpt_mbuf && ((mpt_mbuf->m_pkthdr.mp_rlen == 0) ||
+	    (mpt_mbuf->m_pkthdr.mp_rlen <= off))) {
+		off -= mpt_mbuf->m_pkthdr.mp_rlen;
+		mpt_mbuf = mpt_mbuf->m_next;
+		mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
+	}
+	if ((mpts->mpts_connid == 2) || (mpts->mpts_flags & MPTSF_MP_DEGRADED))
+		mptcplog((LOG_INFO, "%s: snduna = %llu off = %u id = %d"
+		    " %llu \n",
+		    __func__,
+		    mp_tp->mpt_snduna, off, mpts->mpts_connid,
+		    mpts->mpts_sndnxt));
+
+	VERIFY(mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));
+
+	while (tot_sent < sb_cc) {
+		struct mbuf *m;
+		size_t mlen, len = 0;
+
+		mlen = mpt_mbuf->m_pkthdr.mp_rlen;
+		mlen -= off;
+		if (mlen == 0)
+			goto out;
+
+		if (mlen > sb_cc) {
+			panic("%s: unexpected %zu %zu \n", __func__,
+			    mlen, sb_cc);
+		}
+
+		m = m_copym_mode(mpt_mbuf, off, mlen, M_DONTWAIT,
+		    M_COPYM_COPY_HDR);
+		if (m == NULL) {
+			error = ENOBUFS;
+			break;
+		}
+
+		/* Create a DSN mapping for the data (m_copym does it) */
+		mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
+		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
+		m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
+		m->m_pkthdr.mp_dsn = mpt_dsn + off;
+		m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
+		m->m_pkthdr.mp_rlen = mlen;
+		mpts->mpts_rel_seq += mlen;
+		m->m_pkthdr.len = mlen;
+
+		/* last contiguous mapping is stored for error cases */
+		if (mpts->mpts_lastmap.mptsl_dsn +
+		    mpts->mpts_lastmap.mptsl_len == mpt_dsn) {
+			mpts->mpts_lastmap.mptsl_len += tot_sent;
+		} else if (MPTCP_SEQ_LT((mpts->mpts_lastmap.mptsl_dsn +
+		    mpts->mpts_lastmap.mptsl_len), mpt_dsn)) {
+			if (m->m_pkthdr.mp_dsn == 0)
+				panic("%s %llu", __func__, mpt_dsn);
+			mpts->mpts_lastmap.mptsl_dsn = m->m_pkthdr.mp_dsn;
+			mpts->mpts_lastmap.mptsl_sseq = m->m_pkthdr.mp_rseq;
+			mpts->mpts_lastmap.mptsl_len = m->m_pkthdr.mp_rlen;
+		}
+
+		error = sock_sendmbuf(so, NULL, m, 0, &len);
+		DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
+		    struct sockbuf *, &so->so_rcv,
+		    struct sockbuf *, &so->so_snd,
+		    struct mptses *, mpte, struct mptsub *, mpts,
+		    size_t, mlen);
+		if (error != 0) {
+			mptcplog((LOG_ERR, "%s: len = %zu error = %d \n",
+			    __func__, len, error));
+			break;
+		}
+		mpts->mpts_sndnxt += mlen;
+		MPT_LOCK(mp_tp);
+		if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mpts->mpts_sndnxt)) {
+			if (MPTCP_DATASEQ_HIGH32(mpts->mpts_sndnxt) >
+			    MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt))
+				mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
+			mp_tp->mpt_sndnxt = mpts->mpts_sndnxt;
+		}
+		MPT_UNLOCK(mp_tp);
+		if (len != mlen) {
+			mptcplog((LOG_ERR, "%s: cid %d wrote %zu "
+			    "(expected %zu)\n", __func__,
+			    mpts->mpts_connid, len, mlen));
+		}
+		tot_sent += mlen;
+		off = 0;
+		mpt_mbuf = mpt_mbuf->m_next;
+	}
+
+	if (error != 0 && error != EWOULDBLOCK) {
+		mptcplog((LOG_ERR, "MPTCP ERROR %s: cid %d error %d\n",
+		    __func__, mpts->mpts_connid, error));
+	}
+	if (error == 0) {
+		if ((mpts->mpts_connid == 2) ||
+		    (mpts->mpts_flags & MPTSF_MP_DEGRADED))
+			mptcplog((LOG_DEBUG, "%s: cid %d wrote %zu %zu\n",
+			    __func__, mpts->mpts_connid, tot_sent,
+			    sb_cc));
+		MPT_LOCK(mp_tp);
mptcp_cancel_timer(mp_tp, MPTT_REXMT); + MPT_UNLOCK(mp_tp); + } +out: + return (error); +} + +/* + * Subflow socket control event upcall. + * + * Called when the associated subflow socket posted one or more control events. + * The subflow socket lock has been released prior to invoking the callback. + * Note that the upcall may occur synchronously as a result of MPTCP performing + * an action on it, or asynchronously as a result of an event happening at the + * subflow layer. Therefore, to maintain lock ordering, the only lock that can + * be acquired here is the thread lock, for signalling purposes. + */ +static void +mptcp_subflow_eupcall(struct socket *so, void *arg, uint32_t events) +{ +#pragma unused(so) + struct mptsub *mpts = arg; + struct mptses *mpte = mpts->mpts_mpte; + + VERIFY(mpte != NULL); + + lck_mtx_lock(&mpte->mpte_thread_lock); + atomic_bitset_32(&mpts->mpts_evctl, events); + mptcp_thread_signal_locked(mpte); + lck_mtx_unlock(&mpte->mpte_thread_lock); +} + +/* + * Subflow socket control events. + * + * Called for handling events related to the underlying subflow socket. + */ +static ev_ret_t +mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts) +{ + uint32_t events; + ev_ret_t ret = MPTS_EVRET_OK; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + + /* bail if there's nothing to process */ + if ((events = mpts->mpts_evctl) == 0) + return (ret); + + if (events & (SO_FILT_HINT_CONNRESET|SO_FILT_HINT_MUSTRST| + SO_FILT_HINT_CANTRCVMORE|SO_FILT_HINT_CANTSENDMORE| + SO_FILT_HINT_TIMEOUT|SO_FILT_HINT_NOSRCADDR| + SO_FILT_HINT_IFDENIED|SO_FILT_HINT_SUSPEND| + SO_FILT_HINT_DISCONNECTED)) { + events |= SO_FILT_HINT_MPFAILOVER; + } + + DTRACE_MPTCP3(subflow__events, struct mptses *, mpte, + struct mptsub *, mpts, uint32_t, events); + + mptcplog2((LOG_DEBUG, "%s: cid %d events=%b\n", __func__, + mpts->mpts_connid, events, SO_FILT_HINT_BITS)); + + if ((events & SO_FILT_HINT_MPFAILOVER) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_failover_ev(mpte, mpts); + events &= ~SO_FILT_HINT_MPFAILOVER; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_CONNRESET) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_connreset_ev(mpte, mpts); + events &= ~SO_FILT_HINT_CONNRESET; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_MUSTRST) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_mustrst_ev(mpte, mpts); + events &= ~SO_FILT_HINT_MUSTRST; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_CANTRCVMORE) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_cantrcvmore_ev(mpte, mpts); + events &= ~SO_FILT_HINT_CANTRCVMORE; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_CANTSENDMORE) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_cantsendmore_ev(mpte, mpts); + events &= ~SO_FILT_HINT_CANTSENDMORE; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_TIMEOUT) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_timeout_ev(mpte, mpts); + events &= ~SO_FILT_HINT_TIMEOUT; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_NOSRCADDR) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_nosrcaddr_ev(mpte, mpts); + events &= ~SO_FILT_HINT_NOSRCADDR; + ret = ((error >= MPTS_EVRET_OK) ? 
MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_IFDENIED) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_ifdenied_ev(mpte, mpts); + events &= ~SO_FILT_HINT_IFDENIED; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_SUSPEND) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_suspend_ev(mpte, mpts); + events &= ~SO_FILT_HINT_SUSPEND; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_RESUME) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_resume_ev(mpte, mpts); + events &= ~SO_FILT_HINT_RESUME; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_CONNECTED) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_connected_ev(mpte, mpts); + events &= ~SO_FILT_HINT_CONNECTED; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_MPSTATUS) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_mpstatus_ev(mpte, mpts); + events &= ~SO_FILT_HINT_MPSTATUS; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + if ((events & SO_FILT_HINT_DISCONNECTED) && (ret >= MPTS_EVRET_OK)) { + ev_ret_t error = mptcp_subflow_disconnected_ev(mpte, mpts); + events &= ~SO_FILT_HINT_DISCONNECTED; + ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error); + } + /* + * We should be getting only events specified via sock_catchevents(), + * so loudly complain if we have any unprocessed one(s). + */ + if (events != 0 || ret < MPTS_EVRET_OK) { + mptcplog((LOG_ERR, "%s%s: cid %d evret %s (%d)" + " unhandled events=%b\n", + (events != 0) ? "MPTCP_ERROR " : "", + __func__, mpts->mpts_connid, + mptcp_evret2str(ret), ret, events, SO_FILT_HINT_BITS)); + } + + /* clear the ones we've processed */ + atomic_bitclear_32(&mpts->mpts_evctl, ~events); + + return (ret); +} + +/* + * Handle SO_FILT_HINT_CONNRESET subflow socket event. + */ +static ev_ret_t +mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *mp_so, *so; + struct mptcb *mp_tp; + boolean_t linger; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + so = mpts->mpts_socket; + + linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && + !(mp_so->so_flags & SOF_PCBCLEARING)); + + mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO"))); + + if (mpts->mpts_soerror == 0) + mpts->mpts_soerror = ECONNREFUSED; + + /* + * We got a TCP RST for this subflow connection. + * + * Right now, we simply propagate ECONNREFUSED to the MPTCP socket + * client if the MPTCP connection has not been established. Otherwise + * we close the socket. + */ + mptcp_subflow_disconnect(mpte, mpts, !linger); + + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { + mp_so->so_error = ECONNREFUSED; + } + MPT_UNLOCK(mp_tp); + + /* + * Keep the subflow socket around, unless the MPTCP socket has + * been detached or the subflow has been disconnected explicitly, + * in which case it should be deleted right away. + */ + return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); +} + +/* + * Handle SO_FILT_HINT_CANTRCVMORE subflow socket event. 
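+ * A FIN was received on the subflow; any data accompanying it is still
+ * handed up to the MPTCP socket. The handler itself takes no action
+ * beyond keeping the subflow socket around.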
+ */ +static ev_ret_t +mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *so; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + + so = mpts->mpts_socket; + + mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid)); + + /* + * We got a FIN for this subflow connection. This subflow socket + * is no longer available for receiving data; + * The FIN may arrive with data. The data is handed up to the + * mptcp socket and the subflow is disconnected. + */ + + return (MPTS_EVRET_OK); /* keep the subflow socket around */ +} + +/* + * Handle SO_FILT_HINT_CANTSENDMORE subflow socket event. + */ +static ev_ret_t +mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *so; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + + so = mpts->mpts_socket; + + mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid)); + return (MPTS_EVRET_OK); /* keep the subflow socket around */ +} + +/* + * Handle SO_FILT_HINT_TIMEOUT subflow socket event. + */ +static ev_ret_t +mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *mp_so, *so; + struct mptcb *mp_tp; + boolean_t linger; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + so = mpts->mpts_socket; + + linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && + !(mp_so->so_flags & SOF_PCBCLEARING)); + + mptcplog((LOG_NOTICE, "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO"))); + + if (mpts->mpts_soerror == 0) + mpts->mpts_soerror = ETIMEDOUT; + + /* + * The subflow connection has timed out. + * + * Right now, we simply propagate ETIMEDOUT to the MPTCP socket + * client if the MPTCP connection has not been established. Otherwise + * drop it. + */ + mptcp_subflow_disconnect(mpte, mpts, !linger); + + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { + mp_so->so_error = ETIMEDOUT; + } + MPT_UNLOCK(mp_tp); + + /* + * Keep the subflow socket around, unless the MPTCP socket has + * been detached or the subflow has been disconnected explicitly, + * in which case it should be deleted right away. + */ + return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); +} + +/* + * Handle SO_FILT_HINT_NOSRCADDR subflow socket event. + */ +static ev_ret_t +mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *mp_so, *so; + struct mptcb *mp_tp; + boolean_t linger; + struct tcpcb *tp = NULL; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + so = mpts->mpts_socket; + + /* Not grabbing socket lock as t_local_aid is write once only */ + tp = intotcpcb(sotoinpcb(so)); + /* + * This overwrites any previous mpte_lost_aid to avoid storing + * too much state when the typical case has only two subflows. + */ + mpte->mpte_flags |= MPTE_SND_REM_ADDR; + mpte->mpte_lost_aid = tp->t_local_aid; + + linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && + !(mp_so->so_flags & SOF_PCBCLEARING)); + + mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? 
"YES" : "NO"))); + + if (mpts->mpts_soerror == 0) + mpts->mpts_soerror = EADDRNOTAVAIL; + + /* + * The subflow connection has lost its source address. + * + * Right now, we simply propagate EADDRNOTAVAIL to the MPTCP socket + * client if the MPTCP connection has not been established. If it + * has been established with one subflow , we keep the MPTCP + * connection valid without any subflows till closed by application. + * This lets tcp connection manager decide whether to close this or + * not as it reacts to reachability changes too. + */ + mptcp_subflow_disconnect(mpte, mpts, !linger); + + MPT_LOCK(mp_tp); + if ((mp_tp->mpt_state < MPTCPS_ESTABLISHED) && + (mp_so->so_flags & SOF_NOADDRAVAIL)) { + mp_so->so_error = EADDRNOTAVAIL; + } + MPT_UNLOCK(mp_tp); + + /* + * Keep the subflow socket around, unless the MPTCP socket has + * been detached or the subflow has been disconnected explicitly, + * in which case it should be deleted right away. + */ + return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); +} + +/* + * Handle SO_FILT_HINT_MPFAILOVER subflow socket event + */ +static ev_ret_t +mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct mptsub *mpts_alt = NULL; + struct socket *so = NULL; + struct socket *mp_so; + int altpath_exists = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + mp_so = mpte->mpte_mppcb->mpp_socket; + mptcplog2((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + + MPTS_UNLOCK(mpts); + mpts_alt = mptcp_get_subflow(mpte, mpts); + + /* + * If there is no alternate eligible subflow, ignore the + * failover hint. + */ + if (mpts_alt == NULL) { + mptcplog2((LOG_WARNING, "%s: no alternate path\n", __func__)); + MPTS_LOCK(mpts); + goto done; + } + MPTS_LOCK(mpts_alt); + altpath_exists = 1; + so = mpts_alt->mpts_socket; + if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) { + socket_lock(so, 1); + /* All data acknowledged */ + if (so->so_snd.sb_cc == 0) { + so->so_flags &= ~SOF_MP_TRYFAILOVER; + mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER; + } else { + /* no alternate path available */ + altpath_exists = 0; + } + socket_unlock(so, 1); + } + if (altpath_exists) { + mpts_alt->mpts_flags |= MPTSF_ACTIVE; + struct mptcb *mp_tp = mpte->mpte_mptcb; + /* Bring the subflow's notion of snd_nxt into the send window */ + MPT_LOCK(mp_tp); + mpts_alt->mpts_sndnxt = mp_tp->mpt_snduna; + MPT_UNLOCK(mp_tp); + mpte->mpte_active_sub = mpts_alt; + socket_lock(so, 1); + sowwakeup(so); + socket_unlock(so, 1); + } + MPTS_UNLOCK(mpts_alt); + + if (altpath_exists) { + soevent(mp_so, + SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); + mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx switched from " + "%d to %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mpts->mpts_connid, mpts_alt->mpts_connid)); + tcpstat.tcps_mp_switches++; + } + + MPTS_LOCK(mpts); + if (altpath_exists) { + mpts->mpts_flags |= MPTSF_FAILINGOVER; + mpts->mpts_flags &= ~MPTSF_ACTIVE; + } else { + so = mpts->mpts_socket; + socket_lock(so, 1); + so->so_flags &= ~SOF_MP_TRYFAILOVER; + socket_unlock(so, 1); + } +done: + MPTS_LOCK_ASSERT_HELD(mpts); + return (MPTS_EVRET_OK); +} + +/* + * Handle SO_FILT_HINT_IFDENIED subflow socket event. 
+ */ +static ev_ret_t +mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *mp_so, *so; + struct mptcb *mp_tp; + boolean_t linger; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + so = mpts->mpts_socket; + + linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && + !(mp_so->so_flags & SOF_PCBCLEARING)); + + mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO"))); + + if (mpts->mpts_soerror == 0) + mpts->mpts_soerror = EHOSTUNREACH; + + /* + * The subflow connection cannot use the outgoing interface. + * + * Right now, we simply propagate EHOSTUNREACH to the MPTCP socket + * client if the MPTCP connection has not been established. If it + * has been established, let the upper layer call disconnectx. + */ + mptcp_subflow_disconnect(mpte, mpts, !linger); + MPTS_UNLOCK(mpts); + + soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED); + + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { + mp_so->so_error = EHOSTUNREACH; + } + MPT_UNLOCK(mp_tp); + + MPTS_LOCK(mpts); + /* + * Keep the subflow socket around, unless the MPTCP socket has + * been detached or the subflow has been disconnected explicitly, + * in which case it should be deleted right away. + */ + return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); +} + +/* + * Handle SO_FILT_HINT_SUSPEND subflow socket event. + */ +static ev_ret_t +mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *so; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + + so = mpts->mpts_socket; + + /* the subflow connection is being flow controlled */ + mpts->mpts_flags |= MPTSF_SUSPENDED; + + mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, + mpts->mpts_connid)); + + return (MPTS_EVRET_OK); /* keep the subflow socket around */ +} + +/* + * Handle SO_FILT_HINT_RESUME subflow socket event. + */ +static ev_ret_t +mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *so; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + + so = mpts->mpts_socket; + + /* the subflow connection is no longer flow controlled */ + mpts->mpts_flags &= ~MPTSF_SUSPENDED; + + mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid)); + + return (MPTS_EVRET_OK); /* keep the subflow socket around */ +} + +/* + * Handle SO_FILT_HINT_CONNECTED subflow socket event. + */ +static ev_ret_t +mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts) +{ + char buf0[MAX_IPv6_STR_LEN], buf1[MAX_IPv6_STR_LEN]; + struct sockaddr_entry *src_se, *dst_se; + struct sockaddr_storage src; + struct socket *mp_so, *so; + struct mptcb *mp_tp; + struct ifnet *outifp; + int af, error = 0; + boolean_t mpok = FALSE; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + + MPTS_LOCK_ASSERT_HELD(mpts); + so = mpts->mpts_socket; + af = mpts->mpts_family; + + if (mpts->mpts_flags & MPTSF_CONNECTED) + return (MPTS_EVRET_OK); + + if ((mpts->mpts_flags & MPTSF_DISCONNECTED) || + (mpts->mpts_flags & MPTSF_DISCONNECTING)) { + return (MPTS_EVRET_OK); + } + + /* + * The subflow connection has been connected. Find out whether it + * is connected as a regular TCP or as a MPTCP subflow. The idea is: + * + * a. 
If MPTCP connection is not yet established, then this must be + * the first subflow connection. If MPTCP failed to negotiate, + * indicate to the MPTCP socket client via EPROTO, that the + * underlying TCP connection may be peeled off via peeloff(2). + * Otherwise, mark the MPTCP socket as connected. + * + * b. If MPTCP connection has been established, then this must be + * one of the subsequent subflow connections. If MPTCP failed + * to negotiate, disconnect the connection since peeloff(2) + * is no longer possible. + * + * Right now, we simply unblock any waiters at the MPTCP socket layer + * if the MPTCP connection has not been established. + */ + socket_lock(so, 0); + + if (so->so_state & SS_ISDISCONNECTED) { + /* + * With MPTCP joins, a connection is connected at the subflow + * level, but the 4th ACK from the server elevates the MPTCP + * subflow to connected state. So there is a small window + * where the subflow could get disconnected before the + * connected event is processed. + */ + socket_unlock(so, 0); + return (MPTS_EVRET_OK); + } + + mpts->mpts_soerror = 0; + mpts->mpts_flags &= ~MPTSF_CONNECTING; + mpts->mpts_flags |= MPTSF_CONNECTED; + if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) + mpts->mpts_flags |= MPTSF_MP_CAPABLE; + + VERIFY(mpts->mpts_dst_sl != NULL); + dst_se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head); + VERIFY(dst_se != NULL && dst_se->se_addr != NULL && + dst_se->se_addr->sa_family == af); + + VERIFY(mpts->mpts_src_sl != NULL); + src_se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head); + VERIFY(src_se != NULL && src_se->se_addr != NULL && + src_se->se_addr->sa_family == af); + + /* get/check source IP address */ + switch (af) { + case AF_INET: { + error = in_getsockaddr_s(so, &src); + if (error == 0) { + struct sockaddr_in *ms = SIN(src_se->se_addr); + struct sockaddr_in *s = SIN(&src); + + VERIFY(s->sin_len == ms->sin_len); + VERIFY(ms->sin_family == AF_INET); + + if ((mpts->mpts_flags & MPTSF_BOUND_IP) && + bcmp(&ms->sin_addr, &s->sin_addr, + sizeof (ms->sin_addr)) != 0) { + mptcplog((LOG_ERR, "%s: cid %d local " + "address %s (expected %s)\n", __func__, + mpts->mpts_connid, inet_ntop(AF_INET, + (void *)&s->sin_addr.s_addr, buf0, + sizeof (buf0)), inet_ntop(AF_INET, + (void *)&ms->sin_addr.s_addr, buf1, + sizeof (buf1)))); + } + bcopy(s, ms, sizeof (*s)); + } + break; + } +#if INET6 + case AF_INET6: { + error = in6_getsockaddr_s(so, &src); + if (error == 0) { + struct sockaddr_in6 *ms = SIN6(src_se->se_addr); + struct sockaddr_in6 *s = SIN6(&src); + + VERIFY(s->sin6_len == ms->sin6_len); + VERIFY(ms->sin6_family == AF_INET6); + + if ((mpts->mpts_flags & MPTSF_BOUND_IP) && + bcmp(&ms->sin6_addr, &s->sin6_addr, + sizeof (ms->sin6_addr)) != 0) { + mptcplog((LOG_ERR, "%s: cid %d local " + "address %s (expected %s)\n", __func__, + mpts->mpts_connid, inet_ntop(AF_INET6, + (void *)&s->sin6_addr, buf0, + sizeof (buf0)), inet_ntop(AF_INET6, + (void *)&ms->sin6_addr, buf1, + sizeof (buf1)))); + } + bcopy(s, ms, sizeof (*s)); + } + break; + } +#endif /* INET6 */ + default: + VERIFY(0); + /* NOTREACHED */ + } + + if (error != 0) { + mptcplog((LOG_ERR, "%s: cid %d getsockaddr failed (%d)\n", + __func__, mpts->mpts_connid, error)); + } + + /* get/verify the outbound interface */ + outifp = sotoinpcb(so)->inp_last_outifp; /* could be NULL */ + if (mpts->mpts_flags & MPTSF_BOUND_IF) { + VERIFY(mpts->mpts_outif != NULL); + if (mpts->mpts_outif != outifp) { + mptcplog((LOG_ERR, "%s: cid %d outif %s " + "(expected %s)\n", __func__, mpts->mpts_connid, + ((outifp != NULL) ? 
outifp->if_xname : "NULL"), + mpts->mpts_outif->if_xname)); + if (outifp == NULL) + outifp = mpts->mpts_outif; + } + } else { + mpts->mpts_outif = outifp; + } + + socket_unlock(so, 0); + + mptcplog((LOG_DEBUG, "%s: cid %d outif %s %s[%d] -> %s[%d] " + "is %s\n", __func__, mpts->mpts_connid, ((outifp != NULL) ? + outifp->if_xname : "NULL"), inet_ntop(af, (af == AF_INET) ? + (void *)&SIN(src_se->se_addr)->sin_addr.s_addr : + (void *)&SIN6(src_se->se_addr)->sin6_addr, buf0, sizeof (buf0)), + ((af == AF_INET) ? ntohs(SIN(src_se->se_addr)->sin_port) : + ntohs(SIN6(src_se->se_addr)->sin6_port)), + inet_ntop(af, ((af == AF_INET) ? + (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr : + (void *)&SIN6(dst_se->se_addr)->sin6_addr), buf1, sizeof (buf1)), + ((af == AF_INET) ? ntohs(SIN(dst_se->se_addr)->sin_port) : + ntohs(SIN6(dst_se->se_addr)->sin6_port)), + ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? + "MPTCP capable" : "a regular TCP"))); + + mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE); + MPTS_UNLOCK(mpts); + + soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); + + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { + /* case (a) above */ + if (!mpok) { + mp_tp->mpt_flags |= MPTCPF_PEEL_OFF; + (void) mptcp_drop(mpte, mp_tp, EPROTO); + MPT_UNLOCK(mp_tp); + } else { + if (mptcp_init_authparms(mp_tp) != 0) { + mp_tp->mpt_flags |= MPTCPF_PEEL_OFF; + (void) mptcp_drop(mpte, mp_tp, EPROTO); + MPT_UNLOCK(mp_tp); + mpok = FALSE; + } else { + mp_tp->mpt_state = MPTCPS_ESTABLISHED; + mpte->mpte_associd = mpts->mpts_connid; + DTRACE_MPTCP2(state__change, + struct mptcb *, mp_tp, + uint32_t, 0 /* event */); + mptcp_init_statevars(mp_tp); + MPT_UNLOCK(mp_tp); + + (void) mptcp_setconnorder(mpte, + mpts->mpts_connid, 1); + soisconnected(mp_so); + } + } + MPTS_LOCK(mpts); + if (mpok) { + /* Initialize the relative sequence number */ + mpts->mpts_rel_seq = 1; + mpts->mpts_flags |= MPTSF_MPCAP_CTRSET; + mpte->mpte_nummpcapflows++; + MPT_LOCK_SPIN(mp_tp); + mpts->mpts_sndnxt = mp_tp->mpt_snduna; + MPT_UNLOCK(mp_tp); + } + } else if (mpok) { + MPT_UNLOCK(mp_tp); + /* + * case (b) above + * In case of additional flows, the MPTCP socket is not + * MPTSF_MP_CAPABLE until an ACK is received from server + * for 3-way handshake. TCP would have guaranteed that this + * is an MPTCP subflow. + */ + MPTS_LOCK(mpts); + mpts->mpts_flags |= MPTSF_MPCAP_CTRSET; + mpte->mpte_nummpcapflows++; + mpts->mpts_rel_seq = 1; + MPT_LOCK_SPIN(mp_tp); + mpts->mpts_sndnxt = mp_tp->mpt_snduna; + MPT_UNLOCK(mp_tp); + } + MPTS_LOCK_ASSERT_HELD(mpts); + + return (MPTS_EVRET_OK); /* keep the subflow socket around */ +} + +/* + * Handle SO_FILT_HINT_DISCONNECTED subflow socket event. + */ +static ev_ret_t +mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *mp_so, *so; + struct mptcb *mp_tp; + boolean_t linger; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + so = mpts->mpts_socket; + + linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && + !(mp_so->so_flags & SOF_PCBCLEARING)); + + mptcplog2((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__, + mpts->mpts_connid, (linger ? "YES" : "NO"))); + + if (mpts->mpts_flags & MPTSF_DISCONNECTED) + return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); + + /* + * Clear flags that are used by getconninfo to return state. + * Retain like MPTSF_DELETEOK, MPTSF_ACTIVE for internal purposes. 
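+ * After this point, MPTSF_DISCONNECTED (set below) is the only state
+ * getconninfo will report for this subflow.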
+ */ + mpts->mpts_flags &= ~(MPTSF_CONNECTING|MPTSF_CONNECT_PENDING| + MPTSF_CONNECTED|MPTSF_DISCONNECTING|MPTSF_PREFERRED| + MPTSF_MP_CAPABLE|MPTSF_MP_READY|MPTSF_MP_DEGRADED| + MPTSF_SUSPENDED|MPTSF_ACTIVE); + mpts->mpts_flags |= MPTSF_DISCONNECTED; + + /* + * The subflow connection has been disconnected. + * + * Right now, we simply unblock any waiters at the MPTCP socket layer + * if the MPTCP connection has not been established. + */ + MPTS_UNLOCK(mpts); + + soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED); + + if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) { + mpte->mpte_nummpcapflows--; + mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET; + } + + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { + MPT_UNLOCK(mp_tp); + soisdisconnected(mp_so); + } else { + MPT_UNLOCK(mp_tp); + } + + MPTS_LOCK(mpts); + /* + * The underlying subflow socket has been disconnected; + * it is no longer useful to us. Keep the subflow socket + * around, unless the MPTCP socket has been detached or + * the subflow has been disconnected explicitly, in which + * case it should be deleted right away. + */ + return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE); +} + +/* + * Handle SO_FILT_HINT_MPSTATUS subflow socket event + */ +static ev_ret_t +mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *mp_so, *so; + struct mptcb *mp_tp; + ev_ret_t ret = MPTS_EVRET_OK_UPDATE; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + + MPTS_LOCK_ASSERT_HELD(mpts); + so = mpts->mpts_socket; + + socket_lock(so, 0); + MPT_LOCK(mp_tp); + + if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) + mpts->mpts_flags |= MPTSF_MP_CAPABLE; + else + mpts->mpts_flags &= ~MPTSF_MP_CAPABLE; + + if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) { + if (mpts->mpts_flags & MPTSF_MP_DEGRADED) + goto done; + mpts->mpts_flags |= MPTSF_MP_DEGRADED; + } + else + mpts->mpts_flags &= ~MPTSF_MP_DEGRADED; + + if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY) + mpts->mpts_flags |= MPTSF_MP_READY; + else + mpts->mpts_flags &= ~MPTSF_MP_READY; + + if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { + mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP; + mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY; + } + + if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { + VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)); + ret = MPTS_EVRET_DISCONNECT_FALLBACK; + } else if (mpts->mpts_flags & MPTSF_MP_READY) { + mp_tp->mpt_flags |= MPTCPF_JOIN_READY; + ret = MPTS_EVRET_CONNECT_PENDING; + } + + mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d " + "mptsf=%b\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket), + mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid, + mpts->mpts_flags, MPTSF_BITS)); +done: + MPT_UNLOCK(mp_tp); + socket_unlock(so, 0); + + return (ret); +} + +/* + * Handle SO_FILT_HINT_MUSTRST subflow socket event + */ +static ev_ret_t +mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts) +{ + struct socket *mp_so, *so; + struct mptcb *mp_tp; + boolean_t linger; + + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPTS_LOCK_ASSERT_HELD(mpts); + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + so = mpts->mpts_socket; + + linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) && + !(mp_so->so_flags & SOF_PCBCLEARING)); + + if (mpts->mpts_soerror == 0) + mpts->mpts_soerror = ECONNABORTED; + + so->so_error = ECONNABORTED; + + 
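/* abort the subflow with an explicit RST; tcp_respond() below builds and emits the segment */ +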
/* We got an invalid option or a fast close */
+    socket_lock(so, 0);
+    struct tcptemp *t_template;
+    struct inpcb *inp = sotoinpcb(so);
+    struct tcpcb *tp = NULL;
+
+    tp = intotcpcb(inp);
+
+    t_template = tcp_maketemplate(tp);
+    if (t_template) {
+        unsigned int ifscope, nocell = 0;
+
+        if (inp->inp_flags & INP_BOUND_IF)
+            ifscope = inp->inp_boundifp->if_index;
+        else
+            ifscope = IFSCOPE_NONE;
+
+        if (inp->inp_flags & INP_NO_IFT_CELLULAR)
+            nocell = 1;
+
+        tcp_respond(tp, t_template->tt_ipgen,
+            &t_template->tt_t, (struct mbuf *)NULL,
+            tp->rcv_nxt, tp->snd_una, TH_RST, ifscope, nocell);
+        (void) m_free(dtom(t_template));
+        mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d \n",
+            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+            mpts->mpts_connid));
+    }
+    socket_unlock(so, 0);
+    mptcp_subflow_disconnect(mpte, mpts, !linger);
+    MPTS_UNLOCK(mpts);
+
+    soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
+
+    MPT_LOCK(mp_tp);
+    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
+        mp_so->so_error = ECONNABORTED;
+    }
+    MPT_UNLOCK(mp_tp);
+
+    MPTS_LOCK(mpts);
+    /*
+     * Keep the subflow socket around unless the subflow has been
+     * disconnected explicitly.
+     */
+    return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
+}
+
+static const char *
+mptcp_evret2str(ev_ret_t ret)
+{
+    const char *c = "UNKNOWN";
+
+    switch (ret) {
+    case MPTS_EVRET_DELETE:
+        c = "MPTS_EVRET_DELETE";
+        break;
+    case MPTS_EVRET_CONNECT_PENDING:
+        c = "MPTS_EVRET_CONNECT_PENDING";
+        break;
+    case MPTS_EVRET_DISCONNECT_FALLBACK:
+        c = "MPTS_EVRET_DISCONNECT_FALLBACK";
+        break;
+    case MPTS_EVRET_OK:
+        c = "MPTS_EVRET_OK";
+        break;
+    case MPTS_EVRET_OK_UPDATE:
+        c = "MPTS_EVRET_OK_UPDATE";
+        break;
+    }
+    return (c);
+}
+
+/*
+ * Add a reference to a subflow structure; used by MPTS_ADDREF().
+ */
+void
+mptcp_subflow_addref(struct mptsub *mpts, int locked)
+{
+    if (!locked)
+        MPTS_LOCK(mpts);
+    else
+        MPTS_LOCK_ASSERT_HELD(mpts);
+
+    if (++mpts->mpts_refcnt == 0) {
+        panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
+        /* NOTREACHED */
+    }
+    if (!locked)
+        MPTS_UNLOCK(mpts);
+}
+
+/*
+ * Remove a reference held on a subflow structure; used by MPTS_REMREF().
+ */
+void
+mptcp_subflow_remref(struct mptsub *mpts)
+{
+    MPTS_LOCK(mpts);
+    if (mpts->mpts_refcnt == 0) {
+        panic("%s: mpts %p negative refcnt\n", __func__, mpts);
+        /* NOTREACHED */
+    }
+    if (--mpts->mpts_refcnt > 0) {
+        MPTS_UNLOCK(mpts);
+        return;
+    }
+    /* callee will unlock and destroy lock */
+    mptcp_subflow_free(mpts);
+}
+
+/*
+ * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
+ * caller must ensure that the option can be issued on subflow sockets, via
+ * MPOF_SUBFLOW_OK flag.
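+ * Typically used to replay options recorded on the MP socket onto a
+ * subflow, so subflows inherit the application's socket settings.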
+ */ +int +mptcp_subflow_sosetopt(struct mptses *mpte, struct socket *so, + struct mptopt *mpo) +{ + struct socket *mp_so; + struct sockopt sopt; + char buf[32]; + int error; + + VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK); + mpo->mpo_flags &= ~MPOF_INTERIM; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + + bzero(&sopt, sizeof (sopt)); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = mpo->mpo_level; + sopt.sopt_name = mpo->mpo_name; + sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval); + sopt.sopt_valsize = sizeof (int); + sopt.sopt_p = kernproc; + + error = sosetoptlock(so, &sopt, 0); /* already locked */ + if (error == 0) { + mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s " + "val %d set successful\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name, + buf, sizeof (buf)), mpo->mpo_intval)); + } else { + mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s " + "val %d set error %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name, + buf, sizeof (buf)), mpo->mpo_intval, error)); + } + return (error); +} + +/* + * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked, + * caller must ensure that the option can be issued on subflow sockets, via + * MPOF_SUBFLOW_OK flag. + */ +int +mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so, + struct mptopt *mpo) +{ + struct socket *mp_so; + struct sockopt sopt; + char buf[32]; + int error; + + VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK); + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + + bzero(&sopt, sizeof (sopt)); + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = mpo->mpo_level; + sopt.sopt_name = mpo->mpo_name; + sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval); + sopt.sopt_valsize = sizeof (int); + sopt.sopt_p = kernproc; + + error = sogetoptlock(so, &sopt, 0); /* already locked */ + if (error == 0) { + mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s " + "val %d get successful\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name, + buf, sizeof (buf)), mpo->mpo_intval)); + } else { + mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s get error %d\n", + __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(mpo->mpo_level, + mpo->mpo_name, buf, sizeof (buf)), error)); + } + return (error); +} + + +/* + * MPTCP garbage collector. + * + * This routine is called by the MP domain on-demand, periodic callout, + * which is triggered when a MPTCP socket is closed. The callout will + * repeat as long as this routine returns a non-zero value. 
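+ * The return value is the number of PCBs that were skipped or are still
+ * in use; once it drops to zero the callout can stop firing.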
+ */
+static uint32_t
+mptcp_gc(struct mppcbinfo *mppi)
+{
+    struct mppcb *mpp, *tmpp;
+    uint32_t active = 0;
+
+    lck_mtx_assert(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
+
+    mptcplog3((LOG_DEBUG, "%s: running\n", __func__));
+
+    TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
+        struct socket *mp_so;
+        struct mptses *mpte;
+        struct mptcb *mp_tp;
+
+        VERIFY(mpp->mpp_flags & MPP_ATTACHED);
+        mp_so = mpp->mpp_socket;
+        VERIFY(mp_so != NULL);
+        mpte = mptompte(mpp);
+        VERIFY(mpte != NULL);
+        mp_tp = mpte->mpte_mptcb;
+        VERIFY(mp_tp != NULL);
+
+        mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx found "
+            "(u=%d,r=%d,s=%d)\n", __func__,
+            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount,
+            mp_so->so_retaincnt, mpp->mpp_state));
+
+        if (!lck_mtx_try_lock(&mpp->mpp_lock)) {
+            mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
+                "(u=%d,r=%d)\n", __func__,
+                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+                mp_so->so_usecount, mp_so->so_retaincnt));
+            active++;
+            continue;
+        }
+
+        /* check again under the lock */
+        if (mp_so->so_usecount > 1) {
+            boolean_t wakeup = FALSE;
+            struct mptsub *mpts, *tmpts;
+
+            mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
+                "[u=%d,r=%d] %d %d\n", __func__,
+                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+                mp_so->so_usecount, mp_so->so_retaincnt,
+                mp_tp->mpt_gc_ticks,
+                mp_tp->mpt_state));
+            MPT_LOCK(mp_tp);
+            if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
+                if (mp_tp->mpt_gc_ticks > 0)
+                    mp_tp->mpt_gc_ticks--;
+                if (mp_tp->mpt_gc_ticks == 0) {
+                    wakeup = TRUE;
+                    if (mp_tp->mpt_localkey != NULL) {
+                        mptcp_free_key(
+                            mp_tp->mpt_localkey);
+                        mp_tp->mpt_localkey = NULL;
+                    }
+                }
+            }
+            MPT_UNLOCK(mp_tp);
+            if (wakeup) {
+                TAILQ_FOREACH_SAFE(mpts,
+                    &mpte->mpte_subflows, mpts_entry, tmpts) {
+                    MPTS_LOCK(mpts);
+                    mpts->mpts_flags |= MPTSF_DELETEOK;
+                    if (mpts->mpts_soerror == 0)
+                        mpts->mpts_soerror = ETIMEDOUT;
+                    mptcp_subflow_eupcall(mpts->mpts_socket,
+                        mpts, SO_FILT_HINT_DISCONNECTED);
+                    MPTS_UNLOCK(mpts);
+                }
+            }
+            lck_mtx_unlock(&mpp->mpp_lock);
+            active++;
+            continue;
+        }
+
+        if (mpp->mpp_state != MPPCB_STATE_DEAD) {
+            mptcplog3((LOG_DEBUG, "%s: mp_so 0x%llx skipped "
+                "[u=%d,r=%d,s=%d]\n", __func__,
+                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+                mp_so->so_usecount, mp_so->so_retaincnt,
+                mpp->mpp_state));
+            lck_mtx_unlock(&mpp->mpp_lock);
+            active++;
+            continue;
+        }
+
+        /*
+         * The PCB has been detached, and there is exactly 1 refcnt
+         * held by the MPTCP thread.  Signal that thread to terminate,
+         * after which the last refcnt will be released.  That will
+         * allow it to be destroyed below during the next round.
+         */
+        if (mp_so->so_usecount == 1) {
+            mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx scheduled for "
+                "termination [u=%d,r=%d]\n", __func__,
+                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+                mp_so->so_usecount, mp_so->so_retaincnt));
+            /* signal MPTCP thread to terminate */
+            mptcp_thread_terminate_signal(mpte);
+            lck_mtx_unlock(&mpp->mpp_lock);
+            active++;
+            continue;
+        }
+
+        mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
+            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+            mp_so->so_usecount, mp_so->so_retaincnt));
+        DTRACE_MPTCP4(dispose, struct socket *, mp_so,
+            struct sockbuf *, &mp_so->so_rcv,
+            struct sockbuf *, &mp_so->so_snd,
+            struct mppcb *, mpp);
+
+        mp_pcbdispose(mpp);
+    }
+
+    return (active);
+}
+
+/*
+ * Drop a MPTCP connection, reporting the specified error.
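+ * The error is stored in so_error (with any recorded soft error taking
+ * precedence over ETIMEDOUT) before the control block is torn down via
+ * mptcp_close().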
+ */ +struct mptses * +mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno) +{ + struct socket *mp_so; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPT_LOCK_ASSERT_HELD(mp_tp); + VERIFY(mpte->mpte_mptcb == mp_tp); + mp_so = mpte->mpte_mppcb->mpp_socket; + + mp_tp->mpt_state = MPTCPS_CLOSED; + DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp, + uint32_t, 0 /* event */); + + if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) + errno = mp_tp->mpt_softerror; + mp_so->so_error = errno; + + return (mptcp_close(mpte, mp_tp)); +} + +/* + * Close a MPTCP control block. + */ +struct mptses * +mptcp_close(struct mptses *mpte, struct mptcb *mp_tp) +{ + struct socket *mp_so; + struct mptsub *mpts, *tmpts; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + MPT_LOCK_ASSERT_HELD(mp_tp); + VERIFY(mpte->mpte_mptcb == mp_tp); + mp_so = mpte->mpte_mppcb->mpp_socket; + if (mp_tp->mpt_localkey != NULL) { + mptcp_free_key(mp_tp->mpt_localkey); + mp_tp->mpt_localkey = NULL; + } + + MPT_UNLOCK(mp_tp); + soisdisconnected(mp_so); + + MPT_LOCK(mp_tp); + if (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) { + return (NULL); + } + MPT_UNLOCK(mp_tp); + + /* Clean up all subflows */ + TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { + MPTS_LOCK(mpts); + mptcp_subflow_disconnect(mpte, mpts, TRUE); + MPTS_UNLOCK(mpts); + mptcp_subflow_del(mpte, mpts, TRUE); + } + MPT_LOCK(mp_tp); + + return (NULL); +} + +void +mptcp_notify_close(struct socket *so) +{ + soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED)); +} + +/* + * Signal MPTCP thread to wake up. + */ +void +mptcp_thread_signal(struct mptses *mpte) +{ + lck_mtx_lock(&mpte->mpte_thread_lock); + mptcp_thread_signal_locked(mpte); + lck_mtx_unlock(&mpte->mpte_thread_lock); +} + +/* + * Signal MPTCP thread to wake up (locked version) + */ +static void +mptcp_thread_signal_locked(struct mptses *mpte) +{ + lck_mtx_assert(&mpte->mpte_thread_lock, LCK_MTX_ASSERT_OWNED); + + mpte->mpte_thread_reqs++; + if (!mpte->mpte_thread_active && mpte->mpte_thread != THREAD_NULL) + wakeup_one((caddr_t)&mpte->mpte_thread); +} + +/* + * Signal MPTCP thread to terminate. + */ +static void +mptcp_thread_terminate_signal(struct mptses *mpte) +{ + lck_mtx_lock(&mpte->mpte_thread_lock); + if (mpte->mpte_thread != THREAD_NULL) { + mpte->mpte_thread = THREAD_NULL; + mpte->mpte_thread_reqs++; + if (!mpte->mpte_thread_active) + wakeup_one((caddr_t)&mpte->mpte_thread); + } + lck_mtx_unlock(&mpte->mpte_thread_lock); +} + +/* + * MPTCP thread workloop. + */ +static void +mptcp_thread_dowork(struct mptses *mpte) +{ + struct socket *mp_so; + struct mptsub *mpts, *tmpts; + boolean_t connect_pending = FALSE, disconnect_fallback = FALSE; + boolean_t conninfo_update = FALSE; + + MPTE_LOCK(mpte); /* same as MP socket lock */ + VERIFY(mpte->mpte_mppcb != NULL); + mp_so = mpte->mpte_mppcb->mpp_socket; + VERIFY(mp_so != NULL); + + TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { + ev_ret_t ret; + + MPTS_LOCK(mpts); + MPTS_ADDREF_LOCKED(mpts); /* for us */ + + /* Update process ownership based on parent mptcp socket */ + mptcp_update_last_owner(mpts, mp_so); + + mptcp_subflow_input(mpte, mpts); + ret = mptcp_subflow_events(mpte, mpts); + + if (mpts->mpts_flags & MPTSF_ACTIVE) { + mptcplog3((LOG_INFO, "%s: cid %d \n", __func__, + mpts->mpts_connid)); + (void) mptcp_subflow_output(mpte, mpts); + } + + /* + * If MPTCP socket is closed, disconnect all subflows. 
+ * This will generate a disconnect event which will + * be handled during the next iteration, causing a + * non-zero error to be returned above. + */ + if (mp_so->so_flags & SOF_PCBCLEARING) + mptcp_subflow_disconnect(mpte, mpts, FALSE); + MPTS_UNLOCK(mpts); + + switch (ret) { + case MPTS_EVRET_OK_UPDATE: + conninfo_update = TRUE; + break; + case MPTS_EVRET_OK: + /* nothing to do */ + break; + case MPTS_EVRET_DELETE: + if (mptcp_delete_ok(mpte, mpts)) { + mptcp_subflow_del(mpte, mpts, TRUE); + } + break; + case MPTS_EVRET_CONNECT_PENDING: + connect_pending = TRUE; + break; + case MPTS_EVRET_DISCONNECT_FALLBACK: + disconnect_fallback = TRUE; + break; + } + MPTS_REMREF(mpts); /* ours */ + } + + if (conninfo_update) { + soevent(mp_so, SO_FILT_HINT_LOCKED | + SO_FILT_HINT_CONNINFO_UPDATED); + } + + if (!connect_pending && !disconnect_fallback) { + MPTE_UNLOCK(mpte); + return; + } + + TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) { + MPTS_LOCK(mpts); + if (disconnect_fallback) { + struct socket *so = NULL; + struct inpcb *inp = NULL; + struct tcpcb *tp = NULL; + + if (mpts->mpts_flags & MPTSF_MP_DEGRADED) { + MPTS_UNLOCK(mpts); + continue; + } + + mpts->mpts_flags |= MPTSF_MP_DEGRADED; + + if (mpts->mpts_flags & (MPTSF_DISCONNECTING| + MPTSF_DISCONNECTED)) { + MPTS_UNLOCK(mpts); + continue; + } + so = mpts->mpts_socket; + + /* + * The MPTCP connection has degraded to a fallback + * mode, so there is no point in keeping this subflow + * regardless of its MPTCP-readiness state, unless it + * is the primary one which we use for fallback. This + * assumes that the subflow used for fallback is the + * ACTIVE one. + */ + + socket_lock(so, 1); + inp = sotoinpcb(so); + tp = intotcpcb(inp); + tp->t_mpflags &= + ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE); + tp->t_mpflags |= TMPF_TCP_FALLBACK; + if (mpts->mpts_flags & MPTSF_ACTIVE) { + socket_unlock(so, 1); + MPTS_UNLOCK(mpts); + continue; + } + tp->t_mpflags |= TMPF_RESET; + soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST); + socket_unlock(so, 1); + + } else if (connect_pending) { + /* + * The MPTCP connection has progressed to a state + * where it supports full multipath semantics; allow + * additional joins to be attempted for all subflows + * that are in the PENDING state. + */ + if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) { + (void) mptcp_subflow_soconnectx(mpte, mpts); + } + } + MPTS_UNLOCK(mpts); + } + + MPTE_UNLOCK(mpte); +} + +/* + * MPTCP thread. + */ +static void +mptcp_thread_func(void *v, wait_result_t w) +{ +#pragma unused(w) + struct mptses *mpte = v; + struct timespec *ts = NULL; + + VERIFY(mpte != NULL); + + lck_mtx_lock_spin(&mpte->mpte_thread_lock); + + for (;;) { + lck_mtx_assert(&mpte->mpte_thread_lock, LCK_MTX_ASSERT_OWNED); + + if (mpte->mpte_thread != THREAD_NULL) { + (void) msleep(&mpte->mpte_thread, + &mpte->mpte_thread_lock, (PZERO - 1) | PSPIN, + __func__, ts); + } + + /* MPTCP socket is closed? 
*/
+        if (mpte->mpte_thread == THREAD_NULL) {
+            lck_mtx_unlock(&mpte->mpte_thread_lock);
+            /* callee will destroy thread lock */
+            mptcp_thread_destroy(mpte);
+            /* NOTREACHED */
+            return;
+        }
+
+        mpte->mpte_thread_active = 1;
+        for (;;) {
+            uint32_t reqs = mpte->mpte_thread_reqs;
+
+            lck_mtx_unlock(&mpte->mpte_thread_lock);
+            mptcp_thread_dowork(mpte);
+            lck_mtx_lock_spin(&mpte->mpte_thread_lock);
+
+            /* if there's no pending request, we're done */
+            if (reqs == mpte->mpte_thread_reqs ||
+                mpte->mpte_thread == THREAD_NULL)
+                break;
+        }
+        mpte->mpte_thread_reqs = 0;
+        mpte->mpte_thread_active = 0;
+    }
+}
+
+/*
+ * Destroy a MPTCP thread, to be called in the MPTCP thread context
+ * upon receiving an indication to self-terminate.  This routine
+ * will not return, as the current thread is terminated at the end.
+ */
+static void
+mptcp_thread_destroy(struct mptses *mpte)
+{
+    struct socket *mp_so;
+
+    MPTE_LOCK(mpte);        /* same as MP socket lock */
+    VERIFY(mpte->mpte_thread == THREAD_NULL);
+    VERIFY(mpte->mpte_mppcb != NULL);
+
+    mptcp_sesdestroy(mpte);
+
+    mp_so = mpte->mpte_mppcb->mpp_socket;
+    VERIFY(mp_so != NULL);
+    VERIFY(mp_so->so_usecount != 0);
+    mp_so->so_usecount--;        /* for thread */
+    mpte->mpte_mppcb->mpp_flags |= MPP_DEFUNCT;
+    MPTE_UNLOCK(mpte);
+
+    /* for the extra refcnt from kernel_thread_start() */
+    thread_deallocate(current_thread());
+    /* this is the end */
+    thread_terminate(current_thread());
+    /* NOTREACHED */
+}
+
+/*
+ * Protocol pr_lock callback.
+ */
+int
+mptcp_lock(struct socket *mp_so, int refcount, void *lr)
+{
+    struct mppcb *mpp = sotomppcb(mp_so);
+    void *lr_saved;
+
+    if (lr == NULL)
+        lr_saved = __builtin_return_address(0);
+    else
+        lr_saved = lr;
+
+    if (mpp == NULL) {
+        panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
+            mp_so, lr_saved, solockhistory_nr(mp_so));
+        /* NOTREACHED */
+    }
+    lck_mtx_lock(&mpp->mpp_lock);
+
+    if (mp_so->so_usecount < 0) {
+        panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
+            mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
+            solockhistory_nr(mp_so));
+        /* NOTREACHED */
+    }
+    if (refcount != 0)
+        mp_so->so_usecount++;
+    mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
+    mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;
+
+    return (0);
+}
+
+/*
+ * Protocol pr_unlock callback.
+ */
+int
+mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
+{
+    struct mppcb *mpp = sotomppcb(mp_so);
+    void *lr_saved;
+
+    if (lr == NULL)
+        lr_saved = __builtin_return_address(0);
+    else
+        lr_saved = lr;
+
+    if (mpp == NULL) {
+        panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
+            mp_so, mp_so->so_usecount, lr_saved,
+            solockhistory_nr(mp_so));
+        /* NOTREACHED */
+    }
+    lck_mtx_assert(&mpp->mpp_lock, LCK_MTX_ASSERT_OWNED);
+
+    if (refcount != 0)
+        mp_so->so_usecount--;
+
+    if (mp_so->so_usecount < 0) {
+        panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
+            mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
+        /* NOTREACHED */
+    }
+    mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
+    mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
+    lck_mtx_unlock(&mpp->mpp_lock);
+
+    return (0);
+}
+
+/*
+ * Protocol pr_getlock callback.
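+ * Returns the per-PCB mutex, so callers sleep and assert on the same
+ * lock that the pr_lock/pr_unlock callbacks above manipulate.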
+ */ +lck_mtx_t * +mptcp_getlock(struct socket *mp_so, int locktype) +{ +#pragma unused(locktype) + struct mppcb *mpp = sotomppcb(mp_so); + + if (mpp == NULL) { + panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so, + solockhistory_nr(mp_so)); + /* NOTREACHED */ + } + if (mp_so->so_usecount < 0) { + panic("%s: so=%p usecount=%x lrh= %s\n", __func__, + mp_so, mp_so->so_usecount, solockhistory_nr(mp_so)); + /* NOTREACHED */ + } + return (&mpp->mpp_lock); +} + +/* + * Key generation functions + */ +static void +mptcp_generate_unique_key(struct mptcp_key_entry *key_entry) +{ + struct mptcp_key_entry *key_elm; +try_again: + read_random(&key_entry->mkey_value, sizeof (key_entry->mkey_value)); + if (key_entry->mkey_value == 0) + goto try_again; + mptcp_do_sha1(&key_entry->mkey_value, key_entry->mkey_digest, + sizeof (key_entry->mkey_digest)); + + LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) { + if (key_elm->mkey_value == key_entry->mkey_value) { + goto try_again; + } + if (bcmp(key_elm->mkey_digest, key_entry->mkey_digest, 4) == + 0) { + goto try_again; + } + } +} + +static mptcp_key_t * +mptcp_reserve_key(void) +{ + struct mptcp_key_entry *key_elm; + struct mptcp_key_entry *found_elm = NULL; + + lck_mtx_lock(&mptcp_keys_pool.mkph_lock); + LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) { + if (key_elm->mkey_flags == MKEYF_FREE) { + key_elm->mkey_flags = MKEYF_INUSE; + found_elm = key_elm; + break; + } + } + lck_mtx_unlock(&mptcp_keys_pool.mkph_lock); + + if (found_elm) { + return (&found_elm->mkey_value); + } + + key_elm = (struct mptcp_key_entry *) + zalloc(mptcp_keys_pool.mkph_key_entry_zone); + key_elm->mkey_flags = MKEYF_INUSE; + + lck_mtx_lock(&mptcp_keys_pool.mkph_lock); + mptcp_generate_unique_key(key_elm); + LIST_INSERT_HEAD(&mptcp_keys_pool, key_elm, mkey_next); + mptcp_keys_pool.mkph_count += 1; + lck_mtx_unlock(&mptcp_keys_pool.mkph_lock); + return (&key_elm->mkey_value); +} + +static caddr_t +mptcp_get_stored_digest(mptcp_key_t *key) +{ + struct mptcp_key_entry *key_holder; + caddr_t digest = NULL; + + lck_mtx_lock(&mptcp_keys_pool.mkph_lock); + key_holder = (struct mptcp_key_entry *)(void *)((caddr_t)key - + offsetof(struct mptcp_key_entry, mkey_value)); + if (key_holder->mkey_flags != MKEYF_INUSE) + panic_plain("%s", __func__); + digest = &key_holder->mkey_digest[0]; + lck_mtx_unlock(&mptcp_keys_pool.mkph_lock); + return (digest); +} + +void +mptcp_free_key(mptcp_key_t *key) +{ + struct mptcp_key_entry *key_holder; + struct mptcp_key_entry *key_elm; + int pt = RandomULong(); + + mptcplog((LOG_INFO, "%s\n", __func__)); + + lck_mtx_lock(&mptcp_keys_pool.mkph_lock); + key_holder = (struct mptcp_key_entry *)(void*)((caddr_t)key - + offsetof(struct mptcp_key_entry, mkey_value)); + key_holder->mkey_flags = MKEYF_FREE; + + LIST_REMOVE(key_holder, mkey_next); + mptcp_keys_pool.mkph_count -= 1; + + /* Free half the time */ + if (pt & 0x01) { + zfree(mptcp_keys_pool.mkph_key_entry_zone, key_holder); + } else { + /* Insert it at random point to avoid early reuse */ + int i = 0; + if (mptcp_keys_pool.mkph_count > 1) { + pt = pt % (mptcp_keys_pool.mkph_count - 1); + LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) { + if (++i >= pt) { + LIST_INSERT_AFTER(key_elm, key_holder, + mkey_next); + break; + } + } + if (i < pt) + panic("missed insertion"); + } else { + LIST_INSERT_HEAD(&mptcp_keys_pool, key_holder, + mkey_next); + } + mptcp_keys_pool.mkph_count += 1; + } + lck_mtx_unlock(&mptcp_keys_pool.mkph_lock); +} + +static void +mptcp_key_pool_init(void) +{ + int i; + struct 
mptcp_key_entry *key_entry; + + LIST_INIT(&mptcp_keys_pool); + mptcp_keys_pool.mkph_count = 0; + + mptcp_keys_pool.mkph_key_elm_sz = (vm_size_t) + (sizeof (struct mptcp_key_entry)); + mptcp_keys_pool.mkph_key_entry_zone = zinit( + mptcp_keys_pool.mkph_key_elm_sz, + MPTCP_MX_KEY_ALLOCS * mptcp_keys_pool.mkph_key_elm_sz, + MPTCP_MX_PREALLOC_ZONE_SZ, "mptkeys"); + if (mptcp_keys_pool.mkph_key_entry_zone == NULL) { + panic("%s: unable to allocate MPTCP keys zone \n", __func__); + /* NOTREACHED */ + } + zone_change(mptcp_keys_pool.mkph_key_entry_zone, Z_CALLERACCT, FALSE); + zone_change(mptcp_keys_pool.mkph_key_entry_zone, Z_EXPAND, TRUE); + + for (i = 0; i < MPTCP_KEY_PREALLOCS_MX; i++) { + key_entry = (struct mptcp_key_entry *) + zalloc(mptcp_keys_pool.mkph_key_entry_zone); + key_entry->mkey_flags = MKEYF_FREE; + mptcp_generate_unique_key(key_entry); + LIST_INSERT_HEAD(&mptcp_keys_pool, key_entry, mkey_next); + mptcp_keys_pool.mkph_count += 1; + } + lck_mtx_init(&mptcp_keys_pool.mkph_lock, mtcbinfo.mppi_lock_grp, + mtcbinfo.mppi_lock_attr); +} + +/* + * MPTCP Join support + */ + +static void +mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, + connid_t conn_id) +{ + struct tcpcb *tp = sototcpcb(so); + struct mptcp_subf_auth_entry *sauth_entry; + MPT_LOCK_ASSERT_NOTHELD(mp_tp); + + MPT_LOCK_SPIN(mp_tp); + tp->t_mptcb = mp_tp; + MPT_UNLOCK(mp_tp); + /* + * As long as the mpts_connid is unique it can be used as the + * address ID for additional subflows. + * The address ID of the first flow is implicitly 0. + */ + if (mp_tp->mpt_state == MPTCPS_CLOSED) { + tp->t_local_aid = 0; + } else { + tp->t_local_aid = conn_id; + tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW); + so->so_flags |= SOF_MP_SEC_SUBFLOW; + } + sauth_entry = zalloc(mpt_subauth_zone); + sauth_entry->msae_laddr_id = tp->t_local_aid; + sauth_entry->msae_raddr_id = 0; + sauth_entry->msae_raddr_rand = 0; +try_again: + sauth_entry->msae_laddr_rand = RandomULong(); + if (sauth_entry->msae_laddr_rand == 0) + goto try_again; + LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next); +} + +static void +mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so) +{ + struct mptcp_subf_auth_entry *sauth_entry; + struct tcpcb *tp = sototcpcb(so); + int found = 0; + + if (tp == NULL) + return; + + MPT_LOCK(mp_tp); + LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) { + if (sauth_entry->msae_laddr_id == tp->t_local_aid) { + found = 1; + break; + } + } + if (found) { + LIST_REMOVE(sauth_entry, msae_next); + zfree(mpt_subauth_zone, sauth_entry); + } + tp->t_mptcb = NULL; + MPT_UNLOCK(mp_tp); +} + +void +mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand, + u_int32_t *rrand) +{ + struct mptcp_subf_auth_entry *sauth_entry; + MPT_LOCK_ASSERT_NOTHELD(mp_tp); + + MPT_LOCK(mp_tp); + LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) { + if (sauth_entry->msae_laddr_id == addr_id) { + if (lrand) + *lrand = sauth_entry->msae_laddr_rand; + if (rrand) + *rrand = sauth_entry->msae_raddr_rand; + break; + } + } + MPT_UNLOCK(mp_tp); +} + +void +mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp, + mptcp_addr_id raddr_id, u_int32_t raddr_rand) +{ + struct mptcp_subf_auth_entry *sauth_entry; + MPT_LOCK_ASSERT_NOTHELD(mp_tp); + + MPT_LOCK(mp_tp); + LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) { + if (sauth_entry->msae_laddr_id == laddr_id) { + if ((sauth_entry->msae_raddr_id != 0) && + (sauth_entry->msae_raddr_id != raddr_id)) { + 
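/* the peer presented an address ID that conflicts with the one recorded earlier; log and ignore the update */ +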
mptcplog((LOG_ERR, "MPTCP ERROR %s: mismatched" + " address ids %d %d \n", __func__, raddr_id, + sauth_entry->msae_raddr_id)); + MPT_UNLOCK(mp_tp); + return; + } + sauth_entry->msae_raddr_id = raddr_id; + if ((sauth_entry->msae_raddr_rand != 0) && + (sauth_entry->msae_raddr_rand != raddr_rand)) { + mptcplog((LOG_ERR, "%s: dup SYN_ACK %d %d \n", + __func__, raddr_rand, + sauth_entry->msae_raddr_rand)); + MPT_UNLOCK(mp_tp); + return; + } + sauth_entry->msae_raddr_rand = raddr_rand; + MPT_UNLOCK(mp_tp); + return; + } + } + MPT_UNLOCK(mp_tp); +} + +/* + * SHA1 support for MPTCP + */ +static int +mptcp_do_sha1(mptcp_key_t *key, char *sha_digest, int digest_len) +{ + SHA1_CTX sha1ctxt; + const unsigned char *sha1_base; + int sha1_size; + + if (digest_len != SHA1_RESULTLEN) { + return (FALSE); + } + + sha1_base = (const unsigned char *) key; + sha1_size = sizeof (mptcp_key_t); + SHA1Init(&sha1ctxt); + SHA1Update(&sha1ctxt, sha1_base, sha1_size); + SHA1Final(sha_digest, &sha1ctxt); + return (TRUE); +} + +void +mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2, + u_int32_t rand1, u_int32_t rand2, u_char *digest, int digest_len) +{ + SHA1_CTX sha1ctxt; + mptcp_key_t key_ipad[8] = {0}; /* key XOR'd with inner pad */ + mptcp_key_t key_opad[8] = {0}; /* key XOR'd with outer pad */ + u_int32_t data[2]; + int i; + + bzero(digest, digest_len); + + /* Set up the Key for HMAC */ + key_ipad[0] = key1; + key_ipad[1] = key2; + + key_opad[0] = key1; + key_opad[1] = key2; + + /* Set up the message for HMAC */ + data[0] = rand1; + data[1] = rand2; + + /* Key is 512 block length, so no need to compute hash */ + + /* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */ + + for (i = 0; i < 8; i++) { + key_ipad[i] ^= 0x3636363636363636; + key_opad[i] ^= 0x5c5c5c5c5c5c5c5c; + } + + /* Perform inner SHA1 */ + SHA1Init(&sha1ctxt); + SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof (key_ipad)); + SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof (data)); + SHA1Final(digest, &sha1ctxt); + + /* Perform outer SHA1 */ + SHA1Init(&sha1ctxt); + SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof (key_opad)); + SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN); + SHA1Final(digest, &sha1ctxt); +} + +/* + * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A)) + * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B)) + */ +void +mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest, + int digest_len) +{ + uint32_t lrand, rrand; + mptcp_key_t localkey, remotekey; + MPT_LOCK_ASSERT_NOTHELD(mp_tp); + + if (digest_len != SHA1_RESULTLEN) + return; + + lrand = rrand = 0; + mptcp_get_rands(aid, mp_tp, &lrand, &rrand); + MPT_LOCK_SPIN(mp_tp); + localkey = *mp_tp->mpt_localkey; + remotekey = mp_tp->mpt_remotekey; + MPT_UNLOCK(mp_tp); + mptcp_hmac_sha1(localkey, remotekey, lrand, rrand, digest, + digest_len); +} + +u_int64_t +mptcp_get_trunced_hmac(mptcp_addr_id aid, struct mptcb *mp_tp) +{ + u_char digest[SHA1_RESULTLEN]; + u_int64_t trunced_digest; + + mptcp_get_hmac(aid, mp_tp, &digest[0], sizeof (digest)); + bcopy(digest, &trunced_digest, 8); + return (trunced_digest); +} + +/* + * Authentication data generation + */ +int +mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token, + int token_len) +{ + VERIFY(token_len == sizeof (u_int32_t)); + VERIFY(sha_digest_len == SHA1_RESULTLEN); + + /* Most significant 32 bits of the SHA1 hash */ + bcopy(sha_digest, token, sizeof (u_int32_t)); + return (TRUE); +} + +int +mptcp_generate_idsn(char *sha_digest, int 
sha_digest_len, caddr_t idsn,
+    int idsn_len)
+{
+    VERIFY(idsn_len == sizeof (u_int64_t));
+    VERIFY(sha_digest_len == SHA1_RESULTLEN);
+
+    /*
+     * Least significant 64 bits of the SHA1 hash
+     */
+
+    idsn[7] = sha_digest[12];
+    idsn[6] = sha_digest[13];
+    idsn[5] = sha_digest[14];
+    idsn[4] = sha_digest[15];
+    idsn[3] = sha_digest[16];
+    idsn[2] = sha_digest[17];
+    idsn[1] = sha_digest[18];
+    idsn[0] = sha_digest[19];
+    return (TRUE);
+}
+
+static int
+mptcp_init_authparms(struct mptcb *mp_tp)
+{
+    caddr_t local_digest = NULL;
+    char remote_digest[MPTCP_SHA1_RESULTLEN];
+    MPT_LOCK_ASSERT_HELD(mp_tp);
+
+    /* Only Version 0 is supported for auth purposes */
+    if (mp_tp->mpt_version != MP_DRAFT_VERSION_12)
+        return (-1);
+
+    /* Setup local and remote tokens and Initial DSNs */
+    local_digest = mptcp_get_stored_digest(mp_tp->mpt_localkey);
+    mptcp_generate_token(local_digest, SHA1_RESULTLEN,
+        (caddr_t)&mp_tp->mpt_localtoken, sizeof (mp_tp->mpt_localtoken));
+    mptcp_generate_idsn(local_digest, SHA1_RESULTLEN,
+        (caddr_t)&mp_tp->mpt_local_idsn, sizeof (u_int64_t));
+
+    if (!mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest,
+        SHA1_RESULTLEN)) {
+        mptcplog((LOG_ERR, "MPTCP ERROR %s: unexpected failure",
+            __func__));
+        return (-1);
+    }
+    mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
+        (caddr_t)&mp_tp->mpt_remotetoken, sizeof (mp_tp->mpt_remotetoken));
+    mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
+        (caddr_t)&mp_tp->mpt_remote_idsn, sizeof (u_int64_t));
+    return (0);
+}
+
+static void
+mptcp_init_statevars(struct mptcb *mp_tp)
+{
+    MPT_LOCK_ASSERT_HELD(mp_tp);
+
+    /* The subflow SYN is also the first MPTCP byte */
+    mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
+    mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
+
+    mp_tp->mpt_rcvatmark = mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
+}
+
+static void
+mptcp_conn_properties(struct mptcb *mp_tp)
+{
+    /* There is only Version 0 at this time */
+    mp_tp->mpt_version = MP_DRAFT_VERSION_12;
+
+    /* Set DSS checksum flag */
+    if (mptcp_dss_csum)
+        mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
+
+    /* Set up receive window */
+    mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
+
+    /* Set up gc ticks */
+    mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
+}
+
+/*
+ * Helper Functions
+ */
+mptcp_token_t
+mptcp_get_localtoken(void* mptcb_arg)
+{
+    struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
+    return (mp_tp->mpt_localtoken);
+}
+
+mptcp_token_t
+mptcp_get_remotetoken(void* mptcb_arg)
+{
+    struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
+    return (mp_tp->mpt_remotetoken);
+}
+
+u_int64_t
+mptcp_get_localkey(void* mptcb_arg)
+{
+    struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
+    if (mp_tp->mpt_localkey != NULL)
+        return (*mp_tp->mpt_localkey);
+    else
+        return (0);
+}
+
+u_int64_t
+mptcp_get_remotekey(void* mptcb_arg)
+{
+    struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
+    return (mp_tp->mpt_remotekey);
+}
+
+void
+mptcp_send_dfin(struct socket *so)
+{
+    struct tcpcb *tp = NULL;
+    struct inpcb *inp = NULL;
+
+    inp = sotoinpcb(so);
+    if (!inp)
+        return;
+
+    tp = intotcpcb(inp);
+    if (!tp)
+        return;
+
+    if (!(tp->t_mpflags & TMPF_RESET))
+        tp->t_mpflags |= TMPF_SEND_DFIN;
+}
+
+/*
+ * Data Sequence Mapping routines
+ */
+void
+mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
+{
+    struct mptcb *mp_tp;
+
+    if (m == NULL)
+        return;
+
+    mp_tp = &((struct mpp_mtp *)mpp)->mtcb;
+    MPT_LOCK(mp_tp);
+    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
+        MPT_UNLOCK(mp_tp);
+        panic("%s: data write before establishment.",
+            __func__);
+        return;
+    }
+
+    while (m) {
+        VERIFY(m->m_flags
& M_PKTHDR); + m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO); + m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax; + m->m_pkthdr.mp_rlen = m_pktlen(m); + mp_tp->mpt_sndmax += m_pktlen(m); + m = m->m_next; + } + MPT_UNLOCK(mp_tp); +} + +void +mptcp_preproc_sbdrop(struct mbuf *m, unsigned int len) +{ + u_int32_t sub_len = 0; + + while (m) { + VERIFY(m->m_flags & M_PKTHDR); + + if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) { + sub_len = m->m_pkthdr.mp_rlen; + + if (sub_len < len) { + m->m_pkthdr.mp_dsn += sub_len; + if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) { + m->m_pkthdr.mp_rseq += sub_len; + } + m->m_pkthdr.mp_rlen = 0; + len -= sub_len; + } else { + /* sub_len >= len */ + m->m_pkthdr.mp_dsn += len; + if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) { + m->m_pkthdr.mp_rseq += len; + } + mptcplog3((LOG_INFO, + "%s: %llu %u %d %d\n", __func__, + m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rseq, + m->m_pkthdr.mp_rlen, len)); + m->m_pkthdr.mp_rlen -= len; + return; + } + } else { + panic("%s: MPTCP tag not set", __func__); + /* NOTREACHED */ + } + m = m->m_next; + } +} + +/* Obtain the DSN mapping stored in the mbuf */ +void +mptcp_output_getm_dsnmap32(struct socket *so, int off, uint32_t datalen, + u_int32_t *dsn, u_int32_t *relseq, u_int16_t *data_len, u_int64_t *dsn64p) +{ + u_int64_t dsn64; + + mptcp_output_getm_dsnmap64(so, off, datalen, &dsn64, relseq, data_len); + *dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64); + *dsn64p = dsn64; +} + +void +mptcp_output_getm_dsnmap64(struct socket *so, int off, uint32_t datalen, + u_int64_t *dsn, u_int32_t *relseq, u_int16_t *data_len) +{ + struct mbuf *m = so->so_snd.sb_mb; + struct mbuf *mnext = NULL; + uint32_t runlen = 0; + u_int64_t dsn64; + uint32_t contig_len = 0; + + if (m == NULL) + return; + + if (off < 0) + return; + /* + * In the subflow socket, the DSN sequencing can be discontiguous, + * but the subflow sequence mapping is contiguous. Use the subflow + * sequence property to find the right mbuf and corresponding dsn + * mapping. + */ + + while (m) { + VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP); + VERIFY(m->m_flags & M_PKTHDR); + + if ((unsigned int)off >= m->m_pkthdr.mp_rlen) { + off -= m->m_pkthdr.mp_rlen; + m = m->m_next; + } else { + break; + } + } + + if (m == NULL) { + panic("%s: bad offset", __func__); + /* NOTREACHED */ + } + + dsn64 = m->m_pkthdr.mp_dsn + off; + *dsn = dsn64; + *relseq = m->m_pkthdr.mp_rseq + off; + + /* + * Now find the last contiguous byte and its length from + * start. + */ + runlen = m->m_pkthdr.mp_rlen - off; + contig_len = runlen; + + /* If datalen does not span multiple mbufs, return */ + if (datalen <= runlen) { + *data_len = min(datalen, UINT16_MAX); + return; + } + + mnext = m->m_next; + while (datalen > runlen) { + if (mnext == NULL) { + panic("%s: bad datalen = %d, %d %d", __func__, datalen, + runlen, off); + /* NOTREACHED */ + } + VERIFY(mnext->m_flags & M_PKTHDR); + VERIFY(mnext->m_pkthdr.pkt_flags & PKTF_MPTCP); + + /* + * case A. contiguous DSN stream + * case B. 
discontiguous DSN stream
+		 */
+		if (mnext->m_pkthdr.mp_dsn == (dsn64 + runlen)) {
+			/* case A */
+			runlen += mnext->m_pkthdr.mp_rlen;
+			contig_len += mnext->m_pkthdr.mp_rlen;
+			mptcplog3((LOG_INFO, "%s: contig \n",
+			    __func__));
+		} else {
+			/* case B */
+			mptcplog((LOG_INFO, "%s: discontig %d %d \n",
+			    __func__, datalen, contig_len));
+			break;
+		}
+		mnext = mnext->m_next;
+	}
+	datalen = min(datalen, UINT16_MAX);
+	*data_len = min(datalen, contig_len);
+	mptcplog3((LOG_INFO, "%s: %llu %u %d %d \n", __func__,
+	    *dsn, *relseq, *data_len, off));
+}
+
+/*
+ * MPTCP's notion of the next in-sequence Data Sequence number is adjusted
+ * here. It must be called from mptcp_adj_rmap() which is called only after
+ * reassembly of out of order data. The rcvnxt variable must
+ * be updated only when at least some in-sequence new data is received.
+ */
+static void
+mptcp_adj_rcvnxt(struct tcpcb *tp, struct mbuf *m)
+{
+	struct mptcb *mp_tp = tptomptp(tp);
+
+	if (mp_tp == NULL)
+		return;
+	MPT_LOCK(mp_tp);
+	if ((MPTCP_SEQ_GEQ(mp_tp->mpt_rcvnxt, m->m_pkthdr.mp_dsn)) &&
+	    (MPTCP_SEQ_LEQ(mp_tp->mpt_rcvnxt, (m->m_pkthdr.mp_dsn +
+	    m->m_pkthdr.mp_rlen)))) {
+		mp_tp->mpt_rcvnxt = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
+	}
+	MPT_UNLOCK(mp_tp);
+}
+
+/*
+ * Note that this is called only from tcp_input() which may trim data
+ * after the dsn mapping is inserted into the mbuf. When it trims data
+ * tcp_input calls m_adj() which does not remove the m_pkthdr even if the
+ * m_len becomes 0 as a result of trimming the mbuf. The dsn map insertion
+ * cannot be delayed after trim, because data can be in the reassembly
+ * queue for a while and the DSN option info in tp will be overwritten for
+ * every new packet received.
+ * The dsn map will be adjusted just prior to appending to subflow sockbuf
+ * with mptcp_adj_rmap()
+ */
+void
+mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m)
+{
+	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));
+
+	if (tp->t_mpflags & TMPF_EMBED_DSN) {
+		VERIFY(m->m_flags & M_PKTHDR);
+		m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
+		m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
+		m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
+		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
+		tp->t_mpflags &= ~TMPF_EMBED_DSN;
+		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
+	}
+}
+
+void
+mptcp_adj_rmap(struct socket *so, struct mbuf *m)
+{
+	u_int64_t dsn;
+	u_int32_t sseq, datalen;
+	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
+	u_int32_t old_rcvnxt = 0;
+
+	if (m_pktlen(m) == 0)
+		return;
+
+	if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) {
+		VERIFY(m->m_flags & M_PKTHDR);
+
+		dsn = m->m_pkthdr.mp_dsn;
+		sseq = m->m_pkthdr.mp_rseq + tp->irs;
+		datalen = m->m_pkthdr.mp_rlen;
+	} else {
+		/* data arrived without a DSS option mapping */
+		mptcp_notify_mpfail(so);
+		return;
+	}
+
+	/* In the common case, data is in window and in sequence */
+	if (m->m_pkthdr.len == (int)datalen) {
+		mptcp_adj_rcvnxt(tp, m);
+		return;
+	}
+
+	if (m->m_pkthdr.len > (int)datalen) {
+		panic("%s: mbuf len = %d expected = %d", __func__,
+		    m->m_pkthdr.len, datalen);
+	}
+
+	old_rcvnxt = tp->rcv_nxt - m->m_pkthdr.len;
+	if (SEQ_GT(old_rcvnxt, sseq)) {
+		/* data trimmed from the left */
+		int off = old_rcvnxt - sseq;
+		m->m_pkthdr.mp_dsn += off;
+		m->m_pkthdr.mp_rseq += off;
+		m->m_pkthdr.mp_rlen -= off;
+	} else if (old_rcvnxt == sseq) {
+		/*
+		 * Data was trimmed from the right
+		 */
+		m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
+	} else {
+		/* XXX handle gracefully with reass or fallback in January */
+		panic("%s: partial map %u %u", __func__, old_rcvnxt, sseq);
+ /* NOTREACHED */ + } + mptcp_adj_rcvnxt(tp, m); + +} + +/* + * Following routines help with failure detection and failover of data + * transfer from one subflow to another. + */ +void +mptcp_act_on_txfail(struct socket *so) +{ + struct tcpcb *tp = NULL; + struct inpcb *inp = sotoinpcb(so); + + if (inp == NULL) + return; + + tp = intotcpcb(inp); + if (tp == NULL) + return; + + if (tp->t_state != TCPS_ESTABLISHED) + mptcplog((LOG_INFO, "%s: state = %d \n", __func__, + tp->t_state)); + + if (so->so_flags & SOF_MP_TRYFAILOVER) { + return; + } + + so->so_flags |= SOF_MP_TRYFAILOVER; + soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER)); +} + +/* + * Support for MP_FAIL option + */ +int +mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq) +{ + struct mbuf *m = so->so_snd.sb_mb; + u_int64_t dsn; + int off = 0; + u_int32_t datalen; + + if (m == NULL) + return (-1); + + while (m != NULL) { + VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP); + VERIFY(m->m_flags & M_PKTHDR); + dsn = m->m_pkthdr.mp_dsn; + datalen = m->m_pkthdr.mp_rlen; + if (MPTCP_SEQ_LEQ(dsn, dsn_fail) && + (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) { + off = dsn_fail - dsn; + *tcp_seq = m->m_pkthdr.mp_rseq + off; + return (0); + } + + m = m->m_next; + } + + /* + * If there was no mbuf data and a fallback to TCP occurred, there's + * not much else to do. + */ + + mptcplog((LOG_ERR, "%s: %llu not found \n", __func__, dsn_fail)); + return (-1); +} + +/* + * Support for sending contiguous MPTCP bytes in subflow + */ +int32_t +mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len) +{ + u_int64_t mdss_dsn = 0; + u_int32_t mdss_subflow_seq = 0; + u_int16_t mdss_data_len = 0; + + if (len == 0) + return (len); + + mptcp_output_getm_dsnmap64(so, off, (u_int32_t)len, + &mdss_dsn, &mdss_subflow_seq, &mdss_data_len); + + return (mdss_data_len); +} + +int32_t +mptcp_sbspace(struct mptcb *mpt) +{ + struct sockbuf *sb; + uint32_t rcvbuf; + int32_t space; + + MPT_LOCK_ASSERT_HELD(mpt); + MPTE_LOCK_ASSERT_HELD(mpt->mpt_mpte); + + sb = &mpt->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv; + rcvbuf = sb->sb_hiwat; + space = ((int32_t)imin((rcvbuf - sb->sb_cc), + (sb->sb_mbmax - sb->sb_mbcnt))); + if (space < 0) + space = 0; + /* XXX check if it's too small? 
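+	 * Illustration of the computation above (hypothetical numbers,
+	 * not taken from this code): with sb_hiwat = 65536, sb_cc = 16384,
+	 * sb_mbmax = 131072 and sb_mbcnt = 32768, the advertised space
+	 * would be imin(65536 - 16384, 131072 - 32768) = 49152 bytes.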
*/ + + return (space); +} + +/* + * Support Fallback to Regular TCP + */ +void +mptcp_notify_mpready(struct socket *so) +{ + struct tcpcb *tp = NULL; + + if (so == NULL) + return; + + tp = intotcpcb(sotoinpcb(so)); + + if (tp == NULL) + return; + + DTRACE_MPTCP4(multipath__ready, struct socket *, so, + struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd, + struct tcpcb *, tp); + + if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) + return; + + if (tp->t_mpflags & TMPF_MPTCP_READY) + return; + + tp->t_mpflags &= ~TMPF_TCP_FALLBACK; + tp->t_mpflags |= TMPF_MPTCP_READY; + + soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS)); +} + +void +mptcp_notify_mpfail(struct socket *so) +{ + struct tcpcb *tp = NULL; + + if (so == NULL) + return; + + tp = intotcpcb(sotoinpcb(so)); + + if (tp == NULL) + return; + + DTRACE_MPTCP4(multipath__failed, struct socket *, so, + struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd, + struct tcpcb *, tp); + + if (tp->t_mpflags & TMPF_TCP_FALLBACK) + return; + + tp->t_mpflags &= ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE); + tp->t_mpflags |= TMPF_TCP_FALLBACK; + + soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS)); +} + +/* + * Keepalive helper function + */ +boolean_t +mptcp_ok_to_keepalive(struct mptcb *mp_tp) +{ + boolean_t ret = 1; + VERIFY(mp_tp != NULL); + MPT_LOCK(mp_tp); + if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) { + ret = 0; + } + MPT_UNLOCK(mp_tp); + return (ret); +} + +/* + * MPTCP t_maxseg adjustment function + */ +int +mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc) +{ + int mss_lower = 0; + struct mptcb *mp_tp = tptomptp(tp); + +#define MPTCP_COMPUTE_LEN { \ + mss_lower = sizeof (struct mptcp_dss_ack_opt); \ + MPT_LOCK(mp_tp); \ + if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) \ + mss_lower += 2; \ + else \ + /* adjust to 32-bit boundary + EOL */ \ + mss_lower += 2; \ + MPT_UNLOCK(mp_tp); \ +} + if (mp_tp == NULL) + return (0); + + /* + * For the first subflow and subsequent subflows, adjust mss for + * most common MPTCP option size, for case where tcp_mss is called + * during option processing and MTU discovery. 
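+ * As an illustrative sketch (base MSS hypothetical): with a 1460-byte
+ * MSS, each MPTCP_COMPUTE_LEN invocation below lowers the subflow MSS
+ * by sizeof (struct mptcp_dss_ack_opt) plus 2 bytes -- either the DSS
+ * checksum field or the 32-bit alignment padding and EOL -- so the
+ * usable payload per segment shrinks by exactly that option overhead.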
+ */ + if ((tp->t_mpflags & TMPF_PREESTABLISHED) && + (!(tp->t_mpflags & TMPF_JOINED_FLOW))) { + MPTCP_COMPUTE_LEN; + } + + if ((tp->t_mpflags & TMPF_PREESTABLISHED) && + (tp->t_mpflags & TMPF_SENT_JOIN)) { + MPTCP_COMPUTE_LEN; + } + + if ((mtudisc) && (tp->t_mpflags & TMPF_MPTCP_TRUE)) { + MPTCP_COMPUTE_LEN; + } + + return (mss_lower); +} + +/* + * Update the pid, upid, uuid of the subflow so, based on parent so + */ +void +mptcp_update_last_owner(struct mptsub *mpts, struct socket *parent_mpso) +{ + struct socket *subflow_so = mpts->mpts_socket; + + MPTS_LOCK_ASSERT_HELD(mpts); + + socket_lock(subflow_so, 0); + if ((subflow_so->last_pid != parent_mpso->last_pid) || + (subflow_so->last_upid != parent_mpso->last_upid)) { + subflow_so->last_upid = parent_mpso->last_upid; + subflow_so->last_pid = parent_mpso->last_pid; + uuid_copy(subflow_so->last_uuid, parent_mpso->last_uuid); + } + so_update_policy(subflow_so); + socket_unlock(subflow_so, 0); +} + +static void +fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts) +{ + struct inpcb *inp; + + tcp_getconninfo(so, &flow->flow_ci); + inp = sotoinpcb(so); +#if INET6 + if ((inp->inp_vflag & INP_IPV6) != 0) { + flow->flow_src.ss_family = AF_INET6; + flow->flow_dst.ss_family = AF_INET6; + flow->flow_src.ss_len = sizeof(struct sockaddr_in6); + flow->flow_dst.ss_len = sizeof(struct sockaddr_in6); + SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport; + SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport; + SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr; + SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr; + } else +#endif + { + flow->flow_src.ss_family = AF_INET; + flow->flow_dst.ss_family = AF_INET; + flow->flow_src.ss_len = sizeof(struct sockaddr_in); + flow->flow_dst.ss_len = sizeof(struct sockaddr_in); + SIN(&flow->flow_src)->sin_port = inp->inp_lport; + SIN(&flow->flow_dst)->sin_port = inp->inp_fport; + SIN(&flow->flow_src)->sin_addr = inp->inp_laddr; + SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr; + } + flow->flow_flags = mpts->mpts_flags; + flow->flow_cid = mpts->mpts_connid; +} + +static int +mptcp_pcblist SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + int error = 0, f; + size_t n, len; + struct mppcb *mpp; + struct mptses *mpte; + struct mptcb *mp_tp; + struct mptsub *mpts; + struct socket *so; + conninfo_mptcp_t mptcpci; + mptcp_flow_t *flows; + + if (req->newptr != USER_ADDR_NULL) + return (EPERM); + + lck_mtx_lock(&mtcbinfo.mppi_lock); + n = mtcbinfo.mppi_count; + if (req->oldptr == USER_ADDR_NULL) { + lck_mtx_unlock(&mtcbinfo.mppi_lock); + req->oldidx = (n + n/8) * sizeof(conninfo_mptcp_t) + + 4 * (n + n/8) * sizeof(mptcp_flow_t); + return (0); + } + TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) { + bzero(&mptcpci, sizeof(mptcpci)); + lck_mtx_lock(&mpp->mpp_lock); + VERIFY(mpp->mpp_flags & MPP_ATTACHED); + mpte = mptompte(mpp); + VERIFY(mpte != NULL); + mp_tp = mpte->mpte_mptcb; + VERIFY(mp_tp != NULL); + len = sizeof(*flows) * mpte->mpte_numflows; + flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO); + if (flows == NULL) { + lck_mtx_unlock(&mpp->mpp_lock); + break; + } + /* N.B. we don't take the mpt_lock just for the state. 
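+		 * A momentarily stale mpt_state is acceptable here: this
+		 * sysctl produces a diagnostic snapshot only, so the copied
+		 * value may lag the live state machine by one transition.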
*/
+		mptcpci.mptcpci_state = mp_tp->mpt_state;
+		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
+		mptcpci.mptcpci_len = sizeof(mptcpci) +
+		    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
+		error = SYSCTL_OUT(req, &mptcpci,
+		    sizeof(mptcpci) - sizeof(*flows));
+		if (error) {
+			lck_mtx_unlock(&mpp->mpp_lock);
+			FREE(flows, M_TEMP);
+			break;
+		}
+		f = 0;
+		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+			MPTS_LOCK(mpts);
+			so = mpts->mpts_socket;
+			socket_lock(so, 0);
+			fill_mptcp_subflow(so, &flows[f], mpts);
+			socket_unlock(so, 0);
+			MPTS_UNLOCK(mpts);
+			f++;
+		}
+		lck_mtx_unlock(&mpp->mpp_lock);
+		error = SYSCTL_OUT(req, flows, len);
+		FREE(flows, M_TEMP);
+		if (error)
+			break;
+	}
+	lck_mtx_unlock(&mtcbinfo.mppi_lock);
+
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
+    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
+    "List of active MPTCP connections");
diff --git a/bsd/netinet/mptcp_timer.c b/bsd/netinet/mptcp_timer.c
new file mode 100644
index 000000000..322aba514
--- /dev/null
+++ b/bsd/netinet/mptcp_timer.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+/*
+ * MPTCP Retransmission Timer comes into play only when subflow level
+ * data is acked, but Data ACK is not received. Time is in seconds.
+ */
+static u_int32_t mptcp_rto = 3;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&mptcp_rto, 0, "MPTCP Retransmission Timeout");
+
+static int mptcp_nrtos = 3;
+SYSCTL_INT(_net_inet_mptcp, OID_AUTO, nrto, CTLFLAG_RW | CTLFLAG_LOCKED,
+	&mptcp_nrtos, 0, "MPTCP Retransmissions");
+
+/*
+ * MPTCP connections timewait interval in seconds.
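+ * Tunable at runtime; a hypothetical invocation such as
+ *
+ *	sysctl -w net.inet.mptcp.tw=120
+ *
+ * would hold closing MPTCP connections in timewait for two minutes.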
+ */ +static u_int32_t mptcp_tw = 60; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, tw, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_tw, 0, "MPTCP Timewait Period"); + +#define TIMEVAL_TO_HZ(_tv_) ((_tv_).tv_sec * hz + (_tv_).tv_usec / hz) + +static int +mptcp_timer_demux(struct mptses *mpte, uint32_t now_msecs) +{ + struct mptcb *mp_tp = NULL; + mp_tp = mpte->mpte_mptcb; + int resched_timer = 0; + + DTRACE_MPTCP2(timer, struct mptses *, mpte, struct mptcb *, mp_tp); + mptcplog2((LOG_DEBUG, "%s: running %d\n", __func__, + mp_tp->mpt_timer_vals)); + + MPTE_LOCK_ASSERT_HELD(mpte); + MPT_LOCK(mp_tp); + switch (mp_tp->mpt_timer_vals) { + case MPTT_REXMT: + if (mp_tp->mpt_rxtstart == 0) + break; + if ((now_msecs - mp_tp->mpt_rxtstart) > + (mptcp_rto*hz)) { + if (MPTCP_SEQ_GT(mp_tp->mpt_snduna, + mp_tp->mpt_rtseq)) { + mp_tp->mpt_timer_vals = 0; + mp_tp->mpt_rtseq = 0; + break; + } + mp_tp->mpt_rxtshift++; + if (mp_tp->mpt_rxtshift > mptcp_nrtos) { + mp_tp->mpt_softerror = ETIMEDOUT; + DTRACE_MPTCP1(error, struct mptcb *, mp_tp); + } else { + mp_tp->mpt_sndnxt = mp_tp->mpt_rtseq; + MPT_UNLOCK(mp_tp); + mptcplog((LOG_DEBUG, "%s: REXMT %d times.\n", + __func__, mp_tp->mpt_rxtshift)); + mptcp_output(mpte); + MPT_LOCK(mp_tp); + } + } else { + resched_timer = 1; + } + break; + case MPTT_TW: + /* Allows for break before make XXX */ + if (mp_tp->mpt_timewait == 0) + VERIFY(0); + if ((now_msecs - mp_tp->mpt_timewait) > + (mptcp_tw * hz)) { + mp_tp->mpt_softerror = ETIMEDOUT; + DTRACE_MPTCP1(error, struct mptcb *, mp_tp); + } else { + resched_timer = 1; + } + break; + case MPTT_FASTCLOSE: + /* TODO XXX */ + break; + default: + break; + } + MPT_UNLOCK(mp_tp); + + return (resched_timer); +} + +uint32_t +mptcp_timer(struct mppcbinfo *mppi) +{ + struct mppcb *mpp, *tmpp; + struct timeval now; + u_int32_t now_msecs; + uint32_t resched_timer = 0; + + lck_mtx_assert(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED); + + microuptime(&now); + now_msecs = TIMEVAL_TO_HZ(now); + TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) { + struct socket *mp_so; + struct mptses *mpte; + + mp_so = mpp->mpp_socket; + VERIFY(mp_so != NULL); + mpte = mptompte(mpp); + VERIFY(mpte != NULL); + MPTE_LOCK(mpte); + VERIFY(mpp->mpp_flags & MPP_ATTACHED); + + if (mpp->mpp_flags & MPP_DEFUNCT) { + MPTE_UNLOCK(mpte); + continue; + } + + if (mptcp_timer_demux(mpte, now_msecs)) + resched_timer = 1; + MPTE_UNLOCK(mpte); + } + + return (resched_timer); +} + +void +mptcp_start_timer(struct mptcb *mp_tp, int timer_type) +{ + struct timeval now; + + microuptime(&now); + + MPT_LOCK_ASSERT_HELD(mp_tp); + + DTRACE_MPTCP2(start__timer, struct mptcb *, mp_tp, int, timer_type); + mptcplog((LOG_DEBUG, "%s %d\n", __func__, timer_type)); + + switch (timer_type) { + case MPTT_REXMT: + mp_tp->mpt_timer_vals |= MPTT_REXMT; + mp_tp->mpt_rxtstart = TIMEVAL_TO_HZ(now); + mp_tp->mpt_rxtshift = 0; + mp_tp->mpt_rtseq = mp_tp->mpt_sndnxt; + break; + case MPTT_TW: + mp_tp->mpt_timer_vals |= MPTT_TW; + mp_tp->mpt_timewait = TIMEVAL_TO_HZ(now); + break; + case MPTT_FASTCLOSE: + /* NO-OP */ + break; + default: + VERIFY(0); + /* NOTREACHED */ + } + mptcp_timer_sched(); +} + +void +mptcp_cancel_timer(struct mptcb *mp_tp, int timer_type) +{ + MPT_LOCK_ASSERT_HELD(mp_tp); + + DTRACE_MPTCP2(cancel__timer, struct mptcb *, mp_tp, int, timer_type); + mptcplog3((LOG_DEBUG, "%s %d\n", __func__, timer_type)); + + switch (timer_type) { + case MPTT_REXMT: + mp_tp->mpt_rxtstart = 0; + mp_tp->mpt_rxtshift = 0; + mp_tp->mpt_timer_vals = 0; + break; + case MPTT_TW: + /* NO-OP */ + break; + case 
MPTT_FASTCLOSE:
+		/* NO-OP */
+		break;
+	default:
+		break;
+	}
+}
+
+void
+mptcp_cancel_all_timers(struct mptcb *mp_tp)
+{
+	mptcp_cancel_timer(mp_tp, MPTT_REXMT);
+	mptcp_cancel_timer(mp_tp, MPTT_TW);
+	mptcp_cancel_timer(mp_tp, MPTT_FASTCLOSE);
+}
diff --git a/bsd/netinet/mptcp_timer.h b/bsd/netinet/mptcp_timer.h
new file mode 100644
index 000000000..b5dac77a6
--- /dev/null
+++ b/bsd/netinet/mptcp_timer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef _NETINET_MPTCP_TIMER_H_
+#define _NETINET_MPTCP_TIMER_H_
+
+#ifdef BSD_KERNEL_PRIVATE
+
+#define MPTT_REXMT	0x01	/* retransmit timer */
+#define MPTT_TW		0x02	/* timewait timer */
+#define MPTT_FASTCLOSE	0x04	/* fastclose timer */
+
+__BEGIN_DECLS
+extern uint32_t mptcp_timer(struct mppcbinfo *);
+extern void mptcp_start_timer(struct mptcb *, int);
+extern void mptcp_cancel_timer(struct mptcb *, int);
+extern void mptcp_cancel_all_timers(struct mptcb *);
+__END_DECLS
+
+#endif /* BSD_KERNEL_PRIVATE */
+#endif /* !_NETINET_MPTCP_TIMER_H_ */
diff --git a/bsd/netinet/mptcp_usrreq.c b/bsd/netinet/mptcp_usrreq.c
new file mode 100644
index 000000000..d4ea19cd1
--- /dev/null
+++ b/bsd/netinet/mptcp_usrreq.c
@@ -0,0 +1,1954 @@
+/*
+ * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static int mptcp_usr_attach(struct socket *, int, struct proc *); +static int mptcp_usr_detach(struct socket *); +static int mptcp_attach(struct socket *, struct proc *); +static int mptcp_detach(struct socket *, struct mppcb *); +static int mptcp_connectx(struct mptses *, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, + uint32_t, void *, uint32_t); +static int mptcp_usr_connectx(struct socket *, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, + uint32_t, void *, uint32_t); +static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t); +static int mptcp_getconnids(struct mptses *, associd_t, uint32_t *, + user_addr_t); +static int mptcp_getconninfo(struct mptses *, connid_t *, uint32_t *, + uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *, + uint32_t *, user_addr_t, uint32_t *); +static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *, + struct proc *); +static int mptcp_disconnectx(struct mptses *, associd_t, connid_t); +static int mptcp_usr_disconnectx(struct socket *, associd_t, connid_t); +static struct mptses *mptcp_usrclosed(struct mptses *); +static int mptcp_usr_peeloff(struct socket *, associd_t, struct socket **); +static int mptcp_peeloff(struct mptses *, associd_t, struct socket **); +static int mptcp_usr_rcvd(struct socket *, int); +static int mptcp_usr_send(struct socket *, int, struct mbuf *, + struct sockaddr *, struct mbuf *, struct proc *); +static int mptcp_usr_shutdown(struct socket *); +static int mptcp_uiotombuf(struct uio *, int, int, uint32_t, struct mbuf **); +static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *, + struct mbuf *, struct mbuf *, int); +static int mptcp_usr_socheckopt(struct socket *, struct sockopt *); +static int mptcp_setopt_apply(struct mptses *, struct mptopt *); +static int mptcp_setopt(struct mptses *, struct sockopt *); +static int mptcp_getopt(struct mptses *, struct sockopt *); +static int mptcp_default_tcp_optval(struct mptses *, struct sockopt *, int *); +static void mptcp_connorder_helper(struct mptsub *mpts); + +struct pr_usrreqs mptcp_usrreqs = { + .pru_attach = mptcp_usr_attach, + .pru_connectx = mptcp_usr_connectx, + .pru_control = mptcp_usr_control, + .pru_detach = mptcp_usr_detach, + .pru_disconnectx = mptcp_usr_disconnectx, + .pru_peeloff = mptcp_usr_peeloff, + .pru_rcvd = mptcp_usr_rcvd, + .pru_send = mptcp_usr_send, + .pru_shutdown = mptcp_usr_shutdown, + .pru_sosend = mptcp_usr_sosend, + .pru_soreceive = soreceive, + .pru_socheckopt = mptcp_usr_socheckopt, +}; + +/* + * Attaches an MPTCP control block to a socket. 
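+ * This is reached on the initial socket(2) call against the multipath
+ * domain; a minimal userland sketch (hypothetical, assuming the
+ * PF_MULTIPATH domain is exposed to the caller's context) would be:
+ *
+ *	int mp_fd = socket(AF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);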
+ */ +static int +mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p) +{ +#pragma unused(proto) + int error; + + VERIFY(sotomppcb(mp_so) == NULL); + + error = mptcp_attach(mp_so, p); + if (error != 0) + goto out; + /* + * XXX: adi@apple.com + * + * Might want to use a different SO_LINGER timeout than TCP's? + */ + if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0) + mp_so->so_linger = TCP_LINGERTIME * hz; +out: + return (error); +} + +/* + * Detaches an MPTCP control block from a socket. + */ +static int +mptcp_usr_detach(struct socket *mp_so) +{ + struct mppcb *mpp = sotomppcb(mp_so); + int error = 0; + + VERIFY(mpp != NULL); + VERIFY(mpp->mpp_socket != NULL); + + error = mptcp_detach(mp_so, mpp); + return (error); +} + +/* + * Attach MPTCP protocol to socket, allocating MP control block, + * MPTCP session, control block, buffer space, etc. + */ +static int +mptcp_attach(struct socket *mp_so, struct proc *p) +{ +#pragma unused(p) + struct mptses *mpte; + struct mptcb *mp_tp; + struct mppcb *mpp; + int error = 0; + + if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) { + error = soreserve(mp_so, tcp_sendspace, MPTCP_RWIN_MAX); + if (error != 0) + goto out; + } + + /* + * MPTCP socket buffers cannot be compressed, due to the + * fact that each mbuf chained via m_next is a M_PKTHDR + * which carries some MPTCP metadata. + */ + mp_so->so_snd.sb_flags |= SB_NOCOMPRESS; + mp_so->so_rcv.sb_flags |= SB_NOCOMPRESS; + + /* Disable socket buffer auto-tuning. */ + mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE; + mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE; + + if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) + goto out; + + mpp = sotomppcb(mp_so); + VERIFY(mpp != NULL); + + mpte = mptcp_sescreate(mp_so, mpp); + if (mpte == NULL) { + mp_pcbdetach(mpp); + error = ENOBUFS; + goto out; + } + mp_tp = mpte->mpte_mptcb; + VERIFY(mp_tp != NULL); + + MPT_LOCK(mp_tp); + mp_tp->mpt_state = MPTCPS_CLOSED; + MPT_UNLOCK(mp_tp); + +out: + return (error); +} + +/* + * Called when the socket layer loses its final reference to the socket; + * at this point, there is only one case in which we will keep things + * around: time wait. + */ +static int +mptcp_detach(struct socket *mp_so, struct mppcb *mpp) +{ + struct mptses *mpte; + struct mppcbinfo *mppi; + + VERIFY(mp_so->so_pcb == mpp); + VERIFY(mpp->mpp_socket == mp_so); + + mppi = mpp->mpp_pcbinfo; + VERIFY(mppi != NULL); + + mpte = &((struct mpp_mtp *)mpp)->mpp_ses; + VERIFY(mpte->mpte_mppcb == mpp); + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + /* + * We are done with this MPTCP socket (it has been closed); + * trigger all subflows to be disconnected, if not already, + * by initiating the PCB detach sequence (SOF_PCBCLEARING + * will be set.) + */ + mp_pcbdetach(mpp); + + (void) mptcp_disconnectx(mpte, ASSOCID_ALL, CONNID_ALL); + + /* + * XXX: adi@apple.com + * + * Here, we would want to handle time wait state. + */ + + return (0); +} + +/* + * Common subroutine to open a MPTCP connection to one of the remote hosts + * specified by dst_sl. This includes allocating and establishing a + * subflow TCP connection, either initially to establish MPTCP connection, + * or to join an existing one. Returns a connection handle upon success. 
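+ * The handle is returned through *pcid. Note that EWOULDBLOCK from
+ * subflow setup is not treated as a failure below: the subflow stays
+ * attached and completes asynchronously, which is the expected path
+ * for non-blocking callers.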
+ */ +static int +mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ +#pragma unused(p, aid, flags, arg, arglen) + struct mptsub *mpts; + struct socket *mp_so; + int error = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + + VERIFY(dst_sl != NULL && *dst_sl != NULL); + VERIFY(pcid != NULL); + + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + DTRACE_MPTCP3(connectx, struct mptses *, mpte, associd_t, aid, + struct socket *, mp_so); + + mpts = mptcp_subflow_alloc(M_WAITOK); + if (mpts == NULL) { + error = ENOBUFS; + goto out; + } + MPTS_ADDREF(mpts); /* for this routine */ + + if (src_sl != NULL) { + mpts->mpts_src_sl = *src_sl; + *src_sl = NULL; + } + mpts->mpts_dst_sl = *dst_sl; + *dst_sl = NULL; + + error = mptcp_subflow_add(mpte, mpts, p, ifscope); + if (error == 0 && pcid != NULL) + *pcid = mpts->mpts_connid; + +out: + if (mpts != NULL) { + if ((error != 0) && (error != EWOULDBLOCK)) { + MPTS_LOCK(mpts); + if (mpts->mpts_flags & MPTSF_ATTACHED) { + MPTS_UNLOCK(mpts); + MPTS_REMREF(mpts); + mptcp_subflow_del(mpte, mpts, TRUE); + return (error); + } + MPTS_UNLOCK(mpts); + } + MPTS_REMREF(mpts); + } + + return (error); +} + +/* + * User-protocol pru_connectx callback. + */ +static int +mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ +#pragma unused(arg, arglen) + struct mppcb *mpp = sotomppcb(mp_so); + struct mptses *mpte; + int error = 0; + + if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { + error = EINVAL; + goto out; + } + mpte = mptompte(mpp); + VERIFY(mpte != NULL); + + error = mptcp_connectx(mpte, src_sl, dst_sl, p, ifscope, + aid, pcid, flags, arg, arglen); +out: + return (error); +} + +/* + * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain. + */ +static int +mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp) +{ + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + /* MPTCP has at most 1 association */ + *cnt = (mpte->mpte_associd != ASSOCID_ANY) ? 1 : 0; + + /* just asking how many there are? */ + if (aidp == USER_ADDR_NULL) + return (0); + + return (copyout(&mpte->mpte_associd, aidp, + sizeof (mpte->mpte_associd))); +} + +/* + * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain. + */ +static int +mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt, + user_addr_t cidp) +{ + struct mptsub *mpts; + int error = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + if (aid != ASSOCID_ANY && aid != ASSOCID_ALL && + aid != mpte->mpte_associd) + return (EINVAL); + + *cnt = mpte->mpte_numflows; + + /* just asking how many there are? */ + if (cidp == USER_ADDR_NULL) + return (0); + + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + if ((error = copyout(&mpts->mpts_connid, cidp, + sizeof (mpts->mpts_connid))) != 0) + break; + + cidp += sizeof (mpts->mpts_connid); + } + + return (error); +} + +/* + * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain. 
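+ * A caller typically issues this twice per subflow: once with NULL
+ * buffers to learn the src/dst/aux lengths, then again to fetch them.
+ * Hypothetical userland sketch (struct and request names assumed from
+ * the 32/64-bit kernel variants handled in mptcp_usr_control()):
+ *
+ *	struct so_cinforeq scir;
+ *	bzero(&scir, sizeof (scir));
+ *	scir.scir_cid = CONNID_ANY;
+ *	ioctl(mp_fd, SIOCGCONNINFO, &scir);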
+ */ +static int +mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, + uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, + user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, + user_addr_t aux_data, uint32_t *aux_len) +{ +#pragma unused(aux_data) + struct sockaddr_entry *se; + struct ifnet *ifp = NULL; + struct mptsub *mpts; + int error = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + if (*cid == CONNID_ALL) + return (EINVAL); + + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + if (mpts->mpts_connid == *cid || *cid == CONNID_ANY) + break; + } + if (mpts == NULL) + return ((*cid == CONNID_ANY) ? ENXIO : EINVAL); + + MPTS_LOCK(mpts); + ifp = mpts->mpts_outif; + *cid = mpts->mpts_connid; + *ifindex = ((ifp != NULL) ? ifp->if_index : 0); + *soerror = mpts->mpts_soerror; + *flags = 0; + if (mpts->mpts_flags & MPTSF_CONNECTING) + *flags |= CIF_CONNECTING; + if (mpts->mpts_flags & MPTSF_CONNECTED) + *flags |= CIF_CONNECTED; + if (mpts->mpts_flags & MPTSF_DISCONNECTING) + *flags |= CIF_DISCONNECTING; + if (mpts->mpts_flags & MPTSF_DISCONNECTED) + *flags |= CIF_DISCONNECTED; + if (mpts->mpts_flags & MPTSF_BOUND_IF) + *flags |= CIF_BOUND_IF; + if (mpts->mpts_flags & MPTSF_BOUND_IP) + *flags |= CIF_BOUND_IP; + if (mpts->mpts_flags & MPTSF_BOUND_PORT) + *flags |= CIF_BOUND_PORT; + if (mpts->mpts_flags & MPTSF_PREFERRED) + *flags |= CIF_PREFERRED; + if (mpts->mpts_flags & MPTSF_MP_CAPABLE) + *flags |= CIF_MP_CAPABLE; + if (mpts->mpts_flags & MPTSF_MP_DEGRADED) + *flags |= CIF_MP_DEGRADED; + if (mpts->mpts_flags & MPTSF_MP_READY) + *flags |= CIF_MP_READY; + if (mpts->mpts_flags & MPTSF_ACTIVE) + *flags |= CIF_MP_ACTIVE; + + VERIFY(mpts->mpts_src_sl != NULL); + se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head); + VERIFY(se != NULL && se->se_addr != NULL); + *src_len = se->se_addr->sa_len; + if (src != USER_ADDR_NULL) { + error = copyout(se->se_addr, src, se->se_addr->sa_len); + if (error != 0) + goto out; + } + + VERIFY(mpts->mpts_dst_sl != NULL); + se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head); + VERIFY(se != NULL && se->se_addr != NULL); + *dst_len = se->se_addr->sa_len; + if (dst != USER_ADDR_NULL) { + error = copyout(se->se_addr, dst, se->se_addr->sa_len); + if (error != 0) + goto out; + } + + *aux_type = 0; + *aux_len = 0; + if (mpts->mpts_socket != NULL) { + struct conninfo_tcp tcp_ci; + + *aux_type = CIAUX_TCP; + *aux_len = sizeof (tcp_ci); + + if (aux_data != USER_ADDR_NULL) { + struct socket *so = mpts->mpts_socket; + + VERIFY(SOCK_PROTO(so) == IPPROTO_TCP); + bzero(&tcp_ci, sizeof (tcp_ci)); + socket_lock(so, 0); + tcp_getconninfo(so, &tcp_ci); + socket_unlock(so, 0); + error = copyout(&tcp_ci, aux_data, sizeof (tcp_ci)); + if (error != 0) + goto out; + } + } +out: + MPTS_UNLOCK(mpts); + return (error); +} + +/* + * Handle SIOCSCONNORDER + */ +int +mptcp_setconnorder(struct mptses *mpte, connid_t cid, uint32_t rank) +{ + struct mptsub *mpts, *mpts1; + int error = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mptcplog((LOG_DEBUG, "%s: cid %d rank %d \n", __func__, cid, rank)); + + if (cid == CONNID_ANY || cid == CONNID_ALL) { + error = EINVAL; + goto out; + } + + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + if (mpts->mpts_connid == cid) + break; + } + if (mpts == NULL) { + error = ENXIO; + goto out; + } + + if (rank == 0 || rank > 1) { + /* + * If rank is 0, determine whether this should be the + * primary or backup subflow, depending on what we have. 
+		 *
+		 * Otherwise, if greater than 0, make it a backup flow.
+		 */
+		TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
+			MPTS_LOCK(mpts1);
+			if (mpts1->mpts_flags & MPTSF_PREFERRED) {
+				MPTS_UNLOCK(mpts1);
+				break;
+			}
+			MPTS_UNLOCK(mpts1);
+		}
+
+		MPTS_LOCK(mpts);
+		mpts->mpts_flags &= ~MPTSF_PREFERRED;
+		mpts->mpts_rank = rank;
+		if (mpts1 != NULL && mpts != mpts1) {
+			/* preferred subflow found; set rank as necessary */
+			if (rank == 0)
+				mpts->mpts_rank = (mpts1->mpts_rank + 1);
+		} else if (rank == 0) {
+			/* no preferred one found; promote this */
+			rank = 1;
+		}
+		MPTS_UNLOCK(mpts);
+	}
+
+	if (rank == 1) {
+		/*
+		 * If rank is 1, promote this subflow to be preferred.
+		 */
+		TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
+			MPTS_LOCK(mpts1);
+			if (mpts1 != mpts &&
+			    (mpts1->mpts_flags & MPTSF_PREFERRED)) {
+				mpts1->mpts_flags &= ~MPTSF_PREFERRED;
+				if (mpte->mpte_nummpcapflows > 1)
+					mptcp_connorder_helper(mpts1);
+			} else if (mpts1 == mpts) {
+				mpts1->mpts_rank = 1;
+				if (mpts1->mpts_flags & MPTSF_MP_CAPABLE) {
+					mpts1->mpts_flags |= MPTSF_PREFERRED;
+					if (mpte->mpte_nummpcapflows > 1)
+						mptcp_connorder_helper(mpts1);
+				}
+			}
+			MPTS_UNLOCK(mpts1);
+		}
+	}
+
+out:
+	return (error);
+}
+
+static void
+mptcp_connorder_helper(struct mptsub *mpts)
+{
+	struct socket *so = mpts->mpts_socket;
+	struct tcpcb *tp = NULL;
+
+	socket_lock(so, 0);
+
+	tp = intotcpcb(sotoinpcb(so));
+	tp->t_mpflags |= TMPF_SND_MPPRIO;
+	if (mpts->mpts_flags & MPTSF_PREFERRED)
+		tp->t_mpflags &= ~TMPF_BACKUP_PATH;
+	else
+		tp->t_mpflags |= TMPF_BACKUP_PATH;
+	mptcplog((LOG_DEBUG, "%s cid %d flags %x", __func__,
+	    mpts->mpts_connid, mpts->mpts_flags));
+	socket_unlock(so, 0);
+}
+
+/*
+ * Handle SIOCGCONNORDER
+ */
+int
+mptcp_getconnorder(struct mptses *mpte, connid_t cid, uint32_t *rank)
+{
+	struct mptsub *mpts;
+	int error = 0;
+
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+	VERIFY(rank != NULL);
+	*rank = 0;
+
+	if (cid == CONNID_ANY || cid == CONNID_ALL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+		if (mpts->mpts_connid == cid)
+			break;
+	}
+	if (mpts == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+
+	MPTS_LOCK(mpts);
+	*rank = mpts->mpts_rank;
+	MPTS_UNLOCK(mpts);
+out:
+	return (error);
+}
+
+/*
+ * User-protocol pru_control callback.
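+ * Only the PF_MULTIPATH ioctls are recognized here; everything else
+ * returns EOPNOTSUPP. Hypothetical userland sketch (struct so_aidreq
+ * is assumed to be the non-kernel spelling of the 32/64-bit request
+ * forms handled below):
+ *
+ *	struct so_aidreq sar;
+ *	bzero(&sar, sizeof (sar));
+ *	ioctl(mp_fd, SIOCGASSOCIDS, &sar);	-- sar.sar_cnt is 0 or 1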
+ */ +static int +mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct proc *p) +{ +#pragma unused(ifp, p) + struct mppcb *mpp = sotomppcb(mp_so); + struct mptses *mpte; + int error = 0; + + if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { + error = EINVAL; + goto out; + } + mpte = mptompte(mpp); + VERIFY(mpte != NULL); + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + switch (cmd) { + case SIOCGASSOCIDS32: { /* struct so_aidreq32 */ + struct so_aidreq32 aidr; + bcopy(data, &aidr, sizeof (aidr)); + error = mptcp_getassocids(mpte, &aidr.sar_cnt, + aidr.sar_aidp); + if (error == 0) + bcopy(&aidr, data, sizeof (aidr)); + break; + } + + case SIOCGASSOCIDS64: { /* struct so_aidreq64 */ + struct so_aidreq64 aidr; + bcopy(data, &aidr, sizeof (aidr)); + error = mptcp_getassocids(mpte, &aidr.sar_cnt, + aidr.sar_aidp); + if (error == 0) + bcopy(&aidr, data, sizeof (aidr)); + break; + } + + case SIOCGCONNIDS32: { /* struct so_cidreq32 */ + struct so_cidreq32 cidr; + bcopy(data, &cidr, sizeof (cidr)); + error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt, + cidr.scr_cidp); + if (error == 0) + bcopy(&cidr, data, sizeof (cidr)); + break; + } + + case SIOCGCONNIDS64: { /* struct so_cidreq64 */ + struct so_cidreq64 cidr; + bcopy(data, &cidr, sizeof (cidr)); + error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt, + cidr.scr_cidp); + if (error == 0) + bcopy(&cidr, data, sizeof (cidr)); + break; + } + + case SIOCGCONNINFO32: { /* struct so_cinforeq32 */ + struct so_cinforeq32 cifr; + bcopy(data, &cifr, sizeof (cifr)); + error = mptcp_getconninfo(mpte, &cifr.scir_cid, + &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error, + cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst, + &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data, + &cifr.scir_aux_len); + if (error == 0) + bcopy(&cifr, data, sizeof (cifr)); + break; + } + + case SIOCGCONNINFO64: { /* struct so_cinforeq64 */ + struct so_cinforeq64 cifr; + bcopy(data, &cifr, sizeof (cifr)); + error = mptcp_getconninfo(mpte, &cifr.scir_cid, + &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error, + cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst, + &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data, + &cifr.scir_aux_len); + if (error == 0) + bcopy(&cifr, data, sizeof (cifr)); + break; + } + + case SIOCSCONNORDER: { /* struct so_cordreq */ + struct so_cordreq cor; + bcopy(data, &cor, sizeof (cor)); + error = mptcp_setconnorder(mpte, cor.sco_cid, cor.sco_rank); + if (error == 0) + bcopy(&cor, data, sizeof (cor)); + break; + } + + case SIOCGCONNORDER: { /* struct so_cordreq */ + struct so_cordreq cor; + bcopy(data, &cor, sizeof (cor)); + error = mptcp_getconnorder(mpte, cor.sco_cid, &cor.sco_rank); + if (error == 0) + bcopy(&cor, data, sizeof (cor)); + break; + } + + default: + error = EOPNOTSUPP; + break; + } +out: + return (error); +} + +/* + * Initiate a disconnect. MPTCP-level disconnection is specified by + * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow + * connection while keeping the MPTCP-level connection (association). 
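+ * E.g. (hypothetical userland spelling of the private syscall):
+ * disconnectx(mp_fd, ASSOCID_ANY, CONNID_ALL) tears down the whole
+ * association, while disconnectx(mp_fd, ASSOCID_ANY, cid) drops only
+ * the subflow named by cid.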
+ */ +static int +mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) +{ + struct mptsub *mpts; + struct socket *mp_so; + struct mptcb *mp_tp; + int error = 0; + + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + mp_so = mpte->mpte_mppcb->mpp_socket; + mp_tp = mpte->mpte_mptcb; + + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx aid %d cid %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid)); + DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, associd_t, aid, + connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp); + + VERIFY(aid == ASSOCID_ANY || aid == ASSOCID_ALL || + aid == mpte->mpte_associd); + + /* terminate the association? */ + if (cid == CONNID_ANY || cid == CONNID_ALL) { + /* if we're not detached, go thru socket state checks */ + if (!(mp_so->so_flags & SOF_PCBCLEARING)) { + if (!(mp_so->so_state & (SS_ISCONNECTED| + SS_ISCONNECTING))) { + error = ENOTCONN; + goto out; + } + if (mp_so->so_state & SS_ISDISCONNECTING) { + error = EALREADY; + goto out; + } + } + MPT_LOCK(mp_tp); + mptcp_cancel_all_timers(mp_tp); + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { + (void) mptcp_close(mpte, mp_tp); + MPT_UNLOCK(mp_tp); + } else if ((mp_so->so_options & SO_LINGER) && + mp_so->so_linger == 0) { + (void) mptcp_drop(mpte, mp_tp, 0); + MPT_UNLOCK(mp_tp); + } else { + MPT_UNLOCK(mp_tp); + soisdisconnecting(mp_so); + sbflush(&mp_so->so_rcv); + if (mptcp_usrclosed(mpte) != NULL) + (void) mptcp_output(mpte); + } + } else { + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + if (mpts->mpts_connid != cid) + continue; + MPTS_LOCK(mpts); + mptcp_subflow_disconnect(mpte, mpts, FALSE); + MPTS_UNLOCK(mpts); + break; + } + + if (mpts == NULL) { + error = EINVAL; + goto out; + } + } + + if (error == 0) + mptcp_thread_signal(mpte); + + if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) == + (SS_CANTRCVMORE | SS_CANTSENDMORE)) { + /* the socket has been shutdown, no more sockopt's */ + mptcp_flush_sopts(mpte); + } + +out: + return (error); +} + +/* + * User-protocol pru_disconnectx callback. + */ +static int +mptcp_usr_disconnectx(struct socket *mp_so, associd_t aid, connid_t cid) +{ + struct mppcb *mpp = sotomppcb(mp_so); + struct mptses *mpte; + int error = 0; + + if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { + error = EINVAL; + goto out; + } + mpte = mptompte(mpp); + VERIFY(mpte != NULL); + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + + if (aid != ASSOCID_ANY && aid != ASSOCID_ALL && + aid != mpte->mpte_associd) { + error = EINVAL; + goto out; + } + + error = mptcp_disconnectx(mpte, aid, cid); +out: + return (error); +} + +/* + * User issued close, and wish to trail thru shutdown states. 
+ */
+static struct mptses *
+mptcp_usrclosed(struct mptses *mpte)
+{
+	struct socket *mp_so;
+	struct mptcb *mp_tp;
+	struct mptsub *mpts;
+
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+	mp_so = mpte->mpte_mppcb->mpp_socket;
+	mp_tp = mpte->mpte_mptcb;
+
+	MPT_LOCK(mp_tp);
+	mptcp_close_fsm(mp_tp, MPCE_CLOSE);
+
+	if (mp_tp->mpt_state == TCPS_CLOSED) {
+		mpte = mptcp_close(mpte, mp_tp);
+		MPT_UNLOCK(mp_tp);
+	} else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
+		MPT_UNLOCK(mp_tp);
+		soisdisconnected(mp_so);
+	} else {
+		mp_tp->mpt_sndmax += 1;	/* adjust for Data FIN */
+		MPT_UNLOCK(mp_tp);
+
+		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+			MPTS_LOCK(mpts);
+			mptcp_subflow_disconnect(mpte, mpts, FALSE);
+			MPTS_UNLOCK(mpts);
+		}
+	}
+	/*
+	 * XXX: adi@apple.com
+	 *
+	 * Do we need to handle time wait specially here? We need to handle
+	 * the case where MPTCP has been established, but we have no usable
+	 * subflow to use. Do we want to wait a while before forcibly
+	 * tearing this MPTCP down, in case we have one or more subflows
+	 * that are flow controlled?
+	 */
+
+	return (mpte);
+}
+
+/*
+ * User-protocol pru_peeloff callback.
+ */
+static int
+mptcp_usr_peeloff(struct socket *mp_so, associd_t aid, struct socket **psop)
+{
+	struct mppcb *mpp = sotomppcb(mp_so);
+	struct mptses *mpte;
+	int error = 0;
+
+	VERIFY(psop != NULL);
+
+	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
+		error = EINVAL;
+		goto out;
+	}
+	mpte = mptompte(mpp);
+	VERIFY(mpte != NULL);
+
+	error = mptcp_peeloff(mpte, aid, psop);
+out:
+	return (error);
+}
+
+/*
+ * Transform a previously connected TCP subflow connection which has
+ * failed to negotiate MPTCP to its own socket which can be externalized
+ * with a file descriptor. Valid only when the MPTCP socket is not
+ * yet associated (MPTCP-level connection has not been established.)
+ */
+static int
+mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop)
+{
+	struct socket *so = NULL, *mp_so;
+	struct mptsub *mpts;
+	int error = 0;
+
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+	mp_so = mpte->mpte_mppcb->mpp_socket;
+
+	VERIFY(psop != NULL);
+	*psop = NULL;
+
+	DTRACE_MPTCP3(peeloff, struct mptses *, mpte, associd_t, aid,
+	    struct socket *, mp_so);
+
+	/* peeloff cannot happen after an association is established */
+	if (mpte->mpte_associd != ASSOCID_ANY) {
+		error = EINVAL;
+		goto out;
+	}
+
+	if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+		MPTS_LOCK(mpts);
+		if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
+			panic("%s: so %p is MPTCP capable but mp_so %p "
+			    "aid is %d\n", __func__, so, mp_so,
+			    mpte->mpte_associd);
+			/* NOTREACHED */
+		}
+		MPTS_ADDREF_LOCKED(mpts);	/* for us */
+		so = mpts->mpts_socket;
+		VERIFY(so != NULL);
+		/*
+		 * This subflow socket is about to be externalized; make it
+		 * appear as if it has the same properties as the MPTCP socket,
+		 * undo what's done earlier in mptcp_subflow_add().
+		 */
+		mptcp_subflow_sopeeloff(mpte, mpts, so);
+		MPTS_UNLOCK(mpts);
+
+		mptcp_subflow_del(mpte, mpts, FALSE);
+		MPTS_REMREF(mpts);		/* ours */
+		/*
+		 * XXX adi@apple.com
+		 *
+		 * Here we need to make sure the subflow socket is not
+		 * flow controlled; need to clear both INP_FLOW_CONTROLLED
+		 * and INP_FLOW_SUSPENDED on the subflow socket, since
+		 * we will no longer be monitoring its events.
+		 */
+		break;
+	}
+
+	if (so == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+	*psop = so;
+
+	mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__,
+	    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));
+out:
+	return (error);
+}
+
+/*
+ * After a receive, possibly send some update to the peer.
+ */
+static int
+mptcp_usr_rcvd(struct socket *mp_so, int flags)
+{
+#pragma unused(flags)
+	struct mppcb *mpp = sotomppcb(mp_so);
+	struct mptses *mpte;
+	int error = 0;
+
+	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
+		error = EINVAL;
+		goto out;
+	}
+	mpte = mptompte(mpp);
+	VERIFY(mpte != NULL);
+
+	error = mptcp_output(mpte);
+out:
+	return (error);
+}
+
+/*
+ * Do a send by putting data in the output queue.
+ */
+static int
+mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
+    struct sockaddr *nam, struct mbuf *control, struct proc *p)
+{
+#pragma unused(nam, p)
+	struct mppcb *mpp = sotomppcb(mp_so);
+	struct mptses *mpte;
+	int error = 0;
+
+	if (prus_flags & (PRUS_OOB|PRUS_EOF)) {
+		error = EOPNOTSUPP;
+		goto out;
+	}
+
+	if (nam != NULL) {
+		error = EOPNOTSUPP;
+		goto out;
+	}
+
+	if (control != NULL && control->m_len != 0) {
+		error = EOPNOTSUPP;
+		goto out;
+	}
+
+	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
+		error = ECONNRESET;
+		goto out;
+	}
+	mpte = mptompte(mpp);
+	VERIFY(mpte != NULL);
+
+	if (!(mp_so->so_state & SS_ISCONNECTED)) {
+		error = ENOTCONN;
+		goto out;
+	}
+
+	mptcp_insert_dsn(mpp, m);
+	VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
+	(void) sbappendstream(&mp_so->so_snd, m);
+	m = NULL;
+
+	if (mpte != NULL) {
+		/*
+		 * XXX: adi@apple.com
+		 *
+		 * PRUS_MORETOCOME could be set, but we don't check it now.
+		 */
+		error = mptcp_output(mpte);
+	}
+
+out:
+	if (error) {
+		if (m != NULL)
+			m_freem(m);
+		if (control != NULL)
+			m_freem(control);
+	}
+	return (error);
+}
+
+/*
+ * Mark the MPTCP connection as being incapable of further output.
+ */
+static int
+mptcp_usr_shutdown(struct socket *mp_so)
+{
+	struct mppcb *mpp = sotomppcb(mp_so);
+	struct mptses *mpte;
+	int error = 0;
+
+	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
+		error = EINVAL;
+		goto out;
+	}
+	mpte = mptompte(mpp);
+	VERIFY(mpte != NULL);
+
+	socantsendmore(mp_so);
+
+	mpte = mptcp_usrclosed(mpte);
+	if (mpte != NULL)
+		error = mptcp_output(mpte);
+out:
+	return (error);
+}
+
+/*
+ * Copy the contents of uio into a properly sized mbuf chain.
+ */
+static int
+mptcp_uiotombuf(struct uio *uio, int how, int space, uint32_t align,
+    struct mbuf **top)
+{
+	struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
+	user_ssize_t resid, tot, len, progress;	/* must be user_ssize_t */
+	int error;
+
+	VERIFY(top != NULL && *top == NULL);
+
+	/*
+	 * space can be zero or an arbitrary large value bound by
+	 * the total data supplied by the uio.
+	 */
+	resid = uio_resid(uio);
+	if (space > 0)
+		tot = imin(resid, space);
+	else
+		tot = resid;
+
+	/*
+	 * The smallest unit is a single mbuf with pkthdr.
+	 * We can't align past it.
+	 */
+	if (align >= MHLEN)
+		return (EINVAL);
+
+	/*
+	 * Give us the full allocation or nothing.
+	 * If space is zero return the smallest empty mbuf.
+	 */
+	if ((len = tot + align) == 0)
+		len = 1;
+
+	/* Loop and append maximum sized mbufs to the chain tail.
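+	 * Illustrative walk of the size-class selection (assuming
+	 * njcl > 0): for len = 20000 the first pass grabs a 16KB jumbo
+	 * cluster and the second a 4KB big cluster, which covers the
+	 * remaining 3616 bytes; each allocation carries its own M_PKTHDR.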
*/
+	while (len > 0) {
+		uint32_t m_needed = 1;
+
+		if (njcl > 0 && len > MBIGCLBYTES)
+			mb = m_getpackets_internal(&m_needed, 1,
+			    how, 1, M16KCLBYTES);
+		else if (len > MCLBYTES)
+			mb = m_getpackets_internal(&m_needed, 1,
+			    how, 1, MBIGCLBYTES);
+		else if (len >= (signed)MINCLSIZE)
+			mb = m_getpackets_internal(&m_needed, 1,
+			    how, 1, MCLBYTES);
+		else
+			mb = m_gethdr(how, MT_DATA);
+
+		/* Fail the whole operation if one mbuf can't be allocated. */
+		if (mb == NULL) {
+			if (nm != NULL)
+				m_freem(nm);
+			return (ENOBUFS);
+		}
+
+		/* Book keeping. */
+		VERIFY(mb->m_flags & M_PKTHDR);
+		len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
+		if (mtail != NULL)
+			mtail->m_next = mb;
+		else
+			nm = mb;
+		mtail = mb;
+	}
+
+	m = nm;
+	m->m_data += align;
+
+	progress = 0;
+	/* Fill all mbufs with uio data and update header information. */
+	for (mb = m; mb != NULL; mb = mb->m_next) {
+		len = imin(M_TRAILINGSPACE(mb), tot - progress);
+
+		error = uiomove(mtod(mb, char *), len, uio);
+		if (error != 0) {
+			m_freem(m);
+			return (error);
+		}
+
+		/* each mbuf is M_PKTHDR chained via m_next */
+		mb->m_len = len;
+		mb->m_pkthdr.len = len;
+
+		progress += len;
+	}
+	VERIFY(progress == tot);
+	*top = m;
+	return (0);
+}
+
+/*
+ * MPTCP socket protocol-user socket send routine, derived from sosend().
+ */
+static int
+mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
+    struct mbuf *top, struct mbuf *control, int flags)
+{
+#pragma unused(addr)
+	int32_t space;
+	user_ssize_t resid;
+	int error, sendflags;
+	struct proc *p = current_proc();
+	int sblocked = 0;
+
+	/* UIO is required for now, due to per-mbuf M_PKTHDR constraints */
+	if (uio == NULL || top != NULL) {
+		error = EINVAL;
+		goto out;
+	}
+	resid = uio_resid(uio);
+
+	socket_lock(mp_so, 1);
+	so_update_last_owner_locked(mp_so, p);
+	so_update_policy(mp_so);
+
+	VERIFY(mp_so->so_type == SOCK_STREAM);
+	VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
+
+	if ((flags & (MSG_OOB|MSG_DONTROUTE|MSG_HOLD|MSG_SEND|MSG_FLUSH)) ||
+	    (mp_so->so_flags & SOF_ENABLE_MSGS)) {
+		error = EOPNOTSUPP;
+		socket_unlock(mp_so, 1);
+		goto out;
+	}
+
+	/*
+	 * In theory resid should be unsigned. However, space must be
+	 * signed, as it might be less than 0 if we over-committed, and we
+	 * must use a signed comparison of space and resid. On the other
+	 * hand, a negative resid causes us to loop sending 0-length
+	 * segments to the protocol.
+	 */
+	if (resid < 0 || (flags & MSG_EOR) || control != NULL) {
+		error = EINVAL;
+		socket_unlock(mp_so, 1);
+		goto out;
+	}
+
+	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
+
+	do {
+		error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
+		    &sblocked, NULL);
+		if (error != 0)
+			goto release;
+
+		space = sbspace(&mp_so->so_snd);
+		do {
+			socket_unlock(mp_so, 0);
+			/*
+			 * Copy the data from userland into an mbuf chain.
+			 */
+			error = mptcp_uiotombuf(uio, M_WAITOK, space, 0, &top);
+			if (error != 0) {
+				socket_lock(mp_so, 0);
+				goto release;
+			}
+			VERIFY(top != NULL);
+			space -= resid - uio_resid(uio);
+			resid = uio_resid(uio);
+			socket_lock(mp_so, 0);
+
+			/*
+			 * Compute flags here, for pru_send and NKEs.
+			 */
+			sendflags = (resid > 0 && space > 0) ?
+ PRUS_MORETOCOME : 0; + + /* + * Socket filter processing + */ + VERIFY(control == NULL); + error = sflt_data_out(mp_so, NULL, &top, &control, 0); + if (error != 0) { + if (error == EJUSTRETURN) { + error = 0; + top = NULL; + /* always free control if any */ + } + goto release; + } + if (control != NULL) { + m_freem(control); + control = NULL; + } + + /* + * Pass data to protocol. + */ + error = (*mp_so->so_proto->pr_usrreqs->pru_send) + (mp_so, sendflags, top, NULL, NULL, p); + + top = NULL; + if (error != 0) + goto release; + } while (resid != 0 && space > 0); + } while (resid != 0); + +release: + if (sblocked) + sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */ + else + socket_unlock(mp_so, 1); +out: + if (top != NULL) + m_freem(top); + if (control != NULL) + m_freem(control); + + return (error); +} + +/* + * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options. + * This routine simply indicates to the caller whether or not to proceed + * further with the given socket option. This is invoked by sosetoptlock() + * and sogetoptlock(). + */ +static int +mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt) +{ +#pragma unused(mp_so) + int error = 0; + + VERIFY(sopt->sopt_level == SOL_SOCKET); + + /* + * We could check for sopt_dir (set/get) here, but we'll just + * let the caller deal with it as appropriate; therefore the + * following is a superset of the socket options which we + * allow for set/get. + * + * XXX: adi@apple.com + * + * Need to consider the following cases: + * + * a. In the event peeloff(2) occurs on the subflow socket, + * we may want to issue those options which are now + * handled at the MP socket. In that case, we will need + * to record them in mptcp_setopt() so that they can + * be replayed during peeloff. + * + * b. Certain socket options don't have a clear definition + * on the expected behavior post connect(2). At the time + * those options are issued on the MP socket, there may + * be existing subflow sockets that are already connected. + */ + switch (sopt->sopt_name) { + case SO_LINGER: /* MP */ + case SO_LINGER_SEC: /* MP */ + case SO_TYPE: /* MP */ + case SO_NREAD: /* MP */ + case SO_NWRITE: /* MP */ + case SO_ERROR: /* MP */ + case SO_SNDBUF: /* MP */ + case SO_RCVBUF: /* MP */ + case SO_SNDLOWAT: /* MP */ + case SO_RCVLOWAT: /* MP */ + case SO_SNDTIMEO: /* MP */ + case SO_RCVTIMEO: /* MP */ + case SO_NKE: /* MP */ + case SO_NOSIGPIPE: /* MP */ + case SO_NOADDRERR: /* MP */ + case SO_LABEL: /* MP */ + case SO_PEERLABEL: /* MP */ + case SO_DEFUNCTOK: /* MP */ + case SO_ISDEFUNCT: /* MP */ + case SO_TRAFFIC_CLASS_DBG: /* MP */ + /* + * Tell the caller that these options are to be processed. + */ + break; + + case SO_DEBUG: /* MP + subflow */ + case SO_KEEPALIVE: /* MP + subflow */ + case SO_USELOOPBACK: /* MP + subflow */ + case SO_RANDOMPORT: /* MP + subflow */ + case SO_TRAFFIC_CLASS: /* MP + subflow */ + case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */ + case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */ + case SO_RECV_ANYIF: /* MP + subflow */ + case SO_RESTRICTIONS: /* MP + subflow */ + case SO_FLUSH: /* MP + subflow */ + /* + * Tell the caller that these options are to be processed; + * these will also be recorded later by mptcp_setopt(). + * + * NOTE: Only support integer option value for now. + */ + if (sopt->sopt_valsize != sizeof (int)) + error = EINVAL; + break; + + default: + /* + * Tell the caller to stop immediately and return an error. 
+		 */
+		error = ENOPROTOOPT;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Issue SOPT_SET for all MPTCP subflows (for integer option values).
+ */
+static int
+mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
+{
+	struct socket *mp_so;
+	struct mptsub *mpts;
+	struct mptopt smpo;
+	int error = 0;
+
+	/* just bail now if this isn't applicable to subflow sockets */
+	if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
+		error = ENOPROTOOPT;
+		goto out;
+	}
+
+	/*
+	 * Skip those that are handled internally; these options
+	 * should not have been recorded and marked with
+	 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
+	 */
+	if (mpo->mpo_level == SOL_SOCKET &&
+	    (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
+		error = ENOPROTOOPT;
+		goto out;
+	}
+
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+	mp_so = mpte->mpte_mppcb->mpp_socket;
+
+	/*
+	 * Don't bother going further if there's no subflow; mark the option
+	 * with MPOF_INTERIM so that we know whether or not to remove this
+	 * option upon encountering an error while issuing it during subflow
+	 * socket creation.
+	 */
+	if (mpte->mpte_numflows == 0) {
+		VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
+		mpo->mpo_flags |= MPOF_INTERIM;
+		/* return success */
+		goto out;
+	}
+
+	bzero(&smpo, sizeof (smpo));
+	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
+	smpo.mpo_level = mpo->mpo_level;
+	smpo.mpo_name = mpo->mpo_name;
+
+	/* grab existing values in case we need to roll back */
+	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+		struct socket *so;
+
+		MPTS_LOCK(mpts);
+		mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
+		mpts->mpts_oldintval = 0;
+		smpo.mpo_intval = 0;
+		VERIFY(mpts->mpts_socket != NULL);
+		so = mpts->mpts_socket;
+		socket_lock(so, 0);
+		if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
+			mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
+			mpts->mpts_oldintval = smpo.mpo_intval;
+		}
+		socket_unlock(so, 0);
+		MPTS_UNLOCK(mpts);
+	}
+
+	/* apply socket option */
+	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+		struct socket *so;
+
+		MPTS_LOCK(mpts);
+		mpts->mpts_flags |= MPTSF_SOPT_INPROG;
+		VERIFY(mpts->mpts_socket != NULL);
+		so = mpts->mpts_socket;
+		socket_lock(so, 0);
+		error = mptcp_subflow_sosetopt(mpte, so, mpo);
+		socket_unlock(so, 0);
+		MPTS_UNLOCK(mpts);
+		if (error != 0)
+			break;
+	}
+
+	/* cleanup, and rollback if needed */
+	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+		struct socket *so;
+
+		MPTS_LOCK(mpts);
+		if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
+			/* clear in case it's set */
+			mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
+			mpts->mpts_oldintval = 0;
+			MPTS_UNLOCK(mpts);
+			continue;
+		}
+		if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
+			mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
+			VERIFY(mpts->mpts_oldintval == 0);
+			MPTS_UNLOCK(mpts);
+			continue;
+		}
+		/* error during sosetopt, so roll it back */
+		if (error != 0) {
+			VERIFY(mpts->mpts_socket != NULL);
+			so = mpts->mpts_socket;
+			socket_lock(so, 0);
+			smpo.mpo_intval = mpts->mpts_oldintval;
+			(void) mptcp_subflow_sosetopt(mpte, so, &smpo);
+			socket_unlock(so, 0);
+		}
+		mpts->mpts_oldintval = 0;
+		mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
+		MPTS_UNLOCK(mpts);
+	}
+
+out:
+	return (error);
+}
+
+/*
+ * Handle SOPT_SET for socket options issued on MP socket.
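+ *
+ * Worked example (editorial note, not in the original source): a
+ * user-level
+ *
+ *	int one = 1;
+ *	setsockopt(mp_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof (one));
+ *
+ * on a hypothetical MPTCP descriptor mp_fd lands here; the routine
+ * records { mpo_level = IPPROTO_TCP, mpo_name = TCP_NODELAY,
+ * mpo_intval = 1 } via mptcp_sopt_alloc()/mptcp_sopt_insert() and then
+ * issues it on every existing subflow through mptcp_setopt_apply()
+ * above.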
+ */ +static int +mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) +{ + int error = 0, optval, level, optname, rec = 1; + struct mptopt smpo, *mpo = NULL; + struct socket *mp_so; + char buf[32]; + + level = sopt->sopt_level; + optname = sopt->sopt_name; + + VERIFY(sopt->sopt_dir == SOPT_SET); + VERIFY(level == SOL_SOCKET || level == IPPROTO_TCP); + MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mp_so = mpte->mpte_mppcb->mpp_socket; + + /* + * Record socket options which are applicable to subflow sockets so + * that we can replay them for new ones; see mptcp_usr_socheckopt() + * for the list of eligible socket-level options. + */ + if (level == SOL_SOCKET) { + switch (optname) { + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_USELOOPBACK: + case SO_RANDOMPORT: + case SO_TRAFFIC_CLASS: + case SO_RECV_TRAFFIC_CLASS: + case SO_PRIVILEGED_TRAFFIC_CLASS: + case SO_RECV_ANYIF: + case SO_RESTRICTIONS: + /* record it */ + break; + case SO_FLUSH: + /* don't record it */ + rec = 0; + break; + default: + /* nothing to do; just return success */ + goto out; + } + } else { + switch (optname) { + case TCP_NODELAY: + case TCP_RXT_FINDROP: + case TCP_KEEPALIVE: + case TCP_KEEPINTVL: + case TCP_KEEPCNT: + case TCP_CONNECTIONTIMEOUT: + case TCP_RXT_CONNDROPTIME: + case PERSIST_TIMEOUT: + /* eligible; record it */ + break; + default: + /* not eligible */ + error = ENOPROTOOPT; + goto out; + } + } + + if ((error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval))) != 0) + goto out; + + if (rec) { + /* search for an existing one; if not found, allocate */ + if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL) + mpo = mptcp_sopt_alloc(M_WAITOK); + + if (mpo == NULL) { + error = ENOBUFS; + } else { + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s " + "val %d %s\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(level, optname, buf, + sizeof (buf)), optval, + (mpo->mpo_flags & MPOF_ATTACHED) ? + "updated" : "recorded")); + + /* initialize or update, as needed */ + mpo->mpo_intval = optval; + if (!(mpo->mpo_flags & MPOF_ATTACHED)) { + mpo->mpo_level = level; + mpo->mpo_name = optname; + mptcp_sopt_insert(mpte, mpo); + } + VERIFY(mpo->mpo_flags & MPOF_ATTACHED); + /* this can be issued on the subflow socket */ + mpo->mpo_flags |= MPOF_SUBFLOW_OK; + } + } else { + bzero(&smpo, sizeof (smpo)); + mpo = &smpo; + mpo->mpo_flags |= MPOF_SUBFLOW_OK; + mpo->mpo_level = level; + mpo->mpo_name = optname; + mpo->mpo_intval = optval; + } + VERIFY(mpo == NULL || error == 0); + + /* issue this socket option on existing subflows */ + if (error == 0) { + error = mptcp_setopt_apply(mpte, mpo); + if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) { + VERIFY(mpo != &smpo); + mptcp_sopt_remove(mpte, mpo); + mptcp_sopt_free(mpo); + } + if (mpo == &smpo) + mpo->mpo_flags &= ~MPOF_INTERIM; + } +out: + if (error == 0 && mpo != NULL) { + mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d set %s\n", + __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(level, optname, buf, + sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ? + "pending" : "successful")); + } else if (error != 0) { + mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s can't be issued " + "error %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level, + optname, buf, sizeof (buf)), error)); + } + return (error); +} + +/* + * Handle SOPT_GET for socket options issued on MP socket. 
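+ *
+ * E.g. (editorial note) getsockopt(2) with IPPROTO_TCP/TCP_KEEPALIVE
+ * first obtains the system default (mptcp_subflow_keeptime, via
+ * mptcp_default_tcp_optval() below) and then overrides it with the
+ * recorded value if that option was previously set on the MP socket.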
+ */
+static int
+mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
+{
+	int error = 0, optval;
+
+	VERIFY(sopt->sopt_dir == SOPT_GET);
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+
+	/*
+	 * We only handle SOPT_GET for TCP level socket options; we should
+	 * not get here for socket level options since they are already
+	 * handled at the socket layer.
+	 */
+	if (sopt->sopt_level != IPPROTO_TCP) {
+		error = ENOPROTOOPT;
+		goto out;
+	}
+
+	switch (sopt->sopt_name) {
+	case TCP_NODELAY:
+	case TCP_RXT_FINDROP:
+	case TCP_KEEPALIVE:
+	case TCP_KEEPINTVL:
+	case TCP_KEEPCNT:
+	case TCP_CONNECTIONTIMEOUT:
+	case TCP_RXT_CONNDROPTIME:
+	case PERSIST_TIMEOUT:
+		/* eligible; get the default value just in case */
+		error = mptcp_default_tcp_optval(mpte, sopt, &optval);
+		break;
+	default:
+		/* not eligible */
+		error = ENOPROTOOPT;
+		break;
+	}
+
+	/*
+	 * Search for a previously-issued TCP level socket option and
+	 * return the recorded option value.  This assumes that the
+	 * value did not get modified by the lower layer after it was
+	 * issued at setsockopt(2) time.  If not found, we'll return
+	 * the default value obtained earlier.
+	 */
+	if (error == 0) {
+		struct mptopt *mpo;
+
+		if ((mpo = mptcp_sopt_find(mpte, sopt)) != NULL)
+			optval = mpo->mpo_intval;
+
+		error = sooptcopyout(sopt, &optval, sizeof (int));
+	}
+out:
+	return (error);
+}
+
+/*
+ * Return default values for TCP socket options.  Ideally we would query the
+ * subflow TCP socket, but that requires creating a subflow socket before
+ * connectx(2) time.  To simplify things, just return the default values
+ * that we know of.
+ */
+static int
+mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval)
+{
+	int error = 0;
+
+	VERIFY(sopt->sopt_level == IPPROTO_TCP);
+	VERIFY(sopt->sopt_dir == SOPT_GET);
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+
+	/* try to do what tcp_newtcpcb() does */
+	switch (sopt->sopt_name) {
+	case TCP_NODELAY:
+	case TCP_RXT_FINDROP:
+	case TCP_KEEPINTVL:
+	case TCP_KEEPCNT:
+	case TCP_CONNECTIONTIMEOUT:
+	case TCP_RXT_CONNDROPTIME:
+		*optval = 0;
+		break;
+
+	case TCP_KEEPALIVE:
+		*optval = mptcp_subflow_keeptime;
+		break;
+
+	case PERSIST_TIMEOUT:
+		*optval = tcp_max_persist_timeout;
+		break;
+
+	default:
+		error = ENOPROTOOPT;
+		break;
+	}
+	return (error);
+}
+
+/*
+ * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
+ * socket, at SOL_SOCKET and IPPROTO_TCP levels.  The former is restricted
+ * to those that are allowed by mptcp_usr_socheckopt().
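+ *
+ * Rough call flow (editorial sketch; the generic layer lives in
+ * bsd/kern/uipc_socket.c): setsockopt(2)/getsockopt(2) ->
+ * sosetoptlock()/sogetoptlock(), which consult mptcp_usr_socheckopt()
+ * for SOL_SOCKET options and then hand both levels to this routine,
+ * where SOPT_SET and SOPT_GET fan out to mptcp_setopt() and
+ * mptcp_getopt() above.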
+ */
+int
+mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
+{
+	struct mppcb *mpp = sotomppcb(mp_so);
+	struct mptses *mpte;
+	int error = 0;
+
+	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
+		error = EINVAL;
+		goto out;
+	}
+	mpte = mptompte(mpp);
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+
+	/* we only handle socket and TCP-level socket options for MPTCP */
+	if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
+		char buf[32];
+		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s level not "
+		    "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
+		    mptcp_sopt2str(sopt->sopt_level,
+		    sopt->sopt_name, buf, sizeof (buf))));
+		error = EINVAL;
+		goto out;
+	}
+
+	switch (sopt->sopt_dir) {
+	case SOPT_SET:
+		error = mptcp_setopt(mpte, sopt);
+		break;
+
+	case SOPT_GET:
+		error = mptcp_getopt(mpte, sopt);
+		break;
+	}
+out:
+	return (error);
+}
+
+/*
+ * Return a string representation of <sopt_level,sopt_name>.
+ */
+const char *
+mptcp_sopt2str(int level, int optname, char *dst, int size)
+{
+	char lbuf[32], obuf[32];
+	const char *l = lbuf, *o = obuf;
+
+	(void) snprintf(lbuf, sizeof (lbuf), "0x%x", level);
+	(void) snprintf(obuf, sizeof (obuf), "0x%x", optname);
+
+	switch (level) {
+	case SOL_SOCKET:
+		l = "SOL_SOCKET";
+		switch (optname) {
+		case SO_LINGER:
+			o = "SO_LINGER";
+			break;
+		case SO_LINGER_SEC:
+			o = "SO_LINGER_SEC";
+			break;
+		case SO_DEBUG:
+			o = "SO_DEBUG";
+			break;
+		case SO_KEEPALIVE:
+			o = "SO_KEEPALIVE";
+			break;
+		case SO_USELOOPBACK:
+			o = "SO_USELOOPBACK";
+			break;
+		case SO_TYPE:
+			o = "SO_TYPE";
+			break;
+		case SO_NREAD:
+			o = "SO_NREAD";
+			break;
+		case SO_NWRITE:
+			o = "SO_NWRITE";
+			break;
+		case SO_ERROR:
+			o = "SO_ERROR";
+			break;
+		case SO_SNDBUF:
+			o = "SO_SNDBUF";
+			break;
+		case SO_RCVBUF:
+			o = "SO_RCVBUF";
+			break;
+		case SO_SNDLOWAT:
+			o = "SO_SNDLOWAT";
+			break;
+		case SO_RCVLOWAT:
+			o = "SO_RCVLOWAT";
+			break;
+		case SO_SNDTIMEO:
+			o = "SO_SNDTIMEO";
+			break;
+		case SO_RCVTIMEO:
+			o = "SO_RCVTIMEO";
+			break;
+		case SO_NKE:
+			o = "SO_NKE";
+			break;
+		case SO_NOSIGPIPE:
+			o = "SO_NOSIGPIPE";
+			break;
+		case SO_NOADDRERR:
+			o = "SO_NOADDRERR";
+			break;
+		case SO_RESTRICTIONS:
+			o = "SO_RESTRICTIONS";
+			break;
+		case SO_LABEL:
+			o = "SO_LABEL";
+			break;
+		case SO_PEERLABEL:
+			o = "SO_PEERLABEL";
+			break;
+		case SO_RANDOMPORT:
+			o = "SO_RANDOMPORT";
+			break;
+		case SO_TRAFFIC_CLASS:
+			o = "SO_TRAFFIC_CLASS";
+			break;
+		case SO_RECV_TRAFFIC_CLASS:
+			o = "SO_RECV_TRAFFIC_CLASS";
+			break;
+		case SO_TRAFFIC_CLASS_DBG:
+			o = "SO_TRAFFIC_CLASS_DBG";
+			break;
+		case SO_PRIVILEGED_TRAFFIC_CLASS:
+			o = "SO_PRIVILEGED_TRAFFIC_CLASS";
+			break;
+		case SO_DEFUNCTOK:
+			o = "SO_DEFUNCTOK";
+			break;
+		case SO_ISDEFUNCT:
+			o = "SO_ISDEFUNCT";
+			break;
+		case SO_OPPORTUNISTIC:
+			o = "SO_OPPORTUNISTIC";
+			break;
+		case SO_FLUSH:
+			o = "SO_FLUSH";
+			break;
+		case SO_RECV_ANYIF:
+			o = "SO_RECV_ANYIF";
+			break;
+		}
+		break;
+	case IPPROTO_TCP:
+		l = "IPPROTO_TCP";
+		switch (optname) {
+		case TCP_KEEPALIVE:
+			o = "TCP_KEEPALIVE";
+			break;
+		case TCP_KEEPINTVL:
+			o = "TCP_KEEPINTVL";
+			break;
+		case TCP_KEEPCNT:
+			o = "TCP_KEEPCNT";
+			break;
+		case TCP_CONNECTIONTIMEOUT:
+			o = "TCP_CONNECTIONTIMEOUT";
+			break;
+		case TCP_RXT_CONNDROPTIME:
+			o = "TCP_RXT_CONNDROPTIME";
+			break;
+		case PERSIST_TIMEOUT:
+			o = "PERSIST_TIMEOUT";
+			break;
+		}
+		break;
+	}
+
+	(void) snprintf(dst, size, "<%s,%s>", l, o);
+	return (dst);
+}
diff --git a/bsd/netinet/mptcp_var.h b/bsd/netinet/mptcp_var.h
new file mode 100644
index 000000000..c2ac1c018
--- /dev/null
+++ b/bsd/netinet/mptcp_var.h @@ -0,0 +1,604 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _NETINET_MPTCP_VAR_H_ +#define _NETINET_MPTCP_VAR_H_ + +#ifdef PRIVATE +#include +#include +#endif + +#ifdef BSD_KERNEL_PRIVATE +#include +#include +#include +#include +#include + +/* + * MPTCP Session + * + * This is an extension to the multipath PCB specific for MPTCP, protected by + * the per-PCB mpp_lock (also the socket's lock); MPTCP thread signalling uses + * its own mpte_thread_lock due to lock ordering constraints. + */ +struct mptses { + struct mppcb *mpte_mppcb; /* back ptr to multipath PCB */ + struct mptcb *mpte_mptcb; /* ptr to MPTCP PCB */ + TAILQ_HEAD(, mptopt) mpte_sopts; /* list of socket options */ + TAILQ_HEAD(, mptsub) mpte_subflows; /* list of subflows */ + uint16_t mpte_numflows; /* # of subflows in list */ + uint16_t mpte_nummpcapflows; /* # of MP_CAP subflows */ + associd_t mpte_associd; /* MPTCP association ID */ + connid_t mpte_connid_last; /* last used connection ID */ + /* + * Threading (protected by mpte_thread_lock) + */ + decl_lck_mtx_data(, mpte_thread_lock); /* thread lock */ + struct thread *mpte_thread; /* worker thread */ + uint32_t mpte_thread_active; /* thread is running */ + uint32_t mpte_thread_reqs; /* # of requests for thread */ + struct mptsub *mpte_active_sub; /* ptr to last active subf */ + u_int8_t mpte_flags; /* per mptcp session flags */ + u_int8_t mpte_lost_aid; /* storing lost address id */ +}; + +/* + * Valid values for mpte_flags. 
+ */
+#define	MPTE_SND_REM_ADDR	0x01	/* Send Remove_addr option */
+
+#define	mptompte(mp)	((struct mptses *)(mp)->mpp_pcbe)
+
+#define	MPTE_LOCK_ASSERT_HELD(_mpte)					\
+	lck_mtx_assert(&(_mpte)->mpte_mppcb->mpp_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	MPTE_LOCK_ASSERT_NOTHELD(_mpte)					\
+	lck_mtx_assert(&(_mpte)->mpte_mppcb->mpp_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	MPTE_LOCK(_mpte)						\
+	lck_mtx_lock(&(_mpte)->mpte_mppcb->mpp_lock)
+
+#define	MPTE_LOCK_SPIN(_mpte)						\
+	lck_mtx_lock_spin(&(_mpte)->mpte_mppcb->mpp_lock)
+
+#define	MPTE_CONVERT_LOCK(_mpte) do {					\
+	MPTE_LOCK_ASSERT_HELD(_mpte);					\
+	lck_mtx_convert_spin(&(_mpte)->mpte_mppcb->mpp_lock);		\
+} while (0)
+
+#define	MPTE_UNLOCK(_mpte)						\
+	lck_mtx_unlock(&(_mpte)->mpte_mppcb->mpp_lock)
+
+/*
+ * MPTCP socket options
+ */
+struct mptopt {
+	TAILQ_ENTRY(mptopt)	mpo_entry;	/* glue to other options */
+	uint32_t		mpo_flags;	/* see flags below */
+	int			mpo_level;	/* sopt_level */
+	int			mpo_name;	/* sopt_name */
+	int			mpo_intval;	/* sopt_val */
+};
+
+#define	MPOF_ATTACHED		0x1	/* attached to MP socket */
+#define	MPOF_SUBFLOW_OK		0x2	/* can be issued on subflow socket */
+#define	MPOF_INTERIM		0x4	/* has not been issued on any subflow */
+
+/*
+ * Structure passed down to TCP during subflow connection establishment
+ * containing information pertaining to the MPTCP connection.
+ */
+struct mptsub_connreq {
+	uint32_t	mpcr_type;	/* see MPTSUB_CONNREQ_* below */
+	uint32_t	mpcr_ifscope;	/* ifscope parameter to connectx(2) */
+	struct proc	*mpcr_proc;	/* process issuing connectx(2) */
+};
+
+/* valid values for mpcr_type */
+#define	MPTSUB_CONNREQ_MP_ENABLE	1	/* enable MPTCP */
+#define	MPTSUB_CONNREQ_MP_ADD		2	/* join an existing MPTCP */
+
+/*
+ * MPTCP subflow
+ *
+ * Protected by the per-subflow mpts_lock.  Note that mpts_flags
+ * and mpts_evctl are modified via atomic operations.
+ */
+struct mptsub {
+	decl_lck_mtx_data(, mpts_lock);		/* per-subflow lock */
+	TAILQ_ENTRY(mptsub)	mpts_entry;	/* glue to peer subflows */
+	uint32_t		mpts_refcnt;	/* reference count */
+	uint32_t		mpts_flags;	/* see flags below */
+	uint32_t		mpts_evctl;	/* subflow control events */
+	uint32_t		mpts_family;	/* address family */
+	connid_t		mpts_connid;	/* subflow connection ID */
+	int			mpts_oldintval;	/* sopt_val before sosetopt  */
+	uint32_t		mpts_rank;	/* subflow priority/rank */
+	int32_t			mpts_soerror;	/* most recent subflow error */
+	struct mptses		*mpts_mpte;	/* back ptr to MPTCP session */
+	struct socket		*mpts_socket;	/* subflow socket */
+	struct sockaddr_list	*mpts_src_sl;	/* source list */
+	struct sockaddr_list	*mpts_dst_sl;	/* destination list */
+	struct ifnet		*mpts_outif;	/* outbound interface */
+	u_int64_t		mpts_sndnxt;	/* next byte to send in mp so */
+	u_int32_t		mpts_rel_seq;	/* running count of subflow # */
+	struct {
+		u_int64_t	mptsl_dsn;	/* Data Sequence Number */
+		u_int32_t	mptsl_sseq;	/* Corresponding Data Seq */
+		u_int32_t	mptsl_len;	/* length of mapping */
+	} mpts_lastmap;
+	struct protosw		*mpts_oprotosw;	/* original protosw */
+	struct mptsub_connreq	mpts_mpcr;	/* connection request */
+};
+
+/*
+ * Valid values for mpts_flags.  In particular:
+ *
+ *	- MP_CAPABLE means that the connection is successfully established as
+ *	  MPTCP and data transfer may occur, but is not yet ready for multipath-
+ *	  related semantics until MP_READY.  I.e.
if this is on the first subflow,
+ *	  it causes the MPTCP socket to transition to a connected state, except
+ *	  that additional subflows will not be established; they will be marked
+ *	  with PENDING and will be processed when the first subflow is marked
+ *	  with MP_READY.
+ *
+ *	- MP_READY implies that an MP_CAPABLE connection has been confirmed as
+ *	  an MPTCP connection.  See notes above.
+ *
+ *	- MP_DEGRADED implies that the connection has lost its MPTCP capabilities
+ *	  but data transfer on the MPTCP socket is unaffected.  Any existing
+ *	  PENDING subflows will be disconnected, and further attempts to connect
+ *	  additional subflows will be rejected.
+ *
+ * Note that these are per-subflow flags.  The setting and clearing of MP_READY
+ * reflects the state of the MPTCP connection with regard to its multipath
+ * semantics, via the MPTCPF_JOIN_READY flag.  Until that flag is set (meaning
+ * until at least one subflow is marked with MP_READY), further connectx(2)
+ * attempts to join will be queued.  When the flag is cleared (after it has
+ * been set), further connectx(2) will fail (and existing queued ones will be
+ * aborted) and the MPTCP connection loses all of its multipath semantics.
+ *
+ * Keep in sync with bsd/dev/dtrace/scripts/mptcp.d.
+ */
+#define	MPTSF_ATTACHED		0x1	/* attached to MPTCP PCB */
+#define	MPTSF_CONNECTING	0x2	/* connection was attempted */
+#define	MPTSF_CONNECT_PENDING	0x4	/* will connect when MPTCP is ready */
+#define	MPTSF_CONNECTED		0x8	/* connection is established */
+#define	MPTSF_DISCONNECTING	0x10	/* disconnection was attempted */
+#define	MPTSF_DISCONNECTED	0x20	/* has been disconnected */
+#define	MPTSF_MP_CAPABLE	0x40	/* connected as an MPTCP subflow */
+#define	MPTSF_MP_READY		0x80	/* MPTCP has been confirmed */
+#define	MPTSF_MP_DEGRADED	0x100	/* has lost its MPTCP capabilities */
+#define	MPTSF_SUSPENDED		0x200	/* write-side is flow controlled */
+#define	MPTSF_BOUND_IF		0x400	/* subflow bound to an interface */
+#define	MPTSF_BOUND_IP		0x800	/* subflow bound to a src address */
+#define	MPTSF_BOUND_PORT	0x1000	/* subflow bound to a src port */
+#define	MPTSF_PREFERRED		0x2000	/* primary/preferred subflow */
+#define	MPTSF_SOPT_OLDVAL	0x4000	/* old option value is valid */
+#define	MPTSF_SOPT_INPROG	0x8000	/* sosetopt in progress */
+#define	MPTSF_DELETEOK		0x10000	/* subflow can be deleted */
+#define	MPTSF_FAILINGOVER	0x20000	/* subflow not used for output */
+#define	MPTSF_ACTIVE		0x40000	/* subflow currently in use */
+#define	MPTSF_MPCAP_CTRSET	0x80000	/* mpcap counter */
+
+#define	MPTSF_BITS \
+	"\020\1ATTACHED\2CONNECTING\3PENDING\4CONNECTED\5DISCONNECTING" \
+	"\6DISCONNECTED\7MP_CAPABLE\10MP_READY\11MP_DEGRADED\12SUSPENDED" \
+	"\13BOUND_IF\14BOUND_IP\15BOUND_PORT\16PREFERRED\17SOPT_OLDVAL" \
+	"\20SOPT_INPROG\21NOLINGER\22FAILINGOVER\23ACTIVE\24MPCAP_CTRSET"
+
+#define	MPTS_LOCK_ASSERT_HELD(_mpts)					\
+	lck_mtx_assert(&(_mpts)->mpts_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	MPTS_LOCK_ASSERT_NOTHELD(_mpts)					\
+	lck_mtx_assert(&(_mpts)->mpts_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	MPTS_LOCK(_mpts)						\
+	lck_mtx_lock(&(_mpts)->mpts_lock)
+
+#define	MPTS_LOCK_SPIN(_mpts)						\
+	lck_mtx_lock_spin(&(_mpts)->mpts_lock)
+
+#define	MPTS_CONVERT_LOCK(_mpts) do {					\
+	MPTS_LOCK_ASSERT_HELD(_mpts);					\
+	lck_mtx_convert_spin(&(_mpts)->mpts_lock);			\
+} while (0)
+
+#define	MPTS_UNLOCK(_mpts)						\
+	lck_mtx_unlock(&(_mpts)->mpts_lock)
+
+#define	MPTS_ADDREF(_mpts)						\
+	mptcp_subflow_addref(_mpts, 0)
+
+#define	MPTS_ADDREF_LOCKED(_mpts)					\
+	mptcp_subflow_addref(_mpts, 1)
+
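+/*
+ * Editorial sketch (not part of the original header): the typical
+ * pattern for walking the subflow list under these primitives, as the
+ * option-replay routines in mptcp_usrreq.c use it:
+ *
+ *	MPTE_LOCK(mpte);
+ *	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
+ *		MPTS_LOCK(mpts);
+ *		... inspect or update mpts ...
+ *		MPTS_UNLOCK(mpts);
+ *	}
+ *	MPTE_UNLOCK(mpte);
+ *
+ * MPTS_ADDREF()/MPTS_REMREF() (below) pin a subflow across spans where
+ * mpts_lock must be dropped.
+ */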
+#define MPTS_REMREF(_mpts) \ + mptcp_subflow_remref(_mpts) + +/* + * MPTCP states + * Keep in sync with bsd/dev/dtrace/mptcp.d + */ +typedef enum mptcp_state { + MPTCPS_CLOSED = 0, /* closed */ + MPTCPS_LISTEN = 1, /* not yet implemented */ + MPTCPS_ESTABLISHED = 2, /* MPTCP connection established */ + MPTCPS_CLOSE_WAIT = 3, /* rcvd DFIN, waiting for close */ + MPTCPS_FIN_WAIT_1 = 4, /* have closed, sent DFIN */ + MPTCPS_CLOSING = 5, /* closed xchd DFIN, waiting DFIN ACK */ + MPTCPS_LAST_ACK = 6, /* had DFIN and close; await DFIN ACK */ + MPTCPS_FIN_WAIT_2 = 7, /* have closed, DFIN is acked */ + MPTCPS_TIME_WAIT = 8, /* in 2*MSL quiet wait after close */ + MPTCPS_FASTCLOSE_WAIT = 9, /* sent MP_FASTCLOSE */ +} mptcp_state_t; + +typedef u_int64_t mptcp_key_t; +typedef u_int32_t mptcp_token_t; +typedef u_int8_t mptcp_addr_id; + + +/* Address ID list */ +struct mptcp_subf_auth_entry { + LIST_ENTRY(mptcp_subf_auth_entry) msae_next; + u_int32_t msae_laddr_rand; /* Local nonce */ + u_int32_t msae_raddr_rand; /* Remote nonce */ + mptcp_addr_id msae_laddr_id; /* Local addr ID */ + mptcp_addr_id msae_raddr_id; /* Remote addr ID */ +}; + +/* + * MPTCP Protocol Control Block + * + * Protected by per-MPTCP mpt_lock. + * Keep in sync with bsd/dev/dtrace/scripts/mptcp.d. + */ +struct mptcb { + decl_lck_mtx_data(, mpt_lock); /* per MPTCP PCB lock */ + struct mptses *mpt_mpte; /* back ptr to MPTCP session */ + mptcp_state_t mpt_state; /* MPTCP state */ + u_int32_t mpt_flags; /* see flags below */ + u_int32_t mpt_refcnt; /* references held on mptcb */ + u_int32_t mpt_version; /* MPTCP proto version */ + int mpt_softerror; /* error not yet reported */ + /* + * Authentication and metadata invariants + */ + mptcp_key_t *mpt_localkey; /* in network byte order */ + mptcp_key_t mpt_remotekey; /* in network byte order */ + mptcp_token_t mpt_localtoken; /* HMAC SHA1 of local key */ + mptcp_token_t mpt_remotetoken; /* HMAC SHA1 of remote key */ + + /* + * Timer vars for scenarios where subflow level acks arrive, but + * Data ACKs do not. 
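+	 * (Editorial note: these mirror TCP's per-connection retransmit
+	 * state at DSN granularity; e.g. mpt_rxtshift plays the role
+	 * that t_rxtshift plays in struct tcpcb.)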
+	 */
+	int			mpt_rxtshift;	/* num of consecutive retrans */
+	u_int32_t		mpt_rxtstart;	/* time at which rxt started */
+	u_int64_t		mpt_rtseq;	/* seq # being tracked */
+	u_int32_t		mpt_timer_vals;	/* timer related values */
+	u_int32_t		mpt_timewait;	/* timewait */
+	/*
+	 * Sending side
+	 */
+	u_int64_t		mpt_snduna;	/* DSN of last unacked byte */
+	u_int64_t		mpt_sndnxt;	/* DSN of next byte to send */
+	u_int64_t		mpt_sndmax;	/* DSN of max byte sent */
+	u_int64_t		mpt_local_idsn;	/* First byte's DSN */
+	u_int32_t		mpt_sndwnd;
+	/*
+	 * Receiving side
+	 */
+	u_int64_t		mpt_rcvnxt;	/* Next expected DSN */
+	u_int64_t		mpt_rcvatmark;	/* mpsocket marker of rcvnxt */
+	u_int64_t		mpt_remote_idsn; /* Peer's IDSN */
+	u_int32_t		mpt_rcvwnd;
+	LIST_HEAD(, mptcp_subf_auth_entry) mpt_subauth_list; /* address IDs */
+	/*
+	 * Fastclose
+	 */
+	u_int64_t		mpt_dsn_at_csum_fail; /* MPFail Opt DSN */
+	/*
+	 * Zombie handling
+	 */
+#define	MPT_GC_TICKS	(60)
+	int32_t			mpt_gc_ticks;	/* Used for zombie deletion */
+};
+
+/* valid values for mpt_flags (see also notes on mpts_flags above) */
+#define	MPTCPF_CHECKSUM		0x1	/* checksum DSS option */
+#define	MPTCPF_FALLBACK_TO_TCP	0x2	/* Fallback to TCP */
+#define	MPTCPF_JOIN_READY	0x4	/* Ready to start 2 or more subflows */
+#define	MPTCPF_RECVD_MPFAIL	0x8	/* Received MP_FAIL option */
+#define	MPTCPF_PEEL_OFF		0x10	/* Peel off this socket */
+#define	MPTCPF_SND_64BITDSN	0x20	/* Send full 64-bit DSN */
+#define	MPTCPF_SND_64BITACK	0x40	/* Send 64-bit ACK response */
+#define	MPTCPF_RCVD_64BITACK	0x80	/* Received 64-bit Data ACK */
+
+#define	MPTCPF_BITS \
+	"\020\1CHECKSUM\2FALLBACK_TO_TCP\3JOIN_READY\4RECVD_MPFAIL\5PEEL_OFF" \
+	"\6SND_64BITDSN\7SND_64BITACK\10RCVD_64BITACK"
+
+/* valid values for mpt_timer_vals */
+#define	MPTT_REXMT	0x01	/* Starting Retransmit Timer */
+#define	MPTT_TW		0x02	/* Starting Timewait Timer */
+#define	MPTT_FASTCLOSE	0x04	/* Starting Fastclose wait timer */
+
+#define	MPT_LOCK_ASSERT_HELD(_mpt)					\
+	lck_mtx_assert(&(_mpt)->mpt_lock, LCK_MTX_ASSERT_OWNED)
+
+#define	MPT_LOCK_ASSERT_NOTHELD(_mpt)					\
+	lck_mtx_assert(&(_mpt)->mpt_lock, LCK_MTX_ASSERT_NOTOWNED)
+
+#define	MPT_LOCK(_mpt)							\
+	lck_mtx_lock(&(_mpt)->mpt_lock)
+
+#define	MPT_LOCK_SPIN(_mpt)						\
+	lck_mtx_lock_spin(&(_mpt)->mpt_lock)
+
+#define	MPT_CONVERT_LOCK(_mpt) do {					\
+	MPT_LOCK_ASSERT_HELD(_mpt);					\
+	lck_mtx_convert_spin(&(_mpt)->mpt_lock);			\
+} while (0)
+
+#define	MPT_UNLOCK(_mpt)						\
+	lck_mtx_unlock(&(_mpt)->mpt_lock)
+
+/* events for close FSM */
+#define	MPCE_CLOSE		0x1
+#define	MPCE_RECV_DATA_ACK	0x2
+#define	MPCE_RECV_DATA_FIN	0x4
+
+/* mptcb manipulation */
+#define	tptomptp(tp)	((struct mptcb *)((tp)->t_mptcb))
+
+/*
+ * MPTCP control block and state structures are allocated along with
+ * the MP protocol control block; the following represents the layout.
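+ *
+ * (Editorial note: because all three structures live in one mpp_mtp,
+ * a single zone allocation per MPTCP socket yields the multipath PCB,
+ * the session and the MPTCP PCB together; the back-pointers mpte_mppcb,
+ * mpte_mptcb and mpt_mpte above are then wired up when the PCB is
+ * attached.)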
+ */
+struct mpp_mtp {
+	struct mppcb		mpp;		/* Multipath PCB */
+	struct mptses		mpp_ses;	/* MPTCP session */
+	struct mptcb		mtcb;		/* MPTCP PCB */
+};
+
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_net_inet_mptcp);
+#endif /* SYSCTL_DECL */
+
+extern struct mppcbinfo mtcbinfo;
+extern struct pr_usrreqs mptcp_usrreqs;
+
+/* Encryption algorithm related definitions */
+#define	MPTCP_SHA1_RESULTLEN	20
+#define	SHA1_TRUNCATED		8
+
+/* List of valid keys to use for MPTCP connections */
+#define	MPTCP_KEY_DIGEST_LEN		(MPTCP_SHA1_RESULTLEN)
+#define	MPTCP_MX_KEY_ALLOCS		(256)
+#define	MPTCP_KEY_PREALLOCS_MX		(16)
+#define	MPTCP_MX_PREALLOC_ZONE_SZ	(8192)
+
+struct mptcp_key_entry {
+	LIST_ENTRY(mptcp_key_entry)	mkey_next;
+	mptcp_key_t			mkey_value;
+#define	MKEYF_FREE	0x0
+#define	MKEYF_INUSE	0x1
+	u_int32_t			mkey_flags;
+	char				mkey_digest[MPTCP_KEY_DIGEST_LEN];
+};
+
+/* structure for managing unique key list */
+struct mptcp_keys_pool_head {
+	struct mptcp_key_entry	*lh_first;	/* list of keys */
+	u_int32_t		mkph_count;	/* total keys in pool */
+	vm_size_t		mkph_key_elm_sz; /* size of key entry */
+	struct zone		*mkph_key_entry_zone; /* zone for key entry */
+	decl_lck_mtx_data(, mkph_lock);		/* lock for key list */
+};
+
+/* MPTCP Receive Window */
+#define	MPTCP_RWIN_MAX	(1<<16)
+
+/* MPTCP Debugging Levels */
+#define	MP_NODEBUG		0x0
+#define	MP_ERR_DEBUG		0x1
+#define	MP_VERBOSE_DEBUG_1	0x2
+#define	MP_VERBOSE_DEBUG_2	0x3
+#define	MP_VERBOSE_DEBUG_3	0x4
+#define	MP_VERBOSE_DEBUG_4	0x5	/* output path debugging */
+
+/* Mask to obtain 32-bit portion of data sequence number */
+#define	MPTCP_DATASEQ_LOW32_MASK	(0xffffffff)
+#define	MPTCP_DATASEQ_LOW32(seq)	(seq & MPTCP_DATASEQ_LOW32_MASK)
+
+/* Mask to obtain upper 32-bit portion of data sequence number */
+#define	MPTCP_DATASEQ_HIGH32_MASK	(0xffffffff00000000)
+#define	MPTCP_DATASEQ_HIGH32(seq)	(seq & MPTCP_DATASEQ_HIGH32_MASK)
+
+/* Mask to obtain 32-bit portion of data ack */
+#define	MPTCP_DATAACK_LOW32_MASK	(0xffffffff)
+#define	MPTCP_DATAACK_LOW32(ack)	(ack & MPTCP_DATAACK_LOW32_MASK)
+
+/* Mask to obtain upper 32-bit portion of data ack */
+#define	MPTCP_DATAACK_HIGH32_MASK	(0xffffffff00000000)
+#define	MPTCP_DATAACK_HIGH32(ack)	(ack & MPTCP_DATAACK_HIGH32_MASK)
+
+/*
+ * x is the 64-bit data sequence number, y the 32-bit data seq number to be
+ * extended.  z is y extended to the appropriate 64-bit value.
+ * This algorithm is based on the fact that subflow-level window sizes are
+ * at most 2**30 (in reality, they are much smaller).  A high-throughput
+ * application sending on a large number of subflows can in theory have very
+ * large MPTCP-level send and receive windows, in which case 64-bit DSNs
+ * must be sent in place of 32-bit DSNs on the wire.  For us, with 2 subflows
+ * at 512K each, sequence wraparound detection can be done by checking whether
+ * the 32-bit value obtained on the wire is 2**31 bytes apart from the stored
+ * lower 32 bits of the Data Sequence Number.  Bogus DSNs are dropped by
+ * comparing against rwnd.  Bogus DSNs within rwnd cannot be protected against
+ * and are as weak as bogus TCP sequence numbers.
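+ *
+ * Worked example (editorial, for the macro below): with a stored 64-bit
+ * DSN x = 0x1fffffff0 and wire value y = 0x10, MPTCP_DATASEQ_LOW32(x) =
+ * 0xfffffff0 > y and the difference 0xffffffe0 >= 2**31, so y is taken
+ * to have wrapped past the stored low 32 bits and
+ * z = (0x100000000 + 0x100000000) | 0x10 = 0x200000010.  The symmetric
+ * branch handles x having wrapped ahead of y; otherwise the high 32
+ * bits of x are simply prepended to y.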
+ */ +#define MPTCP_EXTEND_DSN(x, y, z) { \ + if ((MPTCP_DATASEQ_LOW32(x) > y) && \ + ((((u_int32_t)MPTCP_DATASEQ_LOW32(x)) - (u_int32_t)y) >= \ + (u_int32_t)(1 << 31))) { \ + /* \ + * y wrapped around and x and y are 2**31 bytes apart \ + */ \ + z = MPTCP_DATASEQ_HIGH32(x) + 0x100000000; \ + z |= y; \ + } else if ((MPTCP_DATASEQ_LOW32(x) < y) && \ + (((u_int32_t)y - \ + ((u_int32_t)MPTCP_DATASEQ_LOW32(x))) >= \ + (u_int32_t)(1 << 31))) { \ + /* \ + * x wrapped around and x and y are 2**31 apart \ + */ \ + z = MPTCP_DATASEQ_HIGH32(x) - 0x100000000; \ + z |= y; \ + } else { \ + z = MPTCP_DATASEQ_HIGH32(x) | y; \ + } \ +} + +#define mptcplog(x) do { if (mptcp_verbose >= 1) log x; } while (0) +#define mptcplog2(x) do { if (mptcp_verbose >= 2) log x; } while (0) +#define mptcplog3(x) do { if (mptcp_verbose >= 3) log x; } while (0) + +extern int mptcp_enable; /* Multipath TCP */ +extern int mptcp_dbg; /* Multipath TCP DBG */ +extern int mptcp_mpcap_retries; /* Multipath TCP retries */ +extern int mptcp_join_retries; /* Multipath TCP Join retries */ +extern int mptcp_dss_csum; /* Multipath DSS Option checksum */ +extern int mptcp_fail_thresh; /* Multipath failover thresh of retransmits */ +extern int mptcp_subflow_keeptime; /* Multipath subflow TCP_KEEPALIVE opt */ +extern int mptcp_mpprio_enable; /* MP_PRIO option enable/disable */ +extern int mptcp_remaddr_enable;/* REMOVE_ADDR option enable/disable */ +extern uint32_t mptcp_verbose; /* verbose and mptcp_dbg must be unified */ +#define MPPCB_LIMIT 16 +extern uint32_t mptcp_socket_limit; /* max number of mptcp sockets allowed */ +extern int tcp_jack_rxmt; /* Join ACK retransmission value in msecs */ + +__BEGIN_DECLS +extern void mptcp_init(struct protosw *, struct domain *); +extern int mptcp_ctloutput(struct socket *, struct sockopt *); +extern struct mptses *mptcp_sescreate(struct socket *, struct mppcb *); +extern void mptcp_drain(void); +extern struct mptses *mptcp_drop(struct mptses *, struct mptcb *, int); +extern struct mptses *mptcp_close(struct mptses *, struct mptcb *); +extern int mptcp_lock(struct socket *, int, void *); +extern int mptcp_unlock(struct socket *, int, void *); +extern lck_mtx_t *mptcp_getlock(struct socket *, int); +extern void mptcp_thread_signal(struct mptses *); +extern void mptcp_flush_sopts(struct mptses *); +extern int mptcp_setconnorder(struct mptses *, connid_t, uint32_t); +extern int mptcp_getconnorder(struct mptses *, connid_t, uint32_t *); + +extern struct mptopt *mptcp_sopt_alloc(int); +extern const char *mptcp_sopt2str(int, int, char *, int); +extern void mptcp_sopt_free(struct mptopt *); +extern void mptcp_sopt_insert(struct mptses *, struct mptopt *); +extern void mptcp_sopt_remove(struct mptses *, struct mptopt *); +extern struct mptopt *mptcp_sopt_find(struct mptses *, struct sockopt *); + +extern struct mptsub *mptcp_subflow_alloc(int); +extern void mptcp_subflow_free(struct mptsub *); +extern void mptcp_subflow_addref(struct mptsub *, int); +extern int mptcp_subflow_add(struct mptses *, struct mptsub *, + struct proc *, uint32_t); +extern void mptcp_subflow_del(struct mptses *, struct mptsub *, boolean_t); +extern void mptcp_subflow_remref(struct mptsub *); +extern int mptcp_subflow_output(struct mptses *, struct mptsub *); +extern void mptcp_subflow_disconnect(struct mptses *, struct mptsub *, + boolean_t); +extern void mptcp_subflow_sopeeloff(struct mptses *, struct mptsub *, + struct socket *); +extern int mptcp_subflow_sosetopt(struct mptses *, struct socket *, + struct mptopt *); +extern int 
mptcp_subflow_sogetopt(struct mptses *, struct socket *, + struct mptopt *); + +extern void mptcp_input(struct mptses *, struct mbuf *); +extern int mptcp_output(struct mptses *); +extern void mptcp_close_fsm(struct mptcb *, uint32_t); + +extern mptcp_token_t mptcp_get_localtoken(void *); +extern mptcp_token_t mptcp_get_remotetoken(void *); + +extern u_int64_t mptcp_get_localkey(void *); +extern u_int64_t mptcp_get_remotekey(void *); + +extern void mptcp_free_key(mptcp_key_t *key); +extern void mptcp_hmac_sha1(mptcp_key_t, mptcp_key_t, u_int32_t, u_int32_t, + u_char*, int); +extern void mptcp_get_hmac(mptcp_addr_id, struct mptcb *, u_char *, int); +extern void mptcp_get_rands(mptcp_addr_id, struct mptcb *, u_int32_t *, + u_int32_t *); +extern void mptcp_set_raddr_rand(mptcp_addr_id, struct mptcb *, mptcp_addr_id, + u_int32_t); +extern u_int64_t mptcp_get_trunced_hmac(mptcp_addr_id, struct mptcb *mp_tp); +extern int mptcp_generate_token(char *, int, caddr_t, int); +extern int mptcp_generate_idsn(char *, int, caddr_t, int); +extern boolean_t mptcp_ok_to_keepalive(struct mptcb *); +extern void mptcp_insert_dsn(struct mppcb *, struct mbuf *); +extern void mptcp_output_getm_dsnmap32(struct socket *, int, uint32_t, + u_int32_t *, u_int32_t *, u_int16_t *, u_int64_t *); +extern void mptcp_output_getm_dsnmap64(struct socket *, int, uint32_t, + u_int64_t *, u_int32_t *, u_int16_t *); +extern void mptcp_send_dfin(struct socket *); +extern void mptcp_act_on_txfail(struct socket *); +extern struct mptsub *mptcp_get_subflow(struct mptses *, struct mptsub *); +extern int mptcp_get_map_for_dsn(struct socket *, u_int64_t, u_int32_t *); +extern int32_t mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len); +extern int32_t mptcp_sbspace(struct mptcb *); +extern void mptcp_notify_mpready(struct socket *); +extern void mptcp_notify_mpfail(struct socket *); +extern void mptcp_notify_close(struct socket *); +__END_DECLS + +#endif /* BSD_KERNEL_PRIVATE */ +#ifdef PRIVATE +typedef struct mptcp_flow { + uint32_t flow_flags; + connid_t flow_cid; + struct sockaddr_storage flow_src; + struct sockaddr_storage flow_dst; + conninfo_tcp_t flow_ci; +} mptcp_flow_t; + +typedef struct conninfo_mptcp { + size_t mptcpci_len; + size_t mptcpci_nflows; + uint32_t mptcpci_state; + mptcp_flow_t mptcpci_flows[1]; +} conninfo_mptcp_t; + +#endif /* PRIVATE */ +#endif /* _NETINET_MPTCP_VAR_H_ */ diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c index f06517a26..89d1a5f3a 100644 --- a/bsd/netinet/raw_ip.c +++ b/bsd/netinet/raw_ip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -150,40 +150,49 @@ ip_dn_ctl_t *ip_dn_ctl_ptr; * Initialize raw connection block q. */ void -rip_init() +rip_init(struct protosw *pp, struct domain *dp) { - struct inpcbinfo *pcbinfo; +#pragma unused(dp) + static int rip_initialized = 0; + struct inpcbinfo *pcbinfo; + + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); + + if (rip_initialized) + return; + rip_initialized = 1; LIST_INIT(&ripcb); - ripcbinfo.listhead = &ripcb; + ripcbinfo.ipi_listhead = &ripcb; /* * XXX We don't use the hash list for raw IP, but it's easier * to allocate a one entry hash list than it is to check all - * over the place for hashbase == NULL. + * over the place for ipi_hashbase == NULL. 
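+	 * (Editorial note: hashinit(1, ...) hands back a single-bucket
+	 * table, so ipi_hashbase is always a valid pointer even though
+	 * raw IP never looks up PCBs by hash.)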
	 */
-	ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
-	ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
+	ripcbinfo.ipi_hashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_hashmask);
+	ripcbinfo.ipi_porthashbase = hashinit(1, M_PCB, &ripcbinfo.ipi_porthashmask);
-	ripcbinfo.ipi_zone = (void *) zinit(sizeof(struct inpcb),
-					    (4096 * sizeof(struct inpcb)),
-					    4096, "ripzone");
+	ripcbinfo.ipi_zone = zinit(sizeof(struct inpcb),
+	    (4096 * sizeof(struct inpcb)), 4096, "ripzone");
 
 	pcbinfo = &ripcbinfo;
 	/*
	 * allocate lock group attribute and group for raw IP PCB mutexes
	 */
-	pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init();
+	pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init();
+	pcbinfo->ipi_lock_grp = lck_grp_alloc_init("ripcb", pcbinfo->ipi_lock_grp_attr);
 
-	pcbinfo->mtx_grp = lck_grp_alloc_init("ripcb", pcbinfo->mtx_grp_attr);
-		
 	/*
	 * allocate the lock attribute for raw IP PCB mutexes
	 */
-	pcbinfo->mtx_attr = lck_attr_alloc_init();
-
-	if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL)
-		return;	/* pretty much dead if this fails... */
+	pcbinfo->ipi_lock_attr = lck_attr_alloc_init();
+	if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp,
+	    pcbinfo->ipi_lock_attr)) == NULL) {
+		panic("%s: unable to allocate PCB lock\n", __func__);
+		/* NOTREACHED */
+	}
+	in_pcbinfo_attach(&ripcbinfo);
 }
 
 static struct	sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET , 0, {0}, {0,0,0,0,0,0,0,0,} };
@@ -197,17 +206,18 @@ rip_input(m, iphlen)
 	struct mbuf *m;
 	int iphlen;
 {
-	register struct ip *ip = mtod(m, struct ip *);
-	register struct inpcb *inp;
+	struct ip *ip = mtod(m, struct ip *);
+	struct inpcb *inp;
 	struct inpcb *last = 0;
 	struct mbuf *opts = 0;
 	int skipit = 0, ret = 0;
+	struct ifnet *ifp = m->m_pkthdr.rcvif;
 
 	/* Expect 32-bit aligned data pointer on strict-align platforms */
 	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 
 	ripsrc.sin_addr = ip->ip_src;
-	lck_rw_lock_shared(ripcbinfo.mtx);
+	lck_rw_lock_shared(ripcbinfo.ipi_lock);
 	LIST_FOREACH(inp, &ripcb, inp_list) {
 #if INET6
 		if ((inp->inp_vflag & INP_IPV4) == 0)
@@ -221,6 +231,14 @@ rip_input(m, iphlen)
 		if (inp->inp_faddr.s_addr &&
                   inp->inp_faddr.s_addr != ip->ip_src.s_addr)
 			continue;
+
+		if (inp_restricted(inp, ifp))
+			continue;
+
+		if (ifp != NULL && IFNET_IS_CELLULAR(ifp) &&
+		    (inp->inp_flags & INP_NO_IFT_CELLULAR))
+			continue;
+
 		if (last) {
 			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
 
@@ -336,7 +354,7 @@ unlock:
	 * Keep the list locked because socket filter may force the socket lock
	 * to be released when calling sbappendaddr() -- see rdar://7627704
	 */
-	lck_rw_done(ripcbinfo.mtx);
+	lck_rw_done(ripcbinfo.ipi_lock);
 }
 
 /*
@@ -350,10 +368,11 @@ rip_output(
 	u_int32_t dst,
 	struct mbuf *control)
 {
-	register struct ip *ip;
-	register struct inpcb *inp = sotoinpcb(so);
+	struct ip *ip;
+	struct inpcb *inp = sotoinpcb(so);
 	int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST;
-	struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF };
+	struct ip_out_args ipoa =
+	    { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 };
 	struct ip_moptions *imo;
 	int error = 0;
 	mbuf_svc_class_t msc = MBUF_SC_UNSPEC;
@@ -362,6 +381,14 @@ rip_output(
 		msc = mbuf_service_class_from_control(control);
 
 		m_freem(control);
+		control = NULL;
+	}
+
+	if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) {
+		if (m != NULL)
+			m_freem(m);
+		VERIFY(control == NULL);
+		return (inp == NULL ?
EINVAL : EPROTOTYPE); } flags |= IP_OUTARGS; @@ -412,11 +439,7 @@ rip_output( return EINVAL; } if (ip->ip_id == 0) -#if RANDOM_IP_ID ip->ip_id = ip_randomid(); -#else - ip->ip_id = htons(ip_id++); -#endif /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; OSAddAtomic(1, &ipstat.ips_rawout); @@ -432,15 +455,15 @@ rip_output( } #endif /*IPSEC*/ - if (inp->inp_route.ro_rt != NULL && - inp->inp_route.ro_rt->generation_id != route_generation) { - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = NULL; - } + if (ROUTE_UNUSABLE(&inp->inp_route)) + ROUTE_RELEASE(&inp->inp_route); set_packet_service_class(m, so, msc, 0); - m->m_pkthdr.m_flowhash = inp->inp_flowhash; - m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH; + m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB; + m->m_pkthdr.pkt_flowid = inp->inp_flowhash; + m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC | + PKTF_FLOW_RAWSOCK); + m->m_pkthdr.pkt_proto = inp->inp_ip_p; #if CONFIG_MACF_NET mac_mbuf_label_associate_inpcb(inp, m); @@ -473,10 +496,9 @@ rip_output( * Always discard the cached route for unconnected * socket or if it is a multicast route. */ - if (rt == NULL) { - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = NULL; - } + if (rt == NULL) + ROUTE_RELEASE(&inp->inp_route); + /* * If this is a connected socket and the destination * route is unicast, update outif with that of the @@ -484,8 +506,18 @@ rip_output( */ if (rt != NULL && (outif = rt->rt_ifp) != inp->inp_last_outifp) inp->inp_last_outifp = outif; + } else { + ROUTE_RELEASE(&inp->inp_route); } + /* + * If output interface was cellular, and this socket is denied + * access to it, generate an event. + */ + if (error != 0 && (ipoa.ipoa_retflags & IPOARF_IFDENIED) && + (inp->inp_flags & INP_NO_IFT_CELLULAR)) + soevent(so, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED)); + return (error); } @@ -786,6 +818,8 @@ SYSCTL_INT(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); SYSCTL_INT(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED, &rip_recvspace, 0, "Maximum incoming raw IP datagram size"); +SYSCTL_UINT(_net_inet_raw, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED, + &ripcbinfo.ipi_count, 0, "Number of active PCBs"); static int rip_attach(struct socket *so, int proto, struct proc *p) @@ -847,31 +881,47 @@ rip_disconnect(struct socket *so) } __private_extern__ int -rip_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) +rip_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { +#pragma unused(p) struct inpcb *inp = sotoinpcb(so); - struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam; + struct sockaddr_in sin; struct ifaddr *ifa = NULL; struct ifnet *outif = NULL; - if (nam->sa_len != sizeof(*addr)) - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? 
EINVAL : EPROTOTYPE); - if (TAILQ_EMPTY(&ifnet_head) || ((addr->sin_family != AF_INET) && - (addr->sin_family != AF_IMPLINK)) || - (addr->sin_addr.s_addr && - (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == 0)) { - return EADDRNOTAVAIL; - } - else if (ifa) { + if (nam->sa_len != sizeof (struct sockaddr_in)) + return (EINVAL); + + /* Sanitized local copy for interface address searches */ + bzero(&sin, sizeof (sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof (struct sockaddr_in); + sin.sin_addr.s_addr = SIN(nam)->sin_addr.s_addr; + + if (TAILQ_EMPTY(&ifnet_head) || + (sin.sin_family != AF_INET && sin.sin_family != AF_IMPLINK) || + (sin.sin_addr.s_addr && (ifa = ifa_ifwithaddr(SA(&sin))) == 0)) { + return (EADDRNOTAVAIL); + } else if (ifa) { + /* + * Opportunistically determine the outbound + * interface that may be used; this may not + * hold true if we end up using a route + * going over a different interface, e.g. + * when sending to a local address. This + * will get updated again after sending. + */ IFA_LOCK(ifa); outif = ifa->ifa_ifp; IFA_UNLOCK(ifa); IFA_REMREF(ifa); } - inp->inp_laddr = addr->sin_addr; + inp->inp_laddr = sin.sin_addr; inp->inp_last_outifp = outif; - return 0; + return (0); } __private_extern__ int @@ -880,6 +930,8 @@ rip_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) struct inpcb *inp = sotoinpcb(so); struct sockaddr_in *addr = (struct sockaddr_in *)(void *)nam; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? EINVAL : EPROTOTYPE); if (nam->sa_len != sizeof(*addr)) return EINVAL; if (TAILQ_EMPTY(&ifnet_head)) @@ -901,26 +953,43 @@ rip_shutdown(struct socket *so) } __private_extern__ int -rip_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *nam, - struct mbuf *control, __unused struct proc *p) +rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, + struct mbuf *control, struct proc *p) { +#pragma unused(flags, p) struct inpcb *inp = sotoinpcb(so); - register u_int32_t dst; + u_int32_t dst; + int error = 0; + + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + error = (inp == NULL ? 
EINVAL : EPROTOTYPE); + goto bad; + } if (so->so_state & SS_ISCONNECTED) { - if (nam) { - m_freem(m); - return EISCONN; + if (nam != NULL) { + error = EISCONN; + goto bad; } dst = inp->inp_faddr.s_addr; } else { if (nam == NULL) { - m_freem(m); - return ENOTCONN; + error = ENOTCONN; + goto bad; } dst = ((struct sockaddr_in *)(void *)nam)->sin_addr.s_addr; } - return rip_output(m, so, dst, control); + return (rip_output(m, so, dst, control)); + +bad: + VERIFY(error != 0); + + if (m != NULL) + m_freem(m); + if (control != NULL) + m_freem(control); + + return (error); } /* note: rip_unlock is called from different protos instead of the generic socket_unlock, @@ -947,17 +1016,17 @@ rip_unlock(struct socket *so, int refcount, void *debug) if (so->so_usecount == 0 && (inp->inp_wantcnt == WNT_STOPUSING)) { /* cleanup after last reference */ lck_mtx_unlock(so->so_proto->pr_domain->dom_mtx); - lck_rw_lock_exclusive(ripcbinfo.mtx); + lck_rw_lock_exclusive(ripcbinfo.ipi_lock); if (inp->inp_state != INPCB_STATE_DEAD) { #if INET6 - if (INP_CHECK_SOCKAF(so, AF_INET6)) + if (SOCK_CHECK_DOM(so, PF_INET6)) in6_pcbdetach(inp); else #endif /* INET6 */ in_pcbdetach(inp); } in_pcbdispose(inp); - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return(0); } } @@ -980,17 +1049,17 @@ rip_pcblist SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ - lck_rw_lock_exclusive(ripcbinfo.mtx); + lck_rw_lock_exclusive(ripcbinfo.ipi_lock); if (req->oldptr == USER_ADDR_NULL) { n = ripcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb); - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return 0; } if (req->newptr != USER_ADDR_NULL) { - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return EPERM; } @@ -1007,24 +1076,24 @@ rip_pcblist SYSCTL_HANDLER_ARGS xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) { - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return error; } /* * We are done if there is no pcb */ if (n == 0) { - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return 0; } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) { - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return ENOMEM; } - for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n; + for (inp = ripcbinfo.ipi_listhead->lh_first, i = 0; inp && i < n; inp = inp->inp_list.le_next) { if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) inp_list[i++] = inp; @@ -1062,14 +1131,13 @@ rip_pcblist SYSCTL_HANDLER_ARGS error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return error; } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); -#if !CONFIG_EMBEDDED static int rip_pcblist64 SYSCTL_HANDLER_ARGS @@ -1084,17 +1152,17 @@ rip_pcblist64 SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. 
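	 * (Editorial sketch of the caller side, not part of the original source:
	 *
	 *	size_t len = 0;
	 *	sysctlbyname("net.inet.raw.pcblist64", NULL, &len, NULL, 0);
	 *	buf = malloc(len);
	 *	sysctlbyname("net.inet.raw.pcblist64", buf, &len, NULL, 0);
	 *
	 * The oldptr == USER_ADDR_NULL branch below answers the sizing probe
	 * with a padded estimate without copying out any PCBs.)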
*/ - lck_rw_lock_exclusive(ripcbinfo.mtx); + lck_rw_lock_exclusive(ripcbinfo.ipi_lock); if (req->oldptr == USER_ADDR_NULL) { n = ripcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xinpcb64); - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return 0; } if (req->newptr != USER_ADDR_NULL) { - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return EPERM; } @@ -1111,24 +1179,24 @@ rip_pcblist64 SYSCTL_HANDLER_ARGS xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) { - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return error; } /* * We are done if there is no pcb */ if (n == 0) { - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return 0; } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) { - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return ENOMEM; } - for (inp = ripcbinfo.listhead->lh_first, i = 0; inp && i < n; + for (inp = ripcbinfo.ipi_listhead->lh_first, i = 0; inp && i < n; inp = inp->inp_list.le_next) { if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) inp_list[i++] = inp; @@ -1165,14 +1233,13 @@ rip_pcblist64 SYSCTL_HANDLER_ARGS error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return error; } SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, rip_pcblist64, "S,xinpcb64", "List of active raw IP sockets"); -#endif /* !CONFIG_EMBEDDED */ static int @@ -1180,9 +1247,9 @@ rip_pcblist_n SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) int error = 0; - + error = get_pcblist_n(IPPROTO_IP, req, &ripcbinfo); - + return error; } @@ -1190,10 +1257,18 @@ SYSCTL_PROC(_net_inet_raw, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, rip_pcblist_n, "S,xinpcb_n", "List of active raw IP sockets"); struct pr_usrreqs rip_usrreqs = { - rip_abort, pru_accept_notsupp, rip_attach, rip_bind, rip_connect, - pru_connect2_notsupp, in_control, rip_detach, rip_disconnect, - pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, rip_send, pru_sense_null, rip_shutdown, - in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = rip_abort, + .pru_attach = rip_attach, + .pru_bind = rip_bind, + .pru_connect = rip_connect, + .pru_control = in_control, + .pru_detach = rip_detach, + .pru_disconnect = rip_disconnect, + .pru_peeraddr = in_getpeeraddr, + .pru_send = rip_send, + .pru_shutdown = rip_shutdown, + .pru_sockaddr = in_getsockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; /* DSEP Review Done pl-20051213-v02 @3253 */ diff --git a/bsd/netinet/tcp.h b/bsd/netinet/tcp.h index 99264dcbe..da955548f 100644 --- a/bsd/netinet/tcp.h +++ b/bsd/netinet/tcp.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. 
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
@@ -134,6 +134,9 @@ struct tcphdr {
 	(TCPOPT_NOP<<24|TCPOPT_NOP<<16|(ccopt)<<8|TCPOLEN_CC)
 #define	TCPOPT_SIGNATURE		19	/* Keyed MD5: RFC 2385 */
 #define	   TCPOLEN_SIGNATURE		18
+#if MPTCP
+#define	TCPOPT_MULTIPATH		30
+#endif
 
 /* Option definitions */
 #define	TCPOPT_SACK_PERMIT_HDR \
@@ -163,15 +166,6 @@ struct tcphdr {
  */
 #define	TCP_MINMSS 216
 
-/*
- * TCP_MINMSSOVERLOAD is defined to be 1000 which should cover any type
- * of interactive TCP session.
- * See tcp_subr.c tcp_minmssoverload SYSCTL declaration and tcp_input.c
- * for more comments.
- * Setting this to "0" disables the minmssoverload check.
- */
-#define	TCP_MINMSSOVERLOAD 1000
-
 /*
  * Default maximum segment size for TCP6.
  * With an IP6 MSS of 1280, this is 1220,
@@ -207,17 +201,28 @@ struct tcphdr {
 #define	TCP_RXT_CONNDROPTIME	0x80	/* time after which tcp retransmissions will be
					 * stopped and the connection will be dropped
					 */
-#define TCP_RXT_FINDROP	0x100	/* when this option is set, drop a connection 
+#define	TCP_RXT_FINDROP	0x100	/* when this option is set, drop a connection
				 * after retransmitting the FIN 3 times. It will
				 * prevent holding too many mbufs in socket
				 * buffer queues.
				 */
+#define	TCP_KEEPINTVL	0x101	/* interval between keepalives */
+#define	TCP_KEEPCNT	0x102	/* number of keepalives before close */
+#define	TCP_SENDMOREACKS 0x103	/* always ack every other packet */
 #ifdef PRIVATE
 #define	TCP_INFO		0x200	/* retrieve tcp_info structure */
 #define	TCP_NOTSENT_LOWAT	0x201	/* Low water mark for TCP unsent data */
 #define	TCP_MEASURE_SND_BW	0x202	/* Measure sender's bandwidth for this connection */
 #define	TCP_MEASURE_BW_BURST	0x203	/* Burst size to use for bandwidth measurement */
 #define	TCP_PEER_PID		0x204	/* Lookup pid of the process we're connected to */
+#define	TCP_ADAPTIVE_READ_TIMEOUT	0x205	/* Read timeout used as a multiple of RTT */
+/*
+ * Enable message delivery on a socket; this feature is currently
+ * unsupported and is subject to change in the future.
+ */
+#define	TCP_ENABLE_MSGS	0x206
+#define	TCP_ADAPTIVE_WRITE_TIMEOUT	0x207	/* Write timeout used as a multiple of RTT */
+
 /*
  * The TCP_INFO socket option is a private API and is subject to change
  */
@@ -230,6 +235,10 @@ struct tcphdr {
 
 #define	TCPI_FLAG_LOSSRECOVERY	0x01	/* Currently in loss recovery */
 
+/*
+ * Add new fields to this structure at the end only. This will preserve
+ * binary compatibility.
+ */
 struct tcp_info {
 	u_int8_t	tcpi_state;			/* TCP FSM state. */
 	u_int8_t	tcpi_options;		/* Options enabled on conn. */
@@ -245,6 +254,7 @@ struct tcp_info {
 	u_int32_t	tcpi_rttcur;		/* Most recent value of RTT */
 	u_int32_t	tcpi_srtt;		/* Smoothed RTT */
 	u_int32_t	tcpi_rttvar;		/* RTT variance */
+	u_int32_t	tcpi_rttbest;		/* Best RTT we've seen */
 
 	u_int32_t	tcpi_snd_ssthresh;	/* Slow start threshold. */
 	u_int32_t	tcpi_snd_cwnd;		/* Send congestion window.
*/ @@ -258,17 +268,32 @@ struct tcp_info { int32_t tcpi_last_outif; /* if_index of interface used to send last */ u_int32_t tcpi_snd_sbbytes; /* bytes in snd buffer including data inflight */ + u_int64_t tcpi_txpackets __attribute__((aligned(8))); /* total packets sent */ u_int64_t tcpi_txbytes __attribute__((aligned(8))); /* total bytes sent */ u_int64_t tcpi_txretransmitbytes __attribute__((aligned(8))); /* total bytes retransmitted */ u_int64_t tcpi_txunacked __attribute__((aligned(8))); /* current number of bytes not acknowledged */ + u_int64_t tcpi_rxpackets __attribute__((aligned(8))); /* total packets received */ u_int64_t tcpi_rxbytes __attribute__((aligned(8))); /* total bytes received */ u_int64_t tcpi_rxduplicatebytes __attribute__((aligned(8))); /* total duplicate bytes received */ - u_int64_t tcpi_snd_bw __attribute__((aligned(8))); /* measured send bandwidth in bits/sec */ + u_int64_t tcpi_rxoutoforderbytes __attribute__((aligned(8))); + /* total out of order bytes received */ + u_int64_t tcpi_snd_bw __attribute__((aligned(8))); /* measured send bandwidth in bits/sec */ + u_int8_t tcpi_synrexmits; /* Number of syn retransmits before connect */ + u_int8_t tcpi_unused1; + u_int16_t tcpi_unused2; + u_int64_t tcpi_cell_rxpackets __attribute((aligned(8))); /* packets received over cellular */ + u_int64_t tcpi_cell_rxbytes __attribute((aligned(8))); /* bytes received over cellular */ + u_int64_t tcpi_cell_txpackets __attribute((aligned(8))); /* packets transmitted over cellular */ + u_int64_t tcpi_cell_txbytes __attribute((aligned(8))); /* bytes transmitted over cellular */ + u_int64_t tcpi_wifi_rxpackets __attribute((aligned(8))); /* packets received over Wi-Fi */ + u_int64_t tcpi_wifi_rxbytes __attribute((aligned(8))); /* bytes received over Wi-Fi */ + u_int64_t tcpi_wifi_txpackets __attribute((aligned(8))); /* packets transmitted over Wi-Fi */ + u_int64_t tcpi_wifi_txbytes __attribute((aligned(8))); /* bytes transmitted over Wi-Fi */ }; struct tcp_measure_bw_burst { @@ -301,6 +326,17 @@ struct info_tuple { #define itpl_remote_sin itpl_remoteaddr._itpl_sin #define itpl_remote_sin6 itpl_remoteaddr._itpl_sin6 +/* + * TCP connection info auxiliary data (CIAUX_TCP) + * + * Do not add new fields to this structure, just add them to tcp_info + * structure towards the end. This will preserve binary compatibility. + */ +typedef struct conninfo_tcp { + pid_t tcpci_peer_pid; /* loopback peer PID if > 0 */ + struct tcp_info tcpci_tcp_info; /* TCP info */ +} conninfo_tcp_t; + #pragma pack() #endif /* PRIVATE */ diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c index 7d29a31c2..005d449c2 100644 --- a/bsd/netinet/tcp_input.c +++ b/bsd/netinet/tcp_input.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,7 +79,7 @@ #include #include #include - +#include #include /* before tcp_seq.h, for tcp_random18() */ #include @@ -88,6 +88,7 @@ #include #include #include +#include #include #include @@ -111,6 +112,7 @@ #include #include #include +#include #include #if INET6 #include @@ -136,13 +138,17 @@ struct tcphdr tcp_savetcp; #include #include +#if MPTCP +#include +#include +#include +#endif /* MPTCP */ #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETTCP, 0) #define DBG_LAYER_END NETDBG_CODE(DBG_NETTCP, 2) #define DBG_FNC_TCP_INPUT NETDBG_CODE(DBG_NETTCP, (3 << 8)) #define DBG_FNC_TCP_NEWCONN NETDBG_CODE(DBG_NETTCP, (7 << 8)) -static int tcprexmtthresh = 2; tcp_cc tcp_ccgen; #if IPSEC @@ -241,7 +247,7 @@ u_int32_t tcp_autorcvbuf_max = 512 * 1024; SYSCTL_INT(_net_inet_tcp, OID_AUTO, autorcvbufmax, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_autorcvbuf_max, 0, "Maximum receive socket buffer size"); -int sw_lro = 1; +int sw_lro = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_LOCKED, &sw_lro, 0, "Used to coalesce TCP packets"); @@ -249,17 +255,30 @@ int lrodebug = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, lrodbg, CTLFLAG_RW | CTLFLAG_LOCKED, &lrodebug, 0, "Used to debug SW LRO"); -int lro_start = 3; +int lro_start = 4; SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_startcnt, CTLFLAG_RW | CTLFLAG_LOCKED, &lro_start, 0, "Segments for starting LRO computed as power of 2"); extern int tcp_do_autosendbuf; +int limited_txmt = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, limited_transmit, CTLFLAG_RW | CTLFLAG_LOCKED, + &limited_txmt, 0, "Enable limited transmit"); + +int early_rexmt = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, early_rexmt, CTLFLAG_RW | CTLFLAG_LOCKED, + &early_rexmt, 0, "Enable Early Retransmit"); + +int sack_ackadv = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_ackadv, CTLFLAG_RW | CTLFLAG_LOCKED, + &sack_ackadv, 0, "Use SACK with cumulative ack advancement as a dupack"); + #if CONFIG_IFEF_NOWINDOWSCALE int tcp_obey_ifef_nowindowscale = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_obey_ifef_nowindowscale, 0, ""); #endif + /* This limit will determine when the receive socket buffer tuning will * kick in. Currently it will start when the bw*delay measured in * last RTT is more than half of the current hiwat on the buffer. 
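Each SYSCTL_INT above surfaces as a net.inet.tcp.* node named by its third argument, so the new knobs and changed defaults can be checked from user space. A small sketch, with the knob list taken from the declarations above:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	const char *knobs[] = {
		"net.inet.tcp.lro",			/* sw_lro, default now 0 */
		"net.inet.tcp.lro_startcnt",		/* lro_start, raised to 4 */
		"net.inet.tcp.limited_transmit",	/* RFC 3042 */
		"net.inet.tcp.early_rexmt",		/* RFC 5827 */
		"net.inet.tcp.sack_ackadv",
	};
	size_t i;

	for (i = 0; i < sizeof (knobs) / sizeof (knobs[0]); i++) {
		int val = 0;
		size_t len = sizeof (val);

		if (sysctlbyname(knobs[i], &val, &len, NULL, 0) == 0)
			printf("%s = %d\n", knobs[i], val);
	}
	return (0);
}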
@@ -278,6 +297,7 @@ extern int tcp_acc_iaj_high; extern int tcp_acc_iaj_react_limit; extern struct zone *tcp_reass_zone; +int tcprexmtthresh = 3; u_int32_t tcp_now; struct timeval tcp_uptime; /* uptime when tcp_now was last updated */ @@ -291,15 +311,17 @@ static void tcp_dooptions(struct tcpcb *, u_char *, int, struct tcphdr *, struct tcpopt *, unsigned int); static void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); -static int tcp_reass(struct tcpcb *, struct tcphdr *, int *, - struct mbuf *); -static void tcp_xmit_timer(struct tcpcb *, int); +static int tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *, + struct ifnet *); +static void tcp_xmit_timer(struct tcpcb *, int, u_int32_t, tcp_seq); static inline unsigned int tcp_maxmtu(struct rtentry *); static inline int tcp_stretch_ack_enable(struct tcpcb *tp); +static inline void tcp_adaptive_rwtimo_check(struct tcpcb *, int); #if TRAFFIC_MGT static inline void update_iaj_state(struct tcpcb *tp, uint32_t tlen, int reset_size); -void compute_iaj(struct tcpcb *tp); +void compute_iaj(struct tcpcb *tp, int nlropkts, int lro_delay_factor); +static void compute_iaj_meat(struct tcpcb *tp, uint32_t cur_iaj); #endif /* TRAFFIC_MGT */ #if INET6 @@ -314,33 +336,39 @@ static void tcp_sbsnd_trim(struct sockbuf *sbsnd); static inline void tcp_sbrcv_tstmp_check(struct tcpcb *tp); static inline void tcp_sbrcv_reserve(struct tcpcb *tp, struct sockbuf *sb, u_int32_t newsize, u_int32_t idealsize); +static void tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th); +static int tcp_detect_bad_rexmt(struct tcpcb *tp, struct tcpopt *to); +static void tcp_compute_rtt(struct tcpcb *tp, struct tcpopt *to, + struct tcphdr *th); +/* + * Constants used for resizing receive socket buffer + * when timestamps are not supported + */ #define TCPTV_RCVNOTS_QUANTUM 100 #define TCP_RCVNOTS_BYTELEVEL 204800 -/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ -#if INET6 -#define ND6_HINT(tp) \ -do { \ - if ((tp) && (tp)->t_inpcb && \ - ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0 && \ - (tp)->t_inpcb->in6p_route.ro_rt) \ - nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \ -} while (0) -#else -#define ND6_HINT(tp) -#endif + +/* + * Constants used for limiting early retransmits + * to 10 per minute. 
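A standalone sketch of the 10-per-minute cap described above (the constants follow next as TCP_EARLY_REXMT_WIN and TCP_EARLY_REXMT_LIMIT). The struct and helper names are illustrative, and unlike the kernel, which anchors the window at the first early retransmit, this version re-anchors when the window expires:

#include <stdint.h>

#define ER_WIN		(60 * 100)	/* 60s at an assumed 100 ticks/s */
#define ER_LIMIT	10

struct er_limit {
	uint32_t	win_start;	/* tick when the current window opened */
	uint32_t	count;		/* early retransmits in this window */
};

/* Return 1 if another early retransmit may be performed at tick `now'. */
static int
er_allow(struct er_limit *el, uint32_t now)
{
	if (now - el->win_start >= ER_WIN) {
		el->win_start = now;
		el->count = 0;
	}
	if (el->count >= ER_LIMIT)
		return (0);
	el->count++;
	return (1);
}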
+ */ +#define TCP_EARLY_REXMT_WIN (60 * TCP_RETRANSHZ) /* 60 seconds */ +#define TCP_EARLY_REXMT_LIMIT 10 extern void add_to_time_wait(struct tcpcb *, uint32_t delay); extern void postevent(struct socket *, struct sockbuf *, int); extern void ipfwsyslog( int level, const char *format,...); -extern int ChkAddressOK( __uint32_t dstaddr, __uint32_t srcaddr ); extern int fw_verbose; #if IPFIREWALL +extern void ipfw_stealth_stats_incr_tcp(void); + #define log_in_vain_log( a ) { \ if ( (log_in_vain == 3 ) && (fw_verbose == 2)) { /* Apple logging, log to ipfw.log */ \ ipfwsyslog a ; \ + } else if ( (log_in_vain == 4 ) && (fw_verbose == 2)) { \ + ipfw_stealth_stats_incr_tcp(); \ } \ else log a ; \ } @@ -354,7 +382,8 @@ int tcp_rcvsspktcnt = TCP_RCV_SS_PKTCOUNT; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rcvsspktcnt, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_rcvsspktcnt, 0, "packets to be seen before receiver stretches acks"); -#define DELAY_ACK(tp, th) (CC_ALGO(tp)->delay_ack != NULL && CC_ALGO(tp)->delay_ack(tp, th)) +#define DELAY_ACK(tp, th) \ + (CC_ALGO(tp)->delay_ack != NULL && CC_ALGO(tp)->delay_ack(tp, th)) static int tcp_dropdropablreq(struct socket *head); static void tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th); @@ -417,33 +446,71 @@ isqrt(unsigned int val) { return(g); } +/* +* With LRO, roughly estimate the inter arrival time between +* each sub coalesced packet as an average. Count the delay +* cur_iaj to be the delay between the last packet received +* and the first packet of the LRO stream. Due to round off errors +* cur_iaj may be the same as lro_delay_factor. Averaging has +* round off errors too. lro_delay_factor may be close to 0 +* in steady state leading to lower values fed to compute_iaj_meat. +*/ void -compute_iaj(struct tcpcb *tp) +compute_iaj(struct tcpcb *tp, int nlropkts, int lro_delay_factor) { - /* When accumulated IAJ reaches MAX_ACC_IAJ in milliseconds, throttle the - * receive window to a minimum of MIN_IAJ_WIN packets + uint32_t cur_iaj = tcp_now - tp->iaj_rcv_ts; + uint32_t timediff = 0; + + if (cur_iaj >= lro_delay_factor) { + cur_iaj = cur_iaj - lro_delay_factor; + } + + compute_iaj_meat(tp, cur_iaj); + + if (nlropkts <= 1) + return; + + nlropkts--; + + timediff = lro_delay_factor/nlropkts; + + while (nlropkts > 0) + { + compute_iaj_meat(tp, timediff); + nlropkts--; + } +} + +static +void compute_iaj_meat(struct tcpcb *tp, uint32_t cur_iaj) +{ + /* When accumulated IAJ reaches MAX_ACC_IAJ in milliseconds, + * throttle the receive window to a minimum of MIN_IAJ_WIN packets */ #define MAX_ACC_IAJ (tcp_acc_iaj_high_thresh + tcp_acc_iaj_react_limit) +#define IAJ_DIV_SHIFT 4 +#define IAJ_ROUNDUP_CONST (1 << (IAJ_DIV_SHIFT - 1)) uint32_t allowed_iaj, acc_iaj = 0; - uint32_t cur_iaj = tcp_now - tp->iaj_rcv_ts; uint32_t mean, temp; int32_t cur_iaj_dev; + cur_iaj_dev = (cur_iaj - tp->avg_iaj); - /* Allow a jitter of "allowed_iaj" milliseconds. Some connections may have a - * constant jitter more than that. We detect this by using - * standard deviation. + /* Allow a jitter of "allowed_iaj" milliseconds. Some connections + * may have a constant jitter more than that. We detect this by + * using standard deviation. */ allowed_iaj = tp->avg_iaj + tp->std_dev_iaj; if (allowed_iaj < tcp_allowed_iaj) allowed_iaj = tcp_allowed_iaj; - /* Initially when the connection starts, the senders congestion window - * is small. During this period we avoid throttling a connection because - * we do not have a good starting point for allowed_iaj. 
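compute_iaj() above charges the pre-coalescing gap to the first packet of an LRO super-packet and splits the remaining delay evenly over the coalesced ones; each share then feeds a 15/16-weighted average (the compute_iaj_meat() shown next), where adding half the divisor makes the shift round to nearest. A self-contained sketch with illustrative names:

#include <stdint.h>

#define DIV_SHIFT	4			/* IAJ_DIV_SHIFT */
#define ROUNDUP		(1 << (DIV_SHIFT - 1))	/* IAJ_ROUNDUP_CONST */

/* avg <- (15*avg + sample + 8) >> 4, i.e. a round-to-nearest EWMA. */
static uint32_t
ewma16(uint32_t avg, uint32_t sample)
{
	return ((((avg << DIV_SHIFT) - avg) + sample + ROUNDUP) >> DIV_SHIFT);
}

/* Feed one arrival: cur_iaj is the raw gap, lro_delay the coalescing time. */
static uint32_t
iaj_feed(uint32_t avg, uint32_t cur_iaj, int nlropkts, uint32_t lro_delay)
{
	uint32_t share = (nlropkts > 1) ? lro_delay / (nlropkts - 1) : 0;

	if (cur_iaj >= lro_delay)
		cur_iaj -= lro_delay;		/* charge only the pre-LRO gap */
	avg = ewma16(avg, cur_iaj);
	for (; nlropkts > 1; nlropkts--)
		avg = ewma16(avg, share);	/* one share per sub-packet */
	return (avg);
}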
IAJ_IGNORE_PKTCNT - * is used to quietly gloss over the first few packets. + /* Initially when the connection starts, the senders congestion + * window is small. During this period we avoid throttling a + * connection because we do not have a good starting point for + * allowed_iaj. IAJ_IGNORE_PKTCNT is used to quietly gloss over + * the first few packets. */ if (tp->iaj_pktcnt > IAJ_IGNORE_PKTCNT) { if ( cur_iaj <= allowed_iaj ) { @@ -451,6 +518,7 @@ compute_iaj(struct tcpcb *tp) acc_iaj = tp->acc_iaj - 2; else acc_iaj = 0; + } else { acc_iaj = tp->acc_iaj + (cur_iaj - allowed_iaj); } @@ -463,18 +531,25 @@ compute_iaj(struct tcpcb *tp) /* Compute weighted average where the history has a weight of * 15 out of 16 and the current value has a weight of 1 out of 16. * This will make the short-term measurements have more weight. + * + * The addition of 8 will help to round-up the value + * instead of round-down */ - tp->avg_iaj = (((tp->avg_iaj << 4) - tp->avg_iaj) + cur_iaj) >> 4; + tp->avg_iaj = (((tp->avg_iaj << IAJ_DIV_SHIFT) - tp->avg_iaj) + + cur_iaj + IAJ_ROUNDUP_CONST) >> IAJ_DIV_SHIFT; /* Compute Root-mean-square of deviation where mean is a weighted - * average as described above + * average as described above. */ temp = tp->std_dev_iaj * tp->std_dev_iaj; - mean = (((temp << 4) - temp) + (cur_iaj_dev * cur_iaj_dev)) >> 4; + mean = (((temp << IAJ_DIV_SHIFT) - temp) + + (cur_iaj_dev * cur_iaj_dev) + + IAJ_ROUNDUP_CONST) >> IAJ_DIV_SHIFT; tp->std_dev_iaj = isqrt(mean); - DTRACE_TCP3(iaj, struct tcpcb *, tp, uint32_t, cur_iaj, uint32_t, allowed_iaj); + DTRACE_TCP3(iaj, struct tcpcb *, tp, uint32_t, cur_iaj, + uint32_t, allowed_iaj); return; } @@ -509,19 +584,21 @@ tcp_bwmeas_check(struct tcpcb *tp) } static int -tcp_reass(tp, th, tlenp, m) - register struct tcpcb *tp; - register struct tcphdr *th; - int *tlenp; - struct mbuf *m; +tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m, + struct ifnet *ifp) { struct tseg_qent *q; struct tseg_qent *p = NULL; struct tseg_qent *nq; struct tseg_qent *te = NULL; - struct socket *so = tp->t_inpcb->inp_socket; - int flags; + struct inpcb *inp = tp->t_inpcb; + struct socket *so = inp->inp_socket; + int flags = 0; int dowakeup = 0; + struct mbuf *oodata = NULL; + int copy_oodata = 0; + boolean_t cell = IFNET_IS_CELLULAR(ifp); + boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); /* * Call with th==0 after become established to @@ -599,13 +676,14 @@ tcp_reass(tp, th, tlenp, m) tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += *tlenp; if (nstat_collect) { - nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, *tlenp, NSTAT_RX_FLAG_DUPLICATE); - locked_add_64(&tp->t_inpcb->inp_stat->rxpackets, 1); - locked_add_64(&tp->t_inpcb->inp_stat->rxbytes, *tlenp); + nstat_route_rx(inp->inp_route.ro_rt, 1, *tlenp, NSTAT_RX_FLAG_DUPLICATE); + INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, rxbytes, *tlenp); tp->t_stat.rxduplicatebytes += *tlenp; } m_freem(m); zfree(tcp_reass_zone, te); + te = NULL; tcp_reass_qsize--; /* * Try to present any queued data @@ -613,7 +691,7 @@ tcp_reass(tp, th, tlenp, m) * This is needed after the 3-WHS * completes. */ - goto present; /* ??? 
*/ + goto present; } m_adj(m, i); *tlenp -= i; @@ -623,9 +701,9 @@ tcp_reass(tp, th, tlenp, m) tcpstat.tcps_rcvoopack++; tcpstat.tcps_rcvoobyte += *tlenp; if (nstat_collect) { - nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, *tlenp, NSTAT_RX_FLAG_OUT_OF_ORDER); - locked_add_64(&tp->t_inpcb->inp_stat->rxpackets, 1); - locked_add_64(&tp->t_inpcb->inp_stat->rxbytes, *tlenp); + nstat_route_rx(inp->inp_route.ro_rt, 1, *tlenp, NSTAT_RX_FLAG_OUT_OF_ORDER); + INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, rxbytes, *tlenp); tp->t_stat.rxoutoforderbytes += *tlenp; } @@ -663,6 +741,15 @@ tcp_reass(tp, th, tlenp, m) LIST_INSERT_AFTER(p, te, tqe_q); } + /* + * New out-of-order data exists, and is pointed to by + * queue entry te. Set copy_oodata to 1 so out-of-order data + * can be copied off to sockbuf after in-order data + * is copied off. + */ + if (!(so->so_state & SS_CANTRCVMORE)) + copy_oodata = 1; + present: /* * Present data to user, advancing rcv_nxt through @@ -674,11 +761,18 @@ present: if (!q || q->tqe_th->th_seq != tp->rcv_nxt) { /* Stop using LRO once out of order packets arrive */ if (tp->t_flagsext & TF_LRO_OFFLOADED) { - tcp_lro_remove_state(tp->t_inpcb->inp_laddr, - tp->t_inpcb->inp_faddr, + tcp_lro_remove_state(inp->inp_laddr, inp->inp_faddr, th->th_dport, th->th_sport); tp->t_flagsext &= ~TF_LRO_OFFLOADED; } + + /* + * continue processing if out-of-order data + * can be delivered + */ + if (q && (so->so_flags & SOF_ENABLE_MSGS)) + goto msg_unordered_delivery; + return (0); } do { @@ -686,46 +780,78 @@ present: flags = q->tqe_th->th_flags & TH_FIN; nq = LIST_NEXT(q, tqe_q); LIST_REMOVE(q, tqe_q); - if (so->so_state & SS_CANTRCVMORE) + if (so->so_state & SS_CANTRCVMORE) { m_freem(q->tqe_m); - else { + } else { so_recv_data_stat(so, q->tqe_m, 0); /* XXXX */ - if (sbappendstream(&so->so_rcv, q->tqe_m)) + if (so->so_flags & SOF_ENABLE_MSGS) { + /* + * Append the inorder data as a message to the + * receive socket buffer. Also check to see if + * the data we are about to deliver is the same + * data that we wanted to pass up to the user + * out of order. If so, reset copy_oodata -- + * the received data filled a gap, and + * is now in order! 
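The queue walk in tcp_reass() above reduces to: find the last entry whose sequence number does not exceed the new segment's, then link after it. A toy model of that ordering step (overlap trimming and statistics omitted; SEQ_GT is spelled out so 32-bit sequence wraparound compares correctly):

#include <stdint.h>
#include <sys/queue.h>

struct seg {
	LIST_ENTRY(seg)	link;
	uint32_t	seq;	/* first sequence number of the segment */
	uint32_t	len;
};
LIST_HEAD(seg_head, seg);

static void
seg_insert(struct seg_head *q, struct seg *te)
{
	struct seg *s, *prev = NULL;

	LIST_FOREACH(s, q, link) {
		if ((int32_t)(s->seq - te->seq) > 0)	/* SEQ_GT(s, te) */
			break;
		prev = s;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(q, te, link);
	else
		LIST_INSERT_AFTER(prev, te, link);
}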
+ */ + if (q == te) + copy_oodata = 0; + } + if (sbappendstream_rcvdemux(so, q->tqe_m, + q->tqe_th->th_seq - (tp->irs + 1), 0)) dowakeup = 1; if (tp->t_flagsext & TF_LRO_OFFLOADED) { tcp_update_lro_seq(tp->rcv_nxt, - tp->t_inpcb->inp_laddr, - tp->t_inpcb->inp_faddr, th->th_dport, th->th_sport); + inp->inp_laddr, inp->inp_faddr, + th->th_dport, th->th_sport); } } zfree(tcp_reass_zone, q); tcp_reass_qsize--; q = nq; } while (q && q->tqe_th->th_seq == tp->rcv_nxt); - ND6_HINT(tp); #if INET6 - if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { + if ((inp->inp_vflag & INP_IPV6) != 0) { KERNEL_DEBUG(DBG_LAYER_BEG, - ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport), - (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) | - (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)), + ((inp->inp_fport << 16) | inp->inp_lport), + (((inp->in6p_laddr.s6_addr16[0] & 0xffff) << 16) | + (inp->in6p_faddr.s6_addr16[0] & 0xffff)), 0,0,0); } else #endif { KERNEL_DEBUG(DBG_LAYER_BEG, - ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport), - (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) | - (tp->t_inpcb->inp_faddr.s_addr & 0xffff)), + ((inp->inp_fport << 16) | inp->inp_lport), + (((inp->inp_laddr.s_addr & 0xffff) << 16) | + (inp->inp_faddr.s_addr & 0xffff)), 0,0,0); } + +msg_unordered_delivery: + /* Deliver out-of-order data as a message */ + if (te && (so->so_flags & SOF_ENABLE_MSGS) && copy_oodata && te->tqe_len) { + /* + * make a copy of the mbuf to be delivered up to + * the user, and add it to the sockbuf + */ + oodata = m_copym(te->tqe_m, 0, M_COPYALL, M_DONTWAIT); + if (oodata != NULL) { + if (sbappendmsgstream_rcv(&so->so_rcv, oodata, + te->tqe_th->th_seq - (tp->irs + 1), 1)) { + dowakeup = 1; + tcpstat.tcps_msg_unopkts++; + } else { + tcpstat.tcps_msg_unoappendfail++; + } + } + } + if (dowakeup) sorwakeup(so); /* done with socket lock held */ return (flags); - } /* @@ -751,6 +877,31 @@ tcp_reduce_congestion_window( tp->t_maxseg * tcprexmtthresh; } +/* + * The application wants to get an event if there + * is a stall during read. Set the initial keepalive + * timeout to be equal to twice RTO. + */ +static inline void +tcp_adaptive_rwtimo_check(struct tcpcb *tp, int tlen) +{ + if (tp->t_adaptive_rtimo > 0 && tlen > 0 && + tp->t_state == TCPS_ESTABLISHED) { + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + (TCP_REXMTVAL(tp) << 1)); + tp->t_flagsext |= TF_DETECT_READSTALL; + tp->t_rtimo_probes = 0; + } +} + +inline void +tcp_keepalive_reset(struct tcpcb *tp) +{ + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + TCP_CONN_KEEPIDLE(tp)); + tp->t_flagsext &= ~(TF_DETECT_READSTALL); + tp->t_rtimo_probes = 0; +} /* * TCP input routine, follows pages 65-76 of the @@ -762,8 +913,8 @@ tcp6_input(struct mbuf **mp, int *offp, int proto) { #pragma unused(proto) register struct mbuf *m = *mp; - struct in6_ifaddr *ia6; - struct ifnet *ifp = ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) ? m->m_pkthdr.rcvif: NULL; + uint32_t ia6_flags; + struct ifnet *ifp = m->m_pkthdr.rcvif; IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), return IPPROTO_DONE); @@ -774,26 +925,19 @@ tcp6_input(struct mbuf **mp, int *offp, int proto) * draft-itojun-ipv6-tcp-to-anycast * better place to put this in? 
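tcp_adaptive_rwtimo_check() above arms the keepalive slot at twice the retransmission timeout so a read stall surfaces within a couple of round trips. A simplified model of the arithmetic; the kernel's TCP_REXMTVAL operates on shifted srtt/rttvar fields, whereas plain tick values are assumed here:

#include <stdint.h>

/* Classic RTO estimate, srtt + 4*rttvar, with a floor applied. */
static uint32_t
rexmt_val(uint32_t srtt, uint32_t rttvar, uint32_t rto_min)
{
	uint32_t rto = srtt + 4 * rttvar;

	return (rto < rto_min ? rto_min : rto);
}

/* The read-stall probe fires at twice the RTO, as in the code above. */
static uint32_t
read_stall_timeout(uint32_t srtt, uint32_t rttvar, uint32_t rto_min)
{
	return (rexmt_val(srtt, rttvar, rto_min) << 1);
}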
*/ - ia6 = ip6_getdstifaddr(m); - if (ia6 != NULL) { - IFA_LOCK_SPIN(&ia6->ia_ifa); - if (ia6->ia6_flags & IN6_IFF_ANYCAST) { + if (ip6_getdstifaddr_info(m, NULL, &ia6_flags) == 0) { + if (ia6_flags & IN6_IFF_ANYCAST) { struct ip6_hdr *ip6; - IFA_UNLOCK(&ia6->ia_ifa); - IFA_REMREF(&ia6->ia_ifa); ip6 = mtod(m, struct ip6_hdr *); icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, (caddr_t)&ip6->ip6_dst - (caddr_t)ip6); - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->icmp6unreach, 1); - + + IF_TCP_STATINC(ifp, icmp6unreach); + return (IPPROTO_DONE); } - IFA_UNLOCK(&ia6->ia_ifa); - IFA_REMREF(&ia6->ia_ifa); } tcp_input(m, *offp); @@ -827,11 +971,10 @@ tcp_cansbgrow(struct sockbuf *sb) return(0); } -void -tcp_sbrcv_reserve(struct tcpcb *tp, - struct sockbuf *sbrcv, - u_int32_t newsize, - u_int32_t idealsize) { +static void +tcp_sbrcv_reserve(struct tcpcb *tp, struct sockbuf *sbrcv, + u_int32_t newsize, u_int32_t idealsize) +{ /* newsize should not exceed max */ newsize = min(newsize, tcp_autorcvbuf_max); @@ -866,10 +1009,22 @@ static void tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, struct tcpopt *to, u_int32_t pktlen) { + /* + * Do not grow the receive socket buffer if + * - auto resizing is disabled, globally or on this socket + * - the high water mark has already reached the maximum + * - the stream is in background and receive side is being + * throttled + * - if there are segments in reassembly queue indicating loss, + * do not need to increase recv window during recovery as more + * data is not going to be sent. + */ if (tcp_do_autorcvbuf == 0 || (sbrcv->sb_flags & SB_AUTOSIZE) == 0 || tcp_cansbgrow(sbrcv) == 0 || - sbrcv->sb_hiwat >= tcp_autorcvbuf_max) { + sbrcv->sb_hiwat >= tcp_autorcvbuf_max || + (tp->t_flagsext & TF_RECV_THROTTLE) || + !LIST_EMPTY(&tp->t_segq)) { /* Can not resize the socket buffer, just return */ goto out; } @@ -882,8 +1037,7 @@ tcp_sbrcv_grow(struct tcpcb *tp, struct sockbuf *sbrcv, goto out; } - if ((tp->t_flags & (TF_REQ_TSTMP | TF_RCVD_TSTMP)) != - (TF_REQ_TSTMP | TF_RCVD_TSTMP)) { + if (!TSTMP_SUPPORTED(tp)) { /* * Timestamp option is not supported on this connection. * If the connection reached a state to indicate that @@ -1052,11 +1206,13 @@ tcp_sbrcv_tstmp_check(struct tcpcb *tp) { * tcp_rcvsspktcnt to cover slow-start and tcp_maxrcvidle to identify idle * state. */ - static inline int - tcp_stretch_ack_enable(struct tcpcb *tp) { - if (tp->rcv_by_unackwin >= (maxseg_unacked * tp->t_maxseg) && +static inline int +tcp_stretch_ack_enable(struct tcpcb *tp) +{ + if (!(tp->t_flagsext & TF_NOSTRETCHACK) && + tp->rcv_by_unackwin >= (maxseg_unacked * tp->t_maxseg) && TSTMP_GT(tp->rcv_unackwin + tcp_maxrcvidle, tcp_now) && - (((tp->t_flagsext & TF_RCVUNACK_WAITSS) == 0) || + (!(tp->t_flagsext & TF_RCVUNACK_WAITSS) || (tp->rcv_waitforss >= tcp_rcvsspktcnt))) { return(1); } @@ -1077,6 +1233,88 @@ tcp_reset_stretch_ack(struct tcpcb *tp) tp->rcv_unackwin = tcp_now + tcp_rcvunackwin; } +/* + * The last packet was a retransmission, check if this ack + * indicates that the retransmission was spurious. + * + * If the connection supports timestamps, we could use it to + * detect if the last retransmit was not needed. Otherwise, + * we check if the ACK arrived within RTT/2 window, then it + * was a mistake to do the retransmit in the first place. + * + * This function will return 1 if it is a spurious retransmit, + * 0 otherwise. 
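The resize gate in tcp_sbrcv_grow() above collapses to a single predicate; restated here with the kernel's flag tests reduced to illustrative booleans:

/* All conditions must hold before the receive buffer may be resized. */
static int
rbuf_may_grow(int autorcvbuf_on, int sb_autosize_set, unsigned int hiwat,
    unsigned int hiwat_max, int recv_throttled, int reass_queue_nonempty)
{
	return (autorcvbuf_on && sb_autosize_set && hiwat < hiwat_max &&
	    !recv_throttled && !reass_queue_nonempty);
}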
+ */ +static int +tcp_detect_bad_rexmt(struct tcpcb *tp, struct tcpopt *to) +{ + int32_t tdiff, bad_rexmt_win; + tdiff = (int32_t)(tcp_now - tp->t_rxtstart); + bad_rexmt_win = (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); + + if (TSTMP_SUPPORTED(tp) && tp->t_rxtstart > 0 && + (to->to_flags & TOF_TS) != 0 && + to->to_tsecr != 0 && + TSTMP_LT(to->to_tsecr, tp->t_rxtstart)) { + return (1); + } else if (tp->t_rxtshift == 1 && + tdiff < bad_rexmt_win) { + return(1); + } + return(0); +} + + +/* + * Restore congestion window state if a spurious timeout + * was detected. + */ +static void +tcp_bad_rexmt_restore_state(struct tcpcb *tp, struct tcphdr *th) +{ + if (TSTMP_SUPPORTED(tp)) { + u_int32_t fsize, acked; + fsize = tp->snd_max - th->th_ack; + acked = BYTES_ACKED(th, tp); + + /* + * Implement bad retransmit recovery as + * described in RFC 4015. + */ + tp->snd_ssthresh = tp->snd_ssthresh_prev; + + /* Initialize cwnd to the initial window */ + if (CC_ALGO(tp)->cwnd_init != NULL) + CC_ALGO(tp)->cwnd_init(tp); + + tp->snd_cwnd = fsize + min(acked, tp->snd_cwnd); + + } else { + tp->snd_cwnd = tp->snd_cwnd_prev; + tp->snd_ssthresh = tp->snd_ssthresh_prev; + if (tp->t_flags & TF_WASFRECOVERY) + ENTER_FASTRECOVERY(tp); + } + tp->snd_recover = tp->snd_recover_prev; + tp->snd_nxt = tp->snd_max; + tp->t_rxtshift = 0; + tp->t_rxtstart = 0; + + /* Fix send socket buffer to reflect the change in cwnd */ + tcp_bad_rexmt_fix_sndbuf(tp); + + /* + * This RTT might reflect the extra delay induced + * by the network. Skip using this sample for RTO + * calculation and mark the connection so we can + * recompute RTT when the next eligible sample is + * found. + */ + tp->t_flagsext |= TF_RECOMPUTE_RTT; + tp->t_badrexmt_time = tcp_now; + tp->t_rtttime = 0; +} + void tcp_input(m, off0) struct mbuf *m; @@ -1084,11 +1322,10 @@ tcp_input(m, off0) { register struct tcphdr *th; register struct ip *ip = NULL; - register struct ipovly *ipov; register struct inpcb *inp; u_char *optp = NULL; int optlen = 0; - int len, tlen, off; + int tlen, off; int drop_hdrlen; register struct tcpcb *tp = 0; register int thflags; @@ -1100,7 +1337,7 @@ tcp_input(m, off0) #endif int dropsocket = 0; int iss = 0, nosock = 0; - u_int32_t tiwin; + u_int32_t tiwin, sack_bytes_acked = 0; struct tcpopt to; /* options in this segment */ struct sockaddr_in *next_hop = NULL; #if TCPDEBUG @@ -1110,20 +1347,23 @@ tcp_input(m, off0) u_char ip_ecn = IPTOS_ECN_NOTECT; unsigned int ifscope, nocell = 0; uint8_t isconnected, isdisconnected; - struct ifnet *ifp = ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) ? m->m_pkthdr.rcvif: NULL; - int nlropkts = m->m_pkthdr.lro_npkts; - int mauxf_sw_lro_pkt = (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_PKT) ? 1 : 0; - int turnoff_lro = 0; + struct ifnet *ifp = m->m_pkthdr.rcvif; + int pktf_sw_lro_pkt = (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_PKT) ? 1 : 0; + int nlropkts = (pktf_sw_lro_pkt == 1) ? m->m_pkthdr.lro_npkts : 1; + int turnoff_lro = 0, win; +#if MPTCP + struct mptcb *mp_tp = NULL; + uint16_t mptcp_csum = 0; +#endif /* MPTCP */ + boolean_t cell = IFNET_IS_CELLULAR(ifp); + boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); + #define TCP_INC_VAR(stat, npkts) do { \ - if (mauxf_sw_lro_pkt) { \ - stat += npkts; \ - } else { \ - stat++; \ - } \ + stat += npkts; \ } while (0) TCP_INC_VAR(tcpstat.tcps_rcvtotal, nlropkts); - + /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. 
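tcp_detect_bad_rexmt() above encodes the two tests just described; condensed into a standalone predicate with the same modular timestamp compare (t_srtt is stored shifted by TCP_RTT_SHIFT, so shifting by one more bit yields roughly half an RTT):

#include <stdint.h>

#define TSTMP_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define RTT_SHIFT	5	/* TCP_RTT_SHIFT */

static int
detect_bad_rexmt(uint32_t now, uint32_t rxtstart, int rxtshift,
    uint32_t srtt_shifted, int has_tsecr, uint32_t tsecr)
{
	int32_t tdiff = (int32_t)(now - rxtstart);
	int32_t bad_rexmt_win = srtt_shifted >> (RTT_SHIFT + 1);	/* RTT/2 */

	/* Echoed timestamp predates the retransmit: the original arrived. */
	if (has_tsecr && tsecr != 0 && rxtstart > 0 &&
	    TSTMP_LT(tsecr, rxtstart))
		return (1);
	/* No timestamps: first retransmit ACKed within half an RTT. */
	if (rxtshift == 1 && tdiff < bad_rexmt_win)
		return (1);
	return (0);
}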
*/ if (!SLIST_EMPTY(&m->m_pkthdr.tags)) { fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, @@ -1132,19 +1372,20 @@ tcp_input(m, off0) fwd_tag = NULL; } if (fwd_tag != NULL) { - struct ip_fwd_tag *ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1); - + struct ip_fwd_tag *ipfwd_tag = + (struct ip_fwd_tag *)(fwd_tag+1); + next_hop = ipfwd_tag->next_hop; m_tag_delete(m, fwd_tag); } - + #if INET6 struct ip6_hdr *ip6 = NULL; int isipv6; #endif /* INET6 */ int rstreason; /* For badport_bandlim accounting purposes */ struct proc *proc0=current_proc(); - + KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_START,0,0,0,0,0); #if INET6 @@ -1154,7 +1395,10 @@ tcp_input(m, off0) #if INET6 if (isipv6) { - /* Expect 32-bit aligned data pointer on strict-align platforms */ + /* + * Expect 32-bit aligned data pointer on + * strict-align platforms + */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); /* IP6_EXTHDR_CHECK() is already done at tcp6_input() */ @@ -1162,44 +1406,8 @@ tcp_input(m, off0) tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; th = (struct tcphdr *)(void *)((caddr_t)ip6 + off0); - if ((apple_hwcksum_rx != 0) && (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) { - if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) - th->th_sum = m->m_pkthdr.csum_data; - else { - /* - * There is no established protocol for the case - * where IPv6 psuedoheader checksum is not computed - * with our current drivers. Current drivers set - * CSUM_PSEUDO_HDR. So if we do get here, we should - * recalculate checksum. - */ - if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) { - th->th_sum = 0; - } else { - th->th_sum = 0xffff; - } - } - - th->th_sum ^= 0xffff; - if (th->th_sum) { - tcpstat.tcps_rcvbadsum++; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->badformat, 1); - - goto dropnosock; - } - } - else { - if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) { - tcpstat.tcps_rcvbadsum++; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->badformat, 1); - - goto dropnosock; - } - } + if (tcp_input_checksum(AF_INET6, m, th, off0, tlen)) + goto dropnosock; KERNEL_DEBUG(DBG_LAYER_BEG, ((th->th_dport << 16) | th->th_sport), (((ip6->ip6_src.s6_addr16[0]) << 16) | (ip6->ip6_dst.s6_addr16[0])), @@ -1214,17 +1422,14 @@ tcp_input(m, off0) */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { /* XXX stat */ - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->unspecv6, 1); - + IF_TCP_STATINC(ifp, unspecv6); goto dropnosock; } - DTRACE_TCP5(receive, sruct mbuf *, m, struct inpcb *, NULL, + DTRACE_TCP5(receive, struct mbuf *, m, struct inpcb *, NULL, struct ip6_hdr *, ip6, struct tcpcb *, NULL, struct tcphdr *, th); - - ip_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; + + ip_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; } else #endif /* INET6 */ { @@ -1235,9 +1440,6 @@ tcp_input(m, off0) if (off0 > sizeof (struct ip)) { ip_stripoptions(m, (struct mbuf *)0); off0 = sizeof(struct ip); - if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) - m->m_pkthdr.csum_flags = 0; /* invalidate hwcksuming */ - } if (m->m_len < sizeof (struct tcpiphdr)) { if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) { @@ -1250,63 +1452,12 @@ tcp_input(m, off0) MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); ip = mtod(m, struct ip *); - ipov = (struct ipovly *)ip; th = (struct tcphdr *)(void *)((caddr_t)ip + off0); tlen = ip->ip_len; - if (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_DID_CSUM) { - goto skip_checksum; - } - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { - if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) 
{ - u_short pseudo; - char b[9]; - - bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1)); - bzero(ipov->ih_x1, sizeof (ipov->ih_x1)); - ipov->ih_len = (u_short)tlen; -#if BYTE_ORDER != BIG_ENDIAN - HTONS(ipov->ih_len); -#endif - pseudo = in_cksum(m, sizeof (struct ip)); - bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1)); - - th->th_sum = in_addword(pseudo, (m->m_pkthdr.csum_data & 0xFFFF)); - } else { - if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) - th->th_sum = m->m_pkthdr.csum_data; - else - th->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + - ip->ip_len + IPPROTO_TCP)); - } - th->th_sum ^= 0xffff; - } else { - char b[9]; - /* - * Checksum extended TCP header and data. - */ - bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1)); - bzero(ipov->ih_x1, sizeof (ipov->ih_x1)); - ipov->ih_len = (u_short)tlen; -#if BYTE_ORDER != BIG_ENDIAN - HTONS(ipov->ih_len); -#endif - len = sizeof (struct ip) + tlen; - th->th_sum = in_cksum(m, len); - bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1)); - - tcp_in_cksum_stats(len); - } - if (th->th_sum) { - tcpstat.tcps_rcvbadsum++; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->badformat, 1); - if (lrodebug) printf("tcp_input: bad xsum len = %d, tlen = %d, flags = %x, csum_flags = %x.\n",len, tlen, m->m_flags, m->m_pkthdr.csum_flags); + if (tcp_input_checksum(AF_INET, m, th, off0, tlen)) goto dropnosock; - } -skip_checksum: + #if INET6 /* Re-initialization for later version check */ ip->ip_v = IPVERSION; @@ -1315,7 +1466,7 @@ skip_checksum: DTRACE_TCP5(receive, struct mbuf *, m, struct inpcb *, NULL, struct ip *, ip, struct tcpcb *, NULL, struct tcphdr *, th); - + KERNEL_DEBUG(DBG_LAYER_BEG, ((th->th_dport << 16) | th->th_sport), (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)), th->th_seq, th->th_ack, th->th_win); @@ -1329,10 +1480,7 @@ skip_checksum: off = th->th_off << 2; if (off < sizeof (struct tcphdr) || off > tlen) { tcpstat.tcps_rcvbadoff++; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->badformat, 1); - + IF_TCP_STATINC(ifp, badformat); goto dropnosock; } tlen -= off; /* tlen is used instead of ti->ti_len */ @@ -1351,7 +1499,6 @@ skip_checksum: return; } ip = mtod(m, struct ip *); - ipov = (struct ipovly *)ip; th = (struct tcphdr *)(void *)((caddr_t)ip + off0); } } @@ -1386,10 +1533,7 @@ skip_checksum: * This is a violation of the TCP specification. */ if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN)) { - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->synfin, 1); - + IF_TCP_STATINC(ifp, synfin); goto dropnosock; } #endif @@ -1410,16 +1554,16 @@ skip_checksum: calculate_tcp_clock(); /* - * Record the interface where this segment arrived on; this does not - * affect normal data output (for non-detached TCP) as it provides a - * hint about which route and interface to use for sending in the - * absence of a PCB, when scoped routing (and thus source interface - * selection) are enabled. - */ - if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) - ifscope = m->m_pkthdr.rcvif->if_index; - else - ifscope = IFSCOPE_NONE; + * Record the interface where this segment arrived on; this does not + * affect normal data output (for non-detached TCP) as it provides a + * hint about which route and interface to use for sending in the + * absence of a PCB, when scoped routing (and thus source interface + * selection) are enabled. 
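Right after checksum validation comes the header-length sanity check seen above; as a standalone predicate (th_off counts 32-bit words, so shifting left by 2 converts to bytes):

#include <netinet/tcp.h>

/* Valid iff the offset covers the base header and stays within tlen. */
static int
tcp_off_ok(const struct tcphdr *th, int tlen)
{
	int off = th->th_off << 2;

	return (off >= (int)sizeof (struct tcphdr) && off <= tlen);
}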
+ */ + if ((m->m_pkthdr.pkt_flags & PKTF_LOOP) || m->m_pkthdr.rcvif == NULL) + ifscope = IFSCOPE_NONE; + else + ifscope = m->m_pkthdr.rcvif->if_index; /* * Convert TCP protocol specific fields to host format. @@ -1506,10 +1650,9 @@ findpcb: IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING) inp = NULL; // pretend we didn't find it - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->badformatipsec, 1); - + + IF_TCP_STATINC(ifp, badformatipsec); + goto dropnosock; } } else @@ -1518,10 +1661,9 @@ findpcb: IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING) inp = NULL; // pretend we didn't find it - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->badformatipsec, 1); - + + IF_TCP_STATINC(ifp, badformatipsec); + goto dropnosock; } } @@ -1567,6 +1709,7 @@ findpcb: ntohs(th->th_sport), thflags); break; case 3: + case 4: if ((thflags & TH_SYN) && !(thflags & TH_ACK) && !(m->m_flags & (M_BCAST | M_MCAST)) && #if INET6 @@ -1601,10 +1744,7 @@ findpcb: } } rstreason = BANDLIM_RST_CLOSEDPORT; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->noconnnolist, 1); - + IF_TCP_STATINC(ifp, noconnnolist); goto dropwithresetnosock; } so = inp->inp_socket; @@ -1631,10 +1771,7 @@ findpcb: tp = intotcpcb(inp); if (tp == 0) { rstreason = BANDLIM_RST_CLOSEDPORT; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->noconnlist, 1); - + IF_TCP_STATINC(ifp, noconnlist); goto dropwithreset; } if (tp->t_state == TCPS_CLOSED) @@ -1651,8 +1788,9 @@ findpcb: goto drop; #endif - /* Radar 7377561: Avoid processing packets while closing a listen socket */ - if (tp->t_state == TCPS_LISTEN && (so->so_options & SO_ACCEPTCONN) == 0) + /* Avoid processing packets while closing a listen socket */ + if (tp->t_state == TCPS_LISTEN && + (so->so_options & SO_ACCEPTCONN) == 0) goto drop; if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) { @@ -1695,10 +1833,8 @@ findpcb: * If it is from this socket, drop it, it must be forged. */ if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) { - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->listbadsyn, 1); - + IF_TCP_STATINC(ifp, listbadsyn); + if (thflags & TH_RST) { goto drop; } @@ -1762,43 +1898,20 @@ findpcb: * for the exchange. * * If we do not forbid deprecated addresses, we accept - * the SYN packet. RFC2462 does not suggest dropping - * SYN in this case. - * If we decipher RFC2462 5.5.4, it says like this: - * 1. use of deprecated addr with existing - * communication is okay - "SHOULD continue to be - * used" - * 2. use of it with new communication: - * (2a) "SHOULD NOT be used if alternate address - * with sufficient scope is available" - * (2b) nothing mentioned otherwise. - * Here we fall into (2b) case as we have no choice in - * our source address selection - we must obey the peer. - * - * The wording in RFC2462 is confusing, and there are - * multiple description text for deprecated address - * handling - worse, they are not exactly the same. - * I believe 5.5.4 is the best one, so we follow 5.5.4. + * the SYN packet. RFC 4862 forbids dropping SYN in + * this case. 
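Each per-interface counter bump in this hunk trades four open-coded lines for IF_TCP_STATINC(). The macro's real definition lives in another part of the patch; this sketch simply mirrors the code being replaced:

#define IF_TCP_STATINC(_ifp, _s) do {					\
	if ((_ifp) != NULL && (_ifp)->if_tcp_stat != NULL)		\
		atomic_add_64(&(_ifp)->if_tcp_stat->_s, 1);		\
} while (0)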
*/ if (isipv6 && !ip6_use_deprecated) { - struct in6_ifaddr *ia6; - - ia6 = ip6_getdstifaddr(m); - if (ia6 != NULL) { - IFA_LOCK_SPIN(&ia6->ia_ifa); - if (ia6->ia6_flags & IN6_IFF_DEPRECATED) { - IFA_UNLOCK(&ia6->ia_ifa); - IFA_REMREF(&ia6->ia_ifa); + uint32_t ia6_flags; + + if (ip6_getdstifaddr_info(m, NULL, + &ia6_flags) == 0) { + if (ia6_flags & IN6_IFF_DEPRECATED) { tp = NULL; rstreason = BANDLIM_RST_OPENPORT; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->deprecate6, 1); - + IF_TCP_STATINC(ifp, deprecate6); goto dropwithreset; } - IFA_UNLOCK(&ia6->ia_ifa); - IFA_REMREF(&ia6->ia_ifa); } } #endif @@ -1958,6 +2071,10 @@ findpcb: tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT|TF_NODELAY); tp->t_flagsext |= (tp0->t_flagsext & TF_RXTFINDROP); tp->t_keepinit = tp0->t_keepinit; + tp->t_keepcnt = tp0->t_keepcnt; + tp->t_keepintvl = tp0->t_keepintvl; + tp->t_adaptive_wtimo = tp0->t_adaptive_wtimo; + tp->t_adaptive_rtimo = tp0->t_adaptive_rtimo; tp->t_inpcb->inp_ip_ttl = tp0->t_inpcb->inp_ip_ttl; if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) tp->t_notsent_lowat = tp0->t_notsent_lowat; @@ -1972,70 +2089,9 @@ findpcb: } lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); - /* - * Radar 3529618 - * This is the second part of the MSS DoS prevention code (after - * minmss on the sending side) and it deals with too many too small - * tcp packets in a too short timeframe (1 second). - * - * For every full second we count the number of received packets - * and bytes. If we get a lot of packets per second for this connection - * (tcp_minmssoverload) we take a closer look at it and compute the - * average packet size for the past second. If that is less than - * tcp_minmss we get too many packets with very small payload which - * is not good and burdens our system (and every packet generates - * a wakeup to the process connected to our socket). We can reasonable - * expect this to be small packet DoS attack to exhaust our CPU - * cycles. - * - * Care has to be taken for the minimum packet overload value. This - * value defines the minimum number of packets per second before we - * start to worry. This must not be too low to avoid killing for - * example interactive connections with many small packets like - * telnet or SSH. - * - * Setting either tcp_minmssoverload or tcp_minmss to "0" disables - * this check. - * - * Account for packet if payload packet, skip over ACK, etc. - */ if (tp->t_state == TCPS_ESTABLISHED && tlen > 0) { - if (TSTMP_GT(tp->rcv_reset, tcp_now)) { - tp->rcv_pps++; - tp->rcv_byps += tlen + off; - if (tp->rcv_byps > tp->rcv_maxbyps) - tp->rcv_maxbyps = tp->rcv_byps; - /* - * Setting either tcp_minmssoverload or tcp_minmss to "0" disables - * the check. - */ - if (tcp_minmss && tcp_minmssoverload && tp->rcv_pps > tcp_minmssoverload) { - if ((tp->rcv_byps / tp->rcv_pps) < tcp_minmss) { - char ipstrbuf[MAX_IPv6_STR_LEN]; - printf("too many small tcp packets from " - "%s:%u, av. %ubyte/packet, " - "dropping connection\n", -#if INET6 - isipv6 ? 
- inet_ntop(AF_INET6, &inp->in6p_faddr, ipstrbuf, - sizeof(ipstrbuf)) : -#endif - inet_ntop(AF_INET, &inp->inp_faddr, ipstrbuf, - sizeof(ipstrbuf)), - inp->inp_fport, - tp->rcv_byps / tp->rcv_pps); - tp = tcp_drop(tp, ECONNRESET); -/* tcpstat.tcps_minmssdrops++; */ - goto drop; - } - } - } else { - tp->rcv_reset = tcp_now + TCP_RETRANSHZ; - tp->rcv_pps = 1; - tp->rcv_byps = tlen + off; - } - - /* Evaluate the rate of arrival of packets to see if the + /* + * Evaluate the rate of arrival of packets to see if the * receiver can reduce the ack traffic. The algorithm to * stretch acks will be enabled if the connection meets * certain criteria defined in tcp_stretch_ack_enable function. @@ -2061,18 +2117,18 @@ findpcb: /* * Keep track of how many bytes were received in the LRO packet */ - if ((mauxf_sw_lro_pkt) && (nlropkts > 2)) { + if ((pktf_sw_lro_pkt) && (nlropkts > 2)) { tp->t_lropktlen += tlen; } /* - Explicit Congestion Notification - Flag that we need to send ECT if - + The IP Congestion experienced flag was set. - + Socket is in established state - + We negotiated ECN in the TCP setup - + This isn't a pure ack (tlen > 0) - + The data is in the valid window - - TE_SENDECE will be cleared when we receive a packet with TH_CWR set. + * Explicit Congestion Notification - Flag that we need to send ECT if + * + The IP Congestion experienced flag was set. + * + Socket is in established state + * + We negotiated ECN in the TCP setup + * + This isn't a pure ack (tlen > 0) + * + The data is in the valid window + * + * TE_SENDECE will be cleared when we receive a packet with TH_CWR set. */ if (ip_ecn == IPTOS_ECN_CE && tp->t_state == TCPS_ESTABLISHED && ((tp->ecn_flags & (TE_ECN_ON)) == (TE_ECN_ON)) && tlen > 0 && @@ -2082,19 +2138,21 @@ findpcb: } /* - Clear TE_SENDECE if TH_CWR is set. This is harmless, so we don't - bother doing extensive checks for state and whatnot. + * Clear TE_SENDECE if TH_CWR is set. This is harmless, so we don't + * bother doing extensive checks for state and whatnot. */ if ((thflags & TH_CWR) == TH_CWR) { tp->ecn_flags &= ~TE_SENDECE; } - /* If we received an explicit notification of congestion in + /* + * If we received an explicit notification of congestion in * ip tos ecn bits or by the CWR bit in TCP header flags, reset * the ack-strteching state. */ - if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags & TF_STRETCHACK) != 0 && - ((ip_ecn == IPTOS_ECN_CE) || ((thflags & TH_CWR) == TH_CWR))) + if (tp->t_state == TCPS_ESTABLISHED && + (ip_ecn == IPTOS_ECN_CE || + (thflags & TH_CWR))) tcp_reset_stretch_ack(tp); /* @@ -2109,28 +2167,40 @@ findpcb: turnoff_lro = 1; } + /* Update rcvtime as a new segment was received on the connection */ + tp->t_rcvtime = tcp_now; + /* * Segment received on connection. * Reset idle time and keep-alive timer. */ - tp->t_rcvtime = tcp_now; if (TCPS_HAVEESTABLISHED(tp->t_state)) - tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_KEEPIDLE(tp)); + tcp_keepalive_reset(tp); /* * Process options if not in LISTEN state, * else do it below (after getting remote address). 
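The ECN rules restated in the cleaned-up comments above amount to a two-step flag update on receive; sketched with the kernel's flag words reduced to illustrative booleans:

/*
 * A CE mark on in-window data of an established, ECN-negotiated
 * connection requests an ECE echo (TE_SENDECE); CWR from the peer,
 * signalling that it reduced its window, clears the request.
 */
static void
ecn_rx_update(int ce_marked, int established, int ecn_negotiated,
    int tlen, int in_window, int cwr_seen, int *send_ece)
{
	if (ce_marked && established && ecn_negotiated &&
	    tlen > 0 && in_window)
		*send_ece = 1;
	if (cwr_seen)
		*send_ece = 0;
}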
*/ - if (tp->t_state != TCPS_LISTEN && optp) + if (tp->t_state != TCPS_LISTEN && optp) { tcp_dooptions(tp, optp, optlen, th, &to, ifscope); - - if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { - if (to.to_flags & TOF_SCALE) { - tp->t_flags |= TF_RCVD_SCALE; - tp->requested_s_scale = to.to_requested_s_scale; - tp->snd_wnd = th->th_win << tp->snd_scale; - tiwin = tp->snd_wnd; +#if MPTCP + mptcp_csum = mptcp_input_csum(tp, m, drop_hdrlen); + if (mptcp_csum) { + tp->t_mpflags |= TMPF_SND_MPFAIL; + tp->t_mpflags &= ~TMPF_EMBED_DSN; + mptcp_notify_mpfail(so); + m_freem(m); + tcpstat.tcps_mp_badcsum++; + tcp_check_timer_state(tp); + tcp_unlock(so, 1, 0); + KERNEL_DEBUG(DBG_FNC_TCP_INPUT | + DBG_FUNC_END,0,0,0,0,0); + return; } + mptcp_insert_rmap(tp, m); +#endif /* MPTCP */ + } + if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { if (to.to_flags & TOF_TS) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; @@ -2138,9 +2208,9 @@ findpcb: } if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss, ifscope); - if (tp->sack_enable) { + if (SACK_ENABLED(tp)) { if (!(to.to_flags & TOF_SACK)) - tp->sack_enable = 0; + tp->t_flagsext &= ~(TF_SACK_ENABLE); else tp->t_flags |= TF_SACK_PERMIT; } @@ -2168,7 +2238,7 @@ findpcb: TCP_INC_VAR(tp->iaj_pktcnt, nlropkts); } - if (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_PKT) { + if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_PKT) { seg_size = m->m_pkthdr.lro_pktlen; } if ( tp->iaj_size == 0 || seg_size > tp->iaj_size || @@ -2183,7 +2253,11 @@ findpcb: /* Compute inter-arrival jitter taking this packet * as the second packet */ - compute_iaj(tp); + if (pktf_sw_lro_pkt) + compute_iaj(tp, nlropkts, + m->m_pkthdr.lro_elapsed); + else + compute_iaj(tp, 1, 0); } if (seg_size < tp->iaj_size) { /* There is a smaller packet in the stream. @@ -2193,7 +2267,7 @@ findpcb: * If there are too many packets smaller than iaj_size, * we try to learn the iaj_size again. */ - tp->iaj_small_pkt++; + TCP_INC_VAR(tp->iaj_small_pkt, nlropkts); if (tp->iaj_small_pkt > RESET_IAJ_SIZE_THRESH) { update_iaj_state(tp, seg_size, 1); } else { @@ -2256,51 +2330,32 @@ findpcb: SEQ_LEQ(th->th_ack, tp->snd_max) && tp->snd_cwnd >= tp->snd_ssthresh && (!IN_FASTRECOVERY(tp) && - ((!tp->sack_enable && tp->t_dupacks < tcprexmtthresh) || - (tp->sack_enable && to.to_nsacks == 0 && + ((!(SACK_ENABLED(tp)) && tp->t_dupacks < tp->t_rexmtthresh) || + (SACK_ENABLED(tp) && to.to_nsacks == 0 && TAILQ_EMPTY(&tp->snd_holes))))) { /* * this is a pure ack for outstanding data. */ ++tcpstat.tcps_predack; + /* * "bad retransmit" recovery */ - if (tp->t_rxtshift == 1 && - TSTMP_LT(tcp_now, tp->t_badrxtwin)) { + if (tp->t_rxtshift > 0 && + tcp_detect_bad_rexmt(tp, &to)) { ++tcpstat.tcps_sndrexmitbad; - tp->snd_cwnd = tp->snd_cwnd_prev; - tp->snd_ssthresh = - tp->snd_ssthresh_prev; - tp->snd_recover = tp->snd_recover_prev; - if (tp->t_flags & TF_WASFRECOVERY) - ENTER_FASTRECOVERY(tp); - tp->snd_nxt = tp->snd_max; - tp->t_badrxtwin = 0; - tp->t_rxtshift = 0; - tp->rxt_start = 0; - tcp_bad_rexmt_fix_sndbuf(tp); - DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb, + tcp_bad_rexmt_restore_state(tp, th); + + DTRACE_TCP5(cc, void, NULL, + struct inpcb *, tp->t_inpcb, struct tcpcb *, tp, struct tcphdr *, th, int32_t, TCP_CC_BAD_REXMT_RECOVERY); } - /* - * Recalculate the transmit timer / rtt. - * - * Some boxes send broken timestamp replies - * during the SYN+ACK phase, ignore - * timestamps of 0 or we could calculate a - * huge RTT and blow up the retransmit timer. 
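The removed block below (including its warning about peers that echo a timestamp of 0 during the SYN+ACK phase) is consolidated into tcp_compute_rtt(); its selection rule, sketched with illustrative parameters:

#include <stdint.h>

#define TSTMP_GEQ(a, b)	((int32_t)((a) - (b)) >= 0)

/* Returns 1 and stores a sample when this ACK can time the RTT. */
static int
rtt_sample(uint32_t now, int has_ts, uint32_t tsecr,
    uint32_t rtttime, int acked_timed_seq, uint32_t *rtt)
{
	/* Prefer the timestamp echo, but ignore bogus zero echoes. */
	if (has_ts && tsecr != 0 && TSTMP_GEQ(now, tsecr)) {
		*rtt = now - tsecr;
		return (1);
	}
	/* Otherwise fall back to the single timed segment. */
	if (rtttime != 0 && acked_timed_seq) {
		*rtt = now - rtttime;
		return (1);
	}
	return (0);
}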
- */ - if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0) && - TSTMP_GEQ(tcp_now, to.to_tsecr)) { - tcp_xmit_timer(tp, - tcp_now - to.to_tsecr); - } else if (tp->t_rtttime && - SEQ_GT(th->th_ack, tp->t_rtseq)) { - tcp_xmit_timer(tp, tcp_now - tp->t_rtttime); - } - acked = th->th_ack - tp->snd_una; + + /* Recalculate the RTT */ + tcp_compute_rtt(tp, &to, th); + + acked = BYTES_ACKED(th, tp); tcpstat.tcps_rcvackpack++; tcpstat.tcps_rcvackbyte += acked; @@ -2316,6 +2371,10 @@ findpcb: int32_t, TCP_CC_INSEQ_ACK_RCVD); sbdrop(&so->so_snd, acked); + if (so->so_flags & SOF_ENABLE_MSGS) { + VERIFY(acked <= so->so_msg_state->msg_serial_bytes); + so->so_msg_state->msg_serial_bytes -= acked; + } tcp_sbsnd_trim(&so->so_snd); if (SEQ_GT(tp->snd_una, tp->snd_recover) && @@ -2328,9 +2387,13 @@ findpcb: * to th_ack. */ tp->snd_wl2 = th->th_ack; - tp->t_dupacks = 0; + + if (tp->t_dupacks > 0) { + tp->t_dupacks = 0; + tp->t_rexmtthresh = tcprexmtthresh; + } + m_freem(m); - ND6_HINT(tp); /* some progress has been done */ /* * If all outstanding data are acked, stop @@ -2374,17 +2437,17 @@ findpcb: */ if (turnoff_lro) { tcp_lro_remove_state(tp->t_inpcb->inp_laddr, - tp->t_inpcb->inp_faddr, - tp->t_inpcb->inp_lport, - tp->t_inpcb->inp_fport); + tp->t_inpcb->inp_faddr, + tp->t_inpcb->inp_lport, + tp->t_inpcb->inp_fport); tp->t_flagsext &= ~TF_LRO_OFFLOADED; tp->t_idleat = tp->rcv_nxt; - } else if (sw_lro && !mauxf_sw_lro_pkt && !isipv6 && - (so->so_flags & SOF_USELRO) && - (m->m_pkthdr.rcvif->if_type != IFT_CELLULAR) && + } else if (sw_lro && !pktf_sw_lro_pkt && !isipv6 && + (so->so_flags & SOF_USELRO) && + !IFNET_IS_CELLULAR(m->m_pkthdr.rcvif) && (m->m_pkthdr.rcvif->if_type != IFT_LOOP) && ((th->th_seq - tp->irs) > - (tp->t_maxseg << lro_start)) && + (tp->t_maxseg << lro_start)) && ((tp->t_idleat == 0) || ((th->th_seq - tp->t_idleat) > (tp->t_maxseg << lro_start)))) { tp->t_flagsext |= TF_LRO_OFFLOADED; @@ -2393,7 +2456,7 @@ findpcb: } /* Clean receiver SACK report if present */ - if (tp->sack_enable && tp->rcv_numsacks) + if (SACK_ENABLED(tp) && tp->rcv_numsacks) tcp_clean_sackreport(tp); ++tcpstat.tcps_preddat; tp->rcv_nxt += tlen; @@ -2410,15 +2473,23 @@ findpcb: TCP_INC_VAR(tcpstat.tcps_rcvpack, nlropkts); tcpstat.tcps_rcvbyte += tlen; if (nstat_collect) { - if (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_PKT) { - locked_add_64(&inp->inp_stat->rxpackets, m->m_pkthdr.lro_npkts); - } - else { - locked_add_64(&inp->inp_stat->rxpackets, 1); + if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_PKT) { + INP_ADD_STAT(inp, cell, wifi, rxpackets, + m->m_pkthdr.lro_npkts); + } else { + INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); } - locked_add_64(&inp->inp_stat->rxbytes, tlen); + INP_ADD_STAT(inp, cell, wifi, rxbytes, tlen); } - ND6_HINT(tp); /* some progress has been done */ + + /* + * Calculate the RTT on the receiver only if the + * connection is in streaming mode and the last + * packet was not an end-of-write + */ + if ((tp->t_flags & TF_STRETCHACK) && + !(tp->t_flagsext & TF_STREAMEOW)) + tcp_compute_rtt(tp, &to, th); tcp_sbrcv_grow(tp, &so->so_rcv, &to, tlen); @@ -2427,8 +2498,16 @@ findpcb: */ so_recv_data_stat(so, m, 0); m_adj(m, drop_hdrlen); /* delayed header drop */ - if (sbappendstream(&so->so_rcv, m)) + + /* + * If message delivery (SOF_ENABLE_MSGS) is enabled on + * this socket, deliver the packet received as an + * in-order message with sequence number attached to it. 
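sbappendstream_rcvdemux() receives the segment's position in the byte stream, computed relative to irs + 1, the sequence number of the first data byte; unsigned arithmetic keeps the offset correct across 32-bit sequence wraparound:

#include <stdint.h>

/* Byte-stream offset of a segment, as passed to the demux below. */
static uint32_t
stream_offset(uint32_t th_seq, uint32_t irs)
{
	return (th_seq - (irs + 1));
}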
+ */ + if (sbappendstream_rcvdemux(so, m, + th->th_seq - (tp->irs + 1), 0)) { sorwakeup(so); + } #if INET6 if (isipv6) { KERNEL_DEBUG(DBG_LAYER_END, ((th->th_dport << 16) | th->th_sport), @@ -2452,6 +2531,9 @@ findpcb: tp->t_flags |= TF_ACKNOW; tcp_output(tp); } + + tcp_adaptive_rwtimo_check(tp, tlen); + tcp_check_timer_state(tp); tcp_unlock(so, 1, 0); KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); @@ -2466,11 +2548,7 @@ findpcb: * but not less than advertised window. */ lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); - - { int win; - win = tcp_sbspace(tp); - if (win < 0) win = 0; else { /* clip rcv window to 4K for modems */ @@ -2478,7 +2556,21 @@ findpcb: win = min(win, slowlink_wsize); } tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); +#if MPTCP + /* + * Ensure that the subflow receive window isn't greater + * than the connection level receive window. + */ + if ((tp->t_mpflags & TMPF_MPTCP_TRUE) && + (mp_tp = tptomptp(tp))) { + MPT_LOCK(mp_tp); + if (tp->rcv_wnd > mp_tp->mpt_rcvwnd) { + tp->rcv_wnd = mp_tp->mpt_rcvwnd; + tcpstat.tcps_mp_reducedwin++; + } + MPT_UNLOCK(mp_tp); } +#endif /* MPTCP */ switch (tp->t_state) { @@ -2535,7 +2627,8 @@ findpcb: laddr = inp->inp_laddr; if (inp->inp_laddr.s_addr == INADDR_ANY) inp->inp_laddr = ip->ip_dst; - if (in_pcbconnect(inp, (struct sockaddr *)sin, proc0, NULL)) { + if (in_pcbconnect(inp, (struct sockaddr *)sin, proc0, + IFSCOPE_NONE, NULL)) { inp->inp_laddr = laddr; FREE(sin, M_SONAME); goto drop; @@ -2545,9 +2638,9 @@ findpcb: tcp_dooptions(tp, optp, optlen, th, &to, ifscope); - if (tp->sack_enable) { + if (SACK_ENABLED(tp)) { if (!(to.to_flags & TOF_SACK)) - tp->sack_enable = 0; + tp->t_flagsext &= ~(TF_SACK_ENABLE); else tp->t_flags |= TF_SACK_PERMIT; } @@ -2573,11 +2666,20 @@ findpcb: struct tcpcb *, tp, int32_t, TCPS_SYN_RECEIVED); tp->t_state = TCPS_SYN_RECEIVED; tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, - tp->t_keepinit ? tp->t_keepinit : tcp_keepinit); + TCP_CONN_KEEPINIT(tp)); dropsocket = 0; /* committed to socket */ if (inp->inp_flowhash == 0) inp->inp_flowhash = inp_calc_flowhash(inp); +#if INET6 + /* update flowinfo - RFC 6437 */ + if (inp->inp_flow == 0 && + inp->in6p_flags & IN6P_AUTOFLOWLABEL) { + inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; + inp->inp_flow |= + (htonl(inp->inp_flowhash) & IPV6_FLOWLABEL_MASK); + } +#endif /* INET6 */ /* reset the incomp processing flag */ so->so_flags &= ~(SOF_INCOMP_INPROGRESS); @@ -2606,12 +2708,19 @@ findpcb: (SEQ_LEQ(th->th_ack, tp->snd_una) || SEQ_GT(th->th_ack, tp->snd_max))) { rstreason = BANDLIM_RST_OPENPORT; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->ooopacket, 1); - + IF_TCP_STATINC(ifp, ooopacket); goto dropwithreset; } + + /* + * In SYN_RECEIVED state, if we recv some SYNS with + * window scale and others without, window scaling should + * be disabled. Otherwise the window advertised will be + * lower if we assume scaling and the other end does not. 
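The RFC 6437 block above derives a stable 20-bit label from the connection's flow hash; as a helper, with the mask written out to match IPV6_FLOWLABEL_MASK (0x000fffff in network byte order):

#include <stdint.h>
#include <arpa/inet.h>

#define FLOWLABEL_MASK	htonl(0x000fffffU)

static uint32_t
auto_flowlabel(uint32_t flowhash)
{
	return (htonl(flowhash) & FLOWLABEL_MASK);
}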
+ */ + if ((thflags & TH_SYN) && + !(to.to_flags & TOF_SCALE)) + tp->t_flags &= ~TF_RCVD_SCALE; break; /* @@ -2631,10 +2740,7 @@ findpcb: (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { rstreason = BANDLIM_UNLIMITED; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->ooopacket, 1); - + IF_TCP_STATINC(ifp, ooopacket); goto dropwithreset; } if (thflags & TH_RST) { @@ -2710,11 +2816,22 @@ findpcb: DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED); tp->t_state = TCPS_ESTABLISHED; - tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_KEEPIDLE(tp)); + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + TCP_CONN_KEEPIDLE(tp)); if (nstat_collect) nstat_route_connect_success(tp->t_inpcb->inp_route.ro_rt); } - isconnected = TRUE; +#if MPTCP + /* + * Do not send the connect notification for additional + * subflows until ACK for 3-way handshake arrives. + */ + if ((!(tp->t_mpflags & TMPF_MPTCP_TRUE)) && + (tp->t_mpflags & TMPF_SENT_JOIN)) { + isconnected = FALSE; + } else +#endif /* MPTCP */ + isconnected = TRUE; } else { /* * Received initial SYN in SYN-SENT[*] state => simul- @@ -2853,12 +2970,12 @@ trimthenstep6: if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) && SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) || (tp->rcv_wnd == 0 && - ((tp->last_ack_sent == th->th_seq) || ((tp->last_ack_sent -1) == th->th_seq)))) { + ((tp->last_ack_sent == th->th_seq) || + ((tp->last_ack_sent -1) == th->th_seq)))) { switch (tp->t_state) { case TCPS_SYN_RECEIVED: - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->rstinsynrcv, 1); + IF_TCP_STATINC(ifp, rstinsynrcv); so->so_error = ECONNREFUSED; goto close; @@ -2876,14 +2993,10 @@ trimthenstep6: so->so_error = ECONNRESET; close: postevent(so, 0, EV_RESET); - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_CLOSED); - soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET)); - tp->t_state = TCPS_CLOSED; tcpstat.tcps_drops++; tp = tcp_close(tp); break; @@ -2926,9 +3039,10 @@ trimthenstep6: tcpstat.tcps_rcvdupbyte += tlen; tcpstat.tcps_pawsdrop++; if (nstat_collect) { - nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, tlen, NSTAT_RX_FLAG_DUPLICATE); - locked_add_64(&inp->inp_stat->rxpackets, 1); - locked_add_64(&inp->inp_stat->rxbytes, tlen); + nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, + 1, tlen, NSTAT_RX_FLAG_DUPLICATE); + INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, rxbytes, tlen); tp->t_stat.rxduplicatebytes += tlen; } if (tlen) @@ -2946,10 +3060,7 @@ trimthenstep6: */ if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) { rstreason = BANDLIM_RST_OPENPORT; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->dospacket, 1); - + IF_TCP_STATINC(ifp, dospacket); goto dropwithreset; } @@ -2994,9 +3105,10 @@ trimthenstep6: tcpstat.tcps_rcvpartdupbyte += todrop; } if (nstat_collect) { - nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, todrop, NSTAT_RX_FLAG_DUPLICATE); - locked_add_64(&inp->inp_stat->rxpackets, 1); - locked_add_64(&inp->inp_stat->rxbytes, todrop); + nstat_route_rx(tp->t_inpcb->inp_route.ro_rt, 1, + todrop, NSTAT_RX_FLAG_DUPLICATE); + INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, rxbytes, todrop); tp->t_stat.rxduplicatebytes += todrop; } drop_hdrlen += todrop; /* drop from the top afterwards */ @@ -3011,18 +3123,19 @@ trimthenstep6: } /* - * If new data are 
received on a connection after the - * user processes are gone, then RST the other end. + * If new data are received on a connection after the user processes + * are gone, then RST the other end. Note that an MPTCP subflow socket + * would have SS_NOFDREF set by default, so check to make sure that + * we test for SOF_MP_SUBFLOW socket flag (which would be cleared when + * the socket is closed.) */ - if ((so->so_state & SS_NOFDREF) && + if (!(so->so_flags & SOF_MP_SUBFLOW) && + (so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { tp = tcp_close(tp); tcpstat.tcps_rcvafterclose++; rstreason = BANDLIM_UNLIMITED; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->cleanup, 1); - + IF_TCP_STATINC(ifp, cleanup); goto dropwithreset; } @@ -3102,10 +3215,7 @@ trimthenstep6: tp = tcp_drop(tp, ECONNRESET); rstreason = BANDLIM_UNLIMITED; postevent(so, 0, EV_RESET); - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->synwindow, 1); - + IF_TCP_STATINC(ifp, synwindow); goto dropwithreset; } @@ -3127,6 +3237,7 @@ trimthenstep6: /* * Ack processing. */ + switch (tp->t_state) { /* @@ -3162,7 +3273,8 @@ trimthenstep6: DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, struct tcpcb *, tp, int32_t, TCPS_ESTABLISHED); tp->t_state = TCPS_ESTABLISHED; - tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_KEEPIDLE(tp)); + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + TCP_CONN_KEEPIDLE(tp)); if (nstat_collect) nstat_route_connect_success(tp->t_inpcb->inp_route.ro_rt); } @@ -3172,12 +3284,21 @@ trimthenstep6: */ if (tlen == 0 && (thflags & TH_FIN) == 0) (void) tcp_reass(tp, (struct tcphdr *)0, &tlen, - (struct mbuf *)0); + NULL, ifp); tp->snd_wl1 = th->th_seq - 1; /* FALLTHROUGH */ - - isconnected = TRUE; +#if MPTCP + /* + * Do not send the connect notification for additional subflows + * until ACK for 3-way handshake arrives. + */ + if ((!(tp->t_mpflags & TMPF_MPTCP_TRUE)) && + (tp->t_mpflags & TMPF_SENT_JOIN)) { + isconnected = FALSE; + } else +#endif /* MPTCP */ + isconnected = TRUE; /* * In ESTABLISHED state: drop duplicate ACKs; ACK out of range @@ -3198,19 +3319,155 @@ trimthenstep6: tcpstat.tcps_rcvacktoomuch++; goto dropafterack; } - if (tp->sack_enable && + if (SACK_ENABLED(tp) && (to.to_nsacks > 0 || !TAILQ_EMPTY(&tp->snd_holes))) - tcp_sack_doack(tp, &to, th->th_ack); + tcp_sack_doack(tp, &to, th->th_ack, &sack_bytes_acked); +#if MPTCP + if ((tp->t_mpuna) && (SEQ_GEQ(th->th_ack, tp->t_mpuna))) { +#if 0 + if ((tp->t_mpflags & TMPF_MPTCP_TRUE) && + !(tp->t_mpflags & TMPF_MPTCP_READY)) { + printf("%s: fallback? %x %x \n", __func__, + th->th_ack, tp->t_mpuna); + tp->t_mpuna = 0; + } +#endif + if (tp->t_mpflags & TMPF_PREESTABLISHED) { + /* MP TCP establishment succeeded */ + tp->t_mpuna = 0; + if (tp->t_mpflags & TMPF_JOINED_FLOW) { + if (tp->t_mpflags & TMPF_SENT_JOIN) { + tp->t_mpflags &= + ~TMPF_PREESTABLISHED; + tp->t_mpflags |= + TMPF_MPTCP_TRUE; + so->so_flags |= SOF_MPTCP_TRUE; + if (mptcp_dbg >= MP_ERR_DEBUG) + printf("MPTCP SUCCESS" + "%s \n",__func__); + tp->t_timer[TCPT_JACK_RXMT] = 0; + tp->t_mprxtshift = 0; + isconnected = TRUE; + } else { + isconnected = FALSE; + } + } else { + isconnected = TRUE; + tp->t_mpflags &= ~TMPF_SENT_KEYS; + + } + } + } +#endif /* MPTCP */ + /* + * If we have outstanding data (other than + * a window probe), this is a completely + * duplicate ack (ie, window info didn't + * change) and the ack is the biggest we've seen. 
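Restating the test the comment above describes: an ACK that does not advance snd_una, carries no payload, and changes no window is a pure duplicate (SEQ_LEQ spelled out as the usual modular compare):

#include <stdint.h>

#define SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)

static int
is_pure_dupack(uint32_t th_ack, uint32_t snd_una, int tlen,
    uint32_t tiwin, uint32_t snd_wnd)
{
	return (SEQ_LEQ(th_ack, snd_una) && tlen == 0 && tiwin == snd_wnd);
}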
+ */ if (SEQ_LEQ(th->th_ack, tp->snd_una)) { if (tlen == 0 && tiwin == tp->snd_wnd) { +process_dupack: +#if MPTCP + /* + * MPTCP options that are ignored must + * not be treated as duplicate ACKs. + */ + if (to.to_flags & TOF_MPTCP) { + goto drop; + } +#endif /* MPTCP */ tcpstat.tcps_rcvdupack++; + ++tp->t_dupacks; + /* + * Check if we need to reset the limit on early + * retransmit + */ + if (TSTMP_GEQ(tcp_now, + (tp->t_early_rexmt_win + TCP_EARLY_REXMT_WIN))) + tp->t_early_rexmt_count = 0; + + /* + * Is early retransmit needed? We check for + * this when the connection is waiting for + * more duplicate acks to enter fast recovery. + */ + if (early_rexmt && + tp->t_early_rexmt_count < TCP_EARLY_REXMT_LIMIT && + !IN_FASTRECOVERY(tp) && + SEQ_GT(tp->snd_max, tp->snd_una) && + (tp->t_dupacks == 1 || + (SACK_ENABLED(tp) && + !TAILQ_EMPTY(&tp->snd_holes)))) { + /* + * If there are only a few outstanding + * segments on the connection, we might need + * to lower the retransmit threshold. This + * will allow us to do Early Retransmit as + * described in RFC 5827. + */ + u_int32_t obytes, snd_off; + int32_t snd_len; + if (SACK_ENABLED(tp) && + !TAILQ_EMPTY(&tp->snd_holes)) { + obytes = (tp->snd_max - tp->snd_fack) + + tp->sackhint.sack_bytes_rexmit; + } else { + obytes = (tp->snd_max - tp->snd_una); + } + + /* In order to lower retransmit threshold the + * following two conditions must be met. + * 1. the amount of outstanding data is less + * than 4*SMSS bytes + * 2. there is no unsent data ready for + * transmission or the advertised window + * will limit sending new segments. + */ + snd_off = tp->snd_max - tp->snd_una; + snd_len = min(so->so_snd.sb_cc, tp->snd_wnd) - snd_off; + if (obytes < (tp->t_maxseg << 2) && + snd_len <= 0) { + u_int32_t osegs; + + + osegs = obytes / tp->t_maxseg; + if ((osegs * tp->t_maxseg) < obytes) + osegs++; + + /* + * Since the connection might have already + * received some dupacks, we add them to + * to the outstanding segments count to get + * the correct retransmit threshold. + * + * By checking for early retransmit after + * receiving some duplicate acks when SACK + * is supported, the connection will be able + * to enter fast recovery even if multiple + * segments are lost in the same window. + */ + osegs += tp->t_dupacks; + if (osegs < 4) { + tcpstat.tcps_early_rexmt++; + tp->t_rexmtthresh = ((osegs - 1) > 1) ? + (osegs - 1) : 1; + tp->t_rexmtthresh = min(tp->t_rexmtthresh, + tcprexmtthresh); + tp->t_rexmtthresh = max(tp->t_rexmtthresh, + tp->t_dupacks); + DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, + struct tcpcb *, tp, struct tcphdr *, th, + int32_t, TCP_CC_EARLY_RETRANSMIT); + if (tp->t_early_rexmt_count == 0) + tp->t_early_rexmt_win = tcp_now; + tp->t_early_rexmt_count++; + } + } + } /* - * If we have outstanding data (other than - * a window probe), this is a completely - * duplicate ack (ie, window info didn't - * change), the ack is the biggest we've - * seen and we've seen exactly our rexmt - * threshhold of them, assume a packet + * If we've seen exactly our rexmt threshold + * of duplicate acks, assume a packet * has been dropped and retransmit it. * Kludge snd_nxt & the congestion * window so we send only this one @@ -3229,11 +3486,12 @@ trimthenstep6: * network. 
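/*
 * A sketch of the RFC 5827 Early Retransmit threshold math in the block
 * above, lifted into a pure function. The caller guarantees at least one
 * byte outstanding (the kernel checks SEQ_GT(snd_max, snd_una) first).
 * TCPREXMTTHRESH stands in for the tcprexmtthresh global; nothing else
 * is assumed.
 */
#include <stdint.h>

#define TCPREXMTTHRESH	3	/* default duplicate-ACK threshold */

static uint32_t
early_rexmt_thresh(uint32_t obytes, uint32_t maxseg, uint32_t dupacks)
{
	uint32_t osegs, thresh;

	osegs = obytes / maxseg;
	if (osegs * maxseg < obytes)
		osegs++;		/* round up to whole segments */
	osegs += dupacks;		/* dupacked segments are outstanding too */

	if (osegs >= 4)
		return (TCPREXMTTHRESH);	/* enough in flight; keep default */

	thresh = ((osegs - 1) > 1) ? (osegs - 1) : 1;
	if (thresh > TCPREXMTTHRESH)
		thresh = TCPREXMTTHRESH;
	if (thresh < dupacks)
		thresh = dupacks;	/* never below dupacks already seen */
	return (thresh);
}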
*/ if (tp->t_timer[TCPT_REXMT] == 0 || - th->th_ack != tp->snd_una) + (th->th_ack != tp->snd_una && sack_bytes_acked == 0)) { tp->t_dupacks = 0; - else if (++tp->t_dupacks > tcprexmtthresh || + tp->t_rexmtthresh = tcprexmtthresh; + } else if (tp->t_dupacks > tp->t_rexmtthresh || IN_FASTRECOVERY(tp)) { - if (tp->sack_enable && IN_FASTRECOVERY(tp)) { + if (SACK_ENABLED(tp) && IN_FASTRECOVERY(tp)) { int awnd; /* @@ -3258,7 +3516,7 @@ trimthenstep6: (void) tcp_output(tp); goto drop; - } else if (tp->t_dupacks == tcprexmtthresh) { + } else if (tp->t_dupacks == tp->t_rexmtthresh) { tcp_seq onxt = tp->snd_nxt; /* @@ -3268,7 +3526,7 @@ trimthenstep6: * check to see if we're in newreno * recovery. */ - if (tp->sack_enable) { + if (SACK_ENABLED(tp)) { if (IN_FASTRECOVERY(tp)) { tp->t_dupacks = 0; break; @@ -3295,7 +3553,7 @@ trimthenstep6: if ((tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON) { tp->ecn_flags |= TE_SENDCWR; } - if (tp->sack_enable) { + if (SACK_ENABLED(tp)) { tcpstat.tcps_sack_recovery_episode++; tp->sack_newdata = tp->snd_nxt; tp->snd_cwnd = tp->t_maxseg; @@ -3318,9 +3576,30 @@ trimthenstep6: struct tcpcb *, tp, struct tcphdr *, th, int32_t, TCP_CC_ENTER_FASTRECOVERY); goto drop; + } else if (limited_txmt && + ALLOW_LIMITED_TRANSMIT(tp) && + (!(SACK_ENABLED(tp)) || sack_bytes_acked > 0) && + (so->so_snd.sb_cc - (tp->snd_max - tp->snd_una)) > 0) { + u_int32_t incr = (tp->t_maxseg * tp->t_dupacks); + + /* Use Limited Transmit algorithm on the first two + * duplicate acks when there is new data to transmit + */ + tp->snd_cwnd += incr; + tcpstat.tcps_limited_txt++; + (void) tcp_output(tp); + + DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, + struct tcpcb *, tp, struct tcphdr *, th, + int32_t, TCP_CC_LIMITED_TRANSMIT); + + /* Reset snd_cwnd back to normal */ + tp->snd_cwnd -= incr; } - } else + } else { tp->t_dupacks = 0; + tp->t_rexmtthresh = tcprexmtthresh; + } break; } /* @@ -3329,7 +3608,7 @@ trimthenstep6: */ if (IN_FASTRECOVERY(tp)) { if (SEQ_LT(th->th_ack, tp->snd_recover)) { - if (tp->sack_enable) + if (SACK_ENABLED(tp)) tcp_sack_partialack(tp, th); else tcp_newreno_partial_ack(tp, th); @@ -3341,8 +3620,6 @@ trimthenstep6: EXIT_FASTRECOVERY(tp); if (CC_ALGO(tp)->post_fr != NULL) CC_ALGO(tp)->post_fr(tp, th); - tp->t_dupacks = 0; - DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, struct tcpcb *, tp, struct tcphdr *, th, int32_t, TCP_CC_EXIT_FASTRECOVERY); @@ -3353,6 +3630,7 @@ trimthenstep6: * counter. */ tp->t_dupacks = 0; + tp->t_rexmtthresh = tcprexmtthresh; } @@ -3379,57 +3657,30 @@ trimthenstep6: } process_ACK: - acked = th->th_ack - tp->snd_una; + acked = BYTES_ACKED(th, tp); tcpstat.tcps_rcvackpack++; tcpstat.tcps_rcvackbyte += acked; /* - * If we just performed our first retransmit, and the ACK - * arrives within our recovery window, then it was a mistake - * to do the retransmit in the first place. Recover our - * original cwnd and ssthresh, and proceed to transmit where - * we left off. + * If the last packet was a retransmit, make sure + * it was not spurious. + * + * If the ack has ECE bit set, skip bad + * retransmit recovery. 
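/*
 * A sketch of the Limited Transmit step above (RFC 3042): on the first
 * two duplicate ACKs, and only when new data is waiting, the window is
 * inflated just long enough to clock out one new segment per dupack and
 * then restored, so the inflation never persists as real cwnd growth.
 * send_fn stands in for tcp_output() in this model.
 */
#include <stdint.h>

struct lt_state {
	uint32_t snd_cwnd;
	uint32_t t_maxseg;
	uint32_t t_dupacks;	/* ALLOW_LIMITED_TRANSMIT implies 1 or 2 */
};

static void
limited_transmit(struct lt_state *tp, void (*send_fn)(struct lt_state *))
{
	uint32_t incr = tp->t_maxseg * tp->t_dupacks;

	tp->snd_cwnd += incr;	/* permit t_dupacks extra segments */
	send_fn(tp);		/* the kernel calls tcp_output() here */
	tp->snd_cwnd -= incr;	/* deflate: not a real window increase */
}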
*/ - if (tp->t_rxtshift == 1 && - TSTMP_LT(tcp_now, tp->t_badrxtwin)) { + if (tp->t_rxtshift > 0 && + (thflags & TH_ECE) == 0 && + tcp_detect_bad_rexmt(tp, &to)) { ++tcpstat.tcps_sndrexmitbad; - tp->snd_cwnd = tp->snd_cwnd_prev; - tp->snd_ssthresh = tp->snd_ssthresh_prev; - tp->snd_recover = tp->snd_recover_prev; - if (tp->t_flags & TF_WASFRECOVERY) - ENTER_FASTRECOVERY(tp); - tp->snd_nxt = tp->snd_max; - tp->t_badrxtwin = 0; /* XXX probably not required */ - tp->t_rxtshift = 0; - tp->rxt_start = 0; - tcp_bad_rexmt_fix_sndbuf(tp); + tcp_bad_rexmt_restore_state(tp, th); DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, struct tcpcb *, tp, struct tcphdr *, th, int32_t, TCP_CC_BAD_REXMT_RECOVERY); } - /* - * If we have a timestamp reply, update smoothed - * round trip time. If no timestamp is present but - * transmit timer is running and timed sequence - * number was acked, update smoothed round trip time. - * Since we now have an rtt measurement, cancel the - * timer backoff (cf., Phil Karn's retransmit alg.). - * Recompute the initial retransmit timer. - * Also makes sure we have a valid time stamp in hand - * - * Some boxes send broken timestamp replies - * during the SYN+ACK phase, ignore - * timestamps of 0 or we could calculate a - * huge RTT and blow up the retransmit timer. - */ - if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0) && - TSTMP_GEQ(tcp_now, to.to_tsecr)) { - tcp_xmit_timer(tp, tcp_now - to.to_tsecr); - } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { - tcp_xmit_timer(tp, tcp_now - tp->t_rtttime); - } + /* Recalculate the RTT */ + tcp_compute_rtt(tp, &to, th); /* * If all outstanding data is acked, stop retransmit @@ -3455,7 +3706,7 @@ process_ACK: /* * Reduce the congestion window if we haven't done so. */ - if (!tp->sack_enable && !IN_FASTRECOVERY(tp) && + if (!SACK_ENABLED(tp) && !IN_FASTRECOVERY(tp) && SEQ_GEQ(th->th_ack, tp->snd_recover)) { tcp_reduce_congestion_window(tp); DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, @@ -3483,9 +3734,17 @@ process_ACK: if (acked > so->so_snd.sb_cc) { tp->snd_wnd -= so->so_snd.sb_cc; sbdrop(&so->so_snd, (int)so->so_snd.sb_cc); + if (so->so_flags & SOF_ENABLE_MSGS) { + so->so_msg_state->msg_serial_bytes -= + (int)so->so_snd.sb_cc; + } ourfinisacked = 1; } else { sbdrop(&so->so_snd, acked); + if (so->so_flags & SOF_ENABLE_MSGS) { + so->so_msg_state->msg_serial_bytes -= + acked; + } tcp_sbsnd_trim(&so->so_snd); tp->snd_wnd -= acked; ourfinisacked = 0; @@ -3501,7 +3760,7 @@ process_ACK: EXIT_FASTRECOVERY(tp); tp->snd_una = th->th_ack; - if (tp->sack_enable) { + if (SACK_ENABLED(tp)) { if (SEQ_GT(tp->snd_una, tp->snd_recover)) tp->snd_recover = tp->snd_una; } @@ -3529,19 +3788,24 @@ process_ACK: /* * If we can't receive any more * data, then closing user can proceed. - * Starting the timer is contrary to the + * Starting the TCPT_2MSL timer is contrary to the * specification, but if we don't get a FIN * we'll hang forever. 
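/*
 * tcp_detect_bad_rexmt(), called in the hunk above, is defined outside
 * this patch excerpt; the sketch below is an assumption about its shape,
 * in the spirit of Eifel detection (RFC 3522): if the peer echoes a
 * timestamp taken before our retransmission was sent, the ACK must cover
 * the original transmission, so the timeout was spurious and
 * tcp_bad_rexmt_restore_state() may roll cwnd/ssthresh back.
 */
#include <stdint.h>
#include <stdbool.h>

/* Wraparound-safe "a is earlier than b", like TSTMP_LT() in the kernel. */
static bool tstmp_lt(uint32_t a, uint32_t b)
{
	return ((int32_t)(a - b)) < 0;
}

static bool
detect_bad_rexmt(uint32_t to_tsecr, uint32_t rexmt_start_ts)
{
	/* tsecr of 0 means no usable echo; assume the loss was genuine */
	return (to_tsecr != 0 && tstmp_lt(to_tsecr, rexmt_start_ts));
}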
*/ if (so->so_state & SS_CANTRCVMORE) { - add_to_time_wait(tp, tcp_maxidle); + tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, + TCP_CONN_MAXIDLE(tp)); isconnected = FALSE; isdisconnected = TRUE; } - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_FIN_WAIT_2); + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, inp, + struct tcpcb *, tp, + int32_t, TCPS_FIN_WAIT_2); tp->t_state = TCPS_FIN_WAIT_2; - /* fall through and make sure we also recognize data ACKed with the FIN */ + /* fall through and make sure we also recognize + * data ACKed with the FIN + */ } tp->t_flags |= TF_ACKNOW; break; @@ -3554,16 +3818,13 @@ process_ACK: */ case TCPS_CLOSING: if (ourfinisacked) { - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_TIME_WAIT); + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, inp, + struct tcpcb *, tp, + int32_t, TCPS_TIME_WAIT); tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - /* Shorten TIME_WAIT [RFC-1644, p.28] */ - if (tp->cc_recv != 0 && - ((int)(tcp_now - tp->t_starttime)) < tcp_msl) - add_to_time_wait(tp, tp->t_rxtcur * TCPTV_TWTRUNC); - else - add_to_time_wait(tp, 2 * tcp_msl); + add_to_time_wait(tp, 2 * tcp_msl); isconnected = FALSE; isdisconnected = TRUE; } @@ -3592,6 +3853,26 @@ process_ACK: add_to_time_wait(tp, 2 * tcp_msl); goto dropafterack; } + + /* + * If there is a SACK option on the ACK and we + * haven't seen any duplicate acks before, count + * it as a duplicate ack even if the cumulative + * ack is advanced. If the receiver delayed an + * ack and detected loss afterwards, then the ack + * will advance cumulative ack and will also have + * a SACK option. So counting it as one duplicate + * ack is ok. + */ + if (sack_ackadv == 1 && + tp->t_state == TCPS_ESTABLISHED && + SACK_ENABLED(tp) && + sack_bytes_acked > 0 && + tp->t_dupacks == 0 && + SEQ_LEQ(th->th_ack, tp->snd_una) && tlen == 0) { + tcpstat.tcps_sack_ackadv++; + goto process_dupack; + } } step6: @@ -3727,11 +4008,21 @@ dodata: LIST_EMPTY(&tp->t_segq) && TCPS_HAVEESTABLISHED(tp->t_state)) { TCP_INC_VAR(tp->t_unacksegs, nlropkts); + /* + * Calculate the RTT on the receiver only if the + * connection is in streaming mode and the last + * packet was not an end-of-write + */ + if ((tp->t_flags & TF_STRETCHACK) && + !(tp->t_flagsext & TF_STREAMEOW)) + tcp_compute_rtt(tp, &to, th); + if (DELAY_ACK(tp, th) && ((tp->t_flags & TF_ACKNOW) == 0) ) { if ((tp->t_flags & TF_DELACK) == 0) { tp->t_flags |= TF_DELACK; - tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack); + tp->t_timer[TCPT_DELACK] = + OFFSET_FROM_START(tp, tcp_delack); } } else { @@ -3742,27 +4033,31 @@ dodata: TCP_INC_VAR(tcpstat.tcps_rcvpack, nlropkts); tcpstat.tcps_rcvbyte += tlen; if (nstat_collect) { - if (m->m_pkthdr.aux_flags & MAUXF_SW_LRO_PKT) { - locked_add_64(&inp->inp_stat->rxpackets, m->m_pkthdr.lro_npkts); - } else { - locked_add_64(&inp->inp_stat->rxpackets, 1); + if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_PKT) { + INP_ADD_STAT(inp, cell, wifi, rxpackets, + m->m_pkthdr.lro_npkts); + } else { + INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); } - locked_add_64(&inp->inp_stat->rxbytes, tlen); + INP_ADD_STAT(inp, cell, wifi, rxbytes, tlen); } - ND6_HINT(tp); - tcp_sbrcv_grow(tp, &so->so_rcv, &to, tlen); so_recv_data_stat(so, m, drop_hdrlen); - if (sbappendstream(&so->so_rcv, m)) + + if (sbappendstream_rcvdemux(so, m, + th->th_seq - (tp->irs + 1), 0)) { sorwakeup(so); + } } else { - thflags = tcp_reass(tp, th, &tlen, m); + thflags = 
tcp_reass(tp, th, &tlen, m, ifp); tp->t_flags |= TF_ACKNOW; } - if (tlen > 0 && tp->sack_enable) + if (tlen > 0 && SACK_ENABLED(tp)) tcp_update_sack_list(tp, save_start, save_end); + tcp_adaptive_rwtimo_check(tp, tlen); + if (tp->t_flags & TF_DELACK) { #if INET6 @@ -3842,20 +4137,19 @@ dodata: * standard timers. */ case TCPS_FIN_WAIT_2: - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_TIME_WAIT); + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, inp, + struct tcpcb *, tp, + int32_t, TCPS_TIME_WAIT); tp->t_state = TCPS_TIME_WAIT; tcp_canceltimers(tp); - /* Shorten TIME_WAIT [RFC-1644, p.28] */ if (tp->cc_recv != 0 && ((int)(tcp_now - tp->t_starttime)) < tcp_msl) { - add_to_time_wait(tp, tp->t_rxtcur * TCPTV_TWTRUNC); /* For transaction client, force ACK now. */ tp->t_flags |= TF_ACKNOW; tp->t_unacksegs = 0; } - else - add_to_time_wait(tp, 2 * tcp_msl); + add_to_time_wait(tp, 2 * tcp_msl); soisdisconnected(so); break; @@ -3907,10 +4201,7 @@ dropafterack: (SEQ_GT(tp->snd_una, th->th_ack) || SEQ_GT(th->th_ack, tp->snd_max)) ) { rstreason = BANDLIM_RST_OPENPORT; - - if (ifp != NULL && ifp->if_tcp_stat != NULL) - atomic_add_64(&ifp->if_tcp_stat->dospacket, 1); - + IF_TCP_STATINC(ifp, dospacket); goto dropwithreset; } #if TCPDEBUG @@ -3978,8 +4269,7 @@ dropwithreset: if (dropsocket) { (void) soabort(so); tcp_unlock(so, 1, 0); - } - else if ((inp != NULL) && (nosock == 0)) { + } else if ((inp != NULL) && (nosock == 0)) { tcp_unlock(so, 1, 0); } KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0); @@ -4047,11 +4337,7 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) if (!(th->th_flags & TH_SYN)) continue; bcopy((char *) cp + 2, (char *) &mss, sizeof(mss)); - -#if BYTE_ORDER != BIG_ENDIAN NTOHS(mss); -#endif - break; case TCPOPT_WINDOW: @@ -4059,6 +4345,7 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) continue; if (!(th->th_flags & TH_SYN)) continue; + to->to_flags |= TOF_SCALE; tp->t_flags |= TF_RCVD_SCALE; tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT); break; @@ -4069,18 +4356,10 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) to->to_flags |= TOF_TS; bcopy((char *)cp + 2, (char *)&to->to_tsval, sizeof(to->to_tsval)); - -#if BYTE_ORDER != BIG_ENDIAN NTOHL(to->to_tsval); -#endif - bcopy((char *)cp + 6, (char *)&to->to_tsecr, sizeof(to->to_tsecr)); - -#if BYTE_ORDER != BIG_ENDIAN NTOHL(to->to_tsecr); -#endif - /* * A timestamp received in a SYN makes * it ok to send timestamp requests and replies. @@ -4106,6 +4385,12 @@ tcp_dooptions(tp, cp, cnt, th, to, input_ifscope) tcpstat.tcps_sack_rcv_blocks++; break; + +#if MPTCP + case TCPOPT_MULTIPATH: + tcp_do_mptcp_options(tp, cp, th, to, optlen); + break; +#endif /* MPTCP */ } } if (th->th_flags & TH_SYN) @@ -4166,8 +4451,24 @@ get_base_rtt(struct tcpcb *tp) void update_base_rtt(struct tcpcb *tp, uint32_t rtt) { + int32_t i, qdelay; + u_int32_t base_rtt; + if (++tp->rtt_count >= rtt_samples_per_slot) { - int i=0; +#if TRAFFIC_MGT + /* + * If the recv side is being throttled, check if the + * current RTT is closer to the base RTT seen in + * first (recent) two slots. If so, unthrottle the stream. 
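/*
 * A compact model of the kind/length walk tcp_dooptions() performs in
 * the hunks above. EOL and NOP are the two single-byte kinds; everything
 * else carries a length octet. Only window scale is decoded here,
 * mirroring the TOF_SCALE change in the patch; unknown options are
 * skipped, malformed ones abort.
 */
#include <stdint.h>

#define TCPOPT_EOL	0
#define TCPOPT_NOP	1
#define TCPOPT_WINDOW	3
#define TCPOLEN_WINDOW	3
#define TCP_MAX_WINSHIFT 14

static int
parse_wscale(const uint8_t *cp, int cnt, uint8_t *wscale)
{
	int opt, optlen;

	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP) {
			optlen = 1;
			continue;
		}
		if (cnt < 2)
			return (-1);		/* truncated option */
		optlen = cp[1];
		if (optlen < 2 || optlen > cnt)
			return (-1);		/* malformed length */
		if (opt == TCPOPT_WINDOW && optlen == TCPOLEN_WINDOW) {
			*wscale = (cp[2] > TCP_MAX_WINSHIFT) ?
			    TCP_MAX_WINSHIFT : cp[2];
			return (1);		/* found and clamped */
		}
	}
	return (0);				/* no window scale present */
}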
+ */ + if (tp->t_flagsext & TF_RECV_THROTTLE) { + base_rtt = min(tp->rtt_hist[0], tp->rtt_hist[1]); + qdelay = tp->t_rttcur - base_rtt; + if (qdelay < target_qdelay) + tp->t_flagsext &= ~(TF_RECV_THROTTLE); + } +#endif /* TRAFFIC_MGT */ + for (i = (N_RTT_BASE-1); i > 0; --i) { tp->rtt_hist[i] = tp->rtt_hist[i-1]; } @@ -4178,17 +4479,69 @@ update_base_rtt(struct tcpcb *tp, uint32_t rtt) } } +/* + * If we have a timestamp reply, update smoothed RTT. If no timestamp is + * present but transmit timer is running and timed sequence number was + * acked, update smoothed RTT. + * + * If timestamps are supported, a receiver can update RTT even if + * there is no outstanding data. + * + * Some boxes send broken timestamp replies during the SYN+ACK phase, + * ignore timestamps of 0 or we could calculate a huge RTT and blow up + * the retransmit timer. + */ +static void +tcp_compute_rtt(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th) +{ + VERIFY(to != NULL && th != NULL); + if (((to->to_flags & TOF_TS) != 0) && + (to->to_tsecr != 0) && + TSTMP_GEQ(tcp_now, to->to_tsecr)) { + tcp_xmit_timer(tp, tcp_now - to->to_tsecr, + to->to_tsecr, th->th_ack); + } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) { + tcp_xmit_timer(tp, tcp_now - tp->t_rtttime, 0, + th->th_ack); + } +} + /* * Collect new round-trip time estimate * and update averages and current timeout. */ static void -tcp_xmit_timer(tp, rtt) - register struct tcpcb *tp; - int rtt; +tcp_xmit_timer(register struct tcpcb *tp, int rtt, + u_int32_t tsecr, tcp_seq th_ack) { register int delta; + if (tp->t_flagsext & TF_RECOMPUTE_RTT) { + if (SEQ_GT(th_ack, tp->snd_una) && + SEQ_LEQ(th_ack, tp->snd_max) && + (tsecr == 0 || + TSTMP_GEQ(tsecr, tp->t_badrexmt_time))) { + /* + * We received a new ACK after a + * spurious timeout. Adapt retransmission + * timer as described in RFC 4015. + */ + tp->t_flagsext &= ~(TF_RECOMPUTE_RTT); + tp->t_badrexmt_time = 0; + tp->t_srtt = max(tp->t_srtt_prev, rtt); + tp->t_srtt = tp->t_srtt << TCP_RTT_SHIFT; + tp->t_rttvar = max(tp->t_rttvar_prev, (rtt >> 1)); + tp->t_rttvar = tp->t_rttvar << TCP_RTTVAR_SHIFT; + + if (tp->t_rttbest > (tp->t_srtt + tp->t_rttvar)) + tp->t_rttbest = tp->t_srtt + tp->t_rttvar; + + goto compute_rto; + } else { + return; + } + } + tcpstat.tcps_rttupdated++; tp->t_rttupdated++; @@ -4205,9 +4558,10 @@ tcp_xmit_timer(tp, rtt) * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed * point). * - * Freebsd adjusts rtt to origin 0 by subtracting 1 from the provided - * rtt value. This was required because of the way t_rtttime was - * initiailised to 1 before. Since we changed t_rtttime to be based on + * Freebsd adjusts rtt to origin 0 by subtracting 1 + * from the provided rtt value. This was required because + * of the way t_rtttime was initialised to 1 before. + * Since we changed t_rtttime to be based on * tcp_now, this extra adjustment is not needed. */ delta = (rtt << TCP_DELTA_SHIFT) @@ -4243,10 +4597,13 @@ tcp_xmit_timer(tp, rtt) tp->t_srtt = rtt << TCP_RTT_SHIFT; tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); } - nstat_route_rtt(tp->t_inpcb->inp_route.ro_rt, tp->t_srtt, tp->t_rttvar); + +compute_rto: + nstat_route_rtt(tp->t_inpcb->inp_route.ro_rt, tp->t_srtt, + tp->t_rttvar); tp->t_rtttime = 0; tp->t_rxtshift = 0; - tp->rxt_start = 0; + tp->t_rxtstart = 0; /* * the retransmit should happen at rtt + 4 * rttvar.
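/*
 * The smoothing done by tcp_xmit_timer() above, reduced to the textbook
 * RFC 6298 form: srtt += (rtt - srtt)/8, rttvar += (|rtt - srtt| -
 * rttvar)/4, RTO = srtt + 4*rttvar. xnu keeps srtt and rttvar pre-scaled
 * by TCP_RTT_SHIFT/TCP_RTTVAR_SHIFT to avoid losing fractions; this model
 * uses unscaled millisecond values for clarity.
 */
#include <stdint.h>
#include <stdlib.h>

struct rtt_est {
	int32_t srtt;		/* smoothed RTT, ms; 0 = no sample yet */
	int32_t rttvar;		/* mean deviation estimate, ms */
};

static int32_t
rtt_update(struct rtt_est *e, int32_t rtt)
{
	if (e->srtt == 0) {
		e->srtt = rtt;		/* first sample: RFC 6298, 2.2 */
		e->rttvar = rtt / 2;
	} else {
		int32_t delta = rtt - e->srtt;

		e->srtt += delta / 8;				/* alpha = 1/8 */
		e->rttvar += (abs(delta) - e->rttvar) / 4;	/* beta = 1/4 */
	}
	return (e->srtt + 4 * e->rttvar);	/* retransmit timeout, ms */
}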
@@ -4370,24 +4727,11 @@ tcp_mss(tp, offer, input_ifscope) #if INET6 if (isipv6) { rt = tcp_rtlookup6(inp, input_ifscope); - if (rt != NULL && - (IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr) || - IN6_IS_ADDR_LINKLOCAL(&inp->in6p_faddr) || - rt->rt_gateway->sa_family == AF_LINK || - in6_localaddr(&inp->in6p_faddr))) { - tp->t_flags |= TF_LOCAL; - } } else #endif /* INET6 */ { rt = tcp_rtlookup(inp, input_ifscope); - if (rt != NULL && - (rt->rt_gateway->sa_family == AF_LINK || - rt->rt_ifp->if_flags & IFF_LOOPBACK || - in_localaddr(inp->inp_faddr))) { - tp->t_flags |= TF_LOCAL; - } } isnetlocal = (tp->t_flags & TF_LOCAL); @@ -4492,8 +4836,12 @@ tcp_mss(tp, offer, input_ifscope) (origoffer == -1 || (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP)) mss -= TCPOLEN_TSTAMP_APPA; - tp->t_maxseg = mss; +#if MPTCP + mss -= mptcp_adj_mss(tp, FALSE); +#endif /* MPTCP */ + tp->t_maxseg = mss; + /* * Calculate corrected value for sb_max; ensure to upgrade the * numerator for large sb_max values else it will overflow. @@ -4641,7 +4989,7 @@ tcp_newreno_partial_ack(tp, th) * (tp->snd_una has not yet been updated when this function * is called) */ - tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una); + tp->snd_cwnd = tp->t_maxseg + BYTES_ACKED(th, tp); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); tp->snd_cwnd = ocwnd; @@ -4651,8 +4999,8 @@ tcp_newreno_partial_ack(tp, th) * Partial window deflation. Relies on fact that tp->snd_una * not updated yet. */ - if (tp->snd_cwnd > th->th_ack - tp->snd_una) - tp->snd_cwnd -= th->th_ack - tp->snd_una; + if (tp->snd_cwnd > BYTES_ACKED(th, tp)) + tp->snd_cwnd -= BYTES_ACKED(th, tp); else tp->snd_cwnd = 0; tp->snd_cwnd += tp->t_maxseg; @@ -4673,29 +5021,61 @@ tcp_dropdropablreq(struct socket *head) { struct socket *so, *sonext; unsigned int i, j, qlen; - static int rnd; - static struct timeval old_runtime; + static u_int32_t rnd = 0; + static u_int64_t old_runtime; static unsigned int cur_cnt, old_cnt; - struct timeval tv; + u_int64_t now_sec; struct inpcb *inp = NULL; struct tcpcb *tp; if ((head->so_options & SO_ACCEPTCONN) == 0) - return 0; + return (0); + + if (TAILQ_EMPTY(&head->so_incomp)) + return (0); + + /* + * Check if there is any socket in the incomp queue + * that is closed because of a reset from the peer and is + * waiting to be garbage collected. If so, pick that as + * the victim + */ + TAILQ_FOREACH_SAFE(so, &head->so_incomp, so_list, sonext) { + inp = sotoinpcb(so); + tp = intotcpcb(inp); + if (tp != NULL && tp->t_state == TCPS_CLOSED && + so->so_head != NULL && + (so->so_state & (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE)) == + (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE)) { + /* + * The listen socket is already locked but we + * can lock this socket here without lock ordering + * issues because it is in the incomp queue and + * is not visible to others. 
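/*
 * BYTES_ACKED(th, tp) replaces the open-coded th->th_ack - tp->snd_una
 * throughout this patch (see tcp_newreno_partial_ack() above). Its
 * definition is not shown in these hunks; the obvious reading, assumed
 * here, is the same unsigned subtraction, centralized so every congestion
 * control module counts acked bytes identically. Modulo-2^32 arithmetic
 * keeps it correct across sequence-number wrap.
 */
#include <stdint.h>

static uint32_t
bytes_acked(uint32_t th_ack, uint32_t snd_una)
{
	return (th_ack - snd_una);	/* mod-2^32 difference */
}

/* e.g. snd_una = 0xfffffff0, th_ack = 0x10 -> 0x20 bytes acked */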
+ */ + if (lck_mtx_try_lock(&inp->inpcb_mtx)) { + so->so_usecount++; + goto found_victim; + } else { + continue; + } + } + } so = TAILQ_FIRST(&head->so_incomp); - if (!so) - return 0; - microtime(&tv); - if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) { - old_runtime = tv; + now_sec = net_uptime(); + if ((i = (now_sec - old_runtime)) != 0) { + old_runtime = now_sec; old_cnt = cur_cnt / i; cur_cnt = 0; } qlen = head->so_incqlen; + if (rnd == 0) + rnd = RandomULong(); + if (++cur_cnt > qlen || old_cnt > qlen) { rnd = (314159 * rnd + 66329) & 0xffff; j = ((qlen + 1) * rnd) >> 16; @@ -4709,25 +5089,34 @@ tcp_dropdropablreq(struct socket *head) sonext = TAILQ_NEXT(so, so_list); - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - /* Avoid the issue of a socket being accepted by one input thread - * and being dropped by another input thread. - * If we can't get a hold on this mutex, then grab the next socket in line. + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) + != WNT_STOPUSING) { + /* + * Avoid the issue of a socket being accepted + * by one input thread and being dropped by + * another input thread. If we can't get a hold + * on this mutex, then grab the next socket in + * line. */ if (lck_mtx_try_lock(&inp->inpcb_mtx)) { so->so_usecount++; if ((so->so_usecount == 2) && - (so->so_state & SS_INCOMP) != 0 && - (so->so_flags & SOF_INCOMP_INPROGRESS) == 0) + (so->so_state & SS_INCOMP) && + !(so->so_flags & SOF_INCOMP_INPROGRESS)) { break; - else {/* don't use if being accepted or used in any other way */ + } else { + /* + * don't use if being accepted or + * used in any other way + */ in_pcb_checkstate(inp, WNT_RELEASE, 1); tcp_unlock(so, 1, 0); } - } - else { - /* do not try to lock the inp in in_pcb_checkstate - * because the lock is already held in some other thread. + } else { + /* + * do not try to lock the inp in + * in_pcb_checkstate because the lock + * is already held in some other thread. * Only drop the inp_wntcnt reference. */ in_pcb_checkstate(inp, WNT_RELEASE, 1); @@ -4736,20 +5125,22 @@ tcp_dropdropablreq(struct socket *head) so = sonext; } - if (!so) - return 0; + if (so == NULL) { + return (0); + } /* Makes sure socket is still in the right state to be discarded */ if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { tcp_unlock(so, 1, 0); - return 0; + return (0); } +found_victim: if (so->so_usecount != 2 || !(so->so_state & SS_INCOMP)) { /* do not discard: that socket is being accepted */ tcp_unlock(so, 1, 0); - return 0; + return (0); } TAILQ_REMOVE(&head->so_incomp, so, so_list); @@ -4761,35 +5152,38 @@ tcp_dropdropablreq(struct socket *head) so->so_head = NULL; tcp_close(tp); - tp->t_unacksegs = 0; - if (inp->inp_wantcnt > 0 && inp->inp_wantcnt != WNT_STOPUSING) { - /* Some one has a wantcnt on this pcb. Since WNT_ACQUIRE + /* + * Some one has a wantcnt on this pcb. Since WNT_ACQUIRE * doesn't require a lock, it could have happened while * we are holding the lock. This pcb will have to * be garbage collected later. * Release the reference held for so_incomp queue */ so->so_usecount--; - tcp_unlock(so, 1, 0); } else { - /* Unlock this socket and leave the reference on. We need to - * acquire the pcbinfo lock in order to fully dispose it off + /* + * Unlock this socket and leave the reference on. 
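/*
 * The victim-selection arithmetic used above, isolated: a 16-bit linear
 * congruential generator stepped once per drop, then a multiply-and-shift
 * that maps the 16-bit value onto [0, qlen] without a modulo. The kernel
 * seeds rnd once from RandomULong(); here the state is explicit.
 */
#include <stdint.h>

struct drop_rng {
	uint32_t rnd;		/* low 16 bits hold the LCG state */
};

static uint32_t
drop_pick(struct drop_rng *r, uint32_t qlen)
{
	r->rnd = (314159 * r->rnd + 66329) & 0xffff;
	return (((qlen + 1) * r->rnd) >> 16);	/* index in [0, qlen] */
}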
+ * We need to acquire the pcbinfo lock in order to + * fully dispose it off */ tcp_unlock(so, 0, 0); - lck_rw_lock_exclusive(tcbinfo.mtx); + lck_rw_lock_exclusive(tcbinfo.ipi_lock); tcp_lock(so, 0, 0); /* Release the reference held for so_incomp queue */ so->so_usecount--; if (so->so_usecount != 1 || - (inp->inp_wantcnt > 0 && inp->inp_wantcnt != WNT_STOPUSING)) { - /* There is an extra wantcount or usecount that must - * have been added when the socket was unlocked. This - * socket will have to be garbage collected later + (inp->inp_wantcnt > 0 && + inp->inp_wantcnt != WNT_STOPUSING)) { + /* + * There is an extra wantcount or usecount + * that must have been added when the socket + * was unlocked. This socket will have to be + * garbage collected later */ tcp_unlock(so, 1, 0); } else { @@ -4799,7 +5193,7 @@ tcp_dropdropablreq(struct socket *head) in_pcbdispose(inp); } - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); } tcpstat.tcps_drops++; @@ -4828,7 +5222,7 @@ tcp_set_new_cc(struct socket *so, uint16_t cc_index) { struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); - uint16_t old_cc_index = 0; + u_char old_cc_index = 0; if (tp->tcp_cc_index != cc_index) { old_cc_index = tp->tcp_cc_index; @@ -4859,6 +5253,9 @@ tcp_set_recv_bg(struct socket *so) { if (!IS_TCP_RECV_BG(so)) so->so_traffic_mgt_flags |= TRAFFIC_MGT_TCP_RECVBG; + + /* Unset Large Receive Offload on background sockets */ + so_set_lro(so, SO_TC_BK); } void @@ -4866,6 +5263,12 @@ tcp_clear_recv_bg(struct socket *so) { if (IS_TCP_RECV_BG(so)) so->so_traffic_mgt_flags &= ~(TRAFFIC_MGT_TCP_RECVBG); + + /* + * Set/unset use of Large Receive Offload depending on + * the traffic class + */ + so_set_lro(so, so->so_traffic_class); } void @@ -4901,7 +5304,7 @@ inp_fc_unthrottle_tcp(struct inpcb *inp) */ tcp_output(tp); } - + static int tcp_getstat SYSCTL_HANDLER_ARGS { @@ -4909,6 +5312,41 @@ tcp_getstat SYSCTL_HANDLER_ARGS int error; + proc_t caller = PROC_NULL; + proc_t caller_parent = PROC_NULL; + char command_name[MAXCOMLEN + 1] = ""; + char parent_name[MAXCOMLEN + 1] = ""; + + if ((caller = proc_self()) != PROC_NULL) { + /* get process name */ + strlcpy(command_name, caller->p_comm, sizeof(command_name)); + + /* get parent process name if possible */ + if ((caller_parent = proc_find(caller->p_ppid)) != PROC_NULL) { + strlcpy(parent_name, caller_parent->p_comm, + sizeof(parent_name)); + proc_rele(caller_parent); + } + + if ((escape_str(command_name, strlen(command_name), + sizeof(command_name)) == 0) && + (escape_str(parent_name, strlen(parent_name), + sizeof(parent_name)) == 0)) { + kern_asl_msg(LOG_DEBUG, "messagetracer", + 5, + "com.apple.message.domain", + "com.apple.kernel.tcpstat", /* 1 */ + "com.apple.message.signature", + "tcpstat", /* 2 */ + "com.apple.message.signature2", command_name, /* 3 */ + "com.apple.message.signature3", parent_name, /* 4 */ + "com.apple.message.summarize", "YES", /* 5 */ + NULL); + } + } + if (caller != PROC_NULL) + proc_rele(caller); + if (req->oldptr == 0) { req->oldlen= (size_t)sizeof(struct tcpstat); } @@ -4919,6 +5357,153 @@ tcp_getstat SYSCTL_HANDLER_ARGS } +/* + * Checksum extended TCP header and data. 
+ */ +int +tcp_input_checksum(int af, struct mbuf *m, struct tcphdr *th, int off, int tlen) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + + switch (af) { + case AF_INET: { + struct ip *ip = mtod(m, struct ip *); + struct ipovly *ipov = (struct ipovly *)ip; + + if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_DID_CSUM) + return (0); + + if ((hwcksum_rx || (ifp->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) && + (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + th->th_sum = m->m_pkthdr.csum_rx_val; + } else { + uint16_t sum = m->m_pkthdr.csum_rx_val; + uint16_t start = m->m_pkthdr.csum_rx_start; + + /* + * Perform 1's complement adjustment of octets + * that got included/excluded in the hardware- + * calculated checksum value. Ignore cases + * where the value includes or excludes the IP + * header span, as the sum for those octets + * would already be 0xffff and thus no-op. + */ + if ((m->m_pkthdr.csum_flags & CSUM_PARTIAL) && + start != 0 && (off - start) != off) { +#if BYTE_ORDER != BIG_ENDIAN + if (start < off) { + HTONS(ip->ip_len); + HTONS(ip->ip_off); + } +#endif + /* callee folds in sum */ + sum = m_adj_sum16(m, start, off, sum); +#if BYTE_ORDER != BIG_ENDIAN + if (start < off) { + NTOHS(ip->ip_off); + NTOHS(ip->ip_len); + } +#endif + } + + /* callee folds in sum */ + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, + sum + htonl(tlen + IPPROTO_TCP)); + } + th->th_sum ^= 0xffff; + } else { + uint16_t ip_sum; + int len; + char b[9]; + + bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1)); + bzero(ipov->ih_x1, sizeof (ipov->ih_x1)); + ip_sum = ipov->ih_len; + ipov->ih_len = (u_short)tlen; +#if BYTE_ORDER != BIG_ENDIAN + HTONS(ipov->ih_len); +#endif + len = sizeof (struct ip) + tlen; + th->th_sum = in_cksum(m, len); + bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1)); + ipov->ih_len = ip_sum; + + tcp_in_cksum_stats(len); + } + break; + } +#if INET6 + case AF_INET6: { + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + + if (m->m_pkthdr.pkt_flags & PKTF_SW_LRO_DID_CSUM) + return (0); + + if ((hwcksum_rx || (ifp->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) && + (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + th->th_sum = m->m_pkthdr.csum_rx_val; + } else { + uint16_t sum = m->m_pkthdr.csum_rx_val; + uint16_t start = m->m_pkthdr.csum_rx_start; + + /* + * Perform 1's complement adjustment of octets + * that got included/excluded in the hardware- + * calculated checksum value. 
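/*
 * The arithmetic behind the in_cksum()/in_pseudo() calls in this
 * function: an RFC 1071 one's complement sum over 16-bit words with the
 * carries folded back in. The hardware-assist path above adjusts such a
 * sum with m_adj_sum16() instead of recomputing it; this flat-buffer
 * version is a model, since the kernel sums across mbuf chains.
 */
#include <stdint.h>
#include <stddef.h>

static uint16_t
rfc1071_cksum(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];	/* network order */
		p += 2;
		len -= 2;
	}
	if (len == 1)
		sum += (uint32_t)p[0] << 8;	/* odd byte, zero-padded */

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */

	return ((uint16_t)~sum);
}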
+ */ + if ((m->m_pkthdr.csum_flags & CSUM_PARTIAL) && + start != off) { + uint16_t s, d; + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { + s = ip6->ip6_src.s6_addr16[1]; + ip6->ip6_src.s6_addr16[1] = 0 ; + } + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { + d = ip6->ip6_dst.s6_addr16[1]; + ip6->ip6_dst.s6_addr16[1] = 0; + } + + /* callee folds in sum */ + sum = m_adj_sum16(m, start, off, sum); + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = s; + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = d; + } + + th->th_sum = in6_pseudo( + &ip6->ip6_src, &ip6->ip6_dst, + sum + htonl(tlen + IPPROTO_TCP)); + } + th->th_sum ^= 0xffff; + } else { + tcp_in6_cksum_stats(tlen); + th->th_sum = in6_cksum(m, IPPROTO_TCP, off, tlen); + } + break; + } +#endif /* INET6 */ + default: + VERIFY(0); + /* NOTREACHED */ + } + + if (th->th_sum != 0) { + tcpstat.tcps_rcvbadsum++; + IF_TCP_STATINC(ifp, badformat); + return (-1); + } + + return (0); +} + SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_getstat, "S,tcpstat", "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); diff --git a/bsd/netinet/tcp_ledbat.c b/bsd/netinet/tcp_ledbat.c index d13dc50bf..145188839 100644 --- a/bsd/netinet/tcp_ledbat.c +++ b/bsd/netinet/tcp_ledbat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2011 Apple Inc. All rights reserved. + * Copyright (c) 2010-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -228,7 +228,7 @@ tcp_ledbat_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) { int acked = 0; u_int32_t incr = 0; - acked = th->th_ack - tp->snd_una; + acked = BYTES_ACKED(th, tp); tp->t_bytes_acked += acked; if (tp->t_bytes_acked > tp->snd_cwnd) { tp->t_bytes_acked -= tp->snd_cwnd; @@ -260,7 +260,7 @@ tcp_ledbat_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) { register u_int incr = tp->t_maxseg; int acked = 0; - acked = th->th_ack - tp->snd_una; + acked = BYTES_ACKED(th, tp); tp->t_bytes_acked += acked; if (cw >= tp->bg_ssthresh) { /* congestion-avoidance */ @@ -318,9 +318,13 @@ tcp_ledbat_post_fr(struct tcpcb *tp, struct tcphdr *th) { * snd_ssthresh outstanding data. But in case we * would be inclined to send a burst, better to do * it via the slow start mechanism. + * + * If the flight size is zero, then make congestion + * window to be worth at least 2 segments to avoid + * delayed acknowledgement (draft-ietf-tcpm-rfc3782-bis-05). 
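/*
 * The window deflation described by this comment and performed by the
 * assignment just below, as a pure function: after fast recovery, cwnd
 * drops to roughly the data still in flight plus one segment, never above
 * ssthresh, and (per the draft-ietf-tcpm-rfc3782-bis-05 change) never
 * below two segments even when the flight is empty.
 */
#include <stdint.h>

static uint32_t
post_fr_cwnd(int32_t flight, uint32_t ssthresh, uint32_t maxseg)
{
	if (flight < (int32_t)ssthresh) {
		uint32_t base = (flight > (int32_t)maxseg) ?
		    (uint32_t)flight : maxseg;
		return (base + maxseg);	/* >= 2 * maxseg when flight == 0 */
	}
	return (ssthresh);
}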
*/ if (ss < (int32_t)tp->snd_ssthresh) - tp->snd_cwnd = ss + tp->t_maxseg; + tp->snd_cwnd = max(ss, tp->t_maxseg) + tp->t_maxseg; else tp->snd_cwnd = tp->snd_ssthresh; tp->t_bytes_acked = 0; @@ -373,14 +377,12 @@ tcp_ledbat_after_timeout(struct tcpcb *tp) { u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; if (win < 2) win = 2; - tp->snd_cwnd = tp->t_maxseg; tp->snd_ssthresh = win * tp->t_maxseg; - tp->t_bytes_acked = 0; - tp->t_dupacks = 0; if (tp->bg_ssthresh > tp->snd_ssthresh) tp->bg_ssthresh = tp->snd_ssthresh; + tp->snd_cwnd = tp->t_maxseg; tcp_cc_resize_sndbuf(tp); } } @@ -401,6 +403,12 @@ tcp_ledbat_after_timeout(struct tcpcb *tp) { int tcp_ledbat_delay_ack(struct tcpcb *tp, struct tcphdr *th) { + /* If any flag other than TH_ACK is set, set "end-of-write" bit */ + if (th->th_flags & ~TH_ACK) + tp->t_flagsext |= TF_STREAMEOW; + else + tp->t_flagsext &= ~(TF_STREAMEOW); + if ((tp->t_flags & TF_RXWIN0SENT) == 0 && (th->th_flags & TH_PUSH) == 0 && (tp->t_unacksegs == 1)) diff --git a/bsd/netinet/tcp_lro.c b/bsd/netinet/tcp_lro.c index 0f127c7e3..d2a2539c7 100644 --- a/bsd/netinet/tcp_lro.c +++ b/bsd/netinet/tcp_lro.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 Apple Inc. All rights reserved. + * Copyright (c) 2011-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +53,6 @@ unsigned int lrocount = 0; /* A counter used for debugging only */ unsigned int lro_seq_outoforder = 0; /* Counter for debugging */ unsigned int lro_seq_mismatch = 0; /* Counter for debugging */ -unsigned int lro_eject_req = 0; /* Counter for tracking flow ejections */ unsigned int lro_flushes = 0; /* Counter for tracking number of flushes */ unsigned int lro_single_flushes = 0; unsigned int lro_double_flushes = 0; @@ -93,7 +93,7 @@ static void tcp_lro_flush_flows(void); static void tcp_lro_sched_timer(uint64_t); static void lro_proto_input(struct mbuf *); -static struct mbuf *lro_tcp_xsum_validate(struct mbuf*, struct ipovly *, +static struct mbuf *lro_tcp_xsum_validate(struct mbuf*, struct ip *, struct tcphdr*); static struct mbuf *tcp_lro_process_pkt(struct mbuf*, struct ip*, struct tcphdr*, int); @@ -256,7 +256,7 @@ tcp_lro_coalesce(int flow_id, struct mbuf *lro_mb, struct tcphdr *tcphdr, * This bit is re-OR'd each time a packet is added to the * large coalesced packet. 
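/*
 * A model of the per-flow bookkeeping tcp_lro_coalesce() does in the
 * surrounding hunk, with the mbuf plumbing stripped out; the struct is an
 * assumption shaped after the lr_* fields visible in the patch. Each
 * merged segment bumps the hidden packet count (so tcps_rcvpack stays
 * honest), extends the coalesced length, advances the expected sequence,
 * and remembers the largest single packet for receive-window sizing.
 */
#include <stdint.h>

struct lro_flow_model {
	uint32_t lr_len;	/* coalesced TCP payload bytes */
	uint32_t lr_seq;	/* next expected sequence number */
	uint32_t lro_npkts;	/* packets merged into the head */
	uint32_t lro_pktlen;	/* largest single packet observed */
};

static void
lro_merge(struct lro_flow_model *f, uint32_t payload_len)
{
	f->lro_npkts++;			/* one more packet behind the head */
	f->lr_len += payload_len;
	f->lr_seq += payload_len;	/* next in-order byte we expect */
	if (payload_len > f->lro_pktlen)
		f->lro_pktlen = payload_len;
}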
*/ - flow->lr_mhead->m_pkthdr.aux_flags |= MAUXF_SW_LRO_PKT; + flow->lr_mhead->m_pkthdr.pkt_flags |= PKTF_SW_LRO_PKT; flow->lr_mhead->m_pkthdr.lro_npkts++; /* for tcpstat.tcps_rcvpack */ if (flow->lr_mhead->m_pkthdr.lro_pktlen < lro_mb->m_pkthdr.lro_pktlen) { @@ -292,7 +292,7 @@ tcp_lro_coalesce(int flow_id, struct mbuf *lro_mb, struct tcphdr *tcphdr, } else { if (lro_mb) { flow->lr_mhead = flow->lr_mtail = lro_mb; - flow->lr_mhead->m_pkthdr.aux_flags |= MAUXF_SW_LRO_PKT; + flow->lr_mhead->m_pkthdr.pkt_flags |= PKTF_SW_LRO_PKT; flow->lr_tcphdr = tcphdr; if ((topt) && (topt->to_flags & TOF_TS)) { ASSERT(tsval != NULL); @@ -301,6 +301,7 @@ tcp_lro_coalesce(int flow_id, struct mbuf *lro_mb, struct tcphdr *tcphdr, flow->lr_tsecr = tsecr; } flow->lr_len = payload_len; + calculate_tcp_clock(); flow->lr_timestamp = tcp_now; tcp_lro_sched_timer(0); } @@ -429,8 +430,10 @@ tcp_lro_process_pkt(struct mbuf *lro_mb, struct ip *ip_hdr, } } - if ((lro_mb = lro_tcp_xsum_validate(lro_mb, - (struct ipovly*)ip_hdr, tcp_hdr)) == NULL) { + /* Just in case */ + lro_mb->m_pkthdr.pkt_flags &= ~PKTF_SW_LRO_DID_CSUM; + + if ((lro_mb = lro_tcp_xsum_validate(lro_mb, ip_hdr, tcp_hdr)) == NULL) { if (lrodebug) { printf("tcp_lro_process_pkt: TCP xsum failed.\n"); } @@ -441,8 +444,8 @@ tcp_lro_process_pkt(struct mbuf *lro_mb, struct ip *ip_hdr, lro_pkt_count++; /* Avoids checksumming in tcp_input */ - lro_mb->m_pkthdr.aux_flags |= MAUXF_SW_LRO_DID_CSUM; - + lro_mb->m_pkthdr.pkt_flags |= PKTF_SW_LRO_DID_CSUM; + off = tcp_hdr->th_off << 2; optlen = off - sizeof (struct tcphdr); payload_len = ip_hdr->ip_len - off; @@ -535,8 +538,11 @@ tcp_lro_process_pkt(struct mbuf *lro_mb, struct ip *ip_hdr, mb = tcp_lro_eject_coalesced_pkt(flow_id); lro_flow_list[flow_id].lr_seq = ntohl(tcp_hdr->th_seq) + payload_len; + calculate_tcp_clock(); + u_int8_t timestamp = tcp_now - lro_flow_list[flow_id].lr_timestamp; lck_mtx_unlock(&tcp_lro_lock); if (mb) { + mb->m_pkthdr.lro_elapsed = timestamp; lro_proto_input(mb); } if (!coalesced) { @@ -552,10 +558,13 @@ tcp_lro_process_pkt(struct mbuf *lro_mb, struct ip *ip_hdr, case TCP_LRO_EJECT_FLOW: mb = tcp_lro_eject_coalesced_pkt(flow_id); + calculate_tcp_clock(); + u_int8_t timestamp = tcp_now - lro_flow_list[flow_id].lr_timestamp; lck_mtx_unlock(&tcp_lro_lock); if (mb) { if (lrodebug) printf("tcp_lro_process_pkt eject_flow, len = %d\n", mb->m_pkthdr.len); + mb->m_pkthdr.lro_elapsed = timestamp; lro_proto_input(mb); } @@ -596,8 +605,6 @@ tcp_lro_flush_flows(void) int i = 0; struct mbuf *mb; struct lro_flow *flow; - int active_flows = 0; - int outstanding_flows = 0; int tcpclock_updated = 0; lck_mtx_lock(&tcp_lro_lock); @@ -605,74 +612,33 @@ tcp_lro_flush_flows(void) while (i < TCP_LRO_NUM_FLOWS) { flow = &lro_flow_list[i]; if (flow->lr_mhead != NULL) { - active_flows++; + if (!tcpclock_updated) { calculate_tcp_clock(); tcpclock_updated = 1; } - if (((tcp_now - flow->lr_timestamp) >= coalesc_time) || - (flow->lr_mhead->m_pkthdr.lro_npkts >= - coalesc_sz)) { - if (lrodebug >= 2) - printf("tcp_lro_flush_flows: len =%d n_pkts = %d %d %d \n", + if (lrodebug >= 2) + printf("tcp_lro_flush_flows: len =%d n_pkts = %d %d %d \n", flow->lr_len, flow->lr_mhead->m_pkthdr.lro_npkts, flow->lr_timestamp, tcp_now); - mb = tcp_lro_eject_flow(i); + u_int8_t timestamp = tcp_now - flow->lr_timestamp; - if (mb) { - lck_mtx_unlock(&tcp_lro_lock); - lro_update_flush_stats(mb); - lro_proto_input(mb); - lck_mtx_lock(&tcp_lro_lock); - } - - } else { - tcp_lro_sched_timer(0); - outstanding_flows++; - if (lrodebug >= 2) { - 
printf("tcp_lro_flush_flows: did not flush flow of len =%d deadline = %x timestamp = %x \n", - flow->lr_len, tcp_now, flow->lr_timestamp); - } - } - } - if (flow->lr_flags & LRO_EJECT_REQ) { mb = tcp_lro_eject_flow(i); + if (mb) { + mb->m_pkthdr.lro_elapsed = timestamp; lck_mtx_unlock(&tcp_lro_lock); + lro_update_flush_stats(mb); lro_proto_input(mb); - lro_eject_req++; lck_mtx_lock(&tcp_lro_lock); } } i++; } lck_mtx_unlock(&tcp_lro_lock); -#if 0 - if (lrocount == 900) { - printf("%s: %d %d %d %d oo: %d mismatch: %d ej_req: %d coll: %d \n", - __func__, - tcpstat.tcps_coalesced_pack, - tcpstat.tcps_lro_twopack, - tcpstat.tcps_lro_multpack, - tcpstat.tcps_lro_largepack, - lro_seq_outoforder, - lro_seq_mismatch, - lro_eject_req, - tcpstat.tcps_flowtbl_collision); - printf("%s: all: %d single: %d double: %d good: %d \n", - __func__, lro_flushes, lro_single_flushes, - lro_double_flushes, lro_good_flushes); - lrocount = 0; - } else { - lrocount++; - } - if ((lrodebug >= 2) && (active_flows > 1)) { - printf("lro_flush_flows: active_flows = %d \n", active_flows); - } -#endif } /* @@ -718,13 +684,17 @@ tcp_lro(struct mbuf *m, unsigned int hlen) * improvement to throughput either. Loopback perf is hurt * by the 5 msec latency and it already sends large packets. */ - if ((m->m_pkthdr.rcvif->if_type == IFT_CELLULAR) || + if (IFNET_IS_CELLULAR(m->m_pkthdr.rcvif) || (m->m_pkthdr.rcvif->if_type == IFT_LOOP)) { return m; } ip_hdr = mtod(m, struct ip*); + /* don't deal with IP options */ + if (hlen > sizeof (struct ip)) + return (m); + /* only TCP is coalesced */ if (ip_hdr->ip_p != IPPROTO_TCP) { return m; @@ -745,6 +715,7 @@ tcp_lro(struct mbuf *m, unsigned int hlen) tlen = ip_hdr->ip_len ; //ignore IP header bytes len m->m_pkthdr.lro_pktlen = tlen; /* Used to return max pkt encountered to tcp */ m->m_pkthdr.lro_npkts = 1; /* Initialize a counter to hold num pkts coalesced */ + m->m_pkthdr.lro_elapsed = 0; /* Initialize the field to carry elapsed time */ off = tcp_hdr->th_off << 2; if (off < sizeof (struct tcphdr) || off > tlen) { tcpstat.tcps_rcvbadoff++; @@ -771,74 +742,21 @@ lro_proto_input(struct mbuf *m) } static struct mbuf * -lro_tcp_xsum_validate(struct mbuf *m, struct ipovly *ipov, struct tcphdr * th) +lro_tcp_xsum_validate(struct mbuf *m, struct ip *ip, struct tcphdr * th) { - - struct ip* ip = (struct ip*)ipov; - int tlen = ip->ip_len; - int len; - struct ifnet *ifp = ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) ? - m->m_pkthdr.rcvif: NULL; - /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { - if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) { - u_short pseudo; - char b[9]; - - bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1)); - bzero(ipov->ih_x1, sizeof (ipov->ih_x1)); - ipov->ih_len = (u_short)tlen; -#if BYTE_ORDER != BIG_ENDIAN - HTONS(ipov->ih_len); -#endif - pseudo = in_cksum(m, sizeof (struct ip)); - bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1)); - - th->th_sum = in_addword(pseudo, (m->m_pkthdr.csum_data & 0xFFFF)); - } else { - if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) - th->th_sum = m->m_pkthdr.csum_data; - else - th->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + - ip->ip_len + IPPROTO_TCP)); - } - th->th_sum ^= 0xffff; - } else { - char b[9]; - /* - * Checksum extended TCP header and data. 
- */ - bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1)); - bzero(ipov->ih_x1, sizeof (ipov->ih_x1)); - ipov->ih_len = (u_short)tlen; -#if BYTE_ORDER != BIG_ENDIAN - HTONS(ipov->ih_len); -#endif - len = sizeof (struct ip) + tlen; - th->th_sum = in_cksum(m, len); - bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1)); - - tcp_in_cksum_stats(len); - } - if (th->th_sum) { - tcpstat.tcps_rcvbadsum++; - if (ifp != NULL && ifp->if_tcp_stat != NULL) { - atomic_add_64(&ifp->if_tcp_stat->badformat, 1); - } - if (lrodebug) - printf("lro_tcp_xsum_validate: bad xsum and drop m = %p.\n",m); + /* we shouldn't get here for IP with options; hence sizeof (ip) */ + if (tcp_input_checksum(AF_INET, m, th, sizeof (*ip), ip->ip_len)) { + if (lrodebug) + printf("%s: bad xsum and drop m = 0x%llx.\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m)); m_freem(m); - return NULL; + return (NULL); } - /* revert back the order as IP will look into this again. */ -#if BYTE_ORDER != BIG_ENDIAN - NTOHS(ipov->ih_len); -#endif - return m; + + return (m); } /* diff --git a/bsd/netinet/tcp_newreno.c b/bsd/netinet/tcp_newreno.c index 8d256db71..158311244 100644 --- a/bsd/netinet/tcp_newreno.c +++ b/bsd/netinet/tcp_newreno.c @@ -199,7 +199,7 @@ tcp_newreno_cwnd_init_or_reset(struct tcpcb *tp) { void tcp_newreno_inseq_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) { int acked = 0; - acked = th->th_ack - tp->snd_una; + acked = BYTES_ACKED(th, tp); /* * Grow the congestion window, if the * connection is cwnd bound. @@ -233,7 +233,7 @@ tcp_newreno_ack_rcvd(struct tcpcb *tp, struct tcphdr *th) { register u_int incr = tp->t_maxseg; int acked = 0; - acked = th->th_ack - tp->snd_una; + acked = BYTES_ACKED(th, tp); if (tcp_do_rfc3465) { if (cw >= tp->snd_ssthresh) { @@ -302,9 +302,13 @@ tcp_newreno_post_fr(struct tcpcb *tp, struct tcphdr *th) { * snd_ssthresh outstanding data. But in case we * would be inclined to send a burst, better to do * it via the slow start mechanism. + * + * If the flight size is zero, then make congestion + * window to be worth at least 2 segments to avoid + * delayed acknowledgement (draft-ietf-tcpm-rfc3782-bis-05). */ if (ss < (int32_t)tp->snd_ssthresh) - tp->snd_cwnd = ss + tp->t_maxseg; + tp->snd_cwnd = max(ss, tp->t_maxseg) + tp->t_maxseg; else tp->snd_cwnd = tp->snd_ssthresh; tp->t_bytes_acked = 0; @@ -343,11 +347,9 @@ tcp_newreno_after_timeout(struct tcpcb *tp) { u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; if (win < 2) win = 2; - tp->snd_cwnd = tp->t_maxseg; tp->snd_ssthresh = win * tp->t_maxseg; - tp->t_bytes_acked = 0; - tp->t_dupacks = 0; + tp->snd_cwnd = tp->t_maxseg; tcp_cc_resize_sndbuf(tp); } } @@ -373,6 +375,12 @@ tcp_newreno_after_timeout(struct tcpcb *tp) { int tcp_newreno_delay_ack(struct tcpcb *tp, struct tcphdr *th) { + /* If any flags other than TH_ACK is set, set "end-of-write" bit */ + if ((th->th_flags & ~TH_ACK)) + tp->t_flagsext |= TF_STREAMEOW; + else + tp->t_flagsext &= ~(TF_STREAMEOW); + switch (tcp_delack_enabled) { case 1: case 2: diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c index 83a205319..8c58a9bfc 100644 --- a/bsd/netinet/tcp_output.c +++ b/bsd/netinet/tcp_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -85,6 +85,7 @@ #include #include #include +#include #include #include @@ -121,15 +122,16 @@ #endif /* MAC_SOCKET */ #include +#if MPTCP +#include +#include +#include +#endif #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETTCP, 1) #define DBG_LAYER_END NETDBG_CODE(DBG_NETTCP, 3) #define DBG_FNC_TCP_OUTPUT NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1) -#ifdef notyet -extern struct mbuf *m_copypack(); -#endif - int path_mtu_discovery = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_RW | CTLFLAG_LOCKED, &path_mtu_discovery, 1, "Enable Path MTU Discovery"); @@ -191,6 +193,17 @@ uint32_t tcp_prioritize_acks = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, ack_prioritize, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_prioritize_acks, 1, "Prioritize pure acks"); +uint32_t tcp_use_rtt_recvbg = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, rtt_recvbg, + CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_use_rtt_recvbg, 1, "Use RTT for bg recv algorithm"); + +uint32_t tcp_recv_throttle_minwin = 16 * 1024; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, recv_throttle_minwin, + CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_recv_throttle_minwin, 1, "Minimum recv win for throttling"); + + static int32_t packchain_newlist = 0; static int32_t packchain_looped = 0; static int32_t packchain_sent = 0; @@ -206,11 +219,7 @@ extern int fw_enable; /* firewall check for packet chaining */ extern int fw_bypass; /* firewall check: disable packet chaining if there is rules */ #endif /* IPFIREWALL */ -extern vm_size_t so_cache_zone_element_size; -#if RANDOM_IP_ID -extern int ip_use_randomid; -#endif /* RANDOM_IP_ID */ -extern u_int32_t dlil_filter_count; +extern u_int32_t dlil_filter_disable_tso_count; extern u_int32_t kipf_count; extern int tcp_recv_bg; extern int maxseg_unacked; @@ -220,21 +229,7 @@ static int tcp_ip_output(struct socket *, struct tcpcb *, struct mbuf *, int, extern uint32_t get_base_rtt(struct tcpcb *tp); static struct mbuf* tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th); - -static __inline__ u_int16_t -get_socket_id(struct socket * s) -{ - u_int16_t val; - - if (so_cache_zone_element_size == 0) { - return (0); - } - val = (u_int16_t)(((uintptr_t)s) / so_cache_zone_element_size); - if (val == 0) { - val = 0xffff; - } - return (val); -} +static int tcp_recv_throttle(struct tcpcb *tp); /* * Tcp output routine: figure out what should be sent and send it. @@ -265,7 +260,8 @@ get_socket_id(struct socket * s) int tcp_output(struct tcpcb *tp) { - struct socket *so = tp->t_inpcb->inp_socket; + struct inpcb *inp = tp->t_inpcb; + struct socket *so = inp->inp_socket; int32_t len, recwin, sendwin, off; int flags, error; register struct mbuf *m; @@ -282,27 +278,35 @@ tcp_output(struct tcpcb *tp) int tso = 0; int sack_bytes_rxmt; struct sackhole *p; -#ifdef IPSEC +#if IPSEC unsigned ipsec_optlen = 0; -#endif +#endif /* IPSEC */ int last_off = 0; - int m_off; + int m_off = 0; int idle_time = 0; struct mbuf *m_lastm = NULL; struct mbuf *m_head = NULL; struct mbuf *packetlist = NULL; - struct mbuf *tp_inp_options = tp->t_inpcb->inp_depend4.inp4_options; + struct mbuf *tp_inp_options = inp->inp_depend4.inp4_options; #if INET6 - int isipv6 = tp->t_inpcb->inp_vflag & INP_IPV6 ; + int isipv6 = inp->inp_vflag & INP_IPV6 ; #endif short packchain_listadd = 0; - u_int16_t socket_id = get_socket_id(so); int so_options = so->so_options; struct rtentry *rt; u_int32_t basertt, svc_flags = 0, allocated_len; u_int32_t lro_ackmore = (tp->t_lropktlen != 0) ? 
1 : 0; struct mbuf *mnext = NULL; int sackoptlen = 0; +#if MPTCP + unsigned int *dlenp = NULL; + u_int8_t *finp = NULL; + u_int32_t *sseqp = NULL; + u_int64_t dss_val = 0; + int mptcp_acknow = 0; +#endif /* MPTCP */ + boolean_t cell = FALSE; + boolean_t wifi = FALSE; /* * Determine length of data that should be transmitted, @@ -319,7 +323,7 @@ tcp_output(struct tcpcb *tp) if (idle && idle_time >= TCP_IDLETIMEOUT(tp)) { if (CC_ALGO(tp)->after_idle != NULL) CC_ALGO(tp)->after_idle(tp); - DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb, + DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, struct tcpcb *, tp, struct tcphdr *, NULL, int32_t, TCP_CC_IDLE_TIMEOUT); } @@ -330,24 +334,36 @@ tcp_output(struct tcpcb *tp) idle = 0; } } +#if MPTCP + if (tp->t_mpflags & TMPF_RESET) { + tcp_check_timer_state(tp); + /* + * Once a RST has been sent for an MPTCP subflow, + * the subflow socket stays around until deleted. + * No packets such as FINs must be sent after RST. + */ + return (0); + } +#endif /* MPTCP */ + again: KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); #if INET6 if (isipv6) { KERNEL_DEBUG(DBG_LAYER_BEG, - ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport), - (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) | - (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)), + ((inp->inp_fport << 16) | inp->inp_lport), + (((inp->in6p_laddr.s6_addr16[0] & 0xffff) << 16) | + (inp->in6p_faddr.s6_addr16[0] & 0xffff)), sendalot,0,0); } else #endif { KERNEL_DEBUG(DBG_LAYER_BEG, - ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport), - (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) | - (tp->t_inpcb->inp_faddr.s_addr & 0xffff)), + ((inp->inp_fport << 16) | inp->inp_lport), + (((inp->inp_laddr.s_addr & 0xffff) << 16) | + (inp->inp_faddr.s_addr & 0xffff)), sendalot,0,0); } /* @@ -356,9 +372,8 @@ again: * return error or silently do nothing (assuming the address will * come back before the TCP connection times out). */ - rt = tp->t_inpcb->inp_route.ro_rt; - if (rt != NULL && (!(rt->rt_flags & RTF_UP) || - rt->generation_id != route_generation)) { + rt = inp->inp_route.ro_rt; + if (rt != NULL && ROUTE_UNUSABLE(&tp->t_inpcb->inp_route)) { struct ifnet *ifp; struct in_ifaddr *ia = NULL; struct in6_ifaddr *ia6 = NULL; @@ -371,18 +386,17 @@ again: tp->t_flags &= ~TF_TSO; if (isipv6) { - ia6 = ifa_foraddr6(&tp->t_inpcb->in6p_laddr); + ia6 = ifa_foraddr6(&inp->in6p_laddr); if (ia6 != NULL) found_srcaddr = 1; } else { - ia = ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr); + ia = ifa_foraddr(inp->inp_laddr.s_addr); if (ia != NULL) found_srcaddr = 1; } /* check that the source address is still valid */ if (found_srcaddr == 0) { - soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR)); @@ -396,13 +410,13 @@ again: * advertised peer window may not be valid anymore */ - if (!tp->t_timer[TCPT_REXMT]) { - tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); + if (!tp->t_timer[TCPT_REXMT]) { + tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); if (tp->t_timer[TCPT_PERSIST]) { tp->t_timer[TCPT_PERSIST] = 0; tp->t_rxtshift = 0; tp->t_persist_stop = 0; - tp->rxt_start = 0; + tp->t_rxtstart = 0; } } @@ -436,7 +450,7 @@ again: tcp_set_tso(tp, ifp); } if (rt->rt_flags & RTF_UP) - rt->generation_id = route_generation; + RT_GENID_SYNC(rt); /* * See if we should do MTU discovery. 
Don't do it if: * 1) it is disabled via the sysctl @@ -454,12 +468,17 @@ again: RT_UNLOCK(rt); } + if (rt != NULL) { + cell = IFNET_IS_CELLULAR(rt->rt_ifp); + wifi = (!cell && IFNET_IS_WIFI(rt->rt_ifp)); + } + /* * If we've recently taken a timeout, snd_max will be greater than * snd_nxt. There may be SACK information that allows us to avoid * resending already delivered data. Adjust snd_nxt accordingly. */ - if (tp->sack_enable && SEQ_LT(tp->snd_nxt, tp->snd_max)) + if (SACK_ENABLED(tp) && SEQ_LT(tp->snd_nxt, tp->snd_max)) tcp_sack_adjust(tp); sendalot = 0; off = tp->snd_nxt - tp->snd_una; @@ -483,7 +502,7 @@ again: sack_bytes_rxmt = 0; len = 0; p = NULL; - if (tp->sack_enable && IN_FASTRECOVERY(tp) && + if (SACK_ENABLED(tp) && IN_FASTRECOVERY(tp) && (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { int32_t cwin; @@ -521,11 +540,12 @@ again: tcpstat.tcps_sack_rexmit_bytes += min(len, tp->t_maxseg); if (nstat_collect) { - nstat_route_tx(tp->t_inpcb->inp_route.ro_rt, 1, - min(len, tp->t_maxseg), NSTAT_TX_FLAG_RETRANSMIT); - locked_add_64(&tp->t_inpcb->inp_stat->txpackets, 1); - locked_add_64(&tp->t_inpcb->inp_stat->txbytes, - min(len, tp->t_maxseg)); + nstat_route_tx(inp->inp_route.ro_rt, 1, + min(len, tp->t_maxseg), + NSTAT_TX_FLAG_RETRANSMIT); + INP_ADD_STAT(inp, cell, wifi, txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, txbytes, + min(len, tp->t_maxseg)); tp->t_stat.txretransmitbytes += min(len, tp->t_maxseg); } } else { @@ -572,7 +592,7 @@ after_sack_rexmit: } else { tp->t_timer[TCPT_PERSIST] = 0; tp->t_rxtshift = 0; - tp->rxt_start = 0; + tp->t_rxtstart = 0; tp->t_persist_stop = 0; } } @@ -632,10 +652,11 @@ after_sack_rexmit: * know that foreign host supports TAO, suppress sending segment. */ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { - flags &= ~TH_SYN; + if (tp->t_state != TCPS_SYN_RECEIVED) + flags &= ~TH_SYN; off--, len++; if (len > 0 && tp->t_state == TCPS_SYN_SENT) { - while (tp->t_inpcb->inp_sndinprog_cnt == 0 && + while (inp->inp_sndinprog_cnt == 0 && tp->t_pktlist_head != NULL) { packetlist = tp->t_pktlist_head; packchain_listadd = tp->t_lastchain; @@ -646,11 +667,11 @@ after_sack_rexmit: packchain_listadd, tp_inp_options, (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0)), 0, -#ifdef INET6 +#if INET6 isipv6); -#else +#else /* INET6 */ 0); -#endif +#endif /* !INET6 */ } @@ -658,7 +679,7 @@ after_sack_rexmit: * tcp was closed while we were in ip, * resume close */ - if (tp->t_inpcb->inp_sndinprog_cnt == 0 && + if (inp->inp_sndinprog_cnt == 0 && (tp->t_flags & TF_CLOSING)) { tp->t_flags &= ~TF_CLOSING; (void) tcp_close(tp); @@ -701,7 +722,7 @@ after_sack_rexmit: if (sendwin == 0) { tp->t_timer[TCPT_REXMT] = 0; tp->t_rxtshift = 0; - tp->rxt_start = 0; + tp->t_rxtstart = 0; tp->snd_nxt = tp->snd_una; if (tp->t_timer[TCPT_PERSIST] == 0) tcp_setpersist(tp); @@ -718,7 +739,7 @@ after_sack_rexmit: */ basertt = get_base_rtt(tp); if (tcp_do_autosendbuf == 1 && - !INP_WAIT_FOR_IF_FEEDBACK(tp->t_inpcb) && !IN_FASTRECOVERY(tp) && + !INP_WAIT_FOR_IF_FEEDBACK(inp) && !IN_FASTRECOVERY(tp) && (so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE && tcp_cansbgrow(&so->so_snd)) { if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat && @@ -753,9 +774,10 @@ after_sack_rexmit: * Offloading (if supported by hardware) and ensure that FIN is removed * if the length no longer contains the last data byte. * - * TSO may only be used if we are in a pure bulk sending state. 
The - * presence of TCP-MD5, SACK retransmits, SACK advertizements, ipfw rules - * and IP options prevent using TSO. With TSO the TCP header is the same + * TSO may only be used if we are in a pure bulk sending state. + * The presence of TCP-MD5, SACK retransmits, SACK advertizements, + * ipfw rules and IP options, as well as disabling hardware checksum + * offload prevent using TSO. With TSO the TCP header is the same * (except for the sequence number) for all generated packets. This * makes it impossible to transmit any options which vary per generated * segment or packet. @@ -774,14 +796,11 @@ after_sack_rexmit: #endif if (len > tp->t_maxseg) { - if ((tp->t_flags & TF_TSO) && tcp_do_tso && -#if RANDOM_IP_ID - ip_use_randomid && -#endif /* RANDOM_IP_ID */ - kipf_count == 0 && dlil_filter_count == 0 && + if ((tp->t_flags & TF_TSO) && tcp_do_tso && hwcksum_tx && + ip_use_randomid && kipf_count == 0 && dlil_filter_disable_tso_count == 0 && tp->rcv_numsacks == 0 && sack_rxmit == 0 && sack_bytes_rxmt == 0 && - tp->t_inpcb->inp_options == NULL && - tp->t_inpcb->in6p_options == NULL + inp->inp_options == NULL && + inp->in6p_options == NULL #if IPSEC && ipsec_optlen == 0 #endif @@ -797,6 +816,34 @@ after_sack_rexmit: tso = 0; } } +#if MPTCP + if (so->so_flags & SOF_MP_SUBFLOW) { + int newlen = len; + if ((tp->t_mpflags & TMPF_SND_MPPRIO) || + (tp->t_mpflags & TMPF_SND_REM_ADDR) || + (tp->t_mpflags & TMPF_SND_MPFAIL)) { + if (len > 0) { + len = 0; + } + sendalot = 1; + mptcp_acknow = 1; + } else { + mptcp_acknow = 0; + } + /* + * The contiguous bytes in the subflow socket buffer can be + * discontiguous at the MPTCP level. Since only one DSS + * option can be sent in one packet, reduce length to match + * the contiguous MPTCP level. Set sendalot to send remainder. + */ + if (len > 0) + newlen = mptcp_adj_sendlen(so, off, len); + if (newlen < len) { + len = newlen; + sendalot = 1; + } + } +#endif /* MPTCP */ if (sack_rxmit) { if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc)) flags &= ~TH_FIN; @@ -807,6 +854,31 @@ after_sack_rexmit: recwin = tcp_sbspace(tp); + /* + * If the socket is capable of doing unordered send, + * pull the amount of data that can be sent from the + * unordered priority queues to the serial queue in + * the socket buffer. If bytes are not yet available + * in the highest priority message, we may not be able + * to send any new data. + */ + if (so->so_flags & SOF_ENABLE_MSGS) { + if ((off + len) > + so->so_msg_state->msg_serial_bytes) { + sbpull_unordered_data(so, off, len); + + /* check if len needs to be modified */ + if ((off + len) > + so->so_msg_state->msg_serial_bytes) { + len = so->so_msg_state->msg_serial_bytes - off; + if (len <= 0) { + len = 0; + tcpstat.tcps_msg_sndwaithipri++; + } + } + } + } + /* * Sender silly window avoidance. 
We transmit under the following * conditions when len is non-zero: @@ -820,17 +892,10 @@ after_sack_rexmit: * data (receiver may be limited the window size) */ if (len) { - if (tp->t_force) { - tp->t_flags &= ~TF_MAXSEGSNT; + if (tp->t_force) goto send; - } - if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { - if (len >= tp->t_maxseg) - tp->t_flags |= TF_MAXSEGSNT; - else - tp->t_flags &= ~TF_MAXSEGSNT; + if (SEQ_LT(tp->snd_nxt, tp->snd_max)) goto send; - } if (sack_rxmit) goto send; @@ -838,23 +903,19 @@ after_sack_rexmit: * Send new data on the connection only if it is * not flow controlled */ - if (!INP_WAIT_FOR_IF_FEEDBACK(tp->t_inpcb) || + if (!INP_WAIT_FOR_IF_FEEDBACK(inp) || tp->t_state != TCPS_ESTABLISHED) { - if (len >= tp->t_maxseg) { - tp->t_flags |= TF_MAXSEGSNT; + if (len >= tp->t_maxseg) goto send; - } if (!(tp->t_flags & TF_MORETOCOME) && - (idle || tp->t_flags & TF_NODELAY || tp->t_flags & TF_MAXSEGSNT) && + (idle || tp->t_flags & TF_NODELAY || + tp->t_flags & TF_MAXSEGSNT || + ALLOW_LIMITED_TRANSMIT(tp)) && (tp->t_flags & TF_NOPUSH) == 0 && - len + off >= so->so_snd.sb_cc) { - tp->t_flags &= ~TF_MAXSEGSNT; + len + off >= so->so_snd.sb_cc) goto send; - } - if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) { - tp->t_flags &= ~TF_MAXSEGSNT; + if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) goto send; - } } else { tcpstat.tcps_fcholdpacket++; } @@ -874,9 +935,13 @@ after_sack_rexmit: * taking into account that we are limited by * TCP_MAXWIN << tp->rcv_scale. */ - int32_t adv = imin(recwin, (int)TCP_MAXWIN << tp->rcv_scale) - + int32_t adv, oldwin = 0; + adv = imin(recwin, (int)TCP_MAXWIN << tp->rcv_scale) - (tp->rcv_adv - tp->rcv_nxt); + if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) + oldwin = tp->rcv_adv - tp->rcv_nxt; + if (adv >= (int32_t) (2 * tp->t_maxseg)) { /* Update only if the resulting scaled value of the window changed, or * if there is a change in the sequence since the last ack. @@ -886,9 +951,10 @@ after_sack_rexmit: * We will depend on the delack timer to send a window update * when needed. */ - if ((tp->t_flags & TF_STRETCHACK) == 0 && + if (!(tp->t_flags & TF_STRETCHACK) && (tp->last_ack_sent != tp->rcv_nxt || - ((recwin + adv) >> tp->rcv_scale) > recwin)) { + ((oldwin + adv) >> tp->rcv_scale) > + (oldwin >> tp->rcv_scale))) { goto send; } @@ -896,10 +962,11 @@ after_sack_rexmit: * delayed sending a window update because of streaming * detection. */ - if ((tp->t_flags & TF_STRETCHACK) != 0 && - (tp->t_flags & TF_DELACK) == 0) { + if ((tp->t_flags & TF_STRETCHACK) && + !(tp->t_flags & TF_DELACK)) { tp->t_flags |= TF_DELACK; - tp->t_timer[TCPT_DELACK] = OFFSET_FROM_START(tp, tcp_delack); + tp->t_timer[TCPT_DELACK] = + OFFSET_FROM_START(tp, tcp_delack); } } if (4 * adv >= (int32_t) so->so_rcv.sb_hiwat) @@ -917,6 +984,10 @@ after_sack_rexmit: goto send; if (SEQ_GT(tp->snd_up, tp->snd_una)) goto send; +#if MPTCP + if (mptcp_acknow) + goto send; +#endif /* MPTCP */ /* * If our state indicates that FIN should be sent * and we have not yet done so, then we need to send. @@ -929,12 +1000,12 @@ after_sack_rexmit: * after the retransmission timer has been turned off. Make sure * that the retransmission timer is set. 
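/*
 * The window-update heuristic above sends a pure ACK only when the
 * newly opened window is significant: at least two full segments, or
 * at least half of the receive buffer.  A simplified restatement of
 * that arithmetic; the TCP_MAXWIN clamp and the stretch-ACK special
 * cases are omitted for brevity.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
window_update_worth_sending(uint32_t recwin, uint32_t rcv_adv,
    uint32_t rcv_nxt, uint32_t t_maxseg, uint32_t sb_hiwat)
{
	/* window we could advertise minus what is already advertised */
	int64_t adv = (int64_t)recwin - (int64_t)(rcv_adv - rcv_nxt);

	if (adv >= 2 * (int64_t)t_maxseg)
		return (true);		/* opens two or more segments */
	if (4 * adv >= (int64_t)sb_hiwat)
		return (true);		/* opens half the receive buffer */
	return (false);
}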
*/ - if (tp->sack_enable && (tp->t_state >= TCPS_ESTABLISHED) && + if (SACK_ENABLED(tp) && (tp->t_state >= TCPS_ESTABLISHED) && SEQ_GT(tp->snd_max, tp->snd_una) && tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) { - tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); - goto just_return; + tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); + goto just_return; } /* * TCP window updates are not reliable, rather a polling protocol @@ -961,7 +1032,7 @@ after_sack_rexmit: if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && tp->t_timer[TCPT_PERSIST] == 0) { tp->t_rxtshift = 0; - tp->rxt_start = 0; + tp->t_rxtstart = 0; tcp_setpersist(tp); } just_return: @@ -969,7 +1040,7 @@ just_return: * If there is no reason to send a segment, just return. * but if there is some packets left in the packet list, send them now. */ - while (tp->t_inpcb->inp_sndinprog_cnt == 0 && + while (inp->inp_sndinprog_cnt == 0 && tp->t_pktlist_head != NULL) { packetlist = tp->t_pktlist_head; packchain_listadd = tp->t_lastchain; @@ -979,14 +1050,14 @@ just_return: error = tcp_ip_output(so, tp, packetlist, packchain_listadd, tp_inp_options, (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0)), recwin, -#ifdef INET6 +#if INET6 isipv6); -#else +#else /* INET6 */ 0); -#endif +#endif /* !INET6 */ } /* tcp was closed while we were in ip; resume close */ - if (tp->t_inpcb->inp_sndinprog_cnt == 0 && + if (inp->inp_sndinprog_cnt == 0 && (tp->t_flags & TF_CLOSING)) { tp->t_flags &= ~TF_CLOSING; (void) tcp_close(tp); @@ -997,6 +1068,16 @@ just_return: return (0); send: + /* + * Set TF_MAXSEGSNT flag if the segment size is greater than + * the max segment size. + */ + if (len > 0) { + if (len >= tp->t_maxseg) + tp->t_flags |= TF_MAXSEGSNT; + else + tp->t_flags &= ~TF_MAXSEGSNT; + } /* * Before ESTABLISHED, force sending of initial options * unless TCP set not to do any options. @@ -1033,8 +1114,13 @@ send: tp->request_r_scale); optlen += 4; } +#if MPTCP + if (mptcp_enable) { + optlen = mptcp_setup_syn_opts(so, flags, opt, + optlen); + } +#endif /* MPTCP */ } - } /* @@ -1141,7 +1227,7 @@ send: if (tp->rfbuf_ts == 0 && (so->so_rcv.sb_flags & SB_AUTOSIZE)) tp->rfbuf_ts = tcp_now; - if (tp->sack_enable && ((tp->t_flags & TF_NOOPT) == 0)) { + if (SACK_ENABLED(tp) && ((tp->t_flags & TF_NOOPT) == 0)) { /* * Tack on the SACK permitted option *last*. * And do padding of options after tacking this on. @@ -1165,7 +1251,27 @@ send: *bp++ = TCPOLEN_SACK_PERMITTED; optlen += TCPOLEN_SACK_PERMITTED; } + } +#if MPTCP + if (so->so_flags & SOF_MP_SUBFLOW) { + /* + * Its important to piggyback acks with data as ack only packets + * may get lost and data packets that don't send Data ACKs + * still advance the subflow level ACK and therefore make it + * hard for the remote end to recover in low cwnd situations. + */ + if (len != 0) + tp->t_mpflags |= (TMPF_SEND_DSN | + TMPF_MPTCP_ACKNOW); + else + tp->t_mpflags |= TMPF_MPTCP_ACKNOW; + optlen = mptcp_setup_opts(tp, off, &opt[0], optlen, flags, + len, &dlenp, &finp, &dss_val, &sseqp); + tp->t_mpflags &= ~TMPF_SEND_DSN; + } +#endif /* MPTCP */ + if (SACK_ENABLED(tp) && ((tp->t_flags & TF_NOOPT) == 0)) { /* * Send SACKs if necessary. This should be the last * option processed. 
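/*
 * How the SACK_PERMITTED option above is laid out on the wire: kind 4,
 * length 2, preceded by NOP padding so the option list stays 32-bit
 * aligned.  Stand-alone sketch; the caller is assumed to have room in
 * the option buffer (TCP options are limited to 40 bytes).
 */
#include <stdint.h>

#define TCPOPT_NOP		1
#define TCPOPT_SACK_PERMITTED	4
#define TCPOLEN_SACK_PERMITTED	2

static unsigned int
tcp_add_sackok(uint8_t *opt, unsigned int optlen)
{
	/* pad until the 2-byte option will end on a 4-byte boundary */
	while (optlen % 4 != 2)
		opt[optlen++] = TCPOPT_NOP;
	opt[optlen++] = TCPOPT_SACK_PERMITTED;
	opt[optlen++] = TCPOLEN_SACK_PERMITTED;
	return (optlen);
}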
Only as many SACKs are sent as @@ -1229,7 +1335,7 @@ send: #if INET6 if (isipv6) - ipoptlen = ip6_optlen(tp->t_inpcb); + ipoptlen = ip6_optlen(inp); else #endif { @@ -1277,6 +1383,25 @@ send: sendalot = 1; } } +#if MPTCP + /* Adjust the length in the DSS option, if it is lesser than len */ + if (dlenp) { + /* + * To test this path without SACK, artificially + * decrement len with something like + * if (len > 10) + len -= 10; + */ + if (ntohs(*dlenp) > len) { + *dlenp = htons(len); + /* Unset the FIN flag, if len was adjusted */ + if (finp) { + *finp &= ~MDSS_F; + } + sendalot = 1; + } + } +#endif /* MPTCP */ if (max_linkhdr + hdrlen > MCLBYTES) panic("tcphdr too big"); @@ -1296,8 +1421,7 @@ send: tp->t_bwmeas->bw_ts = tcp_now; } - VERIFY(tp->t_inpcb->inp_flowhash != 0); - + VERIFY(inp->inp_flowhash != 0); /* * Grab a header mbuf, attaching a copy of data to * be transmitted, and initialize the header from @@ -1310,20 +1434,27 @@ send: tcpstat.tcps_sndrexmitpack++; tcpstat.tcps_sndrexmitbyte += len; if (nstat_collect) { - nstat_route_tx(tp->t_inpcb->inp_route.ro_rt, 1, + nstat_route_tx(inp->inp_route.ro_rt, 1, len, NSTAT_TX_FLAG_RETRANSMIT); - locked_add_64(&tp->t_inpcb->inp_stat->txpackets, 1); - locked_add_64(&tp->t_inpcb->inp_stat->txbytes, len); + INP_ADD_STAT(inp, cell, wifi, txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, txbytes, len); tp->t_stat.txretransmitbytes += len; } } else { tcpstat.tcps_sndpack++; tcpstat.tcps_sndbyte += len; + if (nstat_collect) { - locked_add_64(&tp->t_inpcb->inp_stat->txpackets, 1); - locked_add_64(&tp->t_inpcb->inp_stat->txbytes, len); + INP_ADD_STAT(inp, cell, wifi, txpackets, 1); + INP_ADD_STAT(inp, cell, wifi, txbytes, len); } } +#if MPTCP + if (tp->t_mpflags & TMPF_MPTCP_TRUE) { + tcpstat.tcps_mp_sndpacks++; + tcpstat.tcps_mp_sndbytes += len; + } +#endif /* MPTCP */ /* * try to use the new interface that allocates all * the necessary mbuf hdrs under 1 mbuf lock and @@ -1377,40 +1508,71 @@ send: mtod(m, caddr_t) + hdrlen); m->m_len += len; } else { - if (m != NULL) { - m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len); - if (m->m_next == 0) { - (void) m_free(m); + uint32_t copymode; + /* + * Retain packet header metadata at the socket + * buffer if this is is an MPTCP subflow, + * otherwise move it. + */ + copymode = M_COPYM_MOVE_HDR; +#if MPTCP + if ((tp->t_mpflags & TMPF_MPTCP_TRUE) || + (tp->t_mpflags & TMPF_TCP_FALLBACK)) { + copymode = M_COPYM_NOOP_HDR; + } +#endif /* MPTCP */ + if (m != NULL) { + m->m_next = m_copym_mode(so->so_snd.sb_mb, off, + (int) len, M_DONTWAIT, copymode); + if (m->m_next == NULL) { + (void) m_free(m); error = ENOBUFS; goto out; } } else { - /* - * determine whether the mbuf pointer and offset passed back by the 'last' call - * to m_copym_with_hdrs are still valid... if the head of the socket chain has - * changed (due to an incoming ACK for instance), or the offset into the chain we - * just computed is different from the one last returned by m_copym_with_hdrs (perhaps - * we're re-transmitting a packet sent earlier), than we can't pass the mbuf pointer and - * offset into it as valid hints for m_copym_with_hdrs to use (if valid, these hints allow - * m_copym_with_hdrs to avoid rescanning from the beginning of the socket buffer mbuf list. - * setting the mbuf pointer to NULL is sufficient to disable the hint mechanism. + /* + * determine whether the mbuf pointer and + * offset passed back by the 'last' call to + * m_copym_with_hdrs are still valid... 
if the + * head of the socket chain has changed (due + * to an incoming ACK for instance), or the + * offset into the chain we just computed is + * different from the one last returned by + * m_copym_with_hdrs (perhaps we're re- + * transmitting a packet sent earlier), then + * we can't pass the mbuf pointer and offset + * into it as valid hints for m_copym_with_hdrs + * to use (if valid, these hints allow + * m_copym_with_hdrs to avoid rescanning from + * the beginning of the socket buffer mbuf list. + * + * Setting the mbuf pointer to NULL is + * sufficient to disable the hint mechanism. */ - if (m_head != so->so_snd.sb_mb || sack_rxmit || last_off != off) - m_lastm = NULL; + if (m_head != so->so_snd.sb_mb || sack_rxmit || + last_off != off) + m_lastm = NULL; last_off = off + len; m_head = so->so_snd.sb_mb; - - /* makes sure we still have data left to be sent at this point */ + + /* + * make sure we still have data left + * to be sent at this point + */ if (m_head == NULL) { error = 0; /* should we return an error? */ goto out; } - + /* - * m_copym_with_hdrs will always return the last mbuf pointer and the offset into it that - * it acted on to fullfill the current request, whether a valid 'hint' was passed in or not + * m_copym_with_hdrs will always return the + * last mbuf pointer and the offset into it that + * it acted on to fullfill the current request, + * whether a valid 'hint' was passed in or not. */ - if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, off, len, M_DONTWAIT, &m_lastm, &m_off)) == NULL) { + if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, + off, len, M_DONTWAIT, &m_lastm, &m_off, + copymode)) == NULL) { error = ENOBUFS; goto out; } @@ -1453,8 +1615,12 @@ send: m->m_len = hdrlen; } m->m_pkthdr.rcvif = 0; +#if MPTCP + /* Before opt is copied to the mbuf, set the csum field */ + mptcp_output_csum(tp, m, len, hdrlen, dss_val, sseqp); +#endif /* MPTCP */ #if CONFIG_MACF_NET - mac_mbuf_label_associate_inpcb(tp->t_inpcb, m); + mac_mbuf_label_associate_inpcb(inp, m); #endif #if INET6 if (isipv6) { @@ -1534,12 +1700,14 @@ send: #if TRAFFIC_MGT if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(so)) { - if (tp->acc_iaj > tcp_acc_iaj_react_limit) { - uint32_t min_iaj_win = tcp_min_iaj_win * tp->t_maxseg; + if (tcp_recv_throttle(tp)) { + uint32_t min_iaj_win = + tcp_min_iaj_win * tp->t_maxseg; if (tp->iaj_rwintop == 0 || SEQ_LT(tp->iaj_rwintop, tp->rcv_adv)) tp->iaj_rwintop = tp->rcv_adv; - if (SEQ_LT(tp->iaj_rwintop, tp->rcv_nxt + min_iaj_win)) + if (SEQ_LT(tp->iaj_rwintop, + tp->rcv_nxt + min_iaj_win)) tp->iaj_rwintop = tp->rcv_nxt + min_iaj_win; recwin = min(tp->iaj_rwintop - tp->rcv_nxt, recwin); } @@ -1565,7 +1733,7 @@ send: if (SEQ_GT(tp->snd_up, tp->snd_nxt)) { th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt)); th->th_flags |= TH_URG; - } else + } else { /* * If no urgent pointer to send, then we pull * the urgent pointer to the left edge of the send window @@ -1573,6 +1741,7 @@ send: * number wraparound. 
*/ tp->snd_up = tp->snd_una; /* drag it along */ + } /* * Put TCP length in extended header, and then @@ -1607,16 +1776,16 @@ send: */ if (tso) { #if INET6 - if (isipv6) - m->m_pkthdr.csum_flags = CSUM_TSO_IPV6; + if (isipv6) + m->m_pkthdr.csum_flags |= CSUM_TSO_IPV6; else #endif /* INET6 */ - m->m_pkthdr.csum_flags = CSUM_TSO_IPV4; + m->m_pkthdr.csum_flags |= CSUM_TSO_IPV4; m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen; - } - else + } else { m->m_pkthdr.tso_segsz = 0; + } /* * In transmit state, time the transmission and arrange for @@ -1667,7 +1836,7 @@ timer: if (tp->t_timer[TCPT_PERSIST]) { tp->t_timer[TCPT_PERSIST] = 0; tp->t_rxtshift = 0; - tp->rxt_start = 0; + tp->t_rxtstart = 0; tp->t_persist_stop = 0; } tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur); @@ -1702,7 +1871,7 @@ timer: * to handle ttl and tos; we could keep them in * the template, but need a way to checksum without them. */ -#ifdef INET6 +#if INET6 /* * m->m_pkthdr.len should have been set before cksum calcuration, * because in6_cksum() need it. @@ -1714,28 +1883,25 @@ timer: * Also, desired default hop limit might be changed via * Neighbor Discovery. */ - ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, - tp->t_inpcb->in6p_route.ro_rt ? - tp->t_inpcb->in6p_route.ro_rt->rt_ifp - : NULL); + ip6->ip6_hlim = in6_selecthlim(inp, inp->in6p_route.ro_rt ? + inp->in6p_route.ro_rt->rt_ifp : NULL); /* TODO: IPv6 IP6TOS_ECT bit on */ KERNEL_DEBUG(DBG_LAYER_BEG, - ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport), - (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) | - (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)), + ((inp->inp_fport << 16) | inp->inp_lport), + (((inp->in6p_laddr.s6_addr16[0] & 0xffff) << 16) | + (inp->in6p_faddr.s6_addr16[0] & 0xffff)), sendalot,0,0); } else #endif /* INET6 */ { ip->ip_len = m->m_pkthdr.len; - ip->ip_ttl = tp->t_inpcb->inp_ip_ttl; /* XXX */ - ip->ip_tos |= (tp->t_inpcb->inp_ip_tos & ~IPTOS_ECN_MASK);/* XXX */ + ip->ip_ttl = inp->inp_ip_ttl; /* XXX */ + ip->ip_tos |= (inp->inp_ip_tos & ~IPTOS_ECN_MASK);/* XXX */ KERNEL_DEBUG(DBG_LAYER_BEG, - ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport), - (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) | - (tp->t_inpcb->inp_faddr.s_addr & 0xffff)), - 0,0,0); + ((inp->inp_fport << 16) | inp->inp_lport), + (((inp->inp_laddr.s_addr & 0xffff) << 16) | + (inp->inp_faddr.s_addr & 0xffff)), 0,0,0); } /* @@ -1747,9 +1913,9 @@ timer: * 4) the MTU is not locked (if it is, then discovery has been * disabled for that route) */ -#ifdef INET6 +#if INET6 if (!isipv6) -#endif +#endif /* INET6 */ if (path_mtu_discovery && (tp->t_flags & TF_PMTUD)) ip->ip_off |= IP_DF; @@ -1762,20 +1928,25 @@ timer: * The socket is kept locked while sending out packets in ip_output, even if packet chaining is not active. */ lost = 0; - m->m_pkthdr.socket_id = socket_id; /* * Embed the flow hash in pkt hdr and mark the packet as * capable of flow controlling */ - m->m_pkthdr.m_flowhash = tp->t_inpcb->inp_flowhash; - m->m_pkthdr.m_fhflags |= - (PF_TAG_TCP | PF_TAG_FLOWHASH | PF_TAG_FLOWADV); + m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB; + m->m_pkthdr.pkt_flowid = inp->inp_flowhash; + m->m_pkthdr.pkt_flags |= PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC; +#if MPTCP + /* Disable flow advisory when using MPTCP. 
*/ + if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) +#endif /* MPTCP */ + m->m_pkthdr.pkt_flags |= PKTF_FLOW_ADV; + m->m_pkthdr.pkt_proto = IPPROTO_TCP; m->m_nextpkt = NULL; - if (tp->t_inpcb->inp_last_outifp != NULL && - tp->t_inpcb->inp_last_outifp != lo_ifp) { + if (inp->inp_last_outifp != NULL && + !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) { /* Hint to prioritize this packet if * 1. if the packet has no data * 2. the interface supports transmit-start model and did @@ -1784,7 +1955,7 @@ timer: * 4. there is no outstanding data on this connection. */ if (tcp_prioritize_acks != 0 && len == 0 && - (tp->t_inpcb->inp_last_outifp->if_eflags & + (inp->inp_last_outifp->if_eflags & (IFEF_TXSTART | IFEF_NOACKPRI)) == IFEF_TXSTART && th->th_flags == TH_ACK && tp->snd_una == tp->snd_max && tp->t_timer[TCPT_REXMT] == 0) { @@ -1796,15 +1967,15 @@ timer: tp->t_pktlist_sentlen += len; tp->t_lastchain++; -#ifdef INET6 +#if INET6 if (isipv6) { - DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, tp->t_inpcb, + DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, inp, struct ip6 *, ip6, struct tcpcb *, tp, struct tcphdr *, th); } else -#endif +#endif /* INET6 */ { - DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, tp->t_inpcb, + DTRACE_TCP5(send, struct mbuf *, m, struct inpcb *, inp, struct ip *, ip, struct tcpcb *, tp, struct tcphdr *, th); } @@ -1845,7 +2016,13 @@ timer: (tp->t_flags & (TH_PUSH | TF_ACKNOW)) || tp->t_force != 0 || tp->t_lastchain >= tcp_packet_chaining) { error = 0; - while (tp->t_inpcb->inp_sndinprog_cnt == 0 && + + /* + * Reset the stack memory of offset as the socket + * may get unlocked + */ + m_lastm = NULL; + while (inp->inp_sndinprog_cnt == 0 && tp->t_pktlist_head != NULL) { packetlist = tp->t_pktlist_head; packchain_listadd = tp->t_lastchain; @@ -1857,11 +2034,11 @@ timer: packchain_listadd, tp_inp_options, (so_options & SO_DONTROUTE), (sack_rxmit | (sack_bytes_rxmt != 0)), recwin, -#ifdef INET6 +#if INET6 isipv6); -#else +#else /* INET6 */ 0); -#endif +#endif /* !INET6 */ if (error) { /* @@ -1877,7 +2054,7 @@ timer: } } /* tcp was closed while we were in ip; resume close */ - if (tp->t_inpcb->inp_sndinprog_cnt == 0 && + if (inp->inp_sndinprog_cnt == 0 && (tp->t_flags & TF_CLOSING)) { tp->t_flags &= ~TF_CLOSING; (void) tcp_close(tp); @@ -1905,10 +2082,21 @@ timer: */ if ((flags & TH_SYN) == 0) { if (sack_rxmit) { - p->rxmit -= lost; + if (SEQ_GT((p->rxmit - lost), + tp->snd_una)) { + p->rxmit -= lost; + } else { + lost = p->rxmit - tp->snd_una; + p->rxmit = tp->snd_una; + } tp->sackhint.sack_bytes_rexmit -= lost; - } else - tp->snd_nxt -= lost; + } else { + if (SEQ_GT((tp->snd_nxt - lost), + tp->snd_una)) + tp->snd_nxt -= lost; + else + tp->snd_nxt = tp->snd_una; + } } } out: @@ -1928,7 +2116,7 @@ out: tcp_check_timer_state(tp); KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); - DTRACE_TCP5(cc, void, NULL, struct inpcb *, tp->t_inpcb, + DTRACE_TCP5(cc, void, NULL, struct inpcb *, inp, struct tcpcb *, tp, struct tcphdr *, NULL, int32_t, TCP_CC_OUTPUT_ERROR); return (0); @@ -1949,17 +2137,25 @@ out: if (tso) tp->t_flags &= ~TF_TSO; - tcp_mtudisc(tp->t_inpcb, 0); + tcp_mtudisc(inp, 0); tcp_check_timer_state(tp); KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); return 0; } - if ((error == EHOSTUNREACH || error == ENETDOWN) - && TCPS_HAVERCVDSYN(tp->t_state)) { + /* + * Unless this is due to interface restriction policy, + * treat EHOSTUNREACH/ENETDOWN as a soft error. 
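/*
 * The check that follows classifies EHOSTUNREACH/ENETDOWN as soft
 * errors -- the connection stays up and the retransmit machinery
 * retries later -- unless the failure came from an interface the
 * socket is not allowed to use (e.g. cellular denied by policy).
 * Simplified model with hypothetical parameter names.
 */
#include <errno.h>
#include <stdbool.h>

static bool
tcp_output_error_is_soft(int error, bool have_rcvd_syn, bool if_denied)
{
	if (error != EHOSTUNREACH && error != ENETDOWN)
		return (false);
	if (!have_rcvd_syn)	/* no handshake state worth preserving */
		return (false);
	if (if_denied)		/* interface restricted by policy */
		return (false);
	return (true);		/* record t_softerror and retry later */
}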
+ */ + if ((error == EHOSTUNREACH || error == ENETDOWN) && + TCPS_HAVERCVDSYN(tp->t_state) && + !((inp->inp_flags & INP_NO_IFT_CELLULAR) && + inp->inp_last_outifp != NULL && + IFNET_IS_CELLULAR(inp->inp_last_outifp))) { tp->t_softerror = error; tcp_check_timer_state(tp); - KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, + 0, 0, 0, 0, 0); return (0); } tcp_check_timer_state(tp); @@ -1985,29 +2181,30 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, int error = 0; boolean_t chain; boolean_t unlocked = FALSE; + boolean_t ifdenied = FALSE; struct inpcb *inp = tp->t_inpcb; struct ip_out_args ipoa = - { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR }; + { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF|IPOAF_BOUND_SRCADDR, 0 }; struct route ro; struct ifnet *outif = NULL; -#ifdef INET6 +#if INET6 struct ip6_out_args ip6oa = - { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR }; + { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF|IP6OAF_BOUND_SRCADDR, 0 }; struct route_in6 ro6; struct flowadv *adv = (isipv6 ? &ip6oa.ip6oa_flowadv : &ipoa.ipoa_flowadv); -#else +#else /* INET6 */ struct flowadv *adv = &ipoa.ipoa_flowadv; #endif /* !INET6 */ /* If socket was bound to an ifindex, tell ip_output about it */ if (inp->inp_flags & INP_BOUND_IF) { -#ifdef INET6 +#if INET6 if (isipv6) { ip6oa.ip6oa_boundif = inp->inp_boundifp->if_index; ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; } else -#endif +#endif /* INET6 */ { ipoa.ipoa_boundif = inp->inp_boundifp->if_index; ipoa.ipoa_flags |= IPOAF_BOUND_IF; @@ -2015,26 +2212,26 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, } if (inp->inp_flags & INP_NO_IFT_CELLULAR) { -#ifdef INET6 +#if INET6 if (isipv6) ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR; else -#endif +#endif /* INET6 */ ipoa.ipoa_flags |= IPOAF_NO_CELLULAR; } -#ifdef INET6 +#if INET6 if (isipv6) flags |= IPV6_OUTARGS; else -#endif +#endif /* INET6 */ flags |= IP_OUTARGS; /* Copy the cached route and take an extra reference */ -#ifdef INET6 +#if INET6 if (isipv6) in6p_route_copyout(inp, &ro6); else -#endif +#endif /* INET6 */ inp_route_copyout(inp, &ro); /* @@ -2106,15 +2303,18 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, */ cnt = 0; } -#ifdef INET6 - if (isipv6) +#if INET6 + if (isipv6) { error = ip6_output_list(pkt, cnt, inp->in6p_outputopts, &ro6, flags, NULL, NULL, &ip6oa); - else -#endif + ifdenied = (ip6oa.ip6oa_retflags & IP6OARF_IFDENIED); + } else { +#endif /* INET6 */ error = ip_output_list(pkt, cnt, opt, &ro, flags, NULL, &ipoa); + ifdenied = (ipoa.ipoa_retflags & IPOARF_IFDENIED); + } if (chain || error) { /* @@ -2165,28 +2365,27 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt, if ( --inp->inp_sndinprog_cnt == 0) inp->inp_flags &= ~(INP_FC_FEEDBACK); -#ifdef INET6 +#if INET6 if (isipv6) { if (ro6.ro_rt != NULL && (outif = ro6.ro_rt->rt_ifp) != inp->in6p_last_outifp) inp->in6p_last_outifp = outif; } else -#endif +#endif /* INET6 */ if (ro.ro_rt != NULL && (outif = ro.ro_rt->rt_ifp) != inp->inp_last_outifp) inp->inp_last_outifp = outif; - if ((inp->inp_flags & INP_NO_IFT_CELLULAR) && outif != NULL && - outif->if_type == IFT_CELLULAR) + if (error != 0 && ifdenied && (inp->inp_flags & INP_NO_IFT_CELLULAR)) soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED)); /* Synchronize cached PCB route & options */ -#ifdef INET6 +#if INET6 if (isipv6) in6p_route_copyin(inp, &ro6); else -#endif +#endif /* INET6 */ 
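/*
 * tcp_ip_output() above works on a copy of the PCB's cached route so
 * the output path can run without pinning the PCB's copy, then
 * synchronizes the (possibly replaced) route back.  A toy model of
 * that copy-out / copy-in discipline with a reference-counted route;
 * the types and helpers here are illustrative, not the kernel's.
 */
#include <stddef.h>

struct rte	{ int refcnt; };
struct route_c	{ struct rte *ro_rt; };

static void rte_ref(struct rte *r)	{ if (r != NULL) r->refcnt++; }
static void rte_unref(struct rte *r)	{ if (r != NULL) r->refcnt--; }

static void
route_copyout(struct route_c *dst, const struct route_c *src)
{
	*dst = *src;		/* borrow the cached route */
	rte_ref(dst->ro_rt);	/* extra reference while used unlocked */
}

static void
route_copyin(struct route_c *dst, struct route_c *src)
{
	rte_unref(dst->ro_rt);	/* drop the stale cached reference */
	*dst = *src;		/* adopt the (possibly new) route */
	src->ro_rt = NULL;
}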
inp_route_copyin(inp, &ro); if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift == 0 && @@ -2241,12 +2440,16 @@ tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th) int count = 0; tcp_seq org_ack = ntohl(th->th_ack); tcp_seq prev_ack = 0; - int tack_offset = 28; /* XXX IPv6 not supported */ + int tack_offset = 28; /* XXX IPv6 and IP options not supported */ + int twin_offset = 34; /* XXX IPv6 and IP options not supported */ int ack_size = (tp->t_flags & TF_STRETCHACK) ? (maxseg_unacked * tp->t_maxseg) : (tp->t_maxseg << 1); int segs_acked = (tp->t_flags & TF_STRETCHACK) ? maxseg_unacked : 2; struct mbuf *prev_ack_pkt = NULL; struct socket *so = tp->t_inpcb->inp_socket; + unsigned short winsz = ntohs(th->th_win); + unsigned int scaled_win = winsz<rcv_scale; + tcp_seq win_rtedge = org_ack + scaled_win; count = tp->t_lropktlen/tp->t_maxseg; @@ -2255,6 +2458,17 @@ tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th) ack_chain = m_dup(m, M_DONTWAIT); if (ack_chain) { th->th_ack = htonl(prev_ack); + /* Keep adv window constant for duplicated ACK packets */ + scaled_win = win_rtedge - prev_ack; + if (scaled_win > (int32_t)(TCP_MAXWIN << tp->rcv_scale)) + scaled_win = (int32_t)(TCP_MAXWIN << tp->rcv_scale); + th->th_win = htons(scaled_win>>tp->rcv_scale); + if (lrodebug == 5) { + printf("%s: win = %d winsz = %d sc = %d" + " lro_len %d %d\n", + __func__, scaled_win>>tp->rcv_scale, winsz, + tp->rcv_scale, tp->t_lropktlen, count); + } tail = ack_chain; count -= segs_acked; /* accounts for prev_ack packet */ count = (count <= segs_acked) ? 0 : count - segs_acked; @@ -2263,14 +2477,14 @@ tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th) } else { return NULL; } - } + } else { tp->t_lropktlen = 0; return NULL; } prev_ack_pkt = ack_chain; - + while (count > 0) { if ((prev_ack + ack_size) < org_ack) { prev_ack += ack_size; @@ -2283,6 +2497,17 @@ tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th) } mnext = m_dup(prev_ack_pkt, M_DONTWAIT); if (mnext) { + /* Keep adv window constant for duplicated ACK packets */ + scaled_win = win_rtedge - prev_ack; + if (scaled_win > (int32_t)(TCP_MAXWIN << tp->rcv_scale)) + scaled_win = (int32_t)(TCP_MAXWIN << tp->rcv_scale); + winsz = htons(scaled_win>>tp->rcv_scale); + if (lrodebug == 5) { + printf("%s: winsz = %d ack %x count %d\n", + __func__, scaled_win>>tp->rcv_scale, + prev_ack, count); + } + bcopy(&winsz, mtod(prev_ack_pkt, caddr_t) + twin_offset, 2); HTONL(prev_ack); bcopy(&prev_ack, mtod(prev_ack_pkt, caddr_t) + tack_offset, 4); NTOHL(prev_ack); @@ -2291,11 +2516,6 @@ tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th) count -= segs_acked; tcpstat.tcps_sndacks++; so_tc_update_stats(m, so, m_get_service_class(m)); - if (lrodebug == 5) { - printf("%s: lropktlen = %d count = %d, th_ack = %x \n", - __func__, tp->t_lropktlen, count, - th->th_ack); - } } else { if (lrodebug == 5) { printf("%s: failed to alloc mbuf.\n", __func__); @@ -2303,7 +2523,69 @@ tcp_send_lroacks(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th) break; } prev_ack_pkt = mnext; - } + } tp->t_lropktlen = 0; return ack_chain; } + +static int +tcp_recv_throttle (struct tcpcb *tp) +{ + uint32_t base_rtt, newsize; + int32_t qdelay; + struct sockbuf *sbrcv = &tp->t_inpcb->inp_socket->so_rcv; + + if (tcp_use_rtt_recvbg == 1 && + TSTMP_SUPPORTED(tp)) { + /* + * Timestamps are supported on this connection. Use + * RTT to look for an increase in latency. 
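/*
 * tcp_recv_throttle(), whose body follows, infers queueing delay as
 * the gap between the current RTT sample and the lowest RTT seen on
 * the path, and throttles a background flow once that gap exceeds the
 * target.  Minimal model of the decision; the constants here are
 * illustrative, not the sysctl defaults.
 */
#include <stdbool.h>
#include <stdint.h>

#define TARGET_QDELAY_MS	100		/* hypothetical target */
#define RECV_THROTTLE_MINWIN	(16 * 1024)	/* 16 KB floor */

static bool
recv_throttle_decision(uint32_t rtt_cur_ms, uint32_t base_rtt_ms,
    uint32_t *sb_idealsize)
{
	if (base_rtt_ms == 0 || rtt_cur_ms == 0)
		return (false);			/* no usable measurement */

	int32_t qdelay = (int32_t)(rtt_cur_ms - base_rtt_ms);
	if (qdelay <= TARGET_QDELAY_MS)
		return (false);

	/* halve the receive buffer target, but keep the minimum window */
	if (*sb_idealsize > RECV_THROTTLE_MINWIN) {
		uint32_t newsize = *sb_idealsize >> 1;
		if (newsize < RECV_THROTTLE_MINWIN)
			newsize = RECV_THROTTLE_MINWIN;
		*sb_idealsize = newsize;
	}
	return (true);				/* start throttling */
}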
+ */ + + /* + * If the connection is already being throttled, leave it + * in that state until rtt comes closer to base rtt + */ + if (tp->t_flagsext & TF_RECV_THROTTLE) + return (1); + + base_rtt = get_base_rtt(tp); + + if (base_rtt != 0 && tp->t_rttcur != 0) { + qdelay = tp->t_rttcur - base_rtt; + /* + * if latency increased on a background flow, + * return 1 to start throttling. + */ + if (qdelay > target_qdelay) { + tp->t_flagsext |= TF_RECV_THROTTLE; + + /* + * Reduce the recv socket buffer size to + * minimize latecy. + */ + if (sbrcv->sb_idealsize > + tcp_recv_throttle_minwin) { + newsize = sbrcv->sb_idealsize >> 1; + /* Set a minimum of 16 K */ + newsize = + max(newsize, + tcp_recv_throttle_minwin); + sbrcv->sb_idealsize = newsize; + } + return (1); + } else { + return (0); + } + } + } + + /* + * Timestamps are not supported or there is no good RTT + * measurement. Use IPDV in this case. + */ + if (tp->acc_iaj > tcp_acc_iaj_react_limit) + return (1); + + return (0); +} diff --git a/bsd/netinet/tcp_sack.c b/bsd/netinet/tcp_sack.c index 69fb8a7d0..4ca79aeac 100644 --- a/bsd/netinet/tcp_sack.c +++ b/bsd/netinet/tcp_sack.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004,2007 Apple Inc. All rights reserved. + * Copyright (c) 2004-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -318,7 +318,8 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole) * the sequence space). */ void -tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) +tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack, + u_int32_t *newbytes_acked) { struct sackhole *cur, *temp; struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; @@ -337,18 +338,18 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) * Append received valid SACK blocks to sack_blocks[]. * Check that the SACK block range is valid. */ - for (i = 0; i < to->to_nsacks; i++) { - bcopy((to->to_sacks + i * TCPOLEN_SACK), - &sack, sizeof(sack)); - sack.start = ntohl(sack.start); - sack.end = ntohl(sack.end); - if (SEQ_GT(sack.end, sack.start) && - SEQ_GT(sack.start, tp->snd_una) && - SEQ_GT(sack.start, th_ack) && - SEQ_LT(sack.start, tp->snd_max) && - SEQ_GT(sack.end, tp->snd_una) && - SEQ_LEQ(sack.end, tp->snd_max)) - sack_blocks[num_sack_blks++] = sack; + for (i = 0; i < to->to_nsacks; i++) { + bcopy((to->to_sacks + i * TCPOLEN_SACK), + &sack, sizeof(sack)); + sack.start = ntohl(sack.start); + sack.end = ntohl(sack.end); + if (SEQ_GT(sack.end, sack.start) && + SEQ_GT(sack.start, tp->snd_una) && + SEQ_GT(sack.start, th_ack) && + SEQ_LT(sack.start, tp->snd_max) && + SEQ_GT(sack.end, tp->snd_una) && + SEQ_LEQ(sack.end, tp->snd_max)) + sack_blocks[num_sack_blks++] = sack; } /* @@ -372,7 +373,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) } } } - if (TAILQ_EMPTY(&tp->snd_holes)) + if (TAILQ_EMPTY(&tp->snd_holes)) { /* * Empty scoreboard. Need to initialize snd_fack (it may be * uninitialized or have a bogus value). Scoreboard holes @@ -380,6 +381,9 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) * the logic that adds holes to the tail of the scoreboard). 
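/*
 * The validation loop above admits a SACK block only if it covers data
 * that is actually in flight: strictly above both the cumulative ACK
 * and snd_una, and no higher than snd_max.  The same test as a
 * stand-alone predicate built on the wraparound-safe comparisons.
 */
#include <stdbool.h>
#include <stdint.h>

#define SEQ_LT(a, b)	((int32_t)((a) - (b)) < 0)
#define SEQ_GT(a, b)	((int32_t)((a) - (b)) > 0)
#define SEQ_LEQ(a, b)	((int32_t)((a) - (b)) <= 0)

struct sackblk { uint32_t start, end; };

static bool
sack_block_valid(const struct sackblk *sb, uint32_t th_ack,
    uint32_t snd_una, uint32_t snd_max)
{
	return (SEQ_GT(sb->end, sb->start) &&
	    SEQ_GT(sb->start, snd_una) &&
	    SEQ_GT(sb->start, th_ack) &&
	    SEQ_LT(sb->start, snd_max) &&
	    SEQ_GT(sb->end, snd_una) &&
	    SEQ_LEQ(sb->end, snd_max));
}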
*/ tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); + *newbytes_acked += (tp->snd_fack - tp->snd_una); + } + /* * In the while-loop below, incoming SACK blocks (sack_blocks[]) * and SACK holes (snd_holes) are traversed from their tails with @@ -403,6 +407,8 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); if (temp != NULL) { tp->snd_fack = sblkp->end; + *newbytes_acked += (sblkp->end - sblkp->start); + /* Go to the previous sack block. */ sblkp--; } else { @@ -418,12 +424,16 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) SEQ_LT(tp->snd_fack, sblkp->start)) sblkp--; if (sblkp >= sack_blocks && - SEQ_LT(tp->snd_fack, sblkp->end)) + SEQ_LT(tp->snd_fack, sblkp->end)) { + *newbytes_acked += (sblkp->end - tp->snd_fack); tp->snd_fack = sblkp->end; + } } - } else if (SEQ_LT(tp->snd_fack, sblkp->end)) + } else if (SEQ_LT(tp->snd_fack, sblkp->end)) { /* fack is advanced. */ + *newbytes_acked += (sblkp->end - tp->snd_fack); tp->snd_fack = sblkp->end; + } /* We must have at least one SACK hole in scoreboard */ cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole */ /* @@ -452,6 +462,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) /* Data acks at least the beginning of hole */ if (SEQ_GEQ(sblkp->end, cur->end)) { /* Acks entire hole, so delete hole */ + *newbytes_acked += (cur->end - cur->start); temp = cur; cur = TAILQ_PREV(cur, sackhole_head, scblink); tcp_sackhole_remove(tp, temp); @@ -462,6 +473,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) continue; } else { /* Move start of hole forward */ + *newbytes_acked += (sblkp->end - cur->start); cur->start = sblkp->end; cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); } @@ -469,6 +481,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) /* Data acks at least the end of hole */ if (SEQ_GEQ(sblkp->end, cur->end)) { /* Move end of hole backward */ + *newbytes_acked += (cur->end - sblkp->start); cur->end = sblkp->start; cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); } else { @@ -476,6 +489,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) * ACKs some data in middle of a hole; need to * split current hole */ + *newbytes_acked += (sblkp->end - sblkp->start); temp = tcp_sackhole_insert(tp, sblkp->end, cur->end, cur); if (temp != NULL) { @@ -540,7 +554,7 @@ tcp_sack_partialack(tp, th) tp->t_timer[TCPT_REXMT] = 0; tp->t_rtttime = 0; /* send one or 2 segments based on how much new data was acked */ - if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2) + if (((BYTES_ACKED(th, tp)) / tp->t_maxseg) > 2) num_segs = 2; tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + (tp->snd_nxt - tp->sack_newdata) + diff --git a/bsd/netinet/tcp_subr.c b/bsd/netinet/tcp_subr.c index 531b37fa9..c9a7a6bb7 100644 --- a/bsd/netinet/tcp_subr.c +++ b/bsd/netinet/tcp_subr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,6 +86,8 @@ #include #include +#include + #include #include @@ -152,6 +154,8 @@ extern int tcp_lq_overflow; #if IPSEC extern int ipsec_bypass; #endif +extern struct tcptimerlist tcp_timer_list; +extern struct tcptailq tcp_tw_tailq; int tcp_mssdflt = TCP_MSS; SYSCTL_INT(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_RW | CTLFLAG_LOCKED, @@ -178,23 +182,6 @@ int tcp_minmss = TCP_MINMSS; SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_minmss , 0, "Minmum TCP Maximum Segment Size"); -/* - * Number of TCP segments per second we accept from remote host - * before we start to calculate average segment size. If average - * segment size drops below the minimum TCP MSS we assume a DoS - * attack and reset+drop the connection. Care has to be taken not to - * set this value too small to not kill interactive type connections - * (telnet, SSH) which send many small packets. - */ -#ifdef FIX_WORKAROUND_FOR_3894301 -__private_extern__ int tcp_minmssoverload = TCP_MINMSSOVERLOAD; -#else -__private_extern__ int tcp_minmssoverload = 0; -#endif -SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW | CTLFLAG_LOCKED, - &tcp_minmssoverload , 0, "Number of TCP Segments per Second allowed to" - "be under the MINMSS Size"); - static int tcp_do_rfc1323 = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_rfc1323 , 0, "Enable rfc1323 (high performance TCP) extensions"); @@ -211,6 +198,10 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW | CTLFLAG_LOCKED, &d SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED, &tcbinfo.ipi_count, 0, "Number of active PCBs"); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, tw_pcbcount, + CTLFLAG_RD | CTLFLAG_LOCKED, + &tcbinfo.ipi_twcount, 0, "Number of pcbs in time-wait state"); + static int icmp_may_rst = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp_may_rst, 0, "Certain ICMP unreachable messages may abort connections in SYN_SENT"); @@ -257,7 +248,10 @@ static void tcp_cc_init(void); struct zone *sack_hole_zone; struct zone *tcp_reass_zone; struct zone *tcp_bwmeas_zone; - +#if 0 +static unsigned int tcp_mptcp_dsnm_sz; +struct zone *tcp_mptcp_dsnm_zone; +#endif /* The array containing pointers to currently implemented TCP CC algorithms */ struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT]; @@ -302,9 +296,6 @@ struct inp_tp { }; #undef ALIGNMENT -extern struct inpcbhead time_wait_slots[]; -extern struct tcptimerlist tcp_timer_list; - int get_inpcb_str_size(void); int get_tcp_str_size(void); @@ -345,18 +336,26 @@ tcp_cc_init(void) * Tcp initialization */ void -tcp_init() +tcp_init(struct protosw *pp, struct domain *dp) { +#pragma unused(dp) + static int tcp_initialized = 0; vm_size_t str_size; - int i; - struct inpcbinfo *pcbinfo; - + struct inpcbinfo *pcbinfo; + + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); + + if (tcp_initialized) + return; + tcp_initialized = 1; + tcp_ccgen = 1; tcp_cleartaocache(); tcp_keepinit = TCPTV_KEEP_INIT; tcp_keepidle = TCPTV_KEEP_IDLE; tcp_keepintvl = TCPTV_KEEPINTVL; + tcp_keepcnt = TCPTV_KEEPCNT; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; @@ -365,21 +364,41 @@ tcp_init() tcp_now = tcp_now & 0x3fffffff; /* Starts tcp internal clock at a random value */ LIST_INIT(&tcb); - tcbinfo.listhead = &tcb; + tcbinfo.ipi_listhead = &tcb; + pcbinfo = &tcbinfo; + /* + * allocate lock group attribute and group for tcp pcb 
mutexes + */ + pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init(); + pcbinfo->ipi_lock_grp = lck_grp_alloc_init("tcppcb", pcbinfo->ipi_lock_grp_attr); + + /* + * allocate the lock attribute for tcp pcb mutexes + */ + pcbinfo->ipi_lock_attr = lck_attr_alloc_init(); + + if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp, + pcbinfo->ipi_lock_attr)) == NULL) { + panic("%s: unable to allocate PCB lock\n", __func__); + /* NOTREACHED */ + } + if (!powerof2(tcp_tcbhashsize)) { printf("WARNING: TCB hash size not a power of 2\n"); tcp_tcbhashsize = 512; /* safe default */ } - tcbinfo.hashsize = tcp_tcbhashsize; - tcbinfo.hashbase = hashinit(tcp_tcbhashsize, M_PCB, &tcbinfo.hashmask); - tcbinfo.porthashbase = hashinit(tcp_tcbhashsize, M_PCB, - &tcbinfo.porthashmask); + tcbinfo.ipi_hashbase = hashinit(tcp_tcbhashsize, M_PCB, &tcbinfo.ipi_hashmask); + tcbinfo.ipi_porthashbase = hashinit(tcp_tcbhashsize, M_PCB, + &tcbinfo.ipi_porthashmask); str_size = P2ROUNDUP(sizeof(struct inp_tp), sizeof(u_int64_t)); - tcbinfo.ipi_zone = (void *) zinit(str_size, 120000*str_size, 8192, "tcpcb"); + tcbinfo.ipi_zone = zinit(str_size, 120000*str_size, 8192, "tcpcb"); zone_change(tcbinfo.ipi_zone, Z_CALLERACCT, FALSE); zone_change(tcbinfo.ipi_zone, Z_EXPAND, TRUE); + tcbinfo.ipi_gc = tcp_gc; + in_pcbinfo_attach(&tcbinfo); + str_size = P2ROUNDUP(sizeof(struct sackhole), sizeof(u_int64_t)); sack_hole_zone = zinit(str_size, 120000*str_size, 8192, "sack_hole zone"); zone_change(sack_hole_zone, Z_CALLERACCT, FALSE); @@ -418,25 +437,8 @@ tcp_init() panic("tcp_init"); #undef TCP_MINPROTOHDR - /* - * allocate lock group attribute and group for tcp pcb mutexes - */ - pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); - pcbinfo->mtx_grp = lck_grp_alloc_init("tcppcb", pcbinfo->mtx_grp_attr); - - /* - * allocate the lock attribute for tcp pcb mutexes - */ - pcbinfo->mtx_attr = lck_attr_alloc_init(); - - if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) { - printf("tcp_init: mutex not alloced!\n"); - return; /* pretty much dead if this fails... 
*/ - } - - for (i=0; i < N_TIME_WAIT_SLOTS; i++) { - LIST_INIT(&time_wait_slots[i]); - } + /* Initialize time wait and timer lists */ + TAILQ_INIT(&tcp_tw_tailq); bzero(&tcp_timer_list, sizeof(tcp_timer_list)); LIST_INIT(&tcp_timer_list.lhead); @@ -490,16 +492,15 @@ tcp_fillheaders(tp, ip_ptr, tcp_ptr) ip6 = (struct ip6_hdr *)ip_ptr; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | - (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK); + (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = sizeof(struct tcphdr); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; - tcp_hdr->th_sum = in6_cksum_phdr(&inp->in6p_laddr, - &inp->in6p_faddr, htonl(sizeof(struct tcphdr)), - htonl(IPPROTO_TCP)); + tcp_hdr->th_sum = in6_pseudo(&inp->in6p_laddr, &inp->in6p_faddr, + htonl(sizeof (struct tcphdr) + IPPROTO_TCP)); } else #endif { @@ -631,6 +632,8 @@ tcp_respond( m->m_data += max_linkhdr; #if INET6 if (isipv6) { + VERIFY((MHLEN - max_linkhdr) >= + (sizeof (*ip6) + sizeof (*nth))); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); @@ -638,11 +641,18 @@ tcp_respond( } else #endif /* INET6 */ { + VERIFY((MHLEN - max_linkhdr) >= + (sizeof (*ip) + sizeof (*nth))); bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(void *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); +#if MPTCP + if ((tp) && (tp->t_mpflags & TMPF_RESET)) + flags = (TH_RST | TH_ACK); + else +#endif flags = TH_ACK; } else { m_freem(m->m_next); @@ -721,9 +731,8 @@ tcp_respond( #if INET6 if (isipv6) { nth->th_sum = 0; - nth->th_sum = in6_cksum_phdr(&ip6->ip6_src, - &ip6->ip6_dst, htons((u_short)(tlen - sizeof(struct ip6_hdr))), - htonl(IPPROTO_TCP)); + nth->th_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst, + htonl((tlen - sizeof (struct ip6_hdr)) + IPPROTO_TCP)); m->m_pkthdr.csum_flags = CSUM_TCPIPV6; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); ip6->ip6_hlim = in6_selecthlim(tp ? tp->t_inpcb : NULL, @@ -758,15 +767,21 @@ tcp_respond( MBUF_SC_UNSPEC, svc_flags); /* Embed flowhash and flow control flags */ - m->m_pkthdr.m_flowhash = tp->t_inpcb->inp_flowhash; - m->m_pkthdr.m_fhflags |= - (PF_TAG_TCP | PF_TAG_FLOWHASH | PF_TAG_FLOWADV); + m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB; + m->m_pkthdr.pkt_flowid = tp->t_inpcb->inp_flowhash; + m->m_pkthdr.pkt_flags |= PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC; +#if MPTCP + /* Disable flow advisory when using MPTCP. 
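/*
 * in6_pseudo() above seeds th_sum with the pseudo-header portion of
 * the TCP checksum so hardware offload or in_cksum() only has to add
 * the header and payload words.  For reference, a generic 16-bit one's
 * complement sum with carry folding -- a sketch of the technique, not
 * the kernel's optimized routine; data is treated as big-endian.
 */
#include <stddef.h>
#include <stdint.h>

static uint16_t
cksum16(const void *data, size_t len, uint32_t start)
{
	const uint8_t *p = data;
	uint32_t sum = start;

	while (len > 1) {
		sum += (uint32_t)(p[0] << 8 | p[1]);
		p += 2;
		len -= 2;
	}
	if (len == 1)
		sum += (uint32_t)(p[0] << 8);	/* pad the odd byte */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16); /* fold carries */
	return ((uint16_t)~sum);
}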
*/ + if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) +#endif /* MPTCP */ + m->m_pkthdr.pkt_flags |= PKTF_FLOW_ADV; + m->m_pkthdr.pkt_proto = IPPROTO_TCP; } #if INET6 if (isipv6) { struct ip6_out_args ip6oa = { ifscope, { 0 }, - IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR }; + IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0 }; if (ifscope != IFSCOPE_NONE) ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; @@ -775,20 +790,19 @@ tcp_respond( (void) ip6_output(m, NULL, ro6, IPV6_OUTARGS, NULL, NULL, &ip6oa); - if (ro6->ro_rt != NULL) { - if (ro6 == &sro6) { - rtfree(ro6->ro_rt); - ro6->ro_rt = NULL; - } else if ((outif = ro6->ro_rt->rt_ifp) != - tp->t_inpcb->in6p_last_outifp) { - tp->t_inpcb->in6p_last_outifp = outif; - } - } + + if (tp != NULL && ro6 != NULL && ro6->ro_rt != NULL && + (outif = ro6->ro_rt->rt_ifp) != + tp->t_inpcb->in6p_last_outifp) + tp->t_inpcb->in6p_last_outifp = outif; + + if (ro6 == &sro6) + ROUTE_RELEASE(ro6); } else #endif /* INET6 */ { struct ip_out_args ipoa = { ifscope, { 0 }, - IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR }; + IPOAF_SELECT_SRCIF | IPOAF_BOUND_SRCADDR, 0 }; if (ifscope != IFSCOPE_NONE) ipoa.ipoa_flags |= IPOAF_BOUND_IF; @@ -804,15 +818,16 @@ tcp_respond( */ (void) ip_output(m, NULL, &sro, IP_OUTARGS, NULL, &ipoa); + if (tp != NULL && sro.ro_rt != NULL && + (outif = sro.ro_rt->rt_ifp) != + tp->t_inpcb->inp_last_outifp) + tp->t_inpcb->inp_last_outifp = outif; + if (ro != &sro) { - if (sro.ro_rt != NULL && - (outif = sro.ro_rt->rt_ifp) != - tp->t_inpcb->inp_last_outifp) - tp->t_inpcb->inp_last_outifp = outif; /* Synchronize cached PCB route */ inp_route_copyin(tp->t_inpcb, &sro); - } else if (sro.ro_rt != NULL) { - rtfree(sro.ro_rt); + } else { + ROUTE_RELEASE(&sro); } } } @@ -836,12 +851,12 @@ tcp_newtcpcb(inp) calculate_tcp_clock(); - if (so->cached_in_sock_layer == 0) { + if (!so->cached_in_sock_layer) { it = (struct inp_tp *)(void *)inp; tp = &it->tcb; - } - else + } else { tp = (struct tcpcb *)(void *)inp->inp_saved_ppcb; + } bzero((char *) tp, sizeof(struct tcpcb)); LIST_INIT(&tp->t_segq); @@ -853,7 +868,9 @@ tcp_newtcpcb(inp) if (tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); - tp->sack_enable = tcp_do_sack; + if (tcp_do_sack) + tp->t_flagsext |= TF_SACK_ENABLE; + TAILQ_INIT(&tp->snd_holes); tp->t_inpcb = inp; /* XXX */ /* @@ -882,6 +899,12 @@ tcp_newtcpcb(inp) tp->t_persist_timeout = tcp_max_persist_timeout; tp->t_persist_stop = 0; tp->t_flagsext |= TF_RCVUNACK_WAITSS; + tp->t_rexmtthresh = tcprexmtthresh; + + /* Clear time wait tailq entry */ + tp->t_twentry.tqe_next = NULL; + tp->t_twentry.tqe_prev = NULL; + /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, @@ -969,10 +992,12 @@ tcp_close(tp) #if INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ + struct route *ro; struct rtentry *rt; int dosavessthresh; - if ( inp->inp_ppcb == NULL) /* tcp_close was called previously, bail */ + /* tcp_close was called previously, bail */ + if ( inp->inp_ppcb == NULL) return(NULL); tcp_canceltimers(tp); @@ -996,15 +1021,19 @@ tcp_close(tp) return (NULL); } + DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, + struct tcpcb *, tp, int32_t, TCPS_CLOSED); + if (CC_ALGO(tp)->cleanup != NULL) { CC_ALGO(tp)->cleanup(tp); } #if INET6 - rt = isipv6 ? inp->in6p_route.ro_rt : inp->inp_route.ro_rt; + ro = (isipv6 ? 
(struct route *)&inp->in6p_route : &inp->inp_route); #else - rt = inp->inp_route.ro_rt; + ro = &inp->inp_route; #endif + rt = ro->ro_rt; if (rt != NULL) RT_LOCK_SPIN(rt); @@ -1034,12 +1063,13 @@ tcp_close(tp) } else #endif /* INET6 */ - if (rt == NULL || !(rt->rt_flags & RTF_UP) || - ((struct sockaddr_in *)(void *)rt_key(rt))->sin_addr.s_addr == - INADDR_ANY || rt->generation_id != route_generation) { + if (ROUTE_UNUSABLE(ro) || + SIN(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) { if (tp->t_state >= TCPS_CLOSE_WAIT) { - DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp, - struct tcpcb *, tp, int32_t, TCPS_CLOSING); + DTRACE_TCP4(state__change, + void, NULL, struct inpcb *, inp, + struct tcpcb *, tp, int32_t, + TCPS_CLOSING); tp->t_state = TCPS_CLOSING; } goto no_valid_rt; @@ -1147,10 +1177,14 @@ no_valid_rt: m_freem_list(tp->t_pktlist_head); TCP_PKTLIST_CLEAR(tp); -#ifdef __APPLE__ +#if MPTCP + /* Clear MPTCP state */ + tp->t_mpflags = 0; +#endif /* MPTCP */ + if (so->cached_in_sock_layer) inp->inp_saved_ppcb = (caddr_t) tp; -#endif + /* Issue a wakeup before detach so that we don't miss * a wakeup */ @@ -1165,9 +1199,9 @@ no_valid_rt: inp->inp_fport); tp->t_flagsext &= ~TF_LRO_OFFLOADED; } - + tp->t_state = TCPS_CLOSED; #if INET6 - if (INP_CHECK_SOCKAF(so, AF_INET6)) + if (SOCK_CHECK_DOM(so, PF_INET6)) in6_pcbdetach(inp); else #endif /* INET6 */ @@ -1203,34 +1237,32 @@ tcp_drain() { if (do_tcpdrain) { - struct inpcb *inpb; - struct tcpcb *tcpb; - struct tseg_qent *te; - + struct inpcb *inp; + struct tcpcb *tp; /* * Walk the tcpbs, if existing, and flush the reassembly queue, * if there is one... - * XXX: The "Net/3" implementation doesn't imply that the TCP - * reassembly queue should be flushed, but in a situation - * where we're really low on mbufs, this is potentially - * usefull. + * Do it next time if the pcbinfo lock is in use */ - if (!lck_rw_try_lock_exclusive(tcbinfo.mtx)) /* do it next time if the lock is in use */ + if (!lck_rw_try_lock_exclusive(tcbinfo.ipi_lock)) return; - for (inpb = LIST_FIRST(tcbinfo.listhead); inpb; - inpb = LIST_NEXT(inpb, inp_list)) { - if ((tcpb = intotcpcb(inpb))) { - while ((te = LIST_FIRST(&tcpb->t_segq)) - != NULL) { - LIST_REMOVE(te, tqe_q); - m_freem(te->tqe_m); - zfree(tcp_reass_zone, te); - tcp_reass_qsize--; - } + LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != + WNT_STOPUSING) { + tcp_lock(inp->inp_socket, 1, 0); + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) + == WNT_STOPUSING) { + /* lost a race, try the next one */ + tcp_unlock(inp->inp_socket, 1, 0); + continue; + } + tp = intotcpcb(inp); + tcp_freeq(tp); + tcp_unlock(inp->inp_socket, 1, 0); } } - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); } } @@ -1362,34 +1394,34 @@ tcpcb_to_otcpcb(struct tcpcb *tp, struct otcpcb *otp) otp->snd_recover = tp->snd_recover; otp->snd_cwnd_prev = tp->snd_cwnd_prev; otp->snd_ssthresh_prev = tp->snd_ssthresh_prev; - otp->t_badrxtwin = tp->t_badrxtwin; + otp->t_badrxtwin = 0; } static int tcp_pcblist SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) - int error, i, n; + int error, i = 0, n; struct inpcb *inp, **inp_list; + struct tcpcb *tp; inp_gen_t gencnt; struct xinpgen xig; - int slot; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. 
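/*
 * The tcp_drain() rewrite above uses a two-step take on each PCB: bump
 * a use count while only the list lock is held, then take the socket
 * lock and re-check that the PCB was not torn down in between,
 * skipping it if the race was lost.  Schematic of that pattern; the
 * extern declarations are stand-ins for in_pcb_checkstate() and the
 * socket lock/flush functions.
 */
enum wnt { WNT_ACQUIRE, WNT_RELEASE, WNT_STOPUSING, WNT_OK };

/* assumed helpers, not the kernel's actual signatures */
extern enum wnt pcb_checkstate(void *inp, enum wnt op, int locked);
extern void sock_lock(void *so);
extern void sock_unlock(void *so);
extern void flush_reass_queue(void *tp);

static void
drain_one(void *inp, void *so, void *tp)
{
	if (pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
		return;			/* already being torn down */
	sock_lock(so);
	if (pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		sock_unlock(so);	/* lost the race; skip this PCB */
		return;
	}
	flush_reass_queue(tp);		/* safe: socket lock is held */
	sock_unlock(so);
}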
*/ - lck_rw_lock_shared(tcbinfo.mtx); + lck_rw_lock_shared(tcbinfo.ipi_lock); if (req->oldptr == USER_ADDR_NULL) { n = tcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xtcpcb); - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return 0; } if (req->newptr != USER_ADDR_NULL) { - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return EPERM; } @@ -1406,41 +1438,36 @@ tcp_pcblist SYSCTL_HANDLER_ARGS xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) { - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return error; } /* * We are done if there is no pcb */ if (n == 0) { - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return 0; } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) { - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return ENOMEM; } - for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n; - inp = LIST_NEXT(inp, inp_list)) { -#ifdef __APPLE__ - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) -#else - if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) -#endif + LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) inp_list[i++] = inp; + if (i >= n) break; } - for (slot = 0; slot < N_TIME_WAIT_SLOTS; slot++) { - struct inpcb *inpnxt; - - for (inp = time_wait_slots[slot].lh_first; inp && i < n; inp = inpnxt) { - inpnxt = inp->inp_list.le_next; - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) - inp_list[i++] = inp; - } + TAILQ_FOREACH(tp, &tcp_tw_tailq, t_twentry) { + inp = tp->t_inpcb; + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) + inp_list[i++] = inp; + if (i >= n) break; } n = i; @@ -1448,7 +1475,8 @@ tcp_pcblist SYSCTL_HANDLER_ARGS error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) { struct xtcpcb xt; caddr_t inp_ppcb; @@ -1458,7 +1486,8 @@ tcp_pcblist SYSCTL_HANDLER_ARGS inpcb_to_compat(inp, &xt.xt_inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb != NULL) { - tcpcb_to_otcpcb((struct tcpcb *)(void *)inp_ppcb, + tcpcb_to_otcpcb( + (struct tcpcb *)(void *)inp_ppcb, &xt.xt_tp); } else { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); @@ -1484,14 +1513,13 @@ tcp_pcblist SYSCTL_HANDLER_ARGS error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return error; } SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); -#if !CONFIG_EMBEDDED static void tcpcb_to_xtcpcb64(struct tcpcb *tp, struct xtcpcb64 *otp) @@ -1548,7 +1576,7 @@ tcpcb_to_xtcpcb64(struct tcpcb *tp, struct xtcpcb64 *otp) otp->snd_recover = tp->snd_recover; otp->snd_cwnd_prev = tp->snd_cwnd_prev; otp->snd_ssthresh_prev = tp->snd_ssthresh_prev; - otp->t_badrxtwin = tp->t_badrxtwin; + otp->t_badrxtwin = 0; } @@ -1556,27 +1584,27 @@ static int tcp_pcblist64 SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) - int error, i, n; + int error, i = 0, n; struct inpcb *inp, **inp_list; + struct tcpcb *tp; inp_gen_t gencnt; struct xinpgen xig; - int slot; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. 
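/*
 * Both pcblist handlers follow the usual sysctl two-pass shape: with
 * no output buffer supplied, return a size estimate padded by n/8 to
 * allow for connections created before the second call; otherwise
 * snapshot the list under the lock and copy it out.  Sketch of the
 * sizing step; struct xig_hdr stands in for struct xinpgen.
 */
#include <stddef.h>
#include <stdint.h>

struct xig_hdr { uint32_t len; };	/* stand-in for struct xinpgen */

static size_t
pcblist_size_estimate(size_t n_pcbs, size_t record_size)
{
	/* two generation headers + records, padded for concurrent growth */
	return (2 * sizeof(struct xig_hdr) +
	    (n_pcbs + n_pcbs / 8) * record_size);
}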
*/ - lck_rw_lock_shared(tcbinfo.mtx); + lck_rw_lock_shared(tcbinfo.ipi_lock); if (req->oldptr == USER_ADDR_NULL) { n = tcbinfo.ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xtcpcb64); - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return 0; } if (req->newptr != USER_ADDR_NULL) { - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return EPERM; } @@ -1593,41 +1621,36 @@ tcp_pcblist64 SYSCTL_HANDLER_ARGS xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) { - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return error; } /* * We are done if there is no pcb */ if (n == 0) { - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return 0; } inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); if (inp_list == 0) { - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return ENOMEM; } - for (inp = LIST_FIRST(tcbinfo.listhead), i = 0; inp && i < n; - inp = LIST_NEXT(inp, inp_list)) { -#ifdef __APPLE__ - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) -#else - if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->p, inp)) -#endif + LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) { + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) inp_list[i++] = inp; + if (i >= n) break; } - for (slot = 0; slot < N_TIME_WAIT_SLOTS; slot++) { - struct inpcb *inpnxt; - - for (inp = time_wait_slots[slot].lh_first; inp && i < n; inp = inpnxt) { - inpnxt = inp->inp_list.le_next; - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) - inp_list[i++] = inp; - } + TAILQ_FOREACH(tp, &tcp_tw_tailq, t_twentry) { + inp = tp->t_inpcb; + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) + inp_list[i++] = inp; + if (i >= n) break; } n = i; @@ -1665,14 +1688,13 @@ tcp_pcblist64 SYSCTL_HANDLER_ARGS error = SYSCTL_OUT(req, &xig, sizeof xig); } FREE(inp_list, M_TEMP); - lck_rw_done(tcbinfo.mtx); + lck_rw_done(tcbinfo.ipi_lock); return error; } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, tcp_pcblist64, "S,xtcpcb64", "List of active TCP connections"); -#endif /* !CONFIG_EMBEDDED */ static int tcp_pcblist_n SYSCTL_HANDLER_ARGS @@ -1691,9 +1713,10 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, __private_extern__ void -tcp_get_ports_used(unsigned int ifindex, uint8_t *bitfield) +tcp_get_ports_used(uint32_t ifindex, int protocol, uint32_t wildcardok, + bitstr_t *bitfield) { - inpcb_get_ports_used(ifindex, bitfield, &tcbinfo); + inpcb_get_ports_used(ifindex, protocol, wildcardok, bitfield, &tcbinfo); } __private_extern__ uint32_t @@ -1702,6 +1725,12 @@ tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags) return inpcb_count_opportunistic(ifindex, &tcbinfo, flags); } +__private_extern__ uint32_t +tcp_find_anypcb_byaddr(struct ifaddr *ifa) +{ + return inpcb_find_anypcb_byaddr(ifa, &tcbinfo); +} + void tcp_ctlinput(cmd, sa, vip) int cmd; @@ -1945,7 +1974,7 @@ tcp_new_isn(tp) if (((tp->t_state == TCPS_LISTEN) || (tp->t_state == TCPS_TIME_WAIT)) && tcp_strict_rfc1948 == 0) #ifdef __APPLE__ - return random(); + return RandomULong(); #else return arc4random(); #endif @@ -2090,6 +2119,9 @@ tcp_mtudisc( (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP) mss -= TCPOLEN_TSTAMP_APPA; +#if MPTCP + mss -= mptcp_adj_mss(tp, TRUE); +#endif if (so->so_snd.sb_hiwat < mss) mss = so->so_snd.sb_hiwat; @@ -2129,8 +2161,12 @@ tcp_rtlookup(inp, input_ifscope) if ((rt = ro->ro_rt) != NULL) 
RT_LOCK(rt); - if (rt == NULL || !(rt->rt_flags & RTF_UP) || - rt->generation_id != route_generation) { + if (ROUTE_UNUSABLE(ro)) { + if (rt != NULL) { + RT_UNLOCK(rt); + rt = NULL; + } + ROUTE_RELEASE(ro); /* No route yet, so try to acquire one */ if (inp->inp_faddr.s_addr != INADDR_ANY) { unsigned int ifscope; @@ -2150,13 +2186,13 @@ tcp_rtlookup(inp, input_ifscope) ifscope = (inp->inp_flags & INP_BOUND_IF) ? inp->inp_boundifp->if_index : input_ifscope; - if (rt != NULL) - RT_UNLOCK(rt); rtalloc_scoped(ro, ifscope); if ((rt = ro->ro_rt) != NULL) RT_LOCK(rt); } } + if (rt != NULL) + RT_LOCK_ASSERT_HELD(rt); /* * Update MTU discovery determination. Don't do it if: @@ -2189,6 +2225,14 @@ tcp_rtlookup(inp, input_ifscope) tcp_set_tso(tp, rt->rt_ifp); } + /* Note if the peer is local */ + if (rt != NULL && + (rt->rt_gateway->sa_family == AF_LINK || + rt->rt_ifp->if_flags & IFF_LOOPBACK || + in_localaddr(inp->inp_faddr))) { + tp->t_flags |= TF_LOCAL; + } + /* * Caller needs to call RT_UNLOCK(rt). */ @@ -2211,8 +2255,12 @@ tcp_rtlookup6(inp, input_ifscope) if ((rt = ro6->ro_rt) != NULL) RT_LOCK(rt); - if (rt == NULL || !(rt->rt_flags & RTF_UP) || - rt->generation_id != route_generation) { + if (ROUTE_UNUSABLE(ro6)) { + if (rt != NULL) { + RT_UNLOCK(rt); + rt = NULL; + } + ROUTE_RELEASE(ro6); /* No route yet, so try to acquire one */ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { struct sockaddr_in6 *dst6; @@ -2233,13 +2281,14 @@ tcp_rtlookup6(inp, input_ifscope) ifscope = (inp->inp_flags & INP_BOUND_IF) ? inp->inp_boundifp->if_index : input_ifscope; - if (rt != NULL) - RT_UNLOCK(rt); rtalloc_scoped((struct route *)ro6, ifscope); if ((rt = ro6->ro_rt) != NULL) RT_LOCK(rt); } } + if (rt != NULL) + RT_LOCK_ASSERT_HELD(rt); + /* * Update path MTU Discovery determination * while looking up the route: @@ -2280,6 +2329,15 @@ tcp_rtlookup6(inp, input_ifscope) tcp_set_tso(tp, rt->rt_ifp); } + /* Note if the peer is local */ + if (rt != NULL && + (IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr) || + IN6_IS_ADDR_LINKLOCAL(&inp->in6p_faddr) || + rt->rt_gateway->sa_family == AF_LINK || + in6_localaddr(&inp->in6p_faddr))) { + tp->t_flags |= TF_LOCAL; + } + /* * Caller needs to call RT_UNLOCK(rt). 
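/*
 * The TF_LOCAL classification above marks a peer as directly
 * reachable: the route's gateway is a link-layer address, the route
 * points at a loopback interface, or the destination sits in a
 * directly attached subnet.  Compact restatement with the subnet test
 * hoisted into a caller-supplied flag; AF_LINK_VALUE is illustrative.
 */
#include <stdbool.h>

#define AF_LINK_VALUE	18	/* AF_LINK on BSD-derived systems */

static bool
peer_is_local(int gw_family, bool rt_if_loopback, bool faddr_in_local_subnet)
{
	return (gw_family == AF_LINK_VALUE || rt_if_loopback ||
	    faddr_in_local_subnet);
}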
*/ @@ -2418,9 +2476,11 @@ tcp_unlock(struct socket *so, int refcount, void *lr) lr_saved = lr; #ifdef MORE_TCPLOCK_DEBUG - printf("tcp_unlock: so=%p sopcb=%p lock=%p ref=%x lr=%p\n", - so, so->so_pcb, &((struct inpcb *)so->so_pcb)->inpcb_mtx, - so->so_usecount, lr_saved); + printf("tcp_unlock: so=0x%llx sopcb=0x%llx lock=0x%llx ref=%x " + "lr=0x%llx\n", (uint64_t)VM_KERNEL_ADDRPERM(so), + (uint64_t)VM_KERNEL_ADDRPERM(so->so_pcb), + (uint64_t)VM_KERNEL_ADDRPERM(&(sotoinpcb(so)->inpcb_mtx)), + so->so_usecount, (uint64_t)VM_KERNEL_ADDRPERM(lr_saved)); #endif if (refcount) so->so_usecount--; @@ -2473,11 +2533,21 @@ tcp_getlock( static void tcp_sbrcv_grow_rwin(struct tcpcb *tp, struct sockbuf *sb) { u_int32_t rcvbufinc = tp->t_maxseg << tcp_autorcvbuf_inc_shift; + u_int32_t rcvbuf = sb->sb_hiwat; + struct socket *so = tp->t_inpcb->inp_socket; + + /* + * If message delivery is enabled, do not count + * unordered bytes in receive buffer towards hiwat + */ + if (so->so_flags & SOF_ENABLE_MSGS) + rcvbuf = rcvbuf - so->so_msg_state->msg_uno_bytes; + if (tcp_do_autorcvbuf == 1 && tcp_cansbgrow(sb) && (tp->t_flags & TF_SLOWLINK) == 0 && - (sb->sb_hiwat - sb->sb_cc) < rcvbufinc && - (sb->sb_hiwat < tcp_autorcvbuf_max)) { + (rcvbuf - sb->sb_cc) < rcvbufinc && + (rcvbuf < tcp_autorcvbuf_max)) { sbreserve(sb, (sb->sb_hiwat + rcvbufinc)); } } @@ -2486,11 +2556,23 @@ int32_t tcp_sbspace(struct tcpcb *tp) { struct sockbuf *sb = &tp->t_inpcb->inp_socket->so_rcv; + u_int32_t rcvbuf = sb->sb_hiwat; int32_t space; + struct socket *so = tp->t_inpcb->inp_socket; + + /* + * If message delivery is enabled, do not count + * unordered bytes in receive buffer towards hiwat mark. + * This value is used to return correct rwnd that does + * not reflect the extra unordered bytes added to the + * receive socket buffer. + */ + if (so->so_flags & SOF_ENABLE_MSGS) + rcvbuf = rcvbuf - so->so_msg_state->msg_uno_bytes; tcp_sbrcv_grow_rwin(tp, sb); - space = ((int32_t) imin((sb->sb_hiwat - sb->sb_cc), + space = ((int32_t) imin((rcvbuf - sb->sb_cc), (sb->sb_mbmax - sb->sb_mbcnt))); if (space < 0) space = 0; @@ -2514,16 +2596,23 @@ tcp_sbspace(struct tcpcb *tp) * Checks TCP Segment Offloading capability for a given connection and interface pair. */ void -tcp_set_tso(tp, ifp) - struct tcpcb *tp; - struct ifnet *ifp; +tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp) { +#if MPTCP + /* + * We can't use TSO if this tcpcb belongs to an MPTCP session. + */ + if (tp->t_mpflags & TMPF_MPTCP_TRUE) { + tp->t_flags &= ~TF_TSO; + return; + } +#endif #if INET6 struct inpcb *inp = tp->t_inpcb; int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; if (isipv6) { - if (ifp && ifp->if_hwassist & IFNET_TSO_IPV6) { + if (ifp && (ifp->if_hwassist & IFNET_TSO_IPV6)) { tp->t_flags |= TF_TSO; if (ifp->if_tso_v6_mtu != 0) tp->tso_max_segment_size = ifp->if_tso_v6_mtu; @@ -2536,7 +2625,7 @@ tcp_set_tso(tp, ifp) #endif /* INET6 */ { - if (ifp && ifp->if_hwassist & IFNET_TSO_IPV4) { + if (ifp && (ifp->if_hwassist & IFNET_TSO_IPV4)) { tp->t_flags |= TF_TSO; if (ifp->if_tso_v4_mtu != 0) tp->tso_max_segment_size = ifp->if_tso_v4_mtu; @@ -2565,8 +2654,16 @@ calculate_tcp_clock() struct timeval now, hold_now; uint32_t incr = 0; - timevaladd(&tv, &interval); microuptime(&now); + + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to update the counter returnable via net_uptime() when + * we read time. 
+ */ + net_update_uptime_secs(now.tv_sec); + + timevaladd(&tv, &interval); if (timevalcmp(&now, &tv, >)) { /* time to update the clock */ lck_spin_lock(tcp_uptime_lock); diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c index a8df38947..bc2942705 100644 --- a/bsd/netinet/tcp_timer.c +++ b/bsd/netinet/tcp_timer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -74,8 +74,8 @@ #include #include #include - #include /* before tcp_seq.h, for tcp_random18() */ +#include #include #include @@ -102,6 +102,7 @@ #endif #include #include +#include extern void postevent(struct socket *, struct sockbuf *, int); @@ -122,6 +123,12 @@ extern void postevent(struct socket *, struct sockbuf *, panic("Bad link elm %p prev->next != elm", (elm)); \ } while(0) +/* tcp timer list */ +struct tcptimerlist tcp_timer_list; + +/* List of pcbs in timewait state, protected by tcbinfo's ipi_lock */ +struct tcptailq tcp_tw_tailq; + static int background_io_trigger = 5; SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_trigger, CTLFLAG_RW | CTLFLAG_LOCKED, &background_io_trigger, 0, "Background IO Trigger Setting"); @@ -159,6 +166,10 @@ int tcp_keepintvl; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); +int tcp_keepcnt; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_keepcnt, 0, "number of times to repeat keepalive"); + int tcp_msl; SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); @@ -196,6 +207,12 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres, CTLFLAG_RW | CT &tcp_broken_peer_syn_rxmit_thres, 0, "Number of retransmitted SYNs before " "TCP disables rfc1323 and rfc1644 during the rest of attempts"); +/* A higher threshold on local connections for disabling RFC 1323 options */ +static int tcp_broken_peer_syn_rxmit_thres_local = 10; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rexmit_thres_local, + CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_broken_peer_syn_rxmit_thres_local, 0, + "Number of retransmitted SYNs before disabling RFC 1323 options on local connections"); + static int tcp_timer_advanced = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_timer_advanced, CTLFLAG_RD | CTLFLAG_LOCKED, &tcp_timer_advanced, 0, "Number of times one of the timers was advanced"); @@ -213,12 +230,11 @@ int tcp_pmtud_black_hole_mss = 1200 ; SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_pmtud_black_hole_mss, 0, "Path MTU Discovery Black Hole Detection lowered MSS"); -static int tcp_keepcnt = TCPTV_KEEPCNT; -static int tcp_gc_done = FALSE; /* perfromed garbage collection of "used" sockets */ +/* performed garbage collection of "used" sockets */ +static boolean_t tcp_gc_done = FALSE; + /* max idle probes */ int tcp_maxpersistidle; - /* max idle time in persist */ -int tcp_maxidle; /* TCP delack timer is set to 100 ms. 
Since the processing of timer list in fast * mode will happen no faster than 100 ms, the delayed ack timer will fire some where @@ -226,11 +242,12 @@ int tcp_maxidle; */ int tcp_delack = TCP_RETRANSHZ / 10; -struct inpcbhead time_wait_slots[N_TIME_WAIT_SLOTS]; -int cur_tw_slot = 0; - -/* tcp timer list */ -struct tcptimerlist tcp_timer_list; +#if MPTCP +/* + * MP_JOIN retransmission of 3rd ACK will be every 500 msecs without backoff + */ +int tcp_jack_rxmt = TCP_RETRANSHZ / 2; +#endif /* MPTCP */ /* The frequency of running through the TCP timer list in * fast and slow mode can be configured. @@ -248,6 +265,8 @@ static void tcp_sched_timerlist(uint32_t offset); static uint32_t tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index); static void tcp_sched_timers(struct tcpcb *tp); static inline void tcp_set_lotimer_index(struct tcpcb *); +static void tcp_rexmt_save_state(struct tcpcb *tp); +void tcp_remove_from_time_wait(struct inpcb *inp); /* Macro to compare two timers. If there is a reset of the sign bit, it is * safe to assume that the timer has wrapped around. By doing signed comparision, @@ -264,59 +283,76 @@ timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2) { #define TIMER_IS_ON_LIST(tp) ((tp)->t_flags & TF_TIMER_ONLIST) -void add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay); +static void add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay); void add_to_time_wait(struct tcpcb *tp, uint32_t delay) ; -static void tcp_garbage_collect(struct inpcb *, int); +static boolean_t tcp_garbage_collect(struct inpcb *, int); -void add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay) +/* + * Add to tcp timewait list, delay is given in milliseconds. + */ +static void +add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay) { - int tw_slot; - struct inpcbinfo *pcbinfo = &tcbinfo; + struct inpcbinfo *pcbinfo = &tcbinfo; + struct inpcb *inp = tp->t_inpcb; uint32_t timer; - /* pcb list should be locked when we get here */ - lck_rw_assert(pcbinfo->mtx, LCK_RW_ASSERT_EXCLUSIVE); - - LIST_REMOVE(tp->t_inpcb, inp_list); - - /* if (tp->t_timer[TCPT_2MSL] <= 0) - tp->t_timer[TCPT_2MSL] = 1; */ + /* pcb list should be locked when we get here */ + lck_rw_assert(pcbinfo->ipi_lock, LCK_RW_ASSERT_EXCLUSIVE); - /* - * Because we're pulling this pcb out of the main TCP pcb list, - * we need to recalculate the TCPT_2MSL timer value for tcp_slowtimo - * higher timer granularity. 
- */ - - timer = (delay / TCP_RETRANSHZ) * PR_SLOWHZ; - tp->t_rcvtime = (tp->t_rcvtime / TCP_RETRANSHZ) * PR_SLOWHZ; + /* We may get here multiple times, so check */ + if (!(inp->inp_flags2 & INP2_TIMEWAIT)) { + pcbinfo->ipi_twcount++; + inp->inp_flags2 |= INP2_TIMEWAIT; + + /* Remove from global inp list */ + LIST_REMOVE(inp, inp_list); + } else { + TAILQ_REMOVE(&tcp_tw_tailq, tp, t_twentry); + } - tp->t_rcvtime += timer & (N_TIME_WAIT_SLOTS - 1); + /* Compute the time at which this socket can be closed */ + timer = tcp_now + delay; + + /* We will use the TCPT_2MSL timer for tracking this delay */ - tw_slot = (timer & (N_TIME_WAIT_SLOTS - 1)) + cur_tw_slot; - if (tw_slot >= N_TIME_WAIT_SLOTS) - tw_slot -= N_TIME_WAIT_SLOTS; + if (TIMER_IS_ON_LIST(tp)) + tcp_remove_timer(tp); + tp->t_timer[TCPT_2MSL] = timer; - LIST_INSERT_HEAD(&time_wait_slots[tw_slot], tp->t_inpcb, inp_list); + TAILQ_INSERT_TAIL(&tcp_tw_tailq, tp, t_twentry); } -void add_to_time_wait(struct tcpcb *tp, uint32_t delay) +void +add_to_time_wait(struct tcpcb *tp, uint32_t delay) { - struct inpcbinfo *pcbinfo = &tcbinfo; - - if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { + struct inpcbinfo *pcbinfo = &tcbinfo; + + if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { tcp_unlock(tp->t_inpcb->inp_socket, 0, 0); - lck_rw_lock_exclusive(pcbinfo->mtx); + lck_rw_lock_exclusive(pcbinfo->ipi_lock); tcp_lock(tp->t_inpcb->inp_socket, 0, 0); } add_to_time_wait_locked(tp, delay); - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); + + inpcb_gc_sched(pcbinfo, INPCB_TIMER_LAZY); } -static void +/* If this is on time wait queue, remove it. */ +void +tcp_remove_from_time_wait(struct inpcb *inp) +{ + struct tcpcb *tp = intotcpcb(inp); + if (inp->inp_flags2 & INP2_TIMEWAIT) + TAILQ_REMOVE(&tcp_tw_tailq, tp, t_twentry); +} + +static boolean_t tcp_garbage_collect(struct inpcb *inp, int istimewait) { + boolean_t active = FALSE; struct socket *so; struct tcpcb *tp; @@ -330,13 +366,23 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait) * overflow sockets that are eligible for garbage collection have * their usecounts set to 1. */ - if (so->so_usecount > 1 || !lck_mtx_try_lock_spin(&inp->inpcb_mtx)) - return; + if (!lck_mtx_try_lock_spin(&inp->inpcb_mtx)) + return (TRUE); /* Check again under the lock */ if (so->so_usecount > 1) { + if (inp->inp_wantcnt == WNT_STOPUSING) + active = TRUE; lck_mtx_unlock(&inp->inpcb_mtx); - return; + return (active); + } + + if (istimewait && + TSTMP_GEQ(tcp_now, tp->t_timer[TCPT_2MSL]) && + tp->t_state != TCPS_CLOSED) { + /* Become a regular mutex */ + lck_mtx_convert_spin(&inp->inpcb_mtx); + tcp_close(tp); } /* @@ -344,42 +390,46 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait) * only if we are called to clean up the time wait slots, since * tcp_dropdropablreq() considers a socket to have been fully * dropped after add_to_time_wait() is finished. 
- * Also handle the case of connections getting closed by the peer while in the queue as - * seen with rdar://6422317 - * + * Also handle the case of connections getting closed by the peer + * while in the queue as seen with rdar://6422317 + * */ - if (so->so_usecount == 1 && + if (so->so_usecount == 1 && ((istimewait && (so->so_flags & SOF_OVERFLOW)) || - ((tp != NULL) && (tp->t_state == TCPS_CLOSED) && (so->so_head != NULL) - && ((so->so_state & (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE)) == - (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE))))) { + ((tp != NULL) && (tp->t_state == TCPS_CLOSED) && + (so->so_head != NULL) && + ((so->so_state & (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE)) == + (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE))))) { if (inp->inp_state != INPCB_STATE_DEAD) { /* Become a regular mutex */ lck_mtx_convert_spin(&inp->inpcb_mtx); #if INET6 - if (INP_CHECK_SOCKAF(so, AF_INET6)) + if (SOCK_CHECK_DOM(so, PF_INET6)) in6_pcbdetach(inp); else #endif /* INET6 */ - in_pcbdetach(inp); + in_pcbdetach(inp); } so->so_usecount--; + if (inp->inp_wantcnt == WNT_STOPUSING) + active = TRUE; lck_mtx_unlock(&inp->inpcb_mtx); - return; + return (active); } else if (inp->inp_wantcnt != WNT_STOPUSING) { lck_mtx_unlock(&inp->inpcb_mtx); - return; + return (FALSE); } /* - * We get here because the PCB is no longer searchable (WNT_STOPUSING); - * detach (if needed) and dispose if it is dead (usecount is 0). This - * covers all cases, including overflow sockets and those that are - * considered as "embryonic", i.e. created by sonewconn() in TCP input - * path, and have not yet been committed. For the former, we reduce - * the usecount to 0 as done by the code above. For the latter, the - * usecount would have reduced to 0 as part calling soabort() when the + * We get here because the PCB is no longer searchable + * (WNT_STOPUSING); detach (if needed) and dispose if it is dead + * (usecount is 0). This covers all cases, including overflow + * sockets and those that are considered as "embryonic", + * i.e. created by sonewconn() in TCP input path, and have + * not yet been committed. For the former, we reduce the usecount + * to 0 as done by the code above. For the latter, the usecount + * would have reduced to 0 as part calling soabort() when the * socket is dropped at the end of tcp_input(). */ if (so->so_usecount == 0) { @@ -387,8 +437,9 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait) struct tcpcb *, tp, int32_t, TCPS_CLOSED); /* Become a regular mutex */ lck_mtx_convert_spin(&inp->inpcb_mtx); - - /* If this tp still happens to be on the timer list, + + /* + * If this tp still happens to be on the timer list, * take it out */ if (TIMER_IS_ON_LIST(tp)) { @@ -397,111 +448,100 @@ tcp_garbage_collect(struct inpcb *inp, int istimewait) if (inp->inp_state != INPCB_STATE_DEAD) { #if INET6 - if (INP_CHECK_SOCKAF(so, AF_INET6)) + if (SOCK_CHECK_DOM(so, PF_INET6)) in6_pcbdetach(inp); else #endif /* INET6 */ - in_pcbdetach(inp); + in_pcbdetach(inp); } in_pcbdispose(inp); - } else { - lck_mtx_unlock(&inp->inpcb_mtx); + return (FALSE); } + + lck_mtx_unlock(&inp->inpcb_mtx); + return (TRUE); } +/* + * TCP garbage collector callback (inpcb_timer_func_t). + * + * Returns the number of pcbs that will need to be gc-ed soon; + * returning > 0 will keep the timer active.
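+ *
+ * For example, if tcp_garbage_collect() cannot take a pcb's lock
+ * because another thread still holds it, the pcb is reported as
+ * active; the atomic_add_32() on ipi_gc_req.intimer_fast below
+ * then makes the inpcb GC timer re-arm at the fast interval
+ * instead of waiting for a lazy run.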
+ */ void -tcp_slowtimo(void) +tcp_gc(struct inpcbinfo *ipi) { struct inpcb *inp, *nxt; - struct tcpcb *tp; + struct tcpcb *tw_tp, *tw_ntp; #if TCPDEBUG int ostate; #endif - #if KDEBUG static int tws_checked = 0; #endif - struct inpcbinfo *pcbinfo = &tcbinfo; - - KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_START, 0,0,0,0,0); - - tcp_maxidle = tcp_keepcnt * tcp_keepintvl; + KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_START, 0, 0, 0, 0, 0); - /* Update tcp_now here as it may get used while processing the slow timer */ + /* + * Update tcp_now here as it may get used while + * processing the slow timer. + */ calculate_tcp_clock(); - /* Garbage collect socket/tcpcb: We need to acquire the list lock + /* + * Garbage collect socket/tcpcb: We need to acquire the list lock * exclusively to do this */ - if (lck_rw_try_lock_exclusive(pcbinfo->mtx) == FALSE) { - if (tcp_gc_done == TRUE) { /* don't sweat it this time. cleanup was done last time */ + if (lck_rw_try_lock_exclusive(ipi->ipi_lock) == FALSE) { + /* don't sweat it this time; cleanup was done last time */ + if (tcp_gc_done == TRUE) { tcp_gc_done = FALSE; - KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, cur_tw_slot,0,0,0); - return; /* Upgrade failed and lost lock - give up this time. */ + KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, + tws_checked, cur_tw_slot, 0, 0, 0); + /* Lock upgrade failed, give up this round */ + atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); + return; } - lck_rw_lock_exclusive(pcbinfo->mtx); /* Upgrade failed, lost lock now take it again exclusive */ + /* Upgrade failed, lost lock now take it again exclusive */ + lck_rw_lock_exclusive(ipi->ipi_lock); } tcp_gc_done = TRUE; - /* - * Process the items in the current time-wait slot - */ -#if KDEBUG - tws_checked = 0; -#endif - KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_NONE, tws_checked,0,0,0,0); - - LIST_FOREACH(inp, &time_wait_slots[cur_tw_slot], inp_list) { -#if KDEBUG - tws_checked++; -#endif - - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) - continue; - - tcp_lock(inp->inp_socket, 1, 0); - - if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) - goto twunlock; - - tp = intotcpcb(inp); - if (tp == NULL) /* tp already closed, remove from list */ - goto twunlock; - - if (tp->t_timer[TCPT_2MSL] >= N_TIME_WAIT_SLOTS) { - tp->t_timer[TCPT_2MSL] -= N_TIME_WAIT_SLOTS; - tp->t_rcvtime += N_TIME_WAIT_SLOTS; - } - else - tp->t_timer[TCPT_2MSL] = 0; - - if (tp->t_timer[TCPT_2MSL] == 0) { + LIST_FOREACH_SAFE(inp, &tcb, inp_list, nxt) { + if (tcp_garbage_collect(inp, 0)) + atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); + } - /* That pcb is ready for a close */ - tcp_free_sackholes(tp); - tp = tcp_close(tp); + /* Now cleanup the time wait ones */ + TAILQ_FOREACH_SAFE(tw_tp, &tcp_tw_tailq, t_twentry, tw_ntp) { + /* + * We check the timestamp here without holding the + * socket lock for better performance. If there are + * any pcbs in time-wait, the timer will get rescheduled. + * Hence some error in this check can be tolerated. 
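+ * For example, a pcb whose 2MSL deadline expires right after this
+ * unlocked read is simply left on tcp_tw_tailq; nothing is freed
+ * early, and the pcb is picked up on the next GC run.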
+ */ + if (TSTMP_GEQ(tcp_now, tw_tp->t_timer[TCPT_2MSL])) { + if (tcp_garbage_collect(tw_tp->t_inpcb, 1)) + atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); + } else { + break; } -twunlock: - tcp_unlock(inp->inp_socket, 1, 0); } + /* take into account pcbs that are still in time_wait_slots */ + atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, ipi->ipi_twcount); - LIST_FOREACH_SAFE(inp, &tcb, inp_list, nxt) { - tcp_garbage_collect(inp, 0); - } + lck_rw_done(ipi->ipi_lock); - /* Now cleanup the time wait ones */ - LIST_FOREACH_SAFE(inp, &time_wait_slots[cur_tw_slot], inp_list, nxt) { - tcp_garbage_collect(inp, 1); - } + /* Clean up the socache while we are here */ + if (so_cache_timer()) + atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); - if (++cur_tw_slot >= N_TIME_WAIT_SLOTS) - cur_tw_slot = 0; - - lck_rw_done(pcbinfo->mtx); - KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, cur_tw_slot,0,0,0); + KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, + cur_tw_slot, 0, 0, 0); + + return; } /* @@ -528,6 +568,41 @@ int tcp_backoff[TCP_MAXRXTSHIFT + 1] = static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ +static void tcp_rexmt_save_state(struct tcpcb *tp) +{ + u_int32_t fsize; + if (TSTMP_SUPPORTED(tp)) { + /* + * Since timestamps are supported on the connection, + * we can do recovery as described in rfc 4015. + */ + fsize = tp->snd_max - tp->snd_una; + tp->snd_ssthresh_prev = max(fsize, tp->snd_ssthresh); + tp->snd_recover_prev = tp->snd_recover; + } else { + /* + * Timestamp option is not supported on this connection. + * Record ssthresh and cwnd so they can + * be recovered if this turns out to be a "bad" retransmit. + * A retransmit is considered "bad" if an ACK for this + * segment is received within RTT/2 interval; the assumption + * here is that the ACK was already in flight. See + * "On Estimating End-to-End Network Path Properties" by + * Allman and Paxson for more details. + */ + tp->snd_cwnd_prev = tp->snd_cwnd; + tp->snd_ssthresh_prev = tp->snd_ssthresh; + tp->snd_recover_prev = tp->snd_recover; + if (IN_FASTRECOVERY(tp)) + tp->t_flags |= TF_WASFRECOVERY; + else + tp->t_flags &= ~TF_WASFRECOVERY; + } + tp->t_srtt_prev = (tp->t_srtt >> TCP_RTT_SHIFT) + 2; + tp->t_rttvar_prev = (tp->t_rttvar >> TCP_RTTVAR_SHIFT); + tp->t_flagsext &= ~(TF_RECOMPUTE_RTT); +} + /* * TCP timer processing. */ @@ -567,10 +642,10 @@ tcp_timers(tp, timer) tcp_free_sackholes(tp); if (tp->t_state != TCPS_TIME_WAIT && tp->t_state != TCPS_FIN_WAIT_2 && - ((idle_time > 0) && (idle_time < tcp_maxidle))) { - tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, (u_int32_t)tcp_keepintvl); - } - else { + ((idle_time > 0) && (idle_time < TCP_CONN_MAXIDLE(tp)))) { + tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, + (u_int32_t)TCP_CONN_KEEPINTVL(tp)); + } else { tp = tcp_close(tp); return(tp); } @@ -590,8 +665,8 @@ tcp_timers(tp, timer) * retransmitted the FIN 3 times without receiving an ack */ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || - (tp->rxt_conndroptime > 0 && tp->rxt_start > 0 && - (tcp_now - tp->rxt_start) >= tp->rxt_conndroptime) || + (tp->t_rxt_conndroptime > 0 && tp->t_rxtstart > 0 && + (tcp_now - tp->t_rxtstart) >= tp->t_rxt_conndroptime) || ((tp->t_flagsext & TF_RXTFINDROP) != 0 && (tp->t_flags & TF_SENTFIN) != 0 && tp->t_rxtshift >= 4)) { @@ -611,34 +686,42 @@ tcp_timers(tp, timer) break; } - if (tp->t_rxtshift == 1) { - /* - * first retransmit; record ssthresh and cwnd so they can - * be recovered if this turns out to be a "bad" retransmit. 
- * A retransmit is considered "bad" if an ACK for this - * segment is received within RTT/2 interval; the assumption - * here is that the ACK was already in flight. See - * "On Estimating End-to-End Network Path Properties" by - * Allman and Paxson for more details. - */ - tp->snd_cwnd_prev = tp->snd_cwnd; - tp->snd_ssthresh_prev = tp->snd_ssthresh; - tp->snd_recover_prev = tp->snd_recover; - if (IN_FASTRECOVERY(tp)) - tp->t_flags |= TF_WASFRECOVERY; - else - tp->t_flags &= ~TF_WASFRECOVERY; - tp->t_badrxtwin = tcp_now + (tp->t_srtt >> (TCP_RTT_SHIFT)); + tcpstat.tcps_rexmttimeo++; - /* Set the time at which retransmission on this - * connection started - */ - tp->rxt_start = tcp_now; + if (tp->t_rxtshift == 1 && + tp->t_state == TCPS_ESTABLISHED) { + /* Set the time at which retransmission started. */ + tp->t_rxtstart = tcp_now; + + /* + * if this is the first retransmit timeout, save + * the state so that we can recover if the timeout + * is spurious. + */ + tcp_rexmt_save_state(tp); } - tcpstat.tcps_rexmttimeo++; +#if MPTCP + if ((tp->t_rxtshift == mptcp_fail_thresh) && + (tp->t_state == TCPS_ESTABLISHED) && + (tp->t_mpflags & TMPF_MPTCP_TRUE)) { + mptcp_act_on_txfail(so); - if (tp->t_state == TCPS_SYN_SENT) + } +#endif /* MPTCP */ + + if (tp->t_adaptive_wtimo > 0 && + tp->t_rxtshift > tp->t_adaptive_wtimo && + TCPS_HAVEESTABLISHED(tp->t_state)) { + /* Send an event to the application */ + soevent(so, + (SO_FILT_HINT_LOCKED| + SO_FILT_HINT_ADAPTIVE_WTIMO)); + } + + if (tp->t_state == TCPS_SYN_SENT) { rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; + tp->t_stat.synrxtshift = tp->t_rxtshift; + } else rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; TCPT_RANGESET(tp->t_rxtcur, rexmt, @@ -662,14 +745,17 @@ tcp_timers(tp, timer) * - Disable Path MTU Discovery (IP "DF" bit). * - Reduce MTU to lower value than what we negociated with peer. */ - - tp->t_flags &= ~TF_PMTUD; /* Disable Path MTU Discovery for now */ - tp->t_flags |= TF_BLACKHOLE; /* Record that we may have found a black hole */ + /* Disable Path MTU Discovery for now */ + tp->t_flags &= ~TF_PMTUD; + /* Record that we may have found a black hole */ + tp->t_flags |= TF_BLACKHOLE; optlen = tp->t_maxopd - tp->t_maxseg; - tp->t_pmtud_saved_maxopd = tp->t_maxopd; /* Keep track of previous MSS */ - if (tp->t_maxopd > tcp_pmtud_black_hole_mss) - tp->t_maxopd = tcp_pmtud_black_hole_mss; /* Reduce the MSS to intermediary value */ - else { + /* Keep track of previous MSS */ + tp->t_pmtud_saved_maxopd = tp->t_maxopd; + /* Reduce the MSS to intermediary value */ + if (tp->t_maxopd > tcp_pmtud_black_hole_mss) { + tp->t_maxopd = tcp_pmtud_black_hole_mss; + } else { tp->t_maxopd = /* use the default MSS */ #if INET6 isipv6 ? 
tcp_v6mssdflt : @@ -679,7 +765,8 @@ tcp_timers(tp, timer) tp->t_maxseg = tp->t_maxopd - optlen; /* - * Reset the slow-start flight size as it may depends on the new MSS + * Reset the slow-start flight size + * as it may depend on the new MSS */ if (CC_ALGO(tp)->cwnd_init != NULL) CC_ALGO(tp)->cwnd_init(tp); @@ -698,8 +785,9 @@ tcp_timers(tp, timer) tp->t_maxopd = tp->t_pmtud_saved_maxopd; tp->t_maxseg = tp->t_maxopd - optlen; /* - * Reset the slow-start flight size as it may depends on the new MSS - */ + * Reset the slow-start flight size as it + * may depend on the new MSS + */ if (CC_ALGO(tp)->cwnd_init != NULL) CC_ALGO(tp)->cwnd_init(tp); } @@ -713,9 +801,13 @@ tcp_timers(tp, timer) * broken terminal servers (most of which have hopefully been * retired) that have bad VJ header compression code which * trashes TCP segments containing unknown-to-them TCP options. + * Do this only on non-local connections. */ - if ((tp->t_state == TCPS_SYN_SENT) && - (tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres)) + if (tp->t_state == TCPS_SYN_SENT && + ((!(tp->t_flags & TF_LOCAL) && + tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres) || + ((tp->t_flags & TF_LOCAL) && + tp->t_rxtshift == tcp_broken_peer_syn_rxmit_thres_local))) tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC); /* @@ -751,11 +843,17 @@ tcp_timers(tp, timer) */ tp->t_rtttime = 0; - if (CC_ALGO(tp)->after_timeout != NULL) + EXIT_FASTRECOVERY(tp); + + /* RFC 5681 says: when a TCP sender detects segment loss + * using retransmit timer and the given segment has already + * been retransmitted by way of the retransmission timer at + * least once, the value of ssthresh is held constant + */ + if (tp->t_rxtshift == 1 && + CC_ALGO(tp)->after_timeout != NULL) CC_ALGO(tp)->after_timeout(tp); - tp->t_dupacks = 0; - EXIT_FASTRECOVERY(tp); /* CWR notifications are to be sent on new data right after * RTOs, Fast Retransmits and ECE notification receipts. @@ -812,12 +910,27 @@ fc_output: */ case TCPT_KEEP: tcpstat.tcps_keeptimeo++; +#if MPTCP + /* + * Regular TCP connections do not send keepalives after closing + * MPTCP must not also, after sending Data FINs. + */ + struct mptcb *mp_tp = tp->t_mptcb; + if ((tp->t_mpflags & TMPF_MPTCP_TRUE) && + (mp_tp == NULL)) { + goto dropit; + } else if (mp_tp != NULL) { + if ((mptcp_ok_to_keepalive(mp_tp) == 0)) + goto dropit; + } +#endif /* MPTCP */ if (tp->t_state < TCPS_ESTABLISHED) goto dropit; if ((always_keepalive || - tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && + (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) || + (tp->t_flagsext & TF_DETECT_READSTALL)) && (tp->t_state <= TCPS_CLOSING || tp->t_state == TCPS_FIN_WAIT_2)) { - if (idle_time >= TCP_KEEPIDLE(tp) + (u_int32_t)tcp_maxidle) + if (idle_time >= TCP_CONN_KEEPIDLE(tp) + TCP_CONN_MAXIDLE(tp)) goto dropit; /* * Send a packet designed to force a response @@ -853,10 +966,34 @@ fc_output: tp->rcv_nxt, tp->snd_una - 1, 0, ifscope, nocell); (void) m_free(dtom(t_template)); + if (tp->t_flagsext & TF_DETECT_READSTALL) + tp->t_rtimo_probes++; } - tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, tcp_keepintvl); - } else - tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_KEEPIDLE(tp)); + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + TCP_CONN_KEEPINTVL(tp)); + } else { + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + TCP_CONN_KEEPIDLE(tp)); + } + if (tp->t_flagsext & TF_DETECT_READSTALL) { + /* + * The keep alive packets sent to detect a read + * stall did not get a response from the + * peer. Generate more keep-alives to confirm this. 
+ * If the number of probes sent reaches the limit, + * generate an event. + */ + if (tp->t_rtimo_probes > tp->t_adaptive_rtimo) { + /* Generate an event */ + soevent(so, + (SO_FILT_HINT_LOCKED| + SO_FILT_HINT_ADAPTIVE_RTIMO)); + tcp_keepalive_reset(tp); + } else { + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START( + tp, TCP_REXMTVAL(tp)); + } + } break; case TCPT_DELACK: if (tcp_delack_enabled && (tp->t_flags & TF_DELACK)) { @@ -864,7 +1001,7 @@ fc_output: tp->t_timer[TCPT_DELACK] = 0; tp->t_flags |= TF_ACKNOW; - /* If delayed ack timer fired while we are stretching acks, + /* If delayed ack timer fired while stretching acks * go back to acking every other packet */ if ((tp->t_flags & TF_STRETCHACK) != 0) @@ -882,6 +1019,33 @@ fc_output: } break; +#if MPTCP + case TCPT_JACK_RXMT: + if ((tp->t_state == TCPS_ESTABLISHED) && + (tp->t_mpflags & TMPF_PREESTABLISHED) && + (tp->t_mpflags & TMPF_JOINED_FLOW)) { + if (++tp->t_mprxtshift > TCP_MAXRXTSHIFT) { + tcpstat.tcps_timeoutdrop++; + postevent(so, 0, EV_TIMEOUT); + soevent(so, + (SO_FILT_HINT_LOCKED| + SO_FILT_HINT_TIMEOUT)); + tp = tcp_drop(tp, tp->t_softerror ? + tp->t_softerror : ETIMEDOUT); + break; + } + tcpstat.tcps_join_rxmts++; + tp->t_flags |= TF_ACKNOW; + + /* + * No backoff is implemented for simplicity for this + * corner case. + */ + (void) tcp_output(tp); + } + break; +#endif /* MPTCP */ + #if TCPDEBUG if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, diff --git a/bsd/netinet/tcp_timer.h b/bsd/netinet/tcp_timer.h index 213e87a39..55f428707 100644 --- a/bsd/netinet/tcp_timer.h +++ b/bsd/netinet/tcp_timer.h @@ -72,7 +72,7 @@ /* * Definitions of the TCP timers. */ -#define TCPT_NTIMERS 5 +#define TCPT_NTIMERS (TCPT_MAX + 1) /* Keep the external definition the same for binary compatibility */ #define TCPT_NTIMERS_EXT 4 @@ -82,7 +82,12 @@ #define TCPT_KEEP 2 /* keep alive */ #define TCPT_2MSL 3 /* 2*msl quiet time timer */ #define TCPT_DELACK 4 /* delayed ack timer */ +#if MPTCP +#define TCPT_JACK_RXMT 5 /* retransmit timer for join ack */ +#define TCPT_MAX 5 +#else /* MPTCP */ #define TCPT_MAX 4 +#endif /* !MPTCP */ #define TCPT_NONE (TCPT_MAX + 1) /* @@ -243,9 +248,18 @@ struct tcptimerlist { (tv) = (tvmax); \ } while(0) -#define TCP_KEEPIDLE(tp) \ - (tp->t_keepidle && (tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) ? \ - tp->t_keepidle : tcp_keepidle) +#define TCP_CONN_KEEPIDLE(tp) \ + ((tp)->t_keepidle && \ + ((tp)->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) ? \ + (tp)->t_keepidle : tcp_keepidle) +#define TCP_CONN_KEEPINIT(tp) \ + (((tp)->t_keepinit > 0) ? (tp)->t_keepinit : tcp_keepinit) +#define TCP_CONN_KEEPCNT(tp) \ + (((tp)->t_keepcnt > 0) ? (tp)->t_keepcnt : tcp_keepcnt) +#define TCP_CONN_KEEPINTVL(tp) \ + (((tp)->t_keepintvl > 0) ? (tp)->t_keepintvl : tcp_keepintvl) +#define TCP_CONN_MAXIDLE(tp) \ + (TCP_CONN_KEEPCNT(tp) * TCP_CONN_KEEPINTVL(tp)) /* Since we did not add rexmt slop for local connections, we should add * it to idle timeout. Otherwise local connections will reach idle state @@ -254,10 +268,12 @@ struct tcptimerlist { #define TCP_IDLETIMEOUT(tp) \ (((TCP_ADD_REXMTSLOP(tp)) ? 
0 : tcp_rexmt_slop) + tp->t_rxtcur) +TAILQ_HEAD(tcptailq, tcpcb); + extern int tcp_keepinit; /* time to establish connection */ extern int tcp_keepidle; /* time before keepalive probes begin */ extern int tcp_keepintvl; /* time between keepalive probes */ -extern int tcp_maxidle; /* time to drop after starting probes */ +extern int tcp_keepcnt; /* number of keepalives */ extern int tcp_delack; /* delayed ack timer */ extern int tcp_maxpersistidle; extern int tcp_msl; diff --git a/bsd/netinet/tcp_usrreq.c b/bsd/netinet/tcp_usrreq.c index 19405c584..6484f92b6 100644 --- a/bsd/netinet/tcp_usrreq.c +++ b/bsd/netinet/tcp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -70,9 +70,11 @@ #if INET6 #include #endif /* INET6 */ +#include #include #include #include +#include #include #include @@ -98,14 +100,22 @@ #include #include #include +#include #if TCPDEBUG #include #endif +#if MPTCP +#include +#endif /* MPTCP */ #if IPSEC #include #endif /*IPSEC*/ +#if FLOW_DIVERT +#include +#endif /* FLOW_DIVERT */ + void tcp_fill_info(struct tcpcb *, struct tcp_info *); errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *); @@ -120,32 +130,14 @@ static int tcp_attach(struct socket *, struct proc *); static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *); #if INET6 static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *); +static int tcp6_usr_connect(struct socket *, struct sockaddr *, + struct proc *); #endif /* INET6 */ static struct tcpcb * tcp_disconnect(struct tcpcb *); static struct tcpcb * tcp_usrclosed(struct tcpcb *); -static u_int32_t tcps_in_sw_cksum; -SYSCTL_UINT(_net_inet_tcp, OID_AUTO, in_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcps_in_sw_cksum, 0, - "Number of received packets checksummed in software"); - -static u_int64_t tcps_in_sw_cksum_bytes; -SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, in_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcps_in_sw_cksum_bytes, - "Amount of received data checksummed in software"); - -static u_int32_t tcps_out_sw_cksum; -SYSCTL_UINT(_net_inet_tcp, OID_AUTO, out_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcps_out_sw_cksum, 0, - "Number of transmitted packets checksummed in software"); - -static u_int64_t tcps_out_sw_cksum_bytes; -SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, - &tcps_out_sw_cksum_bytes, - "Amount of transmitted data checksummed in software"); - extern uint32_t tcp_autorcvbuf_max; extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb); @@ -161,7 +153,8 @@ extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb); #define TCPDEBUG2(req) #endif -SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info, + CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN, 0 , 0, tcp_sysctl_info, "S", "TCP info per tuple"); /* @@ -221,8 +214,8 @@ tcp_usr_detach(struct socket *so) lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); tp = intotcpcb(inp); /* In case we got disconnected from the peer */ - if (tp == 0) - goto out; + if (tp == NULL) + goto out; TCPDEBUG1(); calculate_tcp_clock(); @@ -233,16 +226,17 @@ out: return error; } -#define COMMON_START() TCPDEBUG0; \ - do { \ - if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { \ - return EINVAL; \ - } \ - tp = intotcpcb(inp); \ - TCPDEBUG1(); \ 
- calculate_tcp_clock(); \ - } while(0) - +#define COMMON_START() TCPDEBUG0; \ +do { \ + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \ + return (EINVAL); \ + if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) \ + return (EPROTOTYPE); \ + tp = intotcpcb(inp); \ + TCPDEBUG1(); \ + calculate_tcp_clock(); \ +} while (0) + #define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out @@ -362,7 +356,7 @@ tcp_usr_listen(struct socket *so, struct proc *p) COMMON_START(); if (inp->inp_lport == 0) - error = in_pcbbind(inp, (struct sockaddr *)0, p); + error = in_pcbbind(inp, NULL, p); if (error == 0) tp->t_state = TCPS_LISTEN; COMMON_END(PRU_LISTEN); @@ -381,7 +375,7 @@ tcp6_usr_listen(struct socket *so, struct proc *p) inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; - error = in6_pcbbind(inp, (struct sockaddr *)0, p); + error = in6_pcbbind(inp, NULL, p); } if (error == 0) tp->t_state = TCPS_LISTEN; @@ -405,9 +399,9 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) struct sockaddr_in *sinp; TCPDEBUG0; - if (inp == 0) + if (inp == NULL) { return EINVAL; - else if (inp->inp_state == INPCB_STATE_DEAD) { + } else if (inp->inp_state == INPCB_STATE_DEAD) { if (so->so_error) { error = so->so_error; so->so_error = 0; @@ -415,6 +409,23 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) } else return EINVAL; } +#if FLOW_DIVERT + else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) { + uint32_t fd_ctl_unit = 0; + error = flow_divert_check_policy(so, p, FALSE, &fd_ctl_unit); + if (error == 0) { + if (fd_ctl_unit > 0) { + error = flow_divert_pcb_init(so, fd_ctl_unit); + if (error == 0) { + error = flow_divert_connect_out(so, nam, p); + } + } else { + error = ENETDOWN; + } + } + return error; + } +#endif /* FLOW_DIVERT */ tp = intotcpcb(inp); TCPDEBUG1(); @@ -434,13 +445,116 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) goto out; } - if ((error = tcp_connect(tp, nam, p)) != 0) goto out; error = tcp_output(tp); COMMON_END(PRU_CONNECT); } +static int +tcp_usr_connectx_common(struct socket *so, int af, + struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, + struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid, + uint32_t flags, void *arg, uint32_t arglen) +{ +#pragma unused(aid) +#if !MPTCP +#pragma unused(flags, arg, arglen) +#endif /* !MPTCP */ + struct sockaddr_entry *src_se = NULL, *dst_se = NULL; + struct inpcb *inp = sotoinpcb(so); + int error; + + if (inp == NULL) + return (EINVAL); + + VERIFY(dst_sl != NULL); + + /* select source (if specified) and destination addresses */ + error = in_selectaddrs(af, src_sl, &src_se, dst_sl, &dst_se); + if (error != 0) + return (error); + + VERIFY(*dst_sl != NULL && dst_se != NULL); + VERIFY(src_se == NULL || *src_sl != NULL); + VERIFY(dst_se->se_addr->sa_family == af); + VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); + + /* + * We get here for 2 cases: + * + * a. From MPTCP, to connect a subflow. There is no need to + * bind the socket to the source address and/or interface, + * since everything has been taken care of by MPTCP. We + * simply check whether or not this is for the initial + * MPTCP connection attempt, or to join an existing one. + * + * b. From the socket layer, to connect a TCP. Perform the + * bind to source address and/or interface as necessary. 
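+ *
+ * In both cases the connect itself is then dispatched below to
+ * the address-family specific handler (tcp_usr_connect or
+ * tcp6_usr_connect), and *pcid is reported as 1 because a plain
+ * TCP socket carries exactly one connection.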
+ */ +#if MPTCP + if (flags & TCP_CONNREQF_MPTCP) { + struct mptsub_connreq *mpcr = arg; + + /* Check to make sure this came down from MPTCP */ + if (arg == NULL || arglen != sizeof (*mpcr)) + return (EOPNOTSUPP); + + switch (mpcr->mpcr_type) { + case MPTSUB_CONNREQ_MP_ENABLE: + break; + case MPTSUB_CONNREQ_MP_ADD: + break; + default: + return (EOPNOTSUPP); + } + } else +#endif /* MPTCP */ + { + /* bind socket to the specified interface, if requested */ + if (ifscope != IFSCOPE_NONE && + (error = inp_bindif(inp, ifscope, NULL)) != 0) + return (error); + + /* if source address and/or port is specified, bind to it */ + if (src_se != NULL) { + struct sockaddr *sa = src_se->se_addr; + error = sobindlock(so, sa, 0); /* already locked */ + if (error != 0) + return (error); + } + } + + switch (af) { + case AF_INET: + error = tcp_usr_connect(so, dst_se->se_addr, p); + break; +#if INET6 + case AF_INET6: + error = tcp6_usr_connect(so, dst_se->se_addr, p); + break; +#endif /* INET6 */ + default: + VERIFY(0); + /* NOTREACHED */ + } + + if (error == 0 && pcid != NULL) + *pcid = 1; /* there is only 1 connection for a TCP */ + + return (error); +} + +static int +tcp_usr_connectx(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ + return (tcp_usr_connectx_common(so, AF_INET, src_sl, dst_sl, + p, ifscope, aid, pcid, flags, arg, arglen)); +} + #if INET6 static int tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) @@ -450,7 +564,38 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) struct tcpcb *tp; struct sockaddr_in6 *sin6p; - COMMON_START(); + TCPDEBUG0; + if (inp == NULL) { + return EINVAL; + } else if (inp->inp_state == INPCB_STATE_DEAD) { + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + return error; + } else + return EINVAL; + } +#if FLOW_DIVERT + else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) { + uint32_t fd_ctl_unit = 0; + error = flow_divert_check_policy(so, p, FALSE, &fd_ctl_unit); + if (error == 0) { + if (fd_ctl_unit > 0) { + error = flow_divert_pcb_init(so, fd_ctl_unit); + if (error == 0) { + error = flow_divert_connect_out(so, nam, p); + } + } else { + error = ENETDOWN; + } + } + return error; + } +#endif /* FLOW_DIVERT */ + tp = intotcpcb(inp); + TCPDEBUG1(); + + calculate_tcp_clock(); if (nam->sa_family != 0 && nam->sa_family != AF_INET6) { error = EAFNOSUPPORT; @@ -490,6 +635,16 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p) goto out; COMMON_END(PRU_CONNECT); } + +static int +tcp6_usr_connectx(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ + return (tcp_usr_connectx_common(so, AF_INET6, src_sl, dst_sl, + p, ifscope, aid, pcid, flags, arg, arglen)); +} #endif /* INET6 */ /* @@ -509,16 +664,30 @@ tcp_usr_disconnect(struct socket *so) int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; - - lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + + lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, + LCK_MTX_ASSERT_OWNED); COMMON_START(); /* In case we got disconnected from the peer */ - if (tp == 0) - goto out; + if (tp == NULL) + goto out; tp = tcp_disconnect(tp); COMMON_END(PRU_DISCONNECT); } +/* + * User-protocol pru_disconnectx callback. 
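+ *
+ * TCP has a single implicit association, so only ASSOCID_ANY or
+ * ASSOCID_ALL is accepted; anything else fails with EINVAL and
+ * the request otherwise degenerates to a plain tcp_usr_disconnect().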
+ */ +static int +tcp_usr_disconnectx(struct socket *so, associd_t aid, connid_t cid) +{ +#pragma unused(cid) + if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + return (EINVAL); + + return (tcp_usr_disconnect(so)); +} + /* * Accept a connection. Essentially all the work is * done at higher levels; just return the address @@ -532,15 +701,17 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam) struct tcpcb *tp = NULL; TCPDEBUG0; - in_setpeeraddr(so, nam); - + in_getpeeraddr(so, nam); + if (so->so_state & SS_ISDISCONNECTED) { error = ECONNABORTED; goto out; } - if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) return (EINVAL); - } + else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) + return (EPROTOTYPE); + tp = intotcpcb(inp); TCPDEBUG1(); @@ -562,9 +733,11 @@ tcp6_usr_accept(struct socket *so, struct sockaddr **nam) error = ECONNABORTED; goto out; } - if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) return (EINVAL); - } + else if (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT) + return (EPROTOTYPE); + tp = intotcpcb(inp); TCPDEBUG1(); @@ -599,12 +772,34 @@ tcp_usr_shutdown(struct socket *so) struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; - COMMON_START(); + TCPDEBUG0; + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) + return (EINVAL); + socantsendmore(so); - /* In case we got disconnected from the peer */ - if (tp == 0) - goto out; + + /* + * In case we got disconnected from the peer, or if this is + * a socket that is to be flow-diverted (but not yet). + */ + tp = intotcpcb(inp); + TCPDEBUG1(); + if (tp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if (tp != NULL) + error = EPROTOTYPE; + goto out; + } + + calculate_tcp_clock(); + tp = tcp_usrclosed(tp); +#if MPTCP + /* A reset has been sent but socket exists, do not send FIN */ + if ((so->so_flags & SOF_MP_SUBFLOW) && + (tp) && (tp->t_mpflags & TMPF_RESET)) { + goto out; + } +#endif if (tp) error = tcp_output(tp); COMMON_END(PRU_SHUTDOWN); @@ -622,8 +817,8 @@ tcp_usr_rcvd(struct socket *so, __unused int flags) COMMON_START(); /* In case we got disconnected from the peer */ - if (tp == 0) - goto out; + if (tp == NULL) + goto out; tcp_sbrcv_trim(tp, &so->so_rcv); tcp_output(tp); @@ -662,28 +857,35 @@ tcp_usr_rcvd(struct socket *so, __unused int flags) * tcp6_connect:??? [IPV6 only] */ static int -tcp_usr_send(struct socket *so, int flags, struct mbuf *m, - struct sockaddr *nam, struct mbuf *control, struct proc *p) +tcp_usr_send(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *nam, struct mbuf *control, struct proc *p) { int error = 0; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; + uint32_t msgpri = MSG_PRI_DEFAULT; #if INET6 int isipv6; #endif TCPDEBUG0; - if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) { + if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD || + (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { /* * OOPS! we lost a race, the TCP session got reset after * we checked SS_CANTSENDMORE, eg: while doing uiomove or a * network interrupt in the non-splnet() section of sosend(). */ - if (m) + if (m != NULL) m_freem(m); - if (control) + if (control != NULL) { m_freem(control); - error = ECONNRESET; /* XXX EPIPE? */ + control = NULL; + } + if (inp != NULL && (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + error = EPROTOTYPE; + else + error = ECONNRESET; /* XXX EPIPE? 
*/ tp = NULL; TCPDEBUG1(); goto out; @@ -696,19 +898,51 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m, calculate_tcp_clock(); - if (control) { - /* TCP doesn't do control messages (rights, creds, etc) */ - if (control->m_len) { + if (control != NULL) { + if (so->so_flags & SOF_ENABLE_MSGS) { + /* Get the msg priority from control mbufs */ + error = tcp_get_msg_priority(control, &msgpri); + if (error) { + m_freem(control); + if (m != NULL) + m_freem(m); + control = NULL; + m = NULL; + goto out; + } m_freem(control); - if (m) + control = NULL; + } else if (control->m_len) { + /* + * if not unordered, TCP should not have + * control mbufs + */ + m_freem(control); + if (m != NULL) m_freem(m); + control = NULL; + m = NULL; error = EINVAL; goto out; } - m_freem(control); /* empty control, just free it */ } + + if (so->so_flags & SOF_ENABLE_MSGS) { + VERIFY(m->m_flags & M_PKTHDR); + m->m_pkthdr.msg_pri = msgpri; + } + + /* MPTCP sublow socket buffers must not be compressed */ + VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) || + (so->so_snd.sb_flags & SB_NOCOMPRESS)); + if(!(flags & PRUS_OOB)) { - sbappendstream(&so->so_snd, m); + /* Call msg send if message delivery is enabled */ + if (so->so_flags & SOF_ENABLE_MSGS) + sbappendmsg_snd(&so->so_snd, m); + else + sbappendstream(&so->so_snd, m); + if (nam && tp->t_state < TCPS_SYN_SENT) { /* * Do implied connect if not yet connected, @@ -799,8 +1033,8 @@ tcp_usr_abort(struct socket *so) COMMON_START(); /* In case we got disconnected from the peer */ - if (tp == 0) - goto out; + if (tp == NULL) + goto out; tp = tcp_drop(tp, ECONNABORTED); so->so_usecount--; COMMON_END(PRU_ABORT); @@ -842,20 +1076,48 @@ tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags) /* xxx - should be const */ struct pr_usrreqs tcp_usrreqs = { - tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind, - tcp_usr_connect, pru_connect2_notsupp, in_control, tcp_usr_detach, - tcp_usr_disconnect, tcp_usr_listen, in_setpeeraddr, tcp_usr_rcvd, - tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, - in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = tcp_usr_abort, + .pru_accept = tcp_usr_accept, + .pru_attach = tcp_usr_attach, + .pru_bind = tcp_usr_bind, + .pru_connect = tcp_usr_connect, + .pru_connectx = tcp_usr_connectx, + .pru_control = in_control, + .pru_detach = tcp_usr_detach, + .pru_disconnect = tcp_usr_disconnect, + .pru_disconnectx = tcp_usr_disconnectx, + .pru_listen = tcp_usr_listen, + .pru_peeraddr = in_getpeeraddr, + .pru_rcvd = tcp_usr_rcvd, + .pru_rcvoob = tcp_usr_rcvoob, + .pru_send = tcp_usr_send, + .pru_shutdown = tcp_usr_shutdown, + .pru_sockaddr = in_getsockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; #if INET6 struct pr_usrreqs tcp6_usrreqs = { - tcp_usr_abort, tcp6_usr_accept, tcp_usr_attach, tcp6_usr_bind, - tcp6_usr_connect, pru_connect2_notsupp, in6_control, tcp_usr_detach, - tcp_usr_disconnect, tcp6_usr_listen, in6_mapped_peeraddr, tcp_usr_rcvd, - tcp_usr_rcvoob, tcp_usr_send, pru_sense_null, tcp_usr_shutdown, - in6_mapped_sockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = tcp_usr_abort, + .pru_accept = tcp6_usr_accept, + .pru_attach = tcp_usr_attach, + .pru_bind = tcp6_usr_bind, + .pru_connect = tcp6_usr_connect, + .pru_connectx = tcp6_usr_connectx, + .pru_control = in6_control, + .pru_detach = tcp_usr_detach, + .pru_disconnect = tcp_usr_disconnect, + .pru_disconnectx = tcp_usr_disconnectx, + .pru_listen = tcp6_usr_listen, + .pru_peeraddr = in6_mapped_peeraddr, + .pru_rcvd = 
tcp_usr_rcvd, + .pru_rcvoob = tcp_usr_rcvoob, + .pru_send = tcp_usr_send, + .pru_shutdown = tcp_usr_shutdown, + .pru_sockaddr = in6_mapped_sockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; #endif /* INET6 */ @@ -893,16 +1155,16 @@ tcp_connect(tp, nam, p) struct socket *so = inp->inp_socket; struct tcpcb *otp; struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam; - struct sockaddr_in ifaddr; + struct in_addr laddr; struct rmxp_tao *taop; struct rmxp_tao tao_noncached; - int error; + int error = 0; struct ifnet *outif = NULL; if (inp->inp_lport == 0) { - error = in_pcbbind(inp, (struct sockaddr *)0, p); + error = in_pcbbind(inp, NULL, p); if (error) - return error; + goto done; } /* @@ -910,15 +1172,14 @@ tcp_connect(tp, nam, p) * earlier incarnation of this same connection still in * TIME_WAIT state, creating an ADDRINUSE error. */ - error = in_pcbladdr(inp, nam, &ifaddr, &outif); + error = in_pcbladdr(inp, nam, &laddr, IFSCOPE_NONE, &outif); if (error) - return error; + goto done; tcp_unlock(inp->inp_socket, 0, 0); oinp = in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, - inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr - : ifaddr.sin_addr, + inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : laddr, inp->inp_lport, 0, NULL); tcp_lock(inp->inp_socket, 0, 0); @@ -932,38 +1193,44 @@ tcp_connect(tp, nam, p) } if (oinp != inp && (otp = intotcpcb(oinp)) != NULL && - otp->t_state == TCPS_TIME_WAIT && + otp->t_state == TCPS_TIME_WAIT && ((int)(tcp_now - otp->t_starttime)) < tcp_msl && - (otp->t_flags & TF_RCVD_CC)) + (otp->t_flags & TF_RCVD_CC)) { otp = tcp_close(otp); - else { - printf("tcp_connect: inp=%p err=EADDRINUSE\n", inp); + } else { + printf("tcp_connect: inp=0x%llx err=EADDRINUSE\n", + (uint64_t)VM_KERNEL_ADDRPERM(inp)); if (oinp != inp) tcp_unlock(oinp->inp_socket, 1, 0); - return EADDRINUSE; + error = EADDRINUSE; + goto done; } if (oinp != inp) tcp_unlock(oinp->inp_socket, 1, 0); } skip_oinp: - if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr.sin_addr.s_addr : - inp->inp_laddr.s_addr) == sin->sin_addr.s_addr && - inp->inp_lport == sin->sin_port) - return EINVAL; - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + if ((inp->inp_laddr.s_addr == INADDR_ANY ? laddr.s_addr : + inp->inp_laddr.s_addr) == sin->sin_addr.s_addr && + inp->inp_lport == sin->sin_port) { + error = EINVAL; + goto done; + } + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { /*lock inversion issue, mostly with udp multicast packets */ socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); socket_lock(inp->inp_socket, 0); } if (inp->inp_laddr.s_addr == INADDR_ANY) { - inp->inp_laddr = ifaddr.sin_addr; + inp->inp_laddr = laddr; + /* no reference needed */ inp->inp_last_outifp = outif; + inp->inp_flags |= INP_INADDR_ANY; } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; in_pcbrehash(inp); - lck_rw_done(inp->inp_pcbinfo->mtx); + lck_rw_done(inp->inp_pcbinfo->ipi_lock); if (inp->inp_flowhash == 0) inp->inp_flowhash = inp_calc_flowhash(inp); @@ -973,8 +1240,7 @@ skip_oinp: soisconnecting(so); tcpstat.tcps_connattempt++; tp->t_state = TCPS_SYN_SENT; - tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, - tp->t_keepinit ? 
tp->t_keepinit : tcp_keepinit); + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPINIT(tp)); tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); if (nstat_collect) @@ -998,7 +1264,11 @@ skip_oinp: tp->t_flags |= TF_SENDCCNEW; } - return 0; +done: + if (outif != NULL) + ifnet_release(outif); + + return (error); } #if INET6 @@ -1019,7 +1289,7 @@ tcp6_connect(tp, nam, p) struct ifnet *outif = NULL; if (inp->inp_lport == 0) { - error = in6_pcbbind(inp, (struct sockaddr *)0, p); + error = in6_pcbbind(inp, NULL, p); if (error) goto done; } @@ -1055,25 +1325,32 @@ tcp6_connect(tp, nam, p) goto done; } } - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { /*lock inversion issue, mostly with udp multicast packets */ socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); socket_lock(inp->inp_socket, 0); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { inp->in6p_laddr = addr6; inp->in6p_last_outifp = outif; /* no reference needed */ + inp->in6p_flags |= INP_IN6ADDR_ANY; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) - inp->in6p_flowinfo = sin6->sin6_flowinfo; + inp->inp_flow = sin6->sin6_flowinfo; in_pcbrehash(inp); - lck_rw_done(inp->inp_pcbinfo->mtx); + lck_rw_done(inp->inp_pcbinfo->ipi_lock); if (inp->inp_flowhash == 0) inp->inp_flowhash = inp_calc_flowhash(inp); + /* update flowinfo - RFC 6437 */ + if (inp->inp_flow == 0 && inp->in6p_flags & IN6P_AUTOFLOWLABEL) { + inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; + inp->inp_flow |= + (htonl(inp->inp_flowhash) & IPV6_FLOWLABEL_MASK); + } tcp_set_max_rwinscale(tp, so); @@ -1081,7 +1358,7 @@ tcp6_connect(tp, nam, p) tcpstat.tcps_connattempt++; tp->t_state = TCPS_SYN_SENT; tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, - tp->t_keepinit ? tp->t_keepinit : tcp_keepinit); + TCP_CONN_KEEPINIT(tp)); tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); if (nstat_collect) @@ -1140,7 +1417,7 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) if (tp->snd_max != tp->snd_nxt) ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY; else - ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY; + ti->tcpi_flags &= ~TCPI_FLAG_LOSSRECOVERY; ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? 
tp->t_rxtcur : 0; ti->tcpi_snd_mss = tp->t_maxseg; @@ -1149,6 +1426,7 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) ti->tcpi_rttcur = tp->t_rttcur; ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT; ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT; + ti->tcpi_rttbest = tp->t_rttbest >> TCP_RTT_SHIFT; ti->tcpi_snd_ssthresh = tp->snd_ssthresh; ti->tcpi_snd_cwnd = tp->snd_cwnd; @@ -1170,13 +1448,29 @@ tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti) tp->t_inpcb->inp_last_outifp->if_index; //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes); + ti->tcpi_txpackets = inp->inp_stat->txpackets; ti->tcpi_txbytes = inp->inp_stat->txbytes; ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes; ti->tcpi_txunacked = tp->snd_max - tp->snd_una; //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes); + ti->tcpi_rxpackets = inp->inp_stat->rxpackets; ti->tcpi_rxbytes = inp->inp_stat->rxbytes; ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes; + ti->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes; + + if (tp->t_state > TCPS_LISTEN) { + ti->tcpi_synrexmits = tp->t_stat.synrxtshift; + } + ti->tcpi_cell_rxpackets = inp->inp_cstat->rxpackets; + ti->tcpi_cell_rxbytes = inp->inp_cstat->rxbytes; + ti->tcpi_cell_txpackets = inp->inp_cstat->txpackets; + ti->tcpi_cell_txbytes = inp->inp_cstat->txbytes; + + ti->tcpi_wifi_rxpackets = inp->inp_wstat->rxpackets; + ti->tcpi_wifi_rxbytes = inp->inp_wstat->rxbytes; + ti->tcpi_wifi_txpackets = inp->inp_wstat->txpackets; + ti->tcpi_wifi_txbytes = inp->inp_wstat->txbytes; } } @@ -1196,32 +1490,35 @@ tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti) if (itpl->itpl_local_sa.sa_family == AF_INET && itpl->itpl_remote_sa.sa_family == AF_INET) { inp = in_pcblookup_hash(pcbinfo, - itpl->itpl_remote_sin.sin_addr, - itpl->itpl_remote_sin.sin_port, - itpl->itpl_local_sin.sin_addr, - itpl->itpl_local_sin.sin_port, - 0, NULL); + itpl->itpl_remote_sin.sin_addr, + itpl->itpl_remote_sin.sin_port, + itpl->itpl_local_sin.sin_addr, + itpl->itpl_local_sin.sin_port, + 0, NULL); } else if (itpl->itpl_local_sa.sa_family == AF_INET6 && itpl->itpl_remote_sa.sa_family == AF_INET6) { struct in6_addr ina6_local; struct in6_addr ina6_remote; ina6_local = itpl->itpl_local_sin6.sin6_addr; - if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) && itpl->itpl_local_sin6.sin6_scope_id) + if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) && + itpl->itpl_local_sin6.sin6_scope_id) ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id); ina6_remote = itpl->itpl_remote_sin6.sin6_addr; - if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) && itpl->itpl_remote_sin6.sin6_scope_id) + if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) && + itpl->itpl_remote_sin6.sin6_scope_id) ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id); inp = in6_pcblookup_hash(pcbinfo, - &ina6_remote, - itpl->itpl_remote_sin6.sin6_port, - &ina6_local, - itpl->itpl_local_sin6.sin6_port, - 0, NULL); - } else + &ina6_remote, + itpl->itpl_remote_sin6.sin6_port, + &ina6_local, + itpl->itpl_local_sin6.sin6_port, + 0, NULL); + } else { return EINVAL; + } if (inp == NULL || (so = inp->inp_socket) == NULL) return ENOENT; @@ -1245,7 +1542,42 @@ tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int error; struct tcp_info ti; struct info_tuple itpl; - + proc_t caller = PROC_NULL; + proc_t caller_parent = PROC_NULL; + char command_name[MAXCOMLEN + 1] = ""; + char parent_name[MAXCOMLEN + 1] = ""; + + if ((caller = proc_self()) != PROC_NULL) { + /* get process name */ + 
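/*
 * kern_asl_msg() takes a level, a facility, the number of
 * key/value pairs, then the pairs themselves terminated by a
 * NULL. A minimal sketch with a single pair, mirroring the
 * MessageTracer call issued later in this block:
 *
 *	kern_asl_msg(LOG_DEBUG, "messagetracer", 1,
 *	    "com.apple.message.domain",
 *	    "com.apple.kernel.tcpstat", NULL);
 */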
strlcpy(command_name, caller->p_comm, sizeof(command_name)); + + /* get parent process name if possible */ + if ((caller_parent = proc_find(caller->p_ppid)) != PROC_NULL) { + strlcpy(parent_name, caller_parent->p_comm, + sizeof(parent_name)); + proc_rele(caller_parent); + } + + if ((escape_str(command_name, strlen(command_name), + sizeof(command_name)) == 0) && + (escape_str(parent_name, strlen(parent_name), + sizeof(parent_name)) == 0)) { + kern_asl_msg(LOG_DEBUG, "messagetracer", + 5, + "com.apple.message.domain", + "com.apple.kernel.tcpstat", /* 1 */ + "com.apple.message.signature", + "tcpinfo", /* 2 */ + "com.apple.message.signature2", command_name, /* 3 */ + "com.apple.message.signature3", parent_name, /* 4 */ + "com.apple.message.summarize", "YES", /* 5 */ + NULL); + } + } + + if (caller != PROC_NULL) + proc_rele(caller); + if (req->newptr == USER_ADDR_NULL) { return EINVAL; } @@ -1303,6 +1635,13 @@ tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid) return error; } +void +tcp_getconninfo(struct socket *so, struct conninfo_tcp *tcp_ci) +{ + (void) tcp_lookup_peer_pid_locked(so, &tcp_ci->tcpci_peer_pid); + tcp_fill_info(sototcpcb(so), &tcp_ci->tcpci_tcp_info); +} + /* * The new sockopt interface makes it possible for us to block in the * copyin/out step (if we take a page fault). Taking a page fault at @@ -1324,11 +1663,12 @@ tcp_ctloutput(so, sopt) if (inp == NULL) { return (ECONNRESET); } - /* Allow at this level */ + /* Allow at this level */ if (sopt->sopt_level != IPPROTO_TCP && - !(sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_FLUSH)) { + !(sopt->sopt_level == SOL_SOCKET && (sopt->sopt_name == SO_FLUSH || + sopt->sopt_name == SO_TRAFFIC_MGT_BACKGROUND))) { #if INET6 - if (INP_CHECK_SOCKAF(so, AF_INET6)) + if (SOCK_CHECK_DOM(so, PF_INET6)) error = ip6_ctloutput(so, sopt); else #endif /* INET6 */ @@ -1336,9 +1676,9 @@ tcp_ctloutput(so, sopt) return (error); } tp = intotcpcb(inp); - if (tp == NULL) { - return (ECONNRESET); - } + if (tp == NULL) { + return (ECONNRESET); + } calculate_tcp_clock(); @@ -1454,12 +1794,13 @@ tcp_ctloutput(so, sopt) sizeof optval); if (error) break; - if (optval < 0) + if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { error = EINVAL; - else { + } else { tp->t_keepidle = optval * TCP_RETRANSHZ; + /* reset the timer to new value */ tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, - TCP_KEEPIDLE(tp)); /* reset the timer to new value */ + TCP_CONN_KEEPIDLE(tp)); tcp_check_timer_state(tp); } break; @@ -1469,10 +1810,53 @@ tcp_ctloutput(so, sopt) sizeof optval); if (error) break; - if (optval < 0) + if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { error = EINVAL; - else + } else { tp->t_keepinit = optval * TCP_RETRANSHZ; + if (tp->t_state == TCPS_SYN_RECEIVED || + tp->t_state == TCPS_SYN_SENT) { + tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, + TCP_CONN_KEEPINIT(tp)); + tcp_check_timer_state(tp); + } + } + break; + + case TCP_KEEPINTVL: + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + if (optval < 0 || optval > UINT32_MAX/TCP_RETRANSHZ) { + error = EINVAL; + } else { + tp->t_keepintvl = optval * TCP_RETRANSHZ; + if (tp->t_state == TCPS_FIN_WAIT_2 && + TCP_CONN_MAXIDLE(tp) > 0) { + tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, + TCP_CONN_MAXIDLE(tp)); + tcp_check_timer_state(tp); + } + } + break; + + case TCP_KEEPCNT: + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + if (optval < 0 || optval > INT32_MAX) { + error = EINVAL; + } else { + 
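The new `optval > UINT32_MAX/TCP_RETRANSHZ` bound in the TCP_KEEPALIVE and TCP_KEEPINIT setters rejects any seconds value whose conversion to timer ticks would overflow the 32-bit tick fields. The same guard as a standalone helper (the 1000 here reflects TCP_RETRANSHZ's millisecond granularity in this release):

```c
#include <stdint.h>
#include <errno.h>

#define RETRANSHZ 1000 /* ticks per second, mirroring TCP_RETRANSHZ */

/* Convert a user-supplied seconds value to ticks, refusing anything
 * that would overflow a u_int32_t, as the hunk above now does. */
static int seconds_to_ticks(int optval, uint32_t *ticks)
{
    if (optval < 0 || (uint32_t)optval > UINT32_MAX / RETRANSHZ)
        return EINVAL;
    *ticks = (uint32_t)optval * RETRANSHZ;
    return 0;
}
```

Checking against `UINT32_MAX / RETRANSHZ` before multiplying is the canonical way to detect the overflow without ever performing it.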
tp->t_keepcnt = optval; + if (tp->t_state == TCPS_FIN_WAIT_2 && + TCP_CONN_MAXIDLE(tp) > 0) { + tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, + TCP_CONN_MAXIDLE(tp)); + tcp_check_timer_state(tp); + } + } break; case PERSIST_TIMEOUT: @@ -1493,7 +1877,7 @@ tcp_ctloutput(so, sopt) if (optval < 0) error = EINVAL; else - tp->rxt_conndroptime = optval * TCP_RETRANSHZ; + tp->t_rxt_conndroptime = optval * TCP_RETRANSHZ; break; case TCP_NOTSENT_LOWAT: error = sooptcopyin(sopt, &optval, sizeof(optval), @@ -1513,7 +1897,90 @@ tcp_ctloutput(so, sopt) } } break; + case TCP_ADAPTIVE_READ_TIMEOUT: + error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof(optval)); + if (error) + break; + if (optval < 0 || + optval > TCP_ADAPTIVE_TIMEOUT_MAX) { + error = EINVAL; + break; + } else if (optval == 0) { + tp->t_adaptive_rtimo = 0; + tcp_keepalive_reset(tp); + } else { + tp->t_adaptive_rtimo = optval; + } + break; + case TCP_ADAPTIVE_WRITE_TIMEOUT: + error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval)); + if (error) + break; + if (optval < 0 || + optval > TCP_ADAPTIVE_TIMEOUT_MAX) { + error = EINVAL; + break; + } else { + tp->t_adaptive_wtimo = optval; + } + break; + case TCP_ENABLE_MSGS: + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + if (optval < 0 || optval > 1) { + error = EINVAL; + } else if (optval == 1) { + /* + * Check if messages option is already + * enabled, if so return. + */ + if (so->so_flags & SOF_ENABLE_MSGS) { + VERIFY(so->so_msg_state != NULL); + break; + } + /* + * allocate memory for storing message + * related state + */ + VERIFY(so->so_msg_state == NULL); + MALLOC(so->so_msg_state, + struct msg_state *, + sizeof(struct msg_state), + M_TEMP, M_WAITOK | M_ZERO); + if (so->so_msg_state == NULL) { + error = ENOMEM; + break; + } + + /* Enable message delivery */ + so->so_flags |= SOF_ENABLE_MSGS; + } else { + /* + * Can't disable message delivery on socket + * because of restrictions imposed by + * encoding/decoding + */ + error = EINVAL; + } + break; + case TCP_SENDMOREACKS: + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + break; + if (optval < 0 || optval > 1) { + error = EINVAL; + } else if (optval == 0) { + tp->t_flagsext &= ~(TF_NOSTRETCHACK); + } else { + tp->t_flagsext |= TF_NOSTRETCHACK; + } + break; case SO_FLUSH: if ((error = sooptcopyin(sopt, &optval, sizeof (optval), sizeof (optval))) != 0) @@ -1522,6 +1989,20 @@ tcp_ctloutput(so, sopt) error = inp_flush(inp, optval); break; + case SO_TRAFFIC_MGT_BACKGROUND: + if ((error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval))) != 0) + break; + + if (optval) { + socket_set_traffic_mgt_flags_locked(so, + TRAFFIC_MGT_SO_BACKGROUND); + } else { + socket_clear_traffic_mgt_flags_locked(so, + TRAFFIC_MGT_SO_BACKGROUND); + } + break; + default: error = ENOPROTOOPT; break; @@ -1539,6 +2020,12 @@ tcp_ctloutput(so, sopt) case TCP_KEEPALIVE: optval = tp->t_keepidle / TCP_RETRANSHZ; break; + case TCP_KEEPINTVL: + optval = tp->t_keepintvl / TCP_RETRANSHZ; + break; + case TCP_KEEPCNT: + optval = tp->t_keepcnt; + break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; break; @@ -1552,7 +2039,7 @@ tcp_ctloutput(so, sopt) optval = tp->t_persist_timeout / TCP_RETRANSHZ; break; case TCP_RXT_CONNDROPTIME: - optval = tp->rxt_conndroptime / TCP_RETRANSHZ; + optval = tp->t_rxt_conndroptime / TCP_RETRANSHZ; break; case TCP_RXT_FINDROP: optval = tp->t_flagsext & TF_RXTFINDROP; @@ -1587,6 +2074,20 @@ tcp_ctloutput(so, sopt) optval = 0; } 
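With TCP_KEEPINTVL and TCP_KEEPCNT joining the existing TCP_KEEPALIVE (idle time) option, the entire per-connection probe schedule becomes tunable. A hedged userland sketch using the option names from the hunk above (TCP_KEEPALIVE takes the idle time in seconds, TCP_KEEPINTVL the interval between probes, TCP_KEEPCNT the probe count):

```c
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/* Probe after idle_s seconds idle, every intvl_s seconds, cnt times. */
static int tune_keepalive(int fd, int idle_s, int intvl_s, int cnt)
{
    int on = 1;
    if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) == -1 ||
        setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &idle_s, sizeof(idle_s)) == -1 ||
        setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl_s, sizeof(intvl_s)) == -1 ||
        setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) == -1)
        return -1;
    return 0;
}
```

Note that the setter paths above also re-arm TCPT_KEEP or TCPT_2MSL immediately when the connection is in a state the new value affects, rather than waiting for the next timer pass. The new TCP_ADAPTIVE_READ_TIMEOUT and TCP_ADAPTIVE_WRITE_TIMEOUT options follow the same copyin-and-validate shape but take a probe count rather than seconds: 0 disables (and resets keepalive state on the read side), and values above TCP_ADAPTIVE_TIMEOUT_MAX (10) are rejected.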
break; + + case TCP_ENABLE_MSGS: + if (so->so_flags & SOF_ENABLE_MSGS) { + optval = 1; + } else { + optval = 0; + } + break; + case TCP_SENDMOREACKS: + if (tp->t_flagsext & TF_NOSTRETCHACK) + optval = 1; + else + optval = 0; + break; case TCP_PEER_PID: { pid_t pid; error = tcp_lookup_peer_pid_locked(so, &pid); @@ -1594,6 +2095,16 @@ tcp_ctloutput(so, sopt) error = sooptcopyout(sopt, &pid, sizeof(pid)); goto done; } + case TCP_ADAPTIVE_READ_TIMEOUT: + optval = tp->t_adaptive_rtimo; + break; + case TCP_ADAPTIVE_WRITE_TIMEOUT: + optval = tp->t_adaptive_wtimo; + break; + case SO_TRAFFIC_MGT_BACKGROUND: + optval = (so->so_traffic_mgt_flags & + TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0; + break; default: error = ENOPROTOOPT; break; @@ -1674,7 +2185,7 @@ tcp_attach(so, p) struct inpcb *inp; int error; #if INET6 - int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; + int isipv6 = SOCK_CHECK_DOM(so, PF_INET6) != 0; #endif error = in_pcballoc(so, &tcbinfo, p); @@ -1702,7 +2213,7 @@ tcp_attach(so, p) #endif /* INET6 */ inp->inp_vflag |= INP_IPV4; tp = tcp_newtcpcb(inp); - if (tp == 0) { + if (tp == NULL) { int nofd = so->so_state & SS_NOFDREF; /* XXX */ so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ @@ -1744,6 +2255,12 @@ tcp_disconnect(tp) soisdisconnecting(so); sbflush(&so->so_rcv); tp = tcp_usrclosed(tp); +#if MPTCP + /* A reset has been sent but socket exists, do not send FIN */ + if ((so->so_flags & SOF_MP_SUBFLOW) && + (tp) && (tp->t_mpflags & TMPF_RESET)) + return (tp); +#endif if (tp) (void) tcp_output(tp); } @@ -1769,7 +2286,6 @@ tcp_usrclosed(tp) case TCPS_CLOSED: case TCPS_LISTEN: - tp->t_state = TCPS_CLOSED; tp = tcp_close(tp); break; @@ -1779,10 +2295,18 @@ tcp_usrclosed(tp) break; case TCPS_ESTABLISHED: + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, tp->t_inpcb, + struct tcpcb *, tp, + int32_t, TCPS_FIN_WAIT_1); tp->t_state = TCPS_FIN_WAIT_1; break; case TCPS_CLOSE_WAIT: + DTRACE_TCP4(state__change, void, NULL, + struct inpcb *, tp->t_inpcb, + struct tcpcb *, tp, + int32_t, TCPS_LAST_ACK); tp->t_state = TCPS_LAST_ACK; break; } @@ -1790,7 +2314,8 @@ tcp_usrclosed(tp) soisdisconnected(tp->t_inpcb->inp_socket); /* To prevent the connection hanging in FIN_WAIT_2 forever. */ if (tp->t_state == TCPS_FIN_WAIT_2) - tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, tcp_maxidle); + tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp, + TCP_CONN_MAXIDLE(tp)); } return (tp); } @@ -1798,13 +2323,57 @@ tcp_usrclosed(tp) void tcp_in_cksum_stats(u_int32_t len) { - tcps_in_sw_cksum++; - tcps_in_sw_cksum_bytes += len; + tcpstat.tcps_rcv_swcsum++; + tcpstat.tcps_rcv_swcsum_bytes += len; } void tcp_out_cksum_stats(u_int32_t len) { - tcps_out_sw_cksum++; - tcps_out_sw_cksum_bytes += len; + tcpstat.tcps_snd_swcsum++; + tcpstat.tcps_snd_swcsum_bytes += len; +} + +#if INET6 +void +tcp_in6_cksum_stats(u_int32_t len) +{ + tcpstat.tcps_rcv6_swcsum++; + tcpstat.tcps_rcv6_swcsum_bytes += len; } + +void +tcp_out6_cksum_stats(u_int32_t len) +{ + tcpstat.tcps_snd6_swcsum++; + tcpstat.tcps_snd6_swcsum_bytes += len; +} + +/* + * When messages are enabled on a TCP socket, the message priority + * is sent as a control message. This function will extract it. 
+ */ +int +tcp_get_msg_priority(struct mbuf *control, uint32_t *msgpri) +{ + struct cmsghdr *cm; + if (control == NULL) + return(EINVAL); + + for (cm = M_FIRST_CMSGHDR(control); cm; + cm = M_NXT_CMSGHDR(control, cm)) { + if (cm->cmsg_len < sizeof(struct cmsghdr) || + cm->cmsg_len > control->m_len) { + return (EINVAL); + } + if (cm->cmsg_level == SOL_SOCKET && + cm->cmsg_type == SCM_MSG_PRIORITY) { + *msgpri = *(unsigned int *)(void *)CMSG_DATA(cm); + break; + } + } + + VERIFY(*msgpri >= MSG_PRI_MIN && *msgpri <= MSG_PRI_MAX); + return (0); +} +#endif /* INET6 */ diff --git a/bsd/netinet/tcp_var.h b/bsd/netinet/tcp_var.h index efffc7bad..f92c3e618 100644 --- a/bsd/netinet/tcp_var.h +++ b/bsd/netinet/tcp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,7 +84,7 @@ struct name { \ /* Minimum time quantum within which the timers are coalesced */ #define TCP_FASTTIMER_QUANTUM TCP_TIMERHZ /* fast mode, once every 100ms */ -#define TCP_SLOWTIMER_QUANTUM TCP_RETRANSHZ / PR_SLOWHZ /* slow mode, once every 500ms */ +#define TCP_SLOWTIMER_QUANTUM (TCP_RETRANSHZ/2) /* slow mode, once every 500ms */ #define TCP_RETRANSHZ_TO_USEC 1000 @@ -130,6 +130,23 @@ struct name { \ */ #define RESET_IAJ_SIZE_THRESH 20 +/* + * Adaptive timeout is a read/write timeout specified by the application to + * get a socket event when the transport layer detects a stall in data + * transfer. The value specified is the number of probes that can be sent + * to the peer before generating an event. Since it is not specified as + * a time value, the timeout will adjust based on the RTT seen on the link. + * The timeout will start only when there is an indication that the read/write + * operation is not making progress. + * + * If a write operation stalls, the probe will be retransmission of data. + * If a read operation stalls, the probe will be a keep-alive packet. + * + * The maximum value of adaptive timeout is set to 10 which will allow + * transmission of enough number of probes to the peer. + */ +#define TCP_ADAPTIVE_TIMEOUT_MAX 10 + /* * Kernel variables for tcp. 
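tcp_get_msg_priority() above walks the sender's control mbufs for an SOL_SOCKET/SCM_MSG_PRIORITY record when TCP_ENABLE_MSGS is on. That option is private, so the sketch below fakes the constant; the cmsg plumbing itself is the standard sendmsg(2) pattern. Note the kernel routine VERIFYs *msgpri even when no matching cmsg is found, so callers presumably seed it with a default priority before calling:

```c
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdint.h>
#include <string.h>

#ifndef SCM_MSG_PRIORITY
#define SCM_MSG_PRIORITY 0x1010 /* hypothetical value; the real one is private */
#endif

/* Send one message with an attached priority control message. */
static ssize_t send_with_priority(int fd, const void *buf, size_t len, uint32_t pri)
{
    /* union guarantees cmsghdr alignment for the control buffer */
    union { char b[CMSG_SPACE(sizeof(uint32_t))]; struct cmsghdr align; } cbuf;
    struct iovec iov = { (void *)buf, len };
    struct msghdr msg;
    struct cmsghdr *cm;

    memset(&msg, 0, sizeof(msg));
    memset(&cbuf, 0, sizeof(cbuf));
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cbuf.b;
    msg.msg_controllen = sizeof(cbuf.b);

    cm = CMSG_FIRSTHDR(&msg);
    cm->cmsg_level = SOL_SOCKET;
    cm->cmsg_type = SCM_MSG_PRIORITY;
    cm->cmsg_len = CMSG_LEN(sizeof(uint32_t));
    memcpy(CMSG_DATA(cm), &pri, sizeof(pri));

    return sendmsg(fd, &msg, 0);
}
```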
*/ @@ -181,6 +198,13 @@ struct bwmeas { uint32_t bw_sndbw; /* Measured send bw */ }; +/* MPTCP Data sequence map entry */ +struct mpt_dsn_map { + uint64_t mpt_dsn; /* data seq num recvd */ + uint32_t mpt_sseq; /* relative subflow # */ + uint16_t mpt_len; /* length of mapping */ + uint16_t mpt_csum; /* checksum value if on */ +}; #define tcp6cb tcpcb /* for KAME src sync over BSD*'s */ /* @@ -195,7 +219,7 @@ struct tcpcb { struct inpcb *t_inpcb; /* back pointer to internet pcb */ int t_state; /* state of this connection */ - u_int t_flags; + uint32_t t_flags; #define TF_ACKNOW 0x00001 /* ack peer immediately */ #define TF_DELACK 0x00002 /* ack, but try to delay it */ #define TF_NODELAY 0x00004 /* don't delay packets to coalesce */ @@ -275,8 +299,8 @@ struct tcpcb { u_int t_rttbest; /* best rtt we've seen */ u_int t_rttcur; /* most recent value of rtt */ u_int32_t t_rttupdated; /* number of times rtt sampled */ - u_int32_t rxt_conndroptime; /* retxmt conn gets dropped after this time, when set */ - u_int32_t rxt_start; /* time at a connection starts retransmitting */ + u_int32_t t_rxt_conndroptime; /* retxmt conn gets dropped after this time, when set */ + u_int32_t t_rxtstart; /* time at which retransmission started */ u_int32_t max_sndwnd; /* largest window peer has offered */ int t_softerror; /* possible error not yet reported */ @@ -286,11 +310,13 @@ struct tcpcb { #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 /* RFC 1323 variables */ - u_char snd_scale; /* window scaling for send window */ - u_char rcv_scale; /* window scaling for recv window */ - u_char request_r_scale; /* pending window scaling */ - u_char requested_s_scale; - u_int16_t tcp_cc_index; /* index of congestion control algorithm */ + u_int8_t snd_scale; /* window scaling for send window */ + u_int8_t rcv_scale; /* window scaling for recv window */ + u_int8_t request_r_scale; /* pending window scaling */ + u_int8_t requested_s_scale; + u_int8_t tcp_cc_index; /* index of congestion control algorithm */ + u_int8_t t_adaptive_rtimo; /* Read timeout used as a multiple of RTT */ + u_int8_t t_adaptive_wtimo; /* Write timeout used as a multiple of RTT */ u_int32_t ts_recent; /* timestamp echo data */ u_int32_t ts_recent_age; /* when last updated */ @@ -300,24 +326,15 @@ struct tcpcb { tcp_cc cc_recv; /* receive connection count */ /* RFC 3465 variables */ u_int32_t t_bytes_acked; /* ABC "bytes_acked" parameter */ -/* experimental */ - u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ - u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ - u_int32_t t_badrxtwin; /* window for retransmit recovery */ - int t_keepidle; /* keepalive idle timer (override global if > 0) */ int t_lastchain; /* amount of packets chained last time around */ - int t_unacksegs; /* received but unacked segments: used for delaying acks */ + u_int16_t t_unacksegs; /* received but unacked segments for delaying acks */ + u_int8_t t_rexmtthresh; /* duplicate ack threshold for entering fast recovery */ + u_int8_t t_rtimo_probes; /* number of adaptive rtimo probes sent */ u_int32_t t_persist_timeout; /* ZWP persistence limit as set by PERSIST_TIMEOUT */ u_int32_t t_persist_stop; /* persistence limit deadline if triggered by ZWP */ u_int32_t t_notsent_lowat; /* Low water for not sent data */ -/* 3529618 MSS overload prevention */ - u_int32_t rcv_reset; - u_int32_t rcv_pps; - u_int32_t rcv_byps; - u_int32_t rcv_maxbyps; - /* Receiver state for stretch-ack algorithm */ u_int32_t rcv_unackwin; /* to measure win for stretching acks */ 
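The new mpt_dsn_map entry carries one MPTCP data-sequence mapping: a 64-bit data-level sequence number, the relative subflow sequence it starts at, a length, and an optional checksum. A small sketch of what such a mapping means, translating a subflow sequence to its data-level counterpart (field names loosely mirror the struct above):

```c
#include <stdint.h>
#include <stdbool.h>

/* Loose mirror of struct mpt_dsn_map. */
struct dsn_map { uint64_t dsn; uint32_t sseq; uint16_t len; };

/* Map a relative subflow sequence number to the data-level sequence
 * it corresponds to, if it falls inside this mapping. */
static bool subflow_to_dsn(const struct dsn_map *m, uint32_t sseq, uint64_t *dsn)
{
    if (sseq < m->sseq || sseq >= m->sseq + (uint32_t)m->len)
        return false;                 /* outside this mapping */
    *dsn = m->dsn + (sseq - m->sseq); /* byte-for-byte offset */
    return true;
}
```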
u_int32_t rcv_by_unackwin; /* bytes seen during the last ack-stretching win */ @@ -330,13 +347,20 @@ struct tcpcb { #define TE_SENDECE 0x10 /* Indicate that the next packet should have the TCP ECE flag set */ #define TE_ECN_ON (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */ +/* state for bad retransmit recovery */ + u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ + u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ -/* anti DoS counters */ - u_int32_t rcv_second; /* start of interval second */ + int t_srtt_prev; /* srtt prior to retransmit */ + int t_rttvar_prev; /* rttvar prior to retransmit */ + u_int32_t t_badrexmt_time; /* bad rexmt detection time */ + +/* state to limit the number of early retransmits */ + u_int32_t t_early_rexmt_win; /* window for limiting early retransmits */ + u_int16_t t_early_rexmt_count; /* number of early rexmts seen in past window */ /* SACK related state */ - int sack_enable; /* enable SACK for this connection */ - int snd_numholes; /* number of holes seen by sender */ + int16_t snd_numholes; /* number of holes seen by sender */ TAILQ_HEAD(sackhole_head, sackhole) snd_holes; /* SACK scoreboard (sorted) */ tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ @@ -350,7 +374,12 @@ struct tcpcb { struct mbuf *t_pktlist_head; /* First packet in transmit chain */ struct mbuf *t_pktlist_tail; /* Last packet in transmit chain */ - int t_keepinit; /* connection timeout, i.e. idle time in SYN_SENT or SYN_RECV state */ + u_int32_t t_keepidle; /* keepalive idle timer (override global if > 0) */ + u_int32_t t_keepinit; /* connection timeout, i.e. idle time + in SYN_SENT or SYN_RECV state */ + u_int32_t t_keepintvl; /* interval between keepalives */ + u_int32_t t_keepcnt; /* number of keepalives before close */ + u_int32_t tso_max_segment_size; /* TCP Segment Offloading maximum segment unit for NIC */ u_int t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */ @@ -359,7 +388,9 @@ struct tcpcb { u_int32_t rxduplicatebytes; u_int32_t rxoutoforderbytes; u_int32_t txretransmitbytes; - u_int32_t unused_pad_to_8; + u_int8_t synrxtshift; + u_int8_t unused; + u_int16_t unused_pad_to_8; } t_stat; /* Background congestion related state */ @@ -372,6 +403,12 @@ struct tcpcb { #define TF_BWMEAS_INPROGRESS 0x4 /* Indicate BW meas is happening */ #define TF_MEASURESNDBW 0x8 /* Measure send bw on this connection */ #define TF_LRO_OFFLOADED 0x10 /* Connection LRO offloaded */ +#define TF_SACK_ENABLE 0x20 /* SACK is enabled */ +#define TF_RECOMPUTE_RTT 0x40 /* recompute RTT after spurious retransmit */ +#define TF_DETECT_READSTALL 0x80 /* Used to detect a stall during read operation */ +#define TF_RECV_THROTTLE 0x100 /* Input throttling active */ +#define TF_NOSTRETCHACK 0x200 /* ack every other packet */ +#define TF_STREAMEOW 0x400 /* Last packet was small indicating end of write */ #if TRAFFIC_MGT /* Inter-arrival jitter related state */ uint32_t iaj_rcv_ts; /* tcp clock when the first packet was received */ @@ -386,9 +423,43 @@ struct tcpcb { struct bwmeas *t_bwmeas; /* State for bandwidth measurement */ uint32_t t_lropktlen; /* Bytes in a LRO frame */ tcp_seq t_idleat; /* rcv_nxt at idle time */ + TAILQ_ENTRY(tcpcb) t_twentry; /* link for time wait queue */ +#if MPTCP + u_int32_t t_mpflags; /* flags for multipath TCP */ + +#define TMPF_PREESTABLISHED 0x00000001 /* conn in pre-established state */ +#define TMPF_SENT_KEYS 
0x00000002 /* indicates that keys were sent */ +#define TMPF_MPTCP_TRUE 0x00000004 /* negotiated MPTCP successfully */ +#define TMPF_MPTCP_RCVD_KEY 0x00000008 /* state for 3-way handshake */ +#define TMPF_SND_MPPRIO 0x00000010 /* send priority of subflow */ +#define TMPF_SND_REM_ADDR 0x00000020 /* initiate address removal */ +#define TMPF_UNUSED 0x00000040 /* address addition acked by peer */ +#define TMPF_JOINED_FLOW 0x00000080 /* Indicates additional flow */ +#define TMPF_BACKUP_PATH 0x00000100 /* Indicates backup path */ +#define TMPF_MPTCP_ACKNOW 0x00000200 /* Send Data ACK */ +#define TMPF_SEND_DSN 0x00000400 /* Send DSN mapping */ +#define TMPF_SEND_DFIN 0x00000800 /* Send Data FIN */ +#define TMPF_RECV_DFIN 0x00001000 /* Recv Data FIN */ +#define TMPF_SENT_JOIN 0x00002000 /* Sent Join */ +#define TMPF_RECVD_JOIN 0x00004000 /* Received Join */ +#define TMPF_RESET 0x00008000 /* Send RST */ +#define TMPF_TCP_FALLBACK 0x00010000 /* Fallback to TCP */ +#define TMPF_FASTCLOSE 0x00020000 /* Send Fastclose option */ +#define TMPF_EMBED_DSN 0x00040000 /* tp has DSN mapping */ +#define TMPF_MPTCP_READY 0x00080000 /* Can send DSS options on data */ +#define TMPF_INFIN_SENT 0x00100000 /* Sent infinite mapping */ +#define TMPF_SND_MPFAIL 0x00200000 /* Received mapping csum failure */ + void *t_mptcb; /* pointer to MPTCP TCB */ + tcp_seq t_mpuna; /* unacknowledged sequence */ + struct mpt_dsn_map t_rcv_map; /* Receive mapping list */ + u_int8_t t_local_aid; /* Addr Id for authentication */ + u_int8_t t_rem_aid; /* Addr ID of another subflow */ + u_int8_t t_mprxtshift; /* join retransmission */ +#endif /* MPTCP */ }; #define IN_FASTRECOVERY(tp) (tp->t_flags & TF_FASTRECOVERY) +#define SACK_ENABLED(tp) (tp->t_flagsext & TF_SACK_ENABLE) /* * If the connection is in a throttled state due to advisory feedback from @@ -403,7 +474,35 @@ struct tcpcb { inp_reset_fc_state((_tp_)->t_inpcb); \ } while(0) -#define EXIT_FASTRECOVERY(tp) tp->t_flags &= ~TF_FASTRECOVERY +#define EXIT_FASTRECOVERY(_tp_) do { \ + (_tp_)->t_flags &= ~TF_FASTRECOVERY; \ + (_tp_)->t_dupacks = 0; \ + (_tp_)->t_rexmtthresh = tcprexmtthresh; \ + (_tp_)->t_bytes_acked = 0; \ +} while(0) + +/* + * When the number of duplicate acks received is less than + * the retransmit threshold, use Limited Transmit algorithm + */ +extern int tcprexmtthresh; +#define ALLOW_LIMITED_TRANSMIT(_tp_) \ + ((_tp_)->t_dupacks > 0 && \ + (_tp_)->t_dupacks < (_tp_)->t_rexmtthresh) + +/* + * This condition is true if timestamp option is supported + * on a connection.
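ALLOW_LIMITED_TRANSMIT gates RFC 3042 Limited Transmit: while the duplicate-ACK count is still below the fast-retransmit threshold, each dupack may clock out one previously unsent segment instead of leaving the sender idle. The reworked EXIT_FASTRECOVERY likewise resets the dupack machinery on exit. Both, mirrored in a toy state struct:

```c
#include <stdbool.h>

struct cc_state { int dupacks, rexmtthresh; unsigned bytes_acked; bool fastrecovery; };

/* RFC 3042: below the fast-retransmit threshold (normally 3 dupacks),
 * permit one new segment per dupack -- mirrors ALLOW_LIMITED_TRANSMIT. */
static bool allow_limited_transmit(const struct cc_state *s)
{
    return s->dupacks > 0 && s->dupacks < s->rexmtthresh;
}

/* Leaving fast recovery also resets the dupack bookkeeping,
 * as the reworked EXIT_FASTRECOVERY above now does. */
static void exit_fastrecovery(struct cc_state *s, int default_thresh)
{
    s->fastrecovery = false;
    s->dupacks = 0;
    s->rexmtthresh = default_thresh; /* tcprexmtthresh in the kernel */
    s->bytes_acked = 0;
}
```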
+ */ +#define TSTMP_SUPPORTED(_tp_) \ + (((_tp_)->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP)) == \ + (TF_REQ_TSTMP|TF_RCVD_TSTMP)) + +/* + * Gives number of bytes acked by this ack + */ +#define BYTES_ACKED(_th_, _tp_) \ + ((_th_)->th_ack - (_tp_)->snd_una) #if CONFIG_DTRACE enum tcp_cc_event { @@ -421,7 +520,9 @@ enum tcp_cc_event { TCP_CC_OUTPUT_ERROR, TCP_CC_CHANGE_ALGO, TCP_CC_FLOW_CONTROL, - TCP_CC_SUSPEND + TCP_CC_SUSPEND, + TCP_CC_LIMITED_TRANSMIT, + TCP_CC_EARLY_RETRANSMIT }; #endif /* CONFIG_DTRACE */ @@ -439,6 +540,7 @@ struct tcpopt { #define TOF_SIGNATURE 0x0040 /* signature option present */ #define TOF_SIGLEN 0x0080 /* signature length valid (RFC2385) */ #define TOF_SACK 0x0100 /* Peer sent SACK option */ +#define TOF_MPTCP 0x0200 /* MPTCP options to be dropped */ u_int32_t to_tsval; u_int32_t to_tsecr; u_int16_t to_mss; @@ -729,6 +831,42 @@ struct tcpstat { u_int32_t tcps_lro_twopack; /* 2 packets coalesced */ u_int32_t tcps_lro_multpack; /* 3 or 4 pkts coalesced */ u_int32_t tcps_lro_largepack; /* 5 or more pkts coalesced */ + + u_int32_t tcps_limited_txt; /* Limited transmit used */ + u_int32_t tcps_early_rexmt; /* Early retransmit used */ + u_int32_t tcps_sack_ackadv; /* Cumulative ack advanced along with sack */ + + /* Checksum related stats */ + u_int32_t tcps_rcv_swcsum; /* tcp swcksum (inbound), packets */ + u_int32_t tcps_rcv_swcsum_bytes; /* tcp swcksum (inbound), bytes */ + u_int32_t tcps_rcv6_swcsum; /* tcp6 swcksum (inbound), packets */ + u_int32_t tcps_rcv6_swcsum_bytes; /* tcp6 swcksum (inbound), bytes */ + u_int32_t tcps_snd_swcsum; /* tcp swcksum (outbound), packets */ + u_int32_t tcps_snd_swcsum_bytes; /* tcp swcksum (outbound), bytes */ + u_int32_t tcps_snd6_swcsum; /* tcp6 swcksum (outbound), packets */ + u_int32_t tcps_snd6_swcsum_bytes; /* tcp6 swcksum (outbound), bytes */ + u_int32_t tcps_msg_unopkts; /* unordered packet on TCP msg stream */ + u_int32_t tcps_msg_unoappendfail; /* failed to append unordered pkt */ + u_int32_t tcps_msg_sndwaithipri; /* send is waiting for high priority data */ + + /* MPTCP Related stats */ + u_int32_t tcps_invalid_mpcap; /* Invalid MPTCP capable opts */ + u_int32_t tcps_invalid_joins; /* Invalid MPTCP joins */ + u_int32_t tcps_mpcap_fallback; /* TCP fallback in primary */ + u_int32_t tcps_join_fallback; /* No MPTCP in secondary */ + u_int32_t tcps_estab_fallback; /* DSS option dropped */ + u_int32_t tcps_invalid_opt; /* Catchall error stat */ + u_int32_t tcps_mp_outofwin; /* Packet lies outside the + shared recv window */ + u_int32_t tcps_mp_reducedwin; /* Reduced subflow window */ + u_int32_t tcps_mp_badcsum; /* Bad DSS csum */ + u_int32_t tcps_mp_oodata; /* Out of order data */ + u_int32_t tcps_mp_switches; /* number of subflow switch */ + u_int32_t tcps_mp_rcvtotal; /* number of rcvd packets */ + u_int32_t tcps_mp_rcvbytes; /* number of bytes received */ + u_int32_t tcps_mp_sndpacks; /* number of data packs sent */ + u_int32_t tcps_mp_sndbytes; /* number of bytes sent */ + u_int32_t tcps_join_rxmts; /* join ack retransmits */ }; struct tcpstat_local { @@ -772,7 +910,6 @@ struct xtcpcb { u_quad_t xt_alignment_hack; }; -#if !CONFIG_EMBEDDED struct xtcpcb64 { u_int32_t xt_len; @@ -853,7 +990,6 @@ struct xtcpcb64 { u_quad_t xt_alignment_hack; }; -#endif /* !CONFIG_EMBEDDED */ #ifdef PRIVATE @@ -931,7 +1067,6 @@ struct xtcpcb_n { /* experimental */ u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */ u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ - u_int32_t t_badrxtwin; /* window for retransmit recovery */ 
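BYTES_ACKED leans on unsigned 32-bit arithmetic: `th_ack - snd_una` yields the right byte count even when the sequence space wraps, so no explicit wrap handling is needed:

```c
#include <stdint.h>

typedef uint32_t tcp_seq;

/* Bytes newly acknowledged by this ACK; unsigned subtraction handles
 * sequence-number wraparound for free (mirrors BYTES_ACKED above). */
static uint32_t bytes_acked(tcp_seq th_ack, tcp_seq snd_una)
{
    return th_ack - snd_una;
}
/* e.g. bytes_acked(0x00000010, 0xfffffff0) == 0x20 across the wrap */
```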
}; #endif /* PRIVATE */ @@ -956,7 +1091,9 @@ struct xtcpcb_n { #define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ #define TCPCTL_MAXID 14 -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE +#include + #define TCP_PKTLIST_CLEAR(tp) { \ (tp)->t_pktlist_head = (tp)->t_pktlist_tail = NULL; \ (tp)->t_lastchain = (tp)->t_pktlist_sentlen = 0; \ @@ -983,15 +1120,22 @@ struct xtcpcb_n { SYSCTL_DECL(_net_inet_tcp); #endif /* SYSCTL_DECL */ +/* + * Flags for TCP's connectx(2) user-protocol request routine. + */ +#if MPTCP +#define TCP_CONNREQF_MPTCP 0x1 /* called internally by MPTCP */ +#endif /* MPTCP */ + extern struct inpcbhead tcb; /* head of queue of active tcpcb's */ extern struct inpcbinfo tcbinfo; extern struct tcpstat tcpstat; /* tcp statistics */ extern int tcp_mssdflt; /* XXX */ extern int tcp_minmss; -extern int tcp_minmssoverload; extern int ss_fltsz; extern int ss_fltsz_local; extern int tcp_do_rfc3390; /* Calculate ss_fltsz according to RFC 3390 */ +extern int target_qdelay; #ifdef __APPLE__ extern u_int32_t tcp_now; /* for RFC 1323 timestamps */ extern struct timeval tcp_uptime; @@ -1006,6 +1150,9 @@ extern int tcp_do_sack; /* SACK enabled/disabled */ extern int tcp_obey_ifef_nowindowscale; #endif +struct protosw; +struct domain; + void tcp_canceltimers(struct tcpcb *); struct tcpcb * tcp_close(struct tcpcb *); @@ -1017,7 +1164,7 @@ void tcp_drain(void); void tcp_getrt_rtt(struct tcpcb *tp, struct rtentry *rt); struct rmxp_tao * tcp_gettaocache(struct inpcb *); -void tcp_init(void) __attribute__((section("__TEXT, initcode"))); +void tcp_init(struct protosw *, struct domain *); void tcp_input(struct mbuf *, int); void tcp_mss(struct tcpcb *, int, unsigned int); int tcp_mssopt(struct tcpcb *); @@ -1031,7 +1178,7 @@ void tcp_respond(struct tcpcb *, void *, unsigned int, unsigned int); struct rtentry *tcp_rtlookup(struct inpcb *, unsigned int); void tcp_setpersist(struct tcpcb *); -void tcp_slowtimo(void); +void tcp_gc(struct inpcbinfo *); void tcp_check_timer_state(struct tcpcb *tp); void tcp_run_timerlist(void *arg1, void *arg2); @@ -1042,7 +1189,7 @@ struct tcpcb * tcp_timers(struct tcpcb *, int); void tcp_trace(int, int, struct tcpcb *, void *, struct tcphdr *, int); -void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); +void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq, u_int32_t *); void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); void tcp_clean_sackreport(struct tcpcb *tp); void tcp_sack_adjust(struct tcpcb *tp); @@ -1052,10 +1199,10 @@ void tcp_free_sackholes(struct tcpcb *tp); int32_t tcp_sbspace(struct tcpcb *tp); void tcp_set_tso(struct tcpcb *tp, struct ifnet *ifp); void tcp_reset_stretch_ack(struct tcpcb *tp); -void tcp_get_ports_used(unsigned int , uint8_t *); +extern void tcp_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *); uint32_t tcp_count_opportunistic(unsigned int ifindex, u_int32_t flags); +uint32_t tcp_find_anypcb_byaddr(struct ifaddr *ifa); void tcp_set_max_rwinscale(struct tcpcb *tp, struct socket *so); -u_int8_t tcp_cansbgrow(struct sockbuf *sb); struct bwmeas* tcp_bwmeas_alloc(struct tcpcb *tp); void tcp_bwmeas_free(struct tcpcb *tp); @@ -1075,6 +1222,9 @@ int tcp_lock (struct socket *, int, void *); int tcp_unlock (struct socket *, int, void *); void calculate_tcp_clock(void); +extern void mptcp_insert_rmap(struct tcpcb *, struct mbuf *); +extern void tcp_keepalive_reset(struct tcpcb *); + #ifdef _KERN_LOCKS_H_ lck_mtx_t * tcp_getlock (struct socket *, int); #else @@ -1087,6 +1237,14 @@ extern 
u_int32_t tcp_sendspace; extern u_int32_t tcp_recvspace; tcp_seq tcp_new_isn(struct tcpcb *); -#endif /* KERNEL_RPIVATE */ +extern int tcp_input_checksum(int, struct mbuf *, struct tcphdr *, int, int); +extern void tcp_getconninfo(struct socket *, struct conninfo_tcp *); +#if MPTCP +extern uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, int); +extern void mptcp_output_csum(struct tcpcb *, struct mbuf *, int32_t, unsigned, + u_int64_t, u_int32_t *); +extern int mptcp_adj_mss(struct tcpcb *, boolean_t); +#endif +#endif /* BSD_KERNEL_RPIVATE */ #endif /* _NETINET_TCP_VAR_H_ */ diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c index e5462393d..1b2bd8b1f 100644 --- a/bsd/netinet/udp_usrreq.c +++ b/bsd/netinet/udp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,24 +76,27 @@ #include #include +#include #include #include #include +#include #include #include #include #if INET6 #include -#endif +#endif /* INET6 */ #include #include #include #if INET6 #include #include -#endif +#include +#endif /* INET6 */ #include #include #include @@ -104,91 +107,80 @@ #include #include extern int ipsec_bypass; -#endif /*IPSEC*/ +extern int esp_udp_encap_port; +#endif /* IPSEC */ - -#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETUDP, 0) -#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETUDP, 2) -#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETUDP, 1) -#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETUDP, 3) -#define DBG_FNC_UDP_INPUT NETDBG_CODE(DBG_NETUDP, (5 << 8)) -#define DBG_FNC_UDP_OUTPUT NETDBG_CODE(DBG_NETUDP, (6 << 8) | 1) +#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETUDP, 0) +#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETUDP, 2) +#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETUDP, 1) +#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETUDP, 3) +#define DBG_FNC_UDP_INPUT NETDBG_CODE(DBG_NETUDP, (5 << 8)) +#define DBG_FNC_UDP_OUTPUT NETDBG_CODE(DBG_NETUDP, (6 << 8) | 1) /* * UDP protocol implementation. * Per RFC 768, August, 1980. 
*/ #ifndef COMPAT_42 -static int udpcksum = 1; +static int udpcksum = 1; #else -static int udpcksum = 0; /* XXX */ +static int udpcksum = 0; /* XXX */ #endif -SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, CTLFLAG_RW | CTLFLAG_LOCKED, - &udpcksum, 0, ""); - -static u_int32_t udps_in_sw_cksum; -SYSCTL_UINT(_net_inet_udp, OID_AUTO, in_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED, - &udps_in_sw_cksum, 0, - "Number of received packets checksummed in software"); - -static u_int64_t udps_in_sw_cksum_bytes; -SYSCTL_QUAD(_net_inet_udp, OID_AUTO, in_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, - &udps_in_sw_cksum_bytes, - "Amount of received data checksummed in software"); - -static u_int32_t udps_out_sw_cksum; -SYSCTL_UINT(_net_inet_udp, OID_AUTO, out_sw_cksum, CTLFLAG_RD | CTLFLAG_LOCKED, - &udps_out_sw_cksum, 0, - "Number of transmitted packets checksummed in software"); - -static u_int64_t udps_out_sw_cksum_bytes; -SYSCTL_QUAD(_net_inet_udp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD | CTLFLAG_LOCKED, - &udps_out_sw_cksum_bytes, - "Amount of transmitted data checksummed in software"); - -int log_in_vain = 0; +SYSCTL_INT(_net_inet_udp, UDPCTL_CHECKSUM, checksum, + CTLFLAG_RW | CTLFLAG_LOCKED, &udpcksum, 0, ""); + +int udp_log_in_vain = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, log_in_vain, CTLFLAG_RW | CTLFLAG_LOCKED, - &log_in_vain, 0, "Log all incoming UDP packets"); + &udp_log_in_vain, 0, "Log all incoming UDP packets"); -static int blackhole = 0; +static int blackhole = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW | CTLFLAG_LOCKED, - &blackhole, 0, "Do not send port unreachables for refused connects"); + &blackhole, 0, "Do not send port unreachables for refused connects"); -struct inpcbhead udb; /* from udp_var.h */ +struct inpcbhead udb; /* from udp_var.h */ #define udb6 udb /* for KAME src sync over BSD*'s */ -struct inpcbinfo udbinfo; +struct inpcbinfo udbinfo; #ifndef UDBHASHSIZE #define UDBHASHSIZE 16 #endif -extern int esp_udp_encap_port; - -extern void ipfwsyslog( int level, const char *format,...); - -extern int fw_verbose; -static int udp_gc_done = FALSE; /* Garbage collection performed last slowtimo */ +/* Garbage collection performed during most recent udp_gc() run */ +static boolean_t udp_gc_done = FALSE; #if IPFIREWALL -#define log_in_vain_log( a ) { \ - if ( (log_in_vain == 3 ) && (fw_verbose == 2)) { /* Apple logging, log to ipfw.log */ \ - ipfwsyslog a ; \ - } \ - else log a ; \ +extern int fw_verbose; +extern void ipfwsyslog( int level, const char *format,...); +extern void ipfw_stealth_stats_incr_udp(void); + +/* Apple logging, log to ipfw.log */ +#define log_in_vain_log(a) { \ + if ((udp_log_in_vain == 3) && (fw_verbose == 2)) { \ + ipfwsyslog a; \ + } else if ((udp_log_in_vain == 4) && (fw_verbose == 2)) { \ + ipfw_stealth_stats_incr_udp(); \ + } else { \ + log a; \ + } \ } -#else +#else /* !IPFIREWALL */ #define log_in_vain_log( a ) { log a; } -#endif +#endif /* !IPFIREWALL */ +static int udp_getstat SYSCTL_HANDLER_ARGS; struct udpstat udpstat; /* from udp_var.h */ -SYSCTL_STRUCT(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, - &udpstat, udpstat, "UDP statistics (struct udpstat, netinet/udp_var.h)"); -SYSCTL_INT(_net_inet_udp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED, - &udbinfo.ipi_count, 0, "Number of active PCBs"); +SYSCTL_PROC(_net_inet_udp, UDPCTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, udp_getstat, "S,udpstat", + "UDP statistics (struct udpstat, netinet/udp_var.h)"); + +SYSCTL_INT(_net_inet_udp, OID_AUTO, pcbcount, + 
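udp_log_in_vain (renamed from log_in_vain) and blackhole remain plain sysctl knobs; with this change, levels 3 and 4 route "stealth mode" hits to ipfw logging or the new stealth counters instead of the system log. A userland sketch for flipping them, using the sysctl names straight from the declarations above:

```c
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

static int set_udp_knob(const char *name, int value)
{
    if (sysctlbyname(name, NULL, NULL, &value, sizeof(value)) == -1) {
        perror(name);
        return -1;
    }
    return 0;
}

/* usage:
 *   set_udp_knob("net.inet.udp.log_in_vain", 1); // log unmatched datagrams
 *   set_udp_knob("net.inet.udp.blackhole", 1);   // drop without port unreachable
 */
```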
CTLFLAG_RD | CTLFLAG_LOCKED, &udbinfo.ipi_count, 0, + "Number of active PCBs"); __private_extern__ int udp_use_randomport = 1; -SYSCTL_INT(_net_inet_udp, OID_AUTO, randomize_ports, CTLFLAG_RW | CTLFLAG_LOCKED, - &udp_use_randomport, 0, "Randomize UDP port numbers"); +SYSCTL_INT(_net_inet_udp, OID_AUTO, randomize_ports, + CTLFLAG_RW | CTLFLAG_LOCKED, &udp_use_randomport, 0, + "Randomize UDP port numbers"); #if INET6 struct udp_in6 { @@ -199,87 +191,124 @@ struct udp_ip6 { struct ip6_hdr uip6_ip6; u_char uip6_init_done : 1; }; + +static int udp_abort(struct socket *); +static int udp_attach(struct socket *, int, struct proc *); +static int udp_bind(struct socket *, struct sockaddr *, struct proc *); +static int udp_connect(struct socket *, struct sockaddr *, struct proc *); +static int udp_connectx(struct socket *, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, + uint32_t, void *, uint32_t); +static int udp_detach(struct socket *); +static int udp_disconnect(struct socket *); +static int udp_disconnectx(struct socket *, associd_t, connid_t); +static int udp_send(struct socket *, int, struct mbuf *, struct sockaddr *, + struct mbuf *, struct proc *); +static void udp_append(struct inpcb *, struct ip *, struct mbuf *, int, + struct sockaddr_in *, struct udp_in6 *, struct udp_ip6 *, struct ifnet *); +#else /* !INET6 */ +static void udp_append(struct inpcb *, struct ip *, struct mbuf *, int, + struct sockaddr_in *, struct ifnet *); +#endif /* !INET6 */ +static int udp_input_checksum(struct mbuf *, struct udphdr *, int, int); +static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, + struct mbuf *, struct proc *); static void ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip); -static void udp_append(struct inpcb *last, struct ip *ip, - struct mbuf *n, int off, struct sockaddr_in *pudp_in, - struct udp_in6 *pudp_in6, struct udp_ip6 *pudp_ip6); -#else -static void udp_append(struct inpcb *last, struct ip *ip, - struct mbuf *n, int off, struct sockaddr_in *pudp_in); -#endif +static void udp_gc(struct inpcbinfo *); -static int udp_detach(struct socket *so); -static int udp_output(struct inpcb *, struct mbuf *, struct sockaddr *, - struct mbuf *, struct proc *); -extern int ChkAddressOK( __uint32_t dstaddr, __uint32_t srcaddr ); +struct pr_usrreqs udp_usrreqs = { + .pru_abort = udp_abort, + .pru_attach = udp_attach, + .pru_bind = udp_bind, + .pru_connect = udp_connect, + .pru_connectx = udp_connectx, + .pru_control = in_control, + .pru_detach = udp_detach, + .pru_disconnect = udp_disconnect, + .pru_disconnectx = udp_disconnectx, + .pru_peeraddr = in_getpeeraddr, + .pru_send = udp_send, + .pru_shutdown = udp_shutdown, + .pru_sockaddr = in_getsockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, +}; void -udp_init() +udp_init(struct protosw *pp, struct domain *dp) { - vm_size_t str_size; - struct inpcbinfo *pcbinfo; - +#pragma unused(dp) + static int udp_initialized = 0; + vm_size_t str_size; + struct inpcbinfo *pcbinfo; + + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); + + if (udp_initialized) + return; + udp_initialized = 1; LIST_INIT(&udb); - udbinfo.listhead = &udb; - udbinfo.hashbase = hashinit(UDBHASHSIZE, M_PCB, &udbinfo.hashmask); - udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB, - &udbinfo.porthashmask); -#ifdef __APPLE__ - str_size = (vm_size_t) sizeof(struct inpcb); - udbinfo.ipi_zone = (void *) zinit(str_size, 80000*str_size, 8192, "udpcb"); - - pcbinfo = &udbinfo; + udbinfo.ipi_listhead 
= &udb; + udbinfo.ipi_hashbase = hashinit(UDBHASHSIZE, M_PCB, + &udbinfo.ipi_hashmask); + udbinfo.ipi_porthashbase = hashinit(UDBHASHSIZE, M_PCB, + &udbinfo.ipi_porthashmask); + str_size = (vm_size_t) sizeof (struct inpcb); + udbinfo.ipi_zone = zinit(str_size, 80000*str_size, 8192, "udpcb"); + + pcbinfo = &udbinfo; /* * allocate lock group attribute and group for udp pcb mutexes */ - pcbinfo->mtx_grp_attr = lck_grp_attr_alloc_init(); - - pcbinfo->mtx_grp = lck_grp_alloc_init("udppcb", pcbinfo->mtx_grp_attr); - - pcbinfo->mtx_attr = lck_attr_alloc_init(); + pcbinfo->ipi_lock_grp_attr = lck_grp_attr_alloc_init(); + pcbinfo->ipi_lock_grp = lck_grp_alloc_init("udppcb", + pcbinfo->ipi_lock_grp_attr); + pcbinfo->ipi_lock_attr = lck_attr_alloc_init(); + if ((pcbinfo->ipi_lock = lck_rw_alloc_init(pcbinfo->ipi_lock_grp, + pcbinfo->ipi_lock_attr)) == NULL) { + panic("%s: unable to allocate PCB lock\n", __func__); + /* NOTREACHED */ + } - if ((pcbinfo->mtx = lck_rw_alloc_init(pcbinfo->mtx_grp, pcbinfo->mtx_attr)) == NULL) - return; /* pretty much dead if this fails... */ -#else - udbinfo.ipi_zone = zinit("udpcb", sizeof(struct inpcb), maxsockets, - ZONE_INTERRUPT, 0); -#endif + udbinfo.ipi_gc = udp_gc; + in_pcbinfo_attach(&udbinfo); } void -udp_input(m, iphlen) - register struct mbuf *m; - int iphlen; +udp_input(struct mbuf *m, int iphlen) { - register struct ip *ip; - register struct udphdr *uh; - register struct inpcb *inp; - struct mbuf *opts = 0; + struct ip *ip; + struct udphdr *uh; + struct inpcb *inp; + struct mbuf *opts = NULL; int len, isbroadcast; struct ip save_ip; struct sockaddr *append_sa; struct inpcbinfo *pcbinfo = &udbinfo; - struct sockaddr_in udp_in = { - sizeof (udp_in), AF_INET, 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } - }; + struct sockaddr_in udp_in; struct ip_moptions *imo = NULL; int foundmembership = 0, ret = 0; #if INET6 - struct udp_in6 udp_in6 = { - { sizeof (udp_in6.uin6_sin), AF_INET6, 0, 0, - IN6ADDR_ANY_INIT, 0 }, - 0 - }; + struct udp_in6 udp_in6; struct udp_ip6 udp_ip6; #endif /* INET6 */ - struct ifnet *ifp = (m->m_pkthdr.rcvif != NULL) ? m->m_pkthdr.rcvif: NULL; + struct ifnet *ifp = m->m_pkthdr.rcvif; + boolean_t cell = IFNET_IS_CELLULAR(ifp); + boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); + + bzero(&udp_in, sizeof (udp_in)); + udp_in.sin_len = sizeof (struct sockaddr_in); + udp_in.sin_family = AF_INET; +#if INET6 + bzero(&udp_in6, sizeof (udp_in6)); + udp_in6.uin6_sin.sin6_len = sizeof (struct sockaddr_in6); + udp_in6.uin6_sin.sin6_family = AF_INET6; +#endif /* INET6 */ udpstat.udps_ipackets++; KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_START, 0,0,0,0,0); - if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) - m->m_pkthdr.csum_flags = 0; /* invalidate hwcksum for UDP */ /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); @@ -292,17 +321,19 @@ udp_input(m, iphlen) */ if (iphlen > sizeof (struct ip)) { ip_stripoptions(m, (struct mbuf *)0); - iphlen = sizeof(struct ip); + iphlen = sizeof (struct ip); } /* * Get IP and UDP header together in first mbuf. 
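udp_init() now allocates its PCB lock through the standard lck_* staging (group attributes, then group, then lock attributes, then the rw lock itself) and panics on failure rather than limping on with no lock. A kernel-side sketch of the same idiom, assuming only the lck_* KPIs from <kern/locks.h>:

```c
#include <kern/locks.h>
#include <kern/debug.h>

static lck_grp_attr_t *grp_attr;
static lck_grp_t      *grp;
static lck_attr_t     *attr;
static lck_rw_t       *rwlock;

static void example_lock_init(void)
{
    grp_attr = lck_grp_attr_alloc_init();
    grp      = lck_grp_alloc_init("example", grp_attr);
    attr     = lck_attr_alloc_init();
    rwlock   = lck_rw_alloc_init(grp, attr);
    if (rwlock == NULL)
        panic("example_lock_init: unable to allocate rw lock");
}
```

Naming the group ("udppcb" in the original) is what makes the lock show up attributably in lock-statistics tooling.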
*/ ip = mtod(m, struct ip *); - if (m->m_len < iphlen + sizeof(struct udphdr)) { - if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) { + if (m->m_len < iphlen + sizeof (struct udphdr)) { + m = m_pullup(m, iphlen + sizeof (struct udphdr)); + if (m == NULL) { udpstat.udps_hdrops++; - KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, + 0,0,0,0,0); return; } ip = mtod(m, struct ip *); @@ -311,15 +342,12 @@ udp_input(m, iphlen) /* destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) { - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->port0, 1); - + IF_UDP_STATINC(ifp, port0); goto bad; } KERNEL_DEBUG(DBG_LAYER_IN_BEG, uh->uh_dport, uh->uh_sport, - ip->ip_src.s_addr, ip->ip_dst.s_addr, uh->uh_ulen); + ip->ip_src.s_addr, ip->ip_dst.s_addr, uh->uh_ulen); /* * Make mbuf data length reflect UDP length. @@ -327,12 +355,9 @@ udp_input(m, iphlen) */ len = ntohs((u_short)uh->uh_ulen); if (ip->ip_len != len) { - if (len > ip->ip_len || len < sizeof(struct udphdr)) { + if (len > ip->ip_len || len < sizeof (struct udphdr)) { udpstat.udps_badlen++; - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->badlength, 1); - + IF_UDP_STATINC(ifp, badlength); goto bad; } m_adj(m, len - ip->ip_len); @@ -347,50 +372,15 @@ udp_input(m, iphlen) /* * Checksum extended UDP header and data. */ - if (uh->uh_sum) { - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { - if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) - uh->uh_sum = m->m_pkthdr.csum_data; - else - goto doudpcksum; - uh->uh_sum ^= 0xffff; - } else { - char b[9]; -doudpcksum: - bcopy(((struct ipovly *)ip)->ih_x1, b, - sizeof (((struct ipovly *)ip)->ih_x1)); - bzero(((struct ipovly *)ip)->ih_x1, - sizeof (((struct ipovly *)ip)->ih_x1)); - ((struct ipovly *)ip)->ih_len = uh->uh_ulen; - uh->uh_sum = in_cksum(m, len + sizeof (struct ip)); - bcopy(b, ((struct ipovly *)ip)->ih_x1, - sizeof (((struct ipovly *)ip)->ih_x1)); - - udp_in_cksum_stats(len); - } - if (uh->uh_sum) { - udpstat.udps_badsum++; - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->badchksum, 1); - - m_freem(m); - KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); - return; - } - } -#ifndef __APPLE__ - else - udpstat.udps_nosum++; -#endif + if (udp_input_checksum(m, uh, iphlen, len)) + goto bad; isbroadcast = in_broadcast(ip->ip_dst, ifp); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || isbroadcast) { - int reuse_sock = 0, mcast_delivered = 0; - lck_rw_lock_shared(pcbinfo->mtx); + lck_rw_lock_shared(pcbinfo->ipi_lock); /* * Deliver a multicast or broadcast datagram to *all* sockets * for which the local and remote addresses and ports match @@ -407,7 +397,6 @@ doudpcksum: * fixing the interface. Maybe 4.5BSD will remedy this?) */ - /* * Construct sockaddr format source address. 
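The input path pulls the IP and UDP headers into the first mbuf, rejects a zero destination port (illegal per RFC 768), and requires uh_ulen to at least cover the 8-byte UDP header. The same sanity checks in a standalone packet-parsing helper:

```c
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

struct udp_hdr { uint16_t sport, dport, ulen, sum; };

/* Returns 0 if the UDP header passes the checks udp_input applies. */
static int udp_sane(const uint8_t *pkt, size_t caplen)
{
    struct udp_hdr uh;

    if (caplen < sizeof(uh))
        return -1;                  /* can't even "pull up" the header */
    memcpy(&uh, pkt, sizeof(uh));   /* copy avoids unaligned access */
    if (uh.dport == 0)
        return -1;                  /* RFC 768: destination port 0 is illegal */
    if (ntohs(uh.ulen) < sizeof(uh))
        return -1;                  /* length must cover the header itself */
    return 0;
}
```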
*/ @@ -419,34 +408,43 @@ doudpcksum: */ #if INET6 udp_in6.uin6_init_done = udp_ip6.uip6_init_done = 0; -#endif +#endif /* INET6 */ LIST_FOREACH(inp, &udb, inp_list) { - if (inp->inp_socket == NULL) +#if IPSEC + int skipit; +#endif /* IPSEC */ + + if (inp->inp_socket == NULL) continue; - if (inp != sotoinpcb(inp->inp_socket)) - panic("udp_input: bad so back ptr inp=%p\n", inp); + if (inp != sotoinpcb(inp->inp_socket)) { + panic("%s: bad so back ptr inp=%p\n", + __func__, inp); + /* NOTREACHED */ + } #if INET6 if ((inp->inp_vflag & INP_IPV4) == 0) continue; -#endif - if (ip_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->inp_flags & INP_RECV_ANYIF)) +#endif /* INET6 */ + if (inp_restricted(inp, ifp)) continue; - if ((inp->inp_moptions == NULL) && - (ntohl(ip->ip_dst.s_addr) != INADDR_ALLHOSTS_GROUP) && - (isbroadcast == 0) ) + if (IFNET_IS_CELLULAR(ifp) && + (inp->inp_flags & INP_NO_IFT_CELLULAR)) continue; + if ((inp->inp_moptions == NULL) && + (ntohl(ip->ip_dst.s_addr) != + INADDR_ALLHOSTS_GROUP) && (isbroadcast == 0)) + continue; - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == + WNT_STOPUSING) continue; - } - - udp_lock(inp->inp_socket, 1, 0); - if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { + udp_lock(inp->inp_socket, 1, 0); + + if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == + WNT_STOPUSING) { udp_unlock(inp->inp_socket, 1, 0); continue; } @@ -471,69 +469,73 @@ doudpcksum: } } - if (isbroadcast == 0 && (ntohl(ip->ip_dst.s_addr) != INADDR_ALLHOSTS_GROUP)) { - if((imo = inp->inp_moptions) == NULL) { + if (isbroadcast == 0 && (ntohl(ip->ip_dst.s_addr) != + INADDR_ALLHOSTS_GROUP)) { + struct sockaddr_in group; + int blocked; + + if ((imo = inp->inp_moptions) == NULL) { udp_unlock(inp->inp_socket, 1, 0); continue; - } else { - struct sockaddr_in group; - int blocked; - - IMO_LOCK(imo); - - bzero(&group, sizeof(struct sockaddr_in)); - group.sin_len = sizeof(struct sockaddr_in); - group.sin_family = AF_INET; - group.sin_addr = ip->ip_dst; - - blocked = imo_multi_filter(imo, ifp, - (struct sockaddr *)&group, - (struct sockaddr *)&udp_in); - if (blocked == MCAST_PASS) - foundmembership = 1; - - IMO_UNLOCK(imo); - if (!foundmembership) { - udp_unlock(inp->inp_socket, 1, 0); - continue; - } - foundmembership = 0; } + IMO_LOCK(imo); + + bzero(&group, sizeof (struct sockaddr_in)); + group.sin_len = sizeof (struct sockaddr_in); + group.sin_family = AF_INET; + group.sin_addr = ip->ip_dst; + + blocked = imo_multi_filter(imo, ifp, + (struct sockaddr *)&group, + (struct sockaddr *)&udp_in); + if (blocked == MCAST_PASS) + foundmembership = 1; + + IMO_UNLOCK(imo); + if (!foundmembership) { + udp_unlock(inp->inp_socket, 1, 0); + if (blocked == MCAST_NOTSMEMBER || + blocked == MCAST_MUTED) + udpstat.udps_filtermcast++; + continue; + } + foundmembership = 0; } - reuse_sock = inp->inp_socket->so_options& (SO_REUSEPORT|SO_REUSEADDR); - { + + reuse_sock = (inp->inp_socket->so_options & + (SO_REUSEPORT|SO_REUSEADDR)); + #if IPSEC - int skipit = 0; - /* check AH/ESP integrity. */ - if (ipsec_bypass == 0) { - if (ipsec4_in_reject_so(m, inp->inp_socket)) { - IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); - /* do not inject data to pcb */ - skipit = 1; - } - } - if (skipit == 0) + skipit = 0; + /* check AH/ESP integrity. 
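The delivery loop above hands a broadcast or multicast datagram to every matching PCB, but only keeps scanning after a match if that socket carries SO_REUSEPORT or SO_REUSEADDR; imo_multi_filter() then enforces each socket's source filters (and the new code counts filtered drops in udps_filtermcast). The userland side of that contract, sketched:

```c
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <unistd.h>

/* Several processes can each receive a copy of the same multicast
 * datagram, provided every one sets SO_REUSEPORT before bind(). */
static int open_mcast_listener(const char *group, uint16_t port)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0), on = 1;
    struct sockaddr_in sin;
    struct ip_mreq mreq;

    if (fd == -1)
        return -1;
    setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));

    memset(&sin, 0, sizeof(sin));
    sin.sin_len = sizeof(sin);
    sin.sin_family = AF_INET;
    sin.sin_port = htons(port);
    sin.sin_addr.s_addr = htonl(INADDR_ANY);
    if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) == -1) {
        close(fd);
        return -1;
    }

    memset(&mreq, 0, sizeof(mreq));
    inet_pton(AF_INET, group, &mreq.imr_multiaddr);
    mreq.imr_interface.s_addr = htonl(INADDR_ANY);
    if (setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) == -1) {
        close(fd);
        return -1;
    }
    return fd;
}
```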
*/ + if (ipsec_bypass == 0 && + ipsec4_in_reject_so(m, inp->inp_socket)) { + IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); + /* do not inject data to pcb */ + skipit = 1; + } + if (skipit == 0) #endif /*IPSEC*/ - { - struct mbuf *n = NULL; - - if (reuse_sock) - n = m_copy(m, 0, M_COPYALL); -#if INET6 - udp_append(inp, ip, m, - iphlen + sizeof(struct udphdr), - &udp_in, &udp_in6, &udp_ip6); -#else - udp_append(inp, ip, m, - iphlen + sizeof(struct udphdr), - &udp_in); -#endif /* INET6 */ - mcast_delivered++; + { + struct mbuf *n = NULL; - m = n; - } - udp_unlock(inp->inp_socket, 1, 0); + if (reuse_sock) + n = m_copy(m, 0, M_COPYALL); +#if INET6 + udp_append(inp, ip, m, + iphlen + sizeof (struct udphdr), + &udp_in, &udp_in6, &udp_ip6, ifp); +#else /* !INET6 */ + udp_append(inp, ip, m, + iphlen + sizeof (struct udphdr), + &udp_in, ifp); +#endif /* !INET6 */ + mcast_delivered++; + + m = n; } + udp_unlock(inp->inp_socket, 1, 0); + /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR @@ -556,7 +558,7 @@ doudpcksum: ip = mtod(m, struct ip *); uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); if (mcast_delivered == 0) { /* @@ -565,14 +567,12 @@ doudpcksum: * for a broadcast or multicast datgram.) */ udpstat.udps_noportbcast++; - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->port_unreach, 1); - + IF_UDP_STATINC(ifp, port_unreach); goto bad; } - if (m != NULL) /* free the extra copy of mbuf or skipped by IPSec */ + /* free the extra copy of mbuf or skipped by IPSec */ + if (m != NULL) m_freem(m); KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); return; @@ -587,12 +587,16 @@ doudpcksum: * or discard the NAT keep-alive. */ if (ipsec_bypass == 0 && (esp_udp_encap_port & 0xFFFF) != 0 && - uh->uh_dport == ntohs((u_short)esp_udp_encap_port)) { - int payload_len = len - sizeof(struct udphdr) > 4 ? 4 : len - sizeof(struct udphdr); - if (m->m_len < iphlen + sizeof(struct udphdr) + payload_len) { - if ((m = m_pullup(m, iphlen + sizeof(struct udphdr) + payload_len)) == 0) { + uh->uh_dport == ntohs((u_short)esp_udp_encap_port)) { + int payload_len = len - sizeof (struct udphdr) > 4 ? 
4 : + len - sizeof (struct udphdr); + + if (m->m_len < iphlen + sizeof (struct udphdr) + payload_len) { + if ((m = m_pullup(m, iphlen + sizeof (struct udphdr) + + payload_len)) == NULL) { udpstat.udps_hdrops++; - KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, + 0,0,0,0,0); return; } /* @@ -605,20 +609,23 @@ doudpcksum: uh = (struct udphdr *)(void *)((caddr_t)ip + iphlen); } /* Check for NAT keepalive packet */ - if (payload_len == 1 && *(u_int8_t*)((caddr_t)uh + sizeof(struct udphdr)) == 0xFF) { + if (payload_len == 1 && *(u_int8_t*) + ((caddr_t)uh + sizeof (struct udphdr)) == 0xFF) { m_freem(m); - KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, + 0,0,0,0,0); return; - } - else if (payload_len == 4 && *(u_int32_t*)(void *)((caddr_t)uh + sizeof(struct udphdr)) != 0) { + } else if (payload_len == 4 && *(u_int32_t*)(void *) + ((caddr_t)uh + sizeof (struct udphdr)) != 0) { /* UDP encapsulated IPSec packet to pass through NAT */ - KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); + KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, + 0,0,0,0,0); /* preserve the udp header */ - esp4_input(m, iphlen + sizeof(struct udphdr)); + esp4_input(m, iphlen + sizeof (struct udphdr)); return; } } -#endif +#endif /* IPSEC */ /* * Locate pcb for datagram. @@ -626,30 +633,30 @@ doudpcksum: inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport, ip->ip_dst, uh->uh_dport, 1, ifp); if (inp == NULL) { - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->port_unreach, 1); - - if (log_in_vain) { + IF_UDP_STATINC(ifp, port_unreach); + + if (udp_log_in_vain) { char buf[MAX_IPv4_STR_LEN]; char buf2[MAX_IPv4_STR_LEN]; /* check src and dst address */ - if (log_in_vain != 3) - log(LOG_INFO, - "Connection attempt to UDP %s:%d from %s:%d\n", - inet_ntop(AF_INET, &ip->ip_dst, buf, sizeof(buf)), - ntohs(uh->uh_dport), - inet_ntop(AF_INET, &ip->ip_src, buf2, sizeof(buf2)), + if (udp_log_in_vain < 3) { + log(LOG_INFO, "Connection attempt to " + "UDP %s:%d from %s:%d\n", inet_ntop(AF_INET, + &ip->ip_dst, buf, sizeof (buf)), + ntohs(uh->uh_dport), inet_ntop(AF_INET, + &ip->ip_src, buf2, sizeof (buf2)), ntohs(uh->uh_sport)); - else if (!(m->m_flags & (M_BCAST | M_MCAST)) && - ip->ip_dst.s_addr != ip->ip_src.s_addr) + } else if (!(m->m_flags & (M_BCAST | M_MCAST)) && + ip->ip_dst.s_addr != ip->ip_src.s_addr) { log_in_vain_log((LOG_INFO, - "Stealth Mode connection attempt to UDP %s:%d from %s:%d\n", - inet_ntop(AF_INET, &ip->ip_dst, buf, sizeof(buf)), - ntohs(uh->uh_dport), - inet_ntop(AF_INET, &ip->ip_src, buf2, sizeof(buf2)), - ntohs(uh->uh_sport))) + "Stealth Mode connection attempt to " + "UDP %s:%d from %s:%d\n", inet_ntop(AF_INET, + &ip->ip_dst, buf, sizeof (buf)), + ntohs(uh->uh_dport), inet_ntop(AF_INET, + &ip->ip_src, buf2, sizeof (buf2)), + ntohs(uh->uh_sport))) + } } udpstat.udps_noport++; if (m->m_flags & (M_BCAST | M_MCAST)) { @@ -659,7 +666,7 @@ doudpcksum: #if ICMP_BANDLIM if (badport_bandlim(BANDLIM_ICMP_UNREACH) < 0) goto bad; -#endif +#endif /* ICMP_BANDLIM */ if (blackhole) if (ifp && ifp->if_type != IFT_LOOP) goto bad; @@ -673,10 +680,7 @@ doudpcksum: if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) { udp_unlock(inp->inp_socket, 1, 0); - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->cleanup, 1); - + IF_UDP_STATINC(ifp, cleanup); goto bad; } #if IPSEC @@ -684,14 +688,11 @@ doudpcksum: if (ipsec4_in_reject_so(m, inp->inp_socket)) { 
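The ESP-over-UDP hunk distinguishes three things arriving on the IKE NAT-traversal port: a one-byte 0xFF NAT-keepalive (silently swallowed), a payload whose first four bytes form a non-zero SPI (handed to esp4_input() with the UDP header preserved), and everything else (IKE, delivered normally). A classifier in the same spirit, per RFC 3948 framing; the 4500 default is an assumption about esp_udp_encap_port:

```c
#include <stdint.h>
#include <stddef.h>

enum natt_kind { NATT_KEEPALIVE, NATT_ESP, NATT_IKE_OR_OTHER };

/* Classify the UDP payload of a packet on the NAT-T port (default 4500). */
static enum natt_kind classify_natt(const uint8_t *p, size_t len)
{
    if (len == 1 && p[0] == 0xFF)
        return NATT_KEEPALIVE;          /* RFC 3948 NAT-keepalive */
    if (len >= 4 && (p[0] | p[1] | p[2] | p[3]) != 0)
        return NATT_ESP;                /* non-zero SPI: ESP-in-UDP */
    return NATT_IKE_OR_OTHER;           /* zero marker: IKE traffic */
}
```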
IPSEC_STAT_INCREMENT(ipsecstat.in_polvio); udp_unlock(inp->inp_socket, 1, 0); - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->badipsec, 1); - + IF_UDP_STATINC(ifp, badipsec); goto bad; } } -#endif /*IPSEC*/ +#endif /* IPSEC */ /* * Construct sockaddr format source address. @@ -699,9 +700,9 @@ doudpcksum: */ udp_in.sin_port = uh->uh_sport; udp_in.sin_addr = ip->ip_src; - if ((inp->inp_flags & INP_CONTROLOPTS) != 0 - || (inp->inp_socket->so_options & SO_TIMESTAMP) != 0 - || (inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { + if ((inp->inp_flags & INP_CONTROLOPTS) != 0 || + (inp->inp_socket->so_options & SO_TIMESTAMP) != 0 || + (inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { #if INET6 if (inp->inp_vflag & INP_IPV6) { int savedflags; @@ -712,7 +713,7 @@ doudpcksum: ret = ip6_savecontrol(inp, m, &opts); inp->inp_flags = savedflags; } else -#endif +#endif /* INET6 */ { ret = ip_savecontrol(inp, &opts, ip, m); } @@ -721,24 +722,27 @@ doudpcksum: goto bad; } } - m_adj(m, iphlen + sizeof(struct udphdr)); + m_adj(m, iphlen + sizeof (struct udphdr)); KERNEL_DEBUG(DBG_LAYER_IN_END, uh->uh_dport, uh->uh_sport, - save_ip.ip_src.s_addr, save_ip.ip_dst.s_addr, uh->uh_ulen); + save_ip.ip_src.s_addr, save_ip.ip_dst.s_addr, uh->uh_ulen); #if INET6 if (inp->inp_vflag & INP_IPV6) { in6_sin_2_v4mapsin6(&udp_in, &udp_in6.uin6_sin); append_sa = (struct sockaddr *)&udp_in6.uin6_sin; } else -#endif - append_sa = (struct sockaddr *)&udp_in; +#endif /* INET6 */ + { + append_sa = (struct sockaddr *)&udp_in; + } if (nstat_collect) { - locked_add_64(&inp->inp_stat->rxpackets, 1); - locked_add_64(&inp->inp_stat->rxbytes, m->m_pkthdr.len); + INP_ADD_STAT(inp, cell, wifi, rxpackets, 1); + INP_ADD_STAT(inp, cell, wifi, rxbytes, m->m_pkthdr.len); } so_recv_data_stat(inp->inp_socket, m, 0); - if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa, m, opts, NULL) == 0) { + if (sbappendaddr(&inp->inp_socket->so_rcv, append_sa, + m, opts, NULL) == 0) { udpstat.udps_fullsock++; } else { sorwakeup(inp->inp_socket); @@ -751,16 +755,13 @@ bad: if (opts) m_freem(opts); KERNEL_DEBUG(DBG_FNC_UDP_INPUT | DBG_FUNC_END, 0,0,0,0,0); - return; } #if INET6 static void -ip_2_ip6_hdr(ip6, ip) - struct ip6_hdr *ip6; - struct ip *ip; +ip_2_ip6_hdr(struct ip6_hdr *ip6, struct ip *ip) { - bzero(ip6, sizeof(*ip6)); + bzero(ip6, sizeof (*ip6)); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = ip->ip_len; @@ -775,7 +776,7 @@ ip_2_ip6_hdr(ip6, ip) ip6->ip6_dst.s6_addr32[3] = ip->ip_dst.s_addr; } } -#endif +#endif /* INET6 */ /* * subroutine of udp_input(), mainly for source code readability. 
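The control-options block above is what services SO_TIMESTAMP (and its monotonic sibling): when set, ip_savecontrol() attaches the receive time to each datagram as an SCM_TIMESTAMP control message. Reading it back from userland:

```c
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>

/* With SO_TIMESTAMP set, each datagram arrives with an SCM_TIMESTAMP
 * cmsg carrying the kernel receive time as a struct timeval. */
static void recv_with_timestamp(int fd)
{
    char data[2048];
    union { char b[CMSG_SPACE(sizeof(struct timeval))]; struct cmsghdr align; } cbuf;
    struct iovec iov = { data, sizeof(data) };
    struct msghdr msg;
    struct cmsghdr *cm;
    int on = 1;

    setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on));

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = cbuf.b;
    msg.msg_controllen = sizeof(cbuf.b);

    if (recvmsg(fd, &msg, 0) == -1)
        return;
    for (cm = CMSG_FIRSTHDR(&msg); cm != NULL; cm = CMSG_NXTHDR(&msg, cm)) {
        if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_TIMESTAMP) {
            struct timeval tv;
            memcpy(&tv, CMSG_DATA(cm), sizeof(tv));
            printf("received at %ld.%06d\n", (long)tv.tv_sec, (int)tv.tv_usec);
        }
    }
}
```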
@@ -784,14 +785,16 @@ static void #if INET6 udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, struct sockaddr_in *pudp_in, struct udp_in6 *pudp_in6, - struct udp_ip6 *pudp_ip6) -#else + struct udp_ip6 *pudp_ip6, struct ifnet *ifp) +#else /* !INET6 */ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, - struct sockaddr_in *pudp_in) -#endif + struct sockaddr_in *pudp_in, struct ifnet *ifp) +#endif /* !INET6 */ { struct sockaddr *append_sa; struct mbuf *opts = 0; + boolean_t cell = IFNET_IS_CELLULAR(ifp); + boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); int ret = 0; #if CONFIG_MACF_NET @@ -799,7 +802,7 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, m_freem(n); return; } -#endif +#endif /* CONFIG_MACF_NET */ if ((last->inp_flags & INP_CONTROLOPTS) != 0 || (last->inp_socket->so_options & SO_TIMESTAMP) != 0 || (last->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { @@ -820,7 +823,7 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, } last->inp_flags = savedflags; } else -#endif +#endif /* INET6 */ { ret = ip_savecontrol(last, &opts, ip, n); if (ret != 0) { @@ -836,15 +839,16 @@ udp_append(struct inpcb *last, struct ip *ip, struct mbuf *n, int off, } append_sa = (struct sockaddr *)&pudp_in6->uin6_sin; } else -#endif +#endif /* INET6 */ append_sa = (struct sockaddr *)pudp_in; if (nstat_collect) { - locked_add_64(&last->inp_stat->rxpackets, 1); - locked_add_64(&last->inp_stat->rxbytes, n->m_pkthdr.len); + INP_ADD_STAT(last, cell, wifi, rxpackets, 1); + INP_ADD_STAT(last, cell, wifi, rxbytes, n->m_pkthdr.len); } so_recv_data_stat(last->inp_socket, n, 0); m_adj(n, off); - if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, n, opts, NULL) == 0) { + if (sbappendaddr(&last->inp_socket->so_rcv, append_sa, + n, opts, NULL) == 0) { udpstat.udps_fullsock++; } else { sorwakeup(last->inp_socket); @@ -861,9 +865,7 @@ error: * just wake up so that he can collect error status. */ void -udp_notify(inp, errno) - register struct inpcb *inp; - int errno; +udp_notify(struct inpcb *inp, int errno) { inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); @@ -871,10 +873,7 @@ udp_notify(inp, errno) } void -udp_ctlinput(cmd, sa, vip) - int cmd; - struct sockaddr *sa; - void *vip; +udp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct ip *ip = vip; void (*notify)(struct inpcb *, int) = udp_notify; @@ -888,10 +887,11 @@ udp_ctlinput(cmd, sa, vip) if (PRC_IS_REDIRECT(cmd)) { ip = 0; notify = in_rtchange; - } else if (cmd == PRC_HOSTDEAD) + } else if (cmd == PRC_HOSTDEAD) { ip = 0; - else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) + } else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) { return; + } if (ip) { struct udphdr uh; @@ -908,8 +908,9 @@ udp_ctlinput(cmd, sa, vip) (*notify)(inp, inetctlerrmap[cmd]); udp_unlock(inp->inp_socket, 1, 0); } - } else + } else { in_pcbnotifyall(&udbinfo, faddr, inetctlerrmap[cmd], notify); + } } int @@ -990,18 +991,18 @@ udp_pcblist SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. 
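udp_ctlinput() maps incoming ICMP errors onto matching PCBs via udp_notify(), which stashes the errno in so_error and wakes the socket. From userland that only surfaces on connected UDP sockets, where a later send or receive fails with the stored error:

```c
#include <sys/socket.h>
#include <netinet/in.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Probe a UDP port; a closed port typically bounces an ICMP port
 * unreachable, which udp_notify turns into ECONNREFUSED here.
 * (A real probe would set SO_RCVTIMEO to bound the wait.) */
static void probe(const struct sockaddr_in *dst)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    char buf[64];

    if (fd == -1)
        return;
    if (connect(fd, (const struct sockaddr *)dst, sizeof(*dst)) == 0 &&
        send(fd, "ping", 4, 0) == 4 &&
        recv(fd, buf, sizeof(buf), 0) == -1 && errno == ECONNREFUSED)
        fprintf(stderr, "ICMP port unreachable from peer\n");
    close(fd);
}
```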
*/ - lck_rw_lock_exclusive(udbinfo.mtx); + lck_rw_lock_exclusive(udbinfo.ipi_lock); if (req->oldptr == USER_ADDR_NULL) { n = udbinfo.ipi_count; - req->oldidx = 2 * (sizeof xig) - + (n + n/8) * sizeof(struct xinpcb); - lck_rw_done(udbinfo.mtx); - return 0; + req->oldidx = 2 * (sizeof (xig)) + + (n + n/8) * sizeof (struct xinpcb); + lck_rw_done(udbinfo.ipi_lock); + return (0); } if (req->newptr != USER_ADDR_NULL) { - lck_rw_done(udbinfo.mtx); - return EPERM; + lck_rw_done(udbinfo.ipi_lock); + return (EPERM); } /* @@ -1010,33 +1011,34 @@ udp_pcblist SYSCTL_HANDLER_ARGS gencnt = udbinfo.ipi_gencnt; n = udbinfo.ipi_count; - bzero(&xig, sizeof(xig)); - xig.xig_len = sizeof xig; + bzero(&xig, sizeof (xig)); + xig.xig_len = sizeof (xig); xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; - error = SYSCTL_OUT(req, &xig, sizeof xig); + error = SYSCTL_OUT(req, &xig, sizeof (xig)); if (error) { - lck_rw_done(udbinfo.mtx); - return error; + lck_rw_done(udbinfo.ipi_lock); + return (error); } - /* - * We are done if there is no pcb - */ - if (n == 0) { - lck_rw_done(udbinfo.mtx); - return 0; - } - - inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); + /* + * We are done if there is no pcb + */ + if (n == 0) { + lck_rw_done(udbinfo.ipi_lock); + return (0); + } + + inp_list = _MALLOC(n * sizeof (*inp_list), M_TEMP, M_WAITOK); if (inp_list == 0) { - lck_rw_done(udbinfo.mtx); - return ENOMEM; + lck_rw_done(udbinfo.ipi_lock); + return (ENOMEM); } - - for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n; + + for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) inp_list[i++] = inp; } n = i; @@ -1044,16 +1046,17 @@ udp_pcblist SYSCTL_HANDLER_ARGS error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb xi; - bzero(&xi, sizeof(xi)); - xi.xi_len = sizeof xi; + bzero(&xi, sizeof (xi)); + xi.xi_len = sizeof (xi); /* XXX should avoid extra copy */ inpcb_to_compat(inp, &xi.xi_inp); if (inp->inp_socket) sotoxsocket(inp->inp_socket, &xi.xi_socket); - error = SYSCTL_OUT(req, &xi, sizeof xi); + error = SYSCTL_OUT(req, &xi, sizeof (xi)); } } if (!error) { @@ -1064,22 +1067,22 @@ udp_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. */ - bzero(&xig, sizeof(xig)); - xig.xig_len = sizeof xig; + bzero(&xig, sizeof (xig)); + xig.xig_len = sizeof (xig); xig.xig_gen = udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = udbinfo.ipi_count; - error = SYSCTL_OUT(req, &xig, sizeof xig); + error = SYSCTL_OUT(req, &xig, sizeof (xig)); } FREE(inp_list, M_TEMP); - lck_rw_done(udbinfo.mtx); - return error; + lck_rw_done(udbinfo.ipi_lock); + return (error); } -SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, - udp_pcblist, "S,xinpcb", "List of active UDP sockets"); +SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, + CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist, + "S,xinpcb", "List of active UDP sockets"); -#if !CONFIG_EMBEDDED static int udp_pcblist64 SYSCTL_HANDLER_ARGS @@ -1094,18 +1097,18 @@ udp_pcblist64 SYSCTL_HANDLER_ARGS * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. 
*/ - lck_rw_lock_shared(udbinfo.mtx); + lck_rw_lock_shared(udbinfo.ipi_lock); if (req->oldptr == USER_ADDR_NULL) { n = udbinfo.ipi_count; - req->oldidx = 2 * (sizeof xig) - + (n + n/8) * sizeof(struct xinpcb64); - lck_rw_done(udbinfo.mtx); - return 0; + req->oldidx = + 2 * (sizeof (xig)) + (n + n/8) * sizeof (struct xinpcb64); + lck_rw_done(udbinfo.ipi_lock); + return (0); } if (req->newptr != USER_ADDR_NULL) { - lck_rw_done(udbinfo.mtx); - return EPERM; + lck_rw_done(udbinfo.ipi_lock); + return (EPERM); } /* @@ -1114,33 +1117,34 @@ udp_pcblist64 SYSCTL_HANDLER_ARGS gencnt = udbinfo.ipi_gencnt; n = udbinfo.ipi_count; - bzero(&xig, sizeof(xig)); - xig.xig_len = sizeof xig; + bzero(&xig, sizeof (xig)); + xig.xig_len = sizeof (xig); xig.xig_count = n; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; - error = SYSCTL_OUT(req, &xig, sizeof xig); + error = SYSCTL_OUT(req, &xig, sizeof (xig)); if (error) { - lck_rw_done(udbinfo.mtx); - return error; + lck_rw_done(udbinfo.ipi_lock); + return (error); } - /* - * We are done if there is no pcb - */ - if (n == 0) { - lck_rw_done(udbinfo.mtx); - return 0; - } - - inp_list = _MALLOC(n * sizeof *inp_list, M_TEMP, M_WAITOK); + /* + * We are done if there is no pcb + */ + if (n == 0) { + lck_rw_done(udbinfo.ipi_lock); + return (0); + } + + inp_list = _MALLOC(n * sizeof (*inp_list), M_TEMP, M_WAITOK); if (inp_list == 0) { - lck_rw_done(udbinfo.mtx); - return ENOMEM; + lck_rw_done(udbinfo.ipi_lock); + return (ENOMEM); } - for (inp = LIST_FIRST(udbinfo.listhead), i = 0; inp && i < n; + for (inp = LIST_FIRST(udbinfo.ipi_listhead), i = 0; inp && i < n; inp = LIST_NEXT(inp, inp_list)) { - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) inp_list[i++] = inp; } n = i; @@ -1148,15 +1152,16 @@ udp_pcblist64 SYSCTL_HANDLER_ARGS error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; - if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { + if (inp->inp_gencnt <= gencnt && + inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb64 xi; - bzero(&xi, sizeof(xi)); - xi.xi_len = sizeof xi; + bzero(&xi, sizeof (xi)); + xi.xi_len = sizeof (xi); inpcb_to_xinpcb64(inp, &xi); if (inp->inp_socket) sotoxsocket64(inp->inp_socket, &xi.xi_socket); - error = SYSCTL_OUT(req, &xi, sizeof xi); + error = SYSCTL_OUT(req, &xi, sizeof (xi)); } } if (!error) { @@ -1167,72 +1172,64 @@ udp_pcblist64 SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. 
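udp_pcblist and udp_pcblist64 above share one protocol: snapshot udbinfo.ipi_gencnt under the lock, export only PCBs that are not dead and whose gencnt is no newer than the snapshot, then emit the generation again so userland can detect concurrent churn and retry. Stripped of the SYSCTL_OUT plumbing, the filter is just this (hypothetical stub types):

#include <stdio.h>
#include <stdint.h>

struct pcb_stub { uint64_t gencnt; int dead; int id; };

int
main(void)
{
	struct pcb_stub pcbs[] = {
		{ 40, 0, 1 }, { 41, 1, 2 }, { 42, 0, 3 }, { 43, 0, 4 }
	};
	uint64_t gen = 42;	/* snapshot of ipi_gencnt, taken under the lock */
	unsigned i;

	for (i = 0; i < sizeof (pcbs) / sizeof (pcbs[0]); i++) {
		/* same test as the export loops above */
		if (pcbs[i].gencnt > gen || pcbs[i].dead)
			continue;	/* born after snapshot, or dying */
		printf("export pcb %d\n", pcbs[i].id);
	}
	/* re-reading the gencnt afterwards lets the caller detect churn */
	return (0);
}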
*/ - bzero(&xig, sizeof(xig)); - xig.xig_len = sizeof xig; + bzero(&xig, sizeof (xig)); + xig.xig_len = sizeof (xig); xig.xig_gen = udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = udbinfo.ipi_count; - error = SYSCTL_OUT(req, &xig, sizeof xig); + error = SYSCTL_OUT(req, &xig, sizeof (xig)); } FREE(inp_list, M_TEMP); - lck_rw_done(udbinfo.mtx); - return error; + lck_rw_done(udbinfo.ipi_lock); + return (error); } -SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist64, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, - udp_pcblist64, "S,xinpcb64", "List of active UDP sockets"); +SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist64, + CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist64, + "S,xinpcb64", "List of active UDP sockets"); -#endif /* !CONFIG_EMBEDDED */ static int udp_pcblist_n SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) - int error = 0; - - error = get_pcblist_n(IPPROTO_UDP, req, &udbinfo); - - return error; + return (get_pcblist_n(IPPROTO_UDP, req, &udbinfo)); } -SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist_n, CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, - udp_pcblist_n, "S,xinpcb_n", "List of active UDP sockets"); - +SYSCTL_PROC(_net_inet_udp, OID_AUTO, pcblist_n, + CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, udp_pcblist_n, + "S,xinpcb_n", "List of active UDP sockets"); __private_extern__ void -udp_get_ports_used(unsigned int ifindex, uint8_t *bitfield) +udp_get_ports_used(uint32_t ifindex, int protocol, uint32_t wildcardok, + bitstr_t *bitfield) { - inpcb_get_ports_used(ifindex, bitfield, &udbinfo); + inpcb_get_ports_used(ifindex, protocol, wildcardok, bitfield, &udbinfo); } __private_extern__ uint32_t udp_count_opportunistic(unsigned int ifindex, u_int32_t flags) { - return inpcb_count_opportunistic(ifindex, &udbinfo, flags); + return (inpcb_count_opportunistic(ifindex, &udbinfo, flags)); } -static __inline__ u_int16_t -get_socket_id(struct socket * s) +__private_extern__ uint32_t +udp_find_anypcb_byaddr(struct ifaddr *ifa) { - u_int16_t val; - - if (s == NULL) { - return (0); - } - val = (u_int16_t)(((uintptr_t)s) / sizeof(struct socket)); - if (val == 0) { - val = 0xffff; - } - return (val); + return (inpcb_find_anypcb_byaddr(ifa, &udbinfo)); } static int -udp_check_pktinfo(struct mbuf *control, struct ifnet **outif, struct in_addr *laddr) +udp_check_pktinfo(struct mbuf *control, struct ifnet **outif, + struct in_addr *laddr) { struct cmsghdr *cm = 0; struct in_pktinfo *pktinfo; struct ifnet *ifp; + if (outif != NULL) + *outif = NULL; + /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. 
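udp_check_pktinfo(), which begins above, is the kernel-side mirror of a userspace control-message walk. For comparison, reading the same IP_PKTINFO option out of a msghdr filled in by recvmsg(2) looks roughly like this — a sketch assuming a platform that exposes struct in_pktinfo (IP_PKTINFO on Linux, IP_RECVPKTINFO/IP_PKTINFO on OS X):

#define _GNU_SOURCE		/* struct in_pktinfo on glibc */
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>

/*
 * Called with a msghdr filled in by recvmsg(2) on a socket that has
 * packet-info delivery enabled via setsockopt(2).
 */
static int
find_pktinfo(struct msghdr *msg, struct in_pktinfo *out)
{
	struct cmsghdr *cm;

	for (cm = CMSG_FIRSTHDR(msg); cm != NULL;
	    cm = CMSG_NXTHDR(msg, cm)) {
		if (cm->cmsg_level != IPPROTO_IP ||
		    cm->cmsg_type != IP_PKTINFO)
			continue;
		/* exact-size check, as udp_check_pktinfo() enforces */
		if (cm->cmsg_len != CMSG_LEN(sizeof (struct in_pktinfo)))
			return (-1);
		memcpy(out, CMSG_DATA(cm), sizeof (*out));
		return (0);
	}
	return (-1);		/* no pktinfo present */
}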
@@ -1243,14 +1240,16 @@ udp_check_pktinfo(struct mbuf *control, struct ifnet **outif, struct in_addr *la if (control->m_len < CMSG_LEN(0)) return (EINVAL); - for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) { - if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len) + for (cm = M_FIRST_CMSGHDR(control); cm; + cm = M_NXT_CMSGHDR(control, cm)) { + if (cm->cmsg_len < sizeof (struct cmsghdr) || + cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IP || cm->cmsg_type != IP_PKTINFO) continue; - if (cm->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo))) + if (cm->cmsg_len != CMSG_LEN(sizeof (struct in_pktinfo))) return (EINVAL); pktinfo = (struct in_pktinfo *)(void *)CMSG_DATA(cm); @@ -1263,28 +1262,31 @@ udp_check_pktinfo(struct mbuf *control, struct ifnet **outif, struct in_addr *la return (ENXIO); } - /* If ipi_ifindex is specified it takes precedence over ipi_spec_dst */ - + /* + * If ipi_ifindex is specified it takes precedence + * over ipi_spec_dst. + */ if (pktinfo->ipi_ifindex) { ifp = ifindex2ifnet[pktinfo->ipi_ifindex]; if (ifp == NULL) { ifnet_head_done(); return (ENXIO); } - - ifnet_head_done(); - - if (outif != NULL) + if (outif != NULL) { + ifnet_reference(ifp); *outif = ifp; + } + ifnet_head_done(); laddr->s_addr = INADDR_ANY; break; } ifnet_head_done(); - /* Use the provided ipi_spec_dst address for temp source address */ - if (outif != NULL) - *outif = NULL; + /* + * Use the provided ipi_spec_dst address for temp + * source address. + */ *laddr = pktinfo->ipi_spec_dst; break; } @@ -1292,26 +1294,22 @@ udp_check_pktinfo(struct mbuf *control, struct ifnet **outif, struct in_addr *la } static int -udp_output(inp, m, addr, control, p) - register struct inpcb *inp; - struct mbuf *m; - struct sockaddr *addr; - struct mbuf *control; - struct proc *p; +udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr, + struct mbuf *control, struct proc *p) { - register struct udpiphdr *ui; - register int len = m->m_pkthdr.len; + struct udpiphdr *ui; + int len = m->m_pkthdr.len; struct sockaddr_in *sin; struct in_addr origladdr, laddr, faddr, pi_laddr; u_short lport, fport; - struct sockaddr_in ifaddr; int error = 0, udp_dodisconnect = 0, pktinfo = 0; struct socket *so = inp->inp_socket; int soopts = 0; struct mbuf *inpopts; struct ip_moptions *mopts; struct route ro; - struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF }; + struct ip_out_args ipoa = + { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 }; struct ifnet *outif = NULL; struct flowadv *adv = &ipoa.ipoa_flowadv; mbuf_svc_class_t msc = MBUF_SC_UNSPEC; @@ -1320,7 +1318,6 @@ udp_output(inp, m, addr, control, p) /* Enable flow advisory only when connected */ flowadv = (so->so_state & SS_ISCONNECTED) ? 
1 : 0; - pi_laddr.s_addr = INADDR_ANY; KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); @@ -1328,10 +1325,10 @@ udp_output(inp, m, addr, control, p) lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); if (control != NULL) { msc = mbuf_service_class_from_control(control); - + VERIFY(outif == NULL); error = udp_check_pktinfo(control, &outif, &pi_laddr); - m_freem(control); + control = NULL; if (error) goto release; pktinfo++; @@ -1340,10 +1337,10 @@ udp_output(inp, m, addr, control, p) } KERNEL_DEBUG(DBG_LAYER_OUT_BEG, inp->inp_fport, inp->inp_lport, - inp->inp_laddr.s_addr, inp->inp_faddr.s_addr, - (htons((u_short)len + sizeof (struct udphdr)))); + inp->inp_laddr.s_addr, inp->inp_faddr.s_addr, + (htons((u_short)len + sizeof (struct udphdr)))); - if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) { + if (len + sizeof (struct udpiphdr) > IP_MAXPACKET) { error = EMSGSIZE; goto release; } @@ -1361,8 +1358,12 @@ udp_output(inp, m, addr, control, p) * If the ancillary IP_PKTINFO option contains an interface index, * it takes precedence over the one specified by IP_BOUND_IF. */ - if (ipoa.ipoa_boundif == IFSCOPE_NONE && - (inp->inp_flags & INP_BOUND_IF)) { + if (ipoa.ipoa_boundif == IFSCOPE_NONE && + (inp->inp_flags & INP_BOUND_IF)) { + VERIFY(inp->inp_boundifp != NULL); + ifnet_reference(inp->inp_boundifp); /* for this routine */ + if (outif != NULL) + ifnet_release(outif); outif = inp->inp_boundifp; ipoa.ipoa_boundif = outif->if_index; } @@ -1370,25 +1371,29 @@ udp_output(inp, m, addr, control, p) ipoa.ipoa_flags |= IPOAF_NO_CELLULAR; soopts |= IP_OUTARGS; - /* If there was a routing change, discard cached route and check - * that we have a valid source address. - * Reacquire a new source address if INADDR_ANY was specified + /* + * If there was a routing change, discard cached route and check + * that we have a valid source address. Reacquire a new source + * address if INADDR_ANY was specified. */ - if (inp->inp_route.ro_rt != NULL && - inp->inp_route.ro_rt->generation_id != route_generation) { - struct in_ifaddr *ia; + if (ROUTE_UNUSABLE(&inp->inp_route)) { + struct in_ifaddr *ia = NULL; + + ROUTE_RELEASE(&inp->inp_route); /* src address is gone? */ - if ((ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) { - if (((inp->inp_flags & INP_INADDR_ANY) == 0) || + if (inp->inp_laddr.s_addr != INADDR_ANY && + (ia = ifa_foraddr(inp->inp_laddr.s_addr)) == NULL) { + if (!(inp->inp_flags & INP_INADDR_ANY) || (so->so_state & SS_ISCONNECTED)) { - /* Rdar://5448998 - * If the source address is gone, return an error if: + /* + * Rdar://5448998 + * If the source address is gone, return an + * error if: * - the source was specified * - the socket was already connected */ - soevent(so, - (SO_FILT_HINT_LOCKED | + soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR)); error = EADDRNOTAVAIL; goto release; @@ -1400,24 +1405,24 @@ udp_output(inp, m, addr, control, p) } if (ia != NULL) IFA_REMREF(&ia->ia_ifa); - if (inp->inp_route.ro_rt != NULL) - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = NULL; } origoutifp = inp->inp_last_outifp; - /* IP_PKTINFO option check. - * If a temporary scope or src address is provided, use it for this packet only - * and make sure we forget it after sending this datagram. + /* + * IP_PKTINFO option check. If a temporary scope or src address + * is provided, use it for this packet only and make sure we forget + * it after sending this datagram. 
*/ - if (pi_laddr.s_addr != INADDR_ANY || (ipoa.ipoa_boundif != IFSCOPE_NONE && pktinfo)) { - laddr = pi_laddr; /* temp src address for this datagram only */ + /* temp src address for this datagram only */ + laddr = pi_laddr; origladdr.s_addr = INADDR_ANY; - udp_dodisconnect = 1; /* we don't want to keep the laddr or route */ - inp->inp_flags |= INP_INADDR_ANY; /* remember we don't care about src addr.*/ + /* we don't want to keep the laddr or route */ + udp_dodisconnect = 1; + /* remember we don't care about src addr.*/ + inp->inp_flags |= INP_INADDR_ANY; } else { origladdr = laddr = inp->inp_laddr; } @@ -1435,40 +1440,59 @@ } if (lport == 0) { /* - * In case we don't have a local port set, go through the full connect. - * We don't have a local port yet (ie, we can't be looked up), - * so it's not an issue if the input runs at the same time we do this. + * In case we don't have a local port set, go through + * the full connect. We don't have a local port yet + * (i.e., we can't be looked up), so it's not an issue + * if the input runs at the same time we do this. */ - - if (pi_laddr.s_addr != INADDR_ANY) /* if we have a source address specified, use that */ + /* if we have a source address specified, use that */ + if (pi_laddr.s_addr != INADDR_ANY) inp->inp_laddr = pi_laddr; - error = in_pcbconnect(inp, addr, p, &outif); /* if a scope is specified, use it */ - if (error) { + /* + * If a scope is specified, use it. Scope from + * IP_PKTINFO takes precedence over the scope + * set via INP_BOUND_IF. + */ + error = in_pcbconnect(inp, addr, p, ipoa.ipoa_boundif, + &outif); + if (error) goto release; - } + laddr = inp->inp_laddr; lport = inp->inp_lport; faddr = inp->inp_faddr; fport = inp->inp_fport; udp_dodisconnect = 1; - ipoa.ipoa_boundif = (outif != NULL) ? - outif->if_index : IFSCOPE_NONE; + + /* synch up in case in_pcbladdr() overrides */ + if (outif != NULL && ipoa.ipoa_boundif != IFSCOPE_NONE) + ipoa.ipoa_boundif = outif->if_index; } else { - /* Fast path case - * we have a full address and a local port. - * use those info to build the packet without changing the pcb - * and interfering with the input path. See 3851370 - * Note: if we may have a scope from IP_PKTINFO but the - * priority is always given to the scope provided by INP_BOUND_IF. + /* + * Fast path case + * + * We have a full address and a local port; use that + * info to build the packet without changing the pcb + * and interfering with the input path. See 3851370. + * + * Scope from IP_PKTINFO takes precedence over + * the scope set via INP_BOUND_IF. */ if (laddr.s_addr == INADDR_ANY) { - if ((error = in_pcbladdr(inp, addr, &ifaddr, &outif)) != 0) + if ((error = in_pcbladdr(inp, addr, &laddr, + ipoa.ipoa_boundif, &outif)) != 0) goto release; - laddr = ifaddr.sin_addr; - inp->inp_flags |= INP_INADDR_ANY; /* from pcbconnect: remember we don't care about src addr.*/ - ipoa.ipoa_boundif = (outif != NULL) ? - outif->if_index : IFSCOPE_NONE; + /* + * from pcbconnect: remember we don't + * care about src addr.
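A theme running through the udp_output() hunks above: every store to `outif` now takes an ifnet_reference(), any value being replaced is released first, and one ifnet_release() at the common exit drops whatever is still held. The discipline in miniature, with a hypothetical counted type standing in for struct ifnet:

#include <assert.h>
#include <stdatomic.h>

struct ifnet_stub { atomic_int if_refcnt; };

static void
ifnet_reference(struct ifnet_stub *ifp)
{
	atomic_fetch_add(&ifp->if_refcnt, 1);
}

static void
ifnet_release(struct ifnet_stub *ifp)
{
	int prev = atomic_fetch_sub(&ifp->if_refcnt, 1);
	assert(prev > 0);	/* over-release is a bug */
}

int
main(void)
{
	struct ifnet_stub en0 = { 1 };	/* one ref held by "attach" */
	struct ifnet_stub *outif = NULL;

	/* taking a new binding releases the old one first */
	ifnet_reference(&en0);
	if (outif != NULL)
		ifnet_release(outif);
	outif = &en0;

	/* common exit path: drop whatever is still held */
	if (outif != NULL)
		ifnet_release(outif);
	assert(atomic_load(&en0.if_refcnt) == 1);
	return (0);
}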
+ */ + inp->inp_flags |= INP_INADDR_ANY; + + /* synch up in case in_pcbladdr() overrides */ + if (outif != NULL && + ipoa.ipoa_boundif != IFSCOPE_NONE) + ipoa.ipoa_boundif = outif->if_index; } faddr = sin->sin_addr; @@ -1483,7 +1507,8 @@ udp_output(inp, m, addr, control, p) #if CONFIG_MACF_NET mac_mbuf_label_associate_inpcb(inp, m); -#endif +#endif /* CONFIG_MACF_NET */ + if (inp->inp_flowhash == 0) inp->inp_flowhash = inp_calc_flowhash(inp); @@ -1491,7 +1516,7 @@ udp_output(inp, m, addr, control, p) * Calculate data length and get a mbuf * for UDP and IP headers. */ - M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT); + M_PREPEND(m, sizeof (struct udpiphdr), M_DONTWAIT); if (m == 0) { error = ENOBUFS; goto abort; @@ -1502,20 +1527,20 @@ udp_output(inp, m, addr, control, p) * and addresses and length put into network format. */ ui = mtod(m, struct udpiphdr *); - bzero(ui->ui_x1, sizeof(ui->ui_x1)); /* XXX still needed? */ + bzero(ui->ui_x1, sizeof (ui->ui_x1)); /* XXX still needed? */ ui->ui_pr = IPPROTO_UDP; ui->ui_src = laddr; ui->ui_dst = faddr; ui->ui_sport = lport; ui->ui_dport = fport; - ui->ui_ulen = htons((u_short)len + sizeof(struct udphdr)); + ui->ui_ulen = htons((u_short)len + sizeof (struct udphdr)); /* * Set up checksum and output datagram. */ if (udpcksum && !(inp->inp_flags & INP_UDP_NOCKSUM)) { - ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, - htons((u_short)len + sizeof(struct udphdr) + IPPROTO_UDP)); + ui->ui_sum = in_pseudo(ui->ui_src.s_addr, ui->ui_dst.s_addr, + htons((u_short)len + sizeof (struct udphdr) + IPPROTO_UDP)); m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } else { @@ -1534,7 +1559,7 @@ udp_output(inp, m, addr, control, p) error = ENOBUFS; goto abort; } -#endif /*IPSEC*/ +#endif /* IPSEC */ inpopts = inp->inp_options; soopts |= (inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST)); @@ -1543,7 +1568,8 @@ udp_output(inp, m, addr, control, p) IMO_LOCK(mopts); IMO_ADDREF_LOCKED(mopts); if (IN_MULTICAST(ntohl(ui->ui_dst.s_addr)) && - mopts->imo_multicast_ifp != NULL) { + mopts->imo_multicast_ifp != NULL) { + /* no reference needed */ inp->inp_last_outifp = mopts->imo_multicast_ifp; } IMO_UNLOCK(mopts); @@ -1553,11 +1579,12 @@ udp_output(inp, m, addr, control, p) inp_route_copyout(inp, &ro); set_packet_service_class(m, so, msc, 0); - m->m_pkthdr.socket_id = get_socket_id(inp->inp_socket); - m->m_pkthdr.m_flowhash = inp->inp_flowhash; - m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH; + m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB; + m->m_pkthdr.pkt_flowid = inp->inp_flowhash; + m->m_pkthdr.pkt_proto = IPPROTO_UDP; + m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC); if (flowadv) - m->m_pkthdr.m_fhflags |= PF_TAG_FLOWADV; + m->m_pkthdr.pkt_flags |= PKTF_FLOW_ADV; if (ipoa.ipoa_boundif != IFSCOPE_NONE) ipoa.ipoa_flags |= IPOAF_BOUND_IF; @@ -1568,15 +1595,23 @@ udp_output(inp, m, addr, control, p) inp->inp_sndinprog_cnt++; socket_unlock(so, 0); - error = ip_output_list(m, 0, inpopts, &ro, soopts, mopts, &ipoa); + error = ip_output(m, inpopts, &ro, soopts, mopts, &ipoa); m = NULL; socket_lock(so, 0); if (mopts != NULL) IMO_REMREF(mopts); if (error == 0 && nstat_collect) { - locked_add_64(&inp->inp_stat->txpackets, 1); - locked_add_64(&inp->inp_stat->txbytes, len); + boolean_t cell, wifi; + + if (ro.ro_rt != NULL) { + cell = IFNET_IS_CELLULAR(ro.ro_rt->rt_ifp); + wifi = (!cell && IFNET_IS_WIFI(ro.ro_rt->rt_ifp)); + } else { + cell = wifi = FALSE; + } + INP_ADD_STAT(inp, cell, wifi, txpackets, 1); + 
INP_ADD_STAT(inp, cell, wifi, txbytes, len); } if (flowadv && (adv->code == FADV_FLOW_CONTROLLED || @@ -1598,12 +1633,10 @@ udp_output(inp, m, addr, control, p) abort: if (udp_dodisconnect) { /* Always discard the cached route for unconnected socket */ - if (inp->inp_route.ro_rt != NULL) { - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = NULL; - } + ROUTE_RELEASE(&inp->inp_route); in_pcbdisconnect(inp); inp->inp_laddr = origladdr; /* XXX rehash? */ + /* no reference needed */ inp->inp_last_outifp = origoutifp; } else if (inp->inp_route.ro_rt != NULL) { struct rtentry *rt = inp->inp_route.ro_rt; @@ -1614,22 +1647,36 @@ abort: /* * Always discard if it is a multicast or broadcast route. */ - if (rt == NULL) { - rtfree(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = NULL; - } + if (rt == NULL) + ROUTE_RELEASE(&inp->inp_route); + /* * If the destination route is unicast, update outifp with * that of the route interface used by IP. */ if (rt != NULL && (outifp = rt->rt_ifp) != inp->inp_last_outifp) - inp->inp_last_outifp = outifp; + inp->inp_last_outifp = outifp; /* no reference needed */ + } else { + ROUTE_RELEASE(&inp->inp_route); } + /* + * If output interface was cellular, and this socket is denied + * access to it, generate an event. + */ + if (error != 0 && (ipoa.ipoa_retflags & IPOARF_IFDENIED) && + (inp->inp_flags & INP_NO_IFT_CELLULAR)) + soevent(so, (SO_FILT_HINT_LOCKED|SO_FILT_HINT_IFDENIED)); + release: + KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_END, error, 0, 0, 0, 0); + if (m != NULL) m_freem(m); - KERNEL_DEBUG(DBG_FNC_UDP_OUTPUT | DBG_FUNC_END, error, 0,0,0,0); + + if (outif != NULL) + ifnet_release(outif); + return (error); } @@ -1637,47 +1684,50 @@ u_int32_t udp_sendspace = 9216; /* really max datagram size */ /* 187 1K datagrams (approx 192 KB) */ u_int32_t udp_recvspace = 187 * (1024 + #if INET6 - sizeof(struct sockaddr_in6) -#else - sizeof(struct sockaddr_in) -#endif - ); + sizeof (struct sockaddr_in6) +#else /* !INET6 */ + sizeof (struct sockaddr_in) +#endif /* !INET6 */ + ); /* Check that the values of udp send and recv space do not exceed sb_max */ static int -sysctl_udp_sospace(struct sysctl_oid *oidp, __unused void *arg1, - __unused int arg2, struct sysctl_req *req) { +sysctl_udp_sospace(struct sysctl_oid *oidp, void *arg1, int arg2, + struct sysctl_req *req) +{ +#pragma unused(arg1, arg2) u_int32_t new_value = 0, *space_p = NULL; int changed = 0, error = 0; - u_quad_t sb_effective_max = (sb_max/ (MSIZE+MCLBYTES)) * MCLBYTES; + u_quad_t sb_effective_max = (sb_max/(MSIZE+MCLBYTES)) * MCLBYTES; switch (oidp->oid_number) { - case UDPCTL_RECVSPACE: - space_p = &udp_recvspace; - break; - case UDPCTL_MAXDGRAM: - space_p = &udp_sendspace; - break; - default: - return EINVAL; + case UDPCTL_RECVSPACE: + space_p = &udp_recvspace; + break; + case UDPCTL_MAXDGRAM: + space_p = &udp_sendspace; + break; + default: + return EINVAL; } - error = sysctl_io_number(req, *space_p, sizeof(u_int32_t), - &new_value, &changed); + error = sysctl_io_number(req, *space_p, sizeof (u_int32_t), + &new_value, &changed); if (changed) { - if (new_value > 0 && new_value <= sb_effective_max) { + if (new_value > 0 && new_value <= sb_effective_max) *space_p = new_value; - } else { + else error = ERANGE; - } } - return error; + return (error); } -SYSCTL_PROC(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, - &udp_recvspace, 0, &sysctl_udp_sospace, "IU", "Maximum incoming UDP datagram size"); +SYSCTL_PROC(_net_inet_udp, UDPCTL_RECVSPACE, recvspace, + CTLTYPE_INT 
| CTLFLAG_RW | CTLFLAG_LOCKED, &udp_recvspace, 0, + &sysctl_udp_sospace, "IU", "Maximum incoming UDP datagram size"); -SYSCTL_PROC(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, - &udp_sendspace, 0, &sysctl_udp_sospace, "IU", "Maximum outgoing UDP datagram size"); +SYSCTL_PROC(_net_inet_udp, UDPCTL_MAXDGRAM, maxdgram, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &udp_sendspace, 0, + &sysctl_udp_sospace, "IU", "Maximum outgoing UDP datagram size"); static int udp_abort(struct socket *so) @@ -1685,35 +1735,39 @@ udp_abort(struct socket *so) struct inpcb *inp; inp = sotoinpcb(so); - if (inp == 0) - panic("udp_abort: so=%p null inp\n", so); /* ??? possible? panic instead? */ + if (inp == NULL) { + panic("%s: so=%p null inp\n", __func__, so); + /* NOTREACHED */ + } soisdisconnected(so); in_pcbdetach(inp); - return 0; + return (0); } static int -udp_attach(struct socket *so, __unused int proto, struct proc *p) +udp_attach(struct socket *so, int proto, struct proc *p) { +#pragma unused(proto) struct inpcb *inp; int error; inp = sotoinpcb(so); - if (inp != 0) - panic ("udp_attach so=%p inp=%p\n", so, inp); - + if (inp != NULL) { + panic ("%s so=%p inp=%p\n", __func__, so, inp); + /* NOTREACHED */ + } error = in_pcballoc(so, &udbinfo, p); - if (error) - return error; + if (error != 0) + return (error); error = soreserve(so, udp_sendspace, udp_recvspace); - if (error) - return error; + if (error != 0) + return (error); inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_ttl = ip_defttl; if (nstat_collect) nstat_udp_new_pcb(inp); - return 0; + return (0); } static int @@ -1722,15 +1776,15 @@ udp_bind(struct socket *so, struct sockaddr *nam, struct proc *p) struct inpcb *inp; int error; - if (nam->sa_family != 0 && nam->sa_family != AF_INET - && nam->sa_family != AF_INET6) { - return EAFNOSUPPORT; - } + if (nam->sa_family != 0 && nam->sa_family != AF_INET && + nam->sa_family != AF_INET6) + return (EAFNOSUPPORT); + inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? EINVAL : EPROTOTYPE); error = in_pcbbind(inp, nam, p); - return error; + return (error); } static int @@ -1740,17 +1794,86 @@ udp_connect(struct socket *so, struct sockaddr *nam, struct proc *p) int error; inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? 
EINVAL : EPROTOTYPE); if (inp->inp_faddr.s_addr != INADDR_ANY) - return EISCONN; - error = in_pcbconnect(inp, nam, p, NULL); + return (EISCONN); + error = in_pcbconnect(inp, nam, p, IFSCOPE_NONE, NULL); if (error == 0) { soisconnected(so); if (inp->inp_flowhash == 0) inp->inp_flowhash = inp_calc_flowhash(inp); } - return error; + return (error); +} + +int +udp_connectx_common(struct socket *so, int af, + struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, + struct proc *p, uint32_t ifscope, associd_t aid, connid_t *pcid, + uint32_t flags, void *arg, uint32_t arglen) +{ +#pragma unused(aid, flags, arg, arglen) + struct sockaddr_entry *src_se = NULL, *dst_se = NULL; + struct inpcb *inp = sotoinpcb(so); + int error; + + if (inp == NULL) + return (EINVAL); + + VERIFY(dst_sl != NULL); + + /* select source (if specified) and destination addresses */ + error = in_selectaddrs(af, src_sl, &src_se, dst_sl, &dst_se); + if (error != 0) + return (error); + + VERIFY(*dst_sl != NULL && dst_se != NULL); + VERIFY(src_se == NULL || *src_sl != NULL); + VERIFY(dst_se->se_addr->sa_family == af); + VERIFY(src_se == NULL || src_se->se_addr->sa_family == af); + + /* bind socket to the specified interface, if requested */ + if (ifscope != IFSCOPE_NONE && + (error = inp_bindif(inp, ifscope, NULL)) != 0) + return (error); + + /* if source address and/or port is specified, bind to it */ + if (src_se != NULL) { + struct sockaddr *sa = src_se->se_addr; + error = sobindlock(so, sa, 0); /* already locked */ + if (error != 0) + return (error); + } + + switch (af) { + case AF_INET: + error = udp_connect(so, dst_se->se_addr, p); + break; +#if INET6 + case AF_INET6: + error = udp6_connect(so, dst_se->se_addr, p); + break; +#endif /* INET6 */ + default: + VERIFY(0); + /* NOTREACHED */ + } + + if (error == 0 && pcid != NULL) + *pcid = 1; /* there is only 1 connection for a UDP */ + + return (error); +} + +static int +udp_connectx(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ + return (udp_connectx_common(so, AF_INET, src_sl, dst_sl, + p, ifscope, aid, pcid, flags, arg, arglen)); } static int @@ -1759,11 +1882,13 @@ udp_detach(struct socket *so) struct inpcb *inp; inp = sotoinpcb(so); - if (inp == 0) - panic("udp_detach: so=%p null inp\n", so); /* ??? possible? panic instead? */ + if (inp == NULL) { + panic("%s: so=%p null inp\n", __func__, so); + /* NOTREACHED */ + } in_pcbdetach(inp); inp->inp_state = INPCB_STATE_DEAD; - return 0; + return (0); } static int @@ -1772,35 +1897,49 @@ udp_disconnect(struct socket *so) struct inpcb *inp; inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? 
EINVAL : EPROTOTYPE); if (inp->inp_faddr.s_addr == INADDR_ANY) - return ENOTCONN; + return (ENOTCONN); in_pcbdisconnect(inp); - + /* reset flow controlled state, just in case */ inp_reset_fc_state(inp); inp->inp_laddr.s_addr = INADDR_ANY; so->so_state &= ~SS_ISCONNECTED; /* XXX */ inp->inp_last_outifp = NULL; - return 0; + return (0); } static int -udp_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr *addr, - struct mbuf *control, struct proc *p) +udp_disconnectx(struct socket *so, associd_t aid, connid_t cid) { +#pragma unused(cid) + if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + return (EINVAL); + + return (udp_disconnect(so)); +} + +static int +udp_send(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, struct proc *p) +{ +#pragma unused(flags) struct inpcb *inp; inp = sotoinpcb(so); - if (inp == 0) { - m_freem(m); - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + if (m != NULL) + m_freem(m); + if (control != NULL) + m_freem(control); + return (inp == NULL ? EINVAL : EPROTOTYPE); } - - return udp_output(inp, m, addr, control, p); + + return (udp_output(inp, m, addr, control, p)); } int @@ -1809,21 +1948,12 @@ udp_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; + if (inp == NULL) + return (EINVAL); socantsendmore(so); - return 0; + return (0); } -struct pr_usrreqs udp_usrreqs = { - udp_abort, pru_accept_notsupp, udp_attach, udp_bind, udp_connect, - pru_connect2_notsupp, in_control, udp_detach, udp_disconnect, - pru_listen_notsupp, in_setpeeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, udp_send, pru_sense_null, udp_shutdown, - in_setsockaddr, sosend, soreceive, pru_sopoll_notsupp -}; - - int udp_lock(struct socket *so, int refcount, void *debug) { @@ -1834,12 +1964,12 @@ udp_lock(struct socket *so, int refcount, void *debug) else lr_saved = debug; - if (so->so_pcb) { + if (so->so_pcb != NULL) { lck_mtx_assert(&((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(&((struct inpcb *)so->so_pcb)->inpcb_mtx); } else { - panic("udp_lock: so=%p NO PCB! lr=%p lrh= %s\n", + panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__, so, lr_saved, solockhistory_nr(so)); /* NOTREACHED */ } @@ -1865,7 +1995,7 @@ udp_unlock(struct socket *so, int refcount, void *debug) so->so_usecount--; if (so->so_pcb == NULL) { - panic("udp_unlock: so=%p NO PCB! lr=%p lrh= %s\n", + panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__, so, lr_saved, solockhistory_nr(so)); /* NOTREACHED */ } else { @@ -1875,39 +2005,42 @@ udp_unlock(struct socket *so, int refcount, void *debug) so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; lck_mtx_unlock(&((struct inpcb *)so->so_pcb)->inpcb_mtx); } - - return (0); } lck_mtx_t * -udp_getlock(struct socket *so, __unused int locktype) +udp_getlock(struct socket *so, int locktype) { +#pragma unused(locktype) struct inpcb *inp = sotoinpcb(so); - - if (so->so_pcb) - return(&inp->inpcb_mtx); - else { - panic("udp_getlock: so=%p NULL so_pcb lrh= %s\n", - so, solockhistory_nr(so)); - return (so->so_proto->pr_domain->dom_mtx); + if (so->so_pcb == NULL) { + panic("%s: so=%p NULL so_pcb lrh= %s\n", __func__, + so, solockhistory_nr(so)); + /* NOTREACHED */ } + return (&inp->inpcb_mtx); } -void -udp_slowtimo() +/* + * UDP garbage collector callback (inpcb_timer_func_t). + * + * Re-arms itself by bumping ipi_gc_req.intimer_fast while work remains.
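udp_gc(), introduced just below, never blocks the timer thread: when the exclusive ipi_lock or a per-PCB mutex is contended it bumps ipi_gc_req.intimer_fast and returns, trusting the timer to call it again. The same try-lock-or-reschedule idiom as a userspace sketch:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t gc_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_uint gc_requests;

static void
gc_run(void)
{
	if (pthread_mutex_trylock(&gc_lock) != 0) {
		/* couldn't get the lock; ask for another pass instead */
		atomic_fetch_add(&gc_requests, 1);
		return;
	}
	/* ... reap dead entries here ... */
	pthread_mutex_unlock(&gc_lock);
}

int
main(void)
{
	gc_run();	/* uncontended: reaps, defers nothing */
	printf("deferred passes: %u\n", atomic_load(&gc_requests));
	return (0);
}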
+ */ +static void +udp_gc(struct inpcbinfo *ipi) { struct inpcb *inp, *inpnxt; struct socket *so; - struct inpcbinfo *pcbinfo = &udbinfo; - if (lck_rw_try_lock_exclusive(pcbinfo->mtx) == FALSE) { + if (lck_rw_try_lock_exclusive(ipi->ipi_lock) == FALSE) { if (udp_gc_done == TRUE) { udp_gc_done = FALSE; - return; /* couldn't get the lock, better lock next time */ + /* couldn't get the lock, must lock next time */ + atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); + return; } - lck_rw_lock_exclusive(pcbinfo->mtx); + lck_rw_lock_exclusive(ipi->ipi_lock); } udp_gc_done = TRUE; @@ -1915,49 +2048,163 @@ udp_slowtimo() for (inp = udb.lh_first; inp != NULL; inp = inpnxt) { inpnxt = inp->inp_list.le_next; - if (inp->inp_wantcnt != WNT_STOPUSING) + /* + * Skip unless it's STOPUSING; garbage collector will + * be triggered by in_pcb_checkstate() upon setting + * wantcnt to that value. If the PCB is already dead, + * keep gc active to anticipate wantcnt changing. + */ + if (inp->inp_wantcnt != WNT_STOPUSING) continue; - so = inp->inp_socket; - if (!lck_mtx_try_lock(&inp->inpcb_mtx)) /* skip if busy, no hurry for cleanup... */ + /* + * Skip if busy, no hurry for cleanup. Keep gc active + * and try the lock again during next round. + */ + if (!lck_mtx_try_lock(&inp->inpcb_mtx)) { + atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); continue; + } + /* + * Keep gc active unless usecount is 0. + */ + so = inp->inp_socket; if (so->so_usecount == 0) { if (inp->inp_state != INPCB_STATE_DEAD) { #if INET6 - if (INP_CHECK_SOCKAF(so, AF_INET6)) + if (SOCK_CHECK_DOM(so, PF_INET6)) in6_pcbdetach(inp); else #endif /* INET6 */ - in_pcbdetach(inp); + in_pcbdetach(inp); } in_pcbdispose(inp); } else { lck_mtx_unlock(&inp->inpcb_mtx); + atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1); } } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(ipi->ipi_lock); + + return; } -int -ChkAddressOK( __uint32_t dstaddr, __uint32_t srcaddr ) +static int +udp_getstat SYSCTL_HANDLER_ARGS { - if ( dstaddr == srcaddr ){ - return 0; - } - return 1; +#pragma unused(oidp, arg1, arg2) + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = (size_t)sizeof (struct udpstat); + + return (SYSCTL_OUT(req, &udpstat, MIN(sizeof (udpstat), req->oldlen))); } void udp_in_cksum_stats(u_int32_t len) { - udps_in_sw_cksum++; - udps_in_sw_cksum_bytes += len; + udpstat.udps_rcv_swcsum++; + udpstat.udps_rcv_swcsum_bytes += len; } void udp_out_cksum_stats(u_int32_t len) { - udps_out_sw_cksum++; - udps_out_sw_cksum_bytes += len; + udpstat.udps_snd_swcsum++; + udpstat.udps_snd_swcsum_bytes += len; +} + +#if INET6 +void +udp_in6_cksum_stats(u_int32_t len) +{ + udpstat.udps_rcv6_swcsum++; + udpstat.udps_rcv6_swcsum_bytes += len; +} + +void +udp_out6_cksum_stats(u_int32_t len) +{ + udpstat.udps_snd6_swcsum++; + udpstat.udps_snd6_swcsum_bytes += len; +} +#endif /* INET6 */ + +/* + * Checksum extended UDP header and data. 
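udp_input_checksum(), whose definition follows this comment, bottoms out in the RFC 1071 one's-complement sum implemented by in_cksum() and in_pseudo(). The primitive itself, as a minimal readable sketch rather than the kernel's optimized version:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static uint16_t
cksum16(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len == 1)
		sum += (uint32_t)p[0] << 8;	/* pad the odd byte */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	return ((uint16_t)~sum);
}

int
main(void)
{
	uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x1c };

	printf("0x%04x\n", cksum16(pkt, sizeof (pkt)));
	return (0);
}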
+ */ +static int +udp_input_checksum(struct mbuf *m, struct udphdr *uh, int off, int ulen) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ip *ip = mtod(m, struct ip *); + struct ipovly *ipov = (struct ipovly *)ip; + + if (uh->uh_sum == 0) { + udpstat.udps_nosum++; + return (0); + } + + if ((hwcksum_rx || (ifp->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) && + (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + uh->uh_sum = m->m_pkthdr.csum_rx_val; + } else { + uint16_t sum = m->m_pkthdr.csum_rx_val; + uint16_t start = m->m_pkthdr.csum_rx_start; + + /* + * Perform 1's complement adjustment of octets + * that got included/excluded in the hardware- + * calculated checksum value. Ignore cases + * where the value includes or excludes the + * IP header span, as the sum for those octets + * would already be 0xffff and thus no-op. + */ + if ((m->m_pkthdr.csum_flags & CSUM_PARTIAL) && + start != 0 && (off - start) != off) { +#if BYTE_ORDER != BIG_ENDIAN + if (start < off) { + HTONS(ip->ip_len); + HTONS(ip->ip_off); + } +#endif /* BYTE_ORDER != BIG_ENDIAN */ + /* callee folds in sum */ + sum = m_adj_sum16(m, start, off, sum); +#if BYTE_ORDER != BIG_ENDIAN + if (start < off) { + NTOHS(ip->ip_off); + NTOHS(ip->ip_len); + } +#endif /* BYTE_ORDER != BIG_ENDIAN */ + } + + /* callee folds in sum */ + uh->uh_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, sum + htonl(ulen + IPPROTO_UDP)); + } + uh->uh_sum ^= 0xffff; + } else { + uint16_t ip_sum; + char b[9]; + + bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1)); + bzero(ipov->ih_x1, sizeof (ipov->ih_x1)); + ip_sum = ipov->ih_len; + ipov->ih_len = uh->uh_ulen; + uh->uh_sum = in_cksum(m, ulen + sizeof (struct ip)); + bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1)); + ipov->ih_len = ip_sum; + + udp_in_cksum_stats(ulen); + } + + if (uh->uh_sum != 0) { + udpstat.udps_badsum++; + IF_UDP_STATINC(ifp, badchksum); + return (-1); + } + + return (0); } diff --git a/bsd/netinet/udp_var.h b/bsd/netinet/udp_var.h index 776109a59..42d56a3c8 100644 --- a/bsd/netinet/udp_var.h +++ b/bsd/netinet/udp_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2012 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,15 +62,15 @@ #ifndef _NETINET_UDP_VAR_H_ #define _NETINET_UDP_VAR_H_ -#include +#include #include /* * UDP kernel structures and variables. 
*/ struct udpiphdr { - struct ipovly ui_i; /* overlaid ip structure */ + struct ipovly ui_i; /* overlaid ip structure */ struct udphdr ui_u; /* udp header */ }; #define ui_x1 ui_i.ih_x1 @@ -86,24 +86,31 @@ struct udpiphdr { #define ui_prev ui_i.ih_prev struct udpstat { - /* input statistics: */ - u_int32_t udps_ipackets; /* total input packets */ - u_int32_t udps_hdrops; /* packet shorter than header */ - u_int32_t udps_badsum; /* checksum error */ - u_int32_t udps_badlen; /* data length larger than packet */ - u_int32_t udps_noport; /* no socket on port */ - u_int32_t udps_noportbcast; /* of above, arrived as broadcast */ - u_int32_t udps_fullsock; /* not delivered, input socket full */ - u_int32_t udpps_pcbcachemiss; /* input packets missing pcb cache */ - u_int32_t udpps_pcbhashmiss; /* input packets not for hashed pcb */ - /* output statistics: */ - u_int32_t udps_opackets; /* total output packets */ - u_int32_t udps_fastout; /* output packets on fast path */ -#ifndef __APPLE__ - u_int32_t udps_nosum; /* no checksum */ - /* of no socket on port, arrived as multicast */ - u_int32_t udps_noportmcast; -#endif + /* input statistics: */ + u_int32_t udps_ipackets; /* total input packets */ + u_int32_t udps_hdrops; /* packet shorter than header */ + u_int32_t udps_badsum; /* checksum error */ + u_int32_t udps_badlen; /* data length larger than packet */ + u_int32_t udps_noport; /* no socket on port */ + u_int32_t udps_noportbcast; /* of above, arrived as broadcast */ + u_int32_t udps_fullsock; /* not delivered, input socket full */ + u_int32_t udpps_pcbcachemiss; /* input packets missing pcb cache */ + u_int32_t udpps_pcbhashmiss; /* input packets not for hashed pcb */ + /* output statistics: */ + u_int32_t udps_opackets; /* total output packets */ + u_int32_t udps_fastout; /* output packets on fast path */ + u_int32_t udps_nosum; /* no checksum */ + u_int32_t udps_noportmcast; /* of no socket on port, multicast */ + u_int32_t udps_filtermcast; /* blocked by multicast filter */ + /* checksum statistics: */ + u_int32_t udps_rcv_swcsum; /* udp swcksum (inbound), packets */ + u_int32_t udps_rcv_swcsum_bytes; /* udp swcksum (inbound), bytes */ + u_int32_t udps_rcv6_swcsum; /* udp6 swcksum (inbound), packets */ + u_int32_t udps_rcv6_swcsum_bytes; /* udp6 swcksum (inbound), bytes */ + u_int32_t udps_snd_swcsum; /* udp swcksum (outbound), packets */ + u_int32_t udps_snd_swcsum_bytes; /* udp swcksum (outbound), bytes */ + u_int32_t udps_snd6_swcsum; /* udp6 swcksum (outbound), packets */ + u_int32_t udps_snd6_swcsum_bytes; /* udp6 swcksum (outbound), bytes */ }; /* @@ -116,16 +123,24 @@ struct udpstat { #define UDPCTL_PCBLIST 5 /* list of PCBs for UDP sockets */ #define UDPCTL_MAXID 6 -#ifdef KERNEL_PRIVATE -#define UDPCTL_NAMES { \ - { 0, 0 }, \ - { "checksum", CTLTYPE_INT }, \ - { "stats", CTLTYPE_STRUCT }, \ - { "maxdgram", CTLTYPE_INT }, \ - { "recvspace", CTLTYPE_INT }, \ - { "pcblist", CTLTYPE_STRUCT }, \ +#ifdef BSD_KERNEL_PRIVATE +#include +#include + +#define UDPCTL_NAMES { \ + { 0, 0 }, \ + { "checksum", CTLTYPE_INT }, \ + { "stats", CTLTYPE_STRUCT }, \ + { "maxdgram", CTLTYPE_INT }, \ + { "recvspace", CTLTYPE_INT }, \ + { "pcblist", CTLTYPE_STRUCT }, \ } +#ifdef INET6 +#define udp6stat udpstat +#define udp6s_opackets udps_opackets +#endif /* INET6 */ + SYSCTL_DECL(_net_inet_udp); struct udpstat_local { @@ -137,34 +152,32 @@ struct udpstat_local { u_int64_t badmcast; u_int64_t cleanup; u_int64_t badipsec; -}; - -extern struct pr_usrreqs udp_usrreqs; -extern struct inpcbhead udb; -extern 
struct inpcbinfo udbinfo; -extern u_int32_t udp_sendspace; -extern u_int32_t udp_recvspace; -extern struct udpstat udpstat; -extern int log_in_vain; - -void udp_ctlinput(int, struct sockaddr *, void *); -int udp_ctloutput(struct socket *, struct sockopt *); - -void udp_init(void) __attribute__((section("__TEXT, initcode"))); -void udp_input(struct mbuf *, int); +}; -void udp_notify(struct inpcb *inp, int errno); -int udp_shutdown(struct socket *so); -int udp_lock (struct socket *, int, void *); -int udp_unlock (struct socket *, int, void *); -void udp_slowtimo (void); -#ifdef _KERN_LOCKS_H_ -lck_mtx_t * udp_getlock (struct socket *, int); -#else -void * udp_getlock (struct socket *, int); -#endif -void udp_get_ports_used(unsigned int, uint8_t *); -uint32_t udp_count_opportunistic(unsigned int, u_int32_t); +extern struct pr_usrreqs udp_usrreqs; +extern struct inpcbhead udb; +extern struct inpcbinfo udbinfo; +extern u_int32_t udp_sendspace; +extern u_int32_t udp_recvspace; +extern struct udpstat udpstat; +extern int udp_log_in_vain; -#endif /* KERNEL_PRIVATE */ +__BEGIN_DECLS +extern void udp_ctlinput(int, struct sockaddr *, void *); +extern int udp_ctloutput(struct socket *, struct sockopt *); +extern void udp_init(struct protosw *, struct domain *); +extern void udp_input(struct mbuf *, int); +extern int udp_connectx_common(struct socket *, int, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, + uint32_t, void *, uint32_t); +extern void udp_notify(struct inpcb *inp, int errno); +extern int udp_shutdown(struct socket *so); +extern int udp_lock(struct socket *, int, void *); +extern int udp_unlock(struct socket *, int, void *); +extern lck_mtx_t *udp_getlock(struct socket *, int); +extern void udp_get_ports_used(u_int32_t, int, u_int32_t, bitstr_t *); +extern uint32_t udp_count_opportunistic(unsigned int, u_int32_t); +extern uint32_t udp_find_anypcb_byaddr(struct ifaddr *); +__END_DECLS +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET_UDP_VAR_H_ */ diff --git a/bsd/netinet6/Makefile b/bsd/netinet6/Makefile index 141f50860..8d3721e43 100644 --- a/bsd/netinet6/Makefile +++ b/bsd/netinet6/Makefile @@ -7,14 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ ah.h ipsec.h pim6.h \ esp.h in6.h ipcomp.h raw_ip6.h \ @@ -25,9 +17,8 @@ PRIVATE_DATAFILES = \ PRIVATE_KERNELFILES = \ ah6.h esp6.h esp_rijndael.h in6_gif.h in6_ifattach.h \ - in6_prefix.h ip6_ecn.h \ - ip6protosw.h ipcomp6.h ipsec6.h \ - raw_ip6.h scope6_var.h tcp6_var.h udp6_var.h + ip6_ecn.h ip6protosw.h ipcomp6.h ipsec6.h \ + scope6_var.h tcp6_var.h udp6_var.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/bsd/netinet6/ah.h b/bsd/netinet6/ah.h index cf9ddad2a..c649899d2 100644 --- a/bsd/netinet6/ah.h +++ b/bsd/netinet6/ah.h @@ -55,7 +55,7 @@ struct newah { /* variable size, 32bit bound*/ /* Authentication data */ }; -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE struct secasvar; struct ah_algorithm_state { @@ -86,6 +86,6 @@ extern void ah4_input(struct mbuf *, int); extern int ah4_output(struct mbuf *, struct secasvar *); extern int ah4_calccksum(struct mbuf *, caddr_t, size_t, const struct ah_algorithm *, struct secasvar *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_AH_H_ */ diff --git a/bsd/netinet6/ah6.h b/bsd/netinet6/ah6.h index 688f946d5..1b8add593 100644 --- 
a/bsd/netinet6/ah6.h +++ b/bsd/netinet6/ah6.h @@ -38,7 +38,7 @@ #define _NETINET6_AH6_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE struct secasvar; extern int ah6_input(struct mbuf **, int *, int); @@ -48,6 +48,5 @@ extern int ah6_calccksum(struct mbuf *, caddr_t, size_t, const struct ah_algorithm *, struct secasvar *); extern void ah6_ctlinput(int, struct sockaddr *, void *); -#endif /* KERNEL_PRIVATE */ - +#endif /* BSD_KERNEL_PRIVATE */ #endif /*_NETINET6_AH6_H_*/ diff --git a/bsd/netinet6/ah_input.c b/bsd/netinet6/ah_input.c index 05d575b5a..8d13ca5e5 100644 --- a/bsd/netinet6/ah_input.c +++ b/bsd/netinet6/ah_input.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2011 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -124,8 +124,6 @@ #define IPLEN_FLIPPED #if INET -extern struct protosw inetsw[]; - void ah4_input(struct mbuf *m, int off) { @@ -191,7 +189,8 @@ ah4_input(struct mbuf *m, int off) goto fail; } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ah4_input called to allocate SA:%p\n", sav)); + printf("DP ah4_input called to allocate SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { ipseclog((LOG_DEBUG, @@ -598,7 +597,8 @@ ah4_input(struct mbuf *m, int off) if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ah4_input call free SA:%p\n", sav)); + printf("DP ah4_input call free SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); key_freesav(sav, KEY_SADB_UNLOCKED); } IPSEC_STAT_INCREMENT(ipsecstat.in_success); @@ -607,7 +607,8 @@ ah4_input(struct mbuf *m, int off) fail: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ah4_input call free SA:%p\n", sav)); + printf("DP ah4_input call free SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); key_freesav(sav, KEY_SADB_UNLOCKED); } if (m) @@ -672,7 +673,8 @@ ah6_input(struct mbuf **mp, int *offp, int proto) goto fail; } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ah6_input called to allocate SA:%p\n", sav)); + printf("DP ah6_input called to allocate SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { ipseclog((LOG_DEBUG, @@ -1004,7 +1006,8 @@ ah6_input(struct mbuf **mp, int *offp, int proto) if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ah6_input call free SA:%p\n", sav)); + printf("DP ah6_input call free SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); key_freesav(sav, KEY_SADB_UNLOCKED); } IPSEC_STAT_INCREMENT(ipsec6stat.in_success); @@ -1013,7 +1016,8 @@ ah6_input(struct mbuf **mp, int *offp, int proto) fail: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ah6_input call free SA:%p\n", sav)); + printf("DP ah6_input call free SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); key_freesav(sav, KEY_SADB_UNLOCKED); } if (m) diff --git a/bsd/netinet6/esp.h b/bsd/netinet6/esp.h index 04aa45077..048a792cb 100644 --- a/bsd/netinet6/esp.h +++ b/bsd/netinet6/esp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,7 +95,7 @@ struct esptail { /*variable size, 32bit bound*/ /* Authentication data (new IPsec)*/ }; -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE struct secasvar; struct esp_algorithm { @@ -130,6 +130,6 @@ extern size_t esp_hdrsiz(struct ipsecrequest *); extern int esp_schedule(const struct esp_algorithm *, struct secasvar *); extern int esp_auth(struct mbuf *, size_t, size_t, struct secasvar *, u_char *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_ESP_H_ */ diff --git a/bsd/netinet6/esp6.h b/bsd/netinet6/esp6.h index 7b054cd50..767d0c387 100644 --- a/bsd/netinet6/esp6.h +++ b/bsd/netinet6/esp6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -66,12 +66,12 @@ #define _NETINET6_ESP6_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern int esp6_output(struct mbuf *, u_char *, struct mbuf *, struct secasvar *); extern int esp6_input(struct mbuf **, int *, int); extern void esp6_ctlinput(int, struct sockaddr *, void *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_ESP6_H_ */ diff --git a/bsd/netinet6/esp_input.c b/bsd/netinet6/esp_input.c index 7470d023e..9d0e549f8 100644 --- a/bsd/netinet6/esp_input.c +++ b/bsd/netinet6/esp_input.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2011 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -132,8 +132,6 @@ extern lck_mtx_t *sadb_mutex; #if INET -extern struct protosw inetsw[]; - #define ESPMAXLEN \ (sizeof(struct esp) < sizeof(struct newesp) \ ? 
sizeof(struct newesp) : sizeof(struct esp)) @@ -229,7 +227,8 @@ esp4_input(m, off) goto bad; } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP esp4_input called to allocate SA:%p\n", sav)); + printf("DP esp4_input called to allocate SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { ipseclog((LOG_DEBUG, @@ -698,7 +697,8 @@ noreplaycheck: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP esp4_input call free SA:%p\n", sav)); + printf("DP esp4_input call free SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); key_freesav(sav, KEY_SADB_UNLOCKED); } IPSEC_STAT_INCREMENT(ipsecstat.in_success); @@ -707,7 +707,8 @@ noreplaycheck: bad: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP esp4_input call free SA:%p\n", sav)); + printf("DP esp4_input call free SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); key_freesav(sav, KEY_SADB_UNLOCKED); } if (m) @@ -779,7 +780,8 @@ esp6_input(struct mbuf **mp, int *offp, int proto) goto bad; } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP esp6_input called to allocate SA:%p\n", sav)); + printf("DP esp6_input called to allocate SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { ipseclog((LOG_DEBUG, @@ -1166,7 +1168,8 @@ noreplaycheck: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP esp6_input call free SA:%p\n", sav)); + printf("DP esp6_input call free SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); key_freesav(sav, KEY_SADB_UNLOCKED); } IPSEC_STAT_INCREMENT(ipsec6stat.in_success); @@ -1175,7 +1178,8 @@ noreplaycheck: bad: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP esp6_input call free SA:%p\n", sav)); + printf("DP esp6_input call free SA:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sav))); key_freesav(sav, KEY_SADB_UNLOCKED); } if (m) diff --git a/bsd/netinet6/esp_rijndael.h b/bsd/netinet6/esp_rijndael.h index 1436da6b2..098d13321 100644 --- a/bsd/netinet6/esp_rijndael.h +++ b/bsd/netinet6/esp_rijndael.h @@ -59,7 +59,7 @@ */ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE int esp_aes_schedlen(const struct esp_algorithm *); int esp_aes_schedule(const struct esp_algorithm *, struct secasvar *); int esp_cbc_decrypt_aes(struct mbuf *, size_t, struct secasvar *, @@ -68,4 +68,4 @@ int esp_cbc_encrypt_aes(struct mbuf *, size_t, size_t, struct secasvar *, const struct esp_algorithm *, int); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c index 00174a628..6a92d7380 100644 --- a/bsd/netinet6/frag6.c +++ b/bsd/netinet6/frag6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,46 +84,160 @@ #include #include +#include /* * Define it to get a correct behavior on per-interface statistics. - * You will need to perform an extra routing table lookup, per fragment, - * to do it. This may, or may not be, a performance hit. 
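The ah_input.c and esp_input.c hunks above stop printing raw SA pointers and log (uint64_t)VM_KERNEL_ADDRPERM(sav) instead, so KEYDEBUG output no longer leaks kernel addresses. The idea is a fixed boot-time secret added to non-NULL pointers before formatting; a toy analog (the real macro's arithmetic is not reproduced here):

#include <stdio.h>
#include <stdint.h>

/* toy stand-in for the boot-time random value the kernel uses */
static const uintptr_t addr_perm = 0x5a5a5a5aUL;

/* NULL stays 0 so "no object" is still recognizable in logs */
#define ADDRPERM(p) \
	((uintptr_t)(p) == 0 ? 0 : (uintptr_t)(p) + addr_perm)

int
main(void)
{
	int object;

	/* log the permuted value, never the raw address */
	printf("SA:0x%llx\n", (unsigned long long)ADDRPERM(&object));
	return (0);
}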
*/ #define IN6_IFSTAT_STRICT +MBUFQ_HEAD(fq6_head); + +static void frag6_save_context(struct mbuf *, int); +static void frag6_scrub_context(struct mbuf *); +static int frag6_restore_context(struct mbuf *); + +static void frag6_icmp6_paramprob_error(struct fq6_head *); +static void frag6_icmp6_timeex_error(struct fq6_head *); + static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); static void frag6_deq(struct ip6asfrag *); static void frag6_insque(struct ip6q *, struct ip6q *); static void frag6_remque(struct ip6q *); -static void frag6_freef(struct ip6q *); +static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *); + +static int frag6_timeout_run; /* frag6 timer is scheduled to run */ +static void frag6_timeout(void *); +static void frag6_sched_timeout(void); + +static struct ip6q *ip6q_alloc(int); +static void ip6q_free(struct ip6q *); +static void ip6q_updateparams(void); +static struct ip6asfrag *ip6af_alloc(int); +static void ip6af_free(struct ip6asfrag *); + +decl_lck_mtx_data(static, ip6qlock); +static lck_attr_t *ip6qlock_attr; +static lck_grp_t *ip6qlock_grp; +static lck_grp_attr_t *ip6qlock_grp_attr; + +/* IPv6 fragment reassembly queues (protected by ip6qlock) */ +static struct ip6q ip6q; /* ip6 reassembly queues */ +static int ip6_maxfragpackets; /* max packets in reass queues */ +static u_int32_t frag6_nfragpackets; /* # of packets in reass queues */ +static int ip6_maxfrags; /* max fragments in reass queues */ +static u_int32_t frag6_nfrags; /* # of fragments in reass queues */ +static u_int32_t ip6q_limit; /* ip6q allocation limit */ +static u_int32_t ip6q_count; /* current # of allocated ip6q's */ +static u_int32_t ip6af_limit; /* ip6asfrag allocation limit */ +static u_int32_t ip6af_count; /* current # of allocated ip6asfrag's */ + +static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS; +static int sysctl_maxfrags SYSCTL_HANDLER_ARGS; + +SYSCTL_DECL(_net_inet6_ip6); + +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0, + sysctl_maxfragpackets, "I", + "Maximum number of IPv6 fragment reassembly queue entries"); + +SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets, + CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0, + "Current number of IPv6 fragment reassembly queue entries"); + +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0, + sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed"); -/* XXX we eventually need splreass6, or some real semaphore */ -int frag6_doing_reass; -u_int frag6_nfragpackets; -static u_int frag6_nfrags; -struct ip6q ip6q; /* ip6 reassemble queue */ - - -extern lck_mtx_t *inet6_domain_mutex; /* * Initialise reassembly queue and fragment identifier. */ void -frag6_init() +frag6_init(void) { - struct timeval tv; + /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */ + _CASSERT(sizeof (struct ip6q) <= _MLEN); + /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */ + _CASSERT(sizeof (struct ip6asfrag) <= _MLEN); + + /* IPv6 fragment reassembly queue lock */ + ip6qlock_grp_attr = lck_grp_attr_alloc_init(); + ip6qlock_grp = lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr); + ip6qlock_attr = lck_attr_alloc_init(); + lck_mtx_init(&ip6qlock, ip6qlock_grp, ip6qlock_attr); + + lck_mtx_lock(&ip6qlock); + /* Initialize IPv6 reassembly queue. 
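frag6_init() above now _CASSERT()s that struct ip6q and struct ip6asfrag fit within _MLEN before ip6q_alloc()/ip6af_alloc() carve them out of mbufs. The same compile-time guard in portable C11 — MY_MLEN and the struct below are illustrative assumptions, not the kernel's values:

#include <assert.h>

#define MY_MLEN 256			/* stand-in for _MLEN */
struct my_ip6q { char opaque[200]; };	/* stand-in for struct ip6q */

static_assert(sizeof (struct my_ip6q) <= MY_MLEN,
    "reassembly queue entry must fit in an mbuf");

int
main(void)
{
	return (0);	/* nothing to do at runtime; the check is at build time */
}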
*/ + ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; + /* same limits as IPv4 */ ip6_maxfragpackets = nmbclusters / 32; - ip6_maxfrags = nmbclusters / 4; + ip6_maxfrags = ip6_maxfragpackets * 2; + ip6q_updateparams(); + lck_mtx_unlock(&ip6qlock); +} - /* - * in many cases, random() here does NOT return random number - * as initialization during bootstrap time occur in fixed order. - */ - microtime(&tv); - ip6_id = random() ^ tv.tv_usec; - ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; +static void +frag6_save_context(struct mbuf *m, int val) +{ + m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val; +} + +static void +frag6_scrub_context(struct mbuf *m) +{ + m->m_pkthdr.pkt_hdr = NULL; +} + +static int +frag6_restore_context(struct mbuf *m) +{ + return ((int)m->m_pkthdr.pkt_hdr); +} + +/* + * Send any deferred ICMP param problem error messages; caller must not be + * holding ip6qlock and is expected to have saved the per-packet parameter + * value via frag6_save_context(). + */ +static void +frag6_icmp6_paramprob_error(struct fq6_head *diq6) +{ + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); + + if (!MBUFQ_EMPTY(diq6)) { + struct mbuf *merr, *merr_tmp; + int param; + MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) { + MBUFQ_REMOVE(diq6, merr); + MBUFQ_NEXT(merr) = NULL; + param = frag6_restore_context(merr); + frag6_scrub_context(merr); + icmp6_error(merr, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, param); + } + } +} + +/* + * Send any deferred ICMP time exceeded error messages; + * caller must not be holding ip6qlock. + */ +static void +frag6_icmp6_timeex_error(struct fq6_head *diq6) +{ + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); + + if (!MBUFQ_EMPTY(diq6)) { + struct mbuf *m, *m_tmp; + MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) { + MBUFQ_REMOVE(diq6, m); + MBUFQ_NEXT(m) = NULL; + icmp6_error(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_REASSEMBLY, 0); + } + } } /* @@ -157,8 +271,6 @@ frag6_init() */ /* * Fragment input - * NOTE: this function is called with the inet6_domain_mutex held from ip6_input. - * inet6_domain_mutex is protecting he frag6 queue manipulation. */ int frag6_input(struct mbuf **mp, int *offp, int proto) @@ -172,61 +284,54 @@ frag6_input(struct mbuf **mp, int *offp, int proto) int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ - struct ifnet *dstifp; - struct ifaddr *ifa = NULL; + struct ifnet *dstifp = NULL; u_int8_t ecn, ecn0; + uint32_t csum, csum_flags; + struct fq6_head diq6; + int locked = 0; -#ifdef IN6_IFSTAT_STRICT - struct route_in6 ro; - struct sockaddr_in6 *dst; -#endif + VERIFY(m->m_flags & M_PKTHDR); + + MBUFQ_INIT(&diq6); /* for deferred ICMP param problem errors */ /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), return IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); #else IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); if (ip6f == NULL) - return IPPROTO_DONE; + goto done; #endif - dstifp = NULL; #ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. 
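/*
 * Usage sketch for the deferred-error helpers above (the fq6_head type,
 * MBUFQ macros, and frag6_* names are the ones declared in this file; the
 * example function itself is hypothetical).  Because icmp6_error()
 * re-enters the network stack, an error discovered while ip6qlock is held
 * is queued on a local fq6_head and only sent once the lock is dropped.
 */
static void
example_defer_paramprob(struct mbuf *merr, int param)
{
	struct fq6_head diq6;

	MBUFQ_INIT(&diq6);

	lck_mtx_lock(&ip6qlock);
	/* ... inspect the reassembly queue, find a bad fragment ... */
	frag6_save_context(merr, param);	/* stash the icmp6 parameter */
	MBUFQ_ENQUEUE(&diq6, merr);
	lck_mtx_unlock(&ip6qlock);

	/* safe to generate ICMPv6 errors now that the lock is dropped */
	frag6_icmp6_paramprob_error(&diq6);
	VERIFY(MBUFQ_EMPTY(&diq6));
}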
*/ - bzero(&ro, sizeof (ro)); - dst = (struct sockaddr_in6 *)&ro.ro_dst; - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof (struct sockaddr_in6); - dst->sin6_addr = ip6->ip6_dst; - - rtalloc((struct route *)&ro); - if (ro.ro_rt != NULL) { - RT_LOCK(ro.ro_rt); - if ((ifa = ro.ro_rt->rt_ifa) != NULL) { - IFA_ADDREF(ifa); - dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp; + if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) { + uint32_t idx; + + if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) { + if (idx > 0 && idx <= if_index) { + ifnet_head_lock_shared(); + dstifp = ifindex2ifnet[idx]; + ifnet_head_done(); + } } - RT_UNLOCK(ro.ro_rt); - rtfree(ro.ro_rt); - ro.ro_rt = NULL; } -#else - /* we are violating the spec, this is not the destination interface */ - if ((m->m_flags & M_PKTHDR) != 0) +#endif /* IN6_IFSTAT_STRICT */ + + /* we are violating the spec, this may not be the dst interface */ + if (dstifp == NULL) dstifp = m->m_pkthdr.rcvif; -#endif /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + m = NULL; + goto done; } /* @@ -237,32 +342,78 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { - icmp6_error(m, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - offsetof(struct ip6_hdr, ip6_plen)); + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + m = NULL; + goto done; + } + + /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */ + if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) { + ip6stat.ip6s_fragments++; + ip6stat.ip6s_fragdropped++; + in6_ifstat_inc(dstifp, ifs6_reass_fail); + m_freem(m); + m = NULL; + goto done; } - ip6stat.ip6s_fragments++; - in6_ifstat_inc(dstifp, ifs6_reass_reqd); - /* offset now points to data portion */ offset += sizeof(struct ip6_frag); - frag6_doing_reass = 1; - /* - * Enforce upper bound on number of fragments. - * If maxfrag is 0, never accept fragments. - * If maxfrag is -1, accept all fragments without limitation. + * Leverage partial checksum offload for simple UDP/IP fragments, + * as that is the most common case. + * + * Perform 1's complement adjustment of octets that got included/ + * excluded in the hardware-calculated checksum value. 
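/*
 * Illustrative only: a conceptual version of the adjustment that
 * m_adj_sum16() performs below.  The Internet checksum is an end-around-
 * carry sum of 16-bit words, so words the hardware summed before the
 * fragment payload can be subtracted back out in one's-complement
 * arithmetic.  This standalone helper is hypothetical and glosses over
 * odd lengths, byte order, and mbuf chaining.
 */
static uint32_t
example_sum16_trim(const uint8_t *buf, uint32_t start, uint32_t offset,
    uint32_t csum)
{
	uint32_t extra = 0, i;

	for (i = start; i + 1 < offset; i += 2)	/* words in [start, offset) */
		extra += ((uint32_t)(buf[i] << 8)) | buf[i + 1];
	while (extra >> 16)			/* fold carries */
		extra = (extra >> 16) + (extra & 0xffff);

	csum += (~extra & 0xffff);		/* one's-complement subtract */
	while (csum >> 16)
		csum = (csum >> 16) + (csum & 0xffff);
	return (csum);
}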
*/ - if (ip6_maxfrags < 0) - ; - else if (frag6_nfrags >= (u_int)ip6_maxfrags) - goto dropfrag; + if (ip6f->ip6f_nxt == IPPROTO_UDP && + offset == (sizeof (*ip6) + sizeof (*ip6f)) && + (m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PARTIAL)) { + uint32_t start; + + start = m->m_pkthdr.csum_rx_start; + csum = m->m_pkthdr.csum_rx_val; + + if (start != offset) { + uint16_t s, d; + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { + s = ip6->ip6_src.s6_addr16[1]; + ip6->ip6_src.s6_addr16[1] = 0; + } + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { + d = ip6->ip6_dst.s6_addr16[1]; + ip6->ip6_dst.s6_addr16[1] = 0; + } + + /* callee folds in sum */ + csum = m_adj_sum16(m, start, offset, csum); + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = s; + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = d; + + } + csum_flags = m->m_pkthdr.csum_flags; + } else { + csum = 0; + csum_flags = 0; + } + + /* Invalidate checksum */ + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + + ip6stat.ip6s_fragments++; + in6_ifstat_inc(dstifp, ifs6_reass_reqd); + + lck_mtx_lock(&ip6qlock); + locked = 1; for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) if (ip6f->ip6f_ident == q6->ip6q_ident && @@ -276,24 +427,12 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ first_frag = 1; - /* - * Enforce upper bound on number of fragmented packets - * for which we attempt reassembly; - * If maxfrag is 0, never accept fragments. - * If maxfrag is -1, accept all fragments without limitation. - */ - if (ip6_maxfragpackets < 0) - ; - else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets) - goto dropfrag; - frag6_nfragpackets++; - q6 = (struct ip6q *)_MALLOC(sizeof(struct ip6q), M_FTABLE, - M_DONTWAIT); + q6 = ip6q_alloc(M_DONTWAIT); if (q6 == NULL) goto dropfrag; - bzero(q6, sizeof(*q6)); frag6_insque(q6, &ip6q); + frag6_nfragpackets++; /* ip6q_nxt will be filled afterwards, from 1st fragment */ q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; @@ -301,14 +440,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto) q6->ip6q_nxtp = (u_char *)nxtp; #endif q6->ip6q_ident = ip6f->ip6f_ident; - q6->ip6q_ttl = IPV6_FRAGTTL; + q6->ip6q_ttl = IPV6_FRAGTTL; q6->ip6q_src = ip6->ip6_src; q6->ip6q_dst = ip6->ip6_dst; q6->ip6q_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ - q6->ip6q_nfrag = 0; + q6->ip6q_nfrag = 0; + + /* + * If the first fragment has valid checksum offload + * info, the rest of the fragments are eligible as well. + */ + if (csum_flags != 0) { + q6->ip6q_csum = csum; + q6->ip6q_csum_flags = csum_flags; + } } /* @@ -317,8 +465,8 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); if (fragoff == 0) { - q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - - sizeof(struct ip6_frag); + q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - + sizeof(struct ip6_frag); q6->ip6q_nxt = ip6f->ip6f_nxt; } @@ -331,23 +479,22 @@ frag6_input(struct mbuf **mp, int *offp, int proto) if (q6->ip6q_unfrglen >= 0) { /* The 1st fragment has already arrived. 
*/ if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { + lck_mtx_unlock(&ip6qlock); + locked = 0; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offset - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return(IPPROTO_DONE); + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + m = NULL; + goto done; } - } - else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + lck_mtx_unlock(&ip6qlock); + locked = 0; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offset - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return(IPPROTO_DONE); + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + m = NULL; + goto done; } /* * If it's the first fragment, do the above check for each @@ -366,7 +513,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) /* dequeue the fragment. */ frag6_deq(af6); - FREE(af6, M_FTABLE); + ip6af_free(af6); /* adjust pointer. */ ip6err = mtod(merr, struct ip6_hdr *); @@ -378,19 +525,19 @@ frag6_input(struct mbuf **mp, int *offp, int proto) ip6err->ip6_src = q6->ip6q_src; ip6err->ip6_dst = q6->ip6q_dst; - icmp6_error(merr, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - erroff - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); + frag6_save_context(merr, + erroff - sizeof (struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + + MBUFQ_ENQUEUE(&diq6, merr); } } } - ip6af = (struct ip6asfrag *)_MALLOC(sizeof(struct ip6asfrag), M_FTABLE, - M_DONTWAIT); + ip6af = ip6af_alloc(M_DONTWAIT); if (ip6af == NULL) goto dropfrag; - bzero(ip6af, sizeof(*ip6af)); + ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; ip6af->ip6af_off = fragoff; ip6af->ip6af_frglen = frgpartlen; @@ -411,14 +558,14 @@ frag6_input(struct mbuf **mp, int *offp, int proto) ecn0 = q6->ip6q_ecn; if (ecn == IPTOS_ECN_CE) { if (ecn0 == IPTOS_ECN_NOTECT) { - FREE(ip6af, M_FTABLE); + ip6af_free(ip6af); goto dropfrag; } if (ecn0 != IPTOS_ECN_CE) q6->ip6q_ecn = IPTOS_ECN_CE; } if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { - FREE(ip6af, M_FTABLE); + ip6af_free(ip6af); goto dropfrag; } @@ -435,6 +582,9 @@ frag6_input(struct mbuf **mp, int *offp, int proto) * If there is a preceding segment, it may provide some of * our data already. If so, drop the data from the incoming * segment. If it provides all of our data, drop us. + * + * If some of the data is dropped from the preceding + * segment, then its checksum is invalidated. 
*/ if (af6->ip6af_up != (struct ip6asfrag *)q6) { i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen @@ -443,6 +593,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) if (i >= ip6af->ip6af_frglen) goto dropfrag; m_adj(IP6_REASS_MBUF(ip6af), i); + q6->ip6q_csum_flags = 0; ip6af->ip6af_off += i; ip6af->ip6af_frglen -= i; } @@ -459,6 +610,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) af6->ip6af_frglen -= i; af6->ip6af_off += i; m_adj(IP6_REASS_MBUF(af6), i); + q6->ip6q_csum_flags = 0; break; } af6 = af6->ip6af_down; @@ -485,7 +637,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) "overlaps the previous fragment\n", i, ip6_sprintf(&q6->ip6q_src)); #endif - FREE(ip6af, M_FTABLE); + ip6af_free(ip6af); goto dropfrag; } } @@ -497,12 +649,22 @@ frag6_input(struct mbuf **mp, int *offp, int proto) "overlaps the succeeding fragment", i, ip6_sprintf(&q6->ip6q_src)); #endif - FREE(ip6af, M_FTABLE); + ip6af_free(ip6af); goto dropfrag; } } #endif + /* + * If this fragment contains similar checksum offload info + * as that of the existing ones, accumulate checksum. Otherwise, + * invalidate checksum offload info for the entire datagram. + */ + if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags) + q6->ip6q_csum += csum; + else if (q6->ip6q_csum_flags != 0) + q6->ip6q_csum_flags = 0; + insert: /* @@ -524,18 +686,18 @@ insert: for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) { if (af6->ip6af_off != next) { - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + lck_mtx_unlock(&ip6qlock); + locked = 0; + m = NULL; + goto done; } next += af6->ip6af_frglen; } if (af6->ip6af_up->ip6af_mff) { - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + lck_mtx_unlock(&ip6qlock); + locked = 0; + m = NULL; + goto done; } /* @@ -552,13 +714,34 @@ insert: t = t->m_next; t->m_next = IP6_REASS_MBUF(af6); m_adj(t->m_next, af6->ip6af_offset); - FREE(af6, M_FTABLE); + ip6af_free(af6); af6 = af6dwn; } + /* + * Store partial hardware checksum info from the fragment queue; + * the receive start offset is set to 40 bytes (see code at the + * top of this routine.) 
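/*
 * Sketch (not the actual transport input code) of how a later consumer
 * could finish the partial sum stashed above: fold the accumulated
 * payload sum with ADDCARRY() as this file does, add a precomputed,
 * pre-folded pseudo-header sum in one's-complement arithmetic, and
 * compare against 0xffff.  The helper and its pseudo_hdr_sum argument
 * are hypothetical.
 */
static int
example_finish_rx_csum(struct mbuf *m, uint32_t pseudo_hdr_sum)
{
	uint32_t sum;

	if (!(m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
		return (0);	/* nothing stashed; verify in software */

	sum = m->m_pkthdr.csum_rx_val + pseudo_hdr_sum;
	ADDCARRY(sum);		/* fold carries, as done above */
	return ((sum & 0xffff) == 0xffff);
}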
+ */ + if (q6->ip6q_csum_flags != 0) { + csum = q6->ip6q_csum; + + ADDCARRY(csum); + + m->m_pkthdr.csum_rx_val = csum; + m->m_pkthdr.csum_rx_start = sizeof (struct ip6_hdr); + m->m_pkthdr.csum_flags = q6->ip6q_csum_flags; + } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + /* loopback checksums are always OK */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL; + m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + } + /* adjust offset to point where the original next header starts */ offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); - FREE(ip6af, M_FTABLE); + ip6af_free(ip6af); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); ip6->ip6_src = q6->ip6q_src; @@ -567,7 +750,7 @@ insert: ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); nxt = q6->ip6q_nxt; -#if notyet +#ifdef notyet *q6->ip6q_nxtp = (u_char)(nxt & 0xff); #endif @@ -575,16 +758,16 @@ insert: if (m->m_len >= offset + sizeof(struct ip6_frag)) { /* This is the only possible case with !PULLDOWN_TEST */ ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag), - offset); + offset); m->m_data += sizeof(struct ip6_frag); m->m_len -= sizeof(struct ip6_frag); } else { /* this comes with no copy if the boundary is on cluster */ if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) { frag6_remque(q6); - frag6_nfrags -= q6->ip6q_nfrag; - FREE(q6, M_FTABLE); frag6_nfragpackets--; + frag6_nfrags -= q6->ip6q_nfrag; + ip6q_free(q6); goto dropfrag; } m_adj(t, sizeof(struct ip6_frag)); @@ -600,40 +783,56 @@ insert: } frag6_remque(q6); - frag6_nfrags -= q6->ip6q_nfrag; - FREE(q6, M_FTABLE); frag6_nfragpackets--; + frag6_nfrags -= q6->ip6q_nfrag; + ip6q_free(q6); + + if (m->m_flags & M_PKTHDR) /* Isn't it always true? */ + m_fixhdr(m); - if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ - int plen = 0; - for (t = m; t; t = t->m_next) - plen += t->m_len; - m->m_pkthdr.len = plen; - } - ip6stat.ip6s_reassembled++; - in6_ifstat_inc(dstifp, ifs6_reass_ok); /* * Tell launch routine the next header */ - *mp = m; *offp = offset; - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return nxt; - - dropfrag: - in6_ifstat_inc(dstifp, ifs6_reass_fail); + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + in6_ifstat_inc(dstifp, ifs6_reass_ok); + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (nxt); + +done: + VERIFY(m == NULL); + if (!locked) { + if (frag6_nfragpackets == 0) { + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); + } + lck_mtx_lock(&ip6qlock); + } + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); + +dropfrag: ip6stat.ip6s_fragdropped++; + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + in6_ifstat_inc(dstifp, ifs6_reass_fail); m_freem(m); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); } /* @@ -641,11 +840,12 @@ insert: * associated datagrams. 
*/ void -frag6_freef(q6) - struct ip6q *q6; +frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6) { struct ip6asfrag *af6, *down6; + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = down6) { struct mbuf *m = IP6_REASS_MBUF(af6); @@ -666,17 +866,18 @@ frag6_freef(q6) /* restore source and destination addresses */ ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; - icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_REASSEMBLY, 0); - } else - m_freem(m); - FREE(af6, M_FTABLE); + + MBUFQ_ENQUEUE(diq6, m); + } else { + MBUFQ_ENQUEUE(dfq6, m); + } + ip6af_free(af6); } frag6_remque(q6); - frag6_nfrags -= q6->ip6q_nfrag; - FREE(q6, M_FTABLE); frag6_nfragpackets--; + frag6_nfrags -= q6->ip6q_nfrag; + ip6q_free(q6); } /* @@ -684,9 +885,10 @@ frag6_freef(q6) * Like insque, but pointers in middle of structure. */ void -frag6_enq(af6, up6) - struct ip6asfrag *af6, *up6; +frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) { + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + af6->ip6af_up = up6; af6->ip6af_down = up6->ip6af_down; up6->ip6af_down->ip6af_up = af6; @@ -697,17 +899,19 @@ frag6_enq(af6, up6) * To frag6_enq as remque is to insque. */ void -frag6_deq(af6) - struct ip6asfrag *af6; +frag6_deq(struct ip6asfrag *af6) { + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + af6->ip6af_up->ip6af_down = af6->ip6af_down; af6->ip6af_down->ip6af_up = af6->ip6af_up; } void -frag6_insque(new, old) - struct ip6q *new, *old; +frag6_insque(struct ip6q *new, struct ip6q *old) { + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + new->ip6q_prev = old; new->ip6q_next = old->ip6q_next; old->ip6q_next->ip6q_prev= new; @@ -715,9 +919,10 @@ frag6_insque(new, old) } void -frag6_remque(p6) - struct ip6q *p6; +frag6_remque(struct ip6q *p6) { + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + p6->ip6q_prev->ip6q_next = p6->ip6q_next; p6->ip6q_next->ip6q_prev = p6->ip6q_prev; } @@ -727,13 +932,24 @@ frag6_remque(p6) * if a timer expires on a reassembly * queue, discard it. */ -void -frag6_slowtimo() +static void +frag6_timeout(void *arg) { +#pragma unused(arg) + struct fq6_head dfq6, diq6; struct ip6q *q6; - lck_mtx_lock(inet6_domain_mutex); - frag6_doing_reass = 1; + MBUFQ_INIT(&dfq6); /* for deferred frees */ + MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ + + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + + lck_mtx_lock(&ip6qlock); q6 = ip6q.ip6q_next; if (q6) while (q6 != &ip6q) { @@ -742,7 +958,7 @@ frag6_slowtimo() if (q6->ip6q_prev->ip6q_ttl == 0) { ip6stat.ip6s_fragtimeout++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(q6->ip6q_prev); + frag6_freef(q6->ip6q_prev, &dfq6, &diq6); } } /* @@ -750,29 +966,208 @@ frag6_slowtimo() * (due to the limit being lowered), drain off * enough to get down to the new limit. 
*/ - while (frag6_nfragpackets > (u_int)ip6_maxfragpackets && - ip6q.ip6q_prev) { - ip6stat.ip6s_fragoverflow++; - /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(ip6q.ip6q_prev); + if (ip6_maxfragpackets >= 0) { + while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets && + ip6q.ip6q_prev) { + ip6stat.ip6s_fragoverflow++; + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(ip6q.ip6q_prev, &dfq6, &diq6); + } + } + /* re-arm the purge timer if there's work to do */ + frag6_timeout_run = 0; + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + + /* free fragments that need to be freed */ + if (!MBUFQ_EMPTY(&dfq6)) + MBUFQ_DRAIN(&dfq6); + + frag6_icmp6_timeex_error(&diq6); + + VERIFY(MBUFQ_EMPTY(&dfq6)); + VERIFY(MBUFQ_EMPTY(&diq6)); +} + +static void +frag6_sched_timeout(void) +{ + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + + if (!frag6_timeout_run && frag6_nfragpackets > 0) { + frag6_timeout_run = 1; + timeout(frag6_timeout, NULL, hz); } - frag6_doing_reass = 0; - lck_mtx_unlock(inet6_domain_mutex); } /* * Drain off all datagram fragments. */ void -frag6_drain() +frag6_drain(void) { - if (frag6_doing_reass) - return; - lck_mtx_lock(inet6_domain_mutex); + struct fq6_head dfq6, diq6; + + MBUFQ_INIT(&dfq6); /* for deferred frees */ + MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ + + lck_mtx_lock(&ip6qlock); while (ip6q.ip6q_next != &ip6q) { ip6stat.ip6s_fragdropped++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(ip6q.ip6q_next); + frag6_freef(ip6q.ip6q_next, &dfq6, &diq6); + } + lck_mtx_unlock(&ip6qlock); + + /* free fragments that need to be freed */ + if (!MBUFQ_EMPTY(&dfq6)) + MBUFQ_DRAIN(&dfq6); + + frag6_icmp6_timeex_error(&diq6); + + VERIFY(MBUFQ_EMPTY(&dfq6)); + VERIFY(MBUFQ_EMPTY(&diq6)); +} + +static struct ip6q * +ip6q_alloc(int how) +{ + struct mbuf *t; + struct ip6q *q6; + + /* + * See comments in ip6q_updateparams(). Keep the count separate + * from frag6_nfragpackets since the latter represents the elements + * already in the reassembly queues. + */ + if (ip6q_limit > 0 && ip6q_count > ip6q_limit) + return (NULL); + + t = m_get(how, MT_FTABLE); + if (t != NULL) { + atomic_add_32(&ip6q_count, 1); + q6 = mtod(t, struct ip6q *); + bzero(q6, sizeof (*q6)); + } else { + q6 = NULL; + } + return (q6); +} + +static void +ip6q_free(struct ip6q *q6) +{ + (void) m_free(dtom(q6)); + atomic_add_32(&ip6q_count, -1); +} + +static struct ip6asfrag * +ip6af_alloc(int how) +{ + struct mbuf *t; + struct ip6asfrag *af6; + + /* + * See comments in ip6q_updateparams(). Keep the count separate + * from frag6_nfrags since the latter represents the elements + * already in the reassembly queues. + */ + if (ip6af_limit > 0 && ip6af_count > ip6af_limit) + return (NULL); + + t = m_get(how, MT_FTABLE); + if (t != NULL) { + atomic_add_32(&ip6af_count, 1); + af6 = mtod(t, struct ip6asfrag *); + bzero(af6, sizeof (*af6)); + } else { + af6 = NULL; + } + return (af6); +} + +static void +ip6af_free(struct ip6asfrag *af6) +{ + (void) m_free(dtom(af6)); + atomic_add_32(&ip6af_count, -1); +} + +static void +ip6q_updateparams(void) +{ + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + /* + * -1 for unlimited allocation. + */ + if (ip6_maxfragpackets < 0) + ip6q_limit = 0; + if (ip6_maxfrags < 0) + ip6af_limit = 0; + /* + * Positive number for specific bound. 
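/*
 * The sysctl handlers below funnel every change through
 * ip6q_updateparams(), so -1 (unlimited), 0 (reassembly effectively
 * disabled) and positive bounds all take effect under ip6qlock.  A
 * hypothetical userland probe of the knobs published earlier as
 * net.inet6.ip6.maxfragpackets / net.inet6.ip6.maxfrags (illustrative
 * only; error handling trimmed):
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int v;
	size_t len = sizeof (v);

	if (sysctlbyname("net.inet6.ip6.maxfragpackets",
	    &v, &len, NULL, 0) == 0)
		printf("maxfragpackets=%d\n", v);	/* -1, 0, or a bound */
	return (0);
}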
+ */ + if (ip6_maxfragpackets > 0) + ip6q_limit = ip6_maxfragpackets; + if (ip6_maxfrags > 0) + ip6af_limit = ip6_maxfrags; + /* + * Zero specifies no further fragment queue allocation -- set the + * bound very low, but rely on implementation elsewhere to actually + * prevent allocation and reclaim current queues. + */ + if (ip6_maxfragpackets == 0) + ip6q_limit = 1; + if (ip6_maxfrags == 0) + ip6af_limit = 1; + /* + * Arm the purge timer if not already and if there's work to do + */ + frag6_sched_timeout(); +} + +static int +sysctl_maxfragpackets SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + lck_mtx_lock(&ip6qlock); + i = ip6_maxfragpackets; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < -1 || i > (nmbclusters / 4)) { + error = EINVAL; + goto done; + } + ip6_maxfragpackets = i; + ip6q_updateparams(); +done: + lck_mtx_unlock(&ip6qlock); + return (error); +} + +static int +sysctl_maxfrags SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + lck_mtx_lock(&ip6qlock); + i = ip6_maxfrags; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < -1 || i > (nmbclusters / 4)) { + error = EINVAL; + goto done; } - lck_mtx_unlock(inet6_domain_mutex); + ip6_maxfrags = i; + ip6q_updateparams(); /* see if we need to arm timer */ +done: + lck_mtx_unlock(&ip6qlock); + return (error); } diff --git a/bsd/netinet6/icmp6.c b/bsd/netinet6/icmp6.c index cdf92a564..c146a8995 100644 --- a/bsd/netinet6/icmp6.c +++ b/bsd/netinet6/icmp6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include @@ -136,8 +137,6 @@ extern int ipsec_bypass; #include -extern struct domain inet6domain; -extern struct ip6protosw inet6sw[]; extern struct ip6protosw *ip6_protox[]; extern uint32_t rip_sendspace; @@ -151,16 +150,12 @@ static int icmp6errpps_count = 0; static struct timeval icmp6errppslim_last; extern int icmp6_nodeinfo; extern struct inpcbinfo ripcbinfo; -extern lck_mtx_t *inet6_domain_mutex; static void icmp6_errcount(struct icmp6errstat *, int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *, struct in6_addr *); -#ifndef HAVE_PPSRATECHECK -static int ppsratecheck(struct timeval *, int *, int); -#endif static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); @@ -173,9 +168,20 @@ static int icmp6_notify_error(struct mbuf *, int, int, int); void -icmp6_init() +icmp6_init(struct ip6protosw *pp, struct domain *dp) { - mld_init(); +#pragma unused(dp) + static int icmp6_initialized = 0; + + /* Also called from ip6_init() without pp */ + VERIFY(pp == NULL || + (pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); + + /* This gets called by more than one protocol, so initialize once */ + if (!icmp6_initialized) { + icmp6_initialized = 1; + mld_init(); + } } static void @@ -458,6 +464,23 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto freeit; } +#ifndef PULLDOWN_TEST + icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); +#else + IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); + if (icmp6 == NULL) { + icmp6stat.icp6s_tooshort++; + return IPPROTO_DONE; + } +#endif + code = icmp6->icmp6_code; + + /* Apply rate limit before checksum validation. */ + if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { + icmp6stat.icp6s_toofreq++; + goto freeit; + } + /* * Check multicast group membership. * Note: SSM filters are not applied for ICMPv6 traffic. @@ -474,7 +497,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) * Don't discard if this is a Neighbor Solicitation * that needs to be proxied (see check down below.) 
*/ - if (!(m->m_pkthdr.aux_flags & MAUXF_PROXY_DST)) { + if (!(m->m_pkthdr.pkt_flags & PKTF_PROXY_DST)) { ip6stat.ip6s_notmember++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); @@ -488,17 +511,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) /* * calculate the checksum */ -#ifndef PULLDOWN_TEST - icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); -#else - IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); - if (icmp6 == NULL) { - icmp6stat.icp6s_tooshort++; - return IPPROTO_DONE; - } -#endif - code = icmp6->icmp6_code; - if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", @@ -507,7 +519,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) goto freeit; } - if (m->m_pkthdr.aux_flags & MAUXF_PROXY_DST) { + if (m->m_pkthdr.pkt_flags & PKTF_PROXY_DST) { /* * This is the special case of proxying NS (dst is either * solicited-node multicast or unicast); process it locally @@ -604,11 +616,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) if (code != 0) goto badcode; - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ goto rate_limit_checked; @@ -698,11 +705,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) else icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport); - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ if (mld_input(m, off, icmp6len) == IPPROTO_DONE) @@ -742,10 +744,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), return IPPROTO_DONE); #endif - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } n = m_copy(m, 0, M_COPYALL); if (n) @@ -771,11 +769,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_rs_input(m, off, icmp6len); @@ -794,11 +787,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ra_input(m, off, icmp6len); @@ -817,11 +805,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ns_input(m, off, icmp6len); @@ -840,11 +823,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_na_input(m, off, icmp6len); @@ -863,11 +841,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) if (icmp6len < sizeof(struct nd_redirect)) goto badlen; - if 
(icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ icmp6_redirect_input(m, off); @@ -888,11 +861,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; default: - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src), @@ -908,11 +876,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; } deliver: - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } - if (icmp6_notify_error(m, off, icmp6len, code)) { /* In this case, m should've been freed. */ return(IPPROTO_DONE); @@ -928,10 +891,6 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; } - if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) { - icmp6stat.icp6s_toofreq++; - goto freeit; - } rate_limit_checked: /* deliver the packet to appropriate sockets (unless proxying) */ if (!proxy) { @@ -1309,23 +1268,18 @@ ni6_input(m, off) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ - struct in6_ifaddr *ia6; + uint32_t ia6_flags; - if ((ia6 = ip6_getdstifaddr(m)) == NULL) + if (ip6_getdstifaddr_info(m, NULL, &ia6_flags) != 0) goto bad; /* XXX impossible */ - IFA_LOCK(&ia6->ia_ifa); - if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && + if ((ia6_flags & IN6_IFF_TEMPORARY) && !(icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); - IFA_UNLOCK(&ia6->ia_ifa); - IFA_REMREF(&ia6->ia_ifa); goto bad; } - IFA_UNLOCK(&ia6->ia_ifa); - IFA_REMREF(&ia6->ia_ifa); } /* validate query Subject field. */ @@ -1891,9 +1845,7 @@ ni6_store_addrs(ni6, nni6, ifp0, resid) u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; - struct timeval timenow; - - getmicrotime(&timenow); + uint64_t now = net_uptime(); if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* needless to copy */ @@ -1908,6 +1860,8 @@ ni6_store_addrs(ni6, nni6, ifp0, resid) ifnet_lock_shared(ifp); for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { + struct in6_addrlifetime_i *lt; + IFA_LOCK(ifa); if (ifa->ifa_addr->sa_family != AF_INET6) { IFA_UNLOCK(ifa); @@ -2003,13 +1957,12 @@ ni6_store_addrs(ni6, nni6, ifp0, resid) * address configuration by DHCPv6, so the former * case can't happen. */ - if (ifa6->ia6_lifetime.ia6t_expire == 0 && - (ifa6->ia6_flags & IN6_IFF_TEMPORARY) == 0) + lt = &ifa6->ia6_lifetime; + if (lt->ia6ti_expire == 0) { ltime = ND6_INFINITE_LIFETIME; - else { - if (ifa6->ia6_lifetime.ia6t_expire > - timenow.tv_sec) - ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - timenow.tv_sec); + } else { + if (lt->ia6ti_expire > now) + ltime = htonl(lt->ia6ti_expire - now); else ltime = 0; } @@ -2062,6 +2015,7 @@ icmp6_rip6_input(mp, off) struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; int ret = 0; + struct ifnet *ifp = m->m_pkthdr.rcvif; #ifndef PULLDOWN_TEST /* this is assumed to be safe. 
*/ @@ -2085,7 +2039,7 @@ icmp6_rip6_input(mp, off) if (sa6_recoverscope(&rip6src, TRUE)) return (IPPROTO_DONE); - lck_rw_lock_shared(ripcbinfo.mtx); + lck_rw_lock_shared(ripcbinfo.ipi_lock); LIST_FOREACH(in6p, &ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) @@ -2102,10 +2056,18 @@ icmp6_rip6_input(mp, off) && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) continue; + + if (inp_restricted(in6p, ifp)) + continue; + + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (in6p->in6p_flags & INP_NO_IFT_CELLULAR)) + continue; + if (last) { struct mbuf *n; if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { - if ((last->in6p_flags & IN6P_CONTROLOPTS) != 0 || + if ((last->in6p_flags & INP_CONTROLOPTS) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { ret = ip6_savecontrol(last, n, &opts); @@ -2148,11 +2110,11 @@ icmp6_rip6_input(mp, off) } else { goto error; } - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return IPPROTO_DONE; error: - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); m_freem(m); m_freem(opts); ip6stat.ip6s_delivered--; @@ -2168,6 +2130,7 @@ icmp6_reflect(m, off) struct mbuf *m; size_t off; { + struct mbuf *m_ip6hdr = m; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia; @@ -2178,10 +2141,10 @@ icmp6_reflect(m, off) struct sockaddr_in6 sa6_src, sa6_dst; struct nd_ifinfo *ndi; u_int32_t oflow; - struct ip6_out_args ip6oa = - { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR }; + struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, + IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0 }; - if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) { + if (!(m->m_pkthdr.pkt_flags & PKTF_LOOP) && m->m_pkthdr.rcvif != NULL) { ip6oa.ip6oa_boundif = m->m_pkthdr.rcvif->if_index; ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; } @@ -2199,10 +2162,6 @@ icmp6_reflect(m, off) * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. */ -#if DIAGNOSTIC - if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) - panic("assumption failed in icmp6_reflect"); -#endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; @@ -2212,7 +2171,7 @@ icmp6_reflect(m, off) m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { - if ((m = m_pullup(m, l)) == NULL) + if ((m_ip6hdr = m_pulldown(m, 0, l, NULL)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); @@ -2220,12 +2179,12 @@ icmp6_reflect(m, off) size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { - if ((m = m_pullup(m, l)) == NULL) + if ((m_ip6hdr = m_pulldown(m, 0, l, NULL)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); - ip6 = mtod(m, struct ip6_hdr *); + ip6 = mtod(m_ip6hdr, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ @@ -2278,10 +2237,14 @@ icmp6_reflect(m, off) IFA_UNLOCK(&ia->ia_ifa); } lck_rw_done(&in6_ifaddr_rwlock); - if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) { + if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && + ((m->m_flags & M_LOOP) || (m->m_pkthdr.pkt_flags & PKTF_LOOP))) { /* * This is the case if the dst is our link-local address - * and the sender is also ourselves. + * and the sender is also ourselves. 
Here we test for both + * M_LOOP and PKTF_LOOP, since the former may have been set + * in ip6_output() and we may get here as part of calling + * ip6_process_hopopts(). See comments in */ src = &t; } @@ -2309,8 +2272,7 @@ */ src = in6_selectsrc(&sin6, NULL, NULL, &ro, &outif, &src_storage, ip6oa.ip6oa_boundif, &e); - if (ro.ro_rt) - rtfree(ro.ro_rt); /* XXX: we could use this */ + ROUTE_RELEASE(&ro); if (src == NULL) { nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " @@ -2590,15 +2552,7 @@ icmp6_redirect_input(m, off) sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); - /* - * Radar 6843900 - * Release the IPv6 domain lock because we are going to take domain_proto_mtx - * and could otherwise cause a deadlock with other threads taking these locks - * in the reverse order -- e.g. frag6_slowtimo() from pfslowtimo() - */ - lck_mtx_unlock(inet6_domain_mutex); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); - lck_mtx_lock(inet6_domain_mutex); #if IPSEC key_sa_routechange((struct sockaddr *)&sdst); #endif @@ -2629,8 +2583,8 @@ icmp6_redirect_output(m0, rt) u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; - struct ip6_out_args ip6oa = - { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR }; + struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, + IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0 }; icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0); @@ -3079,15 +3033,19 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr_in6 *dst = (struct sockaddr_in6 *)(void *)nam; struct icmp6_hdr *icmp6; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + error = (inp == NULL ? EINVAL : EPROTOTYPE); + goto bad; + } + if (kauth_cred_issuser(so->so_cred)) - return rip6_output(m, so, (struct sockaddr_in6 *)(void *)nam, - control, 0); + return (rip6_output(m, so, SIN6(nam), control, 0)); /* always copy sockaddr to avoid overwrites */ if (so->so_state & SS_ISCONNECTED) { - if (nam) { - m_freem(m); - return EISCONN; + if (nam != NULL) { + error = EISCONN; + goto bad; } /* XXX */ bzero(&tmp, sizeof(tmp)); @@ -3098,8 +3056,8 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m, dst = &tmp; } else { if (nam == NULL) { - m_freem(m); - return ENOTCONN; + error = ENOTCONN; + goto bad; } tmp = *(struct sockaddr_in6 *)(void *)nam; dst = &tmp; @@ -3108,7 +3066,7 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m, /* * For an ICMPv6 packet, we should know its type and code */ - if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { + if (SOCK_PROTO(so) == IPPROTO_ICMPV6) { if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { error = ENOBUFS; @@ -3120,10 +3078,11 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m, * Allow only to send echo request and node information request * See RFC 2463 for Echo Request Message format */ - if ((icmp6->icmp6_type == ICMP6_ECHO_REQUEST && icmp6->icmp6_code == 0) || - (icmp6->icmp6_type == ICMP6_NI_QUERY && - (icmp6->icmp6_code == ICMP6_NI_SUBJ_IPV6 || - icmp6->icmp6_code == ICMP6_NI_SUBJ_FQDN))) { + if ((icmp6->icmp6_type == ICMP6_ECHO_REQUEST && + icmp6->icmp6_code == 0) || + (icmp6->icmp6_type == ICMP6_NI_QUERY && + (icmp6->icmp6_code == ICMP6_NI_SUBJ_IPV6 || + icmp6->icmp6_code == ICMP6_NI_SUBJ_FQDN))) { /* Good */ ; } else { @@ -3134,15 +3093,20 @@ icmp6_dgram_send(struct socket *so, int flags, struct mbuf *m, #if 
ENABLE_DEFAULT_SCOPE if (dst->sin6_scope_id == 0) { /* not change if specified */ - dst->sin6_scope_id = scope6_addr2default(&dst->sin6_addr); + dst->sin6_scope_id = scope6_addr2default(&dst->sin6_addr); } #endif - return rip6_output(m, so, (struct sockaddr_in6 *)(void *)nam, - control, 0); + return (rip6_output(m, so, dst, control, 0)); bad: - m_freem(m); - return error; + VERIFY(error != 0); + + if (m != NULL) + m_freem(m); + if (control != NULL) + m_freem(control); + + return (error); } /* Like rip6_attach but without root privilege enforcement */ @@ -3179,75 +3143,6 @@ icmp6_dgram_attach(struct socket *so, int proto, struct proc *p) } - -#ifndef HAVE_PPSRATECHECK -#ifndef timersub -#define timersub(tvp, uvp, vvp) \ - do { \ - (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \ - (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \ - if ((vvp)->tv_usec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_usec += 1000000; \ - } \ - } while (0) -#endif - -/* - * ppsratecheck(): packets (or events) per second limitation. - */ -static int -ppsratecheck(lasttime, curpps, maxpps) - struct timeval *lasttime; - int *curpps; - int maxpps; /* maximum pps allowed */ -{ - struct timeval tv, delta; - int rv; - - microtime(&tv); - - timersub(&tv, lasttime, &delta); - - /* - * Check for 0,0 so that the message will be seen at least once. - * If more than one second has passed since the last update of - * lasttime, reset the counter. - * - * we do increment *curpps even in *curpps < maxpps case, as some may - * try to use *curpps for stat purposes as well. - */ - if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) || - delta.tv_sec >= 1) { - *lasttime = tv; - *curpps = 0; - rv = 1; - } else if (maxpps < 0) - rv = 1; - else if (*curpps < maxpps) - rv = 1; - else - rv = 0; - -#if 1 /* DIAGNOSTIC? */ - /* be careful about wrap-around */ - if (*curpps + 1 > 0) - *curpps = *curpps + 1; -#else - /* - * assume that there's not too many calls to this function. - * not sure if the assumption holds, as it depends on *caller's* - * behavior, not the behavior of this function. - * IMHO it is wrong to make assumption on the caller's behavior, - * so the above #if is #if 1, not #ifdef DIAGNOSTIC. - */ - *curpps = *curpps + 1; -#endif - - return (rv); -} -#endif - /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. @@ -3259,8 +3154,8 @@ ppsratecheck(lasttime, curpps, maxpps) static int icmp6_ratelimit( __unused const struct in6_addr *dst, /* not used at this moment */ - __unused const int type, /* not used at this moment */ - __unused const int code) /* not used at this moment */ + __unused const int type, /* not used at this moment */ + __unused const int code) /* not used at this moment */ { int ret; diff --git a/bsd/netinet6/in6.c b/bsd/netinet6/in6.c index 8a39cc647..261fdc420 100644 --- a/bsd/netinet6/in6.c +++ b/bsd/netinet6/in6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,9 +26,6 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/in6.c,v 1.7.2.7 2001/08/06 20:26:22 ume Exp $ */ -/* $KAME: in6.c,v 1.187 2001/05/24 07:43:59 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
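/*
 * The hunk above retires the file-local ppsratecheck() fallback (and its
 * timersub() shim); icmp6_ratelimit() now relies on the limiter provided
 * by the kernel proper.  For reference, a compact restatement of the
 * deleted logic (presented as a sketch, not code meant to build in this
 * tree):
 */
static int
example_ppscheck(struct timeval *lasttime, int *curpps, int maxpps)
{
	struct timeval tv, delta;
	int ok;

	microtime(&tv);
	timersub(&tv, lasttime, &delta);

	if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
	    delta.tv_sec >= 1) {
		*lasttime = tv;			/* new one-second window */
		*curpps = 0;
		ok = 1;
	} else {
		ok = (maxpps < 0 || *curpps < maxpps);
	}
	if (*curpps + 1 > 0)			/* be careful about wrap-around */
		*curpps = *curpps + 1;
	return (ok);
}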
@@ -113,6 +110,7 @@ #include #include #include +#include #include #include @@ -128,6 +126,9 @@ #include #include #include +#include +#include +#include #include #include @@ -145,7 +146,7 @@ #include #endif /* PF */ - /* +/* * Definitions of some constant IP6 addresses. */ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; @@ -167,26 +168,58 @@ const struct in6_addr in6mask64 = IN6MASK64; const struct in6_addr in6mask96 = IN6MASK96; const struct in6_addr in6mask128 = IN6MASK128; -const struct sockaddr_in6 sa6_any = {sizeof(sa6_any), AF_INET6, - 0, 0, IN6ADDR_ANY_INIT, 0}; +const struct sockaddr_in6 sa6_any = { + sizeof (sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 +}; -static int in6_lifaddr_ioctl(struct socket *, u_long, struct if_laddrreq *, - struct ifnet *, struct proc *); +static int in6ctl_lifaddr(struct ifnet *, u_long, struct if_laddrreq *, + boolean_t); +static int in6ctl_associd(struct socket *, u_long, caddr_t); +static int in6ctl_connid(struct socket *, u_long, caddr_t); +static int in6ctl_conninfo(struct socket *, u_long, caddr_t); +static int in6ctl_llstart(struct ifnet *, u_long, caddr_t); +static int in6ctl_llstop(struct ifnet *); +static int in6ctl_cgastart(struct ifnet *, u_long, caddr_t); +static int in6ctl_gifaddr(struct ifnet *, struct in6_ifaddr *, u_long, + struct in6_ifreq *); +static int in6ctl_gifstat(struct ifnet *, u_long, struct in6_ifreq *); +static int in6ctl_alifetime(struct in6_ifaddr *, u_long, struct in6_ifreq *, + boolean_t); +static int in6ctl_aifaddr(struct ifnet *, struct in6_aliasreq *); +static void in6ctl_difaddr(struct ifnet *, struct in6_ifaddr *); static int in6_autoconf(struct ifnet *, int); static int in6_setrouter(struct ifnet *, int); -static int in6_ifinit(struct ifnet *, struct in6_ifaddr *, - struct sockaddr_in6 *, int); +static int in6_ifinit(struct ifnet *, struct in6_ifaddr *, int); +static int in6_ifaupdate_aux(struct in6_ifaddr *, struct ifnet *, int); static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); static struct in6_ifaddr *in6_ifaddr_alloc(int); static void in6_ifaddr_attached(struct ifaddr *); static void in6_ifaddr_detached(struct ifaddr *); static void in6_ifaddr_free(struct ifaddr *); static void in6_ifaddr_trace(struct ifaddr *, int); +#if defined(__LP64__) +static void in6_llstartreq_32_to_64(struct in6_llstartreq_32 *, + struct in6_llstartreq_64 *); +#else +static void in6_llstartreq_64_to_32(struct in6_llstartreq_64 *, + struct in6_llstartreq_32 *); +#endif static struct in6_aliasreq *in6_aliasreq_to_native(void *, int, struct in6_aliasreq *); +static struct in6_llstartreq *in6_llstartreq_to_native(void *, int, + struct in6_llstartreq *); +static int in6_to_kamescope(struct sockaddr_in6 *, struct ifnet *); static void in6_ifaddr_set_dadprogress(struct in6_ifaddr *); +static int in6_getassocids(struct socket *, uint32_t *, user_addr_t); +static int in6_getconnids(struct socket *, associd_t, uint32_t *, user_addr_t); +static int in6_getconninfo(struct socket *, connid_t, uint32_t *, + uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *, + uint32_t *, user_addr_t, uint32_t *); + +static void in6_if_up_dad_start(struct ifnet *); + extern lck_mtx_t *nd6_mutex; extern int in6_init2done; @@ -242,9 +275,9 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) struct rtentry *nrt = NULL; int e; - bzero(&all1_sa, sizeof(all1_sa)); + bzero(&all1_sa, sizeof (all1_sa)); all1_sa.sin6_family = AF_INET6; - all1_sa.sin6_len = sizeof(struct sockaddr_in6); + all1_sa.sin6_len = sizeof (struct sockaddr_in6); 
all1_sa.sin6_addr = in6mask128; /* @@ -258,8 +291,7 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) */ lck_mtx_lock(rnh_lock); e = rtrequest_locked(cmd, ifa->ifa_addr, ifa->ifa_addr, - (struct sockaddr *)&all1_sa, - RTF_UP|RTF_HOST|RTF_LLINFO, &nrt); + (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt); if (e != 0) { log(LOG_ERR, "in6_ifloop_request: " "%s operation failed for %s (errno=%d)\n", @@ -284,8 +316,8 @@ in6_ifloop_request(int cmd, struct ifaddr *ifa) /* * Report the addition/removal of the address to the routing socket. * XXX: since we called rtinit for a p2p interface with a destination, - * we end up reporting twice in such a case. Should we rather - * omit the second report? + * we end up reporting twice in such a case. Should we rather + * omit the second report? */ if (nrt != NULL) { rt_newaddrmsg(cmd, ifa, e, nrt); @@ -409,8 +441,8 @@ in6_mask2len(mask, lim0) u_char *lim = lim0, *p; /* ignore the scope_id part */ - if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask)) - lim = (u_char *)mask + sizeof(*mask); + if (lim0 == NULL || lim0 - (u_char *)mask > sizeof (*mask)) + lim = (u_char *)mask + sizeof (*mask); for (p = (u_char *)mask; p < lim; x++, p++) { if (*p != 0xff) break; @@ -435,7 +467,7 @@ in6_mask2len(mask, lim0) return (-1); } - return x * 8 + y; + return (x * 8 + y); } void @@ -445,7 +477,7 @@ in6_len2mask(mask, len) { int i; - bzero(mask, sizeof(*mask)); + bzero(mask, sizeof (*mask)); for (i = 0; i < len / 8; i++) mask->s6_addr8[i] = 0xff; if (len % 8) @@ -482,6 +514,46 @@ in6_aliasreq_32_to_64(struct in6_aliasreq_32 *src, struct in6_aliasreq_64 *dst) dst->ifra_lifetime.ia6t_pltime = src->ifra_lifetime.ia6t_pltime; } +#if defined(__LP64__) +void +in6_llstartreq_32_to_64(struct in6_llstartreq_32 *src, + struct in6_llstartreq_64 *dst) +{ + bzero(dst, sizeof (*dst)); + bcopy(src->llsr_name, dst->llsr_name, sizeof (dst->llsr_name)); + dst->llsr_flags = src->llsr_flags; + bcopy(src->llsr_cgaprep.cga_modifier.octets, + dst->llsr_cgaprep.cga_modifier.octets, + sizeof (dst->llsr_cgaprep.cga_modifier.octets)); + dst->llsr_cgaprep.cga_security_level = + src->llsr_cgaprep.cga_security_level; + dst->llsr_lifetime.ia6t_expire = src->llsr_lifetime.ia6t_expire; + dst->llsr_lifetime.ia6t_preferred = src->llsr_lifetime.ia6t_preferred; + dst->llsr_lifetime.ia6t_vltime = src->llsr_lifetime.ia6t_vltime; + dst->llsr_lifetime.ia6t_pltime = src->llsr_lifetime.ia6t_pltime; +} +#endif + +#if !defined(__LP64__) +void +in6_llstartreq_64_to_32(struct in6_llstartreq_64 *src, + struct in6_llstartreq_32 *dst) +{ + bzero(dst, sizeof (*dst)); + bcopy(src->llsr_name, dst->llsr_name, sizeof (dst->llsr_name)); + dst->llsr_flags = src->llsr_flags; + bcopy(src->llsr_cgaprep.cga_modifier.octets, + dst->llsr_cgaprep.cga_modifier.octets, + sizeof (dst->llsr_cgaprep.cga_modifier.octets)); + dst->llsr_cgaprep.cga_security_level = + src->llsr_cgaprep.cga_security_level; + dst->llsr_lifetime.ia6t_expire = src->llsr_lifetime.ia6t_expire; + dst->llsr_lifetime.ia6t_preferred = src->llsr_lifetime.ia6t_preferred; + dst->llsr_lifetime.ia6t_vltime = src->llsr_lifetime.ia6t_vltime; + dst->llsr_lifetime.ia6t_pltime = src->llsr_lifetime.ia6t_pltime; +} +#endif + static struct in6_aliasreq * in6_aliasreq_to_native(void *data, int data_is_64, struct in6_aliasreq *dst) { @@ -501,292 +573,741 @@ in6_aliasreq_to_native(void *data, int data_is_64, struct in6_aliasreq *dst) return (dst); } -#define ifa2ia6(ifa) ((struct in6_ifaddr *)(void *)(ifa)) +static struct in6_llstartreq * 
+in6_llstartreq_to_native(void *data, int is64, struct in6_llstartreq *dst) +{ +#if defined(__LP64__) + if (is64) + bcopy(data, dst, sizeof (*dst)); + else + in6_llstartreq_32_to_64((struct in6_llstartreq_32 *)data, + (struct in6_llstartreq_64 *)dst); +#else + if (is64) + in6_llstartreq_64_to_32((struct in6_llstartreq_64 *)data, + (struct in6_llstartreq_32 *)dst); + else + bcopy(data, dst, sizeof (*dst)); +#endif /* __LP64__ */ + return (dst); +} -int -in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, - struct proc *p) +static __attribute__((noinline)) int +in6ctl_associd(struct socket *so, u_long cmd, caddr_t data) { - struct in6_aliasreq sifra, *ifra = NULL; - struct in6_ifaddr *ia = NULL; - struct sockaddr_in6 sin6, *sa6 = NULL; - int index, privileged, error = 0; - u_int32_t ifru_scope_id[16]; - struct timeval timenow; - int p64 = proc_is64bit(p); + int error = 0; + union { + struct so_aidreq32 a32; + struct so_aidreq64 a64; + } u; - getmicrotime(&timenow); + VERIFY(so != NULL); - privileged = (proc_suser(p) == 0); switch (cmd) { -#if MROUTING - case SIOCGETSGCNT_IN6: /* struct sioc_sg_req6 */ - case SIOCGETMIFCNT_IN6_32: /* struct sioc_mif_req6_32 */ - case SIOCGETMIFCNT_IN6_64: /* struct sioc_mif_req6_64 */ - return (mrt6_ioctl(cmd, data)); - /* NOTREACHED */ -#endif + case SIOCGASSOCIDS32: { /* struct so_aidreq32 */ + bcopy(data, &u.a32, sizeof (u.a32)); + error = in6_getassocids(so, &u.a32.sar_cnt, u.a32.sar_aidp); + if (error == 0) + bcopy(&u.a32, data, sizeof (u.a32)); + break; + } - case SIOCAADDRCTL_POLICY: /* struct in6_addrpolicy */ - case SIOCDADDRCTL_POLICY: /* struct in6_addrpolicy */ - if (!privileged) - return (EPERM); - return (in6_src_ioctl(cmd, data)); - /* NOTREACHED */ + case SIOCGASSOCIDS64: { /* struct so_aidreq64 */ + bcopy(data, &u.a64, sizeof (u.a64)); + error = in6_getassocids(so, &u.a64.sar_cnt, u.a64.sar_aidp); + if (error == 0) + bcopy(&u.a64, data, sizeof (u.a64)); + break; + } - case SIOCDRADD_IN6_32: /* struct in6_defrouter_32 */ - case SIOCDRADD_IN6_64: /* struct in6_defrouter_64 */ - case SIOCDRDEL_IN6_32: /* struct in6_defrouter_32 */ - case SIOCDRDEL_IN6_64: /* struct in6_defrouter_64 */ - if (!privileged) - return (EPERM); - return (defrtrlist_ioctl(cmd, data)); + default: + VERIFY(0); /* NOTREACHED */ } - if (ifp == NULL) - return (EOPNOTSUPP); + return (error); +} + +static __attribute__((noinline)) int +in6ctl_connid(struct socket *so, u_long cmd, caddr_t data) +{ + int error = 0; + union { + struct so_cidreq32 c32; + struct so_cidreq64 c64; + } u; + + VERIFY(so != NULL); switch (cmd) { - case SIOCAUTOCONF_START: /* struct in6_ifreq */ - case SIOCAUTOCONF_STOP: /* struct in6_ifreq */ - case SIOCLL_START_32: /* struct in6_aliasreq_32 */ - case SIOCLL_START_64: /* struct in6_aliasreq_64 */ - case SIOCLL_STOP: /* struct in6_ifreq */ - case SIOCSETROUTERMODE_IN6: /* struct in6_ifreq */ - case SIOCPROTOATTACH_IN6_32: /* struct in6_aliasreq_32 */ - case SIOCPROTOATTACH_IN6_64: /* struct in6_aliasreq_64 */ - case SIOCPROTODETACH_IN6: /* struct in6_ifreq */ - if (!privileged) - return (EPERM); + case SIOCGCONNIDS32: { /* struct so_cidreq32 */ + bcopy(data, &u.c32, sizeof (u.c32)); + error = in6_getconnids(so, u.c32.scr_aid, &u.c32.scr_cnt, + u.c32.scr_cidp); + if (error == 0) + bcopy(&u.c32, data, sizeof (u.c32)); break; + } - case SIOCSNDFLUSH_IN6: /* struct in6_ifreq */ - case SIOCSPFXFLUSH_IN6: /* struct in6_ifreq */ - case SIOCSRTRFLUSH_IN6: /* struct in6_ifreq */ - case SIOCSDEFIFACE_IN6_32: /* struct in6_ndifreq_32 */ - 
case SIOCSDEFIFACE_IN6_64: /* struct in6_ndifreq_64 */ - case SIOCSIFINFO_FLAGS: /* struct in6_ndireq */ - if (!privileged) - return (EPERM); - /* FALLTHRU */ - case OSIOCGIFINFO_IN6: /* struct in6_ondireq */ - case SIOCGIFINFO_IN6: /* struct in6_ondireq */ - case SIOCGDRLST_IN6_32: /* struct in6_drlist_32 */ - case SIOCGDRLST_IN6_64: /* struct in6_drlist_64 */ - case SIOCGPRLST_IN6_32: /* struct in6_prlist_32 */ - case SIOCGPRLST_IN6_64: /* struct in6_prlist_64 */ - case SIOCGNBRINFO_IN6_32: /* struct in6_nbrinfo_32 */ - case SIOCGNBRINFO_IN6_64: /* struct in6_nbrinfo_64 */ - case SIOCGDEFIFACE_IN6_32: /* struct in6_ndifreq_32 */ - case SIOCGDEFIFACE_IN6_64: /* struct in6_ndifreq_64 */ - return (nd6_ioctl(cmd, data, ifp)); - /* NOTREACHED */ + case SIOCGCONNIDS64: { /* struct so_cidreq64 */ + bcopy(data, &u.c64, sizeof (u.c64)); + error = in6_getconnids(so, u.c64.scr_aid, &u.c64.scr_cnt, + u.c64.scr_cidp); + if (error == 0) + bcopy(&u.c64, data, sizeof (u.c64)); + break; + } - case SIOCSIFPREFIX_IN6: /* struct in6_prefixreq */ - case SIOCDIFPREFIX_IN6: /* struct in6_prefixreq */ - case SIOCAIFPREFIX_IN6: /* struct in6_rrenumreq */ - case SIOCCIFPREFIX_IN6: /* struct in6_rrenumreq */ - case SIOCSGIFPREFIX_IN6: /* struct in6_rrenumreq */ - case SIOCGIFPREFIX_IN6: /* struct in6_prefixreq */ - log(LOG_NOTICE, - "prefix ioctls are now invalidated. " - "please use ifconfig.\n"); - return (EOPNOTSUPP); + default: + VERIFY(0); /* NOTREACHED */ + } - case SIOCSSCOPE6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; + return (error); +} - if (!privileged) - return (EPERM); +static __attribute__((noinline)) int +in6ctl_conninfo(struct socket *so, u_long cmd, caddr_t data) +{ + int error = 0; + union { + struct so_cinforeq32 ci32; + struct so_cinforeq64 ci64; + } u; - bcopy(ifr->ifr_ifru.ifru_scope_id, ifru_scope_id, - sizeof (ifru_scope_id)); + VERIFY(so != NULL); - return (scope6_set(ifp, ifru_scope_id)); - /* NOTREACHED */ + switch (cmd) { + case SIOCGCONNINFO32: { /* struct so_cinforeq32 */ + bcopy(data, &u.ci32, sizeof (u.ci32)); + error = in6_getconninfo(so, u.ci32.scir_cid, &u.ci32.scir_flags, + &u.ci32.scir_ifindex, &u.ci32.scir_error, u.ci32.scir_src, + &u.ci32.scir_src_len, u.ci32.scir_dst, &u.ci32.scir_dst_len, + &u.ci32.scir_aux_type, u.ci32.scir_aux_data, + &u.ci32.scir_aux_len); + if (error == 0) + bcopy(&u.ci32, data, sizeof (u.ci32)); + break; } - case SIOCGSCOPE6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; - - bcopy(ifr->ifr_ifru.ifru_scope_id, ifru_scope_id, - sizeof (ifru_scope_id)); - - return (scope6_get(ifp, ifru_scope_id)); - /* NOTREACHED */ + case SIOCGCONNINFO64: { /* struct so_cinforeq64 */ + bcopy(data, &u.ci64, sizeof (u.ci64)); + error = in6_getconninfo(so, u.ci64.scir_cid, &u.ci64.scir_flags, + &u.ci64.scir_ifindex, &u.ci64.scir_error, u.ci64.scir_src, + &u.ci64.scir_src_len, u.ci64.scir_dst, &u.ci64.scir_dst_len, + &u.ci64.scir_aux_type, u.ci64.scir_aux_data, + &u.ci64.scir_aux_len); + if (error == 0) + bcopy(&u.ci64, data, sizeof (u.ci64)); + break; } - case SIOCGSCOPE6DEF: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; - - bcopy(ifr->ifr_ifru.ifru_scope_id, ifru_scope_id, - sizeof (ifru_scope_id)); - - return (scope6_get_default(ifru_scope_id)); + default: + VERIFY(0); /* NOTREACHED */ } - case SIOCALIFADDR: /* struct if_laddrreq */ - case SIOCDLIFADDR: /* struct if_laddrreq */ - if (!privileged) - return(EPERM); - /* FALLTHRU */ - case SIOCGLIFADDR: { 
/* struct if_laddrreq */ - struct if_laddrreq iflr; + return (error); +} - bcopy(data, &iflr, sizeof (iflr)); - error = in6_lifaddr_ioctl(so, cmd, &iflr, ifp, p); - bcopy(&iflr, data, sizeof (iflr)); - return (error); - /* NOTREACHED */ - } - } +static __attribute__((noinline)) int +in6ctl_llstart(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct in6_aliasreq sifra, *ifra = NULL; + boolean_t is64; + int error = 0; + + VERIFY(ifp != NULL); switch (cmd) { case SIOCLL_START_32: /* struct in6_aliasreq_32 */ - case SIOCAIFADDR_IN6_32: { /* struct in6_aliasreq_32 */ + case SIOCLL_START_64: /* struct in6_aliasreq_64 */ + is64 = (cmd == SIOCLL_START_64); /* * Convert user ifra to the kernel form, when appropriate. * This allows the conversion between different data models * to be centralized, so that it can be passed around to other * routines that are expecting the kernel form. */ - ifra = in6_aliasreq_to_native(data, 0, &sifra); - bcopy(&ifra->ifra_addr, &sin6, sizeof (sin6)); - sa6 = &sin6; + ifra = in6_aliasreq_to_native(data, is64, &sifra); + + /* + * NOTE: All the interface specific DLIL attachements should + * be done here. They are currently done in in6_ifattach_aux() + * for the interfaces that need it. + */ + if ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0 && + ifra->ifra_addr.sin6_family == AF_INET6 && + /* Only check ifra_dstaddr if valid */ + (ifra->ifra_dstaddr.sin6_len == 0 || + ifra->ifra_dstaddr.sin6_family == AF_INET6)) { + /* some interfaces may provide LinkLocal addresses */ + error = in6_ifattach_aliasreq(ifp, NULL, ifra); + } else { + error = in6_ifattach_aliasreq(ifp, NULL, NULL); + } + if (error == 0) + in6_if_up_dad_start(ifp); break; + + default: + VERIFY(0); + /* NOTREACHED */ } - case SIOCLL_START_64: /* struct in6_aliasreq_64 */ - case SIOCAIFADDR_IN6_64: { /* struct in6_aliasreq_64 */ + return (error); +} + +static __attribute__((noinline)) int +in6ctl_llstop(struct ifnet *ifp) +{ + struct in6_ifaddr *ia; + + VERIFY(ifp != NULL); + + /* Remove link local addresses from interface */ + lck_rw_lock_exclusive(&in6_ifaddr_rwlock); + ia = in6_ifaddrs; + while (ia != NULL) { + if (ia->ia_ifa.ifa_ifp != ifp) { + ia = ia->ia_next; + continue; + } + IFA_LOCK(&ia->ia_ifa); + if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { + IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for us */ + IFA_UNLOCK(&ia->ia_ifa); + lck_rw_done(&in6_ifaddr_rwlock); + in6_purgeaddr(&ia->ia_ifa); + IFA_REMREF(&ia->ia_ifa); /* for us */ + lck_rw_lock_exclusive(&in6_ifaddr_rwlock); + /* + * Purging the address caused in6_ifaddr_rwlock + * to be dropped and reacquired; + * therefore search again from the beginning + * of in6_ifaddrs list. + */ + ia = in6_ifaddrs; + continue; + } + IFA_UNLOCK(&ia->ia_ifa); + ia = ia->ia_next; + } + lck_rw_done(&in6_ifaddr_rwlock); + return (0); +} + +static __attribute__((noinline)) int +in6ctl_cgastart(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct in6_llstartreq llsr; + int is64, error = 0; + + VERIFY(ifp != NULL); + + switch (cmd) { + case SIOCLL_CGASTART_32: /* struct in6_llstartreq_32 */ + case SIOCLL_CGASTART_64: /* struct in6_llstartreq_64 */ + is64 = (cmd == SIOCLL_CGASTART_64); /* - * Convert user ifra to the kernel form, when appropriate. + * Convert user llstartreq to the kernel form, when appropriate. * This allows the conversion between different data models * to be centralized, so that it can be passed around to other * routines that are expecting the kernel form. 
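in6ctl_llstop() above uses a lock idiom worth spelling out: the purge can block and takes other locks, so the rwlock is dropped around it, and because the list may have mutated while unlocked, the scan restarts from the head rather than trusting the stale next pointer. A stripped-down sketch of the same restart discipline, using pthread names in place of the kernel's lck_rw primitives (the real code also takes a reference on the entry before unlocking, cf. IFA_ADDREF; this sketch only notes that in a comment):

    #include <pthread.h>

    struct node {
        struct node *next;
        int doomed;
    };

    static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;
    static struct node *list_head;

    void purge(struct node *);      /* may block; called unlocked */

    static void
    purge_doomed(void)
    {
        struct node *n;

        pthread_rwlock_wrlock(&list_lock);
        n = list_head;
        while (n != NULL) {
            if (!n->doomed) {
                n = n->next;
                continue;
            }
            /* Real code holds a reference on n before unlocking. */
            pthread_rwlock_unlock(&list_lock);
            purge(n);               /* list may change while unlocked */
            pthread_rwlock_wrlock(&list_lock);
            n = list_head;          /* restart: n->next is stale */
        }
        pthread_rwlock_unlock(&list_lock);
    }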
*/ - ifra = in6_aliasreq_to_native(data, 1, &sifra); - bcopy(&ifra->ifra_addr, &sin6, sizeof (sin6)); - sa6 = &sin6; + in6_llstartreq_to_native(data, is64, &llsr); + + /* + * NOTE: All the interface specific DLIL attachements + * should be done here. They are currently done in + * in6_ifattach_llstartreq() for the interfaces that + * need it. + */ + error = in6_ifattach_llstartreq(ifp, &llsr); + if (error == 0) + in6_if_up_dad_start(ifp); break; + + default: + VERIFY(0); + /* NOTREACHED */ } - case SIOCSIFADDR_IN6: /* struct in6_ifreq (deprecated) */ + return (error); +} + +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. + */ +static __attribute__((noinline)) int +in6ctl_gifaddr(struct ifnet *ifp, struct in6_ifaddr *ia, u_long cmd, + struct in6_ifreq *ifr) +{ + struct sockaddr_in6 addr; + int error = 0; + + VERIFY(ifp != NULL); + + if (ia == NULL) + return (EADDRNOTAVAIL); + + switch (cmd) { case SIOCGIFADDR_IN6: /* struct in6_ifreq */ - case SIOCSIFDSTADDR_IN6: /* struct in6_ifreq (deprecated) */ - case SIOCSIFNETMASK_IN6: /* struct in6_ifreq (deprecated) */ - case SIOCGIFDSTADDR_IN6: /* struct in6_ifreq */ - case SIOCGIFNETMASK_IN6: /* struct in6_ifreq */ - case SIOCDIFADDR_IN6: /* struct in6_ifreq */ - case SIOCGIFPSRCADDR_IN6: /* struct in6_ifreq */ - case SIOCGIFPDSTADDR_IN6: /* struct in6_ifreq */ - case SIOCGIFAFLAG_IN6: /* struct in6_ifreq */ - case SIOCSNDFLUSH_IN6: /* struct in6_ifreq */ - case SIOCSPFXFLUSH_IN6: /* struct in6_ifreq */ - case SIOCSRTRFLUSH_IN6: /* struct in6_ifreq */ - case SIOCGIFALIFETIME_IN6: /* struct in6_ifreq */ - case SIOCSIFALIFETIME_IN6: /* struct in6_ifreq */ - case SIOCGIFSTAT_IN6: /* struct in6_ifreq */ - case SIOCGIFSTAT_ICMP6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; + IFA_LOCK(&ia->ia_ifa); + bcopy(&ia->ia_addr, &addr, sizeof (addr)); + IFA_UNLOCK(&ia->ia_ifa); + if ((error = sa6_recoverscope(&addr, TRUE)) != 0) + break; + bcopy(&addr, &ifr->ifr_addr, sizeof (addr)); + break; - bcopy(&ifr->ifr_addr, &sin6, sizeof (sin6)); - sa6 = &sin6; + case SIOCGIFDSTADDR_IN6: /* struct in6_ifreq */ + if (!(ifp->if_flags & IFF_POINTOPOINT)) { + error = EINVAL; + break; + } + /* + * XXX: should we check if ifa_dstaddr is NULL and return + * an error? + */ + IFA_LOCK(&ia->ia_ifa); + bcopy(&ia->ia_dstaddr, &addr, sizeof (addr)); + IFA_UNLOCK(&ia->ia_ifa); + if ((error = sa6_recoverscope(&addr, TRUE)) != 0) + break; + bcopy(&addr, &ifr->ifr_dstaddr, sizeof (addr)); break; - } default: - break; + VERIFY(0); + /* NOTREACHED */ } - switch (cmd) { - case SIOCAUTOCONF_START: - return (in6_autoconf(ifp, TRUE)); - /* NOTREACHED */ + return (error); +} - case SIOCAUTOCONF_STOP: - return (in6_autoconf(ifp, FALSE)); - /* NOTREACHED */ +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. + */ +static __attribute__((noinline)) int +in6ctl_gifstat(struct ifnet *ifp, u_long cmd, struct in6_ifreq *ifr) +{ + int error = 0, index; - case SIOCLL_START_32: - case SIOCLL_START_64: - VERIFY(ifra != NULL); - /* - * NOTE: All the interface specific DLIL attachements should - * be done here. They are currently done in in6_ifattach() - * for the interfaces that need it. 
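The block comments on in6ctl_gifaddr() and its siblings insist on bcopy() or other byte-aligned access because the ioctl data pointer is a raw byte buffer that need not be aligned for the structure it carries. The rule reduced to a free-standing sketch (the payload layout is invented):

    #include <stdint.h>
    #include <strings.h>

    struct payload { uint64_t when; uint32_t what; };

    static uint64_t
    read_when(const char *data)      /* data: possibly unaligned */
    {
        struct payload p;

        bcopy(data, &p, sizeof (p)); /* byte-wise copy is always legal */
        return (p.when);  /* *(const uint64_t *)data could fault */
    }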
- */ - if ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0 && - ifra->ifra_addr.sin6_family == AF_INET6 && - ifra->ifra_dstaddr.sin6_family == AF_INET6) { - /* some interfaces may provide LinkLocal addresses */ - error = in6_if_up(ifp, ifra); + VERIFY(ifp != NULL); + index = ifp->if_index; + + switch (cmd) { + case SIOCGIFSTAT_IN6: /* struct in6_ifreq */ + /* N.B.: if_inet6data is never freed once set. */ + if (IN6_IFEXTRA(ifp) == NULL) { + /* return (EAFNOSUPPORT)? */ + bzero(&ifr->ifr_ifru.ifru_stat, + sizeof (ifr->ifr_ifru.ifru_stat)); } else { - error = in6_if_up(ifp, NULL); + bcopy(&IN6_IFEXTRA(ifp)->in6_ifstat, + &ifr->ifr_ifru.ifru_stat, + sizeof (ifr->ifr_ifru.ifru_stat)); } - return (error); + break; + + case SIOCGIFSTAT_ICMP6: /* struct in6_ifreq */ + /* N.B.: if_inet6data is never freed once set. */ + if (IN6_IFEXTRA(ifp) == NULL) { + /* return (EAFNOSUPPORT)? */ + bzero(&ifr->ifr_ifru.ifru_stat, + sizeof (ifr->ifr_ifru.ifru_icmp6stat)); + } else { + bcopy(&IN6_IFEXTRA(ifp)->icmp6_ifstat, + &ifr->ifr_ifru.ifru_icmp6stat, + sizeof (ifr->ifr_ifru.ifru_icmp6stat)); + } + break; + + default: + VERIFY(0); /* NOTREACHED */ + } - case SIOCLL_STOP: - /* Remove link local addresses from interface */ - lck_rw_lock_exclusive(&in6_ifaddr_rwlock); - ia = in6_ifaddrs; - while (ia != NULL) { - if (ia->ia_ifa.ifa_ifp != ifp) { - ia = ia->ia_next; - continue; + return (error); +} + +/* + * Caller passes in the ioctl data pointer directly via "ifr", with the + * expectation that this routine always uses bcopy() or other byte-aligned + * memory accesses. + */ +static __attribute__((noinline)) int +in6ctl_alifetime(struct in6_ifaddr *ia, u_long cmd, struct in6_ifreq *ifr, + boolean_t p64) +{ + uint64_t timenow = net_uptime(); + struct in6_addrlifetime ia6_lt; + struct timeval caltime; + int error = 0; + + if (ia == NULL) + return (EADDRNOTAVAIL); + + switch (cmd) { + case SIOCGIFALIFETIME_IN6: /* struct in6_ifreq */ + IFA_LOCK(&ia->ia_ifa); + /* retrieve time as calendar time (last arg is 1) */ + in6ifa_getlifetime(ia, &ia6_lt, 1); + if (p64) { + struct in6_addrlifetime_64 lt; + + bzero(<, sizeof (lt)); + lt.ia6t_expire = ia6_lt.ia6t_expire; + lt.ia6t_preferred = ia6_lt.ia6t_preferred; + lt.ia6t_vltime = ia6_lt.ia6t_vltime; + lt.ia6t_pltime = ia6_lt.ia6t_pltime; + bcopy(<, &ifr->ifr_ifru.ifru_lifetime, sizeof (lt)); + } else { + struct in6_addrlifetime_32 lt; + + bzero(<, sizeof (lt)); + lt.ia6t_expire = (uint32_t)ia6_lt.ia6t_expire; + lt.ia6t_preferred = (uint32_t)ia6_lt.ia6t_preferred; + lt.ia6t_vltime = (uint32_t)ia6_lt.ia6t_vltime; + lt.ia6t_pltime = (uint32_t)ia6_lt.ia6t_pltime; + bcopy(<, &ifr->ifr_ifru.ifru_lifetime, sizeof (lt)); + } + IFA_UNLOCK(&ia->ia_ifa); + break; + + case SIOCSIFALIFETIME_IN6: /* struct in6_ifreq */ + getmicrotime(&caltime); + + /* sanity for overflow - beware unsigned */ + if (p64) { + struct in6_addrlifetime_64 lt; + + bcopy(&ifr->ifr_ifru.ifru_lifetime, <, sizeof (lt)); + if (lt.ia6t_vltime != ND6_INFINITE_LIFETIME && + lt.ia6t_vltime + caltime.tv_sec < caltime.tv_sec) { + error = EINVAL; + break; } - IFA_LOCK(&ia->ia_ifa); - if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { - IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for us */ - IFA_UNLOCK(&ia->ia_ifa); - lck_rw_done(&in6_ifaddr_rwlock); - in6_purgeaddr(&ia->ia_ifa); - IFA_REMREF(&ia->ia_ifa); /* for us */ - lck_rw_lock_exclusive(&in6_ifaddr_rwlock); - /* - * Purging the address caused in6_ifaddr_rwlock - * to be dropped and reacquired; - * therefore search again from the beginning - * of in6_ifaddrs list. 
- */ - ia = in6_ifaddrs; - continue; + if (lt.ia6t_pltime != ND6_INFINITE_LIFETIME && + lt.ia6t_pltime + caltime.tv_sec < caltime.tv_sec) { + error = EINVAL; + break; + } + } else { + struct in6_addrlifetime_32 lt; + + bcopy(&ifr->ifr_ifru.ifru_lifetime, <, sizeof (lt)); + if (lt.ia6t_vltime != ND6_INFINITE_LIFETIME && + lt.ia6t_vltime + caltime.tv_sec < caltime.tv_sec) { + error = EINVAL; + break; + } + if (lt.ia6t_pltime != ND6_INFINITE_LIFETIME && + lt.ia6t_pltime + caltime.tv_sec < caltime.tv_sec) { + error = EINVAL; + break; } - IFA_UNLOCK(&ia->ia_ifa); - ia = ia->ia_next; } - lck_rw_done(&in6_ifaddr_rwlock); - return (0); + + IFA_LOCK(&ia->ia_ifa); + if (p64) { + struct in6_addrlifetime_64 lt; + + bcopy(&ifr->ifr_ifru.ifru_lifetime, <, sizeof (lt)); + ia6_lt.ia6t_expire = lt.ia6t_expire; + ia6_lt.ia6t_preferred = lt.ia6t_preferred; + ia6_lt.ia6t_vltime = lt.ia6t_vltime; + ia6_lt.ia6t_pltime = lt.ia6t_pltime; + } else { + struct in6_addrlifetime_32 lt; + + bcopy(&ifr->ifr_ifru.ifru_lifetime, <, sizeof (lt)); + ia6_lt.ia6t_expire = (uint32_t)lt.ia6t_expire; + ia6_lt.ia6t_preferred = (uint32_t)lt.ia6t_preferred; + ia6_lt.ia6t_vltime = lt.ia6t_vltime; + ia6_lt.ia6t_pltime = lt.ia6t_pltime; + } + /* for sanity */ + if (ia6_lt.ia6t_vltime != ND6_INFINITE_LIFETIME) + ia6_lt.ia6t_expire = timenow + ia6_lt.ia6t_vltime; + else + ia6_lt.ia6t_expire = 0; + + if (ia6_lt.ia6t_pltime != ND6_INFINITE_LIFETIME) + ia6_lt.ia6t_preferred = timenow + ia6_lt.ia6t_pltime; + else + ia6_lt.ia6t_preferred = 0; + + in6ifa_setlifetime(ia, &ia6_lt); + IFA_UNLOCK(&ia->ia_ifa); + break; + + default: + VERIFY(0); /* NOTREACHED */ + } + + return (error); +} - case SIOCSETROUTERMODE_IN6: { /* struct in6_ifreq */ - int intval; +#define ifa2ia6(ifa) ((struct in6_ifaddr *)(void *)(ifa)) + +/* + * Generic INET6 control operations (ioctl's). + * + * ifp is NULL if not an interface-specific ioctl. + * + * Most of the routines called to handle the ioctls would end up being + * tail-call optimized, which unfortunately causes this routine to + * consume too much stack space; this is the reason for the "noinline" + * attribute used on those routines. + * + * If called directly from within the networking stack (as opposed to via + * pru_control), the socket parameter may be NULL. + */ +int +in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, + struct proc *p) +{ + struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; + struct in6_aliasreq sifra, *ifra = NULL; + struct in6_ifaddr *ia = NULL; + struct sockaddr_in6 sin6, *sa6 = NULL; + boolean_t privileged = (proc_suser(p) == 0); + boolean_t p64 = proc_is64bit(p); + boolean_t so_unlocked = FALSE; + int intval, error = 0; + + /* In case it's NULL, make sure it came from the kernel */ + VERIFY(so != NULL || p == kernproc); + + /* + * ioctls which don't require ifp, may require socket. 
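The lifetime checks above lean on a classic unsigned-overflow test: for an unsigned lifetime and a nonnegative current time, lifetime + now < now holds exactly when the addition wrapped, i.e. when the expiry time cannot be represented. As a free-standing sketch, assuming 32-bit unsigned arithmetic as in the kernel's u_int32_t lifetime fields:

    #include <stdint.h>

    /* Nonzero iff now + lifetime wraps and cannot represent an expiry. */
    static int
    expiry_wraps(uint32_t lifetime, uint32_t now)
    {
        return (lifetime + now < now);
    }

    /* e.g. expiry_wraps(0xffffffffu, 100) != 0, expiry_wraps(60, 100) == 0 */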
+ */ + switch (cmd) { +#if MROUTING + case SIOCGETSGCNT_IN6: /* struct sioc_sg_req6 */ + case SIOCGETMIFCNT_IN6_32: /* struct sioc_mif_req6_32 */ + case SIOCGETMIFCNT_IN6_64: /* struct sioc_mif_req6_64 */ + return (mrt6_ioctl(cmd, data)); + /* NOTREACHED */ +#endif /* MROUTING */ + + case SIOCAADDRCTL_POLICY: /* struct in6_addrpolicy */ + case SIOCDADDRCTL_POLICY: /* struct in6_addrpolicy */ + if (!privileged) + return (EPERM); + return (in6_src_ioctl(cmd, data)); + /* NOTREACHED */ + + case SIOCDRADD_IN6_32: /* struct in6_defrouter_32 */ + case SIOCDRADD_IN6_64: /* struct in6_defrouter_64 */ + case SIOCDRDEL_IN6_32: /* struct in6_defrouter_32 */ + case SIOCDRDEL_IN6_64: /* struct in6_defrouter_64 */ + if (!privileged) + return (EPERM); + return (defrtrlist_ioctl(cmd, data)); + /* NOTREACHED */ + + case SIOCGASSOCIDS32: /* struct so_aidreq32 */ + case SIOCGASSOCIDS64: /* struct so_aidreq64 */ + return (in6ctl_associd(so, cmd, data)); + /* NOTREACHED */ + + case SIOCGCONNIDS32: /* struct so_cidreq32 */ + case SIOCGCONNIDS64: /* struct so_cidreq64 */ + return (in6ctl_connid(so, cmd, data)); + /* NOTREACHED */ + + case SIOCGCONNINFO32: /* struct so_cinforeq32 */ + case SIOCGCONNINFO64: /* struct so_cinforeq64 */ + return (in6ctl_conninfo(so, cmd, data)); + /* NOTREACHED */ + } + + /* + * The rest of ioctls require ifp; reject if we don't have one; + * return ENXIO to be consistent with ifioctl(). + */ + if (ifp == NULL) + return (ENXIO); + + /* + * ioctls which require ifp but not interface address. + */ + switch (cmd) { + case SIOCAUTOCONF_START: /* struct in6_ifreq */ + if (!privileged) + return (EPERM); + return (in6_autoconf(ifp, TRUE)); + /* NOTREACHED */ + + case SIOCAUTOCONF_STOP: /* struct in6_ifreq */ + if (!privileged) + return (EPERM); + return (in6_autoconf(ifp, FALSE)); + /* NOTREACHED */ + + case SIOCLL_START_32: /* struct in6_aliasreq_32 */ + case SIOCLL_START_64: /* struct in6_aliasreq_64 */ + if (!privileged) + return (EPERM); + return (in6ctl_llstart(ifp, cmd, data)); + /* NOTREACHED */ + + case SIOCLL_STOP: /* struct in6_ifreq */ + if (!privileged) + return (EPERM); + return (in6ctl_llstop(ifp)); + /* NOTREACHED */ + + case SIOCSETROUTERMODE_IN6: /* struct in6_ifreq */ + if (!privileged) + return (EPERM); - VERIFY(ifp != NULL); bcopy(&((struct in6_ifreq *)(void *)data)->ifr_intval, &intval, sizeof (intval)); return (in6_setrouter(ifp, intval)); /* NOTREACHED */ - } case SIOCPROTOATTACH_IN6_32: /* struct in6_aliasreq_32 */ case SIOCPROTOATTACH_IN6_64: /* struct in6_aliasreq_64 */ + if (!privileged) + return (EPERM); return (in6_domifattach(ifp)); /* NOTREACHED */ case SIOCPROTODETACH_IN6: /* struct in6_ifreq */ + if (!privileged) + return (EPERM); + /* Cleanup interface routes and addresses */ in6_purgeif(ifp); if ((error = proto_unplumb(PF_INET6, ifp))) - printf("SIOCPROTODETACH_IN6: %s error=%d\n", + log(LOG_ERR, "SIOCPROTODETACH_IN6: %s error=%d\n", if_name(ifp), error); return (error); /* NOTREACHED */ + + case SIOCSNDFLUSH_IN6: /* struct in6_ifreq */ + case SIOCSPFXFLUSH_IN6: /* struct in6_ifreq */ + case SIOCSRTRFLUSH_IN6: /* struct in6_ifreq */ + case SIOCSDEFIFACE_IN6_32: /* struct in6_ndifreq_32 */ + case SIOCSDEFIFACE_IN6_64: /* struct in6_ndifreq_64 */ + case SIOCSIFINFO_FLAGS: /* struct in6_ndireq */ + if (!privileged) + return (EPERM); + /* FALLTHRU */ + case OSIOCGIFINFO_IN6: /* struct in6_ondireq */ + case SIOCGIFINFO_IN6: /* struct in6_ondireq */ + case SIOCGDRLST_IN6_32: /* struct in6_drlist_32 */ + case SIOCGDRLST_IN6_64: /* struct in6_drlist_64 */ + case 
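For reference, the privileged paths above are reachable from user space through a plain AF_INET6 socket. A hedged sketch of driving SIOCAUTOCONF_START: the request and struct in6_ifreq are Apple-private declarations from <netinet6/in6_var.h>, so availability depends on the SDK, and the proc_suser() gate above means non-root callers see EPERM:

    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <netinet/in.h>
    #include <netinet6/in6_var.h>

    static int
    start_autoconf(const char *ifname)
    {
        struct in6_ifreq ifr;
        int s, rc;

        if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
            return (-1);
        memset(&ifr, 0, sizeof (ifr));
        strlcpy(ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
        rc = ioctl(s, SIOCAUTOCONF_START, &ifr); /* EPERM if not root */
        (void)close(s);
        return (rc);
    }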
SIOCGPRLST_IN6_32: /* struct in6_prlist_32 */ + case SIOCGPRLST_IN6_64: /* struct in6_prlist_64 */ + case SIOCGNBRINFO_IN6_32: /* struct in6_nbrinfo_32 */ + case SIOCGNBRINFO_IN6_64: /* struct in6_nbrinfo_64 */ + case SIOCGDEFIFACE_IN6_32: /* struct in6_ndifreq_32 */ + case SIOCGDEFIFACE_IN6_64: /* struct in6_ndifreq_64 */ + return (nd6_ioctl(cmd, data, ifp)); + /* NOTREACHED */ + + case SIOCSIFPREFIX_IN6: /* struct in6_prefixreq (deprecated) */ + case SIOCDIFPREFIX_IN6: /* struct in6_prefixreq (deprecated) */ + case SIOCAIFPREFIX_IN6: /* struct in6_rrenumreq (deprecated) */ + case SIOCCIFPREFIX_IN6: /* struct in6_rrenumreq (deprecated) */ + case SIOCSGIFPREFIX_IN6: /* struct in6_rrenumreq (deprecated) */ + case SIOCGIFPREFIX_IN6: /* struct in6_prefixreq (deprecated) */ + log(LOG_NOTICE, + "prefix ioctls are now invalidated. " + "please use ifconfig.\n"); + return (EOPNOTSUPP); + /* NOTREACHED */ + + case SIOCSSCOPE6: /* struct in6_ifreq (deprecated) */ + case SIOCGSCOPE6: /* struct in6_ifreq (deprecated) */ + case SIOCGSCOPE6DEF: /* struct in6_ifreq (deprecated) */ + return (EOPNOTSUPP); + /* NOTREACHED */ + + case SIOCALIFADDR: /* struct if_laddrreq */ + case SIOCDLIFADDR: /* struct if_laddrreq */ + if (!privileged) + return (EPERM); + /* FALLTHRU */ + case SIOCGLIFADDR: { /* struct if_laddrreq */ + struct if_laddrreq iflr; + + bcopy(data, &iflr, sizeof (iflr)); + error = in6ctl_lifaddr(ifp, cmd, &iflr, p64); + bcopy(&iflr, data, sizeof (iflr)); + return (error); + /* NOTREACHED */ + } + + case SIOCLL_CGASTART_32: /* struct in6_llstartreq_32 */ + case SIOCLL_CGASTART_64: /* struct in6_llstartreq_64 */ + if (!privileged) + return (EPERM); + return (in6ctl_cgastart(ifp, cmd, data)); + /* NOTREACHED */ + + case SIOCGIFSTAT_IN6: /* struct in6_ifreq */ + case SIOCGIFSTAT_ICMP6: /* struct in6_ifreq */ + return (in6ctl_gifstat(ifp, cmd, ifr)); + /* NOTREACHED */ + } + + /* + * ioctls which require interface address; obtain sockaddr_in6. + */ + switch (cmd) { + case SIOCSIFADDR_IN6: /* struct in6_ifreq (deprecated) */ + case SIOCSIFDSTADDR_IN6: /* struct in6_ifreq (deprecated) */ + case SIOCSIFNETMASK_IN6: /* struct in6_ifreq (deprecated) */ + /* + * Since IPv6 allows a node to assign multiple addresses + * on a single interface, SIOCSIFxxx ioctls are deprecated. + */ + /* we decided to obsolete this command (20000704) */ + return (EOPNOTSUPP); + /* NOTREACHED */ + + case SIOCAIFADDR_IN6_32: /* struct in6_aliasreq_32 */ + case SIOCAIFADDR_IN6_64: /* struct in6_aliasreq_64 */ + if (!privileged) + return (EPERM); + /* + * Convert user ifra to the kernel form, when appropriate. + * This allows the conversion between different data models + * to be centralized, so that it can be passed around to other + * routines that are expecting the kernel form. 
+ */ + ifra = in6_aliasreq_to_native(data, + (cmd == SIOCAIFADDR_IN6_64), &sifra); + bcopy(&ifra->ifra_addr, &sin6, sizeof (sin6)); + sa6 = &sin6; + break; + + case SIOCDIFADDR_IN6: /* struct in6_ifreq */ + case SIOCSIFALIFETIME_IN6: /* struct in6_ifreq */ + if (!privileged) + return (EPERM); + /* FALLTHRU */ + case SIOCGIFADDR_IN6: /* struct in6_ifreq */ + case SIOCGIFDSTADDR_IN6: /* struct in6_ifreq */ + case SIOCGIFNETMASK_IN6: /* struct in6_ifreq */ + case SIOCGIFAFLAG_IN6: /* struct in6_ifreq */ + case SIOCGIFALIFETIME_IN6: /* struct in6_ifreq */ + bcopy(&ifr->ifr_addr, &sin6, sizeof (sin6)); + sa6 = &sin6; + break; } /* @@ -801,6 +1322,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * presence of ifra_addr, and reject invalid ones here. * It also decreases duplicated code among SIOC*_IN6 operations. */ + VERIFY(ia == NULL); if (sa6 != NULL && sa6->sin6_family == AF_INET6) { if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) { if (sa6->sin6_addr.s6_addr16[1] == 0) { @@ -818,35 +1340,22 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, sa6->sin6_scope_id = 0; /* XXX: good way? */ } } + /* + * Any failures from this point on must take into account + * a non-NULL "ia" with an outstanding reference count, and + * therefore requires IFA_REMREF. Jump to "done" label + * instead of calling return if "ia" is valid. + */ ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr); - } else { - ia = NULL; } + /* + * SIOCDIFADDR_IN6/SIOCAIFADDR_IN6 specific tests. + */ switch (cmd) { - case SIOCSIFADDR_IN6: /* struct in6_ifreq */ - case SIOCSIFDSTADDR_IN6: /* struct in6_ifreq */ - case SIOCSIFNETMASK_IN6: /* struct in6_ifreq */ - /* - * Since IPv6 allows a node to assign multiple addresses - * on a single interface, SIOCSIFxxx ioctls are deprecated. - */ - /* we decided to obsolete this command (20000704) */ - error = EINVAL; - goto ioctl_cleanup; - case SIOCDIFADDR_IN6: /* struct in6_ifreq */ - /* - * for IPv4, we look for existing in_ifaddr here to allow - * "ifconfig if0 delete" to remove the first IPv4 address on - * the interface. For IPv6, as the spec allows multiple - * interface address from the day one, we consider "remove the - * first one" semantics to be not preferable. - */ - if (ia == NULL) { - error = EADDRNOTAVAIL; - goto ioctl_cleanup; - } + if (ia == NULL) + return (EADDRNOTAVAIL); /* FALLTHROUGH */ case SIOCAIFADDR_IN6_32: /* struct in6_aliasreq_32 */ case SIOCAIFADDR_IN6_64: /* struct in6_aliasreq_64 */ @@ -857,478 +1366,290 @@ in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, * since SIOCDIFADDR_IN6 falls thru above. */ if (sa6->sin6_family != AF_INET6 || - sa6->sin6_len != sizeof(struct sockaddr_in6)) { + sa6->sin6_len != sizeof (struct sockaddr_in6)) { error = EAFNOSUPPORT; - goto ioctl_cleanup; - } - if (!privileged) { - error = EPERM; - goto ioctl_cleanup; + goto done; } break; + } + + /* + * Unlock the socket since ifnet_ioctl() may be invoked by + * one of the ioctl handlers below. Socket will be re-locked + * prior to returning. + */ + if (so != NULL) { + socket_unlock(so, 0); + so_unlocked = TRUE; + } + /* + * And finally process address-related ioctls. + */ + switch (cmd) { case SIOCGIFADDR_IN6: /* struct in6_ifreq */ /* This interface is basically deprecated. use SIOCGIFCONF. 
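The "jump to done instead of calling return" comment above is the whole contract: once in6ifa_ifpwithaddr() hands back a held ifaddr, every failure path must funnel through a single exit that drops the reference. The discipline in miniature, with all names invented:

    #include <errno.h>
    #include <stddef.h>

    struct obj;
    struct obj *lookup_hold(const char *key);  /* held ref or NULL */
    void drop_ref(struct obj *);
    int step_one(struct obj *);
    int step_two(struct obj *);

    static int
    do_op(const char *key)
    {
        struct obj *o;
        int error;

        if ((o = lookup_hold(key)) == NULL)
            return (ENOENT);   /* nothing held: plain return is safe */
        if ((error = step_one(o)) != 0)
            goto done;         /* never plain-return past this point */
        error = step_two(o);
    done:
        drop_ref(o);           /* single exit drops the reference */
        return (error);
    }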
*/ /* FALLTHRU */ - case SIOCGIFAFLAG_IN6: /* struct in6_ifreq */ - case SIOCGIFNETMASK_IN6: /* struct in6_ifreq */ case SIOCGIFDSTADDR_IN6: /* struct in6_ifreq */ - case SIOCGIFALIFETIME_IN6: /* struct in6_ifreq */ - /* must think again about its semantics */ - if (ia == NULL) { - error = EADDRNOTAVAIL; - goto ioctl_cleanup; - } + error = in6ctl_gifaddr(ifp, ia, cmd, ifr); break; - case SIOCSIFALIFETIME_IN6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; - - if (!privileged) { - error = EPERM; - goto ioctl_cleanup; - } - if (ia == NULL) { + case SIOCGIFNETMASK_IN6: /* struct in6_ifreq */ + if (ia != NULL) { + IFA_LOCK(&ia->ia_ifa); + bcopy(&ia->ia_prefixmask, &ifr->ifr_addr, + sizeof (struct sockaddr_in6)); + IFA_UNLOCK(&ia->ia_ifa); + } else { error = EADDRNOTAVAIL; - goto ioctl_cleanup; } - /* sanity for overflow - beware unsigned */ - if (p64) { - struct in6_addrlifetime_64 lt; + break; - bcopy(&ifr->ifr_ifru.ifru_lifetime, <, sizeof (lt)); - if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0 - || lt.ia6t_vltime != ND6_INFINITE_LIFETIME) - && lt.ia6t_vltime + timenow.tv_sec < - timenow.tv_sec) { - error = EINVAL; - goto ioctl_cleanup; - } - if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0 - || lt.ia6t_pltime != ND6_INFINITE_LIFETIME) - && lt.ia6t_pltime + timenow.tv_sec < - timenow.tv_sec) { - error = EINVAL; - goto ioctl_cleanup; - } + case SIOCGIFAFLAG_IN6: /* struct in6_ifreq */ + if (ia != NULL) { + IFA_LOCK(&ia->ia_ifa); + bcopy(&ia->ia6_flags, &ifr->ifr_ifru.ifru_flags6, + sizeof (ifr->ifr_ifru.ifru_flags6)); + IFA_UNLOCK(&ia->ia_ifa); } else { - struct in6_addrlifetime_32 lt; - - bcopy(&ifr->ifr_ifru.ifru_lifetime, <, sizeof (lt)); - if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0 - || lt.ia6t_vltime != ND6_INFINITE_LIFETIME) - && lt.ia6t_vltime + timenow.tv_sec < - timenow.tv_sec) { - error = EINVAL; - goto ioctl_cleanup; - } - if (((ia->ia6_flags & IN6_IFF_TEMPORARY) != 0 - || lt.ia6t_pltime != ND6_INFINITE_LIFETIME) - && lt.ia6t_pltime + timenow.tv_sec < - timenow.tv_sec) { - error = EINVAL; - goto ioctl_cleanup; - } + error = EADDRNOTAVAIL; } break; - } - } - - switch (cmd) { - case SIOCGIFADDR_IN6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; - struct sockaddr_in6 addr; - IFA_LOCK(&ia->ia_ifa); - bcopy(&ia->ia_addr, &addr, sizeof (addr)); - IFA_UNLOCK(&ia->ia_ifa); - if ((error = sa6_recoverscope(&addr, TRUE)) != 0) { - IFA_REMREF(&ia->ia_ifa); - return (error); - } - bcopy(&addr, &ifr->ifr_addr, sizeof (addr)); + case SIOCGIFALIFETIME_IN6: /* struct in6_ifreq */ + case SIOCSIFALIFETIME_IN6: /* struct in6_ifreq */ + error = in6ctl_alifetime(ia, cmd, ifr, p64); break; - } - - case SIOCGIFDSTADDR_IN6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; - struct sockaddr_in6 dstaddr; - if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { - error = EINVAL; - goto ioctl_cleanup; - } - /* - * XXX: should we check if ifa_dstaddr is NULL and return - * an error? 
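The SIOCGIFAFLAG_IN6 copy-out above pairs with a small amount of user-space code. A hedged sketch: struct in6_ifreq and the request code come from the KAME-derived <netinet6/in6_var.h>, ifr_addr is the ifr_ifru.ifru_addr alias, and error handling is abbreviated:

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <netinet/in.h>
    #include <netinet6/in6_var.h>

    static int
    print_addr_flags(const char *ifname, const struct sockaddr_in6 *sin6)
    {
        struct in6_ifreq ifr;
        int s, rc;

        if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
            return (-1);
        memset(&ifr, 0, sizeof (ifr));
        strlcpy(ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
        ifr.ifr_addr = *sin6;             /* address being queried */
        rc = ioctl(s, SIOCGIFAFLAG_IN6, &ifr);
        if (rc == 0)
            printf("flags6=0x%x\n", ifr.ifr_ifru.ifru_flags6);
        (void)close(s);
        return (rc);
    }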
- */ - IFA_LOCK(&ia->ia_ifa); - bcopy(&ia->ia_dstaddr, &dstaddr, sizeof (dstaddr)); - IFA_UNLOCK(&ia->ia_ifa); - if ((error = sa6_recoverscope(&dstaddr, TRUE)) != 0) { - IFA_REMREF(&ia->ia_ifa); - return (error); - } - bcopy(&dstaddr, &ifr->ifr_dstaddr, sizeof (dstaddr)); + case SIOCAIFADDR_IN6_32: /* struct in6_aliasreq_32 */ + case SIOCAIFADDR_IN6_64: /* struct in6_aliasreq_64 */ + error = in6ctl_aifaddr(ifp, ifra); break; - } - case SIOCGIFNETMASK_IN6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; + case SIOCDIFADDR_IN6: + in6ctl_difaddr(ifp, ia); + break; - IFA_LOCK(&ia->ia_ifa); - bcopy(&ia->ia_prefixmask, &ifr->ifr_addr, - sizeof (struct sockaddr_in6)); - IFA_UNLOCK(&ia->ia_ifa); + default: + error = ifnet_ioctl(ifp, PF_INET6, cmd, data); break; } - case SIOCGIFAFLAG_IN6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; +done: + if (ia != NULL) + IFA_REMREF(&ia->ia_ifa); + if (so_unlocked) + socket_lock(so, 0); - IFA_LOCK(&ia->ia_ifa); - bcopy(&ia->ia6_flags, &ifr->ifr_ifru.ifru_flags6, - sizeof (ifr->ifr_ifru.ifru_flags6)); - IFA_UNLOCK(&ia->ia_ifa); - break; - } + return (error); +} - case SIOCGIFSTAT_IN6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; +static __attribute__((noinline)) int +in6ctl_aifaddr(struct ifnet *ifp, struct in6_aliasreq *ifra) +{ + int i, error, addtmp, plen; + struct nd_prefix pr0, *pr; + struct in6_ifaddr *ia; - if (ifp == NULL) { - error = EINVAL; - goto ioctl_cleanup; - } - index = ifp->if_index; - lck_rw_lock_shared(&in6_ifs_rwlock); - if (in6_ifstat == NULL || index >= in6_ifstatmax || - in6_ifstat[index] == NULL) { - /* return EAFNOSUPPORT? */ - bzero(&ifr->ifr_ifru.ifru_stat, - sizeof (ifr->ifr_ifru.ifru_stat)); - } else { - bcopy(in6_ifstat[index], &ifr->ifr_ifru.ifru_stat, - sizeof (ifr->ifr_ifru.ifru_stat)); - } - lck_rw_done(&in6_ifs_rwlock); - break; - } + VERIFY(ifp != NULL && ifra != NULL); + ia = NULL; - case SIOCGIFSTAT_ICMP6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; + /* Attempt to attach the protocol, in case it isn't attached */ + error = in6_domifattach(ifp); + if (error == 0) { + /* PF_INET6 wasn't previously attached */ + error = in6_ifattach_aliasreq(ifp, NULL, NULL); + if (error != 0) + goto done; - if (ifp == NULL) { - error = EINVAL; - goto ioctl_cleanup; - } - index = ifp->if_index; - lck_rw_lock_shared(&icmp6_ifs_rwlock); - if (icmp6_ifstat == NULL || index >= icmp6_ifstatmax || - icmp6_ifstat[index] == NULL) { - /* return EAFNOSUPPORT? */ - bzero(&ifr->ifr_ifru.ifru_stat, - sizeof (ifr->ifr_ifru.ifru_icmp6stat)); - } else { - bcopy(icmp6_ifstat[index], - &ifr->ifr_ifru.ifru_icmp6stat, - sizeof (ifr->ifr_ifru.ifru_icmp6stat)); - } - lck_rw_done(&icmp6_ifs_rwlock); - break; + in6_if_up_dad_start(ifp); + } else if (error != EEXIST) { + goto done; } - case SIOCGIFALIFETIME_IN6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; + /* + * First, make or update the interface address structure, and link it + * to the list. + */ + error = in6_update_ifa(ifp, ifra, 0, &ia); + if (error != 0) + goto done; + VERIFY(ia != NULL); - IFA_LOCK(&ia->ia_ifa); - if (p64) { - struct in6_addrlifetime_64 lt; + /* Now, make the prefix on-link on the interface. 
*/ + plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL); + if (plen == 128) + goto done; - bzero(<, sizeof (lt)); - lt.ia6t_expire = ia->ia6_lifetime.ia6t_expire; - lt.ia6t_preferred = ia->ia6_lifetime.ia6t_preferred; - lt.ia6t_vltime = ia->ia6_lifetime.ia6t_vltime; - lt.ia6t_pltime = ia->ia6_lifetime.ia6t_pltime; - bcopy(<, &ifr->ifr_ifru.ifru_lifetime, sizeof (lt)); - } else { - struct in6_addrlifetime_32 lt; + /* + * NOTE: We'd rather create the prefix before the address, but we need + * at least one address to install the corresponding interface route, + * so we configure the address first. + */ - bzero(<, sizeof (lt)); - lt.ia6t_expire = (uint32_t)ia->ia6_lifetime.ia6t_expire; - lt.ia6t_preferred = - (uint32_t)ia->ia6_lifetime.ia6t_preferred; - lt.ia6t_vltime = (uint32_t)ia->ia6_lifetime.ia6t_vltime; - lt.ia6t_pltime = (uint32_t)ia->ia6_lifetime.ia6t_pltime; - bcopy(<, &ifr->ifr_ifru.ifru_lifetime, sizeof (lt)); - } - IFA_UNLOCK(&ia->ia_ifa); - break; + /* + * Convert mask to prefix length (prefixmask has already been validated + * in in6_update_ifa(). + */ + bzero(&pr0, sizeof (pr0)); + pr0.ndpr_plen = plen; + pr0.ndpr_ifp = ifp; + pr0.ndpr_prefix = ifra->ifra_addr; + pr0.ndpr_mask = ifra->ifra_prefixmask.sin6_addr; + + /* apply the mask for safety. */ + for (i = 0; i < 4; i++) { + pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= + ifra->ifra_prefixmask.sin6_addr.s6_addr32[i]; } - case SIOCSIFALIFETIME_IN6: { /* struct in6_ifreq */ - struct in6_ifreq *ifr = (struct in6_ifreq *)(void *)data; - - IFA_LOCK(&ia->ia_ifa); - if (p64) { - struct in6_addrlifetime_64 lt; - - bcopy(&ifr->ifr_ifru.ifru_lifetime, <, sizeof (lt)); - ia->ia6_lifetime.ia6t_expire = lt.ia6t_expire; - ia->ia6_lifetime.ia6t_preferred = lt.ia6t_preferred; - ia->ia6_lifetime.ia6t_vltime = lt.ia6t_vltime; - ia->ia6_lifetime.ia6t_pltime = lt.ia6t_pltime; - } else { - struct in6_addrlifetime_32 lt; + /* + * Since we don't have an API to set prefix (not address) lifetimes, we + * just use the same lifetimes as addresses. The (temporarily) + * installed lifetimes can be overridden by later advertised RAs (when + * accept_rtadv is non 0), which is an intended behavior. + */ + pr0.ndpr_raf_onlink = 1; /* should be configurable? */ + pr0.ndpr_raf_auto = !!(ifra->ifra_flags & IN6_IFF_AUTOCONF); + pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime; + pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime; + pr0.ndpr_stateflags |= NDPRF_STATIC; + lck_mtx_init(&pr0.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr); + + /* add the prefix if there's one. */ + if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { + /* + * nd6_prelist_add will install the corresponding interface + * route. 
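in6_mask2len(), used above to derive plen, is essentially a leading-ones counter that rejects non-contiguous masks. A sketch of the computation it is expected to perform (an illustration, not the kernel's implementation):

    #include <netinet/in.h>

    static int
    mask_to_plen(const struct in6_addr *mask)
    {
        int i = 0, plen = 0;

        while (i < 16 && mask->s6_addr[i] == 0xff) {
            plen += 8;              /* whole 0xff octets */
            i++;
        }
        if (i < 16) {
            unsigned char b = mask->s6_addr[i++];

            while (b & 0x80) {      /* leading ones in partial octet */
                plen++;
                b = (unsigned char)(b << 1);
            }
            if (b != 0)
                return (-1);        /* non-contiguous within octet */
        }
        for (; i < 16; i++)         /* remainder must be all-zero */
            if (mask->s6_addr[i] != 0)
                return (-1);
        return (plen);
    }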
+ */ + error = nd6_prelist_add(&pr0, NULL, &pr, FALSE); + if (error != 0) + goto done; - bcopy(&ifr->ifr_ifru.ifru_lifetime, <, sizeof (lt)); - ia->ia6_lifetime.ia6t_expire = - (uint32_t)lt.ia6t_expire; - ia->ia6_lifetime.ia6t_preferred = - (uint32_t)lt.ia6t_preferred; - ia->ia6_lifetime.ia6t_vltime = lt.ia6t_vltime; - ia->ia6_lifetime.ia6t_pltime = lt.ia6t_pltime; + if (pr == NULL) { + log(LOG_ERR, "%s: nd6_prelist_add okay, but" + " no prefix.\n", __func__); + error = EINVAL; + goto done; } - /* for sanity */ - if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME || - (ia->ia6_flags & IN6_IFF_TEMPORARY) != 0) { - ia->ia6_lifetime.ia6t_expire = - timenow.tv_sec + ia->ia6_lifetime.ia6t_vltime; - } else - ia->ia6_lifetime.ia6t_expire = 0; - if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME || - (ia->ia6_flags & IN6_IFF_TEMPORARY) != 0) { - ia->ia6_lifetime.ia6t_preferred = - timenow.tv_sec + ia->ia6_lifetime.ia6t_pltime; - } else - ia->ia6_lifetime.ia6t_preferred = 0; - IFA_UNLOCK(&ia->ia_ifa); - break; } - case SIOCAIFADDR_IN6_32: /* struct in6_aliasreq_32 */ - case SIOCAIFADDR_IN6_64: { /* struct in6_aliasreq_64 */ - int i; - struct nd_prefix pr0, *pr; - - VERIFY(ifra != NULL); + IFA_LOCK(&ia->ia_ifa); - /* Attempt to attach the protocol, in case it isn't attached */ - error = in6_domifattach(ifp); - if (error) { - if (error == EEXIST) - error = 0; - else - goto ioctl_cleanup; - } else { - /* PF_INET6 wasn't previously attached */ - if ((error = in6_if_up(ifp, NULL)) != 0) - goto ioctl_cleanup; - } + /* if this is a new autoconfed addr */ + addtmp = FALSE; + if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && ia->ia6_ndpr == NULL) { + NDPR_LOCK(pr); + ++pr->ndpr_addrcnt; + VERIFY(pr->ndpr_addrcnt != 0); + ia->ia6_ndpr = pr; + NDPR_ADDREF_LOCKED(pr); /* for addr reference */ /* - * first, make or update the interface address structure, - * and link it to the list. + * If this is the first autoconf address from the prefix, + * create a temporary address as well (when specified). */ - if ((error = in6_update_ifa(ifp, ifra, ia, 0, M_WAITOK)) != 0) - goto ioctl_cleanup; + addtmp = (ip6_use_tempaddr && pr->ndpr_addrcnt == 1); + NDPR_UNLOCK(pr); + } - /* - * then, make the prefix on-link on the interface. - * XXX: we'd rather create the prefix before the address, but - * we need at least one address to install the corresponding - * interface route, so we configure the address first. - */ + IFA_UNLOCK(&ia->ia_ifa); - /* - * convert mask to prefix length (prefixmask has already - * been validated in in6_update_ifa(). - */ - bzero(&pr0, sizeof(pr0)); - lck_mtx_init(&pr0.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr); - pr0.ndpr_ifp = ifp; - pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, - NULL); - if (pr0.ndpr_plen == 128) - break; /* we don't need to install a host route. */ - pr0.ndpr_prefix = ifra->ifra_addr; - pr0.ndpr_mask = ifra->ifra_prefixmask.sin6_addr; - /* apply the mask for safety. */ - for (i = 0; i < 4; i++) { - pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= - ifra->ifra_prefixmask.sin6_addr.s6_addr32[i]; - } - /* - * XXX: since we don't have an API to set prefix (not address) - * lifetimes, we just use the same lifetimes as addresses. - * The (temporarily) installed lifetimes can be overridden by - * later advertised RAs (when accept_rtadv is non 0), which is - * an intended behavior. - */ - pr0.ndpr_raf_onlink = 1; /* should be configurable? 
*/ - pr0.ndpr_raf_auto = - ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0); - pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime; - pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime; - pr0.ndpr_stateflags |= NDPRF_STATIC; - - /* add the prefix if there's one. */ - if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { - /* - * nd6_prelist_add will install the corresponding - * interface route. - */ - if ((error = nd6_prelist_add(&pr0, NULL, &pr, - FALSE)) != 0) - goto ioctl_cleanup; - if (pr == NULL) { - log(LOG_ERR, "nd6_prelist_add succedded but " - "no prefix\n"); - error = EINVAL; - goto ioctl_cleanup; - } - } - if (ia != NULL) - IFA_REMREF(&ia->ia_ifa); - if ((ia = in6ifa_ifpwithaddr(ifp, - &ifra->ifra_addr.sin6_addr)) == NULL) { - /* XXX: this should not happen! */ - log(LOG_ERR, "in6_control: addition succeeded, but" - " no ifaddr\n"); - } else { - IFA_LOCK(&ia->ia_ifa); - if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && - ia->ia6_ndpr == NULL) { /* new autoconfed addr */ - NDPR_LOCK(pr); - pr->ndpr_addrcnt++; - VERIFY(pr->ndpr_addrcnt != 0); - ia->ia6_ndpr = pr; - NDPR_ADDREF_LOCKED(pr); /* for addr reference */ + if (addtmp) { + int e; + e = in6_tmpifadd(ia, 1); + if (e != 0) + log(LOG_NOTICE, "%s: failed to create a" + " temporary address, error=%d\n", + __func__, e); + } - /* - * If this is the first autoconf address from - * the prefix, create a temporary address - * as well (when specified). - */ - if (ip6_use_tempaddr && - pr->ndpr_addrcnt == 1) { - int e; - - NDPR_UNLOCK(pr); - IFA_UNLOCK(&ia->ia_ifa); - - if ((e = in6_tmpifadd(ia, 1, - M_WAITOK)) != 0) { - log(LOG_NOTICE, "in6_control: " - "failed to create a " - "temporary address, " - "errno=%d\n", - e); - } - } else { - NDPR_UNLOCK(pr); - IFA_UNLOCK(&ia->ia_ifa); - } - } else { - IFA_UNLOCK(&ia->ia_ifa); - } - /* - * this might affect the status of autoconfigured - * addresses, that is, this address might make - * other addresses detached. - */ - lck_mtx_lock(nd6_mutex); - pfxlist_onlink_check(); - lck_mtx_unlock(nd6_mutex); - } + /* + * This might affect the status of autoconfigured addresses, that is, + * this address might make other addresses detached. + */ + lck_mtx_lock(nd6_mutex); + pfxlist_onlink_check(); + lck_mtx_unlock(nd6_mutex); - /* Drop use count held above during lookup/add */ - NDPR_REMREF(pr); -#if PF - pf_ifaddr_hook(ifp, cmd); -#endif /* PF */ - break; - } + /* Drop use count held above during lookup/add */ + NDPR_REMREF(pr); - case SIOCDIFADDR_IN6: { /* struct in6_ifreq */ - int i = 0; - struct nd_prefix pr0, *pr; +done: + if (ia != NULL) + IFA_REMREF(&ia->ia_ifa); + return (error); +} - /* - * If the address being deleted is the only one that owns - * the corresponding prefix, expire the prefix as well. - * XXX: theoretically, we don't have to worry about such - * relationship, since we separate the address management - * and the prefix management. We do this, however, to provide - * as much backward compatibility as possible in terms of - * the ioctl operation. - * Note that in6_purgeaddr() will decrement ndpr_addrcnt. 
- */ - IFA_LOCK(&ia->ia_ifa); - bzero(&pr0, sizeof(pr0)); - pr0.ndpr_ifp = ifp; - pr0.ndpr_plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, - NULL); - if (pr0.ndpr_plen == 128) { - IFA_UNLOCK(&ia->ia_ifa); - goto purgeaddr; - } - pr0.ndpr_prefix = ia->ia_addr; - pr0.ndpr_mask = ia->ia_prefixmask.sin6_addr; - for (i = 0; i < 4; i++) { - pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= - ia->ia_prefixmask.sin6_addr.s6_addr32[i]; - } - IFA_UNLOCK(&ia->ia_ifa); - /* - * The logic of the following condition is a bit complicated. - * We expire the prefix when - * 1. the address obeys autoconfiguration and it is the - * only owner of the associated prefix, or - * 2. the address does not obey autoconf and there is no - * other owner of the prefix. - */ - if ((pr = nd6_prefix_lookup(&pr0)) != NULL) { - IFA_LOCK(&ia->ia_ifa); - NDPR_LOCK(pr); - if (((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && - pr->ndpr_addrcnt == 1) || - ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 && - pr->ndpr_addrcnt == 0)) { - /* XXX: just for expiration */ - pr->ndpr_expire = 1; - } - NDPR_UNLOCK(pr); - IFA_UNLOCK(&ia->ia_ifa); +static __attribute__((noinline)) void +in6ctl_difaddr(struct ifnet *ifp, struct in6_ifaddr *ia) +{ + int i = 0; + struct nd_prefix pr0, *pr; - /* Drop use count held above during lookup */ - NDPR_REMREF(pr); - } + VERIFY(ifp != NULL && ia != NULL); -purgeaddr: - in6_purgeaddr(&ia->ia_ifa); -#if PF - pf_ifaddr_hook(ifp, cmd); -#endif /* PF */ - break; + /* + * If the address being deleted is the only one that owns + * the corresponding prefix, expire the prefix as well. + * XXX: theoretically, we don't have to worry about such + * relationship, since we separate the address management + * and the prefix management. We do this, however, to provide + * as much backward compatibility as possible in terms of + * the ioctl operation. + * Note that in6_purgeaddr() will decrement ndpr_addrcnt. + */ + IFA_LOCK(&ia->ia_ifa); + bzero(&pr0, sizeof (pr0)); + pr0.ndpr_ifp = ifp; + pr0.ndpr_plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); + if (pr0.ndpr_plen == 128) { + IFA_UNLOCK(&ia->ia_ifa); + goto purgeaddr; + } + pr0.ndpr_prefix = ia->ia_addr; + pr0.ndpr_mask = ia->ia_prefixmask.sin6_addr; + for (i = 0; i < 4; i++) { + pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= + ia->ia_prefixmask.sin6_addr.s6_addr32[i]; } + IFA_UNLOCK(&ia->ia_ifa); + /* + * The logic of the following condition is a bit complicated. + * We expire the prefix when + * 1. the address obeys autoconfiguration and it is the + * only owner of the associated prefix, or + * 2. the address does not obey autoconf and there is no + * other owner of the prefix. + */ + if ((pr = nd6_prefix_lookup(&pr0)) != NULL) { + IFA_LOCK(&ia->ia_ifa); + NDPR_LOCK(pr); + if (((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0 && + pr->ndpr_addrcnt == 1) || + ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0 && + pr->ndpr_addrcnt == 0)) { + /* XXX: just for expiration */ + pr->ndpr_expire = 1; + } + NDPR_UNLOCK(pr); + IFA_UNLOCK(&ia->ia_ifa); - default: - error = ifnet_ioctl(ifp, PF_INET6, cmd, data); - goto ioctl_cleanup; + /* Drop use count held above during lookup */ + NDPR_REMREF(pr); } -ioctl_cleanup: - if (ia != NULL) - IFA_REMREF(&ia->ia_ifa); - return (error); +purgeaddr: + in6_purgeaddr(&ia->ia_ifa); } -static int +static __attribute__((noinline)) int in6_autoconf(struct ifnet *ifp, int enable) { int error = 0; + VERIFY(ifp != NULL); + if (ifp->if_flags & IFF_LOOPBACK) return (EINVAL); @@ -1392,9 +1713,11 @@ in6_autoconf(struct ifnet *ifp, int enable) * the interface. 
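The "a bit complicated" condition in in6ctl_difaddr() compresses to a two-case predicate; a sketch, with the asymmetric thresholds explained by the fact that autoconf addresses count themselves in ndpr_addrcnt while manually configured ones do not:

    /*
     * Expire the prefix when the address being deleted is its last
     * autoconf owner, or is manual and the prefix has no owners.
     */
    static int
    should_expire_prefix(int is_autoconf, unsigned int ndpr_addrcnt)
    {
        return ((is_autoconf && ndpr_addrcnt == 1) ||
            (!is_autoconf && ndpr_addrcnt == 0));
    }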
Entering or exiting this mode will result in the removal of * autoconfigured IPv6 addresses on the interface. */ -static int +static __attribute__((noinline)) int in6_setrouter(struct ifnet *ifp, int enable) { + VERIFY(ifp != NULL); + if (ifp->if_flags & IFF_LOOPBACK) return (ENODEV); @@ -1437,368 +1760,91 @@ in6_setrouter(struct ifnet *ifp, int enable) return (in6_autoconf(ifp, FALSE)); } -/* - * Update parameters of an IPv6 interface address. - * If necessary, a new entry is created and linked into address chains. - * This function is separated from in6_control(). - */ -int -in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, - struct in6_ifaddr *ia, int flags, int how) +static int +in6_to_kamescope(struct sockaddr_in6 *sin6, struct ifnet *ifp) { - int error = 0, hostIsNew = 0, plen = -1; - struct in6_ifaddr *oia; - struct sockaddr_in6 dst6; - struct in6_addrlifetime *lt; - struct in6_multi *in6m_sol = NULL; - struct in6_multi_mship *imm; - struct timeval timenow; - struct rtentry *rt; - struct ifaddr *ifa = NULL; - int delay; + struct sockaddr_in6 tmp; + int error, id; + VERIFY(sin6 != NULL); + tmp = *sin6; - /* Validate parameters */ - if (ifp == NULL || ifra == NULL) /* this maybe redundant */ - return(EINVAL); + error = in6_recoverscope(&tmp, &sin6->sin6_addr, ifp); + if (error != 0) + return (error); - /* - * The destination address for a p2p link must have a family - * of AF_UNSPEC or AF_INET6. - */ - if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && - ifra->ifra_dstaddr.sin6_family != AF_INET6 && - ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) - return(EAFNOSUPPORT); - /* - * validate ifra_prefixmask. don't check sin6_family, netmask - * does not carry fields other than sin6_len. - */ - if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6)) - return(EINVAL); - /* - * Set the address family value for the mask if it was not set. - * Radar 3899482. - */ - if (ifra->ifra_prefixmask.sin6_len == sizeof(struct sockaddr_in6) && - ifra->ifra_prefixmask.sin6_family == 0) { - ifra->ifra_prefixmask.sin6_family = AF_INET6; - } - /* - * Because the IPv6 address architecture is classless, we require - * users to specify a (non 0) prefix length (mask) for a new address. - * We also require the prefix (when specified) mask is valid, and thus - * reject a non-consecutive mask. - */ - if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0) - return(EINVAL); - if (ifra->ifra_prefixmask.sin6_len != 0) { - plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, - (u_char *)&ifra->ifra_prefixmask + - ifra->ifra_prefixmask.sin6_len); - if (plen <= 0) - return (EINVAL); - } else { - /* - * In this case, ia must not be NULL. We just use its prefix - * length. - */ - IFA_LOCK(&ia->ia_ifa); - plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); - IFA_UNLOCK(&ia->ia_ifa); - } - /* - * If the destination address on a p2p interface is specified, - * and the address is a scoped one, validate/set the scope - * zone identifier. - */ - dst6 = ifra->ifra_dstaddr; - if (((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 ) && - (dst6.sin6_family == AF_INET6)) { - int scopeid; - - if ((error = in6_recoverscope(&dst6, - &ifra->ifra_dstaddr.sin6_addr, - ifp)) != 0) - return(error); - - scopeid = in6_addr2scopeid(ifp, &dst6.sin6_addr); - if (dst6.sin6_scope_id == 0) /* user omit to specify the ID. */ - dst6.sin6_scope_id = scopeid; - else if (dst6.sin6_scope_id != scopeid) - return(EINVAL); /* scope ID mismatch. 
*/ - - if ((error = in6_embedscope(&dst6.sin6_addr, &dst6, NULL, NULL, - NULL)) != 0) - return(error); - dst6.sin6_scope_id = 0; /* XXX */ - } - /* - * The destination address can be specified only for a p2p or a - * loopback interface. If specified, the corresponding prefix length - * must be 128. - */ - if (ifra->ifra_dstaddr.sin6_family == AF_INET6) { - if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) { - /* XXX: noisy message */ - log(LOG_INFO, "in6_update_ifa: a destination can be " - "specified for a p2p or a loopback IF only\n"); - return(EINVAL); - } - if (plen != 128) { - /* - * The following message seems noisy, but we dare to - * add it for diagnosis. - */ - log(LOG_INFO, "in6_update_ifa: prefixlen must be 128 " - "when dstaddr is specified\n"); - return(EINVAL); - } - } - /* lifetime consistency check */ + id = in6_addr2scopeid(ifp, &tmp.sin6_addr); + if (tmp.sin6_scope_id == 0) + tmp.sin6_scope_id = id; + else if (tmp.sin6_scope_id != id) + return (EINVAL); /* scope ID mismatch. */ - getmicrotime(&timenow); - lt = &ifra->ifra_lifetime; - if ((lt->ia6t_vltime != ND6_INFINITE_LIFETIME - || (ifra->ifra_flags & IN6_IFF_TEMPORARY) != 0) - && lt->ia6t_vltime + timenow.tv_sec < timenow.tv_sec) { - return EINVAL; - } - if (lt->ia6t_vltime == 0) { - /* - * the following log might be noisy, but this is a typical - * configuration mistake or a tool's bug. - */ - log(LOG_INFO, - "in6_update_ifa: valid lifetime is 0 for %s\n", - ip6_sprintf(&ifra->ifra_addr.sin6_addr)); - } - if ((lt->ia6t_pltime != ND6_INFINITE_LIFETIME - || (ifra->ifra_flags & IN6_IFF_TEMPORARY) != 0) - && lt->ia6t_pltime + timenow.tv_sec < timenow.tv_sec) { - return EINVAL; - } + error = in6_embedscope(&tmp.sin6_addr, &tmp, NULL, NULL, NULL); + if (error != 0) + return (error); - /* - * If this is a new address, allocate a new ifaddr and link it - * into chains. - */ - if (ia == NULL) { - hostIsNew = 1; - /* - * in6_update_ifa() may be called in a process of a received - * RA; in such a case, we should call malloc with M_NOWAIT. - * The exception to this is during init time or as part of - * handling an ioctl, when we know it's okay to do M_WAITOK. - */ - ia = in6_ifaddr_alloc(how); - if (ia == NULL) - return (ENOBUFS); - ifnet_lock_exclusive(ifp); - IFA_LOCK(&ia->ia_ifa); - LIST_INIT(&ia->ia6_memberships); - /* Initialize the address and masks, and put time stamp */ - ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; - ia->ia_addr.sin6_family = AF_INET6; - ia->ia_addr.sin6_len = sizeof(ia->ia_addr); - ia->ia6_createtime = timenow.tv_sec; - if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { - /* - * XXX: some functions expect that ifa_dstaddr is not - * NULL for p2p interfaces. 
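in6_to_kamescope() round-trips the KAME convention: while inside the kernel, a scoped (e.g. link-local) address carries its zone, the interface index, embedded in the second 16-bit word of the in6_addr, and sin6_scope_id is zeroed before the address is handed back out. The bit placement in isolation, sketched with standard headers (in6_embedscope()/in6_recoverscope() do more validation than this):

    #include <stdint.h>
    #include <string.h>
    #include <netinet/in.h>

    static void
    kame_embed(struct in6_addr *a, uint16_t zone)
    {
        uint16_t w = htons(zone);

        memcpy(&a->s6_addr[2], &w, sizeof (w)); /* s6_addr16[1] */
    }

    static uint16_t
    kame_recover(struct in6_addr *a)
    {
        uint16_t w;

        memcpy(&w, &a->s6_addr[2], sizeof (w));
        memset(&a->s6_addr[2], 0, sizeof (w));  /* clear before export */
        return (ntohs(w));
    }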
- */ - ia->ia_ifa.ifa_dstaddr - = (struct sockaddr *)&ia->ia_dstaddr; - } else { - ia->ia_ifa.ifa_dstaddr = NULL; - } - ia->ia_ifa.ifa_netmask - = (struct sockaddr *)&ia->ia_prefixmask; + tmp.sin6_scope_id = 0; + *sin6 = tmp; + return (0); +} - ia->ia_ifp = ifp; - /* if_attach_ifa() holds a reference for ifa_link */ - if_attach_ifa(ifp, &ia->ia_ifa); - /* hold a reference for this routine */ - IFA_ADDREF_LOCKED(&ia->ia_ifa); - IFA_UNLOCK(&ia->ia_ifa); - ifnet_lock_done(ifp); - lck_rw_lock_exclusive(&in6_ifaddr_rwlock); - /* Hold a reference for in6_ifaddrs link */ - IFA_ADDREF(&ia->ia_ifa); - if ((oia = in6_ifaddrs) != NULL) { - for ( ; oia->ia_next; oia = oia->ia_next) - continue; - oia->ia_next = ia; - } else { - in6_ifaddrs = ia; - } - lck_rw_done(&in6_ifaddr_rwlock); - } else { - /* hold a reference for this routine */ - IFA_ADDREF(&ia->ia_ifa); - } +static int +in6_ifaupdate_aux(struct in6_ifaddr *ia, struct ifnet *ifp, int ifaupflags) +{ + struct sockaddr_in6 mltaddr, mltmask; + struct in6_addr llsol; + struct ifaddr *ifa; + struct in6_multi *in6m_sol; + struct in6_multi_mship *imm; + struct rtentry *rt; + int delay, error; + VERIFY(ifp != NULL && ia != NULL); ifa = &ia->ia_ifa; - IFA_LOCK(ifa); - - /* update timestamp */ - ia->ia6_updatetime = timenow.tv_sec; - - /* set prefix mask */ - if (ifra->ifra_prefixmask.sin6_len) { - /* - * We prohibit changing the prefix length of an existing - * address, because - * + such an operation should be rare in IPv6, and - * + the operation would confuse prefix management. - */ - if (ia->ia_prefixmask.sin6_len && - in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) { - log(LOG_INFO, "in6_update_ifa: the prefix length of an" - " existing (%s) address should not be changed\n", - ip6_sprintf(&ia->ia_addr.sin6_addr)); - error = EINVAL; - IFA_UNLOCK(ifa); - goto unlink; - } - ia->ia_prefixmask = ifra->ifra_prefixmask; - } - - /* - * If a new destination address is specified, scrub the old one and - * install the new destination. Note that the interface must be - * p2p or loopback (see the check above.) - */ - if (dst6.sin6_family == AF_INET6 && - !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) { - if ((ia->ia_flags & IFA_ROUTE)) { - int e; - - IFA_UNLOCK(ifa); - if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, - RTF_HOST)) != 0) { - log(LOG_ERR, "in6_update_ifa: failed to remove " - "a route to the old destination: %s\n", - ip6_sprintf(&ia->ia_addr.sin6_addr)); - /* proceed anyway... */ - } - IFA_LOCK(ifa); - } else { - ia->ia_flags &= ~IFA_ROUTE; - } - IFA_LOCK_ASSERT_HELD(ifa); - ia->ia_dstaddr = dst6; - } - - /* - * Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred - * to see if the address is deprecated or invalidated, but initialize - * these members for applications. - */ - ia->ia6_lifetime = ifra->ifra_lifetime; - if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME - || (ifra->ifra_flags & IN6_IFF_TEMPORARY) != 0) { - ia->ia6_lifetime.ia6t_expire = - timenow.tv_sec + ia->ia6_lifetime.ia6t_vltime; - } else - ia->ia6_lifetime.ia6t_expire = 0; - if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME - || (ifra->ifra_flags & IN6_IFF_TEMPORARY) != 0) { - ia->ia6_lifetime.ia6t_preferred = - timenow.tv_sec + ia->ia6_lifetime.ia6t_pltime; - } else - ia->ia6_lifetime.ia6t_preferred = 0; - - IFA_UNLOCK(ifa); - /* reset the interface and routing table appropriately. 
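The llsol construction above builds the solicited-node multicast group of RFC 4291, section 2.7.1: ff02::1:ff00:0 with the low 24 bits of the unicast address spliced in (in6_setscope() then embeds the zone). The mapping by itself:

    #include <string.h>
    #include <netinet/in.h>

    static void
    solicited_node(const struct in6_addr *unicast, struct in6_addr *grp)
    {
        memset(grp, 0, sizeof (*grp));
        grp->s6_addr[0] = 0xff;    /* ff02::/16: link-local multicast */
        grp->s6_addr[1] = 0x02;
        grp->s6_addr[11] = 0x01;   /* ff02::1:ffXX:XXXX */
        grp->s6_addr[12] = 0xff;
        memcpy(&grp->s6_addr[13], &unicast->s6_addr[13], 3);
    }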
*/ - if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0) - goto unlink; + in6m_sol = NULL; - IFA_LOCK(ifa); - /* - * configure address flags. - */ - ia->ia6_flags = ifra->ifra_flags; - /* - * backward compatibility - if IN6_IFF_DEPRECATED is set from the - * userland, make it deprecated. - */ - if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) { - ia->ia6_lifetime.ia6t_pltime = 0; - ia->ia6_lifetime.ia6t_preferred = timenow.tv_sec; - } /* * Mark the address as tentative before joining multicast addresses, * so that corresponding MLD responses would not have a tentative * source address. */ ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */ - if (hostIsNew && in6if_do_dad(ifp)) + if (in6if_do_dad(ifp)) in6_ifaddr_set_dadprogress(ia); - /* - * Do not delay sending neighbor solicitations when using optimistic - * duplicate address detection, c.f. RFC 4429. - */ - if (ia->ia6_flags & IN6_IFF_OPTIMISTIC) - flags &= ~IN6_IFAUPDATE_DADDELAY; - - /* - * We are done if we have simply modified an existing address. - */ - if (!hostIsNew) { - IFA_UNLOCK(ifa); - /* release reference held for this routine */ - IFA_REMREF(ifa); - return (error); - } - /* - * Beyond this point, we should call in6_purgeaddr upon an error, - * not just go to unlink. - */ - IFA_LOCK_ASSERT_HELD(ifa); /* Join necessary multicast groups */ if ((ifp->if_flags & IFF_MULTICAST) != 0) { - struct sockaddr_in6 mltaddr, mltmask; - struct in6_addr llsol; - IFA_UNLOCK(ifa); /* join solicited multicast addr for new host id */ - bzero(&llsol, sizeof(struct in6_addr)); + bzero(&llsol, sizeof (struct in6_addr)); llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; llsol.s6_addr32[1] = 0; llsol.s6_addr32[2] = htonl(1); - llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; + llsol.s6_addr32[3] = ia->ia_addr.sin6_addr.s6_addr32[3]; llsol.s6_addr8[12] = 0xff; if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { /* XXX: should not happen */ - log(LOG_ERR, "in6_update_ifa: " - "in6_setscope failed\n"); - goto cleanup; + log(LOG_ERR, "%s: in6_setscope failed\n", __func__); + goto unwind; } delay = 0; - if ((flags & IN6_IFAUPDATE_DADDELAY)) { + if ((ifaupflags & IN6_IFAUPDATE_DADDELAY)) { /* * We need a random delay for DAD on the address * being configured. It also means delaying * transmission of the corresponding MLD report to - * avoid report collision. - * [draft-ietf-ipv6-rfc2462bis-02.txt] + * avoid report collision. 
[RFC 4862] */ - delay = random() % - (MAX_RTR_SOLICITATION_DELAY * PR_SLOWHZ); + delay = random() % MAX_RTR_SOLICITATION_DELAY; } imm = in6_joingroup(ifp, &llsol, &error, delay); if (imm == NULL) { nd6log((LOG_WARNING, - "in6_update_ifa: addmulti failed for " - "%s on %s (errno=%d)\n", - ip6_sprintf(&llsol), if_name(ifp), + "%s: addmulti failed for %s on %s (errno=%d)\n", + __func__, ip6_sprintf(&llsol), if_name(ifp), error)); - in6_purgeaddr((struct ifaddr *)ia); - /* release reference held for this routine */ - IFA_REMREF(ifa); - return (error); + VERIFY(error != 0); + goto unwind; } in6m_sol = imm->i6mm_maddr; /* take a refcount for this routine */ @@ -1808,8 +1854,8 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); IFA_UNLOCK(ifa); - bzero(&mltmask, sizeof(mltmask)); - mltmask.sin6_len = sizeof(struct sockaddr_in6); + bzero(&mltmask, sizeof (mltmask)); + mltmask.sin6_len = sizeof (struct sockaddr_in6); mltmask.sin6_family = AF_INET6; mltmask.sin6_addr = in6mask32; #define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ @@ -1817,13 +1863,12 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, /* * join link-local all-nodes address */ - bzero(&mltaddr, sizeof(mltaddr)); - mltaddr.sin6_len = sizeof(struct sockaddr_in6); + bzero(&mltaddr, sizeof (mltaddr)); + mltaddr.sin6_len = sizeof (struct sockaddr_in6); mltaddr.sin6_family = AF_INET6; mltaddr.sin6_addr = in6addr_linklocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != - 0) - goto cleanup; /* XXX: should not fail */ + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + goto unwind; /* XXX: should not fail */ /* * XXX: do we really need this automatic routes? @@ -1847,7 +1892,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING, NULL, ia->ia_ifp->if_index); if (error) - goto cleanup; + goto unwind; } else { rtfree(rt); } @@ -1855,11 +1900,11 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); if (!imm) { nd6log((LOG_WARNING, - "in6_update_ifa: addmulti failed for " - "%s on %s (errno=%d)\n", - ip6_sprintf(&mltaddr.sin6_addr), + "%s: addmulti failed for %s on %s (errno=%d)\n", + __func__, ip6_sprintf(&mltaddr.sin6_addr), if_name(ifp), error)); - goto cleanup; + VERIFY(error != 0); + goto unwind; } IFA_LOCK_SPIN(ifa); LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); @@ -1868,27 +1913,27 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, /* * join node information group address */ -#define hostnamelen strlen(hostname) +#define hostnamelen strlen(hostname) delay = 0; - if ((flags & IN6_IFAUPDATE_DADDELAY)) { + if ((ifaupflags & IN6_IFAUPDATE_DADDELAY)) { /* * The spec doesn't say anything about delay for this * group, but the same logic should apply. */ - delay = random() % - (MAX_RTR_SOLICITATION_DELAY * PR_SLOWHZ); + delay = random() % MAX_RTR_SOLICITATION_DELAY; } if (in6_nigroup(ifp, hostname, hostnamelen, &mltaddr.sin6_addr) == 0) { imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); /* XXX jinmei */ if (!imm) { - nd6log((LOG_WARNING, "in6_update_ifa: " - "addmulti failed for %s on %s " + nd6log((LOG_WARNING, + "%s: addmulti failed for %s on %s " "(errno=%d)\n", - ip6_sprintf(&mltaddr.sin6_addr), + __func__, ip6_sprintf(&mltaddr.sin6_addr), if_name(ifp), error)); /* XXX not very fatal, go on... 
*/ + error = 0; } else { IFA_LOCK_SPIN(ifa); LIST_INSERT_HEAD(&ia->ia6_memberships, @@ -1903,9 +1948,8 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, * (ff01::1%ifN, and ff01::%ifN/32) */ mltaddr.sin6_addr = in6addr_nodelocal_allnodes; - if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) - != 0) - goto cleanup; /* XXX: should not fail */ + if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) + goto unwind; /* XXX: should not fail */ /* XXX: again, do we really need the route? */ rt = rtalloc1_scoped((struct sockaddr *)&mltaddr, 0, 0UL, ia->ia_ifp->if_index); @@ -1923,25 +1967,25 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING, NULL, ia->ia_ifp->if_index); if (error) - goto cleanup; + goto unwind; } else rtfree(rt); imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); if (!imm) { - nd6log((LOG_WARNING, "in6_update_ifa: " - "addmulti failed for %s on %s " - "(errno=%d)\n", - ip6_sprintf(&mltaddr.sin6_addr), + nd6log((LOG_WARNING, + "%s: addmulti failed for %s on %s (errno=%d)\n", + __func__, ip6_sprintf(&mltaddr.sin6_addr), if_name(ifp), error)); - goto cleanup; + VERIFY(error != 0); + goto unwind; } IFA_LOCK(ifa); LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); - /* keep it locked */ -#undef MLTMASK_LEN + IFA_UNLOCK(ifa); } - IFA_LOCK_ASSERT_HELD(ifa); +#undef MLTMASK_LEN + /* * Make sure to initialize ND6 information. this is to workaround * issues with interfaces with IPv6 addresses, which have never brought @@ -1949,80 +1993,348 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, * NOTE: this is how stf0 gets initialized */ if ((error = nd6_ifattach(ifp)) != 0) - return error; + goto unwind; + + /* Ensure nd6_service() is scheduled as soon as it's convenient */ + ++nd6_sched_timeout_want; /* * Perform DAD, if needed. * XXX It may be of use, if we can administratively * disable DAD. */ - if (hostIsNew && in6if_do_dad(ifp) && - ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && - (ia->ia6_flags & IN6_IFF_DADPROGRESS)) - { + IFA_LOCK_SPIN(ifa); + if (in6if_do_dad(ifp) && ((ifa->ifa_flags & IN6_IFF_NODAD) == 0) && + (ia->ia6_flags & IN6_IFF_DADPROGRESS)) { int mindelay, maxdelay; int *delayptr, delayval; IFA_UNLOCK(ifa); - delayptr = NULL; - if ((flags & IN6_IFAUPDATE_DADDELAY)) { - /* - * We need to impose a delay before sending an NS - * for DAD. Check if we also needed a delay for the - * corresponding MLD message. If we did, the delay - * should be larger than the MLD delay (this could be - * relaxed a bit, but this simple logic is at least - * safe). - */ - mindelay = 0; - if (in6m_sol != NULL) { - IN6M_LOCK(in6m_sol); - if (in6m_sol->in6m_state == MLD_REPORTING_MEMBER) - mindelay = in6m_sol->in6m_timer; - IN6M_UNLOCK(in6m_sol); - } - maxdelay = MAX_RTR_SOLICITATION_DELAY * hz; - if (maxdelay - mindelay == 0) - delayval = 0; - else { - delayval = - (random() % (maxdelay - mindelay)) + - mindelay; - } - delayptr = &delayval; + delayptr = NULL; + if ((ifaupflags & IN6_IFAUPDATE_DADDELAY)) { + /* + * We need to impose a delay before sending an NS + * for DAD. Check if we also needed a delay for the + * corresponding MLD message. If we did, the delay + * should be larger than the MLD delay (this could be + * relaxed a bit, but this simple logic is at least + * safe). 
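Both delay computations in this function follow one pattern: pick a uniform random delay, bounded below by any timer that must fire first (here, the pending MLD report). A standalone rendition; hz and the sample mindelay are illustrative stand-ins for the kernel tick rate and the in6m_timer value:

#include <stdio.h>
#include <stdlib.h>

static int
pick_dad_delay(int mindelay, int maxdelay)
{
	if (maxdelay - mindelay == 0)
		return (0);
	/* uniform in [mindelay, maxdelay) */
	return ((rand() % (maxdelay - mindelay)) + mindelay);
}

int main(void)
{
	int hz = 100;			/* assumed tick rate */
	int maxdelay = 1 * hz;		/* MAX_RTR_SOLICITATION_DELAY is 1s */

	printf("delay = %d ticks\n", pick_dad_delay(25, maxdelay));
	return (0);
}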
+ */ + mindelay = 0; + if (in6m_sol != NULL) { + IN6M_LOCK(in6m_sol); + if (in6m_sol->in6m_state == + MLD_REPORTING_MEMBER) + mindelay = in6m_sol->in6m_timer; + IN6M_UNLOCK(in6m_sol); + } + maxdelay = MAX_RTR_SOLICITATION_DELAY * hz; + if (maxdelay - mindelay == 0) + delayval = 0; + else { + delayval = + (random() % (maxdelay - mindelay)) + + mindelay; + } + delayptr = &delayval; + } + + nd6_dad_start((struct ifaddr *)ia, delayptr); + } else { + IFA_UNLOCK(ifa); + } + + goto done; + +unwind: + VERIFY(error != 0); + in6_purgeaddr(&ia->ia_ifa); + +done: + /* release reference held for this routine */ + if (in6m_sol != NULL) + IN6M_REMREF(in6m_sol); + return (error); +} + +/* + * Request an IPv6 interface address. If the address is new, then it will be + * constructed and appended to the interface address chains. The interface + * address structure is optionally returned with a reference for the caller. + */ +int +in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int ifaupflags, + struct in6_ifaddr **iar) +{ + struct in6_addrlifetime ia6_lt; + struct in6_ifaddr *ia; + struct ifaddr *ifa; + struct ifaddr *xifa; + struct in6_addrlifetime *lt; + uint64_t timenow; + int plen, error; + + /* Sanity check parameters and initialize locals */ + VERIFY(ifp != NULL && ifra != NULL && iar != NULL); + ia = NULL; + ifa = NULL; + error = 0; + + /* + * We always require users to specify a valid IPv6 address for + * the corresponding operation. + */ + if (ifra->ifra_addr.sin6_family != AF_INET6 || + ifra->ifra_addr.sin6_len != sizeof (struct sockaddr_in6)) { + error = EAFNOSUPPORT; + goto unwind; + } + + /* Validate ifra_prefixmask.sin6_len is properly bounded. */ + if (ifra->ifra_prefixmask.sin6_len == 0 || + ifra->ifra_prefixmask.sin6_len > sizeof (struct sockaddr_in6)) { + error = EINVAL; + goto unwind; + } + + /* Validate prefix length extracted from ifra_prefixmask structure. */ + plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, + (u_char *)&ifra->ifra_prefixmask + ifra->ifra_prefixmask.sin6_len); + if (plen <= 0) { + error = EINVAL; + goto unwind; + } + + /* Validate lifetimes */ + lt = &ifra->ifra_lifetime; + if (lt->ia6t_pltime > lt->ia6t_vltime) { + log(LOG_INFO, + "%s: pltime 0x%x > vltime 0x%x for %s\n", __func__, + lt->ia6t_pltime, lt->ia6t_vltime, + ip6_sprintf(&ifra->ifra_addr.sin6_addr)); + error = EINVAL; + goto unwind; + } + if (lt->ia6t_vltime == 0) { + /* + * the following log might be noisy, but this is a typical + * configuration mistake or a tool's bug. + */ + log(LOG_INFO, "%s: valid lifetime is 0 for %s\n", __func__, + ip6_sprintf(&ifra->ifra_addr.sin6_addr)); + } + + /* + * Before we lock the ifnet structure, we first check to see if the + * address already exists. If so, then we don't allocate and link a + * new one here. + */ + ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr); + if (ia != NULL) + ifa = &ia->ia_ifa; + + /* + * Validate destination address on interface types that require it. 
+ */ + if ((ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) != 0) { + switch (ifra->ifra_dstaddr.sin6_family) { + case AF_INET6: + if (plen != 128) { + /* noisy message for diagnostic purposes */ + log(LOG_INFO, + "%s: prefix length < 128 with" + " explicit dstaddr.\n", __func__); + error = EINVAL; + goto unwind; + } + break; + + case AF_UNSPEC: + break; + + default: + error = EAFNOSUPPORT; + goto unwind; + } + } else if (ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) { + log(LOG_INFO, + "%s: dstaddr valid only on p2p and loopback interfaces.\n", + __func__); + error = EINVAL; + goto unwind; + } + + timenow = net_uptime(); + + if (ia == NULL) { + int how; + + /* Is this the first new IPv6 address for the interface? */ + ifaupflags |= IN6_IFAUPDATE_NEWADDR; + + /* Allocate memory for IPv6 interface address structure. */ + how = !(ifaupflags & IN6_IFAUPDATE_NOWAIT) ? M_WAITOK : 0; + ia = in6_ifaddr_alloc(how); + if (ia == NULL) { + error = ENOBUFS; + goto unwind; + } + + ifa = &ia->ia_ifa; + + /* + * Initialize interface address structure. + * + * Note well: none of these sockaddr_in6 structures contain a + * valid sin6_port, sin6_flowinfo or even a sin6_scope_id field. + * We still embed link-local scope identifiers at the end of an + * arbitrary fe80::/32 prefix, for historical reasons. Also, the + * ifa_dstaddr field is always non-NULL on point-to-point and + * loopback interfaces, and conventionally points to a socket + * address of AF_UNSPEC family when there is no destination. + * + * Please enjoy the dancing sea turtle. + */ + IFA_ADDREF(ifa); /* for this and optionally for caller */ + ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; + if (ifra->ifra_dstaddr.sin6_family == AF_INET6 || + (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) + ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; + ifa->ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; + ifa->ifa_ifp = ifp; + ifa->ifa_metric = ifp->if_metric; + ifa->ifa_rtrequest = nd6_rtrequest; + + LIST_INIT(&ia->ia6_memberships); + ia->ia_addr.sin6_family = AF_INET6; + ia->ia_addr.sin6_len = sizeof (ia->ia_addr); + ia->ia_addr.sin6_addr = ifra->ifra_addr.sin6_addr; + ia->ia_prefixmask.sin6_family = AF_INET6; + ia->ia_prefixmask.sin6_len = sizeof (ia->ia_prefixmask); + ia->ia_prefixmask.sin6_addr = ifra->ifra_prefixmask.sin6_addr; + error = in6_to_kamescope(&ia->ia_addr, ifp); + if (error != 0) + goto unwind; + if (ifa->ifa_dstaddr != NULL) { + ia->ia_dstaddr = ifra->ifra_dstaddr; + error = in6_to_kamescope(&ia->ia_dstaddr, ifp); + if (error != 0) + goto unwind; + } + + /* Append to address chains */ + ifnet_lock_exclusive(ifp); + ifaupflags |= IN6_IFAUPDATE_1STADDR; + TAILQ_FOREACH(xifa, &ifp->if_addrlist, ifa_list) { + IFA_LOCK_SPIN(xifa); + if (xifa->ifa_addr->sa_family != AF_INET6) { + IFA_UNLOCK(xifa); + ifaupflags &= ~IN6_IFAUPDATE_1STADDR; + break; + } + IFA_UNLOCK(xifa); + } + + IFA_LOCK_SPIN(ifa); + if_attach_ifa(ifp, ifa); /* holds reference for ifnet link */ + IFA_UNLOCK(ifa); + ifnet_lock_done(ifp); + + lck_rw_lock_exclusive(&in6_ifaddr_rwlock); + if (in6_ifaddrs != NULL) { + struct in6_ifaddr *iac; + for (iac = in6_ifaddrs; iac->ia_next != NULL; + iac = iac->ia_next) + continue; + iac->ia_next = ia; + } else { + in6_ifaddrs = ia; } - nd6_dad_start((struct ifaddr *)ia, delayptr); + IFA_ADDREF(ifa); /* hold for in6_ifaddrs link */ + lck_rw_done(&in6_ifaddr_rwlock); } else { - IFA_UNLOCK(ifa); + ifa = &ia->ia_ifa; + ifaupflags &= ~(IN6_IFAUPDATE_NEWADDR|IN6_IFAUPDATE_1STADDR); } -done: - /* release reference held for 
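The branch above encodes the destination-address policy in a condensed truth table: an explicit AF_INET6 destination requires a /128 on a point-to-point or loopback interface, and any destination at all is rejected elsewhere. A sketch keeping only the checks visible in this hunk:

#include <stdio.h>
#include <errno.h>
#include <sys/socket.h>

static int
check_dstaddr(int p2p_or_loop, sa_family_t dst_family, int plen)
{
	if (p2p_or_loop) {
		if (dst_family == AF_INET6)
			return ((plen == 128) ? 0 : EINVAL);
		return ((dst_family == AF_UNSPEC) ? 0 : EAFNOSUPPORT);
	}
	return ((dst_family == AF_UNSPEC) ? 0 : EINVAL);
}

int main(void)
{
	printf("p2p, v6 dst, /64   -> %d\n", check_dstaddr(1, AF_INET6, 64));
	printf("p2p, v6 dst, /128  -> %d\n", check_dstaddr(1, AF_INET6, 128));
	printf("bcast, v6 dst, /128 -> %d\n", check_dstaddr(0, AF_INET6, 128));
	return (0);
}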
this routine */ - if (ifa != NULL) - IFA_REMREF(ifa); - if (in6m_sol != NULL) - IN6M_REMREF(in6m_sol); - return (error); -unlink: + VERIFY(ia != NULL && ifa == &ia->ia_ifa); + IFA_LOCK(ifa); + + /* + * Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred + * to see if the address is deprecated or invalidated, but initialize + * these members for applications. + */ + ia->ia6_updatetime = ia->ia6_createtime = timenow; + ia6_lt = *lt; + if (ia6_lt.ia6t_vltime != ND6_INFINITE_LIFETIME) + ia6_lt.ia6t_expire = timenow + ia6_lt.ia6t_vltime; + else + ia6_lt.ia6t_expire = 0; + if (ia6_lt.ia6t_pltime != ND6_INFINITE_LIFETIME) + ia6_lt.ia6t_preferred = timenow + ia6_lt.ia6t_pltime; + else + ia6_lt.ia6t_preferred = 0; + in6ifa_setlifetime(ia, &ia6_lt); + /* - * XXX: if a change of an existing address failed, keep the entry - * anyway. + * Backward compatibility - if IN6_IFF_DEPRECATED is set from the + * userland, make it deprecated. */ - if (hostIsNew) { - in6_unlink_ifa(ia, ifp); + if ((ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) { + ia->ia6_lifetime.ia6ti_pltime = 0; + ia->ia6_lifetime.ia6ti_preferred = timenow; } - goto done; -cleanup: - in6_purgeaddr(&ia->ia_ifa); + /* + * Do not delay sending neighbor solicitations when using optimistic + * duplicate address detection, c.f. RFC 4429. + */ + if ((ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0) + ifaupflags |= IN6_IFAUPDATE_DADDELAY; + + /* + * Update flag or prefix length + */ + ia->ia_plen = plen; + ia->ia6_flags = ifra->ifra_flags; + + /* Release locks (new address available to concurrent tasks) */ + IFA_UNLOCK(ifa); + + /* Further initialization of the interface address */ + error = in6_ifinit(ifp, ia, ifaupflags); + if (error != 0) + goto unwind; + + /* Finish updating the address while other tasks are working with it */ + error = in6_ifaupdate_aux(ia, ifp, ifaupflags); + if (error != 0) + goto unwind; + + /* Return success (optionally w/ address for caller). */ + VERIFY(error == 0); + (void) ifnet_notify_address(ifp, AF_INET6); goto done; + +unwind: + VERIFY(error != 0); + if (ia != NULL) { + VERIFY(ifa == &ia->ia_ifa); + IFA_REMREF(ifa); + ia = NULL; + } + +done: + *iar = ia; + return (error); } void in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; - struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; + struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct in6_multi_mship *imm; lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED); @@ -2035,27 +2347,24 @@ in6_purgeaddr(struct ifaddr *ifa) * The interface must be p2p or loopback in this case. */ IFA_LOCK(ifa); - if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) { - int e; + if ((ia->ia_flags & IFA_ROUTE) && ia->ia_plen == 128) { + int error, rtf; IFA_UNLOCK(ifa); - if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) - != 0) { + rtf = (ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0; + error = rtinit(&(ia->ia_ifa), RTM_DELETE, rtf); + if (error != 0) { log(LOG_ERR, "in6_purgeaddr: failed to remove " "a route to the p2p destination: %s on %s, " "errno=%d\n", ip6_sprintf(&ia->ia_addr.sin6_addr), if_name(ifp), - e); + error); /* proceed anyway... */ - } else { - IFA_LOCK_SPIN(ifa); - ia->ia_flags &= ~IFA_ROUTE; - IFA_UNLOCK(ifa); } - } else { - IFA_UNLOCK(ifa); + IFA_LOCK_SPIN(ifa); + ia->ia_flags &= ~IFA_ROUTE; } - IFA_LOCK_ASSERT_NOTHELD(ifa); + IFA_UNLOCK(ifa); /* Remove ownaddr's loopback rtentry, if it exists. 
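The lifetime block above turns the relative vltime/pltime values into absolute uptime-based expiries, with ND6_INFINITE_LIFETIME mapping to 0 ("never expires"). The arithmetic, isolated (the constant's value is as defined in nd6.h; timenow is a fake net_uptime()):

#include <stdio.h>
#include <stdint.h>

#define ND6_INFINITE_LIFETIME	0xffffffff	/* as in nd6.h */

static uint64_t
abs_expiry(uint64_t timenow, uint32_t lifetime)
{
	return ((lifetime == ND6_INFINITE_LIFETIME) ?
	    0 : timenow + lifetime);
}

int main(void)
{
	uint64_t now = 1000;		/* seconds of uptime */

	printf("vltime 600 -> expire %llu\n",
	    (unsigned long long)abs_expiry(now, 600));
	printf("vltime inf -> expire %llu\n",
	    (unsigned long long)abs_expiry(now, ND6_INFINITE_LIFETIME));
	return (0);
}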
*/ in6_ifremloop(&(ia->ia_ifa)); @@ -2075,6 +2384,8 @@ in6_purgeaddr(struct ifaddr *ifa) /* in6_unlink_ifa() will need exclusive access */ in6_unlink_ifa(ia, ifp); in6_post_msg(ifp, KEV_INET6_ADDR_DELETED, ia); + + (void) ifnet_notify_address(ifp, AF_INET6); } static void @@ -2108,7 +2419,7 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) ia->ia_next = oia->ia_next; } else { /* search failed */ - printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n"); + log(LOG_NOTICE, "%s: search failed.\n", __func__); unlinked = 0; } } @@ -2124,7 +2435,8 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) if ((oia->ia6_flags & IN6_IFF_AUTOCONF) != 0) { if (oia->ia6_ndpr == NULL) { log(LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address " - "%p has no prefix\n", oia); + "0x%llx has no prefix\n", + (uint64_t)VM_KERNEL_ADDRPERM(oia)); } else { struct nd_prefix *pr = oia->ia6_ndpr; @@ -2156,6 +2468,9 @@ in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) /* release reference held for this routine */ IFA_REMREF(ifa); + + /* invalidate route caches */ + routegenid_inet6_update(); } void @@ -2216,20 +2531,20 @@ in6_purgeif(struct ifnet *ifp) * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374 * address encoding scheme. (see figure on page 8) */ -static int -in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, - struct ifnet *ifp, struct proc *p) +static __attribute__((noinline)) int +in6ctl_lifaddr(struct ifnet *ifp, u_long cmd, struct if_laddrreq *iflr, + boolean_t p64) { - struct ifaddr *ifa = NULL; + struct in6_aliasreq ifra; + struct ifaddr *ifa; struct sockaddr *sa; - int p64 = proc_is64bit(p); VERIFY(ifp != NULL); switch (cmd) { case SIOCGLIFADDR: /* address must be specified on GET with IFLR_PREFIX */ - if ((iflr->flags & IFLR_PREFIX) == 0) + if (!(iflr->flags & IFLR_PREFIX)) break; /* FALLTHROUGH */ case SIOCALIFADDR: @@ -2237,31 +2552,26 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, /* address must be specified on ADD and DELETE */ sa = (struct sockaddr *)&iflr->addr; if (sa->sa_family != AF_INET6) - return EINVAL; - if (sa->sa_len != sizeof(struct sockaddr_in6)) - return EINVAL; + return (EINVAL); + if (sa->sa_len != sizeof (struct sockaddr_in6)) + return (EINVAL); /* XXX need improvement */ sa = (struct sockaddr *)&iflr->dstaddr; if (sa->sa_family && sa->sa_family != AF_INET6) - return EINVAL; - if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6)) - return EINVAL; + return (EINVAL); + if (sa->sa_len && sa->sa_len != sizeof (struct sockaddr_in6)) + return (EINVAL); break; - default: /* shouldn't happen */ -#if 0 - panic("invalid cmd to in6_lifaddr_ioctl"); + default: + /* shouldn't happen */ + VERIFY(0); /* NOTREACHED */ -#else - return EOPNOTSUPP; -#endif } - if (sizeof(struct in6_addr) * 8 < iflr->prefixlen) - return EINVAL; + if (sizeof (struct in6_addr) * 8 < iflr->prefixlen) + return (EINVAL); switch (cmd) { - case SIOCALIFADDR: - { - struct in6_aliasreq ifra; + case SIOCALIFADDR: { struct in6_addr hostaddr; int prefixlen; int hostid_found = 0; @@ -2276,7 +2586,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); if (!ifa) - return EADDRNOTAVAIL; + return (EADDRNOTAVAIL); IFA_LOCK_SPIN(ifa); hostaddr = *IFA_IN6(ifa); IFA_UNLOCK(ifa); @@ -2284,46 +2594,46 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, IFA_REMREF(ifa); ifa = NULL; - /* prefixlen must be <= 64. 
*/ + /* prefixlen must be <= 64. */ if (64 < iflr->prefixlen) - return EINVAL; + return (EINVAL); prefixlen = iflr->prefixlen; /* hostid part must be zero. */ sin6 = (struct sockaddr_in6 *)&iflr->addr; if (sin6->sin6_addr.s6_addr32[2] != 0 || sin6->sin6_addr.s6_addr32[3] != 0) { - return EINVAL; + return (EINVAL); } - } else + } else { prefixlen = iflr->prefixlen; - + } /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */ - bzero(&ifra, sizeof(ifra)); - bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); + bzero(&ifra, sizeof (ifra)); + bcopy(iflr->iflr_name, ifra.ifra_name, sizeof (ifra.ifra_name)); bcopy(&iflr->addr, &ifra.ifra_addr, - ((struct sockaddr *)&iflr->addr)->sa_len); + ((struct sockaddr *)&iflr->addr)->sa_len); if (hostid_found) { /* fill in hostaddr part */ ifra.ifra_addr.sin6_addr.s6_addr32[2] = - hostaddr.s6_addr32[2]; + hostaddr.s6_addr32[2]; ifra.ifra_addr.sin6_addr.s6_addr32[3] = - hostaddr.s6_addr32[3]; + hostaddr.s6_addr32[3]; } - if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /*XXX*/ + if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, - ((struct sockaddr *)&iflr->dstaddr)->sa_len); + ((struct sockaddr *)&iflr->dstaddr)->sa_len); if (hostid_found) { ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] = - hostaddr.s6_addr32[2]; + hostaddr.s6_addr32[2]; ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] = - hostaddr.s6_addr32[3]; + hostaddr.s6_addr32[3]; } } - ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_prefixmask.sin6_len = sizeof (struct sockaddr_in6); in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen); ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX; @@ -2335,16 +2645,16 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, */ in6_aliasreq_64_to_32((struct in6_aliasreq_64 *)&ifra, &ifra_32); - return (in6_control(so, SIOCAIFADDR_IN6_32, - (caddr_t)&ifra_32, ifp, p)); + return (in6_control(NULL, SIOCAIFADDR_IN6_32, + (caddr_t)&ifra_32, ifp, kernproc)); #else - return (in6_control(so, SIOCAIFADDR_IN6, - (caddr_t)&ifra, ifp, p)); + return (in6_control(NULL, SIOCAIFADDR_IN6, + (caddr_t)&ifra, ifp, kernproc)); #endif /* __LP64__ */ } else { #if defined(__LP64__) - return (in6_control(so, SIOCAIFADDR_IN6, - (caddr_t)&ifra, ifp, p)); + return (in6_control(NULL, SIOCAIFADDR_IN6, + (caddr_t)&ifra, ifp, kernproc)); #else struct in6_aliasreq_64 ifra_64; /* @@ -2352,35 +2662,35 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, */ in6_aliasreq_32_to_64((struct in6_aliasreq_32 *)&ifra, &ifra_64); - return (in6_control(so, SIOCAIFADDR_IN6_64, - (caddr_t)&ifra_64, ifp, p)); + return (in6_control(NULL, SIOCAIFADDR_IN6_64, + (caddr_t)&ifra_64, ifp, kernproc)); #endif /* __LP64__ */ } /* NOTREACHED */ - } + } + case SIOCGLIFADDR: - case SIOCDLIFADDR: - { + case SIOCDLIFADDR: { struct in6_ifaddr *ia; struct in6_addr mask, candidate, match; struct sockaddr_in6 *sin6; int cmp; - bzero(&mask, sizeof(mask)); + bzero(&mask, sizeof (mask)); if (iflr->flags & IFLR_PREFIX) { /* lookup a prefix rather than address. 
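ifra_prefixmask is populated above by in6_prefixlen2mask(), whose whole-byte loop appears later in this patch. A userland rendition; the partial-byte table is assumed from the stock KAME implementation, since the hunk below only shows the 0xff loop:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static void
prefixlen2mask(struct in6_addr *maskp, int len)
{
	static const unsigned char maskarray[8] =
	    { 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
	int bytelen, bitlen, i;

	if (len < 0 || len > 128)
		return;
	memset(maskp, 0, sizeof (*maskp));
	bytelen = len / 8;
	bitlen = len % 8;
	for (i = 0; i < bytelen; i++)
		maskp->s6_addr[i] = 0xff;
	if (bitlen)
		maskp->s6_addr[bytelen] = maskarray[bitlen - 1];
}

int main(void)
{
	struct in6_addr m;
	char buf[INET6_ADDRSTRLEN];

	prefixlen2mask(&m, 60);
	printf("%s\n", inet_ntop(AF_INET6, &m, buf, sizeof (buf)));
	/* prints ffff:ffff:ffff:fff0:: */
	return (0);
}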
*/ in6_prefixlen2mask(&mask, iflr->prefixlen); sin6 = (struct sockaddr_in6 *)&iflr->addr; - bcopy(&sin6->sin6_addr, &match, sizeof(match)); + bcopy(&sin6->sin6_addr, &match, sizeof (match)); match.s6_addr32[0] &= mask.s6_addr32[0]; match.s6_addr32[1] &= mask.s6_addr32[1]; match.s6_addr32[2] &= mask.s6_addr32[2]; match.s6_addr32[3] &= mask.s6_addr32[3]; /* if you set extra bits, that's wrong */ - if (bcmp(&match, &sin6->sin6_addr, sizeof(match))) - return EINVAL; + if (bcmp(&match, &sin6->sin6_addr, sizeof (match))) + return (EINVAL); cmp = 1; } else { @@ -2391,15 +2701,14 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, /* on deleting an address, do exact match */ in6_prefixlen2mask(&mask, 128); sin6 = (struct sockaddr_in6 *)&iflr->addr; - bcopy(&sin6->sin6_addr, &match, sizeof(match)); + bcopy(&sin6->sin6_addr, &match, sizeof (match)); cmp = 1; } } ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { IFA_LOCK(ifa); if (ifa->ifa_addr->sa_family != AF_INET6) { IFA_UNLOCK(ifa); @@ -2410,7 +2719,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, break; } - bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate)); + bcopy(IFA_IN6(ifa), &candidate, sizeof (candidate)); IFA_UNLOCK(ifa); /* * XXX: this is adhoc, but is necessary to allow @@ -2430,7 +2739,7 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, IFA_ADDREF(ifa); ifnet_lock_done(ifp); if (!ifa) - return EADDRNOTAVAIL; + return (EADDRNOTAVAIL); ia = ifa2ia6(ifa); if (cmd == SIOCGLIFADDR) { @@ -2443,49 +2752,46 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) { s6->sin6_addr.s6_addr16[1] = 0; s6->sin6_scope_id = - in6_addr2scopeid(ifp, &s6->sin6_addr); + in6_addr2scopeid(ifp, &s6->sin6_addr); } if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &iflr->dstaddr, - ia->ia_dstaddr.sin6_len); + ia->ia_dstaddr.sin6_len); s6 = (struct sockaddr_in6 *)&iflr->dstaddr; if (IN6_IS_ADDR_LINKLOCAL(&s6->sin6_addr)) { s6->sin6_addr.s6_addr16[1] = 0; s6->sin6_scope_id = - in6_addr2scopeid(ifp, - &s6->sin6_addr); + in6_addr2scopeid(ifp, + &s6->sin6_addr); } } else - bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); + bzero(&iflr->dstaddr, sizeof (iflr->dstaddr)); iflr->prefixlen = - in6_mask2len(&ia->ia_prefixmask.sin6_addr, - NULL); + in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); iflr->flags = ia->ia6_flags; /* XXX */ IFA_UNLOCK(ifa); IFA_REMREF(ifa); - return 0; + return (0); } else { - struct in6_aliasreq ifra; - /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ - bzero(&ifra, sizeof(ifra)); + bzero(&ifra, sizeof (ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, - sizeof(ifra.ifra_name)); + sizeof (ifra.ifra_name)); IFA_LOCK(ifa); bcopy(&ia->ia_addr, &ifra.ifra_addr, - ia->ia_addr.sin6_len); + ia->ia_addr.sin6_len); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, - ia->ia_dstaddr.sin6_len); + ia->ia_dstaddr.sin6_len); } else { bzero(&ifra.ifra_dstaddr, - sizeof(ifra.ifra_dstaddr)); + sizeof (ifra.ifra_dstaddr)); } bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr, - ia->ia_prefixmask.sin6_len); + ia->ia_prefixmask.sin6_len); ifra.ifra_flags = ia->ia6_flags; IFA_UNLOCK(ifa); @@ -2503,16 +2809,16 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, */ in6_aliasreq_64_to_32( (struct in6_aliasreq_64 *)&ifra, &ifra_32); - return (in6_control(so, 
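The IFLR_PREFIX path above masks the requested address down to its prefix and rejects the request if any host bits were set beyond the prefix length ("if you set extra bits, that's wrong"). The same check, standalone:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static int
prefix_has_extra_bits(const struct in6_addr *addr,
    const struct in6_addr *mask)
{
	struct in6_addr match;
	int i;

	for (i = 0; i < 16; i++)
		match.s6_addr[i] = addr->s6_addr[i] & mask->s6_addr[i];
	return (memcmp(&match, addr, sizeof (match)) != 0);
}

int main(void)
{
	struct in6_addr a, m;

	inet_pton(AF_INET6, "ffff:ffff:ffff:ffff::", &m);	/* a /64 */
	inet_pton(AF_INET6, "2001:db8::1", &a);			/* host bit */
	printf("extra bits: %d\n", prefix_has_extra_bits(&a, &m));
	inet_pton(AF_INET6, "2001:db8::", &a);			/* clean */
	printf("extra bits: %d\n", prefix_has_extra_bits(&a, &m));
	return (0);
}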
SIOCDIFADDR_IN6, - (caddr_t)&ifra_32, ifp, p)); + return (in6_control(NULL, SIOCDIFADDR_IN6, + (caddr_t)&ifra_32, ifp, kernproc)); #else - return (in6_control(so, SIOCDIFADDR_IN6, - (caddr_t)&ifra, ifp, p)); + return (in6_control(NULL, SIOCDIFADDR_IN6, + (caddr_t)&ifra, ifp, kernproc)); #endif /* __LP64__ */ } else { #if defined(__LP64__) - return (in6_control(so, SIOCDIFADDR_IN6, - (caddr_t)&ifra, ifp, p)); + return (in6_control(NULL, SIOCDIFADDR_IN6, + (caddr_t)&ifra, ifp, kernproc)); #else struct in6_aliasreq_64 ifra_64; /* @@ -2525,72 +2831,45 @@ in6_lifaddr_ioctl(struct socket *so, u_long cmd, struct if_laddrreq *iflr, */ in6_aliasreq_32_to_64( (struct in6_aliasreq_32 *)&ifra, &ifra_64); - return (in6_control(so, SIOCDIFADDR_IN6, - (caddr_t)&ifra_64, ifp, p)); + return (in6_control(NULL, SIOCDIFADDR_IN6, + (caddr_t)&ifra_64, ifp, kernproc)); #endif /* __LP64__ */ } /* NOTREACHED */ } - } + } } - return EOPNOTSUPP; /* just for safety */ + return (EOPNOTSUPP); /* just for safety */ } /* - * Initialize an interface's intetnet6 address - * and routing table entry. + * Initialize an interface's internet6 address and routing table entry. */ static int -in6_ifinit(ifp, ia, sin6, newhost) - struct ifnet *ifp; - struct in6_ifaddr *ia; - struct sockaddr_in6 *sin6; - int newhost; +in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, int ifaupflags) { - int error = 0, plen, ifacount = 0; + int error; struct ifaddr *ifa; - /* - * Give the interface a chance to initialize - * if this is its first address, - * and to validate the address if necessary. - */ - ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { - IFA_LOCK_SPIN(ifa); - if (ifa->ifa_addr->sa_family != AF_INET6) { - IFA_UNLOCK(ifa); - continue; - } - ifacount++; - IFA_UNLOCK(ifa); - } - ifnet_lock_done(ifp); - + error = 0; ifa = &ia->ia_ifa; - IFA_LOCK_SPIN(ifa); - ia->ia_addr = *sin6; - IFA_UNLOCK(ifa); /* * NOTE: SIOCSIFADDR is defined with struct ifreq as parameter, * but here we are sending it down to the interface with a pointer * to struct ifaddr, for legacy reasons. */ - if (ifacount <= 1 && - (error = ifnet_ioctl(ifp, PF_INET6, SIOCSIFADDR, ia))) { - if (error == EOPNOTSUPP) - error = 0; - else if (error) - return(error); + if ((ifaupflags & IN6_IFAUPDATE_1STADDR) != 0) { + error = ifnet_ioctl(ifp, PF_INET6, SIOCSIFADDR, ia); + if (error != 0) { + if (error != EOPNOTSUPP) + return (error); + error = 0; + } } IFA_LOCK(ifa); - ia->ia_ifa.ifa_metric = ifp->if_metric; - - /* we could do in(6)_socktrim here, but just omit it at this moment. */ /* * Special case: @@ -2598,34 +2877,34 @@ in6_ifinit(ifp, ia, sin6, newhost) * interface, install a route to the destination as an interface * direct route. */ - plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ - if (plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { + if (!(ia->ia_flags & IFA_ROUTE) && ia->ia_plen == 128 && + ia->ia_dstaddr.sin6_family == AF_INET6) { IFA_UNLOCK(ifa); - if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD, - RTF_UP | RTF_HOST)) != 0) - return(error); + error = rtinit(ifa, RTM_ADD, RTF_UP | RTF_HOST); + if (error != 0) + return (error); IFA_LOCK(ifa); ia->ia_flags |= IFA_ROUTE; } IFA_LOCK_ASSERT_HELD(ifa); - if (plen < 128) { + if (ia->ia_plen < 128) { /* * The RTF_CLONING flag is necessary for in6_is_ifloop_auto(). */ - ia->ia_ifa.ifa_flags |= RTF_CLONING; + ia->ia_flags |= RTF_CLONING; } + IFA_UNLOCK(ifa); + /* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). 
*/ - if (newhost) { - /* set the rtrequest function to create llinfo */ - ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; - IFA_UNLOCK(ifa); - in6_ifaddloop(&(ia->ia_ifa)); - } else { - IFA_UNLOCK(ifa); - } + if ((ifaupflags & IN6_IFAUPDATE_NEWADDR) != 0) + in6_ifaddloop(ifa); - return(error); + /* invalidate route caches */ + routegenid_inet6_update(); + + VERIFY(error == 0); + return (0); } void @@ -2654,7 +2933,7 @@ in6ifa_ifpforlinklocal(ifp, ignoreflags) } if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) { if ((((struct in6_ifaddr *)ifa)->ia6_flags & - ignoreflags) != 0) { + ignoreflags) != 0) { IFA_UNLOCK(ifa); continue; } @@ -2666,7 +2945,7 @@ in6ifa_ifpforlinklocal(ifp, ignoreflags) } ifnet_lock_done(ifp); - return((struct in6_ifaddr *)ifa); + return ((struct in6_ifaddr *)ifa); } /* @@ -2696,7 +2975,7 @@ in6ifa_ifpwithaddr(ifp, addr) } ifnet_lock_done(ifp); - return((struct in6_ifaddr *)ifa); + return ((struct in6_ifaddr *)ifa); } struct in6_ifaddr * @@ -2724,21 +3003,83 @@ in6ifa_prproxyaddr(struct in6_addr *addr) return (ia); } +void +in6ifa_getlifetime(struct in6_ifaddr *ia6, struct in6_addrlifetime *t_dst, + int iscalendar) +{ + struct in6_addrlifetime_i *t_src = &ia6->ia6_lifetime; + struct timeval caltime; + + t_dst->ia6t_vltime = t_src->ia6ti_vltime; + t_dst->ia6t_pltime = t_src->ia6ti_pltime; + t_dst->ia6t_expire = 0; + t_dst->ia6t_preferred = 0; + + /* account for system time change */ + getmicrotime(&caltime); + t_src->ia6ti_base_calendartime += + NET_CALCULATE_CLOCKSKEW(caltime, + t_src->ia6ti_base_calendartime, net_uptime(), + t_src->ia6ti_base_uptime); + + if (iscalendar) { + if (t_src->ia6ti_expire != 0 && + t_src->ia6ti_vltime != ND6_INFINITE_LIFETIME) + t_dst->ia6t_expire = t_src->ia6ti_base_calendartime + + t_src->ia6ti_expire - t_src->ia6ti_base_uptime; + + if (t_src->ia6ti_preferred != 0 && + t_src->ia6ti_pltime != ND6_INFINITE_LIFETIME) + t_dst->ia6t_preferred = t_src->ia6ti_base_calendartime + + t_src->ia6ti_preferred - t_src->ia6ti_base_uptime; + } else { + if (t_src->ia6ti_expire != 0 && + t_src->ia6ti_vltime != ND6_INFINITE_LIFETIME) + t_dst->ia6t_expire = t_src->ia6ti_expire; + + if (t_src->ia6ti_preferred != 0 && + t_src->ia6ti_pltime != ND6_INFINITE_LIFETIME) + t_dst->ia6t_preferred = t_src->ia6ti_preferred; + } +} + +void +in6ifa_setlifetime(struct in6_ifaddr *ia6, struct in6_addrlifetime *t_src) +{ + struct in6_addrlifetime_i *t_dst = &ia6->ia6_lifetime; + struct timeval caltime; + + /* account for system time change */ + getmicrotime(&caltime); + t_dst->ia6ti_base_calendartime += + NET_CALCULATE_CLOCKSKEW(caltime, + t_dst->ia6ti_base_calendartime, net_uptime(), + t_dst->ia6ti_base_uptime); + + /* trust the caller for the values */ + t_dst->ia6ti_expire = t_src->ia6t_expire; + t_dst->ia6ti_preferred = t_src->ia6t_preferred; + t_dst->ia6ti_vltime = t_src->ia6t_vltime; + t_dst->ia6ti_pltime = t_src->ia6t_pltime; +} + /* * Convert IP6 address to printable (loggable) representation. 
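in6ifa_getlifetime() above maps uptime-based expiries onto the calendar clock through the (base_calendartime, base_uptime) pair that NET_CALCULATE_CLOCKSKEW keeps re-based. Assuming no clock skew since the last re-basing, the conversion is plain offset arithmetic; values below are illustrative:

#include <stdio.h>
#include <stdint.h>

static uint64_t
uptime_to_calendar(uint64_t expire_uptime, uint64_t base_calendar,
    uint64_t base_uptime)
{
	return (base_calendar + expire_uptime - base_uptime);
}

int main(void)
{
	/* address expires at uptime 5000s; uptime 1000s corresponded
	 * to calendar time 1700000000 at the last re-basing */
	printf("calendar expiry: %llu\n", (unsigned long long)
	    uptime_to_calendar(5000, 1700000000ULL, 1000));
	return (0);
}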
*/ -static char digits[] = "0123456789abcdef"; -static int ip6round = 0; char * -ip6_sprintf(addr) - const struct in6_addr *addr; +ip6_sprintf(const struct in6_addr *addr) { + static const char digits[] = "0123456789abcdef"; + static int ip6round = 0; static char ip6buf[8][48]; + int i; char *cp; const u_short *a = (const u_short *)addr; const u_char *d; + u_char n; int dcolon = 0; + int zpad = 0; ip6round = (ip6round + 1) & 7; cp = ip6buf[ip6round]; @@ -2767,15 +3108,26 @@ ip6_sprintf(addr) continue; } d = (const u_char *)a; - *cp++ = digits[*d >> 4]; - *cp++ = digits[*d++ & 0xf]; - *cp++ = digits[*d >> 4]; - *cp++ = digits[*d & 0xf]; + zpad = 0; + if ((n = *d >> 4) != 0) { + *cp++ = digits[n]; + zpad = 1; + } + if ((n = *d++ & 0xf) != 0 || zpad) { + *cp++ = digits[n]; + zpad = 1; + } + if ((n = *d >> 4) != 0 || zpad) { + *cp++ = digits[n]; + zpad = 1; + } + if ((n = *d & 0xf) != 0 || zpad) + *cp++ = digits[n]; *cp++ = ':'; a++; } *--cp = 0; - return(ip6buf[ip6round]); + return (ip6buf[ip6round]); } int @@ -2841,14 +3193,14 @@ in6_is_addr_deprecated(struct sockaddr_in6 *sa6) (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) { IFA_UNLOCK(&ia->ia_ifa); lck_rw_done(&in6_ifaddr_rwlock); - return(1); /* true */ + return (1); /* true */ } /* XXX: do we still have to go thru the rest of the list? */ IFA_UNLOCK(&ia->ia_ifa); } lck_rw_done(&in6_ifaddr_rwlock); - return(0); /* false */ + return (0); /* false */ } /* @@ -2872,7 +3224,7 @@ struct in6_addr *src, *dst; break; } else match += 8; - return match; + return (match); } /* XXX: to be scope conscious */ @@ -2885,22 +3237,21 @@ in6_are_prefix_equal(p1, p2, len) /* sanity check */ if (0 > len || len > 128) { - log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n", - len); - return(0); + log(LOG_ERR, "%s: invalid prefix length(%d)\n", __func__, len); + return (0); } bytelen = len / 8; bitlen = len % 8; if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen)) - return(0); - if (bitlen != 0 && + return (0); + if (bitlen != 0 && p1->s6_addr[bytelen] >> (8 - bitlen) != p2->s6_addr[bytelen] >> (8 - bitlen)) - return(0); + return (0); - return(1); + return (1); } void @@ -2913,12 +3264,11 @@ in6_prefixlen2mask(maskp, len) /* sanity check */ if (0 > len || len > 128) { - log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n", - len); + log(LOG_ERR, "%s: invalid prefix length(%d)\n", __func__, len); return; } - bzero(maskp, sizeof(*maskp)); + bzero(maskp, sizeof (*maskp)); bytelen = len / 8; bitlen = len % 8; for (i = 0; i < bytelen; i++) @@ -2931,18 +3281,16 @@ in6_prefixlen2mask(maskp, len) * return the best address out of the same scope */ struct in6_ifaddr * -in6_ifawithscope( - struct ifnet *oifp, - struct in6_addr *dst) +in6_ifawithscope(struct ifnet *oifp, struct in6_addr *dst) { int dst_scope = in6_addrscope(dst), src_scope, best_scope = 0; int blen = -1; struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *ifa_best = NULL; - + if (oifp == NULL) { - return(NULL); + return (NULL); } /* @@ -2960,8 +3308,7 @@ in6_ifawithscope( continue; ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { int tlen = -1, dscopecmp, bscopecmp, matchcmp; IFA_LOCK(ifa); @@ -3030,7 +3377,7 @@ in6_ifawithscope( * A deprecated address SHOULD NOT be used in new * communications if an alternate (non-deprecated) * address is available and has sufficient scope. - * RFC 2462, Section 5.5.4. + * RFC 4862, Section 5.5.4. 
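The zpad logic added to ip6_sprintf() above suppresses leading zero nibbles within each 16-bit group, so 0x0021 prints as "21" rather than "0021" (and an all-zero group that reaches this path yields no digits at all). A standalone copy of the per-group loop:

#include <stdio.h>

static int
put_group(char *cp, unsigned int group)		/* returns chars written */
{
	static const char digits[] = "0123456789abcdef";
	int shift, zpad = 0, len = 0;

	for (shift = 12; shift >= 0; shift -= 4) {
		unsigned int n = (group >> shift) & 0xf;
		if (n != 0 || zpad) {
			cp[len++] = digits[n];
			zpad = 1;
		}
	}
	return (len);
}

int main(void)
{
	char buf[5];

	buf[put_group(buf, 0x0021)] = '\0';
	printf("%s\n", buf);	/* prints "21" */
	return (0);
}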
*/ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { @@ -3061,7 +3408,7 @@ in6_ifawithscope( */ if ((ifa_best->ia6_flags & IN6_IFF_DEPRECATED) && (((struct in6_ifaddr *)ifa)->ia6_flags & - IN6_IFF_DEPRECATED) == 0) + IN6_IFF_DEPRECATED) == 0) goto replace; /* @@ -3078,19 +3425,19 @@ in6_ifawithscope( ifat = (struct in6_ifaddr *)ifa; if ((ifa_best->ia6_flags & - (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) - == IN6_IFF_AUTOCONF && + (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) + == IN6_IFF_AUTOCONF && (ifat->ia6_flags & - (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) - == (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) { + (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) + == (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) { goto replace; } if ((ifa_best->ia6_flags & - (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) + (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) == (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY) && (ifat->ia6_flags & - (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) - == IN6_IFF_AUTOCONF) { + (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY)) + == IN6_IFF_AUTOCONF) { IFA_UNLOCK(ifa); continue; } @@ -3134,17 +3481,17 @@ in6_ifawithscope( * longest address match against dst is considered. * * The precise decision table is as follows: - * dscopecmp bscopecmp match bI oI | replace? - * N/A equal N/A Y N | No (1) - * N/A equal N/A N Y | Yes (2) - * N/A equal larger N/A | Yes (3) - * N/A equal !larger N/A | No (4) - * larger larger N/A N/A | No (5) - * larger smaller N/A N/A | Yes (6) - * smaller larger N/A N/A | Yes (7) - * smaller smaller N/A N/A | No (8) - * equal smaller N/A N/A | Yes (9) - * equal larger (already done at A above) + * dscopecmp bscopecmp match bI oI | replace? + * N/A equal N/A Y N | No (1) + * N/A equal N/A N Y | Yes (2) + * N/A equal larger N/A | Yes (3) + * N/A equal !larger N/A | No (4) + * larger larger N/A N/A | No (5) + * larger smaller N/A N/A | Yes (6) + * smaller larger N/A N/A | Yes (7) + * smaller smaller N/A N/A | No (8) + * equal smaller N/A N/A | Yes (9) + * equal larger (already done at A above) */ dscopecmp = IN6_ARE_SCOPE_CMP(src_scope, dst_scope); bscopecmp = IN6_ARE_SCOPE_CMP(src_scope, best_scope); @@ -3194,8 +3541,9 @@ in6_ifawithscope( replace: IFA_ADDREF_LOCKED(ifa); /* for ifa_best */ blen = tlen >= 0 ? tlen : - in6_matchlen(IFA_IN6(ifa), dst); - best_scope = in6_addrscope(&ifa2ia6(ifa)->ia_addr.sin6_addr); + in6_matchlen(IFA_IN6(ifa), dst); + best_scope = + in6_addrscope(&ifa2ia6(ifa)->ia_addr.sin6_addr); IFA_UNLOCK(ifa); if (ifa_best) IFA_REMREF(&ifa_best->ia_ifa); @@ -3225,7 +3573,7 @@ replace: IFA_UNLOCK(&ifa_best->ia_ifa); } - return(ifa_best); + return (ifa_best); } /* @@ -3233,9 +3581,7 @@ replace: * found, return the first valid address from designated IF. */ struct in6_ifaddr * -in6_ifawithifp( - struct ifnet *ifp, - struct in6_addr *dst) +in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) { int dst_scope = in6_addrscope(dst), blen = -1, tlen; struct ifaddr *ifa; @@ -3251,8 +3597,7 @@ in6_ifawithifp( * If none, return one of global addresses assigned other ifs. 
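One rule from the source-selection loop above, extracted: a deprecated candidate never displaces a non-deprecated best address, and a non-deprecated candidate always displaces a deprecated best (RFC 4862, Section 5.5.4). A sketch; the flag value is as defined in in6_var.h:

#include <stdio.h>

#define IN6_IFF_DEPRECATED	0x10	/* from in6_var.h */

static int
prefer_over(unsigned int best_flags, unsigned int cand_flags)
{
	if ((cand_flags & IN6_IFF_DEPRECATED) &&
	    !(best_flags & IN6_IFF_DEPRECATED))
		return (0);	/* keep current best */
	if ((best_flags & IN6_IFF_DEPRECATED) &&
	    !(cand_flags & IN6_IFF_DEPRECATED))
		return (1);	/* replace */
	return (-1);		/* fall through to scope/matchlen rules */
}

int main(void)
{
	printf("%d\n", prefer_over(IN6_IFF_DEPRECATED, 0));	/* 1 */
	printf("%d\n", prefer_over(0, IN6_IFF_DEPRECATED));	/* 0 */
	return (0);
}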
*/ ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { IFA_LOCK(ifa); if (ifa->ifa_addr->sa_family != AF_INET6) { IFA_UNLOCK(ifa); @@ -3291,7 +3636,8 @@ in6_ifawithifp( if (blen == -1) { IFA_UNLOCK(ifa); IFA_LOCK(&besta->ia_ifa); - blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst); + blen = in6_matchlen( + &besta->ia_addr.sin6_addr, dst); IFA_UNLOCK(&besta->ia_ifa); IFA_LOCK(ifa); } @@ -3318,11 +3664,10 @@ in6_ifawithifp( ifnet_lock_done(ifp); if (dep[0] != NULL) IFA_REMREF(&dep[0]->ia_ifa); - return(besta); + return (besta); } - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { IFA_LOCK(ifa); if (ifa->ifa_addr->sa_family != AF_INET6) { IFA_UNLOCK(ifa); @@ -3359,7 +3704,7 @@ in6_ifawithifp( IFA_REMREF(&dep[0]->ia_ifa); if (dep[1] != NULL) IFA_REMREF(&dep[1]->ia_ifa); - return (struct in6_ifaddr *)ifa; + return ((struct in6_ifaddr *)ifa); } ifnet_lock_done(ifp); @@ -3367,55 +3712,42 @@ in6_ifawithifp( if (dep[0]) { if (dep[1] != NULL) IFA_REMREF(&dep[1]->ia_ifa); - return dep[0]; + return (dep[0]); } if (dep[1]) - return dep[1]; + return (dep[1]); - return NULL; + return (NULL); } /* * perform DAD when interface becomes IFF_UP. */ -int -in6_if_up( - struct ifnet *ifp, - struct in6_aliasreq *ifra) +static void +in6_if_up_dad_start(struct ifnet *ifp) { struct ifaddr *ifa; - struct in6_ifaddr *ia; - int dad_delay; /* delay ticks before DAD output */ - int error; - - /* - * special cases, like 6to4, are handled in in6_ifattach - */ - error = in6_ifattach(ifp, NULL, ifra); - if (error != 0) - return error; - - dad_delay = 0; + /* start DAD on all the interface addresses */ ifnet_lock_exclusive(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { + struct in6_ifaddr *ia6; + IFA_LOCK_SPIN(ifa); if (ifa->ifa_addr->sa_family != AF_INET6) { IFA_UNLOCK(ifa); continue; } - ia = (struct in6_ifaddr *)ifa; - if (ia->ia6_flags & IN6_IFF_DADPROGRESS) { + ia6 = (struct in6_ifaddr *)ifa; + if (ia6->ia6_flags & IN6_IFF_DADPROGRESS) { + int delay = 0; /* delay ticks before DAD output */ IFA_UNLOCK(ifa); - nd6_dad_start(ifa, &dad_delay); + nd6_dad_start(ifa, &delay); } else { IFA_UNLOCK(ifa); } } ifnet_lock_done(ifp); - - return 0; } int @@ -3423,7 +3755,7 @@ in6if_do_dad( struct ifnet *ifp) { if ((ifp->if_flags & IFF_LOOPBACK) != 0) - return(0); + return (0); /* * If we are using the alternative neighbor discovery @@ -3447,7 +3779,7 @@ in6if_do_dad( * interfaces. We should even omit it, because loop-backed * NS would confuse the DAD procedure. */ - return(0); + return (0); default: /* * Our DAD routine requires the interface up and running. @@ -3460,9 +3792,9 @@ in6if_do_dad( */ if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) - return(0); + return (0); - return(1); + return (1); } } @@ -3501,8 +3833,7 @@ in6_setmaxmtu(void) * to the given interface. The length should be defined in "IPv6 over * xxx-link" document. Note that address architecture might also define * the length for a particular set of address prefixes, regardless of the - * link type. As clarified in rfc2462bis, those two definitions should be - * consistent, and those really are as of August 2004. + * link type. Also see RFC 4862 for additional background. */ int in6_if2idlen(struct ifnet *ifp) @@ -3557,7 +3888,8 @@ in6_if2idlen(struct ifnet *ifp) * we always use the constant, but make an explicit notice * indicating the "unknown" case. 
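The in6if_do_dad() gating below this point boils down to two checks for ordinary interfaces: never on loopback, and only when the interface is both up and running. A userland sketch keeping just those two checks (the function also exempts certain interface types); flag semantics come from net/if.h:

#include <stdio.h>
#include <net/if.h>

static int
do_dad(unsigned int if_flags)
{
	if (if_flags & IFF_LOOPBACK)
		return (0);
	if ((if_flags & (IFF_UP | IFF_RUNNING)) != (IFF_UP | IFF_RUNNING))
		return (0);
	return (1);
}

int main(void)
{
	printf("%d\n", do_dad(IFF_UP | IFF_RUNNING));		/* 1 */
	printf("%d\n", do_dad(IFF_UP));				/* 0 */
	printf("%d\n", do_dad(IFF_UP | IFF_RUNNING | IFF_LOOPBACK)); /* 0 */
	return (0);
}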
*/ - printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type); + log(LOG_NOTICE, "%s: unknown link type (%d)\n", __func__, + ifp->if_type); return (64); } } @@ -3568,8 +3900,8 @@ in6_if2idlen(struct ifnet *ifp) void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { - bzero(sin, sizeof(*sin)); - sin->sin_len = sizeof(struct sockaddr_in); + bzero(sin, sizeof (*sin)); + sin->sin_len = sizeof (struct sockaddr_in); sin->sin_family = AF_INET; sin->sin_port = sin6->sin6_port; sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3]; @@ -3579,8 +3911,8 @@ in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { - bzero(sin6, sizeof(*sin6)); - sin6->sin6_len = sizeof(struct sockaddr_in6); + bzero(sin6, sizeof (*sin6)); + sin6->sin6_len = sizeof (struct sockaddr_in6); sin6->sin6_family = AF_INET6; sin6->sin6_port = sin->sin_port; sin6->sin6_addr.s6_addr32[0] = 0; @@ -3588,8 +3920,7 @@ in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) if (sin->sin_addr.s_addr) { sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr; - } - else { + } else { sin6->sin6_addr.s6_addr32[2] = 0; sin6->sin6_addr.s6_addr32[3] = 0; } @@ -3618,62 +3949,61 @@ in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) struct sockaddr_in *sin_p; struct sockaddr_in6 *sin6_p; - MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, - M_WAITOK); + MALLOC(sin6_p, struct sockaddr_in6 *, sizeof (*sin6_p), M_SONAME, + M_WAITOK); if (sin6_p == NULL) - return ENOBUFS; + return (ENOBUFS); sin_p = (struct sockaddr_in *)(void *)*nam; in6_sin_2_v4mapsin6(sin_p, sin6_p); FREE(*nam, M_SONAME); *nam = (struct sockaddr *)sin6_p; - return 0; + return (0); } /* * Posts in6_event_data message kernel events. * * To get the same size of kev_in6_data between ILP32 and LP64 data models - * we are using a special version of the in6_addrlifetime structure that - * uses only 32 bits fields to be compatible with Leopard, and that + * we are using a special version of the in6_addrlifetime structure that + * uses only 32 bits fields to be compatible with Leopard, and that * are large enough to span 68 years. 
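in6_sin_2_v4mapsin6() below embeds an IPv4 address into the ::ffff:0:0/96 mapped prefix (s6_addr32[2] = IPV6_ADDR_INT32_SMP, s6_addr32[3] = the IPv4 word). A userland analogue of the same embedding:

#include <stdio.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

static void
v4_to_mapped(const struct in_addr *v4, struct in6_addr *v6)
{
	memset(v6, 0, sizeof (*v6));
	if (v4->s_addr != 0) {
		v6->s6_addr[10] = 0xff;		/* ::ffff:0:0/96 */
		v6->s6_addr[11] = 0xff;
		memcpy(&v6->s6_addr[12], &v4->s_addr, 4);
	}
}

int main(void)
{
	struct in_addr a4;
	struct in6_addr a6;
	char buf[INET6_ADDRSTRLEN];

	inet_pton(AF_INET, "192.0.2.1", &a4);
	v4_to_mapped(&a4, &a6);
	printf("%s\n", inet_ntop(AF_INET6, &a6, buf, sizeof (buf)));
	/* prints ::ffff:192.0.2.1 */
	return (0);
}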
*/ void in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa) { - struct kev_msg ev_msg; - struct kev_in6_data in6_event_data; + struct kev_msg ev_msg; + struct kev_in6_data in6_event_data; + struct in6_addrlifetime ia6_lt; - bzero(&in6_event_data, sizeof(struct kev_in6_data)); - bzero(&ev_msg, sizeof(struct kev_msg)); - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_INET6_SUBCLASS; - ev_msg.event_code = event_code; + bzero(&in6_event_data, sizeof (struct kev_in6_data)); + bzero(&ev_msg, sizeof (struct kev_msg)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_INET6_SUBCLASS; + ev_msg.event_code = event_code; IFA_LOCK(&ifa->ia_ifa); - in6_event_data.ia_addr = ifa->ia_addr; - in6_event_data.ia_net = ifa->ia_net; - in6_event_data.ia_dstaddr = ifa->ia_dstaddr; - in6_event_data.ia_prefixmask = ifa->ia_prefixmask; - in6_event_data.ia_plen = ifa->ia_plen; - in6_event_data.ia6_flags = (u_int32_t)ifa->ia6_flags; - - in6_event_data.ia_lifetime.ia6t_expire = - ifa->ia6_lifetime.ia6t_expire; - in6_event_data.ia_lifetime.ia6t_preferred = - ifa->ia6_lifetime.ia6t_preferred; - in6_event_data.ia_lifetime.ia6t_vltime = - ifa->ia6_lifetime.ia6t_vltime; - in6_event_data.ia_lifetime.ia6t_pltime = - ifa->ia6_lifetime.ia6t_pltime; + in6_event_data.ia_addr = ifa->ia_addr; + in6_event_data.ia_net = ifa->ia_net; + in6_event_data.ia_dstaddr = ifa->ia_dstaddr; + in6_event_data.ia_prefixmask = ifa->ia_prefixmask; + in6_event_data.ia_plen = ifa->ia_plen; + in6_event_data.ia6_flags = (u_int32_t)ifa->ia6_flags; + + /* retrieve time as calendar time (last arg is 1) */ + in6ifa_getlifetime(ifa, &ia6_lt, 1); + in6_event_data.ia_lifetime.ia6t_expire = ia6_lt.ia6t_expire; + in6_event_data.ia_lifetime.ia6t_preferred = ia6_lt.ia6t_preferred; + in6_event_data.ia_lifetime.ia6t_vltime = ia6_lt.ia6t_vltime; + in6_event_data.ia_lifetime.ia6t_pltime = ia6_lt.ia6t_pltime; IFA_UNLOCK(&ifa->ia_ifa); if (ifp != NULL) { (void) strncpy(&in6_event_data.link_data.if_name[0], ifp->if_name, IFNAMSIZ); in6_event_data.link_data.if_family = ifp->if_family; - in6_event_data.link_data.if_unit = (u_int32_t) ifp->if_unit; + in6_event_data.link_data.if_unit = (u_int32_t)ifp->if_unit; } ev_msg.dv[0].data_ptr = &in6_event_data; @@ -3689,6 +4019,7 @@ in6_post_msg(struct ifnet *ifp, u_int32_t event_code, struct in6_ifaddr *ifa) void in6_ifaddr_init(void) { + in6_cga_init(); in6_multi_init(); PE_parse_boot_argn("ifa_debug", &in6ifa_debug, sizeof (in6ifa_debug)); @@ -3731,6 +4062,7 @@ in6_ifaddr_alloc(int how) ctrace_record(&in6ifa_dbg->in6ifa_alloc); } } + return (in6ifa); } @@ -3840,21 +4172,22 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia) { uint32_t flags = IN6_IFF_TENTATIVE; uint32_t optdad = nd6_optimistic_dad; - + if (optdad && (ia->ia_ifp->if_eflags & IFEF_IPV6_ROUTER) == 0) { if ((optdad & ND6_OPTIMISTIC_DAD_LINKLOCAL) && IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) flags = IN6_IFF_OPTIMISTIC; else if ((optdad & ND6_OPTIMISTIC_DAD_AUTOCONF) && - (ia->ia6_flags & IN6_IFF_AUTOCONF)) { + (ia->ia6_flags & IN6_IFF_AUTOCONF)) { if (ia->ia6_flags & IN6_IFF_TEMPORARY) { if (optdad & ND6_OPTIMISTIC_DAD_TEMPORARY) flags = IN6_IFF_OPTIMISTIC; - } else { - flags = IN6_IFF_OPTIMISTIC; + } else if (ia->ia6_flags & IN6_IFF_SECURED) { + if (optdad & ND6_OPTIMISTIC_DAD_SECURED) + flags = IN6_IFF_OPTIMISTIC; } } else if ((optdad & ND6_OPTIMISTIC_DAD_DYNAMIC) && - (ia->ia6_flags & IN6_IFF_DYNAMIC)) { + 
(ia->ia6_flags & IN6_IFF_DYNAMIC)) { if (ia->ia6_flags & IN6_IFF_TEMPORARY) { if (optdad & ND6_OPTIMISTIC_DAD_TEMPORARY) flags = IN6_IFF_OPTIMISTIC; @@ -3863,8 +4196,196 @@ in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia) } } } - + ia->ia6_flags &= ~(IN6_IFF_DUPLICATED | IN6_IFF_DADPROGRESS); ia->ia6_flags |= flags; } +/* + * Handle SIOCGASSOCIDS ioctl for PF_INET6 domain. + */ +static int +in6_getassocids(struct socket *so, uint32_t *cnt, user_addr_t aidp) +{ + struct in6pcb *in6p = sotoin6pcb(so); + associd_t aid; + + if (in6p == NULL || in6p->inp_state == INPCB_STATE_DEAD) + return (EINVAL); + + /* IN6PCB has no concept of association */ + aid = ASSOCID_ANY; + *cnt = 0; + + /* just asking how many there are? */ + if (aidp == USER_ADDR_NULL) + return (0); + + return (copyout(&aid, aidp, sizeof (aid))); +} + +/* + * Handle SIOCGCONNIDS ioctl for PF_INET6 domain. + */ +static int +in6_getconnids(struct socket *so, associd_t aid, uint32_t *cnt, + user_addr_t cidp) +{ + struct in6pcb *in6p = sotoin6pcb(so); + connid_t cid; + + if (in6p == NULL || in6p->inp_state == INPCB_STATE_DEAD) + return (EINVAL); + + if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + return (EINVAL); + + /* if connected, return 1 connection count */ + *cnt = ((so->so_state & SS_ISCONNECTED) ? 1 : 0); + + /* just asking how many there are? */ + if (cidp == USER_ADDR_NULL) + return (0); + + /* if IN6PCB is connected, assign it connid 1 */ + cid = ((*cnt != 0) ? 1 : CONNID_ANY); + + return (copyout(&cid, cidp, sizeof (cid))); +} + +/* + * Handle SIOCGCONNINFO ioctl for PF_INET6 domain. + */ +static int +in6_getconninfo(struct socket *so, connid_t cid, uint32_t *flags, + uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, + user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, + user_addr_t aux_data, uint32_t *aux_len) +{ +#pragma unused(aux_data) + struct in6pcb *in6p = sotoin6pcb(so); + struct sockaddr_in6 sin6; + struct ifnet *ifp = NULL; + int error = 0; + u_int32_t copy_len = 0; + + /* + * Don't test for INPCB_STATE_DEAD since this may be called + * after SOF_PCBCLEARING is set, e.g. after tcp_close(). + */ + if (in6p == NULL) { + error = EINVAL; + goto out; + } + + if (cid != CONNID_ANY && cid != CONNID_ALL && cid != 1) { + error = EINVAL; + goto out; + } + + ifp = in6p->in6p_last_outifp; + *ifindex = ((ifp != NULL) ? 
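in6_getassocids()/in6_getconnids() here, and the in6_getconninfo() body that follows, all share a two-phase buffer protocol: a zero length (or null destination) is a pure size query, anything else copies out at most the offered length. A userland model, with memcpy() standing in for copyout() and illustrative names:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

static int
copyout_bounded(const void *src, uint32_t src_len, void *dst,
    uint32_t *lenp)
{
	uint32_t copy_len;

	if (*lenp == 0) {		/* caller is sizing its buffer */
		*lenp = src_len;
		return (0);
	}
	if (dst == NULL)
		return (0);
	copy_len = (*lenp < src_len) ? *lenp : src_len;
	memcpy(dst, src, copy_len);	/* stands in for copyout() */
	*lenp = copy_len;
	return (0);
}

int main(void)
{
	char out[64];
	uint32_t len = 0;
	const char payload[] = "sockaddr bytes";

	copyout_bounded(payload, sizeof (payload), NULL, &len);
	printf("size query -> %u\n", len);	/* 15 */
	len = sizeof (out);
	copyout_bounded(payload, sizeof (payload), out, &len);
	printf("copied %u bytes\n", len);	/* 15 */
	return (0);
}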
ifp->if_index : 0); + *soerror = so->so_error; + *flags = 0; + if (so->so_state & SS_ISCONNECTED) + *flags |= (CIF_CONNECTED | CIF_PREFERRED); + if (in6p->in6p_flags & INP_BOUND_IF) + *flags |= CIF_BOUND_IF; + if (!(in6p->in6p_flags & INP_IN6ADDR_ANY)) + *flags |= CIF_BOUND_IP; + if (!(in6p->in6p_flags & INP_ANONPORT)) + *flags |= CIF_BOUND_PORT; + + bzero(&sin6, sizeof (sin6)); + sin6.sin6_len = sizeof (sin6); + sin6.sin6_family = AF_INET6; + + /* source address and port */ + sin6.sin6_port = in6p->in6p_lport; + bcopy(&in6p->in6p_laddr, &sin6.sin6_addr, sizeof (struct in6_addr)); + if (*src_len == 0) { + *src_len = sin6.sin6_len; + } else { + if (src != USER_ADDR_NULL) { + copy_len = min(*src_len, sizeof (sin6)); + error = copyout(&sin6, src, copy_len); + if (error != 0) + goto out; + *src_len = copy_len; + } + } + + /* destination address and port */ + sin6.sin6_port = in6p->in6p_fport; + bcopy(&in6p->in6p_faddr, &sin6.sin6_addr, sizeof (struct in6_addr)); + if (*dst_len == 0) { + *dst_len = sin6.sin6_len; + } else { + if (dst != USER_ADDR_NULL) { + copy_len = min(*dst_len, sizeof (sin6)); + error = copyout(&sin6, dst, copy_len); + if (error != 0) + goto out; + *dst_len = copy_len; + } + } + + *aux_type = 0; + *aux_len = 0; + if (SOCK_PROTO(so) == IPPROTO_TCP) { + struct conninfo_tcp tcp_ci; + + *aux_type = CIAUX_TCP; + if (*aux_len == 0) { + *aux_len = sizeof (tcp_ci); + } else { + if (aux_data != USER_ADDR_NULL) { + copy_len = min(*aux_len, sizeof (tcp_ci)); + bzero(&tcp_ci, sizeof (tcp_ci)); + tcp_getconninfo(so, &tcp_ci); + error = copyout(&tcp_ci, aux_data, copy_len); + if (error != 0) + goto out; + *aux_len = copy_len; + } + } + } + +out: + return (error); +} + +/* + * 'u' group ioctls. + * + * The switch statement below does nothing at runtime, as it serves as a + * compile time check to ensure that all of the socket 'u' ioctls (those + * in the 'u' group going thru soo_ioctl) that are made available by the + * networking stack is unique. This works as long as this routine gets + * updated each time a new interface ioctl gets added. + * + * Any failures at compile time indicates duplicated ioctl values. + */ +static __attribute__((unused)) void +in6ioctl_cassert(void) +{ + /* + * This is equivalent to _CASSERT() and the compiler wouldn't + * generate any instructions, thus for compile time only. + */ + switch ((u_long)0) { + case 0: + + /* bsd/netinet6/in6_var.h */ + case SIOCGETSGCNT_IN6: + case SIOCGETMIFCNT_IN6_32: + case SIOCGETMIFCNT_IN6_64: + case SIOCAADDRCTL_POLICY: + case SIOCDADDRCTL_POLICY: + case SIOCDRADD_IN6_32: + case SIOCDRADD_IN6_64: + case SIOCDRDEL_IN6_32: + case SIOCDRDEL_IN6_64: + ; + } +} diff --git a/bsd/netinet6/in6.h b/bsd/netinet6/in6.h index 875af39a6..863d53935 100644 --- a/bsd/netinet6/in6.h +++ b/bsd/netinet6/in6.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2008-2012 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. 
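The in6ioctl_cassert() trick above generalizes: duplicate case labels are a compile-time error, so a single switch listing every ioctl value proves they are pairwise distinct at zero runtime cost. A freestanding copy with made-up constants:

#include <stdio.h>

#define CMD_FOO	1
#define CMD_BAR	2
#define CMD_BAZ	3	/* change this to 2 and the build breaks */

static __attribute__((unused)) void
ioctl_cassert(void)
{
	switch (0) {
	case 0:
	case CMD_FOO:
	case CMD_BAR:
	case CMD_BAZ:
		;
	}
}

int main(void)
{
	ioctl_cassert();	/* no-op; the check happens at compile time */
	return (0);
}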
- * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/in6.h,v 1.7.2.4 2001/07/04 09:45:23 ume Exp $ */ -/* $KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -94,27 +91,24 @@ */ #ifndef __KAME_NETINET_IN_H_INCLUDED_ -#error "do not include netinet6/in6.h directly, include netinet/in.h. see RFC2553" +#error "do not include netinet6/in6.h directly, include netinet/in.h. " \ + " see RFC2553" #endif #ifndef _NETINET6_IN6_H_ -#define _NETINET6_IN6_H_ +#define _NETINET6_IN6_H_ #include #include - -#ifndef _SA_FAMILY_T -#define _SA_FAMILY_T -typedef __uint8_t sa_family_t; -#endif +#include /* * Identification of the network protocol stack * for *BSD-current/release: http://www.kame.net/dev/cvsweb.cgi/kame/COVERAGE * has the table of implementation/integration differences. */ -#define __KAME__ -#define __KAME_VERSION "2009/apple-darwin" +#define __KAME__ +#define __KAME_VERSION "2009/apple-darwin" #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) /* @@ -163,46 +157,46 @@ struct in6_addr { } __u6_addr; /* 128-bit IP6 address */ }; -#define s6_addr __u6_addr.__u6_addr8 -#ifdef KERNEL /*XXX nonstandard*/ -#define s6_addr8 __u6_addr.__u6_addr8 -#define s6_addr16 __u6_addr.__u6_addr16 -#define s6_addr32 __u6_addr.__u6_addr32 +#define s6_addr __u6_addr.__u6_addr8 +#ifdef KERNEL /* XXX nonstandard */ +#define s6_addr8 __u6_addr.__u6_addr8 +#define s6_addr16 __u6_addr.__u6_addr16 +#define s6_addr32 __u6_addr.__u6_addr32 #endif -#define INET6_ADDRSTRLEN 46 +#define INET6_ADDRSTRLEN 46 /* * Socket address for IPv6 */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define SIN6_LEN +#define SIN6_LEN #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ struct sockaddr_in6 { - __uint8_t sin6_len; /* length of this struct(sa_family_t)*/ + __uint8_t sin6_len; /* length of this struct(sa_family_t) */ sa_family_t sin6_family; /* AF_INET6 (sa_family_t) */ - in_port_t sin6_port; /* Transport layer port # (in_port_t)*/ + in_port_t sin6_port; /* Transport layer port # (in_port_t) */ __uint32_t sin6_flowinfo; /* IP6 flow information */ struct in6_addr sin6_addr; /* IP6 address */ __uint32_t sin6_scope_id; /* scope zone index */ }; -#ifdef KERNEL /*XXX nonstandard*/ +#ifdef KERNEL /* XXX nonstandard */ /* * Local definition for masks */ -#define IN6MASK0 {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}} -#define IN6MASK7 {{{ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ +#define IN6MASK0 {{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}} +#define IN6MASK7 {{{ 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} -#define IN6MASK16 {{{ 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ +#define IN6MASK16 {{{ 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} -#define IN6MASK32 {{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \ +#define 
IN6MASK32 {{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} -#define IN6MASK64 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ +#define IN6MASK64 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} -#define IN6MASK96 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ +#define IN6MASK96 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }}} -#define IN6MASK128 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ +#define IN6MASK128 {{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}} #endif @@ -220,59 +214,60 @@ extern const struct in6_addr in6mask128; #define SIN6(s) ((struct sockaddr_in6 *)(void *)s) #define satosin6(sa) SIN6(sa) #define sin6tosa(sin6) ((struct sockaddr *)(void *)(sin6)) +#define SIN6IFSCOPE(s) SIN6(s) #endif /* KERNEL_PRIVATE */ -#ifdef KERNEL /*XXX nonstandard*/ +#ifdef KERNEL /* XXX nonstandard */ /* * Macros started with IPV6_ADDR is KAME local */ #if BYTE_ORDER == BIG_ENDIAN -#define IPV6_ADDR_INT32_ONE 1 -#define IPV6_ADDR_INT32_TWO 2 -#define IPV6_ADDR_INT32_MNL 0xff010000 -#define IPV6_ADDR_INT32_MLL 0xff020000 -#define IPV6_ADDR_INT32_SMP 0x0000ffff -#define IPV6_ADDR_INT16_ULL 0xfe80 -#define IPV6_ADDR_INT16_USL 0xfec0 -#define IPV6_ADDR_INT16_MLL 0xff02 +#define IPV6_ADDR_INT32_ONE 1 +#define IPV6_ADDR_INT32_TWO 2 +#define IPV6_ADDR_INT32_MNL 0xff010000 +#define IPV6_ADDR_INT32_MLL 0xff020000 +#define IPV6_ADDR_INT32_SMP 0x0000ffff +#define IPV6_ADDR_INT16_ULL 0xfe80 +#define IPV6_ADDR_INT16_USL 0xfec0 +#define IPV6_ADDR_INT16_MLL 0xff02 #elif BYTE_ORDER == LITTLE_ENDIAN -#define IPV6_ADDR_INT32_ONE 0x01000000 -#define IPV6_ADDR_INT32_TWO 0x02000000 -#define IPV6_ADDR_INT32_MNL 0x000001ff -#define IPV6_ADDR_INT32_MLL 0x000002ff -#define IPV6_ADDR_INT32_SMP 0xffff0000 -#define IPV6_ADDR_INT16_ULL 0x80fe -#define IPV6_ADDR_INT16_USL 0xc0fe -#define IPV6_ADDR_INT16_MLL 0x02ff +#define IPV6_ADDR_INT32_ONE 0x01000000 +#define IPV6_ADDR_INT32_TWO 0x02000000 +#define IPV6_ADDR_INT32_MNL 0x000001ff +#define IPV6_ADDR_INT32_MLL 0x000002ff +#define IPV6_ADDR_INT32_SMP 0xffff0000 +#define IPV6_ADDR_INT16_ULL 0x80fe +#define IPV6_ADDR_INT16_USL 0xc0fe +#define IPV6_ADDR_INT16_MLL 0x02ff #endif #endif /* * Definition of some useful macros to handle IP6 addresses */ -#define IN6ADDR_ANY_INIT \ +#define IN6ADDR_ANY_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}} -#define IN6ADDR_LOOPBACK_INIT \ +#define IN6ADDR_LOOPBACK_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define IN6ADDR_NODELOCAL_ALLNODES_INIT \ +#define IN6ADDR_NODELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} -#define IN6ADDR_INTFACELOCAL_ALLNODES_INIT \ +#define IN6ADDR_INTFACELOCAL_ALLNODES_INIT \ {{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} -#define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ +#define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}} -#define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ +#define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}} -#define IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \ +#define IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \ {{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16 }}} -#define IN6ADDR_V4MAPPED_INIT \ +#define IN6ADDR_V4MAPPED_INIT \ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }}} #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ @@ -293,58 +288,61 @@ extern const struct in6_addr in6addr_linklocal_allv2routers; * in ANSI standard. */ #ifdef KERNEL -#define IN6_ARE_ADDR_EQUAL(a, b) \ - (bcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) +#define IN6_ARE_ADDR_EQUAL(a, b) \ + (bcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], \ + sizeof (struct in6_addr)) == 0) #else #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define IN6_ARE_ADDR_EQUAL(a, b) \ - (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) +#define IN6_ARE_ADDR_EQUAL(a, b) \ + (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof (struct in6_addr)) \ + == 0) #endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */ #endif #ifdef KERNEL /* non standard */ /* see if two addresses are equal in a scope-conscious manner. */ -#define SA6_ARE_ADDR_EQUAL(a, b) \ +#define SA6_ARE_ADDR_EQUAL(a, b) \ (((a)->sin6_scope_id == 0 || (b)->sin6_scope_id == 0 || \ - ((a)->sin6_scope_id == (b)->sin6_scope_id)) && \ - (bcmp(&(a)->sin6_addr, &(b)->sin6_addr, sizeof(struct in6_addr)) == 0)) + ((a)->sin6_scope_id == (b)->sin6_scope_id)) && \ + (bcmp(&(a)->sin6_addr, &(b)->sin6_addr, sizeof (struct in6_addr)) == 0)) #endif /* * Unspecified */ -#define IN6_IS_ADDR_UNSPECIFIED(a) \ +#define IN6_IS_ADDR_UNSPECIFIED(a) \ ((*(const __uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) == 0)) + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) == 0)) /* * Loopback */ -#define IN6_IS_ADDR_LOOPBACK(a) \ +#define IN6_IS_ADDR_LOOPBACK(a) \ ((*(const __uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) == ntohl(1))) + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) == ntohl(1))) /* * IPv4 compatible */ -#define IN6_IS_ADDR_V4COMPAT(a) \ +#define IN6_IS_ADDR_V4COMPAT(a) \ ((*(const __uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) != 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) != ntohl(1))) + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) != 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[12]) != ntohl(1))) /* * Mapped */ -#define 
IN6_IS_ADDR_V4MAPPED(a) \ +#define IN6_IS_ADDR_V4MAPPED(a) \ ((*(const __uint32_t *)(const void *)(&(a)->s6_addr[0]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ - (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == ntohl(0x0000ffff))) + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[4]) == 0) && \ + (*(const __uint32_t *)(const void *)(&(a)->s6_addr[8]) == \ + ntohl(0x0000ffff))) /* * 6to4 @@ -355,125 +353,142 @@ extern const struct in6_addr in6addr_linklocal_allv2routers; * KAME Scope Values */ -#ifdef KERNEL /*XXX nonstandard*/ -#define IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#ifdef KERNEL /* XXX nonstandard */ +#define IPV6_ADDR_SCOPE_NODELOCAL 0x01 #define IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 -#define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 -#define IPV6_ADDR_SCOPE_SITELOCAL 0x05 -#define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ -#define IPV6_ADDR_SCOPE_GLOBAL 0x0e +#define IPV6_ADDR_SCOPE_LINKLOCAL 0x02 +#define IPV6_ADDR_SCOPE_SITELOCAL 0x05 +#define IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ +#define IPV6_ADDR_SCOPE_GLOBAL 0x0e #else -#define __IPV6_ADDR_SCOPE_NODELOCAL 0x01 -#define __IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 -#define __IPV6_ADDR_SCOPE_LINKLOCAL 0x02 -#define __IPV6_ADDR_SCOPE_SITELOCAL 0x05 -#define __IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ -#define __IPV6_ADDR_SCOPE_GLOBAL 0x0e +#define __IPV6_ADDR_SCOPE_NODELOCAL 0x01 +#define __IPV6_ADDR_SCOPE_INTFACELOCAL 0x01 +#define __IPV6_ADDR_SCOPE_LINKLOCAL 0x02 +#define __IPV6_ADDR_SCOPE_SITELOCAL 0x05 +#define __IPV6_ADDR_SCOPE_ORGLOCAL 0x08 /* just used in this file */ +#define __IPV6_ADDR_SCOPE_GLOBAL 0x0e #endif /* * Unicast Scope * Note that we must check topmost 10 bits only, not 16 bits (see RFC2373). */ -#define IN6_IS_ADDR_LINKLOCAL(a) \ +#define IN6_IS_ADDR_LINKLOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80)) -#define IN6_IS_ADDR_SITELOCAL(a) \ +#define IN6_IS_ADDR_SITELOCAL(a) \ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0)) /* * Multicast */ -#define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff) +#define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff) /* * Unique Local IPv6 Unicast Addresses (per RFC 4193) */ -#define IN6_IS_ADDR_UNIQUE_LOCAL(a) (((a)->s6_addr[0] == 0xfc) || ((a)->s6_addr[0] == 0xfd)) +#define IN6_IS_ADDR_UNIQUE_LOCAL(a) \ + (((a)->s6_addr[0] == 0xfc) || ((a)->s6_addr[0] == 0xfd)) -#ifdef KERNEL /*XXX nonstandard*/ -#define IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) +#ifdef KERNEL /* XXX nonstandard */ +#define IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) #else -#define __IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) +#define __IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f) #endif /* * Multicast Scope */ -#ifdef KERNEL /*refers nonstandard items */ -#define IN6_IS_ADDR_MC_NODELOCAL(a) \ +#ifdef KERNEL /* refers nonstandard items */ +#define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_NODELOCAL)) + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_NODELOCAL)) #define IN6_IS_ADDR_MC_INTFACELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_INTFACELOCAL)) -#define IN6_IS_ADDR_MC_LINKLOCAL(a) \ +#define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL)) -#define IN6_IS_ADDR_MC_SITELOCAL(a) \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL)) +#define IN6_IS_ADDR_MC_SITELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - 
(IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL)) -#define IN6_IS_ADDR_MC_ORGLOCAL(a) \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL)) +#define IN6_IS_ADDR_MC_ORGLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_ORGLOCAL)) -#define IN6_IS_ADDR_MC_GLOBAL(a) \ + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_ORGLOCAL)) +#define IN6_IS_ADDR_MC_GLOBAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_GLOBAL)) + (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_GLOBAL)) #else -#define IN6_IS_ADDR_MC_NODELOCAL(a) \ +#define IN6_IS_ADDR_MC_NODELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_NODELOCAL)) -#define IN6_IS_ADDR_MC_LINKLOCAL(a) \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_NODELOCAL)) +#define IN6_IS_ADDR_MC_LINKLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL)) -#define IN6_IS_ADDR_MC_SITELOCAL(a) \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL)) +#define IN6_IS_ADDR_MC_SITELOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL)) -#define IN6_IS_ADDR_MC_ORGLOCAL(a) \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL)) +#define IN6_IS_ADDR_MC_ORGLOCAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL)) -#define IN6_IS_ADDR_MC_GLOBAL(a) \ + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL)) +#define IN6_IS_ADDR_MC_GLOBAL(a) \ (IN6_IS_ADDR_MULTICAST(a) && \ - (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_GLOBAL)) + (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_GLOBAL)) #endif -#ifdef KERNEL /*nonstandard*/ +#ifdef KERNEL /* nonstandard */ /* * KAME Scope */ -#define IN6_IS_SCOPE_LINKLOCAL(a) \ +#define IN6_IS_SCOPE_LINKLOCAL(a) \ ((IN6_IS_ADDR_LINKLOCAL(a)) || \ - (IN6_IS_ADDR_MC_LINKLOCAL(a))) + (IN6_IS_ADDR_MC_LINKLOCAL(a))) -#define IN6_IS_SCOPE_EMBED(a) \ +#define IN6_IS_SCOPE_EMBED(a) \ ((IN6_IS_ADDR_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_LINKLOCAL(a)) || \ (IN6_IS_ADDR_MC_INTFACELOCAL(a))) -#define IFA6_IS_DEPRECATED(a) \ - ((a)->ia6_lifetime.ia6t_preferred != 0 && \ - (a)->ia6_lifetime.ia6t_preferred < timenow.tv_sec) -#define IFA6_IS_INVALID(a) \ - ((a)->ia6_lifetime.ia6t_expire != 0 && \ - (a)->ia6_lifetime.ia6t_expire < timenow.tv_sec) +#define IFA6_IS_DEPRECATED(a, t) \ + ((a)->ia6_lifetime.ia6ti_preferred != 0 && \ + (a)->ia6_lifetime.ia6ti_preferred < (t)) +#define IFA6_IS_INVALID(a, t) \ + ((a)->ia6_lifetime.ia6ti_expire != 0 && \ + (a)->ia6_lifetime.ia6ti_expire < (t)) #endif /* KERNEL */ +#ifdef PRIVATE +#ifndef KERNEL +/* Private declaration for user-space (needed by ip6_mroute.h) */ +struct route_in6 { +#else /* KERNEL */ +struct route_in6_old { +#endif /* KERNEL */ + void *ro_rt; + uint32_t ro_flags; + struct sockaddr_in6 ro_dst; +}; +#endif /* PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE /* * IP6 route structure + * + * A route consists of a destination address and a reference + * to a routing entry. These are often held by protocols + * in their control blocks, e.g. inpcb. */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifdef PRIVATE struct route_in6 { /* - * N.B: struct route_in6 must begin with ro_rt and ro_flags + * N.B: struct route_in6 must begin with ro_{rt,srcia,flags} * because the code does some casts of a 'struct route_in6 *' * to a 'struct route *'. 
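+ *
+ * Editor's sketch (not from the original header) of why this
+ * layout matters; 'dst6' is an assumed destination address:
+ *
+ *	struct route_in6 ro6;
+ *
+ *	bzero(&ro6, sizeof (ro6));
+ *	ro6.ro_dst.sin6_family = AF_INET6;
+ *	ro6.ro_dst.sin6_len = sizeof (struct sockaddr_in6);
+ *	ro6.ro_dst.sin6_addr = dst6;
+ *	rtalloc((struct route *)&ro6);	/* relies on the shared prefix */
+ *
+ * which is exactly the pattern the gif tunnel code uses later in
+ * this patch.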
 */
	struct rtentry *ro_rt;
-	__uint32_t ro_flags; /* route flags */
+	struct ifaddr *ro_srcia;
+	uint32_t ro_flags; /* route flags */
	struct sockaddr_in6 ro_dst;
};
-#endif /* PRIVATE */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
+#endif /* BSD_KERNEL_PRIVATE */
/*
 * Options for use with [gs]etsockopt at the IPV6 level.
@@ -482,7 +497,7 @@ struct route_in6 {
/* no hdrincl */
#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
/*
- * RFC 3542 define the following socket options in a manner incompatible
+ * RFC 3542 defines the following socket options in a manner incompatible
 * with RFC 2292:
 * IPV6_PKTINFO
 * IPV6_HOPLIMIT
@@ -490,152 +505,161 @@ struct route_in6 {
 * IPV6_HOPOPTS
 * IPV6_DSTOPTS
 * IPV6_RTHDR
- *
+ *
 * To use the new IPv6 Sockets options introduced by RFC 3542
- * the constant __APPLE_USE_RFC_3542 must be defined before
+ * the constant __APPLE_USE_RFC_3542 must be defined before
 * including <netinet/in.h>
 *
 * To use the old IPv6 Sockets options from RFC 2292
 * the constant __APPLE_USE_RFC_2292 must be defined before
 * including <netinet/in.h>
 *
 * Note that eventually RFC 3542 is going to be the
- * default and RFC 2292 will be obsolete.
+ * default and RFC 2292 will be obsolete.
 */
-#ifdef XNU_KERNEL_PRIVATE
-#define __APPLE_USE_RFC_3542 1
-#endif /* XNU_KERNEL_PRIVATE */
+#ifdef BSD_KERNEL_PRIVATE
+#define __APPLE_USE_RFC_3542 1
+#endif /* BSD_KERNEL_PRIVATE */
#if defined(__APPLE_USE_RFC_3542) && defined(__APPLE_USE_RFC_2292)
#error "__APPLE_USE_RFC_3542 and __APPLE_USE_RFC_2292 cannot be both defined"
#endif
#if 0 /* the following are relics in IPv4 and hence are disabled */
-#define IPV6_OPTIONS 1 /* buf/ip6_opts; set/get IP6 options */
-#define IPV6_RECVOPTS 5 /* bool; receive all IP6 opts w/dgram */
-#define IPV6_RECVRETOPTS 6 /* bool; receive IP6 opts for response */
-#define IPV6_RECVDSTADDR 7 /* bool; receive IP6 dst addr w/dgram */
-#define IPV6_RETOPTS 8 /* ip6_opts; set/get IP6 options */
+#define IPV6_OPTIONS 1 /* buf/ip6_opts; set/get IP6 options */
+#define IPV6_RECVOPTS 5 /* bool; receive all IP6 opts w/dgram */
+#define IPV6_RECVRETOPTS 6 /* bool; receive IP6 opts for response */
+#define IPV6_RECVDSTADDR 7 /* bool; receive IP6 dst addr w/dgram */
+#define IPV6_RETOPTS 8 /* ip6_opts; set/get IP6 options */
#endif /* 0 */
-#define IPV6_SOCKOPT_RESERVED1 3 /* reserved for future use */
+#define IPV6_SOCKOPT_RESERVED1 3 /* reserved for future use */
#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define IPV6_UNICAST_HOPS 4 /* int; IP6 hops */
-#define IPV6_MULTICAST_IF 9 /* __uint8_t; set/get IP6 multicast i/f */
-#define IPV6_MULTICAST_HOPS 10 /* __uint8_t; set/get IP6 multicast hops */
-#define IPV6_MULTICAST_LOOP 11 /* __uint8_t; set/get IP6 mcast loopback */
-#define IPV6_JOIN_GROUP 12 /* ip6_mreq; join a group membership */
-#define IPV6_LEAVE_GROUP 13 /* ip6_mreq; leave a group membership */
+#define IPV6_UNICAST_HOPS 4 /* int; IP6 hops */
+#define IPV6_MULTICAST_IF 9 /* __uint8_t; set/get IP6 multicast i/f */
+#define IPV6_MULTICAST_HOPS 10 /* __uint8_t; set/get IP6 multicast hops */
+#define IPV6_MULTICAST_LOOP 11 /* __uint8_t; set/get IP6 mcast loopback */
+#define IPV6_JOIN_GROUP 12 /* ip6_mreq; join a group membership */
+#define IPV6_LEAVE_GROUP 13 /* ip6_mreq; leave a group membership */
#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define IPV6_PORTRANGE 14 /* int; range to choose for unspec port */
-#define ICMP6_FILTER 18 /* icmp6_filter; icmp6 filter */
-#define IPV6_2292PKTINFO 19 /* bool; send/recv if, src/dst addr */
-#define IPV6_2292HOPLIMIT 20 /* bool; hop limit */
-#define IPV6_2292NEXTHOP 21 /* bool; next hop addr */
-#define IPV6_2292HOPOPTS 22 /* bool; hop-by-hop option */
-#define IPV6_2292DSTOPTS 23 /* bool; destinaion option */
-#define IPV6_2292RTHDR 24 /* ip6_rthdr: routing header */
-#define IPV6_2292PKTOPTIONS 25 /* buf/cmsghdr; set/get IPv6 options */
- /* obsoleted by RFC3542 */
+#define IPV6_PORTRANGE 14 /* int; range to choose for unspec port */
+#define ICMP6_FILTER 18 /* icmp6_filter; icmp6 filter */
+#define IPV6_2292PKTINFO 19 /* bool; send/recv if, src/dst addr */
+#define IPV6_2292HOPLIMIT 20 /* bool; hop limit */
+#define IPV6_2292NEXTHOP 21 /* bool; next hop addr */
+#define IPV6_2292HOPOPTS 22 /* bool; hop-by-hop option */
+#define IPV6_2292DSTOPTS 23 /* bool; destination option */
+#define IPV6_2292RTHDR 24 /* ip6_rthdr: routing header */
+
+/* buf/cmsghdr; set/get IPv6 options [obsoleted by RFC3542] */
+#define IPV6_2292PKTOPTIONS 25
#ifdef __APPLE_USE_RFC_2292
-#define IPV6_PKTINFO IPV6_2292PKTINFO
-#define IPV6_HOPLIMIT IPV6_2292HOPLIMIT
-#define IPV6_NEXTHOP IPV6_2292NEXTHOP
-#define IPV6_HOPOPTS IPV6_2292HOPOPTS
-#define IPV6_DSTOPTS IPV6_2292DSTOPTS
-#define IPV6_RTHDR IPV6_2292RTHDR
-#define IPV6_PKTOPTIONS IPV6_2292PKTOPTIONS
+#define IPV6_PKTINFO IPV6_2292PKTINFO
+#define IPV6_HOPLIMIT IPV6_2292HOPLIMIT
+#define IPV6_NEXTHOP IPV6_2292NEXTHOP
+#define IPV6_HOPOPTS IPV6_2292HOPOPTS
+#define IPV6_DSTOPTS IPV6_2292DSTOPTS
+#define IPV6_RTHDR IPV6_2292RTHDR
+#define IPV6_PKTOPTIONS IPV6_2292PKTOPTIONS
#endif /* __APPLE_USE_RFC_2292 */
-#define IPV6_CHECKSUM 26 /* int; checksum offset for raw socket */
+#define IPV6_CHECKSUM 26 /* int; checksum offset for raw socket */
#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define IPV6_V6ONLY 27 /* bool; only bind INET6 at wildcard bind */
+#define IPV6_V6ONLY 27 /* bool; only bind INET6 at wildcard bind */
#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
#ifndef KERNEL
-#define IPV6_BINDV6ONLY IPV6_V6ONLY
+#define IPV6_BINDV6ONLY IPV6_V6ONLY
#endif /* KERNEL */
-#if 1 /*IPSEC*/
-#define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */
+#if 1 /* IPSEC */
+#define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */
#endif /* 1 */
-#define IPV6_FAITH 29 /* deprecated */
-
-#if 1 /*IPV6FIREWALL*/
-#define IPV6_FW_ADD 30 /* add a firewall rule to chain */
-#define IPV6_FW_DEL 31 /* delete a firewall rule from chain */
-#define IPV6_FW_FLUSH 32 /* flush firewall rule chain */
-#define IPV6_FW_ZERO 33 /* clear single/all firewall counter(s) */
-#define IPV6_FW_GET 34 /* get entire firewall rule chain */
+#define IPV6_FAITH 29 /* deprecated */
+
+#if 1 /* IPV6FIREWALL */
+#define IPV6_FW_ADD 30 /* add a firewall rule to chain */
+#define IPV6_FW_DEL 31 /* delete a firewall rule from chain */
+#define IPV6_FW_FLUSH 32 /* flush firewall rule chain */
+#define IPV6_FW_ZERO 33 /* clear single/all firewall counter(s) */
+#define IPV6_FW_GET 34 /* get entire firewall rule chain */
#endif /* 1 */
-/* APPLE: NOTE the value of those 2 options is kept unchanged from
- * previous version of darwin/OS X for binary compatibility reasons
- * and differ from FreeBSD (values 57 and 61). See below.
+/*
+ * APPLE: NOTE the values of these 2 options are kept unchanged from
+ * previous versions of darwin/OS X for binary compatibility reasons
+ * and differ from FreeBSD (values 57 and 61). See below.
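+ *
+ * (Editor's sketch, not part of the original header: typical RFC
+ * 3542 use of these two options from user space, where 's' is an
+ * assumed AF_INET6 datagram socket and 'msg' has been set up with
+ * a control buffer for recvmsg(2):
+ *
+ *	int on = 1, tclass;
+ *	struct cmsghdr *cm;
+ *
+ *	setsockopt(s, IPPROTO_IPV6, IPV6_RECVTCLASS, &on, sizeof (on));
+ *	recvmsg(s, &msg, 0);
+ *	for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
+ *	    cm = CMSG_NXTHDR(&msg, cm))
+ *		if (cm->cmsg_level == IPPROTO_IPV6 &&
+ *		    cm->cmsg_type == IPV6_TCLASS)
+ *			tclass = *(int *)CMSG_DATA(cm);
+ *
+ * The received traffic class is delivered as an int-sized cmsg.)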
 */
-#define IPV6_RECVTCLASS 35 /* bool; recv traffic class values */
-#define IPV6_TCLASS 36 /* int; send traffic class value */
+#define IPV6_RECVTCLASS 35 /* bool; recv traffic class values */
+#define IPV6_TCLASS 36 /* int; send traffic class value */
#ifdef __APPLE_USE_RFC_3542
/* new socket options introduced in RFC3542 */
-#define IPV6_RTHDRDSTOPTS 57 /* ip6_dest; send dst option before rthdr
- * APPLE: Value purposely different than FreeBSD (35) to avoid
- * collision with definition of IPV6_RECVTCLASS in previous
- * darwin implementations */
-
-#define IPV6_RECVPKTINFO 61 /* bool; recv if, dst addr
- * APPLE: Value purposely different than FreeBSD (36) to avoid
- * collision with definition of IPV6_TCLASS in previous
- * darwin implementations */
-
-#define IPV6_RECVHOPLIMIT 37 /* bool; recv hop limit */
-#define IPV6_RECVRTHDR 38 /* bool; recv routing header */
-#define IPV6_RECVHOPOPTS 39 /* bool; recv hop-by-hop option */
-#define IPV6_RECVDSTOPTS 40 /* bool; recv dst option after rthdr */
+/*
+ * ip6_dest; send dst option before rthdr
+ * APPLE: Value purposely different than FreeBSD (35) to avoid
+ * collision with definition of IPV6_RECVTCLASS in previous
+ * darwin implementations
+ */
+#define IPV6_RTHDRDSTOPTS 57
+
+/*
+ * bool; recv if, dst addr
+ * APPLE: Value purposely different than FreeBSD (36) to avoid
+ * collision with definition of IPV6_TCLASS in previous
+ * darwin implementations
+ */
+#define IPV6_RECVPKTINFO 61
+
+#define IPV6_RECVHOPLIMIT 37 /* bool; recv hop limit */
+#define IPV6_RECVRTHDR 38 /* bool; recv routing header */
+#define IPV6_RECVHOPOPTS 39 /* bool; recv hop-by-hop option */
+#define IPV6_RECVDSTOPTS 40 /* bool; recv dst option after rthdr */
#ifdef KERNEL
-#define IPV6_RECVRTHDRDSTOPTS 41 /* bool; recv dst option before rthdr */
+#define IPV6_RECVRTHDRDSTOPTS 41 /* bool; recv dst option before rthdr */
#endif
-#define IPV6_USE_MIN_MTU 42 /* bool; send packets at the minimum MTU */
-#define IPV6_RECVPATHMTU 43 /* bool; notify an according MTU */
+#define IPV6_USE_MIN_MTU 42 /* bool; send packets at the minimum MTU */
+#define IPV6_RECVPATHMTU 43 /* bool; notify an according MTU */
+
+/*
+ * mtuinfo; get the current path MTU (sopt), 4 bytes int;
+ * MTU notification (cmsg)
+ */
+#define IPV6_PATHMTU 44
-#define IPV6_PATHMTU 44 /* mtuinfo; get the current path MTU (sopt),
- 4 bytes int; MTU notification (cmsg) */
-#if 0 /*obsoleted during 2292bis -> 3542*/
-#define IPV6_REACHCONF 45 /* no data; ND reachability confirm
- (cmsg only/not in of RFC3542) */
+#if 0 /* obsoleted during 2292bis -> 3542 */
+/* no data; ND reachability confirm (cmsg only/not in RFC3542) */
+#define IPV6_REACHCONF 45
#endif
/* more new socket options introduced in RFC3542 */
-#define IPV6_3542PKTINFO 46 /* in6_pktinfo; send if, src addr */
-#define IPV6_3542HOPLIMIT 47 /* int; send hop limit */
-#define IPV6_3542NEXTHOP 48 /* sockaddr; next hop addr */
-#define IPV6_3542HOPOPTS 49 /* ip6_hbh; send hop-by-hop option */
-#define IPV6_3542DSTOPTS 50 /* ip6_dest; send dst option befor rthdr */
-#define IPV6_3542RTHDR 51 /* ip6_rthdr; send routing header */
+#define IPV6_3542PKTINFO 46 /* in6_pktinfo; send if, src addr */
+#define IPV6_3542HOPLIMIT 47 /* int; send hop limit */
+#define IPV6_3542NEXTHOP 48 /* sockaddr; next hop addr */
+#define IPV6_3542HOPOPTS 49 /* ip6_hbh; send hop-by-hop option */
+#define IPV6_3542DSTOPTS 50 /* ip6_dest; send dst option before rthdr */
+#define IPV6_3542RTHDR 51 /* ip6_rthdr; send routing header */
-#define IPV6_PKTINFO IPV6_3542PKTINFO
-#define IPV6_HOPLIMIT IPV6_3542HOPLIMIT -#define IPV6_NEXTHOP IPV6_3542NEXTHOP -#define IPV6_HOPOPTS IPV6_3542HOPOPTS -#define IPV6_DSTOPTS IPV6_3542DSTOPTS -#define IPV6_RTHDR IPV6_3542RTHDR +#define IPV6_PKTINFO IPV6_3542PKTINFO +#define IPV6_HOPLIMIT IPV6_3542HOPLIMIT +#define IPV6_NEXTHOP IPV6_3542NEXTHOP +#define IPV6_HOPOPTS IPV6_3542HOPOPTS +#define IPV6_DSTOPTS IPV6_3542DSTOPTS +#define IPV6_RTHDR IPV6_3542RTHDR -#define IPV6_AUTOFLOWLABEL 59 /* bool; attach flowlabel automagically */ +#define IPV6_AUTOFLOWLABEL 59 /* bool; attach flowlabel automagically */ -#define IPV6_DONTFRAG 62 /* bool; disable IPv6 fragmentation */ +#define IPV6_DONTFRAG 62 /* bool; disable IPv6 fragmentation */ -#define IPV6_PREFER_TEMPADDR 63 /* int; prefer temporary addresses as - * the source address. - */ +/* int; prefer temporary addresses as the source address. */ +#define IPV6_PREFER_TEMPADDR 63 /* * The following option is private; do not use it from user applications. * It is deliberately defined to the same value as IP_MSFILTER. */ -#define IPV6_MSFILTER 74 /* struct __msfilterreq; - * set/get multicast source filter list. - */ +#define IPV6_MSFILTER 74 /* struct __msfilterreq; */ #endif /* __APPLE_USE_RFC_3542 */ #define IPV6_BOUND_IF 125 /* int; set/get bound interface */ @@ -647,15 +671,15 @@ struct route_in6 { /* to define items, should talk with KAME guys first, for *BSD compatibility */ -#define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. XXX old spec */ -#define IPV6_RTHDR_STRICT 1 /* this hop must be a neighbor. XXX old spec */ -#define IPV6_RTHDR_TYPE_0 0 /* IPv6 routing header type 0 */ +#define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. */ +#define IPV6_RTHDR_STRICT 1 /* this hop must be a neighbor. */ +#define IPV6_RTHDR_TYPE_0 0 /* IPv6 routing header type 0 */ /* * Defaults and limits for options */ -#define IPV6_DEFAULT_MULTICAST_HOPS 1 /* normally limit m'casts to 1 hop */ -#define IPV6_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ +#define IPV6_DEFAULT_MULTICAST_HOPS 1 /* normally limit m'casts to 1 hop */ +#define IPV6_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ /* * The im6o_membership vector for each socket is now dynamically allocated at @@ -710,67 +734,67 @@ struct ip6_mtuinfo { * Third level is protocol number. * Fourth level is desired variable within that protocol. 
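+ *
+ * (Editor's illustration, not in the original header: the name
+ * net.inet6.ip6.forwarding resolves to the MIB
+ * { CTL_NET, PF_INET6, IPPROTO_IPV6, IPV6CTL_FORWARDING } and can
+ * be read from user space with:
+ *
+ *	int fwd;
+ *	size_t len = sizeof (fwd);
+ *	sysctlbyname("net.inet6.ip6.forwarding", &fwd, &len, NULL, 0);
+ * )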
 */
-#define IPV6PROTO_MAXID (IPPROTO_PIM + 1) /* don't list to IPV6PROTO_MAX */
+#define IPV6PROTO_MAXID (IPPROTO_PIM + 1) /* don't list to IPV6PROTO_MAX */
/*
 * Names for IP sysctl objects
 */
-#define IPV6CTL_FORWARDING 1 /* act as router */
-#define IPV6CTL_SENDREDIRECTS 2 /* may send redirects when forwarding*/
-#define IPV6CTL_DEFHLIM 3 /* default Hop-Limit */
+#define IPV6CTL_FORWARDING 1 /* act as router */
+#define IPV6CTL_SENDREDIRECTS 2 /* may send redirects when forwarding */
+#define IPV6CTL_DEFHLIM 3 /* default Hop-Limit */
#ifdef notyet
-#define IPV6CTL_DEFMTU 4 /* default MTU */
+#define IPV6CTL_DEFMTU 4 /* default MTU */
#endif
-#define IPV6CTL_FORWSRCRT 5 /* forward source-routed dgrams */
-#define IPV6CTL_STATS 6 /* stats */
-#define IPV6CTL_MRTSTATS 7 /* multicast forwarding stats */
-#define IPV6CTL_MRTPROTO 8 /* multicast routing protocol */
-#define IPV6CTL_MAXFRAGPACKETS 9 /* max packets reassembly queue */
-#define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */
-#define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimume logging interval */
-#define IPV6CTL_ACCEPT_RTADV 12
-#define IPV6CTL_KEEPFAITH 13 /* deprecated */
-#define IPV6CTL_LOG_INTERVAL 14
-#define IPV6CTL_HDRNESTLIMIT 15
-#define IPV6CTL_DAD_COUNT 16
-#define IPV6CTL_AUTO_FLOWLABEL 17
-#define IPV6CTL_DEFMCASTHLIM 18
-#define IPV6CTL_GIF_HLIM 19 /* default HLIM for gif encap packet */
-#define IPV6CTL_KAME_VERSION 20
-#define IPV6CTL_USE_DEPRECATED 21 /* use deprecated addr (RFC2462 5.5.4) */
-#define IPV6CTL_RR_PRUNE 22 /* walk timer for router renumbering */
-#if 0 /*obsolete*/
-#define IPV6CTL_MAPPED_ADDR 23
+#define IPV6CTL_FORWSRCRT 5 /* forward source-routed dgrams */
+#define IPV6CTL_STATS 6 /* stats */
+#define IPV6CTL_MRTSTATS 7 /* multicast forwarding stats */
+#define IPV6CTL_MRTPROTO 8 /* multicast routing protocol */
+#define IPV6CTL_MAXFRAGPACKETS 9 /* max packets reassembly queue */
+#define IPV6CTL_SOURCECHECK 10 /* verify source route and intf */
+#define IPV6CTL_SOURCECHECK_LOGINT 11 /* minimum logging interval */
+#define IPV6CTL_ACCEPT_RTADV 12
+#define IPV6CTL_KEEPFAITH 13 /* deprecated */
+#define IPV6CTL_LOG_INTERVAL 14
+#define IPV6CTL_HDRNESTLIMIT 15
+#define IPV6CTL_DAD_COUNT 16
+#define IPV6CTL_AUTO_FLOWLABEL 17
+#define IPV6CTL_DEFMCASTHLIM 18
+#define IPV6CTL_GIF_HLIM 19 /* default HLIM for gif encap packet */
+#define IPV6CTL_KAME_VERSION 20
+#define IPV6CTL_USE_DEPRECATED 21 /* use deprec addr (RFC2462 5.5.4) */
+#define IPV6CTL_RR_PRUNE 22 /* walk timer for router renumbering */
+#if 0 /* obsolete */
+#define IPV6CTL_MAPPED_ADDR 23
#endif
-#define IPV6CTL_V6ONLY 24
-#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */
-#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */
-#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */
-
-#define IPV6CTL_USETEMPADDR 32 /* use temporary addresses [RFC 4941] */
-#define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */
-#define IPV6CTL_TEMPVLTIME 34 /* valid lifetime for tmpaddrs */
-#define IPV6CTL_AUTO_LINKLOCAL 35 /* automatic link-local addr assign */
-#define IPV6CTL_RIP6STATS 36 /* raw_ip6 stats */
-#define IPV6CTL_PREFER_TEMPADDR 37 /* prefer temporary addr as src */
-#define IPV6CTL_ADDRCTLPOLICY 38 /* get/set address selection policy */
-#define IPV6CTL_USE_DEFAULTZONE 39 /* use default scope zone */
-
-#define IPV6CTL_MAXFRAGS 41 /* max fragments */
-#define IPV6CTL_MCAST_PMTU 44 /* enable pMTU discovery for multicast? 
*/ - -#define IPV6CTL_NEIGHBORGCTHRESH 46 -#define IPV6CTL_MAXIFPREFIXES 47 -#define IPV6CTL_MAXIFDEFROUTERS 48 -#define IPV6CTL_MAXDYNROUTES 49 +#define IPV6CTL_V6ONLY 24 +#define IPV6CTL_RTEXPIRE 25 /* cloned route expiration time */ +#define IPV6CTL_RTMINEXPIRE 26 /* min value for expiration time */ +#define IPV6CTL_RTMAXCACHE 27 /* trigger level for dynamic expire */ + +#define IPV6CTL_USETEMPADDR 32 /* use temporary addresses [RFC 4941] */ +#define IPV6CTL_TEMPPLTIME 33 /* preferred lifetime for tmpaddrs */ +#define IPV6CTL_TEMPVLTIME 34 /* valid lifetime for tmpaddrs */ +#define IPV6CTL_AUTO_LINKLOCAL 35 /* automatic link-local addr assign */ +#define IPV6CTL_RIP6STATS 36 /* raw_ip6 stats */ +#define IPV6CTL_PREFER_TEMPADDR 37 /* prefer temporary addr as src */ +#define IPV6CTL_ADDRCTLPOLICY 38 /* get/set address selection policy */ +#define IPV6CTL_USE_DEFAULTZONE 39 /* use default scope zone */ + +#define IPV6CTL_MAXFRAGS 41 /* max fragments */ +#define IPV6CTL_MCAST_PMTU 44 /* enable pMTU discovery for mcast? */ + +#define IPV6CTL_NEIGHBORGCTHRESH 46 +#define IPV6CTL_MAXIFPREFIXES 47 +#define IPV6CTL_MAXIFDEFROUTERS 48 +#define IPV6CTL_MAXDYNROUTES 49 #define ICMPV6CTL_ND6_ONLINKNSRFC4861 50 /* New entries should be added here from current IPV6CTL_MAXID value. */ /* to define items, should talk with KAME guys first, for *BSD compatibility */ -#define IPV6CTL_MAXID 51 +#define IPV6CTL_MAXID 51 -#ifdef KERNEL_PRIVATE -#define CTL_IPV6PROTO_NAMES { \ +#ifdef BSD_KERNEL_PRIVATE +#define CTL_IPV6PROTO_NAMES { \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, \ { "tcp6", CTLTYPE_NODE }, \ @@ -828,16 +852,17 @@ struct mbuf; struct ifnet; struct in6_aliasreq; -#define in6_cksum(m, n, o, l) inet6_cksum(m, n, o, l) +extern uint16_t in6_pseudo(const struct in6_addr *, const struct in6_addr *, + uint32_t); +extern u_int16_t inet6_cksum(struct mbuf *, uint32_t, uint32_t, uint32_t); -extern u_int16_t inet6_cksum(struct mbuf *m, unsigned int proto, - unsigned int offset, unsigned int transport_len); +#define in6_cksum(_m, _n, _o, _l) \ + inet6_cksum(_m, _n, _o, _l) -extern int in6_localaddr(struct in6_addr *); extern int in6_addrscope(struct in6_addr *); extern struct in6_ifaddr *in6_ifawithscope(struct ifnet *, struct in6_addr *); extern struct in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *); -extern int in6_if_up(struct ifnet *, struct in6_aliasreq *); + struct sockaddr; extern void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); @@ -845,60 +870,20 @@ extern void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6); extern void in6_sin6_2_sin_in_sock(struct sockaddr *nam); extern int in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam); -extern void in6_delayed_cksum(struct mbuf *, u_int16_t); -extern int in6addr_local(struct in6_addr *); -#define DEBUG_HWCKSUM 1 /* IPv6 Hardware checksum on/off */ -/* - * in6_cksum_phdr: - * - * Compute significant parts of the IPv6 checksum pseudo-header - * for use in a delayed TCP/UDP checksum calculation. - * - * Args: - * - * src Source IPv6 address - * dst Destination IPv6 address - * len htonl(proto-hdr-len) - * nxt htonl(next-proto-number) - * - * NOTE: We expect the src and dst addresses to be 16-bit - * aligned! 
- */
-static __inline u_int16_t __unused
-in6_cksum_phdr(const struct in6_addr *src, const struct in6_addr *dst,
- u_int32_t len, u_int32_t nxt)
-{
- u_int32_t sum = 0;
- const u_int16_t *w;
-
- /*LINTED*/
- w = (const u_int16_t *) src;
- sum += w[0];
- if (!IN6_IS_SCOPE_LINKLOCAL(src))
- sum += w[1];
- sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
- sum += w[6]; sum += w[7];
-
- /*LINTED*/
- w = (const u_int16_t *) dst;
- sum += w[0];
- if (!IN6_IS_SCOPE_LINKLOCAL(dst))
- sum += w[1];
- sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
- sum += w[6]; sum += w[7];
-
- sum += (u_int16_t)(len >> 16) + (u_int16_t)(len /*& 0xffff*/);
-
- sum += (u_int16_t)(nxt >> 16) + (u_int16_t)(nxt /*& 0xffff*/);
-
- sum = (u_int16_t)(sum >> 16) + (u_int16_t)(sum /*& 0xffff*/);
-
- if (sum > 0xffff)
- sum -= 0xffff;
-
- return (sum);
-}
+extern uint32_t in6_finalize_cksum(struct mbuf *, uint32_t, int32_t,
+ int32_t, uint32_t);
+
+#define in6_delayed_cksum(_m) \
+ ((void) in6_finalize_cksum(_m, 0, 0, -1, CSUM_DELAY_IPV6_DATA))
+#define in6_delayed_cksum_offset(_m, _o, _s, _p) \
+ ((void) in6_finalize_cksum(_m, _o, _s, _p, CSUM_DELAY_IPV6_DATA))
+#endif /* BSD_KERNEL_PRIVATE */
+
+#ifdef KERNEL_PRIVATE
+/* exported for ApplicationFirewall */
+extern int in6_localaddr(struct in6_addr *);
+extern int in6addr_local(struct in6_addr *);
#endif /* KERNEL_PRIVATE */
#ifndef KERNEL
@@ -907,8 +892,7 @@ struct cmsghdr;
extern int inet6_option_space(int);
extern int inet6_option_init(void *, struct cmsghdr **, int);
-extern int inet6_option_append(struct cmsghdr *, const __uint8_t *,
- int, int);
+extern int inet6_option_append(struct cmsghdr *, const __uint8_t *, int, int);
extern __uint8_t *inet6_option_alloc(struct cmsghdr *, int, int, int);
extern int inet6_option_next(const struct cmsghdr *, __uint8_t **);
extern int inet6_option_find(const struct cmsghdr *, __uint8_t **, int);
@@ -916,7 +900,7 @@ extern int inet6_option_find(const struct cmsghdr *, __uint8_t **, int);
extern size_t inet6_rthdr_space(int, int);
extern struct cmsghdr *inet6_rthdr_init(void *, int);
extern int inet6_rthdr_add(struct cmsghdr *, const struct in6_addr *,
- unsigned int);
+ unsigned int);
extern int inet6_rthdr_lasthop(struct cmsghdr *, unsigned int);
#if 0 /* not implemented yet */
extern int inet6_rthdr_reverse(const struct cmsghdr *, struct cmsghdr *);
@@ -926,15 +910,15 @@ extern struct in6_addr *inet6_rthdr_getaddr(struct cmsghdr *, int);
extern int inet6_rthdr_getflags(const struct cmsghdr *, int);
extern int inet6_opt_init(void *, socklen_t);
-extern int inet6_opt_append(void *, socklen_t, int, __uint8_t,
- socklen_t, __uint8_t, void **);
+extern int inet6_opt_append(void *, socklen_t, int, __uint8_t, socklen_t,
+ __uint8_t, void **);
extern int inet6_opt_finish(void *, socklen_t, int);
extern int inet6_opt_set_val(void *, int, void *, socklen_t);
-extern int inet6_opt_next(void *, socklen_t, int, __uint8_t *,
- socklen_t *, void **);
-extern int inet6_opt_find(void *, socklen_t, int, __uint8_t,
- socklen_t *, void **);
+extern int inet6_opt_next(void *, socklen_t, int, __uint8_t *, socklen_t *,
+ void **);
+extern int inet6_opt_find(void *, socklen_t, int, __uint8_t, socklen_t *,
+ void **);
extern int inet6_opt_get_val(void *, int, void *, socklen_t);
extern socklen_t inet6_rth_space(int, int);
extern void *inet6_rth_init(void *, socklen_t, int, int);
diff --git a/bsd/netinet6/in6_cga.c b/bsd/netinet6/in6_cga.c
new file mode 100644
index 000000000..3e43e73da
--- /dev/null
+++ b/bsd/netinet6/in6_cga.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include + +#include + +#include + +#include + +#include +#include +#include +#include +#include + +#define IN6_CGA_HASH1_LENGTH 8 +#define IN6_CGA_HASH2_LENGTH 14 +#define IN6_CGA_PREPARE_ZEROES 9 + +struct in6_cga_hash1 { + u_int8_t octets[IN6_CGA_HASH1_LENGTH]; +}; + +struct in6_cga_hash2 { + u_int8_t octets[IN6_CGA_HASH2_LENGTH]; +}; + +struct in6_cga_singleton { + boolean_t cga_initialized; + decl_lck_mtx_data(, cga_mutex); + struct in6_cga_prepare cga_prepare; + struct iovec cga_pubkey; + struct iovec cga_privkey; +}; + +static struct in6_cga_singleton in6_cga = { + .cga_initialized = FALSE, + .cga_mutex = {}, + .cga_prepare = { + .cga_modifier = {}, + .cga_security_level = 0, + }, + .cga_pubkey = { + .iov_base = NULL, + .iov_len = 0, + }, + .cga_privkey = { + .iov_base = NULL, + .iov_len = 0, + }, +}; + +static void +in6_cga_node_lock_assert(int owned) +{ + VERIFY(in6_cga.cga_initialized); + lck_mtx_assert(&in6_cga.cga_mutex, owned); +} + +static boolean_t +in6_cga_is_prepare_valid(const struct in6_cga_prepare *prepare, + const struct iovec *pubkey) +{ + static const u_int8_t zeroes[IN6_CGA_PREPARE_ZEROES] = { }; + SHA1_CTX ctx; + u_int8_t sha1[SHA1_RESULTLEN]; + u_int i, n; + + VERIFY(prepare != NULL); + VERIFY(pubkey != NULL && pubkey->iov_base != NULL); + + if (prepare->cga_security_level == 0) + return (TRUE); + + if (prepare->cga_security_level > 7) + return (FALSE); + + SHA1Init(&ctx); + SHA1Update(&ctx, &prepare->cga_modifier.octets, + IN6_CGA_MODIFIER_LENGTH); + SHA1Update(&ctx, &zeroes, IN6_CGA_PREPARE_ZEROES); + SHA1Update(&ctx, pubkey->iov_base, pubkey->iov_len); + /* FUTURE: extension fields */ + SHA1Final(sha1, &ctx); + + n = 2 * (u_int) prepare->cga_security_level; + VERIFY(n < SHA1_RESULTLEN); + for (i = 0; i < n; ++i) + if (sha1[i] != 0) + return (FALSE); + + return (TRUE); +} + +static void +in6_cga_generate_iid(const struct in6_cga_prepare *prepare, + const struct iovec *pubkey, u_int8_t collisions, struct in6_addr *in6) +{ + SHA1_CTX ctx; + u_int8_t sha1[SHA1_RESULTLEN]; + + VERIFY(prepare != NULL); + VERIFY(prepare->cga_security_level < 8); + VERIFY(pubkey != NULL && pubkey->iov_base != NULL); + VERIFY(in6 != NULL); + + 
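+ /*
+  * Editor's note (not in the original source): this is the
+  * RFC 3972 hash step.  The digest input is the concatenation
+  * (modifier | subnet prefix | collision count | public key);
+  * the caller has already placed the 64-bit subnet prefix in
+  * the top half of 'in6'.  The first 8 digest bytes become the
+  * interface identifier, after the three high-order bits are
+  * replaced with the security level and the u/g bits are
+  * cleared by the 0x1c mask below.
+  */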
SHA1Init(&ctx); + SHA1Update(&ctx, &prepare->cga_modifier.octets, 16); + SHA1Update(&ctx, in6->s6_addr, 8); + SHA1Update(&ctx, &collisions, 1); + SHA1Update(&ctx, pubkey->iov_base, pubkey->iov_len); + /* FUTURE: extension fields */ + SHA1Final(sha1, &ctx); + + in6->s6_addr8[8] = + (prepare->cga_security_level << 5) | (sha1[0] & 0x1c); + in6->s6_addr8[9] = sha1[1]; + in6->s6_addr8[10] = sha1[2]; + in6->s6_addr8[11] = sha1[3]; + in6->s6_addr8[12] = sha1[4]; + in6->s6_addr8[13] = sha1[5]; + in6->s6_addr8[14] = sha1[6]; + in6->s6_addr8[15] = sha1[7]; +} + +void +in6_cga_init(void) +{ + lck_mtx_init(&in6_cga.cga_mutex, ifa_mtx_grp, ifa_mtx_attr); + in6_cga.cga_initialized = TRUE; +} + +void +in6_cga_node_lock(void) +{ + VERIFY(in6_cga.cga_initialized); + lck_mtx_lock(&in6_cga.cga_mutex); +} + +void +in6_cga_node_unlock(void) +{ + VERIFY(in6_cga.cga_initialized); + lck_mtx_unlock(&in6_cga.cga_mutex); +} + +void +in6_cga_query(struct in6_cga_nodecfg *cfg) +{ + VERIFY(cfg != NULL); + in6_cga_node_lock_assert(LCK_MTX_ASSERT_OWNED); + + cfg->cga_pubkey = in6_cga.cga_pubkey; + cfg->cga_prepare = in6_cga.cga_prepare; +} + +int +in6_cga_start(const struct in6_cga_nodecfg *cfg) +{ + struct iovec privkey, pubkey; + const struct in6_cga_prepare *prepare; + caddr_t pubkeycopy, privkeycopy; + + VERIFY(cfg != NULL); + in6_cga_node_lock_assert(LCK_MTX_ASSERT_OWNED); + + privkey = cfg->cga_privkey; + if (privkey.iov_base == NULL || privkey.iov_len == 0 || + privkey.iov_len >= IN6_CGA_KEY_MAXSIZE) + return (EINVAL); + pubkey = cfg->cga_pubkey; + if (pubkey.iov_base == NULL || pubkey.iov_len == 0 || + pubkey.iov_len >= IN6_CGA_KEY_MAXSIZE) + return (EINVAL); + prepare = &cfg->cga_prepare; + + if (!in6_cga_is_prepare_valid(prepare, &pubkey)) + return (EINVAL); + + in6_cga.cga_prepare = *prepare; + + MALLOC(privkeycopy, caddr_t, privkey.iov_len, M_IP6CGA, M_WAITOK); + if (privkeycopy == NULL) + return (ENOMEM); + + MALLOC(pubkeycopy, caddr_t, pubkey.iov_len, M_IP6CGA, M_WAITOK); + if (pubkeycopy == NULL) { + if (privkeycopy != NULL) + FREE(privkeycopy, M_IP6CGA); + return (ENOMEM); + } + + bcopy(privkey.iov_base, privkeycopy, privkey.iov_len); + privkey.iov_base = privkeycopy; + if (in6_cga.cga_privkey.iov_base != NULL) + FREE(in6_cga.cga_privkey.iov_base, M_IP6CGA); + in6_cga.cga_privkey = privkey; + + bcopy(pubkey.iov_base, pubkeycopy, pubkey.iov_len); + pubkey.iov_base = pubkeycopy; + if (in6_cga.cga_pubkey.iov_base != NULL) + FREE(in6_cga.cga_pubkey.iov_base, M_IP6CGA); + in6_cga.cga_pubkey = pubkey; + + return (0); +} + +int +in6_cga_stop(void) +{ + in6_cga_node_lock_assert(LCK_MTX_ASSERT_OWNED); + + if (in6_cga.cga_privkey.iov_base != NULL) { + FREE(in6_cga.cga_privkey.iov_base, M_IP6CGA); + in6_cga.cga_privkey.iov_base = NULL; + in6_cga.cga_privkey.iov_len = 0; + } + + if (in6_cga.cga_pubkey.iov_base != NULL) { + FREE(in6_cga.cga_pubkey.iov_base, M_IP6CGA); + in6_cga.cga_pubkey.iov_base = NULL; + in6_cga.cga_pubkey.iov_len = 0; + } + + return (0); +} + +ssize_t +in6_cga_parameters_prepare(void *output, size_t max, + const struct in6_addr *prefix, u_int8_t collisions, + const struct in6_cga_modifier *modifier) +{ + caddr_t cursor; + + in6_cga_node_lock_assert(LCK_MTX_ASSERT_OWNED); + + if (in6_cga.cga_pubkey.iov_len == 0) { + /* No public key */ + return (EINVAL); + } + + if (output == NULL || + max < in6_cga.cga_pubkey.iov_len + sizeof (modifier->octets) + 9) { + /* Output buffer error */ + return (EINVAL); + } + + cursor = output; + if (modifier == NULL) modifier = &in6_cga.cga_prepare.cga_modifier; + if 
(prefix == NULL) { + static const struct in6_addr llprefix = {{{ 0xfe, 0x80 }}}; + prefix = &llprefix; + } + + bcopy(&modifier->octets, cursor, sizeof (modifier->octets)); + cursor += sizeof (modifier->octets); + + *cursor++ = (char) collisions; + + bcopy(&prefix->s6_addr[0], cursor, 8); + cursor += 8; + + bcopy(in6_cga.cga_pubkey.iov_base, cursor, in6_cga.cga_pubkey.iov_len); + cursor += in6_cga.cga_pubkey.iov_len; + + /* FUTURE: Extension fields */ + + return ((ssize_t)(cursor - (caddr_t)output)); +} + +int +in6_cga_generate(const struct in6_cga_prepare *prepare, u_int8_t collisions, + struct in6_addr *in6) +{ + int error; + const struct iovec *pubkey; + + in6_cga_node_lock_assert(LCK_MTX_ASSERT_OWNED); + VERIFY(in6 != NULL); + + if (prepare == NULL) + prepare = &in6_cga.cga_prepare; + + pubkey = &in6_cga.cga_pubkey; + + if (pubkey->iov_base != NULL) { + in6_cga_generate_iid(prepare, pubkey, collisions, in6); + error = 0; + } + else + error = EADDRNOTAVAIL; + + return (error); +} + +/* End of file */ diff --git a/bsd/netinet6/in6_cksum.c b/bsd/netinet6/in6_cksum.c index bf8fe680d..2bea8cbcc 100644 --- a/bsd/netinet6/in6_cksum.c +++ b/bsd/netinet6/in6_cksum.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2011 Apple Inc. All rights reserved. + * Copyright (c) 2009-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -120,17 +120,13 @@ */ #include +#include #include #include #include #include #include - -#include - -#include - - +#include /* * Checksum routine for Internet Protocol family headers (Portable Version). @@ -139,224 +135,100 @@ * code and should be modified for each CPU to be as fast as possible. */ -#define ADDCARRY(x) do { if (x > 65535) { x -= 65535; } } while (0) -#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} - /* - * m MUST contain a continuous IP6 header. - * off is a offset where TCP/UDP/ICMP6 header starts. - * len is a total length of a transport segment. - * (e.g. TCP header + TCP payload) + * Compute IPv6 pseudo-header checksum; assumes 16-bit aligned pointers. */ - -u_int16_t -inet6_cksum(struct mbuf *m, unsigned int nxt, unsigned int off, - unsigned int len) +uint16_t +in6_pseudo(const struct in6_addr *src, const struct in6_addr *dst, uint32_t x) { - u_int16_t *w; - int sum = 0; - int mlen = 0; - int byte_swapped = 0; - struct ip6_hdr *ip6; - union { - u_int16_t phs[4]; - struct { - u_int32_t ph_len; - u_int8_t ph_zero[3]; - u_int8_t ph_nxt; - } ph __attribute__((__packed__)); - } uph; - union { - u_int8_t c[2]; - u_int16_t s; - } s_util; - union { - u_int16_t s[2]; - u_int32_t l; - } l_util; - - /* sanity check */ - if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < off + len) { - panic("inet6_cksum: mbuf len (%d) < off+len (%d+%d)\n", - m->m_pkthdr.len, off, len); - } - - if (nxt != 0) { - bzero(&uph, sizeof (uph)); - - /* - * First create IP6 pseudo header and calculate a summary. 
- */
- ip6 = mtod(m, struct ip6_hdr *);
- w = (u_int16_t *)&ip6->ip6_src;
- uph.ph.ph_len = htonl(len);
- uph.ph.ph_nxt = nxt;
-
- /* IPv6 source address */
- sum += w[0];
- if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src))
- sum += w[1];
- sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
- sum += w[6]; sum += w[7];
- /* IPv6 destination address */
- sum += w[8];
- if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst))
- sum += w[9];
- sum += w[10]; sum += w[11]; sum += w[12]; sum += w[13];
- sum += w[14]; sum += w[15];
- /* Payload length and upper layer identifier */
- sum += uph.phs[0]; sum += uph.phs[1];
- sum += uph.phs[2]; sum += uph.phs[3];
- }
+ uint32_t sum = 0;
+ const uint16_t *w;
 /*
- * Secondly calculate a summary of the first mbuf excluding offset.
+ * IPv6 source address
 */
- while (m != NULL && off > 0) {
- if (m->m_len <= off)
- off -= m->m_len;
- else
- break;
- m = m->m_next;
- }
- w = (u_int16_t *)(void *)(mtod(m, u_char *) + off);
- mlen = m->m_len - off;
- if (len < mlen)
- mlen = len;
- len -= mlen;
- /*
- * Force to even boundary.
- */
- if ((1 & (intptr_t) w) && (mlen > 0)) {
- REDUCE;
- sum <<= 8;
- s_util.c[0] = *(u_char *)w;
- w = (u_int16_t *)(void *)((char *)w + 1);
- mlen--;
- byte_swapped = 1;
- }
+ w = (const uint16_t *)src;
+ sum += w[0];
+ if (!IN6_IS_SCOPE_EMBED(src))
+ sum += w[1];
+ sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
+ sum += w[6]; sum += w[7];
+
 /*
- * Unroll the loop to make overhead from
- * branches &c small.
+ * IPv6 destination address
 */
- while ((mlen -= 32) >= 0) {
- sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
- sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
- sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
- sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
- w += 16;
- }
- mlen += 32;
- while ((mlen -= 8) >= 0) {
- sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
- w += 4;
- }
- mlen += 8;
- if (mlen == 0 && byte_swapped == 0)
- goto next;
- REDUCE;
- while ((mlen -= 2) >= 0) {
- sum += *w++;
- }
- if (byte_swapped) {
- REDUCE;
- sum <<= 8;
- byte_swapped = 0;
- if (mlen == -1) {
- s_util.c[1] = *(char *)w;
- sum += s_util.s;
- mlen = 0;
- } else
- mlen = -1;
- } else if (mlen == -1)
- s_util.c[0] = *(char *)w;
- next:
- m = m->m_next;
+ w = (const uint16_t *)dst;
+ sum += w[0];
+ if (!IN6_IS_SCOPE_EMBED(dst))
+ sum += w[1];
+ sum += w[2]; sum += w[3]; sum += w[4]; sum += w[5];
+ sum += w[6]; sum += w[7];
 /*
- * Lastly calculate a summary of the rest of mbufs.
- */
+ * Caller-supplied value; 'x' could be one of:
+ *
+ * htonl(proto + length), or
+ * htonl(proto + length + sum)
+ */
+ sum += x;
+
+ /* fold in carry bits */
+ ADDCARRY(sum);
+
+ return (sum);
+}
+
+/*
+ * m MUST contain at least an IPv6 header, if nxt is specified;
+ * nxt is the upper layer protocol number;
+ * off is an offset where TCP/UDP/ICMP6 header starts;
+ * len is a total length of a transport segment (e.g. TCP header + TCP payload)
+ */
+u_int16_t
+inet6_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
+{
+ uint32_t sum;
+
+ sum = m_sum16(m, off, len);
+
+ if (nxt != 0) {
+ struct ip6_hdr *ip6;
+ unsigned char buf[sizeof (*ip6)] __attribute__((aligned(8)));
+ uint32_t mlen;
- for (;m && len; m = m->m_next) {
- if (m->m_len == 0)
- continue;
- w = mtod(m, u_int16_t *);
- if (mlen == -1) {
- /*
- * The first byte of this mbuf is the continuation
- * of a word spanning between this mbuf and the
- * last mbuf.
- *
- * s_util.c[0] is already saved when scanning previous
- * mbuf.
- */ - s_util.c[1] = *(char *)w; - sum += s_util.s; - w = (u_int16_t *)(void *)((char *)w + 1); - mlen = m->m_len - 1; - len--; - } else - mlen = m->m_len; - if (len < mlen) - mlen = len; - len -= mlen; /* - * Force to even boundary. + * Sanity check + * + * Use m_length2() instead of m_length(), as we cannot rely on + * the caller setting m_pkthdr.len correctly, if the mbuf is + * a M_PKTHDR one. */ - if ((1 & (intptr_t) w) && (mlen > 0)) { - REDUCE; - sum <<= 8; - s_util.c[0] = *(u_char *)w; - w = (u_int16_t *)(void *)((char *)w + 1); - mlen--; - byte_swapped = 1; + if ((mlen = m_length2(m, NULL)) < sizeof (*ip6)) { + panic("%s: mbuf %p pkt too short (%d) for IPv6 header", + __func__, m, mlen); + /* NOTREACHED */ } + /* - * Unroll the loop to make overhead from - * branches &c small. + * In case the IPv6 header is not contiguous, or not 32-bit + * aligned, copy it to a local buffer. Note here that we + * expect the data pointer to point to the IPv6 header. */ - while ((mlen -= 32) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; - sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; - sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; - w += 16; - } - mlen += 32; - while ((mlen -= 8) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - w += 4; - } - mlen += 8; - if (mlen == 0 && byte_swapped == 0) - continue; - REDUCE; - while ((mlen -= 2) >= 0) { - sum += *w++; + if ((sizeof (*ip6) > m->m_len) || + !IP6_HDR_ALIGNED_P(mtod(m, caddr_t))) { + m_copydata(m, 0, sizeof (*ip6), (caddr_t)buf); + ip6 = (struct ip6_hdr *)(void *)buf; + } else { + ip6 = (struct ip6_hdr *)(void *)(m->m_data); } - if (byte_swapped) { - REDUCE; - sum <<= 8; - byte_swapped = 0; - if (mlen == -1) { - s_util.c[1] = *(char *)w; - sum += s_util.s; - mlen = 0; - } else - mlen = -1; - } else if (mlen == -1) - s_util.c[0] = *(char *)w; - } - if (len) - printf("inet6_cksum: out of data by %d\n", len); - if (mlen == -1) { - /* The last mbuf has odd # of bytes. Follow the - standard (the odd byte may be shifted left by 8 bits - or not as determined by endian-ness of the machine) */ - s_util.c[1] = 0; - sum += s_util.s; + + /* add pseudo header checksum */ + sum += in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst, + htonl(nxt + len)); + + /* fold in carry bits */ + ADDCARRY(sum); } - REDUCE; + return (~sum & 0xffff); } - diff --git a/bsd/netinet6/in6_gif.c b/bsd/netinet6/in6_gif.c index c977e9a7a..6142092c5 100644 --- a/bsd/netinet6/in6_gif.c +++ b/bsd/netinet6/in6_gif.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2009-2012 Apple Inc. All rights reserved. + * Copyright (c) 2009-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,12 +22,12 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/in6_gif.c,v 1.2.2.3 2001/07/03 11:01:52 ume Exp $ */ -/* $KAME: in6_gif.c,v 1.49 2001/05/14 14:02:17 itojun Exp $ */ +/* $FreeBSD: src/sys/netinet6/in6_gif.c,v 1.2.2.3 2001/07/03 11:01:52 ume Exp $ */ +/* $KAME: in6_gif.c,v 1.49 2001/05/14 14:02:17 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -95,17 +95,6 @@ #include -static __inline__ void* -_cast_non_const(const void * ptr) { - union { - const void* cval; - void* val; - } ret; - - ret.cval = ptr; - return (ret.val); -} - int in6_gif_output( struct ifnet *ifp, @@ -115,17 +104,21 @@ in6_gif_output( { struct gif_softc *sc = ifnet_softc(ifp); struct sockaddr_in6 *dst = (struct sockaddr_in6 *)&sc->gif_ro6.ro_dst; - struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *)(void *)sc->gif_psrc; - struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *)(void *)sc->gif_pdst; + struct sockaddr_in6 *sin6_src = (struct sockaddr_in6 *) + (void *)sc->gif_psrc; + struct sockaddr_in6 *sin6_dst = (struct sockaddr_in6 *) + (void *)sc->gif_pdst; struct ip6_hdr *ip6; int proto; u_int8_t itos, otos; + GIF_LOCK_ASSERT(sc); + if (sin6_src == NULL || sin6_dst == NULL || sin6_src->sin6_family != AF_INET6 || sin6_dst->sin6_family != AF_INET6) { m_freem(m); - return EAFNOSUPPORT; + return (EAFNOSUPPORT); } switch (family) { @@ -135,10 +128,10 @@ in6_gif_output( struct ip *ip; proto = IPPROTO_IPV4; - if (mbuf_len(m) < sizeof(*ip)) { - m = m_pullup(m, sizeof(*ip)); + if (mbuf_len(m) < sizeof (*ip)) { + m = m_pullup(m, sizeof (*ip)); if (!m) - return ENOBUFS; + return (ENOBUFS); } ip = mtod(m, struct ip *); itos = ip->ip_tos; @@ -149,10 +142,10 @@ in6_gif_output( case AF_INET6: { proto = IPPROTO_IPV6; - if (mbuf_len(m) < sizeof(*ip6)) { - m = m_pullup(m, sizeof(*ip6)); + if (mbuf_len(m) < sizeof (*ip6)) { + m = m_pullup(m, sizeof (*ip6)); if (!m) - return ENOBUFS; + return (ENOBUFS); } ip6 = mtod(m, struct ip6_hdr *); itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; @@ -165,16 +158,16 @@ in6_gif_output( family); #endif m_freem(m); - return EAFNOSUPPORT; + return (EAFNOSUPPORT); } - + /* prepend new IP header */ - M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); - if (m && mbuf_len(m) < sizeof(struct ip6_hdr)) - m = m_pullup(m, sizeof(struct ip6_hdr)); + M_PREPEND(m, sizeof (struct ip6_hdr), M_DONTWAIT); + if (m && mbuf_len(m) < sizeof (struct ip6_hdr)) + m = m_pullup(m, sizeof (struct ip6_hdr)); if (m == NULL) { printf("ENOBUFS in in6_gif_output %d\n", __LINE__); - return ENOBUFS; + return (ENOBUFS); } ip6 = mtod(m, struct ip6_hdr *); @@ -190,27 +183,23 @@ in6_gif_output( ip6->ip6_dst = sin6_dst->sin6_addr; else { m_freem(m); - return ENETUNREACH; + return (ENETUNREACH); } ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? 
ECN_ALLOWED : ECN_NOCARE, - &otos, &itos); + &otos, &itos); ip6->ip6_flow &= ~htonl(0xff << 20); ip6->ip6_flow |= htonl((u_int32_t)otos << 20); - if (dst->sin6_family != sin6_dst->sin6_family || + if (ROUTE_UNUSABLE(&sc->gif_ro6) || + dst->sin6_family != sin6_dst->sin6_family || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr) || - (sc->gif_ro6.ro_rt != NULL && - (sc->gif_ro6.ro_rt->generation_id != route_generation || - sc->gif_ro6.ro_rt->rt_ifp == ifp))) { + (sc->gif_ro6.ro_rt != NULL && sc->gif_ro6.ro_rt->rt_ifp == ifp)) { /* cache route doesn't match or recursive route */ - bzero(dst, sizeof(*dst)); + bzero(dst, sizeof (*dst)); dst->sin6_family = sin6_dst->sin6_family; - dst->sin6_len = sizeof(struct sockaddr_in6); + dst->sin6_len = sizeof (struct sockaddr_in6); dst->sin6_addr = sin6_dst->sin6_addr; - if (sc->gif_ro6.ro_rt) { - rtfree(sc->gif_ro6.ro_rt); - sc->gif_ro6.ro_rt = NULL; - } + ROUTE_RELEASE(&sc->gif_ro6); #if 0 sc->gif_if.if_mtu = GIF_MTU; #endif @@ -220,35 +209,36 @@ in6_gif_output( rtalloc((struct route *)&sc->gif_ro6); if (sc->gif_ro6.ro_rt == NULL) { m_freem(m); - return ENETUNREACH; + return (ENETUNREACH); } RT_LOCK(sc->gif_ro6.ro_rt); /* if it constitutes infinite encapsulation, punt. */ if (sc->gif_ro6.ro_rt->rt_ifp == ifp) { RT_UNLOCK(sc->gif_ro6.ro_rt); m_freem(m); - return ENETUNREACH; /*XXX*/ + return (ENETUNREACH); /* XXX */ } #if 0 ifp->if_mtu = sc->gif_ro6.ro_rt->rt_ifp->if_mtu - - sizeof(struct ip6_hdr); + - sizeof (struct ip6_hdr); #endif RT_UNLOCK(sc->gif_ro6.ro_rt); } - + #if IPV6_MINMTU /* * force fragmentation to minimum MTU, to avoid path MTU discovery. * it is too painful to ask for resend of inner packet, to achieve * path MTU discovery for encapsulated packets. */ - return(ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL)); + return (ip6_output(m, 0, &sc->gif_ro6, IPV6_MINMTU, 0, NULL, NULL)); #else - return(ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL)); + return (ip6_output(m, 0, &sc->gif_ro6, 0, 0, NULL, NULL)); #endif } -int in6_gif_input(struct mbuf **mp, int *offp, int proto) +int +in6_gif_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ifnet *gifp = NULL; @@ -258,12 +248,12 @@ int in6_gif_input(struct mbuf **mp, int *offp, int proto) ip6 = mtod(m, struct ip6_hdr *); - gifp = ((struct gif_softc*)encap_getarg(m))->gif_if; + gifp = ((struct gif_softc *)encap_getarg(m))->gif_if; if (gifp == NULL || (gifp->if_flags & IFF_UP) == 0) { m_freem(m); ip6stat.ip6s_nogif++; - return IPPROTO_DONE; + return (IPPROTO_DONE); } otos = ip6->ip6_flow; @@ -277,10 +267,10 @@ int in6_gif_input(struct mbuf **mp, int *offp, int proto) u_int8_t otos8; af = AF_INET; otos8 = (ntohl(otos) >> 20) & 0xff; - if (mbuf_len(m) < sizeof(*ip)) { - m = m_pullup(m, sizeof(*ip)); + if (mbuf_len(m) < sizeof (*ip)) { + m = m_pullup(m, sizeof (*ip)); if (!m) - return IPPROTO_DONE; + return (IPPROTO_DONE); } ip = mtod(m, struct ip *); if (gifp->if_flags & IFF_LINK1) @@ -294,10 +284,10 @@ int in6_gif_input(struct mbuf **mp, int *offp, int proto) case IPPROTO_IPV6: { af = AF_INET6; - if (mbuf_len(m) < sizeof(*ip6)) { - m = m_pullup(m, sizeof(*ip6)); + if (mbuf_len(m) < sizeof (*ip6)) { + m = m_pullup(m, sizeof (*ip6)); if (!m) - return IPPROTO_DONE; + return (IPPROTO_DONE); } ip6 = mtod(m, struct ip6_hdr *); if (gifp->if_flags & IFF_LINK1) @@ -310,14 +300,15 @@ int in6_gif_input(struct mbuf **mp, int *offp, int proto) default: ip6stat.ip6s_nogif++; m_freem(m); - return IPPROTO_DONE; + return (IPPROTO_DONE); } - if (m->m_pkthdr.rcvif) /* 
replace the rcvif by gifp for ifnet_input to route it correctly */ + /* Replace the rcvif by gifp for ifnet_input to route it correctly */ + if (m->m_pkthdr.rcvif) m->m_pkthdr.rcvif = gifp; ifnet_input(gifp, m, NULL); - return IPPROTO_DONE; + return (IPPROTO_DONE); } /* @@ -341,7 +332,7 @@ gif_validate6( */ if (!IN6_ARE_ADDR_EQUAL(&src->sin6_addr, &ip6->ip6_dst) || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_src)) - return 0; + return (0); /* martian filters on outer source - done in ip6_input */ @@ -350,9 +341,9 @@ gif_validate6( struct sockaddr_in6 sin6; struct rtentry *rt; - bzero(&sin6, sizeof(sin6)); + bzero(&sin6, sizeof (sin6)); sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_len = sizeof (struct sockaddr_in6); sin6.sin6_addr = ip6->ip6_src; rt = rtalloc1((struct sockaddr *)&sin6, 0, 0); @@ -368,13 +359,13 @@ gif_validate6( RT_UNLOCK(rt); rtfree(rt); } - return 0; + return (0); } RT_UNLOCK(rt); rtfree(rt); } - return 128 * 2; + return (128 * 2); } /* @@ -396,8 +387,10 @@ gif_encapcheck6( /* sanity check done in caller */ sc = (struct gif_softc *)arg; - mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof(ip6), &ip6); + GIF_LOCK_ASSERT(sc); + + mbuf_copydata((struct mbuf *)(size_t)m, 0, sizeof (ip6), &ip6); ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL; - return gif_validate6(&ip6, sc, ifp); + return (gif_validate6(&ip6, sc, ifp)); } diff --git a/bsd/netinet6/in6_gif.h b/bsd/netinet6/in6_gif.h index 8baafdd43..a517d9663 100644 --- a/bsd/netinet6/in6_gif.h +++ b/bsd/netinet6/in6_gif.h @@ -1,5 +1,5 @@ -/* $FreeBSD: src/sys/netinet6/in6_gif.h,v 1.2.2.1 2000/07/15 07:14:33 kris Exp $ */ -/* $KAME: in6_gif.h,v 1.5 2000/04/14 08:36:03 itojun Exp $ */ +/* $FreeBSD: src/sys/netinet6/in6_gif.h,v 1.2.2.1 2000/07/15 07:14:33 kris Exp $ */ +/* $KAME: in6_gif.h,v 1.5 2000/04/14 08:36:03 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -31,15 +31,15 @@ */ #ifndef _NETINET6_IN6_GIF_H_ -#define _NETINET6_IN6_GIF_H_ +#define _NETINET6_IN6_GIF_H_ #include -#ifdef KERNEL_PRIVATE -#define GIF_HLIM 30 +#ifdef BSD_KERNEL_PRIVATE +#define GIF_HLIM 30 int in6_gif_input(struct mbuf **, int *, int); int in6_gif_output(struct ifnet *, int, struct mbuf *, struct rtentry *); int gif_encapcheck6(const struct mbuf *, int, int, void *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_IN6_GIF_H_ */ diff --git a/bsd/netinet6/in6_ifattach.c b/bsd/netinet6/in6_ifattach.c index 31f707c81..06e82afb5 100644 --- a/bsd/netinet6/in6_ifattach.c +++ b/bsd/netinet6/in6_ifattach.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,11 +22,9 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/in6_ifattach.c,v 1.8 2002/04/19 04:46:22 suz Exp $ */ -/* $KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,16 +89,8 @@ #include #include +#include -#define IN6_IFSTAT_ALLOC_SIZE \ - sizeof(void *) + sizeof(struct in6_ifstat) + sizeof(uint64_t) -#define ICMP6_IFSTAT_ALLOC_SIZE \ - sizeof(void *) + sizeof(struct icmp6_ifstat) + sizeof(uint64_t) - -struct in6_ifstat **in6_ifstat = NULL; -struct icmp6_ifstat **icmp6_ifstat = NULL; -size_t in6_ifstatmax = 0; -size_t icmp6_ifstatmax = 0; u_int32_t in6_maxmtu = 0; extern lck_mtx_t *nd6_mutex; @@ -110,16 +100,18 @@ int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL; int ip6_auto_linklocal = 1; /* enable by default */ #endif -int loopattach6_done = 0; - extern struct inpcbinfo udbinfo; extern struct inpcbinfo ripcbinfo; -static int get_rand_ifid(struct ifnet *, struct in6_addr *); -static int generate_tmp_ifid(u_int8_t *, const u_int8_t *, u_int8_t *); -int in6_get_hw_ifid(struct ifnet *, struct in6_addr *); -static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *); -static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *, struct in6_aliasreq *); +static const unsigned int in6_extra_size = sizeof(struct in6_ifextra); +static const unsigned int in6_extra_bufsize = in6_extra_size + + sizeof(void *) + sizeof(uint64_t); + +static int get_rand_iid(struct ifnet *, struct in6_addr *); +static int in6_generate_tmp_iid(u_int8_t *, const u_int8_t *, u_int8_t *); +static int in6_select_iid_from_all_hw(struct ifnet *, struct ifnet *, + struct in6_addr *); +static int in6_ifattach_linklocal(struct ifnet *, struct in6_aliasreq *); static int in6_ifattach_loopback(struct ifnet *); /* @@ -132,7 +124,7 @@ static int in6_ifattach_loopback(struct ifnet *); * in6 - upper 64bits are preserved */ static int -get_rand_ifid( +get_rand_iid( __unused struct ifnet *ifp, struct in6_addr *in6) /* upper 64bits are preserved */ { @@ -140,19 +132,13 @@ get_rand_ifid( u_int8_t digest[SHA1_RESULTLEN]; int hostnlen = strlen(hostname); -#if 0 - /* we need at least several letters as seed for ifid */ - if (len < 3) - return -1; -#endif - /* generate 8 bytes of pseudo-random value. */ - bzero(&ctxt, sizeof(ctxt)); + bzero(&ctxt, sizeof (ctxt)); SHA1Init(&ctxt); SHA1Update(&ctxt, hostname, hostnlen); SHA1Final(digest, &ctxt); - /* assumes sizeof(digest) > sizeof(ifid) */ + /* assumes sizeof (digest) > sizeof (iid) */ bcopy(digest, &in6->s6_addr[8], 8); /* make sure to set "u" bit to local, and "g" bit to individual. */ @@ -162,11 +148,11 @@ get_rand_ifid( /* convert EUI64 into IPv6 interface identifier */ ND6_EUI64_TO_IFID(in6); - return 0; + return (0); } static int -generate_tmp_ifid( +in6_generate_tmp_iid( u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret) @@ -177,14 +163,15 @@ generate_tmp_ifid( struct timeval tv; /* If there's no history, start with a random seed. 
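 * Otherwise, seed0 (the previous history value) and seed1 (the stable
 * interface identifier) are concatenated and hashed with SHA1 below;
 * the left half of the digest becomes the new temporary IID and the
 * right half becomes the next history value (the RFC 3041/4941
 * regeneration scheme).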
*/ - bzero(nullbuf, sizeof(nullbuf)); - if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) { + bzero(nullbuf, sizeof (nullbuf)); + if (bcmp(nullbuf, seed0, sizeof (nullbuf)) == 0) { int i; for (i = 0; i < 2; i++) { - microtime(&tv); - val32 = random() ^ tv.tv_usec; - bcopy(&val32, seed + sizeof(val32) * i, sizeof(val32)); + getmicrotime(&tv); + val32 = RandomULong() ^ tv.tv_usec; + bcopy(&val32, seed + sizeof (val32) * i, + sizeof (val32)); } } else { bcopy(seed0, seed, 8); @@ -197,16 +184,16 @@ generate_tmp_ifid( if (0) { /* for debugging purposes only */ int i; - printf("generate_tmp_ifid: new randomized ID from: "); + printf("%s: new randomized ID from: ", __func__); for (i = 0; i < 16; i++) printf("%02x", seed[i]); printf(" "); } /* generate 16 bytes of pseudo-random value. */ - bzero(&ctxt, sizeof(ctxt)); + bzero(&ctxt, sizeof (ctxt)); SHA1Init(&ctxt); - SHA1Update(&ctxt, seed, sizeof(seed)); + SHA1Update(&ctxt, seed, sizeof (seed)); SHA1Final(digest, &ctxt); /* @@ -222,11 +209,11 @@ generate_tmp_ifid( * for simplicity. If the caclculated digest happens to be zero, * use a random non-zero value as the last resort. */ - if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) { + if (bcmp(nullbuf, ret, sizeof (nullbuf)) == 0) { nd6log((LOG_INFO, - "generate_tmp_ifid: computed SHA1 value is zero.\n")); + "%s: computed SHA1 value is zero.\n", __func__)); - microtime(&tv); + getmicrotime(&tv); val32 = random() ^ tv.tv_usec; val32 = 1 + (val32 % (0xffffffff - 1)); } @@ -248,19 +235,19 @@ generate_tmp_ifid( printf("\n"); } - return 0; + return (0); } /* - * Get interface identifier for the specified interface. + * Get interface identifier for the specified interface using the method in + * Appendix A of RFC 4291. + * * XXX assumes single sockaddr_dl (AF_LINK address) per an interface * * in6 - upper 64bits are preserved */ int -in6_get_hw_ifid( - struct ifnet *ifp, - struct in6_addr *in6) /* upper 64bits are preserved */ +in6_iid_from_hw(struct ifnet *ifp, struct in6_addr *in6) { struct ifaddr *ifa = NULL; struct sockaddr_dl *sdl; @@ -345,8 +332,8 @@ in6_get_hw_ifid( /* * due to insufficient bitwidth, we mark it local. */ - in6->s6_addr[8] &= ~ND6_EUI64_GBIT; /* g bit to "individual" */ - in6->s6_addr[8] |= ND6_EUI64_UBIT; /* u bit to "local" */ + in6->s6_addr[8] &= ~ND6_EUI64_GBIT; /* g to "individual" */ + in6->s6_addr[8] |= ND6_EUI64_UBIT; /* u to "local" */ break; case IFT_GIF: @@ -354,7 +341,7 @@ in6_get_hw_ifid( case IFT_STF: #endif /* - * RFC2893 says: "SHOULD use IPv4 address as ifid source". + * RFC2893 says: "SHOULD use IPv4 address as IID source". * however, IPv4 address is not very suitable as unique * identifier source (can be renumbered). * we don't do this. @@ -376,7 +363,7 @@ in6_get_hw_ifid( ND6_EUI64_TO_IFID(in6); /* - * sanity check: ifid must not be all zero, avoid conflict with + * sanity check: iid must not be all zero, avoid conflict with * subnet router anycast */ if ((in6->s6_addr[8] & ~(ND6_EUI64_GBIT | ND6_EUI64_UBIT)) == 0x00 && @@ -397,14 +384,16 @@ done: } /* - * Get interface identifier for the specified interface. If it is not - * available on ifp0, borrow interface identifier from other information - * sources. + * Get interface identifier for the specified interface using the method in + * Appendix A of RFC 4291. If it is not available on ifp0, borrow interface + * identifier from other information sources. 
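 * The search order below: the interface's own hardware address first,
 * then the secondary source (altifp), then any other interface whose
 * EUI64 is universally unique, and finally a pseudo-random IID as the
 * last resort.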
* - * altifp - secondary EUI64 source + * ifp - primary EUI64 source + * altifp - secondary EUI64 source + * in6 - IPv6 address to output IID */ static int -get_ifid( +in6_select_iid_from_all_hw( struct ifnet *ifp0, struct ifnet *altifp, /* secondary EUI64 source */ struct in6_addr *in6) @@ -412,15 +401,15 @@ struct ifnet *ifp; /* first, try to get it from the interface itself */ - if (in6_get_hw_ifid(ifp0, in6) == 0) { - nd6log((LOG_DEBUG, "%s: got interface identifier from itself\n", + if (in6_iid_from_hw(ifp0, in6) == 0) { + nd6log((LOG_DEBUG, "%s: IID derived from HW interface.\n", if_name(ifp0))); goto success; } /* try secondary EUI64 source. this basically is for ATM PVC */ - if (altifp && in6_get_hw_ifid(altifp, in6) == 0) { - nd6log((LOG_DEBUG, "%s: got interface identifier from %s\n", + if (altifp && in6_iid_from_hw(altifp, in6) == 0) { + nd6log((LOG_DEBUG, "%s: IID from alternate HW interface %s.\n", if_name(ifp0), if_name(altifp))); goto success; } @@ -430,16 +419,15 @@ TAILQ_FOREACH(ifp, &ifnet_head, if_list) { if (ifp == ifp0) continue; - if (in6_get_hw_ifid(ifp, in6) != 0) + if (in6_iid_from_hw(ifp, in6) != 0) continue; /* - * to borrow ifid from other interface, ifid needs to be + * to borrow IID from other interface, IID needs to be * globally unique */ if (ND6_IFID_UNIVERSAL(in6)) { - nd6log((LOG_DEBUG, - "%s: borrow interface identifier from %s\n", + nd6log((LOG_DEBUG, "%s: borrowed IID from %s\n", if_name(ifp0), if_name(ifp))); ifnet_head_done(); goto success; @@ -448,89 +436,38 @@ ifnet_head_done(); /* last resort: get from random number source */ - if (get_rand_ifid(ifp, in6) == 0) { - nd6log((LOG_DEBUG, - "%s: interface identifier generated by random number\n", - if_name(ifp0))); + if (get_rand_iid(ifp, in6) == 0) { + nd6log((LOG_DEBUG, "%s: IID from PRNG.\n", if_name(ifp0))); goto success; } printf("%s: failed to get interface identifier\n", if_name(ifp0)); - return -1; + return (-1); success: - nd6log((LOG_INFO, "%s: ifid: " + nd6log((LOG_INFO, "%s: IID: " "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", if_name(ifp0), in6->s6_addr[8], in6->s6_addr[9], in6->s6_addr[10], in6->s6_addr[11], in6->s6_addr[12], in6->s6_addr[13], in6->s6_addr[14], in6->s6_addr[15])); - return 0; + return (0); } static int -in6_ifattach_linklocal( - struct ifnet *ifp, - struct ifnet *altifp, /* secondary EUI64 source */ - struct in6_aliasreq *ifra_passed) +in6_ifattach_linklocal(struct ifnet *ifp, struct in6_aliasreq *ifra) { struct in6_ifaddr *ia; - struct in6_aliasreq ifra; struct nd_prefix pr0, *pr; int i, error; - /* - * configure link-local address. - */ - bzero(&ifra, sizeof(ifra)); + VERIFY(ifra != NULL); proto_plumb(PF_INET6, ifp); - /* - * in6_update_ifa() does not use ifra_name, but we accurately set it - * for safety. 
- */ - strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); - - if ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0 && - ifra_passed != NULL) /* interface provided both addresses for us */ - bcopy(&ifra_passed->ifra_addr, &(ifra.ifra_addr), sizeof(struct sockaddr_in6)); - else { - ifra.ifra_addr.sin6_family = AF_INET6; - ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); - ifra.ifra_addr.sin6_addr.s6_addr16[0] = htons(0xfe80); - ifra.ifra_addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); - ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0; - if ((ifp->if_flags & IFF_LOOPBACK) != 0) { - ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0; - ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1); - } else { - if (get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr) != 0) { - nd6log((LOG_ERR, - " %s: no ifid available\n", if_name(ifp))); - return EADDRNOTAVAIL; - } - } - } - if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL)) - return (EADDRNOTAVAIL); - - ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); - ifra.ifra_prefixmask.sin6_family = AF_INET6; - ifra.ifra_prefixmask.sin6_addr = in6mask64; - /* link-local addresses should NEVER expire. */ - ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; - ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; - - /* - * Now call in6_update_ifa() to do a bunch of procedures to configure - * a link-local address. We can set the 3rd argument to NULL, because - * we know there's no other link-local address on the interface - * and therefore we are adding one (instead of updating one). - */ - if ((error = in6_update_ifa(ifp, &ifra, NULL, - IN6_IFAUPDATE_DADDELAY, M_WAITOK)) != 0) { + error = in6_update_ifa(ifp, ifra, IN6_IFAUPDATE_DADDELAY, &ia); + if (error != 0) { /* * XXX: When the interface does not support IPv6, this call * would fail in the SIOCSIFADDR ioctl. I believe the @@ -538,20 +475,14 @@ in6_ifattach_linklocal( * suppress it. (jinmei@kame.net 20010130) */ if (error != EAFNOSUPPORT) - nd6log((LOG_NOTICE, "in6_ifattach_linklocal: failed to " + nd6log((LOG_NOTICE, "%s: failed to " "configure a link-local address on %s " "(errno=%d)\n", - if_name(ifp), error)); + __func__, if_name(ifp), error)); return (EADDRNOTAVAIL); } + VERIFY(ia != NULL); - ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */ -#if DIAGNOSTIC - if (!ia) { - panic("ia == NULL in in6_ifattach_linklocal"); - /*NOTREACHED*/ - } -#endif /* * Make the link-local prefix (fe80::%link/64) as on-link. * Since we'd like to manage prefixes separately from addresses, @@ -559,13 +490,13 @@ in6_ifattach_linklocal( * and add it to the prefix list as a never-expire prefix. * XXX: this change might affect some existing code base... */ - bzero(&pr0, sizeof(pr0)); + bzero(&pr0, sizeof (pr0)); lck_mtx_init(&pr0.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr); pr0.ndpr_ifp = ifp; /* this should be 64 at this moment. */ - pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL); - pr0.ndpr_mask = ifra.ifra_prefixmask.sin6_addr; - pr0.ndpr_prefix = ifra.ifra_addr; + pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL); + pr0.ndpr_mask = ifra->ifra_prefixmask.sin6_addr; + pr0.ndpr_prefix = ifra->ifra_addr; /* apply the mask for safety. 
(nd6_prelist_add will apply it again) */ for (i = 0; i < 4; i++) { pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= @@ -591,22 +522,19 @@ in6_ifattach_linklocal( if ((error = nd6_prelist_add(&pr0, NULL, &pr, TRUE)) != 0) { IFA_REMREF(&ia->ia_ifa); lck_mtx_destroy(&pr0.ndpr_lock, ifa_mtx_grp); - return(error); + return (error); } } - if (ia != NULL) { - in6_post_msg(ifp, KEV_INET6_NEW_LL_ADDR, ia); - IFA_REMREF(&ia->ia_ifa); - } + in6_post_msg(ifp, KEV_INET6_NEW_LL_ADDR, ia); + IFA_REMREF(&ia->ia_ifa); /* Drop use count held above during lookup/add */ if (pr != NULL) NDPR_REMREF(pr); lck_mtx_destroy(&pr0.ndpr_lock, ifa_mtx_grp); - - return 0; + return (0); } static int @@ -614,17 +542,18 @@ in6_ifattach_loopback( struct ifnet *ifp) /* must be IFT_LOOP */ { struct in6_aliasreq ifra; + struct in6_ifaddr *ia; int error; - bzero(&ifra, sizeof(ifra)); + bzero(&ifra, sizeof (ifra)); /* * in6_update_ifa() does not use ifra_name, but we accurately set it * for safety. */ - strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); + strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); - ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_prefixmask.sin6_len = sizeof (struct sockaddr_in6); ifra.ifra_prefixmask.sin6_family = AF_INET6; ifra.ifra_prefixmask.sin6_addr = in6mask128; @@ -632,11 +561,11 @@ in6_ifattach_loopback( * Always initialize ia_dstaddr (= broadcast address) to loopback * address. Follows IPv4 practice - see in_ifinit(). */ - ifra.ifra_dstaddr.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_dstaddr.sin6_len = sizeof (struct sockaddr_in6); ifra.ifra_dstaddr.sin6_family = AF_INET6; ifra.ifra_dstaddr.sin6_addr = in6addr_loopback; - ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_addr.sin6_len = sizeof (struct sockaddr_in6); ifra.ifra_addr.sin6_family = AF_INET6; ifra.ifra_addr.sin6_addr = in6addr_loopback; @@ -650,23 +579,24 @@ in6_ifattach_loopback( /* skip registration to the prefix list. XXX should be temporary. */ ifra.ifra_flags |= IN6_IFF_NOPFX; - /* - * We are sure that this is a newly assigned address, so we can set - * NULL to the 3rd arg. - */ - if ((error = in6_update_ifa(ifp, &ifra, NULL, 0, M_WAITOK)) != 0) { - nd6log((LOG_ERR, "in6_ifattach_loopback: failed to configure " - "the loopback address on %s (errno=%d)\n", - if_name(ifp), error)); + /* add the new interface address */ + error = in6_update_ifa(ifp, &ifra, 0, &ia); + if (error != 0) { + nd6log((LOG_ERR, + "%s: failed to configure loopback address %s (error=%d)\n", + __func__, if_name(ifp), error)); + VERIFY(ia == NULL); return (EADDRNOTAVAIL); } - return 0; + VERIFY(ia != NULL); + IFA_REMREF(&ia->ia_ifa); + return (0); } /* * compute NI group address, based on the current hostname setting. - * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later). + * see RFC 4620. * * when ifp == NULL, the caller is responsible for filling scopeid. */ @@ -685,13 +615,13 @@ in6_nigroup( char n[64]; /* a single label must not exceed 63 chars */ if (!namelen || !name) - return -1; + return (-1); p = name; while (p && *p && *p != '.' && p - name < namelen) p++; - if (p - name > sizeof(n) - 1) - return -1; /* label too long */ + if (p - name > sizeof (n) - 1) + return (-1); /* label too long */ l = p - name; strncpy(n, name, l); n[(int)l] = '\0'; @@ -700,148 +630,68 @@ in6_nigroup( *q = *q - 'A' + 'a'; } - /* generate 8 bytes of pseudo-random value. */ - bzero(&ctxt, sizeof(ctxt)); + /* generate 16 bytes of pseudo-random value. 
*/ + bzero(&ctxt, sizeof (ctxt)); SHA1Init(&ctxt); - SHA1Update(&ctxt, &l, sizeof(l)); + SHA1Update(&ctxt, &l, sizeof (l)); SHA1Update(&ctxt, n, l); SHA1Final(digest, &ctxt); - bzero(in6, sizeof(*in6)); + bzero(in6, sizeof (*in6)); in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL; in6->s6_addr8[11] = 2; - bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3])); + in6->s6_addr8[12] = 0xff; + /* copy first 3 bytes of prefix into address */ + bcopy(digest, &in6->s6_addr8[13], 3); if (in6_setscope(in6, ifp, NULL)) return (-1); /* XXX: should not fail */ - return 0; + return (0); } int in6_domifattach(struct ifnet *ifp) { - int error = 0; + int error; + + VERIFY(ifp != NULL); - if ((error = proto_plumb(PF_INET6, ifp))) { + error = proto_plumb(PF_INET6, ifp); + if (error != 0) { if (error != EEXIST) - log(LOG_ERR, "%s: proto_plumb returned %d if=%s%d\n", - __func__, error, ifp->if_name, ifp->if_unit); + log(LOG_ERR, "%s: proto_plumb returned %d if=%s\n", + __func__, error, if_name(ifp)); } else { - nd6_ifattach(ifp); - scope6_ifattach(ifp); + error = in6_ifattach_prelim(ifp); + if (error != 0) { + int errorx; + + log(LOG_ERR, + "%s: in6_ifattach_prelim returned %d if=%s%d\n", + __func__, error, ifp->if_name, ifp->if_unit); + + errorx = proto_unplumb(PF_INET6, ifp); + if (errorx != 0) /* XXX should not fail */ + log(LOG_ERR, + "%s: proto_unplumb returned %d if=%s%d\n", + __func__, errorx, ifp->if_name, + ifp->if_unit); + } } return (error); } - -/* - * XXX multiple loopback interface needs more care. for instance, - * nodelocal address needs to be configured onto only one of them. - * XXX multiple link-local address case - */ int -in6_ifattach( - struct ifnet *ifp, - struct ifnet *altifp, /* secondary EUI64 source */ - struct in6_aliasreq *ifra) +in6_ifattach_prelim(struct ifnet *ifp) { - static size_t if_indexlim = 8; - struct in6_ifaddr *ia; - struct in6_addr in6; - int error; - void *buf; - - lck_rw_lock_exclusive(&in6_ifs_rwlock); - /* - * We have some arrays that should be indexed by if_index. - * since if_index will grow dynamically, they should grow too. 
- * struct in6_ifstat **in6_ifstat - * struct icmp6_ifstat **icmp6_ifstat - */ - if (in6_ifstat == NULL || icmp6_ifstat == NULL || - if_index >= if_indexlim) { - while (if_index >= if_indexlim) - if_indexlim <<= 1; - } - - /* grow in6_ifstat */ - if (in6_ifstatmax < if_indexlim) { - size_t n; - caddr_t q; - - n = if_indexlim * sizeof(struct in6_ifstat *); - q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK|M_ZERO); - if (q == NULL) { - lck_rw_done(&in6_ifs_rwlock); - return ENOBUFS; - } - if (in6_ifstat) { - bcopy((caddr_t)in6_ifstat, q, - in6_ifstatmax * sizeof(struct in6_ifstat *)); - FREE((caddr_t)in6_ifstat, M_IFADDR); - } - in6_ifstat = (struct in6_ifstat **)(void *)q; - in6_ifstatmax = if_indexlim; - } - - if (in6_ifstat[ifp->if_index] == NULL) { - buf = _MALLOC(IN6_IFSTAT_ALLOC_SIZE, M_IFADDR, M_WAITOK); - if (buf == NULL) { - lck_rw_done(&in6_ifs_rwlock); - return ENOBUFS; - } - bzero(buf, IN6_IFSTAT_ALLOC_SIZE); - in6_ifstat[ifp->if_index] = (struct in6_ifstat *) - P2ROUNDUP((intptr_t)buf + sizeof(void *), sizeof(uint64_t)); - VERIFY(IS_P2ALIGNED(in6_ifstat[ifp->if_index], sizeof(uint64_t))); - } - lck_rw_done(&in6_ifs_rwlock); - - lck_rw_lock_exclusive(&icmp6_ifs_rwlock); - if (icmp6_ifstatmax < if_indexlim) { - size_t n; - caddr_t q; - - n = if_indexlim * sizeof(struct icmp6_ifstat *); - q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK|M_ZERO); - if (q == NULL) { - lck_rw_done(&icmp6_ifs_rwlock); - return ENOBUFS; - } - if (icmp6_ifstat) { - bcopy((caddr_t)icmp6_ifstat, q, - icmp6_ifstatmax * sizeof(struct icmp6_ifstat *)); - FREE((caddr_t)icmp6_ifstat, M_IFADDR); - } - icmp6_ifstat = (struct icmp6_ifstat **)(void *)q; - icmp6_ifstatmax = if_indexlim; - } - - if (icmp6_ifstat[ifp->if_index] == NULL) { - buf = _MALLOC(ICMP6_IFSTAT_ALLOC_SIZE, M_IFADDR, M_WAITOK); - if (buf == NULL) { - lck_rw_done(&icmp6_ifs_rwlock); - return ENOBUFS; - } - bzero(buf, ICMP6_IFSTAT_ALLOC_SIZE); - icmp6_ifstat[ifp->if_index] = (struct icmp6_ifstat *) - P2ROUNDUP((intptr_t)buf + sizeof(void *), sizeof(uint64_t)); - VERIFY(IS_P2ALIGNED(icmp6_ifstat[ifp->if_index], sizeof(uint64_t))); - } - lck_rw_done(&icmp6_ifs_rwlock); - - /* initialize NDP variables */ - if ((error = nd6_ifattach(ifp)) != 0) - return error; + struct in6_ifextra *ext; + void **pbuf, *base; + int error = 0; - /* initialize scope identifiers */ - if ((error = scope6_ifattach(ifp)) != 0) - return error; + VERIFY(ifp != NULL); - /* - * quirks based on interface type - */ + /* quirks based on interface type */ switch (ifp->if_type) { #if IFT_STF case IFT_STF: @@ -851,70 +701,258 @@ in6_ifattach( * linklocals for 6to4 interface, but there's no use and * it is rather harmful to have one. */ - goto statinit; + goto skipmcast; #endif default: break; } /* - * usually, we require multicast capability to the interface + * IPv6 requires multicast capability at the interface. + * (previously, this was a silent error.) */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { nd6log((LOG_INFO, "in6_ifattach: ", - "%s is not multicast capable, IPv6 not enabled\n", + "%s is not multicast capable, IPv6 not enabled\n", if_name(ifp))); - return EINVAL; + return (EINVAL); } - /* - * assign loopback address for loopback interface. - * XXX multiple loopback interface case. 
- */ +#if IFT_STF +skipmcast: +#endif + + if (ifp->if_inet6data == NULL) { + ext = (struct in6_ifextra *)_MALLOC(in6_extra_size, M_IFADDR, + M_WAITOK|M_ZERO); + if (!ext) + return (ENOMEM); + base = (void *)P2ROUNDUP((intptr_t)ext + sizeof(uint64_t), + sizeof(uint64_t)); + VERIFY(((intptr_t)base + in6_extra_size) <= + ((intptr_t)ext + in6_extra_bufsize)); + pbuf = (void **)((intptr_t)base - sizeof(void *)); + *pbuf = ext; + ifp->if_inet6data = base; + VERIFY(IS_P2ALIGNED(ifp->if_inet6data, sizeof(uint64_t))); + } else { + /* + * Since the structure is never freed, we need to zero out + * some of its members. We avoid zeroing out the scope6 + * structure on purpose because other threads might be + * using its contents. + */ + bzero(&IN6_IFEXTRA(ifp)->icmp6_ifstat, + sizeof(IN6_IFEXTRA(ifp)->icmp6_ifstat)); + bzero(&IN6_IFEXTRA(ifp)->in6_ifstat, + sizeof(IN6_IFEXTRA(ifp)->in6_ifstat)); + } + + /* initialize NDP variables */ + if ((error = nd6_ifattach(ifp)) != 0) + return (error); + + scope6_ifattach(ifp); + + /* initialize loopback interface address */ if ((ifp->if_flags & IFF_LOOPBACK) != 0) { - struct in6_ifaddr *ia6 = NULL; - if (!OSCompareAndSwap(0, 1, (UInt32 *)&loopattach6_done)) { - in6 = in6addr_loopback; - if ((ia6 = in6ifa_ifpwithaddr(ifp, &in6)) == NULL) { - if (in6_ifattach_loopback(ifp) != 0) { - OSCompareAndSwap(1, 0, (UInt32 *)&loopattach6_done); - return EINVAL; - } - } - else { - IFA_REMREF(&ia6->ia_ifa); - } + error = in6_ifattach_loopback(ifp); + if (error != 0) { + log(LOG_ERR, "%s: in6_ifattach_loopback returned %d if=%s%d\n", + __func__, error, ifp->if_name, + ifp->if_unit); + return (error); } } + /* update dynamically. */ + if (in6_maxmtu < ifp->if_mtu) + in6_maxmtu = ifp->if_mtu; + + VERIFY(error == 0); + return (0); +} + +int +in6_ifattach_aliasreq(struct ifnet *ifp, struct ifnet *altifp, + struct in6_aliasreq *ifra0) +{ + int error; + struct in6_ifaddr *ia6; + struct in6_aliasreq ifra; + + error = in6_ifattach_prelim(ifp); + if (error != 0) + return (error); + + if (!ip6_auto_linklocal) + return (0); + + /* assign a link-local address, only if there isn't one here already. */ + ia6 = in6ifa_ifpforlinklocal(ifp, 0); + if (ia6 != NULL) { + IFA_REMREF(&ia6->ia_ifa); + return (0); + } + + bzero(&ifra, sizeof (ifra)); + /* - * assign a link-local address, if there's none. + * in6_update_ifa() does not use ifra_name, but we accurately set it + * for safety. 
*/ - if (ip6_auto_linklocal) { - ia = in6ifa_ifpforlinklocal(ifp, 0); - if (ia == NULL) { - if (in6_ifattach_linklocal(ifp, altifp, ifra) == 0) { - /* linklocal address assigned */ - } else { - nd6log((LOG_INFO, "in6_ifattach: %s failed to " - "attach a linklocal address.\n", + strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); + + /* Initialize the IPv6 interface address in our in6_aliasreq block */ + if ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0 && ifra0 != NULL) { + /* interface provided both addresses for us */ + struct sockaddr_in6 *sin6 = &ifra.ifra_addr; + struct in6_addr *in6 = &sin6->sin6_addr; + boolean_t ok = TRUE; + + bcopy(&ifra0->ifra_addr, sin6, sizeof (struct sockaddr_in6)); + + if (sin6->sin6_family != AF_INET6 || sin6->sin6_port != 0) + ok = FALSE; + if (ok && (in6->s6_addr16[0] != htons(0xfe80))) + ok = FALSE; + if (ok) { + if (sin6->sin6_scope_id == 0 && in6->s6_addr16[1] == 0) + in6->s6_addr16[1] = htons(ifp->if_index); + else if (sin6->sin6_scope_id != 0 && + sin6->sin6_scope_id != ifp->if_index) + ok = FALSE; + else if (in6->s6_addr16[1] != 0 && + ntohs(in6->s6_addr16[1]) != ifp->if_index) + ok = FALSE; + } + if (ok && (in6->s6_addr32[1] != 0)) + ok = FALSE; + if (!ok) + return (EINVAL); + } else { + ifra.ifra_addr.sin6_family = AF_INET6; + ifra.ifra_addr.sin6_len = sizeof (struct sockaddr_in6); + ifra.ifra_addr.sin6_addr.s6_addr16[0] = htons(0xfe80); + ifra.ifra_addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0; + if ((ifp->if_flags & IFF_LOOPBACK) != 0) { + ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0; + ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1); + } else { + if (in6_select_iid_from_all_hw(ifp, altifp, + &ifra.ifra_addr.sin6_addr) != 0) { + nd6log((LOG_ERR, "%s: no IID available\n", if_name(ifp))); - /* failed to assign linklocal address. bark? */ + return (EADDRNOTAVAIL); } - } else { - IFA_REMREF(&ia->ia_ifa); } } -#if IFT_STF /* XXX */ -statinit: -#endif + if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL)) + return (EADDRNOTAVAIL); - /* update dynamically. */ - if (in6_maxmtu < ifp->if_mtu) - in6_maxmtu = ifp->if_mtu; + /* Set the prefix mask */ + ifra.ifra_prefixmask.sin6_len = sizeof (struct sockaddr_in6); + ifra.ifra_prefixmask.sin6_family = AF_INET6; + ifra.ifra_prefixmask.sin6_addr = in6mask64; - return 0; + /* link-local addresses should NEVER expire. */ + ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; + ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; + + /* Attach the link-local address */ + if (in6_ifattach_linklocal(ifp, &ifra) != 0) { + nd6log((LOG_INFO, + "%s: %s could not attach link-local address.\n", + __func__, if_name(ifp))); + /* NB: not an error */ + } + + return (0); +} + +int +in6_ifattach_llstartreq(struct ifnet *ifp, struct in6_llstartreq *llsr) +{ + struct in6_aliasreq ifra; + struct in6_ifaddr *ia6; + struct nd_ifinfo *ndi; + int error; + + VERIFY(llsr != NULL); + + error = in6_ifattach_prelim(ifp); + if (error != 0) + return (error); + + if (!ip6_auto_linklocal || (ifp->if_eflags & IFEF_NOAUTOIPV6LL) != 0) + return (0); + + if (nd6_send_opstate == ND6_SEND_OPMODE_DISABLED) + return (ENXIO); + + lck_rw_lock_shared(nd_if_rwlock); + ndi = ND_IFINFO(ifp); + VERIFY(ndi != NULL && ndi->initialized); + if ((ndi->flags & ND6_IFF_INSECURE) != 0) { + lck_rw_done(nd_if_rwlock); + return (ENXIO); + } + lck_rw_done(nd_if_rwlock); + + /* assign a link-local address, only if there isn't one here already. 
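 * (in6ifa_ifpforlinklocal() returns its result with a reference held,
 * hence the IFA_REMREF in the early-return path just below.)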
*/ + ia6 = in6ifa_ifpforlinklocal(ifp, 0); + if (ia6 != NULL) { + IFA_REMREF(&ia6->ia_ifa); + return (0); + } + + bzero(&ifra, sizeof (ifra)); + strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); + + ifra.ifra_addr.sin6_family = AF_INET6; + ifra.ifra_addr.sin6_len = sizeof (struct sockaddr_in6); + ifra.ifra_addr.sin6_addr.s6_addr16[0] = htons(0xfe80); + ifra.ifra_addr.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0; + ifra.ifra_flags = IN6_IFF_SECURED; + + in6_cga_node_lock(); + if (in6_cga_generate(&llsr->llsr_cgaprep, 0, + &ifra.ifra_addr.sin6_addr)) { + in6_cga_node_unlock(); + return (EADDRNOTAVAIL); + } + in6_cga_node_unlock(); + + if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL)) + return (EADDRNOTAVAIL); + + /* Set the prefix mask */ + ifra.ifra_prefixmask.sin6_len = sizeof (struct sockaddr_in6); + ifra.ifra_prefixmask.sin6_family = AF_INET6; + ifra.ifra_prefixmask.sin6_addr = in6mask64; + + /* + * link-local addresses should NEVER expire, but cryptographic + * ones may have finite preferred lifetime [if it's important to + * keep them from being used by applications as persistent device + * identifiers]. + */ + ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; + ifra.ifra_lifetime.ia6t_pltime = llsr->llsr_lifetime.ia6t_pltime; + + /* Attach the link-local address */ + if (in6_ifattach_linklocal(ifp, &ifra) != 0) { + /* NB: not an error */ + nd6log((LOG_INFO, + "%s: %s could not attach link-local address.\n", + __func__, if_name(ifp))); + } + + VERIFY(error == 0); + return (error); } /* @@ -1045,8 +1083,8 @@ in6_ifdetach(struct ifnet *ifp) /* * release another refcnt for the link from in6_ifaddrs. * Do this only if it's not already unlinked in the event - * that we lost the race, since in6_ifaddr_rwlock was momentarily - * dropped above. + * that we lost the race, since in6_ifaddr_rwlock was + * momentarily dropped above. */ if (unlinked) IFA_REMREF(ifa); @@ -1063,6 +1101,9 @@ in6_ifdetach(struct ifnet *ifp) } ifnet_lock_done(ifp); + /* invalidate route caches */ + routegenid_inet6_update(); + /* * remove neighbor management table. we call it twice just to make * sure we nuke everything. maybe we need just one call. @@ -1074,15 +1115,15 @@ in6_ifdetach(struct ifnet *ifp) nd6_purge(ifp); /* remove route to link-local allnodes multicast (ff02::1) */ - bzero(&sin6, sizeof(sin6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); + bzero(&sin6, sizeof (sin6)); + sin6.sin6_len = sizeof (struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = in6addr_linklocal_allnodes; sin6.sin6_addr.s6_addr16[1] = htons(ifp->if_index); rt = rtalloc1((struct sockaddr *)&sin6, 0, 0); if (rt != NULL) { RT_LOCK(rt); - if (rt->rt_ifp == ifp) { + if (rt->rt_ifp == ifp) { /* * Prevent another thread from modifying rt_key, * rt_gateway via rt_setgate() after the rt_lock @@ -1100,11 +1141,8 @@ in6_ifdetach(struct ifnet *ifp) } void -in6_get_tmpifid( - struct ifnet *ifp, - u_int8_t *retbuf, - const u_int8_t *baseid, - int generate) +in6_iid_mktmp(struct ifnet *ifp, u_int8_t *retbuf, const u_int8_t *baseid, + int generate) { u_int8_t nullbuf[8]; struct nd_ifinfo *ndi; @@ -1113,19 +1151,20 @@ in6_get_tmpifid( ndi = ND_IFINFO(ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); - bzero(nullbuf, sizeof(nullbuf)); - if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) == 0) { + bzero(nullbuf, sizeof (nullbuf)); + if (bcmp(ndi->randomid, nullbuf, sizeof (nullbuf)) == 0) { /* we've never created a random ID. Create a new one. 
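 * (An all-zero ndi->randomid is the sentinel for "not yet generated";
 * in6_generate_tmp_iid() substitutes a random non-zero value when the
 * digest comes out zero, so a generated ID can never be mistaken for
 * the sentinel.)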
*/ generate = 1; } if (generate) { - bcopy(baseid, ndi->randomseed1, sizeof(ndi->randomseed1)); + bcopy(baseid, ndi->randomseed1, sizeof (ndi->randomseed1)); - /* generate_tmp_ifid will update seedn and buf */ - (void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1, - ndi->randomid); + /* in6_generate_tmp_iid will update seedn and buf */ + (void) in6_generate_tmp_iid(ndi->randomseed0, ndi->randomseed1, + ndi->randomid); } + bcopy(ndi->randomid, retbuf, 8); lck_mtx_unlock(&ndi->lock); lck_rw_done(nd_if_rwlock); @@ -1139,12 +1178,11 @@ in6_tmpaddrtimer(void *arg) struct nd_ifinfo *ndi; u_int8_t nullbuf[8]; - timeout(in6_tmpaddrtimer, (caddr_t)0, - (ip6_temp_preferred_lifetime - ip6_desync_factor - - ip6_temp_regen_advance) * hz); + timeout(in6_tmpaddrtimer, (caddr_t)0, (ip6_temp_preferred_lifetime - + ip6_desync_factor - ip6_temp_regen_advance) * hz); lck_rw_lock_shared(nd_if_rwlock); - bzero(nullbuf, sizeof(nullbuf)); + bzero(nullbuf, sizeof (nullbuf)); for (i = 1; i < if_index + 1; i++) { if (!nd_ifinfo || i >= nd_ifinfo_indexlim) break; @@ -1152,14 +1190,13 @@ in6_tmpaddrtimer(void *arg) if (!ndi->initialized) continue; lck_mtx_lock(&ndi->lock); - if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) { + if (bcmp(ndi->randomid, nullbuf, sizeof (nullbuf)) != 0) { /* * We've been generating a random ID on this interface. * Create a new one. */ - (void)generate_tmp_ifid(ndi->randomseed0, - ndi->randomseed1, - ndi->randomid); + (void) in6_generate_tmp_iid(ndi->randomseed0, + ndi->randomseed1, ndi->randomid); } lck_mtx_unlock(&ndi->lock); } diff --git a/bsd/netinet6/in6_ifattach.h b/bsd/netinet6/in6_ifattach.h index 40ffa0379..394636c06 100644 --- a/bsd/netinet6/in6_ifattach.h +++ b/bsd/netinet6/in6_ifattach.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2003-2010 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,10 +22,9 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $KAME: in6_ifattach.h,v 1.4 2000/02/22 14:04:18 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 
@@ -57,16 +56,20 @@ */ #ifndef _NETINET6_IN6_IFATTACH_H_ -#define _NETINET6_IN6_IFATTACH_H_ +#define _NETINET6_IN6_IFATTACH_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern int in6_domifattach(struct ifnet *); -extern int in6_ifattach(struct ifnet *, struct ifnet *, struct in6_aliasreq *); +extern int in6_ifattach_prelim(struct ifnet *); +extern int in6_ifattach_aliasreq(struct ifnet *, struct ifnet *, + struct in6_aliasreq *); +extern int in6_ifattach_llstartreq(struct ifnet *, struct in6_llstartreq *); extern void in6_ifdetach(struct ifnet *); -extern void in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int); +extern int in6_iid_from_hw(struct ifnet *, struct in6_addr *); +extern void in6_iid_mktmp(struct ifnet *, u_int8_t *, const u_int8_t *, int); extern void in6_tmpaddrtimer(void *); extern int in6_nigroup(struct ifnet *, const char *, int, struct in6_addr *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_IN6_IFATTACH_H_ */ diff --git a/bsd/netinet6/in6_mcast.c b/bsd/netinet6/in6_mcast.c index 715403cf8..a76ce4fe0 100644 --- a/bsd/netinet6/in6_mcast.c +++ b/bsd/netinet6/in6_mcast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 Apple Inc. All rights reserved. + * Copyright (c) 2010-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -246,9 +246,6 @@ static void ip6ms_free(struct ip6_msource *); static struct in6_msource *in6ms_alloc(int); static void in6ms_free(struct in6_msource *); -#define IM6O_CAST_TO_NONCONST(x) ((struct ip6_moptions *)(void *)(uintptr_t)x) -#define IN6M_CAST_TO_NONCONST(x) ((struct in6_multi *)(void *)(uintptr_t)x) - /* * IPv6 source tree comparison function. * @@ -349,7 +346,7 @@ im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp, int idx; int nmships; - IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo)); + IM6O_LOCK_ASSERT_HELD(__DECONST(struct ip6_moptions *, imo)); gsin6 = (struct sockaddr_in6 *)(uintptr_t)(size_t)group; @@ -397,7 +394,7 @@ im6o_match_source(const struct ip6_moptions *imo, const size_t gidx, struct ip6_msource *ims; const sockunion_t *psa; - IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo)); + IM6O_LOCK_ASSERT_HELD(__DECONST(struct ip6_moptions *, imo)); VERIFY(src->sa_family == AF_INET6); VERIFY(gidx != (size_t)-1 && gidx < imo->im6o_num_memberships); @@ -429,7 +426,7 @@ im6o_mc_filter(const struct ip6_moptions *imo, const struct ifnet *ifp, struct in6_msource *ims; int mode; - IM6O_LOCK_ASSERT_HELD(IM6O_CAST_TO_NONCONST(imo)); + IM6O_LOCK_ASSERT_HELD(__DECONST(struct ip6_moptions *, imo)); VERIFY(ifp != NULL); gidx = im6o_match_group(imo, ifp, group); @@ -795,7 +792,8 @@ im6f_rollback(struct in6_mfilter *imf) lims->im6sl_st[1] = lims->im6sl_st[0]; } else { /* revert source added t1 */ - MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims)); + MLD_PRINTF(("%s: free in6ms 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(lims))); RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); in6ms_free(lims); imf->im6f_nsrc--; @@ -855,7 +853,8 @@ im6f_reap(struct in6_mfilter *imf) lims = (struct in6_msource *)ims; if ((lims->im6sl_st[0] == MCAST_UNDEFINED) && (lims->im6sl_st[1] == MCAST_UNDEFINED)) { - MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims)); + MLD_PRINTF(("%s: free in6ms 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(lims))); RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); in6ms_free(lims); imf->im6f_nsrc--; @@ -876,7 +875,8 @@ im6f_purge(struct in6_mfilter *imf) RB_FOREACH_SAFE(ims, ip6_msource_tree, 
&imf->im6f_sources, tims) { lims = (struct in6_msource *)ims; - MLD_PRINTF(("%s: free in6ms %p\n", __func__, lims)); + MLD_PRINTF(("%s: free in6ms 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(lims))); RB_REMOVE(ip6_msource_tree, &imf->im6f_sources, ims); in6ms_free(lims); imf->im6f_nsrc--; @@ -916,8 +916,8 @@ in6m_get_source(struct in6_multi *inm, const struct in6_addr *addr, RB_INSERT(ip6_msource_tree, &inm->in6m_srcs, nims); ++inm->in6m_nsrc; ims = nims; - MLD_PRINTF(("%s: allocated %s as %p\n", __func__, - ip6_sprintf(addr), ims)); + MLD_PRINTF(("%s: allocated %s as 0x%llx\n", __func__, + ip6_sprintf(addr), (uint64_t)VM_KERNEL_ADDRPERM(ims))); } *pims = ims; @@ -936,7 +936,7 @@ uint8_t im6s_get_mode(const struct in6_multi *inm, const struct ip6_msource *ims, uint8_t t) { - IN6M_LOCK_ASSERT_HELD(IN6M_CAST_TO_NONCONST(inm)); + IN6M_LOCK_ASSERT_HELD(__DECONST(struct in6_multi *, inm)); t = !!t; if (inm->in6m_st[t].iss_ex > 0 && @@ -1107,7 +1107,9 @@ in6m_merge(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) inm->in6m_st[1].iss_asm++; } - MLD_PRINTF(("%s: merged imf %p to inm %p\n", __func__, imf, inm)); + MLD_PRINTF(("%s: merged imf 0x%llx to inm 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(imf), + (uint64_t)VM_KERNEL_ADDRPERM(inm))); in6m_print(inm); out_reap: @@ -1129,7 +1131,8 @@ in6m_commit(struct in6_multi *inm) IN6M_LOCK_ASSERT_HELD(inm); - MLD_PRINTF(("%s: commit inm %p\n", __func__, inm)); + MLD_PRINTF(("%s: commit inm 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm))); MLD_PRINTF(("%s: pre commit:\n", __func__)); in6m_print(inm); @@ -1154,7 +1157,8 @@ in6m_reap(struct in6_multi *inm) ims->im6s_st[1].ex > 0 || ims->im6s_st[1].in > 0 || ims->im6s_stp != 0) continue; - MLD_PRINTF(("%s: free ims %p\n", __func__, ims)); + MLD_PRINTF(("%s: free ims 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ims))); RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims); ip6ms_free(ims); inm->in6m_nsrc--; @@ -1172,7 +1176,8 @@ in6m_purge(struct in6_multi *inm) IN6M_LOCK_ASSERT_HELD(inm); RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, tims) { - MLD_PRINTF(("%s: free ims %p\n", __func__, ims)); + MLD_PRINTF(("%s: free ims 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ims))); RB_REMOVE(ip6_msource_tree, &inm->in6m_srcs, ims); ip6ms_free(ims); inm->in6m_nsrc--; @@ -1199,8 +1204,6 @@ in6_joingroup(struct ifnet *ifp, struct in6_addr *mcaddr, return (NULL); } - delay = (delay * PR_SLOWHZ) / hz; - error = in6_mc_join(ifp, mcaddr, NULL, &imm->i6mm_maddr, delay); if (error) { *errorp = error; @@ -1244,6 +1247,7 @@ in6_mc_join(struct ifnet *ifp, const struct in6_addr *mcaddr, struct in6_mfilter timf; struct in6_multi *inm = NULL; int error = 0; + struct mld_tparams mtp; /* * Sanity: Check scope zone ID was set for ifp, if and @@ -1255,9 +1259,11 @@ in6_mc_join(struct ifnet *ifp, const struct in6_addr *mcaddr, VERIFY(mcaddr->s6_addr16[1] != 0); } - MLD_PRINTF(("%s: join %s on %p(%s%d))\n", __func__, - ip6_sprintf(mcaddr), ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: join %s on 0x%llx(%s))\n", __func__, + ip6_sprintf(mcaddr), (uint64_t)VM_KERNEL_ADDRPERM(ifp), + if_name(ifp))); + bzero(&mtp, sizeof (mtp)); *pinm = NULL; /* @@ -1285,15 +1291,17 @@ in6_mc_join(struct ifnet *ifp, const struct in6_addr *mcaddr, } MLD_PRINTF(("%s: doing mld downcall\n", __func__)); - error = mld_change_state(inm, delay); + error = mld_change_state(inm, &mtp, delay); if (error) { MLD_PRINTF(("%s: failed to update source\n", __func__)); + im6f_rollback(imf); goto out_in6m_release; 
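	/*
	 * mld_change_state() records any timeouts it needs in the
	 * caller-supplied mld_tparams (mtp) instead of arming the MLD
	 * timers itself; the caller arms them via mld_set_timeout()
	 * only after the in6_multi locks have been dropped (see the
	 * end of this function).
	 */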
} out_in6m_release: if (error) { - MLD_PRINTF(("%s: dropping ref on %p\n", __func__, inm)); + MLD_PRINTF(("%s: dropping ref on 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm))); IN6M_UNLOCK(inm); IN6M_REMREF(inm); } else { @@ -1301,6 +1309,9 @@ out_in6m_release: *pinm = inm; /* keep refcount from in6_mc_get() */ } + /* schedule timer now that we've dropped the lock(s) */ + mld_set_timeout(&mtp); + return (error); } @@ -1322,7 +1333,9 @@ in6_mc_leave(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) { struct in6_mfilter timf; int error, lastref; + struct mld_tparams mtp; + bzero(&mtp, sizeof (mtp)); error = 0; IN6M_LOCK_ASSERT_NOTHELD(inm); @@ -1330,10 +1343,10 @@ in6_mc_leave(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) in6_multihead_lock_exclusive(); IN6M_LOCK(inm); - MLD_PRINTF(("%s: leave inm %p, %s/%s%d, imf %p\n", __func__, - inm, ip6_sprintf(&inm->in6m_addr), + MLD_PRINTF(("%s: leave inm 0x%llx, %s/%s%d, imf 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm), ip6_sprintf(&inm->in6m_addr), (in6m_is_ifp_detached(inm) ? "null" : inm->in6m_ifp->if_name), - inm->in6m_ifp->if_unit, imf)); + inm->in6m_ifp->if_unit, (uint64_t)VM_KERNEL_ADDRPERM(imf))); /* * If no imf was specified (i.e. kernel consumer), @@ -1357,7 +1370,7 @@ in6_mc_leave(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) KASSERT(error == 0, ("%s: failed to merge inm state\n", __func__)); MLD_PRINTF(("%s: doing mld downcall\n", __func__)); - error = mld_change_state(inm, 0); + error = mld_change_state(inm, &mtp, 0); #if MLD_DEBUG if (error) MLD_PRINTF(("%s: failed mld downcall\n", __func__)); @@ -1371,6 +1384,9 @@ in6_mc_leave(struct in6_multi *inm, /*const*/ struct in6_mfilter *imf) if (lastref) IN6M_REMREF(inm); /* for in6_multihead list */ + /* schedule timer now that we've dropped the lock(s) */ + mld_set_timeout(&mtp); + return (error); } @@ -1396,7 +1412,9 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) size_t idx; uint16_t fmode; int error, doblock; + struct mld_tparams mtp; + bzero(&mtp, sizeof (mtp)); ifp = NULL; error = 0; doblock = 0; @@ -1525,7 +1543,7 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) } MLD_PRINTF(("%s: doing mld downcall\n", __func__)); - error = mld_change_state(inm, 0); + error = mld_change_state(inm, &mtp, 0); IN6M_UNLOCK(inm); #if MLD_DEBUG if (error) @@ -1543,6 +1561,10 @@ out_im6f_rollback: out_imo_locked: IM6O_UNLOCK(imo); IM6O_REMREF(imo); /* from in6p_findmoptions() */ + + /* schedule timer now that we've dropped the lock(s) */ + mld_set_timeout(&mtp); + return (error); } @@ -1664,8 +1686,8 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) return (EADDRNOTAVAIL); if ((size_t) msfr.msfr_nsrcs > - SIZE_MAX / sizeof(struct sockaddr_storage)) - msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage); + UINT32_MAX / sizeof(struct sockaddr_storage)) + msfr.msfr_nsrcs = UINT32_MAX / sizeof(struct sockaddr_storage); if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc) msfr.msfr_nsrcs = in6_mcast_maxsocksrc; @@ -1786,9 +1808,9 @@ ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt) * If socket is neither of type SOCK_RAW or SOCK_DGRAM, * or is a divert socket, reject it. 
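 * (SOCK_TYPE() and SOCK_PROTO() below are accessor macros for
 * so_proto->pr_type and so_proto->pr_protocol, respectively.)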
*/ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { + if (SOCK_PROTO(inp->inp_socket) == IPPROTO_DIVERT || + (SOCK_TYPE(inp->inp_socket) != SOCK_RAW && + SOCK_TYPE(inp->inp_socket) != SOCK_DGRAM)) { return (EOPNOTSUPP); } @@ -1883,8 +1905,8 @@ in6p_lookup_mcast_ifp(const struct inpcb *in6p, if (ro6.ro_rt != NULL) { ifp = ro6.ro_rt->rt_ifp; VERIFY(ifp != NULL); - rtfree(ro6.ro_rt); } + ROUTE_RELEASE(&ro6); return (ifp); } @@ -1941,7 +1963,9 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) size_t idx; int error, is_new; uint32_t scopeid = 0; + struct mld_tparams mtp; + bzero(&mtp, sizeof (mtp)); ifp = NULL; imf = NULL; error = 0; @@ -2011,8 +2035,9 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) ifp = ifindex2ifnet[mreq.ipv6mr_interface]; ifnet_head_done(); } - MLD_PRINTF(("%s: ipv6mr_interface = %d, ifp = %p\n", - __func__, mreq.ipv6mr_interface, ifp)); + MLD_PRINTF(("%s: ipv6mr_interface = %d, ifp = 0x%llx\n", + __func__, mreq.ipv6mr_interface, + (uint64_t)VM_KERNEL_ADDRPERM(ifp))); break; } @@ -2236,7 +2261,7 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) goto out_im6f_rollback; } MLD_PRINTF(("%s: doing mld downcall\n", __func__)); - error = mld_change_state(inm, 0); + error = mld_change_state(inm, &mtp, 0); IN6M_UNLOCK(inm); if (error) { MLD_PRINTF(("%s: failed mld downcall\n", @@ -2266,6 +2291,10 @@ out_im6o_free: out_imo_locked: IM6O_UNLOCK(imo); IM6O_REMREF(imo); /* from in6p_findmoptions() */ + + /* schedule timer now that we've dropped the lock(s) */ + mld_set_timeout(&mtp); + return (error); } @@ -2286,7 +2315,9 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) uint32_t ifindex = 0; size_t idx; int error, is_final; + struct mld_tparams mtp; + bzero(&mtp, sizeof (mtp)); ifp = NULL; error = 0; is_final = 1; @@ -2440,7 +2471,8 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) } VERIFY(ifp != NULL); - MLD_PRINTF(("%s: ifp = %p\n", __func__, ifp)); + MLD_PRINTF(("%s: ifp = 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp))); /* * Find the membership in the membership array. 
@@ -2478,7 +2510,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) } ims = im6o_match_source(imo, idx, &ssa->sa); if (ims == NULL) { - MLD_PRINTF(("%s: source %p %spresent\n", __func__, + MLD_PRINTF(("%s: source %s %spresent\n", __func__, ip6_sprintf(&ssa->sin6.sin6_addr), "not ")); error = EADDRNOTAVAIL; @@ -2516,7 +2548,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) } MLD_PRINTF(("%s: doing mld downcall\n", __func__)); - error = mld_change_state(inm, 0); + error = mld_change_state(inm, &mtp, 0); if (error) { MLD_PRINTF(("%s: failed mld downcall\n", __func__)); } @@ -2546,6 +2578,10 @@ out_im6f_rollback: out_locked: IM6O_UNLOCK(imo); IM6O_REMREF(imo); /* from in6p_findmoptions() */ + + /* schedule timer now that we've dropped the lock(s) */ + mld_set_timeout(&mtp); + return (error); } @@ -2611,7 +2647,10 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt) struct in6_multi *inm; size_t idx; int error; - user_addr_t tmp_ptr; + user_addr_t tmp_ptr; + struct mld_tparams mtp; + + bzero(&mtp, sizeof (mtp)); if (IS_64BIT_PROCESS(current_proc())) { error = sooptcopyin(sopt, &msfr64, @@ -2632,8 +2671,8 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt) } if ((size_t) msfr.msfr_nsrcs > - SIZE_MAX / sizeof(struct sockaddr_storage)) - msfr.msfr_nsrcs = SIZE_MAX / sizeof(struct sockaddr_storage); + UINT32_MAX / sizeof(struct sockaddr_storage)) + msfr.msfr_nsrcs = UINT32_MAX / sizeof(struct sockaddr_storage); if (msfr.msfr_nsrcs > in6_mcast_maxsocksrc) return (ENOBUFS); @@ -2783,7 +2822,7 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt) } MLD_PRINTF(("%s: doing mld downcall\n", __func__)); - error = mld_change_state(inm, 0); + error = mld_change_state(inm, &mtp, 0); IN6M_UNLOCK(inm); #if MLD_DEBUG if (error) @@ -2802,6 +2841,9 @@ out_imo_locked: IM6O_UNLOCK(imo); IM6O_REMREF(imo); /* from in6p_findmoptions() */ + /* schedule timer now that we've dropped the lock(s) */ + mld_set_timeout(&mtp); + return (error); } @@ -2827,9 +2869,9 @@ ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt) * If socket is neither of type SOCK_RAW or SOCK_DGRAM, * or is a divert socket, reject it. */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) + if (SOCK_PROTO(inp->inp_socket) == IPPROTO_DIVERT || + (SOCK_TYPE(inp->inp_socket) != SOCK_RAW && + SOCK_TYPE(inp->inp_socket) != SOCK_DGRAM)) return (EOPNOTSUPP); switch (sopt->sopt_name) { @@ -3001,7 +3043,8 @@ sysctl_ip6_mcast_filters SYSCTL_HANDLER_ARGS break; /* abort */ } RB_FOREACH(ims, ip6_msource_tree, &inm->in6m_srcs) { - MLD_PRINTF(("%s: visit node %p\n", __func__, ims)); + MLD_PRINTF(("%s: visit node 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ims))); /* * Only copy-out sources which are in-mode. 
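 * (A source is "in-mode" when its filter state matches the group's
 * current filter mode; cf. im6s_get_mode() earlier in this file.)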
*/ @@ -3430,6 +3473,7 @@ in6m_mode_str(const int mode) static const char *in6m_statestrs[] = { "not-member\n", "silent\n", + "reporting\n", "idle\n", "lazy\n", "sleeping\n", @@ -3455,25 +3499,25 @@ in6m_print(const struct in6_multi *inm) { int t; - IN6M_LOCK_ASSERT_HELD(IN6M_CAST_TO_NONCONST(inm)); + IN6M_LOCK_ASSERT_HELD(__DECONST(struct in6_multi *, inm)); if (mld_debug == 0) return; - printf("%s: --- begin in6m %p ---\n", __func__, inm); - printf("addr %s ifp %p(%s%d) ifma %p\n", + printf("%s: --- begin in6m 0x%llx ---\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm)); + printf("addr %s ifp 0x%llx(%s) ifma 0x%llx\n", ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp, - inm->in6m_ifp->if_name, - inm->in6m_ifp->if_unit, - inm->in6m_ifma); + (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp), + if_name(inm->in6m_ifp), + (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifma)); printf("timer %u state %s refcount %u scq.len %u\n", inm->in6m_timer, in6m_state_str(inm->in6m_state), inm->in6m_refcount, inm->in6m_scq.ifq_len); - printf("mli %p nsrc %lu sctimer %u scrv %u\n", - inm->in6m_mli, + printf("mli 0x%llx nsrc %lu sctimer %u scrv %u\n", + (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_mli), inm->in6m_nsrc, inm->in6m_sctimer, inm->in6m_scrv); @@ -3485,7 +3529,8 @@ in6m_print(const struct in6_multi *inm) inm->in6m_st[t].iss_in, inm->in6m_st[t].iss_rec); } - printf("%s: --- end in6m %p ---\n", __func__, inm); + printf("%s: --- end in6m 0x%llx ---\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(inm)); } #else diff --git a/bsd/netinet6/in6_pcb.c b/bsd/netinet6/in6_pcb.c index 4c5925528..e340b968a 100644 --- a/bsd/netinet6/in6_pcb.c +++ b/bsd/netinet6/in6_pcb.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -136,100 +136,101 @@ #include #endif /* IPSEC */ -struct in6_addr zeroin6_addr; - /* - in6_pcblookup_local_and_cleanup does everything - in6_pcblookup_local does but it checks for a socket - that's going away. Since we know that the lock is - held read+write when this function is called, we - can safely dispose of this socket like the slow - timer would usually do and return NULL. This is - great for bind. 
-*/ -static struct inpcb* -in6_pcblookup_local_and_cleanup( - struct inpcbinfo *pcbinfo, - struct in6_addr *laddr, - u_int lport_arg, - int wild_okay) + * in6_pcblookup_local_and_cleanup does everything + * in6_pcblookup_local does but it checks for a socket + * that's going away. Since we know that the lock is + * held read+write when this function is called, we + * can safely dispose of this socket like the slow + * timer would usually do and return NULL. This is + * great for bind. + */ +static struct inpcb * +in6_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, + struct in6_addr *laddr, u_int lport_arg, int wild_okay) { struct inpcb *inp; - + /* Perform normal lookup */ inp = in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); - + /* Check if we found a match but it's waiting to be disposed */ - if (inp && inp->inp_wantcnt == WNT_STOPUSING) { + if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { struct socket *so = inp->inp_socket; - + lck_mtx_lock(&inp->inpcb_mtx); - + if (so->so_usecount == 0) { if (inp->inp_state != INPCB_STATE_DEAD) in6_pcbdetach(inp); - in_pcbdispose(inp); + in_pcbdispose(inp); /* will unlock & destroy */ inp = NULL; - } - else { + } else { lck_mtx_unlock(&inp->inpcb_mtx); } } - - return inp; + + return (inp); } +/* + * Bind an INPCB to an address and/or port. This routine should not alter + * the caller-supplied local address "nam". + */ int in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) { struct socket *so = inp->inp_socket; - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); -#if !CONFIG_EMBEDDED int error; kauth_cred_t cred; -#endif if (!in6_ifaddrs) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) - return(EINVAL); - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) + return (EINVAL); + if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) wild = 1; socket_unlock(so, 0); /* keep reference */ - lck_rw_lock_exclusive(pcbinfo->mtx); - if (nam) { + lck_rw_lock_exclusive(pcbinfo->ipi_lock); + if (nam != NULL) { struct ifnet *outif = NULL; + struct sockaddr_in6 sin6; - sin6 = (struct sockaddr_in6 *)(void *)nam; - if (nam->sa_len != sizeof(*sin6)) { - lck_rw_done(pcbinfo->mtx); + if (nam->sa_len != sizeof (struct sockaddr_in6)) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EINVAL); + return (EINVAL); } /* * family check. 
*/ if (nam->sa_family != AF_INET6) { - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EAFNOSUPPORT); + return (EAFNOSUPPORT); } + lport = SIN6(nam)->sin6_port; + + bzero(&sin6, sizeof (sin6)); + *(&sin6) = *SIN6(nam); /* KAME hack: embed scopeid */ - if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL, + if (in6_embedscope(&sin6.sin6_addr, &sin6, inp, NULL, NULL) != 0) { - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return EINVAL; + return (EINVAL); } - /* this must be cleared for ifa_ifwithaddr() */ - sin6->sin6_scope_id = 0; - lport = sin6->sin6_port; - if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { + /* Sanitize local copy for address searches */ + sin6.sin6_flowinfo = 0; + sin6.sin6_scope_id = 0; + sin6.sin6_port = 0; + + if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow compepte duplication of binding if @@ -239,156 +240,157 @@ in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; - } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) { struct ifaddr *ifa; - sin6->sin6_port = 0; /* yech... */ - if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) { - lck_rw_done(pcbinfo->mtx); + ifa = ifa_ifwithaddr(SA(&sin6)); + if (ifa == NULL) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EADDRNOTAVAIL); - } - - /* - * XXX: bind to an anycast address might accidentally - * cause sending a packet with anycast source address. - * We should allow to bind to a deprecated address, since - * the application dare to use it. - */ - if (ifa != NULL) { + return (EADDRNOTAVAIL); + } else { + /* + * XXX: bind to an anycast address might + * accidentally cause sending a packet with + * anycast source address. We should allow + * to bind to a deprecated address, since + * the application dare to use it. + */ IFA_LOCK_SPIN(ifa); if (((struct in6_ifaddr *)ifa)->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| + IN6_IFF_DETACHED)) { IFA_UNLOCK(ifa); IFA_REMREF(ifa); - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EADDRNOTAVAIL); + return (EADDRNOTAVAIL); } + /* + * Opportunistically determine the outbound + * interface that may be used; this may not + * hold true if we end up using a route + * going over a different interface, e.g. + * when sending to a local address. This + * will get updated again after sending. 
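
A detail of this hunk worth calling out: in6_pcbbind() now works on a scrubbed stack copy of the caller's sockaddr instead of writing into "nam" itself (the old code's "sin6->sin6_port = 0; /* yech... */"). The sketch below shows the same copy-then-scrub pattern in compilable form; sanitized_copy() is a hypothetical name, not a kernel function:

    #include <stdio.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>

    /*
     * Work on a scrubbed local copy so interface-address searches compare
     * only the address itself, and the caller's sockaddr is never modified.
     */
    static struct sockaddr_in6
    sanitized_copy(const struct sockaddr_in6 *nam, in_port_t *lport)
    {
        struct sockaddr_in6 sin6 = *nam;    /* local copy on the stack */

        *lport = sin6.sin6_port;            /* remember the requested port */
        sin6.sin6_flowinfo = 0;             /* none of these fields may    */
        sin6.sin6_scope_id = 0;             /* take part in an interface-  */
        sin6.sin6_port = 0;                 /* address comparison          */
        return (sin6);
    }

    int
    main(void)
    {
        struct sockaddr_in6 nam = { .sin6_family = AF_INET6,
            .sin6_port = htons(80) };
        in_port_t lport;
        struct sockaddr_in6 clean = sanitized_copy(&nam, &lport);

        printf("requested port %u, search port %u\n",
            (unsigned)ntohs(lport), (unsigned)clean.sin6_port);
        return (0);
    }
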
+ */ outif = ifa->ifa_ifp; IFA_UNLOCK(ifa); IFA_REMREF(ifa); } } - if (lport) { + if (lport != 0) { struct inpcb *t; + uid_t u; /* GROSS */ -#if !CONFIG_EMBEDDED if (ntohs(lport) < IPV6PORT_RESERVED) { cred = kauth_cred_proc_ref(p); - error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); + error = priv_check_cred(cred, + PRIV_NETINET_RESERVEDPORT, 0); kauth_cred_unref(&cred); if (error != 0) { - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EACCES); + return (EACCES); } } -#endif - - if (kauth_cred_getuid(so->so_cred) && - !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { + if (!IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr) && + (u = kauth_cred_getuid(so->so_cred)) != 0) { t = in6_pcblookup_local_and_cleanup(pcbinfo, - &sin6->sin6_addr, lport, + &sin6.sin6_addr, lport, INPLOOKUP_WILDCARD); - if (t && - (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || - !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || - (t->inp_socket->so_options & - SO_REUSEPORT) == 0) && - (kauth_cred_getuid(so->so_cred) != - kauth_cred_getuid(t->inp_socket->so_cred)) && - ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) { - lck_rw_done(pcbinfo->mtx); + if (t != NULL && (!IN6_IS_ADDR_UNSPECIFIED( + &sin6.sin6_addr) || + !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || + !(t->inp_socket->so_options & + SO_REUSEPORT)) && (u != kauth_cred_getuid( + t->inp_socket->so_cred)) && + !(t->inp_socket->so_flags & + SOF_REUSESHAREUID)) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EADDRINUSE); } - if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && - IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + if (!(inp->inp_flags & IN6P_IPV6_V6ONLY) && + IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) { struct sockaddr_in sin; - in6_sin6_2_sin(&sin, sin6); - t = in_pcblookup_local_and_cleanup(pcbinfo, - sin.sin_addr, lport, - INPLOOKUP_WILDCARD); - if (t && (t->inp_socket->so_options & SO_REUSEPORT) == 0 && + in6_sin6_2_sin(&sin, &sin6); + t = in_pcblookup_local_and_cleanup( + pcbinfo, sin.sin_addr, lport, + INPLOOKUP_WILDCARD); + if (t != NULL && + !(t->inp_socket->so_options & + SO_REUSEPORT) && (kauth_cred_getuid(so->so_cred) != - kauth_cred_getuid(t->inp_socket->so_cred)) && - (ntohl(t->inp_laddr.s_addr) != - INADDR_ANY || - INP_SOCKAF(so) == - INP_SOCKAF(t->inp_socket))) { - - lck_rw_done(pcbinfo->mtx); + kauth_cred_getuid(t->inp_socket-> + so_cred)) && (t->inp_laddr.s_addr != + INADDR_ANY || SOCK_DOM(so) == + SOCK_DOM(t->inp_socket))) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EADDRINUSE); } } } - t = in6_pcblookup_local_and_cleanup(pcbinfo, &sin6->sin6_addr, - lport, wild); - if (t && (reuseport & t->inp_socket->so_options) == 0) { - lck_rw_done(pcbinfo->mtx); + t = in6_pcblookup_local_and_cleanup(pcbinfo, + &sin6.sin6_addr, lport, wild); + if (t != NULL && + (reuseport & t->inp_socket->so_options) == 0) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EADDRINUSE); + return (EADDRINUSE); } - if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && - IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + if (!(inp->inp_flags & IN6P_IPV6_V6ONLY) && + IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) { struct sockaddr_in sin; - in6_sin6_2_sin(&sin, sin6); - t = in_pcblookup_local_and_cleanup(pcbinfo, sin.sin_addr, - lport, wild); - if (t && - (reuseport & t->inp_socket->so_options) - == 0 && - (ntohl(t->inp_laddr.s_addr) - != INADDR_ANY || - INP_SOCKAF(so) == - INP_SOCKAF(t->inp_socket))) { - lck_rw_done(pcbinfo->mtx); + in6_sin6_2_sin(&sin, &sin6); + t = 
in_pcblookup_local_and_cleanup(pcbinfo, + sin.sin_addr, lport, wild); + if (t != NULL && (reuseport & + t->inp_socket->so_options) == 0 && + (t->inp_laddr.s_addr != INADDR_ANY || + SOCK_DOM(so) == SOCK_DOM(t->inp_socket))) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EADDRINUSE); } } } - inp->in6p_laddr = sin6->sin6_addr; + inp->in6p_laddr = sin6.sin6_addr; inp->in6p_last_outifp = outif; } socket_lock(so, 0); if (lport == 0) { int e; if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p, 1)) != 0) { - lck_rw_done(pcbinfo->mtx); - return(e); + lck_rw_done(pcbinfo->ipi_lock); + return (e); } - } - else { + } else { inp->inp_lport = lport; if (in_pcbinshash(inp, 1) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; inp->in6p_last_outifp = NULL; - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); } } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); sflt_notify(so, sock_evt_bound, NULL); - return(0); + return (0); } /* * Transform old in6_pcbconnect() into an inner subroutine for new - * in6_pcbconnect(): Do some validity-checking on the remote - * address (in mbuf 'nam') and then determine local host address - * (i.e., which interface) to use to access that remote host. + * in6_pcbconnect(); do some validity-checking on the remote address + * (in "nam") and then determine local host address (i.e., which + * interface) to use to access that remote host. * - * This preserves definition of in6_pcbconnect(), while supporting a - * slightly different version for T/TCP. (This is more than - * a bit of a kludge, but cleaning up the internal interfaces would - * have forced minor changes in every protocol). + * This routine may alter the caller-supplied remote address "nam". * * This routine might return an ifp with a reference held if the caller * provides a non-NULL outif, even in the error case. The caller is @@ -398,7 +400,6 @@ int in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in6_addr *plocal_addr6, struct ifnet **outif) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam; struct in6_addr *addr6 = NULL; struct in6_addr src_storage; int error = 0; @@ -406,15 +407,15 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, if (outif != NULL) *outif = NULL; - if (nam->sa_len != sizeof (*sin6)) + if (nam->sa_len != sizeof (struct sockaddr_in6)) return (EINVAL); - if (sin6->sin6_family != AF_INET6) + if (SIN6(nam)->sin6_family != AF_INET6) return (EAFNOSUPPORT); - if (sin6->sin6_port == 0) + if (SIN6(nam)->sin6_port == 0) return (EADDRNOTAVAIL); /* KAME hack: embed scopeid */ - if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL, NULL) != 0) + if (in6_embedscope(&SIN6(nam)->sin6_addr, SIN6(nam), inp, NULL, NULL) != 0) return (EINVAL); if (in6_ifaddrs) { @@ -422,12 +423,12 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. */ - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) - sin6->sin6_addr = in6addr_loopback; + if (IN6_IS_ADDR_UNSPECIFIED(&SIN6(nam)->sin6_addr)) + SIN6(nam)->sin6_addr = in6addr_loopback; } ifscope = (inp->inp_flags & INP_BOUND_IF) ? - inp->inp_boundifp->if_index : IFSCOPE_NONE; + inp->inp_boundifp->if_index : IFSCOPE_NONE; /* * XXX: in6_selectsrc might replace the bound local address @@ -438,7 +439,7 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, * even in the error case; caller always needs to release it * if non-NULL. 
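
Because in6_pcbladdr() can hand back a referenced ifp even on failure, every caller has to release it on every exit path. A hedged sketch of the calling pattern the comment above demands (the "done:" label in in6_pcbconnect() in this same file follows this shape; ifnet_release() is assumed here as the usual xnu KPI for dropping the reference):

    struct ifnet *outif = NULL;
    struct in6_addr src;
    int error;

    error = in6_pcbladdr(inp, nam, &src, &outif);
    /* ... use src on success ... */

    /* outif may hold a reference even when error != 0 */
    if (outif != NULL)
        ifnet_release(outif);
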
*/ - addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp, + addr6 = in6_selectsrc(SIN6(nam), inp->in6p_outputopts, inp, &inp->in6p_route, outif, &src_storage, ifscope, &error); if (outif != NULL) { @@ -460,10 +461,12 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, if (addr6 == NULL) { if (outif != NULL && (*outif) != NULL && - (inp->inp_flags & INP_NO_IFT_CELLULAR) && - (*outif)->if_type == IFT_CELLULAR) + (inp->inp_flags & INP_NO_IFT_CELLULAR) && + IFNET_IS_CELLULAR(*outif)) { soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); + error = EHOSTUNREACH; + } if (error == 0) error = EADDRNOTAVAIL; return (error); @@ -485,10 +488,7 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, * then pick one. */ int -in6_pcbconnect( - struct inpcb *inp, - struct sockaddr *nam, - struct proc *p) +in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p) { struct in6_addr addr6; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam; @@ -505,19 +505,16 @@ in6_pcbconnect( * whenever it's non-NULL. */ if ((error = in6_pcbladdr(inp, nam, &addr6, &outif)) != 0) { - if ((inp->inp_flags & INP_NO_IFT_CELLULAR) && - outif != NULL && - outif->if_type == IFT_CELLULAR) - soevent(inp->inp_socket, + if ((inp->inp_flags & INP_NO_IFT_CELLULAR) && outif != NULL && + IFNET_IS_CELLULAR(outif)) + soevent(inp->inp_socket, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); goto done; } socket_unlock(inp->inp_socket, 0); pcb = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, - sin6->sin6_port, - IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) - ? &addr6 : &inp->in6p_laddr, - inp->inp_lport, 0, NULL); + sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? + &addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL); socket_lock(inp->inp_socket, 0); if (pcb != NULL) { in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 
1 : 0); @@ -526,29 +523,25 @@ in6_pcbconnect( } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { - error = in6_pcbbind(inp, (struct sockaddr *)0, p); + error = in6_pcbbind(inp, NULL, p); if (error) goto done; } inp->in6p_laddr = addr6; inp->in6p_last_outifp = outif; /* no reference needed */ + inp->in6p_flags |= INP_IN6ADDR_ANY; } - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { + /* lock inversion issue, mostly with udp multicast packets */ socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); socket_lock(inp->inp_socket, 0); } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; - /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ - inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; - if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) - inp->in6p_flowinfo |= - (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); in_pcbrehash(inp); - lck_rw_done(inp->inp_pcbinfo->mtx); + lck_rw_done(inp->inp_pcbinfo->ipi_lock); done: if (outif != NULL) @@ -558,61 +551,72 @@ done: } void -in6_pcbdisconnect( - struct inpcb *inp) +in6_pcbdisconnect(struct inpcb *inp) { - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ - socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); - socket_lock(inp->inp_socket, 0); + struct socket *so = inp->inp_socket; + + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { + /* lock inversion issue, mostly with udp multicast packets */ + socket_unlock(so, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); + socket_lock(so, 0); } - bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); + bzero((caddr_t)&inp->in6p_faddr, sizeof (inp->in6p_faddr)); inp->inp_fport = 0; - /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ - inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; + /* clear flowinfo - RFC 6437 */ + inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; in_pcbrehash(inp); - lck_rw_done(inp->inp_pcbinfo->mtx); - if (inp->inp_socket->so_state & SS_NOFDREF) + lck_rw_done(inp->inp_pcbinfo->ipi_lock); + /* + * A multipath subflow socket would have its SS_NOFDREF set by default, + * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; + * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. 
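
The "lock inversion" comment that appears twice in this hunk refers to the ordering rule that the PCB-info rwlock (ipi_lock) must be taken before the per-socket lock. These routines arrive already holding the socket lock, so they try the rwlock opportunistically and fall back to drop-and-retake. A self-contained user-space model of the same dance, using pthread names rather than the kernel lock API:

    #include <pthread.h>

    static pthread_rwlock_t info_lock = PTHREAD_RWLOCK_INITIALIZER;
    static pthread_mutex_t sock_lock = PTHREAD_MUTEX_INITIALIZER;

    /*
     * Caller holds sock_lock. Try info_lock opportunistically; on
     * failure, back off and take both in the documented order.
     */
    static void
    lock_both_from_socket_side(void)
    {
        if (pthread_rwlock_trywrlock(&info_lock) != 0) {
            pthread_mutex_unlock(&sock_lock);   /* back off ...        */
            pthread_rwlock_wrlock(&info_lock);  /* ... and take them   */
            pthread_mutex_lock(&sock_lock);     /* in the safe order   */
        }
        /* both held here; state may have changed while unlocked */
    }

    int
    main(void)
    {
        pthread_mutex_lock(&sock_lock);
        lock_both_from_socket_side();
        /* ... critical section touching both structures ... */
        pthread_rwlock_unlock(&info_lock);
        pthread_mutex_unlock(&sock_lock);
        return (0);
    }

The comment in the function body is the important caveat: anything observed before the back-off may be stale once both locks are finally held.
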
+ */ + if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) in6_pcbdetach(inp); } void -in6_pcbdetach( - struct inpcb *inp) +in6_pcbdetach(struct inpcb *inp) { struct socket *so = inp->inp_socket; - struct inpcbinfo *ipi = inp->inp_pcbinfo; + + if (so->so_pcb == NULL) { + /* PCB has been disposed */ + panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__, + inp, so, SOCK_PROTO(so)); + /* NOTREACHED */ + } #if IPSEC if (inp->in6p_sp != NULL) { - ipsec6_delete_pcbpolicy(inp); + (void) ipsec6_delete_pcbpolicy(inp); } #endif /* IPSEC */ - if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) - printf("in6_pcbdetach so=%p can't be marked dead ok\n", so); - - inp->inp_state = INPCB_STATE_DEAD; + /* mark socket state as dead */ + if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { + panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", + __func__, so, SOCK_PROTO(so)); + /* NOTREACHED */ + } - if ((so->so_flags & SOF_PCBCLEARING) == 0) { + if (!(so->so_flags & SOF_PCBCLEARING)) { struct ip_moptions *imo; struct ip6_moptions *im6o; inp->inp_vflag = 0; - so->so_flags |= SOF_PCBCLEARING; - inp->inp_gencnt = ++ipi->ipi_gencnt; - if (inp->in6p_options) + if (inp->in6p_options != NULL) { m_freem(inp->in6p_options); + inp->in6p_options = NULL; + } ip6_freepcbopts(inp->in6p_outputopts); - if (inp->in6p_route.ro_rt) { - rtfree(inp->in6p_route.ro_rt); - inp->in6p_route.ro_rt = NULL; + ROUTE_RELEASE(&inp->in6p_route); + /* free IPv4 related resources in case of mapped addr */ + if (inp->inp_options != NULL) { + (void) m_free(inp->inp_options); + inp->inp_options = NULL; } - /* Check and free IPv4 related resources in case of mapped addr */ - if (inp->inp_options) - (void)m_free(inp->inp_options); - im6o = inp->in6p_moptions; inp->in6p_moptions = NULL; if (im6o != NULL) @@ -622,109 +626,143 @@ in6_pcbdetach( inp->inp_moptions = NULL; if (imo != NULL) IMO_REMREF(imo); + sofreelastref(so, 0); + inp->inp_state = INPCB_STATE_DEAD; + /* makes sure we're not called twice from so_close */ + so->so_flags |= SOF_PCBCLEARING; + + inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); } } struct sockaddr * -in6_sockaddr( - in_port_t port, - struct in6_addr *addr_p) +in6_sockaddr(in_port_t port, struct in6_addr *addr_p) { struct sockaddr_in6 *sin6; - MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); + MALLOC(sin6, struct sockaddr_in6 *, sizeof (*sin6), M_SONAME, M_WAITOK); if (sin6 == NULL) - return NULL; - bzero(sin6, sizeof *sin6); + return (NULL); + bzero(sin6, sizeof (*sin6)); sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); + sin6->sin6_len = sizeof (*sin6); sin6->sin6_port = port; sin6->sin6_addr = *addr_p; + + /* would be good to use sa6_recoverscope(), except for locking */ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); else - sin6->sin6_scope_id = 0; /*XXX*/ + sin6->sin6_scope_id = 0; /* XXX */ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; - return (struct sockaddr *)sin6; + return ((struct sockaddr *)sin6); } -struct sockaddr * -in6_v4mapsin6_sockaddr( - in_port_t port, - struct in_addr *addr_p) +void +in6_sockaddr_s(in_port_t port, struct in6_addr *addr_p, + struct sockaddr_in6 *sin6) { - struct sockaddr_in sin; - struct sockaddr_in6 *sin6_p; - - bzero(&sin, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(sin); - sin.sin_port = port; - sin.sin_addr = *addr_p; - - MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, 
M_SONAME, - M_WAITOK); - if (sin6_p == NULL) - return NULL; - in6_sin_2_v4mapsin6(&sin, sin6_p); - - return (struct sockaddr *)sin6_p; + bzero(sin6, sizeof (*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof (*sin6); + sin6->sin6_port = port; + sin6->sin6_addr = *addr_p; + + /* would be good to use sa6_recoverscope(), except for locking */ + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) + sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); + else + sin6->sin6_scope_id = 0; /* XXX */ + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) + sin6->sin6_addr.s6_addr16[1] = 0; } /* - * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was + * The calling convention of in6_getsockaddr() and in6_getpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly - * without the need for a wrapper function. The socket must have a valid - * (i.e., non-nil) PCB, but it should be impossible to get an invalid one - * except through a kernel programming error, so it is acceptable to panic - * (or in this case trap) if the PCB is invalid. (Actually, we don't trap - * because there actually /is/ a programming error somewhere... XXX) + * without the need for a wrapper function. */ int -in6_setsockaddr( - struct socket *so, - struct sockaddr **nam) +in6_getsockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in6_addr addr; in_port_t port; - inp = sotoinpcb(so); - if (!inp) { - return EINVAL; - } + if ((inp = sotoinpcb(so)) == NULL) + return (EINVAL); + port = inp->inp_lport; addr = inp->in6p_laddr; *nam = in6_sockaddr(port, &addr); if (*nam == NULL) - return ENOBUFS; - return 0; + return (ENOBUFS); + return (0); } int -in6_setpeeraddr( - struct socket *so, - struct sockaddr **nam) +in6_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) { struct inpcb *inp; struct in6_addr addr; in_port_t port; - inp = sotoinpcb(so); - if (!inp) { - return EINVAL; - } + VERIFY(ss != NULL); + bzero(ss, sizeof (*ss)); + + if ((inp = sotoinpcb(so)) == NULL || + (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? EINVAL : EPROTOTYPE); + + port = inp->inp_lport; + addr = inp->in6p_laddr; + + in6_sockaddr_s(port, &addr, SIN6(ss)); + return (0); +} + +int +in6_getpeeraddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + struct in6_addr addr; + in_port_t port; + + if ((inp = sotoinpcb(so)) == NULL) + return (EINVAL); + port = inp->inp_fport; addr = inp->in6p_faddr; *nam = in6_sockaddr(port, &addr); if (*nam == NULL) - return ENOBUFS; - return 0; + return (ENOBUFS); + return (0); +} + +int +in6_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) +{ + struct inpcb *inp; + struct in6_addr addr; + in_port_t port; + + VERIFY(ss != NULL); + bzero(ss, sizeof (*ss)); + + if ((inp = sotoinpcb(so)) == NULL || + (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? EINVAL : EPROTOTYPE); + + port = inp->inp_fport; + addr = inp->in6p_faddr; + + in6_sockaddr_s(port, &addr, SIN6(ss)); + return (0); } int @@ -734,16 +772,16 @@ in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) int error; if (inp == NULL) - return EINVAL; + return (EINVAL); if (inp->inp_vflag & INP_IPV4) { - error = in_setsockaddr(so, nam); + error = in_getsockaddr(so, nam); if (error == 0) error = in6_sin_2_v4mapsin6_in_sock(nam); } else { - /* scope issues will be handled in in6_setsockaddr(). 
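
The new _s variants introduced in this hunk fill in a caller-provided sockaddr instead of MALLOC'ing one, trading the ENOBUFS failure mode for an EPROTOTYPE check on flow-diverted sockets. A sketch of how a caller might use one; this is kernel context, so the fragment is illustrative rather than independently runnable, and SIN6() is the cast macro already used elsewhere in this patch:

    struct sockaddr_storage ss;
    int error;

    /* no allocation: the name is written into ss on success */
    error = in6_getsockaddr_s(so, &ss);
    if (error == 0) {
        struct sockaddr_in6 *sin6 = SIN6(&ss);
        /* ... sin6->sin6_addr and sin6->sin6_port are valid ... */
    }
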
*/ - error = in6_setsockaddr(so, nam); + /* scope issues will be handled in in6_getsockaddr(). */ + error = in6_getsockaddr(so, nam); } - return error; + return (error); } int @@ -753,16 +791,16 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) int error; if (inp == NULL) - return EINVAL; + return (EINVAL); if (inp->inp_vflag & INP_IPV4) { - error = in_setpeeraddr(so, nam); + error = in_getpeeraddr(so, nam); if (error == 0) error = in6_sin_2_v4mapsin6_in_sock(nam); } else { - /* scope issues will be handled in in6_setpeeraddr(). */ - error = in6_setpeeraddr(so, nam); + /* scope issues will be handled in in6_getpeeraddr(). */ + error = in6_getpeeraddr(so, nam); } - return error; + return (error); } /* @@ -775,21 +813,16 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) * any errors for each matching socket. */ void -in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify) - struct inpcbinfo *pcbinfo; - struct sockaddr *dst; - const struct sockaddr *src; - u_int fport_arg, lport_arg; - int cmd; - void *cmdarg; - void (*notify)(struct inpcb *, int); +in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, u_int fport_arg, + const struct sockaddr *src, u_int lport_arg, int cmd, void *cmdarg, + void (*notify)(struct inpcb *, int)) { + struct inpcbhead *head = pcbinfo->ipi_listhead; struct inpcb *inp, *ninp; struct sockaddr_in6 sa6_src, *sa6_dst; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; int errno; - struct inpcbhead *head = pcbinfo->listhead; if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6) return; @@ -816,17 +849,17 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify) if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; - bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); + bzero((caddr_t)&sa6_src.sin6_addr, sizeof (sa6_src.sin6_addr)); if (cmd != PRC_HOSTDEAD) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; - lck_rw_lock_shared(pcbinfo->mtx); - for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { - ninp = LIST_NEXT(inp, inp_list); + lck_rw_lock_shared(pcbinfo->ipi_lock); + for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { + ninp = LIST_NEXT(inp, inp_list); - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) continue; /* @@ -840,7 +873,8 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify) */ if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || - IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { + IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, + &sa6_dst->sin6_addr))) { ip6_notify_pmtu(inp, (struct sockaddr_in6 *)(void *)dst, (u_int32_t *)cmdarg); } @@ -855,46 +889,44 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify) */ if (lport == 0 && fport == 0 && flowinfo && inp->inp_socket != NULL && - flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) && + flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) goto do_notify; else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, - &sa6_dst->sin6_addr) || - inp->inp_socket == 0 || - (lport && inp->inp_lport != lport) || - (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && - !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, - &sa6_src.sin6_addr)) || - (fport && inp->inp_fport != fport)) + &sa6_dst->sin6_addr) || inp->inp_socket == NULL || + (lport && inp->inp_lport != lport) || + 
(!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && + !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, + &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport)) continue; - - do_notify: +do_notify: if (notify) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == + WNT_STOPUSING) continue; socket_lock(inp->inp_socket, 1); (*notify)(inp, errno); - (void)in_pcb_checkstate(inp, WNT_RELEASE, 1); + (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); socket_unlock(inp->inp_socket, 1); } } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); } /* * Lookup a PCB based on the local address and port. */ struct inpcb * -in6_pcblookup_local( - struct inpcbinfo *pcbinfo, - struct in6_addr *laddr, - u_int lport_arg, - int wild_okay) +in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, + u_int lport_arg, int wild_okay) { struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; + struct inpcbporthead *porthash; + struct inpcb *match = NULL; + struct inpcbport *phd; if (!wild_okay) { struct inpcbhead *head; @@ -902,10 +934,10 @@ in6_pcblookup_local( * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ - head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && @@ -920,55 +952,50 @@ in6_pcblookup_local( * Not found. */ return (NULL); - } else { - struct inpcbporthead *porthash; - struct inpcbport *phd; - struct inpcb *match = NULL; + } + /* + * Best fit PCB lookup. + * + * First see if this local port is in use by looking on the + * port hash list. + */ + porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, + pcbinfo->ipi_porthashmask)]; + LIST_FOREACH(phd, porthash, phd_hash) { + if (phd->phd_port == lport) + break; + } + if (phd != NULL) { /* - * Best fit PCB lookup. - * - * First see if this local port is in use by looking on the - * port hash list. + * Port is in use by one or more PCBs. Look for best + * fit. */ - porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, - pcbinfo->porthashmask)]; - LIST_FOREACH(phd, porthash, phd_hash) { - if (phd->phd_port == lport) - break; - } - if (phd != NULL) { - /* - * Port is in use by one or more PCBs. Look for best - * fit. 
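
The scoring loop that follows implements "best fit" by counting wildcard fields: a connected peer costs a point, a mismatched local binding disqualifies the candidate outright, and the candidate with the fewest points wins (a score of zero ends the scan early). A self-contained model of that scoring with the IPv6 specifics stripped away; 0 plays the role of the unspecified address:

    #include <stdio.h>

    struct cand { int laddr; int faddr; };  /* 0 means "unspecified" */

    /* Best-fit index, scored like in6_pcblookup_local(). */
    static int
    best_fit(const struct cand *c, int n, int laddr)
    {
        int i, best = -1, matchwild = 3, wildcard;

        for (i = 0; i < n; i++) {
            wildcard = 0;
            if (c[i].faddr != 0)            /* connected socket */
                wildcard++;
            if (c[i].laddr != 0) {
                if (laddr == 0)
                    wildcard++;
                else if (c[i].laddr != laddr)
                    continue;               /* bound elsewhere: no match */
            } else if (laddr != 0) {
                wildcard++;
            }
            if (wildcard < matchwild) {
                best = i;
                matchwild = wildcard;
                if (matchwild == 0)
                    break;                  /* exact match: stop early */
            }
        }
        return (best);
    }

    int
    main(void)
    {
        struct cand c[] = { { 0, 0 }, { 7, 0 } };
        printf("best candidate: %d\n", best_fit(c, 2, 7));  /* prints 1 */
        return (0);
    }
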
- */ - LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { - wildcard = 0; - if ((inp->inp_vflag & INP_IPV6) == 0) + LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { + wildcard = 0; + if (!(inp->inp_vflag & INP_IPV6)) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + wildcard++; + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (IN6_IS_ADDR_UNSPECIFIED(laddr)) + wildcard++; + else if (!IN6_ARE_ADDR_EQUAL( + &inp->in6p_laddr, laddr)) continue; - if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; - if (!IN6_IS_ADDR_UNSPECIFIED( - &inp->in6p_laddr)) { - if (IN6_IS_ADDR_UNSPECIFIED(laddr)) - wildcard++; - else if (!IN6_ARE_ADDR_EQUAL( - &inp->in6p_laddr, laddr)) - continue; - } else { - if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) - wildcard++; - } - if (wildcard < matchwild) { - match = inp; - matchwild = wildcard; - if (matchwild == 0) { - break; - } + } + if (wildcard < matchwild) { + match = inp; + matchwild = wildcard; + if (matchwild == 0) { + break; } } } - return (match); } + return (match); } /* @@ -978,18 +1005,16 @@ in6_pcblookup_local( * (by a redirect), time to try a default gateway again. */ void -in6_losing( - struct inpcb *in6p) +in6_losing(struct inpcb *in6p) { struct rtentry *rt; struct rt_addrinfo info; if ((rt = in6p->in6p_route.ro_rt) != NULL) { - in6p->in6p_route.ro_rt = NULL; RT_LOCK(rt); - bzero((caddr_t)&info, sizeof(info)); + bzero((caddr_t)&info, sizeof (info)); info.rti_info[RTAX_DST] = - (struct sockaddr *)&in6p->in6p_route.ro_dst; + (struct sockaddr *)&in6p->in6p_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); @@ -1010,8 +1035,8 @@ in6_losing( * A new route can be allocated * the next time output is attempted. */ - rtfree(rt); } + ROUTE_RELEASE(&in6p->in6p_route); } /* @@ -1019,34 +1044,23 @@ in6_losing( * and allocate a (hopefully) better one. */ void -in6_rtchange( - struct inpcb *inp, - __unused int errno) +in6_rtchange(struct inpcb *inp, int errno) { - if (inp->in6p_route.ro_rt) { - rtfree(inp->in6p_route.ro_rt); - inp->in6p_route.ro_rt = 0; - /* - * A new route can be allocated the next time - * output is attempted. - */ - } +#pragma unused(errno) + /* + * A new route can be allocated the next time + * output is attempted. + */ + ROUTE_RELEASE(&inp->in6p_route); } /* * Check if PCB exists hash list. Also returns uid and gid of socket */ int -in6_pcblookup_hash_exists( - struct inpcbinfo *pcbinfo, - struct in6_addr *faddr, - u_int fport_arg, - struct in6_addr *laddr, - u_int lport_arg, - int wildcard, - uid_t *uid, - gid_t *gid, - struct ifnet *ifp) +in6_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, + u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int wildcard, + uid_t *uid, gid_t *gid, struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp; @@ -1056,21 +1070,22 @@ in6_pcblookup_hash_exists( *uid = UID_MAX; *gid = GID_MAX; - lck_rw_lock_shared(pcbinfo->mtx); + lck_rw_lock_shared(pcbinfo->ipi_lock); /* * First look for an exact match. 
*/ - head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, - lport, fport, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, + lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) continue; - if (ip6_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->in6p_flags & IN6P_RECV_ANYIF)) + if (inp_restricted(inp, ifp)) + continue; + + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (inp->in6p_flags & INP_NO_IFT_CELLULAR)) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && @@ -1086,39 +1101,43 @@ in6_pcblookup_hash_exists( *gid = kauth_cred_getgid( inp->inp_socket->so_cred); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (found); } } if (wildcard) { struct inpcb *local_wild = NULL; - head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) continue; - if (ip6_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->in6p_flags & IN6P_RECV_ANYIF)) + if (inp_restricted(inp, ifp)) + continue; + + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (inp->in6p_flags & INP_NO_IFT_CELLULAR)) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && inp->inp_lport == lport) { if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, - laddr)) { - if ((found = (inp->inp_socket != NULL))) { + laddr)) { + found = (inp->inp_socket != NULL); + if (found) { *uid = kauth_cred_getuid( inp->inp_socket->so_cred); *gid = kauth_cred_getgid( inp->inp_socket->so_cred); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (found); - } - else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) + } else if (IN6_IS_ADDR_UNSPECIFIED( + &inp->in6p_laddr)) { local_wild = inp; + } } } if (local_wild) { @@ -1128,7 +1147,7 @@ in6_pcblookup_hash_exists( *gid = kauth_cred_getgid( local_wild->inp_socket->so_cred); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (found); } } @@ -1136,7 +1155,7 @@ in6_pcblookup_hash_exists( /* * Not found. */ - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (0); } @@ -1144,34 +1163,30 @@ in6_pcblookup_hash_exists( * Lookup PCB in hash list. */ struct inpcb * -in6_pcblookup_hash( - struct inpcbinfo *pcbinfo, - struct in6_addr *faddr, - u_int fport_arg, - struct in6_addr *laddr, - u_int lport_arg, - int wildcard, - __unused struct ifnet *ifp) +in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, + u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int wildcard, + struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; - lck_rw_lock_shared(pcbinfo->mtx); + lck_rw_lock_shared(pcbinfo->ipi_lock); /* * First look for an exact match. 
*/ - head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, - lport, fport, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, + lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) + continue; + + if (inp_restricted(inp, ifp)) continue; - if (ip6_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->in6p_flags & IN6P_RECV_ANYIF)) + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (inp->in6p_flags & INP_NO_IFT_CELLULAR)) continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && @@ -1179,14 +1194,15 @@ in6_pcblookup_hash( inp->inp_fport == fport && inp->inp_lport == lport) { /* - * Found. Check if pcb is still valid - */ - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); + * Found. Check if pcb is still valid + */ + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != + WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); return (inp); - } - else { /* it's there but dead, say it isn't found */ - lck_rw_done(pcbinfo->mtx); + } else { + /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->ipi_lock); return (NULL); } } @@ -1194,40 +1210,44 @@ in6_pcblookup_hash( if (wildcard) { struct inpcb *local_wild = NULL; - head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) + continue; + + if (inp_restricted(inp, ifp)) continue; - if (ip6_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(inp->in6p_flags & IN6P_RECV_ANYIF)) + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (inp->in6p_flags & INP_NO_IFT_CELLULAR)) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && inp->inp_lport == lport) { if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, - laddr)) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); + laddr)) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, + 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); return (inp); - } - else { /* it's there but dead, say it isn't found */ - lck_rw_done(pcbinfo->mtx); + } else { + /* dead; say it isn't found */ + lck_rw_done(pcbinfo->ipi_lock); return (NULL); } - } - else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) + } else if (IN6_IS_ADDR_UNSPECIFIED( + &inp->in6p_laddr)) { local_wild = inp; + } } } - if (local_wild && in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); + if (local_wild && in_pcb_checkstate(local_wild, + WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); return (local_wild); - } - else { - lck_rw_done(pcbinfo->mtx); + } else { + lck_rw_done(pcbinfo->ipi_lock); return (NULL); } } @@ -1235,7 +1255,7 @@ in6_pcblookup_hash( /* * Not found. 
 */
-	lck_rw_done(pcbinfo->mtx);
+	lck_rw_done(pcbinfo->ipi_lock);
 	return (NULL);
 }
 
@@ -1245,19 +1265,46 @@ init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
 	struct ip6_hdr *ip;
 
 	ip = mtod(m, struct ip6_hdr *);
-	bzero(sin6, sizeof(*sin6));
-	sin6->sin6_len = sizeof(*sin6);
+	bzero(sin6, sizeof (*sin6));
+	sin6->sin6_len = sizeof (*sin6);
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_addr = ip->ip6_src;
-	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
+	if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
 		sin6->sin6_addr.s6_addr16[1] = 0;
-	sin6->sin6_scope_id =
-	    (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-	    ? m->m_pkthdr.rcvif->if_index : 0;
-
-	return;
+		if ((m->m_pkthdr.pkt_flags & (PKTF_LOOP|PKTF_IFAINFO)) ==
+		    (PKTF_LOOP|PKTF_IFAINFO))
+			sin6->sin6_scope_id = m->m_pkthdr.src_ifindex;
+		else if (m->m_pkthdr.rcvif != NULL)
+			sin6->sin6_scope_id = m->m_pkthdr.rcvif->if_index;
+	}
 }
 
+/*
+ * The following routines implement this scheme:
+ *
+ * Callers of ip6_output() that intend to cache the route in the inpcb pass
+ * a local copy of the struct route to ip6_output(). Using a local copy of
+ * the cached route significantly simplifies things as IP no longer has to
+ * worry about having exclusive access to the passed in struct route, since
+ * it's defined in the caller's stack; in essence, this allows for a lock-
+ * less operation when updating the struct route at the IP level and below,
+ * whenever necessary. The scheme works as follows:
+ *
+ * Prior to dropping the socket's lock and calling ip6_output(), the caller
+ * copies the struct route from the inpcb into its stack, and adds a reference
+ * to the cached route entry, if there was any. The socket's lock is then
+ * dropped and ip6_output() is called with a pointer to the copy of struct
+ * route defined on the stack (not to the one in the inpcb.)
+ *
+ * Upon returning from ip6_output(), the caller then acquires the socket's
+ * lock and synchronizes the cache; if there is no route cached in the inpcb,
+ * it copies the local copy of struct route (which may or may not contain any
+ * route) back into the cache; otherwise, if the inpcb has a route cached in
+ * it, the one in the local copy will be freed, if there's any. Trashing the
+ * cached route in the inpcb can be avoided because ip6_output() is single-
+ * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
+ * by the socket/transport layer.)
+ */
 void
 in6p_route_copyout(struct inpcb *inp, struct route_in6 *dst)
 {
@@ -1268,8 +1315,8 @@ in6p_route_copyout(struct inpcb *inp, struct route_in6 *dst)
 	/* Minor sanity check */
 	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET6)
 		panic("%s: wrong or corrupted route: %p", __func__, src);
-
-	route_copyout((struct route *)dst, (struct route *)src, sizeof(*dst));
+
+	route_copyout((struct route *)dst, (struct route *)src, sizeof (*dst));
 }
 
 void
@@ -1283,6 +1330,5 @@ in6p_route_copyin(struct inpcb *inp, struct route_in6 *src)
 	if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET6)
 		panic("%s: wrong or corrupted route: %p", __func__, src);
 
-	route_copyin((struct route *)src, (struct route *)dst, sizeof(*src));
+	route_copyin((struct route *)src, (struct route *)dst, sizeof (*src));
 }
-
diff --git a/bsd/netinet6/in6_pcb.h b/bsd/netinet6/in6_pcb.h
index 1ad240007..093cef2f0 100644
--- a/bsd/netinet6/in6_pcb.h
+++ b/bsd/netinet6/in6_pcb.h
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2008-2012 Apple Inc. All rights reserved.
+ * Copyright (c) 2008-2013 Apple Inc. All rights reserved.
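
Returning to the route-caching scheme documented above in in6_pcb.c: it is easiest to see from a caller's perspective. Below is a hedged sketch of the transmit path it describes; so, m, flags and the exact ip6_output() argument list are placeholders for the real transport-layer code, while the two copy helpers are the ones defined in this file:

    struct route_in6 ro;
    int error;

    /* take a reference on any cached rtentry and copy it to the stack */
    in6p_route_copyout(inp, &ro);

    socket_unlock(so, 0);
    /* IP and below may update the stack copy without any locking */
    error = ip6_output(m, inp->in6p_outputopts, &ro, flags,
        inp->in6p_moptions, NULL, NULL);
    socket_lock(so, 0);

    /* reconcile the stack copy with whatever the inpcb now caches */
    in6p_route_copyin(inp, &ro);

The design choice being exploited is spelled out in the comment: ip6_output() is single-threaded per PCB, so the copy-out/copy-in pair cannot race with another transmit on the same socket.
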
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -94,7 +94,7 @@ #define _NETINET6_IN6_PCB_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern void in6_losing(struct inpcb *); extern int in6_pcbbind(struct inpcb *, struct sockaddr *, struct proc *); extern int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct proc *); @@ -112,10 +112,11 @@ extern void in6_pcbnotify(struct inpcbinfo *, struct sockaddr *, u_int, const struct sockaddr *, u_int, int, void *, void (*)(struct inpcb *, int)); extern void in6_rtchange(struct inpcb *, int); extern struct sockaddr *in6_sockaddr(in_port_t port, struct in6_addr *addr_p); -extern struct sockaddr *in6_v4mapsin6_sockaddr(in_port_t port, - struct in_addr *addr_p); -extern int in6_setpeeraddr(struct socket *so, struct sockaddr **nam); -extern int in6_setsockaddr(struct socket *so, struct sockaddr **nam); +extern void in6_sockaddr_s(in_port_t, struct in6_addr *, struct sockaddr_in6 *); +extern int in6_getpeeraddr(struct socket *, struct sockaddr **); +extern int in6_getpeeraddr_s(struct socket *, struct sockaddr_storage *); +extern int in6_getsockaddr(struct socket *, struct sockaddr **); +extern int in6_getsockaddr_s(struct socket *, struct sockaddr_storage *); extern int in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam); extern int in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam); extern int in6_selecthlim(struct in6pcb *, struct ifnet *); @@ -124,6 +125,5 @@ extern int in6_pcbsetport(struct in6_addr *, struct inpcb *, extern void init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m); extern void in6p_route_copyout(struct inpcb *, struct route_in6 *); extern void in6p_route_copyin(struct inpcb *, struct route_in6 *); -#endif /* KERNEL_PRIVATE */ - +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_NETINET6_IN6_PCB_H_ */ diff --git a/bsd/netinet6/in6_prefix.c b/bsd/netinet6/in6_prefix.c deleted file mode 100644 index da85f486d..000000000 --- a/bsd/netinet6/in6_prefix.c +++ /dev/null @@ -1,1382 +0,0 @@ -/* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* $KAME: in6_prefix.c,v 1.27 2000/03/29 23:13:13 itojun Exp $ */ - -/* - * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * Copyright (c) 1982, 1986, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)in.c 8.2 (Berkeley) 11/15/93 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#ifdef __APPLE__ -#define M_IP6RR M_IP6MISC -#define M_RR_ADDR M_IP6MISC -#else -static MALLOC_DEFINE(M_IP6RR, "ip6rr", "IPv6 Router Renumbering Prefix"); -static MALLOC_DEFINE(M_RR_ADDR, "rp_addr", "IPv6 Router Renumbering Ifid"); -#endif - -struct rr_prhead rr_prefix; - -#include - -static void add_each_addr(struct socket *so, struct rr_prefix *rpp, - struct rp_addr *rap); -static int create_ra_entry(struct rp_addr **rapp); -static int add_each_prefix(struct socket *so, struct rr_prefix *rpp); -static void free_rp_entries(struct rr_prefix *rpp); -static int link_stray_ia6s(struct rr_prefix *rpp); -static void rp_remove(struct rr_prefix *rpp); -extern lck_mtx_t *prefix6_mutex; - -/* - * Copy bits from src to tgt, from off bit for len bits. - * Caller must specify collect tgtsize and srcsize. 
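
A reading aid for the removed bit_copy() below: every size and offset it takes is in bits, which is why its callers shift byte sizes left by 3 ("collect" in the comment is the KAME source's long-standing typo for "correct"). For example, splicing a 64-bit interface identifier into an address that already holds a 64-bit prefix, as the deleted in6_prefix_add_llifid() does, takes a call of this shape:

    struct in6_addr addr, ifid;

    /*
     * Overwrite bits [64, 128) of addr with the corresponding bits of
     * ifid; both buffer sizes are given in bits, hence the "<< 3".
     */
    bit_copy((char *)&addr, sizeof (addr) << 3,
        (char *)&ifid, sizeof (ifid) << 3, 64, 64);
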
- */ -static void -bit_copy(char *tgt, u_int tgtsize, char *src, u_int srcsize, - u_int off, u_int len) -{ - char *sp, *tp; - - /* arg values check */ - if (srcsize < off || srcsize < (off + len) || - tgtsize < off || tgtsize < (off + len)) { - log(LOG_ERR, - "in6_prefix.c: bit_copy: invalid args: srcsize %d,\n" - "tgtsize %d, off %d, len %d\n", srcsize, tgtsize, off, - len); - return; - } - - /* search start point */ - for (sp = src, tp = tgt; off >= 8; sp++, tp++) - off-=8; - /* copy starting bits */ - if (off) { - char setbit; - int startbits; - - startbits = min((8 - off), len); - - for (setbit = (0x80 >> off); startbits; - setbit >>= 1, startbits--, len--) - *tp |= (setbit & *sp); - tp++; - sp++; - } - /* copy midium bits */ - for (; len >= 8; sp++, tp++) { - *tp = *sp; - len-=8; - } - /* copy ending bits */ - if (len) { - char setbit; - - for (setbit = 0x80; len; setbit >>= 1, len--) - *tp |= (setbit & *sp); - } -} - -static struct ifprefix * -in6_prefixwithifp(struct ifnet *ifp, int plen, struct in6_addr *dst) -{ - struct ifprefix *ifpr; - - /* search matched prefix */ - ifnet_lock_shared(ifp); - for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; - ifpr = TAILQ_NEXT(ifpr, ifpr_list)) - { - if (ifpr->ifpr_prefix->sa_family != AF_INET6 || - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - if (plen <= in6_matchlen(dst, IFPR_IN6(ifpr))) - break; - } - ifnet_lock_done(ifp); - return (ifpr); -} - -#if 0 -/* - * Search prefix which matches arg prefix as specified in - * draft-ietf-ipngwg-router-renum-08.txt - */ -static struct rr_prefix * -search_matched_prefix(struct ifnet *ifp, struct in6_prefixreq *ipr) -{ - struct ifprefix *ifpr; - struct ifaddr *ifa; - struct rr_prefix *rpp; - - /* search matched prefix */ - ifpr = in6_prefixwithifp(ifp, ipr->ipr_plen, - &ipr->ipr_prefix.sin6_addr); - if (ifpr != NULL) - return ifpr2rp(ifpr); - - /* - * search matched addr, and then search prefix - * which matches the addr - */ - - ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { - IFA_LOCK(ifa); - if (ifa->ifa_addr->sa_family != AF_INET6) { - IFA_UNLOCK(ifa); - continue; - } - if (ipr->ipr_plen <= - in6_matchlen(&ipr->ipr_prefix.sin6_addr, IFA_IN6(ifa))) { - /* keep it locked */ - break; - } - IFA_UNLOCK(ifa); - } - if (ifa == NULL) { - ifnet_lock_done(ifp); - return NULL; - } - IFA_LOCK_ASSERT_HELD(ifa); - rpp = ifpr2rp(((struct in6_ifaddr *)ifa)->ia6_ifpr); - IFA_UNLOCK(ifa); - if (rpp != 0) { - ifnet_lock_done(ifp); - return rpp; - } - - for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; - ifpr = TAILQ_NEXT(ifpr, ifpr_list)) - { - if (ifpr->ifpr_prefix->sa_family != AF_INET6 || - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - if (ifpr->ifpr_plen <= in6_matchlen(IFA_IN6(ifa), - IFPR_IN6(ifpr))) - break; - } - ifnet_lock_done(ifp); - if (ifpr != NULL) - log(LOG_ERR, "in6_prefix.c: search_matched_prefix: addr %s" - "has no pointer to prefix %s\n", ip6_sprintf(IFA_IN6(ifa)), - ip6_sprintf(IFPR_IN6(ifpr))); - return ifpr2rp(ifpr); -} - -/* - * Search prefix which matches arg prefix as specified in - * draft-ietf-ipngwg-router-renum-08.txt, and mark it if exists. - * Return 1 if anything matched, and 0 if nothing matched. - */ -static int -mark_matched_prefixes(u_int32_t cmd, struct ifnet *ifp, struct in6_rrenumreq *irr) -{ - struct ifprefix *ifpr; - struct ifaddr *ifa; - int matchlen, matched = 0; - - /* search matched prefixes */ - ifnet_lock_exclusive(ifp); /* Should if_prefixhead be protected by IPv6?? 
*/ - for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; - ifpr = TAILQ_NEXT(ifpr, ifpr_list)) - { - if (ifpr->ifpr_prefix->sa_family != AF_INET6 || - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - matchlen = in6_matchlen(&irr->irr_matchprefix.sin6_addr, - IFPR_IN6(ifpr)); - if (irr->irr_m_minlen > ifpr->ifpr_plen || - irr->irr_m_maxlen < ifpr->ifpr_plen || - irr->irr_m_len > matchlen) - continue; - matched = 1; - ifpr2rp(ifpr)->rp_statef_addmark = 1; - if (cmd == SIOCCIFPREFIX_IN6) - ifpr2rp(ifpr)->rp_statef_delmark = 1; - } - - /* - * search matched addr, and then search prefixes - * which matche the addr - */ - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { - struct rr_prefix *rpp; - - IFA_LOCK(ifa); - if (ifa->ifa_addr->sa_family != AF_INET6) { - IFA_UNLOCK(ifa); - continue; - } - matchlen = in6_matchlen(&irr->irr_matchprefix.sin6_addr, - IFA_IN6(ifa)); - if (irr->irr_m_minlen > matchlen || - irr->irr_m_maxlen < matchlen || irr->irr_m_len > matchlen) { - IFA_UNLOCK(ifa); - continue; - } - rpp = ifpr2rp(((struct in6_ifaddr *)ifa)->ia6_ifpr); - if (rpp != 0) { - matched = 1; - rpp->rp_statef_addmark = 1; - if (cmd == SIOCCIFPREFIX_IN6) - rpp->rp_statef_delmark = 1; - } else { - log(LOG_WARNING, "in6_prefix.c: mark_matched_prefixes:" - "no back pointer to ifprefix for %s. " - "ND autoconfigured addr?\n", - ip6_sprintf(IFA_IN6(ifa))); - } - IFA_UNLOCK(ifa); - } - ifnet_lock_done(ifp); - return matched; -} - -/* - * Mark global prefixes as to be deleted. - */ -static void -delmark_global_prefixes(struct ifnet *ifp, __unused struct in6_rrenumreq *irr) -{ - struct ifprefix *ifpr; - - /* search matched prefixes */ - ifnet_lock_exclusive(ifp); - for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; - ifpr = TAILQ_NEXT(ifpr, ifpr_list)) - { - if (ifpr->ifpr_prefix->sa_family != AF_INET6 || - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - /* mark delete global prefix */ - if (in6_addrscope(RP_IN6(ifpr2rp(ifpr))) == - IPV6_ADDR_SCOPE_GLOBAL) - ifpr2rp(ifpr)->rp_statef_delmark = 1; - } - ifnet_lock_done(ifp); -} - -/* Unmark prefixes */ -static void -unmark_prefixes(struct ifnet *ifp) -{ - struct ifprefix *ifpr; - - /* unmark all prefix */ - ifnet_lock_exclusive(ifp); - for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; - ifpr = TAILQ_NEXT(ifpr, ifpr_list)) - { - if (ifpr->ifpr_prefix->sa_family != AF_INET6 || - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - /* unmark prefix */ - ifpr2rp(ifpr)->rp_statef_addmark = 0; - ifpr2rp(ifpr)->rp_statef_delmark = 0; - } - ifnet_lock_done(ifp); -} -#endif - -static void -init_prefix_ltimes(struct rr_prefix *rpp) -{ - struct timeval timenow; - - getmicrotime(&timenow); - - if (rpp->rp_pltime == RR_INFINITE_LIFETIME || - rpp->rp_rrf_decrprefd == 0) - rpp->rp_preferred = 0; - else - rpp->rp_preferred = timenow.tv_sec + rpp->rp_pltime; - if (rpp->rp_vltime == RR_INFINITE_LIFETIME || - rpp->rp_rrf_decrvalid == 0) - rpp->rp_expire = 0; - else - rpp->rp_expire = timenow.tv_sec + rpp->rp_vltime; -} - -static int -rr_are_ifid_equal(struct in6_addr *ii1, struct in6_addr *ii2, int ii_len) -{ - int ii_bytelen, ii_bitlen; - int p_bytelen, p_bitlen; - - /* sanity check */ - if (1 > ii_len || - ii_len > 124) { /* as RFC2373, prefix is at least 4 bit */ - log(LOG_ERR, "rr_are_ifid_equal: invalid ifid length(%d)\n", - ii_len); - return(0); - } - - ii_bytelen = ii_len / 8; - ii_bitlen = ii_len % 8; - - p_bytelen = sizeof(struct in6_addr) - ii_bytelen - 1; - p_bitlen = 8 - ii_bitlen; - - if (bcmp(ii1->s6_addr + p_bytelen + 1, ii2->s6_addr + p_bytelen + 1, - ii_bytelen)) 
- return(0); - if (((ii1->s6_addr[p_bytelen] << p_bitlen) & 0xff) != - ((ii2->s6_addr[p_bytelen] << p_bitlen) & 0xff)) - return(0); - - return(1); -} - -static struct rp_addr * -search_ifidwithprefix(struct rr_prefix *rpp, struct in6_addr *ifid) -{ - struct rp_addr *rap; - - lck_mtx_lock(prefix6_mutex); - LIST_FOREACH(rap, &rpp->rp_addrhead, ra_entry) - { - if (rr_are_ifid_equal(ifid, &rap->ra_ifid, - (sizeof(struct in6_addr) << 3) - - rpp->rp_plen)) - break; - } - lck_mtx_unlock(prefix6_mutex); - return rap; -} - -static int -assign_ra_entry(struct rr_prefix *rpp, int iilen, struct in6_ifaddr *ia) -{ - int error = 0; - struct rp_addr *rap; - - if ((error = create_ra_entry(&rap)) != 0) - return error; - - /* copy interface id part */ - IFA_LOCK(&ia->ia_ifa); - bit_copy((caddr_t)&rap->ra_ifid, sizeof(rap->ra_ifid) << 3, - (caddr_t)IA6_IN6(ia), sizeof(*IA6_IN6(ia)) << 3, - rpp->rp_plen, iilen); - /* link to ia, and put into list */ - rap->ra_addr = ia; - IFA_ADDREF_LOCKED(&rap->ra_addr->ia_ifa); -#if 0 /* Can't do this now, because rpp may be on th stack. should fix it? */ - ia->ia6_ifpr = rp2ifpr(rpp); -#endif - IFA_UNLOCK(&ia->ia_ifa); - lck_mtx_lock(prefix6_mutex); - LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); - lck_mtx_unlock(prefix6_mutex); - - return 0; -} - -/* - * add a link-local address to an interface. we will add new interface address - * (prefix database + new interface id). - */ -static int -in6_prefix_add_llifid(__unused int iilen, struct in6_ifaddr *ia) -{ - struct rr_prefix *rpp; - struct rp_addr *rap; - struct socket so; - int error; - - if ((error = create_ra_entry(&rap)) != 0) - return(error); - /* copy interface id part */ - IFA_LOCK(&ia->ia_ifa); - bit_copy((caddr_t)&rap->ra_ifid, sizeof(rap->ra_ifid) << 3, - (caddr_t)IA6_IN6(ia), sizeof(*IA6_IN6(ia)) << 3, - 64, (sizeof(rap->ra_ifid) << 3) - 64); - IFA_UNLOCK(&ia->ia_ifa); - /* XXX: init dummy so */ - bzero(&so, sizeof(so)); - /* insert into list */ - lck_mtx_lock(prefix6_mutex); - LIST_FOREACH(rpp, &rr_prefix, rp_entry) - { - /* - * do not attempt to add an address, if ifp does not match - */ - if (rpp->rp_ifp != ia->ia_ifp) - continue; - - LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); - add_each_addr(&so, rpp, rap); - } - lck_mtx_unlock(prefix6_mutex); - return 0; -} - -#if 0 -/* - * add an address to an interface. if the interface id portion is new, - * we will add new interface address (prefix database + new interface id). - */ -int -in6_prefix_add_ifid(int iilen, struct in6_ifaddr *ia) -{ - struct in6_addr addr; - int plen; - struct ifprefix *ifpr; - struct rp_addr *rap; - int error = 0; - - IFA_LOCK_SPIN(&ia->ia_ifa); - addr = *IA6_IN6(ia); - plen = (sizeof(*IA6_IN6(ia)) << 3) - iilen; - IFA_UNLOCK(&ia->ia_ifa); - - if (IN6_IS_ADDR_LINKLOCAL(&addr)) - return(in6_prefix_add_llifid(iilen, ia)); - ifpr = in6_prefixwithifp(ia->ia_ifp, plen, &addr); - if (ifpr == NULL) { - struct rr_prefix rp; - struct socket so; - struct ifnet *ifp; - int pplen = (plen == 128) ? 
64 : plen; /* XXX hardcoded 64 is bad */ - - /* allocate a prefix for ia, with default properties */ - - /* init rp */ - bzero(&rp, sizeof(rp)); - rp.rp_type = IN6_PREFIX_RR; - rp.rp_ifp = ifp = ia->ia_ifp; - rp.rp_plen = pplen; - rp.rp_prefix.sin6_len = sizeof(rp.rp_prefix); - rp.rp_prefix.sin6_family = AF_INET6; - bit_copy((char *)RP_IN6(&rp), sizeof(*RP_IN6(&rp)) << 3, - (char *)&addr, sizeof (addr) << 3, 0, pplen); - rp.rp_vltime = rp.rp_pltime = RR_INFINITE_LIFETIME; - rp.rp_raf_onlink = 1; - rp.rp_raf_auto = 1; - /* Is some FlagMasks for rrf necessary? */ - rp.rp_rrf_decrvalid = rp.rp_rrf_decrprefd = 0; - rp.rp_origin = PR_ORIG_RR; /* can be renumbered */ - - /* create ra_entry */ - ifnet_lock_shared(ifp); - error = link_stray_ia6s(&rp); - ifnet_lock_done(ifp); - if (error != 0) { - free_rp_entries(&rp); - return error; - } - - /* XXX: init dummy so */ - bzero(&so, sizeof(so)); - - error = add_each_prefix(&so, &rp); - - /* free each rp_addr entry */ - free_rp_entries(&rp); - - if (error != 0) - return error; - - /* search again */ - ifpr = in6_prefixwithifp(ia->ia_ifp, pplen, &addr); - if (ifpr == NULL) - return 0; - } - rap = search_ifidwithprefix(ifpr2rp(ifpr), &addr); - if (rap != NULL) { - if (rap->ra_addr == NULL) { - rap->ra_addr = ia; - IFA_ADDREF(&rap->ra_addr->ia_ifa); - } else if (rap->ra_addr != ia) { - /* There may be some inconsistencies between addrs. */ - log(LOG_ERR, "ip6_prefix.c: addr %s/%d matched prefix" - " already has another ia %p(%s) on its ifid list\n", - ip6_sprintf(&addr), plen, rap->ra_addr, - ip6_sprintf(IA6_IN6(rap->ra_addr))); - return EADDRINUSE /* XXX */; - } - IFA_LOCK_SPIN(&ia->ia_ifa); - ia->ia6_ifpr = ifpr; - IFA_UNLOCK(&ia->ia_ifa); - return 0; - } - error = assign_ra_entry(ifpr2rp(ifpr), iilen, ia); - if (error == 0) { - IFA_LOCK_SPIN(&ia->ia_ifa); - ia->ia6_ifpr = ifpr; - IFA_UNLOCK(&ia->ia_ifa); - } - return (error); -} -#endif - -#if 0 -void -in6_prefix_remove_ifid(__unused int iilen, struct in6_ifaddr *ia) -{ - struct rp_addr *rap; - struct in6_addr addr; - struct ifprefix *ifpr; - - IFA_LOCK_SPIN(&ia->ia_ifa); - if ((ifpr = ia->ia6_ifpr) == NULL) { - IFA_UNLOCK(&ia->ia_ifa); - return; - } - addr = *IA6_IN6(ia); - IFA_UNLOCK(&ia->ia_ifa); - rap = search_ifidwithprefix(ifpr2rp(ifpr), &addr); - if (rap != NULL) { - lck_mtx_lock(prefix6_mutex); - LIST_REMOVE(rap, ra_entry); - lck_mtx_unlock(prefix6_mutex); - if (rap->ra_addr) { - IFA_REMREF(&rap->ra_addr->ia_ifa); - rap->ra_addr = NULL; - } - FREE(rap, M_RR_ADDR); - } - - if (LIST_EMPTY(&ifpr2rp(ifpr)->rp_addrhead)) - rp_remove(ifpr2rp(ifpr)); -} -#endif - -void -in6_purgeprefix( - struct ifnet *ifp) -{ - struct ifprefix *ifpr, *nextifpr; - - /* delete prefixes before ifnet goes away */ - ifnet_lock_exclusive(ifp); - for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; - ifpr = nextifpr) - { - nextifpr = TAILQ_NEXT(ifpr, ifpr_list); - if (ifpr->ifpr_prefix->sa_family != AF_INET6 || - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - (void)delete_each_prefix(ifpr2rp(ifpr), PR_ORIG_KERNEL); - } - ifnet_lock_done(ifp); -} - -static void -add_each_addr(struct socket *so, struct rr_prefix *rpp, struct rp_addr *rap) -{ - struct in6_ifaddr *ia6; - struct in6_aliasreq ifra; - struct proc *p = current_proc(); - int error, p64 = proc_is64bit(p); - - /* init ifra */ - bzero(&ifra, sizeof(ifra)); - strncpy(ifra.ifra_name, if_name(rpp->rp_ifp), sizeof(ifra.ifra_name)); - ifra.ifra_addr.sin6_family = ifra.ifra_prefixmask.sin6_family = - AF_INET6; - ifra.ifra_addr.sin6_len = ifra.ifra_prefixmask.sin6_len = - 
sizeof(ifra.ifra_addr); - /* copy prefix part */ - bit_copy((char *)&ifra.ifra_addr.sin6_addr, - sizeof(ifra.ifra_addr.sin6_addr) << 3, - (char *)RP_IN6(rpp), sizeof(*RP_IN6(rpp)) << 3, - 0, rpp->rp_plen); - /* copy interface id part */ - bit_copy((char *)&ifra.ifra_addr.sin6_addr, - sizeof(ifra.ifra_addr.sin6_addr) << 3, - (char *)&rap->ra_ifid, sizeof(rap->ra_ifid) << 3, - rpp->rp_plen, (sizeof(rap->ra_ifid) << 3) - rpp->rp_plen); - in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, rpp->rp_plen); - /* don't care ifra_flags for now */ - - /* - * XXX: if we did this with finite lifetime values, the lifetimes would - * decrese in time and never incremented. - * we should need more clarifications on the prefix mechanism... - */ - ifra.ifra_lifetime.ia6t_vltime = rpp->rp_vltime; - ifra.ifra_lifetime.ia6t_pltime = rpp->rp_pltime; - - ia6 = in6ifa_ifpwithaddr(rpp->rp_ifp, &ifra.ifra_addr.sin6_addr); - if (ia6 != NULL) { - struct in6_ifaddr *ria6 = NULL; - - IFA_LOCK(&ia6->ia_ifa); - if (ia6->ia6_ifpr == NULL) { - /* link this addr and the prefix each other */ - if (rap->ra_addr != NULL) - ria6 = rap->ra_addr; - /* Reference held in in6ifa_ifpwithaddr() */ - rap->ra_addr = ia6; - ia6->ia6_ifpr = rp2ifpr(rpp); - IFA_UNLOCK(&ia6->ia_ifa); - if (ria6 != NULL) - IFA_REMREF(&ria6->ia_ifa); - return; - } - if (ia6->ia6_ifpr == rp2ifpr(rpp)) { - if (rap->ra_addr != NULL) - ria6 = rap->ra_addr; - /* Reference held in in6ifa_ifpwithaddr() */ - rap->ra_addr = ia6; - IFA_UNLOCK(&ia6->ia_ifa); - if (ria6 != NULL) - IFA_REMREF(&ria6->ia_ifa); - return; - } - /* - * The addr is already assigned to other - * prefix. - * There may be some inconsistencies between - * prefixes. - * e.g. overraped prefixes with common starting - * part and different plefixlen. - * Or, completely duplicated prefixes? - * log it and return. - */ - log(LOG_ERR, - "in6_prefix.c: add_each_addr: addition of an addr %s/%d " - "failed because there is already another addr %s/%d\n", - ip6_sprintf(&ifra.ifra_addr.sin6_addr), rpp->rp_plen, - ip6_sprintf(IA6_IN6(ia6)), - in6_mask2len(&ia6->ia_prefixmask.sin6_addr, NULL)); - IFA_UNLOCK(&ia6->ia_ifa); - IFA_REMREF(&ia6->ia_ifa); - return; - } - /* propagate ANYCAST flag if it is set for ancestor addr */ - if (rap->ra_flags.anycast != 0) - ifra.ifra_flags |= IN6_IFF_ANYCAST; - - if (!p64) { -#if defined(__LP64__) - struct in6_aliasreq_32 ifra_32; - /* - * Use 32-bit ioctl and structure for 32-bit process. - */ - in6_aliasreq_64_to_32((struct in6_aliasreq_64 *)&ifra, - &ifra_32); - error = in6_control(so, SIOCAIFADDR_IN6_32, (caddr_t)&ifra_32, - rpp->rp_ifp, p); -#else - error = in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, - rpp->rp_ifp, p); -#endif /* __LP64__ */ - } else { -#if defined(__LP64__) - error = in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, - rpp->rp_ifp, p); -#else - struct in6_aliasreq_64 ifra_64; - /* - * Use 32-bit ioctl and structure for 32-bit process. 
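The deleted add_each_addr() above funnels its SIOCAIFADDR_IN6 request through in6_control() using whichever layout of struct in6_aliasreq matches the calling process, converting at the user/kernel boundary. A minimal sketch of that ABI-dispatch pattern, assuming hypothetical req32/req64 layouts and a do_ioctl() entry point (none of these are xnu names):

#include <stdint.h>
#include <string.h>

struct req32 { uint32_t addr; uint32_t len; };	/* 32-bit caller layout */
struct req64 { uint64_t addr; uint32_t len; };	/* kernel-native layout */

static void
req_32_to_64(const struct req32 *in, struct req64 *out)
{
	memset(out, 0, sizeof (*out));
	out->addr = in->addr;	/* zero-extend the pointer-sized field */
	out->len = in->len;
}

static int
do_ioctl(int is64bit_proc, void *data)
{
	struct req64 r64;

	if (!is64bit_proc) {
		/* 32-bit caller: widen into the native layout first */
		req_32_to_64((const struct req32 *)data, &r64);
		data = &r64;
	}
	/* ... operate on the single native struct req64 layout ... */
	return (0);
}

Keeping one native layout inside the kernel means only the copy-in path has to know the caller's word size.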
- */ - in6_aliasreq_32_to_64((struct in6_aliasreq_32 *)&ifra, - &ifra_64); - error = in6_control(so, SIOCAIFADDR_IN6_64, (caddr_t)&ifra_64, - rpp->rp_ifp, p); -#endif /* __LP64__ */ - } - - if (error != 0) { - log(LOG_ERR, "in6_prefix.c: add_each_addr: addition of an addr" - "%s/%d failed because in6_control failed for error %d\n", - ip6_sprintf(&ifra.ifra_addr.sin6_addr), rpp->rp_plen, - error); - return; - } - - /* - * link beween this addr and the prefix will be done - * in in6_prefix_add_ifid - */ -} - -static int -rrpr_update(struct socket *so, struct rr_prefix *new) -{ - struct rr_prefix *rpp; - struct ifprefix *ifpr; - struct rp_addr *rap; - - /* search existing prefix */ - ifnet_lock_exclusive(new->rp_ifp); - for (ifpr = TAILQ_FIRST(&new->rp_ifp->if_prefixhead); ifpr; - ifpr = TAILQ_NEXT(ifpr, ifpr_list)) - { - if (ifpr->ifpr_prefix->sa_family != AF_INET6 || - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - if (ifpr->ifpr_plen == new->rp_plen && - in6_are_prefix_equal(IFPR_IN6(ifpr), RP_IN6(new), - ifpr->ifpr_plen)) - break; - } - rpp = ifpr2rp(ifpr); - if (rpp != NULL) { - /* - * We got a prefix which we have seen in the past. - */ - /* - * If the origin of the already-installed prefix is more - * preferable than the new one, ignore installation request. - */ - if (rpp->rp_origin > new->rp_origin) { - ifnet_lock_done(new->rp_ifp); - return(EPERM); - } - - /* update prefix information */ - rpp->rp_flags.prf_ra = new->rp_flags.prf_ra; - if (rpp->rp_origin >= PR_ORIG_RR) - rpp->rp_flags.prf_rr = new->rp_flags.prf_rr; - rpp->rp_vltime = new->rp_vltime; - rpp->rp_pltime = new->rp_pltime; - rpp->rp_expire = new->rp_expire; - rpp->rp_preferred = new->rp_preferred; - rpp->rp_statef_delmark = 0; /* cancel deletion */ - /* - * Interface id related update. - * add rp_addr entries in new into rpp, if they have not - * been already included in rpp. - */ - lck_mtx_lock(prefix6_mutex); - while (!LIST_EMPTY(&new->rp_addrhead)) - { - rap = LIST_FIRST(&new->rp_addrhead); - LIST_REMOVE(rap, ra_entry); - if (search_ifidwithprefix(rpp, &rap->ra_ifid) - != NULL) { - if (rap->ra_addr) { - IFA_REMREF(&rap->ra_addr->ia_ifa); - rap->ra_addr = NULL; - } - FREE(rap, M_RR_ADDR); - continue; - } - LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); - } - lck_mtx_unlock(prefix6_mutex); - } else { - /* - * We got a fresh prefix. - */ - /* create new prefix */ - rpp = (struct rr_prefix *)_MALLOC(sizeof(*rpp), M_IP6RR, - M_NOWAIT); - if (rpp == NULL) { - log(LOG_ERR, "in6_prefix.c: rrpr_update:%d" - ": ENOBUFS for rr_prefix\n", __LINE__); - ifnet_lock_done(new->rp_ifp); - return(ENOBUFS); - } - /* initilization */ - lck_mtx_lock(prefix6_mutex); - *rpp = *new; - LIST_INIT(&rpp->rp_addrhead); - /* move rp_addr entries of new to rpp */ - while (!LIST_EMPTY(&new->rp_addrhead)) - { - rap = LIST_FIRST(&new->rp_addrhead); - LIST_REMOVE(rap, ra_entry); - LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); - } - lck_mtx_unlock(prefix6_mutex); - - /* let rp_ifpr.ifpr_prefix point rr_prefix. 
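The list-linking code just below appends the new rr_prefix to if_prefixhead by walking TAILQ_NEXT until it runs off the tail. With the <sys/queue.h> macros the same append is O(1); a sketch with a hypothetical node type:

#include <sys/queue.h>

struct node {
	TAILQ_ENTRY(node) link;
};
TAILQ_HEAD(nodehead, node);

static void
append_node(struct nodehead *head, struct node *n)
{
	TAILQ_INSERT_TAIL(head, n, link);	/* no tail walk needed */
}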
*/ - rpp->rp_ifpr.ifpr_prefix = (struct sockaddr *)&rpp->rp_prefix; - /* link rr_prefix entry to if_prefixlist */ - { - struct ifnet *ifp = rpp->rp_ifp; - - if ((ifpr = TAILQ_FIRST(&ifp->if_prefixhead)) - != NULL) { - for ( ; TAILQ_NEXT(ifpr, ifpr_list); - ifpr = TAILQ_NEXT(ifpr, ifpr_list)) - continue; - TAILQ_NEXT(ifpr, ifpr_list) = rp2ifpr(rpp); - } else - TAILQ_FIRST(&ifp->if_prefixhead) = - rp2ifpr(rpp); - rp2ifpr(rpp)->ifpr_type = IN6_PREFIX_RR; - } - /* link rr_prefix entry to rr_prefix list */ - lck_mtx_lock(prefix6_mutex); - LIST_INSERT_HEAD(&rr_prefix, rpp, rp_entry); - lck_mtx_unlock(prefix6_mutex); - } - ifnet_lock_done(new->rp_ifp); - - if (!new->rp_raf_auto) - return 0; - - /* - * Add an address for each interface id, if it is not yet - * If it existed but not pointing to the prefix yet, - * init the prefix pointer. - */ - lck_mtx_lock(prefix6_mutex); - LIST_FOREACH(rap, &rpp->rp_addrhead, ra_entry) { - struct in6_ifaddr *ia6; - - if ((ia6 = rap->ra_addr) != NULL) { - IFA_LOCK(&ia6->ia_ifa); - if (ia6->ia6_ifpr == NULL) - ia6->ia6_ifpr = rp2ifpr(rpp); - IFA_UNLOCK(&ia6->ia_ifa); - continue; - } - add_each_addr(so, rpp, rap); - } - lck_mtx_unlock(prefix6_mutex); - return 0; -} - -static int -add_each_prefix(struct socket *so, struct rr_prefix *rpp) -{ - init_prefix_ltimes(rpp); - return(rrpr_update(so, rpp)); -} - -static void -rp_remove(struct rr_prefix *rpp) -{ - - /* unlink rp_entry from if_prefixlist */ - lck_mtx_lock(prefix6_mutex); - { - struct ifnet *ifp = rpp->rp_ifp; - struct ifprefix *ifpr; - - ifnet_lock_exclusive(ifp); - if ((ifpr = TAILQ_FIRST(&ifp->if_prefixhead)) == rp2ifpr(rpp)) - TAILQ_FIRST(&ifp->if_prefixhead) = - TAILQ_NEXT(ifpr, ifpr_list); - else { - while (TAILQ_NEXT(ifpr, ifpr_list) != NULL && - (TAILQ_NEXT(ifpr, ifpr_list) != rp2ifpr(rpp))) - ifpr = TAILQ_NEXT(ifpr, ifpr_list); - if (TAILQ_NEXT(ifpr, ifpr_list)) - TAILQ_NEXT(ifpr, ifpr_list) = - TAILQ_NEXT(rp2ifpr(rpp), ifpr_list); - else - printf("Couldn't unlink rr_prefix from ifp\n"); - } - ifnet_lock_done(ifp); - } - /* unlink rp_entry from rr_prefix list */ - LIST_REMOVE(rpp, rp_entry); - lck_mtx_unlock(prefix6_mutex); - FREE(rpp, M_IP6RR); -} - -static int -create_ra_entry(struct rp_addr **rapp) -{ - *rapp = (struct rp_addr *)_MALLOC(sizeof(struct rp_addr), M_RR_ADDR, - M_NOWAIT); - if (*rapp == NULL) { - log(LOG_ERR, "in6_prefix.c:%d: ENOBUFS" - "for rp_addr\n", __LINE__); - return ENOBUFS; - } - bzero(*rapp, sizeof(*(*rapp))); - - return 0; -} - -#if 0 -static int -init_newprefix(struct in6_rrenumreq *irr, struct ifprefix *ifpr, - struct rr_prefix *rpp) -{ - struct rp_addr *orap; - - /* init rp */ - bzero(rpp, sizeof(*rpp)); - rpp->rp_type = IN6_PREFIX_RR; - rpp->rp_ifp = ifpr->ifpr_ifp; - rpp->rp_plen = ifpr->ifpr_plen; - rpp->rp_prefix.sin6_len = sizeof(rpp->rp_prefix); - rpp->rp_prefix.sin6_family = AF_INET6; - bit_copy((char *)RP_IN6(rpp), sizeof(*RP_IN6(rpp)) << 3, - (char *)&irr->irr_useprefix.sin6_addr, - sizeof(irr->irr_useprefix.sin6_addr) << 3, - 0, irr->irr_u_uselen); - /* copy keeplen part if necessary as necessary len */ - if (irr->irr_u_uselen < ifpr->ifpr_plen) - bit_copy((char *)RP_IN6(rpp), sizeof(*RP_IN6(rpp)) << 3, - (char *)IFPR_IN6(ifpr), sizeof(*IFPR_IN6(ifpr)) << 3, - irr->irr_u_uselen, - min(ifpr->ifpr_plen - irr->irr_u_uselen, - irr->irr_u_keeplen)); - lck_mtx_lock(prefix6_mutex); - LIST_FOREACH(orap, &(ifpr2rp(ifpr)->rp_addrhead), ra_entry) - { - struct rp_addr *rap; - int error = 0; - struct in6_ifaddr *ia6; - - if ((error = create_ra_entry(&rap)) != 0) - return 
error; - rap->ra_ifid = orap->ra_ifid; - ia6 = orap->ra_addr->ia_ifa; - if (ia6 != NULL) { - IFA_LOCK(&ia6->ia_ifa); - rap->ra_flags.anycast = - ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0) ? 1 : 0; - IFA_UNLOCK(&ia6->ia_ifa); - } else { - rap->ra_flags.anycast = 0; - } - LIST_INSERT_HEAD(&rpp->rp_addrhead, rap, ra_entry); - } - rpp->rp_vltime = irr->irr_vltime; - rpp->rp_pltime = irr->irr_pltime; - rpp->rp_raf_onlink = irr->irr_raf_mask_onlink ? irr->irr_raf_onlink : - ifpr2rp(ifpr)->rp_raf_onlink; - rpp->rp_raf_auto = irr->irr_raf_mask_auto ? irr->irr_raf_auto : - ifpr2rp(ifpr)->rp_raf_auto; - /* Is some FlagMasks for rrf necessary? */ - rpp->rp_rrf = irr->irr_rrf; - rpp->rp_origin = irr->irr_origin; - lck_mtx_unlock(prefix6_mutex); - - return 0; -} -#endif - -static void -free_rp_entries(struct rr_prefix *rpp) -{ - /* - * This func is only called with rpp on stack(not on list). - * So no splnet() here - */ - lck_mtx_lock(prefix6_mutex); - while (!LIST_EMPTY(&rpp->rp_addrhead)) - { - struct rp_addr *rap; - - rap = LIST_FIRST(&rpp->rp_addrhead); - LIST_REMOVE(rap, ra_entry); - if (rap->ra_addr) { - IFA_REMREF(&rap->ra_addr->ia_ifa); - rap->ra_addr = NULL; - } - FREE(rap, M_RR_ADDR); - } - lck_mtx_unlock(prefix6_mutex); -} - -#if 0 -static int -add_useprefixes(struct socket *so, struct ifnet *ifp, - struct in6_rrenumreq *irr) -{ - struct ifprefix *ifpr, *nextifpr; - struct rr_prefix rp; - int error = 0; - - /* add prefixes to each of marked prefix */ - ifnet_lock_exclusive(ifp); - for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = nextifpr) - { - nextifpr = TAILQ_NEXT(ifpr, ifpr_list); - if (ifpr->ifpr_prefix->sa_family != AF_INET6 || - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - if (ifpr2rp(ifpr)->rp_statef_addmark) { - if ((error = init_newprefix(irr, ifpr, &rp)) != 0) - break; - error = add_each_prefix(so, &rp); - } - } - ifnet_lock_done(ifp); - /* free each rp_addr entry */ - free_rp_entries(&rp); - - return error; -} -#endif - -static void -unprefer_prefix(struct rr_prefix *rpp) -{ - struct rp_addr *rap; - struct timeval timenow; - - getmicrotime(&timenow); - - lck_mtx_lock(prefix6_mutex); - for (rap = rpp->rp_addrhead.lh_first; rap != NULL; - rap = rap->ra_entry.le_next) { - struct in6_ifaddr *ia6; - - if ((ia6 = rap->ra_addr) == NULL) - continue; - IFA_LOCK(&ia6->ia_ifa); - ia6->ia6_lifetime.ia6t_preferred = timenow.tv_sec; - ia6->ia6_lifetime.ia6t_pltime = 0; - IFA_UNLOCK(&ia6->ia_ifa); - } - lck_mtx_unlock(prefix6_mutex); - -} - -int -delete_each_prefix(struct rr_prefix *rpp, u_char origin) -{ - int error = 0; - - if (rpp->rp_origin > origin) - return(EPERM); - - lck_mtx_lock(prefix6_mutex); - while (rpp->rp_addrhead.lh_first != NULL) { - struct rp_addr *rap; - struct in6_ifaddr *ia6; - - rap = LIST_FIRST(&rpp->rp_addrhead); - if (rap == NULL) { - break; - } - LIST_REMOVE(rap, ra_entry); - if ((ia6 = rap->ra_addr) == NULL) { - FREE(rap, M_RR_ADDR); - continue; - } - rap->ra_addr = NULL; - IFA_LOCK(&ia6->ia_ifa); - ia6->ia6_ifpr = NULL; - IFA_UNLOCK(&ia6->ia_ifa); - - in6_purgeaddr(&ia6->ia_ifa, 0); - IFA_REMREF(&ia6->ia_ifa); - FREE(rap, M_RR_ADDR); - } - rp_remove(rpp); - lck_mtx_unlock(prefix6_mutex); - - return error; -} - -#if 0 -static void -delete_prefixes(struct ifnet *ifp, u_char origin) -{ - struct ifprefix *ifpr, *nextifpr; - - /* delete prefixes marked as tobe deleted */ - ifnet_lock_exclusive(ifp); - for (ifpr = TAILQ_FIRST(&ifp->if_prefixhead); ifpr; ifpr = nextifpr) - { - nextifpr = TAILQ_NEXT(ifpr, ifpr_list); - if (ifpr->ifpr_prefix->sa_family != AF_INET6 
|| - ifpr->ifpr_type != IN6_PREFIX_RR) - continue; - if (ifpr2rp(ifpr)->rp_statef_delmark) - (void)delete_each_prefix(ifpr2rp(ifpr), origin); - } - ifnet_lock_done(ifp); -} -#endif - -static int -link_stray_ia6s(struct rr_prefix *rpp) -{ - struct ifaddr *ifa; - - ifnet_lock_assert(rpp->rp_ifp, IFNET_LCK_ASSERT_OWNED); - - for (ifa = rpp->rp_ifp->if_addrlist.tqh_first; ifa; - ifa = ifa->ifa_list.tqe_next) - { - struct rp_addr *rap; - struct rr_prefix *orpp; - int error = 0; - - IFA_LOCK(ifa); - if (ifa->ifa_addr->sa_family != AF_INET6) { - IFA_UNLOCK(ifa); - continue; - } - if (rpp->rp_plen > in6_matchlen(RP_IN6(rpp), IFA_IN6(ifa))) { - IFA_UNLOCK(ifa); - continue; - } - orpp = ifpr2rp(((struct in6_ifaddr *)ifa)->ia6_ifpr); - if (orpp != NULL) { - if (!in6_are_prefix_equal(RP_IN6(orpp), RP_IN6(rpp), - rpp->rp_plen)) - log(LOG_ERR, "in6_prefix.c: link_stray_ia6s:" - "addr %s/%d already linked to a prefix" - "and it matches also %s/%d\n", - ip6_sprintf(IFA_IN6(ifa)), orpp->rp_plen, - ip6_sprintf(RP_IN6(rpp)), - rpp->rp_plen); - IFA_UNLOCK(ifa); - continue; - } - IFA_UNLOCK(ifa); - if ((error = assign_ra_entry(rpp, - (sizeof(rap->ra_ifid) << 3) - - rpp->rp_plen, - (struct in6_ifaddr *)ifa)) != 0) - return error; - } - return 0; -} - -#if 0 -/* XXX assumes that permission is already checked by the caller */ -int -in6_prefix_ioctl(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp) -{ - struct rr_prefix *rpp, rp_tmp; - struct rp_addr *rap; - struct in6_prefixreq *ipr = (struct in6_prefixreq *)data; - struct in6_rrenumreq *irr = (struct in6_rrenumreq *)data; - struct ifaddr *ifa; - int error = 0; - - /* - * Failsafe for erroneous address config program. - * Let's hope rrenumd don't make a mistakes. - */ - if (ipr->ipr_origin <= PR_ORIG_RA) - ipr->ipr_origin = PR_ORIG_STATIC; - - switch (cmd) { - case SIOCSGIFPREFIX_IN6: - delmark_global_prefixes(ifp, irr); - /* FALL THROUGH */ - case SIOCAIFPREFIX_IN6: - case SIOCCIFPREFIX_IN6: - /* check if preferred lifetime > valid lifetime */ - if (irr->irr_pltime > irr->irr_vltime) { - log(LOG_NOTICE, - "in6_prefix_ioctl: preferred lifetime" - "(%ld) is greater than valid lifetime(%ld)\n", - (u_int32_t)irr->irr_pltime, (u_int32_t)irr->irr_vltime); - error = EINVAL; - break; - } - if (mark_matched_prefixes(cmd, ifp, irr)) { - if (irr->irr_u_uselen != 0) - if ((error = add_useprefixes(so, ifp, irr)) - != 0) - goto failed; - if (cmd != SIOCAIFPREFIX_IN6) - delete_prefixes(ifp, irr->irr_origin); - } else - return (EADDRNOTAVAIL); - failed: - unmark_prefixes(ifp); - break; - case SIOCGIFPREFIX_IN6: - rpp = search_matched_prefix(ifp, ipr); - if (rpp == NULL || ifp != rpp->rp_ifp) - return (EADDRNOTAVAIL); - - ipr->ipr_origin = rpp->rp_origin; - ipr->ipr_plen = rpp->rp_plen; - ipr->ipr_vltime = rpp->rp_vltime; - ipr->ipr_pltime = rpp->rp_pltime; - ipr->ipr_flags = rpp->rp_flags; - ipr->ipr_prefix = rpp->rp_prefix; - - break; - case SIOCSIFPREFIX_IN6: - /* check if preferred lifetime > valid lifetime */ - if (ipr->ipr_pltime > ipr->ipr_vltime) { - log(LOG_NOTICE, - "in6_prefix_ioctl: preferred lifetime" - "(%ld) is greater than valid lifetime(%ld)\n", - (u_int32_t)ipr->ipr_pltime, (u_int32_t)ipr->ipr_vltime); - error = EINVAL; - break; - } - - /* init rp_tmp */ - bzero((caddr_t)&rp_tmp, sizeof(rp_tmp)); - rp_tmp.rp_ifp = ifp; - rp_tmp.rp_plen = ipr->ipr_plen; - rp_tmp.rp_prefix = ipr->ipr_prefix; - rp_tmp.rp_vltime = ipr->ipr_vltime; - rp_tmp.rp_pltime = ipr->ipr_pltime; - rp_tmp.rp_flags = ipr->ipr_flags; - rp_tmp.rp_origin = ipr->ipr_origin; - - /* 
create rp_addr entries, usually at least for lladdr */ - ifnet_lock_shared(ifp); - if ((error = link_stray_ia6s(&rp_tmp)) != 0) { - ifnet_lock_done(ifp); - free_rp_entries(&rp_tmp); - break; - } - for (ifa = ifp->if_addrlist.tqh_first; - ifa; - ifa = ifa->ifa_list.tqe_next) - { - IFA_LOCK(ifa); - if (ifa->ifa_addr->sa_family != AF_INET6) { - IFA_UNLOCK(ifa); - continue; - } - if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa)) == 0) { - IFA_UNLOCK(ifa); - continue; - } - if ((error = create_ra_entry(&rap)) != 0) { - IFA_UNLOCK(ifa); - ifnet_lock_done(ifp); - free_rp_entries(&rp_tmp); - goto bad; - } - /* copy interface id part */ - bit_copy((caddr_t)&rap->ra_ifid, - sizeof(rap->ra_ifid) << 3, - (caddr_t)IFA_IN6(ifa), - sizeof(*IFA_IN6(ifa)) << 3, - rp_tmp.rp_plen, - (sizeof(rap->ra_ifid) << 3) - rp_tmp.rp_plen); - IFA_UNLOCK(ifa); - /* insert into list */ - lck_mtx_lock(prefix6_mutex); - LIST_INSERT_HEAD(&rp_tmp.rp_addrhead, rap, ra_entry); - lck_mtx_unlock(prefix6_mutex); - } - ifnet_lock_done(ifp); - - error = add_each_prefix(so, &rp_tmp); - - /* free each rp_addr entry */ - free_rp_entries(&rp_tmp); - - break; - case SIOCDIFPREFIX_IN6: - rpp = search_matched_prefix(ifp, ipr); - if (rpp == NULL || ifp != rpp->rp_ifp) - return (EADDRNOTAVAIL); - - ifnet_lock_exclusive(ifp); - error = delete_each_prefix(rpp, ipr->ipr_origin); - ifnet_lock_done(ifp); - break; - } - bad: - return error; -} -#endif - diff --git a/bsd/netinet6/in6_prefix.h b/bsd/netinet6/in6_prefix.h deleted file mode 100644 index fa3567676..000000000 --- a/bsd/netinet6/in6_prefix.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 1995, 1996, 1997, 1998 and 1999 WIDE Project. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the project nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
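This hunk removes the last of the RFC 2894 router-renumbering machinery; the header deleted next defined the rr_prefix/rp_addr database these functions maintained. The trickiest piece above is rr_are_ifid_equal(), which compares only the trailing interface-identifier bits of two addresses; a standalone sketch of that comparison (ifid_equal() is a hypothetical helper, not kernel code):

#include <string.h>
#include <netinet/in.h>

/* Return nonzero iff the low ii_len bits of *a and *b are equal. */
static int
ifid_equal(const struct in6_addr *a, const struct in6_addr *b, int ii_len)
{
	int bytes = ii_len / 8;		/* whole trailing bytes */
	int bits = ii_len % 8;		/* leftover bits in one more byte */
	int off = (int)sizeof (struct in6_addr) - bytes;

	if (memcmp(a->s6_addr + off, b->s6_addr + off, bytes) != 0)
		return (0);
	if (bits != 0) {
		unsigned char mask = (unsigned char)(0xff >> (8 - bits));

		if ((a->s6_addr[off - 1] & mask) !=
		    (b->s6_addr[off - 1] & mask))
			return (0);
	}
	return (1);
}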
- */ -#include - -#include - -#ifdef KERNEL_PRIVATE -struct rr_prefix { - struct ifprefix rp_ifpr; - LIST_ENTRY(rr_prefix) rp_entry; - LIST_HEAD(rp_addrhead, rp_addr) rp_addrhead; - struct sockaddr_in6 rp_prefix; /* prefix */ - u_int32_t rp_vltime; /* advertised valid lifetime */ - u_int32_t rp_pltime; /* advertised preferred lifetime */ - time_t rp_expire; /* expiration time of the prefix */ - time_t rp_preferred; /* preferred time of the prefix */ - struct in6_prflags rp_flags; - u_char rp_origin; /* from where this prefix info is obtained */ - struct rp_stateflags { - /* if some prefix should be added to this prefix */ - u_char addmark : 1; - u_char delmark : 1; /* if this prefix will be deleted */ - } rp_stateflags; -}; - -#define rp_type rp_ifpr.ifpr_type -#define rp_ifp rp_ifpr.ifpr_ifp -#define rp_plen rp_ifpr.ifpr_plen - -#define rp_raf rp_flags.prf_ra -#define rp_raf_onlink rp_flags.prf_ra.onlink -#define rp_raf_auto rp_flags.prf_ra.autonomous - -#define rp_statef_addmark rp_stateflags.addmark -#define rp_statef_delmark rp_stateflags.delmark - -#define rp_rrf rp_flags.prf_rr -#define rp_rrf_decrvalid rp_flags.prf_rr.decrvalid -#define rp_rrf_decrprefd rp_flags.prf_rr.decrprefd - -struct rp_addr { - LIST_ENTRY(rp_addr) ra_entry; - struct in6_addr ra_ifid; - struct in6_ifaddr *ra_addr; - struct ra_flags { - u_char anycast : 1; - } ra_flags; -}; - -#define ifpr2rp(ifpr) ((struct rr_prefix *)(ifpr)) -#define rp2ifpr(rp) ((struct ifprefix *)(rp)) - -#define RP_IN6(rp) (&(rp)->rp_prefix.sin6_addr) - -#define RR_INFINITE_LIFETIME 0xffffffff - - -LIST_HEAD(rr_prhead, rr_prefix); - -extern struct rr_prhead rr_prefix; - -int delete_each_prefix (struct rr_prefix *rpp, u_char origin); - -#endif /* KERNEL_PRIVATE */ diff --git a/bsd/netinet6/in6_proto.c b/bsd/netinet6/in6_proto.c index cb44f9b41..a87c6e662 100644 --- a/bsd/netinet6/in6_proto.c +++ b/bsd/netinet6/in6_proto.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2008-2012 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/in6_proto.c,v 1.19 2002/10/16 02:25:05 sam Exp $ */ -/* $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
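From this point the inet6sw[] protocol-switch table is rewritten from positional aggregate initializers to C99 designated initializers: unset fields default to zero, entries no longer depend on the field order of struct ip6protosw, and the long runs of literal zeros disappear. A toy illustration of the two styles (struct proto is hypothetical):

struct proto {
	int type;
	int protocol;
	int (*input)(void);
	int (*output)(void);
};

/* positional: every slot supplied, in declaration order */
struct proto old_style = { 1, 17, 0, 0 };

/* designated: name only the fields that matter; the rest are zero */
struct proto new_style = {
	.type = 1,
	.protocol = 17,
};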
@@ -116,6 +113,7 @@ #include #include #include +#include #include #include @@ -128,7 +126,6 @@ #include #include #include -#include #include #include @@ -162,218 +159,275 @@ * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. */ -extern struct domain inet6domain; +extern struct domain inet6domain_s; +struct domain *inet6domain = NULL; + static struct pr_usrreqs nousrreqs; lck_mtx_t *inet6_domain_mutex; -#define PR_LISTEN 0 -#define PR_ABRTACPTDIS 0 - -extern int in6_inithead(void **, int); -void in6_dinit(void) __attribute__((section("__TEXT, initcode"))); - -static int rip6_pr_output(struct mbuf *m, struct socket *so, struct sockaddr_in6 *, struct mbuf *); +static void in6_dinit(struct domain *); +static int rip6_pr_output(struct mbuf *, struct socket *, + struct sockaddr_in6 *, struct mbuf *); struct ip6protosw inet6sw[] = { -{ 0, &inet6domain, IPPROTO_IPV6, 0, - 0, 0, 0, 0, - 0, - ip6_init, 0, frag6_slowtimo, frag6_drain, - 0, - &nousrreqs, - 0, 0, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = 0, + .pr_protocol = IPPROTO_IPV6, + .pr_init = ip6_init, + .pr_drain = ip6_drain, + .pr_usrreqs = &nousrreqs, }, -{ SOCK_DGRAM, &inet6domain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK, - udp6_input, 0, udp6_ctlinput, ip6_ctloutput, - 0, - 0, 0, 0, 0, - 0, - &udp6_usrreqs, - udp_lock, udp_unlock, udp_getlock, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_DGRAM, + .pr_protocol = IPPROTO_UDP, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK|PR_PCBLOCK| + PR_EVCONNINFO, + .pr_input = udp6_input, + .pr_ctlinput = udp6_ctlinput, + .pr_ctloutput = ip6_ctloutput, +#if !INET /* don't call initialization twice */ + .pr_init = udp_init, +#endif /* !INET */ + .pr_usrreqs = &udp6_usrreqs, + .pr_lock = udp_lock, + .pr_unlock = udp_unlock, + .pr_getlock = udp_getlock, }, -{ SOCK_STREAM, &inet6domain, IPPROTO_TCP, PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_PROTOLOCK|PR_PCBLOCK|PR_DISPOSE, - tcp6_input, 0, tcp6_ctlinput, tcp_ctloutput, - 0, -#if INET /* don't call initialization and timeout routines twice */ - 0, 0, 0, tcp_drain, -#else - tcp_init, 0, tcp_slowtimo, tcp_drain, -#endif - 0, - &tcp6_usrreqs, - tcp_lock, tcp_unlock, tcp_getlock, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_STREAM, + .pr_protocol = IPPROTO_TCP, + .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_PCBLOCK| + PR_PROTOLOCK|PR_DISPOSE|PR_EVCONNINFO, + .pr_input = tcp6_input, + .pr_ctlinput = tcp6_ctlinput, + .pr_ctloutput = tcp_ctloutput, +#if !INET /* don't call initialization and timeout routines twice */ + .pr_init = tcp_init, +#endif /* !INET */ + .pr_drain = tcp_drain, + .pr_usrreqs = &tcp6_usrreqs, + .pr_lock = tcp_lock, + .pr_unlock = tcp_unlock, + .pr_getlock = tcp_getlock, }, -{ SOCK_RAW, &inet6domain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, - rip6_input, rip6_pr_output, rip6_ctlinput, rip6_ctloutput, - 0, - 0, 0, 0, 0, - 0, - &rip6_usrreqs, - 0, rip_unlock, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_RAW, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = rip6_input, + .pr_output = rip6_pr_output, + .pr_ctlinput = rip6_ctlinput, + .pr_ctloutput = rip6_ctloutput, +#if !INET /* don't call initialization and timeout routines twice */ + .pr_init = rip_init, +#endif /* !INET */ + .pr_usrreqs = &rip6_usrreqs, + .pr_unlock = rip_unlock, }, -{ SOCK_RAW, &inet6domain, IPPROTO_ICMPV6, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - icmp6_input, rip6_pr_output, rip6_ctlinput, rip6_ctloutput, - 0, - icmp6_init, 0, mld_slowtimo, 0, - 0, - &rip6_usrreqs, - 0, rip_unlock, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = 
IPPROTO_ICMPV6, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = icmp6_input, + .pr_output = rip6_pr_output, + .pr_ctlinput = rip6_ctlinput, + .pr_ctloutput = rip6_ctloutput, + .pr_init = icmp6_init, + .pr_usrreqs = &rip6_usrreqs, + .pr_unlock = rip_unlock, }, -{ SOCK_DGRAM, &inet6domain, IPPROTO_ICMPV6, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - icmp6_input, rip6_pr_output, rip6_ctlinput, icmp6_dgram_ctloutput, - 0, - icmp6_init, 0, mld_slowtimo, 0, - 0, - &icmp6_dgram_usrreqs, - 0, rip_unlock, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_DGRAM, + .pr_protocol = IPPROTO_ICMPV6, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = icmp6_input, + .pr_output = rip6_pr_output, + .pr_ctlinput = rip6_ctlinput, + .pr_ctloutput = icmp6_dgram_ctloutput, + .pr_init = icmp6_init, + .pr_usrreqs = &icmp6_dgram_usrreqs, + .pr_unlock = rip_unlock, }, -{ SOCK_RAW, &inet6domain, IPPROTO_DSTOPTS,PR_ATOMIC|PR_ADDR, - dest6_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_DSTOPTS, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = dest6_input, + .pr_usrreqs = &nousrreqs, }, -{ SOCK_RAW, &inet6domain, IPPROTO_ROUTING,PR_ATOMIC|PR_ADDR, - route6_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_ROUTING, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = route6_input, + .pr_usrreqs = &nousrreqs, }, -{ SOCK_RAW, &inet6domain, IPPROTO_FRAGMENT,PR_ATOMIC|PR_ADDR, - frag6_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_FRAGMENT, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, + .pr_input = frag6_input, + .pr_usrreqs = &nousrreqs, }, #if IPSEC -{ SOCK_RAW, &inet6domain, IPPROTO_AH, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, - ah6_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_AH, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, + .pr_input = ah6_input, + .pr_usrreqs = &nousrreqs, }, #if IPSEC_ESP -{ SOCK_RAW, &inet6domain, IPPROTO_ESP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, - esp6_input, 0, - esp6_ctlinput, - 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_ESP, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, + .pr_input = esp6_input, + .pr_ctlinput = esp6_ctlinput, + .pr_usrreqs = &nousrreqs, }, -#endif -{ SOCK_RAW, &inet6domain, IPPROTO_IPCOMP, PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, - ipcomp6_input, 0, 0, 0, - 0, - 0, 0, 0, 0, - 0, - &nousrreqs, - 0, 0, 0, - { 0, 0 }, NULL, { 0 } +#endif /* IPSEC_ESP */ +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_IPCOMP, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_PROTOLOCK, + .pr_input = ipcomp6_input, + .pr_usrreqs = &nousrreqs, }, #endif /* IPSEC */ #if INET -{ SOCK_RAW, &inet6domain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - encap6_input, rip6_pr_output, 0, rip6_ctloutput, - 0, - encap_init, 0, 0, 0, - 0, - &rip6_usrreqs, - 0, rip_unlock, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_IPV4, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap6_input, + .pr_output = rip6_pr_output, + .pr_ctloutput = rip6_ctloutput, + .pr_init = encap6_init, + .pr_usrreqs = &rip6_usrreqs, + .pr_unlock = rip_unlock, }, #endif /*INET*/ -{ SOCK_RAW, &inet6domain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - encap6_input, rip6_pr_output, 0, 
rip6_ctloutput, - 0, - encap_init, 0, 0, 0, - 0, - &rip6_usrreqs, - 0, rip_unlock, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_IPV6, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap6_input, + .pr_output = rip6_pr_output, + .pr_ctloutput = rip6_ctloutput, + .pr_init = encap6_init, + .pr_usrreqs = &rip6_usrreqs, + .pr_unlock = rip_unlock, }, #if MROUTING -{ SOCK_RAW, &inet6domain, IPPROTO_PIM, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - pim6_input, rip6_pr_output, 0, rip6_ctloutput, - 0, - 0, 0, 0, 0, - 0, - &rip6_usrreqs, - 0, rip_unlock, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = IPPROTO_PIM, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = pim6_input, + .pr_output = rip6_pr_output, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs, + .pr_unlock = rip_unlock, }, -#endif +#endif /* MROUTING */ /* raw wildcard */ -{ SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - rip6_input, rip6_pr_output, 0, rip6_ctloutput, - 0, - 0, 0, 0, 0, - 0, - &rip6_usrreqs, - 0, rip_unlock, 0, - { 0, 0 }, NULL, { 0 } +{ + .pr_type = SOCK_RAW, + .pr_protocol = 0, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = rip6_input, + .pr_output = rip6_pr_output, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs, + .pr_unlock = rip_unlock, }, }; - int in6_proto_count = (sizeof (inet6sw) / sizeof (struct ip6protosw)); -struct domain inet6domain = - { AF_INET6, "internet6", in6_dinit, 0, 0, - (struct protosw *)inet6sw, 0, - in6_inithead, offsetof(struct sockaddr_in6, sin6_addr) << 3, sizeof(struct sockaddr_in6) , - sizeof(struct sockaddr_in6), 0, - NULL, 0, {0,0} - }; - -DOMAIN_SET(inet6); +struct domain inet6domain_s = { + .dom_family = PF_INET6, + .dom_flags = DOM_REENTRANT, + .dom_name = "internet6", + .dom_init = in6_dinit, + .dom_rtattach = in6_inithead, + .dom_rtoffset = offsetof(struct sockaddr_in6, sin6_addr) << 3, + .dom_maxrtkey = sizeof (struct sockaddr_in6), + .dom_protohdrlen = sizeof (struct sockaddr_in6), +}; /* Initialize the PF_INET6 domain, and add in the pre-defined protos */ void -in6_dinit(void) +in6_dinit(struct domain *dp) { - register int i; - register struct ip6protosw *pr; - register struct domain *dp; - static int inet6domain_initted = 0; - - if (!inet6domain_initted) { - dp = &inet6domain; - - for (i=0, pr = &inet6sw[0]; idom_mtx; - inet6domain_initted = 1; - } + struct ip6protosw *pr; + int i; + + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(inet6domain == NULL); + + inet6domain = dp; + + _CASSERT(sizeof (struct protosw) == sizeof (struct ip6protosw)); + _CASSERT(offsetof(struct ip6protosw, pr_entry) == + offsetof(struct protosw, pr_entry)); + _CASSERT(offsetof(struct ip6protosw, pr_domain) == + offsetof(struct protosw, pr_domain)); + _CASSERT(offsetof(struct ip6protosw, pr_protosw) == + offsetof(struct protosw, pr_protosw)); + _CASSERT(offsetof(struct ip6protosw, pr_type) == + offsetof(struct protosw, pr_type)); + _CASSERT(offsetof(struct ip6protosw, pr_protocol) == + offsetof(struct protosw, pr_protocol)); + _CASSERT(offsetof(struct ip6protosw, pr_flags) == + offsetof(struct protosw, pr_flags)); + _CASSERT(offsetof(struct ip6protosw, pr_input) == + offsetof(struct protosw, pr_input)); + _CASSERT(offsetof(struct ip6protosw, pr_output) == + offsetof(struct protosw, pr_output)); + _CASSERT(offsetof(struct ip6protosw, pr_ctlinput) == + offsetof(struct protosw, pr_ctlinput)); + _CASSERT(offsetof(struct ip6protosw, pr_ctloutput) == + offsetof(struct protosw, pr_ctloutput)); + 
_CASSERT(offsetof(struct ip6protosw, pr_usrreqs) == + offsetof(struct protosw, pr_usrreqs)); + _CASSERT(offsetof(struct ip6protosw, pr_init) == + offsetof(struct protosw, pr_init)); + _CASSERT(offsetof(struct ip6protosw, pr_drain) == + offsetof(struct protosw, pr_drain)); + _CASSERT(offsetof(struct ip6protosw, pr_sysctl) == + offsetof(struct protosw, pr_sysctl)); + _CASSERT(offsetof(struct ip6protosw, pr_lock) == + offsetof(struct protosw, pr_lock)); + _CASSERT(offsetof(struct ip6protosw, pr_unlock) == + offsetof(struct protosw, pr_unlock)); + _CASSERT(offsetof(struct ip6protosw, pr_getlock) == + offsetof(struct protosw, pr_getlock)); + _CASSERT(offsetof(struct ip6protosw, pr_filter_head) == + offsetof(struct protosw, pr_filter_head)); + _CASSERT(offsetof(struct ip6protosw, pr_old) == + offsetof(struct protosw, pr_old)); + + /* + * Attach first, then initialize. ip6_init() needs raw IP6 handler. + */ + for (i = 0, pr = &inet6sw[0]; i < in6_proto_count; i++, pr++) + net_add_proto((struct protosw *)pr, dp, 0); + for (i = 0, pr = &inet6sw[0]; i < in6_proto_count; i++, pr++) + net_init_proto((struct protosw *)pr, dp); + + inet6_domain_mutex = dp->dom_mtx; } -int rip6_pr_output(__unused struct mbuf *m, __unused struct socket *so, - __unused struct sockaddr_in6 *sin6, __unused struct mbuf *m1) +static int +rip6_pr_output(struct mbuf *m, struct socket *so, struct sockaddr_in6 *sin6, + struct mbuf *m1) { - panic("rip6_pr_output\n"); - return 0; +#pragma unused(m, so, sin6, m1) + panic("%s\n", __func__); + /* NOTREACHED */ + return (0); } /* @@ -396,15 +450,12 @@ int ip6_sendredirects = IPV6_SENDREDIRECTS; int ip6_defhlim = IPV6_DEFHLIM; int ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS; int ip6_accept_rtadv = 1; /* deprecated */ -int ip6_maxfragpackets; /* initialized in frag6.c:frag6_init() */ -int ip6_maxfrags; int ip6_log_interval = 5; int ip6_hdrnestlimit = 15; /* How many header options will we process? */ int ip6_dad_count = 1; /* DupAddrDetectionTransmits */ -u_int32_t ip6_flow_seq; int ip6_auto_flowlabel = 1; int ip6_gif_hlim = 0; -int ip6_use_deprecated = 1; /* allow deprecated addr (RFC2462 5.5.4) */ +int ip6_use_deprecated = 1; /* allow deprecated addr [RFC 4862, 5.5.4] */ int ip6_rr_prune = 5; /* router renumbering prefix * walk list every 5 sec. */ int ip6_mcast_pmtu = 0; /* enable pMTU discovery for multicast? */ @@ -416,9 +467,8 @@ int ip6_maxifdefrouters = 16; /* Max acceptable def routers via RA */ int ip6_maxdynroutes = 1024; /* Max # of routes created via redirect */ int ip6_only_allow_rfc4193_prefix = 0; /* Only allow RFC4193 style Unique Local IPv6 Unicast prefixes */ -u_int32_t ip6_id = 0UL; static int ip6_keepfaith = 0; -time_t ip6_log_time = (time_t)0L; +uint64_t ip6_log_time = 0; int nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (as in RFC 4861) */ /* icmp6 */ @@ -454,16 +504,21 @@ int udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6)); /* * sysctl related items. 
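The _CASSERT() wall above is a compile-time proof that struct ip6protosw and struct protosw share a layout, which is what makes the (struct protosw *)pr casts in the registration loops safe. The equivalent check in portable C11, with toy structs standing in for the two switch types:

#include <stddef.h>

struct a { int x; long y; };
struct b { int x; long y; };

_Static_assert(sizeof (struct a) == sizeof (struct b),
    "sizes must match before casting between the two");
_Static_assert(offsetof(struct a, y) == offsetof(struct b, y),
    "every shared field must sit at the same offset");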
*/ -SYSCTL_NODE(_net, PF_INET6, inet6, CTLFLAG_RW | CTLFLAG_LOCKED, 0, - "Internet6 Family"); +SYSCTL_NODE(_net, PF_INET6, inet6, + CTLFLAG_RW | CTLFLAG_LOCKED, 0, "Internet6 Family"); /* net.inet6 */ -SYSCTL_NODE(_net_inet6, IPPROTO_IPV6, ip6, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP6"); -SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "ICMP6"); -SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "UDP6"); -SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "TCP6"); +SYSCTL_NODE(_net_inet6, IPPROTO_IPV6, ip6, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IP6"); +SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "ICMP6"); +SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "UDP6"); +SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "TCP6"); #if IPSEC -SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPSEC6"); +SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, + CTLFLAG_RW|CTLFLAG_LOCKED, 0, "IPSEC6"); #endif /* IPSEC */ /* net.inet6.ip6 */ @@ -483,9 +538,9 @@ sysctl_ip6_temppltime SYSCTL_HANDLER_ARGS ip6_temp_preferred_lifetime < ip6_desync_factor + ip6_temp_regen_advance) { ip6_temp_preferred_lifetime = old; - return(EINVAL); + return (EINVAL); } - return(error); + return (error); } static int @@ -502,10 +557,20 @@ sysctl_ip6_tempvltime SYSCTL_HANDLER_ARGS error = SYSCTL_IN(req, arg1, sizeof(int)); if (ip6_temp_valid_lifetime > ND6_MAX_LIFETIME || ip6_temp_valid_lifetime < ip6_temp_preferred_lifetime) { - ip6_temp_preferred_lifetime = old; - return(EINVAL); + ip6_temp_valid_lifetime = old; + return (EINVAL); } - return(error); + return (error); +} + +static int +ip6_getstat SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1, arg2) + if (req->oldptr == USER_ADDR_NULL) + req->oldlen = (size_t)sizeof (struct ip6stat); + + return (SYSCTL_OUT(req, &ip6stat, MIN(sizeof (ip6stat), req->oldlen))); } SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, @@ -514,12 +579,8 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_sendredirects, 0, ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_defhlim, 0, ""); -SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, - &ip6stat, ip6stat, ""); -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, - maxfragpackets, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0, ""); -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, - maxfrags, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0, ""); +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOCKED, + 0, 0, ip6_getstat, "S,ip6stat", ""); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv, CTLFLAG_RD | CTLFLAG_LOCKED, &ip6_accept_rtadv, 0, ""); @@ -588,6 +649,8 @@ SYSCTL_STRUCT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, CTLFLAG_RD | CTLFLAG_LOC &icmp6stat, icmp6stat, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune, CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_prune, 0, ""); +SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, + nd6_prune_lazy, CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_prune_lazy, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_delay, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, @@ -602,8 +665,6 @@ SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6_nodeinfo, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, 
errppslimit, CTLFLAG_RW | CTLFLAG_LOCKED, &icmp6errppslim, 0, ""); -SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, - nd6_maxnudhint, CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_maxnudhint, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &nd6_debug, 0, ""); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861, diff --git a/bsd/netinet6/in6_rmx.c b/bsd/netinet6/in6_rmx.c index 509bba093..e2fe40208 100644 --- a/bsd/netinet6/in6_rmx.c +++ b/bsd/netinet6/in6_rmx.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2011 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,9 +26,6 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/in6_rmx.c,v 1.1.2.2 2001/07/03 11:01:52 ume Exp $ */ -/* $KAME: in6_rmx.c,v 1.10 2001/05/24 05:44:58 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -130,45 +127,59 @@ #include #include -extern int in6_inithead(void **head, int off); -static void in6_rtqtimo(void *rock); -static void in6_mtutimo(void *rock); extern int tvtohz(struct timeval *); +static int in6_rtqtimo_run; /* in6_rtqtimo is scheduled to run */ +static void in6_rtqtimo(void *); +static void in6_sched_rtqtimo(struct timeval *); + +static struct radix_node *in6_addroute(void *, void *, struct radix_node_head *, + struct radix_node *); +static struct radix_node *in6_deleteroute(void *, void *, + struct radix_node_head *); +static struct radix_node *in6_matroute(void *, struct radix_node_head *); static struct radix_node *in6_matroute_args(void *, struct radix_node_head *, rn_matchf_t *, void *); +static void in6_clsroute(struct radix_node *, struct radix_node_head *); +static int in6_rtqkill(struct radix_node *, void *); -#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ +#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ /* * Accessed by in6_addroute(), in6_deleteroute() and in6_rtqkill(), during * which the routing lock (rnh_lock) is held and thus protects the variable. */ -static int in6dynroutes; +static int in6dynroutes; /* * Do what we need to do when inserting a route. */ static struct radix_node * in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, - struct radix_node *treenodes) + struct radix_node *treenodes) { struct rtentry *rt = (struct rtentry *)treenodes; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)rt_key(rt); struct radix_node *ret; + char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; + uint32_t flags = rt->rt_flags; + boolean_t verbose = (rt_verbose > 1); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); + if (verbose) + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + /* * If this is a dynamic route (which is created via Redirect) and * we already have the maximum acceptable number of such route entries, * reject creating a new one. We could initiate garbage collection to * make available space right now, but the benefit would probably not * be worth the cleaning overhead; we only have to endure a slightly - * suboptimal path even without the redirecbted route. + * suboptimal path even without the redirected route. 
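The comment above describes in6_addroute()'s cap on RTF_DYNAMIC entries: once in6dynroutes reaches ip6_maxdynroutes, new redirect-created routes are refused outright rather than garbage-collected on the spot. Reduced to a guarded counter (hypothetical names mirroring the real globals):

static int dynroutes;			/* live RTF_DYNAMIC entries */
static int maxdynroutes = 1024;		/* cap; a negative value disables it */

static int
may_add_dynamic(void)
{
	if (maxdynroutes >= 0 && dynroutes >= maxdynroutes)
		return (0);	/* reject; GC would cost more than it saves */
	dynroutes++;
	return (1);
}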
*/ - if ((rt->rt_flags & RTF_DYNAMIC) != 0 && + if ((rt->rt_flags & RTF_DYNAMIC) && ip6_maxdynroutes >= 0 && in6dynroutes >= ip6_maxdynroutes) return (NULL); @@ -178,9 +189,8 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) rt->rt_flags |= RTF_MULTICAST; - if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) { + if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) rt->rt_flags |= RTF_PRCLONING; - } /* * A little bit of help for both IPv6 output and input: @@ -198,16 +208,15 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, */ if (rt->rt_flags & RTF_HOST) { IFA_LOCK_SPIN(rt->rt_ifa); - if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr) - ->sin6_addr, - &sin6->sin6_addr)) { + if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)-> + sin6_addr, &sin6->sin6_addr)) { rt->rt_flags |= RTF_LOCAL; } IFA_UNLOCK(rt->rt_ifa); } - if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) - && rt->rt_ifp) + if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) && + rt->rt_ifp) rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; ret = rn_addroute(v_arg, n_arg, head, treenodes); @@ -216,16 +225,34 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, /* * We are trying to add a host route, but can't. * Find out if it is because of an - * ARP entry and delete it if so. + * ND6 entry and delete it if so. */ rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0, RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt))); - if (rt2) { + if (rt2 != NULL) { + char dbufc[MAX_IPv6_STR_LEN]; + RT_LOCK(rt2); + if (verbose) + rt_str(rt2, dbufc, sizeof (dbufc), NULL, 0); + if ((rt2->rt_flags & RTF_LLINFO) && (rt2->rt_flags & RTF_HOST) && rt2->rt_gateway != NULL && rt2->rt_gateway->sa_family == AF_LINK) { + if (verbose) { + log(LOG_DEBUG, "%s: unable to insert " + "route to %s:%s, flags=%b, due to " + "existing ND6 route %s->%s " + "flags=%b, attempting to delete\n", + __func__, dbuf, + (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS, + dbufc, (rt2->rt_ifp != NULL) ? 
+ rt2->rt_ifp->if_xname : "", + rt2->rt_flags, RTF_BITS); + } /* * Safe to drop rt_lock and use rt_key, * rt_gateway, since holding rnh_lock here @@ -235,9 +262,9 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, RT_UNLOCK(rt2); (void) rtrequest_locked(RTM_DELETE, rt_key(rt2), rt2->rt_gateway, rt_mask(rt2), - rt2->rt_flags, 0); + rt2->rt_flags, NULL); ret = rn_addroute(v_arg, n_arg, head, - treenodes); + treenodes); } else { RT_UNLOCK(rt2); } @@ -259,13 +286,13 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, */ rt2 = rtalloc1_scoped_locked((struct sockaddr *)sin6, 0, RTF_CLONING | RTF_PRCLONING, sin6_get_ifscope(rt_key(rt))); - if (rt2) { + if (rt2 != NULL) { RT_LOCK(rt2); - if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY)) - == RTF_CLONING - && rt2->rt_gateway - && rt2->rt_gateway->sa_family == AF_LINK - && rt2->rt_ifp == rt->rt_ifp) { + if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST| + RTF_GATEWAY)) == RTF_CLONING && + rt2->rt_gateway && + rt2->rt_gateway->sa_family == AF_LINK && + rt2->rt_ifp == rt->rt_ifp) { ret = rt2->rt_nodes; } RT_UNLOCK(rt2); @@ -273,14 +300,37 @@ in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, } } - if (ret != NULL && (rt->rt_flags & RTF_DYNAMIC) != 0) + if (ret != NULL && (rt->rt_flags & RTF_DYNAMIC)) in6dynroutes++; - return ret; + if (!verbose) + goto done; + + if (ret != NULL) { + if (flags != rt->rt_flags) { + log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, " + "oflags=%b, flags=%b\n", __func__, + dbuf, gbuf, (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", flags, RTF_BITS, + rt->rt_flags, RTF_BITS); + } else { + log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, " + "flags=%b\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } + } else { + log(LOG_DEBUG, "%s: unable to insert route to %s->%s->%s, " + "flags=%b, already exists\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } +done: + return (ret); } static struct radix_node * -in6_deleteroute(void * v_arg, void *netmask_arg, struct radix_node_head *head) +in6_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head) { struct radix_node *rn; @@ -289,12 +339,21 @@ in6_deleteroute(void * v_arg, void *netmask_arg, struct radix_node_head *head) rn = rn_delete(v_arg, netmask_arg, head); if (rn != NULL) { struct rtentry *rt = (struct rtentry *)rn; - RT_LOCK_SPIN(rt); - if ((rt->rt_flags & RTF_DYNAMIC) != 0) + + RT_LOCK(rt); + if (rt->rt_flags & RTF_DYNAMIC) in6dynroutes--; + if (rt_verbose > 1) { + char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; + + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + log(LOG_DEBUG, "%s: route to %s->%s->%s deleted, " + "flags=%b\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } RT_UNLOCK(rt); } - return (rn); } @@ -309,9 +368,26 @@ in6_validate(struct radix_node *rn) RT_LOCK_ASSERT_HELD(rt); /* This is first reference? */ - if (rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) { - rt->rt_flags &= ~RTPRF_OURS; - rt_setexpire(rt, 0); + if (rt->rt_refcnt == 0) { + if (rt_verbose > 2) { + char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; + + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + log(LOG_DEBUG, "%s: route to %s->%s->%s validated, " + "flags=%b\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } + + /* + * It's one of ours; unexpire it. 
If the timer is already + * scheduled, let it run later as it won't re-arm itself + * if there's nothing to do. + */ + if (rt->rt_flags & RTPRF_OURS) { + rt->rt_flags &= ~RTPRF_OURS; + rt_setexpire(rt, 0); + } } return (rn); } @@ -346,30 +422,32 @@ in6_matroute_args(void *v_arg, struct radix_node_head *head, SYSCTL_DECL(_net_inet6_ip6); -static int rtq_reallyold = 60*60; - /* one hour is ``really old'' */ -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, - CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold , 0, ""); - -static int rtq_minreallyold = 10; - /* never automatically crank down to less */ -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, - CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold , 0, ""); +/* one hour is ``really old'' */ +static uint32_t rtq_reallyold = 60*60; +SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, + CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0, ""); -static int rtq_toomany = 128; - /* 128 cached routes is ``too many'' */ -SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, - CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany , 0, ""); +/* never automatically crank down to less */ +static uint32_t rtq_minreallyold = 10; +SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, + CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0, ""); +/* 128 cached routes is ``too many'' */ +static uint32_t rtq_toomany = 128; +SYSCTL_UINT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache, + CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0, ""); /* * On last reference drop, mark the route as belong to us so that it can be * timed out. */ static void -in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) +in6_clsroute(struct radix_node *rn, struct radix_node_head *head) { +#pragma unused(head) + char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; struct rtentry *rt = (struct rtentry *)rn; + boolean_t verbose = (rt_verbose > 1); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); @@ -386,12 +464,23 @@ in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC))) return; + if (verbose) + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + /* * Delete the route immediately if RTF_DELCLONE is set or * if route caching is disabled (rtq_reallyold set to 0). * Otherwise, let it expire and be deleted by in6_rtqkill(). */ if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) { + int err; + + if (verbose) { + log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, " + "flags=%b\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS); + } /* * Delete the route from the radix tree but since we are * called when the route's reference count is 0, don't @@ -402,31 +491,48 @@ in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head) * from calling rt_setgate() on this route. */ RT_UNLOCK(rt); - if (rtrequest_locked(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) { + err = rtrequest_locked(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt); + if (err == 0) { /* Now let the caller free it */ RT_LOCK(rt); RT_REMREF_LOCKED(rt); } else { RT_LOCK(rt); + if (!verbose) + rt_str(rt, dbuf, sizeof (dbuf), + gbuf, sizeof (gbuf)); + log(LOG_ERR, "%s: error deleting route to " + "%s->%s->%s, flags=%b, err=%d\n", __func__, + dbuf, gbuf, (rt->rt_ifp != NULL) ? 
+ rt->rt_ifp->if_xname : "", rt->rt_flags, + RTF_BITS, err); } } else { uint64_t timenow; timenow = net_uptime(); rt->rt_flags |= RTPRF_OURS; - rt_setexpire(rt, - rt_expiry(rt, timenow, rtq_reallyold)); + rt_setexpire(rt, timenow + rtq_reallyold); + + if (verbose) { + log(LOG_DEBUG, "%s: route to %s->%s->%s invalidated, " + "flags=%b, expire=T+%u\n", __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS, rt->rt_expire - timenow); + } + + /* We have at least one entry; arm the timer if not already */ + in6_sched_rtqtimo(NULL); } } struct rtqk_arg { struct radix_node_head *rnh; - int mode; int updating; int draining; - int killed; - int found; + uint32_t killed; + uint32_t found; uint64_t nextstop; }; @@ -442,24 +548,40 @@ in6_rtqkill(struct radix_node *rn, void *rock) { struct rtqk_arg *ap = rock; struct rtentry *rt = (struct rtentry *)rn; - int err; + boolean_t verbose = (rt_verbose > 1); uint64_t timenow; + int err; timenow = net_uptime(); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK(rt); if (rt->rt_flags & RTPRF_OURS) { + char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN]; + + if (verbose) + rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf)); + ap->found++; VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); if (ap->draining || rt->rt_expire <= timenow || - ((rt->rt_flags & RTF_DYNAMIC) != 0 && - ip6_maxdynroutes >= 0 && + ((rt->rt_flags & RTF_DYNAMIC) && ip6_maxdynroutes >= 0 && in6dynroutes > ip6_maxdynroutes / 2)) { - if (rt->rt_refcnt > 0) - panic("rtqkill route really not free"); - + if (rt->rt_refcnt > 0) { + panic("%s: route %p marked with RTPRF_OURS " + "with non-zero refcnt (%u)", __func__, + rt, rt->rt_refcnt); + /* NOTREACHED */ + } + if (verbose) { + log(LOG_DEBUG, "%s: deleting route to " + "%s->%s->%s, flags=%b, draining=%d\n", + __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", rt->rt_flags, + RTF_BITS, ap->draining); + } + RT_ADDREF_LOCKED(rt); /* for us to free below */ /* * Delete this route since we're done with it; * the route may be freed afterwards, so we @@ -471,52 +593,81 @@ in6_rtqkill(struct radix_node *rn, void *rock) */ RT_UNLOCK(rt); err = rtrequest_locked(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); - if (err) { - log(LOG_WARNING, "in6_rtqkill: error %d", err); + rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); + if (err != 0) { + RT_LOCK(rt); + if (!verbose) + rt_str(rt, dbuf, sizeof (dbuf), + gbuf, sizeof (gbuf)); + log(LOG_ERR, "%s: error deleting route to " + "%s->%s->%s, flags=%b, err=%d\n", __func__, + dbuf, gbuf, (rt->rt_ifp != NULL) ? + rt->rt_ifp->if_xname : "", rt->rt_flags, + RTF_BITS, err); + RT_UNLOCK(rt); } else { ap->killed++; } + rtfree_locked(rt); } else { - if (ap->updating && - (rt->rt_expire - timenow) > - rt_expiry(rt, 0, rtq_reallyold)) { - rt_setexpire(rt, rt_expiry(rt, - timenow, rtq_reallyold)); + uint64_t expire = (rt->rt_expire - timenow); + + if (ap->updating && expire > rtq_reallyold) { + rt_setexpire(rt, timenow + rtq_reallyold); + if (verbose) { + log(LOG_DEBUG, "%s: route to " + "%s->%s->%s, flags=%b, adjusted " + "expire=T+%u (was T+%u)\n", + __func__, dbuf, gbuf, + (rt->rt_ifp != NULL) ? 
+ rt->rt_ifp->if_xname : "", + rt->rt_flags, RTF_BITS, + (rt->rt_expire - timenow), expire); + } } - ap->nextstop = lmin(ap->nextstop, - rt->rt_expire); + ap->nextstop = lmin(ap->nextstop, rt->rt_expire); RT_UNLOCK(rt); } } else { RT_UNLOCK(rt); } - return 0; + return (0); } -#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ +#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ static int rtq_timeout = RTQ_TIMEOUT; static void -in6_rtqtimo(void *rock) +in6_rtqtimo(void *targ) { - struct radix_node_head *rnh = rock; +#pragma unused(targ) + struct radix_node_head *rnh; struct rtqk_arg arg; struct timeval atv; static uint64_t last_adjusted_timeout = 0; + boolean_t verbose = (rt_verbose > 1); uint64_t timenow; + uint32_t ours; lck_mtx_lock(rnh_lock); + rnh = rt_tables[AF_INET6]; + VERIFY(rnh != NULL); + /* Get the timestamp after we acquire the lock for better accuracy */ timenow = net_uptime(); - - arg.found = arg.killed = 0; + if (verbose) { + log(LOG_DEBUG, "%s: initial nextstop is T+%u seconds\n", + __func__, rtq_timeout); + } + bzero(&arg, sizeof (arg)); arg.rnh = rnh; arg.nextstop = timenow + rtq_timeout; - arg.draining = arg.updating = 0; rnh->rnh_walktree(rnh, in6_rtqkill, &arg); - + if (verbose) { + log(LOG_DEBUG, "%s: found %u, killed %u\n", __func__, + arg.found, arg.killed); + } /* * Attempt to be somewhat dynamic about this: * If there are ``too many'' routes sitting around taking up space, @@ -525,19 +676,19 @@ in6_rtqtimo(void *rock) * than once in rtq_timeout seconds, to keep from cranking down too * hard. */ - if ((arg.found - arg.killed > rtq_toomany) - && ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) - && rtq_reallyold > rtq_minreallyold) { - rtq_reallyold = 2*rtq_reallyold / 3; - if (rtq_reallyold < rtq_minreallyold) { + ours = (arg.found - arg.killed); + if (ours > rtq_toomany && + ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) && + rtq_reallyold > rtq_minreallyold) { + rtq_reallyold = 2 * rtq_reallyold / 3; + if (rtq_reallyold < rtq_minreallyold) rtq_reallyold = rtq_minreallyold; - } last_adjusted_timeout = timenow; -#if DIAGNOSTIC - log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d", - rtq_reallyold); -#endif + if (verbose) { + log(LOG_DEBUG, "%s: adjusted rtq_reallyold to %d " + "seconds\n", __func__, rtq_reallyold); + } arg.found = arg.killed = 0; arg.updating = 1; rnh->rnh_walktree(rnh, in6_rtqkill, &arg); @@ -545,88 +696,53 @@ in6_rtqtimo(void *rock) atv.tv_usec = 0; atv.tv_sec = arg.nextstop - timenow; + /* re-arm the timer only if there's work to do */ + in6_rtqtimo_run = 0; + if (ours > 0) + in6_sched_rtqtimo(&atv); + else if (verbose) + log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__); lck_mtx_unlock(rnh_lock); - timeout(in6_rtqtimo, rock, tvtohz(&atv)); } -/* - * Age old PMTUs. 
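[Illustrative aside, not part of the patch: the in6_rtqtimo() hunk above keeps the old adaptive back-off — when more than rtq_toomany unexpired clones survive a sweep, the cache lifetime is cut to two thirds, clamped at rtq_minreallyold, and readjusted at most once per rtq_timeout interval. A minimal stand-alone model of that computation, with the tunables mirrored as plain globals and a hypothetical rtq_adjust() harness:

#include <stdint.h>

static uint32_t rtq_reallyold    = 60 * 60; /* cache lifetime, seconds */
static uint32_t rtq_minreallyold = 10;      /* floor for the lifetime */
static uint32_t rtq_toomany      = 128;     /* too many cached routes */
static uint32_t rtq_timeout     = 60 * 10;  /* min adjust interval */

/*
 * Called after a sweep with the number of still-live clones and the
 * current uptime; returns nonzero when a second, "updating" walk
 * should run to shorten existing expirations.
 */
static int
rtq_adjust(uint32_t ours, uint64_t now)
{
	static uint64_t last_adjusted;

	if (ours <= rtq_toomany ||
	    (now - last_adjusted) < (uint64_t)rtq_timeout ||
	    rtq_reallyold <= rtq_minreallyold)
		return (0);

	rtq_reallyold = 2 * rtq_reallyold / 3;
	if (rtq_reallyold < rtq_minreallyold)
		rtq_reallyold = rtq_minreallyold;
	last_adjusted = now;
	return (1);
}

The 2/3 decay converges geometrically toward the floor, so a burst of cached routes shortens lifetimes quickly without ever disabling caching outright.]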
- */ -struct mtuex_arg { - struct radix_node_head *rnh; - uint64_t nextstop; -}; - -static int -in6_mtuexpire(struct radix_node *rn, void *rock) +static void +in6_sched_rtqtimo(struct timeval *atv) { - struct rtentry *rt = (struct rtentry *)rn; - struct mtuex_arg *ap = rock; - uint64_t timenow; - - timenow = net_uptime(); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); - /* sanity */ - if (!rt) - panic("rt == NULL in in6_mtuexpire"); + if (!in6_rtqtimo_run) { + struct timeval tv; - RT_LOCK(rt); - VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0); - VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0); - if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) { - if (rt->rt_expire <= timenow) { - rt->rt_flags |= RTF_PROBEMTU; - } else { - ap->nextstop = lmin(ap->nextstop, - rt->rt_expire); + if (atv == NULL) { + tv.tv_usec = 0; + tv.tv_sec = MAX(rtq_timeout / 10, 1); + atv = &tv; + } + if (rt_verbose > 1) { + log(LOG_DEBUG, "%s: timer scheduled in " + "T+%llus.%lluu\n", __func__, + (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec); } + in6_rtqtimo_run = 1; + timeout(in6_rtqtimo, NULL, tvtohz(atv)); } - RT_UNLOCK(rt); - - return 0; } -#define MTUTIMO_DEFAULT (60*1) - -static void -in6_mtutimo(void *rock) +void +in6_rtqdrain(void) { - struct radix_node_head *rnh = rock; - struct mtuex_arg arg; - struct timeval atv; - uint64_t timenow, timo; + struct radix_node_head *rnh; + struct rtqk_arg arg; - timenow = net_uptime(); + if (rt_verbose > 1) + log(LOG_DEBUG, "%s: draining routes\n", __func__); - arg.rnh = rnh; - arg.nextstop = timenow + MTUTIMO_DEFAULT; lck_mtx_lock(rnh_lock); - rnh->rnh_walktree(rnh, in6_mtuexpire, &arg); - - atv.tv_usec = 0; - timo = arg.nextstop; - if (timo < timenow) { -#if DIAGNOSTIC - log(LOG_DEBUG, "IPv6: invalid mtu expiration time on routing table\n"); -#endif - arg.nextstop = timenow + 30; /*last resort*/ - } - atv.tv_sec = timo - timenow; - lck_mtx_unlock(rnh_lock); - timeout(in6_mtutimo, rock, tvtohz(&atv)); -} - -void -in6_rtqdrain() -{ - struct radix_node_head *rnh = rt_tables[AF_INET6]; - struct rtqk_arg arg; - arg.found = arg.killed = 0; + rnh = rt_tables[AF_INET6]; + VERIFY(rnh != NULL); + bzero(&arg, sizeof (arg)); arg.rnh = rnh; - arg.nextstop = 0; arg.draining = 1; - arg.updating = 0; - lck_mtx_lock(rnh_lock); rnh->rnh_walktree(rnh, in6_rtqkill, &arg); lck_mtx_unlock(rnh_lock); } @@ -639,11 +755,20 @@ in6_inithead(void **head, int off) { struct radix_node_head *rnh; + /* If called from route_init(), make sure it is exactly once */ + VERIFY(head != (void **)&rt_tables[AF_INET6] || *head == NULL); + if (!rn_inithead(head, off)) - return 0; + return (0); - if (head != (void **)&rt_tables[AF_INET6]) /* BOGUS! */ - return 1; /* only do this for the real routing table */ + /* + * We can get here from nfs_subs.c as well, in which case this + * won't be for the real routing table and thus we're done; + * this also takes care of the case when we're called more than + * once from anywhere but route_init(). 
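[Illustrative aside, not part of the patch: the in6_sched_rtqtimo() hunk above converts the free-running timeout into a demand-armed one, with in6_rtqtimo_run ensuring at most one pending callout. A toy user-space model of the pattern — the callout "service" here fires synchronously purely for demonstration, and all names are hypothetical:

#include <stdio.h>
#include <stdint.h>

static int timer_armed;              /* models in6_rtqtimo_run */
static uint32_t pending_routes = 3;  /* models RTPRF_OURS entries */

static void timer_func(void *);

/* Toy stand-in for the kernel's timeout() callout service. */
static void
arm_callout(void (*fn)(void *), void *arg, uint32_t delay_sec)
{
	printf("callout armed: T+%us\n", delay_sec);
	fn(arg);                     /* fire immediately for the demo */
}

static void
sched_timer(uint32_t delay_sec)
{
	if (!timer_armed) {          /* at most one pending callout */
		timer_armed = 1;
		arm_callout(timer_func, NULL, delay_sec);
	}
}

static void
timer_func(void *arg)
{
	(void)arg;
	if (pending_routes > 0)
		pending_routes--;    /* model one expiry per sweep */
	timer_armed = 0;             /* clear before the re-arm check */
	if (pending_routes > 0)      /* re-arm only if work remains */
		sched_timer(60);
}

int
main(void)
{
	sched_timer(60);  /* as in6_clsroute() does on the first cached route */
	return (0);
}

Clearing the flag before the re-arm decision is the essential ordering; done the other way around, a route cached during the sweep could find the flag still set and never get a timer.]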
+ */ + if (head != (void **)&rt_tables[AF_INET6]) + return (1); /* only do this for the real routing table */ rnh = *head; rnh->rnh_addaddr = in6_addroute; @@ -651,7 +776,5 @@ in6_inithead(void **head, int off) rnh->rnh_matchaddr = in6_matroute; rnh->rnh_matchaddr_args = in6_matroute_args; rnh->rnh_close = in6_clsroute; - in6_rtqtimo(rnh); /* kick off timeout first time */ - in6_mtutimo(rnh); /* kick off timeout first time */ - return 1; + return (1); } diff --git a/bsd/netinet6/in6_src.c b/bsd/netinet6/in6_src.c index 40bad0948..522f0d556 100644 --- a/bsd/netinet6/in6_src.c +++ b/bsd/netinet6/in6_src.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.1.2.2 2001/07/03 11:01:52 ume Exp $ */ -/* $KAME: in6_src.c,v 1.37 2001/03/29 05:34:31 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -133,10 +130,10 @@ SYSCTL_DECL(_net_inet6_ip6); static int ip6_select_srcif_debug = 0; SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug, - CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0, - "log source interface selection debug info"); + CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0, + "log source interface selection debug info"); -#define ADDR_LABEL_NOTAPP (-1) +#define ADDR_LABEL_NOTAPP (-1) struct in6_addrpolicy defaultaddrpolicy; int ip6_prefer_tempaddr = 1; @@ -150,12 +147,12 @@ extern lck_mtx_t *addrsel_mutex; #endif static int selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *, - struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, - struct ifnet **, struct rtentry **, int, int, - const struct ip6_out_args *ip6oa); + struct ip6_pktopts *, struct ip6_moptions *, struct in6_ifaddr **, + struct route_in6 *, struct ifnet **, struct rtentry **, int, int, + struct ip6_out_args *ip6oa); static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *ro, - const struct ip6_out_args *, struct ifnet **); + struct ip6_out_args *, struct ifnet **); static void init_policy_queue(void); static int add_addrsel_policyent(const struct in6_addrpolicy *); #ifdef ENABLE_ADDRSEL @@ -173,24 +170,24 @@ void addrsel_policy_init(void); * If necessary, this function lookups the routing table and returns * an entry to the caller for later use. 
*/ -#define REPLACE(r) do {\ - if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ - sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ +#define REPLACE(r) do {\ + if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \ + sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ ip6stat.ip6s_sources_rule[(r)]++; \ goto replace; \ -} while(0) -#define NEXTSRC(r) do {\ - if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ - sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ +} while (0) +#define NEXTSRC(r) do {\ + if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \ + sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ ip6stat.ip6s_sources_rule[(r)]++; \ goto next; /* XXX: we can't use 'continue' here */ \ -} while(0) -#define BREAK(r) do { \ - if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ - sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ +} while (0) +#define BREAK(r) do { \ + if ((r) < sizeof (ip6stat.ip6s_sources_rule) / \ + sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ ip6stat.ip6s_sources_rule[(r)]++; \ goto out; /* XXX: we can't use 'break' here */ \ -} while(0) +} while (0) /* * Regardless of error, it will return an ifp with a reference held if the @@ -212,11 +209,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, u_int32_t odstzone; int prefer_tempaddr; struct ip6_moptions *mopts; - struct timeval timenow; - struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF }; + struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF, 0 }; boolean_t islocal = FALSE; - - getmicrotime(&timenow); + uint64_t secs = net_uptime(); dst = dstsock->sin6_addr; /* make a copy for local operation */ *errorp = 0; @@ -259,9 +254,9 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * the interface must be specified; otherwise, ifa_ifwithaddr() * will fail matching the address. */ - bzero(&srcsock, sizeof(srcsock)); + bzero(&srcsock, sizeof (srcsock)); srcsock.sin6_family = AF_INET6; - srcsock.sin6_len = sizeof(srcsock); + srcsock.sin6_len = sizeof (srcsock); srcsock.sin6_addr = pi->ipi6_addr; if (ifp != NULL) { *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL); @@ -280,10 +275,10 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, IFA_LOCK_SPIN(&ia6->ia_ifa); if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) || ((ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) && - (ia6->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR))) { + IFNET_IS_CELLULAR(ia6->ia_ifa.ifa_ifp))) { IFA_UNLOCK(&ia6->ia_ifa); IFA_REMREF(&ia6->ia_ifa); - *errorp = EADDRNOTAVAIL; + *errorp = EHOSTUNREACH; src_storage = NULL; goto done; } @@ -347,14 +342,14 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, /* avoid unusable addresses */ if ((ia->ia6_flags & - (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) + (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) goto next; - if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) + if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia, secs)) goto next; if (!nd6_optimistic_dad && - (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0) + (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0) goto next; /* Rule 1: Prefer same address */ @@ -382,9 +377,11 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * Rule 3: Avoid deprecated addresses. Note that the case of * !ip6_use_deprecated is already rejected above. 
*/ - if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) + if (!IFA6_IS_DEPRECATED(ia_best, secs) && + IFA6_IS_DEPRECATED(ia, secs)) NEXTSRC(3); - if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) + if (IFA6_IS_DEPRECATED(ia_best, secs) && + !IFA6_IS_DEPRECATED(ia, secs)) REPLACE(3); /* @@ -436,7 +433,7 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, */ if (IN6_IS_ADDR_MULTICAST(&dst) || in6_matchlen(&ia_best->ia_addr.sin6_addr, &dst) >= - in6_mask2len(&ia_best->ia_prefixmask.sin6_addr, NULL)) + ia_best->ia_plen) islocal = TRUE; if (opts == NULL || opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { @@ -497,11 +494,11 @@ in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, replace: best_scope = (new_scope >= 0 ? new_scope : - in6_addrscope(&ia->ia_addr.sin6_addr)); + in6_addrscope(&ia->ia_addr.sin6_addr)); best_policy = (new_policy ? new_policy : - in6_addrsel_lookup_policy(&ia->ia_addr)); + in6_addrsel_lookup_policy(&ia->ia_addr)); best_matchlen = (new_matchlen >= 0 ? new_matchlen : - in6_matchlen(&ia->ia_addr.sin6_addr, &dst)); + in6_matchlen(&ia->ia_addr.sin6_addr, &dst)); IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */ IFA_UNLOCK(&ia->ia_ifa); if (ia_best != NULL) @@ -526,13 +523,15 @@ out: if (ia_best != NULL && (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) && - ia_best->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) { + IFNET_IS_CELLULAR(ia_best->ia_ifa.ifa_ifp)) { IFA_REMREF(&ia_best->ia_ifa); ia_best = NULL; + *errorp = EHOSTUNREACH; } if ((ia = ia_best) == NULL) { - *errorp = EADDRNOTAVAIL; + if (*errorp == 0) + *errorp = EADDRNOTAVAIL; src_storage = NULL; goto done; } @@ -569,9 +568,10 @@ done: */ static int selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, - struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, + struct ip6_pktopts *opts, struct ip6_moptions *mopts, + struct in6_ifaddr **retsrcia, struct route_in6 *ro, struct ifnet **retifp, struct rtentry **retrt, int clone, - int norouteok, const struct ip6_out_args *ip6oa) + int norouteok, struct ip6_out_args *ip6oa) { int error = 0; struct ifnet *ifp = NULL, *ifp0 = NULL; @@ -581,8 +581,9 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, struct in6_addr *dst = &dstsock->sin6_addr; struct ifaddr *ifa = NULL; char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN]; - boolean_t select_srcif, proxied_ifa = FALSE; - unsigned int ifscope = ip6oa->ip6oa_boundif; + boolean_t select_srcif, proxied_ifa = FALSE, local_dst = FALSE; + unsigned int ifscope = ((ip6oa != NULL) ? + ip6oa->ip6oa_boundif : IFSCOPE_NONE); #if 0 char ip6buf[INET6_ADDRSTRLEN]; @@ -591,11 +592,11 @@ selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, dstsock->sin6_addr.s6_addr32[1] == 0 && !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { printf("in6_selectroute: strange destination %s\n", - ip6_sprintf(ip6buf, &dstsock->sin6_addr)); + ip6_sprintf(ip6buf, &dstsock->sin6_addr)); } else { printf("in6_selectroute: destination = %s%%%d\n", - ip6_sprintf(ip6buf, &dstsock->sin6_addr), - dstsock->sin6_scope_id); /* for debug */ + ip6_sprintf(ip6buf, &dstsock->sin6_addr), + dstsock->sin6_scope_id); /* for debug */ } #endif @@ -691,14 +692,14 @@ getsrcif: * or if we haven't done source interface selection on this * route (for this PCB instance) before. 
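[Illustrative aside, not part of the patch: the REPLACE()/NEXTSRC()/BREAK() macros above drive a rule cascade in the RFC 3484 style — each candidate source address is compared against the current best under numbered rules, bumping a bounds-checked per-rule counter before jumping. A compressed, self-contained sketch of that control flow, with a hypothetical candidate type and two toy rules standing in for the real ones:

#include <stdint.h>
#include <stddef.h>

struct cand { int same_addr; int deprecated; };

static uint32_t rule_hits[16];

/* Bounds-checked counter bump, as in the kernel's REPLACE()/NEXTSRC(). */
#define REPLACE(r) do { \
	if ((r) < sizeof (rule_hits) / sizeof (rule_hits[0])) \
		rule_hits[(r)]++; \
	goto replace; \
} while (0)
#define NEXTSRC(r) do { \
	if ((r) < sizeof (rule_hits) / sizeof (rule_hits[0])) \
		rule_hits[(r)]++; \
	goto next; \
} while (0)

static const struct cand *
pick(const struct cand *cands, size_t n)
{
	const struct cand *best = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		const struct cand *ia = &cands[i];

		if (best == NULL)
			goto replace;
		/* Rule 1: prefer the same address. */
		if (ia->same_addr && !best->same_addr)
			REPLACE(1);
		if (!ia->same_addr && best->same_addr)
			NEXTSRC(1);
		/* Rule 3: avoid deprecated addresses. */
		if (!ia->deprecated && best->deprecated)
			REPLACE(3);
		if (ia->deprecated && !best->deprecated)
			NEXTSRC(3);
		continue;            /* tie: keep the current best */
replace:
		best = ia;
next:
		;
	}
	return (best);
}

The goto form exists because the decisions are made inside nested comparisons where a plain continue/break would not reach the loop; the macros make each jump also account for which rule fired.]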
*/ - if (!select_srcif || (ro != NULL && ro->ro_rt != NULL && - (ro->ro_rt->rt_flags & RTF_UP) && - ro->ro_rt->generation_id == route_generation && - (ro->ro_flags & ROF_SRCIF_SELECTED))) { - if (ro != NULL && ro->ro_rt != NULL) { - ifa = ro->ro_rt->rt_ifa; - IFA_ADDREF(ifa); - } + if (!select_srcif) { + goto getroute; + } else if (!ROUTE_UNUSABLE(ro) && ro->ro_srcia != NULL && + (ro->ro_flags & ROF_SRCIF_SELECTED)) { + if (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) + local_dst = TRUE; + ifa = ro->ro_srcia; + IFA_ADDREF(ifa); /* for caller */ goto getroute; } @@ -727,7 +728,7 @@ getsrcif: if (scope == IFSCOPE_NONE) { scope = rt_ifp->if_index; if (scope != get_primary_ifscope(AF_INET6) && - ro->ro_rt->generation_id != route_generation) + ROUTE_UNUSABLE(ro)) scope = get_primary_ifscope(AF_INET6); } @@ -774,6 +775,15 @@ getsrcif: * found interface. */ if (ifa == NULL && ifscope == IFSCOPE_NONE) { + struct ifaddr *ifadst; + + /* Check if the destination address is one of ours */ + ifadst = (struct ifaddr *)ifa_foraddr6(&dstsock->sin6_addr); + if (ifadst != NULL) { + local_dst = TRUE; + IFA_REMREF(ifadst); + } + ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr); if (ip6_select_srcif_debug && ifa != NULL) { @@ -784,7 +794,7 @@ getsrcif: } getroute: - if (ifa != NULL && !proxied_ifa) + if (ifa != NULL && !proxied_ifa && !local_dst) ifscope = ifa->ifa_ifp->if_index; /* @@ -809,34 +819,30 @@ getroute: ron = &opts->ip6po_nextroute; if (ron->ro_rt != NULL) RT_LOCK(ron->ro_rt); - if ((ron->ro_rt != NULL && - ((ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != - (RTF_UP | RTF_LLINFO) || - ron->ro_rt->generation_id != route_generation || + if (ROUTE_UNUSABLE(ron) || (ron->ro_rt != NULL && + (!(ron->ro_rt->rt_flags & RTF_LLINFO) || (select_srcif && (ifa == NULL || (ifa->ifa_ifp != ron->ro_rt->rt_ifp && !proxied_ifa))))) || !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, &sin6_next->sin6_addr)) { - if (ron->ro_rt != NULL) { + if (ron->ro_rt != NULL) RT_UNLOCK(ron->ro_rt); - rtfree(ron->ro_rt); - ron->ro_rt = NULL; - } + + ROUTE_RELEASE(ron); *satosin6(&ron->ro_dst) = *sin6_next; } if (ron->ro_rt == NULL) { rtalloc_scoped((struct route *)ron, ifscope); if (ron->ro_rt != NULL) RT_LOCK(ron->ro_rt); - if (ron->ro_rt == NULL || + if (ROUTE_UNUSABLE(ron) || !(ron->ro_rt->rt_flags & RTF_LLINFO) || !IN6_ARE_ADDR_EQUAL(&satosin6(rt_key(ron->ro_rt))-> sin6_addr, &sin6_next->sin6_addr)) { - if (ron->ro_rt != NULL) { + if (ron->ro_rt != NULL) RT_UNLOCK(ron->ro_rt); - rtfree(ron->ro_rt); - ron->ro_rt = NULL; - } + + ROUTE_RELEASE(ron); error = EHOSTUNREACH; goto done; } @@ -868,16 +874,16 @@ getroute: if (ro == NULL) goto done; if (ro->ro_rt != NULL) - RT_LOCK(ro->ro_rt); - if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) || - satosin6(&ro->ro_dst)->sin6_family != AF_INET6 || - ro->ro_rt->generation_id != route_generation || + RT_LOCK_SPIN(ro->ro_rt); + if (ROUTE_UNUSABLE(ro) || (ro->ro_rt != NULL && + (satosin6(&ro->ro_dst)->sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) || (select_srcif && (ifa == NULL || - (ifa->ifa_ifp != ro->ro_rt->rt_ifp && !proxied_ifa))))) { - RT_UNLOCK(ro->ro_rt); - rtfree(ro->ro_rt); - ro->ro_rt = NULL; + (ifa->ifa_ifp != ro->ro_rt->rt_ifp && !proxied_ifa)))))) { + if (ro->ro_rt != NULL) + RT_UNLOCK(ro->ro_rt); + + ROUTE_RELEASE(ro); } if (ro->ro_rt == NULL) { struct sockaddr_in6 *sa6; @@ -885,10 +891,10 @@ getroute: if (ro->ro_rt != NULL) RT_UNLOCK(ro->ro_rt); /* No route yet, so try to acquire one */ - bzero(&ro->ro_dst, 
sizeof(struct sockaddr_in6)); + bzero(&ro->ro_dst, sizeof (struct sockaddr_in6)); sa6 = (struct sockaddr_in6 *)&ro->ro_dst; sa6->sin6_family = AF_INET6; - sa6->sin6_len = sizeof(struct sockaddr_in6); + sa6->sin6_len = sizeof (struct sockaddr_in6); sa6->sin6_addr = *dst; if (IN6_IS_ADDR_MULTICAST(dst)) { ro->ro_rt = rtalloc1_scoped( @@ -897,7 +903,7 @@ getroute: rtalloc_scoped((struct route *)ro, ifscope); } if (ro->ro_rt != NULL) - RT_LOCK(ro->ro_rt); + RT_LOCK_SPIN(ro->ro_rt); } /* @@ -961,9 +967,7 @@ validateroute: } } RT_UNLOCK(route->ro_rt); - rtfree(route->ro_rt); - route->ro_rt = NULL; - route->ro_flags &= ~ROF_SRCIF_SELECTED; + ROUTE_RELEASE(route); error = EHOSTUNREACH; /* Undo the settings done above */ route = NULL; @@ -976,8 +980,17 @@ validateroute: if (srcif_selected) { VERIFY(has_route); - route->ro_flags |= ROF_SRCIF_SELECTED; - route->ro_rt->generation_id = route_generation; + if (ifa != route->ro_srcia || + !(route->ro_flags & ROF_SRCIF_SELECTED)) { + RT_CONVERT_LOCK(route->ro_rt); + if (ifa != NULL) + IFA_ADDREF(ifa); /* for route_in6 */ + if (route->ro_srcia != NULL) + IFA_REMREF(route->ro_srcia); + route->ro_srcia = ifa; + route->ro_flags |= ROF_SRCIF_SELECTED; + RT_GENID_SYNC(route->ro_rt); + } RT_UNLOCK(route->ro_rt); } } else { @@ -1003,21 +1016,29 @@ validateroute: done: if (error == 0) { - if ((ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) && - ((ifp != NULL && ifp->if_type == IFT_CELLULAR) || + if (ip6oa != NULL && + (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) && + ((ifp != NULL && IFNET_IS_CELLULAR(ifp)) || (route != NULL && route->ro_rt != NULL && - route->ro_rt->rt_ifp->if_type == IFT_CELLULAR))) { + IFNET_IS_CELLULAR(route->ro_rt->rt_ifp)))) { if (route != NULL && route->ro_rt != NULL) { - rtfree(route->ro_rt); - route->ro_rt = NULL; - route->ro_flags &= ~ROF_SRCIF_SELECTED; + ROUTE_RELEASE(route); route = NULL; } ifp = NULL; /* ditch ifp; keep ifp0 */ error = EHOSTUNREACH; + ip6oa->ip6oa_retflags |= IP6OARF_IFDENIED; } } + /* + * If the interface is disabled for IPv6, then ENETDOWN error. 
+ */ + if (error == 0 && + ifp != NULL && (ifp->if_eflags & IFEF_IPV6_DISABLED)) { + error = ENETDOWN; + } + if (ifp == NULL && (route == NULL || route->ro_rt == NULL)) { /* * This can happen if the caller did not pass a cached route @@ -1025,7 +1046,7 @@ done: */ error = EHOSTUNREACH; } - if (error == EHOSTUNREACH) + if (error == EHOSTUNREACH || error == ENETDOWN) ip6stat.ip6s_noroute++; /* @@ -1040,6 +1061,12 @@ done: *retifp = ifp; } + if (retsrcia != NULL) { + if (ifa != NULL) + IFA_ADDREF(ifa); /* for caller */ + *retsrcia = (struct in6_ifaddr *)ifa; + } + if (error == 0) { if (retrt != NULL && route != NULL) *retrt = route->ro_rt; /* ro_rt may be NULL */ @@ -1064,18 +1091,18 @@ done: static int in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - const struct ip6_out_args *ip6oa, struct ifnet **retifp) + struct ip6_out_args *ip6oa, struct ifnet **retifp) { int err = 0; struct route_in6 sro; struct rtentry *rt = NULL; if (ro == NULL) { - bzero(&sro, sizeof(sro)); + bzero(&sro, sizeof (sro)); ro = &sro; } - if ((err = selectroute(NULL, dstsock, opts, mopts, ro, retifp, + if ((err = selectroute(NULL, dstsock, opts, mopts, NULL, ro, retifp, &rt, 0, 1, ip6oa)) != 0) goto done; @@ -1117,8 +1144,10 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } done: - if (ro == &sro && rt && rt == sro.ro_rt) - rtfree(rt); + if (ro == &sro) { + VERIFY(rt == NULL || rt == ro->ro_rt); + ROUTE_RELEASE(ro); + } /* * retifp might point to a valid ifp with a reference held; @@ -1136,12 +1165,12 @@ done: */ int in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, - struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int clone, - const struct ip6_out_args *ip6oa) + struct ip6_pktopts *opts, struct ip6_moptions *mopts, + struct in6_ifaddr **retsrcia, struct route_in6 *ro, struct ifnet **retifp, + struct rtentry **retrt, int clone, struct ip6_out_args *ip6oa) { - return (selectroute(srcsock, dstsock, opts, mopts, ro, retifp, + return (selectroute(srcsock, dstsock, opts, mopts, retsrcia, ro, retifp, retrt, clone, 0, ip6oa)); } @@ -1151,14 +1180,12 @@ in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock, * 2. (If the outgoing interface is detected) the current * hop limit of the interface specified by router advertisement. * 3. The system default hoplimit. -*/ + */ int -in6_selecthlim( - struct in6pcb *in6p, - struct ifnet *ifp) +in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp) { if (in6p && in6p->in6p_hops >= 0) { - return(in6p->in6p_hops); + return (in6p->in6p_hops); } else { lck_rw_lock_shared(nd_if_rwlock); if (ifp && ifp->if_index < nd_ifinfo_indexlim) { @@ -1166,9 +1193,8 @@ in6_selecthlim( struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; if (ndi->initialized) { - lck_mtx_lock(&ndi->lock); + /* access chlim without lock, for performance */ chlim = ndi->chlim; - lck_mtx_unlock(&ndi->lock); } else { chlim = ip6_defhlim; } @@ -1176,7 +1202,7 @@ in6_selecthlim( return (chlim); } else { lck_rw_done(nd_if_rwlock); - return(ip6_defhlim); + return (ip6_defhlim); } } } @@ -1186,21 +1212,19 @@ in6_selecthlim( * share this function by all *bsd*... 
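[Illustrative aside, not part of the patch: in6_selecthlim() above resolves the hop limit through a three-step fallback — an explicit per-socket value, else the current hop limit advertised on the outgoing interface, else the system default. A minimal sketch of that chain, with hypothetical stand-in types:

#include <stdint.h>

#define IP6_DEFHLIM 64                   /* system default hop limit */

struct pcb    { int hops; };             /* -1 means "not set" */
struct ifinfo { int initialized; uint8_t chlim; };

static int
select_hlim(const struct pcb *in6p, const struct ifinfo *ndi)
{
	if (in6p != NULL && in6p->hops >= 0)
		return (in6p->hops);     /* per-socket override */
	if (ndi != NULL && ndi->initialized)
		return (ndi->chlim);     /* RA-advertised hop limit */
	return (IP6_DEFHLIM);            /* last resort */
}

The patch also drops the per-ndi mutex around the chlim read; a torn read is impossible for a single byte, so the lock bought nothing on this fast path.]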
*/ int -in6_pcbsetport( - __unused struct in6_addr *laddr, - struct inpcb *inp, - struct proc *p, - int locked) +in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct proc *p, + int locked) { +#pragma unused(laddr) struct socket *so = inp->inp_socket; u_int16_t lport = 0, first, last, *lastport; int count, error = 0, wild = 0; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; kauth_cred_t cred; if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */ - if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) { + if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) { socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(pcbinfo->mtx); + lck_rw_lock_exclusive(pcbinfo->ipi_lock); socket_lock(inp->inp_socket, 0); } } @@ -1214,23 +1238,23 @@ in6_pcbsetport( if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; - lastport = &pcbinfo->lasthi; + lastport = &pcbinfo->ipi_lasthi; } else if (inp->inp_flags & INP_LOWPORT) { cred = kauth_cred_proc_ref(p); error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); kauth_cred_unref(&cred); if (error != 0) { if (!locked) - lck_rw_done(pcbinfo->mtx); - return error; + lck_rw_done(pcbinfo->ipi_lock); + return (error); } first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ - lastport = &pcbinfo->lastlow; + lastport = &pcbinfo->ipi_lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; - lastport = &pcbinfo->lastport; + lastport = &pcbinfo->ipi_lastport; } /* * Simple check to ensure all ports are not used up causing @@ -1254,19 +1278,17 @@ in6_pcbsetport( inp->in6p_laddr = in6addr_any; inp->in6p_last_outifp = NULL; if (!locked) - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); } --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); - } while (in6_pcblookup_local(pcbinfo, - &inp->in6p_laddr, lport, wild)); + } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, + wild)); } else { - /* - * counting up - */ + /* counting up */ count = last - first; do { @@ -1278,15 +1300,15 @@ in6_pcbsetport( inp->in6p_laddr = in6addr_any; inp->in6p_last_outifp = NULL; if (!locked) - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); } ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); - } while (in6_pcblookup_local(pcbinfo, - &inp->in6p_laddr, lport, wild)); + } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport, + wild)); } inp->inp_lport = lport; @@ -1295,24 +1317,24 @@ in6_pcbsetport( inp->inp_lport = 0; inp->in6p_last_outifp = NULL; if (!locked) - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); } if (!locked) - lck_rw_done(pcbinfo->mtx); - return(0); + lck_rw_done(pcbinfo->ipi_lock); + return (0); } /* - * * The followings are implementation of the policy table using a - * * simple tail queue. - * * XXX such details should be hidden. - * * XXX implementation using binary tree should be more efficient. - * */ + * The followings are implementation of the policy table using a + * simple tail queue. + * XXX such details should be hidden. + * XXX implementation using binary tree should be more efficient. 
+ */ struct addrsel_policyent { - TAILQ_ENTRY(addrsel_policyent) ape_entry; - struct in6_addrpolicy ape_policy; + TAILQ_ENTRY(addrsel_policyent) ape_entry; + struct in6_addrpolicy ape_policy; }; TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); @@ -1322,8 +1344,7 @@ struct addrsel_policyhead addrsel_policytab; static void init_policy_queue(void) { - - TAILQ_INIT(&addrsel_policytab); + TAILQ_INIT(&addrsel_policytab); } void @@ -1335,96 +1356,157 @@ addrsel_policy_init(void) */ static const struct in6_addrpolicy defaddrsel[] = { /* localhost */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_LOOPBACK_INIT, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK128, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 60, - .label = 0 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = IN6MASK128, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 60, + .label = 0 + }, + /* ULA */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = {{{ 0xfc }}}, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK7, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 50, - .label = 1 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = IN6MASK7, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 50, + .label = 1 + }, + /* any IPv6 src */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK0, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 40, - .label = 2 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = IN6MASK0, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 40, + .label = 2 }, + /* any IPv4 src */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_V4MAPPED_INIT, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK96, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 30, - .label = 3 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = IN6MASK96, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 30, + .label = 3 + }, + /* 6to4 */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = {{{ 0x20, 0x02 }}}, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK16, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 20, - .label = 4 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = IN6MASK16, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 20, + .label = 4 + }, + /* Teredo */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = {{{ 0x20, 0x01 }}}, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK32, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 10, - .label = 5 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = 
IN6MASK32, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 10, + .label = 5 + }, + /* v4 compat addresses */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK96, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 1, - .label = 10 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = IN6MASK96, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 1, + .label = 10 + }, + /* site-local (deprecated) */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = {{{ 0xfe, 0xc0 }}}, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK16, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 1, - .label = 11 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = IN6MASK16, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 1, + .label = 11 + }, + /* 6bone (deprecated) */ - { .addr = { .sin6_family = AF_INET6, + { + .addr = { + .sin6_family = AF_INET6, .sin6_addr = {{{ 0x3f, 0xfe }}}, - .sin6_len = sizeof(struct sockaddr_in6) }, - .addrmask = { .sin6_family = AF_INET6, - .sin6_addr = IN6MASK16, - .sin6_len = sizeof(struct sockaddr_in6) }, - .preced = 1, - .label = 12 }, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .addrmask = { + .sin6_family = AF_INET6, + .sin6_addr = IN6MASK16, + .sin6_len = sizeof (struct sockaddr_in6) + }, + .preced = 1, + .label = 12 + }, }; int i; init_policy_queue(); /* initialize the "last resort" policy */ - bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); + bzero(&defaultaddrpolicy, sizeof (defaultaddrpolicy)); defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; - for (i = 0; i < sizeof(defaddrsel) / sizeof(defaddrsel[0]); i++) + for (i = 0; i < sizeof (defaddrsel) / sizeof (defaddrsel[0]); i++) add_addrsel_policyent(&defaddrsel[i]); } @@ -1483,36 +1565,36 @@ match_addrsel_policy(struct sockaddr_in6 *key) bestmatchlen = matchlen; } - next: + next: continue; } return (bestpol); -} +} static int add_addrsel_policyent(const struct in6_addrpolicy *newpolicy) { struct addrsel_policyent *new, *pol; - MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, - M_WAITOK); + MALLOC(new, struct addrsel_policyent *, sizeof (*new), M_IFADDR, + M_WAITOK); ADDRSEL_LOCK(); /* duplication check */ TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) { if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, - &pol->ape_policy.addr.sin6_addr) && + &pol->ape_policy.addr.sin6_addr) && IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, - &pol->ape_policy.addrmask.sin6_addr)) { + &pol->ape_policy.addrmask.sin6_addr)) { ADDRSEL_UNLOCK(); FREE(new, M_IFADDR); return (EEXIST); /* or override it? 
*/ } } - bzero(new, sizeof(*new)); + bzero(new, sizeof (*new)); /* XXX: should validate entry */ new->ape_policy = *newpolicy; @@ -1585,20 +1667,20 @@ dump_addrsel_policyent(const struct in6_addrpolicy *pol, void *arg) int error = 0; struct walkarg *w = arg; - error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); + error = SYSCTL_OUT(w->w_req, pol, sizeof (*pol)); return (error); } static int -in6_src_sysctl SYSCTL_HANDLER_ARGS +in6_src_sysctl SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) struct walkarg w; if (req->newptr) - return EPERM; - bzero(&w, sizeof(w)); + return (EPERM); + bzero(&w, sizeof (w)); w.w_req = req; return (walk_addrsel_policy(dump_addrsel_policyent, &w)); @@ -1664,12 +1746,8 @@ in6_src_ioctl(u_long cmd, caddr_t data) * we may want to change the function to return something other than ifp. */ int -in6_embedscope( - struct in6_addr *in6, - const struct sockaddr_in6 *sin6, - struct in6pcb *in6p, - struct ifnet **ifpp, - struct ip6_pktopts *opt) +in6_embedscope(struct in6_addr *in6, const struct sockaddr_in6 *sin6, + struct in6pcb *in6p, struct ifnet **ifpp, struct ip6_pktopts *opt) { struct ifnet *ifp = NULL; u_int32_t scopeid; @@ -1702,37 +1780,45 @@ in6_embedscope( IM6O_UNLOCK(in6p->in6p_moptions); } - if (opt) + if (opt != NULL) optp = opt; - else if (in6p) + else if (in6p != NULL) optp = in6p->in6p_outputopts; /* * KAME assumption: link id == interface id */ - ifnet_head_lock_shared(); - if (in6p && optp && (pi = optp->ip6po_pktinfo) && - pi->ipi6_ifindex) { - ifp = ifindex2ifnet[pi->ipi6_ifindex]; + if (in6p != NULL && optp != NULL && + (pi = optp->ip6po_pktinfo) != NULL && + pi->ipi6_ifindex != 0) { + /* ifp is needed here if only we're returning it */ + if (ifpp != NULL) { + ifnet_head_lock_shared(); + ifp = ifindex2ifnet[pi->ipi6_ifindex]; + ifnet_head_done(); + } in6->s6_addr16[1] = htons(pi->ipi6_ifindex); - } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) && + } else if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) && in6p->in6p_moptions != NULL && im6o_multicast_ifp != NULL) { ifp = im6o_multicast_ifp; in6->s6_addr16[1] = htons(ifp->if_index); - } else if (scopeid) { - /* + } else if (scopeid != 0) { + /* * Since scopeid is unsigned, we only have to check it - * against if_index + * against if_index (ifnet_head_lock not needed since + * if_index is an ever-increasing integer.) */ - if (if_index < scopeid) { - ifnet_head_done(); - return ENXIO; /* XXX EINVAL? */ + if (if_index < scopeid) + return (ENXIO); /* XXX EINVAL? 
*/ + /* ifp is needed here only if we're returning it */ + if (ifpp != NULL) { + ifnet_head_lock_shared(); + ifp = ifindex2ifnet[scopeid]; + ifnet_head_done(); } - ifp = ifindex2ifnet[scopeid]; - /*XXX assignment to 16bit from 32bit variable */ + /* XXX assignment to 16bit from 32bit variable */ in6->s6_addr16[1] = htons(scopeid & 0xffff); } - ifnet_head_done(); if (ifpp != NULL) { if (ifp != NULL) @@ -1741,7 +1827,7 @@ in6_embedscope( } } - return 0; + return (0); } /* @@ -1773,20 +1859,20 @@ in6_recoverscope( */ scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]); if (scopeid) { - /* - * sanity check + /* + * sanity check * * Since scopeid is unsigned, we only have to check it * against if_index */ if (if_index < scopeid) - return ENXIO; + return (ENXIO); if (ifp && ifp->if_index != scopeid) - return ENXIO; + return (ENXIO); sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = scopeid; } } - return 0; + return (0); } diff --git a/bsd/netinet6/in6_var.h b/bsd/netinet6/in6_var.h index ca7528f2d..80723feed 100644 --- a/bsd/netinet6/in6_var.h +++ b/bsd/netinet6/in6_var.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/in6_var.h,v 1.3.2.2 2001/07/03 11:01:52 ume Exp $ */ -/* $KAME: in6_var.h,v 1.56 2001/03/29 05:34:31 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
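[Illustrative aside, not part of the patch: in6_embedscope()/in6_recoverscope() in the hunks above implement the KAME convention — for scoped (e.g. link-local) addresses, the otherwise-zero second 16-bit word of the in6_addr carries the interface index while inside the kernel, and is cleared again before the address is exposed. A self-contained sketch of the two directions, using a hypothetical 16-bit view of the address:

#include <stdint.h>
#include <arpa/inet.h>   /* htons/ntohs */

/* Minimal stand-in for struct in6_addr viewed as eight 16-bit words. */
struct addr6 { uint16_t w[8]; };

static void
embed_scope(struct addr6 *a, uint16_t ifindex)
{
	/* KAME: stash the scope (interface index) in word 1. */
	a->w[1] = htons(ifindex);
}

static uint16_t
recover_scope(struct addr6 *a)
{
	uint16_t scope = ntohs(a->w[1]);

	a->w[1] = 0;     /* clear before handing the address out */
	return (scope);  /* caller stores this in sin6_scope_id */
}

This is why the patch can skip ifnet_head_lock when the caller does not want the ifp back: embedding needs only the integer index, validated against the ever-increasing if_index, not the ifnet pointer itself.]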
@@ -94,17 +91,15 @@ */ #ifndef _NETINET6_IN6_VAR_H_ -#define _NETINET6_IN6_VAR_H_ +#define _NETINET6_IN6_VAR_H_ #include -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #include #include -#endif - -#ifdef __APPLE__ +#include +#endif /* BSD_KERNEL_PRIVATE */ #include -#endif /* * pltime/vltime are just for future reference (required to implements 2 @@ -121,7 +116,7 @@ struct in6_addrlifetime { u_int32_t ia6t_pltime; /* prefix lifetime */ }; -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE struct in6_addrlifetime_32 { u_int32_t ia6t_expire; u_int32_t ia6t_preferred; @@ -136,31 +131,46 @@ struct in6_addrlifetime_64 { u_int32_t ia6t_pltime; }; +/* + * Internal representation of ia6_lifetime (expiration in uptime unit) + */ +struct in6_addrlifetime_i { + u_int64_t ia6ti_expire; /* valid lifetime expiration time */ + u_int64_t ia6ti_preferred; /* preferred lifetime expiration time */ + u_int32_t ia6ti_vltime; /* valid lifetime */ + u_int32_t ia6ti_pltime; /* prefix lifetime */ + u_int64_t ia6ti_base_calendartime; /* calendar time at creation */ + u_int64_t ia6ti_base_uptime; /* uptime at creation */ +}; + /* * Interface address, IPv6 version. One of these structures * is allocated for each interface with an IPv6 address. * The ifaddr structure contains the protocol-independent part * of the structure and is assumed to be first. */ -struct in6_ifaddr { - struct ifaddr ia_ifa; /* protocol-independent info */ + +struct in6_ifaddr { + struct ifaddr ia_ifa; /* protocol-independent info */ #define ia_ifp ia_ifa.ifa_ifp -#define ia_flags ia_ifa.ifa_flags - struct sockaddr_in6 ia_addr; /* interface address */ - struct sockaddr_in6 ia_net; /* network number of interface */ - struct sockaddr_in6 ia_dstaddr; /* space for destination addr */ - struct sockaddr_in6 ia_prefixmask; /* prefix mask */ +#define ia_flags ia_ifa.ifa_flags + struct sockaddr_in6 ia_addr; /* interface address */ + struct sockaddr_in6 ia_net; /* network number of interface */ + struct sockaddr_in6 ia_dstaddr; /* space for destination addr */ + struct sockaddr_in6 ia_prefixmask; /* prefix mask */ u_int32_t ia_plen; /* prefix length */ - struct in6_ifaddr *ia_next; /* next in6 list of IP6 addresses */ - int ia6_flags; + struct in6_ifaddr *ia_next; /* next in6 list of IP6 addresses */ + int ia6_flags; - struct in6_addrlifetime ia6_lifetime; - time_t ia6_createtime; /* the creation time of this address, which is - * currently used for temporary addresses only. - */ - time_t ia6_updatetime; + struct in6_addrlifetime_i ia6_lifetime; + /* + * the creation time of this address, which is + * currently used for temporary addresses only. + */ + u_int64_t ia6_createtime; + u_int64_t ia6_updatetime; - struct ifprefix *ia6_ifpr; /* back pointer to ifprefix */ + struct ifprefix *ia6_ifpr; /* back pointer to ifprefix */ /* back pointer to the ND prefix (for autoconfigured addresses only) */ struct nd_prefix *ia6_ndpr; @@ -170,7 +180,7 @@ struct in6_ifaddr { }; #define ifatoia6(ifa) ((struct in6_ifaddr *)(void *)(ifa)) -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ /* control structure to manage address selection policy */ struct in6_addrpolicy { @@ -232,7 +242,7 @@ struct icmp6_ifstat { u_quad_t ifs6_in_error; /* ipv6IfIcmpInDestUnreachs, # of input dest unreach errors */ u_quad_t ifs6_in_dstunreach; - /* ipv6IfIcmpInAdminProhibs, # of input administratively prohibited errs */ + /* ipv6IfIcmpInAdminProhibs, # of input admin. 
prohibited errs */ u_quad_t ifs6_in_adminprohib; /* ipv6IfIcmpInTimeExcds, # of input time exceeded errors */ u_quad_t ifs6_in_timeexceed; @@ -250,7 +260,7 @@ struct icmp6_ifstat { u_quad_t ifs6_in_routeradvert; /* ipv6IfIcmpInNeighborSolicits, # of input neighbor solicitations */ u_quad_t ifs6_in_neighborsolicit; - /* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advertisements */ + /* ipv6IfIcmpInNeighborAdvertisements, # of input neighbor advs. */ u_quad_t ifs6_in_neighboradvert; /* ipv6IfIcmpInRedirects, # of input redirects */ u_quad_t ifs6_in_redirect; @@ -270,7 +280,7 @@ struct icmp6_ifstat { u_quad_t ifs6_out_error; /* ipv6IfIcmpOutDestUnreachs, # of output dest unreach errors */ u_quad_t ifs6_out_dstunreach; - /* ipv6IfIcmpOutAdminProhibs, # of output administratively prohibited errs */ + /* ipv6IfIcmpOutAdminProhibs, # of output admin. prohibited errs */ u_quad_t ifs6_out_adminprohib; /* ipv6IfIcmpOutTimeExcds, # of output time exceeded errors */ u_quad_t ifs6_out_timeexceed; @@ -284,11 +294,11 @@ struct icmp6_ifstat { u_quad_t ifs6_out_echoreply; /* ipv6IfIcmpOutRouterSolicits, # of output router solicitations */ u_quad_t ifs6_out_routersolicit; - /* ipv6IfIcmpOutRouterAdvertisements, # of output router advertisements */ + /* ipv6IfIcmpOutRouterAdvertisements, # of output router advs. */ u_quad_t ifs6_out_routeradvert; /* ipv6IfIcmpOutNeighborSolicits, # of output neighbor solicitations */ u_quad_t ifs6_out_neighborsolicit; - /* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advertisements */ + /* ipv6IfIcmpOutNeighborAdvertisements, # of output neighbor advs. */ u_quad_t ifs6_out_neighboradvert; /* ipv6IfIcmpOutRedirects, # of output redirects */ u_quad_t ifs6_out_redirect; @@ -300,7 +310,19 @@ struct icmp6_ifstat { u_quad_t ifs6_out_mlddone; }; -struct in6_ifreq { +#ifdef BSD_KERNEL_PRIVATE +/* + * Per-interface IPv6 structures. 
+ */ +struct in6_ifextra { + struct scope6_id scope6_id; + struct in6_ifstat in6_ifstat; + struct icmp6_ifstat icmp6_ifstat; +}; +#define IN6_IFEXTRA(_ifp) ((struct in6_ifextra *)(_ifp->if_inet6data)) +#endif /* BSD_KERNEL_PRIVATE */ + +struct in6_ifreq { char ifr_name[IFNAMSIZ]; union { struct sockaddr_in6 ifru_addr; @@ -317,7 +339,7 @@ struct in6_ifreq { } ifr_ifru; }; -struct in6_aliasreq { +struct in6_aliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr_in6 ifra_addr; struct sockaddr_in6 ifra_dstaddr; @@ -326,8 +348,53 @@ struct in6_aliasreq { struct in6_addrlifetime ifra_lifetime; }; -#ifdef XNU_KERNEL_PRIVATE -struct in6_aliasreq_32 { +#ifdef PRIVATE +#define IN6_CGA_MODIFIER_LENGTH 16 + +struct in6_cga_modifier { + u_int8_t octets[IN6_CGA_MODIFIER_LENGTH]; +}; + +struct in6_cga_prepare { + struct in6_cga_modifier cga_modifier; + u_int8_t cga_security_level; + u_int8_t reserved_A[15]; +}; + +#define IN6_CGA_KEY_MAXSIZE 2048 /* octets */ + +struct in6_cga_nodecfg { + struct iovec cga_privkey; + struct iovec cga_pubkey; + struct in6_cga_prepare cga_prepare; +}; + +struct in6_llstartreq { + char llsr_name[IFNAMSIZ]; + int llsr_flags; + struct in6_cga_prepare llsr_cgaprep; + struct in6_addrlifetime llsr_lifetime; +}; + +#ifdef BSD_KERNEL_PRIVATE +struct in6_llstartreq_32 { + char llsr_name[IFNAMSIZ]; + int llsr_flags; + struct in6_cga_prepare llsr_cgaprep; + struct in6_addrlifetime_32 llsr_lifetime; +}; + +struct in6_llstartreq_64 { + char llsr_name[IFNAMSIZ]; + int llsr_flags; + struct in6_cga_prepare llsr_cgaprep; + struct in6_addrlifetime_64 llsr_lifetime; +}; +#endif /* !BSD_KERNEL_PRIVATE */ +#endif /* PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE +struct in6_aliasreq_32 { char ifra_name[IFNAMSIZ]; struct sockaddr_in6 ifra_addr; struct sockaddr_in6 ifra_dstaddr; @@ -336,7 +403,7 @@ struct in6_aliasreq_32 { struct in6_addrlifetime_32 ifra_lifetime; }; -struct in6_aliasreq_64 { +struct in6_aliasreq_64 { char ifra_name[IFNAMSIZ]; struct sockaddr_in6 ifra_addr; struct sockaddr_in6 ifra_dstaddr; @@ -344,11 +411,11 @@ struct in6_aliasreq_64 { int ifra_flags; struct in6_addrlifetime_64 ifra_lifetime; }; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ /* prefix type macro */ -#define IN6_PREFIX_ND 1 -#define IN6_PREFIX_RR 2 +#define IN6_PREFIX_ND 1 +#define IN6_PREFIX_RR 2 /* * prefix related flags passed between kernel(NDP related part) and @@ -382,20 +449,20 @@ struct in6_prefixreq { struct sockaddr_in6 ipr_prefix; }; -#define PR_ORIG_RA 0 -#define PR_ORIG_RR 1 -#define PR_ORIG_STATIC 2 -#define PR_ORIG_KERNEL 3 +#define PR_ORIG_RA 0 +#define PR_ORIG_RR 1 +#define PR_ORIG_STATIC 2 +#define PR_ORIG_KERNEL 3 -#define ipr_raf_onlink ipr_flags.prf_ra.onlink -#define ipr_raf_auto ipr_flags.prf_ra.autonomous +#define ipr_raf_onlink ipr_flags.prf_ra.onlink +#define ipr_raf_auto ipr_flags.prf_ra.autonomous -#define ipr_statef_onlink ipr_flags.prf_state.onlink +#define ipr_statef_onlink ipr_flags.prf_state.onlink -#define ipr_rrf_decrvalid ipr_flags.prf_rr.decrvalid -#define ipr_rrf_decrprefd ipr_flags.prf_rr.decrprefd +#define ipr_rrf_decrvalid ipr_flags.prf_rr.decrvalid +#define ipr_rrf_decrprefd ipr_flags.prf_rr.decrprefd -struct in6_rrenumreq { +struct in6_rrenumreq { char irr_name[IFNAMSIZ]; u_char irr_origin; u_char irr_m_len; /* match len for matchprefix */ @@ -415,54 +482,35 @@ struct in6_rrenumreq { struct sockaddr_in6 irr_useprefix; }; -#define irr_raf_mask_onlink irr_raflagmask.onlink -#define irr_raf_mask_auto irr_raflagmask.autonomous -#define irr_raf_mask_reserved 
irr_raflagmask.reserved +#define irr_raf_mask_onlink irr_raflagmask.onlink +#define irr_raf_mask_auto irr_raflagmask.autonomous +#define irr_raf_mask_reserved irr_raflagmask.reserved -#define irr_raf_onlink irr_flags.prf_ra.onlink -#define irr_raf_auto irr_flags.prf_ra.autonomous +#define irr_raf_onlink irr_flags.prf_ra.onlink +#define irr_raf_auto irr_flags.prf_ra.autonomous -#define irr_statef_onlink irr_flags.prf_state.onlink +#define irr_statef_onlink irr_flags.prf_state.onlink -#define irr_rrf irr_flags.prf_rr -#define irr_rrf_decrvalid irr_flags.prf_rr.decrvalid -#define irr_rrf_decrprefd irr_flags.prf_rr.decrprefd +#define irr_rrf irr_flags.prf_rr +#define irr_rrf_decrvalid irr_flags.prf_rr.decrvalid +#define irr_rrf_decrprefd irr_flags.prf_rr.decrprefd -#ifdef XNU_KERNEL_PRIVATE /* - * Given a pointer to an in6_ifaddr (ifaddr), - * return a pointer to the addr as a sockaddr_in6 + * Event data, inet6 style. */ -#define IA6_IN6(ia) (&((ia)->ia_addr.sin6_addr)) -#define IA6_DSTIN6(ia) (&((ia)->ia_dstaddr.sin6_addr)) -#define IA6_MASKIN6(ia) (&((ia)->ia_prefixmask.sin6_addr)) -#define IA6_SIN6(ia) (&((ia)->ia_addr)) -#define IA6_DSTSIN6(ia) (&((ia)->ia_dstaddr)) -#define IFA_IN6(x) \ - (&((struct sockaddr_in6 *)(void *)((x)->ifa_addr))->sin6_addr) -#define IFA_DSTIN6(x) \ - (&((struct sockaddr_in6 *)(void *)((x)->ifa_dstaddr))->sin6_addr) -#define IFPR_IN6(x) \ - (&((struct sockaddr_in6 *)(void *)((x)->ifpr_prefix))->sin6_addr) -#endif /* XNU_KERNEL_PRIVATE */ - -/* - * Event data, internet6 style. - */ - struct kev_in6_addrlifetime { - u_int32_t ia6t_expire; - u_int32_t ia6t_preferred; - u_int32_t ia6t_vltime; - u_int32_t ia6t_pltime; + u_int32_t ia6t_expire; + u_int32_t ia6t_preferred; + u_int32_t ia6t_vltime; + u_int32_t ia6t_pltime; }; struct kev_in6_data { - struct net_event_data link_data; - struct sockaddr_in6 ia_addr; /* interface address */ - struct sockaddr_in6 ia_net; /* network number of interface */ - struct sockaddr_in6 ia_dstaddr; /* space for destination addr */ - struct sockaddr_in6 ia_prefixmask; /* prefix mask */ + struct net_event_data link_data; + struct sockaddr_in6 ia_addr; /* interface address */ + struct sockaddr_in6 ia_net; /* network number of interface */ + struct sockaddr_in6 ia_dstaddr; /* space for destination addr */ + struct sockaddr_in6 ia_prefixmask; /* prefix mask */ u_int32_t ia_plen; /* prefix length */ u_int32_t ia6_flags; /* address flags from in6_ifaddr */ struct kev_in6_addrlifetime ia_lifetime; /* address life info */ @@ -471,214 +519,271 @@ struct kev_in6_data { /* * Define inet6 event subclass and specific inet6 events. */ +#define KEV_INET6_SUBCLASS 6 /* inet6 subclass identifier */ -#define KEV_INET6_SUBCLASS 6 /* inet6 subclass identifier */ - -#define KEV_INET6_NEW_USER_ADDR 1 /* Userland configured IPv6 address */ -#define KEV_INET6_CHANGED_ADDR 2 /* Address changed event (future) */ -#define KEV_INET6_ADDR_DELETED 3 /* IPv6 add. 
in ia_addr field was deleted */ -#define KEV_INET6_NEW_LL_ADDR 4 /* Autoconfigured linklocal address has appeared */ -#define KEV_INET6_NEW_RTADV_ADDR 5 /* Autoconf router advertised address has appeared */ -#define KEV_INET6_DEFROUTER 6 /* Default router dectected by kernel */ +#define KEV_INET6_NEW_USER_ADDR 1 /* Userland configured IPv6 address */ +#define KEV_INET6_CHANGED_ADDR 2 /* Address changed event (future) */ +#define KEV_INET6_ADDR_DELETED 3 /* IPv6 address was deleted */ +#define KEV_INET6_NEW_LL_ADDR 4 /* Autoconf LL address appeared */ +#define KEV_INET6_NEW_RTADV_ADDR 5 /* Autoconf address has appeared */ +#define KEV_INET6_DEFROUTER 6 /* Default router detected */ -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* Utility function used inside netinet6 kernel code for generating events */ void in6_post_msg(struct ifnet *, u_int32_t, struct in6_ifaddr *); -#endif /* XNU_KERNEL_PRIVATE */ -#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ +#define IN6_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ (((d)->s6_addr32[0] ^ (a)->s6_addr32[0]) & (m)->s6_addr32[0]) == 0 && \ (((d)->s6_addr32[1] ^ (a)->s6_addr32[1]) & (m)->s6_addr32[1]) == 0 && \ (((d)->s6_addr32[2] ^ (a)->s6_addr32[2]) & (m)->s6_addr32[2]) == 0 && \ - (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0 ) + (((d)->s6_addr32[3] ^ (a)->s6_addr32[3]) & (m)->s6_addr32[3]) == 0) + +/* + * Given a pointer to an in6_ifaddr (ifaddr), + * return a pointer to the addr as a sockaddr_in6 + */ +#define IA6_IN6(ia) (&((ia)->ia_addr.sin6_addr)) +#define IA6_DSTIN6(ia) (&((ia)->ia_dstaddr.sin6_addr)) +#define IA6_MASKIN6(ia) (&((ia)->ia_prefixmask.sin6_addr)) +#define IA6_SIN6(ia) (&((ia)->ia_addr)) +#define IA6_DSTSIN6(ia) (&((ia)->ia_dstaddr)) +#define IFA_IN6(x) \ + (&((struct sockaddr_in6 *)(void *)((x)->ifa_addr))->sin6_addr) +#define IFA_DSTIN6(x) \ + (&((struct sockaddr_in6 *)(void *)((x)->ifa_dstaddr))->sin6_addr) +#define IFPR_IN6(x) \ + (&((struct sockaddr_in6 *)(void *)((x)->ifpr_prefix))->sin6_addr) +#endif /* BSD_KERNEL_PRIVATE */ -#define SIOCSIFADDR_IN6 _IOW('i', 12, struct in6_ifreq) -#define SIOCGIFADDR_IN6 _IOWR('i', 33, struct in6_ifreq) +#define SIOCSIFADDR_IN6 _IOW('i', 12, struct in6_ifreq) +#define SIOCGIFADDR_IN6 _IOWR('i', 33, struct in6_ifreq) /* * SIOCSxxx ioctls should be unused (see comments in in6.c), but * we do not shift numbers for binary compatibility. 
*/ -#define SIOCSIFDSTADDR_IN6 _IOW('i', 14, struct in6_ifreq) -#define SIOCSIFNETMASK_IN6 _IOW('i', 22, struct in6_ifreq) - -#define SIOCGIFDSTADDR_IN6 _IOWR('i', 34, struct in6_ifreq) -#define SIOCGIFNETMASK_IN6 _IOWR('i', 37, struct in6_ifreq) - -#define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq) -#define SIOCAIFADDR_IN6 _IOW('i', 26, struct in6_aliasreq) -#ifdef XNU_KERNEL_PRIVATE -#define SIOCAIFADDR_IN6_32 _IOW('i', 26, struct in6_aliasreq_32) -#define SIOCAIFADDR_IN6_64 _IOW('i', 26, struct in6_aliasreq_64) -#endif /* XNU_KERNEL_PRIVATE */ - -#define SIOCSIFPHYADDR_IN6 _IOW('i', 62, struct in6_aliasreq) -#ifdef XNU_KERNEL_PRIVATE -#define SIOCSIFPHYADDR_IN6_32 _IOW('i', 62, struct in6_aliasreq_32) -#define SIOCSIFPHYADDR_IN6_64 _IOW('i', 62, struct in6_aliasreq_64) -#endif /* XNU_KERNEL_PRIVATE */ +#define SIOCSIFDSTADDR_IN6 _IOW('i', 14, struct in6_ifreq) +#define SIOCSIFNETMASK_IN6 _IOW('i', 22, struct in6_ifreq) + +#define SIOCGIFDSTADDR_IN6 _IOWR('i', 34, struct in6_ifreq) +#define SIOCGIFNETMASK_IN6 _IOWR('i', 37, struct in6_ifreq) + +#define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq) +#define SIOCAIFADDR_IN6 _IOW('i', 26, struct in6_aliasreq) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCAIFADDR_IN6_32 _IOW('i', 26, struct in6_aliasreq_32) +#define SIOCAIFADDR_IN6_64 _IOW('i', 26, struct in6_aliasreq_64) +#endif /* BSD_KERNEL_PRIVATE */ + +#define SIOCSIFPHYADDR_IN6 _IOW('i', 62, struct in6_aliasreq) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCSIFPHYADDR_IN6_32 _IOW('i', 62, struct in6_aliasreq_32) +#define SIOCSIFPHYADDR_IN6_64 _IOW('i', 62, struct in6_aliasreq_64) +#endif /* BSD_KERNEL_PRIVATE */ #define SIOCGIFPSRCADDR_IN6 _IOWR('i', 63, struct in6_ifreq) #define SIOCGIFPDSTADDR_IN6 _IOWR('i', 64, struct in6_ifreq) -#define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq) -#define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist) -#ifdef XNU_KERNEL_PRIVATE -#define SIOCGDRLST_IN6_32 _IOWR('i', 74, struct in6_drlist_32) -#define SIOCGDRLST_IN6_64 _IOWR('i', 74, struct in6_drlist_64) -#endif /* XNU_KERNEL_PRIVATE */ -#define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_prlist) -#ifdef XNU_KERNEL_PRIVATE -#define SIOCGPRLST_IN6_32 _IOWR('i', 75, struct in6_prlist_32) -#define SIOCGPRLST_IN6_64 _IOWR('i', 75, struct in6_prlist_64) -#endif /* XNU_KERNEL_PRIVATE */ -#define OSIOCGIFINFO_IN6 _IOWR('i', 108, struct in6_ondireq) -#define SIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq) -#define SIOCSNDFLUSH_IN6 _IOWR('i', 77, struct in6_ifreq) -#define SIOCGNBRINFO_IN6 _IOWR('i', 78, struct in6_nbrinfo) -#ifdef XNU_KERNEL_PRIVATE -#define SIOCGNBRINFO_IN6_32 _IOWR('i', 78, struct in6_nbrinfo_32) -#define SIOCGNBRINFO_IN6_64 _IOWR('i', 78, struct in6_nbrinfo_64) -#endif /* XNU_KERNEL_PRIVATE */ -#define SIOCSPFXFLUSH_IN6 _IOWR('i', 79, struct in6_ifreq) -#define SIOCSRTRFLUSH_IN6 _IOWR('i', 80, struct in6_ifreq) - -#define SIOCGIFALIFETIME_IN6 _IOWR('i', 81, struct in6_ifreq) -#define SIOCSIFALIFETIME_IN6 _IOWR('i', 82, struct in6_ifreq) -#define SIOCGIFSTAT_IN6 _IOWR('i', 83, struct in6_ifreq) -#define SIOCGIFSTAT_ICMP6 _IOWR('i', 84, struct in6_ifreq) - -#define SIOCSDEFIFACE_IN6 _IOWR('i', 85, struct in6_ndifreq) -#define SIOCGDEFIFACE_IN6 _IOWR('i', 86, struct in6_ndifreq) -#ifdef XNU_KERNEL_PRIVATE -#define SIOCSDEFIFACE_IN6_32 _IOWR('i', 85, struct in6_ndifreq_32) -#define SIOCSDEFIFACE_IN6_64 _IOWR('i', 85, struct in6_ndifreq_64) -#define SIOCGDEFIFACE_IN6_32 _IOWR('i', 86, struct in6_ndifreq_32) -#define SIOCGDEFIFACE_IN6_64 _IOWR('i', 86, struct in6_ndifreq_64) -#endif /* 
XNU_KERNEL_PRIVATE */ - -#define SIOCSIFINFO_FLAGS _IOWR('i', 87, struct in6_ndireq) /* XXX */ - -#define SIOCSSCOPE6 _IOW('i', 88, struct in6_ifreq) -#define SIOCGSCOPE6 _IOWR('i', 89, struct in6_ifreq) -#define SIOCGSCOPE6DEF _IOWR('i', 90, struct in6_ifreq) - -#define SIOCSIFPREFIX_IN6 _IOW('i', 100, struct in6_prefixreq) /* set */ -#define SIOCGIFPREFIX_IN6 _IOWR('i', 101, struct in6_prefixreq) /* get */ -#define SIOCDIFPREFIX_IN6 _IOW('i', 102, struct in6_prefixreq) /* del */ -#define SIOCAIFPREFIX_IN6 _IOW('i', 103, struct in6_rrenumreq) /* add */ -#define SIOCCIFPREFIX_IN6 _IOW('i', 104, \ - struct in6_rrenumreq) /* change */ -#define SIOCSGIFPREFIX_IN6 _IOW('i', 105, \ - struct in6_rrenumreq) /* set global */ - -#define SIOCGETSGCNT_IN6 _IOWR('u', 28, \ - struct sioc_sg_req6) /* get s,g pkt cnt */ -#define SIOCGETMIFCNT_IN6 _IOWR('u', 107, \ - struct sioc_mif_req6) /* get pkt cnt per if */ -#ifdef XNU_KERNEL_PRIVATE -#define SIOCGETMIFCNT_IN6_32 _IOWR('u', 107, struct sioc_mif_req6_32) -#define SIOCGETMIFCNT_IN6_64 _IOWR('u', 107, struct sioc_mif_req6_64) -#endif /* XNU_KERNEL_PRIVATE */ - -#define SIOCAADDRCTL_POLICY _IOW('u', 108, struct in6_addrpolicy) -#define SIOCDADDRCTL_POLICY _IOW('u', 109, struct in6_addrpolicy) +#define SIOCGIFAFLAG_IN6 _IOWR('i', 73, struct in6_ifreq) + +#define SIOCGDRLST_IN6 _IOWR('i', 74, struct in6_drlist) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCGDRLST_IN6_32 _IOWR('i', 74, struct in6_drlist_32) +#define SIOCGDRLST_IN6_64 _IOWR('i', 74, struct in6_drlist_64) +#endif /* BSD_KERNEL_PRIVATE */ + +#define SIOCGPRLST_IN6 _IOWR('i', 75, struct in6_prlist) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCGPRLST_IN6_32 _IOWR('i', 75, struct in6_prlist_32) +#define SIOCGPRLST_IN6_64 _IOWR('i', 75, struct in6_prlist_64) +#endif /* BSD_KERNEL_PRIVATE */ + +#define OSIOCGIFINFO_IN6 _IOWR('i', 108, struct in6_ondireq) +#define SIOCGIFINFO_IN6 _IOWR('i', 76, struct in6_ondireq) +#define SIOCSNDFLUSH_IN6 _IOWR('i', 77, struct in6_ifreq) + +#define SIOCGNBRINFO_IN6 _IOWR('i', 78, struct in6_nbrinfo) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCGNBRINFO_IN6_32 _IOWR('i', 78, struct in6_nbrinfo_32) +#define SIOCGNBRINFO_IN6_64 _IOWR('i', 78, struct in6_nbrinfo_64) +#endif /* BSD_KERNEL_PRIVATE */ + +#define SIOCSPFXFLUSH_IN6 _IOWR('i', 79, struct in6_ifreq) +#define SIOCSRTRFLUSH_IN6 _IOWR('i', 80, struct in6_ifreq) + +#define SIOCGIFALIFETIME_IN6 _IOWR('i', 81, struct in6_ifreq) +#define SIOCSIFALIFETIME_IN6 _IOWR('i', 82, struct in6_ifreq) +#define SIOCGIFSTAT_IN6 _IOWR('i', 83, struct in6_ifreq) +#define SIOCGIFSTAT_ICMP6 _IOWR('i', 84, struct in6_ifreq) + +#define SIOCSDEFIFACE_IN6 _IOWR('i', 85, struct in6_ndifreq) +#define SIOCGDEFIFACE_IN6 _IOWR('i', 86, struct in6_ndifreq) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCSDEFIFACE_IN6_32 _IOWR('i', 85, struct in6_ndifreq_32) +#define SIOCSDEFIFACE_IN6_64 _IOWR('i', 85, struct in6_ndifreq_64) +#define SIOCGDEFIFACE_IN6_32 _IOWR('i', 86, struct in6_ndifreq_32) +#define SIOCGDEFIFACE_IN6_64 _IOWR('i', 86, struct in6_ndifreq_64) +#endif /* BSD_KERNEL_PRIVATE */ + +#define SIOCSIFINFO_FLAGS _IOWR('i', 87, struct in6_ndireq) /* XXX */ + +/* N.B.: These 3 ioctls are deprecated and won't work */ +#define SIOCSSCOPE6 _IOW('i', 88, struct in6_ifreq) +#define SIOCGSCOPE6 _IOWR('i', 89, struct in6_ifreq) +#define SIOCGSCOPE6DEF _IOWR('i', 90, struct in6_ifreq) + +#define SIOCSIFPREFIX_IN6 _IOW('i', 100, struct in6_prefixreq) /* set */ +#define SIOCGIFPREFIX_IN6 _IOWR('i', 101, struct in6_prefixreq) /* get */ +#define SIOCDIFPREFIX_IN6 _IOW('i', 
102, struct in6_prefixreq) /* del */ +#define SIOCAIFPREFIX_IN6 _IOW('i', 103, struct in6_rrenumreq) /* add */ + +/* change */ +#define SIOCCIFPREFIX_IN6 _IOW('i', 104, struct in6_rrenumreq) + +/* set global */ +#define SIOCSGIFPREFIX_IN6 _IOW('i', 105, struct in6_rrenumreq) + +/* + * multicast routing, get s/g pkt cnt, pkt cnt per interface. + */ +#define SIOCGETSGCNT_IN6 _IOWR('u', 28, struct sioc_sg_req6) +#define SIOCGETMIFCNT_IN6 _IOWR('u', 107, struct sioc_mif_req6) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCGETMIFCNT_IN6_32 _IOWR('u', 107, struct sioc_mif_req6_32) +#define SIOCGETMIFCNT_IN6_64 _IOWR('u', 107, struct sioc_mif_req6_64) +#endif /* BSD_KERNEL_PRIVATE */ + +#define SIOCAADDRCTL_POLICY _IOW('u', 108, struct in6_addrpolicy) +#define SIOCDADDRCTL_POLICY _IOW('u', 109, struct in6_addrpolicy) #ifdef PRIVATE /* - * temporary control calls to attach/detach IP to/from an ethernet interface + * temporary control calls to attach/detach IPv6 to/from an interface. + */ +#define SIOCPROTOATTACH_IN6 _IOWR('i', 110, struct in6_aliasreq) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCPROTOATTACH_IN6_32 _IOWR('i', 110, struct in6_aliasreq_32) +#define SIOCPROTOATTACH_IN6_64 _IOWR('i', 110, struct in6_aliasreq_64) +#endif /* BSD_KERNEL_PRIVATE */ +#define SIOCPROTODETACH_IN6 _IOWR('i', 111, struct in6_ifreq) + +/* + * start/stop acquiring linklocal on interface + */ +#define SIOCLL_START _IOWR('i', 130, struct in6_aliasreq) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCLL_START_32 _IOWR('i', 130, struct in6_aliasreq_32) +#define SIOCLL_START_64 _IOWR('i', 130, struct in6_aliasreq_64) +#endif /* BSD_KERNEL_PRIVATE */ +#define SIOCLL_STOP _IOWR('i', 131, struct in6_ifreq) + +/* + * accept rtadvd (and stop accepting) on this interface. + */ +#define SIOCAUTOCONF_START _IOWR('i', 132, struct in6_ifreq) +#define SIOCAUTOCONF_STOP _IOWR('i', 133, struct in6_ifreq) + +/* + * add/remove default IPv6 router. + */ +#define SIOCDRADD_IN6 _IOWR('u', 134, struct in6_defrouter) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCDRADD_IN6_32 _IOWR('u', 134, struct in6_defrouter_32) +#define SIOCDRADD_IN6_64 _IOWR('u', 134, struct in6_defrouter_64) +#endif /* BSD_KERNEL_PRIVATE */ +#define SIOCDRDEL_IN6 _IOWR('u', 135, struct in6_defrouter) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCDRDEL_IN6_32 _IOWR('u', 135, struct in6_defrouter_32) +#define SIOCDRDEL_IN6_64 _IOWR('u', 135, struct in6_defrouter_64) +#endif /* BSD_KERNEL_PRIVATE */ + +/* + * enable/disable IPv6 router mode on interface.
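For context, this is roughly how a userland tool exercises one of the request codes defined above. A hedged sketch, assuming a BSD/Darwin system whose <netinet6/in6_var.h> exposes struct in6_ifreq and SIOCGIFAFLAG_IN6 as in this header; error handling is trimmed:

#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet6/in6_var.h>

/* Return the IN6_IFF_* flags for an address on ifname, or -1 on error. */
int
get_addr_flags6(const char *ifname, const struct sockaddr_in6 *sin6)
{
        struct in6_ifreq ifr;
        int s, flags = -1;

        if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
                return (-1);
        memset(&ifr, 0, sizeof (ifr));
        strlcpy(ifr.ifr_name, ifname, sizeof (ifr.ifr_name));
        ifr.ifr_ifru.ifru_addr = *sin6;         /* address to look up */
        if (ioctl(s, SIOCGIFAFLAG_IN6, &ifr) == 0)
                flags = ifr.ifr_ifru.ifru_flags6;
        (void) close(s);
        return (flags);
}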
+ */ +#define SIOCSETROUTERMODE_IN6 _IOWR('i', 136, struct in6_ifreq) + +/* + * start secure link-local interface addresses */ -#define SIOCPROTOATTACH_IN6 _IOWR('i', 110, struct in6_aliasreq) /* attach proto to interface */ -#ifdef XNU_KERNEL_PRIVATE -#define SIOCPROTOATTACH_IN6_32 _IOWR('i', 110, struct in6_aliasreq_32) -#define SIOCPROTOATTACH_IN6_64 _IOWR('i', 110, struct in6_aliasreq_64) -#endif /* XNU_KERNEL_PRIVATE */ -#define SIOCPROTODETACH_IN6 _IOWR('i', 111, struct in6_ifreq) /* detach proto from interface */ - -#define SIOCLL_START _IOWR('i', 130, struct in6_aliasreq) /* start aquiring linklocal on interface */ -#ifdef XNU_KERNEL_PRIVATE -#define SIOCLL_START_32 _IOWR('i', 130, struct in6_aliasreq_32) -#define SIOCLL_START_64 _IOWR('i', 130, struct in6_aliasreq_64) -#endif /* XNU_KERNEL_PRIVATE */ -#define SIOCLL_STOP _IOWR('i', 131, struct in6_ifreq) /* deconfigure linklocal from interface */ -#define SIOCAUTOCONF_START _IOWR('i', 132, struct in6_ifreq) /* accept rtadvd on this interface */ -#define SIOCAUTOCONF_STOP _IOWR('i', 133, struct in6_ifreq) /* stop accepting rtadv for this interface */ - -#define SIOCDRADD_IN6 _IOWR('u', 134, struct in6_defrouter) -#ifdef XNU_KERNEL_PRIVATE -#define SIOCDRADD_IN6_32 _IOWR('u', 134, struct in6_defrouter_32) -#define SIOCDRADD_IN6_64 _IOWR('u', 134, struct in6_defrouter_64) -#endif /* XNU_KERNEL_PRIVATE */ -#define SIOCDRDEL_IN6 _IOWR('u', 135, struct in6_defrouter) -#ifdef XNU_KERNEL_PRIVATE -#define SIOCDRDEL_IN6_32 _IOWR('u', 135, struct in6_defrouter_32) -#define SIOCDRDEL_IN6_64 _IOWR('u', 135, struct in6_defrouter_64) -#endif /* XNU_KERNEL_PRIVATE */ -#define SIOCSETROUTERMODE_IN6 _IOWR('i', 136, struct in6_ifreq) /* enable/disable IPv6 router mode on interface */ +#define SIOCLL_CGASTART _IOW('i', 160, struct in6_llstartreq) +#ifdef BSD_KERNEL_PRIVATE +#define SIOCLL_CGASTART_32 _IOW('i', 160, struct in6_llstartreq_32) +#define SIOCLL_CGASTART_64 _IOW('i', 160, struct in6_llstartreq_64) +#endif #endif /* PRIVATE */ -#define IN6_IFF_ANYCAST 0x01 /* anycast address */ -#define IN6_IFF_TENTATIVE 0x02 /* tentative address */ -#define IN6_IFF_DUPLICATED 0x04 /* DAD detected duplicate */ -#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */ -#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */ -#define IN6_IFF_NODAD 0x20 /* don't perform DAD on this address - * (used only at first SIOC* call) - */ -#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */ -#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */ -#define IN6_IFF_DYNAMIC 0x100 /* assigned by DHCPv6 service */ -#define IN6_IFF_OPTIMISTIC 0x200 /* optimistic DAD, i.e. RFC 4429 */ -#define IN6_IFF_NOPFX 0x8000 /* skip kernel prefix management. - * XXX: this should be temporary. - */ +#ifdef BSD_KERNEL_PRIVATE +/* + * XXX: Do not extend IN6_IFF values beyond 16-bit. We currently copy the + * values from ia6_flags as is into m_pkthdr.{src,dst}_iff which are + * defined as 16-bit variables. We can relax this once we define a + * unified set of interface flags for {ia,ia6}_flags and perform + * translation between those and the publicly-defined ones below. 
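The XXX note above is why every per-address flag stays within 16 bits. As a small standalone illustration of how these bits combine, mirroring the IN6_IFF_NOTREADY composite defined just below (local MY_* copies of the values so the sketch compiles on its own):

#include <stdint.h>

#define MY_IFF_TENTATIVE        0x02    /* cf. IN6_IFF_TENTATIVE */
#define MY_IFF_DUPLICATED       0x04    /* cf. IN6_IFF_DUPLICATED */
#define MY_IFF_NOTREADY         (MY_IFF_TENTATIVE | MY_IFF_DUPLICATED)

/* an address still in DAD, or a confirmed duplicate, must not be used */
static inline int
addr_usable(uint16_t ia6_flags)
{
        return ((ia6_flags & MY_IFF_NOTREADY) == 0);
}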
+ */ +#endif /* BSD_KERNEL_PRIVATE */ +#define IN6_IFF_ANYCAST 0x01 /* anycast address */ +#define IN6_IFF_TENTATIVE 0x02 /* tentative address */ +#define IN6_IFF_DUPLICATED 0x04 /* DAD detected duplicate */ +#define IN6_IFF_DETACHED 0x08 /* may be detached from the link */ +#define IN6_IFF_DEPRECATED 0x10 /* deprecated address */ + +/* don't perform DAD on this address (used only at first SIOC* call) */ +#define IN6_IFF_NODAD 0x20 + +#define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */ +#define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. */ +#define IN6_IFF_DYNAMIC 0x100 /* assigned by DHCPv6 service */ +#define IN6_IFF_OPTIMISTIC 0x200 /* optimistic DAD, i.e. RFC 4429 */ +#define IN6_IFF_SECURED 0x400 /* cryptographically generated */ + +/* skip kernel prefix management. XXX: this should be temporary. */ +#define IN6_IFF_NOPFX 0x8000 /* Duplicate Address Detection [DAD] in progress. */ -#define IN6_IFF_DADPROGRESS (IN6_IFF_TENTATIVE|IN6_IFF_OPTIMISTIC) +#define IN6_IFF_DADPROGRESS (IN6_IFF_TENTATIVE|IN6_IFF_OPTIMISTIC) /* do not input/output */ -#define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED) +#define IN6_IFF_NOTREADY (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED) #ifdef KERNEL -#define IN6_ARE_SCOPE_CMP(a,b) ((a)-(b)) -#define IN6_ARE_SCOPE_EQUAL(a,b) ((a)==(b)) +#define IN6_ARE_SCOPE_CMP(a, b) ((a) - (b)) +#define IN6_ARE_SCOPE_EQUAL(a, b) ((a) == (b)) #endif /* KERNEL */ -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern struct in6_ifaddr *in6_ifaddrs; -extern struct in6_ifstat **in6_ifstat; -extern size_t in6_ifstatmax; extern struct icmp6stat icmp6stat; -extern struct icmp6_ifstat **icmp6_ifstat; -extern size_t icmp6_ifstatmax; -extern lck_rw_t in6_ifs_rwlock; -#define in6_ifstat_inc(ifp, tag) \ -do { \ - lck_rw_lock_shared(&in6_ifs_rwlock); \ - int _z_index = ifp ? ifp->if_index : 0; \ - if ((_z_index) && _z_index <= if_index \ - && _z_index < (signed)in6_ifstatmax \ - && in6_ifstat && in6_ifstat[_z_index]) { \ - atomic_add_64(&in6_ifstat[_z_index]->tag, 1); \ - } \ - lck_rw_done(&in6_ifs_rwlock); \ +extern lck_rw_t in6_ifaddr_rwlock; +extern lck_mtx_t proxy6_lock; +extern u_char inet6ctlerrmap[]; +extern u_int32_t in6_maxmtu; + +/* N.B.: if_inet6data is never freed once set, so we don't need to lock */ +#define in6_ifstat_inc_common(_ifp, _tag, _atomic) do { \ + if (_ifp != NULL && IN6_IFEXTRA(_ifp) != NULL) { \ + if (_atomic) \ + atomic_add_64( \ + &IN6_IFEXTRA(_ifp)->in6_ifstat._tag, 1); \ + else \ + IN6_IFEXTRA(_ifp)->in6_ifstat._tag++; \ + } \ } while (0) -__private_extern__ lck_rw_t in6_ifaddr_rwlock; -__private_extern__ lck_mtx_t proxy6_lock; +/* atomic version */ +#define in6_ifstat_inc(_ifp, _tag) \ + in6_ifstat_inc_common(_ifp, _tag, TRUE) -extern struct ifqueue ip6intrq; /* IP6 packet input queue */ -extern struct in6_addr zeroin6_addr; -extern u_char inet6ctlerrmap[]; -extern u_int32_t in6_maxmtu; +/* non-atomic version (for fast paths) */ +#define in6_ifstat_inc_na(_ifp, _tag) \ + in6_ifstat_inc_common(_ifp, _tag, FALSE) /* * Macro for finding the internet address structure (in6_ifaddr) corresponding * to a given interface (ifnet structure). */ -#define IFP_TO_IA6(ifp, ia) \ -/* struct ifnet *ifp; */ \ -/* struct in6_ifaddr *ia; */ \ +#define IFP_TO_IA6(ifp, ia) \ + /* struct ifnet *ifp; */ \ + /* struct in6_ifaddr *ia; */ \ do { \ struct ifaddr *_ifa; \ ifnet_lock_assert(ifp, LCK_RW_ASSERT_HELD); \ @@ -744,10 +849,29 @@ struct in6_multi_mship { struct mld_ifinfo; /* + * IPv6 group descriptor. 
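The in6_ifstat_inc_common() macro above picks between a locked and an unlocked increment so hot paths can skip the atomic. The same split, sketched for userland with compiler builtins (assumes a GCC/Clang toolchain; atomic_add_64() is the kernel's primitive and is not used here):

#include <stdint.h>

static inline void
stat_inc(uint64_t *ctr, int want_atomic)
{
        if (want_atomic)
                /* like atomic_add_64(): safe against concurrent updaters */
                __atomic_fetch_add(ctr, 1, __ATOMIC_RELAXED);
        else
                /* like in6_ifstat_inc_na(): racy but cheap on fast paths */
                (*ctr)++;
}

Losing the occasional count on the non-atomic path is an accepted trade for avoiding a locked bus operation per packet.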
+ * + * For every entry on an ifnet's if_multiaddrs list which represents + * an IP multicast group, there is one of these structures. + * + * If any source filters are present, then a node will exist in the RB-tree + * to permit fast lookup by source whenever an operation takes place. + * This permits pre-order traversal when we issue reports. + * Source filter trees are kept separately from the socket layer to + * greatly simplify locking. + * + * When MLDv2 is active, in6m_timer is the response to group query timer. + * The state-change timer in6m_sctimer is separate; whenever state changes + * for the group the state change record is generated and transmitted, + * and kept if retransmissions are necessary. + * * The request count here is a count of requests for this address, not a * count of pointers to this structure. + * + * FUTURE: in6m_entry is now only used when groups are being purged + * on a detaching ifnet. It could be demoted to a SLIST_ENTRY. */ -struct in6_multi { +struct in6_multi { decl_lck_mtx_data(, in6m_lock); u_int32_t in6m_refcount; /* reference count */ u_int32_t in6m_reqcnt; /* request count for this address */ @@ -758,6 +882,7 @@ struct in6_multi { struct ifmultiaddr *in6m_ifma; /* back pointer to ifmultiaddr */ u_int in6m_state; /* state of the membership */ u_int in6m_timer; /* MLD6 listener report timer */ + /* New fields for MLDv2 follow. */ struct mld_ifinfo *in6m_mli; /* MLD info */ SLIST_ENTRY(in6_multi) in6m_dtle; /* detached waiting for rele */ @@ -766,11 +891,11 @@ struct in6_multi { struct ip6_msource_tree in6m_srcs; /* tree of sources */ u_long in6m_nsrc; /* # of tree entries */ - struct ifqueue in6m_scq; /* queue of pending - * state-change packets */ + struct ifqueue in6m_scq; /* pending state-change packets */ struct timeval in6m_lastgsrtv; /* last G-S-R query */ uint16_t in6m_sctimer; /* state-change timer */ uint16_t in6m_scrv; /* state-change rexmit count */ + /* * SSM state counters which track state at T0 (the time the last * state-change report's RV timer went to zero) and T1 @@ -796,7 +921,7 @@ struct in6_multi { #define IN6M_LOCK_ASSERT_NOTHELD(_in6m) \ lck_mtx_assert(&(_in6m)->in6m_lock, LCK_MTX_ASSERT_NOTOWNED) -#define IN6M_LOCK(_in6m) \ +#define IN6M_LOCK(_in6m) \ lck_mtx_lock(&(_in6m)->in6m_lock) #define IN6M_LOCK_SPIN(_in6m) \ @@ -822,10 +947,11 @@ struct in6_multi { #define IN6M_REMREF_LOCKED(_in6m) \ in6m_remref(_in6m, 1) -#define IN6M_TIMER_UNDEF -1 - /* flags to in6_update_ifa */ -#define IN6_IFAUPDATE_DADDELAY 0x1 /* first time to configure an address */ +#define IN6_IFAUPDATE_NOWAIT 0x1 /* don't block allocating memory */ +#define IN6_IFAUPDATE_1STADDR 0x2 /* first address on interface */ +#define IN6_IFAUPDATE_NEWADDR 0x4 /* new address on interface */ +#define IN6_IFAUPDATE_DADDELAY 0x8 /* must delay initial DAD probe */ struct ip6_moptions; struct sockopt; @@ -837,9 +963,9 @@ extern LIST_HEAD(in6_multihead, in6_multi) in6_multihead; * Structure used by macros below to remember position when stepping through * all of the in6_multi records. */ -struct in6_multistep { - struct in6_ifaddr *i_ia; - struct in6_multi *i_in6m; +struct in6_multistep { + struct in6_ifaddr *i_ia; + struct in6_multi *i_in6m; }; /* @@ -857,7 +983,7 @@ struct in6_multistep { * * Must be called with in6_multihead_lock held. 
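A sketch of how the lookup/step macros redefined just below are meant to be driven from kernel code, following the locking comments in this header (an in-kernel fragment, not standalone code):

        struct in6_multistep step;
        struct in6_multi *in6m;

        in6_multihead_lock_shared();
        IN6_FIRST_MULTI(step, in6m);
        while (in6m != NULL) {
                /* take IN6M_LOCK(in6m) before reading mutable state */
                IN6_NEXT_MULTI(step, in6m);
        }
        in6_multihead_lock_done();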
*/ -#define IN6_LOOKUP_MULTI(addr, ifp, in6m) \ +#define IN6_LOOKUP_MULTI(addr, ifp, in6m) \ /* struct in6_addr *addr; */ \ /* struct ifnet *ifp; */ \ /* struct in6_multi *in6m; */ \ @@ -875,7 +1001,7 @@ do { \ IN6M_UNLOCK(in6m); \ IN6_NEXT_MULTI(_step, in6m); \ } \ -} while(0) +} while (0) /* * Macro to step through all of the in6_multi records, one at a time. @@ -886,7 +1012,7 @@ do { \ * * Must be called with in6_multihead_lock held. */ -#define IN6_NEXT_MULTI(step, in6m) \ +#define IN6_NEXT_MULTI(step, in6m) \ /* struct in6_multistep step; */ \ /* struct in6_multi *in6m; */ \ do { \ @@ -895,7 +1021,7 @@ do { \ (step).i_in6m = (step).i_in6m->in6m_entry.le_next; \ } while (0) -#define IN6_FIRST_MULTI(step, in6m) \ +#define IN6_FIRST_MULTI(step, in6m) \ /* struct in6_multistep step; */ \ /* struct in6_multi *in6m */ \ do { \ @@ -904,6 +1030,11 @@ do { \ IN6_NEXT_MULTI((step), (in6m)); \ } while (0) +extern lck_mtx_t *inet6_domain_mutex; +extern struct domain *inet6domain; + +struct ip6_pktopts; + /* Multicast private KPIs. */ extern int im6o_mc_filter(const struct ip6_moptions *, const struct ifnet *, const struct sockaddr *, const struct sockaddr *); @@ -917,12 +1048,10 @@ extern void in6m_print(const struct in6_multi *); extern int in6m_record_source(struct in6_multi *, const struct in6_addr *); extern int ip6_getmoptions(struct inpcb *, struct sockopt *); extern int ip6_setmoptions(struct inpcb *, struct sockopt *); - /* Legacy KAME multicast private KPIs. */ extern struct in6_multi_mship *in6_joingroup(struct ifnet *, struct in6_addr *, int *, int); extern int in6_leavegroup(struct in6_multi_mship *); - extern void in6_multi_init(void); extern void in6m_addref(struct in6_multi *, int); extern void in6m_remref(struct in6_multi *, int); @@ -932,8 +1061,8 @@ extern int in6_mask2len(struct in6_addr *, u_char *); extern void in6_len2mask(struct in6_addr *, int); extern int in6_control(struct socket *, u_long, caddr_t, struct ifnet *, struct proc *); -extern int in6_update_ifa(struct ifnet *, struct in6_aliasreq *, - struct in6_ifaddr *, int, int); +extern int in6_update_ifa(struct ifnet *, struct in6_aliasreq *, int, + struct in6_ifaddr **); extern void in6_purgeaddr(struct ifaddr *); extern int in6if_do_dad(struct ifnet *); extern void in6_purgeif(struct ifnet *); @@ -944,6 +1073,9 @@ extern void in6_purgemkludge(struct ifnet *); extern struct in6_ifaddr *in6ifa_ifpforlinklocal(struct ifnet *, int); extern struct in6_ifaddr *in6ifa_ifpwithaddr(struct ifnet *, struct in6_addr *); extern struct in6_ifaddr *in6ifa_prproxyaddr(struct in6_addr *); +extern void in6ifa_getlifetime(struct in6_ifaddr *, + struct in6_addrlifetime *, int); +extern void in6ifa_setlifetime(struct in6_ifaddr *, struct in6_addrlifetime *); extern char *ip6_sprintf(const struct in6_addr *); extern int in6_addr2scopeid(struct ifnet *, struct in6_addr *); extern int in6_matchlen(struct in6_addr *, struct in6_addr *); @@ -954,17 +1086,11 @@ extern int in6_prefix_add_ifid(int iilen, struct in6_ifaddr *ia); extern void in6_prefix_remove_ifid(int iilen, struct in6_ifaddr *ia); extern void in6_purgeprefix(struct ifnet *); extern void in6_purgeaddrs(struct ifnet *); - extern int in6_is_addr_deprecated(struct sockaddr_in6 *); extern uint8_t im6s_get_mode(const struct in6_multi *, const struct ip6_msource *, uint8_t); - extern void im6f_leave(struct in6_mfilter *); extern void im6f_purge(struct in6_mfilter *); - -struct inpcb; -struct ip6_pktopts; - extern int in6_embedscope(struct in6_addr *, const struct sockaddr_in6 *, struct 
inpcb *, struct ifnet **, struct ip6_pktopts *); extern int in6_recoverscope(struct sockaddr_in6 *, const struct in6_addr *, @@ -974,14 +1100,26 @@ extern void in6_aliasreq_64_to_32(struct in6_aliasreq_64 *, extern void in6_aliasreq_32_to_64(struct in6_aliasreq_32 *, struct in6_aliasreq_64 *); extern void in6_ifaddr_init(void); +extern int in6_inithead(void **, int); extern void in6_rtqdrain(void); extern struct radix_node *in6_validate(struct radix_node *); extern int in6_if2idlen(struct ifnet *); -extern int in6_src_ioctl (u_long, caddr_t); - -__private_extern__ void in6_multihead_lock_exclusive(void); -__private_extern__ void in6_multihead_lock_shared(void); -__private_extern__ void in6_multihead_lock_assert(int); -__private_extern__ void in6_multihead_lock_done(void); -#endif /* XNU_KERNEL_PRIVATE */ +extern int in6_src_ioctl(u_long, caddr_t); +extern void in6_multihead_lock_exclusive(void); +extern void in6_multihead_lock_shared(void); +extern void in6_multihead_lock_assert(int); +extern void in6_multihead_lock_done(void); + +extern void in6_cga_init(void); +extern void in6_cga_node_lock(void); +extern void in6_cga_node_unlock(void); +extern void in6_cga_query(struct in6_cga_nodecfg *); +extern int in6_cga_start(const struct in6_cga_nodecfg *); +extern int in6_cga_stop(void); +extern ssize_t in6_cga_parameters_prepare(void *, size_t, + const struct in6_addr *, u_int8_t, const struct in6_cga_modifier *); +extern int in6_cga_generate(const struct in6_cga_prepare *, u_int8_t, + struct in6_addr *); + +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_IN6_VAR_H_ */ diff --git a/bsd/netinet6/ip6_ecn.h b/bsd/netinet6/ip6_ecn.h index 125a95581..2506dd3af 100644 --- a/bsd/netinet6/ip6_ecn.h +++ b/bsd/netinet6/ip6_ecn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,7 +64,7 @@ */ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern void ip6_ecn_ingress(int, u_int32_t *, const u_int32_t *); extern void ip6_ecn_egress(int, const u_int32_t *, u_int32_t *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netinet6/ip6_forward.c b/bsd/netinet6/ip6_forward.c index 42156858e..6fdaa1069 100644 --- a/bsd/netinet6/ip6_forward.c +++ b/bsd/netinet6/ip6_forward.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2011 Apple Inc. All rights reserved. + * Copyright (c) 2009-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -128,19 +128,36 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, int error, type = 0, code = 0; boolean_t proxy = FALSE; struct mbuf *mcopy = NULL; - struct ifnet *ifp, *origifp; /* maybe unnecessary */ - u_int32_t inzone, outzone; + struct ifnet *ifp, *rcvifp, *origifp; /* maybe unnecessary */ + u_int32_t inzone, outzone, len; struct in6_addr src_in6, dst_in6; + uint64_t curtime = net_uptime(); #if IPSEC struct secpolicy *sp = NULL; #endif - struct timeval timenow; unsigned int ifscope = IFSCOPE_NONE; #if PF struct pf_mtag *pf_mtag; #endif /* PF */ - getmicrotime(&timenow); + /* + * In the prefix proxying case, the route to the proxied node normally + * gets created by nd6_prproxy_ns_output(), as part of forwarding a + * NS (NUD/AR) packet to the proxied node. 
In the event that such + * packet did not arrive in time before the correct route gets created, + * ip6_input() would have performed a rtalloc() which most likely will + * create the wrong cloned route; this route points back to the same + * interface as the inbound interface, since the parent non-scoped + * prefix route points there. Therefore we check if that is the case + * and perform the necessary fixup to get the correct route installed. + */ + if (!srcrt && nd6_prproxy && + (rt = ip6forward_rt->ro_rt) != NULL && (rt->rt_flags & RTF_PROXY)) { + nd6_proxy_find_fwdroute(m->m_pkthdr.rcvif, ip6forward_rt); + if ((rt = ip6forward_rt->ro_rt) != NULL) + ifscope = rt->rt_ifp->if_index; + } + #if PF pf_mtag = pf_find_mtag(m); if (pf_mtag != NULL && pf_mtag->pftag_rtableid != IFSCOPE_NONE) @@ -155,8 +172,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, RT_LOCK(rt); if (rt->rt_ifp->if_index != ifscope) { RT_UNLOCK(rt); - rtfree(rt); - rt = ip6forward_rt->ro_rt = NULL; + ROUTE_RELEASE(ip6forward_rt); + rt = NULL; } else { RT_UNLOCK(rt); } @@ -191,8 +208,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { ip6stat.ip6s_cantforward++; /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ - if (ip6_log_time + ip6_log_interval < timenow.tv_sec) { - ip6_log_time = timenow.tv_sec; + if (ip6_log_time + ip6_log_interval < curtime) { + ip6_log_time = curtime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", @@ -335,14 +352,14 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, error = ipsec6_output_tunnel(&state, sp, 0); key_freesp(sp, KEY_SADB_UNLOCKED); - if (state.tunneled == 4) + if (state.tunneled == 4) { + ROUTE_RELEASE(&state.ro); return (NULL); /* packet is gone - sent over IPv4 */ - - m = state.m; - if (state.ro.ro_rt) { - rtfree(state.ro.ro_rt); - state.ro.ro_rt = NULL; } + + m = state.m; + ROUTE_RELEASE(&state.ro); + if (error) { /* mbuf is already reclaimed in ipsec6_output_tunnel. 
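The debug logging above is throttled against ip6_log_interval using net_uptime(), which, unlike the getmicrotime() call it replaces, is monotonic and cannot jump when the wall clock is set. A userland approximation of the same idiom (uptime_sec() and log_last are stand-ins, not kernel names):

#include <stdio.h>
#include <stdint.h>
#include <time.h>

static uint64_t log_last;                       /* cf. ip6_log_time */
static const uint64_t log_interval = 5;         /* seconds, cf. ip6_log_interval */

static uint64_t
uptime_sec(void)                                /* stand-in for net_uptime() */
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ((uint64_t)ts.tv_sec);
}

static void
ratelimited_log(const char *msg)
{
        uint64_t now = uptime_sec();

        if (log_last + log_interval < now) {    /* same test as ip6_forward() */
                log_last = now;
                fprintf(stderr, "%s\n", msg);
        }
}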
*/ switch (error) { @@ -371,8 +388,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, return (NULL); } } - skip_ipsec: #endif /* IPSEC */ + skip_ipsec: dst = (struct sockaddr_in6 *)&ip6forward_rt->ro_dst; if ((rt = ip6forward_rt->ro_rt) != NULL) { @@ -381,19 +398,19 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, RT_ADDREF_LOCKED(rt); } + VERIFY(rt == NULL || rt == ip6forward_rt->ro_rt); if (!srcrt) { /* * ip6forward_rt->ro_dst.sin6_addr is equal to ip6->ip6_dst */ - if (rt == NULL || !(rt->rt_flags & RTF_UP) || - rt->generation_id != route_generation) { + if (ROUTE_UNUSABLE(ip6forward_rt)) { if (rt != NULL) { /* Release extra ref */ RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); - rtfree(rt); - ip6forward_rt->ro_rt = NULL; } + ROUTE_RELEASE(ip6forward_rt); + /* this probably fails but give it a try again */ rtalloc_scoped_ign((struct route *)ip6forward_rt, RTF_PRCLONING, ifscope); @@ -414,16 +431,15 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, return (NULL); } RT_LOCK_ASSERT_HELD(rt); - } else if (rt == NULL || !(rt->rt_flags & RTF_UP) || - !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst->sin6_addr) || - rt->generation_id != route_generation) { + } else if (ROUTE_UNUSABLE(ip6forward_rt) || + !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst->sin6_addr)) { if (rt != NULL) { /* Release extra ref */ RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); - rtfree(rt); - ip6forward_rt->ro_rt = NULL; } + ROUTE_RELEASE(ip6forward_rt); + bzero(dst, sizeof(*dst)); dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; @@ -475,8 +491,8 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, ip6stat.ip6s_badscope++; in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); - if (ip6_log_time + ip6_log_interval < timenow.tv_sec) { - ip6_log_time = timenow.tv_sec; + if (ip6_log_time + ip6_log_interval < curtime) { + ip6_log_time = curtime; log(LOG_DEBUG, "cannot forward " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", @@ -652,10 +668,19 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, } /* we can just use rcvif in forwarding. */ - origifp = m->m_pkthdr.rcvif; - } - else + origifp = rcvifp = m->m_pkthdr.rcvif; + } else if (nd6_prproxy) { + /* + * In the prefix proxying case, we need to inform nd6_output() + * about the inbound interface, so that any subsequent NS + * packets generated by nd6_prproxy_ns_output() will not be + * sent back to that same interface. + */ + origifp = rcvifp = m->m_pkthdr.rcvif; + } else { + rcvifp = m->m_pkthdr.rcvif; origifp = rt->rt_ifp; + } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. @@ -671,7 +696,7 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, * If this is to be processed locally, let ip6_input have it. */ if (proxy) { - VERIFY(m->m_pkthdr.aux_flags & MAUXF_PROXY_DST); + VERIFY(m->m_pkthdr.pkt_flags & PKTF_PROXY_DST); /* Release extra ref */ RT_REMREF(rt); if (mcopy != NULL) @@ -694,11 +719,23 @@ ip6_forward(struct mbuf *m, struct route_in6 *ip6forward_rt, ip6 = mtod(m, struct ip6_hdr *); #endif /* PF */ + /* Mark this packet as being forwarded from another interface */ + m->m_pkthdr.pkt_flags |= PKTF_FORWARDED; + len = m_pktlen(m); + error = nd6_output(ifp, origifp, m, dst, rt, NULL); if (error) { in6_ifstat_inc(ifp, ifs6_out_discard); ip6stat.ip6s_cantforward++; } else { + /* + * Increment stats on the source interface; the ones + * for destination interface has been taken care of + * during output above by virtue of PKTF_FORWARDED. 
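ROUTE_RELEASE() and ROUTE_UNUSABLE() replace the repeated rtfree-and-NULL sequences of the old code in this function. To first order the release half behaves like the sketch below; the real macro does additional bookkeeping beyond this (route_lite is a hypothetical type, and the kernel's rtfree() is assumed):

struct rtentry;                         /* opaque here */
extern void rtfree(struct rtentry *);   /* kernel routine */

struct route_lite {
        struct rtentry *ro_rt;          /* cached route, if any */
};

#define ROUTE_RELEASE_LITE(_ro) do {                                    \
        if ((_ro)->ro_rt != NULL) {                                     \
                rtfree((_ro)->ro_rt);   /* drop the reference */        \
                (_ro)->ro_rt = NULL;    /* force a fresh rtalloc later */ \
        }                                                               \
} while (0)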
+ */ + rcvifp->if_fpackets++; + rcvifp->if_fbytes += len; + ip6stat.ip6s_forward++; in6_ifstat_inc(ifp, ifs6_out_forward); if (type) diff --git a/bsd/netinet6/ip6_fw.h b/bsd/netinet6/ip6_fw.h index 92f913f29..1debe3fb2 100644 --- a/bsd/netinet6/ip6_fw.h +++ b/bsd/netinet6/ip6_fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2011 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -149,7 +149,7 @@ struct ip6_fw { /* count of 0 means match all ports) */ }; -#if defined(KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) #pragma pack(4) struct ip6_fw_32 { @@ -217,7 +217,7 @@ struct ip6_fw_64 { }; -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #define IPV6_FW_GETNSRCP(rule) ((rule)->fw_nports & 0x0f) #define IPV6_FW_SETNSRCP(rule, n) do { \ @@ -321,7 +321,7 @@ struct ip6_fw_chain { /* * Main firewall chains definitions and global var's definitions. */ -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define M_IP6FW M_IPFW @@ -342,7 +342,7 @@ extern ip6_fw_chk_t *ip6_fw_chk_ptr; extern ip6_fw_ctl_t *ip6_fw_ctl_ptr; extern int ip6_fw_enable; -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* __APPLE_API_OBSOLETE */ #endif /* _IP6_FW_H */ diff --git a/bsd/netinet6/ip6_id.c b/bsd/netinet6/ip6_id.c index 26fffd286..eab70a6c0 100644 --- a/bsd/netinet6/ip6_id.c +++ b/bsd/netinet6/ip6_id.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2009 Apple Inc. All rights reserved. + * Copyright (c) 2009-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -53,8 +53,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $KAME: ip6_id.c,v 1.13 2003/09/16 09:11:19 itojun Exp $ */ /*- @@ -90,8 +88,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $OpenBSD: ip_id.c,v 1.6 2002/03/15 18:19:52 millert Exp $ */ #include @@ -122,7 +118,9 @@ #include #include #include +#include #include +#include #include #include @@ -130,10 +128,6 @@ #include #include -#ifndef INT32_MAX -#define INT32_MAX 0x7fffffffU -#endif - struct randomtab { const int ru_bits; /* resulting bits */ const long ru_out; /* Time after wich will be reseeded */ @@ -215,26 +209,24 @@ pmod(u_int32_t gen, u_int32_t expo, u_int32_t mod) static void initid(struct randomtab *p) { + time_t curtime = (time_t)net_uptime(); u_int32_t j, i; int noprime = 1; - struct timeval timenow; - - getmicrotime(&timenow); - p->ru_x = random() % p->ru_m; + p->ru_x = RandomULong() % p->ru_m; /* (bits - 1) bits of random seed */ - p->ru_seed = random() & (~0U >> (32 - p->ru_bits + 1)); - p->ru_seed2 = random() & (~0U >> (32 - p->ru_bits + 1)); + p->ru_seed = RandomULong() & (~0U >> (32 - p->ru_bits + 1)); + p->ru_seed2 = RandomULong() & (~0U >> (32 - p->ru_bits + 1)); /* Determine the LCG we use */ - p->ru_b = (random() & (~0U >> (32 - p->ru_bits))) | 1; + p->ru_b = (RandomULong() & (~0U >> (32 - p->ru_bits))) | 1; p->ru_a = pmod(p->ru_agen, - (random() & (~0U >> (32 - p->ru_bits))) & (~1U), p->ru_m); + (RandomULong() & (~0U >> (32 - p->ru_bits))) & (~1U), p->ru_m); while (p->ru_b % 3 == 0) p->ru_b += 2; - j = random() % p->ru_n; + j = RandomULong() % p->ru_n; /* * Do a fast gcd(j, RU_N - 1), so we can find a j with @@ -255,23 +247,21 @@ initid(struct randomtab *p) p->ru_g = pmod(p->ru_gen, j, p->ru_n); p->ru_counter = 0; - p->ru_reseed = timenow.tv_sec + p->ru_out; + p->ru_reseed = curtime + p->ru_out; p->ru_msb = p->ru_msb ? 0 : (1U << (p->ru_bits - 1)); } static u_int32_t randomid(struct randomtab *p) { + time_t curtime = (time_t)net_uptime(); int i, n; u_int32_t tmp; - struct timeval timenow; - - getmicrotime(&timenow); - if (p->ru_counter >= p->ru_max || timenow.tv_sec > p->ru_reseed) + if (p->ru_counter >= p->ru_max || curtime > p->ru_reseed) initid(p); - tmp = random(); + tmp = RandomULong(); /* Skip a random number of ids */ n = tmp & 0x3; tmp = tmp >> 2; @@ -280,25 +270,25 @@ randomid(struct randomtab *p) for (i = 0; i <= n; i++) { /* Linear Congruential Generator */ - p->ru_x = (u_int32_t)((u_int64_t)p->ru_a * p->ru_x + p->ru_b) % p->ru_m; + p->ru_x = ((u_int64_t)p->ru_a * p->ru_x + p->ru_b) % p->ru_m; } p->ru_counter += i; - return (p->ru_seed ^ pmod(p->ru_g, p->ru_seed2 ^ p->ru_x, p->ru_n)) | - p->ru_msb; + return ((p->ru_seed ^ pmod(p->ru_g, p->ru_seed2 ^ p->ru_x, p->ru_n)) | + p->ru_msb); } u_int32_t ip6_randomid(void) { - return randomid(&randomtab_32); + return (randomid(&randomtab_32)); } u_int32_t ip6_randomflowlabel(void) { - return randomid(&randomtab_20) & 0xfffff; + return (randomid(&randomtab_20) & 0xfffff); } diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c index 118aa245a..bef42ee38 100644 --- a/bsd/netinet6/ip6_input.c +++ b/bsd/netinet6/ip6_input.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). 
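initid()/randomid() above implement the classic non-repeating ID generator: a linear congruential generator is stepped a small random number of times (n = tmp & 0x3), and its state is then disguised through modular exponentiation (pmod(g, seed2 ^ x, n)), so consecutive fragment IDs and flow labels reveal nothing about the underlying sequence. The LCG step in isolation, as a self-contained sketch (the constants in the caller would be the tuned randomtab values, not shown here):

#include <stdint.h>

/*
 * One LCG step: x' = (a*x + b) mod m, with a 64-bit intermediate,
 * exactly as in the p->ru_x update inside randomid().
 */
static uint32_t
lcg_step(uint32_t x, uint32_t a, uint32_t b, uint32_t m)
{
        return ((uint32_t)(((uint64_t)a * x + b) % m));
}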
You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,11 +22,9 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/ip6_input.c,v 1.11.2.10 2001/07/24 19:10:18 brooks Exp $ */ -/* $KAME: ip6_input.c,v 1.194 2001/05/27 13:28:35 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -92,7 +90,6 @@ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ - #include #include #include @@ -109,9 +106,11 @@ #include #include #include -#include +#include +#include #include +#include #include #include @@ -121,13 +120,15 @@ #include #include #include +#include #include #include #if INET #include #include -#endif /*INET*/ +#endif /* INET */ +#include #include #include #include @@ -136,41 +137,31 @@ #include #include #include -#include +#include #if IPSEC #include -#if INET6 #include -#endif extern int ipsec_bypass; -#endif +#endif /* IPSEC */ +#if IPFW2 #include +#endif /* IPFW2 */ #if DUMMYNET #include #include #endif /* DUMMYNET */ -#include - -#include - /* we need it for NLOOP. */ #include "loop.h" -#include - #if PF #include #endif /* PF */ -extern struct domain inet6domain; -extern struct ip6protosw inet6sw[]; - -struct ip6protosw * ip6_protox[IPPROTO_MAX]; -static int ip6qmaxlen = IFQ_MAXLEN; +struct ip6protosw *ip6_protox[IPPROTO_MAX]; static lck_grp_attr_t *in6_ifaddr_rwlock_grp_attr; static lck_grp_t *in6_ifaddr_rwlock_grp; @@ -180,17 +171,10 @@ decl_lck_rw_data(, in6_ifaddr_rwlock); /* Protected by in6_ifaddr_rwlock */ struct in6_ifaddr *in6_ifaddrs = NULL; -int ip6_forward_srcrt; /* XXX */ -int ip6_sourcecheck; /* XXX */ -int ip6_sourcecheck_interval; /* XXX */ -const int int6intrq_present = 1; - -int ip6_ours_check_algorithm; - -#define IN6_IFSTAT_REQUIRE_ALIGNED_64(f) \ +#define IN6_IFSTAT_REQUIRE_ALIGNED_64(f) \ _CASSERT(!(offsetof(struct in6_ifstat, f) % sizeof (uint64_t))) -#define ICMP6_IFSTAT_REQUIRE_ALIGNED_64(f) \ +#define ICMP6_IFSTAT_REQUIRE_ALIGNED_64(f) \ _CASSERT(!(offsetof(struct icmp6_ifstat, f) % sizeof (uint64_t))) #if IPFW2 @@ -198,59 +182,49 @@ int ip6_ours_check_algorithm; ip6_fw_chk_t *ip6_fw_chk_ptr; ip6_fw_ctl_t *ip6_fw_ctl_ptr; int ip6_fw_enable = 1; -#endif +#endif /* IPFW2 */ struct ip6stat ip6stat; -#ifdef __APPLE__ -struct ifqueue ip6intrq; -decl_lck_mtx_data(, ip6_init_mutex); decl_lck_mtx_data(, proxy6_lock); -decl_lck_mtx_data(, dad6_mutex_data); -decl_lck_mtx_data(, nd6_mutex_data); -decl_lck_mtx_data(, prefix6_mutex_data); -decl_lck_mtx_data(, scope6_mutex_data); +decl_lck_mtx_data(static, dad6_mutex_data); +decl_lck_mtx_data(static, nd6_mutex_data); +decl_lck_mtx_data(static, prefix6_mutex_data); lck_mtx_t *dad6_mutex = &dad6_mutex_data; lck_mtx_t *nd6_mutex = &nd6_mutex_data; lck_mtx_t *prefix6_mutex = &prefix6_mutex_data; -lck_mtx_t *scope6_mutex = &scope6_mutex_data; #ifdef 
ENABLE_ADDRSEL -decl_lck_mtx_data(, addrsel_mutex_data); +decl_lck_mtx_data(static, addrsel_mutex_data); lck_mtx_t *addrsel_mutex = &addrsel_mutex_data; #endif -decl_lck_rw_data(, in6_ifs_rwlock); -decl_lck_rw_data(, icmp6_ifs_rwlock); -lck_attr_t *ip6_mutex_attr; -lck_grp_t *ip6_mutex_grp; -lck_grp_attr_t *ip6_mutex_grp_attr; -extern lck_mtx_t *inet6_domain_mutex; -#endif +static lck_attr_t *ip6_mutex_attr; +static lck_grp_t *ip6_mutex_grp; +static lck_grp_attr_t *ip6_mutex_grp_attr; + extern int loopattach_done; extern void addrsel_policy_init(void); static void ip6_init_delayed(void); -static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *); - static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); #if PULLDOWN_TEST static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int); #endif -#ifdef __APPLE__ -void gifattach(void); -void stfattach(void); -#endif +#if NSTF +extern void stfattach(void); +#endif /* NSTF */ SYSCTL_DECL(_net_inet6_ip6); -int ip6_doscopedroute = 1; -SYSCTL_INT(_net_inet6_ip6, OID_AUTO, scopedroute, CTLFLAG_RD | CTLFLAG_LOCKED, - &ip6_doscopedroute, 0, "Enable IPv6 scoped routing"); +int ip6_doscopedroute = 1; +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, scopedroute, + CTLFLAG_RD | CTLFLAG_LOCKED, &ip6_doscopedroute, 0, + "Enable IPv6 scoped routing"); -int ip6_restrictrecvif = 1; -SYSCTL_INT(_net_inet6_ip6, OID_AUTO, restrictrecvif, - CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_restrictrecvif, 0, - "Enable inbound interface restrictions"); +static uint32_t ip6_adj_clear_hwcksum = 0; +SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, adj_clear_hwcksum, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_adj_clear_hwcksum, 0, + "Invalidate hwcksum info when adjusting length"); /* * On platforms which require strict alignment (currently for anything but @@ -272,14 +246,14 @@ SYSCTL_INT(_net_inet6_ip6, OID_AUTO, restrictrecvif, struct ifnet *__ifp = (_ifp); \ atomic_add_64(&(__ifp)->if_alignerrs, 1); \ if (((_m)->m_flags & M_PKTHDR) && \ - (_m)->m_pkthdr.header != NULL) \ - (_m)->m_pkthdr.header = NULL; \ + (_m)->m_pkthdr.pkt_hdr != NULL) \ + (_m)->m_pkthdr.pkt_hdr = NULL; \ _n = m_defrag_offset(_m, max_linkhdr, M_NOWAIT); \ if (_n == NULL) { \ ip6stat.ip6s_toosmall++; \ m_freem(_m); \ (_m) = NULL; \ - _action \ + _action; \ } else { \ VERIFY(_n != (_m)); \ (_m) = _n; \ @@ -289,10 +263,9 @@ SYSCTL_INT(_net_inet6_ip6, OID_AUTO, restrictrecvif, #endif /* !__i386__ && !__x86_64__ */ static void -ip6_proto_input( - __unused protocol_family_t protocol, - mbuf_t packet) +ip6_proto_input(protocol_family_t protocol, mbuf_t packet) { +#pragma unused(protocol) ip6_input(packet); } @@ -301,32 +274,49 @@ ip6_proto_input( * All protocols not implemented in kernel go to raw IP6 protocol handler. 
*/ void -ip6_init() +ip6_init(struct ip6protosw *pp, struct domain *dp) { - struct ip6protosw *pr; - int i; + static int ip6_initialized = 0; + struct protosw *pr; struct timeval tv; + int i; + domain_unguard_t unguard; + + domain_proto_mtx_lock_assert_held(); + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED); + + _CASSERT((sizeof (struct ip6_hdr) + + sizeof (struct icmp6_hdr)) <= _MHLEN); - _CASSERT((sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr)) <= - _MHLEN); + if (ip6_initialized) + return; + ip6_initialized = 1; PE_parse_boot_argn("net.inet6.ip6.scopedroute", &ip6_doscopedroute, sizeof (ip6_doscopedroute)); -#if DIAGNOSTIC - if (sizeof(struct protosw) != sizeof(struct ip6protosw)) - panic("sizeof(protosw) != sizeof(ip6protosw)"); -#endif - pr = (struct ip6protosw *)pffindproto_locked(PF_INET6, IPPROTO_RAW, SOCK_RAW); - if (pr == 0) - panic("ip6_init"); + pr = pffindproto_locked(PF_INET6, IPPROTO_RAW, SOCK_RAW); + if (pr == NULL) { + panic("%s: Unable to find [PF_INET6,IPPROTO_RAW,SOCK_RAW]\n", + __func__); + /* NOTREACHED */ + } + + /* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */ for (i = 0; i < IPPROTO_MAX; i++) - ip6_protox[i] = pr; - for (pr = (struct ip6protosw*)inet6domain.dom_protosw; pr; pr = pr->pr_next) { - if(!(pr->pr_domain)) continue; /* If uninitialized, skip */ - if (pr->pr_domain->dom_family == PF_INET6 && - pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { - ip6_protox[pr->pr_protocol] = pr; + ip6_protox[i] = (struct ip6protosw *)pr; + /* + * Cycle through IP protocols and put them into the appropriate place + * in ip6_protox[], skipping protocols IPPROTO_{IP,RAW}. + */ + VERIFY(dp == inet6domain && dp->dom_family == PF_INET6); + TAILQ_FOREACH(pr, &dp->dom_protosw, pr_entry) { + VERIFY(pr->pr_domain == dp); + if (pr->pr_protocol != 0 && pr->pr_protocol != IPPROTO_RAW) { + /* Be careful to only index valid IP protocols. 
*/ + if (pr->pr_protocol < IPPROTO_MAX) + ip6_protox[pr->pr_protocol] = + (struct ip6protosw *)pr; } } @@ -338,21 +328,13 @@ ip6_init() lck_mtx_init(dad6_mutex, ip6_mutex_grp, ip6_mutex_attr); lck_mtx_init(nd6_mutex, ip6_mutex_grp, ip6_mutex_attr); lck_mtx_init(prefix6_mutex, ip6_mutex_grp, ip6_mutex_attr); - lck_mtx_init(scope6_mutex, ip6_mutex_grp, ip6_mutex_attr); + scope6_init(ip6_mutex_grp, ip6_mutex_attr); #ifdef ENABLE_ADDRSEL lck_mtx_init(addrsel_mutex, ip6_mutex_grp, ip6_mutex_attr); #endif lck_mtx_init(&proxy6_lock, ip6_mutex_grp, ip6_mutex_attr); - lck_mtx_init(&ip6_init_mutex, ip6_mutex_grp, ip6_mutex_attr); - - lck_rw_init(&in6_ifs_rwlock, ip6_mutex_grp, ip6_mutex_attr); - lck_rw_init(&icmp6_ifs_rwlock, ip6_mutex_grp, ip6_mutex_attr); - - inet6domain.dom_flags = DOM_REENTRANT; - - ip6intrq.ifq_maxlen = ip6qmaxlen; in6_ifaddr_rwlock_grp_attr = lck_grp_attr_alloc_init(); in6_ifaddr_rwlock_grp = lck_grp_alloc_init("in6_ifaddr_rwlock", @@ -380,7 +362,7 @@ ip6_init() IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_reass_ok); IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_reass_fail); IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_mcast); - IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_mcast); + IN6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_mcast); ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_msg); ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_in_error); @@ -418,20 +400,16 @@ ip6_init() ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_mldreport); ICMP6_IFSTAT_REQUIRE_ALIGNED_64(ifs6_out_mlddone); + getmicrotime(&tv); + ip6_desync_factor = + (RandomULong() ^ tv.tv_usec) % MAX_TEMP_DESYNC_FACTOR; + in6_ifaddr_init(); ip6_moptions_init(); nd6_init(); frag6_init(); - icmp6_init(); + icmp6_init(NULL, dp); addrsel_policy_init(); - /* - * in many cases, random() here does NOT return random number - * as initialization during bootstrap time occur in fixed order. 
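ip6_init() above builds ip6_protox[] by defaulting every slot to the raw handler, then overwriting the slots that have real protocol handlers, and refusing to index past IPPROTO_MAX. The idiom in miniature (all names below are hypothetical, not xnu's):

#include <stddef.h>

#define PROTO_MAX 256
typedef void (*proto_input_t)(void *);

static proto_input_t proto_tab[PROTO_MAX];

static void
raw_input(void *pkt)                    /* catch-all, like IPPROTO_RAW */
{
        (void)pkt;
}

static void
proto_tab_init(const unsigned *protos, proto_input_t *handlers, size_t n)
{
        size_t i;

        for (i = 0; i < PROTO_MAX; i++)
                proto_tab[i] = raw_input;       /* default everything */
        for (i = 0; i < n; i++)
                if (protos[i] < PROTO_MAX)      /* only index valid slots */
                        proto_tab[protos[i]] = handlers[i];
}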
- */ - microtime(&tv); - ip6_flow_seq = random() ^ tv.tv_usec; - microtime(&tv); - ip6_desync_factor = (random() ^ tv.tv_usec) % MAX_TEMP_DESYNC_FACTOR; /* * P2P interfaces often route the local address to the loopback @@ -440,64 +418,84 @@ ip6_init() */ net_init_add(ip6_init_delayed); - domain_proto_mtx_unlock(TRUE); - proto_register_input(PF_INET6, ip6_proto_input, NULL, 0); - domain_proto_mtx_lock(); + unguard = domain_unguard_deploy(); + i = proto_register_input(PF_INET6, ip6_proto_input, NULL, 0); + if (i != 0) { + panic("%s: failed to register PF_INET6 protocol: %d\n", + __func__, i); + /* NOTREACHED */ + } + domain_unguard_release(unguard); } static void ip6_init_delayed(void) { - (void) in6_ifattach(lo_ifp, NULL, NULL); - -#ifdef __APPLE__ - /* nd6_timer_init */ - timeout(nd6_timer, (caddr_t)0, hz); + (void) in6_ifattach_prelim(lo_ifp); /* timer for regeneranation of temporary addresses randomize ID */ - timeout(in6_tmpaddrtimer, (caddr_t)0, - (ip6_temp_preferred_lifetime - ip6_desync_factor - - ip6_temp_regen_advance) * hz); + timeout(in6_tmpaddrtimer, NULL, + (ip6_temp_preferred_lifetime - ip6_desync_factor - + ip6_temp_regen_advance) * hz); -#if NGIF - gifattach(); -#endif #if NSTF stfattach(); -#endif -#endif /* __APPLE__ */ +#endif /* NSTF */ } void ip6_input(struct mbuf *m) { struct ip6_hdr *ip6; - int off = sizeof(struct ip6_hdr), nest; + int off = sizeof (struct ip6_hdr), nest; u_int32_t plen; u_int32_t rtalert = ~0; int nxt = 0, ours = 0; - struct ifnet *deliverifp = NULL; - ipfilter_t inject_ipfref = 0; + struct ifnet *inifp, *deliverifp = NULL; + ipfilter_t inject_ipfref = NULL; int seen; struct in6_ifaddr *ia6 = NULL; - struct route_in6 ip6_forward_rt; struct sockaddr_in6 *dst6; #if DUMMYNET - struct m_tag *tag; - struct ip_fw_args args; - - bzero(&args, sizeof(struct ip_fw_args)); + struct m_tag *tag; #endif /* DUMMYNET */ + struct { + struct route_in6 rin6; +#if DUMMYNET + struct ip_fw_args args; +#endif /* DUMMYNET */ + } ip6ibz; +#define rin6 ip6ibz.rin6 +#define args ip6ibz.args - bzero(&ip6_forward_rt, sizeof(ip6_forward_rt)); + /* zero out {rin6, args} */ + bzero(&ip6ibz, sizeof (ip6ibz)); - /* Check if the packet we received is valid after interface filter + /* + * Check if the packet we received is valid after interface filter * processing */ MBUF_INPUT_CHECK(m, m->m_pkthdr.rcvif); + inifp = m->m_pkthdr.rcvif; + VERIFY(inifp != NULL); /* Perform IP header alignment fixup, if needed */ - IP6_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return;); + IP6_HDR_ALIGNMENT_FIXUP(m, inifp, return); + + m->m_pkthdr.pkt_flags &= ~PKTF_FORWARDED; +#if IPSEC + /* + * should the inner packet be considered authentic? + * see comment in ah4_input(). + */ + m->m_flags &= ~M_AUTHIPHDR; + m->m_flags &= ~M_AUTHIPDGM; +#endif /* IPSEC */ + + /* + * make sure we don't have onion peering information into m_aux. + */ + ip6_delaux(m); #if DUMMYNET if ((tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, @@ -505,12 +503,12 @@ ip6_input(struct mbuf *m) struct dn_pkt_tag *dn_tag; dn_tag = (struct dn_pkt_tag *)(tag+1); - + args.fwa_pf_rule = dn_tag->dn_pf_rule; - + m_tag_delete(m, tag); } - + if (args.fwa_pf_rule) { ip6 = mtod(m, struct ip6_hdr *); /* In case PF got disabled */ @@ -519,70 +517,50 @@ ip6_input(struct mbuf *m) #endif /* DUMMYNET */ /* - * No need to proccess packet twice if we've - * already seen it + * No need to proccess packet twice if we've already seen it. 
*/ inject_ipfref = ipf_get_inject_filter(m); - if (inject_ipfref != 0) { + if (inject_ipfref != NULL) { ip6 = mtod(m, struct ip6_hdr *); nxt = ip6->ip6_nxt; seen = 0; goto injectit; - } else + } else { seen = 1; - -#if IPSEC - /* - * should the inner packet be considered authentic? - * see comment in ah4_input(). - */ - if (m) { - m->m_flags &= ~M_AUTHIPHDR; - m->m_flags &= ~M_AUTHIPDGM; } -#endif - - /* - * make sure we don't have onion peering information into m_aux. - */ - ip6_delaux(m); /* * mbuf statistics */ if (m->m_flags & M_EXT) { - if (m->m_next) + if (m->m_next != NULL) ip6stat.ip6s_mext2m++; else ip6stat.ip6s_mext1++; } else { -#define M2MMAX (sizeof(ip6stat.ip6s_m2m)/sizeof(ip6stat.ip6s_m2m[0])) - if (m->m_next) { - if (m->m_flags & M_LOOP) { - ip6stat.ip6s_m2m[ifnet_index(lo_ifp)]++; /* XXX */ - } else if (m->m_pkthdr.rcvif->if_index < M2MMAX) - ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++; - else +#define M2MMAX (sizeof (ip6stat.ip6s_m2m) / sizeof (ip6stat.ip6s_m2m[0])) + if (m->m_next != NULL) { + if (m->m_pkthdr.pkt_flags & PKTF_LOOP) { + /* XXX */ + ip6stat.ip6s_m2m[ifnet_index(lo_ifp)]++; + } else if (inifp->if_index < M2MMAX) { + ip6stat.ip6s_m2m[inifp->if_index]++; + } else { ip6stat.ip6s_m2m[0]++; - } else + } + } else { ip6stat.ip6s_m1++; + } #undef M2MMAX } /* - * Drop the packet if IPv6 operation is disabled on the IF; - * accessing the flag is done without acquiring nd_ifinfo lock - * for performance reasons. + * Drop the packet if IPv6 operation is disabled on the interface. */ - lck_rw_lock_shared(nd_if_rwlock); - if (m->m_pkthdr.rcvif->if_index < nd_ifinfo_indexlim && - (nd_ifinfo[m->m_pkthdr.rcvif->if_index].flags & ND6_IFF_IFDISABLED)) { - lck_rw_done(nd_if_rwlock); + if (inifp->if_eflags & IFEF_IPV6_DISABLED) goto bad; - } - lck_rw_done(nd_if_rwlock); - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); + in6_ifstat_inc_na(inifp, ifs6_in_receive); ip6stat.ip6s_total++; #ifndef PULLDOWN_TEST @@ -591,7 +569,7 @@ ip6_input(struct mbuf *m) * that does not conform to KAME requirement. too bad. * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? */ - if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { + if (m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; MGETHDR(n, M_DONTWAIT, MT_HEADER); /* MAC-OK */ @@ -612,14 +590,11 @@ ip6_input(struct mbuf *m) m_freem(m); m = n; } - IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), - {goto done;}); + IP6_EXTHDR_CHECK(m, 0, sizeof (struct ip6_hdr), { goto done; }); #endif - if (m->m_len < sizeof(struct ip6_hdr)) { - struct ifnet *inifp; - inifp = m->m_pkthdr.rcvif; - if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) { + if (m->m_len < sizeof (struct ip6_hdr)) { + if ((m = m_pullup(m, sizeof (struct ip6_hdr))) == 0) { ip6stat.ip6s_toosmall++; in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto done; @@ -630,7 +605,7 @@ ip6_input(struct mbuf *m) if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { ip6stat.ip6s_badvers++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); + in6_ifstat_inc(inifp, ifs6_in_hdrerr); goto bad; } @@ -651,7 +626,7 @@ ip6_input(struct mbuf *m) if (!m) goto done; } -#endif +#endif /* IPFW2 */ /* * Check against address spoofing/corruption. @@ -662,11 +637,11 @@ ip6_input(struct mbuf *m) * XXX: "badscope" is not very suitable for a multicast source. 
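The spoofing checks around this hunk reject packets whose source address is multicast, or which carry V4-mapped addresses on the wire. The same tests in standalone form, using the standard IN6_IS_ADDR_* macros from <netinet/in.h>:

#include <netinet/in.h>

/* returns 1 if the address pair may proceed, 0 if ip6_input() would drop it */
static int
ip6_addrs_sane(const struct in6_addr *src, const struct in6_addr *dst)
{
        if (IN6_IS_ADDR_MULTICAST(src))         /* forged source */
                return (0);
        if (IN6_IS_ADDR_V4MAPPED(src) || IN6_IS_ADDR_V4MAPPED(dst))
                return (0);                     /* mapped addrs never on the wire */
        return (1);
}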
*/ ip6stat.ip6s_badscope++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + in6_ifstat_inc(inifp, ifs6_in_addrerr); goto bad; } if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) && - !(m->m_flags & M_LOOP)) { + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { /* * In this case, the packet should come from the loopback * interface. However, we cannot just check the if_flags, @@ -674,7 +649,7 @@ ip6_input(struct mbuf *m) * as the outgoing/incoming interface. */ ip6stat.ip6s_badscope++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + in6_ifstat_inc(inifp, ifs6_in_addrerr); goto bad; } @@ -693,7 +668,7 @@ ip6_input(struct mbuf *m) if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + in6_ifstat_inc(inifp, ifs6_in_addrerr); goto bad; } #if 0 @@ -707,7 +682,7 @@ ip6_input(struct mbuf *m) if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + in6_ifstat_inc(inifp, ifs6_in_addrerr); goto bad; } #endif @@ -725,29 +700,32 @@ ip6_input(struct mbuf *m) */ if (nstat_collect) { struct rtentry *rte = - ifnet_cached_rtlookup_inet6(m->m_pkthdr.rcvif, - &ip6->ip6_src); + ifnet_cached_rtlookup_inet6(inifp, &ip6->ip6_src); if (rte != NULL) { nstat_route_rx(rte, 1, m->m_pkthdr.len, 0); rtfree(rte); } } + /* for consistency */ + m->m_pkthdr.pkt_proto = ip6->ip6_nxt; + #if DUMMYNET check_with_pf: -#endif +#endif /* DUMMYNET */ #if PF /* Invoke inbound packet filter */ if (PF_IS_ENABLED) { int error; #if DUMMYNET - error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET6, TRUE, &args); -#else - error = pf_af_hook(m->m_pkthdr.rcvif, NULL, &m, AF_INET6, TRUE, NULL); -#endif + error = pf_af_hook(inifp, NULL, &m, AF_INET6, TRUE, &args); +#else /* !DUMMYNET */ + error = pf_af_hook(inifp, NULL, &m, AF_INET6, TRUE, NULL); +#endif /* !DUMMYNET */ if (error != 0 || m == NULL) { if (m != NULL) { - panic("%s: unexpected packet %p\n", __func__, m); + panic("%s: unexpected packet %p\n", + __func__, m); /* NOTREACHED */ } /* Already freed by callee */ @@ -758,7 +736,8 @@ check_with_pf: #endif /* PF */ /* drop packets if interface ID portion is already filled */ - if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { + if (!(inifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src) && ip6->ip6_src.s6_addr16[1]) { ip6stat.ip6s_badscope++; @@ -771,27 +750,33 @@ check_with_pf: } } - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) - ip6->ip6_src.s6_addr16[1] - = htons(m->m_pkthdr.rcvif->if_index); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) - ip6->ip6_dst.s6_addr16[1] - = htons(m->m_pkthdr.rcvif->if_index); + if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) { + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = + htons(m->m_pkthdr.src_ifindex); + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = + htons(m->m_pkthdr.dst_ifindex); + } else { + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = htons(inifp->if_index); + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = htons(inifp->if_index); + } /* * Multicast check */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *in6m = NULL; - struct ifnet *ifp = m->m_pkthdr.rcvif; - in6_ifstat_inc(ifp, ifs6_in_mcast); + in6_ifstat_inc_na(inifp, ifs6_in_mcast); /* * See if we belong to the destination multicast group on the * 
arrival interface. */ in6_multihead_lock_shared(); - IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m); + IN6_LOOKUP_MULTI(&ip6->ip6_dst, inifp, in6m); in6_multihead_lock_done(); if (in6m != NULL) { IN6M_REMREF(in6m); @@ -799,31 +784,74 @@ check_with_pf: } else if (!nd6_prproxy #if MROUTING && !ip6_mrouter -#endif +#endif /* MROUTING */ ) { ip6stat.ip6s_notmember++; ip6stat.ip6s_cantforward++; - in6_ifstat_inc(ifp, ifs6_in_discard); + in6_ifstat_inc(inifp, ifs6_in_discard); goto bad; } - deliverifp = ifp; + deliverifp = inifp; + VERIFY(ia6 == NULL); goto hbhcheck; } /* - * Unicast check + * Unicast check + * + * Fast path: see if the target is ourselves. */ - dst6 = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst; - dst6->sin6_len = sizeof(struct sockaddr_in6); + lck_rw_lock_shared(&in6_ifaddr_rwlock); + for (ia6 = in6_ifaddrs; ia6 != NULL; ia6 = ia6->ia_next) { + /* + * No reference is held on the address, as we just need + * to test for a few things while holding the RW lock. + */ + if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &ip6->ip6_dst)) + break; + } + + if (ia6 != NULL) { + /* + * For performance, test without acquiring the address lock; + * a lot of things in the address are set once and never + * changed (e.g. ia_ifp.) + */ + if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { + /* this address is ready */ + ours = 1; + deliverifp = ia6->ia_ifp; + /* + * record dst address information into mbuf. + */ + (void) ip6_setdstifaddr_info(m, 0, ia6); + lck_rw_done(&in6_ifaddr_rwlock); + goto hbhcheck; + } + lck_rw_done(&in6_ifaddr_rwlock); + ia6 = NULL; + /* address is not ready, so discard the packet. */ + nd6log((LOG_INFO, "%s: packet to an unready address %s->%s\n", + __func__, ip6_sprintf(&ip6->ip6_src), + ip6_sprintf(&ip6->ip6_dst))); + goto bad; + } + lck_rw_done(&in6_ifaddr_rwlock); + + /* + * Slow path: route lookup. + */ + dst6 = SIN6(&rin6.ro_dst); + dst6->sin6_len = sizeof (struct sockaddr_in6); dst6->sin6_family = AF_INET6; dst6->sin6_addr = ip6->ip6_dst; - rtalloc_scoped_ign((struct route *)&ip6_forward_rt, + rtalloc_scoped_ign((struct route *)&rin6, RTF_PRCLONING, IFSCOPE_NONE); - if (ip6_forward_rt.ro_rt != NULL) - RT_LOCK(ip6_forward_rt.ro_rt); + if (rin6.ro_rt != NULL) + RT_LOCK_SPIN(rin6.ro_rt); -#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) +#define rt6_key(r) (SIN6((r)->rt_nodes->rn_key)) /* * Accept the packet if the forwarding interface to the destination @@ -845,59 +873,43 @@ check_with_pf: * while it would be less efficient. Or, should we rather install a * reject route for such a case? */ - if (ip6_forward_rt.ro_rt != NULL && - (ip6_forward_rt.ro_rt->rt_flags & - (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && + if (rin6.ro_rt != NULL && + (rin6.ro_rt->rt_flags & (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && #if RTF_WASCLONED - !(ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) && -#endif -#if 0 - /* - * The check below is redundant since the comparison of - * the destination and the key of the rtentry has - * already done through looking up the routing table. - */ - IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, - &rt6_key(ip6_forward_rt.ro_rt)->sin6_addr) + !(rin6.ro_rt->rt_flags & RTF_WASCLONED) && #endif - ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) { - ia6 = (struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa; - - /* - * record address information into m_aux. 
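The new unicast fast path above walks the global address list under the shared in6_ifaddr_rwlock and accepts the packet only if the matching address is past DAD. The shape of that lookup as a userland sketch, with pthread primitives standing in for the kernel lock (addr_ent and addrs are hypothetical stand-ins):

#include <pthread.h>
#include <string.h>
#include <netinet/in.h>

struct addr_ent {
        struct in6_addr  addr;
        int              notready;      /* cf. IN6_IFF_NOTREADY */
        struct addr_ent *next;
};

static pthread_rwlock_t addrs_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct addr_ent *addrs;          /* cf. in6_ifaddrs */

static int
is_ours(const struct in6_addr *dst)
{
        struct addr_ent *a;
        int ours = 0;

        pthread_rwlock_rdlock(&addrs_lock);     /* readers run concurrently */
        for (a = addrs; a != NULL; a = a->next) {
                if (memcmp(&a->addr, dst, sizeof (*dst)) == 0) {
                        ours = !a->notready;    /* tentative/duplicated: no */
                        break;
                }
        }
        pthread_rwlock_unlock(&addrs_lock);
        return (ours);
}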
- */ - (void)ip6_setdstifaddr(m, ia6); - + rin6.ro_rt->rt_ifp->if_type == IFT_LOOP) { + ia6 = (struct in6_ifaddr *)rin6.ro_rt->rt_ifa; /* - * packets to a tentative, duplicated, or somehow invalid + * Packets to a tentative, duplicated, or somehow invalid * address must not be accepted. + * + * For performance, test without acquiring the address lock; + * a lot of things in the address are set once and never + * changed (e.g. ia_ifp.) */ - RT_CONVERT_LOCK(ip6_forward_rt.ro_rt); /* just in case */ - IFA_LOCK_SPIN(&ia6->ia_ifa); if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { - IFA_UNLOCK(&ia6->ia_ifa); /* this address is ready */ ours = 1; deliverifp = ia6->ia_ifp; /* correct? */ - /* Count the packet in the ip address stats */ - - RT_UNLOCK(ip6_forward_rt.ro_rt); - ia6 = NULL; + /* + * record dst address information into mbuf. + */ + (void) ip6_setdstifaddr_info(m, 0, ia6); + RT_UNLOCK(rin6.ro_rt); goto hbhcheck; } - IFA_UNLOCK(&ia6->ia_ifa); - RT_UNLOCK(ip6_forward_rt.ro_rt); + RT_UNLOCK(rin6.ro_rt); + ia6 = NULL; /* address is not ready, so discard the packet. */ - nd6log((LOG_INFO, - "ip6_input: packet to an unready address %s->%s\n", - ip6_sprintf(&ip6->ip6_src), + nd6log((LOG_INFO, "%s: packet to an unready address %s->%s\n", + __func__, ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst))); - ia6 = NULL; goto bad; } - if (ip6_forward_rt.ro_rt != NULL) - RT_UNLOCK(ip6_forward_rt.ro_rt); + if (rin6.ro_rt != NULL) + RT_UNLOCK(rin6.ro_rt); /* * Now there is no reason to process the packet if it's not our own @@ -905,36 +917,24 @@ check_with_pf: */ if (!ip6_forwarding) { ip6stat.ip6s_cantforward++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); + in6_ifstat_inc(inifp, ifs6_in_discard); goto bad; } - hbhcheck: +hbhcheck: /* - * record address information into m_aux, if we don't have one yet. + * record dst address information into mbuf, if we don't have one yet. * note that we are unable to record it, if the address is not listed * as our interface address (e.g. multicast addresses, etc.) */ - if (deliverifp && (ia6 = ip6_getdstifaddr(m)) == NULL) { + if (deliverifp != NULL && ia6 == NULL) { ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); - if (ia6) { - if (!ip6_setdstifaddr(m, ia6)) { - /* - * XXX maybe we should drop the packet here, - * as we could not provide enough information - * to the upper layers. - */ - } + if (ia6 != NULL) { + (void) ip6_setdstifaddr_info(m, 0, ia6); IFA_REMREF(&ia6->ia_ifa); - ia6 = NULL; } } - if (ia6 != NULL) { - IFA_REMREF(&ia6->ia_ifa); - ia6 = NULL; - } - /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). @@ -945,8 +945,8 @@ check_with_pf: struct ip6_hbh *hbh; if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) { -#if 0 /*touches NULL pointer*/ - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); +#if 0 /* touches NULL pointer */ + in6_ifstat_inc(inifp, ifs6_in_discard); #endif goto done; /* m have already been freed */ } @@ -966,19 +966,18 @@ check_with_pf: * (non-zero) payload length to the variable plen. 
*/ ip6stat.ip6s_badoptions++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); - icmp6_error(m, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); + in6_ifstat_inc(inifp, ifs6_in_discard); + in6_ifstat_inc(inifp, ifs6_in_hdrerr); + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); goto done; } #ifndef PULLDOWN_TEST /* ip6_hopopts_input() ensures that mbuf is contiguous */ hbh = (struct ip6_hbh *)(ip6 + 1); #else - IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), - sizeof(struct ip6_hbh)); + IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, + sizeof (struct ip6_hdr), sizeof (struct ip6_hbh)); if (hbh == NULL) { ip6stat.ip6s_tooshort++; goto done; @@ -1015,17 +1014,35 @@ check_with_pf: * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ - if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { + if (m->m_pkthdr.len - sizeof (struct ip6_hdr) < plen) { ip6stat.ip6s_tooshort++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); + in6_ifstat_inc(inifp, ifs6_in_truncated); goto bad; } - if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { + if (m->m_pkthdr.len > sizeof (struct ip6_hdr) + plen) { + /* + * Invalidate hardware checksum info if ip6_adj_clear_hwcksum + * is set; useful to handle buggy drivers. Note that this + * should not be enabled by default, as we may get here due + * to link-layer padding. + */ + if (ip6_adj_clear_hwcksum && + (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) && + !(inifp->if_flags & IFF_LOOPBACK) && + !(m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + m->m_pkthdr.csum_data = 0; + ip6stat.ip6s_adj_hwcsum_clr++; + } + + ip6stat.ip6s_adj++; if (m->m_len == m->m_pkthdr.len) { - m->m_len = sizeof(struct ip6_hdr) + plen; - m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; - } else - m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); + m->m_len = sizeof (struct ip6_hdr) + plen; + m->m_pkthdr.len = sizeof (struct ip6_hdr) + plen; + } else { + m_adj(m, sizeof (struct ip6_hdr) + plen - + m->m_pkthdr.len); + } } /* @@ -1041,11 +1058,11 @@ check_with_pf: * must be discarded, else it may be accepted below. */ #if MROUTING - if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) { + if (ip6_mrouter && ip6_mforward(ip6, inifp, m)) { ip6stat.ip6s_cantforward++; goto bad; } -#endif +#endif /* MROUTING */ if (!ours && nd6_prproxy) { /* * If this isn't for us, this might be a Neighbor @@ -1056,7 +1073,7 @@ check_with_pf: */ ours = nd6_prproxy_isours(m, ip6, NULL, IFSCOPE_NONE); VERIFY(!ours || - (m->m_pkthdr.aux_flags & MAUXF_PROXY_DST)); + (m->m_pkthdr.pkt_flags & PKTF_PROXY_DST)); } if (!ours) goto bad; @@ -1073,11 +1090,11 @@ check_with_pf: * proxied nodes on different links (src is link-local, dst * is target address.) 
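/*
 * [Editor's aside: illustrative sketch, not part of the patch.]
 * The trimming hunk above is new in one respect: when the
 * ip6_adj_clear_hwcksum knob is set, a hardware checksum that was
 * computed over link-layer padding is invalidated before the trim.
 * A sketch of that logic follows; the flag values and struct pkt are
 * hypothetical, standing in for mbuf pkthdr fields.
 */
#include <stdint.h>

#define CSUM_DATA_VALID_F 0x1   /* hypothetical csum_flags bit */
#define PKT_LOOP_F        0x2   /* hypothetical PKTF_LOOP analogue */

struct pkt {
    uint32_t len;               /* total bytes, m_pkthdr.len analogue */
    uint32_t csum_flags;
    uint32_t csum_data;
    uint32_t pkt_flags;
};

static void
trim_to_plen(struct pkt *p, uint32_t hdrlen, uint32_t plen, int clear_hwcksum)
{
    if (p->len <= hdrlen + plen)
        return;                 /* nothing to trim */
    if (clear_hwcksum && (p->csum_flags & CSUM_DATA_VALID_F) &&
        !(p->pkt_flags & PKT_LOOP_F)) {
        /* the checksum covered the padding; discard it */
        p->csum_flags &= ~CSUM_DATA_VALID_F;
        p->csum_data = 0;
    }
    p->len = hdrlen + plen;     /* drop the trailing padding */
}
/* [End aside.] */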
*/ - if ((m = ip6_forward(m, &ip6_forward_rt, 0)) == NULL) + if ((m = ip6_forward(m, &rin6, 0)) == NULL) goto done; - VERIFY(ip6_forward_rt.ro_rt != NULL); - VERIFY(m->m_pkthdr.aux_flags & MAUXF_PROXY_DST); - deliverifp = ip6_forward_rt.ro_rt->rt_ifp; + VERIFY(rin6.ro_rt != NULL); + VERIFY(m->m_pkthdr.pkt_flags & PKTF_PROXY_DST); + deliverifp = rin6.ro_rt->rt_ifp; ours = 1; } @@ -1095,7 +1112,7 @@ check_with_pf: if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); + in6_ifstat_inc(inifp, ifs6_in_addrerr); goto bad; } @@ -1103,7 +1120,7 @@ check_with_pf: * Tell launch routine the next header */ ip6stat.ip6s_delivered++; - in6_ifstat_inc(deliverifp, ifs6_in_deliver); + in6_ifstat_inc_na(deliverifp, ifs6_in_deliver); injectit: nest = 0; @@ -1113,7 +1130,7 @@ injectit: * we do it once for the outermost protocol, and we assume each * protocol handler wouldn't mess with the alignment afterwards. */ - IP6_HDR_ALIGNMENT_FIXUP(m, m->m_pkthdr.rcvif, return;); + IP6_HDR_ALIGNMENT_FIXUP(m, inifp, return); while (nxt != IPPROTO_DONE) { struct ipfilter *filter; @@ -1130,7 +1147,7 @@ injectit: */ if (m->m_pkthdr.len < off) { ip6stat.ip6s_tooshort++; - in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); + in6_ifstat_inc(inifp, ifs6_in_truncated); goto bad; } @@ -1146,9 +1163,9 @@ injectit: if (ipsec6_in_reject(m, NULL)) { IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); goto bad; - } + } } -#endif +#endif /* IPSEC */ /* * Call IP filter @@ -1164,8 +1181,8 @@ injectit: errno_t result; result = filter->ipf_filter.ipf_input( - filter->ipf_filter.cookie, - (mbuf_t *)&m, off, nxt); + filter->ipf_filter.cookie, + (mbuf_t *)&m, off, nxt); if (result == EJUSTRETURN) { ipf_unref(); goto done; @@ -1180,8 +1197,8 @@ injectit: } DTRACE_IP6(receive, struct mbuf *, m, struct inpcb *, NULL, - struct ip6_hdr *, ip6, struct ifnet *, m->m_pkthdr.rcvif, - struct ip *, NULL, struct ip6_hdr *, ip6); + struct ip6_hdr *, ip6, struct ifnet *, inifp, + struct ip *, NULL, struct ip6_hdr *, ip6); if ((pr_input = ip6_protox[nxt]->pr_input) == NULL) { m_freem(m); @@ -1196,47 +1213,93 @@ injectit: } } done: - if (ip6_forward_rt.ro_rt != NULL) - rtfree(ip6_forward_rt.ro_rt); + ROUTE_RELEASE(&rin6); return; - bad: +bad: m_freem(m); goto done; } -/* - * set/grab in6_ifaddr correspond to IPv6 destination address. - * XXX backward compatibility wrapper - */ -static struct ip6aux * -ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6) +void +ip6_setsrcifaddr_info(struct mbuf *m, uint32_t src_idx, struct in6_ifaddr *ia6) { - struct ip6aux *n; - - n = ip6_addaux(m); - if (n != NULL) { - if (ia6 != NULL) - IFA_ADDREF(&ia6->ia_ifa); - if (n->ip6a_dstia6 != NULL) - IFA_REMREF(&n->ip6a_dstia6->ia_ifa); - n->ip6a_dstia6 = ia6; + VERIFY(m->m_flags & M_PKTHDR); + + /* + * If the source ifaddr is specified, pick up the information + * from there; otherwise just grab the passed-in ifindex as the + * caller may not have the ifaddr available. 
+ */ + if (ia6 != NULL) { + m->m_pkthdr.pkt_flags |= PKTF_IFAINFO; + m->m_pkthdr.src_ifindex = ia6->ia_ifp->if_index; + + /* See IN6_IFF comments in in6_var.h */ + m->m_pkthdr.src_iff = (ia6->ia6_flags & 0xffff); + } else { + m->m_pkthdr.src_iff = 0; + m->m_pkthdr.src_ifindex = src_idx; + if (src_idx != 0) + m->m_pkthdr.pkt_flags |= PKTF_IFAINFO; } - return (struct ip6aux *)n; /* NULL if failed to set */ } -struct in6_ifaddr * -ip6_getdstifaddr(m) - struct mbuf *m; +void +ip6_setdstifaddr_info(struct mbuf *m, uint32_t dst_idx, struct in6_ifaddr *ia6) { - struct ip6aux *n; + VERIFY(m->m_flags & M_PKTHDR); - n = ip6_findaux(m); - if (n != NULL) { - if (n->ip6a_dstia6 != NULL) - IFA_ADDREF(&n->ip6a_dstia6->ia_ifa); - return (n->ip6a_dstia6); + /* + * If the destination ifaddr is specified, pick up the information + * from there; otherwise just grab the passed-in ifindex as the + * caller may not have the ifaddr available. + */ + if (ia6 != NULL) { + m->m_pkthdr.pkt_flags |= PKTF_IFAINFO; + m->m_pkthdr.dst_ifindex = ia6->ia_ifp->if_index; + + /* See IN6_IFF comments in in6_var.h */ + m->m_pkthdr.dst_iff = (ia6->ia6_flags & 0xffff); + } else { + m->m_pkthdr.dst_iff = 0; + m->m_pkthdr.dst_ifindex = dst_idx; + if (dst_idx != 0) + m->m_pkthdr.pkt_flags |= PKTF_IFAINFO; } - return (NULL); +} + +int +ip6_getsrcifaddr_info(struct mbuf *m, uint32_t *src_idx, uint32_t *ia6f) +{ + VERIFY(m->m_flags & M_PKTHDR); + + if (!(m->m_pkthdr.pkt_flags & PKTF_IFAINFO)) + return (-1); + + if (src_idx != NULL) + *src_idx = m->m_pkthdr.src_ifindex; + + if (ia6f != NULL) + *ia6f = m->m_pkthdr.src_iff; + + return (0); +} + +int +ip6_getdstifaddr_info(struct mbuf *m, uint32_t *dst_idx, uint32_t *ia6f) +{ + VERIFY(m->m_flags & M_PKTHDR); + + if (!(m->m_pkthdr.pkt_flags & PKTF_IFAINFO)) + return (-1); + + if (dst_idx != NULL) + *dst_idx = m->m_pkthdr.dst_ifindex; + + if (ia6f != NULL) + *ia6f = m->m_pkthdr.dst_iff; + + return (0); } /* @@ -1254,33 +1317,33 @@ ip6_hopopts_input(uint32_t *plenp, uint32_t *rtalertp, struct mbuf **mp, /* validation of the length of the header */ #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), return -1); + IP6_EXTHDR_CHECK(m, off, sizeof (*hbh), return (-1)); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); hbhlen = (hbh->ip6h_len + 1) << 3; - IP6_EXTHDR_CHECK(m, off, hbhlen, return -1); + IP6_EXTHDR_CHECK(m, off, hbhlen, return (-1)); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); #else - IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, - sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); + IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof (struct ip6_hdr), + sizeof (struct ip6_hbh)); if (hbh == NULL) { ip6stat.ip6s_tooshort++; - return -1; + return (-1); } hbhlen = (hbh->ip6h_len + 1) << 3; - IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), - hbhlen); + IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof (struct ip6_hdr), + hbhlen); if (hbh == NULL) { ip6stat.ip6s_tooshort++; - return -1; + return (-1); } #endif off += hbhlen; - hbhlen -= sizeof(struct ip6_hbh); - opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh); + hbhlen -= sizeof (struct ip6_hbh); + opt = (u_int8_t *)hbh + sizeof (struct ip6_hbh); - if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), - hbhlen, rtalertp, plenp) < 0) + if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof (struct ip6_hbh), + hbhlen, rtalertp, plenp) < 0) return (-1); *offp = off; @@ -1311,7 +1374,7 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) u_int8_t *opt = opthead; u_int16_t rtalert_val; u_int32_t jumboplen; 
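/*
 * [Editor's aside: illustrative sketch, not part of the patch.]
 * The ip6_set/getdstifaddr_info() pairs above replace the old
 * allocated ip6aux m_tag with fields kept inline in the packet
 * header, so recording the destination ifaddr can no longer fail.
 * A simplified sketch of that convention follows; the struct, flag
 * bit and field names are hypothetical, and the kernel additionally
 * always sets the flag when an ifaddr (rather than a bare index)
 * was supplied.
 */
#include <stdint.h>
#include <stddef.h>

#define PKTF_IFAINFO_F 0x1      /* hypothetical flag bit */

struct pkthdr_info {
    uint32_t pkt_flags;
    uint16_t dst_ifindex;
    uint16_t dst_iff;           /* address flags, dst_iff analogue */
};

static void
set_dstifaddr_info(struct pkthdr_info *h, uint16_t idx, uint16_t iaflags)
{
    h->dst_ifindex = idx;
    h->dst_iff = iaflags;
    if (idx != 0 || iaflags != 0)
        h->pkt_flags |= PKTF_IFAINFO_F;
}

static int
get_dstifaddr_info(const struct pkthdr_info *h, uint16_t *idx,
    uint16_t *iaflags)
{
    if (!(h->pkt_flags & PKTF_IFAINFO_F))
        return (-1);            /* nothing was recorded */
    if (idx != NULL)
        *idx = h->dst_ifindex;
    if (iaflags != NULL)
        *iaflags = h->dst_iff;
    return (0);
}
/* [End aside.] */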
- const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh); + const int erroff = sizeof (struct ip6_hdr) + sizeof (struct ip6_hbh); for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) { switch (*opt) { @@ -1336,7 +1399,7 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); - return(-1); + return (-1); } optlen = IP6OPT_RTALERT_LEN; bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2); @@ -1353,7 +1416,7 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 1 - opthead); - return(-1); + return (-1); } optlen = IP6OPT_JUMBO_LEN; @@ -1367,14 +1430,14 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt - opthead); - return(-1); + return (-1); } /* * We may see jumbolen in unaligned location, so * we'd need to perform bcopy(). */ - bcopy(opt + 2, &jumboplen, sizeof(jumboplen)); + bcopy(opt + 2, &jumboplen, sizeof (jumboplen)); jumboplen = (u_int32_t)htonl(jumboplen); #if 1 @@ -1391,7 +1454,7 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); - return(-1); + return (-1); } #endif @@ -1403,7 +1466,7 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, erroff + opt + 2 - opthead); - return(-1); + return (-1); } *plenp = jumboplen; @@ -1416,18 +1479,18 @@ ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp) optlen = ip6_unknown_opt(opt, m, erroff + opt - opthead); if (optlen == -1) { - return(-1); + return (-1); } optlen += 2; break; } } - return(0); + return (0); - bad: +bad: m_freem(m); - return(-1); + return (-1); } /* @@ -1443,24 +1506,28 @@ ip6_unknown_opt(uint8_t *optp, struct mbuf *m, int off) switch (IP6OPT_TYPE(*optp)) { case IP6OPT_TYPE_SKIP: /* ignore the option */ - return((int)*(optp + 1)); + return ((int)*(optp + 1)); + case IP6OPT_TYPE_DISCARD: /* silently discard */ m_freem(m); - return(-1); + return (-1); + case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ ip6stat.ip6s_badoptions++; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); - return(-1); + return (-1); + case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ ip6stat.ip6s_badoptions++; ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || - (m->m_flags & (M_BCAST|M_MCAST))) + (m->m_flags & (M_BCAST|M_MCAST))) { m_freem(m); - else + } else { icmp6_error(m, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_OPTION, off); - return(-1); + ICMP6_PARAMPROB_OPTION, off); + } + return (-1); } m_freem(m); /* XXX: NOTREACHED */ @@ -1491,27 +1558,26 @@ ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) { struct timeval tv; - microtime(&tv); - mp = sbcreatecontrol_mbuf((caddr_t) &tv, sizeof(tv), + getmicrotime(&tv); + mp = sbcreatecontrol_mbuf((caddr_t)&tv, sizeof (tv), SCM_TIMESTAMP, SOL_SOCKET, mp); - if (*mp == NULL) - return NULL; + if (*mp == NULL) + return (NULL); } - if ((inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { - uint64_t time; + if ((inp->inp_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { + uint64_t time; - time = mach_absolute_time(); - mp = sbcreatecontrol_mbuf((caddr_t) &time, sizeof(time), - SCM_TIMESTAMP_MONOTONIC, SOL_SOCKET, mp); - - if (*mp == NULL) - return NULL; - } 
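/*
 * [Editor's aside: illustrative sketch, not part of the patch.]
 * ip6_process_hopopts() above walks the TLV-encoded option area:
 * Pad1 is the only option without a length byte, and the jumbo
 * payload length may sit unaligned, hence the bcopy before the
 * byte-order conversion.  A compact user-space walker in the same
 * style follows; only the option type values are real, the rest is
 * a sketch.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

#define OPT_PAD1  0x00
#define OPT_JUMBO 0xc2          /* IP6OPT_JUMBO */
#define JUMBO_LEN 6             /* type + len + 4-byte payload length */

static int
walk_hbh_options(const uint8_t *opt, int len, uint32_t *jumboplen)
{
    int optlen;

    for (; len > 0; len -= optlen, opt += optlen) {
        if (*opt == OPT_PAD1) {
            optlen = 1;         /* no length byte on Pad1 */
            continue;
        }
        if (len < 2 || 2 + opt[1] > len)
            return (-1);        /* truncated option */
        optlen = 2 + opt[1];
        if (*opt == OPT_JUMBO) {
            uint32_t plen;

            if (optlen != JUMBO_LEN)
                return (-1);
            /* may be unaligned: copy out before converting */
            memcpy(&plen, opt + 2, sizeof (plen));
            *jumboplen = ntohl(plen);
        }
    }
    return (0);
}
/* [End aside.] */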
+ time = mach_absolute_time(); + mp = sbcreatecontrol_mbuf((caddr_t)&time, sizeof (time), + SCM_TIMESTAMP_MONOTONIC, SOL_SOCKET, mp); + if (*mp == NULL) + return (NULL); + } if ((inp->inp_socket->so_flags & SOF_RECV_TRAFFIC_CLASS) != 0) { int tc = m_get_traffic_class(m); - mp = sbcreatecontrol_mbuf((caddr_t) &tc, sizeof(tc), - SO_TRAFFIC_CLASS, SOL_SOCKET, mp); + mp = sbcreatecontrol_mbuf((caddr_t)&tc, sizeof (tc), + SO_TRAFFIC_CLASS, SOL_SOCKET, mp); if (*mp == NULL) return (NULL); } @@ -1522,31 +1588,32 @@ ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp, return (mp); } -#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y)) +#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y)) /* RFC 2292 sec. 5 */ if ((inp->inp_flags & IN6P_PKTINFO) != 0) { struct in6_pktinfo pi6; - bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); + bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof (struct in6_addr)); in6_clearscope(&pi6.ipi6_addr); /* XXX */ pi6.ipi6_ifindex = (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; - mp = sbcreatecontrol_mbuf((caddr_t) &pi6, - sizeof(struct in6_pktinfo), - IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6, mp); - if (*mp == NULL) - return NULL; + mp = sbcreatecontrol_mbuf((caddr_t)&pi6, + sizeof (struct in6_pktinfo), + IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), + IPPROTO_IPV6, mp); + if (*mp == NULL) + return (NULL); } if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) { int hlim = ip6->ip6_hlim & 0xff; - mp = sbcreatecontrol_mbuf((caddr_t) &hlim, sizeof(int), + mp = sbcreatecontrol_mbuf((caddr_t)&hlim, sizeof (int), IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT), IPPROTO_IPV6, mp); - if (*mp == NULL) - return NULL; + if (*mp == NULL) + return (NULL); } if (v4only != NULL) @@ -1568,7 +1635,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) mp = np; if (v4only) - return(0); + return (0); if ((in6p->inp_flags & IN6P_TCLASS) != 0) { u_int32_t flowinfo; @@ -1578,9 +1645,9 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) flowinfo >>= 20; tclass = flowinfo & 0xff; - mp = sbcreatecontrol_mbuf((caddr_t) &tclass, sizeof(tclass), + mp = sbcreatecontrol_mbuf((caddr_t)&tclass, sizeof (tclass), IPV6_TCLASS, IPPROTO_IPV6, mp); - if (*mp == NULL) + if (*mp == NULL) goto no_mbufs; } @@ -1611,18 +1678,18 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) hbh = (struct ip6_hbh *)(ip6 + 1); hbhlen = (hbh->ip6h_len + 1) << 3; #else - ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr), + ext = ip6_pullexthdr(m, sizeof (struct ip6_hdr), ip6->ip6_nxt); if (ext == NULL) { ip6stat.ip6s_tooshort++; - return(0); + return (0); } hbh = mtod(ext, struct ip6_hbh *); hbhlen = (hbh->ip6h_len + 1) << 3; if (hbhlen != ext->m_len) { m_freem(ext); ip6stat.ip6s_tooshort++; - return(0); + return (0); } #endif @@ -1647,7 +1714,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) } if ((in6p->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) { - int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); + int nxt = ip6->ip6_nxt, off = sizeof (struct ip6_hdr); /* * Search for destination options headers or routing @@ -1678,7 +1745,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) } #ifndef PULLDOWN_TEST - if (off + sizeof(*ip6e) > m->m_len) + if (off + sizeof (*ip6e) > m->m_len) goto loopend; ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); if (nxt == IPPROTO_AH) @@ -1691,7 +1758,7 @@ ip6_savecontrol(struct inpcb *in6p, struct 
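/*
 * [Editor's aside: illustrative sketch, not part of the patch.]
 * The calls above all follow one convention: sbcreatecontrol_mbuf()
 * appends a control record and returns a slot pointer, and a NULL
 * *mp after the call signals an allocation failure.  The sketch
 * below reconstructs that convention over a plain linked list;
 * struct ctl and append_ctl() are hypothetical, and the kernel
 * detail of packing several records into one mbuf is elided.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct ctl {
    struct ctl *next;
    int         type;
    size_t      len;
    uint8_t     data[32];
};

static struct ctl **
append_ctl(const void *p, size_t len, int type, struct ctl **tailp)
{
    if (*tailp != NULL)
        tailp = &(*tailp)->next;        /* advance to the empty slot */
    *tailp = calloc(1, sizeof (**tailp));
    if (*tailp != NULL) {
        struct ctl *c = *tailp;

        c->type = type;
        c->len = len < sizeof (c->data) ? len : sizeof (c->data);
        memcpy(c->data, p, c->len);
    }
    return (tailp);     /* *tailp == NULL signals allocation failure */
}

/*
 * Usage mirrors the pattern above:
 *
 *     tailp = append_ctl(&tv, sizeof (tv), 1, tailp);
 *     if (*tailp == NULL)
 *         return (NULL);
 */
/* [End aside.] */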
mbuf *m, struct mbuf **mp) ext = ip6_pullexthdr(m, off, nxt); if (ext == NULL) { ip6stat.ip6s_tooshort++; - return(0); + return (0); } ip6e = mtod(ext, struct ip6_ext *); if (nxt == IPPROTO_AH) @@ -1701,7 +1768,7 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) if (elen != ext->m_len) { m_freem(ext); ip6stat.ip6s_tooshort++; - return(0); + return (0); } #endif @@ -1711,10 +1778,9 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) break; mp = sbcreatecontrol_mbuf((caddr_t)ip6e, elen, - IS2292(in6p, - IPV6_2292DSTOPTS, IPV6_DSTOPTS), - IPPROTO_IPV6, mp); - if (*mp == NULL) { + IS2292(in6p, IPV6_2292DSTOPTS, + IPV6_DSTOPTS), IPPROTO_IPV6, mp); + if (*mp == NULL) { #if PULLDOWN_TEST m_freem(ext); #endif @@ -1762,14 +1828,14 @@ ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp) ext = NULL; #endif } - loopend: +loopend: ; } - return(0); + return (0); no_mbufs: ip6stat.ip6s_pktdropcntrl++; /* XXX increment a stat to show the failure */ - return(ENOBUFS); + return (ENOBUFS); } #undef IS2292 @@ -1786,28 +1852,28 @@ ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu) return; #ifdef DIAGNOSTIC - if (so == NULL) /* I believe this is impossible */ + if (so == NULL) { /* I believe this is impossible */ panic("ip6_notify_pmtu: socket is NULL"); + /* NOTREACHED */ + } #endif - bzero(&mtuctl, sizeof(mtuctl)); /* zero-clear for safety */ + bzero(&mtuctl, sizeof (mtuctl)); /* zero-clear for safety */ mtuctl.ip6m_mtu = *mtu; mtuctl.ip6m_addr = *dst; if (sa6_recoverscope(&mtuctl.ip6m_addr, TRUE)) return; - if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof(mtuctl), + if ((m_mtu = sbcreatecontrol((caddr_t)&mtuctl, sizeof (mtuctl), IPV6_PATHMTU, IPPROTO_IPV6)) == NULL) return; - if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, m_mtu, NULL) - == 0) { + if (sbappendaddr(&so->so_rcv, SA(dst), NULL, m_mtu, NULL) == 0) { m_freem(m_mtu); /* XXX: should count statistics */ - } else + } else { sorwakeup(so); - - return; + } } #if PULLDOWN_TEST @@ -1837,7 +1903,7 @@ ip6_pullexthdr(m, off, nxt) } #endif - m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); + m_copydata(m, off, sizeof (ip6e), (caddr_t)&ip6e); if (nxt == IPPROTO_AH) elen = (ip6e.ip6e_len + 2) << 2; else @@ -1852,17 +1918,17 @@ ip6_pullexthdr(m, off, nxt) } } if (!n) - return NULL; + return (NULL); n->m_len = 0; if (elen >= M_TRAILINGSPACE(n)) { m_free(n); - return NULL; + return (NULL); } m_copydata(m, off, elen, mtod(n, caddr_t)); n->m_len = elen; - return n; + return (n); } #endif @@ -1885,20 +1951,20 @@ ip6_get_prevhdr(m, off) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - if (off == sizeof(struct ip6_hdr)) - return((char *) &ip6->ip6_nxt); - else { + if (off == sizeof (struct ip6_hdr)) { + return ((char *)&ip6->ip6_nxt); + } else { int len, nxt; struct ip6_ext *ip6e = NULL; nxt = ip6->ip6_nxt; - len = sizeof(struct ip6_hdr); + len = sizeof (struct ip6_hdr); while (len < off) { ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len); switch (nxt) { case IPPROTO_FRAGMENT: - len += sizeof(struct ip6_frag); + len += sizeof (struct ip6_frag); break; case IPPROTO_AH: len += (ip6e->ip6e_len + 2) << 2; @@ -1910,9 +1976,9 @@ ip6_get_prevhdr(m, off) nxt = ip6e->ip6e_nxt; } if (ip6e) - return((char *) &ip6e->ip6e_nxt); + return ((char *)&ip6e->ip6e_nxt); else - return NULL; + return (NULL); } } @@ -1920,90 +1986,81 @@ ip6_get_prevhdr(m, off) * get next header offset. m will be retained. 
*/ int -ip6_nexthdr(m, off, proto, nxtp) - struct mbuf *m; - int off; - int proto; - int *nxtp; +ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp) { struct ip6_hdr ip6; struct ip6_ext ip6e; struct ip6_frag fh; /* just in case */ - if (m == NULL) - panic("ip6_nexthdr: m == NULL"); + VERIFY(m != NULL); if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off) - return -1; + return (-1); switch (proto) { case IPPROTO_IPV6: - if (m->m_pkthdr.len < off + sizeof(ip6)) - return -1; - m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6); + if (m->m_pkthdr.len < off + sizeof (ip6)) + return (-1); + m_copydata(m, off, sizeof (ip6), (caddr_t)&ip6); if (nxtp) *nxtp = ip6.ip6_nxt; - off += sizeof(ip6); - return off; + off += sizeof (ip6); + return (off); case IPPROTO_FRAGMENT: /* * terminate parsing if it is not the first fragment, * it does not make sense to parse through it. */ - if (m->m_pkthdr.len < off + sizeof(fh)) - return -1; - m_copydata(m, off, sizeof(fh), (caddr_t)&fh); + if (m->m_pkthdr.len < off + sizeof (fh)) + return (-1); + m_copydata(m, off, sizeof (fh), (caddr_t)&fh); /* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */ if (fh.ip6f_offlg & IP6F_OFF_MASK) - return -1; + return (-1); if (nxtp) *nxtp = fh.ip6f_nxt; - off += sizeof(struct ip6_frag); - return off; + off += sizeof (struct ip6_frag); + return (off); case IPPROTO_AH: - if (m->m_pkthdr.len < off + sizeof(ip6e)) - return -1; - m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); + if (m->m_pkthdr.len < off + sizeof (ip6e)) + return (-1); + m_copydata(m, off, sizeof (ip6e), (caddr_t)&ip6e); if (nxtp) *nxtp = ip6e.ip6e_nxt; off += (ip6e.ip6e_len + 2) << 2; - return off; + return (off); case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: - if (m->m_pkthdr.len < off + sizeof(ip6e)) - return -1; - m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); + if (m->m_pkthdr.len < off + sizeof (ip6e)) + return (-1); + m_copydata(m, off, sizeof (ip6e), (caddr_t)&ip6e); if (nxtp) *nxtp = ip6e.ip6e_nxt; off += (ip6e.ip6e_len + 1) << 3; - return off; + return (off); case IPPROTO_NONE: case IPPROTO_ESP: case IPPROTO_IPCOMP: /* give up */ - return -1; + return (-1); default: - return -1; + return (-1); } - return -1; + return (-1); } /* * get offset for the last header in the chain. m will be kept untainted. */ int -ip6_lasthdr(m, off, proto, nxtp) - struct mbuf *m; - int off; - int proto; - int *nxtp; +ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp) { int newoff; int nxt; @@ -2015,11 +2072,11 @@ ip6_lasthdr(m, off, proto, nxtp) while (1) { newoff = ip6_nexthdr(m, off, proto, nxtp); if (newoff < 0) - return off; + return (off); else if (newoff < off) - return -1; /* invalid */ + return (-1); /* invalid */ else if (newoff == off) - return newoff; + return (newoff); off = newoff; proto = *nxtp; @@ -2072,26 +2129,14 @@ ip6_delaux(struct mbuf *m) } /* - * Called by m_tag_free(). 
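/*
 * [Editor's aside: illustrative sketch, not part of the patch.]
 * ip6_nexthdr()/ip6_lasthdr() above (now ANSI-prototyped) step
 * through the extension header chain: generic headers count 8-byte
 * units, while AH counts 4-byte units plus two.  A flat-buffer
 * analogue follows; it is a simplification (no fragment handling,
 * and errors simply stop the walk).
 */
#include <netinet/in.h>
#include <stdint.h>
#include <stddef.h>

static int
next_hdr(const uint8_t *pkt, size_t pktlen, int off, int proto, int *nxtp)
{
    if ((size_t)off + 2 > pktlen)
        return (-1);
    switch (proto) {
    case IPPROTO_HOPOPTS:
    case IPPROTO_ROUTING:
    case IPPROTO_DSTOPTS:
        *nxtp = pkt[off];
        /* generic extension headers: (len + 1) 8-byte units */
        return (off + ((pkt[off + 1] + 1) << 3));
    case IPPROTO_AH:
        *nxtp = pkt[off];
        /* AH: (len + 2) 4-byte units */
        return (off + ((pkt[off + 1] + 2) << 2));
    default:
        return (-1);            /* upper layer or ESP: stop walking */
    }
}

static int
last_hdr(const uint8_t *pkt, size_t pktlen, int off, int proto, int *nxtp)
{
    int newoff;

    *nxtp = proto;
    while ((newoff = next_hdr(pkt, pktlen, off, proto, nxtp)) > off) {
        off = newoff;
        proto = *nxtp;
    }
    return (off);
}
/* [End aside.] */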
- */ -void -ip6_destroyaux(struct ip6aux *n) -{ - if (n->ip6a_dstia6 != NULL) { - IFA_REMREF(&n->ip6a_dstia6->ia_ifa); - n->ip6a_dstia6 = NULL; - } -} - -/* - * Called by m_tag_copy() + * Drain callback */ void -ip6_copyaux(struct ip6aux *src, struct ip6aux *dst) +ip6_drain(void) { - bcopy(src, dst, sizeof (*dst)); - if (dst->ip6a_dstia6 != NULL) - IFA_ADDREF(&dst->ip6a_dstia6->ia_ifa); + frag6_drain(); /* fragments */ + in6_rtqdrain(); /* protocol cloned routes */ + nd6_drain(NULL); /* cloned routes: ND6 */ } /* diff --git a/bsd/netinet6/ip6_mroute.c b/bsd/netinet6/ip6_mroute.c index f6504807b..1870bdf28 100644 --- a/bsd/netinet6/ip6_mroute.c +++ b/bsd/netinet6/ip6_mroute.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -97,7 +97,6 @@ #include #include #include -#include #include #include @@ -607,7 +606,7 @@ add_m6if(mifcp) struct mif6 *mifp; struct ifnet *ifp; int error; -#if notyet +#ifdef notyet struct tbf *m_tbf = tbftable + mifcp->mif6c_mifi; #endif @@ -667,9 +666,9 @@ add_m6if(mifcp) #if MRT6DEBUG if (mrt6debug) log(LOG_DEBUG, - "add_mif #%d, phyint %s%d\n", + "add_mif #%d, phyint %s\n", mifcp->mif6c_mifi, - ifp->if_name, ifp->if_unit); + if_name(ifp)); #endif return 0; @@ -982,7 +981,7 @@ ip6_mforward(ip6, ifp, m) struct mif6 *mifp; struct mbuf *mm; mifi_t mifi; - struct timeval timenow; + uint64_t curtime = net_uptime(); #if MRT6DEBUG if (mrt6debug & DEBUG_FORWARD) @@ -1007,11 +1006,10 @@ ip6_mforward(ip6, ifp, m) * MLD packets can be sent with the unspecified source address * (although such packets must normally set 1 to the hop limit field). */ - getmicrotime(&timenow); if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { ip6stat.ip6s_cantforward++; - if (ip6_log_time + ip6_log_interval < timenow.tv_sec) { - ip6_log_time = timenow.tv_sec; + if (ip6_log_time + ip6_log_interval < curtime) { + ip6_log_time = curtime; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", @@ -1538,7 +1536,8 @@ phyint_send(ip6, mifp, m) dst6->sin6_len = sizeof(struct sockaddr_in6); dst6->sin6_family = AF_INET6; dst6->sin6_addr = ip6->ip6_dst; - ip6_mloopback(ifp, m, (struct sockaddr_in6 *)&ro.ro_dst); + ip6_mloopback(NULL, ifp, m, (struct sockaddr_in6 *)&ro.ro_dst, + -1, -1); } /* * Put the packet into the sending queue of the outgoing interface @@ -1620,7 +1619,7 @@ register_send(ip6, mif, m) MGETHDR(mm, M_DONTWAIT, MT_HEADER); if (mm == NULL) return ENOBUFS; -#ifdef __darwin8_notyet +#ifdef notyet #if CONFIG_MACF_NET mac_create_mbuf_multicast_encap(m, mif->m6_ifp, mm); #endif diff --git a/bsd/netinet6/ip6_mroute.h b/bsd/netinet6/ip6_mroute.h index 193efea2a..abdf59fd9 100644 --- a/bsd/netinet6/ip6_mroute.h +++ b/bsd/netinet6/ip6_mroute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -128,7 +128,7 @@ struct mif6ctl { mifi_t mif6c_mifi; /* the index of the mif to be added */ u_char mif6c_flags; /* MIFF_ flags defined below */ u_short mif6c_pifi; /* the index of the physical IF */ -#if notyet +#ifdef notyet u_int mif6c_rate_limit; /* max rate */ #endif }; @@ -248,13 +248,16 @@ struct sioc_mif_req6_64 { #endif /* XNU_KERNEL_PRIVATE */ #ifdef PRIVATE +#ifndef KERNEL /* * The kernel's multicast-interface structure. + * + * XXX: This is unused and is currently exposed for netstat. 
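/*
 * [Editor's aside: illustrative sketch, not part of the patch.]
 * The ip6_mforward() hunk above rebases the log throttle from
 * getmicrotime() wall-clock seconds onto net_uptime(): a monotonic
 * clock cannot jump backwards and defeat the interval check.  A
 * user-space sketch with CLOCK_MONOTONIC follows; the interval
 * value is assumed, not taken from the source.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t log_last;               /* ip6_log_time analogue */
static const uint64_t log_interval = 5; /* seconds; assumed value */

static void
log_throttled(const char *msg)
{
    struct timespec ts;
    uint64_t now;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    now = (uint64_t)ts.tv_sec;
    if (log_last + log_interval < now) {
        log_last = now;                 /* rearm the throttle */
        fprintf(stderr, "%s\n", msg);
    }
}
/* [End aside.] */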
*/ struct mif6 { u_char m6_flags; /* MIFF_ flags defined above */ u_int m6_rate_limit; /* max rate */ -#if notyet +#ifdef notyet struct tbf *m6_tbf; /* token bucket structure at intf. */ #endif struct in6_addr m6_lcl_addr; /* local interface address */ @@ -264,7 +267,7 @@ struct mif6 { u_quad_t m6_bytes_in; /* # bytes in on interface */ u_quad_t m6_bytes_out; /* # bytes out on interface */ struct route_in6 m6_route;/* cached route if this is a tunnel */ -#if notyet +#ifdef notyet u_int m6_rsvp_on; /* RSVP listening on this vif */ struct socket *m6_rsvpd; /* RSVP daemon socket */ #endif @@ -272,6 +275,8 @@ struct mif6 { /* * The kernel's multicast forwarding cache entry structure + * + * XXX: This is unused and is currently exposed for netstat. */ struct mf6c { struct sockaddr_in6 mf6c_origin; /* IPv6 origin of mcasts */ @@ -287,6 +292,7 @@ struct mf6c { struct rtdetq *mf6c_stall; /* pkts waiting for route */ struct mf6c *mf6c_next; /* hash table linkage */ }; +#endif /* !KERNEL */ #define MF6C_INCOMPLETE_PARENT ((mifi_t)-1) @@ -299,6 +305,7 @@ struct mf6c { #define MAX_UPQ6 4 /* max. no of pkts in upcall Q */ +#ifdef BSD_KERNEL_PRIVATE /* * Argument structure used for pkt info. while upcall is made */ @@ -313,16 +320,14 @@ struct rtdetq { /* XXX: rtdetq is also defined in ip_mroute.h */ }; #endif /* _NETINET_IP_MROUTE_H_ */ -#if MROUTING -#ifdef XNU_KERNEL_PRIVATE extern struct mrt6stat mrt6stat; +#if MROUTING extern int ip6_mrouter_set(struct socket *, struct sockopt *); extern int ip6_mrouter_get(struct socket *, struct sockopt *); extern int ip6_mrouter_done(void); extern int mrt6_ioctl(u_long, caddr_t); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* MROUTING */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* PRIVATE */ -#endif - #endif /* !_NETINET6_IP6_MROUTE_H_ */ diff --git a/bsd/netinet6/ip6_output.c b/bsd/netinet6/ip6_output.c index d96db58f7..1dc5dec38 100644 --- a/bsd/netinet6/ip6_output.c +++ b/bsd/netinet6/ip6_output.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.43 2002/10/31 19:45:48 ume Exp $ */ -/* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
@@ -113,72 +110,68 @@ #include #include #include +#include #include +#include #include #include #include +#include #include #include #include #include #include +#include + #include #include #include #include #include #include -#include - #if IPSEC #include -#if INET6 #include -#endif #include extern int ipsec_bypass; #endif /* IPSEC */ #if CONFIG_MACF_NET #include -#endif /* MAC_NET */ - -#include +#endif /* CONFIG_MACF_NET */ #if DUMMYNET +#include #include #include #endif /* DUMMYNET */ -#include - -#include - #if PF #include #endif /* PF */ -#ifndef __APPLE__ -static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options"); -#endif - -int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt); -static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, - struct socket *, struct sockopt *sopt); -static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto); -static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt); -static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, int sticky, int cmsg, int uproto); -static void im6o_trace(struct ip6_moptions *, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); -static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, - struct ip6_frag **); +static void ip6_out_cksum_stats(int, u_int32_t); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); +static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, + struct ip6_frag **); +static int ip6_getpmtu(struct route_in6 *, struct route_in6 *, + struct ifnet *, struct in6_addr *, u_int32_t *, boolean_t *); +static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, + struct sockopt *sopt); +static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int); +static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); +static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); +static void im6o_trace(struct ip6_moptions *, int); +static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int, + int, int); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); -static int ip6_getpmtu (struct route_in6 *, struct route_in6 *, - struct ifnet *, struct in6_addr *, u_int32_t *, int *); +static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *, + int, uint32_t, uint32_t); #define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */ @@ -215,39 +208,34 @@ static struct zone *im6o_zone; /* zone for ip6_moptions */ SYSCTL_DECL(_net_inet6_ip6); -static int ip6_maxchainsent = 0; -SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED, - &ip6_maxchainsent, 0, "use dlil_output_list"); +static int ip6_maxchainsent = 0; +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent, + CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0, + "use dlil_output_list"); /* * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only * walks through the packet chain and sends each mbuf separately. 
*/ int -ip6_output_list( - struct mbuf *m0, - int packetlist, - struct ip6_pktopts *opt, - struct route_in6 *ro, - int flags, - struct ip6_moptions *im6o, - struct ifnet **ifpp, /* XXX: just for statistics */ - struct ip6_out_args *ip6oap) +ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt, + struct route_in6 *ro, int flags, struct ip6_moptions *im6o, + struct ifnet **ifpp, struct ip6_out_args *ip6oa) { #pragma unused(packetlist) struct mbuf *m = m0, *nextpkt; int error = 0; - while (m) { + while (m != NULL) { /* * Break the chain before calling ip6_output() and free the * mbufs if there was an error. */ nextpkt = m->m_nextpkt; m->m_nextpkt = NULL; - error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oap); - if (error) { - if (nextpkt) + error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oa); + if (error != 0) { + if (nextpkt != NULL) m_freem_list(nextpkt); return (error); } @@ -264,86 +252,91 @@ ip6_output_list( * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * + * If ro is non-NULL and has valid ro->ro_rt, route lookup would be + * skipped and ro->ro_rt would be used. Otherwise the result of route + * lookup is stored in ro->ro_rt. + * * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one, * which is rt_rmx.rmx_mtu. */ int -ip6_output( - struct mbuf *m0, - struct ip6_pktopts *opt, - struct route_in6 *ro, - int flags, - struct ip6_moptions *im6o, - struct ifnet **ifpp, /* XXX: just for statistics */ - struct ip6_out_args *ip6oap) +ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, + int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, + struct ip6_out_args *ip6oa) { - struct ip6_hdr *ip6, *mhip6; - struct ifnet *ifp = NULL, *origifp = NULL; - struct mbuf *m = m0; - int hlen, tlen, len, off; - struct route_in6 ip6route; + struct ip6_hdr *ip6; + u_char *nexthdrp; + struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */ + struct mbuf *m, *mprev; + int hlen, tlen, len, off, nxt0; + struct route_in6 *ro_pmtu = NULL; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; int error = 0; - struct in6_ifaddr *ia = NULL; + struct in6_ifaddr *ia = NULL, *src_ia = NULL; u_int32_t mtu; - int alwaysfrag = 0, dontfrag = 0; + boolean_t alwaysfrag = FALSE; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; - struct ip6_exthdrs exthdrs; - struct in6_addr finaldst, src0, dst0; - u_int32_t zone; - struct route_in6 *ro_pmtu = NULL; - int hdrsplit = 0; - int needipsec = 0; + struct ip6_rthdr *rh; + struct in6_addr finaldst; ipfilter_t inject_filter_ref; - int tso; - boolean_t select_srcif; - struct ipf_pktopts *ippo = NULL, ipf_pktopts; - struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0 }; + struct ipf_pktopts *ippo = NULL; struct flowadv *adv = NULL; - u_int32_t ifmtu; #if DUMMYNET struct m_tag *tag; - struct route_in6 saved_route; - struct route_in6 saved_ro_pmtu; - struct ip_fw_args args; + struct ip6_out_args saved_ip6oa; struct sockaddr_in6 dst_buf; - - bzero(&args, sizeof(struct ip_fw_args)); #endif /* DUMMYNET */ - - if ((flags & IPV6_OUTARGS) && ip6oap != NULL) { - ip6oa = *ip6oap; - adv = &ip6oap->ip6oa_flowadv; - adv->code = FADV_SUCCESS; - } - #if IPSEC - int needipsectun = 0; struct socket *so = NULL; struct secpolicy *sp = NULL; struct route_in6 *ipsec_saved_route = NULL; - struct ipsec_output_state ipsec_state; - - bzero(&ipsec_state, sizeof(ipsec_state)); - - /* for AH 
processing. stupid to have "socket" variable in IP layer... */ - if (ipsec_bypass == 0) - { - so = ipsec_getsocket(m); - (void)ipsec_setsocket(m, NULL); - } + boolean_t needipsectun = FALSE; #endif /* IPSEC */ + struct { + struct ipf_pktopts ipf_pktopts; + struct ip6_exthdrs exthdrs; + struct route_in6 ip6route; +#if IPSEC + struct ipsec_output_state ipsec_state; +#endif /* IPSEC */ +#if DUMMYNET + struct route_in6 saved_route; + struct route_in6 saved_ro_pmtu; + struct ip_fw_args args; +#endif /* DUMMYNET */ + } ip6obz; +#define ipf_pktopts ip6obz.ipf_pktopts +#define exthdrs ip6obz.exthdrs +#define ip6route ip6obz.ip6route +#define ipsec_state ip6obz.ipsec_state +#define saved_route ip6obz.saved_route +#define saved_ro_pmtu ip6obz.saved_ro_pmtu +#define args ip6obz.args + union { + struct { + boolean_t select_srcif : 1; + boolean_t hdrsplit : 1; + boolean_t dontfrag : 1; +#if IPSEC + boolean_t needipsec : 1; + boolean_t noipsec : 1; +#endif /* IPSEC */ + }; + uint32_t raw; + } ip6obf = { .raw = 0 }; - bzero(&ipf_pktopts, sizeof(struct ipf_pktopts)); - ippo = &ipf_pktopts; + VERIFY(m0->m_flags & M_PKTHDR); - ip6 = mtod(m, struct ip6_hdr *); - inject_filter_ref = ipf_get_inject_filter(m); + /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */ + bzero(&ip6obz, sizeof (ip6obz)); - /* Grab info from mtags prepended to the chain */ #if DUMMYNET + if (SLIST_EMPTY(&m0->m_pkthdr.tags)) + goto tags_done; + + /* Grab info from mtags prepended to the chain */ if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) { struct dn_pkt_tag *dn_tag; @@ -351,80 +344,122 @@ ip6_output( dn_tag = (struct dn_pkt_tag *)(tag+1); args.fwa_pf_rule = dn_tag->dn_pf_rule; - bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf)); + bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof (dst_buf)); dst = &dst_buf; ifp = dn_tag->dn_ifp; - if (ifp) + if (ifp != NULL) ifnet_reference(ifp); flags = dn_tag->dn_flags; - if (dn_tag->dn_flags & IPV6_OUTARGS) - ip6oa = dn_tag->dn_ip6oa; + if (dn_tag->dn_flags & IPV6_OUTARGS) { + saved_ip6oa = dn_tag->dn_ip6oa; + ip6oa = &saved_ip6oa; + } saved_route = dn_tag->dn_ro6; ro = &saved_route; saved_ro_pmtu = dn_tag->dn_ro6_pmtu; ro_pmtu = &saved_ro_pmtu; origifp = dn_tag->dn_origifp; - if (origifp) + if (origifp != NULL) ifnet_reference(origifp); mtu = dn_tag->dn_mtu; - alwaysfrag = dn_tag->dn_alwaysfrag; + alwaysfrag = (dn_tag->dn_alwaysfrag != 0); unfragpartlen = dn_tag->dn_unfragpartlen; - bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof(exthdrs)); + bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof (exthdrs)); m_tag_delete(m0, tag); } + +tags_done: #endif /* DUMMYNET */ + m = m0; + m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO); + +#if IPSEC + /* for AH processing. stupid to have "socket" variable in IP layer... 
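/*
 * [Editor's aside: illustrative sketch, not part of the patch.]
 * Two locals-packing idioms appear above: scratch structures that
 * must start zeroed are grouped into one struct (ip6obz) so a single
 * bzero covers them, and boolean flags are bitfields in a union over
 * a uint32_t (ip6obf) so ".raw = 0" clears every flag at once.  A
 * minimal reconstruction follows; the member names are placeholders.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct scratch {                /* stand-ins for the embedded structs */
    char route[64];
    char exthdrs[32];
};

union outflags {
    struct {                    /* anonymous struct, as in the patch */
        bool select_srcif : 1;
        bool hdrsplit : 1;
        bool dontfrag : 1;
    };
    uint32_t raw;
};

static void
example(void)
{
    struct scratch sb;
    union outflags f = { .raw = 0 };    /* all flags cleared at once */

    memset(&sb, 0, sizeof (sb));        /* one clear instead of many */
    f.hdrsplit = true;
    (void)sb;
    (void)f;
}
/* [End aside.] */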
*/ + if (ipsec_bypass == 0) { + so = ipsec_getsocket(m); + (void) ipsec_setsocket(m, NULL); + + /* If packet is bound to an interface, check bound policies */ + if ((flags & IPV6_OUTARGS) && + (ip6oa->ip6oa_flags & IPOAF_BOUND_IF) && + ip6oa->ip6oa_boundif != IFSCOPE_NONE) { + /* ip6obf.noipsec is a bitfield, use temp integer */ + int noipsec = 0; + + if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND, + flags, ip6oa, &noipsec, &sp) != 0) + goto bad; + + ip6obf.noipsec = (noipsec != 0); + } + } +#endif /* IPSEC */ + + ip6 = mtod(m, struct ip6_hdr *); + nxt0 = ip6->ip6_nxt; finaldst = ip6->ip6_dst; + inject_filter_ref = ipf_get_inject_filter(m); + ippo = &ipf_pktopts; if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) { - if ((select_srcif = (!(flags & (IPV6_FORWARDING | + /* + * In the forwarding case, only the ifscope value is used, + * as source interface selection doesn't take place. + */ + if ((ip6obf.select_srcif = (!(flags & (IPV6_FORWARDING | IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) && - (ip6oa.ip6oa_flags & IP6OAF_SELECT_SRCIF)))) + (ip6oa->ip6oa_flags & IP6OAF_SELECT_SRCIF)))) ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF; - if ((ip6oa.ip6oa_flags & IP6OAF_BOUND_IF) && - ip6oa.ip6oa_boundif != IFSCOPE_NONE) { + if ((ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) && + ip6oa->ip6oa_boundif != IFSCOPE_NONE) { ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF | - (ip6oa.ip6oa_boundif << IPPOF_SHIFT_IFSCOPE)); + (ip6oa->ip6oa_boundif << IPPOF_SHIFT_IFSCOPE)); } - if (ip6oa.ip6oa_flags & IP6OAF_BOUND_SRCADDR) + if (ip6oa->ip6oa_flags & IP6OAF_BOUND_SRCADDR) ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR; } else { - select_srcif = FALSE; - ip6oa.ip6oa_boundif = IFSCOPE_NONE; - ip6oa.ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_IF | - IP6OAF_BOUND_SRCADDR); + ip6obf.select_srcif = FALSE; + if (flags & IPV6_OUTARGS) { + ip6oa->ip6oa_boundif = IFSCOPE_NONE; + ip6oa->ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF | + IP6OAF_BOUND_IF | IP6OAF_BOUND_SRCADDR); + } } - if ((flags & IPV6_OUTARGS) && (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR)) + if ((flags & IPV6_OUTARGS) && (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR)) ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR; + if (flags & IPV6_OUTARGS) { + adv = &ip6oa->ip6oa_flowadv; + adv->code = FADV_SUCCESS; + ip6oa->ip6oa_retflags = 0; + } + #if DUMMYNET if (args.fwa_pf_rule) { ip6 = mtod(m, struct ip6_hdr *); - + VERIFY(ro != NULL); /* ro == saved_route */ goto check_with_pf; } #endif /* DUMMYNET */ -#define MAKE_EXTHDR(hp, mp) \ - do { \ - if (hp) { \ +#define MAKE_EXTHDR(hp, mp) do { \ + if (hp != NULL) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ - error = ip6_copyexthdr((mp), (caddr_t)(hp), \ - ((eh)->ip6e_len + 1) << 3); \ + error = ip6_copyexthdr((mp), (caddr_t)(hp), \ + ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ - } while (0) - - bzero(&exthdrs, sizeof(exthdrs)); +} while (0) - if (opt) { + if (opt != NULL) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ @@ -447,19 +482,26 @@ ip6_output( MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } +#undef MAKE_EXTHDR + #if IPSEC - if (ipsec_bypass != 0) + if (ipsec_bypass != 0 || ip6obf.noipsec) goto skip_ipsec; - /* get a security policy for this packet */ - if (so == NULL) - sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); - else - sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); - + /* May have been set above if packet was bound */ if (sp == NULL) { - 
IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); - goto freehdrs; + /* get a security policy for this packet */ + if (so == NULL) { + sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, + 0, &error); + } else { + sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, + so, &error); + } + if (sp == NULL) { + IPSEC_STAT_INCREMENT(ipsec6stat.out_inval); + goto freehdrs; + } } error = 0; @@ -477,7 +519,7 @@ ip6_output( case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ - needipsec = 0; + ip6obf.needipsec = FALSE; break; case IPSEC_POLICY_IPSEC: @@ -486,14 +528,25 @@ ip6_output( error = key_spdacquire(sp); goto freehdrs; } - needipsec = 1; + if (sp->ipsec_if) { + /* Verify the redirect to ipsec interface */ + if (sp->ipsec_if == ifp) { + /* Set policy for mbuf */ + m->m_pkthdr.ipsec_policy = sp->id; + goto skip_ipsec; + } + goto bad; + } else { + ip6obf.needipsec = TRUE; + } break; case IPSEC_POLICY_ENTRUST: default: - printf("ip6_output: Invalid policy found. %d\n", sp->policy); + printf("%s: Invalid policy found: %d\n", __func__, sp->policy); + break; } - skip_ipsec: +skip_ipsec: #endif /* IPSEC */ /* @@ -501,36 +554,33 @@ ip6_output( * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; - if (exthdrs.ip6e_hbh) + if (exthdrs.ip6e_hbh != NULL) optlen += exthdrs.ip6e_hbh->m_len; - if (exthdrs.ip6e_dest1) + if (exthdrs.ip6e_dest1 != NULL) optlen += exthdrs.ip6e_dest1->m_len; - if (exthdrs.ip6e_rthdr) + if (exthdrs.ip6e_rthdr != NULL) optlen += exthdrs.ip6e_rthdr->m_len; - unfragpartlen = optlen + sizeof(struct ip6_hdr); + unfragpartlen = optlen + sizeof (struct ip6_hdr); /* NOTE: we don't add AH/ESP length here. do that later. */ - if (exthdrs.ip6e_dest2) + if (exthdrs.ip6e_dest2 != NULL) optlen += exthdrs.ip6e_dest2->m_len; - - if (needipsec && - (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) { - in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA; - } - /* * If we need IPsec, or there is at least one extension header, * separate IP6 header from the payload. */ - if ((needipsec || optlen) && !hdrsplit) { + if (( +#if IPSEC + ip6obf.needipsec || +#endif /* IPSEC */ + optlen) && !ip6obf.hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; - hdrsplit++; + ip6obf.hdrsplit = TRUE; } /* adjust pointer */ @@ -538,26 +588,26 @@ ip6_output( /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; - plen = m->m_pkthdr.len - sizeof(*ip6); + plen = m->m_pkthdr.len - sizeof (*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { - if (!hdrsplit) { + if (!ip6obf.hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; - hdrsplit++; + ip6obf.hdrsplit = TRUE; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; - } else + } else { ip6->ip6_plen = htons(plen); - + } /* * Concatenate headers and fill in next header fields. * Here we have, on "m" @@ -568,107 +618,116 @@ ip6_output( * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ - { - u_char *nexthdrp = &ip6->ip6_nxt; - struct mbuf *mprev = m; + nexthdrp = &ip6->ip6_nxt; + mprev = m; - /* - * we treat dest2 specially. this makes IPsec processing - * much easier. the goal here is to make mprev point the - * mbuf prior to dest2. 
- * - * result: IPv6 dest2 payload - * m and mprev will point to IPv6 header. - */ - if (exthdrs.ip6e_dest2) { - if (!hdrsplit) - panic("assumption failed: hdr not split"); - exthdrs.ip6e_dest2->m_next = m->m_next; - m->m_next = exthdrs.ip6e_dest2; - *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; - ip6->ip6_nxt = IPPROTO_DSTOPTS; - } - -#define MAKE_CHAIN(m, mp, p, i)\ - do {\ - if (m) {\ - if (!hdrsplit) \ - panic("assumption failed: hdr not split"); \ - *mtod((m), u_char *) = *(p);\ - *(p) = (i);\ - p = mtod((m), u_char *);\ - (m)->m_next = (mp)->m_next;\ - (mp)->m_next = (m);\ - (mp) = (m);\ - }\ - } while (0) - /* - * result: IPv6 hbh dest1 rthdr dest2 payload - * m will point to IPv6 header. mprev will point to the - * extension header prior to dest2 (rthdr in the above case). - */ - MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, - nexthdrp, IPPROTO_HOPOPTS); - MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, - nexthdrp, IPPROTO_DSTOPTS); - MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, - nexthdrp, IPPROTO_ROUTING); - - if (!TAILQ_EMPTY(&ipv6_filters)) { - struct ipfilter *filter; - int seen = (inject_filter_ref == 0); - int fixscope = 0; - - if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - ippo->ippo_flags |= IPPOF_MCAST_OPTS; - IM6O_LOCK(im6o); - ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp; - ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim; - ippo->ippo_mcast_loop = im6o->im6o_multicast_loop; - IM6O_UNLOCK(im6o); - } + /* + * we treat dest2 specially. this makes IPsec processing + * much easier. the goal here is to make mprev point the + * mbuf prior to dest2. + * + * result: IPv6 dest2 payload + * m and mprev will point to IPv6 header. + */ + if (exthdrs.ip6e_dest2 != NULL) { + if (!ip6obf.hdrsplit) { + panic("assumption failed: hdr not split"); + /* NOTREACHED */ + } + exthdrs.ip6e_dest2->m_next = m->m_next; + m->m_next = exthdrs.ip6e_dest2; + *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; + ip6->ip6_nxt = IPPROTO_DSTOPTS; + } + +#define MAKE_CHAIN(m, mp, p, i) do { \ + if (m != NULL) { \ + if (!ip6obf.hdrsplit) { \ + panic("assumption failed: hdr not split"); \ + /* NOTREACHED */ \ + } \ + *mtod((m), u_char *) = *(p); \ + *(p) = (i); \ + p = mtod((m), u_char *); \ + (m)->m_next = (mp)->m_next; \ + (mp)->m_next = (m); \ + (mp) = (m); \ + } \ +} while (0) + /* + * result: IPv6 hbh dest1 rthdr dest2 payload + * m will point to IPv6 header. mprev will point to the + * extension header prior to dest2 (rthdr in the above case). 
+ */ + MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); + MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); + MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); - /* Hack: embed the scope_id in the destination */ - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) && - (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) { - fixscope = 1; - ip6->ip6_dst.s6_addr16[1] = htons(ro->ro_dst.sin6_scope_id); - } - { - ipf_ref(); - TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) { - /* - * No need to proccess packet twice if we've - * already seen it - */ - if (seen == 0) { - if ((struct ipfilter *)inject_filter_ref == filter) - seen = 1; - } else if (filter->ipf_filter.ipf_output) { - errno_t result; - - result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo); - if (result == EJUSTRETURN) { - ipf_unref(); - goto done; - } - if (result != 0) { - ipf_unref(); - goto bad; - } - } +#undef MAKE_CHAIN + +#if IPSEC + if (ip6obf.needipsec && (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) + in6_delayed_cksum_offset(m, 0, optlen, nxt0); +#endif /* IPSEC */ + + if (!TAILQ_EMPTY(&ipv6_filters)) { + struct ipfilter *filter; + int seen = (inject_filter_ref == NULL); + int fixscope = 0; + + if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + ippo->ippo_flags |= IPPOF_MCAST_OPTS; + IM6O_LOCK(im6o); + ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp; + ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim; + ippo->ippo_mcast_loop = im6o->im6o_multicast_loop; + IM6O_UNLOCK(im6o); + } + + /* Hack: embed the scope_id in the destination */ + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) && + (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) { + fixscope = 1; + ip6->ip6_dst.s6_addr16[1] = + htons(ro->ro_dst.sin6_scope_id); + } + + ipf_ref(); + TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) { + /* + * Don't process packet twice if we've already seen it. + */ + if (seen == 0) { + if ((struct ipfilter *)inject_filter_ref == + filter) + seen = 1; + } else if (filter->ipf_filter.ipf_output != NULL) { + errno_t result; + + result = filter->ipf_filter.ipf_output( + filter->ipf_filter.cookie, + (mbuf_t *)&m, ippo); + if (result == EJUSTRETURN) { + ipf_unref(); + goto done; + } + if (result != 0) { + ipf_unref(); + goto bad; } - ipf_unref(); } - ip6 = mtod(m, struct ip6_hdr *); - /* Hack: cleanup embedded scope_id if we put it there */ - if (fixscope) - ip6->ip6_dst.s6_addr16[1] = 0; } + ipf_unref(); + + ip6 = mtod(m, struct ip6_hdr *); + /* Hack: cleanup embedded scope_id if we put it there */ + if (fixscope) + ip6->ip6_dst.s6_addr16[1] = 0; + } #if IPSEC - if (!needipsec) - goto skip_ipsec2; + if (ip6obf.needipsec) { + int segleft_org; /* * pointers after IPsec headers are not valid any more. @@ -677,19 +736,18 @@ ip6_output( */ exthdrs.ip6e_dest2 = NULL; - { - struct ip6_rthdr *rh = NULL; - int segleft_org = 0; - - if (exthdrs.ip6e_rthdr) { + if (exthdrs.ip6e_rthdr != NULL) { rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *); segleft_org = rh->ip6r_segleft; rh->ip6r_segleft = 0; + } else { + rh = NULL; + segleft_org = 0; } ipsec_state.m = m; - error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev, sp, flags, - &needipsectun); + error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev, + sp, flags, &needipsectun); m = ipsec_state.m; if (error) { /* mbuf is already reclaimed in ipsec6_output_trans. 
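/*
 * [Editor's aside: illustrative sketch, not part of the patch.]
 * MAKE_CHAIN() above splices an extension-header mbuf after mprev
 * and threads the next-header bytes through the chain.  The same
 * five steps as a function over a plain linked node; struct hdrbuf
 * and make_chain() are hypothetical.
 */
#include <stdint.h>
#include <stddef.h>

struct hdrbuf {
    struct hdrbuf *next;
    uint8_t        data[8];     /* data[0] is the next-header field */
};

static void
make_chain(struct hdrbuf *h, struct hdrbuf **prevp, uint8_t **nexthdrp,
    uint8_t proto)
{
    if (h == NULL)
        return;
    h->data[0] = **nexthdrp;    /* inherit the old next-header value */
    **nexthdrp = proto;         /* predecessor now points at us */
    *nexthdrp = &h->data[0];    /* the next header patches this byte */
    h->next = (*prevp)->next;   /* splice into the chain */
    (*prevp)->next = h;
    *prevp = h;
}
/* [End aside.] */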
*/ @@ -702,8 +760,9 @@ ip6_output( case ENOMEM: break; default: - printf("ip6_output (ipsec): error code %d\n", error); - /* fall through */ + printf("ip6_output (ipsec): error code %d\n", + error); + /* FALLTHRU */ case ENOENT: /* don't show these error codes to the user */ error = 0; @@ -711,66 +770,63 @@ ip6_output( } goto bad; } - if (exthdrs.ip6e_rthdr) { + if (exthdrs.ip6e_rthdr != NULL) { /* ah6_output doesn't modify mbuf chain */ rh->ip6r_segleft = segleft_org; } - } } -skip_ipsec2: -#endif +#endif /* IPSEC */ /* * If there is a routing header, replace the destination address field * with the first hop of the routing header. */ - if (exthdrs.ip6e_rthdr) { - struct ip6_rthdr *rh = - (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr, - struct ip6_rthdr *)); + if (exthdrs.ip6e_rthdr != NULL) { struct ip6_rthdr0 *rh0; struct in6_addr *addr; struct sockaddr_in6 sa; + rh = (struct ip6_rthdr *) + (mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *)); switch (rh->ip6r_type) { case IPV6_RTHDR_TYPE_0: - rh0 = (struct ip6_rthdr0 *)rh; - addr = (struct in6_addr *)(void *)(rh0 + 1); - - /* - * construct a sockaddr_in6 form of - * the first hop. - * - * XXX: we may not have enough - * information about its scope zone; - * there is no standard API to pass - * the information from the - * application. - */ - bzero(&sa, sizeof(sa)); - sa.sin6_family = AF_INET6; - sa.sin6_len = sizeof(sa); - sa.sin6_addr = addr[0]; - if ((error = sa6_embedscope(&sa, - ip6_use_defzone)) != 0) { - goto bad; - } - ip6->ip6_dst = sa.sin6_addr; - bcopy(&addr[1], &addr[0], sizeof(struct in6_addr) - * (rh0->ip6r0_segleft - 1)); - addr[rh0->ip6r0_segleft - 1] = finaldst; - /* XXX */ - in6_clearscope(addr + rh0->ip6r0_segleft - 1); - break; + rh0 = (struct ip6_rthdr0 *)rh; + addr = (struct in6_addr *)(void *)(rh0 + 1); + + /* + * construct a sockaddr_in6 form of + * the first hop. + * + * XXX: we may not have enough + * information about its scope zone; + * there is no standard API to pass + * the information from the + * application. + */ + bzero(&sa, sizeof (sa)); + sa.sin6_family = AF_INET6; + sa.sin6_len = sizeof (sa); + sa.sin6_addr = addr[0]; + if ((error = sa6_embedscope(&sa, + ip6_use_defzone)) != 0) { + goto bad; + } + ip6->ip6_dst = sa.sin6_addr; + bcopy(&addr[1], &addr[0], sizeof (struct in6_addr) * + (rh0->ip6r0_segleft - 1)); + addr[rh0->ip6r0_segleft - 1] = finaldst; + /* XXX */ + in6_clearscope(addr + rh0->ip6r0_segleft - 1); + break; default: /* is it possible? */ - error = EINVAL; - goto bad; + error = EINVAL; + goto bad; } } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && - (flags & IPV6_UNSPECSRC) == 0) { + !(flags & IPV6_UNSPECSRC)) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; @@ -786,37 +842,40 @@ skip_ipsec2: /* * Route packet. */ - if (ro == 0) { + if (ro == NULL) { ro = &ip6route; - bzero((caddr_t)ro, sizeof(*ro)); + bzero((caddr_t)ro, sizeof (*ro)); } + VERIFY(ro_pmtu == NULL); /* must not get here if dummynet */ ro_pmtu = ro; - if (opt && opt->ip6po_rthdr) + if (opt != NULL && opt->ip6po_rthdr) ro = &opt->ip6po_route; - dst = (struct sockaddr_in6 *)&ro->ro_dst; + dst = SIN6(&ro->ro_dst); - if (ro && ro->ro_rt) + if (ro->ro_rt != NULL) RT_LOCK_ASSERT_NOTHELD(ro->ro_rt); /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. 
*/ - if (opt && opt->ip6po_tclass >= 0) { + if (opt != NULL && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; - if (mask != 0) - ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); + if (mask != 0) { + ip6->ip6_flow |= + htonl((opt->ip6po_tclass & mask) << 20); + } } /* fill in or override the hop limit field, if necessary. */ - if (opt && opt->ip6po_hlim != -1) + if (opt && opt->ip6po_hlim != -1) { ip6->ip6_hlim = opt->ip6po_hlim & 0xff; - else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) { IM6O_LOCK(im6o); ip6->ip6_hlim = im6o->im6o_multicast_hlim; @@ -834,24 +893,21 @@ skip_ipsec2: * below (since it uses this route as a hint) or during the * next transmit. */ - if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) || - dst->sin6_family != AF_INET6 || - !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst) || - ro->ro_rt->generation_id != route_generation)) { - rtfree(ro->ro_rt); - ro->ro_rt = NULL; - } + if (ROUTE_UNUSABLE(ro) || dst->sin6_family != AF_INET6 || + !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst)) + ROUTE_RELEASE(ro); + if (ro->ro_rt == NULL) { - bzero(dst, sizeof(*dst)); + bzero(dst, sizeof (*dst)); dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(struct sockaddr_in6); + dst->sin6_len = sizeof (struct sockaddr_in6); dst->sin6_addr = ip6->ip6_dst; } #if IPSEC - if (needipsec && needipsectun) { + if (ip6obf.needipsec && needipsectun) { #if CONFIG_DTRACE struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL; -#endif +#endif /* CONFIG_DTRACE */ /* * All the extension headers will become inaccessible * (since they can be encrypted). @@ -860,25 +916,27 @@ skip_ipsec2: * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ - bzero(&exthdrs, sizeof(exthdrs)); + bzero(&exthdrs, sizeof (exthdrs)); exthdrs.ip6e_ip6 = m; ipsec_state.m = m; - route_copyout(&ipsec_state.ro, (struct route *)ro, sizeof(ipsec_state.ro)); - ipsec_state.dst = (struct sockaddr *)dst; + route_copyout(&ipsec_state.ro, (struct route *)ro, + sizeof (ipsec_state.ro)); + ipsec_state.dst = SA(dst); - /* Added a trace here so that we can see packets inside a tunnel */ + /* So that we can see packets inside the tunnel */ DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL, - struct ip6_hdr *, ip6, struct ifnet *, trace_ifp, - struct ip *, NULL, struct ip6_hdr *, ip6); + struct ip6_hdr *, ip6, struct ifnet *, trace_ifp, + struct ip *, NULL, struct ip6_hdr *, ip6); error = ipsec6_output_tunnel(&ipsec_state, sp, flags); - if (ipsec_state.tunneled == 4) /* tunneled in IPv4 - packet is gone */ + /* tunneled in IPv4? packet is gone */ + if (ipsec_state.tunneled == 4) goto done; m = ipsec_state.m; ipsec_saved_route = ro; ro = (struct route_in6 *)&ipsec_state.ro; - dst = (struct sockaddr_in6 *)(void *)ipsec_state.dst; + dst = SIN6(ipsec_state.dst); if (error) { /* mbuf is already reclaimed in ipsec6_output_tunnel. */ m0 = m = NULL; @@ -891,8 +949,9 @@ skip_ipsec2: case ENOMEM: break; default: - printf("ip6_output (ipsec): error code %d\n", error); - /* fall through */ + printf("ip6_output (ipsec): error code %d\n", + error); + /* FALLTHRU */ case ENOENT: /* don't show these error codes to the user */ error = 0; @@ -901,11 +960,14 @@ skip_ipsec2: goto bad; } /* - * The packet has been encapsulated so the ifscope is no longer valid - * since it does not apply to the outer address: ignore the ifscope. 
+ * The packet has been encapsulated so the ifscope + * is no longer valid since it does not apply to the + * outer address: ignore the ifscope. */ - ip6oa.ip6oa_boundif = IFSCOPE_NONE; - ip6oa.ip6oa_flags &= ~IP6OAF_BOUND_IF; + if (flags & IPV6_OUTARGS) { + ip6oa->ip6oa_boundif = IFSCOPE_NONE; + ip6oa->ip6oa_flags &= ~IP6OAF_BOUND_IF; + } if (opt != NULL && opt->ip6po_pktinfo != NULL) { if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE) opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE; @@ -923,23 +985,24 @@ skip_ipsec2: /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); - if (select_srcif) { - bzero(&src_sa, sizeof(src_sa)); + if (ip6obf.select_srcif) { + bzero(&src_sa, sizeof (src_sa)); src_sa.sin6_family = AF_INET6; - src_sa.sin6_len = sizeof(src_sa); + src_sa.sin6_len = sizeof (src_sa); src_sa.sin6_addr = ip6->ip6_src; } - bzero(&dst_sa, sizeof(dst_sa)); + bzero(&dst_sa, sizeof (dst_sa)); dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(dst_sa); + dst_sa.sin6_len = sizeof (dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; /* * in6_selectroute() might return an ifp with its reference held * even in the error case, so make sure to release its reference. + * ip6oa may be NULL if IPV6_OUTARGS isn't set. */ - if ((error = in6_selectroute(select_srcif ? &src_sa : NULL, - &dst_sa, opt, im6o, ro, &ifp, &rt, 0, &ip6oa)) != 0) { + if ((error = in6_selectroute(ip6obf.select_srcif ? &src_sa : NULL, + &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa)) != 0) { switch (error) { case EHOSTUNREACH: ip6stat.ip6s_noroute++; @@ -964,9 +1027,9 @@ skip_ipsec2: /* * then rt (for unicast) and ifp must be non-NULL valid values. */ - if ((flags & IPV6_FORWARDING) == 0) { + if (!(flags & IPV6_FORWARDING)) { /* XXX: the FORWARDING flag can be set for mrouting. */ - in6_ifstat_inc(ifp, ifs6_out_request); + in6_ifstat_inc_na(ifp, ifs6_out_request); } if (rt != NULL) { RT_LOCK(rt); @@ -979,8 +1042,9 @@ skip_ipsec2: /* * The outgoing interface must be in the zone of source and - * destination addresses. We should use ia_ifp to support the - * case of sending packets to an address of our own. + * destination addresses (except local/loopback). We should + * use ia_ifp to support the case of sending packets to an + * address of our own. */ if (ia != NULL && ia->ia_ifp) { ifnet_reference(ia->ia_ifp); /* for origifp */ @@ -994,50 +1058,78 @@ skip_ipsec2: ifnet_release(origifp); origifp = ifp; } - src0 = ip6->ip6_src; - if (in6_setscope(&src0, origifp, &zone)) - goto badscope; - bzero(&src_sa, sizeof(src_sa)); - src_sa.sin6_family = AF_INET6; - src_sa.sin6_len = sizeof(src_sa); - src_sa.sin6_addr = ip6->ip6_src; - if (sa6_recoverscope(&src_sa, TRUE) || zone != src_sa.sin6_scope_id) - goto badscope; - - dst0 = ip6->ip6_dst; - if (in6_setscope(&dst0, origifp, &zone)) - goto badscope; - /* re-initialize to be sure */ - bzero(&dst_sa, sizeof(dst_sa)); - dst_sa.sin6_family = AF_INET6; - dst_sa.sin6_len = sizeof(dst_sa); - dst_sa.sin6_addr = ip6->ip6_dst; - if (sa6_recoverscope(&dst_sa, TRUE) || zone != dst_sa.sin6_scope_id) { - goto badscope; - } - /* scope check is done. 
*/ - goto routefound; + /* skip scope enforcements for local/loopback route */ + if (rt == NULL || !(rt->rt_ifp->if_flags & IFF_LOOPBACK)) { + struct in6_addr src0, dst0; + u_int32_t zone; - badscope: - ip6stat.ip6s_badscope++; - in6_ifstat_inc(origifp, ifs6_out_discard); - if (error == 0) - error = EHOSTUNREACH; /* XXX */ - goto bad; + src0 = ip6->ip6_src; + if (in6_setscope(&src0, origifp, &zone)) + goto badscope; + bzero(&src_sa, sizeof (src_sa)); + src_sa.sin6_family = AF_INET6; + src_sa.sin6_len = sizeof (src_sa); + src_sa.sin6_addr = ip6->ip6_src; + if ((sa6_recoverscope(&src_sa, TRUE) || + zone != src_sa.sin6_scope_id)) + goto badscope; + + dst0 = ip6->ip6_dst; + if ((in6_setscope(&dst0, origifp, &zone))) + goto badscope; + /* re-initialize to be sure */ + bzero(&dst_sa, sizeof (dst_sa)); + dst_sa.sin6_family = AF_INET6; + dst_sa.sin6_len = sizeof (dst_sa); + dst_sa.sin6_addr = ip6->ip6_dst; + if ((sa6_recoverscope(&dst_sa, TRUE) || + zone != dst_sa.sin6_scope_id)) + goto badscope; + + /* scope check is done. */ + goto routefound; + +badscope: + ip6stat.ip6s_badscope++; + in6_ifstat_inc(origifp, ifs6_out_discard); + if (error == 0) + error = EHOSTUNREACH; /* XXX */ + goto bad; + } - routefound: - if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - if (opt && opt->ip6po_nextroute.ro_rt) { +routefound: + if (rt != NULL && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + if (opt != NULL && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ - dst = (struct sockaddr_in6 *)(void *)opt->ip6po_nexthop; + dst = SIN6(opt->ip6po_nexthop); + } else if ((rt->rt_flags & RTF_GATEWAY)) { + dst = SIN6(rt->rt_gateway); + } + /* + * For packets destined to local/loopback, record the + * source interface (which owns the source + * address), as well as the output interface. This is + * needed to reconstruct the embedded zone for the + * link-local address case in ip6_input(). + */ + if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK)) { + uint32_t srcidx; + + if (src_ia != NULL) + srcidx = src_ia->ia_ifp->if_index; + else if (ro->ro_srcia != NULL) + srcidx = ro->ro_srcia->ifa_ifp->if_index; + else + srcidx = 0; + + ip6_setsrcifaddr_info(m, srcidx, NULL); + ip6_setdstifaddr_info(m, 0, ia); + } - } - else if ((rt->rt_flags & RTF_GATEWAY)) - dst = (struct sockaddr_in6 *)(void *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { @@ -1046,13 +1138,12 @@ skip_ipsec2: struct in6_multi *in6m; m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; - - in6_ifstat_inc(ifp, ifs6_out_mcast); + in6_ifstat_inc_na(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ - if ((ifp->if_flags & IFF_MULTICAST) == 0) { + if (!(ifp->if_flags & IFF_MULTICAST)) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; @@ -1064,7 +1155,7 @@ skip_ipsec2: if (im6o != NULL) IM6O_LOCK(im6o); if (in6m != NULL && - (im6o == NULL || im6o->im6o_multicast_loop)) { + (im6o == NULL || im6o->im6o_multicast_loop)) { if (im6o != NULL) IM6O_UNLOCK(im6o); /* @@ -1072,7 +1163,7 @@ skip_ipsec2: * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. */ - ip6_mloopback(ifp, m, dst); + ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0); } else { if (im6o != NULL) IM6O_UNLOCK(im6o); @@ -1089,7 +1180,7 @@ skip_ipsec2: * if necessary.
*/ #if MROUTING - if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { + if (ip6_mrouter && !(flags & IPV6_FORWARDING)) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. @@ -1105,7 +1196,7 @@ skip_ipsec2: goto done; } } -#endif +#endif /* MROUTING */ } if (in6m != NULL) IN6M_REMREF(in6m); @@ -1153,13 +1244,13 @@ skip_ipsec2: * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { - if ((flags & IPV6_MINMTU)) + if ((flags & IPV6_MINMTU)) { mtu = IPV6_MMTU; - else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) + } else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) { mtu = IPV6_MMTU; - else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && - (opt == NULL || - opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { + } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && + (opt == NULL || + opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } @@ -1175,7 +1266,7 @@ skip_ipsec2: /* * Check with the firewall... */ - if (ip6_fw_enable && ip6_fw_chk_ptr) { + if (ip6_fw_enable && ip6_fw_chk_ptr) { u_short port = 0; m->m_pkthdr.rcvif = NULL; /* XXX */ /* If ipfw says divert, we have to just drop packet */ @@ -1183,39 +1274,39 @@ skip_ipsec2: m_freem(m); goto done; } - if (!m) { + if (m == NULL) { error = EACCES; goto done; } } -#endif +#endif /* IPFW2 */ /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ - if (exthdrs.ip6e_hbh) { + if (exthdrs.ip6e_hbh != NULL) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ - + uint32_t oplen = 0; /* for ip6_process_hopopts() */ #if DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not continuous"); #endif /* - * XXX: if we have to send an ICMPv6 error to the sender, - * we need the M_LOOP flag since icmp6_error() expects - * the IPv6 and the hop-by-hop options header are - * continuous unless the flag is set. + * XXX: If we have to send an ICMPv6 error to the sender, + * we need the M_LOOP flag since icmp6_error() expects + * the IPv6 and the hop-by-hop options header are + * continuous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), - ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), - &dummy, &plen) < 0) { + ((hbh->ip6h_len + 1) << 3) - sizeof (struct ip6_hbh), + &dummy, &oplen) < 0) { /* m was already freed at this point */ - error = EINVAL;/* better error? */ + error = EINVAL; /* better error? 
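The IPV6_MINMTU cases above reduce to a small decision table (RFC 3542): use the 1280-byte IPv6 minimum when the per-packet flag asks for it, when the sticky option selects it for all destinations, or by default for multicast unless explicitly disabled. A pure-function sketch of that table; effective_mtu and its parameter names are illustrative, with minmtu_all standing in for IP6PO_MINMTU_ALL and minmtu_disable for IP6PO_MINMTU_DISABLE.

static unsigned
effective_mtu(unsigned path_mtu, int want_minmtu_flag,
    int minmtu_all, int minmtu_disable, int dst_is_multicast)
{
    if (path_mtu <= 1280)               /* already at or below IPV6_MMTU */
        return (path_mtu);
    if (want_minmtu_flag)               /* per-packet IPV6_MINMTU flag */
        return (1280);
    if (minmtu_all)                     /* sticky: minimum for all dests */
        return (1280);
    if (dst_is_multicast && !minmtu_disable)    /* default: mcast only */
        return (1280);
    return (path_mtu);
}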
*/ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ @@ -1224,18 +1315,19 @@ skip_ipsec2: #if DUMMYNET check_with_pf: -#endif +#endif /* DUMMYNET */ #if PF if (PF_IS_ENABLED) { #if DUMMYNET /* - * TBD: Need to save opt->ip6po_flags for reinjection rdar://10434993 + * TODO: Need to save opt->ip6po_flags for reinjection + * rdar://10434993 */ args.fwa_m = m; args.fwa_oif = ifp; args.fwa_oflags = flags; - if ((flags & IPV6_OUTARGS)) - args.fwa_ip6oa = &ip6oa; + if (flags & IPV6_OUTARGS) + args.fwa_ip6oa = ip6oa; args.fwa_ro6 = ro; args.fwa_dst6 = dst; args.fwa_ro6_pmtu = ro_pmtu; @@ -1246,18 +1338,19 @@ check_with_pf: args.fwa_exthdrs = &exthdrs; /* Invoke outbound packet filter */ error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args); -#else +#else /* !DUMMYNET */ error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL); -#endif /* DUMMYNET */ +#endif /* !DUMMYNET */ if (error != 0 || m == NULL) { /* * Note that if we ever handle packet chain, we will - * have to restore the linkage from the previous + * have to restore the linkage from the previous * packet to the next like in ip_outout_list() */ if (m != NULL) { - panic("%s: unexpected packet %p\n", __func__, m); + panic("%s: unexpected packet %p\n", + __func__, m); /* NOTREACHED */ } /* Already freed by callee */ @@ -1288,11 +1381,11 @@ check_with_pf: */ tlen = m->m_pkthdr.len; - if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) - dontfrag = 1; + if (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG)) + ip6obf.dontfrag = TRUE; else - dontfrag = 0; - if (dontfrag && alwaysfrag) { /* case 4 */ + ip6obf.dontfrag = FALSE; + if (ip6obf.dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; @@ -1300,10 +1393,8 @@ check_with_pf: lck_rw_lock_shared(nd_if_rwlock); /* Access without acquiring nd_ifinfo lock for performance */ - ifmtu = IN6_LINKMTU(ifp); - lck_rw_done(nd_if_rwlock); - - if (dontfrag && tlen > ifmtu) { /* case 2-b */ + if (ip6obf.dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */ + lck_rw_done(nd_if_rwlock); /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the @@ -1316,40 +1407,28 @@ check_with_pf: struct ip6ctlparam ip6cp; mtu32 = (u_int32_t)mtu; - bzero(&ip6cp, sizeof(ip6cp)); + bzero(&ip6cp, sizeof (ip6cp)); ip6cp.ip6c_cmdarg = (void *)&mtu32; - pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst, - (void *)&ip6cp); - + pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp); error = EMSGSIZE; goto bad; + } else { + lck_rw_done(nd_if_rwlock); } /* * transmit packet without fragmentation */ - tso = (ifp->if_hwassist & IFNET_TSO_IPV6) && - (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6); - if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */ - (tlen <= mtu || tso || (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) { - int sw_csum; - - ip6 = mtod(m, struct ip6_hdr *); + if (ip6obf.dontfrag || (!alwaysfrag && /* case 1-a and 2-a */ + (tlen <= mtu || TSO_IPV6_OK(ifp, m) || + (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) { #ifdef IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); -#endif +#endif /* IPSEC */ - if (apple_hwcksum_tx == 0) /* Do not let HW handle cksum */ - sw_csum = m->m_pkthdr.csum_flags; - else - sw_csum = m->m_pkthdr.csum_flags & - ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist); + ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen); - if ((sw_csum & CSUM_DELAY_IPV6_DATA) != 0) { - in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen); - m->m_pkthdr.csum_flags &= 
~CSUM_DELAY_IPV6_DATA; - } if (ro->ro_rt) RT_LOCK_ASSERT_NOTHELD(ro->ro_rt); error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv); @@ -1359,7 +1438,12 @@ check_with_pf: /* * try to fragment the packet. case 1-b and 3 */ - if (mtu < IPV6_MMTU) { + if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) { + /* TSO and fragment aren't compatible */ + error = EMSGSIZE; + in6_ifstat_inc(ifp, ifs6_out_fragfail); + goto bad; + } else if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); @@ -1384,7 +1468,7 @@ check_with_pf: if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; - len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; + len = (mtu - hlen - sizeof (struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); @@ -1397,13 +1481,13 @@ check_with_pf: * Change the next header field of the last header in the * unfragmentable part. */ - if (exthdrs.ip6e_rthdr) { + if (exthdrs.ip6e_rthdr != NULL) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; - } else if (exthdrs.ip6e_dest1) { + } else if (exthdrs.ip6e_dest1 != NULL) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; - } else if (exthdrs.ip6e_hbh) { + } else if (exthdrs.ip6e_hbh != NULL) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { @@ -1411,10 +1495,8 @@ check_with_pf: ip6->ip6_nxt = IPPROTO_FRAGMENT; } - if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) { - in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA; - } + if (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) + in6_delayed_cksum_offset(m, 0, optlen, nxt0); /* * Loop through length of segment after first fragment, @@ -1423,8 +1505,10 @@ check_with_pf: */ m0 = m; for (off = hlen; off < tlen; off += len) { + struct ip6_hdr *mhip6; + MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */ - if (!m) { + if (m == NULL) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; @@ -1436,9 +1520,9 @@ check_with_pf: m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; - m->m_len = sizeof(*mhip6); - error = ip6_insertfraghdr(m0, m, hlen, &ip6f); - if (error) { + m->m_len = sizeof (*mhip6); + error = ip6_insertfraghdr(m0, m, hlen, &ip6f); + if (error) { ip6stat.ip6s_odropped++; goto sendorfree; } @@ -1448,26 +1532,25 @@ check_with_pf: else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(len + hlen + - sizeof(*ip6f) - - sizeof(struct ip6_hdr))); - if ((m_frgpart = m_copy(m0, off, len)) == 0) { + sizeof (*ip6f) - sizeof (struct ip6_hdr))); + if ((m_frgpart = m_copy(m0, off, len)) == NULL) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m_cat(m, m_frgpart); - m->m_pkthdr.len = len + hlen + sizeof(*ip6f); - m->m_pkthdr.rcvif = 0; - m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id; + m->m_pkthdr.len = len + hlen + sizeof (*ip6f); + m->m_pkthdr.rcvif = NULL; + M_COPY_CLASSIFIER(m, m0); M_COPY_PFTAG(m, m0); - m_set_service_class(m, m0->m_pkthdr.svc); -#ifdef __darwin8_notyet +#ifdef notyet #if CONFIG_MACF_NET mac_create_fragment(m0, m); -#endif -#endif +#endif /* CONFIG_MACF_NET */ +#endif /* notyet */ + ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; @@ -1483,55 +1566,42 @@ check_with_pf: */ sendorfree: m = m0->m_nextpkt; - m0->m_nextpkt = 0; + m0->m_nextpkt = NULL; m_freem(m0); - for (m0 = m; 
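The fragment sizing in the loop above is pure arithmetic: each fragment carries the unfragmentable part (hlen) plus a fragment header, so the per-fragment payload is the MTU minus those, rounded down to a multiple of 8 for the offset field, and the last fragment carries whatever remains. A runnable userland rendering with assumed sample values (1500-byte MTU, 40-byte unfragmentable part, 4000-byte packet):

#include <stdio.h>

int
main(void)
{
    unsigned mtu = 1500, hlen = 40, tlen = 4000;
    unsigned fraghdr = 8;       /* sizeof (struct ip6_frag) */
    unsigned len = (mtu - hlen - fraghdr) & ~7U;    /* 1448 here */
    unsigned off;

    for (off = hlen; off < tlen; off += len) {
        unsigned payload = (tlen - off < len) ? tlen - off : len;
        printf("fragment at offset %u: %u payload bytes%s\n",
            off - hlen, payload,
            (off + len < tlen) ? " (more fragments)" : "");
    }
    return (0);
}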
m; m = m0) { + for (m0 = m; m != NULL; m = m0) { m0 = m->m_nextpkt; - m->m_nextpkt = 0; + m->m_nextpkt = NULL; if (error == 0) { - /* Record statistics for this interface address. */ - if (ia) { -#ifndef __APPLE__ - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; -#endif - } #if IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); -#endif +#endif /* IPSEC */ error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv); - - } else + } else { m_freem(m); + } } if (error == 0) ip6stat.ip6s_fragmented++; done: + ROUTE_RELEASE(&ip6route); #if IPSEC - if (ipsec_saved_route) { - ro = ipsec_saved_route; - if (ipsec_state.ro.ro_rt) { - rtfree(ipsec_state.ro.ro_rt); - } - } -#endif /* IPSEC */ - if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */ - rtfree(ro->ro_rt); - } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) { - rtfree(ro_pmtu->ro_rt); - } - -#if IPSEC + ROUTE_RELEASE(&ipsec_state.ro); if (sp != NULL) key_freesp(sp, KEY_SADB_UNLOCKED); #endif /* IPSEC */ +#if DUMMYNET + ROUTE_RELEASE(&saved_route); + ROUTE_RELEASE(&saved_ro_pmtu); +#endif /* DUMMYNET */ if (ia != NULL) IFA_REMREF(&ia->ia_ifa); + if (src_ia != NULL) + IFA_REMREF(&src_ia->ia_ifa); if (ifp != NULL) ifnet_release(ifp); if (origifp != NULL) @@ -1539,81 +1609,225 @@ done: return (error); freehdrs: - m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ - m_freem(exthdrs.ip6e_dest1); - m_freem(exthdrs.ip6e_rthdr); - m_freem(exthdrs.ip6e_dest2); - /* fall through */ + if (exthdrs.ip6e_hbh != NULL) + m_freem(exthdrs.ip6e_hbh); + if (exthdrs.ip6e_dest1 != NULL) + m_freem(exthdrs.ip6e_dest1); + if (exthdrs.ip6e_rthdr != NULL) + m_freem(exthdrs.ip6e_rthdr); + if (exthdrs.ip6e_dest2 != NULL) + m_freem(exthdrs.ip6e_dest2); + /* FALLTHRU */ bad: - m_freem(m); + if (m != NULL) + m_freem(m); goto done; + +#undef ipf_pktopts +#undef exthdrs +#undef ip6route +#undef ipsec_state +#undef saved_route +#undef saved_ro_pmtu +#undef args } static int -ip6_copyexthdr(mp, hdr, hlen) - struct mbuf **mp; - caddr_t hdr; - int hlen; +ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) - return(ENOBUFS); /* XXX */ + return (ENOBUFS); /* XXX */ MGET(m, M_DONTWAIT, MT_DATA); - if (!m) - return(ENOBUFS); + if (m == NULL) + return (ENOBUFS); if (hlen > MLEN) { MCLGET(m, M_DONTWAIT); - if ((m->m_flags & M_EXT) == 0) { + if (!(m->m_flags & M_EXT)) { m_free(m); return (ENOBUFS); } } m->m_len = hlen; - if (hdr) + if (hdr != NULL) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } +static void +ip6_out_cksum_stats(int proto, u_int32_t len) +{ + switch (proto) { + case IPPROTO_TCP: + tcp_out6_cksum_stats(len); + break; + case IPPROTO_UDP: + udp_out6_cksum_stats(len); + break; + default: + /* keep only TCP or UDP stats for now */ + break; + } +} + /* - * Process a delayed payload checksum calculation. + * Process a delayed payload checksum calculation (outbound path.) + * + * hoff is the number of bytes beyond the mbuf data pointer which + * points to the IPv6 header. optlen is the number of bytes, if any, + * between the end of IPv6 header and the beginning of the ULP payload + * header, which represents the extension headers. If optlen is less + * than zero, this routine will bail when it detects extension headers. + * + * Returns a bitmask representing all the work done in software. 
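Underneath inet6_cksum() in the new in6_finalize_cksum() sits the standard 16-bit one's-complement sum. The kernel version walks an mbuf chain and folds in the IPv6 pseudo-header; the sketch below shows the same arithmetic over a flat buffer, with cksum16 as an illustrative name.

#include <stdint.h>
#include <stddef.h>

static uint16_t
cksum16(const void *data, size_t len, uint32_t sum)
{
    const uint8_t *p = data;

    while (len > 1) {                   /* sum 16-bit big-endian words */
        sum += ((uint32_t)p[0] << 8) | p[1];
        p += 2;
        len -= 2;
    }
    if (len == 1)                       /* odd trailing byte, zero-padded */
        sum += (uint32_t)p[0] << 8;
    while (sum >> 16)                   /* fold carries back in */
        sum = (sum & 0xffff) + (sum >> 16);
    return ((uint16_t)~sum);
}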
*/ -void -in6_delayed_cksum(struct mbuf *m, uint16_t offset) +uint32_t +in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen, + int32_t nxt0, uint32_t csum_flags) { - uint16_t csum; + unsigned char buf[sizeof (struct ip6_hdr)] __attribute__((aligned(8))); + struct ip6_hdr *ip6; + uint32_t offset, mlen, hlen, olen, sw_csum; + uint16_t csum, ulpoff, plen; + uint8_t nxt; - csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset); - if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6) != 0) { - csum = 0xffff; + _CASSERT(sizeof (csum) == sizeof (uint16_t)); + VERIFY(m->m_flags & M_PKTHDR); + + sw_csum = (csum_flags & m->m_pkthdr.csum_flags); + + if ((sw_csum &= CSUM_DELAY_IPV6_DATA) == 0) + goto done; + + mlen = m->m_pkthdr.len; /* total mbuf len */ + hlen = sizeof (*ip6); /* IPv6 header len */ + + /* sanity check (need at least IPv6 header) */ + if (mlen < (hoff + hlen)) { + panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr " + "(%u+%u)\n", __func__, m, mlen, hoff, hlen); + /* NOTREACHED */ + } + + /* + * In case the IPv6 header is not contiguous, or not 32-bit + * aligned, copy it to a local buffer. + */ + if ((hoff + hlen) > m->m_len || + !IP6_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) { + m_copydata(m, hoff, hlen, (caddr_t)buf); + ip6 = (struct ip6_hdr *)(void *)buf; + } else { + ip6 = (struct ip6_hdr *)(void *)(m->m_data + hoff); + } + + nxt = ip6->ip6_nxt; + plen = ntohs(ip6->ip6_plen); + if (plen != (mlen - (hoff + hlen))) { + plen = OSSwapInt16(plen); + if (plen != (mlen - (hoff + hlen))) { + /* Don't complain for jumbograms */ + if (plen != 0 || nxt != IPPROTO_HOPOPTS) { + printf("%s: mbuf 0x%llx proto %d IPv6 " + "plen %d (%x) [swapped %d (%x)] doesn't " + "match actual packet length; %d is used " + "instead\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m), nxt, + ip6->ip6_plen, ip6->ip6_plen, plen, plen, + (mlen - (hoff + hlen))); + } + plen = mlen - (hoff + hlen); + } + } + + if (optlen < 0) { + /* next header isn't TCP/UDP and we don't know optlen, bail */ + if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { + sw_csum = 0; + goto done; + } + olen = 0; + } else { + /* caller supplied the original transport number; use it */ + if (nxt0 >= 0) + nxt = nxt0; + olen = optlen; } - offset += (m->m_pkthdr.csum_data & 0xffff); - if ((offset + sizeof(csum)) > m->m_len) { - m_copyback(m, offset, sizeof(csum), &csum); - } else if (IP6_HDR_ALIGNED_P(mtod(m, char *))) { + offset = hoff + hlen + olen; /* ULP header */ + + /* sanity check */ + if (mlen < offset) { + panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr+ext_hdr " + "(%u+%u+%u)\n", __func__, m, mlen, hoff, hlen, olen); + /* NOTREACHED */ + } + + /* + * offset is added to the lower 16-bit value of csum_data, + * which is expected to contain the ULP offset; therefore + * CSUM_PARTIAL offset adjustment must be undone. + */ + if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL|CSUM_DATA_VALID)) == + (CSUM_PARTIAL|CSUM_DATA_VALID)) { + /* + * Get back the original ULP offset (this will + * undo the CSUM_PARTIAL logic in ip6_output.) 
+ */ + m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff - + m->m_pkthdr.csum_tx_start); + } + + ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */ + + if (mlen < (ulpoff + sizeof (csum))) { + panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP " + "cksum offset (%u) cksum flags 0x%x\n", __func__, + m, mlen, nxt, ulpoff, m->m_pkthdr.csum_flags); + /* NOTREACHED */ + } + + csum = inet6_cksum(m, 0, offset, plen - olen); + + /* Update stats */ + ip6_out_cksum_stats(nxt, plen - olen); + + /* RFC1122 4.1.3.4 */ + if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6)) + csum = 0xffff; + + /* Insert the checksum in the ULP csum field */ + offset += ulpoff; + if ((offset + sizeof (csum)) > m->m_len) { + m_copyback(m, offset, sizeof (csum), &csum); + } else if (IP6_HDR_ALIGNED_P(mtod(m, char *) + hoff)) { *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum; } else { bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum)); } + m->m_pkthdr.csum_flags &= + ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID | CSUM_PARTIAL); + +done: + return (sw_csum); } + /* * Insert jumbo payload option. */ static int -ip6_insert_jumboopt(exthdrs, plen) - struct ip6_exthdrs *exthdrs; - u_int32_t plen; +ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; -#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ +#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. @@ -1621,9 +1835,9 @@ ip6_insert_jumboopt(exthdrs, plen) * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ - if (exthdrs->ip6e_hbh == 0) { + if (exthdrs->ip6e_hbh == NULL) { MGET(mopt, M_DONTWAIT, MT_DATA); - if (mopt == 0) + if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); @@ -1655,14 +1869,14 @@ ip6_insert_jumboopt(exthdrs, plen) * at this point. */ MGET(n, M_DONTWAIT, MT_DATA); - if (n) { + if (n != NULL) { MCLGET(n, M_DONTWAIT); - if ((n->m_flags & M_EXT) == 0) { + if (!(n->m_flags & M_EXT)) { m_freem(n); n = NULL; } } - if (!n) + if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), @@ -1689,7 +1903,7 @@ ip6_insert_jumboopt(exthdrs, plen) optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); - bcopy(&v, &optbuf[4], sizeof(u_int32_t)); + bcopy(&v, &optbuf[4], sizeof (u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; @@ -1702,17 +1916,15 @@ ip6_insert_jumboopt(exthdrs, plen) * Insert fragment header and copy unfragmentable header portions. 
*/ static int -ip6_insertfraghdr(m0, m, hlen, frghdrp) - struct mbuf *m0, *m; - int hlen; - struct ip6_frag **frghdrp; +ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, + struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; - if (hlen > sizeof(struct ip6_hdr)) { - n = m_copym(m0, sizeof(struct ip6_hdr), - hlen - sizeof(struct ip6_hdr), M_DONTWAIT); - if (n == 0) + if (hlen > sizeof (struct ip6_hdr)) { + n = m_copym(m0, sizeof (struct ip6_hdr), + hlen - sizeof (struct ip6_hdr), M_DONTWAIT); + if (n == NULL) return (ENOBUFS); m->m_next = n; } else @@ -1722,21 +1934,21 @@ ip6_insertfraghdr(m0, m, hlen, frghdrp) for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; - if ((mlast->m_flags & M_EXT) == 0 && - M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { - /* use the trailing space of the last mbuf for the fragment hdr */ + if (!(mlast->m_flags & M_EXT) && + M_TRAILINGSPACE(mlast) >= sizeof (struct ip6_frag)) { + /* use the trailing space of the last mbuf for the frag hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); - mlast->m_len += sizeof(struct ip6_frag); - m->m_pkthdr.len += sizeof(struct ip6_frag); + mlast->m_len += sizeof (struct ip6_frag); + m->m_pkthdr.len += sizeof (struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; MGET(mfrg, M_DONTWAIT, MT_DATA); - if (mfrg == 0) + if (mfrg == NULL) return (ENOBUFS); - mfrg->m_len = sizeof(struct ip6_frag); + mfrg->m_len = sizeof (struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } @@ -1744,31 +1956,26 @@ ip6_insertfraghdr(m0, m, hlen, frghdrp) return (0); } -extern int load_ipfw(void); static int ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup, - int *alwaysfragp) + boolean_t *alwaysfragp) { u_int32_t mtu = 0; - int alwaysfrag = 0; + boolean_t alwaysfrag = FALSE; int error = 0; if (ro_pmtu != ro) { /* The first hop and the final destination may differ. */ - struct sockaddr_in6 *sa6_dst = - (struct sockaddr_in6 *)&ro_pmtu->ro_dst; - if (ro_pmtu->ro_rt && - ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 || - ro_pmtu->ro_rt->generation_id != route_generation || - !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) { - rtfree(ro_pmtu->ro_rt); - ro_pmtu->ro_rt = (struct rtentry *)NULL; - } + struct sockaddr_in6 *sa6_dst = SIN6(&ro_pmtu->ro_dst); + if (ROUTE_UNUSABLE(ro_pmtu) || + !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) + ROUTE_RELEASE(ro_pmtu); + if (ro_pmtu->ro_rt == NULL) { - bzero(sa6_dst, sizeof(*sa6_dst)); + bzero(sa6_dst, sizeof (*sa6_dst)); sa6_dst->sin6_family = AF_INET6; - sa6_dst->sin6_len = sizeof(struct sockaddr_in6); + sa6_dst->sin6_len = sizeof (struct sockaddr_in6); sa6_dst->sin6_addr = *dst; rtalloc_scoped((struct route *)ro_pmtu, @@ -1776,7 +1983,6 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, } } - if (ro_pmtu->ro_rt != NULL) { u_int32_t ifmtu; @@ -1785,7 +1991,11 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, ifmtu = IN6_LINKMTU(ifp); lck_rw_done(nd_if_rwlock); - RT_LOCK_SPIN(ro_pmtu->ro_rt); + /* + * Access rmx_mtu without holding the route entry lock, + * for performance; this isn't something that changes + * often, so optimize. + */ mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; if (mtu > ifmtu || mtu == 0) { /* @@ -1799,11 +2009,10 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, * if MTU on the route is 0, we need to fix the MTU. * this case happens with path MTU discovery timeouts. 
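Stripped of locking and route bookkeeping, the MTU selection in ip6_getpmtu() above is: the cached route MTU wins unless it is unset or exceeds the link MTU, and anything below the IPv6 minimum is clamped back up to 1280 with alwaysfrag set, so a fragment header is still emitted for translators (RFC 2460 section 5, last paragraph). A pure-function sketch; pick_pmtu is an illustrative name.

static unsigned
pick_pmtu(unsigned route_mtu, unsigned link_mtu, int *alwaysfrag)
{
    unsigned mtu = route_mtu;

    *alwaysfrag = 0;
    if (mtu == 0 || mtu > link_mtu) {
        /* no cached PMTU, or stale one larger than the link allows */
        mtu = link_mtu;
    } else if (mtu < 1280) {
        /* honor an ICMPv6 too-big below the minimum by fragmenting */
        *alwaysfrag = 1;
        mtu = 1280;
    }
    return (mtu);
}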
*/ - mtu = ifmtu; - if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) - ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ - } - else if (mtu < IPV6_MMTU) { + mtu = ifmtu; + if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) + ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ + } else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record ICMPv6 too big message with @@ -1812,22 +2021,22 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, * (fragment header is needed regardless from the * packet size, for translators to identify packets) */ - alwaysfrag = 1; + alwaysfrag = TRUE; mtu = IPV6_MMTU; - } - RT_UNLOCK(ro_pmtu->ro_rt); + } } else { if (ifp) { lck_rw_lock_shared(nd_if_rwlock); /* Don't hold nd_ifinfo lock for performance */ mtu = IN6_LINKMTU(ifp); lck_rw_done(nd_if_rwlock); - } else + } else { error = EHOSTUNREACH; /* XXX */ + } } *mtup = mtu; - if (alwaysfragp) + if (alwaysfragp != NULL) *alwaysfragp = alwaysfrag; return (error); } @@ -1836,9 +2045,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro, * IP6 socket option processing. */ int -ip6_ctloutput(so, sopt) - struct socket *so; - struct sockopt *sopt; +ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; @@ -1849,37 +2056,33 @@ ip6_ctloutput(so, sopt) int optlen = 0; struct proc *p; - if (sopt == NULL) { - panic("ip6_ctloutput: arg soopt is NULL"); - /* NOTREACHED */ - } + VERIFY(sopt != NULL); + level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; p = sopt->sopt_p; - uproto = (int)so->so_proto->pr_protocol; + uproto = (int)SOCK_PROTO(so); privileged = (proc_suser(p) == 0); if (level == IPPROTO_IPV6) { switch (op) { - case SOPT_SET: switch (optname) { - case IPV6_2292PKTOPTIONS: - { + case IPV6_2292PKTOPTIONS: { struct mbuf *m; - error = soopt_getm(sopt, &m); /* XXX */ + error = soopt_getm(sopt, &m); if (error != 0) break; - error = soopt_mcopyin(sopt, m); /* XXX */ + error = soopt_mcopyin(sopt, m); if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, - m, so, sopt); - m_freem(m); /* XXX */ + m, so, sopt); + m_freem(m); break; } @@ -1899,12 +2102,11 @@ ip6_ctloutput(so, sopt) case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: - if (!privileged) - break; + if (!privileged) + break; /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: - case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: @@ -1912,43 +2114,45 @@ ip6_ctloutput(so, sopt) case IPV6_RECVTCLASS: case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: - if (optlen != sizeof(int)) { + if (optlen != sizeof (int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, - sizeof optval, sizeof optval); + sizeof (optval), sizeof (optval)); if (error) break; - switch (optname) { + switch (optname) { case IPV6_UNICAST_HOPS: - if (optval < -1 || optval >= 256) + if (optval < -1 || optval >= 256) { error = EINVAL; - else { + } else { /* -1 = kernel default */ in6p->in6p_hops = optval; - if ((in6p->inp_vflag & - INP_IPV4) != 0) - in6p->inp_ip_ttl = optval; + if (in6p->inp_vflag & + INP_IPV4) { + in6p->inp_ip_ttl = + optval; + } } break; -#define OPTSET(bit) \ -do { \ - if (optval) \ - in6p->inp_flags |= (bit); \ - else \ - in6p->inp_flags &= ~(bit); \ -} while (/*CONSTCOND*/ 0) -#define OPTSET2292(bit) \ -do { \ - in6p->inp_flags |= IN6P_RFC2292; \ - if (optval) \ - in6p->inp_flags |= (bit); \ - else \ - in6p->inp_flags &= ~(bit); \ -} while (/*CONSTCOND*/ 0) -#define OPTBIT(bit) 
(in6p->inp_flags & (bit) ? 1 : 0) +#define OPTSET(bit) do { \ + if (optval) \ + in6p->inp_flags |= (bit); \ + else \ + in6p->inp_flags &= ~(bit); \ +} while (0) + +#define OPTSET2292(bit) do { \ + in6p->inp_flags |= IN6P_RFC2292; \ + if (optval) \ + in6p->inp_flags |= (bit); \ + else \ + in6p->inp_flags &= ~(bit); \ +} while (0) + +#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ @@ -1959,8 +2163,7 @@ do { \ OPTSET(IN6P_PKTINFO); break; - case IPV6_HOPLIMIT: - { + case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ @@ -1970,7 +2173,7 @@ do { \ } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, - (u_char *)&optval, sizeof(optval), + (u_char *)&optval, sizeof (optval), optp, uproto); break; } @@ -2038,7 +2241,8 @@ do { \ * see ipng mailing list, Jun 22 2001. */ if (in6p->inp_lport || - !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { + !IN6_IS_ADDR_UNSPECIFIED( + &in6p->in6p_laddr)) { error = EINVAL; break; } @@ -2048,10 +2252,12 @@ do { \ else in6p->inp_vflag |= INP_IPV4; break; + case IPV6_RECVTCLASS: /* we can mix with RFC2292 */ OPTSET(IN6P_TCLASS); break; + case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; @@ -2062,23 +2268,23 @@ do { \ case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: - case IPV6_PREFER_TEMPADDR: - if (optlen != sizeof(optval)) { + case IPV6_PREFER_TEMPADDR: { + struct ip6_pktopts **optp; + + if (optlen != sizeof (optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, - sizeof optval, sizeof optval); + sizeof (optval), sizeof (optval)); if (error) break; - { - struct ip6_pktopts **optp; - optp = &in6p->in6p_outputopts; - error = ip6_pcbopt(optname, - (u_char *)&optval, sizeof(optval), - optp, uproto); - break; - } + + optp = &in6p->in6p_outputopts; + error = ip6_pcbopt(optname, (u_char *)&optval, + sizeof (optval), optp, uproto); + break; + } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: @@ -2086,12 +2292,12 @@ do { \ case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ - if (optlen != sizeof(int)) { + if (optlen != sizeof (int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, - sizeof optval, sizeof optval); + sizeof (optval), sizeof (optval)); if (error) break; switch (optname) { @@ -2107,26 +2313,27 @@ do { \ * See comments for IPV6_RECVHOPOPTS. 
*/ if (!privileged) - return(EPERM); + return (EPERM); OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (!privileged) - return(EPERM); - OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ + return (EPERM); + OPTSET2292(IN6P_DSTOPTS| + IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; + case IPV6_3542PKTINFO: case IPV6_3542HOPOPTS: case IPV6_3542RTHDR: case IPV6_3542DSTOPTS: case IPV6_RTHDRDSTOPTS: - case IPV6_3542NEXTHOP: - { + case IPV6_3542NEXTHOP: { struct ip6_pktopts **optp; /* new advanced API (RFC3542) */ struct mbuf *m; @@ -2140,18 +2347,16 @@ do { \ if (error != 0) break; error = soopt_mcopyin(sopt, m); - if (error) { - m_freem(m); + if (error != 0) break; - } + optp = &in6p->in6p_outputopts; - error = ip6_pcbopt(optname, mtod(m, u_char *), - m->m_len, optp, uproto); + error = ip6_pcbopt(optname, mtod(m, u_char *), + m->m_len, optp, uproto); m_freem(m); break; } #undef OPTSET - case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: @@ -2169,7 +2374,7 @@ do { \ case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, - sizeof optval, sizeof optval); + sizeof (optval), sizeof (optval)); if (error) break; @@ -2194,45 +2399,39 @@ do { \ break; } break; - #if IPSEC - case IPV6_IPSEC_POLICY: - { + case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m; - if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ + if ((error = soopt_getm(sopt, &m)) != 0) break; - if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ + if ((error = soopt_mcopyin(sopt, m)) != 0) break; - if (m) { - req = mtod(m, caddr_t); - len = m->m_len; - } + + req = mtod(m, caddr_t); + len = m->m_len; error = ipsec6_set_policy(in6p, optname, req, - len, privileged); + len, privileged); m_freem(m); - } break; -#endif /* KAME IPSEC */ - + } +#endif /* IPSEC */ #if IPFIREWALL case IPV6_FW_ADD: case IPV6_FW_DEL: case IPV6_FW_FLUSH: - case IPV6_FW_ZERO: - { + case IPV6_FW_ZERO: { if (ip6_fw_ctl_ptr == NULL) load_ip6fw(); if (ip6_fw_ctl_ptr != NULL) error = (*ip6_fw_ctl_ptr)(sopt); else - return ENOPROTOOPT; - } + error = ENOPROTOOPT; break; + } #endif /* IPFIREWALL */ - /* * IPv6 variant of IP_BOUND_IF; for details see * comments on IP_BOUND_IF in ip_ctloutput(). 
@@ -2250,7 +2449,7 @@ do { \ if (error) break; - error = inp_bindif(in6p, optval); + error = inp_bindif(in6p, optval, NULL); break; case IPV6_NO_IFT_CELLULAR: @@ -2266,7 +2465,15 @@ do { \ if (error) break; - error = inp_nocellular(in6p, optval); + /* once set, it cannot be unset */ + if (!optval && + (in6p->inp_flags & INP_NO_IFT_CELLULAR)) { + error = EINVAL; + break; + } + + error = so_set_restrictions(so, + SO_RESTRICT_DENY_CELLULAR); break; case IPV6_OUT_IF: @@ -2304,7 +2511,6 @@ do { \ case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: - case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: @@ -2347,8 +2553,7 @@ do { \ optval = OPTBIT(IN6P_IPV6_V6ONLY); break; - case IPV6_PORTRANGE: - { + case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) @@ -2358,7 +2563,7 @@ do { \ else optval = 0; break; - } + } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; @@ -2370,16 +2575,15 @@ do { \ if (error) break; error = sooptcopyout(sopt, &optval, - sizeof optval); + sizeof (optval)); break; - case IPV6_PATHMTU: - { + case IPV6_PATHMTU: { u_int32_t pmtu = 0; struct ip6_mtuinfo mtuinfo; struct route_in6 sro; - bzero(&sro, sizeof(sro)); + bzero(&sro, sizeof (sro)); if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); @@ -2390,17 +2594,16 @@ do { \ */ error = ip6_getpmtu(&sro, NULL, NULL, &in6p->in6p_faddr, &pmtu, NULL); - if (sro.ro_rt) - rtfree(sro.ro_rt); + ROUTE_RELEASE(&sro); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; - bzero(&mtuinfo, sizeof(mtuinfo)); + bzero(&mtuinfo, sizeof (mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; - optdatalen = sizeof(mtuinfo); + optdatalen = sizeof (mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; @@ -2425,12 +2628,14 @@ do { \ optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: - optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); + optval = OPTBIT(IN6P_DSTOPTS| + IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, - sizeof optval); + sizeof (optval)); break; + case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: @@ -2451,47 +2656,42 @@ do { \ case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; - #if IPSEC - case IPV6_IPSEC_POLICY: - { + case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; - struct mbuf **mp = &m; + struct mbuf *mp = NULL; - error = soopt_getm(sopt, &m); /* XXX */ + error = soopt_getm(sopt, &m); if (error != 0) break; - error = soopt_mcopyin(sopt, m); /* XXX */ + error = soopt_mcopyin(sopt, m); if (error != 0) break; - if (m) { - req = mtod(m, caddr_t); - len = m->m_len; - } - error = ipsec6_get_policy(in6p, req, len, mp); + + req = mtod(m, caddr_t); + len = m->m_len; + error = ipsec6_get_policy(in6p, req, len, &mp); if (error == 0) - error = soopt_mcopyout(sopt, m); /*XXX*/ - if (error == 0 && m) - m_freem(m); + error = soopt_mcopyout(sopt, mp); + if (mp != NULL) + m_freem(mp); + m_freem(m); break; - } -#endif /* KAME IPSEC */ - + } +#endif /* IPSEC */ #if IPFIREWALL - case IPV6_FW_GET: - { + case IPV6_FW_GET: { if (ip6_fw_ctl_ptr == NULL) load_ip6fw(); if (ip6_fw_ctl_ptr != NULL) error = (*ip6_fw_ctl_ptr)(sopt); else - return ENOPROTOOPT; - } + error = ENOPROTOOPT; break; + } #endif /* IPFIREWALL */ - case IPV6_BOUND_IF: if (in6p->inp_flags & INP_BOUND_IF) optval = in6p->inp_boundifp->if_index; @@ -2522,7 +2722,7 @@ do { \ } else { error = EINVAL; } - return(error); + return (error); } int @@ -2538,9 +2738,8 @@ ip6_raw_ctloutput(struct socket *so, 
struct sockopt *sopt) optname = sopt->sopt_name; optlen = sopt->sopt_valsize; - if (level != IPPROTO_IPV6) { + if (level != IPPROTO_IPV6) return (EINVAL); - } switch (optname) { case IPV6_CHECKSUM: @@ -2554,32 +2753,32 @@ ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) */ switch (op) { case SOPT_SET: - if (optlen != sizeof(int)) { + if (optlen != sizeof (int)) { error = EINVAL; break; } - error = sooptcopyin(sopt, &optval, sizeof(optval), - sizeof(optval)); + error = sooptcopyin(sopt, &optval, sizeof (optval), + sizeof (optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; - } else if (so->so_proto->pr_protocol == - IPPROTO_ICMPV6) { + } else if (SOCK_PROTO(so) == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; - } else + } else { in6p->in6p_cksum = optval; + } break; case SOPT_GET: - if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) + if (SOCK_PROTO(so) == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; - error = sooptcopyout(sopt, &optval, sizeof(optval)); + error = sooptcopyout(sopt, &optval, sizeof (optval)); break; default: @@ -2601,49 +2800,48 @@ ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) * specifying behavior of outgoing packets. */ static int -ip6_pcbopts( - struct ip6_pktopts **pktopt, - struct mbuf *m, - __unused struct socket *so, - __unused struct sockopt *sopt) +ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, + struct sockopt *sopt) { +#pragma unused(sopt) struct ip6_pktopts *opt = *pktopt; int error = 0; /* turn off any old options. */ - if (opt) { + if (opt != NULL) { #if DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) - printf("ip6_pcbopts: all specified options are cleared.\n"); + printf("%s: all specified options are cleared.\n", + __func__); #endif ip6_clearpktopts(opt, -1); } else { - opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK); + opt = _MALLOC(sizeof (*opt), M_IP6OPT, M_WAITOK); if (opt == NULL) - return ENOBUFS; + return (ENOBUFS); } *pktopt = NULL; - if (!m || m->m_len == 0) { + if (m == NULL || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ - if (opt) + if (opt != NULL) FREE(opt, M_IP6OPT); - return(0); + return (0); } /* set options specified by user. 
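From userland, the IPV6_CHECKSUM handling above (RFC 3542) looks like a plain socket option on a raw IPv6 socket: an even byte offset asks the kernel to compute and insert the transport checksum there, and ICMPv6 sockets reject any value other than their fixed offset. A minimal sketch, using protocol 253 (reserved for experimentation) as an assumed placeholder; raw sockets require privilege.

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

int
main(void)
{
    int fd = socket(AF_INET6, SOCK_RAW, 253);   /* demo protocol */
    int off = 2;                                /* must be even */

    if (fd == -1) {
        perror("socket");       /* typically needs root */
        return (1);
    }
    if (setsockopt(fd, IPPROTO_IPV6, IPV6_CHECKSUM,
        &off, sizeof (off)) == -1)
        perror("setsockopt(IPV6_CHECKSUM)");
    return (0);
}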
*/ - if ((error = ip6_setpktopts(m, opt, NULL, so->so_proto->pr_protocol)) != 0) { + if ((error = ip6_setpktopts(m, opt, NULL, SOCK_PROTO(so))) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ FREE(opt, M_IP6OPT); - return(error); + return (error); } *pktopt = opt; - return(0); + return (0); } /* @@ -2654,7 +2852,7 @@ void ip6_initpktopts(struct ip6_pktopts *opt) { - bzero(opt, sizeof(*opt)); + bzero(opt, sizeof (*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; @@ -2669,9 +2867,9 @@ ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, opt = *pktopt; if (opt == NULL) { - opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK); + opt = _MALLOC(sizeof (*opt), M_IP6OPT, M_WAITOK); if (opt == NULL) - return(ENOBUFS); + return (ENOBUFS); ip6_initpktopts(opt); *pktopt = opt; } @@ -2685,7 +2883,6 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; - int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; @@ -2698,18 +2895,20 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) optdata = (void *)pktopt->ip6po_pktinfo; else { /* XXX: we don't have to do this every time... */ - bzero(&null_pktinfo, sizeof(null_pktinfo)); + bzero(&null_pktinfo, sizeof (null_pktinfo)); optdata = (void *)&null_pktinfo; } - optdatalen = sizeof(struct in6_pktinfo); + optdatalen = sizeof (struct in6_pktinfo); break; + case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; - optdatalen = sizeof(int); + optdatalen = sizeof (int); break; + case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; @@ -2717,6 +2916,7 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) optdatalen = (ip6e->ip6e_len + 1) << 3; } break; + case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; @@ -2724,6 +2924,7 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) optdatalen = (ip6e->ip6e_len + 1) << 3; } break; + case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; @@ -2731,6 +2932,7 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) optdatalen = (ip6e->ip6e_len + 1) << 3; } break; + case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; @@ -2738,34 +2940,39 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) optdatalen = (ip6e->ip6e_len + 1) << 3; } break; + case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; + case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; - optdatalen = sizeof(int); + optdatalen = sizeof (int); break; + case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; - optdatalen = sizeof(on); + optdatalen = sizeof (on); break; + case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; - optdatalen = sizeof(int); + optdatalen = sizeof (int); break; + default: /* should not happen */ 
#ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); @@ -2773,9 +2980,7 @@ ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) return (ENOPROTOOPT); } - error = sooptcopyout(sopt, optdata, optdatalen); - - return (error); + return (sooptcopyout(sopt, optdata, optdatalen)); } void @@ -2794,10 +2999,7 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { - if (pktopt->ip6po_nextroute.ro_rt) { - rtfree(pktopt->ip6po_nextroute.ro_rt); - pktopt->ip6po_nextroute.ro_rt = NULL; - } + ROUTE_RELEASE(&pktopt->ip6po_nextroute); if (pktopt->ip6po_nexthop) FREE(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; @@ -2816,10 +3018,7 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; - if (pktopt->ip6po_route.ro_rt) { - rtfree(pktopt->ip6po_route.ro_rt); - pktopt->ip6po_route.ro_rt = NULL; - } + ROUTE_RELEASE(&pktopt->ip6po_route); } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) @@ -2828,16 +3027,15 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) } } -#define PKTOPT_EXTHDRCPY(type) \ -do {\ - if (src->type) {\ - int hlen =\ - (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ - dst->type = _MALLOC(hlen, M_IP6OPT, canwait);\ - if (dst->type == NULL && canwait == M_NOWAIT)\ - goto bad;\ - bcopy(src->type, dst->type, hlen);\ - }\ +#define PKTOPT_EXTHDRCPY(type) do { \ + if (src->type) { \ + int hlen = \ + (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3; \ + dst->type = _MALLOC(hlen, M_IP6OPT, canwait); \ + if (dst->type == NULL && canwait == M_NOWAIT) \ + goto bad; \ + bcopy(src->type, dst->type, hlen); \ + } \ } while (0) static int @@ -2852,19 +3050,19 @@ copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait) dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; if (src->ip6po_pktinfo) { - dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo), - M_IP6OPT, canwait); + dst->ip6po_pktinfo = _MALLOC(sizeof (*dst->ip6po_pktinfo), + M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len, - M_IP6OPT, canwait); + M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, - src->ip6po_nexthop->sa_len); + src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); @@ -2872,7 +3070,7 @@ copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait) PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ return (0); - bad: +bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } @@ -2884,7 +3082,7 @@ ip6_copypktopts(struct ip6_pktopts *src, int canwait) int error; struct ip6_pktopts *dst; - dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait); + dst = _MALLOC(sizeof (*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); @@ -3050,7 +3248,7 @@ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, int uproto) { - struct cmsghdr *cm = 0; + struct cmsghdr *cm = NULL; if (control == NULL || opt == NULL) return (EINVAL); @@ -3082,10 +3280,12 @@ ip6_setpktopts(struct mbuf *control, 
struct ip6_pktopts *opt, if (control->m_len < CMSG_LEN(0)) return (EINVAL); - for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) { + for (cm = M_FIRST_CMSGHDR(control); cm != NULL; + cm = M_NXT_CMSGHDR(control, cm)) { int error; - if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len) + if (cm->cmsg_len < sizeof (struct cmsghdr) || + cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; @@ -3163,12 +3363,11 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, switch (optname) { case IPV6_2292PKTINFO: - case IPV6_PKTINFO: - { + case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; - if (len != sizeof(struct in6_pktinfo)) + if (len != sizeof (struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)(void *)buf; @@ -3198,7 +3397,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, ifnet_head_done(); return (ENXIO); } - + if (pktinfo->ipi6_ifindex) { ifp = ifindex2ifnet[pktinfo->ipi6_ifindex]; if (ifp == NULL) { @@ -3206,7 +3405,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, return (ENXIO); } } - + ifnet_head_done(); /* @@ -3220,18 +3419,17 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { - opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo), + opt->ip6po_pktinfo = _MALLOC(sizeof (*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } - bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); + bcopy(pktinfo, opt->ip6po_pktinfo, sizeof (*pktinfo)); break; } case IPV6_2292HOPLIMIT: - case IPV6_HOPLIMIT: - { + case IPV6_HOPLIMIT: { int *hlimp; /* @@ -3241,7 +3439,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); - if (len != sizeof(int)) + if (len != sizeof (int)) return (EINVAL); hlimp = (int *)(void *)buf; if (*hlimp < -1 || *hlimp > 255) @@ -3251,11 +3449,10 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, break; } - case IPV6_TCLASS: - { + case IPV6_TCLASS: { int tclass; - if (len != sizeof(int)) + if (len != sizeof (int)) return (EINVAL); tclass = *(int *)(void *)buf; if (tclass < -1 || tclass > 255) @@ -3277,16 +3474,14 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, } /* check if cmsg_len is large enough for sa_len */ - if (len < sizeof(struct sockaddr) || len < *buf) + if (len < sizeof (struct sockaddr) || len < *buf) return (EINVAL); - switch (((struct sockaddr *)buf)->sa_family) { - case AF_INET6: - { - struct sockaddr_in6 *sa6 = - (struct sockaddr_in6 *)(void *)buf; + switch (SA(buf)->sa_family) { + case AF_INET6: { + struct sockaddr_in6 *sa6 = SIN6(buf); - if (sa6->sin6_len != sizeof(struct sockaddr_in6)) + if (sa6->sin6_len != sizeof (struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || @@ -3313,8 +3508,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, break; case IPV6_2292HOPOPTS: - case IPV6_HOPOPTS: - { + case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; @@ -3333,7 +3527,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, } /* message length validation */ - if (len < sizeof(struct ip6_hbh)) + if (len < sizeof (struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)(void *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; 
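The hbhlen computation that closes the hunk above is the general IPv6 extension-header length rule: the length byte counts 8-octet units beyond the first 8 octets, so the full size is (len + 1) * 8. All of the option validations in this file (hop-by-hop, destination options, routing header) use the same formula, sketched here as a tiny helper with an illustrative name.

static unsigned
ext_hdr_bytes(unsigned char ip6e_len)
{
    return ((unsigned)(ip6e_len + 1) << 3);
}

/* e.g. ip6e_len == 0 -> 8 bytes; ip6e_len == 1 -> 16 bytes */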
@@ -3352,8 +3546,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: - case IPV6_RTHDRDSTOPTS: - { + case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; @@ -3367,7 +3560,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, } /* message length validation */ - if (len < sizeof(struct ip6_dest)) + if (len < sizeof (struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)(void *)buf; destlen = (dest->ip6d_len + 1) << 3; @@ -3411,13 +3604,11 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); - break; } case IPV6_2292RTHDR: - case IPV6_RTHDR: - { + case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; @@ -3427,7 +3618,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, } /* message length validation */ - if (len < sizeof(struct ip6_rthdr)) + if (len < sizeof (struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)(void *)buf; rthlen = (rth->ip6r_len + 1) << 3; @@ -3453,12 +3644,11 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); - break; } case IPV6_USE_MIN_MTU: - if (len != sizeof(int)) + if (len != sizeof (int)) return (EINVAL); minmtupolicy = *(int *)(void *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && @@ -3470,7 +3660,7 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, break; case IPV6_DONTFRAG: - if (len != sizeof(int)) + if (len != sizeof (int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) { @@ -3479,12 +3669,13 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; - } else + } else { opt->ip6po_flags |= IP6PO_DONTFRAG; + } break; case IPV6_PREFER_TEMPADDR: - if (len != sizeof(int)) + if (len != sizeof (int)) return (EINVAL); preftemp = *(int *)(void *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && @@ -3509,38 +3700,32 @@ ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, * pointer that might NOT be &loif -- easier than replicating that code here. */ void -ip6_mloopback( - struct ifnet *ifp, - struct mbuf *m, - struct sockaddr_in6 *dst) +ip6_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m, + struct sockaddr_in6 *dst, uint32_t optlen, int32_t nxt0) { struct mbuf *copym; struct ip6_hdr *ip6; + struct in6_addr src; - copym = m_copy(m, 0, M_COPYALL); - if (copym == NULL) + if (lo_ifp == NULL) return; /* + * Copy the packet header as it's needed for the checksum. * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ - if ((copym->m_flags & M_EXT) != 0 || - copym->m_len < sizeof(struct ip6_hdr)) { - copym = m_pullup(copym, sizeof(struct ip6_hdr)); - if (copym == NULL) - return; - } + copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR); + if (copym != NULL && ((copym->m_flags & M_EXT) || + copym->m_len < sizeof (struct ip6_hdr))) + copym = m_pullup(copym, sizeof (struct ip6_hdr)); -#if DIAGNOSTIC - if (copym->m_len < sizeof(*ip6)) { - m_freem(copym); + if (copym == NULL) return; - } -#endif ip6 = mtod(copym, struct ip6_hdr *); + src = ip6->ip6_src; /* * clear embedded scope identifiers if necessary. 
* in6_clearscope will touch the addresses only when necessary. @@ -3548,83 +3733,161 @@ ip6_mloopback( in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); -#ifdef __APPLE__ + if (copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) + in6_delayed_cksum_offset(copym, 0, optlen, nxt0); - /* Makes sure the HW checksum flags are cleaned before sending the packet */ + /* + * Stuff the 'real' ifp into the pkthdr, to be used in matching + * in ip6_input(); we need the loopback ifp/dl_tag passed as args + * to make the loopback driver compliant with the data link + * requirements. + */ + copym->m_pkthdr.rcvif = origifp; - if ((copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) { - in6_delayed_cksum(copym, sizeof(struct ip6_hdr)); - copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA; + /* + * Also record the source interface (which owns the source address). + * This is basically a stripped down version of ifa_foraddr6(). + */ + if (srcifp == NULL) { + struct in6_ifaddr *ia; + + lck_rw_lock_shared(&in6_ifaddr_rwlock); + for (ia = in6_ifaddrs; ia != NULL; ia = ia->ia_next) { + IFA_LOCK_SPIN(&ia->ia_ifa); + /* compare against src addr with embedded scope */ + if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &src)) { + srcifp = ia->ia_ifp; + IFA_UNLOCK(&ia->ia_ifa); + break; + } + IFA_UNLOCK(&ia->ia_ifa); + } + lck_rw_done(&in6_ifaddr_rwlock); } - copym->m_pkthdr.rcvif = 0; - copym->m_pkthdr.csum_data = 0; - copym->m_pkthdr.csum_flags = 0; + if (srcifp != NULL) + ip6_setsrcifaddr_info(copym, srcifp->if_index, NULL); + ip6_setdstifaddr_info(copym, origifp->if_index, NULL); - if (lo_ifp) { - copym->m_pkthdr.rcvif = ifp; - dlil_output(lo_ifp, PF_INET6, copym, 0, - (struct sockaddr *)dst, 0, NULL); - } else - m_free(copym); -#else - (void)if_simloop(ifp, copym, dst->sin6_family, NULL); -#endif + dlil_output(lo_ifp, PF_INET6, copym, NULL, SA(dst), 0, NULL); } /* * Chop IPv6 header off from the payload. 
*/ static int -ip6_splithdr(m, exthdrs) - struct mbuf *m; - struct ip6_exthdrs *exthdrs; +ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); - if (m->m_len > sizeof(*ip6)) { + if (m->m_len > sizeof (*ip6)) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */ - if (mh == 0) { + if (mh == NULL) { m_freem(m); - return ENOBUFS; + return (ENOBUFS); } M_COPY_PKTHDR(mh, m); - MH_ALIGN(mh, sizeof(*ip6)); + MH_ALIGN(mh, sizeof (*ip6)); m->m_flags &= ~M_PKTHDR; - m->m_len -= sizeof(*ip6); - m->m_data += sizeof(*ip6); + m->m_len -= sizeof (*ip6); + m->m_data += sizeof (*ip6); mh->m_next = m; m = mh; - m->m_len = sizeof(*ip6); - bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); + m->m_len = sizeof (*ip6); + bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof (*ip6)); } exthdrs->ip6e_ip6 = m; - return 0; + return (0); +} + +static void +ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m, + int nxt0, uint32_t tlen, uint32_t optlen) +{ + uint32_t sw_csum, hwcap = ifp->if_hwassist; + int tso = TSO_IPV6_OK(ifp, m); + + if (!hwcksum_tx) { + /* do all in software; checksum offload is disabled */ + sw_csum = CSUM_DELAY_IPV6_DATA & m->m_pkthdr.csum_flags; + } else { + /* do in software what the hardware cannot */ + sw_csum = m->m_pkthdr.csum_flags & + ~IF_HWASSIST_CSUM_FLAGS(hwcap); + } + + if (optlen != 0) { + sw_csum |= (CSUM_DELAY_IPV6_DATA & + m->m_pkthdr.csum_flags); + } else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) && + (hwcap & CSUM_PARTIAL)) { + /* + * Partial checksum offload, if non-IP fragment, no extension + * headers, and TCP only (no UDP support, as the + * hardware may not be able to convert +0 to + * -0 (0xffff) per RFC1122 4.1.3.4.) + */ + if (hwcksum_tx && !tso && + (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) && + tlen <= mtu) { + uint16_t start = sizeof (struct ip6_hdr); + uint16_t ulpoff = + m->m_pkthdr.csum_data & 0xffff; + m->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PARTIAL); + m->m_pkthdr.csum_tx_stuff = (ulpoff + start); + m->m_pkthdr.csum_tx_start = start; + sw_csum = 0; + } else { + sw_csum |= (CSUM_DELAY_IPV6_DATA & + m->m_pkthdr.csum_flags); + } + } + + if (sw_csum & CSUM_DELAY_IPV6_DATA) { + in6_delayed_cksum_offset(m, 0, optlen, nxt0); + sw_csum &= ~CSUM_DELAY_IPV6_DATA; + } + + if (hwcksum_tx) { + /* + * Drop off bits that aren't supported by hardware; + * also make sure to preserve non-checksum related bits. + */ + m->m_pkthdr.csum_flags = + ((m->m_pkthdr.csum_flags & + (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) | + (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK)); + } else { + /* drop all bits; checksum offload is disabled */ + m->m_pkthdr.csum_flags = 0; + } } /* * Compute IPv6 extension header length. */ int -ip6_optlen(in6p) - struct in6pcb *in6p; +ip6_optlen(struct in6pcb *in6p) { int len; if (!in6p->in6p_outputopts) - return 0; + return (0); len = 0; -#define elen(x) \ - (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) +#define elen(x) \ + (((struct ip6_ext *)(x)) ? 
\ + (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); - if (in6p->in6p_outputopts->ip6po_rthdr) + if (in6p->in6p_outputopts->ip6po_rthdr) { /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); + } len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); - return len; + return (len); #undef elen } diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h index 9507c904b..ac02672df 100644 --- a/bsd/netinet6/ip6_var.h +++ b/bsd/netinet6/ip6_var.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/ip6_var.h,v 1.2.2.2 2001/07/03 11:01:54 ume Exp $ */ -/* $KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -94,10 +91,12 @@ */ #ifndef _NETINET6_IP6_VAR_H_ -#define _NETINET6_IP6_VAR_H_ +#define _NETINET6_IP6_VAR_H_ #include -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE +#include + /* * IP6 reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. 
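The elen() macro that closes ip6_optlen() above encodes the RFC 2460 extension-header length rule, the same rule used earlier for hbhlen, destlen and rthlen: ip6e_len counts 8-octet units beyond the mandatory first 8 octets, so a header spans (ip6e_len + 1) << 3 bytes. A standalone sketch of the arithmetic, with a demo struct standing in for struct ip6_ext:

#include <stdio.h>
#include <stdint.h>

/*
 * Worked example of the extension-header length math: ip6e_len is the
 * header length in 8-octet units, not including the first 8 octets
 * (RFC 2460), so the total size in bytes is (ip6e_len + 1) << 3.
 */
struct ip6_ext_demo {		/* stand-in for struct ip6_ext */
	uint8_t ip6e_nxt;	/* next header */
	uint8_t ip6e_len;	/* length in 8-octet units, minus 1 */
};

int
main(void)
{
	struct ip6_ext_demo hbh = { 0, 0 };	/* minimal 8-byte header */
	struct ip6_ext_demo rth = { 0, 2 };	/* 24-byte routing header */

	printf("hbh: %d bytes\n", (hbh.ip6e_len + 1) << 3);	/* 8 */
	printf("rth: %d bytes\n", (rth.ip6e_len + 1) << 3);	/* 24 */
	return (0);
}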
@@ -113,10 +112,12 @@ struct ip6q { struct ip6q *ip6q_next; struct ip6q *ip6q_prev; int ip6q_unfrglen; /* len of unfragmentable part */ -#if notyet +#ifdef notyet u_char *ip6q_nxtp; #endif int ip6q_nfrag; /* # of fragments */ + uint32_t ip6q_csum_flags; /* checksum flags */ + uint32_t ip6q_csum; /* partial checksum value */ }; struct ip6asfrag { @@ -129,7 +130,7 @@ struct ip6asfrag { u_int16_t ip6af_mff; /* more fragment bit in frag off */ }; -#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) +#define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m)) struct ip6_moptions { decl_lck_mtx_data(, im6o_lock); @@ -192,16 +193,16 @@ struct ip6po_rhinfo { struct ip6_rthdr *ip6po_rhi_rthdr; /* Routing header */ struct route_in6 ip6po_rhi_route; /* Route to the 1st hop */ }; -#define ip6po_rthdr ip6po_rhinfo.ip6po_rhi_rthdr -#define ip6po_route ip6po_rhinfo.ip6po_rhi_route +#define ip6po_rthdr ip6po_rhinfo.ip6po_rhi_rthdr +#define ip6po_route ip6po_rhinfo.ip6po_rhi_route /* Nexthop related info */ struct ip6po_nhinfo { struct sockaddr *ip6po_nhi_nexthop; struct route_in6 ip6po_nhi_route; /* Route to the nexthop */ }; -#define ip6po_nexthop ip6po_nhinfo.ip6po_nhi_nexthop -#define ip6po_nextroute ip6po_nhinfo.ip6po_nhi_route +#define ip6po_nexthop ip6po_nhinfo.ip6po_nhi_nexthop +#define ip6po_nextroute ip6po_nhinfo.ip6po_nhi_route struct ip6_pktopts { struct mbuf *ip6po_m; /* Pointer to mbuf storing the data */ @@ -227,29 +228,30 @@ struct ip6_pktopts { int ip6po_tclass; /* traffic class */ int ip6po_minmtu; /* fragment vs PMTU discovery policy */ -#define IP6PO_MINMTU_MCASTONLY -1 /* default; send at min MTU for multicast*/ -#define IP6PO_MINMTU_DISABLE 0 /* always perform pmtu disc */ -#define IP6PO_MINMTU_ALL 1 /* always send at min MTU */ +#define IP6PO_MINMTU_MCASTONLY -1 /* default; send at min MTU for multicast */ +#define IP6PO_MINMTU_DISABLE 0 /* always perform pmtu disc */ +#define IP6PO_MINMTU_ALL 1 /* always send at min MTU */ + + /* whether temporary addresses are preferred as source address */ + int ip6po_prefer_tempaddr; - int ip6po_prefer_tempaddr; /* whether temporary addresses are - preferred as source address */ -#define IP6PO_TEMPADDR_SYSTEM -1 /* follow the system default */ -#define IP6PO_TEMPADDR_NOTPREFER 0 /* not prefer temporary address */ -#define IP6PO_TEMPADDR_PREFER 1 /* prefer temporary address */ +#define IP6PO_TEMPADDR_SYSTEM -1 /* follow the system default */ +#define IP6PO_TEMPADDR_NOTPREFER 0 /* not prefer temporary address */ +#define IP6PO_TEMPADDR_PREFER 1 /* prefer temporary address */ int ip6po_flags; #if 0 /* parameters in this block is obsolete. do not reuse the values. */ -#define IP6PO_REACHCONF 0x01 /* upper-layer reachability confirmation. */ -#define IP6PO_MINMTU 0x02 /* use minimum MTU (IPV6_USE_MIN_MTU) */ +#define IP6PO_REACHCONF 0x01 /* upper-layer reachability confirmation. 
*/ +#define IP6PO_MINMTU 0x02 /* use minimum MTU (IPV6_USE_MIN_MTU) */ #endif -#define IP6PO_DONTFRAG 0x04 /* disable fragmentation (IPV6_DONTFRAG) */ -#define IP6PO_USECOA 0x08 /* use care of address */ +#define IP6PO_DONTFRAG 0x04 /* no fragmentation (IPV6_DONTFRAG) */ +#define IP6PO_USECOA 0x08 /* use care of address */ }; /* * Control options for incoming packets */ -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ struct ip6stat { u_quad_t ip6s_total; /* total packets received */ @@ -262,7 +264,7 @@ struct ip6stat { u_quad_t ip6s_forward; /* packets forwarded */ u_quad_t ip6s_cantforward; /* packets rcvd for unreachable dest */ u_quad_t ip6s_redirectsent; /* packets forwarded on same net */ - u_quad_t ip6s_delivered; /* datagrams delivered to upper level*/ + u_quad_t ip6s_delivered; /* datagrams delivered to upper level */ u_quad_t ip6s_localout; /* total ip packets generated here */ u_quad_t ip6s_odropped; /* lost packets due to nobufs, etc. */ u_quad_t ip6s_reassembled; /* total packets reassembled ok */ @@ -313,40 +315,46 @@ struct ip6stat { /* number of times that each rule of source selection is applied. */ u_quad_t ip6s_sources_rule[16]; -#ifdef PRIVATE - u_quad_t ip6s_pktdropcntrl; /* pkt dropped, no mbufs for control data */ -#endif /* PRIVATE */ + /* pkt dropped, no mbufs for control data */ + u_quad_t ip6s_pktdropcntrl; + + /* total packets trimmed/adjusted */ + u_quad_t ip6s_adj; + /* hwcksum info discarded during adjustment */ + u_quad_t ip6s_adj_hwcsum_clr; + + /* duplicate address detection collisions */ + u_quad_t ip6s_dad_collide; }; -#ifdef KERNEL_PRIVATE -#ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE /* * IPv6 onion peeling state. - * it will be initialized when we come into ip6_input(). - * XXX do not make it a kitchen sink! + * + * This is currently allocated for packets destined to the all-nodes + * multicast address over Ethernet. IPv6 destination address information + * is now stored in the mbuf itself. */ struct ip6aux { u_int32_t ip6a_flags; -#define IP6A_SWAP 0x01 /* swapped home/care-of on packet */ -#define IP6A_HASEEN 0x02 /* HA was present */ -#define IP6A_BRUID 0x04 /* BR Unique Identifier was present */ -#define IP6A_RTALERTSEEN 0x08 /* rtalert present */ +#define IP6A_HASEEN 0x01 /* HA was present */ + +#ifdef notyet +#define IP6A_SWAP 0x02 /* swapped home/care-of on packet */ +#define IP6A_BRUID 0x04 /* BR Unique Identifier was present */ +#define IP6A_RTALERTSEEN 0x08 /* rtalert present */ /* ip6.ip6_src */ struct in6_addr ip6a_careof; /* care-of address of the peer */ struct in6_addr ip6a_home; /* home address of the peer */ u_int16_t ip6a_bruid; /* BR unique identifier */ - /* ip6.ip6_dst */ - struct in6_ifaddr *ip6a_dstia6; /* my ifaddr that matches ip6_dst */ - /* rtalert */ u_int16_t ip6a_rtalert; /* rtalert option value */ +#endif /* notyet */ - /* - * decapsulation history will be here. - * with IPsec it may not be accurate. 
- */ + /* ether source address if all-nodes multicast destination */ + u_char ip6a_ehsrc[ETHER_ADDR_LEN]; }; /* flags passed to ip6_output as last parameter */ @@ -356,8 +364,8 @@ struct ip6aux { #define IPV6_FLAG_NOSRCIFSEL 0x80 /* bypas source address selection */ #define IPV6_OUTARGS 0x100 /* has ancillary output info */ -#ifdef XNU_KERNEL_PRIVATE -#define IP6_HDR_ALIGNED_P(_ip6) ((((uintptr_t)(_ip6)) & ((uintptr_t)3)) == 0) +#ifdef BSD_KERNEL_PRIVATE +#define IP6_HDR_ALIGNED_P(_ip6) ((((uintptr_t)(_ip6)) & ((uintptr_t)3)) == 0) /* * On platforms which require strict alignment (currently for anything but @@ -374,7 +382,7 @@ struct ip6aux { } \ } while (0) #endif /* !__i386__ && !__x86_64__ */ -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #include @@ -389,141 +397,143 @@ struct ip6_out_args { #define IP6OAF_BOUND_IF 0x00000002 /* boundif value is valid */ #define IP6OAF_BOUND_SRCADDR 0x00000004 /* bound to src address */ #define IP6OAF_NO_CELLULAR 0x00000010 /* skip IFT_CELLULAR */ + u_int32_t ip6oa_retflags; /* IP6OARF return flags (see below) */ +#define IP6OARF_IFDENIED 0x00000001 /* denied access to interface */ }; -extern struct ip6stat ip6stat; /* statistics */ -extern u_int32_t ip6_id; /* fragment identifier */ -extern int ip6_defhlim; /* default hop limit */ -extern int ip6_defmcasthlim; /* default multicast hop limit */ -extern int ip6_forwarding; /* act as router? */ -extern int ip6_forward_srcrt; /* forward src-routed? */ -extern int ip6_gif_hlim; /* Hop limit for gif encap packet */ -extern int ip6_use_deprecated; /* allow deprecated addr as source */ -extern int ip6_rr_prune; /* router renumbering prefix - * walk list every 5 sec. */ -extern int ip6_mcast_pmtu; /* enable pMTU discovery for multicast? */ -#define ip6_mapped_addr_on (!ip6_v6only) -extern int ip6_v6only; - -extern int ip6_neighborgcthresh; /* Threshold # of NDP entries for GC */ -extern int ip6_maxifprefixes; /* Max acceptable prefixes via RA per IF */ -extern int ip6_maxifdefrouters; /* Max acceptable def routers via RA */ -extern int ip6_maxdynroutes; /* Max # of routes created via redirect */ +extern struct ip6stat ip6stat; /* statistics */ +extern int ip6_defhlim; /* default hop limit */ +extern int ip6_defmcasthlim; /* default multicast hop limit */ +extern int ip6_forwarding; /* act as router? */ +extern int ip6_gif_hlim; /* Hop limit for gif encap packet */ +extern int ip6_use_deprecated; /* allow deprecated addr as source */ +extern int ip6_rr_prune; /* router renumbering prefix */ + /* walk list every 5 sec. */ +extern int ip6_mcast_pmtu; /* enable pMTU discovery for multicast? */ +#define ip6_mapped_addr_on (!ip6_v6only) +extern int ip6_v6only; + +extern int ip6_neighborgcthresh; /* Threshold # of NDP entries for GC */ +extern int ip6_maxifprefixes; /* Max acceptable prefixes via RA per IF */ +extern int ip6_maxifdefrouters; /* Max acceptable def routers via RA */ +extern int ip6_maxdynroutes; /* Max # of routes created via redirect */ #if MROUTING -extern struct socket *ip6_mrouter; /* multicast routing daemon */ -#endif -extern int ip6_sendredirects; /* send IP redirects when forwarding? 
*/ -extern int ip6_maxfragpackets; /* Maximum packets in reassembly queue */ -extern int ip6_maxfrags; /* Maximum fragments in reassembly queue */ -extern int ip6_sourcecheck; /* Verify source interface */ -extern int ip6_sourcecheck_interval; /* Interval between log messages */ -extern int ip6_accept_rtadv; /* deprecated */ -extern int ip6_log_interval; -extern time_t ip6_log_time; -extern int ip6_hdrnestlimit; /* upper limit of # of extension headers */ -extern int ip6_dad_count; /* DupAddrDetectionTransmits */ -extern int ip6_only_allow_rfc4193_prefix; /* RFC4193 Unique Local Unicast Prefixes only */ - -extern u_int32_t ip6_flow_seq; +extern struct socket *ip6_mrouter; /* multicast routing daemon */ +#endif /* MROUTING */ +extern int ip6_sendredirects; /* send IP redirects when forwarding? */ +extern int ip6_accept_rtadv; /* deprecated */ +extern int ip6_log_interval; +extern uint64_t ip6_log_time; +extern int ip6_hdrnestlimit; /* upper limit of # of extension headers */ +extern int ip6_dad_count; /* DupAddrDetectionTransmits */ + +/* RFC4193 Unique Local Unicast Prefixes only */ +extern int ip6_only_allow_rfc4193_prefix; + extern int ip6_auto_flowlabel; extern int ip6_auto_linklocal; -extern int ip6_anonportmin; /* minimum ephemeral port */ -extern int ip6_anonportmax; /* maximum ephemeral port */ -extern int ip6_lowportmin; /* minimum reserved port */ -extern int ip6_lowportmax; /* maximum reserved port */ +extern int ip6_anonportmin; /* minimum ephemeral port */ +extern int ip6_anonportmax; /* maximum ephemeral port */ +extern int ip6_lowportmin; /* minimum reserved port */ +extern int ip6_lowportmax; /* maximum reserved port */ -extern int ip6_use_tempaddr; /* whether to use temporary addresses. */ -extern int ip6_prefer_tempaddr; /* whether to prefer temporary addresses - in the source address selection */ -extern int ip6_use_defzone; /* whether to use the default scope zone - when unspecified */ +extern int ip6_use_tempaddr; /* whether to use temporary addresses. 
*/ -extern struct pr_usrreqs rip6_usrreqs; -extern struct pr_usrreqs icmp6_dgram_usrreqs; - -extern int ip6_doscopedroute; -extern int ip6_restrictrecvif; - -struct sockopt; +/* whether to prefer temporary addresses in the source address selection */ +extern int ip6_prefer_tempaddr; -struct inpcb; +/* whether to use the default scope zone when unspecified */ +extern int ip6_use_defzone; -int icmp6_ctloutput(struct socket *, struct sockopt *); -int icmp6_dgram_ctloutput(struct socket *, struct sockopt *); -int icmp6_dgram_send(struct socket *, int , struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); -int icmp6_dgram_attach(struct socket *, int , struct proc *); +extern struct pr_usrreqs rip6_usrreqs; +extern struct pr_usrreqs icmp6_dgram_usrreqs; +extern int ip6_doscopedroute; +struct sockopt; +struct inpcb; struct in6_ifaddr; -void ip6_init(void); -void ip6_input(struct mbuf *); -struct in6_ifaddr *ip6_getdstifaddr(struct mbuf *); -void ip6_freepcbopts(struct ip6_pktopts *); -int ip6_unknown_opt(u_int8_t *, struct mbuf *, int); -char * ip6_get_prevhdr(struct mbuf *, int); -int ip6_nexthdr(struct mbuf *, int, int, int *); -int ip6_lasthdr(struct mbuf *, int, int, int *); +struct ip6protosw; +struct domain; + +extern int icmp6_ctloutput(struct socket *, struct sockopt *); +extern int icmp6_dgram_ctloutput(struct socket *, struct sockopt *); +extern int icmp6_dgram_send(struct socket *, int, struct mbuf *, + struct sockaddr *, struct mbuf *, struct proc *); +extern int icmp6_dgram_attach(struct socket *, int, struct proc *); + +extern void ip6_init(struct ip6protosw *, struct domain *); +extern void ip6_input(struct mbuf *); +extern void ip6_setsrcifaddr_info(struct mbuf *, uint32_t, struct in6_ifaddr *); +extern void ip6_setdstifaddr_info(struct mbuf *, uint32_t, struct in6_ifaddr *); +extern int ip6_getsrcifaddr_info(struct mbuf *, uint32_t *, uint32_t *); +extern int ip6_getdstifaddr_info(struct mbuf *, uint32_t *, uint32_t *); +extern void ip6_freepcbopts(struct ip6_pktopts *); +extern int ip6_unknown_opt(u_int8_t *, struct mbuf *, int); +extern char *ip6_get_prevhdr(struct mbuf *, int); +extern int ip6_nexthdr(struct mbuf *, int, int, int *); +extern int ip6_lasthdr(struct mbuf *, int, int, int *); extern void ip6_moptions_init(void); extern struct ip6_moptions *ip6_allocmoptions(int); extern void im6o_addref(struct ip6_moptions *, int); extern void im6o_remref(struct ip6_moptions *); -struct ip6aux *ip6_addaux(struct mbuf *); -struct ip6aux *ip6_findaux(struct mbuf *); -void ip6_delaux(struct mbuf *); -extern void ip6_destroyaux(struct ip6aux *); -extern void ip6_copyaux(struct ip6aux *, struct ip6aux *); - -int ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); -int ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *, - u_int32_t *); -struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *, - struct mbuf **, int *); -int ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **); -struct mbuf *ip6_forward(struct mbuf *, struct route_in6 *, int); -void ip6_notify_pmtu __P((struct inpcb *, struct sockaddr_in6 *, - u_int32_t *)); -void ip6_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in6 *); -int ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, - int, struct ip6_moptions *, struct ifnet **, struct ip6_out_args *); -int ip6_output_list(struct mbuf *, int, struct ip6_pktopts *, - struct route_in6 *, int, struct ip6_moptions *, struct ifnet **, - struct ip6_out_args *); -int ip6_ctloutput(struct socket *, struct 
sockopt *sopt); -void ip6_initpktopts(struct ip6_pktopts *); -int ip6_setpktoptions(struct mbuf *, struct ip6_pktopts *, int, int); -void ip6_clearpktopts(struct ip6_pktopts *, int); -struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int); -int ip6_optlen(struct inpcb *); - -int route6_input(struct mbuf **, int *, int); - -void frag6_init(void); -int frag6_input(struct mbuf **, int *, int); -void frag6_slowtimo(void); -void frag6_drain(void); - -int rip6_input(struct mbuf **, int *, int); -void rip6_ctlinput(int, struct sockaddr *, void *); -int rip6_ctloutput(struct socket *so, struct sockopt *sopt); -int rip6_output(struct mbuf *, struct socket *, struct sockaddr_in6 *, struct mbuf *, int); - -int dest6_input(struct mbuf **, int *, int); -extern struct in6_addr *in6_selectsrc(struct sockaddr_in6 *, - struct ip6_pktopts *, struct inpcb *, struct route_in6 *, - struct ifnet **, struct in6_addr *, unsigned int, int *); -extern struct in6_addrpolicy * - in6_addrsel_lookup_policy(struct sockaddr_in6 *); -int in6_selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *, - struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, - struct ifnet **, struct rtentry **, int, const struct ip6_out_args *); -int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, int uproto); -u_int32_t ip6_randomid(void); -u_int32_t ip6_randomflowlabel(void); - -#endif /* KERNEL */ -#endif /* KERNEL_PRIVATE */ +extern struct ip6aux *ip6_addaux(struct mbuf *); +extern struct ip6aux *ip6_findaux(struct mbuf *); +extern void ip6_delaux(struct mbuf *); +#if MROUTING +extern int ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); +#endif /* MROUTING */ +extern int ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *, + u_int32_t *); +extern struct mbuf **ip6_savecontrol_v4(struct inpcb *, struct mbuf *, + struct mbuf **, int *); +extern int ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **); +extern struct mbuf *ip6_forward(struct mbuf *, struct route_in6 *, int); +extern void ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *, u_int32_t *); +extern void ip6_mloopback(struct ifnet *, struct ifnet *, struct mbuf *, + struct sockaddr_in6 *, uint32_t, int32_t); +extern int ip6_output(struct mbuf *, struct ip6_pktopts *, struct route_in6 *, + int, struct ip6_moptions *, struct ifnet **, struct ip6_out_args *); +extern int ip6_output_list(struct mbuf *, int, struct ip6_pktopts *, + struct route_in6 *, int, struct ip6_moptions *, struct ifnet **, + struct ip6_out_args *); +extern int ip6_ctloutput(struct socket *, struct sockopt *); +extern int ip6_raw_ctloutput(struct socket *, struct sockopt *); +extern void ip6_initpktopts(struct ip6_pktopts *); +extern int ip6_setpktoptions(struct mbuf *, struct ip6_pktopts *, int, int); +extern void ip6_clearpktopts(struct ip6_pktopts *, int); +extern struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int); +extern int ip6_optlen(struct inpcb *); +extern void ip6_drain(void); + +extern int route6_input(struct mbuf **, int *, int); + +extern void frag6_init(void); +extern int frag6_input(struct mbuf **, int *, int); +extern void frag6_drain(void); + +extern int rip6_input(struct mbuf **, int *, int); +extern void rip6_ctlinput(int, struct sockaddr *, void *); +extern int rip6_ctloutput(struct socket *so, struct sockopt *sopt); +extern int rip6_output(struct mbuf *, struct socket *, struct sockaddr_in6 *, + struct mbuf *, int); + +extern int dest6_input(struct mbuf **, int *, int); +extern 
struct in6_addr *in6_selectsrc(struct sockaddr_in6 *, + struct ip6_pktopts *, struct inpcb *, struct route_in6 *, + struct ifnet **, struct in6_addr *, unsigned int, int *); +extern struct in6_addrpolicy *in6_addrsel_lookup_policy(struct sockaddr_in6 *); +extern int in6_selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *, + struct ip6_pktopts *, struct ip6_moptions *, struct in6_ifaddr **, + struct route_in6 *, struct ifnet **, struct rtentry **, int, + struct ip6_out_args *); +extern int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, + struct ip6_pktopts *stickyopt, int uproto); +extern u_int32_t ip6_randomid(void); +extern u_int32_t ip6_randomflowlabel(void); +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_NETINET6_IP6_VAR_H_ */ diff --git a/bsd/netinet6/ip6protosw.h b/bsd/netinet6/ip6protosw.h index dbadffc81..12bec55b0 100644 --- a/bsd/netinet6/ip6protosw.h +++ b/bsd/netinet6/ip6protosw.h @@ -1,9 +1,8 @@ -/* $FreeBSD: src/sys/netinet6/ip6protosw.h,v 1.2.2.3 2001/07/03 11:01:54 ume Exp $ */ /* - * Copyright (c) 2008-2009 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -12,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -23,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -97,10 +96,10 @@ */ #ifndef _NETINET6_IP6PROTOSW_H_ -#define _NETINET6_IP6PROTOSW_H_ +#define _NETINET6_IP6PROTOSW_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #include /* @@ -112,7 +111,6 @@ struct mbuf; struct sockaddr; struct socket; struct domain; -struct proc; struct ip6_hdr; struct icmp6_hdr; struct in6_addr; @@ -148,64 +146,54 @@ struct ip6ctlparam { u_int8_t ip6c_nxt; /* final next header field */ }; -#ifdef __LP64__ // K64todo: might also make sense for the generic case -#pragma pack(4) -#endif +/* + * TODO: + * + * This closesly resembles protosw; we should merge them one day. Offsets + * must be exactly equivalent, due to casting. 
+ */ struct ip6protosw { - short pr_type; /* socket type used for */ + TAILQ_ENTRY(ip6protosw) pr_entry; /* chain for domain */ struct domain *pr_domain; /* domain protocol a member of */ - short pr_protocol; /* protocol number */ - unsigned int pr_flags; /* see below */ -/* protocol-protocol hooks */ - int (*pr_input)(struct mbuf **, int *, int); - /* input to protocol (from below) */ - int (*pr_output)(struct mbuf *m, struct socket *so, - struct sockaddr_in6 *, struct mbuf *); - /* output to protocol (from above) */ - void (*pr_ctlinput)(int, struct sockaddr *, void *); - /* control input (from below) */ - int (*pr_ctloutput)(struct socket *, struct sockopt *); - /* control output (from above) */ -/* user-protocol hook */ - int (*pr_usrreq)(struct socket *, int, struct mbuf *, - struct mbuf *, struct mbuf *, struct proc *); - /* user request: see list below */ - -/* utility hooks */ - void (*pr_init)(void); /* initialization hook */ -#if __APPLE__ - void (*pr_unused)(void); /* placeholder - fasttimo is removed */ -#else - void (*pr_fasttimo)(void); - /* fast timeout (200ms) */ -#endif - void (*pr_slowtimo)(void); - /* slow timeout (500ms) */ - void (*pr_drain)(void); - /* flush any excess space possible */ -#ifdef __APPLE__ + struct protosw *pr_protosw; /* pointer to self */ + u_int16_t pr_type; /* socket type used for */ + u_int16_t pr_protocol; /* protocol number */ + u_int32_t pr_flags; /* see below */ + /* + * protocol-protocol hooks + */ + int (*pr_input) /* input to protocol (from below) */ + (struct mbuf **, int *, int); + int (*pr_output) /* output to protocol (from above) */ + (struct mbuf *m, struct socket *so, + struct sockaddr_in6 *, struct mbuf *); + void (*pr_ctlinput) /* control input (from below) */ + (int, struct sockaddr *, void *); + int (*pr_ctloutput) /* control output (from above) */ + (struct socket *, struct sockopt *); + /* + * user-protocol hook + */ + struct pr_usrreqs *pr_usrreqs; /* user request; see list below */ + /* + * utility hooks + */ + void (*pr_init) /* initialization hook */ + (struct ip6protosw *, struct domain *); + void (*pr_drain)(void); /* flush any excess space possible */ /* for compat. 
with IPv4 protosw */ - int (*pr_sysctl)(void); /* sysctl for protocol */ -#endif - - struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ -#ifdef __APPLE__ - int (*pr_lock) (struct socket *so, int locktype, void *debug); /* lock function for protocol */ - int (*pr_unlock) (struct socket *so, int locktype, void *debug); /* unlock for protocol */ -#ifdef _KERN_LOCKS_H_ - lck_mtx_t * (*pr_getlock) (struct socket *so, int locktype); /* unlock for protocol */ -#else - void * (*pr_getlock) (struct socket *so, int locktype); /* unlock for protocol */ -#endif - /* Filter hooks */ - TAILQ_HEAD(pr6_sfilter, NFDescriptor) pr_sfilter; - struct ip6protosw *pr_next; /* Chain for domain */ - u_int32_t reserved[1]; -#endif + int (*pr_sysctl)(void); /* sysctl for protocol */ + int (*pr_lock) /* lock function for protocol */ + (struct socket *so, int locktype, void *debug); + int (*pr_unlock) /* unlock for protocol */ + (struct socket *so, int locktype, void *debug); + lck_mtx_t *(*pr_getlock) /* retrieve protocol lock */ + (struct socket *so, int locktype); + /* + * misc + */ + TAILQ_HEAD(, socket_filter) pr_filter_head; + struct protosw_old *pr_old; }; -#ifdef __LP64__ // K64todo: might also make sense for the generic case -#pragma pack() -#endif - -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_IP6PROTOSW_H_ */ diff --git a/bsd/netinet6/ipcomp.h b/bsd/netinet6/ipcomp.h index d584de236..b976f138a 100644 --- a/bsd/netinet6/ipcomp.h +++ b/bsd/netinet6/ipcomp.h @@ -53,8 +53,7 @@ struct ipcomp { #define IPCOMP_CPI_NEGOTIATE_MIN 256 -#ifdef KERNEL -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE struct ipcomp_algorithm { int (*compress)(struct mbuf *, struct mbuf *, size_t *); int (*decompress)(struct mbuf *, struct mbuf *, size_t *); @@ -65,7 +64,6 @@ struct ipsecrequest; extern const struct ipcomp_algorithm *ipcomp_algorithm_lookup(int); extern void ipcomp4_input(struct mbuf *, int); extern int ipcomp4_output(struct mbuf *, struct secasvar *); -#endif /* KERNEL_PRIVATE */ -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_IPCOMP_H_ */ diff --git a/bsd/netinet6/ipcomp6.h b/bsd/netinet6/ipcomp6.h index 2bd7b6678..41b766614 100644 --- a/bsd/netinet6/ipcomp6.h +++ b/bsd/netinet6/ipcomp6.h @@ -39,10 +39,10 @@ #include #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern int ipcomp6_input(struct mbuf **, int *, int); extern int ipcomp6_output(struct mbuf *, u_char *, struct mbuf *, struct secasvar *); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /*_NETINET6_IPCOMP6_H_*/ diff --git a/bsd/netinet6/ipsec.c b/bsd/netinet6/ipsec.c index 271dbe1ab..9be2fb12e 100644 --- a/bsd/netinet6/ipsec.c +++ b/bsd/netinet6/ipsec.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2012 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -237,13 +237,15 @@ SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ESP_RANDPAD, esp_randpad, CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_esp_randpad, 0, ""); #endif /* INET6 */ +static int ipsec_setspidx_interface(struct secpolicyindex *, u_int, struct mbuf *, + int, int, int); static int ipsec_setspidx_mbuf(struct secpolicyindex *, u_int, u_int, struct mbuf *, int); static int ipsec4_setspidx_inpcb(struct mbuf *, struct inpcb *pcb); #if INET6 static int ipsec6_setspidx_in6pcb(struct mbuf *, struct in6pcb *pcb); #endif -static int ipsec_setspidx(struct mbuf *, struct secpolicyindex *, int); +static int ipsec_setspidx(struct mbuf *, struct secpolicyindex *, int, int); static void ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *, int); static int ipsec4_setspidx_ipaddr(struct mbuf *, struct secpolicyindex *); #if INET6 @@ -320,12 +322,12 @@ ipsec4_getpolicybysock(m, dir, so, error) return ipsec4_getpolicybyaddr(m, dir, 0, error); } - switch (so->so_proto->pr_domain->dom_family) { - case AF_INET: + switch (SOCK_DOM(so)) { + case PF_INET: pcbsp = sotoinpcb(so)->inp_sp; break; #if INET6 - case AF_INET6: + case PF_INET6: pcbsp = sotoin6pcb(so)->in6p_sp; break; #endif @@ -338,13 +340,13 @@ ipsec4_getpolicybysock(m, dir, so, error) KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_START, 0,0,0,0,0); - switch (so->so_proto->pr_domain->dom_family) { - case AF_INET: + switch (SOCK_DOM(so)) { + case PF_INET: /* set spidx in pcb */ *error = ipsec4_setspidx_inpcb(m, sotoinpcb(so)); break; #if INET6 - case AF_INET6: + case PF_INET6: /* set spidx in pcb */ *error = ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); break; @@ -394,8 +396,9 @@ ipsec4_getpolicybysock(m, dir, so, error) /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec4_getpolicybysock called " - "to allocate SP:%p\n", kernsp)); + printf("DP ipsec4_getpolicybysock called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); *error = 0; KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 3,*error,0,0,0); return kernsp; @@ -441,8 +444,9 @@ ipsec4_getpolicybysock(m, dir, so, error) /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec4_getpolicybysock called " - "to allocate SP:%p\n", kernsp)); + printf("DP ipsec4_getpolicybysock called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); *error = 0; KERNEL_DEBUG(DBG_FNC_GETPOL_SOCK | DBG_FUNC_END, 7,*error,0,0,0); return kernsp; @@ -502,14 +506,13 @@ ipsec4_getpolicybysock(m, dir, so, error) * others : error occurred. */ struct secpolicy * -ipsec4_getpolicybyaddr(m, dir, flag, error) - struct mbuf *m; - u_int dir; - int flag; - int *error; +ipsec4_getpolicybyaddr(struct mbuf *m, + u_int dir, + int flag, + int *error) { struct secpolicy *sp = NULL; - + if (ipsec_bypass != 0) return 0; @@ -518,30 +521,30 @@ ipsec4_getpolicybyaddr(m, dir, flag, error) /* sanity check */ if (m == NULL || error == NULL) panic("ipsec4_getpolicybyaddr: NULL pointer was passed.\n"); + { + struct secpolicyindex spidx; - { - struct secpolicyindex spidx; + KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_START, 0,0,0,0,0); + bzero(&spidx, sizeof(spidx)); - KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_START, 0,0,0,0,0); - bzero(&spidx, sizeof(spidx)); + /* make a index to look for a policy */ + *error = ipsec_setspidx_mbuf(&spidx, dir, AF_INET, m, + (flag & IP_FORWARDING) ? 
0 : 1); - /* make a index to look for a policy */ - *error = ipsec_setspidx_mbuf(&spidx, dir, AF_INET, m, - (flag & IP_FORWARDING) ? 0 : 1); + if (*error != 0) { + KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_END, 1,*error,0,0,0); + return NULL; + } - if (*error != 0) { - KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_END, 1,*error,0,0,0); - return NULL; + sp = key_allocsp(&spidx, dir); } - sp = key_allocsp(&spidx, dir); - } - /* SP found */ if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec4_getpolicybyaddr called " - "to allocate SP:%p\n", sp)); + printf("DP ipsec4_getpolicybyaddr called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sp))); *error = 0; KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_END, 2,*error,0,0,0); return sp; @@ -563,6 +566,69 @@ ipsec4_getpolicybyaddr(m, dir, flag, error) return &ip4_def_policy; } +/* Match with bound interface rather than src addr. + * Unlike getpolicybyaddr, do not set the default policy. + * Return 0 if should continue processing, or -1 if packet + * should be dropped. + */ +int +ipsec4_getpolicybyinterface(struct mbuf *m, + u_int dir, + int *flags, + struct ip_out_args *ipoa, + struct secpolicy **sp) +{ + struct secpolicyindex spidx; + int error = 0; + + if (ipsec_bypass != 0) + return 0; + + /* Sanity check */ + if (m == NULL || ipoa == NULL || sp == NULL) + panic("ipsec4_getpolicybyinterface: NULL pointer was passed.\n"); + + if (ipoa->ipoa_boundif == IFSCOPE_NONE) + return 0; + + KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_START, 0,0,0,0,0); + bzero(&spidx, sizeof(spidx)); + + /* make a index to look for a policy */ + error = ipsec_setspidx_interface(&spidx, dir, m, (*flags & IP_FORWARDING) ? 0 : 1, + ipoa->ipoa_boundif, 4); + + if (error != 0) { + KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_END, 1,error,0,0,0); + return 0; + } + + *sp = key_allocsp(&spidx, dir); + + /* Return SP, whether NULL or not */ + if (*sp != NULL && (*sp)->policy == IPSEC_POLICY_IPSEC) { + if ((*sp)->ipsec_if == NULL) { + /* Invalid to capture on an interface without redirect */ + key_freesp(*sp, KEY_SADB_UNLOCKED); + *sp = NULL; + return -1; + } else if ((*sp)->disabled) { + /* Disabled policies go in the clear */ + key_freesp(*sp, KEY_SADB_UNLOCKED); + *sp = NULL; + *flags |= IP_NOIPSEC; /* Avoid later IPSec check */ + } else { + /* If policy is enabled, redirect to ipsec interface */ + ipoa->ipoa_boundif = (*sp)->ipsec_if->if_index; + } + } + + KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_END, 2,error,0,0,0); + + return 0; +} + + #if INET6 /* * For OUTBOUND packet having a socket. 
Searching SPD for packet, @@ -592,7 +658,7 @@ ipsec6_getpolicybysock(m, dir, so, error) panic("ipsec6_getpolicybysock: NULL pointer was passed.\n"); #if DIAGNOSTIC - if (so->so_proto->pr_domain->dom_family != AF_INET6) + if (SOCK_DOM(so) != PF_INET6) panic("ipsec6_getpolicybysock: socket domain != inet6\n"); #endif @@ -641,8 +707,9 @@ ipsec6_getpolicybysock(m, dir, so, error) /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec6_getpolicybysock called " - "to allocate SP:%p\n", kernsp)); + printf("DP ipsec6_getpolicybysock called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); *error = 0; return kernsp; } @@ -684,8 +751,9 @@ ipsec6_getpolicybysock(m, dir, so, error) /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec6_getpolicybysock called " - "to allocate SP:%p\n", kernsp)); + printf("DP ipsec6_getpolicybysock called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(kernsp))); *error = 0; return kernsp; } @@ -747,11 +815,10 @@ ipsec6_getpolicybysock(m, dir, so, error) #endif struct secpolicy * -ipsec6_getpolicybyaddr(m, dir, flag, error) - struct mbuf *m; - u_int dir; - int flag; - int *error; +ipsec6_getpolicybyaddr(struct mbuf *m, + u_int dir, + int flag, + int *error) { struct secpolicy *sp = NULL; @@ -779,8 +846,9 @@ ipsec6_getpolicybyaddr(m, dir, flag, error) /* SP found */ if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec6_getpolicybyaddr called " - "to allocate SP:%p\n", sp)); + printf("DP ipsec6_getpolicybyaddr called " + "to allocate SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sp))); *error = 0; return sp; } @@ -798,6 +866,71 @@ ipsec6_getpolicybyaddr(m, dir, flag, error) *error = 0; return &ip6_def_policy; } + +/* Match with bound interface rather than src addr. + * Unlike getpolicybyaddr, do not set the default policy. + * Return 0 if should continue processing, or -1 if packet + * should be dropped. + */ +int +ipsec6_getpolicybyinterface(struct mbuf *m, + u_int dir, + int flag, + struct ip6_out_args *ip6oap, + int *noipsec, + struct secpolicy **sp) +{ + struct secpolicyindex spidx; + int error = 0; + + if (ipsec_bypass != 0) + return 0; + + /* Sanity check */ + if (m == NULL || sp == NULL || noipsec == NULL || ip6oap == NULL) + panic("ipsec6_getpolicybyinterface: NULL pointer was passed.\n"); + + *noipsec = 0; + + if (ip6oap->ip6oa_boundif == IFSCOPE_NONE) + return 0; + + KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_START, 0,0,0,0,0); + bzero(&spidx, sizeof(spidx)); + + /* make a index to look for a policy */ + error = ipsec_setspidx_interface(&spidx, dir, m, (flag & IP_FORWARDING) ? 
0 : 1, + ip6oap->ip6oa_boundif, 6); + + if (error != 0) { + KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_END, 1,error,0,0,0); + return 0; + } + + *sp = key_allocsp(&spidx, dir); + + /* Return SP, whether NULL or not */ + if (*sp != NULL && (*sp)->policy == IPSEC_POLICY_IPSEC) { + if ((*sp)->ipsec_if == NULL) { + /* Invalid to capture on an interface without redirect */ + key_freesp(*sp, KEY_SADB_UNLOCKED); + *sp = NULL; + return -1; + } else if ((*sp)->disabled) { + /* Disabled policies go in the clear */ + key_freesp(*sp, KEY_SADB_UNLOCKED); + *sp = NULL; + *noipsec = 1; /* Avoid later IPSec check */ + } else { + /* If policy is enabled, redirect to ipsec interface */ + ip6oap->ip6oa_boundif = (*sp)->ipsec_if->if_index; + } + } + + KERNEL_DEBUG(DBG_FNC_GETPOL_ADDR | DBG_FUNC_END, 2,error,0,0,0); + + return 0; +} #endif /* INET6 */ /* @@ -810,7 +943,7 @@ ipsec6_getpolicybyaddr(m, dir, flag, error) * 0: success. * other: failure, and set errno. */ -int +static int ipsec_setspidx_mbuf( struct secpolicyindex *spidx, u_int dir, @@ -826,7 +959,7 @@ ipsec_setspidx_mbuf( bzero(spidx, sizeof(*spidx)); - error = ipsec_setspidx(m, spidx, needport); + error = ipsec_setspidx(m, spidx, needport, 0); if (error) goto bad; spidx->dir = dir; @@ -839,6 +972,42 @@ ipsec_setspidx_mbuf( return EINVAL; } +static int +ipsec_setspidx_interface( + struct secpolicyindex *spidx, + u_int dir, + struct mbuf *m, + int needport, + int ifindex, + int ip_version) +{ + int error; + + /* sanity check */ + if (spidx == NULL || m == NULL) + panic("ipsec_setspidx_interface: NULL pointer was passed.\n"); + + bzero(spidx, sizeof(*spidx)); + + error = ipsec_setspidx(m, spidx, needport, ip_version); + if (error) + goto bad; + spidx->dir = dir; + + if (ifindex != 0) { + ifnet_head_lock_shared(); + spidx->internal_if = ifindex2ifnet[ifindex]; + ifnet_head_done(); + } else { + spidx->internal_if = NULL; + } + + return 0; + +bad: + return EINVAL; +} + static int ipsec4_setspidx_inpcb(m, pcb) struct mbuf *m; @@ -862,13 +1031,13 @@ ipsec4_setspidx_inpcb(m, pcb) bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx)); spidx = &pcb->inp_sp->sp_in->spidx; - error = ipsec_setspidx(m, spidx, 1); + error = ipsec_setspidx(m, spidx, 1, 0); if (error) goto bad; spidx->dir = IPSEC_DIR_INBOUND; spidx = &pcb->inp_sp->sp_out->spidx; - error = ipsec_setspidx(m, spidx, 1); + error = ipsec_setspidx(m, spidx, 1, 0); if (error) goto bad; spidx->dir = IPSEC_DIR_OUTBOUND; @@ -902,13 +1071,13 @@ ipsec6_setspidx_in6pcb(m, pcb) bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx)); spidx = &pcb->in6p_sp->sp_in->spidx; - error = ipsec_setspidx(m, spidx, 1); + error = ipsec_setspidx(m, spidx, 1, 0); if (error) goto bad; spidx->dir = IPSEC_DIR_INBOUND; spidx = &pcb->in6p_sp->sp_out->spidx; - error = ipsec_setspidx(m, spidx, 1); + error = ipsec_setspidx(m, spidx, 1, 0); if (error) goto bad; spidx->dir = IPSEC_DIR_OUTBOUND; @@ -928,10 +1097,10 @@ bad: * the caller is responsible for error recovery (like clearing up spidx). */ static int -ipsec_setspidx(m, spidx, needport) - struct mbuf *m; - struct secpolicyindex *spidx; - int needport; +ipsec_setspidx(struct mbuf *m, + struct secpolicyindex *spidx, + int needport, + int force_ip_version) { struct ip *ip = NULL; struct ip ipbuf; @@ -939,10 +1108,10 @@ ipsec_setspidx(m, spidx, needport) struct mbuf *n; int len; int error; - + if (m == NULL) panic("ipsec_setspidx: m == 0 passed.\n"); - + /* * validate m->m_pkthdr.len. 
we see incorrect length if we * mistakenly call this function with inconsistent mbuf chain @@ -974,11 +1143,16 @@ ipsec_setspidx(m, spidx, needport) m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf); ip = &ipbuf; } + + if (force_ip_version) { + v = force_ip_version; + } else { #ifdef _IP_VHL - v = _IP_VHL_V(ip->ip_vhl); + v = _IP_VHL_V(ip->ip_vhl); #else - v = ip->ip_v; + v = ip->ip_v; #endif + } switch (v) { case 4: error = ipsec4_setspidx_ipaddr(m, spidx); @@ -1116,6 +1290,7 @@ ipsec4_setspidx_ipaddr(m, spidx) sin->sin_len = sizeof(struct sockaddr_in); bcopy(&ip->ip_dst, &sin->sin_addr, sizeof(ip->ip_dst)); spidx->prefd = sizeof(struct in_addr) << 3; + return 0; } @@ -1959,7 +2134,8 @@ ipsec4_in_reject_so(m, so) result = ipsec_in_reject(sp, m); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec4_in_reject_so call free SP:%p\n", sp)); + printf("DP ipsec4_in_reject_so call free SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sp))); key_freesp(sp, KEY_SADB_UNLOCKED); return result; @@ -2017,7 +2193,8 @@ ipsec6_in_reject_so(m, so) result = ipsec_in_reject(sp, m); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec6_in_reject_so call free SP:%p\n", sp)); + printf("DP ipsec6_in_reject_so call free SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sp))); key_freesp(sp, KEY_SADB_UNLOCKED); return result; @@ -2152,7 +2329,8 @@ ipsec4_hdrsiz(m, dir, inp) size = ipsec_hdrsiz(sp); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec4_hdrsiz call free SP:%p\n", sp)); + printf("DP ipsec4_hdrsiz call free SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sp))); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec4_hdrsiz: size:%lu.\n", (u_int32_t)size)); key_freesp(sp, KEY_SADB_UNLOCKED); @@ -2192,7 +2370,8 @@ ipsec6_hdrsiz(m, dir, in6p) return 0; size = ipsec_hdrsiz(sp); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ipsec6_hdrsiz call free SP:%p\n", sp)); + printf("DP ipsec6_hdrsiz call free SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(sp))); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_hdrsiz: size:%lu.\n", (u_int32_t)size)); key_freesp(sp, KEY_SADB_UNLOCKED); @@ -2308,11 +2487,7 @@ ipsec4_encapsulate(m, sav) ipseclog((LOG_ERR, "IPv4 ipsec: size exceeds limit: " "leave ip_len as is (invalid packet)\n")); } -#ifdef RANDOM_IP_ID ip->ip_id = ip_randomid(); -#else - ip->ip_id = htons(ip_id++); -#endif bcopy(&((struct sockaddr_in *)&sav->sah->saidx.src)->sin_addr, &ip->ip_src, sizeof(ip->ip_src)); bcopy(&((struct sockaddr_in *)&sav->sah->saidx.dst)->sin_addr, @@ -2401,11 +2576,7 @@ ipsec4_encapsulate_utun_esp_keepalive(m_ptr, sav) ipseclog((LOG_ERR, "IPv4 ipsec: size exceeds limit: " "leave ip_len as is (invalid packet)\n")); } -#ifdef RANDOM_IP_ID ip->ip_id = ip_randomid(); -#else - ip->ip_id = htons(ip_id++); -#endif bcopy(&((struct sockaddr_in *)&sav->sah->saidx.src)->sin_addr, &ip->ip_src, sizeof(ip->ip_src)); bcopy(&((struct sockaddr_in *)&sav->sah->saidx.dst)->sin_addr, @@ -3158,13 +3329,11 @@ ipsec4_output( if (ro4->ro_rt != NULL) { RT_LOCK(ro4->ro_rt); } - if (ro4->ro_rt != NULL && - (ro4->ro_rt->generation_id != route_generation || - !(ro4->ro_rt->rt_flags & RTF_UP) || - dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { - RT_UNLOCK(ro4->ro_rt); - rtfree(ro4->ro_rt); - ro4->ro_rt = NULL; + if (ROUTE_UNUSABLE(ro4) || + dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { + if (ro4->ro_rt != NULL) + RT_UNLOCK(ro4->ro_rt); + ROUTE_RELEASE(ro4); } if (ro4->ro_rt == 0) { dst4->sin_family = AF_INET; @@ -3193,10 +3362,7 @@ ipsec4_output( if (ro4->ro_rt->rt_flags & RTF_GATEWAY) dst4 = (struct sockaddr_in *)(void *)ro4->ro_rt->rt_gateway; 
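The cached-route handling above is a recurring pattern in this patch: the open-coded tests of generation_id and RTF_UP plus the manual rtfree()/NULL-out are collapsed into the ROUTE_UNUSABLE() and ROUTE_RELEASE() helpers. The runnable model below sketches that pattern under simplified assumptions; the structures and both macros are illustrative stand-ins only, since the real definitions live in bsd/net/route.h and additionally cover rt_ifa validity, RTF_CONDEMNED and locking:

#include <stdio.h>
#include <stdlib.h>

/*
 * Userspace model of the route-cache pattern: a cached route is usable
 * only while it is up and its generation matches the routing table's.
 */
#define RTF_UP	0x1

static unsigned int route_generation = 1;	/* bumped on table change */

struct rtentry_demo {
	int rt_flags;
	unsigned int generation_id;
	int refcnt;
};

struct route_demo {
	struct rtentry_demo *ro_rt;
};

/* simplified stand-in for the real ROUTE_UNUSABLE() */
#define ROUTE_UNUSABLE(ro)					\
	((ro)->ro_rt == NULL ||					\
	!((ro)->ro_rt->rt_flags & RTF_UP) ||			\
	(ro)->ro_rt->generation_id != route_generation)

/* simplified stand-in for the real ROUTE_RELEASE() */
#define ROUTE_RELEASE(ro) do {					\
	if ((ro)->ro_rt != NULL) {				\
		if (--(ro)->ro_rt->refcnt == 0)			\
			free((ro)->ro_rt);			\
		(ro)->ro_rt = NULL;				\
	}							\
} while (0)

int
main(void)
{
	struct route_demo ro = { NULL };

	ro.ro_rt = calloc(1, sizeof (*ro.ro_rt));
	ro.ro_rt->rt_flags = RTF_UP;
	ro.ro_rt->generation_id = route_generation;
	ro.ro_rt->refcnt = 1;

	route_generation++;		/* simulate a routing table change */

	if (ROUTE_UNUSABLE(&ro))	/* stale: generation mismatch */
		ROUTE_RELEASE(&ro);

	printf("ro_rt after release: %p\n", (void *)ro.ro_rt);	/* NULL */
	return (0);
}

Centralizing the check means every call site (ipsec4_output, ipsec6_output_tunnel, the NAT-T keepalive path below) invalidates stale routes the same way instead of each repeating the three-condition test.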
RT_UNLOCK(ro4->ro_rt); - if (state->ro.ro_rt != NULL) { - rtfree(state->ro.ro_rt); - state->ro.ro_rt = NULL; - } + ROUTE_RELEASE(&state->ro); route_copyout(&state->ro, ro4, sizeof(state->ro)); state->dst = (struct sockaddr *)dst4; state->tunneled = 4; @@ -3590,7 +3756,7 @@ ipsec6_output_tunnel( struct route *ro4 = NULL; struct route ro4_copy; struct ip_out_args ipoa = { IFSCOPE_NONE, { 0 }, - IPOAF_SELECT_SRCIF }; + IPOAF_SELECT_SRCIF, 0 }; /* * must be last isr because encapsulated IPv6 packet @@ -3619,13 +3785,11 @@ ipsec6_output_tunnel( if (ro4->ro_rt) { RT_LOCK(ro4->ro_rt); } - if (ro4->ro_rt != NULL && - (ro4->ro_rt->generation_id != route_generation || - !(ro4->ro_rt->rt_flags & RTF_UP) || - dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { - RT_UNLOCK(ro4->ro_rt); - rtfree(ro4->ro_rt); - ro4->ro_rt = NULL; + if (ROUTE_UNUSABLE(ro4) || + dst4->sin_addr.s_addr != ip->ip_dst.s_addr) { + if (ro4->ro_rt != NULL) + RT_UNLOCK(ro4->ro_rt); + ROUTE_RELEASE(ro4); } if (ro4->ro_rt == NULL) { dst4->sin_family = AF_INET; @@ -3640,9 +3804,7 @@ ipsec6_output_tunnel( state->m = ipsec4_splithdr(state->m); if (!state->m) { error = ENOMEM; - if (ro4_copy.ro_rt != NULL) { - rtfree(ro4_copy.ro_rt); - } + ROUTE_RELEASE(&ro4_copy); goto bad; } switch (isr->saidx.proto) { @@ -3650,9 +3812,7 @@ ipsec6_output_tunnel( #if IPSEC_ESP if ((error = esp4_output(state->m, sav)) != 0) { state->m = NULL; - if (ro4_copy.ro_rt != NULL) { - rtfree(ro4_copy.ro_rt); - } + ROUTE_RELEASE(&ro4_copy); goto bad; } break; @@ -3661,26 +3821,20 @@ ipsec6_output_tunnel( m_freem(state->m); state->m = NULL; error = EINVAL; - if (ro4_copy.ro_rt != NULL) { - rtfree(ro4_copy.ro_rt); - } + ROUTE_RELEASE(&ro4_copy); goto bad; #endif case IPPROTO_AH: if ((error = ah4_output(state->m, sav)) != 0) { state->m = NULL; - if (ro4_copy.ro_rt != NULL) { - rtfree(ro4_copy.ro_rt); - } + ROUTE_RELEASE(&ro4_copy); goto bad; } break; case IPPROTO_IPCOMP: if ((error = ipcomp4_output(state->m, sav)) != 0) { state->m = NULL; - if (ro4_copy.ro_rt != NULL) { - rtfree(ro4_copy.ro_rt); - } + ROUTE_RELEASE(&ro4_copy); goto bad; } break; @@ -3691,17 +3845,13 @@ ipsec6_output_tunnel( m_freem(state->m); state->m = NULL; error = EINVAL; - if (ro4_copy.ro_rt != NULL) { - rtfree(ro4_copy.ro_rt); - } + ROUTE_RELEASE(&ro4_copy); goto bad; } if (state->m == 0) { error = ENOMEM; - if (ro4_copy.ro_rt != NULL) { - rtfree(ro4_copy.ro_rt); - } + ROUTE_RELEASE(&ro4_copy); goto bad; } ip = mtod(state->m, struct ip *); @@ -3731,13 +3881,11 @@ ipsec6_output_tunnel( if (ro6->ro_rt) { RT_LOCK(ro6->ro_rt); } - if (ro6->ro_rt != NULL && - (ro6->ro_rt->generation_id != route_generation || - !(ro6->ro_rt->rt_flags & RTF_UP) || - !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { - RT_UNLOCK(ro6->ro_rt); - rtfree(ro6->ro_rt); - ro6->ro_rt = NULL; + if (ROUTE_UNUSABLE(ro6) || + !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst)) { + if (ro6->ro_rt != NULL) + RT_UNLOCK(ro6->ro_rt); + ROUTE_RELEASE(ro6); } if (ro6->ro_rt == 0) { bzero(dst6, sizeof(*dst6)); @@ -3770,10 +3918,7 @@ ipsec6_output_tunnel( if (ro6->ro_rt->rt_flags & RTF_GATEWAY) dst6 = (struct sockaddr_in6 *)(void *)ro6->ro_rt->rt_gateway; RT_UNLOCK(ro6->ro_rt); - if (state->ro.ro_rt != NULL) { - rtfree(state->ro.ro_rt); - state->ro.ro_rt = NULL; - } + ROUTE_RELEASE(&state->ro); route_copyout(&state->ro, ro6, sizeof(state->ro)); state->dst = (struct sockaddr *)dst6; state->tunneled = 6; @@ -4404,7 +4549,8 @@ ipsec_send_natt_keepalive( struct mbuf *m; struct ip *ip; int error; - struct ip_out_args ipoa = { IFSCOPE_NONE, { 
0 }, IPOAF_SELECT_SRCIF }; + struct ip_out_args ipoa = + { IFSCOPE_NONE, { 0 }, IPOAF_SELECT_SRCIF, 0 }; struct route ro; lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); @@ -4456,11 +4602,10 @@ ipsec_send_natt_keepalive( // grab sadb_mutex, to get a local copy of sah's route cache lck_mtx_lock(sadb_mutex); - if (sav->sah->sa_route.ro_rt != NULL && - rt_key(sav->sah->sa_route.ro_rt)->sa_family != AF_INET) { - rtfree(sav->sah->sa_route.ro_rt); - sav->sah->sa_route.ro_rt = NULL; - } + if (ROUTE_UNUSABLE(&sav->sah->sa_route) || + rt_key(sav->sah->sa_route.ro_rt)->sa_family != AF_INET) + ROUTE_RELEASE(&sav->sah->sa_route); + route_copyout(&ro, &sav->sah->sa_route, sizeof(ro)); lck_mtx_unlock(sadb_mutex); diff --git a/bsd/netinet6/ipsec.h b/bsd/netinet6/ipsec.h index ead5dc68c..75234b206 100644 --- a/bsd/netinet6/ipsec.h +++ b/bsd/netinet6/ipsec.h @@ -40,8 +40,9 @@ #include #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #include +#include /* lock for IPSec stats */ extern lck_grp_t *sadb_stat_mutex_grp; @@ -53,6 +54,10 @@ extern lck_mtx_t *sadb_stat_mutex; #define IPSEC_STAT_INCREMENT(x) \ {lck_mtx_lock(sadb_stat_mutex); (x)++; lck_mtx_unlock(sadb_stat_mutex);} +struct secpolicyaddrrange { + struct sockaddr_storage start; /* Start (low values) of address range */ + struct sockaddr_storage end; /* End (high values) of address range */ +}; /* * Security Policy Index @@ -67,6 +72,9 @@ struct secpolicyindex { u_int8_t prefs; /* prefix length in bits for src */ u_int8_t prefd; /* prefix length in bits for dst */ u_int16_t ul_proto; /* upper layer Protocol */ + ifnet_t internal_if; /* Interface a matching packet is bound to */ + struct secpolicyaddrrange src_range; /* IP src address range for SP */ + struct secpolicyaddrrange dst_range; /* IP dst address range for SP */ #ifdef notyet uid_t uids; uid_t uidd; @@ -91,6 +99,11 @@ struct secpolicy { /* pointer to the ipsec request tree, */ /* if policy == IPSEC else this value == NULL.*/ + ifnet_t ipsec_if; /* IPSec interface to use */ + ifnet_t outgoing_if; /* Outgoing interface for encrypted traffic */ + + char disabled; /* Set to ignore policy */ + /* * lifetime handler. * the policy can be used without limitiation if both lifetime and @@ -133,7 +146,7 @@ struct secspacq { int count; /* for lifetime */ /* XXX: here is mbuf place holder to be sent ? */ }; -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ /* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */ #define IPSEC_PORT_ANY 0 @@ -219,7 +232,7 @@ struct ipsecstat { u_quad_t out_comphist[256]; }; -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * Definitions for IPsec & Key sysctl operations. 
*/ @@ -277,13 +290,11 @@ struct ipsecstat { { "esp_randpad", CTLTYPE_INT }, \ } -#ifdef KERNEL - #define IPSEC_IS_P2ALIGNED(p) 1 #define IPSEC_GET_P2UNALIGNED_OFS(p) 0 struct ipsec_output_state { - int tunneled; + int tunneled; struct mbuf *m; struct route ro; struct sockaddr *dst; @@ -314,6 +325,8 @@ extern struct secpolicy *ipsec4_getpolicybysock(struct mbuf *, u_int, struct socket *, int *); extern struct secpolicy *ipsec4_getpolicybyaddr(struct mbuf *, u_int, int, int *); +extern int ipsec4_getpolicybyinterface(struct mbuf *, u_int, int *, + struct ip_out_args *, struct secpolicy **); struct inpcb; extern int ipsec_init_policy(struct socket *so, struct inpcbpolicy **); @@ -362,8 +375,7 @@ extern struct socket *ipsec_getsocket(struct mbuf *); extern int ipsec_addhist(struct mbuf *, int, u_int32_t); extern struct ipsec_history *ipsec_gethist(struct mbuf *, int *); extern void ipsec_clearhist(struct mbuf *); -#endif /* KERNEL */ -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #ifndef KERNEL __BEGIN_DECLS diff --git a/bsd/netinet6/ipsec6.h b/bsd/netinet6/ipsec6.h index e24f11acc..66775a8e9 100644 --- a/bsd/netinet6/ipsec6.h +++ b/bsd/netinet6/ipsec6.h @@ -41,7 +41,7 @@ #include #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern struct ipsecstat ipsec6stat; extern struct secpolicy ip6_def_policy; extern int ip6_esp_trans_deflev; @@ -51,10 +51,14 @@ extern int ip6_ah_net_deflev; extern int ip6_ipsec_ecn; extern int ip6_esp_randpad; +struct ip6_out_args; + extern struct secpolicy *ipsec6_getpolicybysock(struct mbuf *, u_int, struct socket *, int *); extern struct secpolicy *ipsec6_getpolicybyaddr(struct mbuf *, u_int, int, int *); +extern int ipsec6_getpolicybyinterface(struct mbuf *, + u_int, int, struct ip6_out_args *, int *, struct secpolicy **); struct inpcb; @@ -79,6 +83,5 @@ extern int ipsec6_output_tunnel(struct ipsec_output_state *, struct secpolicy *, int); extern int ipsec6_tunnel_validate(struct mbuf *, int, u_int, struct secasvar *); -#endif /* KERNEL_PRIVATE */ - +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_IPSEC6_H_ */ diff --git a/bsd/netinet6/mld6.c b/bsd/netinet6/mld6.c index 7cedd2e22..38c46dd72 100644 --- a/bsd/netinet6/mld6.c +++ b/bsd/netinet6/mld6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -110,6 +110,8 @@ #include #include +#include + #include #include @@ -153,7 +155,7 @@ static lck_grp_attr_t *mld_mtx_grp_attr; * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout * handler. Each in6_multi holds a reference to the underlying mld_ifinfo. 
* - * Thus, the permitted lock oder is: + * Thus, the permitted lock order is: * * mld_mtx, in6_multihead_lock, inm6_lock, mli_lock * @@ -169,27 +171,28 @@ static struct mld_ifinfo *mli_alloc(int); static void mli_free(struct mld_ifinfo *); static void mli_delete(const struct ifnet *, struct mld_in6m_relhead *); static void mld_dispatch_packet(struct mbuf *); -static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *); -static int mld_handle_state_change(struct in6_multi *, - struct mld_ifinfo *); +static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *, + struct mld_tparams *); +static int mld_handle_state_change(struct in6_multi *, struct mld_ifinfo *, + struct mld_tparams *); static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *, - const int); + struct mld_tparams *, const int); #ifdef MLD_DEBUG static const char * mld_rec_type_to_str(const int); #endif -static void mld_set_version(struct mld_ifinfo *, const int); +static uint32_t mld_set_version(struct mld_ifinfo *, const int); static void mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *); static void mld_dispatch_queue(struct mld_ifinfo *, struct ifqueue *, int); static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *, /*const*/ struct mld_hdr *); -static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *, - /*const*/ struct mld_hdr *); +static int mld_v1_input_report(struct ifnet *, struct mbuf *, + const struct ip6_hdr *, /*const*/ struct mld_hdr *); static void mld_v1_process_group_timer(struct in6_multi *, const int); static void mld_v1_process_querier_timers(struct mld_ifinfo *); static int mld_v1_transmit_report(struct in6_multi *, const int); -static void mld_v1_update_group(struct in6_multi *, const int); +static uint32_t mld_v1_update_group(struct in6_multi *, const int); static void mld_v2_cancel_link_timers(struct mld_ifinfo *); -static void mld_v2_dispatch_general_query(struct mld_ifinfo *); +static uint32_t mld_v2_dispatch_general_query(struct mld_ifinfo *); static struct mbuf * mld_v2_encap_report(struct ifnet *, struct mbuf *); static int mld_v2_enqueue_filter_change(struct ifqueue *, @@ -208,34 +211,26 @@ static int mld_v2_process_group_query(struct in6_multi *, int, struct mbuf *, const int); static int sysctl_mld_gsr SYSCTL_HANDLER_ARGS; static int sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS; +static int sysctl_mld_v2enable SYSCTL_HANDLER_ARGS; + +static int mld_timeout_run; /* MLD timer is scheduled to run */ +static void mld_timeout(void *); +static void mld_sched_timeout(void); /* * Normative references: RFC 2710, RFC 3590, RFC 3810. - * - * XXX LOR PREVENTION - * A special case for IPv6 is the in6_setscope() routine. ip6_output() - * will not accept an ifp; it wants an embedded scope ID, unlike - * ip_output(), which happily takes the ifp given to it. The embedded - * scope ID is only used by MLD to select the outgoing interface. - * - * As such, we exploit the fact that the scope ID is just the interface - * index, and embed it in the IPv6 destination address accordingly. - * This is potentially NOT VALID for MLDv1 reports, as they - * are always sent to the multicast group itself; as MLDv2 - * reports are always sent to ff02::16, this is not an issue - * when MLDv2 is in use. 
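/*
 * The prototype changes above are the heart of this rework: handlers such
 * as mld_set_version() and mld_v1_update_group() now return the timer
 * value they armed instead of poking the global *_running6 flags, and
 * callers thread a struct mld_tparams through to record which timer
 * classes became live. A compressed sketch of that calling convention
 * (field names follow the mld_tparams struct added to mld6_var.h below;
 * the handler bodies are placeholders):
 */
#include <stdint.h>

struct tparams {
    int qpt;        /* querier-present timers running */
    int it;         /* interface timers running */
    int cst;        /* current-state timers running */
    int sct;        /* state-change timers running */
};

/* Handlers return the deadline (seconds) they armed; 0 means "none". */
static uint32_t set_version(int v)           { return (v == 1) ? 260 : 0; }
static uint32_t update_group(uint32_t timer) { return timer; }

/* Single arming point: called with no MLD locks held. */
static void set_timeout(const struct tparams *tp) { (void)tp; }

static void
handle_v1_query(uint32_t max_delay_secs)
{
    struct tparams mtp = { 0, 0, 0, 0 };

    mtp.qpt = (set_version(1) > 0);             /* v1 querier seen */
    mtp.cst = (update_group(max_delay_secs) > 0);
    set_timeout(&mtp);
}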
*/ - -#define MLD_EMBEDSCOPE(pin6, zoneid) \ - (pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF) - static struct timeval mld_gsrdelay = {10, 0}; static LIST_HEAD(, mld_ifinfo) mli_head; +static int querier_present_timers_running6; static int interface_timers_running6; static int state_change_timers_running6; static int current_state_timers_running6; +/* + * Subsystem lock macros. + */ #define MLD_LOCK() \ lck_mtx_lock(&mld_mtx) #define MLD_LOCK_ASSERT_HELD() \ @@ -280,6 +275,12 @@ static int mld_v1enable = 1; SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED, &mld_v1enable, 0, "Enable fallback to MLDv1"); +static int mld_v2enable = 1; +SYSCTL_PROC(_net_inet6_mld, OID_AUTO, v2enable, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &mld_v2enable, 0, sysctl_mld_v2enable, "I", + "Enable MLDv2 (debug purposes only)"); + static int mld_use_allow = 1; SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED, &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves"); @@ -313,6 +314,31 @@ static struct mld_raopt mld_ra = { }; static struct ip6_pktopts mld_po; +/* Store MLDv2 record count in the module private scratch space */ +#define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0] + +static __inline void +mld_save_context(struct mbuf *m, struct ifnet *ifp) +{ + m->m_pkthdr.rcvif = ifp; +} + +static __inline void +mld_scrub_context(struct mbuf *m) +{ + m->m_pkthdr.rcvif = NULL; +} + +/* + * Restore context from a queued output chain. + * Return saved ifp. + */ +static __inline struct ifnet * +mld_restore_context(struct mbuf *m) +{ + return (m->m_pkthdr.rcvif); +} + /* * Retrieve or set threshold between group-source queries in seconds. */ @@ -411,6 +437,52 @@ out_locked: return (error); } +static int +sysctl_mld_v2enable SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error; + int i; + struct mld_ifinfo *mli; + struct mld_tparams mtp = { 0, 0, 0, 0 }; + + MLD_LOCK(); + + i = mld_v2enable; + + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || !req->newptr) + goto out_locked; + + if (i < 0 || i > 1) { + error = EINVAL; + goto out_locked; + } + + mld_v2enable = i; + /* + * If we enabled v2, the state transition will take care of upgrading + * the MLD version back to v2. Otherwise, we have to explicitly + * downgrade. Note that this functionality is to be used for debugging. + */ + if (mld_v2enable == 1) + goto out_locked; + + LIST_FOREACH(mli, &mli_head, mli_link) { + MLI_LOCK(mli); + if (mld_set_version(mli, MLD_VERSION_1) > 0) + mtp.qpt = 1; + MLI_UNLOCK(mli); + } + +out_locked: + MLD_UNLOCK(); + + mld_set_timeout(&mtp); + + return (error); +} + /* * Dispatch an entire queue of pending packet chains. 
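/*
 * mld_save_context()/mld_restore_context() above borrow the packet
 * header's rcvif field to carry the target interface while a report sits
 * on an output queue, and the vt_nrecs macro overlays a 16-bit record
 * counter on the header's module-private scratch space. A stand-alone
 * model of the trick with a toy header (the real storage is the mbuf's
 * pkt_mpriv union, where the kernel owns the aliasing):
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct ifnet;                               /* opaque handle */

struct toy_pkthdr {
    struct ifnet *rcvif;                    /* doubles as saved context */
    union {
        uint32_t val32;
        uint16_t val16[2];
    } mpriv;                                /* module-private scratch */
};
#define vt_nrecs(h)     ((h)->mpriv.val16[0])

static void save_context(struct toy_pkthdr *h, struct ifnet *ifp) { h->rcvif = ifp; }
static struct ifnet *restore_context(const struct toy_pkthdr *h) { return h->rcvif; }
static void scrub_context(struct toy_pkthdr *h) { h->rcvif = NULL; }

int
main(void)
{
    static int dummy;
    struct toy_pkthdr h = { NULL, { 0 } };
    struct ifnet *ifp = (struct ifnet *)&dummy;     /* placeholder */

    save_context(&h, ifp);
    vt_nrecs(&h) = 3;               /* three MLDv2 records in this chain */
    assert(restore_context(&h) == ifp && vt_nrecs(&h) == 3);
    scrub_context(&h);              /* must not leak into ip6_output() */
    return 0;
}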
* @@ -428,7 +500,9 @@ mld_dispatch_queue(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit) IF_DEQUEUE(ifq, m); if (m == NULL) break; - MLD_PRINTF(("%s: dispatch %p from %p\n", __func__, ifq, m)); + MLD_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifq), + (uint64_t)VM_KERNEL_ADDRPERM(m))); if (mli != NULL) MLI_UNLOCK(mli); mld_dispatch_packet(m); @@ -481,8 +555,8 @@ mld_domifattach(struct ifnet *ifp, int how) { struct mld_ifinfo *mli; - MLD_PRINTF(("%s: called for ifp %p(%s%d)\n", - __func__, ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); mli = mli_alloc(how); if (mli == NULL) @@ -504,8 +578,8 @@ mld_domifattach(struct ifnet *ifp, int how) MLD_UNLOCK(); - MLD_PRINTF(("allocate mld_ifinfo for ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: allocate mld_ifinfo for ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); return (mli); } @@ -537,8 +611,8 @@ mld_domifreattach(struct mld_ifinfo *mli) MLD_UNLOCK(); - MLD_PRINTF(("reattached mld_ifinfo for ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: reattached mld_ifinfo for ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); } /* @@ -551,8 +625,8 @@ mld_domifdetach(struct ifnet *ifp) SLIST_INIT(&in6m_dthead); - MLD_PRINTF(("%s: called for ifp %p(%s%d)\n", - __func__, ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); MLD_LOCK(); mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead); @@ -594,7 +668,8 @@ mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead) } MLI_UNLOCK(mli); } - panic("%s: mld_ifinfo not found for ifp %p\n", __func__, ifp); + panic("%s: mld_ifinfo not found for ifp %p(%s)\n", __func__, + ifp, ifp->if_xname); } __private_extern__ void @@ -618,7 +693,10 @@ mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach) MLI_LOCK_ASSERT_HELD(mli); mli->mli_ifp = ifp; - mli->mli_version = MLD_VERSION_2; + if (mld_v2enable) + mli->mli_version = MLD_VERSION_2; + else + mli->mli_version = MLD_VERSION_1; mli->mli_flags = 0; mli->mli_rv = MLD_RV_INIT; mli->mli_qi = MLD_QI_INIT; @@ -722,8 +800,8 @@ mli_remref(struct mld_ifinfo *mli) /* Now that we're dropped all locks, release detached records */ MLD_REMOVE_DETACHED_IN6M(&in6m_dthead); - MLD_PRINTF(("%s: freeing mld_ifinfo for ifp %p(%s%d)\n", - __func__, ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: freeing mld_ifinfo for ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); mli_free(mli); } @@ -741,16 +819,19 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, { struct mld_ifinfo *mli; struct in6_multi *inm; - int is_general_query; + int err = 0, is_general_query; uint16_t timer; + struct mld_tparams mtp = { 0, 0, 0, 0 }; + + MLD_LOCK_ASSERT_NOTHELD(); is_general_query = 0; if (!mld_v1enable) { - MLD_PRINTF(("ignore v1 query %s on ifp %p(%s%d)\n", - ip6_sprintf(&mld->mld_addr), - ifp, ifp->if_name, ifp->if_unit)); - return (0); + MLD_PRINTF(("%s: ignore v1 query %s on ifp 0x%llx(%s)\n", + __func__, ip6_sprintf(&mld->mld_addr), + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + goto done; } /* @@ -758,10 +839,10 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * a router's link-local address. 
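/*
 * sysctl_mld_v2enable() above has the usual shape of a lock-protected
 * tunable: run the generic integer handler on a local copy, range-check
 * the result, apply side effects (downgrading every interface to MLDv1)
 * under the subsystem lock, and only arm the timer after the lock is
 * dropped. A userland sketch of the same control flow, with a pthread
 * mutex standing in for the MLD mutex:
 */
#include <errno.h>
#include <pthread.h>

static pthread_mutex_t mld_mtx = PTHREAD_MUTEX_INITIALIZER;
static int v2enable = 1;
static int want_timeout;                /* stands in for mld_tparams.qpt */

static void downgrade_all_interfaces(void) { want_timeout = 1; }
static void arm_timeout(void)              { /* arm callout if wanted */ }

static int
set_v2enable(int newval)
{
    int error = 0;

    pthread_mutex_lock(&mld_mtx);
    if (newval < 0 || newval > 1) {
        error = EINVAL;                 /* reject out-of-range input */
    } else if ((v2enable = newval) == 0) {
        downgrade_all_interfaces();     /* explicit v2 -> v1 downgrade */
    }                                   /* re-enable: next query upgrades */
    pthread_mutex_unlock(&mld_mtx);

    arm_timeout();                      /* outside the lock, as above */
    return error;
}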
*/ if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { - MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n", - ip6_sprintf(&ip6->ip6_src), - ifp, ifp->if_name, ifp->if_unit)); - return (0); + MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n", + __func__, ip6_sprintf(&ip6->ip6_src), + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + goto done; } /* @@ -777,8 +858,10 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, dst = ip6->ip6_dst; in6_clearscope(&dst); - if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) - return (EINVAL); + if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) { + err = EINVAL; + goto done; + } is_general_query = 1; } else { /* @@ -795,18 +878,18 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, VERIFY(mli != NULL); MLI_LOCK(mli); - mld_set_version(mli, MLD_VERSION_1); + mtp.qpt = mld_set_version(mli, MLD_VERSION_1); MLI_UNLOCK(mli); - timer = (ntohs(mld->mld_maxdelay) * PR_SLOWHZ) / MLD_TIMER_SCALE; + timer = ntohs(mld->mld_maxdelay) / MLD_TIMER_SCALE; if (timer == 0) timer = 1; if (is_general_query) { struct in6_multistep step; - MLD_PRINTF(("process v1 general query on ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: process v1 general query on ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); /* * For each reporting group joined on this * interface, kick the report timer. @@ -816,7 +899,7 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, while (inm != NULL) { IN6M_LOCK(inm); if (inm->in6m_ifp == ifp) - mld_v1_update_group(inm, timer); + mtp.cst += mld_v1_update_group(inm, timer); IN6M_UNLOCK(inm); IN6_NEXT_MULTI(step, inm); } @@ -833,18 +916,21 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, if (inm != NULL) { IN6M_LOCK(inm); - MLD_PRINTF(("process v1 query %s on ifp %p(%s%d)\n", + MLD_PRINTF(("%s: process v1 query %s on " + "ifp 0x%llx(%s)\n", __func__, ip6_sprintf(&mld->mld_addr), - ifp, ifp->if_name, ifp->if_unit)); - mld_v1_update_group(inm, timer); + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + mtp.cst = mld_v1_update_group(inm, timer); IN6M_UNLOCK(inm); IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */ } /* XXX Clear embedded scope ID as userland won't expect it. */ in6_clearscope(&mld->mld_addr); } +done: + mld_set_timeout(&mtp); - return (0); + return (err); } /* @@ -862,14 +948,14 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * Unlike MLDv2, the delay per group should be jittered * to avoid bursts of MLDv1 reports. 
*/ -static void +static uint32_t mld_v1_update_group(struct in6_multi *inm, const int timer) { IN6M_LOCK_ASSERT_HELD(inm); - MLD_PRINTF(("%s: %s/%s%d timer=%d\n", __func__, + MLD_PRINTF(("%s: %s/%s timer=%d\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit, timer)); + if_name(inm->in6m_ifp), timer)); switch (inm->in6m_state) { case MLD_NOT_MEMBER: @@ -891,7 +977,6 @@ mld_v1_update_group(struct in6_multi *inm, const int timer) MLD_PRINTF(("%s: ->REPORTING\n", __func__)); inm->in6m_state = MLD_REPORTING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); - current_state_timers_running6 = 1; break; case MLD_SLEEPING_MEMBER: MLD_PRINTF(("%s: ->AWAKENING\n", __func__)); @@ -900,6 +985,8 @@ mld_v1_update_group(struct in6_multi *inm, const int timer) case MLD_LEAVING_MEMBER: break; } + + return (inm->in6m_timer); } /* @@ -918,34 +1005,44 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, struct mldv2_query *mld; struct in6_multi *inm; uint32_t maxdelay, nsrc, qqi; - int is_general_query; + int err = 0, is_general_query; uint16_t timer; uint8_t qrv; + struct mld_tparams mtp = { 0, 0, 0, 0 }; + + MLD_LOCK_ASSERT_NOTHELD(); is_general_query = 0; + if (!mld_v2enable) { + MLD_PRINTF(("%s: ignore v2 query %s on ifp 0x%llx(%s)\n", + __func__, ip6_sprintf(&ip6->ip6_src), + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + goto done; + } + /* * RFC3810 Section 6.2: MLD queries must originate from * a router's link-local address. */ if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { - MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n", - ip6_sprintf(&ip6->ip6_src), - ifp, ifp->if_name, ifp->if_unit)); - return (0); + MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n", + __func__, ip6_sprintf(&ip6->ip6_src), + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + goto done; } - MLD_PRINTF(("input v2 query on ifp %p(%s%d)\n", ifp, ifp->if_name, - ifp->if_unit)); + MLD_PRINTF(("%s: input v2 query on ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off); maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */ - if (maxdelay >= 32678) { + if (maxdelay >= 32768) { maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) << (MLD_MRC_EXP(maxdelay) + 3); } - timer = (maxdelay * PR_SLOWHZ) / MLD_TIMER_SCALE; + timer = maxdelay / MLD_TIMER_SCALE; if (timer == 0) timer = 1; @@ -963,11 +1060,15 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, } nsrc = ntohs(mld->mld_numsrc); - if (nsrc > MLD_MAX_GS_SOURCES) - return (EMSGSIZE); + if (nsrc > MLD_MAX_GS_SOURCES) { + err = EMSGSIZE; + goto done; + } if (icmp6len < sizeof(struct mldv2_query) + - (nsrc * sizeof(struct in6_addr))) - return (EMSGSIZE); + (nsrc * sizeof(struct in6_addr))) { + err = EMSGSIZE; + goto done; + } /* * Do further input validation upfront to avoid resetting timers @@ -975,17 +1076,13 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, */ if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { /* - * General Queries SHOULD be directed to ff02::1. * A general query with a source list has undefined * behaviour; discard it. 
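/*
 * The corrected bounds check above (32768, i.e. 2^15, rather than the old
 * 32678) matches RFC 3810 section 5.1.3: a Maximum Response Code below
 * 2^15 is a literal count of 1/10ths of a second, while larger values
 * pack a 3-bit exponent and a 12-bit mantissa. A sketch of the decode
 * (macro bodies paraphrased from the mld6.h definitions):
 */
#include <stdint.h>

#define MRC_EXP(x)   (((x) >> 12) & 0x0007)     /* bits 12..14 */
#define MRC_MANT(x)  ((x) & 0x0fff)             /* low 12 bits */

/* Input in host byte order (after ntohs()); result in 1/10ths of a sec. */
static uint32_t
mrc_to_tenths(uint16_t mrc)
{
    if (mrc < 32768)
        return mrc;
    return (uint32_t)(MRC_MANT(mrc) | 0x1000) << (MRC_EXP(mrc) + 3);
}
/*
 * mld_v2_input_query() then divides by MLD_TIMER_SCALE to reach the
 * whole-second units the new 1 Hz timeout uses, clamping to at least 1.
 */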
*/ - struct in6_addr dst; - - dst = ip6->ip6_dst; - in6_clearscope(&dst); - if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) || - nsrc > 0) - return (EINVAL); + if (nsrc > 0) { + err = EINVAL; + goto done; + } is_general_query = 1; } else { /* @@ -1007,16 +1104,16 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, */ if (mli->mli_version != MLD_VERSION_2) { MLI_UNLOCK(mli); - return (0); + goto done; } - mld_set_version(mli, MLD_VERSION_2); + mtp.qpt = mld_set_version(mli, MLD_VERSION_2); mli->mli_rv = qrv; mli->mli_qi = qqi; - mli->mli_qri = maxdelay; + mli->mli_qri = MAX(timer, MLD_QRI_MIN); - MLD_PRINTF(("%s: qrv %d qi %d maxdelay %d\n", __func__, qrv, qqi, - maxdelay)); + MLD_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, mli->mli_rv, + mli->mli_qi, mli->mli_qri)); if (is_general_query) { /* @@ -1030,11 +1127,10 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * not schedule any other reports. * Otherwise, reset the interface timer. */ - MLD_PRINTF(("process v2 general query on ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) { - mli->mli_v2_timer = MLD_RANDOM_DELAY(timer); - interface_timers_running6 = 1; + mtp.it = mli->mli_v2_timer = MLD_RANDOM_DELAY(timer); } MLI_UNLOCK(mli); } else { @@ -1051,11 +1147,9 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm); in6_multihead_lock_done(); if (inm == NULL) - return (0); + goto done; IN6M_LOCK(inm); -#ifndef __APPLE__ - /* TODO: need ratecheck equivalent */ if (nsrc > 0) { if (!ratecheck(&inm->in6m_lastgsrtv, &mld_gsrdelay)) { @@ -1063,12 +1157,11 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, __func__)); IN6M_UNLOCK(inm); IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */ - return (0); + goto done; } } -#endif - MLD_PRINTF(("process v2 group query on ifp %p(%s%d)\n", - ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: process v2 group query on ifp 0x%llx(%s)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); /* * If there is a pending General Query response * scheduled sooner than the selected delay, no @@ -1077,19 +1170,26 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, * group-specific or group-and-source query. */ MLI_LOCK(mli); - if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) { - MLI_UNLOCK(mli); - mld_v2_process_group_query(inm, timer, m, off); - } else { - MLI_UNLOCK(mli); + mtp.it = mli->mli_v2_timer; + MLI_UNLOCK(mli); + if (mtp.it == 0 || mtp.it >= timer) { + (void) mld_v2_process_group_query(inm, timer, m, off); + mtp.cst = inm->in6m_timer; } IN6M_UNLOCK(inm); IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */ /* XXX Clear embedded scope ID as userland won't expect it. 
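/*
 * Re-enabling ratecheck() above gates how often group-and-source queries
 * are honoured; mld_gsrdelay, initialised to {10, 0} earlier in this
 * file, allows at most one burst every ten seconds. A sketch of the
 * interval gate, assuming normalized timespecs and a monotonic clock:
 */
#include <stdbool.h>
#include <time.h>

/* Allow one event per 'mininterval'; 'last' is updated on success. */
static bool
ratecheck_sketch(struct timespec *last, const struct timespec *mininterval)
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);
    if (now.tv_sec - last->tv_sec > mininterval->tv_sec ||
        (now.tv_sec - last->tv_sec == mininterval->tv_sec &&
        now.tv_nsec - last->tv_nsec >= mininterval->tv_nsec)) {
        *last = now;
        return true;
    }
    return false;               /* too soon: ignore this query */
}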
*/ in6_clearscope(&mld->mld_addr); } +done: + if (mtp.it > 0) { + MLD_PRINTF(("%s: v2 general query response scheduled in " + "T+%d seconds on ifp 0x%llx(%s)\n", __func__, mtp.it, + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); + } + mld_set_timeout(&mtp); - return (0); + return (err); } /* @@ -1142,7 +1242,6 @@ mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0, } inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); - current_state_timers_running6 = 1; return (retval); } @@ -1153,7 +1252,6 @@ mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0, if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) { timer = min(inm->in6m_timer, timer); inm->in6m_timer = MLD_RANDOM_DELAY(timer); - current_state_timers_running6 = 1; return (retval); } @@ -1197,7 +1295,6 @@ mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0, __func__)); inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER; inm->in6m_timer = MLD_RANDOM_DELAY(timer); - current_state_timers_running6 = 1; } } @@ -1212,21 +1309,22 @@ mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0, * mld_addr. This is OK as we own the mbuf chain. */ static int -mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, - /*const*/ struct mld_hdr *mld) +mld_v1_input_report(struct ifnet *ifp, struct mbuf *m, + const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld) { struct in6_addr src, dst; struct in6_ifaddr *ia; struct in6_multi *inm; if (!mld_v1enable) { - MLD_PRINTF(("ignore v1 report %s on ifp %p(%s%d)\n", - ip6_sprintf(&mld->mld_addr), - ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: ignore v1 report %s on ifp 0x%llx(%s)\n", + __func__, ip6_sprintf(&mld->mld_addr), + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); return (0); } - if (ifp->if_flags & IFF_LOOPBACK) + if ((ifp->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) return (0); /* @@ -1236,9 +1334,9 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, src = ip6->ip6_src; in6_clearscope(&src); if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) { - MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n", - ip6_sprintf(&ip6->ip6_src), - ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n", + __func__, ip6_sprintf(&ip6->ip6_src), + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); return (EINVAL); } @@ -1250,9 +1348,9 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, in6_clearscope(&dst); if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) || !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) { - MLD_PRINTF(("ignore v1 query dst %s on ifp %p(%s%d)\n", - ip6_sprintf(&ip6->ip6_dst), - ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: ignore v1 query dst %s on ifp 0x%llx(%s)\n", + __func__, ip6_sprintf(&ip6->ip6_dst), + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); return (EINVAL); } @@ -1280,8 +1378,9 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, return (0); } - MLD_PRINTF(("process v1 report %s on ifp %p(%s%d)\n", - ip6_sprintf(&mld->mld_addr), ifp, ifp->if_name, ifp->if_unit)); + MLD_PRINTF(("%s: process v1 report %s on ifp 0x%llx(%s)\n", + __func__, ip6_sprintf(&mld->mld_addr), + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); /* * Embed scope ID of receiving interface in MLD query for lookup @@ -1330,9 +1429,10 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, case MLD_REPORTING_MEMBER: case 
MLD_IDLE_MEMBER: case MLD_AWAKENING_MEMBER: - MLD_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n", + MLD_PRINTF(("%s: report suppressed for %s on " + "ifp 0x%llx(%s)\n", __func__, ip6_sprintf(&mld->mld_addr), - ifp, ifp->if_name, ifp->if_unit)); + (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp))); case MLD_LAZY_MEMBER: inm->in6m_state = MLD_LAZY_MEMBER; break; @@ -1371,7 +1471,8 @@ mld_input(struct mbuf *m, int off, int icmp6len) struct mld_hdr *mld; int mldlen; - MLD_PRINTF(("%s: called w/mbuf (%p,%d)\n", __func__, m, off)); + MLD_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m), off)); ifp = m->m_pkthdr.rcvif; @@ -1409,7 +1510,7 @@ mld_input(struct mbuf *m, int off, int icmp6len) break; case MLD_LISTENER_REPORT: icmp6_ifstat_inc(ifp, ifs6_in_mldreport); - if (mld_v1_input_report(ifp, ip6, mld) != 0) + if (mld_v1_input_report(ifp, m, ip6, mld) != 0) return (0); break; case MLDV2_LISTENER_REPORT: @@ -1426,56 +1527,87 @@ mld_input(struct mbuf *m, int off, int icmp6len) } /* - * MLD6 slowtimo handler. - * Combiles both the slow and fast timer into one. We loose some responsivness but - * allows the system to avoid having a pr_fasttimo, thus allowing for power savings. + * Schedule MLD timer based on various parameters; caller must ensure that + * lock ordering is maintained as this routine acquires MLD global lock. */ void -mld_slowtimo(void) +mld_set_timeout(struct mld_tparams *mtp) +{ + MLD_LOCK_ASSERT_NOTHELD(); + VERIFY(mtp != NULL); + + if (mtp->qpt != 0 || mtp->it != 0 || mtp->cst != 0 || mtp->sct != 0) { + MLD_LOCK(); + if (mtp->qpt != 0) + querier_present_timers_running6 = 1; + if (mtp->it != 0) + interface_timers_running6 = 1; + if (mtp->cst != 0) + current_state_timers_running6 = 1; + if (mtp->sct != 0) + state_change_timers_running6 = 1; + mld_sched_timeout(); + MLD_UNLOCK(); + } +} + +/* + * MLD6 timer handler (per 1 second). + */ +static void +mld_timeout(void *arg) { +#pragma unused(arg) struct ifqueue scq; /* State-change packets */ struct ifqueue qrq; /* Query response packets */ struct ifnet *ifp; struct mld_ifinfo *mli; struct in6_multi *inm; - int uri_fasthz = 0; + int uri_sec = 0; SLIST_HEAD(, in6_multi) in6m_dthead; SLIST_INIT(&in6m_dthead); + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + MLD_LOCK(); - LIST_FOREACH(mli, &mli_head, mli_link) { - MLI_LOCK(mli); - mld_v1_process_querier_timers(mli); - MLI_UNLOCK(mli); - } + MLD_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d\n", __func__, + querier_present_timers_running6, interface_timers_running6, + current_state_timers_running6, state_change_timers_running6)); /* - * Quick check to see if any work needs to be done, in order to - * minimize the overhead of fasttimo processing. + * MLDv1 querier present timer processing. */ - if (!current_state_timers_running6 && - !interface_timers_running6 && - !state_change_timers_running6) { - MLD_UNLOCK(); - return; + if (querier_present_timers_running6) { + querier_present_timers_running6 = 0; + LIST_FOREACH(mli, &mli_head, mli_link) { + MLI_LOCK(mli); + mld_v1_process_querier_timers(mli); + if (mli->mli_v1_timer > 0) + querier_present_timers_running6 = 1; + MLI_UNLOCK(mli); + } } /* * MLDv2 General Query response timer processing. 
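/*
 * mld_timeout()/mld_sched_timeout() above form a demand-driven,
 * self-rearming one-second callout: the timer is armed only while one of
 * the *_running6 flags is set, and it re-arms itself from the callback
 * when work remains. A minimal single-threaded model of the control flow
 * (printf stands in for the kernel's timeout(fn, arg, hz)):
 */
#include <stdio.h>

static int timers_pending;      /* stands in for the four *_running6 flags */
static int timeout_armed;       /* stands in for mld_timeout_run */

static void sched_timeout(void);

static void
timeout_cb(void)
{
    printf("tick: servicing timers\n");
    if (timers_pending > 0)
        timers_pending--;       /* some timers expire, some stay live */
    timeout_armed = 0;
    sched_timeout();            /* re-arm only if work remains */
}

static void
sched_timeout(void)
{
    if (!timeout_armed && timers_pending > 0) {
        timeout_armed = 1;      /* kernel: timeout(mld_timeout, NULL, hz) */
        printf("armed 1s callout\n");
    }
}

int
main(void)
{
    timers_pending = 2;
    sched_timeout();
    while (timeout_armed)
        timeout_cb();           /* simulate the callout firing */
    return 0;
}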
*/ if (interface_timers_running6) { -#if 0 MLD_PRINTF(("%s: interface timers running\n", __func__)); -#endif interface_timers_running6 = 0; LIST_FOREACH(mli, &mli_head, mli_link) { MLI_LOCK(mli); if (mli->mli_v2_timer == 0) { /* Do nothing. */ } else if (--mli->mli_v2_timer == 0) { - mld_v2_dispatch_general_query(mli); + if (mld_v2_dispatch_general_query(mli) > 0) + interface_timers_running6 = 1; } else { interface_timers_running6 = 1; } @@ -1489,9 +1621,8 @@ mld_slowtimo(void) current_state_timers_running6 = 0; state_change_timers_running6 = 0; -#if 0 + MLD_PRINTF(("%s: state change timers running\n", __func__)); -#endif memset(&qrq, 0, sizeof(struct ifqueue)); qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS; @@ -1508,7 +1639,7 @@ mld_slowtimo(void) MLI_LOCK(mli); ifp = mli->mli_ifp; - uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * PR_SLOWHZ); + uri_sec = MLD_RANDOM_DELAY(mli->mli_uri); MLI_UNLOCK(mli); in6_multihead_lock_shared(); @@ -1526,7 +1657,7 @@ mld_slowtimo(void) break; case MLD_VERSION_2: mld_v2_process_group_timers(mli, &qrq, - &scq, inm, uri_fasthz); + &scq, inm, uri_sec); break; } MLI_UNLOCK(mli); @@ -1567,12 +1698,28 @@ next: } out_locked: + /* re-arm the timer if there's work to do */ + mld_timeout_run = 0; + mld_sched_timeout(); MLD_UNLOCK(); /* Now that we're dropped all locks, release detached records */ MLD_REMOVE_DETACHED_IN6M(&in6m_dthead); } +static void +mld_sched_timeout(void) +{ + MLD_LOCK_ASSERT_HELD(); + + if (!mld_timeout_run && + (querier_present_timers_running6 || current_state_timers_running6 || + interface_timers_running6 || state_change_timers_running6)) { + mld_timeout_run = 1; + timeout(mld_timeout, NULL, hz); + } +} + /* * Free the in6_multi reference(s) for this MLD lifecycle. * @@ -1630,6 +1777,7 @@ mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version) #pragma unused(mld_version) int report_timer_expired; + MLD_LOCK_ASSERT_HELD(); IN6M_LOCK_ASSERT_HELD(inm); MLI_LOCK_ASSERT_HELD(inm->in6m_mli); @@ -1639,6 +1787,7 @@ mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version) report_timer_expired = 1; } else { current_state_timers_running6 = 1; + /* caller will schedule timer */ return; } @@ -1674,11 +1823,12 @@ mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version) static void mld_v2_process_group_timers(struct mld_ifinfo *mli, struct ifqueue *qrq, struct ifqueue *scq, - struct in6_multi *inm, const int uri_fasthz) + struct in6_multi *inm, const int uri_sec) { int query_response_timer_expired; int state_change_retransmit_timer_expired; + MLD_LOCK_ASSERT_HELD(); IN6M_LOCK_ASSERT_HELD(inm); MLI_LOCK_ASSERT_HELD(mli); VERIFY(mli == inm->in6m_mli); @@ -1690,7 +1840,7 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli, * During a transition from compatibility mode back to MLDv2, * a group record in REPORTING state may still have its group * timer active. This is a no-op in this function; it is easier - * to deal with it here than to complicate the slow-timeout path. + * to deal with it here than to complicate the timeout path. 
*/ if (inm->in6m_timer == 0) { query_response_timer_expired = 0; @@ -1698,6 +1848,7 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli, query_response_timer_expired = 1; } else { current_state_timers_running6 = 1; + /* caller will schedule timer */ } if (inm->in6m_sctimer == 0) { @@ -1706,9 +1857,10 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli, state_change_retransmit_timer_expired = 1; } else { state_change_timers_running6 = 1; + /* caller will schedule timer */ } - /* We are in fasttimo, so be quick about it. */ + /* We are in timer callback, so be quick about it. */ if (!state_change_retransmit_timer_expired && !query_response_timer_expired) return; @@ -1751,8 +1903,9 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli, * reset the timer. */ if (--inm->in6m_scrv > 0) { - inm->in6m_sctimer = uri_fasthz; + inm->in6m_sctimer = uri_sec; state_change_timers_running6 = 1; + /* caller will schedule timer */ } /* * Retransmit the previously computed state-change @@ -1764,9 +1917,9 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli, (void) mld_v2_merge_state_changes(inm, scq); in6m_commit(inm); - MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__, + MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit)); + if_name(inm->in6m_ifp))); /* * If we are leaving the group for good, make sure @@ -1800,24 +1953,23 @@ mld_v2_process_group_timers(struct mld_ifinfo *mli, * Switch to a different version on the given interface, * as per Section 9.12. */ -static void +static uint32_t mld_set_version(struct mld_ifinfo *mli, const int mld_version) { int old_version_timer; MLI_LOCK_ASSERT_HELD(mli); - MLD_PRINTF(("%s: switching to v%d on ifp %p(%s%d)\n", __func__, - mld_version, mli->mli_ifp, mli->mli_ifp->if_name, - mli->mli_ifp->if_unit)); + MLD_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__, + mld_version, (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), + if_name(mli->mli_ifp))); if (mld_version == MLD_VERSION_1) { /* * Compute the "Older Version Querier Present" timer as per - * Section 9.12. + * Section 9.12, in seconds. */ old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri; - old_version_timer *= PR_SLOWHZ; mli->mli_v1_timer = old_version_timer; } @@ -1827,11 +1979,18 @@ mld_set_version(struct mld_ifinfo *mli, const int mld_version) } MLI_LOCK_ASSERT_HELD(mli); + + return (mli->mli_v1_timer); } /* * Cancel pending MLDv2 timers for the given link and all groups * joined on it; state-change, general-query, and group-query timers. + * + * Only ever called on a transition from v2 to Compatibility mode. Kill + * the timers stone dead (this may be expensive for large N groups), they + * will be restarted if Compatibility Mode deems that they must be due to + * query processing. */ static void mld_v2_cancel_link_timers(struct mld_ifinfo *mli) @@ -1842,19 +2001,20 @@ mld_v2_cancel_link_timers(struct mld_ifinfo *mli) MLI_LOCK_ASSERT_HELD(mli); - MLD_PRINTF(("%s: cancel v2 timers on ifp %p(%s%d)\n", __func__, - mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit)); + MLD_PRINTF(("%s: cancel v2 timers on ifp 0x%llx(%s)\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), if_name(mli->mli_ifp))); /* - * Fast-track this potentially expensive operation - * by checking all the global 'timer pending' flags. + * Stop the v2 General Query Response on this link stone dead. + * If timer is woken up due to interface_timers_running6, + * the flag will be cleared if there are no pending link timers. 
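/*
 * mld_set_version() above now computes the "Older Version Querier
 * Present" timeout in plain seconds (the PR_SLOWHZ scaling is gone) and
 * returns it so the caller can flag the querier-present timer class. Per
 * RFC 3810 section 9.12 the value is Robustness Variable times Query
 * Interval plus Query Response Interval; with the RFC defaults
 * (2, 125s, 10s):
 */
#include <stdint.h>

static uint32_t
ovqp_timeout_secs(uint32_t rv, uint32_t qi, uint32_t qri)
{
    return rv * qi + qri;       /* ovqp_timeout_secs(2, 125, 10) == 260 */
}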
*/ - if (!interface_timers_running6 && - !state_change_timers_running6 && - !current_state_timers_running6) - return; - mli->mli_v2_timer = 0; + + /* + * Now clear the current-state and state-change report timers + * for all memberships scoped to this link. + */ ifp = mli->mli_ifp; MLI_UNLOCK(mli); @@ -1872,6 +2032,10 @@ mld_v2_cancel_link_timers(struct mld_ifinfo *mli) case MLD_LAZY_MEMBER: case MLD_SLEEPING_MEMBER: case MLD_AWAKENING_MEMBER: + /* + * These states are either not relevant in v2 mode, + * or are unreported. Do nothing. + */ break; case MLD_LEAVING_MEMBER: /* @@ -1894,15 +2058,16 @@ mld_v2_cancel_link_timers(struct mld_ifinfo *mli) in6m_clear_recorded(inm); /* FALLTHROUGH */ case MLD_REPORTING_MEMBER: - inm->in6m_sctimer = 0; - inm->in6m_timer = 0; inm->in6m_state = MLD_REPORTING_MEMBER; - /* - * Free any pending MLDv2 state-change records. - */ - IF_DRAIN(&inm->in6m_scq); break; } + /* + * Always clear state-change and group report timers. + * Free any pending MLDv2 state-change records. + */ + inm->in6m_sctimer = 0; + inm->in6m_timer = 0; + IF_DRAIN(&inm->in6m_scq); next: IN6M_UNLOCK(inm); IN6_NEXT_MULTI(step, inm); @@ -1921,13 +2086,15 @@ mld_v1_process_querier_timers(struct mld_ifinfo *mli) { MLI_LOCK_ASSERT_HELD(mli); - if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) { + if (mld_v2enable && mli->mli_version != MLD_VERSION_2 && + --mli->mli_v1_timer == 0) { /* * MLDv1 Querier Present timer expired; revert to MLDv2. */ - MLD_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n", + MLD_PRINTF(("%s: transition from v%d -> v%d on 0x%llx(%s)\n", __func__, mli->mli_version, MLD_VERSION_2, - mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit)); + (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), + if_name(mli->mli_ifp))); mli->mli_version = MLD_VERSION_2; } } @@ -2003,6 +2170,7 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type) mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), sizeof(struct mld_hdr)); + mld_save_context(mh, ifp); mh->m_flags |= M_MLDV1; /* @@ -2011,15 +2179,17 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type) * mld_dispatch_packet() here since that will eventually call * ip6_output(), which will try to lock in6_multihead_lock and cause * a deadlock. - * Instead we defer the work to the mld_slowtimo() thread, thus + * Instead we defer the work to the mld_timeout() thread, thus * avoiding unlocking in_multihead_lock here. */ if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) { MLD_PRINTF(("%s: v1 outbound queue full\n", __func__)); error = ENOMEM; m_freem(mh); - } else + } else { IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh); + VERIFY(error == 0); + } return (error); } @@ -2040,17 +2210,21 @@ mld_v1_transmit_report(struct in6_multi *in6m, const int type) * * If delay is non-zero, and the state change is an initial multicast * join, the state change report will be delayed by 'delay' ticks - * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise + * in units of seconds if MLDv1 is active on the link; otherwise * the initial MLDv2 state change report will be delayed by whichever * is sooner, a pending state-change timer or delay itself. 
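/*
 * mld_v1_transmit_report() above defers the actual send by queueing onto
 * the per-interface v1 queue, reporting a full queue as ENOMEM;
 * mld_final_leave() later treats a zero return as "enqueued, schedule the
 * current-state timer". A sketch of the bounded FIFO behind that
 * contract:
 */
#include <errno.h>
#include <stddef.h>

struct pkt { struct pkt *next; };

struct bounded_q {
    struct pkt *head, *tail;
    int len, maxlen;
};

static int
bq_enqueue(struct bounded_q *q, struct pkt *p)
{
    if (q->len >= q->maxlen)
        return ENOMEM;          /* caller frees the packet, skips timer */
    p->next = NULL;
    if (q->tail != NULL)
        q->tail->next = p;
    else
        q->head = p;
    q->tail = p;
    q->len++;
    return 0;                   /* caller sets its cst flag on success */
}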
*/ int -mld_change_state(struct in6_multi *inm, const int delay) +mld_change_state(struct in6_multi *inm, struct mld_tparams *mtp, + const int delay) { struct mld_ifinfo *mli; struct ifnet *ifp; int error = 0; + VERIFY(mtp != NULL); + bzero(mtp, sizeof (*mtp)); + IN6M_LOCK_ASSERT_HELD(inm); VERIFY(inm->in6m_mli != NULL); MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli); @@ -2079,19 +2253,18 @@ mld_change_state(struct in6_multi *inm, const int delay) inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode)); if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) { MLD_PRINTF(("%s: initial join\n", __func__)); - error = mld_initial_join(inm, mli, delay); + error = mld_initial_join(inm, mli, mtp, delay); goto out; } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) { MLD_PRINTF(("%s: final leave\n", __func__)); - mld_final_leave(inm, mli); + mld_final_leave(inm, mli, mtp); goto out; } } else { MLD_PRINTF(("%s: filter set change\n", __func__)); } - error = mld_handle_state_change(inm, mli); - + error = mld_handle_state_change(inm, mli, mtp); out: return (error); } @@ -2106,11 +2279,11 @@ out: * initial state of the membership. * * If the delay argument is non-zero, then we must delay sending the - * initial state change for delay ticks (in units of PR_FASTHZ). + * initial state change for delay ticks (in units of seconds). */ static int mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, - const int delay) + struct mld_tparams *mtp, const int delay) { struct ifnet *ifp; struct ifqueue *ifq; @@ -2119,10 +2292,12 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, IN6M_LOCK_ASSERT_HELD(inm); MLI_LOCK_ASSERT_NOTHELD(mli); + VERIFY(mtp != NULL); - MLD_PRINTF(("%s: initial join %s on ifp %p(%s%d)\n", + MLD_PRINTF(("%s: initial join %s on ifp 0x%llx(%s)\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit)); + (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp), + if_name(inm->in6m_ifp))); error = 0; syncstates = 1; @@ -2175,10 +2350,10 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, * and delay sending the initial MLDv1 report * by not transitioning to the IDLE state. */ - odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_SLOWHZ); + odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI); if (delay) { inm->in6m_timer = max(delay, odelay); - current_state_timers_running6 = 1; + mtp->cst = 1; } else { inm->in6m_state = MLD_IDLE_MEMBER; error = mld_v1_transmit_report(inm, @@ -2189,7 +2364,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, if (error == 0) { inm->in6m_timer = odelay; - current_state_timers_running6 = 1; + mtp->cst = 1; } } break; @@ -2211,6 +2386,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, IF_DRAIN(ifq); retval = mld_v2_enqueue_group_record(ifq, inm, 1, 0, 0, (mli->mli_flags & MLIF_USEALLOW)); + mtp->cst = (ifq->ifq_len > 0); MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval)); if (retval <= 0) { @@ -2221,7 +2397,7 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, /* * Schedule transmission of pending state-change * report up to RV times for this link. The timer - * will fire at the next mld_fasttimo (~200ms), + * will fire at the next mld_timeout (1 second), * giving us an opportunity to merge the reports.
* * If a delay was provided to this function, only @@ -2235,10 +2411,10 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, min(inm->in6m_sctimer, delay); } else inm->in6m_sctimer = delay; - } else + } else { inm->in6m_sctimer = 1; - state_change_timers_running6 = 1; - + } + mtp->sct = 1; error = 0; break; } @@ -2252,9 +2428,9 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, */ if (syncstates) { in6m_commit(inm); - MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__, + MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp->if_name, ifp->if_unit)); + if_name(inm->in6m_ifp))); } return (error); @@ -2264,17 +2440,20 @@ mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli, * Issue an intermediate state change during the life-cycle. */ static int -mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli) +mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli, + struct mld_tparams *mtp) { struct ifnet *ifp; - int retval; + int retval = 0; IN6M_LOCK_ASSERT_HELD(inm); MLI_LOCK_ASSERT_NOTHELD(mli); + VERIFY(mtp != NULL); - MLD_PRINTF(("%s: state change for %s on ifp %p(%s%d)\n", + MLD_PRINTF(("%s: state change for %s on ifp 0x%llx(%s)\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit)); + (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp), + if_name(inm->in6m_ifp))); ifp = inm->in6m_ifp; @@ -2292,20 +2471,22 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli) } MLD_PRINTF(("%s: nothing to do\n", __func__)); in6m_commit(inm); - MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__, + MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit)); - return (0); + if_name(inm->in6m_ifp))); + goto done; } IF_DRAIN(&inm->in6m_scq); retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0, (mli->mli_flags & MLIF_USEALLOW)); + mtp->cst = (inm->in6m_scq.ifq_len > 0); MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval)); if (retval <= 0) { MLI_UNLOCK(mli); - return (-retval); + retval *= -1; + goto done; } /* * If record(s) were enqueued, start the state-change @@ -2313,10 +2494,11 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli) */ inm->in6m_scrv = mli->mli_rv; inm->in6m_sctimer = 1; - state_change_timers_running6 = 1; + mtp->sct = 1; MLI_UNLOCK(mli); - return (0); +done: + return (retval); } /* @@ -2328,16 +2510,19 @@ mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli) * to INCLUDE {} for immediate transmission. 
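/*
 * The join path above coalesces the caller-supplied delay with any
 * pending state-change retransmission so reports can merge: an
 * already-armed timer keeps the sooner deadline, and a zero delay means
 * "fire on the next one-second tick". The decision in isolation:
 */
#include <stdint.h>

static uint16_t
next_sctimer(uint16_t pending_sctimer, uint16_t delay_secs)
{
    if (delay_secs == 0)
        return 1;                       /* next tick */
    if (pending_sctimer != 0 && pending_sctimer < delay_secs)
        return pending_sctimer;         /* keep the sooner deadline */
    return delay_secs;
}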
*/ static void -mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) +mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli, + struct mld_tparams *mtp) { int syncstates = 1; IN6M_LOCK_ASSERT_HELD(inm); MLI_LOCK_ASSERT_NOTHELD(mli); + VERIFY(mtp != NULL); - MLD_PRINTF(("%s: final leave %s on ifp %p(%s%d)\n", + MLD_PRINTF(("%s: final leave %s on ifp 0x%llx(%s)\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit)); + (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp), + if_name(inm->in6m_ifp))); switch (inm->in6m_state) { case MLD_NOT_MEMBER: @@ -2359,7 +2544,9 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) "mode\n", __func__); /* NOTREACHED */ } - mld_v1_transmit_report(inm, MLD_LISTENER_DONE); + /* schedule timer if enqueue is successful */ + mtp->cst = (mld_v1_transmit_report(inm, + MLD_LISTENER_DONE) == 0); IN6M_LOCK_ASSERT_HELD(inm); MLI_LOCK_ASSERT_HELD(mli); @@ -2369,16 +2556,16 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) /* * Stop group timer and all pending reports. * Immediately enqueue a state-change report - * TO_IN {} to be sent on the next fast timeout, + * TO_IN {} to be sent on the next timeout, * giving us an opportunity to merge reports. */ IF_DRAIN(&inm->in6m_scq); inm->in6m_timer = 0; inm->in6m_scrv = mli->mli_rv; - MLD_PRINTF(("%s: Leaving %s/%s%d with %d " + MLD_PRINTF(("%s: Leaving %s/%s with %d " "pending retransmissions.\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit, + if_name(inm->in6m_ifp), inm->in6m_scrv)); if (inm->in6m_scrv == 0) { inm->in6m_state = MLD_NOT_MEMBER; @@ -2401,13 +2588,14 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) retval = mld_v2_enqueue_group_record( &inm->in6m_scq, inm, 1, 0, 0, (mli->mli_flags & MLIF_USEALLOW)); + mtp->cst = (inm->in6m_scq.ifq_len > 0); KASSERT(retval != 0, ("%s: enqueue record = %d\n", __func__, retval)); inm->in6m_state = MLD_LEAVING_MEMBER; inm->in6m_sctimer = 1; - state_change_timers_running6 = 1; + mtp->sct = 1; syncstates = 0; } } @@ -2422,13 +2610,13 @@ mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli) if (syncstates) { in6m_commit(inm); - MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__, + MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit)); + if_name(inm->in6m_ifp))); inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; - MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for %p/%s%d\n", - __func__, &inm->in6m_addr, inm->in6m_ifp->if_name, - inm->in6m_ifp->if_unit)); + MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for 0x%llx/%s\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(&inm->in6m_addr), + if_name(inm->in6m_ifp))); } } @@ -2574,9 +2762,9 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, return (mld_v2_enqueue_filter_change(ifq, inm)); if (type == MLD_DO_NOTHING) { - MLD_PRINTF(("%s: nothing to do for %s/%s%d\n", + MLD_PRINTF(("%s: nothing to do for %s/%s\n", __func__, ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit)); + if_name(inm->in6m_ifp))); return (0); } @@ -2588,10 +2776,10 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, minrec0len = sizeof(struct mldv2_record); if (record_has_sources) minrec0len += sizeof(struct in6_addr); - MLD_PRINTF(("%s: queueing %s for %s/%s%d\n", __func__, + MLD_PRINTF(("%s: queueing %s for %s/%s\n", __func__, mld_rec_type_to_str(type),
ip6_sprintf(&inm->in6m_addr), - inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit)); + if_name(inm->in6m_ifp))); /* * Check if we have a packet in the tail of the queue for this @@ -2627,6 +2815,8 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, if (m == NULL) return (-ENOMEM); + mld_save_context(m, ifp); + MLD_PRINTF(("%s: allocated first packet\n", __func__)); } @@ -2724,7 +2914,6 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, if (m != m0) { MLD_PRINTF(("%s: enqueueing first packet\n", __func__)); m->m_pkthdr.vt_nrecs = 1; - m->m_pkthdr.rcvif = ifp; IF_ENQUEUE(ifq, m); } else { m->m_pkthdr.vt_nrecs++; @@ -2750,6 +2939,7 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, m = m_gethdr(M_DONTWAIT, MT_DATA); if (m == NULL) return (-ENOMEM); + mld_save_context(m, ifp); md = m_getptr(m, 0, &off); pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off); MLD_PRINTF(("%s: allocated next packet\n", __func__)); @@ -2799,7 +2989,6 @@ mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm, nbytes += (msrcs * sizeof(struct in6_addr)); MLD_PRINTF(("%s: enqueueing next packet\n", __func__)); - m->m_pkthdr.rcvif = ifp; IF_ENQUEUE(ifq, m); } @@ -2905,6 +3094,7 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm) return (-ENOMEM); } m->m_pkthdr.vt_nrecs = 0; + mld_save_context(m, ifp); m0srcs = (ifp->if_mtu - MLD_MTUSPACE - sizeof(struct mldv2_record)) / sizeof(struct in6_addr); @@ -3024,7 +3214,6 @@ mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm) * packet if it wasn't already queued. */ m->m_pkthdr.vt_nrecs++; - m->m_pkthdr.rcvif = ifp; if (m != m0) IF_ENQUEUE(ifq, m); nbytes += npbytes; @@ -3066,8 +3255,8 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) gq = &inm->in6m_scq; #ifdef MLD_DEBUG if (gq->ifq_head == NULL) { - MLD_PRINTF(("%s: WARNING: queue for inm %p is empty\n", - __func__, inm)); + MLD_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm))); } #endif @@ -3100,7 +3289,8 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) if (!domerge && IF_QFULL(gq)) { MLD_PRINTF(("%s: outbound queue full, skipping whole " - "packet %p\n", __func__, m)); + "packet 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); n = m->m_nextpkt; if (!docopy) { IF_REMQUEUE(gq, m); @@ -3111,13 +3301,15 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) } if (!docopy) { - MLD_PRINTF(("%s: dequeueing %p\n", __func__, m)); + MLD_PRINTF(("%s: dequeueing 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); n = m->m_nextpkt; IF_REMQUEUE(gq, m); m0 = m; m = n; } else { - MLD_PRINTF(("%s: copying %p\n", __func__, m)); + MLD_PRINTF(("%s: copying 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); m0 = m_dup(m, M_NOWAIT); if (m0 == NULL) return (ENOMEM); @@ -3126,15 +3318,17 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) } if (!domerge) { - MLD_PRINTF(("%s: queueing %p to ifscq %p)\n", - __func__, m0, ifscq)); - m0->m_pkthdr.rcvif = inm->in6m_ifp; + MLD_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0), + (uint64_t)VM_KERNEL_ADDRPERM(ifscq))); IF_ENQUEUE(ifscq, m0); } else { struct mbuf *mtl; /* last mbuf of packet mt */ - MLD_PRINTF(("%s: merging %p with ifscq tail %p)\n", - __func__, m0, mt)); + MLD_PRINTF(("%s: merging 0x%llx with ifscq tail " + "0x%llx)\n", __func__, + 
(uint64_t)VM_KERNEL_ADDRPERM(m0), + (uint64_t)VM_KERNEL_ADDRPERM(mt))); mtl = m_last(mt); m0->m_flags &= ~M_PKTHDR; @@ -3152,7 +3346,7 @@ mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq) /* * Respond to a pending MLDv2 General Query. */ -static void +static uint32_t mld_v2_dispatch_general_query(struct mld_ifinfo *mli) { struct ifnet *ifp; @@ -3207,13 +3401,14 @@ next: MLI_LOCK_ASSERT_HELD(mli); /* - * Slew transmission of bursts over 500ms intervals. + * Slew transmission of bursts over 1 second intervals. */ if (mli->mli_gq.ifq_head != NULL) { mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY( MLD_RESPONSE_BURST_INTERVAL); - interface_timers_running6 = 1; } + + return (mli->mli_v2_timer); } /* @@ -3235,15 +3430,17 @@ mld_dispatch_packet(struct mbuf *m) int off; int type; - MLD_PRINTF(("%s: transmit %p\n", __func__, m)); + MLD_PRINTF(("%s: transmit 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); /* * Check if the ifnet is still attached. */ - ifp = m->m_pkthdr.rcvif; + ifp = mld_restore_context(m); if (ifp == NULL || !ifnet_is_attached(ifp, 0)) { - MLD_PRINTF(("%s: dropped %p as ifindex %u went away.\n", - __func__, m, (u_int)if_index)); + MLD_PRINTF(("%s: dropped 0x%llx as ifindex %u went away.\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(m), + (u_int)if_index)); m_freem(m); ip6stat.ip6s_noroute++; return; @@ -3268,7 +3465,8 @@ mld_dispatch_packet(struct mbuf *m) } else { m0 = mld_v2_encap_report(ifp, m); if (m0 == NULL) { - MLD_PRINTF(("%s: dropped %p\n", __func__, m)); + MLD_PRINTF(("%s: dropped 0x%llx\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m))); /* * mld_v2_encap_report() has already freed our mbuf. */ @@ -3278,20 +3476,12 @@ mld_dispatch_packet(struct mbuf *m) } } + mld_scrub_context(m0); m->m_flags &= ~(M_PROTOFLAGS); m0->m_pkthdr.rcvif = lo_ifp; ip6 = mtod(m0, struct ip6_hdr *); -#if 0 - (void) in6_setscope(&ip6->ip6_dst, ifp, NULL); /* XXX LOR */ -#else - /* - * XXX XXX Break some KPI rules to prevent an LOR which would - * occur if we called in6_setscope() at transmission. - * See comments at top of file. - */ - MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index); -#endif + (void) in6_setscope(&ip6->ip6_dst, ifp, NULL); /* * Retrieve the ICMPv6 type before handoff to ip6_output(), @@ -3302,7 +3492,8 @@ mld_dispatch_packet(struct mbuf *m) type = mld->mld_type; if (ifp->if_eflags & IFEF_TXSTART) { - /* Use control service class if the outgoing + /* + * Use control service class if the outgoing * interface supports transmit-start model. */ (void) m_set_service_class(m0, MBUF_SC_CTL); @@ -3314,7 +3505,8 @@ mld_dispatch_packet(struct mbuf *m) IM6O_REMREF(im6o); if (error) { - MLD_PRINTF(("%s: ip6_output(%p) = %d\n", __func__, m0, error)); + MLD_PRINTF(("%s: ip6_output(0x%llx) = %d\n", __func__, + (uint64_t)VM_KERNEL_ADDRPERM(m0), error)); if (oifp != NULL) ifnet_release(oifp); return; diff --git a/bsd/netinet6/mld6_var.h b/bsd/netinet6/mld6_var.h index 7ccbcbf0b..249506b85 100644 --- a/bsd/netinet6/mld6_var.h +++ b/bsd/netinet6/mld6_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -99,7 +99,7 @@ struct mld_ifinfo_u { #define MLD_VERSION_2 2 /* Default */ #endif /* PRIVATE */ -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #include #define MLD_DEBUG 1 @@ -110,7 +110,7 @@ extern int mld_debug; #define MLD_PRINTF(x) #endif -#define MLD_RANDOM_DELAY(X) (random() % (X) + 1) +#define MLD_RANDOM_DELAY(X) (RandomULong() % (X) + 1) #define MLD_MAX_STATE_CHANGES 24 /* Max pending changes per group */ /* @@ -151,7 +151,7 @@ extern int mld_debug; #define MLD_MAX_STATE_CHANGE_PACKETS 8 /* # of packets per state change */ #define MLD_MAX_RESPONSE_PACKETS 16 /* # of packets for general query */ #define MLD_MAX_RESPONSE_BURST 4 /* # of responses to send at once */ -#define MLD_RESPONSE_BURST_INTERVAL (PR_SLOWHZ) /* 500ms */ +#define MLD_RESPONSE_BURST_INTERVAL 1 /* 1 second */ /* * MLD-specific mbuf flags. @@ -226,15 +226,26 @@ struct mld_ifinfo { */ #define MLD_IFINFO(ifp) ((ifp)->if_mli) -extern int mld_change_state(struct in6_multi *, const int); +/* + * MLD timer schedule parameters + */ +struct mld_tparams { + int qpt; /* querier_present_timers_running6 */ + int it; /* interface_timers_running6 */ + int cst; /* current_state_timers_running6 */ + int sct; /* state_change_timers_running6 */ +}; + +extern int mld_change_state(struct in6_multi *, struct mld_tparams *, + const int); extern struct mld_ifinfo *mld_domifattach(struct ifnet *, int); extern void mld_domifreattach(struct mld_ifinfo *); extern void mld_domifdetach(struct ifnet *); extern void mld_fasttimo(void); extern void mld_ifdetach(struct ifnet *); extern int mld_input(struct mbuf *, int, int); -extern void mld_slowtimo(void); extern void mld_init(void); +extern void mld_set_timeout(struct mld_tparams *); extern void mli_addref(struct mld_ifinfo *, int); extern void mli_remref(struct mld_ifinfo *); __private_extern__ void mld6_initsilent(struct ifnet *, struct mld_ifinfo *); @@ -243,6 +254,5 @@ __private_extern__ void mld6_initsilent(struct ifnet *, struct mld_ifinfo *); SYSCTL_DECL(_net_inet6_mld); #endif -#endif /* XNU_KERNEL_PRIVATE */ - +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_MLD6_VAR_H_ */ diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c index 2495e5650..3935be28d 100644 --- a/bsd/netinet6/nd6.c +++ b/bsd/netinet6/nd6.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
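/*
 * The mld6_var.h hunk above swaps random() for RandomULong() inside
 * MLD_RANDOM_DELAY(), still yielding a jitter value in [1, X] (now
 * denominated in seconds rather than slow-timer ticks). A userland
 * equivalent, with arc4random() standing in for the kernel entropy
 * source; the modulo carries a negligible bias when X does not divide
 * 2^32:
 */
#include <stdint.h>
#include <stdlib.h>     /* arc4random() on BSD-derived systems */

#define RANDOM_DELAY(x) ((uint32_t)(arc4random() % (x)) + 1)    /* 1..x */

/* e.g. in6m_timer = RANDOM_DELAY(max_response_secs); */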
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/nd6.c,v 1.20 2002/08/02 20:49:14 rwatson Exp $ */ -/* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -81,10 +78,11 @@ #include #include +#include + #include #include -#define DONT_WARN_OBSOLETE #include #include #include @@ -92,6 +90,7 @@ #include #include #include +#include #include #include @@ -105,15 +104,14 @@ #include "loop.h" -#include - -#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ -#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ +#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ +#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) /* timer values */ int nd6_prune = 1; /* walk list every 1 seconds */ +int nd6_prune_lazy = 5; /* lazily walk list every 5 seconds */ int nd6_delay = 5; /* delay first probe time 5 second */ int nd6_umaxtries = 3; /* maximum unicast query */ int nd6_mmaxtries = 3; /* maximum multicast query */ @@ -123,7 +121,6 @@ int nd6_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */ /* preventing too many loops in ND option parsing */ int nd6_maxndopt = 10; /* max # of ND options allowed */ -int nd6_maxnudhint = 0; /* max # of subsequent upper layer hints */ int nd6_maxqueuelen = 1; /* max # of packets cached in unresolved ND entries */ #if ND6_DEBUG @@ -133,10 +130,9 @@ int nd6_debug = 0; #endif int nd6_optimistic_dad = - (ND6_OPTIMISTIC_DAD_LINKLOCAL|ND6_OPTIMISTIC_DAD_AUTOCONF| - ND6_OPTIMISTIC_DAD_TEMPORARY|ND6_OPTIMISTIC_DAD_DYNAMIC); - -static int nd6_is_new_addr_neighbor (struct sockaddr_in6 *, struct ifnet *); + (ND6_OPTIMISTIC_DAD_LINKLOCAL|ND6_OPTIMISTIC_DAD_AUTOCONF| + ND6_OPTIMISTIC_DAD_TEMPORARY|ND6_OPTIMISTIC_DAD_DYNAMIC| + ND6_OPTIMISTIC_DAD_SECURED); /* for debugging? */ static int nd6_inuse, nd6_allocated; @@ -157,7 +153,7 @@ static int nd6_inuse, nd6_allocated; * * - Routing lock (rnh_lock) * - * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_byhint, ln_flags, + * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_flags, * ln_llreach, ln_lastused * * - Routing entry lock (rt_lock) @@ -168,7 +164,8 @@ static int nd6_inuse, nd6_allocated; * freed until the route itself is freed. */ struct llinfo_nd6 llinfo_nd6 = { - &llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0, 0, NULL, 0 + .ln_next = &llinfo_nd6, + .ln_prev = &llinfo_nd6, }; /* Protected by nd_if_rwlock */ @@ -185,10 +182,28 @@ lck_rw_t *nd_if_rwlock = &nd_if_rwlock_data; struct nd_drhead nd_defrouter; struct nd_prhead nd_prefix = { 0 }; -/* Serialization variables for nd6_drain() */ -static boolean_t nd6_drain_busy; -static void *nd6_drain_waitchan = &nd6_drain_busy; -static int nd6_drain_waiters = 0; +/* + * nd6_timeout() is scheduled on a demand basis. nd6_timeout_run is used + * to indicate whether or not a timeout has been scheduled. The rnh_lock + * mutex is used to protect this scheduling; it is a natural choice given + * the work done in the timer callback. Unfortunately, there are cases + * when nd6_timeout() needs to be scheduled while rnh_lock cannot be easily + * held, due to lock ordering. In those cases, we utilize a "demand" counter + * nd6_sched_timeout_want which can be atomically incremented without + * having to hold rnh_lock. On places where we acquire rnh_lock, such as + * nd6_rtrequest(), we check this counter and schedule the timer if it is + * non-zero. 
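The demand-counter idiom described in this comment is worth seeing in isolation: paths that cannot take rnh_lock only bump a counter, and whoever next holds the lock arms the timer on their behalf. An illustrative stand-alone version using C11 atomics (xnu uses its own primitives, so all names here are stand-ins):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int sched_timeout_want;   /* cf. nd6_sched_timeout_want */
static bool timeout_armed;              /* protected by the sched lock */

void
request_timeout_lockfree(void)          /* safe without the sched lock */
{
        atomic_fetch_add(&sched_timeout_want, 1);
}

void
check_demand_locked(void)               /* caller holds the sched lock */
{
        if (atomic_exchange(&sched_timeout_want, 0) > 0 &&
            !timeout_armed) {
                timeout_armed = true;
                /* arm_timer(); -- hypothetical callout registration */
        }
}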
The increment happens on various places when we allocate + * new ND entries, default routers, prefixes and addresses. + */ +static int nd6_timeout_run; /* nd6_timeout is scheduled to run */ +static void nd6_timeout(void *); +int nd6_sched_timeout_want; /* demand count for timer to be sched */ +static boolean_t nd6_fast_timer_on = FALSE; + +/* Serialization variables for nd6_service(), protected by rnh_lock */ +static boolean_t nd6_service_busy; +static void *nd6_service_wc = &nd6_service_busy; +static int nd6_service_waiters = 0; int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL; static struct sockaddr_in6 all1_sa; @@ -196,20 +211,26 @@ static struct sockaddr_in6 all1_sa; static int regen_tmpaddr(struct in6_ifaddr *); extern lck_mtx_t *nd6_mutex; -static void nd6_slowtimo(void *ignored_arg); -static struct llinfo_nd6 *nd6_llinfo_alloc(void); +static struct llinfo_nd6 *nd6_llinfo_alloc(int); static void nd6_llinfo_free(void *); static void nd6_llinfo_purge(struct rtentry *); static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *); static void nd6_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *); +static uint64_t ln_getexpire(struct llinfo_nd6 *); +static void nd6_service(void *); +static void nd6_slowtimo(void *); +static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, struct ifnet *); static int nd6_siocgdrlst(void *, int); static int nd6_siocgprlst(void *, int); +static int nd6_sysctl_drlist SYSCTL_HANDLER_ARGS; +static int nd6_sysctl_prlist SYSCTL_HANDLER_ARGS; + /* * Insertion and removal from llinfo_nd6 must be done with rnh_lock held. */ -#define LN_DEQUEUE(_ln) do { \ +#define LN_DEQUEUE(_ln) do { \ lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); \ RT_LOCK_ASSERT_HELD((_ln)->ln_rt); \ (_ln)->ln_next->ln_prev = (_ln)->ln_prev; \ @@ -218,7 +239,7 @@ static int nd6_siocgprlst(void *, int); (_ln)->ln_flags &= ~ND6_LNF_IN_USE; \ } while (0) -#define LN_INSERTHEAD(_ln) do { \ +#define LN_INSERTHEAD(_ln) do { \ lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); \ RT_LOCK_ASSERT_HELD((_ln)->ln_rt); \ (_ln)->ln_next = llinfo_nd6.ln_next; \ @@ -232,20 +253,30 @@ static struct zone *llinfo_nd6_zone; #define LLINFO_ND6_ZONE_MAX 256 /* maximum elements in zone */ #define LLINFO_ND6_ZONE_NAME "llinfo_nd6" /* name for zone */ +extern int tvtohz(struct timeval *); + +static int nd6_init_done; + +SYSCTL_DECL(_net_inet6_icmp6); + +SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, + CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + nd6_sysctl_drlist, "S,in6_defrouter", ""); + +SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, + CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, + nd6_sysctl_prlist, "S,in6_defrouter", ""); + void -nd6_init() +nd6_init(void) { - static int nd6_init_done = 0; int i; - if (nd6_init_done) { - log(LOG_NOTICE, "nd6_init called more than once (ignored)\n"); - return; - } + VERIFY(!nd6_init_done); all1_sa.sin6_family = AF_INET6; - all1_sa.sin6_len = sizeof(struct sockaddr_in6); - for (i = 0; i < sizeof(all1_sa.sin6_addr); i++) + all1_sa.sin6_len = sizeof (struct sockaddr_in6); + for (i = 0; i < sizeof (all1_sa.sin6_addr); i++) all1_sa.sin6_addr.s6_addr[i] = 0xff; /* initialization of the default router list */ @@ -272,13 +303,20 @@ nd6_init() nd6_init_done = 1; /* start timer */ - timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz); + timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz); } static struct llinfo_nd6 * -nd6_llinfo_alloc(void) +nd6_llinfo_alloc(int how) { - return (zalloc(llinfo_nd6_zone)); + struct 
llinfo_nd6 *ln; + + ln = (how == M_WAITOK) ? zalloc(llinfo_nd6_zone) : + zalloc_noblock(llinfo_nd6_zone); + if (ln != NULL) + bzero(ln, sizeof (*ln)); + + return (ln); } static void @@ -365,60 +403,118 @@ nd6_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri) } } +void +ln_setexpire(struct llinfo_nd6 *ln, uint64_t expiry) +{ + ln->ln_expire = expiry; +} + +static uint64_t +ln_getexpire(struct llinfo_nd6 *ln) +{ + struct timeval caltime; + uint64_t expiry; + + if (ln->ln_expire != 0) { + struct rtentry *rt = ln->ln_rt; + + VERIFY(rt != NULL); + /* account for system time change */ + getmicrotime(&caltime); + + rt->base_calendartime += + NET_CALCULATE_CLOCKSKEW(caltime, + rt->base_calendartime, net_uptime(), rt->base_uptime); + + expiry = rt->base_calendartime + + ln->ln_expire - rt->base_uptime; + } else { + expiry = 0; + } + return (expiry); +} + +void +nd6_ifreset(struct ifnet *ifp) +{ + struct nd_ifinfo *ndi; + + lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_HELD); + VERIFY(ifp != NULL && ifp->if_index < nd_ifinfo_indexlim); + ndi = &nd_ifinfo[ifp->if_index]; + + VERIFY(ndi->initialized); + lck_mtx_assert(&ndi->lock, LCK_MTX_ASSERT_OWNED); + ndi->linkmtu = ifp->if_mtu; + ndi->chlim = IPV6_DEFHLIM; + ndi->basereachable = REACHABLE_TIME; + ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable); + ndi->retrans = RETRANS_TIMER; +} + int nd6_ifattach(struct ifnet *ifp) { + size_t newlim; + struct nd_ifinfo *ndi; + /* * We have some arrays that should be indexed by if_index. * since if_index will grow dynamically, they should grow too. */ - lck_rw_lock_exclusive(nd_if_rwlock); - if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) { - size_t n; - caddr_t q; - size_t newlim = nd_ifinfo_indexlim; - - while (if_index >= newlim) - newlim <<= 1; - - /* grow nd_ifinfo */ - n = newlim * sizeof(struct nd_ifinfo); - q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK); - if (q == NULL) { - lck_rw_done(nd_if_rwlock); - return (ENOBUFS); - } - bzero(q, n); - nd_ifinfo_indexlim = newlim; - if (nd_ifinfo) { - bcopy((caddr_t)nd_ifinfo, q, n/2); - /* - * We might want to pattern fill the old - * array to catch use-after-free cases. - */ - FREE((caddr_t)nd_ifinfo, M_IP6NDP); + lck_rw_lock_shared(nd_if_rwlock); + newlim = nd_ifinfo_indexlim; + if (nd_ifinfo == NULL || if_index >= newlim) { + if (!lck_rw_lock_shared_to_exclusive(nd_if_rwlock)) + lck_rw_lock_exclusive(nd_if_rwlock); + lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_EXCLUSIVE); + + newlim = nd_ifinfo_indexlim; + if (nd_ifinfo == NULL || if_index >= newlim) { + size_t n; + caddr_t q; + + while (if_index >= newlim) + newlim <<= 1; + + /* grow nd_ifinfo */ + n = newlim * sizeof (struct nd_ifinfo); + q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK); + if (q == NULL) { + lck_rw_done(nd_if_rwlock); + return (ENOBUFS); + } + bzero(q, n); + if (nd_ifinfo != NULL) { + bcopy((caddr_t)nd_ifinfo, q, n/2); + /* + * We might want to pattern fill the old + * array to catch use-after-free cases. + */ + FREE((caddr_t)nd_ifinfo, M_IP6NDP); + } + nd_ifinfo = (struct nd_ifinfo *)(void *)q; + nd_ifinfo_indexlim = newlim; } - nd_ifinfo = (struct nd_ifinfo *)(void *)q; } -#define ND nd_ifinfo[ifp->if_index] - /* - * Don't initialize if called twice. 
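The ln_setexpire()/ln_getexpire() pair above keeps deadlines in monotonic-uptime seconds and converts to calendar time only when exporting, re-deriving the uptime-to-calendar offset at read time so wall-clock steps cannot stretch or shrink a remaining lifetime. A userland sketch of the same conversion, with POSIX clocks standing in for net_uptime() and getmicrotime():

#include <stdint.h>
#include <time.h>

/* 0 means "never expires", as with ln_expire */
static uint64_t
expire_as_calendar(uint64_t expire_uptime)
{
        struct timespec mono, wall;
        int64_t remaining;

        if (expire_uptime == 0)
                return (0);
        clock_gettime(CLOCK_MONOTONIC, &mono);
        clock_gettime(CLOCK_REALTIME, &wall);
        /* seconds still left on the monotonic deadline */
        remaining = (int64_t)expire_uptime - (int64_t)mono.tv_sec;
        return ((uint64_t)((int64_t)wall.tv_sec + remaining));
}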
- */ - if (ND.initialized) { - lck_rw_done(nd_if_rwlock); - return (0); + VERIFY(ifp != NULL); + ndi = &nd_ifinfo[ifp->if_index]; + if (!ndi->initialized) { + lck_mtx_init(&ndi->lock, nd_if_lock_grp, nd_if_lock_attr); + ndi->initialized = TRUE; } - lck_mtx_init(&ND.lock, nd_if_lock_grp, nd_if_lock_attr); - ND.initialized = TRUE; - ND.linkmtu = ifp->if_mtu; - ND.chlim = IPV6_DEFHLIM; - ND.basereachable = REACHABLE_TIME; - ND.reachable = ND_COMPUTE_RTIME(ND.basereachable); - ND.retrans = RETRANS_TIMER; - ND.flags = ND6_IFF_PERFORMNUD; + + lck_mtx_lock(&ndi->lock); + + ndi->flags = ND6_IFF_PERFORMNUD; + if (!(ifp->if_flags & IFF_MULTICAST)) + ndi->flags |= ND6_IFF_IFDISABLED; + + nd6_ifreset(ifp); + lck_mtx_unlock(&ndi->lock); + lck_rw_done(nd_if_rwlock); -#undef ND nd6_setmtu(ifp); @@ -436,7 +532,7 @@ nd6_setmtu(struct ifnet *ifp) u_int32_t oldmaxmtu, maxmtu; /* - * Make sure IPv6 is enabled for the interface first, + * Make sure IPv6 is enabled for the interface first, * because this can be called directly from SIOCSIFMTU for IPv4 */ lck_rw_lock_shared(nd_if_rwlock); @@ -465,15 +561,15 @@ nd6_setmtu(struct ifnet *ifp) maxmtu = ndi->maxmtu = ifp->if_mtu; /* - * Decreasing the interface MTU under IPV6 minimum MTU may cause - * undesirable situation. We thus notify the operator of the change - * explicitly. The check for oldmaxmtu is necessary to restrict the - * log to the case of changing the MTU, not initializing it. - */ + * Decreasing the interface MTU under IPV6 minimum MTU may cause + * undesirable situation. We thus notify the operator of the change + * explicitly. The check for oldmaxmtu is necessary to restrict the + * log to the case of changing the MTU, not initializing it. + */ if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { log(LOG_NOTICE, "nd6_setmtu: " - "new link MTU on %s%d (%u) is too small for IPv6\n", - ifp->if_name, ifp->if_unit, (uint32_t)ndi->maxmtu); + "new link MTU on %s (%u) is too small for IPv6\n", + if_name(ifp), (uint32_t)ndi->maxmtu); } ndi->linkmtu = ifp->if_mtu; lck_mtx_unlock(&ndi->lock); @@ -485,15 +581,12 @@ nd6_setmtu(struct ifnet *ifp) } void -nd6_option_init( - void *opt, - int icmp6len, - union nd_opts *ndopts) +nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) { - bzero(ndopts, sizeof(*ndopts)); + bzero(ndopts, sizeof (*ndopts)); ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; - ndopts->nd_opts_last - = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); + ndopts->nd_opts_last = + (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); if (icmp6len == 0) { ndopts->nd_opts_done = 1; @@ -505,8 +598,7 @@ nd6_option_init( * Take one ND option. */ struct nd_opt_hdr * -nd6_option( - union nd_opts *ndopts) +nd6_option(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int olen; @@ -516,16 +608,16 @@ nd6_option( if (!ndopts->nd_opts_last) panic("uninitialized ndopts in nd6_option\n"); if (!ndopts->nd_opts_search) - return NULL; + return (NULL); if (ndopts->nd_opts_done) - return NULL; + return (NULL); nd_opt = ndopts->nd_opts_search; /* make sure nd_opt_len is inside the buffer */ if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { - bzero(ndopts, sizeof(*ndopts)); - return NULL; + bzero(ndopts, sizeof (*ndopts)); + return (NULL); } olen = nd_opt->nd_opt_len << 3; @@ -534,21 +626,21 @@ nd6_option( * Message validation requires that all included * options have a length that is greater than zero. 
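The bounds checks nd6_option() performs are easy to get wrong, so here they are reduced to a self-contained helper: lengths count 8-octet units, a zero length poisons the whole chain, and no option may run past the payload (a simplified model, not the kernel parser):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct nd_opt { uint8_t type; uint8_t len8; };  /* len in 8-octet units */

static const uint8_t *
next_option(const uint8_t *cur, const uint8_t *end, struct nd_opt *out)
{
        size_t olen;

        if (cur == NULL || (size_t)(end - cur) < sizeof (*out))
                return (NULL);                  /* ran off the payload */
        memcpy(out, cur, sizeof (*out));        /* alignment-safe read */
        olen = (size_t)out->len8 << 3;
        if (olen == 0 || olen > (size_t)(end - cur))
                return (NULL);  /* malformed: caller drops the chain */
        return (cur + olen);    /* start of the following option */
}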
*/ - bzero(ndopts, sizeof(*ndopts)); - return NULL; + bzero(ndopts, sizeof (*ndopts)); + return (NULL); } ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); if (ndopts->nd_opts_search > ndopts->nd_opts_last) { /* option overruns the end of buffer, invalid */ - bzero(ndopts, sizeof(*ndopts)); - return NULL; + bzero(ndopts, sizeof (*ndopts)); + return (NULL); } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { /* reached the end of options chain */ ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } - return nd_opt; + return (nd_opt); } /* @@ -557,8 +649,7 @@ nd6_option( * multiple options of the same type. */ int -nd6_options( - union nd_opts *ndopts) +nd6_options(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int i = 0; @@ -568,7 +659,7 @@ nd6_options( if (ndopts->nd_opts_last == NULL) panic("uninitialized ndopts in nd6_options"); if (ndopts->nd_opts_search == NULL) - return 0; + return (0); while (1) { nd_opt = nd6_option(ndopts); @@ -578,8 +669,8 @@ nd6_options( * options have a length that is greater than zero. */ icmp6stat.icp6s_nd_badopt++; - bzero(ndopts, sizeof(*ndopts)); - return -1; + bzero(ndopts, sizeof (*ndopts)); + return (-1); } if (nd_opt == NULL) @@ -596,21 +687,21 @@ nd6_options( nd_opt->nd_opt_type)); /* XXX bark? */ } else { - ndopts->nd_opt_array[nd_opt->nd_opt_type] - = nd_opt; + ndopts->nd_opt_array[nd_opt->nd_opt_type] = + nd_opt; } break; case ND_OPT_PREFIX_INFORMATION: if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { - ndopts->nd_opt_array[nd_opt->nd_opt_type] - = nd_opt; + ndopts->nd_opt_array[nd_opt->nd_opt_type] = + nd_opt; } ndopts->nd_opts_pi_end = - (struct nd_opt_prefix_info *)nd_opt; + (struct nd_opt_prefix_info *)nd_opt; break; case ND_OPT_RDNSS: - /* ignore */ - break; + /* ignore */ + break; default: /* * Unknown options must be silently ignored, @@ -633,21 +724,52 @@ skip1: break; } - return 0; + return (0); } -void -nd6_drain(__unused void *ignored_arg) +struct nd6svc_arg { + int draining; + uint32_t killed; + uint32_t aging_lazy; + uint32_t aging; + uint32_t sticky; + uint32_t found; +}; + +/* + * ND6 service routine to expire default route list and prefix list + */ +static void +nd6_service(void *arg) { + struct nd6svc_arg *ap = arg; struct llinfo_nd6 *ln; struct nd_defrouter *dr; struct nd_prefix *pr; struct ifnet *ifp = NULL; struct in6_ifaddr *ia6, *nia6; - struct in6_addrlifetime *lt6; - struct timeval timenow; + uint64_t timenow; - getmicrotime(&timenow); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + /* + * Since we may drop rnh_lock and nd6_mutex below, we want + * to run this entire operation single threaded. + */ + while (nd6_service_busy) { + nd6log2((LOG_DEBUG, "%s: %s is blocked by %d waiters\n", + __func__, ap->draining ? "drainer" : "timer", + nd6_service_waiters)); + nd6_service_waiters++; + (void) msleep(nd6_service_wc, rnh_lock, (PZERO-1), + __func__, NULL); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + } + + /* We are busy now; tell everyone else to go away */ + nd6_service_busy = TRUE; + + net_update_uptime(); + timenow = net_uptime(); again: /* * The global list llinfo_nd6 is modified by nd6_request() and is @@ -661,13 +783,13 @@ again: * pass thru the entries and clear the flag so they can be processed * during the next timeout. 
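Alongside the new nd6svc_arg counters, nd6_service() serializes itself with a busy flag and a waiter count because it drops its locks mid-scan. The same discipline expressed with pthreads for illustration, where msleep()/wakeup() on nd6_service_wc play the role of the condition variable:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t svc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  svc_cv   = PTHREAD_COND_INITIALIZER;
static bool svc_busy;

void
service(void)
{
        pthread_mutex_lock(&svc_lock);
        while (svc_busy)                /* only one instance may run */
                pthread_cond_wait(&svc_cv, &svc_lock);
        svc_busy = true;
        /* ... scan the lists, possibly dropping svc_lock ... */
        svc_busy = false;
        pthread_cond_broadcast(&svc_cv);        /* cf. wakeup() */
        pthread_mutex_unlock(&svc_lock);
}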
*/ - lck_mtx_lock(rnh_lock); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + ln = llinfo_nd6.ln_next; while (ln != NULL && ln != &llinfo_nd6) { struct rtentry *rt; struct sockaddr_in6 *dst; struct llinfo_nd6 *next; - struct nd_ifinfo *ndi; u_int32_t retrans, flags; /* ln_next/prev/rt is protected by rnh_lock */ @@ -681,6 +803,7 @@ again: ln = next; continue; } + ap->found++; /* rt->rt_ifp should never be NULL */ if ((ifp = rt->rt_ifp) == NULL) { @@ -692,12 +815,12 @@ again: /* rt_llinfo must always be equal to ln */ if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) { panic("%s: rt_llinfo(%p) is not equal to ln(%p)", - __func__, rt->rt_llinfo, ln); + __func__, rt->rt_llinfo, ln); /* NOTREACHED */ } /* rt_key should never be NULL */ - dst = (struct sockaddr_in6 *)(void *)rt_key(rt); + dst = SIN6(rt_key(rt)); if (dst == NULL) { panic("%s: rt(%p) key is NULL ln(%p)", __func__, rt, ln); @@ -707,7 +830,35 @@ /* Set the flag in case we jump to "again" */ ln->ln_flags |= ND6_LNF_TIMER_SKIP; - if (ln->ln_expire > timenow.tv_sec) { + if (ln->ln_expire == 0 || (rt->rt_flags & RTF_STATIC)) { + ap->sticky++; + } else if (ap->draining && (rt->rt_refcnt == 0)) { + /* + * If we are draining, immediately purge non-static + * entries without outstanding route refcnt. + */ + if (ln->ln_state > ND6_LLINFO_INCOMPLETE) + ln->ln_state = ND6_LLINFO_STALE; + else + ln->ln_state = ND6_LLINFO_PURGE; + ln_setexpire(ln, timenow); + } + + /* + * If the entry has not expired, skip it. Take note of the + * state, as entries that are in the STALE state are simply + * waiting to be garbage collected, in which case we can + * relax the callout scheduling (use nd6_prune_lazy). + */ + if (ln->ln_expire > timenow) { + switch (ln->ln_state) { + case ND6_LLINFO_STALE: + ap->aging_lazy++; + break; + default: + ap->aging++; + break; + } RT_UNLOCK(rt); ln = next; continue; @@ -715,17 +866,34 @@ lck_rw_lock_shared(nd_if_rwlock); if (ifp->if_index >= nd_ifinfo_indexlim) { - lck_rw_done(nd_if_rwlock); - RT_UNLOCK(rt); - ln = next; - continue; + /* + * In the event the nd_ifinfo[] array is not in synch + * by now, we don't want to hold on to the llinfo entry + * forever; just purge it rather than have it consume + * resources. That's better than transmitting out of + * the interface as the rest of the layers may not be + * ready as well. + * + * We can retire this logic once we get rid of the + * separate array and utilize a per-ifnet structure. 
+ */ + retrans = RETRANS_TIMER; + flags = ND6_IFF_PERFORMNUD; + if (ln->ln_expire != 0) { + ln->ln_state = ND6_LLINFO_PURGE; + log (LOG_ERR, "%s: purging rt(0x%llx) " + "ln(0x%llx) dst %s, if_index %d >= %d\n", + __func__, (uint64_t)VM_KERNEL_ADDRPERM(rt), + (uint64_t)VM_KERNEL_ADDRPERM(ln), + ip6_sprintf(&dst->sin6_addr), ifp->if_index, + nd_ifinfo_indexlim); + } + } else { + struct nd_ifinfo *ndi = ND_IFINFO(ifp); + VERIFY(ndi->initialized); + retrans = ndi->retrans; + flags = ndi->flags; } - ndi = ND_IFINFO(ifp); - VERIFY(ndi->initialized); - lck_mtx_lock(&ndi->lock); - retrans = ndi->retrans; - flags = ndi->flags; - lck_mtx_unlock(&ndi->lock); lck_rw_done(nd_if_rwlock); RT_LOCK_ASSERT_HELD(rt); @@ -733,19 +901,22 @@ again: switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: if (ln->ln_asked < nd6_mmaxtries) { + struct ifnet *exclifp = ln->ln_exclifp; ln->ln_asked++; - ln->ln_expire = timenow.tv_sec + retrans / 1000; + ln_setexpire(ln, timenow + retrans / 1000); RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); if (ip6_forwarding) { - nd6_prproxy_ns_output(ifp, NULL, - &dst->sin6_addr, ln); + nd6_prproxy_ns_output(ifp, exclifp, + NULL, &dst->sin6_addr, ln); } else { nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); } RT_REMREF(rt); + ap->aging++; + lck_mtx_lock(rnh_lock); } else { struct mbuf *m = ln->ln_hold; ln->ln_hold = NULL; @@ -758,37 +929,44 @@ again: * older rcvif? */ m->m_pkthdr.rcvif = ifp; + RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); icmp6_error(m, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_ADDR, 0); + ICMP6_DST_UNREACH_ADDR, 0); } else { + RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); } nd6_free(rt); + ap->killed++; + lck_mtx_lock(rnh_lock); + rtfree_locked(rt); } - lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); goto again; case ND6_LLINFO_REACHABLE: - if (ln->ln_expire) { + if (ln->ln_expire != 0) { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = rt_expiry(rt, timenow.tv_sec, - nd6_gctimer); + ln_setexpire(ln, timenow + nd6_gctimer); + ap->aging_lazy++; } RT_UNLOCK(rt); break; case ND6_LLINFO_STALE: case ND6_LLINFO_PURGE: - /* Garbage Collection(RFC 2461 5.3) */ - if (ln->ln_expire) { + /* Garbage Collection(RFC 4861 5.3) */ + if (ln->ln_expire != 0) { + RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); nd6_free(rt); - lck_mtx_assert(rnh_lock, - LCK_MTX_ASSERT_NOTOWNED); + ap->killed++; + lck_mtx_lock(rnh_lock); + rtfree_locked(rt); goto again; } else { RT_UNLOCK(rt); @@ -800,39 +978,45 @@ again: /* We need NUD */ ln->ln_asked = 1; ln->ln_state = ND6_LLINFO_PROBE; - ln->ln_expire = timenow.tv_sec + retrans / 1000; + ln_setexpire(ln, timenow + retrans / 1000); RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); nd6_ns_output(ifp, &dst->sin6_addr, &dst->sin6_addr, ln, 0); - lck_mtx_assert(rnh_lock, - LCK_MTX_ASSERT_NOTOWNED); RT_REMREF(rt); + ap->aging++; + lck_mtx_lock(rnh_lock); goto again; } ln->ln_state = ND6_LLINFO_STALE; /* XXX */ - ln->ln_expire = rt_expiry(rt, timenow.tv_sec, - nd6_gctimer); + ln_setexpire(ln, timenow + nd6_gctimer); RT_UNLOCK(rt); + ap->aging_lazy++; break; case ND6_LLINFO_PROBE: if (ln->ln_asked < nd6_umaxtries) { ln->ln_asked++; - ln->ln_expire = timenow.tv_sec + retrans / 1000; + ln_setexpire(ln, timenow + retrans / 1000); RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); nd6_ns_output(ifp, &dst->sin6_addr, &dst->sin6_addr, ln, 0); RT_REMREF(rt); + ap->aging++; + lck_mtx_lock(rnh_lock); } else { + 
RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); nd6_free(rt); + ap->killed++; + lck_mtx_lock(rnh_lock); + rtfree_locked(rt); } - lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); goto again; default: @@ -861,12 +1045,18 @@ again: lck_mtx_lock(nd6_mutex); dr = TAILQ_FIRST(&nd_defrouter); while (dr) { - if (dr->expire && dr->expire < timenow.tv_sec) { + ap->found++; + if (dr->expire != 0 && dr->expire < timenow) { struct nd_defrouter *t; t = TAILQ_NEXT(dr, dr_entry); defrtrlist_del(dr); dr = t; + ap->killed++; } else { + if (dr->expire == 0 || (dr->stateflags & NDDRF_STATIC)) + ap->sticky++; + else + ap->aging_lazy++; dr = TAILQ_NEXT(dr, dr_entry); } } @@ -881,6 +1071,7 @@ again: addrloop: lck_rw_lock_exclusive(&in6_ifaddr_rwlock); for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) { + ap->found++; nia6 = ia6->ia_next; IFA_LOCK(&ia6->ia_ifa); /* @@ -891,8 +1082,7 @@ addrloop: */ IFA_ADDREF_LOCKED(&ia6->ia_ifa); /* check address lifetime */ - lt6 = &ia6->ia6_lifetime; - if (IFA6_IS_INVALID(ia6)) { + if (IFA6_IS_INVALID(ia6, timenow)) { /* * If the expiring address is temporary, try * regenerating a new one. This would be useful when @@ -928,13 +1118,24 @@ addrloop: * of in6_ifaddrs list. */ in6_purgeaddr(&ia6->ia_ifa); + ap->killed++; /* Release extra reference taken above */ IFA_REMREF(&ia6->ia_ifa); goto addrloop; } + /* + * The lazy timer runs every nd6_prune_lazy seconds with at + * most "2 * nd6_prune_lazy - 1" leeway. We consider the worst + * case here and make sure we schedule the regular timer if an + * interface address is about to expire. + */ + if (IFA6_IS_INVALID(ia6, timenow + 3 * nd6_prune_lazy)) + ap->aging++; + else + ap->aging_lazy++; IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa); - if (IFA6_IS_DEPRECATED(ia6)) { + if (IFA6_IS_DEPRECATED(ia6, timenow)) { int oldflags = ia6->ia6_flags; ia6->ia6_flags |= IN6_IFF_DEPRECATED; @@ -986,60 +1187,182 @@ addrloop: lck_rw_done(&in6_ifaddr_rwlock); lck_mtx_lock(nd6_mutex); - /* - * Since we drop the nd6_mutex in prelist_remove, we want to run this - * section single threaded. - */ - while (nd6_drain_busy) { - nd6_drain_waiters++; - msleep(nd6_drain_waitchan, nd6_mutex, (PZERO-1), - __func__, NULL); - lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); - } - nd6_drain_busy = TRUE; - /* expire prefix list */ pr = nd_prefix.lh_first; - while (pr) { + while (pr != NULL) { + ap->found++; /* * check prefix lifetime. * since pltime is just for autoconf, pltime processing for * prefix is not necessary. */ NDPR_LOCK(pr); - if (pr->ndpr_stateflags & NDPRF_PROCESSED) { + if (pr->ndpr_stateflags & NDPRF_PROCESSED_SERVICE) { NDPR_UNLOCK(pr); pr = pr->ndpr_next; continue; } - if (pr->ndpr_expire && pr->ndpr_expire < timenow.tv_sec) { + if (pr->ndpr_expire != 0 && pr->ndpr_expire < timenow) { /* * address expiration and prefix expiration are * separate. NEVER perform in6_purgeaddr here. 
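The timenow + 3 * nd6_prune_lazy lookahead used for address lifetimes above follows from the comment's worst case: the lazy pass fires every nd6_prune_lazy seconds with up to 2 * nd6_prune_lazy - 1 seconds of leeway, so an address expiring within 3 * nd6_prune_lazy - 1 seconds may be missed by it and must be counted as fast-aging. A sketch of the bound (names illustrative):

#include <stdbool.h>
#include <stdint.h>

static bool
needs_fast_timer(uint64_t expiry, uint64_t now, uint64_t prune_lazy)
{
        /* worst-case gap until the next lazy pass actually runs */
        uint64_t worst_gap = prune_lazy + (2 * prune_lazy - 1);

        return (expiry != 0 && expiry <= now + worst_gap);
}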
*/ - pr->ndpr_stateflags |= NDPRF_PROCESSED; + pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE; NDPR_ADDREF_LOCKED(pr); prelist_remove(pr); NDPR_UNLOCK(pr); + pfxlist_onlink_check(); NDPR_REMREF(pr); pr = nd_prefix.lh_first; + ap->killed++; } else { - pr->ndpr_stateflags |= NDPRF_PROCESSED; + if (pr->ndpr_expire == 0 || + (pr->ndpr_stateflags & NDPRF_STATIC)) + ap->sticky++; + else + ap->aging_lazy++; + pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE; NDPR_UNLOCK(pr); pr = pr->ndpr_next; } } LIST_FOREACH(pr, &nd_prefix, ndpr_entry) { NDPR_LOCK(pr); - pr->ndpr_stateflags &= ~NDPRF_PROCESSED; + pr->ndpr_stateflags &= ~NDPRF_PROCESSED_SERVICE; NDPR_UNLOCK(pr); } - nd6_drain_busy = FALSE; - if (nd6_drain_waiters > 0) { - nd6_drain_waiters = 0; - wakeup(nd6_drain_waitchan); - } lck_mtx_unlock(nd6_mutex); + + lck_mtx_lock(rnh_lock); + /* We're done; let others enter */ + nd6_service_busy = FALSE; + if (nd6_service_waiters > 0) { + nd6_service_waiters = 0; + wakeup(nd6_service_wc); + } +} + +void +nd6_drain(void *arg) +{ +#pragma unused(arg) + struct nd6svc_arg sarg; + + nd6log2((LOG_DEBUG, "%s: draining ND6 entries\n", __func__)); + + lck_mtx_lock(rnh_lock); + bzero(&sarg, sizeof (sarg)); + sarg.draining = 1; + nd6_service(&sarg); + nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, " + "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy, + sarg.aging, sarg.sticky, sarg.killed)); + lck_mtx_unlock(rnh_lock); +} + +/* + * We use the ``arg'' variable to decide whether or not the timer we're + * running is the fast timer. We do this to reset the nd6_fast_timer_on + * variable so that later we don't end up ignoring a ``fast timer'' + * request if the 5 second timer is running (see nd6_sched_timeout). + */ +static void +nd6_timeout(void *arg) +{ + struct nd6svc_arg sarg; + + lck_mtx_lock(rnh_lock); + bzero(&sarg, sizeof (sarg)); + nd6_service(&sarg); + nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, " + "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy, + sarg.aging, sarg.sticky, sarg.killed)); + /* re-arm the timer if there's work to do */ + nd6_timeout_run--; + VERIFY(nd6_timeout_run >= 0 && nd6_timeout_run < 2); + if (arg == &nd6_fast_timer_on) + nd6_fast_timer_on = FALSE; + if (sarg.aging_lazy > 0 || sarg.aging > 0 || nd6_sched_timeout_want) { + struct timeval atv, ltv, *leeway; + int lazy = nd6_prune_lazy; + + if (sarg.aging > 0 || lazy < 1) { + atv.tv_usec = 0; + atv.tv_sec = nd6_prune; + leeway = NULL; + } else { + VERIFY(lazy >= 1); + atv.tv_usec = 0; + atv.tv_sec = MAX(nd6_prune, lazy); + ltv.tv_usec = 0; + ltv.tv_sec = MAX(random() % lazy, 1) * 2; + leeway = &ltv; + } + nd6_sched_timeout(&atv, leeway); + } else if (nd6_debug) { + nd6log2((LOG_DEBUG, "%s: not rescheduling timer\n", __func__)); + } + lck_mtx_unlock(rnh_lock); +} + +void +nd6_sched_timeout(struct timeval *atv, struct timeval *ltv) +{ + struct timeval tv; + + lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); + if (atv == NULL) { + tv.tv_usec = 0; + tv.tv_sec = MAX(nd6_prune, 1); + atv = &tv; + ltv = NULL; /* ignore leeway */ + } + /* see comments on top of this file */ + if (nd6_timeout_run == 0) { + if (ltv == NULL) { + nd6log2((LOG_DEBUG, "%s: timer scheduled in " + "T+%llus.%lluu (demand %d)\n", __func__, + (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec, + nd6_sched_timeout_want)); + nd6_fast_timer_on = TRUE; + timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv)); + } else { + nd6log2((LOG_DEBUG, "%s: timer scheduled in " + "T+%llus.%lluu with %llus.%lluu leeway " + "(demand %d)\n", 
__func__, (uint64_t)atv->tv_sec, + (uint64_t)atv->tv_usec, (uint64_t)ltv->tv_sec, + (uint64_t)ltv->tv_usec, nd6_sched_timeout_want)); + nd6_fast_timer_on = FALSE; + timeout_with_leeway(nd6_timeout, NULL, + tvtohz(atv), tvtohz(ltv)); + } + nd6_timeout_run++; + nd6_sched_timeout_want = 0; + } else if (nd6_timeout_run == 1 && ltv == NULL && + nd6_fast_timer_on == FALSE) { + nd6log2((LOG_DEBUG, "%s: fast timer scheduled in " + "T+%llus.%lluu (demand %d)\n", __func__, + (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec, + nd6_sched_timeout_want)); + nd6_fast_timer_on = TRUE; + nd6_sched_timeout_want = 0; + nd6_timeout_run++; + timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv)); + } else { + if (ltv == NULL) { + nd6log2((LOG_DEBUG, "%s: not scheduling timer: " + "timers %d, fast_timer %d, T+%llus.%lluu\n", + __func__, nd6_timeout_run, nd6_fast_timer_on, + (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec)); + } else { + nd6log2((LOG_DEBUG, "%s: not scheduling timer: " + "timers %d, fast_timer %d, T+%llus.%lluu " + "with %llus.%lluu leeway\n", __func__, + nd6_timeout_run, nd6_fast_timer_on, + (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec, + (uint64_t)ltv->tv_sec, (uint64_t)ltv->tv_usec)); + } + } } /* @@ -1053,13 +1376,13 @@ nd6_post_msg(u_int32_t code, struct nd_prefix_list *prefix_list, struct kev_nd6_ra_data nd6_ra_msg_data; struct nd_prefix_list *itr = prefix_list; - bzero(&ev_msg, sizeof(struct kev_msg)); - ev_msg.vendor_code = KEV_VENDOR_APPLE; - ev_msg.kev_class = KEV_NETWORK_CLASS; - ev_msg.kev_subclass = KEV_ND6_SUBCLASS; - ev_msg.event_code = code; + bzero(&ev_msg, sizeof (struct kev_msg)); + ev_msg.vendor_code = KEV_VENDOR_APPLE; + ev_msg.kev_class = KEV_NETWORK_CLASS; + ev_msg.kev_subclass = KEV_ND6_SUBCLASS; + ev_msg.event_code = code; - bzero(&nd6_ra_msg_data, sizeof(nd6_ra_msg_data)); + bzero(&nd6_ra_msg_data, sizeof (nd6_ra_msg_data)); nd6_ra_msg_data.lladdrlen = (dl_addr_len <= ND6_ROUTER_LL_SIZE) ? 
dl_addr_len : ND6_ROUTER_LL_SIZE; bcopy(dl_addr, &nd6_ra_msg_data.lladdr, nd6_ra_msg_data.lladdrlen); @@ -1082,50 +1405,38 @@ nd6_post_msg(u_int32_t code, struct nd_prefix_list *prefix_list, nd6_ra_msg_data.prefix.origin = PR_ORIG_RA; nd6_ra_msg_data.prefix.vltime = itr->pr.ndpr_vltime; nd6_ra_msg_data.prefix.pltime = itr->pr.ndpr_pltime; - nd6_ra_msg_data.prefix.expire = itr->pr.ndpr_expire; + nd6_ra_msg_data.prefix.expire = ndpr_getexpire(&itr->pr); nd6_ra_msg_data.prefix.flags = itr->pr.ndpr_stateflags; nd6_ra_msg_data.prefix.refcnt = itr->pr.ndpr_addrcnt; nd6_ra_msg_data.prefix.if_index = itr->pr.ndpr_ifp->if_index; /* send the message up */ - ev_msg.dv[0].data_ptr = &nd6_ra_msg_data; - ev_msg.dv[0].data_length = sizeof(nd6_ra_msg_data); - ev_msg.dv[1].data_length = 0; + ev_msg.dv[0].data_ptr = &nd6_ra_msg_data; + ev_msg.dv[0].data_length = sizeof (nd6_ra_msg_data); + ev_msg.dv[1].data_length = 0; kev_post_msg(&ev_msg); /* clean up for the next prefix */ - bzero(&nd6_ra_msg_data.prefix, sizeof(nd6_ra_msg_data.prefix)); + bzero(&nd6_ra_msg_data.prefix, sizeof (nd6_ra_msg_data.prefix)); itr = itr->next; nd6_ra_msg_data.list_index++; } } /* - * ND6 timer routine to expire default route list and prefix list + * Regenerate deprecated/invalidated temporary address */ -void -nd6_timer(__unused void *ignored_arg) -{ - nd6_drain(NULL); - timeout(nd6_timer, (caddr_t)0, nd6_prune * hz); -} - static int -regen_tmpaddr( - struct in6_ifaddr *ia6) /* deprecated/invalidated temporary address */ +regen_tmpaddr(struct in6_ifaddr *ia6) { struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; - struct timeval timenow; - - getmicrotime(&timenow); + uint64_t timenow = net_uptime(); ifp = ia6->ia_ifa.ifa_ifp; ifnet_lock_shared(ifp); - for (ifa = ifp->if_addrlist.tqh_first; ifa; - ifa = ifa->ifa_list.tqe_next) - { + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { struct in6_ifaddr *it6; IFA_LOCK(ifa); @@ -1153,7 +1464,7 @@ regen_tmpaddr( * a long period. */ if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && - !IFA6_IS_DEPRECATED(it6)) { + !IFA6_IS_DEPRECATED(it6, timenow)) { IFA_UNLOCK(ifa); if (public_ifa6 != NULL) IFA_REMREF(&public_ifa6->ia_ifa); @@ -1167,7 +1478,7 @@ regen_tmpaddr( * loop here, because there may be a still-preferred temporary * address with the prefix. */ - if (!IFA6_IS_DEPRECATED(it6)) { + if (!IFA6_IS_DEPRECATED(it6, timenow)) { IFA_ADDREF_LOCKED(ifa); /* for public_ifa6 */ IFA_UNLOCK(ifa); if (public_ifa6 != NULL) @@ -1182,17 +1493,17 @@ regen_tmpaddr( if (public_ifa6 != NULL) { int e; - if ((e = in6_tmpifadd(public_ifa6, 0, M_WAITOK)) != 0) { + if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) { log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" " tmp addr,errno=%d\n", e); IFA_REMREF(&public_ifa6->ia_ifa); - return(-1); + return (-1); } IFA_REMREF(&public_ifa6->ia_ifa); - return(0); + return (0); } - return(-1); + return (-1); } /* @@ -1200,8 +1511,7 @@ regen_tmpaddr( * ifp goes away. 
*/ void -nd6_purge( - struct ifnet *ifp) +nd6_purge(struct ifnet *ifp) { struct llinfo_nd6 *ln; struct nd_defrouter *dr, *ndr; @@ -1259,6 +1569,7 @@ nd6_purge( NDPR_ADDREF_LOCKED(pr); prelist_remove(pr); NDPR_UNLOCK(pr); + pfxlist_onlink_check(); NDPR_REMREF(pr); } else { NDPR_UNLOCK(pr); @@ -1301,10 +1612,11 @@ again: if (rt->rt_gateway != NULL && rt->rt_gateway->sa_family == AF_LINK && SDL(rt->rt_gateway)->sdl_index == ifp->if_index) { + RT_ADDREF_LOCKED(rt); RT_UNLOCK(rt); lck_mtx_unlock(rnh_lock); /* - * See comments on nd6_timer() for reasons why + * See comments on nd6_service() for reasons why * this loop is repeated; we bite the costs of * going thru the same llinfo_nd6 more than once * here, since this purge happens during detach, @@ -1313,6 +1625,7 @@ again: * same time (thus a flag wouldn't buy anything). */ nd6_free(rt); + RT_REMREF(rt); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); goto again; } else { @@ -1331,29 +1644,23 @@ again: * the correct variant of the relevant routing routines. */ struct rtentry * -nd6_lookup( - struct in6_addr *addr6, - int create, - struct ifnet *ifp, - int rt_locked) +nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp, int rt_locked) { struct rtentry *rt; struct sockaddr_in6 sin6; unsigned int ifscope; - bzero(&sin6, sizeof(sin6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); + bzero(&sin6, sizeof (sin6)); + sin6.sin6_len = sizeof (struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE; if (rt_locked) { lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); - rt = rtalloc1_scoped_locked((struct sockaddr *)&sin6, - create, 0, ifscope); + rt = rtalloc1_scoped_locked(SA(&sin6), create, 0, ifscope); } else { - rt = rtalloc1_scoped((struct sockaddr *)&sin6, - create, 0, ifscope); + rt = rtalloc1_scoped(SA(&sin6), create, 0, ifscope); } if (rt != NULL) { @@ -1388,9 +1695,9 @@ nd6_lookup( * This hack is necessary for a neighbor which can't * be covered by our own prefix. */ - ifa = ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp); + ifa = ifaof_ifpforaddr(SA(&sin6), ifp); if (ifa == NULL) - return(NULL); + return (NULL); /* * Create a new route. RTF_LLINFO is necessary @@ -1404,8 +1711,7 @@ nd6_lookup( ifa_flags = ifa->ifa_flags; IFA_UNLOCK(ifa); if ((e = rtrequest_scoped_locked(RTM_ADD, - (struct sockaddr *)&sin6, ifa->ifa_addr, - (struct sockaddr *)&all1_sa, + SA(&sin6), ifa->ifa_addr, SA(&all1_sa), (ifa_flags | RTF_HOST | RTF_LLINFO) & ~RTF_CLONING, &rt, ifscope)) != 0) { if (e != EEXIST) @@ -1417,7 +1723,7 @@ nd6_lookup( lck_mtx_unlock(rnh_lock); IFA_REMREF(ifa); if (rt == NULL) - return(NULL); + return (NULL); RT_LOCK(rt); if (rt->rt_llinfo) { @@ -1425,7 +1731,7 @@ nd6_lookup( ln->ln_state = ND6_LLINFO_NOSTATE; } } else { - return(NULL); + return (NULL); } } RT_LOCK_ASSERT_HELD(rt); @@ -1436,9 +1742,9 @@ nd6_lookup( * route to a p2p interface) may have the flag, too, while the * destination is not actually a neighbor. * XXX: we can't use rt->rt_ifp to check for the interface, since - * it might be the loopback interface if the entry is for our - * own address on a non-loopback interface. Instead, we should - * use rt->rt_ifa->ifa_ifp, which would specify the REAL + * it might be the loopback interface if the entry is for our + * own address on a non-loopback interface. Instead, we should + * use rt->rt_ifa->ifa_ifp, which would specify the REAL * interface. 
* Note also that ifa_ifp and ifp may differ when we connect two * interfaces to a same link, install a link prefix to an interface, @@ -1451,7 +1757,7 @@ nd6_lookup( * hence the test for RTF_PROXY. */ if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || - rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL || + rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL || (ifp && rt->rt_ifa->ifa_ifp != ifp && !(rt->rt_flags & RTF_PROXY))) { RT_REMREF_LOCKED(rt); @@ -1462,12 +1768,12 @@ nd6_lookup( ifp ? if_name(ifp) : "unspec"); /* xxx more logs... kazu */ } - return(NULL); + return (NULL); } /* * Caller needs to release reference and call RT_UNLOCK(rt). */ - return(rt); + return (rt); } /* @@ -1476,9 +1782,7 @@ nd6_lookup( * to not reenter the routing code from within itself. */ static int -nd6_is_new_addr_neighbor( - struct sockaddr_in6 *addr, - struct ifnet *ifp) +nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) { struct nd_prefix *pr; struct ifaddr *dstaddr; @@ -1536,7 +1840,7 @@ nd6_is_new_addr_neighbor( * If the address is assigned on the node of the other side of * a p2p interface, the address should be a neighbor. */ - dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr); + dstaddr = ifa_ifwithdstaddr(SA(addr)); if (dstaddr != NULL) { if (dstaddr->ifa_ifp == ifp) { IFA_REMREF(dstaddr); @@ -1570,7 +1874,8 @@ nd6_is_new_addr_neighbor( * XXX: should take care of the destination of a p2p link? */ int -nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp, int rt_locked) +nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp, + int rt_locked) { struct rtentry *rt; @@ -1603,8 +1908,7 @@ nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp, int rt_locked * that the change is safe. */ void -nd6_free( - struct rtentry *rt) +nd6_free(struct rtentry *rt) { struct llinfo_nd6 *ln; struct in6_addr in6; @@ -1617,7 +1921,7 @@ nd6_free( RT_LOCK(rt); RT_ADDREF_LOCKED(rt); /* Extra ref */ ln = rt->rt_llinfo; - in6 = ((struct sockaddr_in6 *)(void *)rt_key(rt))->sin6_addr; + in6 = SIN6(rt_key(rt))->sin6_addr; /* * Prevent another thread from modifying rt_key, rt_gateway @@ -1632,8 +1936,7 @@ nd6_free( * selection even when we are a router, if Scoped Routing is enabled. */ if (ip6_doscopedroute || !ip6_forwarding) { - dr = defrouter_lookup(&((struct sockaddr_in6 *)(void *) - rt_key(rt))->sin6_addr, rt->rt_ifp); + dr = defrouter_lookup(&SIN6(rt_key(rt))->sin6_addr, rt->rt_ifp); if ((ln && ln->ln_router) || dr) { /* @@ -1692,105 +1995,41 @@ nd6_free( * caches, and disable the route entry not to be used in already * cached routes. */ - (void) rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, - rt_mask(rt), 0, (struct rtentry **)0); + (void) rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL); /* Extra ref held above; now free it */ rtfree(rt); } -/* - * Upper-layer reachability hint for Neighbor Unreachability Detection. - * - * XXX cost-effective methods? - */ void -nd6_nud_hint( - struct rtentry *rt, - struct in6_addr *dst6, - int force) -{ - struct llinfo_nd6 *ln; - struct timeval timenow; - - getmicrotime(&timenow); - - /* - * If the caller specified "rt", use that. Otherwise, resolve the - * routing table by supplied "dst6". 
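The multi-clause test in nd6_lookup() above packs several invariants into one condition. Restated as a predicate over a simplified route record (flag bits and fields are stand-ins; as the comment notes, the interface check really uses rt->rt_ifa->ifa_ifp):

#include <stdbool.h>
#include <stddef.h>

/* illustrative flag bits; the real ones live in <net/route.h> */
#define XRTF_GATEWAY    0x1
#define XRTF_LLINFO     0x2
#define XRTF_PROXY      0x4

struct nbr_route {
        unsigned int flags;
        int gw_is_af_link;      /* rt_gateway->sa_family == AF_LINK */
        void *llinfo;           /* rt_llinfo */
        int ifa_ifindex;        /* rt->rt_ifa->ifa_ifp->if_index */
};

static bool
usable_neighbor_entry(const struct nbr_route *rt, int ifindex)
{
        if ((rt->flags & XRTF_GATEWAY) || !(rt->flags & XRTF_LLINFO) ||
            !rt->gw_is_af_link || rt->llinfo == NULL)
                return (false);
        /* proxy entries may answer for addresses on other interfaces */
        return (rt->ifa_ifindex == ifindex || (rt->flags & XRTF_PROXY));
}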
- */ - if (!rt) { - if (!dst6) - return; - /* Callee returns a locked route upon success */ - if ((rt = nd6_lookup(dst6, 0, NULL, 0)) == NULL) - return; - RT_LOCK_ASSERT_HELD(rt); - } else { - RT_LOCK(rt); - RT_ADDREF_LOCKED(rt); - } - - if ((rt->rt_flags & RTF_GATEWAY) != 0 || - (rt->rt_flags & RTF_LLINFO) == 0 || - !rt->rt_llinfo || !rt->rt_gateway || - rt->rt_gateway->sa_family != AF_LINK) { - /* This is not a host route. */ - goto done; - } - - ln = rt->rt_llinfo; - if (ln->ln_state < ND6_LLINFO_REACHABLE) - goto done; - - /* - * if we get upper-layer reachability confirmation many times, - * it is possible we have false information. - */ - if (!force) { - ln->ln_byhint++; - if (ln->ln_byhint > nd6_maxnudhint) - goto done; - } - - ln->ln_state = ND6_LLINFO_REACHABLE; - if (ln->ln_expire) { - struct nd_ifinfo *ndi; - - lck_rw_lock_shared(nd_if_rwlock); - ndi = ND_IFINFO(rt->rt_ifp); - VERIFY(ndi != NULL && ndi->initialized); - lck_mtx_lock(&ndi->lock); - ln->ln_expire = timenow.tv_sec + ndi->reachable; - lck_mtx_unlock(&ndi->lock); - lck_rw_done(nd_if_rwlock); - } -done: - RT_REMREF_LOCKED(rt); - RT_UNLOCK(rt); -} - -void -nd6_rtrequest( - int req, - struct rtentry *rt, - __unused struct sockaddr *sa) +nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa) { +#pragma unused(sa) struct sockaddr *gate = rt->rt_gateway; struct llinfo_nd6 *ln = rt->rt_llinfo; - static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0, - {0,0,0,0,0,0,0,0,0,0,0,0,} }; + static struct sockaddr_dl null_sdl = + { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK }; struct ifnet *ifp = rt->rt_ifp; struct ifaddr *ifa; - struct timeval timenow; + uint64_t timenow; + char buf[MAX_IPv6_STR_LEN]; + VERIFY(nd6_init_done); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); RT_LOCK_ASSERT_HELD(rt); - if ((rt->rt_flags & RTF_GATEWAY)) + /* + * We have rnh_lock held, see if we need to schedule the timer; + * we might do this again below during RTM_RESOLVE, but doing it + * now handles all other cases. + */ + if (nd6_sched_timeout_want) + nd6_sched_timeout(NULL, NULL); + + if (rt->rt_flags & RTF_GATEWAY) return; - if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) { + if (!nd6_need_cache(ifp) && !(rt->rt_flags & RTF_HOST)) { /* * This is probably an interface direct route for a link * which does not need neighbor caches (e.g. fe80::%lo0/64). @@ -1842,15 +2081,16 @@ nd6_rtrequest( } } - getmicrotime(&timenow); + timenow = net_uptime(); + switch (req) { case RTM_ADD: /* * There is no backward compatibility :) * * if ((rt->rt_flags & RTF_HOST) == 0 && - * SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) - * rt->rt_flags |= RTF_CLONING; + * SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff) + * rt->rt_flags |= RTF_CLONING; */ if ((rt->rt_flags & RTF_CLONING) || ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) { @@ -1860,8 +2100,7 @@ nd6_rtrequest( * treated as on-link but is currently not * (RTF_LLINFO && ln == NULL case). */ - if (rt_setgate(rt, rt_key(rt), - (struct sockaddr *)&null_sdl) == 0) { + if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) { gate = rt->rt_gateway; SDL(gate)->sdl_type = ifp->if_type; SDL(gate)->sdl_index = ifp->if_index; @@ -1869,12 +2108,13 @@ nd6_rtrequest( * In case we're called before 1.0 sec. * has elapsed. */ - if (ln != NULL) - ln->ln_expire = + if (ln != NULL) { + ln_setexpire(ln, (ifp->if_eflags & IFEF_IPV6_ND6ALT) - ? 0 : MAX(timenow.tv_sec, 1); + ? 
0 : MAX(timenow, 1)); + } } - if ((rt->rt_flags & RTF_CLONING)) + if (rt->rt_flags & RTF_CLONING) break; } /* @@ -1883,32 +2123,36 @@ nd6_rtrequest( * * There are also couple of other things to be discussed: * - unsolicited NA code needs improvement beforehand - * - RFC2461 says we MAY send multicast unsolicited NA + * - RFC4861 says we MAY send multicast unsolicited NA * (7.2.6 paragraph 4), however, it also says that we * SHOULD provide a mechanism to prevent multicast NA storm. * we don't have anything like it right now. * note that the mechanism needs a mutual agreement * between proxies, which means that we need to implement * a new protocol, or a new kludge. - * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA. + * - from RFC4861 6.2.4, host MUST NOT send an unsolicited RA. * we need to check ip6forwarding before sending it. * (or should we allow proxy ND configuration only for * routers? there's no mention about proxy ND from hosts) */ /* FALLTHROUGH */ case RTM_RESOLVE: - if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) { + if (!(ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK))) { /* * Address resolution isn't necessary for a point to * point link, so we can skip this test for a p2p link. */ if (gate->sa_family != AF_LINK || - gate->sa_len < sizeof(null_sdl)) { + gate->sa_len < sizeof (null_sdl)) { /* Don't complain in case of RTM_ADD */ if (req == RTM_RESOLVE) { - log(LOG_DEBUG, - "nd6_rtrequest: bad gateway " - "value: %s\n", if_name(ifp)); + log(LOG_ERR, "%s: route to %s has bad " + "gateway address (sa_family %u " + "sa_len %u) on %s\n", __func__, + inet_ntop(AF_INET6, + &SIN6(rt_key(rt))->sin6_addr, buf, + sizeof (buf)), gate->sa_family, + gate->sa_len, if_name(ifp)); } break; } @@ -1921,42 +2165,41 @@ nd6_rtrequest( * Case 2: This route may come from cloning, or a manual route * add with a LL address. */ - rt->rt_llinfo = ln = nd6_llinfo_alloc(); - if (ln == NULL) { - log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n"); + rt->rt_llinfo = ln = nd6_llinfo_alloc(M_WAITOK); + if (ln == NULL) break; - } - rt->rt_llinfo_get_ri = nd6_llinfo_get_ri; - rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri; - rt->rt_llinfo_purge = nd6_llinfo_purge; - rt->rt_llinfo_free = nd6_llinfo_free; - nd6_inuse++; nd6_allocated++; - Bzero(ln, sizeof(*ln)); + rt->rt_llinfo_get_ri = nd6_llinfo_get_ri; + rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri; + rt->rt_llinfo_purge = nd6_llinfo_purge; + rt->rt_llinfo_free = nd6_llinfo_free; + rt->rt_flags |= RTF_LLINFO; ln->ln_rt = rt; /* this is required for "ndp" command. - shin */ if (req == RTM_ADD) { - /* + /* * gate should have some valid AF_LINK entry, * and ln->ln_expire should have some lifetime * which is specified by ndp command. */ ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; } else { - /* + /* * When req == RTM_RESOLVE, rt is created and * initialized in rtrequest(), so rt_expire is 0. */ ln->ln_state = ND6_LLINFO_NOSTATE; - + /* In case we're called before 1.0 sec. has elapsed */ - ln->ln_expire = (ifp->if_eflags & IFEF_IPV6_ND6ALT) - ? 0 : MAX(timenow.tv_sec, 1); + ln_setexpire(ln, (ifp->if_eflags & IFEF_IPV6_ND6ALT) ? + 0 : MAX(timenow, 1)); } - rt->rt_flags |= RTF_LLINFO; LN_INSERTHEAD(ln); + nd6_inuse++; + + /* We have at least one entry; arm the timer if not already */ + nd6_sched_timeout(NULL, NULL); /* * If we have too many cache entries, initiate immediate @@ -1965,7 +2208,7 @@ nd6_rtrequest( * cause re-entering rtable related routines triggering an LOR * problem. 
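The forced purge that follows walks victims from the tail of llinfo_nd6, which works because live entries are promoted to the head. The queue is a circular doubly-linked list with a self-referencing sentinel; a miniature model of the LN_DEQUEUE()/LN_INSERTHEAD() macros defined earlier in this file:

#include <stddef.h>

struct qnode {
        struct qnode *next, *prev;
};

/* self-referencing sentinel, like the static llinfo_nd6 head */
static struct qnode qhead = { &qhead, &qhead };

static void
q_dequeue(struct qnode *n)              /* cf. LN_DEQUEUE() */
{
        n->next->prev = n->prev;
        n->prev->next = n->next;
        n->next = n->prev = NULL;
}

static void
q_insert_head(struct qnode *n)          /* cf. LN_INSERTHEAD() */
{
        n->next = qhead.next;
        n->next->prev = n;
        qhead.next = n;
        n->prev = &qhead;
}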
*/ - if (ip6_neighborgcthresh >= 0 && + if (ip6_neighborgcthresh > 0 && nd6_inuse >= ip6_neighborgcthresh) { int i; @@ -1986,7 +2229,7 @@ nd6_rtrequest( ln_end->ln_state = ND6_LLINFO_STALE; else ln_end->ln_state = ND6_LLINFO_PURGE; - ln_end->ln_expire = timenow.tv_sec; + ln_setexpire(ln_end, timenow); RT_UNLOCK(rt_end); } } @@ -1996,13 +2239,12 @@ nd6_rtrequest( * to the interface. */ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp, - &SIN6(rt_key(rt))->sin6_addr); - if (ifa) { + &SIN6(rt_key(rt))->sin6_addr); + if (ifa != NULL) { caddr_t macp = nd6_ifptomac(ifp); - ln->ln_expire = 0; + ln_setexpire(ln, 0); ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - if (macp) { + if (macp != NULL) { Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen); SDL(gate)->sdl_alen = ifp->if_addrlen; } @@ -2020,10 +2262,17 @@ nd6_rtrequest( */ if (rt->rt_if_ref_fn != NULL) { rt->rt_if_ref_fn(lo_ifp, 1); - rt->rt_if_ref_fn(rt->rt_ifp, -1); + rt->rt_if_ref_fn(rt->rt_ifp, + -1); } } - rt->rt_ifp = lo_ifp; /* XXX */ + rt->rt_ifp = lo_ifp; + /* + * If rmx_mtu is not locked, update it + * to the MTU used by the new interface. + */ + if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) + rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* * Make sure rt_ifa be equal to the ifaddr * corresponding to the address. @@ -2038,9 +2287,8 @@ nd6_rtrequest( } IFA_REMREF(ifa); } else if (rt->rt_flags & RTF_ANNOUNCE) { - ln->ln_expire = 0; + ln_setexpire(ln, 0); ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; /* join solicited node multicast for proxy ND */ if (ifp->if_flags & IFF_MULTICAST) { @@ -2055,7 +2303,8 @@ nd6_rtrequest( llsol.s6_addr8[12] = 0xff; if (in6_setscope(&llsol, ifp, NULL)) break; - error = in6_mc_join(ifp, &llsol, NULL, &in6m, 0); + error = in6_mc_join(ifp, &llsol, + NULL, &in6m, 0); if (error) { nd6log((LOG_ERR, "%s: failed to join " "%s (errno=%d)\n", if_name(ifp), @@ -2071,8 +2320,8 @@ nd6_rtrequest( if (ln == NULL) break; /* leave from solicited node multicast for proxy ND */ - if ((rt->rt_flags & RTF_ANNOUNCE) != 0 && - (ifp->if_flags & IFF_MULTICAST) != 0) { + if ((rt->rt_flags & RTF_ANNOUNCE) && + (ifp->if_flags & IFF_MULTICAST)) { struct in6_addr llsol; struct in6_multi *in6m; @@ -2139,7 +2388,8 @@ nd6_siocgdrlst(void *data, int data_is_64) while (dr && i < DRLSTSIZ) { drl_64->defrouter[i].rtaddr = dr->rtaddr; - if (IN6_IS_ADDR_LINKLOCAL(&drl_64->defrouter[i].rtaddr)) { + if (IN6_IS_ADDR_LINKLOCAL( + &drl_64->defrouter[i].rtaddr)) { /* XXX: need to this hack for KAME stack */ drl_64->defrouter[i].rtaddr.s6_addr16[1] = 0; } else { @@ -2150,7 +2400,7 @@ nd6_siocgdrlst(void *data, int data_is_64) } drl_64->defrouter[i].flags = dr->flags; drl_64->defrouter[i].rtlifetime = dr->rtlifetime; - drl_64->defrouter[i].expire = dr->expire; + drl_64->defrouter[i].expire = nddr_getexpire(dr); drl_64->defrouter[i].if_index = dr->ifp->if_index; i++; dr = TAILQ_NEXT(dr, dr_entry); @@ -2168,7 +2418,7 @@ nd6_siocgdrlst(void *data, int data_is_64) /* preserve the interface name */ bcopy(data, drl_32, sizeof (drl_32->ifname)); - while (dr && i < DRLSTSIZ) { + while (dr != NULL && i < DRLSTSIZ) { drl_32->defrouter[i].rtaddr = dr->rtaddr; if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) { /* XXX: need to this hack for KAME stack */ @@ -2181,7 +2431,7 @@ nd6_siocgdrlst(void *data, int data_is_64) } drl_32->defrouter[i].flags = dr->flags; drl_32->defrouter[i].rtlifetime = dr->rtlifetime; - drl_32->defrouter[i].expire = dr->expire; + drl_32->defrouter[i].expire = nddr_getexpire(dr); drl_32->defrouter[i].if_index = 
dr->ifp->if_index; i++; dr = TAILQ_NEXT(dr, dr_entry); @@ -2230,13 +2480,13 @@ nd6_siocgprlst(void *data, int data_is_64) prl_64->prefix[i].vltime = pr->ndpr_vltime; prl_64->prefix[i].pltime = pr->ndpr_pltime; prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index; - prl_64->prefix[i].expire = pr->ndpr_expire; + prl_64->prefix[i].expire = ndpr_getexpire(pr); pfr = pr->ndpr_advrtrs.lh_first; j = 0; while (pfr) { if (j < DRLSTSIZ) { -#define RTRADDR prl_64->prefix[i].advrtr[j] +#define RTRADDR prl_64->prefix[i].advrtr[j] RTRADDR = pfr->router->rtaddr; if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) { /* XXX: hack for KAME */ @@ -2285,13 +2535,13 @@ nd6_siocgprlst(void *data, int data_is_64) prl_32->prefix[i].vltime = pr->ndpr_vltime; prl_32->prefix[i].pltime = pr->ndpr_pltime; prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index; - prl_32->prefix[i].expire = pr->ndpr_expire; + prl_32->prefix[i].expire = ndpr_getexpire(pr); pfr = pr->ndpr_advrtrs.lh_first; j = 0; while (pfr) { if (j < DRLSTSIZ) { -#define RTRADDR prl_32->prefix[i].advrtr[j] +#define RTRADDR prl_32->prefix[i].advrtr[j] RTRADDR = pfr->router->rtaddr; if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) { /* XXX: hack for KAME */ @@ -2326,7 +2576,10 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) struct nd_defrouter *dr; struct nd_prefix *pr; struct rtentry *rt; - int i = ifp->if_index, error = 0; + int i, error = 0; + + VERIFY(ifp != NULL); + i = ifp->if_index; switch (cmd) { case SIOCGDRLST_IN6_32: /* struct in6_drlist_32 */ @@ -2469,7 +2722,8 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) in6_purgeaddr(&ia->ia_ifa); IFA_REMREF(&ia->ia_ifa); lck_mtx_lock(nd6_mutex); - lck_rw_lock_exclusive(&in6_ifaddr_rwlock); + lck_rw_lock_exclusive( + &in6_ifaddr_rwlock); /* * Purging the address caused * in6_ifaddr_rwlock to be @@ -2490,6 +2744,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) NDPR_LOCK(pr); prelist_remove(pr); NDPR_UNLOCK(pr); + pfxlist_onlink_check(); /* * If we were trying to restart this loop * above by changing the value of 'next', we might @@ -2538,7 +2793,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) nb_addr = nbi_32.addr; /* * XXX: KAME specific hack for scoped addresses - * XXXX: for other scopes than link-local? + * XXXX: for other scopes than link-local? */ if (IN6_IS_ADDR_LINKLOCAL(&nbi_32.addr) || IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32.addr)) { @@ -2559,7 +2814,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) nbi_32.state = ln->ln_state; nbi_32.asked = ln->ln_asked; nbi_32.isrouter = ln->ln_router; - nbi_32.expire = ln->ln_expire; + nbi_32.expire = ln_getexpire(ln); RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); bcopy(&nbi_32, data, sizeof (nbi_32)); @@ -2575,7 +2830,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) nb_addr = nbi_64.addr; /* * XXX: KAME specific hack for scoped addresses - * XXXX: for other scopes than link-local? + * XXXX: for other scopes than link-local? */ if (IN6_IS_ADDR_LINKLOCAL(&nbi_64.addr) || IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64.addr)) { @@ -2596,7 +2851,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) nbi_64.state = ln->ln_state; nbi_64.asked = ln->ln_asked; nbi_64.isrouter = ln->ln_router; - nbi_64.expire = ln->ln_expire; + nbi_64.expire = ln_getexpire(ln); RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); bcopy(&nbi_64, data, sizeof (nbi_64)); @@ -2649,14 +2904,10 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) * on reception of inbound ND6 packets. 
(RS/RA/NS/redirect) */ void -nd6_cache_lladdr( - struct ifnet *ifp, - struct in6_addr *from, - char *lladdr, - __unused int lladdrlen, - int type, /* ICMP6 type */ - int code) /* type dependent information */ +nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, + int lladdrlen, int type, int code) { +#pragma unused(lladdrlen) struct rtentry *rt = NULL; struct llinfo_nd6 *ln = NULL; int is_newentry; @@ -2665,7 +2916,8 @@ nd6_cache_lladdr( int olladdr; int llchange; int newstate = 0; - struct timeval timenow; + uint64_t timenow; + boolean_t sched_timeout = FALSE; if (ifp == NULL) panic("ifp == NULL in nd6_cache_lladdr"); @@ -2679,13 +2931,8 @@ nd6_cache_lladdr( /* * Validation about ifp->if_addrlen and lladdrlen must be done in * the caller. - * - * XXX If the link does not have link-layer adderss, what should - * we do? (ifp->if_addrlen == 0) - * Spec says nothing in sections for RA, RS and NA. There's small - * description on it in NS section (RFC 2461 7.2.3). */ - getmicrotime(&timenow); + timenow = net_uptime(); rt = nd6_lookup(from, 0, ifp, 0); if (rt == NULL) { @@ -2783,7 +3030,7 @@ fail: * we must set the timer now, although it is actually * meaningless. */ - ln->ln_expire = timenow.tv_sec + nd6_gctimer; + ln_setexpire(ln, timenow + nd6_gctimer); ln->ln_hold = NULL; if (m != NULL) { @@ -2800,7 +3047,8 @@ fail: } } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ - ln->ln_expire = timenow.tv_sec; + ln_setexpire(ln, timenow); + sched_timeout = TRUE; } } @@ -2821,15 +3069,15 @@ fail: * neighbor cache, this is similar to (6). * This case is rare but we figured that we MUST NOT set IsRouter. * - * newentry olladdr lladdr llchange NS RS RA redir - * D R - * 0 n n -- (1) c ? s - * 0 y n -- (2) c s s - * 0 n y -- (3) c s s - * 0 y y n (4) c s s - * 0 y y y (5) c s s - * 1 -- n -- (6) c c c s - * 1 -- y -- (7) c c s c s + * newentry olladdr lladdr llchange NS RS RA redir + * D R + * 0 n n -- (1) c ? s + * 0 y n -- (2) c s s + * 0 n y -- (3) c s s + * 0 y y n (4) c s s + * 0 y y y (5) c s s + * 1 -- n -- (6) c c c s + * 1 -- y -- (7) c c s c s * * (c=clear s=set) */ @@ -2843,9 +3091,9 @@ fail: break; case ND_REDIRECT: /* - * If the icmp is a redirect to a better router, always set the - * is_router flag. Otherwise, if the entry is newly created, - * clear the flag. [RFC 2461, sec 8.3] + * If the ICMP message is a Redirect to a better router, always + * set the is_router flag. Otherwise, if the entry is newly + * created, then clear the flag. [RFC 4861, sec 8.3] */ if (code == ND_REDIRECT_ROUTER) ln->ln_router = 1; @@ -2890,12 +3138,17 @@ fail: RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); } + if (sched_timeout) { + lck_mtx_lock(rnh_lock); + nd6_sched_timeout(NULL, NULL); + lck_mtx_unlock(rnh_lock); + } } static void -nd6_slowtimo( - __unused void *ignored_arg) +nd6_slowtimo(void *arg) { +#pragma unused(arg) int i; struct nd_ifinfo *nd6if; @@ -2913,18 +3166,19 @@ nd6_slowtimo( * Since reachable time rarely changes by router * advertisements, we SHOULD insure that a new random * value gets recomputed at least once every few hours. 
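The rightmost columns of the decision table above, together with the Redirect rule, reduce to a small rule set for the IsRouter bit. A condensed, hypothetical helper (type and code constants as in <netinet/icmp6.h>):

#include <stdbool.h>

#define ND_ROUTER_SOLICIT       133
#define ND_ROUTER_ADVERT        134
#define ND_NEIGHBOR_SOLICIT     135
#define ND_REDIRECT             137
#define ND_REDIRECT_ROUTER      1       /* redirect target is a router */

static bool
is_router_after(int type, int code, bool newentry, bool olladdr,
    bool lladdr, bool was_router)
{
        switch (type & 0xff) {
        case ND_NEIGHBOR_SOLICIT:       /* rows (6)-(7): new entries clear */
                return (newentry ? false : was_router);
        case ND_ROUTER_SOLICIT:         /* only hosts send RS: always clear */
                return (false);
        case ND_ROUTER_ADVERT:          /* rows (2)-(5) and (7): set */
                if ((!newentry && (olladdr || lladdr)) ||
                    (newentry && lladdr))
                        return (true);
                return (was_router);
        case ND_REDIRECT:               /* RFC 4861, sec 8.3 */
                if (code == ND_REDIRECT_ROUTER)
                        return (true);
                return (newentry ? false : was_router);
        default:
                return (was_router);
        }
}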
- * (RFC 2461, 6.3.4) + * (RFC 4861, 6.3.4) */ nd6if->recalctm = nd6_recalc_reachtm_interval; - nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); + nd6if->reachable = + ND_COMPUTE_RTIME(nd6if->basereachable); } lck_mtx_unlock(&nd6if->lock); } lck_rw_done(nd_if_rwlock); - timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz); + timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz); } -#define senderr(e) { error = (e); goto bad;} +#define senderr(e) { error = (e); goto bad; } int nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv) @@ -2933,7 +3187,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, struct rtentry *rt = hint0, *hint = hint0; struct llinfo_nd6 *ln = NULL; int error = 0; - struct timeval timenow; + uint64_t timenow; struct rtentry *rtrele = NULL; struct nd_ifinfo *ndi; @@ -2973,8 +3227,8 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, if (!(rt->rt_flags & RTF_UP)) { RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); - if ((hint = rt = rtalloc1_scoped((struct sockaddr *)dst, - 1, 0, ifp->if_index)) != NULL) { + if ((hint = rt = rtalloc1_scoped(SA(dst), 1, 0, + ifp->if_index)) != NULL) { RT_LOCK_SPIN(rt); if (rt->rt_ifp != ifp) { /* XXX: loop care? */ @@ -3024,7 +3278,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, } RT_LOCK_SPIN(rt); - gw6 = *((struct sockaddr_in6 *)(void *)rt->rt_gateway); + gw6 = *(SIN6(rt->rt_gateway)); /* If hint is now down, give up */ if (!(rt->rt_flags & RTF_UP)) { @@ -3053,8 +3307,7 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, rtfree(gwrt); lookup: lck_mtx_lock(rnh_lock); - gwrt = rtalloc1_scoped_locked( - (struct sockaddr *)&gw6, 1, 0, + gwrt = rtalloc1_scoped_locked(SA(&gw6), 1, 0, ifp->if_index); RT_LOCK(rt); @@ -3193,8 +3446,10 @@ lookup: lck_rw_done(nd_if_rwlock); log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " - "(ln=%p, rt=%p)\n", - ip6_sprintf(&dst->sin6_addr), ln, rt); + "(ln=0x%llx, rt=0x%llx)\n", + ip6_sprintf(&dst->sin6_addr), + (uint64_t)VM_KERNEL_ADDRPERM(ln), + (uint64_t)VM_KERNEL_ADDRPERM(rt)); senderr(EIO); /* XXX: good error? */ } lck_mtx_unlock(&ndi->lock); @@ -3203,13 +3458,14 @@ lookup: goto sendpkt; /* send anyway */ } - getmicrotime(&timenow); + net_update_uptime(); + timenow = net_uptime(); /* We don't have to do link-layer address resolution on a p2p link. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ln->ln_state < ND6_LLINFO_REACHABLE) { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = rt_expiry(rt, timenow.tv_sec, nd6_gctimer); + ln_setexpire(ln, timenow + nd6_gctimer); } /* @@ -3217,12 +3473,14 @@ lookup: * STALE, we have to change the state to DELAY and a sets a timer to * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do * neighbor unreachability detection on expiration. - * (RFC 2461 7.3.3) + * (RFC 4861 7.3.3) */ if (ln->ln_state == ND6_LLINFO_STALE) { ln->ln_asked = 0; ln->ln_state = ND6_LLINFO_DELAY; - ln->ln_expire = rt_expiry(rt, timenow.tv_sec, nd6_delay); + ln_setexpire(ln, timenow + nd6_delay); + /* N.B.: we will re-arm the timer below. */ + _CASSERT(ND6_LLINFO_DELAY > ND6_LLINFO_INCOMPLETE); } /* @@ -3235,26 +3493,46 @@ lookup: /* * Move this entry to the head of the queue so that it is * less likely for this entry to be a target of forced - * garbage collection (see nd6_rtrequest()). + * garbage collection (see nd6_rtrequest()). 
Do this only + * if the entry is non-permanent (as permanent ones will + * never be purged), and if the number of active entries + * is at least half of the threshold. */ - lck_mtx_lock(rnh_lock); - RT_LOCK_SPIN(rt); - if (ln->ln_flags & ND6_LNF_IN_USE) { - LN_DEQUEUE(ln); - LN_INSERTHEAD(ln); + if (ln->ln_state == ND6_LLINFO_DELAY || + (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 && + nd6_inuse >= (ip6_neighborgcthresh >> 1))) { + lck_mtx_lock(rnh_lock); + if (ln->ln_state == ND6_LLINFO_DELAY) + nd6_sched_timeout(NULL, NULL); + if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 && + nd6_inuse >= (ip6_neighborgcthresh >> 1)) { + RT_LOCK_SPIN(rt); + if (ln->ln_flags & ND6_LNF_IN_USE) { + LN_DEQUEUE(ln); + LN_INSERTHEAD(ln); + } + RT_UNLOCK(rt); + } + lck_mtx_unlock(rnh_lock); } - RT_UNLOCK(rt); - lck_mtx_unlock(rnh_lock); goto sendpkt; } + /* + * If this is a prefix proxy route, record the inbound interface + * so that it can be excluded from the list of interfaces eligible + * for forwarding the proxied NS in nd6_prproxy_ns_output(). + */ + if (rt->rt_flags & RTF_PROXY) + ln->ln_exclifp = ((origifp == ifp) ? NULL : origifp); + /* * There is a neighbor cache entry, but no ethernet address * response yet. Replace the held mbuf (if any) with this * latest one. * * This code conforms to the rate-limiting rule described in Section - * 7.2.2 of RFC 2461, because the timer is set correctly after sending + * 7.2.2 of RFC 4861, because the timer is set correctly after sending * an NS below. */ if (ln->ln_state == ND6_LLINFO_NOSTATE) @@ -3262,47 +3540,56 @@ lookup: if (ln->ln_hold) m_freem(ln->ln_hold); ln->ln_hold = m; - if (ln->ln_expire && ln->ln_asked < nd6_mmaxtries && - ln->ln_expire < timenow.tv_sec) { + if (ln->ln_expire != 0 && ln->ln_asked < nd6_mmaxtries && + ln->ln_expire <= timenow) { ln->ln_asked++; lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); - ln->ln_expire = timenow.tv_sec + ndi->retrans / 1000; + ln_setexpire(ln, timenow + ndi->retrans / 1000); lck_mtx_unlock(&ndi->lock); lck_rw_done(nd_if_rwlock); RT_UNLOCK(rt); /* We still have a reference on rt (for ln) */ if (ip6_forwarding) - nd6_prproxy_ns_output(ifp, NULL, &dst->sin6_addr, ln); + nd6_prproxy_ns_output(ifp, origifp, NULL, + &dst->sin6_addr, ln); else nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); + lck_mtx_lock(rnh_lock); + nd6_sched_timeout(NULL, NULL); + lck_mtx_unlock(rnh_lock); } else { RT_UNLOCK(rt); } /* * Move this entry to the head of the queue so that it is * less likely for this entry to be a target of forced - * garbage collection (see nd6_rtrequest()). + * garbage collection (see nd6_rtrequest()). Do this only + * if the entry is non-permanent (as permanent ones will + * never be purged), and if the number of active entries + * is at least half of the threshold. 
*/ - lck_mtx_lock(rnh_lock); - RT_LOCK_SPIN(rt); - if (ln->ln_flags & ND6_LNF_IN_USE) { - LN_DEQUEUE(ln); - LN_INSERTHEAD(ln); - } - /* Clean up "rt" now while we can */ - if (rt == hint0) { - RT_REMREF_LOCKED(rt); - RT_UNLOCK(rt); - } else { - RT_UNLOCK(rt); - rtfree_locked(rt); + if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 && + nd6_inuse >= (ip6_neighborgcthresh >> 1)) { + lck_mtx_lock(rnh_lock); + RT_LOCK_SPIN(rt); + if (ln->ln_flags & ND6_LNF_IN_USE) { + LN_DEQUEUE(ln); + LN_INSERTHEAD(ln); + } + /* Clean up "rt" now while we can */ + if (rt == hint0) { + RT_REMREF_LOCKED(rt); + RT_UNLOCK(rt); + } else { + RT_UNLOCK(rt); + rtfree_locked(rt); + } + rt = NULL; /* "rt" has been taken care of */ + lck_mtx_unlock(rnh_lock); } - rt = NULL; /* "rt" has been taken care of */ - lck_mtx_unlock(rnh_lock); - error = 0; goto release; @@ -3311,39 +3598,25 @@ sendpkt: RT_LOCK_ASSERT_NOTHELD(rt); /* discard the packet if IPv6 operation is disabled on the interface */ - lck_rw_lock_shared(nd_if_rwlock); - ndi = ND_IFINFO(ifp); - VERIFY(ndi != NULL && ndi->initialized); - /* test is done here without holding ndi lock, for performance */ - if (ndi->flags & ND6_IFF_IFDISABLED) { - lck_rw_done(nd_if_rwlock); + if (ifp->if_eflags & IFEF_IPV6_DISABLED) { error = ENETDOWN; /* better error? */ goto bad; } - lck_rw_done(nd_if_rwlock); - if ((ifp->if_flags & IFF_LOOPBACK) != 0) { + if (ifp->if_flags & IFF_LOOPBACK) { /* forwarding rules require the original scope_id */ m->m_pkthdr.rcvif = origifp; error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt, - (struct sockaddr *)dst, 0, adv); + SA(dst), 0, adv); goto release; } else { /* Do not allow loopback address to wind up on a wire */ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) || - IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) { + IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) { ip6stat.ip6s_badscope++; - /* - * Do not simply drop the packet just like a - * firewall -- we want the the application to feel - * the pain. Return ENETUNREACH like ip6_output - * does in some similar cases. This can startle - * the otherwise clueless process that specifies - * loopback as the source address. 
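[Editor's note] Both requeue sites in nd6_output() above gain the same guard: the entry is moved to the head of the LRU list only when it is non-permanent (ln_expire != 0) and the cache has grown to at least half of ip6_neighborgcthresh, so the common case no longer takes rnh_lock per packet. The predicate, restated as a stand-alone sketch using the patch's variable names:

#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of the gate added before LN_DEQUEUE/LN_INSERTHEAD: permanent
 * entries (expire == 0) are never garbage-collection candidates, and
 * requeueing only matters once forced GC is plausibly near, i.e. the
 * cache is at least half way to the threshold.
 */
static bool
should_requeue(uint64_t ln_expire, int nd6_inuse, int ip6_neighborgcthresh)
{
	return (ln_expire != 0 && ip6_neighborgcthresh > 0 &&
	    nd6_inuse >= (ip6_neighborgcthresh >> 1));
}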
- */ - error = ENETUNREACH; + error = EADDRNOTAVAIL; goto bad; } } @@ -3356,12 +3629,20 @@ sendpkt: RT_UNLOCK(rt); } - if (hint && nstat_collect) - nstat_route_tx(hint, 1, m->m_pkthdr.len, 0); + if (hint != NULL && nstat_collect) { + int scnt; + + if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && + (m->m_pkthdr.tso_segsz > 0)) + scnt = m->m_pkthdr.len / m->m_pkthdr.tso_segsz; + else + scnt = 1; + + nstat_route_tx(hint, scnt, m->m_pkthdr.len, 0); + } m->m_pkthdr.rcvif = NULL; - error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, - (struct sockaddr *)dst, 0, adv); + error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, SA(dst), 0, adv); goto release; bad: @@ -3396,8 +3677,7 @@ release: #undef senderr int -nd6_need_cache( - struct ifnet *ifp) +nd6_need_cache(struct ifnet *ifp) { /* * XXX: we currently do not make neighbor cache on any interface @@ -3423,19 +3703,15 @@ nd6_need_cache( #endif case IFT_BRIDGE: case IFT_CELLULAR: - return(1); + return (1); default: - return(0); + return (0); } } int -nd6_storelladdr( - struct ifnet *ifp, - struct rtentry *rt, - struct mbuf *m, - struct sockaddr *dst, - u_char *desten) +nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m, + struct sockaddr *dst, u_char *desten) { int i; struct sockaddr_dl *sdl; @@ -3450,42 +3726,41 @@ nd6_storelladdr( case IFT_IEEE80211: #endif case IFT_BRIDGE: - ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, - desten); - return(1); + ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, desten); + return (1); case IFT_IEEE1394: for (i = 0; i < ifp->if_addrlen; i++) desten[i] = ~0; - return(1); + return (1); case IFT_ARCNET: *desten = 0; - return(1); + return (1); default: - return(0); /* caller will free mbuf */ + return (0); /* caller will free mbuf */ } } if (rt == NULL) { /* this could happen, if we could not allocate memory */ - return(0); /* caller will free mbuf */ + return (0); /* caller will free mbuf */ } RT_LOCK(rt); if (rt->rt_gateway->sa_family != AF_LINK) { printf("nd6_storelladdr: something odd happens\n"); RT_UNLOCK(rt); - return(0); /* caller will free mbuf */ + return (0); /* caller will free mbuf */ } sdl = SDL(rt->rt_gateway); if (sdl->sdl_alen == 0) { /* this should be impossible, but we bark here for debugging */ printf("nd6_storelladdr: sdl_alen == 0\n"); RT_UNLOCK(rt); - return(0); /* caller will free mbuf */ + return (0); /* caller will free mbuf */ } bcopy(LLADDR(sdl), desten, sdl->sdl_alen); RT_UNLOCK(rt); - return(1); + return (1); } /* @@ -3527,8 +3802,8 @@ nd6_lookup_ipv6(ifnet_t ifp, const struct sockaddr_in6 *ip6_dest, if (route != NULL) RT_UNLOCK(route); result = dlil_resolve_multi(ifp, - (const struct sockaddr*)ip6_dest, - (struct sockaddr *)ll_dest, ll_dest_len); + (const struct sockaddr *)ip6_dest, + SA(ll_dest), ll_dest_len); if (route != NULL) RT_LOCK(route); goto release; @@ -3544,7 +3819,9 @@ nd6_lookup_ipv6(ifnet_t ifp, const struct sockaddr_in6 *ip6_dest, } if (route->rt_gateway->sa_family != AF_LINK) { - printf("nd6_lookup_ipv6: gateway address not AF_LINK\n"); + printf("%s: route %s on %s%d gateway address not AF_LINK\n", + __func__, ip6_sprintf(&ip6_dest->sin6_addr), + route->rt_ifp->if_name, route->rt_ifp->if_unit); result = EADDRNOTAVAIL; goto release; } @@ -3552,7 +3829,9 @@ nd6_lookup_ipv6(ifnet_t ifp, const struct sockaddr_in6 *ip6_dest, sdl = SDL(route->rt_gateway); if (sdl->sdl_alen == 0) { /* this should be impossible, but we bark here for debugging */ - printf("nd6_lookup_ipv6: sdl_alen == 0\n"); + printf("%s: route %s on %s%d sdl_alen == 0\n", __func__, + 
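[Editor's note] One behavioral fix above is easy to miss: nstat_route_tx() is now told how many TCP segments a TSO mbuf represents (len / tso_segsz) instead of always counting one packet per dlil_output() call. A minimal restatement of that computation, with plain parameters standing in for the mbuf pkthdr fields:

#include <stdint.h>

/*
 * Sketch of the TSO-aware count fed to nstat_route_tx(): a large
 * segmentation-offload buffer is cut into roughly len/segsz wire
 * packets by the hardware, so statistics should account for all of
 * them rather than recording a single transmit.
 */
static int
tx_segment_count(uint32_t pktlen, uint32_t tso_segsz, int is_tso_ipv6)
{
	if (is_tso_ipv6 && tso_segsz > 0)
		return ((int)(pktlen / tso_segsz));
	return (1);
}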
ip6_sprintf(&ip6_dest->sin6_addr), route->rt_ifp->if_name, + route->rt_ifp->if_unit); result = EHOSTUNREACH; goto release; } @@ -3576,97 +3855,113 @@ release: int nd6_setifinfo(struct ifnet *ifp, u_int32_t before, u_int32_t after) { + uint32_t b, a; + int err = 0; + /* - * We only care about ND6_IFF_PROXY_PREFIXES for now. + * Handle ND6_IFF_IFDISABLED */ - before &= ND6_IFF_PROXY_PREFIXES; - after &= ND6_IFF_PROXY_PREFIXES; + if ((before & ND6_IFF_IFDISABLED) || + (after & ND6_IFF_IFDISABLED)) { + b = (before & ND6_IFF_IFDISABLED); + a = (after & ND6_IFF_IFDISABLED); - if (before == after) - return (0); + if (b != a && (err = nd6_if_disable(ifp, + ((int32_t)(a - b) > 0))) != 0) + goto done; + } + + /* + * Handle ND6_IFF_PROXY_PREFIXES + */ + if ((before & ND6_IFF_PROXY_PREFIXES) || + (after & ND6_IFF_PROXY_PREFIXES)) { + b = (before & ND6_IFF_PROXY_PREFIXES); + a = (after & ND6_IFF_PROXY_PREFIXES); - return (nd6_if_prproxy(ifp, ((int32_t)(after - before) > 0))); + if (b != a && (err = nd6_if_prproxy(ifp, + ((int32_t)(a - b) > 0))) != 0) + goto done; + } +done: + return (err); } -SYSCTL_DECL(_net_inet6_icmp6); +/* + * Enable/disable IPv6 on an interface, called as part of + * setting/clearing ND6_IFF_IFDISABLED, or during DAD failure. + */ +int +nd6_if_disable(struct ifnet *ifp, boolean_t enable) +{ + ifnet_lock_shared(ifp); + if (enable) + ifp->if_eflags |= IFEF_IPV6_DISABLED; + else + ifp->if_eflags &= ~IFEF_IPV6_DISABLED; + ifnet_lock_done(ifp); + + return (0); +} static int nd6_sysctl_drlist SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) - int error = 0; - char buf[1024]; + char pbuf[MAX_IPv6_STR_LEN]; struct nd_defrouter *dr; - int p64 = proc_is64bit(req->p); + int error = 0; - if (req->newptr) + if (req->newptr != USER_ADDR_NULL) return (EPERM); lck_mtx_lock(nd6_mutex); - if (p64) { - struct in6_defrouter_64 *d, *de; - - for (dr = TAILQ_FIRST(&nd_defrouter); - dr; - dr = TAILQ_NEXT(dr, dr_entry)) { - d = (struct in6_defrouter_64 *)(void *)buf; - de = (struct in6_defrouter_64 *) - (void *)(buf + sizeof (buf)); - - if (d + 1 <= de) { - bzero(d, sizeof (*d)); - d->rtaddr.sin6_family = AF_INET6; - d->rtaddr.sin6_len = sizeof (d->rtaddr); - if (in6_recoverscope(&d->rtaddr, &dr->rtaddr, - dr->ifp) != 0) - log(LOG_ERR, - "scope error in " - "default router list (%s)\n", - ip6_sprintf(&dr->rtaddr)); - d->flags = dr->flags; - d->stateflags = dr->stateflags; - d->stateflags &= ~NDDRF_PROCESSED; - d->rtlifetime = dr->rtlifetime; - d->expire = dr->expire; - d->if_index = dr->ifp->if_index; - } else { - panic("buffer too short"); - } - error = SYSCTL_OUT(req, buf, sizeof (*d)); - if (error) + if (proc_is64bit(req->p)) { + struct in6_defrouter_64 d; + + bzero(&d, sizeof (d)); + d.rtaddr.sin6_family = AF_INET6; + d.rtaddr.sin6_len = sizeof (d.rtaddr); + + TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) { + d.rtaddr.sin6_addr = dr->rtaddr; + if (in6_recoverscope(&d.rtaddr, + &dr->rtaddr, dr->ifp) != 0) + log(LOG_ERR, "scope error in default router " + "list (%s)\n", inet_ntop(AF_INET6, + &dr->rtaddr, pbuf, sizeof (pbuf))); + d.flags = dr->flags; + d.stateflags = dr->stateflags; + d.stateflags &= ~NDDRF_PROCESSED; + d.rtlifetime = dr->rtlifetime; + d.expire = nddr_getexpire(dr); + d.if_index = dr->ifp->if_index; + error = SYSCTL_OUT(req, &d, sizeof (d)); + if (error != 0) break; } } else { - struct in6_defrouter_32 *d_32, *de_32; - - for (dr = TAILQ_FIRST(&nd_defrouter); - dr; - dr = TAILQ_NEXT(dr, dr_entry)) { - d_32 = (struct in6_defrouter_32 *)(void *)buf; - de_32 = (struct in6_defrouter_32 *) 
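[Editor's note] nd6_setifinfo() above now handles two flags with the same before/after idiom: isolate the bit in each word, and if they differ, the sign of (int32_t)(a - b) tells whether the flag was just set or just cleared. A generic sketch of the idiom; the flag value and apply() are placeholders for ND6_IFF_IFDISABLED/ND6_IFF_PROXY_PREFIXES and nd6_if_disable()/nd6_if_prproxy():

#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_FLAG	0x8	/* stands in for ND6_IFF_IFDISABLED */

/* Placeholder for nd6_if_disable() / nd6_if_prproxy(). */
static int
apply(bool enable)
{
	(void)enable;
	return (0);
}

/*
 * Sketch of the transition test: b and a hold the isolated bit from the
 * old and new flag words; (int32_t)(a - b) > 0 exactly when the bit went
 * from clear to set.
 */
static int
handle_flag_change(uint32_t before, uint32_t after)
{
	uint32_t b = (before & EXAMPLE_FLAG);
	uint32_t a = (after & EXAMPLE_FLAG);

	if (b == a)
		return (0);	/* no transition, nothing to do */
	return (apply((int32_t)(a - b) > 0));
}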
- (void *)(buf + sizeof (buf)); - - if (d_32 + 1 <= de_32) { - bzero(d_32, sizeof (*d_32)); - d_32->rtaddr.sin6_family = AF_INET6; - d_32->rtaddr.sin6_len = sizeof (d_32->rtaddr); - if (in6_recoverscope(&d_32->rtaddr, &dr->rtaddr, - dr->ifp) != 0) - log(LOG_ERR, - "scope error in " - "default router list (%s)\n", - ip6_sprintf(&dr->rtaddr)); - d_32->flags = dr->flags; - d_32->stateflags = dr->stateflags; - d_32->stateflags &= ~NDDRF_PROCESSED; - d_32->rtlifetime = dr->rtlifetime; - d_32->expire = dr->expire; - d_32->if_index = dr->ifp->if_index; - } else { - panic("buffer too short"); - } - error = SYSCTL_OUT(req, buf, sizeof (*d_32)); - if (error) + struct in6_defrouter_32 d; + + bzero(&d, sizeof (d)); + d.rtaddr.sin6_family = AF_INET6; + d.rtaddr.sin6_len = sizeof (d.rtaddr); + + TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) { + d.rtaddr.sin6_addr = dr->rtaddr; + if (in6_recoverscope(&d.rtaddr, + &dr->rtaddr, dr->ifp) != 0) + log(LOG_ERR, "scope error in default router " + "list (%s)\n", inet_ntop(AF_INET6, + &dr->rtaddr, pbuf, sizeof (pbuf))); + d.flags = dr->flags; + d.stateflags = dr->stateflags; + d.stateflags &= ~NDDRF_PROCESSED; + d.rtlifetime = dr->rtlifetime; + d.expire = nddr_getexpire(dr); + d.if_index = dr->ifp->if_index; + error = SYSCTL_OUT(req, &d, sizeof (d)); + if (error != 0) break; } } @@ -3678,151 +3973,115 @@ static int nd6_sysctl_prlist SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg1, arg2) - int error = 0; - char buf[1024]; + char pbuf[MAX_IPv6_STR_LEN]; + struct nd_pfxrouter *pfr; + struct sockaddr_in6 s6; struct nd_prefix *pr; - int p64 = proc_is64bit(req->p); + int error = 0; - if (req->newptr) + if (req->newptr != USER_ADDR_NULL) return (EPERM); - lck_mtx_lock(nd6_mutex); - if (p64) { - struct in6_prefix_64 *p, *pe; - - for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { - u_short advrtrs = 0; - size_t advance; - struct sockaddr_in6 *sin6, *s6; - struct nd_pfxrouter *pfr; + bzero(&s6, sizeof (s6)); + s6.sin6_family = AF_INET6; + s6.sin6_len = sizeof (s6); - p = (struct in6_prefix_64 *)(void *)buf; - pe = (struct in6_prefix_64 *) - (void *)(buf + sizeof (buf)); + lck_mtx_lock(nd6_mutex); + if (proc_is64bit(req->p)) { + struct in6_prefix_64 p; - if (p + 1 <= pe) { - bzero(p, sizeof (*p)); - sin6 = (struct sockaddr_in6 *)(p + 1); + bzero(&p, sizeof (p)); + p.origin = PR_ORIG_RA; - NDPR_LOCK(pr); - p->prefix = pr->ndpr_prefix; - if (in6_recoverscope(&p->prefix, - &p->prefix.sin6_addr, pr->ndpr_ifp) != 0) + LIST_FOREACH(pr, &nd_prefix, ndpr_entry) { + NDPR_LOCK(pr); + p.prefix = pr->ndpr_prefix; + if (in6_recoverscope(&p.prefix, + &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0) + log(LOG_ERR, "scope error in " + "prefix list (%s)\n", inet_ntop(AF_INET6, + &p.prefix.sin6_addr, pbuf, sizeof (pbuf))); + p.raflags = pr->ndpr_raf; + p.prefixlen = pr->ndpr_plen; + p.vltime = pr->ndpr_vltime; + p.pltime = pr->ndpr_pltime; + p.if_index = pr->ndpr_ifp->if_index; + p.expire = ndpr_getexpire(pr); + p.refcnt = pr->ndpr_addrcnt; + p.flags = pr->ndpr_stateflags; + p.advrtrs = 0; + LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) + p.advrtrs++; + error = SYSCTL_OUT(req, &p, sizeof (p)); + if (error != 0) { + NDPR_UNLOCK(pr); + break; + } + LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { + s6.sin6_addr = pfr->router->rtaddr; + if (in6_recoverscope(&s6, &pfr->router->rtaddr, + pfr->router->ifp) != 0) log(LOG_ERR, "scope error in prefix list (%s)\n", - ip6_sprintf(&p->prefix.sin6_addr)); - p->raflags = pr->ndpr_raf; - p->prefixlen = pr->ndpr_plen; - p->vltime = pr->ndpr_vltime; - 
p->pltime = pr->ndpr_pltime; - p->if_index = pr->ndpr_ifp->if_index; - p->expire = pr->ndpr_expire; - p->refcnt = pr->ndpr_addrcnt; - p->flags = pr->ndpr_stateflags; - p->origin = PR_ORIG_RA; - advrtrs = 0; - for (pfr = pr->ndpr_advrtrs.lh_first; - pfr; - pfr = pfr->pfr_next) { - if ((void *)&sin6[advrtrs + 1] > - (void *)pe) { - advrtrs++; - continue; - } - s6 = &sin6[advrtrs]; - bzero(s6, sizeof (*s6)); - s6->sin6_family = AF_INET6; - s6->sin6_len = sizeof (*sin6); - if (in6_recoverscope(s6, - &pfr->router->rtaddr, - pfr->router->ifp) != 0) - log(LOG_ERR, "scope error in " - "prefix list (%s)\n", - ip6_sprintf(&pfr->router-> - rtaddr)); - advrtrs++; - } - p->advrtrs = advrtrs; - NDPR_UNLOCK(pr); - } else { - panic("buffer too short"); + inet_ntop(AF_INET6, &s6.sin6_addr, + pbuf, sizeof (pbuf))); + error = SYSCTL_OUT(req, &s6, sizeof (s6)); + if (error != 0) + break; } - advance = sizeof (*p) + sizeof (*sin6) * advrtrs; - error = SYSCTL_OUT(req, buf, advance); - if (error) + NDPR_UNLOCK(pr); + if (error != 0) break; } } else { - struct in6_prefix_32 *p_32, *pe_32; + struct in6_prefix_32 p; - for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { - u_short advrtrs = 0; - size_t advance; - struct sockaddr_in6 *sin6, *s6; - struct nd_pfxrouter *pfr; + bzero(&p, sizeof (p)); + p.origin = PR_ORIG_RA; - p_32 = (struct in6_prefix_32 *)(void *)buf; - pe_32 = (struct in6_prefix_32 *) - (void *)(buf + sizeof (buf)); - - if (p_32 + 1 <= pe_32) { - bzero(p_32, sizeof (*p_32)); - sin6 = (struct sockaddr_in6 *)(p_32 + 1); - - NDPR_LOCK(pr); - p_32->prefix = pr->ndpr_prefix; - if (in6_recoverscope(&p_32->prefix, - &p_32->prefix.sin6_addr, pr->ndpr_ifp) != 0) - log(LOG_ERR, "scope error in prefix " - "list (%s)\n", ip6_sprintf(&p_32-> - prefix.sin6_addr)); - p_32->raflags = pr->ndpr_raf; - p_32->prefixlen = pr->ndpr_plen; - p_32->vltime = pr->ndpr_vltime; - p_32->pltime = pr->ndpr_pltime; - p_32->if_index = pr->ndpr_ifp->if_index; - p_32->expire = pr->ndpr_expire; - p_32->refcnt = pr->ndpr_addrcnt; - p_32->flags = pr->ndpr_stateflags; - p_32->origin = PR_ORIG_RA; - advrtrs = 0; - for (pfr = pr->ndpr_advrtrs.lh_first; - pfr; - pfr = pfr->pfr_next) { - if ((void *)&sin6[advrtrs + 1] > - (void *)pe_32) { - advrtrs++; - continue; - } - s6 = &sin6[advrtrs]; - bzero(s6, sizeof (*s6)); - s6->sin6_family = AF_INET6; - s6->sin6_len = sizeof (*sin6); - if (in6_recoverscope(s6, - &pfr->router->rtaddr, - pfr->router->ifp) != 0) - log(LOG_ERR, "scope error in " - "prefix list (%s)\n", - ip6_sprintf(&pfr->router-> - rtaddr)); - advrtrs++; - } - p_32->advrtrs = advrtrs; + LIST_FOREACH(pr, &nd_prefix, ndpr_entry) { + NDPR_LOCK(pr); + p.prefix = pr->ndpr_prefix; + if (in6_recoverscope(&p.prefix, + &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0) + log(LOG_ERR, + "scope error in prefix list (%s)\n", + inet_ntop(AF_INET6, &p.prefix.sin6_addr, + pbuf, sizeof (pbuf))); + p.raflags = pr->ndpr_raf; + p.prefixlen = pr->ndpr_plen; + p.vltime = pr->ndpr_vltime; + p.pltime = pr->ndpr_pltime; + p.if_index = pr->ndpr_ifp->if_index; + p.expire = ndpr_getexpire(pr); + p.refcnt = pr->ndpr_addrcnt; + p.flags = pr->ndpr_stateflags; + p.advrtrs = 0; + LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) + p.advrtrs++; + error = SYSCTL_OUT(req, &p, sizeof (p)); + if (error != 0) { NDPR_UNLOCK(pr); - } else { - panic("buffer too short"); + break; + } + LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { + s6.sin6_addr = pfr->router->rtaddr; + if (in6_recoverscope(&s6, &pfr->router->rtaddr, + pfr->router->ifp) != 0) + log(LOG_ERR, + "scope error in 
prefix list (%s)\n", + inet_ntop(AF_INET6, &s6.sin6_addr, + pbuf, sizeof (pbuf))); + error = SYSCTL_OUT(req, &s6, sizeof (s6)); + if (error != 0) + break; } - advance = sizeof (*p_32) + sizeof (*sin6) * advrtrs; - error = SYSCTL_OUT(req, buf, advance); - if (error) + NDPR_UNLOCK(pr); + if (error != 0) break; } } lck_mtx_unlock(nd6_mutex); + return (error); } -SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, - CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_drlist, "S,in6_defrouter",""); -SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, - CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_prlist, "S,in6_defrouter",""); - diff --git a/bsd/netinet6/nd6.h b/bsd/netinet6/nd6.h index a831fb5ee..7996b090d 100644 --- a/bsd/netinet6/nd6.h +++ b/bsd/netinet6/nd6.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,9 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ - -/* $FreeBSD: src/sys/netinet6/nd6.h,v 1.2.2.3 2001/08/13 01:10:49 simokawa Exp $ */ -/* $KAME: nd6.h,v 1.55 2001/04/27 15:09:49 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
@@ -59,17 +55,17 @@ */ #ifndef _NETINET6_ND6_H_ -#define _NETINET6_ND6_H_ +#define _NETINET6_ND6_H_ #include /* see net/route.h, or net/if_inarp.h */ #ifndef RTF_ANNOUNCE -#define RTF_ANNOUNCE RTF_PROTO2 +#define RTF_ANNOUNCE RTF_PROTO2 #endif #include -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #include #include #include @@ -84,24 +80,24 @@ struct llinfo_nd6 { /* * The following are protected by rt_lock */ + struct ifnet *ln_exclifp; /* excluded interface (prefix proxy) */ struct mbuf *ln_hold; /* last packet until resolved/timeout */ - long ln_asked; /* number of queries already sent for this addr */ - u_int32_t ln_expire; /* lifetime for NDP state transition */ + uint32_t ln_asked; /* # of queries already sent for this addr */ short ln_state; /* reachability state */ short ln_router; /* 2^0: ND6 router bit */ - int ln_byhint; /* # of times we made it reachable by UL hint */ u_int32_t ln_flags; /* flags; see below */ - struct if_llreach *ln_llreach; /* link-layer reachability record */ + u_int64_t ln_expire; /* lifetime for NDP state transition */ u_int64_t ln_lastused; /* last used timestamp */ + struct if_llreach *ln_llreach; /* link-layer reachability record */ }; /* Values for ln_flags */ #define ND6_LNF_TIMER_SKIP 0x1 /* modified by nd6_timer() */ #define ND6_LNF_IN_USE 0x2 /* currently in llinfo_nd6 list */ -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ -#define ND6_LLINFO_PURGE -3 -#define ND6_LLINFO_NOSTATE -2 +#define ND6_LLINFO_PURGE -3 +#define ND6_LLINFO_NOSTATE -2 /* * We don't need the WAITDELETE state any more, but we keep the definition * in a comment line instead of removing it. This is necessary to avoid @@ -109,35 +105,38 @@ struct llinfo_nd6 { * affect backward compatibility with old applications. * (20000711 jinmei@kame.net) */ -/* #define ND6_LLINFO_WAITDELETE -1 */ -#define ND6_LLINFO_INCOMPLETE 0 -#define ND6_LLINFO_REACHABLE 1 -#define ND6_LLINFO_STALE 2 -#define ND6_LLINFO_DELAY 3 -#define ND6_LLINFO_PROBE 4 - -#ifdef XNU_KERNEL_PRIVATE -#define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE) -#define ND6_LLINFO_PERMANENT(n) (((n)->ln_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) - -#define ND6_EUI64_GBIT 0x01 -#define ND6_EUI64_UBIT 0x02 - -#define ND6_EUI64_TO_IFID(in6) do {(in6)->s6_addr[8] ^= ND6_EUI64_UBIT; } while (0) -#define ND6_EUI64_GROUP(in6) ((in6)->s6_addr[8] & ND6_EUI64_GBIT) -#define ND6_EUI64_INDIVIDUAL(in6) (!ND6_EUI64_GROUP(in6)) -#define ND6_EUI64_LOCAL(in6) ((in6)->s6_addr[8] & ND6_EUI64_UBIT) -#define ND6_EUI64_UNIVERSAL(in6) (!ND6_EUI64_LOCAL(in6)) -#define ND6_IFID_LOCAL(in6) (!ND6_EUI64_LOCAL(in6)) -#define ND6_IFID_UNIVERSAL(in6) (!ND6_EUI64_UNIVERSAL(in6)) -#endif /* XNU_KERNEL_PRIVATE */ - -#if !defined(XNU_KERNEL_PRIVATE) +/* #define ND6_LLINFO_WAITDELETE -1 */ +#define ND6_LLINFO_INCOMPLETE 0 +#define ND6_LLINFO_REACHABLE 1 +#define ND6_LLINFO_STALE 2 +#define ND6_LLINFO_DELAY 3 +#define ND6_LLINFO_PROBE 4 + +#ifdef BSD_KERNEL_PRIVATE +#define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE) +#define ND6_LLINFO_PERMANENT(n) \ + (((n)->ln_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE)) + +#define ND6_EUI64_GBIT 0x01 +#define ND6_EUI64_UBIT 0x02 + +#define ND6_EUI64_TO_IFID(in6) \ + do {(in6)->s6_addr[8] ^= ND6_EUI64_UBIT; } while (0) + +#define ND6_EUI64_GROUP(in6) ((in6)->s6_addr[8] & ND6_EUI64_GBIT) +#define ND6_EUI64_INDIVIDUAL(in6) (!ND6_EUI64_GROUP(in6)) +#define ND6_EUI64_LOCAL(in6) ((in6)->s6_addr[8] & ND6_EUI64_UBIT) +#define 
ND6_EUI64_UNIVERSAL(in6) (!ND6_EUI64_LOCAL(in6)) +#define ND6_IFID_LOCAL(in6) (!ND6_EUI64_LOCAL(in6)) +#define ND6_IFID_UNIVERSAL(in6) (!ND6_EUI64_UNIVERSAL(in6)) +#endif /* BSD_KERNEL_PRIVATE */ + +#if !defined(BSD_KERNEL_PRIVATE) struct nd_ifinfo { #else /* For binary compatibility, this structure must not change */ struct nd_ifinfo_compat { -#endif /* !XNU_KERNEL_PRIVATE */ +#endif /* !BSD_KERNEL_PRIVATE */ u_int32_t linkmtu; /* LinkMTU */ u_int32_t maxmtu; /* Upper bound of LinkMTU */ u_int32_t basereachable; /* BaseReachableTime */ @@ -153,7 +152,7 @@ struct nd_ifinfo_compat { u_int8_t randomid[8]; /* current random ID */ }; -#if defined(XNU_KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) struct nd_ifinfo { decl_lck_mtx_data(, lock); boolean_t initialized; /* Flag to see the entry is initialized */ @@ -173,34 +172,43 @@ struct nd_ifinfo { /* keep track of routers and prefixes on this link */ int32_t nprefixes; int32_t ndefrouters; + struct in6_cga_modifier local_cga_modifier; }; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ -#define ND6_IFF_PERFORMNUD 0x1 +#define ND6_IFF_PERFORMNUD 0x1 #if defined(PRIVATE) -#define ND6_IFF_ACCEPT_RTADV 0x2 /* APPLE: not used. Innterface specific router - * advertisments are handled with a specific ifnet - * flag: IFEF_ACCEPT_RTADVD - */ -#define ND6_IFF_PREFER_SOURCE 0x4 /* APPLE: NOT USED not related to ND. */ -#define ND6_IFF_IFDISABLED 0x8 /* IPv6 operation is disabled due to - * DAD failure. (XXX: not ND-specific) - */ -#define ND6_IFF_DONT_SET_IFROUTE 0x10 /* NOT USED */ + +/* + * APPLE: not used. Interface specific router advertisements are handled with a + * specific ifnet flag: IFEF_ACCEPT_RTADVD + */ +#define ND6_IFF_ACCEPT_RTADV 0x2 + +/* APPLE: NOT USED not related to ND. */ +#define ND6_IFF_PREFER_SOURCE 0x4 + +/* IPv6 operation is disabled due to * DAD failure. (XXX: not ND-specific) */ +#define ND6_IFF_IFDISABLED 0x8 + +#define ND6_IFF_DONT_SET_IFROUTE 0x10 /* NOT USED */ #endif /* PRIVATE */ -#define ND6_IFF_PROXY_PREFIXES 0x20 -#define ND6_IFF_IGNORE_NA 0x40 +#define ND6_IFF_PROXY_PREFIXES 0x20 +#define ND6_IFF_IGNORE_NA 0x40 +#if defined(PRIVATE) +#define ND6_IFF_INSECURE 0x80 +#endif struct in6_nbrinfo { char ifname[IFNAMSIZ]; /* if name, e.g. 
"en0" */ struct in6_addr addr; /* IPv6 address of the neighbor */ - long asked; /* number of queries already sent for this addr */ + long asked; /* # of queries already sent for this addr */ int isrouter; /* if it acts as a router */ int state; /* reachability state */ int expire; /* lifetime for NDP state transition */ }; -#if defined(XNU_KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) struct in6_nbrinfo_32 { char ifname[IFNAMSIZ]; struct in6_addr addr; @@ -218,10 +226,10 @@ struct in6_nbrinfo_64 { int state; int expire; } __attribute__((aligned(8))); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ -#define DRLSTSIZ 10 -#define PRLSTSIZ 10 +#define DRLSTSIZ 10 +#define PRLSTSIZ 10 struct in6_drlist { char ifname[IFNAMSIZ]; @@ -234,7 +242,7 @@ struct in6_drlist { } defrouter[DRLSTSIZ]; }; -#if defined(XNU_KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) struct in6_drlist_32 { char ifname[IFNAMSIZ]; struct { @@ -256,13 +264,13 @@ struct in6_drlist_64 { u_short if_index __attribute__((aligned(8))); } defrouter[DRLSTSIZ] __attribute__((aligned(8))); }; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ /* valid values for stateflags */ #define NDDRF_INSTALLED 0x1 /* installed in the routing table */ #define NDDRF_IFSCOPE 0x2 /* installed as a scoped route */ #define NDDRF_STATIC 0x4 /* for internal use only */ -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define NDDRF_PROCESSED 0x10 #endif @@ -275,7 +283,7 @@ struct in6_defrouter { u_short if_index; }; -#if defined(XNU_KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) struct in6_defrouter_32 { struct sockaddr_in6 rtaddr; u_char flags; @@ -293,7 +301,7 @@ struct in6_defrouter_64 { u_long expire __attribute__((aligned(8))); u_short if_index __attribute__((aligned(8))); } __attribute__((aligned(8))); -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ struct in6_prlist { char ifname[IFNAMSIZ]; @@ -311,7 +319,7 @@ struct in6_prlist { } prefix[PRLSTSIZ]; }; -#if defined(XNU_KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) struct in6_prlist_32 { char ifname[IFNAMSIZ]; struct { @@ -344,7 +352,7 @@ struct in6_prlist_64 { struct in6_addr advrtr[DRLSTSIZ]; } prefix[PRLSTSIZ]; }; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ struct in6_prefix { struct sockaddr_in6 prefix; @@ -361,7 +369,7 @@ struct in6_prefix { /* struct sockaddr_in6 advrtr[] */ }; -#if defined(XNU_KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) struct in6_prefix_32 { struct sockaddr_in6 prefix; struct prf_ra raflags; @@ -391,7 +399,7 @@ struct in6_prefix_64 { u_short advrtrs; /* struct sockaddr_in6 advrtr[] */ }; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ struct in6_ondireq { char ifname[IFNAMSIZ]; @@ -408,7 +416,7 @@ struct in6_ondireq { } ndi; }; -#if !defined(XNU_KERNEL_PRIVATE) +#if !defined(BSD_KERNEL_PRIVATE) struct in6_ndireq { char ifname[IFNAMSIZ]; struct nd_ifinfo ndi; @@ -418,17 +426,17 @@ struct in6_ndireq { char ifname[IFNAMSIZ]; struct nd_ifinfo_compat ndi; }; -#endif /* !XNU_KERNEL_PRIVATE */ +#endif /* !BSD_KERNEL_PRIVATE */ struct in6_ndifreq { char ifname[IFNAMSIZ]; u_long ifindex; }; -#define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */ -#define RTR_SOLICITATION_INTERVAL 4 /* 4sec */ +#define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */ +#define RTR_SOLICITATION_INTERVAL 4 /* 4sec */ -#if defined(XNU_KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) struct in6_ndifreq_32 { char ifname[IFNAMSIZ]; u_int32_t ifindex; @@ -438,34 +446,36 @@ struct in6_ndifreq_64 { char 
ifname[IFNAMSIZ]; u_long ifindex __attribute__((aligned(8))); }; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ /* Prefix status */ -#define NDPRF_ONLINK 0x1 -#define NDPRF_DETACHED 0x2 -#define NDPRF_STATIC 0x100 -#define NDPRF_IFSCOPE 0x1000 -#define NDPRF_PRPROXY 0x2000 -#ifdef XNU_KERNEL_PRIVATE -#define NDPRF_PROCESSED 0x08000 +#define NDPRF_ONLINK 0x1 +#define NDPRF_DETACHED 0x2 +#define NDPRF_STATIC 0x100 +#define NDPRF_IFSCOPE 0x1000 +#define NDPRF_PRPROXY 0x2000 +#ifdef BSD_KERNEL_PRIVATE +#define NDPRF_PROCESSED_ONLINK 0x08000 +#define NDPRF_PROCESSED_SERVICE 0x10000 #endif /* protocol constants */ -#define MAX_RTR_SOLICITATION_DELAY 1 /*1sec*/ -#define RTR_SOLICITATION_INTERVAL 4 /*4sec*/ -#define MAX_RTR_SOLICITATIONS 3 +#define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */ +#define RTR_SOLICITATION_INTERVAL 4 /* 4sec */ +#define MAX_RTR_SOLICITATIONS 3 -#define ND6_INFINITE_LIFETIME 0xffffffff -#define ND6_MAX_LIFETIME 0x7fffffff +#define ND6_INFINITE_LIFETIME 0xffffffff +#define ND6_MAX_LIFETIME 0x7fffffff -#ifdef XNU_KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE /* * Protects nd_ifinfo[] */ -__private_extern__ lck_rw_t *nd_if_rwlock; +extern lck_rw_t *nd_if_rwlock; -#define ND_IFINFO(ifp) \ - ((ifp)->if_index < nd_ifinfo_indexlim ? &nd_ifinfo[(ifp)->if_index] : NULL) +#define ND_IFINFO(ifp) \ + ((ifp)->if_index < nd_ifinfo_indexlim ? &nd_ifinfo[(ifp)->if_index] : \ + NULL) /* * In a more readable form, we derive linkmtu based on: @@ -479,7 +489,7 @@ __private_extern__ lck_rw_t *nd_if_rwlock; * else * linkmtu = ifp->if_mtu; */ -#define IN6_LINKMTU(ifp) \ +#define IN6_LINKMTU(ifp) \ ((ND_IFINFO(ifp) == NULL || !ND_IFINFO(ifp)->initialized) ? \ (ifp)->if_mtu : ((ND_IFINFO(ifp)->linkmtu && \ ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) ? 
ND_IFINFO(ifp)->linkmtu : \ @@ -487,35 +497,36 @@ __private_extern__ lck_rw_t *nd_if_rwlock; ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu))) /* node constants */ -#define MAX_REACHABLE_TIME 3600000 /* msec */ -#define REACHABLE_TIME 30000 /* msec */ -#define RETRANS_TIMER 1000 /* msec */ -#define MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */ -#define MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */ -#define DEF_TEMP_VALID_LIFETIME 604800 /* 1 week */ -#define DEF_TEMP_PREFERRED_LIFETIME 86400 /* 1 day */ -#define TEMPADDR_REGEN_ADVANCE 5 /* sec */ -#define MAX_TEMP_DESYNC_FACTOR 600 /* 10 min */ -#define ND_COMPUTE_RTIME(x) \ - (((MIN_RANDOM_FACTOR * (x >> 10)) + (random() & \ +#define MAX_REACHABLE_TIME 3600000 /* msec */ +#define REACHABLE_TIME 30000 /* msec */ +#define RETRANS_TIMER 1000 /* msec */ +#define MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */ +#define MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */ +#define DEF_TEMP_VALID_LIFETIME 604800 /* 1 week */ +#define DEF_TEMP_PREFERRED_LIFETIME 86400 /* 1 day */ +#define TEMPADDR_REGEN_ADVANCE 5 /* sec */ +#define MAX_TEMP_DESYNC_FACTOR 600 /* 10 min */ +#define ND_COMPUTE_RTIME(x) \ + (((MIN_RANDOM_FACTOR * (x >> 10)) + (RandomULong() & \ ((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000) TAILQ_HEAD(nd_drhead, nd_defrouter); struct nd_defrouter { decl_lck_mtx_data(, nddr_lock); - uint32_t nddr_refcount; - uint32_t nddr_debug; TAILQ_ENTRY(nd_defrouter) dr_entry; - struct in6_addr rtaddr; - u_char flags; /* flags on RA message */ + struct in6_addr rtaddr; + u_int32_t nddr_refcount; + u_int32_t nddr_debug; + u_int64_t expire; + u_int64_t base_calendartime; /* calendar time at creation */ + u_int64_t base_uptime; /* uptime at creation */ + u_char flags; /* flags on RA message */ u_char stateflags; u_short rtlifetime; - u_int32_t expire; - struct ifnet *ifp; unsigned int genid; int err; - void (*nddr_trace) /* callback fn for tracing refs */ - (struct nd_defrouter *, int); + struct ifnet *ifp; + void (*nddr_trace)(struct nd_defrouter *, int); /* trace callback fn */ }; #define NDDR_LOCK_ASSERT_HELD(_nddr) \ @@ -558,41 +569,43 @@ struct nd_prefix { decl_lck_mtx_data(, ndpr_lock); u_int32_t ndpr_refcount; /* reference count */ u_int32_t ndpr_debug; /* see ifa_debug flags */ - struct ifnet *ndpr_ifp; - struct rtentry *ndpr_rt; + struct ifnet *ndpr_ifp; + struct rtentry *ndpr_rt; LIST_ENTRY(nd_prefix) ndpr_entry; - struct sockaddr_in6 ndpr_prefix; /* prefix */ + struct sockaddr_in6 ndpr_prefix; /* prefix */ struct in6_addr ndpr_mask; /* netmask derived from the prefix */ struct in6_addr ndpr_addr; /* address that is derived from the prefix */ - u_int32_t ndpr_vltime; /* advertised valid lifetime */ - u_int32_t ndpr_pltime; /* advertised preferred lifetime */ - time_t ndpr_preferred; /* preferred time of the prefix */ - time_t ndpr_expire; /* expiration time of the prefix */ - time_t ndpr_lastupdate; /* reception time of last advertisement */ - struct prf_ra ndpr_flags; - u_int32_t ndpr_stateflags; /* actual state flags */ + u_int32_t ndpr_vltime; /* advertised valid lifetime */ + u_int32_t ndpr_pltime; /* advertised preferred lifetime */ + u_int64_t ndpr_preferred; /* preferred time of the prefix */ + u_int64_t ndpr_expire; /* expiration time of the prefix */ + u_int64_t ndpr_lastupdate; /* rx time of last advertisement */ + u_int64_t ndpr_base_calendartime; /* calendar time at creation */ + u_int64_t ndpr_base_uptime; /* uptime at creation */ + struct prf_ra ndpr_flags; + unsigned int ndpr_genid; /* protects ndpr_advrtrs */ + u_int32_t ndpr_stateflags; /* actual 
state flags */ /* list of routers that advertise the prefix: */ LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs; - u_char ndpr_plen; - int ndpr_addrcnt; /* reference counter from addresses */ - u_int32_t ndpr_allmulti_cnt; /* total all-multi reqs */ - u_int32_t ndpr_prproxy_sols_cnt; /* total # of proxied NS */ + u_char ndpr_plen; + int ndpr_addrcnt; /* reference counter from addresses */ + u_int32_t ndpr_allmulti_cnt; /* total all-multi reqs */ + u_int32_t ndpr_prproxy_sols_cnt; /* total # of proxied NS */ struct prproxy_sols_tree ndpr_prproxy_sols; /* tree of proxied NS */ - void (*ndpr_trace) /* callback fn for tracing refs */ - (struct nd_prefix *, int); + void (*ndpr_trace)(struct nd_prefix *, int); /* trace callback fn */ }; -#define ndpr_next ndpr_entry.le_next +#define ndpr_next ndpr_entry.le_next -#define ndpr_raf ndpr_flags -#define ndpr_raf_onlink ndpr_flags.onlink -#define ndpr_raf_auto ndpr_flags.autonomous -#define ndpr_raf_router ndpr_flags.router +#define ndpr_raf ndpr_flags +#define ndpr_raf_onlink ndpr_flags.onlink +#define ndpr_raf_auto ndpr_flags.autonomous +#define ndpr_raf_router ndpr_flags.router /* * We keep expired prefix for certain amount of time, for validation purposes. * 1800s = MaxRtrAdvInterval */ -#define NDPR_KEEP_EXPIRED (1800 * 2) +#define NDPR_KEEP_EXPIRED (1800 * 2) #define NDPR_LOCK_ASSERT_HELD(_ndpr) \ lck_mtx_assert(&(_ndpr)->ndpr_lock, LCK_MTX_ASSERT_OWNED) @@ -645,20 +658,20 @@ struct inet6_ndpr_msghdr { u_char prm_plen; /* length of prefix in bits */ }; -#define prm_raf_onlink prm_flags.prf_ra.onlink -#define prm_raf_auto prm_flags.prf_ra.autonomous +#define prm_raf_onlink prm_flags.prf_ra.onlink +#define prm_raf_auto prm_flags.prf_ra.autonomous -#define prm_statef_onlink prm_flags.prf_state.onlink +#define prm_statef_onlink prm_flags.prf_state.onlink -#define prm_rrf_decrvalid prm_flags.prf_rr.decrvalid -#define prm_rrf_decrprefd prm_flags.prf_rr.decrprefd +#define prm_rrf_decrvalid prm_flags.prf_rr.decrvalid +#define prm_rrf_decrprefd prm_flags.prf_rr.decrprefd -#define ifpr2ndpr(ifpr) ((struct nd_prefix *)(ifpr)) -#define ndpr2ifpr(ndpr) ((struct ifprefix *)(ndpr)) +#define ifpr2ndpr(ifpr) ((struct nd_prefix *)(ifpr)) +#define ndpr2ifpr(ndpr) ((struct ifprefix *)(ndpr)) struct nd_pfxrouter { LIST_ENTRY(nd_pfxrouter) pfr_entry; -#define pfr_next pfr_entry.le_next +#define pfr_next pfr_entry.le_next struct nd_defrouter *router; }; @@ -668,15 +681,15 @@ struct nd_prefix_list { struct nd_prefix_list *next; struct nd_prefix pr; }; -#endif /* XNU_KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #if defined(PRIVATE) /* ND6 kernel event subclass value */ -#define KEV_ND6_SUBCLASS 7 +#define KEV_ND6_SUBCLASS 7 /* ND6 kernel event action type */ -#define KEV_ND6_RA 1 +#define KEV_ND6_RA 1 /* ND6 RA L2 source address length */ -#define ND6_ROUTER_LL_SIZE 64 +#define ND6_ROUTER_LL_SIZE 64 struct nd6_ra_prefix { struct sockaddr_in6 prefix; @@ -693,8 +706,8 @@ struct nd6_ra_prefix { }; /* ND6 router advertisement valid bits */ -#define KEV_ND6_DATA_VALID_MTU (0x1 << 0) -#define KEV_ND6_DATA_VALID_PREFIX (0x1 << 1) +#define KEV_ND6_DATA_VALID_MTU (0x1 << 0) +#define KEV_ND6_DATA_VALID_PREFIX (0x1 << 1) struct kev_nd6_ra_data { u_int8_t lladdr[ND6_ROUTER_LL_SIZE]; @@ -708,9 +721,10 @@ struct kev_nd6_ra_data { }; #endif /* PRIVATE */ -#if defined(XNU_KERNEL_PRIVATE) +#if defined(BSD_KERNEL_PRIVATE) /* nd6.c */ extern int nd6_prune; +extern int nd6_prune_lazy; extern int nd6_delay; extern int nd6_umaxtries; extern int nd6_mmaxtries; @@ -727,13 +741,14 @@ 
extern size_t nd_ifinfo_indexlim; extern int nd6_onlink_ns_rfc4861; extern int nd6_optimistic_dad; -#define nd6log(x) do { if (nd6_debug >= 1) log x; } while (0) -#define nd6log2(x) do { if (nd6_debug >= 2) log x; } while (0) +#define nd6log(x) do { if (nd6_debug >= 1) log x; } while (0) +#define nd6log2(x) do { if (nd6_debug >= 2) log x; } while (0) -#define ND6_OPTIMISTIC_DAD_LINKLOCAL (1 << 0) -#define ND6_OPTIMISTIC_DAD_AUTOCONF (1 << 1) -#define ND6_OPTIMISTIC_DAD_TEMPORARY (1 << 2) -#define ND6_OPTIMISTIC_DAD_DYNAMIC (1 << 3) +#define ND6_OPTIMISTIC_DAD_LINKLOCAL (1 << 0) +#define ND6_OPTIMISTIC_DAD_AUTOCONF (1 << 1) +#define ND6_OPTIMISTIC_DAD_TEMPORARY (1 << 2) +#define ND6_OPTIMISTIC_DAD_DYNAMIC (1 << 3) +#define ND6_OPTIMISTIC_DAD_SECURED (1 << 4) /* nd6_rtr.c */ extern int nd6_defifindex; @@ -755,22 +770,25 @@ union nd_opts { struct nd_opt_hdr *search; /* multiple opts */ struct nd_opt_hdr *last; /* multiple opts */ int done; - struct nd_opt_prefix_info *pi_end;/* multiple opts, end */ + struct nd_opt_prefix_info *pi_end; /* multiple opts, end */ } nd_opt_each; }; -#define nd_opts_src_lladdr nd_opt_each.src_lladdr -#define nd_opts_tgt_lladdr nd_opt_each.tgt_lladdr -#define nd_opts_pi nd_opt_each.pi_beg -#define nd_opts_pi_end nd_opt_each.pi_end -#define nd_opts_rh nd_opt_each.rh -#define nd_opts_mtu nd_opt_each.mtu -#define nd_opts_search nd_opt_each.search -#define nd_opts_last nd_opt_each.last -#define nd_opts_done nd_opt_each.done +#define nd_opts_src_lladdr nd_opt_each.src_lladdr +#define nd_opts_tgt_lladdr nd_opt_each.tgt_lladdr +#define nd_opts_pi nd_opt_each.pi_beg +#define nd_opts_pi_end nd_opt_each.pi_end +#define nd_opts_rh nd_opt_each.rh +#define nd_opts_mtu nd_opt_each.mtu +#define nd_opts_search nd_opt_each.search +#define nd_opts_last nd_opt_each.last +#define nd_opts_done nd_opt_each.done /* XXX: need nd6_var.h?? 
*/ /* nd6.c */ +extern int nd6_sched_timeout_want; +extern void nd6_sched_timeout(struct timeval *, struct timeval *); extern void nd6_init(void); +extern void nd6_ifreset(struct ifnet *); extern int nd6_ifattach(struct ifnet *); extern int nd6_is_addr_neighbor(struct sockaddr_in6 *, struct ifnet *, int); extern void nd6_option_init(void *, int, union nd_opts *); @@ -778,7 +796,6 @@ extern struct nd_opt_hdr *nd6_option(union nd_opts *); extern int nd6_options(union nd_opts *); extern struct rtentry *nd6_lookup(struct in6_addr *, int, struct ifnet *, int); extern void nd6_setmtu(struct ifnet *); -extern void nd6_timer(void *); extern void nd6_purge(struct ifnet *); extern void nd6_free(struct rtentry *); extern void nd6_nud_hint(struct rtentry *, struct in6_addr *, int); @@ -794,8 +811,10 @@ extern int nd6_storelladdr(struct ifnet *, struct rtentry *, struct mbuf *, struct sockaddr *, u_char *); extern int nd6_need_cache(struct ifnet *); extern void nd6_drain(void *); -extern void nd6_post_msg(u_int32_t, struct nd_prefix_list *, u_int32_t, u_int32_t, char *, u_int32_t); +extern void nd6_post_msg(u_int32_t, struct nd_prefix_list *, u_int32_t, + u_int32_t, char *, u_int32_t); extern int nd6_setifinfo(struct ifnet *, u_int32_t, u_int32_t); +extern void ln_setexpire(struct llinfo_nd6 *, uint64_t); /* nd6_nbr.c */ extern void nd6_nbr_init(void); @@ -808,7 +827,7 @@ extern void nd6_ns_output(struct ifnet *, const struct in6_addr *, extern caddr_t nd6_ifptomac(struct ifnet *); extern void nd6_dad_start(struct ifaddr *, int *); extern void nd6_dad_stop(struct ifaddr *); -extern void nd6_dad_duplicated(struct ifaddr *, boolean_t); +extern void nd6_dad_duplicated(struct ifaddr *); extern void nd6_llreach_alloc(struct rtentry *, struct ifnet *, void *, unsigned int, boolean_t); extern void nd6_llreach_set_reachable(struct ifnet *, void *, unsigned int); @@ -844,11 +863,13 @@ extern struct nd_prefix *nd6_prefix_lookup(struct nd_prefix *); extern int in6_init_prefix_ltimes(struct nd_prefix *ndpr); extern void rt6_flush(struct in6_addr *, struct ifnet *); extern int nd6_setdefaultiface(int); -extern int in6_tmpifadd(const struct in6_ifaddr *, int, int); +extern int in6_tmpifadd(const struct in6_ifaddr *, int); extern void nddr_addref(struct nd_defrouter *, int); extern struct nd_defrouter *nddr_remref(struct nd_defrouter *, int); +extern uint64_t nddr_getexpire(struct nd_defrouter *); extern void ndpr_addref(struct nd_prefix *, int); extern struct nd_prefix *ndpr_remref(struct nd_prefix *, int); +extern uint64_t ndpr_getexpire(struct nd_prefix *); /* nd6_prproxy.c */ struct ip6_hdr; @@ -857,47 +878,65 @@ extern void nd6_prproxy_init(void); extern int nd6_if_prproxy(struct ifnet *, boolean_t); extern void nd6_prproxy_prelist_update(struct nd_prefix *, struct nd_prefix *); extern boolean_t nd6_prproxy_ifaddr(struct in6_ifaddr *); +extern void nd6_proxy_find_fwdroute(struct ifnet *, struct route_in6 *); extern boolean_t nd6_prproxy_isours(struct mbuf *, struct ip6_hdr *, struct route_in6 *, unsigned int); -extern void nd6_prproxy_ns_output(struct ifnet *, struct in6_addr *, - struct in6_addr *, struct llinfo_nd6 *); +extern void nd6_prproxy_ns_output(struct ifnet *, struct ifnet *, + struct in6_addr *, struct in6_addr *, struct llinfo_nd6 *); extern void nd6_prproxy_ns_input(struct ifnet *, struct in6_addr *, char *, int, struct in6_addr *, struct in6_addr *); extern void nd6_prproxy_na_input(struct ifnet *, struct in6_addr *, struct in6_addr *, struct in6_addr *, int); extern void 
nd6_prproxy_sols_reap(struct nd_prefix *); extern void nd6_prproxy_sols_prune(struct nd_prefix *, u_int32_t); -#endif /* XNU_KERNEL_PRIVATE */ +extern int nd6_if_disable(struct ifnet *, boolean_t); +#endif /* BSD_KERNEL_PRIVATE */ #ifdef KERNEL -/*! - @function nd6_lookup_ipv6 - @discussion This function will check the routing table for a cached - neighbor discovery entry or trigger an neighbor discovery query - to resolve the IPv6 address to a link-layer address. - - nd entries are stored in the routing table. This function will - lookup the IPv6 destination in the routing table. If the - destination requires forwarding to a gateway, the route of the - gateway will be looked up. The route entry is inspected to - determine if the link layer destination address is known. If - unknown, neighbor discovery will be used to resolve the entry. - @param interface The interface the packet is being sent on. - @param ip6_dest The IPv6 destination of the packet. - @param ll_dest On output, the link-layer destination. - @param ll_dest_len The length of the buffer for ll_dest. - @param hint Any routing hint passed down from the protocol. - @param packet The packet being transmitted. - @result May return an error such as EHOSTDOWN or ENETUNREACH. If - this function returns EJUSTRETURN, the packet has been queued - and will be sent when the address is resolved. If any other - value is returned, the caller is responsible for disposing of - the packet. +/* + * @function nd6_lookup_ipv6 + * @discussion This function will check the routing table for a cached + * neighbor discovery entry or trigger an neighbor discovery query + * to resolve the IPv6 address to a link-layer address. + * nd entries are stored in the routing table. This function will + * lookup the IPv6 destination in the routing table. If the + * destination requires forwarding to a gateway, the route of the + * gateway will be looked up. The route entry is inspected to + * determine if the link layer destination address is known. If + * unknown, neighbor discovery will be used to resolve the entry. + * @param interface The interface the packet is being sent on. + * @param ip6_dest The IPv6 destination of the packet. + * @param ll_dest On output, the link-layer destination. + * @param ll_dest_len The length of the buffer for ll_dest. + * @param hint Any routing hint passed down from the protocol. + * @param packet The packet being transmitted. + * @result May return an error such as EHOSTDOWN or ENETUNREACH. If + * this function returns EJUSTRETURN, the packet has been queued + * and will be sent when the address is resolved. If any other + * value is returned, the caller is responsible for disposing of + * the packet. */ extern errno_t nd6_lookup_ipv6(ifnet_t interface, const struct sockaddr_in6 *ip6_dest, struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint, mbuf_t packet); #endif /* KERNEL */ + +/* nd6_send.c */ +#ifdef BSD_KERNEL_PRIVATE +/* + * nd6_send_opmode + * + * value using CGA tx SEND rx SEND + * -------- --------- ------- ------- + * DISABLED NO NO NO + * QUIET YES NO NO + */ +extern int nd6_send_opstate; + +#define ND6_SEND_OPMODE_DISABLED 0 +#define ND6_SEND_OPMODE_CGA_QUIET 1 + +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_ND6_H_ */ diff --git a/bsd/netinet6/nd6_nbr.c b/bsd/netinet6/nd6_nbr.c index 53d5367cf..fa8125a07 100644 --- a/bsd/netinet6/nd6_nbr.c +++ b/bsd/netinet6/nd6_nbr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,8 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/nd6_nbr.c,v 1.4.2.4 2001/07/06 05:32:25 sumikawa Exp $ */ -/* $KAME: nd6_nbr.c,v 1.64 2001/05/17 03:48:30 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -85,6 +83,7 @@ #include #include #include +#include #include #include #include @@ -99,21 +98,19 @@ extern int ipsec_bypass; #endif -#include - struct dadq; static struct dadq *nd6_dad_find(struct ifaddr *); void nd6_dad_stoptimer(struct ifaddr *); static void nd6_dad_timer(struct ifaddr *); static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); -static void nd6_dad_ns_input(struct ifaddr *); -static void nd6_dad_na_input(struct ifaddr *, caddr_t, int); +static void nd6_dad_ns_input(struct mbuf *, struct ifaddr *); +static struct mbuf *nd6_dad_na_input(struct mbuf *, struct ifnet *, + struct in6_addr *, caddr_t, int); static void dad_addref(struct dadq *, int); static void dad_remref(struct dadq *); static struct dadq *nd6_dad_attach(struct dadq *, struct ifaddr *); static void nd6_dad_detach(struct dadq *, struct ifaddr *); -static int dad_ignore_ns = 0; /* ignore NS in DAD - specwise incorrect*/ static int dad_maxtry = 15; /* max # of *tries* to transmit DAD packet */ static unsigned int dad_size; /* size of zone element */ @@ -153,7 +150,6 @@ static struct zone *dad_zone; /* zone for dadq */ extern lck_mtx_t *dad6_mutex; extern lck_mtx_t *nd6_mutex; -extern int in6_get_hw_ifid(struct ifnet *, struct in6_addr *); static int nd6_llreach_base = (LL_BASE_REACHABLE / 1000); /* seconds */ @@ -178,7 +174,7 @@ nd6_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr, if (nd6_llreach_base != 0 && (ln->ln_expire != 0 || (ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) && - rt->rt_ifp != lo_ifp && + !(rt->rt_ifp->if_flags & IFF_LOOPBACK) && ifp->if_addrlen == IF_LLREACH_MAXLEN && /* Ethernet */ alen == ifp->if_addrlen) { struct if_llreach *lr; @@ -226,8 +222,8 @@ nd6_llreach_alloc(struct rtentry *rt, struct ifnet *ifp, void *addr, if (nd6_debug && lr != NULL && why != NULL) { char tmp[MAX_IPv6_STR_LEN]; - nd6log((LOG_DEBUG, "%s%d: %s%s for %s\n", ifp->if_name, - ifp->if_unit, type, why, inet_ntop(AF_INET6, + nd6log((LOG_DEBUG, "%s: %s%s for %s\n", if_name(ifp), + type, why, inet_ntop(AF_INET6, &SIN6(rt_key(rt))->sin6_addr, tmp, sizeof (tmp)))); } } @@ -243,8 +239,8 @@ nd6_llreach_use(struct llinfo_nd6 *ln) /* * Input a Neighbor Solicitation Message. * - * Based on RFC 2461 - * Based on RFC 2462 (duplicate address detection) + * Based on RFC 4861 + * Based on RFC 4862 (duplicate address detection) */ void nd6_ns_input( @@ -267,6 +263,7 @@ nd6_ns_input( union nd_opts ndopts; struct sockaddr_dl proxydl; boolean_t advrouter; + boolean_t is_dad_probe; if ((ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) { nd6log((LOG_INFO, "nd6_ns_input: on ND6ALT interface!\n")); @@ -286,6 +283,8 @@ nd6_ns_input( return; } #endif + m->m_pkthdr.pkt_flags |= PKTF_INET6_RESOLVE; + ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */ taddr6 = nd_ns->nd_ns_target; if (in6_setscope(&taddr6, ifp, NULL) != 0) @@ -299,7 +298,8 @@ nd6_ns_input( goto bad; } - if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { + is_dad_probe = IN6_IS_ADDR_UNSPECIFIED(&saddr6); + if (is_dad_probe) { /* dst has to be a solicited node multicast address. 
*/ if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL && /* don't check ifindex portion */ @@ -351,7 +351,7 @@ nd6_ns_input( lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } - if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) { + if (is_dad_probe && lladdr) { nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " "(link-layer address option)\n")); goto bad; @@ -468,7 +468,10 @@ nd6_ns_input( * unicast somebody is doing address resolution -> ignore * unspec dup address detection * - * The processing is defined in RFC 2462 (and updated by RFC 4429) + * The processing is defined in the "draft standard" RFC 4862 (and by + * RFC 4429, which is a "proposed standard" update to its obsolete + * predecessor, RFC 2462) The reason optimistic DAD is not included + * in RFC 4862 is entirely due to IETF procedural considerations. */ if (dadprogress) { /* @@ -478,8 +481,8 @@ nd6_ns_input( * If not, the packet is for addess resolution; * silently ignore it. */ - if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) - nd6_dad_ns_input(ifa); + if (is_dad_probe) + nd6_dad_ns_input(m, ifa); goto freeit; } @@ -496,7 +499,7 @@ nd6_ns_input( * the address. * S bit ("solicited") must be zero. */ - if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { + if (is_dad_probe) { saddr6 = in6addr_linklocal_allnodes; if (in6_setscope(&saddr6, ifp, NULL) != 0) goto bad; @@ -538,9 +541,9 @@ nd6_ns_input( * - ND6 header target IP6 address * - ND6 header source datalink address * - * Based on RFC 2461 - * Based on RFC 2462 (duplicate address detection) - * Updated by RFC 4429 (optimistic duplicate address detection) + * Based on RFC 4861 + * Based on RFC 4862 (duplicate address detection) + * Based on RFC 4429 (optimistic duplicate address detection) * * Caller must bump up ln->ln_rt refcnt to make sure 'ln' doesn't go * away if there is a llinfo_nd6 passed in. @@ -565,8 +568,8 @@ nd6_ns_output( int flags; caddr_t mac; struct route_in6 ro; - struct ip6_out_args ip6oa = - { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR }; + struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, + IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0 }; u_int32_t rtflags = 0; if ((ifp->if_eflags & IFEF_IPV6_ND6ALT) || IN6_IS_ADDR_MULTICAST(taddr6)) @@ -681,10 +684,6 @@ nd6_ns_output( rtflags = ln->ln_rt->rt_flags; RT_UNLOCK(ln->ln_rt); } - if (ia != NULL) { - IFA_REMREF(&ia->ia_ifa); - ia = NULL; - } if (hsrc != NULL && (ia = in6ifa_ifpwithaddr(ifp, hsrc)) && (ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0) { src = hsrc; @@ -709,6 +708,11 @@ nd6_ns_output( goto bad; } + if (ia != NULL) { + IFA_REMREF(&ia->ia_ifa); + ia = NULL; + } + ia = in6ifa_ifpwithaddr(ifp, src); if (!ia || (ia->ia6_flags & IN6_IFF_OPTIMISTIC)) { nd6log((LOG_DEBUG, @@ -773,23 +777,28 @@ nd6_ns_output( #if IPSEC /* Don't lookup socket */ if (ipsec_bypass == 0) - (void)ipsec_setsocket(m, NULL); + (void) ipsec_setsocket(m, NULL); #endif flags = dad ? IPV6_UNSPECSRC : 0; flags |= IPV6_OUTARGS; + /* + * PKTF_{INET,INET6}_RESOLVE_RTR are mutually exclusive, so make + * sure only one of them is set (just in case.) + */ + m->m_pkthdr.pkt_flags &= ~(PKTF_INET_RESOLVE | PKTF_RESOLVE_RTR); + m->m_pkthdr.pkt_flags |= PKTF_INET6_RESOLVE; /* * If this is a NS for resolving the (default) router, mark * the packet accordingly so that the driver can find out, * in case it needs to perform driver-specific action(s). 
*/ - if (rtflags & RTF_ROUTER) { - m->m_pkthdr.aux_flags |= MAUXF_INET6_RESOLVE_RTR; - VERIFY(!(m->m_pkthdr.aux_flags & MAUXF_INET_RESOLVE_RTR)); - } + if (rtflags & RTF_ROUTER) + m->m_pkthdr.pkt_flags |= PKTF_RESOLVE_RTR; if (ifp->if_eflags & IFEF_TXSTART) { - /* Use control service class if the interface + /* + * Use control service class if the interface * supports transmit-start model */ (void) m_set_service_class(m, MBUF_SC_CTL); @@ -803,42 +812,33 @@ nd6_ns_output( } icmp6stat.icp6s_outhist[ND_NEIGHBOR_SOLICIT]++; +exit: if (im6o != NULL) IM6O_REMREF(im6o); - if (ro.ro_rt) { /* we don't cache this route. */ - rtfree(ro.ro_rt); - } + + ROUTE_RELEASE(&ro); /* we don't cache this route. */ + if (ia != NULL) IFA_REMREF(&ia->ia_ifa); return; bad: - if (im6o != NULL) - IM6O_REMREF(im6o); - if (ro.ro_rt) { - rtfree(ro.ro_rt); - } m_freem(m); - if (ia != NULL) - IFA_REMREF(&ia->ia_ifa); - return; + goto exit; } /* * Neighbor advertisement input handling. * - * Based on RFC 2461 - * Based on RFC 2462 (duplicate address detection) + * Based on RFC 4861 + * Based on RFC 4862 (duplicate address detection) * * the following items are not implemented yet: - * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) - * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) + * - anycast advertisement delay rule (RFC 4861 7.2.7, SHOULD) + * - proxy advertisement delay rule (RFC 4861 7.2.8, last paragraph, "should") */ void -nd6_na_input( - struct mbuf *m, - int off, - int icmp6len) +nd6_na_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); @@ -852,12 +852,11 @@ nd6_na_input( int is_override; char *lladdr = NULL; int lladdrlen = 0; - struct ifaddr *ifa = NULL; struct llinfo_nd6 *ln; struct rtentry *rt; struct sockaddr_dl *sdl; union nd_opts ndopts; - struct timeval timenow; + uint64_t timenow; if ((ifp->if_eflags & IFEF_IPV6_ND6ALT) != 0) { nd6log((LOG_INFO, "nd6_na_input: on ND6ALT interface!\n")); @@ -885,6 +884,7 @@ nd6_na_input( return; } #endif + m->m_pkthdr.pkt_flags |= PKTF_INET6_RESOLVE; flags = nd_na->nd_na_flags_reserved; is_router = ((flags & ND_NA_FLAG_ROUTER) != 0); @@ -920,61 +920,20 @@ nd6_na_input( if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; - } - - ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); - /* - * Target address matches one of my interface address. - * - * If my address is tentative or optimistic, this means that there's - * somebody already using the same address as mine. This indicates DAD - * failure. This is defined in RFC 2462 and updated by RFC 4429. - * - * Otherwise, process as defined in RFC 2461. 
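/*
 * Sketch of the option-length check used in the NA input path above
 * (illustrative only). An ND link-layer address option is 2 header
 * bytes plus the address, padded to a multiple of 8; nd_opt_len is
 * stored in 8-byte units, so the parsed lladdrlen is (nd_opt_len << 3)
 * and must equal this rounded value.
 */
#include <assert.h>

static int
nd_opt_lladdr_octets(int if_addrlen)
{
    return ((if_addrlen + 2 + 7) & ~7); /* round 2 + addrlen up to 8n */
}

static void
nd_opt_lladdr_octets_example(void)
{
    assert(nd_opt_lladdr_octets(6) == 8);   /* Ethernet */
    assert(nd_opt_lladdr_octets(8) == 16);  /* EUI-64 link layers */
}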
- */ - if (ifa != NULL) { - IFA_LOCK(ifa); - if (((struct in6_ifaddr *)ifa)->ia6_flags & - IN6_IFF_DADPROGRESS) { - struct nd_ifinfo *ndi; - boolean_t ignorena = FALSE; - - IFA_UNLOCK(ifa); - lck_rw_lock_shared(nd_if_rwlock); - ndi = ND_IFINFO(ifp); - if (ndi != NULL && ndi->initialized) { - lck_mtx_lock(&ndi->lock); - ignorena = ndi->flags & ND6_IFF_IGNORE_NA; - lck_mtx_unlock(&ndi->lock); - } - lck_rw_done(nd_if_rwlock); - if (ignorena) - log(LOG_ERR, "%s: ignoring duplicate DAD due " - "to sleep proxy (%s)\n", __func__, - if_name(ifp)); - else - nd6_dad_na_input(ifa, lladdr, lladdrlen); - goto freeit; + if (((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { + nd6log((LOG_INFO, + "nd6_na_input: lladdrlen mismatch for %s " + "(if %d, NA packet %d)\n", + ip6_sprintf(&taddr6), ifp->if_addrlen, + lladdrlen - 2)); + goto bad; } - IFA_UNLOCK(ifa); } - /* Just for safety, maybe unnecessary. */ - if (ifa) { - log(LOG_ERR, - "nd6_na_input: duplicate IP6 address %s\n", - ip6_sprintf(&taddr6)); - goto freeit; - } - - if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { - nd6log((LOG_INFO, - "nd6_na_input: lladdrlen mismatch for %s " - "(if %d, NA packet %d)\n", - ip6_sprintf(&taddr6), ifp->if_addrlen, lladdrlen - 2)); - goto bad; - } + m = nd6_dad_na_input(m, ifp, &taddr6, lladdr, lladdrlen); + if (m == NULL) + return; /* Forwarding associated with NDPRF_PRPROXY may apply. */ if (ip6_forwarding && nd6_prproxy) @@ -1009,6 +968,13 @@ nd6_na_input( /* Change the interface when the existing route is on */ rt->rt_ifp = ifp; + + /* + * If rmx_mtu is not locked, update it + * to the MTU used by the new interface. + */ + if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) + rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; } } @@ -1020,7 +986,8 @@ nd6_na_input( goto freeit; } - getmicrotime(&timenow); + timenow = net_uptime(); + if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* * If the link-layer has address, and no lladdr option came, @@ -1039,23 +1006,25 @@ nd6_na_input( bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen); if (is_solicited) { ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - if (ln->ln_expire) { + if (ln->ln_expire != 0) { struct nd_ifinfo *ndi; lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(rt->rt_ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); - ln->ln_expire = rt_expiry(rt, timenow.tv_sec, - ndi->reachable); + ln_setexpire(ln, timenow + ndi->reachable); lck_mtx_unlock(&ndi->lock); lck_rw_done(nd_if_rwlock); + RT_UNLOCK(rt); + lck_mtx_lock(rnh_lock); + nd6_sched_timeout(NULL, NULL); + lck_mtx_unlock(rnh_lock); + RT_LOCK(rt); } } else { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = rt_expiry(rt, timenow.tv_sec, - nd6_gctimer); + ln_setexpire(ln, timenow + nd6_gctimer); } if ((ln->ln_router = is_router) != 0) { /* @@ -1113,8 +1082,7 @@ nd6_na_input( */ if (ln->ln_state == ND6_LLINFO_REACHABLE) { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = rt_expiry(rt, timenow.tv_sec, - nd6_gctimer); + ln_setexpire(ln, timenow + nd6_gctimer); } RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); @@ -1137,25 +1105,27 @@ nd6_na_input( */ if (is_solicited) { ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_byhint = 0; - if (ln->ln_expire) { + if (ln->ln_expire != 0) { struct nd_ifinfo *ndi; lck_rw_lock_shared(nd_if_rwlock); ndi = ND_IFINFO(ifp); VERIFY(ndi != NULL && ndi->initialized); lck_mtx_lock(&ndi->lock); - ln->ln_expire = - rt_expiry(rt, timenow.tv_sec, - ndi->reachable); + ln_setexpire(ln, + timenow + ndi->reachable); lck_mtx_unlock(&ndi->lock); lck_rw_done(nd_if_rwlock); + RT_UNLOCK(rt); + 
lck_mtx_lock(rnh_lock); + nd6_sched_timeout(NULL, NULL); + lck_mtx_unlock(rnh_lock); + RT_LOCK(rt); } } else { if (lladdr && llchange) { ln->ln_state = ND6_LLINFO_STALE; - ln->ln_expire = rt_expiry(rt, - timenow.tv_sec, nd6_gctimer); + ln_setexpire(ln, timenow + nd6_gctimer); } } } @@ -1233,17 +1203,12 @@ nd6_na_input( RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); -freeit: - m_freem(m); - if (ifa != NULL) - IFA_REMREF(ifa); - return; - bad: icmp6stat.icp6s_badna++; + /* fall through */ + +freeit: m_freem(m); - if (ifa != NULL) - IFA_REMREF(ifa); } /* @@ -1278,8 +1243,8 @@ nd6_na_output( struct sockaddr_in6 dst_sa; int icmp6len, maxlen, error; struct ifnet *outif = NULL; - struct ip6_out_args ip6oa = - { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR }; + struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, + IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_SRCADDR, 0 }; bzero(&ro, sizeof(ro)); @@ -1433,8 +1398,9 @@ nd6_na_output( #if IPSEC /* Don't lookup socket */ if (ipsec_bypass == 0) - (void)ipsec_setsocket(m, NULL); + (void) ipsec_setsocket(m, NULL); #endif + m->m_pkthdr.pkt_flags |= PKTF_INET6_RESOLVE; if (ifp->if_eflags & IFEF_TXSTART) { /* Use control service class if the interface supports @@ -1451,21 +1417,16 @@ nd6_na_output( } icmp6stat.icp6s_outhist[ND_NEIGHBOR_ADVERT]++; +exit: if (im6o != NULL) IM6O_REMREF(im6o); - if (ro.ro_rt) { - rtfree(ro.ro_rt); - } + + ROUTE_RELEASE(&ro); return; bad: - if (im6o != NULL) - IM6O_REMREF(im6o); - if (ro.ro_rt) { - rtfree(ro.ro_rt); - } m_freem(m); - return; + goto exit; } caddr_t @@ -1489,7 +1450,7 @@ nd6_ifptomac( #endif case IFT_BRIDGE: case IFT_ISO88025: - return ((caddr_t)ifnet_lladdr(ifp)); + return ((caddr_t)IF_LLADDR(ifp)); default: return NULL; } @@ -1507,7 +1468,7 @@ struct dadq { int dad_ns_ocount; /* NS sent so far */ int dad_ns_icount; int dad_na_icount; - int dad_na_ixcount; /* Count of IFDISABLED eligible NA rx'd */ + int dad_nd_ixcount; /* Count of IFDISABLED eligible ND rx'd */ }; static struct dadq_head dadq; @@ -1671,7 +1632,7 @@ nd6_dad_attach(struct dadq *dp, struct ifaddr *ifa) dp->dad_count = ip6_dad_count; dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; - dp->dad_na_ixcount = 0; + dp->dad_nd_ixcount = 0; VERIFY(!dp->dad_attached); dp->dad_attached = 1; DAD_ADDREF_LOCKED(dp); /* for caller */ @@ -1822,8 +1783,10 @@ nd6_dad_timer(struct ifaddr *ifa) * See what we've got. */ int duplicate; + boolean_t candisable; duplicate = 0; + candisable = dp->dad_nd_ixcount > 0; if (dp->dad_na_icount) { /* @@ -1840,8 +1803,12 @@ nd6_dad_timer(struct ifaddr *ifa) DAD_UNLOCK(dp); if (duplicate) { + nd6log((LOG_INFO, + "%s: duplicate IPv6 address %s [timer]\n", + __func__, ip6_sprintf(&ia->ia_addr.sin6_addr), + if_name(ia->ia_ifp))); + nd6_dad_duplicated(ifa); /* (*dp) will be freed in nd6_dad_duplicated() */ - nd6_dad_duplicated(ifa, TRUE); } else { /* * We are done with DAD. No NA came, no NS came. 
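/*
 * Paraphrase of the duplicate decision the DAD timer makes from the
 * dadq counters (simplified sketch; the exact kernel predicate also
 * accounts for multicast-loopback cases handled elsewhere in this
 * file, so treat this only as the general shape of the rule):
 */
static int
dad_saw_duplicate(int ns_in, int ns_out, int na_in)
{
    /* Any NA for a tentative address means somebody owns it. */
    if (na_in > 0)
        return (1);
    /* More DAD NSes observed than we sent: another node is probing. */
    if (ns_in > ns_out)
        return (1);
    return (0);
}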
@@ -1872,53 +1839,53 @@ done: } void -nd6_dad_duplicated(struct ifaddr *ifa, boolean_t dontignhwdup) +nd6_dad_duplicated(struct ifaddr *ifa) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; struct ifnet *ifp = ifa->ifa_ifp; - int hwdupposs; + boolean_t disable; dp = nd6_dad_find(ifa); if (dp == NULL) { - log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n"); + log(LOG_ERR, "%s: DAD structure not found.\n", __func__); return; } - hwdupposs = 0; IFA_LOCK(&ia->ia_ifa); DAD_LOCK(dp); - log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: " - "NS in/out=%d/%d, NA in=%d inx=%d\n", - if_name(ifp), ip6_sprintf(&ia->ia_addr.sin6_addr), - dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount, - dp->dad_na_ixcount); - hwdupposs = dp->dad_na_ixcount; + nd6log((LOG_ERR, "%s: NS in/out=%d/%d, NA in=%d, ND x=%d\n", + __func__, dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount, + dp->dad_nd_ixcount)); + disable = dp->dad_nd_ixcount > 0; DAD_UNLOCK(dp); ia->ia6_flags &= ~IN6_IFF_DADPROGRESS; ia->ia6_flags |= IN6_IFF_DUPLICATED; IFA_UNLOCK(&ia->ia_ifa); + /* increment DAD collision counter */ + ++ip6stat.ip6s_dad_collide; + /* We are done with DAD, with duplicated address found. (failure) */ untimeout((void (*)(void *))nd6_dad_timer, (void *)ifa); IFA_LOCK(&ia->ia_ifa); - log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n", + log(LOG_ERR, "%s: DAD complete for %s - duplicate found.\n", if_name(ifp), ip6_sprintf(&ia->ia_addr.sin6_addr)); - log(LOG_ERR, "%s: manual intervention required\n", - if_name(ifp)); IFA_UNLOCK(&ia->ia_ifa); - - if (hwdupposs || - (dontignhwdup && IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr))) { + + if (disable) { log(LOG_ERR, "%s: possible hardware address duplication " - "detected, disable IPv6\n", if_name(ifp)); - + "detected, disabling IPv6 for interface.\n", if_name(ifp)); + lck_rw_lock_shared(nd_if_rwlock); - nd_ifinfo[ifp->if_index].flags |= - ND6_IFF_IFDISABLED; + nd_ifinfo[ifp->if_index].flags |= ND6_IFF_IFDISABLED; lck_rw_done(nd_if_rwlock); + /* Make sure to set IFEF_IPV6_DISABLED too */ + nd6_if_disable(ifp, TRUE); } + log(LOG_ERR, "%s: manual intervention required!\n", if_name(ifp)); + /* Send an event to the configuration agent so that the * duplicate address will be notified to the user and will * be removed. 
@@ -1955,76 +1922,125 @@ nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa) } static void -nd6_dad_ns_input(struct ifaddr *ifa) +nd6_dad_ns_input(struct mbuf *m, struct ifaddr *ifa) { struct dadq *dp; - int duplicate; - struct ifnet *ifp; - - if (ifa == NULL) - panic("ifa == NULL in nd6_dad_ns_input"); - - ifp = ifa->ifa_ifp; - duplicate = 0; - dp = nd6_dad_find(ifa); + struct in6_ifaddr *ia; + boolean_t candisable, dadstarted; - /* Quickhack - completely ignore DAD NS packets */ - if (dad_ignore_ns) { - struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; - IFA_LOCK(&ia->ia_ifa); - nd6log((LOG_INFO, - "nd6_dad_ns_input: ignoring DAD NS packet for " - "address %s(%s)\n", ip6_sprintf(&ia->ia_addr.sin6_addr), - if_name(ifa->ifa_ifp))); - IFA_UNLOCK(&ia->ia_ifa); - return; + VERIFY(ifa != NULL); + candisable = FALSE; + IFA_LOCK(ifa); + ia = (struct in6_ifaddr *) ifa; + if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { + struct ip6aux *ip6a; + + candisable = TRUE; + ip6a = ip6_findaux(m); + + if (ip6a && (ip6a->ip6a_flags & IP6A_HASEEN) != 0) { + struct in6_addr in6 = ia->ia_addr.sin6_addr; + + nd6log((LOG_INFO, + "%s: eh_src=%02x:%02x:%02x:%02x:%02x:%02x -> %s\n", + __func__, + ip6a->ip6a_ehsrc[0], ip6a->ip6a_ehsrc[1], + ip6a->ip6a_ehsrc[2], ip6a->ip6a_ehsrc[3], + ip6a->ip6a_ehsrc[4], ip6a->ip6a_ehsrc[5], + if_name(ifa->ifa_ifp))); + + in6.s6_addr8[8] = ip6a->ip6a_ehsrc[0] ^ ND6_EUI64_UBIT; + in6.s6_addr8[9] = ip6a->ip6a_ehsrc[1]; + in6.s6_addr8[10] = ip6a->ip6a_ehsrc[2]; + in6.s6_addr8[11] = 0xff; + in6.s6_addr8[12] = 0xfe; + in6.s6_addr8[13] = ip6a->ip6a_ehsrc[3]; + in6.s6_addr8[14] = ip6a->ip6a_ehsrc[4]; + in6.s6_addr8[15] = ip6a->ip6a_ehsrc[5]; + + if (!IN6_ARE_ADDR_EQUAL(&in6, &ia->ia_addr.sin6_addr)) { + nd6log((LOG_ERR, "%s: DAD NS for %s on %s " + "is from another MAC address.\n", __func__, + ip6_sprintf(&ia->ia_addr.sin6_addr), + if_name(ifa->ifa_ifp))); + candisable = FALSE; + } + } else { + nd6log((LOG_INFO, + "%s: no eh_src for DAD NS %s at %s.\n", __func__, + ip6_sprintf(&ia->ia_addr.sin6_addr), + if_name(ifa->ifa_ifp))); + } } + IFA_UNLOCK(ifa); - /* - * if I'm yet to start DAD, someone else started using this address - * first. I have a duplicate and you win. + /* If DAD has not yet started, then this DAD NS probe is proof that + * another node has started first. Otherwise, it could be a multicast + * loopback, in which case it should be counted and handled later in + * the DAD timer callback. */ - if (dp != NULL) + dadstarted = FALSE; + dp = nd6_dad_find(ifa); + if (dp != NULL) { DAD_LOCK(dp); - if (dp == NULL || dp->dad_ns_ocount == 0) - duplicate++; - - /* XXX more checks for loopback situation - see nd6_dad_timer too */ - - if (duplicate) { - if (dp != NULL) { - DAD_UNLOCK(dp); - DAD_REMREF(dp); - dp = NULL; - } - nd6_dad_duplicated(ifa, TRUE); - } else if (dp != NULL) { - /* - * not sure if I got a duplicate. - * increment ns count and see what happens. 
- */ - dp->dad_ns_icount++; + ++dp->dad_ns_icount; + if (candisable) + ++dp->dad_nd_ixcount; + if (dp->dad_ns_ocount > 0) + dadstarted = TRUE; DAD_UNLOCK(dp); DAD_REMREF(dp); + dp = NULL; + } + + nd6log((LOG_INFO, "%s: dadstarted=%d candisable=%d\n", + __func__, dadstarted, candisable)); + + if (!dadstarted) { + nd6log((LOG_INFO, + "%s: duplicate IPv6 address %s [processing NS on %s]\n", + __func__, ip6_sprintf(&ia->ia_addr.sin6_addr), + if_name(ifa->ifa_ifp))); + nd6_dad_duplicated(ifa); } } -static void -nd6_dad_na_input(struct ifaddr *ifa, caddr_t lladdr, int lladdrlen) +static struct mbuf * +nd6_dad_na_input(struct mbuf *m, struct ifnet *ifp, struct in6_addr *taddr, + caddr_t lladdr, int lladdrlen) { - struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; + struct ifaddr *ifa; + struct in6_ifaddr *ia; struct dadq *dp; - int hwdupposs; + struct nd_ifinfo *ndi; + boolean_t candisable, ignoring; + ifa = (struct ifaddr *) in6ifa_ifpwithaddr(ifp, taddr); if (ifa == NULL) - panic("ifa == NULL in nd6_dad_na_input"); + return m; - dp = nd6_dad_find(ifa); - if (dp == NULL) { - log(LOG_ERR, "nd6_dad_na_input: DAD structure not found\n"); - return; + candisable = FALSE; + ignoring = FALSE; + + /* The ND6_IFF_IGNORE_NA flag is here for legacy reasons. */ + lck_rw_lock_shared(nd_if_rwlock); + ndi = ND_IFINFO(ifp); + if (ndi != NULL && ndi->initialized) { + lck_mtx_lock(&ndi->lock); + ignoring = !!(ndi->flags & ND6_IFF_IGNORE_NA); + lck_mtx_unlock(&ndi->lock); } - + lck_rw_done(nd_if_rwlock); + if (ignoring) { + nd6log((LOG_INFO, "%s: ignoring duplicate NA on " + "%s [ND6_IFF_IGNORE_NA]\n", __func__, if_name(ifp))); + goto done; + } + + /* Lock the interface address until done (see label below). */ + IFA_LOCK(ifa); + ia = (struct in6_ifaddr *) ifa; + /* * If the address is a link-local address formed from an interface * identifier based on the hardware address which is supposed to be @@ -2034,15 +2050,37 @@ nd6_dad_na_input(struct ifaddr *ifa, caddr_t lladdr, int lladdrlen) * hardware address is not also ours, which is a transitory possibility * in the presence of network-resident sleep proxies on the local link. */ - hwdupposs = 0; - IFA_LOCK(ifa); - if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { - struct ifnet *ifp; - struct in6_addr in6; - + + if (!(ia->ia6_flags & IN6_IFF_DADPROGRESS)) { IFA_UNLOCK(ifa); - ifp = ifa->ifa_ifp; - + nd6log((LOG_INFO, "%s: ignoring duplicate NA on " + "%s [DAD not in progress]\n", __func__, + if_name(ifp))); + goto done; + } + + /* Some sleep proxies improperly send the client's Ethernet address in + * the target link-layer address option, so detect this by comparing + * the L2-header source address, if we have seen it, with the target + * address, and ignoring the NA if they don't match. + */ + if (lladdr != NULL && lladdrlen == ETHER_ADDR_LEN) { + struct ip6aux *ip6a = ip6_findaux(m); + if (ip6a && (ip6a->ip6a_flags & IP6A_HASEEN) != 0 && + bcmp(ip6a->ip6a_ehsrc, lladdr, ETHER_ADDR_LEN) != 0) { + IFA_UNLOCK(ifa); + nd6log((LOG_ERR, "%s: ignoring duplicate NA on %s " + "[eh_src != tgtlladdr]\n", __func__, if_name(ifp))); + goto done; + } + } + + IFA_UNLOCK(ifa); + + if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr) && + !(ia->ia6_flags & IN6_IFF_SECURED)) { + struct in6_addr in6; + /* * To avoid over-reaction, we only apply this logic when we are * very sure that hardware addresses are supposed to be unique. 
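/*
 * Sketch of the modified EUI-64 interface identifier that the DAD
 * logic above reconstructs from an Ethernet source address in order to
 * recognize probes looped back by a sleep proxy. ND6_EUI64_UBIT is the
 * universal/local bit (0x02); this helper is illustrative, not the
 * kernel's in6_iid_from_hw(). The identifier occupies s6_addr[8..15]
 * of the link-local address.
 */
#include <stdint.h>

static void
mac_to_eui64_iid(const uint8_t mac[6], uint8_t iid[8])
{
    iid[0] = mac[0] ^ 0x02;     /* flip the universal/local bit */
    iid[1] = mac[1];
    iid[2] = mac[2];
    iid[3] = 0xff;              /* fixed ff:fe filler in the middle */
    iid[4] = 0xfe;
    iid[5] = mac[3];
    iid[6] = mac[4];
    iid[7] = mac[5];
}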
@@ -2060,27 +2098,29 @@ nd6_dad_na_input(struct ifaddr *ifa, caddr_t lladdr, int lladdrlen) if (lladdr != NULL && lladdrlen > 0) { struct ifaddr *llifa; struct sockaddr_dl *sdl; - + llifa = ifp->if_lladdr; IFA_LOCK(llifa); sdl = (struct sockaddr_dl *)(void *) llifa->ifa_addr; - if (lladdrlen == sdl->sdl_alen || + if (lladdrlen == sdl->sdl_alen && bcmp(lladdr, LLADDR(sdl), lladdrlen) == 0) - hwdupposs = 1; + candisable = TRUE; IFA_UNLOCK(llifa); } in6 = ia->ia_addr.sin6_addr; - if (in6_get_hw_ifid(ifp, &in6) != 0) + if (in6_iid_from_hw(ifp, &in6) != 0) break; - /* - * Apply this logic only to the EUI-64 form of - * link-local interface identifiers. - */ + + /* Refine decision about whether IPv6 can be disabled */ IFA_LOCK(ifa); - if (hwdupposs && + if (candisable && !IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) { - hwdupposs = 0; + /* + * Apply this logic only to the embedded MAC + * address form of link-local IPv6 address. + */ + candisable = FALSE; } else if (lladdr == NULL && IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) { /* @@ -2089,26 +2129,40 @@ nd6_dad_na_input(struct ifaddr *ifa, caddr_t lladdr, int lladdrlen) * has our address. Mark it as a hardware * duplicate so we disable IPv6 later on. */ - hwdupposs = 1; + candisable = TRUE; } IFA_UNLOCK(ifa); break; default: break; } - } else { - IFA_UNLOCK(ifa); } - + + dp = nd6_dad_find(ifa); + if (dp == NULL) { + nd6log((LOG_INFO, "%s: no DAD structure for %s on %s.\n", + __func__, ip6_sprintf(taddr), if_name(ifp))); + goto done; + } + DAD_LOCK_SPIN(dp); dp->dad_na_icount++; - if (hwdupposs) - dp->dad_na_ixcount++; + if (candisable) + dp->dad_nd_ixcount++; DAD_UNLOCK(dp); DAD_REMREF(dp); - + /* remove the address. */ - nd6_dad_duplicated(ifa, FALSE); + nd6log((LOG_INFO, + "%s: duplicate IPv6 address %s [processing NA on %s]\n", __func__, + ip6_sprintf(taddr), if_name(ifp))); + nd6_dad_duplicated(ifa); + +done: + IFA_LOCK_ASSERT_NOTHELD(ifa); + IFA_REMREF(ifa); + m_freem(m); + return NULL; } static void @@ -2172,29 +2226,29 @@ nd6_alt_node_addr_decompose(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl* sdl, struct sockaddr_in6 *sin6) { static const size_t EUI64_LENGTH = 8; - + VERIFY(nd6_need_cache(ifp)); VERIFY(sa); VERIFY(sdl && (void *)sa != (void *)sdl); VERIFY(sin6 && (void *)sa != (void *)sin6); - + bzero(sin6, sizeof *sin6); sin6->sin6_len = sizeof *sin6; sin6->sin6_family = AF_INET6; - + bzero(sdl, sizeof *sdl); sdl->sdl_len = sizeof *sdl; sdl->sdl_family = AF_LINK; sdl->sdl_type = ifp->if_type; sdl->sdl_index = ifp->if_index; - + switch (sa->sa_family) { case AF_INET6: { struct sockaddr_in6 *sin6a = (struct sockaddr_in6 *)(void *)sa; struct in6_addr *in6 = &sin6a->sin6_addr; - + VERIFY(sa->sa_len == sizeof *sin6); - + sdl->sdl_nlen = strlen(ifp->if_name); bcopy(ifp->if_name, sdl->sdl_data, sdl->sdl_nlen); if (in6->s6_addr[11] == 0xff && in6->s6_addr[12] == 0xfe) { @@ -2205,12 +2259,11 @@ nd6_alt_node_addr_decompose(struct ifnet *ifp, struct sockaddr *sa, LLADDR(sdl)[3] = in6->s6_addr[13]; LLADDR(sdl)[4] = in6->s6_addr[14]; LLADDR(sdl)[5] = in6->s6_addr[15]; - } - else { + } else { sdl->sdl_alen = EUI64_LENGTH; bcopy(&in6->s6_addr[8], LLADDR(sdl), EUI64_LENGTH); } - + sdl->sdl_slen = 0; break; } @@ -2218,10 +2271,10 @@ nd6_alt_node_addr_decompose(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr_dl *sdla = (struct sockaddr_dl *)(void *)sa; struct in6_addr *in6 = &sin6->sin6_addr; caddr_t lla = LLADDR(sdla); - + VERIFY(sa->sa_len <= sizeof *sdl); bcopy(sa, sdl, sa->sa_len); - + sin6->sin6_scope_id = 
sdla->sdl_index; if (sin6->sin6_scope_id == 0) sin6->sin6_scope_id = ifp->if_index; @@ -2231,7 +2284,7 @@ nd6_alt_node_addr_decompose(struct ifnet *ifp, struct sockaddr *sa, bcopy(lla, &in6->s6_addr[8], EUI64_LENGTH); else { VERIFY(sdla->sdl_alen == ETHER_ADDR_LEN); - + in6->s6_addr[8] = ((uint8_t) lla[0] ^ ND6_EUI64_UBIT); in6->s6_addr[9] = (uint8_t) lla[1]; in6->s6_addr[10] = (uint8_t) lla[2]; @@ -2241,7 +2294,7 @@ nd6_alt_node_addr_decompose(struct ifnet *ifp, struct sockaddr *sa, in6->s6_addr[14] = (uint8_t) lla[4]; in6->s6_addr[15] = (uint8_t) lla[5]; } - + break; } default: @@ -2258,8 +2311,8 @@ nd6_alt_node_present(struct ifnet *ifp, struct sockaddr_in6 *sin6, struct llinfo_nd6 *ln; struct if_llreach *lr; - nd6_cache_lladdr(ifp, &sin6->sin6_addr, LLADDR(sdl), - sdl->sdl_alen, ND_NEIGHBOR_ADVERT, 0); + nd6_cache_lladdr(ifp, &sin6->sin6_addr, LLADDR(sdl), sdl->sdl_alen, + ND_NEIGHBOR_ADVERT, 0); lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED); lck_mtx_lock(rnh_lock); @@ -2273,7 +2326,7 @@ nd6_alt_node_present(struct ifnet *ifp, struct sockaddr_in6 *sin6, ln = rt->rt_llinfo; ln->ln_state = ND6_LLINFO_REACHABLE; - ln->ln_expire = 0; + ln_setexpire(ln, 0); lr = ln->ln_llreach; if (lr) { @@ -2293,10 +2346,10 @@ nd6_alt_node_present(struct ifnet *ifp, struct sockaddr_in6 *sin6, if (rt == NULL) { log(LOG_ERR, "%s: failed to add/update host route to %s.\n", __func__, ip6_sprintf(&sin6->sin6_addr)); - } - else { - nd6log((LOG_DEBUG, "%s: host route to %s [lr=%p]\n", __func__, - ip6_sprintf(&sin6->sin6_addr), lr)); + } else { + nd6log((LOG_DEBUG, "%s: host route to %s [lr=0x%llx]\n", + __func__, ip6_sprintf(&sin6->sin6_addr), + (uint64_t)VM_KERNEL_ADDRPERM(lr))); } } @@ -2316,7 +2369,7 @@ nd6_alt_node_absent(struct ifnet *ifp, struct sockaddr_in6 *sin6) if (rt != NULL) { RT_LOCK(rt); - if (!(rt->rt_flags & (RTF_PINNED|RTF_CLONING|RTF_PRCLONING)) && + if (!(rt->rt_flags & (RTF_CLONING|RTF_PRCLONING)) && (rt->rt_flags & (RTF_HOST|RTF_LLINFO|RTF_WASCLONED)) == (RTF_HOST|RTF_LLINFO|RTF_WASCLONED)) { rt->rt_flags |= RTF_CONDEMNED; @@ -2327,8 +2380,7 @@ nd6_alt_node_absent(struct ifnet *ifp, struct sockaddr_in6 *sin6) (struct rtentry **)NULL); rtfree_locked(rt); - } - else { + } else { RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); } diff --git a/bsd/netinet6/nd6_prproxy.c b/bsd/netinet6/nd6_prproxy.c index 3bac47eb1..f4935bd9a 100644 --- a/bsd/netinet6/nd6_prproxy.c +++ b/bsd/netinet6/nd6_prproxy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012 Apple Inc. All rights reserved. + * Copyright (c) 2011-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -442,7 +442,7 @@ nd6_if_prproxy(struct ifnet *ifp, boolean_t enable) } if (pr == NULL) - continue; + break; up = nd6_ndprl_alloc(M_WAITOK); if (up == NULL) { @@ -513,7 +513,7 @@ nd6_if_prproxy(struct ifnet *ifp, boolean_t enable) /* * Called from the input path to determine whether the packet is destined - * to a proxied node; if so, mark the mbuf with MAUXF_PROXY_DST so that + * to a proxied node; if so, mark the mbuf with PKTFF_PROXY_DST so that * icmp6_input() knows that this is not to be delivered to socket(s). 
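/*
 * The inverse of the mapping sketched earlier, as performed by
 * nd6_alt_node_addr_decompose() above: if bytes 11/12 of the address
 * carry the ff:fe filler, the original 48-bit MAC can be recovered
 * from the interface identifier (illustrative helper, not kernel API):
 */
#include <netinet/in.h>
#include <stdint.h>

static int
iid_embeds_mac(const struct in6_addr *in6, uint8_t mac[6])
{
    if (in6->s6_addr[11] != 0xff || in6->s6_addr[12] != 0xfe)
        return (0);             /* not a MAC-derived identifier */
    mac[0] = in6->s6_addr[8] ^ 0x02;    /* undo the u/l bit flip */
    mac[1] = in6->s6_addr[9];
    mac[2] = in6->s6_addr[10];
    mac[3] = in6->s6_addr[13];
    mac[4] = in6->s6_addr[14];
    mac[5] = in6->s6_addr[15];
    return (1);
}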
*/ boolean_t @@ -541,13 +541,11 @@ nd6_prproxy_isours(struct mbuf *m, struct ip6_hdr *ip6, struct route_in6 *ro6, if ((rt = ro6->ro_rt) != NULL) RT_LOCK(rt); - if (rt == NULL || !(rt->rt_flags & RTF_UP) || - rt->generation_id != route_generation) { - if (rt != NULL) { + if (ROUTE_UNUSABLE(ro6)) { + if (rt != NULL) RT_UNLOCK(rt); - rtfree(rt); - rt = ro6->ro_rt = NULL; - } + + ROUTE_RELEASE(ro6); /* Caller must have ensured this condition (not srcrt) */ VERIFY(IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, @@ -565,11 +563,148 @@ nd6_prproxy_isours(struct mbuf *m, struct ip6_hdr *ip6, struct route_in6 *ro6, done: if (ours) - m->m_pkthdr.aux_flags |= MAUXF_PROXY_DST; + m->m_pkthdr.pkt_flags |= PKTF_PROXY_DST; return (ours); } +/* + * Called from the input path to determine whether or not the proxy + * route entry is pointing to the correct interface, and to perform + * the necessary route fixups otherwise. + */ +void +nd6_proxy_find_fwdroute(struct ifnet *ifp, struct route_in6 *ro6) +{ + struct in6_addr *dst6 = &ro6->ro_dst.sin6_addr; + struct ifnet *fwd_ifp = NULL; + struct nd_prefix *pr; + struct rtentry *rt; + + if ((rt = ro6->ro_rt) != NULL) { + RT_LOCK(rt); + if (!(rt->rt_flags & RTF_PROXY) || rt->rt_ifp == ifp) { + nd6log2((LOG_DEBUG, "%s: found incorrect prefix " + "proxy route for dst %s on %s\n", if_name(ifp), + ip6_sprintf(dst6), + if_name(rt->rt_ifp))); + RT_UNLOCK(rt); + /* look it up below */ + } else { + RT_UNLOCK(rt); + /* + * The route is already marked with RTF_PRPROXY and + * it isn't pointing back to the inbound interface; + * optimistically return (see notes below). + */ + return; + } + } + + /* + * Find out where we should forward this packet to, by searching + * for another interface that is proxying for the prefix. Our + * current implementation assumes that the proxied prefix is shared + * to no more than one downstream interfaces (typically a bridge + * interface). + */ + lck_mtx_lock(nd6_mutex); + for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) { + struct in6_addr pr_addr; + struct nd_prefix *fwd; + u_char pr_len; + + NDPR_LOCK(pr); + if (!(pr->ndpr_stateflags & NDPRF_ONLINK) || + !(pr->ndpr_stateflags & NDPRF_PRPROXY) || + !IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, + dst6, &pr->ndpr_mask)) { + NDPR_UNLOCK(pr); + continue; + } + + VERIFY(!(pr->ndpr_stateflags & NDPRF_IFSCOPE)); + bcopy(&pr->ndpr_prefix.sin6_addr, &pr_addr, sizeof (pr_addr)); + pr_len = pr->ndpr_plen; + NDPR_UNLOCK(pr); + + for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) { + NDPR_LOCK(fwd); + if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) || + fwd->ndpr_ifp == ifp || + fwd->ndpr_plen != pr_len || + !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr, + &pr_addr, pr_len)) { + NDPR_UNLOCK(fwd); + continue; + } + + fwd_ifp = fwd->ndpr_ifp; + NDPR_UNLOCK(fwd); + break; + } + break; + } + lck_mtx_unlock(nd6_mutex); + + lck_mtx_lock(rnh_lock); + ROUTE_RELEASE_LOCKED(ro6); + + /* + * Lookup a forwarding route; delete the route if it's incorrect, + * or return to caller if the correct one got created prior to + * our acquiring the rnh_lock. 
+ */ + if ((rt = rtalloc1_scoped_locked(SA(&ro6->ro_dst), 0, + RTF_CLONING | RTF_PRCLONING, IFSCOPE_NONE)) != NULL) { + RT_LOCK(rt); + if (rt->rt_ifp != fwd_ifp || !(rt->rt_flags & RTF_PROXY)) { + rt->rt_flags |= RTF_CONDEMNED; + RT_UNLOCK(rt); + (void) rtrequest_locked(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); + rtfree_locked(rt); + rt = NULL; + } else { + nd6log2((LOG_DEBUG, "%s: found prefix proxy route " + "for dst %s\n", if_name(rt->rt_ifp), + ip6_sprintf(dst6))); + RT_UNLOCK(rt); + ro6->ro_rt = rt; /* refcnt held by rtalloc1 */ + lck_mtx_unlock(rnh_lock); + return; + } + } + VERIFY(rt == NULL && ro6->ro_rt == NULL); + + /* + * Clone a route from the correct parent prefix route and return it. + */ + if (fwd_ifp != NULL && (rt = rtalloc1_scoped_locked(SA(&ro6->ro_dst), 1, + RTF_PRCLONING, fwd_ifp->if_index)) != NULL) { + RT_LOCK(rt); + if (!(rt->rt_flags & RTF_PROXY)) { + RT_UNLOCK(rt); + rtfree_locked(rt); + rt = NULL; + } else { + nd6log2((LOG_DEBUG, "%s: allocated prefix proxy " + "route for dst %s\n", if_name(rt->rt_ifp), + ip6_sprintf(dst6))); + RT_UNLOCK(rt); + ro6->ro_rt = rt; /* refcnt held by rtalloc1 */ + } + } + VERIFY(rt != NULL || ro6->ro_rt == NULL); + + if (fwd_ifp == NULL || rt == NULL) { + nd6log2((LOG_ERR, "%s: failed to find forwarding prefix " + "proxy entry for dst %s\n", if_name(ifp), + ip6_sprintf(dst6))); + } + lck_mtx_unlock(rnh_lock); +} + /* * Called when a prefix transitions between on-link and off-link. Perform * routing (RTF_PROXY) and interface (all-multicast) related operations on @@ -739,8 +874,8 @@ nd6_prproxy_ifaddr(struct in6_ifaddr *ia) * the original interface. */ void -nd6_prproxy_ns_output(struct ifnet *ifp, struct in6_addr *daddr, - struct in6_addr *taddr, struct llinfo_nd6 *ln) +nd6_prproxy_ns_output(struct ifnet *ifp, struct ifnet *exclifp, + struct in6_addr *daddr, struct in6_addr *taddr, struct llinfo_nd6 *ln) { SLIST_HEAD(, nd6_prproxy_prelist) ndprl_head; struct nd6_prproxy_prelist *ndprl, *ndprl_tmp; @@ -749,6 +884,21 @@ nd6_prproxy_ns_output(struct ifnet *ifp, struct in6_addr *daddr, struct in6_addr pr_addr; u_char pr_len; + /* + * Ignore excluded interface if it's the same as the original; + * we always send a NS on the original interface down below. 
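/*
 * Roughly what the IN6_ARE_MASKED_ADDR_EQUAL() test in the prefix walk
 * above amounts to: a byte-wise comparison of the two addresses under
 * the prefix mask (sketch only; the kernel macro operates on 32-bit
 * words rather than bytes):
 */
#include <netinet/in.h>

static int
in6_masked_equal(const struct in6_addr *a, const struct in6_addr *b,
    const struct in6_addr *mask)
{
    int i;

    for (i = 0; i < 16; i++) {
        if ((a->s6_addr[i] & mask->s6_addr[i]) !=
            (b->s6_addr[i] & mask->s6_addr[i]))
            return (0);
    }
    return (1);
}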
+ */ + if (exclifp != NULL && exclifp == ifp) + exclifp = NULL; + + if (exclifp == NULL) + nd6log2((LOG_DEBUG, "%s: sending NS who has %s on ALL\n", + if_name(ifp), ip6_sprintf(taddr))); + else + nd6log2((LOG_DEBUG, "%s: sending NS who has %s on ALL " + "(except %s)\n", if_name(ifp), + ip6_sprintf(taddr), if_name(exclifp))); + SLIST_INIT(&ndprl_head); lck_mtx_lock(nd6_mutex); @@ -771,7 +921,7 @@ nd6_prproxy_ns_output(struct ifnet *ifp, struct in6_addr *daddr, for (fwd = nd_prefix.lh_first; fwd; fwd = fwd->ndpr_next) { NDPR_LOCK(fwd); if (!(fwd->ndpr_stateflags & NDPRF_ONLINK) || - fwd->ndpr_ifp == ifp || + fwd->ndpr_ifp == ifp || fwd->ndpr_ifp == exclifp || fwd->ndpr_plen != pr_len || !in6_are_prefix_equal(&fwd->ndpr_prefix.sin6_addr, &pr_addr, pr_len)) { @@ -813,10 +963,9 @@ nd6_prproxy_ns_output(struct ifnet *ifp, struct in6_addr *daddr, if (pr->ndpr_stateflags & NDPRF_ONLINK) { NDPR_UNLOCK(pr); nd6log2((LOG_DEBUG, - "%s%d: Sending cloned NS who has %s on %s%d\n", - fwd_ifp->if_name, fwd_ifp->if_unit, - ip6_sprintf(taddr), ifp->if_name, - ifp->if_unit)); + "%s: Sending cloned NS who has %s, originally " + "on %s\n", if_name(fwd_ifp), + ip6_sprintf(taddr), if_name(ifp))); nd6_ns_output(fwd_ifp, daddr, taddr, NULL, 0); } else { @@ -944,11 +1093,11 @@ nd6_prproxy_ns_input(struct ifnet *ifp, struct in6_addr *saddr, if (pr->ndpr_stateflags & NDPRF_ONLINK) { NDPR_UNLOCK(pr); nd6log2((LOG_DEBUG, - "%s%d: Forwarding NS (%s) from %s to %s who has %s " - "on %s%d\n", fwd_ifp->if_name, fwd_ifp->if_unit, - ndprl->ndprl_sol ? "NUD/AR" : "DAD", - ip6_sprintf(saddr), ip6_sprintf(daddr), - ip6_sprintf(taddr), ifp->if_name, ifp->if_unit)); + "%s: Forwarding NS (%s) from %s to %s who " + "has %s, originally on %s\n", if_name(fwd_ifp), + ndprl->ndprl_sol ? "NUD/AR" : + "DAD", ip6_sprintf(saddr), ip6_sprintf(daddr), + ip6_sprintf(taddr), if_name(ifp))); nd6_ns_output(fwd_ifp, ndprl->ndprl_sol ? taddr : NULL, taddr, NULL, !ndprl->ndprl_sol); @@ -1088,20 +1237,19 @@ nd6_prproxy_na_input(struct ifnet *ifp, struct in6_addr *saddr, if (send_na) { if (!ndprl->ndprl_sol) { nd6log2((LOG_DEBUG, - "%s%d: Forwarding NA (DAD) from %s to %s " - "tgt is %s on %s%d\n", - fwd_ifp->if_name, fwd_ifp->if_unit, + "%s: Forwarding NA (DAD) from %s to %s " + "tgt is %s, originally on %s\n", + if_name(fwd_ifp), ip6_sprintf(saddr), ip6_sprintf(&daddr), - ip6_sprintf(taddr), ifp->if_name, - ifp->if_unit)); + ip6_sprintf(taddr), if_name(ifp))); } else { nd6log2((LOG_DEBUG, - "%s%d: Forwarding NA (NUD/AR) from %s to " - "%s (was %s) tgt is %s on %s%d\n", - fwd_ifp->if_name, fwd_ifp->if_unit, - ip6_sprintf(saddr), ip6_sprintf(&daddr), - ip6_sprintf(daddr0), ip6_sprintf(taddr), - ifp->if_name, ifp->if_unit)); + "%s: Forwarding NA (NUD/AR) from %s to " + "%s (was %s) tgt is %s, originally on " + "%s\n", if_name(fwd_ifp), + ip6_sprintf(saddr), + ip6_sprintf(&daddr), ip6_sprintf(daddr0), + ip6_sprintf(taddr), if_name(ifp))); } nd6_na_output(fwd_ifp, &daddr, taddr, flags, 1, NULL); diff --git a/bsd/netinet6/nd6_rtr.c b/bsd/netinet6/nd6_rtr.c index 34bfb18a6..b4f2cf456 100644 --- a/bsd/netinet6/nd6_rtr.c +++ b/bsd/netinet6/nd6_rtr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,9 +26,6 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/nd6_rtr.c,v 1.11 2002/04/19 04:46:23 suz Exp $ */ -/* $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -71,6 +68,9 @@ #include #include #include +#include + +#include #include #include @@ -99,13 +99,14 @@ static struct nd_defrouter *defrtrlist_update_common(struct nd_defrouter *, boolean_t); static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *); -static struct in6_ifaddr *in6_ifadd(struct nd_prefix *, int); +static struct in6_ifaddr *in6_pfx_newpersistaddr(struct nd_prefix *, int, + int *); static void defrtrlist_sync(struct ifnet *); static struct nd_pfxrouter *pfxrtr_lookup(struct nd_prefix *, struct nd_defrouter *); static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *); -static void pfxrtr_del(struct nd_pfxrouter *); +static void pfxrtr_del(struct nd_pfxrouter *, struct nd_prefix *); static struct nd_pfxrouter *find_pfxlist_reachable_router(struct nd_prefix *); static void nd6_rtmsg(int, struct rtentry *); @@ -115,7 +116,7 @@ static struct nd_prefix *nd6_prefix_equal_lookup(struct nd_prefix *, boolean_t); static void nd6_prefix_sync(struct ifnet *); static void in6_init_address_ltimes(struct nd_prefix *, - struct in6_addrlifetime *, boolean_t); + struct in6_addrlifetime *); static int rt6_deleteroute(struct radix_node *, void *); @@ -142,9 +143,9 @@ u_int32_t ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME; u_int32_t ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME; /* * shorter lifetimes for debugging purposes. -u_int32_t ip6_temp_preferred_lifetime = 800; -static u_int32_t ip6_temp_valid_lifetime = 1800; -*/ + * u_int32_t ip6_temp_preferred_lifetime = 800; + * static u_int32_t ip6_temp_valid_lifetime = 1800; + */ int ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE; extern lck_mtx_t *nd6_mutex; @@ -160,11 +161,11 @@ static void *nd_defrouter_waitchan = &nd_defrouter_busy; static int nd_defrouter_waiters = 0; /* RTPREF_MEDIUM has to be 0! 
*/ -#define RTPREF_HIGH 1 -#define RTPREF_MEDIUM 0 -#define RTPREF_LOW (-1) -#define RTPREF_RESERVED (-2) -#define RTPREF_INVALID (-3) /* internal */ +#define RTPREF_HIGH 1 +#define RTPREF_MEDIUM 0 +#define RTPREF_LOW (-1) +#define RTPREF_RESERVED (-2) +#define RTPREF_INVALID (-3) /* internal */ #define NDPR_TRACE_HIST_SIZE 32 /* size of trace history */ @@ -189,7 +190,7 @@ static struct zone *ndpr_zone; /* zone for nd_prefix */ #define NDPR_ZONE_MAX 64 /* maximum elements in zone */ #define NDPR_ZONE_NAME "nd6_prefix" /* zone name */ -#define NDDR_TRACE_HIST_SIZE 32 /* size of trace history */ +#define NDDR_TRACE_HIST_SIZE 32 /* size of trace history */ /* For gdb */ __private_extern__ unsigned int nddr_trace_hist_size = NDDR_TRACE_HIST_SIZE; @@ -304,9 +305,9 @@ nd6_rs_input( } else { struct sockaddr_in6 src_sa6; - bzero(&src_sa6, sizeof(src_sa6)); + bzero(&src_sa6, sizeof (src_sa6)); src_sa6.sin6_family = AF_INET6; - src_sa6.sin6_len = sizeof(src_sa6); + src_sa6.sin6_len = sizeof (src_sa6); src_sa6.sin6_addr = ip6->ip6_src; if (!nd6_is_addr_neighbor(&src_sa6, ifp, 0)) { nd6log((LOG_INFO, "nd6_rs_input: " @@ -326,7 +327,7 @@ nd6_rs_input( } #endif - icmp6len -= sizeof(*nd_rs); + icmp6len -= sizeof (*nd_rs); nd6_option_init(nd_rs + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, @@ -350,11 +351,11 @@ nd6_rs_input( nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0); - freeit: +freeit: m_freem(m); return; - bad: +bad: icmp6stat.icp6s_badrs++; m_freem(m); } @@ -380,19 +381,20 @@ nd6_ra_input( int mcast = 0; union nd_opts ndopts; struct nd_defrouter *dr = NULL; - struct timeval timenow; u_int32_t mtu = 0; char *lladdr = NULL; u_int32_t lladdrlen = 0; struct nd_prefix_list *nd_prefix_list_head = NULL; u_int32_t nd_prefix_list_length = 0; struct in6_ifaddr *ia6 = NULL; + struct nd_prefix_list *prfl; + struct nd_defrouter dr0; + u_int32_t advreachable; + /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); - getmicrotime(&timenow); - /* * Discard RA unless IFEF_ACCEPT_RTADV is set (as host), or when * IFEF_IPV6_ROUTER is set (as router) but the RA is not locally @@ -439,7 +441,7 @@ nd6_ra_input( } #endif - icmp6len -= sizeof(*nd_ra); + icmp6len -= sizeof (*nd_ra); nd6_option_init(nd_ra + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, @@ -448,9 +450,7 @@ nd6_ra_input( goto freeit; } - { - struct nd_defrouter dr0; - u_int32_t advreachable = nd_ra->nd_ra_reachable; + advreachable = nd_ra->nd_ra_reachable; /* remember if this is a multicasted advertisement */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) @@ -468,7 +468,7 @@ nd6_ra_input( dr0.rtaddr = saddr6; dr0.flags = nd_ra->nd_ra_flags_reserved; dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime); - dr0.expire = timenow.tv_sec + dr0.rtlifetime; + dr0.expire = net_uptime() + dr0.rtlifetime; dr0.ifp = ifp; /* unspecified or not? 
(RFC 2461 6.3.4) */ if (advreachable) { @@ -490,7 +490,6 @@ nd6_ra_input( lck_mtx_lock(nd6_mutex); dr = defrtrlist_update(&dr0); lck_mtx_unlock(nd6_mutex); - } /* * prefix @@ -501,9 +500,9 @@ nd6_ra_input( struct nd_prefix pr; for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi; - pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end; - pt = (struct nd_opt_hdr *)((caddr_t)pt + - (pt->nd_opt_len << 3))) { + pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end; + pt = (struct nd_opt_hdr *)((caddr_t)pt + + (pt->nd_opt_len << 3))) { if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION) continue; pi = (struct nd_opt_prefix_info *)pt; @@ -524,27 +523,27 @@ nd6_ra_input( continue; } - if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix) - || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) { + if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix) || + IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) { nd6log((LOG_INFO, - "nd6_ra_input: invalid prefix " - "%s, ignored\n", + "%s: invalid prefix %s, ignored\n", + __func__, ip6_sprintf(&pi->nd_opt_pi_prefix))); continue; } - bzero(&pr, sizeof(pr)); + bzero(&pr, sizeof (pr)); lck_mtx_init(&pr.ndpr_lock, ifa_mtx_grp, ifa_mtx_attr); NDPR_LOCK(&pr); pr.ndpr_prefix.sin6_family = AF_INET6; - pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix); + pr.ndpr_prefix.sin6_len = sizeof (pr.ndpr_prefix); pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix; pr.ndpr_ifp = m->m_pkthdr.rcvif; pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved & - ND_OPT_PI_FLAG_ONLINK) ? 1 : 0; + ND_OPT_PI_FLAG_ONLINK) ? 1 : 0; pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved & - ND_OPT_PI_FLAG_AUTO) ? 1 : 0; + ND_OPT_PI_FLAG_AUTO) ? 1 : 0; pr.ndpr_plen = pi->nd_opt_pi_prefix_len; pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time); pr.ndpr_pltime = @@ -563,12 +562,11 @@ nd6_ra_input( "[not RFC 4193]\n", ip6_sprintf(&pi->nd_opt_pi_prefix))); pr.ndpr_raf_auto = 0; - } - else if (!nd6_accept_6to4 && - IN6_IS_ADDR_6TO4(&pi->nd_opt_pi_prefix)) { + } else if (!nd6_accept_6to4 && + IN6_IS_ADDR_6TO4(&pi->nd_opt_pi_prefix)) { nd6log((LOG_INFO, - "nd6_ra_input: no SLAAC on prefix %s " - "[6to4]\n", + "%s: no SLAAC on prefix %s " + "[6to4]\n", __func__, ip6_sprintf(&pi->nd_opt_pi_prefix))); pr.ndpr_raf_auto = 0; } @@ -580,7 +578,7 @@ nd6_ra_input( } else { NDPR_UNLOCK(&pr); } - (void)prelist_update(&pr, dr, m, mcast); + (void) prelist_update(&pr, dr, m, mcast); lck_mtx_destroy(&pr.ndpr_lock, ifa_mtx_grp); /* @@ -589,7 +587,7 @@ nd6_ra_input( * be properly set until after the router advertisement * updating can vet the values. 
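/*
 * Sketch of how the RTPREF_* values defined above are derived from the
 * 2-bit router preference field in nd_ra_flags_reserved (RFC 4191).
 * The 0x18 mask is assumed to be the usual ND_RA_FLAG_RTPREF_MASK;
 * treat this helper as illustrative rather than the file's rtpref():
 */
static int
rtpref_from_flags(unsigned char raflags)
{
    switch (raflags & 0x18) {
    case 0x08:
        return (1);     /* RTPREF_HIGH */
    case 0x00:
        return (0);     /* RTPREF_MEDIUM */
    case 0x18:
        return (-1);    /* RTPREF_LOW */
    default:
        return (-2);    /* 0x10 is reserved: RTPREF_RESERVED */
    }
}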
*/ - struct nd_prefix_list *prfl = NULL; + prfl = NULL; MALLOC(prfl, struct nd_prefix_list *, sizeof (*prfl), M_TEMP, M_WAITOK | M_ZERO); @@ -599,6 +597,7 @@ nd6_ra_input( continue; } + /* this is only for nd6_post_msg(), otherwise unused */ bcopy(&pr.ndpr_prefix, &prfl->pr.ndpr_prefix, sizeof (prfl->pr.ndpr_prefix)); prfl->pr.ndpr_raf = pr.ndpr_raf; @@ -606,6 +605,9 @@ nd6_ra_input( prfl->pr.ndpr_vltime = pr.ndpr_vltime; prfl->pr.ndpr_pltime = pr.ndpr_pltime; prfl->pr.ndpr_expire = pr.ndpr_expire; + prfl->pr.ndpr_base_calendartime = + pr.ndpr_base_calendartime; + prfl->pr.ndpr_base_uptime = pr.ndpr_base_uptime; prfl->pr.ndpr_stateflags = pr.ndpr_stateflags; prfl->pr.ndpr_addrcnt = pr.ndpr_addrcnt; prfl->pr.ndpr_ifp = pr.ndpr_ifp; @@ -668,7 +670,7 @@ nd6_ra_input( ndi = NULL; } - skip: +skip: /* * Source link layer address @@ -702,22 +704,20 @@ nd6_ra_input( pfxlist_onlink_check(); lck_mtx_unlock(nd6_mutex); - freeit: +freeit: m_freem(m); if (dr) NDDR_REMREF(dr); - { - struct nd_prefix_list *prfl = NULL; + prfl = NULL; while ((prfl = nd_prefix_list_head) != NULL) { nd_prefix_list_head = prfl->next; FREE(prfl, M_TEMP); } - } return; - bad: +bad: icmp6stat.icp6s_badra++; goto freeit; } @@ -737,9 +737,8 @@ nd6_rtmsg(cmd, rt) RT_LOCK_ASSERT_HELD(rt); - bzero((caddr_t)&info, sizeof(info)); - /* Lock ifp for if_lladdr */ - ifnet_lock_shared(ifp); + bzero((caddr_t)&info, sizeof (info)); + /* It's not necessary to lock ifp for if_lladdr */ info.rti_info[RTAX_DST] = rt_key(rt); info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); @@ -751,7 +750,6 @@ nd6_rtmsg(cmd, rt) info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; rt_missmsg(cmd, &info, rt->rt_flags, 0); - ifnet_lock_done(ifp); } static void @@ -763,32 +761,39 @@ defrouter_addreq(struct nd_defrouter *new, boolean_t scoped) int err; lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED); + NDDR_LOCK_ASSERT_NOTHELD(new); + /* + * We're free to lock and unlock NDDR because our callers + * are holding an extra reference for us. + */ + NDDR_LOCK(new); if (new->stateflags & NDDRF_INSTALLED) - return; + goto out; if (new->ifp->if_eflags & IFEF_IPV6_ROUTER) { nd6log2((LOG_INFO, "%s: ignoring router %s, scoped=%d, " "static=%d on advertising interface\n", if_name(new->ifp), ip6_sprintf(&new->rtaddr), scoped, (new->stateflags & NDDRF_STATIC) ? 1 : 0)); - return; + goto out; } nd6log2((LOG_INFO, "%s: adding default router %s, scoped=%d, " "static=%d\n", if_name(new->ifp), ip6_sprintf(&new->rtaddr), scoped, (new->stateflags & NDDRF_STATIC) ? 1 : 0)); - Bzero(&def, sizeof(def)); - Bzero(&mask, sizeof(mask)); - Bzero(&gate, sizeof(gate)); + Bzero(&def, sizeof (def)); + Bzero(&mask, sizeof (mask)); + Bzero(&gate, sizeof (gate)); def.sin6_len = mask.sin6_len = gate.sin6_len - = sizeof(struct sockaddr_in6); + = sizeof (struct sockaddr_in6); def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = new->rtaddr; ifscope = scoped ? 
new->ifp->if_index : IFSCOPE_NONE; + NDDR_UNLOCK(new); err = rtrequest_scoped(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, @@ -799,6 +804,7 @@ defrouter_addreq(struct nd_defrouter *new, boolean_t scoped) nd6_rtmsg(RTM_ADD, newrt); /* tell user process */ RT_REMREF_LOCKED(newrt); RT_UNLOCK(newrt); + NDDR_LOCK(new); new->stateflags |= NDDRF_INSTALLED; if (ifscope != IFSCOPE_NONE) new->stateflags |= NDDRF_IFSCOPE; @@ -808,8 +814,12 @@ defrouter_addreq(struct nd_defrouter *new, boolean_t scoped) "%s on %s scoped %d (errno = %d)\n", __func__, ip6_sprintf(&gate.sin6_addr), if_name(new->ifp), (ifscope != IFSCOPE_NONE), err)); + NDDR_LOCK(new); } new->err = err; + +out: + NDDR_UNLOCK(new); } struct nd_defrouter * @@ -822,12 +832,12 @@ defrouter_lookup( lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); for (dr = TAILQ_FIRST(&nd_defrouter); dr; - dr = TAILQ_NEXT(dr, dr_entry)) { + dr = TAILQ_NEXT(dr, dr_entry)) { NDDR_LOCK(dr); if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) { NDDR_ADDREF_LOCKED(dr); NDDR_UNLOCK(dr); - return(dr); + return (dr); } NDDR_UNLOCK(dr); } @@ -849,24 +859,27 @@ defrouter_delreq(struct nd_defrouter *dr) int err; lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED); - + NDDR_LOCK_ASSERT_NOTHELD(dr); + /* + * We're free to lock and unlock NDDR because our callers + * are holding an extra reference for us. + */ + NDDR_LOCK(dr); /* ifp would be NULL for the "drany" case */ if (dr->ifp != NULL && !(dr->stateflags & NDDRF_INSTALLED)) - return; - - NDDR_LOCK_ASSERT_HELD(dr); + goto out; nd6log2((LOG_INFO, "%s: removing default router %s, scoped=%d, " "static=%d\n", dr->ifp != NULL ? if_name(dr->ifp) : "ANY", ip6_sprintf(&dr->rtaddr), (dr->stateflags & NDDRF_IFSCOPE) ? 1 : 0, (dr->stateflags & NDDRF_STATIC) ? 1 : 0)); - Bzero(&def, sizeof(def)); - Bzero(&mask, sizeof(mask)); - Bzero(&gate, sizeof(gate)); + Bzero(&def, sizeof (def)); + Bzero(&mask, sizeof (mask)); + Bzero(&gate, sizeof (gate)); def.sin6_len = mask.sin6_len = gate.sin6_len - = sizeof(struct sockaddr_in6); + = sizeof (struct sockaddr_in6); def.sin6_family = mask.sin6_family = gate.sin6_family = AF_INET6; gate.sin6_addr = dr->rtaddr; @@ -876,6 +889,7 @@ defrouter_delreq(struct nd_defrouter *dr) } else { ifscope = IFSCOPE_NONE; } + NDDR_UNLOCK(dr); err = rtrequest_scoped(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, ifscope); @@ -891,6 +905,7 @@ defrouter_delreq(struct nd_defrouter *dr) ip6_sprintf(&gate.sin6_addr), dr->ifp != NULL ? 
if_name(dr->ifp) : "ANY", (ifscope != IFSCOPE_NONE), err)); } + NDDR_LOCK(dr); /* ESRCH means it's no longer in the routing table; ignore it */ if (oldrt != NULL || err == ESRCH) { dr->stateflags &= ~NDDRF_INSTALLED; @@ -898,6 +913,8 @@ defrouter_delreq(struct nd_defrouter *dr) dr->stateflags &= ~NDDRF_IFSCOPE; } dr->err = 0; +out: + NDDR_UNLOCK(dr); } @@ -918,9 +935,7 @@ defrouter_reset(void) NDDR_ADDREF_LOCKED(dr); NDDR_UNLOCK(dr); lck_mtx_unlock(nd6_mutex); - NDDR_LOCK(dr); defrouter_delreq(dr); - NDDR_UNLOCK(dr); lck_mtx_lock(nd6_mutex); NDDR_REMREF(dr); dr = TAILQ_FIRST(&nd_defrouter); @@ -935,9 +950,7 @@ defrouter_reset(void) bzero(&drany, sizeof (drany)); lck_mtx_init(&drany.nddr_lock, ifa_mtx_grp, ifa_mtx_attr); lck_mtx_unlock(nd6_mutex); - NDDR_LOCK(&drany); defrouter_delreq(&drany); - NDDR_UNLOCK(&drany); lck_mtx_destroy(&drany.nddr_lock, ifa_mtx_grp); lck_mtx_lock(nd6_mutex); } @@ -1026,9 +1039,15 @@ defrtrlist_del(struct nd_defrouter *dr) struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; struct ifnet *ifp = dr->ifp; + boolean_t resetmtu; lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); + if (!ip6_doscopedroute && dr == TAILQ_FIRST(&nd_defrouter)) + deldr = dr; /* The router is primary. */ + + TAILQ_REMOVE(&nd_defrouter, dr, dr_entry); + ++nd6_defrouter_genid; /* * Flush all the routing table entries that use the router * as a next hop. @@ -1041,24 +1060,14 @@ defrtrlist_del(struct nd_defrouter *dr) lck_mtx_lock(nd6_mutex); NDDR_REMREF(dr); } - - if (!ip6_doscopedroute && dr == TAILQ_FIRST(&nd_defrouter)) - deldr = dr; /* The router is primary. */ - - TAILQ_REMOVE(&nd_defrouter, dr, dr_entry); - ++nd6_defrouter_genid; - nd6log2((LOG_INFO, "%s: freeing defrouter %s\n", if_name(dr->ifp), ip6_sprintf(&dr->rtaddr))); - /* * Delete it from the routing table. */ NDDR_ADDREF(dr); lck_mtx_unlock(nd6_mutex); - NDDR_LOCK(dr); defrouter_delreq(dr); - NDDR_UNLOCK(dr); lck_mtx_lock(nd6_mutex); NDDR_REMREF(dr); @@ -1070,7 +1079,7 @@ defrtrlist_del(struct nd_defrouter *dr) NDPR_LOCK(pr); if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) - pfxrtr_del(pfxrtr); + pfxrtr_del(pfxrtr, pr); NDPR_UNLOCK(pr); } @@ -1084,17 +1093,24 @@ defrtrlist_del(struct nd_defrouter *dr) if (deldr || ip6_doscopedroute) defrouter_select(ifp); + resetmtu = FALSE; lck_rw_lock_shared(nd_if_rwlock); if (ifp->if_index < nd_ifinfo_indexlim) { struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index]; VERIFY(ndi->initialized); lck_mtx_lock(&ndi->lock); - VERIFY(ndi->ndefrouters > 0); - ndi->ndefrouters--; + VERIFY(ndi->ndefrouters >= 0); + if (ndi->ndefrouters > 0 && --ndi->ndefrouters == 0) { + nd6_ifreset(ifp); + resetmtu = TRUE; + } lck_mtx_unlock(&ndi->lock); } lck_rw_done(nd_if_rwlock); + if (resetmtu) + nd6_setmtu(ifp); + NDDR_REMREF(dr); /* remove list reference */ } @@ -1264,7 +1280,7 @@ defrouter_select(struct ifnet *ifp) * installed_dr = currently installed primary router */ for (dr = TAILQ_FIRST(&nd_defrouter); dr; - dr = TAILQ_NEXT(dr, dr_entry)) { + dr = TAILQ_NEXT(dr, dr_entry)) { boolean_t reachable, advrouter; struct in6_addr rtaddr; struct ifnet *drifp; @@ -1367,7 +1383,7 @@ defrouter_select(struct ifnet *ifp) * Install the unreachable one(s) if necesssary. 
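/*
 * defrouter_select() above can scan nd_defrouter front-to-back because
 * the update path keeps the list sorted by decreasing preference, with
 * a new entry inserted at the end of its preference group. A sketch of
 * that ordered insert (TAILQ mechanics only; refcounting and the
 * scoped-route cases are elided, and "struct nd_drhead" is assumed to
 * be the list-head type from nd6.h):
 */
#include <sys/queue.h>

static void
defrtrlist_insert_sorted(struct nd_drhead *head, struct nd_defrouter *n)
{
    struct nd_defrouter *dr;

    /* stop at the first entry we outrank; that is our group's end */
    TAILQ_FOREACH(dr, head, dr_entry) {
        if (rtpref(n) > rtpref(dr))
            break;
    }
    if (dr != NULL)
        TAILQ_INSERT_BEFORE(dr, n, dr_entry);
    else
        TAILQ_INSERT_TAIL(head, n, dr_entry);
}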
*/ for (dr = TAILQ_FIRST(&nd_defrouter); dr; - dr = TAILQ_NEXT(dr, dr_entry)) { + dr = TAILQ_NEXT(dr, dr_entry)) { struct nd_defrouter *_dr; if (!ip6_doscopedroute) @@ -1383,7 +1399,7 @@ defrouter_select(struct ifnet *ifp) /* See if there is already a default router for the link */ for (_dr = TAILQ_FIRST(&nd_defrouter); _dr; - _dr = TAILQ_NEXT(_dr, dr_entry)) { + _dr = TAILQ_NEXT(_dr, dr_entry)) { if (_dr != dr) NDDR_LOCK(_dr); if (_dr == dr || _dr->ifp != dr->ifp) { @@ -1532,13 +1548,9 @@ defrouter_select(struct ifnet *ifp) */ lck_mtx_unlock(nd6_mutex); if (installed_dr) { - NDDR_LOCK(installed_dr); defrouter_delreq(installed_dr); - NDDR_UNLOCK(installed_dr); } - NDDR_LOCK(dr); defrouter_addreq(dr, FALSE); - NDDR_UNLOCK(dr); lck_mtx_lock(nd6_mutex); } goto out; @@ -1564,7 +1576,8 @@ defrouter_select(struct ifnet *ifp) * * genid is used to skip entries that are not to be added/removed on the * second while loop. - * NDDRF_PROCESSED is used to skip entries that were already processed. + * NDDRF_PROCESSED is used to skip entries that were already + * processed. * This is necessary because we drop the nd6_mutex and start the while * loop again. */ @@ -1591,9 +1604,7 @@ defrouter_select(struct ifnet *ifp) NDDR_ADDREF_LOCKED(dr); NDDR_UNLOCK(dr); lck_mtx_unlock(nd6_mutex); - NDDR_LOCK(dr); defrouter_delreq(dr); - NDDR_UNLOCK(dr); lck_mtx_lock(nd6_mutex); NDDR_LOCK(dr); if (dr && dr != installed_dr0) @@ -1635,7 +1646,7 @@ defrouter_select(struct ifnet *ifp) /* Handle case (b) */ for (_dr = TAILQ_FIRST(&nd_defrouter); _dr; - _dr = TAILQ_NEXT(_dr, dr_entry)) { + _dr = TAILQ_NEXT(_dr, dr_entry)) { if (_dr == dr) continue; /* @@ -1657,9 +1668,7 @@ defrouter_select(struct ifnet *ifp) !(_dr->stateflags & NDDRF_STATIC) && (dr->stateflags & NDDRF_STATIC)) { lck_mtx_unlock(nd6_mutex); - NDDR_LOCK(_dr); defrouter_delreq(_dr); - NDDR_UNLOCK(_dr); lck_mtx_lock(nd6_mutex); NDDR_REMREF(_dr); _dr = NULL; @@ -1669,11 +1678,9 @@ defrouter_select(struct ifnet *ifp) NDDR_ADDREF_LOCKED(dr); NDDR_UNLOCK(dr); lck_mtx_unlock(nd6_mutex); - NDDR_LOCK(dr); defrouter_addreq(dr, (selected_dr == NULL || dr->ifp != selected_dr->ifp)); dr->genid = nd6_defrouter_genid; - NDDR_UNLOCK(dr); lck_mtx_lock(nd6_mutex); NDDR_REMREF(dr); /* @@ -1712,6 +1719,7 @@ defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped) struct nd_defrouter *dr, *n; struct ifnet *ifp = new->ifp; struct nd_ifinfo *ndi = NULL; + struct timeval caltime; lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); @@ -1789,12 +1797,12 @@ defrtrlist_update_common(struct nd_defrouter *new, boolean_t scoped) /* entry does not exist */ if (new->rtlifetime == 0) { - return(NULL); + return (NULL); } n = nddr_alloc(M_WAITOK); if (n == NULL) { - return(NULL); + return (NULL); } lck_rw_lock_shared(nd_if_rwlock); @@ -1824,18 +1832,23 @@ freeit: nd6log2((LOG_INFO, "%s: allocating defrouter %s\n", if_name(ifp), ip6_sprintf(&new->rtaddr))); + getmicrotime(&caltime); NDDR_LOCK(n); - memcpy(&n->rtaddr, &new->rtaddr, sizeof(n->rtaddr)); + memcpy(&n->rtaddr, &new->rtaddr, sizeof (n->rtaddr)); n->flags = new->flags; n->stateflags = new->stateflags; n->stateflags &= ~NDDRF_PROCESSED; n->rtlifetime = new->rtlifetime; n->expire = new->expire; + n->base_calendartime = caltime.tv_sec; + n->base_uptime = net_uptime(); n->ifp = new->ifp; n->genid = new->genid; n->err = new->err; NDDR_UNLOCK(n); insert: + /* get nd6_service() to be scheduled as soon as it's convenient */ + ++nd6_sched_timeout_want; /* * Insert the new router in the Default Router List; @@ -1849,7 +1862,7 @@ insert: /* 
insert at the end of the group */ for (dr = TAILQ_FIRST(&nd_defrouter); dr; - dr = TAILQ_NEXT(dr, dr_entry)) { + dr = TAILQ_NEXT(dr, dr_entry)) { if (rtpref(n) > rtpref(dr) || (ip6_doscopedroute && !scoped && rtpref(n) == rtpref(dr))) break; @@ -1889,7 +1902,7 @@ defrtrlist_sync(struct ifnet *ifp) } for (dr = TAILQ_FIRST(&nd_defrouter); dr; - dr = TAILQ_NEXT(dr, dr_entry)) { + dr = TAILQ_NEXT(dr, dr_entry)) { NDDR_LOCK(dr); if (dr->ifp == ifp && (dr->stateflags & NDDRF_INSTALLED)) break; @@ -1899,7 +1912,7 @@ defrtrlist_sync(struct ifnet *ifp) if (dr == NULL) { defrouter_select(ifp); } else { - memcpy(&new.rtaddr, &dr->rtaddr, sizeof(new.rtaddr)); + memcpy(&new.rtaddr, &dr->rtaddr, sizeof (new.rtaddr)); new.flags = dr->flags; new.stateflags = dr->stateflags; new.stateflags &= ~NDDRF_PROCESSED; @@ -1929,7 +1942,7 @@ pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr) break; } - return(search); + return (search); } static void @@ -1943,21 +1956,23 @@ pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) new = zalloc(ndprtr_zone); if (new == NULL) return; - bzero(new, sizeof(*new)); + bzero(new, sizeof (*new)); new->router = dr; NDPR_LOCK(pr); LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); + pr->ndpr_genid++; NDPR_UNLOCK(pr); - + pfxlist_onlink_check(); } static void -pfxrtr_del( - struct nd_pfxrouter *pfr) +pfxrtr_del(struct nd_pfxrouter *pfr, struct nd_prefix *pr) { lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); + NDPR_LOCK_ASSERT_HELD(pr); + pr->ndpr_genid++; LIST_REMOVE(pfr, pfr_entry); zfree(ndprtr_zone, pfr); } @@ -1982,7 +1997,7 @@ nd6_prefix_lookup(struct nd_prefix *pr) } lck_mtx_unlock(nd6_mutex); - return(search); + return (search); } static void @@ -2042,6 +2057,7 @@ repeat: NDPR_ADDREF_LOCKED(pr); prelist_remove(pr); NDPR_UNLOCK(pr); + pfxlist_onlink_check(); NDPR_REMREF(pr); } else { NDPR_UNLOCK(pr); @@ -2060,9 +2076,6 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, struct ifnet *ifp = pr->ndpr_ifp; struct nd_ifinfo *ndi = NULL; int i, error; - struct timeval timenow; - - getmicrotime(&timenow); if (ip6_maxifprefixes >= 0) { lck_rw_lock_shared(nd_if_rwlock); @@ -2089,7 +2102,7 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, if (ndi->nprefixes >= ip6_maxifprefixes) { lck_mtx_unlock(&ndi->lock); lck_rw_done(nd_if_rwlock); - return(ENOMEM); + return (ENOMEM); } lck_mtx_unlock(&ndi->lock); lck_rw_done(nd_if_rwlock); @@ -2097,7 +2110,7 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, new = ndpr_alloc(M_WAITOK); if (new == NULL) - return ENOMEM; + return (ENOMEM); NDPR_LOCK(new); NDPR_LOCK(pr); @@ -2113,9 +2126,9 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, if ((error = in6_init_prefix_ltimes(new)) != 0) { NDPR_UNLOCK(new); ndpr_free(new); - return(error); + return (error); } - new->ndpr_lastupdate = timenow.tv_sec; + new->ndpr_lastupdate = net_uptime(); if (newp != NULL) { *newp = new; NDPR_ADDREF_LOCKED(new); /* for caller */ @@ -2130,6 +2143,9 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, NDPR_UNLOCK(new); + /* get nd6_service() to be scheduled as soon as it's convenient */ + ++nd6_sched_timeout_want; + lck_mtx_lock(nd6_mutex); /* link ndpr_entry to nd_prefix list */ LIST_INSERT_HEAD(&nd_prefix, new, ndpr_entry); @@ -2171,7 +2187,7 @@ nd6_prelist_add(struct nd_prefix *pr, struct nd_defrouter *dr, lck_mtx_unlock(nd6_mutex); - return 0; + return (0); } /* @@ -2224,7 +2240,7 @@ prelist_remove(struct nd_prefix *pr) /* free list of routers that adversed the prefix */ for 
(pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) { next = pfr->pfr_next; - pfxrtr_del(pfr); + pfxrtr_del(pfr, pr); } lck_rw_lock_shared(nd_if_rwlock); @@ -2244,7 +2260,11 @@ prelist_remove(struct nd_prefix *pr) /* NOTREACHED */ } - pfxlist_onlink_check(); + /* + * Don't call pfxlist_onlink_check() here because we are + * holding the NDPR lock and this could cause a deadlock when + * there are multiple threads executing pfxlist_onlink_check(). + */ } int @@ -2262,7 +2282,7 @@ prelist_update( int newprefix = 0; int auth; struct in6_addrlifetime lt6_tmp; - struct timeval timenow; + uint64_t timenow = net_uptime(); /* no need to lock "new" here, as it is local to the caller */ NDPR_LOCK_ASSERT_NOTHELD(new); @@ -2274,8 +2294,7 @@ prelist_update( * both IP header and IP datagrams, doesn't it ? */ #if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM) - auth = (m->m_flags & M_AUTHIPHDR - && m->m_flags & M_AUTHIPDGM) ? 1 : 0; + auth = (m->m_flags & M_AUTHIPHDR) && (m->m_flags & M_AUTHIPDGM); #endif } @@ -2300,8 +2319,8 @@ prelist_update( if (new->ndpr_raf_onlink) { pr->ndpr_vltime = new->ndpr_vltime; pr->ndpr_pltime = new->ndpr_pltime; - pr->ndpr_preferred = new->ndpr_preferred; - pr->ndpr_expire = new->ndpr_expire; + (void) in6_init_prefix_ltimes(pr); /* XXX error case? */ + pr->ndpr_lastupdate = net_uptime(); } if (new->ndpr_raf_onlink && @@ -2338,16 +2357,16 @@ prelist_update( if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0) goto end; - bzero(&new->ndpr_addr, sizeof(struct in6_addr)); + bzero(&new->ndpr_addr, sizeof (struct in6_addr)); error = nd6_prelist_add(new, dr, &newpr, FALSE); if (error != 0 || newpr == NULL) { nd6log((LOG_NOTICE, "prelist_update: " "nd6_prelist_add failed for %s/%d on %s " - "errno=%d, returnpr=%p\n", + "errno=%d, returnpr=0x%llx\n", ip6_sprintf(&new->ndpr_prefix.sin6_addr), - new->ndpr_plen, if_name(new->ndpr_ifp), - error, newpr)); + new->ndpr_plen, if_name(new->ndpr_ifp), + error, (uint64_t)VM_KERNEL_ADDRPERM(newpr))); goto end; /* we should just give up in this case. */ } @@ -2370,7 +2389,7 @@ prelist_update( } /* - * Address autoconfiguration based on Section 5.5.3 of RFC 2462. + * Address autoconfiguration based on Section 5.5.3 of RFC 4862. * Note that pr must be non NULL at this point. */ @@ -2395,16 +2414,9 @@ prelist_update( * list of addresses associated with the interface, and the Valid * Lifetime is not 0, form an address. We first check if we have * a matching prefix. - * Note: we apply a clarification in rfc2462bis-02 here. We only - * consider autoconfigured addresses while RFC2462 simply said - * "address". */ - - getmicrotime(&timenow); - ifnet_lock_shared(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) - { + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { struct in6_ifaddr *ifa6; u_int32_t remaininglifetime; @@ -2416,7 +2428,7 @@ prelist_update( ifa6 = (struct in6_ifaddr *)ifa; /* - * We only consider autoconfigured addresses as per rfc2462bis. + * We only consider autoconfigured addresses as per RFC 4862. */ if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF)) { IFA_UNLOCK(ifa); @@ -2451,27 +2463,24 @@ prelist_update( * are sure there is at least one matched address, we can * proceed to 5.5.3. (e): update the lifetimes according to the * "two hours" rule and the privacy extension. 
- * We apply some clarifications in rfc2462bis: - * - use remaininglifetime instead of storedlifetime as a - * variable name - * - remove the dead code in the "two-hour" rule */ -#define TWOHOUR (120*60) - lt6_tmp = ifa6->ia6_lifetime; +#define TWOHOUR (120*60) + + /* retrieve time as uptime (last arg is 0) */ + in6ifa_getlifetime(ifa6, &lt6_tmp, 0); if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME) remaininglifetime = ND6_INFINITE_LIFETIME; - else if (timenow.tv_sec - ifa6->ia6_updatetime > - lt6_tmp.ia6t_vltime) { + else if (timenow - ifa6->ia6_updatetime > lt6_tmp.ia6t_vltime) { /* * The case of "invalid" address. We should usually * not see this case. */ remaininglifetime = 0; - } else + } else { remaininglifetime = lt6_tmp.ia6t_vltime - - (timenow.tv_sec - ifa6->ia6_updatetime); - + (timenow - ifa6->ia6_updatetime); + } /* when not updating, keep the current stored lifetime. */ lt6_tmp.ia6t_vltime = remaininglifetime; @@ -2496,7 +2505,7 @@ prelist_update( /* Special handling for lifetimes of temporary addresses. */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { u_int32_t maxvltime, maxpltime; - + /* Constrain lifetimes to system limits. */ if (lt6_tmp.ia6t_vltime > ip6_temp_valid_lifetime) lt6_tmp.ia6t_vltime = ip6_temp_valid_lifetime; @@ -2511,72 +2520,47 @@ prelist_update( * intervals. */ if (ip6_temp_valid_lifetime > - (u_int32_t)((timenow.tv_sec - ifa6->ia6_createtime) + + (u_int32_t)((timenow - ifa6->ia6_createtime) + ip6_desync_factor)) { maxvltime = ip6_temp_valid_lifetime - - (timenow.tv_sec - ifa6->ia6_createtime) - + (timenow - ifa6->ia6_createtime) - ip6_desync_factor; } else maxvltime = 0; if (ip6_temp_preferred_lifetime > - (u_int32_t)((timenow.tv_sec - ifa6->ia6_createtime) + + (u_int32_t)((timenow - ifa6->ia6_createtime) + ip6_desync_factor)) { maxpltime = ip6_temp_preferred_lifetime - - (timenow.tv_sec - ifa6->ia6_createtime) - + (timenow - ifa6->ia6_createtime) - ip6_desync_factor; } else maxpltime = 0; - if (lt6_tmp.ia6t_vltime > maxvltime) + if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME || + lt6_tmp.ia6t_vltime > maxvltime) lt6_tmp.ia6t_vltime = maxvltime; - if (lt6_tmp.ia6t_pltime > maxpltime) + + if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME || + lt6_tmp.ia6t_pltime > maxpltime) lt6_tmp.ia6t_pltime = maxpltime; } - in6_init_address_ltimes(pr, &lt6_tmp, - !!(ifa6->ia6_flags & IN6_IFF_TEMPORARY)); - - ifa6->ia6_lifetime = lt6_tmp; - ifa6->ia6_updatetime = timenow.tv_sec; + in6_init_address_ltimes(pr, &lt6_tmp); + + in6ifa_setlifetime(ifa6, &lt6_tmp); + ifa6->ia6_updatetime = timenow; IFA_UNLOCK(ifa); } ifnet_lock_done(ifp); if (ia6_match == NULL && new->ndpr_vltime) { - int ifidlen; - /* * 5.5.3 (d) (continued) * No address matched and the valid lifetime is non-zero. * Create a new address. */ - /* - * Prefix Length check: - * If the sum of the prefix length and interface identifier - * length does not equal 128 bits, the Prefix Information - * option MUST be ignored. The length of the interface - * identifier is defined in a separate link-type specific - * document. - */ - ifidlen = in6_if2idlen(ifp); - if (ifidlen < 0) { - /* this should not happen, so we always log it.
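
The hunk above moves address-lifetime bookkeeping from wall-clock seconds (getmicrotime) to monotonic uptime seconds (net_uptime), so the RFC 4862 5.5.3(e) arithmetic is immune to system clock changes. A minimal sketch of the remaining-lifetime computation it performs, with illustrative names (the kernel updates a struct in6_addrlifetime in place):

    #include <stdint.h>

    #define ND6_INFINITE_LIFETIME 0xffffffffU  /* as in XNU's nd6.h */

    /* Seconds of valid lifetime left, given uptime-based timestamps. */
    static uint32_t
    remaining_lifetime(uint32_t vltime, uint64_t updatetime, uint64_t now)
    {
            if (vltime == ND6_INFINITE_LIFETIME)
                    return (ND6_INFINITE_LIFETIME);
            if (now - updatetime > vltime)
                    return (0);             /* address already invalid */
            return (vltime - (uint32_t)(now - updatetime));
    }
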
*/ - log(LOG_ERR, "prelist_update: IFID undefined (%s)\n", - if_name(ifp)); - goto end; - } - NDPR_LOCK(pr); - if (ifidlen + pr->ndpr_plen != 128) { - nd6log((LOG_INFO, - "prelist_update: invalid prefixlen " - "%d for %s, ignored\n", - pr->ndpr_plen, if_name(ifp))); - NDPR_UNLOCK(pr); - goto end; - } - NDPR_UNLOCK(pr); - - if ((ia6 = in6_ifadd(new, mcast)) != NULL) { + if ((ia6 = in6_pfx_newpersistaddr(new, mcast, &error)) + != NULL) { /* * note that we should use pr (not new) for reference. */ @@ -2592,7 +2576,7 @@ prelist_update( /* * RFC 4941 3.3 (2). * When a new public address is created as described - * in RFC2462, also create a new temporary address. + * in RFC 4862, also create a new temporary address. * * RFC 4941 3.5. * When an interface connects to a new link, a new @@ -2603,7 +2587,7 @@ prelist_update( */ if (ip6_use_tempaddr) { int e; - if ((e = in6_tmpifadd(ia6, 1, M_WAITOK)) != 0) { + if ((e = in6_tmpifadd(ia6, 1)) != 0) { nd6log((LOG_NOTICE, "prelist_update: " "failed to create a temporary " "address, errno=%d\n", @@ -2621,9 +2605,6 @@ prelist_update( lck_mtx_lock(nd6_mutex); pfxlist_onlink_check(); lck_mtx_unlock(nd6_mutex); - } else { - /* just set an error. do not bark here. */ - error = EADDRNOTAVAIL; } } @@ -2634,7 +2615,7 @@ end: NDPR_REMREF(pr); if (ia6_match != NULL) IFA_REMREF(&ia6_match->ia_ifa); - return error; + return (error); } /* @@ -2748,6 +2729,28 @@ nddr_remref(struct nd_defrouter *nddr, int locked) return (nddr); } +uint64_t +nddr_getexpire(struct nd_defrouter *dr) +{ + struct timeval caltime; + uint64_t expiry; + + if (dr->expire != 0) { + /* account for system time change */ + getmicrotime(&caltime); + + dr->base_calendartime += + NET_CALCULATE_CLOCKSKEW(caltime, + dr->base_calendartime, net_uptime(), dr->base_uptime); + + expiry = dr->base_calendartime + + dr->expire - dr->base_uptime; + } else { + expiry = 0; + } + return (expiry); +} + /* * Neighbor Discover Prefix structure reference counting routines. */ @@ -2875,6 +2878,29 @@ ndpr_remref(struct nd_prefix *ndpr, int locked) return (ndpr); } +uint64_t +ndpr_getexpire(struct nd_prefix *pr) +{ + struct timeval caltime; + uint64_t expiry; + + if (pr->ndpr_expire != 0 && pr->ndpr_vltime != ND6_INFINITE_LIFETIME) { + /* account for system time change */ + getmicrotime(&caltime); + + pr->ndpr_base_calendartime += + NET_CALCULATE_CLOCKSKEW(caltime, + pr->ndpr_base_calendartime, net_uptime(), + pr->ndpr_base_uptime); + + expiry = pr->ndpr_base_calendartime + + pr->ndpr_expire - pr->ndpr_base_uptime; + } else { + expiry = 0; + } + return (expiry); +} + /* * A supplement function used in the on-link detection below; * detect if a given prefix has a (probably) reachable advertising router. 
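
The new nddr_getexpire()/ndpr_getexpire() accessors above complement that switch: expiries are armed on the uptime timeline and mapped onto the calendar clock only on demand, after re-basing the stored calendar anchor by any wall-clock adjustment since arming. A sketch of the idea, assuming NET_CALCULATE_CLOCKSKEW yields the wall-clock movement not explained by elapsed uptime (names illustrative):

    #include <stdint.h>

    struct timer_base {
            uint64_t base_cal;      /* calendar seconds captured at arming */
            uint64_t base_up;       /* uptime seconds captured at arming */
            uint64_t expire;        /* expiry, on the uptime timeline */
    };

    static uint64_t
    expiry_calendar(struct timer_base *t, uint64_t now_cal, uint64_t now_up)
    {
            int64_t skew;

            if (t->expire == 0)
                    return (0);     /* "never expires" sentinel */

            /* wall-clock movement not explained by elapsed uptime */
            skew = (int64_t)(now_cal - t->base_cal) -
                (int64_t)(now_up - t->base_up);
            t->base_cal += skew;    /* persist the re-based anchor */

            return (t->base_cal + t->expire - t->base_up);
    }
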
@@ -2886,17 +2912,22 @@ find_pfxlist_reachable_router(struct nd_prefix *pr) struct nd_pfxrouter *pfxrtr; struct rtentry *rt; struct llinfo_nd6 *ln; + struct ifnet *ifp; + struct in6_addr rtaddr; + unsigned int genid; lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED); NDPR_LOCK_ASSERT_HELD(pr); - for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr; - pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) { + genid = pr->ndpr_genid; + pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); + while (pfxrtr) { + ifp = pfxrtr->router->ifp; + rtaddr = pfxrtr->router->rtaddr; NDPR_UNLOCK(pr); lck_mtx_unlock(nd6_mutex); /* Callee returns a locked route upon success */ - if ((rt = nd6_lookup(&pfxrtr->router->rtaddr, 0, - pfxrtr->router->ifp, 0)) != NULL) { + if ((rt = nd6_lookup(&rtaddr, 0, ifp, 0)) != NULL) { RT_LOCK_ASSERT_HELD(rt); if ((ln = rt->rt_llinfo) != NULL && ND6_IS_LLINFO_PROBREACH(ln)) { @@ -2911,6 +2942,11 @@ find_pfxlist_reachable_router(struct nd_prefix *pr) } lck_mtx_lock(nd6_mutex); NDPR_LOCK(pr); + if (pr->ndpr_genid != genid) { + pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); + genid = pr->ndpr_genid; + } else + pfxrtr = LIST_NEXT(pfxrtr, pfr_entry); } NDPR_LOCK_ASSERT_HELD(pr); @@ -2959,7 +2995,7 @@ pfxlist_onlink_check(void) pr = nd_prefix.lh_first; while (pr) { NDPR_LOCK(pr); - if (pr->ndpr_stateflags & NDPRF_PROCESSED) { + if (pr->ndpr_stateflags & NDPRF_PROCESSED_ONLINK) { NDPR_UNLOCK(pr); pr = pr->ndpr_next; continue; @@ -2971,19 +3007,19 @@ pfxlist_onlink_check(void) NDPR_REMREF(pr); break; } - pr->ndpr_stateflags |= NDPRF_PROCESSED; + pr->ndpr_stateflags |= NDPRF_PROCESSED_ONLINK; NDPR_UNLOCK(pr); NDPR_REMREF(pr); /* * Since find_pfxlist_reachable_router() drops the nd6_mutex, we - * have to start over, but the NDPRF_PROCESSED flag will stop - * us from checking the same prefix twice. + * have to start over, but the NDPRF_PROCESSED_ONLINK flag will + * stop us from checking the same prefix twice. */ pr = nd_prefix.lh_first; } LIST_FOREACH(prclear, &nd_prefix, ndpr_entry) { NDPR_LOCK(prclear); - prclear->ndpr_stateflags &= ~NDPRF_PROCESSED; + prclear->ndpr_stateflags &= ~NDPRF_PROCESSED_ONLINK; NDPR_UNLOCK(prclear); } @@ -3028,7 +3064,7 @@ pfxlist_onlink_check(void) * set nor in static prefixes */ if (pr->ndpr_raf_onlink == 0 || - pr->ndpr_stateflags & NDPRF_PROCESSED || + pr->ndpr_stateflags & NDPRF_PROCESSED_ONLINK || pr->ndpr_stateflags & NDPRF_STATIC) { NDPR_UNLOCK(pr); pr = pr->ndpr_next; @@ -3043,14 +3079,14 @@ pfxlist_onlink_check(void) find_pfxlist_reachable_router(pr) != NULL && (pr->ndpr_debug & IFD_ATTACHED)) pr->ndpr_stateflags &= ~NDPRF_DETACHED; - pr->ndpr_stateflags |= NDPRF_PROCESSED; + pr->ndpr_stateflags |= NDPRF_PROCESSED_ONLINK; NDPR_UNLOCK(pr); NDPR_REMREF(pr); /* * Since find_pfxlist_reachable_router() drops the * nd6_mutex, we have to start over, but the - * NDPRF_PROCESSED flag will stop us from checking - * the same prefix twice. + * NDPRF_PROCESSED_ONLINK flag will stop us from + * checking the same prefix twice. */ pr = nd_prefix.lh_first; } @@ -3070,7 +3106,7 @@ pfxlist_onlink_check(void) } LIST_FOREACH(prclear, &nd_prefix, ndpr_entry) { NDPR_LOCK(prclear); - prclear->ndpr_stateflags &= ~NDPRF_PROCESSED; + prclear->ndpr_stateflags &= ~NDPRF_PROCESSED_ONLINK; NDPR_UNLOCK(prclear); } VERIFY(nd_prefix_busy); @@ -3144,7 +3180,7 @@ pfxlist_onlink_check(void) * another NA, RA, or when the link status changes. 
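
find_pfxlist_reachable_router() must drop nd6_mutex around each nd6_lookup(), so the advertising-router list can mutate mid-walk; the new ndpr_genid counter (bumped by pfxrtr_add() and pfxrtr_del()) tells the walker to restart from the head when that happens. The pattern in isolation, with hypothetical types:

    /* Hypothetical list with a generation counter, as in nd_prefix. */
    struct node { struct node *next; };
    struct genlist { struct node *head; unsigned int genid; };

    static void
    walk_with_lock_drops(struct genlist *l)
    {
            unsigned int genid = l->genid;
            struct node *n = l->head;

            while (n != NULL) {
                    /* ...drop the list lock, do a blocking lookup,
                     * then retake the lock... */
                    if (l->genid != genid) {
                            genid = l->genid;   /* list changed... */
                            n = l->head;        /* ...restart the scan */
                    } else {
                            n = n->next;
                    }
            }
    }
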
*/ err = ifnet_get_address_list_family_internal(NULL, &ifap, AF_INET6, 0, - M_NOWAIT); + M_NOWAIT, 0); if (err != 0 || ifap == NULL) { nd6log((LOG_ERR, "%s: ifnet_get_address_list_family_internal " "failed", __func__)); @@ -3388,10 +3424,10 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped, NDPR_LOCK(pr); if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { nd6log((LOG_ERR, - "nd6_prefix_onlink: %s/%d on %s scoped=%d is already " - "on-link\n", ip6_sprintf(&pr->ndpr_prefix.sin6_addr), - pr->ndpr_plen, if_name(pr->ndpr_ifp), - (pr->ndpr_stateflags & NDPRF_IFSCOPE) ? 1 : 0)); + "%s: %s/%d on %s scoped=%d is already on-link\n", + __func__, ip6_sprintf(&pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(pr->ndpr_ifp), + (pr->ndpr_stateflags & NDPRF_IFSCOPE) ? 1 : 0)); NDPR_UNLOCK(pr); return (EEXIST); } @@ -3430,8 +3466,7 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped, */ /* search for a link-local addr */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, - IN6_IFF_NOTREADY| - IN6_IFF_ANYCAST); + IN6_IFF_NOTREADY | IN6_IFF_ANYCAST); if (ifa == NULL) { struct in6_ifaddr *ia6; ifnet_lock_shared(ifp); @@ -3462,8 +3497,8 @@ nd6_prefix_onlink_common(struct nd_prefix *pr, boolean_t force_scoped, * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs. * ifa->ifa_rtrequest = nd6_rtrequest; */ - bzero(&mask6, sizeof(mask6)); - mask6.sin6_len = sizeof(mask6); + bzero(&mask6, sizeof (mask6)); + mask6.sin6_len = sizeof (mask6); mask6.sin6_addr = pr->ndpr_mask; prefix = pr->ndpr_prefix; if ((rt = pr->ndpr_rt) != NULL) @@ -3616,15 +3651,15 @@ nd6_prefix_offlink(struct nd_prefix *pr) return (EEXIST); } - bzero(&sa6, sizeof(sa6)); + bzero(&sa6, sizeof (sa6)); sa6.sin6_family = AF_INET6; - sa6.sin6_len = sizeof(sa6); + sa6.sin6_len = sizeof (sa6); bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr, - sizeof(struct in6_addr)); - bzero(&mask6, sizeof(mask6)); + sizeof (struct in6_addr)); + bzero(&mask6, sizeof (mask6)); mask6.sin6_family = AF_INET6; - mask6.sin6_len = sizeof(sa6); - bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr)); + mask6.sin6_len = sizeof (sa6); + bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof (struct in6_addr)); prefix = pr->ndpr_prefix; plen = pr->ndpr_plen; if ((ndpr_rt = pr->ndpr_rt) != NULL) @@ -3682,7 +3717,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) } if (opr->ndpr_plen == plen && in6_are_prefix_equal(&prefix.sin6_addr, - &opr->ndpr_prefix.sin6_addr, plen)) { + &opr->ndpr_prefix.sin6_addr, plen)) { int e; NDPR_UNLOCK(opr); @@ -3692,7 +3727,8 @@ nd6_prefix_offlink(struct nd_prefix *pr) "nd6_prefix_offlink: failed to " "recover a prefix %s/%d from %s " "to %s (errno = %d)\n", - ip6_sprintf(&opr->ndpr_prefix.sin6_addr), + ip6_sprintf( + &opr->ndpr_prefix.sin6_addr), opr->ndpr_plen, if_name(ifp), if_name(opr->ndpr_ifp), e)); } @@ -3744,178 +3780,209 @@ } static struct in6_ifaddr * -in6_ifadd( - struct nd_prefix *pr, - int mcast) +in6_pfx_newpersistaddr(struct nd_prefix *pr, int mcast, int *errorp) { - struct ifnet *ifp = pr->ndpr_ifp; - struct in6_aliasreq ifra; - struct in6_ifaddr *ia, *ib; - int error, plen0; - int updateflags; + struct in6_ifaddr *ia6; + struct ifnet *ifp; + struct nd_ifinfo *ndi; struct in6_addr mask; - int prefixlen; + struct in6_aliasreq ifra; + int error, ifaupdate, iidlen, notcga; + + VERIFY(pr != NULL); + VERIFY(errorp != NULL); + + NDPR_LOCK(pr); + ifp = pr->ndpr_ifp; + ia6 = NULL; + error = 0; /* - * find a link-local address (will be interface ID).
- * Is it really mandatory? Theoretically, a global or a site-local - * address can be configured without a link-local address, if we - * have a unique interface identifier... - * - * it is not mandatory to have a link-local address, we can generate - * interface identifier on the fly. we do this because: - * (1) it should be the easiest way to find interface identifier. - * (2) RFC2462 5.4 suggesting the use of the same interface identifier - * for multiple addresses on a single interface, and possible shortcut - * of DAD. we omitted DAD for this reason in the past. - * (3) a user can prevent autoconfiguration of global address - * by removing link-local address by hand (this is partly because we - * don't have other way to control the use of IPv6 on an interface. - * this has been our design choice - cf. NRL's "ifconfig auto"). - * (4) it is easier to manage when an interface has addresses - * with the same interface identifier, than to have multiple addresses - * with different interface identifiers. + * Prefix Length check: + * If the sum of the prefix length and interface identifier + * length does not equal 128 bits, the Prefix Information + * option MUST be ignored. The length of the interface + * identifier is defined in a separate link-type specific + * document. */ - ib = in6ifa_ifpforlinklocal(ifp, 0);/* 0 is OK? */ - if (ib == NULL) - return (NULL); + iidlen = in6_if2idlen(ifp); + if (iidlen < 0) { + error = EADDRNOTAVAIL; + /* this should not happen, so we always log it. */ + log(LOG_ERR, "%s: IID length undefined (%s)\n", + __func__, if_name(ifp)); + goto unlock1; + } else if (iidlen != 64) { + error = EADDRNOTAVAIL; + /* + * stateless autoconfiguration not yet well-defined for IID + * lengths other than 64 octets. Just give up for now. + */ + nd6log((LOG_INFO, "%s: IID length not 64 octets (%s)\n", + __func__, if_name(ifp))); + goto unlock1; + } - IFA_LOCK(&ib->ia_ifa); - NDPR_LOCK(pr); - prefixlen = pr->ndpr_plen; - in6_len2mask(&mask, prefixlen); - plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL); - /* prefixlen + ifidlen must be equal to 128 */ - if (prefixlen != plen0) { - nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s " - "(prefix=%d ifid=%d)\n", - if_name(ifp), prefixlen, 128 - plen0)); - NDPR_UNLOCK(pr); - IFA_UNLOCK(&ib->ia_ifa); - IFA_REMREF(&ib->ia_ifa); - return (NULL); + if (iidlen + pr->ndpr_plen != 128) { + error = EADDRNOTAVAIL; + nd6log((LOG_INFO, + "%s: invalid prefix length %d for %s, ignored\n", + __func__, pr->ndpr_plen, if_name(ifp))); + goto unlock1; } - /* make ifaddr */ + lck_rw_lock_shared(nd_if_rwlock); + if (ifp->if_index >= nd_ifinfo_indexlim) { + error = EADDRNOTAVAIL; + nd6log((LOG_INFO, + "%s: invalid prefix length %d for %s, ignored\n", + __func__, pr->ndpr_plen, if_name(ifp))); + goto unlock2; + } - bzero(&ifra, sizeof(ifra)); - /* - * in6_update_ifa() does not use ifra_name, but we accurately set it - * for safety. 
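
In sketch form, the validation that in6_pfx_newpersistaddr() applies above before autoconfiguring an address (RFC 4862: interface-identifier length plus prefix length must be exactly 128 bits, and this code further insists on 64-bit IIDs):

    /* Illustrative predicate; the kernel logs and bails on failure. */
    static int
    slaac_plen_ok(int iidlen, int plen)
    {
            return (iidlen == 64 && iidlen + plen == 128);
    }
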
- */ - strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); + bzero(&ifra, sizeof (ifra)); + strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); ifra.ifra_addr.sin6_family = AF_INET6; - ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_addr.sin6_len = sizeof (struct sockaddr_in6); + /* prefix */ bcopy(&pr->ndpr_prefix.sin6_addr, &ifra.ifra_addr.sin6_addr, - sizeof(ifra.ifra_addr.sin6_addr)); + sizeof (ifra.ifra_addr.sin6_addr)); + in6_len2mask(&mask, pr->ndpr_plen); ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0]; ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1]; ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2]; ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3]; - /* interface ID */ - ifra.ifra_addr.sin6_addr.s6_addr32[0] |= - (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]); - ifra.ifra_addr.sin6_addr.s6_addr32[1] |= - (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]); - ifra.ifra_addr.sin6_addr.s6_addr32[2] |= - (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]); - ifra.ifra_addr.sin6_addr.s6_addr32[3] |= - (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]); + ndi = &nd_ifinfo[ifp->if_index]; + VERIFY(ndi->initialized); + lck_mtx_lock(&ndi->lock); + + notcga = nd6_send_opstate == ND6_SEND_OPMODE_DISABLED || + (ndi->flags & ND6_IFF_INSECURE) != 0; + + lck_mtx_unlock(&ndi->lock); + lck_rw_done(nd_if_rwlock); + NDPR_UNLOCK(pr); + + if (notcga) { + ia6 = in6ifa_ifpforlinklocal(ifp, 0); + if (ia6 == NULL) { + error = EADDRNOTAVAIL; + nd6log((LOG_INFO, "%s: no link-local address (%s)\n", + __func__, if_name(ifp))); + goto done; + } + + IFA_LOCK(&ia6->ia_ifa); + ifra.ifra_addr.sin6_addr.s6_addr32[0] |= + (ia6->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]); + ifra.ifra_addr.sin6_addr.s6_addr32[1] |= + (ia6->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]); + ifra.ifra_addr.sin6_addr.s6_addr32[2] |= + (ia6->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]); + ifra.ifra_addr.sin6_addr.s6_addr32[3] |= + (ia6->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]); + IFA_UNLOCK(&ia6->ia_ifa); + IFA_REMREF(&ia6->ia_ifa); + ia6 = NULL; + } else { + in6_cga_node_lock(); + error = in6_cga_generate(NULL, 0, &ifra.ifra_addr.sin6_addr); + in6_cga_node_unlock(); + if (error == 0) + ifra.ifra_flags |= IN6_IFF_SECURED; + else { + nd6log((LOG_ERR, "%s: no CGA available (%s)\n", + __func__, if_name(ifp))); + goto done; + } + } + + VERIFY(ia6 == NULL); /* new prefix mask. */ - ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); + ifra.ifra_prefixmask.sin6_len = sizeof (struct sockaddr_in6); ifra.ifra_prefixmask.sin6_family = AF_INET6; bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr, - sizeof(ifra.ifra_prefixmask.sin6_addr)); + sizeof (ifra.ifra_prefixmask.sin6_addr)); /* lifetimes. */ ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime; ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime; - /* XXX: scope zone ID? */ - + /* address flags */ ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */ - NDPR_UNLOCK(pr); - IFA_UNLOCK(&ib->ia_ifa); - IFA_REMREF(&ib->ia_ifa); - /* * Make sure that we do not have this address already. This should * usually not happen, but we can still see this case, e.g., if we * have manually configured the exact address to be configured. 
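
Whichever way the interface identifier is obtained — copied from the link-local address in the non-CGA path above, or produced by in6_cga_generate() when SEND is active — the address is assembled the same way: prefix bits where the netmask is set, IID bits where it is clear. A hypothetical helper equivalent to the s6_addr32[] manipulation in the hunk:

    #include <stdint.h>

    /* Splice a prefix and an interface identifier under a netmask. */
    static void
    splice_addr(uint32_t dst[4], const uint32_t prefix[4],
        const uint32_t iid[4], const uint32_t mask[4])
    {
            int i;

            for (i = 0; i < 4; i++)
                    dst[i] = (prefix[i] & mask[i]) | (iid[i] & ~mask[i]);
    }
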
*/ - if ((ib = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr)) != NULL) { - IFA_REMREF(&ib->ia_ifa); + if ((ia6 = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr)) + != NULL) { + error = EADDRNOTAVAIL; + IFA_REMREF(&ia6->ia_ifa); + ia6 = NULL; + /* this should be rare enough to make an explicit log */ - log(LOG_INFO, "in6_ifadd: %s is already configured\n", - ip6_sprintf(&ifra.ifra_addr.sin6_addr)); - return (NULL); + log(LOG_INFO, "%s: %s is already configured!\n", + __func__, ip6_sprintf(&ifra.ifra_addr.sin6_addr)); + goto done; } /* * Allocate ifaddr structure, link into chain, etc. * If we are going to create a new address upon receiving a multicasted * RA, we need to impose a random delay before starting DAD. - * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2] + * [RFC 4862, Section 5.4.2] */ - updateflags = 0; + ifaupdate = IN6_IFAUPDATE_NOWAIT; if (mcast) - updateflags |= IN6_IFAUPDATE_DADDELAY; - error = in6_update_ifa(ifp, &ifra, NULL, updateflags, M_WAITOK); + ifaupdate |= IN6_IFAUPDATE_DADDELAY; + error = in6_update_ifa(ifp, &ifra, ifaupdate, &ia6); if (error != 0) { nd6log((LOG_ERR, - "in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n", - ip6_sprintf(&ifra.ifra_addr.sin6_addr), if_name(ifp), - error)); - return(NULL); /* ifaddr must not have been allocated. */ + "%s: failed to make ifaddr %s on %s (errno=%d)\n", + __func__, ip6_sprintf(&ifra.ifra_addr.sin6_addr), + if_name(ifp), error)); + error = EADDRNOTAVAIL; + goto done; } - ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr); - if (ia == NULL) { - /* - * XXX: both in6_ifadd and in6_iftmpadd follow this admittedly - * suboptimal pattern of calling in6_update_ifa to add the - * interface address, then calling in6ifa_ifpwithaddr to - * retrieve it from the interface address list after some - * concurrent kernel thread has first had the opportunity to - * call in6_purgeaddr and delete everything. 
- */ - nd6log((LOG_ERR, - "in6_ifadd: ifa update succeeded, but we got no ifaddr\n")); - return(NULL); - } + VERIFY(ia6 != NULL); + in6_post_msg(ifp, KEV_INET6_NEW_RTADV_ADDR, ia6); + goto done; + +unlock2: + lck_rw_done(nd_if_rwlock); + +unlock1: + NDPR_UNLOCK(pr); - in6_post_msg(ifp, KEV_INET6_NEW_RTADV_ADDR, ia); - return(ia); +done: + *errorp = error; + return (ia6); } #define IA6_NONCONST(i) ((struct in6_ifaddr *)(uintptr_t)(i)) int -in6_tmpifadd( - const struct in6_ifaddr *ia0, /* corresponding public address */ - int forcegen, - int how) +in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen) { struct ifnet *ifp = ia0->ia_ifa.ifa_ifp; struct in6_ifaddr *ia, *newia; struct in6_aliasreq ifra; - int i, error; + int i, error, ifaupdate; int trylimit = 3; /* XXX: adhoc value */ - int updateflags; u_int32_t randid[2]; time_t vltime0, pltime0; - struct timeval timenow; + uint64_t timenow = net_uptime(); struct in6_addr addr; struct nd_prefix *ndpr; - getmicrotime(&timenow); - - bzero(&ifra, sizeof(ifra)); - strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); + bzero(&ifra, sizeof (ifra)); + strncpy(ifra.ifra_name, if_name(ifp), sizeof (ifra.ifra_name)); IFA_LOCK(&IA6_NONCONST(ia0)->ia_ifa); ifra.ifra_addr = ia0->ia_addr; /* copy prefix mask */ @@ -3929,7 +3996,7 @@ in6_tmpifadd( IFA_UNLOCK(&IA6_NONCONST(ia0)->ia_ifa); again: - in6_get_tmpifid(ifp, (u_int8_t *)randid, + in6_iid_mktmp(ifp, (u_int8_t *)randid, (const u_int8_t *)&addr.s6_addr[8], forcegen); ifra.ifra_addr.sin6_addr.s6_addr32[2] |= @@ -3938,7 +4005,7 @@ again: (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3])); /* - * in6_get_tmpifid() quite likely provided a unique interface ID. + * in6_iid_mktmp() quite likely provided a unique interface ID. * However, we may still have a chance to see collision, because * there may be a time lag between generation of the ID and generation * of the address. So, we'll do one more sanity check. @@ -3948,7 +4015,7 @@ again: if (trylimit-- == 0) { nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find " "a unique random IFID\n")); - return(EEXIST); + return (EEXIST); } forcegen = 1; goto again; @@ -3956,24 +4023,31 @@ again: /* * The Valid Lifetime is the lower of the Valid Lifetime of the - * public address or TEMP_VALID_LIFETIME. + * public address or TEMP_VALID_LIFETIME. * The Preferred Lifetime is the lower of the Preferred Lifetime - * of the public address or TEMP_PREFERRED_LIFETIME - - * DESYNC_FACTOR. + * of the public address or TEMP_PREFERRED_LIFETIME - + * DESYNC_FACTOR. */ IFA_LOCK(&IA6_NONCONST(ia0)->ia_ifa); - vltime0 = IFA6_IS_INVALID(ia0) - ? 0 - : (ia0->ia6_lifetime.ia6t_vltime - - (timenow.tv_sec - ia0->ia6_updatetime)); - if (vltime0 > ip6_temp_valid_lifetime) + if (ia0->ia6_lifetime.ia6ti_vltime != ND6_INFINITE_LIFETIME) { + vltime0 = IFA6_IS_INVALID(ia0, timenow) ? 0 : + (ia0->ia6_lifetime.ia6ti_vltime - + (timenow - ia0->ia6_updatetime)); + if (vltime0 > ip6_temp_valid_lifetime) + vltime0 = ip6_temp_valid_lifetime; + } else { vltime0 = ip6_temp_valid_lifetime; - pltime0 = IFA6_IS_DEPRECATED(ia0) - ? 0 - : (ia0->ia6_lifetime.ia6t_pltime - - (timenow.tv_sec - ia0->ia6_updatetime)); - if (pltime0 > ip6_temp_preferred_lifetime - ip6_desync_factor) + } + if (ia0->ia6_lifetime.ia6ti_pltime != ND6_INFINITE_LIFETIME) { + pltime0 = IFA6_IS_DEPRECATED(ia0, timenow) ? 
0 : + (ia0->ia6_lifetime.ia6ti_pltime - + (timenow - ia0->ia6_updatetime)); + if (pltime0 > ip6_temp_preferred_lifetime - ip6_desync_factor) + pltime0 = ip6_temp_preferred_lifetime - + ip6_desync_factor; + } else { pltime0 = ip6_temp_preferred_lifetime - ip6_desync_factor; + } ifra.ifra_lifetime.ia6t_vltime = vltime0; ifra.ifra_lifetime.ia6t_pltime = pltime0; IFA_UNLOCK(&IA6_NONCONST(ia0)->ia_ifa); @@ -3982,36 +4056,21 @@ again: * Lifetime is greater than REGEN_ADVANCE time units. */ if (ifra.ifra_lifetime.ia6t_pltime <= ip6_temp_regen_advance) - return(0); + return (0); /* XXX: scope zone ID? */ ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY); /* allocate ifaddr structure, link into chain, etc. */ - updateflags = 0; - - if (how) - updateflags |= IN6_IFAUPDATE_DADDELAY; - - if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags, how)) != 0) + ifaupdate = IN6_IFAUPDATE_NOWAIT | IN6_IFAUPDATE_DADDELAY; + error = in6_update_ifa(ifp, &ifra, ifaupdate, &newia); + if (error != 0) { + nd6log((LOG_ERR, "in6_tmpifadd: failed to add address.\n")); return (error); - - newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr); - if (newia == NULL) { - /* - * XXX: both in6_ifadd and in6_iftmpadd follow this admittedly - * suboptimal pattern of calling in6_update_ifa to add the - * interface address, then calling in6ifa_ifpwithaddr to - * retrieve it from the interface address list after some - * concurrent kernel thread has first had the opportunity to - * call in6_purgeaddr and delete everything. - */ - nd6log((LOG_ERR, - "in6_tmpifadd: ifa update succeeded, but we got " - "no ifaddr\n")); - return(EINVAL); } + VERIFY(newia != NULL); + IFA_LOCK(&IA6_NONCONST(ia0)->ia_ifa); ndpr = ia0->ia6_ndpr; if (ndpr == NULL) { @@ -4059,19 +4118,23 @@ again: /* remove our reference */ NDPR_REMREF(ndpr); - return(0); + return (0); } #undef IA6_NONCONST int in6_init_prefix_ltimes(struct nd_prefix *ndpr) { - struct timeval timenow; + struct timeval caltime; + u_int64_t timenow = net_uptime(); NDPR_LOCK_ASSERT_HELD(ndpr); - getmicrotime(&timenow); - /* check if preferred lifetime > valid lifetime. RFC2462 5.5.3 (c) */ + getmicrotime(&caltime); + ndpr->ndpr_base_calendartime = caltime.tv_sec; + ndpr->ndpr_base_uptime = timenow; + + /* check if preferred lifetime > valid lifetime. RFC 4862 5.5.3 (c) */ if (ndpr->ndpr_pltime > ndpr->ndpr_vltime) { nd6log((LOG_INFO, "in6_init_prefix_ltimes: preferred lifetime" "(%d) is greater than valid lifetime(%d)\n", @@ -4081,36 +4144,35 @@ in6_init_prefix_ltimes(struct nd_prefix *ndpr) if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_preferred = 0; else - ndpr->ndpr_preferred = timenow.tv_sec + ndpr->ndpr_pltime; + ndpr->ndpr_preferred = timenow + ndpr->ndpr_pltime; if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME) ndpr->ndpr_expire = 0; else - ndpr->ndpr_expire = timenow.tv_sec + ndpr->ndpr_vltime; + ndpr->ndpr_expire = timenow + ndpr->ndpr_vltime; - return 0; + return (0); } static void -in6_init_address_ltimes(__unused struct nd_prefix *new, - struct in6_addrlifetime *lt6, boolean_t is_temporary) +in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6) { - struct timeval timenow; +#pragma unused(new) + uint64_t timenow = net_uptime(); - getmicrotime(&timenow); /* Valid lifetime must not be updated unless explicitly specified. 
*/ /* init ia6t_expire */ - if (!is_temporary && lt6->ia6t_vltime == ND6_INFINITE_LIFETIME) + if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME) { lt6->ia6t_expire = 0; - else { - lt6->ia6t_expire = timenow.tv_sec; + } else { + lt6->ia6t_expire = timenow; lt6->ia6t_expire += lt6->ia6t_vltime; } /* init ia6t_preferred */ - if (!is_temporary && lt6->ia6t_pltime == ND6_INFINITE_LIFETIME) + if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME) { lt6->ia6t_preferred = 0; - else { - lt6->ia6t_preferred = timenow.tv_sec; + } else { + lt6->ia6t_preferred = timenow; lt6->ia6t_preferred += lt6->ia6t_pltime; } } @@ -4152,12 +4214,12 @@ rt6_deleteroute( RT_LOCK(rt); if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) { RT_UNLOCK(rt); - return(0); + return (0); } if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) { RT_UNLOCK(rt); - return(0); + return (0); } /* * Do not delete a static route. @@ -4166,7 +4228,7 @@ rt6_deleteroute( */ if ((rt->rt_flags & RTF_STATIC) != 0) { RT_UNLOCK(rt); - return(0); + return (0); } /* * We delete only host route. This means, in particular, we don't @@ -4174,7 +4236,7 @@ rt6_deleteroute( */ if ((rt->rt_flags & RTF_HOST) == 0) { RT_UNLOCK(rt); - return(0); + return (0); } /* @@ -4199,7 +4261,7 @@ nd6_setdefaultiface( ifnet_head_lock_shared(); if (ifindex < 0 || if_index < ifindex) { ifnet_head_done(); - return(EINVAL); + return (EINVAL); } def_ifp = ifindex2ifnet[ifindex]; ifnet_head_done(); @@ -4243,5 +4305,5 @@ nd6_setdefaultiface( } lck_mtx_unlock(nd6_mutex); - return(error); + return (error); } diff --git a/bsd/netinet6/nd6_send.c b/bsd/netinet6/nd6_send.c new file mode 100644 index 000000000..916607bd6 --- /dev/null +++ b/bsd/netinet6/nd6_send.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include + +SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. 
*/ + +SYSCTL_NODE(_net_inet6, OID_AUTO, send, CTLFLAG_RW | CTLFLAG_LOCKED, 0, + "IPv6 Secure Neighbor Discovery"); + +static int nd6_send_opmode = ND6_SEND_OPMODE_DISABLED; + +SYSCTL_INT(_net_inet6_send, OID_AUTO, opstate, CTLFLAG_RD | CTLFLAG_LOCKED, + &nd6_send_opstate, 0, "current SEND operating state"); + +int nd6_send_opstate = ND6_SEND_OPMODE_DISABLED; +SYSCTL_INT(_net_inet6_send, OID_AUTO, opmode, CTLFLAG_RW | CTLFLAG_LOCKED, + &nd6_send_opmode, 0, "configured SEND operating mode"); + +static int sysctl_cga_parameters SYSCTL_HANDLER_ARGS; + +SYSCTL_PROC(_net_inet6_send, OID_AUTO, cga_parameters, + CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, + sysctl_cga_parameters, "S,nd6_send_nodecfg", ""); + +/* + * The size of the buffer is sufficient to contain a public key, its size in + * machine binary type for the kernel, and the CGA precalc for the global + * scope. This interface is not a public API, so we don't anticipate that the + * userland and the kernel will be mismatched between ILP32 and LP64. + */ +#define SYSCTL_CGA_PARAMETERS_BUFFER_SIZE \ + 2 * (sizeof (u_int16_t) + IN6_CGA_KEY_MAXSIZE) + \ + sizeof (struct in6_cga_prepare) + +static int +sysctl_cga_parameters SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg1) + u_int namelen; + char *oldp, *newp; + const char *fin; + struct in6_cga_nodecfg cfg; + struct iovec *iov; + int error; + char *buffer; + u_int16_t u16; + + namelen = arg2; + if (namelen != 0) { + log(LOG_ERR, "%s: name length err [len=%u]\n", __func__, + namelen); + return (EINVAL); + } + + if (req->newlen > SYSCTL_CGA_PARAMETERS_BUFFER_SIZE) { + log(LOG_ERR, "%s: input buffer size error [len=%u]\n", __func__, + req->newlen); + return (EINVAL); + } + + MALLOC(buffer, char *, SYSCTL_CGA_PARAMETERS_BUFFER_SIZE, M_IP6CGA, + M_WAITOK); + if (buffer == NULL) { + log(LOG_ERR, "%s: could not allocate marshaling buffer.\n", + __func__); + return (ENOMEM); + } + + in6_cga_node_lock(); + + if (req->oldptr != USER_ADDR_NULL && req->oldlen > 0) { + oldp = buffer; + fin = &buffer[SYSCTL_CGA_PARAMETERS_BUFFER_SIZE]; + if (req->oldlen < SYSCTL_CGA_PARAMETERS_BUFFER_SIZE) + fin = &buffer[req->oldlen]; + + in6_cga_query(&cfg); + iov = &cfg.cga_pubkey; + if (iov->iov_len > 0) { + VERIFY(iov->iov_len < UINT16_MAX); + + if (&oldp[sizeof (cfg.cga_prepare)] <= fin) + bcopy(&cfg.cga_prepare, oldp, + sizeof (cfg.cga_prepare)); + oldp += sizeof (cfg.cga_prepare); + + if (&oldp[sizeof (u16)] < fin) { + u16 = (u_int16_t) iov->iov_len; + bcopy(&u16, oldp, sizeof (u16)); + } + oldp += sizeof (u16); + + if (&oldp[iov->iov_len] < fin) + bcopy(iov->iov_base, oldp, iov->iov_len); + oldp += iov->iov_len; + + if (oldp > fin) { + req->oldlen = oldp - buffer; + log(LOG_ERR, "%s: marshalled data too large.\n", + __func__); + error = ENOMEM; + goto done; + } + } + + error = SYSCTL_OUT(req, buffer, oldp - buffer); + if (error) + goto done; + } + + if (req->newptr == USER_ADDR_NULL) + goto done; + + error = proc_suser(current_proc()); + if (error) + goto done; + + if (req->newlen == 0) { + in6_cga_stop(); + nd6_send_opstate = ND6_SEND_OPMODE_DISABLED; + goto done; + } + + error = SYSCTL_IN(req, buffer, req->newlen); + if (error) + goto done; + + newp = buffer; + fin = &buffer[req->newlen]; + + bzero(&cfg, sizeof cfg); + + if (&newp[sizeof (cfg.cga_prepare)] <= fin) + bcopy(newp, &cfg.cga_prepare, sizeof (cfg.cga_prepare)); + newp += sizeof (cfg.cga_prepare); + + iov = &cfg.cga_privkey; + if (&newp[sizeof (u16)] < fin) { + bcopy(newp, &u16, sizeof (u16)); + iov->iov_len = u16; + + if (iov->iov_len > 
IN6_CGA_KEY_MAXSIZE) { + error = EINVAL; + goto done; + } + } + newp += sizeof (u16); + + iov->iov_base = newp; + newp += iov->iov_len; + + iov = &cfg.cga_pubkey; + if (&newp[sizeof (u16)] < fin) { + bcopy(newp, &u16, sizeof (u16)); + iov->iov_len = u16; + + if (iov->iov_len > IN6_CGA_KEY_MAXSIZE) { + error = EINVAL; + goto done; + } + } + newp += sizeof (u16); + + iov->iov_base = newp; + newp += iov->iov_len; + + if (newp > fin) { + log(LOG_ERR, "%s: input too large [octets=%ld].\n", __func__, + newp - fin); + error = ENOMEM; + goto done; + } + + error = in6_cga_start(&cfg); + if (!error) + nd6_send_opstate = nd6_send_opmode; + else + log(LOG_ERR, "%s: in6_cga_start error=%d.\n", __func__, + error); + +done: + in6_cga_node_unlock(); + FREE(buffer, M_IP6CGA); + return (error); +} + +/* End of file */ diff --git a/bsd/netinet6/pim6_var.h b/bsd/netinet6/pim6_var.h index 050f8e4c8..a84637b1a 100644 --- a/bsd/netinet6/pim6_var.h +++ b/bsd/netinet6/pim6_var.h @@ -60,7 +60,7 @@ struct pim6stat { #define PIM6CTL_STATS 1 /* statistics (read-only) */ #define PIM6CTL_MAXID 2 -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define PIM6CTL_NAMES { \ { 0, 0 }, \ { 0, 0 }, \ @@ -68,5 +68,5 @@ struct pim6stat { int pim6_input(struct mbuf **, int*); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_PIM6_VAR_H_ */ diff --git a/bsd/netinet6/raw_ip6.c b/bsd/netinet6/raw_ip6.c index a08cf7139..09728f630 100644 --- a/bsd/netinet6/raw_ip6.c +++ b/bsd/netinet6/raw_ip6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -116,9 +116,7 @@ #include #include #include -#if ENABLE_DEFAULT_SCOPE #include -#endif #include #include @@ -136,7 +134,6 @@ extern struct inpcbhead ripcb; extern struct inpcbinfo ripcbinfo; extern u_int32_t rip_sendspace; extern u_int32_t rip_recvspace; -extern int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt); struct rip6stat rip6stat; @@ -152,12 +149,13 @@ rip6_input( int proto) { struct mbuf *m = *mp; - register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - register struct inpcb *in6p; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + struct inpcb *in6p; struct inpcb *last = 0; struct mbuf *opts = NULL; struct sockaddr_in6 rip6src; int ret; + struct ifnet *ifp = m->m_pkthdr.rcvif; /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); @@ -166,7 +164,7 @@ rip6_input( init_sin6(&rip6src, m); /* general init */ - lck_rw_lock_shared(ripcbinfo.mtx); + lck_rw_lock_shared(ripcbinfo.ipi_lock); LIST_FOREACH(in6p, &ripcb, inp_list) { if ((in6p->in6p_vflag & INP_IPV6) == 0) continue; @@ -179,6 +177,14 @@ rip6_input( if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; + + if (inp_restricted(in6p, ifp)) + continue; + + if (ifp != NULL && IFNET_IS_CELLULAR(ifp) && + (in6p->in6p_flags & INP_NO_IFT_CELLULAR)) + continue; + if (proto == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { rip6stat.rip6s_isum++; if (in6_cksum(m, ip6->ip6_nxt, *offp, @@ -201,7 +207,7 @@ rip6_input( } else #endif /*IPSEC*/ if (n) { - if ((last->in6p_flags & IN6P_CONTROLOPTS) != 0 || + if ((last->in6p_flags & INP_CONTROLOPTS) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { ret = ip6_savecontrol(last, n, &opts); @@ -239,7 +245,7 @@ rip6_input( } else #endif 
/*IPSEC*/ if (last) { - if ((last->in6p_flags & IN6P_CONTROLOPTS) != 0 || + if ((last->in6p_flags & INP_CONTROLOPTS) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { ret = ip6_savecontrol(last, m, &opts); @@ -275,7 +281,7 @@ rip6_input( } unlock: - lck_rw_done(ripcbinfo.mtx); + lck_rw_done(ripcbinfo.ipi_lock); return IPPROTO_DONE; } @@ -331,7 +337,7 @@ rip6_ctlinput( */ int rip6_output( - register struct mbuf *m, + struct mbuf *m, struct socket *so, struct sockaddr_in6 *dstsock, struct mbuf *control, @@ -342,22 +348,26 @@ rip6_output( struct inpcb *in6p; u_int plen = m->m_pkthdr.len; int error = 0; - struct ip6_pktopts opt, *optp = 0; + struct ip6_pktopts opt, *optp = NULL; struct ip6_moptions *im6o = NULL; struct ifnet *oifp = NULL; int type = 0, code = 0; /* for ICMPv6 output statistics only */ mbuf_svc_class_t msc = MBUF_SC_UNSPEC; struct ip6_out_args ip6oa = - { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF }; + { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF, 0 }; int flags = IPV6_OUTARGS; - if (dstsock && IN6_IS_ADDR_V4MAPPED(&dstsock->sin6_addr)) { - m_freem(m); - return (EINVAL); - } - in6p = sotoin6pcb(so); + if (in6p == NULL || (in6p->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + error = (in6p == NULL ? EINVAL : EPROTOTYPE); + goto bad; + } + if (dstsock != NULL && IN6_IS_ADDR_V4MAPPED(&dstsock->sin6_addr)) { + error = EINVAL; + goto bad; + } + if (in6p->inp_flags & INP_BOUND_IF) { ip6oa.ip6oa_boundif = in6p->inp_boundifp->if_index; ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF; @@ -369,7 +379,8 @@ rip6_output( if (control) { msc = mbuf_service_class_from_control(control); - if ((error = ip6_setpktopts(control, &opt, NULL, so->so_proto->pr_protocol)) != 0) + if ((error = ip6_setpktopts(control, &opt, NULL, + SOCK_PROTO(so))) != 0) goto bad; optp = &opt; } else @@ -379,7 +390,7 @@ rip6_output( * For an ICMPv6 packet, we should know its type and code * to update statistics. */ - if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { + if (SOCK_PROTO(so) == IPPROTO_ICMPV6) { struct icmp6_hdr *icmp6; if (m->m_len < sizeof(struct icmp6_hdr) && (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) { @@ -393,6 +404,12 @@ rip6_output( if (in6p->inp_flowhash == 0) in6p->inp_flowhash = inp_calc_flowhash(in6p); + /* update flowinfo - RFC 6437 */ + if (in6p->inp_flow == 0 && in6p->in6p_flags & IN6P_AUTOFLOWLABEL) { + in6p->inp_flow &= ~IPV6_FLOWLABEL_MASK; + in6p->inp_flow |= + (htonl(in6p->inp_flowhash) & IPV6_FLOWLABEL_MASK); + } M_PREPEND(m, sizeof(*ip6), M_WAIT); if (m == NULL) { @@ -495,21 +512,20 @@ rip6_output( } } ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | - (in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK); + (in6p->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); /* ip6_plen will be filled in ip6_output, so not fill it here. 
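
The "update flowinfo" hunk above implements RFC 6437 stateless flow labels: when the socket carries no explicit flowinfo and IN6P_AUTOFLOWLABEL is set, a 20-bit label is derived from the inpcb's flow hash. Roughly, with the mask value shown for big-endian byte order (XNU's IPV6_FLOWLABEL_MASK is byte-order dependent):

    #include <stdint.h>
    #include <arpa/inet.h>

    /* Derive a 20-bit flow label (network byte order) from a flow hash. */
    static uint32_t
    auto_flowlabel(uint32_t flowhash)
    {
            const uint32_t flowlabel_mask = htonl(0x000fffff);

            return (htonl(flowhash) & flowlabel_mask);
    }
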
*/ ip6->ip6_nxt = in6p->in6p_ip6_nxt; ip6->ip6_hlim = in6_selecthlim(in6p, oifp); - if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 || - in6p->in6p_cksum != -1) { + if (SOCK_PROTO(so) == IPPROTO_ICMPV6 || in6p->in6p_cksum != -1) { struct mbuf *n; int off; u_int16_t *p; /* compute checksum */ - if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) + if (SOCK_PROTO(so) == IPPROTO_ICMPV6) off = offsetof(struct icmp6_hdr, icmp6_cksum); else off = in6p->in6p_cksum; @@ -538,11 +554,8 @@ rip6_output( } #endif /*IPSEC*/ - if (in6p->in6p_route.ro_rt != NULL && - in6p->in6p_route.ro_rt->generation_id != route_generation) { - rtfree(in6p->in6p_route.ro_rt); - in6p->in6p_route.ro_rt = NULL; - } + if (ROUTE_UNUSABLE(&in6p->in6p_route)) + ROUTE_RELEASE(&in6p->in6p_route); if (oifp != NULL) { ifnet_release(oifp); @@ -550,8 +563,11 @@ rip6_output( } set_packet_service_class(m, so, msc, PKT_SCF_IPV6); - m->m_pkthdr.m_flowhash = in6p->inp_flowhash; - m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH; + m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB; + m->m_pkthdr.pkt_flowid = in6p->inp_flowhash; + m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC | + PKTF_FLOW_RAWSOCK); + m->m_pkthdr.pkt_proto = in6p->in6p_ip6_nxt; if (im6o != NULL) IM6O_ADDREF(im6o); @@ -575,10 +591,9 @@ rip6_output( * Always discard the cached route for unconnected * socket or if it is a multicast route. */ - if (rt == NULL) { - rtfree(in6p->in6p_route.ro_rt); - in6p->in6p_route.ro_rt = NULL; - } + if (rt == NULL) + ROUTE_RELEASE(&in6p->in6p_route); + /* * If this is a connected socket and the destination * route is not multicast, update outif with that of @@ -587,9 +602,20 @@ rip6_output( if (rt != NULL && (outif = rt->rt_ifp) != in6p->in6p_last_outifp) in6p->in6p_last_outifp = outif; + } else { + ROUTE_RELEASE(&in6p->in6p_route); } - if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { + /* + * If output interface was cellular, and this socket is denied + * access to it, generate an event. + */ + if (error != 0 && (ip6oa.ip6oa_retflags & IP6OARF_IFDENIED) && + (in6p->inp_flags & INP_NO_IFT_CELLULAR)) + soevent(in6p->inp_socket, (SO_FILT_HINT_LOCKED| + SO_FILT_HINT_IFDENIED)); + + if (SOCK_PROTO(so) == IPPROTO_ICMPV6) { if (oifp) icmp6_ifoutstat_inc(oifp, type, code); icmp6stat.icp6s_outhist[type]++; @@ -598,16 +624,15 @@ rip6_output( goto freectl; - bad: - if (m) +bad: + if (m != NULL) m_freem(m); - freectl: - if (optp == &opt && optp->ip6po_rthdr && optp->ip6po_route.ro_rt) { - rtfree(optp->ip6po_route.ro_rt); - optp->ip6po_route.ro_rt = NULL; - } - if (control) { +freectl: + if (optp == &opt && optp->ip6po_rthdr) + ROUTE_RELEASE(&optp->ip6po_route); + + if (control != NULL) { if (optp == &opt) ip6_clearpktopts(optp, -1); m_freem(control); @@ -807,26 +832,38 @@ rip6_disconnect(struct socket *so) } static int -rip6_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) +rip6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { +#pragma unused(p) struct inpcb *inp = sotoinpcb(so); - struct sockaddr_in6 *addr = (struct sockaddr_in6 *)(void *)nam; + struct sockaddr_in6 sin6; struct ifaddr *ifa = NULL; struct ifnet *outif = NULL; + int error; - if (nam->sa_len != sizeof(*addr)) - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? 
EINVAL : EPROTOTYPE); - if (TAILQ_EMPTY(&ifnet_head) || addr->sin6_family != AF_INET6) - return EADDRNOTAVAIL; -#if ENABLE_DEFAULT_SCOPE - if (addr->sin6_scope_id == 0) { /* not change if specified */ - addr->sin6_scope_id = scope6_addr2default(&addr->sin6_addr); - } -#endif - if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) && - (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == 0) - return EADDRNOTAVAIL; + if (nam->sa_len != sizeof (struct sockaddr_in6)) + return (EINVAL); + + if (TAILQ_EMPTY(&ifnet_head) || SIN6(nam)->sin6_family != AF_INET6) + return (EADDRNOTAVAIL); + + bzero(&sin6, sizeof (sin6)); + *(&sin6) = *SIN6(nam); + + if ((error = sa6_embedscope(&sin6, ip6_use_defzone)) != 0) + return (error); + + /* Sanitize local copy for address searches */ + sin6.sin6_flowinfo = 0; + sin6.sin6_scope_id = 0; + sin6.sin6_port = 0; + + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) && + (ifa = ifa_ifwithaddr(SA(&sin6))) == 0) + return (EADDRNOTAVAIL); if (ifa != NULL) { IFA_LOCK(ifa); if (((struct in6_ifaddr *)ifa)->ia6_flags & @@ -834,15 +871,15 @@ rip6_bind(struct socket *so, struct sockaddr *nam, __unused struct proc *p) IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { IFA_UNLOCK(ifa); IFA_REMREF(ifa); - return(EADDRNOTAVAIL); + return (EADDRNOTAVAIL); } outif = ifa->ifa_ifp; IFA_UNLOCK(ifa); IFA_REMREF(ifa); } - inp->in6p_laddr = addr->sin6_addr; + inp->in6p_laddr = sin6.sin6_addr; inp->in6p_last_outifp = outif; - return 0; + return (0); } static int @@ -859,6 +896,8 @@ rip6_connect(struct socket *so, struct sockaddr *nam, __unused struct proc *p) unsigned int ifscope; struct ifnet *outif = NULL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? EINVAL : EPROTOTYPE); if (nam->sa_len != sizeof(*addr)) return EINVAL; if (TAILQ_EMPTY(&ifnet_head)) @@ -900,18 +939,24 @@ rip6_shutdown(struct socket *so) static int rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, - struct mbuf *control, struct proc *p) + struct mbuf *control, struct proc *p) { #pragma unused(flags, p) struct inpcb *inp = sotoinpcb(so); struct sockaddr_in6 tmp; struct sockaddr_in6 *dst = (struct sockaddr_in6 *)(void *)nam; + int error = 0; + + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + error = (inp == NULL ? 
EINVAL : EPROTOTYPE); + goto bad; + } /* always copy sockaddr to avoid overwrites */ if (so->so_state & SS_ISCONNECTED) { - if (nam) { - m_freem(m); - return EISCONN; + if (nam != NULL) { + error = EISCONN; + goto bad; } /* XXX */ bzero(&tmp, sizeof(tmp)); @@ -922,8 +967,8 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, dst = &tmp; } else { if (nam == NULL) { - m_freem(m); - return ENOTCONN; + error = ENOTCONN; + goto bad; } tmp = *(struct sockaddr_in6 *)(void *)nam; dst = &tmp; @@ -933,24 +978,47 @@ rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, dst->sin6_scope_id = scope6_addr2default(&dst->sin6_addr); } #endif - return rip6_output(m, so, dst, control, 1); + return (rip6_output(m, so, dst, control, 1)); + +bad: + VERIFY(error != 0); + + if (m != NULL) + m_freem(m); + if (control != NULL) + m_freem(control); + + return (error); } struct pr_usrreqs rip6_usrreqs = { - rip6_abort, pru_accept_notsupp, rip6_attach, rip6_bind, rip6_connect, - pru_connect2_notsupp, in6_control, rip6_detach, rip6_disconnect, - pru_listen_notsupp, in6_setpeeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, rip6_send, pru_sense_null, rip6_shutdown, - in6_setsockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = rip6_abort, + .pru_attach = rip6_attach, + .pru_bind = rip6_bind, + .pru_connect = rip6_connect, + .pru_control = in6_control, + .pru_detach = rip6_detach, + .pru_disconnect = rip6_disconnect, + .pru_peeraddr = in6_getpeeraddr, + .pru_send = rip6_send, + .pru_shutdown = rip6_shutdown, + .pru_sockaddr = in6_getsockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; __private_extern__ struct pr_usrreqs icmp6_dgram_usrreqs = { - rip6_abort, pru_accept_notsupp, icmp6_dgram_attach, rip6_bind, rip6_connect, - pru_connect2_notsupp, in6_control, rip6_detach, rip6_disconnect, - pru_listen_notsupp, in6_setpeeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, icmp6_dgram_send, pru_sense_null, rip6_shutdown, - in6_setsockaddr, sosend, soreceive, pru_sopoll_notsupp + .pru_abort = rip6_abort, + .pru_attach = icmp6_dgram_attach, + .pru_bind = rip6_bind, + .pru_connect = rip6_connect, + .pru_control = in6_control, + .pru_detach = rip6_detach, + .pru_disconnect = rip6_disconnect, + .pru_peeraddr = in6_getpeeraddr, + .pru_send = icmp6_dgram_send, + .pru_shutdown = rip6_shutdown, + .pru_sockaddr = in6_getsockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; - - - diff --git a/bsd/netinet6/raw_ip6.h b/bsd/netinet6/raw_ip6.h index 608e1366c..390917d01 100644 --- a/bsd/netinet6/raw_ip6.h +++ b/bsd/netinet6/raw_ip6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -76,7 +76,7 @@ struct rip6stat { u_quad_t rip6s_opackets; /* total output packets */ }; -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE extern struct rip6stat rip6stat; -#endif /* KERNEL_PRIVATE */ -#endif +#endif /* BSD_KERNEL_PRIVATE */ +#endif /* _NETINET6_RAW_IP6_H_ */ diff --git a/bsd/netinet6/route6.c b/bsd/netinet6/route6.c index 9325aadec..9a8dc3cae 100644 --- a/bsd/netinet6/route6.c +++ b/bsd/netinet6/route6.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,13 +22,10 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/route6.c,v 1.1.2.3 2001/07/03 11:01:55 ume Exp $ */ -/* $KAME: route6.c,v 1.24 2001/03/14 03:07:05 itojun Exp $ */ - /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. @@ -76,11 +73,6 @@ #include -#if IP6_RTHDR0_ALLOWED -static int ip6_rthdr0(struct mbuf *, struct ip6_hdr *, - struct ip6_rthdr0 *); -#endif /* IP6_RTHDR0_ALLOWED */ - int route6_input(struct mbuf **mp, int *offp, int proto) { @@ -89,6 +81,7 @@ route6_input(struct mbuf **mp, int *offp, int proto) struct mbuf *m = *mp; struct ip6_rthdr *rh; int off = *offp, rhlen; +#ifdef notyet struct ip6aux *ip6a; ip6a = ip6_findaux(m); @@ -100,6 +93,7 @@ route6_input(struct mbuf **mp, int *offp, int proto) return IPPROTO_DONE; } } +#endif /* notyet */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(*rh), return IPPROTO_DONE); @@ -117,40 +111,11 @@ route6_input(struct mbuf **mp, int *offp, int proto) IP6_EXTHDR_GET(rh, struct ip6_rthdr *, m, off, sizeof(*rh)); if (rh == NULL) { ip6stat.ip6s_tooshort++; - return IPPROTO_DONE; + return (IPPROTO_DONE); } #endif switch (rh->ip6r_type) { -#if IP6_RTHDR0_ALLOWED - case IPV6_RTHDR_TYPE_0: - rhlen = (rh->ip6r_len + 1) << 3; -#ifndef PULLDOWN_TEST - /* - * note on option length: - * due to IP6_EXTHDR_CHECK assumption, we cannot handle - * very big routing header (max rhlen == 2048). - */ - IP6_EXTHDR_CHECK(m, off, rhlen, return IPPROTO_DONE); -#else - /* - * note on option length: - * maximum rhlen: 2048 - * max mbuf m_pulldown can handle: MCLBYTES == usually 2048 - * so, here we are assuming that m_pulldown can handle - * rhlen == 2048 case. this may not be a good thing to - * assume - we may want to avoid pulling it up altogether. 
- */ - IP6_EXTHDR_GET(rh, struct ip6_rthdr *, m, off, rhlen); - if (rh == NULL) { - ip6stat.ip6s_tooshort++; - return IPPROTO_DONE; - } -#endif - if (ip6_rthdr0(m, ip6, (struct ip6_rthdr0 *)rh)) - return(IPPROTO_DONE); - break; -#endif /* IP6_RTHDR0_ALLOWED */ default: /* unknown routing type */ if (rh->ip6r_segleft == 0) { @@ -159,135 +124,10 @@ route6_input(struct mbuf **mp, int *offp, int proto) } ip6stat.ip6s_badoptions++; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - (caddr_t)&rh->ip6r_type - (caddr_t)ip6); - return(IPPROTO_DONE); + (caddr_t)&rh->ip6r_type - (caddr_t)ip6); + return (IPPROTO_DONE); } *offp += rhlen; - return(rh->ip6r_nxt); + return (rh->ip6r_nxt); } - -#if IP6_RTHDR0_ALLOWED -/* - * Type0 routing header processing - * - * RFC2292 backward compatibility warning: no support for strict/loose bitmap, - * as it was dropped between RFC1883 and RFC2460. - */ -static int -ip6_rthdr0(m, ip6, rh0) - struct mbuf *m; - struct ip6_hdr *ip6; - struct ip6_rthdr0 *rh0; -{ - int addrs, index; - struct in6_addr *nextaddr, tmpaddr, ia6 = NULL; - struct route_in6 ip6forward_rt; - - if (rh0->ip6r0_segleft == 0) - return(0); - - if (rh0->ip6r0_len % 2 -#if COMPAT_RFC1883 - || rh0->ip6r0_len > 46 -#endif - ) { - /* - * Type 0 routing header can't contain more than 23 addresses. - * RFC 2462: this limitation was removed since strict/loose - * bitmap field was deleted. - */ - ip6stat.ip6s_badoptions++; - icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - (caddr_t)&rh0->ip6r0_len - (caddr_t)ip6); - return (-1); - } - - if ((addrs = rh0->ip6r0_len / 2) < rh0->ip6r0_segleft) { - ip6stat.ip6s_badoptions++; - icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - (caddr_t)&rh0->ip6r0_segleft - (caddr_t)ip6); - return (-1); - } - - index = addrs - rh0->ip6r0_segleft; - rh0->ip6r0_segleft--; - /* note that ip6r0_addr does not exist in RFC2292bis */ - nextaddr = rh0->ip6r0_addr + index; - - /* - * reject invalid addresses. be proactive about malicious use of - * IPv4 mapped/compat address. - * XXX need more checks? - */ - if (IN6_IS_ADDR_MULTICAST(nextaddr) || - IN6_IS_ADDR_UNSPECIFIED(nextaddr) || - IN6_IS_ADDR_V4MAPPED(nextaddr) || - IN6_IS_ADDR_V4COMPAT(nextaddr)) { - ip6stat.ip6s_badoptions++; - m_freem(m); - return (-1); - } - if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || - IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst) || - IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst) || - IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { - ip6stat.ip6s_badoptions++; - m_freem(m); - return (-1); - } - - /* - * Determine the scope zone of the next hop, based on the interface - * of the current hop. [RFC4007, Section 9] - * Then disambiguate the scope zone for the next hop (if necessary). - */ - if ((ia6 = ip6_getdstifaddr(m)) == NULL) - goto bad; - if (in6_setscope(nextaddr, ia6->ia_ifp, NULL) != 0) { - ip6stat.ip6s_badscope++; - IFA_REMREF(&ia6->ia_ifa); - ia6 = NULL; - goto bad; - } - IFA_REMREF(&ia6->ia_ifa); - ia6 = NULL; - - /* - * Swap the IPv6 destination address and nextaddr. Forward the packet. 
- */ - tmpaddr = *nextaddr; - *nextaddr = ip6->ip6_dst; - in6_clearscope(nextaddr); /* XXX */ - ip6->ip6_dst = tmpaddr; - if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) - ip6->ip6_dst.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); - - /* - * Don't use the globally cached route to forward packet having - * Type 0 routing header(s); instead, do an explicit lookup using - * a local route entry variable, in case the next address in the - * packet is bogus (which would otherwise unnecessarily invalidate - * the globally cached route). - */ - bzero(&ip6forward_rt, sizeof (ip6forward_rt)); - -#if COMPAT_RFC1883 - if (rh0->ip6r0_slmap[index / 8] & (1 << (7 - (index % 8)))) - ip6_forward(m, &ip6forward_rt, IPV6_SRCRT_NEIGHBOR, 0); - else - ip6_forward(m, &ip6forward_rt, IPV6_SRCRT_NOTNEIGHBOR, 0); -#else - ip6_forward(m, &ip6forward_rt, 1, 0); -#endif - - /* Release reference to the looked up route */ - if (ip6forward_rt.ro_rt != NULL) { - rtfree(ip6forward_rt.ro_rt); - ip6forward_rt.ro_rt = NULL; - } - - return(-1); /* m would be freed in ip6_forward() */ -} -#endif /* IP6_RTHDR0_ALLOWED */ - diff --git a/bsd/netinet6/scope6.c b/bsd/netinet6/scope6.c index 27ad76492..8b85f414b 100644 --- a/bsd/netinet6/scope6.c +++ b/bsd/netinet6/scope6.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2009-2011 Apple Inc. All rights reserved. + * Copyright (c) 2009-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,17 +22,14 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/scope6.c,v 1.3 2002/03/25 10:12:51 ume Exp $ */ -/* $KAME: scope6.c,v 1.10 2000/07/24 13:29:31 itojun Exp $ */ - /* * Copyright (C) 2000 WIDE Project. * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -44,7 +41,7 @@ * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
- * + * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -75,149 +72,52 @@ #include #include -extern lck_mtx_t *scope6_mutex; - #ifdef ENABLE_DEFAULT_SCOPE int ip6_use_defzone = 1; #else int ip6_use_defzone = 0; #endif -static size_t if_scope_indexlim = 8; -struct scope6_id *scope6_ids = NULL; +decl_lck_mtx_data(static, scope6_lock); +static struct scope6_id sid_default; -int -scope6_ifattach( - struct ifnet *ifp) -{ - /* - * We have some arrays that should be indexed by if_index. - * since if_index will grow dynamically, they should grow too. - */ - lck_mtx_lock(scope6_mutex); - if (scope6_ids == NULL || if_index >= if_scope_indexlim) { - size_t n; - caddr_t q; - int newlim = if_scope_indexlim; - - while (if_index >= newlim) - newlim <<= 1; - - /* grow scope index array */ - n = newlim * sizeof(struct scope6_id); - /* XXX: need new malloc type? */ - q = (caddr_t)_MALLOC(n, M_IFADDR, M_WAITOK); - if (q == NULL) { - lck_mtx_unlock(scope6_mutex); - return ENOBUFS; - } - if_scope_indexlim = newlim; - bzero(q, n); - if (scope6_ids) { - bcopy((caddr_t)scope6_ids, q, n/2); - FREE((caddr_t)scope6_ids, M_IFADDR); - } - scope6_ids = (struct scope6_id *)(void *)q; - } +#define SID(ifp) &IN6_IFEXTRA(ifp)->scope6_id -#define SID scope6_ids[ifp->if_index] +void +scope6_init(lck_grp_t *grp, lck_attr_t *attr) +{ + bzero(&sid_default, sizeof(sid_default)); + lck_mtx_init(&scope6_lock, grp, attr); +} - /* don't initialize if called twice */ - if (SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]) { - lck_mtx_unlock(scope6_mutex); - return 0; - } +void +scope6_ifattach(struct ifnet *ifp) +{ + struct scope6_id *sid; + VERIFY(IN6_IFEXTRA(ifp) != NULL); + if_inet6data_lock_exclusive(ifp); + sid = SID(ifp); + /* N.B.: the structure is already zero'ed */ /* * XXX: IPV6_ADDR_SCOPE_xxx macros are not standard. * Should we rather hardcode here? */ - SID.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = ifp->if_index; - SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index; + sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = ifp->if_index; + sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = ifp->if_index; #if MULTI_SCOPE /* by default, we don't care about scope boundary for these scopes. */ - SID.s6id_list[IPV6_ADDR_SCOPE_SITELOCAL] = 1; - SID.s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL] = 1; + sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL] = 1; + sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL] = 1; #endif -#undef SID - lck_mtx_unlock(scope6_mutex); - - return 0; -} - -int -scope6_set( - struct ifnet *ifp, - u_int32_t *idlist) -{ - int i; - int error = 0; - - if (scope6_ids == NULL) /* paranoid? */ - return(EINVAL); - - /* - * XXX: We need more consistency checks of the relationship among - * scopes (e.g. an organization should be larger than a site). - */ - - /* - * TODO(XXX): after setting, we should reflect the changes to - * interface addresses, routing table entries, PCB entries... - */ - - lck_mtx_lock(scope6_mutex); - for (i = 0; i < 16; i++) { - if (idlist[i] && - idlist[i] != scope6_ids[ifp->if_index].s6id_list[i]) { - if (i == IPV6_ADDR_SCOPE_INTFACELOCAL && - idlist[i] > if_index) { - /* - * XXX: theoretically, there should be no - * relationship between link IDs and interface - * IDs, but we check the consistency for - * safety in later use. 
- */ - lck_mtx_unlock(scope6_mutex); - return(EINVAL); - } - - /* - * XXX: we must need lots of work in this case, - * but we simply set the new value in this initial - * implementation. - */ - scope6_ids[ifp->if_index].s6id_list[i] = idlist[i]; - } - } - lck_mtx_unlock(scope6_mutex); - - return(error); + if_inet6data_lock_done(ifp); } -int -scope6_get( - struct ifnet *ifp, - u_int32_t *idlist) -{ - if (scope6_ids == NULL) /* paranoid? */ - return(EINVAL); - - lck_mtx_lock(scope6_mutex); - bcopy(scope6_ids[ifp->if_index].s6id_list, idlist, - sizeof(scope6_ids[ifp->if_index].s6id_list)); - lck_mtx_unlock(scope6_mutex); - - return(0); -} - - /* * Get a scope of the address. Node-local, link-local, site-local or global. */ int -in6_addrscope(addr) -struct in6_addr *addr; +in6_addrscope(struct in6_addr *addr) { int scope; @@ -226,18 +126,14 @@ struct in6_addr *addr; switch (scope) { case 0x80: - return IPV6_ADDR_SCOPE_LINKLOCAL; - break; + return (IPV6_ADDR_SCOPE_LINKLOCAL); case 0xc0: - return IPV6_ADDR_SCOPE_SITELOCAL; - break; + return (IPV6_ADDR_SCOPE_SITELOCAL); default: - return IPV6_ADDR_SCOPE_GLOBAL; /* just in case */ - break; + return (IPV6_ADDR_SCOPE_GLOBAL); /* just in case */ } } - if (addr->s6_addr8[0] == 0xff) { scope = addr->s6_addr8[1] & 0x0f; @@ -247,17 +143,13 @@ struct in6_addr *addr; */ switch (scope) { case IPV6_ADDR_SCOPE_INTFACELOCAL: - return IPV6_ADDR_SCOPE_INTFACELOCAL; - break; + return (IPV6_ADDR_SCOPE_INTFACELOCAL); case IPV6_ADDR_SCOPE_LINKLOCAL: - return IPV6_ADDR_SCOPE_LINKLOCAL; - break; + return (IPV6_ADDR_SCOPE_LINKLOCAL); case IPV6_ADDR_SCOPE_SITELOCAL: - return IPV6_ADDR_SCOPE_SITELOCAL; - break; + return (IPV6_ADDR_SCOPE_SITELOCAL); default: - return IPV6_ADDR_SCOPE_GLOBAL; - break; + return (IPV6_ADDR_SCOPE_GLOBAL); } } @@ -265,55 +157,47 @@ struct in6_addr *addr; * Regard loopback and unspecified addresses as global, since * they have no ambiguity. */ - if (bcmp(&in6addr_loopback, addr, sizeof(*addr) - 1) == 0) { + if (bcmp(&in6addr_loopback, addr, sizeof (*addr) - 1) == 0) { if (addr->s6_addr8[15] == 1) /* loopback */ - return IPV6_ADDR_SCOPE_LINKLOCAL; + return (IPV6_ADDR_SCOPE_LINKLOCAL); if (addr->s6_addr8[15] == 0) /* unspecified */ - return IPV6_ADDR_SCOPE_GLOBAL; /* XXX: correct? */ + return (IPV6_ADDR_SCOPE_GLOBAL); /* XXX: correct? */ } - return IPV6_ADDR_SCOPE_GLOBAL; + return (IPV6_ADDR_SCOPE_GLOBAL); } int -in6_addr2scopeid( - struct ifnet *ifp, /* must not be NULL */ - struct in6_addr *addr) /* must not be NULL */ +in6_addr2scopeid(struct ifnet *ifp, struct in6_addr *addr) { int scope = in6_addrscope(addr); - int index = ifp->if_index; int retid = 0; + struct scope6_id *sid; - if (scope6_ids == NULL) /* paranoid? */ - return(0); /* XXX */ - - lck_mtx_lock(scope6_mutex); - if (index >= if_scope_indexlim) { - lck_mtx_unlock(scope6_mutex); - return(0); /* XXX */ - } - -#define SID scope6_ids[index] - switch(scope) { + if_inet6data_lock_shared(ifp); + if (IN6_IFEXTRA(ifp) == NULL) + goto err; + sid = SID(ifp); + switch (scope) { case IPV6_ADDR_SCOPE_NODELOCAL: retid = -1; /* XXX: is this an appropriate value? 
*/ break; case IPV6_ADDR_SCOPE_LINKLOCAL: - retid=SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]; + retid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]; break; case IPV6_ADDR_SCOPE_SITELOCAL: - retid=SID.s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]; + retid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]; break; case IPV6_ADDR_SCOPE_ORGLOCAL: - retid=SID.s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]; + retid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]; break; default: break; /* XXX: value 0, treat as global. */ } -#undef SID +err: + if_inet6data_lock_done(ifp); - lck_mtx_unlock(scope6_mutex); - return retid; + return (retid); } /* @@ -346,7 +230,7 @@ sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok) return (ENXIO); ifnet_head_lock_shared(); ifp = ifindex2ifnet[zoneid]; - if (ifp == NULL) {/* XXX: this can happen for some OS */ + if (ifp == NULL) { /* XXX: this can happen for some OS */ ifnet_head_done(); return (ENXIO); } @@ -357,7 +241,7 @@ sa6_embedscope(struct sockaddr_in6 *sin6, int defaultok) sin6->sin6_scope_id = 0; } - return 0; + return (0); } void @@ -426,12 +310,11 @@ sa6_recoverscope(struct sockaddr_in6 *sin6, boolean_t attachcheck) } } - return 0; + return (0); } void -scope6_setdefault( - struct ifnet *ifp) /* note that this might be NULL */ +scope6_setdefault(struct ifnet *ifp) { /* * Currently, this function just set the default "link" according to @@ -439,43 +322,37 @@ scope6_setdefault( * We might eventually have to separate the notion of "link" from * "interface" and provide a user interface to set the default. */ - lck_mtx_lock(scope6_mutex); - if (ifp) { - scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = - ifp->if_index; - scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = - ifp->if_index; + lck_mtx_lock(&scope6_lock); + if (ifp != NULL) { + sid_default.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = + ifp->if_index; + sid_default.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = + ifp->if_index; } else { - scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = 0; - scope6_ids[0].s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = 0; + sid_default.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL] = 0; + sid_default.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL] = 0; } - lck_mtx_unlock(scope6_mutex); + lck_mtx_unlock(&scope6_lock); } -int -scope6_get_default( - u_int32_t *idlist) -{ - if (scope6_ids == NULL) /* paranoid? */ - return(EINVAL); - - lck_mtx_lock(scope6_mutex); - bcopy(scope6_ids[0].s6id_list, idlist, - sizeof(scope6_ids[0].s6id_list)); - lck_mtx_unlock(scope6_mutex); - - return(0); -} u_int32_t -scope6_addr2default( - struct in6_addr *addr) +scope6_addr2default(struct in6_addr *addr) { u_int32_t id = 0; int index = in6_addrscope(addr); - lck_mtx_lock(scope6_mutex); - id = scope6_ids[0].s6id_list[index]; - lck_mtx_unlock(scope6_mutex); + + /* + * special case: The loopback address should be considered as + * link-local, but there's no ambiguity in the syntax. 
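For orientation, the classification the rewritten in6_addrscope() performs can be restated as a standalone function. The sketch below mirrors the KAME rules this function implements, partially visible in the hunk (fe80::/10 link-local, fec0::/10 site-local, multicast scope from the low nibble of the second byte, loopback folded into link-local); the scope constants are written out by value on the assumption that they match the KAME IPV6_ADDR_SCOPE_* definitions, and it compiles in user space:

#include <netinet/in.h>

/* Assumed values, matching the KAME IPV6_ADDR_SCOPE_* constants. */
#define SCOPE_LINKLOCAL		0x02
#define SCOPE_SITELOCAL		0x05
#define SCOPE_GLOBAL		0x0e

static int
addr_scope(const struct in6_addr *a)
{
	if (a->s6_addr[0] == 0xfe) {
		switch (a->s6_addr[1] & 0xc0) {
		case 0x80:	/* fe80::/10 */
			return SCOPE_LINKLOCAL;
		case 0xc0:	/* fec0::/10 (deprecated site-local) */
			return SCOPE_SITELOCAL;
		default:
			return SCOPE_GLOBAL;	/* "just in case" */
		}
	}
	if (a->s6_addr[0] == 0xff)	/* multicast: 4-bit scope field */
		return a->s6_addr[1] & 0x0f;
	if (IN6_IS_ADDR_LOOPBACK(a))	/* ::1 has link-local scope */
		return SCOPE_LINKLOCAL;
	return SCOPE_GLOBAL;
}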
+ */ + if (IN6_IS_ADDR_LOOPBACK(addr)) + return (0); + + lck_mtx_lock(&scope6_lock); + id = sid_default.s6id_list[index]; + lck_mtx_unlock(&scope6_lock); + return (id); } @@ -491,14 +368,7 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id) { int scope; u_int32_t zoneid = 0; - int index = ifp->if_index; - -#ifdef DIAGNOSTIC - if (scope6_ids == NULL) { /* should not happen */ - panic("in6_setscope: scope array is NULL"); - /* NOTREACHED */ - } -#endif + struct scope6_id *sid; /* * special case: the loopback address can only belong to a loopback @@ -516,36 +386,35 @@ in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id) scope = in6_addrscope(in6); - lck_mtx_lock(scope6_mutex); - if (index >= if_scope_indexlim) { - lck_mtx_unlock(scope6_mutex); - if (ret_id != NULL) + if_inet6data_lock_shared(ifp); + if (IN6_IFEXTRA(ifp) == NULL) { + if_inet6data_lock_done(ifp); + if (ret_id) *ret_id = 0; return (EINVAL); } -#define SID scope6_ids[index] + sid = SID(ifp); switch (scope) { case IPV6_ADDR_SCOPE_INTFACELOCAL: /* should be interface index */ - zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL]; + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_INTFACELOCAL]; break; case IPV6_ADDR_SCOPE_LINKLOCAL: - zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]; + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_LINKLOCAL]; break; case IPV6_ADDR_SCOPE_SITELOCAL: - zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]; + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_SITELOCAL]; break; case IPV6_ADDR_SCOPE_ORGLOCAL: - zoneid = SID.s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]; + zoneid = sid->s6id_list[IPV6_ADDR_SCOPE_ORGLOCAL]; break; -#undef SID default: zoneid = 0; /* XXX: treat as global. */ break; } - lck_mtx_unlock(scope6_mutex); + if_inet6data_lock_done(ifp); if (ret_id != NULL) *ret_id = zoneid; @@ -573,4 +442,3 @@ in6_clearscope(struct in6_addr *in6) return (modified); } - diff --git a/bsd/netinet6/scope6_var.h b/bsd/netinet6/scope6_var.h index 300e8228c..22270e584 100644 --- a/bsd/netinet6/scope6_var.h +++ b/bsd/netinet6/scope6_var.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2009-2010 Apple Inc. All rights reserved. + * Copyright (c) 2009-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,17 +22,14 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* $FreeBSD: src/sys/netinet6/scope6_var.h,v 1.1.2.1 2000/07/15 07:14:38 kris Exp $ */ -/* $KAME: scope6_var.h,v 1.4 2000/05/18 15:03:27 jinmei Exp $ */ - /* * Copyright (C) 2000 WIDE Project. * All rights reserved. 
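The net effect of the scope6.c rewrite: the global, dynamically grown scope6_ids[] array and its dedicated scope6_mutex are gone, and each interface's zone IDs now live in the protocol-private if_inet6data area, reached through IN6_IFEXTRA()/SID() under the interface data lock. A condensed sketch of the read path used by the new in6_addr2scopeid() and in6_setscope(), assuming the lock and accessor names from the hunk above:

/*
 * Illustrative only: the lock-then-deref shape of the new readers.
 * IN6_IFEXTRA() may legitimately be NULL (interface not yet attached
 * to INET6), so every reader checks; caller guarantees the scope
 * index is below SCOPE6_ID_MAX (16).
 */
static u_int32_t
scope6_zone_of(struct ifnet *ifp, int scope)
{
	u_int32_t zoneid = 0;
	struct scope6_id *sid;

	if_inet6data_lock_shared(ifp);
	if (IN6_IFEXTRA(ifp) != NULL) {
		sid = &IN6_IFEXTRA(ifp)->scope6_id;
		zoneid = sid->s6id_list[scope];
	}
	if_inet6data_lock_done(ifp);
	return (zoneid);
}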
- * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -44,7 +41,7 @@ * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -62,32 +59,27 @@ #define _NETINET6_SCOPE6_VAR_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE +/* + * 16 is correspondent to 4bit multicast scope field. + * i.e. from node-local to global with some reserved/unassigned types. + */ +#define SCOPE6_ID_MAX 16 struct scope6_id { - /* - * 16 is correspondent to 4bit multicast scope field. - * i.e. from node-local to global with some reserved/unassigned types. - */ - u_int32_t s6id_list[16]; + u_int32_t s6id_list[SCOPE6_ID_MAX]; }; -void scope6_init (void); -int scope6_ifattach(struct ifnet *); -void scope6_ifdetach (struct scope6_id *); -int scope6_set(struct ifnet *, u_int32_t *); -int scope6_get(struct ifnet *, u_int32_t *); -void scope6_setdefault(struct ifnet *); -int scope6_get_default(u_int32_t *); -u_int32_t scope6_in6_addrscope(struct in6_addr *); -u_int32_t scope6_addr2default(struct in6_addr *); -int sa6_embedscope (struct sockaddr_in6 *, int); -int sa6_recoverscope (struct sockaddr_in6 *, boolean_t); -int in6_setscope (struct in6_addr *, struct ifnet *, u_int32_t *); -int in6_clearscope (struct in6_addr *); +extern void scope6_init(lck_grp_t *, lck_attr_t *); +extern void scope6_ifattach(struct ifnet *); +extern void scope6_setdefault(struct ifnet *); +extern u_int32_t scope6_in6_addrscope(struct in6_addr *); +extern u_int32_t scope6_addr2default(struct in6_addr *); +extern int sa6_embedscope (struct sockaddr_in6 *, int); +extern int sa6_recoverscope (struct sockaddr_in6 *, boolean_t); +extern int in6_setscope (struct in6_addr *, struct ifnet *, u_int32_t *); +extern int in6_clearscope (struct in6_addr *); extern void rtkey_to_sa6(struct rtentry *, struct sockaddr_in6 *); extern void rtgw_to_sa6(struct rtentry *, struct sockaddr_in6 *); - -#endif /* KERNEL_PRIVATE */ - +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_SCOPE6_VAR_H_ */ diff --git a/bsd/netinet6/tcp6_var.h b/bsd/netinet6/tcp6_var.h index 5fded19e6..8190f5269 100644 --- a/bsd/netinet6/tcp6_var.h +++ b/bsd/netinet6/tcp6_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Apple Inc. All rights reserved. + * Copyright (c) 2010-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -97,7 +97,7 @@ #define _NETINET_TCP6_VAR_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet6_tcp6); #endif @@ -112,6 +112,5 @@ struct rtentry *tcp_rtlookup6(struct inpcb *, unsigned int); extern struct pr_usrreqs tcp6_usrreqs; -#endif /* KERNEL_PRIVATE */ - +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET_TCP6_VAR_H_ */ diff --git a/bsd/netinet6/udp6_output.c b/bsd/netinet6/udp6_output.c index fa609ea19..71d7cdffd 100644 --- a/bsd/netinet6/udp6_output.c +++ b/bsd/netinet6/udp6_output.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
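The new SCOPE6_ID_MAX in the scope6_var.h hunk above makes the sizing explicit: s6id_list has 16 slots because it is indexed by the 4-bit multicast scope field (RFC 4291), with the unicast scopes mapped onto the same values. A user-space illustration of where the index comes from:

#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdio.h>

int
main(void)
{
	struct in6_addr a;

	inet_pton(AF_INET6, "ff02::1", &a);	/* all-nodes, link-local */
	/* The scope is the low four bits of the second address byte. */
	printf("scope index %u\n", a.s6_addr[1] & 0x0f);	/* -> 2 */
	/* s6id_list[2] is the slot scope6_ifattach() set to if_index. */
	return 0;
}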
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -129,13 +129,11 @@ #include #include -#ifdef IPSEC +#if IPSEC #include -#ifdef INET6 #include -#endif extern int ipsec_bypass; -#endif /*IPSEC*/ +#endif /* IPSEC */ #include @@ -144,35 +142,12 @@ extern int ipsec_bypass; * Per RFC 768, August, 1980. */ -#define in6pcb inpcb -#define udp6stat udpstat -#define udp6s_opackets udps_opackets - -static __inline__ u_int16_t -get_socket_id(struct socket * s) -{ - u_int16_t val; - - if (s == NULL) { - return (0); - } - val = (u_int16_t)(((uintptr_t)s) / sizeof(struct socket)); - if (val == 0) { - val = 0xffff; - } - return (val); -} - int -udp6_output(in6p, m, addr6, control, p) - struct in6pcb *in6p; - struct mbuf *m; - struct mbuf *control; - struct sockaddr *addr6; - struct proc *p; +udp6_output(struct in6pcb *in6p, struct mbuf *m, struct sockaddr *addr6, + struct mbuf *control, struct proc *p) { u_int32_t ulen = m->m_pkthdr.len; - u_int32_t plen = sizeof(struct udphdr) + ulen; + u_int32_t plen = sizeof (struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr; @@ -180,18 +155,20 @@ udp6_output(in6p, m, addr6, control, p) int error = 0; struct ip6_pktopts opt, *optp = NULL; struct ip6_moptions *im6o; - int af = AF_INET6, hlen = sizeof(struct ip6_hdr); + int af = AF_INET6, hlen = sizeof (struct ip6_hdr); int flags; struct sockaddr_in6 tmp; struct in6_addr storage; mbuf_svc_class_t msc = MBUF_SC_UNSPEC; struct ip6_out_args ip6oa = - { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF }; + { IFSCOPE_NONE, { 0 }, IP6OAF_SELECT_SRCIF, 0 }; struct flowadv *adv = &ip6oa.ip6oa_flowadv; + struct socket *so = in6p->in6p_socket; + struct route_in6 ro; int flowadv = 0; /* Enable flow advisory only when connected */ - flowadv = (in6p->inp_socket->so_state & SS_ISCONNECTED) ? 1 : 0; + flowadv = (so->so_state & SS_ISCONNECTED) ? 1 : 0; if (flowadv && INP_WAIT_FOR_IF_FEEDBACK(in6p)) { error = ENOBUFS; @@ -207,8 +184,8 @@ udp6_output(in6p, m, addr6, control, p) if (control) { msc = mbuf_service_class_from_control(control); - - if ((error = ip6_setpktopts(control, &opt, NULL, IPPROTO_UDP)) != 0) + if ((error = ip6_setpktopts(control, &opt, + NULL, IPPROTO_UDP)) != 0) goto release; optp = &opt; } else @@ -217,7 +194,8 @@ udp6_output(in6p, m, addr6, control, p) if (addr6) { /* * IPv4 version of udp_output calls in_pcbconnect in this case, - * which needs splnet and affects performance. + * which has its costs. + * * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address @@ -258,8 +236,9 @@ udp6_output(in6p, m, addr6, control, p) */ error = EINVAL; goto release; - } else + } else { af = AF_INET; + } } /* KAME hack: embed scopeid */ @@ -311,15 +290,21 @@ udp6_output(in6p, m, addr6, control, p) if (in6p->inp_flowhash == 0) in6p->inp_flowhash = inp_calc_flowhash(in6p); + /* update flowinfo - RFC 6437 */ + if (in6p->inp_flow == 0 && in6p->in6p_flags & IN6P_AUTOFLOWLABEL) { + in6p->inp_flow &= ~IPV6_FLOWLABEL_MASK; + in6p->inp_flow |= + (htonl(in6p->inp_flowhash) & IPV6_FLOWLABEL_MASK); + } if (af == AF_INET) - hlen = sizeof(struct ip); + hlen = sizeof (struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. 
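The '+' lines above implement stateless flow labels per RFC 6437: on a socket with no explicit label and IN6P_AUTOFLOWLABEL set, the low 20 bits of the PCB's flow hash are stamped into inp_flow. Just the bit manipulation, standalone; FLOWLABEL_MASK is a local stand-in for the kernel's IPV6_FLOWLABEL_MASK, which I take to be the 20 low bits in network byte order:

#include <arpa/inet.h>
#include <stdint.h>

#define FLOWLABEL_MASK	htonl(0x000fffffU)	/* 20 bits, network order */

/* Derive an RFC 6437 flow label from a flow hash; both values are
 * kept in network byte order, as in the hunk above. */
uint32_t
auto_flowlabel(uint32_t inp_flow, uint32_t flowhash)
{
	if (inp_flow == 0)	/* no label chosen by the application */
		inp_flow |= (htonl(flowhash) & FLOWLABEL_MASK);
	return inp_flow;
}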
*/ - M_PREPEND(m, hlen + sizeof(struct udphdr), M_DONTWAIT); + M_PREPEND(m, hlen + sizeof (struct udphdr), M_DONTWAIT); if (m == 0) { error = ENOBUFS; goto release; @@ -340,21 +325,20 @@ udp6_output(in6p, m, addr6, control, p) switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_flow = in6p->in6p_flowinfo & IPV6_FLOWINFO_MASK; - ip6->ip6_vfc &= ~IPV6_VERSION_MASK; - ip6->ip6_vfc |= IPV6_VERSION; -#if 0 /* ip6_plen will be filled in ip6_output. */ + ip6->ip6_flow = in6p->inp_flow & IPV6_FLOWINFO_MASK; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; +#if 0 /* ip6_plen will be filled in ip6_output. */ ip6->ip6_plen = htons((u_short)plen); #endif ip6->ip6_nxt = IPPROTO_UDP; - ip6->ip6_hlim = in6_selecthlim(in6p, - in6p->in6p_route.ro_rt ? - in6p->in6p_route.ro_rt->rt_ifp : NULL); + ip6->ip6_hlim = in6_selecthlim(in6p, in6p->in6p_route.ro_rt ? + in6p->in6p_route.ro_rt->rt_ifp : NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; - udp6->uh_sum = in6_cksum_phdr(laddr, faddr, - htonl(plen), htonl(IPPROTO_UDP)); + udp6->uh_sum = in6_pseudo(laddr, faddr, + htonl(plen + IPPROTO_UDP)); m->m_pkthdr.csum_flags = CSUM_UDPIPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); @@ -364,20 +348,29 @@ udp6_output(in6p, m, addr6, control, p) flags = IPV6_OUTARGS; udp6stat.udp6s_opackets++; -#ifdef IPSEC - if (ipsec_bypass == 0 && ipsec_setsocket(m, in6p->in6p_socket) != 0) { +#if IPSEC + if (ipsec_bypass == 0 && ipsec_setsocket(m, so) != 0) { error = ENOBUFS; goto release; } -#endif /*IPSEC*/ - m->m_pkthdr.socket_id = get_socket_id(in6p->in6p_socket); +#endif /* IPSEC */ + + /* In case of IPv4-mapped address used in previous send */ + if (ROUTE_UNUSABLE(&in6p->in6p_route) || + rt_key(in6p->in6p_route.ro_rt)->sa_family != AF_INET6) + ROUTE_RELEASE(&in6p->in6p_route); - set_packet_service_class(m, in6p->in6p_socket, msc, PKT_SCF_IPV6); + /* Copy the cached route and take an extra reference */ + in6p_route_copyout(in6p, &ro); - m->m_pkthdr.m_flowhash = in6p->inp_flowhash; - m->m_pkthdr.m_fhflags |= PF_TAG_FLOWHASH; + set_packet_service_class(m, so, msc, PKT_SCF_IPV6); + + m->m_pkthdr.pkt_flowsrc = FLOWSRC_INPCB; + m->m_pkthdr.pkt_flowid = in6p->inp_flowhash; + m->m_pkthdr.pkt_proto = IPPROTO_UDP; + m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_LOCALSRC); if (flowadv) - m->m_pkthdr.m_fhflags |= PF_TAG_FLOWADV; + m->m_pkthdr.pkt_flags |= PKTF_FLOW_ADV; im6o = in6p->in6p_moptions; if (im6o != NULL) { @@ -385,28 +378,42 @@ udp6_output(in6p, m, addr6, control, p) IM6O_ADDREF_LOCKED(im6o); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && im6o->im6o_multicast_ifp != NULL) { - in6p->in6p_last_outifp = im6o->im6o_multicast_ifp; + in6p->in6p_last_outifp = + im6o->im6o_multicast_ifp; } IM6O_UNLOCK(im6o); } in6p->inp_sndinprog_cnt++; - error = ip6_output(m, optp, &in6p->in6p_route, - flags, im6o, NULL, &ip6oa); + socket_unlock(so, 0); + error = ip6_output(m, optp, &ro, flags, im6o, NULL, &ip6oa); + m = NULL; + socket_lock(so, 0); if (im6o != NULL) IM6O_REMREF(im6o); if (error == 0 && nstat_collect) { - locked_add_64(&in6p->inp_stat->txpackets, 1); - locked_add_64(&in6p->inp_stat->txbytes, ulen); + boolean_t cell, wifi; + + if (in6p->in6p_route.ro_rt != NULL) { + cell = IFNET_IS_CELLULAR(in6p->in6p_route. + ro_rt->rt_ifp); + wifi = (!cell && IFNET_IS_WIFI(in6p->in6p_route. 
+ ro_rt->rt_ifp)); + } else { + cell = wifi = FALSE; + } + INP_ADD_STAT(in6p, cell, wifi, txpackets, 1); + INP_ADD_STAT(in6p, cell, wifi, txbytes, ulen); } if (flowadv && (adv->code == FADV_FLOW_CONTROLLED || adv->code == FADV_SUSPENDED)) { - /* return an error to indicate - * that the packet has been dropped + /* + * Return an error to indicate + * that the packet has been dropped. */ error = ENOBUFS; inp_set_fc_state(in6p, adv->code); @@ -416,32 +423,42 @@ udp6_output(in6p, m, addr6, control, p) if ( --in6p->inp_sndinprog_cnt == 0) in6p->inp_flags &= ~(INP_FC_FEEDBACK); + /* Synchronize PCB cached route */ + in6p_route_copyin(in6p, &ro); + if (in6p->in6p_route.ro_rt != NULL) { struct rtentry *rt = in6p->in6p_route.ro_rt; struct ifnet *outif; - if ((rt->rt_flags & RTF_MULTICAST) || - in6p->in6p_socket == NULL || - !(in6p->in6p_socket->so_state & SS_ISCONNECTED)) { + if (rt->rt_flags & RTF_MULTICAST) rt = NULL; /* unusable */ - } + /* * Always discard the cached route for unconnected * socket or if it is a multicast route. */ - if (rt == NULL) { - rtfree(in6p->in6p_route.ro_rt); - in6p->in6p_route.ro_rt = NULL; - } + if (rt == NULL) + ROUTE_RELEASE(&in6p->in6p_route); + /* - * If this is a connected socket and the destination - * route is not multicast, update outif with that of - * the route interface used by IP. + * If the destination route is unicast, update outif + * with that of the route interface used by IP. */ if (rt != NULL && (outif = rt->rt_ifp) != in6p->in6p_last_outifp) in6p->in6p_last_outifp = outif; + } else { + ROUTE_RELEASE(&in6p->in6p_route); } + + /* + * If output interface was cellular, and this socket is + * denied access to it, generate an event. + */ + if (error != 0 && (ip6oa.ip6oa_retflags & IP6OARF_IFDENIED) && + (in6p->inp_flags & INP_NO_IFT_CELLULAR)) + soevent(in6p->inp_socket, (SO_FILT_HINT_LOCKED| + SO_FILT_HINT_IFDENIED)); break; case AF_INET: error = EAFNOSUPPORT; @@ -450,13 +467,14 @@ udp6_output(in6p, m, addr6, control, p) goto releaseopt; release: - m_freem(m); + if (m != NULL) + m_freem(m); releaseopt: - if (control) { + if (control != NULL) { if (optp == &opt) ip6_clearpktopts(optp, -1); m_freem(control); } - return(error); + return (error); } diff --git a/bsd/netinet6/udp6_usrreq.c b/bsd/netinet6/udp6_usrreq.c index 5e9ac2da0..df5a5ff73 100644 --- a/bsd/netinet6/udp6_usrreq.c +++ b/bsd/netinet6/udp6_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -112,6 +112,7 @@ #include #include #include +#include #include #include @@ -132,48 +133,85 @@ #include #include extern int ipsec_bypass; -#endif /*IPSEC*/ +#endif /* IPSEC */ /* * UDP protocol inplementation. * Per RFC 768, August, 1980. 
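Two threads of change run through this udp6_output() hunk: the PCB's cached route is copied out with an extra reference, the socket lock is dropped around ip6_output(), and the route is synchronized back in afterwards. The point, as far as the diff shows, is to avoid holding the socket lock across a long output path while keeping the cached route coherent. Condensed to its shape (lines lifted from the hunk; stats, error, and flow-control handling elided):

	in6p_route_copyout(in6p, &ro);	/* extra ref on the cached rtentry */
	in6p->inp_sndinprog_cnt++;

	socket_unlock(so, 0);
	error = ip6_output(m, optp, &ro, flags, im6o, NULL, &ip6oa);
	m = NULL;			/* ip6_output() always consumes m */
	socket_lock(so, 0);

	/* ... stats and flow-advisory handling ... */
	in6p_route_copyin(in6p, &ro);	/* publish route changes to the PCB */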
*/ -extern struct protosw inetsw[]; -static int udp6_detach(struct socket *so); +static int udp6_abort(struct socket *); +static int udp6_attach(struct socket *, int, struct proc *); +static int udp6_bind(struct socket *, struct sockaddr *, struct proc *); +static int udp6_connectx(struct socket *, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, + uint32_t, void *, uint32_t); +static int udp6_detach(struct socket *); +static int udp6_disconnect(struct socket *); +static int udp6_disconnectx(struct socket *, associd_t, connid_t); +static int udp6_send(struct socket *, int, struct mbuf *, struct sockaddr *, + struct mbuf *, struct proc *); static void udp6_append(struct inpcb *, struct ip6_hdr *, - struct sockaddr_in6 *, struct mbuf *, int); - -extern void ipfwsyslog( int level, const char *format,...); -extern int fw_verbose; + struct sockaddr_in6 *, struct mbuf *, int, struct ifnet *); +static int udp6_input_checksum(struct mbuf *, struct udphdr *, int, int); #if IPFIREWALL -#define log_in_vain_log( a ) { \ - if ( (log_in_vain == 3 ) && (fw_verbose == 2)) { /* Apple logging, log to ipfw.log */ \ - ipfwsyslog a ; \ - } \ - else log a ; \ +extern int fw_verbose; +extern void ipfwsyslog( int level, const char *format,...); +extern void ipfw_stealth_stats_incr_udpv6(void); + +/* Apple logging, log to ipfw.log */ +#define log_in_vain_log(a) { \ + if ((udp_log_in_vain == 3) && (fw_verbose == 2)) { \ + ipfwsyslog a; \ + } else if ((udp_log_in_vain == 4) && (fw_verbose == 2)) { \ + ipfw_stealth_stats_incr_udpv6(); \ + } else { \ + log a; \ + } \ } -#else +#else /* !IPFIREWALL */ #define log_in_vain_log( a ) { log a; } -#endif +#endif /* !IPFIREWALL */ + +struct pr_usrreqs udp6_usrreqs = { + .pru_abort = udp6_abort, + .pru_attach = udp6_attach, + .pru_bind = udp6_bind, + .pru_connect = udp6_connect, + .pru_connectx = udp6_connectx, + .pru_control = in6_control, + .pru_detach = udp6_detach, + .pru_disconnect = udp6_disconnect, + .pru_disconnectx = udp6_disconnectx, + .pru_peeraddr = in6_mapped_peeraddr, + .pru_send = udp6_send, + .pru_shutdown = udp_shutdown, + .pru_sockaddr = in6_mapped_sockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, +}; /* * subroutine of udp6_input(), mainly for source code readability. 
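The udp6_usrreqs table above switches to C99 designated initializers, replacing the positional array removed near the end of this file's diff. Only the methods UDP/IPv6 implements are named; the remaining slots are zero, which I assume the revamped socket layer backstops with the pru_*_notsupp defaults that the old table had to list positionally. A toy example of why the named form is safer:

struct ops {
	int (*op_open)(void);
	int (*op_listen)(void);		/* not supported here */
	int (*op_send)(void);
};

static int my_open(void) { return 0; }
static int my_send(void) { return 0; }

/* positional: order-fragile, every slot must be spelled out */
/*   struct ops o = { my_open, listen_notsupp, my_send };    */

/* designated: name what exists, unnamed slots are NULL */
static struct ops o = {
	.op_open = my_open,
	.op_send = my_send,
};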
*/ static void -udp6_append(struct inpcb *last, __unused struct ip6_hdr *ip6, - struct sockaddr_in6 *udp_in6, struct mbuf *n, int off) +udp6_append(struct inpcb *last, struct ip6_hdr *ip6, + struct sockaddr_in6 *udp_in6, struct mbuf *n, int off, struct ifnet *ifp) { +#pragma unused(ip6) struct mbuf *opts = NULL; int ret = 0; + boolean_t cell = IFNET_IS_CELLULAR(ifp); + boolean_t wifi = (!cell && IFNET_IS_WIFI(ifp)); + #if CONFIG_MACF_NET if (mac_inpcb_check_deliver(last, n, AF_INET6, SOCK_DGRAM) != 0) { m_freem(n); return; } -#endif - if ((last->in6p_flags & IN6P_CONTROLOPTS) != 0 || +#endif /* CONFIG_MACF_NET */ + if ((last->in6p_flags & INP_CONTROLOPTS) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP) != 0 || (last->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { ret = ip6_savecontrol(last, n, &opts); @@ -185,8 +223,8 @@ udp6_append(struct inpcb *last, __unused struct ip6_hdr *ip6, } m_adj(n, off); if (nstat_collect) { - locked_add_64(&last->inp_stat->rxpackets, 1); - locked_add_64(&last->inp_stat->rxbytes, n->m_pkthdr.len); + INP_ADD_STAT(last, cell, wifi, rxpackets, 1); + INP_ADD_STAT(last, cell, wifi, rxbytes, n->m_pkthdr.len); } so_recv_data_stat(last->in6p_socket, n, 0); if (sbappendaddr(&last->in6p_socket->so_rcv, @@ -197,79 +235,55 @@ udp6_append(struct inpcb *last, __unused struct ip6_hdr *ip6, } int -udp6_input( - struct mbuf **mp, - int *offp, - int proto) +udp6_input(struct mbuf **mp, int *offp, int proto) { #pragma unused(proto) struct mbuf *m = *mp; struct ifnet *ifp; - register struct ip6_hdr *ip6; - register struct udphdr *uh; - register struct inpcb *in6p; + struct ip6_hdr *ip6; + struct udphdr *uh; + struct inpcb *in6p; struct mbuf *opts = NULL; int off = *offp; int plen, ulen, ret = 0; + boolean_t cell, wifi; struct sockaddr_in6 udp_in6; struct inpcbinfo *pcbinfo = &udbinfo; struct sockaddr_in6 fromsa; - IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), return IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, off, sizeof (struct udphdr), return IPPROTO_DONE); /* Expect 32-bit aligned data pointer on strict-align platforms */ MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); + cell = IFNET_IS_CELLULAR(ifp); + wifi = (!cell && IFNET_IS_WIFI(ifp)); udpstat.udps_ipackets++; - plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); + plen = ntohs(ip6->ip6_plen) - off + sizeof (*ip6); uh = (struct udphdr *)(void *)((caddr_t)ip6 + off); ulen = ntohs((u_short)uh->uh_ulen); if (plen != ulen) { udpstat.udps_badlen++; - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->badlength, 1); - + IF_UDP_STATINC(ifp, badlength); goto bad; } /* destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) { - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->port0, 1); - + IF_UDP_STATINC(ifp, port0); goto bad; } /* * Checksum extended UDP header and data. */ - if (uh->uh_sum) { - if ((apple_hwcksum_rx != 0) && (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) { - uh->uh_sum = m->m_pkthdr.csum_data; - uh->uh_sum ^= 0xffff; - } - else { - if (in6_cksum(m, IPPROTO_UDP, off, ulen) != 0) { - udpstat.udps_badsum++; - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->badchksum, 1); - - goto bad; - } - } - } -#ifndef __APPLE__ - else - udpstat.udps_nosum++; -#endif + if (udp6_input_checksum(m, uh, off, ulen)) + goto bad; /* * Construct sockaddr format source address. 
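A pattern that recurs through the rest of this file: locked_add_64() on inp_stat is replaced by INP_ADD_STAT(inp, cell, wifi, field, n), with the cell/wifi booleans classified once per packet from the interface. The macro body is not part of this diff; a hypothetical expansion consistent with how it is called here, purely to show the intent of the extra arguments (inp_cstat/inp_wstat as per-interface-class counter blocks are my guess):

#define INP_ADD_STAT(_inp, _cell, _wifi, _f, _n) do {			\
	locked_add_64(&(_inp)->inp_stat->_f, (_n));	/* total */	\
	if (_cell)					/* cellular */	\
		locked_add_64(&(_inp)->inp_cstat->_f, (_n));		\
	else if (_wifi)					/* Wi-Fi */	\
		locked_add_64(&(_inp)->inp_wstat->_f, (_n));		\
} while (0)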
@@ -277,7 +291,6 @@ udp6_input( init_sin6(&fromsa, m); fromsa.sin6_port = uh->uh_sport; - if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { int reuse_sock = 0, mcast_delivered = 0; struct ip6_moptions *imo; @@ -323,24 +336,31 @@ udp6_input( * Locate pcb(s) for datagram. * (Algorithm copied from raw_intr().) */ - lck_rw_lock_shared(pcbinfo->mtx); + lck_rw_lock_shared(pcbinfo->ipi_lock); LIST_FOREACH(in6p, &udb, inp_list) { +#if IPSEC + int skipit; +#endif /* IPSEC */ if ((in6p->inp_vflag & INP_IPV6) == 0) continue; - if (ip6_restrictrecvif && ifp != NULL && - (ifp->if_eflags & IFEF_RESTRICTED_RECV) && - !(in6p->in6p_flags & IN6P_RECV_ANYIF)) + if (inp_restricted(in6p, ifp)) + continue; + + if (IFNET_IS_CELLULAR(ifp) && + (in6p->in6p_flags & INP_NO_IFT_CELLULAR)) continue; - if (in_pcb_checkstate(in6p, WNT_ACQUIRE, 0) == WNT_STOPUSING) + if (in_pcb_checkstate(in6p, WNT_ACQUIRE, 0) == + WNT_STOPUSING) continue; udp_lock(in6p->in6p_socket, 1, 0); - if (in_pcb_checkstate(in6p, WNT_RELEASE, 1) == WNT_STOPUSING) { + if (in_pcb_checkstate(in6p, WNT_RELEASE, 1) == + WNT_STOPUSING) { udp_unlock(in6p->in6p_socket, 1, 0); continue; } @@ -355,65 +375,65 @@ udp6_input( */ imo = in6p->in6p_moptions; if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - struct sockaddr_in6 mcaddr; - int blocked; + struct sockaddr_in6 mcaddr; + int blocked; IM6O_LOCK(imo); - bzero(&mcaddr, sizeof(struct sockaddr_in6)); - mcaddr.sin6_len = sizeof(struct sockaddr_in6); + bzero(&mcaddr, sizeof (struct sockaddr_in6)); + mcaddr.sin6_len = sizeof (struct sockaddr_in6); mcaddr.sin6_family = AF_INET6; mcaddr.sin6_addr = ip6->ip6_dst; blocked = im6o_mc_filter(imo, ifp, - (struct sockaddr *)&mcaddr, - (struct sockaddr *)&fromsa); + (struct sockaddr *)&mcaddr, + (struct sockaddr *)&fromsa); IM6O_UNLOCK(imo); if (blocked != MCAST_PASS) { udp_unlock(in6p->in6p_socket, 1, 0); + if (blocked == MCAST_NOTSMEMBER || + blocked == MCAST_MUTED) + udpstat.udps_filtermcast++; continue; } } - if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { - if (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, - &ip6->ip6_src) || - in6p->in6p_fport != uh->uh_sport) { - udp_unlock(in6p->in6p_socket, 1, 0); - continue; - } + if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && + (!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, + &ip6->ip6_src) || + in6p->in6p_fport != uh->uh_sport)) { + udp_unlock(in6p->in6p_socket, 1, 0); + continue; } reuse_sock = in6p->inp_socket->so_options & (SO_REUSEPORT | SO_REUSEADDR); - { #if IPSEC - int skipit = 0; - /* Check AH/ESP integrity. */ - if (ipsec_bypass == 0) { - if (ipsec6_in_reject_so(m, in6p->inp_socket)) { - IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); - /* do not inject data to pcb */ - skipit = 1; - } - } - if (skipit == 0) -#endif /*IPSEC*/ - { - /* - * KAME NOTE: do not - * m_copy(m, offset, ...) below. - * sbappendaddr() expects M_PKTHDR, - * and m_copy() will copy M_PKTHDR - * only if offset is 0. - */ - if (reuse_sock) - n = m_copy(m, 0, M_COPYALL); - udp6_append(in6p, ip6, &udp_in6, m, - off + sizeof (struct udphdr)); - mcast_delivered++; - } - udp_unlock(in6p->in6p_socket, 1, 0); + skipit = 0; + /* Check AH/ESP integrity. */ + if (ipsec_bypass == 0 && + ipsec6_in_reject_so(m, in6p->inp_socket)) { + IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); + /* do not inject data to pcb */ + skipit = 1; + } + if (skipit == 0) +#endif /* IPSEC */ + { + /* + * KAME NOTE: do not + * m_copy(m, offset, ...) below. + * sbappendaddr() expects M_PKTHDR, + * and m_copy() will copy M_PKTHDR + * only if offset is 0. 
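The KAME note kept above deserves a concrete reading. When several sockets match a multicast datagram (SO_REUSEPORT/SO_REUSEADDR), each needs its own mbuf, and sbappendaddr() insists on M_PKTHDR; m_copy() preserves the packet header only when the copy starts at offset 0, so the duplicate is taken before udp6_append() m_adj()s the IP6+UDP headers off. Condensed from the loop above:

	struct mbuf *n = NULL;

	if (reuse_sock)			/* another socket may match later */
		n = m_copy(m, 0, M_COPYALL);	/* offset 0 keeps M_PKTHDR */

	udp6_append(in6p, ip6, &udp_in6, m,
	    off + sizeof (struct udphdr), ifp);	/* consumes m */
	m = n;				/* walk on with the pristine copy;
					 * loop bookkeeping elided here */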
+ */ + if (reuse_sock) + n = m_copy(m, 0, M_COPYALL); + udp6_append(in6p, ip6, &udp_in6, m, + off + sizeof (struct udphdr), ifp); + mcast_delivered++; } + udp_unlock(in6p->in6p_socket, 1, 0); + /* * Don't look for additional matches if this one does * not have either the SO_REUSEPORT or SO_REUSEADDR @@ -437,7 +457,7 @@ udp6_input( ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)(void *)((caddr_t)ip6 + off); } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); if (mcast_delivered == 0) { /* @@ -446,59 +466,51 @@ udp6_input( * for a broadcast or multicast datgram.) */ udpstat.udps_noport++; -#ifndef __APPLE__ udpstat.udps_noportmcast++; -#endif - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->port_unreach, 1); - + IF_UDP_STATINC(ifp, port_unreach); goto bad; } if (reuse_sock != 0) /* free the extra copy of mbuf */ m_freem(m); - return IPPROTO_DONE; + return (IPPROTO_DONE); } /* * Locate pcb for datagram. */ in6p = in6_pcblookup_hash(&udbinfo, &ip6->ip6_src, uh->uh_sport, - &ip6->ip6_dst, uh->uh_dport, 1, - m->m_pkthdr.rcvif); + &ip6->ip6_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); if (in6p == NULL) { + IF_UDP_STATINC(ifp, port_unreach); - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->port_unreach, 1); - - if (log_in_vain) { + if (udp_log_in_vain) { char buf[INET6_ADDRSTRLEN]; - strlcpy(buf, ip6_sprintf(&ip6->ip6_dst), sizeof(buf)); - if (log_in_vain != 3) - log(LOG_INFO, - "Connection attempt to UDP %s:%d from %s:%d\n", - buf, ntohs(uh->uh_dport), - ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport)); - else if (!(m->m_flags & (M_BCAST | M_MCAST)) && - !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) - log_in_vain_log((LOG_INFO, - "Connection attempt to UDP %s:%d from %s:%d\n", - buf, ntohs(uh->uh_dport), - ip6_sprintf(&ip6->ip6_src), ntohs(uh->uh_sport))); + strlcpy(buf, ip6_sprintf(&ip6->ip6_dst), sizeof (buf)); + if (udp_log_in_vain < 3) { + log(LOG_INFO, "Connection attempt to UDP " + "%s:%d from %s:%d\n", buf, + ntohs(uh->uh_dport), + ip6_sprintf(&ip6->ip6_src), + ntohs(uh->uh_sport)); + } else if (!(m->m_flags & (M_BCAST | M_MCAST)) && + !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) { + log_in_vain_log((LOG_INFO, "Connection attempt " + "to UDP %s:%d from %s:%d\n", buf, + ntohs(uh->uh_dport), + ip6_sprintf(&ip6->ip6_src), + ntohs(uh->uh_sport))); + } } udpstat.udps_noport++; if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); -#ifndef __APPLE__ udpstat.udps_noportmcast++; -#endif - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->badmcast, 1); - + IF_UDP_STATINC(ifp, badmcast); goto bad; } icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); - return IPPROTO_DONE; + return (IPPROTO_DONE); } #if IPSEC /* @@ -508,14 +520,11 @@ udp6_input( if (ipsec6_in_reject_so(m, in6p->in6p_socket)) { IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio); in_pcb_checkstate(in6p, WNT_RELEASE, 0); - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->badipsec, 1); - + IF_UDP_STATINC(ifp, badipsec); goto bad; } } -#endif /*IPSEC*/ +#endif /* IPSEC */ /* * Construct sockaddr format source address. 
@@ -525,33 +534,29 @@ udp6_input( if (in_pcb_checkstate(in6p, WNT_RELEASE, 1) == WNT_STOPUSING) { udp_unlock(in6p->in6p_socket, 1, 0); - - if (ifp->if_udp_stat != NULL) - atomic_add_64(&ifp->if_udp_stat->cleanup, 1); - + IF_UDP_STATINC(ifp, cleanup); goto bad; } init_sin6(&udp_in6, m); /* general init */ udp_in6.sin6_port = uh->uh_sport; - if ((in6p->in6p_flags & IN6P_CONTROLOPTS) != 0 || - (in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0 || - (in6p->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { + if ((in6p->in6p_flags & INP_CONTROLOPTS) != 0 || + (in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0 || + (in6p->in6p_socket->so_options & SO_TIMESTAMP_MONOTONIC) != 0) { ret = ip6_savecontrol(in6p, m, &opts); if (ret != 0) { udp_unlock(in6p->in6p_socket, 1, 0); goto bad; } } - m_adj(m, off + sizeof(struct udphdr)); + m_adj(m, off + sizeof (struct udphdr)); if (nstat_collect) { - locked_add_64(&in6p->inp_stat->rxpackets, 1); - locked_add_64(&in6p->inp_stat->rxbytes, m->m_pkthdr.len); + INP_ADD_STAT(in6p, cell, wifi, rxpackets, 1); + INP_ADD_STAT(in6p, cell, wifi, rxbytes, m->m_pkthdr.len); } so_recv_data_stat(in6p->in6p_socket, m, 0); if (sbappendaddr(&in6p->in6p_socket->so_rcv, - (struct sockaddr *)&udp_in6, - m, opts, NULL) == 0) { + (struct sockaddr *)&udp_in6, m, opts, NULL) == 0) { m = NULL; opts = NULL; udpstat.udps_fullsock++; @@ -560,20 +565,17 @@ udp6_input( } sorwakeup(in6p->in6p_socket); udp_unlock(in6p->in6p_socket, 1, 0); - return IPPROTO_DONE; + return (IPPROTO_DONE); bad: - if (m) + if (m != NULL) m_freem(m); - if (opts) + if (opts != NULL) m_freem(opts); - return IPPROTO_DONE; + return (IPPROTO_DONE); } void -udp6_ctlinput( - int cmd, - struct sockaddr *sa, - void *d) +udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { struct udphdr uh; struct ip6_hdr *ip6; @@ -588,7 +590,7 @@ udp6_ctlinput( } *uhp; if (sa->sa_family != AF_INET6 || - sa->sa_len != sizeof(struct sockaddr_in6)) + sa->sa_len != sizeof (struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) @@ -613,105 +615,62 @@ udp6_ctlinput( sa6_src = &sa6_any; } - if (ip6) { + if (ip6 != NULL) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. 
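udp6_ctlinput() trusts the ICMPv6-quoted packet only far enough to read the embedded port pair, and the reformatting above keeps both safety steps: check that the mbuf really carries off + sizeof (*uhp) bytes, then m_copydata() into a stack copy instead of pointing into the chain. The idiom, in isolation:

	struct udphdr uh;

	/* refuse to read beyond what the ICMPv6 error actually quoted */
	if (m->m_pkthdr.len < off + sizeof (uh))
		return;
	bzero(&uh, sizeof (uh));
	m_copydata(m, off, sizeof (uh), (caddr_t)&uh);
	/* only uh.uh_sport/uh.uh_dport are used, for in6_pcbnotify() */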
*/ /* check if we can safely examine src and dst ports */ - if (m->m_pkthdr.len < off + sizeof(*uhp)) + if (m->m_pkthdr.len < off + sizeof (*uhp)) return; - bzero(&uh, sizeof(uh)); - m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); + bzero(&uh, sizeof (uh)); + m_copydata(m, off, sizeof (*uhp), (caddr_t)&uh); (void) in6_pcbnotify(&udbinfo, sa, uh.uh_dport, - (struct sockaddr*)ip6cp->ip6c_src, - uh.uh_sport, cmd, NULL, notify); - } else - (void) in6_pcbnotify(&udbinfo, sa, 0, (struct sockaddr *)&sa6_src, - 0, cmd, NULL, notify); -} - -#ifndef __APPLE__ -static int -udp6_getcred SYSCTL_HANDLER_ARGS -{ - struct sockaddr_in6 addrs[2]; - struct inpcb *inp; - int error, s; - - error = proc_suser(req->p); - if (error) - return (error); - - if (req->newlen != sizeof(addrs)) - return (EINVAL); - if (req->oldlen != sizeof(*(kauth_cred_t)0)) - return (EINVAL); - error = SYSCTL_IN(req, addrs, sizeof(addrs)); - if (error) - return (error); - s = splnet(); - inp = in6_pcblookup_hash(&udbinfo, &addrs[1].sin6_addr, - addrs[1].sin6_port, - &addrs[0].sin6_addr, addrs[0].sin6_port, - 1, NULL); - if (!inp || !inp->inp_socket || !inp->inp_socket->so_cred) { - error = ENOENT; - goto out; + (struct sockaddr*)ip6cp->ip6c_src, uh.uh_sport, + cmd, NULL, notify); + } else { + (void) in6_pcbnotify(&udbinfo, sa, 0, + (struct sockaddr *)&sa6_src, 0, cmd, NULL, notify); } - /* - * XXX This should not be copying out a credential!!!! This - * XXX is an opaque type, and is not intended to be introspected, - * XXX and the size of this structure *WILL* change as planned MACF - * XXX and kauth changes go forward. - */ - error = SYSCTL_OUT(req, inp->inp_socket->so_cred->pc_ucred, - sizeof(*(kauth_cred_t)0)); - -out: - splx(s); - return (error); } -SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, - 0, 0, - udp6_getcred, "S,ucred", "Get the ucred of a UDP6 connection"); -#endif - static int udp6_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; /* ??? possible? panic instead? */ + if (inp == NULL) { + panic("%s: so=%p null inp\n", __func__, so); + /* NOTREACHED */ + } soisdisconnected(so); in6_pcbdetach(inp); - return 0; + return (0); } static int -udp6_attach(struct socket *so, __unused int proto, struct proc *p) +udp6_attach(struct socket *so, int proto, struct proc *p) { +#pragma unused(proto) struct inpcb *inp; int error; inp = sotoinpcb(so); - if (inp != 0) - return EINVAL; + if (inp != NULL) + return (EINVAL); error = in_pcballoc(so, &udbinfo, p); if (error) - return error; + return (error); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) - return error; + return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; @@ -728,7 +687,7 @@ udp6_attach(struct socket *so, __unused int proto, struct proc *p) inp->inp_ip_ttl = ip_defttl; if (nstat_collect) nstat_udp_new_pcb(inp); - return 0; + return (0); } static int @@ -738,8 +697,8 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) int error; inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? 
EINVAL : EPROTOTYPE); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; @@ -748,32 +707,32 @@ udp6_bind(struct socket *so, struct sockaddr *nam, struct proc *p) sin6_p = (struct sockaddr_in6 *)(void *)nam; - if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) + if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) { inp->inp_vflag |= INP_IPV4; - else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { + } else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, p); - return error; + return (error); } } error = in6_pcbbind(inp, nam, p); - return error; + return (error); } -static int +int udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct inpcb *inp; int error; inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? EINVAL : EPROTOTYPE); if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; @@ -783,31 +742,51 @@ udp6_connect(struct socket *so, struct sockaddr *nam, struct proc *p) struct sockaddr_in sin; if (inp->inp_faddr.s_addr != INADDR_ANY) - return EISCONN; + return (EISCONN); in6_sin6_2_sin(&sin, sin6_p); - error = in_pcbconnect(inp, (struct sockaddr *)&sin, p, NULL); + error = in_pcbconnect(inp, (struct sockaddr *)&sin, + p, IFSCOPE_NONE, NULL); if (error == 0) { inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; soisconnected(so); } - return error; + return (error); } } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) - return EISCONN; + return (EISCONN); error = in6_pcbconnect(inp, nam, p); if (error == 0) { - if (ip6_mapped_addr_on || (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { /* should be non mapped addr */ + /* should be non mapped addr */ + if (ip6_mapped_addr_on || + (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; } soisconnected(so); if (inp->inp_flowhash == 0) inp->inp_flowhash = inp_calc_flowhash(inp); + /* update flowinfo - RFC 6437 */ + if (inp->inp_flow == 0 && + inp->in6p_flags & IN6P_AUTOFLOWLABEL) { + inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; + inp->inp_flow |= + (htonl(inp->inp_flowhash) & IPV6_FLOWLABEL_MASK); + } } - return error; + return (error); +} + +static int +udp6_connectx(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, + associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen) +{ + return (udp_connectx_common(so, AF_INET6, src_sl, dst_sl, + p, ifscope, aid, pcid, flags, arg, arglen)); } static int @@ -816,10 +795,10 @@ udp6_detach(struct socket *so) struct inpcb *inp; inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; + if (inp == NULL) + return (EINVAL); in6_pcbdetach(inp); - return 0; + return (0); } static int @@ -828,8 +807,8 @@ udp6_disconnect(struct socket *so) struct inpcb *inp; inp = sotoinpcb(so); - if (inp == 0) - return EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) + return (inp == NULL ? 
EINVAL : EPROTOTYPE); if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; @@ -839,7 +818,7 @@ udp6_disconnect(struct socket *so) } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) - return ENOTCONN; + return (ENOTCONN); in6_pcbdisconnect(inp); @@ -849,24 +828,34 @@ udp6_disconnect(struct socket *so) inp->in6p_laddr = in6addr_any; inp->in6p_last_outifp = NULL; so->so_state &= ~SS_ISCONNECTED; /* XXX */ - return 0; + return (0); +} + +static int +udp6_disconnectx(struct socket *so, associd_t aid, connid_t cid) +{ +#pragma unused(cid) + if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) + return (EINVAL); + + return (udp6_disconnect(so)); } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, - struct mbuf *control, struct proc *p) + struct mbuf *control, struct proc *p) { struct inpcb *inp; int error = 0; inp = sotoinpcb(so); - if (inp == 0) { - error = EINVAL; + if (inp == NULL || (inp->inp_flags2 & INP2_WANT_FLOW_DIVERT)) { + error = (inp == NULL ? EINVAL : EPROTOTYPE); goto bad; } - if (addr) { - if (addr->sa_len != sizeof(struct sockaddr_in6)) { + if (addr != NULL) { + if (addr->sa_len != sizeof (struct sockaddr_in6)) { error = EINVAL; goto bad; } @@ -878,39 +867,106 @@ udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, if (ip6_mapped_addr_on || (inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { int hasv4addr; - struct sockaddr_in6 *sin6 = 0; + struct sockaddr_in6 *sin6 = NULL; - if (addr == 0) + if (addr == NULL) { hasv4addr = (inp->inp_vflag & INP_IPV4); - else { + } else { sin6 = (struct sockaddr_in6 *)(void *)addr; - hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) - ? 1 : 0; + hasv4addr = + IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; - if (sin6) + if (sin6 != NULL) in6_sin6_2_sin_in_sock(addr); pru = ip_protox[IPPROTO_UDP]->pr_usrreqs; - error = ((*pru->pru_send)(so, flags, m, addr, control, - p)); + error = ((*pru->pru_send)(so, flags, m, addr, + control, p)); /* addr will just be freed in sendit(). */ - return error; + return (error); } } + return (udp6_output(inp, m, addr, control, p)); + +bad: + VERIFY(error != 0); - return udp6_output(inp, m, addr, control, p); + if (m != NULL) + m_freem(m); + if (control != NULL) + m_freem(control); - bad: - m_freem(m); - return(error); + return (error); } -struct pr_usrreqs udp6_usrreqs = { - udp6_abort, pru_accept_notsupp, udp6_attach, udp6_bind, udp6_connect, - pru_connect2_notsupp, in6_control, udp6_detach, udp6_disconnect, - pru_listen_notsupp, in6_mapped_peeraddr, pru_rcvd_notsupp, - pru_rcvoob_notsupp, udp6_send, pru_sense_null, udp_shutdown, - in6_mapped_sockaddr, sosend, soreceive, pru_sopoll_notsupp -}; +/* + * Checksum extended UDP header and data. + */ +static int +udp6_input_checksum(struct mbuf *m, struct udphdr *uh, int off, int ulen) +{ + struct ifnet *ifp = m->m_pkthdr.rcvif; + struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); + + if (uh->uh_sum == 0) { + /* UDP/IPv6 checksum is mandatory (RFC2460) */ + udpstat.udps_nosum++; + goto badsum; + } + + if ((hwcksum_rx || (ifp->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) && + (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) { + if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) { + uh->uh_sum = m->m_pkthdr.csum_rx_val; + } else { + uint16_t sum = m->m_pkthdr.csum_rx_val; + uint16_t start = m->m_pkthdr.csum_rx_start; + + /* + * Perform 1's complement adjustment of octets + * that got included/excluded in the hardware- + * calculated checksum value. 
+ */ + if ((m->m_pkthdr.csum_flags & CSUM_PARTIAL) && + start != off) { + uint16_t s, d; + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { + s = ip6->ip6_src.s6_addr16[1]; + ip6->ip6_src.s6_addr16[1] = 0 ; + } + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { + d = ip6->ip6_dst.s6_addr16[1]; + ip6->ip6_dst.s6_addr16[1] = 0; + } + + /* callee folds in sum */ + sum = m_adj_sum16(m, start, off, sum); + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = s; + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = d; + } + + uh->uh_sum = in6_pseudo(&ip6->ip6_src, &ip6->ip6_dst, + sum + htonl(ulen + IPPROTO_UDP)); + } + uh->uh_sum ^= 0xffff; + } else { + udp_in6_cksum_stats(ulen); + uh->uh_sum = in6_cksum(m, IPPROTO_UDP, off, ulen); + } + + if (uh->uh_sum != 0) { +badsum: + udpstat.udps_badsum++; + IF_UDP_STATINC(ifp, badchksum); + return (-1); + } + + return (0); +} diff --git a/bsd/netinet6/udp6_var.h b/bsd/netinet6/udp6_var.h index bd6916e4e..0758482f7 100644 --- a/bsd/netinet6/udp6_var.h +++ b/bsd/netinet6/udp6_var.h @@ -1,3 +1,30 @@ +/* + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
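The new udp6_input_checksum() centralizes what udp6_input() used to open-code: a zero checksum is rejected outright (the UDP checksum is mandatory over IPv6, RFC 2460 section 8.1), hardware-validated values are folded against the pseudo-header, and everything else goes through in6_cksum(). The arithmetic is plain ones'-complement; here is a standalone, user-space version of the software path for reference (my own restatement, not the kernel routine):

#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <stddef.h>

/* fold a 32-bit accumulator down to 16 bits, ones'-complement style */
static uint16_t
cksum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

static uint16_t
udp6_cksum(const uint16_t *src8, const uint16_t *dst8,	/* 8 words each */
    const uint8_t *udp, size_t len)
{
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i < 8; i++)			/* pseudo-header addresses */
		sum += ntohs(src8[i]) + ntohs(dst8[i]);
	sum += (uint32_t)len + IPPROTO_UDP;	/* pseudo-header len + proto */
	for (i = 0; i + 1 < len; i += 2)	/* UDP header + payload */
		sum += (uint32_t)(udp[i] << 8) | udp[i + 1];
	if (len & 1)				/* odd trailing byte */
		sum += (uint32_t)udp[len - 1] << 8;
	return (uint16_t)~cksum_fold(sum);
}

Feeding it the received datagram with the stored uh_sum left in place should yield 0 for an intact packet, which is the same acceptance test the kernel applies.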
@@ -66,16 +93,15 @@ #define _NETINET6_UDP6_VAR_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE SYSCTL_DECL(_net_inet6_udp6); -extern struct pr_usrreqs udp6_usrreqs; - -void udp6_ctlinput(int, struct sockaddr *, void *); -int udp6_input(struct mbuf **, int *, int); -int udp6_output(struct inpcb *inp, struct mbuf *m, - struct sockaddr *addr, struct mbuf *control, - struct proc *p); -#endif /* KERNEL_PRIVATE */ +extern struct pr_usrreqs udp6_usrreqs; +extern void udp6_ctlinput(int, struct sockaddr *, void *); +extern int udp6_input(struct mbuf **, int *, int); +extern int udp6_output(struct inpcb *, struct mbuf *, struct sockaddr *, + struct mbuf *, struct proc *); +extern int udp6_connect(struct socket *, struct sockaddr *, struct proc *); +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETINET6_UDP6_VAR_H_ */ diff --git a/bsd/netkey/Makefile b/bsd/netkey/Makefile index 1a68c8a44..882e7b5e3 100644 --- a/bsd/netkey/Makefile +++ b/bsd/netkey/Makefile @@ -7,14 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ keysock.h diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index ffbdcf88e..f80c0c0ad 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2008-2011 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -163,6 +163,7 @@ lck_mtx_t *pfkey_stat_mutex = &pfkey_stat_mutex_data; */ u_int32_t key_debug_level = 0; //### our sysctl is not dynamic +static int key_timehandler_running = 0; static u_int key_spi_trycnt = 1000; static u_int32_t key_spi_minval = 0x100; static u_int32_t key_spi_maxval = 0x0fffffff; /* XXX */ @@ -183,8 +184,8 @@ __private_extern__ u_int32_t natt_now = 0; static LIST_HEAD(_sptree, secpolicy) sptree[IPSEC_DIR_MAX]; /* SPD */ static LIST_HEAD(_sahtree, secashead) sahtree; /* SAD */ static LIST_HEAD(_regtree, secreg) regtree[SADB_SATYPE_MAX + 1]; - /* registed list */ - +/* registered list */ + #define SPIHASHSIZE 128 #define SPIHASH(x) (((x) ^ ((x) >> 16)) % SPIHASHSIZE) static LIST_HEAD(_spihash, secasvar) spihash[SPIHASHSIZE]; @@ -235,6 +236,11 @@ static const int minsize[] = { sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ sizeof(struct sadb_session_id), /* SADB_EXT_SESSION_ID */ sizeof(struct sadb_sastat), /* SADB_EXT_SASTAT */ + sizeof(struct sadb_x_ipsecif), /* SADB_X_EXT_IPSECIF */ + sizeof(struct sadb_address), /* SADB_X_EXT_ADDR_RANGE_SRC_START */ + sizeof(struct sadb_address), /* SADB_X_EXT_ADDR_RANGE_SRC_END */ + sizeof(struct sadb_address), /* SADB_X_EXT_ADDR_RANGE_DST_START */ + sizeof(struct sadb_address), /* SADB_X_EXT_ADDR_RANGE_DST_END */ }; static const int maxsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ @@ -259,6 +265,11 @@ static const int maxsize[] = { sizeof(struct sadb_x_sa2), /* SADB_X_SA2 */ 0, /* SADB_EXT_SESSION_ID */ 0, /* SADB_EXT_SASTAT */ + sizeof(struct sadb_x_ipsecif), /* SADB_X_EXT_IPSECIF */ + 0, /* SADB_X_EXT_ADDR_RANGE_SRC_START */ + 0, /* SADB_X_EXT_ADDR_RANGE_SRC_END */ + 0, /* SADB_X_EXT_ADDR_RANGE_DST_START */ + 0, /* SADB_X_EXT_ADDR_RANGE_DST_END */ }; static int ipsec_esp_keymin = 256; @@ -268,138 +279,143 @@ static int ipsec_ah_keymin = 128; SYSCTL_DECL(_net_key); /* Thread safe: no accumulated state */ SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL, debug, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &key_debug_level, 0, ""); + &key_debug_level, 0, ""); /* max count of trial for the decision of spi value */ SYSCTL_INT(_net_key, KEYCTL_SPI_TRY, spi_trycnt, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &key_spi_trycnt, 0, ""); + &key_spi_trycnt, 0, ""); /* minimum spi value to allocate automatically. */ SYSCTL_INT(_net_key, KEYCTL_SPI_MIN_VALUE, spi_minval, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &key_spi_minval, 0, ""); + &key_spi_minval, 0, ""); /* maximum spi value to allocate automatically.
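The five added rows slot the SADB_X_EXT_IPSECIF and SADB_X_EXT_ADDR_RANGE_* extensions into key.c's table-driven length validation: minsize[] gives the smallest legal payload for each extension type and maxsize[] the largest, with 0 meaning variable length. PF_KEY (RFC 2367) counts sadb_ext_len in 64-bit words, hence the PFKEY_UNUNIT64() conversion. A hedged condensation of the check performed in key_align()/key_validate_ext() (the real code also special-cases address extensions):

static int
check_ext_len(const struct sadb_ext *ext)
{
	int extlen = PFKEY_UNUNIT64(ext->sadb_ext_len);	/* words -> bytes */
	u_int16_t type = ext->sadb_ext_type;

	if (type > SADB_EXT_MAX)
		return (EINVAL);	/* type without a table entry */
	if (extlen < minsize[type])
		return (EINVAL);	/* truncated extension */
	if (maxsize[type] != 0 && extlen > maxsize[type])
		return (EINVAL);	/* 0 in maxsize[] == variable */
	return (0);
}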
*/ SYSCTL_INT(_net_key, KEYCTL_SPI_MAX_VALUE, spi_maxval, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &key_spi_maxval, 0, ""); + &key_spi_maxval, 0, ""); /* interval to initialize randseed */ SYSCTL_INT(_net_key, KEYCTL_RANDOM_INT, int_random, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &key_int_random, 0, ""); + &key_int_random, 0, ""); /* lifetime for larval SA; thread safe due to > compare */ SYSCTL_INT(_net_key, KEYCTL_LARVAL_LIFETIME, larval_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &key_larval_lifetime, 0, ""); + &key_larval_lifetime, 0, ""); /* counter for blocking to send SADB_ACQUIRE to IKEd */ SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_COUNT, blockacq_count, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &key_blockacq_count, 0, ""); + &key_blockacq_count, 0, ""); /* lifetime for blocking to send SADB_ACQUIRE to IKEd: Thread safe, > compare */ SYSCTL_INT(_net_key, KEYCTL_BLOCKACQ_LIFETIME, blockacq_lifetime, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &key_blockacq_lifetime, 0, ""); + &key_blockacq_lifetime, 0, ""); /* ESP auth */ SYSCTL_INT(_net_key, KEYCTL_ESP_AUTH, esp_auth, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &ipsec_esp_auth, 0, ""); + &ipsec_esp_auth, 0, ""); /* minimum ESP key length */ SYSCTL_INT(_net_key, KEYCTL_ESP_KEYMIN, esp_keymin, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &ipsec_esp_keymin, 0, ""); + &ipsec_esp_keymin, 0, ""); /* minimum AH key length */ SYSCTL_INT(_net_key, KEYCTL_AH_KEYMIN, ah_keymin, CTLFLAG_RW | CTLFLAG_LOCKED, \ - &ipsec_ah_keymin, 0, ""); + &ipsec_ah_keymin, 0, ""); /* perfered old SA rather than new SA */ SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA, prefered_oldsa, CTLFLAG_RW | CTLFLAG_LOCKED,\ - &key_preferred_oldsa, 0, ""); + &key_preferred_oldsa, 0, ""); /* time between NATT keepalives in seconds, 0 disabled */ SYSCTL_INT(_net_key, KEYCTL_NATT_KEEPALIVE_INTERVAL, natt_keepalive_interval, CTLFLAG_RW | CTLFLAG_LOCKED,\ - &natt_keepalive_interval, 0, ""); + &natt_keepalive_interval, 0, ""); /* PF_KEY statistics */ SYSCTL_STRUCT(_net_key, KEYCTL_PFKEYSTAT, pfkeystat, CTLFLAG_RD | CTLFLAG_LOCKED,\ - &pfkeystat, pfkeystat, ""); + &pfkeystat, pfkeystat, ""); #ifndef LIST_FOREACH #define LIST_FOREACH(elm, head, field) \ - for (elm = LIST_FIRST(head); elm; elm = LIST_NEXT(elm, field)) +for (elm = LIST_FIRST(head); elm; elm = LIST_NEXT(elm, field)) #endif #define __LIST_CHAINED(elm) \ - (!((elm)->chain.le_next == NULL && (elm)->chain.le_prev == NULL)) +(!((elm)->chain.le_next == NULL && (elm)->chain.le_prev == NULL)) #define LIST_INSERT_TAIL(head, elm, type, field) \ do {\ - struct type *curelm = LIST_FIRST(head); \ - if (curelm == NULL) {\ - LIST_INSERT_HEAD(head, elm, field); \ - } else { \ - while (LIST_NEXT(curelm, field)) \ - curelm = LIST_NEXT(curelm, field);\ - LIST_INSERT_AFTER(curelm, elm, field);\ - }\ +struct type *curelm = LIST_FIRST(head); \ +if (curelm == NULL) {\ +LIST_INSERT_HEAD(head, elm, field); \ +} else { \ +while (LIST_NEXT(curelm, field)) \ +curelm = LIST_NEXT(curelm, field);\ +LIST_INSERT_AFTER(curelm, elm, field);\ +}\ } while (0) #define KEY_CHKSASTATE(head, sav, name) \ do { \ - if ((head) != (sav)) { \ - ipseclog((LOG_DEBUG, "%s: state mismatched (TREE=%d SA=%d)\n", \ - (name), (head), (sav))); \ - continue; \ - } \ +if ((head) != (sav)) { \ +ipseclog((LOG_DEBUG, "%s: state mismatched (TREE=%d SA=%d)\n", \ +(name), (head), (sav))); \ +continue; \ +} \ } while (0) #define KEY_CHKSPDIR(head, sp, name) \ do { \ - if ((head) != (sp)) { \ - ipseclog((LOG_DEBUG, "%s: direction mismatched (TREE=%d SP=%d), " \ - "anyway continue.\n", \ - (name), (head), (sp))); \ - } \ +if ((head) != (sp)) { \ 
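The reindented LIST_INSERT_TAIL above is the usual workaround for BSD LIST_* heads tracking only the first element: tail insertion has to walk the chain with LIST_NEXT, so it is O(n) per insert. A self-contained userspace sketch of the same pattern against the stock sys/queue.h (the MY_ prefix is only to avoid clashing with any system definition; nodes are intentionally not freed in this throwaway demo):

    #include <sys/queue.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node {
        int val;
        LIST_ENTRY(node) chain;
    };
    LIST_HEAD(nodehead, node);

    #define MY_LIST_INSERT_TAIL(head, elm, type, field) do {    \
        struct type *curelm = LIST_FIRST(head);                 \
        if (curelm == NULL) {                                   \
            LIST_INSERT_HEAD(head, elm, field);                 \
        } else {                                                \
            while (LIST_NEXT(curelm, field))                    \
                curelm = LIST_NEXT(curelm, field);              \
            LIST_INSERT_AFTER(curelm, elm, field);              \
        }                                                       \
    } while (0)

    int main(void)
    {
        struct nodehead head = LIST_HEAD_INITIALIZER(head);
        for (int i = 0; i < 3; i++) {
            struct node *n = calloc(1, sizeof(*n));
            n->val = i;
            MY_LIST_INSERT_TAIL(&head, n, node, chain);
        }
        struct node *n;
        LIST_FOREACH(n, &head, chain)
            printf("%d\n", n->val);   /* prints 0 1 2: insertion order kept */
        return 0;
    }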
+ipseclog((LOG_DEBUG, "%s: direction mismatched (TREE=%d SP=%d), " \ +"anyway continue.\n", \ +(name), (head), (sp))); \ +} \ } while (0) #if 1 #define KMALLOC_WAIT(p, t, n) \ - ((p) = (t) _MALLOC((u_int32_t)(n), M_SECA, M_WAITOK)) +((p) = (t) _MALLOC((u_int32_t)(n), M_SECA, M_WAITOK)) #define KMALLOC_NOWAIT(p, t, n) \ - ((p) = (t) _MALLOC((u_int32_t)(n), M_SECA, M_NOWAIT)) +((p) = (t) _MALLOC((u_int32_t)(n), M_SECA, M_NOWAIT)) #define KFREE(p) \ - _FREE((caddr_t)(p), M_SECA); +_FREE((caddr_t)(p), M_SECA); #else #define KMALLOC_WAIT(p, t, n) \ do { \ - ((p) = (t)_MALLOC((u_int32_t)(n), M_SECA, M_WAITOK)); \ - printf("%s %d: %p <- KMALLOC_WAIT(%s, %d)\n", \ - __FILE__, __LINE__, (p), #t, n); \ +((p) = (t)_MALLOC((u_int32_t)(n), M_SECA, M_WAITOK)); \ +printf("%s %d: %p <- KMALLOC_WAIT(%s, %d)\n", \ +__FILE__, __LINE__, (p), #t, n); \ } while (0) #define KMALLOC_NOWAIT(p, t, n) \ - do { \ - ((p) = (t)_MALLOC((u_int32_t)(n), M_SECA, M_NOWAIT)); \ - printf("%s %d: %p <- KMALLOC_NOWAIT(%s, %d)\n", \ - __FILE__, __LINE__, (p), #t, n); \ - } while (0) +do { \ +((p) = (t)_MALLOC((u_int32_t)(n), M_SECA, M_NOWAIT)); \ +printf("%s %d: %p <- KMALLOC_NOWAIT(%s, %d)\n", \ +__FILE__, __LINE__, (p), #t, n); \ +} while (0) #define KFREE(p) \ - do { \ - printf("%s %d: %p -> KFREE()\n", __FILE__, __LINE__, (p)); \ - _FREE((caddr_t)(p), M_SECA); \ - } while (0) +do { \ +printf("%s %d: %p -> KFREE()\n", __FILE__, __LINE__, (p)); \ +_FREE((caddr_t)(p), M_SECA); \ +} while (0) #endif /* * set parameters into secpolicyindex buffer. * Must allocate secpolicyindex buffer passed to this function. */ -#define KEY_SETSECSPIDX(_dir, s, d, ps, pd, ulp, idx) \ +#define KEY_SETSECSPIDX(_dir, s, d, ps, pd, ulp, ifp, s_s, s_e, d_s, d_e, idx) \ do { \ - bzero((idx), sizeof(struct secpolicyindex)); \ - (idx)->dir = (_dir); \ - (idx)->prefs = (ps); \ - (idx)->prefd = (pd); \ - (idx)->ul_proto = (ulp); \ - bcopy((s), &(idx)->src, ((struct sockaddr *)(s))->sa_len); \ - bcopy((d), &(idx)->dst, ((struct sockaddr *)(d))->sa_len); \ +bzero((idx), sizeof(struct secpolicyindex)); \ +(idx)->dir = (_dir); \ +(idx)->prefs = (ps); \ +(idx)->prefd = (pd); \ +(idx)->ul_proto = (ulp); \ +(idx)->internal_if = (ifp); \ +if (s) bcopy((s), &(idx)->src, ((struct sockaddr *)(s))->sa_len); \ +if (d) bcopy((d), &(idx)->dst, ((struct sockaddr *)(d))->sa_len); \ +if (s_s) bcopy((s_s), &(idx)->src_range.start, ((struct sockaddr *)(s_s))->sa_len); \ +if (s_e) bcopy((s_e), &(idx)->src_range.end, ((struct sockaddr *)(s_e))->sa_len); \ +if (d_s) bcopy((d_s), &(idx)->dst_range.start, ((struct sockaddr *)(d_s))->sa_len); \ +if (d_e) bcopy((d_e), &(idx)->dst_range.end, ((struct sockaddr *)(d_e))->sa_len); \ } while (0) /* @@ -408,12 +424,12 @@ do { \ */ #define KEY_SETSECASIDX(p, m, r, s, d, idx) \ do { \ - bzero((idx), sizeof(struct secasindex)); \ - (idx)->proto = (p); \ - (idx)->mode = (m); \ - (idx)->reqid = (r); \ - bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ - bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ +bzero((idx), sizeof(struct secasindex)); \ +(idx)->proto = (p); \ +(idx)->mode = (m); \ +(idx)->reqid = (r); \ +bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ +bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ } while (0) /* key statistics */ @@ -428,55 +444,60 @@ struct sadb_msghdr { int extlen[SADB_EXT_MAX + 1]; }; +static struct secpolicy *__key_getspbyid(u_int32_t id); static struct secasvar *key_do_allocsa_policy(struct secashead *, u_int, u_int16_t); static int 
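The reworked KEY_SETSECSPIDX now takes an internal interface plus four optional range endpoints, and copies each sockaddr only when the caller supplied it, sized by sa_len rather than a fixed sizeof. A pared-down sketch of that guard pattern with BSD-style sockaddrs (addr_range and set_range are illustrative stand-ins, not the kernel's secpolicyindex types):

    #include <stdio.h>
    #include <strings.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    /* stand-in for the src_range/dst_range fields of secpolicyindex */
    struct addr_range {
        struct sockaddr_storage start;
        struct sockaddr_storage end;
    };

    static void
    set_range(struct addr_range *r, const struct sockaddr *s,
        const struct sockaddr *e)
    {
        bzero(r, sizeof(*r));
        /* copy each endpoint only if present, and only sa_len bytes,
         * mirroring the if (s_s) bcopy(...) guards in the macro */
        if (s) bcopy(s, &r->start, s->sa_len);
        if (e) bcopy(e, &r->end, e->sa_len);
    }

    int main(void)
    {
        struct sockaddr_in sin;
        bzero(&sin, sizeof(sin));
        sin.sin_len = sizeof(sin);   /* BSD sockaddrs carry their length */
        sin.sin_family = AF_INET;

        struct addr_range r;
        set_range(&r, (struct sockaddr *)&sin, NULL);  /* end left zeroed */
        printf("start family %d, end family %d\n",
            r.start.ss_family, r.end.ss_family);
        return 0;
    }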
key_do_get_translated_port(struct secashead *, struct secasvar *, u_int); static void key_delsp(struct secpolicy *); static struct secpolicy *key_getsp(struct secpolicyindex *); -static struct secpolicy *key_getspbyid(u_int32_t); static u_int32_t key_newreqid(void); static struct mbuf *key_gather_mbuf(struct mbuf *, - const struct sadb_msghdr *, int, int, int *); + const struct sadb_msghdr *, int, int, int *); static int key_spdadd(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static u_int32_t key_getnewspid(void); static int key_spddelete(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_spddelete2(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); +static int key_spdenable(struct socket *, struct mbuf *, + const struct sadb_msghdr *); +static int key_spddisable(struct socket *, struct mbuf *, + const struct sadb_msghdr *); static int key_spdget(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_spdflush(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_spddump(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static struct mbuf *key_setdumpsp(struct secpolicy *, - u_int8_t, u_int32_t, u_int32_t); + u_int8_t, u_int32_t, u_int32_t); static u_int key_getspreqmsglen(struct secpolicy *); static int key_spdexpire(struct secpolicy *); static struct secashead *key_newsah(struct secasindex *, u_int8_t); static struct secasvar *key_newsav(struct mbuf *, - const struct sadb_msghdr *, struct secashead *, int *); + const struct sadb_msghdr *, struct secashead *, int *); static struct secashead *key_getsah(struct secasindex *); static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t); static void key_setspi __P((struct secasvar *, u_int32_t)); static struct secasvar *key_getsavbyspi(struct secashead *, u_int32_t); static int key_setsaval(struct secasvar *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_mature(struct secasvar *); static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, - u_int8_t, u_int32_t, u_int32_t); + u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, - u_int32_t, pid_t, u_int16_t); + u_int32_t, pid_t, u_int16_t); static struct mbuf *key_setsadbsa(struct secasvar *); static struct mbuf *key_setsadbaddr(u_int16_t, - struct sockaddr *, u_int8_t, u_int16_t); + struct sockaddr *, u_int8_t, u_int16_t); +static struct mbuf *key_setsadbipsecif(ifnet_t, ifnet_t, ifnet_t, int); #if 0 static struct mbuf *key_setsadbident(u_int16_t, u_int16_t, caddr_t, - int, u_int64_t); + int, u_int64_t); #endif static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbxpolicy(u_int16_t, u_int8_t, - u_int32_t); + u_int32_t); static void *key_newbuf(const void *, u_int); #if INET6 static int key_ismyaddr6(struct sockaddr_in6 *); @@ -492,29 +513,30 @@ static void key_update_natt_keepalive_timestamp(struct secasvar *, struct secasv static int key_cmpsaidx(struct secasindex *, struct secasindex *, int); static int key_cmpspidx_exactly(struct secpolicyindex *, - struct secpolicyindex *); + struct secpolicyindex *); static int key_cmpspidx_withmask(struct secpolicyindex *, - struct secpolicyindex *); + struct secpolicyindex *); static int 
key_sockaddrcmp(struct sockaddr *, struct sockaddr *, int); +static int key_is_addr_in_range(struct sockaddr_storage *, struct secpolicyaddrrange *); static int key_bbcmp(caddr_t, caddr_t, u_int); static void key_srandom(void); static u_int16_t key_satype2proto(u_int8_t); static u_int8_t key_proto2satype(u_int16_t); static int key_getspi(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static u_int32_t key_do_getnewspi(struct sadb_spirange *, struct secasindex *); static int key_update(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); #if IPSEC_DOSEQCHECK static struct secasvar *key_getsavbyseq(struct secashead *, u_int32_t); #endif static int key_add(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_setident(struct secashead *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static struct mbuf *key_getmsgbuf_x1(struct mbuf *, const struct sadb_msghdr *); static int key_delete(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_get(struct socket *, struct mbuf *, const struct sadb_msghdr *); static void key_getcomb_setlifetime(struct sadb_comb *); @@ -534,44 +556,42 @@ static struct secacq *key_getacqbyseq(u_int32_t); static struct secspacq *key_newspacq(struct secpolicyindex *); static struct secspacq *key_getspacq(struct secpolicyindex *); static int key_acquire2(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_register(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_expire(struct secasvar *); static int key_flush(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_dump(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_promisc(struct socket *, struct mbuf *, - const struct sadb_msghdr *); + const struct sadb_msghdr *); static int key_senderror(struct socket *, struct mbuf *, int); static int key_validate_ext(const struct sadb_ext *, int); static int key_align(struct mbuf *, struct sadb_msghdr *); static struct mbuf *key_alloc_mbuf(int); static int key_getsastat (struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_setsaval2(struct secasvar *sav, - u_int8_t satype, - u_int8_t alg_auth, - u_int8_t alg_enc, - u_int32_t flags, - u_int8_t replay, - struct sadb_key *key_auth, - u_int16_t key_auth_len, - struct sadb_key *key_enc, - u_int16_t key_enc_len, - u_int16_t natt_port, - u_int32_t seq, - u_int32_t spi, - u_int32_t pid, - struct sadb_lifetime *lifetime_hard, - struct sadb_lifetime *lifetime_soft); + u_int8_t satype, + u_int8_t alg_auth, + u_int8_t alg_enc, + u_int32_t flags, + u_int8_t replay, + struct sadb_key *key_auth, + u_int16_t key_auth_len, + struct sadb_key *key_enc, + u_int16_t key_enc_len, + u_int16_t natt_port, + u_int32_t seq, + u_int32_t spi, + u_int32_t pid, + struct sadb_lifetime *lifetime_hard, + struct sadb_lifetime *lifetime_soft); extern int ipsec_bypass; extern int esp_udp_encap_port; int ipsec_send_natt_keepalive(struct secasvar *sav); -void key_init(void); - - +void key_init(struct protosw *, struct domain *); /* * PF_KEY init @@ -579,49 +599,55 @@ void key_init(void); * */ void -key_init(void) +key_init(struct protosw *pp, struct domain *dp) { - + static int key_initialized = 0; int i; + + VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == 
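key_is_addr_in_range() is only declared in this hunk; its job is a membership test of an address against inclusive range endpoints of the new secpolicyaddrrange. An illustrative IPv4-only userspace version of that kind of check (the helper name, argument types, and host-order comparison are assumptions, not the kernel's implementation):

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    static int
    in4_addr_in_range(struct in_addr a, struct in_addr lo, struct in_addr hi)
    {
        /* compare numerically in host byte order, inclusive endpoints */
        uint32_t x = ntohl(a.s_addr);
        return ntohl(lo.s_addr) <= x && x <= ntohl(hi.s_addr);
    }

    int main(void)
    {
        struct in_addr a, lo, hi;
        inet_pton(AF_INET, "10.0.0.5", &a);
        inet_pton(AF_INET, "10.0.0.1", &lo);
        inet_pton(AF_INET, "10.0.0.9", &hi);
        printf("%s\n", in4_addr_in_range(a, lo, hi) ? "in range"
                                                    : "out of range");
        return 0;
    }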
PR_ATTACHED); _CASSERT(PFKEY_ALIGN8(sizeof(struct sadb_msg)) <= _MHLEN); + + if (key_initialized) + return; + key_initialized = 1; sadb_mutex_grp_attr = lck_grp_attr_alloc_init(); sadb_mutex_grp = lck_grp_alloc_init("sadb", sadb_mutex_grp_attr); sadb_mutex_attr = lck_attr_alloc_init(); - - lck_mtx_init(sadb_mutex, sadb_mutex_grp, sadb_mutex_attr); + lck_mtx_init(sadb_mutex, sadb_mutex_grp, sadb_mutex_attr); + pfkey_stat_mutex_grp_attr = lck_grp_attr_alloc_init(); pfkey_stat_mutex_grp = lck_grp_alloc_init("pfkey_stat", pfkey_stat_mutex_grp_attr); pfkey_stat_mutex_attr = lck_attr_alloc_init(); - + lck_mtx_init(pfkey_stat_mutex, pfkey_stat_mutex_grp, pfkey_stat_mutex_attr); - + for (i = 0; i < SPIHASHSIZE; i++) LIST_INIT(&spihash[i]); - - raw_init(); - - bzero((caddr_t)&key_cb, sizeof(key_cb)); + raw_init(pp, dp); + + bzero((caddr_t)&key_cb, sizeof(key_cb)); + for (i = 0; i < IPSEC_DIR_MAX; i++) { LIST_INIT(&sptree[i]); } ipsec_policy_count = 0; - + LIST_INIT(&sahtree); - + for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_INIT(®tree[i]); } ipsec_sav_count = 0; - + #ifndef IPSEC_NONBLOCK_ACQUIRE LIST_INIT(&acqtree); #endif LIST_INIT(&spacqtree); - + /* system default */ #if INET ip4_def_policy.policy = IPSEC_POLICY_NONE; @@ -631,19 +657,29 @@ key_init(void) ip6_def_policy.policy = IPSEC_POLICY_NONE; ip6_def_policy.refcnt++; /*never reclaim this*/ #endif - -#ifndef IPSEC_DEBUG2 - timeout((void *)key_timehandler, (void *)0, hz); -#endif /*IPSEC_DEBUG2*/ - + + key_timehandler_running = 0; + /* initialize key statistics */ keystat.getspi_count = 1; - + #ifndef __APPLE__ printf("IPsec: Initialized Security Association Processing.\n"); #endif } +static void +key_start_timehandler(void) +{ + /* must be called while locked */ + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); +#ifndef IPSEC_DEBUG2 + if (key_timehandler_running == 0) { + key_timehandler_running = 1; + (void)timeout((void *)key_timehandler, (void *)0, hz); + } +#endif /*IPSEC_DEBUG2*/ +} /* %%% IPsec policy management */ /* @@ -654,47 +690,58 @@ key_init(void) */ struct secpolicy * key_allocsp( - struct secpolicyindex *spidx, - u_int dir) + struct secpolicyindex *spidx, + u_int dir) { struct secpolicy *sp; struct timeval tv; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (spidx == NULL) panic("key_allocsp: NULL pointer is passed.\n"); - + /* check direction */ switch (dir) { - case IPSEC_DIR_INBOUND: - case IPSEC_DIR_OUTBOUND: - break; - default: - panic("key_allocsp: Invalid direction is passed.\n"); + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + panic("key_allocsp: Invalid direction is passed.\n"); } - + /* get a SP entry */ KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** objects\n"); - kdebug_secpolicyindex(spidx)); - + printf("*** objects\n"); + kdebug_secpolicyindex(spidx)); + lck_mtx_lock(sadb_mutex); LIST_FOREACH(sp, &sptree[dir], chain) { KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** in SPD\n"); - kdebug_secpolicyindex(&sp->spidx)); - + printf("*** in SPD\n"); + kdebug_secpolicyindex(&sp->spidx)); + if (sp->state == IPSEC_SPSTATE_DEAD) continue; + + /* If the policy is disabled, skip */ + if (sp->disabled > 0) + continue; + + /* If the incoming spidx specifies bound if, + ignore unbound policies*/ + if (spidx->internal_if != NULL + && (sp->spidx.internal_if == NULL || sp->ipsec_if == NULL)) + continue; + if (key_cmpspidx_withmask(&sp->spidx, spidx)) goto found; } lck_mtx_unlock(sadb_mutex); return NULL; - + found: - + /* found a SPD entry */ microtime(&tv); sp->lastused = 
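Two patterns are worth noting in the new key_init()/key_start_timehandler() pair: initialization is made idempotent with a key_initialized guard now that it is driven from the protocol attach path, and the key_timehandler timeout is no longer armed unconditionally at init but lazily, under sadb_mutex, gated by key_timehandler_running. A userspace sketch with a pthread mutex standing in for the kernel lock and a printf standing in for timeout():

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t sadb_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int key_initialized;
    static int key_timehandler_running;

    static void key_init_once(void)
    {
        if (key_initialized)        /* second caller returns immediately */
            return;
        key_initialized = 1;
        /* ... allocate lock groups, init hash lists ... */
    }

    static void key_start_timehandler(void)
    {
        /* caller must hold sadb_mutex, mirroring the lck_mtx_assert() */
        if (key_timehandler_running == 0) {
            key_timehandler_running = 1;
            printf("timer armed\n");   /* timeout(key_timehandler, 0, hz) */
        }
    }

    int main(void)
    {
        key_init_once();
        key_init_once();              /* no-op */
        pthread_mutex_lock(&sadb_mutex);
        key_start_timehandler();
        key_start_timehandler();      /* no-op: already running */
        pthread_mutex_unlock(&sadb_mutex);
        return 0;
    }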
tv.tv_sec; @@ -704,8 +751,8 @@ found: /* sanity check */ KEY_CHKSPDIR(sp->spidx.dir, dir, "key_allocsp"); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP key_allocsp cause refcnt++:%d SP:%p\n", - sp->refcnt, sp)); + printf("DP key_allocsp cause refcnt++:%d SP:0x%llx\n", + sp->refcnt, (uint64_t)VM_KERNEL_ADDRPERM(sp))); return sp; } @@ -715,10 +762,10 @@ found: */ struct secpolicy * key_gettunnel( - struct sockaddr *osrc, - struct sockaddr *odst, - struct sockaddr *isrc, - struct sockaddr *idst) + struct sockaddr *osrc, + struct sockaddr *odst, + struct sockaddr *isrc, + struct sockaddr *idst) { struct secpolicy *sp; const int dir = IPSEC_DIR_INBOUND; @@ -726,26 +773,26 @@ key_gettunnel( struct ipsecrequest *r1, *r2, *p; struct sockaddr *os, *od, *is, *id; struct secpolicyindex spidx; - + if (isrc->sa_family != idst->sa_family) { ipseclog((LOG_ERR, "protocol family mismatched %d != %d\n.", - isrc->sa_family, idst->sa_family)); + isrc->sa_family, idst->sa_family)); return NULL; } - + lck_mtx_lock(sadb_mutex); LIST_FOREACH(sp, &sptree[dir], chain) { if (sp->state == IPSEC_SPSTATE_DEAD) continue; - + r1 = r2 = NULL; for (p = sp->req; p; p = p->next) { if (p->saidx.mode != IPSEC_MODE_TUNNEL) continue; - + r1 = r2; r2 = p; - + if (!r1) { /* here we look at address matches only */ spidx = sp->spidx; @@ -755,7 +802,7 @@ key_gettunnel( bcopy(isrc, &spidx.src, isrc->sa_len); bcopy(idst, &spidx.dst, idst->sa_len); if (!key_cmpspidx_withmask(&sp->spidx, &spidx)) - continue; + continue; } else { is = (struct sockaddr *)&r1->saidx.src; id = (struct sockaddr *)&r1->saidx.dst; @@ -763,19 +810,19 @@ key_gettunnel( key_sockaddrcmp(id, idst, 0)) continue; } - + os = (struct sockaddr *)&r2->saidx.src; od = (struct sockaddr *)&r2->saidx.dst; if (key_sockaddrcmp(os, osrc, 0) || key_sockaddrcmp(od, odst, 0)) continue; - + goto found; } } lck_mtx_unlock(sadb_mutex); return NULL; - + found: microtime(&tv); sp->lastused = tv.tv_sec; @@ -792,14 +839,14 @@ found: */ int key_checkrequest( - struct ipsecrequest *isr, - struct secasindex *saidx, - struct secasvar **sav) + struct ipsecrequest *isr, + struct secasindex *saidx, + struct secasvar **sav) { u_int level; int error; struct sockaddr_in *sin; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); *sav = NULL; @@ -807,32 +854,32 @@ key_checkrequest( /* sanity check */ if (isr == NULL || saidx == NULL) panic("key_checkrequest: NULL pointer is passed.\n"); - + /* check mode */ switch (saidx->mode) { - case IPSEC_MODE_TRANSPORT: - case IPSEC_MODE_TUNNEL: - break; - case IPSEC_MODE_ANY: - default: - panic("key_checkrequest: Invalid policy defined.\n"); + case IPSEC_MODE_TRANSPORT: + case IPSEC_MODE_TUNNEL: + break; + case IPSEC_MODE_ANY: + default: + panic("key_checkrequest: Invalid policy defined.\n"); } - + /* get current level */ level = ipsec_get_reqlevel(isr); - - + + /* * key_allocsa_policy should allocate the oldest SA available. * See key_do_allocsa_policy(), and draft-jenkins-ipsec-rekeying-03.txt. */ if (*sav == NULL) *sav = key_allocsa_policy(saidx); - + /* When there is SA. */ if (*sav != NULL) return 0; - + /* There is no SA. * * Remove dst port - used for special natt support - don't call @@ -845,10 +892,10 @@ key_checkrequest( if ((error = key_acquire(saidx, isr->sp)) != 0) { /* XXX What should I do ? */ ipseclog((LOG_DEBUG, "key_checkrequest: error %d returned " - "from key_acquire.\n", error)); + "from key_acquire.\n", error)); return error; } - + return level == IPSEC_LEVEL_REQUIRE ? 
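The debug printfs in these hunks switch from %p to 0x%llx with VM_KERNEL_ADDRPERM() so SP/SA log lines stop exposing raw kernel addresses, which would weaken kernel ASLR. In xnu the macro adds a per-boot random value and maps NULL to 0; this userspace sketch fakes the secret with a constant (addrperm and the constant are stand-ins):

    #include <stdint.h>
    #include <stdio.h>

    static uintptr_t vm_kernel_addrperm = 0x5a5a5a5a;  /* per-boot secret */

    static uint64_t addrperm(const void *p)
    {
        /* 0 stays 0 so "no object" is still recognizable in logs */
        return p ? (uint64_t)((uintptr_t)p + vm_kernel_addrperm) : 0;
    }

    int main(void)
    {
        int obj;
        printf("SP:0x%llx\n", (unsigned long long)addrperm(&obj));
        return 0;
    }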
ENOENT : 0; } @@ -862,7 +909,7 @@ u_int32_t sah_search_calls = 0; u_int32_t sah_search_count = 0; struct secasvar * key_allocsa_policy( - struct secasindex *saidx) + struct secasindex *saidx) { struct secashead *sah; struct secasvar *sav; @@ -875,7 +922,7 @@ key_allocsa_policy( lck_mtx_lock(sadb_mutex); sah_search_calls++; LIST_FOREACH(sah, &sahtree, chain) { - sah_search_count++; + sah_search_count++; if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE | CMP_REQID)) @@ -883,9 +930,9 @@ key_allocsa_policy( } lck_mtx_unlock(sadb_mutex); return NULL; - - found: - + +found: + /* * search a valid state list for outbound packet. * This search order is important. @@ -897,17 +944,17 @@ key_allocsa_policy( saorder_state_valid = saorder_state_valid_prefer_new; arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); } - - + + sin = (struct sockaddr_in *)&saidx->dst; dstport = sin->sin_port; if (saidx->mode == IPSEC_MODE_TRANSPORT) sin->sin_port = IPSEC_PORT_ANY; - + for (stateidx = 0; stateidx < arraysize; stateidx++) { - + state = saorder_state_valid[stateidx]; - + sav = key_do_allocsa_policy(sah, state, dstport); if (sav != NULL) { lck_mtx_unlock(sadb_mutex); @@ -918,6 +965,67 @@ key_allocsa_policy( return NULL; } +static void +key_send_delete (struct secasvar *sav) +{ + struct mbuf *m, *result; + u_int8_t satype; + + key_sa_chgstate(sav, SADB_SASTATE_DEAD); + + if ((satype = key_proto2satype(sav->sah->saidx.proto)) == 0) + panic("key_do_allocsa_policy: invalid proto is passed.\n"); + + m = key_setsadbmsg(SADB_DELETE, 0, + satype, 0, 0, sav->refcnt - 1); + if (!m) + goto msgfail; + result = m; + + /* set sadb_address for saidx's. */ + m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, + (struct sockaddr *)&sav->sah->saidx.src, + sav->sah->saidx.src.ss_len << 3, + IPSEC_ULPROTO_ANY); + if (!m) + goto msgfail; + m_cat(result, m); + + /* set sadb_address for saidx's. */ + m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, + (struct sockaddr *)&sav->sah->saidx.dst, + sav->sah->saidx.src.ss_len << 3, + IPSEC_ULPROTO_ANY); + if (!m) + goto msgfail; + m_cat(result, m); + + /* create SA extension */ + m = key_setsadbsa(sav); + if (!m) + goto msgfail; + m_cat(result, m); + + if (result->m_len < sizeof(struct sadb_msg)) { + result = m_pullup(result, + sizeof(struct sadb_msg)); + if (result == NULL) + goto msgfail; + } + + result->m_pkthdr.len = 0; + for (m = result; m; m = m->m_next) + result->m_pkthdr.len += m->m_len; + mtod(result, struct sadb_msg *)->sadb_msg_len = + PFKEY_UNIT64(result->m_pkthdr.len); + + if (key_sendup_mbuf(NULL, result, + KEY_SENDUP_REGISTERED)) + goto msgfail; +msgfail: + key_freesav(sav, KEY_SADB_LOCKED); +} + /* * searching SAD with direction, protocol, mode and state. * called by key_allocsa_policy(). 
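key_send_delete() hoists the previously duplicated delete-notification block out of key_do_allocsa_policy(); judging by the removed inline code further down, it also corrects the DST address extension to carry saidx.dst rather than a second copy of saidx.src, though it still reuses src.ss_len for the prefix length. The closing length fix-up follows the PF_KEY v2 convention that sadb_msg_len counts 8-byte units. A self-contained sketch of that arithmetic (the sadb_msg layout is abbreviated and the extension sizes are plausible placeholders):

    #include <stdint.h>
    #include <stdio.h>

    struct sadb_msg {                 /* abbreviated PF_KEY v2 base header */
        uint8_t  sadb_msg_version, sadb_msg_type,
                 sadb_msg_errno, sadb_msg_satype;
        uint16_t sadb_msg_len;        /* total length in 8-byte units */
        uint16_t sadb_msg_reserved;
        uint32_t sadb_msg_seq, sadb_msg_pid;
    };

    #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))
    #define PFKEY_UNIT64(a) ((a) >> 3)

    int main(void)
    {
        /* base header + src/dst address extensions + SA extension */
        size_t total = sizeof(struct sadb_msg) + 24 + 24 + 16;
        printf("bytes=%zu -> sadb_msg_len=%zu\n",
            total, (size_t)PFKEY_UNIT64(PFKEY_ALIGN8(total)));
        return 0;
    }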
@@ -927,78 +1035,78 @@ key_allocsa_policy( */ static struct secasvar * key_do_allocsa_policy( - struct secashead *sah, - u_int state, - u_int16_t dstport) + struct secashead *sah, + u_int state, + u_int16_t dstport) { struct secasvar *sav, *nextsav, *candidate, *natt_candidate, *no_natt_candidate, *d; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* initialize */ candidate = NULL; natt_candidate = NULL; no_natt_candidate = NULL; - + for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { - + nextsav = LIST_NEXT(sav, chain); - + /* sanity check */ KEY_CHKSASTATE(sav->state, state, "key_do_allocsa_policy"); - + if (sah->saidx.mode == IPSEC_MODE_TUNNEL && dstport && ((sav->flags & SADB_X_EXT_NATT) != 0) && ntohs(dstport) != sav->remote_ike_port) continue; - + if (sah->saidx.mode == IPSEC_MODE_TRANSPORT && ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0) && ntohs(dstport) != sav->remote_ike_port) continue; /* skip this one - not a match - or not UDP */ - + if ((sah->saidx.mode == IPSEC_MODE_TUNNEL && ((sav->flags & SADB_X_EXT_NATT) != 0)) || (sah->saidx.mode == IPSEC_MODE_TRANSPORT && ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0))) { - if (natt_candidate == NULL) { - natt_candidate = sav; - continue; - } else - candidate = natt_candidate; - } else { - if (no_natt_candidate == NULL) { - no_natt_candidate = sav; - continue; - } else - candidate = no_natt_candidate; - } - + if (natt_candidate == NULL) { + natt_candidate = sav; + continue; + } else + candidate = natt_candidate; + } else { + if (no_natt_candidate == NULL) { + no_natt_candidate = sav; + continue; + } else + candidate = no_natt_candidate; + } + /* Which SA is the better ? */ - + /* sanity check 2 */ if (candidate->lft_c == NULL || sav->lft_c == NULL) panic("key_do_allocsa_policy: " - "lifetime_current is NULL.\n"); - + "lifetime_current is NULL.\n"); + /* What the best method is to compare ? */ if (key_preferred_oldsa) { if (candidate->lft_c->sadb_lifetime_addtime > - sav->lft_c->sadb_lifetime_addtime) { + sav->lft_c->sadb_lifetime_addtime) { if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0) natt_candidate = sav; else no_natt_candidate = sav; - } + } continue; /*NOTREACHED*/ } - + /* prefered new sa rather than old sa */ if (candidate->lft_c->sadb_lifetime_addtime < - sav->lft_c->sadb_lifetime_addtime) { + sav->lft_c->sadb_lifetime_addtime) { d = candidate; if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0) natt_candidate = sav; @@ -1006,68 +1114,17 @@ key_do_allocsa_policy( no_natt_candidate = sav; } else d = sav; - + /* * prepared to delete the SA when there is more * suitable candidate and the lifetime of the SA is not * permanent. */ if (d->lft_c->sadb_lifetime_addtime != 0) { - struct mbuf *m, *result; - - key_sa_chgstate(d, SADB_SASTATE_DEAD); - - m = key_setsadbmsg(SADB_DELETE, 0, - d->sah->saidx.proto, 0, 0, d->refcnt - 1); - if (!m) - goto msgfail; - result = m; - - /* set sadb_address for saidx's. */ - m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - (struct sockaddr *)&d->sah->saidx.src, - d->sah->saidx.src.ss_len << 3, - IPSEC_ULPROTO_ANY); - if (!m) - goto msgfail; - m_cat(result, m); - - /* set sadb_address for saidx's. 
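The candidate loop above implements the key_preferred_oldsa policy: with it set, the SA with the smaller sadb_lifetime_addtime (created earlier) survives; otherwise the newer one is kept and the displaced SA is queued for deletion via key_send_delete() when its lifetime is not permanent. Distilled to the comparison itself (pick() and the pared-down struct are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    struct sa { const char *name; uint64_t addtime; };

    static const struct sa *
    pick(const struct sa *a, const struct sa *b, int prefer_old)
    {
        if (prefer_old)                       /* smaller addtime = older */
            return a->addtime <= b->addtime ? a : b;
        return a->addtime >= b->addtime ? a : b;
    }

    int main(void)
    {
        struct sa older = { "older", 100 }, newer = { "newer", 200 };
        printf("prefer_old=1 -> %s\n", pick(&older, &newer, 1)->name);
        printf("prefer_old=0 -> %s\n", pick(&older, &newer, 0)->name);
        return 0;
    }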
*/ - m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - (struct sockaddr *)&d->sah->saidx.src, - d->sah->saidx.src.ss_len << 3, - IPSEC_ULPROTO_ANY); - if (!m) - goto msgfail; - m_cat(result, m); - - /* create SA extension */ - m = key_setsadbsa(d); - if (!m) - goto msgfail; - m_cat(result, m); - - if (result->m_len < sizeof(struct sadb_msg)) { - result = m_pullup(result, - sizeof(struct sadb_msg)); - if (result == NULL) - goto msgfail; - } - - result->m_pkthdr.len = 0; - for (m = result; m; m = m->m_next) - result->m_pkthdr.len += m->m_len; - mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - - if (key_sendup_mbuf(NULL, result, - KEY_SENDUP_REGISTERED)) - goto msgfail; - msgfail: - key_freesav(d, KEY_SADB_LOCKED); + key_send_delete(d); } } - + /* choose latest if both types present */ if (natt_candidate == NULL) candidate = no_natt_candidate; @@ -1076,17 +1133,17 @@ key_do_allocsa_policy( else if (sah->saidx.mode == IPSEC_MODE_TUNNEL && dstport) candidate = natt_candidate; else if (natt_candidate->lft_c->sadb_lifetime_addtime > - no_natt_candidate->lft_c->sadb_lifetime_addtime) + no_natt_candidate->lft_c->sadb_lifetime_addtime) candidate = natt_candidate; else candidate = no_natt_candidate; - + if (candidate) { candidate->refcnt++; KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP allocsa_policy cause " - "refcnt++:%d SA:%p\n", - candidate->refcnt, candidate)); + printf("DP allocsa_policy cause " + "refcnt++:%d SA:0x%llx\n", candidate->refcnt, + (uint64_t)VM_KERNEL_ADDRPERM(candidate))); } return candidate; } @@ -1108,11 +1165,11 @@ key_do_allocsa_policy( */ struct secasvar * key_allocsa( - u_int family, - caddr_t src, - caddr_t dst, - u_int proto, - u_int32_t spi) + u_int family, + caddr_t src, + caddr_t dst, + u_int proto, + u_int32_t spi) { struct secasvar *sav, *match; u_int stateidx, state, tmpidx, matchidx; @@ -1122,11 +1179,11 @@ key_allocsa( int arraysize; lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); - + /* sanity check */ if (src == NULL || dst == NULL) panic("key_allocsa: NULL pointer is passed.\n"); - + /* * when both systems employ similar strategy to use a SA. * the search order is important even in the inbound case. @@ -1138,7 +1195,7 @@ key_allocsa( saorder_state_valid = saorder_state_valid_prefer_new; arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); } - + /* * searching SAD. * XXX: to be checked internal IP header somewhere. 
Also when @@ -1170,103 +1227,103 @@ key_allocsa( } if (tmpidx >= matchidx) continue; - + #if 0 /* don't check src */ /* check src address */ switch (family) { - case AF_INET: - bzero(&sin, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(sin); - bcopy(src, &sin.sin_addr, - sizeof(sin.sin_addr)); - if (key_sockaddrcmp((struct sockaddr*)&sin, - (struct sockaddr *)&sav->sah->saidx.src, 0) != 0) - continue; - break; - case AF_INET6: - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - bcopy(src, &sin6.sin6_addr, - sizeof(sin6.sin6_addr)); - if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) { - /* kame fake scopeid */ - sin6.sin6_scope_id = - ntohs(sin6.sin6_addr.s6_addr16[1]); - sin6.sin6_addr.s6_addr16[1] = 0; - } - if (key_sockaddrcmp((struct sockaddr*)&sin6, - (struct sockaddr *)&sav->sah->saidx.src, 0) != 0) + case AF_INET: + bzero(&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + bcopy(src, &sin.sin_addr, + sizeof(sin.sin_addr)); + if (key_sockaddrcmp((struct sockaddr*)&sin, + (struct sockaddr *)&sav->sah->saidx.src, 0) != 0) + continue; + break; + case AF_INET6: + bzero(&sin6, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(sin6); + bcopy(src, &sin6.sin6_addr, + sizeof(sin6.sin6_addr)); + if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) { + /* kame fake scopeid */ + sin6.sin6_scope_id = + ntohs(sin6.sin6_addr.s6_addr16[1]); + sin6.sin6_addr.s6_addr16[1] = 0; + } + if (key_sockaddrcmp((struct sockaddr*)&sin6, + (struct sockaddr *)&sav->sah->saidx.src, 0) != 0) + continue; + break; + default: + ipseclog((LOG_DEBUG, "key_allocsa: " + "unknown address family=%d.\n", + family)); continue; - break; - default: - ipseclog((LOG_DEBUG, "key_allocsa: " - "unknown address family=%d.\n", - family)); - continue; } - + #endif /* check dst address */ switch (family) { - case AF_INET: - bzero(&sin, sizeof(sin)); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(sin); - bcopy(dst, &sin.sin_addr, - sizeof(sin.sin_addr)); - if (key_sockaddrcmp((struct sockaddr*)&sin, - (struct sockaddr *)&sav->sah->saidx.dst, 0) != 0) - continue; - - break; - case AF_INET6: - bzero(&sin6, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - bcopy(dst, &sin6.sin6_addr, - sizeof(sin6.sin6_addr)); - if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) { - /* kame fake scopeid */ - sin6.sin6_scope_id = - ntohs(sin6.sin6_addr.s6_addr16[1]); - sin6.sin6_addr.s6_addr16[1] = 0; - } - if (key_sockaddrcmp((struct sockaddr*)&sin6, - (struct sockaddr *)&sav->sah->saidx.dst, 0) != 0) + case AF_INET: + bzero(&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof(sin); + bcopy(dst, &sin.sin_addr, + sizeof(sin.sin_addr)); + if (key_sockaddrcmp((struct sockaddr*)&sin, + (struct sockaddr *)&sav->sah->saidx.dst, 0) != 0) + continue; + + break; + case AF_INET6: + bzero(&sin6, sizeof(sin6)); + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(sin6); + bcopy(dst, &sin6.sin6_addr, + sizeof(sin6.sin6_addr)); + if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr)) { + /* kame fake scopeid */ + sin6.sin6_scope_id = + ntohs(sin6.sin6_addr.s6_addr16[1]); + sin6.sin6_addr.s6_addr16[1] = 0; + } + if (key_sockaddrcmp((struct sockaddr*)&sin6, + (struct sockaddr *)&sav->sah->saidx.dst, 0) != 0) + continue; + break; + default: + ipseclog((LOG_DEBUG, "key_allocsa: " + "unknown address family=%d.\n", family)); continue; - break; - default: - ipseclog((LOG_DEBUG, "key_allocsa: " - "unknown address family=%d.\n", family)); - 
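The re-indented dst checks above include the KAME embedded-scope handling: for link-local addresses the kernel stores the interface index in the second 16-bit word of the in6_addr, so it is moved out to sin6_scope_id and zeroed before key_sockaddrcmp() runs. A userspace illustration of the un-embedding step (byte accesses are used instead of the non-portable s6_addr16):

    #include <arpa/inet.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        struct sockaddr_in6 sin6;
        memset(&sin6, 0, sizeof(sin6));
        sin6.sin6_family = AF_INET6;
        inet_pton(AF_INET6, "fe80::1", &sin6.sin6_addr);
        sin6.sin6_addr.s6_addr[2] = 0;   /* pretend embedded ifindex 4, */
        sin6.sin6_addr.s6_addr[3] = 4;   /* as a kernel-internal address */

        /* un-embed: recover the scope and normalize for comparison */
        sin6.sin6_scope_id = (sin6.sin6_addr.s6_addr[2] << 8) |
                              sin6.sin6_addr.s6_addr[3];
        sin6.sin6_addr.s6_addr[2] = sin6.sin6_addr.s6_addr[3] = 0;

        char buf[INET6_ADDRSTRLEN];
        inet_ntop(AF_INET6, &sin6.sin6_addr, buf, sizeof(buf));
        printf("%s scope %u\n", buf, sin6.sin6_scope_id);
        return 0;
    }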
continue; } - + match = sav; matchidx = tmpidx; } if (match) goto found; - + /* not found */ lck_mtx_unlock(sadb_mutex); return NULL; - + found: match->refcnt++; lck_mtx_unlock(sadb_mutex); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP allocsa cause refcnt++:%d SA:%p\n", - match->refcnt, match)); + printf("DP allocsa cause refcnt++:%d SA:0x%llx\n", + match->refcnt, (uint64_t)VM_KERNEL_ADDRPERM(match))); return match; } u_int16_t key_natt_get_translated_port( - struct secasvar *outsav) + struct secasvar *outsav) { - + struct secasindex saidx; struct secashead *sah; u_int stateidx, state; @@ -1289,9 +1346,9 @@ key_natt_get_translated_port( } lck_mtx_unlock(sadb_mutex); return 0; - + found: - /* + /* * Found sah - now go thru list of SAs and find * matching remote ike port. If found - set * sav->natt_encapsulated_src_port and return the port. @@ -1307,7 +1364,7 @@ found: saorder_state_valid = saorder_state_valid_prefer_new; arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); } - + for (stateidx = 0; stateidx < arraysize; stateidx++) { state = saorder_state_valid[stateidx]; if (key_do_get_translated_port(sah, outsav, state)) { @@ -1321,64 +1378,64 @@ found: static int key_do_get_translated_port( - struct secashead *sah, - struct secasvar *outsav, - u_int state) + struct secashead *sah, + struct secasvar *outsav, + u_int state) { struct secasvar *currsav, *nextsav, *candidate; - - + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); /* initilize */ candidate = NULL; - + for (currsav = LIST_FIRST(&sah->savtree[state]); currsav != NULL; currsav = nextsav) { - + nextsav = LIST_NEXT(currsav, chain); - + /* sanity check */ KEY_CHKSASTATE(currsav->state, state, "key_do_get_translated_port"); if ((currsav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) == 0 || currsav->remote_ike_port != outsav->remote_ike_port) continue; - + if (candidate == NULL) { candidate = currsav; continue; } /* Which SA is the better ? */ - + /* sanity check 2 */ if (candidate->lft_c == NULL || currsav->lft_c == NULL) panic("key_do_get_translated_port: " - "lifetime_current is NULL.\n"); - + "lifetime_current is NULL.\n"); + /* What the best method is to compare ? 
*/ if (key_preferred_oldsa) { if (candidate->lft_c->sadb_lifetime_addtime > - currsav->lft_c->sadb_lifetime_addtime) { + currsav->lft_c->sadb_lifetime_addtime) { candidate = currsav; } continue; /*NOTREACHED*/ } - + /* prefered new sa rather than old sa */ if (candidate->lft_c->sadb_lifetime_addtime < - currsav->lft_c->sadb_lifetime_addtime) + currsav->lft_c->sadb_lifetime_addtime) candidate = currsav; } - - if (candidate) { + + if (candidate) { outsav->natt_encapsulated_src_port = candidate->natt_encapsulated_src_port; return 1; } - + return 0; } @@ -1388,10 +1445,10 @@ key_do_get_translated_port( */ void key_freesp( - struct secpolicy *sp, - int locked) + struct secpolicy *sp, + int locked) { - + /* sanity check */ if (sp == NULL) panic("key_freesp: NULL pointer is passed.\n"); @@ -1402,9 +1459,9 @@ key_freesp( lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); sp->refcnt--; KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP freesp cause refcnt--:%d SP:%p\n", - sp->refcnt, sp)); - + printf("DP freesp cause refcnt--:%d SP:0x%llx\n", + sp->refcnt, (uint64_t)VM_KERNEL_ADDRPERM(sp))); + if (sp->refcnt == 0) key_delsp(sp); if (!locked) @@ -1421,55 +1478,55 @@ static void key_freesp_so(struct secpolicy **); */ void key_freeso( - struct socket *so) + struct socket *so) { /* sanity check */ if (so == NULL) panic("key_freeso: NULL pointer is passed.\n"); - + lck_mtx_lock(sadb_mutex); - switch (so->so_proto->pr_domain->dom_family) { + switch (SOCK_DOM(so)) { #if INET - case PF_INET: + case PF_INET: { - struct inpcb *pcb = sotoinpcb(so); - - /* Does it have a PCB ? */ - if (pcb == NULL || pcb->inp_sp == NULL) - goto done; - key_freesp_so(&pcb->inp_sp->sp_in); - key_freesp_so(&pcb->inp_sp->sp_out); + struct inpcb *pcb = sotoinpcb(so); + + /* Does it have a PCB ? */ + if (pcb == NULL || pcb->inp_sp == NULL) + goto done; + key_freesp_so(&pcb->inp_sp->sp_in); + key_freesp_so(&pcb->inp_sp->sp_out); } - break; + break; #endif #if INET6 - case PF_INET6: + case PF_INET6: { #if HAVE_NRL_INPCB - struct inpcb *pcb = sotoinpcb(so); - - /* Does it have a PCB ? */ - if (pcb == NULL || pcb->inp_sp == NULL) - goto done; - key_freesp_so(&pcb->inp_sp->sp_in); - key_freesp_so(&pcb->inp_sp->sp_out); + struct inpcb *pcb = sotoinpcb(so); + + /* Does it have a PCB ? */ + if (pcb == NULL || pcb->inp_sp == NULL) + goto done; + key_freesp_so(&pcb->inp_sp->sp_in); + key_freesp_so(&pcb->inp_sp->sp_out); #else - struct in6pcb *pcb = sotoin6pcb(so); - - /* Does it have a PCB ? */ - if (pcb == NULL || pcb->in6p_sp == NULL) - goto done; - key_freesp_so(&pcb->in6p_sp->sp_in); - key_freesp_so(&pcb->in6p_sp->sp_out); + struct in6pcb *pcb = sotoin6pcb(so); + + /* Does it have a PCB ? 
*/ + if (pcb == NULL || pcb->in6p_sp == NULL) + goto done; + key_freesp_so(&pcb->in6p_sp->sp_in); + key_freesp_so(&pcb->in6p_sp->sp_out); #endif } - break; + break; #endif /* INET6 */ - default: - ipseclog((LOG_DEBUG, "key_freeso: unknown address family=%d.\n", - so->so_proto->pr_domain->dom_family)); - break; + default: + ipseclog((LOG_DEBUG, "key_freeso: unknown address family=%d.\n", + SOCK_DOM(so))); + break; } done: lck_mtx_unlock(sadb_mutex); @@ -1479,29 +1536,30 @@ done: static void key_freesp_so( - struct secpolicy **sp) + struct secpolicy **sp) { - + /* sanity check */ if (sp == NULL || *sp == NULL) panic("key_freesp_so: sp == NULL\n"); - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); switch ((*sp)->policy) { - case IPSEC_POLICY_IPSEC: - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP freeso calls free SP:%p\n", *sp)); - key_freesp(*sp, KEY_SADB_LOCKED); - *sp = NULL; - break; - case IPSEC_POLICY_ENTRUST: - case IPSEC_POLICY_BYPASS: - return; - default: - panic("key_freesp_so: Invalid policy found %d", (*sp)->policy); + case IPSEC_POLICY_IPSEC: + KEYDEBUG(KEYDEBUG_IPSEC_STAMP, + printf("DP freeso calls free SP:0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(*sp))); + key_freesp(*sp, KEY_SADB_LOCKED); + *sp = NULL; + break; + case IPSEC_POLICY_ENTRUST: + case IPSEC_POLICY_BYPASS: + return; + default: + panic("key_freesp_so: Invalid policy found %d", (*sp)->policy); } - + return; } @@ -1514,23 +1572,24 @@ key_freesp_so( */ void key_freesav( - struct secasvar *sav, - int locked) + struct secasvar *sav, + int locked) { - + /* sanity check */ if (sav == NULL) panic("key_freesav: NULL pointer is passed.\n"); - + if (!locked) lck_mtx_lock(sadb_mutex); else lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); sav->refcnt--; KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP freesav cause refcnt--:%d SA:%p SPI %u\n", - sav->refcnt, sav, (u_int32_t)ntohl(sav->spi))); - + printf("DP freesav cause refcnt--:%d SA:0x%llx SPI %u\n", + sav->refcnt, (uint64_t)VM_KERNEL_ADDRPERM(sav), + (u_int32_t)ntohl(sav->spi))); + if (sav->refcnt == 0) key_delsav(sav); if (!locked) @@ -1544,28 +1603,43 @@ key_freesav( */ static void key_delsp( - struct secpolicy *sp) + struct secpolicy *sp) { - + /* sanity check */ if (sp == NULL) panic("key_delsp: NULL pointer is passed.\n"); - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); sp->state = IPSEC_SPSTATE_DEAD; - + if (sp->refcnt > 0) return; /* can't free */ - + /* remove from SP index */ if (__LIST_CHAINED(sp)) { LIST_REMOVE(sp, chain); ipsec_policy_count--; } - + + if (sp->spidx.internal_if) { + ifnet_release(sp->spidx.internal_if); + sp->spidx.internal_if = NULL; + } + + if (sp->ipsec_if) { + ifnet_release(sp->ipsec_if); + sp->ipsec_if = NULL; + } + + if (sp->outgoing_if) { + ifnet_release(sp->outgoing_if); + sp->outgoing_if = NULL; + } + { struct ipsecrequest *isr = sp->req, *nextisr; - + while (isr != NULL) { nextisr = isr->next; KFREE(isr); @@ -1573,7 +1647,7 @@ key_delsp( } } keydb_delsecpolicy(sp); - + return; } @@ -1584,16 +1658,16 @@ key_delsp( */ static struct secpolicy * key_getsp( - struct secpolicyindex *spidx) + struct secpolicyindex *spidx) { struct secpolicy *sp; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* sanity check */ if (spidx == NULL) panic("key_getsp: NULL pointer is passed.\n"); - + LIST_FOREACH(sp, &sptree[spidx->dir], chain) { if (sp->state == IPSEC_SPSTATE_DEAD) continue; @@ -1602,7 +1676,7 @@ key_getsp( return sp; } } - + return NULL; } @@ -1611,14 +1685,28 @@ key_getsp( * OUT: NULL : not found * others : found, pointer to a SP. 
*/ -static struct secpolicy * +struct secpolicy * key_getspbyid( - u_int32_t id) + u_int32_t id) { struct secpolicy *sp; + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + lck_mtx_lock(sadb_mutex); + sp = __key_getspbyid(id); + lck_mtx_unlock(sadb_mutex); + + return sp; +} +static struct secpolicy * +__key_getspbyid(u_int32_t id) +{ + struct secpolicy *sp; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + LIST_FOREACH(sp, &sptree[IPSEC_DIR_INBOUND], chain) { if (sp->state == IPSEC_SPSTATE_DEAD) continue; @@ -1627,7 +1715,7 @@ key_getspbyid( return sp; } } - + LIST_FOREACH(sp, &sptree[IPSEC_DIR_OUTBOUND], chain) { if (sp->state == IPSEC_SPSTATE_DEAD) continue; @@ -1636,7 +1724,7 @@ key_getspbyid( return sp; } } - + return NULL; } @@ -1649,10 +1737,10 @@ key_newsp(void) newsp = keydb_newsecpolicy(); if (!newsp) return newsp; - + newsp->refcnt = 1; newsp->req = NULL; - + return newsp; } @@ -1663,12 +1751,12 @@ key_newsp(void) */ struct secpolicy * key_msg2sp( - struct sadb_x_policy *xpl0, - size_t len, - int *error) + struct sadb_x_policy *xpl0, + size_t len, + int *error) { struct secpolicy *newsp; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ @@ -1681,204 +1769,204 @@ key_msg2sp( *error = EINVAL; return NULL; } - + if ((newsp = key_newsp()) == NULL) { *error = ENOBUFS; return NULL; } - + newsp->spidx.dir = xpl0->sadb_x_policy_dir; newsp->policy = xpl0->sadb_x_policy_type; - + /* check policy */ switch (xpl0->sadb_x_policy_type) { - case IPSEC_POLICY_DISCARD: + case IPSEC_POLICY_DISCARD: case IPSEC_POLICY_GENERATE: - case IPSEC_POLICY_NONE: - case IPSEC_POLICY_ENTRUST: - case IPSEC_POLICY_BYPASS: - newsp->req = NULL; - break; - - case IPSEC_POLICY_IPSEC: + case IPSEC_POLICY_NONE: + case IPSEC_POLICY_ENTRUST: + case IPSEC_POLICY_BYPASS: + newsp->req = NULL; + break; + + case IPSEC_POLICY_IPSEC: { - int tlen; - struct sadb_x_ipsecrequest *xisr; - struct ipsecrequest **p_isr = &newsp->req; - - /* validity check */ - if (PFKEY_EXTLEN(xpl0) < sizeof(*xpl0)) { - ipseclog((LOG_DEBUG, - "key_msg2sp: Invalid msg length.\n")); - key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = EINVAL; - return NULL; - } - - tlen = PFKEY_EXTLEN(xpl0) - sizeof(*xpl0); - xisr = (struct sadb_x_ipsecrequest *)(xpl0 + 1); - - while (tlen > 0) { - - /* length check */ - if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr)) { - ipseclog((LOG_DEBUG, "key_msg2sp: " - "invalid ipsecrequest length.\n")); - key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = EINVAL; - return NULL; - } - - /* allocate request buffer */ - KMALLOC_WAIT(*p_isr, struct ipsecrequest *, sizeof(**p_isr)); - if ((*p_isr) == NULL) { - ipseclog((LOG_DEBUG, - "key_msg2sp: No more memory.\n")); - key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = ENOBUFS; - return NULL; - } - bzero(*p_isr, sizeof(**p_isr)); - - /* set values */ - (*p_isr)->next = NULL; - - switch (xisr->sadb_x_ipsecrequest_proto) { - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_IPCOMP: - break; - default: - ipseclog((LOG_DEBUG, - "key_msg2sp: invalid proto type=%u\n", - xisr->sadb_x_ipsecrequest_proto)); - key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = EPROTONOSUPPORT; - return NULL; - } - (*p_isr)->saidx.proto = xisr->sadb_x_ipsecrequest_proto; - - switch (xisr->sadb_x_ipsecrequest_mode) { - case IPSEC_MODE_TRANSPORT: - case IPSEC_MODE_TUNNEL: - break; - case IPSEC_MODE_ANY: - default: + int tlen; + struct sadb_x_ipsecrequest *xisr; + struct ipsecrequest **p_isr = &newsp->req; + + /* validity check */ + if (PFKEY_EXTLEN(xpl0) < sizeof(*xpl0)) { 
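key_getspbyid() is split here into an exported wrapper that asserts sadb_mutex is not held, takes it, and delegates to a __-prefixed worker that asserts it is held, so internal callers already under the lock can reuse the lookup without recursing on the mutex. The shape of the pattern, with a pthread mutex and a flag standing in for lck_mtx_assert() (lookup/__lookup are illustrative names):

    #include <pthread.h>
    #include <assert.h>
    #include <stdio.h>

    static pthread_mutex_t sadb_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int lock_held;             /* stand-in for lck_mtx_assert() */

    static int __lookup(int id)
    {
        assert(lock_held);            /* LCK_MTX_ASSERT_OWNED */
        return id == 42;              /* pretend table hit */
    }

    static int lookup(int id)
    {
        assert(!lock_held);           /* LCK_MTX_ASSERT_NOTOWNED */
        pthread_mutex_lock(&sadb_mutex);
        lock_held = 1;
        int r = __lookup(id);
        lock_held = 0;
        pthread_mutex_unlock(&sadb_mutex);
        return r;
    }

    int main(void)
    {
        printf("found=%d\n", lookup(42));
        return 0;
    }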
ipseclog((LOG_DEBUG, - "key_msg2sp: invalid mode=%u\n", - xisr->sadb_x_ipsecrequest_mode)); + "key_msg2sp: Invalid msg length.\n")); key_freesp(newsp, KEY_SADB_UNLOCKED); *error = EINVAL; return NULL; } - (*p_isr)->saidx.mode = xisr->sadb_x_ipsecrequest_mode; - - switch (xisr->sadb_x_ipsecrequest_level) { - case IPSEC_LEVEL_DEFAULT: - case IPSEC_LEVEL_USE: - case IPSEC_LEVEL_REQUIRE: - break; - case IPSEC_LEVEL_UNIQUE: - /* validity check */ - /* - * If range violation of reqid, kernel will - * update it, don't refuse it. - */ - if (xisr->sadb_x_ipsecrequest_reqid - > IPSEC_MANUAL_REQID_MAX) { + + tlen = PFKEY_EXTLEN(xpl0) - sizeof(*xpl0); + xisr = (struct sadb_x_ipsecrequest *)(xpl0 + 1); + + while (tlen > 0) { + + /* length check */ + if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr)) { + ipseclog((LOG_DEBUG, "key_msg2sp: " + "invalid ipsecrequest length.\n")); + key_freesp(newsp, KEY_SADB_UNLOCKED); + *error = EINVAL; + return NULL; + } + + /* allocate request buffer */ + KMALLOC_WAIT(*p_isr, struct ipsecrequest *, sizeof(**p_isr)); + if ((*p_isr) == NULL) { ipseclog((LOG_DEBUG, - "key_msg2sp: reqid=%d range " - "violation, updated by kernel.\n", - xisr->sadb_x_ipsecrequest_reqid)); - xisr->sadb_x_ipsecrequest_reqid = 0; + "key_msg2sp: No more memory.\n")); + key_freesp(newsp, KEY_SADB_UNLOCKED); + *error = ENOBUFS; + return NULL; } - - /* allocate new reqid id if reqid is zero. */ - if (xisr->sadb_x_ipsecrequest_reqid == 0) { - u_int32_t reqid; - if ((reqid = key_newreqid()) == 0) { + bzero(*p_isr, sizeof(**p_isr)); + + /* set values */ + (*p_isr)->next = NULL; + + switch (xisr->sadb_x_ipsecrequest_proto) { + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_IPCOMP: + break; + default: + ipseclog((LOG_DEBUG, + "key_msg2sp: invalid proto type=%u\n", + xisr->sadb_x_ipsecrequest_proto)); key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = ENOBUFS; + *error = EPROTONOSUPPORT; return NULL; - } - (*p_isr)->saidx.reqid = reqid; - xisr->sadb_x_ipsecrequest_reqid = reqid; - } else { - /* set it for manual keying. 
*/ - (*p_isr)->saidx.reqid = - xisr->sadb_x_ipsecrequest_reqid; } - break; - - default: - ipseclog((LOG_DEBUG, "key_msg2sp: invalid level=%u\n", - xisr->sadb_x_ipsecrequest_level)); - key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = EINVAL; - return NULL; - } - (*p_isr)->level = xisr->sadb_x_ipsecrequest_level; - - /* set IP addresses if there */ - if (xisr->sadb_x_ipsecrequest_len > sizeof(*xisr)) { - struct sockaddr *paddr; - - paddr = (struct sockaddr *)(xisr + 1); - - /* validity check */ - if (paddr->sa_len - > sizeof((*p_isr)->saidx.src)) { - ipseclog((LOG_DEBUG, "key_msg2sp: invalid request " - "address length.\n")); - key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = EINVAL; - return NULL; + (*p_isr)->saidx.proto = xisr->sadb_x_ipsecrequest_proto; + + switch (xisr->sadb_x_ipsecrequest_mode) { + case IPSEC_MODE_TRANSPORT: + case IPSEC_MODE_TUNNEL: + break; + case IPSEC_MODE_ANY: + default: + ipseclog((LOG_DEBUG, + "key_msg2sp: invalid mode=%u\n", + xisr->sadb_x_ipsecrequest_mode)); + key_freesp(newsp, KEY_SADB_UNLOCKED); + *error = EINVAL; + return NULL; } - bcopy(paddr, &(*p_isr)->saidx.src, - paddr->sa_len); - - paddr = (struct sockaddr *)((caddr_t)paddr - + paddr->sa_len); - + (*p_isr)->saidx.mode = xisr->sadb_x_ipsecrequest_mode; + + switch (xisr->sadb_x_ipsecrequest_level) { + case IPSEC_LEVEL_DEFAULT: + case IPSEC_LEVEL_USE: + case IPSEC_LEVEL_REQUIRE: + break; + case IPSEC_LEVEL_UNIQUE: + /* validity check */ + /* + * If range violation of reqid, kernel will + * update it, don't refuse it. + */ + if (xisr->sadb_x_ipsecrequest_reqid + > IPSEC_MANUAL_REQID_MAX) { + ipseclog((LOG_DEBUG, + "key_msg2sp: reqid=%d range " + "violation, updated by kernel.\n", + xisr->sadb_x_ipsecrequest_reqid)); + xisr->sadb_x_ipsecrequest_reqid = 0; + } + + /* allocate new reqid id if reqid is zero. */ + if (xisr->sadb_x_ipsecrequest_reqid == 0) { + u_int32_t reqid; + if ((reqid = key_newreqid()) == 0) { + key_freesp(newsp, KEY_SADB_UNLOCKED); + *error = ENOBUFS; + return NULL; + } + (*p_isr)->saidx.reqid = reqid; + xisr->sadb_x_ipsecrequest_reqid = reqid; + } else { + /* set it for manual keying. */ + (*p_isr)->saidx.reqid = + xisr->sadb_x_ipsecrequest_reqid; + } + break; + + default: + ipseclog((LOG_DEBUG, "key_msg2sp: invalid level=%u\n", + xisr->sadb_x_ipsecrequest_level)); + key_freesp(newsp, KEY_SADB_UNLOCKED); + *error = EINVAL; + return NULL; + } + (*p_isr)->level = xisr->sadb_x_ipsecrequest_level; + + /* set IP addresses if there */ + if (xisr->sadb_x_ipsecrequest_len > sizeof(*xisr)) { + struct sockaddr *paddr; + + paddr = (struct sockaddr *)(xisr + 1); + + /* validity check */ + if (paddr->sa_len + > sizeof((*p_isr)->saidx.src)) { + ipseclog((LOG_DEBUG, "key_msg2sp: invalid request " + "address length.\n")); + key_freesp(newsp, KEY_SADB_UNLOCKED); + *error = EINVAL; + return NULL; + } + bcopy(paddr, &(*p_isr)->saidx.src, + paddr->sa_len); + + paddr = (struct sockaddr *)((caddr_t)paddr + + paddr->sa_len); + + /* validity check */ + if (paddr->sa_len + > sizeof((*p_isr)->saidx.dst)) { + ipseclog((LOG_DEBUG, "key_msg2sp: invalid request " + "address length.\n")); + key_freesp(newsp, KEY_SADB_UNLOCKED); + *error = EINVAL; + return NULL; + } + bcopy(paddr, &(*p_isr)->saidx.dst, + paddr->sa_len); + } + + (*p_isr)->sp = newsp; + + /* initialization for the next. 
*/ + p_isr = &(*p_isr)->next; + tlen -= xisr->sadb_x_ipsecrequest_len; + /* validity check */ - if (paddr->sa_len - > sizeof((*p_isr)->saidx.dst)) { - ipseclog((LOG_DEBUG, "key_msg2sp: invalid request " - "address length.\n")); + if (tlen < 0) { + ipseclog((LOG_DEBUG, "key_msg2sp: becoming tlen < 0.\n")); key_freesp(newsp, KEY_SADB_UNLOCKED); *error = EINVAL; return NULL; } - bcopy(paddr, &(*p_isr)->saidx.dst, - paddr->sa_len); - } - - (*p_isr)->sp = newsp; - - /* initialization for the next. */ - p_isr = &(*p_isr)->next; - tlen -= xisr->sadb_x_ipsecrequest_len; - - /* validity check */ - if (tlen < 0) { - ipseclog((LOG_DEBUG, "key_msg2sp: becoming tlen < 0.\n")); - key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = EINVAL; - return NULL; - } - - xisr = (struct sadb_x_ipsecrequest *)(void *) + + xisr = (struct sadb_x_ipsecrequest *)(void *) ((caddr_t)xisr + xisr->sadb_x_ipsecrequest_len); - } + } } - break; - default: - ipseclog((LOG_DEBUG, "key_msg2sp: invalid policy type.\n")); - key_freesp(newsp, KEY_SADB_UNLOCKED); - *error = EINVAL; - return NULL; + break; + default: + ipseclog((LOG_DEBUG, "key_msg2sp: invalid policy type.\n")); + key_freesp(newsp, KEY_SADB_UNLOCKED); + *error = EINVAL; + return NULL; } - + *error = 0; return newsp; } @@ -1889,20 +1977,20 @@ key_newreqid(void) lck_mtx_lock(sadb_mutex); static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; int done = 0; - + /* The reqid must be limited to 16 bits because the PF_KEY message format only uses - 16 bits for this field. Once it becomes larger than 16 bits - ipsec fails to - work anymore. Changing the PF_KEY message format would introduce compatibility - issues. This code now tests to see if the tentative reqid is in use */ - + 16 bits for this field. Once it becomes larger than 16 bits - ipsec fails to + work anymore. Changing the PF_KEY message format would introduce compatibility + issues. This code now tests to see if the tentative reqid is in use */ + while (!done) { struct secpolicy *sp; - struct ipsecrequest *isr; + struct ipsecrequest *isr; int dir; - + auto_reqid = (auto_reqid == 0xFFFF - ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1); - + ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1); + /* check for uniqueness */ done = 1; for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { @@ -1918,9 +2006,9 @@ key_newreqid(void) } if (done == 0) break; - } + } } - + lck_mtx_unlock(sadb_mutex); return auto_reqid; } @@ -1930,81 +2018,81 @@ key_newreqid(void) */ struct mbuf * key_sp2msg( - struct secpolicy *sp) + struct secpolicy *sp) { struct sadb_x_policy *xpl; int tlen; caddr_t p; struct mbuf *m; - + /* sanity check. */ if (sp == NULL) panic("key_sp2msg: NULL pointer was passed.\n"); - + tlen = key_getspreqmsglen(sp); - + m = key_alloc_mbuf(tlen); if (!m || m->m_next) { /*XXX*/ if (m) m_freem(m); return NULL; } - + m->m_len = tlen; m->m_next = NULL; xpl = mtod(m, struct sadb_x_policy *); bzero(xpl, tlen); - + xpl->sadb_x_policy_len = PFKEY_UNIT64(tlen); xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; xpl->sadb_x_policy_type = sp->policy; xpl->sadb_x_policy_dir = sp->spidx.dir; xpl->sadb_x_policy_id = sp->id; p = (caddr_t)xpl + sizeof(*xpl); - + /* if is the policy for ipsec ? 
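key_newreqid() above is constrained by the PF_KEY wire format: the reqid field is 16 bits, so the allocator wraps from 0xFFFF back to IPSEC_MANUAL_REQID_MAX + 1 and checks each tentative id for uniqueness against every installed policy before handing it out. A compressed sketch of the wrap-and-rescan (in_use() stands in for the SPD walk over sptree[]):

    #include <stdint.h>
    #include <stdio.h>

    #define IPSEC_MANUAL_REQID_MAX 0x3fff

    static int in_use(uint16_t id) { return id == 0x4001; } /* one busy id */

    static uint16_t
    newreqid(void)
    {
        static uint16_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1;
        do {
            auto_reqid = (auto_reqid == 0xFFFF)
                ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1;
        } while (in_use(auto_reqid));
        return auto_reqid;
    }

    int main(void)
    {
        printf("reqid=%#x\n", newreqid());   /* 0x4002: 0x4001 was busy */
        return 0;
    }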
*/ if (sp->policy == IPSEC_POLICY_IPSEC) { struct sadb_x_ipsecrequest *xisr; struct ipsecrequest *isr; - + for (isr = sp->req; isr != NULL; isr = isr->next) { - + xisr = (struct sadb_x_ipsecrequest *)(void *)p; - + xisr->sadb_x_ipsecrequest_proto = isr->saidx.proto; xisr->sadb_x_ipsecrequest_mode = isr->saidx.mode; xisr->sadb_x_ipsecrequest_level = isr->level; xisr->sadb_x_ipsecrequest_reqid = isr->saidx.reqid; - + p += sizeof(*xisr); bcopy(&isr->saidx.src, p, isr->saidx.src.ss_len); p += isr->saidx.src.ss_len; bcopy(&isr->saidx.dst, p, isr->saidx.dst.ss_len); p += isr->saidx.src.ss_len; - + xisr->sadb_x_ipsecrequest_len = - PFKEY_ALIGN8(sizeof(*xisr) - + isr->saidx.src.ss_len - + isr->saidx.dst.ss_len); + PFKEY_ALIGN8(sizeof(*xisr) + + isr->saidx.src.ss_len + + isr->saidx.dst.ss_len); } } - + return m; } /* m will not be freed nor modified */ static struct mbuf * key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp, - int ndeep, int nitem, int *items) + int ndeep, int nitem, int *items) { int idx; int i; struct mbuf *result = NULL, *n; int len; - + if (m == NULL || mhp == NULL) panic("null pointer passed to key_gather"); - + for (i = 0; i < nitem; i++) { idx = items[i]; if (idx < 0 || idx > SADB_EXT_MAX) @@ -2015,20 +2103,16 @@ key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp, if (idx != SADB_EXT_RESERVED && (mhp->ext[idx] == NULL || mhp->extlen[idx] == 0)) continue; - + if (idx == SADB_EXT_RESERVED) { len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); -#if DIAGNOSTIC - if (len > MHLEN) - panic("assumption failed"); -#endif - MGETHDR(n, M_WAITOK, MT_DATA); + MGETHDR(n, M_WAITOK, MT_DATA); // sadb_msg len < MHLEN - enforced by _CASSERT if (!n) goto fail; n->m_len = len; n->m_next = NULL; m_copydata(m, 0, sizeof(struct sadb_msg), - mtod(n, caddr_t)); + mtod(n, caddr_t)); } else if (i < ndeep) { len = mhp->extlen[idx]; n = key_alloc_mbuf(len); @@ -2038,28 +2122,28 @@ key_gather_mbuf(struct mbuf *m, const struct sadb_msghdr *mhp, goto fail; } m_copydata(m, mhp->extoff[idx], mhp->extlen[idx], - mtod(n, caddr_t)); + mtod(n, caddr_t)); } else { n = m_copym(m, mhp->extoff[idx], mhp->extlen[idx], - M_WAITOK); + M_WAITOK); } if (n == NULL) goto fail; - + if (result) m_cat(result, n); else result = n; } - + if ((result->m_flags & M_PKTHDR) != 0) { result->m_pkthdr.len = 0; for (n = result; n; n = n->m_next) result->m_pkthdr.len += n->m_len; } - + return result; - + fail: m_freem(result); return NULL; @@ -2083,32 +2167,51 @@ fail: */ static int key_spdadd( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { - struct sadb_address *src0, *dst0; + struct sadb_address *src0, *dst0, *src1, *dst1; struct sadb_x_policy *xpl0, *xpl; struct sadb_lifetime *lft = NULL; struct secpolicyindex spidx; struct secpolicy *newsp; struct timeval tv; + ifnet_t internal_if = NULL; + char *outgoing_if = NULL; + char *ipsec_if = NULL; + struct sadb_x_ipsecif *ipsecifopts = NULL; int error; - + int use_src_range = 0; + int use_dst_range = 0; + int init_disabled = 0; + int address_family, address_len; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spdadd: NULL pointer is passed.\n"); - - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || + + if (mhp->ext[SADB_X_EXT_ADDR_RANGE_SRC_START] != NULL && mhp->ext[SADB_X_EXT_ADDR_RANGE_SRC_END] != NULL) { + use_src_range = 1; + } + if 
(mhp->ext[SADB_X_EXT_ADDR_RANGE_DST_START] != NULL && mhp->ext[SADB_X_EXT_ADDR_RANGE_DST_END] != NULL) { + use_dst_range = 1; + } + + if ((!use_src_range && mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL) || + (!use_dst_range && mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) || mhp->ext[SADB_X_EXT_POLICY] == NULL) { ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || + if ((use_src_range && (mhp->extlen[SADB_X_EXT_ADDR_RANGE_SRC_START] < sizeof(struct sadb_address) + || mhp->extlen[SADB_X_EXT_ADDR_RANGE_SRC_END] < sizeof(struct sadb_address))) || + (!use_src_range && mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address)) || + (use_dst_range && (mhp->extlen[SADB_X_EXT_ADDR_RANGE_DST_START] < sizeof(struct sadb_address) + || mhp->extlen[SADB_X_EXT_ADDR_RANGE_DST_END] < sizeof(struct sadb_address))) || + (!use_dst_range && mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); @@ -2120,50 +2223,106 @@ key_spdadd( return key_senderror(so, m, EINVAL); } lft = (struct sadb_lifetime *) - (void *)mhp->ext[SADB_EXT_LIFETIME_HARD]; + (void *)mhp->ext[SADB_EXT_LIFETIME_HARD]; } - - src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; - dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; + if (mhp->ext[SADB_X_EXT_IPSECIF] != NULL) { + if (mhp->extlen[SADB_X_EXT_IPSECIF] < sizeof(struct sadb_x_ipsecif)) { + ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n")); + return key_senderror(so, m, EINVAL); + } + } + + if (use_src_range) { + src0 = (struct sadb_address *)mhp->ext[SADB_X_EXT_ADDR_RANGE_SRC_START]; + src1 = (struct sadb_address *)mhp->ext[SADB_X_EXT_ADDR_RANGE_SRC_END]; + } else { + src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; + } + if (use_dst_range) { + dst0 = (struct sadb_address *)mhp->ext[SADB_X_EXT_ADDR_RANGE_DST_START]; + dst1 = (struct sadb_address *)mhp->ext[SADB_X_EXT_ADDR_RANGE_DST_END]; + } else { + dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; + } xpl0 = (struct sadb_x_policy *)(void *)mhp->ext[SADB_X_EXT_POLICY]; - - /* make secindex */ - /* XXX boundary check against sa_len */ - KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, - src0 + 1, - dst0 + 1, - src0->sadb_address_prefixlen, - dst0->sadb_address_prefixlen, - src0->sadb_address_proto, - &spidx); - - /* checking the direciton. */ + ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF]; + + /* check addresses */ + address_family = ((struct sockaddr *)(src0 + 1))->sa_family; + address_len = ((struct sockaddr *)(src0 + 1))->sa_len; + if (use_src_range) { + if (((struct sockaddr *)(src1+ 1))->sa_family != address_family || + ((struct sockaddr *)(src1+ 1))->sa_len != address_len) { + return key_senderror(so, m, EINVAL); + } + } + if (((struct sockaddr *)(dst0+ 1))->sa_family != address_family || + ((struct sockaddr *)(dst0+ 1))->sa_len != address_len) { + return key_senderror(so, m, EINVAL); + } + if (use_dst_range) { + if (((struct sockaddr *)(dst1+ 1))->sa_family != address_family || + ((struct sockaddr *)(dst1+ 1))->sa_len != address_len) { + return key_senderror(so, m, EINVAL); + } + } + + /* checking the direciton. 
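 *
 * (illustrative sketch, not from the original sources) The address
 * checks just above boil down to one rule: the base src/dst sockaddrs
 * and, when ranges are in use, both bounds of each range must all
 * share a single address family and sockaddr length. Factored out:
 */

static int
spd_addrs_consistent(const struct sockaddr *a, const struct sockaddr *b)
{
	/* same family and same length; contents are checked elsewhere */
	return (a->sa_family == b->sa_family && a->sa_len == b->sa_len);
}

/* checking the direction: only inbound and outbound are accepted.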
*/ switch (xpl0->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - case IPSEC_DIR_OUTBOUND: - break; - default: - ipseclog((LOG_DEBUG, "key_spdadd: Invalid SP direction.\n")); - mhp->msg->sadb_msg_errno = EINVAL; - return 0; - } - - /* check policy */ + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + ipseclog((LOG_DEBUG, "key_spdadd: Invalid SP direction.\n")); + mhp->msg->sadb_msg_errno = EINVAL; + return 0; + } + + /* check policy */ /* key_spdadd() accepts DISCARD, NONE and IPSEC. */ if (xpl0->sadb_x_policy_type == IPSEC_POLICY_ENTRUST - || xpl0->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { + || xpl0->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { ipseclog((LOG_DEBUG, "key_spdadd: Invalid policy type.\n")); return key_senderror(so, m, EINVAL); } - + /* policy requests are mandatory when action is ipsec. */ - if (mhp->msg->sadb_msg_type != SADB_X_SPDSETIDX - && xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC - && mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { + if (mhp->msg->sadb_msg_type != SADB_X_SPDSETIDX + && xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC + && mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { ipseclog((LOG_DEBUG, "key_spdadd: some policy requests part required.\n")); return key_senderror(so, m, EINVAL); } - + + /* Process interfaces */ + if (ipsecifopts != NULL) { + if (ipsecifopts->sadb_x_ipsecif_internal_if) { + ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_internal_if, &internal_if); + } + if (ipsecifopts->sadb_x_ipsecif_outgoing_if) { + outgoing_if = ipsecifopts->sadb_x_ipsecif_outgoing_if; + } + if (ipsecifopts->sadb_x_ipsecif_ipsec_if) { + ipsec_if = ipsecifopts->sadb_x_ipsecif_ipsec_if; + } + init_disabled = ipsecifopts->sadb_x_ipsecif_init_disabled; + } + + /* make secindex */ + /* XXX boundary check against sa_len */ + KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, + src0 + 1, + dst0 + 1, + src0->sadb_address_prefixlen, + dst0->sadb_address_prefixlen, + src0->sadb_address_proto, + internal_if, + use_src_range ? src0 + 1 : NULL, + use_src_range ? src1 + 1 : NULL, + use_dst_range ? dst0 + 1 : NULL, + use_dst_range ? dst1 + 1 : NULL, + &spidx); + /* * checking there is SP already or not. * SPDUPDATE doesn't depend on whether there is a SP or not. @@ -2182,20 +2341,33 @@ key_spdadd( key_freesp(newsp, KEY_SADB_LOCKED); ipseclog((LOG_DEBUG, "key_spdadd: a SP entry exists already.\n")); lck_mtx_unlock(sadb_mutex); + if (internal_if) { + ifnet_release(internal_if); + internal_if = NULL; + } return key_senderror(so, m, EEXIST); } } lck_mtx_unlock(sadb_mutex); + /* allocation new SP entry */ if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { + if (internal_if) { + ifnet_release(internal_if); + internal_if = NULL; + } return key_senderror(so, m, error); } - + if ((newsp->id = key_getnewspid()) == 0) { keydb_delsecpolicy(newsp); + if (internal_if) { + ifnet_release(internal_if); + internal_if = NULL; + } return key_senderror(so, m, ENOBUFS); } - + /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, @@ -2203,23 +2375,17 @@ key_spdadd( src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, + internal_if, + use_src_range ? src0 + 1 : NULL, + use_src_range ? src1 + 1 : NULL, + use_dst_range ? dst0 + 1 : NULL, + use_dst_range ? 
dst1 + 1 : NULL, &newsp->spidx); - - /* sanity check on addr pair */ - if (((struct sockaddr *)(src0 + 1))->sa_family != - ((struct sockaddr *)(dst0+ 1))->sa_family) { - keydb_delsecpolicy(newsp); - return key_senderror(so, m, EINVAL); - } - if (((struct sockaddr *)(src0 + 1))->sa_len != - ((struct sockaddr *)(dst0+ 1))->sa_len) { - keydb_delsecpolicy(newsp); - return key_senderror(so, m, EINVAL); - } + #if 1 - /* - * allow IPv6 over IPv4 tunnels using ESP - - * otherwise reject if inner and outer address families not equal + /* + * allow IPv6 over IPv4 tunnels using ESP - + * otherwise reject if inner and outer address families not equal */ if (newsp->req && newsp->req->saidx.src.ss_family) { struct sockaddr *sa; @@ -2228,6 +2394,10 @@ key_spdadd( if (newsp->req->saidx.mode != IPSEC_MODE_TUNNEL || newsp->req->saidx.proto != IPPROTO_ESP || sa->sa_family != AF_INET6 || newsp->req->saidx.src.ss_family != AF_INET) { keydb_delsecpolicy(newsp); + if (internal_if) { + ifnet_release(internal_if); + internal_if = NULL; + } return key_senderror(so, m, EINVAL); } } @@ -2239,44 +2409,60 @@ key_spdadd( if (newsp->req->saidx.mode != IPSEC_MODE_TUNNEL || newsp->req->saidx.proto != IPPROTO_ESP || sa->sa_family != AF_INET6 || newsp->req->saidx.dst.ss_family != AF_INET) { keydb_delsecpolicy(newsp); + if (internal_if) { + ifnet_release(internal_if); + internal_if = NULL; + } return key_senderror(so, m, EINVAL); } } } #endif - + microtime(&tv); newsp->created = tv.tv_sec; newsp->lastused = tv.tv_sec; newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; - + + if (outgoing_if != NULL) { + ifnet_find_by_name(outgoing_if, &newsp->outgoing_if); + } + if (ipsec_if != NULL) { + ifnet_find_by_name(ipsec_if, &newsp->ipsec_if); + } + if (init_disabled > 0) { + newsp->disabled = 1; + } + newsp->refcnt = 1; /* do not reclaim until I say I do */ newsp->state = IPSEC_SPSTATE_ALIVE; lck_mtx_lock(sadb_mutex); /* * policies of type generate should be at the end of the SPD * because they function as default discard policies - */ - if (newsp->policy == IPSEC_POLICY_GENERATE) + * Don't start timehandler for generate policies + */ + if (newsp->policy == IPSEC_POLICY_GENERATE) LIST_INSERT_TAIL(&sptree[newsp->spidx.dir], newsp, secpolicy, chain); else { /* XXX until we have policy ordering in the kernel */ struct secpolicy *tmpsp; - + LIST_FOREACH(tmpsp, &sptree[newsp->spidx.dir], chain) - if (tmpsp->policy == IPSEC_POLICY_GENERATE) - break; + if (tmpsp->policy == IPSEC_POLICY_GENERATE) + break; if (tmpsp) LIST_INSERT_BEFORE(tmpsp, newsp, chain); else LIST_INSERT_TAIL(&sptree[newsp->spidx.dir], newsp, secpolicy, chain); + key_start_timehandler(); } - + ipsec_policy_count++; /* Turn off the ipsec bypass */ if (ipsec_bypass != 0) ipsec_bypass = 0; - + /* delete the entry in spacqtree */ if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { struct secspacq *spacq; @@ -2288,51 +2474,54 @@ key_spdadd( } } lck_mtx_unlock(sadb_mutex); - + { - struct mbuf *n, *mpolicy; - struct sadb_msg *newmsg; - int off; - - /* create new sadb_msg to reply. 
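 *
 * (illustrative sketch, not from the original sources) The insertion
 * above preserves one SPD ordering invariant: IPSEC_POLICY_GENERATE
 * entries act as default discard policies and must stay at the tail,
 * so any other policy is placed in front of the first GENERATE entry.
 * Restated with the same list macros the file already uses:
 */

LIST_FOREACH(tmpsp, &sptree[newsp->spidx.dir], chain)
	if (tmpsp->policy == IPSEC_POLICY_GENERATE)
		break;			/* first default-discard entry */
if (tmpsp)
	LIST_INSERT_BEFORE(tmpsp, newsp, chain);
else
	LIST_INSERT_TAIL(&sptree[newsp->spidx.dir], newsp, secpolicy, chain);

/* create new sadb_msg to reply.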
*/ - if (lft) { - int mbufItems[] = {SADB_EXT_RESERVED, SADB_X_EXT_POLICY, - SADB_EXT_LIFETIME_HARD, SADB_EXT_ADDRESS_SRC, - SADB_EXT_ADDRESS_DST}; - n = key_gather_mbuf(m, mhp, 2, sizeof(mbufItems)/sizeof(int), mbufItems); - } else { - int mbufItems[] = {SADB_EXT_RESERVED, SADB_X_EXT_POLICY, - SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST}; - n = key_gather_mbuf(m, mhp, 2, sizeof(mbufItems)/sizeof(int), mbufItems); - } - if (!n) - return key_senderror(so, m, ENOBUFS); - - if (n->m_len < sizeof(*newmsg)) { - n = m_pullup(n, sizeof(*newmsg)); + struct mbuf *n, *mpolicy; + struct sadb_msg *newmsg; + int off; + + /* create new sadb_msg to reply. */ + if (lft) { + int mbufItems[] = {SADB_EXT_RESERVED, SADB_X_EXT_POLICY, + SADB_EXT_LIFETIME_HARD, SADB_EXT_ADDRESS_SRC, + SADB_EXT_ADDRESS_DST, SADB_X_EXT_ADDR_RANGE_SRC_START, SADB_X_EXT_ADDR_RANGE_SRC_END, + SADB_X_EXT_ADDR_RANGE_DST_START, SADB_X_EXT_ADDR_RANGE_DST_END}; + n = key_gather_mbuf(m, mhp, 2, sizeof(mbufItems)/sizeof(int), mbufItems); + } else { + int mbufItems[] = {SADB_EXT_RESERVED, SADB_X_EXT_POLICY, + SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, + SADB_X_EXT_ADDR_RANGE_SRC_START, SADB_X_EXT_ADDR_RANGE_SRC_END, + SADB_X_EXT_ADDR_RANGE_DST_START, SADB_X_EXT_ADDR_RANGE_DST_END}; + n = key_gather_mbuf(m, mhp, 2, sizeof(mbufItems)/sizeof(int), mbufItems); + } if (!n) return key_senderror(so, m, ENOBUFS); - } - newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_errno = 0; - newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); - - off = 0; - mpolicy = m_pulldown(n, PFKEY_ALIGN8(sizeof(struct sadb_msg)), - sizeof(*xpl), &off); - if (mpolicy == NULL) { - /* n is already freed */ - return key_senderror(so, m, ENOBUFS); - } - xpl = (struct sadb_x_policy *)(void *)(mtod(mpolicy, caddr_t) + off); - if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) { - m_freem(n); - return key_senderror(so, m, EINVAL); - } - xpl->sadb_x_policy_id = newsp->id; - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + + if (n->m_len < sizeof(*newmsg)) { + n = m_pullup(n, sizeof(*newmsg)); + if (!n) + return key_senderror(so, m, ENOBUFS); + } + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + off = 0; + mpolicy = m_pulldown(n, PFKEY_ALIGN8(sizeof(struct sadb_msg)), + sizeof(*xpl), &off); + if (mpolicy == NULL) { + /* n is already freed */ + return key_senderror(so, m, ENOBUFS); + } + xpl = (struct sadb_x_policy *)(void *)(mtod(mpolicy, caddr_t) + off); + if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) { + m_freem(n); + return key_senderror(so, m, EINVAL); + } + xpl->sadb_x_policy_id = newsp->id; + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } @@ -2353,10 +2542,10 @@ key_getnewspid(void) lck_mtx_lock(sadb_mutex); while (count--) { newid = (policy_id = (policy_id == ~0 ? 
1 : policy_id + 1)); - - if ((sp = key_getspbyid(newid)) == NULL) + + if ((sp = __key_getspbyid(newid)) == NULL) break; - + key_freesp(sp, KEY_SADB_LOCKED); } lck_mtx_unlock(sadb_mutex); @@ -2364,7 +2553,7 @@ key_getnewspid(void) ipseclog((LOG_DEBUG, "key_getnewspid: to allocate policy id is failed.\n")); return 0; } - + return newid; } @@ -2382,38 +2571,81 @@ key_getnewspid(void) */ static int key_spddelete( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { - struct sadb_address *src0, *dst0; + struct sadb_address *src0, *dst0, *src1, *dst1; struct sadb_x_policy *xpl0; struct secpolicyindex spidx; struct secpolicy *sp; - - lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + ifnet_t internal_if = NULL; + struct sadb_x_ipsecif *ipsecifopts = NULL; + int use_src_range = 0; + int use_dst_range = 0; + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spddelete: NULL pointer is passed.\n"); - - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || + + if (mhp->ext[SADB_X_EXT_ADDR_RANGE_SRC_START] != NULL && mhp->ext[SADB_X_EXT_ADDR_RANGE_SRC_END] != NULL) { + use_src_range = 1; + } + if (mhp->ext[SADB_X_EXT_ADDR_RANGE_DST_START] != NULL && mhp->ext[SADB_X_EXT_ADDR_RANGE_DST_END] != NULL) { + use_dst_range = 1; + } + + if ((!use_src_range && mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL) || + (!use_dst_range && mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) || mhp->ext[SADB_X_EXT_POLICY] == NULL) { ipseclog((LOG_DEBUG, "key_spddelete: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || + if ((use_src_range && (mhp->extlen[SADB_X_EXT_ADDR_RANGE_SRC_START] < sizeof(struct sadb_address) + || mhp->extlen[SADB_X_EXT_ADDR_RANGE_SRC_END] < sizeof(struct sadb_address))) || + (!use_src_range && mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address)) || + (use_dst_range && (mhp->extlen[SADB_X_EXT_ADDR_RANGE_DST_START] < sizeof(struct sadb_address) + || mhp->extlen[SADB_X_EXT_ADDR_RANGE_DST_END] < sizeof(struct sadb_address))) || + (!use_dst_range && mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "key_spddelete: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - - src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; - dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; + + if (use_src_range) { + src0 = (struct sadb_address *)mhp->ext[SADB_X_EXT_ADDR_RANGE_SRC_START]; + src1 = (struct sadb_address *)mhp->ext[SADB_X_EXT_ADDR_RANGE_SRC_END]; + } else { + src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; + } + if (use_dst_range) { + dst0 = (struct sadb_address *)mhp->ext[SADB_X_EXT_ADDR_RANGE_DST_START]; + dst1 = (struct sadb_address *)mhp->ext[SADB_X_EXT_ADDR_RANGE_DST_END]; + } else { + dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; + } xpl0 = (struct sadb_x_policy *)(void *)mhp->ext[SADB_X_EXT_POLICY]; - + ipsecifopts = (struct sadb_x_ipsecif *)(void *)mhp->ext[SADB_X_EXT_IPSECIF]; + + /* checking the direciton. 
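 *
 * (illustrative sketch, not from the original sources) key_getnewspid()
 * above bounds its search: it advances a wrapping counter (id 0 means
 * "none", so the wrap goes from ~0 back to 1) and probes the SPD until
 * an unused id is found or the attempt budget runs out. In outline,
 * as in the function above:
 */

struct secpolicy *sp;
u_int32_t newid = 0;
while (count--) {
	newid = (policy_id = (policy_id == ~0 ? 1 : policy_id + 1));
	if ((sp = __key_getspbyid(newid)) == NULL)
		break;				/* unused - claim it */
	key_freesp(sp, KEY_SADB_LOCKED);	/* in use - drop ref, retry */
}

/* checking the direction before building the search index.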
*/ + switch (xpl0->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + ipseclog((LOG_DEBUG, "key_spddelete: Invalid SP direction.\n")); + return key_senderror(so, m, EINVAL); + } + + /* Process interfaces */ + if (ipsecifopts != NULL) { + if (ipsecifopts->sadb_x_ipsecif_internal_if) { + ifnet_find_by_name(ipsecifopts->sadb_x_ipsecif_internal_if, &internal_if); + } + } + /* make secindex */ /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, @@ -2422,51 +2654,57 @@ key_spddelete( src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, + internal_if, + use_src_range ? src0 + 1 : NULL, + use_src_range ? src1 + 1 : NULL, + use_dst_range ? dst0 + 1 : NULL, + use_dst_range ? dst1 + 1 : NULL, &spidx); - - /* checking the direciton. */ - switch (xpl0->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - case IPSEC_DIR_OUTBOUND: - break; - default: - ipseclog((LOG_DEBUG, "key_spddelete: Invalid SP direction.\n")); - return key_senderror(so, m, EINVAL); - } - + /* Is there SP in SPD ? */ lck_mtx_lock(sadb_mutex); if ((sp = key_getsp(&spidx)) == NULL) { ipseclog((LOG_DEBUG, "key_spddelete: no SP found.\n")); lck_mtx_unlock(sadb_mutex); + if (internal_if) { + ifnet_release(internal_if); + internal_if = NULL; + } return key_senderror(so, m, EINVAL); } - + + if (internal_if) { + ifnet_release(internal_if); + internal_if = NULL; + } + /* save policy id to buffer to be returned. */ xpl0->sadb_x_policy_id = sp->id; - + sp->state = IPSEC_SPSTATE_DEAD; key_freesp(sp, KEY_SADB_LOCKED); lck_mtx_unlock(sadb_mutex); - - + + { - struct mbuf *n; - struct sadb_msg *newmsg; - int mbufItems[] = {SADB_EXT_RESERVED, SADB_X_EXT_POLICY, - SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST}; - - /* create new sadb_msg to reply. */ - n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); - if (!n) - return key_senderror(so, m, ENOBUFS); - - newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_errno = 0; - newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + struct mbuf *n; + struct sadb_msg *newmsg; + int mbufItems[] = {SADB_EXT_RESERVED, SADB_X_EXT_POLICY, + SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, + SADB_X_EXT_ADDR_RANGE_SRC_START, SADB_X_EXT_ADDR_RANGE_SRC_END, + SADB_X_EXT_ADDR_RANGE_DST_START, SADB_X_EXT_ADDR_RANGE_DST_END}; + + /* create new sadb_msg to reply. 
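 *
 * (illustrative sketch, not from the original sources) Two disciplines
 * meet in the handling above: deletion is done by first marking the
 * entry dead under sadb_mutex and then dropping the reference that
 * key_getsp() took, and the ifnet reference acquired through
 * ifnet_find_by_name() has to be released on every path out of the
 * function:
 */

sp->state = IPSEC_SPSTATE_DEAD;		/* hide from future lookups */
key_freesp(sp, KEY_SADB_LOCKED);	/* drop the lookup reference */
if (internal_if) {
	ifnet_release(internal_if);	/* balance ifnet_find_by_name() */
	internal_if = NULL;
}

/* create new sadb_msg to reply.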
*/ + n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); + if (!n) + return key_senderror(so, m, ENOBUFS); + + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } @@ -2484,91 +2722,211 @@ key_spddelete( */ static int key_spddelete2( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { u_int32_t id; struct secpolicy *sp; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spddelete2: NULL pointer is passed.\n"); - + if (mhp->ext[SADB_X_EXT_POLICY] == NULL || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "key_spddelete2: invalid message is passed.\n")); key_senderror(so, m, EINVAL); return 0; } - + id = ((struct sadb_x_policy *) - (void *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; - + (void *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + /* Is there SP in SPD ? */ lck_mtx_lock(sadb_mutex); - if ((sp = key_getspbyid(id)) == NULL) { + if ((sp = __key_getspbyid(id)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_spddelete2: no SP found id:%u.\n", id)); return key_senderror(so, m, EINVAL); } - + sp->state = IPSEC_SPSTATE_DEAD; key_freesp(sp, KEY_SADB_LOCKED); lck_mtx_unlock(sadb_mutex); - + { - struct mbuf *n, *nn; - struct sadb_msg *newmsg; - int off, len; - - /* create new sadb_msg to reply. */ - len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); - - if (len > MCLBYTES) - return key_senderror(so, m, ENOBUFS); - MGETHDR(n, M_WAITOK, MT_DATA); - if (n && len > MHLEN) { - MCLGET(n, M_WAITOK); - if ((n->m_flags & M_EXT) == 0) { - m_freem(n); - n = NULL; + struct mbuf *n, *nn; + struct sadb_msg *newmsg; + int off, len; + + /* create new sadb_msg to reply. 
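 *
 * (illustrative sketch, not from the original sources) Unlike its
 * siblings, key_spddelete2() builds the reply by hand rather than via
 * key_gather_mbuf(), using the classic mbuf idiom: take a packet
 * header mbuf and, if the payload cannot fit in MHLEN, attach a
 * cluster, failing cleanly when none is available:
 */

MGETHDR(n, M_WAITOK, MT_DATA);
if (n && len > MHLEN) {
	MCLGET(n, M_WAITOK);
	if ((n->m_flags & M_EXT) == 0) {	/* no cluster attached */
		m_freem(n);
		n = NULL;
	}
}
if (!n)
	return key_senderror(so, m, ENOBUFS);

/* create new sadb_msg to reply.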
*/ + len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); + + if (len > MCLBYTES) + return key_senderror(so, m, ENOBUFS); + MGETHDR(n, M_WAITOK, MT_DATA); + if (n && len > MHLEN) { + MCLGET(n, M_WAITOK); + if ((n->m_flags & M_EXT) == 0) { + m_freem(n); + n = NULL; + } } - } - if (!n) - return key_senderror(so, m, ENOBUFS); - - n->m_len = len; - n->m_next = NULL; - off = 0; - - m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); - off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); - + if (!n) + return key_senderror(so, m, ENOBUFS); + + n->m_len = len; + n->m_next = NULL; + off = 0; + + m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); + off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); + #if DIAGNOSTIC - if (off != len) - panic("length inconsistency in key_spddelete2"); + if (off != len) + panic("length inconsistency in key_spddelete2"); #endif + + n->m_next = m_copym(m, mhp->extoff[SADB_X_EXT_POLICY], + mhp->extlen[SADB_X_EXT_POLICY], M_WAITOK); + if (!n->m_next) { + m_freem(n); + return key_senderror(so, m, ENOBUFS); + } + + n->m_pkthdr.len = 0; + for (nn = n; nn; nn = nn->m_next) + n->m_pkthdr.len += nn->m_len; + + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + } +} - n->m_next = m_copym(m, mhp->extoff[SADB_X_EXT_POLICY], - mhp->extlen[SADB_X_EXT_POLICY], M_WAITOK); - if (!n->m_next) { - m_freem(n); - return key_senderror(so, m, ENOBUFS); - } - - n->m_pkthdr.len = 0; - for (nn = n; nn; nn = nn->m_next) - n->m_pkthdr.len += nn->m_len; - - newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_errno = 0; - newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); +static int +key_spdenable( + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) +{ + u_int32_t id; + struct secpolicy *sp; + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + /* sanity check */ + if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) + panic("key_spdenable: NULL pointer is passed.\n"); + + if (mhp->ext[SADB_X_EXT_POLICY] == NULL || + mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { + ipseclog((LOG_DEBUG, "key_spdenable: invalid message is passed.\n")); + key_senderror(so, m, EINVAL); + return 0; + } + + id = ((struct sadb_x_policy *) + (void *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + + /* Is there SP in SPD ? */ + lck_mtx_lock(sadb_mutex); + if ((sp = __key_getspbyid(id)) == NULL) { + lck_mtx_unlock(sadb_mutex); + ipseclog((LOG_DEBUG, "key_spdenable: no SP found id:%u.\n", id)); + return key_senderror(so, m, EINVAL); + } + + sp->disabled = 0; + lck_mtx_unlock(sadb_mutex); + + { + struct mbuf *n; + struct sadb_msg *newmsg; + int mbufItems[] = {SADB_EXT_RESERVED, SADB_X_EXT_POLICY}; + + /* create new sadb_msg to reply. 
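 *
 * (illustrative sketch, not from the original sources) key_spdenable()
 * and its twin key_spddisable() are mirror images; both resolve the
 * policy by id under sadb_mutex and differ only in the value stored to
 * sp->disabled. A hypothetical user-space request for this handler,
 * assuming the SADB_X_SPDENABLE message type and the PFKEY_UNIT64
 * macro from the pfkeyv2 headers, could be framed like this:
 */

struct {
	struct sadb_msg      msg;
	struct sadb_x_policy pol;
} req;
int pfkey_sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);

memset(&req, 0, sizeof(req));
req.msg.sadb_msg_version = PF_KEY_V2;
req.msg.sadb_msg_type    = SADB_X_SPDENABLE;	/* assumed constant */
req.msg.sadb_msg_satype  = SADB_SATYPE_UNSPEC;
req.msg.sadb_msg_len     = PFKEY_UNIT64(sizeof(req));
req.msg.sadb_msg_pid     = getpid();
req.pol.sadb_x_policy_len     = PFKEY_UNIT64(sizeof(req.pol));
req.pol.sadb_x_policy_exttype = SADB_X_EXT_POLICY;
req.pol.sadb_x_policy_id      = target_policy_id;	/* hypothetical id */
send(pfkey_sock, &req, sizeof(req), 0);

/* create new sadb_msg to reply.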
*/ + n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); + if (!n) + return key_senderror(so, m, ENOBUFS); + + if (n->m_len < sizeof(struct sadb_msg)) { + n = m_pullup(n, sizeof(struct sadb_msg)); + if (n == NULL) + return key_senderror(so, m, ENOBUFS); + } + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + } +} - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); +static int +key_spddisable( + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) +{ + u_int32_t id; + struct secpolicy *sp; + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + /* sanity check */ + if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) + panic("key_spddisable: NULL pointer is passed.\n"); + + if (mhp->ext[SADB_X_EXT_POLICY] == NULL || + mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { + ipseclog((LOG_DEBUG, "key_spddisable: invalid message is passed.\n")); + key_senderror(so, m, EINVAL); + return 0; + } + + id = ((struct sadb_x_policy *) + (void *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + + /* Is there SP in SPD ? */ + lck_mtx_lock(sadb_mutex); + if ((sp = __key_getspbyid(id)) == NULL) { + lck_mtx_unlock(sadb_mutex); + ipseclog((LOG_DEBUG, "key_spddisable: no SP found id:%u.\n", id)); + return key_senderror(so, m, EINVAL); + } + + sp->disabled = 1; + lck_mtx_unlock(sadb_mutex); + + { + struct mbuf *n; + struct sadb_msg *newmsg; + int mbufItems[] = {SADB_EXT_RESERVED, SADB_X_EXT_POLICY}; + + /* create new sadb_msg to reply. */ + n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); + if (!n) + return key_senderror(so, m, ENOBUFS); + + if (n->m_len < sizeof(struct sadb_msg)) { + n = m_pullup(n, sizeof(struct sadb_msg)); + if (n == NULL) + return key_senderror(so, m, ENOBUFS); + } + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } @@ -2586,32 +2944,32 @@ key_spddelete2( */ static int key_spdget( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { u_int32_t id; struct secpolicy *sp; struct mbuf *n; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spdget: NULL pointer is passed.\n"); - + if (mhp->ext[SADB_X_EXT_POLICY] == NULL || mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { ipseclog((LOG_DEBUG, "key_spdget: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - + id = ((struct sadb_x_policy *) - (void *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; - + (void *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + /* Is there SP in SPD ? 
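 *
 * (illustrative sketch, not from the original sources) Before any of
 * the replies above are cast with mtod(), the code makes sure the
 * sadb_msg header is contiguous in the first mbuf; the recurring
 * guard is:
 */

if (n->m_len < sizeof(struct sadb_msg)) {
	n = m_pullup(n, sizeof(struct sadb_msg));	/* make header contiguous */
	if (n == NULL)			/* chain already freed by m_pullup */
		return key_senderror(so, m, ENOBUFS);
}

/* Is there an SP with this id in the SPD ?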
*/ lck_mtx_lock(sadb_mutex); - if ((sp = key_getspbyid(id)) == NULL) { + if ((sp = __key_getspbyid(id)) == NULL) { ipseclog((LOG_DEBUG, "key_spdget: no SP found id:%u.\n", id)); lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, ENOENT); @@ -2642,12 +3000,12 @@ key_spdget( */ int key_spdacquire( - struct secpolicy *sp) + struct secpolicy *sp) { struct mbuf *result = NULL, *m; struct secspacq *newspacq; int error; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ @@ -2657,7 +3015,7 @@ key_spdacquire( panic("key_spdacquire: called but there is request.\n"); if (sp->policy != IPSEC_POLICY_IPSEC) panic("key_spdacquire: policy mismathed. IPsec is expected.\n"); - + /* get a entry to check whether sent message or not. */ lck_mtx_lock(sadb_mutex); if ((newspacq = key_getspacq(&sp->spidx)) != NULL) { @@ -2678,6 +3036,7 @@ key_spdacquire( } /* add to acqtree */ LIST_INSERT_HEAD(&spacqtree, newspacq, chain); + key_start_timehandler(); } lck_mtx_unlock(sadb_mutex); /* create new sadb_msg to reply. */ @@ -2687,16 +3046,16 @@ key_spdacquire( goto fail; } result = m; - + result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; - + mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - + PFKEY_UNIT64(result->m_pkthdr.len); + return key_sendup_mbuf(NULL, m, KEY_SENDUP_REGISTERED); - + fail: if (result) m_freem(result); @@ -2717,9 +3076,9 @@ fail: */ static int key_spdflush( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct sadb_msg *newmsg; struct secpolicy *sp; @@ -2728,10 +3087,10 @@ key_spdflush( /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spdflush: NULL pointer is passed.\n"); - + if (m->m_len != PFKEY_ALIGN8(sizeof(struct sadb_msg))) return key_senderror(so, m, EINVAL); - + lck_mtx_lock(sadb_mutex); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { LIST_FOREACH(sp, &sptree[dir], chain) { @@ -2744,7 +3103,7 @@ key_spdflush( ipseclog((LOG_DEBUG, "key_spdflush: No more memory.\n")); return key_senderror(so, m, ENOBUFS); } - + if (m->m_next) m_freem(m->m_next); m->m_next = NULL; @@ -2752,7 +3111,7 @@ key_spdflush( newmsg = mtod(m, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); - + return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } @@ -2767,12 +3126,12 @@ key_spdflush( * * m will always be freed. 
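 *
 * (illustrative note, not from the original sources) Contrast with
 * key_spdflush() above, which frees nothing directly: it walks the
 * chains of every direction under sadb_mutex and only flips states,
 * leaving the actual teardown to reference counting:
 *
 *	lck_mtx_lock(sadb_mutex);
 *	for (dir = 0; dir < IPSEC_DIR_MAX; dir++) {
 *		LIST_FOREACH(sp, &sptree[dir], chain)
 *			sp->state = IPSEC_SPSTATE_DEAD;
 *	}
 *	lck_mtx_unlock(sadb_mutex);
 *
 * key_spddump() below follows the same locking rule while snapshotting
 * the SPD into a temporary array before sending.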
*/ - + static int key_spddump( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct secpolicy *sp, **spbuf = NULL, **sp_ptr; int cnt = 0, bufcount; @@ -2783,7 +3142,7 @@ key_spddump( /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_spddump: NULL pointer is passed.\n"); - + if ((bufcount = ipsec_policy_count) == 0) { error = ENOENT; goto end; @@ -2800,7 +3159,7 @@ key_spddump( sp_ptr = spbuf; for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { LIST_FOREACH(sp, &sptree[dir], chain) { - if (cnt == bufcount) + if (cnt == bufcount) break; /* buffer full */ *sp_ptr++ = sp; sp->refcnt++; @@ -2808,7 +3167,7 @@ key_spddump( } } lck_mtx_unlock(sadb_mutex); - + if (cnt == 0) { error = ENOENT; goto end; @@ -2818,8 +3177,8 @@ key_spddump( while (cnt) { --cnt; n = key_setdumpsp(*sp_ptr++, SADB_X_SPDDUMP, cnt, - mhp->msg->sadb_msg_pid); - + mhp->msg->sadb_msg_pid); + if (n) key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } @@ -2829,68 +3188,107 @@ key_spddump( key_freesp(*(--sp_ptr), KEY_SADB_LOCKED); lck_mtx_unlock(sadb_mutex); -end: +end: if (spbuf) KFREE(spbuf); if (error) return key_senderror(so, m, error); - + m_freem(m); return 0; - + } static struct mbuf * key_setdumpsp( - struct secpolicy *sp, - u_int8_t type, - u_int32_t seq, - u_int32_t pid) + struct secpolicy *sp, + u_int8_t type, + u_int32_t seq, + u_int32_t pid) { struct mbuf *result = NULL, *m; - + m = key_setsadbmsg(type, 0, SADB_SATYPE_UNSPEC, seq, pid, sp->refcnt); if (!m) goto fail; result = m; - - m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - (struct sockaddr *)&sp->spidx.src, sp->spidx.prefs, - sp->spidx.ul_proto); - if (!m) - goto fail; - m_cat(result, m); - - m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - (struct sockaddr *)&sp->spidx.dst, sp->spidx.prefd, - sp->spidx.ul_proto); - if (!m) - goto fail; - m_cat(result, m); - + + if (sp->spidx.src_range.start.ss_len > 0) { + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_SRC_START, + (struct sockaddr *)&sp->spidx.src_range.start, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_SRC_END, + (struct sockaddr *)&sp->spidx.src_range.end, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + } else { + m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, + (struct sockaddr *)&sp->spidx.src, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + } + + if (sp->spidx.dst_range.start.ss_len > 0) { + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_DST_START, + (struct sockaddr *)&sp->spidx.dst_range.start, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_DST_END, + (struct sockaddr *)&sp->spidx.dst_range.end, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + } else { + m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, + (struct sockaddr *)&sp->spidx.dst, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + } + + if (sp->spidx.internal_if || sp->outgoing_if || sp->ipsec_if || sp->disabled) { + m = key_setsadbipsecif(sp->spidx.internal_if, sp->outgoing_if, sp->ipsec_if, sp->disabled); + if (!m) + goto fail; + m_cat(result, m); + } + m = key_sp2msg(sp); if (!m) goto fail; m_cat(result, m); - + if ((result->m_flags & M_PKTHDR) == 0) goto fail; - + if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, 
sizeof(struct sadb_msg)); if (result == NULL) goto fail; } - + result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; - + mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - + PFKEY_UNIT64(result->m_pkthdr.len); + return result; - + fail: m_freem(result); return NULL; @@ -2901,30 +3299,30 @@ fail: */ static u_int key_getspreqmsglen( - struct secpolicy *sp) + struct secpolicy *sp) { u_int tlen; - + tlen = sizeof(struct sadb_x_policy); - + /* if is the policy for ipsec ? */ if (sp->policy != IPSEC_POLICY_IPSEC) return tlen; - + /* get length of ipsec requests */ { - struct ipsecrequest *isr; - int len; - - for (isr = sp->req; isr != NULL; isr = isr->next) { - len = sizeof(struct sadb_x_ipsecrequest) + struct ipsecrequest *isr; + int len; + + for (isr = sp->req; isr != NULL; isr = isr->next) { + len = sizeof(struct sadb_x_ipsecrequest) + isr->saidx.src.ss_len + isr->saidx.dst.ss_len; - - tlen += PFKEY_ALIGN8(len); - } + + tlen += PFKEY_ALIGN8(len); + } } - + return tlen; } @@ -2939,19 +3337,19 @@ key_getspreqmsglen( */ static int key_spdexpire( - struct secpolicy *sp) + struct secpolicy *sp) { struct mbuf *result = NULL, *m; int len; int error = EINVAL; struct sadb_lifetime *lt; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (sp == NULL) panic("key_spdexpire: NULL pointer is passed.\n"); - + /* set msg header */ m = key_setsadbmsg(SADB_X_SPDEXPIRE, 0, 0, 0, 0, 0); if (!m) { @@ -2959,7 +3357,7 @@ key_spdexpire( goto fail; } result = m; - + /* create lifetime extension (current and hard) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; m = key_alloc_mbuf(len); @@ -2985,27 +3383,67 @@ key_spdexpire( lt->sadb_lifetime_addtime = sp->lifetime; lt->sadb_lifetime_usetime = sp->validtime; m_cat(result, m); - - /* set sadb_address for source */ - m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - (struct sockaddr *)&sp->spidx.src, - sp->spidx.prefs, sp->spidx.ul_proto); - if (!m) { - error = ENOBUFS; - goto fail; - } - m_cat(result, m); - - /* set sadb_address for destination */ - m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - (struct sockaddr *)&sp->spidx.dst, - sp->spidx.prefd, sp->spidx.ul_proto); - if (!m) { - error = ENOBUFS; - goto fail; - } - m_cat(result, m); - + + /* set sadb_address(es) for source */ + if (sp->spidx.src_range.start.ss_len > 0) { + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_SRC_START, + (struct sockaddr *)&sp->spidx.src_range.start, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) { + error = ENOBUFS; + goto fail; + } + m_cat(result, m); + + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_SRC_END, + (struct sockaddr *)&sp->spidx.src_range.end, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) { + error = ENOBUFS; + goto fail; + } + m_cat(result, m); + } else { + m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, + (struct sockaddr *)&sp->spidx.src, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) { + error = ENOBUFS; + goto fail; + } + m_cat(result, m); + } + + /* set sadb_address(es) for dest */ + if (sp->spidx.dst_range.start.ss_len > 0) { + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_DST_START, + (struct sockaddr *)&sp->spidx.dst_range.start, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) { + error = ENOBUFS; + goto fail; + } + m_cat(result, m); + + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_DST_END, + (struct sockaddr *)&sp->spidx.dst_range.end, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) { + error = ENOBUFS; + goto fail; + } + m_cat(result, m); + } else { + m = 
key_setsadbaddr(SADB_EXT_ADDRESS_DST, + (struct sockaddr *)&sp->spidx.dst, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) { + error = ENOBUFS; + goto fail; + } + m_cat(result, m); + } + /* set secpolicy */ m = key_sp2msg(sp); if (!m) { @@ -3013,12 +3451,12 @@ key_spdexpire( goto fail; } m_cat(result, m); - + if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } - + if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { @@ -3026,17 +3464,17 @@ key_spdexpire( goto fail; } } - + result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; - + mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - + PFKEY_UNIT64(result->m_pkthdr.len); + return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); - - fail: + +fail: if (result) m_freem(result); return error; @@ -3050,48 +3488,48 @@ key_spdexpire( */ static struct secashead * key_newsah( - struct secasindex *saidx, - u_int8_t dir) + struct secasindex *saidx, + u_int8_t dir) { struct secashead *newsah; - + /* sanity check */ if (saidx == NULL) panic("key_newsaidx: NULL pointer is passed.\n"); - + newsah = keydb_newsecashead(); if (newsah == NULL) return NULL; - + bcopy(saidx, &newsah->saidx, sizeof(newsah->saidx)); /* remove the ports */ switch (saidx->src.ss_family) { - case AF_INET: + case AF_INET: ((struct sockaddr_in *)(&newsah->saidx.src))->sin_port = IPSEC_PORT_ANY; break; - case AF_INET6: - ((struct sockaddr_in6 *)(&newsah->saidx.src))->sin6_port = IPSEC_PORT_ANY; + case AF_INET6: + ((struct sockaddr_in6 *)(&newsah->saidx.src))->sin6_port = IPSEC_PORT_ANY; break; - default: + default: break; } switch (saidx->dst.ss_family) { - case AF_INET: - ((struct sockaddr_in *)(&newsah->saidx.dst))->sin_port = IPSEC_PORT_ANY; + case AF_INET: + ((struct sockaddr_in *)(&newsah->saidx.dst))->sin_port = IPSEC_PORT_ANY; break; - case AF_INET6: + case AF_INET6: ((struct sockaddr_in6 *)(&newsah->saidx.dst))->sin6_port = IPSEC_PORT_ANY; break; - default: + default: break; - } - + } + newsah->dir = dir; /* add to saidxtree */ newsah->state = SADB_SASTATE_MATURE; LIST_INSERT_HEAD(&sahtree, newsah, chain); - + key_start_timehandler(); return(newsah); } @@ -3100,62 +3538,67 @@ key_newsah( */ void key_delsah( - struct secashead *sah) + struct secashead *sah) { struct secasvar *sav, *nextsav; u_int stateidx, state; int zombie = 0; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* sanity check */ if (sah == NULL) panic("key_delsah: NULL pointer is passed.\n"); - + /* searching all SA registerd in the secindex. */ for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_any); stateidx++) { - + state = saorder_state_any[stateidx]; for (sav = (struct secasvar *)LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { - + nextsav = LIST_NEXT(sav, chain); - + if (sav->refcnt > 0) { /* give up to delete this sa */ zombie++; continue; } - + /* sanity check */ KEY_CHKSASTATE(state, sav->state, "key_delsah"); - + key_freesav(sav, KEY_SADB_LOCKED); - + /* remove back pointer */ sav->sah = NULL; sav = NULL; } } - + /* don't delete sah only if there are savs. 
*/ if (zombie) return; - - if (sah->sa_route.ro_rt) { - rtfree(sah->sa_route.ro_rt); - sah->sa_route.ro_rt = (struct rtentry *)NULL; - } - + + ROUTE_RELEASE(&sah->sa_route); + + if (sah->idents) { + KFREE(sah->idents); + } + + if (sah->identd) { + KFREE(sah->identd); + } + /* remove from tree of SA index */ if (__LIST_CHAINED(sah)) LIST_REMOVE(sah, chain); - + KFREE(sah); - + return; } @@ -3173,20 +3616,20 @@ key_delsah( */ static struct secasvar * key_newsav( - struct mbuf *m, - const struct sadb_msghdr *mhp, - struct secashead *sah, - int *errp) + struct mbuf *m, + const struct sadb_msghdr *mhp, + struct secashead *sah, + int *errp) { struct secasvar *newsav; const struct sadb_sa *xsa; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* sanity check */ if (m == NULL || mhp == NULL || mhp->msg == NULL || sah == NULL) panic("key_newsa: NULL pointer is passed.\n"); - + KMALLOC_NOWAIT(newsav, struct secasvar *, sizeof(struct secasvar)); if (newsav == NULL) { lck_mtx_unlock(sadb_mutex); @@ -3199,39 +3642,44 @@ key_newsav( } } bzero((caddr_t)newsav, sizeof(struct secasvar)); - + switch (mhp->msg->sadb_msg_type) { - case SADB_GETSPI: - key_setspi(newsav, 0); - + case SADB_GETSPI: + key_setspi(newsav, 0); + #if IPSEC_DOSEQCHECK - /* sync sequence number */ - if (mhp->msg->sadb_msg_seq == 0) - newsav->seq = + /* sync sequence number */ + if (mhp->msg->sadb_msg_seq == 0) + newsav->seq = (acq_seq = (acq_seq == ~0 ? 1 : ++acq_seq)); - else + else #endif + newsav->seq = mhp->msg->sadb_msg_seq; + break; + + case SADB_ADD: + /* sanity check */ + if (mhp->ext[SADB_EXT_SA] == NULL) { + KFREE(newsav); + ipseclog((LOG_DEBUG, "key_newsa: invalid message is passed.\n")); + *errp = EINVAL; + return NULL; + } + xsa = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; + key_setspi(newsav, xsa->sadb_sa_spi); newsav->seq = mhp->msg->sadb_msg_seq; - break; - - case SADB_ADD: - /* sanity check */ - if (mhp->ext[SADB_EXT_SA] == NULL) { + break; + default: KFREE(newsav); - ipseclog((LOG_DEBUG, "key_newsa: invalid message is passed.\n")); *errp = EINVAL; return NULL; - } - xsa = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; - key_setspi(newsav, xsa->sadb_sa_spi); - newsav->seq = mhp->msg->sadb_msg_seq; - break; - default: - KFREE(newsav); - *errp = EINVAL; - return NULL; } - + + if (mhp->ext[SADB_X_EXT_SA2] != NULL) { + if (((struct sadb_x_sa2 *)(void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_alwaysexpire) + newsav->always_expire = 1; + } + /* copy sav values */ if (mhp->msg->sadb_msg_type != SADB_GETSPI) { *errp = key_setsaval(newsav, m, mhp); @@ -3241,25 +3689,71 @@ key_newsav( KFREE(newsav); return NULL; } + } else { + /* For get SPI, if has a hard lifetime, apply */ + const struct sadb_lifetime *lft0; + struct timeval tv; + + lft0 = (struct sadb_lifetime *)(void *)mhp->ext[SADB_EXT_LIFETIME_HARD]; + if (lft0 != NULL) { + /* make lifetime for CURRENT */ + KMALLOC_NOWAIT(newsav->lft_c, struct sadb_lifetime *, + sizeof(struct sadb_lifetime)); + if (newsav->lft_c == NULL) { + lck_mtx_unlock(sadb_mutex); + KMALLOC_WAIT(newsav->lft_c, struct sadb_lifetime *, + sizeof(struct sadb_lifetime)); + lck_mtx_lock(sadb_mutex); + if (newsav->lft_c == NULL) { + ipseclog((LOG_DEBUG, "key_newsa: No more memory.\n")); + KFREE(newsav); + *errp = ENOBUFS; + return NULL; + } + } + + microtime(&tv); + + newsav->lft_c->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); + newsav->lft_c->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; + newsav->lft_c->sadb_lifetime_allocations = 0; + 
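 *
 * (illustrative sketch, not from the original sources) key_delsah()
 * shows the SADB teardown discipline: walk every state list, count any
 * still-referenced SA as a "zombie", free only the unreferenced ones,
 * and leave the head alone while zombies remain. In outline:
 */

int zombie = 0;
for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_any); stateidx++) {
	state = saorder_state_any[stateidx];
	for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) {
		nextsav = LIST_NEXT(sav, chain);
		if (sav->refcnt > 0) {
			zombie++;	/* still referenced - keep it */
			continue;
		}
		key_freesav(sav, KEY_SADB_LOCKED);
	}
}

/* don't delete the sah while referenced savs remain: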
newsav->lft_c->sadb_lifetime_bytes = 0; + newsav->lft_c->sadb_lifetime_addtime = tv.tv_sec; + newsav->lft_c->sadb_lifetime_usetime = 0; + + if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { + ipseclog((LOG_DEBUG, "key_newsa: invalid hard lifetime ext len.\n")); + KFREE(newsav); + *errp = EINVAL; + return NULL; + } + newsav->lft_h = (struct sadb_lifetime *)key_newbuf(lft0, sizeof(*lft0)); + if (newsav->lft_h == NULL) { + ipseclog((LOG_DEBUG, "key_newsa: No more memory.\n")); + KFREE(newsav); + *errp = ENOBUFS; + return NULL; + } + } } - + /* reset created */ { - struct timeval tv; - microtime(&tv); - newsav->created = tv.tv_sec; + struct timeval tv; + microtime(&tv); + newsav->created = tv.tv_sec; } - + newsav->pid = mhp->msg->sadb_msg_pid; - + /* add to satree */ newsav->sah = sah; newsav->refcnt = 1; newsav->state = SADB_SASTATE_LARVAL; LIST_INSERT_TAIL(&sah->savtree[SADB_SASTATE_LARVAL], newsav, - secasvar, chain); + secasvar, chain); ipsec_sav_count++; - + return newsav; } @@ -3310,7 +3804,7 @@ key_newsav2(struct secashead *sah, } } bzero((caddr_t)newsav, sizeof(struct secasvar)); - + #if IPSEC_DOSEQCHECK /* sync sequence number */ if (seq == 0) @@ -3319,7 +3813,7 @@ key_newsav2(struct secashead *sah, #endif newsav->seq = seq; key_setspi(newsav, spi); - + if (key_setsaval2(newsav, satype, alg_auth, @@ -3373,15 +3867,15 @@ key_newsav2(struct secashead *sah, */ void key_delsav( - struct secasvar *sav) + struct secasvar *sav) { - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); /* sanity check */ if (sav == NULL) panic("key_delsav: NULL pointer is passed.\n"); - + if (sav->refcnt > 0) return; /* can't free */ @@ -3389,10 +3883,10 @@ key_delsav( if (__LIST_CHAINED(sav)) LIST_REMOVE(sav, chain); ipsec_sav_count--; - + if (sav->spihash.le_prev || sav->spihash.le_next) LIST_REMOVE(sav, spihash); - + if (sav->key_auth != NULL) { bzero(_KEYBUF(sav->key_auth), _KEYLEN(sav->key_auth)); KFREE(sav->key_auth); @@ -3428,9 +3922,9 @@ key_delsav( KFREE(sav->iv); sav->iv = NULL; } - + KFREE(sav); - + return; } @@ -3442,19 +3936,19 @@ key_delsav( */ static struct secashead * key_getsah( - struct secasindex *saidx) + struct secasindex *saidx) { struct secashead *sah; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + LIST_FOREACH(sah, &sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) return sah; } - + return NULL; } @@ -3463,9 +3957,9 @@ key_newsah2 (struct secasindex *saidx, u_int8_t dir) { struct secashead *sah; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + sah = key_getsah(saidx); if (!sah) { return(key_newsah(saidx, dir)); @@ -3482,20 +3976,20 @@ key_newsah2 (struct secasindex *saidx, */ static struct secasvar * key_checkspidup( - struct secasindex *saidx, - u_int32_t spi) + struct secasindex *saidx, + u_int32_t spi) { struct secasvar *sav; u_int stateidx, state; lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* check address family */ if (saidx->src.ss_family != saidx->dst.ss_family) { ipseclog((LOG_DEBUG, "key_checkspidup: address family mismatched.\n")); return NULL; } - + /* check all SAD */ LIST_FOREACH(sav, &spihash[SPIHASH(spi)], spihash) { if (sav->spi != spi) @@ -3509,14 +4003,14 @@ key_checkspidup( return sav; } } - + return NULL; } static void key_setspi( - struct secasvar *sav, - u_int32_t spi) + struct secasvar *sav, + u_int32_t spi) { lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); sav->spi = spi; @@ -3534,12 +4028,12 @@ key_setspi( */ static struct secasvar * key_getsavbyspi( - struct 
secashead *sah, - u_int32_t spi) + struct secashead *sah, + u_int32_t spi) { struct secasvar *sav, *match; u_int stateidx, state, matchidx; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); match = NULL; matchidx = _ARRAYLEN(saorder_state_alive); @@ -3557,7 +4051,7 @@ key_getsavbyspi( } } } - + return match; } @@ -3571,22 +4065,22 @@ key_getsavbyspi( */ static int key_setsaval( - struct secasvar *sav, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct secasvar *sav, + struct mbuf *m, + const struct sadb_msghdr *mhp) { #if IPSEC_ESP const struct esp_algorithm *algo; #endif int error = 0; struct timeval tv; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* sanity check */ if (m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_setsaval: NULL pointer is passed.\n"); - + /* initialization */ sav->replay = NULL; sav->key_auth = NULL; @@ -3600,18 +4094,18 @@ key_setsaval( sav->remote_ike_port = 0; sav->natt_last_activity = natt_now; sav->natt_encapsulated_src_port = 0; - + /* SA */ if (mhp->ext[SADB_EXT_SA] != NULL) { const struct sadb_sa *sa0; - + sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) { ipseclog((LOG_DEBUG, "key_setsaval: invalid message size.\n")); error = EINVAL; goto fail; } - + sav->alg_auth = sa0->sadb_sa_auth; sav->alg_enc = sa0->sadb_sa_encrypt; sav->flags = sa0->sadb_sa_flags; @@ -3622,7 +4116,7 @@ key_setsaval( */ if ((sav->flags & SADB_X_EXT_NATT) != 0) { if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa_2) || - ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port == 0) { + ((const struct sadb_sa_2*)(sa0))->sadb_sa_natt_port == 0) { ipseclog((LOG_DEBUG, "key_setsaval: natt port not set.\n")); error = EINVAL; goto fail; @@ -3632,14 +4126,14 @@ key_setsaval( /* * Verify if SADB_X_EXT_NATT_MULTIPLEUSERS flag is set that - * SADB_X_EXT_NATT is set and SADB_X_EXT_NATT_KEEPALIVE is not + * SADB_X_EXT_NATT is set and SADB_X_EXT_NATT_KEEPALIVE is not * set (we're not behind nat) - otherwise clear it. */ if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0) if ((sav->flags & SADB_X_EXT_NATT) == 0 || (sav->flags & SADB_X_EXT_NATT_KEEPALIVE) != 0) sav->flags &= ~SADB_X_EXT_NATT_MULTIPLEUSERS; - + /* replay window */ if ((sa0->sadb_sa_flags & SADB_X_EXT_OLD) == 0) { sav->replay = keydb_newsecreplay(sa0->sadb_sa_replay); @@ -3650,15 +4144,15 @@ key_setsaval( } } } - + /* Authentication keys */ if (mhp->ext[SADB_EXT_KEY_AUTH] != NULL) { const struct sadb_key *key0; int len; - + key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_AUTH]; len = mhp->extlen[SADB_EXT_KEY_AUTH]; - + error = 0; if (len < sizeof(*key0)) { ipseclog((LOG_DEBUG, "key_setsaval: invalid auth key ext len. 
len = %d\n", len)); @@ -3666,22 +4160,22 @@ key_setsaval( goto fail; } switch (mhp->msg->sadb_msg_satype) { - case SADB_SATYPE_AH: - case SADB_SATYPE_ESP: - if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && - sav->alg_auth != SADB_X_AALG_NULL) + case SADB_SATYPE_AH: + case SADB_SATYPE_ESP: + if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && + sav->alg_auth != SADB_X_AALG_NULL) + error = EINVAL; + break; + case SADB_X_SATYPE_IPCOMP: + default: error = EINVAL; - break; - case SADB_X_SATYPE_IPCOMP: - default: - error = EINVAL; - break; + break; } if (error) { ipseclog((LOG_DEBUG, "key_setsaval: invalid key_auth values.\n")); goto fail; } - + sav->key_auth = (struct sadb_key *)key_newbuf(key0, len); if (sav->key_auth == NULL) { ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); @@ -3689,15 +4183,15 @@ key_setsaval( goto fail; } } - + /* Encryption key */ if (mhp->ext[SADB_EXT_KEY_ENCRYPT] != NULL) { const struct sadb_key *key0; int len; - + key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; - + error = 0; if (len < sizeof(*key0)) { ipseclog((LOG_DEBUG, "key_setsaval: invalid encryption key ext len. len = %d\n", len)); @@ -3705,83 +4199,83 @@ key_setsaval( goto fail; } switch (mhp->msg->sadb_msg_satype) { - case SADB_SATYPE_ESP: - if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && - sav->alg_enc != SADB_EALG_NULL) { - ipseclog((LOG_DEBUG, "key_setsaval: invalid ESP algorithm.\n")); - error = EINVAL; + case SADB_SATYPE_ESP: + if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && + sav->alg_enc != SADB_EALG_NULL) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid ESP algorithm.\n")); + error = EINVAL; + break; + } + sav->key_enc = (struct sadb_key *)key_newbuf(key0, len); + if (sav->key_enc == NULL) { + ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); + error = ENOBUFS; + goto fail; + } break; - } - sav->key_enc = (struct sadb_key *)key_newbuf(key0, len); - if (sav->key_enc == NULL) { - ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); - error = ENOBUFS; - goto fail; - } - break; - case SADB_X_SATYPE_IPCOMP: - if (len != PFKEY_ALIGN8(sizeof(struct sadb_key))) + case SADB_X_SATYPE_IPCOMP: + if (len != PFKEY_ALIGN8(sizeof(struct sadb_key))) + error = EINVAL; + sav->key_enc = NULL; /*just in case*/ + break; + case SADB_SATYPE_AH: + default: error = EINVAL; - sav->key_enc = NULL; /*just in case*/ - break; - case SADB_SATYPE_AH: - default: - error = EINVAL; - break; + break; } if (error) { ipseclog((LOG_DEBUG, "key_setsaval: invalid key_enc value.\n")); goto fail; } } - + /* set iv */ sav->ivlen = 0; - + switch (mhp->msg->sadb_msg_satype) { - case SADB_SATYPE_ESP: + case SADB_SATYPE_ESP: #if IPSEC_ESP - algo = esp_algorithm_lookup(sav->alg_enc); - if (algo && algo->ivlen) - sav->ivlen = (*algo->ivlen)(algo, sav); - if (sav->ivlen == 0) - break; - KMALLOC_NOWAIT(sav->iv, caddr_t, sav->ivlen); - if (sav->iv == 0) { - lck_mtx_unlock(sadb_mutex); - KMALLOC_WAIT(sav->iv, caddr_t, sav->ivlen); - lck_mtx_lock(sadb_mutex); + algo = esp_algorithm_lookup(sav->alg_enc); + if (algo && algo->ivlen) + sav->ivlen = (*algo->ivlen)(algo, sav); + if (sav->ivlen == 0) + break; + KMALLOC_NOWAIT(sav->iv, caddr_t, sav->ivlen); if (sav->iv == 0) { - ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); - error = ENOBUFS; - goto fail; + lck_mtx_unlock(sadb_mutex); + KMALLOC_WAIT(sav->iv, caddr_t, sav->ivlen); + lck_mtx_lock(sadb_mutex); + if (sav->iv == 0) { + ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); + error = ENOBUFS; + goto 
fail; + } } - } - - /* initialize */ - key_randomfill(sav->iv, sav->ivlen); + + /* initialize */ + key_randomfill(sav->iv, sav->ivlen); #endif - break; - case SADB_SATYPE_AH: - case SADB_X_SATYPE_IPCOMP: - break; - default: - ipseclog((LOG_DEBUG, "key_setsaval: invalid SA type.\n")); - error = EINVAL; - goto fail; + break; + case SADB_SATYPE_AH: + case SADB_X_SATYPE_IPCOMP: + break; + default: + ipseclog((LOG_DEBUG, "key_setsaval: invalid SA type.\n")); + error = EINVAL; + goto fail; } - + /* reset created */ microtime(&tv); sav->created = tv.tv_sec; - + /* make lifetime for CURRENT */ KMALLOC_NOWAIT(sav->lft_c, struct sadb_lifetime *, - sizeof(struct sadb_lifetime)); + sizeof(struct sadb_lifetime)); if (sav->lft_c == NULL) { lck_mtx_unlock(sadb_mutex); KMALLOC_WAIT(sav->lft_c, struct sadb_lifetime *, - sizeof(struct sadb_lifetime)); + sizeof(struct sadb_lifetime)); lck_mtx_lock(sadb_mutex); if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); @@ -3789,61 +4283,61 @@ key_setsaval( goto fail; } } - + microtime(&tv); - + sav->lft_c->sadb_lifetime_len = - PFKEY_UNIT64(sizeof(struct sadb_lifetime)); + PFKEY_UNIT64(sizeof(struct sadb_lifetime)); sav->lft_c->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; sav->lft_c->sadb_lifetime_allocations = 0; sav->lft_c->sadb_lifetime_bytes = 0; sav->lft_c->sadb_lifetime_addtime = tv.tv_sec; sav->lft_c->sadb_lifetime_usetime = 0; - + /* lifetimes for HARD and SOFT */ { - const struct sadb_lifetime *lft0; - - lft0 = (struct sadb_lifetime *) + const struct sadb_lifetime *lft0; + + lft0 = (struct sadb_lifetime *) (void *)mhp->ext[SADB_EXT_LIFETIME_HARD]; - if (lft0 != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { - ipseclog((LOG_DEBUG, "key_setsaval: invalid hard lifetime ext len.\n")); - error = EINVAL; - goto fail; - } - sav->lft_h = (struct sadb_lifetime *)key_newbuf(lft0, - sizeof(*lft0)); - if (sav->lft_h == NULL) { - ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); - error = ENOBUFS; - goto fail; + if (lft0 != NULL) { + if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid hard lifetime ext len.\n")); + error = EINVAL; + goto fail; + } + sav->lft_h = (struct sadb_lifetime *)key_newbuf(lft0, + sizeof(*lft0)); + if (sav->lft_h == NULL) { + ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); + error = ENOBUFS; + goto fail; + } + /* to be initialize ? */ } - /* to be initialize ? */ - } - - lft0 = (struct sadb_lifetime *) + + lft0 = (struct sadb_lifetime *) (void *)mhp->ext[SADB_EXT_LIFETIME_SOFT]; - if (lft0 != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) { - ipseclog((LOG_DEBUG, "key_setsaval: invalid soft lifetime ext len.\n")); - error = EINVAL; - goto fail; - } - sav->lft_s = (struct sadb_lifetime *)key_newbuf(lft0, - sizeof(*lft0)); - if (sav->lft_s == NULL) { - ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); - error = ENOBUFS; - goto fail; + if (lft0 != NULL) { + if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid soft lifetime ext len.\n")); + error = EINVAL; + goto fail; + } + sav->lft_s = (struct sadb_lifetime *)key_newbuf(lft0, + sizeof(*lft0)); + if (sav->lft_s == NULL) { + ipseclog((LOG_DEBUG, "key_setsaval: No more memory.\n")); + error = ENOBUFS; + goto fail; + } + /* to be initialize ? */ } - /* to be initialize ? 
*/ - } } - + return 0; - - fail: + +fail: /* initialization */ if (sav->replay != NULL) { keydb_delsecreplay(sav->replay); @@ -3880,7 +4374,7 @@ key_setsaval( KFREE(sav->lft_s); sav->lft_s = NULL; } - + return error; } @@ -3915,9 +4409,9 @@ key_setsaval2(struct secasvar *sav, #endif int error = 0; struct timeval tv; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* initialization */ sav->replay = NULL; sav->key_auth = NULL; @@ -3931,14 +4425,14 @@ key_setsaval2(struct secasvar *sav, sav->remote_ike_port = 0; sav->natt_last_activity = natt_now; sav->natt_encapsulated_src_port = 0; - + sav->alg_auth = alg_auth; sav->alg_enc = alg_enc; sav->flags = flags; sav->pid = pid; sav->seq = seq; key_setspi(sav, htonl(spi)); - + /* * Verify that a nat-traversal port was specified if * the nat-traversal flag is set. @@ -3951,17 +4445,17 @@ key_setsaval2(struct secasvar *sav, } sav->remote_ike_port = natt_port; } - + /* * Verify if SADB_X_EXT_NATT_MULTIPLEUSERS flag is set that - * SADB_X_EXT_NATT is set and SADB_X_EXT_NATT_KEEPALIVE is not + * SADB_X_EXT_NATT is set and SADB_X_EXT_NATT_KEEPALIVE is not * set (we're not behind nat) - otherwise clear it. */ if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0) if ((sav->flags & SADB_X_EXT_NATT) == 0 || (sav->flags & SADB_X_EXT_NATT_KEEPALIVE) != 0) sav->flags &= ~SADB_X_EXT_NATT_MULTIPLEUSERS; - + /* replay window */ if ((flags & SADB_X_EXT_OLD) == 0) { sav->replay = keydb_newsecreplay(replay); @@ -3990,7 +4484,7 @@ key_setsaval2(struct secasvar *sav, /* set iv */ sav->ivlen = 0; - + if (satype == SADB_SATYPE_ESP) { #if IPSEC_ESP algo = esp_algorithm_lookup(sav->alg_enc); @@ -4013,7 +4507,7 @@ key_setsaval2(struct secasvar *sav, } #endif } - + /* reset created */ microtime(&tv); sav->created = tv.tv_sec; @@ -4109,93 +4603,93 @@ fail: */ static int key_mature( - struct secasvar *sav) + struct secasvar *sav) { int mature; int checkmask = 0; /* 2^0: ealg 2^1: aalg 2^2: calg */ int mustmask = 0; /* 2^0: ealg 2^1: aalg 2^2: calg */ - + mature = 0; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* check SPI value */ switch (sav->sah->saidx.proto) { - case IPPROTO_ESP: - case IPPROTO_AH: - - /* No reason to test if this is >= 0, because ntohl(sav->spi) is unsigned. */ - if (ntohl(sav->spi) <= 255) { - ipseclog((LOG_DEBUG, - "key_mature: illegal range of SPI %u.\n", - (u_int32_t)ntohl(sav->spi))); - return EINVAL; - } - break; + case IPPROTO_ESP: + case IPPROTO_AH: + + /* No reason to test if this is >= 0, because ntohl(sav->spi) is unsigned. 
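 *
 * (illustrative sketch, not from the original sources) The lifetime
 * allocations above follow this file's recurring allocation pattern:
 * try a non-blocking KMALLOC first, and only when that fails drop
 * sadb_mutex, retry with a blocking allocation, and re-take the lock.
 * With a stand-in pointer p:
 */

KMALLOC_NOWAIT(p, struct sadb_lifetime *, sizeof(struct sadb_lifetime));
if (p == NULL) {
	lck_mtx_unlock(sadb_mutex);	/* may sleep; cannot hold the lock */
	KMALLOC_WAIT(p, struct sadb_lifetime *, sizeof(struct sadb_lifetime));
	lck_mtx_lock(sadb_mutex);
	if (p == NULL)
		return ENOBUFS;		/* truly out of memory */
}

/* to be initialized ?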
*/ + if (ntohl(sav->spi) <= 255) { + ipseclog((LOG_DEBUG, + "key_mature: illegal range of SPI %u.\n", + (u_int32_t)ntohl(sav->spi))); + return EINVAL; + } + break; } - + /* check satype */ switch (sav->sah->saidx.proto) { - case IPPROTO_ESP: - /* check flags */ - if ((sav->flags & SADB_X_EXT_OLD) - && (sav->flags & SADB_X_EXT_DERIV)) { - ipseclog((LOG_DEBUG, "key_mature: " - "invalid flag (derived) given to old-esp.\n")); - return EINVAL; - } - if (sav->alg_auth == SADB_AALG_NONE) - checkmask = 1; - else - checkmask = 3; - mustmask = 1; - break; - case IPPROTO_AH: - /* check flags */ - if (sav->flags & SADB_X_EXT_DERIV) { - ipseclog((LOG_DEBUG, "key_mature: " - "invalid flag (derived) given to AH SA.\n")); - return EINVAL; - } - if (sav->alg_enc != SADB_EALG_NONE) { - ipseclog((LOG_DEBUG, "key_mature: " - "protocol and algorithm mismated.\n")); - return(EINVAL); - } - checkmask = 2; - mustmask = 2; - break; - case IPPROTO_IPCOMP: - if (sav->alg_auth != SADB_AALG_NONE) { - ipseclog((LOG_DEBUG, "key_mature: " - "protocol and algorithm mismated.\n")); - return(EINVAL); - } - if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 - && ntohl(sav->spi) >= 0x10000) { - ipseclog((LOG_DEBUG, "key_mature: invalid cpi for IPComp.\n")); - return(EINVAL); - } - checkmask = 4; - mustmask = 4; - break; - default: - ipseclog((LOG_DEBUG, "key_mature: Invalid satype.\n")); - return EPROTONOSUPPORT; + case IPPROTO_ESP: + /* check flags */ + if ((sav->flags & SADB_X_EXT_OLD) + && (sav->flags & SADB_X_EXT_DERIV)) { + ipseclog((LOG_DEBUG, "key_mature: " + "invalid flag (derived) given to old-esp.\n")); + return EINVAL; + } + if (sav->alg_auth == SADB_AALG_NONE) + checkmask = 1; + else + checkmask = 3; + mustmask = 1; + break; + case IPPROTO_AH: + /* check flags */ + if (sav->flags & SADB_X_EXT_DERIV) { + ipseclog((LOG_DEBUG, "key_mature: " + "invalid flag (derived) given to AH SA.\n")); + return EINVAL; + } + if (sav->alg_enc != SADB_EALG_NONE) { + ipseclog((LOG_DEBUG, "key_mature: " + "protocol and algorithm mismated.\n")); + return(EINVAL); + } + checkmask = 2; + mustmask = 2; + break; + case IPPROTO_IPCOMP: + if (sav->alg_auth != SADB_AALG_NONE) { + ipseclog((LOG_DEBUG, "key_mature: " + "protocol and algorithm mismated.\n")); + return(EINVAL); + } + if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 + && ntohl(sav->spi) >= 0x10000) { + ipseclog((LOG_DEBUG, "key_mature: invalid cpi for IPComp.\n")); + return(EINVAL); + } + checkmask = 4; + mustmask = 4; + break; + default: + ipseclog((LOG_DEBUG, "key_mature: Invalid satype.\n")); + return EPROTONOSUPPORT; } - + /* check authentication algorithm */ if ((checkmask & 2) != 0) { const struct ah_algorithm *algo; int keylen; - + algo = ah_algorithm_lookup(sav->alg_auth); if (!algo) { ipseclog((LOG_DEBUG,"key_mature: " - "unknown authentication algorithm.\n")); + "unknown authentication algorithm.\n")); return EINVAL; } - + /* algorithm-dependent check */ if (sav->key_auth) keylen = sav->key_auth->sadb_key_bits; @@ -4203,12 +4697,12 @@ key_mature( keylen = 0; if (keylen < algo->keymin || algo->keymax < keylen) { ipseclog((LOG_DEBUG, - "key_mature: invalid AH key length %d " - "(%d-%d allowed)\n", - keylen, algo->keymin, algo->keymax)); + "key_mature: invalid AH key length %d " + "(%d-%d allowed)\n", + keylen, algo->keymin, algo->keymax)); return EINVAL; } - + if (algo->mature) { if ((*algo->mature)(sav)) { /* message generated in per-algorithm function*/ @@ -4216,25 +4710,25 @@ key_mature( } else mature = SADB_SATYPE_AH; } - + if ((mustmask & 2) != 0 && mature != SADB_SATYPE_AH) { 
ipseclog((LOG_DEBUG, "key_mature: no satisfy algorithm for AH\n")); return EINVAL; } } - + /* check encryption algorithm */ if ((checkmask & 1) != 0) { #if IPSEC_ESP const struct esp_algorithm *algo; int keylen; - + algo = esp_algorithm_lookup(sav->alg_enc); if (!algo) { ipseclog((LOG_DEBUG, "key_mature: unknown encryption algorithm.\n")); return EINVAL; } - + /* algorithm-dependent check */ if (sav->key_enc) keylen = sav->key_enc->sadb_key_bits; @@ -4242,12 +4736,12 @@ key_mature( keylen = 0; if (keylen < algo->keymin || algo->keymax < keylen) { ipseclog((LOG_DEBUG, - "key_mature: invalid ESP key length %d " - "(%d-%d allowed)\n", - keylen, algo->keymin, algo->keymax)); + "key_mature: invalid ESP key length %d " + "(%d-%d allowed)\n", + keylen, algo->keymin, algo->keymax)); return EINVAL; } - + if (algo->mature) { if ((*algo->mature)(sav)) { /* message generated in per-algorithm function*/ @@ -4255,7 +4749,7 @@ key_mature( } else mature = SADB_SATYPE_ESP; } - + if ((mustmask & 1) != 0 && mature != SADB_SATYPE_ESP) { ipseclog((LOG_DEBUG, "key_mature: no satisfy algorithm for ESP\n")); return EINVAL; @@ -4265,11 +4759,11 @@ key_mature( return EINVAL; #endif } - + /* check compression algorithm */ if ((checkmask & 4) != 0) { const struct ipcomp_algorithm *algo; - + /* algorithm-dependent check */ algo = ipcomp_algorithm_lookup(sav->alg_enc); if (!algo) { @@ -4277,9 +4771,9 @@ key_mature( return EINVAL; } } - + key_sa_chgstate(sav, SADB_SASTATE_MATURE); - + return 0; } @@ -4288,11 +4782,11 @@ key_mature( */ static struct mbuf * key_setdumpsa( - struct secasvar *sav, - u_int8_t type, - u_int8_t satype, - u_int32_t seq, - u_int32_t pid) + struct secasvar *sav, + u_int8_t type, + u_int8_t satype, + u_int32_t seq, + u_int32_t pid) { struct mbuf *result = NULL, *tres = NULL, *m; int l = 0; @@ -4311,85 +4805,85 @@ key_setdumpsa( if (m == NULL) goto fail; result = m; - + for (i = sizeof(dumporder)/sizeof(dumporder[0]) - 1; i >= 0; i--) { m = NULL; p = NULL; switch (dumporder[i]) { - case SADB_EXT_SA: - m = key_setsadbsa(sav); - if (!m) - goto fail; - break; - - case SADB_X_EXT_SA2: - m = key_setsadbxsa2(sav->sah->saidx.mode, - sav->replay ? sav->replay->count : 0, - sav->sah->saidx.reqid); - if (!m) - goto fail; - break; - - case SADB_EXT_ADDRESS_SRC: - m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - (struct sockaddr *)&sav->sah->saidx.src, - FULLMASK, IPSEC_ULPROTO_ANY); - if (!m) - goto fail; - break; - - case SADB_EXT_ADDRESS_DST: - m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - (struct sockaddr *)&sav->sah->saidx.dst, - FULLMASK, IPSEC_ULPROTO_ANY); - if (!m) - goto fail; - break; - - case SADB_EXT_KEY_AUTH: - if (!sav->key_auth) - continue; - l = PFKEY_UNUNIT64(sav->key_auth->sadb_key_len); - p = sav->key_auth; - break; - - case SADB_EXT_KEY_ENCRYPT: - if (!sav->key_enc) - continue; - l = PFKEY_UNUNIT64(sav->key_enc->sadb_key_len); - p = sav->key_enc; - break; - - case SADB_EXT_LIFETIME_CURRENT: - if (!sav->lft_c) - continue; - l = PFKEY_UNUNIT64(((struct sadb_ext *)sav->lft_c)->sadb_ext_len); - p = sav->lft_c; - break; - - case SADB_EXT_LIFETIME_HARD: - if (!sav->lft_h) - continue; - l = PFKEY_UNUNIT64(((struct sadb_ext *)sav->lft_h)->sadb_ext_len); - p = sav->lft_h; - break; - - case SADB_EXT_LIFETIME_SOFT: - if (!sav->lft_s) + case SADB_EXT_SA: + m = key_setsadbsa(sav); + if (!m) + goto fail; + break; + + case SADB_X_EXT_SA2: + m = key_setsadbxsa2(sav->sah->saidx.mode, + sav->replay ? 
sav->replay->count : 0, + sav->sah->saidx.reqid); + if (!m) + goto fail; + break; + + case SADB_EXT_ADDRESS_SRC: + m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, + (struct sockaddr *)&sav->sah->saidx.src, + FULLMASK, IPSEC_ULPROTO_ANY); + if (!m) + goto fail; + break; + + case SADB_EXT_ADDRESS_DST: + m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, + (struct sockaddr *)&sav->sah->saidx.dst, + FULLMASK, IPSEC_ULPROTO_ANY); + if (!m) + goto fail; + break; + + case SADB_EXT_KEY_AUTH: + if (!sav->key_auth) + continue; + l = PFKEY_UNUNIT64(sav->key_auth->sadb_key_len); + p = sav->key_auth; + break; + + case SADB_EXT_KEY_ENCRYPT: + if (!sav->key_enc) + continue; + l = PFKEY_UNUNIT64(sav->key_enc->sadb_key_len); + p = sav->key_enc; + break; + + case SADB_EXT_LIFETIME_CURRENT: + if (!sav->lft_c) + continue; + l = PFKEY_UNUNIT64(((struct sadb_ext *)sav->lft_c)->sadb_ext_len); + p = sav->lft_c; + break; + + case SADB_EXT_LIFETIME_HARD: + if (!sav->lft_h) + continue; + l = PFKEY_UNUNIT64(((struct sadb_ext *)sav->lft_h)->sadb_ext_len); + p = sav->lft_h; + break; + + case SADB_EXT_LIFETIME_SOFT: + if (!sav->lft_s) + continue; + l = PFKEY_UNUNIT64(((struct sadb_ext *)sav->lft_s)->sadb_ext_len); + p = sav->lft_s; + break; + + case SADB_EXT_ADDRESS_PROXY: + case SADB_EXT_IDENTITY_SRC: + case SADB_EXT_IDENTITY_DST: + /* XXX: should we brought from SPD ? */ + case SADB_EXT_SENSITIVITY: + default: continue; - l = PFKEY_UNUNIT64(((struct sadb_ext *)sav->lft_s)->sadb_ext_len); - p = sav->lft_s; - break; - - case SADB_EXT_ADDRESS_PROXY: - case SADB_EXT_IDENTITY_SRC: - case SADB_EXT_IDENTITY_DST: - /* XXX: should we brought from SPD ? */ - case SADB_EXT_SENSITIVITY: - default: - continue; } - + if ((!m && !p) || (m && p)) goto fail; if (p && tres) { @@ -4405,29 +4899,29 @@ key_setdumpsa( goto fail; m_copyback(m, 0, l, p); } - + if (tres) m_cat(m, tres); tres = m; } - + m_cat(result, tres); - + if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) goto fail; } - + result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; - + mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - + PFKEY_UNIT64(result->m_pkthdr.len); + return result; - + fail: m_freem(result); m_freem(tres); @@ -4439,17 +4933,17 @@ fail: */ static struct mbuf * key_setsadbmsg( - u_int8_t type, - u_int16_t tlen, - u_int8_t satype, - u_int32_t seq, - pid_t pid, - u_int16_t reserved) + u_int8_t type, + u_int16_t tlen, + u_int8_t satype, + u_int32_t seq, + pid_t pid, + u_int16_t reserved) { struct mbuf *m; struct sadb_msg *p; int len; - + len = PFKEY_ALIGN8(sizeof(struct sadb_msg)); if (len > MCLBYTES) return NULL; @@ -4465,9 +4959,9 @@ key_setsadbmsg( return NULL; m->m_pkthdr.len = m->m_len = len; m->m_next = NULL; - + p = mtod(m, struct sadb_msg *); - + bzero(p, len); p->sadb_msg_version = PF_KEY_V2; p->sadb_msg_type = type; @@ -4477,7 +4971,7 @@ key_setsadbmsg( p->sadb_msg_reserved = reserved; p->sadb_msg_seq = seq; p->sadb_msg_pid = (u_int32_t)pid; - + return m; } @@ -4486,12 +4980,12 @@ key_setsadbmsg( */ static struct mbuf * key_setsadbsa( - struct secasvar *sav) + struct secasvar *sav) { struct mbuf *m; struct sadb_sa *p; int len; - + len = PFKEY_ALIGN8(sizeof(struct sadb_sa)); m = key_alloc_mbuf(len); if (!m || m->m_next) { /*XXX*/ @@ -4499,9 +4993,9 @@ key_setsadbsa( m_freem(m); return NULL; } - + p = mtod(m, struct sadb_sa *); - + bzero(p, len); p->sadb_sa_len = PFKEY_UNIT64(len); p->sadb_sa_exttype = SADB_EXT_SA; @@ 
-4511,7 +5005,7 @@ key_setsadbsa( p->sadb_sa_auth = sav->alg_auth; p->sadb_sa_encrypt = sav->alg_enc; p->sadb_sa_flags = sav->flags; - + return m; } @@ -4520,49 +5014,85 @@ key_setsadbsa( */ static struct mbuf * key_setsadbaddr( - u_int16_t exttype, - struct sockaddr *saddr, - u_int8_t prefixlen, - u_int16_t ul_proto) + u_int16_t exttype, + struct sockaddr *saddr, + u_int8_t prefixlen, + u_int16_t ul_proto) { struct mbuf *m; struct sadb_address *p; size_t len; - + len = PFKEY_ALIGN8(sizeof(struct sadb_address)) + - PFKEY_ALIGN8(saddr->sa_len); + PFKEY_ALIGN8(saddr->sa_len); m = key_alloc_mbuf(len); if (!m || m->m_next) { /*XXX*/ if (m) m_freem(m); return NULL; } - + p = mtod(m, struct sadb_address *); - + bzero(p, len); p->sadb_address_len = PFKEY_UNIT64(len); p->sadb_address_exttype = exttype; p->sadb_address_proto = ul_proto; if (prefixlen == FULLMASK) { switch (saddr->sa_family) { - case AF_INET: - prefixlen = sizeof(struct in_addr) << 3; - break; - case AF_INET6: - prefixlen = sizeof(struct in6_addr) << 3; - break; - default: - ; /*XXX*/ + case AF_INET: + prefixlen = sizeof(struct in_addr) << 3; + break; + case AF_INET6: + prefixlen = sizeof(struct in6_addr) << 3; + break; + default: + ; /*XXX*/ } } p->sadb_address_prefixlen = prefixlen; p->sadb_address_reserved = 0; - + bcopy(saddr, - mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(struct sadb_address)), - saddr->sa_len); + mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(struct sadb_address)), + saddr->sa_len); + + return m; +} +static struct mbuf * +key_setsadbipsecif(ifnet_t internal_if, + ifnet_t outgoing_if, + ifnet_t ipsec_if, + int init_disabled) +{ + struct mbuf *m; + struct sadb_x_ipsecif *p; + size_t len; + + len = PFKEY_ALIGN8(sizeof(struct sadb_x_ipsecif)); + m = key_alloc_mbuf(len); + if (!m || m->m_next) { /*XXX*/ + if (m) + m_freem(m); + return NULL; + } + + p = mtod(m, struct sadb_x_ipsecif *); + + bzero(p, len); + p->sadb_x_ipsecif_len = PFKEY_UNIT64(len); + p->sadb_x_ipsecif_exttype = SADB_X_EXT_IPSECIF; + + if (internal_if && internal_if->if_xname) + strncpy(p->sadb_x_ipsecif_internal_if, internal_if->if_xname, IFXNAMSIZ); + if (outgoing_if && outgoing_if->if_xname) + strncpy(p->sadb_x_ipsecif_outgoing_if, outgoing_if->if_xname, IFXNAMSIZ); + if (ipsec_if && ipsec_if->if_xname) + strncpy(p->sadb_x_ipsecif_ipsec_if, ipsec_if->if_xname, IFXNAMSIZ); + + p->sadb_x_ipsecif_init_disabled = init_disabled; + return m; } @@ -4575,7 +5105,7 @@ key_setsadbsession_id (u_int64_t session_ids[]) struct mbuf *m; struct sadb_session_id *p; size_t len; - + len = PFKEY_ALIGN8(sizeof(*p)); m = key_alloc_mbuf(len); if (!m || m->m_next) { /*XXX*/ @@ -4583,15 +5113,15 @@ key_setsadbsession_id (u_int64_t session_ids[]) m_freem(m); return NULL; } - + p = mtod(m, __typeof__(p)); - + bzero(p, len); p->sadb_session_id_len = PFKEY_UNIT64(len); p->sadb_session_id_exttype = SADB_EXT_SESSION_ID; p->sadb_session_id_v[0] = session_ids[0]; p->sadb_session_id_v[1] = session_ids[1]; - + return m; } @@ -4600,17 +5130,17 @@ key_setsadbsession_id (u_int64_t session_ids[]) */ static struct mbuf * key_setsadbsastat (u_int32_t dir, - struct sastat *stats, - u_int32_t max_stats) + struct sastat *stats, + u_int32_t max_stats) { struct mbuf *m; struct sadb_sastat *p; int list_len, len; - + if (!stats) { - return NULL; + return NULL; } - + list_len = sizeof(*stats) * max_stats; len = PFKEY_ALIGN8(sizeof(*p)) + PFKEY_ALIGN8(list_len); m = key_alloc_mbuf(len); @@ -4619,20 +5149,20 @@ key_setsadbsastat (u_int32_t dir, m_freem(m); return NULL; } - + p = mtod(m, __typeof__(p)); - + 
bzero(p, len); p->sadb_sastat_len = PFKEY_UNIT64(len); p->sadb_sastat_exttype = SADB_EXT_SASTAT; p->sadb_sastat_dir = dir; p->sadb_sastat_list_len = max_stats; if (list_len) { - bcopy(stats, + bcopy(stats, mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(*p)), list_len); } - + return m; } @@ -4642,16 +5172,16 @@ key_setsadbsastat (u_int32_t dir, */ static struct mbuf * key_setsadbident( - u_int16_t exttype, - u_int16_t idtype, - caddr_t string, - int stringlen, - u_int64_t id) + u_int16_t exttype, + u_int16_t idtype, + caddr_t string, + int stringlen, + u_int64_t id) { struct mbuf *m; struct sadb_ident *p; size_t len; - + len = PFKEY_ALIGN8(sizeof(struct sadb_ident)) + PFKEY_ALIGN8(stringlen); m = key_alloc_mbuf(len); if (!m || m->m_next) { /*XXX*/ @@ -4659,20 +5189,20 @@ key_setsadbident( m_freem(m); return NULL; } - + p = mtod(m, struct sadb_ident *); - + bzero(p, len); p->sadb_ident_len = PFKEY_UNIT64(len); p->sadb_ident_exttype = exttype; p->sadb_ident_type = idtype; p->sadb_ident_reserved = 0; p->sadb_ident_id = id; - + bcopy(string, - mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(struct sadb_ident)), - stringlen); - + mtod(m, caddr_t) + PFKEY_ALIGN8(sizeof(struct sadb_ident)), + stringlen); + return m; } #endif @@ -4682,14 +5212,14 @@ key_setsadbident( */ static struct mbuf * key_setsadbxsa2( - u_int8_t mode, - u_int32_t seq, - u_int32_t reqid) + u_int8_t mode, + u_int32_t seq, + u_int32_t reqid) { struct mbuf *m; struct sadb_x_sa2 *p; size_t len; - + len = PFKEY_ALIGN8(sizeof(struct sadb_x_sa2)); m = key_alloc_mbuf(len); if (!m || m->m_next) { /*XXX*/ @@ -4697,9 +5227,9 @@ key_setsadbxsa2( m_freem(m); return NULL; } - + p = mtod(m, struct sadb_x_sa2 *); - + bzero(p, len); p->sadb_x_sa2_len = PFKEY_UNIT64(len); p->sadb_x_sa2_exttype = SADB_X_EXT_SA2; @@ -4708,7 +5238,7 @@ key_setsadbxsa2( p->sadb_x_sa2_reserved2 = 0; p->sadb_x_sa2_sequence = seq; p->sadb_x_sa2_reqid = reqid; - + return m; } @@ -4717,14 +5247,14 @@ key_setsadbxsa2( */ static struct mbuf * key_setsadbxpolicy( - u_int16_t type, - u_int8_t dir, - u_int32_t id) + u_int16_t type, + u_int8_t dir, + u_int32_t id) { struct mbuf *m; struct sadb_x_policy *p; size_t len; - + len = PFKEY_ALIGN8(sizeof(struct sadb_x_policy)); m = key_alloc_mbuf(len); if (!m || m->m_next) { /*XXX*/ @@ -4732,16 +5262,16 @@ key_setsadbxpolicy( m_freem(m); return NULL; } - + p = mtod(m, struct sadb_x_policy *); - + bzero(p, len); p->sadb_x_policy_len = PFKEY_UNIT64(len); p->sadb_x_policy_exttype = SADB_X_EXT_POLICY; p->sadb_x_policy_type = type; p->sadb_x_policy_dir = dir; p->sadb_x_policy_id = id; - + return m; } @@ -4751,11 +5281,11 @@ key_setsadbxpolicy( */ static void * key_newbuf( - const void *src, - u_int len) + const void *src, + u_int len) { caddr_t new; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); KMALLOC_NOWAIT(new, caddr_t, len); if (new == NULL) { @@ -4768,7 +5298,7 @@ key_newbuf( } } bcopy(src, new, len); - + return new; } @@ -4778,44 +5308,44 @@ key_newbuf( */ int key_ismyaddr( - struct sockaddr *sa) + struct sockaddr *sa) { #if INET struct sockaddr_in *sin; struct in_ifaddr *ia; #endif - + /* sanity check */ if (sa == NULL) panic("key_ismyaddr: NULL pointer is passed.\n"); - + switch (sa->sa_family) { #if INET - case AF_INET: - lck_rw_lock_shared(in_ifaddr_rwlock); - sin = (struct sockaddr_in *)(void *)sa; - for (ia = in_ifaddrhead.tqh_first; ia; - ia = ia->ia_link.tqe_next) { - IFA_LOCK_SPIN(&ia->ia_ifa); - if (sin->sin_family == ia->ia_addr.sin_family && - sin->sin_len == ia->ia_addr.sin_len && - sin->sin_addr.s_addr == 
ia->ia_addr.sin_addr.s_addr) - { + case AF_INET: + lck_rw_lock_shared(in_ifaddr_rwlock); + sin = (struct sockaddr_in *)(void *)sa; + for (ia = in_ifaddrhead.tqh_first; ia; + ia = ia->ia_link.tqe_next) { + IFA_LOCK_SPIN(&ia->ia_ifa); + if (sin->sin_family == ia->ia_addr.sin_family && + sin->sin_len == ia->ia_addr.sin_len && + sin->sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) + { + IFA_UNLOCK(&ia->ia_ifa); + lck_rw_done(in_ifaddr_rwlock); + return 1; + } IFA_UNLOCK(&ia->ia_ifa); - lck_rw_done(in_ifaddr_rwlock); - return 1; } - IFA_UNLOCK(&ia->ia_ifa); - } - lck_rw_done(in_ifaddr_rwlock); - break; + lck_rw_done(in_ifaddr_rwlock); + break; #endif #if INET6 - case AF_INET6: - return key_ismyaddr6((struct sockaddr_in6 *)(void *)sa); + case AF_INET6: + return key_ismyaddr6((struct sockaddr_in6 *)(void *)sa); #endif } - + return 0; } @@ -4830,22 +5360,22 @@ key_ismyaddr( static int key_ismyaddr6( - struct sockaddr_in6 *sin6) + struct sockaddr_in6 *sin6) { struct in6_ifaddr *ia; struct in6_multi *in6m; - + lck_rw_lock_shared(&in6_ifaddr_rwlock); for (ia = in6_ifaddrs; ia; ia = ia->ia_next) { IFA_LOCK(&ia->ia_ifa); if (key_sockaddrcmp((struct sockaddr *)&sin6, - (struct sockaddr *)&ia->ia_addr, 0) == 0) { + (struct sockaddr *)&ia->ia_addr, 0) == 0) { IFA_UNLOCK(&ia->ia_ifa); lck_rw_done(&in6_ifaddr_rwlock); return 1; } IFA_UNLOCK(&ia->ia_ifa); - + /* * XXX Multicast * XXX why do we care about multlicast here while we don't care @@ -4863,11 +5393,11 @@ key_ismyaddr6( } } lck_rw_done(&in6_ifaddr_rwlock); - + /* loopback, just for safety */ if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr)) return 1; - + return 0; } #endif /*INET6*/ @@ -4877,29 +5407,29 @@ key_ismyaddr6( * flag can specify to compare 2 saidxes. * compare two secasindex structure without both mode and reqid. * don't compare port. - * IN: + * IN: * saidx0: source, it can be in SAD. * saidx1: object. - * OUT: + * OUT: * 1 : equal * 0 : not equal */ static int key_cmpsaidx( - struct secasindex *saidx0, - struct secasindex *saidx1, - int flag) + struct secasindex *saidx0, + struct secasindex *saidx1, + int flag) { /* sanity */ if (saidx0 == NULL && saidx1 == NULL) return 1; - + if (saidx0 == NULL || saidx1 == NULL) return 0; - + if (saidx0->proto != saidx1->proto) return 0; - + if (flag == CMP_EXACTLY) { if (saidx0->mode != saidx1->mode) return 0; @@ -4909,7 +5439,7 @@ key_cmpsaidx( bcmp(&saidx0->dst, &saidx1->dst, saidx0->dst.ss_len) != 0) return 0; } else { - + /* CMP_MODE_REQID, CMP_REQID, CMP_HEAD */ if (flag & CMP_REQID) { /* @@ -4919,23 +5449,23 @@ key_cmpsaidx( if (saidx1->reqid != 0 && saidx0->reqid != saidx1->reqid) return 0; } - + if (flag & CMP_MODE) { if (saidx0->mode != IPSEC_MODE_ANY - && saidx0->mode != saidx1->mode) + && saidx0->mode != saidx1->mode) return 0; } - + if (key_sockaddrcmp((struct sockaddr *)&saidx0->src, - (struct sockaddr *)&saidx1->src, flag & CMP_PORT ? 1 : 0) != 0) { + (struct sockaddr *)&saidx1->src, flag & CMP_PORT ? 1 : 0) != 0) { return 0; } if (key_sockaddrcmp((struct sockaddr *)&saidx0->dst, - (struct sockaddr *)&saidx1->dst, flag & CMP_PORT ? 1 : 0) != 0) { + (struct sockaddr *)&saidx1->dst, flag & CMP_PORT ? 
1 : 0) != 0) { return 0; } } - + return 1; } @@ -4950,30 +5480,48 @@ key_cmpsaidx( */ static int key_cmpspidx_exactly( - struct secpolicyindex *spidx0, - struct secpolicyindex *spidx1) + struct secpolicyindex *spidx0, + struct secpolicyindex *spidx1) { /* sanity */ if (spidx0 == NULL && spidx1 == NULL) return 1; - + if (spidx0 == NULL || spidx1 == NULL) return 0; - + if (spidx0->prefs != spidx1->prefs - || spidx0->prefd != spidx1->prefd - || spidx0->ul_proto != spidx1->ul_proto) + || spidx0->prefd != spidx1->prefd + || spidx0->ul_proto != spidx1->ul_proto + || spidx0->internal_if != spidx1->internal_if) return 0; - + if (key_sockaddrcmp((struct sockaddr *)&spidx0->src, - (struct sockaddr *)&spidx1->src, 1) != 0) { + (struct sockaddr *)&spidx1->src, 1) != 0) { return 0; } if (key_sockaddrcmp((struct sockaddr *)&spidx0->dst, - (struct sockaddr *)&spidx1->dst, 1) != 0) { + (struct sockaddr *)&spidx1->dst, 1) != 0) { return 0; } - + + if (key_sockaddrcmp((struct sockaddr *)&spidx0->src_range.start, + (struct sockaddr *)&spidx1->src_range.start, 1) != 0) { + return 0; + } + if (key_sockaddrcmp((struct sockaddr *)&spidx0->src_range.end, + (struct sockaddr *)&spidx1->src_range.end, 1) != 0) { + return 0; + } + if (key_sockaddrcmp((struct sockaddr *)&spidx0->dst_range.start, + (struct sockaddr *)&spidx1->dst_range.start, 1) != 0) { + return 0; + } + if (key_sockaddrcmp((struct sockaddr *)&spidx0->dst_range.end, + (struct sockaddr *)&spidx1->dst_range.end, 1) != 0) { + return 0; + } + return 1; } @@ -4988,146 +5536,250 @@ key_cmpspidx_exactly( */ static int key_cmpspidx_withmask( - struct secpolicyindex *spidx0, - struct secpolicyindex *spidx1) + struct secpolicyindex *spidx0, + struct secpolicyindex *spidx1) { + int spidx0_src_is_range = 0; + int spidx0_dst_is_range = 0; + /* sanity */ if (spidx0 == NULL && spidx1 == NULL) return 1; - + if (spidx0 == NULL || spidx1 == NULL) return 0; - - if (spidx0->src.ss_family != spidx1->src.ss_family || - spidx0->dst.ss_family != spidx1->dst.ss_family || - spidx0->src.ss_len != spidx1->src.ss_len || - spidx0->dst.ss_len != spidx1->dst.ss_len) + + if (spidx0->src_range.start.ss_len > 0) + spidx0_src_is_range = 1; + + if (spidx0->dst_range.start.ss_len > 0) + spidx0_dst_is_range = 1; + + if ((spidx0_src_is_range ? spidx0->src_range.start.ss_family : spidx0->src.ss_family) != spidx1->src.ss_family || + (spidx0_dst_is_range ? spidx0->dst_range.start.ss_family : spidx0->dst.ss_family) != spidx1->dst.ss_family || + (spidx0_src_is_range ? spidx0->src_range.start.ss_len : spidx0->src.ss_len) != spidx1->src.ss_len || + (spidx0_dst_is_range ? spidx0->dst_range.start.ss_len : spidx0->dst.ss_len) != spidx1->dst.ss_len) return 0; - + /* if spidx.ul_proto == IPSEC_ULPROTO_ANY, ignore. */ if (spidx0->ul_proto != (u_int16_t)IPSEC_ULPROTO_ANY - && spidx0->ul_proto != spidx1->ul_proto) + && spidx0->ul_proto != spidx1->ul_proto) return 0; - - switch (spidx0->src.ss_family) { - case AF_INET: - if (satosin(&spidx0->src)->sin_port != IPSEC_PORT_ANY - && satosin(&spidx0->src)->sin_port != - satosin(&spidx1->src)->sin_port) - return 0; - if (!key_bbcmp((caddr_t)&satosin(&spidx0->src)->sin_addr, - (caddr_t)&satosin(&spidx1->src)->sin_addr, spidx0->prefs)) - return 0; - break; - case AF_INET6: - if (satosin6(&spidx0->src)->sin6_port != IPSEC_PORT_ANY - && satosin6(&spidx0->src)->sin6_port != - satosin6(&spidx1->src)->sin6_port) - return 0; - /* - * scope_id check. if sin6_scope_id is 0, we regard it - * as a wildcard scope, which matches any scope zone ID. 
- */ - if (satosin6(&spidx0->src)->sin6_scope_id && - satosin6(&spidx1->src)->sin6_scope_id && - satosin6(&spidx0->src)->sin6_scope_id != - satosin6(&spidx1->src)->sin6_scope_id) - return 0; - if (!key_bbcmp((caddr_t)&satosin6(&spidx0->src)->sin6_addr, - (caddr_t)&satosin6(&spidx1->src)->sin6_addr, spidx0->prefs)) - return 0; - break; - default: - /* XXX */ - if (bcmp(&spidx0->src, &spidx1->src, spidx0->src.ss_len) != 0) - return 0; - break; - } - - switch (spidx0->dst.ss_family) { - case AF_INET: - if (satosin(&spidx0->dst)->sin_port != IPSEC_PORT_ANY - && satosin(&spidx0->dst)->sin_port != - satosin(&spidx1->dst)->sin_port) - return 0; - if (!key_bbcmp((caddr_t)&satosin(&spidx0->dst)->sin_addr, - (caddr_t)&satosin(&spidx1->dst)->sin_addr, spidx0->prefd)) - return 0; - break; - case AF_INET6: - if (satosin6(&spidx0->dst)->sin6_port != IPSEC_PORT_ANY - && satosin6(&spidx0->dst)->sin6_port != - satosin6(&spidx1->dst)->sin6_port) - return 0; - /* - * scope_id check. if sin6_scope_id is 0, we regard it - * as a wildcard scope, which matches any scope zone ID. - */ - if (satosin6(&spidx0->src)->sin6_scope_id && - satosin6(&spidx1->src)->sin6_scope_id && - satosin6(&spidx0->dst)->sin6_scope_id != - satosin6(&spidx1->dst)->sin6_scope_id) - return 0; - if (!key_bbcmp((caddr_t)&satosin6(&spidx0->dst)->sin6_addr, - (caddr_t)&satosin6(&spidx1->dst)->sin6_addr, spidx0->prefd)) - return 0; - break; - default: - /* XXX */ - if (bcmp(&spidx0->dst, &spidx1->dst, spidx0->dst.ss_len) != 0) - return 0; - break; - } - + + /* If spidx1 specifies interface, ignore src addr */ + if (spidx1->internal_if != NULL) { + if (spidx0->internal_if == NULL + || spidx0->internal_if != spidx1->internal_if) + return 0; + + /* Still check ports */ + switch (spidx0->src.ss_family) { + case AF_INET: + if (spidx0_src_is_range && + (satosin(&spidx1->src)->sin_port < satosin(&spidx0->src_range.start)->sin_port + || satosin(&spidx1->src)->sin_port > satosin(&spidx0->src_range.end)->sin_port)) + return 0; + else if (satosin(&spidx0->src)->sin_port != IPSEC_PORT_ANY + && satosin(&spidx0->src)->sin_port != + satosin(&spidx1->src)->sin_port) + return 0; + break; + case AF_INET6: + if (spidx0_src_is_range && + (satosin6(&spidx1->src)->sin6_port < satosin6(&spidx0->src_range.start)->sin6_port + || satosin6(&spidx1->src)->sin6_port > satosin6(&spidx0->src_range.end)->sin6_port)) + return 0; + else if (satosin6(&spidx0->src)->sin6_port != IPSEC_PORT_ANY + && satosin6(&spidx0->src)->sin6_port != + satosin6(&spidx1->src)->sin6_port) + return 0; + break; + default: + break; + } + } else if (spidx0_src_is_range) { + if (!key_is_addr_in_range(&spidx1->src, &spidx0->src_range)) + return 0; + } else { + switch (spidx0->src.ss_family) { + case AF_INET: + if (satosin(&spidx0->src)->sin_port != IPSEC_PORT_ANY + && satosin(&spidx0->src)->sin_port != + satosin(&spidx1->src)->sin_port) + return 0; + if (!key_bbcmp((caddr_t)&satosin(&spidx0->src)->sin_addr, + (caddr_t)&satosin(&spidx1->src)->sin_addr, spidx0->prefs)) + return 0; + break; + case AF_INET6: + if (satosin6(&spidx0->src)->sin6_port != IPSEC_PORT_ANY + && satosin6(&spidx0->src)->sin6_port != + satosin6(&spidx1->src)->sin6_port) + return 0; + /* + * scope_id check. if sin6_scope_id is 0, we regard it + * as a wildcard scope, which matches any scope zone ID. 
+ */ + if (satosin6(&spidx0->src)->sin6_scope_id && + satosin6(&spidx1->src)->sin6_scope_id && + satosin6(&spidx0->src)->sin6_scope_id != + satosin6(&spidx1->src)->sin6_scope_id) + return 0; + if (!key_bbcmp((caddr_t)&satosin6(&spidx0->src)->sin6_addr, + (caddr_t)&satosin6(&spidx1->src)->sin6_addr, spidx0->prefs)) + return 0; + break; + default: + /* XXX */ + if (bcmp(&spidx0->src, &spidx1->src, spidx0->src.ss_len) != 0) + return 0; + break; + } + } + + if (spidx0_dst_is_range) { + if (!key_is_addr_in_range(&spidx1->dst, &spidx0->dst_range)) + return 0; + } else { + switch (spidx0->dst.ss_family) { + case AF_INET: + if (satosin(&spidx0->dst)->sin_port != IPSEC_PORT_ANY + && satosin(&spidx0->dst)->sin_port != + satosin(&spidx1->dst)->sin_port) + return 0; + if (!key_bbcmp((caddr_t)&satosin(&spidx0->dst)->sin_addr, + (caddr_t)&satosin(&spidx1->dst)->sin_addr, spidx0->prefd)) + return 0; + break; + case AF_INET6: + if (satosin6(&spidx0->dst)->sin6_port != IPSEC_PORT_ANY + && satosin6(&spidx0->dst)->sin6_port != + satosin6(&spidx1->dst)->sin6_port) + return 0; + /* + * scope_id check. if sin6_scope_id is 0, we regard it + * as a wildcard scope, which matches any scope zone ID. + */ + if (satosin6(&spidx0->src)->sin6_scope_id && + satosin6(&spidx1->src)->sin6_scope_id && + satosin6(&spidx0->dst)->sin6_scope_id != + satosin6(&spidx1->dst)->sin6_scope_id) + return 0; + if (!key_bbcmp((caddr_t)&satosin6(&spidx0->dst)->sin6_addr, + (caddr_t)&satosin6(&spidx1->dst)->sin6_addr, spidx0->prefd)) + return 0; + break; + default: + /* XXX */ + if (bcmp(&spidx0->dst, &spidx1->dst, spidx0->dst.ss_len) != 0) + return 0; + break; + } + } + /* XXX Do we check other field ? e.g. flowinfo */ - + return 1; } -/* returns 0 on match */ +static int +key_is_addr_in_range(struct sockaddr_storage *addr, struct secpolicyaddrrange *addr_range) +{ + int cmp = 0; + + if (addr == NULL || addr_range == NULL) + return 0; + + /* Must be greater than or equal to start */ + cmp = key_sockaddrcmp((struct sockaddr *)addr, (struct sockaddr *)&addr_range->start, 1); + if (cmp != 0 && cmp != 1) + return 0; + + /* Must be less than or equal to end */ + cmp = key_sockaddrcmp((struct sockaddr *)addr, (struct sockaddr *)&addr_range->end, 1); + if (cmp != 0 && cmp != -1) + return 0; + + return 1; +} + +/* + Return values: + -1: sa1 < sa2 + 0: sa1 == sa2 + 1: sa1 > sa2 + 2: Not comparable or error + */ static int key_sockaddrcmp( - struct sockaddr *sa1, - struct sockaddr *sa2, - int port) + struct sockaddr *sa1, + struct sockaddr *sa2, + int port) { + int result = 0; + int port_result = 0; + if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) - return 1; - + return 2; + + if (sa1->sa_len == 0) + return 0; + switch (sa1->sa_family) { - case AF_INET: - if (sa1->sa_len != sizeof(struct sockaddr_in)) - return 1; - if (satosin(sa1)->sin_addr.s_addr != - satosin(sa2)->sin_addr.s_addr) { - return 1; - } - if (port && satosin(sa1)->sin_port != satosin(sa2)->sin_port) - return 1; - break; - case AF_INET6: - if (sa1->sa_len != sizeof(struct sockaddr_in6)) - return 1; /*EINVAL*/ - if (satosin6(sa1)->sin6_scope_id != - satosin6(sa2)->sin6_scope_id) { - return 1; - } - if (!IN6_ARE_ADDR_EQUAL(&satosin6(sa1)->sin6_addr, - &satosin6(sa2)->sin6_addr)) { - return 1; - } - if (port && - satosin6(sa1)->sin6_port != satosin6(sa2)->sin6_port) { - return 1; - } - break; - default: - if (bcmp(sa1, sa2, sa1->sa_len) != 0) - return 1; - break; + case AF_INET: + if (sa1->sa_len != sizeof(struct sockaddr_in)) + return 2; + + result = 
memcmp(&satosin(sa1)->sin_addr.s_addr, &satosin(sa2)->sin_addr.s_addr, sizeof(satosin(sa1)->sin_addr.s_addr)); + + if (port) { + if (satosin(sa1)->sin_port < satosin(sa2)->sin_port) { + port_result = -1; + } else if (satosin(sa1)->sin_port > satosin(sa2)->sin_port) { + port_result = 1; + } + + if (result == 0) + result = port_result; + else if ((result > 0 && port_result < 0) || (result < 0 && port_result > 0)) + return 2; + } + + break; + case AF_INET6: + if (sa1->sa_len != sizeof(struct sockaddr_in6)) + return 2; /*EINVAL*/ + + if (satosin6(sa1)->sin6_scope_id != + satosin6(sa2)->sin6_scope_id) { + return 2; + } + + result = memcmp(&satosin6(sa1)->sin6_addr.s6_addr[0], &satosin6(sa2)->sin6_addr.s6_addr[0], sizeof(struct in6_addr)); + + if (port) { + if (satosin6(sa1)->sin6_port < satosin6(sa2)->sin6_port) { + port_result = -1; + } else if (satosin6(sa1)->sin6_port > satosin6(sa2)->sin6_port) { + port_result = 1; + } + + if (result == 0) + result = port_result; + else if ((result > 0 && port_result < 0) || (result < 0 && port_result > 0)) + return 2; + } + + break; + default: + result = memcmp(sa1, sa2, sa1->sa_len); + break; } - - return 0; + + if (result < 0) result = -1; + else if (result > 0) result = 1; + + return result; } /* @@ -5142,25 +5794,25 @@ key_sockaddrcmp( */ static int key_bbcmp( - caddr_t p1, - caddr_t p2, - u_int bits) + caddr_t p1, + caddr_t p2, + u_int bits) { u_int8_t mask; - + /* XXX: This could be considerably faster if we compare a word * at a time, but it is complicated on LSB Endian machines */ - + /* Handle null pointers */ if (p1 == NULL || p2 == NULL) return (p1 == p2); - + while (bits >= 8) { if (*p1++ != *p2++) return 0; bits -= 8; } - + if (bits > 0) { mask = ~((1<<(8-bits))-1); if ((*p1 & mask) != (*p2 & mask)) @@ -5187,9 +5839,10 @@ key_timehandler(void) struct secasvar **savexbuf = NULL, **savexptr = NULL; struct secasvar **savkabuf = NULL, **savkaptr = NULL; int spbufcount = 0, savbufcount = 0, spcount = 0, savexcount = 0, savkacount = 0, cnt; - - microtime(&tv); + int stop_handler = 1; /* stop the timehandler */ + microtime(&tv); + /* pre-allocate buffers before taking the lock */ /* if allocation failures occur - portions of the processing will be skipped */ if ((spbufcount = ipsec_policy_count) != 0) { @@ -5210,22 +5863,25 @@ key_timehandler(void) lck_mtx_lock(sadb_mutex); /* SPD */ if (spbuf) { - + struct secpolicy *sp, *nextsp; - + for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { for (sp = LIST_FIRST(&sptree[dir]); sp != NULL; sp = nextsp) { - - spd_count++; + + /* don't prevent timehandler from stopping for generate policy */ + if (sp->policy != IPSEC_POLICY_GENERATE) + stop_handler = 0; + spd_count++; nextsp = LIST_NEXT(sp, chain); - + if (sp->state == IPSEC_SPSTATE_DEAD) { key_freesp(sp, KEY_SADB_LOCKED); continue; } - + if (sp->lifetime == 0 && sp->validtime == 0) continue; if (spbuf && spcount < spbufcount) { @@ -5233,57 +5889,78 @@ key_timehandler(void) if ((sp->lifetime && tv.tv_sec - sp->created > sp->lifetime) || (sp->validtime - && tv.tv_sec - sp->lastused > sp->validtime)) { - //key_spdexpire(sp); - sp->state = IPSEC_SPSTATE_DEAD; - sp->refcnt++; - *spptr++ = sp; - spcount++; - } + && tv.tv_sec - sp->lastused > sp->validtime)) { + //key_spdexpire(sp); + sp->state = IPSEC_SPSTATE_DEAD; + sp->refcnt++; + *spptr++ = sp; + spcount++; + } } } } } - + /* SAD */ - if (savbufcount != 0) { + { struct secashead *sah, *nextsah; struct secasvar *sav, *nextsav; - + for (sah = LIST_FIRST(&sahtree); sah != NULL; sah = nextsah) { - - sah_count++; + + 
sah_count++; nextsah = LIST_NEXT(sah, chain); - + /* if sah has been dead, then delete it and process next sah. */ if (sah->state == SADB_SASTATE_DEAD) { key_delsah(sah); dead_sah_count++; continue; } - + if (LIST_FIRST(&sah->savtree[SADB_SASTATE_LARVAL]) == NULL && - LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]) == NULL && - LIST_FIRST(&sah->savtree[SADB_SASTATE_DYING]) == NULL && + LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]) == NULL && + LIST_FIRST(&sah->savtree[SADB_SASTATE_DYING]) == NULL && LIST_FIRST(&sah->savtree[SADB_SASTATE_DEAD]) == NULL) { - key_delsah(sah); + key_delsah(sah); empty_sah_count++; continue; } - + + if (savbufcount == 0) { + continue; + } + + stop_handler = 0; + /* if LARVAL entry doesn't become MATURE, delete it. */ for (sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_LARVAL]); sav != NULL; sav = nextsav) { - - larval_sav_count++; + + larval_sav_count++; total_sav_count++; nextsav = LIST_NEXT(sav, chain); - - if (tv.tv_sec - sav->created > key_larval_lifetime) { - key_freesav(sav, KEY_SADB_LOCKED); + + if (sav->lft_h != NULL) { + /* If a hard lifetime is defined for the LARVAL SA, use it */ + if (sav->lft_h->sadb_lifetime_addtime != 0 + && tv.tv_sec - sav->created > sav->lft_h->sadb_lifetime_addtime) { + if (sav->always_expire) { + key_send_delete(sav); + sav = NULL; + } else { + key_sa_chgstate(sav, SADB_SASTATE_DEAD); + key_freesav(sav, KEY_SADB_LOCKED); + sav = NULL; + } + } + } else { + if (tv.tv_sec - sav->created > key_larval_lifetime) { + key_freesav(sav, KEY_SADB_LOCKED); + } } } @@ -5314,42 +5991,42 @@ key_timehandler(void) for (sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]); sav != NULL; sav = nextsav) { - - mature_sav_count++; + + mature_sav_count++; total_sav_count++; nextsav = LIST_NEXT(sav, chain); - + /* we don't need to check. */ if (sav->lft_s == NULL) continue; - + /* sanity check */ if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG,"key_timehandler: " - "There is no CURRENT time, why?\n")); + "There is no CURRENT time, why?\n")); continue; } - + /* check SOFT lifetime */ if (sav->lft_s->sadb_lifetime_addtime != 0 - && tv.tv_sec - sav->created > sav->lft_s->sadb_lifetime_addtime) { + && tv.tv_sec - sav->created > sav->lft_s->sadb_lifetime_addtime) { /* - * check the SA if it has been used. - * when it hasn't been used, delete it. - * i don't think such SA will be used. + * If always_expire is set, expire. Otherwise, + * if the SA has not been used, delete immediately. */ - if (sav->lft_c->sadb_lifetime_usetime == 0) { + if (sav->lft_c->sadb_lifetime_usetime == 0 + && sav->always_expire == 0) { key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_freesav(sav, KEY_SADB_LOCKED); sav = NULL; } else if (savexbuf && savexcount < savbufcount) { - key_sa_chgstate(sav, SADB_SASTATE_DYING); + key_sa_chgstate(sav, SADB_SASTATE_DYING); sav->refcnt++; *savexptr++ = sav; savexcount++; } } - + /* check SOFT lifetime by bytes */ /* * XXX I don't know the way to delete this SA @@ -5357,9 +6034,9 @@ key_timehandler(void) * installed too big lifetime by time. */ else if (savexbuf && savexcount < savbufcount - && sav->lft_s->sadb_lifetime_bytes != 0 - && sav->lft_s->sadb_lifetime_bytes < sav->lft_c->sadb_lifetime_bytes) { - + && sav->lft_s->sadb_lifetime_bytes != 0 + && sav->lft_s->sadb_lifetime_bytes < sav->lft_c->sadb_lifetime_bytes) { + /* * XXX If we keep to send expire * message in the status of @@ -5372,43 +6049,48 @@ key_timehandler(void) savexcount++; } } - + /* check DYING entry to change status to DEAD. 
*/ for (sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_DYING]); sav != NULL; sav = nextsav) { - - dying_sav_count++; + + dying_sav_count++; total_sav_count++; nextsav = LIST_NEXT(sav, chain); - + /* we don't need to check. */ if (sav->lft_h == NULL) continue; - + /* sanity check */ if (sav->lft_c == NULL) { ipseclog((LOG_DEBUG, "key_timehandler: " - "There is no CURRENT time, why?\n")); + "There is no CURRENT time, why?\n")); continue; } - + if (sav->lft_h->sadb_lifetime_addtime != 0 - && tv.tv_sec - sav->created > sav->lft_h->sadb_lifetime_addtime) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - key_freesav(sav, KEY_SADB_LOCKED); - sav = NULL; + && tv.tv_sec - sav->created > sav->lft_h->sadb_lifetime_addtime) { + if (sav->always_expire) { + key_send_delete(sav); + sav = NULL; + } else { + key_sa_chgstate(sav, SADB_SASTATE_DEAD); + key_freesav(sav, KEY_SADB_LOCKED); + sav = NULL; + } } #if 0 /* XXX Should we keep to send expire message until HARD lifetime ? */ else if (savbuf && savexcount < savbufcount - && sav->lft_s != NULL - && sav->lft_s->sadb_lifetime_addtime != 0 - && tv.tv_sec - sav->created > sav->lft_s->sadb_lifetime_addtime) { + && sav->lft_s != NULL + && sav->lft_s->sadb_lifetime_addtime != 0 + && tv.tv_sec - sav->created > sav->lft_s->sadb_lifetime_addtime) { /* * XXX: should be checked to be * installed the valid SA. */ - + /* * If there is no SA then sending * expire message. @@ -5421,31 +6103,31 @@ key_timehandler(void) #endif /* check HARD lifetime by bytes */ else if (sav->lft_h->sadb_lifetime_bytes != 0 - && sav->lft_h->sadb_lifetime_bytes < sav->lft_c->sadb_lifetime_bytes) { + && sav->lft_h->sadb_lifetime_bytes < sav->lft_c->sadb_lifetime_bytes) { key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_freesav(sav, KEY_SADB_LOCKED); sav = NULL; } } - + /* delete entry in DEAD */ for (sav = LIST_FIRST(&sah->savtree[SADB_SASTATE_DEAD]); sav != NULL; sav = nextsav) { - - dead_sav_count++; + + dead_sav_count++; total_sav_count++; nextsav = LIST_NEXT(sav, chain); - + /* sanity check */ if (sav->state != SADB_SASTATE_DEAD) { ipseclog((LOG_DEBUG, "key_timehandler: " - "invalid sav->state " - "(queue: %d SA: %d): " - "kill it anyway\n", - SADB_SASTATE_DEAD, sav->state)); + "invalid sav->state " + "(queue: %d SA: %d): " + "kill it anyway\n", + SADB_SASTATE_DEAD, sav->state)); } - + /* * do not call key_freesav() here. 
* sav should already be freed, and sav->refcnt @@ -5454,72 +6136,74 @@ key_timehandler(void) */ } } - } - - if (++key_timehandler_debug >= 300) { - if (key_debug_level) { - printf("%s: total stats for %u calls\n", __FUNCTION__, key_timehandler_debug); - printf("%s: walked %u SPDs\n", __FUNCTION__, spd_count); - printf("%s: walked %llu SAs: LARVAL SAs %u, MATURE SAs %u, DYING SAs %u, DEAD SAs %u\n", __FUNCTION__, - total_sav_count, larval_sav_count, mature_sav_count, dying_sav_count, dead_sav_count); - printf("%s: walked %u SAHs: DEAD SAHs %u, EMPTY SAHs %u\n", __FUNCTION__, - sah_count, dead_sah_count, empty_sah_count); - if (sah_search_calls) { - printf("%s: SAH search cost %d iters per call\n", __FUNCTION__, - (sah_search_count/sah_search_calls)); - } - } - spd_count = 0; - sah_count = 0; - dead_sah_count = 0; - empty_sah_count = 0; - larval_sav_count = 0; - mature_sav_count = 0; - dying_sav_count = 0; - dead_sav_count = 0; - total_sav_count = 0; - sah_search_count = 0; - sah_search_calls = 0; - key_timehandler_debug = 0; - } + } + + if (++key_timehandler_debug >= 300) { + if (key_debug_level) { + printf("%s: total stats for %u calls\n", __FUNCTION__, key_timehandler_debug); + printf("%s: walked %u SPDs\n", __FUNCTION__, spd_count); + printf("%s: walked %llu SAs: LARVAL SAs %u, MATURE SAs %u, DYING SAs %u, DEAD SAs %u\n", __FUNCTION__, + total_sav_count, larval_sav_count, mature_sav_count, dying_sav_count, dead_sav_count); + printf("%s: walked %u SAHs: DEAD SAHs %u, EMPTY SAHs %u\n", __FUNCTION__, + sah_count, dead_sah_count, empty_sah_count); + if (sah_search_calls) { + printf("%s: SAH search cost %d iters per call\n", __FUNCTION__, + (sah_search_count/sah_search_calls)); + } + } + spd_count = 0; + sah_count = 0; + dead_sah_count = 0; + empty_sah_count = 0; + larval_sav_count = 0; + mature_sav_count = 0; + dying_sav_count = 0; + dead_sav_count = 0; + total_sav_count = 0; + sah_search_count = 0; + sah_search_calls = 0; + key_timehandler_debug = 0; + } #ifndef IPSEC_NONBLOCK_ACQUIRE /* ACQ tree */ { - struct secacq *acq, *nextacq; - - for (acq = LIST_FIRST(&acqtree); - acq != NULL; - acq = nextacq) { - - nextacq = LIST_NEXT(acq, chain); - - if (tv.tv_sec - acq->created > key_blockacq_lifetime - && __LIST_CHAINED(acq)) { - LIST_REMOVE(acq, chain); - KFREE(acq); + struct secacq *acq, *nextacq; + + for (acq = LIST_FIRST(&acqtree); + acq != NULL; + acq = nextacq) { + + stop_handler = 0; + nextacq = LIST_NEXT(acq, chain); + + if (tv.tv_sec - acq->created > key_blockacq_lifetime + && __LIST_CHAINED(acq)) { + LIST_REMOVE(acq, chain); + KFREE(acq); + } } - } } #endif - + /* SP ACQ tree */ { - struct secspacq *acq, *nextacq; - - for (acq = LIST_FIRST(&spacqtree); - acq != NULL; - acq = nextacq) { - - nextacq = LIST_NEXT(acq, chain); - - if (tv.tv_sec - acq->created > key_blockacq_lifetime - && __LIST_CHAINED(acq)) { - LIST_REMOVE(acq, chain); - KFREE(acq); + struct secspacq *acq, *nextacq; + + for (acq = LIST_FIRST(&spacqtree); + acq != NULL; + acq = nextacq) { + + stop_handler = 0; + nextacq = LIST_NEXT(acq, chain); + + if (tv.tv_sec - acq->created > key_blockacq_lifetime + && __LIST_CHAINED(acq)) { + LIST_REMOVE(acq, chain); + KFREE(acq); + } } - } } - + /* initialize random seed */ if (key_tick_init_random++ > key_int_random) { key_tick_init_random = 0; @@ -5527,9 +6211,9 @@ key_timehandler(void) } natt_now++; - + lck_mtx_unlock(sadb_mutex); - + /* send messages outside of sadb_mutex */ if (spbuf && spcount > 0) { cnt = spcount; @@ -5539,7 +6223,7 @@ key_timehandler(void) if (savkabuf && 
savkacount > 0) { struct secasvar **savkaptr_sav = savkaptr; int cnt_send = savkacount; - + while (cnt_send--) { if (ipsec_send_natt_keepalive(*(--savkaptr))) { // iterate (all over again) and update timestamps @@ -5547,7 +6231,7 @@ key_timehandler(void) int cnt_update = savkacount; while (cnt_update--) { key_update_natt_keepalive_timestamp(*savkaptr, - *(--savkaptr_update)); + *(--savkaptr_update)); } } } @@ -5575,14 +6259,17 @@ key_timehandler(void) key_freesav(*savexptr++, KEY_SADB_LOCKED); KFREE(savexbuf); } - lck_mtx_unlock(sadb_mutex); - #ifndef IPSEC_DEBUG2 - /* do exchange to tick time !! */ - (void)timeout((void *)key_timehandler, (void *)0, hz); + if (stop_handler) + key_timehandler_running = 0; + else { + /* do exchange to tick time !! */ + (void)timeout((void *)key_timehandler, (void *)0, hz); + } #endif /* IPSEC_DEBUG2 */ + lck_mtx_unlock(sadb_mutex); return; } @@ -5597,12 +6284,12 @@ key_srandom(void) random(); #else struct timeval tv; - + microtime(&tv); - + srandom(tv.tv_usec); #endif - + return; } @@ -5610,36 +6297,36 @@ u_int32_t key_random(void) { u_int32_t value; - + key_randomfill(&value, sizeof(value)); return value; } void key_randomfill( - void *p, - size_t l) + void *p, + size_t l) { #ifdef __APPLE__ - + read_random(p, (u_int)l); #else size_t n; u_int32_t v; static int warn = 1; - + n = 0; n = (size_t)read_random(p, (u_int)l); /* last resort */ while (n < l) { v = random(); bcopy(&v, (u_int8_t *)p + n, - l - n < sizeof(v) ? l - n : sizeof(v)); + l - n < sizeof(v) ? l - n : sizeof(v)); n += sizeof(v); - + if (warn) { printf("WARNING: pseudo-random number generator " - "used for IPsec processing\n"); + "used for IPsec processing\n"); warn = 0; } } @@ -5654,20 +6341,20 @@ key_randomfill( */ static u_int16_t key_satype2proto( - u_int8_t satype) + u_int8_t satype) { switch (satype) { - case SADB_SATYPE_UNSPEC: - return IPSEC_PROTO_ANY; - case SADB_SATYPE_AH: - return IPPROTO_AH; - case SADB_SATYPE_ESP: - return IPPROTO_ESP; - case SADB_X_SATYPE_IPCOMP: - return IPPROTO_IPCOMP; - break; - default: - return 0; + case SADB_SATYPE_UNSPEC: + return IPSEC_PROTO_ANY; + case SADB_SATYPE_AH: + return IPPROTO_AH; + case SADB_SATYPE_ESP: + return IPPROTO_ESP; + case SADB_X_SATYPE_IPCOMP: + return IPPROTO_IPCOMP; + break; + default: + return 0; } /* NOTREACHED */ } @@ -5679,18 +6366,18 @@ key_satype2proto( */ static u_int8_t key_proto2satype( - u_int16_t proto) + u_int16_t proto) { switch (proto) { - case IPPROTO_AH: - return SADB_SATYPE_AH; - case IPPROTO_ESP: - return SADB_SATYPE_ESP; - case IPPROTO_IPCOMP: - return SADB_X_SATYPE_IPCOMP; - break; - default: - return 0; + case IPPROTO_AH: + return SADB_SATYPE_AH; + case IPPROTO_ESP: + return SADB_SATYPE_ESP; + case IPPROTO_IPCOMP: + return SADB_X_SATYPE_IPCOMP; + break; + default: + return 0; } /* NOTREACHED */ } @@ -5710,9 +6397,9 @@ key_proto2satype( */ static int key_getspi( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct sadb_address *src0, *dst0; struct secasindex saidx; @@ -5723,13 +6410,13 @@ key_getspi( u_int8_t mode; u_int32_t reqid; int error; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_getspi: NULL pointer is passed.\n"); - + if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { ipseclog((LOG_DEBUG, "key_getspi: invalid message is passed.\n")); @@ -5742,70 +6429,70 @@ 
key_getspi( } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *) - (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *) - (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; + (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } - + src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_getspi: invalid satype is passed.\n")); return key_senderror(so, m, EINVAL); } - + /* make sure if port number is zero. */ switch (((struct sockaddr *)(src0 + 1))->sa_family) { - case AF_INET: - if (((struct sockaddr *)(src0 + 1))->sa_len != - sizeof(struct sockaddr_in)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in *)(void *)(src0 + 1))->sin_port = 0; - break; - case AF_INET6: - if (((struct sockaddr *)(src0 + 1))->sa_len != - sizeof(struct sockaddr_in6)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in6 *)(void *)(src0 + 1))->sin6_port = 0; - break; - default: - ; /*???*/ + case AF_INET: + if (((struct sockaddr *)(src0 + 1))->sa_len != + sizeof(struct sockaddr_in)) + return key_senderror(so, m, EINVAL); + ((struct sockaddr_in *)(void *)(src0 + 1))->sin_port = 0; + break; + case AF_INET6: + if (((struct sockaddr *)(src0 + 1))->sa_len != + sizeof(struct sockaddr_in6)) + return key_senderror(so, m, EINVAL); + ((struct sockaddr_in6 *)(void *)(src0 + 1))->sin6_port = 0; + break; + default: + ; /*???*/ } switch (((struct sockaddr *)(dst0 + 1))->sa_family) { - case AF_INET: - if (((struct sockaddr *)(dst0 + 1))->sa_len != - sizeof(struct sockaddr_in)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in *)(void *)(dst0 + 1))->sin_port = 0; - break; - case AF_INET6: - if (((struct sockaddr *)(dst0 + 1))->sa_len != - sizeof(struct sockaddr_in6)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in6 *)(void *)(dst0 + 1))->sin6_port = 0; - break; - default: - ; /*???*/ + case AF_INET: + if (((struct sockaddr *)(dst0 + 1))->sa_len != + sizeof(struct sockaddr_in)) + return key_senderror(so, m, EINVAL); + ((struct sockaddr_in *)(void *)(dst0 + 1))->sin_port = 0; + break; + case AF_INET6: + if (((struct sockaddr *)(dst0 + 1))->sa_len != + sizeof(struct sockaddr_in6)) + return key_senderror(so, m, EINVAL); + ((struct sockaddr_in6 *)(void *)(dst0 + 1))->sin6_port = 0; + break; + default: + ; /*???*/ } - + /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); - + lck_mtx_lock(sadb_mutex); - + /* SPI allocation */ spi = key_do_getnewspi((struct sadb_spirange *) - (void *)mhp->ext[SADB_EXT_SPIRANGE], &saidx); + (void *)mhp->ext[SADB_EXT_SPIRANGE], &saidx); if (spi == 0) { lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, EINVAL); } - + /* get a SA index */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA index: key_addspi is always used for inbound spi */ @@ -5815,7 +6502,7 @@ key_getspi( return key_senderror(so, m, ENOBUFS); } } - + /* get a new SA */ /* XXX rewrite */ newsav = key_newsav(m, mhp, newsah, &error); @@ -5824,10 +6511,10 @@ key_getspi( lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, error); } - + /* set spi */ key_setspi(newsav, htonl(spi)); - + #ifndef IPSEC_NONBLOCK_ACQUIRE /* delete the entry in acqtree */ if (mhp->msg->sadb_msg_seq != 0) { @@ 
-5839,77 +6526,77 @@ key_getspi( acq->created = tv.tv_sec; acq->count = 0; } - } + } #endif - + lck_mtx_unlock(sadb_mutex); { - struct mbuf *n, *nn; - struct sadb_sa *m_sa; - struct sadb_msg *newmsg; - int off, len; - - /* create new sadb_msg to reply. */ - len = PFKEY_ALIGN8(sizeof(struct sadb_msg)) + + struct mbuf *n, *nn; + struct sadb_sa *m_sa; + struct sadb_msg *newmsg; + int off, len; + + /* create new sadb_msg to reply. */ + len = PFKEY_ALIGN8(sizeof(struct sadb_msg)) + PFKEY_ALIGN8(sizeof(struct sadb_sa)); - if (len > MCLBYTES) - return key_senderror(so, m, ENOBUFS); - - MGETHDR(n, M_WAITOK, MT_DATA); - if (n && len > MHLEN) { - MCLGET(n, M_WAITOK); - if ((n->m_flags & M_EXT) == 0) { - m_freem(n); - n = NULL; + if (len > MCLBYTES) + return key_senderror(so, m, ENOBUFS); + + MGETHDR(n, M_WAITOK, MT_DATA); + if (n && len > MHLEN) { + MCLGET(n, M_WAITOK); + if ((n->m_flags & M_EXT) == 0) { + m_freem(n); + n = NULL; + } } - } - if (!n) - return key_senderror(so, m, ENOBUFS); - - n->m_len = len; - n->m_next = NULL; - off = 0; - - m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); - off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); - - m_sa = (struct sadb_sa *)(void *)(mtod(n, caddr_t) + off); - m_sa->sadb_sa_len = PFKEY_UNIT64(sizeof(struct sadb_sa)); - m_sa->sadb_sa_exttype = SADB_EXT_SA; - m_sa->sadb_sa_spi = htonl(spi); - off += PFKEY_ALIGN8(sizeof(struct sadb_sa)); - + if (!n) + return key_senderror(so, m, ENOBUFS); + + n->m_len = len; + n->m_next = NULL; + off = 0; + + m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); + off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); + + m_sa = (struct sadb_sa *)(void *)(mtod(n, caddr_t) + off); + m_sa->sadb_sa_len = PFKEY_UNIT64(sizeof(struct sadb_sa)); + m_sa->sadb_sa_exttype = SADB_EXT_SA; + m_sa->sadb_sa_spi = htonl(spi); + off += PFKEY_ALIGN8(sizeof(struct sadb_sa)); + #if DIAGNOSTIC - if (off != len) - panic("length inconsistency in key_getspi"); + if (off != len) + panic("length inconsistency in key_getspi"); #endif - { - int mbufItems[] = {SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST}; - n->m_next = key_gather_mbuf(m, mhp, 0, sizeof(mbufItems)/sizeof(int), mbufItems); - if (!n->m_next) { - m_freem(n); - return key_senderror(so, m, ENOBUFS); - } - } - - if (n->m_len < sizeof(struct sadb_msg)) { - n = m_pullup(n, sizeof(struct sadb_msg)); - if (n == NULL) - return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); - } - - n->m_pkthdr.len = 0; - for (nn = n; nn; nn = nn->m_next) - n->m_pkthdr.len += nn->m_len; - - newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_seq = newsav->seq; - newmsg->sadb_msg_errno = 0; - newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + { + int mbufItems[] = {SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST}; + n->m_next = key_gather_mbuf(m, mhp, 0, sizeof(mbufItems)/sizeof(int), mbufItems); + if (!n->m_next) { + m_freem(n); + return key_senderror(so, m, ENOBUFS); + } + } + + if (n->m_len < sizeof(struct sadb_msg)) { + n = m_pullup(n, sizeof(struct sadb_msg)); + if (n == NULL) + return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); + } + + n->m_pkthdr.len = 0; + for (nn = n; nn; nn = nn->m_next) + n->m_pkthdr.len += nn->m_len; + + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_seq = newsav->seq; + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } @@ -5923,12 +6610,12 @@ key_getspi2(struct sockaddr *src, { u_int32_t spi; struct 
secasindex saidx; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); - + /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src, dst, &saidx); - + /* make sure if port number is zero. */ switch (((struct sockaddr *)&saidx.src)->sa_family) { case AF_INET: @@ -5958,14 +6645,14 @@ key_getspi2(struct sockaddr *src, default: ; /*???*/ } - + lck_mtx_lock(sadb_mutex); /* SPI allocation */ spi = key_do_getnewspi(spirange, &saidx); - + lck_mtx_unlock(sadb_mutex); - + return spi; } @@ -5978,15 +6665,15 @@ key_getspi2(struct sockaddr *src, */ static u_int32_t key_do_getnewspi( - struct sadb_spirange *spirange, - struct secasindex *saidx) + struct sadb_spirange *spirange, + struct secasindex *saidx) { u_int32_t newspi; u_int32_t keymin, keymax; int count = key_spi_trycnt; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* set spi range to allocate */ if (spirange != NULL) { keymin = spirange->sadb_spirange_min; @@ -6006,44 +6693,44 @@ key_do_getnewspi( t = keymin; keymin = keymax; keymax = t; } } - + if (keymin == keymax) { if (key_checkspidup(saidx, keymin) != NULL) { ipseclog((LOG_DEBUG, "key_do_getnewspi: SPI %u exists already.\n", keymin)); return 0; } - + count--; /* taking one cost. */ newspi = keymin; - + } else { - + u_int32_t range = keymax - keymin + 1; /* overflow value of zero means full range */ - + /* init SPI */ newspi = 0; - + /* when requesting to allocate spi ranged */ while (count--) { u_int32_t rand_val = key_random(); /* generate pseudo-random SPI value ranged. */ newspi = (range == 0 ? rand_val : keymin + (rand_val % range)); - + if (key_checkspidup(saidx, newspi) == NULL) break; } - + if (count == 0 || newspi == 0) { ipseclog((LOG_DEBUG, "key_do_getnewspi: to allocate spi is failed.\n")); return 0; } } - + /* statistics */ keystat.getspi_count = - (keystat.getspi_count + key_spi_trycnt - count) / 2; - + (keystat.getspi_count + key_spi_trycnt - count) / 2; + return newspi; } @@ -6062,9 +6749,9 @@ key_do_getnewspi( */ static int key_update( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; @@ -6075,19 +6762,19 @@ key_update( u_int8_t mode; u_int32_t reqid; int error; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_update: NULL pointer is passed.\n"); - + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_update: invalid satype is passed.\n")); return key_senderror(so, m, EINVAL); } - + if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || @@ -6099,9 +6786,9 @@ key_update( mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { - ipseclog((LOG_DEBUG, "key_update: invalid message is passed.\n")); - return key_senderror(so, m, EINVAL); - } + ipseclog((LOG_DEBUG, "key_update: invalid message is passed.\n")); + return key_senderror(so, m, EINVAL); + } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { @@ -6110,31 +6797,31 @@ key_update( } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *) - (void 
*)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *) - (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; + (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } /* XXX boundary checking for other extensions */ - + sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - + /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); - + lck_mtx_lock(sadb_mutex); - + /* get a SA header */ if ((sah = key_getsah(&saidx)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_update: no SA index found.\n")); return key_senderror(so, m, ENOENT); } - + /* set spidx if there */ /* XXX rewrite */ error = key_setident(sah, m, mhp); @@ -6142,53 +6829,53 @@ key_update( lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, error); } - + /* find a SA with sequence number. */ #if IPSEC_DOSEQCHECK if (mhp->msg->sadb_msg_seq != 0 - && (sav = key_getsavbyseq(sah, mhp->msg->sadb_msg_seq)) == NULL) { + && (sav = key_getsavbyseq(sah, mhp->msg->sadb_msg_seq)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, - "key_update: no larval SA with sequence %u exists.\n", - mhp->msg->sadb_msg_seq)); + "key_update: no larval SA with sequence %u exists.\n", + mhp->msg->sadb_msg_seq)); return key_senderror(so, m, ENOENT); } #else if ((sav = key_getsavbyspi(sah, sa0->sadb_sa_spi)) == NULL) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, - "key_update: no such a SA found (spi:%u)\n", - (u_int32_t)ntohl(sa0->sadb_sa_spi))); + "key_update: no such a SA found (spi:%u)\n", + (u_int32_t)ntohl(sa0->sadb_sa_spi))); return key_senderror(so, m, EINVAL); } #endif - + /* validity check */ if (sav->sah->saidx.proto != proto) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, - "key_update: protocol mismatched (DB=%u param=%u)\n", - sav->sah->saidx.proto, proto)); + "key_update: protocol mismatched (DB=%u param=%u)\n", + sav->sah->saidx.proto, proto)); return key_senderror(so, m, EINVAL); } #if IPSEC_DOSEQCHECK if (sav->spi != sa0->sadb_sa_spi) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, - "key_update: SPI mismatched (DB:%u param:%u)\n", - (u_int32_t)ntohl(sav->spi), - (u_int32_t)ntohl(sa0->sadb_sa_spi))); + "key_update: SPI mismatched (DB:%u param:%u)\n", + (u_int32_t)ntohl(sav->spi), + (u_int32_t)ntohl(sa0->sadb_sa_spi))); return key_senderror(so, m, EINVAL); } #endif if (sav->pid != mhp->msg->sadb_msg_pid) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, - "key_update: pid mismatched (DB:%u param:%u)\n", - sav->pid, mhp->msg->sadb_msg_pid)); + "key_update: pid mismatched (DB:%u param:%u)\n", + sav->pid, mhp->msg->sadb_msg_pid)); return key_senderror(so, m, EINVAL); } - + /* copy sav values */ error = key_setsaval(sav, m, mhp); if (error) { @@ -6203,9 +6890,9 @@ key_update( */ if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0 && (sav->sah->saidx.mode != IPSEC_MODE_TRANSPORT || - sav->sah->saidx.src.ss_family != AF_INET)) + sav->sah->saidx.src.ss_family != AF_INET)) sav->flags &= ~SADB_X_EXT_NATT_MULTIPLEUSERS; - + /* check SA values to be mature. 
*/ if ((error = key_mature(sav)) != 0) { key_freesav(sav, KEY_SADB_LOCKED); @@ -6216,17 +6903,17 @@ key_update( lck_mtx_unlock(sadb_mutex); { - struct mbuf *n; - - /* set msg buf from mhp */ - n = key_getmsgbuf_x1(m, mhp); - if (n == NULL) { - ipseclog((LOG_DEBUG, "key_update: No more memory.\n")); - return key_senderror(so, m, ENOBUFS); - } - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + struct mbuf *n; + + /* set msg buf from mhp */ + n = key_getmsgbuf_x1(m, mhp); + if (n == NULL) { + ipseclog((LOG_DEBUG, "key_update: No more memory.\n")); + return key_senderror(so, m, ENOBUFS); + } + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } @@ -6240,31 +6927,31 @@ key_update( #if IPSEC_DOSEQCHECK static struct secasvar * key_getsavbyseq( - struct secashead *sah, - u_int32_t seq) + struct secashead *sah, + u_int32_t seq) { struct secasvar *sav; u_int state; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + state = SADB_SASTATE_LARVAL; - + /* search SAD with sequence number ? */ LIST_FOREACH(sav, &sah->savtree[state], chain) { - + KEY_CHKSASTATE(state, sav->state, "key_getsabyseq"); - + if (sav->seq == seq) { sav->refcnt++; KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP key_getsavbyseq cause " - "refcnt++:%d SA:%p\n", - sav->refcnt, sav)); + printf("DP key_getsavbyseq cause " + "refcnt++:%d SA:0x%llx\n", sav->refcnt, + (uint64_t)VM_KERNEL_ADDRPERM(sav))); return sav; } } - + return NULL; } #endif @@ -6286,9 +6973,9 @@ key_getsavbyseq( */ static int key_add( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; @@ -6299,19 +6986,19 @@ key_add( u_int8_t mode; u_int32_t reqid; int error; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_add: NULL pointer is passed.\n"); - + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_add: invalid satype is passed.\n")); return key_senderror(so, m, EINVAL); } - + if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || @@ -6323,9 +7010,9 @@ key_add( mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { - ipseclog((LOG_DEBUG, "key_add: invalid message is passed.\n")); - return key_senderror(so, m, EINVAL); - } + ipseclog((LOG_DEBUG, "key_add: invalid message is passed.\n")); + return key_senderror(so, m, EINVAL); + } if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { @@ -6335,23 +7022,23 @@ key_add( } if (mhp->ext[SADB_X_EXT_SA2] != NULL) { mode = ((struct sadb_x_sa2 *) - (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; reqid = ((struct sadb_x_sa2 *) - (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; + (void *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } else { mode = IPSEC_MODE_ANY; reqid = 0; } - + sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - + /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 
1, &saidx); - + lck_mtx_lock(sadb_mutex); - + /* get a SA header */ if ((newsah = key_getsah(&saidx)) == NULL) { /* create a new SA header: key_addspi is always used for outbound spi */ @@ -6361,7 +7048,7 @@ key_add( return key_senderror(so, m, ENOBUFS); } } - + /* set spidx if there */ /* XXX rewrite */ error = key_setident(newsah, m, mhp); @@ -6369,7 +7056,7 @@ key_add( lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, error); } - + /* create new SA entry. */ /* We can create new SA only if SPI is different. */ if (key_getsavbyspi(newsah, sa0->sadb_sa_spi)) { @@ -6382,61 +7069,61 @@ key_add( lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, error); } - + /* * Verify if SADB_X_EXT_NATT_MULTIPLEUSERS flag is set that * this SA is for transport mode - otherwise clear it. */ if ((newsav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0 && (newsah->saidx.mode != IPSEC_MODE_TRANSPORT || - newsah->saidx.dst.ss_family != AF_INET)) + newsah->saidx.dst.ss_family != AF_INET)) newsav->flags &= ~SADB_X_EXT_NATT_MULTIPLEUSERS; - + /* check SA values to be mature. */ if ((error = key_mature(newsav)) != 0) { key_freesav(newsav, KEY_SADB_LOCKED); lck_mtx_unlock(sadb_mutex); return key_senderror(so, m, error); } - + lck_mtx_unlock(sadb_mutex); /* * don't call key_freesav() here, as we would like to keep the SA * in the database on success. */ - + { - struct mbuf *n; - - /* set msg buf from mhp */ - n = key_getmsgbuf_x1(m, mhp); - if (n == NULL) { - ipseclog((LOG_DEBUG, "key_update: No more memory.\n")); - return key_senderror(so, m, ENOBUFS); - } - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); - } + struct mbuf *n; + + /* set msg buf from mhp */ + n = key_getmsgbuf_x1(m, mhp); + if (n == NULL) { + ipseclog((LOG_DEBUG, "key_update: No more memory.\n")); + return key_senderror(so, m, ENOBUFS); + } + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + } } /* m is retained */ static int key_setident( - struct secashead *sah, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct secashead *sah, + struct mbuf *m, + const struct sadb_msghdr *mhp) { const struct sadb_ident *idsrc, *iddst; int idsrclen, iddstlen; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + /* sanity check */ if (sah == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_setident: NULL pointer is passed.\n"); - + /* don't make buffer if not there */ if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL && mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { @@ -6450,31 +7137,31 @@ key_setident( ipseclog((LOG_DEBUG, "key_setident: invalid identity.\n")); return EINVAL; } - + idsrc = (const struct sadb_ident *) - (void *)mhp->ext[SADB_EXT_IDENTITY_SRC]; + (void *)mhp->ext[SADB_EXT_IDENTITY_SRC]; iddst = (const struct sadb_ident *) - (void *)mhp->ext[SADB_EXT_IDENTITY_DST]; + (void *)mhp->ext[SADB_EXT_IDENTITY_DST]; idsrclen = mhp->extlen[SADB_EXT_IDENTITY_SRC]; iddstlen = mhp->extlen[SADB_EXT_IDENTITY_DST]; - + /* validity check */ if (idsrc->sadb_ident_type != iddst->sadb_ident_type) { ipseclog((LOG_DEBUG, "key_setident: ident type mismatch.\n")); return EINVAL; } - + switch (idsrc->sadb_ident_type) { - case SADB_IDENTTYPE_PREFIX: - case SADB_IDENTTYPE_FQDN: - case SADB_IDENTTYPE_USERFQDN: - default: - /* XXX do nothing */ - sah->idents = NULL; - sah->identd = NULL; - return 0; + case SADB_IDENTTYPE_PREFIX: + case SADB_IDENTTYPE_FQDN: + case SADB_IDENTTYPE_USERFQDN: + default: + /* XXX do nothing */ + sah->idents = NULL; + sah->identd = NULL; + return 0; } - + /* make structure */ 
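/*
 * key_setident() here installs both identity payloads or neither: if
 * the second allocation fails, the first is released so the SA header
 * never keeps a half-initialized pair.  A sketch of that all-or-nothing
 * pattern, with plain malloc()/memcpy() standing in for
 * KMALLOC_NOWAIT/KMALLOC_WAIT and bcopy().
 */
#include <stdlib.h>
#include <string.h>

int set_ident_pair(void **idents, void **identd,
                   const void *src, size_t srclen,
                   const void *dst, size_t dstlen)
{
    void *s = malloc(srclen);
    if (s == NULL)
        return -1;

    void *d = malloc(dstlen);
    if (d == NULL) {
        free(s);                  /* roll back: no half pair survives */
        return -1;
    }

    memcpy(s, src, srclen);
    memcpy(d, dst, dstlen);
    *idents = s;
    *identd = d;
    return 0;
}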
KMALLOC_NOWAIT(sah->idents, struct sadb_ident *, idsrclen); if (sah->idents == NULL) { @@ -6500,35 +7187,35 @@ key_setident( } bcopy(idsrc, sah->idents, idsrclen); bcopy(iddst, sah->identd, iddstlen); - + return 0; } /* * m will not be freed on return. - * it is caller's responsibility to free the result. + * it is caller's responsibility to free the result. */ static struct mbuf * key_getmsgbuf_x1( - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct mbuf *n; int mbufItems[] = {SADB_EXT_RESERVED, SADB_EXT_SA, - SADB_X_EXT_SA2, SADB_EXT_ADDRESS_SRC, - SADB_EXT_ADDRESS_DST, SADB_EXT_LIFETIME_HARD, - SADB_EXT_LIFETIME_SOFT, SADB_EXT_IDENTITY_SRC, - SADB_EXT_IDENTITY_DST}; - + SADB_X_EXT_SA2, SADB_EXT_ADDRESS_SRC, + SADB_EXT_ADDRESS_DST, SADB_EXT_LIFETIME_HARD, + SADB_EXT_LIFETIME_SOFT, SADB_EXT_IDENTITY_SRC, + SADB_EXT_IDENTITY_DST}; + /* sanity check */ if (m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_getmsgbuf_x1: NULL pointer is passed.\n"); - + /* create new sadb_msg to reply. */ n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); if (!n) return NULL; - + if (n->m_len < sizeof(struct sadb_msg)) { n = m_pullup(n, sizeof(struct sadb_msg)); if (n == NULL) @@ -6536,13 +7223,13 @@ key_getmsgbuf_x1( } mtod(n, struct sadb_msg *)->sadb_msg_errno = 0; mtod(n, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(n->m_pkthdr.len); - + PFKEY_UNIT64(n->m_pkthdr.len); + return n; } static int key_delete_all(struct socket *, struct mbuf *, - const struct sadb_msghdr *, u_int16_t); + const struct sadb_msghdr *, u_int16_t); /* * SADB_DELETE processing @@ -6557,9 +7244,9 @@ static int key_delete_all(struct socket *, struct mbuf *, */ static int key_delete( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; @@ -6567,31 +7254,31 @@ key_delete( struct secashead *sah; struct secasvar *sav = NULL; u_int16_t proto; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_delete: NULL pointer is passed.\n"); - + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_delete: invalid satype is passed.\n")); return key_senderror(so, m, EINVAL); } - + if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { ipseclog((LOG_DEBUG, "key_delete: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - + if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { ipseclog((LOG_DEBUG, "key_delete: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - + lck_mtx_lock(sadb_mutex); if (mhp->ext[SADB_EXT_SA] == NULL) { @@ -6602,27 +7289,27 @@ key_delete( */ ipseclog((LOG_DEBUG, "key_delete: doing delete all.\n")); /* key_delete_all will unlock sadb_mutex */ - return key_delete_all(so, m, mhp, proto); + return key_delete_all(so, m, mhp, proto); } else if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa)) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_delete: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - + sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address 
*)(mhp->ext[SADB_EXT_ADDRESS_DST]); - + /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - + /* get a SA header */ LIST_FOREACH(sah, &sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; - + /* get a SA with SPI. */ sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); if (sav) @@ -6633,35 +7320,35 @@ key_delete( ipseclog((LOG_DEBUG, "key_delete: no SA found.\n")); return key_senderror(so, m, ENOENT); } - + key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_freesav(sav, KEY_SADB_LOCKED); lck_mtx_unlock(sadb_mutex); sav = NULL; - + { - struct mbuf *n; - struct sadb_msg *newmsg; - int mbufItems[] = {SADB_EXT_RESERVED, SADB_EXT_SA, - SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST}; - - /* create new sadb_msg to reply. */ - n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); - if (!n) - return key_senderror(so, m, ENOBUFS); - - if (n->m_len < sizeof(struct sadb_msg)) { - n = m_pullup(n, sizeof(struct sadb_msg)); - if (n == NULL) + struct mbuf *n; + struct sadb_msg *newmsg; + int mbufItems[] = {SADB_EXT_RESERVED, SADB_EXT_SA, + SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST}; + + /* create new sadb_msg to reply. */ + n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); + if (!n) return key_senderror(so, m, ENOBUFS); - } - newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_errno = 0; - newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + + if (n->m_len < sizeof(struct sadb_msg)) { + n = m_pullup(n, sizeof(struct sadb_msg)); + if (n == NULL) + return key_senderror(so, m, ENOBUFS); + } + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } @@ -6670,31 +7357,31 @@ key_delete( */ static int key_delete_all( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp, - u_int16_t proto) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp, + u_int16_t proto) { struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; struct secasvar *sav, *nextsav; u_int stateidx, state; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - + /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - + LIST_FOREACH(sah, &sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; - + /* Delete all non-LARVAL SAs. */ for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); @@ -6708,9 +7395,9 @@ key_delete_all( /* sanity check */ if (sav->state != state) { ipseclog((LOG_DEBUG, "key_delete_all: " - "invalid sav->state " - "(queue: %d SA: %d)\n", - state, sav->state)); + "invalid sav->state " + "(queue: %d SA: %d)\n", + state, sav->state)); continue; } @@ -6722,27 +7409,27 @@ key_delete_all( lck_mtx_unlock(sadb_mutex); { - struct mbuf *n; - struct sadb_msg *newmsg; - int mbufItems[] = {SADB_EXT_RESERVED, SADB_EXT_ADDRESS_SRC, - SADB_EXT_ADDRESS_DST}; - - /* create new sadb_msg to reply. 
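/*
 * Every reply path above finishes the same way: the gathered mbuf chain
 * is pulled up so the sadb_msg header is contiguous, m_pkthdr.len is
 * recomputed by summing the segment lengths, and sadb_msg_len is
 * stamped in 8-byte units.  A sketch over a deliberately simplified
 * chain model (struct mb is an assumption; the real struct mbuf carries
 * much more state).
 */
#include <stddef.h>

struct mb {
    struct mb *next;              /* cf. m_next */
    size_t     len;               /* cf. m_len */
};

#define UNIT64(bytes) ((bytes) >> 3)      /* cf. PFKEY_UNIT64 */

size_t chain_bytes(const struct mb *m)
{
    size_t total = 0;
    for (const struct mb *p = m; p != NULL; p = p->next)
        total += p->len;          /* what the loops over m_next compute */
    return total;                 /* caller stores UNIT64(total) in the header */
}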
*/ - n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); - if (!n) - return key_senderror(so, m, ENOBUFS); - - if (n->m_len < sizeof(struct sadb_msg)) { - n = m_pullup(n, sizeof(struct sadb_msg)); - if (n == NULL) + struct mbuf *n; + struct sadb_msg *newmsg; + int mbufItems[] = {SADB_EXT_RESERVED, SADB_EXT_ADDRESS_SRC, + SADB_EXT_ADDRESS_DST}; + + /* create new sadb_msg to reply. */ + n = key_gather_mbuf(m, mhp, 1, sizeof(mbufItems)/sizeof(int), mbufItems); + if (!n) return key_senderror(so, m, ENOBUFS); - } - newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_errno = 0; - newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + + if (n->m_len < sizeof(struct sadb_msg)) { + n = m_pullup(n, sizeof(struct sadb_msg)); + if (n == NULL) + return key_senderror(so, m, ENOBUFS); + } + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ALL); } } @@ -6760,9 +7447,9 @@ key_delete_all( */ static int key_get( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct sadb_sa *sa0; struct sadb_address *src0, *dst0; @@ -6770,19 +7457,19 @@ key_get( struct secashead *sah; struct secasvar *sav = NULL; u_int16_t proto; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_get: NULL pointer is passed.\n"); - + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_get: invalid satype is passed.\n")); return key_senderror(so, m, EINVAL); } - + if (mhp->ext[SADB_EXT_SA] == NULL || mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { @@ -6795,14 +7482,14 @@ key_get( ipseclog((LOG_DEBUG, "key_get: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - + sa0 = (struct sadb_sa *)(void *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - + /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - + lck_mtx_lock(sadb_mutex); /* get a SA header */ @@ -6811,7 +7498,7 @@ key_get( continue; if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) continue; - + /* get a SA with SPI. */ sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); if (sav) @@ -6822,30 +7509,30 @@ key_get( ipseclog((LOG_DEBUG, "key_get: no SA found.\n")); return key_senderror(so, m, ENOENT); } - + { - struct mbuf *n; - u_int8_t satype; - - /* map proto to satype */ - if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { + struct mbuf *n; + u_int8_t satype; + + /* map proto to satype */ + if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { + lck_mtx_unlock(sadb_mutex); + ipseclog((LOG_DEBUG, "key_get: there was invalid proto in SAD.\n")); + return key_senderror(so, m, EINVAL); + } lck_mtx_unlock(sadb_mutex); - ipseclog((LOG_DEBUG, "key_get: there was invalid proto in SAD.\n")); - return key_senderror(so, m, EINVAL); - } - lck_mtx_unlock(sadb_mutex); - - /* create new sadb_msg to reply. 
*/ - n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, - mhp->msg->sadb_msg_pid); - - - - if (!n) - return key_senderror(so, m, ENOBUFS); - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + + /* create new sadb_msg to reply. */ + n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, + mhp->msg->sadb_msg_pid); + + + + if (!n) + return key_senderror(so, m, ENOBUFS); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } } @@ -6856,39 +7543,39 @@ key_get( */ static int key_getsastatbyspi_one (u_int32_t spi, - struct sastat *stat) + struct sastat *stat) { struct secashead *sah; struct secasvar *sav = NULL; - + if ((void *)stat == NULL) { - return -1; + return -1; } - + lck_mtx_lock(sadb_mutex); /* get a SA header */ LIST_FOREACH(sah, &sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) continue; - + /* get a SA with SPI. */ sav = key_getsavbyspi(sah, spi); if (sav) { - stat->spi = sav->spi; + stat->spi = sav->spi; stat->created = sav->created; if (sav->lft_c) { - bcopy(sav->lft_c,&stat->lft_c, sizeof(stat->lft_c)); + bcopy(sav->lft_c,&stat->lft_c, sizeof(stat->lft_c)); } else { - bzero(&stat->lft_c, sizeof(stat->lft_c)); + bzero(&stat->lft_c, sizeof(stat->lft_c)); } lck_mtx_unlock(sadb_mutex); return 0; } } - + lck_mtx_unlock(sadb_mutex); - + return -1; } @@ -6899,28 +7586,28 @@ key_getsastatbyspi_one (u_int32_t spi, */ static int key_getsastatbyspi (struct sastat *stat_arg, - u_int32_t max_stat_arg, - struct sastat *stat_res, - u_int32_t *max_stat_res) + u_int32_t max_stat_arg, + struct sastat *stat_res, + u_int32_t *max_stat_res) { - int cur, found = 0; - + int cur, found = 0; + if (stat_arg == NULL || - stat_res == NULL || + stat_res == NULL || max_stat_res == NULL) { - return -1; + return -1; } - + for (cur = 0; cur < max_stat_arg; cur++) { - if (key_getsastatbyspi_one(stat_arg[cur].spi, - &stat_res[found]) == 0) { - found++; + if (key_getsastatbyspi_one(stat_arg[cur].spi, + &stat_res[found]) == 0) { + found++; } } *max_stat_res = found; - + if (found) { - return 0; + return 0; } return -1; } @@ -6928,9 +7615,9 @@ key_getsastatbyspi (struct sastat *stat_arg, /* XXX make it sysctl-configurable? 
*/ static void key_getcomb_setlifetime( - struct sadb_comb *comb) + struct sadb_comb *comb) { - + comb->sadb_comb_soft_allocations = 1; comb->sadb_comb_hard_allocations = 1; comb->sadb_comb_soft_bytes = 0; @@ -6956,20 +7643,20 @@ key_getcomb_esp(void) int i, off, o; int totlen; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); - + m = NULL; for (i = 1; i <= SADB_EALG_MAX; i++) { algo = esp_algorithm_lookup(i); if (!algo) continue; - + if (algo->keymax < ipsec_esp_keymin) continue; if (algo->keymin < ipsec_esp_keymin) encmin = ipsec_esp_keymin; else encmin = algo->keymin; - + if (ipsec_esp_auth) m = key_getcomb_ah(); else { @@ -6987,7 +7674,7 @@ key_getcomb_esp(void) } if (!m) goto fail; - + totlen = 0; for (n = m; n; n = n->m_next) totlen += n->m_len; @@ -6995,7 +7682,7 @@ key_getcomb_esp(void) if (totlen % l) panic("assumption failed in key_getcomb_esp"); #endif - + for (off = 0; off < totlen; off += l) { n = m_pulldown(m, off, l, &o); if (!n) { @@ -7003,23 +7690,23 @@ key_getcomb_esp(void) goto fail; } comb = (struct sadb_comb *) - (void *)(mtod(n, caddr_t) + o); + (void *)(mtod(n, caddr_t) + o); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_encrypt = i; comb->sadb_comb_encrypt_minbits = encmin; comb->sadb_comb_encrypt_maxbits = algo->keymax; } - + if (!result) result = m; else m_cat(result, m); } - + return result; - - fail: + +fail: if (result) m_freem(result); return NULL; @@ -7038,7 +7725,7 @@ key_getcomb_ah(void) int keymin; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); - + m = NULL; for (i = 1; i <= SADB_AALG_MAX; i++) { #if 1 @@ -7049,14 +7736,14 @@ key_getcomb_ah(void) algo = ah_algorithm_lookup(i); if (!algo) continue; - + if (algo->keymax < ipsec_ah_keymin) continue; if (algo->keymin < ipsec_ah_keymin) keymin = ipsec_ah_keymin; else keymin = algo->keymin; - + if (!m) { #if DIAGNOSTIC if (l > MLEN) @@ -7072,7 +7759,7 @@ key_getcomb_ah(void) M_PREPEND(m, l, M_WAITOK); if (!m) return NULL; - + comb = mtod(m, struct sadb_comb *); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); @@ -7080,7 +7767,7 @@ key_getcomb_ah(void) comb->sadb_comb_auth_minbits = keymin; comb->sadb_comb_auth_maxbits = algo->keymax; } - + return m; } @@ -7096,13 +7783,13 @@ key_getcomb_ipcomp(void) struct mbuf *m; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); - + m = NULL; for (i = 1; i <= SADB_X_CALG_MAX; i++) { algo = ipcomp_algorithm_lookup(i); if (!algo) continue; - + if (!m) { #if DIAGNOSTIC if (l > MLEN) @@ -7118,14 +7805,14 @@ key_getcomb_ipcomp(void) M_PREPEND(m, l, M_WAITOK); if (!m) return NULL; - + comb = mtod(m, struct sadb_comb *); bzero(comb, sizeof(*comb)); key_getcomb_setlifetime(comb); comb->sadb_comb_encrypt = i; /* what should we set into sadb_comb_*_{min,max}bits? 
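/*
 * The key_getcomb_esp()/key_getcomb_ah() loops above skip algorithms
 * whose largest key is below the system floor (ipsec_esp_keymin /
 * ipsec_ah_keymin) and otherwise clamp the advertised minimum up to
 * that floor.  A sketch of just that clamp; struct alg is an assumed
 * reduction of the kernel's algorithm descriptors.
 */
#include <stdint.h>

struct alg {
    uint16_t keymin;              /* smallest key accepted, in bits */
    uint16_t keymax;              /* largest key accepted, in bits */
};

/* Returns the minimum bits to advertise, or 0 if the algorithm cannot
 * satisfy the floor and the proposal must be skipped. */
uint16_t clamp_keymin(const struct alg *a, uint16_t floor_bits)
{
    if (a->keymax < floor_bits)
        return 0;
    return (a->keymin < floor_bits) ? floor_bits : a->keymin;
}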
*/ } - + return m; } @@ -7136,45 +7823,45 @@ key_getcomb_ipcomp(void) */ static struct mbuf * key_getprop( - const struct secasindex *saidx) + const struct secasindex *saidx) { struct sadb_prop *prop; struct mbuf *m, *n; const int l = PFKEY_ALIGN8(sizeof(struct sadb_prop)); int totlen; - + switch (saidx->proto) { #if IPSEC_ESP - case IPPROTO_ESP: - m = key_getcomb_esp(); - break; + case IPPROTO_ESP: + m = key_getcomb_esp(); + break; #endif - case IPPROTO_AH: - m = key_getcomb_ah(); - break; - case IPPROTO_IPCOMP: - m = key_getcomb_ipcomp(); - break; - default: - return NULL; + case IPPROTO_AH: + m = key_getcomb_ah(); + break; + case IPPROTO_IPCOMP: + m = key_getcomb_ipcomp(); + break; + default: + return NULL; } - + if (!m) return NULL; M_PREPEND(m, l, M_WAITOK); if (!m) return NULL; - + totlen = 0; for (n = m; n; n = n->m_next) totlen += n->m_len; - + prop = mtod(m, struct sadb_prop *); bzero(prop, sizeof(*prop)); prop->sadb_prop_len = PFKEY_UNIT64(totlen); prop->sadb_prop_exttype = SADB_EXT_PROPOSAL; prop->sadb_prop_replay = 32; /* XXX */ - + return m; } @@ -7200,8 +7887,8 @@ key_getprop( */ static int key_acquire( - struct secasindex *saidx, - struct secpolicy *sp) + struct secasindex *saidx, + struct secpolicy *sp) { struct mbuf *result = NULL, *m; #ifndef IPSEC_NONBLOCK_ACQUIRE @@ -7210,7 +7897,7 @@ key_acquire( u_int8_t satype; int error = -1; u_int32_t seq; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ @@ -7218,7 +7905,7 @@ key_acquire( panic("key_acquire: NULL pointer is passed.\n"); if ((satype = key_proto2satype(saidx->proto)) == 0) panic("key_acquire: invalid proto is passed.\n"); - + #ifndef IPSEC_NONBLOCK_ACQUIRE /* * We never do anything about acquirng SA. There is anather @@ -7244,13 +7931,14 @@ key_acquire( lck_mtx_unlock(sadb_mutex); return ENOBUFS; } - + /* add to acqtree */ LIST_INSERT_HEAD(&acqtree, newacq, chain); + key_start_timehandler(); } seq = newacq->seq; lck_mtx_unlock(sadb_mutex); - + #else seq = (acq_seq = (acq_seq == ~0 ? 1 : ++acq_seq)); #endif @@ -7260,26 +7948,26 @@ key_acquire( goto fail; } result = m; - + /* set sadb_address for saidx's. 
*/ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - (struct sockaddr *)&saidx->src, FULLMASK, IPSEC_ULPROTO_ANY); + (struct sockaddr *)&saidx->src, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); - + m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - (struct sockaddr *)&saidx->dst, FULLMASK, IPSEC_ULPROTO_ANY); + (struct sockaddr *)&saidx->dst, FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); - + /* XXX proxy address (optional) */ - + /* set sadb_x_policy */ if (sp) { m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id); @@ -7296,7 +7984,7 @@ key_acquire( /* create identity extension (FQDN) */ struct sadb_ident *id; int fqdnlen; - + fqdnlen = strlen(fqdn) + 1; /* +1 for terminating-NUL */ id = (struct sadb_ident *)p; bzero(id, sizeof(*id) + PFKEY_ALIGN8(fqdnlen)); @@ -7306,12 +7994,12 @@ key_acquire( bcopy(fqdn, id + 1, fqdnlen); p += sizeof(struct sadb_ident) + PFKEY_ALIGN8(fqdnlen); } - + if (idexttype) { /* create identity extension (USERFQDN) */ struct sadb_ident *id; int userfqdnlen; - + if (userfqdn) { /* +1 for terminating-NUL */ userfqdnlen = strlen(userfqdn) + 1; @@ -7330,9 +8018,9 @@ key_acquire( p += sizeof(struct sadb_ident) + PFKEY_ALIGN8(userfqdnlen); } #endif - + /* XXX sensitivity (optional) */ - + /* create proposal/combination extension */ m = key_getprop(saidx); #if 0 @@ -7353,12 +8041,12 @@ key_acquire( if (m) m_cat(result, m); #endif - + if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } - + if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { @@ -7366,17 +8054,17 @@ key_acquire( goto fail; } } - + result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; - + mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - + PFKEY_UNIT64(result->m_pkthdr.len); + return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); - - fail: + +fail: if (result) m_freem(result); return error; @@ -7385,74 +8073,74 @@ key_acquire( #ifndef IPSEC_NONBLOCK_ACQUIRE static struct secacq * key_newacq( - struct secasindex *saidx) + struct secasindex *saidx) { struct secacq *newacq; struct timeval tv; - + /* get new entry */ KMALLOC_NOWAIT(newacq, struct secacq *, sizeof(struct secacq)); if (newacq == NULL) { lck_mtx_unlock(sadb_mutex); KMALLOC_WAIT(newacq, struct secacq *, sizeof(struct secacq)); lck_mtx_lock(sadb_mutex); - if (newacq == NULL) { + if (newacq == NULL) { ipseclog((LOG_DEBUG, "key_newacq: No more memory.\n")); return NULL; } } bzero(newacq, sizeof(*newacq)); - + /* copy secindex */ bcopy(saidx, &newacq->saidx, sizeof(newacq->saidx)); newacq->seq = (acq_seq == ~0 ? 
1 : ++acq_seq); microtime(&tv); newacq->created = tv.tv_sec; newacq->count = 0; - + return newacq; } static struct secacq * key_getacq( - struct secasindex *saidx) + struct secasindex *saidx) { struct secacq *acq; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + LIST_FOREACH(acq, &acqtree, chain) { if (key_cmpsaidx(saidx, &acq->saidx, CMP_EXACTLY)) return acq; } - + return NULL; } static struct secacq * key_getacqbyseq( - u_int32_t seq) + u_int32_t seq) { struct secacq *acq; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + LIST_FOREACH(acq, &acqtree, chain) { if (acq->seq == seq) return acq; } - + return NULL; } #endif static struct secspacq * key_newspacq( - struct secpolicyindex *spidx) + struct secpolicyindex *spidx) { struct secspacq *acq; struct timeval tv; - + /* get new entry */ KMALLOC_NOWAIT(acq, struct secspacq *, sizeof(struct secspacq)); if (acq == NULL) { @@ -7465,29 +8153,29 @@ key_newspacq( } } bzero(acq, sizeof(*acq)); - + /* copy secindex */ bcopy(spidx, &acq->spidx, sizeof(acq->spidx)); microtime(&tv); acq->created = tv.tv_sec; acq->count = 0; - + return acq; } static struct secspacq * key_getspacq( - struct secpolicyindex *spidx) + struct secpolicyindex *spidx) { struct secspacq *acq; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + LIST_FOREACH(acq, &spacqtree, chain) { if (key_cmpspidx_exactly(spidx, &acq->spidx)) return acq; } - + return NULL; } @@ -7507,34 +8195,34 @@ key_getspacq( */ static int key_acquire2( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { const struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; u_int16_t proto; int error; - - + + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_acquire2: NULL pointer is passed.\n"); - + /* * Error message from KMd. * We assume that if error was occurred in IKEd, the length of PFKEY * message is equal to the size of sadb_msg structure. * We do not raise error even if error occurred in this function. */ - lck_mtx_lock(sadb_mutex); - + lck_mtx_lock(sadb_mutex); + if (mhp->msg->sadb_msg_len == PFKEY_UNIT64(sizeof(struct sadb_msg))) { #ifndef IPSEC_NONBLOCK_ACQUIRE struct secacq *acq; struct timeval tv; - + /* check sequence number */ if (mhp->msg->sadb_msg_seq == 0) { lck_mtx_unlock(sadb_mutex); @@ -7542,7 +8230,7 @@ key_acquire2( m_freem(m); return 0; } - + if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) == NULL) { /* * the specified larval SA is already gone, or we got @@ -7552,7 +8240,7 @@ key_acquire2( m_freem(m); return 0; } - + /* reset acq counter in order to deletion by timehander. */ microtime(&tv); acq->created = tv.tv_sec; @@ -7562,18 +8250,18 @@ key_acquire2( m_freem(m); return 0; } - + /* * This message is from user land. 
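/*
 * key_acquire2() above recognizes an error report from the key
 * management daemon by length alone: a body of exactly one bare
 * sadb_msg header carries no extensions.  sadb_msg_len counts 8-byte
 * units throughout PF_KEY; the macros below follow KAME's pfkeyv2.h
 * definitions and are reproduced here as an assumption, not quoted
 * from this patch.
 */
#include <assert.h>

#define ALIGN8(a)   (1 + (((a) - 1) | (8 - 1)))   /* cf. PFKEY_ALIGN8 */
#define UNIT64(a)   ((a) >> 3)                    /* cf. PFKEY_UNIT64 */
#define UNUNIT64(a) ((a) << 3)                    /* cf. PFKEY_UNUNIT64 */

int main(void)
{
    assert(ALIGN8(13) == 16);           /* round up to an 8-byte boundary */
    assert(UNUNIT64(UNIT64(16)) == 16); /* units round-trip for aligned sizes */
    /* struct sadb_msg is 16 bytes, so a bare-header message has
     * sadb_msg_len == UNIT64(16) == 2 -- the test key_acquire2 applies. */
    assert(UNIT64(16) == 2);
    return 0;
}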
*/ - + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { lck_mtx_unlock(sadb_mutex); ipseclog((LOG_DEBUG, "key_acquire2: invalid satype is passed.\n")); return key_senderror(so, m, EINVAL); } - + if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || mhp->ext[SADB_EXT_PROPOSAL] == NULL) { @@ -7590,14 +8278,14 @@ key_acquire2( ipseclog((LOG_DEBUG, "key_acquire2: invalid message is passed.\n")); return key_senderror(so, m, EINVAL); } - + src0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (const struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - + /* XXX boundary check against sa_len */ /* cast warnings */ KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - + /* get a SA index */ LIST_FOREACH(sah, &sahtree, chain) { if (sah->state == SADB_SASTATE_DEAD) @@ -7614,10 +8302,10 @@ key_acquire2( error = key_acquire(&saidx, NULL); if (error != 0) { ipseclog((LOG_DEBUG, "key_acquire2: error %d returned " - "from key_acquire.\n", mhp->msg->sadb_msg_errno)); + "from key_acquire.\n", mhp->msg->sadb_msg_errno)); return key_senderror(so, m, error); } - + return key_sendup_mbuf(so, m, KEY_SENDUP_REGISTERED); } @@ -7636,32 +8324,32 @@ key_acquire2( */ static int key_register( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct secreg *reg, *newreg = 0; /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_register: NULL pointer is passed.\n"); - + /* check for invalid register message */ if (mhp->msg->sadb_msg_satype >= sizeof(regtree)/sizeof(regtree[0])) return key_senderror(so, m, EINVAL); - + /* When SATYPE_UNSPEC is specified, only return sadb_supported. */ if (mhp->msg->sadb_msg_satype == SADB_SATYPE_UNSPEC) goto setmsg; - + /* create regnode */ KMALLOC_WAIT(newreg, struct secreg *, sizeof(*newreg)); - if (newreg == NULL) { - ipseclog((LOG_DEBUG, "key_register: No more memory.\n")); - return key_senderror(so, m, ENOBUFS); - } - bzero((caddr_t)newreg, sizeof(*newreg)); - + if (newreg == NULL) { + ipseclog((LOG_DEBUG, "key_register: No more memory.\n")); + return key_senderror(so, m, ENOBUFS); + } + bzero((caddr_t)newreg, sizeof(*newreg)); + lck_mtx_lock(sadb_mutex); /* check whether existing or not */ LIST_FOREACH(reg, ®tree[mhp->msg->sadb_msg_satype], chain) { @@ -7672,132 +8360,132 @@ key_register( return key_senderror(so, m, EEXIST); } } - + socket_lock(so, 1); newreg->so = so; ((struct keycb *)sotorawcb(so))->kp_registered++; socket_unlock(so, 1); - + /* add regnode to regtree. */ LIST_INSERT_HEAD(®tree[mhp->msg->sadb_msg_satype], newreg, chain); lck_mtx_unlock(sadb_mutex); - setmsg: +setmsg: { - struct mbuf *n; - struct sadb_msg *newmsg; - struct sadb_supported *sup; - u_int len, alen, elen; - int off; - int i; - struct sadb_alg *alg; - - /* create new sadb_msg to reply. */ - alen = 0; - for (i = 1; i <= SADB_AALG_MAX; i++) { - if (ah_algorithm_lookup(i)) - alen += sizeof(struct sadb_alg); - } - if (alen) - alen += sizeof(struct sadb_supported); - elen = 0; + struct mbuf *n; + struct sadb_msg *newmsg; + struct sadb_supported *sup; + u_int len, alen, elen; + int off; + int i; + struct sadb_alg *alg; + + /* create new sadb_msg to reply. 
*/ + alen = 0; + for (i = 1; i <= SADB_AALG_MAX; i++) { + if (ah_algorithm_lookup(i)) + alen += sizeof(struct sadb_alg); + } + if (alen) + alen += sizeof(struct sadb_supported); + elen = 0; #if IPSEC_ESP - for (i = 1; i <= SADB_EALG_MAX; i++) { - if (esp_algorithm_lookup(i)) - elen += sizeof(struct sadb_alg); - } - if (elen) - elen += sizeof(struct sadb_supported); + for (i = 1; i <= SADB_EALG_MAX; i++) { + if (esp_algorithm_lookup(i)) + elen += sizeof(struct sadb_alg); + } + if (elen) + elen += sizeof(struct sadb_supported); #endif - - len = sizeof(struct sadb_msg) + alen + elen; - - if (len > MCLBYTES) - return key_senderror(so, m, ENOBUFS); - - MGETHDR(n, M_WAITOK, MT_DATA); - if (n && len > MHLEN) { - MCLGET(n, M_WAITOK); - if ((n->m_flags & M_EXT) == 0) { - m_freem(n); - n = NULL; + + len = sizeof(struct sadb_msg) + alen + elen; + + if (len > MCLBYTES) + return key_senderror(so, m, ENOBUFS); + + MGETHDR(n, M_WAITOK, MT_DATA); + if (n && len > MHLEN) { + MCLGET(n, M_WAITOK); + if ((n->m_flags & M_EXT) == 0) { + m_freem(n); + n = NULL; + } } - } - if (!n) - return key_senderror(so, m, ENOBUFS); - - n->m_pkthdr.len = n->m_len = len; - n->m_next = NULL; - off = 0; - - m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); - newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_errno = 0; - newmsg->sadb_msg_len = PFKEY_UNIT64(len); - off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); - - /* for authentication algorithm */ - if (alen) { - sup = (struct sadb_supported *)(void *)(mtod(n, caddr_t) + off); - sup->sadb_supported_len = PFKEY_UNIT64(alen); - sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; - off += PFKEY_ALIGN8(sizeof(*sup)); - - for (i = 1; i <= SADB_AALG_MAX; i++) { - const struct ah_algorithm *aalgo; - - aalgo = ah_algorithm_lookup(i); - if (!aalgo) - continue; - alg = (struct sadb_alg *) + if (!n) + return key_senderror(so, m, ENOBUFS); + + n->m_pkthdr.len = n->m_len = len; + n->m_next = NULL; + off = 0; + + m_copydata(m, 0, sizeof(struct sadb_msg), mtod(n, caddr_t) + off); + newmsg = mtod(n, struct sadb_msg *); + newmsg->sadb_msg_errno = 0; + newmsg->sadb_msg_len = PFKEY_UNIT64(len); + off += PFKEY_ALIGN8(sizeof(struct sadb_msg)); + + /* for authentication algorithm */ + if (alen) { + sup = (struct sadb_supported *)(void *)(mtod(n, caddr_t) + off); + sup->sadb_supported_len = PFKEY_UNIT64(alen); + sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; + off += PFKEY_ALIGN8(sizeof(*sup)); + + for (i = 1; i <= SADB_AALG_MAX; i++) { + const struct ah_algorithm *aalgo; + + aalgo = ah_algorithm_lookup(i); + if (!aalgo) + continue; + alg = (struct sadb_alg *) (void *)(mtod(n, caddr_t) + off); - alg->sadb_alg_id = i; - alg->sadb_alg_ivlen = 0; - alg->sadb_alg_minbits = aalgo->keymin; - alg->sadb_alg_maxbits = aalgo->keymax; - off += PFKEY_ALIGN8(sizeof(*alg)); + alg->sadb_alg_id = i; + alg->sadb_alg_ivlen = 0; + alg->sadb_alg_minbits = aalgo->keymin; + alg->sadb_alg_maxbits = aalgo->keymax; + off += PFKEY_ALIGN8(sizeof(*alg)); + } } - } - + #if IPSEC_ESP - /* for encryption algorithm */ - if (elen) { - sup = (struct sadb_supported *)(void *)(mtod(n, caddr_t) + off); - sup->sadb_supported_len = PFKEY_UNIT64(elen); - sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT; - off += PFKEY_ALIGN8(sizeof(*sup)); - - for (i = 1; i <= SADB_EALG_MAX; i++) { - const struct esp_algorithm *ealgo; - - ealgo = esp_algorithm_lookup(i); - if (!ealgo) - continue; - alg = (struct sadb_alg *) + /* for encryption algorithm */ + if (elen) { + sup = (struct sadb_supported *)(void 
*)(mtod(n, caddr_t) + off); + sup->sadb_supported_len = PFKEY_UNIT64(elen); + sup->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT; + off += PFKEY_ALIGN8(sizeof(*sup)); + + for (i = 1; i <= SADB_EALG_MAX; i++) { + const struct esp_algorithm *ealgo; + + ealgo = esp_algorithm_lookup(i); + if (!ealgo) + continue; + alg = (struct sadb_alg *) (void *)(mtod(n, caddr_t) + off); - alg->sadb_alg_id = i; - if (ealgo && ealgo->ivlen) { - /* - * give NULL to get the value preferred by - * algorithm XXX SADB_X_EXT_DERIV ? - */ - alg->sadb_alg_ivlen = + alg->sadb_alg_id = i; + if (ealgo && ealgo->ivlen) { + /* + * give NULL to get the value preferred by + * algorithm XXX SADB_X_EXT_DERIV ? + */ + alg->sadb_alg_ivlen = (*ealgo->ivlen)(ealgo, NULL); - } else - alg->sadb_alg_ivlen = 0; - alg->sadb_alg_minbits = ealgo->keymin; - alg->sadb_alg_maxbits = ealgo->keymax; - off += PFKEY_ALIGN8(sizeof(struct sadb_alg)); + } else + alg->sadb_alg_ivlen = 0; + alg->sadb_alg_minbits = ealgo->keymin; + alg->sadb_alg_maxbits = ealgo->keymax; + off += PFKEY_ALIGN8(sizeof(struct sadb_alg)); + } } - } #endif - + #if DIGAGNOSTIC - if (off != len) - panic("length assumption failed in key_register"); + if (off != len) + panic("length assumption failed in key_register"); #endif - - m_freem(m); - return key_sendup_mbuf(so, n, KEY_SENDUP_REGISTERED); + + m_freem(m); + return key_sendup_mbuf(so, n, KEY_SENDUP_REGISTERED); } } @@ -7807,7 +8495,7 @@ key_register( */ void key_freereg( - struct socket *so) + struct socket *so) { struct secreg *reg; int i; @@ -7815,7 +8503,7 @@ key_freereg( /* sanity check */ if (so == NULL) panic("key_freereg: NULL pointer is passed.\n"); - + /* * check whether existing or not. * check all type of SA, because there is a potential that @@ -7825,7 +8513,7 @@ key_freereg( for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, ®tree[i], chain) { if (reg->so == so - && __LIST_CHAINED(reg)) { + && __LIST_CHAINED(reg)) { LIST_REMOVE(reg, chain); KFREE(reg); break; @@ -7848,14 +8536,14 @@ key_freereg( */ static int key_expire( - struct secasvar *sav) + struct secasvar *sav) { int satype; struct mbuf *result = NULL, *m; int len; int error = -1; struct sadb_lifetime *lt; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ @@ -7865,7 +8553,7 @@ key_expire( panic("key_expire: Why was SA index in SA NULL.\n"); if ((satype = key_proto2satype(sav->sah->saidx.proto)) == 0) panic("key_expire: invalid proto is passed.\n"); - + /* set msg header */ m = key_setsadbmsg(SADB_EXPIRE, 0, satype, sav->seq, 0, sav->refcnt); if (!m) { @@ -7873,7 +8561,7 @@ key_expire( goto fail; } result = m; - + /* create SA extension */ m = key_setsadbsa(sav); if (!m) { @@ -7881,17 +8569,17 @@ key_expire( goto fail; } m_cat(result, m); - + /* create SA extension */ m = key_setsadbxsa2(sav->sah->saidx.mode, - sav->replay ? sav->replay->count : 0, - sav->sah->saidx.reqid); + sav->replay ? 
sav->replay->count : 0, + sav->sah->saidx.reqid); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); - + /* create lifetime extension (current and soft) */ len = PFKEY_ALIGN8(sizeof(*lt)) * 2; m = key_alloc_mbuf(len); @@ -7912,32 +8600,32 @@ key_expire( lt = (struct sadb_lifetime *)(void *)(mtod(m, caddr_t) + len / 2); bcopy(sav->lft_s, lt, sizeof(*lt)); m_cat(result, m); - + /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - (struct sockaddr *)&sav->sah->saidx.src, - FULLMASK, IPSEC_ULPROTO_ANY); + (struct sockaddr *)&sav->sah->saidx.src, + FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); - + /* set sadb_address for destination */ m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - (struct sockaddr *)&sav->sah->saidx.dst, - FULLMASK, IPSEC_ULPROTO_ANY); + (struct sockaddr *)&sav->sah->saidx.dst, + FULLMASK, IPSEC_ULPROTO_ANY); if (!m) { error = ENOBUFS; goto fail; } m_cat(result, m); - + if ((result->m_flags & M_PKTHDR) == 0) { error = EINVAL; goto fail; } - + if (result->m_len < sizeof(struct sadb_msg)) { result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { @@ -7945,17 +8633,17 @@ key_expire( goto fail; } } - + result->m_pkthdr.len = 0; for (m = result; m; m = m->m_next) result->m_pkthdr.len += m->m_len; - + mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - + PFKEY_UNIT64(result->m_pkthdr.len); + return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); - - fail: + +fail: if (result) m_freem(result); return error; @@ -7975,9 +8663,9 @@ key_expire( */ static int key_flush( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct sadb_msg *newmsg; struct secashead *sah, *nextsah; @@ -7989,13 +8677,13 @@ key_flush( /* sanity check */ if (so == NULL || mhp == NULL || mhp->msg == NULL) panic("key_flush: NULL pointer is passed.\n"); - + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_flush: invalid satype is passed.\n")); return key_senderror(so, m, EINVAL); } - + lck_mtx_lock(sadb_mutex); /* no SATYPE specified, i.e. flushing all SA. */ @@ -8003,11 +8691,11 @@ key_flush( sah != NULL; sah = nextsah) { nextsah = LIST_NEXT(sah, chain); - + if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) + && proto != sah->saidx.proto) continue; - + for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_alive); stateidx++) { @@ -8015,14 +8703,14 @@ key_flush( for (sav = LIST_FIRST(&sah->savtree[state]); sav != NULL; sav = nextsav) { - + nextsav = LIST_NEXT(sav, chain); - + key_sa_chgstate(sav, SADB_SASTATE_DEAD); key_freesav(sav, KEY_SADB_LOCKED); } } - + sah->state = SADB_SASTATE_DEAD; } lck_mtx_unlock(sadb_mutex); @@ -8032,7 +8720,7 @@ key_flush( ipseclog((LOG_DEBUG, "key_flush: No more memory.\n")); return key_senderror(so, m, ENOBUFS); } - + if (m->m_next) m_freem(m->m_next); m->m_next = NULL; @@ -8040,7 +8728,7 @@ key_flush( newmsg = mtod(m, struct sadb_msg *); newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(m->m_pkthdr.len); - + return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } @@ -8056,7 +8744,7 @@ key_flush( * * m will always be freed. 
*/ - + struct sav_dump_elem { struct secasvar *sav; u_int8_t satype; @@ -8064,9 +8752,9 @@ struct sav_dump_elem { static int key_dump( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct secashead *sah; struct secasvar *sav; @@ -8078,19 +8766,19 @@ key_dump( int cnt = 0, cnt2, bufcount; struct mbuf *n; int error = 0; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_dump: NULL pointer is passed.\n"); - + /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "key_dump: invalid satype is passed.\n")); return key_senderror(so, m, EINVAL); } - + if ((bufcount = ipsec_sav_count) <= 0) { error = ENOENT; goto end; @@ -8102,13 +8790,13 @@ key_dump( error = ENOMEM; goto end; } - + /* count sav entries to be sent to the userland. */ lck_mtx_lock(sadb_mutex); elem_ptr = savbuf; LIST_FOREACH(sah, &sahtree, chain) { if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) + && proto != sah->saidx.proto) continue; /* map proto to satype */ @@ -8118,7 +8806,7 @@ key_dump( error = EINVAL; goto end; } - + for (stateidx = 0; stateidx < _ARRAYLEN(saorder_state_any); stateidx++) { @@ -8130,33 +8818,33 @@ key_dump( elem_ptr->satype = satype; sav->refcnt++; elem_ptr++; - cnt++; + cnt++; } } } lck_mtx_unlock(sadb_mutex); - + if (cnt == 0) { error = ENOENT; goto end; } - + /* send this to the userland, one at a time. */ elem_ptr = savbuf; cnt2 = cnt; while (cnt2) { n = key_setdumpsa(elem_ptr->sav, SADB_DUMP, elem_ptr->satype, - --cnt2, mhp->msg->sadb_msg_pid); + --cnt2, mhp->msg->sadb_msg_pid); if (!n) { error = ENOBUFS; goto end; } - + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); elem_ptr++; } - + end: if (savbuf) { if (cnt) { @@ -8168,10 +8856,10 @@ end: } KFREE(savbuf); } - + if (error) return key_senderror(so, m, error); - + m_freem(m); return 0; } @@ -8183,18 +8871,18 @@ end: */ static int key_promisc( - struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct socket *so, + struct mbuf *m, + const struct sadb_msghdr *mhp) { int olen; /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) panic("key_promisc: NULL pointer is passed.\n"); - + olen = PFKEY_UNUNIT64(mhp->msg->sadb_msg_len); - + if (olen < sizeof(struct sadb_msg)) { #if 1 return key_senderror(so, m, EINVAL); @@ -8211,31 +8899,31 @@ key_promisc( return key_senderror(so, m, EINVAL); mhp->msg->sadb_msg_errno = 0; switch (mhp->msg->sadb_msg_satype) { - case 0: - case 1: - kp->kp_promisc = mhp->msg->sadb_msg_satype; - break; - default: - socket_unlock(so, 1); - return key_senderror(so, m, EINVAL); + case 0: + case 1: + kp->kp_promisc = mhp->msg->sadb_msg_satype; + break; + default: + socket_unlock(so, 1); + return key_senderror(so, m, EINVAL); } socket_unlock(so, 1); - + /* send the original message back to everyone */ mhp->msg->sadb_msg_errno = 0; return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } else { /* send packet as is */ - + m_adj(m, PFKEY_ALIGN8(sizeof(struct sadb_msg))); - + /* TODO: if sadb_msg_seq is specified, send to specific pid */ return key_sendup_mbuf(so, m, KEY_SENDUP_ALL); } } static int (*key_typesw[])(struct socket *, struct mbuf *, - const struct sadb_msghdr *) = { + const struct sadb_msghdr *) = { NULL, /* SADB_RESERVED */ key_getspi, /* SADB_GETSPI */ key_update, /* SADB_UPDATE */ @@ -8260,6 +8948,8 @@ 
static int (*key_typesw[])(struct socket *, struct mbuf *, NULL, /* SADB_X_SPDEXPIRE */ key_spddelete2, /* SADB_X_SPDDELETE2 */ key_getsastat, /* SADB_GETSASTAT */ + key_spdenable, /* SADB_X_SPDENABLE */ + key_spddisable, /* SADB_X_SPDDISABLE */ }; /* @@ -8275,27 +8965,27 @@ static int (*key_typesw[])(struct socket *, struct mbuf *, */ int key_parse( - struct mbuf *m, - struct socket *so) + struct mbuf *m, + struct socket *so) { struct sadb_msg *msg; struct sadb_msghdr mh; u_int orglen; int error; int target; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); /* sanity check */ if (m == NULL || so == NULL) panic("key_parse: NULL pointer is passed.\n"); - + #if 0 /*kdebug_sadb assumes msg in linear buffer*/ KEYDEBUG(KEYDEBUG_KEY_DUMP, - ipseclog((LOG_DEBUG, "key_parse: passed sadb_msg\n")); - kdebug_sadb(msg)); + ipseclog((LOG_DEBUG, "key_parse: passed sadb_msg\n")); + kdebug_sadb(msg)); #endif - + if (m->m_len < sizeof(struct sadb_msg)) { m = m_pullup(m, sizeof(struct sadb_msg)); if (!m) @@ -8304,7 +8994,7 @@ key_parse( msg = mtod(m, struct sadb_msg *); orglen = PFKEY_UNUNIT64(msg->sadb_msg_len); target = KEY_SENDUP_ONE; - + if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len != m->m_pkthdr.len) { ipseclog((LOG_DEBUG, "key_parse: invalid message length.\n")); @@ -8312,24 +9002,24 @@ key_parse( error = EINVAL; goto senderror; } - + if (msg->sadb_msg_version != PF_KEY_V2) { ipseclog((LOG_DEBUG, - "key_parse: PF_KEY version %u is mismatched.\n", - msg->sadb_msg_version)); + "key_parse: PF_KEY version %u is mismatched.\n", + msg->sadb_msg_version)); PFKEY_STAT_INCREMENT(pfkeystat.out_invver); error = EINVAL; goto senderror; } - + if (msg->sadb_msg_type > SADB_MAX) { ipseclog((LOG_DEBUG, "key_parse: invalid type %u is passed.\n", - msg->sadb_msg_type)); + msg->sadb_msg_type)); PFKEY_STAT_INCREMENT(pfkeystat.out_invmsgtype); error = EINVAL; goto senderror; } - + /* for old-fashioned code - should be nuked */ if (m->m_pkthdr.len > MCLBYTES) { m_freem(m); @@ -8337,7 +9027,7 @@ key_parse( } if (m->m_next) { struct mbuf *n; - + MGETHDR(n, M_WAITOK, MT_DATA); if (n && m->m_pkthdr.len > MHLEN) { MCLGET(n, M_WAITOK); @@ -8356,86 +9046,88 @@ key_parse( m_freem(m); m = n; } - + /* align the mbuf chain so that extensions are in contiguous region. 
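/*
 * key_typesw[] above is a straight index-by-message-type dispatch
 * table, which the new SADB_X_SPDENABLE/SADB_X_SPDDISABLE entries
 * extend; key_parse() further down only indexes it after a
 * bounds-and-NULL guard.  A self-contained sketch of the same pattern;
 * the handlers here are hypothetical stand-ins.
 */
#include <errno.h>
#include <stddef.h>

typedef int (*handler_fn)(int arg);

static int do_getspi(int arg) { (void)arg; return 0; }  /* stand-in */
static int do_update(int arg) { (void)arg; return 0; }  /* stand-in */

static handler_fn typesw[] = {
    NULL,                         /* 0: reserved, like SADB_RESERVED */
    do_getspi,                    /* 1 */
    do_update,                    /* 2 */
};

int dispatch(unsigned int type, int arg)
{
    /* The same guard key_parse() applies before the indirect call. */
    if (type >= sizeof(typesw) / sizeof(typesw[0]) || typesw[type] == NULL)
        return EINVAL;
    return typesw[type](arg);
}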
*/ error = key_align(m, &mh); if (error) return error; - + if (m->m_next) { /*XXX*/ m_freem(m); return ENOBUFS; } - + msg = mh.msg; - + /* check SA type */ switch (msg->sadb_msg_satype) { - case SADB_SATYPE_UNSPEC: - switch (msg->sadb_msg_type) { - case SADB_GETSPI: - case SADB_UPDATE: - case SADB_ADD: - case SADB_DELETE: - case SADB_GET: - case SADB_ACQUIRE: - case SADB_EXPIRE: - ipseclog((LOG_DEBUG, "key_parse: must specify satype " - "when msg type=%u.\n", msg->sadb_msg_type)); + case SADB_SATYPE_UNSPEC: + switch (msg->sadb_msg_type) { + case SADB_GETSPI: + case SADB_UPDATE: + case SADB_ADD: + case SADB_DELETE: + case SADB_GET: + case SADB_ACQUIRE: + case SADB_EXPIRE: + ipseclog((LOG_DEBUG, "key_parse: must specify satype " + "when msg type=%u.\n", msg->sadb_msg_type)); + PFKEY_STAT_INCREMENT(pfkeystat.out_invsatype); + error = EINVAL; + goto senderror; + } + break; + case SADB_SATYPE_AH: + case SADB_SATYPE_ESP: + case SADB_X_SATYPE_IPCOMP: + switch (msg->sadb_msg_type) { + case SADB_X_SPDADD: + case SADB_X_SPDDELETE: + case SADB_X_SPDGET: + case SADB_X_SPDDUMP: + case SADB_X_SPDFLUSH: + case SADB_X_SPDSETIDX: + case SADB_X_SPDUPDATE: + case SADB_X_SPDDELETE2: + case SADB_X_SPDENABLE: + case SADB_X_SPDDISABLE: + ipseclog((LOG_DEBUG, "key_parse: illegal satype=%u\n", + msg->sadb_msg_type)); + PFKEY_STAT_INCREMENT(pfkeystat.out_invsatype); + error = EINVAL; + goto senderror; + } + break; + case SADB_SATYPE_RSVP: + case SADB_SATYPE_OSPFV2: + case SADB_SATYPE_RIPV2: + case SADB_SATYPE_MIP: + ipseclog((LOG_DEBUG, "key_parse: type %u isn't supported.\n", + msg->sadb_msg_satype)); PFKEY_STAT_INCREMENT(pfkeystat.out_invsatype); - error = EINVAL; + error = EOPNOTSUPP; goto senderror; - } - break; - case SADB_SATYPE_AH: - case SADB_SATYPE_ESP: - case SADB_X_SATYPE_IPCOMP: - switch (msg->sadb_msg_type) { - case SADB_X_SPDADD: - case SADB_X_SPDDELETE: - case SADB_X_SPDGET: - case SADB_X_SPDDUMP: - case SADB_X_SPDFLUSH: - case SADB_X_SPDSETIDX: - case SADB_X_SPDUPDATE: - case SADB_X_SPDDELETE2: - ipseclog((LOG_DEBUG, "key_parse: illegal satype=%u\n", - msg->sadb_msg_type)); + case 1: /* XXX: What does it do? */ + if (msg->sadb_msg_type == SADB_X_PROMISC) + break; + /*FALLTHROUGH*/ + default: + ipseclog((LOG_DEBUG, "key_parse: invalid type %u is passed.\n", + msg->sadb_msg_satype)); PFKEY_STAT_INCREMENT(pfkeystat.out_invsatype); error = EINVAL; goto senderror; - } - break; - case SADB_SATYPE_RSVP: - case SADB_SATYPE_OSPFV2: - case SADB_SATYPE_RIPV2: - case SADB_SATYPE_MIP: - ipseclog((LOG_DEBUG, "key_parse: type %u isn't supported.\n", - msg->sadb_msg_satype)); - PFKEY_STAT_INCREMENT(pfkeystat.out_invsatype); - error = EOPNOTSUPP; - goto senderror; - case 1: /* XXX: What does it do? 
*/ - if (msg->sadb_msg_type == SADB_X_PROMISC) - break; - /*FALLTHROUGH*/ - default: - ipseclog((LOG_DEBUG, "key_parse: invalid type %u is passed.\n", - msg->sadb_msg_satype)); - PFKEY_STAT_INCREMENT(pfkeystat.out_invsatype); - error = EINVAL; - goto senderror; } - + /* check field of upper layer protocol and address family */ if (mh.ext[SADB_EXT_ADDRESS_SRC] != NULL - && mh.ext[SADB_EXT_ADDRESS_DST] != NULL) { + && mh.ext[SADB_EXT_ADDRESS_DST] != NULL) { struct sadb_address *src0, *dst0; u_int plen; - + src0 = (struct sadb_address *)(mh.ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mh.ext[SADB_EXT_ADDRESS_DST]); - + /* check upper layer protocol */ if (src0->sadb_address_proto != dst0->sadb_address_proto) { ipseclog((LOG_DEBUG, "key_parse: upper layer protocol mismatched.\n")); @@ -8443,7 +9135,7 @@ key_parse( error = EINVAL; goto senderror; } - + /* check family */ if (PFKEY_ADDR_SADDR(src0)->sa_family != PFKEY_ADDR_SADDR(dst0)->sa_family) { @@ -8455,74 +9147,74 @@ key_parse( if (PFKEY_ADDR_SADDR(src0)->sa_len != PFKEY_ADDR_SADDR(dst0)->sa_len) { ipseclog((LOG_DEBUG, - "key_parse: address struct size mismatched.\n")); + "key_parse: address struct size mismatched.\n")); PFKEY_STAT_INCREMENT(pfkeystat.out_invaddr); error = EINVAL; goto senderror; } - + switch (PFKEY_ADDR_SADDR(src0)->sa_family) { - case AF_INET: - if (PFKEY_ADDR_SADDR(src0)->sa_len != - sizeof(struct sockaddr_in)) { - PFKEY_STAT_INCREMENT(pfkeystat.out_invaddr); - error = EINVAL; - goto senderror; - } - break; - case AF_INET6: - if (PFKEY_ADDR_SADDR(src0)->sa_len != - sizeof(struct sockaddr_in6)) { + case AF_INET: + if (PFKEY_ADDR_SADDR(src0)->sa_len != + sizeof(struct sockaddr_in)) { + PFKEY_STAT_INCREMENT(pfkeystat.out_invaddr); + error = EINVAL; + goto senderror; + } + break; + case AF_INET6: + if (PFKEY_ADDR_SADDR(src0)->sa_len != + sizeof(struct sockaddr_in6)) { + PFKEY_STAT_INCREMENT(pfkeystat.out_invaddr); + error = EINVAL; + goto senderror; + } + break; + default: + ipseclog((LOG_DEBUG, + "key_parse: unsupported address family.\n")); PFKEY_STAT_INCREMENT(pfkeystat.out_invaddr); - error = EINVAL; + error = EAFNOSUPPORT; goto senderror; - } - break; - default: - ipseclog((LOG_DEBUG, - "key_parse: unsupported address family.\n")); - PFKEY_STAT_INCREMENT(pfkeystat.out_invaddr); - error = EAFNOSUPPORT; - goto senderror; } - + switch (PFKEY_ADDR_SADDR(src0)->sa_family) { - case AF_INET: - plen = sizeof(struct in_addr) << 3; - break; - case AF_INET6: - plen = sizeof(struct in6_addr) << 3; - break; - default: - plen = 0; /*fool gcc*/ - break; + case AF_INET: + plen = sizeof(struct in_addr) << 3; + break; + case AF_INET6: + plen = sizeof(struct in6_addr) << 3; + break; + default: + plen = 0; /*fool gcc*/ + break; } - + /* check max prefix length */ if (src0->sadb_address_prefixlen > plen || dst0->sadb_address_prefixlen > plen) { ipseclog((LOG_DEBUG, - "key_parse: illegal prefixlen.\n")); + "key_parse: illegal prefixlen.\n")); PFKEY_STAT_INCREMENT(pfkeystat.out_invaddr); error = EINVAL; goto senderror; } - + /* * prefixlen == 0 is valid because there can be a case when * all addresses are matched. 
*/ } - + if (msg->sadb_msg_type >= sizeof(key_typesw)/sizeof(key_typesw[0]) || key_typesw[msg->sadb_msg_type] == NULL) { PFKEY_STAT_INCREMENT(pfkeystat.out_invmsgtype); error = EINVAL; goto senderror; } - + return (*key_typesw[msg->sadb_msg_type])(so, m, &mh); - + senderror: msg->sadb_msg_errno = error; return key_sendup_mbuf(so, m, target); @@ -8530,17 +9222,17 @@ senderror: static int key_senderror( - struct socket *so, - struct mbuf *m, - int code) + struct socket *so, + struct mbuf *m, + int code) { struct sadb_msg *msg; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); if (m->m_len < sizeof(struct sadb_msg)) panic("invalid mbuf passed to key_senderror"); - + msg = mtod(m, struct sadb_msg *); msg->sadb_msg_errno = code; return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); @@ -8553,27 +9245,27 @@ key_senderror( */ static int key_align( - struct mbuf *m, - struct sadb_msghdr *mhp) + struct mbuf *m, + struct sadb_msghdr *mhp) { struct mbuf *n; struct sadb_ext *ext; size_t off, end; int extlen; int toff; - + /* sanity check */ if (m == NULL || mhp == NULL) panic("key_align: NULL pointer is passed.\n"); if (m->m_len < sizeof(struct sadb_msg)) panic("invalid mbuf passed to key_align"); - + /* initialize */ bzero(mhp, sizeof(*mhp)); - + mhp->msg = mtod(m, struct sadb_msg *); mhp->ext[0] = (struct sadb_ext *)mhp->msg; /*XXX backward compat */ - + end = PFKEY_UNUNIT64(mhp->msg->sadb_msg_len); extlen = end; /*just in case extlen is not updated*/ for (off = sizeof(struct sadb_msg); off < end; off += extlen) { @@ -8583,94 +9275,99 @@ key_align( return ENOBUFS; } ext = (struct sadb_ext *)(void *)(mtod(n, caddr_t) + toff); - + /* set pointer */ switch (ext->sadb_ext_type) { - case SADB_EXT_SA: - case SADB_EXT_ADDRESS_SRC: - case SADB_EXT_ADDRESS_DST: - case SADB_EXT_ADDRESS_PROXY: - case SADB_EXT_LIFETIME_CURRENT: - case SADB_EXT_LIFETIME_HARD: - case SADB_EXT_LIFETIME_SOFT: - case SADB_EXT_KEY_AUTH: - case SADB_EXT_KEY_ENCRYPT: - case SADB_EXT_IDENTITY_SRC: - case SADB_EXT_IDENTITY_DST: - case SADB_EXT_SENSITIVITY: - case SADB_EXT_PROPOSAL: - case SADB_EXT_SUPPORTED_AUTH: - case SADB_EXT_SUPPORTED_ENCRYPT: - case SADB_EXT_SPIRANGE: - case SADB_X_EXT_POLICY: - case SADB_X_EXT_SA2: - case SADB_EXT_SESSION_ID: - case SADB_EXT_SASTAT: - /* duplicate check */ - /* - * XXX Are there duplication payloads of either - * KEY_AUTH or KEY_ENCRYPT ? - */ - if (mhp->ext[ext->sadb_ext_type] != NULL) { + case SADB_EXT_SA: + case SADB_EXT_ADDRESS_SRC: + case SADB_EXT_ADDRESS_DST: + case SADB_EXT_ADDRESS_PROXY: + case SADB_EXT_LIFETIME_CURRENT: + case SADB_EXT_LIFETIME_HARD: + case SADB_EXT_LIFETIME_SOFT: + case SADB_EXT_KEY_AUTH: + case SADB_EXT_KEY_ENCRYPT: + case SADB_EXT_IDENTITY_SRC: + case SADB_EXT_IDENTITY_DST: + case SADB_EXT_SENSITIVITY: + case SADB_EXT_PROPOSAL: + case SADB_EXT_SUPPORTED_AUTH: + case SADB_EXT_SUPPORTED_ENCRYPT: + case SADB_EXT_SPIRANGE: + case SADB_X_EXT_POLICY: + case SADB_X_EXT_SA2: + case SADB_EXT_SESSION_ID: + case SADB_EXT_SASTAT: + case SADB_X_EXT_IPSECIF: + case SADB_X_EXT_ADDR_RANGE_SRC_START: + case SADB_X_EXT_ADDR_RANGE_SRC_END: + case SADB_X_EXT_ADDR_RANGE_DST_START: + case SADB_X_EXT_ADDR_RANGE_DST_END: + /* duplicate check */ + /* + * XXX Are there duplication payloads of either + * KEY_AUTH or KEY_ENCRYPT ? 
+ */ + if (mhp->ext[ext->sadb_ext_type] != NULL) { + ipseclog((LOG_DEBUG, + "key_align: duplicate ext_type %u " + "is passed.\n", ext->sadb_ext_type)); + m_freem(m); + PFKEY_STAT_INCREMENT(pfkeystat.out_dupext); + return EINVAL; + } + break; + default: ipseclog((LOG_DEBUG, - "key_align: duplicate ext_type %u " - "is passed.\n", ext->sadb_ext_type)); + "key_align: invalid ext_type %u is passed.\n", + ext->sadb_ext_type)); m_freem(m); - PFKEY_STAT_INCREMENT(pfkeystat.out_dupext); + PFKEY_STAT_INCREMENT(pfkeystat.out_invexttype); return EINVAL; - } - break; - default: - ipseclog((LOG_DEBUG, - "key_align: invalid ext_type %u is passed.\n", - ext->sadb_ext_type)); - m_freem(m); - PFKEY_STAT_INCREMENT(pfkeystat.out_invexttype); - return EINVAL; } - + extlen = PFKEY_UNUNIT64(ext->sadb_ext_len); - + if (key_validate_ext(ext, extlen)) { m_freem(m); PFKEY_STAT_INCREMENT(pfkeystat.out_invlen); return EINVAL; } - + n = m_pulldown(m, off, extlen, &toff); if (!n) { /* m is already freed */ return ENOBUFS; } ext = (struct sadb_ext *)(void *)(mtod(n, caddr_t) + toff); - + mhp->ext[ext->sadb_ext_type] = ext; mhp->extoff[ext->sadb_ext_type] = off; mhp->extlen[ext->sadb_ext_type] = extlen; } - + if (off != end) { m_freem(m); PFKEY_STAT_INCREMENT(pfkeystat.out_invlen); return EINVAL; } - + return 0; } static int key_validate_ext( - const struct sadb_ext *ext, - int len) + const struct sadb_ext *ext, + int len) { struct sockaddr *sa; enum { NONE, ADDR } checktype = NONE; int baselen; const int sal = offsetof(struct sockaddr, sa_len) + sizeof(sa->sa_len); - + if (len != PFKEY_UNUNIT64(ext->sadb_ext_len)) return EINVAL; - + /* if it does not match minimum/maximum length, bail */ if (ext->sadb_ext_type >= sizeof(minsize) / sizeof(minsize[0]) || ext->sadb_ext_type >= sizeof(maxsize) / sizeof(maxsize[0])) @@ -8679,51 +9376,49 @@ key_validate_ext( return EINVAL; if (maxsize[ext->sadb_ext_type] && len > maxsize[ext->sadb_ext_type]) return EINVAL; - + /* more checks based on sadb_ext_type XXX need more */ switch (ext->sadb_ext_type) { - case SADB_EXT_ADDRESS_SRC: - case SADB_EXT_ADDRESS_DST: - case SADB_EXT_ADDRESS_PROXY: - baselen = PFKEY_ALIGN8(sizeof(struct sadb_address)); - checktype = ADDR; - break; - case SADB_EXT_IDENTITY_SRC: - case SADB_EXT_IDENTITY_DST: - if (((struct sadb_ident *)(uintptr_t)(size_t)ext)-> - sadb_ident_type == SADB_X_IDENTTYPE_ADDR) { - baselen = PFKEY_ALIGN8(sizeof(struct sadb_ident)); + case SADB_EXT_ADDRESS_SRC: + case SADB_EXT_ADDRESS_DST: + case SADB_EXT_ADDRESS_PROXY: + case SADB_X_EXT_ADDR_RANGE_SRC_START: + case SADB_X_EXT_ADDR_RANGE_SRC_END: + case SADB_X_EXT_ADDR_RANGE_DST_START: + case SADB_X_EXT_ADDR_RANGE_DST_END: + baselen = PFKEY_ALIGN8(sizeof(struct sadb_address)); checktype = ADDR; - } else + break; + case SADB_EXT_IDENTITY_SRC: + case SADB_EXT_IDENTITY_DST: + if (((struct sadb_ident *)(uintptr_t)(size_t)ext)-> + sadb_ident_type == SADB_X_IDENTTYPE_ADDR) { + baselen = PFKEY_ALIGN8(sizeof(struct sadb_ident)); + checktype = ADDR; + } else + checktype = NONE; + break; + default: checktype = NONE; - break; - default: - checktype = NONE; - break; + break; } - + switch (checktype) { - case NONE: - break; - case ADDR: - sa = (struct sockaddr *)((caddr_t)(uintptr_t)ext + baselen); - - if (len < baselen + sal) - return EINVAL; - if (baselen + PFKEY_ALIGN8(sa->sa_len) != len) - return EINVAL; - break; + case NONE: + break; + case ADDR: + sa = (struct sockaddr *)((caddr_t)(uintptr_t)ext + baselen); + + if (len < baselen + sal) + return EINVAL; + if (baselen + PFKEY_ALIGN8(sa->sa_len) 
!= len) + return EINVAL; + break; } - + return 0; } -void -key_domain_init(void) -{ - return; -} - /* * XXX: maybe This function is called after INBOUND IPsec processing. * @@ -8734,28 +9429,28 @@ key_domain_init(void) */ int key_checktunnelsanity( - struct secasvar *sav, - __unused u_int family, - __unused caddr_t src, - __unused caddr_t dst) + struct secasvar *sav, + __unused u_int family, + __unused caddr_t src, + __unused caddr_t dst) { - + /* sanity check */ if (sav->sah == NULL) panic("sav->sah == NULL at key_checktunnelsanity"); - + /* XXX: check inner IP header */ - + return 1; } /* record data transfer on SA, and update timestamps */ void key_sa_recordxfer( - struct secasvar *sav, - struct mbuf *m) + struct secasvar *sav, + struct mbuf *m) { - + if (!sav) panic("key_sa_recordxfer called with sav == NULL"); @@ -8763,7 +9458,7 @@ key_sa_recordxfer( panic("key_sa_recordxfer called with m == NULL"); if (!sav->lft_c) return; - + lck_mtx_lock(sadb_mutex); /* * XXX Currently, there is a difference of bytes size @@ -8771,7 +9466,7 @@ key_sa_recordxfer( */ sav->lft_c->sadb_lifetime_bytes += m->m_pkthdr.len; /* to check bytes lifetime is done in key_timehandler(). */ - + /* * We use the number of packets as the unit of * sadb_lifetime_allocations. We increment the variable @@ -8779,7 +9474,7 @@ key_sa_recordxfer( */ sav->lft_c->sadb_lifetime_allocations++; /* XXX check for expires? */ - + /* * NOTE: We record CURRENT sadb_lifetime_usetime by using wall clock, * in seconds. HARD and SOFT lifetime are measured by the time @@ -8792,10 +9487,10 @@ key_sa_recordxfer( * <-----> SOFT */ { - struct timeval tv; - microtime(&tv); - sav->lft_c->sadb_lifetime_usetime = tv.tv_sec; - /* XXX check for expires? */ + struct timeval tv; + microtime(&tv); + sav->lft_c->sadb_lifetime_usetime = tv.tv_sec; + /* XXX check for expires? */ } lck_mtx_unlock(sadb_mutex); @@ -8805,7 +9500,7 @@ key_sa_recordxfer( /* dumb version */ void key_sa_routechange( - struct sockaddr *dst) + struct sockaddr *dst) { struct secashead *sah; struct route *ro; @@ -8814,33 +9509,32 @@ key_sa_routechange( LIST_FOREACH(sah, &sahtree, chain) { ro = &sah->sa_route; if (ro->ro_rt && dst->sa_len == ro->ro_dst.sa_len - && bcmp(dst, &ro->ro_dst, dst->sa_len) == 0) { - rtfree(ro->ro_rt); - ro->ro_rt = (struct rtentry *)NULL; + && bcmp(dst, &ro->ro_dst, dst->sa_len) == 0) { + ROUTE_RELEASE(ro); } } lck_mtx_unlock(sadb_mutex); - + return; } void key_sa_chgstate( - struct secasvar *sav, - u_int8_t state) + struct secasvar *sav, + u_int8_t state) { - + if (sav == NULL) panic("key_sa_chgstate called with sav == NULL"); - + if (sav->state == state) return; - + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_OWNED); - + if (__LIST_CHAINED(sav)) LIST_REMOVE(sav, chain); - + sav->state = state; LIST_INSERT_HEAD(&sav->sah->savtree[state], sav, chain); @@ -8848,7 +9542,7 @@ key_sa_chgstate( void key_sa_stir_iv( - struct secasvar *sav) + struct secasvar *sav) { lck_mtx_lock(sadb_mutex); if (!sav->iv) @@ -8860,11 +9554,11 @@ key_sa_stir_iv( /* XXX too much? 
*/ static struct mbuf * key_alloc_mbuf( - int l) + int l) { struct mbuf *m = NULL, *n; int len, t; - + len = l; while (len > 0) { MGET(n, M_DONTWAIT, MT_DATA); @@ -8874,7 +9568,7 @@ key_alloc_mbuf( m_freem(m); return NULL; } - + n->m_next = NULL; n->m_len = 0; n->m_len = M_TRAILINGSPACE(n); @@ -8884,74 +9578,74 @@ key_alloc_mbuf( n->m_data += t; n->m_len = len; } - + len -= n->m_len; - + if (m) m_cat(m, n); else m = n; } - + return m; } static struct mbuf * key_setdumpsastats (u_int32_t dir, - struct sastat *stats, - u_int32_t max_stats, - u_int64_t session_ids[], - u_int32_t seq, - u_int32_t pid) + struct sastat *stats, + u_int32_t max_stats, + u_int64_t session_ids[], + u_int32_t seq, + u_int32_t pid) { - struct mbuf *result = NULL, *m = NULL; - - m = key_setsadbmsg(SADB_GETSASTAT, 0, 0, seq, pid, 0); - if (!m) { - goto fail; + struct mbuf *result = NULL, *m = NULL; + + m = key_setsadbmsg(SADB_GETSASTAT, 0, 0, seq, pid, 0); + if (!m) { + goto fail; } - result = m; - + result = m; + m = key_setsadbsession_id(session_ids); if (!m) { - goto fail; + goto fail; } - m_cat(result, m); - + m_cat(result, m); + m = key_setsadbsastat(dir, - stats, - max_stats); + stats, + max_stats); if (!m) { - goto fail; + goto fail; } - m_cat(result, m); - - if ((result->m_flags & M_PKTHDR) == 0) { + m_cat(result, m); + + if ((result->m_flags & M_PKTHDR) == 0) { goto fail; - } - - if (result->m_len < sizeof(struct sadb_msg)) { - result = m_pullup(result, sizeof(struct sadb_msg)); + } + + if (result->m_len < sizeof(struct sadb_msg)) { + result = m_pullup(result, sizeof(struct sadb_msg)); if (result == NULL) { goto fail; } - } - - result->m_pkthdr.len = 0; - for (m = result; m; m = m->m_next) { - result->m_pkthdr.len += m->m_len; } - - mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - - return result; - - fail: + + result->m_pkthdr.len = 0; + for (m = result; m; m = m->m_next) { + result->m_pkthdr.len += m->m_len; + } + + mtod(result, struct sadb_msg *)->sadb_msg_len = + PFKEY_UNIT64(result->m_pkthdr.len); + + return result; + +fail: if (result) { - m_freem(result); + m_freem(result); } - return NULL; + return NULL; } /* @@ -8960,11 +9654,11 @@ key_setdumpsastats (u_int32_t dir, * * m will always be freed. */ - + static int key_getsastat (struct socket *so, - struct mbuf *m, - const struct sadb_msghdr *mhp) + struct mbuf *m, + const struct sadb_msghdr *mhp) { struct sadb_session_id *session_id; u_int32_t bufsize, arg_count, res_count; @@ -8972,123 +9666,254 @@ key_getsastat (struct socket *so, struct sastat *sa_stats_sav = NULL; struct mbuf *n; int error = 0; - + /* sanity check */ if (so == NULL || m == NULL || mhp == NULL || mhp->msg == NULL) - panic("%s: NULL pointer is passed.\n", __FUNCTION__); - - if (mhp->ext[SADB_EXT_SESSION_ID] == NULL) { - printf("%s: invalid message is passed. missing session-id.\n", __FUNCTION__); + panic("%s: NULL pointer is passed.\n", __FUNCTION__); + + if (mhp->ext[SADB_EXT_SESSION_ID] == NULL) { + printf("%s: invalid message is passed. missing session-id.\n", __FUNCTION__); return key_senderror(so, m, EINVAL); - } + } if (mhp->extlen[SADB_EXT_SESSION_ID] < sizeof(struct sadb_session_id)) { - printf("%s: invalid message is passed. short session-id.\n", __FUNCTION__); + printf("%s: invalid message is passed. short session-id.\n", __FUNCTION__); return key_senderror(so, m, EINVAL); - } + } if (mhp->ext[SADB_EXT_SASTAT] == NULL) { - printf("%s: invalid message is passed. 
missing stat args.\n", __FUNCTION__); + printf("%s: invalid message is passed. missing stat args.\n", __FUNCTION__); return key_senderror(so, m, EINVAL); - } - if (mhp->extlen[SADB_EXT_SASTAT] < sizeof(*sa_stats_arg)) { - printf("%s: invalid message is passed. short stat args.\n", __FUNCTION__); + } + if (mhp->extlen[SADB_EXT_SASTAT] < sizeof(*sa_stats_arg)) { + printf("%s: invalid message is passed. short stat args.\n", __FUNCTION__); return key_senderror(so, m, EINVAL); - } - + } + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); - + // exit early if there are no active SAs if (ipsec_sav_count <= 0) { - printf("%s: No active SAs.\n", __FUNCTION__); + printf("%s: No active SAs.\n", __FUNCTION__); error = ENOENT; goto end; } bufsize = (ipsec_sav_count + 1) * sizeof(*sa_stats_sav); - + KMALLOC_WAIT(sa_stats_sav, __typeof__(sa_stats_sav), bufsize); if (sa_stats_sav == NULL) { - printf("%s: No more memory.\n", __FUNCTION__); + printf("%s: No more memory.\n", __FUNCTION__); error = ENOMEM; goto end; } bzero(sa_stats_sav, bufsize); - - sa_stats_arg = (__typeof__(sa_stats_arg)) - (void *)mhp->ext[SADB_EXT_SASTAT]; + + sa_stats_arg = (__typeof__(sa_stats_arg)) + (void *)mhp->ext[SADB_EXT_SASTAT]; arg_count = sa_stats_arg->sadb_sastat_list_len; // exit early if there are no requested SAs if (arg_count == 0) { - printf("%s: No SAs requested.\n", __FUNCTION__); + printf("%s: No SAs requested.\n", __FUNCTION__); error = ENOENT; goto end; } res_count = 0; - + if (key_getsastatbyspi((struct sastat *)(sa_stats_arg + 1), - arg_count, - sa_stats_sav, - &res_count)) { - printf("%s: Error finding SAs.\n", __FUNCTION__); + arg_count, + sa_stats_sav, + &res_count)) { + printf("%s: Error finding SAs.\n", __FUNCTION__); error = ENOENT; goto end; } if (!res_count) { - printf("%s: No SAs found.\n", __FUNCTION__); + printf("%s: No SAs found.\n", __FUNCTION__); error = ENOENT; goto end; } - + session_id = (__typeof__(session_id)) - (void *)mhp->ext[SADB_EXT_SESSION_ID]; - + (void *)mhp->ext[SADB_EXT_SESSION_ID]; + /* send this to the userland. 
*/ n = key_setdumpsastats(sa_stats_arg->sadb_sastat_dir, - sa_stats_sav, - res_count, - session_id->sadb_session_id_v, - mhp->msg->sadb_msg_seq, - mhp->msg->sadb_msg_pid); - if (!n) { - printf("%s: No bufs to dump stats.\n", __FUNCTION__); + sa_stats_sav, + res_count, + session_id->sadb_session_id_v, + mhp->msg->sadb_msg_seq, + mhp->msg->sadb_msg_pid); + if (!n) { + printf("%s: No bufs to dump stats.\n", __FUNCTION__); error = ENOBUFS; goto end; } - - key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + + key_sendup_mbuf(so, n, KEY_SENDUP_ALL); end: if (sa_stats_sav) { KFREE(sa_stats_sav); } - + if (error) return key_senderror(so, m, error); - + m_freem(m); return 0; } static void key_update_natt_keepalive_timestamp (struct secasvar *sav_sent, - struct secasvar *sav_update) + struct secasvar *sav_update) { struct secasindex saidx_swap_sent_addr; - + // exit early if two SAs are identical, or if sav_update is current if (sav_sent == sav_update || sav_update->natt_last_activity == natt_now) { return; } - + // assuming that (sav_update->remote_ike_port != 0 && (esp_udp_encap_port & 0xFFFF) != 0) - + bzero(&saidx_swap_sent_addr, sizeof(saidx_swap_sent_addr)); memcpy(&saidx_swap_sent_addr.src, &sav_sent->sah->saidx.dst, sizeof(saidx_swap_sent_addr.src)); memcpy(&saidx_swap_sent_addr.dst, &sav_sent->sah->saidx.src, sizeof(saidx_swap_sent_addr.dst)); saidx_swap_sent_addr.proto = sav_sent->sah->saidx.proto; saidx_swap_sent_addr.mode = sav_sent->sah->saidx.mode; // we ignore reqid for split-tunnel setups - + if (key_cmpsaidx(&sav_sent->sah->saidx, &sav_update->sah->saidx, CMP_MODE | CMP_PORT) || key_cmpsaidx(&saidx_swap_sent_addr, &sav_update->sah->saidx, CMP_MODE | CMP_PORT)) { sav_update->natt_last_activity = natt_now; } } + +static int +key_send_delsp (struct secpolicy *sp) +{ + struct mbuf *result = NULL, *m; + + if (sp == NULL) + goto fail; + + /* set msg header */ + m = key_setsadbmsg(SADB_X_SPDDELETE, 0, 0, 0, 0, 0); + if (!m) { + goto fail; + } + result = m; + + /* set sadb_address(es) for source */ + if (sp->spidx.src_range.start.ss_len > 0) { + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_SRC_START, + (struct sockaddr *)&sp->spidx.src_range.start, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_SRC_END, + (struct sockaddr *)&sp->spidx.src_range.end, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + } else { + m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, + (struct sockaddr *)&sp->spidx.src, sp->spidx.prefs, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + } + + /* set sadb_address(es) for destination */ + if (sp->spidx.dst_range.start.ss_len > 0) { + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_DST_START, + (struct sockaddr *)&sp->spidx.dst_range.start, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + + m = key_setsadbaddr(SADB_X_EXT_ADDR_RANGE_DST_END, + (struct sockaddr *)&sp->spidx.dst_range.end, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + } else { + m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, + (struct sockaddr *)&sp->spidx.dst, sp->spidx.prefd, + sp->spidx.ul_proto); + if (!m) + goto fail; + m_cat(result, m); + } + + /* set secpolicy */ + m = key_sp2msg(sp); + if (!m) { + goto fail; + } + m_cat(result, m); + + if ((result->m_flags & M_PKTHDR) == 0) { + goto fail; + } + + if (result->m_len < sizeof(struct sadb_msg)) { + result = m_pullup(result, sizeof(struct sadb_msg)); + if (result == NULL) { + 
goto fail; + } + } + + result->m_pkthdr.len = 0; + for (m = result; m; m = m->m_next) + result->m_pkthdr.len += m->m_len; + + mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); + + return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); + +fail: + if (result) + m_free(result); + return -1; +} + +void +key_delsp_for_ipsec_if (ifnet_t ipsec_if) +{ + struct secpolicy *sp, *nextsp; + int dir; + + if (ipsec_if == NULL) + return; + + lck_mtx_assert(sadb_mutex, LCK_MTX_ASSERT_NOTOWNED); + + lck_mtx_lock(sadb_mutex); + + for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { + for (sp = LIST_FIRST(&sptree[dir]); + sp != NULL; + sp = nextsp) { + + nextsp = LIST_NEXT(sp, chain); + + if (sp->ipsec_if == ipsec_if) { + ifnet_release(sp->ipsec_if); + sp->ipsec_if = NULL; + + key_send_delsp(sp); + + sp->state = IPSEC_SPSTATE_DEAD; + key_freesp(sp, KEY_SADB_LOCKED); + } + } + } + + lck_mtx_unlock(sadb_mutex); + +} diff --git a/bsd/netkey/key.h b/bsd/netkey/key.h index f2a2729a1..ad5255a92 100644 --- a/bsd/netkey/key.h +++ b/bsd/netkey/key.h @@ -33,7 +33,7 @@ #define _NETKEY_KEY_H_ #include -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define KEY_SADB_UNLOCKED 0 #define KEY_SADB_LOCKED 1 @@ -75,7 +75,6 @@ extern u_int32_t key_random(void); extern void key_randomfill(void *, size_t); extern void key_freereg(struct socket *); extern int key_parse(struct mbuf *, struct socket *); -extern void key_domain_init(void); extern int key_checktunnelsanity(struct secasvar *, u_int, caddr_t, caddr_t); extern void key_sa_recordxfer(struct secasvar *, struct mbuf *); extern void key_sa_routechange(struct sockaddr *); @@ -106,7 +105,9 @@ extern struct secasvar * key_newsav2(struct secashead *sah, struct sadb_lifetime *lifetime_hard, struct sadb_lifetime *lifetime_soft); extern void key_delsav(struct secasvar *sav); +extern struct secpolicy *key_getspbyid(u_int32_t); +extern void key_delsp_for_ipsec_if(ifnet_t ipsec_if); -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETKEY_KEY_H_ */ diff --git a/bsd/netkey/key_debug.c b/bsd/netkey/key_debug.c index 1d7522054..23a5fff24 100644 --- a/bsd/netkey/key_debug.c +++ b/bsd/netkey/key_debug.c @@ -558,8 +558,9 @@ kdebug_secpolicyindex(spidx) if (spidx == NULL) panic("kdebug_secpolicyindex: NULL pointer was passed.\n"); - printf("secpolicyindex{ dir=%u prefs=%u prefd=%u ul_proto=%u\n", - spidx->dir, spidx->prefs, spidx->prefd, spidx->ul_proto); + printf("secpolicyindex{ dir=%u prefs=%u prefd=%u ul_proto=%u internal_if=%s\n", + spidx->dir, spidx->prefs, spidx->prefd, spidx->ul_proto, + (spidx->internal_if) ? 
spidx->internal_if->if_xname : "N/A"); ipsec_hexdump((caddr_t)&spidx->src, ((struct sockaddr *)&spidx->src)->sa_len); @@ -671,21 +672,27 @@ kdebug_mbufhdr(m) if (m == NULL) return; - printf("mbuf(%p){ m_next:%p m_nextpkt:%p m_data:%p " - "m_len:%d m_type:0x%02x m_flags:0x%02x }\n", - m, m->m_next, m->m_nextpkt, m->m_data, - m->m_len, m->m_type, m->m_flags); + printf("mbuf(0x%llx){ m_next:0x%llx m_nextpkt:0x%llx m_data:0x%llx " + "m_len:%d m_type:0x%02x m_flags:0x%02x }\n", + (uint64_t)VM_KERNEL_ADDRPERM(m), + (uint64_t)VM_KERNEL_ADDRPERM(m->m_next), + (uint64_t)VM_KERNEL_ADDRPERM(m->m_nextpkt), + (uint64_t)VM_KERNEL_ADDRPERM(m->m_data), + m->m_len, m->m_type, m->m_flags); if (m->m_flags & M_PKTHDR) { - printf(" m_pkthdr{ len:%d rcvif:%p }\n", - m->m_pkthdr.len, m->m_pkthdr.rcvif); + printf(" m_pkthdr{ len:%d rcvif:0x%llx }\n", + m->m_pkthdr.len, + (uint64_t)VM_KERNEL_ADDRPERM(m->m_pkthdr.rcvif)); } if (m->m_flags & M_EXT) { - printf(" m_ext{ ext_buf:%p ext_free:%p " - "ext_size:%u ext_ref:%p }\n", - m->m_ext.ext_buf, m->m_ext.ext_free, - m->m_ext.ext_size, m->m_ext.ext_refs); + printf(" m_ext{ ext_buf:0x%llx ext_free:0x%llx " + "ext_size:%u ext_ref:0x%llx }\n", + (uint64_t)VM_KERNEL_ADDRPERM(m->m_ext.ext_buf), + (uint64_t)VM_KERNEL_ADDRPERM(m->m_ext.ext_free), + m->m_ext.ext_size, + (uint64_t)VM_KERNEL_ADDRPERM(m->m_ext.ext_refs)); } return; diff --git a/bsd/netkey/key_debug.h b/bsd/netkey/key_debug.h index b2e94a3ad..0683f37f8 100644 --- a/bsd/netkey/key_debug.h +++ b/bsd/netkey/key_debug.h @@ -64,7 +64,7 @@ struct sadb_ext; extern void kdebug_sadb(struct sadb_msg *); extern void kdebug_sadb_x_policy(struct sadb_ext *); -#ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE extern u_int32_t key_debug_level; struct secpolicy; @@ -79,7 +79,7 @@ extern void kdebug_secasindex(struct secasindex *); extern void kdebug_secasv(struct secasvar *); extern void kdebug_mbufhdr(struct mbuf *); extern void kdebug_mbuf(struct mbuf *); -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ struct sockaddr; extern void kdebug_sockaddr(struct sockaddr *); diff --git a/bsd/netkey/key_var.h b/bsd/netkey/key_var.h index 7f3f45c52..7552efebc 100644 --- a/bsd/netkey/key_var.h +++ b/bsd/netkey/key_var.h @@ -50,7 +50,7 @@ #define KEYCTL_PFKEYSTAT 14 #define KEYCTL_MAXID 15 -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #define KEYCTL_NAMES { \ { 0, 0 }, \ { "debug", CTLTYPE_INT }, \ @@ -85,13 +85,11 @@ &natt_keepalive_interval, \ } -#ifdef KERNEL #define _ARRAYLEN(p) (sizeof(p)/sizeof(p[0])) #define _KEYLEN(key) ((u_int)((key)->sadb_key_bits >> 3)) #define _KEYBITS(key) ((u_int)((key)->sadb_key_bits)) #define _KEYBUF(key) ((caddr_t)((caddr_t)(key) + sizeof(struct sadb_key))) -#endif /*KERNEL*/ -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETKEY_KEY_VAR_H_ */ diff --git a/bsd/netkey/keydb.h b/bsd/netkey/keydb.h index 079f6e288..dc1563a60 100644 --- a/bsd/netkey/keydb.h +++ b/bsd/netkey/keydb.h @@ -33,8 +33,7 @@ #define _NETKEY_KEYDB_H_ #include -#ifdef KERNEL -#ifdef KERNEL_PRIVATE +#ifdef BSD_KERNEL_PRIVATE #include #include @@ -107,6 +106,8 @@ struct secasvar { u_int32_t natt_last_activity; u_int16_t remote_ike_port; u_int16_t natt_encapsulated_src_port; /* network byte order */ + + u_int8_t always_expire; /* Send expire/delete messages even if unused */ void *utun_pcb; utun_is_keepalive_func utun_is_keepalive_fn; @@ -170,7 +171,6 @@ extern void keydb_delsecreplay(struct secreplay *); // extern struct secreg *keydb_newsecreg(void); // not used // extern void keydb_delsecreg(struct secreg *); // 
not used -#endif /* KERNEL_PRIVATE */ -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* _NETKEY_KEYDB_H_ */ diff --git a/bsd/netkey/keysock.c b/bsd/netkey/keysock.c index 08f515e1c..69c1e92c8 100644 --- a/bsd/netkey/keysock.c +++ b/bsd/netkey/keysock.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -56,15 +57,17 @@ #include extern lck_mtx_t *raw_mtx; -extern void key_init(void) __attribute__((section("__TEXT, initcode"))); +extern void key_init(struct protosw *, struct domain *); struct sockaddr key_dst = { 2, PF_KEY, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,} }; struct sockaddr key_src = { 2, PF_KEY, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,} }; +static void key_dinit(struct domain *); static int key_sendup0(struct rawcb *, struct mbuf *, int); struct pfkeystat pfkeystat; +static struct domain *keydomain = NULL; extern lck_mtx_t *pfkey_stat_mutex; @@ -492,14 +495,19 @@ key_sockaddr(struct socket *so, struct sockaddr **nam) return error; } -struct pr_usrreqs key_usrreqs = { - key_abort, pru_accept_notsupp, key_attach, key_bind, - key_connect, - pru_connect2_notsupp, pru_control_notsupp, key_detach, - key_disconnect, pru_listen_notsupp, key_peeraddr, - pru_rcvd_notsupp, - pru_rcvoob_notsupp, key_send, pru_sense_null, key_shutdown, - key_sockaddr, sosend, soreceive, pru_sopoll_notsupp +static struct pr_usrreqs key_usrreqs = { + .pru_abort = key_abort, + .pru_attach = key_attach, + .pru_bind = key_bind, + .pru_connect = key_connect, + .pru_detach = key_detach, + .pru_disconnect = key_disconnect, + .pru_peeraddr = key_peeraddr, + .pru_send = key_send, + .pru_shutdown = key_shutdown, + .pru_sockaddr = key_sockaddr, + .pru_sosend = sosend, + .pru_soreceive = soreceive, }; /* sysctl */ @@ -509,25 +517,40 @@ SYSCTL_NODE(_net, PF_KEY, key, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Key Family"); * Definitions of protocols supported in the KEY domain. 
*/ -extern struct domain keydomain; - -struct protosw keysw[] = { -{ SOCK_RAW, &keydomain, PF_KEY_V2, PR_ATOMIC|PR_ADDR, - NULL, key_output, raw_ctlinput, NULL, - NULL, - key_init, NULL, NULL, NULL, - NULL, - &key_usrreqs, - NULL, NULL, NULL, - { NULL, NULL }, NULL, { 0 } +extern struct domain keydomain_s; + +static struct protosw keysw[] = { +{ + .pr_type = SOCK_RAW, + .pr_protocol = PF_KEY_V2, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_output = key_output, + .pr_ctlinput = raw_ctlinput, + .pr_init = key_init, + .pr_usrreqs = &key_usrreqs, } }; -struct domain keydomain = { PF_KEY, "key", key_domain_init, NULL, NULL, - keysw, NULL, - NULL, 0, - sizeof(struct key_cb), 0, 0, - NULL, 0, { 0, 0} +static int key_proto_count = (sizeof (keysw) / sizeof (struct protosw)); + +struct domain keydomain_s = { + .dom_family = PF_KEY, + .dom_name = "key", + .dom_init = key_dinit, + .dom_maxrtkey = sizeof (struct key_cb), }; -DOMAIN_SET(key); +static void +key_dinit(struct domain *dp) +{ + struct protosw *pr; + int i; + + VERIFY(!(dp->dom_flags & DOM_INITIALIZED)); + VERIFY(keydomain == NULL); + + keydomain = dp; + + for (i = 0, pr = &keysw[0]; i < key_proto_count; i++, pr++) + net_add_proto(pr, dp, 1); +} diff --git a/bsd/netkey/keysock.h b/bsd/netkey/keysock.h index 413e281d5..32d8ef559 100644 --- a/bsd/netkey/keysock.h +++ b/bsd/netkey/keysock.h @@ -62,12 +62,10 @@ struct pfkeystat { #define KEY_SENDUP_ALL 1 #define KEY_SENDUP_REGISTERED 2 +#ifdef BSD_KERNEL_PRIVATE #define PFKEY_STAT_INCREMENT(x) \ {lck_mtx_lock(pfkey_stat_mutex); (x)++; lck_mtx_unlock(pfkey_stat_mutex);} - -#ifdef KERNEL_PRIVATE -#ifdef KERNEL struct keycb { struct rawcb kp_raw; /* rawcb */ int kp_promisc; /* promiscuous mode */ @@ -86,7 +84,6 @@ extern int key_usrreq(struct socket *, extern int key_sendup(struct socket *, struct sadb_msg *, u_int, int); extern int key_sendup_mbuf(struct socket *, struct mbuf *, int); -#endif /* KERNEL */ -#endif /* KERNEL_PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /*_NETKEY_KEYSOCK_H_*/ diff --git a/bsd/nfs/Makefile b/bsd/nfs/Makefile index d4c4ce3cb..605a48118 100644 --- a/bsd/nfs/Makefile +++ b/bsd/nfs/Makefile @@ -7,20 +7,11 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ krpc.h nfs.h nfsdiskless.h nfsm_subs.h nfsmount.h nfsnode.h \ - nfs_lock.h nfs_gss.h \ + nfs_lock.h nfs_gss.h nfs_ioctl.h \ nfsproto.h nfsrvcache.h rpcv2.h xdr_subs.h - INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = nfs @@ -29,7 +20,6 @@ EXPORT_MI_LIST = EXPORT_MI_DIR = nfs - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h index bbb466d93..45d3ade4a 100644 --- a/bsd/nfs/nfs.h +++ b/bsd/nfs/nfs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -78,7 +78,7 @@ #define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ -__private_extern__ int nfs_ticks; +extern int nfs_ticks; #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ @@ -183,6 +183,9 @@ __private_extern__ int nfs_ticks; #define NFS_MATTR_FS_LOCATIONS 21 /* list of locations for the file system */ #define NFS_MATTR_MNTFLAGS 22 /* VFS mount flags (MNT_*) */ #define NFS_MATTR_MNTFROM 23 /* fixed string to use for "f_mntfromname" */ +#define NFS_MATTR_REALM 24 /* Realm to authenticate with */ +#define NFS_MATTR_PRINCIPAL 25 /* GSS principal to authenticate with */ +#define NFS_MATTR_SVCPRINCIPAL 26 /* GSS principal to authenticate to, the server principal */ /* NFS mount flags */ #define NFS_MFLAG_SOFT 0 /* soft mount (requests fail if unresponsive) */ @@ -578,11 +581,11 @@ struct nfs_export_stat_counters { #define NFSRV_USER_STAT_DEF_IDLE_SEC 7200 /* default idle seconds (node no longer considered active) */ /* active user list globals */ -__private_extern__ uint32_t nfsrv_user_stat_enabled; /* enable/disable active user list */ -__private_extern__ uint32_t nfsrv_user_stat_node_count; /* current count of user stat nodes */ -__private_extern__ uint32_t nfsrv_user_stat_max_idle_sec; /* idle seconds (node no longer considered active) */ -__private_extern__ uint32_t nfsrv_user_stat_max_nodes; /* active user list size limit */ -__private_extern__ lck_grp_t *nfsrv_active_user_mutex_group; +extern uint32_t nfsrv_user_stat_enabled; /* enable/disable active user list */ +extern uint32_t nfsrv_user_stat_node_count; /* current count of user stat nodes */ +extern uint32_t nfsrv_user_stat_max_idle_sec; /* idle seconds (node no longer considered active) */ +extern uint32_t nfsrv_user_stat_max_nodes; /* active user list size limit */ +extern lck_grp_t *nfsrv_active_user_mutex_group; /* An active user node represented in the kernel */ struct nfs_user_stat_node { @@ -643,15 +646,15 @@ struct nfs_exportfs { LIST_HEAD(,nfs_export) nxfs_exports; /* list of exports for this file system */ }; -__private_extern__ LIST_HEAD(nfsrv_expfs_list, nfs_exportfs) nfsrv_exports; -__private_extern__ lck_rw_t nfsrv_export_rwlock; // lock for export data structures +extern LIST_HEAD(nfsrv_expfs_list, nfs_exportfs) nfsrv_exports; +extern lck_rw_t nfsrv_export_rwlock; // lock for export data structures #define NFSRVEXPHASHSZ 64 #define NFSRVEXPHASHVAL(FSID, EXPID) \ (((FSID) >> 24) ^ ((FSID) >> 16) ^ ((FSID) >> 8) ^ (EXPID)) #define NFSRVEXPHASH(FSID, EXPID) \ (&nfsrv_export_hashtbl[NFSRVEXPHASHVAL((FSID),(EXPID)) & nfsrv_export_hash]) -__private_extern__ LIST_HEAD(nfsrv_export_hashhead, nfs_export) *nfsrv_export_hashtbl; -__private_extern__ u_long nfsrv_export_hash; +extern LIST_HEAD(nfsrv_export_hashhead, nfs_export) *nfsrv_export_hashtbl; +extern u_long nfsrv_export_hash; #if CONFIG_FSE /* @@ -666,22 +669,22 @@ struct nfsrv_fmod { #define NFSRVFMODHASHSZ 128 #define NFSRVFMODHASH(vp) (((uintptr_t) vp) & nfsrv_fmod_hash) -__private_extern__ LIST_HEAD(nfsrv_fmod_hashhead, nfsrv_fmod) *nfsrv_fmod_hashtbl; -__private_extern__ u_long nfsrv_fmod_hash; -__private_extern__ lck_mtx_t *nfsrv_fmod_mutex; -__private_extern__ int nfsrv_fmod_pending, nfsrv_fsevents_enabled; +extern LIST_HEAD(nfsrv_fmod_hashhead, nfsrv_fmod) *nfsrv_fmod_hashtbl; +extern u_long nfsrv_fmod_hash; +extern 
lck_mtx_t *nfsrv_fmod_mutex; +extern int nfsrv_fmod_pending, nfsrv_fsevents_enabled; #endif -__private_extern__ int nfsrv_async, nfsrv_export_hash_size, +extern int nfsrv_async, nfsrv_export_hash_size, nfsrv_reqcache_size, nfsrv_sock_max_rec_queue_length; -__private_extern__ uint32_t nfsrv_gss_context_ttl; -__private_extern__ struct nfsstats nfsstats; +extern uint32_t nfsrv_gss_context_ttl; +extern struct nfsstats nfsstats; #define NFS_UC_Q_DEBUG #ifdef NFS_UC_Q_DEBUG -__private_extern__ int nfsrv_uc_use_proxy; -__private_extern__ uint32_t nfsrv_uc_queue_limit; -__private_extern__ uint32_t nfsrv_uc_queue_max_seen; -__private_extern__ volatile uint32_t nfsrv_uc_queue_count; +extern int nfsrv_uc_use_proxy; +extern uint32_t nfsrv_uc_queue_limit; +extern uint32_t nfsrv_uc_queue_max_seen; +extern volatile uint32_t nfsrv_uc_queue_count; #endif #endif // KERNEL @@ -811,7 +814,7 @@ struct nfsrv_uc_arg; #define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ sigmask(SIGHUP)|sigmask(SIGQUIT)) -__private_extern__ size_t nfs_mbuf_mhlen, nfs_mbuf_minclsize; +extern size_t nfs_mbuf_mhlen, nfs_mbuf_minclsize; /* * NFS mbuf chain structure used for managing the building/dissection of RPCs @@ -909,8 +912,8 @@ struct nfsreq { * Queue head for nfsreq's */ TAILQ_HEAD(nfs_reqqhead, nfsreq); -__private_extern__ struct nfs_reqqhead nfs_reqq; -__private_extern__ lck_grp_t *nfs_request_grp; +extern struct nfs_reqqhead nfs_reqq; +extern lck_grp_t *nfs_request_grp; #define R_XID32(x) ((x) & 0xffffffff) @@ -944,15 +947,16 @@ __private_extern__ lck_grp_t *nfs_request_grp; #define RL_WAITING 0x0002 /* Someone waiting for lock. */ #define RL_QUEUED 0x0004 /* request is on the queue */ -__private_extern__ u_int32_t nfs_xid, nfs_xidwrap; -__private_extern__ int nfs_iosize, nfs_allow_async, nfs_statfs_rate_limit; -__private_extern__ int nfs_access_cache_timeout, nfs_access_delete, nfs_access_dotzfs, nfs_access_for_getattr; -__private_extern__ int nfs_lockd_mounts, nfs_lockd_request_sent, nfs_single_des; -__private_extern__ int nfs_tprintf_initial_delay, nfs_tprintf_delay; -__private_extern__ int nfsiod_thread_count, nfsiod_thread_max, nfs_max_async_writes; -__private_extern__ int nfs_idmap_ctrl, nfs_callback_port; -__private_extern__ int nfs_is_mobile; -__private_extern__ uint32_t nfs_squishy_flags; +extern u_int32_t nfs_xid, nfs_xidwrap; +extern int nfs_iosize, nfs_allow_async, nfs_statfs_rate_limit; +extern int nfs_access_cache_timeout, nfs_access_delete, nfs_access_dotzfs, nfs_access_for_getattr; +extern int nfs_lockd_mounts, nfs_lockd_request_sent, nfs_single_des; +extern int nfs_tprintf_initial_delay, nfs_tprintf_delay; +extern int nfsiod_thread_count, nfsiod_thread_max, nfs_max_async_writes; +extern int nfs_idmap_ctrl, nfs_callback_port; +extern int nfs_is_mobile; +extern uint32_t nfs_squishy_flags; +extern uint32_t nfs_debug_ctl; /* bits for nfs_idmap_ctrl: */ #define NFS_IDMAP_CTRL_USE_IDMAP_SERVICE 0x00000001 /* use the ID mapping service */ @@ -1020,7 +1024,7 @@ struct nfsrv_sock { #define SLPNOLIST ((struct nfsrv_sock *)0xdeadbeef) /* sentinel value for sockets not in the nfsrv_sockwg list */ -__private_extern__ struct nfsrv_sock *nfsrv_udpsock, *nfsrv_udp6sock; +extern struct nfsrv_sock *nfsrv_udpsock, *nfsrv_udp6sock; /* * global NFS server socket lists: @@ -1030,12 +1034,12 @@ __private_extern__ struct nfsrv_sock *nfsrv_udpsock, *nfsrv_udp6sock; * nfsrv_sockwork - sockets being worked on which may have more work to do (ns_svcq) * nfsrv_sockwg - sockets with pending write gather input 
(ns_wgq) */ -__private_extern__ TAILQ_HEAD(nfsrv_sockhead, nfsrv_sock) nfsrv_socklist, nfsrv_deadsocklist, +extern TAILQ_HEAD(nfsrv_sockhead, nfsrv_sock) nfsrv_socklist, nfsrv_deadsocklist, nfsrv_sockwg, nfsrv_sockwait, nfsrv_sockwork; /* lock groups for nfsrv_sock's */ -__private_extern__ lck_grp_t *nfsrv_slp_rwlock_group; -__private_extern__ lck_grp_t *nfsrv_slp_mutex_group; +extern lck_grp_t *nfsrv_slp_rwlock_group; +extern lck_grp_t *nfsrv_slp_mutex_group; /* * One of these structures is allocated for each nfsd. @@ -1082,30 +1086,30 @@ struct nfsrv_descript { kauth_cred_t nd_cr; /* Credentials */ }; -__private_extern__ TAILQ_HEAD(nfsd_head, nfsd) nfsd_head, nfsd_queue; +extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head, nfsd_queue; /* mutex for nfs server */ -__private_extern__ lck_mtx_t *nfsd_mutex; -__private_extern__ int nfsd_thread_count, nfsd_thread_max; +extern lck_mtx_t *nfsd_mutex; +extern int nfsd_thread_count, nfsd_thread_max; /* request list mutex */ -__private_extern__ lck_mtx_t *nfs_request_mutex; -__private_extern__ int nfs_request_timer_on; +extern lck_mtx_t *nfs_request_mutex; +extern int nfs_request_timer_on; /* mutex for nfs client globals */ -__private_extern__ lck_mtx_t *nfs_global_mutex; +extern lck_mtx_t *nfs_global_mutex; /* NFSv4 callback globals */ -__private_extern__ int nfs4_callback_timer_on; -__private_extern__ in_port_t nfs4_cb_port, nfs4_cb_port6; +extern int nfs4_callback_timer_on; +extern in_port_t nfs4_cb_port, nfs4_cb_port6; /* nfs timer call structures */ -__private_extern__ thread_call_t nfs_request_timer_call; -__private_extern__ thread_call_t nfs_buf_timer_call; -__private_extern__ thread_call_t nfs4_callback_timer_call; -__private_extern__ thread_call_t nfsrv_deadsock_timer_call; +extern thread_call_t nfs_request_timer_call; +extern thread_call_t nfs_buf_timer_call; +extern thread_call_t nfs4_callback_timer_call; +extern thread_call_t nfsrv_deadsock_timer_call; #if CONFIG_FSE -__private_extern__ thread_call_t nfsrv_fmod_timer_call; +extern thread_call_t nfsrv_fmod_timer_call; #endif __BEGIN_DECLS @@ -1447,6 +1451,23 @@ void nfsrv_uc_cleanup(void); void nfsrv_uc_addsock(struct nfsrv_sock *, int); void nfsrv_uc_dequeue(struct nfsrv_sock *); +/* Debug support */ +#define NFS_DEBUG_LEVEL (nfs_debug_ctl & 0xf) +#define NFS_DEBUG_FACILITY ((nfs_debug_ctl >> 4) & 0xff) +#define NFS_DEBUG_FLAGS ((nfs_debug_ctl >> 12) & 0xff) +#define NFS_DEBUG_VALUE ((nfs_debug_ctl >> 20) & 0xfff) +#define NFS_FAC_SOCK 0x01 +#define NFS_FAC_STATE 0x02 +#define NFS_FAC_NODE 0x04 +#define NFS_FAC_VNOP 0x08 +#define NFS_FAC_BIO 0x10 +#define NFS_FAC_GSS 0x20 + +#define NFS_DBG(fac, lev, fmt, ...) \ + if (__builtin_expect(NFS_DEBUG_LEVEL, 0)) nfs_printf(fac, lev, "%s: %d: " fmt, __func__, __LINE__, ## __VA_ARGS__) + +void nfs_printf(int, int, const char *, ...) 
__printflike(3,4); + __END_DECLS #endif /* KERNEL */ diff --git a/bsd/nfs/nfs4_subs.c b/bsd/nfs/nfs4_subs.c index 69f12d1f7..50c657b94 100644 --- a/bsd/nfs/nfs4_subs.c +++ b/bsd/nfs/nfs4_subs.c @@ -2480,7 +2480,7 @@ restart: if (now.tv_sec == nmp->nm_recover_start) { printf("nfs recovery throttled for %s, 0x%x\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid); lck_mtx_unlock(&nmp->nm_lock); - tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", hz); + tsleep(nfs_recover, (PZERO-1), "nfsrecoverrestart", hz); goto restart; } nmp->nm_recover_start = now.tv_sec; @@ -2570,7 +2570,7 @@ restart: if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) { if (error == ETIMEDOUT) nfs_need_reconnect(nmp); - tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0); + tsleep(nfs_recover, (PZERO-1), "nfsrecoverrestart", hz); printf("nfs recovery restarting for %s, 0x%x, error %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error); goto restart; @@ -2626,7 +2626,7 @@ reclaim_locks: if ((error == ETIMEDOUT) || nfs_mount_state_error_should_restart(error)) { if (error == ETIMEDOUT) nfs_need_reconnect(nmp); - tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0); + tsleep(nfs_recover, (PZERO-1), "nfsrecoverrestart", hz); printf("nfs recovery restarting for %s, 0x%x, error %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error); goto restart; @@ -2651,7 +2651,7 @@ reclaim_locks: nfs4_delegation_return(nofp->nof_np, R_RECOVER, thd, noop->noo_cred); if (!(nmp->nm_sockflags & NMSOCK_READY)) { /* looks like we need a reconnect */ - tsleep(&lbolt, (PZERO-1), "nfsrecoverrestart", 0); + tsleep(nfs_recover, (PZERO-1), "nfsrecoverrestart", hz); printf("nfs recovery restarting for %s, 0x%x, error %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nmp->nm_stategenid, error); goto restart; diff --git a/bsd/nfs/nfs4_vnops.c b/bsd/nfs/nfs4_vnops.c index 6259af593..85ebceb47 100644 --- a/bsd/nfs/nfs4_vnops.c +++ b/bsd/nfs/nfs4_vnops.c @@ -6031,7 +6031,7 @@ nfs_vnop_read( int error; if (vnode_vtype(ap->a_vp) != VREG) - return (EPERM); + return (vnode_vtype(vp) == VDIR) ? EISDIR : EPERM; np = VTONFS(vp); nmp = NFSTONMP(np); diff --git a/bsd/nfs/nfs_gss.c b/bsd/nfs/nfs_gss.c index 7633c00b9..f1dd81b5f 100644 --- a/bsd/nfs/nfs_gss.c +++ b/bsd/nfs/nfs_gss.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2010 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,6 +84,7 @@ #include #include #include +#include #include #include @@ -112,6 +113,9 @@ #define NFS_GSS_MACH_MAX_RETRIES 3 +#define NFS_GSS_DBG(...) 
NFS_DBG(NFS_FAC_GSS, 7, ## __VA_ARGS__) +#define NFS_GSS_ISDBG (NFS_DEBUG_FACILITY & NFS_FAC_GSS) + typedef struct { int type; union { @@ -186,7 +190,7 @@ static int nfs_gss_clnt_ctx_failover(struct nfsreq *); static int nfs_gss_clnt_ctx_init(struct nfsreq *, struct nfs_gss_clnt_ctx *); static int nfs_gss_clnt_ctx_init_retry(struct nfsreq *, struct nfs_gss_clnt_ctx *); static int nfs_gss_clnt_ctx_callserver(struct nfsreq *, struct nfs_gss_clnt_ctx *); -static char *nfs_gss_clnt_svcname(struct nfsmount *); +static uint8_t *nfs_gss_clnt_svcname(struct nfsmount *, gssd_nametype *, uint32_t *); static int nfs_gss_clnt_gssd_upcall(struct nfsreq *, struct nfs_gss_clnt_ctx *); static void nfs_gss_clnt_ctx_remove(struct nfsmount *, struct nfs_gss_clnt_ctx *); #endif /* NFSCLIENT */ @@ -280,18 +284,33 @@ nfs_gss_sysok(struct nfsreq *req) * * Note that the code allows superuser (uid == 0) * to adopt the context of another user. + * + * We'll match on the audit session ids, since those + * processes will have access to the same credential cache. */ + +#define kauth_cred_getasid(cred) ((cred)->cr_audit.as_aia_p->ai_asid) +#define kauth_cred_getauid(cred) ((cred)->cr_audit.as_aia_p->ai_auid) + +static int +nfs_gss_clnt_ctx_cred_match(kauth_cred_t cred1, kauth_cred_t cred2) +{ + if (kauth_cred_getasid(cred1) == kauth_cred_getasid(cred2)) + return (1); + return (0); +} + + static int nfs_gss_clnt_ctx_find(struct nfsreq *req) { struct nfsmount *nmp = req->r_nmp; struct nfs_gss_clnt_ctx *cp; - uid_t uid = kauth_cred_getuid(req->r_cred); int error = 0; lck_mtx_lock(&nmp->nm_lock); TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { - if (cp->gss_clnt_uid == uid) { + if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, req->r_cred)) { if (cp->gss_clnt_flags & GSS_CTX_INVAL) continue; nfs_gss_clnt_ctx_ref(req, cp); @@ -300,7 +319,7 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) } } - if (uid == 0) { + if (kauth_cred_getuid(req->r_cred) == 0) { /* * If superuser is trying to get access, then co-opt * the first valid context in the list.
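[Editorial aside, not part of the patch: the hunks above move GSS context ownership from a bare uid to a kauth credential and match contexts by audit session ID, so every process in one audit session shares a context and, through gssd, the same credential cache. Below is a minimal standalone sketch of that lookup pattern; struct gss_ctx, gss_ctx_head, and GSS_CTX_INVAL are hypothetical stand-ins for struct nfs_gss_clnt_ctx, nmp->nm_gsscl, and the real flag, while kauth_cred_getasid() is the macro defined in the hunk above.]

#include <sys/queue.h>
#include <sys/kauth.h>

/* Hypothetical stand-ins for struct nfs_gss_clnt_ctx and its list head. */
#define GSS_CTX_INVAL 0x0001	/* stand-in for the real invalidation flag */

struct gss_ctx {
	TAILQ_ENTRY(gss_ctx) entries;
	kauth_cred_t cred;	/* owner of this context */
	uint32_t flags;
};
TAILQ_HEAD(gss_ctx_head, gss_ctx);

/*
 * Return the first live context whose owner belongs to the same audit
 * session as cred; the caller is assumed to hold the lock protecting
 * the list, as nfs_gss_clnt_ctx_find() holds nm_lock above.
 */
static struct gss_ctx *
gss_ctx_lookup(struct gss_ctx_head *list, kauth_cred_t cred)
{
	struct gss_ctx *cp;

	TAILQ_FOREACH(cp, list, entries) {
		/* Same audit session => same credential cache in gssd. */
		if (kauth_cred_getasid(cp->cred) != kauth_cred_getasid(cred))
			continue;
		if (cp->flags & GSS_CTX_INVAL)
			continue;
		return (cp);
	}
	return (NULL);
}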
@@ -345,7 +364,8 @@ nfs_gss_clnt_ctx_find(struct nfsreq *req) return (ENOMEM); } - cp->gss_clnt_uid = uid; + cp->gss_clnt_cred = req->r_cred; + kauth_cred_ref(cp->gss_clnt_cred); cp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); cp->gss_clnt_thread = current_thread(); nfs_gss_clnt_ctx_ref(req, cp); @@ -387,7 +407,6 @@ nfs_gss_clnt_ctx_failover(struct nfsreq *req) { struct nfsmount *nmp = req->r_nmp; struct nfs_gss_clnt_ctx *cp; - uid_t uid = kauth_cred_getuid(req->r_cred); struct timeval now; MALLOC(cp, struct nfs_gss_clnt_ctx *, sizeof(*cp), M_TEMP, M_WAITOK|M_ZERO); @@ -395,7 +414,8 @@ nfs_gss_clnt_ctx_failover(struct nfsreq *req) return (ENOMEM); cp->gss_clnt_service = RPCSEC_GSS_SVC_SYS; - cp->gss_clnt_uid = uid; + cp->gss_clnt_cred = req->r_cred; + kauth_cred_ref(cp->gss_clnt_cred); cp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); microuptime(&now); cp->gss_clnt_ctime = now.tv_sec; // time stamp @@ -984,7 +1004,7 @@ nfs_gss_clnt_ctx_init(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) /* Initialize a new client context */ - cp->gss_clnt_svcname = nfs_gss_clnt_svcname(nmp); + cp->gss_clnt_svcname = nfs_gss_clnt_svcname(nmp, &cp->gss_clnt_svcnt, &cp->gss_clnt_svcnamlen); if (cp->gss_clnt_svcname == NULL) { error = NFSERR_EAUTH; goto nfsmout; @@ -1025,8 +1045,9 @@ retry: */ error = nfs_gss_clnt_ctx_callserver(req, cp); if (error) { - if (cp->gss_clnt_proc == RPCSEC_GSS_INIT && + if (error == ENEEDAUTH && cp->gss_clnt_proc == RPCSEC_GSS_INIT && (cp->gss_clnt_gssd_flags & (GSSD_RESTART | GSSD_NFS_1DES)) == 0) { + NFS_GSS_DBG("Retrying with single DES for req %p\n", req); cp->gss_clnt_gssd_flags = (GSSD_RESTART | GSSD_NFS_1DES); if (cp->gss_clnt_token) FREE(cp->gss_clnt_token, M_TEMP); @@ -1157,7 +1178,7 @@ nfs_gss_clnt_ctx_init_retry(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) microuptime(&now); waituntil = now.tv_sec + timeo; while (now.tv_sec < waituntil) { - tsleep(&lbolt, PSOCK | slpflag, "nfs_gss_clnt_ctx_init_retry", 0); + tsleep(NULL, PSOCK | slpflag, "nfs_gss_clnt_ctx_init_retry", hz); slpflag = 0; error = nfs_sigintr(req->r_nmp, req, current_thread(), 0); if (error) @@ -1278,7 +1299,7 @@ nfs_gss_clnt_ctx_callserver(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) cp->gss_clnt_mport, !req->r_nmp ? unknown : vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname, - cp->gss_clnt_uid, + kauth_cred_getuid(cp->gss_clnt_cred), who, cp->gss_clnt_major, cp->gss_clnt_minor); @@ -1292,32 +1313,59 @@ nfsmout: } /* - * Ugly hack to get the service principal from the f_mntfromname field in - * the statfs struct. We assume a format of server:path. We don't currently - * support url's or other bizarre formats like path@server. A better solution - * here might be to allow passing the service principal down in the mount args. - * For kerberos we just use the default realm. + * We construct the service principal as a gss hostbased service principal of + * the form nfs@<server>, unless the server's principal was passed down in the + * mount arguments. If the arguments don't specify the service principal, the + * server name is extracted from the location passed in the mount argument if + * available. Otherwise assume a format of <server>:<path> in the + * mntfromname. We don't currently support URLs or other bizarre formats like + * path@server. Mount_url will convert the nfs url into <server>:<path> when + * calling mount, so this works out well in practice.
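+ * (Editorial example, not in the original comment: a mount from "big.apple.com:/exports" therefore yields the hostbased service principal "nfs@big.apple.com".)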
+ * */ -static char * -nfs_gss_clnt_svcname(struct nfsmount *nmp) + +static uint8_t * +nfs_gss_clnt_svcname(struct nfsmount *nmp, gssd_nametype *nt, uint32_t *len) { - char *svcname, *d, *mntfromhere; - int len; + char *svcname, *d, *server; + int lindx, sindx; if (!nmp) return (NULL); - mntfromhere = &vfs_statfs(nmp->nm_mountp)->f_mntfromname[0]; - len = strlen(mntfromhere) + 5; /* "nfs/" plus null */ - MALLOC(svcname, char *, len, M_TEMP, M_NOWAIT); - if (svcname == NULL) - return (NULL); - strlcpy(svcname, "nfs/", len); - strlcat(svcname, mntfromhere, len); - d = strchr(svcname, ':'); - if (d) - *d = '\0'; - return (svcname); + if (nmp->nm_sprinc) { + *len = strlen(nmp->nm_sprinc) + 1; + MALLOC(svcname, char *, *len, M_TEMP, M_WAITOK); + *nt = GSSD_HOSTBASED; + if (svcname == NULL) + return (NULL); + strlcpy(svcname, nmp->nm_sprinc, *len); + + return ((uint8_t *)svcname); + } + + *nt = GSSD_HOSTBASED; + if (nmp->nm_locations.nl_numlocs && !(NFS_GSS_ISDBG && (NFS_DEBUG_FLAGS & 0x1))) { + lindx = nmp->nm_locations.nl_current.nli_loc; + sindx = nmp->nm_locations.nl_current.nli_serv; + server = nmp->nm_locations.nl_locations[lindx]->nl_servers[sindx]->ns_name; + *len = (uint32_t)strlen(server); + } else { + /* Older binaries using older mount args end up here */ + server = vfs_statfs(nmp->nm_mountp)->f_mntfromname; + NFS_GSS_DBG("nfs getting gss svcname from %s\n", server); + d = strchr(server, ':'); + *len = (uint32_t)(d ? (d - server) : strlen(server)); + } + + *len += 5; /* "nfs@" plus null */ + MALLOC(svcname, char *, *len, M_TEMP, M_WAITOK); + strlcpy(svcname, "nfs", *len); + strlcat(svcname, "@", *len); + strlcat(svcname, server, *len); + NFS_GSS_DBG("nfs svcname = %s\n", svcname); + + return ((uint8_t *)svcname); } /* @@ -1326,8 +1374,6 @@ nfs_gss_clnt_svcname(struct nfsmount *nmp) * to get a send right to talk to a new gssd instance that launchd has launched * based on the cred's uid and audit session id. */ -#define kauth_cred_getasid(cred) ((cred)->cr_audit.as_aia_p->ai_asid) -#define kauth_cred_getauid(cred) ((cred)->cr_audit.as_aia_p->ai_auid) static mach_port_t nfs_gss_clnt_get_upcall_port(kauth_cred_t credp) @@ -1376,51 +1422,95 @@ nfs_gss_clnt_gssd_upcall(struct nfsreq *req, struct nfs_gss_clnt_ctx *cp) gssd_byte_buffer otoken = NULL; mach_msg_type_number_t otokenlen; int error = 0; - char uprinc[1]; + uint8_t *principal = NULL; + uint32_t plen = 0; + int32_t nt = GSSD_STRING_NAME; + vm_map_copy_t pname = NULL; + vm_map_copy_t svcname = NULL; + char display_name[MAX_DISPLAY_STR] = ""; uint32_t ret_flags; - + uint32_t nfs_1des = (cp->gss_clnt_gssd_flags & GSSD_NFS_1DES); + struct nfsmount *nmp; + /* * NFS currently only supports default principals or - * principals based on the uid of the caller. - * - * N.B. Note we define a one character array for the principal - * so that we can hold an empty string required by mach, since - * the kernel is being compiled with -Wwrite-strings. + * principals based on the uid of the caller, unless + * the principal to use for the mounting cred was specified + * in the mount arguments. If the realm to use was specified, + * then we will send that up as the principal; since the realm is + * preceded by an "@", gssd will try to select the default + * principal for that realm.
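+ * (Editorial example, not in the original comment: a realm argument of "@EXAMPLE.COM" is sent up as a GSSD_USER name, letting gssd choose the default credential for EXAMPLE.COM.)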
*/ - uprinc[0] = '\0'; + + nmp = req->r_nmp; + if (nmp == NULL || vfs_isforce(nmp->nm_mountp) || (nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD))) + return (ENXIO); + + if (cp->gss_clnt_principal && cp->gss_clnt_prinlen) { + principal = cp->gss_clnt_principal; + plen = cp->gss_clnt_prinlen; + nt = cp->gss_clnt_prinnt; + } else if (nmp->nm_principal && IS_VALID_CRED(nmp->nm_mcred) && req->r_cred == nmp->nm_mcred) { + plen = (uint32_t)strlen(nmp->nm_principal); + MALLOC(principal, uint8_t *, plen, M_TEMP, M_WAITOK | M_ZERO); + if (principal == NULL) + return (ENOMEM); + bcopy(nmp->nm_principal, principal, plen); + cp->gss_clnt_prinnt = nt = GSSD_USER; + } + else if (nmp->nm_realm) { + plen = (uint32_t)strlen(nmp->nm_realm); + principal = (uint8_t *)nmp->nm_realm; + nt = GSSD_USER; + } + if (!IPC_PORT_VALID(cp->gss_clnt_mport)) { cp->gss_clnt_mport = nfs_gss_clnt_get_upcall_port(req->r_cred); if (cp->gss_clnt_mport == IPC_PORT_NULL) goto out; } - if (cp->gss_clnt_tokenlen > 0) + if (plen) + nfs_gss_mach_alloc_buffer(principal, plen, &pname); + if (cp->gss_clnt_svcnamlen) + nfs_gss_mach_alloc_buffer(cp->gss_clnt_svcname, cp->gss_clnt_svcnamlen, &svcname); + if (cp->gss_clnt_tokenlen) nfs_gss_mach_alloc_buffer(cp->gss_clnt_token, cp->gss_clnt_tokenlen, &itoken); retry: - kr = mach_gss_init_sec_context( + kr = mach_gss_init_sec_context_v2( cp->gss_clnt_mport, GSSD_KRB5_MECH, (gssd_byte_buffer) itoken, (mach_msg_type_number_t) cp->gss_clnt_tokenlen, - cp->gss_clnt_uid, - uprinc, - cp->gss_clnt_svcname, + kauth_cred_getuid(cp->gss_clnt_cred), + nt, + (gssd_byte_buffer)pname, (mach_msg_type_number_t) plen, + cp->gss_clnt_svcnt, + (gssd_byte_buffer)svcname, (mach_msg_type_number_t) cp->gss_clnt_svcnamlen, GSSD_MUTUAL_FLAG, - cp->gss_clnt_gssd_flags, + &cp->gss_clnt_gssd_flags, &cp->gss_clnt_context, &cp->gss_clnt_cred_handle, &ret_flags, &okey, (mach_msg_type_number_t *) &skeylen, &otoken, &otokenlen, + cp->gss_clnt_display ? NULL : display_name, &cp->gss_clnt_major, &cp->gss_clnt_minor); + /* Should be cleared and set in gssd ? */ cp->gss_clnt_gssd_flags &= ~GSSD_RESTART; - + cp->gss_clnt_gssd_flags |= nfs_1des; + if (kr != KERN_SUCCESS) { printf("nfs_gss_clnt_gssd_upcall: mach_gss_init_sec_context failed: %x (%d)\n", kr, kr); if (kr == MIG_SERVER_DIED && cp->gss_clnt_cred_handle == 0 && - retry_cnt++ < NFS_GSS_MACH_MAX_RETRIES) { + retry_cnt++ < NFS_GSS_MACH_MAX_RETRIES && + !vfs_isforce(nmp->nm_mountp) && (nmp->nm_state & (NFSSTA_FORCE | NFSSTA_DEAD)) == 0) { + if (plen) + nfs_gss_mach_alloc_buffer(principal, plen, &pname); + if (cp->gss_clnt_svcnamlen) + nfs_gss_mach_alloc_buffer(cp->gss_clnt_svcname, cp->gss_clnt_svcnamlen, &svcname); if (cp->gss_clnt_tokenlen > 0) nfs_gss_mach_alloc_buffer(cp->gss_clnt_token, cp->gss_clnt_tokenlen, &itoken); goto retry; @@ -1431,22 +1521,41 @@ retry: goto out; } + if (cp->gss_clnt_display == NULL && *display_name != '\0') { + int dlen = strnlen(display_name, MAX_DISPLAY_STR) + 1; /* Add extra byte to include '\0' */ + + if (dlen < MAX_DISPLAY_STR) { + MALLOC(cp->gss_clnt_display, char *, dlen, M_TEMP, M_WAITOK); + if (cp->gss_clnt_display == NULL) + goto skip; + bcopy(display_name, cp->gss_clnt_display, dlen); + } else { + goto skip; + } + } +skip: /* * Make sure any unusual errors are expanded and logged by gssd + * + * XXXX, we need to rethink this and just have gssd return a string for the major and minor codes. 
*/ if (cp->gss_clnt_major != GSS_S_COMPLETE && cp->gss_clnt_major != GSS_S_CONTINUE_NEEDED) { +#define GETMAJERROR(x) (((x) >> GSS_C_ROUTINE_ERROR_OFFSET) & GSS_C_ROUTINE_ERROR_MASK) char who[] = "client"; - char unknown[] = ""; - + uint32_t gss_error = GETMAJERROR(cp->gss_clnt_major); + (void) mach_gss_log_error( cp->gss_clnt_mport, - !req->r_nmp ? unknown : - vfs_statfs(req->r_nmp->nm_mountp)->f_mntfromname, - cp->gss_clnt_uid, + vfs_statfs(nmp->nm_mountp)->f_mntfromname, + kauth_cred_getuid(cp->gss_clnt_cred), who, cp->gss_clnt_major, cp->gss_clnt_minor); + gss_error = gss_error ? gss_error : cp->gss_clnt_major; + printf("NFS gssd auth failure mount %s for %s major = %d minor = %d\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, cp->gss_clnt_display ? cp->gss_clnt_display : who, + gss_error, (int32_t)cp->gss_clnt_minor); } if (skeylen > 0) { @@ -1619,6 +1728,12 @@ nfs_gss_clnt_ctx_remove(struct nfsmount *nmp, struct nfs_gss_clnt_ctx *cp) if (cp->gss_clnt_mtx) lck_mtx_destroy(cp->gss_clnt_mtx, nfs_gss_clnt_grp); + if (IS_VALID_CRED(cp->gss_clnt_cred)) + kauth_cred_unref(&cp->gss_clnt_cred); + if (cp->gss_clnt_principal) + FREE(cp->gss_clnt_principal, M_TEMP); + if (cp->gss_clnt_display) + FREE(cp->gss_clnt_display, M_TEMP); if (cp->gss_clnt_handle) FREE(cp->gss_clnt_handle, M_TEMP); if (cp->gss_clnt_seqbits) @@ -1642,7 +1757,7 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) struct nfsmount *nmp = req->r_nmp; struct nfs_gss_clnt_ctx *ncp; int error = 0; - uid_t saved_uid; + kauth_cred_t saved_cred; mach_port_t saved_mport; if (cp == NULL) @@ -1654,7 +1769,8 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) nfs_gss_clnt_ctx_unref(req); return (0); // already being renewed } - saved_uid = cp->gss_clnt_uid; + saved_cred = cp->gss_clnt_cred; + kauth_cred_ref(saved_cred); saved_mport = host_copy_special_port(cp->gss_clnt_mport); /* Remove the old context */ @@ -1680,7 +1796,8 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) goto out; } - ncp->gss_clnt_uid = saved_uid; + ncp->gss_clnt_cred = saved_cred; + kauth_cred_ref(ncp->gss_clnt_cred); ncp->gss_clnt_mport = host_copy_special_port(saved_mport); // re-use the gssd port ncp->gss_clnt_mtx = lck_mtx_alloc_init(nfs_gss_clnt_grp, LCK_ATTR_NULL); ncp->gss_clnt_thread = current_thread(); @@ -1695,6 +1812,7 @@ nfs_gss_clnt_ctx_renew(struct nfsreq *req) error = nfs_gss_clnt_ctx_init_retry(req, ncp); // Initialize new context out: host_release_special_port(saved_mport); + kauth_cred_unref(&saved_cred); if (error) nfs_gss_clnt_ctx_unref(req); @@ -1718,25 +1836,22 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp) for (;;) { lck_mtx_lock(&nmp->nm_lock); cp = TAILQ_FIRST(&nmp->nm_gsscl); + if (cp) { + lck_mtx_lock(cp->gss_clnt_mtx); + cp->gss_clnt_refcnt++; + lck_mtx_unlock(cp->gss_clnt_mtx); + req.r_gss_ctx = cp; + } lck_mtx_unlock(&nmp->nm_lock); if (cp == NULL) break; - - nfs_gss_clnt_ctx_ref(&req, cp); - + /* * Tell the server to destroy its context. * But don't bother if it's a forced unmount * or if it's a dummy sec=sys context. 
*/ if (!(nmp->nm_state & NFSSTA_FORCE) && (cp->gss_clnt_service != RPCSEC_GSS_SVC_SYS)) { - kauth_cred_t cred; - struct posix_cred temp_pcred; - - bzero((caddr_t) &temp_pcred, sizeof(temp_pcred)); - temp_pcred.cr_ngroups = 1; - temp_pcred.cr_uid = cp->gss_clnt_uid; - cred = posix_cred_create(&temp_pcred); cp->gss_clnt_proc = RPCSEC_GSS_DESTROY; error = 0; @@ -1746,10 +1861,9 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp) nfsm_chain_build_done(error, &nmreq); if (!error) nfs_request_gss(nmp->nm_mountp, &nmreq, - current_thread(), cred, 0, cp, &nmrep, &status); + current_thread(), cp->gss_clnt_cred, 0, cp, &nmrep, &status); nfsm_chain_cleanup(&nmreq); nfsm_chain_cleanup(&nmrep); - kauth_cred_unref(&cred); } /* @@ -1764,6 +1878,45 @@ nfs_gss_clnt_ctx_unmount(struct nfsmount *nmp) } } +/* + * Destroy a mounts context for a credential + */ +int +nfs_gss_clnt_ctx_destroy(struct nfsmount *nmp, kauth_cred_t cred) +{ + struct nfs_gss_clnt_ctx *cp; + struct nfsreq req; + + req.r_nmp = nmp; + + lck_mtx_lock(&nmp->nm_lock); + TAILQ_FOREACH(cp, &nmp->nm_gsscl, gss_clnt_entries) { + if (nfs_gss_clnt_ctx_cred_match(cp->gss_clnt_cred, cred)) { + if (cp->gss_clnt_flags & GSS_CTX_INVAL) + continue; + lck_mtx_lock(cp->gss_clnt_mtx); + cp->gss_clnt_refcnt++; + cp->gss_clnt_flags |= GSS_CTX_INVAL; + lck_mtx_unlock(cp->gss_clnt_mtx); + req.r_gss_ctx = cp; + break; + } + } + lck_mtx_unlock(&nmp->nm_lock); + + if (cp == NULL) + return (ENOENT); + + /* + * Drop the reference to remove it if its + * refcount is zero. + */ + nfs_gss_clnt_ctx_unref(&req); + + return (0); +} + + #endif /* NFSCLIENT */ /************* @@ -2841,15 +2994,19 @@ nfs_gss_mach_alloc_buffer(u_char *buf, uint32_t buflen, vm_map_copy_t *addr) if (buf == NULL || buflen == 0) return; - tbuflen = round_page(buflen); + tbuflen = vm_map_round_page(buflen, + vm_map_page_mask(ipc_kernel_map)); kr = vm_allocate(ipc_kernel_map, &kmem_buf, tbuflen, VM_FLAGS_ANYWHERE); if (kr != 0) { printf("nfs_gss_mach_alloc_buffer: vm_allocate failed\n"); return; } - kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(kmem_buf), - vm_map_round_page(kmem_buf + tbuflen), + kr = vm_map_wire(ipc_kernel_map, + vm_map_trunc_page(kmem_buf, + vm_map_page_mask(ipc_kernel_map)), + vm_map_round_page(kmem_buf + tbuflen, + vm_map_page_mask(ipc_kernel_map)), VM_PROT_READ|VM_PROT_WRITE, FALSE); if (kr != 0) { printf("nfs_gss_mach_alloc_buffer: vm_map_wire failed\n"); @@ -2860,8 +3017,12 @@ nfs_gss_mach_alloc_buffer(u_char *buf, uint32_t buflen, vm_map_copy_t *addr) // Shouldn't need to bzero below since vm_allocate returns zeroed pages // bzero(kmem_buf + buflen, tbuflen - buflen); - kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(kmem_buf), - vm_map_round_page(kmem_buf + tbuflen), FALSE); + kr = vm_map_unwire(ipc_kernel_map, + vm_map_trunc_page(kmem_buf, + vm_map_page_mask(ipc_kernel_map)), + vm_map_round_page(kmem_buf + tbuflen, + vm_map_page_mask(ipc_kernel_map)), + FALSE); if (kr != 0) { printf("nfs_gss_mach_alloc_buffer: vm_map_unwire failed\n"); return; diff --git a/bsd/nfs/nfs_gss.h b/bsd/nfs/nfs_gss.h index e8cdb5a6a..7c7b245dd 100644 --- a/bsd/nfs/nfs_gss.h +++ b/bsd/nfs/nfs_gss.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2010 Apple Inc. All rights reserved. + * Copyright (c) 2007-2013 Apple Inc. All rights reserved. 
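
The nfs_gss_mach_alloc_buffer() hunk above stops assuming the kernel's compile-time page size: the buffer rounding and the wire/unwire bounds now take vm_map_page_mask(ipc_kernel_map), so they follow the page geometry of the map actually being allocated from. Mask-based rounding is plain bit arithmetic; a minimal standalone sketch (hypothetical helper names, no Mach headers needed):

	#include <assert.h>
	#include <stdint.h>

	/* Round x down/up to a power-of-two page boundary; mask = page_size - 1. */
	static uint64_t trunc_down(uint64_t x, uint64_t mask) { return x & ~mask; }
	static uint64_t round_up(uint64_t x, uint64_t mask)   { return (x + mask) & ~mask; }

	int main(void)
	{
		uint64_t mask = 4096 - 1;	/* 4K pages, as vm_map_page_mask() might return */
		assert(round_up(1, mask) == 4096);
		assert(round_up(4096, mask) == 4096);
		assert(trunc_down(4097, mask) == 4096);
		return 0;
	}
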
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,6 +31,7 @@ #include #include +#include #include #define RPCSEC_GSS 6 @@ -56,16 +57,43 @@ extern u_char krb5_mech[11]; /* * GSS-API things */ +typedef uint32_t OM_uint32; + #define GSS_S_COMPLETE 0 #define GSS_S_CONTINUE_NEEDED 1 +/* + * Some "helper" definitions to make the status code macros obvious. + * From gssapi.h: + */ +#define GSS_C_CALLING_ERROR_OFFSET 24 +#define GSS_C_ROUTINE_ERROR_OFFSET 16 +#define GSS_C_SUPPLEMENTARY_OFFSET 0 +#define GSS_C_CALLING_ERROR_MASK ((OM_uint32) 0377ul) +#define GSS_C_ROUTINE_ERROR_MASK ((OM_uint32) 0377ul) +#define GSS_C_SUPPLEMENTARY_MASK ((OM_uint32) 0177777ul) + +/* + * The macros that test status codes for error conditions. Note that the + * GSS_ERROR() macro has changed slightly from the V1 GSSAPI so that it now + * evaluates its argument only once. + */ +#define GSS_CALLING_ERROR(x) \ + ((x) & (GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET)) +#define GSS_ROUTINE_ERROR(x) \ + ((x) & (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET)) +#define GSS_SUPPLEMENTARY_INFO(x) \ + ((x) & (GSS_C_SUPPLEMENTARY_MASK << GSS_C_SUPPLEMENTARY_OFFSET)) +#define GSS_ERROR(x) \ + ((x) & ((GSS_C_CALLING_ERROR_MASK << GSS_C_CALLING_ERROR_OFFSET) | \ + (GSS_C_ROUTINE_ERROR_MASK << GSS_C_ROUTINE_ERROR_OFFSET))) + #define GSS_MAXSEQ 0x80000000 // The biggest sequence number #define GSS_SVC_MAXCONTEXTS 500000 // Max contexts supported #define GSS_SVC_SEQWINDOW 256 // Server's sequence window #define GSS_CLNT_SEQLISTMAX 32 // Max length of req seq num list #define GSS_CLNT_SYS_VALID 300 // Valid time (sec) for failover ctx - #define SKEYLEN 8 // length of DES key #define SKEYLEN3 24 // length of DES3 keyboard #define MAX_SKEYLEN SKEYLEN3 @@ -102,21 +130,27 @@ struct nfs_gss_clnt_ctx { TAILQ_ENTRY(nfs_gss_clnt_ctx) gss_clnt_entries; uint32_t gss_clnt_flags; // Flag bits - see below uint32_t gss_clnt_refcnt; // Reference count - uid_t gss_clnt_uid; // Owner of this context + kauth_cred_t gss_clnt_cred; // Owner of this context + uint8_t *gss_clnt_principal; // Principal to use for this credential + uint32_t gss_clnt_prinlen; // Length of principal + gssd_nametype gss_clnt_prinnt; // Name type of principal + char *gss_clnt_display; // display name of principal uint32_t gss_clnt_proc; // Current GSS proc for cred uint32_t gss_clnt_seqnum; // GSS sequence number uint32_t gss_clnt_service; // Indicates krb5, krb5i or krb5p - u_char *gss_clnt_handle; // Identifies server context + uint8_t *gss_clnt_handle; // Identifies server context uint32_t gss_clnt_handle_len; // Size of server's ctx handle time_t gss_clnt_ctime; // When context was created uint32_t gss_clnt_seqwin; // Server's seq num window uint32_t *gss_clnt_seqbits; // Bitmap to track seq numbers in use mach_port_t gss_clnt_mport; // Mach port for gssd upcall - u_char *gss_clnt_verf; // RPC verifier from server - char *gss_clnt_svcname; // Service name e.g. "nfs/big.apple.com" + uint8_t *gss_clnt_verf; // RPC verifier from server + uint8_t *gss_clnt_svcname; // Service name e.g. 
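
The macros above pack three fields into one 32-bit GSS-API major status word: calling errors in bits 24-31, routine errors in bits 16-23, and supplementary info in the low 16 bits. The GETMAJERROR() macro added to nfs_gss.c pulls out the routine-error byte for logging. A self-contained userland sketch of the same extraction (the status value is made up for illustration):

	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t OM_uint32;

	#define GSS_C_ROUTINE_ERROR_OFFSET	16
	#define GSS_C_ROUTINE_ERROR_MASK	((OM_uint32) 0377ul)

	/* Same extraction the patch adds as GETMAJERROR() in nfs_gss.c */
	static OM_uint32
	routine_error(OM_uint32 major)
	{
		return (major >> GSS_C_ROUTINE_ERROR_OFFSET) & GSS_C_ROUTINE_ERROR_MASK;
	}

	int main(void)
	{
		/* hypothetical major status: routine error 13 plus one supplementary bit */
		OM_uint32 major = (13u << GSS_C_ROUTINE_ERROR_OFFSET) | 1u;
		printf("routine error = %u\n", routine_error(major));	/* prints 13 */
		return 0;
	}
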
"nfs/big.apple.com" + uint32_t gss_clnt_svcnamlen; // Service name length + gssd_nametype gss_clnt_svcnt; // Service name type gssd_cred gss_clnt_cred_handle; // Opaque cred handle from gssd gssd_ctx gss_clnt_context; // Opaque context handle from gssd - u_char *gss_clnt_token; // GSS token exchanged via gssd & server + uint8_t *gss_clnt_token; // GSS token exchanged via gssd & server uint32_t gss_clnt_tokenlen; // Length of token gss_key_info gss_clnt_kinfo; // GSS key info uint32_t gss_clnt_gssd_flags; // Special flag bits to gssd @@ -129,8 +163,9 @@ struct nfs_gss_clnt_ctx { */ #define GSS_CTX_COMPLETE 0x00000001 // Context is complete #define GSS_CTX_INVAL 0x00000002 // Context is invalid -#define GSS_NEEDSEQ 0x00000004 // Need a sequence number -#define GSS_NEEDCTX 0x00000008 // Need the context +#define GSS_CTX_INCOMPLETE 0x00000004 // Context needs to be inited +#define GSS_NEEDSEQ 0x00000008 // Need a sequence number +#define GSS_NEEDCTX 0x00000010 // Need the context /* * The server's RPCSEC_GSS context information @@ -149,7 +184,7 @@ struct nfs_gss_svc_ctx { uint32_t gss_svc_seqwin; // GSS sequence number window uint32_t *gss_svc_seqbits; // Bitmap to track seq numbers gssd_cred gss_svc_cred_handle; // Opaque cred handle from gssd - gssd_ctx gss_svc_context; // Opaque context handle from gssd + gssd_ctx gss_svc_context; // Opaque context handle from gssd u_char *gss_svc_token; // GSS token exchanged via gssd & client uint32_t gss_svc_tokenlen; // Length of token gss_key_info gss_svc_kinfo; // Session key info @@ -189,6 +224,7 @@ int nfs_gss_clnt_ctx_renew(struct nfsreq *); void nfs_gss_clnt_ctx_ref(struct nfsreq *, struct nfs_gss_clnt_ctx *); void nfs_gss_clnt_ctx_unref(struct nfsreq *); void nfs_gss_clnt_ctx_unmount(struct nfsmount *); +int nfs_gss_clnt_ctx_destroy(struct nfsmount *, kauth_cred_t cred); int nfs_gss_svc_cred_get(struct nfsrv_descript *, struct nfsm_chain *); int nfs_gss_svc_verf_put(struct nfsrv_descript *, struct nfsm_chain *); int nfs_gss_svc_ctx_init(struct nfsrv_descript *, struct nfsrv_sock *, mbuf_t *); diff --git a/iokit/Kernel/i386/IOAsmSupport.s b/bsd/nfs/nfs_ioctl.h similarity index 75% rename from iokit/Kernel/i386/IOAsmSupport.s rename to bsd/nfs/nfs_ioctl.h index ff7585311..31e27f56d 100644 --- a/iokit/Kernel/i386/IOAsmSupport.s +++ b/bsd/nfs/nfs_ioctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,21 +25,19 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include +/* + * Header file to export nfs defined ioctls for nfs_vnop_ioctls + */ + +#ifndef _NFS_NFS_IOCTL_H_ +#define _NFS_NFS_IOCTL_H_ +#include /* - * Seemingly unused references from cpp statically initialized objects. + * fsctl (vnop_ioctl) to detroy the callers credentials associated with the vnode's mount */ +#define NFS_IOC_DESTROY_CRED _IO('n', 1) +#define NFS_FSCTL_DESTROY_CRED IOCBASECMD(NFS_IOC_DESTROY_CRED) -.globl .constructors_used -.globl .destructors_used -.data - .align 2 - .long 0x11223344 -.constructors_used: - .long 0xdeadbeef - .long 0x11223344 -.destructors_used: - .long 0xdeadbeef - .long 0x11223344 +#endif diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c index 9563cc5de..fe04bd587 100644 --- a/bsd/nfs/nfs_node.c +++ b/bsd/nfs/nfs_node.c @@ -96,6 +96,8 @@ static lck_grp_t *nfs_node_lck_grp; static lck_grp_t *nfs_data_lck_grp; lck_mtx_t *nfs_node_hash_mutex; +#define NFS_NODE_DBG(...) 
NFS_DBG(NFS_FAC_NODE, 7, ## __VA_ARGS__) + /* * Initialize hash links for nfsnodes * and build nfsnode free list. @@ -281,24 +283,23 @@ loop: * Update the vnode if the name/and or the parent has * changed. We need to do this so that if getattrlist is * called asking for ATTR_CMN_NAME, that the "most" - * correct name is being returned if we're not making an - * entry. In addition for monitored vnodes we need to - * kick the vnode out of the name cache. We do this so - * that if there are hard links in the same directory - * the link will not be found and a lookup will get us - * here to return the name of the current link. In - * addition by removing the name from the name cache the - * old name will not be found after a rename done on - * another client or the server. The principle reason - * to do this is because Finder is asking for - * notifications on a directory. The directory changes, - * Finder gets notified, reads the directory (which we - * have purged) and for each entry returned calls - * getattrlist with the name returned from - * readdir. gettattrlist has to call namei/lookup to - * resolve the name, because its not in the cache we end - * up here. We need to update the name so Finder will - * get the name it called us with. + * correct name is being returned. In addition for + * monitored vnodes we need to kick the vnode out of the + * name cache. We do this so that if there are hard + * links in the same directory the link will not be + * found and a lookup will get us here to return the + * name of the current link. In addition by removing the + * name from the name cache the old name will not be + * found after a rename done on another client or the + * server. The principle reason to do this is because + * Finder is asking for notifications on a directory. + * The directory changes, Finder gets notified, reads + * the directory (which we have purged) and for each + * entry returned calls getattrlist with the name + * returned from readdir. gettattrlist has to call + * namei/lookup to resolve the name, because its not in + * the cache we end up here. We need to update the name + * so Finder will get the name it called us with. * * We had an imperfect solution with respect to case * sensitivity. There is a test that is run in @@ -340,7 +341,7 @@ loop: * ATTR_CMN_NAME */ if (dnp && cnp && (vp != NFSTOV(dnp))) { - int update_flags = vnode_ismonitored((NFSTOV(dnp))) ? VNODE_UPDATE_CACHE : 0; + int update_flags = (vnode_ismonitored((NFSTOV(dnp)))) ? VNODE_UPDATE_CACHE : 0; int (*cmp)(const char *s1, const char *s2, size_t n); cmp = nfs_case_insensitive(mp) ? strncasecmp : strncmp; @@ -351,8 +352,11 @@ loop: update_flags |= VNODE_UPDATE_NAME; if (vnode_parent(vp) != NFSTOV(dnp)) update_flags |= VNODE_UPDATE_PARENT; - if (update_flags) + if (update_flags) { + NFS_NODE_DBG("vnode_update_identity old name %s new name %*s\n", + vp->v_name, cnp->cn_namelen, cnp->cn_nameptr ? 
cnp->cn_nameptr : ""); vnode_update_identity(vp, NFSTOV(dnp), cnp->cn_nameptr, cnp->cn_namelen, 0, update_flags); + } } *npp = np; @@ -587,14 +591,23 @@ nfs_vnop_inactive(ap) { vnode_t vp = ap->a_vp; vfs_context_t ctx = ap->a_context; - nfsnode_t np = VTONFS(ap->a_vp); + nfsnode_t np; struct nfs_sillyrename *nsp; struct nfs_vattr nvattr; int unhash, attrerr, busyerror, error, inuse, busied, force; struct nfs_open_file *nofp; struct componentname cn; - struct nfsmount *nmp = NFSTONMP(np); - mount_t mp = vnode_mount(vp); + struct nfsmount *nmp; + mount_t mp; + + if (vp == NULL) + panic("nfs_vnop_inactive: vp == NULL"); + np = VTONFS(vp); + if (np == NULL) + panic("nfs_vnop_inactive: np == NULL"); + + nmp = NFSTONMP(np); + mp = vnode_mount(vp); restart: force = (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)); @@ -603,8 +616,21 @@ restart: /* There shouldn't be any open or lock state at this point */ lck_mtx_lock(&np->n_openlock); - if (np->n_openrefcnt && !force) + if (np->n_openrefcnt && !force) { + /* + * vnode_rele and vnode_put drop the vnode lock before + * calling VNOP_INACTIVE, so there is a race were the + * vnode could become active again. Perhaps there are + * other places where this can happen, so if we've got + * here we need to get out. + */ +#ifdef NFS_NODE_DEBUG NP(np, "nfs_vnop_inactive: still open: %d", np->n_openrefcnt); +#endif + lck_mtx_unlock(&np->n_openlock); + return 0; + } + TAILQ_FOREACH(nofp, &np->n_opens, nof_link) { lck_mtx_lock(&nofp->nof_lock); if (nofp->nof_flags & NFS_OPEN_FILE_BUSY) { @@ -1300,6 +1326,7 @@ nfs_data_update_size(nfsnode_t np, int datalocked) } #define DODEBUG 1 + int nfs_mount_is_dirty(mount_t mp) { @@ -1326,8 +1353,8 @@ out: microuptime(&then); timersub(&then, &now, &diff); - printf("nfs_mount_is_dirty took %lld mics for %ld slots and %ld nodes return %d\n", - (uint64_t)diff.tv_sec * 1000000LL + diff.tv_usec, i, ncnt, (i <= nfsnodehash)); + NFS_DBG(NFS_FAC_SOCK, 7, "mount_is_dirty for %s took %lld mics for %ld slots and %ld nodes return %d\n", + vfs_statfs(mp)->f_mntfromname, (uint64_t)diff.tv_sec * 1000000LL + diff.tv_usec, i, ncnt, (i <= nfsnodehash)); #endif return (i <= nfsnodehash); diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c index 27126d218..c163bfa04 100644 --- a/bsd/nfs/nfs_socket.c +++ b/bsd/nfs/nfs_socket.c @@ -107,6 +107,8 @@ #include #include +#define NFS_SOCK_DBG(...) 
NFS_DBG(NFS_FAC_SOCK, 7, ## __VA_ARGS__) + /* XXX */ boolean_t current_thread_aborted(void); kern_return_t thread_terminate(thread_t); @@ -145,6 +147,12 @@ nfs_sockaddr_cmp(struct sockaddr *sa1, struct sockaddr *sa2) #if NFSCLIENT +int nfs_connect_search_new_socket(struct nfsmount *, struct nfs_socket_search *, struct timeval *); +int nfs_connect_search_socket_connect(struct nfsmount *, struct nfs_socket *, int); +int nfs_connect_search_ping(struct nfsmount *, struct nfs_socket *, struct timeval *); +void nfs_connect_search_socket_found(struct nfsmount *, struct nfs_socket_search *, struct nfs_socket *); +void nfs_connect_search_socket_reap(struct nfsmount *, struct nfs_socket_search *, struct timeval *); +int nfs_connect_search_check(struct nfsmount *, struct nfs_socket_search *, struct timeval *); int nfs_reconnect(struct nfsmount *); int nfs_connect_setup(struct nfsmount *); void nfs_mount_sock_thread(void *, wait_result_t); @@ -161,12 +169,6 @@ int nfs_can_squish(struct nfsmount *); int nfs_is_squishy(struct nfsmount *); int nfs_is_dead(int, struct nfsmount *); -#ifdef NFS_SOCKET_DEBUGGING -#define NFS_SOCK_DBG(X) printf X -#else -#define NFS_SOCK_DBG(X) -#endif - /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. * Use the mean and mean deviation of rtt for the appropriate type of rpc @@ -316,18 +318,18 @@ nfs_connect_upcall(socket_t so, void *arg, __unused int waitflag) int error = 0, recv = 1; if (nso->nso_flags & NSO_CONNECTING) { - NFS_SOCK_DBG(("nfs connect - socket %p upcall - connecting\n", nso)); + NFS_SOCK_DBG("nfs connect - socket %p upcall - connecting\n", nso); wakeup(nso->nso_wake); return; } lck_mtx_lock(&nso->nso_lock); if ((nso->nso_flags & (NSO_UPCALL|NSO_DISCONNECTING|NSO_DEAD)) || !(nso->nso_flags & NSO_PINGING)) { - NFS_SOCK_DBG(("nfs connect - socket %p upcall - nevermind\n", nso)); + NFS_SOCK_DBG("nfs connect - socket %p upcall - nevermind\n", nso); lck_mtx_unlock(&nso->nso_lock); return; } - NFS_SOCK_DBG(("nfs connect - socket %p upcall\n", nso)); + NFS_SOCK_DBG("nfs connect - socket %p upcall\n", nso); nso->nso_flags |= NSO_UPCALL; /* loop while we make error-free progress */ @@ -476,6 +478,8 @@ nfs_socket_create( sinaddr = &((struct sockaddr_in6*)sa)->sin6_addr; if (inet_ntop(sa->sa_family, sinaddr, naddr, sizeof(naddr)) != naddr) strlcpy(naddr, "", sizeof(naddr)); +#else + char naddr[1] = { 0 }; #endif *nsop = NULL; @@ -531,14 +535,14 @@ nfs_socket_create( } if (error) { - NFS_SOCK_DBG(("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n", + NFS_SOCK_DBG("nfs connect %s error %d creating socket %p %s type %d%s port %d prot %d %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nso, naddr, sotype, - resvport ? "r" : "", port, protocol, vers)); + resvport ? "r" : "", port, protocol, vers); nfs_socket_destroy(nso); } else { - NFS_SOCK_DBG(("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n", + NFS_SOCK_DBG("nfs connect %s created socket %p %s type %d%s port %d prot %d %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, naddr, - sotype, resvport ? "r" : "", port, protocol, vers)); + sotype, resvport ? 
"r" : "", port, protocol, vers); *nsop = nso; } return (error); @@ -566,7 +570,7 @@ nfs_socket_destroy(struct nfs_socket *nso) FREE(nso->nso_saddr, M_SONAME); if (nso->nso_saddr2) FREE(nso->nso_saddr2, M_SONAME); - NFS_SOCK_DBG(("nfs connect - socket %p destroyed\n", nso)); + NFS_SOCK_DBG("nfs connect - socket %p destroyed\n", nso); FREE(nso, M_TEMP); } @@ -680,50 +684,52 @@ nfs_socket_search_update_error(struct nfs_socket_search *nss, int error) nss->nss_error = error; } -/* - * Continue the socket search until we have something to report. +/* nfs_connect_search_new_socket: + * Given a socket search structure for an nfs mount try to find a new socket from the set of addresses specified + * by nss. + * + * nss_last is set to -1 at initialization to indicate the first time. Its set to -2 if address was found but + * could not be used or if a socket timed out. */ int -nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss) +nfs_connect_search_new_socket(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now) { - struct nfs_socket *nso, *nsonext; - struct timeval now; struct nfs_fs_location *fsl; struct nfs_fs_server *fss; struct sockaddr_storage ss; + struct nfs_socket *nso; char *addrstr; - int error, nomore = 0; + int error = 0; + -loop: - microuptime(&now); - NFS_SOCK_DBG(("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec)); + NFS_SOCK_DBG("nfs connect %s nss_addrcnt = %d\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss->nss_addrcnt); - /* Time to start another socket? */ - while ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) || - ((nss->nss_sockcnt < 4) && (now.tv_sec >= (nss->nss_last + 2)))) { + /* + * while there are addresses and: + * we have no sockets or + * the last address failed and did not produce a socket (nss_last < 0) or + * Its been a while (2 seconds) and we have less than the max number of concurrent sockets to search (4) + * then attempt to create a socket with the current address. + */ + while (nss->nss_addrcnt > 0 && ((nss->nss_last < 0) || (nss->nss_sockcnt == 0) || + ((nss->nss_sockcnt < 4) && (now->tv_sec >= (nss->nss_last + 2))))) { if (nmp->nm_sockflags & NMSOCK_UNMOUNT) return (EINTR); - /* Find the next address to try... */ - /* Have we run out of locations? */ - if (!nomore && (nss->nss_last != -1) && !nfs_location_index_cmp(&nss->nss_nextloc, &nss->nss_startloc)) - nomore = 1; - if (nomore) { - if (nss->nss_last < 0) - nss->nss_last = now.tv_sec; - break; - } /* Can we convert the address to a sockaddr? */ fsl = nmp->nm_locations.nl_locations[nss->nss_nextloc.nli_loc]; fss = fsl->nl_servers[nss->nss_nextloc.nli_serv]; addrstr = fss->ns_addresses[nss->nss_nextloc.nli_addr]; if (!nfs_uaddr2sockaddr(addrstr, (struct sockaddr*)&ss)) { nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); + nss->nss_addrcnt -= 1; nss->nss_last = -2; continue; } /* Check that socket family is acceptable. 
*/ if (nmp->nm_sofamily && (ss.ss_family != nmp->nm_sofamily)) { nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); + nss->nss_addrcnt -= 1; nss->nss_last = -2; continue; } @@ -748,136 +754,166 @@ loop: TAILQ_INSERT_TAIL(&nss->nss_socklist, nso, nso_link); nss->nss_sockcnt++; nfs_location_next(&nmp->nm_locations, &nss->nss_nextloc); - - nss->nss_last = now.tv_sec; + nss->nss_addrcnt -= 1; + + nss->nss_last = now->tv_sec; } - /* check each active socket and try to push it along */ - TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) { + if (nss->nss_addrcnt == 0 && nss->nss_last < 0) + nss->nss_last = now->tv_sec; + + return (error); +} + +/* + * nfs_connect_search_socket_connect: Connect an nfs socket nso for nfsmount nmp. + * If successful set the socket options for the socket as require from the mount. + * + * Assumes: nso->nso_lock is held on entry and return. + */ +int +nfs_connect_search_socket_connect(struct nfsmount *nmp, struct nfs_socket *nso, int verbose) +{ + int error; + + if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) { + /* no connection needed, just say it's already connected */ + NFS_SOCK_DBG("nfs connect %s UDP socket %p noconnect\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); + nso->nso_flags |= NSO_CONNECTED; + nfs_socket_options(nmp, nso); + return (1); /* Socket is connected and setup */ + } else if (!(nso->nso_flags & NSO_CONNECTING)) { + /* initiate the connection */ + nso->nso_flags |= NSO_CONNECTING; + lck_mtx_unlock(&nso->nso_lock); + NFS_SOCK_DBG("nfs connect %s connecting socket %p\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); + error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT); lck_mtx_lock(&nso->nso_lock); - if (!(nso->nso_flags & NSO_CONNECTED)) { - if ((nso->nso_sotype != SOCK_STREAM) && NMFLAG(nmp, NOCONNECT)) { - /* no connection needed, just say it's already connected */ - nso->nso_flags |= NSO_CONNECTED; - NFS_SOCK_DBG(("nfs connect %s UDP socket %p noconnect\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); - } else if (!(nso->nso_flags & NSO_CONNECTING)) { - /* initiate the connection */ - nso->nso_flags |= NSO_CONNECTING; - lck_mtx_unlock(&nso->nso_lock); - NFS_SOCK_DBG(("nfs connect %s connecting socket %p\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); - error = sock_connect(nso->nso_so, nso->nso_saddr, MSG_DONTWAIT); - lck_mtx_lock(&nso->nso_lock); - if (error && (error != EINPROGRESS)) { - nso->nso_error = error; - nso->nso_flags |= NSO_DEAD; - lck_mtx_unlock(&nso->nso_lock); - continue; - } - } - if (nso->nso_flags & NSO_CONNECTING) { - /* check the connection */ - if (sock_isconnected(nso->nso_so)) { - NFS_SOCK_DBG(("nfs connect %s socket %p is connected\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); - nso->nso_flags &= ~NSO_CONNECTING; - nso->nso_flags |= NSO_CONNECTED; - } else { - int optlen = sizeof(error); - error = 0; - sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen); - if (error) { /* we got an error on the socket */ - NFS_SOCK_DBG(("nfs connect %s socket %p connection error %d\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error)); - if (nss->nss_flags & NSS_VERBOSE) - log(LOG_INFO, "nfs_connect: socket error %d for %s\n", - error, vfs_statfs(nmp->nm_mountp)->f_mntfromname); - nso->nso_error = error; - nso->nso_flags |= NSO_DEAD; - lck_mtx_unlock(&nso->nso_lock); - continue; - } - } - } - if (nso->nso_flags & NSO_CONNECTED) - nfs_socket_options(nmp, nso); - } - if (!(nso->nso_flags & NSO_CONNECTED)) { - lck_mtx_unlock(&nso->nso_lock); - 
continue; + if (error && (error != EINPROGRESS)) { + nso->nso_error = error; + nso->nso_flags |= NSO_DEAD; + return (0); } - if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) || - ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) { - /* initiate a NULL RPC request */ - uint64_t xid = nso->nso_pingxid; - mbuf_t m, mreq = NULL; - struct msghdr msg; - size_t reqlen, sentlen; - uint32_t vers; - - if (!(vers = nso->nso_version)) { - if (nso->nso_protocol == PMAPPROG) - vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4; - else if (nso->nso_protocol == NFS_PROG) - vers = NFS_VER3; - } - lck_mtx_unlock(&nso->nso_lock); - error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS, - vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq); - lck_mtx_lock(&nso->nso_lock); - if (!error) { - nso->nso_flags |= NSO_PINGING; - nso->nso_pingxid = R_XID32(xid); - nso->nso_reqtimestamp = now.tv_sec; - bzero(&msg, sizeof(msg)); - if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) { - msg.msg_name = nso->nso_saddr; - msg.msg_namelen = nso->nso_saddr->sa_len; - } - for (reqlen=0, m=mreq; m; m = mbuf_next(m)) - reqlen += mbuf_len(m); - lck_mtx_unlock(&nso->nso_lock); - error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen); - NFS_SOCK_DBG(("nfs connect %s verifying socket %p send rv %d\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error)); - lck_mtx_lock(&nso->nso_lock); - if (!error && (sentlen != reqlen)) - error = ETIMEDOUT; - } - if (error) { + } + if (nso->nso_flags & NSO_CONNECTING) { + /* check the connection */ + if (sock_isconnected(nso->nso_so)) { + NFS_SOCK_DBG("nfs connect %s socket %p is connected\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); + nso->nso_flags &= ~NSO_CONNECTING; + nso->nso_flags |= NSO_CONNECTED; + nfs_socket_options(nmp, nso); + return (1); /* Socket is connected and setup */ + } else { + int optlen = sizeof(error); + error = 0; + sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &error, &optlen); + if (error) { /* we got an error on the socket */ + NFS_SOCK_DBG("nfs connect %s socket %p connection error %d\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); + if (verbose) + printf("nfs connect socket error %d for %s\n", + error, vfs_statfs(nmp->nm_mountp)->f_mntfromname); nso->nso_error = error; nso->nso_flags |= NSO_DEAD; - lck_mtx_unlock(&nso->nso_lock); - continue; + return (0); } } - if (nso->nso_flags & NSO_VERIFIED) { - /* WOOHOO!! This socket looks good! */ - NFS_SOCK_DBG(("nfs connect %s socket %p verified\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); - if (!nso->nso_version) { - /* If the version isn't set, the default must have worked. */ - if (nso->nso_protocol == PMAPPROG) - nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4; - if (nso->nso_protocol == NFS_PROG) - nso->nso_version = NFS_VER3; - } - lck_mtx_unlock(&nso->nso_lock); - TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link); - nss->nss_sockcnt--; - nss->nss_sock = nso; - break; + } + + return (0); /* Waiting to be connected */ +} + +/* + * nfs_connect_search_ping: Send a null proc on the nso socket. 
+ */
+int
+nfs_connect_search_ping(struct nfsmount *nmp, struct nfs_socket *nso, struct timeval *now)
+{
+	/* initiate a NULL RPC request */
+	uint64_t xid = nso->nso_pingxid;
+	mbuf_t m, mreq = NULL;
+	struct msghdr msg;
+	size_t reqlen, sentlen;
+	uint32_t vers = nso->nso_version;
+	int error;
+
+	if (!vers) {
+		if (nso->nso_protocol == PMAPPROG)
+			vers = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
+		else if (nso->nso_protocol == NFS_PROG)
+			vers = NFS_VER3;
+	}
+	lck_mtx_unlock(&nso->nso_lock);
+	error = nfsm_rpchead2(nmp, nso->nso_sotype, nso->nso_protocol, vers, 0, RPCAUTH_SYS,
+			      vfs_context_ucred(vfs_context_kernel()), NULL, NULL, &xid, &mreq);
+	lck_mtx_lock(&nso->nso_lock);
+	if (!error) {
+		nso->nso_flags |= NSO_PINGING;
+		nso->nso_pingxid = R_XID32(xid);
+		nso->nso_reqtimestamp = now->tv_sec;
+		bzero(&msg, sizeof(msg));
+		if ((nso->nso_sotype != SOCK_STREAM) && !sock_isconnected(nso->nso_so)) {
+			msg.msg_name = nso->nso_saddr;
+			msg.msg_namelen = nso->nso_saddr->sa_len;
+		}
+		for (reqlen=0, m=mreq; m; m = mbuf_next(m))
+			reqlen += mbuf_len(m);
+		lck_mtx_unlock(&nso->nso_lock);
+		error = sock_sendmbuf(nso->nso_so, &msg, mreq, 0, &sentlen);
+		NFS_SOCK_DBG("nfs connect %s verifying socket %p send rv %d\n",
+			     vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
+		lck_mtx_lock(&nso->nso_lock);
+		if (!error && (sentlen != reqlen))
+			error = ETIMEDOUT;
+	}
+	if (error) {
+		nso->nso_error = error;
+		nso->nso_flags |= NSO_DEAD;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * nfs_connect_search_socket_found: Take the found socket off the socket search list and assign it to the searched socket.
+ * Set the nfs socket protocol and version if needed.
+ */
+void
+nfs_connect_search_socket_found(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct nfs_socket *nso)
+{
+	NFS_SOCK_DBG("nfs connect %s socket %p verified\n",
+		     vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
+	if (!nso->nso_version) {
+		/* If the version isn't set, the default must have worked. */
+		if (nso->nso_protocol == PMAPPROG)
+			nso->nso_version = (nso->nso_saddr->sa_family == AF_INET) ? PMAPVERS : RPCBVERS4;
+		if (nso->nso_protocol == NFS_PROG)
+			nso->nso_version = NFS_VER3;
+	}
+	TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
+	nss->nss_sockcnt--;
+	nss->nss_sock = nso;
+}
+
+/*
+ * nfs_connect_search_socket_reap: For each socket in the search list, mark any timed-out socket as dead and remove it
+ * from the list. Dead sockets are then destroyed.
+ */
+void
+nfs_connect_search_socket_reap(struct nfsmount *nmp __unused, struct nfs_socket_search *nss, struct timeval *now)
+{
+	struct nfs_socket *nso, *nsonext;
+
+	TAILQ_FOREACH_SAFE(nso, &nss->nss_socklist, nso_link, nsonext) {
 		lck_mtx_lock(&nso->nso_lock);
-		if (now.tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
+		if (now->tv_sec >= (nso->nso_timestamp + nss->nss_timeo)) {
 			/* took too long */
-			NFS_SOCK_DBG(("nfs connect %s socket %p timed out\n",
-				vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso));
+			NFS_SOCK_DBG("nfs connect %s socket %p timed out\n",
+				     vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso);
 			nso->nso_error = ETIMEDOUT;
 			nso->nso_flags |= NSO_DEAD;
 		}
@@ -886,38 +922,112 @@ loop:
 			continue;
 		}
 		lck_mtx_unlock(&nso->nso_lock);
-		NFS_SOCK_DBG(("nfs connect %s reaping socket %p %d\n",
-			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error));
+		NFS_SOCK_DBG("nfs connect %s reaping socket %p %d\n",
+			     vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, nso->nso_error);
 		nfs_socket_search_update_error(nss, nso->nso_error);
 		TAILQ_REMOVE(&nss->nss_socklist, nso, nso_link);
 		nss->nss_sockcnt--;
 		nfs_socket_destroy(nso);
-		if (!nomore)
+		/* If there are more sockets to try, force the starting of another socket */
+		if (nss->nss_addrcnt > 0)
 			nss->nss_last = -2;
 	}
+}
+
+/*
+ * nfs_connect_search_check: Check on the status of the search and wait for replies if needed.
+ */
+int
+nfs_connect_search_check(struct nfsmount *nmp, struct nfs_socket_search *nss, struct timeval *now)
+{
+	int error;
+
+	/* log a warning if connect is taking a while */
+	if (((now->tv_sec - nss->nss_timestamp) >= 8) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) {
+		printf("nfs_connect: socket connect taking a while for %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+		nss->nss_flags |= NSS_WARNED;
+	}
+	if (nmp->nm_sockflags & NMSOCK_UNMOUNT)
+		return (EINTR);
+	if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0)))
+		return (error);
+
+	/* If we were successful at sending a ping, wait up to a second for a reply */
+	if (nss->nss_last >= 0)
+		tsleep(nss, PSOCK, "nfs_connect_search_wait", hz);
+
+	return (0);
+}
+
+/*
+ * Continue the socket search until we have something to report.
+ */
+int
+nfs_connect_search_loop(struct nfsmount *nmp, struct nfs_socket_search *nss)
+{
+	struct nfs_socket *nso;
+	struct timeval now;
+	int error;
+	int verbose = (nss->nss_flags & NSS_VERBOSE);
+
+loop:
+	microuptime(&now);
+	NFS_SOCK_DBG("nfs connect %s search %ld\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, now.tv_sec);
+
+	/* add a new socket to the socket list if needed and available */
+	error = nfs_connect_search_new_socket(nmp, nss, &now);
+	if (error) {
+		NFS_SOCK_DBG("nfs connect returned %d\n", error);
+		return (error);
+	}
+
+	/* check each active socket on the list and try to push it along */
+	TAILQ_FOREACH(nso, &nss->nss_socklist, nso_link) {
+		lck_mtx_lock(&nso->nso_lock);
+
+		/* If not connected, connect it */
+		if (!(nso->nso_flags & NSO_CONNECTED)) {
+			if (!nfs_connect_search_socket_connect(nmp, nso, verbose)) {
+				lck_mtx_unlock(&nso->nso_lock);
+				continue;
+			}
+		}
+
+		/* If the socket hasn't been verified and isn't mid-ping, ping it. We also handle UDP retransmits here */
+		if (!(nso->nso_flags & (NSO_PINGING|NSO_VERIFIED)) ||
+		    ((nso->nso_sotype == SOCK_DGRAM) && (now.tv_sec >= nso->nso_reqtimestamp+2))) {
+			if (!nfs_connect_search_ping(nmp, nso, &now)) {
+				lck_mtx_unlock(&nso->nso_lock);
+				continue;
+			}
+		}
+
+		/* Has the socket been verified by the upcall routine?
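
nfs_connect_search_socket_reap() unlinks and frees entries while it walks the list, which is why it iterates with TAILQ_FOREACH_SAFE: the macro latches the next pointer before the loop body runs, so destroying the current element cannot derail the traversal. A minimal standalone demonstration with <sys/queue.h>:

	#include <stdlib.h>
	#include <sys/queue.h>

	struct node {
		int value;
		TAILQ_ENTRY(node) link;
	};
	TAILQ_HEAD(nodehead, node);

	/* Free every node marked dead; plain TAILQ_FOREACH would walk freed memory. */
	static void
	reap(struct nodehead *head)
	{
		struct node *n, *nnext;

		TAILQ_FOREACH_SAFE(n, head, link, nnext) {
			if (n->value < 0) {		/* "dead" marker */
				TAILQ_REMOVE(head, n, link);
				free(n);
			}
		}
	}
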
*/ + if (nso->nso_flags & NSO_VERIFIED) { + /* WOOHOO!! This socket looks good! */ + nfs_connect_search_socket_found(nmp, nss, nso); + lck_mtx_unlock(&nso->nso_lock); + break; + } + lck_mtx_unlock(&nso->nso_lock); + } + + /* Check for timed out sockets and mark as dead and then remove all dead sockets. */ + nfs_connect_search_socket_reap(nmp, nss, &now); + /* * Keep looping if we haven't found a socket yet and we have more * sockets to (continue to) try. */ error = 0; - if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || !nomore)) { - /* log a warning if connect is taking a while */ - if (((now.tv_sec - nss->nss_timestamp) >= 30) && ((nss->nss_flags & (NSS_VERBOSE|NSS_WARNED)) == NSS_VERBOSE)) { - log(LOG_INFO, "nfs_connect: socket connect taking a while for %s\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname); - nss->nss_flags |= NSS_WARNED; - } - if (nmp->nm_sockflags & NMSOCK_UNMOUNT) - return (EINTR); - if ((error = nfs_sigintr(nmp, NULL, current_thread(), 0))) - return (error); - if (nss->nss_last >= 0) - tsleep(nss, PSOCK, "nfs_connect_search_wait", hz); - goto loop; + if (!nss->nss_sock && (!TAILQ_EMPTY(&nss->nss_socklist) || nss->nss_addrcnt)) { + error = nfs_connect_search_check(nmp, nss, &now); + if (!error) + goto loop; } - NFS_SOCK_DBG(("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error)); + NFS_SOCK_DBG("nfs connect %s returning %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, error); return (error); } @@ -950,25 +1060,27 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) fhandle_t *fh = NULL; char *path = NULL; in_port_t port; - + int addrtotal = 0; + /* paranoia... check that we have at least one address in the locations */ uint32_t loc, serv; for (loc=0; loc < nmp->nm_locations.nl_numlocs; loc++) { for (serv=0; serv < nmp->nm_locations.nl_locations[loc]->nl_servcount; serv++) { - if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount) - break; - NFS_SOCK_DBG(("nfs connect %s search, server %s has no addresses\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, - nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name)); + addrtotal += nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount; + if (nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_addrcount == 0) + NFS_SOCK_DBG("nfs connect %s search, server %s has no addresses\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, + nmp->nm_locations.nl_locations[loc]->nl_servers[serv]->ns_name); } - if (serv < nmp->nm_locations.nl_locations[loc]->nl_servcount) - break; } - if (loc >= nmp->nm_locations.nl_numlocs) { - NFS_SOCK_DBG(("nfs connect %s search failed, no addresses\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname)); + + if (addrtotal == 0) { + NFS_SOCK_DBG("nfs connect %s search failed, no addresses\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname); return (EINVAL); - } + } else + NFS_SOCK_DBG("nfs connect %s has %d addresses\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, addrtotal); lck_mtx_lock(&nmp->nm_lock); nmp->nm_sockflags |= NMSOCK_CONNECTING; @@ -980,6 +1092,7 @@ nfs_connect(struct nfsmount *nmp, int verbose, int timeo) tryagain: /* initialize socket search state */ bzero(&nss, sizeof(nss)); + nss.nss_addrcnt = addrtotal; nss.nss_error = savederror; TAILQ_INIT(&nss.nss_socklist); nss.nss_sotype = sotype; @@ -1023,9 +1136,9 @@ tryagain: nss.nss_version = nmp->nm_vers; } } - NFS_SOCK_DBG(("nfs connect first %s, so type %d port %d prot %d %d\n", + NFS_SOCK_DBG("nfs connect first %s, so type %d port %d prot %d %d\n", 
vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, - nss.nss_protocol, nss.nss_version)); + nss.nss_protocol, nss.nss_version); } else { /* we've connected before, just connect to NFS port */ if (!nmp->nm_nfsport) { @@ -1038,9 +1151,9 @@ tryagain: nss.nss_protocol = NFS_PROG; nss.nss_version = nmp->nm_vers; } - NFS_SOCK_DBG(("nfs connect %s, so type %d port %d prot %d %d\n", + NFS_SOCK_DBG("nfs connect %s, so type %d port %d prot %d %d\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nss.nss_sotype, nss.nss_port, - nss.nss_protocol, nss.nss_version)); + nss.nss_protocol, nss.nss_version); } /* Set next location to first valid location. */ @@ -1050,8 +1163,8 @@ tryagain: (nss.nss_nextloc.nli_addr >= nmp->nm_locations.nl_locations[nss.nss_nextloc.nli_loc]->nl_servers[nss.nss_nextloc.nli_serv]->ns_addrcount)) { nfs_location_next(&nmp->nm_locations, &nss.nss_nextloc); if (!nfs_location_index_cmp(&nss.nss_nextloc, &nss.nss_startloc)) { - NFS_SOCK_DBG(("nfs connect %s search failed, couldn't find a valid location index\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname)); + NFS_SOCK_DBG("nfs connect %s search failed, couldn't find a valid location index\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname); return (ENOENT); } } @@ -1067,8 +1180,8 @@ keepsearching: /* Try using UDP */ sotype = SOCK_DGRAM; savederror = nss.nss_error; - NFS_SOCK_DBG(("nfs connect %s TCP failed %d %d, trying UDP\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error)); + NFS_SOCK_DBG("nfs connect %s TCP failed %d %d, trying UDP\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, error, nss.nss_error); goto tryagain; } if (!error) @@ -1084,8 +1197,8 @@ keepsearching: FREE(fh, M_TEMP); if (path) FREE_ZONE(path, MAXPATHLEN, M_NAMEI); - NFS_SOCK_DBG(("nfs connect %s search failed, returning %d\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, error)); + NFS_SOCK_DBG("nfs connect %s search failed, returning %d\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, error); return (error); } @@ -1100,8 +1213,8 @@ keepsearching: port = ntohs(((struct sockaddr_in6*)nso->nso_saddr)->sin6_port); if (port == PMAPPORT) { /* Use this portmapper port to get the port #s we need. */ - NFS_SOCK_DBG(("nfs connect %s got portmapper socket %p\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); + NFS_SOCK_DBG("nfs connect %s got portmapper socket %p\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); /* remove the connect upcall so nfs_portmap_lookup() can use this socket */ sock_setupcall(nso->nso_so, NULL, NULL); @@ -1211,7 +1324,7 @@ keepsearching: } /* nso is an NFS socket */ - NFS_SOCK_DBG(("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso)); + NFS_SOCK_DBG("nfs connect %s got NFS socket %p\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso); /* If NFS version wasn't specified, it was determined during the connect. */ nfsvers = nmp->nm_vers ? nmp->nm_vers : (int)nso->nso_version; @@ -1266,8 +1379,8 @@ keepsearching: nfs_location_mntfromname(&nmp->nm_locations, nso->nso_location, path, MAXPATHLEN, 1); error = nfs3_mount_rpc(nmp, saddr, nso->nso_sotype, nfsvers, path, vfs_context_current(), timeo, fh, &nmp->nm_servsec); - NFS_SOCK_DBG(("nfs connect %s socket %p mount %d\n", - vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error)); + NFS_SOCK_DBG("nfs connect %s socket %p mount %d\n", + vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error); if (!error) { /* Make sure we can agree on a security flavor. 
 */
 		int o, s;	/* indices into mount option and server security flavor lists */
@@ -1400,8 +1513,8 @@ keepsearching:
 		wakeup(&nmp->nm_sockflags);
 	}
 	if (error) {
-		NFS_SOCK_DBG(("nfs connect %s socket %p setup failed %d\n",
-			vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error));
+		NFS_SOCK_DBG("nfs connect %s socket %p setup failed %d\n",
+			     vfs_statfs(nmp->nm_mountp)->f_mntfromname, nso, error);
 		nfs_socket_search_update_error(&nss, error);
 		nmp->nm_saddr = oldsaddr;
 		if (!(nmp->nm_sockflags & NMSOCK_HASCONNECTED)) {
@@ -1455,7 +1568,7 @@ keepsearching:
 		FREE(fh, M_TEMP);
 	if (path)
 		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
-	NFS_SOCK_DBG(("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
+	NFS_SOCK_DBG("nfs connect %s success\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
 	return (0);
 }
@@ -1531,6 +1644,7 @@ nfs_reconnect(struct nfsmount *nmp)
 			/* we're not yet completely mounted and */
 			/* we can't reconnect, so we fail */
 			lck_mtx_unlock(&nmp->nm_lock);
+			NFS_SOCK_DBG("Not mounted returning %d\n", error);
 			return (error);
 		}
 		nfs_mount_check_dead_timeout(nmp);
@@ -1539,7 +1653,7 @@ nfs_reconnect(struct nfsmount *nmp)
 			return (error);
 		}
 		lck_mtx_unlock(&nmp->nm_lock);
-		tsleep(&lbolt, PSOCK, "nfs_reconnect_delay", 0);
+		tsleep(nfs_reconnect, PSOCK, "nfs_reconnect_delay", 2*hz);
 		if ((error = nfs_sigintr(nmp, NULL, thd, 0)))
 			return (error);
 	}
@@ -1654,6 +1768,7 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
 	struct timeval now;
 	int error, dofinish;
 	nfsnode_t np;
+	int do_reconnect_sleep = 0;
 
 	lck_mtx_lock(&nmp->nm_lock);
@@ -1673,9 +1788,26 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
 				nmp->nm_reconnect_start = now.tv_sec;
 			}
 			lck_mtx_unlock(&nmp->nm_lock);
-			NFS_SOCK_DBG(("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname));
-			if (nfs_reconnect(nmp) == 0)
+			NFS_SOCK_DBG("nfs reconnect %s\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
+			/*
+			 * XXX We don't want to call reconnect again right away if the previous
+			 * attempt returned an error without having blocked. That has caused
+			 * machines to spam null procs in the past.
+			 */
+			if (do_reconnect_sleep)
+				tsleep(nfs_mount_sock_thread, PSOCK, "nfs_reconnect_sock_thread_delay", hz);
+			error = nfs_reconnect(nmp);
+			if (error) {
+				int lvl = 7;
+				if (error == EIO || error == EINTR) {
+					lvl = (do_reconnect_sleep++ % 600) ? 7 : 0;
+				}
+				nfs_printf(NFS_FAC_SOCK, lvl, "nfs reconnect %s: returned %d\n",
+					   vfs_statfs(nmp->nm_mountp)->f_mntfromname, error);
+			} else {
 				nmp->nm_reconnect_start = 0;
+				do_reconnect_sleep = 0;
+			}
 			lck_mtx_lock(&nmp->nm_lock);
 		}
 		if ((nmp->nm_sockflags & NMSOCK_READY) &&
@@ -1730,9 +1862,9 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
 					if (error == ENEEDAUTH)
 						req->r_xid = 0;
 				}
-				NFS_SOCK_DBG(("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
+				NFS_SOCK_DBG("nfs async%s restart: p %d x 0x%llx f 0x%x rtt %d\n",
 					nfs_request_using_gss(req) ? " gss" : "", req->r_procnum, req->r_xid,
-					req->r_flags, req->r_rtt));
+					req->r_flags, req->r_rtt);
 				error = !req->r_nmp ? ENXIO : 0;	/* unmounted? */
 				if (!error)
 					error = nfs_sigintr(nmp, req, req->r_thread, 0);
@@ -1754,8 +1886,8 @@ nfs_mount_sock_thread(void *arg, __unused wait_result_t wr)
 					error = 0;
 					continue;
 				}
-				NFS_SOCK_DBG(("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
-					req->r_procnum, req->r_xid, req->r_flags, req->r_rtt));
+				NFS_SOCK_DBG("nfs async resend: p %d x 0x%llx f 0x%x rtt %d\n",
+					req->r_procnum, req->r_xid, req->r_flags, req->r_rtt);
 				error = !req->r_nmp ? ENXIO : 0;	/* unmounted?
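
The lvl = (do_reconnect_sleep++ % 600) ? 7 : 0 dance above rate-limits the reconnect complaint: the first failure (and every 600th thereafter) logs at level 0, which nfs_printf always emits, while the rest go to debug level 7. The same throttle in isolation (a standalone sketch with hypothetical names):

	#include <stdio.h>

	#define LOG_ALWAYS	0	/* lower level == more important, as in nfs_printf() */
	#define LOG_DEBUG7	7

	/* Log a persistent failure loudly only once per `period` occurrences. */
	static void
	log_throttled(unsigned *counter, unsigned period, const char *msg)
	{
		int level = (*counter % period) ? LOG_DEBUG7 : LOG_ALWAYS;

		(*counter)++;
		if (level == LOG_ALWAYS)
			printf("%s\n", msg);	/* stand-in for nfs_printf(fac, level, ...) */
	}
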
*/ if (!error) error = nfs_sigintr(nmp, req, req->r_thread, 0); @@ -2832,7 +2964,7 @@ again: lck_mtx_unlock(&req->r_mtx); return (0); } - NFS_SOCK_DBG(("nfs_send: 0x%llx wait reconnect\n", req->r_xid)); + NFS_SOCK_DBG("nfs_send: 0x%llx wait reconnect\n", req->r_xid); lck_mtx_lock(&req->r_mtx); req->r_flags &= ~R_MUSTRESEND; req->r_rtt = 0; @@ -2964,8 +3096,8 @@ again: error = sock_sendmbuf(nso->nso_so, &msg, mreqcopy, 0, &sentlen); #ifdef NFS_SOCKET_DEBUGGING if (error || (sentlen != req->r_mreqlen)) - NFS_SOCK_DBG(("nfs_send: 0x%llx sent %d/%d error %d\n", - req->r_xid, (int)sentlen, (int)req->r_mreqlen, error)); + NFS_SOCK_DBG("nfs_send: 0x%llx sent %d/%d error %d\n", + req->r_xid, (int)sentlen, (int)req->r_mreqlen, error); #endif if (!error && (sentlen != req->r_mreqlen)) error = EWOULDBLOCK; @@ -3016,8 +3148,8 @@ again: sock_getsockopt(nso->nso_so, SOL_SOCKET, SO_ERROR, &clearerror, &optlen); #ifdef NFS_SOCKET_DEBUGGING if (clearerror) - NFS_SOCK_DBG(("nfs_send: ignoring UDP socket error %d so %d\n", - error, clearerror)); + NFS_SOCK_DBG("nfs_send: ignoring UDP socket error %d so %d\n", + error, clearerror); #endif } } @@ -3044,7 +3176,7 @@ again: break; } if (needrecon && (nso == nmp->nm_nso)) { /* mark socket as needing reconnect */ - NFS_SOCK_DBG(("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error)); + NFS_SOCK_DBG("nfs_send: 0x%llx need reconnect %d\n", req->r_xid, error); nfs_need_reconnect(nmp); } @@ -3118,7 +3250,7 @@ nfs_udp_rcv(socket_t so, void *arg, __unused int waitflag) if (error && (error != EWOULDBLOCK)) { /* problems with the socket... mark for reconnection */ - NFS_SOCK_DBG(("nfs_udp_rcv: need reconnect %d\n", error)); + NFS_SOCK_DBG("nfs_udp_rcv: need reconnect %d\n", error); nfs_need_reconnect(nmp); } } @@ -3178,12 +3310,12 @@ nfs_tcp_rcv(socket_t so, void *arg, __unused int waitflag) } #ifdef NFS_SOCKET_DEBUGGING if (!recv && (error != EWOULDBLOCK)) - NFS_SOCK_DBG(("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error)); + NFS_SOCK_DBG("nfs_tcp_rcv: got nothing, error %d, got FIN?\n", error); #endif /* note: no error and no data indicates server closed its end */ if ((error != EWOULDBLOCK) && (error || !recv)) { /* problems with the socket... 
mark for reconnection */ - NFS_SOCK_DBG(("nfs_tcp_rcv: need reconnect %d\n", error)); + NFS_SOCK_DBG("nfs_tcp_rcv: need reconnect %d\n", error); nfs_need_reconnect(nmp); } } @@ -3214,7 +3346,7 @@ nfs_sock_poke(struct nfsmount *nmp) msg.msg_iov = &aio; msg.msg_iovlen = 1; error = sock_send(nmp->nm_nso->nso_so, &msg, MSG_DONTWAIT, &len); - NFS_SOCK_DBG(("nfs_sock_poke: error %d\n", error)); + NFS_SOCK_DBG("nfs_sock_poke: error %d\n", error); nfs_is_dead(error, nmp); } @@ -3350,8 +3482,8 @@ nfs_wait_reply(struct nfsreq *req) break; /* check if we need to resend */ if (req->r_flags & R_MUSTRESEND) { - NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n", - req->r_procnum, req->r_xid, req->r_flags, req->r_rtt)); + NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d\n", + req->r_procnum, req->r_xid, req->r_flags, req->r_rtt); req->r_flags |= R_SENDING; lck_mtx_unlock(&req->r_mtx); if (nfs_request_using_gss(req)) { @@ -3368,8 +3500,8 @@ nfs_wait_reply(struct nfsreq *req) } error = nfs_send(req, 1); lck_mtx_lock(&req->r_mtx); - NFS_SOCK_DBG(("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n", - req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error)); + NFS_SOCK_DBG("nfs wait resend: p %d x 0x%llx f 0x%x rtt %d err %d\n", + req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, error); if (error) break; if (((error = req->r_error)) || req->r_nmrep.nmc_mhead) @@ -3933,7 +4065,7 @@ nfs_request_finish( do { if ((error = nfs_sigintr(req->r_nmp, req, req->r_thread, 0))) goto nfsmout; - tsleep(&lbolt, PSOCK|slpflag, "nfs_jukebox_trylater", 0); + tsleep(nfs_request_finish, PSOCK|slpflag, "nfs_jukebox_trylater", hz); slpflag = 0; } while (--delay > 0); } @@ -4604,10 +4736,10 @@ nfs_request_timer(__unused void *param0, __unused void *param1) continue; } /* The request has timed out */ - NFS_SOCK_DBG(("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n", + NFS_SOCK_DBG("nfs timeout: proc %d %d xid %llx rtt %d to %d # %d, t %ld/%d\n", req->r_procnum, proct[req->r_procnum], req->r_xid, req->r_rtt, timeo, nmp->nm_timeouts, - (now.tv_sec - req->r_start)*NFS_HZ, maxtime)); + (now.tv_sec - req->r_start)*NFS_HZ, maxtime); if (nmp->nm_timeouts < 8) nmp->nm_timeouts++; nfs_mount_check_dead_timeout(nmp); @@ -4644,9 +4776,9 @@ nfs_request_timer(__unused void *param0, __unused void *param1) lck_mtx_unlock(&req->r_mtx); continue; } - NFS_SOCK_DBG(("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n", + NFS_SOCK_DBG("nfs timer TERMINATE: p %d x 0x%llx f 0x%x rtt %d t %ld\n", req->r_procnum, req->r_xid, req->r_flags, req->r_rtt, - now.tv_sec - req->r_start)); + now.tv_sec - req->r_start); nfs_softterm(req); finish_asyncio = ((req->r_callback.rcb_func != NULL) && !(req->r_flags & R_WAITSENT)); wakeup(req); @@ -4673,8 +4805,8 @@ nfs_request_timer(__unused void *param0, __unused void *param1) lck_mtx_unlock(&req->r_mtx); continue; } - NFS_SOCK_DBG(("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n", - req->r_procnum, req->r_xid, req->r_flags, req->r_rtt)); + NFS_SOCK_DBG("nfs timer mark resend: p %d x 0x%llx f 0x%x rtt %d\n", + req->r_procnum, req->r_xid, req->r_flags, req->r_rtt); req->r_flags |= R_MUSTRESEND; req->r_rtt = -1; wakeup(req); @@ -4750,7 +4882,8 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocke * If the mount is hung and we've requested not to hang * on remote filesystems, then bail now. 
 */
-	if (!error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
+	if (current_proc() != kernproc &&
+	    !error && (nmp->nm_state & NFSSTA_TIMEO) && nfs_noremotehang(thd))
 		error = EIO;
 
 	if (!nmplocked)
@@ -4759,7 +4892,7 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocke
 		return (error);
 
 	/* may not have a thread for async I/O */
-	if (thd == NULL)
+	if (thd == NULL || current_proc() == kernproc)
 		return (0);
 
 	/*
@@ -5223,14 +5356,14 @@ nfs_is_squishy(struct nfsmount *nmp)
 	int squishy = 0;
 	int timeo = (nfs_squishy_flags & NFS_SQUISH_QUICK) ? NFS_SQUISHY_QUICKTIMEOUT : NFS_SQUISHY_DEADTIMEOUT;
 
-	NFS_SOCK_DBG(("nfs_is_squishy: %s: nm_curdeadtiemout = %d, nfs_is_mobile = %d\n",
-		vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile));
+	NFS_SOCK_DBG("%s: nm_curdeadtimeout = %d, nfs_is_mobile = %d\n",
+		     vfs_statfs(mp)->f_mntfromname, nmp->nm_curdeadtimeout, nfs_is_mobile);
 
 	if (!nfs_can_squish(nmp))
 		goto out;
 
 	timeo = (nmp->nm_deadtimeout > timeo) ? max(nmp->nm_deadtimeout/8, timeo) : timeo;
-	NFS_SOCK_DBG(("nfs_is_squishy: nm_writers = %d  nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo));
+	NFS_SOCK_DBG("nm_writers = %d  nm_mappers = %d timeo = %d\n", nmp->nm_writers, nmp->nm_mappers, timeo);
 
 	if (nmp->nm_writers == 0 && nmp->nm_mappers == 0) {
 		uint64_t flags = mp ? vfs_flags(mp) : 0;
@@ -5253,7 +5386,7 @@ out:
 	nmp->nm_curdeadtimeout = squishy ? timeo : nmp->nm_deadtimeout;
 
-	NFS_SOCK_DBG(("nfs_is_squishy: nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout));
+	NFS_SOCK_DBG("nm_curdeadtimeout = %d\n", nmp->nm_curdeadtimeout);
 
 	return (squishy);
 }
@@ -5269,9 +5402,10 @@ nfs_is_dead_lock(int error, struct nfsmount *nmp)
 	if (nmp->nm_state & NFSSTA_DEAD)
 		return (1);
 
-	if ((error != ENETUNREACH && error != EHOSTUNREACH) ||
+	if ((error != ENETUNREACH && error != EHOSTUNREACH && error != EADDRNOTAVAIL) ||
 	    !(nmp->nm_locations.nl_numlocs == 1 && nmp->nm_locations.nl_locations[0]->nl_servcount == 1))
 		return (0);
+
 	if ((nfs_squishy_flags & NFS_SQUISH_QUICK) && nfs_is_squishy(nmp)) {
 		printf("nfs_is_dead: nfs server %s: unreachable. Squished dead\n", vfs_statfs(nmp->nm_mountp)->f_mntfromname);
 		nmp->nm_state |= NFSSTA_DEAD;
diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c
index e0e9446bf..2ed209532 100644
--- a/bsd/nfs/nfs_subs.c
+++ b/bsd/nfs/nfs_subs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -1029,14 +1029,63 @@ nfsm_rpchead(
 		req->r_auth, req->r_cred, req, mrest, xidp, mreqp);
 }
 
+/*
+ * get_auxiliary_groups: Gets the supplementary groups from a credential.
+ *
+ * IN:  cred:   credential to get the associated groups from.
+ * OUT: groups: An array of gids of NGROUPS size.
+ * IN:  count:  The number of groups to get; i.e., the number of groups the server supports.
+ *
+ * returns: The number of groups found.
+ *
+ * Just a wrapper around kauth_cred_getgroups to handle the case of a server supporting fewer
+ * than NGROUPS groups.
+ */
+static int
+get_auxiliary_groups(kauth_cred_t cred, gid_t groups[NGROUPS], int count)
+{
+	gid_t pgid;
+	int maxcount = count < NGROUPS ? count + 1 : NGROUPS;
+	int i;
+
+	for (i = 0; i < NGROUPS; i++)
+		groups[i] = -2; /* Initialize to the nobody group */
+
+	(void)kauth_cred_getgroups(cred, groups, &maxcount);
+	if (maxcount < 1)
+		return (maxcount);
+
+	/*
+	 * kauth_cred_getgroups returns the primary group followed by the
+	 * user's auxiliary groups.
If the number of groups the server supports + * is less than NGROUPS, then we will drop the first group so that + * we can send one more group over the wire. + */ + + + if (count < NGROUPS) { + pgid = kauth_cred_getgid(cred); + if (pgid == groups[0]) { + maxcount -= 1; + for (i = 0; i < maxcount; i++) { + groups[i] = groups[i+1]; + } + } + } + + return (maxcount); +} + int nfsm_rpchead2(struct nfsmount *nmp, int sotype, int prog, int vers, int proc, int auth_type, kauth_cred_t cred, struct nfsreq *req, mbuf_t mrest, u_int64_t *xidp, mbuf_t *mreqp) { mbuf_t mreq, mb; - int error, i, grpsiz, auth_len = 0, authsiz, reqlen; + int error, i, auth_len = 0, authsiz, reqlen; size_t headlen; struct nfsm_chain nmreq; + gid_t grouplist[NGROUPS]; + int groupcount; /* calculate expected auth length */ switch (auth_type) { @@ -1045,19 +1094,14 @@ nfsm_rpchead2(struct nfsmount *nmp, int sotype, int prog, int vers, int proc, in break; case RPCAUTH_SYS: { - gid_t grouplist[NGROUPS]; - int groupcount = NGROUPS; + int count = nmp->nm_numgrps < NGROUPS ? nmp->nm_numgrps : NGROUPS; if (!cred) return (EINVAL); - - (void)kauth_cred_getgroups(cred, grouplist, &groupcount); - if (groupcount < 1) + groupcount = get_auxiliary_groups(cred, grouplist, count); + if (groupcount < 0) return (EINVAL); - - auth_len = (((((uint32_t)groupcount - 1) > nmp->nm_numgrps) ? - nmp->nm_numgrps : (groupcount - 1)) << 2) + - 5 * NFSX_UNSIGNED; + auth_len = ((uint32_t)groupcount + 5) * NFSX_UNSIGNED; break; } case RPCAUTH_KRB5: @@ -1129,21 +1173,14 @@ add_cred: error = mbuf_setnext(nmreq.nmc_mcur, mrest); break; case RPCAUTH_SYS: { - gid_t grouplist[NGROUPS]; - int groupcount; - nfsm_chain_add_32(error, &nmreq, RPCAUTH_SYS); nfsm_chain_add_32(error, &nmreq, authsiz); nfsm_chain_add_32(error, &nmreq, 0); /* stamp */ nfsm_chain_add_32(error, &nmreq, 0); /* zero-length hostname */ nfsm_chain_add_32(error, &nmreq, kauth_cred_getuid(cred)); /* UID */ nfsm_chain_add_32(error, &nmreq, kauth_cred_getgid(cred)); /* GID */ - grpsiz = (auth_len >> 2) - 5; - nfsm_chain_add_32(error, &nmreq, grpsiz);/* additional GIDs */ - memset(grouplist, 0, sizeof(grouplist)); - groupcount = grpsiz; - (void)kauth_cred_getgroups(cred, grouplist, &groupcount); - for (i = 1; i <= grpsiz; i++) + nfsm_chain_add_32(error, &nmreq, groupcount);/* additional GIDs */ + for (i = 0; i < groupcount; i++) nfsm_chain_add_32(error, &nmreq, grouplist[i]); /* And the verifier... */ @@ -1161,17 +1198,17 @@ add_cred: case RPCAUTH_KRB5P: error = nfs_gss_clnt_cred_put(req, &nmreq, mrest); if (error == ENEEDAUTH) { - gid_t grouplist[NGROUPS]; - int groupcount = NGROUPS; + int count = nmp->nm_numgrps < NGROUPS ? nmp->nm_numgrps : NGROUPS; + /* * Use sec=sys for this user */ error = 0; req->r_auth = auth_type = RPCAUTH_SYS; - (void)kauth_cred_getgroups(cred, grouplist, &groupcount); - auth_len = (((((uint32_t)groupcount - 1) > nmp->nm_numgrps) ? - nmp->nm_numgrps : (groupcount - 1)) << 2) + - 5 * NFSX_UNSIGNED; + groupcount = get_auxiliary_groups(cred, grouplist, count); + if (groupcount < 0) + return (EINVAL); + auth_len = ((uint32_t)groupcount + 5) * NFSX_UNSIGNED; authsiz = nfsm_rndup(auth_len); goto add_cred; } @@ -1939,6 +1976,27 @@ nfs_uaddr2sockaddr(const char *uaddr, struct sockaddr *addr) } +/* NFS Client debugging support */ +uint32_t nfs_debug_ctl; + +#include +#include + +void +nfs_printf(int facility, int level, const char *fmt, ...) 
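
The new sizing in nfsm_rpchead2() follows directly from the AUTH_SYS credential layout in RFC 5531: five fixed XDR words (stamp, zero-length machine name, uid, gid, gid count) plus one word per auxiliary gid, hence (groupcount + 5) * NFSX_UNSIGNED. The old expression derived it from groupcount - 1 because the primary gid used to be sent inside the group list. A standalone check of that arithmetic (NFSX_UNSIGNED is the 4-byte XDR word size):

	#include <assert.h>
	#include <stdint.h>

	#define NFSX_UNSIGNED	4	/* size of one XDR word */

	/*
	 * Body of an AUTH_SYS (AUTH_UNIX) credential, RFC 5531 appendix A:
	 * stamp, machinename (sent zero-length here), uid, gid, gid count,
	 * then `groupcount` auxiliary gids.
	 */
	static uint32_t
	auth_sys_len(uint32_t groupcount)
	{
		return (groupcount + 5) * NFSX_UNSIGNED;
	}

	int main(void)
	{
		assert(auth_sys_len(0) == 20);	/* no auxiliary groups */
		assert(auth_sys_len(16) == 84);	/* sixteen auxiliary gids */
		return 0;
	}
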
+{ + va_list ap; + + if ((uint32_t)level > NFS_DEBUG_LEVEL) + return; + if (NFS_DEBUG_FACILITY && !((uint32_t)facility & NFS_DEBUG_FACILITY)) + return; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + #endif /* NFSCLIENT */ /* @@ -2100,12 +2158,6 @@ nfsrv_namei( /* Check for encountering a symbolic link */ if (cnp->cn_flags & ISSYMLINK) { -#if CONFIG_VFS_FUNNEL - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(nip->ni_dvp, NULL); - } -#endif /* CONFIG_VFS_FUNNEL */ if (cnp->cn_flags & (LOCKPARENT | WANTPARENT)) vnode_put(nip->ni_dvp); if (nip->ni_vp) { @@ -2295,7 +2347,7 @@ nfsm_chain_get_sattr( { int error = 0; uint32_t val = 0; - uint64_t val64; + uint64_t val64 = 0; struct timespec now; if (nd->nd_vers == NFS_VER2) { @@ -2486,12 +2538,13 @@ nfsrv_hang_addrlist(struct nfs_export *nx, struct user_nfs_export_args *unxa) * Seems silly to initialize every AF when most are not * used, do so on demand here */ - for (dom = domains; dom; dom = dom->dom_next) + TAILQ_FOREACH(dom, &domains, dom_entry) { if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **)&nx->nx_rtable[i], dom->dom_rtoffset); break; } + } if ((rnh = nx->nx_rtable[i]) == 0) { if (IS_VALID_CRED(cred)) kauth_cred_unref(&cred); diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index c79ab007e..ceeb803ff 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -168,6 +168,8 @@ SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_L SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, ""); SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, ""); SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, ""); +SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, ""); + #endif /* NFSCLIENT */ @@ -496,7 +498,7 @@ out: return (error); } -extern struct fileops vnops; +extern const struct fileops vnops; /* * syscall for the rpc.lockd to use to translate a NFS file handle into @@ -606,7 +608,6 @@ fhopen( proc_t p, fp = nfp; fp->f_fglob->fg_flag = fmode & FMASK; - fp->f_fglob->fg_type = DTYPE_VNODE; fp->f_fglob->fg_ops = &vnops; fp->f_fglob->fg_data = (caddr_t)vp; @@ -622,7 +623,7 @@ fhopen( proc_t p, type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; - if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) { + if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) { struct vfs_context context = *vfs_context_current(); /* Modify local copy (to not damage thread copy) */ context.vc_ucred = fp->f_fglob->fg_cred; diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index 1c571a21f..50af3f4c3 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
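The nfs_printf() body above gates output on a verbosity level and a facility mask derived from nfs_debug_ctl, the word exposed through the new debug_ctl sysctl. NFS_DEBUG_LEVEL and NFS_DEBUG_FACILITY are macros over that word which this hunk does not show, so the bit layout in this user-space sketch is an assumption:

```c
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t nfs_debug_ctl; /* sysctl-settable in the real code */

#define DBG_LEVEL(ctl)    ((ctl) & 0xf)            /* assumed layout */
#define DBG_FACILITY(ctl) (((ctl) >> 4) & 0xffff)  /* assumed layout */

static void
nfs_printf_sketch(uint32_t facility, uint32_t level, const char *fmt, ...)
{
	va_list ap;

	/* Suppress messages more verbose than the configured level. */
	if (level > DBG_LEVEL(nfs_debug_ctl))
		return;
	/* If a facility mask is configured, the message must match it. */
	if (DBG_FACILITY(nfs_debug_ctl) &&
	    !(facility & DBG_FACILITY(nfs_debug_ctl)))
		return;

	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}

int
main(void)
{
	nfs_debug_ctl = 0x7 | (0x2 << 4); /* level 7, facility bit 0x2 */
	nfs_printf_sketch(0x2, 7, "shown: %d\n", 1);
	nfs_printf_sketch(0x4, 7, "filtered out\n");
	return 0;
}
```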
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -2700,6 +2700,9 @@ mountnfs( nmp->nm_deadtimeout = 0; nmp->nm_curdeadtimeout = 0; NFS_BITMAP_SET(nmp->nm_flags, NFS_MFLAG_NOACL); + nmp->nm_realm = NULL; + nmp->nm_principal = NULL; + nmp->nm_sprinc = NULL; } mattrs = nmp->nm_mattrs; @@ -3040,6 +3043,50 @@ mountnfs( } nfsmerr_if(error); + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REALM)) { + xb_get_32(error, &xb, len); + if (!error && ((len < 1) || (len > MAXPATHLEN))) + error = EINVAL; + nfsmerr_if(error); + /* allocate an extra byte for a leading '@' if it's not already prepended to the realm */ + MALLOC(nmp->nm_realm, char *, len+2, M_TEMP, M_WAITOK|M_ZERO); + if (!nmp->nm_realm) + error = ENOMEM; + nfsmerr_if(error); + error = xb_get_bytes(&xb, nmp->nm_realm, len, 0); + if (error == 0 && *nmp->nm_realm != '@') { + bcopy(nmp->nm_realm, &nmp->nm_realm[1], len); + nmp->nm_realm[0] = '@'; + } + } + nfsmerr_if(error); + + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_PRINCIPAL)) { + xb_get_32(error, &xb, len); + if (!error && ((len < 1) || (len > MAXPATHLEN))) + error = EINVAL; + nfsmerr_if(error); + MALLOC(nmp->nm_principal, char *, len+1, M_TEMP, M_WAITOK|M_ZERO); + if (!nmp->nm_principal) + error = ENOMEM; + nfsmerr_if(error); + error = xb_get_bytes(&xb, nmp->nm_principal, len, 0); + } + nfsmerr_if(error); + + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SVCPRINCIPAL)) { + xb_get_32(error, &xb, len); + if (!error && ((len < 1) || (len > MAXPATHLEN))) + error = EINVAL; + nfsmerr_if(error); + MALLOC(nmp->nm_sprinc, char *, len+1, M_TEMP, M_WAITOK|M_ZERO); + if (!nmp->nm_sprinc) + error = ENOMEM; + nfsmerr_if(error); + error = xb_get_bytes(&xb, nmp->nm_sprinc, len, 0); + } + nfsmerr_if(error); + /* * Sanity check/finalize settings. */ @@ -3109,8 +3156,6 @@ mountnfs( NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NONAMEDATTR); NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_NOACL); NFS_BITMAP_CLR(nmp->nm_flags, NFS_MFLAG_ACLONLY); - if (IS_VALID_CRED(nmp->nm_mcred)) - kauth_cred_unref(&nmp->nm_mcred); } nfsmerr_if(error); @@ -4355,6 +4400,13 @@ nfs_mount_cleanup(struct nfsmount *nmp) nfs_fs_locations_cleanup(&nmp->nm_locations); + if (nmp->nm_realm) + FREE(nmp->nm_realm, M_TEMP); + if (nmp->nm_principal) + FREE(nmp->nm_principal, M_TEMP); + if (nmp->nm_sprinc) + FREE(nmp->nm_sprinc, M_TEMP); + if (nmp->nm_args) xb_free(nmp->nm_args); lck_mtx_destroy(&nmp->nm_lock, nfs_mount_grp); @@ -4839,7 +4891,13 @@ nfs_mountinfo_assemble(struct nfsmount *nmp, struct xdrbuf *xb) NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFLAGS); if (origargsvers < NFS_ARGSVERSION_XDR) NFS_BITMAP_SET(mattrs, NFS_MATTR_MNTFROM); - + if (nmp->nm_realm) + NFS_BITMAP_SET(mattrs, NFS_MATTR_REALM); + if (nmp->nm_principal) + NFS_BITMAP_SET(mattrs, NFS_MATTR_PRINCIPAL); + if (nmp->nm_sprinc) + NFS_BITMAP_SET(mattrs, NFS_MATTR_SVCPRINCIPAL); + /* set up current mount flags bitmap */ /* first set the flags that we will be setting - either on OR off */ NFS_BITMAP_ZERO(mflags_mask, NFS_MFLAG_BITMAP_LEN); @@ -5007,6 +5065,13 @@ nfs_mountinfo_assemble(struct nfsmount *nmp, struct xdrbuf *xb) if (origargsvers < NFS_ARGSVERSION_XDR) xb_add_string(error, &xbinfo, vfs_statfs(nmp->nm_mountp)->f_mntfromname, strlen(vfs_statfs(nmp->nm_mountp)->f_mntfromname)); /* MNTFROM */ + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_REALM)) + xb_add_string(error, &xbinfo, nmp->nm_realm, strlen(nmp->nm_realm)); + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_PRINCIPAL)) + xb_add_string(error, &xbinfo, nmp->nm_principal, strlen(nmp->nm_principal)); + if (NFS_BITMAP_ISSET(mattrs, NFS_MATTR_SVCPRINCIPAL)) + 
xb_add_string(error, &xbinfo, nmp->nm_sprinc, strlen(nmp->nm_sprinc)); + curargs_end_offset = xb_offset(&xbinfo); /* NFS_MIATTR_CUR_LOC_INDEX */ diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index eb636101d..e771822d8 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -119,6 +119,8 @@ #include #include +#define NFS_VNOP_DBG(...) NFS_DBG(NFS_FAC_VNOP, 7, ## __VA_ARGS__) + /* * NFS vnode ops */ @@ -571,6 +573,21 @@ nfs_vnop_access( * in the cache. */ + /* + * In addition, if the kernel is checking for access (KAUTH_VNODE_ACCESS + * not set), just return. At this moment we do not know what the state of + * the server is, and whatever we get back, be it yea or nay, is + * going to be stale. Finder (Desktop services/FileURL) might hang when + * going over the wire while just asking getattrlist for the root's FSID, + * since we are going to be called to see if we're authorized for + * search. Since we are returning without checking the cache and/or + * going over the wire, it makes no sense to update the cache. + * + * N.B. This is also the strategy that SMB is using. + */ + if (!(ap->a_action & KAUTH_VNODE_ACCESS)) + return (0); + /* * Convert KAUTH primitives to NFS access rights. */ @@ -1424,11 +1441,16 @@ nfsmout: cache_purge(vp); np->n_ncgen++; NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap); + NFS_VNOP_DBG("Purge directory 0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(vp)); } if (NFS_CHANGED(nfsvers, np, nvap)) { FSDBG(513, -1, np, -1, np); - if (vtype == VDIR) + if (vtype == VDIR) { + NFS_VNOP_DBG("Invalidate directory 0x%llx\n", + (uint64_t)VM_KERNEL_ADDRPERM(vp)); nfs_invaldir(np); + } nfs_node_unlock(np); if (wanted) wakeup(np); @@ -1467,6 +1489,28 @@ nfsmout: /* * NFS getattr call from vfs. */ + +/* + * The attributes we support over the wire. + * We also get fsid, but the vfs layer gets it out of the mount + * structure after calling us, so there's no need to return it; + * Finder expects to call getattrlist just looking for the FSID + * without hanging on a non-responsive server. + */ +#define NFS3_SUPPORTED_VATTRS \ + (VNODE_ATTR_va_rdev | \ + VNODE_ATTR_va_nlink | \ + VNODE_ATTR_va_data_size | \ + VNODE_ATTR_va_data_alloc | \ + VNODE_ATTR_va_uid | \ + VNODE_ATTR_va_gid | \ + VNODE_ATTR_va_mode | \ + VNODE_ATTR_va_modify_time | \ + VNODE_ATTR_va_change_time | \ + VNODE_ATTR_va_access_time | \ + VNODE_ATTR_va_fileid | \ + VNODE_ATTR_va_type) + int nfs3_vnop_getattr( struct vnop_getattr_args /* { @@ -1481,6 +1525,19 @@ nfs3_vnop_getattr( struct vnode_attr *vap = ap->a_vap; dev_t rdev; + /* + * Let's not go over the wire if we don't support any of the requested attributes. + * Just fall through at the VFS layer and let it cons up what it needs. + */ + /* Return the io size no matter what, since we don't go over the wire for this */ + VATTR_RETURN(vap, va_iosize, nfs_iosize); + if ((vap->va_active & NFS3_SUPPORTED_VATTRS) == 0) + return (0); + + if (VATTR_IS_ACTIVE(ap->a_vap, va_name)) + NFS_VNOP_DBG("Getting attrs for 0x%llx, vname is %s\n", + (uint64_t)VM_KERNEL_ADDRPERM(ap->a_vp), + ap->a_vp->v_name ? 
ap->a_vp->v_name : "empty"); error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED); if (error) return (error); @@ -1496,7 +1553,6 @@ nfs3_vnop_getattr( VATTR_RETURN(vap, va_fileid, nva.nva_fileid); VATTR_RETURN(vap, va_data_size, nva.nva_size); VATTR_RETURN(vap, va_data_alloc, nva.nva_bytes); - VATTR_RETURN(vap, va_iosize, nfs_iosize); vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS]; vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS]; VATTR_SET_SUPPORTED(vap, va_access_time); @@ -3071,11 +3127,12 @@ nfs_write_rpc2( { struct nfsmount *nmp; int error = 0, nfsvers; - int backup, wverfset, commit, committed; + int wverfset, commit, committed; uint64_t wverf = 0, wverf2; size_t nmwsize, totalsize, tsiz, len, rlen; struct nfsreq rq, *req = &rq; uint32_t stategenid = 0, vrestart = 0, restart = 0; + uio_t uio_save = NULL; #if DIAGNOSTIC /* XXX limitation based on need to back up uio on short write */ @@ -3098,6 +3155,11 @@ nfs_write_rpc2( return (EFBIG); } + uio_save = uio_duplicate(uio); + if (uio_save == NULL) { + return (EIO); + } + while (tsiz > 0) { len = (tsiz > nmwsize) ? nmwsize : tsiz; FSDBG(537, np, uio_offset(uio), len, 0); @@ -3139,8 +3201,9 @@ nfs_write_rpc2( /* check for a short write */ if (rlen < len) { - backup = len - rlen; - uio_pushback(uio, backup); + /* Reset the uio to reflect the actual transfer */ + *uio = *uio_save; + uio_update(uio, totalsize - (tsiz - rlen)); len = rlen; } @@ -3161,13 +3224,14 @@ nfs_write_rpc2( error = EIO; break; } - backup = totalsize - tsiz; - uio_pushback(uio, backup); + *uio = *uio_save; // Reset the uio back to the start committed = NFS_WRITE_FILESYNC; wverfset = 0; tsiz = totalsize; } } + if (uio_save) + uio_free(uio_save); if (wverfset && wverfp) *wverfp = wverf; *iomodep = committed; @@ -3334,7 +3398,7 @@ nfs3_vnop_mknod( int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0; struct timespec premtime = { 0, 0 }; u_int32_t rdev; - u_int64_t xid, dxid; + u_int64_t xid = 0, dxid; int nfsvers, gotuid, gotgid; struct nfsm_chain nmreq, nmrep; struct nfsreq rq, *req = &rq; @@ -4254,7 +4318,7 @@ nfs3_vnop_symlink( struct timespec premtime = { 0, 0 }; vnode_t newvp = NULL; int nfsvers, gotuid, gotgid; - u_int64_t xid, dxid; + u_int64_t xid = 0, dxid; nfsnode_t np = NULL; nfsnode_t dnp = VTONFS(dvp); struct nfsmount *nmp; @@ -4412,7 +4476,7 @@ nfs3_vnop_mkdir( int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0; struct timespec premtime = { 0, 0 }; int nfsvers, gotuid, gotgid; - u_int64_t xid, dxid; + u_int64_t xid= 0, dxid; fhandle_t fh; struct nfsm_chain nmreq, nmrep; struct nfsreq rq, *req = &rq; @@ -6597,18 +6661,22 @@ nfs_vnop_ioctl( { vfs_context_t ctx = ap->a_context; vnode_t vp = ap->a_vp; + struct nfsmount *mp = VTONMP(vp); int error = ENOTTY; + if (mp == NULL) + return (ENXIO); + switch (ap->a_command) { case F_FULLFSYNC: if (vnode_vfsisrdonly(vp)) return (EROFS); - if (!VTONMP(vp)) - return (ENXIO); error = nfs_flush(VTONFS(vp), MNT_WAIT, vfs_context_thread(ctx), 0); break; - + case NFS_FSCTL_DESTROY_CRED: + error = nfs_gss_clnt_ctx_destroy(mp, vfs_context_ucred(ctx)); + break; } return (error); @@ -6829,7 +6897,7 @@ cancel: * erroneous. 
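The nfs_write_rpc2() change above replaces the uio_pushback() arithmetic with a duplicate-and-replay scheme: uio_duplicate() snapshots the starting position, and on a short write the code restores the snapshot and advances by the bytes actually transferred. A user-space analogue of that pattern (the cursor struct here is illustrative; in the kernel uio_t is opaque and the real KPIs are uio_duplicate(), uio_update(), and uio_free()):

```c
#include <assert.h>
#include <stddef.h>

struct cursor {
	size_t off;   /* current offset */
	size_t resid; /* bytes remaining */
};

static void
cursor_advance(struct cursor *c, size_t n)
{
	c->off += n;
	c->resid -= n;
}

int
main(void)
{
	size_t totalsize = 100, tsiz = 60, len = 40, rlen = 25;
	struct cursor uio = { 0, totalsize };
	struct cursor uio_save = uio;           /* uio_duplicate() */

	cursor_advance(&uio, totalsize - tsiz); /* earlier full writes: 40 bytes */
	cursor_advance(&uio, len);              /* this request consumed 40... */

	/* ...but the server only wrote rlen of them: reset and replay the
	 * total actually transferred, as the patch does with uio_update(). */
	uio = uio_save;
	cursor_advance(&uio, totalsize - (tsiz - rlen));

	assert(uio.off == 65 && uio.resid == 35); /* 40 prior + 25 short write */
	return 0;
}
```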
*/ char nfs_pageouterrorhandler(int); -enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, RETRYWITHSLEEP, SEVER}; +enum actiontype {NOACTION, DUMP, DUMPANDLOG, RETRY, SEVER}; #define NFS_ELAST 88 static u_char errorcount[NFS_ELAST+1]; /* better be zeros when initialized */ static const char errortooutcome[NFS_ELAST+1] = { @@ -7372,11 +7440,6 @@ cancel: case RETRY: abortflags = UPL_ABORT_FREE_ON_EMPTY; break; - case RETRYWITHSLEEP: - abortflags = UPL_ABORT_FREE_ON_EMPTY; - /* pri unused. PSOCK for placeholder. */ - tsleep(&lbolt, PSOCK, "nfspageout", 0); - break; case SEVER: /* not implemented */ default: NP(np, "nfs_pageout: action %d not expected", action); diff --git a/bsd/nfs/nfsmount.h b/bsd/nfs/nfsmount.h index 574b5a70e..4e9e88e6c 100644 --- a/bsd/nfs/nfsmount.h +++ b/bsd/nfs/nfsmount.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -199,6 +199,7 @@ struct nfs_socket_search { uint32_t nss_protocol; /* RPC protocol */ uint32_t nss_version; /* RPC protocol version */ uint32_t nss_flags; /* (see below) */ + int nss_addrcnt; /* Number addresses to try or left */ int nss_timeo; /* how long we are willing to wait */ int nss_error; /* best error we've gotten so far */ }; @@ -240,7 +241,7 @@ struct nfs_client_id { int nci_idlen; /* length of client id buffer */ }; TAILQ_HEAD(nfsclientidlist, nfs_client_id); -__private_extern__ struct nfsclientidlist nfsclientids; +extern struct nfsclientidlist nfsclientids; /* * Mount structure. @@ -254,10 +255,13 @@ struct nfsmount { uint32_t nm_mflags_mask[NFS_MFLAG_BITMAP_LEN]; /* mount flags mask in mount args */ uint32_t nm_mflags[NFS_MFLAG_BITMAP_LEN]; /* mount flags in mount args */ uint32_t nm_flags[NFS_MFLAG_BITMAP_LEN]; /* current mount flags (soft, intr, etc...) 
*/ + char * nm_realm; /* Kerberos realm to use */ + char * nm_principal; /* GSS principal to use on initial mount */ + char * nm_sprinc; /* Kerberos principal of the server */ int nm_state; /* Internal state flags */ int nm_vers; /* NFS version */ struct nfs_funcs *nm_funcs; /* version-specific functions */ - kauth_cred_t nm_mcred; /* credential used for the mount (v4) */ + kauth_cred_t nm_mcred; /* credential used for the mount */ mount_t nm_mountp; /* VFS structure for this filesystem */ nfsnode_t nm_dnp; /* root directory nfsnode pointer */ struct nfs_fs_locations nm_locations; /* file system locations */ diff --git a/bsd/nfs/nfsnode.h b/bsd/nfs/nfsnode.h index adf50cc52..94ddeef1d 100644 --- a/bsd/nfs/nfsnode.h +++ b/bsd/nfs/nfsnode.h @@ -197,11 +197,11 @@ TAILQ_HEAD(nfsbuffreehead, nfsbuf); #define NFSNOLIST ((void*)0xdeadbeef) -__private_extern__ lck_mtx_t *nfs_buf_mutex; -__private_extern__ int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax; -__private_extern__ int nfsbuffreecnt, nfsbuffreemetacnt, nfsbufdelwricnt, nfsneedbuffer; -__private_extern__ int nfs_nbdwrite; -__private_extern__ struct nfsbuffreehead nfsbuffree, nfsbufdelwri; +extern lck_mtx_t *nfs_buf_mutex; +extern int nfsbufcnt, nfsbufmin, nfsbufmax, nfsbufmetacnt, nfsbufmetamax; +extern int nfsbuffreecnt, nfsbuffreemetacnt, nfsbufdelwricnt, nfsneedbuffer; +extern int nfs_nbdwrite; +extern struct nfsbuffreehead nfsbuffree, nfsbufdelwri; #ifdef NFSBUFDEBUG #define NFSBUFCNTCHK() \ @@ -398,8 +398,8 @@ struct nfs_vattr { } while (0) -__private_extern__ lck_grp_t *nfs_open_grp; -__private_extern__ uint32_t nfs_open_owner_seqnum, nfs_lock_owner_seqnum; +extern lck_grp_t *nfs_open_grp; +extern uint32_t nfs_open_owner_seqnum, nfs_lock_owner_seqnum; /* * NFSv4 open owner structure - one per cred per mount @@ -759,7 +759,7 @@ struct nfsnode { #define NFSTOV(np) ((np)->n_vnode) /* nfsnode hash table mutex */ -__private_extern__ lck_mtx_t *nfs_node_hash_mutex; +extern lck_mtx_t *nfs_node_hash_mutex; /* * printf-like helper macro that also outputs node name. 
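The recurring __private_extern__-to-extern conversions in these headers drop only a visibility annotation: on Darwin, __private_extern__ amounts to an extern declaration whose symbol is hidden outside its linkage unit. A sketch of the distinction, assuming the usual GCC/clang expansion rather than quoting <sys/cdefs.h>:

```c
/* Stand-in for __private_extern__ under GCC/clang (an assumption for
 * illustration): extern linkage, but the symbol is not exported. */
#define my_private_extern __attribute__((visibility("hidden"))) extern

my_private_extern int hidden_counter; /* resolvable inside this image only */
extern int visible_counter;           /* default (exported) visibility */

int hidden_counter = 0;
int visible_counter = 0;
```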
@@ -780,9 +780,9 @@ struct nfsiod { }; TAILQ_HEAD(nfsiodlist, nfsiod); TAILQ_HEAD(nfsiodmountlist, nfsmount); -__private_extern__ struct nfsiodlist nfsiodfree, nfsiodwork; -__private_extern__ struct nfsiodmountlist nfsiodmounts; -__private_extern__ lck_mtx_t *nfsiod_mutex; +extern struct nfsiodlist nfsiodfree, nfsiodwork; +extern struct nfsiodmountlist nfsiodmounts; +extern lck_mtx_t *nfsiod_mutex; #if defined(KERNEL) diff --git a/bsd/nfs/nfsproto.h b/bsd/nfs/nfsproto.h index ec6bc9311..f081170df 100644 --- a/bsd/nfs/nfsproto.h +++ b/bsd/nfs/nfsproto.h @@ -358,9 +358,9 @@ typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5, ((uint32_t*)(B))[__i] = 0; \ } while (0) -__private_extern__ uint32_t nfs_fs_attr_bitmap[NFS_ATTR_BITMAP_LEN]; -__private_extern__ uint32_t nfs_object_attr_bitmap[NFS_ATTR_BITMAP_LEN]; -__private_extern__ uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN]; +extern uint32_t nfs_fs_attr_bitmap[NFS_ATTR_BITMAP_LEN]; +extern uint32_t nfs_object_attr_bitmap[NFS_ATTR_BITMAP_LEN]; +extern uint32_t nfs_getattr_bitmap[NFS_ATTR_BITMAP_LEN]; #define NFS_CLEAR_ATTRIBUTES(A) NFS_BITMAP_ZERO((A), NFS_ATTR_BITMAP_LEN) #define NFS_COPY_ATTRIBUTES(SRC, DST) \ diff --git a/bsd/nfs/nfsrvcache.h b/bsd/nfs/nfsrvcache.h index 06bc9baeb..5d3e311b5 100644 --- a/bsd/nfs/nfsrvcache.h +++ b/bsd/nfs/nfsrvcache.h @@ -125,8 +125,8 @@ struct nfsrvcache { #define RC_INETADDR 0x20 #define RC_NAM 0x40 -__private_extern__ lck_grp_t *nfsrv_reqcache_lck_grp; -__private_extern__ lck_mtx_t *nfsrv_reqcache_mutex; +extern lck_grp_t *nfsrv_reqcache_lck_grp; +extern lck_mtx_t *nfsrv_reqcache_mutex; #endif /* __APPLE_API_PRIVATE */ #endif /* _NFS_NFSRVCACHE_H_ */ diff --git a/bsd/security/Makefile b/bsd/security/Makefile index 92974f6e2..875c99d6a 100644 --- a/bsd/security/Makefile +++ b/bsd/security/Makefile @@ -3,34 +3,14 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) INSTINC_SUBDIRS = \ audit -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -INSTINC_SUBDIRS_ARM = \ - EXPINC_SUBDIRS = \ audit -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS_ARM = \ - -SETUP_SUBDIRS = \ - -COMP_SUBDIRS = \ - -INST_SUBDIRS = \ - - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/security/audit/Makefile b/bsd/security/audit/Makefile index ac552f60d..e2cf77f5a 100644 --- a/bsd/security/audit/Makefile +++ b/bsd/security/audit/Makefile @@ -3,31 +3,12 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -INSTINC_SUBDIRS_ARM = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS_ARM = \ - DATAFILES = \ audit_ioctl.h -PRIVATE_DATAFILES = - INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = security/audit @@ -38,8 +19,6 @@ EXPORT_MI_LIST = ${DATAFILES} audit.h audit_bsd.h audit_private.h EXPORT_MI_DIR = security/audit -INSTALL_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/security/audit/audit_arg.c b/bsd/security/audit/audit_arg.c index eb6d5d434..19ba08a57 100644 --- a/bsd/security/audit/audit_arg.c +++ b/bsd/security/audit/audit_arg.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 1999-2009 
Apple Inc. + * Copyright (c) 1999-2012 Apple Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -594,7 +594,7 @@ audit_arg_file(struct kaudit_record *ar, __unused proc_t p, struct sockaddr_in *sin; struct sockaddr_in6 *sin6; - switch (fp->f_fglob->fg_type) { + switch (FILEGLOB_DTYPE(fp->f_fglob)) { case DTYPE_VNODE: /* case DTYPE_FIFO: */ audit_arg_vnpath_withref(ar, @@ -603,15 +603,13 @@ audit_arg_file(struct kaudit_record *ar, __unused proc_t p, case DTYPE_SOCKET: so = (struct socket *)fp->f_fglob->fg_data; - if (INP_CHECK_SOCKAF(so, PF_INET)) { + if (SOCK_CHECK_DOM(so, PF_INET)) { if (so->so_pcb == NULL) break; ar->k_ar.ar_arg_sockinfo.sai_type = so->so_type; - ar->k_ar.ar_arg_sockinfo.sai_domain = - INP_SOCKAF(so); - ar->k_ar.ar_arg_sockinfo.sai_protocol = - so->so_proto->pr_protocol; + ar->k_ar.ar_arg_sockinfo.sai_domain = SOCK_DOM(so); + ar->k_ar.ar_arg_sockinfo.sai_protocol = SOCK_PROTO(so); pcb = (struct inpcb *)so->so_pcb; sin = (struct sockaddr_in *) &ar->k_ar.ar_arg_sockinfo.sai_faddr; @@ -623,15 +621,13 @@ audit_arg_file(struct kaudit_record *ar, __unused proc_t p, sin->sin_port = pcb->inp_lport; ARG_SET_VALID(ar, ARG_SOCKINFO); } - if (INP_CHECK_SOCKAF(so, PF_INET6)) { + if (SOCK_CHECK_DOM(so, PF_INET6)) { if (so->so_pcb == NULL) break; ar->k_ar.ar_arg_sockinfo.sai_type = so->so_type; - ar->k_ar.ar_arg_sockinfo.sai_domain = - INP_SOCKAF(so); - ar->k_ar.ar_arg_sockinfo.sai_protocol = - so->so_proto->pr_protocol; + ar->k_ar.ar_arg_sockinfo.sai_domain = SOCK_DOM(so); + ar->k_ar.ar_arg_sockinfo.sai_protocol = SOCK_PROTO(so); pcb = (struct inpcb *)so->so_pcb; sin6 = (struct sockaddr_in6 *) &ar->k_ar.ar_arg_sockinfo.sai_faddr; diff --git a/bsd/security/audit/audit_pipe.c b/bsd/security/audit/audit_pipe.c index 943cac431..f2efbbe08 100644 --- a/bsd/security/audit/audit_pipe.c +++ b/bsd/security/audit/audit_pipe.c @@ -646,6 +646,7 @@ audit_pipe_free(struct audit_pipe *ap) audit_pipe_flush(ap); cv_destroy(&ap->ap_cv); AUDIT_PIPE_SX_LOCK_DESTROY(ap); + AUDIT_PIPE_UNLOCK(ap); AUDIT_PIPE_LOCK_DESTROY(ap); #ifndef __APPLE__ knlist_destroy(&ap->ap_selinfo.si_note); @@ -1006,7 +1007,7 @@ audit_pipe_read(dev_t dev, struct uio *uio, __unused int flag) KASSERT(ape->ape_record_len > ap->ap_qoffset, ("audit_pipe_read: record_len > qoffset (1)")); - toread = MIN(ape->ape_record_len - ap->ap_qoffset, + toread = MIN((int)(ape->ape_record_len - ap->ap_qoffset), uio_resid(uio)); AUDIT_PIPE_UNLOCK(ap); error = uiomove((char *)ape->ape_record + ap->ap_qoffset, diff --git a/bsd/security/audit/audit_session.c b/bsd/security/audit/audit_session.c index 9d26833f5..ab38cc22a 100644 --- a/bsd/security/audit/audit_session.c +++ b/bsd/security/audit/audit_session.c @@ -1660,6 +1660,7 @@ audit_sdev_free(struct audit_sdev *asdev) audit_sdev_flush(asdev); cv_destroy(&asdev->asdev_cv); AUDIT_SDEV_SX_LOCK_DESTROY(asdev); + AUDIT_SDEV_UNLOCK(asdev); AUDIT_SDEV_LOCK_DESTROY(asdev); TAILQ_REMOVE(&audit_sdev_list, asdev, asdev_list); @@ -1911,7 +1912,7 @@ audit_sdev_read(dev_t dev, struct uio *uio, __unused int flag) KASSERT(ase->ase_record_len > asdev->asdev_qoffset, ("audit_sdev_read: record_len > qoffset (1)")); - toread = MIN(ase->ase_record_len - asdev->asdev_qoffset, + toread = MIN((int)(ase->ase_record_len - asdev->asdev_qoffset), uio_resid(uio)); AUDIT_SDEV_UNLOCK(asdev); error = uiomove((char *) ase->ase_record + asdev->asdev_qoffset, diff --git a/bsd/security/audit/audit_syscalls.c b/bsd/security/audit/audit_syscalls.c index 359717f7b..a99464aef 100644 --- 
a/bsd/security/audit/audit_syscalls.c +++ b/bsd/security/audit/audit_syscalls.c @@ -825,7 +825,12 @@ int getaudit_addr(proc_t p, struct getaudit_addr_args *uap, __unused int32_t *retval) { +#if CONFIG_MACF + int error = mac_proc_check_getaudit(p); + if (error) + return (error); +#endif /* CONFIG_MACF */ WARN_IF_AINFO_ADDR_CHANGED(uap->length, sizeof(auditinfo_addr_t), "getaudit_addr(2)", "auditinfo_addr_t"); diff --git a/bsd/sys/Makefile b/bsd/sys/Makefile index 211d741b9..30d592d47 100644 --- a/bsd/sys/Makefile +++ b/bsd/sys/Makefile @@ -7,9 +7,11 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = +INSTINC_SUBDIRS = \ + _types -EXPINC_SUBDIRS = +EXPINC_SUBDIRS = \ + _types # Installs header file for user level - # $(DSTROOT)/System/Library/Frameworks/System.framework/Headers @@ -46,16 +48,17 @@ PRIVATE_DATAFILES = \ fsctl.h \ fsgetpath.h \ fslog.h \ + guarded.h \ imgsrc.h \ ipcs.h \ kas_info.h \ + kern_overrides.h \ shm_internal.h \ spawn_internal.h \ tree.h \ ux_exception.h \ - proc_info.h \ process_policy.h \ - vnioctl.h \ + proc_uuid_policy.h \ priv.h # Installs header file for kernel extensions - @@ -100,6 +103,7 @@ PRIVATE_KERNELFILES = \ mach_swapon.h \ msgbuf.h \ eventvar.h \ + pthread_shims.h \ quota.h \ sem_internal.h \ shm_internal.h \ @@ -122,7 +126,7 @@ INSTALL_MI_DIR = sys EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} linker_set.h bsdtask_info.h pthread_internal.h filedesc.h pipe.h resourcevar.h semaphore.h \ vnode_internal.h proc_internal.h file_internal.h mount_internal.h \ - uio_internal.h tree.h munge.h + uio_internal.h tree.h munge.h kern_tests.h EXPORT_MI_GEN_LIST = syscall.h sysproto.h diff --git a/bsd/sys/_structs.h b/bsd/sys/_structs.h index 102101f0b..99e4c3250 100644 --- a/bsd/sys/_structs.h +++ b/bsd/sys/_structs.h @@ -65,99 +65,32 @@ #ifdef __need_struct_sigaltstack #undef __need_struct_sigaltstack -/* Structure used in sigaltstack call. 
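The _structs.h rewrite that begins here follows one mechanical pattern: each __need_*-guarded inline definition moves into a tiny single-type header under sys/_types/, and _structs.h shrinks to a guarded #include of that header. A sketch of both halves for the timespec case, mirroring the definition deleted in the hunk that follows (the typedef at the top is a stand-in for what <sys/_types.h> really provides):

```c
typedef long __darwin_time_t; /* stand-in; really from <sys/_types.h> */

/* sys/_types/_timespec.h (sketch): the definition formerly inlined in
 * _structs.h, now in its own idempotent header. */
#ifndef _STRUCT_TIMESPEC
#define _STRUCT_TIMESPEC struct timespec
_STRUCT_TIMESPEC
{
	__darwin_time_t tv_sec;
	long            tv_nsec;
};
#endif /* _STRUCT_TIMESPEC */

/* sys/_structs.h consumer side after the refactor (sketch):
 *
 *   #ifdef __need_struct_timespec
 *   #undef __need_struct_timespec
 *   #include <sys/_types/_timespec.h>
 *   #endif
 *
 * Any header can now pull in exactly one type without growing the
 * __need_* request/response dance in _structs.h itself. */
```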
*/ -#ifndef _STRUCT_SIGALTSTACK -#if __DARWIN_UNIX03 -#define _STRUCT_SIGALTSTACK struct __darwin_sigaltstack -#else /* !__DARWIN_UNIX03 */ -#define _STRUCT_SIGALTSTACK struct sigaltstack -#endif /* __DARWIN_UNIX03 */ -_STRUCT_SIGALTSTACK -{ - void *ss_sp; /* signal stack base */ - __darwin_size_t ss_size; /* signal stack length */ - int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ -}; -#endif /* _STRUCT_SIGALTSTACK */ +#include #endif /* __need_struct_sigaltstack */ #ifdef __need_struct_timespec #undef __need_struct_timespec -#ifndef _STRUCT_TIMESPEC -#define _STRUCT_TIMESPEC struct timespec -_STRUCT_TIMESPEC -{ - __darwin_time_t tv_sec; - long tv_nsec; -}; -#endif /* _STRUCT_TIMESPEC */ +#include #endif /* __need_struct_timespec */ #ifdef __need_struct_timeval #undef __need_struct_timeval -#ifndef _STRUCT_TIMEVAL -#define _STRUCT_TIMEVAL struct timeval -_STRUCT_TIMEVAL -{ - __darwin_time_t tv_sec; /* seconds */ - __darwin_suseconds_t tv_usec; /* and microseconds */ -}; -#endif /* _STRUCT_TIMEVAL */ +#include #endif /* __need_struct_timeval */ #ifdef __need_struct_timeval32 #undef __need_struct_timeval32 -#ifndef _STRUCT_TIMEVAL32 -#define _STRUCT_TIMEVAL32 struct timeval32 -_STRUCT_TIMEVAL32 -{ - __int32_t tv_sec; /* seconds */ - __int32_t tv_usec; /* and microseconds */ -}; -#endif /* _STRUCT_TIMEVAL32 */ +#include #endif /* __need_struct_timeval32 */ #ifdef __need_struct_ucontext #undef __need_struct_ucontext -#ifndef _STRUCT_UCONTEXT -#if __DARWIN_UNIX03 -#define _STRUCT_UCONTEXT struct __darwin_ucontext -#else /* !__DARWIN_UNIX03 */ -#define _STRUCT_UCONTEXT struct ucontext -#endif /* __DARWIN_UNIX03 */ -_STRUCT_UCONTEXT -{ - int uc_onstack; - __darwin_sigset_t uc_sigmask; /* signal mask used by this context */ - _STRUCT_SIGALTSTACK uc_stack; /* stack used by this context */ - _STRUCT_UCONTEXT *uc_link; /* pointer to resuming context */ - __darwin_size_t uc_mcsize; /* size of the machine context passed in */ - _STRUCT_MCONTEXT *uc_mcontext; /* pointer to machine specific context */ -#ifdef _XOPEN_SOURCE - _STRUCT_MCONTEXT __mcontext_data; -#endif /* _XOPEN_SOURCE */ -}; -#endif /* _STRUCT_UCONTEXT */ +#include #endif /* __need_struct_ucontext */ #ifdef __need_struct_ucontext64 #undef __need_struct_ucontext64 -#ifndef _STRUCT_UCONTEXT64 -#if __DARWIN_UNIX03 -#define _STRUCT_UCONTEXT64 struct __darwin_ucontext64 -#else /* !__DARWIN_UNIX03 */ -#define _STRUCT_UCONTEXT64 struct ucontext64 -#endif /* __DARWIN_UNIX03 */ -_STRUCT_UCONTEXT64 -{ - int uc_onstack; - __darwin_sigset_t uc_sigmask; /* signal mask used by this context */ - _STRUCT_SIGALTSTACK uc_stack; /* stack used by this context */ - _STRUCT_UCONTEXT64 *uc_link; /* pointer to resuming context */ - __darwin_size_t uc_mcsize; /* size of the machine context passed in */ - _STRUCT_MCONTEXT64 *uc_mcontext64; /* pointer to machine specific context */ -}; -#endif /* _STRUCT_UCONTEXT64 */ +#include #endif /* __need_struct_ucontext64 */ #ifdef KERNEL @@ -167,173 +100,59 @@ _STRUCT_UCONTEXT64 */ #ifdef __need_struct_user_timespec #undef __need_struct_user_timespec -#ifndef _STRUCT_USER_TIMESPEC -#define _STRUCT_USER_TIMESPEC struct user_timespec -_STRUCT_USER_TIMESPEC -{ - user_time_t tv_sec; /* seconds */ - user_long_t tv_nsec; /* and nanoseconds */ -}; -#endif /* _STRUCT_USER_TIMESPEC */ +#include #endif /* __need_struct_user_timespec */ #ifdef __need_struct_user64_timespec #undef __need_struct_user64_timespec -#ifndef _STRUCT_USER64_TIMESPEC -#define _STRUCT_USER64_TIMESPEC struct user64_timespec -_STRUCT_USER64_TIMESPEC -{ - 
user64_time_t tv_sec; /* seconds */ - user64_long_t tv_nsec; /* and nanoseconds */ -}; -#endif /* _STRUCT_USER64_TIMESPEC */ +#include #endif /* __need_struct_user64_timespec */ #ifdef __need_struct_user32_timespec #undef __need_struct_user32_timespec -#ifndef _STRUCT_USER32_TIMESPEC -#define _STRUCT_USER32_TIMESPEC struct user32_timespec -_STRUCT_USER32_TIMESPEC -{ - user32_time_t tv_sec; /* seconds */ - user32_long_t tv_nsec; /* and nanoseconds */ -}; -#endif /* _STRUCT_USER32_TIMESPEC */ +#include #endif /* __need_struct_user32_timespec */ #ifdef __need_struct_user_timeval #undef __need_struct_user_timeval -#ifndef _STRUCT_USER_TIMEVAL -#define _STRUCT_USER_TIMEVAL struct user_timeval -_STRUCT_USER_TIMEVAL -{ - user_time_t tv_sec; /* seconds */ - __int32_t tv_usec; /* and microseconds */ -}; -#endif /* _STRUCT_USER_TIMEVAL */ +#include #endif /* __need_struct_user_timeval */ #ifdef __need_struct_user64_timeval #undef __need_struct_user64_timeval -#ifndef _STRUCT_USER64_TIMEVAL -#define _STRUCT_USER64_TIMEVAL struct user64_timeval -_STRUCT_USER64_TIMEVAL -{ - user64_time_t tv_sec; /* seconds */ - __int32_t tv_usec; /* and microseconds */ -}; -#endif /* _STRUCT_USER64_TIMEVAL */ +#include #endif /* __need_struct_user64_timeval */ #ifdef __need_struct_user32_timeval #undef __need_struct_user32_timeval -#ifndef _STRUCT_USER32_TIMEVAL -#define _STRUCT_USER32_TIMEVAL struct user32_timeval -_STRUCT_USER32_TIMEVAL -{ - user32_time_t tv_sec; /* seconds */ - __int32_t tv_usec; /* and microseconds */ -}; -#endif /* _STRUCT_USER32_TIMEVAL */ +#include #endif /* __need_struct_user32_timeval */ #ifdef __need_struct_user64_itimerval #undef __need_struct_user64_itimerval -#ifndef _STRUCT_USER64_ITIMERVAL -#define _STRUCT_USER64_ITIMERVAL struct user64_itimerval -_STRUCT_USER64_ITIMERVAL -{ - _STRUCT_USER64_TIMEVAL it_interval; /* timer interval */ - _STRUCT_USER64_TIMEVAL it_value; /* current value */ -}; -#endif /* _STRUCT_USER64_TIMEVAL */ +#include #endif /* __need_struct_user64_itimerval */ #ifdef __need_struct_user32_itimerval #undef __need_struct_user32_itimerval -#ifndef _STRUCT_USER32_ITIMERVAL -#define _STRUCT_USER32_ITIMERVAL struct user32_itimerval -_STRUCT_USER32_ITIMERVAL -{ - _STRUCT_USER32_TIMEVAL it_interval; /* timer interval */ - _STRUCT_USER32_TIMEVAL it_value; /* current value */ -}; -#endif /* _STRUCT_USER32_TIMEVAL */ +#include #endif /* __need_struct_user32_itimerval */ #endif /* KERNEL */ #ifdef __need_fd_set #undef __need_fd_set -#ifndef _FD_SET -#define _FD_SET -/* - * Select uses bit masks of file descriptors in longs. These macros - * manipulate such bit fields (the filesystem macros use chars). The - * extra protection here is to permit application redefinition above - * the default size. - */ -#ifdef FD_SETSIZE -#define __DARWIN_FD_SETSIZE FD_SETSIZE -#else /* !FD_SETSIZE */ -#define __DARWIN_FD_SETSIZE 1024 -#endif /* FD_SETSIZE */ -#define __DARWIN_NBBY 8 /* bits in a byte */ -#define __DARWIN_NFDBITS (sizeof(__int32_t) * __DARWIN_NBBY) /* bits per mask */ -#define __DARWIN_howmany(x, y) ((((x) % (y)) == 0) ? ((x) / (y)) : (((x) / (y)) + 1)) /* # y's == x bits? 
*/ - -__BEGIN_DECLS -typedef struct fd_set { - __int32_t fds_bits[__DARWIN_howmany(__DARWIN_FD_SETSIZE, __DARWIN_NFDBITS)]; -} fd_set; -__END_DECLS - -/* This inline avoids argument side-effect issues with FD_ISSET() */ -static __inline int -__darwin_fd_isset(int _n, const struct fd_set *_p) -{ - return (_p->fds_bits[_n/__DARWIN_NFDBITS] & (1<<(_n % __DARWIN_NFDBITS))); -} - -#define __DARWIN_FD_SET(n, p) do { int __fd = (n); ((p)->fds_bits[__fd/__DARWIN_NFDBITS] |= (1<<(__fd % __DARWIN_NFDBITS))); } while(0) -#define __DARWIN_FD_CLR(n, p) do { int __fd = (n); ((p)->fds_bits[__fd/__DARWIN_NFDBITS] &= ~(1<<(__fd % __DARWIN_NFDBITS))); } while(0) -#define __DARWIN_FD_ISSET(n, p) __darwin_fd_isset((n), (p)) - -#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3 -/* - * Use the built-in bzero function instead of the library version so that - * we do not pollute the namespace or introduce prototype warnings. - */ -#define __DARWIN_FD_ZERO(p) __builtin_bzero(p, sizeof(*(p))) -#else -#define __DARWIN_FD_ZERO(p) bzero(p, sizeof(*(p))) -#endif - -#define __DARWIN_FD_COPY(f, t) bcopy(f, t, sizeof(*(f))) -#endif /* _FD_SET */ +#include #endif /* __need_fd_set */ #ifdef __need_stack_t #undef __need_stack_t -#ifndef _STACK_T -#define _STACK_T -typedef _STRUCT_SIGALTSTACK stack_t; /* [???] signal stack */ -#endif /* _STACK_T */ #endif /* __need_stack_t */ #ifdef __need_ucontext_t #undef __need_ucontext_t -/* user context */ -#ifndef _UCONTEXT_T -#define _UCONTEXT_T -typedef _STRUCT_UCONTEXT ucontext_t; /* [???] user context */ -#endif /* _UCONTEXT_T */ #endif /* __need_ucontext_t */ #ifdef __need_ucontext64_t #undef __need_ucontext64_t -#ifndef _UCONTEXT64_T -#define _UCONTEXT64_T -typedef _STRUCT_UCONTEXT64 ucontext64_t; /* [???] user context */ -#endif /* _UCONTEXT64_T */ #endif /* __need_ucontext64_t */ diff --git a/bsd/sys/_types/Makefile b/bsd/sys/_types/Makefile new file mode 100644 index 000000000..e72f4a682 --- /dev/null +++ b/bsd/sys/_types/Makefile @@ -0,0 +1,151 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def +export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule +export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + + +include $(MakeInc_cmd) +include $(MakeInc_def) + +INSTINC_SUBDIRS = + +EXPINC_SUBDIRS = + +# Installs header file for user level - +# $(DSTROOT)/System/Library/Frameworks/System.framework/Headers +# $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders +# $(DSTROOT)/usr/include/ +DATAFILES = \ + ___offsetof.h \ + _blkcnt_t.h \ + _blksize_t.h \ + _clock_t.h \ + _ct_rune_t.h \ + _dev_t.h \ + _errno_t.h \ + _fd_clr.h \ + _fd_copy.h \ + _fd_def.h \ + _fd_isset.h \ + _fd_set.h \ + _fd_setsize.h \ + _fd_zero.h \ + _filesec_t.h \ + _fsblkcnt_t.h \ + _fsfilcnt_t.h \ + _gid_t.h \ + _guid_t.h \ + _id_t.h \ + _in_addr_t.h \ + _in_port_t.h \ + _ino64_t.h \ + _ino_t.h \ + _int16_t.h \ + _int32_t.h \ + _int64_t.h \ + _int8_t.h \ + _intptr_t.h \ + _iovec_t.h \ + _key_t.h \ + _mach_port_t.h \ + _mbstate_t.h \ + _mode_t.h \ + _nlink_t.h \ + _null.h \ + _o_dsync.h \ + _o_sync.h \ + _off_t.h \ + _os_inline.h \ + _pid_t.h \ + _posix_vdisable.h \ + _pthread_attr_t.h \ + _pthread_cond_t.h \ + _pthread_condattr_t.h \ + _pthread_key_t.h \ + _pthread_mutex_t.h \ + _pthread_mutexattr_t.h \ + _pthread_once_t.h \ + _pthread_rwlock_t.h \ + _pthread_rwlockattr_t.h \ + _pthread_t.h \ + _ptrdiff_t.h \ + _rsize_t.h \ + _rune_t.h \ + _s_ifmt.h \ + _sa_family_t.h \ + _seek_set.h \ + _sigaltstack.h \ + _sigset_t.h \ + _size_t.h \ + 
_socklen_t.h \ + _ssize_t.h \ + _suseconds_t.h \ + _time_t.h \ + _timespec.h \ + _timeval.h \ + _timeval32.h \ + _ucontext.h \ + _ucontext64.h \ + _uid_t.h \ + _uintptr_t.h \ + _useconds_t.h \ + _uuid_t.h \ + _va_list.h \ + _wchar_t.h \ + _wint_t.h \ + + + +# Installs header file for Apple internal use in user level - +# $(DSTROOT)/System/Library/Frameworks/System.framework/PrivateHeaders +PRIVATE_DATAFILES = \ + + +# Installs header file for kernel extensions - +# $(DSTROOT)/System/Library/Frameworks/Kernel.framework/Headers +# $(DSTROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders +KERNELFILES = ${DATAFILES} \ + _user_timespec.h \ + _user64_timespec.h \ + _user32_timespec.h \ + _user_timeval.h \ + _user64_timeval.h \ + _user32_timeval.h \ + _user64_itimerval.h \ + _user32_itimerval.h \ + + +# Installs header file for Apple internal use for kernel extensions - +# $(DSTROOT)/System/Library/Frameworks/Kernel.framework/PrivateHeaders +PRIVATE_KERNELFILES = \ + + + +# /System/Library/Frameworks/System.framework/Headers and /usr/include +INSTALL_MI_LIST = ${DATAFILES} + +INSTALL_MI_GEN_LIST = + +INSTALL_MI_DIR = sys/_types + +EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} + +EXPORT_MI_GEN_LIST = + +EXPORT_MI_DIR = sys/_types + +# /System/Library/Frameworks/System.framework/PrivateHeaders +INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} + +# /System/Library/Frameworks/Kernel.framework/PrivateHeaders +INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} + +INSTALL_KF_MI_LCL_GEN_LIST = + +# /System/Library/Frameworks/Kernel.framework/Headers +INSTALL_KF_MI_LIST = ${KERNELFILES} + +INSTALL_KF_MI_GEN_LIST = + +include $(MakeInc_rule) +include $(MakeInc_dir) diff --git a/bsd/sys/_types/___offsetof.h b/bsd/sys/_types/___offsetof.h new file mode 100644 index 000000000..852dc98af --- /dev/null +++ b/bsd/sys/_types/___offsetof.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef __offsetof +#define __offsetof(type, field) ((size_t)(&((type *)0)->field)) +#endif /* __offsetof */ diff --git a/bsd/sys/_types/_blkcnt_t.h b/bsd/sys/_types/_blkcnt_t.h new file mode 100644 index 000000000..c8a9d3963 --- /dev/null +++ b/bsd/sys/_types/_blkcnt_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _BLKCNT_T +#define _BLKCNT_T +typedef __darwin_blkcnt_t blkcnt_t; +#endif /* _BLKCNT_T */ diff --git a/bsd/sys/_types/_blksize_t.h b/bsd/sys/_types/_blksize_t.h new file mode 100644 index 000000000..de50f2ca3 --- /dev/null +++ b/bsd/sys/_types/_blksize_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _BLKSIZE_T +#define _BLKSIZE_T +typedef __darwin_blksize_t blksize_t; +#endif /* _BLKSIZE_T */ diff --git a/bsd/sys/_types/_clock_t.h b/bsd/sys/_types/_clock_t.h new file mode 100644 index 000000000..d58801cde --- /dev/null +++ b/bsd/sys/_types/_clock_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _CLOCK_T +#define _CLOCK_T +typedef __darwin_clock_t clock_t; +#endif /* _CLOCK_T */ diff --git a/bsd/sys/_types/_ct_rune_t.h b/bsd/sys/_types/_ct_rune_t.h new file mode 100644 index 000000000..116174cad --- /dev/null +++ b/bsd/sys/_types/_ct_rune_t.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _CT_RUNE_T +#define _CT_RUNE_T +typedef __darwin_ct_rune_t ct_rune_t; +#endif /* _CT_RUNE_T */ diff --git a/bsd/sys/_types/_dev_t.h b/bsd/sys/_types/_dev_t.h new file mode 100644 index 000000000..cf6d3ad22 --- /dev/null +++ b/bsd/sys/_types/_dev_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _DEV_T +#define _DEV_T +typedef __darwin_dev_t dev_t; /* device number */ +#endif /* _DEV_T */ diff --git a/bsd/sys/_types/_errno_t.h b/bsd/sys/_types/_errno_t.h new file mode 100644 index 000000000..bb2b2d061 --- /dev/null +++ b/bsd/sys/_types/_errno_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _ERRNO_T +#define _ERRNO_T +typedef int errno_t; +#endif /* _ERRNO_T */ diff --git a/osfmk/chud/i386/chud_cpu_asm.s b/bsd/sys/_types/_fd_clr.h similarity index 90% rename from osfmk/chud/i386/chud_cpu_asm.s rename to bsd/sys/_types/_fd_clr.h index aed4310e5..52351ea2a 100644 --- a/osfmk/chud/i386/chud_cpu_asm.s +++ b/bsd/sys/_types/_fd_clr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,8 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ - -#define ASSEMBLER -#include -#include - +#ifndef FD_CLR +#define FD_CLR(n, p) __DARWIN_FD_CLR(n, p) +#endif /* FD_CLR */ diff --git a/bsd/sys/_types/_fd_copy.h b/bsd/sys/_types/_fd_copy.h new file mode 100644 index 000000000..2eddb34e7 --- /dev/null +++ b/bsd/sys/_types/_fd_copy.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef FD_COPY +#define FD_COPY(f, t) __DARWIN_FD_COPY(f, t) +#endif /* FD_COPY */ diff --git a/bsd/sys/_types/_fd_def.h b/bsd/sys/_types/_fd_def.h new file mode 100644 index 000000000..0a36997c8 --- /dev/null +++ b/bsd/sys/_types/_fd_def.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _FD_SET +#define _FD_SET +/* + * Select uses bit masks of file descriptors in longs. These macros + * manipulate such bit fields (the filesystem macros use chars). The + * extra protection here is to permit application redefinition above + * the default size. + */ +#ifdef FD_SETSIZE +#define __DARWIN_FD_SETSIZE FD_SETSIZE +#else /* !FD_SETSIZE */ +#define __DARWIN_FD_SETSIZE 1024 +#endif /* FD_SETSIZE */ +#define __DARWIN_NBBY 8 /* bits in a byte */ +#define __DARWIN_NFDBITS (sizeof(__int32_t) * __DARWIN_NBBY) /* bits per mask */ +#define __DARWIN_howmany(x, y) ((((x) % (y)) == 0) ? ((x) / (y)) : (((x) / (y)) + 1)) /* # y's == x bits? */ + +__BEGIN_DECLS +typedef struct fd_set { + __int32_t fds_bits[__DARWIN_howmany(__DARWIN_FD_SETSIZE, __DARWIN_NFDBITS)]; +} fd_set; +__END_DECLS + +/* This inline avoids argument side-effect issues with FD_ISSET() */ +static __inline int +__darwin_fd_isset(int _n, const struct fd_set *_p) +{ + return (_p->fds_bits[(unsigned long)_n/__DARWIN_NFDBITS] & ((__int32_t)(1<<((unsigned long)_n % __DARWIN_NFDBITS)))); +} + +#define __DARWIN_FD_SET(n, p) do { int __fd = (n); ((p)->fds_bits[(unsigned long)__fd/__DARWIN_NFDBITS] |= ((__int32_t)(1<<((unsigned long)__fd % __DARWIN_NFDBITS)))); } while(0) +#define __DARWIN_FD_CLR(n, p) do { int __fd = (n); ((p)->fds_bits[(unsigned long)__fd/__DARWIN_NFDBITS] &= ~((__int32_t)(1<<((unsigned long)__fd % __DARWIN_NFDBITS)))); } while(0) +#define __DARWIN_FD_ISSET(n, p) __darwin_fd_isset((n), (p)) + +#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 3 +/* + * Use the built-in bzero function instead of the library version so that + * we do not pollute the namespace or introduce prototype warnings. + */ +#define __DARWIN_FD_ZERO(p) __builtin_bzero(p, sizeof(*(p))) +#else +#define __DARWIN_FD_ZERO(p) bzero(p, sizeof(*(p))) +#endif + +#define __DARWIN_FD_COPY(f, t) bcopy(f, t, sizeof(*(f))) +#endif /* _FD_SET */ diff --git a/bsd/sys/_types/_fd_isset.h b/bsd/sys/_types/_fd_isset.h new file mode 100644 index 000000000..089d0d57d --- /dev/null +++ b/bsd/sys/_types/_fd_isset.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef FD_ISSET +#define FD_ISSET(n, p) __DARWIN_FD_ISSET(n, p) +#endif /* FD_ISSET */ diff --git a/osfmk/chud/i386/chud_cpu_asm.h b/bsd/sys/_types/_fd_set.h similarity index 90% rename from osfmk/chud/i386/chud_cpu_asm.h rename to bsd/sys/_types/_fd_set.h index 6f5938897..1fe7da32e 100644 --- a/osfmk/chud/i386/chud_cpu_asm.h +++ b/bsd/sys/_types/_fd_set.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,8 +25,6 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ - -#ifndef _CHUD_CPU_ASM_H_ -#define _CHUD_CPU_ASM_H_ - -#endif // _CHUD_CPU_ASM_H_ +#ifndef FD_SET +#define FD_SET(n, p) __DARWIN_FD_SET(n, p) +#endif /* FD_SET */ diff --git a/bsd/sys/_types/_fd_setsize.h b/bsd/sys/_types/_fd_setsize.h new file mode 100644 index 000000000..4bf02ae2c --- /dev/null +++ b/bsd/sys/_types/_fd_setsize.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef FD_SETSIZE +#define FD_SETSIZE __DARWIN_FD_SETSIZE +#endif /* FD_SETSIZE */ diff --git a/bsd/sys/_types/_fd_zero.h b/bsd/sys/_types/_fd_zero.h new file mode 100644 index 000000000..4fe98ae64 --- /dev/null +++ b/bsd/sys/_types/_fd_zero.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef FD_ZERO +#define FD_ZERO(p) __DARWIN_FD_ZERO(p) +#endif /* FD_ZERO */ diff --git a/bsd/sys/_types/_filesec_t.h b/bsd/sys/_types/_filesec_t.h new file mode 100644 index 000000000..58870a2ef --- /dev/null +++ b/bsd/sys/_types/_filesec_t.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _FILESEC_T +#define _FILESEC_T +struct _filesec; +typedef struct _filesec *filesec_t; +#endif /* _FILESEC_T */ diff --git a/bsd/sys/_types/_fsblkcnt_t.h b/bsd/sys/_types/_fsblkcnt_t.h new file mode 100644 index 000000000..12e03a537 --- /dev/null +++ b/bsd/sys/_types/_fsblkcnt_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _FSBLKCNT_T +#define _FSBLKCNT_T +typedef __darwin_fsblkcnt_t fsblkcnt_t; +#endif /* _FSBLKCNT_T */ diff --git a/bsd/sys/_types/_fsfilcnt_t.h b/bsd/sys/_types/_fsfilcnt_t.h new file mode 100644 index 000000000..9a72eca63 --- /dev/null +++ b/bsd/sys/_types/_fsfilcnt_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _FSFILCNT_T +#define _FSFILCNT_T +typedef __darwin_fsfilcnt_t fsfilcnt_t; +#endif /* _FSFILCNT_T */ diff --git a/libsyscall/mach/mach/task.h b/bsd/sys/_types/_gid_t.h similarity index 86% rename from libsyscall/mach/mach/task.h rename to bsd/sys/_types/_gid_t.h index 6cef51794..f64f56c62 100644 --- a/libsyscall/mach/mach/task.h +++ b/bsd/sys/_types/_gid_t.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,10 +25,7 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#if defined(__i386__) -#include -#elif defined(__x86_64__) -#include -#else -#error unknown architecture +#ifndef _GID_T +#define _GID_T +typedef __darwin_gid_t gid_t; #endif diff --git a/bsd/sys/_types/_guid_t.h b/bsd/sys/_types/_guid_t.h new file mode 100644 index 000000000..47e722d04 --- /dev/null +++ b/bsd/sys/_types/_guid_t.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KAUTH_GUID +#define _KAUTH_GUID +/* Apple-style globally unique identifier */ +typedef struct { +#define KAUTH_GUID_SIZE 16 /* 128-bit identifier */ + unsigned char g_guid[KAUTH_GUID_SIZE]; +} guid_t; +#define _GUID_T +#endif /* _KAUTH_GUID */ diff --git a/bsd/sys/_types/_id_t.h b/bsd/sys/_types/_id_t.h new file mode 100644 index 000000000..b5a8a2f2c --- /dev/null +++ b/bsd/sys/_types/_id_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
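Since the guid_t introduced above is a bare 16-byte array with no operations attached, kauth-style consumers compare identities byte-wise. A minimal sketch (guid_equal is a hypothetical helper, not part of this patch, and assumes the guid_t definition is in scope):

#include <string.h>

/* Hypothetical helper: two kauth GUIDs denote the same identity iff
 * all KAUTH_GUID_SIZE (16) bytes match. */
static int
guid_equal(const guid_t *a, const guid_t *b)
{
    return memcmp(a->g_guid, b->g_guid, KAUTH_GUID_SIZE) == 0;
}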
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _ID_T +#define _ID_T +typedef __darwin_id_t id_t; /* can hold pid_t, gid_t, or uid_t */ +#endif /* _ID_T */ diff --git a/bsd/sys/_types/_in_addr_t.h b/bsd/sys/_types/_in_addr_t.h new file mode 100644 index 000000000..a534517c6 --- /dev/null +++ b/bsd/sys/_types/_in_addr_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _IN_ADDR_T +#define _IN_ADDR_T +typedef __uint32_t in_addr_t; /* base type for internet address */ +#endif /* _IN_ADDR_T */ diff --git a/bsd/sys/_types/_in_port_t.h b/bsd/sys/_types/_in_port_t.h new file mode 100644 index 000000000..cf3da0020 --- /dev/null +++ b/bsd/sys/_types/_in_port_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _IN_PORT_T +#define _IN_PORT_T +typedef __uint16_t in_port_t; +#endif /* _IN_PORT_T */ diff --git a/bsd/sys/_types/_ino64_t.h b/bsd/sys/_types/_ino64_t.h new file mode 100644 index 000000000..a7ca59e97 --- /dev/null +++ b/bsd/sys/_types/_ino64_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _INO64_T +#define _INO64_T +typedef __darwin_ino64_t ino64_t; /* 64bit inode number */ +#endif /* _INO64_T */ diff --git a/bsd/sys/_types/_ino_t.h b/bsd/sys/_types/_ino_t.h new file mode 100644 index 000000000..2bc666f92 --- /dev/null +++ b/bsd/sys/_types/_ino_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _INO_T +#define _INO_T +typedef __darwin_ino_t ino_t; /* inode number */ +#endif /* _INO_T */ diff --git a/bsd/sys/_types/_int16_t.h b/bsd/sys/_types/_int16_t.h new file mode 100644 index 000000000..ed373d649 --- /dev/null +++ b/bsd/sys/_types/_int16_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _INT16_T +#define _INT16_T +typedef short int16_t; +#endif /* _INT16_T */ diff --git a/bsd/sys/_types/_int32_t.h b/bsd/sys/_types/_int32_t.h new file mode 100644 index 000000000..15041d498 --- /dev/null +++ b/bsd/sys/_types/_int32_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _INT32_T +#define _INT32_T +typedef int int32_t; +#endif /* _INT32_T */ diff --git a/bsd/sys/_types/_int64_t.h b/bsd/sys/_types/_int64_t.h new file mode 100644 index 000000000..fd14b60a9 --- /dev/null +++ b/bsd/sys/_types/_int64_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _INT64_T +#define _INT64_T +typedef long long int64_t; +#endif /* _INT64_T */ diff --git a/bsd/sys/_types/_int8_t.h b/bsd/sys/_types/_int8_t.h new file mode 100644 index 000000000..c48ef254e --- /dev/null +++ b/bsd/sys/_types/_int8_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _INT8_T +#define _INT8_T +typedef __signed char int8_t; +#endif /* _INT8_T */ diff --git a/bsd/sys/_types/_intptr_t.h b/bsd/sys/_types/_intptr_t.h new file mode 100644 index 000000000..c01f906f5 --- /dev/null +++ b/bsd/sys/_types/_intptr_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _INTPTR_T +#define _INTPTR_T +typedef __darwin_intptr_t intptr_t; +#endif /* _INTPTR_T */ diff --git a/bsd/sys/_types/_iovec_t.h b/bsd/sys/_types/_iovec_t.h new file mode 100644 index 000000000..9aa311d29 --- /dev/null +++ b/bsd/sys/_types/_iovec_t.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _STRUCT_IOVEC +#define _STRUCT_IOVEC +struct iovec { + void * iov_base; /* [XSI] Base address of I/O memory region */ + size_t iov_len; /* [XSI] Size of region iov_base points to */ +}; +#endif /* _STRUCT_IOVEC */ diff --git a/bsd/sys/_types/_key_t.h b/bsd/sys/_types/_key_t.h new file mode 100644 index 000000000..2b5bdbd4b --- /dev/null +++ b/bsd/sys/_types/_key_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _KEY_T +#define _KEY_T +typedef __int32_t key_t; /* IPC key (for Sys V IPC) */ +#endif /* _KEY_T */ diff --git a/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.h b/bsd/sys/_types/_mach_port_t.h similarity index 62% rename from iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.h rename to bsd/sys/_types/_mach_port_t.h index ee029d636..d2bbae678 100644 --- a/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.h +++ b/bsd/sys/_types/_mach_port_t.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,33 +25,26 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. + +/* + * mach_port_t - a named port right + * + * In user-space, "rights" are represented by the name of the + * right in the Mach port namespace. Even so, this type is + * presented as a unique one to more clearly denote the presence + * of a right coming along with the name. + * + * Often, various rights for a port held in a single name space + * will coalesce and are, therefore, identified by a single name + * [this is the case for send and receive rights]. But not + * always [send-once rights currently get a unique name for + * each right]. * - * DRI: Josh de Cesare + * This definition of mach_port_t is only for user-space.
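To make the rights-versus-names distinction described in this comment concrete, here is a minimal user-space sketch using the standard Mach calls (the helper name is hypothetical, not part of this patch):

#include <mach/mach.h>

/* Sketch: allocate a receive right, then insert a send right under
 * the same name; afterwards one mach_port_t name denotes both rights,
 * which is the coalescing the comment above describes. */
static mach_port_t
make_port_with_send_right(void)
{
    mach_port_t port = MACH_PORT_NULL;

    if (mach_port_allocate(mach_task_self(),
        MACH_PORT_RIGHT_RECEIVE, &port) != KERN_SUCCESS)
        return MACH_PORT_NULL;
    /* Coalesce a send right onto the existing name. */
    (void)mach_port_insert_right(mach_task_self(), port, port,
        MACH_MSG_TYPE_MAKE_SEND);
    return port;
}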
* */ -#ifndef _IOKIT_APPLECPU_H -#define _IOKIT_APPLECPU_H - -#include - -class AppleCPU : public IOCPU -{ - OSDeclareDefaultStructors(AppleCPU); - -private: - IOCPUInterruptController *cpuIC; - -public: - virtual bool start(IOService *provider); - virtual void initCPU(bool boot); - virtual void quiesceCPU(void); - virtual kern_return_t startCPU(vm_offset_t start_paddr, - vm_offset_t arg_paddr); - virtual void haltCPU(void); - virtual const OSSymbol *getCPUName(void); -}; - -#endif /* ! _IOKIT_APPLECPU_H */ +#ifndef _MACH_PORT_T +#define _MACH_PORT_T +typedef __darwin_mach_port_t mach_port_t; +#endif /* _MACH_PORT_T */ diff --git a/bsd/sys/_types/_mbstate_t.h b/bsd/sys/_types/_mbstate_t.h new file mode 100644 index 000000000..790d112a2 --- /dev/null +++ b/bsd/sys/_types/_mbstate_t.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _MBSTATE_T +#define _MBSTATE_T +typedef __darwin_mbstate_t mbstate_t; +#endif /* _MBSTATE_T */ diff --git a/bsd/sys/_types/_mode_t.h b/bsd/sys/_types/_mode_t.h new file mode 100644 index 000000000..a378b7dcc --- /dev/null +++ b/bsd/sys/_types/_mode_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
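Returning to the struct iovec added in _iovec_t.h a few hunks earlier: it is the [XSI] scatter/gather element consumed by readv(2) and writev(2). A minimal sketch (the helper name and arguments are illustrative only):

#include <sys/uio.h>   /* struct iovec, writev() */
#include <unistd.h>

/* Sketch: gather a header and a body into one write without first
 * copying them into a contiguous buffer. */
static ssize_t
write_two(int fd, const char *hdr, size_t hlen, const char *body, size_t blen)
{
    struct iovec iov[2];

    iov[0].iov_base = (void *)hdr;   /* iov_base is non-const by definition */
    iov[0].iov_len  = hlen;
    iov[1].iov_base = (void *)body;
    iov[1].iov_len  = blen;
    return writev(fd, iov, 2);       /* writes hdr then body, in order */
}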
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _MODE_T +#define _MODE_T +typedef __darwin_mode_t mode_t; +#endif /* _MODE_T */ diff --git a/bsd/sys/_types/_nlink_t.h b/bsd/sys/_types/_nlink_t.h new file mode 100644 index 000000000..6b0e8cd32 --- /dev/null +++ b/bsd/sys/_types/_nlink_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _NLINK_T +#define _NLINK_T +typedef __uint16_t nlink_t; /* link count */ +#endif /* _NLINK_T */ diff --git a/bsd/sys/_types/_null.h b/bsd/sys/_types/_null.h new file mode 100644 index 000000000..8a32fe0d5 --- /dev/null +++ b/bsd/sys/_types/_null.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef NULL +#define NULL __DARWIN_NULL +#endif /* NULL */ diff --git a/bsd/sys/_types/_o_dsync.h b/bsd/sys/_types/_o_dsync.h new file mode 100644 index 000000000..fece722c7 --- /dev/null +++ b/bsd/sys/_types/_o_dsync.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef O_DSYNC +#define O_DSYNC 0x400000 /* synch I/O data integrity */ +#endif /* O_DSYNC */ diff --git a/bsd/sys/_types/_o_sync.h b/bsd/sys/_types/_o_sync.h new file mode 100644 index 000000000..85bdd6945 --- /dev/null +++ b/bsd/sys/_types/_o_sync.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef O_SYNC +#define O_SYNC 0x0080 /* synch I/O file integrity */ +#endif /* O_SYNC */ diff --git a/bsd/sys/_types/_off_t.h b/bsd/sys/_types/_off_t.h new file mode 100644 index 000000000..fc6eacad4 --- /dev/null +++ b/bsd/sys/_types/_off_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _OFF_T +#define _OFF_T +typedef __darwin_off_t off_t; +#endif /* _OFF_T */ diff --git a/bsd/sys/_types/_os_inline.h b/bsd/sys/_types/_os_inline.h new file mode 100644 index 000000000..d85c91214 --- /dev/null +++ b/bsd/sys/_types/_os_inline.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
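The one-line comments on O_DSYNC in the earlier hunk and O_SYNC above compress a real behavioral difference: O_DSYNC requests data-integrity completion (file contents durable before write(2) returns), while O_SYNC requests file-integrity completion (metadata such as size flushed as well). A small sketch, with "journal.bin" as a placeholder path:

#include <fcntl.h>
#include <unistd.h>

/* Sketch: with O_DSYNC each write() returns only once the file data
 * is on stable storage; substituting O_SYNC would additionally flush
 * file metadata before returning. */
static int
append_record(const void *buf, size_t len)
{
    int fd = open("journal.bin", O_WRONLY | O_CREAT | O_APPEND | O_DSYNC, 0644);
    ssize_t n = -1;

    if (fd >= 0) {
        n = write(fd, buf, len);   /* data durable once this returns */
        close(fd);
    }
    return n == (ssize_t)len;
}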
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#if !defined(OS_INLINE) +# if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +# define OS_INLINE static inline +# else +# define OS_INLINE static __inline__ +# endif +#endif /* OS_INLINE */ diff --git a/bsd/sys/_types/_pid_t.h b/bsd/sys/_types/_pid_t.h new file mode 100644 index 000000000..ea369b218 --- /dev/null +++ b/bsd/sys/_types/_pid_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PID_T +#define _PID_T +typedef __darwin_pid_t pid_t; +#endif /* _PID_T */ diff --git a/bsd/sys/_types/_posix_vdisable.h b/bsd/sys/_types/_posix_vdisable.h new file mode 100644 index 000000000..4808c5318 --- /dev/null +++ b/bsd/sys/_types/_posix_vdisable.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
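OS_INLINE above simply selects the correct spelling of a static inline function for the language mode: C99 and later get static inline, while older compilers fall back to the GNU static __inline__ extension. A trivial usage sketch (assuming OS_INLINE is in scope via the header above):

/* Expands to "static inline int os_max(...)" under C99 and later. */
OS_INLINE int
os_max(int a, int b)
{
    return a > b ? a : b;
}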
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _POSIX_VDISABLE +#define _POSIX_VDISABLE ((unsigned char)'\377') +#endif /* POSIX_VDISABLE */ diff --git a/bsd/sys/_types/_pthread_attr_t.h b/bsd/sys/_types/_pthread_attr_t.h new file mode 100644 index 000000000..0f69ae97c --- /dev/null +++ b/bsd/sys/_types/_pthread_attr_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_ATTR_T +#define _PTHREAD_ATTR_T +typedef __darwin_pthread_attr_t pthread_attr_t; +#endif /* _PTHREAD_ATTR_T */ diff --git a/bsd/sys/_types/_pthread_cond_t.h b/bsd/sys/_types/_pthread_cond_t.h new file mode 100644 index 000000000..ce07f92dd --- /dev/null +++ b/bsd/sys/_types/_pthread_cond_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_COND_T +#define _PTHREAD_COND_T +typedef __darwin_pthread_cond_t pthread_cond_t; +#endif /* _PTHREAD_COND_T */ diff --git a/bsd/sys/_types/_pthread_condattr_t.h b/bsd/sys/_types/_pthread_condattr_t.h new file mode 100644 index 000000000..4dad3cfb7 --- /dev/null +++ b/bsd/sys/_types/_pthread_condattr_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_CONDATTR_T +#define _PTHREAD_CONDATTR_T +typedef __darwin_pthread_condattr_t pthread_condattr_t; +#endif /* _PTHREAD_CONDATTR_T */ diff --git a/bsd/sys/_types/_pthread_key_t.h b/bsd/sys/_types/_pthread_key_t.h new file mode 100644 index 000000000..5dd708529 --- /dev/null +++ b/bsd/sys/_types/_pthread_key_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_KEY_T +#define _PTHREAD_KEY_T +typedef __darwin_pthread_key_t pthread_key_t; +#endif /* _PTHREAD_KEY_T */ diff --git a/bsd/sys/_types/_pthread_mutex_t.h b/bsd/sys/_types/_pthread_mutex_t.h new file mode 100644 index 000000000..4a580ba91 --- /dev/null +++ b/bsd/sys/_types/_pthread_mutex_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_MUTEX_T +#define _PTHREAD_MUTEX_T +typedef __darwin_pthread_mutex_t pthread_mutex_t; +#endif /*_PTHREAD_MUTEX_T */ diff --git a/bsd/sys/_types/_pthread_mutexattr_t.h b/bsd/sys/_types/_pthread_mutexattr_t.h new file mode 100644 index 000000000..26879560e --- /dev/null +++ b/bsd/sys/_types/_pthread_mutexattr_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_MUTEXATTR_T +#define _PTHREAD_MUTEXATTR_T +typedef __darwin_pthread_mutexattr_t pthread_mutexattr_t; +#endif /* _PTHREAD_MUTEXATTR_T */ diff --git a/bsd/sys/_types/_pthread_once_t.h b/bsd/sys/_types/_pthread_once_t.h new file mode 100644 index 000000000..e5bcf2925 --- /dev/null +++ b/bsd/sys/_types/_pthread_once_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_ONCE_T +#define _PTHREAD_ONCE_T +typedef __darwin_pthread_once_t pthread_once_t; +#endif /* _PTHREAD_ONCE_T */ diff --git a/bsd/sys/_types/_pthread_rwlock_t.h b/bsd/sys/_types/_pthread_rwlock_t.h new file mode 100644 index 000000000..f6d018e7f --- /dev/null +++ b/bsd/sys/_types/_pthread_rwlock_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_RWLOCK_T +#define _PTHREAD_RWLOCK_T +typedef __darwin_pthread_rwlock_t pthread_rwlock_t; +#endif /* _PTHREAD_RWLOCK_T */ diff --git a/bsd/sys/_types/_pthread_rwlockattr_t.h b/bsd/sys/_types/_pthread_rwlockattr_t.h new file mode 100644 index 000000000..ab28658a2 --- /dev/null +++ b/bsd/sys/_types/_pthread_rwlockattr_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_RWLOCKATTR_T +#define _PTHREAD_RWLOCKATTR_T +typedef __darwin_pthread_rwlockattr_t pthread_rwlockattr_t; +#endif /* _PTHREAD_RWLOCKATTR_T */ diff --git a/bsd/sys/_types/_pthread_t.h b/bsd/sys/_types/_pthread_t.h new file mode 100644 index 000000000..97ecb7cec --- /dev/null +++ b/bsd/sys/_types/_pthread_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _PTHREAD_T +#define _PTHREAD_T +typedef __darwin_pthread_t pthread_t; +#endif /* _PTHREAD_T */ diff --git a/bsd/sys/_types/_ptrdiff_t.h b/bsd/sys/_types/_ptrdiff_t.h new file mode 100644 index 000000000..2f7344551 --- /dev/null +++ b/bsd/sys/_types/_ptrdiff_t.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _PTRDIFF_T +#define _PTRDIFF_T +typedef __darwin_ptrdiff_t ptrdiff_t; +#endif /* _PTRDIFF_T */ diff --git a/bsd/sys/_types/_rsize_t.h b/bsd/sys/_types/_rsize_t.h new file mode 100644 index 000000000..68e18ef71 --- /dev/null +++ b/bsd/sys/_types/_rsize_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
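The pthread typedefs split into per-type headers above (pthread_t, pthread_mutex_t, pthread_once_t, and the attr/rwlock variants) are what <pthread.h> ultimately exposes to user space. A minimal sketch of those types in use — standard POSIX threads API, not code from this patch:

#include <pthread.h>
#include <stdio.h>

static pthread_once_t init_once = PTHREAD_ONCE_INIT;       /* pthread_once_t */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;   /* pthread_mutex_t */

static void init_state(void)
{
	puts("runs exactly once across all threads");
}

static void *worker(void *arg)
{
	pthread_once(&init_once, init_state);
	pthread_mutex_lock(&lock);
	puts("in critical section");
	pthread_mutex_unlock(&lock);
	return arg;
}

int main(void)
{
	pthread_t tid;                                     /* pthread_t */
	if (pthread_create(&tid, NULL, worker, NULL) != 0)
		return 1;
	pthread_join(tid, NULL);
	return 0;
}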
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _RSIZE_T +#define _RSIZE_T +typedef __darwin_size_t rsize_t; +#endif /* _RSIZE_T */ diff --git a/bsd/sys/_types/_rune_t.h b/bsd/sys/_types/_rune_t.h new file mode 100644 index 000000000..19a231899 --- /dev/null +++ b/bsd/sys/_types/_rune_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _RUNE_T +#define _RUNE_T +typedef __darwin_rune_t rune_t; +#endif /* _RUNE_T */ diff --git a/bsd/sys/_types/_s_ifmt.h b/bsd/sys/_types/_s_ifmt.h new file mode 100644 index 000000000..21a75ca2a --- /dev/null +++ b/bsd/sys/_types/_s_ifmt.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + * [XSI] The symbolic names for file modes for use as values of mode_t + * shall be defined as described in + */ +#ifndef S_IFMT +/* File type */ +#define S_IFMT 0170000 /* [XSI] type of file mask */ +#define S_IFIFO 0010000 /* [XSI] named pipe (fifo) */ +#define S_IFCHR 0020000 /* [XSI] character special */ +#define S_IFDIR 0040000 /* [XSI] directory */ +#define S_IFBLK 0060000 /* [XSI] block special */ +#define S_IFREG 0100000 /* [XSI] regular */ +#define S_IFLNK 0120000 /* [XSI] symbolic link */ +#define S_IFSOCK 0140000 /* [XSI] socket */ +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define S_IFWHT 0160000 /* OBSOLETE: whiteout */ +#endif + +/* File mode */ +/* Read, write, execute/search by owner */ +#define S_IRWXU 0000700 /* [XSI] RWX mask for owner */ +#define S_IRUSR 0000400 /* [XSI] R for owner */ +#define S_IWUSR 0000200 /* [XSI] W for owner */ +#define S_IXUSR 0000100 /* [XSI] X for owner */ +/* Read, write, execute/search by group */ +#define S_IRWXG 0000070 /* [XSI] RWX mask for group */ +#define S_IRGRP 0000040 /* [XSI] R for group */ +#define S_IWGRP 0000020 /* [XSI] W for group */ +#define S_IXGRP 0000010 /* [XSI] X for group */ +/* Read, write, execute/search by others */ +#define S_IRWXO 0000007 /* [XSI] RWX mask for other */ +#define S_IROTH 0000004 /* [XSI] R for other */ +#define S_IWOTH 0000002 /* [XSI] W for other */ +#define S_IXOTH 0000001 /* [XSI] X for other */ + +#define S_ISUID 0004000 /* [XSI] set user id on execution */ +#define S_ISGID 0002000 /* [XSI] set group id on execution */ +#define S_ISVTX 0001000 /* [XSI] directory restricted delete */ + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#define S_ISTXT S_ISVTX /* sticky bit: not supported */ +#define S_IREAD S_IRUSR /* backward compatibility */ +#define S_IWRITE S_IWUSR /* backward compatibility */ +#define S_IEXEC S_IXUSR /* backward compatibility */ +#endif +#endif /* !S_IFMT */ diff --git a/bsd/sys/_types/_sa_family_t.h b/bsd/sys/_types/_sa_family_t.h new file mode 100644 index 000000000..3460f2661 --- /dev/null +++ b/bsd/sys/_types/_sa_family_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file.
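The S_IFMT block above is the core of the file-classification machinery: the type bits live in the 0170000 mask, so code masks st_mode with S_IFMT and compares against the type constants, while the permission bits are tested independently. A minimal user-space sketch using the standard stat family — not code from this patch:

#include <sys/stat.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
	struct stat st;

	if (argc < 2 || lstat(argv[1], &st) != 0)
		return 1;

	switch (st.st_mode & S_IFMT) {   /* type bits live in the 0170000 mask */
	case S_IFREG:  puts("regular file");  break;
	case S_IFDIR:  puts("directory");     break;
	case S_IFLNK:  puts("symbolic link"); break; /* visible via lstat(), not stat() */
	case S_IFSOCK: puts("socket");        break;
	default:       puts("other");         break;
	}

	if (st.st_mode & S_IXUSR)        /* permission bits are independent of type */
		puts("owner-executable");
	return 0;
}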
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _SA_FAMILY_T +#define _SA_FAMILY_T +typedef __uint8_t sa_family_t; +#endif /* _SA_FAMILY_T */ diff --git a/bsd/sys/_types/_seek_set.h b/bsd/sys/_types/_seek_set.h new file mode 100644 index 000000000..a95c6e4c8 --- /dev/null +++ b/bsd/sys/_types/_seek_set.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* whence values for lseek(2) */ +#ifndef SEEK_SET +#define SEEK_SET 0 /* set file offset to offset */ +#define SEEK_CUR 1 /* set file offset to current plus offset */ +#define SEEK_END 2 /* set file offset to EOF plus offset */ +#endif /* !SEEK_SET */ diff --git a/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.h b/bsd/sys/_types/_sigaltstack.h similarity index 67% rename from iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.h rename to bsd/sys/_types/_sigaltstack.h index 2bfae06b7..24d23fefb 100644 --- a/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.h +++ b/bsd/sys/_types/_sigaltstack.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -26,30 +26,19 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include - -enum { - kNVRAMTypeNone = 0, - kNVRAMTypeIOMem, - kNVRAMTypePort, - - kNVRAMImageSize = 0x2000 -}; - -class AppleNVRAM : public IONVRAMController +/* Structure used in sigaltstack call. 
*/ +#ifndef _STRUCT_SIGALTSTACK +#if __DARWIN_UNIX03 +#define _STRUCT_SIGALTSTACK struct __darwin_sigaltstack +#else /* !__DARWIN_UNIX03 */ +#define _STRUCT_SIGALTSTACK struct sigaltstack +#endif /* __DARWIN_UNIX03 */ +_STRUCT_SIGALTSTACK { - OSDeclareDefaultStructors(AppleNVRAM); - -private: - UInt32 _nvramType; - volatile UInt8 *_nvramData; - volatile UInt8 *_nvramPort; - -public: - bool start(IOService *provider); - - virtual IOReturn read(IOByteCount offset, UInt8 *buffer, - IOByteCount length); - virtual IOReturn write(IOByteCount offset, UInt8 *buffer, - IOByteCount length); + void *ss_sp; /* signal stack base */ + __darwin_size_t ss_size; /* signal stack length */ + int ss_flags; /* SA_DISABLE and/or SA_ONSTACK */ }; +typedef _STRUCT_SIGALTSTACK stack_t; /* [???] signal stack */ + +#endif /* _STRUCT_SIGALTSTACK */ diff --git a/bsd/sys/_types/_sigset_t.h b/bsd/sys/_types/_sigset_t.h new file mode 100644 index 000000000..d4e9b6538 --- /dev/null +++ b/bsd/sys/_types/_sigset_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _SIGSET_T +#define _SIGSET_T +typedef __darwin_sigset_t sigset_t; +#endif /* _SIGSET_T */ diff --git a/bsd/sys/_types/_size_t.h b/bsd/sys/_types/_size_t.h new file mode 100644 index 000000000..8346ba897 --- /dev/null +++ b/bsd/sys/_types/_size_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
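The stack_t defined above (ss_sp, ss_size, ss_flags) is consumed by sigaltstack(2); a handler installed with SA_ONSTACK then runs on the alternate stack, which is how a program can survive a stack-overflow SIGSEGV long enough to report it. A minimal sketch, standard POSIX signals rather than anything patch-specific:

#include <signal.h>
#include <stdlib.h>

static void on_segv(int sig)
{
	(void)sig;
	_Exit(1);                        /* async-signal-safe exit */
}

int main(void)
{
	stack_t ss;
	struct sigaction sa;

	ss.ss_sp = malloc(SIGSTKSZ);     /* signal stack base */
	ss.ss_size = SIGSTKSZ;           /* signal stack length */
	ss.ss_flags = 0;
	if (ss.ss_sp == NULL || sigaltstack(&ss, NULL) != 0)
		return 1;

	sa.sa_handler = on_segv;
	sa.sa_flags = SA_ONSTACK;        /* deliver on the alternate stack */
	sigemptyset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, NULL);
	/* ... */
	return 0;
}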
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _SIZE_T +#define _SIZE_T +typedef __darwin_size_t size_t; +#endif /* _SIZE_T */ diff --git a/bsd/sys/_types/_socklen_t.h b/bsd/sys/_types/_socklen_t.h new file mode 100644 index 000000000..110a3fa74 --- /dev/null +++ b/bsd/sys/_types/_socklen_t.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _SOCKLEN_T +#define _SOCKLEN_T +typedef __darwin_socklen_t socklen_t; +#endif + diff --git a/bsd/sys/_types/_ssize_t.h b/bsd/sys/_types/_ssize_t.h new file mode 100644 index 000000000..636a850d4 --- /dev/null +++ b/bsd/sys/_types/_ssize_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _SSIZE_T +#define _SSIZE_T +typedef __darwin_ssize_t ssize_t; +#endif /* _SSIZE_T */ diff --git a/bsd/sys/_types/_suseconds_t.h b/bsd/sys/_types/_suseconds_t.h new file mode 100644 index 000000000..883143a73 --- /dev/null +++ b/bsd/sys/_types/_suseconds_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _SUSECONDS_T +#define _SUSECONDS_T +typedef __darwin_suseconds_t suseconds_t; +#endif /* _SUSECONDS_T */ diff --git a/bsd/sys/_types/_time_t.h b/bsd/sys/_types/_time_t.h new file mode 100644 index 000000000..19b5f5e1e --- /dev/null +++ b/bsd/sys/_types/_time_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _TIME_T +#define _TIME_T +typedef __darwin_time_t time_t; +#endif /* _TIME_T */ diff --git a/libsyscall/mach/mach/thread_act.h b/bsd/sys/_types/_timespec.h similarity index 84% rename from libsyscall/mach/mach/thread_act.h rename to bsd/sys/_types/_timespec.h index b413f7a7e..73525337d 100644 --- a/libsyscall/mach/mach/thread_act.h +++ b/bsd/sys/_types/_timespec.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,10 +25,11 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#if defined(__i386__) -#include -#elif defined(__x86_64__) -#include -#else -#error unknown architecture -#endif +#ifndef _STRUCT_TIMESPEC +#define _STRUCT_TIMESPEC struct timespec +_STRUCT_TIMESPEC +{ + __darwin_time_t tv_sec; + long tv_nsec; +}; +#endif /* _STRUCT_TIMESPEC */ diff --git a/bsd/sys/_types/_timeval.h b/bsd/sys/_types/_timeval.h new file mode 100644 index 000000000..2f73808a4 --- /dev/null +++ b/bsd/sys/_types/_timeval.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _STRUCT_TIMEVAL +#define _STRUCT_TIMEVAL struct timeval +_STRUCT_TIMEVAL +{ + __darwin_time_t tv_sec; /* seconds */ + __darwin_suseconds_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_TIMEVAL */ diff --git a/bsd/sys/_types/_timeval32.h b/bsd/sys/_types/_timeval32.h new file mode 100644 index 000000000..ae5d3fe8b --- /dev/null +++ b/bsd/sys/_types/_timeval32.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _STRUCT_TIMEVAL32 +#define _STRUCT_TIMEVAL32 struct timeval32 +_STRUCT_TIMEVAL32 +{ + __int32_t tv_sec; /* seconds */ + __int32_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_TIMEVAL32 */ diff --git a/bsd/sys/_types/_ucontext.h b/bsd/sys/_types/_ucontext.h new file mode 100644 index 000000000..159ff0a7e --- /dev/null +++ b/bsd/sys/_types/_ucontext.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _STRUCT_UCONTEXT +#if __DARWIN_UNIX03 +#define _STRUCT_UCONTEXT struct __darwin_ucontext +#else /* !__DARWIN_UNIX03 */ +#define _STRUCT_UCONTEXT struct ucontext +#endif /* __DARWIN_UNIX03 */ +_STRUCT_UCONTEXT +{ + int uc_onstack; + __darwin_sigset_t uc_sigmask; /* signal mask used by this context */ + _STRUCT_SIGALTSTACK uc_stack; /* stack used by this context */ + _STRUCT_UCONTEXT *uc_link; /* pointer to resuming context */ + __darwin_size_t uc_mcsize; /* size of the machine context passed in */ + _STRUCT_MCONTEXT *uc_mcontext; /* pointer to machine specific context */ +#ifdef _XOPEN_SOURCE + _STRUCT_MCONTEXT __mcontext_data; +#endif /* _XOPEN_SOURCE */ +}; + +/* user context */ +typedef _STRUCT_UCONTEXT ucontext_t; /* [???] user context */ + +#endif /* _STRUCT_UCONTEXT */ diff --git a/bsd/sys/_types/_ucontext64.h b/bsd/sys/_types/_ucontext64.h new file mode 100644 index 000000000..f2a620a19 --- /dev/null +++ b/bsd/sys/_types/_ucontext64.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _STRUCT_UCONTEXT64 +#if __DARWIN_UNIX03 +#define _STRUCT_UCONTEXT64 struct __darwin_ucontext64 +#else /* !__DARWIN_UNIX03 */ +#define _STRUCT_UCONTEXT64 struct ucontext64 +#endif /* __DARWIN_UNIX03 */ +_STRUCT_UCONTEXT64 +{ + int uc_onstack; + __darwin_sigset_t uc_sigmask; /* signal mask used by this context */ + _STRUCT_SIGALTSTACK uc_stack; /* stack used by this context */ + _STRUCT_UCONTEXT64 *uc_link; /* pointer to resuming context */ + __darwin_size_t uc_mcsize; /* size of the machine context passed in */ + _STRUCT_MCONTEXT64 *uc_mcontext64; /* pointer to machine specific context */ +}; +typedef _STRUCT_UCONTEXT64 ucontext64_t; /* [???] user context */ + +#endif /* _STRUCT_UCONTEXT64 */ diff --git a/bsd/sys/_types/_uid_t.h b/bsd/sys/_types/_uid_t.h new file mode 100644 index 000000000..678f7db14 --- /dev/null +++ b/bsd/sys/_types/_uid_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. 
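In user space, ucontext_t as defined above is most often encountered as the third argument of an SA_SIGINFO handler, where it describes the interrupted thread: uc_stack and uc_sigmask are portable to inspect, while uc_mcontext points at the machine-specific _STRUCT_MCONTEXT. A hedged sketch using standard POSIX signal delivery, not code from this patch:

#include <signal.h>

static void handler(int sig, siginfo_t *info, void *ctx)
{
	ucontext_t *uc = (ucontext_t *)ctx;   /* the interrupted context */

	(void)sig; (void)info;
	(void)uc->uc_stack.ss_size;           /* e.g. inspect the stack in use */
	/* uc->uc_mcontext is _STRUCT_MCONTEXT and thus machine-specific */
}

int main(void)
{
	struct sigaction sa;

	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;             /* request the three-argument form */
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}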
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _UID_T +#define _UID_T +typedef __darwin_uid_t uid_t; +#endif /* _UID_T */ diff --git a/bsd/sys/_types/_uintptr_t.h b/bsd/sys/_types/_uintptr_t.h new file mode 100644 index 000000000..3b0bcce69 --- /dev/null +++ b/bsd/sys/_types/_uintptr_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _UINTPTR_T +#define _UINTPTR_T +typedef unsigned long uintptr_t; +#endif /* _UINTPTR_T */ diff --git a/bsd/sys/_types/_useconds_t.h b/bsd/sys/_types/_useconds_t.h new file mode 100644 index 000000000..780d2364d --- /dev/null +++ b/bsd/sys/_types/_useconds_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL +#ifndef _STRUCT_USER32_ITIMERVAL +#define _STRUCT_USER32_ITIMERVAL struct user32_itimerval +_STRUCT_USER32_ITIMERVAL +{ + _STRUCT_USER32_TIMEVAL it_interval; /* timer interval */ + _STRUCT_USER32_TIMEVAL it_value; /* current value */ +}; +#endif /* _STRUCT_USER32_ITIMERVAL */ +#endif /* KERNEL */ diff --git a/bsd/sys/_types/_user32_timespec.h b/bsd/sys/_types/_user32_timespec.h new file mode 100644 index 000000000..981360755 --- /dev/null +++ b/bsd/sys/_types/_user32_timespec.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License').
You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL +#ifndef _STRUCT_USER32_TIMESPEC +#define _STRUCT_USER32_TIMESPEC struct user32_timespec +_STRUCT_USER32_TIMESPEC +{ + user32_time_t tv_sec; /* seconds */ + user32_long_t tv_nsec; /* and nanoseconds */ +}; +#endif /* _STRUCT_USER32_TIMESPEC */ +#endif /* KERNEL */ diff --git a/bsd/sys/_types/_user32_timeval.h b/bsd/sys/_types/_user32_timeval.h new file mode 100644 index 000000000..fb3ef222f --- /dev/null +++ b/bsd/sys/_types/_user32_timeval.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL +#ifndef _STRUCT_USER32_TIMEVAL +#define _STRUCT_USER32_TIMEVAL struct user32_timeval +_STRUCT_USER32_TIMEVAL +{ + user32_time_t tv_sec; /* seconds */ + __int32_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_USER32_TIMEVAL */ +#endif /* KERNEL */ diff --git a/bsd/sys/_types/_user64_itimerval.h b/bsd/sys/_types/_user64_itimerval.h new file mode 100644 index 000000000..4c58fece9 --- /dev/null +++ b/bsd/sys/_types/_user64_itimerval.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL +#ifndef _STRUCT_USER64_ITIMERVAL +#define _STRUCT_USER64_ITIMERVAL struct user64_itimerval +_STRUCT_USER64_ITIMERVAL +{ + _STRUCT_USER64_TIMEVAL it_interval; /* timer interval */ + _STRUCT_USER64_TIMEVAL it_value; /* current value */ +}; +#endif /* _STRUCT_USER64_ITIMERVAL */ +#endif /* KERNEL */ diff --git a/bsd/sys/_types/_user64_timespec.h b/bsd/sys/_types/_user64_timespec.h new file mode 100644 index 000000000..d80b1cee1 --- /dev/null +++ b/bsd/sys/_types/_user64_timespec.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL +#ifndef _STRUCT_USER64_TIMESPEC +#define _STRUCT_USER64_TIMESPEC struct user64_timespec +_STRUCT_USER64_TIMESPEC +{ + user64_time_t tv_sec; /* seconds */ + user64_long_t tv_nsec; /* and nanoseconds */ +}; +#endif /* _STRUCT_USER64_TIMESPEC */ +#endif /* KERNEL */ diff --git a/bsd/sys/_types/_user64_timeval.h b/bsd/sys/_types/_user64_timeval.h new file mode 100644 index 000000000..38b1fca8e --- /dev/null +++ b/bsd/sys/_types/_user64_timeval.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL +#ifndef _STRUCT_USER64_TIMEVAL +#define _STRUCT_USER64_TIMEVAL struct user64_timeval +_STRUCT_USER64_TIMEVAL +{ + user64_time_t tv_sec; /* seconds */ + __int32_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_USER64_TIMEVAL */ +#endif /* KERNEL */ diff --git a/bsd/sys/_types/_user_timespec.h b/bsd/sys/_types/_user_timespec.h new file mode 100644 index 000000000..9c25c3eb7 --- /dev/null +++ b/bsd/sys/_types/_user_timespec.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL +/* LP64 version of struct timespec. time_t is a long and must grow when + * we're dealing with a 64-bit process. + * WARNING - keep in sync with struct timespec + */ + +#ifndef _STRUCT_USER_TIMESPEC +#define _STRUCT_USER_TIMESPEC struct user_timespec +_STRUCT_USER_TIMESPEC +{ + user_time_t tv_sec; /* seconds */ + user_long_t tv_nsec; /* and nanoseconds */ +}; +#endif /* _STRUCT_USER_TIMESPEC */ +#endif /* KERNEL */ diff --git a/bsd/sys/_types/_user_timeval.h b/bsd/sys/_types/_user_timeval.h new file mode 100644 index 000000000..01ae2404e --- /dev/null +++ b/bsd/sys/_types/_user_timeval.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifdef KERNEL +#ifndef _STRUCT_USER_TIMEVAL +#define _STRUCT_USER_TIMEVAL struct user_timeval +_STRUCT_USER_TIMEVAL +{ + user_time_t tv_sec; /* seconds */ + __int32_t tv_usec; /* and microseconds */ +}; +#endif /* _STRUCT_USER_TIMEVAL */ +#endif /* KERNEL */ diff --git a/bsd/sys/_types/_uuid_t.h b/bsd/sys/_types/_uuid_t.h new file mode 100644 index 000000000..b61023892 --- /dev/null +++ b/bsd/sys/_types/_uuid_t.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
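The user32_/user64_/user_ struct variants above exist so the kernel can copy time structures in from both ILP32 and LP64 processes without guessing field widths: the kernel works in the LP64-safe user_ form and widens the 32-bit layout on the way in. A hedged sketch of that pattern, assuming the existing xnu primitives proc_is64bit() and copyin(); the fetch_user_timespec helper name is illustrative, not from this patch:

#ifdef KERNEL
/* Hypothetical helper: widen a user process's timespec into the
 * kernel's LP64-safe struct user_timespec. proc_is64bit() and
 * copyin() are existing xnu primitives; the helper itself is a
 * sketch of the usual pattern, not code from the patch. */
static int
fetch_user_timespec(proc_t p, user_addr_t uaddr, struct user_timespec *out)
{
	int error;

	if (proc_is64bit(p)) {
		struct user64_timespec ts64;
		error = copyin(uaddr, &ts64, sizeof(ts64));
		if (error == 0) {
			out->tv_sec  = ts64.tv_sec;
			out->tv_nsec = ts64.tv_nsec;
		}
	} else {
		struct user32_timespec ts32;
		error = copyin(uaddr, &ts32, sizeof(ts32));
		if (error == 0) {
			out->tv_sec  = ts32.tv_sec;    /* widens to 64-bit */
			out->tv_nsec = ts32.tv_nsec;
		}
	}
	return error;
}
#endif /* KERNEL */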
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _UUID_T +#define _UUID_T +typedef __darwin_uuid_t uuid_t; +#endif /* _UUID_T */ diff --git a/bsd/sys/_types/_va_list.h b/bsd/sys/_types/_va_list.h new file mode 100644 index 000000000..c36072a5f --- /dev/null +++ b/bsd/sys/_types/_va_list.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _VA_LIST_T +#define _VA_LIST_T +typedef __darwin_va_list va_list; +#endif /* _VA_LIST_T */ diff --git a/bsd/sys/_types/_wchar_t.h b/bsd/sys/_types/_wchar_t.h new file mode 100644 index 000000000..5a5d56cb7 --- /dev/null +++ b/bsd/sys/_types/_wchar_t.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
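The va_list typedef above (from __darwin_va_list) is the carrier for variadic arguments; the canonical use is a printf-style wrapper that forwards its arguments to a v-variant. A minimal sketch — standard C stdarg usage, with the log_line name purely illustrative:

#include <stdarg.h>
#include <stdio.h>

/* printf-style wrapper: va_list carries the variadic
 * arguments down to vfprintf(). */
static int log_line(const char *fmt, ...)
{
	va_list ap;
	int n;

	va_start(ap, fmt);
	n = vfprintf(stderr, fmt, ap);
	va_end(ap);
	return n;
}

int main(void)
{
	log_line("pid=%d status=%s\n", 42, "ok");
	return 0;
}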
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* wchar_t is a built-in type in C++ */ +#ifndef __cplusplus +#ifndef _WCHAR_T +#define _WCHAR_T +typedef __darwin_wchar_t wchar_t; +#endif /* _WCHAR_T */ +#endif /* __cplusplus */ diff --git a/bsd/sys/_types/_wint_t.h b/bsd/sys/_types/_wint_t.h new file mode 100644 index 000000000..d1bbbad87 --- /dev/null +++ b/bsd/sys/_types/_wint_t.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _WINT_T +#define _WINT_T +typedef __darwin_wint_t wint_t; +#endif /* _WINT_T */ diff --git a/bsd/sys/aio.h b/bsd/sys/aio.h index d4f7e7a68..71b2a8133 100644 --- a/bsd/sys/aio.h +++ b/bsd/sys/aio.h @@ -47,22 +47,14 @@ * * In our case, this is limited to struct timespec, off_t and ssize_t. 
*/ -#define __need_struct_timespec +#include <sys/_types/_timespec.h> #ifdef KERNEL -#define __need_struct_user64_timespec -#define __need_struct_user32_timespec +#include <sys/_types/_user64_timespec.h> +#include <sys/_types/_user32_timespec.h> #endif /* KERNEL */ -#include <sys/_types.h> -#ifndef _OFF_T -typedef __darwin_off_t off_t; -#define _OFF_T -#endif - -#ifndef _SSIZE_T -#define _SSIZE_T -typedef __darwin_ssize_t ssize_t; -#endif +#include <sys/_types/_off_t.h> +#include <sys/_types/_ssize_t.h> /* * A aio_fsync() options that the calling thread is to continue execution @@ -71,14 +63,8 @@ typedef __darwin_ssize_t ssize_t; * * [XSI] from <fcntl.h> */ -#ifndef O_SYNC /* allow simultaneous inclusion of <fcntl.h> */ -#define O_SYNC 0x0080 /* synch I/O file integrity */ -#endif - -#ifndef O_DSYNC /* allow simultaneous inclusion of <fcntl.h> */ -#define O_DSYNC 0x400000 /* synch I/O data integrity */ -#endif - +#include <sys/_types/_o_sync.h> +#include <sys/_types/_o_dsync.h> struct aiocb { int aio_fildes; /* File descriptor */ diff --git a/bsd/sys/aio_kern.h b/bsd/sys/aio_kern.h index 18a801f98..4b08724b1 100644 --- a/bsd/sys/aio_kern.h +++ b/bsd/sys/aio_kern.h @@ -53,6 +53,7 @@ struct aio_workq_entry user_addr_t uaiocbp; /* pointer passed in from user land */ struct user_aiocb aiocb; /* copy of aiocb from user land */ vm_map_t aio_map; /* user land map we have a reference to */ + thread_t thread; /* thread that queued this request */ /* Entry lock */ int aio_refcount; @@ -100,7 +101,7 @@ __private_extern__ void _aio_create_worker_threads(int num); __private_extern__ void -aio_init(void) __attribute__((section("__TEXT, initcode"))); +aio_init(void); task_t get_aiotask(void); diff --git a/bsd/sys/attr.h b/bsd/sys/attr.h index e33ad2b86..4eacfc744 100644 --- a/bsd/sys/attr.h +++ b/bsd/sys/attr.h @@ -489,11 +489,11 @@ struct user32_fssearchblock { #endif /* KERNEL */ - struct searchstate { - u_char reserved[556]; // sizeof( SearchState ) -}; - + uint32_t ss_union_flags; // for SRCHFS_START + uint32_t ss_union_layer; // 0 = top + u_char ss_fsstate[548]; // fs private +} __attribute__((packed)); #define FST_EOF (-1) /* end-of-file offset */ diff --git a/bsd/sys/bitstring.h b/bsd/sys/bitstring.h new file mode 100644 index 000000000..f4bb7fa54 --- /dev/null +++ b/bsd/sys/bitstring.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/*- + * Copyright (c) 1989, 1993 + * The Regents of the University of California.
All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Vixie. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _SYS_BITSTRING_H_ +#define _SYS_BITSTRING_H_ + +#ifdef XNU_KERNEL_PRIVATE +#include + +typedef uint8_t bitstr_t; + +/* internal macros */ + /* byte of the bitstring bit is in */ +#define _bit_byte(bit) \ + ((bit) >> 3) + + /* mask for the bit within its byte */ +#define _bit_mask(bit) \ + (1 << ((bit) & 0x7)) + +/* external macros */ + /* bytes in a bitstring of nbits bits */ +#define bitstr_size(nbits) \ + (((nbits) + 7) >> 3) + + /* allocate a bitstring on the stack */ +#define bit_decl(name, nbits) \ + ((name)[bitstr_size(nbits)]) + + /* is bit N of bitstring name set? */ +#define bit_test(name, bit) \ + ((name)[_bit_byte(bit)] & _bit_mask(bit)) + + /* set bit N of bitstring name */ +#define bit_set(name, bit) \ + ((name)[_bit_byte(bit)] |= _bit_mask(bit)) + + /* set bit N of bitstring name (atomic) */ +#define bit_set_atomic(name, bit) \ + atomic_bitset_8(&((name)[_bit_byte(bit)]), _bit_mask(bit)) + + /* clear bit N of bitstring name */ +#define bit_clear(name, bit) \ + ((name)[_bit_byte(bit)] &= ~_bit_mask(bit)) + + /* clear bit N of bitstring name (atomic) */ +#define bit_clear_atomic(name, bit) \ + atomic_bitclear_8(&((name)[_bit_byte(bit)]), _bit_mask(bit)) + + /* clear bits start ... stop in bitstring */ +#define bit_nclear(name, start, stop) do { \ + bitstr_t *_name = (name); \ + int _start = (start), _stop = (stop); \ + int _startbyte = _bit_byte(_start); \ + int _stopbyte = _bit_byte(_stop); \ + if (_startbyte == _stopbyte) { \ + _name[_startbyte] &= ((0xff >> (8 - (_start & 0x7))) | \ + (0xff << ((_stop & 0x7) + 1))); \ + } else { \ + _name[_startbyte] &= 0xff >> (8 - (_start & 0x7)); \ + while (++_startbyte < _stopbyte) \ + _name[_startbyte] = 0; \ + _name[_stopbyte] &= 0xff << ((_stop & 0x7) + 1); \ + } \ +} while (0) + + /* set bits start ... 
stop in bitstring */ +#define bit_nset(name, start, stop) do { \ + bitstr_t *_name = (name); \ + int _start = (start), _stop = (stop); \ + int _startbyte = _bit_byte(_start); \ + int _stopbyte = _bit_byte(_stop); \ + if (_startbyte == _stopbyte) { \ + _name[_startbyte] |= ((0xff << (_start & 0x7)) & \ + (0xff >> (7 - (_stop & 0x7)))); \ + } else { \ + _name[_startbyte] |= 0xff << ((_start) & 0x7); \ + while (++_startbyte < _stopbyte) \ + _name[_startbyte] = 0xff; \ + _name[_stopbyte] |= 0xff >> (7 - (_stop & 0x7)); \ + } \ +} while (0) + + /* find first bit clear in name */ +#define bit_ffc(name, nbits, value) do { \ + bitstr_t *_name = (name); \ + int _byte, _nbits = (nbits); \ + int _stopbyte = _bit_byte(_nbits - 1), _value = -1; \ + if (_nbits > 0) \ + for (_byte = 0; _byte <= _stopbyte; ++_byte) \ + if (_name[_byte] != 0xff) { \ + bitstr_t _lb; \ + _value = _byte << 3; \ + for (_lb = _name[_byte]; (_lb & 0x1); \ + ++_value, _lb >>= 1); \ + break; \ + } \ + if (_value >= nbits) \ + _value = -1; \ + *(value) = _value; \ +} while (0) + + /* find first bit set in name */ +#define bit_ffs(name, nbits, value) do { \ + bitstr_t *_name = (name); \ + int _byte, _nbits = (nbits); \ + int _stopbyte = _bit_byte(_nbits - 1), _value = -1; \ + if (_nbits > 0) \ + for (_byte = 0; _byte <= _stopbyte; ++_byte) \ + if (_name[_byte]) { \ + bitstr_t _lb; \ + _value = _byte << 3; \ + for (_lb = _name[_byte]; !(_lb & 0x1); \ + ++_value, _lb >>= 1); \ + break; \ + } \ + if (_value >= nbits) \ + _value = -1; \ + *(value) = _value; \ +} while (0) + +#endif /* XNU_KERNEL_PRIVATE */ +#endif /* !_SYS_BITSTRING_H_ */ diff --git a/bsd/sys/buf.h b/bsd/sys/buf.h index 43af7fb1a..93c885982 100644 --- a/bsd/sys/buf.h +++ b/bsd/sys/buf.h @@ -89,7 +89,7 @@ #define B_FUA 0x00000400 /* Write-through disk cache(if supported) */ #define B_PASSIVE 0x00000800 /* PASSIVE I/Os are ignored by THROTTLE I/O */ #define B_IOSTREAMING 0x00001000 /* sequential access pattern detected */ -#define B_THROTTLED_IO 0x00002000 /* low priority I/O */ +#define B_THROTTLED_IO 0x00002000 /* low priority I/O (deprecated) */ #define B_ENCRYPTED_IO 0x00004000 /* Encrypted I/O */ #define B_STATICCONTENT 0x00008000 /* Buffer is likely to remain unaltered */ @@ -1100,6 +1100,24 @@ void bufattr_setcpoff(bufattr_t, uint64_t); */ int bufattr_rawencrypted(bufattr_t bap); +/*! + @function bufattr_markgreedymode + @abstract Mark a buffer to use the greedy mode for writing. + @param bap Buffer attributes to mark. + @discussion Greedy Mode: request improved write performance from the underlying device at the expense of storage efficiency. + @return void. + */ + void bufattr_markgreedymode(bufattr_t bap); + +/*! + @function bufattr_greedymode + @abstract Check if a buffer is written using the Greedy Mode. + @param bap Buffer attributes to test. + @discussion Greedy Mode: request improved write performance from the underlying device at the expense of storage efficiency. + @return Nonzero if buffer uses greedy mode, 0 otherwise. + */ +int bufattr_greedymode(bufattr_t bap); + /*! @function bufattr_throttled @abstract Check if a buffer is throttled. @@ -1133,6 +1151,34 @@ int bufattr_meta(bufattr_t bap); */ int bufattr_delayidlesleep(bufattr_t bap); +/*! + @function buf_kernel_addrperm_addr + @abstract Obfuscate the buf pointers. + @param addr Buf_t pointer. + @return Obfuscated pointer if addr is non-zero, 0 otherwise. + */ +vm_offset_t buf_kernel_addrperm_addr(void * addr); +
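/*
 * [Editorial sketch -- not part of the patch.] Typical use of the
 * <sys/bitstring.h> macros introduced earlier in this patch, managing a
 * hypothetical 64-entry slot table. A static bit_decl() array starts out
 * zeroed; bit_ffc() reports -1 once every bit is set.
 */
#include <sys/bitstring.h>

static bitstr_t bit_decl(slot_map, 64);	/* expands to: bitstr_t slot_map[8]; */

static int
slot_alloc(void)
{
	int slot;

	bit_ffc(slot_map, 64, &slot);	/* first clear bit, or -1 if full */
	if (slot >= 0)
		bit_set(slot_map, slot);
	return (slot);
}

static void
slot_free(int slot)
{
	bit_clear(slot_map, slot);
}
+/*!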
+ @function bufattr_markquickcomplete + @abstract Mark a buffer to hint quick completion to the driver. + @discussion This flag hints to the storage driver that some thread is waiting for this I/O to complete. + It should therefore attempt to complete it as soon as possible at the cost of device efficiency. + @param bap Buffer attributes to mark. + @return void. + */ +void bufattr_markquickcomplete(bufattr_t bap); + +/*! + @function bufattr_quickcomplete + @abstract Check if a buffer is marked for quick completion. + @discussion This flag hints to the storage driver that some thread is waiting for this I/O to complete. + It should therefore attempt to complete it as soon as possible at the cost of device efficiency. + @param bap Buffer attributes to test. + @return Nonzero if the buffer is marked for quick completion, 0 otherwise. + */ +int bufattr_quickcomplete(bufattr_t bap); + #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h index 016632623..6c698d044 100644 --- a/bsd/sys/buf_internal.h +++ b/bsd/sys/buf_internal.h @@ -145,6 +145,7 @@ struct buf { #endif }; +extern vm_offset_t buf_kernel_addrperm; /* cluster_io definitions for use with io bufs */ #define b_uploffset b_bufsize @@ -183,7 +184,7 @@ struct buf { * mask used by buf_flags... these are the readable external flags */ #define BUF_X_RDFLAGS (B_PHYS | B_RAW | B_LOCKED | B_ASYNC | B_READ | B_WRITE | B_PAGEIO |\ - B_META | B_CLUSTER | B_DELWRI | B_FUA | B_PASSIVE | B_IOSTREAMING | B_THROTTLED_IO |\ + B_META | B_CLUSTER | B_DELWRI | B_FUA | B_PASSIVE | B_IOSTREAMING |\ B_ENCRYPTED_IO | B_STATICCONTENT) /* * mask used by buf_clearflags/buf_setflags... these are the writable external flags @@ -191,6 +192,26 @@ struct buf { #define BUF_X_WRFLAGS (B_PHYS | B_RAW | B_LOCKED | B_ASYNC | B_READ | B_WRITE | B_PAGEIO |\ B_NOCACHE | B_FUA | B_PASSIVE | B_IOSTREAMING) +#if 0 +/* b_flags defined in buf.h */ +#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ +#define B_READ 0x00000001 /* Read buffer. */ +#define B_ASYNC 0x00000002 /* Start I/O, do not wait. */ +#define B_NOCACHE 0x00000004 /* Do not cache block after use. */ +#define B_DELWRI 0x00000008 /* Delay I/O until buffer reused. */ +#define B_LOCKED 0x00000010 /* Locked in core (not reusable). */ +#define B_PHYS 0x00000020 /* I/O to user memory. */ +#define B_CLUSTER 0x00000040 /* UPL based I/O generated by cluster layer */ +#define B_PAGEIO 0x00000080 /* Page in/out */ +#define B_META 0x00000100 /* buffer contains meta-data. */ +#define B_RAW 0x00000200 /* Set by physio for raw transfers. */ +#define B_FUA 0x00000400 /* Write-through disk cache(if supported) */ +#define B_PASSIVE 0x00000800 /* PASSIVE I/Os are ignored by THROTTLE I/O */ +#define B_IOSTREAMING 0x00001000 /* sequential access pattern detected */ +#define B_ENCRYPTED_IO 0x00004000 /* Encrypted I/O */ +#define B_STATICCONTENT 0x00008000 /* Buffer is likely to remain unaltered */ +#endif +
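/*
 * [Editorial sketch -- not part of the patch.] One plausible use of the
 * bufattr hints documented above, from a filesystem that knows a thread is
 * blocked on a particular write. issue_urgent_write is hypothetical;
 * buf_attr() and VNOP_STRATEGY() are the existing xnu interfaces.
 */
static int
issue_urgent_write(buf_t bp)
{
	bufattr_t bap = buf_attr(bp);

	bufattr_markquickcomplete(bap);	/* a waiter needs this I/O soon */
	bufattr_markgreedymode(bap);	/* favor speed over storage efficiency */
	return (VNOP_STRATEGY(bp));
}
/* * These flags are kept in b_flags...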
access is lockless * External flags are defined in buf.h and cannot overlap @@ -235,7 +256,13 @@ struct buf { #define BA_DELAYIDLESLEEP 0x00000004 /* Process is marked to delay idle sleep on disk IO */ #define BA_NOCACHE 0x00000008 #define BA_META 0x00000010 +#define BA_IO_TIER_MASK 0x00000f00 +#define BA_IO_TIER_SHIFT 8 +#define BA_GREEDY_MODE 0x00000020 /* High speed writes that consume more storage */ +#define BA_QUICK_COMPLETE 0x00000040 /* Request quick completion at expense of storage efficiency */ +#define GET_BUFATTR_IO_TIER(bap) ((bap->ba_flags & BA_IO_TIER_MASK) >> BA_IO_TIER_SHIFT) +#define SET_BUFATTR_IO_TIER(bap, tier) (bap->ba_flags |= ((tier << BA_IO_TIER_SHIFT) & BA_IO_TIER_MASK)) extern int niobuf_headers; /* The number of IO buffer headers for cluster IO */ extern int nbuf_headers; /* The number of buffer headers */ @@ -265,7 +292,7 @@ buf_t alloc_io_buf(vnode_t, int); void free_io_buf(buf_t); int allocbuf(struct buf *, int); -void bufinit(void) __attribute__((section("__TEXT, initcode"))); +void bufinit(void); /* * Flags for buf_acquire @@ -278,7 +305,7 @@ void bufinit(void) __attribute__((section("__TEXT, initcode"))); void buf_list_lock(void); void buf_list_unlock(void); -void cluster_init(void) __attribute__((section("__TEXT, initcode"))); +void cluster_init(void); void buf_drop(buf_t); errno_t buf_acquire(buf_t, int, int, int); diff --git a/bsd/sys/cdefs.h b/bsd/sys/cdefs.h index 2cbc7fef6..08363c1d4 100644 --- a/bsd/sys/cdefs.h +++ b/bsd/sys/cdefs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -146,11 +146,37 @@ #define __used __attribute__((used)) /* __deprecated causes the compiler to produce a warning when encountering - * code using the deprecated functionality. This may require turning on - * such wardning with the -Wdeprecated flag. + * code using the deprecated functionality. + * __deprecated_msg() does the same, and compilers that support it will print + * a message along with the deprecation warning. + * This may require turning on such warning with the -Wdeprecated flag. + * __deprecated_enum_msg() should be used on enums, and compilers that support + * it will print the deprecation warning. */ #define __deprecated __attribute__((deprecated)) +#ifdef __has_extension + #if __has_extension(attribute_deprecated_with_message) + #define __deprecated_msg(_msg) __attribute__((deprecated(_msg))) + #else + #define __deprecated_msg(_msg) __attribute__((deprecated)) + #endif +#elif defined(__GNUC__) && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))) + #define __deprecated_msg(_msg) __attribute__((deprecated(_msg))) +#else + #define __deprecated_msg(_msg) __attribute__((deprecated)) +#endif + +#ifdef __has_extension + #if __has_extension(enumerator_attributes) + #define __deprecated_enum_msg(_msg) __deprecated_msg(_msg) + #else + #define __deprecated_enum_msg(_msg) + #endif +#else + #define __deprecated_enum_msg(_msg) +#endif + /* __unavailable causes the compiler to error out when encountering * code using the tagged function of variable. */ @@ -172,6 +198,44 @@ #define __restrict restrict #endif +/* Declaring inline functions within headers is error-prone due to differences + * across various versions of the C language and extensions. __header_inline + * can be used to declare inline functions within system headers. 
In cases + * where you want to force inlining instead of letting the compiler make + * the decision, you can use __header_always_inline. + * + * Be aware that using inline for functions which compilers may also provide + * builtins can behave differently under various compilers. If you intend to + * provide an inline version of such a function, you may want to use a macro + * instead. + * + * The check for !__GNUC__ || __clang__ is because gcc doesn't correctly + * support c99 inline in some cases: + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55965 + */ + +#if __STDC_VERSION__ >= 199901L && (!defined(__GNUC__) || defined(__clang__)) +# define __header_inline inline +#elif defined(__GNUC__) && defined(__GNUC_STDC_INLINE__) +# define __header_inline extern __inline __attribute__((__gnu_inline__)) +#elif defined(__GNUC__) +# define __header_inline extern __inline +#else + /* If we land here, we've encountered an unsupported compiler, + * so hopefully it understands static __inline as a fallback. + */ +# define __header_inline static __inline +#endif + +#ifdef __GNUC__ +# define __header_always_inline __header_inline __attribute__ ((__always_inline__)) +#else + /* Unfortunately, we're using a compiler that we don't know how to force to + * inline. Oh well. + */ +# define __header_always_inline __header_inline +#endif + /* * Compiler-dependent macros to declare that functions take printf-like * or scanf-like arguments. They are null except for versions of gcc @@ -208,6 +272,17 @@ #define __FBSDID(s) #endif +#ifndef __DECONST +#define __DECONST(type, var) __CAST_AWAY_QUALIFIER(var, const, type) +#endif + +#ifndef __DEVOLATILE +#define __DEVOLATILE(type, var) __CAST_AWAY_QUALIFIER(var, volatile, type) +#endif + +#ifndef __DEQUALIFY +#define __DEQUALIFY(type, var) __CAST_AWAY_QUALIFIER(var, const volatile, type) +#endif /* * COMPILATION ENVIRONMENTS -- see compat(5) for additional detail @@ -530,6 +605,13 @@ #define __DARWIN_C_LEVEL __DARWIN_C_FULL #endif +/* If the developer has neither requested a strict language mode nor a version + * of POSIX, turn on functionality provided by __STDC_WANT_LIB_EXT1__ as part + * of __DARWIN_C_FULL. + */ +#if !defined(__STDC_WANT_LIB_EXT1__) && !defined(__STRICT_ANSI__) && __DARWIN_C_LEVEL >= __DARWIN_C_FULL +#define __STDC_WANT_LIB_EXT1__ 1 +#endif /* * long long is not supported in c89 (__STRICT_ANSI__), but g++ -ansi and @@ -596,4 +678,21 @@ #define __CAST_AWAY_QUALIFIER(variable, qualifier, type) (type) (long)(variable) #endif
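/*
 * [Editorial sketch -- not part of the patch.] Using the annotations
 * introduced above in a header. The names old_api, kOptionLegacy and
 * clamp_nonneg are illustrative only.
 */
#include <sys/cdefs.h>

void old_api(void) __deprecated_msg("use new_api() instead");

enum {
	kOptionLegacy __deprecated_enum_msg("no longer honored") = 0x1,
	kOptionDefault = 0x2
};

__header_always_inline int
clamp_nonneg(int v)
{
	return (v < 0 ? 0 : v);
}
+/* + * __XNU_PRIVATE_EXTERN is a linkage decoration indicating that a symbol can be + * used from other compilation units, but not other libraries or executables.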
+ */ +#ifndef __XNU_PRIVATE_EXTERN +#define __XNU_PRIVATE_EXTERN __attribute__((visibility("hidden"))) +#endif + +/* + * Architecture validation for current SDK + */ +#if !defined(__sys_cdefs_arch_unknown__) && defined(__i386__) +#elif !defined(__sys_cdefs_arch_unknown__) && defined(__x86_64__) +#else +#error Unsupported architecture +#endif + #endif /* !_CDEFS_H_ */ diff --git a/bsd/sys/codesign.h b/bsd/sys/codesign.h index 5894d982c..21e05670b 100644 --- a/bsd/sys/codesign.h +++ b/bsd/sys/codesign.h @@ -29,39 +29,186 @@ #ifndef _SYS_CODESIGN_H_ #define _SYS_CODESIGN_H_ -#include - /* code signing attributes of a process */ -#define CS_VALID 0x0001 /* dynamically valid */ -#define CS_HARD 0x0100 /* don't load invalid pages */ -#define CS_KILL 0x0200 /* kill process if it becomes invalid */ -#define CS_EXEC_SET_HARD 0x1000 /* set CS_HARD on any exec'ed process */ -#define CS_EXEC_SET_KILL 0x2000 /* set CS_KILL on any exec'ed process */ -#define CS_KILLED 0x10000 /* was killed by kernel for invalidity */ -#define CS_RESTRICT 0x20000 /* tell dyld to treat restricted */ +#define CS_VALID 0x0000001 /* dynamically valid */ +#define CS_ADHOC 0x0000002 /* ad hoc signed */ + +#define CS_HARD 0x0000100 /* don't load invalid pages */ +#define CS_KILL 0x0000200 /* kill process if it becomes invalid */ +#define CS_CHECK_EXPIRATION 0x0000400 /* force expiration checking */ +#define CS_RESTRICT 0x0000800 /* tell dyld to treat restricted */ +#define CS_ENFORCEMENT 0x0001000 /* require enforcement */ + +#define CS_ALLOWED_MACHO 0x00ffffe + +#define CS_EXEC_SET_HARD 0x0100000 /* set CS_HARD on any exec'ed process */ +#define CS_EXEC_SET_KILL 0x0200000 /* set CS_KILL on any exec'ed process */ +#define CS_EXEC_SET_ENFORCEMENT 0x0400000 /* set CS_ENFORCEMENT on any exec'ed process */ + +#define CS_KILLED 0x1000000 /* was killed by kernel for invalidity */ /* csops operations */ #define CS_OPS_STATUS 0 /* return status */ #define CS_OPS_MARKINVALID 1 /* invalidate process */ #define CS_OPS_MARKHARD 2 /* set HARD flag */ #define CS_OPS_MARKKILL 3 /* set KILL flag (sticky) */ -#define CS_OPS_PIDPATH 4 /* get executable's pathname */ +#ifdef KERNEL_PRIVATE +/* CS_OPS_PIDPATH 4 */ +#endif #define CS_OPS_CDHASH 5 /* get code directory hash */ #define CS_OPS_PIDOFFSET 6 /* get offset of active Mach-o slice */ #define CS_OPS_ENTITLEMENTS_BLOB 7 /* get entitlements blob */ #define CS_OPS_MARKRESTRICT 8 /* set RESTRICT flag (sticky) */ +#define CS_OPS_SET_STATUS 9 /* set codesign flags */ +#define CS_OPS_BLOB 10 /* get codesign blob */ +#define CS_OPS_IDENTITY 11 /* get codesign identity */ + +/* SigPUP */ +#define CS_OPS_SIGPUP_INSTALL 20 +#define CS_OPS_SIGPUP_DROP 21 +#define CS_OPS_SIGPUP_VALIDATE 22 + +struct sigpup_install_table { + uint64_t data; + uint64_t length; + uint64_t path; +}; + + +/* + * Magic numbers used by Code Signing + */ +enum { + CSMAGIC_REQUIREMENT = 0xfade0c00, /* single Requirement blob */ + CSMAGIC_REQUIREMENTS = 0xfade0c01, /* Requirements vector (internal requirements) */ + CSMAGIC_CODEDIRECTORY = 0xfade0c02, /* CodeDirectory blob */ + CSMAGIC_EMBEDDED_SIGNATURE = 0xfade0cc0, /* embedded form of signature data */ + CSMAGIC_EMBEDDED_SIGNATURE_OLD = 0xfade0b02, /* XXX */ + CSMAGIC_EMBEDDED_ENTITLEMENTS = 0xfade7171, /* embedded entitlements */ + CSMAGIC_DETACHED_SIGNATURE = 0xfade0cc1, /* multi-arch collection of embedded signatures */ + CSMAGIC_BLOBWRAPPER = 0xfade0b01, /* CMS Signature, among other things */ + + CS_SUPPORTSSCATTER = 0x20100, + + CSSLOT_CODEDIRECTORY = 0, /* slot index for 
CodeDirectory */ + CSSLOT_INFOSLOT = 1, + CSSLOT_REQUIREMENTS = 2, + CSSLOT_RESOURCEDIR = 3, + CSSLOT_APPLICATION = 4, + CSSLOT_ENTITLEMENTS = 5, + + CSSLOT_SIGNATURESLOT = 0x10000, /* CMS Signature */ + + CSTYPE_INDEX_REQUIREMENTS = 0x00000002, /* compat with amfi */ + CSTYPE_INDEX_ENTITLEMENTS = 0x00000005, /* compat with amfi */ + + CS_HASHTYPE_SHA1 = 1 +}; + + +#define KERNEL_HAVE_CS_CODEDIRECTORY 1 + +/* + * C form of a CodeDirectory. + */ +typedef struct __CodeDirectory { + uint32_t magic; /* magic number (CSMAGIC_CODEDIRECTORY) */ + uint32_t length; /* total length of CodeDirectory blob */ + uint32_t version; /* compatibility version */ + uint32_t flags; /* setup and mode flags */ + uint32_t hashOffset; /* offset of hash slot element at index zero */ + uint32_t identOffset; /* offset of identifier string */ + uint32_t nSpecialSlots; /* number of special hash slots */ + uint32_t nCodeSlots; /* number of ordinary (code) hash slots */ + uint32_t codeLimit; /* limit to main image signature range */ + uint8_t hashSize; /* size of each hash in bytes */ + uint8_t hashType; /* type of hash (cdHashType* constants) */ + uint8_t spare1; /* unused (must be zero) */ + uint8_t pageSize; /* log2(page size in bytes); 0 => infinite */ + uint32_t spare2; /* unused (must be zero) */ + /* Version 0x20100 */ + uint32_t scatterOffset; /* offset of optional scatter vector */ + /* followed by dynamic content as located by offset fields above */ +} CS_CodeDirectory; + +/* + * Structure of an embedded-signature SuperBlob + */ + +typedef struct __BlobIndex { + uint32_t type; /* type of entry */ + uint32_t offset; /* offset of entry */ +} CS_BlobIndex; + +typedef struct __SC_SuperBlob { + uint32_t magic; /* magic number */ + uint32_t length; /* total length of SuperBlob */ + uint32_t count; /* number of index entries following */ + CS_BlobIndex index[]; /* (count) entries */ + /* followed by Blobs in no particular order as indicated by offsets in index */ +} CS_SuperBlob; + +typedef struct __SC_GenericBlob { + uint32_t magic; /* magic number */ + uint32_t length; /* total length of blob */ + char data[]; +} CS_GenericBlob; + +typedef struct __SC_Scatter { + uint32_t count; // number of pages; zero for sentinel (only) + uint32_t base; // first page number + uint64_t targetOffset; // offset in target + uint64_t spare; // reserved +} SC_Scatter; + #ifndef KERNEL -__BEGIN_DECLS +#include <sys/types.h> +__BEGIN_DECLS /* code sign operations */ int csops(pid_t pid, unsigned int ops, void * useraddr, size_t usersize); -/* works only with CS_OPS_PIDPATH and CS_OPS_ENTITLEMENTS_BLOB */ int csops_audittoken(pid_t pid, unsigned int ops, void * useraddr, size_t usersize, audit_token_t * token); +__END_DECLS + +#else /* !KERNEL */ + +#include + +struct vnode; + +struct cscsr_functions { + int csr_version; +#define CSCSR_VERSION 1 + int (*csr_validate_header)(const uint8_t *, size_t); + const void* (*csr_find_file_codedirectory)(struct vnode *, const uint8_t *, size_t, size_t *); +}; + +__BEGIN_DECLS +int cs_enforcement(struct proc *); +int cs_entitlements_blob_get(struct proc *, void **out_start, size_t *out_length); +uint8_t * cs_get_cdhash(struct proc *); +void cs_register_cscsr(struct cscsr_functions *); __END_DECLS
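/*
 * [Editorial sketch -- not part of the patch.] Walking an embedded-signature
 * SuperBlob with the structures above to locate its CodeDirectory. All
 * multi-byte blob fields are stored big-endian, hence the ntohl() swaps.
 * find_code_directory is a hypothetical helper; xnu's real validation lives
 * in bsd/kern/ubc_subr.c.
 */
static const CS_CodeDirectory *
find_code_directory(const CS_SuperBlob *sb)
{
	uint32_t i, count = ntohl(sb->count);

	if (ntohl(sb->magic) != CSMAGIC_EMBEDDED_SIGNATURE)
		return (NULL);
	for (i = 0; i < count; i++) {
		if (ntohl(sb->index[i].type) == CSSLOT_CODEDIRECTORY) {
			const CS_CodeDirectory *cd = (const CS_CodeDirectory *)
			    ((const char *)sb + ntohl(sb->index[i].offset));
			if (ntohl(cd->magic) == CSMAGIC_CODEDIRECTORY)
				return (cd);
		}
	}
	return (NULL);
}
-#endif /* !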
KERNEL */ +#ifdef XNU_KERNEL_PRIVATE + +void cs_init(void); +int cs_allow_invalid(struct proc *); +int cs_invalid_page(addr64_t); +int sigpup_install(user_addr_t); +int sigpup_drop(void); + +extern int cs_debug; +extern int cs_validation; +#if !SECURE_KERNEL +extern int cs_enforcement_panic; +#endif + +#endif /* XNU_KERNEL_PRIVATE */ + +#endif /* KERNEL */ #endif /* _SYS_CODESIGN_H_ */ diff --git a/bsd/sys/conf.h b/bsd/sys/conf.h index bd6e518b0..2d7dd1d33 100644 --- a/bsd/sys/conf.h +++ b/bsd/sys/conf.h @@ -93,12 +93,6 @@ struct vnode; #define D_TTY 3 #ifdef KERNEL -/* - * Flags for d_type (squeezed into the top half of d_type). - */ -#define D_TYPEMASK 0xffff -#define D_TRACKCLOSE 0x00010000 /* track all closes */ - /* * Device switch function types. */ diff --git a/bsd/sys/content_protection.h b/bsd/sys/content_protection.h index a4066e184..d35884f1b 100644 --- a/bsd/sys/content_protection.h +++ b/bsd/sys/content_protection.h @@ -35,6 +35,13 @@ * Protection classes vary in their restrictions on read/writability. A is generally * the strictest, and D is effectively no restriction. */ + +/* + * dir_none forces new items created in the directory to pick up the mount point default + * protection level. it is only allowed for directories. + */ +#define PROTECTION_CLASS_DIR_NONE 0 + #define PROTECTION_CLASS_A 1 #define PROTECTION_CLASS_B 2 #define PROTECTION_CLASS_C 3 diff --git a/bsd/sys/cprotect.h b/bsd/sys/cprotect.h index eb0a134fd..9a7c36d77 100644 --- a/bsd/sys/cprotect.h +++ b/bsd/sys/cprotect.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2009 Apple Inc. All rights reserved. + * Copyright (c) 2009-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ @@ -40,38 +40,43 @@ extern "C" { #include #include -#define CP_IV_KEYSIZE 20 /* 16x8 = 128, but SHA1 pushes 20 bytes so keep space for that */ -#define CP_MAX_KEYSIZE 32 /* 8x4 = 32, 32x8 = 256 */ -#define CP_MAX_WRAPPEDKEYSIZE 128 /* The size of the largest allowed key */ +#define CP_IV_KEYSIZE 20 /* 16x8 = 128, but SHA1 pushes 20 bytes so keep space for that */ +#define CP_MAX_KEYSIZE 32 /* 8x4 = 32, 32x8 = 256 */ +#define CP_MAX_WRAPPEDKEYSIZE 128 /* The size of the largest allowed key */ #define CP_INITIAL_WRAPPEDKEYSIZE 40 -#define CP_V2_WRAPPEDKEYSIZE 40 /* Size of the wrapped key in a v2 EA */ +#define CP_V2_WRAPPEDKEYSIZE 40 /* Size of the wrapped key in a v2 EA */ +#define CP_V4_RESERVEDBYTES 20 /* Number of reserved bytes in EA still present */ /* lock events from AppleKeyStore */ -#define CP_LOCKED_STATE 0 /* Device is locked */ -#define CP_UNLOCKED_STATE 1 /* Device is unlocked */ +#define CP_LOCKED_STATE 0 /* Device is locked */ +#define CP_UNLOCKED_STATE 1 /* Device is unlocked */ -#define CP_LOCKED_KEYCHAIN 0 -#define CP_UNLOCKED_KEYCHAIN 1 +#define CP_MAX_STATE 1 /* uint8_t ; maximum # of states is 255 */ -/* For struct cprotect: cp_flags */ -#define CP_NEEDS_KEYS 0x1 /* File needs persistent keys */ -#define CP_KEY_FLUSHED 0x2 /* File's unwrapped key has been purged from memory */ -#define CP_NO_XATTR 0x4 /* Key info has not been saved as EA to the FS */ -#define CP_OFF_IV_ENABLED 0x8 /* Only go down relative IV route if this flag is set */ +#define CP_LOCKED_KEYCHAIN 0 +#define CP_UNLOCKED_KEYCHAIN 1 -#define CP_RELOCATION_INFLIGHT 0x10 /* File with offset IVs is in the process of being relocated. */ +/* For struct cprotect: cp_flags */ +#define CP_NEEDS_KEYS 0x01 /* File needs persistent keys */ +#define CP_KEY_FLUSHED 0x02 /* File's unwrapped key has been purged from memory */ +#define CP_NO_XATTR 0x04 /* Key info has not been saved as EA to the FS */ +#define CP_OFF_IV_ENABLED 0x08 /* Only go down relative IV route if this flag is set */ +#define CP_RELOCATION_INFLIGHT 0x10 /* File with offset IVs is in the process of being relocated. */ +#define CP_SEP_WRAPPEDKEY 0x20 /* Wrapped key delivered from keybag */ /* Content Protection VNOP Operation flags */ -#define CP_READ_ACCESS 0x1 -#define CP_WRITE_ACCESS 0x2 +#define CP_READ_ACCESS 0x1 +#define CP_WRITE_ACCESS 0x2 -/* +/* * Check for this version when deciding to enable features + * For iOS 4, CP_CURRENT_MAJOR_VERS = 2.0 + * For iOS 5, CP_CURRENT_MAJOR_VERS = 4.0 */ #define CONTENT_PROTECTION_XATTR_NAME "com.apple.system.cprotect" -#define CP_NEW_MAJOR_VERS 4 -#define CP_PREV_MAJOR_VERS 2 -#define CP_MINOR_VERS 0 +#define CP_NEW_MAJOR_VERS 4 +#define CP_PREV_MAJOR_VERS 2 +#define CP_MINOR_VERS 0 typedef struct cprotect *cprotect_t; typedef struct cp_wrap_func *cp_wrap_func_t; @@ -82,13 +87,42 @@ typedef struct cnode * cnode_ptr_t; //forward declare the struct. 
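/*
 * [Editorial sketch -- not part of the patch.] The cp_flags bits above are
 * typically consulted before trusting a cached per-file key.
 * cp_key_available is a hypothetical helper (the real checks live in
 * bsd/hfs/hfs_cprotect.c); it assumes the cp_flags field of struct cprotect
 * referenced in the flag comments above.
 */
static int
cp_key_available(const struct cprotect *entry)
{
	/* No persistent keys yet, or the unwrapped key was purged. */
	if (entry->cp_flags & (CP_NEEDS_KEYS | CP_KEY_FLUSHED))
		return (0);
	return (1);
}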
struct hfsmount; -/* The wrappers are invoked by the AKS kext */ -typedef int wrapper_t(uint32_t properties, uint64_t file_id, void *key_bytes, size_t key_length, void *wrapped_data, size_t *wrapped_length); -typedef int unwrapper_t(uint32_t properties, void *wrapped_data, size_t wrapped_data_length, void *key_bytes, size_t *key_length); +/* Structures passed between HFS and AKS kext */ +typedef struct { + void *key; + unsigned key_len; + void *iv_key; + unsigned iv_key_len; + uint32_t flags; +} cp_raw_key_s, *cp_raw_key_t; + +typedef struct { + void *key; + unsigned key_len; + uint32_t dp_class; +} cp_wrapped_key_s, *cp_wrapped_key_t; + +typedef struct { + ino64_t inode; + uint32_t volume; + pid_t pid; + uid_t uid; +} cp_cred_s, *cp_cred_t; -/* - * Runtime-only structure containing the content protection status - * for the given file. This is contained within the cnode +/* The wrappers are invoked on the AKS kext */ +typedef int unwrapper_t(cp_cred_t access, const cp_wrapped_key_t wrapped_key_in, cp_raw_key_t key_out); +typedef int rewrapper_t(cp_cred_t access, uint32_t dp_class, const cp_wrapped_key_t wrapped_key_in, cp_wrapped_key_t wrapped_key_out); +typedef int new_key_t(cp_cred_t access, uint32_t dp_class, cp_raw_key_t key_out, cp_wrapped_key_t wrapped_key_out); +typedef int invalidater_t(cp_cred_t access); /* invalidates keys */ + + +/* Flags for Interaction between AKS / Kernel */ +#define CP_RAW_KEY_WRAPPEDKEY 0x00000001 + + +/* + * Runtime-only structure containing the content protection status + * for the given file. This is contained within the cnode * This is passed down to IOStorageFamily via the bufattr struct * ****************************************************** @@ -108,12 +142,15 @@ struct cprotect { uint32_t cp_cache_key_len; uint8_t cp_cache_key[CP_MAX_KEYSIZE]; uint32_t cp_persistent_key_len; + void* cp_backing_cnode; uint8_t cp_persistent_key[]; }; struct cp_wrap_func { - wrapper_t *wrapper; - unwrapper_t *unwrapper; + new_key_t *new_key; + unwrapper_t *unwrapper; + rewrapper_t *rewrapper; + invalidater_t *invalidater; }; struct cp_global_state { @@ -123,10 +160,13 @@ struct cp_global_state { }; /* - * On-disk structure written as the per-file EA payload + * On-disk structure written as the per-file EA payload * All on-disk multi-byte fields for the CP XATTR must be stored * little-endian on-disk. This means they must be endian swapped to - * L.E on getxattr() and converted to LE on setxattr(). + * L.E on getxattr() and converted to LE on setxattr(). + * + * This structure is a fixed length and is tightly packed. + * 56 bytes total. */ struct cp_xattr_v2 { u_int16_t xattr_major_version; @@ -135,7 +175,26 @@ struct cp_xattr_v2 { u_int32_t persistent_class; u_int32_t key_size; uint8_t persistent_key[CP_V2_WRAPPEDKEYSIZE]; -}; +} __attribute__((aligned(2), packed)); + + +/* + * V4 Content Protection EA On-Disk Layout. + * + * This structure must be tightly packed, but the *size can vary* + * depending on the length of the key. At MOST, the key length will be + * CP_MAX_WRAPPEDKEYSIZE, but the length is defined by the key_size field. + * + * Either way, the packing must be applied to ensure that the key data is + * retrievable in the right location relative to the start of the struct. + * + * Fully packed, this structure can range from : + * MIN: 36 bytes (no key -- used with directories) + * MAX: 164 bytes (with 128 byte key) + * + * During runtime we always allocate with the full 128 byte key, but only + * use as much of the key buffer as needed. 
It must be tightly packed, though. + */ struct cp_xattr_v4 { u_int16_t xattr_major_version; @@ -143,28 +202,32 @@ struct cp_xattr_v4 { u_int32_t flags; u_int32_t persistent_class; u_int32_t key_size; - u_int32_t reserved1; - u_int32_t reserved2; - u_int32_t reserved3; - u_int32_t reserved4; - u_int32_t reserved5; + /* CP V4 Reserved Bytes == 20 */ + u_int8_t reserved[CP_V4_RESERVEDBYTES]; + /* All above fields are fixed regardless of key length (36 bytes) */ + /* Max Wrapped Size == 128 */ uint8_t persistent_key[CP_MAX_WRAPPEDKEYSIZE]; -}; +} __attribute__((aligned(2), packed)); -/* Same is true for the root EA, all fields must be written little endian. */ -struct cp_root_xattr { - u_int16_t major_version; - u_int16_t minor_version; - u_int64_t flags; - u_int32_t reserved1; - u_int32_t reserved2; - u_int32_t reserved3; - u_int32_t reserved4; -}; + +/* + * The Root Directory's EA (fileid 1) is special; it defines information about + * what capabilities the filesystem is using. + * + * The data is still stored little endian. + * + * Note that this structure is tightly packed: 28 bytes total. + */ + struct cp_root_xattr { + u_int16_t major_version; + u_int16_t minor_version; + u_int64_t flags; + u_int8_t reserved[16]; +} __attribute__((aligned(2), packed)); -/* - * Functions to check the status of a CP and to query +/* + * Functions to check the status of a CP and to query * the containing filesystem to see if it is supported. */ int cp_vnode_getclass(vnode_t, int *); @@ -175,25 +238,24 @@ int cp_key_store_action(int); int cp_register_wraps(cp_wrap_func_t); int cp_entry_init(cnode_ptr_t, struct mount *); -int cp_entry_create_keys(struct cprotect **entry_ptr, struct cnode *dcp, struct hfsmount *hfsmp, - uint32_t input_class, uint32_t fileid, mode_t cmode); int cp_entry_gentempkeys(struct cprotect **entry_ptr, struct hfsmount *hfsmp); -void cp_entry_destroy(struct cprotect **entry_ptr); - +int cp_needs_tempkeys (struct hfsmount *hfsmp, int* needs); +void cp_entry_destroy(struct cprotect *entry_ptr); +void cp_replace_entry (struct cnode *cp, struct cprotect *newentry); cnode_ptr_t cp_get_protected_cnode(vnode_t); int cp_handle_vnop(vnode_t, int, int); int cp_fs_protected (mount_t); int cp_getrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *outxattr); int cp_setrootxattr (struct hfsmount *hfsmp, struct cp_root_xattr *newxattr); int cp_setxattr(struct cnode *cp, struct cprotect *entry, struct hfsmount *hfsmp, uint32_t fileid, int options); -int cp_update_mkb (struct cprotect *entry, uint32_t fileid); +int cp_generate_keys (struct hfsmount *hfsmp, struct cnode *cp, int targetclass, struct cprotect **newentry); +int cp_setup_newentry (struct hfsmount *hfsmp, struct cnode *dcp, int32_t suppliedclass, + mode_t cmode, struct cprotect **tmpentry); int cp_handle_relocate (cnode_ptr_t cp, struct hfsmount *hfsmp); int cp_handle_open(struct vnode *vp, int mode); int cp_get_root_major_vers (struct vnode *vp, uint32_t *level); - -#if 0 -int cp_isdevice_locked (void); -#endif +int cp_get_default_level (struct vnode *vp, uint32_t *level); +int cp_is_valid_class (int isdir, int32_t protectionclass); #endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/dirent.h b/bsd/sys/dirent.h index a559b8b37..57df2ad9e 100644 --- a/bsd/sys/dirent.h +++ b/bsd/sys/dirent.h @@ -78,10 +78,7 @@ #include #include -#ifndef _INO_T -typedef __darwin_ino_t ino_t; /* inode number */ -#define _INO_T -#endif +#include #define __DARWIN_MAXNAMLEN 255 diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index ba454ab69..0e136a413 100644 --- 
a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -32,6 +32,10 @@ #include #include +#ifdef XNU_KERNEL_PRIVATE +#include +#endif /* XNU_KERNEL_PRIVATE */ + /* * Definitions * @@ -172,6 +176,21 @@ typedef struct #define DKIOCLOCKPHYSICALEXTENTS _IO('d', 81) #define DKIOCGETPHYSICALEXTENT _IOWR('d', 82, dk_physical_extent_t) #define DKIOCUNLOCKPHYSICALEXTENTS _IO('d', 83) +#define DKIOCGETMAXPRIORITYCOUNT _IOR('d', 84, uint32_t) + +#ifdef XNU_KERNEL_PRIVATE +typedef struct +{ + boolean_t mi_mdev; /* Is this a memdev device? */ + boolean_t mi_phys; /* Physical memory? */ + uint32_t mi_base; /* Base page number of the device? */ + uint64_t mi_size; /* Size of the device (in ) */ +} dk_memdev_info_t; + +typedef dk_memdev_info_t memdev_info_t; + +#define DKIOCGETMEMDEVINFO _IOR('d', 90, dk_memdev_info_t) +#endif /* XNU_KERNEL_PRIVATE */ #ifdef PRIVATE typedef struct _dk_cs_pin { dk_extent_t cp_extent; @@ -181,6 +200,13 @@ typedef struct _dk_cs_pin { #define _DKIOCCSPINEXTENT _IOW('d', 199, _dk_cs_pin_t) #define _DKIOCCSUNPINEXTENT _IOW('d', 200, _dk_cs_pin_t) #define _DKIOCGETMIGRATIONUNITBYTESIZE _IOR('d', 201, uint32_t) +typedef struct _dk_cs_map { + dk_extent_t cm_extent; + uint64_t cm_bytes_mapped; +} _dk_cs_map_t; +#define _DKIOCCSMAP _IOWR('d', 202, _dk_cs_map_t) +#define _DKIOCCSSETFSVNODE _IOW('d', 203, vnode_t) +#define _DKIOCCSGETFREEBYTES _IOR('d', 204, uint64_t) #endif /* PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/sys/disklabel.h b/bsd/sys/disklabel.h index a56c354b7..61c14411d 100644 --- a/bsd/sys/disklabel.h +++ b/bsd/sys/disklabel.h @@ -225,7 +225,7 @@ struct disklabel { #define DTYPE_FLOPPY 10 /* floppy */ #ifdef DKTYPENAMES -static char *dktypenames[] = { +static const char *dktypenames[] = { "unknown", "SMD", "MSCP", @@ -265,7 +265,7 @@ static char *dktypenames[] = { #define FS_HFS 15 /* Macintosh HFS */ #ifdef DKTYPENAMES -static char *fstypenames[] = { +static const char *fstypenames[] = { "unused", "swap", "Version 6", diff --git a/bsd/sys/domain.h b/bsd/sys/domain.h index 9f6bd965f..3fe847cdf 100644 --- a/bsd/sys/domain.h +++ b/bsd/sys/domain.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2006, 2012 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1998, 1999 Apple Computer, Inc. 
All Rights Reserved */ @@ -67,65 +67,128 @@ #define _SYS_DOMAIN_H_ #ifdef PRIVATE - #include -#ifdef KERNEL +#include +#ifdef KERNEL_PRIVATE #include -#endif /* KERNEL */ /* * Structure per communications domain. */ -#include - /* * Forward structure declarations for function prototypes [sic]. */ struct mbuf; -#define DOM_REENTRANT 0x01 #pragma pack(4) -struct domain { +#ifdef XNU_KERNEL_PRIVATE +/* + * Legacy domain. + * + * NOTE: Do not modify this structure, as there are modules outside of xnu + * which rely on the size and layout for binary compatibility. This structure + * is simply used by the exported net_{add,del}_domain_old, pffinddomain_old + * routines. Internally, domains are stored in the private variant of domain + * defined down below. + */ +struct domain_old { +#else +struct domain { +#endif /* !XNU_KERNEL_PRIVATE */ int dom_family; /* AF_xxx */ - const char *dom_name; + const char *dom_name; void (*dom_init)(void); /* initialize domain data structures */ - int (*dom_externalize)(struct mbuf *); - /* externalize access rights */ - void (*dom_dispose)(struct mbuf *); - /* dispose of internalized rights */ + int (*dom_externalize) /* externalize access rights */ + (struct mbuf *); + void (*dom_dispose) /* dispose of internalized rights */ + (struct mbuf *); +#ifdef XNU_KERNEL_PRIVATE + struct protosw_old *dom_protosw; /* Chain of protosw's for AF */ + struct domain_old *dom_next; +#else struct protosw *dom_protosw; /* Chain of protosw's for AF */ struct domain *dom_next; - int (*dom_rtattach)(void **, int); - /* initialize routing table */ +#endif /* !XNU_KERNEL_PRIVATE */ + int (*dom_rtattach) /* initialize routing table */ + (void **, int); int dom_rtoffset; /* an arg to rtattach, in bits */ int dom_maxrtkey; /* for routing layer */ - int dom_protohdrlen; /* Let the protocol tell us */ + int dom_protohdrlen; /* len of protocol header */ int dom_refs; /* # socreates outstanding */ -#ifdef _KERN_LOCKS_H_ - lck_mtx_t *dom_mtx; /* domain global mutex */ -#else - void *dom_mtx; /* domain global mutex */ -#endif - uint32_t dom_flags; - uint32_t reserved[2]; + lck_mtx_t *dom_mtx; /* domain global mutex */ + uint32_t dom_flags; + uint32_t reserved[2]; }; #pragma pack() -#ifdef KERNEL -extern struct domain *domains; -extern struct domain localdomain; +#ifdef XNU_KERNEL_PRIVATE +/* + * Internal, private and extendable representation of domain. 
+ */ +struct domain { + int dom_family; /* AF_xxx */ + uint32_t dom_flags; /* domain flags (see below ) */ + uint32_t dom_refs; /* # socreates outstanding */ + lck_mtx_t *dom_mtx; /* domain global mutex */ + decl_lck_mtx_data(, dom_mtx_s); + TAILQ_ENTRY(domain) dom_entry; /* next domain in list */ + TAILQ_HEAD(, protosw) dom_protosw; /* protosw chain */ + void (*dom_init) /* initialize domain data structures */ + (struct domain *); + int (*dom_externalize) /* externalize access rights */ + (struct mbuf *); + void (*dom_dispose) /* dispose of internalized rights */ + (struct mbuf *); + int (*dom_rtattach) /* initialize routing table */ + (void **, int); + int dom_rtoffset; /* an arg to rtattach, in bits */ + int dom_maxrtkey; /* for routing layer */ + int dom_protohdrlen; /* len of protocol header */ + const char *dom_name; + struct domain_old *dom_old; /* domain pointer per net_add_domain */ +}; -__BEGIN_DECLS -extern void net_add_domain(struct domain *dp); -extern int net_del_domain(struct domain *); -extern int domain_proto_mtx_lock(void); -extern void domain_proto_mtx_unlock(int locked); -__END_DECLS +extern TAILQ_HEAD(domains_head, domain) domains; +extern struct domain *localdomain; +#endif /* XNU_KERNEL_PRIVATE */ + +/* + * Values for dom_flags + */ +#define DOM_REENTRANT 0x1 +#ifdef BSD_KERNEL_PRIVATE +#define DOM_INITIALIZED 0x2 /* domain has been initialized */ +#define DOM_ATTACHED 0x4 /* domain is in the global list */ +#define DOM_OLD 0x10000000 /* domain added via net_add_domain */ -#define DOMAIN_SET(domain_set) +/* pseudo-public domain flags */ +#define DOMF_USERFLAGS (DOM_REENTRANT) +#endif /* BSD_KERNEL_PRIVATE */ -#endif /* KERNEL */ +__BEGIN_DECLS +#ifdef XNU_KERNEL_PRIVATE +extern void net_add_domain_old(struct domain_old *dp); +extern int net_del_domain_old(struct domain_old *); +extern void net_drain_domains(void); +extern void domain_proto_mtx_lock_assert_held(void); +extern void domain_proto_mtx_lock_assert_notheld(void); +struct domain_guard; +typedef struct domain_guard *domain_guard_t; +extern domain_guard_t domain_guard_deploy(void); +extern void domain_guard_release(domain_guard_t); +struct domain_unguard; +typedef struct domain_unguard *domain_unguard_t; +extern domain_unguard_t domain_unguard_deploy(void); +extern void domain_unguard_release(domain_unguard_t); +extern struct domain_old *pffinddomain_old(int); +#else +extern void net_add_domain(struct domain *dp); +extern int net_del_domain(struct domain *); +#endif /* XNU_KERNEL_PRIVATE */ +extern struct domain *pffinddomain(int); +__END_DECLS +#endif /* KERNEL_PRIVATE */ #endif /* PRIVATE */ #endif /* _SYS_DOMAIN_H_ */ diff --git a/bsd/sys/dtrace.h b/bsd/sys/dtrace.h index cffaefdef..5267d5019 100644 --- a/bsd/sys/dtrace.h +++ b/bsd/sys/dtrace.h @@ -19,6 +19,10 @@ * CDDL HEADER END */ +/* + * Portions copyright (c) 2011, Joyent, Inc. All rights reserved. + */ + /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
@@ -571,6 +575,7 @@ typedef struct dtrace_difv { #define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6) #define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7) #define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8) +#define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9) #define DTRACEACT_ISAGG(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) @@ -612,6 +617,31 @@ typedef struct dtrace_difv { (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ DTRACE_LQUANTIZE_BASESHIFT) +#define DTRACE_LLQUANTIZE_FACTORSHIFT 48 +#define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48) +#define DTRACE_LLQUANTIZE_LOWSHIFT 32 +#define DTRACE_LLQUANTIZE_LOWMASK ((uint64_t)UINT16_MAX << 32) +#define DTRACE_LLQUANTIZE_HIGHSHIFT 16 +#define DTRACE_LLQUANTIZE_HIGHMASK ((uint64_t)UINT16_MAX << 16) +#define DTRACE_LLQUANTIZE_NSTEPSHIFT 0 +#define DTRACE_LLQUANTIZE_NSTEPMASK UINT16_MAX + +#define DTRACE_LLQUANTIZE_FACTOR(x) \ + (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \ + DTRACE_LLQUANTIZE_FACTORSHIFT) + +#define DTRACE_LLQUANTIZE_LOW(x) \ + (uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \ + DTRACE_LLQUANTIZE_LOWSHIFT) + +#define DTRACE_LLQUANTIZE_HIGH(x) \ + (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \ + DTRACE_LLQUANTIZE_HIGHSHIFT) + +#define DTRACE_LLQUANTIZE_NSTEPS(x) \ + (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \ + DTRACE_LLQUANTIZE_NSTEPSHIFT) + #define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) #define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) #define DTRACE_USTACK_ARG(x, y) \ diff --git a/bsd/sys/dtrace_glue.h b/bsd/sys/dtrace_glue.h index 6b3665b02..71f688919 100644 --- a/bsd/sys/dtrace_glue.h +++ b/bsd/sys/dtrace_glue.h @@ -135,12 +135,13 @@ typedef struct cpu_core { extern cpu_core_t *cpu_core; +extern unsigned int dtrace_max_cpus; /* max number of enabled cpus */ +#define NCPU dtrace_max_cpus extern int cpu_number(void); /* From #include <kern/cpu_number.h>. Called from probe context, must blacklist. */ #define CPU (&(cpu_list[cpu_number()])) /* Pointer to current CPU */ #define CPU_ON_INTR(cpup) ml_at_interrupt_context() /* always invoked on current cpu */ -#define NCPU real_ncpus /* * Routines used to register interest in cpu's being added to or removed @@ -497,15 +498,15 @@ extern void vmem_free(vmem_t *vmp, void *vaddr, size_t size); * Atomic */ -static inline void atomic_add_32( uint32_t *theValue, int32_t theAmount ) +static inline void atomic_add_32( uint32_t *theAddress, int32_t theAmount ) { - (void)OSAddAtomic( theAmount, theValue ); + (void)OSAddAtomic( theAmount, theAddress ); } #if defined(__i386__) || defined(__x86_64__) -static inline void atomic_add_64( uint64_t *theValue, int64_t theAmount ) +static inline void atomic_add_64( uint64_t *theAddress, int64_t theAmount ) { - (void)OSAddAtomic64( theAmount, (SInt64 *)theValue ); + (void)OSAddAtomic64( theAmount, (SInt64 *)theAddress ); } #endif
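/*
 * [Editorial sketch -- not part of the patch.] The new llquantize()
 * aggregation packs its four parameters (factor, low, high, nsteps) into a
 * single 64-bit argument; the DTRACE_LLQUANTIZE_* macros above unpack it.
 * For instance, llquantize(x, 10, 0, 6, 20) would be encoded as:
 */
uint64_t arg = ((uint64_t)10 << DTRACE_LLQUANTIZE_FACTORSHIFT) |
    ((uint64_t)0 << DTRACE_LLQUANTIZE_LOWSHIFT) |
    ((uint64_t)6 << DTRACE_LLQUANTIZE_HIGHSHIFT) |
    ((uint64_t)20 << DTRACE_LLQUANTIZE_NSTEPSHIFT);

/* so that DTRACE_LLQUANTIZE_FACTOR(arg) == 10, DTRACE_LLQUANTIZE_LOW(arg) == 0,
 * DTRACE_LLQUANTIZE_HIGH(arg) == 6 and DTRACE_LLQUANTIZE_NSTEPS(arg) == 20. */
diff --git a/bsd/sys/dtrace_impl.h b/bsd/sys/dtrace_impl.h index 38614c300..2b02707c7 100644 --- a/bsd/sys/dtrace_impl.h +++ b/bsd/sys/dtrace_impl.h @@ -1215,6 +1215,8 @@ struct dtrace_provider { void *dtpv_arg; /* provider argument */ uint_t dtpv_defunct; /* boolean: defunct provider */ struct dtrace_provider *dtpv_next; /* next provider */ + uint64_t probe_count; /* no. of associated probes */ + uint64_t ecb_count; /* no.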
of associated enabled ECBs */ }; struct dtrace_meta { diff --git a/bsd/sys/errno.h b/bsd/sys/errno.h index 8105a42b3..522fec1f8 100644 --- a/bsd/sys/errno.h +++ b/bsd/sys/errno.h @@ -72,6 +72,11 @@ #include <sys/cdefs.h> #if !defined(KERNEL) && !defined(KERNEL_PRIVATE) + +#if defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 +#include <sys/_types/_errno_t.h> +#endif + __BEGIN_DECLS extern int * __error(void); #define errno (*__error()) @@ -269,6 +274,7 @@ __END_DECLS #define ERECYCLE (-5) /* restart lookup under heavy vnode pressure/recycling */ #define EREDRIVEOPEN (-6) #define EKEEPLOOKING (-7) +#define ERESERVEDNAME (-8) /* path is known but not usable */ /* used for cvwait error returns to Libc */ #define ECVCERORR 256 #define ECVPERORR 512 diff --git a/bsd/sys/event.h b/bsd/sys/event.h index 47fb6dafe..6e834d594 100644 --- a/bsd/sys/event.h +++ b/bsd/sys/event.h @@ -75,9 +75,10 @@ #ifdef PRIVATE #define EVFILT_SOCK (-13) /* Socket events */ +#define EVFILT_MEMORYSTATUS (-14) /* Memorystatus events */ #endif /* PRIVATE */ -#define EVFILT_SYSCOUNT 13 +#define EVFILT_SYSCOUNT 14 #define EVFILT_THREADMARKER EVFILT_SYSCOUNT /* Internal use only */ #pragma pack(4) @@ -237,34 +238,55 @@ struct kevent64_s { * that hangs off the proc structure. They also both play games with the hint * passed to KNOTE(). If NOTE_SIGNAL is passed as a hint, then the lower bits * of the hint contain the signal. If NOTE_FORK is passed, then the lower bits - * contain the PID of the child. + * contain the PID of the child (but the pid does not get passed through in + * the actual kevent). -#define NOTE_EXIT 0x80000000 /* process exited */ -#define NOTE_FORK 0x40000000 /* process forked */ -#define NOTE_EXEC 0x20000000 /* process exec'd */ -#define NOTE_REAP 0x10000000 /* process reaped */ -#define NOTE_SIGNAL 0x08000000 /* shared with EVFILT_SIGNAL */ -#define NOTE_EXITSTATUS 0x04000000 /* exit status to be returned, valid for child process only */ -#define NOTE_RESOURCEEND 0x02000000 /* resource limit reached, resource type returned */ - -#if CONFIG_EMBEDDED -/* 0x01000000 is reserved for future use */ - -/* App states notification */ -#define NOTE_APPACTIVE 0x00800000 /* app went to active state */ -#define NOTE_APPBACKGROUND 0x00400000 /* app went to background */ -#define NOTE_APPNONUI 0x00200000 /* app went to active with no UI */ -#define NOTE_APPINACTIVE 0x00100000 /* app went to inactive state */ -#define NOTE_APPALLSTATES 0x00f00000 -#endif /* CONFIG_EMBEDDED */ - -#define NOTE_PDATAMASK 0x000fffff /* mask for pid/signal */ +enum { + eNoteReapDeprecated __deprecated_enum_msg("This kqueue(2) EVFILT_PROC flag is deprecated") = 0x10000000 +}; + +#define NOTE_EXIT 0x80000000 /* process exited */ +#define NOTE_FORK 0x40000000 /* process forked */ +#define NOTE_EXEC 0x20000000 /* process exec'd */ +#define NOTE_REAP ((unsigned int)eNoteReapDeprecated /* 0x10000000 */) /* process reaped */ +#define NOTE_SIGNAL 0x08000000 /* shared with EVFILT_SIGNAL */ +#define NOTE_EXITSTATUS 0x04000000 /* exit status to be returned, valid for child process only */ +#define NOTE_EXIT_DETAIL 0x02000000 /* provide details on reasons for exit */ + + +#define NOTE_PDATAMASK 0x000fffff /* mask for signal & exit status */ #define NOTE_PCTRLMASK (~NOTE_PDATAMASK)
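/*
 * [Editorial sketch -- not part of the patch.] Requesting the new exit-detail
 * reporting from userspace. "pid" stands for a hypothetical process of
 * interest; on exit, the returned kevent's data carries the status and its
 * fflags may carry NOTE_EXIT_DETAIL_MASK bits (e.g. NOTE_EXIT_MEMORY)
 * alongside NOTE_EXIT.
 */
#include <sys/event.h>

struct kevent64_s kev;

EV_SET64(&kev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE,
    NOTE_EXIT | NOTE_EXITSTATUS | NOTE_EXIT_DETAIL, 0, 0, 0, 0);
/* then register it with kevent64(kq, &kev, 1, NULL, 0, 0, NULL) */
/* * If NOTE_EXITSTATUS is present, provide additional info about exiting process.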
*/ -#define NOTE_EXIT_REPARENTED 0x00080000 /* exited while reparented */ +enum { + eNoteExitReparentedDeprecated __deprecated_enum_msg("This kqueue(2) EVFILT_PROC flag is no longer sent") = 0x00080000 +}; +#define NOTE_EXIT_REPARENTED ((unsigned int)eNoteExitReparentedDeprecated) /* exited while reparented */ + +/* + * If NOTE_EXIT_DETAIL is present, these bits indicate specific reasons for exiting. + */ +#define NOTE_EXIT_DETAIL_MASK 0x00070000 +#define NOTE_EXIT_DECRYPTFAIL 0x00010000 +#define NOTE_EXIT_MEMORY 0x00020000 +#define NOTE_EXIT_CSERROR 0x00040000 + +#ifdef PRIVATE + +/* + * If NOTE_EXIT_MEMORY is present, these bits indicate specific jetsam condition. + */ +#define NOTE_EXIT_MEMORY_DETAIL_MASK 0xfc000000 +#define NOTE_EXIT_MEMORY_VMPAGESHORTAGE 0x80000000 /* jetsam condition: lowest jetsam priority proc killed due to vm page shortage */ +#define NOTE_EXIT_MEMORY_VMTHRASHING 0x40000000 /* jetsam condition: lowest jetsam priority proc killed due to vm thrashing */ +#define NOTE_EXIT_MEMORY_HIWAT 0x20000000 /* jetsam condition: process reached its high water mark */ +#define NOTE_EXIT_MEMORY_PID 0x10000000 /* jetsam condition: special pid kill requested */ +#define NOTE_EXIT_MEMORY_IDLE 0x08000000 /* jetsam condition: idle process cleaned up */ +#define NOTE_EXIT_MEMORY_VNODE 0X04000000 /* jetsam condition: virtual node kill */ + +#endif /* * data/hint fflags for EVFILT_VM, shared with userspace. @@ -274,6 +296,24 @@ struct kevent64_s { #define NOTE_VM_PRESSURE_SUDDEN_TERMINATE 0x20000000 /* will quit immediately on memory pressure */ #define NOTE_VM_ERROR 0x10000000 /* there was an error */ +#ifdef PRIVATE + +/* + * data/hint fflags for EVFILT_MEMORYSTATUS, shared with userspace. + */ +#define NOTE_MEMORYSTATUS_PRESSURE_NORMAL 0x00000001 /* system memory pressure has returned to normal */ +#define NOTE_MEMORYSTATUS_PRESSURE_WARN 0x00000002 /* system memory pressure has changed to the warning state */ +#define NOTE_MEMORYSTATUS_PRESSURE_CRITICAL 0x00000004 /* system memory pressure has changed to the critical state */ + +typedef enum vm_pressure_level { + kVMPressureNormal = 0, + kVMPressureWarning = 1, + kVMPressureUrgent = 2, + kVMPressureCritical = 3, +} vm_pressure_level_t; + +#endif + /* * data/hint fflags for EVFILT_TIMER, shared with userspace. * The default is a (repeating) interval timer with the data @@ -286,6 +326,9 @@ struct kevent64_s { #define NOTE_NSECONDS 0x00000004 /* data is nanoseconds */ #define NOTE_ABSOLUTE 0x00000008 /* absolute timeout */ /* ... implicit EV_ONESHOT */ +#define NOTE_LEEWAY 0x00000010 /* ext[1] holds leeway for power aware timers */ +#define NOTE_CRITICAL 0x00000020 /* system does minimal timer coalescing */ +#define NOTE_BACKGROUND 0x00000040 /* system does maximum timer coalescing */ #ifdef PRIVATE /* * data/hint fflags for EVFILT_SOCK, shared with userspace. 
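Two illustrative sketches follow, keyed to the dtrace.h and event.h hunks above; neither is part of the patch itself.

First, the DTRACE_LLQUANTIZE_* masks added in the dtrace.h hunk pack the four llquantize() parameters into a single 64-bit aggregation argument. A minimal sketch of the packing side (pack_llq() is a hypothetical helper, and it assumes the shift constants from that hunk are in scope, e.g. via <sys/dtrace.h>):

#include <stdint.h>

/* Pack factor/low/high/nsteps into the layout the kernel decodes with
 * DTRACE_LLQUANTIZE_FACTOR()/_LOW()/_HIGH()/_NSTEPS(). */
static uint64_t
pack_llq(uint16_t factor, uint16_t low, uint16_t high, uint16_t nsteps)
{
	return (((uint64_t)factor << DTRACE_LLQUANTIZE_FACTORSHIFT) |
	        ((uint64_t)low    << DTRACE_LLQUANTIZE_LOWSHIFT)    |
	        ((uint64_t)high   << DTRACE_LLQUANTIZE_HIGHSHIFT)   |
	        ((uint64_t)nsteps << DTRACE_LLQUANTIZE_NSTEPSHIFT));
}
/* e.g. llquantize(x, 10, 0, 6, 20) corresponds to pack_llq(10, 0, 6, 20):
 * log-linear buckets from 10^0 to 10^6, 20 steps per decade. */

Second, the reworked EVFILT_PROC flags can be exercised from userspace with kevent64(2). A minimal sketch, assuming a 10.9-era <sys/event.h>, with error handling abbreviated:

#include <sys/types.h>
#include <sys/event.h>
#include <stdio.h>
#include <unistd.h>

static void
watch_exit(pid_t pid)
{
	int kq = kqueue();
	struct kevent64_s kev;

	/* One-shot knote: fire on exit, report status and exit detail. */
	EV_SET64(&kev, pid, EVFILT_PROC, EV_ADD | EV_ONESHOT,
	    NOTE_EXIT | NOTE_EXITSTATUS | NOTE_EXIT_DETAIL, 0, 0, 0, 0);

	if (kevent64(kq, &kev, 1, &kev, 1, 0, NULL) == 1) {
		/* NOTE_EXIT_DETAIL_MASK bits are only set when detail
		 * was requested and a specific reason applies. */
		if (kev.fflags & NOTE_EXIT_MEMORY)
			printf("%d: killed for memory (jetsam)\n", pid);
		if (kev.fflags & NOTE_EXIT_DECRYPTFAIL)
			printf("%d: decryption failure\n", pid);
		if (kev.fflags & NOTE_EXIT_CSERROR)
			printf("%d: code-signing error\n", pid);
		/* With NOTE_EXITSTATUS, the wait(2)-style status rides in data. */
		printf("%d: exit status 0x%llx\n", pid,
		    (unsigned long long)kev.data);
	}
	close(kq);
}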
@@ -300,6 +343,11 @@ struct kevent64_s { #define NOTE_SUSPEND 0x00000040 /* output queue suspended */ #define NOTE_RESUME 0x00000080 /* output queue resumed */ #define NOTE_KEEPALIVE 0x00000100 /* TCP Keepalive received */ +#define NOTE_ADAPTIVE_WTIMO 0x00000200 /* TCP adaptive write timeout */ +#define NOTE_ADAPTIVE_RTIMO 0x00000400 /* TCP adaptive read timeout */ +#define NOTE_CONNECTED 0x00000800 /* socket is connected */ +#define NOTE_DISCONNECTED 0x00001000 /* socket is disconnected */ +#define NOTE_CONNINFO_UPDATED 0x00002000 /* connection info was updated */ #endif /* PRIVATE */ @@ -345,6 +393,7 @@ SLIST_HEAD(klist, knote); #ifdef KERNEL_PRIVATE #include +#include #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_KQUEUE); @@ -409,7 +458,7 @@ struct proc; struct wait_queue; SLIST_HEAD(klist, knote); -extern void knote_init(void) __attribute__((section("__TEXT, initcode"))); +extern void knote_init(void); extern void klist_init(struct klist *list); #define KNOTE(list, hint) knote(list, hint) diff --git a/bsd/sys/fasttrap_impl.h b/bsd/sys/fasttrap_impl.h index a4017cc41..840fcde2d 100644 --- a/bsd/sys/fasttrap_impl.h +++ b/bsd/sys/fasttrap_impl.h @@ -201,7 +201,7 @@ extern int fasttrap_tracepoint_init(proc_t *, fasttrap_tracepoint_t *, extern int fasttrap_tracepoint_install(proc_t *, fasttrap_tracepoint_t *); extern int fasttrap_tracepoint_remove(proc_t *, fasttrap_tracepoint_t *); -#if defined (__i386__) || defined(__x86_64__) +#if defined(__x86_64__) extern int fasttrap_pid_probe(x86_saved_state_t *regs); extern int fasttrap_return_probe(x86_saved_state_t* regs); #else diff --git a/bsd/sys/fbt.h b/bsd/sys/fbt.h index 3796443ea..dd07bc527 100644 --- a/bsd/sys/fbt.h +++ b/bsd/sys/fbt.h @@ -29,7 +29,7 @@ #ifndef _FBT_H #define _FBT_H -#if defined(__i386__) || defined (__x86_64__) +#if defined (__x86_64__) typedef uint8_t machine_inst_t; #else #error Unknown Architecture diff --git a/bsd/sys/fcntl.h b/bsd/sys/fcntl.h index acd5f4c2f..22d4eb8ea 100644 --- a/bsd/sys/fcntl.h +++ b/bsd/sys/fcntl.h @@ -82,25 +82,10 @@ #endif /* We should not be exporting size_t here. Temporary for gcc bootstrapping. */ -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif - -#ifndef _MODE_T -typedef __darwin_mode_t mode_t; -#define _MODE_T -#endif - -#ifndef _OFF_T -typedef __darwin_off_t off_t; -#define _OFF_T -#endif - -#ifndef _PID_T -typedef __darwin_pid_t pid_t; -#define _PID_T -#endif +#include +#include +#include +#include /* * File status flags: these are used by open(2), fcntl(2). 
@@ -129,9 +114,9 @@ typedef __darwin_pid_t pid_t; #endif #define O_NONBLOCK 0x0004 /* no delay */ #define O_APPEND 0x0008 /* set append mode */ -#ifndef O_SYNC /* allow simultaneous inclusion of */ -#define O_SYNC 0x0080 /* synch I/O file integrity */ -#endif + +#include + #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define O_SHLOCK 0x0010 /* open with shared file lock */ #define O_EXLOCK 0x0020 /* open with exclusive file lock */ @@ -167,9 +152,7 @@ typedef __darwin_pid_t pid_t; #define O_SYMLINK 0x200000 /* allow open of a symlink */ #endif -#ifndef O_DSYNC /* allow simultaneous inclusion of */ -#define O_DSYNC 0x400000 /* synch I/O data integrity */ -#endif +#include #ifdef KERNEL #define FNODIRECT 0x800000 /* fcntl(F_NODIRECT, 1) */ @@ -187,6 +170,10 @@ typedef __darwin_pid_t pid_t; #define FSINGLE_WRITER 0x4000000 /* fcntl(F_SINGLE_WRITER, 1) */ #endif +#ifdef KERNEL +#define O_CLOFORK 0x8000000 /* implicitly set FD_CLOFORK */ +#endif + /* Data Protection Flags */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define O_DP_GETRAWENCRYPTED 0x0001 @@ -244,6 +231,9 @@ typedef __darwin_pid_t pid_t; #define F_GETLK 7 /* get record locking information */ #define F_SETLK 8 /* set record locking information */ #define F_SETLKW 9 /* F_SETLK; wait if blocked */ +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL +#define F_SETLKWTIMEOUT 10 /* F_SETLK; wait if blocked, return on timeout */ +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define F_FLUSH_DATA 40 #define F_CHKCLEAN 41 /* Used for regression test */ @@ -251,8 +241,9 @@ typedef __darwin_pid_t pid_t; #define F_SETSIZE 43 /* Truncate a file without zeroing space */ #define F_RDADVISE 44 /* Issue an advisory read async with no copy to user */ #define F_RDAHEAD 45 /* turn read ahead off/on for this fd */ -#define F_READBOOTSTRAP 46 /* Read bootstrap from disk */ -#define F_WRITEBOOTSTRAP 47 /* Write bootstrap on disk */ +/* + * 46,47 used to be F_READBOOTSTRAP and F_WRITEBOOTSTRAP + */ #define F_NOCACHE 48 /* turn data caching off/on for this fd */ #define F_LOG2PHYS 49 /* file offset to device offset */ #define F_GETPATH 50 /* return the full path of the fd */ @@ -270,7 +261,10 @@ typedef __darwin_pid_t pid_t; #define F_ADDSIGS 59 /* add detached signatures */ +#ifdef PRIVATE +/* Deprecated/Removed in 10.9 */ #define F_MARKDEPENDENCY 60 /* this process hosts the device supporting the fs backing this fd */ +#endif #define F_ADDFILESIGS 61 /* add signature from same file (used by dyld for shared libs) */ @@ -309,6 +303,18 @@ typedef __darwin_pid_t pid_t; #define F_GETPROTECTIONLEVEL 77 /* Get the protection version number for this filesystem */ +#define F_FINDSIGS 78 /* Add detached code signatures (used by dyld for shared libs) */ + +#ifdef PRIVATE +#define F_GETDEFAULTPROTLEVEL 79 /* Get the default protection level for the filesystem */ +#define F_MAKECOMPRESSED 80 /* Make the file compressed; truncate & toggle BSD bits */ +#define F_SET_GREEDY_MODE 81 /* + * indicate to the filesystem/storage driver that the content to be + * written should be written in greedy mode for additional speed at + * the cost of storage efficiency. A nonzero value enables it, 0 disables it. 
+ */ +#endif + // FS-specific fcntl()'s numbers begin at 0x00010000 and go up #define FCNTL_FS_SPECIFIC_BASE 0x00010000 @@ -321,6 +327,9 @@ typedef __darwin_pid_t pid_t; /* file descriptor flags (F_GETFD, F_SETFD) */ #define FD_CLOEXEC 1 /* close-on-exec flag */ +#if PRIVATE +#define FD_CLOFORK 2 /* close-on-fork flag */ +#endif /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ #define F_RDLCK 1 /* shared or read lock */ @@ -339,58 +348,13 @@ typedef __darwin_pid_t pid_t; * [XSI] The values used for l_whence shall be defined as described * in */ -#ifndef SEEK_SET -#define SEEK_SET 0 /* set file offset to offset */ -#define SEEK_CUR 1 /* set file offset to current plus offset */ -#define SEEK_END 2 /* set file offset to EOF plus offset */ -#endif /* !SEEK_SET */ +#include /* * [XSI] The symbolic names for file modes for use as values of mode_t * shall be defined as described in */ -#ifndef S_IFMT -/* File type */ -#define S_IFMT 0170000 /* [XSI] type of file mask */ -#define S_IFIFO 0010000 /* [XSI] named pipe (fifo) */ -#define S_IFCHR 0020000 /* [XSI] character special */ -#define S_IFDIR 0040000 /* [XSI] directory */ -#define S_IFBLK 0060000 /* [XSI] block special */ -#define S_IFREG 0100000 /* [XSI] regular */ -#define S_IFLNK 0120000 /* [XSI] symbolic link */ -#define S_IFSOCK 0140000 /* [XSI] socket */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define S_IFWHT 0160000 /* OBSOLETE: whiteout */ -#endif - -/* File mode */ -/* Read, write, execute/search by owner */ -#define S_IRWXU 0000700 /* [XSI] RWX mask for owner */ -#define S_IRUSR 0000400 /* [XSI] R for owner */ -#define S_IWUSR 0000200 /* [XSI] W for owner */ -#define S_IXUSR 0000100 /* [XSI] X for owner */ -/* Read, write, execute/search by group */ -#define S_IRWXG 0000070 /* [XSI] RWX mask for group */ -#define S_IRGRP 0000040 /* [XSI] R for group */ -#define S_IWGRP 0000020 /* [XSI] W for group */ -#define S_IXGRP 0000010 /* [XSI] X for group */ -/* Read, write, execute/search by others */ -#define S_IRWXO 0000007 /* [XSI] RWX mask for other */ -#define S_IROTH 0000004 /* [XSI] R for other */ -#define S_IWOTH 0000002 /* [XSI] W for other */ -#define S_IXOTH 0000001 /* [XSI] X for other */ - -#define S_ISUID 0004000 /* [XSI] set user id on execution */ -#define S_ISGID 0002000 /* [XSI] set group id on execution */ -#define S_ISVTX 0001000 /* [XSI] directory restrcted delete */ - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define S_ISTXT S_ISVTX /* sticky bit: not supported */ -#define S_IREAD S_IRUSR /* backward compatability */ -#define S_IWRITE S_IWUSR /* backward compatability */ -#define S_IEXEC S_IXUSR /* backward compatability */ -#endif -#endif /* !S_IFMT */ +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) /* allocate flags (F_PREALLOCATE) */ @@ -417,6 +381,19 @@ struct flock { short l_whence; /* type of l_start */ }; +#include + +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL +/* + * Advisory file segment locking with time out - + * Information passed to system by user for F_SETLKWTIMEOUT + */ +struct flocktimeout { + struct flock fl; /* flock passed for file locking */ + struct timespec timeout; /* timespec struct for timeout */ +}; +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ + #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) /* * advisory file read data type - @@ -587,11 +564,9 @@ struct user_fopenfrom { #ifndef KERNEL #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef _FILESEC_T -struct _filesec; -typedef struct _filesec 
*filesec_t; -#define _FILESEC_T -#endif + +#include + typedef enum { FILESEC_OWNER = 1, FILESEC_GROUP = 2, diff --git a/bsd/sys/file_internal.h b/bsd/sys/file_internal.h index 473415d1d..52836e020 100644 --- a/bsd/sys/file_internal.h +++ b/bsd/sys/file_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -74,6 +74,7 @@ #include #include #include +#include struct proc; struct uio; @@ -99,7 +100,7 @@ struct fileproc { #define FP_INCREATE 0x0001 #define FP_INCLOSE 0x0002 #define FP_INSELECT 0x0004 -#define FP_INCHRREAD 0x0000 /* disable FP_INCHRREAD */ +#define FP_UNUSED 0x0008 /* unused (was FP_INCHRREAD) */ #define FP_WRITTEN 0x0010 #define FP_CLOSING 0x0020 #define FP_WAITCLOSE 0x0040 @@ -107,8 +108,24 @@ struct fileproc { #define FP_WAITEVENT 0x0100 #define FP_SELCONFLICT 0x0200 /* select conflict on an individual fp */ -#define FP_VALID_FLAGS (FP_INCREATE | FP_INCLOSE | FP_INSELECT | FP_INCHRREAD | FP_WRITTEN | FP_CLOSING | FP_WAITCLOSE | FP_AIOISSUED | FP_WAITEVENT | FP_SELCONFLICT) +/* squeeze a "type" value into the upper flag bits */ +#define _FP_TYPESHIFT 24 +#define FP_TYPEMASK (0x7 << _FP_TYPESHIFT) /* 8 "types" of fileproc */ + +#define FILEPROC_TYPE(fp) ((fp)->f_flags & FP_TYPEMASK) + +#define FP_ISGUARDED(fp, attribs) \ + ((FILEPROC_TYPE(fp) == FTYPE_GUARDED) ? fp_isguarded(fp, attribs) : 0) + +typedef enum { + FTYPE_SIMPLE = 0, + FTYPE_GUARDED = (1 << _FP_TYPESHIFT) +} fileproc_type_t; + +#define FP_VALID_FLAGS (FP_INCREATE | FP_INCLOSE | FP_INSELECT |\ + FP_WRITTEN | FP_CLOSING | FP_WAITCLOSE |\ + FP_AIOISSUED | FP_WAITEVENT | FP_SELCONFLICT | _FP_TYPEMASK) #ifndef _KAUTH_CRED_T #define _KAUTH_CRED_T @@ -126,7 +143,8 @@ typedef enum { DTYPE_PSXSEM, /* POSIX Semaphores */ DTYPE_KQUEUE, /* kqueue */ DTYPE_PIPE, /* pipe */ - DTYPE_FSEVENTS /* fsevents */ + DTYPE_FSEVENTS, /* fsevents */ + DTYPE_ATALK /* (obsolete) */ } file_type_t; /* defines for fg_lflags */ @@ -141,11 +159,12 @@ typedef enum { struct fileglob { LIST_ENTRY(fileglob) f_msglist;/* list of active files */ int32_t fg_flag; /* see fcntl.h */ - file_type_t fg_type; /* descriptor type */ int32_t fg_count; /* reference count */ int32_t fg_msgcount; /* references from message queue */ + int32_t fg_lflags; /* file global flags */ kauth_cred_t fg_cred; /* credentials associated with descriptor */ - struct fileops { + const struct fileops { + file_type_t fo_type; /* descriptor type */ int (*fo_read) (struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx); int (*fo_write) (struct fileproc *fp, struct uio *uio, @@ -164,7 +183,6 @@ struct fileglob { off_t fg_offset; void *fg_data; /* vnode or socket or SHM or semaphore */ lck_mtx_t fg_lock; - int32_t fg_lflags; /* file global flags */ #if CONFIG_MACF struct label *fg_label; /* JMM - use the one in the cred? 
*/ #endif @@ -176,6 +194,8 @@ extern struct fmsglist fmsghead; /* head of list of open files */ extern int maxfiles; /* kernel limit on number of open files */ extern int nfiles; /* actual number of open files */ extern int maxfilesperproc; + +#define FILEGLOB_DTYPE(fg) ((const file_type_t)((fg)->fg_ops->fo_type)) #endif /* __APPLE_API_PRIVATE */ @@ -188,6 +208,7 @@ int fo_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx); int fo_close(struct fileglob *fg, vfs_context_t ctx); int fo_kqfilter(struct fileproc *fp, struct knote *kn, vfs_context_t ctx); void fileproc_drain(proc_t, struct fileproc *); +int fp_tryswap(proc_t, int fd, struct fileproc *nfp); int fp_drop(struct proc *p, int fd, struct fileproc *fp, int locked); int fp_drop_written(proc_t p, int fd, struct fileproc *fp); int fp_drop_event(proc_t p, int fd, struct fileproc *fp); @@ -208,7 +229,16 @@ int fp_getfvpandvid(struct proc *p, int fd, struct fileproc **resultfp, struct v struct socket; int fp_getfsock(struct proc *p, int fd, struct fileproc **resultfp, struct socket **results); int fp_lookup(struct proc *p, int fd, struct fileproc **resultfp, int locked); +int fp_isguarded(struct fileproc *fp, u_int attribs); +int fp_guard_exception(proc_t p, int fd, struct fileproc *fp, u_int attribs); int closef_locked(struct fileproc *fp, struct fileglob *fg, struct proc *p); +int close_internal_locked(proc_t p, int fd, struct fileproc *fp, int flags); +struct nameidata; +struct vnode_attr; +int open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, + struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, + int32_t *retval); +int kqueue_body(struct proc *p, fp_allocfn_t, void *cra, int32_t *retval); void fg_insertuipc(struct fileglob * fg); void fg_removeuipc(struct fileglob * fg); void unp_gc_wait(void); @@ -219,6 +249,9 @@ void procfdtbl_waitfd(struct proc * p, int fd); void procfdtbl_clearfd(struct proc * p, int fd); boolean_t filetype_issendable(file_type_t type); extern int fdgetf_noref(proc_t, int, struct fileproc **); +extern struct fileproc *fileproc_alloc_init(void *crargs); +extern void fileproc_free(struct fileproc *fp); +extern void guarded_fileproc_free(struct fileproc *fp); __END_DECLS #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/sys/filedesc.h b/bsd/sys/filedesc.h index 740e2d6f4..b5b80a6bc 100644 --- a/bsd/sys/filedesc.h +++ b/bsd/sys/filedesc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -96,8 +96,6 @@ struct filedesc { int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_freefile; /* approx. next free file */ u_short fd_cmask; /* mask for file creation */ - uint32_t fd_refcnt; /* reference count */ - int fd_knlistsize; /* size of knlist */ struct klist *fd_knlist; /* list of attached knotes */ u_long fd_knhashmask; /* size of knhash */ @@ -115,15 +113,18 @@ struct filedesc { /* * Per-process open flags. 
*/ -#define UF_EXCLOSE 0x01 /* auto-close on exec */ +#define UF_EXCLOSE 0x01 /* auto-close on exec */ +#define UF_FORKCLOSE 0x02 /* auto-close on fork */ #define UF_RESERVED 0x04 /* open pending / in progress */ #define UF_CLOSING 0x08 /* close in progress */ #ifdef KERNEL #define UF_RESVWAIT 0x10 /* close in progress */ #define UF_INHERIT 0x20 /* "inherit-on-exec" */ + #define UF_VALID_FLAGS \ - (UF_EXCLOSE | UF_RESERVED | UF_CLOSING | UF_RESVWAIT | UF_INHERIT) + (UF_EXCLOSE | UF_FORKCLOSE | UF_RESERVED | UF_CLOSING |\ + UF_RESVWAIT | UF_INHERIT) #endif /* KERNEL */ /* @@ -145,9 +146,13 @@ extern int fdavail(proc_t p, int n); #define fdflags(p, fd) \ (&(p)->p_fd->fd_ofileflags[(fd)]) extern int falloc(proc_t p, struct fileproc **resultfp, int *resultfd, vfs_context_t ctx); -extern void ffree(struct file *fp); #ifdef __APPLE_API_PRIVATE +typedef struct fileproc *(*fp_allocfn_t)(void *); +extern int falloc_withalloc(proc_t p, struct fileproc **resultfp, + int *resultfd, vfs_context_t ctx, + fp_allocfn_t fp_zalloc, void *crarg); + extern struct filedesc *fdcopy(proc_t p, struct vnode *uth_cdir); extern void fdfree(proc_t p); extern void fdexec(proc_t p, short flags); diff --git a/bsd/sys/fsctl.h b/bsd/sys/fsctl.h index 40c6b10b1..789876fd8 100644 --- a/bsd/sys/fsctl.h +++ b/bsd/sys/fsctl.h @@ -74,6 +74,7 @@ #ifdef XNU_KERNEL_PRIVATE +/* Userland structs for namespace handler */ typedef struct user64_namespace_handler_info { user64_addr_t token; user64_addr_t flags; @@ -86,12 +87,16 @@ typedef struct user32_namespace_handler_info { user32_addr_t fdptr; } user32_namespace_handler_info; + +/* Kernel-internal of namespace handler info */ typedef struct namespace_handler_info { user_addr_t token; user_addr_t flags; user_addr_t fdptr; } namespace_handler_info; + +/* Userland structs for extended namespace handler */ typedef struct user64_namespace_handler_info_ext { user64_addr_t token; user64_addr_t flags; @@ -106,6 +111,8 @@ typedef struct user32_namespace_handler_info_ext { user32_addr_t infoptr; } user32_namespace_handler_info_ext; + +/* Kernel-internal of extended namespace handler */ typedef struct namespace_handler_info_ext { user_addr_t token; user_addr_t flags; @@ -113,6 +120,45 @@ typedef struct namespace_handler_info_ext { user_addr_t infoptr; } namespace_handler_info_ext; +/* Size-Augmented namespace_handler_info */ +/* 64 bit userland*/ +typedef struct user64_namespace_handler_data { + user64_addr_t token; + user64_addr_t flags; + user64_addr_t fdptr; + user64_addr_t infoptr; + user64_addr_t objid; + user64_addr_t reserved1; + user64_addr_t reserved2; + user64_addr_t reserved3; +} user64_namespace_handler_data; + +/*32 bit userland*/ +typedef struct user32_namespace_handler_data { + user32_addr_t token; + user32_addr_t flags; + user32_addr_t fdptr; + user32_addr_t infoptr; + user32_addr_t objid; + user32_addr_t reserved1; + user32_addr_t reserved2; + user32_addr_t reserved3; +} user32_namespace_handler_data; + +/* kernel-internal */ +typedef struct namespace_handler_data { + user_addr_t token; + user_addr_t flags; + user_addr_t fdptr; + user_addr_t infoptr; + user_addr_t objid; + user_addr_t reserved1; + user_addr_t reserved2; + user_addr_t reserved3; +} namespace_handler_data; + + +#define NSPACE_REARM_NO_ARG ((void *)1) extern int resolve_nspace_item(struct vnode *vp, uint64_t op); extern int resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg); extern int get_nspace_item_status(struct vnode *vp, int32_t *status); @@ -132,6 +178,16 @@ typedef struct 
namespace_handler_info_ext { int64_t *infoptr; // for snapshot write events, the kernel puts an offset/length pair here } namespace_handler_info_ext; +typedef struct namespace_handler_data { + int32_t *token; + int64_t *flags; + int32_t *fdptr; + int64_t *infoptr; // for snapshot write events, the kernel puts an offset/length pair here + int64_t *objid; + uint32_t *reserved1; + uint32_t *reserved2; + uint32_t *reserved3; +} namespace_handler_data; #endif /* XNU_KERNEL_PRIVATE */ @@ -143,7 +199,10 @@ typedef struct namespace_handler_info_ext { #define NAMESPACE_HANDLER_METADATA_WRITE_OP 0x0020 #define NAMESPACE_HANDLER_METADATA_DELETE_OP 0x0040 #define NAMESPACE_HANDLER_METADATA_MOD 0x0080 +// #define NAMESPACE_HANDLER_OP_DO_NOT_USE 0x0100 // SNAPSHOT_EVENT uses this value #define NAMESPACE_HANDLER_LINK_CREATE 0x0200 +#define NAMESPACE_HANDLER_RENAME_SUCCESS_OP 0x0400 +#define NAMESPACE_HANDLER_RENAME_FAILED_OP 0x0800 #define NAMESPACE_HANDLER_NSPACE_EVENT 0x1000 #define NAMESPACE_HANDLER_SNAPSHOT_EVENT 0x0100 @@ -189,8 +248,8 @@ typedef struct package_ext_info { #define FSIOC_SET_PACKAGE_EXTS _IOW('A', 2, struct package_ext_info) #define FSCTL_SET_PACKAGE_EXTS IOCBASECMD(FSIOC_SET_PACKAGE_EXTS) -#define FSIOC_WAIT_FOR_SYNC _IOR('A', 3, int32_t) -#define FSCTL_WAIT_FOR_SYNC IOCBASECMD(FSIOC_WAIT_FOR_SYNC) +/* Unsupported - previously FSIOC_WAIT_FOR_SYNC */ +#define FSIOC_UNSUPPORTED _IOR('A', 3, int32_t) #define FSIOC_NAMESPACE_HANDLER_GET _IOW('A', 4, struct namespace_handler_info) #define FSCTL_NAMESPACE_HANDLER_GET IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GET) @@ -222,8 +281,12 @@ typedef struct package_ext_info { #define FSIOC_SNAPSHOT_HANDLER_GET_EXT _IOW('A', 13, struct namespace_handler_info_ext) #define FSCTL_SNAPSHOT_HANDLER_GET_EXT IOCBASECMD(FSIOC_SNAPSHOT_HANDLER_GET_EXT) +#define FSIOC_NAMESPACE_HANDLER_GETDATA _IOW('A', 14, struct namespace_handler_data) +#define FSCTL_NAMESPACE_HANDLER_GETDATA IOCBASECMD(FSIOC_NAMESPACE_HANDLER_GETDATA) + + // -// IO commands 14, 15, 16, and 17 are currently unused +// IO commands 15, 16, and 17 are currently unused // // diff --git a/bsd/sys/fslog.h b/bsd/sys/fslog.h index 1266f3075..002455668 100644 --- a/bsd/sys/fslog.h +++ b/bsd/sys/fslog.h @@ -34,8 +34,8 @@ #include #ifdef KERNEL -/* Log file system related error in key-value format identified by Apple - * system log (ASL) facility. The key-value pairs are string pointers +/* Log file system related error in key-value format identified by Apple + * system log (ASL) facility. The key-value pairs are string pointers * (char *) and are provided as variable arguments list. A NULL value * indicates end of the list. * @@ -74,9 +74,9 @@ unsigned long fslog_err(unsigned long msg_id, ... 
); /* Reserved message IDs to determine message order */ -#define FSLOG_MSG_SINGLE ULONG_MAX -#define FSLOG_MSG_FIRST 0x0 -#define FSLOG_MSG_LAST (~(ULONG_MAX >> 1)) +#define FSLOG_MSG_SINGLE ULONG_MAX +#define FSLOG_MSG_FIRST 0x0 +#define FSLOG_MSG_LAST (~(ULONG_MAX >> 1)) #ifdef BSD_KERNEL_PRIVATE @@ -109,27 +109,27 @@ void fslog_extmod_msgtracer(proc_t caller, proc_t target); #define FSLOG_VAL_ORDER_MIDDLE "Middle" #define FSLOG_VAL_ORDER_LAST "Last" +/* Keys for IO/FS logging using FSLog */ +#define FSLOG_KEY_ERR_TYPE "ErrType" /* Type of problem (IO, FS Corruption) */ +#define FSLOG_KEY_ERRNO "ErrNo" /* Error number (Integer) */ +#define FSLOG_KEY_IOTYPE "IOType" /* Type of IO (Read/Write) */ +#define FSLOG_KEY_PHYS_BLKNUM "PBlkNum" /* Physical block number */ +#define FSLOG_KEY_LOG_BLKNUM "LBlkNum" /* Logical block number */ +#define FSLOG_KEY_DEVNODE "DevNode" /* Device node (f_mntfromname) */ +#define FSLOG_KEY_PATH "Path" /* File system path */ +#define FSLOG_KEY_MNTPT "MountPt" /* Mount point */ + /* Values used by FSLog */ #define FSLOG_VAL_FACILITY "com.apple.system.fs" /* Facility generating messages */ #define FSLOG_VAL_LEVEL LOG_ERR /* Priority level */ #define FSLOG_VAL_READ_UID 0 /* Allow read access to root only */ -/* Keys for IO/FS logging using FSLog */ -#define FSLOG_KEY_ERR_TYPE "ErrType" /* Type of problem (IO, FS Corruption) */ -#define FSLOG_KEY_ERRNO "ErrNo" /* Error number (Integer) */ -#define FSLOG_KEY_IOTYPE "IOType" /* Type of IO (Read/Write) */ -#define FSLOG_KEY_PHYS_BLKNUM "PBlkNum" /* Physical block number */ -#define FSLOG_KEY_LOG_BLKNUM "LBlkNum" /* Logical block number */ -#define FSLOG_KEY_DEVNODE "DevNode" /* Device node (f_mntfromname) */ -#define FSLOG_KEY_PATH "Path" /* File system path */ -#define FSLOG_KEY_MNTPT "MountPt" /* Mount point */ - /* Values for type of error (FSLOG_KEY_ERR_TYPE) */ -#define FSLOG_VAL_ERR_TYPE_IO "IO" /* IO error */ -#define FSLOG_VAL_ERR_TYPE_FS "FS" /* FS error */ +#define FSLOG_VAL_ERR_TYPE_IO "IO" /* IO error */ +#define FSLOG_VAL_ERR_TYPE_FS "FS" /* FS error */ /* Values for type of operation (FSLOG_KEY_IOTYPE) */ -#define FSLOG_VAL_IOTYPE_READ "Read" -#define FSLOG_VAL_IOTYPE_WRITE "Write" +#define FSLOG_VAL_IOTYPE_READ "Read" +#define FSLOG_VAL_IOTYPE_WRITE "Write" #endif /* !_FSLOG_H_ */ diff --git a/bsd/sys/guarded.h b/bsd/sys/guarded.h new file mode 100644 index 000000000..57c9661e7 --- /dev/null +++ b/bsd/sys/guarded.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_GUARDED_H_ +#define _SYS_GUARDED_H_ + +#include +#include + +#ifdef PRIVATE + +__BEGIN_DECLS + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) + +#ifndef _GUARDID_T +#define _GUARDID_T +typedef __uint64_t guardid_t; +#endif /* _GUARDID_T */ + +#if !defined(KERNEL) +extern int guarded_open_np(const char *path, + const guardid_t *guard, u_int guardflags, int flags, ...); +extern int guarded_kqueue_np(const guardid_t *guard, u_int guardflags); +extern int guarded_close_np(int fd, const guardid_t *guard); +extern int change_fdguard_np(int fd, const guardid_t *guard, u_int guardflags, + const guardid_t *nguard, u_int nguardflags, int *fdflagsp); +#endif /* KERNEL */ + +/* + * Guard types. + * + * GUARD_TYPE_FD: Guarded file descriptor. + */ +#define GUARD_TYPE_FD 0x2 + +/* + * File descriptor guard flavors. + */ + +/* Forbid close(2), and the implicit close() that a dup2(2) may do. + * Forces close-on-fork to be set immutably too. + */ +#define GUARD_CLOSE (1u << 0) + +/* + * Forbid dup(2), dup2(2), and fcntl(2) subcodes F_DUPFD, F_DUPFD_CLOEXEC + * on a guarded fd. Also forbids open's of a guarded fd via /dev/fd/ + * (an implicit dup.) + */ +#define GUARD_DUP (1u << 1) + +/* + * Forbid sending a guarded fd via a socket + */ +#define GUARD_SOCKET_IPC (1u << 2) + +/* + * Forbid creating a fileport from a guarded fd + */ +#define GUARD_FILEPORT (1u << 3) + +/* + * Violating a guard results in an error (EPERM), and potentially + * an exception with one or more of the following bits set. + */ +enum guard_exception_codes { + kGUARD_EXC_CLOSE = 1u << 0, /* close of a guarded fd */ + kGUARD_EXC_DUP = 1u << 1, /* dup of a guarded fd */ + kGUARD_EXC_NOCLOEXEC = 1u << 2, /* clear close-on-exec */ + kGUARD_EXC_SOCKET_IPC = 1u << 3, /* sendmsg of a guarded fd */ + kGUARD_EXC_FILEPORT = 1u << 4, /* fileport_makeport .. */ + kGUARD_EXC_MISMATCH = 1u << 5 /* wrong guard for guarded fd */ +}; + +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ + +__END_DECLS + +#endif /* PRIVATE */ + +#endif /* !_SYS_GUARDED_H_ */ diff --git a/bsd/sys/imgact.h b/bsd/sys/imgact.h index dd25d3a58..83812f5b2 100644 --- a/bsd/sys/imgact.h +++ b/bsd/sys/imgact.h @@ -111,10 +111,12 @@ struct image_params { struct label *ip_execlabelp; /* label of the executable */ struct label *ip_scriptlabelp; /* label of the script */ + struct vnode *ip_scriptvp; /* script */ unsigned int ip_csflags; /* code signing flags */ void *ip_px_sa; void *ip_px_sfa; void *ip_px_spa; + void *ip_px_smpx; /* MAC-specific spawn attrs. */ }; /* diff --git a/bsd/sys/ipc.h b/bsd/sys/ipc.h index a43cacc55..91f3492c3 100644 --- a/bsd/sys/ipc.h +++ b/bsd/sys/ipc.h @@ -85,25 +85,11 @@ * [XSI] The uid_t, gid_t, mode_t, and key_t types SHALL be defined as * described in . 
*/ -#ifndef _UID_T -typedef __darwin_uid_t uid_t; /* user id */ -#define _UID_T -#endif - -#ifndef _GID_T -typedef __darwin_gid_t gid_t; -#define _GID_T -#endif - -#ifndef _MODE_T -typedef __darwin_mode_t mode_t; -#define _MODE_T -#endif - -#ifndef _KEY_T -#define _KEY_T -typedef __int32_t key_t; -#endif +#include +#include +#include +#include + #pragma pack(4) diff --git a/bsd/sys/kas_info.h b/bsd/sys/kas_info.h index c1be0761d..d95cf420c 100644 --- a/bsd/sys/kas_info.h +++ b/bsd/sys/kas_info.h @@ -46,7 +46,7 @@ __BEGIN_DECLS #ifndef KERNEL -int kas_info(int selector, void *value, size_t *size) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_NA); +int kas_info(int selector, void *value, size_t *size) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); #endif /* KERNEL */ diff --git a/bsd/sys/kasl.h b/bsd/sys/kasl.h new file mode 100644 index 000000000..080d18e38 --- /dev/null +++ b/bsd/sys/kasl.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_KASL_H_ +#define _SYS_KASL_H_ + +#ifdef BSD_KERNEL_PRIVATE + +#define KASL_KEY_FACILITY "Facility" /* Facility generating messages */ +#define KASL_KEY_LEVEL "Level" /* Priority level */ + +extern int +kern_asl_msg_va(int level, const char *facility, int num_pairs, + va_list vargs, ...); + +extern int +kern_asl_msg(int level, const char *facility, int num_pairs, ...); + +int escape_str(char *str, int len, int buflen); + +#endif /* BSD_KERNEL_PRIVATE */ + +#endif /* !_SYS_KASL_H_ */ diff --git a/bsd/sys/kauth.h b/bsd/sys/kauth.h index a077ceefa..9539319e8 100644 --- a/bsd/sys/kauth.h +++ b/bsd/sys/kauth.h @@ -39,6 +39,7 @@ #include #include #include /* __offsetof() */ +#include /* NGROUPS_MAX */ #ifdef __APPLE_API_EVOLVING @@ -49,15 +50,7 @@ #define KAUTH_UID_NONE (~(uid_t)0 - 100) /* not a valid UID */ #define KAUTH_GID_NONE (~(gid_t)0 - 100) /* not a valid GID */ -#ifndef _KAUTH_GUID -#define _KAUTH_GUID -/* Apple-style globally unique identifier */ -typedef struct { -#define KAUTH_GUID_SIZE 16 /* 128-bit identifier */ - unsigned char g_guid[KAUTH_GUID_SIZE]; -} guid_t; -#define _GUID_T -#endif /* _KAUTH_GUID */ +#include /* NT Security Identifier, structure as defined by Microsoft */ #pragma pack(1) /* push packing of 1 byte */ @@ -110,6 +103,8 @@ struct kauth_identity_extlookup { #define KAUTH_EXTLOOKUP_WANT_PWNAM (1<<16) #define KAUTH_EXTLOOKUP_VALID_GRNAM (1<<17) #define KAUTH_EXTLOOKUP_WANT_GRNAM (1<<18) +#define KAUTH_EXTLOOKUP_VALID_SUPGRPS (1<<19) +#define KAUTH_EXTLOOKUP_WANT_SUPGRPS (1<<20) __darwin_pid_t el_info_pid; /* request on behalf of PID */ u_int64_t el_extend; /* extension field */ @@ -126,6 +121,8 @@ struct kauth_identity_extlookup { ntsid_t el_gsid; /* group SID */ u_int32_t el_gsid_valid; /* TTL on translation result (seconds) */ u_int32_t el_member_valid; /* TTL on group lookup result */ + u_int32_t el_sup_grp_cnt; /* count of supplemental groups up to NGROUPS */ + gid_t el_sup_groups[NGROUPS_MAX]; /* supplemental group list */ }; struct kauth_cache_sizes { @@ -299,7 +296,7 @@ extern kauth_cred_t kauth_cred_setgroups(kauth_cred_t cred, gid_t *groups, int g struct uthread; extern void kauth_cred_uthread_update(struct uthread *, proc_t); #ifdef CONFIG_MACF -extern int kauth_proc_label_update_execve(struct proc *p, struct vfs_context *ctx, struct vnode *vp, struct label *scriptlabel, struct label *execlabel); +extern int kauth_proc_label_update_execve(struct proc *p, struct vfs_context *ctx, struct vnode *vp, struct vnode *scriptvp, struct label *scriptlabel, struct label *execlabel, void *psattr); #endif extern int kauth_cred_getgroups(kauth_cred_t _cred, gid_t *_groups, int *_groupcount); extern int kauth_cred_assume(uid_t _uid); @@ -779,12 +776,12 @@ void kprintf(const char *fmt, ...); extern lck_grp_t *kauth_lck_grp; #ifdef XNU_KERNEL_PRIVATE __BEGIN_DECLS -extern void kauth_init(void) __attribute__((section("__TEXT, initcode"))); -extern void kauth_cred_init(void) __attribute__((section("__TEXT, initcode"))); +extern void kauth_init(void); +extern void kauth_cred_init(void); #if CONFIG_EXT_RESOLVER -extern void kauth_identity_init(void) __attribute__((section("__TEXT, initcode"))); -extern void kauth_groups_init(void) __attribute__((section("__TEXT, initcode"))); -extern void kauth_resolver_init(void) __attribute__((section("__TEXT, initcode"))); +extern void kauth_identity_init(void); +extern void kauth_groups_init(void); +extern void kauth_resolver_init(void); #endif __END_DECLS #endif /* 
XNU_KERNEL_PRIVATE */ diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index 9a76fbc80..99441af70 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -52,6 +52,66 @@ __BEGIN_DECLS #include #endif +typedef enum +{ + KD_CALLBACK_KDEBUG_ENABLED, // Trace is now enabled. No arguments + KD_CALLBACK_KDEBUG_DISABLED, // Trace is now disabled. No arguments + KD_CALLBACK_SYNC_FLUSH, // Request the latest entries from the IOP, and block until complete. No arguments + KD_CALLBACK_TYPEFILTER_CHANGED, // Typefilter is enabled. A read-only pointer to the typefilter is provided, but is only valid while in the callback. +} kd_callback_type; +typedef void (*kd_callback_fn) (void* context, kd_callback_type reason, void* arg); + +struct kd_callback { + kd_callback_fn func; + void* context; + char iop_name[8]; // null-terminated string with name of core. +}; + +typedef struct kd_callback kd_callback_t; + +/* + * Registers an IOP for participation in tracing. + * + * The registered callback function will be called with the + * supplied context as the first argument, followed by a + * kd_callback_type and an associated void* argument. + * + * The return value is a nonzero coreid that shall be used in + * kernel_debug_enter() to refer to your IOP. If the allocation + * failed, then 0 will be returned. + * + * + * Caveats: + * Note that not all callback calls will indicate a change in + * state (e.g. disabling trace twice would send two disable + * notifications). + * + */ +extern int kernel_debug_register_callback(kd_callback_t callback); + +extern void kernel_debug_enter( + uint32_t coreid, + uint32_t debugid, + uint64_t timestamp, + uintptr_t arg1, + uintptr_t arg2, + uintptr_t arg3, + uintptr_t arg4, + uintptr_t threadid + ); + + +/* + * state bits for hfs_update event + */ +#define DBG_HFS_UPDATE_ACCTIME 0x01 +#define DBG_HFS_UPDATE_MODTIME 0x02 +#define DBG_HFS_UPDATE_CHGTIME 0x04 +#define DBG_HFS_UPDATE_MODIFIED 0x08 +#define DBG_HFS_UPDATE_FORCE 0x10 +#define DBG_HFS_UPDATE_DATEADDED 0x20 + + /* * types of faults that vm_fault handles * and creates trace entries for @@ -64,6 +124,8 @@ __BEGIN_DECLS #define DBG_GUARD_FAULT 6 #define DBG_PAGEINV_FAULT 7 #define DBG_PAGEIND_FAULT 8 +#define DBG_COMPRESSOR_FAULT 9 +#define DBG_COMPRESSOR_SWAPIN_FAULT 10 /* The debug code consists of the following @@ -99,6 +161,7 @@ __BEGIN_DECLS #define DBG_APPS 33 #define DBG_LAUNCHD 34 #define DBG_PERF 37 +#define DBG_IMPORTANCE 38 #define DBG_MIG 255 /* **** The Kernel Debug Sub Classes for Mach (DBG_MACH) **** */ @@ -123,6 +186,8 @@ __BEGIN_DECLS #define DBG_MACH_LOCKS 0x60 /* new lock APIs */ #define DBG_MACH_PMAP 0x70 /* pmap */ #define DBG_MACH_MP 0x90 /* MP related */ +#define DBG_MACH_VM_PRESSURE 0xA0 /* Memory Pressure Events */ +#define DBG_MACH_STACKSHOT 0xA1 /* Stackshot/Microstackshot subsystem */ /* Codes for Scheduler (DBG_MACH_SCHED) */ #define MACH_SCHED 0x0 /* Scheduler */ @@ -149,8 +214,16 @@ __BEGIN_DECLS #define MACH_REDISPATCH 0x16 /* "next thread" thread redispatched */ #define MACH_REMOTE_AST 0x17 /* AST signal issued to remote processor */ -#define MACH_SCHED_LPA_BROKEN 0x18 /* last_processor affinity broken in choose_processor */ +#define MACH_SCHED_CHOOSE_PROCESSOR 0x18 /* Result of choose_processor */ #define MACH_DEEP_IDLE 0x19 /* deep idle on master processor */ +#define MACH_SCHED_DECAY_PRIORITY 0x1a /* timeshare thread priority decayed/restored */ +#define MACH_CPU_THROTTLE_DISABLE 0x1b /* Global CPU Throttle Disable */ +#define MACH_RW_PROMOTE 0x1c /* promoted due to RW lock promotion */ 
+#define MACH_RW_DEMOTE 0x1d /* promotion due to RW lock undone */ + +/* Codes for IPC (DBG_MACH_IPC) */ +#define MACH_TASK_SUSPEND 0x0 /* Suspended a task */ +#define MACH_TASK_RESUME 0x1 /* Resumed a task */ /* Codes for pmap (DBG_MACH_PMAP) */ #define PMAP__CREATE 0x0 @@ -164,6 +237,14 @@ __BEGIN_DECLS #define PMAP__FLUSH_TLBS 0x8 #define PMAP__UPDATE_INTERRUPT 0x9 #define PMAP__ATTRIBUTE_CLEAR 0xa +#define PMAP__REUSABLE 0xb +#define PMAP__QUERY_RESIDENT 0xc +#define PMAP__FLUSH_KERN_TLBS 0xd +#define PMAP__FLUSH_DELAYED_TLBS 0xe + +/* Codes for Stackshot/Microstackshot (DBG_MACH_STACKSHOT) */ +#define MICROSTACKSHOT_RECORD 0x0 +#define MICROSTACKSHOT_GATHER 0x1 /* **** The Kernel Debug Sub Classes for Network (DBG_NETWORK) **** */ #define DBG_NETIP 1 /* Internet Protocol */ @@ -224,6 +305,7 @@ __BEGIN_DECLS #define DBG_IOCPUPM 49 /* CPU Power Management */ #define DBG_IOGRAPHICS 50 /* Graphics */ #define DBG_HIBERNATE 51 /* hibernation related events */ +#define DBG_IOTHUNDERBOLT 52 /* Thunderbolt */ /* Backwards compatibility */ @@ -275,9 +357,12 @@ __BEGIN_DECLS #define DBG_HFS 8 /* HFS-specific events; see bsd/hfs/hfs_kdebug.h */ #define DBG_EXFAT 0xE /* ExFAT-specific events; see the exfat project */ #define DBG_MSDOS 0xF /* FAT-specific events; see the msdosfs project */ +#define DBG_ACFS 0x10 /* Xsan-specific events; see the XsanFS project */ +#define DBG_THROTTLE 0x11 /* I/O Throttling events */ /* The Kernel Debug Sub Classes for BSD */ #define DBG_BSD_PROC 0x01 /* process/signals related */ +#define DBG_BSD_MEMSTAT 0x02 /* memorystatus / jetsam operations */ #define DBG_BSD_EXCP_SC 0x0C /* System Calls */ #define DBG_BSD_AIO 0x0D /* aio (POSIX async IO) */ #define DBG_BSD_SC_EXTENDED_INFO 0x0E /* System Calls, extended info */ @@ -288,6 +373,16 @@ __BEGIN_DECLS #define BSD_PROC_EXIT 1 /* process exit */ #define BSD_PROC_FRCEXIT 2 /* Kernel force termination */ +/* Codes for BSD subcode class DBG_BSD_MEMSTAT */ +#define BSD_MEMSTAT_SCAN 1 /* memorystatus thread awake */ +#define BSD_MEMSTAT_JETSAM 2 /* LRU jetsam */ +#define BSD_MEMSTAT_JETSAM_HIWAT 3 /* highwater jetsam */ +#define BSD_MEMSTAT_FREEZE 4 /* freeze process */ +#define BSD_MEMSTAT_LATENCY_COALESCE 5 /* delay imposed to coalesce jetsam reports */ +#define BSD_MEMSTAT_UPDATE 6 /* priority update */ +#define BSD_MEMSTAT_IDLE_DEMOTE 7 /* idle demotion fired */ +#define BSD_MEMSTAT_CLEAR_ERRORS 8 /* reset termination error state */ + /* The Kernel Debug Sub Classes for DBG_TRACE */ #define DBG_TRACE_DATA 0 #define DBG_TRACE_STRING 1 @@ -311,14 +406,49 @@ __BEGIN_DECLS #define DKIO_ASYNC 0x04 #define DKIO_META 0x08 #define DKIO_PAGING 0x10 -#define DKIO_THROTTLE 0x20 +#define DKIO_THROTTLE 0x20 /* Deprecated, still provided so fs_usage doesn't break */ #define DKIO_PASSIVE 0x40 #define DKIO_NOCACHE 0x80 +#define DKIO_TIER_MASK 0xF00 +#define DKIO_TIER_SHIFT 8 /* Kernel Debug Sub Classes for Applications (DBG_APPS) */ #define DBG_APP_LOGINWINDOW 0x03 +#define DBG_APP_AUDIO 0x04 #define DBG_APP_SAMBA 0x80 +/* Kernel Debug codes for Throttling (DBG_THROTTLE) */ +#define OPEN_THROTTLE_WINDOW 0x1 +#define PROCESS_THROTTLED 0x2 +#define IO_THROTTLE_DISABLE 0x3 + + +/* Subclasses for MACH Importance Policies (DBG_IMPORTANCE) */ +/* TODO: Split up boost and task policy? 
*/ +#define IMP_ASSERTION 0x10 /* Task takes/drops a boost assertion */ +#define IMP_BOOST 0x11 /* Task boost level changed */ +#define IMP_MSG 0x12 /* boosting message sent by donating task on donating port */ +#define IMP_WATCHPORT 0x13 /* port marked as watchport, and boost was transferred to the watched task */ +#define IMP_TASK_SUPPRESSION 0x17 /* Task changed suppression behaviors */ +#define IMP_TASK_APPTYPE 0x18 /* Task launched with apptype */ +#define IMP_UPDATE 0x19 /* Requested -> effective calculation */ +/* DBG_IMPORTANCE subclasses 0x20 - 0x3F reserved for task policy flavors */ + +/* Codes for IMP_ASSERTION */ +#define IMP_HOLD 0x2 /* Task holds a boost assertion */ +#define IMP_DROP 0x4 /* Task drops a boost assertion */ +#define IMP_EXTERN 0x8 /* boost assertion moved from kernel to userspace responsibility (externalized) */ + +/* Codes for IMP_BOOST */ +#define IMP_BOOSTED 0x1 +#define IMP_UNBOOSTED 0x2 /* Task drops a boost assertion */ + +/* Codes for IMP_MSG */ +#define IMP_MSG_SEND 0x1 /* boosting message sent by donating task on donating port */ +#define IMP_MSG_DELV 0x2 /* boosting message delivered to task */ + +/* Codes for IMP_UPDATE */ +#define IMP_UPDATE_TASK_CREATE 0x1 /**********************************************************************/ @@ -353,6 +483,8 @@ __BEGIN_DECLS #define PMAP_CODE(code) MACHDBG_CODE(DBG_MACH_PMAP, code) +#define IMPORTANCE_CODE(SubClass, code) KDBG_CODE(DBG_IMPORTANCE, (SubClass), (code)) + /* Usage: * kernel_debug((KDBG_CODE(DBG_NETWORK, DNET_PROTOCOL, 51) | DBG_FUNC_START), * offset, 0, 0, 0,0) @@ -535,7 +667,12 @@ do { \ #ifdef KERNEL_PRIVATE #include + +#define NUMPARMS 23 + struct proc; + +extern void kdebug_lookup_gen_events(long *dbg_parms, int dbg_namelen, void *dp, boolean_t lookup); extern void kdbg_trace_data(struct proc *proc, long *arg_pid); extern void kdbg_trace_string(struct proc *proc, long *arg1, long *arg2, long *arg3, long *arg4); @@ -549,12 +686,8 @@ void enable_wrap(uint32_t old_slowcheck, boolean_t lostevents); void release_storage_unit(int cpu, uint32_t storage_unit); int allocate_storage_unit(int cpu); -void trace_handler_map_ctrl_page(uintptr_t addr, unsigned long ctrl_page_size, unsigned long storage_size, unsigned long kds_ptr_size); -void trace_handler_map_bufinfo(uintptr_t addr, unsigned long size); -void trace_handler_unmap_bufinfo(void); -void trace_handler_map_buffer(int index, uintptr_t addr, unsigned long size); -void trace_handler_unmap_buffer(int index); -void trace_set_timebases(uint64_t tsc, uint64_t ns); +#define KDBG_CLASS_ENCODE(Class, SubClass) (((Class & 0xff) << 24) | ((SubClass & 0xff) << 16)) +#define KDBG_CLASS_DECODE(Debugid) (Debugid & 0xFFFF0000) #endif /* KERNEL_PRIVATE */ @@ -684,6 +817,71 @@ typedef struct { char command[20]; } kd_threadmap; +typedef struct { + uint32_t version_no; + uint32_t cpu_count; +} kd_cpumap_header; + +/* cpumap flags */ +#define KDBG_CPUMAP_IS_IOP 0x1 + +typedef struct { + uint32_t cpu_id; + uint32_t flags; + char name[8]; +} kd_cpumap; + +/* + * TRACE file formats... + * + * RAW_VERSION0 + * + * uint32_t #threadmaps + * kd_threadmap[] + * kd_buf[] + * + * RAW_VERSION1 + * + * RAW_header, with version_no set to RAW_VERSION1 + * kd_threadmap[] + * Empty space to pad alignment to the nearest page boundary. + * kd_buf[] + * + * RAW_VERSION1+ + * + * RAW_header, with version_no set to RAW_VERSION1 + * kd_threadmap[] + * kd_cpumap_header, with version_no set to RAW_VERSION1 + * kd_cpumap[] + * Empty space to pad alignment to the nearest page boundary. 
+ * kd_buf[] + * + * V1+ implementation details... + * + * It would have been nice to add the cpumap data "correctly", but there were + * several obstacles. Existing code attempts to parse both V1 and V0 files. + * Due to the fact that V0 has no versioning or header, the test looks like + * this: + * + * // Read header + * if (header.version_no != RAW_VERSION1) { // Assume V0 } + * + * If we add a VERSION2 file format, all existing code is going to treat that + * as a VERSION0 file when reading it, and crash terribly when trying to read + * RAW_VERSION2 threadmap entries. + * + * To differentiate between a V1 and V1+ file, read as V1 until you reach + * the padding bytes. Then: + * + * boolean_t is_v1plus = FALSE; + * if (padding_bytes >= sizeof(kd_cpumap_header)) { + * kd_cpumap_header header = // read header; + * if (header.version_no == RAW_VERSION1) { + * is_v1plus = TRUE; + * } + * } + * + */ typedef struct { int version_no; @@ -695,7 +893,6 @@ typedef struct { #define RAW_VERSION0 0x55aa0000 #define RAW_VERSION1 0x55aa0101 - #define KDBG_CLASSTYPE 0x10000 #define KDBG_SUBCLSTYPE 0x20000 #define KDBG_RANGETYPE 0x40000 @@ -709,23 +906,6 @@ typedef struct { #define KDBG_BUFINIT 0x80000000 -/* Control operations */ -#define KDBG_EFLAGS 1 -#define KDBG_DFLAGS 2 -#define KDBG_ENABLE 3 -#define KDBG_SETNUMBUF 4 -#define KDBG_GETNUMBUF 5 -#define KDBG_SETUP 6 -#define KDBG_REMOVE 7 -#define KDBG_SETREGCODE 8 -#define KDBG_GETREGCODE 9 -#define KDBG_READTRACE 10 -#define KDBG_PIDTR 11 -#define KDBG_THRMAP 12 -#define KDBG_PIDEX 14 -#define KDBG_SETRTCDEC 15 -#define KDBG_KDGETENTROPY 16 - /* Minimum value allowed when setting decrementer ticks */ #define KDBG_MINRTCDEC 2500 diff --git a/bsd/sys/kern_control.h b/bsd/sys/kern_control.h index 4a5a411d6..7b51d55e8 100644 --- a/bsd/sys/kern_control.h +++ b/bsd/sys/kern_control.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004, 2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -182,6 +182,15 @@ typedef void * kern_ctl_ref; */ #define CTL_FLAG_REG_SOCK_STREAM 0x4 +#ifdef KERNEL_PRIVATE +/*! + @defined CTL_FLAG_REG_EXTENDED + @discussion This flag indicates that this kernel control utilizes the + the extended fields within the kern_ctl_reg structure. +*/ +#define CTL_FLAG_REG_EXTENDED 0x8 +#endif /* KERNEL_PRIVATE */ + /* Data flags for controllers */ /*! @defined CTL_DATA_NOWAKEUP @@ -299,6 +308,26 @@ typedef errno_t (*ctl_setopt_func)(kern_ctl_ref kctlref, u_int32_t unit, void *u typedef errno_t (*ctl_getopt_func)(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, int opt, void *data, size_t *len); +#ifdef KERNEL_PRIVATE +/*! + @typedef ctl_rcvd_func + @discussion The ctl_rcvd_func is called when the client reads data from + the kernel control socket. The kernel control can use this callback + in combination with ctl_getenqueuespace() to avoid overflowing + the socket's receive buffer. When ctl_getenqueuespace() returns + 0 or ctl_enqueuedata()/ctl_enqueuembuf() return ENOBUFS, the + kernel control can wait until this callback is called before + trying to enqueue the data again. + @param kctlref The control ref of the kernel control. + @param unit The unit number of the kernel control instance. + @param unitinfo The user-defined private data initialized by the + ctl_connect_func callback. + @param flags The recv flags. See the recv(2) man page. 
+ */ +typedef void (*ctl_rcvd_func)(kern_ctl_ref kctlref, u_int32_t unit, void *unitinfo, + int flags); +#endif /* KERNEL_PRIVATE */ + /*! @struct kern_ctl_reg @discussion This structure defines the properties of a kernel @@ -351,6 +380,9 @@ struct kern_ctl_reg ctl_send_func ctl_send; ctl_setopt_func ctl_setopt; ctl_getopt_func ctl_getopt; +#ifdef KERNEL_PRIVATE + ctl_rcvd_func ctl_rcvd; /* Only valid if CTL_FLAG_REG_EXTENDED is set */ +#endif /* KERNEL_PRIVATE */ }; /*! @@ -393,8 +425,8 @@ ctl_deregister(kern_ctl_ref kctlref); @param unit The unit number of the kernel control instance. @param data A pointer to the data to send. @param len The length of data to send. - @param flags Send flags. CTL_DATA_NOWAKEUP is currently the only - supported flag. + @param flags Send flags. CTL_DATA_NOWAKEUP and CTL_DATA_EOR are currently + the only supported flags. @result 0 - Data was enqueued to be read by the client. EINVAL - Invalid parameters. EMSGSIZE - The buffer is too large. @@ -411,8 +443,8 @@ ctl_enqueuedata(kern_ctl_ref kctlref, u_int32_t unit, void *data, size_t len, u_ @param kctlref The control reference of the kernel control. @param unit The unit number of the kernel control instance. @param m An mbuf chain containing the data to send to the client. - @param flags Send flags. CTL_DATA_NOWAKEUP is currently the only - supported flag. + @param flags Send flags. CTL_DATA_NOWAKEUP and CTL_DATA_EOR are currently + the only supported flags. @result 0 - Data was enqueued to be read by the client. EINVAL - Invalid parameters. ENOBUFS - The queue is full. diff --git a/bsd/sys/kern_event.h b/bsd/sys/kern_event.h index 393638877..03b30f16f 100644 --- a/bsd/sys/kern_event.h +++ b/bsd/sys/kern_event.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,30 +22,30 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */ /*! @header kern_event.h - This header defines in-kernel functions for generating kernel events as well - as functions for receiving kernel events using a kernel event socket. + This header defines in-kernel functions for generating kernel events as + well as functions for receiving kernel events using a kernel event + socket. 
*/ #ifndef SYS_KERN_EVENT_H -#define SYS_KERN_EVENT_H +#define SYS_KERN_EVENT_H #include #include #include -#define KEVENTS_ON 1 -#define KEV_SNDSPACE (4 * 1024) -#define KEV_RECVSPACE (32 * 1024) +#define KEV_SNDSPACE (4 * 1024) +#define KEV_RECVSPACE (32 * 1024) -#define KEV_ANY_VENDOR 0 -#define KEV_ANY_CLASS 0 -#define KEV_ANY_SUBCLASS 0 +#define KEV_ANY_VENDOR 0 +#define KEV_ANY_CLASS 0 +#define KEV_ANY_SUBCLASS 0 /* * Vendor Code @@ -53,12 +53,11 @@ /*! @defined KEV_VENDOR_APPLE - @discussion Apple generated kernel events use the hard coded vendor code - value of 1. Third party kernel events use a dynamically allocated vendor - code. The vendor code can be found using the SIOCGKEVVENDOR ioctl. + @discussion Apple generated kernel events use the hard coded vendor code + value of 1. Third party kernel events use a dynamically allocated vendor + code. The vendor code can be found using the SIOCGKEVVENDOR ioctl. */ -#define KEV_VENDOR_APPLE 1 - +#define KEV_VENDOR_APPLE 1 /* * Definition of top-level classifications for KEV_VENDOR_APPLE @@ -66,61 +65,63 @@ /*! @defined KEV_NETWORK_CLASS - @discussion Network kernel event class. -*/ -#define KEV_NETWORK_CLASS 1 + @discussion Network kernel event class. + */ +#define KEV_NETWORK_CLASS 1 /*! @defined KEV_IOKIT_CLASS - @discussion IOKit kernel event class. -*/ -#define KEV_IOKIT_CLASS 2 + @discussion IOKit kernel event class. + */ +#define KEV_IOKIT_CLASS 2 /*! - @defined KEV_IOKIT_CLASS - @discussion System kernel event class. -*/ -#define KEV_SYSTEM_CLASS 3 + @defined KEV_SYSTEM_CLASS + @discussion System kernel event class. + */ +#define KEV_SYSTEM_CLASS 3 /*! @defined KEV_APPLESHARE_CLASS - @discussion AppleShare kernel event class. -*/ -#define KEV_APPLESHARE_CLASS 4 + @discussion AppleShare kernel event class. + */ +#define KEV_APPLESHARE_CLASS 4 /*! @defined KEV_FIREWALL_CLASS @discussion Firewall kernel event class. -*/ -#define KEV_FIREWALL_CLASS 5 + */ +#define KEV_FIREWALL_CLASS 5 /*! @defined KEV_IEEE80211_CLASS @discussion IEEE 802.11 kernel event class. -*/ -#define KEV_IEEE80211_CLASS 6 + */ +#define KEV_IEEE80211_CLASS 6 /*! @struct kern_event_msg - @discussion This structure is prepended to all kernel events. This structure - is used to determine the format of the remainder of the kernel event. - This structure will appear on all messages received on a kernel event - socket. To post a kernel event, a slightly different structure is used. + @discussion This structure is prepended to all kernel events. This + structure is used to determine the format of the remainder of + the kernel event. This structure will appear on all messages + received on a kernel event socket. To post a kernel event, a + slightly different structure is used. @field total_size Total size of the kernel event message including the header. @field vendor_code The vendor code indicates which vendor generated the - kernel event. This gives every vendor a unique set of classes and - subclasses to use. Use the SIOCGKEVVENDOR ioctl to look up vendor codes - for vendors other than Apple. Apple uses KEV_VENDOR_APPLE. + kernel event. This gives every vendor a unique set of classes + and subclasses to use. Use the SIOCGKEVVENDOR ioctl to look up + vendor codes for vendors other than Apple. Apple uses + KEV_VENDOR_APPLE. @field kev_class The class of the kernel event. @field kev_subclass The subclass of the kernel event. @field id Monotonically increasing value. @field event_code The event code. - @field event_data Any additional data about this event. 
Format will depend - on the vendor_code, kev_class, kev_subclass, and event_code. The length - of the event_data can be determined using total_size - - KEV_MSG_HEADER_SIZE. -*/ + @field event_data Any additional data about this event. Format will + depend on the vendor_code, kev_class, kev_subclass, and + event_code. The length of the event_data can be determined + using total_size - KEV_MSG_HEADER_SIZE. + */ struct kern_event_msg { u_int32_t total_size; /* Size of entire event msg */ u_int32_t vendor_code; /* For non-Apple extensibility */ @@ -133,29 +134,30 @@ struct kern_event_msg { /*! @defined KEV_MSG_HEADER_SIZE - @discussion Size of the header portion of the kern_event_msg structure. This - accounts for everything right up to event_data. The size of the data can - be found by subtracting KEV_MSG_HEADER_SIZE from the total size from the - kern_event_msg. -*/ -#define KEV_MSG_HEADER_SIZE (offsetof(struct kern_event_msg, event_data[0])) + @discussion Size of the header portion of the kern_event_msg structure. + This accounts for everything right up to event_data. The size + of the data can be found by subtracting KEV_MSG_HEADER_SIZE + from the total size of the kern_event_msg. + */ +#define KEV_MSG_HEADER_SIZE (offsetof(struct kern_event_msg, event_data[0])) /*! @struct kev_request - @discussion This structure is used with the SIOCSKEVFILT and SIOCGKEVFILT to - set and get the control filter setting for a kernel control socket. + @discussion This structure is used with the SIOCSKEVFILT and + SIOCGKEVFILT to set and get the event filter setting for a + kernel event socket. - @field vendor_code All kernel events that don't match this vendor code will - be ignored. KEV_ANY_VENDOR can be used to receive kernel events with any - vendor code. + @field vendor_code All kernel events that don't match this vendor code + will be ignored. KEV_ANY_VENDOR can be used to receive kernel + events with any vendor code. @field kev_class All kernel events that don't match this class will be - ignored. KEV_ANY_CLASS can be used to receive kernel events with any - class. - @field kev_subclass All kernel events that don't match this subclass will be - ignored. KEV_ANY_SUBCLASS can be used to receive kernel events with any - subclass. -*/ + ignored. KEV_ANY_CLASS can be used to receive kernel events with + any class. + @field kev_subclass All kernel events that don't match this subclass + will be ignored. KEV_ANY_SUBCLASS can be used to receive kernel + events with any subclass. + */ struct kev_request { u_int32_t vendor_code; u_int32_t kev_class; @@ -164,70 +166,72 @@ struct kev_request { /*! @defined KEV_VENDOR_CODE_MAX_STR_LEN - @discussion This define sets the maximum length of a string that can be used - to identify a vendor or kext when looking up a vendor code. -*/ -#define KEV_VENDOR_CODE_MAX_STR_LEN 200 - -#pragma pack(4) + @discussion This define sets the maximum length of a string that can be + used to identify a vendor or kext when looking up a vendor code. + */ +#define KEV_VENDOR_CODE_MAX_STR_LEN 200 /*! @struct kev_vendor_code - @discussion This structure is used with the SIOCGKEVVENDOR ioctl to convert - from a string identifying a kext or vendor, in the form of a bundle - identifier, to a vendor code. + @discussion This structure is used with the SIOCGKEVVENDOR ioctl to + convert from a string identifying a kext or vendor, in the + form of a bundle identifier, to a vendor code.
@field vendor_code After making the SIOCGKEVVENDOR ioctl call, this will be filled in with the vendor code if there is one. @field vendor_string A bundle style identifier. -*/ + */ +#pragma pack(4) struct kev_vendor_code { u_int32_t vendor_code; char vendor_string[KEV_VENDOR_CODE_MAX_STR_LEN]; }; - #pragma pack() /*! @defined SIOCGKEVID - @discussion Retrieve the current event id. Each event generated will have - a new idea. The next event to be generated will have an id of id+1. -*/ -#define SIOCGKEVID _IOR('e', 1, u_int32_t) + @discussion Retrieve the current event id. Each event generated will + have a new id. The next event to be generated will have an id + of id+1. + */ +#define SIOCGKEVID _IOR('e', 1, u_int32_t) /*! @defined SIOCSKEVFILT - @discussion Set the kernel event filter for this socket. Kernel events not - matching this filter will not be received on this socket. -*/ -#define SIOCSKEVFILT _IOW('e', 2, struct kev_request) + @discussion Set the kernel event filter for this socket. Kernel events + not matching this filter will not be received on this socket. + */ +#define SIOCSKEVFILT _IOW('e', 2, struct kev_request) /*! @defined SIOCGKEVFILT - @discussion Retrieve the kernel event filter for this socket. Kernel events - not matching this filter will not be received on this socket. -*/ -#define SIOCGKEVFILT _IOR('e', 3, struct kev_request) + @discussion Retrieve the kernel event filter for this socket. Kernel + events not matching this filter will not be received on this + socket. + */ +#define SIOCGKEVFILT _IOR('e', 3, struct kev_request) /*! @defined SIOCGKEVVENDOR - @discussion Lookup the vendor code for the specified vendor. ENOENT will be - returned if a vendor code for that vendor string does not exist. -*/ + @discussion Lookup the vendor code for the specified vendor. ENOENT will + be returned if a vendor code for that vendor string does not + exist. + */ #define SIOCGKEVVENDOR _IOWR('e', 4, struct kev_vendor_code) #ifdef KERNEL /*! @define N_KEV_VECTORS @discussion The maximum number of kev_d_vectors for a kernel event. -*/ -#define N_KEV_VECTORS 5 + */ +#define N_KEV_VECTORS 5 /*! @struct kev_d_vectors - @discussion This structure is used to append some data to a kernel event. + @discussion This structure is used to append some data to a kernel + event. @field data_length The length of data. @field data_ptr A pointer to data. -*/ + */ struct kev_d_vectors { u_int32_t data_length; /* Length of the event data */ void *data_ptr; /* Pointer to event data */ @@ -240,28 +244,28 @@ struct kev_d_vectors { @field kev_class The event's class. @field kev_subclass The event's subclass. @field event_code The event's code. - @field dv An array of vectors describing additional data to be appended to - the kernel event. -*/ + @field dv An array of vectors describing additional data to be appended + to the kernel event. + */ struct kev_msg { - u_int32_t vendor_code; /* For non-Apple extensibility */ - u_int32_t kev_class; /* Layer of event source */ - u_int32_t kev_subclass; /* Component within layer */ - u_int32_t event_code; /* The event code */ - struct kev_d_vectors dv[N_KEV_VECTORS]; /* Up to n data vectors */ + u_int32_t vendor_code; /* For non-Apple extensibility */ + u_int32_t kev_class; /* Layer of event source */ + u_int32_t kev_subclass; /* Component within layer */ + u_int32_t event_code; /* The event code */ + struct kev_d_vectors dv[N_KEV_VECTORS]; /* Up to n data vectors */ }; /*! @function kev_vendor_code_find - @discussion Lookup a vendor_code given a unique string.
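[Illustrative userland sketch, not part of the patch: receiving the messages described above over a kernel event socket. Error handling is omitted and the filter values are only an example; PF_SYSTEM and SYSPROTO_EVENT come from sys/sys_domain.h, which sys/kern_event.h pulls in.]

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/kern_event.h>

static int
read_one_network_event(void)
{
	char buf[1024];
	struct kev_request req;
	struct kern_event_msg *msg;
	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);

	/* receive only Apple networking events */
	req.vendor_code = KEV_VENDOR_APPLE;
	req.kev_class = KEV_NETWORK_CLASS;
	req.kev_subclass = KEV_ANY_SUBCLASS;
	ioctl(fd, SIOCSKEVFILT, &req);

	if (recv(fd, buf, sizeof (buf), 0) > 0) {
		msg = (struct kern_event_msg *)(void *)buf;
		/* payload length = msg->total_size - KEV_MSG_HEADER_SIZE */
	}
	return (fd);
}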
If the vendor code - has not been used since launch, a unique integer will be assigned for - that string. Vendor codes will remain the same until the machine is - rebooted. - @param vendor_string A bundle style vendor identifier (i.e. com.apple). - @param vendor_code Upon return, a unique vendor code for use when posting - kernel events. - @result May return ENOMEM if memory constraints prevent allocation of a new - vendor code. + @discussion Lookup a vendor_code given a unique string. If the vendor + code has not been used since launch, a unique integer will be + assigned for that string. Vendor codes will remain the same + until the machine is rebooted. + @param vendor_string A bundle style vendor identifier (e.g. com.apple). + @param vendor_code Upon return, a unique vendor code for use when + posting kernel events. + @result May return ENOMEM if memory constraints prevent allocation of a + new vendor code. */ errno_t kev_vendor_code_find(const char *vendor_string, u_int32_t *vendor_code); @@ -270,32 +274,32 @@ errno_t kev_vendor_code_find(const char *vendor_string, u_int32_t *vendor_code); @discussion Post a kernel event message. @param event_msg A structure defining the kernel event message to post. @result Will return zero upon success. May return a number of errors - depending on the type of failure. EINVAL indicates that there was - something wrong with the kerne event. The vendor code of the kernel - event must be assigned using kev_vendor_code_find. If the message is - too large, EMSGSIZE will be returned. + depending on the type of failure. EINVAL indicates that there + was something wrong with the kernel event. The vendor code of + the kernel event must be assigned using kev_vendor_code_find. + If the message is too large, EMSGSIZE will be returned. */ errno_t kev_msg_post(struct kev_msg *event_msg); #ifdef PRIVATE /* - * Internal version of kev_post_msg. Allows posting Apple vendor code kernel + * Internal version of kev_msg_post. Allows posting Apple vendor code kernel * events.
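[Illustrative kext-side sketch, not part of the patch: posting an event with the two routines above. The bundle identifier and the class/subclass/event values are hypothetical placeholders; only the signatures of kev_vendor_code_find() and kev_msg_post() are taken from this header.]

#include <string.h>
#include <sys/kern_event.h>

static errno_t
my_post_event(void *payload, u_int32_t payload_len)
{
	struct kev_msg ev;
	u_int32_t vendor;
	errno_t err;

	err = kev_vendor_code_find("com.example.mykext", &vendor);
	if (err != 0)
		return (err);

	bzero(&ev, sizeof (ev));
	ev.vendor_code = vendor;
	ev.kev_class = 1;		/* vendor-defined class */
	ev.kev_subclass = 1;		/* vendor-defined subclass */
	ev.event_code = 1;		/* vendor-defined event */
	ev.dv[0].data_length = payload_len;
	ev.dv[0].data_ptr = payload;	/* dv[1].data_length == 0 ends the list */
	return (kev_msg_post(&ev));
}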
*/ int kev_post_msg(struct kev_msg *event); LIST_HEAD(kern_event_head, kern_event_pcb); -struct kern_event_pcb { - LIST_ENTRY(kern_event_pcb) ev_link; /* glue on list of all PCBs */ - struct socket *ev_socket; /* pointer back to socket */ - u_int32_t vendor_code_filter; - u_int32_t class_filter; - u_int32_t subclass_filter; +struct kern_event_pcb { + decl_lck_mtx_data(, evp_mtx); /* per-socket mutex */ + LIST_ENTRY(kern_event_pcb) evp_link; /* glue on list of all PCBs */ + struct socket *evp_socket; /* pointer back to socket */ + u_int32_t evp_vendor_code_filter; + u_int32_t evp_class_filter; + u_int32_t evp_subclass_filter; }; -#define sotoevpcb(so) ((struct kern_event_pcb *)((so)->so_pcb)) - +#define sotoevpcb(so) ((struct kern_event_pcb *)((so)->so_pcb)) #endif /* PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/sys/kern_memorystatus.h b/bsd/sys/kern_memorystatus.h index 66fabee01..bacc7588d 100644 --- a/bsd/sys/kern_memorystatus.h +++ b/bsd/sys/kern_memorystatus.h @@ -34,35 +34,36 @@ #include #include -#define DEFAULT_JETSAM_PRIORITY -100 +#define JETSAM_PRIORITY_REVISION 2 -enum { - kMemorystatusFlagsFrontmost = (1 << 0), - kMemorystatusFlagsKilled = (1 << 1), - kMemorystatusFlagsKilledHiwat = (1 << 2), - kMemorystatusFlagsFrozen = (1 << 3), - kMemorystatusFlagsKilledVnodes = (1 << 4), - kMemorystatusFlagsKilledSwap = (1 << 5), - kMemorystatusFlagsThawed = (1 << 6), - kMemorystatusFlagsKilledVM = (1 << 7), - kMemorystatusFlagsSuspForDiagnosis = (1 << 8), - kMemorystatusFlagsActive = (1 << 9), - kMemorystatusFlagsSupportsIdleExit = (1 << 10), - kMemorystatusFlagsDirty = (1 << 11) -}; +#define JETSAM_PRIORITY_IDLE 0 +#define JETSAM_PRIORITY_IDLE_DEFERRED 1 +#define JETSAM_PRIORITY_BACKGROUND_OPPORTUNISTIC 2 +#define JETSAM_PRIORITY_BACKGROUND 3 +#define JETSAM_PRIORITY_MAIL 4 +#define JETSAM_PRIORITY_PHONE 5 +#define JETSAM_PRIORITY_UI_SUPPORT 8 +#define JETSAM_PRIORITY_FOREGROUND_SUPPORT 9 +#define JETSAM_PRIORITY_FOREGROUND 10 +#define JETSAM_PRIORITY_AUDIO_AND_ACCESSORY 12 +#define JETSAM_PRIORITY_CONDUCTOR 13 +#define JETSAM_PRIORITY_HOME 16 +#define JETSAM_PRIORITY_EXECUTIVE 17 +#define JETSAM_PRIORITY_IMPORTANT 18 +#define JETSAM_PRIORITY_CRITICAL 19 -#if TARGET_OS_EMBEDDED || CONFIG_EMBEDDED +#define JETSAM_PRIORITY_MAX 21 -/* - * Define Memory Status event subclass. - * Subclass of KEV_SYSTEM_CLASS - */ +/* TODO - tune. This should probably be lower priority */ +#define JETSAM_PRIORITY_DEFAULT 18 +#define JETSAM_PRIORITY_TELEPHONY 19 -/*! - @defined KEV_MEMORYSTATUS_SUBCLASS - @discussion The kernel event subclass for memory status events. 
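[Illustrative userland sketch, not part of the patch: fetching the jetsam priority band of every managed process through the private memorystatus_control() interface declared further down in this header. Strictly an SPI; the entry count is a caller's guess, and buffers are capped by MEMORYSTATUS_BUFFERSIZE_MAX.]

#include <stdlib.h>
#include <sys/kern_memorystatus.h>

static memorystatus_priority_entry_t *
get_priority_list(size_t max_entries)
{
	size_t size = max_entries * sizeof (memorystatus_priority_entry_t);
	memorystatus_priority_entry_t *list = malloc(size);

	/* a negative return is treated as failure in this sketch */
	if (list != NULL &&
	    memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST,
	    0, 0, list, size) < 0) {
		free(list);
		list = NULL;
	}
	return (list);
}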
-*/ -#define KEV_MEMORYSTATUS_SUBCLASS 3 +/* Compatibility */ +#define DEFAULT_JETSAM_PRIORITY 18 + +#define DEFERRED_IDLE_EXIT_TIME_SECS 10 + +#define KEV_MEMORYSTATUS_SUBCLASS 3 enum { kMemorystatusLevelNote = 1, @@ -81,18 +82,12 @@ enum { typedef struct memorystatus_priority_entry { pid_t pid; - uint32_t flags; - int32_t hiwat_pages; int32_t priority; - int32_t reserved; - int32_t reserved2; + uint64_t user_data; + int32_t limit; + uint32_t state; } memorystatus_priority_entry_t; -/* -** how many processes to snapshot -*/ -#define kMaxSnapshotEntries 128 - typedef struct memorystatus_kernel_stats { uint32_t free_pages; uint32_t active_pages; @@ -100,6 +95,13 @@ typedef struct memorystatus_kernel_stats { uint32_t throttled_pages; uint32_t purgeable_pages; uint32_t wired_pages; + uint32_t speculative_pages; + uint32_t filebacked_pages; + uint32_t anonymous_pages; + uint32_t compressor_pages; + uint64_t compressions; + uint64_t decompressions; + uint64_t total_uncompressed_pages_in_compressor; } memorystatus_kernel_stats_t; /* @@ -108,12 +110,16 @@ typedef struct memorystatus_kernel_stats { */ typedef struct jetsam_snapshot_entry { - pid_t pid; - char name[MAXCOMLEN+1]; - int32_t priority; + pid_t pid; + char name[MAXCOMLEN+1]; + int32_t priority; uint32_t pages; - uint32_t flags; - uint8_t uuid[16]; + uint32_t max_pages; + uint32_t state; + uint32_t killed; + uint64_t user_data; + uint8_t uuid[16]; + uint32_t fds; } memorystatus_jetsam_snapshot_entry_t; typedef struct jetsam_snapshot { @@ -121,96 +127,125 @@ typedef struct jetsam_snapshot { uint64_t notification_time; memorystatus_kernel_stats_t stats; size_t entry_count; - memorystatus_jetsam_snapshot_entry_t entries[1]; + memorystatus_jetsam_snapshot_entry_t entries[]; } memorystatus_jetsam_snapshot_t; -typedef memorystatus_priority_entry_t jetsam_priority_entry_t; -typedef memorystatus_jetsam_snapshot_t jetsam_snapshot_t; -typedef memorystatus_jetsam_snapshot_entry_t jetsam_snapshot_entry_t; - -#define kMemoryStatusLevelNote kMemorystatusLevelNote -#define kMemoryStatusSnapshotNote kMemorystatusSnapshotNote -#define kMemoryStatusFreezeNote kMemorystatusFreezeNote -#define kMemoryStatusPressureNote kMemorystatusPressureNote - typedef struct memorystatus_freeze_entry { int32_t pid; uint32_t flags; uint32_t pages; } memorystatus_freeze_entry_t; -#endif /* TARGET_OS_EMBEDDED */ - -#ifdef XNU_KERNEL_PRIVATE +/* TODO - deprecate; see */ +#define kMaxSnapshotEntries 192 -/* General tunables */ +/* State */ +#define kMemorystatusSuspended 0x01 +#define kMemorystatusFrozen 0x02 +#define kMemorystatusWasThawed 0x04 +#define kMemorystatusTracked 0x08 +#define kMemorystatusSupportsIdleExit 0x10 +#define kMemorystatusDirty 0x20 -#define DELTA_PERCENT 5 -#define CRITICAL_PERCENT 5 -#define HIGHWATER_PERCENT 10 -#define PRESSURE_PERCENT 15 -#define FREEZE_PERCENT 50 - -#define POLICY_MORE_FREE_OFFSET_PERCENT 5 -#define POLICY_DIAGNOSTIC_OFFSET_PERCENT 5 - -#define IDLE_EXIT_TIME_SECS 10 +/* Cause */ +enum { + kMemorystatusKilled = 1, + kMemorystatusKilledHiwat, + kMemorystatusKilledVnodes, + kMemorystatusKilledVMPageShortage, + kMemorystatusKilledVMThrashing, + kMemorystatusKilledPerProcessLimit, + kMemorystatusKilledDiagnostic, + kMemorystatusKilledIdleExit +}; +/* Temporary, to prevent the need for a linked submission of ReportCrash */ +/* Remove when has been integrated */ enum { - kProcessSuspended = (1 << 0), - kProcessFrozen = (1 << 1), - kProcessNoReclaimWorth = (1 << 2), - kProcessIgnored = (1 << 3), - kProcessLocked = (1 << 4), - 
kProcessKilled = (1 << 5), - kProcessNotifiedForPressure = (1 << 6), - kProcessPriorityUpdated = (1 << 7), - kProcessActive = (1 << 8), - kProcessForeground = (1 << 9), - kProcessSuspendedForDiag = (1 << 10), - kProcessSupportsIdleExit = (1 << 11), - kProcessDirty = (1 << 12), - kProcessIgnoreIdleExit = (1 << 13) + kMemorystatusKilledVM = kMemorystatusKilledVMPageShortage }; -typedef struct memorystatus_node { - TAILQ_ENTRY(memorystatus_node) link; - pid_t pid; - int32_t priority; - uint32_t state; -#if CONFIG_JETSAM - int32_t hiwat_pages; -#endif -#if CONFIG_FREEZE - uint32_t resident_pages; +/* Memorystatus control */ +#define MEMORYSTATUS_BUFFERSIZE_MAX 65536 + +#ifndef KERNEL +int memorystatus_get_level(user_addr_t level); +int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, void *buffer, size_t buffersize); #endif - uint64_t clean_time; -} memorystatus_node; -extern int memorystatus_wakeup; -extern unsigned int memorystatus_running; +/* Commands */ +#define MEMORYSTATUS_CMD_GET_PRIORITY_LIST 1 +#define MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES 2 +#define MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT 3 +#define MEMORYSTATUS_CMD_GET_PRESSURE_STATUS 4 +#define MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK 5 /* TODO: deprecate */ + +#if PRIVATE +/* Test commands */ + +/* Trigger forced jetsam */ +#define MEMORYSTATUS_CMD_TEST_JETSAM 1000 + +/* Panic on jetsam options */ +typedef struct memorystatus_jetsam_panic_options { + uint32_t data; + uint32_t mask; +} memorystatus_jetsam_panic_options_t; -extern unsigned int memorystatus_available_pages; -extern unsigned int memorystatus_available_pages_critical; -extern unsigned int memorystatus_level; -extern unsigned int memorystatus_delta; +#define MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS 1001 +#endif /* PRIVATE */ + +typedef struct memorystatus_priority_properties { + int32_t priority; + uint64_t user_data; +} memorystatus_priority_properties_t; + +#ifdef XNU_KERNEL_PRIVATE + +/* p_memstat_state flags */ + +#define P_MEMSTAT_SUSPENDED 0x00000001 +#define P_MEMSTAT_FROZEN 0x00000002 +#define P_MEMSTAT_NORECLAIM 0x00000004 +#define P_MEMSTAT_ERROR 0x00000008 +#define P_MEMSTAT_LOCKED 0x00000010 +#define P_MEMSTAT_TERMINATED 0x00000020 +#define P_MEMSTAT_NOTFIED 0x00000040 +#define P_MEMSTAT_PRIORITYUPDATED 0x00000080 +#define P_MEMSTAT_FOREGROUND 0x00000100 +#define P_MEMSTAT_DIAG_SUSPENDED 0x00000200 +#define P_MEMSTAT_PRIOR_THAW 0x00000400 +#define P_MEMSTAT_MEMLIMIT_BACKGROUND 0x00000800 +#define P_MEMSTAT_INTERNAL 0x00001000 extern void memorystatus_init(void) __attribute__((section("__TEXT, initcode"))); -extern kern_return_t memorystatus_list_add(int pid, int priority, int high_water_mark); -extern kern_return_t memorystatus_list_change(boolean_t effective, int pid, int priority, int state_flags, int high_water_mark); -extern kern_return_t memorystatus_list_remove(int pid); +extern int memorystatus_add(proc_t p, boolean_t locked); +extern int memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background); +extern int memorystatus_remove(proc_t p, boolean_t locked); + +extern int memorystatus_dirty_track(proc_t p, uint32_t pcontrol); +extern int memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol); +extern int memorystatus_dirty_get(proc_t p); + +extern int memorystatus_on_terminate(proc_t p); + +extern void memorystatus_on_suspend(proc_t p); +extern void memorystatus_on_resume(proc_t p); +extern void 
memorystatus_on_inactivity(proc_t p); + +extern void memorystatus_on_pageout_scan_end(void); + +/* Memorystatus kevent */ -extern kern_return_t memorystatus_on_track_dirty(int pid, boolean_t track); -extern kern_return_t memorystatus_on_dirty(int pid, boolean_t dirty); +void memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr); -extern void memorystatus_on_suspend(int pid); -extern void memorystatus_on_resume(int pid); -extern void memorystatus_on_inactivity(int pid); +int memorystatus_knote_register(struct knote *kn); +void memorystatus_knote_unregister(struct knote *kn); #if CONFIG_JETSAM -typedef enum memorystatus_policy_t { +typedef enum memorystatus_policy { kPolicyDefault = 0x0, kPolicyMoreFree = 0x1, kPolicyDiagnoseAll = 0x2, @@ -221,21 +256,21 @@ typedef enum memorystatus_policy_t { extern int memorystatus_jetsam_wakeup; extern unsigned int memorystatus_jetsam_running; -extern int memorystatus_kill_top_proc(boolean_t any, uint32_t reason); -extern int memorystatus_kill_top_proc_from_VM(void); +boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async); +boolean_t memorystatus_kill_on_VM_thrashing(boolean_t async); +boolean_t memorystatus_kill_on_vnode_limit(void); -extern void memorystatus_update(unsigned int pages_avail); +void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb); -#if VM_PRESSURE_EVENTS +void memorystatus_pages_update(unsigned int pages_avail); -#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4 +extern boolean_t memorystatus_is_foreground_locked(proc_t p); -extern int memorystatus_request_vm_pressure_candidate(void); -extern void memorystatus_send_pressure_note(int pid); +#else /* CONFIG_JETSAM */ -#endif /* VM_PRESSURE_EVENTS */ +boolean_t memorystatus_idle_exit_from_VM(void); -#endif /* CONFIG_JETSAM */ +#endif /* !CONFIG_JETSAM */ #ifdef CONFIG_FREEZE @@ -264,6 +299,19 @@ extern void memorystatus_freeze_init(void) __attribute__((section("__TEXT, initc #endif /* CONFIG_FREEZE */ +#if VM_PRESSURE_EVENTS + +#define MEMORYSTATUS_SUSPENDED_THRESHOLD 4 + +extern kern_return_t memorystatus_update_vm_pressure(boolean_t); + +#if CONFIG_JETSAM +extern int memorystatus_send_pressure_note(int pid); +extern boolean_t memorystatus_bg_pressure_eligible(proc_t p); +#endif + +#endif /* VM_PRESSURE_EVENTS */ + #endif /* XNU_KERNEL_PRIVATE */ #endif /* SYS_MEMORYSTATUS_H */ diff --git a/bsd/sys/kern_overrides.h b/bsd/sys/kern_overrides.h new file mode 100644 index 000000000..377d8487a --- /dev/null +++ b/bsd/sys/kern_overrides.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_KERN_OVERRIDES_H +#define _SYS_KERN_OVERRIDES_H + +#include +#include +#include +#include + +__BEGIN_DECLS + +/* System Overrides Flags */ +#define SYS_OVERRIDE_IO_THROTTLE 0x1 +#define SYS_OVERRIDE_CPU_THROTTLE 0x2 + +#define SYS_OVERRIDE_FLAGS_MASK (SYS_OVERRIDE_IO_THROTTLE | SYS_OVERRIDE_CPU_THROTTLE) + +#ifdef BSD_KERNEL_PRIVATE +void init_system_override(void); +#endif + +#ifndef KERNEL +int system_override(uint64_t timeout, uint64_t flags); +#endif + +__END_DECLS + +#endif /*_SYS_KERN_OVERRIDES_H */ diff --git a/bsd/sys/kern_tests.h b/bsd/sys/kern_tests.h new file mode 100644 index 000000000..5ed6cc727 --- /dev/null +++ b/bsd/sys/kern_tests.h @@ -0,0 +1,4 @@ +#ifndef _KERN_TESTS_H +#define _KERN_TESTS_H + +#endif /* !defined(_KERN_TESTS_H) */ diff --git a/bsd/sys/kernel.h b/bsd/sys/kernel.h index 6b9c7baef..2674b234c 100644 --- a/bsd/sys/kernel.h +++ b/bsd/sys/kernel.h @@ -91,8 +91,6 @@ extern int profhz; /* profiling clock's frequency */ #ifdef KERNEL_PRIVATE -extern int lbolt; /* once a second sleep address */ - extern struct timezone tz; /* XXX */ extern int tick; /* usec per tick (1000000 / hz) */ diff --git a/bsd/sys/kernel_types.h b/bsd/sys/kernel_types.h index 20a0bafb3..5615bfa72 100644 --- a/bsd/sys/kernel_types.h +++ b/bsd/sys/kernel_types.h @@ -41,7 +41,6 @@ #endif -typedef int errno_t; typedef int64_t daddr64_t; #ifndef BSD_BUILD @@ -130,15 +129,7 @@ typedef struct bufattr* bufattr_t; #endif /* !BSD_BUILD */ -#ifndef _KAUTH_GUID -#define _KAUTH_GUID -/* Apple-style globally unique identifier */ -typedef struct { -#define KAUTH_GUID_SIZE 16 /* 128-bit identifier */ - unsigned char g_guid[KAUTH_GUID_SIZE]; -} guid_t; -#define _GUID_T -#endif /* _KAUTH_GUID */ +#include #ifndef _KAUTH_ACE #define _KAUTH_ACE diff --git a/bsd/sys/kpi_mbuf.h b/bsd/sys/kpi_mbuf.h index 3cb4c787d..a17245d51 100644 --- a/bsd/sys/kpi_mbuf.h +++ b/bsd/sys/kpi_mbuf.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2008-2011 Apple Inc. All rights reserved. + * Copyright (c) 2008-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
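[Illustrative sketch for kern_overrides.h above, not part of the patch: a privileged process asking the kernel to suspend I/O and CPU throttling via the userland prototype declared there. The header does not document the units of the timeout argument, so the value below is purely a placeholder.]

#include <sys/kern_overrides.h>

static int
boost_for_critical_work(void)
{
	/* request both overrides; requires appropriate privilege */
	return (system_override(10 /* placeholder timeout */,
	    SYS_OVERRIDE_IO_THROTTLE | SYS_OVERRIDE_CPU_THROTTLE));
}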
* Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /*! @@ -161,7 +161,8 @@ typedef u_int32_t mbuf_tso_request_flags_t; enum { #ifdef KERNEL_PRIVATE - MBUF_CSUM_REQ_SUM16 = 0x1000, /* Weird apple hardware checksum */ + MBUF_CSUM_PARTIAL = 0x1000, /* 16-bit 1's complement sum */ + MBUF_CSUM_REQ_SUM16 = MBUF_CSUM_PARTIAL, #endif /* KERNEL_PRIVATE */ MBUF_CSUM_REQ_IP = 0x0001, MBUF_CSUM_REQ_TCP = 0x0002, @@ -196,7 +197,7 @@ typedef u_int32_t mbuf_csum_request_flags_t; */ enum { #ifdef KERNEL_PRIVATE - MBUF_CSUM_TCP_SUM16 = MBUF_CSUM_REQ_SUM16, /* Weird apple hardware checksum */ + MBUF_CSUM_TCP_SUM16 = MBUF_CSUM_PARTIAL, #endif /* KERNEL_PRIVATE */ MBUF_CSUM_DID_IP = 0x0100, MBUF_CSUM_IP_GOOD = 0x0200, @@ -908,7 +909,8 @@ extern mbuf_flags_t mbuf_flags(const mbuf_t mbuf); @function mbuf_setflags @discussion Sets the set of set flags. @param mbuf The mbuf. - @param flags The flags that should be set, all other flags will be cleared. + @param flags The flags that should be set, all other flags will be + cleared. Certain flags such as MBUF_EXT cannot be altered. @result 0 upon success otherwise the errno error. */ extern errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags); @@ -918,7 +920,8 @@ extern errno_t mbuf_setflags(mbuf_t mbuf, mbuf_flags_t flags); @discussion Useful for setting or clearing individual flags. Easier than calling mbuf_setflags(m, mbuf_flags(m) | M_FLAG). @param mbuf The mbuf. - @param flags The flags that should be set or cleared. + @param flags The flags that should be set or cleared. Certain flags + such as MBUF_EXT cannot be altered. @param mask The mask controlling which flags will be modified. @result 0 upon success otherwise the errno error. */ @@ -1339,6 +1342,63 @@ extern errno_t mbuf_tag_find(mbuf_t mbuf, mbuf_tag_id_t module_id, extern void mbuf_tag_free(mbuf_t mbuf, mbuf_tag_id_t module_id, mbuf_tag_type_t type); +#ifdef KERNEL_PRIVATE +/* + @function mbuf_add_drvaux + @discussion Allocate space for driver auxiliary data and attach it + to the packet (MBUF_PKTHDR is required.) This space is freed + when the mbuf is freed or when mbuf_del_drvaux is called. + Only one instance of driver auxiliary data may be attached to + a packet. Any attempt to add it to a packet already associated + with one will yield an error, and the existing one must first + be removed via mbuf_del_drvaux. The format and length of the + data depend largely on the family and sub-family. The system + makes no attempt to define and/or interpret the contents of + the data, and simply acts as a conduit between its producer + and consumer. + @param mbuf The mbuf to attach the auxiliary data to. + @param how Indicate whether you are willing to block and wait for + memory, if memory is not immediately available. + @param family The interface family as defined in net/kpi_interface.h. + @param subfamily The interface sub-family as defined in + net/kpi_interface.h. + @param length The length of the auxiliary data, must be greater than 0. + @param data_p Upon successful return, *data_p will point to the + space allocated for the data. Caller may set this to NULL. + @result 0 upon success otherwise the errno error. + */ +extern errno_t mbuf_add_drvaux(mbuf_t mbuf, mbuf_how_t how, + u_int32_t family, u_int32_t subfamily, size_t length, void **data_p); + +/* + @function mbuf_find_drvaux + @discussion Find the driver auxiliary data associated with a packet. 
+ @param mbuf The mbuf the auxiliary data is attached to. + @param family_p Upon successful return, *family_p will contain + the interface family associated with the data, as defined + in net/kpi_interface.h. Caller may set this to NULL. + @param subfamily_p Upon successful return, *subfamily_p will contain + the interface sub-family associated with the data, as defined + in net/kpi_interface.h. Caller may set this to NULL. + @param length_p Upon successful return, *length_p will contain + the length of the driver auxiliary data. Caller may + set this to NULL. + @param data_p Upon successful return, *data_p will point to the + space allocated for the data. + @result 0 upon success otherwise the errno error. + */ +extern errno_t mbuf_find_drvaux(mbuf_t mbuf, u_int32_t *family_p, + u_int32_t *subfamily_p, u_int32_t *length_p, void **data_p); + +/* + @function mbuf_del_drvaux + @discussion Remove and free any driver auxiliary data associated + with the packet. + @param mbuf The mbuf the auxiliary data is attached to. + */ +extern void mbuf_del_drvaux(mbuf_t mbuf); +#endif /* KERNEL_PRIVATE */ + /* mbuf stats */ /*! @@ -1386,7 +1446,7 @@ extern mbuf_traffic_class_t mbuf_get_traffic_class(mbuf_t mbuf); @discussion Set the traffic class of an mbuf packet. @param mbuf The mbuf to set the traffic class on. @param tc The traffic class - @result 0 on success, EINVAL if bad paramater is passed + @result 0 on success, EINVAL if bad parameter is passed */ extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc); @@ -1400,6 +1460,24 @@ extern errno_t mbuf_set_traffic_class(mbuf_t mbuf, mbuf_traffic_class_t tc); extern int mbuf_is_traffic_class_privileged(mbuf_t mbuf); #ifdef KERNEL_PRIVATE + +/*! + @function mbuf_get_traffic_class_max_count + @discussion Returns the maximum number of mbuf traffic class types + @result The total count of mbuf traffic classes + */ +extern u_int32_t mbuf_get_traffic_class_max_count(void); + +/*! + @function mbuf_get_traffic_class_index + @discussion Returns the zero-based index of an mbuf traffic class value + @param tc The traffic class + @param index Pointer to the index value + @result 0 on success, EINVAL if bad parameter is passed + */ +extern errno_t mbuf_get_traffic_class_index(mbuf_traffic_class_t tc, + u_int32_t *index); + /*! @enum mbuf_svc_class_t @abstract Service class of a packet @@ -1458,6 +1536,23 @@ typedef enum { MBUF_SC_CTL = 0x00480190, /* highest class */ } mbuf_svc_class_t; +/*! + @function mbuf_get_service_class_max_count + @discussion Returns the maximum number of mbuf service class types. + @result The total count of mbuf service classes. + */ +extern u_int32_t mbuf_get_service_class_max_count(void); + +/*! + @function mbuf_get_service_class_index + @discussion Returns the zero-based index of an mbuf service class value + @param sc The service class + @param index Pointer to the index value + @result 0 on success, EINVAL if bad parameter is passed + */ +extern errno_t mbuf_get_service_class_index(mbuf_svc_class_t sc, + u_int32_t *index); + /*! @function mbuf_get_service_class @discussion Get the service class of an mbuf packet @@ -1471,7 +1566,7 @@ extern mbuf_svc_class_t mbuf_get_service_class(mbuf_t mbuf); @discussion Set the service class of an mbuf packet. @param mbuf The mbuf to set the service class on.
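[Illustrative sketch, not part of the patch: attaching, finding, and deleting driver auxiliary data with the KPI documented above. The family value is an example and the subfamily and payload are placeholders; signatures follow the declarations in this header.]

#include <sys/kpi_mbuf.h>
#include <net/kpi_interface.h>

static errno_t
my_tag_packet(mbuf_t m)
{
	void *aux = NULL;
	errno_t err;

	/* one aux blob per packet; fails if one is already attached */
	err = mbuf_add_drvaux(m, MBUF_WAITOK, IFNET_FAMILY_ETHERNET,
	    0 /* subfamily placeholder */, sizeof (u_int32_t), &aux);
	if (err == 0)
		*(u_int32_t *)aux = 0xcafe;	/* placeholder payload */
	return (err);
}

static void
my_untag_packet(mbuf_t m)
{
	u_int32_t family, subfamily, length;
	void *aux;

	if (mbuf_find_drvaux(m, &family, &subfamily, &length, &aux) == 0)
		mbuf_del_drvaux(m);
}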
@param sc The service class - @result 0 on success, EINVAL if bad paramater is passed + @result 0 on success, EINVAL if bad parameter is passed */ extern errno_t mbuf_set_service_class(mbuf_t mbuf, mbuf_svc_class_t sc); @@ -1510,6 +1605,19 @@ typedef u_int32_t mbuf_pkthdr_aux_flags_t; */ extern errno_t mbuf_pkthdr_aux_flags(mbuf_t mbuf, mbuf_pkthdr_aux_flags_t *paux_flags); + +/* + @function mbuf_get_driver_scratch + @discussion Returns a pointer to a driver specific area in the mbuf + @param m The mbuf whose driver scratch space is to be returned + @param area A pointer to a location to store the address of the + driver scratch space. This value is guaranteed to be 32-bit + aligned. + @param area_ln A pointer to a location to store the total length of + the memory location. +*/ +extern errno_t mbuf_get_driver_scratch(mbuf_t m, u_int8_t **area, + size_t *area_ln); #endif /* KERNEL_PRIVATE */ /* IF_QUEUE interaction */ diff --git a/bsd/sys/kpi_socket.h b/bsd/sys/kpi_socket.h index 5f2093369..658522422 100644 --- a/bsd/sys/kpi_socket.h +++ b/bsd/sys/kpi_socket.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2008-2011 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2008-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /*! @@ -56,8 +56,8 @@ struct timeval; Calls to your upcall function are not serialized and may be called concurrently from multiple threads in the kernel. - Your upcall function will be called: - when there is data more than the low water mark for reading, + Your upcall function will be called: + when there is data more than the low water mark for reading, or when there is space for a write, or when there is a connection to accept, or when a socket is connected, @@ -69,6 +69,21 @@ struct timeval; */ typedef void (*sock_upcall)(socket_t so, void *cookie, int waitf); +#ifdef KERNEL_PRIVATE +/*! + @typedef sock_evupcall + + @discussion sock_evupcall is used by a socket to notify an in-kernel + client when an event occurs. Instead of making blocking calls in + the kernel, a client can specify an upcall which will be called + when an event status is available. + @param so A reference to the socket that's ready. + @param cookie The cookie passed in when the socket was created. + @param event Indicates the event as defined by SO_FILT_HINT_* +*/ +typedef void (*sock_evupcall)(socket_t so, void *cookie, u_int32_t event); +#endif /* KERNEL_PRIVATE */ + /*! @function sock_accept @discussion Accepts an incoming connection on a socket. 
See 'man 2 @@ -240,6 +255,8 @@ extern void socket_clear_traffic_mgt_flags_locked(socket_t so, u_int32_t flags); extern void socket_set_traffic_mgt_flags(socket_t so, u_int32_t flags); extern void socket_clear_traffic_mgt_flags(socket_t so, u_int32_t flags); extern errno_t socket_defunct(struct proc *, socket_t so, int); +extern errno_t sock_receive_internal(socket_t, struct msghdr *, mbuf_t *, + int, size_t *); #endif /* BSD_KERNEL_PRIVATE */ #endif /* KERNEL_PRIVATE */ @@ -487,18 +504,53 @@ extern void sock_freeaddr(struct sockaddr *sockname); /* @function sock_setupcall @discussion Set the notifier function to be called when an event - occurs on the socket. This may be set to NULL to disable - further notifications. Setting the function does not + occurs on the socket. This may be set to NULL to disable + further notifications. Setting the function does not affect notifications about to be sent or being sent. - Note: When this function is used on a socket passed from userspace - it is crucial to call sock_retain() on the socket otherwise a callback - could be dispatched on a closed socket and cause a crash. + Note: When this function is used on a socket passed from + userspace it is crucial to call sock_retain() on the socket + otherwise a callback could be dispatched on a closed socket + and cause a crash. @param sock The socket. @param callback The notifier function @param context A cookie passed directly to the callback */ -extern errno_t sock_setupcall(socket_t sock, sock_upcall callback, void* context); +extern errno_t sock_setupcall(socket_t sock, sock_upcall callback, + void *context); + +/* + @function sock_setupcalls + @discussion Set the notifier function to be called when an event + occurs on the socket. This may be set to NULL to disable + further notifications. Setting the function does not + affect notifications about to be sent or being sent. + Note: When this function is used on a socket passed from + userspace it is crucial to call sock_retain() on the socket + otherwise a callback could be dispatched on a closed socket + and cause a crash. + @param sock The socket. + @param read_callback The read notifier function + @param read_context A cookie passed directly to the read callback + @param write_callback The write notifier function + @param write_context A cookie passed directly to the write callback +*/ +extern errno_t sock_setupcalls(socket_t sock, sock_upcall read_callback, + void *read_context, sock_upcall write_callback, void *write_context); +/* + @function sock_catchevents + @discussion Set the notifier function to be called when an event + occurs on the socket. This may be set to NULL to disable + further notifications. Setting the function does not + affect notifications about to be sent or being sent. + @param sock The socket. + @param event_callback The event notifier function + @param event_context A cookie passed directly to the event callback + @param event_mask One or more SO_FILT_HINT_* values OR'ed together, + indicating the registered event(s). 
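[Illustrative sketch, not part of the patch: wiring separate read and event upcalls onto an in-kernel socket with the two functions above. The callback bodies are placeholders, and the SO_FILT_HINT_* mask shown is only an example of the private hint values.]

#include <sys/kpi_socket.h>

static void
my_read_upcall(socket_t so, void *cookie, int waitf)
{
	/* data above the low water mark is ready; schedule a read */
}

static void
my_event_upcall(socket_t so, void *cookie, u_int32_t event)
{
	/* 'event' carries SO_FILT_HINT_* bits for the registered mask */
}

static errno_t
my_attach_callbacks(socket_t so, void *ctx)
{
	errno_t err = sock_setupcalls(so, my_read_upcall, ctx, NULL, NULL);
	if (err == 0)
		err = sock_catchevents(so, my_event_upcall, ctx,
		    SO_FILT_HINT_CONNECTED | SO_FILT_HINT_DISCONNECTED);
	return (err);
}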
+*/ +extern errno_t sock_catchevents(socket_t sock, sock_evupcall event_callback, + void *event_context, u_int32_t event_mask); #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/sys/kpi_socketfilter.h b/bsd/sys/kpi_socketfilter.h index 10ab4323c..c52153117 100644 --- a/bsd/sys/kpi_socketfilter.h +++ b/bsd/sys/kpi_socketfilter.h @@ -338,6 +338,9 @@ typedef errno_t (*sf_connect_in_func)(void *cookie, socket_t so, @result Return: 0 - The caller will continue with normal processing of the connection. + EJUSTRETURN - The caller will return with a value of 0 (no error) + from that point without further processing the connect command. The + protocol layer will not see the call. Anything Else - The caller will reject the outbound connection. */ @@ -355,6 +358,9 @@ typedef errno_t (*sf_connect_out_func)(void *cookie, socket_t so, @param to The local address of the socket will be bound to. @result Return: 0 - The caller will continue with normal processing of the bind. + EJUSTRETURN - The caller will return with a value of 0 (no error) + from that point without further processing the bind command. The + protocol layer will not see the call. Anything Else - The caller will reject the bind. */ typedef errno_t (*sf_bind_func)(void *cookie, socket_t so, @@ -372,6 +378,9 @@ typedef errno_t (*sf_bind_func)(void *cookie, socket_t so, @result Return: 0 - The caller will continue with normal processing of the setsockopt. + EJUSTRETURN - The caller will return with a value of 0 (no error) + from that point without further propagating the set option + command. The socket and protocol layers will not see the call. Anything Else - The caller will stop processing and return this error. */ @@ -389,6 +398,9 @@ typedef errno_t (*sf_setoption_func)(void *cookie, socket_t so, sockopt_t opt); @result Return: 0 - The caller will continue with normal processing of the getsockopt. + EJUSTRETURN - The caller will return with a value of 0 (no error) + from that point without further propagating the get option + command. The socket and protocol layers will not see the call. Anything Else - The caller will stop processing and return this error. */ @@ -404,6 +416,9 @@ typedef errno_t (*sf_getoption_func)(void *cookie, socket_t so, sockopt_t opt); @param so The socket the filter is attached to. @result Return: 0 - The caller will continue with normal processing of listen. + EJUSTRETURN - The caller will return with a value of 0 (no error) + from that point without further processing the listen command. The + protocol will not see the call. Anything Else - The caller will stop processing and return this error. */ @@ -426,6 +441,9 @@ typedef errno_t (*sf_listen_func)(void *cookie, socket_t so); @result Return: 0 - The caller will continue with normal processing of this ioctl. + EJUSTRETURN - The caller will return with a value of 0 (no error) + from that point without further processing or propagating + the ioctl. Anything Else - The caller will stop processing and return this error. 
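[Illustrative sketch, not part of the patch: a bind filter callback that consumes the request using the EJUSTRETURN convention documented above. Registration boilerplate via sflt_register() is omitted; the callback signature matches sf_bind_func.]

#include <sys/errno.h>
#include <sys/kpi_socketfilter.h>

static errno_t
my_sf_bind(void *cookie, socket_t so, const struct sockaddr *to)
{
	/*
	 * Returning EJUSTRETURN makes bind() report success to the
	 * application while the protocol layer never sees the request;
	 * any other non-zero return rejects the bind.
	 */
	return (EJUSTRETURN);
}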
*/ diff --git a/bsd/sys/linker_set.h b/bsd/sys/linker_set.h index 852f34695..1aea00848 100644 --- a/bsd/sys/linker_set.h +++ b/bsd/sys/linker_set.h @@ -98,9 +98,9 @@ #define __LS_VA_STRCONCAT(_x,_y) __LS_VA_STRINGIFY(_x,_y) #define __LINKER_MAKE_SET(_set, _sym) \ /*__unused*/ /*static*/ void const * /*const*/ __set_##_set##_sym_##_sym \ - __attribute__ ((section(__LS_VA_STRCONCAT(__DATA,_set)))) = (void *)&_sym + __attribute__ ((section(__LS_VA_STRCONCAT(__DATA,_set)),used)) = (void *)&_sym /* the line above is very fragile - if your compiler breaks linker sets, - just play around with "static", "const" etc. :-) */ + just play around with "static", "const", "used" etc. :-) */ /* * Public macros. diff --git a/bsd/sys/lockf.h b/bsd/sys/lockf.h index 4cbefc091..c12abebf0 100644 --- a/bsd/sys/lockf.h +++ b/bsd/sys/lockf.h @@ -73,6 +73,11 @@ struct vnode; MALLOC_DECLARE(M_LOCKF); #endif +#if IMPORTANCE_INHERITANCE +#define LF_NOT_BOOSTED 0 +#define LF_BOOSTED 1 +#endif /* IMPORTANCE_INHERITANCE */ + /* * The lockf structure is a kernel structure which contains the information * associated with a byte range lock. The lockf structures are linked into @@ -95,6 +100,9 @@ struct lockf { struct lockf *lf_next; /* Pointer to the next lock on this inode */ struct locklist lf_blkhd; /* List of requests blocked on this lock */ TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */ +#if IMPORTANCE_INHERITANCE + int lf_boosted; /* Is the owner of the lock boosted */ +#endif /* IMPORTANCE_INHERITANCE */ }; #pragma pack() diff --git a/bsd/sys/malloc.h b/bsd/sys/malloc.h index 80883f08b..8aea6f647 100644 --- a/bsd/sys/malloc.h +++ b/bsd/sys/malloc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -118,7 +118,7 @@ #define M_VNODE 25 /* Dynamically allocated vnodes */ #define M_CACHE 26 /* Dynamically allocated cache entries */ #define M_DQUOT 27 /* UFS quota entries */ -#define M_UFSMNT 28 /* UFS mount structure */ +#define M_PROC_UUID_POLICY 28 /* proc UUID policy entries */ #define M_SHM 29 /* SVID compatible shared memory segments */ #define M_PLIMIT 30 /* plimit structures */ #define M_SIGACTS 31 /* sigacts structures */ @@ -168,8 +168,8 @@ #define M_HFSMNT 75 /* HFS mount structure */ #define M_HFSNODE 76 /* HFS catalog node */ #define M_HFSFORK 77 /* HFS file fork */ -#define M_ZFSMNT 78 /* ZFS mount data */ -#define M_ZFSNODE 79 /* ZFS inode */ +/* unused 78 */ +/* unused 79 */ #define M_TEMP 80 /* misc temporary data buffers */ #define M_SECA 81 /* security associations, key management */ #define M_DEVFS 82 @@ -192,9 +192,7 @@ #define M_FILEGLOB 99 /* fileglobal */ #define M_KAUTH 100 /* kauth subsystem */ #define M_DUMMYNET 101 /* dummynet */ -#if CONFIG_VFS_FUNNEL -#define M_UNSAFEFS 102 /* storage for vnode lock state for unsafe FS */ -#endif /* CONFIG_VFS_FUNNEL */ +/* M_UNSAFEFS 102 */ #define M_MACPIPELABEL 103 /* MAC pipe labels */ #define M_MACTEMP 104 /* MAC framework */ #define M_SBUF 105 /* string buffers */ @@ -209,8 +207,11 @@ #define M_IN6MFILTER 112 /* IPv6 multicast PCB-layer source filter */ #define M_IP6MOPTS 113 /* IPv6 multicast options */ #define M_IP6MSOURCE 114 /* IPv6 multicast MLD-layer source filter */ +#define M_FLOW_DIVERT_PCB 115 /* flow divert control block */ +#define M_FLOW_DIVERT_GROUP 116 /* flow divert socket group */ +#define M_IP6CGA 117 -#define M_LAST 115 /* Must be last type + 1 */ +#define M_LAST 118 /* Must be last type + 1 */ #else /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/sys/mbuf.h b/bsd/sys/mbuf.h index e3ad20c4a..8a849338b 100644 --- a/bsd/sys/mbuf.h +++ b/bsd/sys/mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2011 Apple Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,7 +86,7 @@ #include #include - +#include /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. 
An mbuf may add a single "mbuf cluster" of size @@ -125,12 +125,12 @@ /* header at beginning of each mbuf: */ struct m_hdr { - struct mbuf *mh_next; /* next buffer in chain */ - struct mbuf *mh_nextpkt; /* next chain in queue/record */ - int32_t mh_len; /* amount of data in this mbuf */ - caddr_t mh_data; /* location of data */ - short mh_type; /* type of data in this mbuf */ - short mh_flags; /* flags; see below */ + struct mbuf *mh_next; /* next buffer in chain */ + struct mbuf *mh_nextpkt; /* next chain in queue/record */ + caddr_t mh_data; /* location of data */ + int32_t mh_len; /* amount of data in this mbuf */ + u_int16_t mh_type; /* type of data in this mbuf */ + u_int16_t mh_flags; /* flags; see below */ }; /* @@ -164,82 +164,286 @@ struct m_taghdr { u_int64_t refcnt; /* Number of tags in this mbuf */ }; -/* Values for pftag_flags */ -#define PF_TAG_GENERATED 0x000001 /* pkt generated by PF */ -#define PF_TAG_FRAGCACHE 0x000002 -#define PF_TAG_TRANSLATE_LOCALHOST 0x000004 -#define PF_TAG_FLOWHASH 0x000100 /* valid flowhash value */ -#define PF_TAG_HDR_INET 0x000200 /* hdr points to IPv4 */ -#define PF_TAG_HDR_INET6 0x000400 /* hdr points to IPv6 */ -#define PF_TAG_TCP 0x000800 /* payload is TCP */ -#define PF_TAG_FLOWADV 0x010000 /* local flow advisory */ -#define PF_TAG_QUEUE1 0x100000 /* queue-specific */ - -#define IF_PKTSEQ_SHIFT 4 +/* + * Driver auxiliary metadata tag (KERNEL_TAG_TYPE_DRVAUX). + */ +struct m_drvaux_tag { + u_int32_t da_family; /* IFNET_FAMILY values */ + u_int32_t da_subfamily; /* IFNET_SUBFAMILY values */ + u_int32_t da_reserved; /* for future */ + u_int32_t da_length; /* length of following data */ +}; -/* PF mbuf tag */ +/* Values for pftag_flags (16-bit wide) */ +#define PF_TAG_GENERATED 0x1 /* pkt generated by PF */ +#define PF_TAG_FRAGCACHE 0x2 +#define PF_TAG_TRANSLATE_LOCALHOST 0x4 +#if PF_ECN +#define PF_TAG_HDR_INET 0x8 /* hdr points to IPv4 */ +#define PF_TAG_HDR_INET6 0x10 /* hdr points to IPv6 */ +#endif /* PF_ECN */ +/* + * PF mbuf tag + */ struct pf_mtag { + u_int16_t pftag_flags; /* PF_TAG flags */ + u_int16_t pftag_rtableid; /* alternate routing table id */ + u_int16_t pftag_tag; + u_int16_t pftag_routed; +#if PF_ALTQ + u_int32_t pftag_qid; +#endif /* PF_ALTQ */ +#if PF_ECN void *pftag_hdr; /* saved hdr pos in mbuf, for ECN */ - unsigned int pftag_rtableid; /* alternate routing table id */ +#endif /* PF_ECN */ +}; + +/* + * TCP mbuf tag + */ +struct tcp_pktinfo { union { struct { - u_int32_t qid; - union { - u_int8_t val8[4]; - u_int16_t val16[2]; - u_int32_t val32; - } __qpriv_u; /* for queue-specific use */ - } __pf_data; - u_int64_t pktseq; - } __pfifseq_u; /* Used for pf or interface bandwidth measurement */ -#define pftag_qid __pfifseq_u.__pf_data.qid -#define pftag_qpriv8 __pfifseq_u.__pf_data.__qpriv_u.val8 -#define pftag_qpriv16 __pfifseq_u.__pf_data.__qpriv_u.val16 -#define pftag_qpriv32 __pfifseq_u.__pf_data.__qpriv_u.val32 -#define pftag_pktseq __pfifseq_u.pktseq - u_int32_t pftag_flowhash; - u_int16_t pftag_tag; - u_int16_t pftag_routed; - u_int32_t pftag_flags; /* PF_TAG flags */ + u_int32_t segsz; /* segment size (actual MSS) */ + } __tx; + struct { + u_int16_t lro_pktlen; /* max seg size encountered */ + u_int8_t lro_npkts; /* # of coalesced TCP pkts */ + u_int8_t lro_timediff; /* time spent in LRO */ + } __rx; + } __offload; + union { + u_int32_t pri; /* send msg priority */ + u_int32_t seq; /* recv msg sequence # */ + } __msgattr; +#define tso_segsz proto_mtag.__pr_u.tcp.tm_tcp.__offload.__tx.segsz +#define lro_pktlen 
proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_pktlen +#define lro_npkts proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_npkts +#define lro_elapsed proto_mtag.__pr_u.tcp.tm_tcp.__offload.__rx.lro_timediff +#define msg_pri proto_mtag.__pr_u.tcp.tm_tcp.__msgattr.pri +#define msg_seq proto_mtag.__pr_u.tcp.tm_tcp.__msgattr.seq }; -/* TCP specific mbuf tag */ +/* + * MPTCP mbuf tag + */ +struct mptcp_pktinfo { + u_int64_t mtpi_dsn; /* MPTCP Data Sequence Number */ + union { + u_int64_t mtpi_dan; /* MPTCP Data Ack Number */ + struct { + u_int32_t mtpi_rel_seq; /* Relative Seq Number */ + u_int32_t mtpi_length; /* Length of mapping */ + } mtpi_subf; + }; +#define mp_dsn proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_dsn +#define mp_rseq proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_subf.mtpi_rel_seq +#define mp_rlen proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_subf.mtpi_length +#define mp_dack proto_mtag.__pr_u.tcp.tm_mptcp.mtpi_subf.mtpi_dan +}; + +/* + * TCP specific mbuf tag. Note that the current implementation uses + * MPTCP metadata strictly between MPTCP and the TCP subflow layers, + * hence tm_tcp and tm_mptcp are mutually exclusive. This also means + * that TCP messages functionality is currently incompatible with MPTCP. + */ struct tcp_mtag { - u_int tm_tso_segz; /* TSO segment size (actual MSS) */ - u_int16_t tm_pktlen; /* LRO - max segment size encountered */ - u_int16_t tm_npkts; /* LRO - number of coalesced TCP pkts */ + union { + struct tcp_pktinfo tm_tcp; /* TCP and below */ + struct mptcp_pktinfo tm_mptcp; /* MPTCP-TCP only */ + }; +}; + +/* + * IPSec mbuf tag + */ +struct ipsec_mtag { + uint32_t policy_id; +#define ipsec_policy proto_mtag.__pr_u.ipsec.policy_id }; -/* record/packet header in first mbuf of chain; valid if M_PKTHDR set */ +/* + * Protocol specific mbuf tag (at most one protocol metadata per mbuf). + * + * Care must be taken to ensure that they are mutually exclusive, e.g. + * IPSec policy ID implies no TCP segment offload (which is fine given + * that the former is used on the virtual ipsec interface that does + * not advertise the TSO capability.) + */ +struct proto_mtag { + union { + struct tcp_mtag tcp; /* TCP specific */ + struct ipsec_mtag ipsec; /* IPSec specific */ + } __pr_u; +}; + +/* + * Record/packet header in first mbuf of chain; valid only if M_PKTHDR set. 
+ */ struct pkthdr { - int len; /* total packet length */ struct ifnet *rcvif; /* rcv interface */ - /* variables for ip and tcp reassembly */ - void *header; /* pointer to packet header */ + void *pkt_hdr; /* pointer to packet header */ + int32_t len; /* total packet length */ /* variables for hardware checksum */ /* Note: csum_flags is used for hardware checksum and VLAN */ - int csum_flags; /* flags regarding checksum */ - int csum_data; /* data field used by csum routines */ - u_short vlan_tag; /* VLAN tag, host byte order */ - u_short socket_id; /* socket id */ - SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ - struct pf_mtag pf_mtag; /* built-in PF tag */ -#define m_flowhash pf_mtag.pftag_flowhash -#define m_fhflags pf_mtag.pftag_flags - u_int32_t svc; /* MBUF_SVC value */ - u_int16_t vt_nrecs; /* # of IGMPv3/MLDv2 records */ - u_int16_t aux_flags; /* auxiliary packet flags */ - struct tcp_mtag tcp_mtag; /* tcp related data */ -#define tso_segsz tcp_mtag.tm_tso_segz -#define lro_pktlen tcp_mtag.tm_pktlen -#define lro_npkts tcp_mtag.tm_npkts + u_int32_t csum_flags; /* flags regarding checksum */ + union { + struct { + u_int16_t val; /* checksum value */ + u_int16_t start; /* checksum start offset */ + } _csum_rx; +#define csum_rx_val _csum_rx.val +#define csum_rx_start _csum_rx.start + struct { + u_int16_t start; /* checksum start offset */ + u_int16_t stuff; /* checksum stuff offset */ + } _csum_tx; +#define csum_tx_start _csum_tx.start +#define csum_tx_stuff _csum_tx.stuff + u_int32_t csum_data; /* data field used by csum routines */ + }; + u_int16_t vlan_tag; /* VLAN tag, host byte order */ + /* + * Packet classifier info + * + * PKTF_FLOW_ID set means valid flow ID. A non-zero flow ID value + * means the packet has been classified by one of the flow sources. + * It is also a prerequisite for flow control advisory, which is + * enabled by additionally setting PKTF_FLOW_ADV. + * + * The protocol value is a best-effort representation of the payload. + * It is opportunistically updated and used only for optimization. + * It is not a substitute for parsing the protocol header(s); use it + * only as a hint. + * + * If PKTF_IFAINFO is set, pkt_ifainfo contains one or both of the + * indices of interfaces which own the source and/or destination + * addresses of the packet. For the local/loopback case (PKTF_LOOP), + * both should be valid, and thus allows for the receiving end to + * quickly determine the actual interfaces used by the addresses; + * they may not necessarily be the same or refer to the loopback + * interface. Otherwise, in the non-local/loopback case, the indices + * are opportunistically set, and because of that only one may be set + * (0 means the index has not been determined.) In addition, the + * interface address flags are also recorded. This allows us to avoid + * storing the corresponding {in,in6}_ifaddr in an mbuf tag. Ideally + * this would be a superset of {ia,ia6}_flags, but the namespaces are + * overlapping at present, so we'll need a new set of values in future + * to achieve this. For now, we will just rely on the address family + * related code paths examining this mbuf to interpret the flags. 
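[Illustrative sketch, not part of the patch: how a flow source might fill in the classifier fields declared just below, using the FLOWSRC_* and PKTF_* values defined after the structure. The function name and hash argument are hypothetical.]

/* 'm' is a packet (M_PKTHDR) mbuf; 'hash' is a non-zero flow identifier */
static void
my_classify(struct mbuf *m, u_int32_t hash)
{
	m->m_pkthdr.pkt_flowsrc = FLOWSRC_IFNET;
	m->m_pkthdr.pkt_flowid = hash;
	/* flow control/advisory requires both bits to be set */
	m->m_pkthdr.pkt_flags |= (PKTF_FLOW_ID | PKTF_FLOW_ADV);
}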
+ */ + u_int8_t pkt_proto; /* IPPROTO value */ + u_int8_t pkt_flowsrc; /* FLOWSRC values */ + u_int32_t pkt_flowid; /* flow ID */ + u_int32_t pkt_flags; /* PKTF flags (see below) */ + u_int32_t pkt_svc; /* MBUF_SVC value */ + union { + struct { + u_int16_t src; /* ifindex of src addr i/f */ + u_int16_t src_flags; /* src PKT_IFAIFF flags */ + u_int16_t dst; /* ifindex of dst addr i/f */ + u_int16_t dst_flags; /* dst PKT_IFAIFF flags */ + } _pkt_iaif; +#define src_ifindex _pkt_iaif.src +#define src_iff _pkt_iaif.src_flags +#define dst_ifindex _pkt_iaif.dst +#define dst_iff _pkt_iaif.dst_flags + u_int64_t pkt_ifainfo; /* data field used by ifainfo */ + }; +#if MEASURE_BW + u_int64_t pkt_bwseq; /* sequence # */ +#endif /* MEASURE_BW */ + /* + * Tags (external and built-in) + */ + SLIST_HEAD(packet_tags, m_tag) tags; /* list of external tags */ + struct proto_mtag proto_mtag; /* built-in protocol-specific tag */ + struct pf_mtag pf_mtag; /* built-in PF tag */ + /* + * Module private scratch space (32-bit aligned), currently 16-bytes + * large. Anything stored here is not guaranteed to survive across + * modules. This should be the penultimate structure right before + * the red zone. Add new fields above this. + */ + struct { + union { + u_int8_t __mpriv8[16]; + u_int16_t __mpriv16[8]; + struct { + union { + u_int8_t __val8[4]; + u_int16_t __val16[2]; + u_int32_t __val32; + } __mpriv32_u; + } __mpriv32[4]; + u_int64_t __mpriv64[2]; + } __mpriv_u; + } pkt_mpriv __attribute__((aligned(4))); + u_int32_t redzone; /* red zone */ }; -/* description of external storage mapped into mbuf, valid if M_EXT set */ +/* + * Flow data source type. A data source module is responsible for generating + * a unique flow ID and associating it to each data flow as pkt_flowid. + * This is required for flow control/advisory, as it allows the output queue + * to identify the data source object and inform that it can resume its + * transmission (in the event it was flow controlled.) + */ +#define FLOWSRC_INPCB 1 /* flow ID generated by INPCB */ +#define FLOWSRC_IFNET 2 /* flow ID generated by interface */ +#define FLOWSRC_PF 3 /* flow ID generated by PF */ + +/* + * Packet flags. Unlike m_flags, all packet flags are copied along when + * copying m_pkthdr, i.e. no equivalent of M_COPYFLAGS here. These flags + * (and other classifier info) will be cleared during DLIL input. + * + * Some notes about M_LOOP and PKTF_LOOP: + * + * - M_LOOP flag is overloaded, and its use is discouraged. Historically, + * that flag was used by the KAME implementation for allowing certain + * exceptions to be made in the IP6_EXTHDR_CHECK() logic; this + * was originally meant to be set as the packet is looped back to the + * system, and in some circumstances temporarily set in ip6_output(). + * Over time, this flag was used by the pre-output routines to indicate + * to the DLIL frameout and output routines, that the packet may be + * looped back to the system under the right conditions. In addition, + * this is an mbuf flag rather than an mbuf packet header flag. + * + * - PKTF_LOOP is an mbuf packet header flag, which is set if and only + * if the packet was looped back to the system. This flag should be + * used instead for newer code. 
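[Illustrative sketch, not part of the patch: the preferred PKTF_LOOP test for new code, contrasted with the discouraged M_LOOP mbuf flag, per the note above. The helper name is hypothetical; the flag values are defined immediately below.]

static int
my_pkt_is_loopback(struct mbuf *m)
{
	/* new code: check the packet-header flag, not the mbuf flag */
	if (m->m_pkthdr.pkt_flags & PKTF_LOOP)
		return (1);
	/* legacy code paths may still rely on M_LOOP */
	return ((m->m_flags & M_LOOP) != 0);
}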
+ */ +#define PKTF_FLOW_ID 0x1 /* pkt has valid flowid value */ +#define PKTF_FLOW_ADV 0x2 /* pkt triggers local flow advisory */ +#define PKTF_FLOW_LOCALSRC 0x4 /* pkt is locally originated */ +#define PKTF_FLOW_RAWSOCK 0x8 /* pkt locally generated by raw sock */ +#define PKTF_PRIO_PRIVILEGED 0x10 /* packet priority is privileged */ +#define PKTF_PROXY_DST 0x20 /* processed but not locally destined */ +#define PKTF_INET_RESOLVE 0x40 /* IPv4 resolver packet */ +#define PKTF_INET6_RESOLVE 0x80 /* IPv6 resolver packet */ +#define PKTF_RESOLVE_RTR 0x100 /* pkt is for resolving router */ +#define PKTF_SW_LRO_PKT 0x200 /* pkt is a large coalesced pkt */ +#define PKTF_SW_LRO_DID_CSUM 0x400 /* IP and TCP checksums done by LRO */ +#define PKTF_MPTCP 0x800 /* TCP with MPTCP metadata */ +#define PKTF_MPSO 0x1000 /* MPTCP socket meta data */ +#define PKTF_LOOP 0x2000 /* loopbacked packet */ +#define PKTF_IFAINFO 0x4000 /* pkt has valid interface addr info */ +#define PKTF_SO_BACKGROUND 0x8000 /* data is from background source */ +#define PKTF_FORWARDED 0x10000 /* pkt was forwarded from another i/f */ +/* flags related to flow control/advisory and identification */ +#define PKTF_FLOW_MASK \ + (PKTF_FLOW_ID | PKTF_FLOW_ADV | PKTF_FLOW_LOCALSRC | PKTF_FLOW_RAWSOCK) + +/* + * Description of external storage mapped into mbuf, valid only if M_EXT set. + */ struct m_ext { caddr_t ext_buf; /* start of buffer */ - void (*ext_free)(caddr_t, u_int, caddr_t); /* free routine if not the usual */ + void (*ext_free) /* free routine if not the usual */ + (caddr_t, u_int, caddr_t); u_int ext_size; /* size of buffer, for ext_free */ caddr_t ext_arg; /* additional ext_free argument */ struct ext_refsq { /* references held */ @@ -254,6 +458,9 @@ struct m_ext { /* define m_ext to a type since it gets redefined below */ typedef struct m_ext _m_ext_t; +/* + * The mbuf object + */ struct mbuf { struct m_hdr m_hdr; union { @@ -289,7 +496,7 @@ struct mbuf { #define M_PROTO1 0x0008 /* protocol-specific */ #define M_PROTO2 0x0010 /* protocol-specific */ #define M_PROTO3 0x0020 /* protocol-specific */ -#define M_LOOP 0x0040 /* packet is looped back */ +#define M_LOOP 0x0040 /* packet is looped back (also see PKTF_LOOP) */ #define M_PROTO5 0x0080 /* protocol-specific */ /* mbuf pkthdr flags, also in m_flags (private) */ @@ -328,17 +535,24 @@ struct mbuf { #define CSUM_IP_VALID 0x0200 /* ... 
the csum is valid */ #define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ #define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ -#define CSUM_TCP_SUM16 0x1000 /* simple TCP Sum16 computation */ +#define CSUM_PARTIAL 0x1000 /* simple Sum16 computation */ #define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) #define CSUM_DELAY_IP (CSUM_IP) /* IPv4 only: no IPv6 IP cksum */ #define CSUM_DELAY_IPV6_DATA (CSUM_TCPIPV6 | CSUM_UDPIPV6) #define CSUM_DATA_IPV6_VALID CSUM_DATA_VALID /* csum_data field is valid */ + +#define CSUM_TX_FLAGS \ + (CSUM_DELAY_IP | CSUM_DELAY_DATA | CSUM_DELAY_IPV6_DATA | \ + CSUM_DATA_VALID | CSUM_PARTIAL) + +#define CSUM_RX_FLAGS \ + (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_PSEUDO_HDR | \ + CSUM_DATA_VALID | CSUM_PARTIAL) + /* * Note: see also IF_HWASSIST_CSUM defined in */ -/* bottom 16 bits reserved for hardware checksum */ -#define CSUM_CHECKSUM_MASK 0xffff /* VLAN tag present */ #define CSUM_VLAN_TAG_VALID 0x10000 /* vlan_tag field is valid */ @@ -347,17 +561,22 @@ struct mbuf { #define CSUM_TSO_IPV4 0x100000 /* This mbuf needs to be segmented by the NIC */ #define CSUM_TSO_IPV6 0x200000 /* This mbuf needs to be segmented by the NIC */ -/* - * Auxiliary packet flags. Unlike m_flags, all auxiliary flags are copied - * along when copying m_pkthdr, i.e. no equivalent of M_COPYFLAGS here. - * Note that this flag is 16-bit wide. - */ -#define MAUXF_PRIO_PRIVILEGED 0x0001 /* packet priority is privileged */ -#define MAUXF_PROXY_DST 0x0002 /* processed but not locally destined */ -#define MAUXF_INET_RESOLVE_RTR 0x0004 /* pkt is for resolving IPv4 router */ -#define MAUXF_INET6_RESOLVE_RTR 0x0008 /* pkt is for resolving IPv6 router */ -#define MAUXF_SW_LRO_PKT 0x0010 /* pkt is a large coalesced pkt */ -#define MAUXF_SW_LRO_DID_CSUM 0x0020 /* IP and TCP checksums done by LRO*/ +#define TSO_IPV4_OK(_ifp, _m) \ + (((_ifp)->if_hwassist & IFNET_TSO_IPV4) && \ + ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) \ + +#define TSO_IPV4_NOTOK(_ifp, _m) \ + (!((_ifp)->if_hwassist & IFNET_TSO_IPV4) && \ + ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV4)) \ + +#define TSO_IPV6_OK(_ifp, _m) \ + (((_ifp)->if_hwassist & IFNET_TSO_IPV6) && \ + ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) \ + +#define TSO_IPV6_NOTOK(_ifp, _m) \ + (!((_ifp)->if_hwassist & IFNET_TSO_IPV6) && \ + ((_m)->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) \ + #endif /* XNU_KERNEL_PRIVATE */ /* mbuf types */ @@ -459,6 +678,8 @@ union m16kcluster { #define M_COPY_PFTAG(to, from) m_copy_pftag(to, from) +#define M_COPY_CLASSIFIER(to, from) m_copy_classifier(to, from) + /* * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place * an object of the specified size at the end of the mbuf, longword aligned. @@ -566,10 +787,10 @@ do { \ m->m_type == MT_FREE || \ ((m->m_flags & M_EXT) != 0 && m->m_ext.ext_buf == NULL)) { \ panic_plain("Failed mbuf validity check: mbuf %p len %d " \ - "type %d flags 0x%x data %p rcvif %s%d ifflags 0x%x", \ + "type %d flags 0x%x data %p rcvif %s ifflags 0x%x", \ m, m->m_len, m->m_type, m->m_flags, \ ((m->m_flags & M_EXT) ? 
m->m_ext.ext_buf : m->m_data), \ - rcvif->if_name, rcvif->if_unit, \ + if_name(rcvif), \ (rcvif->if_flags & 0xffff)); \ } \ } while (0) @@ -857,6 +1078,11 @@ struct mbuf; #define M_DONTWAIT M_NOWAIT #define M_WAIT M_WAITOK +/* modes for m_copym and variants */ +#define M_COPYM_NOOP_HDR 0 /* don't copy/move pkthdr contents */ +#define M_COPYM_COPY_HDR 1 /* copy pkthdr from old to new */ +#define M_COPYM_MOVE_HDR 2 /* move pkthdr from old to new */ + /* * These macros are mapped to the appropriate KPIs, so that private code * can be simply recompiled in order to be forward-compatible with future @@ -868,11 +1094,12 @@ struct mbuf; #define MINCLSIZE mbuf_get_minclsize() /* cluster usage threshold */ extern void m_freem(struct mbuf *); -extern char *mcl_to_paddr(char *); +extern u_int64_t mcl_to_paddr(char *); extern void m_adj(struct mbuf *, int); extern void m_cat(struct mbuf *, struct mbuf *); extern void m_copydata(struct mbuf *, int, int, void *); extern struct mbuf *m_copym(struct mbuf *, int, int, int); +extern struct mbuf *m_copym_mode(struct mbuf *, int, int, int, uint32_t); extern struct mbuf *m_get(int, int); extern struct mbuf *m_gethdr(int, int); extern struct mbuf *m_getpacket(void); @@ -898,10 +1125,9 @@ extern void m_mclfree(caddr_t p); if (((_m)->m_flags & M_PKTHDR) && \ (_m)->m_pkthdr.rcvif != NULL) { \ panic_plain("\n%s: mbuf %p data ptr %p is not " \ - "32-bit aligned [%s%d: alignerrs=%lld]\n", \ + "32-bit aligned [%s: alignerrs=%lld]\n", \ __func__, (_m), (_m)->m_data, \ - (_m)->m_pkthdr.rcvif->if_name, \ - (_m)->m_pkthdr.rcvif->if_unit, \ + if_name((_m)->m_pkthdr.rcvif), \ (_m)->m_pkthdr.rcvif->if_alignerrs); \ } else { \ panic_plain("\n%s: mbuf %p data ptr %p is not " \ @@ -987,19 +1213,19 @@ extern void m_mclfree(caddr_t p); c == SCVAL_RV || c == SCVAL_VI || c == SCVAL_VO || \ c == SCVAL_CTL) -__private_extern__ union mbigcluster *mbutl; /* start VA of mbuf pool */ -__private_extern__ union mbigcluster *embutl; /* end VA of mbuf pool */ -__private_extern__ unsigned int nmbclusters; /* number of mapped clusters */ -__private_extern__ int njcl; /* # of jumbo clusters */ -__private_extern__ int njclbytes; /* size of a jumbo cluster */ -__private_extern__ int max_hdr; /* largest link+protocol header */ -__private_extern__ int max_datalen; /* MHLEN - max_hdr */ +extern union mbigcluster *mbutl; /* start VA of mbuf pool */ +extern union mbigcluster *embutl; /* end VA of mbuf pool */ +extern unsigned int nmbclusters; /* number of mapped clusters */ +extern int njcl; /* # of jumbo clusters */ +extern int njclbytes; /* size of a jumbo cluster */ +extern int max_hdr; /* largest link+protocol header */ +extern int max_datalen; /* MHLEN - max_hdr */ /* Use max_linkhdr instead of _max_linkhdr */ -__private_extern__ int _max_linkhdr; /* largest link-level header */ +extern int _max_linkhdr; /* largest link-level header */ /* Use max_protohdr instead of _max_protohdr */ -__private_extern__ int _max_protohdr; /* largest protocol header */ +extern int _max_protohdr; /* largest protocol header */ __private_extern__ unsigned int mbuf_default_ncl(int, u_int64_t); __private_extern__ void mbinit(void); @@ -1011,7 +1237,7 @@ __private_extern__ struct mbuf *m_mbigget(struct mbuf *, int); __private_extern__ caddr_t m_16kalloc(int); __private_extern__ void m_16kfree(caddr_t, u_int, caddr_t); __private_extern__ struct mbuf *m_m16kget(struct mbuf *, int); - +__private_extern__ int m_reinit(struct mbuf *, int); __private_extern__ struct mbuf *m_free(struct mbuf *); __private_extern__ struct mbuf 
*m_getclr(int, int); __private_extern__ struct mbuf *m_getptr(struct mbuf *, int, int *); @@ -1036,6 +1262,7 @@ __private_extern__ caddr_t m_mclalloc(int); __private_extern__ int m_mclhasreference(struct mbuf *); __private_extern__ void m_copy_pkthdr(struct mbuf *, struct mbuf *); __private_extern__ void m_copy_pftag(struct mbuf *, struct mbuf *); +__private_extern__ void m_copy_classifier(struct mbuf *, struct mbuf *); __private_extern__ struct mbuf *m_dtom(void *); __private_extern__ int m_mtocl(void *); @@ -1054,7 +1281,7 @@ __private_extern__ struct mbuf *m_copyback_cow(struct mbuf *, int, int, __private_extern__ int m_makewritable(struct mbuf **, int, int, int); __private_extern__ struct mbuf *m_dup(struct mbuf *m, int how); __private_extern__ struct mbuf *m_copym_with_hdrs(struct mbuf *, int, int, int, - struct mbuf **, int *); + struct mbuf **, int *, uint32_t); __private_extern__ struct mbuf *m_getpackethdrs(int, int); __private_extern__ struct mbuf *m_getpacket_how(int); __private_extern__ struct mbuf *m_getpackets_internal(unsigned int *, int, @@ -1102,7 +1329,7 @@ enum { KERNEL_TAG_TYPE_ENCAP = 8, KERNEL_TAG_TYPE_INET6 = 9, KERNEL_TAG_TYPE_IPSEC = 10, - KERNEL_TAG_TYPE_PF = 11 + KERNEL_TAG_TYPE_DRVAUX = 11 }; /* Packet tag routines */ @@ -1118,7 +1345,7 @@ __private_extern__ struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, u_int16_t, struct m_tag *); __private_extern__ struct m_tag *m_tag_copy(struct m_tag *, int); __private_extern__ int m_tag_copy_chain(struct mbuf *, struct mbuf *, int); -__private_extern__ void m_tag_init(struct mbuf *); +__private_extern__ void m_tag_init(struct mbuf *, int); __private_extern__ struct m_tag *m_tag_first(struct mbuf *); __private_extern__ struct m_tag *m_tag_next(struct mbuf *, struct m_tag *); @@ -1129,7 +1356,11 @@ __END_DECLS #ifdef XNU_KERNEL_PRIVATE __BEGIN_DECLS -__private_extern__ void m_service_class_init(struct mbuf *); +__private_extern__ void m_scratch_init(struct mbuf *); +__private_extern__ u_int32_t m_scratch_get(struct mbuf *, u_int8_t **); + +__private_extern__ void m_classifier_init(struct mbuf *, uint32_t); + __private_extern__ int m_set_service_class(struct mbuf *, mbuf_svc_class_t); __private_extern__ mbuf_svc_class_t m_get_service_class(struct mbuf *); __private_extern__ mbuf_svc_class_t m_service_class_from_idx(u_int32_t); @@ -1137,6 +1368,15 @@ __private_extern__ mbuf_svc_class_t m_service_class_from_val(u_int32_t); __private_extern__ int m_set_traffic_class(struct mbuf *, mbuf_traffic_class_t); __private_extern__ mbuf_traffic_class_t m_get_traffic_class(struct mbuf *); +#define ADDCARRY(_x) do { \ + while (((_x) >> 16) != 0) \ + (_x) = ((_x) >> 16) + ((_x) & 0xffff); \ +} while (0) + +__private_extern__ u_int16_t m_adj_sum16(struct mbuf *, u_int32_t, + u_int32_t, u_int32_t); +__private_extern__ u_int16_t m_sum16(struct mbuf *, u_int32_t, u_int32_t); + __END_DECLS #endif /* XNU_KERNEL_PRIVATE */ #endif /* KERNEL */ diff --git a/bsd/sys/mcache.h b/bsd/sys/mcache.h index 428a865ec..34c76988f 100644 --- a/bsd/sys/mcache.h +++ b/bsd/sys/mcache.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2006-2012 Apple Inc. All rights reserved. + * Copyright (c) 2006-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). 
You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #ifndef _SYS_MCACHE_H @@ -100,7 +100,66 @@ extern "C" { } while (0) #endif /* __LP64__ */ -#define CPU_CACHE_SIZE 64 +#define atomic_or_8_ov(a, n) \ + ((u_int8_t) OSBitOrAtomic8(n, (volatile UInt8 *)a)) + +#define atomic_or_8(a, n) \ + ((void) atomic_or_8_ov(a, n)) + +#define atomic_bitset_8(a, n) \ + atomic_or_8(a, n) + +#define atomic_or_16_ov(a, n) \ + ((u_int16_t) OSBitOrAtomic16(n, (volatile UInt16 *)a)) + +#define atomic_or_16(a, n) \ + ((void) atomic_or_16_ov(a, n)) + +#define atomic_bitset_16(a, n) \ + atomic_or_16(a, n) + +#define atomic_or_32_ov(a, n) \ + ((u_int32_t) OSBitOrAtomic(n, (volatile UInt32 *)a)) + +#define atomic_or_32(a, n) \ + ((void) atomic_or_32_ov(a, n)) + +#define atomic_bitset_32(a, n) \ + atomic_or_32(a, n) + +#define atomic_and_8_ov(a, n) \ + ((u_int8_t) OSBitAndAtomic8(n, (volatile UInt8 *)a)) + +#define atomic_and_8(a, n) \ + ((void) atomic_and_8_ov(a, n)) + +#define atomic_bitclear_8(a, n) \ + atomic_and_8(a, ~(n)) + +#define atomic_and_16_ov(a, n) \ + ((u_int16_t) OSBitAndAtomic16(n, (volatile UInt16 *)a)) + +#define atomic_and_16(a, n) \ + ((void) atomic_and_16_ov(a, n)) + +#define atomic_bitclear_16(a, n) \ + atomic_and_16(a, ~(n)) + +#define atomic_and_32_ov(a, n) \ + ((u_int32_t) OSBitAndAtomic(n, (volatile UInt32 *)a)) + +#define atomic_and_32(a, n) \ + ((void) atomic_and_32_ov(a, n)) + +#define atomic_bitclear_32(a, n) \ + atomic_and_32(a, ~(n)) + +/* + * Use CPU_CACHE_LINE_SIZE instead of MAX_CPU_CACHE_LINE_SIZE, unless + * wasting space is of no concern. 
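+ * For example (a sketch; "struct foo" is hypothetical): static
+ * alignment has to use the compile-time constant, as in
+ *
+ *	struct foo f __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE)));
+ *
+ * while run-time sizing and rounding decisions can use the possibly
+ * smaller CPU_CACHE_LINE_SIZE, since it expands to a call to
+ * mcache_cache_line_size().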
+ */ +#define MAX_CPU_CACHE_LINE_SIZE 64 +#define CPU_CACHE_LINE_SIZE mcache_cache_line_size() #ifndef IS_P2ALIGNED #define IS_P2ALIGNED(v, a) \ @@ -109,7 +168,7 @@ extern "C" { #ifndef P2ROUNDUP #define P2ROUNDUP(x, align) \ - (-(-((uintptr_t)(x)) & -(align))) + (-(-((uintptr_t)(x)) & -((uintptr_t)align))) #endif /* P2ROUNDUP */ #ifndef P2ROUNDDOWN @@ -185,7 +244,7 @@ typedef struct mcache_cpu { int cc_objs; /* number of objects in filled bkt */ int cc_pobjs; /* number of objects in previous bkt */ int cc_bktsize; /* number of elements in a full bkt */ -} __attribute__((aligned(CPU_CACHE_SIZE), packed)) mcache_cpu_t; +} __attribute__((aligned(MAX_CPU_CACHE_LINE_SIZE), packed)) mcache_cpu_t; typedef unsigned int (*mcache_allocfn_t)(void *, mcache_obj_t ***, unsigned int, int); @@ -271,8 +330,10 @@ typedef struct mcache_audit { mcache_t *mca_cache; /* parent cache of the buffer */ struct thread *mca_thread; /* thread doing transaction */ struct thread *mca_pthread; /* previous transaction thread */ - size_t mca_contents_size; /* size of contents */ - void *mca_contents; /* contents at last free */ + size_t mca_contents_size; /* size of saved contents */ + void *mca_contents; /* user-specific saved contents */ + uint32_t mca_tstamp; /* transaction timestamp (ms) */ + uint32_t mca_ptstamp; /* prev transaction timestamp (ms) */ uint16_t mca_depth; /* pc stack depth */ uint16_t mca_pdepth; /* previous transaction pc stack */ void *mca_stack[MCACHE_STACK_DEPTH]; @@ -284,6 +345,7 @@ typedef struct mcache_audit { __private_extern__ int assfail(const char *, const char *, int); __private_extern__ void mcache_init(void); __private_extern__ unsigned int mcache_getflags(void); +__private_extern__ unsigned int mcache_cache_line_size(void); __private_extern__ mcache_t *mcache_create(const char *, size_t, size_t, u_int32_t, int); __private_extern__ void *mcache_alloc(mcache_t *, int); @@ -301,7 +363,8 @@ __private_extern__ void mcache_waiter_inc(mcache_t *); __private_extern__ void mcache_waiter_dec(mcache_t *); __private_extern__ boolean_t mcache_bkt_isempty(mcache_t *); -__private_extern__ void mcache_buffer_log(mcache_audit_t *, void *, mcache_t *); +__private_extern__ void mcache_buffer_log(mcache_audit_t *, void *, mcache_t *, + struct timeval *); __private_extern__ void mcache_set_pattern(u_int64_t, void *, size_t); __private_extern__ void *mcache_verify_pattern(u_int64_t, void *, size_t); __private_extern__ void *mcache_verify_set_pattern(u_int64_t, u_int64_t, @@ -314,7 +377,7 @@ __private_extern__ char *mcache_dump_mca(mcache_audit_t *); __private_extern__ void mcache_audit_panic(mcache_audit_t *, void *, size_t, int64_t, int64_t); -__private_extern__ mcache_t *mcache_audit_cache; +extern mcache_t *mcache_audit_cache; #ifdef __cplusplus } diff --git a/bsd/sys/mman.h b/bsd/sys/mman.h index 109c63634..d7469fbf3 100644 --- a/bsd/sys/mman.h +++ b/bsd/sys/mman.h @@ -85,21 +85,9 @@ * [various] The mode_t, off_t, and size_t types shall be defined as * described in */ -#ifndef _MODE_T -typedef __darwin_mode_t mode_t; -#define _MODE_T -#endif - -#ifndef _OFF_T -typedef __darwin_off_t off_t; -#define _OFF_T -#endif - -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif - +#include +#include +#include /* * Protections are chosen from these bits, or-ed together @@ -231,7 +219,7 @@ __END_DECLS #else /* KERNEL */ #ifdef XNU_KERNEL_PRIVATE -void pshm_cache_init(void) __attribute__((section("__TEXT, initcode"))); /* for bsd_init() */ +void pshm_cache_init(void); /* for bsd_init() */ void 
pshm_lock_init(void); /* diff --git a/bsd/sys/mount.h b/bsd/sys/mount.h index 9a2c732f0..d2d05608a 100644 --- a/bsd/sys/mount.h +++ b/bsd/sys/mount.h @@ -444,12 +444,18 @@ union union_vfsidctl { /* the fields vc_vers and vc_fsid are compatible */ #define VFS_CTL_NOLOCKS 0x00010006 /* disable file locking */ #define VFS_CTL_SADDR 0x00010007 /* get server address */ #define VFS_CTL_DISC 0x00010008 /* server disconnected */ +#define VFS_CTL_SERVERINFO 0x00010009 /* information about fs server */ struct vfsquery { u_int32_t vq_flags; u_int32_t vq_spare[31]; }; +struct vfs_server { + int32_t vs_minutes; /* minutes until server goes down. */ + u_int8_t vs_server_name[MAXHOSTNAMELEN*3]; /* UTF8 server name to display (null terminated) */ +}; + /* vfsquery flags */ #define VQ_NOTRESP 0x0001 /* server down */ #define VQ_NEEDAUTH 0x0002 /* server bad auth */ @@ -461,8 +467,8 @@ struct vfsquery { #define VQ_NOTRESPLOCK 0x0080 /* server lockd down */ #define VQ_UPDATE 0x0100 /* filesystem information has changed */ #define VQ_VERYLOWDISK 0x0200 /* file system has *very* little disk space left */ -#define VQ_SYNCEVENT 0x0400 /* a sync just happened */ -#define VQ_FLAG0800 0x0800 /* placeholder */ +#define VQ_SYNCEVENT 0x0400 /* a sync just happened (not set by kernel starting Mac OS X 10.9) */ +#define VQ_SERVEREVENT 0x0800 /* server issued notification/warning */ #define VQ_FLAG1000 0x1000 /* placeholder */ #define VQ_FLAG2000 0x2000 /* placeholder */ #define VQ_FLAG4000 0x4000 /* placeholder */ @@ -490,8 +496,8 @@ struct vfsioattr { /* * Filesystem Registration information */ -#define VFS_TBLTHREADSAFE 0x0001 -#define VFS_TBLFSNODELOCK 0x0002 +#define VFS_TBLTHREADSAFE 0x0001 /* Only threadsafe filesystems are supported */ +#define VFS_TBLFSNODELOCK 0x0002 /* Only threadsafe filesystems are supported */ #define VFS_TBLNOTYPENUM 0x0008 #define VFS_TBLLOCALVOL 0x0010 #define VFS_TBL64BITREADY 0x0020 @@ -503,6 +509,7 @@ struct vfsioattr { #define VFS_TBLNOMACLABEL 0x1000 #define VFS_TBLVNOP_PAGEINV2 0x2000 #define VFS_TBLVNOP_PAGEOUTV2 0x4000 +#define VFS_TBLVNOP_NOUPDATEID_RENAME 0x8000 /* vfs should not call vnode_update_ident on rename */ struct vfs_fsentry { @@ -1176,9 +1183,9 @@ int vfs_getattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx); int vfs_setattr(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx); int vfs_extendedsecurity(mount_t); mount_t vfs_getvfs_by_mntonname(char *); -void vfs_markdependency(mount_t); vnode_t vfs_vnodecovered(mount_t mp); /* Returns vnode with an iocount that must be released with vnode_put() */ vnode_t vfs_devvp(mount_t mp); /* Please see block comment with implementation */ +int vfs_nativexattrs (mount_t mp); /* whether or not the FS supports EAs natively */ void * vfs_mntlabel(mount_t mp); /* Safe to cast to "struct label*"; returns "void*" to limit dependence of mount.h on security headers. 
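 * A one-line usage sketch (illustrative only):
 *
 *	struct label *mlabel = (struct label *)vfs_mntlabel(mp);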
*/ void vfs_setunmountpreflight(mount_t mp); void vfs_setcompoundopen(mount_t mp); diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h index ccf31dd9a..f246bc077 100644 --- a/bsd/sys/mount_internal.h +++ b/bsd/sys/mount_internal.h @@ -134,7 +134,9 @@ struct mount { uint32_t mnt_ioflags; /* flags for underlying device */ pending_io_t mnt_pending_write_size __attribute__((aligned(sizeof(pending_io_t)))); /* byte count of pending writes */ pending_io_t mnt_pending_read_size __attribute__((aligned(sizeof(pending_io_t)))); /* byte count of pending reads */ - + struct timeval mnt_last_write_issued_timestamp; + struct timeval mnt_last_write_completed_timestamp; + lck_rw_t mnt_rwlock; /* mutex readwrite lock */ lck_mtx_t mnt_renamelock; /* mutex that serializes renames that change shape of tree */ vnode_t mnt_devvp; /* the device mounted on for local file systems */ @@ -187,15 +189,6 @@ struct mount { * volumes marked 'MNTK_AUTH_OPAQUE'. */ int mnt_authcache_ttl; - /* - * The proc structure pointer and process ID form a - * sufficiently unique duple identifying the process - * hosting this mount point. Set by vfs_markdependency() - * and utilized in new_vnode() to avoid reclaiming vnodes - * with this dependency (radar 5192010). - */ - pid_t mnt_dependent_pid; - void *mnt_dependent_process; char fstypename_override[MFSTYPENAMELEN]; }; @@ -319,12 +312,10 @@ struct vfstable { #define VFC_VFSPREFLIGHT 0x040 #define VFC_VFSREADDIR_EXTENDED 0x080 #define VFC_VFS64BITREADY 0x100 -#if CONFIG_VFS_FUNNEL -#define VFC_VFSTHREADSAFE 0x200 -#endif /* CONFIG_VFS_FUNNEL */ #define VFC_VFSNOMACLABEL 0x1000 #define VFC_VFSVNOP_PAGEINV2 0x2000 #define VFC_VFSVNOP_PAGEOUTV2 0x4000 +#define VFC_VFSVNOP_NOUPDATEID_RENAME 0x8000 extern int maxvfsconf; /* highest defined filesystem type */ @@ -466,10 +457,17 @@ boolean_t vfs_iskernelmount(mount_t); #endif /* throttled I/O api */ + +/* returned by throttle_io_will_be_throttled */ +#define THROTTLE_DISENGAGED 0 +#define THROTTLE_ENGAGED 1 +#define THROTTLE_NOW 2 + int throttle_get_io_policy(struct uthread **ut); +int throttle_get_passive_io_policy(struct uthread **ut); int throttle_io_will_be_throttled(int lowpri_window_msecs, mount_t mp); -void throttle_info_update_by_mount(mount_t mp); -void unthrottle_thread(uthread_t); +void *throttle_info_update_by_mount(mount_t mp); +void rethrottle_thread(uthread_t ut); /* throttled I/O helper function */ /* convert the lowest bit to a device index */ diff --git a/bsd/sys/msg.h b/bsd/sys/msg.h index 9d856a984..6f530fa66 100644 --- a/bsd/sys/msg.h +++ b/bsd/sys/msg.h @@ -74,25 +74,10 @@ * NOTE: The definition of the key_t type is implicit from the * inclusion of */ -#ifndef _PID_T -typedef __darwin_pid_t pid_t; -#define _PID_T -#endif - -#ifndef _TIME_T -#define _TIME_T -typedef __darwin_time_t time_t; -#endif - -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif - -#ifndef _SSIZE_T -#define _SSIZE_T -typedef __darwin_ssize_t ssize_t; -#endif +#include +#include +#include +#include /* [XSI] Used for the number of messages in the message queue */ typedef unsigned long msgqnum_t; diff --git a/bsd/sys/munge.h b/bsd/sys/munge.h index 19a3dcd1d..170d1a51e 100644 --- a/bsd/sys/munge.h +++ b/bsd/sys/munge.h @@ -39,6 +39,7 @@ void munge_wl(const void *arg0 __unused, void *args); void munge_wwl(const void *arg0 __unused, void *args); void munge_wwlw(const void *arg0 __unused, void *args); void munge_wwlll(const void *arg0 __unused, void *args); +void munge_wwllww(const void *arg0 __unused, void *args); 
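+/*
+ * Naming note (an assumption, not stated in this change): each letter
+ * in a munge_* name is read as one user argument in order -- 'w' a
+ * 32-bit word, 'l' a 64-bit long-word, 's' a sign-extended 32-bit
+ * word -- so munge_wwllww above would be expected to expand two words,
+ * two long-words, then two words into 64-bit kernel arguments.
+ */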
void munge_wlw(const void *arg0 __unused, void *args); void munge_wlwwwll(const void *arg0 __unused, void *args); void munge_wlwwwllw(const void *arg0 __unused, void *args); @@ -64,6 +65,8 @@ void munge_wws(const void *arg0 __unused, void *args); void munge_wwwsw(const void *arg0 __unused, void *args); void munge_llllll(const void *arg0 __unused, void *args __unused); void munge_l(const void *arg0 __unused, void *args __unused); +void munge_ll(const void *arg0 __unused, void *args __unused); void munge_lw(const void *arg0 __unused, void *args); void munge_lwww(const void *arg0 __unused, void *args); +void munge_wwlwww(const void *arg0 __unused, void *args); #endif /* __MUNGE_H__ */ diff --git a/bsd/sys/namei.h b/bsd/sys/namei.h index 803a9d8d7..9ea13c01e 100644 --- a/bsd/sys/namei.h +++ b/bsd/sys/namei.h @@ -175,9 +175,6 @@ struct nameidata { #define AUDITVNPATH2 0x00200000 /* audit the path/vnode info */ #define USEDVP 0x00400000 /* start the lookup at ndp.ni_dvp */ #define CN_VOLFSPATH 0x00800000 /* user path was a volfs style path */ -#if CONFIG_VFS_FUNNEL -#define FSNODELOCKHELD 0x01000000 -#endif /* CONFIG_VFS_FUNNEL */ #define UNIONCREATED 0x02000000 /* union fs creation of vnode */ #if NAMEDRSRCFORK #define CN_WANTSRSRCFORK 0x04000000 @@ -185,6 +182,9 @@ struct nameidata { #endif /* public NOTRIGGER 0x10000000 see vnode.h */ #define CN_NBMOUNTLOOK 0x20000000 /* do not block for cross mount lookups */ +#ifdef BSD_KERNEL_PRIVATE +#define CN_SKIPNAMECACHE 0x40000000 /* skip cache during lookup(), allow FS to handle all components */ +#endif /* * Initialization of an nameidata structure. @@ -254,10 +254,10 @@ struct namecache { int namei(struct nameidata *ndp); void nameidone(struct nameidata *); -void namei_unlock_fsnode(struct nameidata *ndp); int lookup(struct nameidata *ndp); int relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp); +int lookup_traverse_union(vnode_t dvp, vnode_t *new_dvp, vfs_context_t ctx); void lookup_compound_vnop_post_hook(int error, vnode_t dvp, vnode_t vp, struct nameidata *ndp, int did_create); /* diff --git a/bsd/sys/param.h b/bsd/sys/param.h index 93f926150..764907b0a 100644 --- a/bsd/sys/param.h +++ b/bsd/sys/param.h @@ -77,10 +77,7 @@ #define NeXTBSD4_0 0 /* NeXTBSD 4.0 */ #include - -#ifndef NULL -#define NULL __DARWIN_NULL -#endif /* ! NULL */ +#include #ifndef LOCORE #include @@ -219,7 +216,8 @@ #ifndef howmany #define howmany(x, y) ((((x) % (y)) == 0) ? ((x) / (y)) : (((x) / (y)) + 1)) #endif -#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +#define roundup(x, y) ((((x) % (y)) == 0) ? \ + (x) : ((x) + ((y) - ((x) % (y))))) #define powerof2(x) ((((x)-1)&(x))==0) /* Macros for min/max. */ diff --git a/bsd/sys/posix_sem.h b/bsd/sys/posix_sem.h index 4b1e405f6..1d416d12f 100644 --- a/bsd/sys/posix_sem.h +++ b/bsd/sys/posix_sem.h @@ -61,8 +61,9 @@ struct pseminfo { gid_t psem_gid; char psem_name[PSEMNAMLEN + 1]; /* segment name */ void * psem_semobject; - struct proc * sem_proc; struct label * psem_label; + pid_t psem_creator_pid; + uint64_t psem_creator_uniqueid; }; #define PSEMINFO_NULL (struct pseminfo *)0 diff --git a/bsd/sys/priv.h b/bsd/sys/priv.h index 78f553f8a..e81fcf533 100644 --- a/bsd/sys/priv.h +++ b/bsd/sys/priv.h @@ -78,17 +78,24 @@ * subsystem, indicated by a subsystem name. */ #define PRIV_ADJTIME 1000 /* Set time adjustment. */ +#define PRIV_PROC_UUID_POLICY 1001 /* Change process uuid policy table. 
*/ +#define PRIV_GLOBAL_PROC_INFO 1002 /* Query information for processes owned by other users */ +#define PRIV_SYSTEM_OVERRIDE 1003 /* Override global system settings for various subsystems for a limited duration/system-mode */ /* * Virtual memory privileges. */ #define PRIV_VM_PRESSURE 6000 /* Check VM pressure. */ #define PRIV_VM_JETSAM 6001 /* Adjust jetsam configuration. */ +#define PRIV_VM_FOOTPRINT_LIMIT 6002 /* Adjust physical footprint limit. */ /* * Network stack privileges. */ #define PRIV_NET_PRIVILEGED_TRAFFIC_CLASS 10000 /* Set SO_PRIVILEGED_TRAFFIC_CLASS. */ +#define PRIV_NET_PRIVILEGED_SOCKET_DELEGATE 10001 /* Set delegate on a socket */ +#define PRIV_NET_INTERFACE_CONTROL 10002 /* Enable interface debug logging. */ +#define PRIV_NET_PRIVILEGED_NETWORK_STATISTICS 10003 /* Access to all sockets */ /* * IPv4 and IPv6 privileges. diff --git a/bsd/sys/proc.h b/bsd/sys/proc.h index 9b80718f3..afe765190 100644 --- a/bsd/sys/proc.h +++ b/bsd/sys/proc.h @@ -209,13 +209,16 @@ struct extern_proc { #define P_DIRTY_TRACK 0x00000001 /* track dirty state */ #define P_DIRTY_ALLOW_IDLE_EXIT 0x00000002 /* process can be idle-exited when clean */ -#define P_DIRTY 0x00000004 /* process is dirty */ -#define P_DIRTY_SHUTDOWN 0x00000008 /* process is dirty during shutdown */ -#define P_DIRTY_TERMINATED 0x00000010 /* process has been marked for termination */ -#define P_DIRTY_BUSY 0x00000020 /* serialization flag */ +#define P_DIRTY_DEFER 0x00000004 /* defer initial opt-in to idle-exit */ +#define P_DIRTY 0x00000008 /* process is dirty */ +#define P_DIRTY_SHUTDOWN 0x00000010 /* process is dirty during shutdown */ +#define P_DIRTY_TERMINATED 0x00000020 /* process has been marked for termination */ +#define P_DIRTY_BUSY 0x00000040 /* serialization flag */ +#define P_DIRTY_MARKED 0x00000080 /* marked dirty previously */ +#define P_DIRTY_DEFER_IN_PROGRESS 0x00000100 /* deferral to idle-band in progress */ -#define P_DIRTY_CAN_IDLE_EXIT (P_DIRTY_TRACK | P_DIRTY_ALLOW_IDLE_EXIT) #define P_DIRTY_IS_DIRTY (P_DIRTY | P_DIRTY_SHUTDOWN) +#define P_DIRTY_IDLE_EXIT_ENABLED (P_DIRTY_TRACK|P_DIRTY_ALLOW_IDLE_EXIT) #endif /* XNU_KERNEL_PRIVATE || !KERNEL */ @@ -302,16 +305,40 @@ extern int msleep1(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, u_int task_t proc_task(proc_t); extern int proc_pidversion(proc_t); extern int proc_getcdhash(proc_t, unsigned char *); -#endif /* KERNEL_PRIVATE */ -#ifdef XNU_KERNEL_PRIVATE + +/*! + @function proc_pidbackgrounded + @abstract KPI to determine if a process is currently backgrounded. + @discussion The process may move into or out of background state at any time, + so be prepared for this value to be outdated immediately. + @param pid PID of the process to be queried. + @param state Pointer to a value which will be set to 1 if the process + is currently backgrounded, 0 otherwise. + @return ESRCH if pid cannot be found or has started exiting. + + EINVAL if state is NULL. + */ +extern int proc_pidbackgrounded(pid_t pid, uint32_t* state); + + /* * This returns a unique 64-bit id of a given process. * Caller needs to hold a proper reference on the * passed-in process structure.
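 *
 * A usage sketch (illustrative only; proc_find/proc_rele manage the
 * reference):
 *
 *	proc_t p = proc_find(pid);
 *	if (p != PROC_NULL) {
 *		uint64_t id = proc_uniqueid(p);
 *		proc_rele(p);
 *	}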
*/ extern uint64_t proc_uniqueid(proc_t); -extern uint64_t proc_selfuniqueid(void); + +#endif /* KERNEL_PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE + +/* unique 64bit id for process's original parent */ +extern uint64_t proc_puniqueid(proc_t); + extern void proc_getexecutableuuid(proc_t, unsigned char *, unsigned long); + +extern uint64_t proc_was_throttled(proc_t); +extern uint64_t proc_did_throttle(proc_t); + #endif /* XNU_KERNEL_PRIVATE*/ __END_DECLS diff --git a/bsd/sys/proc_info.h b/bsd/sys/proc_info.h index 381aef8f3..670f05f57 100644 --- a/bsd/sys/proc_info.h +++ b/bsd/sys/proc_info.h @@ -95,6 +95,24 @@ struct proc_bsdshortinfo { }; +#ifdef PRIVATE +struct proc_uniqidentifierinfo { + uint8_t p_uuid[16]; /* UUID of the main executable */ + uint64_t p_uniqueid; /* 64 bit unique identifier for process */ + uint64_t p_puniqueid; /* unique identifier for process's parent */ + uint64_t p_reserve2; /* reserved for future use */ + uint64_t p_reserve3; /* reserved for future use */ + uint64_t p_reserve4; /* reserved for future use */ +}; + + +struct proc_bsdinfowithuniqid { + struct proc_bsdinfo pbsd; + struct proc_uniqidentifierinfo p_uniqidentifier; +}; +#endif + + /* pbi_flags values */ #define PROC_FLAG_SYSTEM 1 /* System process */ #define PROC_FLAG_TRACED 2 /* process currently being traced, possibly by gdb */ @@ -118,8 +136,14 @@ struct proc_bsdshortinfo { #ifdef PRIVATE #define PROC_FLAG_DARWINBG 0x8000 /* process in darwin background */ #define PROC_FLAG_EXT_DARWINBG 0x10000 /* process in darwin background - external enforcement */ -#define PROC_FLAG_IOS_APPLEDAEMON 0x20000 /* Process is apple daemon */ -#define PROC_FLAG_DELAYIDLESLEEP 0x40000 /* Process is marked to delay idle sleep on disk IO */ +#define PROC_FLAG_IOS_APPLEDAEMON 0x20000 /* Process is apple daemon */ +#define PROC_FLAG_DELAYIDLESLEEP 0x40000 /* Process is marked to delay idle sleep on disk IO */ +#define PROC_FLAG_IOS_IMPPROMOTION 0x80000 /* Process is a daemon which receives importance donation */ +#define PROC_FLAG_ADAPTIVE 0x100000 /* Process is adaptive */ +#define PROC_FLAG_ADAPTIVE_IMPORTANT 0x200000 /* Process is adaptive, and is currently important */ +#define PROC_FLAG_IMPORTANCE_DONOR 0x400000 /* Process is marked as an importance donor */ +#define PROC_FLAG_SUPPRESSED 0x800000 /* Process is suppressed */ +#define PROC_FLAG_IOS_APPLICATION 0x1000000 /* Process is an application */ #endif @@ -240,12 +264,18 @@ struct proc_fileinfo { uint32_t fi_status; off_t fi_offset; int32_t fi_type; - int32_t rfu_1; /* reserved */ + uint32_t fi_guardflags; }; /* stats flags in proc_fileinfo */ #define PROC_FP_SHARED 1 /* shared by more than one fd */ #define PROC_FP_CLEXEC 2 /* close on exec */ +#define PROC_FP_GUARDED 4 /* guarded fd */ + +#define PROC_FI_GUARD_CLOSE (1u << 0) +#define PROC_FI_GUARD_DUP (1u << 1) +#define PROC_FI_GUARD_SOCKET_IPC (1u << 2) +#define PROC_FI_GUARD_FILEPORT (1u << 3) /* * A copy of stat64 with static sized fields.
@@ -647,6 +677,19 @@ struct proc_fileportinfo { #define PROC_PIDTHREADID64INFO 15 #define PROC_PIDTHREADID64INFO_SIZE (sizeof(struct proc_threadinfo)) +#define PROC_PID_RUSAGE 16 +#define PROC_PID_RUSAGE_SIZE 0 + +#ifdef PRIVATE +#define PROC_PIDUNIQIDENTIFIERINFO 17 +#define PROC_PIDUNIQIDENTIFIERINFO_SIZE \ + (sizeof(struct proc_uniqidentifierinfo)) + +#define PROC_PIDT_BSDINFOWITHUNIQID 18 +#define PROC_PIDT_BSDINFOWITHUNIQID_SIZE \ + (sizeof(struct proc_bsdinfowithuniqid)) +#endif + /* Flavors for proc_pidfdinfo */ #define PROC_PIDFDVNODEINFO 1 @@ -706,13 +749,27 @@ struct proc_fileportinfo { /* proc_track_dirty() flags */ #define PROC_DIRTY_TRACK 0x1 #define PROC_DIRTY_ALLOW_IDLE_EXIT 0x2 - -#define PROC_DIRTY_TRACK_MASK (PROC_DIRTY_TRACK|PROC_DIRTY_ALLOW_IDLE_EXIT) +#define PROC_DIRTY_DEFER 0x4 /* proc_get_dirty() flags */ #define PROC_DIRTY_TRACKED 0x1 #define PROC_DIRTY_ALLOWS_IDLE_EXIT 0x2 -#define PROC_DIRTY_IS_DIRTY 0x4 +#define PROC_DIRTY_IS_DIRTY 0x4 + +#ifdef PRIVATE + +/* __proc_info() call numbers */ +#define PROC_INFO_CALL_LISTPIDS 0x1 +#define PROC_INFO_CALL_PIDINFO 0x2 +#define PROC_INFO_CALL_PIDFDINFO 0x3 +#define PROC_INFO_CALL_KERNMSGBUF 0x4 +#define PROC_INFO_CALL_SETCONTROL 0x5 +#define PROC_INFO_CALL_PIDFILEPORTINFO 0x6 +#define PROC_INFO_CALL_TERMINATE 0x7 +#define PROC_INFO_CALL_DIRTYCONTROL 0x8 +#define PROC_INFO_CALL_PIDRUSAGE 0x9 + +#endif /* PRIVATE */ #ifdef XNU_KERNEL_PRIVATE #ifndef pshmnode diff --git a/bsd/sys/proc_internal.h b/bsd/sys/proc_internal.h index e4c7497bf..428618c9d 100644 --- a/bsd/sys/proc_internal.h +++ b/bsd/sys/proc_internal.h @@ -154,6 +154,7 @@ struct session { */ #define S_DEFAULT 0x00000000 /* No flags set */ #define S_NOCTTY 0x00000001 /* Do not associate controlling tty */ +#define S_CTTYREF 0x00000010 /* vnode ref taken by cttyopen */ #define S_LIST_TERM 1 /* marked for termination */ @@ -217,7 +218,8 @@ struct proc { gid_t p_rgid; uid_t p_svuid; gid_t p_svgid; - uint64_t p_uniqueid; /* process uniqe ID */ + uint64_t p_uniqueid; /* process unique ID - incremented on fork/spawn/vfork, remains same across exec. */ + uint64_t p_puniqueid; /* parent's unique ID - set on fork/spawn/vfork, doesn't change if reparented. */ lck_mtx_t p_mlock; /* mutex lock for proc */ @@ -334,11 +336,6 @@ struct proc { uint32_t p_pcaction; /* action for process control on starvation */ uint8_t p_uuid[16]; /* from LC_UUID load command */ -#if !CONFIG_EMBEDDED -#define PROC_LEGACY_BEHAVIOR_IOTHROTTLE (0x00000001) - uint32_t p_legacy_behavior; -#endif - /* End area that is copied on creation. */ /* XXXXXXXXXXXXX End of BCOPY'ed on fork (AIOLOCK)XXXXXXXXXXXXXXXX */ #define p_endcopy p_aio_total_count @@ -349,13 +346,14 @@ struct proc { struct klist p_klist; /* knote list (PL ?)*/ - struct rusage *p_ru; /* Exit information. (PL) */ + struct rusage_superset *p_ru; /* Exit information. (PL) */ int p_sigwaitcnt; thread_t p_signalholder; thread_t p_transholder; /* DEPRECATE following field */ u_short p_acflag; /* Accounting flags. */ + volatile u_short p_vfs_iopolicy; /* VFS iopolicy flags. */ struct lctx *p_lctx; /* Pointer to login context. */ LIST_ENTRY(proc) p_lclist; /* List of processes in lctx. 
*/ @@ -372,6 +370,9 @@ struct proc { int p_ractive; int p_idversion; /* version of process identity */ void * p_pthhash; /* pthread waitqueue hash */ + volatile uint64_t was_throttled __attribute__((aligned(8))); /* Counter for number of throttled I/Os */ + volatile uint64_t did_throttle __attribute__((aligned(8))); /* Counter for number of I/Os this proc throttled */ + #if DIAGNOSTIC unsigned int p_fdlock_pc[4]; unsigned int p_fdunlock_pc[4]; @@ -381,10 +382,27 @@ struct proc { #endif /* SIGNAL_DEBUG */ #endif /* DIAGNOSTIC */ uint64_t p_dispatchqueue_offset; + uint64_t p_dispatchqueue_serialno_offset; #if VM_PRESSURE_EVENTS struct timeval vm_pressure_last_notify_tstamp; #endif - int p_dirty; /* dirty state */ + +#if CONFIG_MEMORYSTATUS + /* Fields protected by proc list lock */ + TAILQ_ENTRY(proc) p_memstat_list; /* priority bucket link */ + uint32_t p_memstat_state; /* state */ + int32_t p_memstat_effectivepriority; /* priority after transaction state accounted for */ + int32_t p_memstat_requestedpriority; /* active priority */ + uint64_t p_memstat_userdata; /* user state */ + uint32_t p_memstat_dirty; /* dirty state */ + uint64_t p_memstat_idledeadline; /* time at which process became clean */ +#if CONFIG_JETSAM + int32_t p_memstat_memlimit; /* cached memory limit */ +#endif +#if CONFIG_FREEZE + uint32_t p_memstat_suspendedfootprint; /* footprint at time of suspensions */ +#endif /* CONFIG_FREEZE */ +#endif /* CONFIG_MEMORYSTATUS */ }; #define PGRPID_DEAD 0xdeaddead @@ -420,8 +438,8 @@ struct proc { #define P_LTRANSCOMMIT 0x00000020 /* process is committed to trans */ #define P_LINTRANSIT 0x00000040 /* process in exec or in creation */ #define P_LTRANSWAIT 0x00000080 /* waiting for trans to complete */ -#define P_LVFORK 0x00000100 /* */ -#define P_LINVFORK 0x00000200 /* */ +#define P_LVFORK 0x00000100 /* parent proc of a vfork */ +#define P_LINVFORK 0x00000200 /* child proc of a vfork */ #define P_LTRACED 0x00000400 /* */ #define P_LSIGEXC 0x00000800 /* */ #define P_LNOATTACH 0x00001000 /* */ @@ -437,7 +455,16 @@ struct proc { #define P_LRAGE_VNODES 0x00400000 #define P_LREGISTER 0x00800000 /* thread start fns registered */ #define P_LVMRSRCOWNER 0x01000000 /* can handle the resource ownership of */ -#define P_LPTERMINATE 0x02000000 /* can handle the resource ownership of */ +/* old P_LPTERMINATE 0x02000000 */ +#define P_LTERM_DECRYPTFAIL 0x04000000 /* process terminating due to key failure to decrypt */ +#define P_LTERM_JETSAM 0x08000000 /* process is being jetsam'd */ +#define P_JETSAM_VMPAGESHORTAGE 0x00000000 /* jetsam: lowest jetsam priority proc, killed due to vm page shortage */ +#define P_JETSAM_VMTHRASHING 0x10000000 /* jetsam: lowest jetsam priority proc, killed due to vm thrashing */ +#define P_JETSAM_HIWAT 0x20000000 /* jetsam: high water mark */ +#define P_JETSAM_PID 0x30000000 /* jetsam: pid */ +#define P_JETSAM_IDLEEXIT 0x40000000 /* jetsam: idle exit */ +#define P_JETSAM_VNODE 0x50000000 /* jetsam: vnode kill */ +#define P_JETSAM_MASK 0x70000000 /* jetsam type mask */ /* Process control state for resource starvation */ #define P_PCTHROTTLE 1 @@ -455,6 +482,9 @@ struct proc { /* additional process flags */ #define P_LADVLOCK 0x01 +/* p_vfs_iopolicy flags */ +#define P_VFS_IOPOLICY_FORCE_HFS_CASE_SENSITIVITY 0x0001 + /* defns for proc_iterate */ #define PROC_ALLPROCLIST 1 /* walk the allproc list (procs not exited yet) */ #define PROC_ZOMBPROCLIST 2 /* walk the zombie list */ @@ -597,7 +627,7 @@ struct user64_extern_proc { */ extern int nprocs, maxproc; /* Current and max 
number of procs. */ extern int maxprocperuid; /* Current number of procs per uid */ -__private_extern__ int hard_maxproc; /* hard limit */ +extern int hard_maxproc; /* hard limit */ extern unsigned int proc_shutdown_exitcount; #define PID_MAX 99999 @@ -649,7 +679,7 @@ LIST_HEAD(proclist, proc); extern struct proclist allproc; /* List of all processes. */ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proc *initproc; -extern void procinit(void) __attribute__((section("__TEXT, initcode"))); +extern void procinit(void); extern void proc_lock(struct proc *); extern void proc_unlock(struct proc *); extern void proc_spinlock(struct proc *); @@ -690,7 +720,7 @@ extern int tsleep1(void *chan, int pri, const char *wmesg, u_int64_t abstime, in extern int msleep0(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, int timo, int (*continuation)(int)); extern void vfork_return(struct proc *child, int32_t *retval, int rval); extern int exit1(struct proc *, int, int *); -extern int exit1_internal(struct proc *, int, int *, boolean_t, boolean_t); +extern int exit1_internal(struct proc *, int, int *, boolean_t, boolean_t, int); extern int fork1(proc_t, thread_t *, int); extern void vfork_exit_internal(struct proc *p, int rv, int forced); extern void proc_reparentlocked(struct proc *child, struct proc * newparent, int cansignal, int locked); @@ -745,7 +775,6 @@ extern int proc_pendingsignals(proc_t, sigset_t); int proc_getpcontrol(int pid, int * pcontrolp); int proc_dopcontrol(proc_t p, void *unused_arg); int proc_resetpcontrol(int pid); -extern void proc_removethrottle(proc_t); #if PSYNCH void pth_proc_hashinit(proc_t); void pth_proc_hashdelete(proc_t); @@ -755,4 +784,11 @@ void psynch_wq_cleanup(__unused void * param, __unused void * param1); extern lck_mtx_t * pthread_list_mlock; #endif /* PSYNCH */ struct uthread * current_uthread(void); + +/* return 1 if process is forcing case-sensitive HFS+ access, 0 for default */ +extern int proc_is_forcing_hfs_case_sensitivity(proc_t); + +pid_t dtrace_proc_selfpid(void); +pid_t dtrace_proc_selfppid(void); +uid_t dtrace_proc_selfruid(void); #endif /* !_SYS_PROC_INTERNAL_H_ */ diff --git a/bsd/sys/proc_uuid_policy.h b/bsd/sys/proc_uuid_policy.h new file mode 100644 index 000000000..18118dc15 --- /dev/null +++ b/bsd/sys/proc_uuid_policy.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _SYS_PROC_UUID_POLICY_H +#define _SYS_PROC_UUID_POLICY_H + +#include +#include +#include +#include +#include + +__BEGIN_DECLS + +/* + * The proc_uuid_policy subsystem allows a privileged client to + * upload policies to the kernel keyed by Mach-O executable + * UUID. In-kernel clients can query this policy table cheaply + * to determine if a resource or process should be governed + * by the policy flags. During early boot, the policy table + * may be empty or sparse; in-kernel clients should have a + * specified behavior for this case. + */ + +#define PROC_UUID_POLICY_OPERATION_CLEAR 0x00000000 +#define PROC_UUID_POLICY_OPERATION_ADD 0x00000001 +#define PROC_UUID_POLICY_OPERATION_REMOVE 0x00000002 + +/* The namespace of flags is managed by in-kernel clients */ +#define PROC_UUID_POLICY_FLAGS_NONE 0x00000000 +#define PROC_UUID_NO_CELLULAR 0x00000001 +#define PROC_UUID_FLOW_DIVERT 0x00000002 + +#if BSD_KERNEL_PRIVATE +/* + * Look up a policy indexed by UUID. + * + * Parameters: + * uuid UUID to look up, must not be the zero-uuid + * flags Flags that have been associated with the UUID on successful + * lookup. + * gencount The generation count of the internal policy table representation. + * + * Initial lookups by an in-kernel subsystem should pass 0 for flags/gencount. + * Subsequent lookups for the same UUID with the same flags and gencount passed + * in can short-circuit the lookup if the generation count has not changed. + * + * Return: + * 0 Success, UUID was found, flags and gencount are returned + * EINVAL Bad UUID or other pointer parameter + * ENOENT UUID not found + * + */ +extern int proc_uuid_policy_lookup(uuid_t uuid, uint32_t *flags, int32_t *gencount); + +extern void proc_uuid_policy_init(void); +#endif /* BSD_KERNEL_PRIVATE */ + +#ifndef KERNEL +/* + * Upload a policy indexed by UUID. + * + * Parameters: + * operation CLEAR Remove all existing entries + * ADD Add the specified UUID and flags to the policy table. + * Existing entries for the UUID are replaced. + * REMOVE Remove entry for the specified UUID. + * uuid Pointer to UUID for Mach-O executable + * uuidlen sizeof(uuid_t) + * flags Flags to be stored in the policy table + * + * Return: + * 0 Success, operation completed without error. + * -1 Failure, errno can contain: + * ENOENT REMOVE operation specified a UUID not in the policy table.
+ * EPERM Caller is not privileged to call this system call + * EINVAL Invalid parameter + * ERANGE Invalid uuidlen + * ENOMEM Too many entries exist + */ +extern int proc_uuid_policy(uint32_t operation, uuid_t uuid, size_t uuidlen, uint32_t flags); +#endif /* !KERNEL */ + +__END_DECLS + +#endif /*_SYS_PROC_UUID_POLICY_H */ diff --git a/bsd/sys/process_policy.h b/bsd/sys/process_policy.h index d9ad48272..35341278f 100644 --- a/bsd/sys/process_policy.h +++ b/bsd/sys/process_policy.h @@ -52,6 +52,8 @@ __BEGIN_DECLS #define PROC_POLICY_ACTION_GET 11 /* get the policy attributes */ #define PROC_POLICY_ACTION_ADD 12 /* add a policy attribute */ #define PROC_POLICY_ACTION_REMOVE 13 /* remove a policy attribute */ +#define PROC_POLICY_ACTION_HOLD 14 /* hold an importance boost assertion */ +#define PROC_POLICY_ACTION_DROP 15 /* drop an importance boost assertion */ /* policies */ #define PROC_POLICY_NONE 0 @@ -59,12 +61,13 @@ #define PROC_POLICY_HARDWARE_ACCESS 2 /* access to various hardware */ #define PROC_POLICY_RESOURCE_STARVATION 3 /* behavior on resource starvation */ #define PROC_POLICY_RESOURCE_USAGE 4 /* behavior on resource consumption */ -#if CONFIG_EMBEDDED +#if BUILD_LIBSYSCALL #define PROC_POLICY_APP_LIFECYCLE 5 /* app life cycle management */ -#else /* CONFIG_EMBEDDED */ +#else /* BUILD_LIBSYSCALL */ #define PROC_POLICY_RESERVED 5 /* behavior on resource consumption */ -#endif /* CONFIG_EMBEDDED */ +#endif /* BUILD_LIBSYSCALL */ #define PROC_POLICY_APPTYPE 6 /* behavior on resource consumption */ +#define PROC_POLICY_BOOST 7 /* importance boost/drop */ /* sub policies for background policy */ #define PROC_POLICY_BG_NONE 0 /* none */ @@ -72,11 +75,11 @@ __BEGIN_DECLS #define PROC_POLICY_BG_DISKTHROTTLE 2 /* disk accesses throttled */ #define PROC_POLICY_BG_NETTHROTTLE 4 /* network accesses throttled */ #define PROC_POLICY_BG_GPUDENY 8 /* no access to GPU */ -#if CONFIG_EMBEDDED +#if BUILD_LIBSYSCALL #define PROC_POLICY_BG_ALL 0x0F -#else /* CONFIG_EMBEDDED */ +#else /* BUILD_LIBSYSCALL */ #define PROC_POLICY_BG_ALL 0x07 -#endif /* CONFIG_EMBEDDED */ +#endif /* BUILD_LIBSYSCALL */ #define PROC_POLICY_BG_DEFAULT PROC_POLICY_BG_ALL /* sub policies for hardware */ @@ -137,15 +140,13 @@ __BEGIN_DECLS #define PROC_POLICY_RSRCACT_NOTIFY_KQ 4 /* send kqueue notification */ #define PROC_POLICY_RSRCACT_NOTIFY_EXC 5 /* send exception */ +#define PROC_POLICY_CPUMON_DISABLE 0xFF /* Disable CPU usage monitor */ +#define PROC_POLICY_CPUMON_DEFAULTS 0xFE /* Set default CPU usage monitor params */ -/* type of resource for kqueue notifiction */ -#define PROC_POLICY_RSRTYPE_CPU 1 -#define PROC_POLICY_RSRTYPE_WIREDMEM 2 -#define PROC_POLICY_RSRTYPE_VIRTUALMEM 4 -#define PROC_POLICY_RSRTYPE_DISK 8 -#define PROC_POLICY_RSRTYPE_NETWORK 0x010 -#define PROC_POLICY_RSRTYPE_POWER 0x20 - +/* sub policies for importance boost/drop */ +#define PROC_POLICY_IMP_IMPORTANT 1 /* Important-level boost */ +#define PROC_POLICY_IMP_STANDARD 2 /* Standard-level boost */ +#define PROC_POLICY_IMP_DONATION 3 /* Mark a task as an importance source */ typedef struct proc_policy_attribute { uint32_t ppattr_attribute; /* the policy attribute to be modified or returned */ @@ -164,33 +165,28 @@ typedef struct proc_policy_cpuusage_attr { uint64_t ppattr_cpu_attr_deadline; /* 64bit deadline in nsecs */ } proc_policy_cpuusage_attr_t; -#if CONFIG_EMBEDDED +#if BUILD_LIBSYSCALL /* sub policies for app lifecycle management */ #define PROC_POLICY_APPLIFE_NONE 0 /* does nothing..
*/ #define PROC_POLICY_APPLIFE_STATE 1 /* sets the app to various lifecycle states */ #define PROC_POLICY_APPLIFE_DEVSTATUS 2 /* notes the device in inactive or short/long term */ #define PROC_POLICY_APPLIFE_PIDBIND 3 /* a thread is to be bound to another processes app state */ -#endif /* CONFIG_EMBEDDED */ +#endif /* BUILD_LIBSYSCALL */ /* sub policies for PROC_POLICY_APPTYPE */ #define PROC_POLICY_APPTYPE_NONE 0 /* does nothing.. */ #define PROC_POLICY_APPTYPE_MODIFY 1 /* sets the app to various lifecycle states */ -#if CONFIG_EMBEDDED +#if BUILD_LIBSYSCALL #define PROC_POLICY_APPTYPE_THREADTHR 2 /* notes the device in inactive or short/long term */ -#endif /* CONFIG_EMBEDDED */ - - -#define PROC_POLICY_OSX_APPTYPE_NONE 0 -#if CONFIG_EMBEDDED -#define PROC_POLICY_IOS_RESV1_APPTYPE 1 /* TAL based launched */ -#define PROC_POLICY_IOS_APPLE_DAEMON 2 /* for user of IOS apple daemons */ -#define PROC_POLICY_IOS_APPTYPE 3 /* ios specific handling */ -#define PROC_POLICY_IOS_NONUITYPE 4 /* ios non graphics type */ -#else -#define PROC_POLICY_OSX_APPTYPE_TAL 1 /* TAL based launched */ -#define PROC_POLICY_OSX_APPTYPE_WIDGET 2 /* for dashboard client */ -#define PROC_POLICY_OSX_APPTYPE_DASHCLIENT 2 /* rename to move away from widget */ -#endif +#endif /* BUILD_LIBSYSCALL */ + +/* exported apptypes for PROC_POLICY_APPTYPE */ +#define PROC_POLICY_OSX_APPTYPE_TAL 1 /* TAL-launched app */ + +#define PROC_POLICY_OSX_APPTYPE_DASHCLIENT 2 /* Dashboard client (deprecated) */ +#define PROC_POLICY_IOS_DONATEIMP 6 /* donates priority imp (deprecated) */ +#define PROC_POLICY_IOS_HOLDIMP 7 /* hold importance assertion (deprecated) */ +#define PROC_POLICY_IOS_DROPIMP 8 /* drop importance assertion (deprecated) */ #ifndef KERNEL int process_policy(int scope, int action, int policy, int policy_subtype, proc_policy_attribute_t * attrp, pid_t target_pid, uint64_t target_threadid); diff --git a/bsd/sys/protosw.h b/bsd/sys/protosw.h index 75b6d6b28..d16f4a3aa 100644 --- a/bsd/sys/protosw.h +++ b/bsd/sys/protosw.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1998, 1999 Apple Computer, Inc. 
All Rights Reserved */ @@ -64,20 +64,19 @@ */ #ifndef _SYS_PROTOSW_H_ -#define _SYS_PROTOSW_H_ +#define _SYS_PROTOSW_H_ #include #include +/* XXX: this will go away */ #define PR_SLOWHZ 2 /* 2 slow timeouts per second */ -#ifndef __APPLE__ -/* - * See rdar://7617868: pr_fasttimo was removed use your own timer or pr_slowtimo instead - */ -#define PR_FASTHZ 5 /* 5 fast timeouts per second */ -#endif -#ifdef PRIVATE +#ifdef KERNEL_PRIVATE +#include +#include +#include +#include /* Forward declare these structures referenced from prototypes below. */ struct mbuf; @@ -86,8 +85,80 @@ struct sockaddr; struct socket; struct sockopt; struct socket_filter; +#ifdef XNU_KERNEL_PRIVATE +struct domain_old; +#endif /* XNU_KERNEL_PRIVATE */ + +#pragma pack(4) -/*#ifdef _KERNEL*/ +#ifdef XNU_KERNEL_PRIVATE +/* + * Legacy protocol switch table. + * + * NOTE: Do not modify this structure, as there are modules outside of xnu + * which rely on the size and layout for binary compatibility. This structure + * is simply used by the exported net_{add,del}_proto_old, pffindproto_old + * routines, and by the domain_old structure. Internally, protocol switch + * tables are stored in the private variant of protosw defined down below. + */ +struct protosw_old { +#else +struct protosw { +#endif /* !XNU_KERNEL_PRIVATE */ + short pr_type; /* socket type used for */ + struct domain *pr_domain; /* domain protocol a member of */ + short pr_protocol; /* protocol number */ + unsigned int pr_flags; /* see below */ + /* + * protocol-protocol hooks + */ + void (*pr_input) /* input to protocol (from below) */ + (struct mbuf *, int len); + int (*pr_output) /* output to protocol (from above) */ + (struct mbuf *m, struct socket *so); + void (*pr_ctlinput) /* control input (from below) */ + (int, struct sockaddr *, void *); + int (*pr_ctloutput) /* control output (from above) */ + (struct socket *, struct sockopt *); + /* + * user-protocol hook + */ + void *pr_ousrreq; + /* + * utility hooks + */ + void (*pr_init)(void); /* initialization hook */ + void (*pr_unused)(void); /* placeholder - fasttimo is removed */ + void (*pr_unused2)(void); /* placeholder - slowtimo is removed */ + void (*pr_drain)(void); /* flush any excess space possible */ + int (*pr_sysctl) /* sysctl for protocol */ + (int *, u_int, void *, size_t *, void *, size_t); +#ifdef XNU_KERNEL_PRIVATE + struct pr_usrreqs_old *pr_usrreqs; /* supersedes pr_usrreq() */ +#else + struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ +#endif /* !XNU_KERNEL_PRIVATE */ + int (*pr_lock) /* lock function for protocol */ + (struct socket *so, int locktype, void *debug); + int (*pr_unlock) /* unlock for protocol */ + (struct socket *so, int locktype, void *debug); + lck_mtx_t *(*pr_getlock) /* retrieve protocol lock */ + (struct socket *so, int locktype); + /* + * Implant hooks + */ + TAILQ_HEAD(, socket_filter) pr_filter_head; +#ifdef XNU_KERNEL_PRIVATE + struct protosw_old *pr_next; /* chain for domain */ +#else + struct protosw *pr_next; /* chain for domain */ +#endif /* !XNU_KERNEL_PRIVATE */ + u_int32_t reserved[1]; /* padding for future use */ +}; + +#pragma pack() + +#ifdef XNU_KERNEL_PRIVATE /* * Protocol switch table. * @@ -95,10 +166,8 @@ struct socket_filter; * which is used for protocol-protocol and system-protocol communication. * * A protocol is called through the pr_init entry before any other. - * Thereafter it is called every 200ms through the pr_fasttimo entry and - * every 500ms through the pr_slowtimo for timer based actions. 
- * The system will call the pr_drain entry if it is low on space and - * this should throw away any non-critical data. + * The system will call the pr_drain entry if it is low on space and this + * should throw away any non-critical data. * * Protocols pass data between themselves as chains of mbufs using * the pr_input and pr_output hooks. Pr_input passes data up (towards @@ -109,68 +178,59 @@ struct socket_filter; * * The userreq routine interfaces protocols to the system and is * described below. + * + * After a protocol is attached, its pr_domain will be set to the domain + * which the protocol belongs to, and its pr_protosw will be set to the + * address of the protosw instance. The latter is useful for finding + * the real/original protosw instance, in the event so_proto is altered + * to point to an alternative/derivative protosw. E.g. the list of + * socket filters is only applicable on the original protosw instance. + * + * Internal, private and extendable representation of protosw. */ - -#include -#include -#include -#ifdef KERNEL -#include -#endif /* KERNEL */ - -#pragma pack(4) - struct protosw { - short pr_type; /* socket type used for */ + TAILQ_ENTRY(protosw) pr_entry; /* chain for domain */ struct domain *pr_domain; /* domain protocol a member of */ - short pr_protocol; /* protocol number */ - unsigned int pr_flags; /* see below */ -/* protocol-protocol hooks */ - void (*pr_input)(struct mbuf *, int len); - /* input to protocol (from below) */ - int (*pr_output)(struct mbuf *m, struct socket *so); - /* output to protocol (from above) */ - void (*pr_ctlinput)(int, struct sockaddr *, void *); - /* control input (from below) */ - int (*pr_ctloutput)(struct socket *, struct sockopt *); - /* control output (from above) */ -/* user-protocol hook */ - void *pr_ousrreq; -/* utility hooks */ - void (*pr_init)(void); /* initialization hook */ -#if __APPLE__ - void (*pr_unused)(void); /* placeholder - fasttimo is removed */ -#else - void (*pr_fasttimo)(void); - /* fast timeout (200ms) */ -#endif - void (*pr_slowtimo)(void); - /* slow timeout (500ms) */ - void (*pr_drain)(void); - /* flush any excess space possible */ -#if __APPLE__ - int (*pr_sysctl)(int *, u_int, void *, size_t *, void *, size_t); - /* sysctl for protocol */ -#endif - struct pr_usrreqs *pr_usrreqs; /* supersedes pr_usrreq() */ -#if __APPLE__ - int (*pr_lock) (struct socket *so, int locktype, void *debug); /* lock function for protocol */ - int (*pr_unlock) (struct socket *so, int locktype, void *debug); /* unlock for protocol */ -#ifdef _KERN_LOCKS_H_ - lck_mtx_t * (*pr_getlock) (struct socket *so, int locktype); -#else - void * (*pr_getlock) (struct socket *so, int locktype); -#endif -#endif -#if __APPLE__ -/* Implant hooks */ + struct protosw *pr_protosw; /* pointer to self */ + u_int16_t pr_type; /* socket type used for */ + u_int16_t pr_protocol; /* protocol number */ + u_int32_t pr_flags; /* see below */ + /* + * protocol-protocol hooks + */ + void (*pr_input) /* input to protocol (from below) */ + (struct mbuf *, int len); + int (*pr_output) /* output to protocol (from above) */ + (struct mbuf *m, struct socket *so); + void (*pr_ctlinput) /* control input (from below) */ + (int, struct sockaddr *, void *); + int (*pr_ctloutput) /* control output (from above) */ + (struct socket *, struct sockopt *); + /* + * user-protocol hook + */ + struct pr_usrreqs *pr_usrreqs; /* user request; see list below */ + /* + * utility hooks + */ + void (*pr_init) /* initialization hook */ + (struct protosw *, struct domain *); + 
void (*pr_drain)(void); /* flush any excess space possible */ + int (*pr_sysctl) /* sysctl for protocol */ + (int *, u_int, void *, size_t *, void *, size_t); + int (*pr_lock) /* lock function for protocol */ + (struct socket *so, int locktype, void *debug); + int (*pr_unlock) /* unlock for protocol */ + (struct socket *so, int locktype, void *debug); + lck_mtx_t *(*pr_getlock) /* retrieve protocol lock */ + (struct socket *so, int locktype); + /* + * misc + */ TAILQ_HEAD(, socket_filter) pr_filter_head; - struct protosw *pr_next; /* Chain for domain */ - u_int32_t reserved[1]; /* Padding for future use */ -#endif + struct protosw_old *pr_old; }; - -#pragma pack() +#endif /* XNU_KERNEL_PRIVATE */ /* * Values for pr_flags. @@ -181,30 +241,106 @@ struct protosw { * is only relevant if PR_CONNREQUIRED is set (otherwise sendto is allowed * anyhow). */ -#define PR_ATOMIC 0x01 /* exchange atomic messages only */ -#define PR_ADDR 0x02 /* addresses given with messages */ -#define PR_CONNREQUIRED 0x04 /* connection required by protocol */ -#define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */ -#define PR_RIGHTS 0x10 /* passes capabilities */ -#define PR_IMPLOPCL 0x20 /* implied open/close */ -#define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ -#define PR_PROTOLOCK 0x80 /* protocol takes care of it's own locking */ -#define PR_PCBLOCK 0x100 /* protocol supports per pcb finer grain locking */ -#define PR_DISPOSE 0x200 /* protocol requires late lists disposal */ -#define PR_AGGDRAIN 0x400 /* protocol requires aggressive draining */ +#define PR_ATOMIC 0x01 /* exchange atomic messages only */ +#define PR_ADDR 0x02 /* addresses given with messages */ +#define PR_CONNREQUIRED 0x04 /* connection required by protocol */ +#define PR_WANTRCVD 0x08 /* want PRU_RCVD calls */ +#define PR_RIGHTS 0x10 /* passes capabilities */ +#define PR_IMPLOPCL 0x20 /* implied open/close */ +#define PR_LASTHDR 0x40 /* enforce ipsec policy; last header */ +#define PR_PROTOLOCK 0x80 /* protocol takes care of it's own locking */ +#define PR_PCBLOCK 0x100 /* protocol supports per pcb locking */ +#define PR_DISPOSE 0x200 /* protocol requires late lists disposal */ +#ifdef BSD_KERNEL_PRIVATE +#define PR_INITIALIZED 0x400 /* protocol has been initialized */ +#define PR_ATTACHED 0x800 /* protocol is attached to a domain */ +#define PR_MULTICONN 0x1000 /* supports multiple connect calls */ +#define PR_EVCONNINFO 0x2000 /* protocol generates conninfo event */ +#define PR_OLD 0x10000000 /* added via net_add_proto */ + +/* pseudo-public domain flags */ +#define PRF_USERFLAGS \ + (PR_ATOMIC|PR_ADDR|PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS| \ + PR_IMPLOPCL|PR_LASTHDR|PR_PROTOLOCK|PR_PCBLOCK|PR_DISPOSE) +#endif /* BSD_KERNEL_PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE +/* + * The arguments to the ctlinput routine are + * (*protosw[].pr_ctlinput)(cmd, sa, arg); + * where cmd is one of the commands below, sa is a pointer to a sockaddr, + * and arg is a `void *' argument used within a protocol family. + */ +#define PRC_IFDOWN 0 /* interface transition */ +#define PRC_ROUTEDEAD 1 /* select new route if possible ??? 
*/ +#define PRC_IFUP 2 /* interface has come back up */ +#define PRC_QUENCH2 3 /* DEC congestion bit says slow down */ +#define PRC_QUENCH 4 /* some one said to slow down */ +#define PRC_MSGSIZE 5 /* message size forced drop */ +#define PRC_HOSTDEAD 6 /* host appears to be down */ +#define PRC_HOSTUNREACH 7 /* deprecated (use PRC_UNREACH_HOST) */ +#define PRC_UNREACH_NET 8 /* no route to network */ +#define PRC_UNREACH_HOST 9 /* no route to host */ +#define PRC_UNREACH_PROTOCOL 10 /* dst says bad protocol */ +#define PRC_UNREACH_PORT 11 /* bad port # */ +/* was PRC_UNREACH_NEEDFRAG 12 (use PRC_MSGSIZE) */ +#define PRC_UNREACH_SRCFAIL 13 /* source route failed */ +#define PRC_REDIRECT_NET 14 /* net routing redirect */ +#define PRC_REDIRECT_HOST 15 /* host routing redirect */ +#define PRC_REDIRECT_TOSNET 16 /* redirect for type of service & net */ +#define PRC_REDIRECT_TOSHOST 17 /* redirect for tos & host */ +#define PRC_TIMXCEED_INTRANS 18 /* packet lifetime expired in transit */ +#define PRC_TIMXCEED_REASS 19 /* lifetime expired on reass q */ +#define PRC_PARAMPROB 20 /* header incorrect */ +#define PRC_UNREACH_ADMIN_PROHIB 21 /* packet administrativly prohibited */ + +#define PRC_NCMDS 22 + +#define PRC_IS_REDIRECT(cmd) \ + ((cmd) >= PRC_REDIRECT_NET && (cmd) <= PRC_REDIRECT_TOSHOST) + +#ifdef PRCREQUESTS +char *prcrequests[] = { + "IFDOWN", "ROUTEDEAD", "IFUP", "DEC-BIT-QUENCH2", + "QUENCH", "MSGSIZE", "HOSTDEAD", "#7", + "NET-UNREACH", "HOST-UNREACH", "PROTO-UNREACH", "PORT-UNREACH", + "#12", "SRCFAIL-UNREACH", "NET-REDIRECT", "HOST-REDIRECT", + "TOSNET-REDIRECT", "TOSHOST-REDIRECT", "TX-INTRANS", "TX-REASS", + "PARAMPROB", "ADMIN-UNREACH" +}; +#endif /* PRCREQUESTS */ /* - * The arguments to usrreq are: - * (*protosw[].pr_usrreq)(up, req, m, nam, opt); - * where up is a (struct socket *), req is one of these requests, - * m is a optional mbuf chain containing a message, - * nam is an optional mbuf chain containing an address, - * and opt is a pointer to a socketopt structure or nil. - * The protocol is responsible for disposal of the mbuf chain m, - * the caller is responsible for any space held by nam and opt. + * The arguments to ctloutput are: + * (*protosw[].pr_ctloutput)(req, so, level, optname, optval, p); + * req is one of the actions listed below, so is a (struct socket *), + * level is an indication of which protocol layer the option is intended. + * optname is a protocol dependent socket option request, + * optval is a pointer to a mbuf-chain pointer, for value-return results. + * The protocol is responsible for disposal of the mbuf chain *optval + * if supplied, + * the caller is responsible for any space held by *optval, when returned. * A non-zero return from usrreq gives an * UNIX error number which should be passed to higher level software. */ +#define PRCO_GETOPT 0 +#define PRCO_SETOPT 1 + +#define PRCO_NCMDS 2 + +#ifdef PRCOREQUESTS +char *prcorequests[] = { + "GETOPT", "SETOPT", +}; +#endif /* PRCOREQUESTS */ + +/* + * In earlier BSD network stacks, a single pr_usrreq() function pointer was + * invoked with an operation number indicating what operation was desired. + * We now provide individual function pointers which protocols can implement, + * which offers a number of benefits (such as type checking for arguments). + * These older constants are still present in order to support TCP debugging. 
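To make the contrast drawn in the comment above concrete, here is a sketch (not code from this patch; my_attach and my_detach are hypothetical handlers): the old model funneled every operation through one entry point and dispatched on a PRU_* code, with overloaded untyped arguments, while the pr_usrreqs table gives each operation its own typed function pointer that the compiler can check.

static int my_attach(struct socket *, int, struct proc *);	/* hypothetical */
static int my_detach(struct socket *);				/* hypothetical */

/* Old style: one entry point, dispatch on a PRU_* request code. */
static int
my_usrreq(struct socket *so, int req, struct mbuf *m,
    struct mbuf *nam, struct mbuf *control)
{
	switch (req) {
	case PRU_ATTACH:
		return (my_attach(so, 0, NULL));	/* mbuf args unused for this request */
	case PRU_DETACH:
		return (my_detach(so));
	default:
		return (EOPNOTSUPP);	/* mbuf disposal elided in this sketch */
	}
}

/* New style: per-operation pointers; unimplemented slots take the
 * pru_*_notsupp stubs this header declares. */
static struct pr_usrreqs my_usrreqs = {
	.pru_attach = my_attach,
	.pru_detach = my_detach,
	.pru_abort  = pru_abort_notsupp,
	.pru_accept = pru_accept_notsupp,
};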
+ */ #define PRU_ATTACH 0 /* attach protocol to up */ #define PRU_DETACH 1 /* detach protocol from up */ #define PRU_BIND 2 /* bind socket to address */ @@ -229,8 +365,8 @@ struct protosw { #define PRU_PROTORCV 20 /* receive from below */ #define PRU_PROTOSEND 21 /* send to below */ /* end for protocol's internal use */ -#define PRU_SEND_EOF 22 /* send and close */ -#define PRU_NREQ 22 +#define PRU_SEND_EOF 22 /* send and close */ +#define PRU_NREQ 22 #ifdef PRUREQUESTS char *prurequests[] = { @@ -239,211 +375,174 @@ char *prurequests[] = { "RCVD", "SEND", "ABORT", "CONTROL", "SENSE", "RCVOOB", "SENDOOB", "SOCKADDR", "PEERADDR", "CONNECT2", "FASTTIMO", "SLOWTIMO", - "PROTORCV", "PROTOSEND", - "SEND_EOF", + "PROTORCV", "PROTOSEND", "SEND_EOF", }; -#endif - -#ifdef KERNEL /* users shouldn't see this decl */ +#endif /* PRUREQUESTS */ +#endif /* BSD_KERNEL_PRIVATE */ struct ifnet; struct stat; struct ucred; struct uio; +#ifdef XNU_KERNEL_PRIVATE /* - * If the ordering here looks odd, that's because it's alphabetical. - * Having this structure separated out from the main protoswitch is allegedly - * a big (12 cycles per call) lose on high-end CPUs. We will eventually - * migrate this stuff back into the main structure. + * Legacy user-protocol hooks. + * + * NOTE: Do not modify this structure, as there are modules outside of xnu + * which rely on the size and layout for binary compatibility. This structure + * is simply used by the protosw_old structure. Internally, user-protocol + * hooks use the private variant of pr_usrreqs defined down below. */ +struct pr_usrreqs_old { +#else struct pr_usrreqs { +#endif /* !XNU_KERNEL_PRIVATE */ int (*pru_abort)(struct socket *so); int (*pru_accept)(struct socket *so, struct sockaddr **nam); int (*pru_attach)(struct socket *so, int proto, struct proc *p); int (*pru_bind)(struct socket *so, struct sockaddr *nam, - struct proc *p); + struct proc *p); int (*pru_connect)(struct socket *so, struct sockaddr *nam, - struct proc *p); + struct proc *p); int (*pru_connect2)(struct socket *so1, struct socket *so2); int (*pru_control)(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, struct proc *p); + struct ifnet *ifp, struct proc *p); int (*pru_detach)(struct socket *so); int (*pru_disconnect)(struct socket *so); int (*pru_listen)(struct socket *so, struct proc *p); int (*pru_peeraddr)(struct socket *so, struct sockaddr **nam); int (*pru_rcvd)(struct socket *so, int flags); int (*pru_rcvoob)(struct socket *so, struct mbuf *m, int flags); - int (*pru_send)(struct socket *so, int flags, struct mbuf *m, - struct sockaddr *addr, struct mbuf *control, - struct proc *p); -#define PRUS_OOB 0x1 -#define PRUS_EOF 0x2 -#define PRUS_MORETOCOME 0x4 + int (*pru_send)(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, + struct proc *p); int (*pru_sense)(struct socket *so, void *sb, int isstat64); int (*pru_shutdown)(struct socket *so); int (*pru_sockaddr)(struct socket *so, struct sockaddr **nam); - - /* - * These three added later, so they are out of order. They are used - * for shortcutting (fast path input/output) in some protocols. - * XXX - that's a lie, they are not implemented yet - * Rather than calling sosend() etc. directly, calls are made - * through these entry points. For protocols which still use - * the generic code, these just point to those routines. 
- */ int (*pru_sosend)(struct socket *so, struct sockaddr *addr, - struct uio *uio, struct mbuf *top, - struct mbuf *control, int flags); - int (*pru_soreceive)(struct socket *so, - struct sockaddr **paddr, - struct uio *uio, struct mbuf **mp0, - struct mbuf **controlp, int *flagsp); - int (*pru_sopoll)(struct socket *so, int events, - struct ucred *cred, void *); + struct uio *uio, struct mbuf *top, struct mbuf *control, + int flags); + int (*pru_soreceive)(struct socket *so, struct sockaddr **paddr, + struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, + int *flagsp); + int (*pru_sopoll)(struct socket *so, int events, + struct ucred *cred, void *); }; -__BEGIN_DECLS - -extern int pru_abort_notsupp(struct socket *so); -extern int pru_accept_notsupp(struct socket *so, struct sockaddr **nam); -extern int pru_attach_notsupp(struct socket *so, int proto, - struct proc *p); -extern int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, - struct proc *p); -extern int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, - struct proc *p); -extern int pru_connect2_notsupp(struct socket *so1, struct socket *so2); -extern int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, - struct ifnet *ifp, struct proc *p); -extern int pru_detach_notsupp(struct socket *so); -extern int pru_disconnect_notsupp(struct socket *so); -extern int pru_listen_notsupp(struct socket *so, struct proc *p); -extern int pru_peeraddr_notsupp(struct socket *so, - struct sockaddr **nam); -extern int pru_rcvd_notsupp(struct socket *so, int flags); -extern int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, - int flags); -extern int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, - struct sockaddr *addr, struct mbuf *control, - struct proc *p); -extern int pru_sense_null(struct socket *so, void * sb, int isstat64); -extern int pru_shutdown_notsupp(struct socket *so); -extern int pru_sockaddr_notsupp(struct socket *so, - struct sockaddr **nam); -extern int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, - struct uio *uio, struct mbuf *top, - struct mbuf *control, int flags); -extern int pru_soreceive_notsupp(struct socket *so, - struct sockaddr **paddr, - struct uio *uio, struct mbuf **mp0, - struct mbuf **controlp, int *flagsp); -extern int pru_sopoll_notsupp(struct socket *so, int events, - struct ucred *cred, void *); - -__END_DECLS - -#endif /* KERNEL */ - +#ifdef XNU_KERNEL_PRIVATE /* - * The arguments to the ctlinput routine are - * (*protosw[].pr_ctlinput)(cmd, sa, arg); - * where cmd is one of the commands below, sa is a pointer to a sockaddr, - * and arg is a `void *' argument used within a protocol family. + * If the ordering here looks odd, that's because it's alphabetical. These + * should eventually be merged back into struct protosw. + * + * Internal, private and extendable representation of pr_usrreqs. + * + * NOTE: When adding new ones, also add default callbacks in pru_sanitize(). */ -#define PRC_IFDOWN 0 /* interface transition */ -#define PRC_ROUTEDEAD 1 /* select new route if possible ??? 
*/ -#define PRC_IFUP 2 /* interface has come back up */ -#define PRC_QUENCH2 3 /* DEC congestion bit says slow down */ -#define PRC_QUENCH 4 /* some one said to slow down */ -#define PRC_MSGSIZE 5 /* message size forced drop */ -#define PRC_HOSTDEAD 6 /* host appears to be down */ -#define PRC_HOSTUNREACH 7 /* deprecated (use PRC_UNREACH_HOST) */ -#define PRC_UNREACH_NET 8 /* no route to network */ -#define PRC_UNREACH_HOST 9 /* no route to host */ -#define PRC_UNREACH_PROTOCOL 10 /* dst says bad protocol */ -#define PRC_UNREACH_PORT 11 /* bad port # */ -/* was PRC_UNREACH_NEEDFRAG 12 (use PRC_MSGSIZE) */ -#define PRC_UNREACH_SRCFAIL 13 /* source route failed */ -#define PRC_REDIRECT_NET 14 /* net routing redirect */ -#define PRC_REDIRECT_HOST 15 /* host routing redirect */ -#define PRC_REDIRECT_TOSNET 16 /* redirect for type of service & net */ -#define PRC_REDIRECT_TOSHOST 17 /* redirect for tos & host */ -#define PRC_TIMXCEED_INTRANS 18 /* packet lifetime expired in transit */ -#define PRC_TIMXCEED_REASS 19 /* lifetime expired on reass q */ -#define PRC_PARAMPROB 20 /* header incorrect */ -#define PRC_UNREACH_ADMIN_PROHIB 21 /* packet administrativly prohibited */ - -#define PRC_NCMDS 22 - -#define PRC_IS_REDIRECT(cmd) \ - ((cmd) >= PRC_REDIRECT_NET && (cmd) <= PRC_REDIRECT_TOSHOST) - -#ifdef PRCREQUESTS -char *prcrequests[] = { - "IFDOWN", "ROUTEDEAD", "IFUP", "DEC-BIT-QUENCH2", - "QUENCH", "MSGSIZE", "HOSTDEAD", "#7", - "NET-UNREACH", "HOST-UNREACH", "PROTO-UNREACH", "PORT-UNREACH", - "#12", "SRCFAIL-UNREACH", "NET-REDIRECT", "HOST-REDIRECT", - "TOSNET-REDIRECT", "TOSHOST-REDIRECT", "TX-INTRANS", "TX-REASS", - "PARAMPROB", "ADMIN-UNREACH" +struct pr_usrreqs { + uint32_t pru_flags; /* see PRUF flags below */ + int (*pru_abort)(struct socket *); + int (*pru_accept)(struct socket *, struct sockaddr **); + int (*pru_attach)(struct socket *, int proto, struct proc *); + int (*pru_bind)(struct socket *, struct sockaddr *, struct proc *); + int (*pru_connect)(struct socket *, struct sockaddr *, + struct proc *); + int (*pru_connect2)(struct socket *, struct socket *); + int (*pru_connectx)(struct socket *, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, + associd_t, connid_t *, uint32_t, void *, uint32_t); + int (*pru_control)(struct socket *, u_long, caddr_t, + struct ifnet *, struct proc *); + int (*pru_detach)(struct socket *); + int (*pru_disconnect)(struct socket *); + int (*pru_disconnectx)(struct socket *, associd_t, connid_t); + int (*pru_listen)(struct socket *, struct proc *); + int (*pru_peeloff)(struct socket *, associd_t, struct socket **); + int (*pru_peeraddr)(struct socket *, struct sockaddr **); + int (*pru_rcvd)(struct socket *, int); + int (*pru_rcvoob)(struct socket *, struct mbuf *, int); + int (*pru_send)(struct socket *, int, struct mbuf *, + struct sockaddr *, struct mbuf *, struct proc *); +#define PRUS_OOB 0x1 +#define PRUS_EOF 0x2 +#define PRUS_MORETOCOME 0x4 + int (*pru_sense)(struct socket *, void *, int); + int (*pru_shutdown)(struct socket *); + int (*pru_sockaddr)(struct socket *, struct sockaddr **); + int (*pru_sopoll)(struct socket *, int, struct ucred *, void *); + int (*pru_soreceive)(struct socket *, struct sockaddr **, + struct uio *, struct mbuf **, struct mbuf **, int *); + int (*pru_sosend)(struct socket *, struct sockaddr *, + struct uio *, struct mbuf *, struct mbuf *, int); + int (*pru_socheckopt)(struct socket *, struct sockopt *); }; -#endif -/* - * The arguments to ctloutput are: - * (*protosw[].pr_ctloutput)(req, 
so, level, optname, optval, p); - * req is one of the actions listed below, so is a (struct socket *), - * level is an indication of which protocol layer the option is intended. - * optname is a protocol dependent socket option request, - * optval is a pointer to a mbuf-chain pointer, for value-return results. - * The protocol is responsible for disposal of the mbuf chain *optval - * if supplied, - * the caller is responsible for any space held by *optval, when returned. - * A non-zero return from usrreq gives an - * UNIX error number which should be passed to higher level software. - */ -#define PRCO_GETOPT 0 -#define PRCO_SETOPT 1 +/* Values for pru_flags */ +#define PRUF_OLD 0x10000000 /* added via net_add_proto */ -#define PRCO_NCMDS 2 - -#ifdef PRCOREQUESTS -char *prcorequests[] = { - "GETOPT", "SETOPT", -}; -#endif - -#ifdef KERNEL +#endif /* XNU_KERNEL_PRIVATE */ __BEGIN_DECLS -void domaininit(void) __attribute__((section("__TEXT, initcode"))); -void domainfin(void) __attribute__((section("__TEXT, fincode"))); - -void pfctlinput(int, struct sockaddr *); -void pfctlinput2(int, struct sockaddr *, void *); -struct protosw *pffindproto(int family, int protocol, int type); -struct protosw *pffindproto_locked(int family, int protocol, int type); -struct protosw *pffindtype(int family, int type); - -extern int net_add_proto(struct protosw *, struct domain *); +extern int pru_abort_notsupp(struct socket *so); +extern int pru_accept_notsupp(struct socket *so, struct sockaddr **nam); +extern int pru_attach_notsupp(struct socket *so, int proto, struct proc *p); +extern int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, + struct proc *p); +extern int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, + struct proc *p); +extern int pru_connect2_notsupp(struct socket *so1, struct socket *so2); +#ifdef XNU_KERNEL_PRIVATE +extern int pru_connectx_notsupp(struct socket *, struct sockaddr_list **, + struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, + uint32_t, void *, uint32_t); +extern int pru_disconnectx_notsupp(struct socket *, associd_t, connid_t); +extern int pru_socheckopt_null(struct socket *, struct sockopt *); +extern int pru_peeloff_notsupp(struct socket *, associd_t, struct socket **); +#endif /* XNU_KERNEL_PRIVATE */ +extern int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp, struct proc *p); +extern int pru_detach_notsupp(struct socket *so); +extern int pru_disconnect_notsupp(struct socket *so); +extern int pru_listen_notsupp(struct socket *so, struct proc *p); +extern int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam); +extern int pru_rcvd_notsupp(struct socket *so, int flags); +extern int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags); +extern int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *addr, struct mbuf *control, struct proc *p); +extern int pru_sense_null(struct socket *so, void * sb, int isstat64); +extern int pru_shutdown_notsupp(struct socket *so); +extern int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam); +extern int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, + struct uio *uio, struct mbuf *top, struct mbuf *control, int flags); +extern int pru_soreceive_notsupp(struct socket *so, + struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, + struct mbuf **controlp, int *flagsp); +extern int pru_sopoll_notsupp(struct socket *so, int events, + struct ucred *cred, void *); +#ifdef 
XNU_KERNEL_PRIVATE +extern void pru_sanitize(struct pr_usrreqs *); +extern void domaininit(void); +extern void domainfin(void); +extern void pfctlinput(int, struct sockaddr *); +extern void pfctlinput2(int, struct sockaddr *, void *); +extern struct protosw *pffindproto_locked(int, int, int); +extern struct protosw *pffindprotonotype(int, int); +extern struct protosw *pffindtype(int, int); +extern struct protosw_old *pffindproto_old(int, int, int); +extern int net_add_proto(struct protosw *, struct domain *, int); +extern void net_init_proto(struct protosw *, struct domain *); extern int net_del_proto(int, int, struct domain *); - +extern int net_add_proto_old(struct protosw_old *, struct domain_old *); +extern int net_del_proto_old(int, int, struct domain_old *); +extern void net_update_uptime(void); +extern void net_update_uptime_secs(uint64_t secs); extern u_int64_t net_uptime(void); +extern void net_uptime2timeval(struct timeval *); +#else +extern int net_add_proto(struct protosw *, struct domain *); +extern int net_del_proto(int, int, struct domain *); +#endif /* XNU_KERNEL_PRIVATE */ +extern struct protosw *pffindproto(int family, int protocol, int type); __END_DECLS - -/* Temp hack to link static domains together */ - -#define LINK_PROTOS(psw) \ -static void link_ ## psw ## _protos() \ -{ \ - int i; \ - \ - for (i=0; i < ((sizeof(psw)/sizeof(psw[0])) - 1); i++) \ - psw[i].pr_next = &psw[i + 1]; \ -} - -#endif - -#endif /* PRIVATE */ +#endif /* KERNEL_PRIVATE */ #endif /* !_SYS_PROTOSW_H_ */ diff --git a/bsd/sys/pthread_internal.h b/bsd/sys/pthread_internal.h index b22de04d8..4b0c21282 100644 --- a/bsd/sys/pthread_internal.h +++ b/bsd/sys/pthread_internal.h @@ -33,124 +33,16 @@ #include struct ksyn_waitq_element { - TAILQ_ENTRY(ksyn_waitq_element) kwe_list; /* link to other list members */ - void * kwe_kwqqueue; /* queue blocked on */ - uint32_t kwe_flags; /* flags */ - uint32_t kwe_lockseq; /* the sequence of the entry */ - uint32_t kwe_count; /* upper bound on number of matches still pending */ - uint32_t kwe_psynchretval; /* thread retval */ - void *kwe_uth; /* uthread */ +#if __LP64__ + char opaque[48]; +#else + char opaque[32]; +#endif }; -typedef struct ksyn_waitq_element * ksyn_waitq_element_t; - -/* kew_flags defns */ -#define KWE_THREAD_INWAIT 1 -#define KWE_THREAD_PREPOST 2 -#define KWE_THREAD_BROADCAST 4 - - -#define WORKQUEUE_HIGH_PRIOQUEUE 0 /* high priority queue */ -#define WORKQUEUE_DEFAULT_PRIOQUEUE 1 /* default priority queue */ -#define WORKQUEUE_LOW_PRIOQUEUE 2 /* low priority queue */ -#define WORKQUEUE_BG_PRIOQUEUE 3 /* background priority queue */ - -#define WORKQUEUE_NUMPRIOS 4 - -#define WORKQUEUE_OVERCOMMIT 0x10000 - -struct threadlist { - TAILQ_ENTRY(threadlist) th_entry; - thread_t th_thread; - int th_flags; - uint16_t th_affinity_tag; - uint8_t th_priority; - uint8_t th_policy; - struct workqueue *th_workq; - mach_vm_size_t th_stacksize; - mach_vm_size_t th_allocsize; - mach_vm_offset_t th_stackaddr; - mach_port_name_t th_thport; -}; -#define TH_LIST_INITED 0x01 -#define TH_LIST_RUNNING 0x02 -#define TH_LIST_BLOCKED 0x04 -#define TH_LIST_SUSPENDED 0x08 -#define TH_LIST_BUSY 0x10 -#define TH_LIST_NEED_WAKEUP 0x20 -#define TH_LIST_CONSTRAINED 0x40 - - -struct workqueue { - proc_t wq_proc; - vm_map_t wq_map; - task_t wq_task; - thread_call_t wq_atimer_call; - int wq_flags; - int wq_lflags; - uint64_t wq_thread_yielded_timestamp; - uint32_t wq_thread_yielded_count; - uint32_t wq_timer_interval; - uint32_t wq_affinity_max; - uint32_t wq_threads_scheduled; - 
uint32_t wq_constrained_threads_scheduled; - uint32_t wq_nthreads; - uint32_t wq_thidlecount; - uint32_t wq_reqcount; - TAILQ_HEAD(, threadlist) wq_thrunlist; - TAILQ_HEAD(, threadlist) wq_thidlelist; - uint16_t wq_requests[WORKQUEUE_NUMPRIOS]; - uint16_t wq_ocrequests[WORKQUEUE_NUMPRIOS]; - uint16_t wq_reqconc[WORKQUEUE_NUMPRIOS]; /* requested concurrency for each priority level */ - uint16_t *wq_thscheduled_count[WORKQUEUE_NUMPRIOS]; - uint32_t *wq_thactive_count[WORKQUEUE_NUMPRIOS]; /* must be uint32_t since we OSAddAtomic on these */ - uint64_t *wq_lastblocked_ts[WORKQUEUE_NUMPRIOS]; -}; -#define WQ_LIST_INITED 0x01 -#define WQ_ATIMER_RUNNING 0x02 -#define WQ_EXITING 0x04 - -#define WQL_ATIMER_BUSY 0x01 -#define WQL_ATIMER_WAITING 0x02 -#define WQL_EXCEEDED_CONSTRAINED_THREAD_LIMIT 0x04 -#define WQL_EXCEEDED_TOTAL_THREAD_LIMIT 0x08 - - -#define WQ_VECT_SET_BIT(vector, bit) \ - vector[(bit) / 32] |= (1 << ((bit) % 32)) - -#define WQ_VECT_CLEAR_BIT(vector, bit) \ - vector[(bit) / 32] &= ~(1 << ((bit) % 32)) - -#define WQ_VECT_TEST_BIT(vector, bit) \ - vector[(bit) / 32] & (1 << ((bit) % 32)) - - -#define WORKQUEUE_MAXTHREADS 512 -#define WQ_YIELDED_THRESHOLD 2000 -#define WQ_YIELDED_WINDOW_USECS 30000 -#define WQ_STALLED_WINDOW_USECS 200 -#define WQ_REDUCE_POOL_WINDOW_USECS 5000000 -#define WQ_MAX_TIMER_INTERVAL_USECS 50000 - -/* workq_kernreturn commands */ -#define WQOPS_QUEUE_ADD 1 -#define WQOPS_QUEUE_REMOVE_OBSOLETE 2 -#define WQOPS_THREAD_RETURN 4 -#define WQOPS_THREAD_SETCONC 8 -#define WQOPS_QUEUE_NEWSPISUPP 0x10 /* this is to check for newer SPI support */ -#define WQOPS_QUEUE_REQTHREADS 0x20 /* request number of threads of a prio */ - -#define PTH_DEFAULT_STACKSIZE 512*1024 -#define PTH_DEFAULT_GUARDSIZE 4*1024 -#define MAX_PTHREAD_SIZE 64*1024 - -extern lck_grp_attr_t *pthread_lck_grp_attr; -extern lck_grp_t *pthread_lck_grp; -extern lck_attr_t *pthread_lck_attr; void workqueue_mark_exiting(struct proc *); void workqueue_exit(struct proc *); void pthread_init(void); -void psynch_zoneinit(void); + #endif /* _SYS_PTHREAD_INTERNAL_H_ */ diff --git a/bsd/sys/pthread_shims.h b/bsd/sys/pthread_shims.h new file mode 100644 index 000000000..1e530ab4a --- /dev/null +++ b/bsd/sys/pthread_shims.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifdef KERNEL_PRIVATE + +#ifndef _PTHREAD_SHIMS_H_ +#define _PTHREAD_SHIMS_H_ + +#ifndef ASSEMBLER + +#include +#include +#include +#include +#include +#include +#include + +#ifndef PTHREAD_INTERNAL +struct uthread; +#define M_PROC 41 +#endif + +#ifdef NEEDS_SCHED_CALL_T +typedef void (*sched_call_t)(int type, thread_t thread); +#endif + +typedef struct pthread_functions_s { + int version; + + /* internal calls, kernel core -> kext */ + void (*pthread_init)(void); + int (*fill_procworkqueue)(proc_t p, struct proc_workqueueinfo * pwqinfo); + void (*workqueue_init_lock)(proc_t p); + void (*workqueue_destroy_lock)(proc_t p); + void (*workqueue_exit)(struct proc *p); + void (*workqueue_mark_exiting)(struct proc *p); + void (*workqueue_thread_yielded)(void); + void (*pth_proc_hashinit)(proc_t p); + void (*pth_proc_hashdelete)(proc_t p); + + /* syscall stubs */ + int (*bsdthread_create)(struct proc *p, user_addr_t user_func, user_addr_t user_funcarg, user_addr_t user_stack, user_addr_t user_pthread, uint32_t flags, user_addr_t *retval); + int (*bsdthread_register)(struct proc *p, user_addr_t threadstart, user_addr_t wqthread, int pthsize, user_addr_t dummy_value, user_addr_t targetconc_ptr, uint64_t dispatchqueue_offset, int32_t *retval); + int (*bsdthread_terminate)(struct proc *p, user_addr_t stackaddr, size_t size, uint32_t kthport, uint32_t sem, int32_t *retval); + int (*thread_selfid)(struct proc *p, uint64_t *retval); + int (*workq_kernreturn)(struct proc *p, int options, user_addr_t item, int affinity, int prio, int32_t *retval); + int (*workq_open)(struct proc *p, int32_t *retval); + + /* psynch syscalls */ + int (*psynch_mutexwait)(proc_t p, user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval); + int (*psynch_mutexdrop)(proc_t p, user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval); + int (*psynch_cvbroad)(proc_t p, user_addr_t cv, uint64_t cvlsgen, uint64_t cvudgen, uint32_t flags, user_addr_t mutex, uint64_t mugen, uint64_t tid, uint32_t *retval); + int (*psynch_cvsignal)(proc_t p, user_addr_t cv, uint64_t cvlsgen, uint32_t cvugen, int thread_port, user_addr_t mutex, uint64_t mugen, uint64_t tid, uint32_t flags, uint32_t *retval); + int (*psynch_cvwait)(proc_t p, user_addr_t cv, uint64_t cvlsgen, uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags, int64_t sec, uint32_t nsec, uint32_t * retval); + int (*psynch_cvclrprepost)(proc_t p, user_addr_t cv, uint32_t cvgen, uint32_t cvugen, uint32_t cvsgen, uint32_t prepocnt, uint32_t preposeq, uint32_t flags, int *retval); + int (*psynch_rw_longrdlock)(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); + int (*psynch_rw_rdlock)(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); + int (*psynch_rw_unlock)(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); + int (*psynch_rw_wrlock)(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); + int (*psynch_rw_yieldwrlock)(proc_t p, user_addr_t rwlock, uint32_t lgenval, uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval); + + sched_call_t (*workqueue_get_sched_callback)(void); + + /* padding for future */ + void* _pad[99]; +} *pthread_functions_t; + +typedef struct pthread_callbacks_s { + int version; 
+ + /* config information */ + uint32_t config_thread_max; + uint32_t (*get_task_threadmax)(void); + + /* proc.h accessors */ + uint64_t (*proc_get_register)(struct proc *t); + void (*proc_set_register)(struct proc *t); + + user_addr_t (*proc_get_threadstart)(struct proc *t); + void (*proc_set_threadstart)(struct proc *t, user_addr_t addr); + user_addr_t (*proc_get_wqthread)(struct proc *t); + void (*proc_set_wqthread)(struct proc *t, user_addr_t addr); + int (*proc_get_pthsize)(struct proc *t); + void (*proc_set_pthsize)(struct proc *t, int size); + user_addr_t (*proc_get_targconc)(struct proc *t); + void (*proc_set_targconc)(struct proc *t, user_addr_t addr); + uint64_t (*proc_get_dispatchqueue_offset)(struct proc *t); + void (*proc_set_dispatchqueue_offset)(struct proc *t, uint64_t offset); + lck_spin_t* (*proc_get_wqlockptr)(struct proc *t); + boolean_t* (*proc_get_wqinitingptr)(struct proc *t); + void* (*proc_get_wqptr)(struct proc *t); + void (*proc_set_wqptr)(struct proc *t, void* ptr); + int (*proc_get_wqsize)(struct proc *t); + void (*proc_set_wqsize)(struct proc *t, int sz); + void (*proc_lock)(struct proc *t); + void (*proc_unlock)(struct proc *t); + task_t (*proc_get_task)(struct proc *t); + void* (*proc_get_pthhash)(struct proc *t); + void (*proc_set_pthhash)(struct proc *t, void* ptr); + + /* bsd/sys/user.h */ + void* (*uthread_get_threadlist)(struct uthread *t); + void (*uthread_set_threadlist)(struct uthread *t, void* threadlist); + sigset_t (*uthread_get_sigmask)(struct uthread *t); + void (*uthread_set_sigmask)(struct uthread *t, sigset_t s); + void* (*uthread_get_uukwe)(struct uthread *t); + int (*uthread_get_returnval)(struct uthread *t); + void (*uthread_set_returnval)(struct uthread *t, int val); + int (*uthread_is_cancelled)(struct uthread *t); + + /* vm_protos.h calls */ + ipc_space_t (*task_get_ipcspace)(task_t t); + mach_port_name_t (*ipc_port_copyout_send)(ipc_port_t sright, ipc_space_t space); + + /* osfmk/vm/vm_map.h */ + kern_return_t (*vm_map_page_info)(vm_map_t map, vm_map_offset_t offset, vm_page_info_flavor_t flavor, vm_page_info_t info, mach_msg_type_number_t *count); + vm_map_t (*vm_map_switch)(vm_map_t map); + + /* wq functions */ + kern_return_t (*thread_set_wq_state32)(thread_t thread, thread_state_t state); + kern_return_t (*thread_set_wq_state64)(thread_t thread, thread_state_t state); + + /* sched_prim.h */ + void (*thread_exception_return)(); + void (*thread_bootstrap_return)(); + + /* kern/clock.h */ + void (*absolutetime_to_microtime)(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs); + + /* osfmk/kern/task.h */ + int (*proc_restore_workq_bgthreadpolicy)(thread_t t); + int (*proc_apply_workq_bgthreadpolicy)(thread_t t); + + /* osfmk/kern/thread.h */ + struct uthread* (*get_bsdthread_info)(thread_t th); + void (*thread_sched_call)(thread_t t, sched_call_t call); + void (*thread_static_param)(thread_t t, boolean_t state); + kern_return_t (*thread_create_workq)(task_t t, thread_continue_t c, thread_t *new_t); + kern_return_t (*thread_policy_set_internal)(thread_t t, thread_policy_flavor_t flavour, thread_policy_t info, mach_msg_type_number_t count); + + /* osfmk/kern/affinity.h */ + kern_return_t (*thread_affinity_set)(thread_t thread, uint32_t tag); + + /* bsd/sys/systm.h */ + void (*unix_syscall_return)(int error); + + /* osfmk/kern/zalloc.h */ + void* (*zalloc)(zone_t zone); + void (*zfree)(zone_t zone, void* ptr); + zone_t (*zinit)(vm_size_t, vm_size_t maxmem, vm_size_t alloc, const char *name); + + /* bsd/kerb/kern_sig.c */ + void 
(*__pthread_testcancel)(int); + + /* calls without portfolio */ + kern_return_t (*mach_port_deallocate)(ipc_space_t space, mach_port_name_t name); + kern_return_t (*semaphore_signal_internal_trap)(mach_port_name_t sema_name); + vm_map_t (*current_map)(void); + + /* osfmk/kern/thread.h */ + ipc_port_t (*convert_thread_to_port)(thread_t th); + + /* mach/task.h */ + kern_return_t (*thread_create)(task_t parent_task, thread_act_t *child_act); + + /* mach/thread_act.h */ + kern_return_t (*thread_resume)(thread_act_t target_act); + + /* osfmk//machine_routines.h */ + int (*ml_get_max_cpus)(void); + + + /* xnu: struct proc p_dispatchqueue_serialno_offset additions */ + uint64_t (*proc_get_dispatchqueue_serialno_offset)(struct proc *p); + void (*proc_set_dispatchqueue_serialno_offset)(struct proc *p, uint64_t offset); + + /* padding for future */ + void* _pad[98]; + +} *pthread_callbacks_t; + +void +pthread_kext_register(pthread_functions_t fns, pthread_callbacks_t *callbacks); + +#ifdef BSD_KERNEL_PRIVATE +void workqueue_mark_exiting(struct proc *); +void workqueue_exit(struct proc *); +void workqueue_thread_yielded(void); +sched_call_t workqueue_get_sched_callback(void); +void pthread_init(void); + +extern pthread_callbacks_t pthread_kern; +extern pthread_functions_t pthread_functions; +#endif + +#endif /* ASSEMBLER */ +#endif /* _PTHREAD_SHIMS_H_ */ +#endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/queue.h b/bsd/sys/queue.h index 9ccb63e74..294eec935 100644 --- a/bsd/sys/queue.h +++ b/bsd/sys/queue.h @@ -173,21 +173,50 @@ struct qm_trace { #define TRASHIT(x) #endif /* QUEUE_MACRO_DEBUG */ +/* + * Horrible macros to enable use of code that was meant to be C-specific + * (and which push struct onto type) in C++; without these, C++ code + * that uses these macros in the context of a class will blow up + * due to "struct" being preprended to "type" by the macros, causing + * inconsistent use of tags. + * + * This approach is necessary because these are macros; we have to use + * these on a per-macro basis (because the queues are implemented as + * macros, disabling this warning in the scope of the header file is + * insufficient), whuch means we can't use #pragma, and have to use + * _Pragma. We only need to use these for the queue macros that + * prepend "struct" to "type" and will cause C++ to blow up. + */ +#if defined(__clang__) && defined(__cplusplus) +#define __MISMATCH_TAGS_PUSH \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wmismatched-tags\"") +#define __MISMATCH_TAGS_POP \ + _Pragma("clang diagnostic pop") +#else +#define __MISMATCH_TAGS_PUSH +#define __MISMATCH_TAGS_POP +#endif + /* * Singly-linked List declarations. */ #define SLIST_HEAD(name, type) \ +__MISMATCH_TAGS_PUSH \ struct name { \ struct type *slh_first; /* first element */ \ -} +} \ +__MISMATCH_TAGS_POP #define SLIST_HEAD_INITIALIZER(head) \ { NULL } #define SLIST_ENTRY(type) \ +__MISMATCH_TAGS_PUSH \ struct { \ struct type *sle_next; /* next element */ \ -} +} \ +__MISMATCH_TAGS_POP /* * Singly-linked List functions. 
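An aside on the _Pragma() operator leaned on above: a #pragma directive cannot be produced by macro expansion, but C99's _Pragma() is an ordinary operator that may appear in a replacement list, which is exactly what lets the push/ignore/pop bracket travel with each queue macro. A minimal illustration (the MY_* macro is hypothetical, not from this header):

/* Usable inside a macro body, unlike "#pragma clang diagnostic ...": */
#define MY_SUPPRESS_MISMATCHED_TAGS(decl)				\
	_Pragma("clang diagnostic push")				\
	_Pragma("clang diagnostic ignored \"-Wmismatched-tags\"")	\
	decl								\
	_Pragma("clang diagnostic pop")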
@@ -227,7 +256,9 @@ struct { \ #define SLIST_NEXT(elm, field) ((elm)->field.sle_next) -#define SLIST_REMOVE(head, elm, type, field) do { \ +#define SLIST_REMOVE(head, elm, type, field) \ +__MISMATCH_TAGS_PUSH \ +do { \ if (SLIST_FIRST((head)) == (elm)) { \ SLIST_REMOVE_HEAD((head), field); \ } \ @@ -238,7 +269,8 @@ struct { \ SLIST_REMOVE_AFTER(curelm, field); \ } \ TRASHIT((elm)->field.sle_next); \ -} while (0) +} while (0) \ +__MISMATCH_TAGS_POP #define SLIST_REMOVE_AFTER(elm, field) do { \ SLIST_NEXT(elm, field) = \ @@ -253,18 +285,22 @@ struct { \ * Singly-linked Tail queue declarations. */ #define STAILQ_HEAD(name, type) \ +__MISMATCH_TAGS_PUSH \ struct name { \ struct type *stqh_first;/* first element */ \ struct type **stqh_last;/* addr of last next element */ \ -} +} \ +__MISMATCH_TAGS_POP #define STAILQ_HEAD_INITIALIZER(head) \ { NULL, &(head).stqh_first } #define STAILQ_ENTRY(type) \ +__MISMATCH_TAGS_PUSH \ struct { \ struct type *stqe_next; /* next element */ \ -} +} \ +__MISMATCH_TAGS_POP /* * Singly-linked Tail queue functions. @@ -316,14 +352,18 @@ struct { \ } while (0) #define STAILQ_LAST(head, type, field) \ +__MISMATCH_TAGS_PUSH \ (STAILQ_EMPTY((head)) ? \ NULL : \ ((struct type *)(void *) \ - ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) + ((char *)((head)->stqh_last) - __offsetof(struct type, field))))\ +__MISMATCH_TAGS_POP #define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) -#define STAILQ_REMOVE(head, elm, type, field) do { \ +#define STAILQ_REMOVE(head, elm, type, field) \ +__MISMATCH_TAGS_PUSH \ +do { \ if (STAILQ_FIRST((head)) == (elm)) { \ STAILQ_REMOVE_HEAD((head), field); \ } \ @@ -334,7 +374,8 @@ struct { \ STAILQ_REMOVE_AFTER(head, curelm, field); \ } \ TRASHIT((elm)->field.stqe_next); \ -} while (0) +} while (0) \ +__MISMATCH_TAGS_POP #define STAILQ_REMOVE_HEAD(head, field) do { \ if ((STAILQ_FIRST((head)) = \ @@ -353,7 +394,9 @@ struct { \ (head)->stqh_last = &STAILQ_NEXT((elm), field); \ } while (0) -#define STAILQ_SWAP(head1, head2, type) do { \ +#define STAILQ_SWAP(head1, head2, type) \ +__MISMATCH_TAGS_PUSH \ +do { \ struct type *swap_first = STAILQ_FIRST(head1); \ struct type **swap_last = (head1)->stqh_last; \ STAILQ_FIRST(head1) = STAILQ_FIRST(head2); \ @@ -364,25 +407,30 @@ struct { \ (head1)->stqh_last = &STAILQ_FIRST(head1); \ if (STAILQ_EMPTY(head2)) \ (head2)->stqh_last = &STAILQ_FIRST(head2); \ -} while (0) +} while (0) \ +__MISMATCH_TAGS_POP /* * List declarations. */ #define LIST_HEAD(name, type) \ +__MISMATCH_TAGS_PUSH \ struct name { \ struct type *lh_first; /* first element */ \ -} +} \ +__MISMATCH_TAGS_POP #define LIST_HEAD_INITIALIZER(head) \ { NULL } #define LIST_ENTRY(type) \ +__MISMATCH_TAGS_PUSH \ struct { \ struct type *le_next; /* next element */ \ struct type **le_prev; /* address of previous next element */ \ -} +} \ +__MISMATCH_TAGS_POP /* * List functions. @@ -469,7 +517,9 @@ struct { \ TRASHIT((elm)->field.le_prev); \ } while (0) -#define LIST_SWAP(head1, head2, type, field) do { \ +#define LIST_SWAP(head1, head2, type, field) \ +__MISMATCH_TAGS_PUSH \ +do { \ struct type *swap_tmp = LIST_FIRST((head1)); \ LIST_FIRST((head1)) = LIST_FIRST((head2)); \ LIST_FIRST((head2)) = swap_tmp; \ @@ -477,27 +527,32 @@ struct { \ swap_tmp->field.le_prev = &LIST_FIRST((head1)); \ if ((swap_tmp = LIST_FIRST((head2))) != NULL) \ swap_tmp->field.le_prev = &LIST_FIRST((head2)); \ -} while (0) +} while (0) \ +__MISMATCH_TAGS_POP /* * Tail queue declarations. 
*/ #define TAILQ_HEAD(name, type) \ +__MISMATCH_TAGS_PUSH \ struct name { \ struct type *tqh_first; /* first element */ \ struct type **tqh_last; /* addr of last next element */ \ TRACEBUF \ -} +} \ +__MISMATCH_TAGS_POP #define TAILQ_HEAD_INITIALIZER(head) \ { NULL, &(head).tqh_first } #define TAILQ_ENTRY(type) \ +__MISMATCH_TAGS_PUSH \ struct { \ struct type *tqe_next; /* next element */ \ struct type **tqe_prev; /* address of previous next element */ \ TRACEBUF \ -} +} \ +__MISMATCH_TAGS_POP /* * Tail queue functions. @@ -588,12 +643,16 @@ struct { \ } while (0) #define TAILQ_LAST(head, headname) \ - (*(((struct headname *)((head)->tqh_last))->tqh_last)) +__MISMATCH_TAGS_PUSH \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) \ +__MISMATCH_TAGS_POP #define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) #define TAILQ_PREV(elm, headname, field) \ - (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) +__MISMATCH_TAGS_PUSH \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) \ +__MISMATCH_TAGS_POP #define TAILQ_REMOVE(head, elm, field) do { \ if ((TAILQ_NEXT((elm), field)) != NULL) \ @@ -609,7 +668,12 @@ struct { \ QMD_TRACE_ELEM(&(elm)->field); \ } while (0) -#define TAILQ_SWAP(head1, head2, type, field) do { \ +/* + * Why did they switch to spaces for this one macro? + */ +#define TAILQ_SWAP(head1, head2, type, field) \ +__MISMATCH_TAGS_PUSH \ +do { \ struct type *swap_first = (head1)->tqh_first; \ struct type **swap_last = (head1)->tqh_last; \ (head1)->tqh_first = (head2)->tqh_first; \ @@ -624,22 +688,27 @@ struct { \ swap_first->field.tqe_prev = &(head2)->tqh_first; \ else \ (head2)->tqh_last = &(head2)->tqh_first; \ -} while (0) +} while (0) \ +__MISMATCH_TAGS_POP /* * Circular queue definitions. */ #define CIRCLEQ_HEAD(name, type) \ +__MISMATCH_TAGS_PUSH \ struct name { \ struct type *cqh_first; /* first element */ \ struct type *cqh_last; /* last element */ \ -} +} \ +__MISMATCH_TAGS_POP #define CIRCLEQ_ENTRY(type) \ +__MISMATCH_TAGS_PUSH \ struct { \ struct type *cqe_next; /* next element */ \ struct type *cqe_prev; /* previous element */ \ -} +} \ +__MISMATCH_TAGS_POP /* * Circular queue functions. 
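Since the hunks above touch nearly every TAILQ_* macro, a compact usage sketch may help orient readers; this is ordinary userland C against any queue.h that provides the tail-queue family:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int value;
	TAILQ_ENTRY(entry) link;	/* embeds tqe_next/tqe_prev */
};

TAILQ_HEAD(entry_head, entry);		/* declares struct entry_head */

int
main(void)
{
	struct entry_head head = TAILQ_HEAD_INITIALIZER(head);
	struct entry *e;
	int i;

	for (i = 0; i < 3; i++) {
		e = malloc(sizeof(*e));	/* error handling elided */
		e->value = i;
		TAILQ_INSERT_TAIL(&head, e, link);
	}
	TAILQ_FOREACH(e, &head, link)
		printf("%d\n", e->value);	/* prints 0 1 2 */
	while ((e = TAILQ_FIRST(&head)) != NULL) {
		TAILQ_REMOVE(&head, e, link);
		free(e);
	}
	return (0);
}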
diff --git a/bsd/sys/resource.h b/bsd/sys/resource.h index 311e1f601..d47395073 100644 --- a/bsd/sys/resource.h +++ b/bsd/sys/resource.h @@ -68,25 +68,25 @@ #include #include -#ifndef KERNEL +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL +#include <stdint.h> +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ + +#ifndef KERNEL #include #endif /* [XSI] The timeval structure shall be defined as described in * <sys/time.h> */ -#define __need_struct_timeval +#include <sys/_types/_timeval.h> #ifdef KERNEL -#define __need_struct_user32_timeval -#define __need_struct_user64_timeval +#include <sys/_types/_user32_timeval.h> +#include <sys/_types/_user64_timeval.h> #endif -#include /* The id_t type shall be defined as described in <sys/types.h> */ -#ifndef _ID_T -#define _ID_T -typedef __darwin_id_t id_t; /* can hold pid_t, gid_t, or uid_t */ -#endif +#include <sys/_types/_id_t.h> /* @@ -107,7 +107,7 @@ typedef __uint64_t rlim_t; #define PRIO_PGRP 1 /* Second argument is a GID */ #define PRIO_USER 2 /* Second argument is a UID */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL #define PRIO_DARWIN_THREAD 3 /* Second argument is always 0 (current thread) */ #define PRIO_DARWIN_PROCESS 4 /* Second argument is a PID */ @@ -130,7 +130,7 @@ typedef __uint64_t rlim_t; */ #define PRIO_DARWIN_NONUI 0x1001 -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ @@ -157,9 +157,9 @@ typedef __uint64_t rlim_t; struct rusage { struct timeval ru_utime; /* user time used (PL) */ struct timeval ru_stime; /* system time used (PL) */ -#if defined(_POSIX_C_SOURCE) && !defined(_DARWIN_C_SOURCE) +#if __DARWIN_C_LEVEL < __DARWIN_C_FULL long ru_opaque[14]; /* implementation defined */ -#else /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#else /* * Informational aliases for source compatibility with programs * that need more information than that provided by standards, @@ -181,12 +181,99 @@ struct rusage { long ru_nvcsw; /* voluntary context switches (atomic) */ long ru_nivcsw; /* involuntary " */ #define ru_last ru_nivcsw /* internal: ruadd() range end */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ +}; + +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL +/* + * Flavors for proc_pid_rusage().
+ */ +#define RUSAGE_INFO_V0 0 +#define RUSAGE_INFO_V1 1 +#define RUSAGE_INFO_V2 2 +#define RUSAGE_INFO_CURRENT RUSAGE_INFO_V2 + +typedef void *rusage_info_t; + +struct rusage_info_v0 { + uint8_t ri_uuid[16]; + uint64_t ri_user_time; + uint64_t ri_system_time; + uint64_t ri_pkg_idle_wkups; + uint64_t ri_interrupt_wkups; + uint64_t ri_pageins; + uint64_t ri_wired_size; + uint64_t ri_resident_size; + uint64_t ri_phys_footprint; + uint64_t ri_proc_start_abstime; + uint64_t ri_proc_exit_abstime; }; +struct rusage_info_v1 { + uint8_t ri_uuid[16]; + uint64_t ri_user_time; + uint64_t ri_system_time; + uint64_t ri_pkg_idle_wkups; + uint64_t ri_interrupt_wkups; + uint64_t ri_pageins; + uint64_t ri_wired_size; + uint64_t ri_resident_size; + uint64_t ri_phys_footprint; + uint64_t ri_proc_start_abstime; + uint64_t ri_proc_exit_abstime; + uint64_t ri_child_user_time; + uint64_t ri_child_system_time; + uint64_t ri_child_pkg_idle_wkups; + uint64_t ri_child_interrupt_wkups; + uint64_t ri_child_pageins; + uint64_t ri_child_elapsed_abstime; +}; + +struct rusage_info_v2 { + uint8_t ri_uuid[16]; + uint64_t ri_user_time; + uint64_t ri_system_time; + uint64_t ri_pkg_idle_wkups; + uint64_t ri_interrupt_wkups; + uint64_t ri_pageins; + uint64_t ri_wired_size; + uint64_t ri_resident_size; + uint64_t ri_phys_footprint; + uint64_t ri_proc_start_abstime; + uint64_t ri_proc_exit_abstime; + uint64_t ri_child_user_time; + uint64_t ri_child_system_time; + uint64_t ri_child_pkg_idle_wkups; + uint64_t ri_child_interrupt_wkups; + uint64_t ri_child_pageins; + uint64_t ri_child_elapsed_abstime; + uint64_t ri_diskio_bytesread; + uint64_t ri_diskio_byteswritten; +}; + +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ #ifdef KERNEL +struct rusage_superset { + struct rusage ru; + struct rusage_info_v2 ri; +}; + +struct rusage_info_child { + uint64_t ri_child_user_time; + uint64_t ri_child_system_time; + uint64_t ri_child_pkg_idle_wkups; + uint64_t ri_child_interrupt_wkups; + uint64_t ri_child_pageins; + uint64_t ri_child_elapsed_abstime; +}; + +struct rusage_info_diskiobytes { + volatile uint64_t ri_bytesread __attribute__((aligned(8))); + volatile uint64_t ri_byteswritten __attribute__((aligned(8))); +}; + struct user64_rusage { struct user64_timeval ru_utime; /* user time used */ struct user64_timeval ru_stime; /* system time used */ @@ -251,15 +338,15 @@ struct user32_rusage { #define RLIMIT_STACK 3 /* stack size */ #define RLIMIT_CORE 4 /* core file size */ #define RLIMIT_AS 5 /* address space (resident set size) */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL #define RLIMIT_RSS RLIMIT_AS /* source compatibility alias */ #define RLIMIT_MEMLOCK 6 /* locked-in-memory address space */ #define RLIMIT_NPROC 7 /* number of processes */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ #define RLIMIT_NOFILE 8 /* number of open files */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL #define RLIM_NLIMITS 9 /* total number of resource limits */ -#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ #define _RLIMIT_POSIX_FLAG 0x1000 /* Set bit for strict POSIX */ /* @@ -271,25 +358,65 @@ struct rlimit { rlim_t rlim_max; /* maximum value for rlim_cur */ }; -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL +/* + * proc_rlimit_control() + * + * Resource limit flavors + */ +#define 
RLIMIT_WAKEUPS_MONITOR 0x1 /* Configure the wakeups monitor. */ +#define RLIMIT_CPU_USAGE_MONITOR 0x2 /* Configure the CPU usage monitor. */ + +/* + * Flags for wakeups monitor control. + */ +#define WAKEMON_ENABLE 0x01 +#define WAKEMON_DISABLE 0x02 +#define WAKEMON_GET_PARAMS 0x04 +#define WAKEMON_SET_DEFAULTS 0x08 +#define WAKEMON_MAKE_FATAL 0x10 /* Configure the task so that violations are fatal. */ +/* + * Flags for CPU usage monitor control. + */ +#define CPUMON_MAKE_FATAL 0x1000 + +struct proc_rlimit_control_wakeupmon { + uint32_t wm_flags; + int32_t wm_rate; +}; + /* I/O type */ #define IOPOL_TYPE_DISK 0 +#if PRIVATE +#define IOPOL_TYPE_VFS_HFS_CASE_SENSITIVITY 1 +#endif /* scope */ #define IOPOL_SCOPE_PROCESS 0 #define IOPOL_SCOPE_THREAD 1 +#define IOPOL_SCOPE_DARWIN_BG 2 /* I/O Priority */ -#define IOPOL_DEFAULT 0 -#define IOPOL_NORMAL 1 -#define IOPOL_PASSIVE 2 -#define IOPOL_THROTTLE 3 -#define IOPOL_UTILITY 4 +#define IOPOL_DEFAULT 0 +#define IOPOL_IMPORTANT 1 +#define IOPOL_PASSIVE 2 +#define IOPOL_THROTTLE 3 +#define IOPOL_UTILITY 4 +#define IOPOL_STANDARD 5 + +/* compatibility with older names */ +#define IOPOL_APPLICATION IOPOL_STANDARD +#define IOPOL_NORMAL IOPOL_IMPORTANT + +#if PRIVATE +#define IOPOL_VFS_HFS_CASE_SENSITIVITY_DEFAULT 0 +#define IOPOL_VFS_HFS_CASE_SENSITIVITY_FORCE_CASE_SENSITIVE 1 +#endif #ifdef PRIVATE /* - * Structures for use in communicating via iopolicysys() between Lic and the - * kernel. Not to be used by uesr programs directly. + * Structures for use in communicating via iopolicysys() between Libc and the + * kernel. Not to be used by user programs directly. */ /* @@ -308,21 +435,21 @@ struct _iopol_param_t { }; #endif /* PRIVATE */ -#endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */ +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ #ifndef KERNEL __BEGIN_DECLS int getpriority(int, id_t); -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL int getiopolicy_np(int, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); -#endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */ +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ int getrlimit(int, struct rlimit *) __DARWIN_ALIAS(getrlimit); int getrusage(int, struct rusage *); int setpriority(int, id_t, int); -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#if __DARWIN_C_LEVEL >= __DARWIN_C_FULL int setiopolicy_np(int, int, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); -#endif /* !_POSIX_C_SOURCE || _DARWIN_C_SOURCE */ +#endif /* __DARWIN_C_LEVEL >= __DARWIN_C_FULL */ int setrlimit(int, const struct rlimit *) __DARWIN_ALIAS(setrlimit); __END_DECLS diff --git a/bsd/sys/resourcevar.h b/bsd/sys/resourcevar.h index eaf7f148b..89aa0aa6e 100644 --- a/bsd/sys/resourcevar.h +++ b/bsd/sys/resourcevar.h @@ -71,9 +71,8 @@ * (not necessarily resident except when running). 
*/ struct pstats { -#define pstat_startzero p_ru - struct rusage p_ru; /* stats for this proc */ - struct rusage p_cru; /* (PL) sum of stats for reaped children */ + struct rusage p_ru; /* stats for this proc */ + struct rusage p_cru; /* (PL) sum of stats for reaped children */ struct uprof { /* profile arguments */ struct uprof *pr_next; /* multiple prof buffers allowed */ @@ -84,10 +83,11 @@ struct pstats { u_int32_t pr_addr; /* temp storage for addr until AST */ u_int32_t pr_ticks; /* temp storage for ticks until AST */ } p_prof; -#define pstat_endzero p_start - - struct timeval p_start; /* starting time ; compat only */ + + uint64_t ps_start; /* starting time ; compat only */ #ifdef KERNEL + struct rusage_info_child ri_child; /* (PL) sum of additional stats for reaped children (proc_pid_rusage) */ + struct rusage_info_diskiobytes ri_diskiobytes; /* Bytes of Disk I/O done by the process */ struct user_uprof { /* profile arguments */ struct user_uprof *pr_next; /* multiple prof buffers allowed */ user_addr_t pr_base; /* buffer base */ @@ -132,6 +132,7 @@ void addupc_task(struct proc *p, user_addr_t pc, u_int ticks); void calcru(struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip); void ruadd(struct rusage *ru, struct rusage *ru2); +void update_rusage_info_child(struct rusage_info_child *ru, struct rusage_info_v2 *ru2); void proc_limitget(proc_t p, int whichi, struct rlimit * limp); void proc_limitdrop(proc_t p, int exiting); void proc_limitfork(proc_t parent, proc_t child); diff --git a/bsd/sys/sdt_impl.h b/bsd/sys/sdt_impl.h index e9531067c..aca7c5a6c 100644 --- a/bsd/sys/sdt_impl.h +++ b/bsd/sys/sdt_impl.h @@ -61,13 +61,6 @@ extern "C" { #include -#if !defined(__APPLE__) -#if defined(__i386) || defined(__amd64) -typedef uint8_t sdt_instr_t; -#else -typedef uint32_t sdt_instr_t; -#endif -#else struct module { int sdt_nprobes; sdt_probedesc_t *sdt_probes; @@ -86,12 +79,11 @@ extern int sdt_probetab_mask; #define SDT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask) -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) typedef uint8_t sdt_instr_t; #else #error Unknown implementation #endif -#endif /* __APPLE__ */ typedef struct sdt_provider { const char *sdtp_name; /* name of provider */ diff --git a/bsd/sys/select.h b/bsd/sys/select.h index 79237174b..a4c33d0c8 100644 --- a/bsd/sys/select.h +++ b/bsd/sys/select.h @@ -72,30 +72,18 @@ * The timespec structure shall be defined as described in <time.h> * The <sys/select.h> header shall define the timeval structure. */ -#define __need_fd_set -#define __need_struct_timespec -#define __need_struct_timeval -#include +#include <sys/_types/_fd_def.h> +#include <sys/_types/_timespec.h> +#include <sys/_types/_timeval.h> /* * The time_t and suseconds_t types shall be defined as described in * <sys/types.h> * The sigset_t type shall be defined as described in <signal.h> */ -#ifndef _TIME_T -#define _TIME_T -typedef __darwin_time_t time_t; -#endif - -#ifndef _SUSECONDS_T -#define _SUSECONDS_T -typedef __darwin_suseconds_t suseconds_t; -#endif - -#ifndef _SIGSET_T -#define _SIGSET_T -typedef __darwin_sigset_t sigset_t; -#endif +#include <sys/_types/_time_t.h> +#include <sys/_types/_suseconds_t.h> +#include <sys/_types/_sigset_t.h> /* * [XSI] FD_CLR, FD_ISSET, FD_SET, FD_ZERO may be declared as a function, or @@ -109,25 +97,14 @@ typedef __darwin_sigset_t sigset_t; * extra protection here is to permit application redefinition above * the default size.
*/ -#ifndef FD_SETSIZE -#define FD_SETSIZE __DARWIN_FD_SETSIZE -#endif /* FD_SETSIZE */ -#ifndef FD_SET -#define FD_SET(n, p) __DARWIN_FD_SET(n, p) -#endif /* FD_SET */ -#ifndef FD_CLR -#define FD_CLR(n, p) __DARWIN_FD_CLR(n, p) -#endif /* FD_CLR */ -#ifndef FD_ISSET -#define FD_ISSET(n, p) __DARWIN_FD_ISSET(n, p) -#endif /* FD_ISSET */ -#ifndef FD_ZERO -#define FD_ZERO(p) __DARWIN_FD_ZERO(p) -#endif /* FD_ZERO */ +#include +#include +#include +#include +#include + #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef FD_COPY -#define FD_COPY(f, t) __DARWIN_FD_COPY(f, t) -#endif /* FD_COPY */ +#include #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ #ifdef KERNEL diff --git a/bsd/sys/sem.h b/bsd/sys/sem.h index 35689b2fc..67c6064aa 100644 --- a/bsd/sys/sem.h +++ b/bsd/sys/sem.h @@ -55,20 +55,9 @@ * NOTE: The definition of the key_t type is implicit from the * inclusion of */ -#ifndef _PID_T -typedef __darwin_pid_t pid_t; -#define _PID_T -#endif - -#ifndef _TIME_T -#define _TIME_T -typedef __darwin_time_t time_t; -#endif - -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif +#include +#include +#include /* * Technically, we should force all code references to the new structure diff --git a/bsd/sys/semaphore.h b/bsd/sys/semaphore.h index 977655231..379c4baaf 100644 --- a/bsd/sys/semaphore.h +++ b/bsd/sys/semaphore.h @@ -63,7 +63,7 @@ __END_DECLS #else /* KERNEL */ void psem_lock_init(void); -void psem_cache_init(void) __attribute__((section("__TEXT, initcode"))); +void psem_cache_init(void); #endif /* KERNEL */ #endif /* _SYS_SEMAPHORE_H_ */ diff --git a/bsd/sys/shm.h b/bsd/sys/shm.h index e7a3a4b58..150261adc 100644 --- a/bsd/sys/shm.h +++ b/bsd/sys/shm.h @@ -81,20 +81,9 @@ * NOTE: The definition of the key_t type is implicit from the * inclusion of */ -#ifndef _PID_T -typedef __darwin_pid_t pid_t; -#define _PID_T -#endif - -#ifndef _TIME_T -#define _TIME_T -typedef __darwin_time_t time_t; -#endif - -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif +#include +#include +#include /* * [XSI] The unsigned integer type used for the number of current attaches diff --git a/bsd/sys/signal.h b/bsd/sys/signal.h index 0ec175436..49deff231 100644 --- a/bsd/sys/signal.h +++ b/bsd/sys/signal.h @@ -142,35 +142,15 @@ #ifndef _ANSI_SOURCE #include -#define __need_mcontext_t -#define __need_stack_t -#define __need_ucontext_t -#include - -#ifndef _PID_T -#define _PID_T -typedef __darwin_pid_t pid_t; -#endif - -#ifndef _PTHREAD_ATTR_T -#define _PTHREAD_ATTR_T -typedef __darwin_pthread_attr_t pthread_attr_t; -#endif - -#ifndef _SIGSET_T -#define _SIGSET_T -typedef __darwin_sigset_t sigset_t; -#endif +#include +#include +#include -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif - -#ifndef _UID_T -#define _UID_T -typedef __darwin_uid_t uid_t; -#endif +#include +#include +#include +#include +#include union sigval { /* Members as suggested by Annex C of POSIX 1003.1b. */ @@ -576,7 +556,7 @@ struct sigstack { */ #define sigmask(m) (1 << ((m)-1)) -#ifdef BSD_KERNEL_PRIVATE +#ifdef KERNEL_PRIVATE /* * signals delivered on a per-thread basis. 
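Whether the FD_* macros come from the old inline definitions or from the new per-macro sys/_types headers, the calling convention is unchanged. A minimal select(2) sketch:

#include <sys/select.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
    fd_set rfds;
    struct timeval tv = { 5, 0 };   /* five-second timeout */

    FD_ZERO(&rfds);
    FD_SET(STDIN_FILENO, &rfds);

    int n = select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv);
    if (n > 0 && FD_ISSET(STDIN_FILENO, &rfds))
        printf("stdin is readable\n");
    else if (n == 0)
        printf("timed out\n");
    else
        perror("select");
    return (0);
}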
*/ @@ -598,7 +578,7 @@ struct sigstack { sigmask(SIGTTIN)|sigmask(SIGTTOU)|\ sigmask(SIGUSR1)|sigmask(SIGUSR2)) -#endif /* BSD_KERNEL_PRIVATE */ +#endif /* KERNEL_PRIVATE */ #define BADSIG SIG_ERR diff --git a/bsd/sys/signalvar.h b/bsd/sys/signalvar.h index 390d1b764..63f37d442 100644 --- a/bsd/sys/signalvar.h +++ b/bsd/sys/signalvar.h @@ -191,7 +191,7 @@ int sigprop[NSIG + 1] = { /* * Machine-independent functions: */ -int coredump(struct proc *p); +int coredump(struct proc *p, uint32_t reserve_mb, int ignore_ulimit); void execsigs(struct proc *p, thread_t thread); void gsignal(int pgid, int sig); int issignal_locked(struct proc *p); @@ -199,7 +199,7 @@ int CURSIG(struct proc *p); int clear_procsiglist(struct proc *p, int bit, int in_signalstart); int set_procsigmask(struct proc *p, int bit); void postsig_locked(int sig); -void siginit(struct proc *p) __attribute__((section("__TEXT, initcode"))); +void siginit(struct proc *p); void trapsignal(struct proc *p, int sig, unsigned code); void pt_setrunnable(struct proc *p); int hassigprop(int sig, int prop); diff --git a/bsd/sys/socket.h b/bsd/sys/socket.h index 204f5363b..4c4ec1865 100644 --- a/bsd/sys/socket.h +++ b/bsd/sys/socket.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -78,9 +78,10 @@ #ifdef PRIVATE #include +#include #endif /* PRIVATE */ -#ifndef KERNEL +#ifndef KERNEL #include #endif @@ -91,53 +92,23 @@ /* * Data types. */ -#ifndef _GID_T -typedef __darwin_gid_t gid_t; -#define _GID_T -#endif - -#ifndef _OFF_T -typedef __darwin_off_t off_t; -#define _OFF_T -#endif -#ifndef _PID_T -typedef __darwin_pid_t pid_t; -#define _PID_T -#endif - -#ifndef _SA_FAMILY_T -#define _SA_FAMILY_T -typedef __uint8_t sa_family_t; -#endif - -#ifndef _SOCKLEN_T -#define _SOCKLEN_T -typedef __darwin_socklen_t socklen_t; -#endif +#include +#include +#include +#include +#include /* XXX Not explicitly defined by POSIX, but function return types are */ -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif +#include /* XXX Not explicitly defined by POSIX, but function return types are */ -#ifndef _SSIZE_T -#define _SSIZE_T -typedef __darwin_ssize_t ssize_t; -#endif +#include /* * [XSI] The iovec structure shall be defined as described in . 
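The sigmask() macro above builds the old-style 32-bit masks that the kernel-private stop/cont sets are composed from, and it is equally usable from code compiled with full Darwin API visibility. For example:

#include <sys/signal.h>
#include <stdio.h>

int
main(void)
{
    /* BSD-style mask covering the job-control stop signals. */
    int stopsigs = sigmask(SIGSTOP) | sigmask(SIGTSTP) |
        sigmask(SIGTTIN) | sigmask(SIGTTOU);

    if (stopsigs & sigmask(SIGTSTP))
        printf("SIGTSTP is in the set (mask 0x%x)\n", stopsigs);
    return (0);
}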
*/ -#ifndef _STRUCT_IOVEC -#define _STRUCT_IOVEC -struct iovec { - void * iov_base; /* [XSI] Base address of I/O memory region */ - size_t iov_len; /* [XSI] Size of region iov_base points to */ -}; -#endif +#include #ifdef PRIVATE #define SO_TCDBG_PID 0x01 /* Set/get traffic class for PID */ @@ -228,10 +199,12 @@ struct so_tcdbg { #define SO_UPCALLCLOSEWAIT 0x1027 /* APPLE: block on close until an upcall returns */ #endif #define SO_LINGER_SEC 0x1080 /* linger on close if data present (in seconds) */ -#define SO_RESTRICTIONS 0x1081 /* APPLE: deny inbound/outbound/both/flag set */ -#define SO_RESTRICT_DENYIN 0x00000001 /* flag for SO_RESTRICTIONS - deny inbound */ -#define SO_RESTRICT_DENYOUT 0x00000002 /* flag for SO_RESTRICTIONS - deny outbound */ -#define SO_RESTRICT_DENYSET 0x80000000 /* flag for SO_RESTRICTIONS - deny has been set */ +#ifdef PRIVATE +#define SO_RESTRICTIONS 0x1081 /* APPLE: deny flag set */ +#define SO_RESTRICT_DENY_IN 0x1 /* deny inbound (trapdoor) */ +#define SO_RESTRICT_DENY_OUT 0x2 /* deny outbound (trapdoor) */ +#define SO_RESTRICT_DENY_CELLULAR 0x4 /* deny use of cellular (trapdoor) */ +#endif /* PRIVATE */ #define SO_RANDOMPORT 0x1082 /* APPLE: request local port randomization */ #define SO_NP_EXTENSIONS 0x1083 /* To turn off some POSIX behavior */ #endif @@ -340,6 +313,13 @@ struct so_tcdbg { #define SO_TC_ALL (-1) #define SO_RECV_ANYIF 0x1104 /* unrestricted inbound processing */ +#define SO_TRAFFIC_MGT_BACKGROUND 0x1105 /* Background traffic management */ + +#define SO_FLOW_DIVERT_TOKEN 0x1106 /* flow divert token */ + +#define SO_DELEGATED 0x1107 /* set socket as delegate (pid_t) */ +#define SO_DELEGATED_UUID 0x1108 /* set socket as delegate (uuid_t) */ + #endif /* PRIVATE */ #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ @@ -420,10 +400,8 @@ struct so_np_extensions { #define AF_IPX 23 /* Novell Internet Protocol */ #define AF_SIP 24 /* Simple Internet Protocol */ #define pseudo_AF_PIP 25 /* Help Identify PIP packets */ -#ifdef __APPLE__ /*define pseudo_AF_BLUE 26 Identify packets for Blue Box - Not used */ #define AF_NDRV 27 /* Network Driver 'raw' access */ -#endif #define AF_ISDN 28 /* Integrated Services Digital Network*/ #define AF_E164 AF_ISDN /* CCITT E.164 recommendation */ #define pseudo_AF_KEY 29 /* Internal key-management function */ @@ -431,30 +409,22 @@ struct so_np_extensions { #define AF_INET6 30 /* IPv6 */ #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define AF_NATM 31 /* native ATM access */ -#ifdef __APPLE__ #define AF_SYSTEM 32 /* Kernel event messages */ #define AF_NETBIOS 33 /* NetBIOS */ #define AF_PPP 34 /* PPP communication protocol */ -#else -#define AF_ATM 30 /* ATM */ -#endif #define pseudo_AF_HDRCMPLT 35 /* Used by BPF to not rewrite headers - * in interface output routine - */ + * in interface output routine */ #ifdef PRIVATE #define AF_AFP 36 /* Used by AFP */ #else #define AF_RESERVED_36 36 /* Reserved for internal usage */ #endif - -#ifndef __APPLE__ -#define AF_NETGRAPH 32 /* Netgraph sockets */ -#endif #define AF_IEEE80211 37 /* IEEE 802.11 protocol */ -#ifdef __APPLE__ #define AF_UTUN 38 -#endif -#define AF_MAX 39 +#ifdef PRIVATE +#define AF_MULTIPATH 39 +#endif /* PRIVATE */ +#define AF_MAX 40 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ /* @@ -501,6 +471,24 @@ struct sockaddr_storage { char __ss_pad2[_SS_PAD2SIZE]; }; +#ifdef BSD_KERNEL_PRIVATE +#include + +struct sockaddr_entry { + TAILQ_ENTRY(sockaddr_entry) se_link; + struct sockaddr *se_addr; + uint32_t se_flags; +}; + +#define SEF_ATTACHED 1 
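SO_LINGER_SEC takes the same struct linger as SO_LINGER but, per the comment above, interprets l_linger in seconds. A short sketch:

#include <sys/socket.h>
#include <stdio.h>

int
main(void)
{
    int s = socket(PF_INET, SOCK_STREAM, 0);
    if (s == -1) {
        perror("socket");
        return (1);
    }

    /* Block close(2) for up to 3 seconds while unsent data drains. */
    struct linger l = { .l_onoff = 1, .l_linger = 3 };
    if (setsockopt(s, SOL_SOCKET, SO_LINGER_SEC, &l, sizeof (l)) == -1)
        perror("setsockopt(SO_LINGER_SEC)");
    return (0);
}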
/* attached to sockaddr_list */ + +struct sockaddr_list { + TAILQ_HEAD(, sockaddr_entry) sl_head; + uint32_t sl_cnt; + +}; +#endif /* BSD_KERNEL_PRIVATE */ + /* * Protocol families, same as address families for now. */ @@ -532,31 +520,23 @@ struct sockaddr_storage { #define PF_IPX AF_IPX /* same format as AF_NS */ #define PF_RTIP pseudo_AF_RTIP /* same format as AF_INET */ #define PF_PIP pseudo_AF_PIP -#ifdef __APPLE__ #define PF_NDRV AF_NDRV -#endif #define PF_ISDN AF_ISDN #define PF_KEY pseudo_AF_KEY #define PF_INET6 AF_INET6 #define PF_NATM AF_NATM -#ifdef __APPLE__ #define PF_SYSTEM AF_SYSTEM #define PF_NETBIOS AF_NETBIOS #define PF_PPP AF_PPP #ifdef PRIVATE -#define PF_AFP AF_AFP +#define PF_AFP AF_AFP #else #define PF_RESERVED_36 AF_RESERVED_36 #endif - -#else -#define PF_ATM AF_ATM -#define PF_NETGRAPH AF_NETGRAPH -#endif - -#ifdef __APPLE__ #define PF_UTUN AF_UTUN -#endif +#ifdef PRIVATE +#define PF_MULTIPATH AF_MULTIPATH +#endif /* PRIVATE */ #define PF_MAX AF_MAX /* @@ -850,6 +830,11 @@ struct cmsgcred { #define SCM_CREDS 0x03 /* process creds (struct cmsgcred) */ #define SCM_TIMESTAMP_MONOTONIC 0x04 /* timestamp (uint64_t) */ +#ifdef PRIVATE +#define SCM_SEQNUM 0x05 /* TCP unordered recv seq no */ +#define SCM_MSG_PRIORITY 0x06 /* TCP unordered snd priority */ +#endif /* PRIVATE */ + #ifdef KERNEL_PRIVATE /* * 4.3 compat sockaddr (deprecated) @@ -923,6 +908,169 @@ struct user32_sf_hdtr { #endif /* !_POSIX_C_SOURCE */ +#ifdef PRIVATE +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +typedef __uint32_t associd_t; +#define ASSOCID_ANY 0 +#define ASSOCID_ALL ((associd_t)(-1ULL)) + +typedef __uint32_t connid_t; +#define CONNID_ANY 0 +#define CONNID_ALL ((connid_t)(-1ULL)) + +/* + * Structure for SIOCGASSOCIDS + */ +struct so_aidreq { + __uint32_t sar_cnt; /* number of associations */ + associd_t *sar_aidp; /* array of association IDs */ +}; + +#ifdef BSD_KERNEL_PRIVATE +struct so_aidreq32 { + __uint32_t sar_cnt; + user32_addr_t sar_aidp; +}; + +struct so_aidreq64 { + __uint32_t sar_cnt; + user64_addr_t sar_aidp __attribute__((aligned(8))); +}; +#endif /* BSD_KERNEL_PRIVATE */ + +/* + * Structure for SIOCGCONNIDS + */ +struct so_cidreq { + associd_t scr_aid; /* association ID */ + __uint32_t scr_cnt; /* number of connections */ + connid_t *scr_cidp; /* array of connection IDs */ +}; + +#ifdef BSD_KERNEL_PRIVATE +struct so_cidreq32 { + associd_t scr_aid; + __uint32_t scr_cnt; + user32_addr_t scr_cidp; +}; + +struct so_cidreq64 { + associd_t scr_aid; + __uint32_t scr_cnt; + user64_addr_t scr_cidp __attribute__((aligned(8))); +}; +#endif /* BSD_KERNEL_PRIVATE */ + +/* + * Structure for SIOCGCONNINFO + */ +struct so_cinforeq { + connid_t scir_cid; /* connection ID */ + __uint32_t scir_flags; /* see flags below */ + __uint32_t scir_ifindex; /* (last) outbound interface */ + __int32_t scir_error; /* most recent error */ + struct sockaddr *scir_src; /* source address */ + socklen_t scir_src_len; /* source address len */ + struct sockaddr *scir_dst; /* destination address */ + socklen_t scir_dst_len; /* destination address len */ + __uint32_t scir_aux_type; /* aux data type (CIAUX) */ + void *scir_aux_data; /* aux data */ + __uint32_t scir_aux_len; /* aux data len */ +}; + +#ifdef BSD_KERNEL_PRIVATE +struct so_cinforeq32 { + connid_t scir_cid; + __uint32_t scir_flags; + __uint32_t scir_ifindex; + __int32_t scir_error; + user32_addr_t scir_src; + socklen_t scir_src_len; + user32_addr_t scir_dst; + socklen_t scir_dst_len; + __uint32_t scir_aux_type; + user32_addr_t scir_aux_data; + 
__uint32_t scir_aux_len; +}; + +struct so_cinforeq64 { + connid_t scir_cid; + __uint32_t scir_flags; + __uint32_t scir_ifindex; + __int32_t scir_error; + user64_addr_t scir_src __attribute__((aligned(8))); + socklen_t scir_src_len; + user64_addr_t scir_dst __attribute__((aligned(8))); + socklen_t scir_dst_len; + __uint32_t scir_aux_type; + user64_addr_t scir_aux_data __attribute__((aligned(8))); + __uint32_t scir_aux_len; +}; +#endif /* BSD_KERNEL_PRIVATE */ + +/* valid connection info flags */ +#define CIF_CONNECTING 0x1 /* connection was attempted */ +#define CIF_CONNECTED 0x2 /* connection is established */ +#define CIF_DISCONNECTING 0x4 /* disconnection was attempted */ +#define CIF_DISCONNECTED 0x8 /* has been disconnected */ +#define CIF_BOUND_IF 0x10 /* bound to an interface */ +#define CIF_BOUND_IP 0x20 /* bound to a src address */ +#define CIF_BOUND_PORT 0x40 /* bound to a src port */ +#define CIF_PREFERRED 0x80 /* connection is primary/preferred */ +#define CIF_MP_CAPABLE 0x100 /* supports multipath protocol */ +#define CIF_MP_READY 0x200 /* multipath protocol confirmed */ +#define CIF_MP_DEGRADED 0x400 /* has lost its multipath capability */ +#define CIF_MP_ACTIVE 0x800 /* this is the active subflow */ + +/* valid connection info auxiliary data types */ +#define CIAUX_TCP 0x1 /* TCP auxiliary data (conninfo_tcp_t) */ + +/* + * Structure for SIOC{S,G}CONNORDER + */ +struct so_cordreq { + connid_t sco_cid; /* connection ID */ + __uint32_t sco_rank; /* rank (0 means unspecified) */ +}; + +/* + * Network policy subclass (of KEV_NETWORK_CLASS) + */ +#define KEV_NETPOLICY_SUBCLASS 3 + +#define KEV_NETPOLICY_IFDENIED 1 /* denied access to interface */ + +/* + * Common structure for KEV_NETPOLICY_SUBCLASS + */ +struct netpolicy_event_data { + __uint64_t eupid; /* effective unique PID */ + pid_t epid; /* effective PID */ +#if !defined(__LP64__) + __uint32_t pad; +#endif /* __LP64__ */ + uuid_t euuid; /* effective UUID */ +}; + +/* + * NETPOLICY_IFDENIED event structure + */ +struct kev_netpolicy_ifdenied { + struct netpolicy_event_data ev_data; +}; + +#ifndef KERNEL +__BEGIN_DECLS +extern int connectx(int s, struct sockaddr *, socklen_t, struct sockaddr *, + socklen_t, __uint32_t, associd_t, connid_t *); +extern int disconnectx(int s, associd_t, connid_t); +extern int peeloff(int s, associd_t); +extern int socket_delegate(int, int, int, pid_t); +__END_DECLS +#endif /* !KERNEL */ +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#endif /* PRIVATE */ + #ifndef KERNEL __BEGIN_DECLS int accept(int, struct sockaddr * __restrict, socklen_t * __restrict) diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h index 06e6f5c41..423634cca 100644 --- a/bsd/sys/socketvar.h +++ b/bsd/sys/socketvar.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
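connectx()/disconnectx() are declared only under PRIVATE, so nothing here is stable API; the sketch below simply exercises the prototypes as written. Two labeled assumptions: the unnamed __uint32_t argument is taken to be an interface-scope/flags word (0 here), and a NULL source address is taken to mean "unspecified".

#include <sys/socket.h>
#include <netinet/in.h>

static int
start_and_stop(int s, struct sockaddr_in *dst)
{
    connid_t cid = CONNID_ANY;

    if (connectx(s, NULL, 0,                    /* no explicit source */
        (struct sockaddr *)dst, sizeof (*dst),
        0,                                      /* assumed scope/flags */
        ASSOCID_ANY, &cid) == -1)
        return (-1);

    /* Tear down just this connection, identified by ID. */
    return (disconnectx(s, ASSOCID_ANY, cid));
}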
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -73,41 +73,34 @@ #define _SYS_SOCKETVAR_H_ #include +#include +#ifdef KERNEL_PRIVATE #include /* for TAILQ macros */ #include /* for struct selinfo */ #include #include -#include +#include +#endif /* KERNEL_PRIVATE */ -/* - * Hacks to get around compiler complaints - */ +typedef u_quad_t so_gen_t; + +#ifdef KERNEL_PRIVATE struct mbuf; struct socket_filter_entry; struct protosw; struct sockif; struct sockutil; -#ifdef KERNEL_PRIVATE /* strings for sleep message: */ extern char netio[], netcon[], netcls[]; #define SOCKET_CACHE_ON #define SO_CACHE_FLUSH_INTERVAL 1 /* Seconds */ #define SO_CACHE_TIME_LIMIT (120/SO_CACHE_FLUSH_INTERVAL) /* Seconds */ #define SO_CACHE_MAX_FREE_BATCH 50 -#define MAX_CACHED_SOCKETS 60000 +#define MAX_CACHED_SOCKETS 512 #define TEMPDEBUG 0 - -/* - * Kernel structure per socket. - * Contains send and receive buffer queues, - * handle on protocol and pointer to protocol - * private data and error information. - */ #endif /* KERNEL_PRIVATE */ -typedef u_quad_t so_gen_t; - #ifdef PRIVATE #define SO_TC_STATS_MAX 4 @@ -117,14 +110,43 @@ struct data_stats { u_int64_t txpackets; u_int64_t txbytes; }; + +#define MSG_PRI_0 0 /* TCP message priority, lowest */ +#define MSG_PRI_1 1 +#define MSG_PRI_2 2 +#define MSG_PRI_3 3 /* TCP message priority, highest */ +#define MSG_PRI_MAX MSG_PRI_3 +#define MSG_PRI_MIN MSG_PRI_0 +#define MSG_PRI_COUNT 4 +#define MSG_PRI_DEFAULT MSG_PRI_1 #endif /* PRIVATE */ #ifdef KERNEL_PRIVATE -#ifndef __APPLE__ -/* We don't support BSD style socket filters */ -struct accept_filter; -#endif +/* State for TCP message send or receive */ +struct msg_priq { + struct mbuf *msgq_head; /* first mbuf in the queue */ + struct mbuf *msgq_tail; /* last mbuf in the queue */ + struct mbuf *msgq_lastmsg; /* last message in the queue */ + u_int32_t msgq_flags; /* flags per priority queue */ +#define MSGQ_MSG_NOTDONE 0x1 /* set when EOR of a msg is not seen */ + u_int32_t msgq_bytes; /* data bytes in this queue */ +}; + +struct msg_state { + struct msg_priq msg_priq[MSG_PRI_COUNT]; /* priority queues */ + u_int32_t msg_serial_bytes; /* bytes moved to serial queue */ + u_int32_t msg_uno_bytes; /* out-of-order chars in rcv buffer */ +}; + +/* mbuf flag used to indicate out of order data received */ +#define M_UNORDERED_DATA M_PROTO1 +/* + * Kernel structure per socket. + * Contains send and receive buffer queues, + * handle on protocol and pointer to protocol + * private data and error information. + */ struct socket { int so_zone; /* zone we were allocated from */ short so_type; /* generic type, see socket.h */ @@ -155,79 +177,65 @@ struct socket { short so_timeo; /* connection timeout */ u_short so_error; /* error affecting connection */ pid_t so_pgid; /* pgid for signals */ - u_int32_t so_oobmark; /* chars to oob mark */ -#ifndef __APPLE__ - /* We don't support AIO ops */ - TAILQ_HEAD(, aiocblist) so_aiojobq; /* AIO ops waiting on socket */ -#endif + u_int32_t so_oobmark; /* chars to oob mark */ /* * Variables for socket buffering. 
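The unordered-delivery state is just a fixed array of queues indexed by the MSG_PRI_* values above. An illustrative, hypothetical kernel-side lookup helper (so_msg_state is the per-socket pointer added further down in struct socket):

/* Return the queue for a given message priority, clamped to default. */
static struct msg_priq *
priq_for(struct socket *so, int pri)
{
    if (pri < MSG_PRI_MIN || pri > MSG_PRI_MAX)
        pri = MSG_PRI_DEFAULT;
    return (&so->so_msg_state->msg_priq[pri]);
}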
*/ - struct sockbuf { + struct sockbuf { u_int32_t sb_cc; /* actual chars in buffer */ u_int32_t sb_hiwat; /* max actual char count */ u_int32_t sb_mbcnt; /* chars of mbufs used */ u_int32_t sb_mbmax; /* max chars of mbufs to use */ u_int32_t sb_ctl; /* non-data chars in buffer */ u_int32_t sb_lowat; /* low water mark */ - struct mbuf *sb_mb; /* the mbuf chain */ - struct mbuf *sb_mbtail; /* the last mbuf in the chain */ - struct mbuf *sb_lastrecord; /* first mbuf of last record */ -#if __APPLE__ - struct socket *sb_so; /* socket back ptr for kexts */ -#endif - struct selinfo sb_sel; /* process selecting read/write */ - short sb_flags; /* flags, see below */ - struct timeval sb_timeo; /* timeout for read/write */ - u_int32_t sb_idealsize; /* Ideal size for the sb based on bandwidth and delay */ - void *reserved1[4]; /* for future use */ + struct mbuf *sb_mb; /* the mbuf chain */ + struct mbuf *sb_mbtail; /* the last mbuf in the chain */ + struct mbuf *sb_lastrecord; /* first mbuf of last record */ + struct socket *sb_so; /* socket back ptr for kexts */ + struct selinfo sb_sel; /* process selecting rd/wr */ + u_int32_t sb_flags; /* flags, see below */ + struct timeval sb_timeo; /* timeout for read/write */ + u_int32_t sb_idealsize; /* Ideal size for the sb based + on bandwidth and delay */ + void (*sb_upcall)(struct socket *, void *arg, int waitf); + void *sb_upcallarg; /* Arg for above */ + u_int32_t sb_wantlock; /* # of SB_LOCK waiters */ + u_int32_t sb_waiters; /* # of data/space waiters */ + u_int32_t _reserved[2]; /* for future use */ } so_rcv, so_snd; #define SB_MAX (8192*1024) /* default for max chars in sockbuf */ -#define LOW_SB_MAX (2*9*1024) /* lower limit on max socket buffer size, 2 max datagrams */ -#define SB_LOCK 0x01 /* lock on data queue */ -#define SB_WANT 0x02 /* someone is waiting to lock */ -#define SB_WAIT 0x04 /* someone is waiting for data/space */ -#define SB_SEL 0x08 /* someone is selecting */ +#define LOW_SB_MAX (2*9*1024) /* lower limit on max socket buffer + size, 2 max datagrams */ +#define SB_LOCK 0x1 /* lock on data queue */ +#define SB_NOINTR 0x2 /* operations not interruptible */ +#define SB_RECV 0x4 /* this is rcv sb */ +#define SB_SEL 0x8 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_UPCALL 0x20 /* someone wants an upcall */ -#define SB_NOINTR 0x40 /* operations not interruptible */ -#define SB_KNOTE 0x100 /* kernel note attached */ +#define SB_KNOTE 0x40 /* kernel note attached */ +#define SB_DROP 0x80 /* does not accept any more data */ +#define SB_UNIX 0x100 /* UNIX domain socket buffer */ #define SB_USRSIZE 0x200 /* user specified sbreserve */ -#ifndef __APPLE__ -#define SB_AIO 0x80 /* AIO operations queued */ -#else -#define SB_NOTIFY (SB_WAIT|SB_SEL|SB_ASYNC) -#define SB_DROP 0x400 /* does not accept any more data */ -#define SB_UNIX 0x800 /* UNIX domain socket buffer */ -#define SB_AUTOSIZE 0x1000 /* automatically size socket buffer */ -#define SB_TRIM 0x2000 /* Trim the socket buffer */ -#define SB_RECV 0x8000 /* this is rcv sb */ - - caddr_t so_tpcb; /* Wisc. protocol control block, used by some kexts */ -#endif +#define SB_AUTOSIZE 0x400 /* automatically size socket buffer */ +#define SB_TRIM 0x800 /* Trim the socket buffer */ +#define SB_NOCOMPRESS 0x1000 /* do not compress socket buffer */ - void (*so_upcall)(struct socket *so, caddr_t arg, int waitf); - caddr_t so_upcallarg; /* Arg for above */ + caddr_t so_tpcb; /* Misc. 
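For orientation: socket-buffer space is bounded both by character count (sb_cc against sb_hiwat) and by mbuf storage (sb_mbcnt against sb_mbmax). A sketch of the classic BSD formulation of sbspace(); the real implementation layers more accounting on top of this:

static int
sbspace_sketch(const struct sockbuf *sb)
{
    int space = (int)(sb->sb_hiwat - sb->sb_cc);
    int mspace = (int)(sb->sb_mbmax - sb->sb_mbcnt);

    /* Whichever limit is tighter wins. */
    return (space < mspace ? space : mspace);
}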
protocol control block, used + by some kexts */ + + void (*so_event)(struct socket *, void *, u_int32_t); + void *so_eventarg; /* Arg for above */ kauth_cred_t so_cred; /* cred of who opened the socket */ /* NB: generation count must not be first; easiest to make it last. */ so_gen_t so_gencnt; /* generation count */ -#ifndef __APPLE__ - void *so_emuldata; /* private data for emulators */ - struct so_accf { - struct accept_filter *so_accept_filter; - void *so_accept_filter_arg; /* saved filter args */ - char *so_accept_filter_str; /* saved user args */ - } *so_accf; -#else TAILQ_HEAD(, eventqelt) so_evlist; - int cached_in_sock_layer; /* bundled with pcb/pcb.inp_ppcb? */ - struct socket *cache_next; - struct socket *cache_prev; - u_int32_t cache_timestamp; + boolean_t cached_in_sock_layer; /* bundled with inpcb and tcpcb */ + STAILQ_ENTRY(socket) so_cache_ent; /* socache entry */ + u_int32_t cache_timestamp; /* time socket was cached */ caddr_t so_saved_pcb; /* Saved pcb when cacheing */ - struct mbuf *so_temp; /* Holding area for outbound frags */ + struct mbuf *so_temp; /* Holding area for outbound frags */ /* Plug-in support - make the socket interface overridable */ - struct mbuf *so_tail; + struct mbuf *so_tail; struct socket_filter_entry *so_filt; /* NKE hook */ u_int32_t so_flags; /* Flags */ #define SOF_NOSIGPIPE 0x1 @@ -235,75 +243,98 @@ struct socket { #define SOF_PCBCLEARING 0x4 /* pru_disconnect done; don't call pru_detach */ #define SOF_DEFUNCT 0x8 /* socket marked as inactive */ #define SOF_CLOSEWAIT 0x10 /* blocked in close awaiting some events */ -#define SOF_REUSESHAREUID 0x40 /* Allows SO_REUSEADDR/SO_REUSEPORT for multiple so_uid */ +#define SOF_REUSESHAREUID 0x40 /* Allows SO_REUSEADDR/SO_REUSEPORT + for multiple so_uid */ #define SOF_MULTIPAGES 0x80 /* jumbo clusters may be used for sosend */ -#define SOF_ABORTED 0x100 /* soabort was already called once on the socket */ -#define SOF_OVERFLOW 0x200 /* socket was dropped as overflow of listen queue */ +#define SOF_ABORTED 0x100 /* soabort was already called once */ +#define SOF_OVERFLOW 0x200 /* socket was dropped as overflow of listen q */ #ifdef __APPLE_API_PRIVATE -#define SOF_NOTIFYCONFLICT 0x400 /* notify that a bind was done on a port already in use */ +#define SOF_NOTIFYCONFLICT 0x400 /* notify that a bind was done on a + port already in use */ #endif -#define SOF_UPCALLCLOSEWAIT 0x800 /* block on close until an upcall returns */ -#define SOF_BINDRANDOMPORT 0x1000 /* Request a randomized port number for the bind */ -#define SOF_NPX_SETOPTSHUT 0x2000 /* Non POSIX extension to allow setsockopt(2) after shut down */ -#define SOF_RECV_TRAFFIC_CLASS 0x4000 /* Receive traffic class as ancillary data */ -#define SOF_NODEFUNCT 0x8000 /* socket cannot be defunct'd */ +#define SOF_UPCALLCLOSEWAIT 0x800 /* block close until upcall returns */ +#define SOF_BINDRANDOMPORT 0x1000 /* Randomized port number for bind */ +#define SOF_NPX_SETOPTSHUT 0x2000 /* Non POSIX extension to allow + setsockopt(2) after shut down */ +#define SOF_RECV_TRAFFIC_CLASS 0x4000 /* Receive TC as ancillary data */ +#define SOF_NODEFUNCT 0x8000 /* socket cannot be defunct'd */ #define SOF_PRIVILEGED_TRAFFIC_CLASS 0x10000 /* traffic class is privileged */ -#define SOF_SUSPENDED 0x20000 /* interface output queue is suspended */ -#define SOF_INCOMP_INPROGRESS 0x40000 /* incomp socket still being processed */ -#define SOF_NOTSENT_LOWAT 0x80000 /* A different lowat on not sent data has been set */ -#define SOF_KNOTE 0x100000 /* socket is on the EV_SOCK klist 
*/ -#define SOF_USELRO 0x200000 /* TCP must use LRO on these sockets */ - uint32_t so_upcallusecount; /* number of upcalls in progress */ - int so_usecount; /* refcounting of socket use */; - int so_retaincnt; - u_int32_t so_filteruse; /* usecount for the socket filters */ +#define SOF_SUSPENDED 0x20000 /* i/f output queue is suspended */ +#define SOF_INCOMP_INPROGRESS 0x40000 /* incomp socket is being processed */ +#define SOF_NOTSENT_LOWAT 0x80000 /* A different lowat on not sent + data has been set */ +#define SOF_KNOTE 0x100000 /* socket is on the EV_SOCK klist */ +#define SOF_USELRO 0x200000 /* TCP must use LRO on these sockets */ +#define SOF_ENABLE_MSGS 0x400000 /* TCP must enable message delivery */ +#define SOF_FLOW_DIVERT 0x800000 /* Flow Divert is enabled */ +#define SOF_MP_SUBFLOW 0x1000000 /* is a multipath subflow socket */ +#define SOF_MPTCP_TRUE 0x2000000 /* Established e2e MPTCP connection */ +#define SOF_MPTCP_CLIENT 0x4000000 /* Only client starts addtnal flows */ +#define SOF_MP_SEC_SUBFLOW 0x8000000 /* Set up secondary flow */ +#define SOF_MP_TRYFAILOVER 0x10000000 /* Failing subflow */ +#define SOF_DELEGATED 0x20000000 /* on behalf of another process */ + uint32_t so_upcallusecount; /* number of upcalls in progress */ + int so_usecount; /* refcounting of socket use */; + int so_retaincnt; + u_int32_t so_filteruse; /* usecount for the socket filters */ u_int32_t so_traffic_mgt_flags; /* traffic_mgt socket config */ thread_t so_send_filt_thread; u_int32_t so_restrictions; -/* for debug pruposes */ + /* for debug pruposes */ #define SO_LCKDBG_MAX 4 /* number of debug locking Link Registers recorded */ void *lock_lr[SO_LCKDBG_MAX]; /* locking calling history */ int next_lock_lr; void *unlock_lr[SO_LCKDBG_MAX]; /* unlocking caller history */ int next_unlock_lr; void *reserved; /* reserved for future use */ -#endif /* __APPLE__ */ - struct label *so_label; /* MAC label for socket */ - struct label *so_peerlabel; /* cached MAC label for socket peer */ - thread_t so_background_thread; /* thread that marked this socket background */ + struct label *so_label; /* MAC label for socket */ + struct label *so_peerlabel; /* cached MAC label for socket peer */ + thread_t so_background_thread; /* thread that marked + this socket background */ int so_traffic_class; - // last process to interact with this socket - u_int64_t last_upid; - pid_t last_pid; - struct data_stats so_tc_stats[SO_TC_STATS_MAX]; - struct klist so_klist; /* klist for EV_SOCK events */ + u_int64_t last_upid; /* upid of most recent accessor */ + pid_t last_pid; /* pid of most recent accessor */ + struct data_stats so_tc_stats[SO_TC_STATS_MAX]; + struct klist so_klist; /* klist for EV_SOCK events */ + + struct msg_state *so_msg_state; /* unordered snd/rcv state */ + struct flow_divert_pcb *so_fd_pcb; /* Flow Divert control block */ + u_int32_t so_eventmask; /* event mask */ + + u_int64_t e_upid; /* upid of the effective owner */ + pid_t e_pid; /* pid of the effective owner */ + + uuid_t last_uuid; /* uuid of most recent accessor */ + uuid_t e_uuid; /* uuid of effective owner */ + + int32_t so_policy_gencnt; /* UUID policy gencnt */ + u_int32_t so_ifdenied_notifies; /* # of notifications generated */ }; /* Control message accessor in mbufs */ -#define _MIN_NXT_CMSGHDR_PTR(cmsg) \ - ((char *)(cmsg) + \ - __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len) + \ +#define _MIN_NXT_CMSGHDR_PTR(cmsg) \ + ((char *)(cmsg) + \ + __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len) + \ __DARWIN_ALIGN32(sizeof(struct cmsghdr))) -#define 
M_FIRST_CMSGHDR(m) \ - ((char *)(m) != (char *)0L && (size_t)(m)->m_len >= sizeof(struct cmsghdr) && \ - (socklen_t)(m)->m_len >= __DARWIN_ALIGN32(((struct cmsghdr *)(void *)(m)->m_data)->cmsg_len) ?\ - (struct cmsghdr *)(void *)(m)->m_data : \ - (struct cmsghdr *)0L) - -#define M_NXT_CMSGHDR(m, cmsg) \ - ((char *)(cmsg) == (char *)0L ? M_FIRST_CMSGHDR(m) : \ - _MIN_NXT_CMSGHDR_PTR(cmsg) > ((char *)(m)->m_data) + (m)->m_len || \ - _MIN_NXT_CMSGHDR_PTR(cmsg) < (char *)(m)->m_data ? \ - (struct cmsghdr *)0L /* NULL */ : \ - (struct cmsghdr *)(void *)((unsigned char *)(cmsg) + \ - __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len))) - -#endif /* KERNEL_PRIVATE */ +#define M_FIRST_CMSGHDR(m) \ + ((char *)(m) != (char *)0L && \ + (size_t)(m)->m_len >= sizeof (struct cmsghdr) && \ + (socklen_t)(m)->m_len >= \ + __DARWIN_ALIGN32(((struct cmsghdr *)(void *)(m)->m_data)->cmsg_len) ? \ + (struct cmsghdr *)(void *)(m)->m_data : (struct cmsghdr *)0L) + +#define M_NXT_CMSGHDR(m, cmsg) \ + ((char *)(cmsg) == (char *)0L ? M_FIRST_CMSGHDR(m) : \ + _MIN_NXT_CMSGHDR_PTR(cmsg) > ((char *)(m)->m_data) + (m)->m_len || \ + _MIN_NXT_CMSGHDR_PTR(cmsg) < (char *)(m)->m_data ? \ + (struct cmsghdr *)0L /* NULL */ : \ + (struct cmsghdr *)(void *)((unsigned char *)(cmsg) + \ + __DARWIN_ALIGN32((__uint32_t)(cmsg)->cmsg_len))) /* * Socket state bits. @@ -326,6 +357,7 @@ struct socket { #define SS_DRAINING 0x4000 /* close waiting for blocked system calls to drain */ #define SS_DEFUNCT 0x8000 /* has been fully defunct'd */ +#endif /* KERNEL_PRIVATE */ #if defined(__LP64__) #define _XSOCKET_PTR(x) u_int32_t @@ -355,7 +387,7 @@ struct xsockbuf { * Externalized form of struct socket used by the sysctl(3) interface. */ struct xsocket { - u_int32_t xso_len; /* length of this structure */ + u_int32_t xso_len; /* length of this structure */ _XSOCKET_PTR(struct socket *) xso_so; /* makes a convenient handle */ short so_type; short so_options; @@ -376,11 +408,9 @@ struct xsocket { uid_t so_uid; /* XXX */ }; -#if !CONFIG_EMBEDDED - struct xsocket64 { - u_int32_t xso_len; /* length of this structure */ - u_int64_t xso_so; /* makes a convenient handle */ + u_int32_t xso_len; /* length of this structure */ + u_int64_t xso_so; /* makes a convenient handle */ short so_type; short so_options; short so_linger; @@ -400,10 +430,7 @@ struct xsocket64 { uid_t so_uid; /* XXX */ }; -#endif /* !CONFIG_EMBEDDED */ - #ifdef PRIVATE - #define XSO_SOCKET 0x001 #define XSO_RCVBUF 0x002 #define XSO_SNDBUF 0x004 @@ -412,9 +439,9 @@ struct xsocket64 { #define XSO_TCPCB 0x020 struct xsocket_n { - u_int32_t xso_len; /* length of this structure */ - u_int32_t xso_kind; /* XSO_SOCKET */ - u_int64_t xso_so; /* makes a convenient handle */ + u_int32_t xso_len; /* length of this structure */ + u_int32_t xso_kind; /* XSO_SOCKET */ + u_int64_t xso_so; /* makes a convenient handle */ short so_type; u_int32_t so_options; short so_linger; @@ -433,8 +460,8 @@ struct xsocket_n { }; struct xsockbuf_n { - u_int32_t xsb_len; /* length of this structure */ - u_int32_t xsb_kind; /* XSO_RCVBUF or XSO_SNDBUF */ + u_int32_t xsb_len; /* length of this structure */ + u_int32_t xsb_kind; /* XSO_RCVBUF or XSO_SNDBUF */ u_int32_t sb_cc; u_int32_t sb_hiwat; u_int32_t sb_mbcnt; @@ -445,42 +472,37 @@ struct xsockbuf_n { }; struct xsockstat_n { - u_int32_t xst_len; /* length of this structure */ - u_int32_t xst_kind; /* XSO_STATS */ + u_int32_t xst_len; /* length of this structure */ + u_int32_t xst_kind; /* XSO_STATS */ struct data_stats xst_tc_stats[SO_TC_STATS_MAX]; }; - #endif /* 
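M_FIRST_CMSGHDR()/M_NXT_CMSGHDR() are the mbuf-level analogues of the user-visible CMSG_FIRSTHDR()/CMSG_NXTHDR() walk, with the same alignment and bounds checks. The user-space equivalent:

#include <sys/socket.h>
#include <stdio.h>

/* Walk every control message attached to a received datagram. */
static void
dump_cmsgs(int s)
{
    char data[512], ctl[512];
    struct iovec iov = { .iov_base = data, .iov_len = sizeof (data) };
    struct msghdr msg = {
        .msg_iov = &iov, .msg_iovlen = 1,
        .msg_control = ctl, .msg_controllen = sizeof (ctl),
    };

    if (recvmsg(s, &msg, 0) == -1)
        return;

    for (struct cmsghdr *cm = CMSG_FIRSTHDR(&msg); cm != NULL;
        cm = CMSG_NXTHDR(&msg, cm))
        printf("cmsg level %d type %d len %u\n",
            cm->cmsg_level, cm->cmsg_type, (unsigned)cm->cmsg_len);
}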
PRIVATE */ #pragma pack() #ifdef KERNEL_PRIVATE -/* - * Macros for sockets and socket buffering. - */ - -#define sbtoso(sb) (sb->sb_so) +#include /* - * Functions for sockets and socket buffering. - * These are macros on FreeBSD. On Darwin the - * implementation is in bsd/kern/uipc_socket2.c + * Argument structure for sosetopt et seq. This is in the KERNEL + * section because it will never be visible to user code. */ +enum sopt_dir { SOPT_GET, SOPT_SET }; +struct sockopt { + enum sopt_dir sopt_dir; /* is this a get or a set? */ + int sopt_level; /* second arg of [gs]etsockopt */ + int sopt_name; /* third arg of [gs]etsockopt */ + user_addr_t sopt_val; /* fourth arg of [gs]etsockopt */ + size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ + struct proc *sopt_p; /* calling process or null if kernel */ +}; -__BEGIN_DECLS -int sb_notify(struct sockbuf *sb); -int sbspace(struct sockbuf *sb); -int sosendallatonce(struct socket *so); -int soreadable(struct socket *so); -int sowriteable(struct socket *so); -void sballoc(struct sockbuf *sb, struct mbuf *m); -void sbfree(struct sockbuf *sb, struct mbuf *m); -int sblock(struct sockbuf *sb, int wf); -void sbunlock(struct sockbuf *sb, int locked); -void sorwakeup(struct socket *so); -void sowwakeup(struct socket *so); -__END_DECLS +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_PCB); +MALLOC_DECLARE(M_SONAME); +#endif /* MALLOC_DECLARE */ +#ifdef BSD_KERNEL_PRIVATE /* * Socket extension mechanism: control block hooks: * This is the "head" of any control block for an extenstion @@ -497,10 +519,6 @@ struct kextcb { struct sockutil *e_sout; /* Sockbuf utility functions */ }; #define EXT_NULL 0x0 /* STATE: Not in use */ -#define sotokextcb(so) (so ? so->so_ext : 0) - -#ifdef KERNEL -#include /* Hints for socket event processing */ #define SO_FILT_HINT_LOCKED 0x00000001 /* socket is already locked */ @@ -509,30 +527,34 @@ struct kextcb { #define SO_FILT_HINT_CANTSENDMORE 0x00000008 /* Can't write more data */ #define SO_FILT_HINT_TIMEOUT 0x00000010 /* timeout */ #define SO_FILT_HINT_NOSRCADDR 0x00000020 /* No src address available */ -#define SO_FILT_HINT_IFDENIED 0x00000040 /* interface denied connection */ +#define SO_FILT_HINT_IFDENIED 0x00000040 /* interface denied access */ #define SO_FILT_HINT_SUSPEND 0x00000080 /* output queue suspended */ #define SO_FILT_HINT_RESUME 0x00000100 /* output queue resumed */ #define SO_FILT_HINT_KEEPALIVE 0x00000200 /* TCP Keepalive received */ - -#define SO_FILT_HINT_EV (SO_FILT_HINT_CONNRESET | \ - SO_FILT_HINT_CANTRCVMORE | SO_FILT_HINT_CANTSENDMORE | \ - SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR | \ - SO_FILT_HINT_IFDENIED | SO_FILT_HINT_SUSPEND | \ - SO_FILT_HINT_RESUME | SO_FILT_HINT_KEEPALIVE) - -/* - * Argument structure for sosetopt et seq. This is in the KERNEL - * section because it will never be visible to user code. - */ -enum sopt_dir { SOPT_GET, SOPT_SET }; -struct sockopt { - enum sopt_dir sopt_dir; /* is this a get or a set? 
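struct sockopt (relocated above) is filled in by the socket layer and handed to a protocol's ctloutput routine; sooptcopyin()/sooptcopyout(), declared further down, do the user/kernel copies. A hypothetical handler sketch:

static int
myproto_ctloutput(struct socket *so, struct sockopt *sopt)
{
    int error, val = 0;

    if (sopt->sopt_dir == SOPT_SET) {
        /* Copy exactly one int in from user space. */
        error = sooptcopyin(sopt, &val, sizeof (val), sizeof (val));
        /* ... on success, apply val to the (hypothetical) pcb ... */
    } else {
        /* SOPT_GET: copy the current value back out. */
        error = sooptcopyout(sopt, &val, sizeof (val));
    }
    return (error);
}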
*/ - int sopt_level; /* second arg of [gs]etsockopt */ - int sopt_name; /* third arg of [gs]etsockopt */ - user_addr_t sopt_val; /* fourth arg of [gs]etsockopt */ - size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ - struct proc *sopt_p; /* calling process or null if kernel */ -}; +#define SO_FILT_HINT_ADAPTIVE_WTIMO 0x00000400 /* TCP adaptive write timeout */ +#define SO_FILT_HINT_ADAPTIVE_RTIMO 0x00000800 /* TCP adaptive read timeout */ +#define SO_FILT_HINT_CONNECTED 0x00001000 /* socket is connected */ +#define SO_FILT_HINT_DISCONNECTED 0x00002000 /* socket is disconnected */ +#define SO_FILT_HINT_CONNINFO_UPDATED 0x00004000 /* updated conninfo avail. */ +#define SO_FILT_HINT_MPFAILOVER 0x00008000 /* multipath failover */ +#define SO_FILT_HINT_MPSTATUS 0x00010000 /* multipath status */ +#define SO_FILT_HINT_MUSTRST 0x00020000 /* must send RST and close */ + +#define SO_FILT_HINT_BITS \ + "\020\1LOCKED\2CONNRESET\3CANTRCVMORE\4CANTSENDMORE\5TIMEOUT" \ + "\6NOSRCADDR\7IFDENIED\10SUSPEND\11RESUME\12KEEPALIVE\13AWTIMO" \ + "\14ARTIMO\15CONNECTED\16DISCONNECTED\17CONNINFO_UPDATED" \ + "\20MPFAILOVER\21MPSTATUS\22MUSTRST" + +/* Mask for hints that have corresponding kqueue events */ +#define SO_FILT_HINT_EV \ + (SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE | \ + SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT | \ + SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED | \ + SO_FILT_HINT_SUSPEND | SO_FILT_HINT_RESUME | \ + SO_FILT_HINT_KEEPALIVE | SO_FILT_HINT_ADAPTIVE_WTIMO | \ + SO_FILT_HINT_ADAPTIVE_RTIMO | SO_FILT_HINT_CONNECTED | \ + SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_CONNINFO_UPDATED) #if SENDFILE struct sf_buf { @@ -541,59 +563,30 @@ struct sf_buf { struct vm_page *m; /* currently mapped page */ vm_offset_t kva; /* va of mapping */ }; -#endif - -#ifdef MALLOC_DECLARE -MALLOC_DECLARE(M_PCB); -MALLOC_DECLARE(M_SONAME); -#endif +#endif /* SENDFILE */ -extern int maxsockets; -extern u_int32_t sb_max; -extern int socket_zone; -extern so_gen_t so_gencnt; -extern int socket_debug; -extern int sosendjcl; -extern int sosendjcl_ignore_capab; -extern int sodefunctlog; -extern int sothrottlelog; -extern int somaxconn; - -struct file; -struct filedesc; -struct mbuf; -struct sockaddr; -struct stat; -struct ucred; -struct uio; -struct knote; -struct so_tcdbg; - -#define SBLASTRECORDCHK(sb, s) \ +#define SBLASTRECORDCHK(sb, s) \ if (socket_debug) sblastrecordchk(sb, s); -#define SBLASTMBUFCHK(sb, s) \ +#define SBLASTMBUFCHK(sb, s) \ if (socket_debug) sblastmbufchk(sb, s); -#define SB_EMPTY_FIXUP(sb) { \ - if ((sb)->sb_mb == NULL) { \ - (sb)->sb_mbtail = NULL; \ - (sb)->sb_lastrecord = NULL; \ - } \ +#define SB_EMPTY_FIXUP(sb) { \ + if ((sb)->sb_mb == NULL) { \ + (sb)->sb_mbtail = NULL; \ + (sb)->sb_lastrecord = NULL; \ + } \ } -#define SB_MB_CHECK(sb) do { \ - if (((sb)->sb_mb != NULL && \ - (sb)->sb_cc == 0) || \ - ((sb)->sb_mb == NULL && \ - (sb)->sb_cc > 0)) \ - panic("corrupt so_rcv: sb_mb %p sb_cc %d\n", \ - (sb)->sb_mb, (sb)->sb_cc); \ +#define SB_MB_CHECK(sb) do { \ + if (((sb)->sb_mb != NULL && \ + (sb)->sb_cc == 0) || \ + ((sb)->sb_mb == NULL && (sb)->sb_cc > 0)) \ + panic("corrupt so_rcv: sb_mb %p sb_cc %d\n", \ + (sb)->sb_mb, (sb)->sb_cc); \ } while(0) - #define SODEFUNCTLOG(x) do { if (sodefunctlog) printf x; } while (0) - #define SOTHROTTLELOG(x) do { if (sothrottlelog) printf x; } while (0) /* @@ -607,156 +600,265 @@ struct so_tcdbg; #define SOTCDB_NO_RECVTCPBG 0x20 /* Do not use throttling on receiver-side of TCP */ #define SOTCDB_NO_PRIVILEGED 0x40 /* Do not set 
privileged traffic flag */ +#define SOCK_DOM(so) ((so)->so_proto->pr_domain->dom_family) +#define SOCK_TYPE(so) ((so)->so_proto->pr_type) +#define SOCK_PROTO(so) ((so)->so_proto->pr_protocol) + +#define SOCK_CHECK_DOM(so, dom) (SOCK_DOM(so) == (dom)) +#define SOCK_CHECK_TYPE(so, type) (SOCK_TYPE(so) == (type)) +#define SOCK_CHECK_PROTO(so, proto) (SOCK_PROTO(so) == (proto)) + +/* + * Socket process information + */ +struct so_procinfo { + pid_t spi_pid; + pid_t spi_epid; +}; + +extern int maxsockets; +extern u_int32_t sb_max; +extern so_gen_t so_gencnt; +extern int socket_debug; +extern int sosendjcl; +extern int sosendjcl_ignore_capab; +extern int sodefunctlog; +extern int sothrottlelog; +extern int sorestrictrecv; +extern int somaxconn; +extern uint32_t tcp_autosndbuf_max; extern u_int32_t sotcdb; +extern u_int32_t net_io_policy_throttled; +extern u_int32_t net_io_policy_log; +#if CONFIG_PROC_UUID_POLICY +extern u_int32_t net_io_policy_uuid; +#endif /* CONFIG_PROC_UUID_POLICY */ +#endif /* BSD_KERNEL_PRIVATE */ + +struct mbuf; +struct sockaddr; +struct ucred; +struct uio; /* * From uipc_socket and friends */ __BEGIN_DECLS -/* Not exported */ -extern void socketinit(void) __attribute__((section("__TEXT, initcode"))); - /* Exported */ +extern int sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control, int *error_out); +extern int sbappendrecord(struct sockbuf *sb, struct mbuf *m0); +extern void sbflush(struct sockbuf *sb); +extern int sbspace(struct sockbuf *sb); +extern int soabort(struct socket *so); +extern void socantrcvmore(struct socket *so); +extern void socantsendmore(struct socket *so); +extern int sodisconnect(struct socket *so); +extern void sofree(struct socket *so); +extern void sofreelastref(struct socket *, int); +extern void soisconnected(struct socket *so); +extern void soisconnecting(struct socket *so); +extern void soisdisconnected(struct socket *so); +extern void soisdisconnecting(struct socket *so); +extern struct socket *sonewconn(struct socket *head, int connstatus, + const struct sockaddr *from); +extern int sopoll(struct socket *so, int events, struct ucred *cred, void *wql); +extern int sooptcopyin(struct sockopt *sopt, void *data, size_t len, + size_t minlen); +extern int sooptcopyout(struct sockopt *sopt, void *data, size_t len); +extern int soreceive(struct socket *so, struct sockaddr **paddr, + struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); +extern int soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc); +extern void sorwakeup(struct socket *so); +extern int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags); +__END_DECLS + +#ifdef BSD_KERNEL_PRIVATE +struct file; +struct filedesc; +struct so_tcdbg; + +__BEGIN_DECLS +/* Not exported */ +extern void socketinit(void); extern struct sockaddr *dup_sockaddr(struct sockaddr *sa, int canwait); extern int getsock(struct filedesc *fdp, int fd, struct file **fpp); extern int sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type); +extern void get_sockev_state(struct socket *, u_int32_t *); +extern void so_update_last_owner_locked(struct socket *, struct proc *); +extern void so_update_policy(struct socket *); extern int sbappend(struct sockbuf *sb, struct mbuf *m); extern int sbappendstream(struct sockbuf *sb, struct mbuf *m); -extern int sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, - struct mbuf *m0, struct mbuf *control, int *error_out); extern int 
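The new SOCK_DOM()/SOCK_TYPE()/SOCK_PROTO() accessors replace open-coded so_proto dereference chains. A hypothetical helper showing the intended use:

/* True only for TCP-over-IPv4 sockets. */
static int
is_tcp4(struct socket *so)
{
    return (SOCK_CHECK_DOM(so, PF_INET) &&
        SOCK_CHECK_TYPE(so, SOCK_STREAM) &&
        SOCK_CHECK_PROTO(so, IPPROTO_TCP));
}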
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int *error_out); -extern int sbappendrecord(struct sockbuf *sb, struct mbuf *m0); +extern int sbappendmsgstream_rcv(struct sockbuf *sb, struct mbuf *m, + uint32_t seqnum, int flags); +extern int sbappendstream_rcvdemux(struct socket *so, struct mbuf *m, + uint32_t seqnum, int flags); +#if MPTCP +extern int sbappendmptcpstream_rcv(struct sockbuf *sb, struct mbuf *m); +#endif /* MPTCP */ +extern int sbappendmsg_snd(struct sockbuf *sb, struct mbuf *m); +extern void sbpull_unordered_data(struct socket *, int32_t, int32_t); extern void sbcheck(struct sockbuf *sb); extern void sblastmbufchk(struct sockbuf *, const char *); extern void sblastrecordchk(struct sockbuf *, const char *); extern struct mbuf *sbcreatecontrol(caddr_t p, int size, int type, int level); -extern struct mbuf **sbcreatecontrol_mbuf(caddr_t p, int size, int type, int level, struct mbuf** m); +extern struct mbuf **sbcreatecontrol_mbuf(caddr_t p, int size, int type, + int level, struct mbuf** m); extern void sbdrop(struct sockbuf *sb, int len); extern void sbdroprecord(struct sockbuf *sb); -extern void sbflush(struct sockbuf *sb); extern int sbinsertoob(struct sockbuf *sb, struct mbuf *m0); extern void sbrelease(struct sockbuf *sb); extern int sbreserve(struct sockbuf *sb, u_int32_t cc); extern void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); extern int sbwait(struct sockbuf *sb); -extern int sb_lock(struct sockbuf *sb); +extern void sbwakeup(struct sockbuf *sb); extern void sb_empty_assert(struct sockbuf *, const char *); +extern int sb_notify(struct sockbuf *sb); +extern void sballoc(struct sockbuf *sb, struct mbuf *m); +extern void sbfree(struct sockbuf *sb, struct mbuf *m); +extern void sbfree_chunk(struct sockbuf *sb, struct mbuf *m); + +/* + * Flags to sblock(). + */ +#define SBL_WAIT 0x00000001 /* Wait if not immediately available. */ +#define SBL_NOINTR 0x00000002 /* Force non-interruptible sleep. 
*/ +#define SBL_IGNDEFUNCT 0x00000004 /* Ignore defunct'd state */ +#define SBL_VALID (SBL_WAIT | SBL_NOINTR | SBL_IGNDEFUNCT) +extern int sblock(struct sockbuf *sb, uint32_t flags); +extern void sbunlock(struct sockbuf *sb, boolean_t keeplocked); -extern int soabort(struct socket *so); extern int soaccept(struct socket *so, struct sockaddr **nam); extern int soacceptlock(struct socket *so, struct sockaddr **nam, int dolock); extern int soacceptfilter(struct socket *so); extern struct socket *soalloc(int waitok, int dom, int type); -extern int sobind(struct socket *so, struct sockaddr *nam); -extern void socantrcvmore(struct socket *so); -extern void socantsendmore(struct socket *so); +extern int sobindlock(struct socket *so, struct sockaddr *nam, int dolock); extern int soclose(struct socket *so); +extern int soclose_locked(struct socket *so); +extern void soclose_wait_locked(struct socket *so); extern int soconnect(struct socket *so, struct sockaddr *nam); extern int soconnectlock(struct socket *so, struct sockaddr *nam, int dolock); extern int soconnect2(struct socket *so1, struct socket *so2); +extern int soconnectxlocked(struct socket *so, struct sockaddr_list **src_sl, + struct sockaddr_list **dst_sl, struct proc *, uint32_t, associd_t, + connid_t *, uint32_t, void *, u_int32_t); +extern int sodisconnectx(struct socket *so, associd_t, connid_t); +extern int sodisconnectxlocked(struct socket *so, associd_t, connid_t); +extern int sopeelofflocked(struct socket *so, associd_t, struct socket **); +extern void soevupcall(struct socket *, u_int32_t); +/* flags for socreate_internal */ +#define SOCF_ASYNC 0x1 /* non-blocking socket */ +#define SOCF_MP_SUBFLOW 0x2 /* multipath subflow socket */ +extern int socreate_internal(int dom, struct socket **aso, int type, int proto, + struct proc *, uint32_t, struct proc *); extern int socreate(int dom, struct socket **aso, int type, int proto); +extern int socreate_delegate(int dom, struct socket **aso, int type, int proto, pid_t epid); extern void sodealloc(struct socket *so); -extern int sodisconnect(struct socket *so); extern int sodisconnectlocked(struct socket *so); -extern void sofree(struct socket *so); extern void soreference(struct socket *so); extern void sodereference(struct socket *so); extern void somultipages(struct socket *, boolean_t); extern int sosetdefunct(struct proc *, struct socket *, int level, boolean_t); extern int sodefunct(struct proc *, struct socket *, int level); -extern int sogetopt(struct socket *so, struct sockopt *sopt); extern void sohasoutofband(struct socket *so); -extern void soisconnected(struct socket *so); -extern void soisconnecting(struct socket *so); -extern void soisdisconnected(struct socket *so); extern void sodisconnectwakeup(struct socket *so); -extern void soisdisconnecting(struct socket *so); extern int soisthrottled(struct socket *so); extern int soisprivilegedtraffic(struct socket *so); +extern int soissrcbackground(struct socket *so); extern int solisten(struct socket *so, int backlog); extern struct socket *sodropablereq(struct socket *head); -extern struct socket *sonewconn(struct socket *head, int connstatus, - const struct sockaddr *from); -extern int sooptcopyin(struct sockopt *sopt, void *data, size_t len, - size_t minlen); -extern int sooptcopyout(struct sockopt *sopt, void *data, size_t len); extern int socket_lock(struct socket *so, int refcount); extern int socket_unlock(struct socket *so, int refcount); -extern void sofreelastref(struct socket *, int); extern int 
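sblock() now takes SBL_* flags rather than a bare wait flag. A sketch of the implied locking pattern; the boolean passed to sbunlock() is assumed here to mean "keep the socket lock held", so FALSE drops it:

static int
drain_rcvbuf(struct socket *so)
{
    int error;

    /* Sleep until the receive-buffer lock is available. */
    error = sblock(&so->so_rcv, SBL_WAIT);
    if (error != 0)
        return (error);         /* interrupted, or socket defunct */

    /* ... manipulate so->so_rcv ... */

    sbunlock(&so->so_rcv, FALSE);
    return (0);
}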
sogetaddr_locked(struct socket *, struct sockaddr **, int); extern const char *solockhistory_nr(struct socket *); extern void soevent(struct socket *so, long hint); -extern void get_sockev_state(struct socket *, u_int32_t *); +extern void sorflush(struct socket *so); +extern void sowflush(struct socket *so); +extern void sowakeup(struct socket *so, struct sockbuf *sb); +extern int soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p); +extern int sogetoptlock(struct socket *so, struct sockopt *sopt, int); +extern int sosetoptlock(struct socket *so, struct sockopt *sopt, int); +extern int soshutdown(struct socket *so, int how); +extern int soshutdownlock(struct socket *so, int how); +extern void sotoxsocket(struct socket *so, struct xsocket *xso); +extern void sotoxsocket64(struct socket *so, struct xsocket64 *xso); +extern int sosendallatonce(struct socket *so); +extern int soreadable(struct socket *so); +extern int sowriteable(struct socket *so); +extern void sowwakeup(struct socket *so); +extern int sosendcheck(struct socket *, struct sockaddr *, user_ssize_t, + int32_t, int32_t, int, int *, struct mbuf *); + +extern int soo_ioctl(struct fileproc *, u_long, caddr_t, vfs_context_t); +extern int soo_stat(struct socket *, void *, int); +extern int soo_select(struct fileproc *, int, void *, vfs_context_t); +extern int soo_kqfilter(struct fileproc *, struct knote *, vfs_context_t); + +extern struct sockaddr_entry *sockaddrentry_alloc(int); +extern void sockaddrentry_free(struct sockaddr_entry *); +extern struct sockaddr_entry *sockaddrentry_dup(const struct sockaddr_entry *, + int); +extern struct sockaddr_list *sockaddrlist_alloc(int); +extern void sockaddrlist_free(struct sockaddr_list *); +extern void sockaddrlist_insert(struct sockaddr_list *, + struct sockaddr_entry *); +extern void sockaddrlist_remove(struct sockaddr_list *, + struct sockaddr_entry *); +extern struct sockaddr_list *sockaddrlist_dup(const struct sockaddr_list *, + int); -#ifdef BSD_KERNEL_PRIVATE /* Service class flags used for setting service class on a packet */ #define PKT_SCF_IPV6 0x00000001 /* IPv6 packet */ #define PKT_SCF_TCP_ACK 0x00000002 /* Pure TCP ACK */ extern void set_packet_service_class(struct mbuf *, struct socket *, mbuf_svc_class_t, u_int32_t); -extern void so_tc_update_stats(struct mbuf *, struct socket *, mbuf_svc_class_t ); +extern void so_tc_update_stats(struct mbuf *, struct socket *, + mbuf_svc_class_t); extern mbuf_svc_class_t mbuf_service_class_from_control(struct mbuf *); extern mbuf_svc_class_t so_tc2msc(int); extern int so_svc2tc(mbuf_svc_class_t); +extern u_int8_t tcp_cansbgrow(struct sockbuf *sb); +extern int tcp_get_msg_priority(struct mbuf *, uint32_t *); extern void set_tcp_stream_priority(struct socket *so); + extern int so_set_traffic_class(struct socket *, int); extern void so_set_default_traffic_class(struct socket *); extern int so_set_opportunistic(struct socket *, int); extern int so_get_opportunistic(struct socket *); extern int so_set_recv_anyif(struct socket *, int); extern int so_get_recv_anyif(struct socket *); +extern int so_set_effective_pid(struct socket *, int, struct proc *); +extern int so_set_effective_uuid(struct socket *, uuid_t, struct proc *); +extern int so_set_restrictions(struct socket *, uint32_t); +extern uint32_t so_get_restrictions(struct socket *); extern void socket_tclass_init(void); extern int so_set_tcdbg(struct socket *, struct so_tcdbg *); extern int sogetopt_tcdbg(struct socket *, struct sockopt *); +extern void so_set_lro(struct 
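A sketch of building the sockaddr_list that soconnectxlocked() consumes, using the allocators declared above. The int argument to the allocators is assumed to be a wait/canwait flag (nonzero = may block), by analogy with dup_sockaddr():

static struct sockaddr_list *
make_dst_list(struct sockaddr *sa)
{
    struct sockaddr_list *sl = sockaddrlist_alloc(1);   /* canwait */
    struct sockaddr_entry *se = sockaddrentry_alloc(1);

    if (sl == NULL || se == NULL)
        return (NULL);          /* cleanup of partial state elided */

    se->se_addr = dup_sockaddr(sa, 1);
    sockaddrlist_insert(sl, se);
    return (sl);
}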
socket*, int); + +extern int so_isdstlocal(struct socket *); extern void so_recv_data_stat(struct socket *, struct mbuf *, size_t); extern int so_wait_for_if_feedback(struct socket *); -#endif /* BSD_KERNEL_PRIVATE */ - -/* - * XXX; prepare mbuf for (__FreeBSD__ < 3) routines. - * Used primarily in IPSec and IPv6 code. - */ +extern int msgq_sbspace(struct socket *so, struct mbuf *control); extern int soopt_getm(struct sockopt *sopt, struct mbuf **mp); extern int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m); extern int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m); +extern boolean_t so_cache_timer(void); -extern int sopoll(struct socket *so, int events, struct ucred *cred, void *wql); -extern int soreceive(struct socket *so, struct sockaddr **paddr, - struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); -extern int soreserve(struct socket *so, u_int32_t sndcc, u_int32_t rcvcc); -extern void sorflush(struct socket *so); -extern int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, - struct mbuf *top, struct mbuf *control, int flags); - -extern int sosetopt(struct socket *so, struct sockopt *sopt); -extern int soshutdown(struct socket *so, int how); -extern int soshutdownlock(struct socket *so, int how); -extern void sotoxsocket(struct socket *so, struct xsocket *xso); -#if !CONFIG_EMBEDDED -extern void sotoxsocket64(struct socket *so, struct xsocket64 *xso); -#endif -extern void sbwakeup(struct sockbuf *sb); -extern void sowakeup(struct socket *so, struct sockbuf *sb); -extern int soioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p); - -#ifndef __APPLE__ -/* accept filter functions */ -extern int accept_filt_add(struct accept_filter *filt); -extern int accept_filt_del(char *name); -extern struct accept_filter *accept_filt_get(char *name); -#ifdef ACCEPT_FILTER_MOD -extern int accept_filt_generic_mod_event(module_t mod, int event, void *data); -SYSCTL_DECL(_net_inet_accf); -#endif /* ACCEPT_FILTER_MOD */ -#endif /* !defined(__APPLE__) */ +extern void mptcp_preproc_sbdrop(struct mbuf *, unsigned int); +extern void mptcp_postproc_sbdrop(struct mbuf *, u_int64_t, u_int32_t, + u_int32_t); +extern void mptcp_adj_rmap(struct socket *, struct mbuf *); +extern void netpolicy_post_msg(uint32_t, struct netpolicy_event_data *, + uint32_t); __END_DECLS - -#endif /* KERNEL */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* KERNEL_PRIVATE */ - #endif /* !_SYS_SOCKETVAR_H_ */ diff --git a/bsd/sys/sockio.h b/bsd/sys/sockio.h index 98009542c..8de12db1d 100644 --- a/bsd/sys/sockio.h +++ b/bsd/sys/sockio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -197,21 +197,20 @@ #define SIOCSIFDEVMTU SIOCSIFALTMTU /* deprecated */ #endif /* KERNEL_PRIVATE */ -#ifdef PRIVATE -#ifdef KERNEL +#if !defined(KERNEL) || defined(KERNEL_PRIVATE) #define SIOCIFGCLONERS _IOWR('i', 129, struct if_clonereq) /* get cloners */ +#endif /* !KERNEL || KERNEL_PRIVATE */ +#ifdef KERNEL_PRIVATE #define SIOCIFGCLONERS32 _IOWR('i', 129, struct if_clonereq32) /* get cloners */ #define SIOCIFGCLONERS64 _IOWR('i', 129, struct if_clonereq64) /* get cloners */ -#endif /* KERNEL */ - -#endif /* PRIVATE */ +#endif /* KERNEL_PRIVATE */ #define SIOCGIFASYNCMAP _IOWR('i', 124, struct ifreq) /* get ppp asyncmap */ #define SIOCSIFASYNCMAP _IOW('i', 125, struct ifreq) /* set ppp asyncmap */ #ifdef PRIVATE -#define SIOCSETOT _IOW('s', 128, int) /* set socket for LibOT */ +#define SIOCSETOT _IOW('s', 128, int) /* deprecated */ #endif /* PRIVATE */ #define SIOCGIFMAC _IOWR('i', 130, struct ifreq) /* get IF MAC label */ @@ -237,4 +236,27 @@ #define SIOCGIFTHROTTLE _IOWR('i', 149, struct if_throttlereq) #endif /* PRIVATE */ +#ifdef PRIVATE +#define SIOCGASSOCIDS _IOWR('s', 150, struct so_aidreq) /* get associds */ +#define SIOCGCONNIDS _IOWR('s', 151, struct so_cidreq) /* get connids */ +#define SIOCGCONNINFO _IOWR('s', 152, struct so_cinforeq) /* get conninfo */ +#ifdef BSD_KERNEL_PRIVATE +#define SIOCGASSOCIDS32 _IOWR('s', 150, struct so_aidreq32) +#define SIOCGASSOCIDS64 _IOWR('s', 150, struct so_aidreq64) +#define SIOCGCONNIDS32 _IOWR('s', 151, struct so_cidreq32) +#define SIOCGCONNIDS64 _IOWR('s', 151, struct so_cidreq64) +#define SIOCGCONNINFO32 _IOWR('s', 152, struct so_cinforeq32) +#define SIOCGCONNINFO64 _IOWR('s', 152, struct so_cinforeq64) +#endif /* BSD_KERNEL_PRIVATE */ +#define SIOCSCONNORDER _IOWR('s', 153, struct so_cordreq) /* set conn order */ +#define SIOCGCONNORDER _IOWR('s', 154, struct so_cordreq) /* get conn order */ +#endif /* PRIVATE */ + +#ifdef PRIVATE +#define SIOCSIFLOG _IOWR('i', 155, struct ifreq) +#define SIOCGIFLOG _IOWR('i', 156, struct ifreq) +#define SIOCGIFDELEGATE _IOWR('i', 157, struct ifreq) +#define SIOCGIFLLADDR _IOWR('i', 158, struct ifreq) /* get link level addr */ +#define SIOCGIFTYPE _IOWR('i', 159, struct ifreq) /* get interface type */ +#endif /* PRIVATE */ #endif /* !_SYS_SOCKIO_H_ */ diff --git a/bsd/sys/spawn_internal.h b/bsd/sys/spawn_internal.h index 42f4687f8..c96434f3a 100644 --- a/bsd/sys/spawn_internal.h +++ b/bsd/sys/spawn_internal.h @@ -54,6 +54,7 @@ typedef enum { PSPA_SPECIAL = 0, PSPA_EXCEPTION = 1, PSPA_AU_SESSION = 2, + PSPA_IMP_WATCHPORTS = 3, } pspa_t; /* @@ -87,6 +88,36 @@ typedef struct _posix_spawn_port_actions { #define NBINPREFS 4 +/* + * Mapping of opaque data pointer to a MAC policy (specified by name). + */ +typedef struct _ps_mac_policy_extension { + char policyname[128]; + union { + uint64_t data; + void *datap; /* pointer in kernel memory */ + }; + uint64_t datalen; +} _ps_mac_policy_extension_t; + +/* + * A collection of extra data passed to MAC policies for the newly spawned process. + */ +typedef struct _posix_spawn_mac_policy_extensions { + int psmx_alloc; + int psmx_count; + _ps_mac_policy_extension_t psmx_extensions[]; +} *_posix_spawn_mac_policy_extensions_t; + +/* + * Returns size in bytes of a _posix_spawn_mac_policy_extensions holding x elements. 
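A hypothetical user-space query through the new SIOCGCONNINFO ioctl. This is a PRIVATE interface, and the single-pass, caller-supplied-buffer convention shown here is an assumption, not something the header guarantees:

#include <sys/sockio.h>
#include <sys/ioctl.h>
#include <string.h>

static int
get_conninfo(int s, connid_t cid, struct sockaddr_storage *dst)
{
    struct so_cinforeq ci;

    memset(&ci, 0, sizeof (ci));
    ci.scir_cid = cid;
    ci.scir_dst = (struct sockaddr *)dst;
    ci.scir_dst_len = sizeof (*dst);

    if (ioctl(s, SIOCGCONNINFO, &ci) == -1)
        return (-1);
    return (ci.scir_error);     /* most recent error on the connection */
}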
+ */ +#define PS_MAC_EXTENSIONS_SIZE(x) \ + __offsetof(struct _posix_spawn_mac_policy_extensions, psmx_extensions[(x)]) + +#define PS_MAC_EXTENSIONS_INIT_COUNT 2 + + /* * A posix_spawnattr structure contains all of the attribute elements that * can be set, as well as any metadata whose validity is signalled by the @@ -95,6 +126,7 @@ typedef struct _posix_spawn_port_actions { */ typedef struct _posix_spawnattr { short psa_flags; /* spawn attribute flags */ + short flags_padding; /* get the flags to be int aligned */ sigset_t psa_sigdefault; /* signal set to default */ sigset_t psa_sigmask; /* signal set to mask */ pid_t psa_pgroup; /* pgroup to spawn into */ @@ -103,51 +135,78 @@ typedef struct _posix_spawnattr { int psa_apptype; /* app type and process spec behav */ uint64_t psa_cpumonitor_percent; /* CPU usage monitor percentage */ uint64_t psa_cpumonitor_interval; /* CPU usage monitor interval, in seconds */ - _posix_spawn_port_actions_t psa_ports; /* special/exception ports */ - /* XXX - k64/u32 unaligned below here */ -#if CONFIG_MEMORYSTATUS || CONFIG_EMBEDDED || TARGET_OS_EMBEDDED - /* Jetsam related */ - short psa_jetsam_flags; /* flags */ - int psa_priority; /* relative importance */ - int psa_high_water_mark; /* resident page count limit */ -#endif + /* + * TODO: cleanup - see . psa_ports is a pointer, + * meaning that the fields following differ in alignment between 32 and + * 64-bit architectures. All pointers (existing and new) should therefore + * be placed at the end; changing this now, however, would currently break + * some legacy dependencies. The radar will be used to track resolution when + * appropriate. + */ + + short psa_jetsam_flags; /* jetsam flags */ + short short_padding; /* Padding for alignment issues */ + int psa_priority; /* jetsam relative importance */ + int psa_high_water_mark; /* jetsam resident page count limit */ + int int_padding; /* Padding for alignment issues */ + /* MAC policy-specific extensions. */ + _posix_spawn_port_actions_t psa_ports; /* special/exception ports */ + _posix_spawn_mac_policy_extensions_t psa_mac_extensions; } *_posix_spawnattr_t; /* * Jetsam flags */ -#if CONFIG_MEMORYSTATUS || CONFIG_EMBEDDED || TARGET_OS_EMBEDDED -#define POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY 0x1 -#endif +#define POSIX_SPAWN_JETSAM_SET 0x8000 + +#define POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY 0x1 +#define POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND 0x2 /* - * DEPRECATED: maintained for transition purposes only - * posix_spawn apptype settings. 
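(Aside, not part of the patch: PS_MAC_EXTENSIONS_SIZE() above is the standard offsetof() idiom for sizing a structure that ends in a flexible array member. A standalone userspace sketch of the same idiom; the struct and macro names here are invented stand-ins, not the kernel types.)

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

struct ext { char policyname[128]; uint64_t data; uint64_t datalen; };
struct exts { int alloc; int count; struct ext list[]; };

/* bytes needed for a struct exts holding x trailing elements */
#define EXTS_SIZE(x) offsetof(struct exts, list[(x)])

int
main(void)
{
	struct exts *p = malloc(EXTS_SIZE(2));	/* room for 2 extensions */

	if (p != NULL) {
		p->alloc = 2;
		p->count = 0;
	}
	free(p);
	return 0;
}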
+ * Deprecated posix_spawn psa_flags values + * + * POSIX_SPAWN_OSX_TALAPP_START 0x0400 + * POSIX_SPAWN_IOS_RESV1_APP_START 0x0400 + * POSIX_SPAWN_IOS_APPLE_DAEMON_START 0x0800 + * POSIX_SPAWN_IOS_APP_START 0x1000 + * POSIX_SPAWN_OSX_WIDGET_START 0x0800 + * POSIX_SPAWN_OSX_DBCLIENT_START 0x0800 + * POSIX_SPAWN_OSX_RESVAPP_START 0x1000 */ -#if TARGET_OS_EMBEDDED || CONFIG_EMBEDDED -/* for compat sake */ -#define POSIX_SPAWN_OSX_TALAPP_START 0x0400 -#define POSIX_SPAWN_IOS_RESV1_APP_START 0x0400 -#define POSIX_SPAWN_IOS_APPLE_DAEMON_START 0x0800 /* not a bug, same as widget just rename */ -#define POSIX_SPAWN_IOS_APP_START 0x1000 -#else /* TARGET_OS_EMBEDDED */ -#define POSIX_SPAWN_OSX_TALAPP_START 0x0400 -#define POSIX_SPAWN_OSX_WIDGET_START 0x0800 -#define POSIX_SPAWN_OSX_DBCLIENT_START 0x0800 /* not a bug, same as widget just rename */ -#define POSIX_SPAWN_OSX_RESVAPP_START 0x1000 /* reserved for app start usages */ -#endif /* TARGET_OS_EMBEDDED */ +/* + * Deprecated posix_spawn psa_apptype values + * + * POSIX_SPAWN_PROCESS_TYPE_APPLEDAEMON 0x00000001 + * POSIX_SPAWN_PROCESS_TYPE_UIAPP 0x00000002 + * POSIX_SPAWN_PROCESS_TYPE_ADAPTIVE 0x00000004 + * POSIX_SPAWN_PROCESS_TYPE_TAL 0x00000001 + * POSIX_SPAWN_PROCESS_TYPE_WIDGET 0x00000002 + * POSIX_SPAWN_PROCESS_TYPE_DELAYIDLESLEEP 0x10000000 + * + * POSIX_SPAWN_PROCESS_FLAG_IMPORTANCE_DONOR 0x00000010 + * POSIX_SPAWN_PROCESS_FLAG_ADAPTIVE 0x00000020 + * POSIX_SPAWN_PROCESS_FLAG_START_BACKGROUND 0x00000040 + * POSIX_SPAWN_PROCESS_FLAG_START_LIGHT_THROTTLE 0x00000080 + */ /* - * posix_spawn apptype and process attribute settings. + * posix_spawn psa_apptype process type settings. + * when POSIX_SPAWN_PROC_TYPE is set, old psa_apptype bits are ignored */ -#if TARGET_OS_EMBEDDED || CONFIG_EMBEDDED -#define POSIX_SPAWN_APPTYPE_IOS_APPLEDAEMON 0x0001 /* it is an iOS apple daemon */ -#else /* TARGET_OS_EMBEDDED */ -#define POSIX_SPAWN_APPTYPE_OSX_TAL 0x0001 /* it is a TAL app */ -#define POSIX_SPAWN_APPTYPE_OSX_WIDGET 0x0002 /* it is a widget */ -#define POSIX_SPAWN_APPTYPE_DELAYIDLESLEEP 0x10000000 /* Process is marked to delay idle sleep on disk IO */ -#endif /* TARGET_OS_EMBEDDED */ + +#define POSIX_SPAWN_PROCESS_TYPE_NORMAL 0x00000000 +#define POSIX_SPAWN_PROCESS_TYPE_DEFAULT POSIX_SPAWN_PROCESS_TYPE_NORMAL + +#define POSIX_SPAWN_PROC_TYPE_MASK 0x00000F00 + +#define POSIX_SPAWN_PROC_TYPE_APP_DEFAULT 0x00000100 +#define POSIX_SPAWN_PROC_TYPE_APP_TAL 0x00000200 + +#define POSIX_SPAWN_PROC_TYPE_DAEMON_STANDARD 0x00000300 +#define POSIX_SPAWN_PROC_TYPE_DAEMON_INTERACTIVE 0x00000400 +#define POSIX_SPAWN_PROC_TYPE_DAEMON_BACKGROUND 0x00000500 +#define POSIX_SPAWN_PROC_TYPE_DAEMON_ADAPTIVE 0x00000600 /* * Allowable posix_spawn() file actions @@ -238,6 +297,11 @@ struct _posix_spawn_args_desc { __darwin_size_t port_actions_size; /* size of port actions block */ _posix_spawn_port_actions_t port_actions; /* pointer to port block */ + __darwin_size_t mac_extensions_size; + _posix_spawn_mac_policy_extensions_t + mac_extensions; /* pointer to policy-specific + * attributes */ + }; #ifdef KERNEL @@ -255,6 +319,8 @@ struct user32__posix_spawn_args_desc { uint32_t file_actions; /* pointer to block */ uint32_t port_actions_size; /* size of port actions block */ uint32_t port_actions; /* pointer to block */ + uint32_t mac_extensions_size; + uint32_t mac_extensions; }; struct user__posix_spawn_args_desc { @@ -264,6 +330,8 @@ struct user__posix_spawn_args_desc { user_addr_t file_actions; /* pointer to block */ user_size_t port_actions_size; /* size of port actions 
block */ user_addr_t port_actions; /* pointer to block */ + user_size_t mac_extensions_size; /* size of MAC-specific attrs. */ + user_addr_t mac_extensions; /* pointer to block */ }; diff --git a/bsd/sys/stat.h b/bsd/sys/stat.h index d5daf6120..2f6dbcbff 100644 --- a/bsd/sys/stat.h +++ b/bsd/sys/stat.h @@ -79,75 +79,32 @@ #endif /* KERNEL */ /* [XSI] The timespec structure may be defined as described in */ -#define __need_struct_timespec +#include #ifdef KERNEL -#define __need_struct_user64_timespec -#define __need_struct_user32_timespec +#include +#include #endif /* KERNEL */ -#include /* * [XSI] The blkcnt_t, blksize_t, dev_t, ino_t, mode_t, nlink_t, uid_t, * gid_t, off_t, and time_t types shall be defined as described in * . */ -#ifndef _BLKCNT_T -typedef __darwin_blkcnt_t blkcnt_t; -#define _BLKCNT_T -#endif - -#ifndef _BLKSIZE_T -typedef __darwin_blksize_t blksize_t; -#define _BLKSIZE_T -#endif - -#ifndef _DEV_T -typedef __darwin_dev_t dev_t; /* device number */ -#define _DEV_T -#endif - -#ifndef _INO_T -typedef __darwin_ino_t ino_t; /* inode number */ -#define _INO_T -#endif +#include +#include +#include /* device number */ +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef _INO64_T -typedef __darwin_ino64_t ino64_t; /* 64bit inode number */ -#define _INO64_T -#endif +#include #endif /* !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) */ -#ifndef _MODE_T -typedef __darwin_mode_t mode_t; -#define _MODE_T -#endif - -#ifndef _NLINK_T -typedef __uint16_t nlink_t; /* link count */ -#define _NLINK_T -#endif - -#ifndef _UID_T -typedef __darwin_uid_t uid_t; /* user id */ -#define _UID_T -#endif - -#ifndef _GID_T -typedef __darwin_gid_t gid_t; -#define _GID_T -#endif - -#ifndef _OFF_T -typedef __darwin_off_t off_t; -#define _OFF_T -#endif - -#ifndef _TIME_T -#define _TIME_T -typedef __darwin_time_t time_t; -#endif - +#include +#include +#include +#include +#include +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) /* @@ -432,48 +389,7 @@ extern void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp); * [XSI] The following are symbolic names for the values of type mode_t. They * are bitmap values. 
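(Aside, not part of the patch: the user32 and user-native variants of _posix_spawn_args_desc above exist because a 32-bit process lays the structure out with 4-byte pointers and sizes; the kernel copies in that exact shape and widens each field by hand. A self-contained sketch of the idea — field and helper names are invented, not xnu's.)

#include <stdint.h>

struct user32_desc { uint32_t attr_size; uint32_t attrp; };	/* 32-bit layout */
struct user_desc   { uint64_t attr_size; uint64_t attrp; };	/* kernel layout */

static void
widen_desc(const struct user32_desc *in, struct user_desc *out)
{
	out->attr_size = in->attr_size;	/* zero-extend the size */
	out->attrp = in->attrp;		/* zero-extend the user pointer */
}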
*/ -#ifndef S_IFMT -/* File type */ -#define S_IFMT 0170000 /* [XSI] type of file mask */ -#define S_IFIFO 0010000 /* [XSI] named pipe (fifo) */ -#define S_IFCHR 0020000 /* [XSI] character special */ -#define S_IFDIR 0040000 /* [XSI] directory */ -#define S_IFBLK 0060000 /* [XSI] block special */ -#define S_IFREG 0100000 /* [XSI] regular */ -#define S_IFLNK 0120000 /* [XSI] symbolic link */ -#define S_IFSOCK 0140000 /* [XSI] socket */ -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define S_IFWHT 0160000 /* OBSOLETE: whiteout */ -#endif - -/* File mode */ -/* Read, write, execute/search by owner */ -#define S_IRWXU 0000700 /* [XSI] RWX mask for owner */ -#define S_IRUSR 0000400 /* [XSI] R for owner */ -#define S_IWUSR 0000200 /* [XSI] W for owner */ -#define S_IXUSR 0000100 /* [XSI] X for owner */ -/* Read, write, execute/search by group */ -#define S_IRWXG 0000070 /* [XSI] RWX mask for group */ -#define S_IRGRP 0000040 /* [XSI] R for group */ -#define S_IWGRP 0000020 /* [XSI] W for group */ -#define S_IXGRP 0000010 /* [XSI] X for group */ -/* Read, write, execute/search by others */ -#define S_IRWXO 0000007 /* [XSI] RWX mask for other */ -#define S_IROTH 0000004 /* [XSI] R for other */ -#define S_IWOTH 0000002 /* [XSI] W for other */ -#define S_IXOTH 0000001 /* [XSI] X for other */ - -#define S_ISUID 0004000 /* [XSI] set user id on execution */ -#define S_ISGID 0002000 /* [XSI] set group id on execution */ -#define S_ISVTX 0001000 /* [XSI] directory restrcted delete */ - -#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#define S_ISTXT S_ISVTX /* sticky bit: not supported */ -#define S_IREAD S_IRUSR /* backward compatability */ -#define S_IWRITE S_IWUSR /* backward compatability */ -#define S_IEXEC S_IXUSR /* backward compatability */ -#endif -#endif /* !S_IFMT */ +#include /* * [XSI] The following macros shall be provided to test whether a file is @@ -598,11 +514,9 @@ int mknod(const char *, mode_t, dev_t); mode_t umask(mode_t); #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef _FILESEC_T -struct _filesec; -typedef struct _filesec *filesec_t; -#define _FILESEC_T -#endif + +#include + int chflags(const char *, __uint32_t); int chmodx_np(const char *, filesec_t); int fchflags(int, __uint32_t); diff --git a/bsd/sys/sys_domain.h b/bsd/sys/sys_domain.h index 981d9f107..d760373e5 100644 --- a/bsd/sys/sys_domain.h +++ b/bsd/sys/sys_domain.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005, 2012 Apple Inc. All rights reserved. 
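(Aside, not part of the patch: the S_IF* constants moved into their own header above keep their [XSI] semantics — (st_mode & S_IFMT) isolates the file-type field, and the low 12 bits hold the permission and setid/sticky bits. A quick userspace illustration.)

#include <sys/stat.h>
#include <stdio.h>

int
main(void)
{
	struct stat sb;

	if (stat("/etc", &sb) == 0 && (sb.st_mode & S_IFMT) == S_IFDIR)
		printf("directory, mode %04o\n", (unsigned)(sb.st_mode & 07777));
	return 0;
}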
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,6 +86,7 @@ struct kctl ctl_send_func send; /* Send data to nke */ ctl_setopt_func setopt; /* set kctl configuration */ ctl_getopt_func getopt; /* get kctl configuration */ + ctl_rcvd_func rcvd; /* Notify nke when client reads data */ TAILQ_HEAD(, ctl_cb) kcb_head; u_int32_t lastunit; @@ -101,13 +102,12 @@ struct ctl_cb { u_int32_t usecount; }; - -extern struct domain systemdomain; +extern struct domain *systemdomain; /* built in system domain protocols init function */ __BEGIN_DECLS -int kern_event_init(void) __attribute__((section("__TEXT, initcode"))); -int kern_control_init(void) __attribute__((section("__TEXT, initcode"))); +void kern_event_init(struct domain *); +void kern_control_init(struct domain *); __END_DECLS #endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/sysctl.h b/bsd/sys/sysctl.h index 601ace8b8..b566fe453 100644 --- a/bsd/sys/sysctl.h +++ b/bsd/sys/sysctl.h @@ -299,7 +299,7 @@ __END_DECLS #define SYSCTL_OID(parent, nbr, name, kind, a1, a2, handler, fmt, descr) \ struct sysctl_oid sysctl_##parent##_##name = { \ &sysctl_##parent##_children, { 0 }, \ - nbr, kind|CTLFLAG_OID2, a1, a2, #name, handler, fmt, descr, SYSCTL_OID_VERSION, 0 }; \ + nbr, (int)(kind|CTLFLAG_OID2), a1, (int)(a2), #name, handler, fmt, descr, SYSCTL_OID_VERSION, 0 }; \ SYSCTL_LINKER_SET_ENTRY(__sysctl_set, sysctl_##parent##_##name) /* This constructs a node from which other oids can hang. */ @@ -455,7 +455,7 @@ SYSCTL_DECL(_user); #define KERN_PROCARGS 38 /* 39 was KERN_PCSAMPLES... now deprecated */ #define KERN_NETBOOT 40 /* int: are we netbooted? 1=yes,0=no */ -#define KERN_PANICINFO 41 /* node: panic UI information (deprecated) */ + /* 41 was KERN_PANICINFO : panic UI information (deprecated) */ #define KERN_SYSV 42 /* node: System V IPC information */ #define KERN_AFFINITY 43 /* xxx */ #define KERN_TRANSLATE 44 /* xxx */ @@ -536,7 +536,7 @@ SYSCTL_DECL(_user); #define KERN_KDSETREG 8 #define KERN_KDGETREG 9 #define KERN_KDREADTR 10 -#define KERN_KDPIDTR 11 +#define KERN_KDPIDTR 11 #define KERN_KDTHRMAP 12 /* Don't use 13 as it is overloaded with KERN_VNODE */ #define KERN_KDPIDEX 14 @@ -546,13 +546,10 @@ SYSCTL_DECL(_user); #define KERN_KDWRITEMAP 18 #define KERN_KDENABLE_BG_TRACE 19 #define KERN_KDDISABLE_BG_TRACE 20 +#define KERN_KDREADCURTHRMAP 21 #define KERN_KDSET_TYPEFILTER 22 - -/* KERN_PANICINFO types (deprecated) */ -#define KERN_PANICINFO_MAXSIZE 1 /* quad: panic UI image size limit */ -#define KERN_PANICINFO_IMAGE 2 /* panic UI in 8-bit kraw format */ -#define KERN_PANICINFO_TEST 4 /* Allow the panic UI to be tested by root without causing a panic */ -#define KERN_PANICINFO_NOROOT_TEST 5 /* Allow the noroot UI to be tested by root */ +#define KERN_KDBUFWAIT 23 +#define KERN_KDCPUMAP 24 #define CTL_KERN_NAMES { \ { 0, 0 }, \ @@ -596,7 +593,7 @@ SYSCTL_DECL(_user); { "procargs",CTLTYPE_STRUCT },\ { "dummy", CTLTYPE_INT }, /* deprecated pcsamples */ \ { "netboot", CTLTYPE_INT }, \ - { "panicinfo", CTLTYPE_NODE }, /* deprecated: panicinfo */ \ + { "dummy", CTLTYPE_INT }, /* deprecated: panicinfo */ \ { "sysv", CTLTYPE_NODE }, \ { "dummy", CTLTYPE_INT }, \ { "dummy", CTLTYPE_INT }, \ @@ -1124,7 +1121,7 @@ struct linker_set; void sysctl_register_set(const char *set); void sysctl_unregister_set(const char *set); -void sysctl_mib_init(void) __attribute__((section("__TEXT, initcode"))); +void sysctl_mib_init(void); int kernel_sysctl(struct proc *p, int *name, u_int namelen, void *old, size_t *oldlenp, void *newp, size_t newlen); int 
userland_sysctl(struct proc *p, int *name, u_int namelen, user_addr_t old, diff --git a/bsd/sys/sysent.h b/bsd/sys/sysent.h index f2560fd79..33c991c14 100644 --- a/bsd/sys/sysent.h +++ b/bsd/sys/sysent.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2004-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -39,13 +39,11 @@ typedef int32_t sy_call_t(struct proc *, void *, int *); typedef void sy_munge_t(const void *, void *); struct sysent { /* system call table */ - int16_t sy_narg; /* number of args */ - int8_t sy_resv; /* reserved */ - int8_t sy_flags; /* flags */ sy_call_t *sy_call; /* implementing function */ sy_munge_t *sy_arg_munge32; /* system call arguments munger for 32-bit process */ sy_munge_t *sy_arg_munge64; /* system call arguments munger for 64-bit process */ int32_t sy_return_type; /* system call return types */ + int16_t sy_narg; /* number of args */ uint16_t sy_arg_bytes; /* Total size of arguments in bytes for * 32-bit system calls */ @@ -56,11 +54,7 @@ extern struct sysent sysent[]; #endif /* __INIT_SYSENT_C__ */ extern int nsysent; -#define NUM_SYSENT 440 /* Current number of defined syscalls */ - -/* sy_funnel flags bits */ -#define FUNNEL_MASK 0x07f -#define UNSAFE_64BIT 0x080 +#define NUM_SYSENT 456 /* Current number of defined syscalls */ /* * Valid values for sy_cancel diff --git a/bsd/sys/systm.h b/bsd/sys/systm.h index 263e9223b..a3dfef0a9 100644 --- a/bsd/sys/systm.h +++ b/bsd/sys/systm.h @@ -192,6 +192,7 @@ void update_last_io_time(mount_t mp); #ifdef KERNEL_PRIVATE void timeout(void (*)(void *), void *arg, int ticks); +void timeout_with_leeway(void (*)(void *), void *arg, int ticks, int leeway_ticks); void untimeout(void (*)(void *), void *arg); int bsd_hostname(char *, int, int*); #endif /* KERNEL_PRIVATE */ @@ -235,9 +236,6 @@ int throttle_info_ref_by_mask(uint64_t throttle_mask, throttle_info_handle_t *th void throttle_info_rel_by_mask(throttle_info_handle_t throttle_info_handle); void throttle_info_update_by_mask(void *throttle_info_handle, int flags); -void throttle_legacy_process_incr(void); -void throttle_legacy_process_decr(void); - /* * 'throttle_info_handle' acquired via 'throttle_info_ref_by_mask' * 'policy' should be specified as either IOPOL_UTILITY or IPOL_THROTTLE, @@ -245,6 +243,18 @@ void throttle_legacy_process_decr(void); */ int throttle_info_io_will_be_throttled(void *throttle_info_handle, int policy); +#ifdef XNU_KERNEL_PRIVATE +void *exec_spawnattr_getmacpolicyinfo(const void *macextensions, const char *policyname, size_t *lenp); +#endif + +#ifdef BSD_KERNEL_PRIVATE + +#define THROTTLE_IO_ENABLE 1 +#define THROTTLE_IO_DISABLE 0 +void sys_override_io_throttle(int flag); + +#endif /* BSD_KERNEL_PRIVATE */ + __END_DECLS #endif /* !_SYS_SYSTM_H_ */ diff --git a/bsd/sys/termios.h b/bsd/sys/termios.h index c1f3b5dac..ac0cacb1e 100644 --- a/bsd/sys/termios.h +++ b/bsd/sys/termios.h @@ -107,9 +107,7 @@ #endif #define NCCS 20 -#ifndef _POSIX_VDISABLE -#define _POSIX_VDISABLE 0xff -#endif +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) #define CCEQ(val, c) ((c) == (val) ? (val) != _POSIX_VDISABLE : 0) diff --git a/bsd/sys/time.h b/bsd/sys/time.h index a32ed62d6..fdf777617 100644 --- a/bsd/sys/time.h +++ b/bsd/sys/time.h @@ -68,6 +68,7 @@ #include #ifdef KERNEL #include /* user_time_t */ +#include /* uint64_t */ #else /* !KERNEL */ #include #endif /* KERNEL */ @@ -76,30 +77,22 @@ * [XSI] The fd_set type shall be defined as described in . 
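(Aside, not part of the patch: timeout_with_leeway(), declared above, behaves like timeout() but passes a leeway hint so the kernel may coalesce the wakeup with other timers. A hedged xnu-internal sketch — it assumes hz == 100, the traditional xnu tick rate, and the handler name is invented.)

static void expire(void *arg);	/* callout handler, defined elsewhere */

static void
arm_coalescing_timer(void *arg)
{
	/* fire in ~1 s, tolerating ~100 ms of slack for power savings;
	 * a plain timeout(expire, arg, 100) would demand the exact tick,
	 * and untimeout(expire, arg) cancels the pending callout */
	timeout_with_leeway(expire, arg, 100, 10);
}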
* The timespec structure shall be defined as described in */ -#define __need_fd_set -#define __need_struct_timespec -#define __need_struct_timeval +#include +#include +#include #ifdef KERNEL -#define __need_struct_user_timespec -#define __need_struct_user32_timespec -#define __need_struct_user64_timespec -#define __need_struct_user_timeval -#define __need_struct_user32_timeval -#define __need_struct_user64_timeval -#define __need_struct_user32_itimerval -#define __need_struct_user64_itimerval +#include +#include +#include +#include +#include +#include +#include +#include #endif /* KERNEL */ -#include -#ifndef _TIME_T -#define _TIME_T -typedef __darwin_time_t time_t; -#endif - -#ifndef _SUSECONDS_T -#define _SUSECONDS_T -typedef __darwin_suseconds_t suseconds_t; -#endif +#include +#include /* * Structure used as a parameter by getitimer(2) and setitimer(2) system @@ -124,27 +117,15 @@ struct itimerval { * extra protection here is to permit application redefinition above * the default size. */ -#ifndef FD_SETSIZE -#define FD_SETSIZE __DARWIN_FD_SETSIZE -#endif /* FD_SETSIZE */ -#ifndef FD_SET -#define FD_SET(n, p) __DARWIN_FD_SET(n, p) -#endif /* FD_SET */ -#ifndef FD_CLR -#define FD_CLR(n,p) __DARWIN_FD_CLR(n, p) -#endif /* FD_CLR */ -#ifndef FD_ISSET -#define FD_ISSET(n, p) __DARWIN_FD_ISSET(n, p) -#endif /* FD_ISSET */ -#ifndef FD_ZERO -#define FD_ZERO(p) __DARWIN_FD_ZERO(p) -#endif /* FD_ZERO */ +#include +#include +#include +#include +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef FD_COPY -#define FD_COPY(f, t) __DARWIN_FD_COPY(f, t) -#endif /* FD_COPY */ +#include #define TIMEVAL_TO_TIMESPEC(tv, ts) { \ (ts)->tv_sec = (tv)->tv_sec; \ @@ -212,6 +193,7 @@ struct clockinfo { #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) __BEGIN_DECLS void microtime(struct timeval *tv); +void microtime_with_abstime(struct timeval *tv, uint64_t *abstime); void microuptime(struct timeval *tv); #define getmicrotime(a) microtime(a) #define getmicrouptime(a) microuptime(a) @@ -224,7 +206,9 @@ void timevalsub(struct timeval *t1, struct timeval *t2); void timevalfix(struct timeval *t1); #ifdef BSD_KERNEL_PRIVATE time_t boottime_sec(void); -void inittodr(time_t base) __attribute__((section("__TEXT, initcode"))); +void inittodr(time_t base); +int ratecheck(struct timeval *lasttime, const struct timeval *mininterval); +int ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps); #endif /* BSD_KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/sys/timeb.h b/bsd/sys/timeb.h index a5c92708a..0debabf8f 100644 --- a/bsd/sys/timeb.h +++ b/bsd/sys/timeb.h @@ -74,10 +74,7 @@ #include /* [XSI] The time_t type shall be defined as described in */ -#ifndef _TIME_T -#define _TIME_T -typedef __darwin_time_t time_t; -#endif +#include /* * [XSI] Structure whose address is passed as the first parameter to ftime() diff --git a/bsd/sys/times.h b/bsd/sys/times.h index d1b014321..eeacb398b 100644 --- a/bsd/sys/times.h +++ b/bsd/sys/times.h @@ -74,10 +74,7 @@ #include /* [XSI] The clock_t type shall be defined as described in */ -#ifndef _CLOCK_T -#define _CLOCK_T -typedef __darwin_clock_t clock_t; -#endif +#include /* * [XSI] Structure whose address is passed as the first parameter to times() diff --git a/bsd/sys/types.h b/bsd/sys/types.h index eec5230dd..6590597cd 100644 --- a/bsd/sys/types.h +++ b/bsd/sys/types.h @@ -99,92 +99,32 @@ typedef quad_t * qaddr_t; typedef char * caddr_t; /* core address */ typedef int32_t daddr_t; /* disk address */ -#ifndef _DEV_T -typedef __darwin_dev_t 
dev_t; /* device number */ -#define _DEV_T -#endif +#include /* device number */ typedef u_int32_t fixpt_t; /* fixed point number */ -#ifndef _BLKCNT_T -typedef __darwin_blkcnt_t blkcnt_t; -#define _BLKCNT_T -#endif - -#ifndef _BLKSIZE_T -typedef __darwin_blksize_t blksize_t; -#define _BLKSIZE_T -#endif - -#ifndef _GID_T -typedef __darwin_gid_t gid_t; -#define _GID_T -#endif - -#ifndef _IN_ADDR_T -#define _IN_ADDR_T -typedef __uint32_t in_addr_t; /* base type for internet address */ -#endif - -#ifndef _IN_PORT_T -#define _IN_PORT_T -typedef __uint16_t in_port_t; -#endif - -#ifndef _INO_T -typedef __darwin_ino_t ino_t; /* inode number */ -#define _INO_T -#endif +#include +#include +#include +#include +#include +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef _INO64_T -typedef __darwin_ino64_t ino64_t; /* 64bit inode number */ -#define _INO64_T -#endif +#include /* 64bit inode number */ #endif /* !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) */ -#ifndef _KEY_T -#define _KEY_T -typedef __int32_t key_t; /* IPC key (for Sys V IPC) */ -#endif - -#ifndef _MODE_T -typedef __darwin_mode_t mode_t; -#define _MODE_T -#endif - -#ifndef _NLINK_T -typedef __uint16_t nlink_t; /* link count */ -#define _NLINK_T -#endif - -#ifndef _ID_T -#define _ID_T -typedef __darwin_id_t id_t; /* can hold pid_t, gid_t, or uid_t */ -#endif - -#ifndef _PID_T -typedef __darwin_pid_t pid_t; -#define _PID_T -#endif - -#ifndef _OFF_T -typedef __darwin_off_t off_t; -#define _OFF_T -#endif +#include +#include +#include +#include +#include +#include typedef int32_t segsz_t; /* segment size */ typedef int32_t swblk_t; /* swap offset */ -#ifndef _UID_T -typedef __darwin_uid_t uid_t; /* user id */ -#define _UID_T -#endif - -#ifndef _ID_T -typedef __darwin_id_t id_t; -#define _ID_T -#endif +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) /* Major, minor numbers, dev_t's. */ @@ -218,36 +158,17 @@ static inline dev_t makedev(__uint32_t _major, __uint32_t _minor) #endif /* !__cplusplus */ #endif /* !_POSIX_C_SOURCE */ -#ifndef _CLOCK_T -#define _CLOCK_T -typedef __darwin_clock_t clock_t; -#endif +#include +#include +#include +#include -#ifndef _SIZE_T -#define _SIZE_T -/* DO NOT REMOVE THIS COMMENT: fixincludes needs to see - * _GCC_SIZE_T */ -typedef __darwin_size_t size_t; -#endif +#include +#include -#ifndef _SSIZE_T -#define _SSIZE_T -typedef __darwin_ssize_t ssize_t; -#endif - -#ifndef _TIME_T -#define _TIME_T -typedef __darwin_time_t time_t; -#endif - -#ifndef _USECONDS_T -#define _USECONDS_T -typedef __darwin_useconds_t useconds_t; -#endif - -#ifndef _SUSECONDS_T -#define _SUSECONDS_T -typedef __darwin_suseconds_t suseconds_t; +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +#include +#include #endif #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) @@ -256,8 +177,7 @@ typedef __darwin_suseconds_t suseconds_t; * compatability, and is intended to be removed at some point in the * future; please include instead. */ -#define __need_fd_set -#include +#include #define NBBY __DARWIN_NBBY /* bits in a byte */ #define NFDBITS __DARWIN_NFDBITS /* bits per mask */ @@ -270,25 +190,14 @@ typedef __int32_t fd_mask; * extra protection here is to permit application redefinition above * the default size. 
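(Aside, not part of the patch: the major()/minor()/makedev() helpers kept in the non-POSIX section above pack a Darwin dev_t as an 8-bit major number over a 24-bit minor number. A userspace illustration.)

#include <sys/types.h>
#include <stdio.h>

int
main(void)
{
	dev_t d = makedev(14, 3);

	printf("major=%d minor=%d\n", major(d), minor(d));	/* prints 14 3 */
	return 0;
}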
*/ -#ifndef FD_SETSIZE -#define FD_SETSIZE __DARWIN_FD_SETSIZE -#endif /* FD_SETSIZE */ -#ifndef FD_SET -#define FD_SET(n, p) __DARWIN_FD_SET(n, p) -#endif /* FD_SET */ -#ifndef FD_CLR -#define FD_CLR(n, p) __DARWIN_FD_CLR(n, p) -#endif /* FD_CLR */ -#ifndef FD_ISSET -#define FD_ISSET(n, p) __DARWIN_FD_ISSET(n, p) -#endif /* FD_ISSET */ -#ifndef FD_ZERO -#define FD_ZERO(p) __DARWIN_FD_ZERO(p) -#endif /* FD_ZERO */ +#include +#include +#include +#include +#include + #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) -#ifndef FD_COPY -#define FD_COPY(f, t) __DARWIN_FD_COPY(f, t) -#endif /* FD_COPY */ +#include #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ @@ -313,60 +222,23 @@ struct uio; #ifndef __POSIX_LIB__ -#ifndef _PTHREAD_ATTR_T -#define _PTHREAD_ATTR_T -typedef __darwin_pthread_attr_t pthread_attr_t; -#endif -#ifndef _PTHREAD_COND_T -#define _PTHREAD_COND_T -typedef __darwin_pthread_cond_t pthread_cond_t; -#endif -#ifndef _PTHREAD_CONDATTR_T -#define _PTHREAD_CONDATTR_T -typedef __darwin_pthread_condattr_t pthread_condattr_t; -#endif -#ifndef _PTHREAD_MUTEX_T -#define _PTHREAD_MUTEX_T -typedef __darwin_pthread_mutex_t pthread_mutex_t; -#endif -#ifndef _PTHREAD_MUTEXATTR_T -#define _PTHREAD_MUTEXATTR_T -typedef __darwin_pthread_mutexattr_t pthread_mutexattr_t; -#endif -#ifndef _PTHREAD_ONCE_T -#define _PTHREAD_ONCE_T -typedef __darwin_pthread_once_t pthread_once_t; -#endif -#ifndef _PTHREAD_RWLOCK_T -#define _PTHREAD_RWLOCK_T -typedef __darwin_pthread_rwlock_t pthread_rwlock_t; -#endif -#ifndef _PTHREAD_RWLOCKATTR_T -#define _PTHREAD_RWLOCKATTR_T -typedef __darwin_pthread_rwlockattr_t pthread_rwlockattr_t; -#endif -#ifndef _PTHREAD_T -#define _PTHREAD_T -typedef __darwin_pthread_t pthread_t; -#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include #endif /* __POSIX_LIB__ */ -#ifndef _PTHREAD_KEY_T -#define _PTHREAD_KEY_T -typedef __darwin_pthread_key_t pthread_key_t; -#endif +#include /* statvfs and fstatvfs */ -#ifndef _FSBLKCNT_T -#define _FSBLKCNT_T -typedef __darwin_fsblkcnt_t fsblkcnt_t; -#endif - -#ifndef _FSFILCNT_T -#define _FSFILCNT_T -typedef __darwin_fsfilcnt_t fsfilcnt_t; -#endif +#include +#include #endif /* !_SYS_TYPES_H_ */ diff --git a/bsd/sys/ubc.h b/bsd/sys/ubc.h index 37bcdfcff..2982d5008 100644 --- a/bsd/sys/ubc.h +++ b/bsd/sys/ubc.h @@ -38,7 +38,7 @@ #include #include #include - +#include /* defns for ubc_sync_range() and ubc_msync */ @@ -70,7 +70,9 @@ int ubc_setcred(struct vnode *, struct proc *) __deprecated; /* code signing */ struct cs_blob; struct cs_blob *ubc_cs_blob_get(vnode_t, cpu_type_t, off_t); -int cs_entitlements_blob_get(proc_t p, void **, size_t *); +int cs_entitlements_blob_get(proc_t, void **, size_t *); +int cs_blob_get(proc_t, void **, size_t *); +const char *cs_identity_get(proc_t); #endif /* cluster IO routines */ diff --git a/bsd/sys/ubc_internal.h b/bsd/sys/ubc_internal.h index d7197f089..64a75b853 100644 --- a/bsd/sys/ubc_internal.h +++ b/bsd/sys/ubc_internal.h @@ -101,6 +101,7 @@ struct cs_blob { cpu_type_t csb_cpu_type; unsigned int csb_flags; off_t csb_base_offset; /* Offset of Mach-O binary in fat binary */ + off_t csb_blob_offset; /* offset of blob itself, from csb_base_offset */ off_t csb_start_offset; /* Blob coverage area start, from csb_base_offset */ off_t csb_end_offset; /* Blob coverage area end, from csb_base_offset */ ipc_port_t csb_mem_handle; @@ -108,6 +109,7 @@ struct cs_blob { vm_offset_t csb_mem_offset; vm_address_t csb_mem_kaddr; unsigned char 
csb_sha1[SHA1_RESULTLEN]; + unsigned int csb_sigpup; }; /* @@ -150,7 +152,7 @@ struct ubc_info { */ __BEGIN_DECLS -__private_extern__ void ubc_init(void) __attribute__((section("__TEXT, initcode")));; +__private_extern__ void ubc_init(void); __private_extern__ int ubc_umount(mount_t mp); __private_extern__ void ubc_unmountall(void); __private_extern__ memory_object_t ubc_getpager(vnode_t); @@ -159,7 +161,7 @@ __private_extern__ void ubc_destroy_named(vnode_t); /* internal only */ __private_extern__ void cluster_release(struct ubc_info *); __private_extern__ uint32_t cluster_max_io_size(mount_t, int); -__private_extern__ uint32_t cluster_hard_throttle_limit(vnode_t, uint32_t *, uint32_t); +__private_extern__ uint32_t cluster_throttle_io_limit(vnode_t, uint32_t *); /* Flags for ubc_getobject() */ @@ -185,7 +187,8 @@ int UBCINFOEXISTS(vnode_t); /* code signing */ struct cs_blob; -int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t); +int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, off_t, vm_size_t); +int ubc_cs_sigpup_add(vnode_t, vm_address_t, vm_size_t); struct cs_blob *ubc_get_cs_blobs(vnode_t); int ubc_cs_getcdhash(vnode_t, off_t, unsigned char *); kern_return_t ubc_cs_blob_allocate(vm_offset_t *, vm_size_t *); diff --git a/bsd/sys/ucontext.h b/bsd/sys/ucontext.h index 249cf5e23..8240c35bd 100644 --- a/bsd/sys/ucontext.h +++ b/bsd/sys/ucontext.h @@ -32,15 +32,11 @@ #include #include -#define __need_mcontext_t -#define __need_stack_t -#define __need_ucontext_t -#include +#include +#include +#include -#ifndef _SIGSET_T -#define _SIGSET_T -typedef __darwin_sigset_t sigset_t; -#endif +#include #ifdef KERNEL #include /* user_addr_t, user_size_t */ diff --git a/bsd/sys/ucred.h b/bsd/sys/ucred.h index 6d914a4df..7dcd85e18 100644 --- a/bsd/sys/ucred.h +++ b/bsd/sys/ucred.h @@ -148,8 +148,6 @@ struct xucred { __BEGIN_DECLS int crcmp(kauth_cred_t cr1, kauth_cred_t cr2); int suser(kauth_cred_t cred, u_short *acflag); -int is_suser(void); -int is_suser1(void); int set_security_token(struct proc * p); void cru2x(kauth_cred_t cr, struct xucred *xcr); __END_DECLS diff --git a/bsd/sys/uio.h b/bsd/sys/uio.h index 1c48b49cd..84f9f690b 100644 --- a/bsd/sys/uio.h +++ b/bsd/sys/uio.h @@ -71,27 +71,14 @@ * [XSI] The ssize_t and size_t types shall be defined as described * in . */ -#ifndef _SIZE_T -#define _SIZE_T -typedef __darwin_size_t size_t; -#endif - -#ifndef _SSIZE_T -#define _SSIZE_T -typedef __darwin_ssize_t ssize_t; -#endif +#include +#include /* * [XSI] Structure whose address is passed as the second parameter to the * readv() and writev() functions. 
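(Aside on the iovec structure just described, not part of the patch: struct iovec is the descriptor that readv()/writev() consume. A quick userspace gather-write example.)

#include <sys/uio.h>
#include <unistd.h>
#include <string.h>

int
main(void)
{
	char a[] = "hello, ", b[] = "iovec\n";
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = strlen(a) },
		{ .iov_base = b, .iov_len = strlen(b) },
	};

	/* one syscall writes both buffers in order */
	return writev(STDOUT_FILENO, iov, 2) < 0;
}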
*/ -#ifndef _STRUCT_IOVEC -#define _STRUCT_IOVEC -struct iovec { - void * iov_base; /* [XSI] Base address of I/O memory region */ - size_t iov_len; /* [XSI] Size of region iov_base points to */ -}; -#endif +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) diff --git a/bsd/sys/uio_internal.h b/bsd/sys/uio_internal.h index 470291cf4..959eb501c 100644 --- a/bsd/sys/uio_internal.h +++ b/bsd/sys/uio_internal.h @@ -97,7 +97,7 @@ struct user_iovec; #ifdef XNU_KERNEL_PRIVATE __private_extern__ struct user_iovec * uio_iovsaddr( uio_t a_uio ); -__private_extern__ void uio_calculateresid( uio_t a_uio ); +__private_extern__ int uio_calculateresid( uio_t a_uio ); __private_extern__ void uio_setcurriovlen( uio_t a_uio, user_size_t a_value ); __private_extern__ int uio_spacetype( uio_t a_uio ); __private_extern__ uio_t uio_createwithbuffer( int a_iovcount, off_t a_offset, int a_spacetype, int a_iodirection, void *a_buf_p, size_t a_buffer_size ); diff --git a/bsd/sys/un.h b/bsd/sys/un.h index 9a75d32c3..2f06218cb 100644 --- a/bsd/sys/un.h +++ b/bsd/sys/un.h @@ -68,10 +68,7 @@ #include /* [XSI] The sa_family_t type shall be defined as described in */ -#ifndef _SA_FAMILY_T -#define _SA_FAMILY_T -typedef __uint8_t sa_family_t; -#endif +#include /* * [XSI] Definitions for UNIX IPC domain. @@ -88,8 +85,11 @@ struct sockaddr_un { #define SOL_LOCAL 0 /* Socket options. */ -#define LOCAL_PEERCRED 0x001 /* retrieve peer credentails */ -#define LOCAL_PEERPID 0x002 /* retrieve peer pid */ +#define LOCAL_PEERCRED 0x001 /* retrieve peer credentials */ +#define LOCAL_PEERPID 0x002 /* retrieve peer pid */ +#define LOCAL_PEEREPID 0x003 /* retrieve eff. peer pid */ +#define LOCAL_PEERUUID 0x004 /* retrieve peer UUID */ +#define LOCAL_PEEREUUID 0x005 /* retrieve eff.
peer UUID */ #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ @@ -108,7 +108,7 @@ int uipc_ctloutput (struct socket *so, struct sockopt *sopt); int unp_connect2(struct socket *so, struct socket *so2); void unp_dispose(struct mbuf *m); int unp_externalize(struct mbuf *rights); -void unp_init(void) __attribute__((section("__TEXT, initcode"))); +void unp_init(void); extern struct pr_usrreqs uipc_usrreqs; int unp_lock(struct socket *, int, void *); int unp_unlock(struct socket *, int, void *); diff --git a/bsd/sys/unistd.h b/bsd/sys/unistd.h index c778c66f3..88f3cd574 100644 --- a/bsd/sys/unistd.h +++ b/bsd/sys/unistd.h @@ -81,9 +81,7 @@ /* execution-time symbolic constants */ /* may disable terminal special characters */ -#ifndef _POSIX_VDISABLE -#define _POSIX_VDISABLE ((unsigned char)'\377') -#endif +#include #define _POSIX_THREAD_KEYS_MAX 128 @@ -121,11 +119,7 @@ #endif /* whence values for lseek(2) */ -#ifndef SEEK_SET -#define SEEK_SET 0 /* set file offset to offset */ -#define SEEK_CUR 1 /* set file offset to current plus offset */ -#define SEEK_END 2 /* set file offset to EOF plus offset */ -#endif /* !SEEK_SET */ +#include #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) /* whence values for lseek(2); renamed by POSIX 1003.1 */ diff --git a/bsd/sys/unpcb.h b/bsd/sys/unpcb.h index d4d566654..936e99801 100644 --- a/bsd/sys/unpcb.h +++ b/bsd/sys/unpcb.h @@ -203,7 +203,6 @@ struct xunpcb { u_quad_t xu_alignment_hack; }; -#if !CONFIG_EMBEDDED struct xunpcb64_list_entry { u_int64_t le_next; @@ -237,7 +236,6 @@ struct xunpcb64 { struct xsocket64 xu_socket; }; -#endif /* !CONFIG_EMBEDDED */ #pragma pack() diff --git a/bsd/sys/user.h b/bsd/sys/user.h index b5c1106e5..771563663 100644 --- a/bsd/sys/user.h +++ b/bsd/sys/user.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -116,23 +116,19 @@ struct uthread { int uu_rval[2]; /* thread exception handling */ - int uu_exception; mach_exception_code_t uu_code; /* ``code'' to trap */ mach_exception_subcode_t uu_subcode; + int uu_exception; char uu_cursig; /* p_cursig for exc. 
*/ /* support for syscalls which use continuations */ - struct _select { - u_int32_t *ibits, *obits; /* bits to select on */ - uint nbytes; /* number of bytes in ibits and obits */ + union { + struct _select_data { u_int64_t abstime; + char * wql; int poll; int error; int count; - int _reserved1; // UNUSED: avoid changing size for now - char * wql; - } uu_select; /* saved state for select() */ - /* to support kevent continuations */ - union { + } ss_select_data; struct _kqueue_scan { kevent_callback_t call; /* per-event callback */ kqueue_continue_t cont; /* whole call continuation */ @@ -149,49 +145,60 @@ struct uthread { int eventcount; /* user-level event count */ int eventout; /* number of events output */ } ss_kevent; /* saved state for kevent() */ + + struct _kauth { + user_addr_t message; /* message in progress */ + } uu_kauth; + + struct ksyn_waitq_element uu_kwe; /* used for pthread synch */ } uu_kevent; - struct _kauth { - user_addr_t message; /* message in progress */ - } uu_kauth; + + struct _select { + u_int32_t *ibits, *obits; /* bits to select on */ + uint nbytes; /* number of bytes in ibits and obits */ + struct _select_data *data; + } uu_select; /* saved state for select() */ + /* internal support for continuation framework */ int (*uu_continuation)(int); int uu_pri; int uu_timo; caddr_t uu_wchan; /* sleeping thread wait channel */ const char *uu_wmesg; /* ... wait message */ - int uu_flag; struct proc * uu_proc; thread_t uu_thread; void * uu_userstate; wait_queue_set_t uu_wqset; /* cached across select calls */ size_t uu_allocsize; /* ...size of select cache */ + int uu_flag; sigset_t uu_siglist; /* signals pending for the thread */ sigset_t uu_sigwait; /* sigwait on this thread*/ sigset_t uu_sigmask; /* signal mask for the thread */ sigset_t uu_oldmask; /* signal mask saved before sigpause */ - struct vfs_context uu_context; /* thread + cred */ sigset_t uu_vforkmask; /* saved signal mask during vfork */ + struct vfs_context uu_context; /* thread + cred */ TAILQ_ENTRY(uthread) uu_list; /* List of uthreads in proc */ struct kaudit_record *uu_ar; /* audit record */ struct task* uu_aio_task; /* target task for async io */ - u_int32_t uu_network_lock_held; /* network support for pf locking */ lck_mtx_t *uu_mtx; TAILQ_ENTRY(uthread) uu_throttlelist; /* List of uthreads currently throttled */ + void * uu_throttle_info; /* pointer to throttled I/Os info */ int uu_on_throttlelist; int uu_lowpri_window; boolean_t uu_throttle_bc; - void * uu_throttle_info; /* pointer to throttled I/Os info */ + + u_int32_t uu_network_marks; /* network control flow marks */ struct kern_sigaltstack uu_sigstk; - int uu_defer_reclaims; vnode_t uu_vreclaims; - int uu_notrigger; /* XXX - flag for autofs */ vnode_t uu_cdir; /* per thread CWD */ + int uu_notrigger; /* XXX - flag for autofs */ int uu_dupfd; /* fd in fdesc_open/dupfdopen */ + int uu_defer_reclaims; #ifdef JOE_DEBUG int uu_iocount; @@ -200,11 +207,11 @@ struct uthread { void * uu_pcs[32][10]; #endif #if CONFIG_DTRACE - siginfo_t t_dtrace_siginfo; uint32_t t_dtrace_errno; /* Most recent errno */ + siginfo_t t_dtrace_siginfo; + uint64_t t_dtrace_resumepid; /* DTrace's pidresume() pid */ uint8_t t_dtrace_stop; /* indicates a DTrace desired stop */ uint8_t t_dtrace_sig; /* signal sent via DTrace's raise() */ - uint64_t t_dtrace_resumepid; /* DTrace's pidresume() pid */ union __tdu { struct __tds { @@ -241,7 +248,6 @@ struct uthread { #endif /* CONFIG_DTRACE */ void * uu_threadlist; char * pth_name; - struct ksyn_waitq_element uu_kwe; /* user for
pthread synch */ struct label * uu_label; /* MAC label */ }; @@ -260,11 +266,12 @@ typedef struct uthread * uthread_t; #define UT_PROCEXIT 0x00000200 /* this thread completed the proc exit */ #define UT_RAGE_VNODES 0x00000400 /* rapid age any vnodes created by this thread */ /* 0x00000800 unused, used to be UT_BACKGROUND */ -#define UT_BACKGROUND_TRAFFIC_MGT 0x00001000 /* background traffic is regulated */ +/* 0x00001000 unused, used to be UT_BACKGROUND_TRAFFIC_MGT */ #define UT_VFORK 0x02000000 /* thread has vfork children */ #define UT_SETUID 0x04000000 /* thread is settugid() */ #define UT_WASSETUID 0x08000000 /* thread was settugid() (in vfork) */ +#define UT_VFORKING 0x10000000 /* thread in vfork() syscall */ #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h index 1a9343729..8b97538ac 100644 --- a/bsd/sys/vnode.h +++ b/bsd/sys/vnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -68,6 +68,7 @@ #include #ifdef KERNEL #include +#include #include #endif @@ -149,7 +150,8 @@ enum vtagtype { #define IO_ENCRYPTED 0x20000 /* Retrieve encrypted blocks from the filesystem */ #define IO_RETURN_ON_THROTTLE 0x40000 #define IO_SINGLE_WRITER 0x80000 -#define IO_SYSCALL_DISPATCH 0x100000 /* I/O origin is file table syscall */ +#define IO_SYSCALL_DISPATCH 0x100000 /* I/O originated from a file table syscall */ +#define IO_SWAP_DISPATCH 0x200000 /* I/O originated from the swap layer */ /* * Component Name: this structure describes the pathname @@ -683,8 +685,8 @@ extern int vttoif_tab[]; #define REVOKEALL 0x0001 /* vnop_revoke: revoke all aliases */ -/* VNOP_REMOVE/unlink flags: */ -#define VNODE_REMOVE_NODELETEBUSY 0x0001 /* Do not delete busy files (Carbon) */ +/* VNOP_REMOVE/unlink flags */ +#define VNODE_REMOVE_NODELETEBUSY 0x0001 /* Don't delete busy files (Carbon) */ #define VNODE_REMOVE_SKIP_NAMESPACE_EVENT 0x0002 /* Do not upcall to userland handlers */ /* VNOP_READDIR flags: */ @@ -1897,7 +1899,22 @@ int vnode_isshadow(vnode_t); */ vnode_t vnode_parent(vnode_t); void vnode_setparent(vnode_t, vnode_t); -const char * vnode_name(vnode_t); +/*! + @function vnode_getname_printable + @abstract Get a non-null printable name of a vnode. + @discussion Used to make sure a printable name is returned for all vnodes. If a name exists or can be artificially created, the routine creates a new entry in the VFS namecache. Otherwise, the function returns an artificially created vnode name which is safer and easier to use. vnode_putname_printable() should be used to release names obtained by this routine. + @param vp The vnode whose name to grab. + @return The printable name. + */ +const char *vnode_getname_printable(vnode_t vp); + +/*! + @function vnode_putname_printable + @abstract Release a reference on a name from the VFS cache if it was added by the matching vnode_getname_printable() call. + @param name String to release. + @return void. + */ +void vnode_putname_printable(const char *name); void vnode_setname(vnode_t, char *); int vnode_isnoflush(vnode_t); void vnode_setnoflush(vnode_t); diff --git a/bsd/sys/vnode_if.h b/bsd/sys/vnode_if.h index 33ae10047..1c8796d13 100644 --- a/bsd/sys/vnode_if.h +++ b/bsd/sys/vnode_if.h @@ -1051,6 +1051,7 @@ struct vnop_advlock_args { struct flock *a_fl; int a_flags; vfs_context_t a_context; + struct timespec *a_timeout; }; /*!
@@ -1069,10 +1070,11 @@ struct vnop_advlock_args { @param flags F_FLOCK: use flock() semantics. F_POSIX: use POSIX semantics. F_WAIT: sleep if necessary. F_PROV: Non-coelesced provisional lock (unused in xnu). @param ctx Context to authenticate for advisory locking request. + @param timeout Timespec for timeout in case of F_SETLKWTIMEOUT. @return 0 for success, or an error code. */ #ifdef XNU_KERNEL_PRIVATE -extern errno_t VNOP_ADVLOCK(vnode_t, caddr_t, int, struct flock *, int, vfs_context_t); +extern errno_t VNOP_ADVLOCK(vnode_t, caddr_t, int, struct flock *, int, vfs_context_t, struct timespec *); #endif /* XNU_KERNEL_PRIVATE */ struct vnop_allocate_args { diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h index af27d9513..be2d81c51 100644 --- a/bsd/sys/vnode_internal.h +++ b/bsd/sys/vnode_internal.h @@ -93,14 +93,6 @@ struct label; LIST_HEAD(buflists, buf); -#if CONFIG_VFS_FUNNEL -struct unsafe_fsnode { - lck_mtx_t fsnodelock; - int32_t fsnode_count; - void * fsnodeowner; -}; -#endif /* CONFIG_VFS_FUNNEL */ - #if CONFIG_TRIGGERS /* * VFS Internal (private) trigger vnode resolver info. @@ -137,7 +129,7 @@ struct vnode { TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ TAILQ_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ LIST_HEAD(, namecache) v_nclinks; /* name cache entries that name this vnode */ - LIST_HEAD(, namecache) v_ncchildren; /* name cache entries that regard us as there parent */ + LIST_HEAD(, namecache) v_ncchildren; /* name cache entries that regard us as their parent */ vnode_t v_defer_reclaimlist; /* in case we have to defer the reclaim to avoid recursion */ uint32_t v_listflag; /* flags protected by the vnode_list_lock (see below) */ uint32_t v_flag; /* vnode flags (see below) */ @@ -178,14 +170,6 @@ struct vnode { const char *v_name; /* name component of the vnode */ vnode_t v_parent; /* pointer to parent vnode */ struct lockf *v_lockf; /* advisory lock list head */ -#if CONFIG_VFS_FUNNEL - struct unsafe_fsnode *v_unsafefs; /* pointer to struct used to lock */ -#else - int32_t v_reserved1; -#ifdef __LP64__ - int32_t v_reserved2; -#endif -#endif /* CONFIG_VFS_FUNNEL */ int (**v_op)(void *); /* vnode operations vector */ mount_t v_mount; /* ptr to vfs we are in */ void * v_data; /* private data for fs */ @@ -274,6 +258,7 @@ struct vnode { #endif #define VOPENEVT 0x800000 /* if process is P_CHECKOPENEVT, then or in the O_EVTONLY flag on open */ #define VNEEDSSNAPSHOT 0x1000000 +#define VNOCS 0x2000000 /* is there no code signature available */ /* * Global vnode data. 
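(Aside, not part of the patch: the vnode_getname_printable()/vnode_putname_printable() pair documented above is reference-counted — every get must be balanced by a put. A hedged xnu-internal sketch of that contract; the logging helper is invented.)

static void
log_vnode_name(vnode_t vp)
{
	const char *name = vnode_getname_printable(vp);	/* never NULL */

	printf("vp %p: %s\n", (void *)vp, name);
	vnode_putname_printable(name);	/* drop the namecache ref, if any */
}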
@@ -374,9 +359,10 @@ extern struct vnodeop_desc *vnodeop_descs[]; struct ostat; -#define BUILDPATH_NO_FS_ENTER 0x1 /* Use cache values, do not enter file system */ -#define BUILDPATH_CHECKACCESS 0x2 /* Check if parents have search rights */ -#define BUILDPATH_CHECK_MOVED 0x4 /* Return EAGAIN if the parent hierarchy is modified */ +#define BUILDPATH_NO_FS_ENTER 0x1 /* Use cache values, do not enter file system */ +#define BUILDPATH_CHECKACCESS 0x2 /* Check if parents have search rights */ +#define BUILDPATH_CHECK_MOVED 0x4 /* Return EAGAIN if the parent hierarchy is modified */ +#define BUILDPATH_VOLUME_RELATIVE 0x8 /* Return path relative to the nearest mount point */ int build_path(vnode_t first_vp, char *buff, int buflen, int *outlen, int flags, vfs_context_t ctx); @@ -444,23 +430,17 @@ int vn_setxattr(vnode_t, const char *, uio_t, int, vfs_context_t); int vn_removexattr(vnode_t, const char *, int, vfs_context_t); int vn_listxattr(vnode_t, uio_t, size_t *, int, vfs_context_t); -int default_getxattr(vnode_t, const char *, uio_t, size_t *, int, vfs_context_t); -int default_setxattr(vnode_t, const char *, uio_t, int, vfs_context_t); -int default_removexattr(vnode_t, const char *, int, vfs_context_t); - -int check_appledouble_header(vnode_t, vfs_context_t); - #if NAMEDSTREAMS errno_t vnode_getnamedstream(vnode_t, vnode_t *, const char *, enum nsoperation, int, vfs_context_t); errno_t vnode_makenamedstream(vnode_t, vnode_t *, const char *, int, vfs_context_t); errno_t vnode_removenamedstream(vnode_t, vnode_t, const char *, int, vfs_context_t); errno_t vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context); -errno_t vnode_relenamedstream(vnode_t vp, vnode_t svp, vfs_context_t context); -errno_t vnode_verifynamedstream (vnode_t vp, vfs_context_t ctx); +errno_t vnode_relenamedstream(vnode_t vp, vnode_t svp); +errno_t vnode_verifynamedstream (vnode_t vp); #endif -void nchinit(void) __attribute__((section("__TEXT, initcode"))); +void nchinit(void); int resize_namecache(uint32_t newsize); void name_cache_lock_shared(void); void name_cache_lock(void); @@ -532,17 +512,17 @@ void vn_clearunionwait(vnode_t, int); void SPECHASH_LOCK(void); void SPECHASH_UNLOCK(void); -void vnode_authorize_init(void) __attribute__((section("__TEXT, initcode"))); +void vnode_authorize_init(void); -void vfsinit(void) __attribute__((section("__TEXT, initcode"))); +void vfsinit(void); void vnode_lock(vnode_t); void vnode_unlock(vnode_t); /* * XXX exported symbols; should be static */ -void vfs_op_init(void) __attribute__((section("__TEXT, initcode"))); -void vfs_opv_init(void) __attribute__((section("__TEXT, initcode"))); +void vfs_op_init(void); +void vfs_opv_init(void); int vfs_sysctl(int *name, uint32_t namelen, user_addr_t oldp, size_t *oldlenp, user_addr_t newp, size_t newlen, struct proc *p); int sysctl_vnode(struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req); diff --git a/bsd/sys/wait.h b/bsd/sys/wait.h index 1de5ce97a..23506faff 100644 --- a/bsd/sys/wait.h +++ b/bsd/sys/wait.h @@ -86,15 +86,8 @@ typedef enum { * [XSI] The id_t and pid_t types shall be defined as described * in */ -#ifndef _PID_T -typedef __darwin_pid_t pid_t; -#define _PID_T -#endif - -#ifndef _ID_T -typedef __darwin_id_t id_t; -#define _ID_T -#endif +#include +#include /* * [XSI] The siginfo_t type shall be defined as described in diff --git a/bsd/sys/xattr.h b/bsd/sys/xattr.h index b4fe1ac99..1e062b615 100644 --- a/bsd/sys/xattr.h +++ b/bsd/sys/xattr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2010 Apple Inc. 
All rights reserved. + * Copyright (c) 2004-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -56,21 +56,27 @@ #ifdef KERNEL + +#ifdef KERNEL_PRIVATE +#define XATTR_VNODE_SUPPORTED(vp) \ + ((vp)->v_type == VREG || (vp)->v_type == VDIR || (vp)->v_type == VLNK || (vp)->v_type == VSOCK || (vp)->v_type == VFIFO) +#endif + __BEGIN_DECLS int xattr_protected(const char *); int xattr_validatename(const char *); /* Maximum extended attribute size supported by VFS */ -#define XATTR_MAXSIZE (64 * 1024 * 1024) +#define XATTR_MAXSIZE INT32_MAX #ifdef PRIVATE /* Maximum extended attribute size in an Apple Double file */ -#define AD_XATTR_MAXSIZE (128 * 1024) +#define AD_XATTR_MAXSIZE XATTR_MAXSIZE /* Number of bits used to represent the maximum size of * extended attribute stored in an Apple Double file. */ -#define AD_XATTR_SIZE_BITS 18 +#define AD_XATTR_SIZE_BITS 31 #endif /* PRIVATE */ __END_DECLS diff --git a/bsd/uuid/Makefile b/bsd/uuid/Makefile index 1f7f17bfc..10f9cecaa 100644 --- a/bsd/uuid/Makefile +++ b/bsd/uuid/Makefile @@ -7,44 +7,29 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - # In both the framework PrivateHeader area and /usr/include/uuid DATAFILES = \ uuid.h -# Only in the framework PrivateHeader area -PRIVATE_DATAFILES = \ - # KERNELFILES will appear only in the kernel framework KERNELFILES = \ uuid.h -# Only in the private kernel framework -PRIVATE_KERNELFILES = \ - - INSTALL_MI_LIST = ${DATAFILES} INSTALL_MI_DIR = uuid -EXPORT_MI_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} +EXPORT_MI_LIST = ${KERNELFILES} EXPORT_MI_DIR = uuid # /System/Library/Frameworks/System.framework/PrivateHeaders -INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} +INSTALL_MI_LCL_LIST = ${DATAFILES} # /System/Library/Frameworks/Kernel.framework/PrivateHeaders -INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} ${PRIVATE_KERNELFILES} +INSTALL_KF_MI_LCL_LIST = ${KERNELFILES} # /System/Library/Frameworks/Kernel.framework/Headers diff --git a/bsd/uuid/uuid.h b/bsd/uuid/uuid.h index be1f90fe2..52602867e 100644 --- a/bsd/uuid/uuid.h +++ b/bsd/uuid/uuid.h @@ -36,11 +36,7 @@ #define _UUID_UUID_H #include - -#ifndef _UUID_T -#define _UUID_T -typedef __darwin_uuid_t uuid_t; -#endif /* _UUID_T */ +#include #ifndef _UUID_STRING_T #define _UUID_STRING_T diff --git a/bsd/vfs/Makefile b/bsd/vfs/Makefile index b9ddbedcc..68e740b9a 100644 --- a/bsd/vfs/Makefile +++ b/bsd/vfs/Makefile @@ -7,14 +7,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ vfs_support.h @@ -26,7 +18,6 @@ EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = vfs - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/vfs/kpi_vfs.c b/bsd/vfs/kpi_vfs.c index 2e200b08c..200cb51c4 100644 --- a/bsd/vfs/kpi_vfs.c +++ b/bsd/vfs/kpi_vfs.c @@ -120,25 +120,23 @@ #include #endif +#include + #define ESUCCESS 0 #undef mount_t #undef vnode_t #define COMPAT_ONLY - -#if CONFIG_VFS_FUNNEL -#define THREAD_SAFE_FS(VP) \ - ((VP)->v_unsafefs ? 0 : 1) -#endif /* CONFIG_VFS_FUNNEL */ - #define NATIVE_XATTR(VP) \ ((VP)->v_mount ? 
(VP)->v_mount->mnt_kern_flag & MNTK_EXTENDED_ATTRS : 0) +#if CONFIG_APPLEDOUBLE static void xattrfile_remove(vnode_t dvp, const char *basename, vfs_context_t ctx, int force); static void xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, vfs_context_t ctx); +#endif /* CONFIG_APPLEDOUBLE */ /* * vnode_setneedinactive @@ -166,50 +164,6 @@ vnode_setneedinactive(vnode_t vp) } -#if CONFIG_VFS_FUNNEL -int -lock_fsnode(vnode_t vp, int *funnel_state) -{ - if (funnel_state) - *funnel_state = thread_funnel_set(kernel_flock, TRUE); - - if (vp->v_unsafefs) { - if (vp->v_unsafefs->fsnodeowner == current_thread()) { - vp->v_unsafefs->fsnode_count++; - } else { - lck_mtx_lock(&vp->v_unsafefs->fsnodelock); - - if (vp->v_lflag & (VL_TERMWANT | VL_TERMINATE | VL_DEAD)) { - lck_mtx_unlock(&vp->v_unsafefs->fsnodelock); - - if (funnel_state) - (void) thread_funnel_set(kernel_flock, *funnel_state); - return (ENOENT); - } - vp->v_unsafefs->fsnodeowner = current_thread(); - vp->v_unsafefs->fsnode_count = 1; - } - } - return (0); -} - - -void -unlock_fsnode(vnode_t vp, int *funnel_state) -{ - if (vp->v_unsafefs) { - if (--vp->v_unsafefs->fsnode_count == 0) { - vp->v_unsafefs->fsnodeowner = NULL; - lck_mtx_unlock(&vp->v_unsafefs->fsnodelock); - } - } - if (funnel_state) - (void) thread_funnel_set(kernel_flock, *funnel_state); -} -#endif /* CONFIG_VFS_FUNNEL */ - - - /* ====================================================================== */ /* ************ EXTERNAL KERNEL APIS ********************************** */ /* ====================================================================== */ @@ -221,21 +175,10 @@ int VFS_MOUNT(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_mount == 0)) return(ENOTSUP); -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - if (vfs_context_is64bit(ctx)) { if (vfs_64bitready(mp)) { error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, ctx); @@ -248,12 +191,6 @@ VFS_MOUNT(mount_t mp, vnode_t devvp, user_addr_t data, vfs_context_t ctx) error = (*mp->mnt_op->vfs_mount)(mp, devvp, data, ctx); } -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return (error); } @@ -261,29 +198,12 @@ int VFS_START(mount_t mp, int flags, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_start == 0)) return(ENOTSUP); -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_start)(mp, flags, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return (error); } @@ -291,29 +211,12 @@ int VFS_UNMOUNT(mount_t mp, int flags, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_unmount == 0)) return(ENOTSUP); -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & 
VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_unmount)(mp, flags, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return (error); } @@ -338,10 +241,6 @@ int VFS_ROOT(mount_t mp, struct vnode ** vpp, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_root == 0)) return(ENOTSUP); @@ -350,21 +249,8 @@ VFS_ROOT(mount_t mp, struct vnode ** vpp, vfs_context_t ctx) ctx = vfs_context_current(); } -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_root)(mp, vpp, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return (error); } @@ -372,29 +258,12 @@ int VFS_QUOTACTL(mount_t mp, int cmd, uid_t uid, caddr_t datap, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_quotactl == 0)) return(ENOTSUP); -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_quotactl)(mp, cmd, uid, datap, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return (error); } @@ -402,10 +271,6 @@ int VFS_GETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_getattr == 0)) return(ENOTSUP); @@ -414,21 +279,8 @@ VFS_GETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) ctx = vfs_context_current(); } -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_getattr)(mp, vfa, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return(error); } @@ -436,10 +288,6 @@ int VFS_SETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_setattr == 0)) return(ENOTSUP); @@ -448,21 +296,8 @@ VFS_SETATTR(mount_t mp, struct vfs_attr *vfa, vfs_context_t ctx) ctx = vfs_context_current(); } -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_setattr)(mp, vfa, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return(error); } @@ -470,10 +305,6 @@ int VFS_SYNC(mount_t mp, int flags, vfs_context_t ctx) { int 
error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_sync == 0)) return(ENOTSUP); @@ -482,21 +313,8 @@ VFS_SYNC(mount_t mp, int flags, vfs_context_t ctx) ctx = vfs_context_current(); } -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_sync)(mp, flags, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return(error); } @@ -504,10 +322,6 @@ int VFS_VGET(mount_t mp, ino64_t ino, struct vnode **vpp, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_vget == 0)) return(ENOTSUP); @@ -516,21 +330,8 @@ VFS_VGET(mount_t mp, ino64_t ino, struct vnode **vpp, vfs_context_t ctx) ctx = vfs_context_current(); } -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_vget)(mp, ino, vpp, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return(error); } @@ -538,10 +339,6 @@ int VFS_FHTOVP(mount_t mp, int fhlen, unsigned char * fhp, vnode_t * vpp, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((mp == dead_mountp) || (mp->mnt_op->vfs_fhtovp == 0)) return(ENOTSUP); @@ -550,21 +347,8 @@ VFS_FHTOVP(mount_t mp, int fhlen, unsigned char * fhp, vnode_t * vpp, vfs_contex ctx = vfs_context_current(); } -#if CONFIG_VFS_FUNNEL - thread_safe = (mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*mp->mnt_op->vfs_fhtovp)(mp, fhlen, fhp, vpp, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return(error); } @@ -572,10 +356,6 @@ int VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t ctx) { int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if ((vp->v_mount == dead_mountp) || (vp->v_mount->mnt_op->vfs_vptofh == 0)) return(ENOTSUP); @@ -584,21 +364,8 @@ VFS_VPTOFH(struct vnode * vp, int *fhlenp, unsigned char * fhp, vfs_context_t ct ctx = vfs_context_current(); } -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*vp->v_mount->mnt_op->vfs_vptofh)(vp, fhlenp, fhp, ctx); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return(error); } @@ -766,17 +533,6 @@ vfs_clearauthcache_ttl(mount_t mp) mount_unlock(mp); } -void -vfs_markdependency(mount_t mp) -{ - proc_t p = current_proc(); - mount_lock(mp); - mp->mnt_dependent_process = p; - mp->mnt_dependent_pid = proc_pid(p); - mount_unlock(mp); -} - - int vfs_authopaque(mount_t mp) { @@ -923,7 +679,12 @@ 
vfs_setfsprivate(mount_t mp, void *mntdata) mount_unlock(mp); } - +/* query whether the mount point supports native EAs */ +int +vfs_nativexattrs(mount_t mp) { + return (mp->mnt_kern_flag & MNTK_EXTENDED_ATTRS); +} + /* * return the block size of the underlying * device associated with mount_t @@ -1050,12 +811,10 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) || (vfe->vfe_opvdescs == (struct vnodeopv_desc **)NULL)) return(EINVAL); -#if !CONFIG_VFS_FUNNEL - /* Non-threadsafe filesystems are not supported e.g. on K64 & iOS */ + /* Non-threadsafe filesystems are not supported */ if ((vfe->vfe_flags & (VFS_TBLTHREADSAFE | VFS_TBLFSNODELOCK)) == 0) { return (EINVAL); } -#endif /* !CONFIG_VFS_FUNNEL */ MALLOC(newvfstbl, void *, sizeof(struct vfstable), M_TEMP, M_WAITOK); @@ -1078,12 +837,6 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_PAGEINV2; if (vfe->vfe_flags & VFS_TBLVNOP_PAGEOUTV2) newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_PAGEOUTV2; -#if CONFIG_VFS_FUNNEL - if (vfe->vfe_flags & VFS_TBLTHREADSAFE) - newvfstbl->vfc_vfsflags |= VFC_VFSTHREADSAFE; - if (vfe->vfe_flags & VFS_TBLFSNODELOCK) - newvfstbl->vfc_vfsflags |= VFC_VFSTHREADSAFE; -#endif /* CONFIG_VFS_FUNNEL */ if ((vfe->vfe_flags & VFS_TBLLOCALVOL) == VFS_TBLLOCALVOL) newvfstbl->vfc_flags |= MNT_LOCAL; if ((vfe->vfe_flags & VFS_TBLLOCALVOL) && (vfe->vfe_flags & VFS_TBLGENERICMNTARGS) == 0) @@ -1099,6 +852,8 @@ vfs_fsadd(struct vfs_fsentry *vfe, vfstable_t * handle) newvfstbl->vfc_vfsflags |= VFC_VFSREADDIR_EXTENDED; if (vfe->vfe_flags & VFS_TBLNOMACLABEL) newvfstbl->vfc_vfsflags |= VFC_VFSNOMACLABEL; + if (vfe->vfe_flags & VFS_TBLVNOP_NOUPDATEID_RENAME) + newvfstbl->vfc_vfsflags |= VFC_VFSVNOP_NOUPDATEID_RENAME; /* * Allocate and init the vectors. @@ -2056,13 +1811,6 @@ vnode_setparent(vnode_t vp, vnode_t dvp) vp->v_parent = dvp; } -const char * -vnode_name(vnode_t vp) -{ - /* we try to keep v_name a reasonable name for the node */ - return(vp->v_name); -} - void vnode_setname(vnode_t vp, char * name) { @@ -2463,7 +2211,7 @@ vnode_getattr(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) if (VATTR_NOT_RETURNED(vap, va_acl) || VATTR_NOT_RETURNED(vap, va_uuuid) || VATTR_NOT_RETURNED(vap, va_guuid)) { fsec = NULL; - if ((vp->v_type == VDIR) || (vp->v_type == VLNK) || (vp->v_type == VREG)) { + if (XATTR_VNODE_SUPPORTED(vp)) { /* try to get the filesec */ if ((error = vnode_get_filesec(vp, &fsec, ctx)) != 0) goto out; @@ -2771,7 +2519,7 @@ vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx) * Fail for file types that we don't permit extended security * to be set on. 
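/*
 * [editor's note] The two ACL call sites in vnode_getattr() and
 * vnode_setattr_fallback() replace an inline vnode-type test with the
 * XATTR_VNODE_SUPPORTED() macro. A definition consistent with both
 * uses (an assumption -- the macro itself is defined elsewhere in the
 * patched headers, not in this hunk):
 *
 *   #define XATTR_VNODE_SUPPORTED(vp) \
 *       ((vp)->v_type == VREG || (vp)->v_type == VDIR || \
 *        (vp)->v_type == VLNK)
 */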
*/ - if ((vp->v_type != VDIR) && (vp->v_type != VLNK) && (vp->v_type != VREG)) { + if (!XATTR_VNODE_SUPPORTED(vp)) { VFS_DEBUG(ctx, vp, "SETATTR - Can't write ACL to file type %d", vnode_vtype(vp)); error = EINVAL; goto out; @@ -3013,11 +2761,6 @@ VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t { int _err; struct vnop_lookup_args a; - vnode_t vp; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_lookup_desc; a.a_dvp = dvp; @@ -3025,41 +2768,10 @@ VNOP_LOOKUP(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, vfs_context_t a.a_cnp = cnp; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(dvp); - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*dvp->v_op[vnop_lookup_desc.vdesc_offset])(&a); - - vp = *vpp; - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - if ( (cnp->cn_flags & ISLASTCN) ) { - if ( (cnp->cn_flags & LOCKPARENT) ) { - if ( !(cnp->cn_flags & FSNODELOCKHELD) ) { - /* - * leave the fsnode lock held on - * the directory, but restore the funnel... - * also indicate that we need to drop the - * fsnode_lock when we're done with the - * system call processing for this path - */ - cnp->cn_flags |= FSNODELOCKHELD; - - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } - unlock_fsnode(dvp, &funnel_state); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(lookup, vnode_t, *vpp); } -#endif /* CONFIG_VFS_FUNNEL */ return (_err); } @@ -3114,6 +2826,15 @@ VNOP_COMPOUND_OPEN(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t fla } _err = (*dvp->v_op[vnop_compound_open_desc.vdesc_offset])(&a); + if (want_create) { + if (_err == 0 && *vpp) { + DTRACE_FSINFO(compound_open, vnode_t, *vpp); + } else { + DTRACE_FSINFO(compound_open, vnode_t, dvp); + } + } else { + DTRACE_FSINFO(compound_open, vnode_t, *vpp); + } did_create = (*a.a_status & COMPOUND_OPEN_STATUS_DID_CREATE); @@ -3122,13 +2843,14 @@ VNOP_COMPOUND_OPEN(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t fla } if (did_create) { +#if CONFIG_APPLEDOUBLE if (!NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). */ xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } - +#endif /* CONFIG_APPLEDOUBLE */ /* On create, provide kqueue notification */ post_event_if_success(dvp, _err, NOTE_WRITE); } @@ -3160,10 +2882,6 @@ VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode { int _err; struct vnop_create_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_create_desc; a.a_dvp = dvp; @@ -3172,28 +2890,19 @@ VNOP_CREATE(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode a.a_vap = vap; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(dvp); - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } + _err = (*dvp->v_op[vnop_create_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(create, vnode_t, *vpp); } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*dvp->v_op[vnop_create_desc.vdesc_offset])(&a); +#if CONFIG_APPLEDOUBLE if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). 
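/*
 * [editor's note] VNOP_COMPOUND_OPEN picks the DTrace probe's vnode by
 * outcome: on the create path it reports the new *vpp when the open
 * succeeded and falls back to the parent dvp otherwise, while the
 * plain-open path always reports *vpp. Either way the event remains
 * attributable to a filesystem object.
 */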
*/ xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ +#endif /* CONFIG_APPLEDOUBLE */ post_event_if_success(dvp, _err, NOTE_WRITE); @@ -3221,10 +2930,6 @@ VNOP_WHITEOUT(vnode_t dvp, struct componentname * cnp, int flags, vfs_context_t { int _err; struct vnop_whiteout_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_whiteout_desc; a.a_dvp = dvp; @@ -3232,29 +2937,15 @@ VNOP_WHITEOUT(vnode_t dvp, struct componentname * cnp, int flags, vfs_context_t a.a_flags = flags; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(dvp); - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*dvp->v_op[vnop_whiteout_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(whiteout, vnode_t, dvp); post_event_if_success(dvp, _err, NOTE_WRITE); return (_err); } - #if 0 +#if 0 /* *# *#% mknod dvp L U U @@ -3276,10 +2967,6 @@ VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_ int _err; struct vnop_mknod_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_mknod_desc; a.a_dvp = dvp; @@ -3288,22 +2975,10 @@ VNOP_MKNOD(vnode_t dvp, vnode_t * vpp, struct componentname * cnp, struct vnode_ a.a_vap = vap; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(dvp); - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*dvp->v_op[vnop_mknod_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(mknod, vnode_t, *vpp); } -#endif /* CONFIG_VFS_FUNNEL */ post_event_if_success(dvp, _err, NOTE_WRITE); @@ -3328,10 +3003,6 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx) { int _err; struct vnop_open_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -3341,29 +3012,8 @@ VNOP_OPEN(vnode_t vp, int mode, vfs_context_t ctx) a.a_mode = mode; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_open_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(open, vnode_t, vp); return (_err); } @@ -3386,10 +3036,6 @@ VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx) { int _err; struct vnop_close_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -3399,29 +3045,8 @@ VNOP_CLOSE(vnode_t vp, int fflag, vfs_context_t ctx) a.a_fflag = fflag; a.a_context 
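/*
 * [editor's sketch] The mechanical pattern repeated through these
 * VNOP_* wrappers: delete the funnel lock/unlock bracket around the
 * dispatch and add one DTrace fsinfo probe after it. Assuming the
 * DTRACE_FSINFO() macro introduced by this patch, each wrapper reduces
 * to the shape below (VNOP_CLOSE shown):
 */
_err = (*vp->v_op[vnop_close_desc.vdesc_offset])(&a);  /* dispatch */
DTRACE_FSINFO(close, vnode_t, vp);                     /* fire probe */
return (_err);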
= ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_close_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(close, vnode_t, vp); return (_err); } @@ -3444,10 +3069,6 @@ VNOP_ACCESS(vnode_t vp, int action, vfs_context_t ctx) { int _err; struct vnop_access_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -3457,22 +3078,8 @@ VNOP_ACCESS(vnode_t vp, int action, vfs_context_t ctx) a.a_action = action; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_access_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(access, vnode_t, vp); return (_err); } @@ -3495,32 +3102,14 @@ VNOP_GETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) { int _err; struct vnop_getattr_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_getattr_desc; a.a_vp = vp; a.a_vap = vap; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_getattr_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(getattr, vnode_t, vp); return (_err); } @@ -3543,27 +3132,16 @@ VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) { int _err; struct vnop_setattr_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_setattr_desc; a.a_vp = vp; a.a_vap = vap; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_setattr_desc.vdesc_offset])(&a); + DTRACE_FSINFO(setattr, vnode_t, vp); +#if CONFIG_APPLEDOUBLE /* * Shadow uid/gid/mod change to extended attribute file. 
*/ @@ -3598,12 +3176,7 @@ VNOP_SETATTR(vnode_t vp, struct vnode_attr * vap, vfs_context_t ctx) vnode_putname(vname); } } - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ +#endif /* CONFIG_APPLEDOUBLE */ /* * If we have changed any of the things about the file that are likely @@ -3657,10 +3230,9 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) { int _err; struct vnop_read_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif if (ctx == NULL) { ctx = vfs_context_current(); @@ -3672,29 +3244,9 @@ VNOP_READ(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) a.a_ioflag = ioflag; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_read_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO_IO(read, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); return (_err); } @@ -3719,10 +3271,9 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) { struct vnop_write_args a; int _err; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif if (ctx == NULL) { ctx = vfs_context_current(); @@ -3734,29 +3285,9 @@ VNOP_WRITE(vnode_t vp, struct uio * uio, int ioflag, vfs_context_t ctx) a.a_ioflag = ioflag; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_write_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO_IO(write, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); post_event_if_success(vp, _err, NOTE_WRITE); @@ -3784,10 +3315,6 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct { int _err; struct vnop_ioctl_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -3820,29 +3347,8 @@ VNOP_IOCTL(vnode_t vp, u_long command, caddr_t data, int fflag, vfs_context_t ct a.a_fflag = fflag; a.a_context= ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - 
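/*
 * [editor's note] For the data-moving entry points (VNOP_READ above,
 * VNOP_WRITE, and later readdir/readdirattr/readlink), the probe is
 * DTRACE_FSINFO_IO and its payload is the byte count actually
 * transferred, measured as the uio residual delta around the dispatch:
 */
#if CONFIG_DTRACE
user_ssize_t resid = uio_resid(uio);    /* bytes requested */
#endif
_err = (*vp->v_op[vnop_read_desc.vdesc_offset])(&a);
DTRACE_FSINFO_IO(read,
    vnode_t, vp, user_ssize_t, (resid - uio_resid(uio)));  /* moved */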
(void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_ioctl_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(ioctl, vnode_t, vp); return (_err); } @@ -3868,10 +3374,6 @@ VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx) { int _err; struct vnop_select_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -3883,29 +3385,8 @@ VNOP_SELECT(vnode_t vp, int which , int fflags, void * wql, vfs_context_t ctx) a.a_context = ctx; a.a_wql = wql; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - if ( (_err = lock_fsnode(vp, NULL)) ) { - (void) thread_funnel_set(kernel_flock, funnel_state); - return (_err); - } - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_select_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - if (vp->v_type != VCHR && vp->v_type != VFIFO && vp->v_type != VSOCK) { - unlock_fsnode(vp, NULL); - } - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(select, vnode_t, vp); return (_err); } @@ -3931,11 +3412,6 @@ VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t ctx) { int _err; struct vnop_exchange_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; - vnode_t lock_first = NULL, lock_second = NULL; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_exchange_desc; a.a_fvp = fvp; @@ -3943,37 +3419,8 @@ VNOP_EXCHANGE(vnode_t fvp, vnode_t tvp, int options, vfs_context_t ctx) a.a_options = options; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(fvp); - if (!thread_safe) { - /* - * Lock in vnode address order to avoid deadlocks - */ - if (fvp < tvp) { - lock_first = fvp; - lock_second = tvp; - } else { - lock_first = tvp; - lock_second = fvp; - } - if ( (_err = lock_fsnode(lock_first, &funnel_state)) ) { - return (_err); - } - if ( (_err = lock_fsnode(lock_second, NULL)) ) { - unlock_fsnode(lock_first, &funnel_state); - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*fvp->v_op[vnop_exchange_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(lock_second, NULL); - unlock_fsnode(lock_first, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(exchange, vnode_t, fvp); /* Don't post NOTE_WRITE because file descriptors follow the data ... 
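/*
 * [editor's note] The funnel path deleted from VNOP_EXCHANGE took the
 * two fsnode locks in vnode-address order (lowest address first) so
 * that concurrent exchanges of the same pair could not deadlock; under
 * per-filesystem locking that discipline is now the filesystem's own
 * responsibility. The removed ordering, restated:
 *
 *   vnode_t lock_first  = (fvp < tvp) ? fvp : tvp;
 *   vnode_t lock_second = (fvp < tvp) ? tvp : fvp;
 */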
*/ post_event_if_success(fvp, _err, NOTE_ATTRIB); @@ -4001,30 +3448,14 @@ VNOP_REVOKE(vnode_t vp, int flags, vfs_context_t ctx) { struct vnop_revoke_args a; int _err; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_revoke_desc; a.a_vp = vp; a.a_flags = flags; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_revoke_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(revoke, vnode_t, vp); return (_err); } @@ -4048,32 +3479,14 @@ VNOP_MMAP(vnode_t vp, int fflags, vfs_context_t ctx) { int _err; struct vnop_mmap_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_mmap_desc; a.a_vp = vp; a.a_fflags = fflags; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_mmap_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(mmap, vnode_t, vp); return (_err); } @@ -4096,31 +3509,13 @@ VNOP_MNOMAP(vnode_t vp, vfs_context_t ctx) { int _err; struct vnop_mnomap_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_mnomap_desc; a.a_vp = vp; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_mnomap_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(mnomap, vnode_t, vp); return (_err); } @@ -4144,32 +3539,14 @@ VNOP_FSYNC(vnode_t vp, int waitfor, vfs_context_t ctx) { struct vnop_fsync_args a; int _err; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_fsync_desc; a.a_vp = vp; a.a_waitfor = waitfor; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_fsync_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(fsync, vnode_t, vp); return (_err); } @@ -4196,10 +3573,6 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_ { int _err; struct vnop_remove_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_remove_desc; a.a_dvp = dvp; @@ -4208,34 +3581,21 @@ VNOP_REMOVE(vnode_t dvp, vnode_t vp, struct componentname * cnp, int flags, vfs_ a.a_flags = flags; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(dvp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = 
(*dvp->v_op[vnop_remove_desc.vdesc_offset])(&a); + DTRACE_FSINFO(remove, vnode_t, vp); if (_err == 0) { vnode_setneedinactive(vp); - +#if CONFIG_APPLEDOUBLE if ( !(NATIVE_XATTR(dvp)) ) { /* * Remove any associated extended attribute file (._ AppleDouble file). */ xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 1); } +#endif /* CONFIG_APPLEDOUBLE */ } -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - post_event_if_success(vp, _err, NOTE_DELETE | NOTE_LINK); post_event_if_success(dvp, _err, NOTE_WRITE); @@ -4259,15 +3619,21 @@ VNOP_COMPOUND_REMOVE(vnode_t dvp, vnode_t *vpp, struct nameidata *ndp, int32_t f a.a_remove_authorizer = vn_authorize_unlink; _err = (*dvp->v_op[vnop_compound_remove_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(compound_remove, vnode_t, *vpp); + } else { + DTRACE_FSINFO(compound_remove, vnode_t, dvp); + } if (_err == 0) { vnode_setneedinactive(*vpp); - +#if CONFIG_APPLEDOUBLE if ( !(NATIVE_XATTR(dvp)) ) { /* * Remove any associated extended attribute file (._ AppleDouble file). */ xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 1); } +#endif /* CONFIG_APPLEDOUBLE */ } post_event_if_success(*vpp, _err, NOTE_DELETE | NOTE_LINK); @@ -4306,11 +3672,8 @@ VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t ct { int _err; struct vnop_link_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ +#if CONFIG_APPLEDOUBLE /* * For file systems with non-native extended attributes, * disallow linking to an existing "._" Apple Double file. @@ -4329,28 +3692,16 @@ VNOP_LINK(vnode_t vp, vnode_t tdvp, struct componentname * cnp, vfs_context_t ct return (_err); } } +#endif /* CONFIG_APPLEDOUBLE */ + a.a_desc = &vnop_link_desc; a.a_vp = vp; a.a_tdvp = tdvp; a.a_cnp = cnp; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*tdvp->v_op[vnop_link_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(link, vnode_t, vp); post_event_if_success(vp, _err, NOTE_LINK); post_event_if_success(tdvp, _err, NOTE_WRITE); @@ -4364,27 +3715,26 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s uint32_t flags, vfs_context_t ctx) { int _err; - vnode_t src_attr_vp = NULLVP; - vnode_t dst_attr_vp = NULLVP; struct nameidata *fromnd = NULL; struct nameidata *tond = NULL; +#if CONFIG_APPLEDOUBLE + vnode_t src_attr_vp = NULLVP; + vnode_t dst_attr_vp = NULLVP; char smallname1[48]; char smallname2[48]; char *xfromname = NULL; char *xtoname = NULL; +#endif /* CONFIG_APPLEDOUBLE */ int batched; batched = vnode_compound_rename_available(fdvp); -#if CONFIG_VFS_FUNNEL - vnode_t fdvp_unsafe = (THREAD_SAFE_FS(fdvp) ? 
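/*
 * [editor's note] From VNOP_REMOVE onward, every piece of "._"
 * AppleDouble shadow-file maintenance -- the xattrfile_remove() calls
 * and the vn_rename() preflight below -- moves under a new
 * CONFIG_APPLEDOUBLE option, since only filesystems without native
 * xattr support (!NATIVE_XATTR(dvp)) ever carry those sibling files.
 * A hypothetical sketch of the sibling name the rename preflight has
 * to build (exact buffer handling is in unshown context):
 *
 *   len = snprintf(xfromname, MAXPATHLEN, "._%s", fcnp->cn_nameptr);
 */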
NULLVP : fdvp); -#endif /* CONFIG_VFS_FUNNEL */ - if (!batched) { if (*fvpp == NULLVP) panic("Not batched, and no fvp?"); } +#if CONFIG_APPLEDOUBLE /* * We need to preflight any potential AppleDouble file for the source file * before doing the rename operation, since we could potentially be doing @@ -4459,21 +3809,23 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s } } } +#endif /* CONFIG_APPLEDOUBLE */ if (batched) { _err = VNOP_COMPOUND_RENAME(fdvp, fvpp, fcnp, fvap, tdvp, tvpp, tcnp, tvap, flags, ctx); if (_err != 0) { printf("VNOP_COMPOUND_RENAME() returned %d\n", _err); } - } else { _err = VNOP_RENAME(fdvp, *fvpp, fcnp, tdvp, *tvpp, tcnp, ctx); } - +#if CONFIG_MACF if (_err == 0) { mac_vnode_notify_rename(ctx, *fvpp, tdvp, tcnp); } +#endif +#if CONFIG_APPLEDOUBLE /* * Rename any associated extended attribute file (._ AppleDouble file). */ @@ -4493,7 +3845,7 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s error = namei(tond); if (error) - goto out; + goto ad_error; if (tond->ni_vp) { dst_attr_vp = tond->ni_vp; @@ -4519,11 +3871,13 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s if (fdvp != tdvp) update_flags |= VNODE_UPDATE_PARENT; - vnode_update_identity(src_attr_vp, tdvp, + if ((src_attr_vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSVNOP_NOUPDATEID_RENAME) == 0) { + vnode_update_identity(src_attr_vp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags); + } } /* kevent notifications for moving resource files @@ -4551,18 +3905,9 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s args.a_cnp = &tond->ni_cnd; args.a_context = ctx; -#if CONFIG_VFS_FUNNEL - if (fdvp_unsafe != NULLVP) - error = lock_fsnode(dst_attr_vp, NULL); -#endif /* CONFIG_VFS_FUNNEL */ if (error == 0) { error = (*tdvp->v_op[vnop_remove_desc.vdesc_offset])(&args); -#if CONFIG_VFS_FUNNEL - if (fdvp_unsafe != NULLVP) - unlock_fsnode(dst_attr_vp, NULL); -#endif /* CONFIG_VFS_FUNNEL */ - if (error == 0) vnode_setneedinactive(dst_attr_vp); } @@ -4574,7 +3919,7 @@ vn_rename(struct vnode *fdvp, struct vnode **fvpp, struct componentname *fcnp, s post_event_if_success(dst_attr_vp, error, NOTE_DELETE); } } -out: +ad_error: if (src_attr_vp) { vnode_put(src_attr_vp); nameidone(fromnd); @@ -4583,19 +3928,19 @@ out: vnode_put(dst_attr_vp); nameidone(tond); } - if (fromnd) { - FREE(fromnd, M_TEMP); - } - if (tond) { - FREE(tond, M_TEMP); - } if (xfromname && xfromname != &smallname1[0]) { FREE(xfromname, M_TEMP); } if (xtoname && xtoname != &smallname2[0]) { FREE(xtoname, M_TEMP); } - +#endif /* CONFIG_APPLEDOUBLE */ + if (fromnd) { + FREE(fromnd, M_TEMP); + } + if (tond) { + FREE(tond, M_TEMP); + } return _err; } @@ -4628,12 +3973,6 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, int _err = 0; int events; struct vnop_rename_args a; -#if CONFIG_VFS_FUNNEL - int funnel_state = 0; - vnode_t lock_first = NULL, lock_second = NULL; - vnode_t fdvp_unsafe = NULLVP; - vnode_t tdvp_unsafe = NULLVP; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_rename_desc; a.a_fdvp = fdvp; @@ -4644,88 +3983,17 @@ VNOP_RENAME(struct vnode *fdvp, struct vnode *fvp, struct componentname *fcnp, a.a_tcnp = tcnp; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - if (!THREAD_SAFE_FS(fdvp)) - fdvp_unsafe = fdvp; - if (!THREAD_SAFE_FS(tdvp)) - tdvp_unsafe = tdvp; - - if (fdvp_unsafe != NULLVP) { - /* - * Lock parents in vnode address order to avoid deadlocks - * 
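/*
 * [editor's note] Two behavioural details in the rename path above:
 * (1) vnode_update_identity() on the shadow file is now skipped for
 * filesystems advertising VFC_VFSVNOP_NOUPDATEID_RENAME, which lets
 * such filesystems keep vnode identity current themselves (an
 * inference from the flag name); (2) the cleanup label becomes
 * ad_error: and the FREE(fromnd)/FREE(tond) releases move outside the
 * CONFIG_APPLEDOUBLE block, so the nameidata buffers are freed on
 * every path even when the AppleDouble code is compiled out.
 */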
note that it's possible for the fdvp to be unsafe, - * but the tdvp to be safe because tvp could be a directory - * in the root of a filesystem... in that case, tdvp is the - * in the filesystem that this root is mounted on - */ - if (tdvp_unsafe == NULL || fdvp_unsafe == tdvp_unsafe) { - lock_first = fdvp_unsafe; - lock_second = NULL; - } else if (fdvp_unsafe < tdvp_unsafe) { - lock_first = fdvp_unsafe; - lock_second = tdvp_unsafe; - } else { - lock_first = tdvp_unsafe; - lock_second = fdvp_unsafe; - } - if ( (_err = lock_fsnode(lock_first, &funnel_state)) ) - return (_err); - - if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) { - unlock_fsnode(lock_first, &funnel_state); - return (_err); - } - - /* - * Lock both children in vnode address order to avoid deadlocks - */ - if (tvp == NULL || tvp == fvp) { - lock_first = fvp; - lock_second = NULL; - } else if (fvp < tvp) { - lock_first = fvp; - lock_second = tvp; - } else { - lock_first = tvp; - lock_second = fvp; - } - if ( (_err = lock_fsnode(lock_first, NULL)) ) - goto out1; - - if (lock_second != NULL && (_err = lock_fsnode(lock_second, NULL))) { - unlock_fsnode(lock_first, NULL); - goto out1; - } - } -#endif /* CONFIG_VFS_FUNNEL */ - /* do the rename of the main file. */ _err = (*fdvp->v_op[vnop_rename_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (fdvp_unsafe != NULLVP) { - if (lock_second != NULL) - unlock_fsnode(lock_second, NULL); - unlock_fsnode(lock_first, NULL); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(rename, vnode_t, fdvp); if (_err == 0) { if (tvp && tvp != fvp) vnode_setneedinactive(tvp); } -#if CONFIG_VFS_FUNNEL -out1: - if (fdvp_unsafe != NULLVP) { - if (tdvp_unsafe != NULLVP) - unlock_fsnode(tdvp_unsafe, NULL); - unlock_fsnode(fdvp_unsafe, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - /* Wrote at least one directory. If transplanted a dir, also changed link counts */ - if (0 == _err) { + if (_err == 0) { events = NOTE_WRITE; if (vnode_isdir(fvp)) { /* Link count on dir changed only if we are moving a dir and... @@ -4787,6 +4055,7 @@ VNOP_COMPOUND_RENAME( /* do the rename of the main file. */ _err = (*fdvp->v_op[vnop_compound_rename_desc.vdesc_offset])(&a); + DTRACE_FSINFO(compound_rename, vnode_t, fdvp); if (_err == 0) { if (*tvpp && *tvpp != *fvpp) @@ -4794,7 +4063,7 @@ VNOP_COMPOUND_RENAME( } /* Wrote at least one directory. If transplanted a dir, also changed link counts */ - if (0 == _err && *fvpp != *tvpp) { + if (_err == 0 && *fvpp != *tvpp) { if (!*fvpp) { panic("No fvpp after compound rename?"); } @@ -4860,7 +4129,7 @@ vn_mkdir(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp, } } - #if 0 +#if 0 /* *# *#% mkdir dvp L U U @@ -4882,10 +4151,6 @@ VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, { int _err; struct vnop_mkdir_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_mkdir_desc; a.a_dvp = dvp; @@ -4894,28 +4159,18 @@ VNOP_MKDIR(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, a.a_vap = vap; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(dvp); - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*dvp->v_op[vnop_mkdir_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(mkdir, vnode_t, *vpp); + } +#if CONFIG_APPLEDOUBLE if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). 
*/ xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ +#endif /* CONFIG_APPLEDOUBLE */ post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); @@ -4942,12 +4197,17 @@ VNOP_COMPOUND_MKDIR(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp a.a_reserved = NULL; _err = (*dvp->v_op[vnop_compound_mkdir_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(compound_mkdir, vnode_t, *vpp); + } +#if CONFIG_APPLEDOUBLE if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). */ xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 0); } +#endif /* CONFIG_APPLEDOUBLE */ post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); @@ -4997,10 +4257,6 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c { int _err; struct vnop_rmdir_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_rmdir_desc; a.a_dvp = dvp; @@ -5008,34 +4264,21 @@ VNOP_RMDIR(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, vfs_c a.a_cnp = cnp; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(dvp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_rmdir_desc.vdesc_offset])(&a); + DTRACE_FSINFO(rmdir, vnode_t, vp); if (_err == 0) { vnode_setneedinactive(vp); - +#if CONFIG_APPLEDOUBLE if ( !(NATIVE_XATTR(dvp)) ) { /* * Remove any associated extended attribute file (._ AppleDouble file). */ xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 1); } +#endif } -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - /* If you delete a dir, it loses its "." reference --> NOTE_LINK */ post_event_if_success(vp, _err, NOTE_DELETE | NOTE_LINK); post_event_if_success(dvp, _err, NOTE_LINK | NOTE_WRITE); @@ -5064,12 +4307,17 @@ VNOP_COMPOUND_RMDIR(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp no_vp = (*vpp == NULLVP); _err = (*dvp->v_op[vnop_compound_rmdir_desc.vdesc_offset])(&a); + if (_err == 0 && *vpp) { + DTRACE_FSINFO(compound_rmdir, vnode_t, *vpp); + } +#if CONFIG_APPLEDOUBLE if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). 
*/ xattrfile_remove(dvp, ndp->ni_cnd.cn_nameptr, ctx, 0); } +#endif if (*vpp) { post_event_if_success(*vpp, _err, NOTE_DELETE | NOTE_LINK); @@ -5090,6 +4338,7 @@ VNOP_COMPOUND_RMDIR(struct vnode *dvp, struct vnode **vpp, struct nameidata *ndp return (_err); } +#if CONFIG_APPLEDOUBLE /* * Remove a ._ AppleDouble file */ @@ -5204,9 +4453,6 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, nameidone(&nd); if (xvp->v_type == VREG) { -#if CONFIG_VFS_FUNNEL - int thread_safe = THREAD_SAFE_FS(dvp); -#endif /* CONFIG_VFS_FUNNEL */ struct vnop_setattr_args a; a.a_desc = &vnop_setattr_desc; @@ -5214,33 +4460,16 @@ xattrfile_setattr(vnode_t dvp, const char * basename, struct vnode_attr * vap, a.a_vap = vap; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - if ( (lock_fsnode(xvp, NULL)) ) - goto out1; - } -#endif /* CONFIG_VFS_FUNNEL */ - (void) (*xvp->v_op[vnop_setattr_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(xvp, NULL); - } -#endif /* CONFIG_VFS_FUNNEL */ } - -#if CONFIG_VFS_FUNNEL -out1: -#endif /* CONFIG_VFS_FUNNEL */ vnode_put(xvp); - out2: if (filename && filename != &smallname[0]) { FREE(filename, M_TEMP); } } +#endif /* CONFIG_APPLEDOUBLE */ #if 0 /* @@ -5266,10 +4495,6 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, { int _err; struct vnop_symlink_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_symlink_desc; a.a_dvp = dvp; @@ -5279,29 +4504,17 @@ VNOP_SYMLINK(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, a.a_target = target; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(dvp); - if (!thread_safe) { - if ( (_err = lock_fsnode(dvp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*dvp->v_op[vnop_symlink_desc.vdesc_offset])(&a); + DTRACE_FSINFO(symlink, vnode_t, dvp); +#if CONFIG_APPLEDOUBLE if (_err == 0 && !NATIVE_XATTR(dvp)) { /* * Remove stale Apple Double file (if any). 
Posts its own knotes */ xattrfile_remove(dvp, cnp->cn_nameptr, ctx, 0); } +#endif /* CONFIG_APPLEDOUBLE */ -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(dvp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - post_event_if_success(dvp, _err, NOTE_WRITE); return (_err); @@ -5330,10 +4543,9 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag, { int _err; struct vnop_readdir_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif a.a_desc = &vnop_readdir_desc; a.a_vp = vp; @@ -5342,23 +4554,11 @@ VNOP_READDIR(struct vnode *vp, struct uio *uio, int flags, int *eofflag, a.a_eofflag = eofflag; a.a_numdirent = numdirent; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ _err = (*vp->v_op[vnop_readdir_desc.vdesc_offset])(&a); + DTRACE_FSINFO_IO(readdir, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ return (_err); } @@ -5388,10 +4588,9 @@ VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, uint { int _err; struct vnop_readdirattr_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif a.a_desc = &vnop_readdirattr_desc; a.a_vp = vp; @@ -5404,22 +4603,9 @@ VNOP_READDIRATTR(struct vnode *vp, struct attrlist *alist, struct uio *uio, uint a.a_actualcount = actualcount; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_readdirattr_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO_IO(readdirattr, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); return (_err); } @@ -5462,32 +4648,17 @@ VNOP_READLINK(struct vnode *vp, struct uio *uio, vfs_context_t ctx) { int _err; struct vnop_readlink_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ - +#if CONFIG_DTRACE + user_ssize_t resid = uio_resid(uio); +#endif a.a_desc = &vnop_readlink_desc; a.a_vp = vp; a.a_uio = uio; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_readlink_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO_IO(readlink, + vnode_t, vp, user_ssize_t, (resid - uio_resid(uio))); return (_err); } @@ -5509,31 +4680,13 @@ VNOP_INACTIVE(struct vnode *vp, vfs_context_t ctx) { int _err; struct vnop_inactive_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_inactive_desc; a.a_vp = vp; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* 
CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_inactive_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(inactive, vnode_t, vp); #if NAMEDSTREAMS /* For file systems that do not support namedstream natively, mark @@ -5570,29 +4723,13 @@ VNOP_RECLAIM(struct vnode *vp, vfs_context_t ctx) { int _err; struct vnop_reclaim_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_reclaim_desc; a.a_vp = vp; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_reclaim_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(reclaim, vnode_t, vp); return (_err); } @@ -5624,10 +4761,6 @@ VNOP_PATHCONF(struct vnode *vp, int name, int32_t *retval, vfs_context_t ctx) { int _err; struct vnop_pathconf_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_pathconf_desc; a.a_vp = vp; @@ -5635,22 +4768,8 @@ VNOP_PATHCONF(struct vnode *vp, int name, int32_t *retval, vfs_context_t ctx) a.a_retval = retval; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_pathconf_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(pathconf, vnode_t, vp); return (_err); } @@ -5683,14 +4802,10 @@ struct vnop_advlock_args { }; #endif /* 0*/ errno_t -VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, vfs_context_t ctx) +VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, vfs_context_t ctx, struct timespec *timeout) { int _err; struct vnop_advlock_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_advlock_desc; a.a_vp = vp; @@ -5699,13 +4814,7 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, a.a_fl = fl; a.a_flags = flags; a.a_context = ctx; - -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ + a.a_timeout = timeout; /* Disallow advisory locking on non-seekable vnodes */ if (vnode_isfifo(vp)) { @@ -5718,14 +4827,9 @@ VNOP_ADVLOCK(struct vnode *vp, caddr_t id, int op, struct flock *fl, int flags, /* Advisory locking done by underlying filesystem */ _err = (*vp->v_op[vnop_advlock_desc.vdesc_offset])(&a); } + DTRACE_FSINFO(advlock, vnode_t, vp); } -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return (_err); } @@ -5753,10 +4857,6 @@ VNOP_ALLOCATE(struct vnode *vp, off_t length, u_int32_t flags, off_t *bytesalloc { int _err; struct vnop_allocate_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_allocate_desc; a.a_vp = vp; @@ -5766,28 +4866,14 @@ VNOP_ALLOCATE(struct vnode *vp, off_t 
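/*
 * [editor's sketch] VNOP_ADVLOCK grows a struct timespec *timeout
 * parameter, plumbed through as a.a_timeout. A hypothetical caller
 * that wants the old untimed semantics simply passes NULL (fl and ctx
 * assumed set up as before):
 *
 *   _err = VNOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, F_POSIX,
 *       ctx, NULL);
 */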
length, u_int32_t flags, off_t *bytesalloc a.a_offset = offset; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_allocate_desc.vdesc_offset])(&a); + DTRACE_FSINFO(allocate, vnode_t, vp); #if CONFIG_FSE if (_err == 0) { add_fsevent(FSE_STAT_CHANGED, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); } #endif -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return (_err); } @@ -5813,10 +4899,6 @@ VNOP_PAGEIN(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset, { int _err; struct vnop_pagein_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_pagein_desc; a.a_vp = vp; @@ -5827,21 +4909,9 @@ VNOP_PAGEIN(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset, a.a_flags = flags; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_pagein_desc.vdesc_offset])(&a); + DTRACE_FSINFO(pagein, vnode_t, vp); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return (_err); } @@ -5868,10 +4938,6 @@ VNOP_PAGEOUT(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset, { int _err; struct vnop_pageout_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_pageout_desc; a.a_vp = vp; @@ -5882,20 +4948,8 @@ VNOP_PAGEOUT(struct vnode *vp, upl_t pl, upl_offset_t pl_offset, off_t f_offset, a.a_flags = flags; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_pageout_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(pageout, vnode_t, vp); post_event_if_success(vp, _err, NOTE_WRITE); @@ -5943,10 +4997,6 @@ VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct { int _err; struct vnop_searchfs_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_searchfs_desc; a.a_vp = vp; @@ -5963,22 +5013,8 @@ VNOP_SEARCHFS(struct vnode *vp, void *searchparams1, void *searchparams2, struct a.a_searchstate = searchstate; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_searchfs_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(searchfs, vnode_t, vp); return (_err); } @@ -6018,6 +5054,7 @@ VNOP_COPYFILE(struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct c a.a_flags = flags; a.a_context = ctx; _err = (*fvp->v_op[vnop_copyfile_desc.vdesc_offset])(&a); + DTRACE_FSINFO(copyfile, vnode_t, fvp); return (_err); } @@ -6026,10 +5063,6 @@ VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t 
*size, int options { struct vnop_getxattr_args a; int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_getxattr_desc; a.a_vp = vp; @@ -6039,22 +5072,8 @@ VNOP_GETXATTR(vnode_t vp, const char *name, uio_t uio, size_t *size, int options a.a_options = options; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (error = lock_fsnode(vp, &funnel_state)) ) { - return (error); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*vp->v_op[vnop_getxattr_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(getxattr, vnode_t, vp); return (error); } @@ -6064,10 +5083,6 @@ VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_ { struct vnop_setxattr_args a; int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_setxattr_desc; a.a_vp = vp; @@ -6076,22 +5091,8 @@ VNOP_SETXATTR(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_ a.a_options = options; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (error = lock_fsnode(vp, &funnel_state)) ) { - return (error); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*vp->v_op[vnop_setxattr_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(setxattr, vnode_t, vp); if (error == 0) vnode_uncache_authorized_action(vp, KAUTH_INVALIDATE_CACHED_RIGHTS); @@ -6106,10 +5107,6 @@ VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t ctx) { struct vnop_removexattr_args a; int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_removexattr_desc; a.a_vp = vp; @@ -6117,22 +5114,8 @@ VNOP_REMOVEXATTR(vnode_t vp, const char *name, int options, vfs_context_t ctx) a.a_options = options; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (error = lock_fsnode(vp, &funnel_state)) ) { - return (error); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*vp->v_op[vnop_removexattr_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(removexattr, vnode_t, vp); post_event_if_success(vp, error, NOTE_ATTRIB); @@ -6144,10 +5127,6 @@ VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t c { struct vnop_listxattr_args a; int error; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_listxattr_desc; a.a_vp = vp; @@ -6156,22 +5135,8 @@ VNOP_LISTXATTR(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t c a.a_options = options; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (error = lock_fsnode(vp, &funnel_state)) ) { - return (error); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - error = (*vp->v_op[vnop_listxattr_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(listxattr, vnode_t, vp); return (error); } @@ -6195,30 +5160,14 @@ VNOP_BLKTOOFF(struct vnode *vp, daddr64_t lblkno, off_t 
*offset) { int _err; struct vnop_blktooff_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_blktooff_desc; a.a_vp = vp; a.a_lblkno = lblkno; a.a_offset = offset; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_blktooff_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(blktooff, vnode_t, vp); return (_err); } @@ -6241,30 +5190,14 @@ VNOP_OFFTOBLK(struct vnode *vp, off_t offset, daddr64_t *lblkno) { int _err; struct vnop_offtoblk_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = &vnop_offtoblk_desc; a.a_vp = vp; a.a_offset = offset; a.a_lblkno = lblkno; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_offtoblk_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(offtoblk, vnode_t, vp); return (_err); } @@ -6293,10 +5226,6 @@ VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size int _err; struct vnop_blockmap_args a; size_t localrun = 0; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ if (ctx == NULL) { ctx = vfs_context_current(); @@ -6311,20 +5240,8 @@ VNOP_BLOCKMAP(struct vnode *vp, off_t foffset, size_t size, daddr64_t *bpn, size a.a_flags = flags; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - funnel_state = thread_funnel_set(kernel_flock, TRUE); - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_blockmap_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - (void) thread_funnel_set(kernel_flock, funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(blockmap, vnode_t, vp); /* * We used a local variable to request information from the underlying @@ -6357,9 +5274,11 @@ VNOP_STRATEGY(struct buf *bp) { int _err; struct vnop_strategy_args a; + vnode_t vp = buf_vnode(bp); a.a_desc = &vnop_strategy_desc; a.a_bp = bp; - _err = (*buf_vnode(bp)->v_op[vnop_strategy_desc.vdesc_offset])(&a); + _err = (*vp->v_op[vnop_strategy_desc.vdesc_offset])(&a); + DTRACE_FSINFO(strategy, vnode_t, vp); return (_err); } @@ -6374,9 +5293,11 @@ VNOP_BWRITE(struct buf *bp) { int _err; struct vnop_bwrite_args a; + vnode_t vp = buf_vnode(bp); a.a_desc = &vnop_bwrite_desc; a.a_bp = bp; - _err = (*buf_vnode(bp)->v_op[vnop_bwrite_desc.vdesc_offset])(&a); + _err = (*vp->v_op[vnop_bwrite_desc.vdesc_offset])(&a); + DTRACE_FSINFO(bwrite, vnode_t, vp); return (_err); } @@ -6393,33 +5314,15 @@ VNOP_KQFILT_ADD(struct vnode *vp, struct knote *kn, vfs_context_t ctx) { int _err; struct vnop_kqfilt_add_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = VDESC(vnop_kqfilt_add); a.a_vp = vp; a.a_kn = kn; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = 
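/*
 * [editor's note] VNOP_STRATEGY and VNOP_BWRITE now read buf_vnode(bp)
 * once into a local, so the dispatch and the probe are guaranteed to
 * name the same vnode:
 *
 *   vnode_t vp = buf_vnode(bp);
 *   _err = (*vp->v_op[vnop_strategy_desc.vdesc_offset])(&a);
 *   DTRACE_FSINFO(strategy, vnode_t, vp);
 */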
(*vp->v_op[vnop_kqfilt_add_desc.vdesc_offset])(&a); + DTRACE_FSINFO(kqfilt_add, vnode_t, vp); -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ - return(_err); } @@ -6436,32 +5339,14 @@ VNOP_KQFILT_REMOVE(struct vnode *vp, uintptr_t ident, vfs_context_t ctx) { int _err; struct vnop_kqfilt_remove_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = VDESC(vnop_kqfilt_remove); a.a_vp = vp; a.a_ident = ident; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_kqfilt_remove_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(kqfilt_remove, vnode_t, vp); return(_err); } @@ -6471,10 +5356,6 @@ VNOP_MONITOR(vnode_t vp, uint32_t events, uint32_t flags, void *handle, vfs_cont { int _err; struct vnop_monitor_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = VDESC(vnop_monitor); a.a_vp = vp; @@ -6483,22 +5364,8 @@ VNOP_MONITOR(vnode_t vp, uint32_t events, uint32_t flags, void *handle, vfs_cont a.a_handle = handle; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_monitor_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(monitor, vnode_t, vp); return(_err); } @@ -6516,32 +5383,14 @@ VNOP_SETLABEL(struct vnode *vp, struct label *label, vfs_context_t ctx) { int _err; struct vnop_setlabel_args a; -#if CONFIG_VFS_FUNNEL - int thread_safe; - int funnel_state = 0; -#endif /* CONFIG_VFS_FUNNEL */ a.a_desc = VDESC(vnop_setlabel); a.a_vp = vp; a.a_vl = label; a.a_context = ctx; -#if CONFIG_VFS_FUNNEL - thread_safe = THREAD_SAFE_FS(vp); - if (!thread_safe) { - if ( (_err = lock_fsnode(vp, &funnel_state)) ) { - return (_err); - } - } -#endif /* CONFIG_VFS_FUNNEL */ - _err = (*vp->v_op[vnop_setlabel_desc.vdesc_offset])(&a); - -#if CONFIG_VFS_FUNNEL - if (!thread_safe) { - unlock_fsnode(vp, &funnel_state); - } -#endif /* CONFIG_VFS_FUNNEL */ + DTRACE_FSINFO(setlabel, vnode_t, vp); return(_err); } @@ -6554,13 +5403,9 @@ VNOP_SETLABEL(struct vnode *vp, struct label *label, vfs_context_t ctx) errno_t VNOP_GETNAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperation operation, int flags, vfs_context_t ctx) { + int _err; struct vnop_getnamedstream_args a; -#if CONFIG_VFS_FUNNEL - if (!THREAD_SAFE_FS(vp)) - return (ENOTSUP); -#endif /* CONFIG_VFS_FUNNEL */ - a.a_desc = &vnop_getnamedstream_desc; a.a_vp = vp; a.a_svpp = svpp; @@ -6569,7 +5414,9 @@ VNOP_GETNAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperatio a.a_flags = flags; a.a_context = ctx; - return (*vp->v_op[vnop_getnamedstream_desc.vdesc_offset])(&a); + _err = (*vp->v_op[vnop_getnamedstream_desc.vdesc_offset])(&a); + DTRACE_FSINFO(getnamedstream, vnode_t, vp); + return (_err); } /* @@ -6578,13 +5425,9 @@ VNOP_GETNAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperatio errno_t VNOP_MAKENAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, int 
flags, vfs_context_t ctx) { + int _err; struct vnop_makenamedstream_args a; -#if CONFIG_VFS_FUNNEL - if (!THREAD_SAFE_FS(vp)) - return (ENOTSUP); -#endif /* CONFIG_VFS_FUNNEL */ - a.a_desc = &vnop_makenamedstream_desc; a.a_vp = vp; a.a_svpp = svpp; @@ -6592,7 +5435,9 @@ VNOP_MAKENAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, int flags, vfs a.a_flags = flags; a.a_context = ctx; - return (*vp->v_op[vnop_makenamedstream_desc.vdesc_offset])(&a); + _err = (*vp->v_op[vnop_makenamedstream_desc.vdesc_offset])(&a); + DTRACE_FSINFO(makenamedstream, vnode_t, vp); + return (_err); } @@ -6602,13 +5447,9 @@ VNOP_MAKENAMEDSTREAM(vnode_t vp, vnode_t *svpp, const char *name, int flags, vfs errno_t VNOP_REMOVENAMEDSTREAM(vnode_t vp, vnode_t svp, const char *name, int flags, vfs_context_t ctx) { + int _err; struct vnop_removenamedstream_args a; -#if CONFIG_VFS_FUNNEL - if (!THREAD_SAFE_FS(vp)) - return (ENOTSUP); -#endif /* CONFIG_VFS_FUNNEL */ - a.a_desc = &vnop_removenamedstream_desc; a.a_vp = vp; a.a_svp = svp; @@ -6616,6 +5457,8 @@ VNOP_REMOVENAMEDSTREAM(vnode_t vp, vnode_t svp, const char *name, int flags, vfs a.a_flags = flags; a.a_context = ctx; - return (*vp->v_op[vnop_removenamedstream_desc.vdesc_offset])(&a); + _err = (*vp->v_op[vnop_removenamedstream_desc.vdesc_offset])(&a); + DTRACE_FSINFO(removenamedstream, vnode_t, vp); + return (_err); } #endif diff --git a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c index 7033b4cf6..25c57ee40 100644 --- a/bsd/vfs/vfs_attrlist.c +++ b/bsd/vfs/vfs_attrlist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995-2010 Apple Inc. All rights reserved. + * Copyright (c) 1995-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,60 +72,125 @@ struct _attrlist_buf { /* - * Pack (count) bytes from (source) into (buf). + * Attempt to pack a fixed width attribute of size (count) bytes from + * source to our attrlist buffer. */ static void attrlist_pack_fixed(struct _attrlist_buf *ab, void *source, ssize_t count) { + /* + * Use ssize_t for pointer math purposes, + * since an ssize_t is a signed long + */ ssize_t fit; - /* how much room left in the buffer? */ - fit = imin(count, ab->allocated - (ab->fixedcursor - ab->base)); - if (fit > 0) + /* + * Compute the amount of remaining space in the attrlist buffer + * based on how much we've used for fixed width fields vs. the + * start of the attributes. + * + * If we've still got room, then 'fit' will contain the amount of + * remaining space. + * + * Note that this math is safe because, in the event that the + * fixed-width cursor has moved beyond the end of the buffer, + * the second input into lmin() below will be negative, and + * we will fail the (fit > 0) check below. + */ + fit = lmin(count, ab->allocated - (ab->fixedcursor - ab->base)); + if (fit > 0) { + /* Copy in as much as we can */ bcopy(source, ab->fixedcursor, fit); + } - /* always move in increments of 4 */ + /* always move in increments of 4, even if we didn't pack an attribute. */ ab->fixedcursor += roundup(count, 4); } + +/* + * Attempt to pack one (or two) variable width attributes into the attrlist + * buffer. If we are trying to pack two variable width attributes, they are treated + * as a single variable-width attribute from the POV of the system call caller. + * + * Recall that a variable-width attribute has two components: the fixed-width + * attribute that tells the caller where to look, and the actual variable width data.
+ */ static void -attrlist_pack_variable2(struct _attrlist_buf *ab, const void *source, ssize_t count, const void *ext, ssize_t extcount) -{ - struct attrreference ar; +attrlist_pack_variable2(struct _attrlist_buf *ab, const void *source, ssize_t count, + const void *ext, ssize_t extcount) { + + /* Use ssize_t's for pointer math ease */ + struct attrreference ar; ssize_t fit; - /* pack the reference to the variable object */ + /* + * Pack the fixed-width component to the variable object. + * Note that we may be able to pack the fixed width attref, but not + * the variable (if there's no room). + */ ar.attr_dataoffset = ab->varcursor - ab->fixedcursor; ar.attr_length = count + extcount; attrlist_pack_fixed(ab, &ar, sizeof(ar)); - /* calculate space and pack the variable object */ - fit = imin(count, ab->allocated - (ab->varcursor - ab->base)); + /* + * Use an lmin() to do a signed comparison. We use a signed comparison + * to detect the 'out of memory' conditions as described in the + * fixed width check above. + * + * Then pack the first variable attribute as space allows. Note that we advance + * the variable cursor only if we had some available space. + */ + fit = lmin(count, ab->allocated - (ab->varcursor - ab->base)); if (fit > 0) { - if (source != NULL) + if (source != NULL) { bcopy(source, ab->varcursor, fit); + } ab->varcursor += fit; } - fit = imin(extcount, ab->allocated - (ab->varcursor - ab->base)); + + /* Compute the available space for the second attribute */ + fit = lmin(extcount, ab->allocated - (ab->varcursor - ab->base)); if (fit > 0) { - if (ext != NULL) + /* Copy in data for the second attribute (if needed) if there is room */ + if (ext != NULL) { bcopy(ext, ab->varcursor, fit); + } ab->varcursor += fit; } /* always move in increments of 4 */ ab->varcursor = (char *)roundup((uintptr_t)ab->varcursor, 4); } + +/* + * Packing a single variable-width attribute is the same as calling the two-attribute version, but with + * an invalid 2nd attribute. + */ static void attrlist_pack_variable(struct _attrlist_buf *ab, const void *source, ssize_t count) { attrlist_pack_variable2(ab, source, count, NULL, 0); } + +/* + * Attempt to pack a string. This is a special case of a variable width attribute. + * + * If "source" is NULL, then an empty string ("") will be packed. If "source" is + * not NULL, but "count" is zero, then "source" is assumed to be a NUL-terminated + * C-string. If "source" is not NULL and "count" is not zero, then only the first + * "count" bytes of "source" will be copied, and a NUL terminator will be added. + * + * If the attrlist buffer doesn't have enough room to hold the entire string (including + * NUL terminator), then copy as much as will fit. The attrlist buffer's "varcursor" + * will always be updated based on the entire length of the string (including NUL + * terminator); this means "varcursor" may end up pointing beyond the end of the + * allocated buffer space. + */ static void attrlist_pack_string(struct _attrlist_buf *ab, const char *source, ssize_t count) { - struct attrreference ar; + struct attrreference ar; ssize_t fit, space; - /* * Supplied count is character count of string text, excluding trailing nul * which we always supply here. @@ -137,25 +202,49 @@ attrlist_pack_string(struct _attrlist_buf *ab, const char *source, ssize_t count } /* - * Make the reference and pack it. - * Note that this is entirely independent of how much we get into - * the buffer. + * Construct the fixed-width attribute that refers to this string.
*/ ar.attr_dataoffset = ab->varcursor - ab->fixedcursor; ar.attr_length = count + 1; attrlist_pack_fixed(ab, &ar, sizeof(ar)); - - /* calculate how much of the string text we can copy, and do that */ + + /* + * Now compute how much available memory we have to copy the string text. + * + * space = the number of bytes available in the attribute buffer to hold the + * string's value. + * + * fit = the number of bytes to copy from the start of the string into the + * attribute buffer, NOT including the NUL terminator. If the attribute + * buffer is large enough, this will be the string's length; otherwise, it + * will be equal to "space". + */ space = ab->allocated - (ab->varcursor - ab->base); - fit = imin(count, space); - if (fit > 0) + fit = lmin(count, space); + if (space > 0) { + /* + * If there is space remaining, copy data in, and + * accommodate the trailing NUL terminator. + * + * NOTE: if "space" is too small to hold the string and its NUL + * terminator (space < fit + 1), then the string value in the attribute + * buffer will NOT be NUL terminated! + * + * NOTE 2: bcopy() will do nothing if the length ("fit") is zero. + * Therefore, we don't bother checking for that here. + */ bcopy(source, ab->varcursor, fit); - /* is there room for our trailing nul? */ - if (space > fit) - ab->varcursor[fit] = '\0'; + /* is there room for our trailing nul? */ + if (space > fit) { + ab->varcursor[fit++] = '\0'; + /* 'fit' is now the number of bytes AFTER adding in the NUL */ + } + } + /* + * always move in increments of 4 (including the trailing NUL) + */ + ab->varcursor += roundup((count+1), 4); - /* always move in increments of 4 */ - ab->varcursor += roundup(count + 1, 4); } #define ATTR_PACK4(AB, V) \ @@ -866,7 +955,7 @@ getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp, * Note that since we won't ever copy out more than the caller requested, * we never need to allocate more than they offer. */ - ab.allocated = imin(uap->bufferSize, fixedsize + varsize); + ab.allocated = ulmin(uap->bufferSize, fixedsize + varsize); if (ab.allocated > ATTR_MAX_BUFFER) { error = ENOMEM; VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER); @@ -1193,7 +1282,8 @@ out: */ static int -getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_context_t ctx) +getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, + __unused struct componentname *getattr_name, proc_t p, vfs_context_t ctx) { struct attrlist al; struct vnode_attr va; @@ -1413,10 +1503,12 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con va.va_name[MAXPATHLEN-1] = '\0'; /* Ensure nul-termination */ cnp = va.va_name; cnl = strlen(cnp); - } else { + } + else { + /* Filesystem did not support getting the name */ if (vnode_isvroot(vp)) { if (vp->v_mount->mnt_vfsstat.f_mntonname[1] == 0x00 && - vp->v_mount->mnt_vfsstat.f_mntonname[0] == '/') { + vp->v_mount->mnt_vfsstat.f_mntonname[0] == '/') { /* special case for boot volume. Use root name when it's * available (which is the volume name) or just the mount on * name of "/".
we must do this for binary compatibility with @@ -1433,7 +1525,8 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con else { getattrlist_findnamecomp(vp->v_mount->mnt_vfsstat.f_mntonname, &cnp, &cnl); } - } else { + } + else { cnp = vname = vnode_getname(vp); cnl = 0; if (cnp != NULL) { @@ -1471,10 +1564,21 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con * user-space this is OK. */ if ((al.commonattr & ATTR_CMN_EXTENDED_SECURITY) && - VATTR_IS_SUPPORTED(&va, va_acl) && - (va.va_acl != NULL)) - varsize += roundup(KAUTH_FILESEC_SIZE(va.va_acl->acl_entrycount), 4); - + VATTR_IS_SUPPORTED(&va, va_acl) && + (va.va_acl != NULL)) { + + /* + * Since we have a kauth_acl_t (not a kauth_filesec_t), we have to check against + * KAUTH_FILESEC_NOACL ourselves + */ + if (va.va_acl->acl_entrycount == KAUTH_FILESEC_NOACL) { + varsize += roundup((KAUTH_FILESEC_SIZE(0)), 4); + } + else { + varsize += roundup ((KAUTH_FILESEC_SIZE(va.va_acl->acl_entrycount)), 4); + } + } + /* * Allocate a target buffer for attribute results. * @@ -1483,7 +1587,8 @@ getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_con * don't result in a panic if the caller's buffer is too small.. */ ab.allocated = fixedsize + varsize; - if (ab.allocated > ATTR_MAX_BUFFER) { + /* Cast 'allocated' to an unsigned to verify allocation size */ + if ( ((size_t)ab.allocated) > ATTR_MAX_BUFFER) { error = ENOMEM; VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER); goto out; @@ -2008,7 +2113,8 @@ fgetattrlist(proc_t p, struct fgetattrlist_args *uap, __unused int32_t *retval) ap.bufferSize = uap->bufferSize; ap.options = uap->options; - error = getattrlist_internal(vp, &ap, p, ctx); + /* Default to using the vnode's name. */ + error = getattrlist_internal(vp, &ap, NULL, p, ctx); file_drop(uap->fd); if (vp) @@ -2037,15 +2143,20 @@ getattrlist(proc_t p, struct getattrlist_args *uap, __unused int32_t *retval) nameiflags |= FOLLOW; NDINIT(&nd, LOOKUP, OP_GETATTR, nameiflags, UIO_USERSPACE, uap->path, ctx); - if ((error = namei(&nd)) != 0) - goto out; + if ((error = namei(&nd)) != 0) { + /* vp is still uninitialized */ + return error; + } + vp = nd.ni_vp; + /* Pass along our componentname to getattrlist_internal */ + error = getattrlist_internal(vp, uap, &(nd.ni_cnd), p, ctx); + + /* Retain the namei reference until the getattrlist completes. */ nameidone(&nd); - - error = getattrlist_internal(vp, uap, p, ctx); -out: if (vp) vnode_put(vp); + return error; } @@ -2246,6 +2357,12 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con */ cp = cursor; ATTR_UNPACK(ar); + if (ar.attr_dataoffset < 0) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: bad offset supplied", ar.attr_dataoffset); + error = EINVAL; + goto out; + } + cp += ar.attr_dataoffset; rfsec = (kauth_filesec_t)cp; if (((((char *)rfsec) + KAUTH_FILESEC_SIZE(0)) > bufend) || /* no space for acl */ @@ -2291,7 +2408,14 @@ setattrlist_internal(vnode_t vp, struct setattrlist_args *uap, proc_t p, vfs_con if (al.volattr & ATTR_VOL_INFO) { if (al.volattr & ATTR_VOL_NAME) { volname = cursor; - ATTR_UNPACK(ar); + ATTR_UNPACK(ar); + /* attr_length cannot be 0! 
*/ + if ((ar.attr_dataoffset < 0) || (ar.attr_length == 0)) { + VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: bad offset supplied (2) ", ar.attr_dataoffset); + error = EINVAL; + goto out; + } + volname += ar.attr_dataoffset; if ((volname + ar.attr_length) > bufend) { error = EINVAL; diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index ce7c68e82..934dbceef 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -138,8 +138,8 @@ static buf_t buf_create_shadow_internal(buf_t bp, boolean_t force_copy, __private_extern__ int bdwrite_internal(buf_t, int); /* zone allocated buffer headers */ -static void bufzoneinit(void) __attribute__((section("__TEXT, initcode"))); -static void bcleanbuf_thread_init(void) __attribute__((section("__TEXT, initcode"))); +static void bufzoneinit(void); +static void bcleanbuf_thread_init(void); static void bcleanbuf_thread(void); static zone_t buf_hdr_zone; @@ -461,9 +461,7 @@ bufattr_rawencrypted(bufattr_t bap) { int bufattr_throttled(bufattr_t bap) { - if ( (bap->ba_flags & BA_THROTTLED_IO) ) - return 1; - return 0; + return (GET_BUFATTR_IO_TIER(bap)); } int @@ -481,16 +479,10 @@ bufattr_meta(bufattr_t bap) { } int -#if !CONFIG_EMBEDDED bufattr_delayidlesleep(bufattr_t bap) -#else /* !CONFIG_EMBEDDED */ -bufattr_delayidlesleep(__unused bufattr_t bap) -#endif /* !CONFIG_EMBEDDED */ { -#if !CONFIG_EMBEDDED if ( (bap->ba_flags & BA_DELAYIDLESLEEP) ) return 1; -#endif /* !CONFIG_EMBEDDED */ return 0; } @@ -511,6 +503,30 @@ buf_static(buf_t bp) { return 0; } +void +bufattr_markgreedymode(bufattr_t bap) { + SET(bap->ba_flags, BA_GREEDY_MODE); +} + +int +bufattr_greedymode(bufattr_t bap) { + if ( (bap->ba_flags & BA_GREEDY_MODE) ) + return 1; + return 0; +} + +void +bufattr_markquickcomplete(bufattr_t bap) { + SET(bap->ba_flags, BA_QUICK_COMPLETE); +} + +int +bufattr_quickcomplete(bufattr_t bap) { + if ( (bap->ba_flags & BA_QUICK_COMPLETE) ) + return 1; + return 0; +} + errno_t buf_error(buf_t bp) { @@ -1196,7 +1212,7 @@ buf_strategy(vnode_t devvp, void *ap) errno_t error; #if CONFIG_DTRACE int dtrace_io_start_flag = 0; /* We only want to trip the io:::start - * probe once, with the true phisical + * probe once, with the true physical * block in place (b_blkno) */ @@ -1314,9 +1330,11 @@ buf_strategy(vnode_t devvp, void *ap) * means that the I/O is properly set * up to be a multiple of the page size, or * we were able to successfully set up the - * phsyical block mapping + * physical block mapping */ - return (VOCALL(devvp->v_op, VOFFSET(vnop_strategy), ap)); + error = VOCALL(devvp->v_op, VOFFSET(vnop_strategy), ap); + DTRACE_FSINFO(strategy, vnode_t, vp); + return (error); } @@ -1983,7 +2001,7 @@ bufinit(void) #if BALANCE_QUEUES { - static void bufq_balance_thread_init(void) __attribute__((section("__TEXT, initcode"))); + static void bufq_balance_thread_init(void); /* create a thread to do dynamic buffer queue balancing */ bufq_balance_thread_init(); } @@ -2082,8 +2100,10 @@ bio_doread(vnode_t vp, daddr64_t blkno, int size, kauth_cred_t cred, int async, trace(TR_BREADMISS, pack(vp, size), blkno); /* Pay for the read. 
*/ - if (p && p->p_stats) + if (p && p->p_stats) { OSIncrementAtomicLong(&p->p_stats->p_ru.ru_inblock); /* XXX */ + OSAddAtomic64(size, &p->p_stats->ri_diskiobytes.ri_bytesread); + } if (async) { /* @@ -2217,9 +2237,11 @@ buf_bwrite(buf_t bp) */ if (wasdelayed) buf_reassign(bp, vp); - else - if (p && p->p_stats) - OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ + else + if (p && p->p_stats) { + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ + OSAddAtomic64(buf_count(bp), &p->p_stats->ri_diskiobytes.ri_byteswritten); + } } trace(TR_BUFWRITE, pack(vp, bp->b_bcount), bp->b_lblkno); @@ -2243,8 +2265,10 @@ buf_bwrite(buf_t bp) if (wasdelayed) buf_reassign(bp, vp); else - if (p && p->p_stats) - OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ + if (p && p->p_stats) { + OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ + OSAddAtomic64(buf_count(bp), &p->p_stats->ri_diskiobytes.ri_byteswritten); + } /* Release the buffer. */ // XXXdbg - only if the unused bit is set @@ -2299,8 +2323,10 @@ bdwrite_internal(buf_t bp, int return_error) */ if (!ISSET(bp->b_flags, B_DELWRI)) { SET(bp->b_flags, B_DELWRI); - if (p && p->p_stats) + if (p && p->p_stats) { OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock); /* XXX */ + OSAddAtomic64(buf_count(bp), &p->p_stats->ri_diskiobytes.ri_byteswritten); + } OSAddAtomicLong(1, &nbdwrite); buf_reassign(bp, vp); } @@ -2489,10 +2515,9 @@ buf_brelse_shadow(buf_t bp) } } lck_mtx_unlock(buf_mtxp); - - if (need_wakeup) { + + if (need_wakeup) wakeup(bp_head); - } #ifdef BUF_MAKE_PRIVATE if (bp == bp_data && data_ref == 0) @@ -2820,6 +2845,7 @@ incore_locked(vnode_t vp, daddr64_t blkno, struct bufhashhdr *dp) return (NULL); } + void buf_wait_for_shadow_io(vnode_t vp, daddr64_t blkno) { @@ -3783,13 +3809,12 @@ buf_biowait(buf_t bp) * (for swap pager, that puts swap buffers on the free lists (!!!), * for the vn device, that puts malloc'd buffers on the free lists!) 
*/ -extern struct timeval priority_IO_timestamp_for_root; -extern int hard_throttle_on_root; void buf_biodone(buf_t bp) { mount_t mp; + struct bufattr *bap; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_START, bp, bp->b_datap, bp->b_flags, 0, 0); @@ -3801,6 +3826,8 @@ buf_biodone(buf_t bp) fslog_io_error(bp); } + bap = &bp->b_attr; + if (bp->b_vp && bp->b_vp->v_mount) { mp = bp->b_vp->v_mount; } else { @@ -3814,8 +3841,9 @@ buf_biodone(buf_t bp) INCR_PENDING_IO(-(pending_io_t)buf_count(bp), mp->mnt_pending_read_size); } - if (kdebug_enable) { - int code = DKIO_DONE; + if (kdebug_enable) { + int code = DKIO_DONE; + int io_tier = GET_BUFATTR_IO_TIER(bap); if (bp->b_flags & B_READ) code |= DKIO_READ; @@ -3827,24 +3855,20 @@ buf_biodone(buf_t bp) else if (bp->b_flags & B_PAGEIO) code |= DKIO_PAGING; - if (bp->b_flags & B_THROTTLED_IO) + if (io_tier != 0) code |= DKIO_THROTTLE; - else if (bp->b_flags & B_PASSIVE) + + code |= ((io_tier << DKIO_TIER_SHIFT) & DKIO_TIER_MASK); + + if (bp->b_flags & B_PASSIVE) code |= DKIO_PASSIVE; - if (bp->b_attr.ba_flags & BA_NOCACHE) + if (bap->ba_flags & BA_NOCACHE) code |= DKIO_NOCACHE; KERNEL_DEBUG_CONSTANT_IST(KDEBUG_COMMON, FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE, - bp, (uintptr_t)bp->b_vp, - bp->b_resid, bp->b_error, 0); + buf_kernel_addrperm_addr(bp), (uintptr_t)VM_KERNEL_ADDRPERM(bp->b_vp), bp->b_resid, bp->b_error, 0); } - if ((bp->b_vp != NULLVP) && - ((bp->b_flags & (B_THROTTLED_IO | B_PASSIVE | B_IOSTREAMING | B_PAGEIO | B_READ | B_THROTTLED_IO | B_PASSIVE)) == (B_PAGEIO | B_READ)) && - (bp->b_vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) { - microuptime(&priority_IO_timestamp_for_root); - hard_throttle_on_root = 0; - } /* * I/O was done, so don't believe @@ -3852,13 +3876,11 @@ buf_biodone(buf_t bp) * and we need to reset the THROTTLED/PASSIVE * indicators */ - CLR(bp->b_flags, (B_WASDIRTY | B_THROTTLED_IO | B_PASSIVE)); - CLR(bp->b_attr.ba_flags, (BA_META | BA_NOCACHE)); -#if !CONFIG_EMBEDDED - CLR(bp->b_attr.ba_flags, (BA_THROTTLED_IO | BA_DELAYIDLESLEEP)); -#else - CLR(bp->b_attr.ba_flags, BA_THROTTLED_IO); -#endif /* !CONFIG_EMBEDDED */ + CLR(bp->b_flags, (B_WASDIRTY | B_PASSIVE)); + CLR(bap->ba_flags, (BA_META | BA_NOCACHE | BA_DELAYIDLESLEEP)); + + SET_BUFATTR_IO_TIER(bap, 0); + DTRACE_IO1(done, buf_t, bp); if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW)) @@ -3934,6 +3956,18 @@ biodone_done: (uintptr_t)bp, (uintptr_t)bp->b_datap, bp->b_flags, 0, 0); } +/* + * Obfuscate buf pointers. + */ +vm_offset_t +buf_kernel_addrperm_addr(void * addr) +{ + if ((vm_offset_t)addr == 0) + return 0; + else + return ((vm_offset_t)addr + buf_kernel_addrperm); +} + /* * Return a count of buffers on the "locked" queue. 
*/ diff --git a/bsd/vfs/vfs_cache.c b/bsd/vfs/vfs_cache.c index 73fbb3afb..65cb02d22 100644 --- a/bsd/vfs/vfs_cache.c +++ b/bsd/vfs/vfs_cache.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include @@ -159,7 +160,7 @@ lck_mtx_t strcache_mtx_locks[NUM_STRCACHE_LOCKS]; static vnode_t cache_lookup_locked(vnode_t dvp, struct componentname *cnp); static const char *add_name_internal(const char *, uint32_t, u_int, boolean_t, u_int); -static void init_string_table(void) __attribute__((section("__TEXT, initcode"))); +static void init_string_table(void); static void cache_delete(struct namecache *, int); static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cnp, const char *strname); @@ -170,7 +171,7 @@ static void cache_enter_locked(vnode_t dvp, vnode_t vp, struct componentname *cn void dump_string_table(void); #endif /* DUMP_STRING_TABLE */ -static void init_crc32(void) __attribute__((section("__TEXT, initcode"))); +static void init_crc32(void); static unsigned int crc32tab[256]; @@ -197,6 +198,10 @@ static unsigned int crc32tab[256]; * one of the parents moved while we were building the path. The * caller can special handle this case by calling build_path again. * + * If BUILDPATH_VOLUME_RELATIVE is set in flags, we return a path + * that is relative to the nearest mount point, i.e. we do not + * cross over mount points while building the path. + * * passed in vp must have a valid io_count reference */ int @@ -263,7 +268,17 @@ again: goto out_unlock; } else { - vp = vp->v_mount->mnt_vnodecovered; + /* + * This is the root of the volume and the caller does not + * want to cross mount points. Therefore just return + * '/' as the relative path. + */ + if (flags & BUILDPATH_VOLUME_RELATIVE) { + *--end = '/'; + goto out_unlock; + } else { + vp = vp->v_mount->mnt_vnodecovered; + } } } @@ -443,6 +458,7 @@ bad_news: if (vp && !vnode_isdir(vp) && vp->v_parent) vp = vp->v_parent; } + /* * When a mount point is crossed switch the vp. * Continue until we find the root or we find @@ -457,7 +473,13 @@ bad_news: if (!(tvp->v_flag & VROOT) || !tvp->v_mount) break; /* not the root of a mounted FS */ - tvp = tvp->v_mount->mnt_vnodecovered; + + if (flags & BUILDPATH_VOLUME_RELATIVE) { + /* Do not cross over mount points */ + tvp = NULL; + } else { + tvp = tvp->v_mount->mnt_vnodecovered; + } } if (tvp == NULLVP) goto out_unlock; @@ -561,6 +583,51 @@ vnode_putname(const char *name) vfs_removename(name); } +static const char unknown_vnodename[] = "(unknown vnode name)"; + +const char * +vnode_getname_printable(vnode_t vp) +{ + const char *name = vnode_getname(vp); + if (name != NULL) + return name; + + switch (vp->v_type) { + case VCHR: + case VBLK: + { + /* + * Create an artificial dev name from + * major and minor device number + */ + char dev_name[64]; + (void) snprintf(dev_name, sizeof(dev_name), + "%c(%u, %u)", VCHR == vp->v_type ? 'c':'b', + major(vp->v_rdev), minor(vp->v_rdev)); + /* + * Add the newly created dev name to the name + * cache to allow easier cleanup. Also, + * vfs_addname allocates memory for the new name + * and returns it.
+ */ + NAME_CACHE_LOCK_SHARED(); + name = vfs_addname(dev_name, strlen(dev_name), 0, 0); + NAME_CACHE_UNLOCK(); + return name; + } + default: + return unknown_vnodename; + } +} + +void +vnode_putname_printable(const char *name) +{ + if (name == unknown_vnodename) + return; + vnode_putname(name); +} + /* * if VNODE_UPDATE_PARENT, and we can take @@ -1001,7 +1068,7 @@ cache_lookup_path(struct nameidata *ndp, struct componentname *cnp, vnode_t dp, microuptime(&tv); } for (;;) { - /* + /* * Search a directory. * * The cn_hash value is for use by cache_lookup @@ -1140,6 +1207,14 @@ skiprsrcfork: } } + if ((cnp->cn_flags & CN_SKIPNAMECACHE)) { + /* + * Force lookup to go to the filesystem with + * all cnp fields set up. + */ + break; + } + /* * "." and ".." aren't supposed to be cached, so check * for them before checking the cache. @@ -1241,7 +1316,7 @@ need_dp: * immediately w/o waiting... it always succeeds */ vnode_get(dp); - } else if ( (vnode_getwithvid_drainok(dp, vid)) ) { + } else if ((error = vnode_getwithvid_drainok(dp, vid))) { /* * failure indicates the vnode * changed identity or is being @@ -1251,9 +1326,18 @@ need_dp: * don't necessarily return ENOENT, though, because * we really want to go back to disk and make sure it's * there or not if someone else is changing this - * vnode. + * vnode. That being said, the one case where we do want + * to return ENOENT is when the vnode's mount point is + * in the process of unmounting and we might cause a deadlock + * in our attempt to take an iocount. An ENODEV error return + * from vnode_get* is an indication of this, but we change that to + * ENOENT for upper layers. */ - error = ERECYCLE; + if (error == ENODEV) { + error = ENOENT; + } else { + error = ERECYCLE; + } goto errorout; } } @@ -1345,11 +1429,12 @@ cache_lookup_locked(vnode_t dvp, struct componentname *cnp) } +unsigned int hash_string(const char *cp, int len); // // Have to take a len argument because we may only need to // hash part of a componentname.
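A brief aside on the vnode_getname_printable()/vnode_putname_printable() pair introduced above: unlike vnode_getname(), the printable variant never returns NULL. It falls back to a synthesized "c(major, minor)" or "b(major, minor)" name for device vnodes, or to the static "(unknown vnode name)" sentinel, so callers can print the result unconditionally. A minimal usage sketch under those semantics (the caller and its vnode are hypothetical; only the two API names come from this patch):

    /*
     * Sketch: log something about a vnode without a NULL check on the
     * name. "devvp" stands in for any vnode the caller already holds
     * an iocount on.
     */
    static void
    log_vnode_name(vnode_t devvp)
    {
            const char *name = vnode_getname_printable(devvp);

            printf("operating on %s\n", name);

            /*
             * Releases the name-cache reference taken above, or does
             * nothing if the static sentinel was returned.
             */
            vnode_putname_printable(name);
    }

The journal code later in this patch adopts exactly this pairing in get_io_info() and journal_create().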
// -static unsigned int +unsigned int hash_string(const char *cp, int len) { unsigned hash = 0; diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c index 69dfdfda3..2f662c53c 100644 --- a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -162,8 +162,8 @@ static int cluster_io_type(struct uio *uio, int *io_type, u_int32_t *io_length, static int cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int non_rounded_size, int flags, buf_t real_bp, struct clios *iostate, int (*)(buf_t, void *), void *callback_arg); static int cluster_iodone(buf_t bp, void *callback_arg); -static int cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags); -static int cluster_hard_throttle_on(vnode_t vp, uint32_t); +static int cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags, vnode_t vp); +static int cluster_is_throttled(vnode_t vp); static void cluster_iostate_wait(struct clios *iostate, u_int target, const char *wait_name); @@ -242,17 +242,10 @@ int (*bootcache_contains_block)(dev_t device, u_int64_t blkno) = NULL; #define WRITE_BEHIND 1 #define WRITE_BEHIND_SSD 1 -#if CONFIG_EMBEDDED -#define PREFETCH 1 -#define PREFETCH_SSD 1 -uint32_t speculative_prefetch_max = 512; /* maximum number of pages to use for a specluative read-ahead */ -uint32_t speculative_prefetch_max_iosize = (512 * 1024); /* maximum I/O size to use for a specluative read-ahead */ -#else #define PREFETCH 3 #define PREFETCH_SSD 1 uint32_t speculative_prefetch_max = (MAX_UPL_SIZE * 3); uint32_t speculative_prefetch_max_iosize = (512 * 1024); /* maximum I/O size to use for a specluative read-ahead on SSDs*/ -#endif #define IO_SCALE(vp, base) (vp->v_mount->mnt_ioscale * (base)) @@ -267,25 +260,13 @@ int speculative_reads_disabled = 0; * can be outstanding on a single vnode * before we issue a synchronous write */ -#define HARD_THROTTLE_MAXCNT 0 -#define HARD_THROTTLE_MAX_IOSIZE (128 * 1024) -#define LEGACY_HARD_THROTTLE_MAX_IOSIZE (512 * 1024) - -extern int32_t throttle_legacy_process_count; -int hard_throttle_on_root = 0; -uint32_t hard_throttle_max_iosize = HARD_THROTTLE_MAX_IOSIZE; -uint32_t legacy_hard_throttle_max_iosize = LEGACY_HARD_THROTTLE_MAX_IOSIZE; -struct timeval priority_IO_timestamp_for_root; - -#if CONFIG_EMBEDDED -#define THROTTLE_MAX_IOSIZE (hard_throttle_max_iosize) -#else -#define THROTTLE_MAX_IOSIZE (throttle_legacy_process_count == 0 ? 
hard_throttle_max_iosize : legacy_hard_throttle_max_iosize) -#endif +#define THROTTLE_MAXCNT 0 + +uint32_t throttle_max_iosize = (128 * 1024); +#define THROTTLE_MAX_IOSIZE (throttle_max_iosize) -SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_max_iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &hard_throttle_max_iosize, 0, ""); -SYSCTL_INT(_debug, OID_AUTO, lowpri_legacy_throttle_max_iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &legacy_hard_throttle_max_iosize, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, lowpri_throttle_max_iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &throttle_max_iosize, 0, ""); void @@ -499,27 +480,9 @@ cluster_io_present_in_BC(vnode_t vp, off_t f_offset) static int -cluster_hard_throttle_on(vnode_t vp, uint32_t hard_throttle) +cluster_is_throttled(vnode_t vp) { - int throttle_type = 0; - - if ( (throttle_type = throttle_io_will_be_throttled(-1, vp->v_mount)) ) - return(throttle_type); - - if (hard_throttle && (vp->v_mount->mnt_kern_flag & MNTK_ROOTDEV)) { - static struct timeval hard_throttle_maxelapsed = { 0, 100000 }; - struct timeval elapsed; - - if (hard_throttle_on_root) - return(1); - - microuptime(&elapsed); - timevalsub(&elapsed, &priority_IO_timestamp_for_root); - - if (timevalcmp(&elapsed, &hard_throttle_maxelapsed, <)) - return(1); - } - return(0); + return (throttle_io_will_be_throttled(-1, vp->v_mount)); } @@ -545,7 +508,7 @@ cluster_iostate_wait(struct clios *iostate, u_int target, const char *wait_name) static int -cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags) +cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_flags, vnode_t vp) { int upl_abort_code = 0; int page_in = 0; @@ -568,7 +531,7 @@ cluster_ioerror(upl_t upl, int upl_offset, int abort_size, int error, int io_fla * leave pages in the cache unchanged on error */ upl_abort_code = UPL_ABORT_FREE_ON_EMPTY; - else if (page_out && (error != ENXIO)) + else if (page_out && ((error != ENXIO) || vnode_isswap(vp))) /* * transient error... 
leave pages unchanged */ @@ -602,6 +565,7 @@ cluster_iodone(buf_t bp, void *callback_arg) buf_t cbp_head; buf_t cbp_next; buf_t real_bp; + vnode_t vp; struct clios *iostate; boolean_t transaction_complete = FALSE; @@ -657,6 +621,7 @@ cluster_iodone(buf_t bp, void *callback_arg) total_resid = 0; cbp = cbp_head; + vp = cbp->b_vp; upl_offset = cbp->b_uploffset; upl = cbp->b_upl; b_flags = cbp->b_flags; @@ -741,7 +706,7 @@ cluster_iodone(buf_t bp, void *callback_arg) commit_size = (pg_offset + transaction_size + (PAGE_SIZE - 1)) & ~PAGE_MASK; if (error) - upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, commit_size, error, b_flags); + upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, commit_size, error, b_flags, vp); else { upl_flags = UPL_COMMIT_FREE_ON_EMPTY; @@ -771,9 +736,9 @@ cluster_iodone(buf_t bp, void *callback_arg) uint32_t -cluster_hard_throttle_limit(vnode_t vp, uint32_t *limit, uint32_t hard_throttle) +cluster_throttle_io_limit(vnode_t vp, uint32_t *limit) { - if (cluster_hard_throttle_on(vp, hard_throttle)) { + if (cluster_is_throttled(vp)) { *limit = THROTTLE_MAX_IOSIZE; return 1; } @@ -879,6 +844,7 @@ cluster_complete_transaction(buf_t *cbp_head, void *callback_arg, int *retval, i { buf_t cbp; int error; + boolean_t isswapout = FALSE; /* * cluster_complete_transaction will @@ -896,12 +862,18 @@ cluster_complete_transaction(buf_t *cbp_head, void *callback_arg, int *retval, i */ for (cbp = *cbp_head; cbp; cbp = cbp->b_trans_next) cbp->b_flags |= B_TDONE; + cbp = *cbp_head; + + if ((flags & (CL_ASYNC | CL_PAGEOUT)) == CL_PAGEOUT && vnode_isswap(cbp->b_vp)) + isswapout = TRUE; - error = cluster_iodone(*cbp_head, callback_arg); + error = cluster_iodone(cbp, callback_arg); if ( !(flags & CL_ASYNC) && error && *retval == 0) { - if (((flags & (CL_PAGEOUT | CL_KEEPCACHED)) != CL_PAGEOUT) || (error != ENXIO)) - *retval = error; + if (((flags & (CL_PAGEOUT | CL_KEEPCACHED)) != CL_PAGEOUT) || (error != ENXIO)) + *retval = error; + else if (isswapout == TRUE) + *retval = error; } *cbp_head = (buf_t)NULL; } @@ -1014,10 +986,10 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no max_iosize = PAGE_SIZE; if (flags & CL_THROTTLE) { - if ( !(flags & CL_PAGEOUT) && cluster_hard_throttle_on(vp, 1)) { + if ( !(flags & CL_PAGEOUT) && cluster_is_throttled(vp)) { if (max_iosize > THROTTLE_MAX_IOSIZE) max_iosize = THROTTLE_MAX_IOSIZE; - async_throttle = HARD_THROTTLE_MAXCNT; + async_throttle = THROTTLE_MAXCNT; } else { if ( (flags & CL_DEV_MEMORY) ) async_throttle = IO_SCALE(vp, VNODE_ASYNC_THROTTLE); @@ -1639,7 +1611,7 @@ cluster_io(vnode_t vp, upl_t upl, vm_offset_t upl_offset, off_t f_offset, int no pg_offset = upl_offset & PAGE_MASK; abort_size = (upl_end_offset - upl_offset + PAGE_MASK) & ~PAGE_MASK; - upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, abort_size, error, io_flags); + upl_flags = cluster_ioerror(upl, upl_offset - pg_offset, abort_size, error, io_flags, vp); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 28)) | DBG_FUNC_NONE, upl, upl_offset - pg_offset, abort_size, (error << 24) | upl_flags, 0); @@ -2215,13 +2187,13 @@ next_dwrite: while (io_req_size >= PAGE_SIZE && uio->uio_offset < newEOF && retval == 0) { int throttle_type; - if ( (throttle_type = cluster_hard_throttle_on(vp, 1)) ) { + if ( (throttle_type = cluster_is_throttled(vp)) ) { /* * we're in the throttle window, at the very least * we want to limit the size of the I/O we're about * to issue */ - if ( (flags & IO_RETURN_ON_THROTTLE) && throttle_type == 2) { + if ( (flags & 
IO_RETURN_ON_THROTTLE) && throttle_type == THROTTLE_NOW) { /* * we're in the throttle window and at least 1 I/O * has already been issued by a throttleable thread @@ -3427,7 +3399,13 @@ cluster_read_ext(vnode_t vp, struct uio *uio, off_t filesize, int xflags, int (* * the first vector in the uio request */ if (((flags & IO_NOCACHE) || (flags & IO_ENCRYPTED)) && UIO_SEG_IS_USER_SPACE(uio->uio_segflg)) { - retval = cluster_io_type(uio, &read_type, &read_length, 0); + + boolean_t check_io_type = TRUE; + + + if (check_io_type) { + retval = cluster_io_type(uio, &read_type, &read_length, 0); + } } while ((cur_resid = uio_resid(uio)) && uio->uio_offset < filesize && retval == 0) { @@ -3523,9 +3501,9 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file panic ("encrypted blocks will hit UBC!"); } - policy = proc_get_task_selfdiskacc(); + policy = throttle_get_io_policy(NULL); - if (policy == IOPOL_THROTTLE || policy == IOPOL_UTILITY || (flags & IO_NOCACHE)) + if (policy == THROTTLE_LEVEL_TIER3 || policy == THROTTLE_LEVEL_TIER2 || (flags & IO_NOCACHE)) take_reference = 0; if (flags & IO_PASSIVE) @@ -3549,7 +3527,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file rd_ahead_enabled = 0; rap = NULL; } else { - if (cluster_hard_throttle_on(vp, 1)) { + if (cluster_is_throttled(vp)) { /* * we're in the throttle window, at the very least * we want to limit the size of the I/O we're about @@ -3684,7 +3662,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file iostate.io_wanted = 0; if ( (flags & IO_RETURN_ON_THROTTLE) ) { - if (cluster_hard_throttle_on(vp, 0) == 2) { + if (cluster_is_throttled(vp) == THROTTLE_NOW) { if ( !cluster_io_present_in_BC(vp, uio->uio_offset)) { /* * we're in the throttle window and at least 1 I/O @@ -3953,7 +3931,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file retval = error; if (io_req_size) { - if (cluster_hard_throttle_on(vp, 1)) { + if (cluster_is_throttled(vp)) { /* * we're in the throttle window, at the very least * we want to limit the size of the I/O we're about @@ -3967,7 +3945,7 @@ cluster_read_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t file /* * coming out of throttled state */ - if (policy != IOPOL_THROTTLE && policy != IOPOL_UTILITY) { + if (policy != THROTTLE_LEVEL_TIER3 && policy != THROTTLE_LEVEL_TIER2) { if (rap != NULL) rd_ahead_enabled = 1; prefetch_enabled = 1; @@ -4143,7 +4121,7 @@ next_dread: while (io_req_size && retval == 0) { u_int32_t io_start; - if (cluster_hard_throttle_on(vp, 1)) { + if (cluster_is_throttled(vp)) { /* * we're in the throttle window, at the very least * we want to limit the size of the I/O we're about @@ -4295,7 +4273,7 @@ next_dread: } } if ( (flags & IO_RETURN_ON_THROTTLE) ) { - if (cluster_hard_throttle_on(vp, 0) == 2) { + if (cluster_is_throttled(vp) == THROTTLE_NOW) { if ( !cluster_io_present_in_BC(vp, uio->uio_offset)) { /* * we're in the throttle window and at least 1 I/O @@ -4846,15 +4824,10 @@ advisory_read_ext(vnode_t vp, off_t filesize, off_t f_offset, int resid, int (*c max_io_size = cluster_max_io_size(vp->v_mount, CL_READ); -#if CONFIG_EMBEDDED - if (max_io_size > speculative_prefetch_max_iosize) - max_io_size = speculative_prefetch_max_iosize; -#else if ((vp->v_mount->mnt_kern_flag & MNTK_SSD) && !ignore_is_ssd) { if (max_io_size > speculative_prefetch_max_iosize) max_io_size = speculative_prefetch_max_iosize; } -#endif KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 60)) | 
DBG_FUNC_START, (int)f_offset, resid, (int)filesize, 0, 0); @@ -5081,7 +5054,7 @@ cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *ca lck_mtx_unlock(&wbp->cl_lockw); - sparse_cluster_push(&scmap, vp, ubc_getsize(vp), PUSH_ALL, flags | IO_PASSIVE, callback, callback_arg); + sparse_cluster_push(&scmap, vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg); lck_mtx_lock(&wbp->cl_lockw); @@ -5090,11 +5063,11 @@ cluster_push_ext(vnode_t vp, int flags, int (*callback)(buf_t, void *), void *ca if (wbp->cl_sparse_wait && wbp->cl_sparse_pushes == 0) wakeup((caddr_t)&wbp->cl_sparse_pushes); } else { - sparse_cluster_push(&(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL, flags | IO_PASSIVE, callback, callback_arg); + sparse_cluster_push(&(wbp->cl_scmap), vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg); } retval = 1; } else { - retval = cluster_try_push(wbp, vp, ubc_getsize(vp), PUSH_ALL, flags | IO_PASSIVE, callback, callback_arg); + retval = cluster_try_push(wbp, vp, ubc_getsize(vp), PUSH_ALL, flags, callback, callback_arg); } lck_mtx_unlock(&wbp->cl_lockw); diff --git a/bsd/vfs/vfs_conf.c b/bsd/vfs/vfs_conf.c index a64040dd9..d802374b6 100644 --- a/bsd/vfs/vfs_conf.c +++ b/bsd/vfs/vfs_conf.c @@ -72,13 +72,6 @@ #include #include -#if CONFIG_VFS_FUNNEL -#define VFS_THREAD_SAFE_FLAG VFC_VFSTHREADSAFE /* Only defined under CONFIG_VFS_FUNNEL */ -#else -#define VFS_THREAD_SAFE_FLAG 0 -#endif /* CONFIG_VFS_FUNNEL */ - - /* * These define the root filesystem, device, and root filesystem type. */ @@ -104,6 +97,11 @@ extern struct vfsops afs_vfsops; extern struct vfsops null_vfsops; extern struct vfsops devfs_vfsops; +#if MOCKFS +extern struct vfsops mockfs_vfsops; +extern int mockfs_mountroot(mount_t, vnode_t, vfs_context_t); +#endif /* MOCKFS */ + /* * For nfs_mountroot(void) cast. nfs_mountroot ignores its parameters, if * invoked through this table. 
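The "For nfs_mountroot(void) cast" comment above is worth a note: the vfstable stores every root-mount handler through the common mountroot_t type, and a handler whose true prototype differs (nfs_mountroot takes no parameters) is simply cast when the table is built. A sketch of the idea, hedged heavily: the cast-and-call is formally undefined behavior in ISO C and only works because the callee never examines its arguments under the platform calling convention. try_root() here is hypothetical; mountroot_t and the nfs_mountroot prototype come from this file.

    typedef int (*mountroot_t)(mount_t, vnode_t, vfs_context_t);

    extern int nfs_mountroot(void);     /* ignores any arguments passed */

    static int
    try_root(mount_t mp, vnode_t rvp, vfs_context_t ctx)
    {
            /*
             * Invoke through the generic table type; the extra
             * arguments are silently dropped by the callee.
             */
            mountroot_t fn = (mountroot_t)nfs_mountroot;
            return fn(mp, rvp, ctx);
    }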
@@ -116,7 +114,7 @@ typedef int (*mountroot_t)(mount_t, vnode_t, vfs_context_t); static struct vfstable vfstbllist[] = { /* HFS/HFS+ Filesystem */ #if HFS - { &hfs_vfsops, "hfs", 17, 0, (MNT_LOCAL | MNT_DOVOLFS), hfs_mountroot, NULL, 0, 0, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY | VFC_VFSVNOP_PAGEOUTV2 | VFC_VFSVNOP_PAGEINV2, NULL, 0}, + { &hfs_vfsops, "hfs", 17, 0, (MNT_LOCAL | MNT_DOVOLFS), hfs_mountroot, NULL, 0, 0, VFC_VFSLOCALARGS | VFC_VFSREADDIR_EXTENDED | VFC_VFS64BITREADY | VFC_VFSVNOP_PAGEOUTV2 | VFC_VFSVNOP_PAGEINV2, NULL, 0}, #endif /* Memory-based Filesystem */ @@ -129,7 +127,7 @@ static struct vfstable vfstbllist[] = { /* Sun-compatible Network Filesystem */ #if NFSCLIENT - { &nfs_vfsops, "nfs", 2, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFSPREFLIGHT | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY | VFC_VFSREADDIR_EXTENDED, NULL, 0}, + { &nfs_vfsops, "nfs", 2, 0, 0, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFSPREFLIGHT | VFC_VFS64BITREADY | VFC_VFSREADDIR_EXTENDED, NULL, 0}, #endif /* Andrew Filesystem */ @@ -142,15 +140,20 @@ static struct vfstable vfstbllist[] = { /* Device Filesystem */ #if DEVFS #if CONFIG_MACF - { &devfs_vfsops, "devfs", 19, 0, (MNT_DONTBROWSE | MNT_MULTILABEL), NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY, NULL, 0}, + { &devfs_vfsops, "devfs", 19, 0, (MNT_DONTBROWSE | MNT_MULTILABEL), NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFS64BITREADY, NULL, 0}, #else - { &devfs_vfsops, "devfs", 19, 0, MNT_DONTBROWSE, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFS_THREAD_SAFE_FLAG | VFC_VFS64BITREADY, NULL, 0}, + { &devfs_vfsops, "devfs", 19, 0, MNT_DONTBROWSE, NULL, NULL, 0, 0, VFC_VFSGENERICARGS | VFC_VFS64BITREADY, NULL, 0}, #endif /* MAC */ #endif #ifndef __LP64__ #endif /* __LP64__ */ +#if MOCKFS + /* If we are configured for it, mockfs should always be the last standard entry (and thus the last FS we attempt mountroot with) */ + { &mockfs_vfsops, "mockfs", 0x6D6F636B, 0, MNT_LOCAL, mockfs_mountroot, NULL, 0, 0, VFC_VFSGENERICARGS, NULL, 0}, +#endif /* MOCKFS */ + {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, {NULL, "", 0, 0, 0, NULL, NULL, 0, 0, 0, NULL, 0}, @@ -198,7 +201,9 @@ extern struct vnodeopv_desc spec_nfsv4nodeop_opv_desc; extern struct vnodeopv_desc fifo_nfsv4nodeop_opv_desc; extern struct vnodeopv_desc null_vnodeop_opv_desc; extern struct vnodeopv_desc hfs_vnodeop_opv_desc; +#if CONFIG_HFS_STD extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc; +#endif extern struct vnodeopv_desc hfs_specop_opv_desc; extern struct vnodeopv_desc hfs_fifoop_opv_desc; extern struct vnodeopv_desc devfs_vnodeop_opv_desc; @@ -208,6 +213,10 @@ extern struct vnodeopv_desc devfs_devfd_vnodeop_opv_desc; extern struct vnodeopv_desc devfs_fdesc_vnodeop_opv_desc; #endif /* FDESC */ +#if MOCKFS +extern struct vnodeopv_desc mockfs_vnodeop_opv_desc; +#endif /* MOCKFS */ + struct vnodeopv_desc *vfs_opv_descs[] = { &dead_vnodeop_opv_desc, #if FIFO && SOCKETS @@ -229,7 +238,9 @@ struct vnodeopv_desc *vfs_opv_descs[] = { #endif #if HFS &hfs_vnodeop_opv_desc, +#if CONFIG_HFS_STD &hfs_std_vnodeop_opv_desc, +#endif &hfs_specop_opv_desc, #if FIFO &hfs_fifoop_opv_desc, @@ -243,5 +254,8 @@ struct vnodeopv_desc *vfs_opv_descs[] = { &devfs_fdesc_vnodeop_opv_desc, #endif /* FDESC */ #endif /* DEVFS */ +#if MOCKFS + &mockfs_vnodeop_opv_desc, +#endif /* MOCKFS */ NULL }; diff --git a/bsd/vfs/vfs_fsevents.c b/bsd/vfs/vfs_fsevents.c index 
887ee2888..5c816aad5 100644 --- a/bsd/vfs/vfs_fsevents.c +++ b/bsd/vfs/vfs_fsevents.c @@ -165,30 +165,6 @@ __private_extern__ void qsort( size_t member_size, int (*)(const void *, const void *)); - - -/* From kdp_udp.c + user mode Libc - this ought to be in a library */ -static char * -strnstr(char *s, const char *find, size_t slen) -{ - char c, sc; - size_t len; - - if ((c = *find++) != '\0') { - len = strlen(find); - do { - do { - if ((sc = *s++) == '\0' || slen-- < 1) - return (NULL); - } while (sc != c); - if (len > slen) - return (NULL); - } while (strncmp(s, find, len) != 0); - s--; - } - return (s); -} - static int is_ignored_directory(const char *path) { @@ -1890,7 +1866,7 @@ fseventsf_drain(struct fileproc *fp, __unused vfs_context_t ctx) static int fseventsopen(__unused dev_t dev, __unused int flag, __unused int mode, __unused struct proc *p) { - if (!is_suser()) { + if (!kauth_cred_issuser(kauth_cred_get())) { return EPERM; } @@ -2083,7 +2059,8 @@ fseventswrite(__unused dev_t dev, struct uio *uio, __unused int ioflag) } -static struct fileops fsevents_fops = { +static const struct fileops fsevents_fops = { + DTYPE_FSEVENTS, fseventsf_read, fseventsf_write, fseventsf_ioctl, @@ -2200,7 +2177,6 @@ fseventsioctl(__unused dev_t dev, u_long cmd, caddr_t data, __unused int flag, s } proc_fdlock(p); f->f_fglob->fg_flag = FREAD | FWRITE; - f->f_fglob->fg_type = DTYPE_FSEVENTS; f->f_fglob->fg_ops = &fsevents_fops; f->f_fglob->fg_data = (caddr_t) fseh; proc_fdunlock(p); diff --git a/bsd/vfs/vfs_fslog.c b/bsd/vfs/vfs_fslog.c index 580ea60b4..dfc64b7a2 100644 --- a/bsd/vfs/vfs_fslog.c +++ b/bsd/vfs/vfs_fslog.c @@ -26,7 +26,6 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include #include #include #include @@ -35,158 +34,75 @@ #include #include #include -#include /* for vaddlog() */ +#include #include -#include - #include #include +#include -#include - -/* String to append as format modifier for each key-value pair */ -#define FSLOG_KEYVAL_FMT "[%s %s] " -#define FSLOG_KEYVAL_FMT_LEN (sizeof(FSLOG_KEYVAL_FMT) - 1) - -#define FSLOG_NEWLINE_CHAR "\n" -#define FSLOG_NEWLINE_CHAR_LEN (sizeof(FSLOG_NEWLINE_CHAR) - 1) +#include -/* Length of entire ASL message in 10 characters. Kernel defaults to zero */ -#define FSLOG_ASL_MSG_LEN " 0" +#include -/* Length of default format string to be used by printf */ -#define MAX_FMT_LEN 256 +#include -/* Internal function to print input values as key-value pairs in format - * identifiable by Apple system log (ASL) facility. All key-value pairs - * are assumed to be pointer to strings and are provided using two ways - - * (a) va_list argument which is a list of varying number of arguments - * created by the caller of this function. - * (b) variable number of arguments passed to this function. - * - * Parameters - - * level - Priority level for this ASL message - * facility - Facility for this ASL message. - * num_pairs - Number of key-value pairs provided by vargs argument. - * vargs - List of key-value pairs. - * ... - Additional key-value pairs (apart from vargs) as variable - * argument list. A NULL value indicates the end of the - * variable argument list. - * - * Returns - - * zero - On success, when it prints all key-values pairs provided. - * E2BIG - When it cannot print all key-value pairs provided and had - * to truncate the output. +/* Log information about external modification of a process, + * using MessageTracer formatting. Assumes that both the caller + * and target are appropriately locked. + * Currently prints following information - + * 1. 
Caller process name (truncated to 16 characters) + * 2. Caller process Mach-O UUID + * 3. Target process name (truncated to 16 characters) + * 4. Target process Mach-O UUID */ -static int fslog_asl_msg(int level, const char *facility, int num_pairs, va_list vargs, ...) +void +fslog_extmod_msgtracer(proc_t caller, proc_t target) { - int err = 0; - char fmt[MAX_FMT_LEN]; /* Format string to use with vaddlog */ - int calc_pairs = 0; - size_t len; - int i; - va_list ap; - char *ptr; - - /* Mask extra bits, if any, from priority level */ - level = LOG_PRI(level); - - /* Create the first part of format string consisting of ASL - * message length, level, and facility. - */ - if (facility) { - snprintf(fmt, MAX_FMT_LEN, "%s [%s %d] [%s %d] [%s %s] ", - FSLOG_ASL_MSG_LEN, - FSLOG_KEY_LEVEL, level, - FSLOG_KEY_READ_UID, FSLOG_VAL_READ_UID, - FSLOG_KEY_FACILITY, facility); - } else { - snprintf(fmt, MAX_FMT_LEN, "%s [%s %d] [%s %d] ", - FSLOG_ASL_MSG_LEN, - FSLOG_KEY_LEVEL, level, - FSLOG_KEY_READ_UID, FSLOG_VAL_READ_UID); - } - - /* Determine the number of key-value format string [%s %s] that - * should be added in format string for every key-value pair provided - * in va_list. Calculate maximum number of format string that can be - * accommodated in the remaining format buffer (after saving space - * for newline character). If the caller provided pairs in va_list - * is more than calculated pairs, truncate extra pairs. - */ - len = MAX_FMT_LEN - strlen(fmt) - FSLOG_NEWLINE_CHAR_LEN - 1; - calc_pairs = len / FSLOG_KEYVAL_FMT_LEN; - if (num_pairs <= calc_pairs) { - calc_pairs = num_pairs; - } else { - err = E2BIG; - } - - /* Append format strings [%s %s] for the key-value pairs in vargs */ - len = MAX_FMT_LEN - FSLOG_NEWLINE_CHAR_LEN; - for (i = 0; i < calc_pairs; i++) { - (void) strlcat(fmt, FSLOG_KEYVAL_FMT, len); - } - - /* Count number of variable arguments provided to this function - * and determine total number of key-value pairs. - */ - calc_pairs = 0; - va_start(ap, vargs); - ptr = va_arg(ap, char *); - while (ptr) { - calc_pairs++; - ptr = va_arg(ap, char *); - } - calc_pairs /= 2; - va_end(ap); - - /* If user provided variable number of arguments, append them as - * as real key-value "[k v]" into the format string. If the format - * string is too small, ignore the key-value pair completely. - */ - if (calc_pairs) { - char *key, *val; - size_t pairlen; - int offset; + if ((caller != PROC_NULL) && (target != PROC_NULL)) { - /* Calculate bytes available for key-value pairs after reserving - * bytes for newline character and NULL terminator + /* + * Print into buffer large enough for "ThisIsAnApplicat(BC223DD7-B314-42E0-B6B0-C5D2E6638337)", + * including space for escaping, and NUL byte included in sizeof(uuid_string_t). */ - len = MAX_FMT_LEN - strlen(fmt) - FSLOG_NEWLINE_CHAR_LEN - 1; - offset = strlen(fmt); - - va_start(ap, vargs); - for (i = 0; i < calc_pairs; i++) { - key = va_arg(ap, char *); - val = va_arg(ap, char *); - /* Calculate bytes required to store next key-value pair as - * "[key val] " including space for '[', ']', and two spaces. 
- */ - pairlen = strlen(key) + strlen(val) + 4; - if (pairlen > len) { - err = E2BIG; - break; - } + uuid_string_t uuidstr; + char c_name[2*MAXCOMLEN + 2 /* () */ + sizeof(uuid_string_t)]; + char t_name[2*MAXCOMLEN + 2 /* () */ + sizeof(uuid_string_t)]; - /* len + 1 because one byte has been set aside for NULL - * terminator in calculation of 'len' above - */ - snprintf((fmt + offset), len + 1, FSLOG_KEYVAL_FMT, key, val); - offset += pairlen; - len -= pairlen; + strlcpy(c_name, caller->p_comm, sizeof(c_name)); + uuid_unparse_upper(caller->p_uuid, uuidstr); + strlcat(c_name, "(", sizeof(c_name)); + strlcat(c_name, uuidstr, sizeof(c_name)); + strlcat(c_name, ")", sizeof(c_name)); + if (0 != escape_str(c_name, strlen(c_name), sizeof(c_name))) { + return; } - va_end(ap); - } - - /* Append newline */ - (void) strlcat(fmt, FSLOG_NEWLINE_CHAR, MAX_FMT_LEN); - /* Print the key-value pairs in ASL format */ - vaddlog(fmt, vargs); + strlcpy(t_name, target->p_comm, sizeof(t_name)); + uuid_unparse_upper(target->p_uuid, uuidstr); + strlcat(t_name, "(", sizeof(t_name)); + strlcat(t_name, uuidstr, sizeof(t_name)); + strlcat(t_name, ")", sizeof(t_name)); + if (0 != escape_str(t_name, strlen(t_name), sizeof(t_name))) { + return; + } +#if DEBUG + printf("EXTMOD: %s(%d) -> %s(%d)\n", + c_name, + proc_pid(caller), + t_name, + proc_pid(target)); +#endif - return err; + kern_asl_msg(LOG_DEBUG, "messagetracer", + 5, + "com.apple.message.domain", "com.apple.kernel.external_modification", /* 0 */ + "com.apple.message.signature", c_name, /* 1 */ + "com.apple.message.signature2", t_name, /* 2 */ + "com.apple.message.result", "noop", /* 3 */ + "com.apple.message.summarize", "YES", /* 4 */ + NULL); + } } /* Log file system related error in key-value format identified by Apple @@ -248,8 +164,10 @@ unsigned long fslog_err(unsigned long msg_id, ... ) va_start(ap, msg_id); if (msg_id == FSLOG_MSG_SINGLE) { /* Single message, do not print message ID and message order */ - (void) fslog_asl_msg(FSLOG_VAL_LEVEL, FSLOG_VAL_FACILITY, - num_pairs, ap, NULL); + (void) kern_asl_msg_va(FSLOG_VAL_LEVEL, FSLOG_VAL_FACILITY, + num_pairs, ap, + FSLOG_KEY_READ_UID, FSLOG_VAL_READ_UID, + NULL); } else { if (msg_id == FSLOG_MSG_FIRST) { /* First message, generate random message ID */ @@ -277,60 +195,16 @@ unsigned long fslog_err(unsigned long msg_id, ... ) } snprintf(msg_id_str, sizeof(msg_id_str), "%lu", msg_id); - (void) fslog_asl_msg(FSLOG_VAL_LEVEL, FSLOG_VAL_FACILITY, - num_pairs, ap, - FSLOG_KEY_MSG_ID, msg_id_str, - FSLOG_KEY_MSG_ORDER, msg_order_ptr, NULL); + (void) kern_asl_msg_va(FSLOG_VAL_LEVEL, FSLOG_VAL_FACILITY, + num_pairs, ap, + FSLOG_KEY_READ_UID, FSLOG_VAL_READ_UID, + FSLOG_KEY_MSG_ID, msg_id_str, + FSLOG_KEY_MSG_ORDER, msg_order_ptr, NULL); } va_end(ap); return msg_id; } -/* Search if given string contains '[' and ']'. If any, escape it by - * prefixing with a '\'. If the length of the string is not big enough, - * no changes are done and error is returned. - * - * Parameters - - * str - string that can contain '[' or ']', should be NULL terminated - * len - length, in bytes, of valid data, including NULL character. 
- * buflen - size of buffer that contains the string - */ -static int escape_str(char *str, int len, int buflen) -{ - int count; - char *src, *dst; - - /* Count number of characters to escape */ - src = str; - count = 0; - do { - if ((*src == '[') || (*src == ']')) { - count++; - } - } while (*src++); - - if (count) { - /* Check if the buffer has enough space to escape all characters */ - if ((buflen - len) < count) { - return ENOSPC; - } - - src = str + len; - dst = src + count; - while (count) { - *dst-- = *src; - if ((*src == '[') || (*src == ']')) { - /* Last char copied needs to be escaped */ - *dst-- = '\\'; - count--; - } - src--; - } - } - - return 0; -} - /* Log information about runtime file system corruption detected by * the file system. It takes the VFS mount structure as * parameter which is used to access the mount point of the @@ -382,9 +256,9 @@ void fslog_io_error(const buf_t bp) /* Determine type of IO operation */ if (buf_flags(bp) & B_READ) { - iotype = FSLOG_VAL_IOTYPE_READ; + iotype = FSLOG_VAL_IOTYPE_READ; } else { - iotype = FSLOG_VAL_IOTYPE_WRITE; + iotype = FSLOG_VAL_IOTYPE_WRITE; } /* Convert physical block number to string */ @@ -459,72 +333,3 @@ out: return; } -static void -_fslog_extmod_msgtracer_internal(int level, const char *facility, int num_pairs, ...) -{ - va_list ap; - - va_start(ap, num_pairs); - (void) fslog_asl_msg(level, facility, - num_pairs, ap, NULL); - va_end(ap); -} - -/* Log information about external modification of a process, - * using MessageTracer formatting. Assumes that both the caller - * and target are appropriately locked. - * Currently prints following information - - * 1. Caller process name (truncated to 16 characters) - * 2. Caller process Mach-O UUID - * 3. Target process name (truncated to 16 characters) - * 4. Target process Mach-O UUID - */ -void -fslog_extmod_msgtracer(proc_t caller, proc_t target) -{ - if ((caller != PROC_NULL) && (target != PROC_NULL)) { - - /* - * Print into buffer large enough for "ThisIsAnApplicat(BC223DD7-B314-42E0-B6B0-C5D2E6638337)", - * including space for escaping, and NUL byte included in sizeof(uuid_string_t). 
- */ - - uuid_string_t uuidstr; - char c_name[2*MAXCOMLEN + 2 /* () */ + sizeof(uuid_string_t)]; - char t_name[2*MAXCOMLEN + 2 /* () */ + sizeof(uuid_string_t)]; - - strlcpy(c_name, caller->p_comm, sizeof(c_name)); - uuid_unparse_upper(caller->p_uuid, uuidstr); - strlcat(c_name, "(", sizeof(c_name)); - strlcat(c_name, uuidstr, sizeof(c_name)); - strlcat(c_name, ")", sizeof(c_name)); - if (0 != escape_str(c_name, strlen(c_name), sizeof(c_name))) { - return; - } - - strlcpy(t_name, target->p_comm, sizeof(t_name)); - uuid_unparse_upper(target->p_uuid, uuidstr); - strlcat(t_name, "(", sizeof(t_name)); - strlcat(t_name, uuidstr, sizeof(t_name)); - strlcat(t_name, ")", sizeof(t_name)); - if (0 != escape_str(t_name, strlen(t_name), sizeof(t_name))) { - return; - } - -#if DEBUG - printf("EXTMOD: %s(%d) -> %s(%d)\n", - c_name, - proc_pid(caller), - t_name, - proc_pid(target)); -#endif - - _fslog_extmod_msgtracer_internal(LOG_DEBUG, "messagetracer", - 4, - "com.apple.message.domain", "com.apple.kernel.external_modification", /* 0 */ - "com.apple.message.signature", c_name, /* 1 */ - "com.apple.message.signature2", t_name, /* 2 */ - "com.apple.message.result", "noop", /* 3 */ - NULL); - } -} diff --git a/bsd/vfs/vfs_init.c b/bsd/vfs/vfs_init.c index 2c83c4725..f9dfc09e5 100644 --- a/bsd/vfs/vfs_init.c +++ b/bsd/vfs/vfs_init.c @@ -102,7 +102,7 @@ #define DODEBUG(A) #endif -__private_extern__ void vntblinit(void) __attribute__((section("__TEXT, initcode"))); +__private_extern__ void vntblinit(void); extern struct vnodeopv_desc *vfs_opv_descs[]; /* a list of lists of vnodeops defns */ diff --git a/bsd/vfs/vfs_journal.c b/bsd/vfs/vfs_journal.c index 3864ff34c..d38556692 100644 --- a/bsd/vfs/vfs_journal.c +++ b/bsd/vfs/vfs_journal.c @@ -248,10 +248,11 @@ static int insert_block(journal *jnl, struct bucket **buf_ptr, int blk_index, of // we use it to checksum the journal header and the block list // headers that are at the start of each transaction. // -static int +static unsigned int calc_checksum(char *ptr, int len) { - int i, cksum=0; + int i; + unsigned int cksum=0; // this is a lame checksum but for now it'll do for(i = 0; i < len; i++, ptr++) { @@ -1091,7 +1092,8 @@ add_block(journal *jnl, struct bucket **buf_ptr, off_t block_num, size_t size, _ static int replay_journal(journal *jnl) { - int i, orig_checksum, checksum, check_block_checksums=0, bad_blocks=0; + int i, bad_blocks=0; + unsigned int orig_checksum, checksum, check_block_checksums = 0; size_t ret; size_t max_bsize = 0; /* protected by block_ptr */ block_list_header *blhdr; @@ -1157,7 +1159,7 @@ restart_replay: if (jnl->flags & JOURNAL_NEED_SWAP) { // calculate the checksum based on the unswapped data // because it is done byte-at-a-time. - orig_checksum = SWAP32(orig_checksum); + orig_checksum = (unsigned int)SWAP32(orig_checksum); checksum = calc_checksum((char *)blhdr, BLHDR_CHECKSUM_SIZE); swap_block_list_header(jnl, blhdr); } else { @@ -1546,9 +1548,10 @@ get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_con if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&features, 0, context) == 0) { if (features & DK_FEATURE_FORCE_UNIT_ACCESS) { - const char *name = vnode_name(devvp); + const char *name = vnode_getname_printable(devvp); jnl->flags |= JOURNAL_DO_FUA_WRITES; - printf("jnl: %s: enabling FUA writes (features 0x%x)\n", name ? 
name : "no-name-dev", features); + printf("jnl: %s: enabling FUA writes (features 0x%x)\n", name, features); + vnode_putname_printable(name); } if (features & DK_FEATURE_UNMAP) { jnl->flags |= JOURNAL_USE_UNMAP; @@ -1616,23 +1619,6 @@ get_io_info(struct vnode *devvp, size_t phys_blksz, journal *jnl, struct vfs_con } -static const char * -get_jdev_name(struct vnode *jvp) -{ - const char *jdev_name; - - jdev_name = vnode_name(jvp); - if (jdev_name == NULL) { - jdev_name = vfs_addname("unknown-dev", strlen("unknown-dev"), 0, 0); - } else { - // this just bumps the refcount on the name so we have our own copy - jdev_name = vfs_addname(jdev_name, strlen(jdev_name), 0, 0); - } - - return jdev_name; -} - - journal * journal_create(struct vnode *jvp, off_t offset, @@ -1642,7 +1628,8 @@ journal_create(struct vnode *jvp, int32_t flags, int32_t tbuffer_size, void (*flush)(void *arg), - void *arg) + void *arg, + struct mount *fsmount) { journal *jnl; uint32_t phys_blksz, new_txn_base; @@ -1658,36 +1645,36 @@ journal_create(struct vnode *jvp, context.vc_thread = current_thread(); context.vc_ucred = FSCRED; - jdev_name = get_jdev_name(jvp); + jdev_name = vnode_getname_printable(jvp); /* Get the real physical block size. */ if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) { - return NULL; + goto cleanup_jdev_name; } if (journal_size < (256*1024) || journal_size > (MAX_JOURNAL_SIZE)) { - printf("jnl: create: journal size %lld looks bogus.\n", journal_size); - return NULL; + printf("jnl: %s: create: journal size %lld looks bogus.\n", jdev_name, journal_size); + goto cleanup_jdev_name; } min_size = phys_blksz * (phys_blksz / sizeof(block_info)); /* Reject journals that are too small given the sector size of the device */ if (journal_size < min_size) { - printf("jnl: create: journal size (%lld) too small given sector size of (%u)\n", - journal_size, phys_blksz); - return NULL; + printf("jnl: %s: create: journal size (%lld) too small given sector size of (%u)\n", + jdev_name, journal_size, phys_blksz); + goto cleanup_jdev_name; } if (phys_blksz > min_fs_blksz) { printf("jnl: %s: create: error: phys blksize %u bigger than min fs blksize %zd\n", jdev_name, phys_blksz, min_fs_blksz); - return NULL; + goto cleanup_jdev_name; } if ((journal_size % phys_blksz) != 0) { printf("jnl: %s: create: journal size 0x%llx is not an even multiple of block size 0x%ux\n", jdev_name, journal_size, phys_blksz); - return NULL; + goto cleanup_jdev_name; } @@ -1703,6 +1690,12 @@ journal_create(struct vnode *jvp, jnl->jdev_name = jdev_name; lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr); + // Keep a point to the mount around for use in IO throttling. + jnl->fsmount = fsmount; + // XXX: This lock discipline looks correct based on dounmount(), but it + // doesn't seem to be documented anywhere. 
+ mount_ref(fsmount, 0); + get_io_info(jvp, phys_blksz, jnl, &context); if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) { @@ -1738,7 +1731,7 @@ journal_create(struct vnode *jvp, && jnl->jhdr->sequence_num != 0) { new_txn_base = (jnl->jhdr->sequence_num + (journal_size / phys_blksz) + (random() % 16384)) & 0x00ffffff; - printf("jnl: create: avoiding old sequence number 0x%x (0x%x)\n", jnl->jhdr->sequence_num, new_txn_base); + printf("jnl: %s: create: avoiding old sequence number 0x%x (0x%x)\n", jdev_name, jnl->jhdr->sequence_num, new_txn_base); #if 0 int i; @@ -1793,19 +1786,20 @@ journal_create(struct vnode *jvp, goto bad_write; } - return jnl; + goto journal_create_complete; bad_write: kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz); bad_kmem_alloc: - if (jdev_name) { - vfs_removename(jdev_name); - } jnl->jhdr = NULL; FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL); - - return NULL; + mount_drop(fsmount, 0); +cleanup_jdev_name: + vnode_putname_printable(jdev_name); + jnl = NULL; +journal_create_complete: + return jnl; } @@ -1818,7 +1812,8 @@ journal_open(struct vnode *jvp, int32_t flags, int32_t tbuffer_size, void (*flush)(void *arg), - void *arg) + void *arg, + struct mount *fsmount) { journal *jnl; uint32_t orig_blksz=0; @@ -1826,39 +1821,39 @@ journal_open(struct vnode *jvp, u_int32_t min_size = 0; int orig_checksum, checksum; struct vfs_context context; - const char *jdev_name = get_jdev_name(jvp); + const char *jdev_name = vnode_getname_printable(jvp); context.vc_thread = current_thread(); context.vc_ucred = FSCRED; /* Get the real physical block size. */ if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) { - return NULL; + goto cleanup_jdev_name; } if (phys_blksz > min_fs_blksz) { printf("jnl: %s: open: error: phys blksize %u bigger than min fs blksize %zd\n", jdev_name, phys_blksz, min_fs_blksz); - return NULL; + goto cleanup_jdev_name; } if (journal_size < (256*1024) || journal_size > (1024*1024*1024)) { - printf("jnl: open: journal size %lld looks bogus.\n", journal_size); - return NULL; + printf("jnl: %s: open: journal size %lld looks bogus.\n", jdev_name, journal_size); + goto cleanup_jdev_name; } min_size = phys_blksz * (phys_blksz / sizeof(block_info)); /* Reject journals that are too small given the sector size of the device */ if (journal_size < min_size) { - printf("jnl: open: journal size (%lld) too small given sector size of (%u)\n", - journal_size, phys_blksz); - return NULL; + printf("jnl: %s: open: journal size (%lld) too small given sector size of (%u)\n", + jdev_name, journal_size, phys_blksz); + goto cleanup_jdev_name; } if ((journal_size % phys_blksz) != 0) { printf("jnl: %s: open: journal size 0x%llx is not an even multiple of block size 0x%x\n", jdev_name, journal_size, phys_blksz); - return NULL; + goto cleanup_jdev_name; } MALLOC_ZONE(jnl, struct journal *, sizeof(struct journal), M_JNL_JNL, M_WAITOK); @@ -1873,6 +1868,12 @@ journal_open(struct vnode *jvp, jnl->jdev_name = jdev_name; lck_mtx_init(&jnl->old_start_lock, jnl_mutex_group, jnl_lock_attr); + /* We need a reference to the mount to later pass to the throttling code for + * IO accounting. 
+ */ + jnl->fsmount = fsmount; + mount_ref(fsmount, 0); + get_io_info(jvp, phys_blksz, jnl, &context); if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl->header_buf, phys_blksz)) { @@ -2045,7 +2046,7 @@ journal_open(struct vnode *jvp, lck_mtx_init(&jnl->flock, jnl_mutex_group, jnl_lock_attr); lck_rw_init(&jnl->trim_lock, jnl_mutex_group, jnl_lock_attr); - return jnl; + goto journal_open_complete; bad_journal: if (orig_blksz != 0) { @@ -2055,11 +2056,13 @@ bad_journal: } kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, phys_blksz); bad_kmem_alloc: - if (jdev_name) { - vfs_removename(jdev_name); - } FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL); - return NULL; + mount_drop(fsmount, 0); +cleanup_jdev_name: + vnode_putname_printable(jdev_name); + jnl = NULL; +journal_open_complete: + return jnl; } @@ -2075,7 +2078,7 @@ journal_is_clean(struct vnode *jvp, int ret; int orig_checksum, checksum; struct vfs_context context; - const char *jdev_name = get_jdev_name(jvp); + const char *jdev_name = vnode_getname_printable(jvp); context.vc_thread = current_thread(); context.vc_ucred = FSCRED; @@ -2083,31 +2086,36 @@ journal_is_clean(struct vnode *jvp, /* Get the real physical block size. */ if (VNOP_IOCTL(jvp, DKIOCGETBLOCKSIZE, (caddr_t)&phys_blksz, 0, &context)) { printf("jnl: %s: is_clean: failed to get device block size.\n", jdev_name); - return EINVAL; + ret = EINVAL; + goto cleanup_jdev_name; } if (phys_blksz > (uint32_t)min_fs_block_size) { printf("jnl: %s: is_clean: error: phys blksize %d bigger than min fs blksize %zd\n", jdev_name, phys_blksz, min_fs_block_size); - return EINVAL; + ret = EINVAL; + goto cleanup_jdev_name; } if (journal_size < (256*1024) || journal_size > (MAX_JOURNAL_SIZE)) { - printf("jnl: is_clean: journal size %lld looks bogus.\n", journal_size); - return EINVAL; + printf("jnl: %s: is_clean: journal size %lld looks bogus.\n", jdev_name, journal_size); + ret = EINVAL; + goto cleanup_jdev_name; } if ((journal_size % phys_blksz) != 0) { printf("jnl: %s: is_clean: journal size 0x%llx is not an even multiple of block size 0x%x\n", jdev_name, journal_size, phys_blksz); - return EINVAL; + ret = EINVAL; + goto cleanup_jdev_name; } memset(&jnl, 0, sizeof(jnl)); if (kmem_alloc_kobject(kernel_map, (vm_offset_t *)&jnl.header_buf, phys_blksz)) { printf("jnl: %s: is_clean: could not allocate space for header buffer (%d bytes)\n", jdev_name, phys_blksz); - return ENOMEM; + ret = ENOMEM; + goto cleanup_jdev_name; } jnl.header_buf_size = phys_blksz; @@ -2168,12 +2176,9 @@ journal_is_clean(struct vnode *jvp, get_out: kmem_free(kernel_map, (vm_offset_t)jnl.header_buf, phys_blksz); - if (jdev_name) { - vfs_removename(jdev_name); - } - - return ret; - +cleanup_jdev_name: + vnode_putname_printable(jdev_name); + return ret; } @@ -2275,10 +2280,16 @@ journal_close(journal *jnl) kmem_free(kernel_map, (vm_offset_t)jnl->header_buf, jnl->header_buf_size); jnl->jhdr = (void *)0xbeefbabe; - if (jnl->jdev_name) { - vfs_removename(jnl->jdev_name); - } + // Release reference on the mount + if (jnl->fsmount) + mount_drop(jnl->fsmount, 0); + + vnode_putname_printable(jnl->jdev_name); + unlock_journal(jnl); + lck_mtx_destroy(&jnl->old_start_lock, jnl_mutex_group); + lck_mtx_destroy(&jnl->jlock, jnl_mutex_group); + lck_mtx_destroy(&jnl->flock, jnl_mutex_group); FREE_ZONE(jnl, sizeof(struct journal), M_JNL_JNL); } @@ -2847,6 +2858,8 @@ journal_modify_block_end(journal *jnl, struct buf *bp, void (*func)(buf_t bp, vo blhdr->binfo[i].bnum = (off_t)(buf_blkno(bp)); blhdr->binfo[i].u.bp = bp; + 
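journal_create() and journal_open() above trade their scattered early `return NULL` exits for a single-exit shape: the printable device name is taken once up front, every subsequent failure jumps to cleanup_jdev_name, and the success path leaves through its own label — so vnode_putname_printable() (and mount_drop(), for failures after the new mount_ref()) can no longer be skipped, with journal_close() releasing the same references on teardown. A minimal userspace sketch of that discipline; the stand-in resource calls are mine, not the kernel's:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct journal_stub { char *jdev_name; };

    static struct journal_stub *journal_create_stub(long long journal_size)
    {
        struct journal_stub *jnl = NULL;
        char *jdev_name = strdup("disk0s2");   /* vnode_getname_printable() stand-in */

        if (jdev_name == NULL)
            return NULL;
        if (journal_size < 256 * 1024) {
            printf("jnl: %s: create: journal size %lld looks bogus.\n",
                   jdev_name, journal_size);
            goto cleanup_jdev_name;            /* was: return NULL, leaking the name */
        }
        jnl = calloc(1, sizeof(*jnl));
        if (jnl == NULL)
            goto cleanup_jdev_name;
        jnl->jdev_name = jdev_name;            /* ownership moves into the journal */
        goto journal_create_complete;

    cleanup_jdev_name:
        free(jdev_name);                       /* vnode_putname_printable() stand-in */
        jnl = NULL;
    journal_create_complete:
        return jnl;
    }

    int main(void)
    {
        struct journal_stub *j = journal_create_stub(4 * 1024);  /* too small */
        printf("create %s\n", j ? "succeeded" : "failed");
        return 0;
    }

A side benefit visible in the hunks above: every failure printf now has jdev_name in hand, so each message can identify the device.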
KERNEL_DEBUG_CONSTANT(0x3018004, vp, blhdr->binfo[i].bnum, bsize, 0, 0); + if (func) { void (*old_func)(buf_t, void *)=NULL, *old_arg=NULL; @@ -3045,31 +3058,41 @@ trim_realloc(struct jnl_trim_list *trim) ; trim - The trim list to be searched. ; offset - The first byte of the range to be searched for. ; length - The number of bytes of the extent being searched for. + ; overlap_start - start of the overlapping extent + ; overlap_len - length of the overlapping extent ; ; Output: ; (result) - TRUE if one or more extents overlap, FALSE otherwise. ;________________________________________________________________________________ */ static int -trim_search_extent(struct jnl_trim_list *trim, uint64_t offset, uint64_t length) +trim_search_extent(struct jnl_trim_list *trim, uint64_t offset, + uint64_t length, uint64_t *overlap_start, uint64_t *overlap_len) { uint64_t end = offset + length; uint32_t lower = 0; /* Lowest index to search */ uint32_t upper = trim->extent_count; /* Highest index to search + 1 */ uint32_t middle; - + /* A binary search over the extent list. */ while (lower < upper) { middle = (lower + upper) / 2; - + if (trim->extents[middle].offset >= end) upper = middle; else if (trim->extents[middle].offset + trim->extents[middle].length <= offset) lower = middle + 1; - else + else { + if (overlap_start) { + *overlap_start = trim->extents[middle].offset; + } + if (overlap_len) { + *overlap_len = trim->extents[middle].length; + } return TRUE; + } } - + return FALSE; } @@ -3214,6 +3237,92 @@ journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length) return 0; } +/* + * journal_trim_extent_overlap + * + * Return 1 if there are any pending TRIMs that overlap with the given offset and length + * Return 0 otherwise. + */ + +int journal_trim_extent_overlap (journal *jnl, uint64_t offset, uint64_t length, uint64_t *end) { + transaction *tr = NULL; + int overlap = 0; + + uint64_t overlap_start; + uint64_t overlap_len; + tr = jnl->active_tr; + CHECK_TRANSACTION(tr); + + /* + * There are two lists that need to be examined for potential overlaps: + * + * The first is the current transaction. Since this function requires that + * a transaction be active when this is called, this is the "active_tr" + * pointer in the journal struct. This has a trimlist pointer which needs + * to be searched. + */ + overlap = trim_search_extent (&tr->trim, offset, length, &overlap_start, &overlap_len); + if (overlap == 0) { + /* + * The second is the async trim list, which is only done if the current + * transaction group (active transaction) did not overlap with our target + * extent. This async trim list is the set of all previously + * committed transaction groups whose I/Os are now in-flight. We need to hold the + * trim lock in order to search this list. If we grab the list before the + * TRIM has completed, then we will compare it. If it is grabbed AFTER the + * TRIM has completed, then the pointer will be zeroed out and we won't have + * to check anything. 
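trim_search_extent() above can answer "does [offset, offset+length) intersect any pending TRIM?" with a binary search only because the jnl_trim_list is kept sorted by offset with non-overlapping extents; the new out-parameters additionally report which extent was hit. A self-contained model (the sample extents are made up):

    #include <stdint.h>
    #include <stdio.h>

    struct extent { uint64_t offset, length; };

    /* Binary search a sorted, non-overlapping extent list for overlap
     * with [offset, offset+length); optionally report the hit. */
    static int search_extent(const struct extent *ext, uint32_t count,
                             uint64_t offset, uint64_t length,
                             uint64_t *overlap_start, uint64_t *overlap_len)
    {
        uint64_t end = offset + length;
        uint32_t lower = 0, upper = count, middle;

        while (lower < upper) {
            middle = (lower + upper) / 2;
            if (ext[middle].offset >= end)
                upper = middle;                  /* probe lies entirely after */
            else if (ext[middle].offset + ext[middle].length <= offset)
                lower = middle + 1;              /* probe lies entirely before */
            else {
                if (overlap_start) *overlap_start = ext[middle].offset;
                if (overlap_len)   *overlap_len   = ext[middle].length;
                return 1;
            }
        }
        return 0;
    }

    int main(void)
    {
        struct extent list[] = { { 0, 8 }, { 16, 8 }, { 64, 32 } };
        uint64_t s, l;

        if (search_extent(list, 3, 20, 100, &s, &l))
            printf("overlaps extent at %llu, len %llu\n",
                   (unsigned long long)s, (unsigned long long)l);
        return 0;
    }

Callers that only need a yes/no answer, such as journal_trim_remove_extent() further down, simply pass NULL for both out-parameters.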
+ */ + lck_rw_lock_shared (&jnl->trim_lock); + if (jnl->async_trim != NULL) { + overlap = trim_search_extent(jnl->async_trim, offset, length, &overlap_start, &overlap_len); + } + lck_rw_unlock_shared (&jnl->trim_lock); + } + + if (overlap) { + /* compute the end (min) of the overlapping range */ + if ( (overlap_start + overlap_len) < (offset + length)) { + *end = (overlap_start + overlap_len); + } + else { + *end = (offset + length); + } + } + + + return overlap; +} + +/* + * journal_request_immediate_flush + * + * FS requests that the journal flush immediately upon the + * active transaction's completion. + * + * Returns 0 if operation succeeds + * Returns EPERM if we failed to leave hint + */ +int +journal_request_immediate_flush (journal *jnl) { + + transaction *tr = NULL; + /* + * Is a transaction still in process? You must do + * this while there are txns open + */ + tr = jnl->active_tr; + if (tr != NULL) { + CHECK_TRANSACTION(tr); + tr->flush_on_completion = TRUE; + } + else { + return EPERM; + } + return 0; +} + + /* ;________________________________________________________________________________ @@ -3400,7 +3509,7 @@ journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length) */ lck_rw_lock_shared(&jnl->trim_lock); if (jnl->async_trim != NULL) - found = trim_search_extent(jnl->async_trim, offset, length); + found = trim_search_extent(jnl->async_trim, offset, length, NULL, NULL); lck_rw_unlock_shared(&jnl->trim_lock); if (found) { @@ -3452,12 +3561,6 @@ journal_trim_flush(journal *jnl, transaction *tr) errno = VNOP_IOCTL(jnl->fsdev, DKIOCUNMAP, (caddr_t)&unmap, FWRITE, vfs_context_kernel()); if (jnl_kdebug) KERNEL_DEBUG_CONSTANT(DBG_JOURNAL_TRIM_UNMAP | DBG_FUNC_END, errno, 0, 0, 0, 0); - if (errno) { - printf("jnl: error %d from DKIOCUNMAP (extents=%lx, count=%u); disabling trim for %s\n", - errno, (unsigned long) (unmap.extents), unmap.extentsCount, - jnl->jdev_name); - jnl->flags &= ~JOURNAL_USE_UNMAP; - } } /* @@ -3582,7 +3685,7 @@ end_transaction(transaction *tr, int force_it, errno_t (*callback)(void*), void jnl->cur_tr = tr; goto done; } - + // if our transaction buffer isn't very full, just hang // on to it and don't actually flush anything. this is // what is known as "group commit". we will flush the @@ -3840,7 +3943,9 @@ done: static void finish_end_thread(transaction *tr) { - proc_apply_thread_selfdiskacc(IOPOL_PASSIVE); + proc_set_task_policy(current_task(), current_thread(), + TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL, IOPOL_PASSIVE); + finish_end_transaction(tr, NULL, NULL); thread_deallocate(current_thread()); @@ -3850,7 +3955,8 @@ finish_end_thread(transaction *tr) static void write_header_thread(journal *jnl) { - proc_apply_thread_selfdiskacc(IOPOL_PASSIVE); + proc_set_task_policy(current_task(), current_thread(), + TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL, IOPOL_PASSIVE); if (write_journal_header(jnl, 1, jnl->saved_sequence_num)) jnl->write_header_failed = TRUE; @@ -4347,7 +4453,19 @@ journal_end_transaction(journal *jnl) // called from end_transaction(). // jnl->active_tr = NULL; - ret = end_transaction(tr, 0, NULL, NULL, TRUE, FALSE); + + /* Examine the force-journal-flush state in the active txn */ + if (tr->flush_on_completion == TRUE) { + /* + * If the FS requested it, disallow group commit and force the + * transaction out to disk immediately. 
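journal_request_immediate_flush() above is deliberately just a hint: it sets flush_on_completion on the active transaction (EPERM if no transaction is open, since the hint only makes sense from inside one), and journal_end_transaction() later turns the hint into a forced, non-group-commit end_transaction() call, as the next hunk shows. A toy model of the handshake; the struct layout and the print statements are stand-ins:

    #include <errno.h>
    #include <stdio.h>

    typedef int boolean_t;
    #define TRUE  1
    #define FALSE 0

    struct txn  { boolean_t flush_on_completion; };
    struct jrnl { struct txn *active_tr; };

    static int request_immediate_flush(struct jrnl *jnl)
    {
        struct txn *tr = jnl->active_tr;
        if (tr == NULL)
            return EPERM;              /* no open transaction to mark */
        tr->flush_on_completion = TRUE;
        return 0;
    }

    static int end_transaction_stub(struct txn *tr, int force_it)
    {
        (void)tr;
        puts(force_it ? "flushed to disk immediately"
                      : "left in tbuffer for group commit");
        return 0;
    }

    static int end_txn(struct jrnl *jnl)
    {
        struct txn *tr = jnl->active_tr;
        jnl->active_tr = NULL;
        /* honor the hint: bypass group commit when the FS asked for it */
        return end_transaction_stub(tr, tr->flush_on_completion == TRUE);
    }

    int main(void)
    {
        struct txn t = { FALSE };
        struct jrnl j = { &t };
        request_immediate_flush(&j);
        return end_txn(&j);
    }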
+ */ + ret = end_transaction(tr, 1, NULL, NULL, TRUE, TRUE); + } + else { + /* in the common path we can simply use the double-buffered journal */ + ret = end_transaction(tr, 0, NULL, NULL, TRUE, FALSE); + } return ret; } @@ -4640,7 +4758,8 @@ journal_create(__unused struct vnode *jvp, __unused int32_t flags, __unused int32_t tbuffer_size, __unused void (*flush)(void *arg), - __unused void *arg) + __unused void *arg, + __unused struct mount *fsmount) { return NULL; } @@ -4654,7 +4773,8 @@ journal_open(__unused struct vnode *jvp, __unused int32_t flags, __unused int32_t tbuffer_size, __unused void (*flush)(void *arg), - __unused void *arg) + __unused void *arg, + __unused struct mount *fsmount) { return NULL; } diff --git a/bsd/vfs/vfs_journal.h b/bsd/vfs/vfs_journal.h index 7b7f4f319..0e7950ebb 100644 --- a/bsd/vfs/vfs_journal.h +++ b/bsd/vfs/vfs_journal.h @@ -65,7 +65,7 @@ typedef struct block_list_header { u_int16_t max_blocks; // max number of blocks in this chunk u_int16_t num_blocks; // number of valid block numbers in block_nums int32_t bytes_used; // how many bytes of this tbuffer are used - int32_t checksum; // on-disk: checksum of this header and binfo[0] + uint32_t checksum; // on-disk: checksum of this header and binfo[0] int32_t flags; // check-checksums, initial blhdr, etc block_info binfo[1]; // so we can reference them by name } block_list_header; @@ -99,6 +99,7 @@ typedef struct transaction { uint32_t sequence_num; struct jnl_trim_list trim; boolean_t delayed_header_write; + boolean_t flush_on_completion; //flush transaction immediately upon txn end. } transaction; @@ -113,7 +114,7 @@ typedef struct journal_header { volatile off_t end; // zero-based byte offset of where free space begins off_t size; // size in bytes of the entire journal int32_t blhdr_size; // size in bytes of each block_list_header in the journal - int32_t checksum; + uint32_t checksum; int32_t jhdr_size; // block size (in bytes) of the journal header uint32_t sequence_num; // NEW FIELD: a monotonically increasing value assigned to all txn's } journal_header; @@ -146,6 +147,7 @@ typedef struct journal { const char *jdev_name; struct vnode *fsdev; // vnode of the file system device + struct mount *fsmount; // mount of the file system void (*flush)(void *arg); // fs callback to flush meta data blocks void *flush_arg; // arg that's passed to flush() @@ -207,7 +209,7 @@ __BEGIN_DECLS /* * Call journal_init() to initialize the journaling code (sets up lock attributes) */ -void journal_init(void) __attribute__((section("__TEXT, initcode"))); +void journal_init(void); /* * Call journal_create() to create a new journal. You only @@ -244,7 +246,8 @@ journal *journal_create(struct vnode *jvp, int32_t flags, int32_t tbuffer_size, void (*flush)(void *arg), - void *arg); + void *arg, + struct mount *fsmount); /* * Call journal_open() when mounting an existing file system @@ -264,7 +267,8 @@ journal *journal_open(struct vnode *jvp, int32_t flags, int32_t tbuffer_size, void (*flush)(void *arg), - void *arg); + void *arg, + struct mount *fsmount); /* * Test whether the journal is clean or not. 
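The checksum plumbing switches from int to unsigned int end to end in this patch — calc_checksum() itself, replay_journal()'s locals, and the on-disk checksum fields in block_list_header and journal_header just above: unsigned arithmetic has well-defined shifting and wraparound, whereas left-shifting a negative signed int is undefined behavior, and mixing signed with unsigned values invites mis-compares after byte swapping. The earlier hunk elides calc_checksum()'s loop body, so the byte-mixing step below is purely illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative rolling checksum in the journal's style; the real
     * mixing expression is not shown in the hunk above. */
    static uint32_t calc_checksum_model(const char *ptr, int len)
    {
        uint32_t cksum = 0;   /* unsigned: shifts and wraparound are defined */
        int i;

        for (i = 0; i < len; i++, ptr++)
            cksum = (cksum << 8) ^ (cksum + (uint8_t)*ptr);
        return cksum;
    }

    int main(void)
    {
        static const char blhdr[] = "block-list-header-bytes";
        printf("checksum = 0x%08x\n",
               calc_checksum_model(blhdr, (int)sizeof(blhdr) - 1));
        return 0;
    }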
This is intended @@ -327,6 +331,9 @@ int journal_kill_block(journal *jnl, struct buf *bp); int journal_trim_add_extent(journal *jnl, uint64_t offset, uint64_t length); int journal_trim_remove_extent(journal *jnl, uint64_t offset, uint64_t length); void journal_trim_set_callback(journal *jnl, jnl_trim_callback_t callback, void *arg); +int journal_trim_extent_overlap (journal *jnl, uint64_t offset, uint64_t length, uint64_t *end); +/* Mark state in the journal that requests an immediate journal flush upon txn completion */ +int journal_request_immediate_flush (journal *jnl); #endif int journal_end_transaction(journal *jnl); diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index 02e1576c1..784c835fc 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -88,7 +88,7 @@ #include /* For _PC_NAME_MAX */ #include #include - +#include #include #if CONFIG_MACF @@ -109,6 +109,7 @@ static void kdebug_lookup(struct vnode *dp, struct componentname *cnp); #if CONFIG_VOLFS static int vfs_getrealpath(const char * path, char * realpath, size_t bufsize, vfs_context_t ctx); +#define MAX_VOLFS_RESTARTS 5 #endif boolean_t lookup_continue_ok(struct nameidata *ndp); @@ -122,6 +123,9 @@ int lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int wantparent, int atroot, vfs_context_t ctx); int lookup_handle_emptyname(struct nameidata *ndp, struct componentname *cnp, int wantparent); + + + /* * Convert a pathname into a pointer to a locked inode. * @@ -174,6 +178,10 @@ namei(struct nameidata *ndp) uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread()); #endif +#if CONFIG_VOLFS + int volfs_restarts = 0; +#endif + fdp = p->p_fd; #if DIAGNOSTIC @@ -255,6 +263,16 @@ retry_copy: if (error) goto error_out; + /* + * Since the name cache may contain positive entries of + * the incorrect case, force lookup() to bypass the cache + * and call directly into the filesystem for each path + * component. Note: the FS may still consult the cache, + * but can apply rules to validate the results. + */ + if (proc_is_forcing_hfs_case_sensitivity(p)) + cnp->cn_flags |= CN_SKIPNAMECACHE; + #if CONFIG_VOLFS /* * Check for legacy volfs style pathnames. @@ -385,6 +403,38 @@ out_drop: cnp->cn_pnbuf = NULL; ndp->ni_vp = NULLVP; ndp->ni_dvp = NULLVP; + +#if CONFIG_VOLFS + /* + * Deal with volfs fallout. + * + * At this point, if we were originally given a volfs path that + * looks like /.vol/123/456, then we would have had to convert it into + * a full path. Assuming that part worked properly, we will now attempt + * to conduct a lookup of the item in the namespace. Under normal + * circumstances, if a user looked up /tmp/foo and it was not there, it + * would be permissible to return ENOENT. + * + * However, we may not want to do that here. Specifically, the volfs path + * uniquely identifies a certain item in the namespace regardless of where it + * lives. If the item has moved in between the time we constructed the + * path and now, when we're trying to do a lookup/authorization on the full + * path, we may have gotten an ENOENT. + * + * At this point we can no longer tell if the path no longer exists + * or if the item in question no longer exists. It could have been renamed + * away, in which case the /.vol identifier is still valid. + * + * Do this dance a maximum of MAX_VOLFS_RESTARTS times. 
+ */ + if ((error == ENOENT) && (ndp->ni_cnd.cn_flags & CN_VOLFSPATH)) { + if (volfs_restarts < MAX_VOLFS_RESTARTS) { + volfs_restarts++; + goto vnode_recycled; + } + } +#endif + if (error == ERECYCLE){ /* vnode was recycled underneath us. re-drive lookup to start at the beginning again, since recycling invalidated last lookup*/ @@ -409,6 +459,10 @@ namei_compound_available(vnode_t dp, struct nameidata *ndp) int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx) { +#if !CONFIG_MACF +#pragma unused(cnp) +#endif + int error; if (!dp_authorized_in_cache) { @@ -524,12 +578,6 @@ lookup_handle_rsrc_fork(vnode_t dp, struct nameidata *ndp, struct componentname /* The "parent" of the stream is the file. */ if (wantparent) { if (ndp->ni_dvp) { -#if CONFIG_VFS_FUNNEL - if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) { - ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } -#endif /* CONFIG_VFS_FUNNEL */ vnode_put(ndp->ni_dvp); } ndp->ni_dvp = dp; @@ -731,7 +779,7 @@ nextname: } #endif if (kdebug_enable) - kdebug_lookup(dp, cnp); + kdebug_lookup(ndp->ni_vp, cnp); return 0; @@ -1013,7 +1061,7 @@ unionlookup: * will be doing a batched operation. Return an iocount on dvp. */ #if NAMEDRSRCFORK - if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) { + if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) && !(cnp->cn_flags & CN_WANTSRSRCFORK)) { #else if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) { #endif /* NAMEDRSRCFORK */ @@ -1024,30 +1072,44 @@ unionlookup: nc_generation = dp->v_nc_generation; - error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx); + /* + * Note: + * Filesystems that support hardlinks may want to call vnode_update_identity + * if the lookup operation below will modify the in-core vnode to belong to a new point + * in the namespace. VFS cannot infer whether or not the look up operation makes the vnode + * name change or change parents. Without this, the lookup may update + * filesystem-specific in-core metadata but fail to update the v_parent or v_name + * fields in the vnode. If VFS were to do this, it would be necessary to call + * vnode_update_identity on every lookup operation -- expensive! + * + * However, even with this in place, multiple lookups may occur in between this lookup + * and the subsequent vnop, so, at best, we could only guarantee that you would get a + * valid path back, and not necessarily the one that you wanted. + * + * Example: + * /tmp/a == /foo/b + * + * If you are now looking up /foo/b and the vnode for this link represents /tmp/a, + * vnode_update_identity will fix the parentage so that you can get /foo/b back + * through the v_parent chain (preventing you from getting /tmp/b back). It would + * not fix whether you should get /tmp/a vs. /foo/b.
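The namei() fallout handling added above bounds its re-drive: ENOENT on a CN_VOLFSPATH lookup may only mean that the item named by a /.vol/123/456-style identifier moved after the path was constructed, so the translation-plus-lookup is restarted — but at most MAX_VOLFS_RESTARTS times, so a genuinely dead path still terminates with ENOENT. The bounded-goto pattern in miniature (lookup_once() is a stand-in that "moves" the object twice):

    #include <errno.h>
    #include <stdio.h>

    #define MAX_VOLFS_RESTARTS 5

    static int lookup_once(int attempt)
    {
        return attempt < 2 ? ENOENT : 0;   /* object settles on the 3rd try */
    }

    int main(void)
    {
        int error, volfs_restarts = 0;

    retry:                                 /* `goto vnode_recycled` analogue */
        error = lookup_once(volfs_restarts);
        if (error == ENOENT && volfs_restarts < MAX_VOLFS_RESTARTS) {
            volfs_restarts++;
            goto retry;
        }
        printf("lookup %s after %d restart(s)\n",
               error ? "failed" : "succeeded", volfs_restarts);
        return error;
    }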
+ */ + error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx); if ( error ) { lookup_error: if ((error == ENOENT) && - (dp->v_flag & VROOT) && (dp->v_mount != NULL) && + (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { -#if CONFIG_VFS_FUNNEL - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(dp, NULL); - } -#endif /* CONFIG_VFS_FUNNEL */ tdp = dp; - dp = tdp->v_mount->mnt_vnodecovered; - + error = lookup_traverse_union(tdp, &dp, ctx); vnode_put(tdp); - - if ( (vnode_getwithref(dp)) ) { + if (error) { dp = NULLVP; - error = ENOENT; goto bad; } + ndp->ni_dvp = dp; dp_authorized = 0; goto unionlookup; @@ -1059,6 +1121,25 @@ lookup_error: if (ndp->ni_vp != NULLVP) panic("leaf should be empty"); +#if NAMEDRSRCFORK + /* + * At this point, error should be EJUSTRETURN. + * + * If CN_WANTSRSRCFORK is set, that implies that the + * underlying filesystem could not find the "parent" of the + * resource fork (the data fork), and we are doing a lookup + * for a CREATE event. + * + * However, this should be converted to an error, as the + * failure to find this parent should disallow further + * progress to try and acquire a resource fork vnode. + */ + if (cnp->cn_flags & CN_WANTSRSRCFORK) { + error = ENOENT; + goto bad; + } +#endif + error = lookup_validate_creation_path(ndp); if (error) goto bad; @@ -1110,12 +1191,6 @@ returned_from_lookup_path: return (0); bad2: -#if CONFIG_VFS_FUNNEL - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } -#endif /* CONFIG_VFS_FUNNEL */ if (ndp->ni_dvp) vnode_put(ndp->ni_dvp); @@ -1127,12 +1202,6 @@ bad2: return (error); bad: -#if CONFIG_VFS_FUNNEL - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } -#endif /* CONFIG_VFS_FUNNEL */ if (dp) vnode_put(dp); ndp->ni_vp = NULLVP; @@ -1142,6 +1211,78 @@ bad: return (error); } +/* + * Given a vnode in a union mount, traverse to the equivalent + * vnode in the underlying mount. + */ +int +lookup_traverse_union(vnode_t dvp, vnode_t *new_dvp, vfs_context_t ctx) +{ + char *path = NULL, *pp; + const char *name, *np; + int len; + int error = 0; + struct nameidata nd; + vnode_t vp = dvp; + + *new_dvp = NULL; + + if (vp && vp->v_flag & VROOT) { + *new_dvp = vp->v_mount->mnt_vnodecovered; + if (vnode_getwithref(*new_dvp)) + return ENOENT; + return 0; + } + + path = (char *) kalloc(MAXPATHLEN); + if (path == NULL) { + error = ENOMEM; + goto done; + } + + /* + * Walk back up to the mountpoint following the + * v_parent chain and build a slash-separated path. + * Then lookup that path starting with the covered vnode. + */ + pp = path + (MAXPATHLEN - 1); + *pp = '\0'; + + while (1) { + name = vnode_getname(vp); + if (name == NULL) { + printf("lookup_traverse_union: null parent name: .%s\n", pp); + error = ENOENT; + goto done; + } + len = strlen(name); + if ((len + 1) > (pp - path)) { // Enough space for this name ? 
+ error = ENAMETOOLONG; + vnode_putname(name); + goto done; + } + for (np = name + len; len > 0; len--) // Copy name backwards + *--pp = *--np; + vnode_putname(name); + vp = vp->v_parent; + if (vp == NULLVP || vp->v_flag & VROOT) + break; + *--pp = '/'; + } + + /* Evaluate the path in the underlying mount */ + NDINIT(&nd, LOOKUP, OP_LOOKUP, USEDVP, UIO_SYSSPACE, CAST_USER_ADDR_T(pp), ctx); + nd.ni_dvp = dvp->v_mount->mnt_vnodecovered; + error = namei(&nd); + if (error == 0) + *new_dvp = nd.ni_vp; + nameidone(&nd); +done: + if (path) + kfree(path, MAXPATHLEN); + return error; +} + int lookup_validate_creation_path(struct nameidata *ndp) { @@ -1182,6 +1323,10 @@ lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp, vn while ((dp->v_type == VDIR) && dp->v_mountedhere && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { + + if (dp->v_mountedhere->mnt_lflag & MNT_LFORCE) { + break; // don't traverse into a forced unmount + } #if CONFIG_TRIGGERS /* * For a trigger vnode, call its resolver when crossing its mount (if requested) @@ -1292,13 +1437,6 @@ lookup_handle_symlink(struct nameidata *ndp, vnode_t *new_dp, vfs_context_t ctx) vnode_t dp; char *tmppn; -#if CONFIG_VFS_FUNNEL - if ((cnp->cn_flags & FSNODELOCKHELD)) { - cnp->cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } -#endif /* CONFIG_VFS_FUNNEL */ - if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { return ELOOP; } @@ -1503,27 +1641,12 @@ bad: return (error); } -void -namei_unlock_fsnode(struct nameidata *ndp) -{ -#if CONFIG_VFS_FUNNEL - if ((ndp->ni_cnd.cn_flags & FSNODELOCKHELD)) { - ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; - unlock_fsnode(ndp->ni_dvp, NULL); - } -#else - (void)ndp; -#endif /* CONFIG_VFS_FUNNEL */ -} - /* * Free pathname buffer */ void nameidone(struct nameidata *ndp) { - namei_unlock_fsnode(ndp); - if (ndp->ni_cnd.cn_flags & HASBUF) { char *tmp = ndp->ni_cnd.cn_pnbuf; @@ -1534,8 +1657,6 @@ nameidone(struct nameidata *ndp) } -#define NUMPARMS 23 - /* * Log (part of) a pathname using the KERNEL_DEBUG_CONSTANT mechanism, as used * by fs_usage. The path up to and including the current component name are @@ -1566,11 +1687,40 @@ nameidone(struct nameidata *ndp) * no '>' padding. But /foo_bar/spam would log "/foo_bar>>>>". */ #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) -static void -kdebug_lookup(struct vnode *dp, struct componentname *cnp) + +void +kdebug_lookup_gen_events(long *dbg_parms, int dbg_namelen, void *dp, boolean_t lookup) { - unsigned int i; int code; + unsigned int i; + + /* + * In the event that we collect multiple, consecutive pathname + * entries, we must mark the start of the path's string and the end. 
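lookup_traverse_union() above derives the path of dvp inside its union stack by walking v_parent links toward the mount root and writing each component backwards into the tail of a MAXPATHLEN buffer — no forward pass or length pre-computation needed — then re-evaluates that string with namei() starting at the covered vnode. The same backwards construction in plain C, with a parent-linked struct standing in for the vnode chain:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    #define MAXPATHLEN 1024

    struct node { const char *name; struct node *parent; };

    /* Build "a/b/c" by walking child->parent and copying each name
     * backwards into the tail of the buffer. */
    static int build_path_backwards(const struct node *leaf, char *path,
                                    size_t buflen, char **out)
    {
        char *pp = path + (buflen - 1);
        const struct node *n;

        *pp = '\0';
        for (n = leaf; n != NULL; n = n->parent) {
            size_t len = strlen(n->name);
            const char *np;

            if (len + 1 > (size_t)(pp - path))   /* room for name (and '/')? */
                return ENAMETOOLONG;
            for (np = n->name + len; len > 0; len--)
                *--pp = *--np;                   /* copy component backwards */
            if (n->parent != NULL)
                *--pp = '/';
        }
        *out = pp;
        return 0;
    }

    int main(void)
    {
        struct node root = { "Users",  NULL  };
        struct node mid  = { "shared", &root };
        struct node leaf = { "docs",   &mid  };
        char buf[MAXPATHLEN];
        char *p;

        if (build_path_backwards(&leaf, buf, sizeof(buf), &p) == 0)
            printf("%s\n", p);   /* Users/shared/docs */
        return 0;
    }

One caveat the kernel code shares, per the vnode_update_identity note above: v_parent/vnode_getname() are best-effort identity, so the rebuilt path is a valid name for the object, not necessarily the one the caller used.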
+ */ + if (lookup == TRUE) + code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START; + else + code = (FSDBG_CODE(DBG_FSRW,39)) | DBG_FUNC_START; + + if (dbg_namelen <= (int)(3 * sizeof(long))) + code |= DBG_FUNC_END; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, VM_KERNEL_ADDRPERM(dp), dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); + + code &= ~DBG_FUNC_START; + + for (i=3, dbg_namelen -= (3 * sizeof(long)); dbg_namelen > 0; i+=4, dbg_namelen -= (4 * sizeof(long))) { + if (dbg_namelen <= (int)(4 * sizeof(long))) + code |= DBG_FUNC_END; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0); + } +} + +static void +kdebug_lookup(vnode_t dp, struct componentname *cnp) +{ int dbg_namelen; char *dbg_nameptr; long dbg_parms[NUMPARMS]; @@ -1592,28 +1742,16 @@ kdebug_lookup(struct vnode *dp, struct componentname *cnp) *(cnp->cn_nameptr + cnp->cn_namelen) ? '>' : 0, sizeof(dbg_parms) - dbg_namelen); } - - /* - * In the event that we collect multiple, consecutive pathname - * entries, we must mark the start of the path's string and the end. - */ - code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START; - - if (dbg_namelen <= 12) - code |= DBG_FUNC_END; + kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)dp, TRUE); +} - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0); - - code &= ~DBG_FUNC_START; - - for (i=3, dbg_namelen -= 12; dbg_namelen > 0; i+=4, dbg_namelen -= 16) { - if (dbg_namelen <= 16) - code |= DBG_FUNC_END; +#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */ - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0); - } +void +kdebug_lookup_gen_events(long *dbg_parms __unused, int dbg_namelen __unused, void *dp __unused, boolean_t lookup __unused) +{ } -#else /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_IST) */ + static void kdebug_lookup(struct vnode *dp __unused, struct componentname *cnp __unused) { diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 84fa6ff28..c7e8afd30 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -2167,7 +2167,7 @@ vclean(vnode_t vp, int flags) /* Delete the shadow stream file before we reclaim its vnode */ if (vnode_isshadow(vp)) { - vnode_relenamedstream(pvp, vp, ctx); + vnode_relenamedstream(pvp, vp); } /* @@ -3722,9 +3722,6 @@ new_vnode(vnode_t *vpp) int deferred; struct timeval initial_tv; struct timeval current_tv; -#if CONFIG_VFS_FUNNEL - struct unsafe_fsnode *l_unsafefs = 0; -#endif /* CONFIG_VFS_FUNNEL */ proc_t curproc = current_proc(); initial_tv.tv_sec = 0; @@ -3890,7 +3887,7 @@ retry: * Running out of vnodes tends to make a system unusable. Start killing * processes that jetsam knows are killable. */ - if (memorystatus_kill_top_proc(TRUE, kMemorystatusFlagsKilledVnodes) < 0) { + if (memorystatus_kill_on_vnode_limit() == FALSE) { /* * If jetsam can't find any more processes to kill and there * still aren't any free vnodes, panic. Hopefully we'll get a @@ -3967,13 +3964,6 @@ steal_this_vp: OSAddAtomicLong(1, &num_reusedvnodes); -#if CONFIG_VFS_FUNNEL - if (vp->v_unsafefs) { - l_unsafefs = vp->v_unsafefs; - vp->v_unsafefs = (struct unsafe_fsnode *)NULL; - } -#endif /* CONFIG_VFS_FUNNEL */ - #if CONFIG_MACF /* * We should never see VL_LABELWAIT or VL_LABEL here.
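kdebug_lookup_gen_events() above streams a looked-up path into the trace buffer in fixed-size records: the first event carries the (permuted) vnode pointer plus the first three longs of path bytes and is tagged DBG_FUNC_START, each later event carries four longs, and whichever event exhausts the name is tagged DBG_FUNC_END so fs_usage can reassemble the pieces. A userspace model of the chunking, where emit() just prints what the kernel would trace:

    #include <stdio.h>
    #include <string.h>

    #define NUMPARMS        23
    #define DBG_FUNC_START  1
    #define DBG_FUNC_END    2

    static void emit(int code, long a, long b, long c, long d)
    {
        printf("event%s%s payload=[%ld %ld %ld %ld]\n",
               (code & DBG_FUNC_START) ? " START" : "",
               (code & DBG_FUNC_END)   ? " END"   : "", a, b, c, d);
    }

    /* Model of kdebug_lookup_gen_events(): 3 longs of path in the first
     * record (after the id), 4 in each record after that. */
    static void gen_events(long *parms, int namelen, long id)
    {
        int code = DBG_FUNC_START, i, n;

        if (namelen <= (int)(3 * sizeof(long)))
            code |= DBG_FUNC_END;
        emit(code, id, parms[0], parms[1], parms[2]);
        code &= ~DBG_FUNC_START;

        for (i = 3, n = namelen - (int)(3 * sizeof(long)); n > 0;
             i += 4, n -= (int)(4 * sizeof(long))) {
            if (n <= (int)(4 * sizeof(long)))
                code |= DBG_FUNC_END;
            emit(code, parms[i], parms[i + 1], parms[i + 2], parms[i + 3]);
        }
    }

    int main(void)
    {
        long parms[NUMPARMS];
        const char path[] = "/System/Library/CoreServices/boot.efi";

        memset(parms, '>', sizeof(parms));      /* kdebug pads names with '>' */
        memcpy(parms, path, sizeof(path) - 1);
        gen_events(parms, (int)(sizeof(path) - 1), 0x1234L);
        return 0;
    }

Replacing the old hard-coded 12/16 thresholds with 3 * sizeof(long) and 4 * sizeof(long) is also what makes the refactored version correct for both 32- and 64-bit longs.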
@@ -4003,13 +3993,6 @@ steal_this_vp: vnode_unlock(vp); -#if CONFIG_VFS_FUNNEL - if (l_unsafefs) { - lck_mtx_destroy(&l_unsafefs->fsnodelock, vnode_lck_grp); - FREE_ZONE((void *)l_unsafefs, sizeof(struct unsafe_fsnode), M_UNSAFEFS); - } -#endif /* CONFIG_VFS_FUNNEL */ - done: *vpp = vp; @@ -4286,6 +4269,7 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) int nosusp = vflags & VNODE_NOSUSPEND; int always = vflags & VNODE_ALWAYS; int beatdrain = vflags & VNODE_DRAINO; + int withvid = vflags & VNODE_WITHID; for (;;) { /* @@ -4319,14 +4303,30 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) break; /* - * In some situations, we want to get an iocount - * even if the vnode is draining to prevent deadlock, - * e.g. if we're in the filesystem, potentially holding - * resources that could prevent other iocounts from - * being released. + * If this vnode is getting drained, there are some cases where + * we can't block. */ - if (beatdrain && (vp->v_lflag & VL_DRAIN)) { - break; + if (vp->v_lflag & VL_DRAIN) { + /* + * In some situations, we want to get an iocount + * even if the vnode is draining to prevent deadlock, + * e.g. if we're in the filesystem, potentially holding + * resources that could prevent other iocounts from + * being released. + */ + if (beatdrain) + break; + /* + * Don't block if the vnode's mount point is unmounting as + * we may be the thread the unmount is itself waiting on. + * Only callers who pass in vids (at this point, we've already + * handled nosusp and nodead) are expecting error returns + * from this function, so we can only return errors for + * those. ENODEV is intended to inform callers that the call + * failed because an unmount is in progress. + */ + if (withvid && (vp->v_mount) && vfs_isunmount(vp->v_mount)) + return(ENODEV); } vnode_lock_convert(vp); @@ -4338,7 +4338,7 @@ vnode_getiocount(vnode_t vp, unsigned int vid, int vflags) } else msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", NULL); } - if (((vflags & VNODE_WITHID) != 0) && vid != vp->v_id) { + if (withvid && vid != vp->v_id) { return(ENOENT); } if (++vp->v_references >= UNAGE_THRESHHOLD) { @@ -4486,9 +4486,9 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) if (param == NULL) return (EINVAL); - /* Do quick sanity check on the parameters */ + /* Do quick sanity check on the parameters.
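The vnode_getiocount() rework above distinguishes two reasons not to sleep on a draining vnode: VNODE_DRAINO callers may take the iocount anyway (they may hold filesystem resources the drain itself is waiting on), and VNODE_WITHID callers now fail fast with ENODEV when the vnode's mount is mid-unmount, because the sleeping thread could be the very one dounmount() is waiting for. A toy model of just that decision tree; locking and the msleep() retry loop are elided:

    #include <errno.h>
    #include <stdio.h>

    struct vp_model {
        int draining;        /* VL_DRAIN */
        int unmounting;      /* vfs_isunmount(vp->v_mount) */
        unsigned v_id;       /* current vnode identity */
    };

    static int get_iocount(struct vp_model *vp, unsigned vid,
                           int beatdrain, int withvid)
    {
        if (vp->draining && !beatdrain) {
            if (withvid && vp->unmounting)
                return ENODEV;          /* don't deadlock against dounmount() */
            /* the real code msleep()s here and retries */
        }
        if (withvid && vid != vp->v_id)
            return ENOENT;              /* vnode was recycled meanwhile */
        return 0;
    }

    int main(void)
    {
        struct vp_model vp = { 1, 1, 7 };
        printf("withvid on unmounting volume -> errno %d\n",
               get_iocount(&vp, 7, 0, 1));
        return 0;
    }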
*/ if (param->vnfs_vtype == VBAD) { - return (EINVAL); + return EINVAL; } #if CONFIG_TRIGGERS @@ -4612,8 +4612,7 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) if (VCHR == vp->v_type) { u_int maj = major(vp->v_rdev); - if (maj < (u_int)nchrdev && - (D_TYPEMASK & cdevsw[maj].d_type) == D_TTY) + if (maj < (u_int)nchrdev && cdevsw[maj].d_type == D_TTY) vp->v_flag |= VISTTY; } } @@ -4649,15 +4648,6 @@ vnode_create(uint32_t flavor, uint32_t size, void *data, vnode_t *vpp) */ insmntque(vp, param->vnfs_mp); } -#if CONFIG_VFS_FUNNEL - if ((param->vnfs_mp->mnt_vtable->vfc_vfsflags & VFC_VFSTHREADSAFE) == 0) { - MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *, - sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK); - vp->v_unsafefs->fsnode_count = 0; - vp->v_unsafefs->fsnodeowner = (void *)NULL; - lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr); - } -#endif /* CONFIG_VFS_FUNNEL */ } if (dvp && vnode_ref(dvp) == 0) { vp->v_parent = dvp; @@ -5360,6 +5350,9 @@ vn_attribute_cleanup(struct vnode_attr *vap, uint32_t defaulted_fields) int vn_authorize_unlink(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, __unused void *reserved) { +#if !CONFIG_MACF +#pragma unused(cnp) +#endif int error = 0; /* @@ -5388,7 +5381,6 @@ vn_authorize_open_existing(vnode_t vp, struct componentname *cnp, int fmode, vfs /* Open of existing case */ kauth_action_t action; int error = 0; - if (cnp->cn_ndp == NULL) { panic("NULL ndp"); } @@ -5461,7 +5453,6 @@ vn_authorize_open_existing(vnode_t vp, struct componentname *cnp, int fmode, vfs } } error = vnode_authorize(vp, NULL, action, ctx); - #if NAMEDSTREAMS if (error == EACCES) { /* @@ -5471,17 +5462,20 @@ vn_authorize_open_existing(vnode_t vp, struct componentname *cnp, int fmode, vfs * then it should be authorized. 
*/ if (vnode_isshadow(vp) && vnode_isnamedstream (vp)) { - error = vnode_verifynamedstream(vp, ctx); + error = vnode_verifynamedstream(vp); } } #endif - + return error; } int vn_authorize_create(vnode_t dvp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx, void *reserved) { +#if !CONFIG_MACF +#pragma unused(vap) +#endif /* Creation case */ int error; @@ -5647,6 +5641,9 @@ out: int vn_authorize_mkdir(vnode_t dvp, struct componentname *cnp, struct vnode_attr *vap, vfs_context_t ctx, void *reserved) { +#if !CONFIG_MACF +#pragma unused(vap) +#endif int error; if (reserved != NULL) { @@ -5682,8 +5679,11 @@ out: int vn_authorize_rmdir(vnode_t dvp, vnode_t vp, struct componentname *cnp, vfs_context_t ctx, void *reserved) { +#if CONFIG_MACF int error; - +#else +#pragma unused(cnp) +#endif if (reserved != NULL) { panic("Non-NULL reserved argument to vn_authorize_rmdir()"); } diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index 7e68e2982..efd4e324e 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -176,9 +176,6 @@ int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t); __private_extern__ int sync_internal(void); -__private_extern__ -int open1(vfs_context_t, struct nameidata *, int, struct vnode_attr *, int32_t *); - __private_extern__ int unlink1(vfs_context_t, struct nameidata *, int); @@ -192,9 +189,10 @@ uint32_t mount_generation = 0; /* counts number of mount and unmount operations */ unsigned int vfs_nummntops=0; -extern struct fileops vnops; +extern const struct fileops vnops; +#if CONFIG_APPLEDOUBLE extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *); - +#endif /* CONFIG_APPLEDOUBLE */ /* * Virtual File System System Calls */ @@ -302,7 +300,9 @@ boolean_t root_fs_upgrade_try = FALSE; int __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval) { - vnode_t pvp, vp; + vnode_t pvp = NULL; + vnode_t vp = NULL; + int need_nameidone = 0; vfs_context_t ctx = vfs_context_current(); char fstypename[MFSNAMELEN]; struct nameidata nd; @@ -310,8 +310,11 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 char *labelstr = NULL; int flags = uap->flags; int error; +#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF boolean_t is_64bit = IS_64BIT_PROCESS(p); - +#else +#pragma unused(p) +#endif /* * Get the fs type name from user space */ @@ -325,8 +328,10 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT, UIO_USERSPACE, uap->path, ctx); error = namei(&nd); - if (error) - return (error); + if (error) { + goto out; + } + need_nameidone = 1; vp = nd.ni_vp; pvp = nd.ni_dvp; @@ -377,16 +382,39 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 AUDIT_ARG(fflags, flags); if ((vp->v_flag & VROOT) && - (vp->v_mount->mnt_flag & MNT_ROOTFS)) { + (vp->v_mount->mnt_flag & MNT_ROOTFS)) { + if (!(flags & MNT_UNION)) { flags |= MNT_UPDATE; - /* - * See 7392553 for more details on why this check exists. - * Suffice to say: If this check is ON and something tries - * to mount the rootFS RW, we'll turn off the codesign - * bitmap optimization. - */ + } + else { + /* + * For a union mount on '/', treat it as a fresh + * mount instead of an update.
* Otherwise, union mounting on '/' used to panic the + * system before, since mnt_vnodecovered was found to + * be NULL for '/' which is required for unionlookup + * after it gets ENOENT on union mount. + */ + flags = (flags & ~(MNT_UPDATE)); + } + +#if 0 +//#ifdef SECURE_KERNEL + if ((flags & MNT_RDONLY) == 0) { + /* Release kernels are not allowed to mount "/" as rw */ + error = EPERM; + goto out; + } +//#endif +#endif + /* + * See 7392553 for more details on why this check exists. + * Suffice to say: If this check is ON and something tries + * to mount the rootFS RW, we'll turn off the codesign + * bitmap optimization. + */ #if CHECK_CS_VALIDATION_BITMAP - if ( !(flags & MNT_RDONLY) ) { + if ((flags & MNT_RDONLY) == 0 ) { root_fs_upgrade_try = TRUE; } #endif @@ -394,15 +422,23 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int3 error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0, labelstr, FALSE, ctx); + out: + #if CONFIG_MACF if (labelstr) FREE(labelstr, M_MACTEMP); #endif /* CONFIG_MACF */ - vnode_put(vp); - vnode_put(pvp); - nameidone(&nd); + if (vp) { + vnode_put(vp); + } + if (pvp) { + vnode_put(pvp); + } + if (need_nameidone) { + nameidone(&nd); + } return (error); } @@ -426,6 +462,9 @@ mount_common(char *fstypename, vnode_t pvp, vnode_t vp, struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount, vfs_context_t ctx) { +#if !CONFIG_MACF +#pragma unused(labelstr) +#endif struct vnode *devvp = NULLVP; struct vnode *device_vnode = NULLVP; #if CONFIG_MACF @@ -1032,6 +1071,9 @@ out1: int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth) { +#if !CONFIG_MACF +#pragma unused(cnp,fsname) +#endif struct vnode_attr va; int error; @@ -1771,7 +1813,9 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) int lflags = 0; struct vnode *devvp = NULLVP; #if CONFIG_TRIGGERS + proc_t p = vfs_context_proc(ctx); int did_vflush = 0; + int pflags_save = 0; #endif /* CONFIG_TRIGGERS */ if (flags & MNT_FORCE) @@ -1794,6 +1838,12 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) */ return (EBUSY); } + +#if CONFIG_TRIGGERS + if (flags & MNT_NOBLOCK && p != kernproc) + pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag); +#endif + mp->mnt_kern_flag |= MNTK_UNMOUNT; mp->mnt_lflag |= MNT_LUNMOUNT; mp->mnt_flag &=~ MNT_ASYNC; @@ -1925,8 +1975,13 @@ out: needwakeup = 1; } - #if CONFIG_TRIGGERS + if (flags & MNT_NOBLOCK && p != kernproc) { + // Restore P_NOREMOTEHANG bit to its previous value + if ((pflags_save & P_NOREMOTEHANG) == 0) + OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag); + } + /* * Callback and context are set together under the mount lock, and * never cleared, so we're safe to examine them here, drop the lock, @@ -2036,31 +2091,16 @@ sync_callback(mount_t mp, void * arg) } -#include - -clock_sec_t sync_wait_time = 0; - /* ARGSUSED */ int sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval) { - clock_nsec_t nsecs; - vfs_iterate(LK_NOWAIT, sync_callback, (void *)0); - { - static fsid_t fsid = { { 0, 0 } }; - - clock_get_calendar_microtime(&sync_wait_time, &nsecs); - vfs_event_signal(&fsid, VQ_SYNCEVENT, (intptr_t)NULL); - wakeup((caddr_t)&sync_wait_time); - } - - { if(print_vmpage_stat) { vm_countdirtypages(); } - } + #if DIAGNOSTIC if (syncprt) vfs_bufstats(); @@ -2218,11 +2258,13 @@ statfs(__unused proc_t p, struct statfs_args
*uap, __unused int32_t *retval) nameidone(&nd); error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT); - vnode_put(vp); - if (error != 0) + if (error != 0) { + vnode_put(vp); return (error); + } error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE); + vnode_put(vp); return (error); } @@ -2330,11 +2372,13 @@ statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *r nameidone(&nd); error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT); - vnode_put(vp); - if (error != 0) + if (error != 0) { + vnode_put(vp); return (error); + } error = statfs64_common(mp, sp, uap->buf); + vnode_put(vp); return (error); } @@ -2425,11 +2469,13 @@ getfsstat_callback(mount_t mp, void * arg) fstp->sfsp += my_size; if (fstp->mp) { +#if CONFIG_MACF error = mac_mount_label_get(mp, *fstp->mp); if (error) { fstp->error = error; return(VFS_RETURNED_DONE); } +#endif fstp->mp++; } } @@ -2961,21 +3007,20 @@ change_dir(struct nameidata *ndp, vfs_context_t ctx) * XXX Need to implement uid, gid */ int -open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *vap, int32_t *retval) +open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, + struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, + int32_t *retval) { proc_t p = vfs_context_proc(ctx); uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx)); - struct filedesc *fdp = p->p_fd; struct fileproc *fp; vnode_t vp; int flags, oflags; - struct fileproc *nfp; int type, indx, error; struct flock lf; int no_controlling_tty = 0; int deny_controlling_tty = 0; struct session *sessp = SESSION_NULL; - struct vfs_context context = *vfs_context_current(); /* local copy */ oflags = uflags; @@ -2986,10 +3031,10 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v AUDIT_ARG(fflags, oflags); AUDIT_ARG(mode, vap->va_mode); - if ( (error = falloc(p, &nfp, &indx, ctx)) ) { + if ((error = falloc_withalloc(p, + &fp, &indx, ctx, fp_zalloc, cra)) != 0) { return (error); } - fp = nfp; uu->uu_dupfd = -indx - 1; if (!(p->p_flag & P_CONTROLT)) { @@ -3013,7 +3058,7 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v if ((error = vn_open_auth(ndp, &flags, vap))) { if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */ - if ((error = dupfdopen(fdp, indx, uu->uu_dupfd, flags, error)) == 0) { + if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) { fp_drop(p, indx, NULL, 0); *retval = indx; if (deny_controlling_tty) { @@ -3043,7 +3088,6 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v vp = ndp->ni_vp; fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY); - fp->f_fglob->fg_type = DTYPE_VNODE; fp->f_fglob->fg_ops = &vnops; fp->f_fglob->fg_data = (caddr_t)vp; @@ -3072,7 +3116,7 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v if (error) goto bad; #endif - if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) + if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) goto bad; fp->f_fglob->fg_flag |= FHASLOCK; } @@ -3123,6 +3167,8 @@ open1(vfs_context_t ctx, struct nameidata *ndp, int uflags, struct vnode_attr *v proc_fdlock(p); if (flags & O_CLOEXEC) *fdflags(p, indx) |= UF_EXCLOSE; + if (flags & O_CLOFORK) + *fdflags(p, indx) |= UF_FORKCLOSE; procfdtbl_releasefd(p, indx, NULL); fp_drop(p, indx, fp, 1); proc_fdunlock(p); @@ -3141,7 +3187,7 @@ bad: if (sessp != SESSION_NULL) session_rele(sessp); - /* Modify 
local copy (to not damage thread copy) */ + struct vfs_context context = *vfs_context_current(); context.vc_ucred = fp->f_fglob->fg_cred; vn_close(vp, fp->f_fglob->fg_flag, &context); @@ -3149,7 +3195,6 @@ bad: fp_free(p, indx, fp); return (error); - } /* @@ -3205,7 +3250,8 @@ open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval) NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current()); - ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, retval); + ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, + fileproc_alloc_init, NULL, retval); if (xsecdst != NULL) kauth_filesec_free(xsecdst); @@ -3258,7 +3304,8 @@ int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); } - error = open1(vfs_context_current(), &nd, uap->flags, &va, retval); + error = open1(vfs_context_current(), &nd, uap->flags, &va, + fileproc_alloc_init, NULL, retval); return error; } @@ -3287,7 +3334,8 @@ open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval) NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, uap->path, vfs_context_current()); - return(open1(vfs_context_current(), &nd, uap->flags, &va, retval)); + return (open1(vfs_context_current(), &nd, uap->flags, &va, + fileproc_alloc_init, NULL, retval)); } @@ -3687,6 +3735,10 @@ link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval) if (error) goto out2; +#if CONFIG_MACF + (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd); +#endif + #if CONFIG_FSE need_event = need_fsevent(FSE_CREATE_FILE, dvp); #else @@ -3824,10 +3876,15 @@ symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval) if (error == 0) error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx); +#if CONFIG_MACF + if (error == 0) + error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx); +#endif + /* do fallback attribute handling */ if (error == 0) error = vnode_setattr_fallback(vp, &va, ctx); - + if (error == 0) { int update_flags = 0; @@ -3985,12 +4042,11 @@ lookup_continue: flags |= VNODE_REMOVE_NODELETEBUSY; } - /* If we're told to, then skip any potential future upcalls */ + /* Skip any potential upcalls if told to. */ if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) { flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT; } - if (vp) { batched = vnode_compound_remove_available(vp); /* @@ -5335,6 +5391,10 @@ fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *re case USER_ADDR_NULL: VATTR_SET(&va, va_acl, NULL); break; + case CAST_USER_ADDR_T((void *)1): /* _FILESEC_REMOVE_ACL */ + VATTR_SET(&va, va_acl, NULL); + break; + /* not being set */ case CAST_USER_ADDR_T(-1): break; default: @@ -5710,11 +5770,13 @@ ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval) return(error); } - if (fp->f_fglob->fg_type == DTYPE_PSXSHM) { + switch (FILEGLOB_DTYPE(fp->f_fglob)) { + case DTYPE_PSXSHM: error = pshm_truncate(p, fp, uap->fd, uap->length, retval); goto out; - } - if (fp->f_fglob->fg_type != DTYPE_VNODE) { + case DTYPE_VNODE: + break; + default: error = EINVAL; goto out; } @@ -6747,7 +6809,7 @@ continue_lookup: if (error == EKEEPLOOKING) { goto continue_lookup; } - +#if CONFIG_APPLEDOUBLE /* * Special case to remove orphaned AppleDouble * files. 
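open1() above no longer allocates its fileproc directly: callers hand falloc_withalloc() an fp_allocfn_t plus an opaque argument, with ordinary opens passing fileproc_alloc_init as the open_extended()/open_dprotected_np()/open_nocancel() hunks show. Presumably this is for callers that need the fileproc embedded in a larger allocation; the sketch below shows the inversion with invented stand-in types:

    #include <stdio.h>
    #include <stdlib.h>

    struct fileproc_stub { int fg_flag; };

    /* some caller wants extra state wrapped around the fileproc */
    struct wrapped_stub { struct fileproc_stub fp; unsigned long long guard; };

    typedef struct fileproc_stub *(*fp_allocfn_t)(void *crarg);

    static struct fileproc_stub *fileproc_alloc_init_stub(void *crarg)
    {
        (void)crarg;
        return calloc(1, sizeof(struct fileproc_stub));
    }

    static struct fileproc_stub *wrapped_alloc_stub(void *crarg)
    {
        struct wrapped_stub *wp = calloc(1, sizeof(*wp));

        if (wp == NULL)
            return NULL;
        wp->guard = *(unsigned long long *)crarg;   /* stash caller state */
        return &wp->fp;
    }

    /* open1() analogue: it neither knows nor cares which allocator ran */
    static int open_common(fp_allocfn_t fp_zalloc, void *cra)
    {
        struct fileproc_stub *fp = fp_zalloc(cra);  /* falloc_withalloc() analogue */

        if (fp == NULL)
            return -1;
        fp->fg_flag = 1;
        free(fp);   /* fp is the first member, so this frees the wrapper too */
        return 0;
    }

    int main(void)
    {
        unsigned long long guard = 0xfeedfaceULL;

        return open_common(fileproc_alloc_init_stub, NULL) |
               open_common(wrapped_alloc_stub, &guard);
    }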
I don't like putting this in the kernel, @@ -6767,7 +6829,7 @@ continue_lookup: if (!error) error = vn_rmdir(dvp, &vp, &nd, vap, ctx); } - +#endif /* CONFIG_APPLEDOUBLE */ /* * Call out to allow 3rd party notification of delete. * Ignore result of kauth_authorize_fileop call. @@ -6904,6 +6966,8 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, } } +#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U) + /* * Read a block of directory entries in a file system independent format. */ @@ -6930,6 +6994,9 @@ getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *byt goto out; } + if (bufsize > GETDIRENTRIES_MAXBUFSIZE) + bufsize = GETDIRENTRIES_MAXBUFSIZE; + #if CONFIG_MACF error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob); if (error) @@ -6980,16 +7047,17 @@ unionread: goto out; } - if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { + if ((vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; - vp = vp->v_mount->mnt_vnodecovered; - vnode_getwithref(vp); - vnode_ref(vp); - fp->f_fglob->fg_data = (caddr_t) vp; - fp->f_fglob->fg_offset = 0; - vnode_rele(tvp); - vnode_put(tvp); - goto unionread; + if (lookup_traverse_union(tvp, &vp, &context) == 0) { + vnode_ref(vp); + fp->f_fglob->fg_data = (caddr_t) vp; + fp->f_fglob->fg_offset = 0; + vnode_rele(tvp); + vnode_put(tvp); + goto unionread; + } + vp = tvp; } } @@ -7168,12 +7236,9 @@ out: /* -* Obtain attribute information on objects in a directory while enumerating -* the directory. This call does not yet support union mounted directories. -* TO DO -* 1.union mounted directories. -*/ - + * Obtain attribute information on objects in a directory while enumerating + * the directory. + */ /* ARGSUSED */ int getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval) @@ -7182,7 +7247,7 @@ getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval struct fileproc *fp; uio_t auio = NULL; int spacetype = proc_is64bit(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; - uint32_t count; + uint32_t count, savecount; uint32_t newstate; int error, eofflag; uint32_t loff; @@ -7201,6 +7266,7 @@ getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) { return(error); } + savecount = count; if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) { return (error); } @@ -7224,6 +7290,7 @@ getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval AUDIT_ARG(vnpath, vp, ARG_VNODE1); +unionread: if (vp->v_type != VDIR) { (void)vnode_put(vp); error = EINVAL; @@ -7240,8 +7307,7 @@ getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval /* set up the uio structure which will contain the users return buffer */ loff = fp->f_fglob->fg_offset; - auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, - &uio_buf[0], sizeof(uio_buf)); + auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); uio_addiov(auio, uap->buffer, uap->buffersize); /* @@ -7258,11 +7324,41 @@ getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval /* Believe it or not, uap->options only has 32-bits of valid * info, so truncate before extending again */ - error = VNOP_READDIRATTR(vp, &attributelist, auio, - count, - (u_long)(uint32_t)uap->options, &newstate, &eofflag, - &count, ctx); + + error = VNOP_READDIRATTR(vp, &attributelist, auio, count, + (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx); + } + + if (error) { + (void) vnode_put(vp); + goto out; + } + + /* + * If we've got the last entry of a directory in a union mount + * then reset the eofflag and pretend there's still more to come. + * The next call will again set eofflag and the buffer will be empty, + * so traverse to the underlying directory and do the directory + * read there. + */ + if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) { + if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries + eofflag = 0; + } else { // Empty buffer + struct vnode *tvp = vp; + if (lookup_traverse_union(tvp, &vp, ctx) == 0) { + vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0); + fp->f_fglob->fg_data = (caddr_t) vp; + fp->f_fglob->fg_offset = 0; // reset index for new dir + count = savecount; + vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0); + vnode_put(tvp); + goto unionread; + } + vp = tvp; + } } + (void)vnode_put(vp); if (error) @@ -7282,7 +7378,7 @@ out: file_drop(fd); return (error); /* return error earlier, an retval of 0 or 1 now */ -} /* end of getdirentryattr system call */ +} /* end of getdirentriesattr system call */ /* * Exchange data between two files @@ -7347,15 +7443,12 @@ exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t goto out; } - /* - * if the two vnodes are not files, return an error. - */ - if ( (vnode_isreg(svp) == 0) || (vnode_isreg(fvp) == 0) ) { + /* If they're not files, return an error */ + if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) { error = EINVAL; goto out; } - #if CONFIG_MACF error = mac_vnode_check_exchangedata(ctx, fvp, svp); @@ -7441,6 +7534,19 @@ out2: return (error); } +/* + * Return (in MB) the amount of freespace on the given vnode's volume. 
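A quick sanity check of the shift arithmetic used in its body just below (the block counts here are hypothetical):

    uint64_t bavail = 2000000;      /* free blocks, hypothetical */
    uint64_t bsize  = 4096;         /* bytes per block, hypothetical */
    /* 2000000 * 4096 = 8,192,000,000 bytes; >> 20 divides by 1,048,576,
     * yielding 7812 MB. Note the product already overflows 32 bits,
     * so the uint64_t cast in freespace_mb() is not cosmetic. */
    uint32_t mb = (uint32_t)((bavail * bsize) >> 20);   /* == 7812 */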
+ */ +uint32_t freespace_mb(vnode_t vp); + +uint32_t +freespace_mb(vnode_t vp) +{ + vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT); + return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail * + vp->v_mount->mnt_vfsstat.f_bsize) >> 20); +} + #if CONFIG_SEARCHFS /* ARGSUSED */ @@ -7448,8 +7554,8 @@ out2: int searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) { - vnode_t vp; - int error=0; + vnode_t vp, tvp; + int i, error=0; int fserror = 0; struct nameidata nd; struct user64_fssearchblock searchblock; @@ -7465,7 +7571,7 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) vfs_context_t ctx = vfs_context_current(); char uio_buf[ UIO_SIZEOF(1) ]; - /* Start by copying in fsearchblock paramater list */ + /* Start by copying in fsearchblock parameter list */ if (IS_64BIT_PROCESS(p)) { error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock)); timelimit.tv_sec = searchblock.timelimit.tv_sec; @@ -7531,6 +7637,16 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) goto freeandexit; + /* + * When searching a union mount, need to set the + * start flag at the first call on each layer to + * reset state for the new volume. + */ + if (uap->options & SRCHFS_START) + state->ss_union_layer = 0; + else + uap->options |= state->ss_union_flags; + state->ss_union_flags = 0; /* * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter, @@ -7579,9 +7695,8 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) } /* set up the uio structure which will contain the users return buffer */ - auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, - &uio_buf[0], sizeof(uio_buf)); - uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize); + auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize); nameiflags = 0; if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; @@ -7591,9 +7706,37 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) error = namei(&nd); if (error) goto freeandexit; - + vp = nd.ni_vp; nameidone(&nd); - vp = nd.ni_vp; + + /* + * Switch to the root vnode for the volume + */ + error = VFS_ROOT(vnode_mount(vp), &tvp, ctx); + if (error) + goto freeandexit; + vnode_put(vp); + vp = tvp; + + /* + * If it's a union mount, the path lookup takes + * us to the top layer. But we may need to descend + * to a lower layer. For non-union mounts the layer + * is always zero. + */ + for (i = 0; i < (int) state->ss_union_layer; i++) { + if ((vp->v_mount->mnt_flag & MNT_UNION) == 0) + break; + tvp = vp; + vp = vp->v_mount->mnt_vnodecovered; + if (vp == NULL) { + vp = tvp; + error = ENOENT; + goto freeandexit; + } + vnode_getwithref(vp); + vnode_put(tvp); + } #if CONFIG_MACF error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs); @@ -7606,7 +7749,7 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) /* * If searchblock.maxmatches == 0, then skip the search. This has happened - * before and sometimes the underlyning code doesnt deal with it well. + * before and sometimes the underlying code doesnt deal with it well. 
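The layer-descent state above pairs with the EAGAIN continuation a little further down: when one union layer is exhausted, the kernel stamps SRCHFS_START into ss_union_flags, bumps ss_union_layer, and returns EAGAIN, so userland simply keeps calling until the bottom layer reports done. A hedged userland sketch (consume_matches() is an invented helper; the searchfs(2) signature is the public one):

    #include <sys/attr.h>
    #include <errno.h>

    struct fssearchblock sblock;    /* search params, filled in by caller */
    struct searchstate   state;
    unsigned long        nmatches;
    unsigned int         options = SRCHFS_START | SRCHFS_MATCHFILES;
    int                  err;

    do {
            err = searchfs("/Volumes/Data", &sblock, &nmatches,
                           0 /* scriptcode */, options, &state);
            consume_matches(&sblock, nmatches);  /* invented helper */
            options &= ~SRCHFS_START;            /* only the first call starts */
    } while (err == -1 && errno == EAGAIN);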
*/ if (searchblock.maxmatches == 0) { nummatches = 0; @@ -7614,27 +7757,37 @@ searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) } /* - Allright, we have everything we need, so lets make that call. - - We keep special track of the return value from the file system: - EAGAIN is an acceptable error condition that shouldn't keep us - from copying out any results... + * Allright, we have everything we need, so lets make that call. + * + * We keep special track of the return value from the file system: + * EAGAIN is an acceptable error condition that shouldn't keep us + * from copying out any results... */ fserror = VNOP_SEARCHFS(vp, - searchparams1, - searchparams2, - &searchblock.searchattrs, - (u_long)searchblock.maxmatches, - &timelimit, - returnattrs, - &nummatches, - (u_long)uap->scriptcode, - (u_long)uap->options, - auio, - state, - ctx); + searchparams1, + searchparams2, + &searchblock.searchattrs, + (u_long)searchblock.maxmatches, + &timelimit, + returnattrs, + &nummatches, + (u_long)uap->scriptcode, + (u_long)uap->options, + auio, + (struct searchstate *) &state->ss_fsstate, + ctx); + /* + * If it's a union mount we need to be called again + * to search the mounted-on filesystem. + */ + if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) { + state->ss_union_flags = SRCHFS_START; + state->ss_union_layer++; // search next layer down + fserror = EAGAIN; + } + saveandexit: vnode_put(vp); @@ -7645,7 +7798,7 @@ saveandexit: if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0) goto freeandexit; - if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) + if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0) goto freeandexit; error = fserror; @@ -7729,6 +7882,18 @@ typedef struct { nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT]; +/* namespace fsctl functions */ +static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type); +static int nspace_item_flags_for_type(nspace_type_t nspace_type); +static int nspace_open_flags_for_type(nspace_type_t nspace_type); +static nspace_type_t nspace_type_for_op(uint64_t op); +static int nspace_is_special_process(struct proc *proc); +static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx); +static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type); +static int validate_namespace_args (int is64bit, int size); +static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data); + + static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type) { switch(nspace_type) { @@ -7946,7 +8111,7 @@ retry: // if (vp != nspace_items[i].vp) { nspace_items[i].vp = vp; - nspace_items[i].arg = arg; + nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? 
NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user nspace_items[i].op = op; nspace_items[i].vid = vnode_vid(vp); nspace_items[i].flags = NSPACE_ITEM_NEW; @@ -8134,7 +8299,7 @@ vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx) } static int -wait_for_namespace_event(namespace_handler_info_ext *nhi, nspace_type_t nspace_type) +wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type) { int i, error=0, unblock=0; task_t curtask; @@ -8184,9 +8349,14 @@ wait_for_namespace_event(namespace_handler_info_ext *nhi, nspace_type_t nspace_t int32_t indx, fmode; struct proc *p = current_proc(); vfs_context_t ctx = vfs_context_current(); - + struct vnode_attr va; + + + /* + * Use vnode pointer to acquire a file descriptor for + * hand-off to userland + */ fmode = nspace_open_flags_for_type(nspace_type); - error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid); if (error) { unblock = 1; @@ -8207,19 +8377,30 @@ wait_for_namespace_event(namespace_handler_info_ext *nhi, nspace_type_t nspace_t } fp->f_fglob->fg_flag = fmode; - fp->f_fglob->fg_type = DTYPE_VNODE; fp->f_fglob->fg_ops = &vnops; fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp; proc_fdlock(p); procfdtbl_releasefd(p, indx, NULL); fp_drop(p, indx, fp, 1); - proc_fdunlock(p); - - error = copyout(&nspace_items[i].token, nhi->token, sizeof(uint32_t)); - error = copyout(&nspace_items[i].op, nhi->flags, sizeof(uint64_t)); - error = copyout(&indx, nhi->fdptr, sizeof(uint32_t)); - if (nhi->infoptr) { + proc_fdunlock(p); + + /* + * All variants of the namespace handler struct support these three fields: + * token, flags, and the FD pointer + */ + error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t)); + error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t)); + error = copyout(&indx, nhd->fdptr, sizeof(uint32_t)); + + /* + * Handle optional fields: + * extended version support an info ptr (offset, length), and the + * + * namedata version supports a unique per-link object ID + * + */ + if (nhd->infoptr) { uio_t uio = (uio_t)nspace_items[i].arg; uint64_t u_offset, u_length; @@ -8230,9 +8411,23 @@ wait_for_namespace_event(namespace_handler_info_ext *nhi, nspace_type_t nspace_t u_offset = 0; u_length = 0; } - error = copyout(&u_offset, nhi->infoptr, sizeof(uint64_t)); - error = copyout(&u_length, nhi->infoptr+sizeof(uint64_t), sizeof(uint64_t)); + error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t)); + error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t)); } + + if (nhd->objid) { + VATTR_INIT(&va); + VATTR_WANTED(&va, va_linkid); + error = vnode_getattr(nspace_items[i].vp, &va, ctx); + if (error == 0 ) { + uint64_t linkid = 0; + if (VATTR_IS_SUPPORTED (&va, va_linkid)) { + linkid = (uint64_t)va.va_linkid; + } + error = copyout (&linkid, nhd->objid, sizeof(uint64_t)); + } + } + if (error) { vn_close(nspace_items[i].vp, fmode, ctx); fp_free(p, indx, fp); @@ -8298,13 +8493,50 @@ wait_for_namespace_event(namespace_handler_info_ext *nhi, nspace_type_t nspace_t return error; } +static inline int validate_namespace_args (int is64bit, int size) { + + if (is64bit) { + /* Must be one of these */ + if (size == sizeof(user64_namespace_handler_info)) { + goto sizeok; + } + if (size == sizeof(user64_namespace_handler_info_ext)) { + goto sizeok; + } + if (size == sizeof(user64_namespace_handler_data)) { + goto sizeok; + } + return EINVAL; + } + else { + /* 32 bit -- must be one of these */ + if (size == sizeof(user32_namespace_handler_info)) { + goto 
sizeok; + } + if (size == sizeof(user32_namespace_handler_info_ext)) { + goto sizeok; + } + if (size == sizeof(user32_namespace_handler_data)) { + goto sizeok; + } + return EINVAL; + } + +sizeok: + + return 0; + +} static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data) { int error = 0; - namespace_handler_info_ext nhi; + namespace_handler_data nhd; - if (nspace_type == NSPACE_HANDLER_SNAPSHOT && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) { + bzero (&nhd, sizeof(namespace_handler_data)); + + if (nspace_type == NSPACE_HANDLER_SNAPSHOT && + (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) { return EINVAL; } @@ -8312,36 +8544,48 @@ static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int return error; } - if ( (is64bit && size != sizeof(user64_namespace_handler_info) && size != sizeof(user64_namespace_handler_info_ext)) - || (is64bit == 0 && size != sizeof(user32_namespace_handler_info) && size != sizeof(user32_namespace_handler_info_ext))) { - - // either you're 64-bit and passed a 64-bit struct or - // you're 32-bit and passed a 32-bit struct. otherwise - // it's not ok. - return EINVAL; + error = validate_namespace_args (is64bit, size); + if (error) { + return error; } + /* Copy in the userland pointers into our kernel-only struct */ + if (is64bit) { - nhi.token = (user_addr_t)((user64_namespace_handler_info *)data)->token; - nhi.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags; - nhi.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr; - if (size == sizeof(user64_namespace_handler_info_ext)) { - nhi.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr; - } else { - nhi.infoptr = 0; + /* 64 bit userland structures */ + nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token; + nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags; + nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr; + + /* If the size is greater than the standard info struct, add in extra fields */ + if (size > (sizeof(user64_namespace_handler_info))) { + if (size >= (sizeof(user64_namespace_handler_info_ext))) { + nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr; + } + if (size == (sizeof(user64_namespace_handler_data))) { + nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid; + } + /* Otherwise the fields were pre-zeroed when we did the bzero above. 
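validate_namespace_args() and the copy-in ladder around it implement a size-versioned ABI: the byte count the caller hands fsctl() proves which optional fields its struct has. The three 64-bit variants are presumably prefix-compatible, roughly like this (a sketch inferred from the field accesses here, not copied from the headers):

    struct user64_namespace_handler_info {        /* base: token/flags/fd */
            user64_addr_t token;
            user64_addr_t flags;
            user64_addr_t fdptr;
    };
    struct user64_namespace_handler_info_ext {    /* adds (offset,length) info ptr */
            user64_addr_t token, flags, fdptr;
            user64_addr_t infoptr;
    };
    struct user64_namespace_handler_data {        /* adds per-link object id ptr */
            user64_addr_t token, flags, fdptr, infoptr;
            user64_addr_t objid;
    };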
*/ } - } else { - nhi.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token); - nhi.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags); - nhi.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr); - if (size == sizeof(user32_namespace_handler_info_ext)) { - nhi.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr); - } else { - nhi.infoptr = 0; + } + else { + /* 32 bit userland structures */ + nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token); + nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags); + nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr); + + if (size > (sizeof(user32_namespace_handler_info))) { + if (size >= (sizeof(user32_namespace_handler_info_ext))) { + nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr); + } + if (size == (sizeof(user32_namespace_handler_data))) { + nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid; + } + /* Otherwise the fields were pre-zeroed when we did the bzero above. */ } } - return wait_for_namespace_event(&nhi, nspace_type); + return wait_for_namespace_event(&nhd, nspace_type); } /* @@ -8465,22 +8709,26 @@ fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long error = set_package_extensions_table(ext_strings, num_entries, max_width); - } else if (IOCBASECMD(cmd) == FSCTL_WAIT_FOR_SYNC) { - error = tsleep((caddr_t)&sync_wait_time, PVFS|PCATCH, "sync-wait", 0); - if (error == 0) { - *(uint32_t *)data = (uint32_t)sync_wait_time; - error = 0; - } else { - error *= -1; - } - - } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) { + + } + + /* namespace handlers */ + else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) { error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data); - } else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) { + } + + /* Snapshot handlers */ + else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) { error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data); } else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) { error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data); - } else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) { + } + + /* Tracked File Handlers */ + else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) { + error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data); + } + else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GETDATA) { error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data); } else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) { uint32_t token, val; @@ -9235,13 +9483,28 @@ fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval) error = ENOTSUP; /* unexpected failure */ goto out; } +unionget: /* Find the target vnode. */ if (uap->objid == 2) { error = VFS_ROOT(mp, &vp, ctx); } else { error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx); } - vfs_unbusy(mp); + + if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) { + /* + * If the fileid isn't found and we're in a union + * mount volume, then see if the fileid is in the + * mounted-on volume. 
+ */ + struct mount *tmp = mp; + mp = vnode_mount(tmp->mnt_vnodecovered); + vfs_unbusy(tmp); + if (vfs_busy(mp, LK_NOWAIT) == 0) + goto unionget; + } else + vfs_unbusy(mp); + if (error) { goto out; } @@ -9261,6 +9524,23 @@ fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval) goto out; } AUDIT_ARG(text, realpath); + + if (kdebug_enable) { + long dbg_parms[NUMPARMS]; + int dbg_namelen; + + dbg_namelen = (int)sizeof(dbg_parms); + + if (length < dbg_namelen) { + memcpy((char *)dbg_parms, realpath, length); + memset((char *)dbg_parms + length, 0, dbg_namelen - length); + + dbg_namelen = length; + } else + memcpy((char *)dbg_parms, realpath + (length - dbg_namelen), dbg_namelen); + + kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE); + } error = copyout((caddr_t)realpath, uap->buf, length); *retval = (user_ssize_t)length; /* may be superseded by error */ @@ -9556,3 +9836,32 @@ void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp) usbp->st_qspare[0] = sbp->st_qspare[0]; usbp->st_qspare[1] = sbp->st_qspare[1]; } + +/* + * Purge buffer cache for simulating cold starts + */ +static int vnode_purge_callback(struct vnode *vp, __unused void *cargs) +{ + ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE); + + return VNODE_RETURNED; +} + +static int vfs_purge_callback(mount_t mp, __unused void * arg) +{ + vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL); + + return VFS_RETURNED; +} + +int +vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval) +{ + if (!kauth_cred_issuser(kauth_cred_get())) + return EPERM; + + vfs_iterate(0/* flags */, vfs_purge_callback, NULL); + + return 0; +} + diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index 671a51fc3..24e968dc0 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -115,6 +115,7 @@ int ubc_setcred(struct vnode *, struct proc *); #include #endif +extern void sigpup_attach_vnode(vnode_t); /* XXX */ static int vn_closefile(struct fileglob *fp, vfs_context_t ctx); static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, @@ -135,8 +136,16 @@ static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident, vfs_context_t ctx); #endif -struct fileops vnops = - { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL }; +const struct fileops vnops = { + DTYPE_VNODE, + vn_read, + vn_write, + vn_ioctl, + vn_select, + vn_closefile, + vn_kqfilt_add, + NULL +}; struct filterops vnode_filtops = { .f_isfd = 1, @@ -186,6 +195,8 @@ vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx) kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, (uintptr_t)vp, 0); + sigpup_attach_vnode(vp); + return 0; bad: @@ -263,13 +274,7 @@ vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, } #endif - /* - * Unlock the fsnode (if locked) here so that we are free - * to drop the dvp iocount and prevent deadlock in build_path(). - * nameidone() will still do the right thing later. - */ vp = ndp->ni_vp; - namei_unlock_fsnode(ndp); if (*did_create) { int update_flags = 0; @@ -488,7 +493,8 @@ continue_create_lookup: /* Fall through */ } - } else { + } + else { /* * Not O_CREAT */ @@ -684,6 +690,7 @@ int vn_close(struct vnode *vp, int flags, vfs_context_t ctx) { int error; + int flusherror = 0; #if NAMEDRSRCFORK /* Sync data from resource fork shadow file if needed. 
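The new vfs_purge() above chains the two stock iterators: vfs_iterate() visits every mount, vnode_iterate() visits every vnode on it, and ubc_msync(... UBC_PUSHALL | UBC_INVALIDATE) pushes and drops each vnode's cached pages, simulating a cold start. It is root-only because flushing the whole buffer cache degrades the entire system. The same two-level shape suits any whole-system vnode sweep; a minimal variant that only counts live vnodes (names invented):

    static int count_vnode_cb(__unused struct vnode *vp, void *arg)
    {
            (*(uint64_t *)arg)++;          /* one more live vnode */
            return VNODE_RETURNED;         /* keep going; iterator drops our ref */
    }

    static int count_mount_cb(mount_t mp, void *arg)
    {
            vnode_iterate(mp, VNODE_ITERATE_ALL, count_vnode_cb, arg);
            return VFS_RETURNED;           /* continue with the next mount */
    }

    static void count_all_vnodes(void)
    {
            uint64_t nvnodes = 0;
            vfs_iterate(0 /* flags */, count_mount_cb, &nvnodes);
            printf("live vnodes: %llu\n", nvnodes);
    }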
*/ @@ -691,7 +698,7 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx) (vp->v_parent != NULLVP) && vnode_isshadow(vp)) { if (flags & FWASWRITTEN) { - (void) vnode_flushnamedstream(vp->v_parent, vp, ctx); + flusherror = vnode_flushnamedstream(vp->v_parent, vp, ctx); } } #endif @@ -715,6 +722,9 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx) if (!vnode_isspec(vp)) (void)vnode_rele_ext(vp, flags, 0); + if (flusherror) { + error = flusherror; + } return (error); } @@ -863,7 +873,7 @@ vn_rdwr_64( if (error == 0) { if (rw == UIO_READ) { - if (vnode_isswap(vp)) { + if (vnode_isswap(vp) && ((ioflg & IO_SWAP_DISPATCH) == 0)) { error = vn_read_swapfile(vp, auio); } else { error = VNOP_READ(vp, auio, ioflg, &context); @@ -888,7 +898,8 @@ static int vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) { struct vnode *vp; - int error, ioflag; + int error; + int ioflag; off_t count; vp = (struct vnode *)fp->f_fglob->fg_data; @@ -963,9 +974,9 @@ vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) } #endif - /* - * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write originated - * from a file table write. + /* + * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write came from + * a file table write */ ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH); @@ -1328,14 +1339,14 @@ vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) error = ENXIO; goto out; } - *(int *)data = D_TYPEMASK & bdevsw[major(vp->v_rdev)].d_type; + *(int *)data = bdevsw[major(vp->v_rdev)].d_type; } else if (vp->v_type == VCHR) { if (major(vp->v_rdev) >= nchrdev) { error = ENXIO; goto out; } - *(int *)data = D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type; + *(int *)data = cdevsw[major(vp->v_rdev)].d_type; } else { error = ENOTTY; goto out; @@ -1413,13 +1424,14 @@ vn_closefile(struct fileglob *fg, vfs_context_t ctx) if ( (error = vnode_getwithref(vp)) == 0 ) { - if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) { + if ((fg->fg_flag & FHASLOCK) && + FILEGLOB_DTYPE(fg) == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx); + (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx, NULL); } error = vn_close(vp, fg->fg_flag, ctx); @@ -1612,11 +1624,14 @@ static intptr_t vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll) { if (vnode_isfifo(vp)) { +#if FIFO int cnt; int err = fifo_charcount(vp, &cnt); if (err == 0) { return (intptr_t)cnt; - } else { + } else +#endif + { return (intptr_t)0; } } else if (vnode_isreg(vp)) { @@ -1649,11 +1664,14 @@ static intptr_t vnode_writable_space_count(vnode_t vp) { if (vnode_isfifo(vp)) { +#if FIFO long spc; int err = fifo_freespace(vp, &spc); if (err == 0) { return (intptr_t)spc; - } else { + } else +#endif + { return (intptr_t)0; } } else if (vnode_isreg(vp)) { diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c index c4587268f..dc2e09d32 100644 --- a/bsd/vfs/vfs_xattr.c +++ b/bsd/vfs/vfs_xattr.c @@ -56,6 +56,9 @@ #include #endif +#if !CONFIG_APPLEDOUBLE +#define PANIC_ON_NOAPPLEDOUBLE 1 +#endif #if NAMEDSTREAMS @@ -72,7 +75,7 @@ static int shadow_sequence; #define MAKE_SHADOW_NAME(VP, NAME) \ snprintf((NAME), sizeof((NAME)), (SHADOW_NAME_FMT), \ ((void*)(VM_KERNEL_ADDRPERM(VP))), \ - ((VP)->v_id), \ + (VP)->v_id, \ ((void*)(VM_KERNEL_ADDRPERM((VP)->v_data)))) /* The full path to the shadow directory */ @@ -85,7 +88,6 @@ static int shadow_sequence; snprintf((NAME), 
sizeof((NAME)), (SHADOW_DIR_FMT), \ ((void*)(VM_KERNEL_ADDRPERM(VP))), shadow_sequence) - static int default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsoperation op, vfs_context_t context); static int default_makenamedstream(vnode_t vp, vnode_t *svpp, const char *name, vfs_context_t context); @@ -94,19 +96,22 @@ static int default_removenamedstream(vnode_t vp, const char *name, vfs_context_ static int getshadowfile(vnode_t vp, vnode_t *svpp, int makestream, size_t *rsrcsize, int *creator, vfs_context_t context); -static int get_shadow_dir(vnode_t *sdvpp, vfs_context_t context); - -#endif +static int get_shadow_dir(vnode_t *sdvpp); +#endif /* NAMEDSTREAMS */ /* * Default xattr support routines. */ +static int default_getxattr(vnode_t vp, const char *name, uio_t uio, size_t *size, int options, + vfs_context_t context); +static int default_setxattr(vnode_t vp, const char *name, uio_t uio, int options, + vfs_context_t context); static int default_listxattr(vnode_t vp, uio_t uio, size_t *size, int options, - vfs_context_t context); - - + vfs_context_t context); +static int default_removexattr(vnode_t vp, const char *name, int options, + vfs_context_t context); /* * Retrieve the data of an extended attribute. @@ -117,7 +122,7 @@ vn_getxattr(vnode_t vp, const char *name, uio_t uio, size_t *size, { int error; - if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK)) { + if (!XATTR_VNODE_SUPPORTED(vp)) { return (EPERM); } #if NAMEDSTREAMS @@ -163,7 +168,6 @@ vn_getxattr(vnode_t vp, const char *name, uio_t uio, size_t *size, if (error == ENOTSUP && !(options & XATTR_NODEFAULT)) { /* * A filesystem may keep some EAs natively and return ENOTSUP for others. - * SMB returns ENOTSUP for finderinfo and resource forks. */ error = default_getxattr(vp, name, uio, size, options, context); } @@ -179,7 +183,7 @@ vn_setxattr(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_t { int error; - if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK)) { + if (!XATTR_VNODE_SUPPORTED(vp)) { return (EPERM); } #if NAMEDSTREAMS @@ -249,7 +253,6 @@ vn_setxattr(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_t if (error == ENOTSUP && !(options & XATTR_NODEFAULT)) { /* * A filesystem may keep some EAs natively and return ENOTSUP for others. - * SMB returns ENOTSUP for finderinfo and resource forks. */ error = default_setxattr(vp, name, uio, options, context); } @@ -270,7 +273,7 @@ vn_removexattr(vnode_t vp, const char * name, int options, vfs_context_t context { int error; - if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK)) { + if (!XATTR_VNODE_SUPPORTED(vp)) { return (EPERM); } #if NAMEDSTREAMS @@ -297,7 +300,6 @@ vn_removexattr(vnode_t vp, const char * name, int options, vfs_context_t context if (error == ENOTSUP && !(options & XATTR_NODEFAULT)) { /* * A filesystem may keep some EAs natively and return ENOTSUP for others. - * SMB returns ENOTSUP for finderinfo and resource forks. 
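All four vn_*xattr() entry points above share one shape: ask the filesystem first, and only on ENOTSUP, and only when the caller did not pass XATTR_NODEFAULT, fall back to the AppleDouble emulation. The getxattr instance, condensed:

    error = VNOP_GETXATTR(vp, name, uio, size, options, context);
    if (error == ENOTSUP && !(options & XATTR_NODEFAULT)) {
            /* The FS keeps some (or no) EAs natively; emulate the
             * rest through the ._ AppleDouble file. */
            error = default_getxattr(vp, name, uio, size, options, context);
    }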
*/ error = default_removexattr(vp, name, options, context); #ifdef DUAL_EAS @@ -330,7 +332,7 @@ vn_listxattr(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t con { int error; - if (!(vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK)) { + if (!XATTR_VNODE_SUPPORTED(vp)) { return (EPERM); } #if NAMEDSTREAMS @@ -357,8 +359,7 @@ vn_listxattr(vnode_t vp, uio_t uio, size_t *size, int options, vfs_context_t con /* * A filesystem may keep some but not all EAs natively, in which case * the native EA names will have been uiomove-d out (or *size updated) - * and the default_listxattr here will finish the job. Note SMB takes - * advantage of this for its finder-info and resource forks. + * and the default_listxattr here will finish the job. */ error = default_listxattr(vp, uio, size, options, context); } @@ -396,6 +397,7 @@ xattr_protected(const char *attrname) #if NAMEDSTREAMS + /* * Obtain a named stream from vnode vp. */ @@ -517,12 +519,20 @@ vnode_removenamedstream(vnode_t vp, vnode_t svp, const char *name, int flags, vf * to create and initialize the file again. */ errno_t -vnode_relenamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) -{ +vnode_relenamedstream(vnode_t vp, vnode_t svp) { vnode_t dvp; struct componentname cn; char tmpname[80]; errno_t err; + + /* + * We need to use the kernel context here. If we used the supplied + * VFS context we have no clue whether or not it originated from userland + * where it could be subject to a chroot jail. We need to ensure that all + * filesystem access to shadow files is done on the same FS regardless of + * userland process restrictions. + */ + vfs_context_t kernelctx = vfs_context_kernel(); cache_purge(svp); @@ -532,19 +542,22 @@ vnode_relenamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) cn.cn_nameiop = DELETE; cn.cn_flags = ISLASTCN; - cn.cn_context = context; + cn.cn_context = kernelctx; cn.cn_pnbuf = tmpname; cn.cn_pnlen = sizeof(tmpname); cn.cn_nameptr = cn.cn_pnbuf; cn.cn_namelen = strlen(tmpname); - /* Obtain the vnode for the shadow files directory. */ - err = get_shadow_dir(&dvp, context); + /* + * Obtain the vnode for the shadow files directory. Make sure to + * use the kernel ctx as described above. + */ + err = get_shadow_dir(&dvp); if (err != 0) { return err; } - (void) VNOP_REMOVE(dvp, svp, &cn, 0, context); + (void) VNOP_REMOVE(dvp, svp, &cn, 0, kernelctx); vnode_put(dvp); return (0); @@ -552,6 +565,9 @@ vnode_relenamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) /* * Flush a named stream shadow file. + * + * 'vp' represents the AppleDouble file. + * 'svp' represents the shadow file. */ errno_t vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) @@ -564,6 +580,13 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) size_t iosize; size_t datasize; int error; + /* + * The kernel context must be used for all I/O to the shadow file + * and its namespace operations + */ + vfs_context_t kernelctx = vfs_context_kernel(); + + /* The supplied context is used for access to the AD file itself */ VATTR_INIT(&va); VATTR_WANTED(&va, va_data_size); @@ -587,7 +610,7 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) /* * Copy the shadow stream file data into the resource fork. 
*/ - error = VNOP_OPEN(svp, 0, context); + error = VNOP_OPEN(svp, 0, kernelctx); if (error) { printf("vnode_flushnamedstream: err %d opening file\n", error); goto out; @@ -597,7 +620,7 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, (uintptr_t)bufptr, iosize); - error = VNOP_READ(svp, auio, 0, context); + error = VNOP_READ(svp, auio, 0, kernelctx); if (error) { break; } @@ -616,7 +639,9 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) } offset += iosize; } - (void) VNOP_CLOSE(svp, 0, context); + + /* close shadowfile */ + (void) VNOP_CLOSE(svp, 0, kernelctx); out: if (bufptr) { kmem_free(kernel_map, (vm_offset_t)bufptr, bufsize); @@ -627,21 +652,31 @@ out: return (error); } + /* * Verify that the vnode 'vp' is a vnode that lives in the shadow * directory. We can't just query the parent pointer directly since * the shadowfile is hooked up to the actual file it's a stream for. */ -errno_t vnode_verifynamedstream(vnode_t vp, vfs_context_t context) { +errno_t vnode_verifynamedstream(vnode_t vp) { int error; struct vnode *shadow_dvp = NULL; struct vnode *shadowfile = NULL; struct componentname cn; + + /* + * We need to use the kernel context here. If we used the supplied + * VFS context we have no clue whether or not it originated from userland + * where it could be subject to a chroot jail. We need to ensure that all + * filesystem access to shadow files is done on the same FS regardless of + * userland process restrictions. + */ + vfs_context_t kernelctx = vfs_context_kernel(); char tmpname[80]; - + /* Get the shadow directory vnode */ - error = get_shadow_dir(&shadow_dvp, context); + error = get_shadow_dir(&shadow_dvp); if (error) { return error; } @@ -653,13 +688,13 @@ errno_t vnode_verifynamedstream(vnode_t vp, vfs_context_t context) { bzero(&cn, sizeof(cn)); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN | CN_ALLOWRSRCFORK; - cn.cn_context = context; + cn.cn_context = kernelctx; cn.cn_pnbuf = tmpname; cn.cn_pnlen = sizeof(tmpname); cn.cn_nameptr = cn.cn_pnbuf; cn.cn_namelen = strlen(tmpname); - if (VNOP_LOOKUP (shadow_dvp, &shadowfile, &cn, context) == 0) { + if (VNOP_LOOKUP (shadow_dvp, &shadowfile, &cn, kernelctx) == 0) { /* is the pointer the same? */ if (shadowfile == vp) { error = 0; @@ -676,6 +711,17 @@ errno_t vnode_verifynamedstream(vnode_t vp, vfs_context_t context) { return error; } +/* + * Access or create the shadow file as needed. + * + * 'makestream' with non-zero value means that we need to guarantee we were the + * creator of the shadow file. + * + * 'context' is the user supplied context for the original VFS operation that + * caused us to need a shadow file. + * + * int pointed to by 'creator' is nonzero if we created the shadowfile. + */ static int getshadowfile(vnode_t vp, vnode_t *svpp, int makestream, size_t *rsrcsize, int *creator, vfs_context_t context) @@ -688,6 +734,7 @@ getshadowfile(vnode_t vp, vnode_t *svpp, int makestream, size_t *rsrcsize, size_t datasize = 0; int error = 0; int retries = 0; + vfs_context_t kernelctx = vfs_context_kernel(); retry_create: *creator = 0; @@ -723,13 +770,13 @@ retry_create: VATTR_SET(&va, va_flags, UF_HIDDEN); /* Obtain the vnode for the shadow files directory. */ - if (get_shadow_dir(&dvp, context) != 0) { + if (get_shadow_dir(&dvp) != 0) { error = ENOTDIR; goto out; } if (!makestream) { /* See if someone else already has it open. 
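The context split threaded through this file follows one rule: every namespace or I/O operation that touches the shadow file takes vfs_context_kernel(), while operations on the caller's own file (the AppleDouble/xattr side) keep the supplied context. That way a chrooted caller can never cause shadow-file lookups to resolve inside its jail. Schematically:

    vfs_context_t kernelctx = vfs_context_kernel();

    /* shadow file: the kernel's view of the namespace, chroot-proof */
    error = VNOP_LOOKUP(dvp, &svp, &cn, kernelctx);
    error = VNOP_READ(svp, auio, 0, kernelctx);

    /* caller's file: keep the caller's credentials and root */
    error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, auio, &tmpsize,
                        XATTR_NOSECURITY, context);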
*/ - if (VNOP_LOOKUP(dvp, &svp, &cn, context) == 0) { + if (VNOP_LOOKUP(dvp, &svp, &cn, kernelctx) == 0) { /* Double check existence by asking for size. */ VATTR_INIT(&va); VATTR_WANTED(&va, va_data_size); @@ -739,7 +786,10 @@ retry_create: } } - /* Otherwise make sure the resource fork data exists. */ + /* + * Otherwise make sure the resource fork data exists. + * Use the supplied context for accessing the AD file. + */ error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, NULL, &datasize, XATTR_NOSECURITY, context); /* @@ -766,13 +816,13 @@ retry_create: } } /* Create the shadow stream file. */ - error = VNOP_CREATE(dvp, &svp, &cn, &va, context); + error = VNOP_CREATE(dvp, &svp, &cn, &va, kernelctx); if (error == 0) { vnode_recycle(svp); *creator = 1; } else if ((error == EEXIST) && !makestream) { - error = VNOP_LOOKUP(dvp, &svp, &cn, context); + error = VNOP_LOOKUP(dvp, &svp, &cn, kernelctx); } else if ((error == ENOENT) && !makestream) { /* @@ -826,6 +876,9 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera int creator; int error; + /* need the kernel context for accessing the shadowfile */ + vfs_context_t kernelctx = vfs_context_kernel(); + /* * Only the "com.apple.ResourceFork" stream is supported here. */ @@ -836,6 +889,9 @@ default_getnamedstream(vnode_t vp, vnode_t *svpp, const char *name, enum nsopera retry: /* * Obtain a shadow file for the resource fork I/O. + * + * Need to pass along the supplied context so that getshadowfile + * can access the AD file as needed, using it. */ error = getshadowfile(vp, &svp, 0, &datasize, &creator, context); if (error) { @@ -896,7 +952,8 @@ retry: auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); offset = 0; - error = VNOP_OPEN(svp, 0, context); + /* open the shadow file */ + error = VNOP_OPEN(svp, 0, kernelctx); if (error) { goto out; } @@ -907,6 +964,7 @@ retry: uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ); uio_addiov(auio, (uintptr_t)bufptr, iosize); + /* use supplied ctx for AD file */ error = vn_getxattr(vp, XATTR_RESOURCEFORK_NAME, auio, &tmpsize, XATTR_NOSECURITY, context); if (error) { @@ -915,13 +973,16 @@ retry: uio_reset(auio, offset, UIO_SYSSPACE, UIO_WRITE); uio_addiov(auio, (uintptr_t)bufptr, iosize); - error = VNOP_WRITE(svp, auio, 0, context); + /* kernel context for writing shadowfile */ + error = VNOP_WRITE(svp, auio, 0, kernelctx); if (error) { break; } offset += iosize; } - (void) VNOP_CLOSE(svp, 0, context); + + /* close shadow file */ + (void) VNOP_CLOSE(svp, 0, kernelctx); } out: /* Wake up anyone waiting for svp file content */ @@ -940,7 +1001,7 @@ out: * Also add the VISSHADOW bit here to indicate we're done operating * on this vnode. 
*/ - (void)vnode_relenamedstream(vp, svp, context); + (void)vnode_relenamedstream(vp, svp); vnode_lock (svp); svp->v_flag |= VISSHADOW; wakeup((caddr_t)&svp->v_parent); @@ -978,6 +1039,8 @@ default_makenamedstream(vnode_t vp, vnode_t *svpp, const char *name, vfs_context *svpp = NULLVP; return (ENOATTR); } + + /* Supply the context to getshadowfile so it can manipulate the AD file */ error = getshadowfile(vp, svpp, 1, NULL, &creator, context); /* @@ -1014,8 +1077,7 @@ default_removenamedstream(vnode_t vp, const char *name, vfs_context_t context) } static int -get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) -{ +get_shadow_dir(vnode_t *sdvpp) { vnode_t dvp = NULLVP; vnode_t sdvp = NULLVP; struct componentname cn; @@ -1023,7 +1085,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) char tmpname[80]; uint32_t tmp_fsid; int error; - + vfs_context_t kernelctx = vfs_context_kernel(); bzero(tmpname, sizeof(tmpname)); MAKE_SHADOW_DIRNAME(rootvnode, tmpname); @@ -1031,8 +1093,11 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) * Look up the shadow directory to ensure that it still exists. * By looking it up, we get an iocounted dvp to use, and avoid some coherency issues * in caching it when multiple threads may be trying to manipulate the pointers. + * + * Make sure to use the kernel context. We want a singular view of + * the shadow dir regardless of chrooted processes. */ - error = vnode_lookup(tmpname, 0, &sdvp, context); + error = vnode_lookup(tmpname, 0, &sdvp, kernelctx); if (error == 0) { /* * If we get here, then we have successfully looked up the shadow dir, @@ -1046,10 +1111,12 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) bzero (tmpname, sizeof(tmpname)); /* - * Obtain the vnode for "/var/run" directory. + * Obtain the vnode for "/var/run" directory using the kernel + * context. + * * This is defined in the SHADOW_DIR_CONTAINER macro */ - if (vnode_lookup(SHADOW_DIR_CONTAINER, 0, &dvp, context) != 0) { + if (vnode_lookup(SHADOW_DIR_CONTAINER, 0, &dvp, kernelctx) != 0) { error = ENOTSUP; goto out; } @@ -1063,7 +1130,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) bzero(&cn, sizeof(cn)); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN; - cn.cn_context = context; + cn.cn_context = kernelctx; cn.cn_pnbuf = tmpname; cn.cn_pnlen = sizeof(tmpname); cn.cn_nameptr = cn.cn_pnbuf; @@ -1080,23 +1147,23 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) VATTR_SET(&va, va_flags, UF_HIDDEN); va.va_vaflags = VA_EXCLUSIVE; - error = VNOP_MKDIR(dvp, &sdvp, &cn, &va, context); + error = VNOP_MKDIR(dvp, &sdvp, &cn, &va, kernelctx); /* * There can be only one winner for an exclusive create. 
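Losing the exclusive create is only safe if the pre-existing directory can be trusted, which is what the attribute checks just below establish. Their net predicate, paraphrased from the comments (a sketch, not the literal test):

    /* Accept an existing shadow directory only when all of these hold:
     *  - owned by root and not writable by group or other
     *  - on the same filesystem (fsid) as /var/run
     *  - not a hard-linked directory (va_dirlinkcount == 1)
     *  - no ACLs, which could quietly grant write access */
    if (va.va_uid != 0 ||
        (va.va_mode & (S_IWGRP | S_IWOTH)) != 0 ||
        va.va_fsid != tmp_fsid ||
        va.va_dirlinkcount != 1 ||
        va.va_acl != (kauth_acl_t)KAUTH_FILESEC_NONE)
            goto baddir;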
*/ if (error == EEXIST) { /* loser has to look up directory */ - error = VNOP_LOOKUP(dvp, &sdvp, &cn, context); + error = VNOP_LOOKUP(dvp, &sdvp, &cn, kernelctx); if (error == 0) { /* Make sure its in fact a directory */ if (sdvp->v_type != VDIR) { goto baddir; } - /* Obtain the fsid for /tmp directory */ + /* Obtain the fsid for /var/run directory */ VATTR_INIT(&va); VATTR_WANTED(&va, va_fsid); - if (VNOP_GETATTR(dvp, &va, context) != 0 || + if (VNOP_GETATTR(dvp, &va, kernelctx) != 0 || !VATTR_IS_SUPPORTED(&va, va_fsid)) { goto baddir; } @@ -1113,7 +1180,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) va.va_dirlinkcount = 1; va.va_acl = (kauth_acl_t) KAUTH_FILESEC_NONE; - if (VNOP_GETATTR(sdvp, &va, context) != 0 || + if (VNOP_GETATTR(sdvp, &va, kernelctx) != 0 || !VATTR_IS_SUPPORTED(&va, va_uid) || !VATTR_IS_SUPPORTED(&va, va_gid) || !VATTR_IS_SUPPORTED(&va, va_mode) || @@ -1124,7 +1191,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) * Make sure its what we want: * - owned by root * - not writable by anyone - * - on same file system as /tmp + * - on same file system as /var/run * - not a hard-linked directory * - no ACLs (they might grant write access) */ @@ -1157,10 +1224,10 @@ baddir: error = ENOTDIR; goto out; } -#endif - +#endif /* NAMEDSTREAMS */ +#if CONFIG_APPLEDOUBLE /* * Default Implementation (Non-native EA) */ @@ -1517,7 +1584,7 @@ static int check_and_swap_apple_double_header(attr_info_t *ainfop) /* * Retrieve the data of an extended attribute. */ -int +static int default_getxattr(vnode_t vp, const char *name, uio_t uio, size_t *size, __unused int options, vfs_context_t context) { @@ -1639,7 +1706,7 @@ out: /* * Set the data of an extended attribute. */ -int +static int default_setxattr(vnode_t vp, const char *name, uio_t uio, int options, vfs_context_t context) { vnode_t xvp = NULL; @@ -1766,19 +1833,34 @@ start: error = EPERM; goto out; } - if (ainfo.rsrcfork && ainfo.rsrcfork->length) { - /* attr exists and "create" was specified? */ - if (options & XATTR_CREATE) { - error = EEXIST; - goto out; + /* Make sure we have a rsrc fork pointer.. */ + if (ainfo.rsrcfork == NULL) { + error = ENOATTR; + goto out; + } + if (ainfo.rsrcfork) { + if (ainfo.rsrcfork->length != 0) { + if (options & XATTR_CREATE) { + /* attr exists, and create specified ? */ + error = EEXIST; + goto out; + } } - } else { - /* attr doesn't exists and "replace" was specified? */ - if (options & XATTR_REPLACE) { - error = ENOATTR; - goto out; + else { + /* Zero length AD rsrc fork */ + if (options & XATTR_REPLACE) { + /* attr doesn't exist (0-length), but replace specified ? */ + error = ENOATTR; + goto out; + } } } + else { + /* We can't do much if we somehow didn't get an AD rsrc pointer */ + error = ENOATTR; + goto out; + } + endoffset = uio_resid(uio) + uio_offset(uio); /* new size */ uio_setoffset(uio, uio_offset(uio) + ainfo.rsrcfork->offset); error = VNOP_WRITE(xvp, uio, 0, context); @@ -2008,7 +2090,7 @@ out: /* * Remove an extended attribute. */ -int +static int default_removexattr(vnode_t vp, const char *name, __unused int options, vfs_context_t context) { vnode_t xvp = NULL; @@ -2326,70 +2408,6 @@ out: return (error); } -/* - * Check the header of a ._ file to verify that it is in fact an Apple Double - * file. Returns 0 if the header is valid, non-zero if invalid. 
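The restructured resource-fork branch in default_setxattr() pins down these semantics, with a zero-length fork counting as "attribute absent":

    /* rsrcfork == NULL              -> ENOATTR (no entry in the AD file)
     * length > 0  && XATTR_CREATE   -> EEXIST  (attribute already exists)
     * length == 0 && XATTR_REPLACE  -> ENOATTR (nothing there to replace)
     * otherwise: write the payload at the fork's offset in the ._ file */
    uio_setoffset(uio, uio_offset(uio) + ainfo.rsrcfork->offset);
    error = VNOP_WRITE(xvp, uio, 0, context);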
- */ -int check_appledouble_header(vnode_t vp, vfs_context_t ctx) -{ - int error = 0; - attr_info_t ainfo; - struct vnode_attr va; - uio_t auio = NULL; - void *buffer = NULL; - int iosize; - - ainfo.filevp = vp; - ainfo.context = ctx; - VATTR_INIT(&va); - VATTR_WANTED(&va, va_data_size); - if ((error = vnode_getattr(vp, &va, ctx))) { - goto out; - } - ainfo.filesize = va.va_data_size; - - iosize = MIN(ATTR_MAX_HDR_SIZE, ainfo.filesize); - if (iosize == 0) { - error = ENOATTR; - goto out; - } - ainfo.iosize = iosize; - - MALLOC(buffer, void *, iosize, M_TEMP, M_WAITOK); - if (buffer == NULL) { - error = ENOMEM; - goto out; - } - - auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); - uio_addiov(auio, (uintptr_t)buffer, iosize); - - /* Read the header */ - error = VNOP_READ(vp, auio, 0, ctx); - if (error) { - goto out; - } - ainfo.rawsize = iosize - uio_resid(auio); - ainfo.rawdata = (u_int8_t *)buffer; - - error = check_and_swap_apple_double_header(&ainfo); - if (error) { - goto out; - } - - /* If we made it here, then the header is ok */ - -out: - if (auio) { - uio_free(auio); - } - if (buffer) { - FREE(buffer, M_TEMP); - } - - return error; -} - static int open_xattrfile(vnode_t vp, int fileflags, vnode_t *xvpp, vfs_context_t context) { @@ -2584,20 +2602,17 @@ lookup: error = ENOATTR; } out: - if (dvp && (dvp != vp)) { - vnode_put(dvp); - } - if (basename) { - vnode_putname(basename); - } - if (filename && filename != &smallname[0]) { - FREE(filename, M_TEMP); - } if (error) { if (xvp != NULLVP) { if (opened) { (void) VNOP_CLOSE(xvp, fileflags, context); } + + if (fileflags & O_CREAT) { + /* Delete the xattr file if we encountered any errors */ + (void) remove_xattrfile (xvp, context); + } + if (referenced) { (void) vnode_rele(xvp); } @@ -2608,6 +2623,17 @@ out: error = EPERM; } } + /* Release resources after error-handling */ + if (dvp && (dvp != vp)) { + vnode_put(dvp); + } + if (basename) { + vnode_putname(basename); + } + if (filename && filename != &smallname[0]) { + FREE(filename, M_TEMP); + } + *xvpp = xvp; /* return a referenced vnode */ return (error); } @@ -2964,7 +2990,12 @@ create_xattrfile(vnode_t xvp, u_int32_t fileid, vfs_context_t context) init_empty_resource_fork(rsrcforkhdr); /* Push it out. */ - error = VNOP_WRITE(xvp, auio, 0, context); + error = VNOP_WRITE(xvp, auio, IO_UNIT, context); + + /* Did we write out the full uio? */ + if (uio_resid(auio) > 0) { + error = ENOSPC; + } uio_free(auio); FREE(buffer, M_TEMP); @@ -3285,11 +3316,11 @@ lock_xattrfile(vnode_t xvp, short locktype, vfs_context_t context) lf.l_len = 0; lf.l_type = locktype; /* F_WRLCK or F_RDLCK */ /* Note: id is just a kernel address that's not a proc */ - error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK|F_WAIT, context); + error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_SETLK, &lf, F_FLOCK|F_WAIT, context, NULL); return (error == ENOTSUP ? 0 : error); } -static int + int unlock_xattrfile(vnode_t xvp, vfs_context_t context) { struct flock lf; @@ -3300,7 +3331,55 @@ unlock_xattrfile(vnode_t xvp, vfs_context_t context) lf.l_len = 0; lf.l_type = F_UNLCK; /* Note: id is just a kernel address that's not a proc */ - error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_UNLCK, &lf, F_FLOCK, context); + error = VNOP_ADVLOCK(xvp, (caddr_t)xvp, F_UNLCK, &lf, F_FLOCK, context, NULL); return (error == ENOTSUP ? 
0 : error); } +#else /* CONFIG_APPLEDOUBLE */ + +#undef panic +#define panic printf + +static int +default_getxattr(vnode_t vp, const char *name, + __unused uio_t uio, __unused size_t *size, __unused int options, + __unused vfs_context_t context) +{ +#if PANIC_ON_NOAPPLEDOUBLE + panic("%s: no AppleDouble support, vp %p name %s", __func__, vp, name); +#endif + return (ENOTSUP); +} + +static int +default_setxattr(vnode_t vp, const char *name, + __unused uio_t uio, __unused int options, __unused vfs_context_t context) +{ +#if PANIC_ON_NOAPPLEDOUBLE + panic("%s: no AppleDouble support, vp %p name %s", __func__, vp, name); +#endif + return (ENOTSUP); +} + +static int +default_listxattr(vnode_t vp, + __unused uio_t uio, __unused size_t *size, __unused int options, + __unused vfs_context_t context) +{ +#if PANIC_ON_NOAPPLEDOUBLE + panic("%s: no AppleDouble support, vp %p name %s", __func__, vp, "."); +#endif + return (ENOTSUP); +} + +static int +default_removexattr(vnode_t vp, const char *name, + __unused int options, __unused vfs_context_t context) +{ +#if PANIC_ON_NOAPPLEDOUBLE + panic("%s: no AppleDouble support, vp %p name %s", __func__, vp, name); +#endif + return (ENOTSUP); +} + +#endif /* CONFIG_APPLEDOUBLE */ diff --git a/bsd/vfs/vnode_if.sh b/bsd/vfs/vnode_if.sh old mode 100644 new mode 100755 diff --git a/bsd/vm/Makefile b/bsd/vm/Makefile index 608304077..a116bf789 100644 --- a/bsd/vm/Makefile +++ b/bsd/vm/Makefile @@ -7,23 +7,17 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) - -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - DATAFILES = \ vnode_pager.h +INSTALL_MI_LIST = ${DATAFILES} + +INSTALL_MI_DIR = vm + EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = vm - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/bsd/vm/dp_backing_file.c b/bsd/vm/dp_backing_file.c index 9df1b810e..cc52be8fb 100644 --- a/bsd/vm/dp_backing_file.c +++ b/bsd/vm/dp_backing_file.c @@ -77,6 +77,8 @@ #include #endif +void kprintf(const char *fmt, ...); + /* * temporary support for delayed instantiation * of default_pager @@ -159,6 +161,7 @@ backing_store_suspend_return: } extern boolean_t backing_store_stop_compaction; +extern boolean_t compressor_store_stop_compaction; /* * Routine: macx_backing_store_compaction @@ -182,9 +185,15 @@ macx_backing_store_compaction(int flags) if (flags & SWAP_COMPACT_DISABLE) { backing_store_stop_compaction = TRUE; + compressor_store_stop_compaction = TRUE; + + kprintf("backing_store_stop_compaction = TRUE\n"); } else if (flags & SWAP_COMPACT_ENABLE) { backing_store_stop_compaction = FALSE; + compressor_store_stop_compaction = FALSE; + + kprintf("backing_store_stop_compaction = FALSE\n"); } return 0; @@ -211,6 +220,33 @@ macx_triggers( extern boolean_t dp_isssd; +extern void vm_swap_init(void); +extern int vm_compressor_mode; + +/* + * In the compressed pager world, the swapfiles are created by the kernel. + * Well, all except the first one. That swapfile is absorbed by the kernel at + * the end of the macx_swapon function (if swap is enabled). That's why + * we allow the first invocation of macx_swapon to succeed. + * + * If the compressor pool is running low, the kernel messages the dynamic pager + * on the port it has registered with the kernel. That port can transport 1 of 2 + * pieces of information to dynamic pager: create a swapfile or delete a swapfile. + * + * We choose to transmit the former. 
So, that message tells dynamic pager + * to create a swapfile and activate it by calling macx_swapon. + * + * We deny this new macx_swapon request. That leads dynamic pager to interpret the + * failure as a serious error and notify all it's clients that swap is running low. + * That's how we get the loginwindow "Resume / Force Quit Applications" dialog to appear. + * + * NOTE: + * If the kernel has already created multiple swapfiles by the time the compressor + * pool is running low (and it has to play this trick), dynamic pager won't be able to + * create a file in user-space and, that too will lead to a similar notification blast + * to all of it's clients. So, that behaves as desired too. + */ +boolean_t macx_swapon_allowed = TRUE; /* * Routine: macx_swapon @@ -235,6 +271,19 @@ macx_swapon( struct proc *p = current_proc(); int dp_cluster_size; + if (COMPRESSED_PAGER_IS_ACTIVE) { + if (macx_swapon_allowed == FALSE) { + return EINVAL; + } else { + if ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP) || + (vm_compressor_mode == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP)) { + vm_swap_init(); + } + + macx_swapon_allowed = FALSE; + return 0; + } + } AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON); AUDIT_ARG(value32, args->priority); @@ -319,9 +368,6 @@ macx_swapon( goto swapon_bailout; } -#if CONFIG_EMBEDDED - dp_cluster_size = 1 * PAGE_SIZE; -#else if ((dp_isssd = vnode_pager_isSSD(vp)) == TRUE) { /* * keep the cluster size small since the @@ -335,7 +381,6 @@ macx_swapon( */ dp_cluster_size = 0; } -#endif kr = default_pager_backing_store_create(default_pager, -1, /* default priority */ dp_cluster_size, @@ -423,7 +468,6 @@ macx_swapoff( int error; boolean_t funnel_state; vfs_context_t ctx = vfs_context_current(); - struct uthread *ut; int orig_iopol_disk; AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF); @@ -470,14 +514,16 @@ macx_swapoff( } backing_store = (mach_port_t)bs_port_table[i].bs; - ut = get_bsdthread_info(current_thread()); + orig_iopol_disk = proc_get_task_policy(current_task(), current_thread(), + TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL); - orig_iopol_disk = proc_get_thread_selfdiskacc(); - proc_apply_thread_selfdiskacc(IOPOL_THROTTLE); + proc_set_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL, + TASK_POLICY_IOPOL, IOPOL_THROTTLE); kr = default_pager_backing_store_delete(backing_store); - proc_apply_thread_selfdiskacc(orig_iopol_disk); + proc_set_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL, + TASK_POLICY_IOPOL, orig_iopol_disk); switch (kr) { case KERN_SUCCESS: @@ -521,6 +567,11 @@ swapoff_bailout: * Function: * Syscall interface to get general swap statistics */ +extern uint64_t vm_swap_get_total_space(void); +extern uint64_t vm_swap_get_used_space(void); +extern uint64_t vm_swap_get_free_space(void); +extern boolean_t vm_swap_up; + int macx_swapinfo( memory_object_size_t *total_p, @@ -534,53 +585,71 @@ macx_swapinfo( kern_return_t kr; error = 0; + if (COMPRESSED_PAGER_IS_ACTIVE) { + + if (vm_swap_up == TRUE) { + + *total_p = vm_swap_get_total_space(); + *avail_p = vm_swap_get_free_space(); + *pagesize_p = PAGE_SIZE_64; + *encrypted_p = TRUE; + + } else { + + *total_p = 0; + *avail_p = 0; + *pagesize_p = 0; + *encrypted_p = FALSE; + } + } else { - /* - * Get a handle on the default pager. - */ - default_pager = MEMORY_OBJECT_DEFAULT_NULL; - kr = host_default_memory_manager(host_priv_self(), &default_pager, 0); - if (kr != KERN_SUCCESS) { - error = EAGAIN; /* XXX why EAGAIN ? 
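macx_swapoff()'s throttling above migrates from the removed per-uthread helpers to the unified task-policy API; the save/override/restore shape is the part worth copying:

    int orig = proc_get_task_policy(current_task(), current_thread(),
                                    TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL);
    proc_set_task_policy(current_task(), current_thread(),
                         TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL,
                         IOPOL_THROTTLE);
    kern_return_t kr = default_pager_backing_store_delete(backing_store);
    proc_set_task_policy(current_task(), current_thread(),
                         TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL, orig);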
*/ - goto done; - } - if (default_pager == MEMORY_OBJECT_DEFAULT_NULL) { /* - * The default pager has not initialized yet, - * so it can't be using any swap space at all. + * Get a handle on the default pager. */ - *total_p = 0; - *avail_p = 0; - *pagesize_p = 0; - *encrypted_p = FALSE; - goto done; - } - - /* - * Get swap usage data from default pager. - */ - kr = default_pager_info_64(default_pager, &dpi64); - if (kr != KERN_SUCCESS) { - error = ENOTSUP; - goto done; - } + default_pager = MEMORY_OBJECT_DEFAULT_NULL; + kr = host_default_memory_manager(host_priv_self(), &default_pager, 0); + if (kr != KERN_SUCCESS) { + error = EAGAIN; /* XXX why EAGAIN ? */ + goto done; + } + if (default_pager == MEMORY_OBJECT_DEFAULT_NULL) { + /* + * The default pager has not initialized yet, + * so it can't be using any swap space at all. + */ + *total_p = 0; + *avail_p = 0; + *pagesize_p = 0; + *encrypted_p = FALSE; + goto done; + } + + /* + * Get swap usage data from default pager. + */ + kr = default_pager_info_64(default_pager, &dpi64); + if (kr != KERN_SUCCESS) { + error = ENOTSUP; + goto done; + } - /* - * Provide default pager info to caller. - */ - *total_p = dpi64.dpi_total_space; - *avail_p = dpi64.dpi_free_space; - *pagesize_p = dpi64.dpi_page_size; - if (dpi64.dpi_flags & DPI_ENCRYPTED) { - *encrypted_p = TRUE; - } else { - *encrypted_p = FALSE; - } + /* + * Provide default pager info to caller. + */ + *total_p = dpi64.dpi_total_space; + *avail_p = dpi64.dpi_free_space; + *pagesize_p = dpi64.dpi_page_size; + if (dpi64.dpi_flags & DPI_ENCRYPTED) { + *encrypted_p = TRUE; + } else { + *encrypted_p = FALSE; + } done: - if (default_pager != MEMORY_OBJECT_DEFAULT_NULL) { - /* release our handle on default pager */ - memory_object_default_deallocate(default_pager); + if (default_pager != MEMORY_OBJECT_DEFAULT_NULL) { + /* release our handle on default pager */ + memory_object_default_deallocate(default_pager); + } } return error; } diff --git a/bsd/vm/vm_compressor_backing_file.c b/bsd/vm/vm_compressor_backing_file.c new file mode 100644 index 000000000..0c8453eea --- /dev/null +++ b/bsd/vm/vm_compressor_backing_file.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void vm_swapfile_open(const char *path, vnode_t *vp); +void vm_swapfile_close(uint64_t path, vnode_t vp); +int vm_swapfile_preallocate(vnode_t vp, uint64_t *size); +uint64_t vm_swapfile_get_blksize(vnode_t vp); +uint64_t vm_swapfile_get_transfer_size(vnode_t vp); +int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags); + +void +vm_swapfile_open(const char *path, vnode_t *vp) +{ + int error = 0; + vfs_context_t ctx = vfs_context_current(); + + if ((error = vnode_open(path, (O_CREAT | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { + printf("Failed to open swap file %d\n", error); + *vp = NULL; + return; + } + + vnode_put(*vp); +} + +uint64_t +vm_swapfile_get_blksize(vnode_t vp) +{ + return ((uint64_t)vfs_devblocksize(vnode_mount(vp))); +} + +uint64_t +vm_swapfile_get_transfer_size(vnode_t vp) +{ + return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize); +} + +int unlink1(vfs_context_t, struct nameidata *, int); + +void +vm_swapfile_close(uint64_t path_addr, vnode_t vp) +{ + struct nameidata nd; + vfs_context_t context = vfs_context_current(); + int error = 0; + + vnode_getwithref(vp); + vnode_close(vp, 0, context); + + NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE, + path_addr, context); + + error = unlink1(context, &nd, 0); +} + +int +vm_swapfile_preallocate(vnode_t vp, uint64_t *size) +{ + int error = 0; + uint64_t file_size = 0; + vfs_context_t ctx = NULL; + + + ctx = vfs_context_current(); + +#if CONFIG_PROTECT + { +#if 0 // + + if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) { + if(config_protect_bug) { + printf("swap protection class set failed with %d\n", error); + } else { + panic("swap protection class set failed with %d\n", error); + } + } +#endif + /* initialize content protection keys manually */ + if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { + printf("Content Protection key failure on swap: %d\n", error); + vnode_put(vp); + vp = NULL; + goto done; + } + } +#endif + + /* + * This check exists because dynamic_pager creates the 1st swapfile, + * swapfile0, for us from user-space in a supported manner (with IO_NOZEROFILL etc). + * + * If dynamic_pager, in the future, discontinues creating that file, + * then we need to change this check to a panic / assert or return an error. + * That's because we can't be sure if the file has been created correctly. 
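 + * + * As a rough sketch, the check reduces to the following (an illustrative + * condensation of this function's own code below, not additional behavior): + * + *	vnode_size(vp, &file_size, ctx); + *	if (file_size == 0) + *		vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);	(kernel-created swapfile) + *	else + *		*size = file_size;				(swapfile0 from dynamic_pager)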
+ */ + + if ((error = vnode_size(vp, (off_t*) &file_size, ctx)) != 0) { + + printf("vnode_size (existing files) for swap files failed: %d\n", error); + goto done; + } else { + + if (file_size == 0) { + + error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); + + if (error) { + printf("vnode_setsize for swap files failed: %d\n", error); + goto done; + } + } else { + + *size = file_size; + } + } + + vnode_lock_spin(vp); + SET(vp->v_flag, VSWAP); + vnode_unlock(vp); +done: + return error; +} + +int +vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags) +{ + int error = 0; + uint64_t io_size = npages * PAGE_SIZE_64; +#if 1 + kern_return_t kr = KERN_SUCCESS; + upl_t upl = NULL; + unsigned int count = 0; + int upl_create_flags = 0, upl_control_flags = 0; + upl_size_t upl_size = 0; + + upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE; + upl_control_flags = UPL_IOSYNC; + + if ((flags & SWAP_READ) == FALSE) { + upl_create_flags |= UPL_COPYOUT_FROM; + } + + upl_size = io_size; + kr = vm_map_create_upl( kernel_map, + start, + &upl_size, + &upl, + NULL, + &count, + &upl_create_flags); + + if (kr != KERN_SUCCESS || (upl_size != io_size)) { + panic("vm_map_create_upl failed with %d\n", kr); + } + + if (flags & SWAP_READ) { + vnode_pagein(vp, + upl, + 0, + offset, + io_size, + upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, + &error); + if (error) { +#if DEBUG + printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); +#else /* DEBUG */ + printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error); +#endif /* DEBUG */ + } + + } else { + vnode_pageout(vp, + upl, + 0, + offset, + io_size, + upl_control_flags, + &error); + if (error) { +#if DEBUG + printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); +#else /* DEBUG */ + printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error); +#endif /* DEBUG */ + } + } + return error; + +#else /* 1 */ + vfs_context_t ctx; + ctx = vfs_context_kernel(); + + error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset, + UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); + + if (error) { + printf("vn_rdwr: Swap I/O failed with %d\n", error); + } + return error; +#endif /* 1 */ +} + + +#define MAX_BATCH_TO_TRIM 256 + +u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl) +{ + int error = 0; + int trim_index = 0; + u_int32_t blocksize = 0; + struct vnode *devvp; + dk_extent_t *extents; + dk_unmap_t unmap; + + if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) + return (ENOTSUP); + + if (tl == NULL) + return (0); + + /* + * Get the underlying device vnode and physical block size + */ + devvp = vp->v_mount->mnt_devvp; + blocksize = vp->v_mount->mnt_devblocksize; + + extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); + + memset (&unmap, 0, sizeof(dk_unmap_t)); + unmap.extents = extents; + + while (tl) { + daddr64_t io_blockno; /* Block number corresponding to the start of the extent */ + size_t io_bytecount; /* Number of bytes in current extent for the specified range */ + size_t trimmed; + size_t remaining_length; + off_t current_offset; + + current_offset = tl->tl_offset; + remaining_length = tl->tl_length; + trimmed = 0; + + /* + * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single + * extent from the blockmap call. 
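 A badly fragmented file, for instance, may come back as many + * short extents; that is why the loop below batches up to MAX_BATCH_TO_TRIM + * extents into each DKIOCUNMAP request.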
Keep looping until we are sure we've hit + * the whole range, or until we encounter an error. + */ + while (trimmed < tl->tl_length) { + /* + * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the + * specified offset. It returns blocks in contiguous chunks, so if the logical range is + * broken into multiple extents, it must be called multiple times, increasing the offset + * in each call to ensure that the entire range is covered. + */ + error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, + &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL); + + if (error) { + goto trim_exit; + } + + extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize; + extents[trim_index].length = io_bytecount; + + trim_index++; + + if (trim_index == MAX_BATCH_TO_TRIM) { + + unmap.extentsCount = trim_index; + error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); + + if (error) { + goto trim_exit; + } + trim_index = 0; + } + trimmed += io_bytecount; + current_offset += io_bytecount; + remaining_length -= io_bytecount; + } + tl = tl->tl_next; + } + if (trim_index) { + + unmap.extentsCount = trim_index; + error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); + } +trim_exit: + kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); + + return error; +} diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index 0dd7823ce..52f21a6ce 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -99,7 +99,6 @@ int _shared_region_map( struct proc*, int, unsigned int, struct shared_file_mapping_np*, memory_object_control_t*, struct shared_file_mapping_np*); -int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t, mach_vm_offset_t, mach_vm_size_t, memory_object_control_t); int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, ""); @@ -178,9 +177,15 @@ useracc( user_size_t len, int prot) { + vm_map_t map; + + map = current_map(); return (vm_map_check_protection( - current_map(), - vm_map_trunc_page(addr), vm_map_round_page(addr+len), + map, + vm_map_trunc_page(addr, + vm_map_page_mask(map)), + vm_map_round_page(addr+len, + vm_map_page_mask(map)), prot == B_READ ?
VM_PROT_READ : VM_PROT_WRITE)); } @@ -189,10 +194,17 @@ vslock( user_addr_t addr, user_size_t len) { - kern_return_t kret; - kret = vm_map_wire(current_map(), vm_map_trunc_page(addr), - vm_map_round_page(addr+len), - VM_PROT_READ | VM_PROT_WRITE ,FALSE); + kern_return_t kret; + vm_map_t map; + + map = current_map(); + kret = vm_map_wire(map, + vm_map_trunc_page(addr, + vm_map_page_mask(map)), + vm_map_round_page(addr+len, + vm_map_page_mask(map)), + VM_PROT_READ | VM_PROT_WRITE, + FALSE); switch (kret) { case KERN_SUCCESS: @@ -219,14 +231,17 @@ vsunlock( vm_map_offset_t vaddr; ppnum_t paddr; #endif /* FIXME ] */ - kern_return_t kret; + kern_return_t kret; + vm_map_t map; + + map = current_map(); #if FIXME /* [ */ if (dirtied) { pmap = get_task_pmap(current_task()); - for (vaddr = vm_map_trunc_page(addr); - vaddr < vm_map_round_page(addr+len); - vaddr += PAGE_SIZE) { + for (vaddr = vm_map_trunc_page(addr, PAGE_MASK); + vaddr < vm_map_round_page(addr+len, PAGE_MASK); + vaddr += PAGE_SIZE) { paddr = pmap_extract(pmap, vaddr); pg = PHYS_TO_VM_PAGE(paddr); vm_page_set_modified(pg); @@ -236,8 +251,12 @@ vsunlock( #ifdef lint dirtied++; #endif /* lint */ - kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr), - vm_map_round_page(addr+len), FALSE); + kret = vm_map_unwire(map, + vm_map_trunc_page(addr, + vm_map_page_mask(map)), + vm_map_round_page(addr+len, + vm_map_page_mask(map)), + FALSE); switch (kret) { case KERN_SUCCESS: return (0); @@ -759,7 +778,6 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) } target = targetproc->task; -#ifndef CONFIG_EMBEDDED if (target != TASK_NULL) { mach_port_t tfpport; @@ -786,7 +804,6 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) } } } -#endif task_reference(target); error = task_pidsuspend(target); @@ -799,8 +816,8 @@ pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) } #if CONFIG_MEMORYSTATUS else { - memorystatus_on_suspend(pid); - } + memorystatus_on_suspend(targetproc); + } #endif task_deallocate(target); @@ -845,7 +862,6 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) } target = targetproc->task; -#ifndef CONFIG_EMBEDDED if (target != TASK_NULL) { mach_port_t tfpport; @@ -872,12 +888,11 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) } } } -#endif task_reference(target); #if CONFIG_MEMORYSTATUS - memorystatus_on_resume(pid); + memorystatus_on_resume(targetproc); #endif error = task_pidresume(target); @@ -885,7 +900,11 @@ pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) if (error == KERN_INVALID_ARGUMENT) { error = EINVAL; } else { - error = EPERM; + if (error == KERN_MEMORY_ERROR) { + psignal(targetproc, SIGKILL); + error = EIO; + } else + error = EPERM; } } @@ -899,127 +918,6 @@ out: return error; } -#if CONFIG_EMBEDDED -kern_return_t -pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret) -{ - int error = 0; - proc_t targetproc = PROC_NULL; - int pid = args->pid; - -#ifndef CONFIG_FREEZE - #pragma unused(pid) -#else - -#if CONFIG_MACF - error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE); - if (error) { - error = EPERM; - goto out; - } -#endif - - /* - * The only accepted pid value here is currently -1, since we just kick off the freeze thread - * here - individual ids aren't required. However, it's intended that that this call is to change - * in the future to initiate freeze of individual processes. 
In anticipation, we'll obtain the - * process handle for potentially valid values and call task_for_pid_posix_check(); this way, everything - * is validated correctly and set for further refactoring. See for more details. - */ - if (pid >= 0) { - targetproc = proc_find(pid); - if (targetproc == PROC_NULL) { - error = ESRCH; - goto out; - } - - if (!task_for_pid_posix_check(targetproc)) { - error = EPERM; - goto out; - } - } - - if (pid == -1) { - memorystatus_on_inactivity(pid); - } else { - error = EPERM; - } - -out: - -#endif /* CONFIG_FREEZE */ - - if (targetproc != PROC_NULL) - proc_rele(targetproc); - *ret = error; - return error; -} - -int -pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret) -{ - int error = 0; - proc_t targetproc = PROC_NULL; - struct filedesc *fdp; - struct fileproc *fp; - int pid = args->pid; - int level = args->level; - int i; - - if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC && - level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) - { - error = EINVAL; - goto out; - } - -#if CONFIG_MACF - error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS); - if (error) { - error = EPERM; - goto out; - } -#endif - - targetproc = proc_find(pid); - if (targetproc == PROC_NULL) { - error = ESRCH; - goto out; - } - - if (!task_for_pid_posix_check(targetproc)) { - error = EPERM; - goto out; - } - - proc_fdlock(targetproc); - fdp = targetproc->p_fd; - - for (i = 0; i < fdp->fd_nfiles; i++) { - struct socket *sockp; - - fp = fdp->fd_ofiles[i]; - if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 || - fp->f_fglob->fg_type != DTYPE_SOCKET) - { - continue; - } - - sockp = (struct socket *)fp->f_fglob->fg_data; - - /* Call networking stack with socket and level */ - (void) socket_defunct(targetproc, sockp, level); - } - - proc_fdunlock(targetproc); - -out: - if (targetproc != PROC_NULL) - proc_rele(targetproc); - *ret = error; - return error; -} -#endif /* CONFIG_EMBEDDED */ static int sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1, @@ -1032,7 +930,7 @@ sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1, if (error || req->newptr == USER_ADDR_NULL) return(error); - if (!is_suser()) + if (!kauth_cred_issuser(kauth_cred_get())) return(EPERM); if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) { @@ -1197,7 +1095,9 @@ _shared_region_map( struct vnode_attr va; off_t fs; memory_object_size_t file_size; +#if CONFIG_MACF vm_prot_t maxprot = VM_PROT_ALL; +#endif memory_object_control_t file_control; struct vm_shared_region *shared_region; @@ -1220,12 +1120,12 @@ _shared_region_map( } /* make sure we're attempting to map a vnode */ - if (fp->f_fglob->fg_type != DTYPE_VNODE) { + if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map: " "fd=%d not a vnode (type=%d)\n", current_thread(), p->p_pid, p->p_comm, - fd, fp->f_fglob->fg_type)); + fd, FILEGLOB_DTYPE(fp->f_fglob))); error = EINVAL; goto done; } @@ -1445,49 +1345,6 @@ done: return error; } -int -_shared_region_slide(uint32_t slide, - mach_vm_offset_t entry_start_address, - mach_vm_size_t entry_size, - mach_vm_offset_t slide_start, - mach_vm_size_t slide_size, - memory_object_control_t sr_file_control) -{ - void *slide_info_entry = NULL; - int error; - - if((error = vm_shared_region_slide_init(slide_size, entry_start_address, entry_size, slide, sr_file_control))) { - printf("slide_info initialization failed with kr=%d\n", error); - goto done; - } - - slide_info_entry = 
vm_shared_region_get_slide_info_entry(); - if (slide_info_entry == NULL){ - error = EFAULT; - } else { - error = copyin((user_addr_t)slide_start, - slide_info_entry, - (vm_size_t)slide_size); - } - if (error) { - goto done; - } - - if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) { - error = EFAULT; - printf("Sanity Check failed for slide_info\n"); - } else { -#if DEBUG - printf("Succesfully init slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n", - (void*)(uintptr_t)entry_start_address, - (unsigned long)entry_size, - (unsigned long)slide_size); -#endif - } -done: - return error; -} - int shared_region_map_and_slide_np( struct proc *p, @@ -1553,7 +1410,7 @@ shared_region_map_and_slide_np( } if (slide) { - kr = _shared_region_slide(slide, + kr = vm_shared_region_slide(slide, mapping_to_slide.sfm_file_offset, mapping_to_slide.sfm_size, uap->slide_start, @@ -1602,6 +1459,10 @@ extern unsigned int vm_page_purgeable_wired_count; SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_purgeable_wired_count, 0, "Wired purgeable page count"); +extern int madvise_free_debug; +SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED, + &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)"); + SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_stats_reusable.reusable_count, 0, "Reusable page count"); SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED, @@ -1626,6 +1487,8 @@ SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_stats_reusable.can_reuse_success, ""); SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_stats_reusable.can_reuse_failure, ""); +SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED, + &vm_page_stats_reusable.reusable_reclaimed, ""); extern unsigned int vm_page_free_count, vm_page_speculative_count; @@ -1683,7 +1546,7 @@ void vm_pageout_io_throttle(void) { */ if (uthread->uu_lowpri_window) { - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); } } diff --git a/bsd/vm/vnode_pager.c b/bsd/vm/vnode_pager.c index 93441bcb8..4bc408e87 100644 --- a/bsd/vm/vnode_pager.c +++ b/bsd/vm/vnode_pager.c @@ -90,7 +90,7 @@ vnode_pager_throttle() ut = get_bsdthread_info(current_thread()); if (ut->uu_lowpri_window) - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); } @@ -112,15 +112,14 @@ vnode_pager_isinuse(struct vnode *vp) } uint32_t -vnode_pager_return_hard_throttle_limit(struct vnode *vp, uint32_t *limit, uint32_t hard_throttle) +vnode_pager_return_throttle_io_limit(struct vnode *vp, uint32_t *limit) { - return(cluster_hard_throttle_limit(vp, limit, hard_throttle)); + return(cluster_throttle_io_limit(vp, limit)); } vm_object_offset_t vnode_pager_get_filesize(struct vnode *vp) { - return (vm_object_offset_t) ubc_getsize(vp); } @@ -511,10 +510,14 @@ vnode_pagein( int first_pg; int xsize; int must_commit = 1; + int ignore_valid_page_check = 0; if (flags & UPL_NOCOMMIT) must_commit = 0; + if (flags & UPL_IGNORE_VALID_PAGE_CHECK) + ignore_valid_page_check = 1; + if (UBCINFOEXISTS(vp) == 0) { result = PAGER_ERROR; error = PAGER_ERROR; @@ -605,13 +608,19 @@ vnode_pagein( if (upl_page_present(pl, last_pg)) break; } - /* - * skip over 'valid' pages... 
we don't want to issue I/O for these - */ - for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) { - if (!upl_valid_page(pl, last_pg)) - break; + + if (ignore_valid_page_check == 1) { + start_pg = last_pg; + } else { + /* + * skip over 'valid' pages... we don't want to issue I/O for these + */ + for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) { + if (!upl_valid_page(pl, last_pg)) + break; + } } + if (last_pg > start_pg) { /* * we've found a range of valid pages @@ -648,7 +657,7 @@ vnode_pagein( * 'cluster_io' */ for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) { - if (upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg)) + if (( !ignore_valid_page_check && upl_valid_page(pl, last_pg)) || !upl_page_present(pl, last_pg)) break; } if (last_pg > start_pg) { diff --git a/config/BSDKernel.exports b/config/BSDKernel.exports index a424e367b..a7abbc7c9 100644 --- a/config/BSDKernel.exports +++ b/config/BSDKernel.exports @@ -346,6 +346,8 @@ _kauth_unlisten_scope _kdebug_enable _kernel_debug _kernel_debug1 +_kernel_debug_enter +_kernel_debug_register_callback _kernproc _kev_msg_post _kev_vendor_code_find diff --git a/config/BSDKernel.i386.exports b/config/BSDKernel.i386.exports deleted file mode 100644 index 1cdfeddfe..000000000 --- a/config/BSDKernel.i386.exports +++ /dev/null @@ -1,38 +0,0 @@ -_file_vnode -_in6_cksum:_inet6_cksum -_is_suser -_is_suser1 -_kauth_cred_rele -_mbuf_data -_mbuf_inet6_cksum -_mbuf_len -_mbuf_next -_mbuf_nextpkt -_mbuf_pkthdr_header -_mbuf_pkthdr_len -_mbuf_pkthdr_rcvif -_mbuf_pkthdr_setheader -_mbuf_setlen -_mbuf_setnextpkt -_mbuf_type -_nd6_lookup_ipv6 -_proc_ucred -_rootvnode -_spl0 -_splbio -_splclock -_splhigh -_splimp -_spllo -_spln -_sploff -_splon -_splpower -_splsched -_splsoftclock -_spltty -_splvm -_splx -_suser -_ubc_setcred -_ubc_sync_range diff --git a/config/Dummy.exports b/config/Dummy.exports deleted file mode 100644 index fe7149c32..000000000 --- a/config/Dummy.exports +++ /dev/null @@ -1 +0,0 @@ -# Dummy exports, exists for stub architectures like PPC diff --git a/config/IOKit.exports b/config/IOKit.exports index 0cb629e45..aaf27f1a9 100644 --- a/config/IOKit.exports +++ b/config/IOKit.exports @@ -1008,6 +1008,7 @@ __ZN8IOMapper10gMetaClassE __ZN8IOMapper10superClassE __ZN8IOMapper17setMapperRequiredEb __ZN8IOMapper19copyMapperForDeviceEP9IOService +__ZN8IOMapper28copyMapperForDeviceWithIndexEP9IOServicej __ZN8IOMapper19waitForSystemMapperEv __ZN8IOMapper13iovmMapMemoryEP8OSObjectjjjP13upl_page_infoPK21IODMAMapSpecification __ZN8IOMapper4freeEv diff --git a/config/IOKit.i386.exports b/config/IOKit.i386.exports deleted file mode 100644 index 77389f47a..000000000 --- a/config/IOKit.i386.exports +++ /dev/null @@ -1,571 +0,0 @@ -_IOLockUnlock_darwin10:_lck_mtx_unlock_darwin10 -_IOOFPathMatching -_IOPanic -_IOSpinUnlock -_IOTrySpinLock -_PE_parse_boot_arg -__Z16IODTFindSlotNameP15IORegistryEntrym -__Z16IODTSetResolvingP15IORegistryEntryPFlmPmS1_EPFvS0_PhS4_S4_E -__Z17IODTGetCellCountsP15IORegistryEntryPmS1_ -__Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_ -__Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc -__ZN10IOWorkLoop19workLoopWithOptionsEm -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop6Ev -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop7Ev -__ZN10IOWorkLoop9sleepGateEPv12UnsignedWidem -__ZN10IOWorkLoop9sleepGateEPvm -__ZN11IOCatalogue11findDriversEP12OSDictionaryPl 
-__ZN11IOCatalogue11findDriversEP9IOServicePl -__ZN11IODataQueue11withEntriesEmm -__ZN11IODataQueue12withCapacityEm -__ZN11IODataQueue15initWithEntriesEmm -__ZN11IODataQueue16initWithCapacityEm -__ZN11IODataQueue7enqueueEPvm -__ZN11IOMemoryMap10getAddressEv -__ZN11IOMemoryMap18getPhysicalSegmentEmPm -__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap0Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap1Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap2Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap3Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap4Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap5Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap6Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap7Ev -__ZN11IOMemoryMap7getSizeEv -__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm -__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy -__ZN12IODMACommand11OutputBig32EPS_NS_9Segment64EPvm -__ZN12IODMACommand11OutputBig64EPS_NS_9Segment64EPvm -__ZN12IODMACommand11synchronizeEm -__ZN12IODMACommand12OutputHost32EPS_NS_9Segment64EPvm -__ZN12IODMACommand12OutputHost64EPS_NS_9Segment64EPvm -__ZN12IODMACommand14OutputLittle32EPS_NS_9Segment64EPvm -__ZN12IODMACommand14OutputLittle64EPS_NS_9Segment64EPvm -__ZN12IODMACommand15genIOVMSegmentsEPFbPS_NS_9Segment64EPvmEPyS2_Pm -__ZN12IODMACommand15genIOVMSegmentsEPyPvPm -__ZN12IODMACommand17withSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ -__ZN12IODMACommand21initWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperS2_ -__ZN12IODMACommand22_RESERVEDIODMACommand3Ev -__ZN12IODMACommand22_RESERVEDIODMACommand4Ev -__ZN12IODMACommand22_RESERVEDIODMACommand5Ev -__ZN12IODMACommand22_RESERVEDIODMACommand6Ev -__ZN12IODMACommand22_RESERVEDIODMACommand7Ev -__ZN12IODMACommand22_RESERVEDIODMACommand8Ev -__ZN12IODMACommand22_RESERVEDIODMACommand9Ev -__ZN12IODMACommand23_RESERVEDIODMACommand10Ev -__ZN12IODMACommand23_RESERVEDIODMACommand11Ev -__ZN12IODMACommand23_RESERVEDIODMACommand12Ev -__ZN12IODMACommand23_RESERVEDIODMACommand13Ev -__ZN12IODMACommand23_RESERVEDIODMACommand14Ev -__ZN12IODMACommand23_RESERVEDIODMACommand15Ev -__ZN12IODMACommand24prepareWithSpecificationEPFbPS_NS_9Segment64EPvmEhyNS_14MappingOptionsEymP8IOMapperyybb -__ZN12IODMACommand8transferEmyPvy -__ZN12IOUserClient12initWithTaskEP4taskPvm -__ZN12IOUserClient12initWithTaskEP4taskPvmP12OSDictionary -__ZN12IOUserClient15mapClientMemoryEmP4taskmj -__ZN12IOUserClient15sendAsyncResultEPjiPPvm -__ZN12IOUserClient17mapClientMemory64EmP4taskmy -__ZN12IOUserClient17sendAsyncResult64EPyiS0_m -__ZN12IOUserClient19clientMemoryForTypeEmPmPP18IOMemoryDescriptor -__ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy -__ZN12IOUserClient22_RESERVEDIOUserClient2Ev -__ZN12IOUserClient22_RESERVEDIOUserClient3Ev -__ZN12IOUserClient22_RESERVEDIOUserClient4Ev -__ZN12IOUserClient22_RESERVEDIOUserClient5Ev -__ZN12IOUserClient22_RESERVEDIOUserClient6Ev -__ZN12IOUserClient22_RESERVEDIOUserClient7Ev -__ZN12IOUserClient22_RESERVEDIOUserClient8Ev -__ZN12IOUserClient22_RESERVEDIOUserClient9Ev -__ZN12IOUserClient23_RESERVEDIOUserClient10Ev -__ZN12IOUserClient23_RESERVEDIOUserClient11Ev -__ZN12IOUserClient23_RESERVEDIOUserClient12Ev -__ZN12IOUserClient23_RESERVEDIOUserClient13Ev -__ZN12IOUserClient23_RESERVEDIOUserClient14Ev -__ZN12IOUserClient23_RESERVEDIOUserClient15Ev -__ZN12IOUserClient23getExternalTrapForIndexEm -__ZN12IOUserClient24getNotificationSemaphoreEmPP9semaphore -__ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem 
-__ZN12IOUserClient24registerNotificationPortEP8ipc_portmm -__ZN12IOUserClient24registerNotificationPortEP8ipc_portmy -__ZN12IOUserClient25getExternalMethodForIndexEm -__ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicem -__ZN12IOUserClient30getExternalAsyncMethodForIndexEm -__ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem -__ZN13IOCommandGate12commandSleepEPv12UnsignedWidem -__ZN13IOCommandGate12commandSleepEPvm -__ZN13IOCommandGate23_RESERVEDIOCommandGate1Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate2Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate3Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate4Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate5Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate6Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate7Ev -__ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopm -__ZN13IOCommandPool23_RESERVEDIOCommandPool0Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool1Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool2Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool3Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool4Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool5Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool6Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool7Ev -__ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopm -__ZN13IOEventSource23_RESERVEDIOEventSource0Ev -__ZN13IOEventSource23_RESERVEDIOEventSource1Ev -__ZN13IOEventSource23_RESERVEDIOEventSource2Ev -__ZN13IOEventSource23_RESERVEDIOEventSource3Ev -__ZN13IOEventSource23_RESERVEDIOEventSource4Ev -__ZN13IOEventSource23_RESERVEDIOEventSource5Ev -__ZN13IOEventSource23_RESERVEDIOEventSource6Ev -__ZN13IOEventSource23_RESERVEDIOEventSource7Ev -__ZN13IOEventSource9sleepGateEPv12UnsignedWidem -__ZN13IOEventSource9sleepGateEPvm -__ZN13_IOServiceJob8startJobEP9IOServiceim -__ZN14IOCommandQueue10gMetaClassE -__ZN14IOCommandQueue10superClassE -__ZN14IOCommandQueue12checkForWorkEv -__ZN14IOCommandQueue12commandQueueEP8OSObjectPFvS1_PvS2_S2_S2_Ei -__ZN14IOCommandQueue14enqueueCommandEbPvS0_S0_S0_ -__ZN14IOCommandQueue15performAndFlushEP8OSObjectPFvS1_PvS2_S2_S2_E -__ZN14IOCommandQueue4freeEv -__ZN14IOCommandQueue4initEP8OSObjectPFvS1_PvS2_S2_S2_Ei -__ZN14IOCommandQueue9MetaClassC1Ev -__ZN14IOCommandQueue9MetaClassC2Ev -__ZN14IOCommandQueue9metaClassE -__ZN14IOCommandQueueC1EPK11OSMetaClass -__ZN14IOCommandQueueC1Ev -__ZN14IOCommandQueueC2EPK11OSMetaClass -__ZN14IOCommandQueueC2Ev -__ZN14IOCommandQueueD0Ev -__ZN14IOCommandQueueD2Ev -__ZN14IODeviceMemory12withSubRangeEPS_mm -__ZN14IODeviceMemory13arrayFromListEPNS_11InitElementEm -__ZN14IODeviceMemory9withRangeEmm -__ZN14IOMemoryCursor17withSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm -__ZN14IOMemoryCursor19genPhysicalSegmentsEP18IOMemoryDescriptormPvmmPm -__ZN14IOMemoryCursor21initWithSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm -__ZN14IOPMrootDomain17setSleepSupportedEm -__ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_mP9IOServiceS0_j -__ZN14IOPMrootDomain24receivePowerNotificationEm -__ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_mP9IOServiceS0_j -__ZN15IODMAController13getControllerEP9IOServicem -__ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandim -__ZN15IODMAController20createControllerNameEm -__ZN15IODMAController21registerDMAControllerEm -__ZN15IORegistryEntry25_RESERVEDIORegistryEntry6Ev -__ZN15IORegistryEntry25_RESERVEDIORegistryEntry7Ev -__ZN15IORegistryEntry25_RESERVEDIORegistryEntry8Ev -__ZN15IORegistryEntry25_RESERVEDIORegistryEntry9Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry10Ev 
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry11Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry12Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry13Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry14Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry15Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry16Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry17Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry18Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry19Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry20Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry21Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry22Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry23Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry24Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry25Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry26Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry27Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry28Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry29Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry30Ev -__ZN15IORegistryEntry26_RESERVEDIORegistryEntry31Ev -__ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m -__ZN16IODMAEventSource15startDMACommandEP12IODMACommand11IODirectionmm -__ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandim -__ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandimES8_m -__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionarymPKc -__ZN16IORangeAllocator10deallocateEmm -__ZN16IORangeAllocator12allocElementEm -__ZN16IORangeAllocator13allocateRangeEmm -__ZN16IORangeAllocator14deallocElementEm -__ZN16IORangeAllocator28setFragmentCapacityIncrementEm -__ZN16IORangeAllocator4initEmmmm -__ZN16IORangeAllocator8allocateEmPmm -__ZN16IORangeAllocator9withRangeEmmmm -__ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm -__ZN17IOBigMemoryCursor17withSpecificationEmmm -__ZN17IOBigMemoryCursor21initWithSpecificationEmmm -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface0Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface1Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface2Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface3Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface4Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface5Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface6Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface7Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface8Ev -__ZN17IOPolledInterface27_RESERVEDIOPolledInterface9Ev -__ZN17IOPolledInterface28_RESERVEDIOPolledInterface10Ev -__ZN17IOPolledInterface28_RESERVEDIOPolledInterface11Ev -__ZN17IOPolledInterface28_RESERVEDIOPolledInterface12Ev -__ZN17IOPolledInterface28_RESERVEDIOPolledInterface13Ev -__ZN17IOPolledInterface28_RESERVEDIOPolledInterface14Ev -__ZN17IOPolledInterface28_RESERVEDIOPolledInterface15Ev -__ZN17IOSharedDataQueue11withEntriesEmm -__ZN17IOSharedDataQueue12withCapacityEm -__ZN17IOSharedDataQueue16initWithCapacityEm -__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue0Ev -__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue1Ev -__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue2Ev -__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue3Ev -__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue4Ev -__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue5Ev -__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue6Ev -__ZN17IOSharedDataQueue27_RESERVEDIOSharedDataQueue7Ev -__ZN17IOSharedDataQueue7dequeueEPvPm -__ZN18IOMemoryDescriptor10setMappingEP4taskjm 
-__ZN18IOMemoryDescriptor10withRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN18IOMemoryDescriptor10writeBytesEmPKvm -__ZN18IOMemoryDescriptor11makeMappingEPS_P4taskjmmm -__ZN18IOMemoryDescriptor11withAddressEPvm11IODirection -__ZN18IOMemoryDescriptor11withAddressEjm11IODirectionP4task -__ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper -__ZN18IOMemoryDescriptor12setPurgeableEmPm -__ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection -__ZN18IOMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN18IOMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN18IOMemoryDescriptor15initWithAddressEjm11IODirectionP4task -__ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper -__ZN18IOMemoryDescriptor16getSourceSegmentEmPm -__ZN18IOMemoryDescriptor16performOperationEmmm -__ZN18IOMemoryDescriptor16withAddressRangeEyymP4task -__ZN18IOMemoryDescriptor17getVirtualSegmentEmPm -__ZN18IOMemoryDescriptor17withAddressRangesEP14IOAddressRangemmP4task -__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPm -__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPmm -__ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN18IOMemoryDescriptor19createMappingInTaskEP4taskymyy -__ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection -__ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm -__ZN18IOMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN18IOMemoryDescriptor23initWithPhysicalAddressEmm11IODirection -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev -__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor9Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor10Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor11Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev -__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev -__ZN18IOMemoryDescriptor3mapEP4taskjmmm -__ZN18IOMemoryDescriptor3mapEm -__ZN18IOMemoryDescriptor5doMapEP7_vm_mapPjmmm -__ZN18IOMemoryDescriptor6setTagEm -__ZN18IOMemoryDescriptor7doUnmapEP7_vm_mapjm -__ZN18IOMemoryDescriptor9readBytesEmPvm -__ZN18IORegistryIterator11iterateOverEP15IORegistryEntryPK15IORegistryPlanem -__ZN18IORegistryIterator11iterateOverEPK15IORegistryPlanem -__ZN18IOTimerEventSource10setTimeoutE12UnsignedWide -__ZN18IOTimerEventSource10setTimeoutE13mach_timespec -__ZN18IOTimerEventSource10setTimeoutEmm -__ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide -__ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec -__ZN18IOTimerEventSource10wakeAtTimeEmm -__ZN18IOTimerEventSource12setTimeoutMSEm -__ZN18IOTimerEventSource12setTimeoutUSEm -__ZN18IOTimerEventSource12wakeAtTimeMSEm -__ZN18IOTimerEventSource12wakeAtTimeUSEm -__ZN18IOTimerEventSource15setTimeoutTicksEm -__ZN18IOTimerEventSource15wakeAtTimeTicksEm -__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource0Ev -__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource1Ev -__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource2Ev -__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource3Ev -__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource4Ev -__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource5Ev -__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource6Ev -__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource7Ev -__ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm -__ZN20IOLittleMemoryCursor17withSpecificationEmmm 
-__ZN20IOLittleMemoryCursor21initWithSpecificationEmmm -__ZN20RootDomainUserClient15setPreventativeEmm -__ZN20RootDomainUserClient26getTargetAndMethodForIndexEPP9IOServicem -__ZN21IOInterruptController10initVectorElP17IOInterruptVector -__ZN21IOInterruptController11causeVectorElP17IOInterruptVector -__ZN21IOInterruptController12enableVectorElP17IOInterruptVector -__ZN21IOInterruptController13getVectorTypeElP17IOInterruptVector -__ZN21IOInterruptController17disableVectorHardElP17IOInterruptVector -__ZN21IOInterruptController17vectorCanBeSharedElP17IOInterruptVector -__ZN21IOInterruptController31_RESERVEDIOInterruptController0Ev -__ZN21IOInterruptController31_RESERVEDIOInterruptController1Ev -__ZN21IOInterruptController31_RESERVEDIOInterruptController2Ev -__ZN21IOInterruptController31_RESERVEDIOInterruptController3Ev -__ZN21IOInterruptController31_RESERVEDIOInterruptController4Ev -__ZN21IOInterruptController31_RESERVEDIOInterruptController5Ev -__ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm -__ZN21IONaturalMemoryCursor17withSpecificationEmmm -__ZN21IONaturalMemoryCursor21initWithSpecificationEmmm -__ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm -__ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection -__ZN21IOSubMemoryDescriptor12setPurgeableEmPm -__ZN21IOSubMemoryDescriptor12withSubRangeEP18IOMemoryDescriptormmm -__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPmm -__ZN21IOSubMemoryDescriptor7prepareE11IODirection -__ZN21IOSubMemoryDescriptor8completeE11IODirection -__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource0Ev -__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource1Ev -__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource2Ev -__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource3Ev -__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource4Ev -__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource5Ev -__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource6Ev -__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource7Ev -__ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorm11IODirectionb -__ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPmm -__ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorm11IODirectionb -__ZN23IOMultiMemoryDescriptor7prepareE11IODirection -__ZN23IOMultiMemoryDescriptor8completeE11IODirection -__ZN24IOBufferMemoryDescriptor11appendBytesEPKvj -__ZN24IOBufferMemoryDescriptor11withOptionsEmjj -__ZN24IOBufferMemoryDescriptor12setDirectionE11IODirection -__ZN24IOBufferMemoryDescriptor12withCapacityEj11IODirectionb -__ZN24IOBufferMemoryDescriptor13initWithBytesEPKvj11IODirectionb -__ZN24IOBufferMemoryDescriptor14getBytesNoCopyEjj -__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj -__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task -__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj -__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy -__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor5Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor6Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor7Ev 
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor8Ev -__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor9Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor10Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor11Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor12Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor13Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor14Ev -__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor15Ev -__ZN24IOBufferMemoryDescriptor9setLengthEj -__ZN24IOBufferMemoryDescriptor9withBytesEPKvj11IODirectionb -__ZN25IOGeneralMemoryDescriptor11setPositionEm -__ZN25IOGeneralMemoryDescriptor11wireVirtualE11IODirection -__ZN25IOGeneralMemoryDescriptor12setPurgeableEmPm -__ZN25IOGeneralMemoryDescriptor13mapIntoKernelEj -__ZN25IOGeneralMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb -__ZN25IOGeneralMemoryDescriptor15initWithAddressEPvm11IODirection -__ZN25IOGeneralMemoryDescriptor15initWithAddressEjm11IODirectionP4task -__ZN25IOGeneralMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper -__ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv -__ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm -__ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm -__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm -__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPmm -__ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm -__ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb -__ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection -__ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPjmmm -__ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapjm -__ZN25IOGeneralMemoryDescriptor7prepareE11IODirection -__ZN25IOGeneralMemoryDescriptor8completeE11IODirection -__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController0Ev -__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController1Ev -__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController2Ev -__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController3Ev -__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource0Ev -__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource1Ev -__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource2Ev -__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource3Ev -__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource4Ev -__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource5Ev -__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource6Ev -__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource7Ev -__ZN29IOInterleavedMemoryDescriptor12withCapacityEm11IODirection -__ZN29IOInterleavedMemoryDescriptor16initWithCapacityEm11IODirection -__ZN29IOInterleavedMemoryDescriptor18getPhysicalSegmentEmPmm -__ZN29IOInterleavedMemoryDescriptor19setMemoryDescriptorEP18IOMemoryDescriptormm -__ZN29IOInterleavedMemoryDescriptor22clearMemoryDescriptorsE11IODirection -__ZN29IOInterleavedMemoryDescriptor7prepareE11IODirection -__ZN29IOInterleavedMemoryDescriptor8completeE11IODirection -__ZN8IOMapper10allocTableEm -__ZN8IOMapper10iovmInsertEjmP13upl_page_infom -__ZN8IOMapper10iovmInsertEjmPjm -__ZN8IOMapper11NewARTTableEmPPvPj -__ZN8IOMapper12FreeARTTableEP6OSDatam -__ZN8IOMapper18_RESERVEDIOMapper4Ev 
-__ZN8IOMapper18_RESERVEDIOMapper5Ev -__ZN8IOMapper18_RESERVEDIOMapper6Ev -__ZN8IOMapper18_RESERVEDIOMapper7Ev -__ZN8IOMapper18_RESERVEDIOMapper8Ev -__ZN8IOMapper18_RESERVEDIOMapper9Ev -__ZN8IOMapper18iovmFreeDMACommandEP12IODMACommandjm -__ZN8IOMapper19_RESERVEDIOMapper10Ev -__ZN8IOMapper19_RESERVEDIOMapper11Ev -__ZN8IOMapper19_RESERVEDIOMapper12Ev -__ZN8IOMapper19_RESERVEDIOMapper13Ev -__ZN8IOMapper19_RESERVEDIOMapper14Ev -__ZN8IOMapper19_RESERVEDIOMapper15Ev -__ZN8IOMapper19iovmAllocDMACommandEP12IODMACommandm -__ZN8IOPMprot10gMetaClassE -__ZN8IOPMprot10superClassE -__ZN8IOPMprot9MetaClassC1Ev -__ZN8IOPMprot9MetaClassC2Ev -__ZN8IOPMprot9metaClassE -__ZN8IOPMprotC1EPK11OSMetaClass -__ZN8IOPMprotC1Ev -__ZN8IOPMprotC2EPK11OSMetaClass -__ZN8IOPMprotC2Ev -__ZN8IOPMprotD0Ev -__ZN8IOPMprotD2Ev -__ZN8IOSyncer10gMetaClassE -__ZN8IOSyncer10superClassE -__ZN8IOSyncer13privateSignalEv -__ZN8IOSyncer4freeEv -__ZN8IOSyncer4initEb -__ZN8IOSyncer4waitEb -__ZN8IOSyncer6createEb -__ZN8IOSyncer6reinitEv -__ZN8IOSyncer6signalEib -__ZN8IOSyncer9MetaClassC1Ev -__ZN8IOSyncer9MetaClassC2Ev -__ZN8IOSyncer9metaClassE -__ZN8IOSyncerC1EPK11OSMetaClass -__ZN8IOSyncerC1Ev -__ZN8IOSyncerC2EPK11OSMetaClass -__ZN8IOSyncerC2Ev -__ZN8IOSyncerD0Ev -__ZN8IOSyncerD2Ev -__ZN9IOService10adjustBusyEl -__ZN9IOService10handleOpenEPS_mPv -__ZN9IOService10systemWakeEv -__ZN9IOService10youAreRootEv -__ZN9IOService11_adjustBusyEl -__ZN9IOService11handleCloseEPS_m -__ZN9IOService11tellClientsEi -__ZN9IOService12clampPowerOnEm -__ZN9IOService12didTerminateEPS_mPb -__ZN9IOService12requestProbeEm -__ZN9IOService12waitForStateEmmP13mach_timespec -__ZN9IOService13getPMworkloopEv -__ZN9IOService13messageClientEmP8OSObjectPvj -__ZN9IOService13newUserClientEP4taskPvmP12OSDictionaryPP12IOUserClient -__ZN9IOService13newUserClientEP4taskPvmPP12IOUserClient -__ZN9IOService13startMatchingEm -__ZN9IOService13waitMatchIdleEm -__ZN9IOService13willTerminateEPS_m -__ZN9IOService14doServiceMatchEm -__ZN9IOService14messageClientsEmPvj -__ZN9IOService14newTemperatureElPS_ -__ZN9IOService14setPowerParentEP17IOPowerConnectionbm -__ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_l -__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j -__ZN9IOService15registerServiceEm -__ZN9IOService15tellChangeDown1Em -__ZN9IOService15tellChangeDown2Em -__ZN9IOService15terminateClientEPS_m -__ZN9IOService15terminatePhase1Em -__ZN9IOService15terminateWorkerEm -__ZN9IOService16ack_timer_tickedEv -__ZN9IOService16command_receivedEPvS0_S0_S0_ -__ZN9IOService16didYouWakeSystemEv -__ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_ -__ZN9IOService16requestTerminateEPS_m -__ZN9IOService16setCPUSnoopDelayEm -__ZN9IOService18doServiceTerminateEm -__ZN9IOService18matchPropertyTableEP12OSDictionaryPl -__ZN9IOService18requireMaxBusStallEm -__ZN9IOService18settleTimerExpiredEv -__ZN9IOService18systemWillShutdownEm -__ZN9IOService19_RESERVEDIOService6Ev -__ZN9IOService19_RESERVEDIOService7Ev -__ZN9IOService19_RESERVEDIOService8Ev -__ZN9IOService19_RESERVEDIOService9Ev -__ZN9IOService19deliverNotificationEPK8OSSymbolmm -__ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator -__ZN9IOService20_RESERVEDIOService10Ev -__ZN9IOService20_RESERVEDIOService11Ev -__ZN9IOService20_RESERVEDIOService12Ev -__ZN9IOService20_RESERVEDIOService13Ev -__ZN9IOService20_RESERVEDIOService14Ev -__ZN9IOService20_RESERVEDIOService15Ev -__ZN9IOService20_RESERVEDIOService16Ev -__ZN9IOService20_RESERVEDIOService17Ev 
-__ZN9IOService20_RESERVEDIOService18Ev -__ZN9IOService20_RESERVEDIOService19Ev -__ZN9IOService20_RESERVEDIOService20Ev -__ZN9IOService20_RESERVEDIOService21Ev -__ZN9IOService20_RESERVEDIOService22Ev -__ZN9IOService20_RESERVEDIOService23Ev -__ZN9IOService20_RESERVEDIOService24Ev -__ZN9IOService20_RESERVEDIOService25Ev -__ZN9IOService20_RESERVEDIOService26Ev -__ZN9IOService20_RESERVEDIOService27Ev -__ZN9IOService20_RESERVEDIOService28Ev -__ZN9IOService20_RESERVEDIOService29Ev -__ZN9IOService20_RESERVEDIOService30Ev -__ZN9IOService20_RESERVEDIOService31Ev -__ZN9IOService20_RESERVEDIOService32Ev -__ZN9IOService20_RESERVEDIOService33Ev -__ZN9IOService20_RESERVEDIOService34Ev -__ZN9IOService20_RESERVEDIOService35Ev -__ZN9IOService20_RESERVEDIOService36Ev -__ZN9IOService20_RESERVEDIOService37Ev -__ZN9IOService20_RESERVEDIOService38Ev -__ZN9IOService20_RESERVEDIOService39Ev -__ZN9IOService20_RESERVEDIOService40Ev -__ZN9IOService20_RESERVEDIOService41Ev -__ZN9IOService20_RESERVEDIOService42Ev -__ZN9IOService20_RESERVEDIOService43Ev -__ZN9IOService20_RESERVEDIOService44Ev -__ZN9IOService20_RESERVEDIOService45Ev -__ZN9IOService20_RESERVEDIOService46Ev -__ZN9IOService20_RESERVEDIOService47Ev -__ZN9IOService22PM_Clamp_Timer_ExpiredEv -__ZN9IOService22powerDomainDidChangeToEmP17IOPowerConnection -__ZN9IOService23acknowledgeNotificationEPvm -__ZN9IOService23addMatchingNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_P10IONotifierES5_S5_l -__ZN9IOService23powerDomainWillChangeToEmP17IOPowerConnection -__ZN9IOService23scheduleTerminatePhase2Em -__ZN9IOService23tellClientsWithResponseEi -__ZN9IOService24PM_idle_timer_expirationEv -__ZN9IOService24mapDeviceMemoryWithIndexEjm -__ZN9IOService26temperatureCriticalForZoneEPS_ -__ZN9IOService27serializedAllowPowerChange2Em -__ZN9IOService28serializedCancelPowerChange2Em -__ZN9IOService4openEPS_mPv -__ZN9IOService5closeEPS_m -__ZN9IOService5probeEPS_Pl -__ZN9IOService6PMfreeEv -__ZN9IOService7messageEmPS_Pv -__ZN9IOService8finalizeEm -__ZN9IOService9terminateEm -__ZNK11IOCatalogue13serializeDataEmP11OSSerialize -__ZNK14IOCommandQueue12getMetaClassEv -__ZNK14IOCommandQueue9MetaClass5allocEv -__ZNK15IORegistryEntry11getPropertyEPK8OSStringPK15IORegistryPlanem -__ZNK15IORegistryEntry11getPropertyEPK8OSSymbolPK15IORegistryPlanem -__ZNK15IORegistryEntry11getPropertyEPKcPK15IORegistryPlanem -__ZNK15IORegistryEntry12copyPropertyEPK8OSStringPK15IORegistryPlanem -__ZNK15IORegistryEntry12copyPropertyEPK8OSSymbolPK15IORegistryPlanem -__ZNK15IORegistryEntry12copyPropertyEPKcPK15IORegistryPlanem -__ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj -__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj -__ZNK8IOPMprot12getMetaClassEv -__ZNK8IOPMprot9MetaClass5allocEv -__ZNK8IOSyncer12getMetaClassEv -__ZNK8IOSyncer9MetaClass5allocEv -__ZTV14IOCommandQueue -__ZTV8IOPMprot -__ZTV8IOSyncer -__ZTVN14IOCommandQueue9MetaClassE -__ZTVN8IOPMprot9MetaClassE -__ZTVN8IOSyncer9MetaClassE -_ev_try_lock -_ev_unlock diff --git a/config/IOKit.x86_64.exports b/config/IOKit.x86_64.exports index 5c2aa9a49..012166184 100644 --- a/config/IOKit.x86_64.exports +++ b/config/IOKit.x86_64.exports @@ -68,6 +68,7 @@ __ZN12IOUserClient17mapClientMemory64EjP4taskjy __ZN12IOUserClient17sendAsyncResult64EPyiS0_j __ZN12IOUserClient19clientMemoryForTypeEjPjPP18IOMemoryDescriptor __ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyy +__ZN12IOUserClient19setAsyncReference64EPyP8ipc_portyyP4task __ZN12IOUserClient22_RESERVEDIOUserClient0Ev __ZN12IOUserClient22_RESERVEDIOUserClient1Ev 
__ZN12IOUserClient22_RESERVEDIOUserClient2Ev @@ -91,6 +92,7 @@ __ZN12IOUserClient24registerNotificationPortEP8ipc_portjj __ZN12IOUserClient24registerNotificationPortEP8ipc_portjy __ZN12IOUserClient25getExternalMethodForIndexEj __ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicej +__ZN12IOUserClient28sendAsyncResult64WithOptionsEPyiS0_jj __ZN12IOUserClient30getExternalAsyncMethodForIndexEj __ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicej __ZN13IOCommandGate12commandSleepEPvj @@ -135,7 +137,7 @@ __ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_jP9IOServiceS0_m __ZN14IOPMrootDomain24receivePowerNotificationEj __ZN14IOPMrootDomain27displayWranglerNotificationEPvS0_jP9IOServiceS0_m __ZN15IODMAController13getControllerEP9IOServicej -__ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandiy +__ZN15IODMAController16notifyDMACommandEP16IODMAEventSourceP12IODMACommandiyy __ZN15IODMAController20createControllerNameEj __ZN15IODMAController21registerDMAControllerEj __ZN15IORegistryEntry25_RESERVEDIORegistryEntry0Ev @@ -170,10 +172,10 @@ __ZN15IORegistryEntry26_RESERVEDIORegistryEntry28Ev __ZN15IORegistryEntry26_RESERVEDIORegistryEntry29Ev __ZN15IORegistryEntry26_RESERVEDIORegistryEntry30Ev __ZN15IORegistryEntry26_RESERVEDIORegistryEntry31Ev -__ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyES8_j +__ZN16IODMAEventSource14dmaEventSourceEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyyES8_j __ZN16IODMAEventSource15startDMACommandEP12IODMACommandjyy -__ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandiy -__ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyES8_j +__ZN16IODMAEventSource16notifyDMACommandEP12IODMACommandiyy +__ZN16IODMAEventSource4initEP8OSObjectP9IOServicePFvS1_PS_P12IODMACommandiyyES8_j __ZN16IOKitDiagnostics12updateOffsetEP12OSDictionaryjPKc __ZN16IORangeAllocator10deallocateEyy __ZN16IORangeAllocator12allocElementEj @@ -220,6 +222,7 @@ __ZN18IOMemoryDescriptor11makeMappingEPS_P4taskyjyy __ZN18IOMemoryDescriptor11withAddressEPvyj __ZN18IOMemoryDescriptor11withOptionsEPvjjP4taskjP8IOMapper __ZN18IOMemoryDescriptor12setPurgeableEjPj +__ZN18IOMemoryDescriptor13getPageCountsEPyS0_ __ZN18IOMemoryDescriptor15initWithOptionsEPvjjP4taskjP8IOMapper __ZN18IOMemoryDescriptor16performOperationEjyy __ZN18IOMemoryDescriptor16withAddressRangeEyyjP4task @@ -400,6 +403,7 @@ __ZN9IOService11_adjustBusyEi __ZN9IOService11handleCloseEPS_j __ZN9IOService12didTerminateEPS_jPb __ZN9IOService12requestProbeEj +__ZN9IOService12updateReportEP19IOReportChannelListjPvS2_ __ZN9IOService13messageClientEjP8OSObjectPvm __ZN9IOService13newUserClientEP4taskPvjP12OSDictionaryPP12IOUserClient __ZN9IOService13newUserClientEP4taskPvjPP12IOUserClient @@ -409,6 +413,7 @@ __ZN9IOService13willTerminateEPS_j __ZN9IOService14doServiceMatchEj __ZN9IOService14messageClientsEjPvm __ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_i +__ZN9IOService15configureReportEP19IOReportChannelListjPvS2_ __ZN9IOService15nextIdleTimeoutEyyj __ZN9IOService15registerServiceEj __ZN9IOService15terminateClientEPS_j @@ -421,8 +426,6 @@ __ZN9IOService18doServiceTerminateEj __ZN9IOService18matchPropertyTableEP12OSDictionaryPi __ZN9IOService18requireMaxBusStallEj __ZN9IOService18systemWillShutdownEj -__ZN9IOService19_RESERVEDIOService0Ev -__ZN9IOService19_RESERVEDIOService1Ev __ZN9IOService19_RESERVEDIOService2Ev __ZN9IOService19_RESERVEDIOService3Ev __ZN9IOService19_RESERVEDIOService4Ev diff --git a/config/Libkern.exports 
b/config/Libkern.exports index b310d501a..be87ea7c5 100644 --- a/config/Libkern.exports +++ b/config/Libkern.exports @@ -47,6 +47,7 @@ _SHA1Update _STRDUP __Z13OSUnserializePKcPP8OSString __Z16OSUnserializeXMLPKcPP8OSString +__Z16OSUnserializeXMLPKcmPP8OSString __ZN10OSIterator10gMetaClassE __ZN10OSIterator10superClassE __ZN10OSIterator9MetaClassC1Ev diff --git a/config/Libkern.i386.exports b/config/Libkern.i386.exports deleted file mode 100644 index f9ef1120e..000000000 --- a/config/Libkern.i386.exports +++ /dev/null @@ -1,142 +0,0 @@ -_OSAddAtomic64 -_OSCompareAndSwap64 -_OSRuntimeFinalizeCPP -_OSRuntimeInitializeCPP -_OSRuntimeUnloadCPP -_OSRuntimeUnloadCPPForSegment -__ZN10OSIterator20_RESERVEDOSIterator0Ev -__ZN10OSIterator20_RESERVEDOSIterator1Ev -__ZN10OSIterator20_RESERVEDOSIterator2Ev -__ZN10OSIterator20_RESERVEDOSIterator3Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass0Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass1Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass2Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass3Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass4Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass5Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass6Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass7Ev -__ZN11OSSerialize21_RESERVEDOSSerialize0Ev -__ZN11OSSerialize21_RESERVEDOSSerialize1Ev -__ZN11OSSerialize21_RESERVEDOSSerialize2Ev -__ZN11OSSerialize21_RESERVEDOSSerialize3Ev -__ZN11OSSerialize21_RESERVEDOSSerialize4Ev -__ZN11OSSerialize21_RESERVEDOSSerialize5Ev -__ZN11OSSerialize21_RESERVEDOSSerialize6Ev -__ZN11OSSerialize21_RESERVEDOSSerialize7Ev -__ZN12OSCollection22_RESERVEDOSCollection2Ev -__ZN12OSCollection22_RESERVEDOSCollection3Ev -__ZN12OSCollection22_RESERVEDOSCollection4Ev -__ZN12OSCollection22_RESERVEDOSCollection5Ev -__ZN12OSCollection22_RESERVEDOSCollection6Ev -__ZN12OSCollection22_RESERVEDOSCollection7Ev -__ZN12OSDictionary22_RESERVEDOSDictionary0Ev -__ZN12OSDictionary22_RESERVEDOSDictionary1Ev -__ZN12OSDictionary22_RESERVEDOSDictionary2Ev -__ZN12OSDictionary22_RESERVEDOSDictionary3Ev -__ZN12OSDictionary22_RESERVEDOSDictionary4Ev -__ZN12OSDictionary22_RESERVEDOSDictionary5Ev -__ZN12OSDictionary22_RESERVEDOSDictionary6Ev -__ZN12OSDictionary22_RESERVEDOSDictionary7Ev -__ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ -__ZN12OSOrderedSet16initWithCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet0Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet1Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet2Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet3Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet4Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet5Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet6Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet7Ev -__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase3Ev -__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase4Ev -__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase5Ev -__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase6Ev -__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase7Ev -__ZN5OSSet15_RESERVEDOSSet0Ev -__ZN5OSSet15_RESERVEDOSSet1Ev -__ZN5OSSet15_RESERVEDOSSet2Ev -__ZN5OSSet15_RESERVEDOSSet3Ev -__ZN5OSSet15_RESERVEDOSSet4Ev -__ZN5OSSet15_RESERVEDOSSet5Ev -__ZN5OSSet15_RESERVEDOSSet6Ev -__ZN5OSSet15_RESERVEDOSSet7Ev -__ZN6OSData16_RESERVEDOSData1Ev -__ZN6OSData16_RESERVEDOSData2Ev -__ZN6OSData16_RESERVEDOSData3Ev -__ZN6OSData16_RESERVEDOSData4Ev -__ZN6OSData16_RESERVEDOSData5Ev -__ZN6OSData16_RESERVEDOSData6Ev -__ZN6OSData16_RESERVEDOSData7Ev -__ZN7OSArray17_RESERVEDOSArray0Ev -__ZN7OSArray17_RESERVEDOSArray1Ev -__ZN7OSArray17_RESERVEDOSArray2Ev 
-__ZN7OSArray17_RESERVEDOSArray3Ev -__ZN7OSArray17_RESERVEDOSArray4Ev -__ZN7OSArray17_RESERVEDOSArray5Ev -__ZN7OSArray17_RESERVEDOSArray6Ev -__ZN7OSArray17_RESERVEDOSArray7Ev -__ZN8OSNumber18_RESERVEDOSNumber0Ev -__ZN8OSNumber18_RESERVEDOSNumber1Ev -__ZN8OSNumber18_RESERVEDOSNumber2Ev -__ZN8OSNumber18_RESERVEDOSNumber3Ev -__ZN8OSNumber18_RESERVEDOSNumber4Ev -__ZN8OSNumber18_RESERVEDOSNumber5Ev -__ZN8OSNumber18_RESERVEDOSNumber6Ev -__ZN8OSNumber18_RESERVEDOSNumber7Ev -__ZN8OSObject18_RESERVEDOSObject0Ev -__ZN8OSObject18_RESERVEDOSObject1Ev -__ZN8OSObject18_RESERVEDOSObject2Ev -__ZN8OSObject18_RESERVEDOSObject3Ev -__ZN8OSObject18_RESERVEDOSObject4Ev -__ZN8OSObject18_RESERVEDOSObject5Ev -__ZN8OSObject18_RESERVEDOSObject6Ev -__ZN8OSObject18_RESERVEDOSObject7Ev -__ZN8OSObject18_RESERVEDOSObject8Ev -__ZN8OSObject18_RESERVEDOSObject9Ev -__ZN8OSObject19_RESERVEDOSObject10Ev -__ZN8OSObject19_RESERVEDOSObject11Ev -__ZN8OSObject19_RESERVEDOSObject12Ev -__ZN8OSObject19_RESERVEDOSObject13Ev -__ZN8OSObject19_RESERVEDOSObject14Ev -__ZN8OSObject19_RESERVEDOSObject15Ev -__ZN8OSString18_RESERVEDOSString0Ev -__ZN8OSString18_RESERVEDOSString1Ev -__ZN8OSString18_RESERVEDOSString2Ev -__ZN8OSString18_RESERVEDOSString3Ev -__ZN8OSString18_RESERVEDOSString4Ev -__ZN8OSString18_RESERVEDOSString5Ev -__ZN8OSString18_RESERVEDOSString6Ev -__ZN8OSString18_RESERVEDOSString7Ev -__ZN8OSString18_RESERVEDOSString8Ev -__ZN8OSString18_RESERVEDOSString9Ev -__ZN8OSString19_RESERVEDOSString10Ev -__ZN8OSString19_RESERVEDOSString11Ev -__ZN8OSString19_RESERVEDOSString12Ev -__ZN8OSString19_RESERVEDOSString13Ev -__ZN8OSString19_RESERVEDOSString14Ev -__ZN8OSString19_RESERVEDOSString15Ev -__ZN8OSSymbol18_RESERVEDOSSymbol0Ev -__ZN8OSSymbol18_RESERVEDOSSymbol1Ev -__ZN8OSSymbol18_RESERVEDOSSymbol2Ev -__ZN8OSSymbol18_RESERVEDOSSymbol3Ev -__ZN8OSSymbol18_RESERVEDOSSymbol4Ev -__ZN8OSSymbol18_RESERVEDOSSymbol5Ev -__ZN8OSSymbol18_RESERVEDOSSymbol6Ev -__ZN8OSSymbol18_RESERVEDOSSymbol7Ev -__ZN9OSBoolean19_RESERVEDOSBoolean0Ev -__ZN9OSBoolean19_RESERVEDOSBoolean1Ev -__ZN9OSBoolean19_RESERVEDOSBoolean2Ev -__ZN9OSBoolean19_RESERVEDOSBoolean3Ev -__ZN9OSBoolean19_RESERVEDOSBoolean4Ev -__ZN9OSBoolean19_RESERVEDOSBoolean5Ev -__ZN9OSBoolean19_RESERVEDOSBoolean6Ev -__ZN9OSBoolean19_RESERVEDOSBoolean7Ev -_lck_mtx_convert_spin -_lck_mtx_lock_spin -_lck_mtx_try_lock_spin -_lck_mtx_unlock_darwin10 -_sprintf -_strcat -_strcpy -_vsprintf diff --git a/config/MACFramework.i386.exports b/config/MACFramework.i386.exports deleted file mode 100644 index aa74fd56a..000000000 --- a/config/MACFramework.i386.exports +++ /dev/null @@ -1,11 +0,0 @@ -_kau_will_audit -_mac_do_machexc -_mac_kalloc -_mac_kalloc_noblock -_mac_kfree -_mac_mbuf_alloc -_mac_mbuf_free -_mac_schedule_userret -_mac_unwire -_mac_wire -_sysctl__security_mac_children diff --git a/config/Mach.exports b/config/Mach.exports index eb47c7d62..acc7d4745 100644 --- a/config/Mach.exports +++ b/config/Mach.exports @@ -24,6 +24,7 @@ _lck_spin_sleep _lck_spin_sleep_deadline _mach_absolute_time _mach_msg_send_from_kernel_proper +_mach_vm_pressure_level_monitor _mach_vm_pressure_monitor _nanoseconds_to_absolutetime _preemption_enabled diff --git a/config/Mach.i386.exports b/config/Mach.i386.exports deleted file mode 100644 index 8b6d67bba..000000000 --- a/config/Mach.i386.exports +++ /dev/null @@ -1,4 +0,0 @@ -_host_vmxon -_host_vmxoff -_mach_msg_send_from_kernel -_semaphore_timedwait diff --git a/config/Makefile b/config/Makefile index 201cbee6c..02cec1c9f 100644 --- a/config/Makefile +++ b/config/Makefile @@ -3,71 
+3,23 @@
 export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
 export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
 include $(MakeInc_cmd)
 include $(MakeInc_def)
-ALL_SUBDIRS =
-
-INSTINC_SUBDIRS =
-INSTINC_SUBDIRS_I386 =
-INSTINC_SUBDIRS_X86_64 =
-INSTINC_SUBDIRS_ARM =
-
-EXPINC_SUBDIRS =
-EXPINC_SUBDIRS_I386 =
-EXPINC_SUBDIRS_X86_64 =
-EXPINC_SUBDIRS_ARM =
-
-COMP_SUBDIRS =
+MD_SUPPORTED_KPI_FILENAME = SupportedKPIs-${CURRENT_ARCH_CONFIG_LC}.txt
+MI_SUPPORTED_KPI_FILENAME = SupportedKPIs-all-archs.txt
-INST_SUBDIRS =
+INSTALL_KEXT_DIR = $(DSTROOT)$(INSTALL_EXTENSIONS_DIR)
-
-INSTALL_KEXT_PLIST_LIST= \
+KEXT_PLIST_LIST = \
 	System.kext/Info.plist \
-	System.kext/PlugIns/Libkern.kext/Info.plist \
-	System.kext/PlugIns/Mach.kext/Info.plist \
-	System.kext/PlugIns/BSDKernel.kext/Info.plist \
-	System.kext/PlugIns/MACFramework.kext/Info.plist \
-	System.kext/PlugIns/IOKit.kext/Info.plist \
 	System.kext/PlugIns/AppleNMI.kext/Info.plist \
 	System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist \
 	System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \
-	System.kext/PlugIns/IOSystemManagement.kext/Info.plist \
-	System.kext/PlugIns/Unsupported.kext/Info.plist \
-	System.kext/PlugIns/Private.kext/Info.plist
-
-INSTALL_KEXT_DIR = /System/Library/Extensions/
-
-
-MD_SUPPORTED_KPI_FILENAME="SupportedKPIs-${ARCH_CONFIG_LC}.txt"
-MI_SUPPORTED_KPI_FILENAME="SupportedKPIs-all-archs.txt"
-
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)
-else
-OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)
-endif
-
-ifeq ($(ARCH_CONFIG),I386)
-SUPPORT_SYSTEM60_KEXT = 1
-else
-SUPPORT_SYSTEM60_KEXT = 0
-endif
-
-ifeq ($(SUPPORT_SYSTEM60_KEXT),1)
-INSTALL_KEXT_PLIST_LIST += \
-	System.kext/PlugIns/System6.0.kext/Info.plist \
-	System.kext/PlugIns/Libkern6.0.kext/Info.plist \
-	System.kext/PlugIns/Mach6.0.kext/Info.plist \
-	System.kext/PlugIns/BSDKernel6.0.kext/Info.plist \
-	System.kext/PlugIns/IOKit6.0.kext/Info.plist
-endif
+	System.kext/PlugIns/IOSystemManagement.kext/Info.plist

 SYMBOL_COMPONENT_LIST = \
-	System6.0 \
 	BSDKernel \
 	IOKit \
 	Libkern \
@@ -76,160 +28,123 @@ SYMBOL_COMPONENT_LIST = \
 	Unsupported \
 	Private

-SYMBOL_SET_BUILD = $(foreach set, $(SYMBOL_COMPONENT_LIST), $(OBJPATH)/$(set).symbolset)
-SYMBOL_SET_FAT = $(foreach set, $(SYMBOL_COMPONENT_LIST), $(OBJROOT)/$(set).symbolset)
+KEXT_MACHO_LIST = $(foreach symbolset,$(filter-out Dummy,$(SYMBOL_COMPONENT_LIST)),System.kext/PlugIns/$(symbolset).kext/$(symbolset))
+KEXT_PLIST_LIST += $(foreach symbolset,$(filter-out Dummy,$(SYMBOL_COMPONENT_LIST)),System.kext/PlugIns/$(symbolset).kext/Info.plist)
+
+SYMROOT_INSTALL_KEXT_MACHO_FILES = $(addprefix $(SYMROOT)/,$(KEXT_MACHO_LIST))
+DSTROOT_INSTALL_KEXT_MACHO_FILES = $(addprefix $(INSTALL_KEXT_DIR)/,$(KEXT_MACHO_LIST))
+
+SYMROOT_INSTALL_KEXT_PLISTS = $(addprefix $(SYMROOT)/,$(KEXT_PLIST_LIST))
+DSTROOT_INSTALL_KEXT_PLISTS = $(addprefix $(INSTALL_KEXT_DIR)/,$(KEXT_PLIST_LIST))
-INSTALL_KEXT_PLISTS = $(addprefix $(DSTROOT)$(INSTALL_KEXT_DIR), $(INSTALL_KEXT_PLIST_LIST))
+EXPORTS_FILES = $(foreach symbolset,$(SYMBOL_COMPONENT_LIST),$(symbolset).exports $(symbolset).$(CURRENT_ARCH_CONFIG_LC).exports) Unused.exports
-$(INSTALL_KEXT_PLISTS): $(DSTROOT)$(INSTALL_KEXT_DIR)% : $(SOURCE)/%
-	@echo Install $< in $@
-	$(_v)$(MKDIR) $(dir $@); \
-	$(RM) $(RMFLAGS) $@; \
-	$(INSTALL) $(DATA_INSTALL_FLAGS) $< $(dir $@)
+SYMBOL_SET_BUILD = $(foreach symbolset, $(SYMBOL_COMPONENT_LIST), $(OBJPATH)/$(symbolset).symbolset)
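The rewritten variable block above replaces the hand-maintained install lists with ones derived from SYMBOL_COMPONENT_LIST: for every component that survives $(filter-out Dummy,...), the $(foreach ...) calls emit both the plugin binary path and its Info.plist path, so each component contributes entries of the form

    System.kext/PlugIns/BSDKernel.kext/BSDKernel
    System.kext/PlugIns/BSDKernel.kext/Info.plist

(illustrative expansion for the BSDKernel component), and the $(addprefix ...) lines then root those paths at $(SYMROOT) and $(INSTALL_KEXT_DIR) to name the actual install targets. The deleted per-arch plumbing (the hand-built OBJPATH, the ARCH_CONFIG conditionals, and the i386-only SUPPORT_SYSTEM60_KEXT block) disappears because the new MakeInc.* machinery presumably supplies OBJPATH and CURRENT_ARCH_CONFIG_LC itself, and because the System6.0 compatibility kexts are dropped outright, as the deletions later in this patch confirm.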
$(OBJPATH)/allsymbols: $(OBJPATH)/mach_kernel $(_v)$(NM) -gj $< > $@ -$(SYMBOL_SET_BUILD): $(OBJPATH)/%.symbolset : %.exports %.$(ARCH_CONFIG_LC).exports $(OBJPATH)/allsymbols - $(_v)if [ "$*" != System6.0 -o $(SUPPORT_SYSTEM60_KEXT) -eq 1 ]; then \ - $(KEXT_CREATE_SYMBOL_SET) \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ - -import $(OBJPATH)/allsymbols \ - -export $*.exports \ - -export $*.$(ARCH_CONFIG_LC).exports \ - -output $@ $(_vstdout); \ - fi - $(_v)if [ ! -e $@ ]; then \ - printf "" > $@; \ - fi - -# When building the top-level install target for all architectures, aggregate the per-arch -# symbolsets for the primary machine configuration for each kernel/arch config -$(SYMBOL_SET_FAT): $(OBJROOT)/%.symbolset : - $(_v)per_arch_symbolsets=""; \ - for arch_config in $(INSTALL_ARCHS); \ - do \ - \ - my_counter=1; \ - found_arch=0; \ - for my_config in $(TARGET_CONFIGS_UC); \ - do \ - if [ $${my_counter} -eq 1 ] ; then \ - kernel_config=$${my_config}; \ - elif [ $${my_counter} -eq 2 ] ; then \ - if [ $${my_config} = $${arch_config} ]; then \ - found_arch=1; \ - fi; \ - else \ - if [ $${found_arch} -eq 1 ]; then \ - machine_config=$${my_config};\ - break; \ - fi; \ - my_counter=0; \ - fi; \ - my_counter=$$((my_counter + 1)); \ - done; \ - \ - if [ $${arch_config} = ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${arch_config} = L4_ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${machine_config} = DEFAULT ] ; then \ - objpath=${OBJROOT}/$${kernel_config}_$${arch_config}; \ - else \ - objpath=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}; \ - fi; \ - if [ -s "$${objpath}/$*.symbolset" ]; then \ - per_arch_symbolsets="$${per_arch_symbolsets} $${objpath}/$*.symbolset"; \ - fi; \ - done; \ - if [ -n "$${per_arch_symbolsets}" ]; then \ - $(LIPO) $${per_arch_symbolsets} -create -output $@; \ - else \ - printf "" > $@; \ - fi - -build_symbol_sets: $(SYMBOL_SET_BUILD) $(OBJPATH)/allsymbols - $(_v)$(KEXT_CREATE_SYMBOL_SET) \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ - $(foreach comp,$(filter-out System6.0 Private,$(SYMBOL_COMPONENT_LIST)), \ - -export $(SRCROOT)/$(COMPONENT)/$(comp).exports \ - -export $(SRCROOT)/$(COMPONENT)/$(comp).$(ARCH_CONFIG_LC).exports) \ - -import $(OBJPATH)/allsymbols \ - -output /dev/null $(_vstdout); - $(_v)$(KEXT_CREATE_SYMBOL_SET) \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ - $(foreach comp,$(filter-out System6.0 Unsupported,$(SYMBOL_COMPONENT_LIST)), \ - -export $(SRCROOT)/$(COMPONENT)/$(comp).exports \ - -export $(SRCROOT)/$(COMPONENT)/$(comp).$(ARCH_CONFIG_LC).exports) \ - -import $(OBJPATH)/allsymbols \ - -output /dev/null $(_vstdout); - $(_v) $(SRCROOT)/$(COMPONENT)/list_supported.sh $(SRCROOT)/$(COMPONENT) $(ARCH_CONFIG_LC) $(OBJPATH)/${MD_SUPPORTED_KPI_FILENAME}; - $(_v)if [ -n `echo $${ARCH_CONFIGS%%\ *} | grep -i $(ARCH_CONFIG)` ]; \ - then \ - $(SRCROOT)/$(COMPONENT)/list_supported.sh $(SRCROOT)/$(COMPONENT) "ALL" $(OBJROOT)/${MI_SUPPORTED_KPI_FILENAME}; \ - fi - - -install_symbol_sets: $(SYMBOL_SET_FAT) $(SRCROOT)/config/MasterVersion $(INSTALL_KEXT_PLISTS) - $(_v)if [ -s "$(OBJROOT)/System6.0.symbolset" -a $(SUPPORT_SYSTEM60_KEXT) -eq 1 ]; then \ - install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/System6.0.kext/kernel.6.0; \ - install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset 
$(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Mach6.0.kext/Mach6.0; \ - install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/BSDKernel6.0.kext/BSDKernel6.0; \ - install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Libkern6.0.kext/Libkern6.0; \ - install $(INSTALL_FLAGS) $(OBJROOT)/System6.0.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IOKit6.0.kext/IOKit6.0; \ - fi - $(_v)if [ -s "$(OBJROOT)/BSDKernel.symbolset" ]; then \ - install $(INSTALL_FLAGS) $(OBJROOT)/BSDKernel.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/BSDKernel.kext/BSDKernel; \ - install $(INSTALL_FLAGS) $(OBJROOT)/IOKit.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IOKit.kext/IOKit; \ - install $(INSTALL_FLAGS) $(OBJROOT)/Libkern.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Libkern.kext/Libkern; \ - install $(INSTALL_FLAGS) $(OBJROOT)/Mach.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Mach.kext/Mach; \ - install $(INSTALL_FLAGS) $(OBJROOT)/MACFramework.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/MACFramework.kext/MACFramework; \ - install $(INSTALL_FLAGS) $(OBJROOT)/Unsupported.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Unsupported.kext/Unsupported; \ - install $(INSTALL_FLAGS) $(OBJROOT)/Private.symbolset $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Private.kext/Private; \ - fi - $(_v)$(NEWVERS) $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/AppleNMI.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/ApplePlatformFamily.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/BSDKernel.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IOKit.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IONVRAMFamily.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/IOSystemManagement.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Libkern.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Mach.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/MACFramework.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Unsupported.kext/Info.plist \ - $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext/PlugIns/Private.kext/Info.plist; - $(_v)$(MKDIR) $(DSTROOT)/$(KRESDIR); - $(_v)install $(INSTALL_FLAGS) $(OBJPATH)/$(MD_SUPPORTED_KPI_FILENAME) $(DSTROOT)/$(KRESDIR); - $(_v)if [ -n `echo $${ARCH_CONFIGS%%\ *} | grep -i $(ARCH_CONFIG)` ]; then \ - install $(INSTALL_FLAGS) $(OBJROOT)/$(MI_SUPPORTED_KPI_FILENAME) $(DSTROOT)/$(KRESDIR); \ - fi - $(_v)$(MKDIR) $(SYMROOT) - $(_v)$(CP) -rf $(DSTROOT)/$(INSTALL_KEXT_DIR)/System.kext $(SYMROOT) - -do_build_all: build_symbol_sets - -do_build_install: install_symbol_sets - -EXPORTS_FILE_LIST = $(addprefix $(SOURCE)/,$(foreach set,$(SYMBOL_COMPONENT_LIST), $(set).exports $(set).$(ARCH_CONFIG_LC).exports)) -EXPORTS_FILE_LIST_NOSYSTEM60 = $(addprefix $(SOURCE)/,$(foreach set, $(filter-out System6.0,$(SYMBOL_COMPONENT_LIST)), $(set).exports $(set).$(ARCH_CONFIG_LC).exports)) - -# Does not include "whole-kernel" clients -build_mach_kernel_exports: - $(_v)if [ $(SUPPORT_SYSTEM60_KEXT) -eq 1 ]; then \ - $(SOURCE)/generate_linker_exports.sh $(OBJPATH)/kernel-kpi.exp \ - $(EXPORTS_FILE_LIST) || exit 1; \ - else \ - $(SOURCE)/generate_linker_exports.sh 
$(OBJPATH)/kernel-kpi.exp \ - $(EXPORTS_FILE_LIST_NOSYSTEM60) || exit 1; \ - fi; +$(SYMBOL_SET_BUILD): $(OBJPATH)/%.symbolset : %.exports %.$(CURRENT_ARCH_CONFIG_LC).exports $(OBJPATH)/allsymbols + @echo SYMBOLSET $* "($(CURRENT_ARCH_CONFIG_LC))" + $(_v)$(KEXT_CREATE_SYMBOL_SET) \ + $(ARCH_FLAGS_$(CURRENT_ARCH_CONFIG)) \ + -import $(OBJPATH)/allsymbols \ + -export $(SOURCE)/$*.exports \ + -export $(SOURCE)/$*.$(CURRENT_ARCH_CONFIG_LC).exports \ + -output $@ $(_vstdout) + +.PHONY: check_all_exports + +check_all_exports: $(OBJPATH)/allsymbols + $(_v)$(KEXT_CREATE_SYMBOL_SET) \ + $(ARCH_FLAGS_$(CURRENT_ARCH_CONFIG)) \ + -import $(OBJPATH)/allsymbols \ + $(foreach symbolset,$(filter-out Private,$(SYMBOL_COMPONENT_LIST)), \ + -export $(SOURCE)/$(symbolset).exports \ + -export $(SOURCE)/$(symbolset).$(CURRENT_ARCH_CONFIG_LC).exports) \ + -output /dev/null $(_vstdout) + $(_v)$(KEXT_CREATE_SYMBOL_SET) \ + $(ARCH_FLAGS_$(CURRENT_ARCH_CONFIG)) \ + -import $(OBJPATH)/allsymbols \ + $(foreach symbolset,$(filter-out Unsupported,$(SYMBOL_COMPONENT_LIST)), \ + -export $(SOURCE)/$(symbolset).exports \ + -export $(SOURCE)/$(symbolset).$(CURRENT_ARCH_CONFIG_LC).exports) \ + -output /dev/null $(_vstdout) + +$(OBJPATH)/$(MD_SUPPORTED_KPI_FILENAME): $(EXPORTS_FILES) + @echo SUPPORTED_KPI "($(CURRENT_ARCH_CONFIG_LC))" + $(_v)$(SRCROOT)/config/list_supported.sh $(SOURCE) $(CURRENT_ARCH_CONFIG_LC) $@ + +$(OBJPATH)/$(MI_SUPPORTED_KPI_FILENAME): $(EXPORTS_FILES) + @echo SUPPORTED_KPI "(all)" + $(_v)$(SRCROOT)/config/list_supported.sh $(SOURCE) all $@ + +build_symbol_sets: check_all_exports $(SYMBOL_SET_BUILD) $(OBJPATH)/allsymbols \ + $(OBJPATH)/$(MD_SUPPORTED_KPI_FILENAME) \ + $(OBJPATH)/$(MI_SUPPORTED_KPI_FILENAME) + +do_config_all:: build_symbol_sets + +# There's no simple static pattern rule for these paths, so hardcode dependencies in the command list +$(SYMROOT_INSTALL_KEXT_MACHO_FILES): ALWAYS + $(_v)$(MKDIR) $(dir $@) + $(_v)if [ $(OBJROOT)/.symbolset.timestamp -nt $@ ]; then \ + echo INSTALLSYM symbolset $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ + $(INSTALL) $(EXEC_INSTALL_FLAGS) $(OBJPATH)/$(@F).symbolset $@; \ + cmdstatus=$$?; \ + else \ + echo INSTALLSYM symbolset $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ + $(LIPO) -create $@ $(OBJPATH)/$(@F).symbolset -output $@ 2>/dev/null || true; \ + cmdstatus=$$?; \ + fi; \ + exit $$cmdstatus + +.PHONY: symroot_kext_plists + +symroot_kext_plists: $(SYMROOT_INSTALL_KEXT_PLISTS) + +$(SYMROOT_INSTALL_KEXT_PLISTS): $(SYMROOT)/% : $(SOURCE)/% + $(_v)$(MKDIR) $(dir $@) + @echo INSTALLSYM kextplist $* + $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ + $(_v)$(NEWVERS) $@ $(_vstdout) + +.PHONY: dstroot_kext_plists + +dstroot_kext_plists: $(DSTROOT_INSTALL_KEXT_PLISTS) + +$(DSTROOT_INSTALL_KEXT_PLISTS): $(INSTALL_KEXT_DIR)/% : $(SYMROOT)/% + $(_v)$(MKDIR) $(dir $@) + @echo INSTALL kextplist $* + $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ + +$(DSTROOT_INSTALL_KEXT_MACHO_FILES): $(INSTALL_KEXT_DIR)/% : $(SYMROOT)/% + $(_v)$(MKDIR) $(dir $@) + @echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" + $(_v)$(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@ + +$(DSTROOT)/$(KRESDIR)/$(MD_SUPPORTED_KPI_FILENAME) $(DSTROOT)/$(KRESDIR)/$(MI_SUPPORTED_KPI_FILENAME): $(DSTROOT)/$(KRESDIR)/% : $(OBJPATH)/% + $(_v)$(MKDIR) $(dir $@) + @echo INSTALL $* + $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ + +install_symbol_sets: $(SYMROOT_INSTALL_KEXT_MACHO_FILES) \ + symroot_kext_plists \ + $(DSTROOT_INSTALL_KEXT_MACHO_FILES) \ + dstroot_kext_plists \ + $(DSTROOT)/$(KRESDIR)/$(MD_SUPPORTED_KPI_FILENAME) \ 
+ $(DSTROOT)/$(KRESDIR)/$(MI_SUPPORTED_KPI_FILENAME) + +do_build_install_primary:: install_symbol_sets + +$(OBJPATH)/kernel-kpi.exp: $(EXPORTS_FILES) + $(_v)$(SOURCE)/generate_linker_exports.sh $@ $+ + +.PHONY: build_mach_kernel_exports + +build_mach_kernel_exports: $(OBJPATH)/kernel-kpi.exp + +do_build_all:: build_mach_kernel_exports include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/config/MasterVersion b/config/MasterVersion index bf00b8e5c..013405f61 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -12.5.0 +13.0.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. diff --git a/config/Private.exports b/config/Private.exports index 3bfacfc39..96bd0eb11 100644 --- a/config/Private.exports +++ b/config/Private.exports @@ -10,6 +10,8 @@ __ZTV5IOCPU __ZN24IOCPUInterruptController* __ZNK24IOCPUInterruptController* __ZTV24IOCPUInterruptController +_assert_wait_timeout_with_leeway +_assert_wait_deadline_with_leeway _b_to_q _bdevsw _boot @@ -18,6 +20,7 @@ _bsd_hostname _bsd_set_dependency_capable _buf_attr _buf_create_shadow +_buf_kernel_addrperm_addr _buf_setfilter _buf_shadow _bufattr_meta @@ -31,10 +34,16 @@ _chudxnu_thread_set_dirty _clalloc _clfree _cons_cinput +_convert_port_to_task_suspension_token +_convert_task_suspension_token_to_port _convert_task_to_port _cp_key_store_action _cp_register_wraps +_cs_enforcement _cs_entitlements_blob_get +_cs_identity_get +_cs_get_cdhash +_cs_register_cscsr _ctl_id_by_name _ctl_name_by_id _fd_rdwr @@ -44,12 +53,19 @@ _ifnet_allocate_extended _ifnet_bandwidths _ifnet_clone_attach _ifnet_clone_detach -_ifnet_get_local_ports _ifnet_dequeue _ifnet_dequeue_service_class _ifnet_dequeue_multi _ifnet_dequeue_service_class_multi +_ifnet_disable_output +_ifnet_enable_output _ifnet_enqueue +_ifnet_flowid +_ifnet_get_delegate +_ifnet_get_local_ports +_ifnet_get_local_ports_extended +_ifnet_get_inuse_address_list +_ifnet_get_service_class_sndq_len _ifnet_get_sndq_len _ifnet_get_rcvq_maxlen _ifnet_get_sndq_maxlen @@ -57,27 +73,35 @@ _ifnet_idle_flags _ifnet_inet_defrouter_llreachinfo _ifnet_inet6_defrouter_llreachinfo _ifnet_input_extended +_ifnet_latencies _ifnet_link_quality _ifnet_notice_node_presence _ifnet_notice_node_absence _ifnet_notice_master_elected +_ifnet_poll_params _ifnet_purge +_ifnet_report_issues _ifnet_set_bandwidths +_ifnet_set_delegate _ifnet_set_idle_flags +_ifnet_set_latencies _ifnet_set_link_quality _ifnet_set_output_sched_model +_ifnet_set_poll_params _ifnet_set_rcvq_maxlen _ifnet_set_sndq_maxlen _ifnet_start +_ifnet_subfamily _ifnet_transmit_burst_start _ifnet_transmit_burst_end +_ifnet_tx_compl_status +_in6_localaddr _in6addr_local +_in_localaddr _inaddr_local -_inet_domain_mutex _inp_clear_INP_INADDR_ANY -_ip_mutex -_ip_output -_ip_protox +_ip_gre_output +_ip_gre_register_input _ipc_port_release_send _kauth_cred_getgroups _kauth_cred_guid2grnam @@ -103,21 +127,33 @@ _m_pullup _m_split _m_trailingspace:_mbuf_trailingspace _mac_proc_set_enforce +_mach_vm_allocate +_mach_vm_deallocate +_mach_vm_map +_mach_vm_protect +_mbuf_add_drvaux +_mbuf_find_drvaux +_mbuf_del_drvaux +_mbuf_get_driver_scratch _mbuf_get_priority:_mbuf_get_traffic_class _mbuf_get_service_class +_mbuf_get_service_class_index +_mbuf_get_service_class_max_count +_mbuf_get_traffic_class_index +_mbuf_get_traffic_class_max_count _mbuf_is_service_class_privileged:_mbuf_is_traffic_class_privileged _mbuf_pkthdr_aux_flags _mcl_to_paddr 
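Several entries in this Private.exports hunk use the two-part "exported:backing" form, for example _m_trailingspace:_mbuf_trailingspace above and the _net_*:_net_*_old conversions just below: the left-hand name is what a kext links against, the right-hand name is the symbol that actually backs it inside the kernel. A hedged illustration of the effect in C++ terms, using a Darwin asm-label alias (the real binding is performed by kextsymboltool when the symbol set is built, and the prototype here is simplified, not the true mbuf KPI signature):

    extern "C" {
        struct mbuf;                                  // opaque for the sketch
        // The symbol that really exists in the kernel:
        long mbuf_trailingspace(const struct mbuf *m);
        // A declaration whose linker-level name is forced to the backing
        // symbol, so callers of m_trailingspace() bind to _mbuf_trailingspace:
        long m_trailingspace(const struct mbuf *m) __asm__("_mbuf_trailingspace");
    }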
_mountroot_post_hook -_net_add_domain -_net_add_proto -_net_del_domain -_net_del_proto +_net_add_domain:_net_add_domain_old +_net_add_proto:_net_add_proto_old +_net_del_domain:_net_del_domain_old +_net_del_proto:_net_del_proto_old _netboot_root _perf_monitor_register_* _perf_monitor_unregister -_pffinddomain -_pffindproto +_pffinddomain:_pffinddomain_old +_pffindproto:_pffindproto_old _pmc_config_set_interrupt_threshold _pmc_config_set_value _pmc_create_config @@ -140,7 +176,9 @@ _pmc_unregister _post_sys_powersource _port_name_to_task _port_name_to_thread +_proc_pidbackgrounded _proc_task +_proc_uniqueid _pru_abort_notsupp _pru_accept_notsupp _pru_bind_notsupp @@ -156,12 +194,12 @@ _pru_sense_null _pru_shutdown_notsupp _pru_sockaddr_notsupp _pru_sopoll_notsupp +_pthread_kext_register _q_to_b _register_crypto_functions _register_decmpfs_decompressor _rootdev _rootvp -_rtfree _sbappendaddr _sbappendrecord _sbflush @@ -169,10 +207,12 @@ _sbspace _soabort _socantrcvmore _socantsendmore +_sock_catchevents _sock_getlistener _sock_release _sock_retain _sock_setupcall +_sock_setupcalls _sodisconnect _sofree _sofreelastref @@ -188,6 +228,7 @@ _soreceive _soreserve _sorwakeup _sosend +_strnstr _termioschars _thread_call_allocate_with_priority _thread_call_cancel_wait @@ -207,6 +248,7 @@ _throttle_info_io_will_be_throttled _throttle_lowpri_io _throttle_set_thread_io_policy _timeout +_timeout_with_leeway _tk_nin _tk_rawcc _tsleep @@ -229,6 +271,9 @@ _unmountroot_pre_hook _unputc _unregister_decmpfs_decompressor _untimeout +_utun_ctl_register_dtls +_utun_pkt_dtls_input +_utun_ctl_disable_crypto_dtls _vnode_isdyldsharedcache _vnode_ismonitored _vnode_istty @@ -247,15 +292,22 @@ _vfs_getattr _vfs_getbyid _vfs_get_notify_attributes _vfs_mntlabel +_vfs_nativexattrs _vfs_setcompoundopen _vfs_setunmountpreflight _vfs_throttle_mask _vfs_vnodecovered +_vm_fault _vm_map_copy_copy _vm_map_copy_discard _vm_map_copyin _vm_map_copyin_common _vm_map_copyout +_vm_map_page_mask +_vm_map_page_shift +_vm_map_page_size +_vm_map_round_page_mask +_vm_map_trunc_page_mask _vn_getpath_fsenter _vn_searchfs_inappropriate_name _vnode_lookup_continue_needed diff --git a/config/Private.i386.exports b/config/Private.i386.exports deleted file mode 100644 index 80e66dfe9..000000000 --- a/config/Private.i386.exports +++ /dev/null @@ -1,38 +0,0 @@ -_IOGetBootKeyStoreData -_SHA256_Final -_SHA256_Init -_SHA256_Update -__ZN22IOInterruptEventSource7warmCPUEy -_acpi_install_wake_handler -_acpi_sleep_kernel -_add_fsevent -_apic_table -_apply_func_phys -_bufattr_delayidlesleep -_cpu_to_lapic -_cpuid_features -_cpuid_info -_lapic_end_of_interrupt -_lapic_unmask_perfcnt_interrupt -_mp_broadcast -_mp_cpus_call -_mp_cpus_call1 -_need_fsevent -_pal_efi_call_in_32bit_mode -_pal_efi_call_in_64bit_mode -_pal_machine_sleep -_smp_initialized -_vfs_addtrigger -_vfs_istraditionaltrigger -_vfs_resolver_auxiliary -_vfs_resolver_result -_vfs_resolver_sequence -_vfs_resolver_status -_vfs_settriggercallback -_vnode_trigger_update -_xts_decrypt -_xts_done -_xts_encrypt -_xts_start -_aes_decrypt -_PE_reboot_on_panic diff --git a/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist b/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist deleted file mode 100644 index 95393ca8d..000000000 --- a/config/System.kext/PlugIns/BSDKernel6.0.kext/Info.plist +++ /dev/null @@ -1,34 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - BSDKernel6.0 - CFBundleGetInfoString - BSD Kernel Pseudoextension, Apple Computer Inc, 7.9.9 - 
CFBundleIdentifier - com.apple.kernel.bsd - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - BSD Kernel Pseudoextension - CFBundlePackageType - KEXT - CFBundleShortVersionString - 7.9.9 - CFBundleSignature - ???? - CFBundleVersion - 7.9.9 - OSBundleCompatibleVersion - 1.1 - OSBundleRequired - Root - OSKernelResource - - OSBundleAllowUserLoad - - - diff --git a/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist b/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist deleted file mode 100644 index 34a0cd3d7..000000000 --- a/config/System.kext/PlugIns/IOKit6.0.kext/Info.plist +++ /dev/null @@ -1,34 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - IOKit6.0 - CFBundleGetInfoString - I/O Kit Pseudoextension, Apple Computer Inc, 7.9.9 - CFBundleIdentifier - com.apple.kernel.iokit - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - I/O Kit Pseudoextension - CFBundlePackageType - KEXT - CFBundleShortVersionString - 7.9.9 - CFBundleSignature - ???? - CFBundleVersion - 7.9.9 - OSBundleCompatibleVersion - 1.0.0b1 - OSBundleRequired - Root - OSKernelResource - - OSBundleAllowUserLoad - - - diff --git a/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist b/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist deleted file mode 100644 index 8014a6c03..000000000 --- a/config/System.kext/PlugIns/Libkern6.0.kext/Info.plist +++ /dev/null @@ -1,34 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - Libkern6.0 - CFBundleGetInfoString - Libkern Pseudoextension, Apple Computer Inc, 7.9.9 - CFBundleIdentifier - com.apple.kernel.libkern - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - Libkern Pseudoextension - CFBundlePackageType - KEXT - CFBundleShortVersionString - 7.9.9 - CFBundleSignature - ???? - CFBundleVersion - 7.9.9 - OSBundleCompatibleVersion - 1.0.0b1 - OSBundleRequired - Root - OSKernelResource - - OSBundleAllowUserLoad - - - diff --git a/config/System.kext/PlugIns/Mach6.0.kext/Info.plist b/config/System.kext/PlugIns/Mach6.0.kext/Info.plist deleted file mode 100644 index c46200211..000000000 --- a/config/System.kext/PlugIns/Mach6.0.kext/Info.plist +++ /dev/null @@ -1,34 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - Mach6.0 - CFBundleGetInfoString - Mach Kernel Pseudoextension, Apple Computer Inc, 7.9.9 - CFBundleIdentifier - com.apple.kernel.mach - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - Mach Kernel Pseudoextension - CFBundlePackageType - KEXT - CFBundleShortVersionString - 7.9.9 - CFBundleSignature - ???? - CFBundleVersion - 7.9.9 - OSBundleCompatibleVersion - 1.0.0b1 - OSBundleRequired - Root - OSKernelResource - - OSBundleAllowUserLoad - - - diff --git a/config/System.kext/PlugIns/System6.0.kext/Info.plist b/config/System.kext/PlugIns/System6.0.kext/Info.plist deleted file mode 100644 index 24f566b70..000000000 --- a/config/System.kext/PlugIns/System6.0.kext/Info.plist +++ /dev/null @@ -1,34 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - kernel.6.0 - CFBundleGetInfoString - System Resource Pseudoextension, Apple Inc, 7.9.9 - CFBundleIdentifier - com.apple.kernel.6.0 - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - System Resource Pseudoextension - CFBundlePackageType - KEXT - CFBundleShortVersionString - 7.9.9 - CFBundleSignature - ???? 
- CFBundleVersion - 7.9.9 - OSBundleCompatibleVersion - 7.9.9 - OSBundleRequired - Root - OSKernelResource - - OSBundleAllowUserLoad - - - diff --git a/config/System6.0.exports b/config/System6.0.exports deleted file mode 100644 index 2749c3ad2..000000000 --- a/config/System6.0.exports +++ /dev/null @@ -1,3190 +0,0 @@ -_DTCreateEntryIterator -_DTCreatePropertyIterator -_DTDisposeEntryIterator -_DTDisposePropertyIterator -_DTEnterEntry -_DTEntryIsEqual -_DTExitEntry -_DTFindEntry -_DTGetProperty -_DTInit -_DTIterateEntries -_DTIterateProperties -_DTLookupEntry -_DTRestartEntryIteration -_DTRestartPropertyIteration -_Debugger -_IOAlignmentToSize -_IOBSDNameMatching -_IOBSDRegistryEntryForDeviceTree -_IOBSDRegistryEntryGetData -_IOBSDRegistryEntryRelease -_IOCreateThread -_IODTFreeLoaderInfo -_IODTGetLoaderInfo -_IODelay -_IOExitThread -_IOFindBSDRoot -_IOFindNameForValue -_IOFindValueForName -_IOFlushProcessorCache -_IOFree -_IOFreeAligned -_IOFreeContiguous -_IOFreePageable -_IOGetTime -_IOIteratePageableMaps -_IOKitBSDInit -_IOKitResetTime -_IOLibInit -_IOLockAlloc -_IOLockFree -_IOLockGetMachLock -_IOLockInitWithState -_IOLockLock:_lck_mtx_lock -_IOLockSleep -_IOLockSleepDeadline -_IOLockTryLock:_lck_mtx_try_lock -_IOLockUnlock:_lck_mtx_unlock -_IOLockWakeup -_IOLog -_IOMalloc -_IOMallocAligned -_IOMallocContiguous -_IOMallocPageable -_IOMappedRead16 -_IOMappedRead32 -_IOMappedRead64 -_IOMappedRead8 -_IOMappedWrite16 -_IOMappedWrite32 -_IOMappedWrite64 -_IOMappedWrite8 -_IOMapperIOVMAlloc -_IOMapperIOVMFree -_IOMapperInsertPPNPages -_IOMapperInsertPage -_IOMapperInsertUPLPages -_IONDRVLibrariesInitialize -_IONetworkNamePrefixMatching -_IOOFPathMatching -_IOPageableMapForAddress -_IOPanic -_IOPrintPlane -_IORWLockAlloc -_IORWLockFree -_IORWLockGetMachLock -_IORWLockRead:_lck_rw_lock_shared -_IORWLockUnlock:_lck_rw_done -_IORWLockWrite:_lck_rw_lock_exclusive -_IORecursiveLockAlloc -_IORecursiveLockFree -_IORecursiveLockGetMachLock -_IORecursiveLockHaveLock -_IORecursiveLockLock -_IORecursiveLockSleep -_IORecursiveLockTryLock -_IORecursiveLockUnlock -_IORecursiveLockWakeup -_IOSetProcessorCacheMode -_IOSimpleLockAlloc -_IOSimpleLockFree -_IOSimpleLockGetMachLock -_IOSimpleLockInit -_IOSimpleLockLock:_lck_spin_lock -_IOSimpleLockTryLock:_lck_spin_try_lock -_IOSimpleLockUnlock:_lck_spin_unlock -_IOSizeToAlignment -_IOSleep -_IOSystemShutdownNotification -_IOZeroTvalspec -_KUNCExecute -_KUNCGetNotificationID -_KUNCUserNotificationDisplayAlert -_KUNCUserNotificationDisplayFromBundle -_KUNCUserNotificationDisplayNotice -_MD5Final -_MD5Init -_MD5Update -_OSAddAtomic -_OSAddAtomic16 -_OSAddAtomic8 -_OSBitAndAtomic -_OSBitAndAtomic16 -_OSBitAndAtomic8 -_OSBitOrAtomic -_OSBitOrAtomic16 -_OSBitOrAtomic8 -_OSBitXorAtomic -_OSBitXorAtomic16 -_OSBitXorAtomic8 -_OSCompareAndSwap -_OSDecrementAtomic -_OSDecrementAtomic16 -_OSDecrementAtomic8 -_OSIncrementAtomic -_OSIncrementAtomic16 -_OSIncrementAtomic8 -_OSKernelStackRemaining -_OSPrintMemory -_OSRuntimeFinalizeCPP -_OSRuntimeInitializeCPP -_OSRuntimeUnloadCPP -_OSRuntimeUnloadCPPForSegment -_OSTestAndClear -_OSTestAndSet -_OSUnserializechar -_OSUnserializelval -_OSUnserializenerrs -_OSlibkernInit -_PEGetGMTTimeOfDay -_PEGetMachineName -_PEGetModelName -_PEGetPlatformEpoch -_PEHaltRestart -_PESavePanicInfo -_PESetGMTTimeOfDay -_PE_boot_args -_PE_call_timebase_callback -_PE_cpu_halt -_PE_cpu_machine_init -_PE_cpu_machine_quiesce -_PE_cpu_signal -_PE_cpu_start -_PE_create_console -_PE_current_console -_PE_display_icon -_PE_enter_debugger -_PE_get_hotkey 
-_PE_halt_restart -_PE_init_iokit -_PE_init_kprintf -_PE_init_platform -_PE_init_printf -_PE_initialize_console -_PE_kputc -_PE_poll_input -_PE_putc -_PE_register_timebase_callback -_PE_state -_StartIOKit -__Z13OSUnserializePKcPP8OSString -__Z16IOCPUSleepKernelv -__Z16IODTFindSlotNameP15IORegistryEntrym -__Z16IODTSetResolvingP15IORegistryEntryPFlmPmS1_EPFvS0_PhS4_S4_E -__Z16OSUnserializeXMLPKcPP8OSString -__Z17IODTGetCellCountsP15IORegistryEntryPmS1_ -__Z17IODTMapInterruptsP15IORegistryEntry -__Z17IODeviceTreeAllocPv -__Z17IOServiceOrderingPK15OSMetaClassBaseS1_Pv -__Z18IODTCompareNubNamePK15IORegistryEntryP8OSStringPS3_ -__Z19printDictionaryKeysP12OSDictionaryPc -__Z20IODTMakeNVDescriptorP15IORegistryEntryP17IONVRAMDescriptor -__Z20IODTMatchNubWithKeysP15IORegistryEntryPKc -__Z21IODTResolveAddressingP15IORegistryEntryPKcP14IODeviceMemory -__Z22IODTResolveAddressCellP15IORegistryEntryPmS1_S1_ -__Z23IODTFindMatchingEntriesP15IORegistryEntrymPKc -__Z27IODTInterruptControllerNameP15IORegistryEntry -__ZN10IOMachPort10gMetaClassE -__ZN10IOMachPort10superClassE -__ZN10IOMachPort11dictForTypeEj -__ZN10IOMachPort13portForObjectEP8OSObjectj -__ZN10IOMachPort14setHoldDestroyEP8OSObjectj -__ZN10IOMachPort20makeSendRightForTaskEP4taskP8OSObjectj -__ZN10IOMachPort20releasePortForObjectEP8OSObjectj -__ZN10IOMachPort22noMoreSendersForObjectEP8OSObjectjPj -__ZN10IOMachPort4freeEv -__ZN10IOMachPort9MetaClassC1Ev -__ZN10IOMachPort9MetaClassC2Ev -__ZN10IOMachPort9metaClassE -__ZN10IOMachPortC1EPK11OSMetaClass -__ZN10IOMachPortC1Ev -__ZN10IOMachPortC2EPK11OSMetaClass -__ZN10IOMachPortC2Ev -__ZN10IOMachPortD0Ev -__ZN10IOMachPortD2Ev -__ZN10IONotifier10gMetaClassE -__ZN10IONotifier10superClassE -__ZN10IONotifier9MetaClassC1Ev -__ZN10IONotifier9MetaClassC2Ev -__ZN10IONotifier9metaClassE -__ZN10IONotifierC1EPK11OSMetaClass -__ZN10IONotifierC2EPK11OSMetaClass -__ZN10IONotifierD0Ev -__ZN10IONotifierD2Ev -__ZN10IOWorkLoop10gMetaClassE -__ZN10IOWorkLoop10superClassE -__ZN10IOWorkLoop10threadMainEv -__ZN10IOWorkLoop10wakeupGateEPvb -__ZN10IOWorkLoop12tryCloseGateEv -__ZN10IOWorkLoop13_maintRequestEPvS0_S0_S0_ -__ZN10IOWorkLoop14addEventSourceEP13IOEventSource -__ZN10IOWorkLoop15runEventSourcesEv -__ZN10IOWorkLoop17removeEventSourceEP13IOEventSource -__ZN10IOWorkLoop19signalWorkAvailableEv -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop3Ev -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop4Ev -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop5Ev -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop6Ev -__ZN10IOWorkLoop20_RESERVEDIOWorkLoop7Ev -__ZN10IOWorkLoop4freeEv -__ZN10IOWorkLoop4initEv -__ZN10IOWorkLoop8openGateEv -__ZN10IOWorkLoop8workLoopEv -__ZN10IOWorkLoop9MetaClassC1Ev -__ZN10IOWorkLoop9MetaClassC2Ev -__ZN10IOWorkLoop9closeGateEv -__ZN10IOWorkLoop9metaClassE -__ZN10IOWorkLoop9runActionEPFiP8OSObjectPvS2_S2_S2_ES1_S2_S2_S2_S2_ -__ZN10IOWorkLoop9sleepGateEPv12UnsignedWidem -__ZN10IOWorkLoop9sleepGateEPvm -__ZN10IOWorkLoopC1EPK11OSMetaClass -__ZN10IOWorkLoopC1Ev -__ZN10IOWorkLoopC2EPK11OSMetaClass -__ZN10IOWorkLoopC2Ev -__ZN10IOWorkLoopD0Ev -__ZN10IOWorkLoopD2Ev -__ZN10OSIterator10gMetaClassE -__ZN10OSIterator10superClassE -__ZN10OSIterator20_RESERVEDOSIterator0Ev -__ZN10OSIterator20_RESERVEDOSIterator1Ev -__ZN10OSIterator20_RESERVEDOSIterator2Ev -__ZN10OSIterator20_RESERVEDOSIterator3Ev -__ZN10OSIterator9MetaClassC1Ev -__ZN10OSIterator9MetaClassC2Ev -__ZN10OSIterator9metaClassE -__ZN10OSIteratorC1EPK11OSMetaClass -__ZN10OSIteratorC2EPK11OSMetaClass -__ZN10OSIteratorD0Ev -__ZN10OSIteratorD2Ev -__ZN11IOCatalogue10addDriversEP7OSArrayb 
-__ZN11IOCatalogue10gMetaClassE -__ZN11IOCatalogue10initializeEv -__ZN11IOCatalogue10superClassE -__ZN11IOCatalogue11findDriversEP12OSDictionaryPl -__ZN11IOCatalogue11findDriversEP9IOServicePl -__ZN11IOCatalogue13removeDriversEP12OSDictionaryb -__ZN11IOCatalogue13startMatchingEP12OSDictionary -__ZN11IOCatalogue15moduleHasLoadedEP8OSString -__ZN11IOCatalogue15moduleHasLoadedEPKc -__ZN11IOCatalogue16terminateDriversEP12OSDictionary -__ZN11IOCatalogue25terminateDriversForModuleEP8OSStringb -__ZN11IOCatalogue25terminateDriversForModuleEPKcb -__ZN11IOCatalogue4freeEv -__ZN11IOCatalogue4initEP7OSArray -__ZN11IOCatalogue5resetEv -__ZN11IOCatalogue9MetaClassC1Ev -__ZN11IOCatalogue9MetaClassC2Ev -__ZN11IOCatalogue9metaClassE -__ZN11IOCatalogueC1EPK11OSMetaClass -__ZN11IOCatalogueC1Ev -__ZN11IOCatalogueC2EPK11OSMetaClass -__ZN11IOCatalogueC2Ev -__ZN11IOCatalogueD0Ev -__ZN11IOCatalogueD2Ev -__ZN11IODataQueue10gMetaClassE -__ZN11IODataQueue10superClassE -__ZN11IODataQueue11withEntriesEmm -__ZN11IODataQueue12withCapacityEm -__ZN11IODataQueue15initWithEntriesEmm -__ZN11IODataQueue16initWithCapacityEm -__ZN11IODataQueue19getMemoryDescriptorEv -__ZN11IODataQueue19setNotificationPortEP8ipc_port -__ZN11IODataQueue29sendDataAvailableNotificationEv -__ZN11IODataQueue4freeEv -__ZN11IODataQueue7enqueueEPvm -__ZN11IODataQueue9MetaClassC1Ev -__ZN11IODataQueue9MetaClassC2Ev -__ZN11IODataQueue9metaClassE -__ZN11IODataQueueC1EPK11OSMetaClass -__ZN11IODataQueueC1Ev -__ZN11IODataQueueC2EPK11OSMetaClass -__ZN11IODataQueueC2Ev -__ZN11IODataQueueD0Ev -__ZN11IODataQueueD2Ev -__ZN11IOMemoryMap10gMetaClassE -__ZN11IOMemoryMap10getAddressEv -__ZN11IOMemoryMap10superClassE -__ZN11IOMemoryMap13getMapOptionsEv -__ZN11IOMemoryMap14getAddressTaskEv -__ZN11IOMemoryMap17getVirtualAddressEv -__ZN11IOMemoryMap18getPhysicalAddressEv -__ZN11IOMemoryMap18getPhysicalSegmentEmPm -__ZN11IOMemoryMap19getMemoryDescriptorEv -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap0Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap1Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap2Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap3Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap4Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap5Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap6Ev -__ZN11IOMemoryMap21_RESERVEDIOMemoryMap7Ev -__ZN11IOMemoryMap5unmapEv -__ZN11IOMemoryMap7getSizeEv -__ZN11IOMemoryMap9MetaClassC1Ev -__ZN11IOMemoryMap9MetaClassC2Ev -__ZN11IOMemoryMap9getLengthEv -__ZN11IOMemoryMap9metaClassE -__ZN11IOMemoryMapC1EPK11OSMetaClass -__ZN11IOMemoryMapC1Ev -__ZN11IOMemoryMapC2EPK11OSMetaClass -__ZN11IOMemoryMapC2Ev -__ZN11IOMemoryMapD0Ev -__ZN11IOMemoryMapD2Ev -__ZN11IOResources10gMetaClassE -__ZN11IOResources10superClassE -__ZN11IOResources13setPropertiesEP8OSObject -__ZN11IOResources18matchPropertyTableEP12OSDictionary -__ZN11IOResources9MetaClassC1Ev -__ZN11IOResources9MetaClassC2Ev -__ZN11IOResources9metaClassE -__ZN11IOResources9resourcesEv -__ZN11IOResourcesC1EPK11OSMetaClass -__ZN11IOResourcesC1Ev -__ZN11IOResourcesC2EPK11OSMetaClass -__ZN11IOResourcesC2Ev -__ZN11IOResourcesD0Ev -__ZN11IOResourcesD2Ev -__ZN11OSMetaClass10preModLoadEPKc -__ZN11OSMetaClass11postModLoadEPv -__ZN11OSMetaClass12checkModLoadEPv -__ZN11OSMetaClass14modHasInstanceEPKc -__ZN11OSMetaClass15considerUnloadsEv -__ZN11OSMetaClass18allocClassWithNameEPK8OSString -__ZN11OSMetaClass18allocClassWithNameEPK8OSSymbol -__ZN11OSMetaClass18allocClassWithNameEPKc -__ZN11OSMetaClass18getClassDictionaryEv -__ZN11OSMetaClass18reportModInstancesEPKc -__ZN11OSMetaClass19printInstanceCountsEv 
-__ZN11OSMetaClass20getMetaClassWithNameEPK8OSSymbol -__ZN11OSMetaClass21_RESERVEDOSMetaClass0Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass1Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass2Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass3Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass4Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass5Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass6Ev -__ZN11OSMetaClass21_RESERVEDOSMetaClass7Ev -__ZN11OSMetaClass21checkMetaCastWithNameEPK8OSStringPK15OSMetaClassBase -__ZN11OSMetaClass21checkMetaCastWithNameEPK8OSSymbolPK15OSMetaClassBase -__ZN11OSMetaClass21checkMetaCastWithNameEPKcPK15OSMetaClassBase -__ZN11OSMetaClass24serializeClassDictionaryEP12OSDictionary -__ZN11OSMetaClass8logErrorEi -__ZN11OSMetaClass9metaClassE -__ZN11OSMetaClassC1EPKcPKS_j -__ZN11OSMetaClassC2EPKcPKS_j -__ZN11OSMetaClassD0Ev -__ZN11OSMetaClassD2Ev -__ZN11OSMetaClassdlEPvm -__ZN11OSMetaClassnwEm -__ZN11OSSerialize10gMetaClassE -__ZN11OSSerialize10superClassE -__ZN11OSSerialize12addXMLEndTagEPKc -__ZN11OSSerialize12withCapacityEj -__ZN11OSSerialize14addXMLStartTagEPK15OSMetaClassBasePKc -__ZN11OSSerialize14ensureCapacityEj -__ZN11OSSerialize16initWithCapacityEj -__ZN11OSSerialize20previouslySerializedEPK15OSMetaClassBase -__ZN11OSSerialize20setCapacityIncrementEj -__ZN11OSSerialize21_RESERVEDOSSerialize0Ev -__ZN11OSSerialize21_RESERVEDOSSerialize1Ev -__ZN11OSSerialize21_RESERVEDOSSerialize2Ev -__ZN11OSSerialize21_RESERVEDOSSerialize3Ev -__ZN11OSSerialize21_RESERVEDOSSerialize4Ev -__ZN11OSSerialize21_RESERVEDOSSerialize5Ev -__ZN11OSSerialize21_RESERVEDOSSerialize6Ev -__ZN11OSSerialize21_RESERVEDOSSerialize7Ev -__ZN11OSSerialize4freeEv -__ZN11OSSerialize7addCharEc -__ZN11OSSerialize9MetaClassC1Ev -__ZN11OSSerialize9MetaClassC2Ev -__ZN11OSSerialize9addStringEPKc -__ZN11OSSerialize9clearTextEv -__ZN11OSSerialize9metaClassE -__ZN11OSSerializeC1EPK11OSMetaClass -__ZN11OSSerializeC1Ev -__ZN11OSSerializeC2EPK11OSMetaClass -__ZN11OSSerializeC2Ev -__ZN11OSSerializeD0Ev -__ZN11OSSerializeD2Ev -__ZN12IOPMinformee10gMetaClassE -__ZN12IOPMinformee10initializeEP9IOService -__ZN12IOPMinformee10superClassE -__ZN12IOPMinformee4freeEv -__ZN12IOPMinformee9MetaClassC1Ev -__ZN12IOPMinformee9MetaClassC2Ev -__ZN12IOPMinformee9metaClassE -__ZN12IOPMinformeeC1EPK11OSMetaClass -__ZN12IOPMinformeeC1Ev -__ZN12IOPMinformeeC2EPK11OSMetaClass -__ZN12IOPMinformeeC2Ev -__ZN12IOPMinformeeD0Ev -__ZN12IOPMinformeeD2Ev -__ZN12IORootParent10dozeSystemEv -__ZN12IORootParent10gMetaClassE -__ZN12IORootParent10superClassE -__ZN12IORootParent10wakeSystemEv -__ZN12IORootParent11sleepSystemEv -__ZN12IORootParent11sleepToDozeEv -__ZN12IORootParent13restartSystemEv -__ZN12IORootParent14shutDownSystemEv -__ZN12IORootParent5startEP9IOService -__ZN12IORootParent9MetaClassC1Ev -__ZN12IORootParent9MetaClassC2Ev -__ZN12IORootParent9metaClassE -__ZN12IORootParentC1EPK11OSMetaClass -__ZN12IORootParentC1Ev -__ZN12IORootParentC2EPK11OSMetaClass -__ZN12IORootParentC2Ev -__ZN12IORootParentD0Ev -__ZN12IORootParentD2Ev -__ZN12IOUserClient10clientDiedEv -__ZN12IOUserClient10gMetaClassE -__ZN12IOUserClient10getServiceEv -__ZN12IOUserClient10initializeEv -__ZN12IOUserClient10superClassE -__ZN12IOUserClient11clientCloseEv -__ZN12IOUserClient12initWithTaskEP4taskPvm -__ZN12IOUserClient12initWithTaskEP4taskPvmP12OSDictionary -__ZN12IOUserClient13connectClientEPS_ -__ZN12IOUserClient14externalMethodEjP25IOExternalMethodArgumentsP24IOExternalMethodDispatchP8OSObjectPv -__ZN12IOUserClient15mapClientMemoryEmP4taskmj -__ZN12IOUserClient15sendAsyncResultEPjiPPvm 
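The System6.0 list being deleted here still carries the 32-bit era signatures, which makes a useful side-by-side with the 64-bit variant added to IOKit.exports at the top of this patch. Decoding the two mangled names by hand per the Itanium C++ ABI (parameter names are invented, and the real headers spell these types OSAsyncReference/OSAsyncReference64, IOReturn, and UInt32):

    typedef int IOReturn;  // stand-in for kern_return_t in the real headers

    // __ZN12IOUserClient15sendAsyncResultEPjiPPvm   (deleted, 32-bit era)
    //   Pj = unsigned int*,  i = int,  PPv = void**,  m = unsigned long
    IOReturn sendAsyncResult(unsigned int *reference, int result,
                             void **args, unsigned long numArgs);

    // __ZN12IOUserClient28sendAsyncResult64WithOptionsEPyiS0_jj   (added above)
    //   Py = unsigned long long*,  i = int,
    //   S0_ = substitution, repeats the earlier unsigned long long*,
    //   j = unsigned int
    IOReturn sendAsyncResult64WithOptions(unsigned long long *reference,
                                          int result,
                                          unsigned long long *args,
                                          unsigned int numArgs,
                                          unsigned int options);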
-__ZN12IOUserClient17setAsyncReferenceEPjP8ipc_portPvS3_ -__ZN12IOUserClient18clientHasPrivilegeEPvPKc -__ZN12IOUserClient19clientMemoryForTypeEmPmPP18IOMemoryDescriptor -__ZN12IOUserClient20exportObjectToClientEP4taskP8OSObjectPS3_ -__ZN12IOUserClient21destroyUserReferencesEP8OSObject -__ZN12IOUserClient22_RESERVEDIOUserClient2Ev -__ZN12IOUserClient22_RESERVEDIOUserClient3Ev -__ZN12IOUserClient22_RESERVEDIOUserClient4Ev -__ZN12IOUserClient22_RESERVEDIOUserClient5Ev -__ZN12IOUserClient22_RESERVEDIOUserClient6Ev -__ZN12IOUserClient22_RESERVEDIOUserClient7Ev -__ZN12IOUserClient22_RESERVEDIOUserClient8Ev -__ZN12IOUserClient22_RESERVEDIOUserClient9Ev -__ZN12IOUserClient23_RESERVEDIOUserClient10Ev -__ZN12IOUserClient23_RESERVEDIOUserClient11Ev -__ZN12IOUserClient23_RESERVEDIOUserClient12Ev -__ZN12IOUserClient23_RESERVEDIOUserClient13Ev -__ZN12IOUserClient23_RESERVEDIOUserClient14Ev -__ZN12IOUserClient23_RESERVEDIOUserClient15Ev -__ZN12IOUserClient23getExternalTrapForIndexEm -__ZN12IOUserClient24getNotificationSemaphoreEmPP9semaphore -__ZN12IOUserClient24getTargetAndTrapForIndexEPP9IOServicem -__ZN12IOUserClient24registerNotificationPortEP8ipc_portmm -__ZN12IOUserClient24registerNotificationPortEP8ipc_portmy -__ZN12IOUserClient25getExternalMethodForIndexEm -__ZN12IOUserClient26getTargetAndMethodForIndexEPP9IOServicem -__ZN12IOUserClient26removeMappingForDescriptorEP18IOMemoryDescriptor -__ZN12IOUserClient30getExternalAsyncMethodForIndexEm -__ZN12IOUserClient31getAsyncTargetAndMethodForIndexEPP9IOServicem -__ZN12IOUserClient4freeEv -__ZN12IOUserClient4initEP12OSDictionary -__ZN12IOUserClient4initEv -__ZN12IOUserClient9MetaClassC1Ev -__ZN12IOUserClient9MetaClassC2Ev -__ZN12IOUserClient9metaClassE -__ZN12IOUserClientC1EPK11OSMetaClass -__ZN12IOUserClientC2EPK11OSMetaClass -__ZN12IOUserClientD0Ev -__ZN12IOUserClientD2Ev -__ZN12OSCollection10gMetaClassE -__ZN12OSCollection10setOptionsEjjPv -__ZN12OSCollection10superClassE -__ZN12OSCollection11haveUpdatedEv -__ZN12OSCollection14copyCollectionEP12OSDictionary -__ZN12OSCollection22_RESERVEDOSCollection2Ev -__ZN12OSCollection22_RESERVEDOSCollection3Ev -__ZN12OSCollection22_RESERVEDOSCollection4Ev -__ZN12OSCollection22_RESERVEDOSCollection5Ev -__ZN12OSCollection22_RESERVEDOSCollection6Ev -__ZN12OSCollection22_RESERVEDOSCollection7Ev -__ZN12OSCollection4initEv -__ZN12OSCollection9MetaClassC1Ev -__ZN12OSCollection9MetaClassC2Ev -__ZN12OSCollection9metaClassE -__ZN12OSCollectionC1EPK11OSMetaClass -__ZN12OSCollectionC2EPK11OSMetaClass -__ZN12OSCollectionD0Ev -__ZN12OSCollectionD2Ev -__ZN12OSDictionary10gMetaClassE -__ZN12OSDictionary10setOptionsEjjPv -__ZN12OSDictionary10superClassE -__ZN12OSDictionary11withObjectsEPPK8OSObjectPPK8OSStringjj -__ZN12OSDictionary11withObjectsEPPK8OSObjectPPK8OSSymboljj -__ZN12OSDictionary12removeObjectEPK8OSString -__ZN12OSDictionary12removeObjectEPK8OSSymbol -__ZN12OSDictionary12removeObjectEPKc -__ZN12OSDictionary12withCapacityEj -__ZN12OSDictionary14copyCollectionEPS_ -__ZN12OSDictionary14ensureCapacityEj -__ZN12OSDictionary14withDictionaryEPKS_j -__ZN12OSDictionary15flushCollectionEv -__ZN12OSDictionary15initWithObjectsEPPK8OSObjectPPK8OSStringjj -__ZN12OSDictionary15initWithObjectsEPPK8OSObjectPPK8OSSymboljj -__ZN12OSDictionary16initWithCapacityEj -__ZN12OSDictionary18initWithDictionaryEPKS_j -__ZN12OSDictionary20setCapacityIncrementEj -__ZN12OSDictionary22_RESERVEDOSDictionary0Ev -__ZN12OSDictionary22_RESERVEDOSDictionary1Ev -__ZN12OSDictionary22_RESERVEDOSDictionary2Ev -__ZN12OSDictionary22_RESERVEDOSDictionary3Ev 
-__ZN12OSDictionary22_RESERVEDOSDictionary4Ev -__ZN12OSDictionary22_RESERVEDOSDictionary5Ev -__ZN12OSDictionary22_RESERVEDOSDictionary6Ev -__ZN12OSDictionary22_RESERVEDOSDictionary7Ev -__ZN12OSDictionary4freeEv -__ZN12OSDictionary5mergeEPKS_ -__ZN12OSDictionary9MetaClassC1Ev -__ZN12OSDictionary9MetaClassC2Ev -__ZN12OSDictionary9metaClassE -__ZN12OSDictionary9setObjectEPK8OSStringPK15OSMetaClassBase -__ZN12OSDictionary9setObjectEPK8OSSymbolPK15OSMetaClassBase -__ZN12OSDictionary9setObjectEPKcPK15OSMetaClassBase -__ZN12OSDictionaryC1EPK11OSMetaClass -__ZN12OSDictionaryC1Ev -__ZN12OSDictionaryC2EPK11OSMetaClass -__ZN12OSDictionaryC2Ev -__ZN12OSDictionaryD0Ev -__ZN12OSDictionaryD2Ev -__ZN12OSOrderedSet10gMetaClassE -__ZN12OSOrderedSet10setOptionsEjjPv -__ZN12OSOrderedSet10superClassE -__ZN12OSOrderedSet11orderObjectEPK15OSMetaClassBase -__ZN12OSOrderedSet12removeObjectEPK15OSMetaClassBase -__ZN12OSOrderedSet12withCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ -__ZN12OSOrderedSet13setLastObjectEPK15OSMetaClassBase -__ZN12OSOrderedSet14copyCollectionEP12OSDictionary -__ZN12OSOrderedSet14ensureCapacityEj -__ZN12OSOrderedSet14getOrderingRefEv -__ZN12OSOrderedSet14setFirstObjectEPK15OSMetaClassBase -__ZN12OSOrderedSet15flushCollectionEv -__ZN12OSOrderedSet16initWithCapacityEjPFlPK15OSMetaClassBaseS2_PvES3_ -__ZN12OSOrderedSet20setCapacityIncrementEj -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet0Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet1Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet2Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet3Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet4Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet5Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet6Ev -__ZN12OSOrderedSet22_RESERVEDOSOrderedSet7Ev -__ZN12OSOrderedSet4freeEv -__ZN12OSOrderedSet9MetaClassC1Ev -__ZN12OSOrderedSet9MetaClassC2Ev -__ZN12OSOrderedSet9metaClassE -__ZN12OSOrderedSet9setObjectEPK15OSMetaClassBase -__ZN12OSOrderedSet9setObjectEjPK15OSMetaClassBase -__ZN12OSOrderedSetC1EPK11OSMetaClass -__ZN12OSOrderedSetC1Ev -__ZN12OSOrderedSetC2EPK11OSMetaClass -__ZN12OSOrderedSetC2Ev -__ZN12OSOrderedSetD0Ev -__ZN12OSOrderedSetD2Ev -__ZN12OSSerializer10gMetaClassE -__ZN12OSSerializer10superClassE -__ZN12OSSerializer9MetaClassC1Ev -__ZN12OSSerializer9MetaClassC2Ev -__ZN12OSSerializer9forTargetEPvPFbS0_S0_P11OSSerializeES0_ -__ZN12OSSerializer9metaClassE -__ZN12OSSerializerC1EPK11OSMetaClass -__ZN12OSSerializerC1Ev -__ZN12OSSerializerC2EPK11OSMetaClass -__ZN12OSSerializerC2Ev -__ZN12OSSerializerD0Ev -__ZN12OSSerializerD2Ev -__ZN12OSSymbolPool12insertSymbolEP8OSSymbol -__ZN12OSSymbolPool12removeSymbolEP8OSSymbol -__ZN12OSSymbolPool13initHashStateEv -__ZN12OSSymbolPool13nextHashStateEP17OSSymbolPoolState -__ZN12OSSymbolPool18reconstructSymbolsEv -__ZN12OSSymbolPool4initEv -__ZN12OSSymbolPool4log2Ej -__ZN12OSSymbolPool6exp2mlEj -__ZN12OSSymbolPoolC1EPKS_ -__ZN12OSSymbolPoolC2EPKS_ -__ZN12OSSymbolPoolD0Ev -__ZN12OSSymbolPoolD1Ev -__ZN12OSSymbolPoolD2Ev -__ZN12OSSymbolPooldlEPvm -__ZN12OSSymbolPoolnwEm -__ZN13IOCommandGate10gMetaClassE -__ZN13IOCommandGate10runCommandEPvS0_S0_S0_ -__ZN13IOCommandGate10superClassE -__ZN13IOCommandGate11commandGateEP8OSObjectPFiS1_PvS2_S2_S2_E -__ZN13IOCommandGate11setWorkLoopEP10IOWorkLoop -__ZN13IOCommandGate12commandSleepEPv12UnsignedWidem -__ZN13IOCommandGate12commandSleepEPvm -__ZN13IOCommandGate13attemptActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_ -__ZN13IOCommandGate13commandWakeupEPvb -__ZN13IOCommandGate14attemptCommandEPvS0_S0_S0_ -__ZN13IOCommandGate23_RESERVEDIOCommandGate1Ev 
-__ZN13IOCommandGate23_RESERVEDIOCommandGate2Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate3Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate4Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate5Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate6Ev -__ZN13IOCommandGate23_RESERVEDIOCommandGate7Ev -__ZN13IOCommandGate4freeEv -__ZN13IOCommandGate4initEP8OSObjectPFiS1_PvS2_S2_S2_E -__ZN13IOCommandGate6enableEv -__ZN13IOCommandGate7disableEv -__ZN13IOCommandGate9MetaClassC1Ev -__ZN13IOCommandGate9MetaClassC2Ev -__ZN13IOCommandGate9metaClassE -__ZN13IOCommandGate9runActionEPFiP8OSObjectPvS2_S2_S2_ES2_S2_S2_S2_ -__ZN13IOCommandGateC1EPK11OSMetaClass -__ZN13IOCommandGateC1Ev -__ZN13IOCommandGateC2EPK11OSMetaClass -__ZN13IOCommandGateC2Ev -__ZN13IOCommandGateD0Ev -__ZN13IOCommandGateD2Ev -__ZN13IOCommandPool10gMetaClassE -__ZN13IOCommandPool10getCommandEb -__ZN13IOCommandPool10superClassE -__ZN13IOCommandPool11commandPoolEP9IOServiceP10IOWorkLoopm -__ZN13IOCommandPool12withWorkLoopEP10IOWorkLoop -__ZN13IOCommandPool13returnCommandEP9IOCommand -__ZN13IOCommandPool15gatedGetCommandEPP9IOCommandb -__ZN13IOCommandPool16initWithWorkLoopEP10IOWorkLoop -__ZN13IOCommandPool18gatedReturnCommandEP9IOCommand -__ZN13IOCommandPool23_RESERVEDIOCommandPool0Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool1Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool2Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool3Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool4Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool5Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool6Ev -__ZN13IOCommandPool23_RESERVEDIOCommandPool7Ev -__ZN13IOCommandPool4freeEv -__ZN13IOCommandPool4initEP9IOServiceP10IOWorkLoopm -__ZN13IOCommandPool9MetaClassC1Ev -__ZN13IOCommandPool9MetaClassC2Ev -__ZN13IOCommandPool9metaClassE -__ZN13IOCommandPoolC1EPK11OSMetaClass -__ZN13IOCommandPoolC1Ev -__ZN13IOCommandPoolC2EPK11OSMetaClass -__ZN13IOCommandPoolC2Ev -__ZN13IOCommandPoolD0Ev -__ZN13IOCommandPoolD2Ev -__ZN13IOEventSource10gMetaClassE -__ZN13IOEventSource10superClassE -__ZN13IOEventSource10wakeupGateEPvb -__ZN13IOEventSource11setWorkLoopEP10IOWorkLoop -__ZN13IOEventSource12tryCloseGateEv -__ZN13IOEventSource19signalWorkAvailableEv -__ZN13IOEventSource23_RESERVEDIOEventSource0Ev -__ZN13IOEventSource23_RESERVEDIOEventSource1Ev -__ZN13IOEventSource23_RESERVEDIOEventSource2Ev -__ZN13IOEventSource23_RESERVEDIOEventSource3Ev -__ZN13IOEventSource23_RESERVEDIOEventSource4Ev -__ZN13IOEventSource23_RESERVEDIOEventSource5Ev -__ZN13IOEventSource23_RESERVEDIOEventSource6Ev -__ZN13IOEventSource23_RESERVEDIOEventSource7Ev -__ZN13IOEventSource4freeEv -__ZN13IOEventSource4initEP8OSObjectPFvS1_zE -__ZN13IOEventSource6enableEv -__ZN13IOEventSource7disableEv -__ZN13IOEventSource7setNextEPS_ -__ZN13IOEventSource8openGateEv -__ZN13IOEventSource9MetaClassC1Ev -__ZN13IOEventSource9MetaClassC2Ev -__ZN13IOEventSource9closeGateEv -__ZN13IOEventSource9metaClassE -__ZN13IOEventSource9setActionEPFvP8OSObjectzE -__ZN13IOEventSource12checkForWorkEv -__ZN13IOEventSource9sleepGateEPv12UnsignedWidem -__ZN13IOEventSource9sleepGateEPvm -__ZN13IOEventSourceC1EPK11OSMetaClass -__ZN13IOEventSourceC2EPK11OSMetaClass -__ZN13IOEventSourceD0Ev -__ZN13IOEventSourceD2Ev -__ZN13_IOServiceJob10gMetaClassE -__ZN13_IOServiceJob10pingConfigEPS_ -__ZN13_IOServiceJob10superClassE -__ZN13_IOServiceJob8startJobEP9IOServiceim -__ZN13_IOServiceJob9MetaClassC1Ev -__ZN13_IOServiceJob9MetaClassC2Ev -__ZN13_IOServiceJob9metaClassE -__ZN13_IOServiceJobC1EPK11OSMetaClass -__ZN13_IOServiceJobC1Ev -__ZN13_IOServiceJobC2EPK11OSMetaClass 
-__ZN13_IOServiceJobC2Ev
-__ZN13_IOServiceJobD0Ev
-__ZN13_IOServiceJobD2Ev
-__ZN14IOCommandQueue10gMetaClassE
-__ZN14IOCommandQueue10superClassE
-__ZN14IOCommandQueue12checkForWorkEv
-__ZN14IOCommandQueue12commandQueueEP8OSObjectPFvS1_PvS2_S2_S2_Ei
-__ZN14IOCommandQueue14enqueueCommandEbPvS0_S0_S0_
-__ZN14IOCommandQueue15performAndFlushEP8OSObjectPFvS1_PvS2_S2_S2_E
-__ZN14IOCommandQueue4freeEv
-__ZN14IOCommandQueue4initEP8OSObjectPFvS1_PvS2_S2_S2_Ei
-__ZN14IOCommandQueue9MetaClassC1Ev
-__ZN14IOCommandQueue9MetaClassC2Ev
-__ZN14IOCommandQueue9metaClassE
-__ZN14IOCommandQueueC1EPK11OSMetaClass
-__ZN14IOCommandQueueC1Ev
-__ZN14IOCommandQueueC2EPK11OSMetaClass
-__ZN14IOCommandQueueC2Ev
-__ZN14IOCommandQueueD0Ev
-__ZN14IOCommandQueueD2Ev
-__ZN14IODeviceMemory12withSubRangeEPS_mm
-__ZN14IODeviceMemory13arrayFromListEPNS_11InitElementEm
-__ZN14IODeviceMemory9withRangeEmm
-__ZN14IOMemoryCursor10gMetaClassE
-__ZN14IOMemoryCursor10superClassE
-__ZN14IOMemoryCursor17withSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm
-__ZN14IOMemoryCursor19genPhysicalSegmentsEP18IOMemoryDescriptormPvmmPm
-__ZN14IOMemoryCursor21initWithSpecificationEPFvNS_15PhysicalSegmentEPvmEmmm
-__ZN14IOMemoryCursor9MetaClassC1Ev
-__ZN14IOMemoryCursor9MetaClassC2Ev
-__ZN14IOMemoryCursor9metaClassE
-__ZN14IOMemoryCursorC1EPK11OSMetaClass
-__ZN14IOMemoryCursorC1Ev
-__ZN14IOMemoryCursorC2EPK11OSMetaClass
-__ZN14IOMemoryCursorC2Ev
-__ZN14IOMemoryCursorD0Ev
-__ZN14IOMemoryCursorD2Ev
-__ZN14IOPMrootDomain10gMetaClassE
-__ZN14IOPMrootDomain10superClassE
-__ZN14IOPMrootDomain11sleepSystemEv
-__ZN14IOPMrootDomain12tellChangeUpEm
-__ZN14IOPMrootDomain12wakeFromDozeEv
-__ZN14IOPMrootDomain13askChangeDownEm
-__ZN14IOPMrootDomain13restartSystemEv
-__ZN14IOPMrootDomain13setPropertiesEP8OSObject
-__ZN14IOPMrootDomain14publishFeatureEPKc
-__ZN14IOPMrootDomain14shutdownSystemEv
-__ZN14IOPMrootDomain14tellChangeDownEm
-__ZN14IOPMrootDomain15powerChangeDoneEm
-__ZN14IOPMrootDomain16tellNoChangeDownEm
-__ZN14IOPMrootDomain17getSleepSupportedEv
-__ZN14IOPMrootDomain17setAggressivenessEmm
-__ZN14IOPMrootDomain17setSleepSupportedEm
-__ZN14IOPMrootDomain18changePowerStateToEm
-__ZN14IOPMrootDomain19sysPowerDownHandlerEPvS0_mP9IOServiceS0_j
-__ZN14IOPMrootDomain22changePowerStateToPrivEm
-__ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm
-__ZN14IOPMrootDomain24receivePowerNotificationEm
-__ZN14IOPMrootDomain5startEP9IOService
-__ZN14IOPMrootDomain9MetaClassC1Ev
-__ZN14IOPMrootDomain9MetaClassC2Ev
-__ZN14IOPMrootDomain9constructEv
-__ZN14IOPMrootDomain9metaClassE
-__ZN14IOPMrootDomainC1EPK11OSMetaClass
-__ZN14IOPMrootDomainC1Ev
-__ZN14IOPMrootDomainC2EPK11OSMetaClass
-__ZN14IOPMrootDomainC2Ev
-__ZN14IOPMrootDomainD0Ev
-__ZN14IOPMrootDomainD2Ev
-__ZN15IOConditionLock10gMetaClassE
-__ZN15IOConditionLock10superClassE
-__ZN15IOConditionLock10unlockWithEi
-__ZN15IOConditionLock12setConditionEi
-__ZN15IOConditionLock13withConditionEib
-__ZN15IOConditionLock17initWithConditionEib
-__ZN15IOConditionLock4freeEv
-__ZN15IOConditionLock4lockEv
-__ZN15IOConditionLock6unlockEv
-__ZN15IOConditionLock7tryLockEv
-__ZN15IOConditionLock8lockWhenEi
-__ZN15IOConditionLock9MetaClassC1Ev
-__ZN15IOConditionLock9MetaClassC2Ev
-__ZN15IOConditionLock9metaClassE
-__ZN15IOConditionLockC1EPK11OSMetaClass
-__ZN15IOConditionLockC1Ev
-__ZN15IOConditionLockC2EPK11OSMetaClass
-__ZN15IOConditionLockC2Ev
-__ZN15IOConditionLockD0Ev
-__ZN15IOConditionLockD2Ev
-__ZN15IOPanicPlatform10gMetaClassE
-__ZN15IOPanicPlatform10superClassE
-__ZN15IOPanicPlatform5startEP9IOService
-__ZN15IOPanicPlatform9MetaClassC1Ev
-__ZN15IOPanicPlatform9MetaClassC2Ev
-__ZN15IOPanicPlatform9metaClassE
-__ZN15IOPanicPlatformC1EPK11OSMetaClass
-__ZN15IOPanicPlatformC1Ev
-__ZN15IOPanicPlatformC2EPK11OSMetaClass
-__ZN15IOPanicPlatformC2Ev
-__ZN15IOPanicPlatformD0Ev
-__ZN15IOPanicPlatformD2Ev
-__ZN15IORegistryEntry10gMetaClassE
-__ZN15IORegistryEntry10initializeEv
-__ZN15IORegistryEntry10superClassE
-__ZN15IORegistryEntry11dealiasPathEPPKcPK15IORegistryPlane
-__ZN15IORegistryEntry11detachAboveEPK15IORegistryPlane
-__ZN15IORegistryEntry11setLocationEPK8OSSymbolPK15IORegistryPlane
-__ZN15IORegistryEntry11setLocationEPKcPK15IORegistryPlane
-__ZN15IORegistryEntry11setPropertyEPK8OSStringP8OSObject
-__ZN15IORegistryEntry11setPropertyEPK8OSSymbolP8OSObject
-__ZN15IORegistryEntry11setPropertyEPKcP8OSObject
-__ZN15IORegistryEntry11setPropertyEPKcPvj
-__ZN15IORegistryEntry11setPropertyEPKcS1_
-__ZN15IORegistryEntry11setPropertyEPKcb
-__ZN15IORegistryEntry11setPropertyEPKcyj
-__ZN15IORegistryEntry13attachToChildEPS_PK15IORegistryPlane
-__ZN15IORegistryEntry13childFromPathEPKcPK15IORegistryPlanePcPi
-__ZN15IORegistryEntry13setPropertiesEP8OSObject
-__ZN15IORegistryEntry14attachToParentEPS_PK15IORegistryPlane
-__ZN15IORegistryEntry14removePropertyEPK8OSString
-__ZN15IORegistryEntry14removePropertyEPK8OSSymbol
-__ZN15IORegistryEntry14removePropertyEPKc
-__ZN15IORegistryEntry15detachFromChildEPS_PK15IORegistryPlane
-__ZN15IORegistryEntry15getRegistryRootEv
-__ZN15IORegistryEntry16detachFromParentEPS_PK15IORegistryPlane
-__ZN15IORegistryEntry16setPropertyTableEP12OSDictionary
-__ZN15IORegistryEntry17matchPathLocationEPKcPK15IORegistryPlane
-__ZN15IORegistryEntry17runPropertyActionEPFiP8OSObjectPvS2_S2_S2_ES1_S2_S2_S2_S2_
-__ZN15IORegistryEntry18getGenerationCountEv
-__ZN15IORegistryEntry21getChildFromComponentEPPKcPK15IORegistryPlane
-__ZN15IORegistryEntry25_RESERVEDIORegistryEntry6Ev
-__ZN15IORegistryEntry25_RESERVEDIORegistryEntry7Ev
-__ZN15IORegistryEntry25_RESERVEDIORegistryEntry8Ev
-__ZN15IORegistryEntry25_RESERVEDIORegistryEntry9Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry10Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry11Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry12Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry13Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry14Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry15Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry16Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry17Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry18Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry19Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry20Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry21Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry22Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry23Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry24Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry25Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry26Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry27Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry28Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry29Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry30Ev
-__ZN15IORegistryEntry26_RESERVEDIORegistryEntry31Ev
-__ZN15IORegistryEntry4freeEv
-__ZN15IORegistryEntry4initEP12OSDictionary
-__ZN15IORegistryEntry4initEPS_PK15IORegistryPlane
-__ZN15IORegistryEntry7setNameEPK8OSSymbolPK15IORegistryPlane
-__ZN15IORegistryEntry7setNameEPKcPK15IORegistryPlane
-__ZN15IORegistryEntry8fromPathEPKcPK15IORegistryPlanePcPiPS_
-__ZN15IORegistryEntry8getPlaneEPKc
-__ZN15IORegistryEntry9MetaClassC1Ev
-__ZN15IORegistryEntry9MetaClassC2Ev
-__ZN15IORegistryEntry9detachAllEPK15IORegistryPlane
-__ZN15IORegistryEntry9makePlaneEPKc
-__ZN15IORegistryEntry9metaClassE
-__ZN15IORegistryEntryC1EPK11OSMetaClass
-__ZN15IORegistryEntryC1Ev
-__ZN15IORegistryEntryC2EPK11OSMetaClass
-__ZN15IORegistryEntryC2Ev
-__ZN15IORegistryEntryD0Ev
-__ZN15IORegistryEntryD2Ev
-__ZN15IORegistryPlane10gMetaClassE
-__ZN15IORegistryPlane10superClassE
-__ZN15IORegistryPlane9MetaClassC1Ev
-__ZN15IORegistryPlane9MetaClassC2Ev
-__ZN15IORegistryPlane9metaClassE
-__ZN15IORegistryPlaneC1EPK11OSMetaClass
-__ZN15IORegistryPlaneC1Ev
-__ZN15IORegistryPlaneC2EPK11OSMetaClass
-__ZN15IORegistryPlaneC2Ev
-__ZN15IORegistryPlaneD0Ev
-__ZN15IORegistryPlaneD2Ev
-__ZN15IOWatchDogTimer10gMetaClassE
-__ZN15IOWatchDogTimer10superClassE
-__ZN15IOWatchDogTimer13setPropertiesEP8OSObject
-__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer0Ev
-__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer1Ev
-__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer2Ev
-__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer3Ev
-__ZN15IOWatchDogTimer4stopEP9IOService
-__ZN15IOWatchDogTimer5startEP9IOService
-__ZN15IOWatchDogTimer9MetaClassC1Ev
-__ZN15IOWatchDogTimer9MetaClassC2Ev
-__ZN15IOWatchDogTimer9metaClassE
-__ZN15IOWatchDogTimerC1EPK11OSMetaClass
-__ZN15IOWatchDogTimerC2EPK11OSMetaClass
-__ZN15IOWatchDogTimerD0Ev
-__ZN15IOWatchDogTimerD2Ev
-__ZN15OSMetaClassBase12safeMetaCastEPKS_PK11OSMetaClass
-__ZN15OSMetaClassBase13checkTypeInstEPKS_S1_
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase3Ev
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase4Ev
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase5Ev
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase6Ev
-__ZN15OSMetaClassBase25_RESERVEDOSMetaClassBase7Ev
-__ZN15OSMetaClassBaseC1Ev
-__ZN15OSMetaClassBaseC2Ev
-__ZN15OSMetaClassBaseD0Ev
-__ZN15OSMetaClassBaseD2Ev
-__ZN15OSMetaClassMetaC1Ev
-__ZN15OSMetaClassMetaC2Ev
-__ZN16IOKitDiagnostics10gMetaClassE
-__ZN16IOKitDiagnostics10superClassE
-__ZN16IOKitDiagnostics11diagnosticsEv
-__ZN16IOKitDiagnostics12updateOffsetEP12OSDictionarymPKc
-__ZN16IOKitDiagnostics9MetaClassC1Ev
-__ZN16IOKitDiagnostics9MetaClassC2Ev
-__ZN16IOKitDiagnostics9metaClassE
-__ZN16IOKitDiagnosticsC1EPK11OSMetaClass
-__ZN16IOKitDiagnosticsC1Ev
-__ZN16IOKitDiagnosticsC2EPK11OSMetaClass
-__ZN16IOKitDiagnosticsC2Ev
-__ZN16IOKitDiagnosticsD0Ev
-__ZN16IOKitDiagnosticsD2Ev
-__ZN16IOPMinformeeList10gMetaClassE
-__ZN16IOPMinformeeList10initializeEv
-__ZN16IOPMinformeeList10nextInListEP12IOPMinformee
-__ZN16IOPMinformeeList10superClassE
-__ZN16IOPMinformeeList11firstInListEv
-__ZN16IOPMinformeeList13numberOfItemsEv
-__ZN16IOPMinformeeList14removeFromListEP9IOService
-__ZN16IOPMinformeeList4freeEv
-__ZN16IOPMinformeeList8findItemEP9IOService
-__ZN16IOPMinformeeList9MetaClassC1Ev
-__ZN16IOPMinformeeList9MetaClassC2Ev
-__ZN16IOPMinformeeList9addToListEP12IOPMinformee
-__ZN16IOPMinformeeList9metaClassE
-__ZN16IOPMinformeeListC1EPK11OSMetaClass
-__ZN16IOPMinformeeListC1Ev
-__ZN16IOPMinformeeListC2EPK11OSMetaClass
-__ZN16IOPMinformeeListC2Ev
-__ZN16IOPMinformeeListD0Ev
-__ZN16IOPMinformeeListD2Ev
-__ZN16IOPlatformDevice10gMetaClassE
-__ZN16IOPlatformDevice10superClassE
-__ZN16IOPlatformDevice12getResourcesEv
-__ZN16IOPlatformDevice13matchLocationEP9IOService
-__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev
-__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev
-__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev
-__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice3Ev
-__ZN16IOPlatformDevice9MetaClassC1Ev
-__ZN16IOPlatformDevice9MetaClassC2Ev
-__ZN16IOPlatformDevice9metaClassE
-__ZN16IOPlatformDeviceC1EPK11OSMetaClass
-__ZN16IOPlatformDeviceC1Ev
-__ZN16IOPlatformDeviceC2EPK11OSMetaClass
-__ZN16IOPlatformDeviceC2Ev
-__ZN16IOPlatformDeviceD0Ev
-__ZN16IOPlatformDeviceD2Ev
-__ZN16IOPlatformExpert10gMetaClassE
-__ZN16IOPlatformExpert10superClassE
-__ZN16IOPlatformExpert11haltRestartEj
-__ZN16IOPlatformExpert11sleepKernelEv
-__ZN16IOPlatformExpert12CheckSubTreeEP7OSArrayP9IOServiceS3_P12OSDictionary
-__ZN16IOPlatformExpert12getModelNameEPci
-__ZN16IOPlatformExpert12hasPMFeatureEm
-__ZN16IOPlatformExpert13savePanicInfoEPhm
-__ZN16IOPlatformExpert14getBootROMTypeEv
-__ZN16IOPlatformExpert14getChipSetTypeEv
-__ZN16IOPlatformExpert14getConsoleInfoEP8PE_Video
-__ZN16IOPlatformExpert14getMachineNameEPci
-__ZN16IOPlatformExpert14getMachineTypeEv
-__ZN16IOPlatformExpert14setBootROMTypeEl
-__ZN16IOPlatformExpert14setChipSetTypeEl
-__ZN16IOPlatformExpert14setConsoleInfoEP8PE_Videoj
-__ZN16IOPlatformExpert14setMachineTypeEl
-__ZN16IOPlatformExpert15getGMTTimeOfDayEv
-__ZN16IOPlatformExpert15getNubResourcesEP9IOService
-__ZN16IOPlatformExpert15setGMTTimeOfDayEl
-__ZN16IOPlatformExpert16PMRegisterDeviceEP9IOServiceS1_
-__ZN16IOPlatformExpert16atInterruptLevelEv
-__ZN16IOPlatformExpert16hasPrivPMFeatureEm
-__ZN16IOPlatformExpert20callPlatformFunctionEPK8OSSymbolbPvS3_S3_S3_
-__ZN16IOPlatformExpert21RegisterServiceInTreeEP9IOServiceP12OSDictionaryS3_S1_
-__ZN16IOPlatformExpert21numBatteriesSupportedEv
-__ZN16IOPlatformExpert21platformAdjustServiceEP9IOService
-__ZN16IOPlatformExpert23registerNVRAMControllerEP17IONVRAMController
-__ZN16IOPlatformExpert25PMInstantiatePowerDomainsEv
-__ZN16IOPlatformExpert25getPhysicalRangeAllocatorEv
-__ZN16IOPlatformExpert25lookUpInterruptControllerEP8OSSymbol
-__ZN16IOPlatformExpert25setCPUInterruptPropertiesEP9IOService
-__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert2Ev
-__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert3Ev
-__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert4Ev
-__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert5Ev
-__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert6Ev
-__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert7Ev
-__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert8Ev
-__ZN16IOPlatformExpert26_RESERVEDIOPlatformExpert9Ev
-__ZN16IOPlatformExpert27_RESERVEDIOPlatformExpert10Ev
-__ZN16IOPlatformExpert27_RESERVEDIOPlatformExpert11Ev
-__ZN16IOPlatformExpert27registerInterruptControllerEP8OSSymbolP21IOInterruptController
-__ZN16IOPlatformExpert30createSystemSerialNumberStringEP6OSData
-__ZN16IOPlatformExpert5PMLogEPKcmmm
-__ZN16IOPlatformExpert5startEP9IOService
-__ZN16IOPlatformExpert6attachEP9IOService
-__ZN16IOPlatformExpert9MetaClassC1Ev
-__ZN16IOPlatformExpert9MetaClassC2Ev
-__ZN16IOPlatformExpert9configureEP9IOService
-__ZN16IOPlatformExpert9createNubEP12OSDictionary
-__ZN16IOPlatformExpert9metaClassE
-__ZN16IOPlatformExpertC1EPK11OSMetaClass
-__ZN16IOPlatformExpertC1Ev
-__ZN16IOPlatformExpertC2EPK11OSMetaClass
-__ZN16IOPlatformExpertC2Ev
-__ZN16IOPlatformExpertD0Ev
-__ZN16IOPlatformExpertD2Ev
-__ZN16IORangeAllocator10deallocateEmm
-__ZN16IORangeAllocator10gMetaClassE
-__ZN16IORangeAllocator10superClassE
-__ZN16IORangeAllocator12allocElementEm
-__ZN16IORangeAllocator12getFreeCountEv
-__ZN16IORangeAllocator13allocateRangeEmm
-__ZN16IORangeAllocator14deallocElementEm
-__ZN16IORangeAllocator16getFragmentCountEv
-__ZN16IORangeAllocator19getFragmentCapacityEv
-__ZN16IORangeAllocator28setFragmentCapacityIncrementEm
-__ZN16IORangeAllocator4freeEv
-__ZN16IORangeAllocator4initEmmmm
-__ZN16IORangeAllocator8allocateEmPmm
-__ZN16IORangeAllocator9MetaClassC1Ev
-__ZN16IORangeAllocator9MetaClassC2Ev
-__ZN16IORangeAllocator9metaClassE
-__ZN16IORangeAllocator9withRangeEmmmm
-__ZN16IORangeAllocatorC1EPK11OSMetaClass
-__ZN16IORangeAllocatorC1Ev
-__ZN16IORangeAllocatorC2EPK11OSMetaClass
-__ZN16IORangeAllocatorC2Ev
-__ZN16IORangeAllocatorD0Ev
-__ZN16IORangeAllocatorD2Ev
-__ZN17IOBigMemoryCursor10gMetaClassE
-__ZN17IOBigMemoryCursor10superClassE
-__ZN17IOBigMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
-__ZN17IOBigMemoryCursor17withSpecificationEmmm
-__ZN17IOBigMemoryCursor21initWithSpecificationEmmm
-__ZN17IOBigMemoryCursor9MetaClassC1Ev
-__ZN17IOBigMemoryCursor9MetaClassC2Ev
-__ZN17IOBigMemoryCursor9metaClassE
-__ZN17IOBigMemoryCursorC1EPK11OSMetaClass
-__ZN17IOBigMemoryCursorC1Ev
-__ZN17IOBigMemoryCursorC2EPK11OSMetaClass
-__ZN17IOBigMemoryCursorC2Ev
-__ZN17IOBigMemoryCursorD0Ev
-__ZN17IOBigMemoryCursorD2Ev
-__ZN17IOPolledInterface10gMetaClassE
-__ZN17IOPolledInterface15checkAllForWorkEv
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface0Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface1Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface2Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface3Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface4Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface5Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface6Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface7Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface8Ev
-__ZN17IOPolledInterface27_RESERVEDIOPolledInterface9Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface10Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface11Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface12Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface13Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface14Ev
-__ZN17IOPolledInterface28_RESERVEDIOPolledInterface15Ev
-__ZN17IOPolledInterfaceC2EPK11OSMetaClass
-__ZN17IOPolledInterfaceD2Ev
-__ZN17IOPowerConnection10gMetaClassE
-__ZN17IOPowerConnection10superClassE
-__ZN17IOPowerConnection14getAwaitingAckEv
-__ZN17IOPowerConnection14setAwaitingAckEb
-__ZN17IOPowerConnection16parentKnowsStateEv
-__ZN17IOPowerConnection19setParentKnowsStateEb
-__ZN17IOPowerConnection21getDesiredDomainStateEv
-__ZN17IOPowerConnection21setDesiredDomainStateEm
-__ZN17IOPowerConnection22childHasRequestedPowerEv
-__ZN17IOPowerConnection23getPreventIdleSleepFlagEv
-__ZN17IOPowerConnection23parentCurrentPowerFlagsEv
-__ZN17IOPowerConnection23setPreventIdleSleepFlagEm
-__ZN17IOPowerConnection25getPreventSystemSleepFlagEv
-__ZN17IOPowerConnection25setChildHasRequestedPowerEv
-__ZN17IOPowerConnection25setPreventSystemSleepFlagEm
-__ZN17IOPowerConnection26setParentCurrentPowerFlagsEm
-__ZN17IOPowerConnection9MetaClassC1Ev
-__ZN17IOPowerConnection9MetaClassC2Ev
-__ZN17IOPowerConnection9metaClassE
-__ZN17IOPowerConnectionC1EPK11OSMetaClass
-__ZN17IOPowerConnectionC1Ev
-__ZN17IOPowerConnectionC2EPK11OSMetaClass
-__ZN17IOPowerConnectionC2Ev
-__ZN17IOPowerConnectionD0Ev
-__ZN17IOPowerConnectionD2Ev
-__ZN18IODTPlatformExpert10createNubsEP9IOServiceP10OSIterator
-__ZN18IODTPlatformExpert10gMetaClassE
-__ZN18IODTPlatformExpert10superClassE
-__ZN18IODTPlatformExpert10writeXPRAMEmPhm
-__ZN18IODTPlatformExpert11haltRestartEj
-__ZN18IODTPlatformExpert12getModelNameEPci
-__ZN18IODTPlatformExpert13savePanicInfoEPhm
-__ZN18IODTPlatformExpert14getMachineNameEPci
-__ZN18IODTPlatformExpert15getNubResourcesEP9IOService
-__ZN18IODTPlatformExpert15processTopLevelEP15IORegistryEntry
-__ZN18IODTPlatformExpert17readNVRAMPropertyEP15IORegistryEntryPPK8OSSymbolPP6OSData
-__ZN18IODTPlatformExpert18getNVRAMPartitionsEv
-__ZN18IODTPlatformExpert18readNVRAMPartitionEPK8OSSymbolmPhm
-__ZN18IODTPlatformExpert18writeNVRAMPropertyEP15IORegistryEntryPK8OSSymbolP6OSData
-__ZN18IODTPlatformExpert19writeNVRAMPartitionEPK8OSSymbolmPhm
-__ZN18IODTPlatformExpert23registerNVRAMControllerEP17IONVRAMController
-__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert0Ev
-__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert1Ev
-__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert2Ev
-__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert3Ev
-__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert4Ev
-__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert5Ev
-__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert6Ev
-__ZN18IODTPlatformExpert28_RESERVEDIODTPlatformExpert7Ev
-__ZN18IODTPlatformExpert30createSystemSerialNumberStringEP6OSData
-__ZN18IODTPlatformExpert5probeEP9IOServicePl
-__ZN18IODTPlatformExpert9MetaClassC1Ev
-__ZN18IODTPlatformExpert9MetaClassC2Ev
-__ZN18IODTPlatformExpert9configureEP9IOService
-__ZN18IODTPlatformExpert9createNubEP15IORegistryEntry
-__ZN18IODTPlatformExpert9metaClassE
-__ZN18IODTPlatformExpert9readXPRAMEmPhm
-__ZN18IODTPlatformExpertC1EPK11OSMetaClass
-__ZN18IODTPlatformExpertC2EPK11OSMetaClass
-__ZN18IODTPlatformExpertD0Ev
-__ZN18IODTPlatformExpertD2Ev
-__ZN18IOMemoryDescriptor10addMappingEP11IOMemoryMap
-__ZN18IOMemoryDescriptor10gMetaClassE
-__ZN18IOMemoryDescriptor10initializeEv
-__ZN18IOMemoryDescriptor10setMappingEP4taskjm
-__ZN18IOMemoryDescriptor10superClassE
-__ZN18IOMemoryDescriptor10withRangesEP14IOVirtualRangem11IODirectionP4taskb
-__ZN18IOMemoryDescriptor10writeBytesEmPKvm
-__ZN18IOMemoryDescriptor11makeMappingEPS_P4taskjmmm
-__ZN18IOMemoryDescriptor11withAddressEPvm11IODirection
-__ZN18IOMemoryDescriptor11withAddressEjm11IODirectionP4task
-__ZN18IOMemoryDescriptor11withOptionsEPvmmP4taskmP8IOMapper
-__ZN18IOMemoryDescriptor12setPurgeableEmPm
-__ZN18IOMemoryDescriptor12withSubRangeEPS_mm11IODirection
-__ZN18IOMemoryDescriptor13removeMappingEP11IOMemoryMap
-__ZN18IOMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb
-__ZN18IOMemoryDescriptor15initWithAddressEPvm11IODirection
-__ZN18IOMemoryDescriptor15initWithAddressEjm11IODirectionP4task
-__ZN18IOMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper
-__ZN18IOMemoryDescriptor16getSourceSegmentEmPm
-__ZN18IOMemoryDescriptor16performOperationEmmm
-__ZN18IOMemoryDescriptor17getVirtualSegmentEmPm
-__ZN18IOMemoryDescriptor18getPhysicalAddressEv
-__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPm
-__ZN18IOMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN18IOMemoryDescriptor18withPhysicalRangesEP15IOPhysicalRangem11IODirectionb
-__ZN18IOMemoryDescriptor19withPhysicalAddressEmm11IODirection
-__ZN18IOMemoryDescriptor20getPhysicalSegment64EmPm
-__ZN18IOMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb
-__ZN18IOMemoryDescriptor23initWithPhysicalAddressEmm11IODirection
-__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor8Ev
-__ZN18IOMemoryDescriptor28_RESERVEDIOMemoryDescriptor9Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor10Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor11Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor12Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor13Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor14Ev
-__ZN18IOMemoryDescriptor29_RESERVEDIOMemoryDescriptor15Ev
-__ZN18IOMemoryDescriptor30withPersistentMemoryDescriptorEPS_
-__ZN18IOMemoryDescriptor3mapEP4taskjmmm
-__ZN18IOMemoryDescriptor3mapEm
-__ZN18IOMemoryDescriptor4freeEv
-__ZN18IOMemoryDescriptor5doMapEP7_vm_mapPjmmm
-__ZN18IOMemoryDescriptor6getTagEv
-__ZN18IOMemoryDescriptor6setTagEm
-__ZN18IOMemoryDescriptor7doUnmapEP7_vm_mapjm
-__ZN18IOMemoryDescriptor8redirectEP4taskb
-__ZN18IOMemoryDescriptor9MetaClassC1Ev
-__ZN18IOMemoryDescriptor9MetaClassC2Ev
-__ZN18IOMemoryDescriptor9metaClassE
-__ZN18IOMemoryDescriptor9readBytesEmPvm
-__ZN18IOMemoryDescriptorC1EPK11OSMetaClass
-__ZN18IOMemoryDescriptorC2EPK11OSMetaClass
-__ZN18IOMemoryDescriptorD0Ev
-__ZN18IOMemoryDescriptorD2Ev
-__ZN18IORegistryIterator10enterEntryEPK15IORegistryPlane
-__ZN18IORegistryIterator10enterEntryEv
-__ZN18IORegistryIterator10gMetaClassE
-__ZN18IORegistryIterator10iterateAllEv
-__ZN18IORegistryIterator10superClassE
-__ZN18IORegistryIterator11iterateOverEP15IORegistryEntryPK15IORegistryPlanem
-__ZN18IORegistryIterator11iterateOverEPK15IORegistryPlanem
-__ZN18IORegistryIterator13getNextObjectEv
-__ZN18IORegistryIterator15getCurrentEntryEv
-__ZN18IORegistryIterator17getNextObjectFlatEv
-__ZN18IORegistryIterator22getNextObjectRecursiveEv
-__ZN18IORegistryIterator4freeEv
-__ZN18IORegistryIterator5resetEv
-__ZN18IORegistryIterator7isValidEv
-__ZN18IORegistryIterator9MetaClassC1Ev
-__ZN18IORegistryIterator9MetaClassC2Ev
-__ZN18IORegistryIterator9exitEntryEv
-__ZN18IORegistryIterator9metaClassE
-__ZN18IORegistryIteratorC1EPK11OSMetaClass
-__ZN18IORegistryIteratorC1Ev
-__ZN18IORegistryIteratorC2EPK11OSMetaClass
-__ZN18IORegistryIteratorC2Ev
-__ZN18IORegistryIteratorD0Ev
-__ZN18IORegistryIteratorD2Ev
-__ZN18IOTimerEventSource10gMetaClassE
-__ZN18IOTimerEventSource10setTimeoutE12UnsignedWide
-__ZN18IOTimerEventSource10setTimeoutE13mach_timespec
-__ZN18IOTimerEventSource10setTimeoutEmm
-__ZN18IOTimerEventSource10superClassE
-__ZN18IOTimerEventSource10wakeAtTimeE12UnsignedWide
-__ZN18IOTimerEventSource10wakeAtTimeE13mach_timespec
-__ZN18IOTimerEventSource10wakeAtTimeEmm
-__ZN18IOTimerEventSource11setWorkLoopEP10IOWorkLoop
-__ZN18IOTimerEventSource12setTimeoutMSEm
-__ZN18IOTimerEventSource12setTimeoutUSEm
-__ZN18IOTimerEventSource12wakeAtTimeMSEm
-__ZN18IOTimerEventSource12wakeAtTimeUSEm
-__ZN18IOTimerEventSource13cancelTimeoutEv
-__ZN18IOTimerEventSource14setTimeoutFuncEv
-__ZN18IOTimerEventSource15setTimeoutTicksEm
-__ZN18IOTimerEventSource15wakeAtTimeTicksEm
-__ZN18IOTimerEventSource16timerEventSourceEP8OSObjectPFvS1_PS_E
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource0Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource1Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource2Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource3Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource4Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource5Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource6Ev
-__ZN18IOTimerEventSource28_RESERVEDIOTimerEventSource7Ev
-__ZN18IOTimerEventSource4freeEv
-__ZN18IOTimerEventSource4initEP8OSObjectPFvS1_PS_E
-__ZN18IOTimerEventSource6enableEv
-__ZN18IOTimerEventSource7disableEv
-__ZN18IOTimerEventSource7timeoutEPv
-__ZN18IOTimerEventSource9MetaClassC1Ev
-__ZN18IOTimerEventSource9MetaClassC2Ev
-__ZN18IOTimerEventSource9metaClassE
-__ZN18IOTimerEventSourceC1EPK11OSMetaClass
-__ZN18IOTimerEventSourceC1Ev
-__ZN18IOTimerEventSourceC2EPK11OSMetaClass
-__ZN18IOTimerEventSourceC2Ev
-__ZN18IOTimerEventSourceD0Ev
-__ZN18IOTimerEventSourceD2Ev
-__ZN18IOUserNotification10gMetaClassE
-__ZN18IOUserNotification10superClassE
-__ZN18IOUserNotification15setNotificationEP10IONotifier
-__ZN18IOUserNotification4freeEv
-__ZN18IOUserNotification4initEv
-__ZN18IOUserNotification5resetEv
-__ZN18IOUserNotification7isValidEv
-__ZN18IOUserNotification9MetaClassC1Ev
-__ZN18IOUserNotification9MetaClassC2Ev
-__ZN18IOUserNotification9metaClassE
-__ZN18IOUserNotificationC1EPK11OSMetaClass
-__ZN18IOUserNotificationC2EPK11OSMetaClass
-__ZN18IOUserNotificationD0Ev
-__ZN18IOUserNotificationD2Ev
-__ZN18_IOServiceNotifier10gMetaClassE
-__ZN18_IOServiceNotifier10superClassE
-__ZN18_IOServiceNotifier4freeEv
-__ZN18_IOServiceNotifier4waitEv
-__ZN18_IOServiceNotifier6enableEb
-__ZN18_IOServiceNotifier6removeEv
-__ZN18_IOServiceNotifier7disableEv
-__ZN18_IOServiceNotifier9MetaClassC1Ev
-__ZN18_IOServiceNotifier9MetaClassC2Ev
-__ZN18_IOServiceNotifier9metaClassE
-__ZN18_IOServiceNotifierC1EPK11OSMetaClass
-__ZN18_IOServiceNotifierC1Ev
-__ZN18_IOServiceNotifierC2EPK11OSMetaClass
-__ZN18_IOServiceNotifierC2Ev
-__ZN18_IOServiceNotifierD0Ev
-__ZN18_IOServiceNotifierD2Ev
-__ZN19IOPMPowerSourceList10gMetaClassE
-__ZN19IOPMPowerSourceList10initializeEv
-__ZN19IOPMPowerSourceList10nextInListEP15IOPMPowerSource
-__ZN19IOPMPowerSourceList10superClassE
-__ZN19IOPMPowerSourceList11firstInListEv
-__ZN19IOPMPowerSourceList13numberOfItemsEv
-__ZN19IOPMPowerSourceList14removeFromListEP15IOPMPowerSource
-__ZN19IOPMPowerSourceList4freeEv
-__ZN19IOPMPowerSourceList9MetaClassC1Ev
-__ZN19IOPMPowerSourceList9MetaClassC2Ev
-__ZN19IOPMPowerSourceList9addToListEP15IOPMPowerSource
-__ZN19IOPMPowerSourceList9metaClassE
-__ZN19IOPMPowerSourceListC1EPK11OSMetaClass
-__ZN19IOPMPowerSourceListC1Ev
-__ZN19IOPMPowerSourceListC2EPK11OSMetaClass
-__ZN19IOPMPowerSourceListC2Ev
-__ZN19IOPMPowerSourceListD0Ev
-__ZN19IOPMPowerSourceListD2Ev
-__ZN20IOLittleMemoryCursor10gMetaClassE
-__ZN20IOLittleMemoryCursor10superClassE
-__ZN20IOLittleMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
-__ZN20IOLittleMemoryCursor17withSpecificationEmmm
-__ZN20IOLittleMemoryCursor21initWithSpecificationEmmm
-__ZN20IOLittleMemoryCursor9MetaClassC1Ev
-__ZN20IOLittleMemoryCursor9MetaClassC2Ev
-__ZN20IOLittleMemoryCursor9metaClassE
-__ZN20IOLittleMemoryCursorC1EPK11OSMetaClass
-__ZN20IOLittleMemoryCursorC1Ev
-__ZN20IOLittleMemoryCursorC2EPK11OSMetaClass
-__ZN20IOLittleMemoryCursorC2Ev
-__ZN20IOLittleMemoryCursorD0Ev
-__ZN20IOLittleMemoryCursorD2Ev
-__ZN20OSCollectionIterator10gMetaClassE
-__ZN20OSCollectionIterator10superClassE
-__ZN20OSCollectionIterator13getNextObjectEv
-__ZN20OSCollectionIterator14withCollectionEPK12OSCollection
-__ZN20OSCollectionIterator18initWithCollectionEPK12OSCollection
-__ZN20OSCollectionIterator4freeEv
-__ZN20OSCollectionIterator5resetEv
-__ZN20OSCollectionIterator7isValidEv
-__ZN20OSCollectionIterator9MetaClassC1Ev
-__ZN20OSCollectionIterator9MetaClassC2Ev
-__ZN20OSCollectionIterator9metaClassE
-__ZN20OSCollectionIteratorC1EPK11OSMetaClass
-__ZN20OSCollectionIteratorC1Ev
-__ZN20OSCollectionIteratorC2EPK11OSMetaClass
-__ZN20OSCollectionIteratorC2Ev
-__ZN20OSCollectionIteratorD0Ev
-__ZN20OSCollectionIteratorD2Ev
-__ZN20RootDomainUserClient10gMetaClassE
-__ZN20RootDomainUserClient10superClassE
-__ZN20RootDomainUserClient11clientCloseEv
-__ZN20RootDomainUserClient15setPreventativeEmm
-__ZN20RootDomainUserClient26getTargetAndMethodForIndexEPP9IOServicem
-__ZN20RootDomainUserClient5startEP9IOService
-__ZN20RootDomainUserClient9MetaClassC1Ev
-__ZN20RootDomainUserClient9MetaClassC2Ev
-__ZN20RootDomainUserClient9metaClassE
-__ZN20RootDomainUserClientC1EPK11OSMetaClass
-__ZN20RootDomainUserClientC1Ev
-__ZN20RootDomainUserClientC2EPK11OSMetaClass
-__ZN20RootDomainUserClientC2Ev
-__ZN20RootDomainUserClientD0Ev
-__ZN20RootDomainUserClientD2Ev
-__ZN21IOInterruptController10gMetaClassE
-__ZN21IOInterruptController10initVectorElP17IOInterruptVector
-__ZN21IOInterruptController10superClassE
-__ZN21IOInterruptController11causeVectorElP17IOInterruptVector
-__ZN21IOInterruptController12enableVectorElP17IOInterruptVector
-__ZN21IOInterruptController13getVectorTypeElP17IOInterruptVector
-__ZN21IOInterruptController14causeInterruptEP9IOServicei
-__ZN21IOInterruptController15enableInterruptEP9IOServicei
-__ZN21IOInterruptController15handleInterruptEPvP9IOServicei
-__ZN21IOInterruptController16disableInterruptEP9IOServicei
-__ZN21IOInterruptController16getInterruptTypeEP9IOServiceiPi
-__ZN21IOInterruptController17disableVectorHardElP17IOInterruptVector
-__ZN21IOInterruptController17registerInterruptEP9IOServiceiPvPFvS2_S2_S2_iES2_
-__ZN21IOInterruptController17vectorCanBeSharedElP17IOInterruptVector
-__ZN21IOInterruptController19unregisterInterruptEP9IOServicei
-__ZN21IOInterruptController26getInterruptHandlerAddressEv
-__ZN21IOInterruptController31_RESERVEDIOInterruptController0Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController1Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController2Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController3Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController4Ev
-__ZN21IOInterruptController31_RESERVEDIOInterruptController5Ev
-__ZN21IOInterruptController9MetaClassC1Ev
-__ZN21IOInterruptController9MetaClassC2Ev
-__ZN21IOInterruptController9metaClassE
-__ZN21IOInterruptControllerC1EPK11OSMetaClass
-__ZN21IOInterruptControllerC2EPK11OSMetaClass
-__ZN21IOInterruptControllerD0Ev
-__ZN21IOInterruptControllerD2Ev
-__ZN21IONaturalMemoryCursor10gMetaClassE
-__ZN21IONaturalMemoryCursor10superClassE
-__ZN21IONaturalMemoryCursor13outputSegmentEN14IOMemoryCursor15PhysicalSegmentEPvm
-__ZN21IONaturalMemoryCursor17withSpecificationEmmm
-__ZN21IONaturalMemoryCursor21initWithSpecificationEmmm
-__ZN21IONaturalMemoryCursor9MetaClassC1Ev
-__ZN21IONaturalMemoryCursor9MetaClassC2Ev
-__ZN21IONaturalMemoryCursor9metaClassE
-__ZN21IONaturalMemoryCursorC1EPK11OSMetaClass
-__ZN21IONaturalMemoryCursorC1Ev
-__ZN21IONaturalMemoryCursorC2EPK11OSMetaClass
-__ZN21IONaturalMemoryCursorC2Ev
-__ZN21IONaturalMemoryCursorD0Ev
-__ZN21IONaturalMemoryCursorD2Ev
-__ZN21IOSubMemoryDescriptor10gMetaClassE
-__ZN21IOSubMemoryDescriptor10superClassE
-__ZN21IOSubMemoryDescriptor11makeMappingEP18IOMemoryDescriptorP4taskjmmm
-__ZN21IOSubMemoryDescriptor12initSubRangeEP18IOMemoryDescriptormm11IODirection
-__ZN21IOSubMemoryDescriptor12setPurgeableEmPm
-__ZN21IOSubMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN21IOSubMemoryDescriptor4freeEv
-__ZN21IOSubMemoryDescriptor7prepareE11IODirection
-__ZN21IOSubMemoryDescriptor8completeE11IODirection
-__ZN21IOSubMemoryDescriptor8redirectEP4taskb
-__ZN21IOSubMemoryDescriptor9MetaClassC1Ev
-__ZN21IOSubMemoryDescriptor9MetaClassC2Ev
-__ZN21IOSubMemoryDescriptor9metaClassE
-__ZN21IOSubMemoryDescriptorC1EPK11OSMetaClass
-__ZN21IOSubMemoryDescriptorC1Ev
-__ZN21IOSubMemoryDescriptorC2EPK11OSMetaClass
-__ZN21IOSubMemoryDescriptorC2Ev
-__ZN21IOSubMemoryDescriptorD0Ev
-__ZN21IOSubMemoryDescriptorD2Ev
-__ZN22IOInterruptEventSource10gMetaClassE
-__ZN22IOInterruptEventSource10superClassE
-__ZN22IOInterruptEventSource11setWorkLoopEP10IOWorkLoop
-__ZN22IOInterruptEventSource12checkForWorkEv
-__ZN22IOInterruptEventSource17interruptOccurredEPvP9IOServicei
-__ZN22IOInterruptEventSource20interruptEventSourceEP8OSObjectPFvS1_PS_iEP9IOServicei
-__ZN22IOInterruptEventSource23normalInterruptOccurredEPvP9IOServicei
-__ZN22IOInterruptEventSource24disableInterruptOccurredEPvP9IOServicei
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource0Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource1Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource2Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource3Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource4Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource5Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource6Ev
-__ZN22IOInterruptEventSource32_RESERVEDIOInterruptEventSource7Ev
-__ZN22IOInterruptEventSource4freeEv
-__ZN22IOInterruptEventSource4initEP8OSObjectPFvS1_PS_iEP9IOServicei
-__ZN22IOInterruptEventSource6enableEv
-__ZN22IOInterruptEventSource7disableEv
-__ZN22IOInterruptEventSource9MetaClassC1Ev
-__ZN22IOInterruptEventSource9MetaClassC2Ev
-__ZN22IOInterruptEventSource9metaClassE
-__ZN22IOInterruptEventSourceC1EPK11OSMetaClass
-__ZN22IOInterruptEventSourceC1Ev
-__ZN22IOInterruptEventSourceC2EPK11OSMetaClass
-__ZN22IOInterruptEventSourceC2Ev
-__ZN22IOInterruptEventSourceD0Ev
-__ZN22IOInterruptEventSourceD2Ev
-__ZN22IOPlatformExpertDevice10gMetaClassE
-__ZN22IOPlatformExpertDevice10superClassE
-__ZN22IOPlatformExpertDevice12initWithArgsEPvS0_S0_S0_
-__ZN22IOPlatformExpertDevice13setPropertiesEP8OSObject
-__ZN22IOPlatformExpertDevice32_RESERVEDIOPlatformExpertDevice0Ev
-__ZN22IOPlatformExpertDevice32_RESERVEDIOPlatformExpertDevice1Ev
-__ZN22IOPlatformExpertDevice32_RESERVEDIOPlatformExpertDevice2Ev
-__ZN22IOPlatformExpertDevice32_RESERVEDIOPlatformExpertDevice3Ev
-__ZN22IOPlatformExpertDevice4freeEv
-__ZN22IOPlatformExpertDevice9MetaClassC1Ev
-__ZN22IOPlatformExpertDevice9MetaClassC2Ev
-__ZN22IOPlatformExpertDevice9metaClassE
-__ZN22IOPlatformExpertDeviceC1EPK11OSMetaClass
-__ZN22IOPlatformExpertDeviceC1Ev
-__ZN22IOPlatformExpertDeviceC2EPK11OSMetaClass
-__ZN22IOPlatformExpertDeviceC2Ev
-__ZN22IOPlatformExpertDeviceD0Ev
-__ZN22IOPlatformExpertDeviceD2Ev
-__ZN22_IOOpenServiceIterator10gMetaClassE
-__ZN22_IOOpenServiceIterator10superClassE
-__ZN22_IOOpenServiceIterator13getNextObjectEv
-__ZN22_IOOpenServiceIterator4freeEv
-__ZN22_IOOpenServiceIterator5resetEv
-__ZN22_IOOpenServiceIterator7isValidEv
-__ZN22_IOOpenServiceIterator8iteratorEP10OSIteratorPK9IOServiceS4_
-__ZN22_IOOpenServiceIterator9MetaClassC1Ev
-__ZN22_IOOpenServiceIterator9MetaClassC2Ev
-__ZN22_IOOpenServiceIterator9metaClassE
-__ZN22_IOOpenServiceIteratorC1EPK11OSMetaClass
-__ZN22_IOOpenServiceIteratorC1Ev
-__ZN22_IOOpenServiceIteratorC2EPK11OSMetaClass
-__ZN22_IOOpenServiceIteratorC2Ev
-__ZN22_IOOpenServiceIteratorD0Ev
-__ZN22_IOOpenServiceIteratorD2Ev
-__ZN23IOMultiMemoryDescriptor10gMetaClassE
-__ZN23IOMultiMemoryDescriptor10superClassE
-__ZN23IOMultiMemoryDescriptor15withDescriptorsEPP18IOMemoryDescriptorm11IODirectionb
-__ZN23IOMultiMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN23IOMultiMemoryDescriptor19initWithDescriptorsEPP18IOMemoryDescriptorm11IODirectionb
-__ZN23IOMultiMemoryDescriptor4freeEv
-__ZN23IOMultiMemoryDescriptor7prepareE11IODirection
-__ZN23IOMultiMemoryDescriptor8completeE11IODirection
-__ZN23IOMultiMemoryDescriptor9MetaClassC1Ev
-__ZN23IOMultiMemoryDescriptor9MetaClassC2Ev
-__ZN23IOMultiMemoryDescriptor9metaClassE
-__ZN23IOMultiMemoryDescriptorC1EPK11OSMetaClass
-__ZN23IOMultiMemoryDescriptorC1Ev
-__ZN23IOMultiMemoryDescriptorC2EPK11OSMetaClass
-__ZN23IOMultiMemoryDescriptorC2Ev
-__ZN23IOMultiMemoryDescriptorD0Ev
-__ZN23IOMultiMemoryDescriptorD2Ev
-__ZN24IOBufferMemoryDescriptor10gMetaClassE
-__ZN24IOBufferMemoryDescriptor10superClassE
-__ZN24IOBufferMemoryDescriptor11appendBytesEPKvj
-__ZN24IOBufferMemoryDescriptor11withOptionsEmjj
-__ZN24IOBufferMemoryDescriptor12setDirectionE11IODirection
-__ZN24IOBufferMemoryDescriptor12withCapacityEj11IODirectionb
-__ZN24IOBufferMemoryDescriptor13initWithBytesEPKvj11IODirectionb
-__ZN24IOBufferMemoryDescriptor14getBytesNoCopyEjj
-__ZN24IOBufferMemoryDescriptor14getBytesNoCopyEv
-__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjj
-__ZN24IOBufferMemoryDescriptor15initWithOptionsEmjjP4task
-__ZN24IOBufferMemoryDescriptor17getVirtualSegmentEmPm
-__ZN24IOBufferMemoryDescriptor17inTaskWithOptionsEP4taskmjj
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor2Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor3Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor4Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor5Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor6Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor7Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor8Ev
-__ZN24IOBufferMemoryDescriptor34_RESERVEDIOBufferMemoryDescriptor9Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor10Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor11Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor12Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor13Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor14Ev
-__ZN24IOBufferMemoryDescriptor35_RESERVEDIOBufferMemoryDescriptor15Ev
-__ZN24IOBufferMemoryDescriptor4freeEv
-__ZN24IOBufferMemoryDescriptor9MetaClassC1Ev
-__ZN24IOBufferMemoryDescriptor9MetaClassC2Ev
-__ZN24IOBufferMemoryDescriptor9metaClassE
-__ZN24IOBufferMemoryDescriptor9setLengthEj
-__ZN24IOBufferMemoryDescriptor9withBytesEPKvj11IODirectionb
-__ZN24IOBufferMemoryDescriptorC1EPK11OSMetaClass
-__ZN24IOBufferMemoryDescriptorC1Ev
-__ZN24IOBufferMemoryDescriptorC2EPK11OSMetaClass
-__ZN24IOBufferMemoryDescriptorC2Ev
-__ZN24IOBufferMemoryDescriptorD0Ev
-__ZN24IOBufferMemoryDescriptorD2Ev
-__ZN24IOCPUInterruptController10gMetaClassE
-__ZN24IOCPUInterruptController10superClassE
-__ZN24IOCPUInterruptController14causeInterruptEP9IOServicei
-__ZN24IOCPUInterruptController15enableInterruptEP9IOServicei
-__ZN24IOCPUInterruptController15handleInterruptEPvP9IOServicei
-__ZN24IOCPUInterruptController16disableInterruptEP9IOServicei
-__ZN24IOCPUInterruptController16getInterruptTypeEP9IOServiceiPi
-__ZN24IOCPUInterruptController17registerInterruptEP9IOServiceiPvPFvS2_S2_S2_iES2_
-__ZN24IOCPUInterruptController18enableCPUInterruptEP5IOCPU
-__ZN24IOCPUInterruptController25setCPUInterruptPropertiesEP9IOService
-__ZN24IOCPUInterruptController26initCPUInterruptControllerEi
-__ZN24IOCPUInterruptController30registerCPUInterruptControllerEv
-__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController0Ev
-__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController1Ev
-__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController2Ev
-__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController3Ev
-__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController4Ev
-__ZN24IOCPUInterruptController34_RESERVEDIOCPUInterruptController5Ev
-__ZN24IOCPUInterruptController9MetaClassC1Ev
-__ZN24IOCPUInterruptController9MetaClassC2Ev
-__ZN24IOCPUInterruptController9metaClassE
-__ZN24IOCPUInterruptControllerC1EPK11OSMetaClass
-__ZN24IOCPUInterruptControllerC1Ev
-__ZN24IOCPUInterruptControllerC2EPK11OSMetaClass
-__ZN24IOCPUInterruptControllerC2Ev
-__ZN24IOCPUInterruptControllerD0Ev
-__ZN24IOCPUInterruptControllerD2Ev
-__ZN25IOGeneralMemoryDescriptor10gMetaClassE
-__ZN25IOGeneralMemoryDescriptor10superClassE
-__ZN25IOGeneralMemoryDescriptor11setPositionEm
-__ZN25IOGeneralMemoryDescriptor11wireVirtualE11IODirection
-__ZN25IOGeneralMemoryDescriptor13mapIntoKernelEj
-__ZN25IOGeneralMemoryDescriptor14initWithRangesEP14IOVirtualRangem11IODirectionP4taskb
-__ZN25IOGeneralMemoryDescriptor15initWithAddressEPvm11IODirection
-__ZN25IOGeneralMemoryDescriptor15initWithAddressEjm11IODirectionP4task
-__ZN25IOGeneralMemoryDescriptor15initWithOptionsEPvmmP4taskmP8IOMapper
-__ZN25IOGeneralMemoryDescriptor15unmapFromKernelEv
-__ZN25IOGeneralMemoryDescriptor16getSourceSegmentEmPm
-__ZN25IOGeneralMemoryDescriptor17getVirtualSegmentEmPm
-__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPm
-__ZN25IOGeneralMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN25IOGeneralMemoryDescriptor20getPhysicalSegment64EmPm
-__ZN25IOGeneralMemoryDescriptor22initWithPhysicalRangesEP15IOPhysicalRangem11IODirectionb
-__ZN25IOGeneralMemoryDescriptor23initWithPhysicalAddressEmm11IODirection
-__ZN25IOGeneralMemoryDescriptor4freeEv
-__ZN25IOGeneralMemoryDescriptor5doMapEP7_vm_mapPjmmm
-__ZN25IOGeneralMemoryDescriptor7doUnmapEP7_vm_mapjm
-__ZN25IOGeneralMemoryDescriptor7prepareE11IODirection
-__ZN25IOGeneralMemoryDescriptor8completeE11IODirection
-__ZN25IOGeneralMemoryDescriptor9MetaClassC1Ev
-__ZN25IOGeneralMemoryDescriptor9MetaClassC2Ev
-__ZN25IOGeneralMemoryDescriptor9metaClassE
-__ZN25IOGeneralMemoryDescriptorC1EPK11OSMetaClass
-__ZN25IOGeneralMemoryDescriptorC1Ev
-__ZN25IOGeneralMemoryDescriptorC2EPK11OSMetaClass
-__ZN25IOGeneralMemoryDescriptorC2Ev
-__ZN25IOGeneralMemoryDescriptorD0Ev
-__ZN25IOGeneralMemoryDescriptorD2Ev
-__ZN26_IOServiceInterestNotifier10gMetaClassE
-__ZN26_IOServiceInterestNotifier10superClassE
-__ZN26_IOServiceInterestNotifier4freeEv
-__ZN26_IOServiceInterestNotifier4waitEv
-__ZN26_IOServiceInterestNotifier6enableEb
-__ZN26_IOServiceInterestNotifier6removeEv
-__ZN26_IOServiceInterestNotifier7disableEv
-__ZN26_IOServiceInterestNotifier9MetaClassC1Ev
-__ZN26_IOServiceInterestNotifier9MetaClassC2Ev
-__ZN26_IOServiceInterestNotifier9metaClassE
-__ZN26_IOServiceInterestNotifierC1EPK11OSMetaClass
-__ZN26_IOServiceInterestNotifierC1Ev
-__ZN26_IOServiceInterestNotifierC2EPK11OSMetaClass
-__ZN26_IOServiceInterestNotifierC2Ev
-__ZN26_IOServiceInterestNotifierD0Ev
-__ZN26_IOServiceInterestNotifierD2Ev
-__ZN27IOSharedInterruptController10gMetaClassE
-__ZN27IOSharedInterruptController10superClassE
-__ZN27IOSharedInterruptController15enableInterruptEP9IOServicei
-__ZN27IOSharedInterruptController15handleInterruptEPvP9IOServicei
-__ZN27IOSharedInterruptController16disableInterruptEP9IOServicei
-__ZN27IOSharedInterruptController16getInterruptTypeEP9IOServiceiPi
-__ZN27IOSharedInterruptController17registerInterruptEP9IOServiceiPvPFvS2_S2_S2_iES2_
-__ZN27IOSharedInterruptController19unregisterInterruptEP9IOServicei
-__ZN27IOSharedInterruptController23initInterruptControllerEP21IOInterruptControllerP6OSData
-__ZN27IOSharedInterruptController26getInterruptHandlerAddressEv
-__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController0Ev
-__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController1Ev
-__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController2Ev
-__ZN27IOSharedInterruptController37_RESERVEDIOSharedInterruptController3Ev
-__ZN27IOSharedInterruptController9MetaClassC1Ev
-__ZN27IOSharedInterruptController9MetaClassC2Ev
-__ZN27IOSharedInterruptController9metaClassE
-__ZN27IOSharedInterruptControllerC1EPK11OSMetaClass
-__ZN27IOSharedInterruptControllerC1Ev
-__ZN27IOSharedInterruptControllerC2EPK11OSMetaClass
-__ZN27IOSharedInterruptControllerC2Ev
-__ZN27IOSharedInterruptControllerD0Ev
-__ZN27IOSharedInterruptControllerD2Ev
-__ZN28IOFilterInterruptEventSource10gMetaClassE
-__ZN28IOFilterInterruptEventSource10superClassE
-__ZN28IOFilterInterruptEventSource15signalInterruptEv
-__ZN28IOFilterInterruptEventSource20interruptEventSourceEP8OSObjectPFvS1_P22IOInterruptEventSourceiEP9IOServicei
-__ZN28IOFilterInterruptEventSource23normalInterruptOccurredEPvP9IOServicei
-__ZN28IOFilterInterruptEventSource24disableInterruptOccurredEPvP9IOServicei
-__ZN28IOFilterInterruptEventSource26filterInterruptEventSourceEP8OSObjectPFvS1_P22IOInterruptEventSourceiEPFbS1_PS_EP9IOServicei
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource0Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource1Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource2Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource3Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource4Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource5Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource6Ev
-__ZN28IOFilterInterruptEventSource38_RESERVEDIOFilterInterruptEventSource7Ev
-__ZN28IOFilterInterruptEventSource4initEP8OSObjectPFvS1_P22IOInterruptEventSourceiEP9IOServicei
-__ZN28IOFilterInterruptEventSource4initEP8OSObjectPFvS1_P22IOInterruptEventSourceiEPFbS1_PS_EP9IOServicei
-__ZN28IOFilterInterruptEventSource9MetaClassC1Ev
-__ZN28IOFilterInterruptEventSource9MetaClassC2Ev
-__ZN28IOFilterInterruptEventSource9metaClassE
-__ZN28IOFilterInterruptEventSourceC1EPK11OSMetaClass
-__ZN28IOFilterInterruptEventSourceC1Ev
-__ZN28IOFilterInterruptEventSourceC2EPK11OSMetaClass
-__ZN28IOFilterInterruptEventSourceC2Ev
-__ZN28IOFilterInterruptEventSourceD0Ev
-__ZN28IOFilterInterruptEventSourceD2Ev
-__ZN29IOInterleavedMemoryDescriptor18getPhysicalSegmentEmPmm
-__ZN5IOCPU10gMetaClassE
-__ZN5IOCPU10superClassE
-__ZN5IOCPU11getCPUGroupEv
-__ZN5IOCPU11getCPUStateEv
-__ZN5IOCPU11setCPUStateEm
-__ZN5IOCPU11setPropertyEPK8OSSymbolP8OSObject
-__ZN5IOCPU12getCPUNumberEv
-__ZN5IOCPU12setCPUNumberEm
-__ZN5IOCPU13setPropertiesEP8OSObject
-__ZN5IOCPU15_RESERVEDIOCPU0Ev
-__ZN5IOCPU15_RESERVEDIOCPU1Ev
-__ZN5IOCPU15_RESERVEDIOCPU2Ev
-__ZN5IOCPU15_RESERVEDIOCPU3Ev
-__ZN5IOCPU15_RESERVEDIOCPU4Ev
-__ZN5IOCPU15_RESERVEDIOCPU5Ev
-__ZN5IOCPU15_RESERVEDIOCPU6Ev
-__ZN5IOCPU15_RESERVEDIOCPU7Ev
-__ZN5IOCPU15getCPUGroupSizeEv
-__ZN5IOCPU16getMachProcessorEv
-__ZN5IOCPU17enableCPUTimeBaseEb
-__ZN5IOCPU5startEP9IOService
-__ZN5IOCPU8initCPUsEv
-__ZN5IOCPU9MetaClassC1Ev
-__ZN5IOCPU9MetaClassC2Ev
-__ZN5IOCPU9metaClassE
-__ZN5IOCPU9signalCPUEPS_
-__ZN5IOCPUC1EPK11OSMetaClass
-__ZN5IOCPUC2EPK11OSMetaClass
-__ZN5IOCPUD0Ev
-__ZN5IOCPUD2Ev
-__ZN5OSSet10gMetaClassE
-__ZN5OSSet10setOptionsEjjPv
-__ZN5OSSet10superClassE
-__ZN5OSSet11initWithSetEPKS_j
-__ZN5OSSet11withObjectsEPPK8OSObjectjj
-__ZN5OSSet12removeObjectEPK15OSMetaClassBase
-__ZN5OSSet12withCapacityEj
-__ZN5OSSet13initWithArrayEPK7OSArrayj
-__ZN5OSSet14copyCollectionEP12OSDictionary
-__ZN5OSSet14ensureCapacityEj
-__ZN5OSSet15_RESERVEDOSSet0Ev
-__ZN5OSSet15_RESERVEDOSSet1Ev
-__ZN5OSSet15_RESERVEDOSSet2Ev
-__ZN5OSSet15_RESERVEDOSSet3Ev
-__ZN5OSSet15_RESERVEDOSSet4Ev
-__ZN5OSSet15_RESERVEDOSSet5Ev
-__ZN5OSSet15_RESERVEDOSSet6Ev
-__ZN5OSSet15_RESERVEDOSSet7Ev
-__ZN5OSSet15flushCollectionEv
-__ZN5OSSet15initWithObjectsEPPK8OSObjectjj
-__ZN5OSSet16initWithCapacityEj
-__ZN5OSSet20setCapacityIncrementEj
-__ZN5OSSet4freeEv
-__ZN5OSSet5mergeEPK7OSArray
-__ZN5OSSet5mergeEPKS_
-__ZN5OSSet7withSetEPKS_j
-__ZN5OSSet9MetaClassC1Ev
-__ZN5OSSet9MetaClassC2Ev
-__ZN5OSSet9metaClassE
-__ZN5OSSet9setObjectEPK15OSMetaClassBase
-__ZN5OSSet9withArrayEPK7OSArrayj
-__ZN5OSSetC1EPK11OSMetaClass
-__ZN5OSSetC1Ev
-__ZN5OSSetC2EPK11OSMetaClass
-__ZN5OSSetC2Ev
-__ZN5OSSetD0Ev
-__ZN5OSSetD2Ev
-__ZN6OSData10appendByteEhj
-__ZN6OSData10gMetaClassE
-__ZN6OSData10superClassE
-__ZN6OSData11appendBytesEPKS_
-__ZN6OSData11appendBytesEPKvj
-__ZN6OSData12initWithDataEPKS_
-__ZN6OSData12initWithDataEPKS_jj
-__ZN6OSData12withCapacityEj
-__ZN6OSData13initWithBytesEPKvj
-__ZN6OSData14ensureCapacityEj
-__ZN6OSData15withBytesNoCopyEPvj
-__ZN6OSData16_RESERVEDOSData1Ev
-__ZN6OSData16_RESERVEDOSData2Ev
-__ZN6OSData16_RESERVEDOSData3Ev
-__ZN6OSData16_RESERVEDOSData4Ev
-__ZN6OSData16_RESERVEDOSData5Ev
-__ZN6OSData16_RESERVEDOSData6Ev
-__ZN6OSData16_RESERVEDOSData7Ev
-__ZN6OSData16initWithCapacityEj
-__ZN6OSData18setDeallocFunctionEPFvPvjE
-__ZN6OSData19initWithBytesNoCopyEPvj
-__ZN6OSData20setCapacityIncrementEj
-__ZN6OSData4freeEv
-__ZN6OSData8withDataEPKS_
-__ZN6OSData8withDataEPKS_jj
-__ZN6OSData9MetaClassC1Ev
-__ZN6OSData9MetaClassC2Ev
-__ZN6OSData9metaClassE
-__ZN6OSData9withBytesEPKvj
-__ZN6OSDataC1EPK11OSMetaClass
-__ZN6OSDataC1Ev
-__ZN6OSDataC2EPK11OSMetaClass
-__ZN6OSDataC2Ev
-__ZN6OSDataD0Ev
-__ZN6OSDataD2Ev
-__ZN7OSArray10gMetaClassE
-__ZN7OSArray10setOptionsEjjPv
-__ZN7OSArray10superClassE
-__ZN7OSArray11withObjectsEPPK8OSObjectjj
-__ZN7OSArray12removeObjectEj
-__ZN7OSArray12withCapacityEj
-__ZN7OSArray13initWithArrayEPKS_j
-__ZN7OSArray13replaceObjectEjPK15OSMetaClassBase
-__ZN7OSArray14copyCollectionEP12OSDictionary
-__ZN7OSArray14ensureCapacityEj
-__ZN7OSArray15flushCollectionEv
-__ZN7OSArray15initWithObjectsEPPK8OSObjectjj
-__ZN7OSArray16initWithCapacityEj
-__ZN7OSArray17_RESERVEDOSArray0Ev
-__ZN7OSArray17_RESERVEDOSArray1Ev
-__ZN7OSArray17_RESERVEDOSArray2Ev
-__ZN7OSArray17_RESERVEDOSArray3Ev
-__ZN7OSArray17_RESERVEDOSArray4Ev
-__ZN7OSArray17_RESERVEDOSArray5Ev
-__ZN7OSArray17_RESERVEDOSArray6Ev
-__ZN7OSArray17_RESERVEDOSArray7Ev
-__ZN7OSArray20setCapacityIncrementEj
-__ZN7OSArray4freeEv
-__ZN7OSArray5mergeEPKS_
-__ZN7OSArray9MetaClassC1Ev
-__ZN7OSArray9MetaClassC2Ev
-__ZN7OSArray9metaClassE
-__ZN7OSArray9setObjectEPK15OSMetaClassBase
-__ZN7OSArray9setObjectEjPK15OSMetaClassBase
-__ZN7OSArray9withArrayEPKS_j
-__ZN7OSArrayC1EPK11OSMetaClass
-__ZN7OSArrayC1Ev
-__ZN7OSArrayC2EPK11OSMetaClass
-__ZN7OSArrayC2Ev
-__ZN7OSArrayD0Ev
-__ZN7OSArrayD2Ev
-__ZN8IOMapper10allocTableEm
-__ZN8IOMapper10gMetaClassE
-__ZN8IOMapper10iovmInsertEjmP13upl_page_infom
-__ZN8IOMapper10iovmInsertEjmPjm
-__ZN8IOMapper10superClassE
-__ZN8IOMapper11NewARTTableEmPPvPj
-__ZN8IOMapper12FreeARTTableEP6OSDatam
-__ZN8IOMapper17setMapperRequiredEb
-__ZN8IOMapper18_RESERVEDIOMapper4Ev
-__ZN8IOMapper18_RESERVEDIOMapper5Ev
-__ZN8IOMapper18_RESERVEDIOMapper6Ev
-__ZN8IOMapper18_RESERVEDIOMapper7Ev
-__ZN8IOMapper18_RESERVEDIOMapper8Ev
-__ZN8IOMapper18_RESERVEDIOMapper9Ev
-__ZN8IOMapper19_RESERVEDIOMapper10Ev
-__ZN8IOMapper19_RESERVEDIOMapper11Ev
-__ZN8IOMapper19_RESERVEDIOMapper12Ev
-__ZN8IOMapper19_RESERVEDIOMapper13Ev
-__ZN8IOMapper19_RESERVEDIOMapper14Ev
-__ZN8IOMapper19_RESERVEDIOMapper15Ev
-__ZN8IOMapper19waitForSystemMapperEv
-__ZN8IOMapper4freeEv
-__ZN8IOMapper5startEP9IOService
-__ZN8IOMapper7gSystemE
-__ZN8IOMapper9MetaClassC1Ev
-__ZN8IOMapper9MetaClassC2Ev
-__ZN8IOMapper9metaClassE
-__ZN8IOMapperC1EPK11OSMetaClass
-__ZN8IOMapperC2EPK11OSMetaClass
-__ZN8IOMapperD0Ev
-__ZN8IOMapperD2Ev
-__ZN8IOPMprot10gMetaClassE
-__ZN8IOPMprot10superClassE
-__ZN8IOPMprot9MetaClassC1Ev
-__ZN8IOPMprot9MetaClassC2Ev
-__ZN8IOPMprot9metaClassE
-__ZN8IOPMprotC1EPK11OSMetaClass
-__ZN8IOPMprotC1Ev
-__ZN8IOPMprotC2EPK11OSMetaClass
-__ZN8IOPMprotC2Ev
-__ZN8IOPMprotD0Ev
-__ZN8IOPMprotD2Ev
-__ZN8IOSyncer10gMetaClassE
-__ZN8IOSyncer10superClassE
-__ZN8IOSyncer13privateSignalEv
-__ZN8IOSyncer4freeEv
-__ZN8IOSyncer4initEb
-__ZN8IOSyncer4waitEb
-__ZN8IOSyncer6createEb
-__ZN8IOSyncer6reinitEv
-__ZN8IOSyncer6signalEib
-__ZN8IOSyncer9MetaClassC1Ev
-__ZN8IOSyncer9MetaClassC2Ev
-__ZN8IOSyncer9metaClassE
-__ZN8IOSyncerC1EPK11OSMetaClass
-__ZN8IOSyncerC1Ev
-__ZN8IOSyncerC2EPK11OSMetaClass
-__ZN8IOSyncerC2Ev
-__ZN8IOSyncerD0Ev
-__ZN8IOSyncerD2Ev
-__ZN8OSNumber10gMetaClassE
-__ZN8OSNumber10superClassE
-__ZN8OSNumber10withNumberEPKcj
-__ZN8OSNumber10withNumberEyj
-__ZN8OSNumber18_RESERVEDOSNumber0Ev
-__ZN8OSNumber18_RESERVEDOSNumber1Ev
-__ZN8OSNumber18_RESERVEDOSNumber2Ev
-__ZN8OSNumber18_RESERVEDOSNumber3Ev
-__ZN8OSNumber18_RESERVEDOSNumber4Ev
-__ZN8OSNumber18_RESERVEDOSNumber5Ev
-__ZN8OSNumber18_RESERVEDOSNumber6Ev
-__ZN8OSNumber18_RESERVEDOSNumber7Ev
-__ZN8OSNumber4freeEv
-__ZN8OSNumber4initEPKcj
-__ZN8OSNumber4initEyj
-__ZN8OSNumber8addValueEx
-__ZN8OSNumber8setValueEy
-__ZN8OSNumber9MetaClassC1Ev
-__ZN8OSNumber9MetaClassC2Ev
-__ZN8OSNumber9metaClassE
-__ZN8OSNumberC1EPK11OSMetaClass
-__ZN8OSNumberC1Ev
-__ZN8OSNumberC2EPK11OSMetaClass
-__ZN8OSNumberC2Ev
-__ZN8OSNumberD0Ev
-__ZN8OSNumberD2Ev
-__ZN8OSObject10gMetaClassE
-__ZN8OSObject10superClassE
-__ZN8OSObject18_RESERVEDOSObject0Ev
-__ZN8OSObject18_RESERVEDOSObject1Ev
-__ZN8OSObject18_RESERVEDOSObject2Ev
-__ZN8OSObject18_RESERVEDOSObject3Ev
-__ZN8OSObject18_RESERVEDOSObject4Ev
-__ZN8OSObject18_RESERVEDOSObject5Ev
-__ZN8OSObject18_RESERVEDOSObject6Ev
-__ZN8OSObject18_RESERVEDOSObject7Ev
-__ZN8OSObject18_RESERVEDOSObject8Ev
-__ZN8OSObject18_RESERVEDOSObject9Ev
-__ZN8OSObject19_RESERVEDOSObject10Ev
-__ZN8OSObject19_RESERVEDOSObject11Ev
-__ZN8OSObject19_RESERVEDOSObject12Ev
-__ZN8OSObject19_RESERVEDOSObject13Ev
-__ZN8OSObject19_RESERVEDOSObject14Ev
-__ZN8OSObject19_RESERVEDOSObject15Ev
-__ZN8OSObject4freeEv
-__ZN8OSObject4initEv
-__ZN8OSObject9MetaClassC1Ev
-__ZN8OSObject9MetaClassC2Ev
-__ZN8OSObject9metaClassE
-__ZN8OSObjectC1EPK11OSMetaClass
-__ZN8OSObjectC1Ev
-__ZN8OSObjectC2EPK11OSMetaClass
-__ZN8OSObjectC2Ev
-__ZN8OSObjectD0Ev
-__ZN8OSObjectD2Ev
-__ZN8OSObjectdlEPvm
-__ZN8OSObjectnwEm
-__ZN8OSString10gMetaClassE
-__ZN8OSString10superClassE
-__ZN8OSString10withStringEPKS_
-__ZN8OSString11withCStringEPKc
-__ZN8OSString14initWithStringEPKS_
-__ZN8OSString15initWithCStringEPKc
-__ZN8OSString17withCStringNoCopyEPKc
-__ZN8OSString18_RESERVEDOSString0Ev
-__ZN8OSString18_RESERVEDOSString1Ev
-__ZN8OSString18_RESERVEDOSString2Ev
-__ZN8OSString18_RESERVEDOSString3Ev
-__ZN8OSString18_RESERVEDOSString4Ev
-__ZN8OSString18_RESERVEDOSString5Ev
-__ZN8OSString18_RESERVEDOSString6Ev
-__ZN8OSString18_RESERVEDOSString7Ev
-__ZN8OSString18_RESERVEDOSString8Ev
-__ZN8OSString18_RESERVEDOSString9Ev
-__ZN8OSString19_RESERVEDOSString10Ev
-__ZN8OSString19_RESERVEDOSString11Ev
-__ZN8OSString19_RESERVEDOSString12Ev
-__ZN8OSString19_RESERVEDOSString13Ev
-__ZN8OSString19_RESERVEDOSString14Ev
-__ZN8OSString19_RESERVEDOSString15Ev
-__ZN8OSString21initWithCStringNoCopyEPKc
-__ZN8OSString4freeEv
-__ZN8OSString7setCharEcj
-__ZN8OSString9MetaClassC1Ev
-__ZN8OSString9MetaClassC2Ev
-__ZN8OSString9metaClassE
-__ZN8OSStringC1EPK11OSMetaClass
-__ZN8OSStringC1Ev
-__ZN8OSStringC2EPK11OSMetaClass
-__ZN8OSStringC2Ev
-__ZN8OSStringD0Ev
-__ZN8OSStringD2Ev
-__ZN8OSSymbol10gMetaClassE
-__ZN8OSSymbol10initializeEv
-__ZN8OSSymbol10superClassE
-__ZN8OSSymbol10withStringEPK8OSString
-__ZN8OSSymbol11withCStringEPKc
-__ZN8OSSymbol14initWithStringEPK8OSString
-__ZN8OSSymbol15initWithCStringEPKc
-__ZN8OSSymbol17withCStringNoCopyEPKc
-__ZN8OSSymbol18_RESERVEDOSSymbol0Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol1Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol2Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol3Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol4Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol5Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol6Ev
-__ZN8OSSymbol18_RESERVEDOSSymbol7Ev
-__ZN8OSSymbol18checkForPageUnloadEPvS0_
-__ZN8OSSymbol21initWithCStringNoCopyEPKc
-__ZN8OSSymbol4freeEv
-__ZN8OSSymbol9MetaClassC1Ev
-__ZN8OSSymbol9MetaClassC2Ev
-__ZN8OSSymbol9metaClassE
-__ZN8OSSymbolC1EPK11OSMetaClass
-__ZN8OSSymbolC1Ev
-__ZN8OSSymbolC2EPK11OSMetaClass
-__ZN8OSSymbolC2Ev
-__ZN8OSSymbolD0Ev
-__ZN8OSSymbolD2Ev
-__ZN9IOCommand10gMetaClassE
-__ZN9IOCommand10superClassE
-__ZN9IOCommand4initEv
-__ZN9IOCommand9MetaClassC1Ev
-__ZN9IOCommand9MetaClassC2Ev
-__ZN9IOCommand9metaClassE
-__ZN9IOCommandC1EPK11OSMetaClass
-__ZN9IOCommandC2EPK11OSMetaClass
-__ZN9IOCommandD0Ev
-__ZN9IOCommandD2Ev
-__ZN9IODTNVRAM10gMetaClassE
-__ZN9IODTNVRAM10superClassE
-__ZN9IODTNVRAM10writeXPRAMEmPhm
-__ZN9IODTNVRAM11setPropertyEPK8OSSymbolP8OSObject
-__ZN9IODTNVRAM13savePanicInfoEPhm
-__ZN9IODTNVRAM13setPropertiesEP8OSObject
-__ZN9IODTNVRAM15initOFVariablesEv
-__ZN9IODTNVRAM15syncOFVariablesEv
-__ZN9IODTNVRAM16escapeDataToDataEP6OSData
-__ZN9IODTNVRAM16updateOWBootArgsEPK8OSSymbolP8OSObject
-__ZN9IODTNVRAM17getOWVariableInfoEmPPK8OSSymbolPmS4_
-__ZN9IODTNVRAM17readNVRAMPropertyEP15IORegistryEntryPPK8OSSymbolPP6OSData
-__ZN9IODTNVRAM18generateOWChecksumEPh
-__ZN9IODTNVRAM18getNVRAMPartitionsEv
-__ZN9IODTNVRAM18readNVRAMPartitionEPK8OSSymbolmPhm
-__ZN9IODTNVRAM18validateOWChecksumEPh
-__ZN9IODTNVRAM18writeNVRAMPropertyEP15IORegistryEntryPK8OSSymbolP6OSData
-__ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject
-__ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject
-__ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm
-__ZN9IODTNVRAM19unescapeBytesToDataEPKhm
-__ZN9IODTNVRAM19writeNVRAMPartitionEPK8OSSymbolmPhm
-__ZN9IODTNVRAM22readNVRAMPropertyType0EP15IORegistryEntryPPK8OSSymbolPP6OSData
-__ZN9IODTNVRAM22readNVRAMPropertyType1EP15IORegistryEntryPPK8OSSymbolPP6OSData
-__ZN9IODTNVRAM23registerNVRAMControllerEP17IONVRAMController
-__ZN9IODTNVRAM23writeNVRAMPropertyType0EP15IORegistryEntryPK8OSSymbolP6OSData
-__ZN9IODTNVRAM23writeNVRAMPropertyType1EP15IORegistryEntryPK8OSSymbolP6OSData
-__ZN9IODTNVRAM26calculatePartitionChecksumEPh
-__ZN9IODTNVRAM4initEP15IORegistryEntryPK15IORegistryPlane
-__ZN9IODTNVRAM4syncEv
-__ZN9IODTNVRAM9MetaClassC1Ev
-__ZN9IODTNVRAM9MetaClassC2Ev
-__ZN9IODTNVRAM9metaClassE
-__ZN9IODTNVRAM9readXPRAMEmPhm
-__ZN9IODTNVRAMC1EPK11OSMetaClass
-__ZN9IODTNVRAMC1Ev
-__ZN9IODTNVRAMC2EPK11OSMetaClass
-__ZN9IODTNVRAMC2Ev
-__ZN9IODTNVRAMD0Ev
-__ZN9IODTNVRAMD2Ev
-__ZN9IOService10adjustBusyEl
-__ZN9IOService10gMetaClassE
-__ZN9IOService10handleOpenEPS_mPv
-__ZN9IOService10initializeEv
-__ZN9IOService10joinPMtreeEPS_
-__ZN9IOService10makeUsableEv
-__ZN9IOService10superClassE
-__ZN9IOService10systemWakeEv
-__ZN9IOService10youAreRootEv
-__ZN9IOService11addLocationEP12OSDictionary
-__ZN9IOService11getPlatformEv
-__ZN9IOService11handleCloseEPS_m
-__ZN9IOService11setPlatformEP16IOPlatformExpert
-__ZN9IOService11tellClientsEi
-__ZN9IOService12clampPowerOnEm
-__ZN9IOService12didTerminateEPS_mPb
-__ZN9IOService12getBusyStateEv
-__ZN9IOService12getResourcesEv
-__ZN9IOService12nameMatchingEPK8OSStringP12OSDictionary
-__ZN9IOService12nameMatchingEPKcP12OSDictionary
-__ZN9IOService12passiveMatchEP12OSDictionaryb
-__ZN9IOService12requestProbeEm
-__ZN9IOService12tellChangeUpEm
-__ZN9IOService12waitForStateEmmP13mach_timespec
-__ZN9IOService13addPowerChildEPS_
-__ZN9IOService13askChangeDownEm
-__ZN9IOService13checkResourceEP8OSObject
-__ZN9IOService13getPMworkloopEv
-__ZN9IOService13invokeNotiferEP18_IOServiceNotifier
-__ZN9IOService13matchLocationEPS_
-__ZN9IOService13messageClientEmP8OSObjectPvj
-__ZN9IOService13newUserClientEP4taskPvmP12OSDictionaryPP12IOUserClient
-__ZN9IOService13newUserClientEP4taskPvmPP12IOUserClient
-__ZN9IOService13setPowerStateEmPS_
-__ZN9IOService13startMatchingEm
-__ZN9IOService13waitMatchIdleEm
-__ZN9IOService13willTerminateEPS_m
-__ZN9IOService14activityTickleEmm
-__ZN9IOService14applyToClientsEPFvPS_PvES1_
-__ZN9IOService14causeInterruptEi
-__ZN9IOService14checkResourcesEv
-__ZN9IOService14doServiceMatchEm
-__ZN9IOService14getServiceRootEv
-__ZN9IOService14messageClientsEmPvj
-__ZN9IOService14newTemperatureElPS_
-__ZN9IOService14setPowerParentEP17IOPowerConnectionbm
-__ZN9IOService14startCandidateEPS_
-__ZN9IOService14tellChangeDownEm
-__ZN9IOService14waitForServiceEP12OSDictionaryP13mach_timespec
-__ZN9IOService15addNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_l
-__ZN9IOService15comparePropertyEP12OSDictionaryPK8OSString
-__ZN9IOService15comparePropertyEP12OSDictionaryPKc
-__ZN9IOService15enableInterruptEi
-__ZN9IOService15errnoFromReturnEi
-__ZN9IOService15getDeviceMemoryEv
-__ZN9IOService15getPMRootDomainEv
-__ZN9IOService15lookupInterruptEibPP21IOInterruptController
-__ZN9IOService15nextIdleTimeoutE12UnsignedWideS0_j
-__ZN9IOService15powerChangeDoneEm
-__ZN9IOService15probeCandidatesEP12OSOrderedSet
-__ZN9IOService15publishResourceEPK8OSSymbolP8OSObject
-__ZN9IOService15publishResourceEPKcP8OSObject
-__ZN9IOService15registerServiceEm
-__ZN9IOService15serviceMatchingEPK8OSStringP12OSDictionary
-__ZN9IOService15serviceMatchingEPKcP12OSDictionary
-__ZN9IOService15setDeviceMemoryEP7OSArray
-__ZN9IOService15setPMRootDomainEP14IOPMrootDomain
-__ZN9IOService15tellChangeDown1Em
-__ZN9IOService15tellChangeDown2Em
-__ZN9IOService15terminateClientEPS_m
-__ZN9IOService16ack_timer_tickedEv
-__ZN9IOService16allowPowerChangeEm
-__ZN9IOService16applyToProvidersEPFvPS_PvES1_
-__ZN9IOService16command_receivedEPvS0_S0_S0_
-__ZN9IOService16didYouWakeSystemEv
-__ZN9IOService16disableInterruptEi
-__ZN9IOService16getInterruptTypeEiPi
-__ZN9IOService16propertyMatchingEPK8OSSymbolPK8OSObjectP12OSDictionary
-__ZN9IOService16registerInterestEPK8OSSymbolPFiPvS3_mPS_S3_jES3_S3_
-__ZN9IOService16removePowerChildEP17IOPowerConnection
-__ZN9IOService16requestTerminateEPS_m
-__ZN9IOService16resolveInterruptEPS_i
-__ZN9IOService16resourceMatchingEPK8OSStringP12OSDictionary
-__ZN9IOService16resourceMatchingEPKcP12OSDictionary
-__ZN9IOService16stringFromReturnEi
-__ZN9IOService16tellNoChangeDownEm
-__ZN9IOService17addNeededResourceEPKc
-__ZN9IOService17applyToInterestedEPK8OSSymbolPFvP8OSObjectPvES5_
-__ZN9IOService17cancelPowerChangeEm
-__ZN9IOService17catalogNewDriversEP12OSOrderedSet
-__ZN9IOService17comparePropertiesEP12OSDictionaryP12OSCollection
-__ZN9IOService17currentCapabilityEv
-__ZN9IOService17getAggressivenessEmPm
-__ZN9IOService17registerInterruptEiP8OSObjectPFvS1_PvPS_iES2_
-__ZN9IOService17setAggressivenessEmm
-__ZN9IOService18changePowerStateToEm
-__ZN9IOService18doServiceTerminateEm
-__ZN9IOService18getResourceServiceEv
-__ZN9IOService18lockForArbitrationEb
-__ZN9IOService18matchPropertyTableEP12OSDictionary
-__ZN9IOService18matchPropertyTableEP12OSDictionaryPl
-__ZN9IOService18setIdleTimerPeriodEm
-__ZN9IOService18settleTimerExpiredEv
-__ZN9IOService18systemWillShutdownEm
-__ZN9IOService19installNotificationEPK8OSSymbolP12OSDictionaryPFbPvS5_PS_ES5_S5_lPP10OSIterator
-__ZN9IOService19_RESERVEDIOService6Ev
-__ZN9IOService19_RESERVEDIOService7Ev
-__ZN9IOService19_RESERVEDIOService8Ev
-__ZN9IOService19_RESERVEDIOService9Ev
-__ZN9IOService19deliverNotificationEPK8OSSymbolmm
-__ZN9IOService19getMatchingServicesEP12OSDictionary
-__ZN9IOService19powerOverrideOnPrivEv
-__ZN9IOService19registerPowerDriverEPS_P14IOPMPowerStatem
-__ZN9IOService19start_PM_idle_timerEv
-__ZN9IOService19unregisterInterruptEi
-__ZN9IOService20_RESERVEDIOService10Ev
-__ZN9IOService20_RESERVEDIOService11Ev
-__ZN9IOService20_RESERVEDIOService12Ev
-__ZN9IOService20_RESERVEDIOService13Ev
-__ZN9IOService20_RESERVEDIOService14Ev
-__ZN9IOService20_RESERVEDIOService15Ev
-__ZN9IOService20_RESERVEDIOService16Ev
-__ZN9IOService20_RESERVEDIOService17Ev
-__ZN9IOService20_RESERVEDIOService18Ev
-__ZN9IOService20_RESERVEDIOService19Ev
-__ZN9IOService20_RESERVEDIOService20Ev
-__ZN9IOService20_RESERVEDIOService21Ev
-__ZN9IOService20_RESERVEDIOService22Ev
-__ZN9IOService20_RESERVEDIOService23Ev
-__ZN9IOService20_RESERVEDIOService24Ev
-__ZN9IOService20_RESERVEDIOService25Ev
-__ZN9IOService20_RESERVEDIOService26Ev
-__ZN9IOService20_RESERVEDIOService27Ev
-__ZN9IOService20_RESERVEDIOService28Ev
-__ZN9IOService20_RESERVEDIOService29Ev
-__ZN9IOService20_RESERVEDIOService30Ev
-__ZN9IOService20_RESERVEDIOService31Ev
-__ZN9IOService20_RESERVEDIOService32Ev
-__ZN9IOService20_RESERVEDIOService33Ev
-__ZN9IOService20_RESERVEDIOService34Ev
-__ZN9IOService20_RESERVEDIOService35Ev
-__ZN9IOService20_RESERVEDIOService36Ev
-__ZN9IOService20_RESERVEDIOService37Ev
-__ZN9IOService20_RESERVEDIOService38Ev
-__ZN9IOService20_RESERVEDIOService39Ev
-__ZN9IOService20_RESERVEDIOService40Ev
-__ZN9IOService20_RESERVEDIOService41Ev
-__ZN9IOService20_RESERVEDIOService42Ev
-__ZN9IOService20_RESERVEDIOService43Ev
-__ZN9IOService20_RESERVEDIOService44Ev
-__ZN9IOService20_RESERVEDIOService45Ev
-__ZN9IOService20_RESERVEDIOService46Ev
-__ZN9IOService20_RESERVEDIOService47Ev
-__ZN9IOService20callPlatformFunctionEPK8OSSymbolbPvS3_S3_S3_
-__ZN9IOService20callPlatformFunctionEPKcbPvS2_S2_S2_
-__ZN9IOService20getDeviceMemoryCountEv
-__ZN9IOService20powerOverrideOffPrivEv
-__ZN9IOService20unlockForArbitrationEv
-__ZN9IOService21getClientWithCategoryEPK8OSSymbol
-__ZN9IOService21powerStateDidChangeToEmmPS_
-__ZN9IOService21temporaryPowerClampOnEv
-__ZN9IOService21unregisterAllInterestEv
-__ZN9IOService22PM_Clamp_Timer_ExpiredEv
-__ZN9IOService22acknowledgePowerChangeEPS_
-__ZN9IOService22changePowerStateToPrivEm
-__ZN9IOService22copyClientWithCategoryEPK8OSSymbol
-__ZN9IOService22powerDomainDidChangeToEmP17IOPowerConnection
-__ZN9IOService22powerStateWillChangeToEmmPS_
-__ZN9IOService23acknowledgeNotificationEPvm
-__ZN9IOService23currentPowerConsumptionEv
-__ZN9IOService23powerDomainWillChangeToEmP17IOPowerConnection
-__ZN9IOService23requestPowerDomainStateEmP17IOPowerConnectionm
-__ZN9IOService23tellClientsWithResponseEi
-__ZN9IOService24PM_idle_timer_expirationEv
-__ZN9IOService24acknowledgeSetPowerStateEv
-__ZN9IOService24getDeviceMemoryWithIndexEj
-__ZN9IOService24mapDeviceMemoryWithIndexEjm
-__ZN9IOService24powerStateForDomainStateEm
-__ZN9IOService24registerInterestedDriverEPS_
-__ZN9IOService26deRegisterInterestedDriverEPS_
-__ZN9IOService26temperatureCriticalForZoneEPS_
-__ZN9IOService27maxCapabilityForDomainStateEm
-__ZN9IOService27serializedAllowPowerChange2Em
-__ZN9IOService28serializedCancelPowerChange2Em
-__ZN9IOService31initialPowerStateForDomainStateEm
-__ZN9IOService4freeEv
-__ZN9IOService4openEPS_mPv
-__ZN9IOService4stopEPS_
-__ZN9IOService5closeEPS_m
-__ZN9IOService5probeEPS_Pl
-__ZN9IOService5startEPS_
-__ZN9IOService6PMfreeEv
-__ZN9IOService6PMinitEv
-__ZN9IOService6PMstopEv
-__ZN9IOService6attachEPS_
-__ZN9IOService6detachEPS_
-__ZN9IOService7messageEmPS_Pv
-__ZN9IOService8finalizeEm
-__ZN9IOService9MetaClassC1Ev
-__ZN9IOService9MetaClassC2Ev
-__ZN9IOService9metaClassE
-__ZN9IOService9resourcesEv : __ZN9IOService18getResourceServiceEv
-__ZN9IOService9terminateEm
-__ZN9IOService9waitQuietEP13mach_timespec
-__ZN9IOService9waitQuietEy
-__ZN9IOServiceC1EPK11OSMetaClass
-__ZN9IOServiceC1Ev
-__ZN9IOServiceC2EPK11OSMetaClass
-__ZN9IOServiceC2Ev
-__ZN9IOServiceD0Ev
-__ZN9IOServiceD2Ev
-__ZN9OSBoolean10gMetaClassE
-__ZN9OSBoolean10initializeEv
-__ZN9OSBoolean10superClassE
-__ZN9OSBoolean11withBooleanEb
-__ZN9OSBoolean19_RESERVEDOSBoolean0Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean1Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean2Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean3Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean4Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean5Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean6Ev
-__ZN9OSBoolean19_RESERVEDOSBoolean7Ev
-__ZN9OSBoolean4freeEv
-__ZN9OSBoolean9MetaClassC1Ev
-__ZN9OSBoolean9MetaClassC2Ev
-__ZN9OSBoolean9metaClassE
-__ZN9OSBooleanC1EPK11OSMetaClass
-__ZN9OSBooleanC1Ev
-__ZN9OSBooleanC2EPK11OSMetaClass
-__ZN9OSBooleanC2Ev
-__ZN9OSBooleanD0Ev
-__ZN9OSBooleanD2Ev
-__ZNK10IOMachPort12getMetaClassEv
-__ZNK10IOMachPort9MetaClass5allocEv
-__ZNK10IONotifier12getMetaClassEv
-__ZNK10IONotifier9MetaClass5allocEv
-__ZNK10IOWorkLoop12getMetaClassEv
-__ZNK10IOWorkLoop19enableAllInterruptsEv
-__ZNK10IOWorkLoop20disableAllInterruptsEv
-__ZNK10IOWorkLoop21enableAllEventSourcesEv
-__ZNK10IOWorkLoop22disableAllEventSourcesEv
-__ZNK10IOWorkLoop6inGateEv
-__ZNK10IOWorkLoop8onThreadEv
-__ZNK10IOWorkLoop9MetaClass5allocEv
-__ZNK10IOWorkLoop9getThreadEv
-__ZNK10OSIterator12getMetaClassEv
-__ZNK10OSIterator9MetaClass5allocEv
-__ZNK11IOCatalogue12getMetaClassEv
-__ZNK11IOCatalogue12unloadModuleEP8OSString
-__ZNK11IOCatalogue13serializeDataEmP11OSSerialize
-__ZNK11IOCatalogue14isModuleLoadedEP12OSDictionary
-__ZNK11IOCatalogue14isModuleLoadedEP8OSString
-__ZNK11IOCatalogue14isModuleLoadedEPKc
-__ZNK11IOCatalogue18getGenerationCountEv
-__ZNK11IOCatalogue9MetaClass5allocEv
-__ZNK11IOCatalogue9serializeEP11OSSerialize
-__ZNK11IODataQueue12getMetaClassEv
-__ZNK11IODataQueue9MetaClass5allocEv
-__ZNK11IOMemoryMap12getMetaClassEv
-__ZNK11IOMemoryMap9MetaClass5allocEv
-__ZNK11IOResources11getWorkLoopEv
-__ZNK11IOResources12getMetaClassEv
-__ZNK11IOResources9MetaClass5allocEv
-__ZNK11OSMetaClass12getClassNameEv
-__ZNK11OSMetaClass12getClassSizeEv
-__ZNK11OSMetaClass12getMetaClassEv
-__ZNK11OSMetaClass12taggedRetainEPKv
-__ZNK11OSMetaClass13checkMetaCastEPK15OSMetaClassBase
-__ZNK11OSMetaClass13getSuperClassEv
-__ZNK11OSMetaClass13taggedReleaseEPKv
-__ZNK11OSMetaClass13taggedReleaseEPKvi
-__ZNK11OSMetaClass14getRetainCountEv
-__ZNK11OSMetaClass14reservedCalledEi
-__ZNK11OSMetaClass16getInstanceCountEv
-__ZNK11OSMetaClass18instanceDestructedEv
-__ZNK11OSMetaClass19instanceConstructedEv
-__ZNK11OSMetaClass6retainEv
-__ZNK11OSMetaClass7releaseEi
-__ZNK11OSMetaClass7releaseEv
-__ZNK11OSMetaClass9serializeEP11OSSerialize
-__ZNK11OSSerialize11getCapacityEv
-__ZNK11OSSerialize12getMetaClassEv
-__ZNK11OSSerialize20getCapacityIncrementEv
-__ZNK11OSSerialize4textEv
-__ZNK11OSSerialize9MetaClass5allocEv
-__ZNK11OSSerialize9getLengthEv
-__ZNK12IOPMinformee12getMetaClassEv
-__ZNK12IOPMinformee9MetaClass5allocEv
-__ZNK12IORootParent12getMetaClassEv
-__ZNK12IORootParent9MetaClass5allocEv
-__ZNK12IOUserClient12getMetaClassEv
-__ZNK12IOUserClient9MetaClass5allocEv
-__ZNK12OSCollection12getMetaClassEv
-__ZNK12OSCollection9MetaClass5allocEv
-__ZNK12OSDictionary11getCapacityEv
-__ZNK12OSDictionary12getMetaClassEv
-__ZNK12OSDictionary12initIteratorEPv
-__ZNK12OSDictionary12iteratorSizeEv
-__ZNK12OSDictionary20getCapacityIncrementEv
-__ZNK12OSDictionary24getNextObjectForIteratorEPvPP8OSObject
-__ZNK12OSDictionary8getCountEv
-__ZNK12OSDictionary9MetaClass5allocEv
-__ZNK12OSDictionary9getObjectEPK8OSString
-__ZNK12OSDictionary9getObjectEPK8OSSymbol
-__ZNK12OSDictionary9getObjectEPKc
-__ZNK12OSDictionary9isEqualToEPK15OSMetaClassBase
-__ZNK12OSDictionary9isEqualToEPKS_
-__ZNK12OSDictionary9isEqualToEPKS_PK12OSCollection
-__ZNK12OSDictionary9serializeEP11OSSerialize
-__ZNK12OSOrderedSet11getCapacityEv
-__ZNK12OSOrderedSet12getMetaClassEv
-__ZNK12OSOrderedSet12initIteratorEPv
-__ZNK12OSOrderedSet12iteratorSizeEv
-__ZNK12OSOrderedSet13getLastObjectEv
-__ZNK12OSOrderedSet14containsObjectEPK15OSMetaClassBase
-__ZNK12OSOrderedSet14getFirstObjectEv
-__ZNK12OSOrderedSet20getCapacityIncrementEv
-__ZNK12OSOrderedSet24getNextObjectForIteratorEPvPP8OSObject
-__ZNK12OSOrderedSet6memberEPK15OSMetaClassBase
-__ZNK12OSOrderedSet8getCountEv
-__ZNK12OSOrderedSet9MetaClass5allocEv
-__ZNK12OSOrderedSet9getObjectEj
-__ZNK12OSOrderedSet9isEqualToEPK15OSMetaClassBase
-__ZNK12OSOrderedSet9isEqualToEPKS_
-__ZNK12OSSerializer12getMetaClassEv
-__ZNK12OSSerializer9MetaClass5allocEv
-__ZNK12OSSerializer9serializeEP11OSSerialize
-__ZNK12OSSymbolPool10findSymbolEPKc
-__ZNK13IOCommandGate12getMetaClassEv
-__ZNK13IOCommandGate9MetaClass5allocEv
-__ZNK13IOCommandPool12getMetaClassEv
-__ZNK13IOCommandPool9MetaClass5allocEv
-__ZNK13IOEventSource11getWorkLoopEv
-__ZNK13IOEventSource12getMetaClassEv
-__ZNK13IOEventSource7getNextEv
-__ZNK13IOEventSource8onThreadEv
-__ZNK13IOEventSource9MetaClass5allocEv
-__ZNK13IOEventSource9getActionEv
-__ZNK13IOEventSource9isEnabledEv
-__ZNK13_IOServiceJob12getMetaClassEv
-__ZNK13_IOServiceJob9MetaClass5allocEv
-__ZNK14IOCommandQueue12getMetaClassEv
-__ZNK14IOCommandQueue9MetaClass5allocEv
-__ZNK14IOMemoryCursor12getMetaClassEv
-__ZNK14IOMemoryCursor9MetaClass5allocEv
-__ZNK14IOPMrootDomain12getMetaClassEv
-__ZNK14IOPMrootDomain9MetaClass5allocEv
-__ZNK15IOConditionLock12getConditionEv
-__ZNK15IOConditionLock12getMetaClassEv
-__ZNK15IOConditionLock16getInterruptibleEv
-__ZNK15IOConditionLock9MetaClass5allocEv
-__ZNK15IOPMPowerSource12getMetaClassEv
-__ZNK15IOPMPowerSource9MetaClass5allocEv
-__ZNK15IOPanicPlatform12getMetaClassEv
-__ZNK15IOPanicPlatform9MetaClass5allocEv
-__ZNK15IORegistryEntry11compareNameEP8OSStringPS1_
-__ZNK15IORegistryEntry11getLocationEPK15IORegistryPlane
-__ZNK15IORegistryEntry11getPropertyEPK8OSString
-__ZNK15IORegistryEntry11getPropertyEPK8OSStringPK15IORegistryPlanem
-__ZNK15IORegistryEntry11getPropertyEPK8OSSymbol
-__ZNK15IORegistryEntry11getPropertyEPK8OSSymbolPK15IORegistryPlanem
-__ZNK15IORegistryEntry11getPropertyEPKc
-__ZNK15IORegistryEntry11getPropertyEPKcPK15IORegistryPlanem
-__ZNK15IORegistryEntry12compareNamesEP8OSObjectPP8OSString
-__ZNK15IORegistryEntry12copyLocationEPK15IORegistryPlane
-__ZNK15IORegistryEntry12copyPropertyEPK8OSString
-__ZNK15IORegistryEntry12copyPropertyEPK8OSStringPK15IORegistryPlanem
-__ZNK15IORegistryEntry12copyPropertyEPK8OSSymbol
-__ZNK15IORegistryEntry12copyPropertyEPK8OSSymbolPK15IORegistryPlanem
-__ZNK15IORegistryEntry12copyPropertyEPKc
-__ZNK15IORegistryEntry12copyPropertyEPKcPK15IORegistryPlanem
-__ZNK15IORegistryEntry12getMetaClassEv
-__ZNK15IORegistryEntry13getChildEntryEPK15IORegistryPlane
-__ZNK15IORegistryEntry14applyToParentsEPFvPS_PvES1_PK15IORegistryPlane
-__ZNK15IORegistryEntry14copyChildEntryEPK15IORegistryPlane
-__ZNK15IORegistryEntry14getParentEntryEPK15IORegistryPlane
-__ZNK15IORegistryEntry15applyToChildrenEPFvPS_PvES1_PK15IORegistryPlane
-__ZNK15IORegistryEntry15copyParentEntryEPK15IORegistryPlane
-__ZNK15IORegistryEntry16getChildIteratorEPK15IORegistryPlane
-__ZNK15IORegistryEntry16getPathComponentEPcPiPK15IORegistryPlane
-__ZNK15IORegistryEntry16getPropertyTableEv
-__ZNK15IORegistryEntry17getParentIteratorEPK15IORegistryPlane
-__ZNK15IORegistryEntry19serializePropertiesEP11OSSerialize
-__ZNK15IORegistryEntry20getChildSetReferenceEPK15IORegistryPlane
-__ZNK15IORegistryEntry21getParentSetReferenceEPK15IORegistryPlane
-__ZNK15IORegistryEntry24dictionaryWithPropertiesEv
-__ZNK15IORegistryEntry7getNameEPK15IORegistryPlane
-__ZNK15IORegistryEntry7getPathEPcPiPK15IORegistryPlane
-__ZNK15IORegistryEntry7inPlaneEPK15IORegistryPlane
-__ZNK15IORegistryEntry7isChildEPS_PK15IORegistryPlaneb
-__ZNK15IORegistryEntry8copyNameEPK15IORegistryPlane
-__ZNK15IORegistryEntry8getDepthEPK15IORegistryPlane
-__ZNK15IORegistryEntry8hasAliasEPK15IORegistryPlanePcPi
-__ZNK15IORegistryEntry8isParentEPS_PK15IORegistryPlaneb
-__ZNK15IORegistryEntry8makeLinkEPS_jPK15IORegistryPlane
-__ZNK15IORegistryEntry9MetaClass5allocEv
-__ZNK15IORegistryEntry9breakLinkEPS_jPK15IORegistryPlane
-__ZNK15IORegistryPlane12getMetaClassEv
-__ZNK15IORegistryPlane9MetaClass5allocEv
-__ZNK15IORegistryPlane9serializeEP11OSSerialize
-__ZNK15IOWatchDogTimer12getMetaClassEv
-__ZNK15IOWatchDogTimer9MetaClass5allocEv
-__ZNK15OSMetaClassBase8metaCastEPK11OSMetaClass
-__ZNK15OSMetaClassBase8metaCastEPK8OSString
-__ZNK15OSMetaClassBase8metaCastEPK8OSSymbol
-__ZNK15OSMetaClassBase8metaCastEPKc
-__ZNK15OSMetaClassBase9isEqualToEPKS_
-__ZNK15OSMetaClassMeta5allocEv
-__ZNK16IOKitDiagnostics12getMetaClassEv
-__ZNK16IOKitDiagnostics9MetaClass5allocEv
-__ZNK16IOKitDiagnostics9serializeEP11OSSerialize
-__ZNK16IOPMinformeeList12getMetaClassEv -__ZNK16IOPMinformeeList9MetaClass5allocEv -__ZNK16IOPlatformDevice11compareNameEP8OSStringPS1_ -__ZNK16IOPlatformDevice12getMetaClassEv -__ZNK16IOPlatformDevice9MetaClass5allocEv -__ZNK16IOPlatformExpert12getMetaClassEv -__ZNK16IOPlatformExpert14compareNubNameEPK9IOServiceP8OSStringPS4_ -__ZNK16IOPlatformExpert9MetaClass5allocEv -__ZNK16IORangeAllocator12getMetaClassEv -__ZNK16IORangeAllocator9MetaClass5allocEv -__ZNK16IORangeAllocator9serializeEP11OSSerialize -__ZNK17IOBigMemoryCursor12getMetaClassEv -__ZNK17IOBigMemoryCursor9MetaClass5allocEv -__ZNK17IOPowerConnection12getMetaClassEv -__ZNK17IOPowerConnection9MetaClass5allocEv -__ZNK18IODTPlatformExpert12getMetaClassEv -__ZNK18IODTPlatformExpert14compareNubNameEPK9IOServiceP8OSStringPS4_ -__ZNK18IODTPlatformExpert9MetaClass5allocEv -__ZNK18IOMemoryDescriptor12getDirectionEv -__ZNK18IOMemoryDescriptor12getMetaClassEv -__ZNK18IOMemoryDescriptor19dmaCommandOperationEmPvj -__ZNK18IOMemoryDescriptor9MetaClass5allocEv -__ZNK18IOMemoryDescriptor9getLengthEv -__ZNK18IORegistryIterator12getMetaClassEv -__ZNK18IORegistryIterator9MetaClass5allocEv -__ZNK18IOTimerEventSource12getMetaClassEv -__ZNK18IOTimerEventSource9MetaClass5allocEv -__ZNK18IOUserNotification12getMetaClassEv -__ZNK18IOUserNotification9MetaClass5allocEv -__ZNK18_IOServiceNotifier12getMetaClassEv -__ZNK18_IOServiceNotifier9MetaClass5allocEv -__ZNK19IOPMPowerSourceList12getMetaClassEv -__ZNK19IOPMPowerSourceList9MetaClass5allocEv -__ZNK20IOLittleMemoryCursor12getMetaClassEv -__ZNK20IOLittleMemoryCursor9MetaClass5allocEv -__ZNK20OSCollectionIterator12getMetaClassEv -__ZNK20OSCollectionIterator9MetaClass5allocEv -__ZNK20RootDomainUserClient12getMetaClassEv -__ZNK20RootDomainUserClient9MetaClass5allocEv -__ZNK21IOInterruptController12getMetaClassEv -__ZNK21IOInterruptController9MetaClass5allocEv -__ZNK21IONaturalMemoryCursor12getMetaClassEv -__ZNK21IONaturalMemoryCursor9MetaClass5allocEv -__ZNK21IOSubMemoryDescriptor12getMetaClassEv -__ZNK21IOSubMemoryDescriptor9MetaClass5allocEv -__ZNK22IOInterruptEventSource11getIntIndexEv -__ZNK22IOInterruptEventSource11getProviderEv -__ZNK22IOInterruptEventSource12getMetaClassEv -__ZNK22IOInterruptEventSource14getAutoDisableEv -__ZNK22IOInterruptEventSource9MetaClass5allocEv -__ZNK22IOPlatformExpertDevice11compareNameEP8OSStringPS1_ -__ZNK22IOPlatformExpertDevice11getWorkLoopEv -__ZNK22IOPlatformExpertDevice12getMetaClassEv -__ZNK22IOPlatformExpertDevice9MetaClass5allocEv -__ZNK22_IOOpenServiceIterator12getMetaClassEv -__ZNK22_IOOpenServiceIterator9MetaClass5allocEv -__ZNK23IOMultiMemoryDescriptor12getMetaClassEv -__ZNK23IOMultiMemoryDescriptor9MetaClass5allocEv -__ZNK24IOBufferMemoryDescriptor11getCapacityEv -__ZNK24IOBufferMemoryDescriptor12getMetaClassEv -__ZNK24IOBufferMemoryDescriptor9MetaClass5allocEv -__ZNK24IOCPUInterruptController12getMetaClassEv -__ZNK24IOCPUInterruptController9MetaClass5allocEv -__ZNK25IOGeneralMemoryDescriptor12getMetaClassEv -__ZNK25IOGeneralMemoryDescriptor19dmaCommandOperationEmPvj -__ZNK25IOGeneralMemoryDescriptor9MetaClass5allocEv -__ZNK25IOGeneralMemoryDescriptor9serializeEP11OSSerialize -__ZNK26_IOServiceInterestNotifier12getMetaClassEv -__ZNK26_IOServiceInterestNotifier9MetaClass5allocEv -__ZNK27IOSharedInterruptController12getMetaClassEv -__ZNK27IOSharedInterruptController9MetaClass5allocEv -__ZNK28IOFilterInterruptEventSource12getMetaClassEv -__ZNK28IOFilterInterruptEventSource15getFilterActionEv -__ZNK28IOFilterInterruptEventSource9MetaClass5allocEv 
-__ZNK5IOCPU11getPropertyEPK8OSSymbol -__ZNK5IOCPU12getMetaClassEv -__ZNK5IOCPU19serializePropertiesEP11OSSerialize -__ZNK5IOCPU9MetaClass5allocEv -__ZNK5OSSet11getCapacityEv -__ZNK5OSSet12getAnyObjectEv -__ZNK5OSSet12getMetaClassEv -__ZNK5OSSet12initIteratorEPv -__ZNK5OSSet12iteratorSizeEv -__ZNK5OSSet14containsObjectEPK15OSMetaClassBase -__ZNK5OSSet20getCapacityIncrementEv -__ZNK5OSSet24getNextObjectForIteratorEPvPP8OSObject -__ZNK5OSSet6memberEPK15OSMetaClassBase -__ZNK5OSSet8getCountEv -__ZNK5OSSet9MetaClass5allocEv -__ZNK5OSSet9isEqualToEPK15OSMetaClassBase -__ZNK5OSSet9isEqualToEPKS_ -__ZNK5OSSet9serializeEP11OSSerialize -__ZNK6OSData11getCapacityEv -__ZNK6OSData12getMetaClassEv -__ZNK6OSData14getBytesNoCopyEjj -__ZNK6OSData14getBytesNoCopyEv -__ZNK6OSData20getCapacityIncrementEv -__ZNK6OSData9MetaClass5allocEv -__ZNK6OSData9getLengthEv -__ZNK6OSData9isEqualToEPK15OSMetaClassBase -__ZNK6OSData9isEqualToEPK8OSString -__ZNK6OSData9isEqualToEPKS_ -__ZNK6OSData9isEqualToEPKvj -__ZNK6OSData9serializeEP11OSSerialize -__ZNK7OSArray11getCapacityEv -__ZNK7OSArray12getMetaClassEv -__ZNK7OSArray12initIteratorEPv -__ZNK7OSArray12iteratorSizeEv -__ZNK7OSArray13getLastObjectEv -__ZNK7OSArray20getCapacityIncrementEv -__ZNK7OSArray20getNextIndexOfObjectEPK15OSMetaClassBasej -__ZNK7OSArray24getNextObjectForIteratorEPvPP8OSObject -__ZNK7OSArray8getCountEv -__ZNK7OSArray9MetaClass5allocEv -__ZNK7OSArray9getObjectEj -__ZNK7OSArray9isEqualToEPK15OSMetaClassBase -__ZNK7OSArray9isEqualToEPKS_ -__ZNK7OSArray9serializeEP11OSSerialize -__ZNK8IOMapper12getMetaClassEv -__ZNK8IOMapper13getBypassMaskEPy -__ZNK8IOMapper9MetaClass5allocEv -__ZNK8IOPMprot12getMetaClassEv -__ZNK8IOPMprot9MetaClass5allocEv -__ZNK8IOSyncer12getMetaClassEv -__ZNK8IOSyncer9MetaClass5allocEv -__ZNK8OSNumber12getMetaClassEv -__ZNK8OSNumber12numberOfBitsEv -__ZNK8OSNumber13numberOfBytesEv -__ZNK8OSNumber17unsigned8BitValueEv -__ZNK8OSNumber18unsigned16BitValueEv -__ZNK8OSNumber18unsigned32BitValueEv -__ZNK8OSNumber18unsigned64BitValueEv -__ZNK8OSNumber9MetaClass5allocEv -__ZNK8OSNumber9isEqualToEPK15OSMetaClassBase -__ZNK8OSNumber9isEqualToEPKS_ -__ZNK8OSNumber9serializeEP11OSSerialize -__ZNK8OSObject12getMetaClassEv -__ZNK8OSObject12taggedRetainEPKv -__ZNK8OSObject13taggedReleaseEPKv -__ZNK8OSObject13taggedReleaseEPKvi -__ZNK8OSObject14getRetainCountEv -__ZNK8OSObject6retainEv -__ZNK8OSObject7releaseEi -__ZNK8OSObject7releaseEv -__ZNK8OSObject9MetaClass5allocEv -__ZNK8OSObject9serializeEP11OSSerialize -__ZNK8OSString12getMetaClassEv -__ZNK8OSString16getCStringNoCopyEv -__ZNK8OSString7getCharEj -__ZNK8OSString9MetaClass5allocEv -__ZNK8OSString9getLengthEv -__ZNK8OSString9isEqualToEPK15OSMetaClassBase -__ZNK8OSString9isEqualToEPK6OSData -__ZNK8OSString9isEqualToEPKS_ -__ZNK8OSString9isEqualToEPKc -__ZNK8OSString9serializeEP11OSSerialize -__ZNK8OSSymbol12getMetaClassEv -__ZNK8OSSymbol13taggedReleaseEPKv -__ZNK8OSSymbol13taggedReleaseEPKvi -__ZNK8OSSymbol9MetaClass5allocEv -__ZNK8OSSymbol9isEqualToEPK15OSMetaClassBase -__ZNK8OSSymbol9isEqualToEPKS_ -__ZNK8OSSymbol9isEqualToEPKc -__ZNK9IOCommand12getMetaClassEv -__ZNK9IOCommand9MetaClass5allocEv -__ZNK9IODTNVRAM11getPropertyEPK8OSSymbol -__ZNK9IODTNVRAM11getPropertyEPKc -__ZNK9IODTNVRAM12getMetaClassEv -__ZNK9IODTNVRAM17getOFVariablePermEPK8OSSymbol -__ZNK9IODTNVRAM17getOFVariableTypeEPK8OSSymbol -__ZNK9IODTNVRAM19serializePropertiesEP11OSSerialize -__ZNK9IODTNVRAM9MetaClass5allocEv -__ZNK9IOService10isInactiveEv -__ZNK9IOService11getProviderEv -__ZNK9IOService11getWorkLoopEv 
-__ZNK9IOService12getMetaClassEv -__ZNK9IOService12handleIsOpenEPKS_ -__ZNK9IOService17getClientIteratorEv -__ZNK9IOService19getProviderIteratorEv -__ZNK9IOService19serializePropertiesEP11OSSerialize -__ZNK9IOService21getOpenClientIteratorEv -__ZNK9IOService23getOpenProviderIteratorEv -__ZNK9IOService6isOpenEPKS_ -__ZNK9IOService8getStateEv -__ZNK9IOService9MetaClass5allocEv -__ZNK9IOService9getClientEv -__ZNK9OSBoolean12getMetaClassEv -__ZNK9OSBoolean12taggedRetainEPKv -__ZNK9OSBoolean13taggedReleaseEPKvi -__ZNK9OSBoolean6isTrueEv -__ZNK9OSBoolean7isFalseEv -__ZNK9OSBoolean8getValueEv -__ZNK9OSBoolean9MetaClass5allocEv -__ZNK9OSBoolean9isEqualToEPK15OSMetaClassBase -__ZNK9OSBoolean9isEqualToEPKS_ -__ZNK9OSBoolean9serializeEP11OSSerialize -__ZTV10IOMachPort -__ZTV10IONotifier -__ZTV10IOWorkLoop -__ZTV10OSIterator -__ZTV11IOCatalogue -__ZTV11IODataQueue -__ZTV11IOMemoryMap -__ZTV11IOResources -__ZTV11OSMetaClass -__ZTV11OSSerialize -__ZTV12IOPMinformee -__ZTV12IORootParent -__ZTV12IOUserClient -__ZTV12OSCollection -__ZTV12OSDictionary -__ZTV12OSOrderedSet -__ZTV12OSSerializer -__ZTV12OSSymbolPool -__ZTV13IOCommandGate -__ZTV13IOCommandPool -__ZTV13IOEventSource -__ZTV13_IOServiceJob -__ZTV14IOCommandQueue -__ZTV14IOMemoryCursor -__ZTV14IOPMrootDomain -__ZTV15IOConditionLock -__ZTV15IOPMPowerSource -__ZTV15IOPanicPlatform -__ZTV15IORegistryEntry -__ZTV15IORegistryPlane -__ZTV15IOWatchDogTimer -__ZTV15OSMetaClassBase -__ZTV15OSMetaClassMeta -__ZTV16IOKitDiagnostics -__ZTV16IOPMinformeeList -__ZTV16IOPlatformDevice -__ZTV16IOPlatformExpert -__ZTV16IORangeAllocator -__ZTV17IOBigMemoryCursor -__ZTV17IOPolledInterface -__ZTV17IOPowerConnection -__ZTV18IODTPlatformExpert -__ZTV18IOMemoryDescriptor -__ZTV18IORegistryIterator -__ZTV18IOTimerEventSource -__ZTV18IOUserNotification -__ZTV18_IOServiceNotifier -__ZTV19IOPMPowerSourceList -__ZTV20IOLittleMemoryCursor -__ZTV20OSCollectionIterator -__ZTV20RootDomainUserClient -__ZTV21IOInterruptController -__ZTV21IONaturalMemoryCursor -__ZTV21IOSubMemoryDescriptor -__ZTV22IOInterruptEventSource -__ZTV22IOPlatformExpertDevice -__ZTV22_IOOpenServiceIterator -__ZTV23IOMultiMemoryDescriptor -__ZTV24IOBufferMemoryDescriptor -__ZTV24IOCPUInterruptController -__ZTV25IOGeneralMemoryDescriptor -__ZTV26_IOServiceInterestNotifier -__ZTV27IOSharedInterruptController -__ZTV28IOFilterInterruptEventSource -__ZTV5IOCPU -__ZTV5OSSet -__ZTV6OSData -__ZTV7OSArray -__ZTV8IOMapper -__ZTV8IOPMprot -__ZTV8IOSyncer -__ZTV8OSNumber -__ZTV8OSObject -__ZTV8OSString -__ZTV8OSSymbol -__ZTV9IOCommand -__ZTV9IODTNVRAM -__ZTV9IOService -__ZTV9OSBoolean -__ZTVN10IOMachPort9MetaClassE -__ZTVN10IONotifier9MetaClassE -__ZTVN10IOWorkLoop9MetaClassE -__ZTVN10OSIterator9MetaClassE -__ZTVN11IOCatalogue9MetaClassE -__ZTVN11IODataQueue9MetaClassE -__ZTVN11IOMemoryMap9MetaClassE -__ZTVN11IOResources9MetaClassE -__ZTVN11OSSerialize9MetaClassE -__ZTVN12IOPMinformee9MetaClassE -__ZTVN12IORootParent9MetaClassE -__ZTVN12IOUserClient9MetaClassE -__ZTVN12OSCollection9MetaClassE -__ZTVN12OSDictionary9MetaClassE -__ZTVN12OSOrderedSet9MetaClassE -__ZTVN12OSSerializer9MetaClassE -__ZTVN13IOCommandGate9MetaClassE -__ZTVN13IOCommandPool9MetaClassE -__ZTVN13IOEventSource9MetaClassE -__ZTVN13_IOServiceJob9MetaClassE -__ZTVN14IOCommandQueue9MetaClassE -__ZTVN14IOMemoryCursor9MetaClassE -__ZTVN14IOPMrootDomain9MetaClassE -__ZTVN15IOConditionLock9MetaClassE -__ZTVN15IOPMPowerSource9MetaClassE -__ZTVN15IOPanicPlatform9MetaClassE -__ZTVN15IORegistryEntry9MetaClassE -__ZTVN15IORegistryPlane9MetaClassE 
-__ZTVN15IOWatchDogTimer9MetaClassE -__ZTVN16IOKitDiagnostics9MetaClassE -__ZTVN16IOPMinformeeList9MetaClassE -__ZTVN16IOPlatformDevice9MetaClassE -__ZTVN16IOPlatformExpert9MetaClassE -__ZTVN16IORangeAllocator9MetaClassE -__ZTVN17IOBigMemoryCursor9MetaClassE -__ZTVN17IOPowerConnection9MetaClassE -__ZTVN18IODTPlatformExpert9MetaClassE -__ZTVN18IOMemoryDescriptor9MetaClassE -__ZTVN18IORegistryIterator9MetaClassE -__ZTVN18IOTimerEventSource9MetaClassE -__ZTVN18IOUserNotification9MetaClassE -__ZTVN18_IOServiceNotifier9MetaClassE -__ZTVN19IOPMPowerSourceList9MetaClassE -__ZTVN20IOLittleMemoryCursor9MetaClassE -__ZTVN20OSCollectionIterator9MetaClassE -__ZTVN20RootDomainUserClient9MetaClassE -__ZTVN21IOInterruptController9MetaClassE -__ZTVN21IONaturalMemoryCursor9MetaClassE -__ZTVN21IOSubMemoryDescriptor9MetaClassE -__ZTVN22IOInterruptEventSource9MetaClassE -__ZTVN22IOPlatformExpertDevice9MetaClassE -__ZTVN22_IOOpenServiceIterator9MetaClassE -__ZTVN23IOMultiMemoryDescriptor9MetaClassE -__ZTVN24IOBufferMemoryDescriptor9MetaClassE -__ZTVN24IOCPUInterruptController9MetaClassE -__ZTVN25IOGeneralMemoryDescriptor9MetaClassE -__ZTVN26_IOServiceInterestNotifier9MetaClassE -__ZTVN27IOSharedInterruptController9MetaClassE -__ZTVN28IOFilterInterruptEventSource9MetaClassE -__ZTVN5IOCPU9MetaClassE -__ZTVN5OSSet9MetaClassE -__ZTVN6OSData9MetaClassE -__ZTVN7OSArray9MetaClassE -__ZTVN8IOMapper9MetaClassE -__ZTVN8IOPMprot9MetaClassE -__ZTVN8IOSyncer9MetaClassE -__ZTVN8OSNumber9MetaClassE -__ZTVN8OSObject9MetaClassE -__ZTVN8OSString9MetaClassE -__ZTVN8OSSymbol9MetaClassE -__ZTVN9IOCommand9MetaClassE -__ZTVN9IODTNVRAM9MetaClassE -__ZTVN9IOService9MetaClassE -__ZTVN9OSBoolean9MetaClassE -__ZdlPv -__Znwm -___cxa_pure_virtual -__doprnt -__giDebugLogDataInternal -__giDebugLogInternal -__giDebugReserved1 -__giDebugReserved2 -__mh_execute_header -__printf -__start -_absolutetime_to_nanoseconds -_acknowledgeSleepWakeNotification -_appleClut8 -_assert_wait -_assert_wait_timeout -_atoi -_badport_bandlim -_bcd2bin_data -_bcmp -_bcopy -_bdevsw_add -_bdevsw_isfree -_bdevsw_remove -_boot -_bsd_osrelease:_osrelease -_bsd_ostype:_ostype -_bsd_version:_version -_bsd_version_major:_version_major -_bsd_version_minor:_version_minor -_bsd_version_variant:_version_variant -_bzero -_catch_exception_raise -_catch_exception_raise_state -_catch_exception_raise_state_identity -_cdevsw_add -_cdevsw_add_with_bdev -_cdevsw_isfree -_cdevsw_remove -_clock_absolutetime_interval_to_deadline -_clock_delay_until -_clock_get_calendar_microtime -_clock_get_calendar_nanotime -_clock_get_calendar_value -_clock_get_system_microtime -_clock_get_system_nanotime -_clock_get_system_value -_clock_get_uptime -_clock_interval_to_absolutetime_interval -_clock_interval_to_deadline -_clock_timebase_info -_cnputc -_conslog_putc -_convert_port_entry_to_map -_convert_port_entry_to_object -_current_act -_current_proc -_current_proc_EXTERNAL -_current_task -_current_thread -_db_dumpiojunk -_db_piokjunk -_debug_container_malloc_size -_debug_iomalloc_size -_debug_ivars_size -_debug_malloc_size -_delay -_devfs_make_link -_devfs_make_node -_devfs_remove -_device_close -_device_data_action -_devnode_free -_disable_serial_output -_ether_check_multi -_fatfile_getarch -_fatfile_getarch_affinity -_find_entry -_flush_dcache -_flush_dcache64 -_gGearPict -_gIOAppPowerStateInterest -_gIOBusyInterest -_gIOCatalogue -_gIOClassKey -_gIOCommandPoolSizeKey -_gIODTAAPLInterruptsKey -_gIODTAddressCellKey -_gIODTCompatibleKey -_gIODTDefaultInterruptController -_gIODTInterruptCellKey 
-_gIODTInterruptParentKey -_gIODTModelKey -_gIODTNWInterruptMappingKey -_gIODTNameKey -_gIODTPHandleKey -_gIODTPersistKey -_gIODTPlane -_gIODTRangeKey -_gIODTSizeCellKey -_gIODTTypeKey -_gIODTUnitKey -_gIODefaultMatchCategoryKey -_gIODeviceMemoryKey -_gIOFirstMatchNotification -_gIOFirstPublishNotification -_gIOGeneralInterest -_gIOInterruptControllersKey -_gIOInterruptSpecifiersKey -_gIOKernelConfigTables -_gIOKitDebug -_gIOKitDebugKey -_gIOLocationKey -_gIOLocationMatchKey -_gIOMatchCategoryKey -_gIOMatchedNotification -_gIOMatchedServiceCountKey -_gIOModuleIdentifierKey -_gIONameKey -_gIONameMatchKey -_gIONameMatchedKey -_gIOParentMatchKey -_gIOPathMatchKey -_gIOPowerPlane -_gIOPriorityPowerStateInterest -_gIOProbeScoreKey -_gIOPropertyMatchKey -_gIOProviderClassKey -_gIOPublishNotification -_gIORangeAllocatorLock -_gIOResourceMatchKey -_gIOResourcesKey -_gIOServiceKey -_gIOServicePlane -_gIOTerminatedNotification -_gIOUserClientClassKey -_gOFVariables -_gPEClockFrequencyInfo -_gPESerialBaud -_gPlatformInterruptControllerName -_get_bsdtask_info -_get_bsduthreadarg -_get_bsduthreadrval -_get_inpcb_str_size -_get_kernel_symfile -_get_procrustime -_get_task_map -_invalidate_icache -_invalidate_icache64 -_iokit_add_reference -_iokit_builder:_osbuilder -_iokit_client_died -_iokit_osrelease:_osrelease -_iokit_ostype:_ostype -_iokit_port_for_object -_iokit_remove_reference -_iokit_user_client_trap -_iokit_version:_version -_iokit_version_major:_version_major -_iokit_version_minor:_version_minor -_iokit_version_variant:_version_variant -_ipc_port_release_send -_is_suser -_is_suser1 -_kOSBooleanFalse -_kOSBooleanTrue -_kalloc -_kdebug_enable -_kdp_register_send_receive -_kdp_set_interface -_kdp_unregister_send_receive -_kern_os_free -_kern_os_malloc -_kern_os_malloc_size -_kern_os_realloc -_kernel_debug -_kernel_debug1 -_kernel_map -_kernel_pmap -_kernel_task -_kernel_thread -_kfree -_kmem_alloc -_kmem_free -_kprintf -_libkern_builder:_osbuilder -_libkern_osrelease:_osrelease -_libkern_ostype:_ostype -_libkern_version:_version -_libkern_version_major:_version_major -_libkern_version_minor:_version_minor -_libkern_version_variant:_version_variant -_libsa_builder:_osbuilder -_libsa_osrelease:_osrelease -_libsa_ostype:_ostype -_libsa_version:_version -_libsa_version_major:_version_major -_libsa_version_minor:_version_minor -_libsa_version_variant:_version_variant -_lock_alloc:_lock_alloc_EXT -_lock_done:_lock_done_EXT -_lock_free:_lock_free_EXT -_lock_init:_lock_init_EXT -_lock_read:_lock_read_EXT -_lock_read_to_write:_lock_read_to_write_EXT -_lock_write:_lock_write_EXT -_lock_write_to_read:_lock_write_to_read_EXT -_m_mclfree -_mach_absolute_time -_mach_make_memory_entry_64 -_mach_msg_send_from_kernel -_machine_idle -_max_mem -_mbstat -_mem_size -_memcmp -_memcpy -_memmove -_memory_object_page_op -_memset -_microtime -_microuptime -_ml_at_interrupt_context -_ml_cpu_get_info -_ml_io_map -_ml_phys_read -_ml_phys_write -_ml_probe_read -_ml_processor_register -_ml_set_interrupts_enabled -_ml_thread_policy -_mutex_alloc:_mutex_alloc_EXT -_mutex_free:_mutex_free_EXT -_mutex_init:_mutex_init_EXT -_mutex_lock:_lck_mtx_lock -_mutex_try:_lck_mtx_try_lock -_mutex_unlock:_lck_mtx_unlock -_nanoseconds_to_absolutetime -_nanotime -_nanouptime -_osfmk_osrelease:_osrelease -_osfmk_ostype:_ostype -_osfmk_version:_version -_osfmk_version_major:_version_major -_osfmk_version_minor:_version_minor -_osfmk_version_variant:_version_variant -_osrelease -_ostype -_page_mask -_page_shift -_page_size -_panic 
-_pe_identify_machine -_pe_init_debug -_pexpert_osrelease:_osrelease -_pexpert_ostype:_ostype -_pexpert_version:_version -_pexpert_version_major:_version_major -_pexpert_version_minor:_version_minor -_pexpert_version_variant:_version_variant -_pmap_extract -_pmap_find_phys -_pmsBuild -_pmsPark -_pmsRun -_pmsStart -_print_vmpage_stat -_printf -_processor_exit -_processor_info -_processor_start -_random -_read_random -_registerPrioritySleepWakeInterest -_registerSleepWakeInterest -_rootDomainRestart -_rootDomainShutdown -_semaphore_create -_semaphore_dereference -_semaphore_destroy -_semaphore_reference -_semaphore_signal -_semaphore_signal_all -_semaphore_timedwait -_semaphore_wait -_sha1_init:_SHA1Init -_sha1_loop:_SHA1Update -_sha1_result:_SHA1Final_r -_snprintf -_spl0 -_splbio -_splclock -_splhigh -_splimp -_spllo -_spln -_splnet -_sploff -_splon -_splpower -_splsched -_splsoftclock -_spltty -_splvm -_splx -_sscanf -_stack_privilege -_strchr -_strcmp -_strlen -_strncat -_strncmp -_strncpy -_strtol -_strtoq -_strtoul -_strtouq -_subyte -_suibyte -_suiword -_suser -_suword -_sysctlbyname -_task_create -_task_deallocate -_task_reference -_task_resume -_task_suspend -_thread_block -_thread_call_allocate -_thread_call_cancel -_thread_call_enter -_thread_call_enter1 -_thread_call_enter1_delayed -_thread_call_enter_delayed -_thread_call_free -_thread_call_func -_thread_call_func_cancel -_thread_call_func_delayed -_thread_call_is_delayed -_thread_cancel_timer -_thread_deallocate -_thread_flavor_array -_thread_policy_set -_thread_reference -_thread_set_timer -_thread_set_timer_deadline -_thread_sleep_lock_write:_thread_sleep_lock_write_EXT -_thread_sleep_mutex:_thread_sleep_mutex_EXT -_thread_sleep_mutex_deadline:_thread_sleep_mutex_deadline_EXT -_thread_sleep_usimple_lock:_thread_sleep_usimple_lock_EXT -_thread_terminate -_thread_wakeup_prim -_upl_get_internal_page_list -_upl_valid_page -_usimple_lock:_usimple_lock_EXT -_usimple_lock_init:_usimple_lock_init_EXT -_usimple_lock_try:_usimple_lock_try_EXT -_usimple_unlock:_usimple_unlock_EXT -_uuid_clear -_uuid_compare -_uuid_copy -_uuid_generate -_uuid_generate_random -_uuid_generate_time -_uuid_is_null -_uuid_parse -_uuid_unparse -_uuid_unparse_lower -_uuid_unparse_upper -_version -_version_major -_version_minor -_version_variant -_vetoSleepWakeNotification -_vm_allocate -_vm_deallocate -_vm_initial_limit_core -_vm_initial_limit_data -_vm_initial_limit_stack -_vm_map -_vm_map_deallocate -_vm_map_unwire -_vm_map_wire -_vm_page_free_count -_vm_protect -_vm_region -_vm_region_object_create -_vsnprintf -_vsscanf -_zalloc -_zfree -_zinit diff --git a/config/System6.0.i386.exports b/config/System6.0.i386.exports deleted file mode 100644 index aecfe0c97..000000000 --- a/config/System6.0.i386.exports +++ /dev/null @@ -1,34 +0,0 @@ -_IOSpinUnlock -_IOTrySpinLock -_PE_install_interrupt_handler -_PE_interrupt_handler -_PE_parse_boot_arg -__ZN11IOMemoryMap19setMemoryDescriptorEP18IOMemoryDescriptory -__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormm -__ZN11IOMemoryMap8redirectEP18IOMemoryDescriptormy -__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy -__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy -_acpi_install_wake_handler -_acpi_sleep_kernel -_cpu_number -_cpu_to_lapic -_cpuid_features -_cpuid_info -_ev_try_lock -_ev_unlock -_hfs_addconverter -_hfs_remconverter -_lapic_end_of_interrupt -_ml_get_max_cpus -_mp_broadcast -_mp_cpus_call -_mp_cpus_call1 -_mp_rendezvous_no_intrs -_rtc_clock_stepped -_rtc_clock_stepping 
-_smp_initialized -_sprintf -_strcat -_strcpy -_thread_funnel_set -_vsprintf diff --git a/config/System6.0.x86_64.exports b/config/System6.0.x86_64.exports deleted file mode 100644 index accc98e65..000000000 --- a/config/System6.0.x86_64.exports +++ /dev/null @@ -1,8 +0,0 @@ -_IOSpinUnlock -_IOTrySpinLock -_ev_try_lock -_ev_unlock -_sprintf -_strcat -_strcpy -_vsprintf diff --git a/config/Unsupported.exports b/config/Unsupported.exports index b4087719b..7bcd7a5cc 100644 --- a/config/Unsupported.exports +++ b/config/Unsupported.exports @@ -83,6 +83,7 @@ _hfs_getconverter _hfs_pickencoding _hfs_relconverter _host_get_special_port +_host_get_exception_ports _host_priv_self _hz _ipc_kernel_map @@ -159,10 +160,13 @@ _sleep _stack_privilege _task_get_special_port _task_resume +_task_resume2 _task_suspend +_task_suspend2 _thread_notrigger _thread_tid _tsleep +_ubc_cs_blob_get _vfs_context_current _vfs_update_vfsstat _vm_allocate diff --git a/config/Unsupported.i386.exports b/config/Unsupported.i386.exports deleted file mode 100644 index 602e7123a..000000000 --- a/config/Unsupported.i386.exports +++ /dev/null @@ -1,136 +0,0 @@ -_OSSpinLockTry -_OSSpinLockUnlock -__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer0Ev -__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer1Ev -__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer2Ev -__ZN15IOWatchDogTimer25_RESERVEDIOWatchDogTimer3Ev -__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice0Ev -__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice1Ev -__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice2Ev -__ZN16IOPlatformDevice26_RESERVEDIOPlatformDevice3Ev -__ZN9IODTNVRAM17getOWVariableInfoEmPPK8OSSymbolPmS4_ -__ZN9IODTNVRAM19convertObjectToPropEPhPmPK8OSSymbolP8OSObject -__ZN9IODTNVRAM19convertPropToObjectEPhmS0_mPPK8OSSymbolPP8OSObject -__ZN9IODTNVRAM19searchNVRAMPropertyEP17IONVRAMDescriptorPm -__ZN9IODTNVRAM19unescapeBytesToDataEPKhm -_clock_get_system_value -_cpu_number -_domains -_dsmos_page_transform_hook -_gPEEFIRuntimeServices -_gPEEFISystemTable -_hibernate_vm_lock -_hibernate_vm_unlock -_ifunit -_in6addr_local -_in_broadcast -_inaddr_local -_inet_domain_mutex -_ip_mutex -_ip_output -_ip_protox -_kdp_register_callout -_kdp_set_ip_and_mac_addresses -_kernel_flock -_kernel_thread -_lapic_set_perfcnt_interrupt_mask -_lapic_set_pmi_func -_lo_ifp -_m_adj:_mbuf_adj -_m_cat -_m_copydata -_m_copym -_m_free:_mbuf_free -_m_freem:_mbuf_freem -_m_get -_m_gethdr -_m_getpacket -_m_getpackets -_m_mclget -_m_mtod -_m_prepend_2 -_m_pullup -_m_split -_m_trailingspace:_mbuf_trailingspace -_mach_msg_rpc_from_kernel -_mach_msg_send_from_kernel_with_options -_mcl_to_paddr:_mbuf_data_to_physical -_ml_cpu_int_event_time -_ml_get_apicid -_ml_get_maxbusdelay -_ml_get_maxsnoop -_mp_rendezvous -_mp_rendezvous_no_intrs -_nd6_storelladdr -_net_add_domain -_net_add_proto -_net_del_domain -_net_del_proto -_pffinddomain -_pffindproto -_pmCPUControl -_pmKextRegister -_pru_abort_notsupp -_pru_accept_notsupp -_pru_bind_notsupp -_pru_connect2_notsupp -_pru_connect_notsupp -_pru_disconnect_notsupp -_pru_listen_notsupp -_pru_peeraddr_notsupp -_pru_rcvd_notsupp -_pru_rcvoob_notsupp -_pru_send_notsupp -_pru_sense_null -_pru_shutdown_notsupp -_pru_sockaddr_notsupp -_pru_sopoll_notsupp -_rdmsr_carefully -_real_ncpus -_rtc_clock_napped -_sbappendaddr -_sbappendrecord -_sbflush -_sbspace -_serial_getc -_serial_init -_serial_putc -_soabort -_sobind -_socantrcvmore -_socantsendmore -_sock_getlistener -_sock_release -_sock_retain -_soclose -_soconnect -_socreate -_sodisconnect -_sofree -_sofreelastref 
-_soisconnected -_soisconnecting -_soisdisconnected -_soisdisconnecting -_sonewconn -_sooptcopyin -_sooptcopyout -_sopoll -_soreceive -_soreserve -_sorwakeup -_sosend -_sosetopt -_tcbinfo -_thread_call_func -_thread_call_func_cancel -_thread_call_func_delayed -_thread_call_is_delayed -_thread_cancel_timer -_thread_funnel_set -_thread_set_timer -_thread_set_timer_deadline -_tmrCvt -_tsc_get_info -_udbinfo -_PE_state diff --git a/config/Unused.exports b/config/Unused.exports new file mode 100644 index 000000000..bb719a4c0 --- /dev/null +++ b/config/Unused.exports @@ -0,0 +1,3 @@ +# Symbols that are unused as KPI, but must be globally exported +_dtrace_zero* +_gLoadedKextSummaries diff --git a/config/compress-man-pages.pl b/config/compress-man-pages.pl deleted file mode 100755 index 1dbd5a173..000000000 --- a/config/compress-man-pages.pl +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/perl -# Copyright (c) 2005 Apple Computer, Inc. All rights reserved. -# -# @APPLE_LICENSE_HEADER_START@ -# -# This file contains Original Code and/or Modifications of Original Code -# as defined in and that are subject to the Apple Public Source License -# Version 2.0 (the 'License'). You may not use this file except in -# compliance with the License. Please obtain a copy of the License at -# http://www.opensource.apple.com/apsl/ and read it before using this -# file. -# -# The Original Code and all software distributed under the License are -# distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER -# EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, -# INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. -# Please see the License for the specific language governing rights and -# limitations under the License. -# -# @APPLE_LICENSE_HEADER_END@ - -use strict; -use File::Basename (); -use File::Find (); -use Getopt::Std (); - -my $MyName = File::Basename::basename($0); -my $N = 100; -my $MinSize = 64; -my %inodes; -my @symlinks; -our $opt_d = ''; - -sub wanted { - return unless /\.[\dn][a-z]*$/; - if(-l $_) { - push(@symlinks, $_); - } elsif(-f _) { - return if -s _ < $MinSize; - my($dev, $ino) = stat(_); - my $list = $inodes{$ino}; - $list = $inodes{$ino} = [] unless defined($list); - push(@$list, $_); - } -} - -sub usage { - die "Usage: $MyName [-d prefix] dir ...\n"; -} - -Getopt::Std::getopts('d:'); -usage() unless scalar(@ARGV) > 0; - -for my $dir (@ARGV) { - $dir = $opt_d . $dir if $opt_d ne ''; - next unless -e $dir; - die "$dir: no such directory\n" unless -d _; - - %inodes = (); - @symlinks = (); - File::Find::find({ - wanted => \&wanted, - no_chdir => 1, - }, $dir); - - my(@compress, @links); - for(values(%inodes)) { - push(@compress, $_->[0]); - push(@links, $_) if scalar(@$_) > 1; - } - - my $count; - while(($count = scalar(@compress)) > 0) { - $_ = $count > $N ? 
$N : $count; - my @args = splice(@compress, 0, $_); - print "gzip -f -n @args\n"; - system('gzip', '-f', '-n', @args) == 0 or die "gzip failed\n";; - } - foreach my $list (@links) { - my $main = shift(@$list); - for(@$list) { - printf "rm $_; ln $main.gz $_.gz\n"; - unlink $_ or die "Can't unlink: $!\n"; - unlink "$_.gz"; - link("$main.gz", "$_.gz") or die "Can't link: $!\n";; - } - } - for(@symlinks) { - my $link = readlink($_); - printf "rm $_; ln -s $link.gz $_.gz\n"; - unlink $_ or die "Can't unlink: $!\n"; - symlink("$link.gz", "$_.gz") or die "Can't symlink: $!\n"; - } -} diff --git a/config/list_supported.sh b/config/list_supported.sh index 340632c88..3c49b8b01 100755 --- a/config/list_supported.sh +++ b/config/list_supported.sh @@ -31,26 +31,26 @@ TARGET_FILE=$3 SUPPORTED_KPI_FILES=( BSDKernel Mach IOKit Libkern ) DEPENDENCY_NAMES=( com.apple.kpi.bsd com.apple.kpi.mach com.apple.kpi.iokit com.apple.kpi.libkern ) -rm -f $TARGET_FILE +rm -f ${TARGET_FILE} -if [ ${ARCH} == "ALL" ] +if [ ${ARCH} == "all" ] then - echo "The following symbols are considered sustainable KPI on all architectures." >> $TARGET_FILE - echo "Note that symbols may be exported by some (or all) architectures individually." >> $TARGET_FILE + echo "The following symbols are considered sustainable KPI on all architectures." >> ${TARGET_FILE} + echo "Note that symbols may be exported by some (or all) architectures individually." >> ${TARGET_FILE} else - echo "The following symbols are considered sustainable KPI on architecture ${ARCH}." >> $TARGET_FILE + echo "The following symbols are considered sustainable KPI on architecture ${ARCH}." >> ${TARGET_FILE} fi -echo >> $TARGET_FILE +echo >> ${TARGET_FILE} for (( i = 0 ; i < ${#SUPPORTED_KPI_FILES[@]} ; i++ )) do - echo "Exported by ${DEPENDENCY_NAMES[i]}:" >> $TARGET_FILE - echo >> $TARGET_FILE - if [ $ARCH == "ALL" ] + echo "Exported by ${DEPENDENCY_NAMES[i]}:" >> ${TARGET_FILE} + echo >> ${TARGET_FILE} + if [ ${ARCH} == "all" ] then - cat "${CONFIG_DIR}/${SUPPORTED_KPI_FILES[i]}.exports" | sed "s/^_//" | sed "s/:.*//" | sort >> $TARGET_FILE + cat "${CONFIG_DIR}/${SUPPORTED_KPI_FILES[i]}.exports" | sed "s/^_//" | sed "s/:.*//" | sort >> ${TARGET_FILE} else - cat "${CONFIG_DIR}/${SUPPORTED_KPI_FILES[i]}.${ARCH}.exports" | sed "s/^_//" | sed "s/:.*//" | sort >> $TARGET_FILE + cat "${CONFIG_DIR}/${SUPPORTED_KPI_FILES[i]}.${ARCH}.exports" | sed "s/^_//" | sed "s/:.*//" | sort >> ${TARGET_FILE} fi - echo >> $TARGET_FILE + echo >> ${TARGET_FILE} done diff --git a/config/newvers.pl b/config/newvers.pl index bf5096ad0..9a12b180f 100755 --- a/config/newvers.pl +++ b/config/newvers.pl @@ -47,12 +47,12 @@ die("SRCROOT not defined") unless defined($ENV{'SRCROOT'}); die("OBJROOT not defined") unless defined($ENV{'OBJROOT'}); my $versfile = "MasterVersion"; -$versfile = "$ENV{'SRCROOT'}/config/$versfile"; +$versfile = "$ENV{'SRCROOT'}/config/$versfile" if ($ENV{'SRCROOT'}); my $BUILD_SRCROOT=$ENV{'SRCROOT'}; $BUILD_SRCROOT =~ s,/+$,,; my $BUILD_OBJROOT=$ENV{'OBJROOT'}; $BUILD_OBJROOT =~ s,/+$,,; -my $BUILD_OBJPATH=$ENV{'OBJPATH'} || $ENV{'OBJROOT'}; +my $BUILD_OBJPATH=$ENV{'TARGET'} || $ENV{'OBJROOT'}; $BUILD_OBJPATH =~ s,/+$,,; my $BUILD_DATE = `date`; $BUILD_DATE =~ s/[\n\t]//g; diff --git a/iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp b/iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp deleted file mode 100644 index 846d0aaa1..000000000 --- a/iokit/Drivers/platform/drvAppleMacIO/AppleMacIO.cpp +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Copyright (c) 1998-2008 Apple Inc. 
All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1998 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * 23 Nov 98 sdouglas created. - */ - -#include -extern "C" { -#include -} - -#include -#include -#include -#include -#include - -#include - -#include - -#include - -#include - -#include - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#define super IOService - -OSDefineMetaClassAndAbstractStructors(AppleMacIO, IOService); -OSMetaClassDefineReservedUnused(AppleMacIO, 0); -OSMetaClassDefineReservedUnused(AppleMacIO, 1); -OSMetaClassDefineReservedUnused(AppleMacIO, 2); -OSMetaClassDefineReservedUnused(AppleMacIO, 3); - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -bool AppleMacIO::start( IOService * provider ) -{ - IOPCIDevice *pciNub = (IOPCIDevice *)provider; - - if( !super::start( provider)) - return( false); - - // Make sure memory space is on. 
- pciNub->setMemoryEnable(true); - - fNub = provider; - fMemory = provider->mapDeviceMemoryWithIndex( 0 ); - if( 0 == fMemory) - IOLog("%s: unexpected ranges\n", getName()); - else if( !selfTest()) - IOLog("Warning: AppleMacIO self test fails\n"); - PMinit(); // initialize for power management - temporaryPowerClampOn(); // hold power on till we get children - return( true); -} - - -IOService * AppleMacIO::createNub( IORegistryEntry * from ) -{ - IOService * nub; - - nub = new AppleMacIODevice; - - if( nub && !nub->init( from, gIODTPlane )) { - nub->free(); - nub = 0; - } - - return( nub); -} - -void AppleMacIO::processNub(IOService * /*nub*/) -{ -} - -const char * AppleMacIO::deleteList ( void ) -{ - return( "('sd', 'st', 'disk', 'tape', 'pram', 'rtc', 'mouse')" ); -} - -const char * AppleMacIO::excludeList( void ) -{ - return( 0 ); -} - -void AppleMacIO::publishBelow( IORegistryEntry * root ) -{ - OSCollectionIterator * kids; - IORegistryEntry * next; - IOService * nub; - - // infanticide - kids = IODTFindMatchingEntries( root, kIODTRecursive, deleteList() ); - if( kids) { - while( (next = (IORegistryEntry *)kids->getNextObject())) { - next->detachAll( gIODTPlane); - } - kids->release(); - } - - // publish everything below, minus excludeList - kids = IODTFindMatchingEntries( root, kIODTRecursive | kIODTExclusive, - excludeList()); - if( kids) { - while( (next = (IORegistryEntry *)kids->getNextObject())) { - - if( 0 == (nub = createNub( next ))) - continue; - - nub->attach( this ); - - processNub(nub); - - nub->registerService(); - } - kids->release(); - } -} - -bool AppleMacIO::compareNubName( const IOService * nub, - OSString * name, OSString ** matched ) const -{ - return( IODTCompareNubName( nub, name, matched ) - || nub->IORegistryEntry::compareName( name, matched ) ); -} - -IOReturn AppleMacIO::getNubResources( IOService * nub ) -{ - if( nub->getDeviceMemory()) - return( kIOReturnSuccess ); - - IODTResolveAddressing( nub, "reg", fNub->getDeviceMemoryWithIndex(0) ); - - return( kIOReturnSuccess); -} - -bool AppleMacIO::selfTest( void ) -{ - IODBDMADescriptor *dmaDescriptors; - UInt32 dmaDescriptorsPhys; - UInt32 i; - UInt32 status; - IODBDMADescriptor *dmaDesc; - IOBufferMemoryDescriptor *buffer; - volatile IODBDMAChannelRegisters *ioBaseDMA; - bool ok = false; - enum { kTestChannel = 0x8000 }; - - ioBaseDMA = (volatile IODBDMAChannelRegisters *) - (((UInt32)fMemory->getVirtualAddress()) - + kTestChannel ); - - do { - buffer = IOBufferMemoryDescriptor::withCapacity(page_size, kIODirectionOutIn, true); - dmaDescriptors = (IODBDMADescriptor*)buffer->getBytesNoCopy(); - - if (!dmaDescriptors) - continue; - - if ( (UInt32)dmaDescriptors & (page_size - 1) ) { - IOLog("AppleMacIO::%s() - DMA Descriptor memory not page aligned!!", __FUNCTION__); - continue; - } - - bzero( dmaDescriptors, page_size ); - - IODBDMAReset( ioBaseDMA ); - - dmaDesc = dmaDescriptors; - - IOMakeDBDMADescriptor( dmaDesc, - kdbdmaNop, - kdbdmaKeyStream0, - kdbdmaIntNever, - kdbdmaBranchNever, - kdbdmaWaitNever, - 0, - 0 ); - - dmaDesc++; - - dmaDescriptorsPhys = (UInt32) (buffer->getPhysicalSegment(0, NULL, 0)); - - IOMakeDBDMADescriptorDep( dmaDesc, - kdbdmaStoreQuad, - kdbdmaKeySystem, - kdbdmaIntNever, - kdbdmaBranchNever, - kdbdmaWaitNever, - 4, - dmaDescriptorsPhys+16*sizeof(IODBDMADescriptor), - 0x12345678 ); - - dmaDesc++; - - IOMakeDBDMADescriptor( dmaDesc, - kdbdmaStop, - kdbdmaKeyStream0, - kdbdmaIntNever, - kdbdmaBranchNever, - kdbdmaWaitNever, - 0, - 0 ); - - - for ( i = 0; (!ok) && (i < 3); i++ ) - { - 
dmaDescriptors[16].operation = 0; - - IOSetDBDMACommandPtr( ioBaseDMA, dmaDescriptorsPhys ); - IODBDMAContinue( ioBaseDMA ); - - IODelay( 200 ); - - status = IOGetDBDMAChannelStatus( ioBaseDMA ); - - if ( ((status & kdbdmaActive) == 0) - && ((status & kdbdmaDead) == 0) - && (OSReadSwapInt32( &dmaDescriptors[16].operation, 0 ) == 0x12345678 )) - ok = true; - } - - IODBDMAReset( ioBaseDMA ); - - } while (false); - - if (buffer) - buffer->release(); - - return ok; -} - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#undef super -#define super IOService - -OSDefineMetaClassAndStructors(AppleMacIODevice, IOService); -OSMetaClassDefineReservedUnused(AppleMacIODevice, 0); -OSMetaClassDefineReservedUnused(AppleMacIODevice, 1); -OSMetaClassDefineReservedUnused(AppleMacIODevice, 2); -OSMetaClassDefineReservedUnused(AppleMacIODevice, 3); - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -bool AppleMacIODevice::compareName( OSString * name, - OSString ** matched ) const -{ - return (IODTCompareNubName(this, name, matched) || - IORegistryEntry::compareName(name, matched)); -} - -IOService * AppleMacIODevice::matchLocation( IOService * /* client */ ) -{ - return this; -} - -IOReturn AppleMacIODevice::getResources( void ) -{ - IOService *macIO = this; - - if (getDeviceMemory() != 0) return kIOReturnSuccess; - - while (macIO && ((macIO = macIO->getProvider()) != 0)) - if (strcmp("mac-io", macIO->getName()) == 0) break; - - if (macIO == 0) return kIOReturnError; - - IODTResolveAddressing(this, "reg", macIO->getDeviceMemoryWithIndex(0)); - - return kIOReturnSuccess; -} - diff --git a/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp b/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp deleted file mode 100644 index bf941dcc5..000000000 --- a/iokit/Drivers/platform/drvAppleNMI/AppleNMI.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 1998-2008 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1998-2003 Apple Computer, Inc. All rights reserved. 
- * - * DRI: Josh de Cesare - * - */ - -#include -#include -#include - -#include - -extern "C" { -#include -} - -bool RootRegistered( OSObject * us, void *, IOService * yourDevice, __unused IONotifier * yourNotifier ); - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#define super IOService - -OSDefineMetaClassAndStructors(AppleNMI, IOService); -OSMetaClassDefineReservedUnused(AppleNMI, 0); -OSMetaClassDefineReservedUnused(AppleNMI, 1); -OSMetaClassDefineReservedUnused(AppleNMI, 2); -OSMetaClassDefineReservedUnused(AppleNMI, 3); - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -bool AppleNMI::start(IOService *provider) -{ - if (!super::start(provider)) return false; - - enable_debugger = FALSE; - mask_NMI = FALSE; - - if (provider->getProperty("enable_debugger")) - enable_debugger = TRUE; // Flag to automatically jump to debugger at NMI press - - if (provider->getProperty("mask_NMI")) - mask_NMI = TRUE; // Flag to mask/unmask NMI @ sleep/wake - - // Get notified when Root Domain registers - addMatchingNotification( gIOPublishNotification, serviceMatching("IOPMrootDomain"), (IOServiceMatchingNotificationHandler) RootRegistered, this, 0 ); - - // Register the interrupt. - IOInterruptAction handler = OSMemberFunctionCast(IOInterruptAction, - this, &AppleNMI::handleInterrupt); - provider->registerInterrupt(0, this, handler, 0); - provider->enableInterrupt(0); - - return true; -} - -// ********************************************************************************** -// The Root Power Domain has registered, so now we register as an interested driver -// so we know when the system is going to sleep or wake -// ********************************************************************************** -bool RootRegistered( OSObject * us, void *, IOService * yourDevice, __unused IONotifier * yourNotifier) -{ - if ( yourDevice != NULL ) { - ((AppleNMI *)us)->rootDomain = yourDevice; - ((IOPMrootDomain *)yourDevice)->registerInterestedDriver((IOService *) us); - } - - return true; -} - -IOReturn AppleNMI::initNMI(IOInterruptController *parentController, OSData *parentSource) -{ - return kIOReturnSuccess; -} - -IOReturn AppleNMI::handleInterrupt(void * /*refCon*/, IOService * /*nub*/, int /*source*/) -{ - if(enable_debugger == TRUE) - Debugger("NMI"); // This is a direct call to the Debugger - else - PE_enter_debugger("NMI"); // This is an indirect call to the Debugger that is dependent on the debug flag - - return kIOReturnSuccess; -} - -//********************************************************************************* -// powerStateWillChangeTo -// -// We are notified here of power changes in the root domain. The root domain -// cannot actually turn itself on and off, but it notifies us anyway. -//********************************************************************************* -IOReturn AppleNMI::powerStateWillChangeTo ( IOPMPowerFlags theFlags, unsigned long, IOService*) -{ - volatile unsigned long *nmiIntSourceAddr; - unsigned long nmiIntSource; - - if (mask_NMI == TRUE) - { - if ( ! 
(theFlags & IOPMPowerOn) ) - { - // Mask NMI and change from edge to level whilst sleeping (copied directly from OS9 code) - nmiIntSourceAddr = (volatile unsigned long *)kExtInt9_NMIIntSource; - nmiIntSource = ml_phys_read((vm_address_t)nmiIntSourceAddr); - nmiIntSource |= kNMIIntLevelMask; - ml_phys_write((vm_address_t)nmiIntSourceAddr, nmiIntSource); - eieio(); - nmiIntSource |= kNMIIntMask; - ml_phys_write((vm_address_t)nmiIntSourceAddr, nmiIntSource); - eieio(); - } - else - { - // Unmask NMI and change back to edge (copied directly from OS9 code) - nmiIntSourceAddr = (volatile unsigned long *)kExtInt9_NMIIntSource; - nmiIntSource = ml_phys_read((vm_address_t)nmiIntSourceAddr); - nmiIntSource &= ~kNMIIntLevelMask; - ml_phys_write((vm_address_t)nmiIntSourceAddr, nmiIntSource); - eieio(); - nmiIntSource &= ~kNMIIntMask; - ml_phys_write((vm_address_t)nmiIntSourceAddr, nmiIntSource); - eieio(); - } - } - - return IOPMAckImplied; -} diff --git a/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp b/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp deleted file mode 100644 index 3391d7863..000000000 --- a/iokit/Drivers/platform/drvAppleNVRAM/AppleNVRAM.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 1998-2008 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include "AppleNVRAM.h" - - -#define super IONVRAMController -OSDefineMetaClassAndStructors(AppleNVRAM, IONVRAMController); - - -// **************************************************************************** -// start -// -// **************************************************************************** -bool AppleNVRAM::start(IOService *provider) -{ - IOItemCount numRanges; - IOMemoryMap *map; - - numRanges = provider->getDeviceMemoryCount(); - - if (numRanges == 1) { - _nvramType = kNVRAMTypeIOMem; - - // Get the address of the data register. - map = provider->mapDeviceMemoryWithIndex(0); - if (map == 0) return false; - _nvramData = (UInt8 *)map->getVirtualAddress(); - - } else if (numRanges == 2) { - _nvramType = kNVRAMTypePort; - - // Get the address of the port register. - map = provider->mapDeviceMemoryWithIndex(0); - if (map == 0) return false; - _nvramPort = (UInt8 *)map->getVirtualAddress(); - - // Get the address of the data register. 
- map = provider->mapDeviceMemoryWithIndex(1); - if (map == 0) return false; - _nvramData = (UInt8 *)map->getVirtualAddress(); - - } else { - return false; - } - - return super::start(provider); -} - -// **************************************************************************** -// read -// -// Read data from the NVRAM and return it in buffer. -// -// **************************************************************************** -IOReturn AppleNVRAM::read(IOByteCount offset, UInt8 *buffer, - IOByteCount length) -{ - UInt32 cnt; - - // length and offset can't be less than zero (unsigned), so we don't check - if ((buffer == 0) || (length == 0) || (offset + length > kNVRAMImageSize)) - return kIOReturnBadArgument; - - switch (_nvramType) { - case kNVRAMTypeIOMem : - for (cnt = 0; cnt < length; cnt++) { - buffer[cnt] = _nvramData[(offset + cnt) << 4]; - } - break; - - case kNVRAMTypePort: - for (cnt = 0; cnt < length; cnt++) { - *_nvramPort = (offset + cnt) >> 5; - eieio(); - buffer[cnt] = _nvramData[((offset + cnt) & 0x1F) << 4]; - } - break; - - default : - return kIOReturnNotReady; - } - - return kIOReturnSuccess; -} - - -// **************************************************************************** -// write -// -// Write data from buffer into NVRAM. -// -// **************************************************************************** -IOReturn AppleNVRAM::write(IOByteCount offset, UInt8 *buffer, - IOByteCount length) -{ - UInt32 cnt; - - // length and offset can't be less than zero (unsigned), so we don't check - if ((buffer == 0) || (length == 0) || (offset + length > kNVRAMImageSize)) - return kIOReturnBadArgument; - - switch (_nvramType) { - case kNVRAMTypeIOMem : - for (cnt = 0; cnt < length; cnt++) { - _nvramData[(offset + cnt) << 4] = buffer[cnt]; - eieio(); - } - break; - - case kNVRAMTypePort: - for (cnt = 0; cnt < length; cnt++) { - *_nvramPort = (offset + cnt) >> 5; - eieio(); - _nvramData[((offset + cnt) & 0x1F) << 4] = buffer[cnt]; - eieio(); - } - break; - - default : - return kIOReturnNotReady; - } - - return kIOReturnSuccess; -} diff --git a/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp b/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp deleted file mode 100644 index 3b00616fb..000000000 --- a/iokit/Drivers/platform/drvApplePlatformExpert/AppleCPU.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * DRI: Josh de Cesare - * - */ - -#include "AppleCPU.h" - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#undef super -#define super IOCPU - -OSDefineMetaClassAndStructors(AppleCPU, IOCPU); - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -bool AppleCPU::start(IOService *provider) -{ - kern_return_t result; - ml_processor_info_t this_processor_info; - - if (!super::start(provider)) return false; - - cpuIC = new IOCPUInterruptController; - if (cpuIC == 0) return false; - - if (cpuIC->initCPUInterruptController(1) != kIOReturnSuccess) return false; - cpuIC->attach(this); - - cpuIC->registerCPUInterruptController(); - - this_processor_info.cpu_id = (cpu_id_t)this; - this_processor_info.boot_cpu = true; - this_processor_info.start_paddr = 0; - this_processor_info.supports_nap = false; - this_processor_info.l2cr_value = 0; - this_processor_info.time_base_enable = 0; - - // Register this CPU with mach. - result = ml_processor_register( - &this_processor_info, - &machProcessor, - &ipi_handler); - if (result == KERN_FAILURE) return false; - - setCPUState(kIOCPUStateUninitalized); - - processor_start(machProcessor); - - registerService(); - - return true; -} - -void AppleCPU::initCPU(bool boot) -{ - if (boot) { - cpuIC->enableCPUInterrupt(this); - } - - setCPUState(kIOCPUStateRunning); -} - -void AppleCPU::quiesceCPU(void) -{ - // Unsupported. -} - -kern_return_t AppleCPU::startCPU(vm_offset_t /*start_paddr*/, - vm_offset_t /*arg_paddr*/) -{ - return KERN_FAILURE; -} - -void AppleCPU::haltCPU(void) -{ - // Unsupported. -} - -const OSSymbol *AppleCPU::getCPUName(void) -{ - return OSSymbol::withCStringNoCopy("Primary0"); -} - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/Drivers/platform/drvApplePlatformExpert/ApplePlatformExpert.cpp b/iokit/Drivers/platform/drvApplePlatformExpert/ApplePlatformExpert.cpp deleted file mode 100644 index 029ef3056..000000000 --- a/iokit/Drivers/platform/drvApplePlatformExpert/ApplePlatformExpert.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * HISTORY - * - */ - -#include -#include -#include - -#include - - -const OSSymbol *gGetDefaultBusSpeedsKey; - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -#define super IODTPlatformExpert - -OSDefineMetaClassAndAbstractStructors(ApplePlatformExpert, IODTPlatformExpert); - -OSMetaClassDefineReservedUnused(ApplePlatformExpert, 0); -OSMetaClassDefineReservedUnused(ApplePlatformExpert, 1); -OSMetaClassDefineReservedUnused(ApplePlatformExpert, 2); -OSMetaClassDefineReservedUnused(ApplePlatformExpert, 3); - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - -bool ApplePlatformExpert::start( IOService * provider ) -{ - UInt16 romVersion; - - gGetDefaultBusSpeedsKey = OSSymbol::withCString("GetDefaultBusSpeeds"); - - if (provider->getProperty(gIODTNWInterruptMappingKey)) { - // new world interrupt mapping => new world, for now - setBootROMType(kBootROMTypeNewWorld); - } else { - setBootROMType(kBootROMTypeOldWorld); - - // Get the Rom Minor Version from the 68k ROM. - romVersion = ml_phys_read_64(0xffc00010ULL) & 0x0000ffff; - provider->setProperty("rom-version", &romVersion, sizeof(romVersion)); - } - - return super::start(provider); -} - -bool ApplePlatformExpert::configure( IOService * provider ) -{ - IORangeAllocator * physicalRanges; - - if((physicalRanges = getPhysicalRangeAllocator())) { - physicalRanges->allocateRange(0,0x80000000); // RAM - physicalRanges->allocateRange(0xff000000,0x01000000); // ROM - } - return(super::configure(provider)); -} - -const char * ApplePlatformExpert::deleteList ( void ) -{ - return( "('packages', 'psuedo-usb', 'psuedo-hid', 'multiboot', 'rtas')" ); -} - -const char * ApplePlatformExpert::excludeList( void ) -{ - return( "('chosen', 'memory', 'openprom', 'AAPL,ROM', 'rom', 'options', 'aliases')"); -} - -void ApplePlatformExpert::registerNVRAMController( IONVRAMController * nvram ) -{ - IOReturn err; - enum { kXPRAMTimeToGMTOffset = 0xEC }; - - super::registerNVRAMController(nvram); - - // Here we are saving off the time zone info that's in PRAM. - // This probably should be a separate call that the - // ApplePlatformExpert does in its initialization. -ECH - - err = readXPRAM(kXPRAMTimeToGMTOffset, (UInt8 *)&_timeToGMT, - sizeof(_timeToGMT)); - if (err == kIOReturnSuccess) { - // Convert from a SInt24 - sign extend from bit 23. - if (_timeToGMT & (1 << 23)) - _timeToGMT |= 0xFF000000; - else - _timeToGMT &= 0x00FFFFFF; - } -} - -#define SECS_BETWEEN_1904_1970 2082844800 - -long ApplePlatformExpert::getGMTTimeOfDay(void) -{ - long localtime; - - // To avoid hanging the kernel at boot, - // we set a limit of 30 seconds waiting - // for the real time clock.
- mach_timespec_t t; - t.tv_sec = 30; - t.tv_nsec = 0; - if (waitForService(resourceMatching("IORTC"), &t ) != NULL) { - if (PE_read_write_time_of_day(kPEReadTOD, &localtime) == 0) - return (localtime - _timeToGMT - SECS_BETWEEN_1904_1970); - } - else - IOLog("ApplePlatformExpert::getGMTTimeOfDay cannot provide time of day; RTC did not show up\n"); - - return(0); -} - -void ApplePlatformExpert::setGMTTimeOfDay(long secs) -{ - // To avoid hanging the kernel at boot, - // we set a limit of 30 seconds waiting - // for the real time clock. - mach_timespec_t t; - t.tv_sec = 30; - t.tv_nsec = 0; - if (waitForService(resourceMatching("IORTC"), &t ) != NULL) { - secs += SECS_BETWEEN_1904_1970; - secs += _timeToGMT; - PE_read_write_time_of_day(kPEWriteTOD, &secs); - } - else - IOLog("ApplePlatformExpert::setGMTTimeOfDay cannot set time of day; RTC did not show up\n"); - -} - -bool ApplePlatformExpert::getMachineName(char *name, int maxLength) -{ - strncpy(name, "Power Macintosh", maxLength); - - return true; -} diff --git a/iokit/IOKit/AppleKeyStoreInterface.h b/iokit/IOKit/AppleKeyStoreInterface.h index 02cb776c1..b5cb5775b 100644 --- a/iokit/IOKit/AppleKeyStoreInterface.h +++ b/iokit/IOKit/AppleKeyStoreInterface.h @@ -55,6 +55,6 @@ struct aks_volume_key_t // aka kCSFDETargetVEKID #define PLATFORM_FUNCTION_GET_MEDIA_ENCRYPTION_KEY_UUID "CSFDETargetVEKID" -#define AKS_SERVICE_PATH "/IOResources/AppleKeyStore" +#define AKS_SERVICE_PATH "/IOResources/AppleFDEKeyStore" #endif /* _IOKIT_APPLEKEYSTOREINTERFACE_H */ diff --git a/iokit/IOKit/IOCatalogue.h b/iokit/IOKit/IOCatalogue.h index ac8cec46c..63781fc75 100644 --- a/iokit/IOKit/IOCatalogue.h +++ b/iokit/IOKit/IOCatalogue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Inc. All rights reserved. + * Copyright (c) 1998-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -222,42 +222,6 @@ public: /* This stuff is no longer used at all; we keep it around for i386 * binary compatibility only. Symbols are no longer exported. */ -#if __i386__ - /*! - @function recordStartupExtensions - @abstract Records extensions made available by the primary booter. -
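Two details of the deleted ApplePlatformExpert code above are worth pinning down: _timeToGMT is stored in XPRAM as a signed 24-bit value and must be sign-extended from bit 23, and SECS_BETWEEN_1904_1970 bridges the classic Mac OS epoch (1904) to the Unix epoch (1970). A standalone check of both; the helper name is an invention for the sketch:

#include <cassert>
#include <cstdint>

// Sign-extend a signed 24-bit XPRAM field from bit 23, as the deleted
// registerNVRAMController() does with _timeToGMT.
static int32_t signExtend24(uint32_t v)
{
    return (v & (1u << 23)) ? (int32_t)(v | 0xFF000000u)
                            : (int32_t)(v & 0x00FFFFFFu);
}

int main()
{
    assert(signExtend24(0xFFFFFFu) == -1);
    assert(signExtend24(0x000001u) == 1);

    // 1904..1970 spans 66 years, 17 of them leap years (1904, 1908, ..., 1968),
    // which works out to exactly the 2082844800 in SECS_BETWEEN_1904_1970.
    const uint64_t days = 66ull * 365 + 17;      // 24107 days
    assert(days * 86400ull == 2082844800ull);
    return 0;
}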

- This function is for internal use by the kernel startup linker. - Kernel extensions should never call it. - @result Returns true if startup extensions were successfully recorded, - false if not. - */ - virtual bool recordStartupExtensions(void); - - /*! - @function addExtensionsFromArchive() - @abstract Records an archive of extensions, as from device ROM. -

- This function is currently for internal use. - Kernel extensions should never call it. - @param mkext An OSData object containing a multikext archive. - @result Returns true if mkext was properly unserialized and its - contents recorded, false if not. - */ - virtual bool addExtensionsFromArchive(OSData * mkext); - - - /*! - @function removeKernelLinker - @abstract Removes from memory all code and data related to - boot-time loading of kernel extensions. kextd triggers - this when it first starts in order to pass responsibility - for loading extensions from the kernel itself to kextd. - @result Returns KERN_SUCCESS if the kernel linker is successfully - removed or wasn't present, KERN_FAILURE otherwise. - */ - virtual kern_return_t removeKernelLinker(void); -#endif /* __i386__ */ private: diff --git a/iokit/IOKit/IODMAController.h b/iokit/IOKit/IODMAController.h index a8c1aed8a..4fab6e080 100644 --- a/iokit/IOKit/IODMAController.h +++ b/iokit/IOKit/IODMAController.h @@ -52,9 +52,13 @@ class IODMAController : public IOService IOByteCount byteCount = 0, IOByteCount byteOffset = 0) = 0; virtual IOReturn stopDMACommand(UInt32 dmaIndex, bool flush = false, uint64_t timeout = UINT64_MAX) = 0; virtual void completeDMACommand(IODMAEventSource *dmaES, IODMACommand *dmaCommand); - virtual void notifyDMACommand(IODMAEventSource *dmaES, IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount); + virtual void notifyDMACommand(IODMAEventSource *dmaES, IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount, AbsoluteTime timeStamp); virtual IOReturn queryDMACommand(UInt32 dmaIndex, IODMACommand **dmaCommand, IOByteCount *transferCount, bool waitForIdle = false) = 0; - virtual IOByteCount getFIFODepth(UInt32 dmaIndex) = 0; + virtual IOByteCount getFIFODepth(UInt32 dmaIndex, IODirection direction) = 0; + virtual IOReturn setFIFODepth(UInt32 dmaIndex, IOByteCount depth) = 0; + virtual IOByteCount validFIFODepth(UInt32 dmaIndex, IOByteCount depth, IODirection direction) = 0; + virtual IOReturn setDMAConfig(UInt32 dmaIndex, IOService *provider, UInt32 reqIndex) = 0; + virtual bool validDMAConfig(UInt32 dmaIndex, IOService *provider, UInt32 reqIndex) = 0; public: static const OSSymbol *createControllerName(UInt32 phandle); diff --git a/iokit/IOKit/IODMAEventSource.h b/iokit/IOKit/IODMAEventSource.h index 18a72de50..58d851bf9 100644 --- a/iokit/IOKit/IODMAEventSource.h +++ b/iokit/IOKit/IODMAEventSource.h @@ -43,12 +43,12 @@ class IODMAEventSource : public IOEventSource friend class IODMAController; public: - typedef void (*Action)(OSObject *owner, IODMAEventSource *dmaES, IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount); + typedef void (*Action)(OSObject *owner, IODMAEventSource *dmaES, IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount, AbsoluteTime timeStamp); #define IODMAEventAction IODMAEventSource::Action protected: virtual void completeDMACommand(IODMACommand *dmaCommand); - virtual void notifyDMACommand(IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount); + virtual void notifyDMACommand(IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount, AbsoluteTime timeStamp); public: static IODMAEventSource *dmaEventSource(OSObject *owner, @@ -59,9 +59,15 @@ class IODMAEventSource : public IOEventSource virtual IOReturn startDMACommand(IODMACommand *dmaCommand, IODirection direction, IOByteCount byteCount = 0, IOByteCount byteOffset = 0); virtual IOReturn stopDMACommand(bool flush = false, uint64_t 
timeout = UINT64_MAX); - + virtual IOReturn queryDMACommand(IODMACommand **dmaCommand, IOByteCount *transferCount, bool waitForIdle = false); - virtual IOByteCount getFIFODepth(); + + virtual IOByteCount getFIFODepth(IODirection direction = kIODirectionNone); + virtual IOReturn setFIFODepth(IOByteCount depth); + virtual IOByteCount validFIFODepth(IOByteCount depth, IODirection direction); + + virtual IOReturn setDMAConfig(UInt32 dmaIndex); + virtual bool validDMAConfig(UInt32 dmaIndex); private: IOService *dmaProvider; @@ -79,6 +85,7 @@ class IODMAEventSource : public IOEventSource Action notification = 0, UInt32 dmaIndex = 0); virtual bool checkForWork(void); + virtual void free(void); }; #endif /* _IOKIT_IODMAEVENTSOURCE_H */ diff --git a/iokit/IOKit/IODeviceTreeSupport.h b/iokit/IOKit/IODeviceTreeSupport.h index 6e3ed1ed1..9b39a2dc2 100644 --- a/iokit/IOKit/IODeviceTreeSupport.h +++ b/iokit/IOKit/IODeviceTreeSupport.h @@ -73,9 +73,6 @@ OSCollectionIterator * IODTFindMatchingEntries( IORegistryEntry * from, typedef SInt32 (*IODTCompareAddressCellFunc) (UInt32 cellCount, UInt32 left[], UInt32 right[]); -typedef SInt64 (*IODTCompareAddressCell64Func) - (UInt32 cellCount, UInt32 left[], UInt32 right[]); - typedef void (*IODTNVLocationFunc) (IORegistryEntry * entry, UInt8 * busNum, UInt8 * deviceNum, UInt8 * functionNum ); @@ -84,6 +81,9 @@ void IODTSetResolving( IORegistryEntry * regEntry, IODTCompareAddressCellFunc compareFunc, IODTNVLocationFunc locationFunc ); +void IODTGetCellCounts( IORegistryEntry * regEntry, + UInt32 * sizeCount, UInt32 * addressCount); + bool IODTResolveAddressCell( IORegistryEntry * regEntry, UInt32 cellsIn[], IOPhysicalAddress * phys, IOPhysicalLength * len ); diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h index 36d98707c..525fb5d7a 100644 --- a/iokit/IOKit/IOHibernatePrivate.h +++ b/iokit/IOKit/IOHibernatePrivate.h @@ -102,14 +102,21 @@ struct IOHibernateImageHeader uint32_t sleepTime; uint32_t compression; - uint32_t reserved[62]; // make sizeof == 512 - - uint64_t restoreTime1 __attribute__ ((packed)); - uint64_t restoreTime2 __attribute__ ((packed)); - uint64_t restoreTime3 __attribute__ ((packed)); + uint32_t reserved[58]; // make sizeof == 512 + uint32_t booterTime0; + uint32_t booterTime1; + uint32_t booterTime2; + + uint32_t booterStart; + uint32_t smcStart; + uint32_t connectDisplayTime; + uint32_t splashTime; + uint32_t booterTime; + uint32_t trampolineTime; uint64_t encryptEnd __attribute__ ((packed)); uint64_t deviceBase __attribute__ ((packed)); + uint32_t deviceBlockSize; uint32_t fileExtentMapSize; IOPolledFileExtent fileExtentMap[2]; @@ -256,6 +263,39 @@ struct hibernate_preview_t }; typedef struct hibernate_preview_t hibernate_preview_t; +struct hibernate_statistics_t +{ + uint64_t image1Size; + uint64_t imageSize; + uint32_t image1Pages; + uint32_t imagePages; + uint32_t booterStart; + uint32_t smcStart; + uint32_t booterDuration; + uint32_t booterConnectDisplayDuration; + uint32_t booterSplashDuration; + uint32_t booterDuration0; + uint32_t booterDuration1; + uint32_t booterDuration2; + uint32_t trampolineDuration; + uint32_t kernelImageReadDuration; + + uint32_t graphicsReadyTime; + uint32_t wakeNotificationTime; + uint32_t lockScreenReadyTime; + uint32_t hidReadyTime; + + uint32_t wakeCapability; + uint32_t resvA[15]; +}; +typedef struct hibernate_statistics_t hibernate_statistics_t; + +#define kIOSysctlHibernateStatistics "kern.hibernatestatistics" +#define kIOSysctlHibernateGraphicsReady 
"kern.hibernategraphicsready" +#define kIOSysctlHibernateWakeNotify "kern.hibernatewakenotification" +#define kIOSysctlHibernateScreenReady "kern.hibernatelockscreenready" +#define kIOSysctlHibernateHIDReady "kern.hibernatehidready" + #ifdef KERNEL #ifdef __cplusplus @@ -269,6 +309,7 @@ IOReturn IOHibernateSystemWake(void); IOReturn IOHibernateSystemPostWake(void); bool IOHibernateWasScreenLocked(void); void IOHibernateSetScreenLocked(uint32_t lockState); +void IOHibernateSetWakeCapabilities(uint32_t capability); void IOHibernateSystemRestart(void); #endif /* __cplusplus */ @@ -301,14 +342,21 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, hibernate_page_list_t * hibernate_page_list_allocate(boolean_t log); +kern_return_t +hibernate_alloc_page_lists( + hibernate_page_list_t ** page_list_ret, + hibernate_page_list_t ** page_list_wired_ret, + hibernate_page_list_t ** page_list_pal_ret); + kern_return_t hibernate_setup(IOHibernateImageHeader * header, uint32_t free_page_ratio, uint32_t free_page_time, boolean_t vmflush, - hibernate_page_list_t ** page_list_ret, - hibernate_page_list_t ** page_list_wired_ret, - hibernate_page_list_t ** page_list_pal_ret); + hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired, + hibernate_page_list_t * page_list_pal); + kern_return_t hibernate_teardown(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired, @@ -337,7 +385,8 @@ void hibernate_page_list_setall(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired, hibernate_page_list_t * page_list_pal, - boolean_t preflight, + boolean_t preflight, + boolean_t discard_all, uint32_t * pagesOut); // mark pages to be saved, or pages not to be saved but available diff --git a/iokit/IOKit/IOKitDebug.h b/iokit/IOKit/IOKitDebug.h index de2850d4e..f28cfdbcf 100644 --- a/iokit/IOKit/IOKitDebug.h +++ b/iokit/IOKit/IOKitDebug.h @@ -78,11 +78,15 @@ enum { kIOLogDriverPower1 = 0x01000000ULL, kIOLogDriverPower2 = 0x02000000ULL, kIOStatistics = 0x04000000ULL, + kIOPersistentLog = 0x08000000ULL, + kIOSleepWakeWdogOff = 0x40000000ULL, + kIOAppRespStacksOn = 0x80000000ULL, // debug aids - change behaviour kIONoFreeObjects = 0x00100000ULL, kIOLogSynchronous = 0x00200000ULL, // IOLog completes synchronously kOSTraceObjectAlloc = 0x00400000ULL, + kIOWaitQuietPanics = 0x00800000ULL, _kIODebugTopFlag = 0x8000000000000000ULL // force enum to be 64 bits }; diff --git a/iokit/IOKit/IOKitKeys.h b/iokit/IOKit/IOKitKeys.h index a6d7c8bf5..daf1faa0d 100644 --- a/iokit/IOKit/IOKitKeys.h +++ b/iokit/IOKit/IOKitKeys.h @@ -113,7 +113,10 @@ #define kIOCFPlugInTypesKey "IOCFPlugInTypes" // properties found in services that implement command pooling -#define kIOCommandPoolSizeKey "IOCommandPoolSize" // (OSNumber) +#define kIOCommandPoolSizeKey "IOCommandPoolSize" // (OSNumber) + +// properties found in services that implement priority +#define kIOMaximumPriorityCountKey "IOMaximumPriorityCount" // (OSNumber) // properties found in services that have transfer constraints #define kIOMaximumBlockCountReadKey "IOMaximumBlockCountRead" // (OSNumber) diff --git a/iokit/IOKit/IOKitKeysPrivate.h b/iokit/IOKit/IOKitKeysPrivate.h index b8f3cd2e2..26827aa22 100644 --- a/iokit/IOKit/IOKitKeysPrivate.h +++ b/iokit/IOKit/IOKitKeysPrivate.h @@ -67,10 +67,6 @@ typedef struct _IOUCProcessToken { #define kIOKernelHasSafeSleep 1 -enum { kIOPrepareToPhys32 = 0x04 }; - -#define kIODirectionPrepareToPhys32 ((IODirection) kIOPrepareToPhys32) - #define kIOPlatformSleepActionKey 
"IOPlatformSleepAction" /* value is OSNumber (priority) */ #define kIOPlatformWakeActionKey "IOPlatformWakeAction" /* value is OSNumber (priority) */ #define kIOPlatformQuiesceActionKey "IOPlatformQuiesceAction" /* value is OSNumber (priority) */ @@ -82,5 +78,10 @@ enum { kIOPrepareToPhys32 = 0x04 }; #define kIOPlatformFunctionHandlerMaxInterruptDelay "IOPlatformFunctionHandlerMaxInterruptDelay" #endif /* defined(__i386__) || defined(__x86_64__) */ +enum { + // these flags are valid for the prepare() method only + kIODirectionPrepareNoZeroFill = 0x00000010, +}; + #endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */ diff --git a/iokit/IOKit/IOLocks.h b/iokit/IOKit/IOLocks.h index 0e762ef55..06ae4aa8d 100644 --- a/iokit/IOKit/IOLocks.h +++ b/iokit/IOKit/IOLocks.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2009 Apple Inc. All rights reserved. + * Copyright (c) 1998-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -79,7 +79,7 @@ IOLock * IOLockAlloc( void ); /*! @function IOLockFree @abstract Frees a mutex. - @discussion Frees a lock allocated with IOLockAlloc. Any blocked waiters will not be woken. + @discussion Frees a lock allocated with IOLockAlloc. Mutex should be unlocked with no waiters. @param lock Pointer to the allocated lock. */ void IOLockFree( IOLock * lock); @@ -122,11 +122,7 @@ boolean_t IOLockTryLock( IOLock * lock); #ifdef IOLOCKS_INLINE #define IOLockUnlock(l) lck_mtx_unlock(l) #else -#if defined(__i386__) -void IOLockUnlock( IOLock * lock) __DARWIN10_ALIAS(IOLockUnlock); -#else /* !__i386__ */ void IOLockUnlock( IOLock * lock); -#endif /* __i386__ */ #endif /* !IOLOCKS_INLINE */ /*! @function IOLockSleep @@ -176,7 +172,7 @@ IORecursiveLock * IORecursiveLockAlloc( void ); /*! @function IORecursiveLockFree @abstract Frees a recursive lock. - @discussion Frees a lock allocated with IORecursiveLockAlloc. Any blocked waiters will not be woken. + @discussion Frees a lock allocated with IORecursiveLockAlloc. Lock should be unlocked with no waiters. @param lock Pointer to the allocated lock. */ void IORecursiveLockFree( IORecursiveLock * lock); @@ -244,7 +240,7 @@ IORWLock * IORWLockAlloc( void ); /*! @function IORWLockFree @abstract Frees a read/write lock. - @discussion Frees a lock allocated with IORWLockAlloc. Any blocked waiters will not be woken. + @discussion Frees a lock allocated with IORWLockAlloc. Lock should be unlocked with no waiters. @param lock Pointer to the allocated lock. 
*/ void IORWLockFree( IORWLock * lock); diff --git a/iokit/IOKit/IOMapper.h b/iokit/IOKit/IOMapper.h index 55356c470..784077d9c 100644 --- a/iokit/IOKit/IOMapper.h +++ b/iokit/IOKit/IOMapper.h @@ -117,6 +117,7 @@ public: { if ((uintptr_t) gSystem & kWaitMask) waitForSystemMapper(); }; static IOMapper * copyMapperForDevice(IOService * device); + static IOMapper * copyMapperForDeviceWithIndex(IOService * device, unsigned int index); // Function will panic if the given address is not found in a valid diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index a44ce6f40..80fb15e23 100644 --- a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -53,7 +53,12 @@ enum IODirection kIODirectionIn = 0x1, // User land 'read', same as VM_PROT_READ kIODirectionOut = 0x2, // User land 'write', same as VM_PROT_WRITE kIODirectionOutIn = kIODirectionOut | kIODirectionIn, - kIODirectionInOut = kIODirectionIn | kIODirectionOut + kIODirectionInOut = kIODirectionIn | kIODirectionOut, + + // these flags are valid for the prepare() method only + kIODirectionPrepareToPhys32 = 0x00000004, + kIODirectionPrepareNoFault = 0x00000008, + kIODirectionPrepareReserved1 = 0x00000010, }; #ifdef __LP64__ typedef IOOptionBits IODirection; @@ -103,9 +108,24 @@ enum { enum { kIOMemoryPurgeableKeepCurrent = 1, + kIOMemoryPurgeableNonVolatile = 2, kIOMemoryPurgeableVolatile = 3, - kIOMemoryPurgeableEmpty = 4 + kIOMemoryPurgeableEmpty = 4, + + // modifiers for kIOMemoryPurgeableVolatile behavior + kIOMemoryPurgeableVolatileGroup0 = VM_VOLATILE_GROUP_0, + kIOMemoryPurgeableVolatileGroup1 = VM_VOLATILE_GROUP_1, + kIOMemoryPurgeableVolatileGroup2 = VM_VOLATILE_GROUP_2, + kIOMemoryPurgeableVolatileGroup3 = VM_VOLATILE_GROUP_3, + kIOMemoryPurgeableVolatileGroup4 = VM_VOLATILE_GROUP_4, + kIOMemoryPurgeableVolatileGroup5 = VM_VOLATILE_GROUP_5, + kIOMemoryPurgeableVolatileGroup6 = VM_VOLATILE_GROUP_6, + kIOMemoryPurgeableVolatileGroup7 = VM_VOLATILE_GROUP_7, + kIOMemoryPurgeableVolatileBehaviorFifo = VM_PURGABLE_BEHAVIOR_FIFO, + kIOMemoryPurgeableVolatileBehaviorLifo = VM_PURGABLE_BEHAVIOR_LIFO, + kIOMemoryPurgeableVolatileOrderingObsolete = VM_PURGABLE_ORDERING_OBSOLETE, + kIOMemoryPurgeableVolatileOrderingNormal = VM_PURGABLE_ORDERING_NORMAL, }; enum { @@ -216,6 +236,17 @@ typedef IOOptionBits DMACommandOps; virtual IOReturn setPurgeable( IOOptionBits newState, IOOptionBits * oldState ); + + +/*! @function getPageCounts + @abstract Retrieve the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor. + @discussion This method returns the number of resident and/or dirty pages encompassed by an IOMemoryDescriptor. + @param residentPageCount - If non-null, a pointer to a byte count that will return the number of resident pages encompassed by this IOMemoryDescriptor. + @param dirtyPageCount - If non-null, a pointer to a byte count that will return the number of dirty pages encompassed by this IOMemoryDescriptor. + @result An IOReturn code. */ + + IOReturn getPageCounts( IOByteCount * residentPageCount, + IOByteCount * dirtyPageCount); /*! @function performOperation @abstract Perform an operation on the memory descriptor's memory. 
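The IOMemoryDescriptor.h hunk above adds getPageCounts() alongside setPurgeable(). A minimal kext-side sketch, assuming the driver already holds an IOMemoryDescriptor in md; the function name is hypothetical:

#include <IOKit/IOLib.h>
#include <IOKit/IOMemoryDescriptor.h>

static IOReturn logPageCounts(IOMemoryDescriptor *md)
{
    IOByteCount resident = 0, dirty = 0;

    IOReturn ret = md->prepare(kIODirectionOutIn);   // wire the memory first
    if (ret != kIOReturnSuccess)
        return ret;

    ret = md->getPageCounts(&resident, &dirty);      // the new accessor
    if (ret == kIOReturnSuccess)
        IOLog("resident pages: %llu, dirty pages: %llu\n",
              (unsigned long long)resident, (unsigned long long)dirty);

    md->complete(kIODirectionOutIn);
    return ret;
}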
@@ -951,7 +982,7 @@ public: virtual IOReturn setPurgeable( IOOptionBits newState, IOOptionBits * oldState ); - + virtual addr64_t getPhysicalSegment( IOByteCount offset, IOByteCount * length, #ifdef __LP64__ diff --git a/iokit/IOKit/IONVRAM.h b/iokit/IOKit/IONVRAM.h index 15bf709f6..7d96cd050 100644 --- a/iokit/IOKit/IONVRAM.h +++ b/iokit/IOKit/IONVRAM.h @@ -1,5 +1,6 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2007-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -93,6 +94,7 @@ private: bool _systemPaniced; SInt32 _lastDeviceSync; bool _freshInterval; + bool _isProxied; virtual UInt8 calculatePartitionChecksum(UInt8 *partitionHeader); virtual IOReturn initOFVariables(void); @@ -132,6 +134,9 @@ private: const OSSymbol *name, OSData *value); + void initNVRAMImage(void); + void initProxyData(void); + public: virtual bool init(IORegistryEntry *old, const IORegistryPlane *plane); diff --git a/iokit/IOKit/IOPlatformExpert.h b/iokit/IOKit/IOPlatformExpert.h index a27cf64ad..bdd1ef6a2 100644 --- a/iokit/IOKit/IOPlatformExpert.h +++ b/iokit/IOKit/IOPlatformExpert.h @@ -75,6 +75,8 @@ extern boolean_t PEWriteNVRAMProperty(const char *symbol, const void *value, con extern boolean_t PEReadNVRAMProperty(const char *symbol, void *value, unsigned int *len); +extern boolean_t PERemoveNVRAMProperty(const char *symbol); + #ifdef __cplusplus } /* extern "C" */ diff --git a/iokit/IOKit/IOPolledInterface.h b/iokit/IOKit/IOPolledInterface.h index ec500eae2..740ab11c5 100644 --- a/iokit/IOKit/IOPolledInterface.h +++ b/iokit/IOKit/IOPolledInterface.h @@ -30,6 +30,7 @@ #define _IOPOLLEDINTERFACE_H_ #include +#include #define kIOPolledInterfaceSupportKey "IOPolledInterface" #define kIOPolledInterfaceActiveKey "IOPolledInterfaceActive" diff --git a/iokit/IOKit/IOReportMacros.h b/iokit/IOKit/IOReportMacros.h new file mode 100644 index 000000000..24f7607ff --- /dev/null +++ b/iokit/IOKit/IOReportMacros.h @@ -0,0 +1,356 @@ +/* + * @APPLE_LICENSE_HEADER_START@ + * + * Copyright (c) 2012 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
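Before the new IOReporting headers below, a note on the IOPlatformExpert.h hunk above, which adds PERemoveNVRAMProperty() next to the existing PEWriteNVRAMProperty()/PEReadNVRAMProperty(). A hedged sketch of the write/remove pairing; the property name is made up:

#include <stdint.h>
#include <IOKit/IOPlatformExpert.h>

static void setOrClearBootFlag(bool set)
{
    const char *kFlagName = "com.example.boot-flag";   // hypothetical property
    if (set) {
        const uint32_t one = 1;
        (void) PEWriteNVRAMProperty(kFlagName, &one, sizeof(one));
    } else {
        (void) PERemoveNVRAMProperty(kFlagName);       // new in this revision
    }
}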
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _IOREPORT_MACROS_H_ +#define _IOREPORT_MACROS_H_ + +#include "IOReportTypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + Background + + These macros allow non-I/O Kit software to generate IOReporting + reports. Clients must prevent concurrent access to any given + report buffer from multiple threads. + + While these macros allow non-I/O Kit software to participate + in IOReporting, an IOService instance must lend its driver ID, + respond to the appropriate IOService overrides, and shuttle + data back and forth. In some cases, it may be useful to have + the I/O Kit driver initialize the report buffer with the + appropriate macro. +*/ + +/* + * Returns the buffer size required for a Simple report. + */ +#define SIMPLEREPORT_BUFSIZE (sizeof(IOReportElement)) + +/* + * Initialize a buffer to hold a Simple (integer) report. + * + * void* buffer - ptr to SIMPLEREPORT_BUFSIZE bytes + * size_t bufSize - sanity check of buffer's size + * uint64_t providerID - registry Entry ID of the reporting service + * uint64_t channelID - the report's channel ID + * IOReportCategories categories - categories of this channel + * + * If the buffer is not of sufficient size, the macro performs a + * null pointer reference to trigger a segfault. Then, the buffer is + * filled with 0xbadcafe. + */ +#define SIMPLEREPORT_INIT(buffer, bufSize, providerID, channelID, cats) \ +do { \ + IOReportElement *__elem = (IOReportElement *)(buffer); \ + IOSimpleReportValues *__vals; \ + if ((bufSize) >= SIMPLEREPORT_BUFSIZE) { \ + __elem->channel_id = (channelID); \ + __elem->provider_id = (providerID); \ + __elem->channel_type.report_format = kIOReportFormatSimple; \ + __elem->channel_type.reserved = 0; \ + __elem->channel_type.categories = (cats); \ + __elem->channel_type.nelements = 1; \ + __elem->channel_type.element_idx = 0; \ + __elem->timestamp = 0; \ + __vals = (IOSimpleReportValues*)&__elem->values; \ + __vals->simple_value = kIOReportInvalidValue; \ + } \ + else { \ + uint32_t *__nptr = NULL; \ + *__nptr = 1; \ + POLLUTE_BUF((buffer), (bufSize)); \ + } \ +} while(0) + + +/* + * Sets the SimpleReport channel to a new value. + * + * void* simp_buf - ptr to memory initialized by SIMPLEREPORT_INIT() + * uint64_t new_value - new value for the channel + */ +#define SIMPLEREPORT_SETVALUE(simp_buf, new_value) \ +do { \ + IOReportElement *__elem = (IOReportElement *)(simp_buf); \ + IOSimpleReportValues *__vals; \ + __vals = (IOSimpleReportValues*)&__elem->values; \ + __vals->simple_value = (new_value); \ +} while(0) + +/* + * Prepare simple report buffer for + * IOService::updateReport(kIOReportCopyChannelData...) + * + * void* simp_buf - Ptr to memory updated by SIMPLEREPORT_SETVALUE() + * void* ptr2cpy - On return, 'ptr2cpy' points to the memory that needs to be + * copied for kIOReportCopyChannelData. + * size_t size2cpy - On return, 'size2cpy' is set to the size of the report + * data that needs to be copied for kIOReportCopyChannelData. + */ +#define SIMPLEREPORT_UPDATEPREP(simp_buf, ptr2cpy, size2cpy) \ +do { \ + (ptr2cpy) = (simp_buf); \ + (size2cpy) = sizeof(IOReportElement); \ +} while(0) + + +/* + * Updates the result field received as a parameter for + * kIOReportGetDimensions & kIOReportCopyChannelData actions. 
+ * + * IOReportConfigureAction action - configure/updateReport() 'action' param + * void* result - configure/updateReport() 'result' param + */ + +#define SIMPLEREPORT_UPDATERES(action, result) \ +do { \ + if (((action) == kIOReportGetDimensions) || ((action) == kIOReportCopyChannelData)) { \ + int *__nElements = (int *)(result); \ + *__nElements += 1; \ + } \ +} while (0) + + + +/* + * Returns the channel id from the buffer previously initialized by + * SIMPLEREPORT_INIT(). + * + * void* simp_buf - ptr to memory initialized by SIMPLEREPORT_INIT() + */ + +#define SIMPLEREPORT_GETCHID(simp_buf) \ + (((IOReportElement *)(simp_buf))->channel_id); \ + + + +// Internal struct for State report buffer +typedef struct { + uint16_t curr_state; + uint64_t update_ts; + IOReportElement elem[]; // Array of elements +} IOStateReportInfo; + +/* + * Returns the size required to be allocated for using STATEREPORT_*() + * + * int nstates - number of states for the intended channel + */ +#define STATEREPORT_BUFSIZE(nstates) \ + (sizeof(IOStateReportInfo) + (nstates) * sizeof(IOReportElement)) + + +/* + * Initializes a buffer so it can be used with STATEREPORT_*(). + * + * int nstates - number of states to be reported + * void* buffer - ptr to STATEREPORT_BUFSIZE(nstates) bytes + * size_t bufSize - sanity check of buffer's size + * uint64_t providerID - registry Entry ID of the reporting service + * uint64_t channelID - ID of this channel, see IOREPORT_MAKEID() + * IOReportCategories categories - categories of this channel + * + * If the buffer is not of sufficient size, the macro performs a + * null pointer reference to trigger a segfault. Then, the buffer is + * filled with 0xbadcafe. + */ +#define STATEREPORT_INIT(nstates, buf, bufSize, providerID, channelID, cats) \ +do { \ + IOStateReportInfo *__info = (IOStateReportInfo *)(buf); \ + IOStateReportValues *__rep; \ + IOReportElement *__elem; \ + if ((bufSize) >= STATEREPORT_BUFSIZE(nstates)) { \ + for (unsigned __no = 0; __no < (nstates); __no++) { \ + __elem = &(__info->elem[__no]); \ + __rep = (IOStateReportValues *) &(__elem->values); \ + __elem->channel_id = (channelID); \ + __elem->provider_id = (providerID); \ + __elem->channel_type.report_format = kIOReportFormatState; \ + __elem->channel_type.reserved = 0; \ + __elem->channel_type.categories = (cats); \ + __elem->channel_type.nelements = (nstates); \ + __elem->channel_type.element_idx = __no; \ + __elem->timestamp = 0; \ + __rep->state_id = __no; \ + __rep->intransitions = 0; \ + __rep->upticks = 0; \ + } \ + __info->curr_state = 0; \ + __info->update_ts = 0; \ + } \ + else { \ + int *__nptr = NULL; \ + *__nptr = 1; \ + POLLUTE_BUF((buf), (bufSize)); \ + } \ +} while(0) + +/* + * Initializes the state id field of a state with the specified value. By + * default, STATEREPORT_INIT initializes the state id with the index of + * that state. This macro can be used to provide a more descriptive state id. + * + * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() + * unsigned stateIdx - index of the state, out of bounds -> no-op + * uint64_t stateID - new state id, see IOREPORT_MAKEID() + */ +#define STATEREPORT_SETSTATEID(state_buf, stateIdx, stateID) \ +do { \ + IOStateReportInfo *__info = (IOStateReportInfo *)(state_buf); \ + IOStateReportValues *__rep; \ + if ((stateIdx) < __info->elem[0].channel_type.nelements) { \ + __rep = (IOStateReportValues*) &(__info->elem[(stateIdx)].values); \ + __rep->state_id = (stateID); \ + } \ +} while (0) + + +/* + * Set the state of a State report. 
+ * + * void* state_buf - pointer to memory initialized by STATEREPORT_INIT() + * unsigned newStateIdx - index of new state, out of bounds -> no-op + * uint64_t changeTime - time at which the transition occurred + */ +#define STATEREPORT_SETSTATE(state_buf, newStateIdx, changeTime) \ +do { \ + IOStateReportInfo *__info = (IOStateReportInfo *)(state_buf); \ + IOStateReportValues *__rep; \ + if ((newStateIdx) < __info->elem[0].channel_type.nelements ) { \ + __rep = (IOStateReportValues*) &(__info->elem[__info->curr_state].values); \ + if (__info->update_ts) \ + __rep->upticks += (changeTime) - __info->update_ts; \ + __info->elem[(newStateIdx)].timestamp = (changeTime); \ + __rep = (IOStateReportValues*) &(__info->elem[(newStateIdx)].values); \ + __rep->intransitions++; \ + __info->curr_state = (newStateIdx); \ + __info->update_ts = (changeTime); \ + } \ +} while(0) + +/* + * Prepare StateReport for UpdateReport call + * + * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() + * uint64_t currentTime - current timestamp + * void* ptr2cpy - filled in with pointer to buffer to be copied out + * size_t size2cpy - filled in with the size of the buffer to copy out + */ +#define STATEREPORT_UPDATEPREP(state_buf, currentTime, ptr2cpy, size2cpy) \ +do { \ + IOStateReportInfo *__info = (IOStateReportInfo *)(state_buf); \ + IOReportElement *__elem; \ + IOStateReportValues *__state; \ + (size2cpy) = __info->elem[0].channel_type.nelements * sizeof(IOReportElement); \ + (ptr2cpy) = (void *) &__info->elem[0]; \ + if (__info->update_ts) { \ + __elem = &__info->elem[__info->curr_state]; \ + __state = (IOStateReportValues *)&__elem->values; \ + __elem->timestamp = (currentTime); \ + __state->upticks += (currentTime) - __info->update_ts; \ + __info->update_ts = (currentTime); \ + } \ +} while(0) + +/* + * Updates the result field received as a parameter for kIOReportGetDimensions & + * kIOReportCopyChannelData actions. + * + * void* state_buf - memory initialized by STATEREPORT_INIT() + * IOReportConfigureAction action - configure/updateReport() 'action' + * void* result - configure/updateReport() 'result' + */ + +#define STATEREPORT_UPDATERES(state_buf, action, result) \ +do { \ + IOStateReportInfo *__info = (IOStateReportInfo *)(state_buf); \ + IOReportElement *__elem; \ + int *__nElements = (int *)(result); \ + if (((action) == kIOReportGetDimensions) || ((action) == kIOReportCopyChannelData)) { \ + __elem = &(__info->elem[0]); \ + *__nElements += __elem->channel_type.nelements; \ + } \ +} while (0) + + + +/* + * Returns the channel id from the buffer previously initialized by STATEREPORT_INIT(). + * + * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() + */ + +#define STATEREPORT_GETCHID(state_buf) \ + (((IOStateReportInfo *)(state_buf))->elem[0].channel_id) + +/* + * Returns number of transitions occurred from the given state + * + * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() + * unsigned stateIdx - index of state, out of bounds -> kIOReportInvalidValue + * + */ + +#define STATEREPORT_GETTRANSITIONS(state_buf, stateIdx) \ + (((stateIdx) < ((IOStateReportInfo *)(state_buf))->elem[0].channel_type.nelements) \ + ? ((IOStateReportValues*)&(((IOStateReportInfo*)(state_buf))->elem[(stateIdx)].values))->intransitions \ + : kIOReportInvalidValue) + +/* + * Returns the total number of ticks spent in the given state. 
+ * + * void* state_buf - ptr to memory initialized by STATEREPORT_INIT() + * unsigned stateIdx - index of state, out of bounds -> kIOReportInvalidValue + */ + +#define STATEREPORT_GETTICKS(state_buf, stateIdx) \ + (((stateIdx) < ((IOStateReportInfo*)(state_buf))->elem[0].channel_type.nelements) \ + ? ((IOStateReportValues*)&(((IOStateReportInfo*)(state_buf))->elem[(stateIdx)].values))->upticks \ + : kIOReportInvalidValue) + + +#define POLLUTE_BUF(buf, bufSize) \ +do { \ + int __cnt = (bufSize)/sizeof(uint32_t); \ + while (--__cnt >= 0) \ + ((uint32_t*)(buf))[__cnt] = 0xbadcafe; \ +} while (0) + +#ifdef __cplusplus +} +#endif + +#endif // _IOREPORT_MACROS_H_ + + diff --git a/iokit/IOKit/IOReportTypes.h b/iokit/IOKit/IOReportTypes.h new file mode 100644 index 000000000..fc1399a67 --- /dev/null +++ b/iokit/IOKit/IOReportTypes.h @@ -0,0 +1,186 @@ +/* + * @APPLE_LICENSE_HEADER_START@ + * + * Copyright (c) 2012 Apple Computer, Inc. All Rights Reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _IOREPORT_TYPES_H_ +#define _IOREPORT_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*! @const kIOReportInvalidValue + @const kIOReportInvalidIntValue + @abstract cardinal value used to indicate data errors + + @discussion + kIOReportInvalidValue and kIOReportInvalidIntValue have the + same bit pattern so that clients checking for one or the other + don't have to worry about getting the signedness right. 
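Taken together, the Simple-format macros in IOReportMacros.h above follow an init / set / copy-out lifecycle. A userland-compilable walk-through, assuming IOReportMacros.h and IOReportTypes.h are on the include path; the provider and channel IDs are inventions, and the State-format macros follow the same pattern with STATEREPORT_SETSTATE() in place of SIMPLEREPORT_SETVALUE():

#include <cstddef>
#include <cstdlib>
#include "IOReportMacros.h"          // pulls in IOReportTypes.h

int main()
{
    size_t bufSize = SIMPLEREPORT_BUFSIZE;
    void *simp_buf = malloc(bufSize);

    // provider_id would normally be the reporting service's registry entry ID.
    SIMPLEREPORT_INIT(simp_buf, bufSize, /* providerID */ 0x1234,
                      IOREPORT_MAKEID('e','x','a','m','p','l','e',' '),
                      kIOReportCategoryDebug);

    SIMPLEREPORT_SETVALUE(simp_buf, 42);           // new value for the channel

    // What updateReport(..., kIOReportCopyChannelData, ...) would copy out:
    void  *ptr2cpy  = nullptr;
    size_t size2cpy = 0;
    SIMPLEREPORT_UPDATEPREP(simp_buf, ptr2cpy, size2cpy);

    int nElements = 0;                             // dimension bookkeeping
    SIMPLEREPORT_UPDATERES(kIOReportCopyChannelData, &nElements);

    free(simp_buf);
    return (nElements == 1 && size2cpy == sizeof(IOReportElement)) ? 0 : 1;
}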
+*/ +#define kIOReportInvalidValue UINT64_MAX +#define kIOReportInvalidIntValue (int64_t)kIOReportInvalidValue + +// IOReportChannelType.categories +typedef uint16_t IOReportCategories; +#define kIOReportCategoryPower (1 << 1) // and energy +#define kIOReportCategoryTraffic (1 << 2) +#define kIOReportCategoryPerformance (1 << 3) +#define kIOReportCategoryPeripheral (1 << 4) +// future categories TBD +#define kIOReportCategoryDebug (1 << 15) +#define kIOReportInvalidCategory UINT16_MAX + + +// IOReportChannelType.report_format +typedef uint8_t IOReportFormat; +enum { + kIOReportInvalidFormat = 0, + kIOReportFormatSimple = 1, + kIOReportFormatState = 2, + kIOReportFormatHistogram = 3 +}; + +// simple report values +typedef struct { + int64_t simple_value; + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; +} __attribute((packed)) IOSimpleReportValues; + +// state report values +typedef struct { + uint64_t state_id; // 0..N-1 or 8-char code (see MAKEID()) + uint64_t intransitions; // number of transitions into this state + uint64_t upticks; // ticks spent in state (local timebase) + uint64_t last_intransition; // ticks at last in-transition +} __attribute((packed)) IOStateReportValues; + +// histogram report values +typedef struct { + uint64_t bucket_hits; + int64_t bucket_min; + int64_t bucket_max; + int64_t bucket_sum; +} __attribute((packed)) IOHistogramReportValues; + +// configuration actions generally change future behavior +typedef uint32_t IOReportConfigureAction; +enum { + // basics (in common operational order) + kIOReportEnable = 0x01, + kIOReportGetDimensions = 0x02, + kIOReportDisable = 0x00, + + // Enable/disable modifiers + kIOReportNotifyHubOnChange = 0x10, // triggered polling + + kIOReportTraceOnChange = 0x20 // kdebug.h tracing +}; + +// update actions should not have observable side effects +typedef uint32_t IOReportUpdateAction; +enum { + kIOReportCopyChannelData = 1, + kIOReportTraceChannelData = 2 +}; + +typedef struct { + uint8_t report_format; // Histogram, StateResidency, etc. + uint8_t reserved; // must be zero + uint16_t categories; // power, traffic, etc (omnibus obs.) + uint16_t nelements; // internal size of channel + + // only meaningful in the data pipeline + int16_t element_idx; // 0..nelements-1 + // -1..-(nelements) = invalid (13127884) +} __attribute((packed)) IOReportChannelType; + +/*! + @define IOREPORT_MAKEID + @abstract convert up to 8 printable characters into a 64-bit channel ID + @param - printable chars to be packed into a channel ID + @result a 64-bit channel ID with an implicit ASCII name + @discussion A simple example: + IOREPORT_MAKEID('H', 'i', ' ', 'w', 'o', 'r', 'l', 'd'); + will evaluate to 0x486920776f726c64. Any NUL bytes are + ignored (by libIOReport) for naming purposes, but will + appear in the channel ID. Using a non-NUL non-printable + character will disable the implicit name. Putting NUL + bytes first eliminates trailing zeros when the channel + ID is printed as hex.
For example: + IORERPORT_MAKECHID('\0','\0','n','x','f','e','r','s'); + To see the text, use xxd -r -p # not -rp; see 12976241 +*/ +#define __IOR_lshiftchr(c, chshift) ((uint64_t)(c) << (8*(chshift))) +#define IOREPORT_MAKEID(A, B, C, D, E, F, G, H) \ + (__IOR_lshiftchr(A, 7) | __IOR_lshiftchr(B, 6) | __IOR_lshiftchr(C, 5) \ + | __IOR_lshiftchr(D, 4) | __IOR_lshiftchr(E, 3) | __IOR_lshiftchr(F, 2) \ + | __IOR_lshiftchr(G, 1) | __IOR_lshiftchr(H, 0)) + +typedef struct { + uint64_t channel_id; + IOReportChannelType channel_type; +} IOReportChannel; + +typedef struct { + uint32_t nchannels; + IOReportChannel channels[]; +} IOReportChannelList; + +typedef struct { + uint64_t provider_id; + IOReportChannel channel; +} IOReportInterest; + +typedef struct { + uint32_t ninterests; + IOReportInterest interests[]; +} IOReportInterestList; + +typedef struct { + uint64_t v[4]; +} __attribute((packed)) IOReportElementValues; + +typedef struct { + uint64_t provider_id; + uint64_t channel_id; + IOReportChannelType channel_type; + uint64_t timestamp; // mach_absolute_time() + IOReportElementValues values; +} __attribute((packed)) IOReportElement; + +#ifdef __cplusplus +} +#endif + +#endif // _IOREPORT_TYPES_H_ diff --git a/iokit/IOKit/IOReturn.h b/iokit/IOKit/IOReturn.h index 9f1853785..8b2c58155 100644 --- a/iokit/IOKit/IOReturn.h +++ b/iokit/IOKit/IOReturn.h @@ -59,10 +59,15 @@ typedef kern_return_t IOReturn; #define sub_iokit_smbus err_sub(11) #define sub_iokit_ahci err_sub(12) #define sub_iokit_powermanagement err_sub(13) -//#define sub_iokit_hidsystem err_sub(14) +#define sub_iokit_hidsystem err_sub(14) #define sub_iokit_scsi err_sub(16) //#define sub_iokit_pccard err_sub(21) #define sub_iokit_thunderbolt err_sub(29) + +#define sub_iokit_audio_video err_sub(0x45) +#define sub_iokit_hsic err_sub(0x147) +#define sub_iokit_sdio err_sub(0x174) +#define sub_iokit_wlan err_sub(0x208) #define sub_iokit_vendor_specific err_sub(-2) #define sub_iokit_reserved err_sub(-1) diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h index 94a230651..f8a9d59ba 100644 --- a/iokit/IOKit/IOService.h +++ b/iokit/IOKit/IOService.h @@ -32,10 +32,10 @@ * */ /*! - @header - This header contains the definition of the IOService class. IOService is the sole direct subclass of IORegistryEntry and is the base class of almost all I/O Kit family superclasses. IOService defines methods that support the life cycle of I/O Kit drivers. For more information on IOService, see {@linkdoc //apple_ref/doc/uid/TP0000011 I/O Kit Fundamentals}. + @header + This header contains the definition of the IOService class. IOService is the sole direct subclass of IORegistryEntry and is the base class of almost all I/O Kit family superclasses. IOService defines methods that support the life cycle of I/O Kit drivers. For more information on IOService, see {@linkdoc //apple_ref/doc/uid/TP0000011 I/O Kit Fundamentals}. 
- @seealso //apple_ref/doc/header/IORegistryEntry.h IORegistryEntry + @seealso //apple_ref/doc/header/IORegistryEntry.h IORegistryEntry */ #ifndef _IOKIT_IOSERVICE_H @@ -52,6 +52,7 @@ #include #include +#include extern "C" { #include @@ -61,36 +62,37 @@ extern "C" { #define UINT64_MAX 18446744073709551615ULL #endif + enum { - kIODefaultProbeScore = 0 + kIODefaultProbeScore = 0 }; // masks for getState() enum { - kIOServiceInactiveState = 0x00000001, - kIOServiceRegisteredState = 0x00000002, - kIOServiceMatchedState = 0x00000004, - kIOServiceFirstPublishState = 0x00000008, - kIOServiceFirstMatchState = 0x00000010 + kIOServiceInactiveState = 0x00000001, + kIOServiceRegisteredState = 0x00000002, + kIOServiceMatchedState = 0x00000004, + kIOServiceFirstPublishState = 0x00000008, + kIOServiceFirstMatchState = 0x00000010 }; enum { // options for registerService() - kIOServiceExclusive = 0x00000001, + kIOServiceExclusive = 0x00000001, // options for terminate() - kIOServiceRequired = 0x00000001, - kIOServiceTerminate = 0x00000004, + kIOServiceRequired = 0x00000001, + kIOServiceTerminate = 0x00000004, // options for registerService() & terminate() - kIOServiceSynchronous = 0x00000002, + kIOServiceSynchronous = 0x00000002, // options for registerService() - kIOServiceAsynchronous = 0x00000008 + kIOServiceAsynchronous = 0x00000008 }; // options for open() enum { - kIOServiceSeize = 0x00000001, + kIOServiceSeize = 0x00000001, kIOServiceFamilyOpenOptions = 0xffff0000 }; @@ -101,49 +103,49 @@ enum { typedef void * IONotificationRef; -extern const IORegistryPlane * gIOServicePlane; +extern const IORegistryPlane * gIOServicePlane; extern const IORegistryPlane * gIOPowerPlane; -extern const OSSymbol * gIOResourcesKey; -extern const OSSymbol * gIOResourceMatchKey; -extern const OSSymbol * gIOProviderClassKey; -extern const OSSymbol * gIONameMatchKey; -extern const OSSymbol * gIONameMatchedKey; -extern const OSSymbol * gIOPropertyMatchKey; -extern const OSSymbol * gIOLocationMatchKey; -extern const OSSymbol * gIOParentMatchKey; -extern const OSSymbol * gIOPathMatchKey; -extern const OSSymbol * gIOMatchCategoryKey; -extern const OSSymbol * gIODefaultMatchCategoryKey; -extern const OSSymbol * gIOMatchedServiceCountKey; - -extern const OSSymbol * gIOUserClientClassKey; -extern const OSSymbol * gIOKitDebugKey; -extern const OSSymbol * gIOServiceKey; - -extern const OSSymbol * gIOCommandPoolSizeKey; - -extern const OSSymbol * gIOPublishNotification; -extern const OSSymbol * gIOFirstPublishNotification; -extern const OSSymbol * gIOMatchedNotification; -extern const OSSymbol * gIOFirstMatchNotification; -extern const OSSymbol * gIOTerminatedNotification; - -extern const OSSymbol * gIOGeneralInterest; -extern const OSSymbol * gIOBusyInterest; -extern const OSSymbol * gIOOpenInterest; -extern const OSSymbol * gIOAppPowerStateInterest; -extern const OSSymbol * gIOPriorityPowerStateInterest; -extern const OSSymbol * gIOConsoleSecurityInterest; - -extern const OSSymbol * gIODeviceMemoryKey; -extern const OSSymbol * gIOInterruptControllersKey; -extern const OSSymbol * gIOInterruptSpecifiersKey; +extern const OSSymbol * gIOResourcesKey; +extern const OSSymbol * gIOResourceMatchKey; +extern const OSSymbol * gIOProviderClassKey; +extern const OSSymbol * gIONameMatchKey; +extern const OSSymbol * gIONameMatchedKey; +extern const OSSymbol * gIOPropertyMatchKey; +extern const OSSymbol * gIOLocationMatchKey; +extern const OSSymbol * gIOParentMatchKey; +extern const OSSymbol * gIOPathMatchKey; +extern const OSSymbol * 
gIOMatchCategoryKey; +extern const OSSymbol * gIODefaultMatchCategoryKey; +extern const OSSymbol * gIOMatchedServiceCountKey; + +extern const OSSymbol * gIOUserClientClassKey; +extern const OSSymbol * gIOKitDebugKey; +extern const OSSymbol * gIOServiceKey; + +extern const OSSymbol * gIOCommandPoolSizeKey; + +extern const OSSymbol * gIOPublishNotification; +extern const OSSymbol * gIOFirstPublishNotification; +extern const OSSymbol * gIOMatchedNotification; +extern const OSSymbol * gIOFirstMatchNotification; +extern const OSSymbol * gIOTerminatedNotification; + +extern const OSSymbol * gIOGeneralInterest; +extern const OSSymbol * gIOBusyInterest; +extern const OSSymbol * gIOOpenInterest; +extern const OSSymbol * gIOAppPowerStateInterest; +extern const OSSymbol * gIOPriorityPowerStateInterest; +extern const OSSymbol * gIOConsoleSecurityInterest; + +extern const OSSymbol * gIODeviceMemoryKey; +extern const OSSymbol * gIOInterruptControllersKey; +extern const OSSymbol * gIOInterruptSpecifiersKey; extern SInt32 IOServiceOrdering( const OSMetaClassBase * inObj1, const OSMetaClassBase * inObj2, void * ref ); typedef void (*IOInterruptAction)( OSObject * target, void * refCon, - IOService * nub, int source ); + IOService * nub, int source ); /*! @typedef IOServiceNotificationHandler @param target Reference supplied when the notification was registered. @@ -151,11 +153,11 @@ typedef void (*IOInterruptAction)( OSObject * target, void * refCon, @param newService The IOService object the notification is delivering. It is retained for the duration of the handler's invocation and doesn't need to be released by the handler. */ typedef bool (*IOServiceNotificationHandler)( void * target, void * refCon, - IOService * newService ); + IOService * newService ); typedef bool (*IOServiceMatchingNotificationHandler)( void * target, void * refCon, - IOService * newService, - IONotifier * notifier ); + IOService * newService, + IONotifier * notifier ); /*! @typedef IOServiceInterestHandler @param target Reference supplied when the notification was registered. @@ -205,80 +207,80 @@ After the drivers have all synchronously been started, the installed "matched" n

Properties used by IOService

- kIOClassKey, extern const OSSymbol * gIOClassKey, "IOClass" + kIOClassKey, extern const OSSymbol * gIOClassKey, "IOClass"

Class of the driver to instantiate on matching providers.

- kIOProviderClassKey, extern const OSSymbol * gIOProviderClassKey, "IOProviderClass" + kIOProviderClassKey, extern const OSSymbol * gIOProviderClassKey, "IOProviderClass"

Class of the provider(s) to be considered for matching, checked with OSDynamicCast so subclasses will also match.

- kIOProbeScoreKey, extern const OSSymbol * gIOProbeScoreKey, "IOProbeScore" + kIOProbeScoreKey, extern const OSSymbol * gIOProbeScoreKey, "IOProbeScore"

The probe score initially used to order multiple matching drivers.

- kIOMatchCategoryKey, extern const OSSymbol * gIOMatchCategoryKey, "IOMatchCategory" + kIOMatchCategoryKey, extern const OSSymbol * gIOMatchCategoryKey, "IOMatchCategory"

A string defining the driver category for matching purposes. All drivers with no IOMatchCategory property are considered to be in the same default category. Only one driver in a category can be started on each provider.

- kIONameMatchKey, extern const OSSymbol * gIONameMatchKey, "IONameMatch" + kIONameMatchKey, extern const OSSymbol * gIONameMatchKey, "IONameMatch"
A string or collection of strings that match the provider's name. The comparison is implemented with the @link //apple_ref/cpp/instm/IORegistryEntry/compareNames/virtualbool/(OSObject*,OSString**) IORegistryEntry::compareNames@/link method, which supports a single string, or any collection (OSArray, OSSet, OSDictionary etc.) of strings. IOService objects with device tree properties (eg. IOPCIDevice) will also be matched based on that standard's "compatible", "name", "device_type" properties. The matching name will be left in the driver's property table in the kIONameMatchedKey property.
Examples

 @textblock
-	<key>IONameMatch</key>
-	<string>pci106b,7</string>
+    <key>IONameMatch</key>
+    <string>pci106b,7</string>
 @/textblock
 
For a list of possible matching names, a serialized array of strings should be used, eg.
 @textblock
-	<key>IONameMatch</key>
-	<array>
-		<string>APPL,happy16</string>
-		<string>pci106b,7</string>
-	</array>
+    <key>IONameMatch</key>
+    <array>
+        <string>APPL,happy16</string>
+        <string>pci106b,7</string>
+    </array>
 @/textblock
 

- kIONameMatchedKey, extern const OSSymbol * gIONameMatchedKey, "IONameMatched" + kIONameMatchedKey, extern const OSSymbol * gIONameMatchedKey, "IONameMatched"
The name successfully matched from the kIONameMatchKey property will be left in the driver's property table as the kIONameMatchedKey property.

- kIOPropertyMatchKey, extern const OSSymbol * gIOPropertyMatchKey, "IOPropertyMatch" + kIOPropertyMatchKey, extern const OSSymbol * gIOPropertyMatchKey, "IOPropertyMatch"
A dictionary of properties, each of which must exist in the matching IOService and compare successfully with the isEqualTo method.
 @textblock
-	<key>IOPropertyMatch</key>
-	<dictionary>
-		<key>APPL,happy16</key>
-		<string>APPL,meek8</string>
-	</dictionary>
+    <key>IOPropertyMatch</key>
+    <dictionary>
+        <key>APPL,happy16</key>
+        <string>APPL,meek8</string>
+    </dictionary>
 @/textblock
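For illustration (not part of the patch), the IOPropertyMatch example above can also be driven from user space via IOKit.framework; a hedged sketch using the doc comment's fictional property name and value:

#include <CoreFoundation/CoreFoundation.h>
#include <IOKit/IOKitLib.h>
#include <IOKit/IOKitKeys.h>

static io_service_t findHappyService(void)
{
    CFMutableDictionaryRef matching = IOServiceMatching("IOService");

    CFMutableDictionaryRef props = CFDictionaryCreateMutable(
        kCFAllocatorDefault, 0,
        &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
    CFDictionarySetValue(props, CFSTR("APPL,happy16"), CFSTR("APPL,meek8"));

    CFDictionarySetValue(matching, CFSTR(kIOPropertyMatchKey), props);
    CFRelease(props);

    // IOServiceGetMatchingService() consumes one reference on 'matching'.
    return IOServiceGetMatchingService(kIOMasterPortDefault, matching);
}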
 

- kIOUserClientClassKey, extern const OSSymbol * gIOUserClientClassKey, "IOUserClientClass" + kIOUserClientClassKey, extern const OSSymbol * gIOUserClientClassKey, "IOUserClientClass"
The class name that the service will attempt to allocate when a user client connection is requested. First the device nub is queried, then the nub's provider is queried by default.

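For context, this is the key consulted when user space opens a connection: the kernel instantiates the class it names on the caller's behalf. A hedged user-space sketch, assuming a hypothetical service class "MyDriver" that publishes such a property:

    #include <IOKit/IOKitLib.h>

    // Opening a connection makes the kernel allocate the service's
    // IOUserClientClass for this task.
    io_service_t service = IOServiceGetMatchingService(
        kIOMasterPortDefault, IOServiceMatching("MyDriver"));
    if (service) {
        io_connect_t connect = IO_OBJECT_NULL;
        kern_return_t kr = IOServiceOpen(service, mach_task_self(),
                                         0 /* connection type */, &connect);
        if (kr == KERN_SUCCESS) {
            // ... issue IOConnectCall* requests here ...
            IOServiceClose(connect);
        }
        IOObjectRelease(service);
    }
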
- kIOKitDebugKey, extern const OSSymbol * gIOKitDebugKey, "IOKitDebug"
+ kIOKitDebugKey, extern const OSSymbol * gIOKitDebugKey, "IOKitDebug"
Set some debug flags for logging the driver loading process. Flags are defined in IOKit/IOKitDebug.h, but 65535 works well.*/ @@ -297,17 +299,17 @@ protected: ExpansionData * reserved; private: - IOService * __provider; - SInt32 __providerGeneration; - IOService * __owner; - IOOptionBits __state[2]; - uint64_t __timeBusy; - uint64_t __accumBusy; - IOServicePM * pwrMgt; + IOService * __provider; + SInt32 __providerGeneration; + IOService * __owner; + IOOptionBits __state[2]; + uint64_t __timeBusy; + uint64_t __accumBusy; + IOServicePM * pwrMgt; protected: // TRUE once PMinit has been called - bool initialized; + bool initialized; public: // DEPRECATED @@ -370,19 +372,50 @@ public: /*! @function copyClientWithCategory @availability Mac OS X v10.6 and later @param category An OSSymbol corresponding to an IOMatchCategory matching property. - @result Returns a reference to the IOService child with the given category. The result should be released by the caller. + @result Returns a reference to the IOService child with the given category. The result should be released by the caller. */ virtual IOService * copyClientWithCategory( const OSSymbol * category ); +public: +/*! @function configureReport + @abstract configure IOReporting channels + @availability SPI on OS X v10.9 / iOS 7 and later + + @param channels - channels to configure + @param action - enable/disable/size, etc + @param result - action-specific returned value + @param destination - action-specific default destination +*/ +virtual IOReturn configureReport(IOReportChannelList *channels, + IOReportConfigureAction action, + void *result, + void *destination); + +/*! @function updateReport + @abstract request current data for the specified channels + @availability SPI on OS X 10.9 / iOS 7 and later + + @param channels - channels to be updated + @param action - type/style of update + @param result - returned details about what was updated + @param destination - destination for this update (action-specific) +*/ +virtual IOReturn updateReport(IOReportChannelList *channels, + IOReportUpdateAction action, + void *result, + void *destination); + private: #if __LP64__ - OSMetaClassDeclareReservedUnused(IOService, 0); - OSMetaClassDeclareReservedUnused(IOService, 1); + OSMetaClassDeclareReservedUsed(IOService, 0); + OSMetaClassDeclareReservedUsed(IOService, 1); OSMetaClassDeclareReservedUnused(IOService, 2); OSMetaClassDeclareReservedUnused(IOService, 3); OSMetaClassDeclareReservedUnused(IOService, 4); OSMetaClassDeclareReservedUnused(IOService, 5); + OSMetaClassDeclareReservedUnused(IOService, 6); + OSMetaClassDeclareReservedUnused(IOService, 7); #else OSMetaClassDeclareReservedUsed(IOService, 0); OSMetaClassDeclareReservedUsed(IOService, 1); @@ -390,10 +423,10 @@ private: OSMetaClassDeclareReservedUsed(IOService, 3); OSMetaClassDeclareReservedUsed(IOService, 4); OSMetaClassDeclareReservedUsed(IOService, 5); + OSMetaClassDeclareReservedUsed(IOService, 6); + OSMetaClassDeclareReservedUsed(IOService, 7); #endif - OSMetaClassDeclareReservedUnused(IOService, 6); - OSMetaClassDeclareReservedUnused(IOService, 7); OSMetaClassDeclareReservedUnused(IOService, 8); OSMetaClassDeclareReservedUnused(IOService, 9); OSMetaClassDeclareReservedUnused(IOService, 10); @@ -465,8 +498,8 @@ public: @param score Pointer to the current driver's probe score, which is used to order multiple matching drivers in the same match category. It defaults to the value of the IOProbeScore property in the drivers property table, or kIODefaultProbeScore if none is specified. 
The probe method may alter the score to affect start order. @result An IOService instance or zero when the probe is unsuccessful. In almost all cases the value of this is returned on success. If another IOService object is returned, the probed instance is detached and freed, and the returned instance is used in its stead for start. */ - virtual IOService * probe( IOService * provider, - SInt32 * score ); + virtual IOService * probe( IOService * provider, + SInt32 * score ); /*! @function start @abstract During an IOService object's instantiation, starts the IOService object that has been selected to run on the provider. @@ -490,9 +523,9 @@ public: @param options Options for the open. The provider family may implement options for open; IOService defines only kIOServiceSeize to request the device be withdrawn from its current owner. @result true if the open was successful; false otherwise. */ - virtual bool open( IOService * forClient, - IOOptionBits options = 0, - void * arg = 0 ); + virtual bool open( IOService * forClient, + IOOptionBits options = 0, + void * arg = 0 ); /*! @function close @abstract Releases active access to a provider. @@ -501,8 +534,8 @@ public: @param options Options available for the close. The provider family may implement options for close; IOService defines none. @param arg Family specific arguments which are ignored by IOService. */ - virtual void close( IOService * forClient, - IOOptionBits options = 0 ); + virtual void close( IOService * forClient, + IOOptionBits options = 0 ); /*! @function isOpen @abstract Determines whether a specific, or any, client has an IOService object open. @@ -519,9 +552,9 @@ public: @param options Options for the open, may be interpreted by the implementor of handleOpen. @result trueif the open was successful; false otherwise. */ - virtual bool handleOpen( IOService * forClient, - IOOptionBits options, - void * arg ); + virtual bool handleOpen( IOService * forClient, + IOOptionBits options, + void * arg ); /*! @function handleClose @abstract Controls the open / close behavior of an IOService object (overrideable by subclasses). @@ -529,8 +562,8 @@ public: @param forClient Designates the client of the provider requesting the close. @param options Options for the close, may be interpreted by the implementor of @link handleOpen handleOpen@/link. */ - virtual void handleClose( IOService * forClient, - IOOptionBits options ); + virtual void handleClose( IOService * forClient, + IOOptionBits options ); /*! @function handleIsOpen @abstract Controls the open / close behavior of an IOService object (overrideable by subclasses). @@ -601,13 +634,13 @@ public: virtual void adjustBusy( SInt32 delta ); APPLE_KEXT_COMPATIBILITY_VIRTUAL - IOReturn waitQuiet(mach_timespec_t * timeout) - APPLE_KEXT_DEPRECATED; + IOReturn waitQuiet(mach_timespec_t * timeout) + APPLE_KEXT_DEPRECATED; /*! @function waitQuiet @abstract Waits for an IOService object's busyState to be zero. @discussion Blocks the caller until an IOService object is non busy. - @param timeout The maximum time to wait in nanoseconds. Default is to wait forever. + @param timeout The maximum time to wait in nanoseconds. Default is to wait forever. @result Returns an error code if Mach synchronization primitives fail, kIOReturnTimeout, or kIOReturnSuccess. */ IOReturn waitQuiet(uint64_t timeout = UINT64_MAX); @@ -621,8 +654,8 @@ public: @param score Pointer to the current driver's probe score, which is used to order multiple matching drivers in the same match category. 
It defaults to the value of the IOProbeScore property in the drivers property table, or kIODefaultProbeScore if none is specified. @result false if the family considers the matching dictionary does not match in properties it understands; true otherwise. */ - virtual bool matchPropertyTable( OSDictionary * table, - SInt32 * score ); + virtual bool matchPropertyTable( OSDictionary * table, + SInt32 * score ); virtual bool matchPropertyTable( OSDictionary * table ); @@ -659,11 +692,11 @@ public: @abstract Deprecated use addMatchingNotification(). Adds a persistant notification handler to be notified of IOService events. @discussion IOService will deliver notifications of changes in state of an IOService object to registered clients. The type of notification is specified by a symbol, for example gIOMatchedNotification or gIOTerminatedNotification, and notifications will only include IOService objects that match the supplied matching dictionary. Notifications are ordered by a priority set with addNotification. When the notification is installed, its handler will be called with each of any currently existing IOService objects that are in the correct state (eg. registered) and match the supplied matching dictionary, avoiding races between finding preexisting and new IOService events. The notification request is identified by an instance of an IONotifier object, through which it can be enabled, disabled, or removed. addNotification consumes a retain count on the matching dictionary when the notification is removed. @param type An OSSymbol identifying the type of notification and IOService state: -
gIOPublishNotification Delivered when an IOService object is registered. -
gIOFirstPublishNotification Delivered when an IOService object is registered, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. -
gIOMatchedNotification Delivered when an IOService object has been matched with all client drivers, and they have been probed and started. -
gIOFirstMatchNotification Delivered when an IOService object has been matched with all client drivers, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. -
gIOTerminatedNotification Delivered after an IOService object has been terminated, during its finalize stage. +
gIOPublishNotification Delivered when an IOService object is registered. +
gIOFirstPublishNotification Delivered when an IOService object is registered, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. +
gIOMatchedNotification Delivered when an IOService object has been matched with all client drivers, and they have been probed and started. +
gIOFirstMatchNotification Delivered when an IOService object has been matched with all client drivers, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. +
gIOTerminatedNotification Delivered after an IOService object has been terminated, during its finalize stage. @param matching A matching dictionary to restrict notifications to only matching IOService objects. The dictionary will be released when the notification is removed, consuming the passed-in reference. @param handler A C function callback to deliver notifications. @param target An instance reference for the callback's use. @@ -676,17 +709,17 @@ public: IOServiceNotificationHandler handler, void * target, void * ref = 0, SInt32 priority = 0 ) - APPLE_KEXT_DEPRECATED; + APPLE_KEXT_DEPRECATED; /*! @function addMatchingNotification @abstract Adds a persistant notification handler to be notified of IOService events. @discussion IOService will deliver notifications of changes in state of an IOService object to registered clients. The type of notification is specified by a symbol, for example gIOMatchedNotification or gIOTerminatedNotification, and notifications will only include IOService objects that match the supplied matching dictionary. Notifications are ordered by a priority set with addNotification. When the notification is installed, its handler will be called with each of any currently existing IOService objects that are in the correct state (eg. registered) and match the supplied matching dictionary, avoiding races between finding preexisting and new IOService events. The notification request is identified by an instance of an IONotifier object, through which it can be enabled, disabled, or removed. addMatchingNotification does not consume a reference on the matching dictionary when the notification is removed, unlike addNotification. @param type An OSSymbol identifying the type of notification and IOService state: -
gIOPublishNotification Delivered when an IOService object is registered. -
gIOFirstPublishNotification Delivered when an IOService object is registered, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. -
gIOMatchedNotification Delivered when an IOService object has been matched with all client drivers, and they have been probed and started. -
gIOFirstMatchNotification Delivered when an IOService object has been matched with all client drivers, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. -
gIOTerminatedNotification Delivered after an IOService object has been terminated, during its finalize stage. +
gIOPublishNotification Delivered when an IOService object is registered. +
gIOFirstPublishNotification Delivered when an IOService object is registered, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. +
gIOMatchedNotification Delivered when an IOService object has been matched with all client drivers, and they have been probed and started. +
gIOFirstMatchNotification Delivered when an IOService object has been matched with all client drivers, but only once per IOService instance. Some IOService objects may be reregistered when their state is changed. +
gIOTerminatedNotification Delivered after an IOService object has been terminated, during its finalize stage. @param matching A matching dictionary to restrict notifications to only matching IOService objects. The dictionary is retained while the notification is installed. (Differs from addNotification). @param handler A C function callback to deliver notifications. @param target An instance reference for the callback's use. @@ -714,7 +747,7 @@ public: @abstract Waits for a matching to service to be published. @discussion Provides a method of waiting for an IOService object matching the supplied matching dictionary to be registered and fully matched. @param matching The matching dictionary describing the desired IOService object. (Does not consume a reference of the matching dictionary - differs from waitForService() which does consume a reference on the matching dictionary.) - @param timeout The maximum time to wait in nanoseconds. Default is to wait forever. + @param timeout The maximum time to wait in nanoseconds. Default is to wait forever. @result A published IOService object matching the supplied dictionary. waitForMatchingService returns a reference to the IOService which should be released by the caller. (Differs from waitForService() which does not retain the returned object.) */ static IOService * waitForMatchingService( OSDictionary * matching, @@ -748,7 +781,7 @@ public: @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ static OSDictionary * serviceMatching( const char * className, - OSDictionary * table = 0 ); + OSDictionary * table = 0 ); /*! @function serviceMatching @abstract Creates a matching dictionary, or adds matching properties to an existing dictionary, that specify an IOService class match. @@ -758,7 +791,7 @@ public: @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ static OSDictionary * serviceMatching( const OSString * className, - OSDictionary * table = 0 ); + OSDictionary * table = 0 ); /*! @function nameMatching @abstract Creates a matching dictionary, or adds matching properties to an existing dictionary, that specify an IOService name match. @@ -768,7 +801,7 @@ public: @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ static OSDictionary * nameMatching( const char * name, - OSDictionary * table = 0 ); + OSDictionary * table = 0 ); /*! @function nameMatching @abstract Creates a matching dictionary, or adds matching properties to an existing dictionary, that specify an IOService name match. @@ -778,7 +811,7 @@ public: @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ static OSDictionary * nameMatching( const OSString* name, - OSDictionary * table = 0 ); + OSDictionary * table = 0 ); /*! @function resourceMatching @abstract Creates a matching dictionary, or adds matching properties to an existing dictionary, that specify a resource service match. @@ -788,7 +821,7 @@ public: @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ static OSDictionary * resourceMatching( const char * name, - OSDictionary * table = 0 ); + OSDictionary * table = 0 ); /*! @function resourceMatching @abstract Creates a matching dictionary, or adds matching properties to an existing dictionary, that specify a resource service match. 
@@ -798,7 +831,7 @@ public: @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ static OSDictionary * resourceMatching( const OSString * name, - OSDictionary * table = 0 ); + OSDictionary * table = 0 ); /*! @function propertyMatching @@ -810,7 +843,7 @@ public: @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ static OSDictionary * propertyMatching( const OSSymbol * key, const OSObject * value, - OSDictionary * table = 0 ); + OSDictionary * table = 0 ); /*! @function registryEntryIDMatching @abstract Creates a matching dictionary, or adds matching properties to an existing dictionary, that specify a IORegistryEntryID match. @@ -820,7 +853,7 @@ public: @result The matching dictionary created, or passed in, is returned on success, or zero on failure. */ static OSDictionary * registryEntryIDMatching( uint64_t entryID, - OSDictionary * table = 0 ); + OSDictionary * table = 0 ); /*! @function addLocation @@ -936,14 +969,14 @@ public: @result An IOReturn code; kIOReturnSuccess if the function was successfully executed, kIOReturnUnsupported if a service to execute the function could not be found. Other return codes may be returned by the function.*/ virtual IOReturn callPlatformFunction( const OSSymbol * functionName, - bool waitForFunction, - void *param1, void *param2, - void *param3, void *param4 ); + bool waitForFunction, + void *param1, void *param2, + void *param3, void *param4 ); virtual IOReturn callPlatformFunction( const char * functionName, - bool waitForFunction, - void *param1, void *param2, - void *param3, void *param4 ); + bool waitForFunction, + void *param1, void *param2, + void *param3, void *param4 ); /* Some accessors */ @@ -951,7 +984,7 @@ public: /*! @function getPlatform @abstract Returns a pointer to the platform expert instance for the computer. @discussion This method provides an accessor to the platform expert instance for the computer. - @result A pointer to the IOPlatformExport instance. It should not be released by the caller. */ + @result A pointer to the IOPlatformExpert instance. It should not be released by the caller. */ static IOPlatformExpert * getPlatform( void ); @@ -1009,7 +1042,7 @@ public: @result An instance of IOMemoryMap, or zero if the index is beyond the count available. The mapping should be released only when access to it is no longer required. */ virtual IOMemoryMap * mapDeviceMemoryWithIndex( unsigned int index, - IOOptionBits options = 0 ); + IOOptionBits options = 0 ); /*! @function getDeviceMemory @abstract Returns the array of IODeviceMemory objects representing a device's memory mapped ranges. @@ -1037,8 +1070,8 @@ public: @result An IOReturn code.
kIOReturnNoInterrupt is returned if the source is not valid; kIOReturnNoResources is returned if the interrupt already has an installed handler. */ virtual IOReturn registerInterrupt(int source, OSObject *target, - IOInterruptAction handler, - void *refCon = 0); + IOInterruptAction handler, + void *refCon = 0); /*! @function unregisterInterrupt @abstract Removes a C function interrupt handler for a device supplying hardware interrupts. @@ -1204,7 +1237,7 @@ public: #ifdef XNU_KERNEL_PRIVATE public: - // called from other xnu components + // called from other xnu components static void initialize( void ); static void setPlatform( IOPlatformExpert * platform); static void setPMRootDomain( class IOPMrootDomain * rootDomain ); @@ -1212,39 +1245,40 @@ public: uint64_t getAccumulatedBusyTime( void ); static void updateConsoleUsers(OSArray * consoleUsers, IOMessage systemMessage); static void consoleLockTimer(thread_call_param_t p0, thread_call_param_t p1); + void setTerminateDefer(IOService * provider, bool defer); private: static IOReturn waitMatchIdle( UInt32 ms ); static IONotifier * installNotification( - const OSSymbol * type, OSDictionary * matching, - IOServiceMatchingNotificationHandler handler, - void * target, void * ref, - SInt32 priority, OSIterator ** existing ); + const OSSymbol * type, OSDictionary * matching, + IOServiceMatchingNotificationHandler handler, + void * target, void * ref, + SInt32 priority, OSIterator ** existing ); #if !defined(__LP64__) static IONotifier * installNotification( - const OSSymbol * type, OSDictionary * matching, - IOServiceNotificationHandler handler, - void * target, void * ref, - SInt32 priority, OSIterator ** existing); + const OSSymbol * type, OSDictionary * matching, + IOServiceNotificationHandler handler, + void * target, void * ref, + SInt32 priority, OSIterator ** existing); #endif /* !defined(__LP64__) */ #endif private: APPLE_KEXT_COMPATIBILITY_VIRTUAL - bool checkResources( void ); + bool checkResources( void ); APPLE_KEXT_COMPATIBILITY_VIRTUAL - bool checkResource( OSObject * matching ); + bool checkResource( OSObject * matching ); APPLE_KEXT_COMPATIBILITY_VIRTUAL - void probeCandidates( OSOrderedSet * matches ); + void probeCandidates( OSOrderedSet * matches ); APPLE_KEXT_COMPATIBILITY_VIRTUAL - bool startCandidate( IOService * candidate ); + bool startCandidate( IOService * candidate ); public: APPLE_KEXT_COMPATIBILITY_VIRTUAL - IOService * getClientWithCategory( const OSSymbol * category ) - APPLE_KEXT_DEPRECATED; - // copyClientWithCategory is the public replacement + IOService * getClientWithCategory( const OSSymbol * category ) + APPLE_KEXT_DEPRECATED; + // copyClientWithCategory is the public replacement #ifdef XNU_KERNEL_PRIVATE /* Callable within xnu source only - but require vtable entries to be visible */ @@ -1268,35 +1302,35 @@ private: static bool instanceMatch(const OSObject * entry, void * context); static OSObject * copyExistingServices( OSDictionary * matching, - IOOptionBits inState, IOOptionBits options = 0 ); + IOOptionBits inState, IOOptionBits options = 0 ); static IONotifier * setNotification( - const OSSymbol * type, OSDictionary * matching, - IOServiceMatchingNotificationHandler handler, - void * target, void * ref, - SInt32 priority = 0 ); + const OSSymbol * type, OSDictionary * matching, + IOServiceMatchingNotificationHandler handler, + void * target, void * ref, + SInt32 priority = 0 ); static IONotifier * doInstallNotification( - const OSSymbol * type, OSDictionary * matching, - 
IOServiceMatchingNotificationHandler handler, - void * target, void * ref, - SInt32 priority, OSIterator ** existing ); + const OSSymbol * type, OSDictionary * matching, + IOServiceMatchingNotificationHandler handler, + void * target, void * ref, + SInt32 priority, OSIterator ** existing ); static bool syncNotificationHandler( void * target, void * ref, - IOService * newService, IONotifier * notifier ); + IOService * newService, IONotifier * notifier ); APPLE_KEXT_COMPATIBILITY_VIRTUAL - void deliverNotification( const OSSymbol * type, - IOOptionBits orNewState, IOOptionBits andNewState ); + void deliverNotification( const OSSymbol * type, + IOOptionBits orNewState, IOOptionBits andNewState ); bool invokeNotifer( class _IOServiceNotifier * notify ); - APPLE_KEXT_COMPATIBILITY_VIRTUAL - void unregisterAllInterest( void ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + void unregisterAllInterest( void ); - APPLE_KEXT_COMPATIBILITY_VIRTUAL - IOReturn waitForState( UInt32 mask, UInt32 value, - mach_timespec_t * timeout = 0 ); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn waitForState( UInt32 mask, UInt32 value, + mach_timespec_t * timeout = 0 ); IOReturn waitForState( UInt32 mask, UInt32 value, uint64_t timeout ); @@ -1317,10 +1351,10 @@ private: static void actionStop( IOService * client, IOService * provider, void *, void *, void *); - APPLE_KEXT_COMPATIBILITY_VIRTUAL - IOReturn resolveInterrupt(IOService *nub, int source); - APPLE_KEXT_COMPATIBILITY_VIRTUAL - IOReturn lookupInterrupt(int source, bool resolve, IOInterruptController **interruptController); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn resolveInterrupt(IOService *nub, int source); + APPLE_KEXT_COMPATIBILITY_VIRTUAL + IOReturn lookupInterrupt(int source, bool resolve, IOInterruptController **interruptController); #ifdef XNU_KERNEL_PRIVATE /* end xnu internals */ @@ -1435,7 +1469,7 @@ public: /*! @function changePowerStateTo @abstract Sets a driver's power state. @discussion This function is one of several that are used to set a driver's power state. In most circumstances, however, you should call @link changePowerStateToPriv changePowerStateToPriv@/link instead. - Calls to changePowerStateTo, changePowerStateToPriv, and a driver's power children all affect the power state of a driver. For legacy design reasons, they have overlapping functionality. Although you should call changePowerStateToPriv to change your device's power state, you might need to call changePowerStateTo in the following circumstances: + Calls to changePowerStateTo, changePowerStateToPriv, and a driver's power children all affect the power state of a driver. For legacy design reasons, they have overlapping functionality. Although you should call changePowerStateToPriv to change your device's power state, you might need to call changePowerStateTo in the following circumstances:
  • If a driver will be using changePowerStateToPriv to change its power state, it should call changePowerStateTo(0) in its start routine to eliminate the influence changePowerStateTo has on power state calculations.
  • Call changePowerStateTo in conjunction with @link setIdleTimerPeriod setIdleTimerPeriod@/link and @link activityTickle activityTickle@/link to idle a driver into a low power state. For a driver with 3 power states, for example, changePowerStateTo(1) sets a minimum level of power state 1, such that the idle timer period may not set your device's power any lower than state 1.
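A minimal sketch of the idling pattern described in these bullets, assuming a hypothetical MyDriver with the three-state array mentioned above (illustrative code, not part of this header):

    // Three power states: off, idle (powered but not usable), fully on.
    static IOPMPowerState gPowerStates[3] = {
        { kIOPMPowerStateVersion1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
        { kIOPMPowerStateVersion1, 0, kIOPMPowerOn, kIOPMPowerOn, 0, 0, 0, 0, 0, 0, 0, 0 },
        { kIOPMPowerStateVersion1, kIOPMDeviceUsable, kIOPMPowerOn, kIOPMPowerOn, 0, 0, 0, 0, 0, 0, 0, 0 }
    };

    bool MyDriver::start(IOService * provider)
    {
        if (!super::start(provider))
            return false;
        PMinit();
        provider->joinPMtree(this);
        registerPowerDriver(this, gPowerStates, 3);
        changePowerStateTo(0);    // per the first bullet above: remove this
                                  // call's influence on power calculations
        setIdleTimerPeriod(30);   // idle downward after 30 seconds
        return true;
    }

    // On each I/O, tickle power management. If the device is not yet in
    // state 2, activityTickle returns false and the request should wait
    // for setPowerState() to finish raising the device.
    if (!activityTickle(kIOPMSuperclassPolicy1, 2)) {
        // queue the request until the power change completes
    }
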
@param ordinal The number of the desired power state in the power state array. @@ -1645,7 +1679,7 @@ public: @discussion Power management informs interested parties that a device is about to change to a different power state. Interested parties are those that have registered for this notification via @link registerInterestedDriver registerInterestedDriver@/link. If you have called registerInterestedDriver on a power managed driver, you must implement powerStateWillChangeTo and @link powerStateDidChangeTo powerStateDidChangeTo@/link to receive the notifications. powerStateWillChangeTo is called in a clean and separate thread context. powerStateWillChangeTo is called before a power state transition takes place; powerStateDidChangeTo is called after the transition has completed. @param capabilities Flags that describe the capability of the device in the new power state (they come from the capabilityFlags field of the new state in the power state array). - @param stateNumber The number of the state in the state array that the device is switching to. + @param stateNumber The number of the state in the state array that the device is switching to. @param whatDevice A pointer to the driver that is changing. It can be used by a driver that is receiving power state change notifications for multiple devices to distinguish between them. @result The driver returns IOPMAckImplied if it has prepared for the power change when it returns. If it has started preparing but not finished, it should return a number of microseconds which is an upper limit of the time it will need to finish preparing. Then, when it has completed its preparations, it should call @link acknowledgePowerChange acknowledgePowerChange@/link. */ @@ -1659,7 +1693,7 @@ public: @discussion Power management informs interested parties that a device has changed to a different power state. Interested parties are those that have registered for this notification via @link registerInterestedDriver registerInterestedDriver@/link. If you have called registerInterestedDriver on a power managed driver, you must implemnt @link powerStateWillChangeTo powerStateWillChangeTo@/link and powerStateDidChangeTo to receive the notifications. powerStateDidChangeTo is called in a clean and separate thread context. powerStateWillChangeTo is called before a power state transition takes place; powerStateDidChangeTo is called after the transition has completed. @param capabilities Flags that describe the capability of the device in the new power state (they come from the capabilityFlags field of the new state in the power state array). - @param stateNumber The number of the state in the state array that the device is switching to. + @param stateNumber The number of the state in the state array that the device is switching to. @param whatDevice A pointer to the driver that is changing. It can be used by a driver that is receiving power state change notifications for multiple devices to distinguish between them. @result The driver returns IOPMAckImplied if it has prepared for the power change when it returns. If it has started preparing but not finished, it should return a number of microseconds which is an upper limit of the time it will need to finish preparing. Then, when it has completed its preparations, it should call @link acknowledgePowerChange acknowledgePowerChange@/link. 
*/ @@ -1732,10 +1766,15 @@ public: IOReturn synchronizePowerTree( IOOptionBits options = 0, IOService * notifyRoot = 0 ); bool assertPMDriverCall( IOPMDriverCallEntry * callEntry, IOOptionBits options = 0, IOPMinformee * inform = 0 ); void deassertPMDriverCall( IOPMDriverCallEntry * callEntry ); - IOReturn changePowerStateWithOverrideTo( unsigned long ordinal ); - static const char * getIOMessageString( uint32_t msg ); + IOReturn changePowerStateWithOverrideTo( IOPMPowerStateIndex ordinal, IOPMRequestTag tag ); + IOReturn changePowerStateForRootDomain( IOPMPowerStateIndex ordinal ); IOReturn setIgnoreIdleTimer( bool ignore ); + uint32_t getPowerStateForClient( const OSSymbol * client ); + static const char * getIOMessageString( uint32_t msg ); static void setAdvisoryTickleEnable( bool enable ); + void reset_watchdog_timer( void ); + void start_watchdog_timer ( void ); + bool stop_watchdog_timer ( void ); #ifdef __LP64__ static IOWorkLoop * getPMworkloop( void ); @@ -1758,14 +1797,16 @@ private: bool tellChangeDown1 ( unsigned long ); bool tellChangeDown2 ( unsigned long ); IOReturn startPowerChange( IOPMPowerChangeFlags, IOPMPowerStateIndex, IOPMPowerFlags, IOPowerConnection *, IOPMPowerFlags ); - void setParentInfo ( IOPMPowerFlags, IOPowerConnection *, bool ); + void setParentInfo ( IOPMPowerFlags, IOPowerConnection *, bool ); IOReturn notifyAll ( uint32_t nextMS ); bool notifyChild ( IOPowerConnection * child ); + IOPMPowerStateIndex getPowerStateForDomainFlags( IOPMPowerFlags flags ); // power change initiated by driver - void OurChangeStart( void ); + void OurChangeStart( void ); void OurSyncStart ( void ); void OurChangeTellClientsPowerDown ( void ); + void OurChangeTellUserPMPolicyPowerDown ( void ); void OurChangeTellPriorityClientsPowerDown ( void ); void OurChangeTellCapabilityWillChange ( void ); void OurChangeNotifyInterestedDriversWillChange ( void ); @@ -1776,7 +1817,7 @@ private: void OurChangeFinish ( void ); // downward power change initiated by a power parent - IOReturn ParentChangeStart( void ); + IOReturn ParentChangeStart( void ); void ParentChangeTellPriorityClientsPowerDown ( void ); void ParentChangeTellCapabilityWillChange ( void ); void ParentChangeNotifyInterestedDriversWillChange ( void ); @@ -1790,6 +1831,7 @@ private: void all_done ( void ); void start_ack_timer ( void ); void stop_ack_timer ( void ); + void start_ack_timer( UInt32 value, UInt32 scale ); void startSettleTimer( void ); bool checkForDone ( void ); bool responseValid ( uint32_t x, int pid ); @@ -1798,56 +1840,63 @@ private: void tellSystemCapabilityChange( uint32_t nextMS ); void restartIdleTimer( void ); - static void ack_timer_expired( thread_call_param_t, thread_call_param_t ); - static IOReturn actionAckTimerExpired(OSObject *, void *, void *, void *, void * ); - static IOReturn actionDriverCalloutDone(OSObject *, void *, void *, void *, void * ); - static IOPMRequest * acquirePMRequest( IOService * target, IOOptionBits type, IOPMRequest * active = 0 ); - static void releasePMRequest( IOPMRequest * request ); - static void pmDriverCallout( IOService * from ); - static void pmTellAppWithResponse( OSObject * object, void * context ); - static void pmTellClientWithResponse( OSObject * object, void * context ); + static void ack_timer_expired( thread_call_param_t, thread_call_param_t ); + static void watchdog_timer_expired ( thread_call_param_t arg0, thread_call_param_t arg1 ); + static IOReturn actionAckTimerExpired(OSObject *, void *, void *, void *, void * ); + static IOReturn 
watchdog_timer_expired ( OSObject *, void *, void *, void *, void * ); + + static IOReturn actionDriverCalloutDone(OSObject *, void *, void *, void *, void * ); + static IOPMRequest * acquirePMRequest( IOService * target, IOOptionBits type, IOPMRequest * active = 0 ); + static void releasePMRequest( IOPMRequest * request ); + static void pmDriverCallout( IOService * from ); + static void pmTellAppWithResponse( OSObject * object, void * context ); + static void pmTellClientWithResponse( OSObject * object, void * context ); static void pmTellCapabilityAppWithResponse ( OSObject * object, void * arg ); static void pmTellCapabilityClientWithResponse( OSObject * object, void * arg ); - bool ackTimerTick( void ); - void addPowerChild1( IOPMRequest * request ); - void addPowerChild2( IOPMRequest * request ); - void addPowerChild3( IOPMRequest * request ); - void adjustPowerState( uint32_t clamp = 0 ); - void start_ack_timer( UInt32 value, UInt32 scale ); - void handlePMstop( IOPMRequest * request ); - void handleRegisterPowerDriver( IOPMRequest * request ); - bool handleAcknowledgePowerChange( IOPMRequest * request ); - void handlePowerDomainWillChangeTo( IOPMRequest * request ); - void handlePowerDomainDidChangeTo( IOPMRequest * request ); - void handleRequestPowerState( IOPMRequest * request ); - void handlePowerOverrideChanged( IOPMRequest * request ); - void handleActivityTickle( IOPMRequest * request ); - void handleInterestChanged( IOPMRequest * request ); + bool ackTimerTick( void ); + void addPowerChild1( IOPMRequest * request ); + void addPowerChild2( IOPMRequest * request ); + void addPowerChild3( IOPMRequest * request ); + void adjustPowerState( uint32_t clamp = 0 ); + void handlePMstop( IOPMRequest * request ); + void handleRegisterPowerDriver( IOPMRequest * request ); + bool handleAcknowledgePowerChange( IOPMRequest * request ); + void handlePowerDomainWillChangeTo( IOPMRequest * request ); + void handlePowerDomainDidChangeTo( IOPMRequest * request ); + void handleRequestPowerState( IOPMRequest * request ); + void handlePowerOverrideChanged( IOPMRequest * request ); + void handleActivityTickle( IOPMRequest * request ); + void handleInterestChanged( IOPMRequest * request ); void handleSynchronizePowerTree( IOPMRequest * request ); - void submitPMRequest( IOPMRequest * request ); - void submitPMRequest( IOPMRequest ** request, IOItemCount count ); - void executePMRequest( IOPMRequest * request ); - bool servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ); - bool retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ); - bool servicePMRequestQueue( IOPMRequest * request, IOPMRequestQueue * queue ); - bool servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue ); - bool servicePMFreeQueue( IOPMRequest * request, IOPMCompletionQueue * queue ); - bool notifyInterestedDrivers( void ); - void notifyInterestedDriversDone( void ); - bool notifyControllingDriver( void ); - void notifyControllingDriverDone( void ); - void driverSetPowerState( void ); - void driverInformPowerChange( void ); - bool isPMBlocked( IOPMRequest * request, int count ); - void notifyChildren( void ); - void notifyChildrenOrdered( void ); - void notifyChildrenDelayed( void ); + void submitPMRequest( IOPMRequest * request ); + void submitPMRequest( IOPMRequest ** request, IOItemCount count ); + void executePMRequest( IOPMRequest * request ); + bool servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ); + bool retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue ); + 
bool servicePMRequestQueue( IOPMRequest * request, IOPMRequestQueue * queue ); + bool servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * queue ); + bool servicePMFreeQueue( IOPMRequest * request, IOPMCompletionQueue * queue ); + bool notifyInterestedDrivers( void ); + void notifyInterestedDriversDone( void ); + bool notifyControllingDriver( void ); + void notifyControllingDriverDone( void ); + void driverSetPowerState( void ); + void driverInformPowerChange( void ); + bool isPMBlocked( IOPMRequest * request, int count ); + void notifyChildren( void ); + void notifyChildrenOrdered( void ); + void notifyChildrenDelayed( void ); + void notifyRootDomain( void ); + void notifyRootDomainDone( void ); void cleanClientResponses ( bool logErrors ); - void updatePowerClient( const OSSymbol * client, uint32_t powerState ); - void removePowerClient( const OSSymbol * client ); - uint32_t getPowerStateForClient( const OSSymbol * client ); + void updatePowerClient( const OSSymbol * client, uint32_t powerState ); + void removePowerClient( const OSSymbol * client ); IOReturn requestPowerState( const OSSymbol * client, uint32_t state ); IOReturn requestDomainPower( IOPMPowerStateIndex ourPowerState, IOOptionBits options = 0 ); + IOReturn configurePowerStatesReport( IOReportConfigureAction action, void *result ); + IOReturn updatePowerStatesReport( IOReportConfigureAction action, void *result, void *destination ); + IOReturn configureSimplePowerReport(IOReportConfigureAction action, void *result ); + IOReturn updateSimplePowerReport( IOReportConfigureAction action, void *result, void *destination ); void waitForPMDriverCall( IOService * target = 0 ); #endif /* XNU_KERNEL_PRIVATE */ }; diff --git a/iokit/IOKit/IOServicePM.h b/iokit/IOKit/IOServicePM.h index 2a2c4c400..27d871234 100644 --- a/iokit/IOKit/IOServicePM.h +++ b/iokit/IOKit/IOServicePM.h @@ -47,8 +47,24 @@ class IOPMRequest; class IOPMRequestQueue; class IOPMCompletionQueue; +// PM channels for IOReporting +#ifndef kPMPowerStatesChID +#define kPMPowerStatesChID IOREPORT_MAKEID('P','M','S','t','H','i','s','t') +#endif + +#ifndef kPMCurrStateChID +#define kPMCurrStateChID IOREPORT_MAKEID( 'P','M','C','u','r','S','t','\0' ) +#endif + +// state_id details in PM channels +#define kPMReportPowerOn 0x01 +#define kPMReportDeviceUsable 0x02 +#define kPMReportLowPower 0x04 + + typedef unsigned long IOPMPowerStateIndex; typedef uint32_t IOPMPowerChangeFlags; +typedef uint32_t IOPMRequestTag; struct IOPMDriverCallEntry { queue_chain_t link; @@ -56,6 +72,13 @@ struct IOPMDriverCallEntry { IOService * target; }; +// Power clients (desires) +extern const OSSymbol * gIOPMPowerClientDevice; +extern const OSSymbol * gIOPMPowerClientDriver; +extern const OSSymbol * gIOPMPowerClientChildProxy; +extern const OSSymbol * gIOPMPowerClientChildren; +extern const OSSymbol * gIOPMPowerClientRootDomain; + /* Binary compatibility with drivers that access pm_vars */ #ifdef __LP64__ #define PM_VARS_SUPPORT 0 diff --git a/iokit/IOKit/IOTypes.h b/iokit/IOKit/IOTypes.h index 76bd0acfa..b563ae6d1 100644 --- a/iokit/IOKit/IOTypes.h +++ b/iokit/IOKit/IOTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2012 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -56,15 +56,9 @@ extern "C" { /* * Simple data types. */ -#ifndef __MACTYPES__ /* CF MacTypes.h */ -#ifndef __TYPES__ /* guess... 
Mac Types.h */ - #include #include -#endif /* __TYPES__ */ -#endif /* __MACTYPES__ */ - #if KERNEL #include #endif @@ -83,13 +77,13 @@ typedef UInt64 IOPhysicalAddress64; typedef UInt32 IOPhysicalLength32; typedef UInt64 IOPhysicalLength64; -#ifdef __LP64__ +#if !defined(__arm__) && !defined(__i386__) typedef mach_vm_address_t IOVirtualAddress; #else typedef vm_address_t IOVirtualAddress; #endif -#if defined(__LP64__) && defined(KERNEL) +#if !defined(__arm__) && !defined(__i386__) && !(defined(__x86_64__) && !defined(KERNEL)) typedef IOByteCount64 IOByteCount; #else typedef IOByteCount32 IOByteCount; @@ -97,7 +91,7 @@ typedef IOByteCount32 IOByteCount; typedef IOVirtualAddress IOLogicalAddress; -#if defined(__LP64__) && defined(KERNEL) +#if !defined(__arm__) && !defined(__i386__) && !(defined(__x86_64__) && !defined(KERNEL)) typedef IOPhysicalAddress64 IOPhysicalAddress; typedef IOPhysicalLength64 IOPhysicalLength; @@ -126,15 +120,15 @@ typedef struct IOByteCount length; } IOVirtualRange; -#ifdef __LP64__ +#if !defined(__arm__) && !defined(__i386__) typedef IOVirtualRange IOAddressRange; -#else /* !__LP64__ */ +#else typedef struct { mach_vm_address_t address; mach_vm_size_t length; } IOAddressRange; -#endif /* !__LP64__ */ +#endif /* * Map between #defined or enum'd constants and text description. diff --git a/iokit/IOKit/IOUserClient.h b/iokit/IOKit/IOUserClient.h index aaab07ebe..453795c74 100644 --- a/iokit/IOKit/IOUserClient.h +++ b/iokit/IOKit/IOUserClient.h @@ -41,6 +41,8 @@ #include #endif +#define _IOUSERCLIENT_SENDASYNCRESULT64WITHOPTIONS_ 1 + enum { kIOUCTypeMask = 0x0000000f, kIOUCScalarIScalarO = 0, @@ -95,6 +97,10 @@ enum { kIOUserNotifyMaxMessageSize = 64 }; +enum { + kIOUserNotifyOptionCanDrop = 0x1 /* Fail if queue is full, rather than infinitely queuing. */ +}; + // keys for clientHasPrivilege #define kIOClientPrivilegeAdministrator "root" #define kIOClientPrivilegeLocalUser "local" @@ -254,9 +260,27 @@ protected: static IOReturn sendAsyncResult64(OSAsyncReference64 reference, IOReturn result, io_user_reference_t args[], UInt32 numArgs); + + /*! + @function sendAsyncResult64WithOptions + @abstract Send a notification as with sendAsyncResult, but with finite queueing. + @discussion IOUserClient::sendAsyncResult64() will infitely queue messages if the client + is not processing them in a timely fashion. This variant will not, for simple + handling of situations where clients may be expected to stop processing messages. + */ + static IOReturn sendAsyncResult64WithOptions(OSAsyncReference64 reference, + IOReturn result, io_user_reference_t args[], UInt32 numArgs, + IOOptionBits options); + static void setAsyncReference64(OSAsyncReference64 asyncRef, mach_port_t wakePort, mach_vm_address_t callback, io_user_reference_t refcon); + + static void setAsyncReference64(OSAsyncReference64 asyncRef, + mach_port_t wakePort, + mach_vm_address_t callback, io_user_reference_t refcon, + task_t task); + public: static IOReturn clientHasPrivilege( void * securityToken, @@ -318,6 +342,8 @@ private: IOVirtualAddress atAddress = 0 ); #endif + static IOReturn _sendAsyncResult64(OSAsyncReference64 reference, + IOReturn result, io_user_reference_t args[], UInt32 numArgs, IOOptionBits options); public: /*! 
diff --git a/iokit/IOKit/Makefile b/iokit/IOKit/Makefile index 69e6c7aad..e9ee13a71 100644 --- a/iokit/IOKit/Makefile +++ b/iokit/IOKit/Makefile @@ -8,8 +8,8 @@ IOKIT_INCFRAME = $(FRAMEDIR)/IOKit.framework IOKIT_INCDIR = $(IOKIT_INCFRAME)/Versions/$(IOKIT_INCVERS)/Headers IOKIT_PINCDIR = $(IOKIT_INCFRAME)/Versions/$(IOKIT_INCVERS)/PrivateHeaders -export INCDIR = $(IOKIT_INCDIR) -export LCLDIR = $(IOKIT_PINCDIR) +INCDIR = $(IOKIT_INCDIR) +LCLDIR = $(IOKIT_PINCDIR) include $(MakeInc_cmd) include $(MakeInc_def) @@ -22,17 +22,9 @@ INSTINC_SUBDIRS = \ rtc \ system_management -INSTINC_SUBDIRS_I386 = - -INSTINC_SUBDIRS_X86_64 = - - EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} - -NOT_EXPORT_HEADERS = +NOT_EXPORT_HEADERS = IOReportMacros.h NOT_KF_MI_HEADERS = $(NOT_EXPORT_HEADERS) \ IOKitKeysPrivate.h IOCPU.h \ @@ -48,8 +40,10 @@ ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) INSTALL_MI_LIST = IOBSD.h IOKitKeys.h IOKitServer.h IOReturn.h\ IOSharedLock.h IOTypes.h OSMessageNotification.h\ IODataQueueShared.h IOMessage.h - -INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h IOHibernatePrivate.h IOLocksPrivate.h IOStatistics.h AppleKeyStoreInterface.h + +INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h IOHibernatePrivate.h \ + IOLocksPrivate.h IOStatistics.h \ + AppleKeyStoreInterface.h IOReportTypes.h INSTALL_MI_DIR = . diff --git a/iokit/IOKit/i386/Makefile b/iokit/IOKit/i386/Makefile deleted file mode 100644 index 514496af6..000000000 --- a/iokit/IOKit/i386/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - -IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders - -include $(MakeInc_cmd) -include $(MakeInc_def) - -MD_DIR = i386 -EXCLUDE_HEADERS = - -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - -ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) -HEADER_LIST = $(filter-out $(EXCLUDE_HEADERS), $(ALL_HEADERS)) - -INSTALL_MD_LIST = ${HEADER_LIST} -INSTALL_MD_LCL_LIST = "" -INSTALL_MD_DIR = $(MD_DIR) - -EXPORT_MD_LIST = -EXPORT_MD_DIR = IOKit/$(MD_DIR) - -include $(MakeInc_rule) -include $(MakeInc_dir) diff --git a/iokit/IOKit/machine/Makefile b/iokit/IOKit/machine/Makefile index 14dd46d76..d68173f0c 100644 --- a/iokit/IOKit/machine/Makefile +++ b/iokit/IOKit/machine/Makefile @@ -4,8 +4,8 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders +INCDIR = $(IOKIT_FRAMEDIR)/Headers +LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders include $(MakeInc_cmd) include $(MakeInc_def) @@ -13,19 +13,11 @@ include $(MakeInc_def) MI_DIR = machine EXCLUDE_HEADERS = -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = -INSTINC_SUBDIRS_X86_64 = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} - ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) HEADER_LIST = $(filter-out $(EXCLUDE_HEADERS), $(ALL_HEADERS)) INSTALL_MI_LIST = ${HEADER_LIST} -INSTALL_MI_LCL_LIST = "" +INSTALL_MI_LCL_LIST = INSTALL_MI_DIR = $(MI_DIR) 
EXPORT_MI_LIST = ${HEADER_LIST} diff --git a/iokit/IOKit/nvram/Makefile b/iokit/IOKit/nvram/Makefile index 2a3da6d3c..1f1db4527 100644 --- a/iokit/IOKit/nvram/Makefile +++ b/iokit/IOKit/nvram/Makefile @@ -4,8 +4,8 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders +INCDIR = $(IOKIT_FRAMEDIR)/Headers +LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders include $(MakeInc_cmd) include $(MakeInc_def) @@ -13,20 +13,10 @@ include $(MakeInc_def) MI_DIR = nvram NOT_EXPORT_HEADERS = -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = -INSTINC_SUBDIRS_X86_64 = -INSTINC_SUBDIRS_ARM = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} -EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} - ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) INSTALL_MI_LIST = -INSTALL_MI_LCL_LIST = "" +INSTALL_MI_LCL_LIST = INSTALL_MI_DIR = $(MI_DIR) EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS)) diff --git a/iokit/IOKit/pci/IOPCIDevice.h b/iokit/IOKit/pci/IOPCIDevice.h deleted file mode 100644 index 92069bbe7..000000000 --- a/iokit/IOKit/pci/IOPCIDevice.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#ifndef _IOKIT_IOPCIDEVICE_H -#define _IOKIT_IOPCIDEVICE_H - -#warning IOPCIDevice.h moved to IOPCIFamily project -#warning IOPCIDevice.h will be removed from xnu; do not edit or add new usage - -#include - -union IOPCIAddressSpace { - UInt32 bits; - struct { -#ifdef __BIG_ENDIAN__ - unsigned int reloc:1; - unsigned int prefetch:1; - unsigned int t:1; - unsigned int resv:3; - unsigned int space:2; - unsigned int busNum:8; - unsigned int deviceNum:5; - unsigned int functionNum:3; - unsigned int registerNum:8; -#elif defined(__LITTLE_ENDIAN__) - unsigned int registerNum:8; - unsigned int functionNum:3; - unsigned int deviceNum:5; - unsigned int busNum:8; - unsigned int space:2; - unsigned int resv:3; - unsigned int t:1; - unsigned int prefetch:1; - unsigned int reloc:1; -#endif - } s; -}; - -class IOPCIBridge; -class IOPCI2PCIBridge; - -class IOPCIDevice : public IOService -{ - OSDeclareDefaultStructors(IOPCIDevice) - - friend class IOPCIBridge; - friend class IOPCI2PCIBridge; - -protected: - IOPCIBridge * parent; - IOMemoryMap * ioMap; - OSObject * slotNameProperty; - - struct ExpansionData { }; - - ExpansionData *reserved; - -public: - IOPCIAddressSpace space; - UInt32 * savedConfig; - -public: - - virtual bool attach( IOService * provider ); - virtual void detach( IOService * provider ); - virtual IOReturn setPowerState( unsigned long, IOService * ); - virtual bool compareName( OSString * name, OSString ** matched = 0 ) const; - virtual bool matchPropertyTable( OSDictionary * table, - SInt32 * score ); - virtual IOService * matchLocation( IOService * client ); - virtual IOReturn getResources( void ); - - /* Config space accessors */ - - virtual UInt32 configRead32( IOPCIAddressSpace space, UInt8 offset ); - virtual void configWrite32( IOPCIAddressSpace space, - UInt8 offset, UInt32 data ); - virtual UInt16 configRead16( IOPCIAddressSpace space, UInt8 offset ); - virtual void configWrite16( IOPCIAddressSpace space, - UInt8 offset, UInt16 data ); - virtual UInt8 configRead8( IOPCIAddressSpace space, UInt8 offset ); - virtual void configWrite8( IOPCIAddressSpace space, - UInt8 offset, UInt8 data ); - - virtual UInt32 configRead32( UInt8 offset ); - virtual UInt16 configRead16( UInt8 offset ); - virtual UInt8 configRead8( UInt8 offset ); - virtual void configWrite32( UInt8 offset, UInt32 data ); - virtual void configWrite16( UInt8 offset, UInt16 data ); - virtual void configWrite8( UInt8 offset, UInt8 data ); - - virtual IOReturn saveDeviceState( IOOptionBits options = 0 ); - virtual IOReturn restoreDeviceState( IOOptionBits options = 0 ); - virtual UInt32 setConfigBits( UInt8 offset, UInt32 mask, UInt32 value ); - - virtual bool setMemoryEnable( bool enable ); - virtual bool setIOEnable( bool enable, bool exclusive = false ); - virtual bool setBusMasterEnable( bool enable ); - virtual UInt32 findPCICapability( UInt8 capabilityID, UInt8 * offset = 0 ); - virtual UInt8 getBusNumber( void ); - virtual UInt8 getDeviceNumber( void ); - virtual UInt8 getFunctionNumber( void ); - virtual IODeviceMemory * getDeviceMemoryWithRegister( UInt8 reg ); - virtual IOMemoryMap * mapDeviceMemoryWithRegister( UInt8 reg, - IOOptionBits options = 0 ); - virtual IODeviceMemory * ioDeviceMemory( void ); - virtual void ioWrite32( UInt16 offset, UInt32 value, - IOMemoryMap * map = 0 ); - virtual void ioWrite16( UInt16 offset, UInt16 value, - IOMemoryMap * map = 0 ); - virtual void ioWrite8( UInt16 offset, UInt8 value, - IOMemoryMap * map = 0 ); - virtual UInt32 
ioRead32( UInt16 offset, IOMemoryMap * map = 0 ); - virtual UInt16 ioRead16( UInt16 offset, IOMemoryMap * map = 0 ); - virtual UInt8 ioRead8( UInt16 offset, IOMemoryMap * map = 0 ); - - // Unused Padding - OSMetaClassDeclareReservedUnused(IOPCIDevice, 0); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 1); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 2); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 3); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 4); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 5); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 6); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 7); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 8); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 9); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 10); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 11); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 12); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 13); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 14); - OSMetaClassDeclareReservedUnused(IOPCIDevice, 15); -}; - -#endif /* ! _IOKIT_IOPCIDEVICE_H */ diff --git a/iokit/IOKit/platform/Makefile b/iokit/IOKit/platform/Makefile index 7d5079f87..fd6716035 100644 --- a/iokit/IOKit/platform/Makefile +++ b/iokit/IOKit/platform/Makefile @@ -4,8 +4,8 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders +INCDIR = $(IOKIT_FRAMEDIR)/Headers +LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders include $(MakeInc_cmd) include $(MakeInc_def) @@ -14,16 +14,6 @@ MI_DIR = platform NOT_EXPORT_HEADERS = NOT_KF_MI_HEADERS = -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = -INSTINC_SUBDIRS_X86_64 = -INSTINC_SUBDIRS_ARM = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} -EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} - ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) INSTALL_MI_LIST = diff --git a/iokit/IOKit/power/Makefile b/iokit/IOKit/power/Makefile index fd1518bd7..acb41d022 100644 --- a/iokit/IOKit/power/Makefile +++ b/iokit/IOKit/power/Makefile @@ -4,8 +4,8 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders +INCDIR = $(IOKIT_FRAMEDIR)/Headers +LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders include $(MakeInc_cmd) include $(MakeInc_def) @@ -13,20 +13,10 @@ include $(MakeInc_def) MI_DIR = power NOT_EXPORT_HEADERS = -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = -INSTINC_SUBDIRS_X86_64 = -INSTINC_SUBDIRS_ARM = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} -EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} - ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) INSTALL_MI_LIST = -INSTALL_MI_LCL_LIST = "" +INSTALL_MI_LCL_LIST = INSTALL_MI_DIR = $(MI_DIR) EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS)) diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index f01c84178..cdaf4655f 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -230,6 +230,15 @@ enum { */ #define kIOPMSleepWakeUUIDKey "SleepWakeUUID" +/* kIOPMBootSessionUUIDKey + * Key refers to a CFStringRef that will uniquely identify + * a boot 
cycle. + * The key becomes valid at boot time and remains valid + * until shutdown. The property value will remain the same across + * sleep/wake/hibernate cycles. + */ +#define kIOPMBootSessionUUIDKey "BootSessionUUID" + /* kIOPMDeepSleepEnabledKey * Indicates the Deep Sleep enable state. * It has a boolean value. @@ -328,7 +337,13 @@ enum { /*! kIOPMDriverAssertionMagicPacketWakeEnabledBit * When set, driver is informing PM that magic packet wake is enabled. */ - kIOPMDriverAssertionMagicPacketWakeEnabledBit = 0x100 + kIOPMDriverAssertionMagicPacketWakeEnabledBit = 0x100, + + /*! kIOPMDriverAssertionNetworkKeepAliveActiveBit + * When set, driver is informing PM that it is holding the network + * interface up to do TCPKeepAlive. + */ + kIOPMDriverAssertionNetworkKeepAliveActiveBit = 0x200 }; /* kIOPMAssertionsDriverKey @@ -362,6 +377,7 @@ enum { #define kIOPMDriverAssertionModifiedTimeKey "ModifiedTime" #define kIOPMDriverAssertionOwnerStringKey "Owner" #define kIOPMDriverAssertionOwnerServiceKey "ServicePtr" +#define kIOPMDriverAssertionRegistryEntryIDKey "RegistryEntryID" #define kIOPMDriverAssertionLevelKey "Level" #define kIOPMDriverAssertionAssertedKey "Assertions" diff --git a/iokit/IOKit/pwr_mgt/IOPMLibDefs.h b/iokit/IOKit/pwr_mgt/IOPMLibDefs.h index 3acac824e..b74d580e9 100644 --- a/iokit/IOKit/pwr_mgt/IOPMLibDefs.h +++ b/iokit/IOKit/pwr_mgt/IOPMLibDefs.h @@ -40,5 +40,8 @@ #define kPMActivityTickle 10 #define kPMGetSystemSleepType 11 #define kPMSetClamshellSleepState 12 +#define kPMSleepWakeWatchdogEnable 13 +#define kPMSleepWakeDebugTrig 14 +#define kPMSetDisplayPowerOn 15 -#define kNumPMMethods 13 +#define kNumPMMethods 16 diff --git a/iokit/IOKit/pwr_mgt/IOPMPowerSource.h b/iokit/IOKit/pwr_mgt/IOPMPowerSource.h index ed6891819..cb1b62744 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPowerSource.h +++ b/iokit/IOKit/pwr_mgt/IOPMPowerSource.h @@ -40,7 +40,7 @@ enum { kTenMinutesInSeconds = 600 }; -/* class IOPMPowerSource +/*! @class IOPMPowerSource * * See IOKit/pwr_mgt/IOPM.h for power source keys relevant to this class. These * report-type keys are required for calls to IOPMPowerSource::setReportables(), @@ -49,18 +49,19 @@ enum { * * A subclassing driver that doesn't want to do anything fancy should: * 1. Subclass IOPMPowerSource - * 3. Install its own battery change notifications or polling routine that can + * 2. Install its own battery change notifications or polling routine that can * converse with actual battery hardware. - * 4. When battery state changes, change the relevant member variables + * 3. When battery state changes, change the relevant member variables * through setCurrentCapacity() style accessors. - * 5. Call updateStatus() on itself when all such settings have been updated. + * 4. Call updateStatus() on itself when all such settings have been updated. * * The subclass driver should also initially populate its settings and call * updateStatus() on launch (see the sketch below the Settings list). * * - * Settings + * Settings: * +
  * ExternalConnected
  * Type: bool
  * IORegistry Key: kIOPMPSExternalConnectedKey
@@ -163,6 +164,7 @@ enum {
  * Type: OSDictionary
  * IORegistry Key: kIOPMPSLegacyBatteryInfoKey
  * Dictionary conforming to the OS X 10.0-10.4 
+ * 
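The four-step subclassing recipe above is easier to see in code. A minimal, hypothetical sketch follows (the class name, the poll routine, and the literal readings are invented for illustration; a real driver would drive pollHardware() from a timer or an interrupt, and would supply the usual OSDefineMetaClassAndStructors boilerplate in its .cpp):

```cpp
#include <IOKit/pwr_mgt/IOPMPowerSource.h>

// Hypothetical subclass following steps 1-4 of the recipe above.
class MyBatteryDriver : public IOPMPowerSource
{
    OSDeclareDefaultStructors(MyBatteryDriver)

public:
    virtual bool start(IOService * provider)
    {
        if (!IOPMPowerSource::start(provider)) return false;
        // Step 2: install battery change notifications or a polling
        // timer here (plumbing omitted), then publish initial state.
        pollHardware();
        return true;
    }

private:
    void pollHardware()
    {
        // Step 3: push fresh hardware readings through the accessors...
        setExternalConnected(true);
        setMaxCapacity(100);
        setCurrentCapacity(87);   // illustrative reading
        // Step 4: ...then relay them all to the IORegistry in one call.
        updateStatus();
    }
};
```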
*/ class IOPMPowerSource : public IOService @@ -173,14 +175,14 @@ class IOPMPowerSource : public IOService protected: -/* bool settingsChangedSinceLastUpdate +/*! @var settingsChangedSinceLastUpdate * Used by subclasses to determine if any settings have been modified via the * accessors below since last call to update(). true if settings have changed; * false otherwise. */ bool settingsChangedSinceUpdate; -/* OSDictionary properties +/*! @var properties * Stores power source state */ OSDictionary *properties; @@ -289,7 +291,7 @@ protected: void setSerial(OSSymbol *); void setLegacyIOBatteryInfo(OSDictionary *); -/* All of these methods funnel through the generic accessor method +/*! All of these methods funnel through the generic accessor method setPSProperty. Caller can pass in any arbitrary OSSymbol key, and that value will be stored in the PM settings dictionary, and relayed onto the IORegistry at update time. diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h index 55f86b475..f9bb27642 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPrivate.h +++ b/iokit/IOKit/pwr_mgt/IOPMPrivate.h @@ -78,7 +78,7 @@ * @abstract Potential system events logged in the system event record. */ enum { - kIOPMEventTypeUndefined = 0, + kIOPMEventTypeUndefined = 0, /* Event types mark driver events */ @@ -92,18 +92,18 @@ enum { /* Start and stop event types bracket major * system power management events. */ - kIOPMEventTypeSleep = 2001, - kIOPMEventTypeSleepDone = 2002, - kIOPMEventTypeWake = 3001, - kIOPMEventTypeWakeDone = 3002, - kIOPMEventTypeDoze = 4001, - kIOPMEventTypeDozeDone = 4002, - kIOPMEventTypeLiteWakeUp = 5001, - kIOPMEventTypeLiteWakeUpDone = 5002, - kIOPMEventTypeLiteWakeDown = 5003, - kIOPMEventTypeLiteWakeDownDone = 5004, - kIOPMEventTypeUUIDSet = 6001, - kIOPMEventTypeUUIDClear = 6002, + kIOPMEventTypeSleep = 2001, + kIOPMEventTypeSleepDone = 2002, + kIOPMEventTypeWake = 3001, + kIOPMEventTypeWakeDone = 3002, + kIOPMEventTypeDoze = 4001, + kIOPMEventTypeDozeDone = 4002, + kIOPMEventTypeLiteWakeUp = 5001, + kIOPMEventTypeLiteWakeUpDone = 5002, + kIOPMEventTypeLiteWakeDown = 5003, + kIOPMEventTypeLiteWakeDownDone = 5004, + kIOPMEventTypeUUIDSet = 6001, + kIOPMEventTypeUUIDClear = 6002, /* Intermediate events that may only occur within the bounds * of a major system event (between the event's initiation and its "done event".) @@ -118,8 +118,28 @@ enum { kIOPMEventTypeCalTimeChange = 503 | kIOPMEventTypeIntermediateFlag }; +/***************************************************************************** + * + * Private Root Domain general interest messages + * + * Check IOPM.h when adding new messages to avoid conflict. + * + *****************************************************************************/ + +/* kIOPMMessageUserIsActiveChanged + * User became active or inactive. Message sent after the kIOPMUserIsActiveKey + * property was updated with a new value. + */ +#define kIOPMMessageUserIsActiveChanged \ + iokit_family_msg(sub_iokit_powermanagement, 0x400) -/* @enum SystemSleepReasons +/* + * Private IOMessage notifications shared between kernel and userspace PM policy + */ +#define kIOPMMessageLastCallBeforeSleep \ + iokit_family_msg(sub_iokit_powermanagement, 0x410) + +/* @enum SystemSleepReasons + * @abstract The potential causes for system sleep as logged in the system event record.
*/ enum { @@ -349,6 +369,7 @@ enum { #define kIOPMStatsTimeMSKey "TimeMS" #define kIOPMStatsApplicationResponseTypeKey "ResponseType" #define kIOPMStatsMessageTypeKey "MessageType" +#define kIOPMStatsPowerCapabilityKey "PowerCaps" // PM Statistics: potential values for the key kIOPMStatsApplicationResponseTypeKey // entry in the application results array. @@ -619,6 +640,8 @@ enum { #define kIOPMSleepWakeFailureLoginKey "LWFailurePhase" #define kIOPMSleepWakeFailureUUIDKey "UUID" #define kIOPMSleepWakeFailureDateKey "Date" +#define kIOPMSleepWakeWdogRebootKey "SWWdogTriggeredRestart" +#define kIOPMSleepWakeWdogLogsValidKey "SWWdogLogsValid" /***************************************************************************** * * @@ -666,6 +689,19 @@ enum { */ #define kIOPMDeepIdleSupportedKey "IOPMDeepIdleSupported" +/* kIOPMUserTriggeredFullWakeKey + * Key refers to a boolean value that indicates if the first full wake since + * last system sleep was triggered by the local user. This property is set + * before the initial full wake transition, and removed after powering down + * drivers for system sleep. + */ +#define kIOPMUserTriggeredFullWakeKey "IOPMUserTriggeredFullWake" + +/* kIOPMUserIsActiveKey + * Key refers to a boolean value that indicates if the user is active. + */ +#define kIOPMUserIsActiveKey "IOPMUserIsActive" + /***************************************************************************** * * System Sleep Policy @@ -737,7 +773,9 @@ enum { kIOPMSleepFactorHibernateForced = 0x00010000ULL, kIOPMSleepFactorAutoPowerOffDisabled = 0x00020000ULL, kIOPMSleepFactorAutoPowerOffForced = 0x00040000ULL, - kIOPMSleepFactorExternalDisplay = 0x00080000ULL + kIOPMSleepFactorExternalDisplay = 0x00080000ULL, + kIOPMSleepFactorNetworkKeepAliveActive = 0x00100000ULL, + kIOPMSleepFactorLocalUserActivity = 0x00200000ULL }; // System Sleep Types @@ -757,7 +795,8 @@ enum { kIOPMSleepFlagDisableHibernateAbort = 0x00000001, kIOPMSleepFlagDisableUSBWakeEvents = 0x00000002, - kIOPMSleepFlagDisableBatlowAssertion = 0x00000004 + kIOPMSleepFlagDisableBatlowAssertion = 0x00000004, + kIOPMSleepFlagDisableS4WakeSources = 0x00000008 }; // System Wake Events @@ -800,6 +839,43 @@ struct IOPMSystemSleepParameters uint32_t reserved2[10]; } __attribute__((packed)); + +/* + * Sleep Wake debug buffer header + */ +typedef struct { + uint32_t version; + uint32_t alloc_size; + uint32_t dlog_buf_offset; /* Offset at which root domain's logging is stored */ + volatile uint32_t dlog_cur_pos; /* Offset at which next trace will be copied to */ + uint32_t dlog_size; /* Size reserved for root domain's logging */ + uint32_t crc; /* CRC for spindump & following data. Doesn't cover hdr & DLOG buf */ + uint32_t spindump_offset; /* Offset at which spindump is stored */ + uint32_t spindump_size; + + /* All members from UUID onwards are saved into log file */ + char UUID[44]; + char cps[9]; /* Current power state */ + char PMStatusCode[100]; + char reason[42]; +} swd_hdr; + +#define SWD_BUF_SIZE (20*PAGE_SIZE) +#define SWD_DLOG_SIZE ((4*PAGE_SIZE)-sizeof(swd_hdr)) + +/* Bits in swd_flags */ +#define SWD_WDOG_ENABLED 0x1 +#define SWD_BOOT_BY_WDOG 0x2 +#define SWD_VALID_LOGS 0x4 + + +/* RootDomain IOReporting channels */ +#define kSleepCntChID IOREPORT_MAKEID('S','l','e','e','p','C','n','t') +#define kDarkWkCntChID IOREPORT_MAKEID('G','U','I','W','k','C','n','t') +#define kUserWkCntChID IOREPORT_MAKEID('D','r','k','W','k','C','n','t') + + + #if defined(KERNEL) && defined(__cplusplus) /*!
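The three RootDomain IOReporting channel IDs above are eight ASCII characters packed into a 64-bit integer. The stand-alone sketch below illustrates the packing idea only; it is not the actual IOREPORT_MAKEID macro definition:

```cpp
#include <cstdint>
#include <cstdio>

// Pack eight characters into a uint64_t, first character in the most
// significant byte -- conceptually what an ID built from
// 'S','l','e','e','p','C','n','t' boils down to. Sketch only.
static uint64_t makeChannelID(const char (&c)[9])   // 8 chars + NUL
{
    uint64_t id = 0;
    for (int i = 0; i < 8; i++)
        id = (id << 8) | static_cast<uint8_t>(c[i]);
    return id;
}

int main()
{
    std::printf("SleepCnt -> 0x%016llx\n",
                static_cast<unsigned long long>(makeChannelID("SleepCnt")));
    return 0;
}
```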
diff --git a/iokit/IOKit/pwr_mgt/IOPMlog.h b/iokit/IOKit/pwr_mgt/IOPMlog.h index 2f82c1b4e..41727cdcb 100644 --- a/iokit/IOKit/pwr_mgt/IOPMlog.h +++ b/iokit/IOKit/pwr_mgt/IOPMlog.h @@ -52,8 +52,8 @@ enum PMLogEnum { kPMLogCriticalTemp, // 24 0x05070060 kPMLogOverrideOn, // 25 0x05070064 kPMLogOverrideOff, // 26 0x05070068 - kPMLogEnqueueErr, // 27 0x0507006c - NOT USED - kPMLogCollapseQueue, // 28 0x05070070 - NOT USED + kPMLogChangeStateForRootDomain, // 27 0x0507006c + kPMLogCSynchronizePowerTree, // 28 0x05070070 kPMLogChangeDone, // 29 0x05070074 kPMLogCtrlDriverTardy, // 30 0x05070078 - controlling driver didn't acknowledge kPMLogIntDriverTardy, // 31 0x0507007c - interested driver didn't acknowledge diff --git a/iokit/IOKit/pwr_mgt/IOPMpowerState.h b/iokit/IOKit/pwr_mgt/IOPMpowerState.h index f7f4c8a60..dd945a43f 100644 --- a/iokit/IOKit/pwr_mgt/IOPMpowerState.h +++ b/iokit/IOKit/pwr_mgt/IOPMpowerState.h @@ -39,7 +39,7 @@ @field outputPowerCharacter Describes the power provided in this state. @field inputPowerRequirement Describes the input power required in this state. @field staticPower Describes average consumption in milliwatts. Unused; drivers may specify 0. - @field unbudgetedPower Describes additional consumption from separate power supply (milliWatts). Unused; drivers may specify 0. + @field stateOrder Valid in version kIOPMPowerStateVersion2 or greater of this structure. Defines ordering of power states independently of the power state ordinal. @field powerToAttain Describes additional power to attain this state from next lower state (in milliWatts). Unused; drivers may specify 0. @field timeToAttain Describes time required to enter this state from next lower state (in microseconds). Unused; drivers may specify 0. @field settleUpTime Describes settle time required after entering this state from next lower state (microseconds). Unused; drivers may specify 0. @@ -56,7 +56,7 @@ struct IOPMPowerState IOPMPowerFlags outputPowerCharacter; IOPMPowerFlags inputPowerRequirement; unsigned long staticPower; - unsigned long unbudgetedPower; + unsigned long stateOrder; unsigned long powerToAttain; unsigned long timeToAttain; unsigned long settleUpTime; @@ -68,5 +68,6 @@ struct IOPMPowerState typedef struct IOPMPowerState IOPMPowerState; enum { - kIOPMPowerStateVersion1 = 1 + kIOPMPowerStateVersion1 = 1, + kIOPMPowerStateVersion2 = 2 }; diff --git a/iokit/IOKit/pwr_mgt/IOPowerConnection.h b/iokit/IOKit/pwr_mgt/IOPowerConnection.h index a7ece0ad5..4e66198c2 100644 --- a/iokit/IOKit/pwr_mgt/IOPowerConnection.h +++ b/iokit/IOKit/pwr_mgt/IOPowerConnection.h @@ -39,6 +39,13 @@ #include #include +/*! @class IOPowerConnection + * Do not use IOPowerConnection. This class is an implementation detail defined + * for IOPM's management of the IORegistry IOPower plane. + * + * Only Kernel IOKit power management should reference the IOPowerConnection class.
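To make the version-2 layout of IOPMPowerState concrete, here is a hypothetical two-entry OFF/ON table; all values are illustrative, and stateOrder simply occupies the slot that used to be unbudgetedPower. A driver would hand such a table to registerPowerDriver() as before:

```cpp
#include <IOKit/pwr_mgt/IOPM.h>
#include <IOKit/pwr_mgt/IOPMpowerState.h>

// Hypothetical OFF/ON table using the version-2 layout. Field order:
// version, capabilityFlags, outputPowerCharacter, inputPowerRequirement,
// staticPower, stateOrder, powerToAttain, timeToAttain, settleUpTime,
// timeToLower, settleDownTime, powerDomainBudget.
static IOPMPowerState gExamplePowerStates[2] =
{
    { kIOPMPowerStateVersion2, 0, 0, 0,
      0, /* stateOrder */ 0, 0, 0, 0, 0, 0, 0 },
    { kIOPMPowerStateVersion2, kIOPMDeviceUsable, kIOPMPowerOn, kIOPMPowerOn,
      0, /* stateOrder */ 1, 0, 0, 0, 0, 0, 0 }
};
```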
+ */ + class IOPowerConnection : public IOService { OSDeclareDefaultStructors(IOPowerConnection) diff --git a/iokit/IOKit/pwr_mgt/Makefile b/iokit/IOKit/pwr_mgt/Makefile index db62a3d24..ad7dcbbdf 100644 --- a/iokit/IOKit/pwr_mgt/Makefile +++ b/iokit/IOKit/pwr_mgt/Makefile @@ -4,8 +4,8 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders +INCDIR = $(IOKIT_FRAMEDIR)/Headers +LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders include $(MakeInc_cmd) include $(MakeInc_def) @@ -16,16 +16,6 @@ NOT_EXPORT_HEADERS = \ IOPMinformeeList.h \ IOPMlog.h \ IOPMPrivate.h - -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = -INSTINC_SUBDIRS_X86_64 = -INSTINC_SUBDIRS_ARM = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} -EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index c817bf40d..3fe0984db 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -157,6 +157,7 @@ public: virtual IOReturn setProperties( OSObject * ); virtual bool serializeProperties( OSSerialize * s ) const; + virtual OSObject * copyProperty( const char * aKey ) const; /*! @function systemPowerEventOccurred @abstract Other drivers may inform IOPMrootDomain of system PM events @@ -311,7 +312,7 @@ public: @result On success, returns a new assertion of type IOPMDriverAssertionID * */ IOReturn releasePMAssertion(IOPMDriverAssertionID releaseAssertion); - + private: virtual IOReturn changePowerStateTo( unsigned long ordinal ); virtual IOReturn changePowerStateToPriv( unsigned long ordinal ); @@ -321,6 +322,15 @@ private: virtual bool askChangeDown( unsigned long ); virtual void tellChangeUp( unsigned long ); virtual void tellNoChangeDown( unsigned long ); + virtual IOReturn configureReport(IOReportChannelList *channels, + IOReportConfigureAction action, + void *result, + void *destination); + virtual IOReturn updateReport(IOReportChannelList *channels, + IOReportUpdateAction action, + void *result, + void *destination); + #ifdef XNU_KERNEL_PRIVATE /* Root Domain internals */ public: @@ -329,47 +339,57 @@ public: IOPMActions * actions ); void overrideOurPowerChange( - IOService * service, - IOPMActions * actions, - unsigned long * inOutPowerState, - uint32_t * inOutChangeFlags ); + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex * inOutPowerState, + IOPMPowerChangeFlags * inOutChangeFlags, + IOPMRequestTag requestTag ); void handleOurPowerChangeStart( - IOService * service, - IOPMActions * actions, - uint32_t powerState, - uint32_t * inOutChangeFlags ); + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex powerState, + IOPMPowerChangeFlags * inOutChangeFlags, + IOPMRequestTag requestTag ); void handleOurPowerChangeDone( - IOService * service, - IOPMActions * actions, - uint32_t powerState, - uint32_t changeFlags ); + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex powerState, + IOPMPowerChangeFlags changeFlags, + IOPMRequestTag requestTag ); void overridePowerChangeForUIService( - IOService * service, - IOPMActions * actions, - unsigned long * inOutPowerState, - uint32_t * inOutChangeFlags ); + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex * 
inOutPowerState, + IOPMPowerChangeFlags * inOutChangeFlags ); void handleActivityTickleForDisplayWrangler( - IOService * service, - IOPMActions * actions ); + IOService * service, + IOPMActions * actions ); + + void handleUpdatePowerClientForDisplayWrangler( + IOService * service, + IOPMActions * actions, + const OSSymbol * powerClient, + IOPMPowerStateIndex oldPowerState, + IOPMPowerStateIndex newPowerState ); bool shouldDelayChildNotification( - IOService * service ); + IOService * service ); void handlePowerChangeStartForPCIDevice( - IOService * service, - IOPMActions * actions, - uint32_t powerState, - uint32_t * inOutChangeFlags ); + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex powerState, + IOPMPowerChangeFlags * inOutChangeFlags ); void handlePowerChangeDoneForPCIDevice( - IOService * service, - IOPMActions * actions, - uint32_t powerState, - uint32_t changeFlags ); + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex powerState, + IOPMPowerChangeFlags changeFlags ); void askChangeDownDone( IOPMPowerChangeFlags * inOutChangeFlags, @@ -384,6 +404,8 @@ public: void handleSuspendPMNotificationClient( uint32_t pid, bool doSuspend); + void willNotifyPowerChildren( IOPMPowerStateIndex newPowerState ); + IOReturn setMaintenanceWakeCalendar( const IOPMCalendarStruct * calendar ); @@ -412,7 +434,7 @@ public: bool systemMessageFilter( void * object, void * arg1, void * arg2, void * arg3 ); - void updatePreventIdleSleepList( + bool updatePreventIdleSleepList( IOService * service, bool addNotRemove ); void updatePreventSystemSleepList( IOService * service, bool addNotRemove ); @@ -426,9 +448,9 @@ public: @result kIOReturn on success. */ IOReturn recordPMEvent( PMEventDetails *details ); + void recordPMEvent( uint32_t type, const char *uuid, + uint32_t reason, uint32_t result ); IOReturn recordAndReleasePMEvent( PMEventDetails *details ); - IOReturn recordPMEventGated( PMEventDetails *details ); - IOReturn recordAndReleasePMEventGated( PMEventDetails *details ); void pmStatsRecordEvent( int eventIndex, @@ -463,6 +485,11 @@ public: uint32_t * hibernateFreeRatio, uint32_t * hibernateFreeTime ); #endif + void sleepWakeDebugTrig(bool restart); + void sleepWakeDebugLog(const char *fmt,...); + void sleepWakeDebugEnableWdog(); + bool sleepWakeDebugIsWdogEnabled(); + static void saveTimeoutAppStackShot(void *p0, void *p1); private: friend class PMSettingObject; @@ -485,11 +512,20 @@ private: IOService * newService, IONotifier * notifier); + static bool IONVRAMMatchPublished( void * target, void * refCon, + IOService * newService, + IONotifier * notifier); + static bool batteryPublished( void * target, void * refCon, IOService * resourceService, IONotifier * notifier); + void initializeBootSessionUUID( void ); + + void fullWakeDelayedWork( void ); + IOService * wrangler; + OSDictionary * wranglerIdleSettings; IOLock *featuresDictLock; // guards supportedFeatures IOPMPowerStateQueue *pmPowerStateQueue; @@ -513,14 +549,21 @@ private: const OSSymbol *_statsTimeMSKey; const OSSymbol *_statsResponseTypeKey; const OSSymbol *_statsMessageTypeKey; + const OSSymbol *_statsPowerCapsKey; + uint32_t sleepCnt; + uint32_t darkWakeCnt; + uint32_t displayWakeCnt; OSString *queuedSleepWakeUUIDString; OSArray *pmStatsAppResponses; + OSOrderedSet *noAckApps; // Apps not returning acks to notifications + IOBufferMemoryDescriptor *spindumpDesc; + thread_call_t stackshotOffloader; bool uuidPublished; // Pref: idle time before idle sleep - unsigned long sleepSlider; + unsigned long 
sleepSlider; unsigned long idleSeconds; uint64_t autoWakeStart; uint64_t autoWakeEnd; @@ -531,15 +574,7 @@ private: // Used to wait between, say, display idle and system idle thread_call_t extraSleepTimer; thread_call_t diskSyncCalloutEntry; - - // IOPMActions parameter encoding - enum { - kPMActionsFlagIsDisplayWrangler = 0x00000001, - kPMActionsFlagIsGraphicsDevice = 0x00000002, - kPMActionsFlagIsAudioDevice = 0x00000004, - kPMActionsFlagLimitPower = 0x00000008, - kPMActionsPCIBitNumberMask = 0x000000ff - }; + thread_call_t fullWakeThreadCall; // Track system capabilities. uint32_t _desiredCapability; @@ -551,9 +586,8 @@ private: // Type of clients that can receive system messages. enum { - kSystemMessageClientConfigd = 0x01, - kSystemMessageClientApp = 0x02, - kSystemMessageClientUser = 0x03, + kSystemMessageClientPowerd = 0x01, + kSystemMessageClientLegacyApp = 0x02, kSystemMessageClientKernel = 0x04, kSystemMessageClientAll = 0x07 }; @@ -576,41 +610,52 @@ private: unsigned int clamshellDisabled :1; unsigned int desktopMode :1; unsigned int acAdaptorConnected :1; - unsigned int clamshellSleepDisabled :1; + unsigned int clamshellSleepDisabled :1; unsigned int idleSleepTimerPending :1; unsigned int userDisabledAllSleep :1; unsigned int ignoreTellChangeDown :1; unsigned int wranglerAsleep :1; unsigned int wranglerTickled :1; - unsigned int wranglerSleepIgnored :1; + unsigned int ignoreIdleSleepTimer :1; unsigned int graphicsSuppressed :1; - unsigned int darkWakeThermalAlarm :1; unsigned int capabilityLoss :1; unsigned int pciCantSleepFlag :1; unsigned int pciCantSleepValid :1; - unsigned int logWranglerTickle :1; unsigned int logGraphicsClamp :1; unsigned int darkWakeToSleepASAP :1; unsigned int darkWakeMaintenance :1; unsigned int darkWakeSleepService :1; - unsigned int darkWakePostTickle :1; + unsigned int sleepTimerMaintenance :1; + unsigned int sleepToStandby :1; unsigned int lowBatteryCondition :1; - unsigned int darkWakeThermalEmergency:1; unsigned int hibernateDisabled :1; - unsigned int hibernateNoDefeat :1; - unsigned int rejectWranglerTickle :1; + unsigned int hibernateRetry :1; unsigned int wranglerTickleLatched :1; + unsigned int userIsActive :1; + unsigned int userWasActive :1; + unsigned int displayIdleForDemandSleep :1; uint32_t hibernateMode; + AbsoluteTime userActivityTime; + AbsoluteTime userActivityTime_prev; uint32_t userActivityCount; uint32_t userActivityAtSleep; uint32_t lastSleepReason; + uint32_t fullToDarkReason; uint32_t hibernateAborted; + enum FullWakeReason { + kFullWakeReasonNone = 0, + kFullWakeReasonLocalUser = 1, + kFullWakeReasonDisplayOn = 2, + fFullWakeReasonDisplayOnAndLocalUser = 3 + }; + uint32_t fullWakeReason; + // Info for communicating system state changes to PMCPU int32_t idxPMCPUClamshell; int32_t idxPMCPULimitedPower; @@ -623,7 +668,7 @@ private: thread_call_t aggressivesThreadCall; OSData * aggressivesData; - AbsoluteTime wranglerSleepTime; + AbsoluteTime userBecameInactiveTime; AbsoluteTime systemWakeTime; // PCI top-level PM trace @@ -652,6 +697,12 @@ private: #if HIBERNATION clock_sec_t _standbyTimerResetSeconds; #endif + volatile uint32_t swd_lock; /* Lock to access swd_buffer & its header */ + void * swd_buffer; /* Memory allocated for dumping sleep/wake logs */ + uint8_t swd_flags; /* Flags defined in IOPMPrivate.h */ + + IOMemoryMap * swd_logBufMap; /* Memory with sleep/wake logs from previous boot */ + int findSuspendedPID(uint32_t pid, uint32_t *outRefCount); @@ -659,7 +710,10 @@ private: IOReturn privateSleepSystem( uint32_t
sleepReason ); void reportUserInput( void ); void setDisableClamShellSleep( bool ); - bool checkSystemCanSleep( IOOptionBits options = 0 ); + bool checkSystemSleepAllowed( IOOptionBits options, + uint32_t sleepReason ); + bool checkSystemSleepEnabled( void ); + bool checkSystemCanSleep( uint32_t sleepReason ); bool checkSystemCanSustainFullWake( void ); void adjustPowerState( bool sleepASAP = false ); @@ -679,6 +733,7 @@ private: void startIdleSleepTimer( uint32_t inSeconds ); void cancelIdleSleepTimer( void ); + uint32_t getTimeToIdleSleep( void ); IOReturn setAggressiveness( unsigned long type, @@ -702,12 +757,19 @@ private: void publishSleepWakeUUID( bool shouldPublish ); void evaluatePolicy( int stimulus, uint32_t arg = 0 ); + void requestFullWake( FullWakeReason reason ); + void willEnterFullWake( void ); - void evaluateAssertions(IOPMDriverAssertionType newAssertions, - IOPMDriverAssertionType oldAssertions); + void evaluateAssertions(IOPMDriverAssertionType newAssertions, + IOPMDriverAssertionType oldAssertions); void deregisterPMSettingObject( PMSettingObject * pmso ); + void sleepWakeDebugMemAlloc( ); + void sleepWakeDebugDump(IOMemoryMap *logBufMap); + IOMemoryMap *sleepWakeDebugRetrieve(); + errno_t sleepWakeDebugSaveFile(const char *name, char *buf, int len); + #if HIBERNATION bool getSleepOption( const char * key, uint32_t * option ); bool evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p, @@ -717,6 +779,7 @@ private: #endif /* HIBERNATION */ bool latchDisplayWranglerTickle( bool latch ); + void setDisplayPowerOn( uint32_t options ); #endif /* XNU_KERNEL_PRIVATE */ }; diff --git a/iokit/IOKit/rtc/Makefile b/iokit/IOKit/rtc/Makefile index e16d5b83a..6d08bc486 100644 --- a/iokit/IOKit/rtc/Makefile +++ b/iokit/IOKit/rtc/Makefile @@ -4,8 +4,8 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders +INCDIR = $(IOKIT_FRAMEDIR)/Headers +LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders include $(MakeInc_cmd) include $(MakeInc_def) @@ -13,20 +13,10 @@ include $(MakeInc_def) MI_DIR = rtc NOT_EXPORT_HEADERS = -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = -INSTINC_SUBDIRS_X86_64 = -INSTINC_SUBDIRS_ARM = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} -EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} - ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) INSTALL_MI_LIST = -INSTALL_MI_LCL_LIST = "" +INSTALL_MI_LCL_LIST = INSTALL_MI_DIR = $(MI_DIR) EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS)) diff --git a/iokit/IOKit/system_management/Makefile b/iokit/IOKit/system_management/Makefile index c887db562..7453b1620 100644 --- a/iokit/IOKit/system_management/Makefile +++ b/iokit/IOKit/system_management/Makefile @@ -4,8 +4,8 @@ export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders +INCDIR = $(IOKIT_FRAMEDIR)/Headers +LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders include $(MakeInc_cmd) include $(MakeInc_def) @@ -13,20 +13,10 @@ include $(MakeInc_def) MI_DIR = system_management NOT_EXPORT_HEADERS = -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = -INSTINC_SUBDIRS_X86_64 = -INSTINC_SUBDIRS_ARM = - -EXPINC_SUBDIRS = 
${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} -EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} - ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) INSTALL_MI_LIST = -INSTALL_MI_LCL_LIST = "" +INSTALL_MI_LCL_LIST = INSTALL_MI_DIR = $(MI_DIR) EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS)) diff --git a/iokit/IOKit/x86_64/Makefile b/iokit/IOKit/x86_64/Makefile deleted file mode 100644 index 3b4a79b4e..000000000 --- a/iokit/IOKit/x86_64/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - -IOKIT_FRAMEDIR = $(FRAMEDIR)/IOKit.framework/Versions/A -export INCDIR = $(IOKIT_FRAMEDIR)/Headers -export LCLDIR = $(IOKIT_FRAMEDIR)/PrivateHeaders - -include $(MakeInc_cmd) -include $(MakeInc_def) - -MD_DIR = x86_64 -EXCLUDE_HEADERS = - -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_X86_64 = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} - -ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h)) -HEADER_LIST = $(filter-out $(EXCLUDE_HEADERS), $(ALL_HEADERS)) - -INSTALL_MD_LIST = ${HEADER_LIST} -INSTALL_MD_LCL_LIST = "" -INSTALL_MD_DIR = $(MD_DIR) - -EXPORT_MD_LIST = -EXPORT_MD_DIR = IOKit/$(MD_DIR) - -include $(MakeInc_rule) -include $(MakeInc_dir) diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp index 377dc2129..1b1775916 100644 --- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp +++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp @@ -71,211 +71,29 @@ enum /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#if 0 -#undef assert -#define assert(ex) \ - ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) -#endif - -enum -{ - kIOPageAllocChunkBytes = (PAGE_SIZE / 64), - kIOPageAllocSignature = 'iopa' -}; - -struct io_pagealloc_t -{ - queue_chain_t link; - uint64_t avail; - uint32_t signature; -}; -typedef struct io_pagealloc_t io_pagealloc_t; - -typedef char io_pagealloc_t_assert[(sizeof(io_pagealloc_t) <= kIOPageAllocChunkBytes) ? 
1 : -1]; +#define super IOGeneralMemoryDescriptor +OSDefineMetaClassAndStructors(IOBufferMemoryDescriptor, + IOGeneralMemoryDescriptor); -IOSimpleLock * gIOPageAllocLock; -queue_head_t gIOPageAllocList; -vm_size_t gIOPageAllocCount; -vm_size_t gIOPageAllocBytes; +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -static io_pagealloc_t * -iopa_allocpage(void) +static uintptr_t IOBMDPageProc(iopa_t * a) { - kern_return_t kr; - io_pagealloc_t * pa; - vm_address_t vmaddr = 0; + kern_return_t kr; + vm_address_t vmaddr = 0; + int options = 0; // KMA_LOMEM; - int options = 0; // KMA_LOMEM; kr = kernel_memory_allocate(kernel_map, &vmaddr, page_size, 0, options); - if (KERN_SUCCESS != kr) return (0); - - bzero((void *) vmaddr, page_size); - pa = (typeof(pa)) (vmaddr + page_size - kIOPageAllocChunkBytes); - - pa->signature = kIOPageAllocSignature; - pa->avail = -2ULL; - - return (pa); -} - -static void -iopa_freepage(io_pagealloc_t * pa) -{ - kmem_free( kernel_map, trunc_page((uintptr_t) pa), page_size); -} - -static uintptr_t -iopa_allocinpage(io_pagealloc_t * pa, uint32_t count, uint64_t align) -{ - uint32_t n, s; - uint64_t avail = pa->avail; - - assert(avail); - - // find strings of count 1 bits in avail - for (n = count; n > 1; n -= s) - { - s = n >> 1; - avail = avail & (avail << s); - } - // and aligned - avail &= align; - - if (avail) - { - n = __builtin_clzll(avail); - pa->avail &= ~((-1ULL << (64 - count)) >> n); - if (!pa->avail && pa->link.next) - { - remque(&pa->link); - pa->link.next = 0; - } - return (n * kIOPageAllocChunkBytes + trunc_page((uintptr_t) pa)); - } - - return (0); -} - -static uint32_t -log2up(uint32_t size) -{ - if (size <= 1) size = 0; - else size = 32 - __builtin_clz(size - 1); - return (size); -} - -static uintptr_t -iopa_alloc(vm_size_t bytes, uint32_t balign) -{ - static const uint64_t align_masks[] = { - 0xFFFFFFFFFFFFFFFF, - 0xAAAAAAAAAAAAAAAA, - 0x8888888888888888, - 0x8080808080808080, - 0x8000800080008000, - 0x8000000080000000, - 0x8000000000000000, - }; - io_pagealloc_t * pa; - uintptr_t addr = 0; - uint32_t count; - uint64_t align; - - if (!bytes) bytes = 1; - count = (bytes + kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes; - align = align_masks[log2up((balign + kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes)]; - - IOSimpleLockLock(gIOPageAllocLock); - pa = (typeof(pa)) queue_first(&gIOPageAllocList); - while (!queue_end(&gIOPageAllocList, &pa->link)) - { - addr = iopa_allocinpage(pa, count, align); - if (addr) - { - gIOPageAllocBytes += bytes; - break; - } - pa = (typeof(pa)) queue_next(&pa->link); - } - IOSimpleLockUnlock(gIOPageAllocLock); - if (!addr) - { - pa = iopa_allocpage(); - if (pa) - { - addr = iopa_allocinpage(pa, count, align); - IOSimpleLockLock(gIOPageAllocLock); - if (pa->avail) enqueue_head(&gIOPageAllocList, &pa->link); - gIOPageAllocCount++; - if (addr) gIOPageAllocBytes += bytes; - IOSimpleLockUnlock(gIOPageAllocLock); - } - } - - if (addr) - { - assert((addr & ((1 << log2up(balign)) - 1)) == 0); - IOStatisticsAlloc(kIOStatisticsMallocAligned, bytes); -#if IOALLOCDEBUG - debug_iomalloc_size += bytes; -#endif - } - - return (addr); -} - -static void -iopa_free(uintptr_t addr, vm_size_t bytes) -{ - io_pagealloc_t * pa; - uint32_t count; - uintptr_t chunk; - - if (!bytes) bytes = 1; - - chunk = (addr & page_mask); - assert(0 == (chunk & (kIOPageAllocChunkBytes - 1))); - - pa = (typeof(pa)) (addr | (page_size - kIOPageAllocChunkBytes)); - assert(kIOPageAllocSignature == pa->signature); - count = (bytes + 
kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes; - chunk /= kIOPageAllocChunkBytes; + if (KERN_SUCCESS != kr) vmaddr = 0; + else bzero((void *) vmaddr, page_size); - IOSimpleLockLock(gIOPageAllocLock); - if (!pa->avail) - { - assert(!pa->link.next); - enqueue_tail(&gIOPageAllocList, &pa->link); - } - pa->avail |= ((-1ULL << (64 - count)) >> chunk); - if (pa->avail != -2ULL) pa = 0; - else - { - remque(&pa->link); - pa->link.next = 0; - pa->signature = 0; - gIOPageAllocCount--; - } - gIOPageAllocBytes -= bytes; - IOSimpleLockUnlock(gIOPageAllocLock); - if (pa) iopa_freepage(pa); - -#if IOALLOCDEBUG - debug_iomalloc_size -= bytes; -#endif - IOStatisticsAlloc(kIOStatisticsFreeAligned, bytes); + return ((uintptr_t) vmaddr); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#define super IOGeneralMemoryDescriptor -OSDefineMetaClassAndStructors(IOBufferMemoryDescriptor, - IOGeneralMemoryDescriptor); - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - #ifndef __LP64__ bool IOBufferMemoryDescriptor::initWithOptions( IOOptionBits options, @@ -449,7 +267,14 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( { _internalFlags |= kInternalFlagPageAllocated; needZero = false; - _buffer = (void *) iopa_alloc(capacity, alignment); + _buffer = (void *) iopa_alloc(&gIOBMDPageAllocator, &IOBMDPageProc, capacity, alignment); + if (_buffer) + { + IOStatisticsAlloc(kIOStatisticsMallocAligned, capacity); +#if IOALLOCDEBUG + debug_iomalloc_size += capacity; +#endif + } } else if (alignment > 1) { @@ -500,7 +325,8 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( while (startAddr < endAddr) { - *startAddr; + UInt8 dummyVar = *startAddr; + (void) dummyVar; startAddr += page_size; } } @@ -731,7 +557,16 @@ void IOBufferMemoryDescriptor::free() } else if (kInternalFlagPageAllocated & internalFlags) { - iopa_free((uintptr_t) buffer, size); + uintptr_t page; + page = iopa_free(&gIOBMDPageAllocator, (uintptr_t) buffer, size); + if (page) + { + kmem_free(kernel_map, page, page_size); + } +#if IOALLOCDEBUG + debug_iomalloc_size -= size; +#endif + IOStatisticsAlloc(kIOStatisticsFreeAligned, size); } else if (alignment > 1) { diff --git a/iokit/Kernel/IOCPU.cpp b/iokit/Kernel/IOCPU.cpp index eae15f97c..6278f43cf 100644 --- a/iokit/Kernel/IOCPU.cpp +++ b/iokit/Kernel/IOCPU.cpp @@ -207,7 +207,7 @@ IOServicePlatformAction(void * refcon0, void * refcon1, uint32_t priority, kprintf("%s -> %s\n", function->getCStringNoCopy(), service_name); ret = service->callPlatformFunction(function, false, - (void *) priority, param1, param2, param3); + (void *)(uintptr_t) priority, param1, param2, param3); return (ret); } @@ -687,10 +687,11 @@ void IOCPUInterruptController::enableCPUInterrupt(IOCPU *cpu) IOInterruptHandler, this, &IOCPUInterruptController::handleInterrupt); ml_install_interrupt_handler(cpu, cpu->getCPUNumber(), this, handler, 0); - - enabledCPUs++; - - if (enabledCPUs == numCPUs) thread_wakeup(this); + + // Ensure that the increment is seen by all processors + OSIncrementAtomic(&enabledCPUs); + + if (enabledCPUs == numCPUs) thread_wakeup(this); } IOReturn IOCPUInterruptController::registerInterrupt(IOService *nub, diff --git a/iokit/Kernel/IOCatalogue.cpp b/iokit/Kernel/IOCatalogue.cpp index ee193c027..eb8dfbafb 100644 --- a/iokit/Kernel/IOCatalogue.cpp +++ b/iokit/Kernel/IOCatalogue.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2006 Apple Inc. All rights reserved. + * Copyright (c) 1998-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * * @@ -351,7 +351,7 @@ bool IOCatalogue::addDrivers( * The catalogue must be able to contain personalities that * are proper supersets of others. * Do not compare just the properties present in one driver - * pesonality or the other. + * personality or the other. */ if (personality->isEqualTo(driver)) { break; } @@ -803,17 +803,22 @@ bool IOCatalogue::resetAndAddDrivers(OSArray * drivers, bool doNubMatching) for (idx = 0; (thisOldPersonality = (OSDictionary *) array->getObject(idx)); idx++) { if (thisOldPersonality->getObject("KernelConfigTable")) continue; - if (newPersonalities) for (newIdx = 0; - (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); - newIdx++) + if (newPersonalities) + for (newIdx = 0; + (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); + newIdx++) { /* Unlike in other functions, this comparison must be exact! * The catalogue must be able to contain personalities that * are proper supersets of others. * Do not compare just the properties present in one driver - * pesonality or the other. + * personality or the other. */ - if (thisNewPersonality->isEqualTo(thisOldPersonality)) + if (OSDynamicCast(OSDictionary, thisNewPersonality) == NULL) { + /* skip thisNewPersonality if it is not an OSDictionary */ + continue; + } + if (thisNewPersonality->isEqualTo(thisOldPersonality)) break; } if (thisNewPersonality) @@ -835,15 +840,20 @@ bool IOCatalogue::resetAndAddDrivers(OSArray * drivers, bool doNubMatching) } } - // add new - for (newIdx = 0; - (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); - newIdx++) - { + // add new + for (newIdx = 0; + (thisNewPersonality = (OSDictionary *) newPersonalities->getObject(newIdx)); + newIdx++) + { + if (OSDynamicCast(OSDictionary, thisNewPersonality) == NULL) { + /* skip thisNewPersonality if it is not an OSDictionary */ + continue; + } + OSKext::uniquePersonalityProperties(thisNewPersonality); addPersonality(thisNewPersonality); matchSet->setObject(thisNewPersonality); - } + } /* Finally, start device matching on all new & removed personalities. */ @@ -910,15 +920,3 @@ bool IOCatalogue::serializeData(IOOptionBits kind, OSSerialize * s) const * These functions are no longer used but are necessary for C++ binary * compatibility on i386.
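A side note on the IOBufferMemoryDescriptor hunk above: the old bare `*startAddr;` statement was intended to fault each page in, but a load whose result is never used may be discarded by the compiler, so the patch binds it to dummyVar. A user-space sketch of the same idiom (names are illustrative; the volatile qualifier makes the forced read explicit):

```cpp
#include <cstddef>
#include <cstdint>

// Touch one byte per page so the VM system faults the range in ahead of
// time. volatile guarantees the loads are not optimized away.
static void prefault(const void * base, size_t length, size_t pageSize)
{
    const volatile uint8_t * p =
        static_cast<const volatile uint8_t *>(base);
    for (size_t off = 0; off < length; off += pageSize)
    {
        uint8_t v = p[off];   // forced read, analogous to dummyVar
        (void) v;             // silence unused-variable warnings
    }
}
```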
**********************************************************************/ -#if __i386__ - -bool IOCatalogue::recordStartupExtensions(void) -{ return false; } - -bool IOCatalogue::addExtensionsFromArchive(OSData * mkext) -{ return KERN_NOT_SUPPORTED; } - -kern_return_t IOCatalogue::removeKernelLinker(void) -{ return KERN_NOT_SUPPORTED; } - -#endif /* __i386__ */ diff --git a/iokit/Kernel/IODMACommand.cpp b/iokit/Kernel/IODMACommand.cpp index 1ae4e61ae..3b3c0ee3a 100644 --- a/iokit/Kernel/IODMACommand.cpp +++ b/iokit/Kernel/IODMACommand.cpp @@ -463,7 +463,7 @@ IODMACommand::walkAll(UInt8 op) { offset = 0; numSegments = 0-1; - ret = genIOVMSegments(op, segmentOp, (void *) op, &offset, state, &numSegments); + ret = genIOVMSegments(op, segmentOp, (void *)(uintptr_t) op, &offset, state, &numSegments); } op &= ~kWalkPreflight; @@ -499,7 +499,7 @@ IODMACommand::walkAll(UInt8 op) state->fCopyNext = state->fCopyPageAlloc; offset = 0; numSegments = 0-1; - ret = genIOVMSegments(op, segmentOp, (void *) op, &offset, state, &numSegments); + ret = genIOVMSegments(op, segmentOp, (void *)(uintptr_t) op, &offset, state, &numSegments); state->fPrepared = true; op &= ~(kWalkSyncIn | kWalkSyncOut); } @@ -535,7 +535,7 @@ IODMACommand::walkAll(UInt8 op) state->fCopyNext = state->fCopyPageAlloc; offset = 0; numSegments = 0-1; - ret = genIOVMSegments(op, segmentOp, (void *) op, &offset, state, &numSegments); + ret = genIOVMSegments(op, segmentOp, (void *)(uintptr_t) op, &offset, state, &numSegments); } else if (state->fCopyMD) { @@ -769,7 +769,7 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr { state->fLocalMapperPageAlloc = mapArgs.fAlloc; state->fLocalMapperPageCount = mapArgs.fAllocCount; - state->fMapContig = true; + state->fMapContig = mapArgs.fMapContig; } ret = kIOReturnSuccess; } diff --git a/iokit/Kernel/IODMAController.cpp b/iokit/Kernel/IODMAController.cpp index 603998035..558fde64e 100644 --- a/iokit/Kernel/IODMAController.cpp +++ b/iokit/Kernel/IODMAController.cpp @@ -91,9 +91,9 @@ void IODMAController::completeDMACommand(IODMAEventSource *dmaES, IODMACommand * dmaES->completeDMACommand(dmaCommand); } -void IODMAController::notifyDMACommand(IODMAEventSource *dmaES, IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount) +void IODMAController::notifyDMACommand(IODMAEventSource *dmaES, IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount, AbsoluteTime timeStamp) { - dmaES->notifyDMACommand(dmaCommand, status, actualByteCount); + dmaES->notifyDMACommand(dmaCommand, status, actualByteCount, timeStamp); } diff --git a/iokit/Kernel/IODMAEventSource.cpp b/iokit/Kernel/IODMAEventSource.cpp index a032b9204..1b53b0b0b 100644 --- a/iokit/Kernel/IODMAEventSource.cpp +++ b/iokit/Kernel/IODMAEventSource.cpp @@ -65,6 +65,12 @@ bool IODMAEventSource::init(OSObject *inOwner, return true; } +void IODMAEventSource::free() +{ + if (dmaCommandsCompletedLock != NULL) IOSimpleLockFree(dmaCommandsCompletedLock); + super::free(); +} + IODMAEventSource *IODMAEventSource::dmaEventSource(OSObject *inOwner, IOService *inProvider, Action inCompletion, @@ -119,11 +125,27 @@ IOReturn IODMAEventSource::queryDMACommand(IODMACommand **dmaCommand, IOByteCoun } -IOByteCount IODMAEventSource::getFIFODepth() +IOByteCount IODMAEventSource::getFIFODepth(IODirection direction) +{ + if ((dmaController == 0) || (dmaIndex == 0xFFFFFFFF)) return 0; + + return dmaController->getFIFODepth(dmaIndex, direction); +} + + +IOReturn IODMAEventSource::setFIFODepth(IOByteCount depth) +{ + 
if ((dmaController == 0) || (dmaIndex == 0xFFFFFFFF)) return kIOReturnError; + + return dmaController->setFIFODepth(dmaIndex, depth); +} + + +IOByteCount IODMAEventSource::validFIFODepth(IOByteCount depth, IODirection direction) { if ((dmaController == 0) || (dmaIndex == 0xFFFFFFFF)) return kIOReturnError; - return dmaController->getFIFODepth(dmaIndex); + return dmaController->validFIFODepth(dmaIndex, depth, direction); } @@ -145,7 +167,7 @@ bool IODMAEventSource::checkForWork(void) IOSimpleLockUnlock(dmaCommandsCompletedLock); if (work) { - (*dmaCompletionAction)(owner, this, dmaCommand, dmaCommand->reserved->fStatus, dmaCommand->reserved->fActualByteCount); + (*dmaCompletionAction)(owner, this, dmaCommand, dmaCommand->reserved->fStatus, dmaCommand->reserved->fActualByteCount, dmaCommand->reserved->fTimeStamp); } return again; @@ -165,10 +187,21 @@ void IODMAEventSource::completeDMACommand(IODMACommand *dmaCommand) } } -void IODMAEventSource::notifyDMACommand(IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount) +void IODMAEventSource::notifyDMACommand(IODMACommand *dmaCommand, IOReturn status, IOByteCount actualByteCount, AbsoluteTime timeStamp) { dmaCommand->reserved->fStatus = status; - dmaCommand->reserved->fActualByteCount = actualByteCount; + dmaCommand->reserved->fActualByteCount = actualByteCount; + dmaCommand->reserved->fTimeStamp = timeStamp; - if (dmaNotificationAction != 0) (*dmaNotificationAction)(owner, this, dmaCommand, status, actualByteCount); + if (dmaNotificationAction != 0) (*dmaNotificationAction)(owner, this, dmaCommand, status, actualByteCount, timeStamp); +} + +IOReturn IODMAEventSource::setDMAConfig(UInt32 newReqIndex) +{ + return dmaController->setDMAConfig(dmaIndex, dmaProvider, newReqIndex); +} + +bool IODMAEventSource::validDMAConfig(UInt32 newReqIndex) +{ + return dmaController->validDMAConfig(dmaIndex, dmaProvider, newReqIndex); } diff --git a/iokit/Kernel/IODataQueue.cpp b/iokit/Kernel/IODataQueue.cpp index 95988aaf4..84f88322e 100644 --- a/iokit/Kernel/IODataQueue.cpp +++ b/iokit/Kernel/IODataQueue.cpp @@ -217,10 +217,11 @@ void IODataQueue::sendDataAvailableNotification() msgh = (mach_msg_header_t *)notifyMsg; if (msgh && msgh->msgh_remote_port) { - kr = mach_msg_send_from_kernel_proper(msgh, msgh->msgh_size); + kr = mach_msg_send_from_kernel_with_options(msgh, msgh->msgh_size, MACH_SEND_TIMEOUT, MACH_MSG_TIMEOUT_NONE); switch(kr) { case MACH_SEND_TIMED_OUT: // Notification already sent case MACH_MSG_SUCCESS: + case MACH_SEND_NO_BUFFER: break; default: IOLog("%s: dataAvailableNotification failed - msg_send returned: %d\n", /*getName()*/"IODataQueue", kr); diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp index 4ee53e566..5d6188086 100644 --- a/iokit/Kernel/IODeviceTreeSupport.cpp +++ b/iokit/Kernel/IODeviceTreeSupport.cpp @@ -266,8 +266,8 @@ int IODTGetLoaderInfo( const char *key, void **infoAddr, int *infoSize ) propPtr = (unsigned int *)propObj->getBytesNoCopy(); if ( propPtr == 0 ) return -1; - *infoAddr = (void *)propPtr[0] ; - *infoSize = (int) propPtr[1]; + *infoAddr = (void *)(uintptr_t) (propPtr[0]); + *infoSize = (int) (propPtr[1]); return 0; } @@ -927,11 +927,39 @@ void IODTSetResolving( IORegistryEntry * regEntry, return; } +#if defined(__arm__) || defined(__i386__) || defined(__x86_64__) static SInt32 DefaultCompare( UInt32 cellCount, UInt32 left[], UInt32 right[] ) { cellCount--; return( left[ cellCount ] - right[ cellCount ] ); } +#else +#error Unknown architecture. 
+#endif + +static void AddLengthToCells( UInt32 numCells, UInt32 *cells, UInt64 offset) +{ + if (numCells == 1) + { + cells[0] += (UInt32)offset; + } + else { + UInt64 sum = cells[numCells - 1] + offset; + cells[numCells - 1] = (UInt32)sum; + if (sum > UINT32_MAX) { + cells[numCells - 2] += (UInt32)(sum >> 32); + } + } +} + +static IOPhysicalAddress CellsValue( UInt32 numCells, UInt32 *cells) +{ + if (numCells == 1) { + return IOPhysical32( 0, cells[0] ); + } else { + return IOPhysical32( cells[numCells - 2], cells[numCells - 1] ); + } +} void IODTGetCellCounts( IORegistryEntry * regEntry, UInt32 * sizeCount, UInt32 * addressCount) @@ -951,7 +979,7 @@ void IODTGetCellCounts( IORegistryEntry * regEntry, bool IODTResolveAddressCell( IORegistryEntry * regEntry, UInt32 cellsIn[], - IOPhysicalAddress * phys, IOPhysicalLength * len ) + IOPhysicalAddress * phys, IOPhysicalLength * lenOut ) { IORegistryEntry *parent; OSData *prop; @@ -960,7 +988,7 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry, // cells in addresses below regEntry UInt32 childSizeCells, childAddressCells; UInt32 childCells; - UInt32 cell[ 8 ], length; + UInt32 cell[ 8 ], propLen; UInt64 offset = 0; UInt32 endCell[ 8 ]; UInt32 *range; @@ -969,6 +997,7 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry, UInt32 *endRanges; bool ok = true; SInt64 diff, diff2, endDiff; + UInt64 len, rangeLen; IODTPersistent *persist; IODTCompareAddressCellFunc compare; @@ -980,124 +1009,105 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry, panic("IODTResolveAddressCell: Invalid device tree (%u,%u)", (uint32_t)childAddressCells, (uint32_t)childSizeCells); bcopy( cellsIn, cell, sizeof(UInt32) * childCells ); - if( childSizeCells > 1) - *len = IOPhysical32( cellsIn[ childAddressCells + 1], - cellsIn[ childAddressCells] ); - else - *len = IOPhysical32( 0, cellsIn[ childAddressCells ] ); + *lenOut = CellsValue( childSizeCells, cellsIn + childAddressCells ); do { - prop = OSDynamicCast( OSData, regEntry->getProperty( gIODTRangeKey )); - if( 0 == prop) { + prop = OSDynamicCast( OSData, regEntry->getProperty( gIODTRangeKey )); + if( 0 == prop) { /* end of the road */ - if (childAddressCells == 2) { - *phys = IOPhysical32( cell[ childAddressCells - 1 ], cell [ childAddressCells - 2 ]); - } else { - *phys = IOPhysical32( 0, cell[ childAddressCells - 1 ]); - } + *phys = CellsValue( childAddressCells, cell ); *phys += offset; - break; - } + break; + } - parent = regEntry->getParentEntry( gIODTPlane ); - IODTGetCellCounts( parent, &sizeCells, &addressCells ); - - if( (length = prop->getLength())) { - // search - startRange = (UInt32 *) prop->getBytesNoCopy(); - range = startRange; - endRanges = range + (length / sizeof(UInt32)); - - prop = (OSData *) regEntry->getProperty( gIODTPersistKey ); - if( prop) { - persist = (IODTPersistent *) prop->getBytesNoCopy(); - compare = persist->compareFunc; - } else if (addressCells == childAddressCells) { - compare = DefaultCompare; - } else { - panic("There is no mixed comparison function yet..."); - } + parent = regEntry->getParentEntry( gIODTPlane ); + IODTGetCellCounts( parent, &sizeCells, &addressCells ); + + if( (propLen = prop->getLength())) { + // search + startRange = (UInt32 *) prop->getBytesNoCopy(); + range = startRange; + endRanges = range + (propLen / sizeof(UInt32)); + + prop = (OSData *) regEntry->getProperty( gIODTPersistKey ); + if( prop) { + persist = (IODTPersistent *) prop->getBytesNoCopy(); + compare = persist->compareFunc; + } else if (addressCells == childAddressCells) { + 
compare = DefaultCompare; + } else { + panic("There is no mixed comparison function yet..."); + } - for( ok = false; - range < endRanges; - range += (childCells + addressCells) ) { - - // is cell start within range? - diff = (*compare)( childAddressCells, cell, range ); - - if (childAddressCells > sizeof(endCell)/sizeof(endCell[0])) - panic("IODTResolveAddressCell: Invalid device tree (%u)", (uint32_t)childAddressCells); - - bcopy(range, endCell, childAddressCells * sizeof(UInt32)); - - if (childAddressCells == 2) { - uint64_t sum = endCell[childAddressCells - 2] + IOPhysical32(range[childCells + addressCells - 1], range[childCells + addressCells - 2]); - endCell[childAddressCells - 2] = (uint32_t)(sum & 0x00000000FFFFFFFFULL); - if (sum > UINT32_MAX) { - endCell[childAddressCells - 1] += (uint32_t)((sum & 0xFFFFFFFF00000000ULL) >> 32); - } - } else { - endCell[childAddressCells - 1] += range[childCells + addressCells - 1]; - } - - diff2 = (*compare)( childAddressCells, cell, endCell ); - - if ((diff < 0) || (diff2 >= 0)) - continue; - - ok = (0 == cell[childCells - 1]); - if (!ok) - { - // search for cell end - bcopy(cell, endCell, childAddressCells * sizeof(UInt32)); - - if (childSizeCells == 2) { - uint64_t sum; - sum = endCell[childAddressCells - 2] + IOPhysical32(cell[childCells - 1], cell[childCells - 2]) - 1; - endCell[childAddressCells - 2] = (uint32_t)(sum & 0x00000000FFFFFFFFULL); - if (sum > UINT32_MAX) { - endCell[childAddressCells - 1] += (uint32_t)((sum & 0xFFFFFFFF00000000ULL) >> 32); - } - } else { - endCell[childAddressCells - 1] += cell[childCells - 1] - 1; - } - lookRange = startRange; - for( ; - lookRange < endRanges; - lookRange += (childCells + addressCells) ) - { - // is cell >= range start? - endDiff = (*compare)( childAddressCells, endCell, lookRange ); - if( endDiff < 0) - continue; - if ((endDiff - cell[childCells - 1] + 1 + lookRange[childAddressCells + addressCells - 1]) - == (diff + range[childAddressCells + addressCells - 1])) - { - ok = true; - break; - } - } - if (!ok) - continue; - } - offset += diff; - break; - } + for( ok = false; + range < endRanges; + range += (childCells + addressCells) ) { + + // is cell start within range? 
+ diff = (*compare)( childAddressCells, cell, range ); + + if (childAddressCells > sizeof(endCell)/sizeof(endCell[0])) + panic("IODTResolveAddressCell: Invalid device tree (%u)", (uint32_t)childAddressCells); + + bcopy(range, endCell, childAddressCells * sizeof(UInt32)); + + rangeLen = CellsValue(childSizeCells, range + childAddressCells + addressCells); + AddLengthToCells(childAddressCells, endCell, rangeLen); + + diff2 = (*compare)( childAddressCells, cell, endCell ); + + // if start of cell < start of range, or end of range >= start of cell, skip + if ((diff < 0) || (diff2 >= 0)) + continue; + + len = CellsValue(childSizeCells, cell + childAddressCells); + ok = (0 == len); + + if (!ok) + { + // search for cell end + bcopy(cell, endCell, childAddressCells * sizeof(UInt32)); + + AddLengthToCells(childAddressCells, endCell, len - 1); + + for( lookRange = startRange; + lookRange < endRanges; + lookRange += (childCells + addressCells) ) + { + // make sure end of cell >= range start + endDiff = (*compare)( childAddressCells, endCell, lookRange ); + if( endDiff < 0) + continue; + + UInt64 rangeStart = CellsValue(addressCells, range + childAddressCells); + UInt64 lookRangeStart = CellsValue(addressCells, lookRange + childAddressCells); + if ((endDiff - len + 1 + lookRangeStart) == (diff + rangeStart)) + { + ok = true; + break; + } + } + if (!ok) + continue; + } + offset += diff; + break; + } - if (addressCells + sizeCells > sizeof(cell)/sizeof(cell[0])) - panic("IODTResolveAddressCell: Invalid device tree (%u, %u)", (uint32_t)addressCells, (uint32_t)sizeCells); + if (addressCells + sizeCells > sizeof(cell)/sizeof(cell[0])) + panic("IODTResolveAddressCell: Invalid device tree (%u, %u)", (uint32_t)addressCells, (uint32_t)sizeCells); - // Get the physical start of the range from our parent - bcopy( range + childAddressCells, cell, sizeof(UInt32) * addressCells ); - bzero( cell + addressCells, sizeof(UInt32) * sizeCells ); + // Get the physical start of the range from our parent + bcopy( range + childAddressCells, cell, sizeof(UInt32) * addressCells ); + bzero( cell + addressCells, sizeof(UInt32) * sizeCells ); - } /* else zero length range => pass thru to parent */ + } /* else zero length range => pass thru to parent */ - regEntry = parent; - childSizeCells = sizeCells; - childAddressCells = addressCells; - childCells = childAddressCells + childSizeCells; + regEntry = parent; + childSizeCells = sizeCells; + childAddressCells = addressCells; + childCells = childAddressCells + childSizeCells; } while( ok && regEntry); diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index 034ca65ca..83ab5e703 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -165,18 +165,22 @@ to restrict I/O ops. 
#include #include #include "IOHibernateInternal.h" -#include +#include #include "IOKitKernelInternal.h" #include #include #include +#include extern "C" addr64_t kvtophys(vm_offset_t va); extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#define DISABLE_TRIM 0 +#define TRIM_DELAY 5000 + extern unsigned int save_kdebug_enable; extern uint32_t gIOHibernateState; uint32_t gIOHibernateMode; @@ -206,6 +210,8 @@ static struct kern_direct_file_io_ref_t * gIOHibernateFileRef; static hibernate_cryptvars_t gIOHibernateCryptWakeContext; static hibernate_graphics_t _hibernateGraphics; static hibernate_graphics_t * gIOHibernateGraphicsInfo = &_hibernateGraphics; +static hibernate_statistics_t _hibernateStats; +static hibernate_statistics_t * gIOHibernateStats = &_hibernateStats; enum { @@ -560,6 +566,8 @@ file_extent_callback(void * ref, uint64_t start, uint64_t length) extent.start = start; extent.length = length; + HIBLOG("[0x%qx, 0x%qx]\n", start, length); + ctx->extents->appendBytes(&extent, sizeof(extent)); ctx->size += length; } @@ -629,10 +637,6 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, do { - HIBLOG("sizeof(IOHibernateImageHeader) == %ld\n", sizeof(IOHibernateImageHeader)); - if (sizeof(IOHibernateImageHeader) != 512) - continue; - vars->io = false; vars->buffer = (uint8_t *) ioBuffer->getBytesNoCopy(); vars->bufferHalf = 0; @@ -685,15 +689,16 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, } vars->fileSize = ctx.size; - if (maxiobytes < vars->bufferSize) - vars->bufferSize = maxiobytes; + if (maxiobytes < vars->bufferSize) vars->bufferSize = maxiobytes; vars->extentMap = (IOPolledFileExtent *) extentsData->getBytesNoCopy(); part = IOCopyMediaForDev(block_dev); if (!part) + { + err = kIOReturnNotFound; break; - + } err = part->callPlatformFunction(PLATFORM_FUNCTION_GET_MEDIA_ENCRYPTION_KEY_UUID, false, (void *) &keyUUID, (void *) &keyStoreUUID, NULL, NULL); if ((kIOReturnSuccess == err) && keyUUID && keyStoreUUID) @@ -729,7 +734,10 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, part = IOCopyMediaForDev(hibernate_image_dev); if (!part) + { + err = kIOReturnNotFound; break; + } IORegistryEntry * next; IORegistryEntry * child; @@ -737,7 +745,10 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, vars->pollers = OSArray::withCapacity(4); if (!vars->pollers) - break; + { + err = kIOReturnNoMemory; + break; + } vars->blockSize = 512; next = part; @@ -761,18 +772,27 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, while ((next = child->getParentEntry(gIOServicePlane)) && child->isParent(next, gIOServicePlane, true)); + if (vars->blockSize < 4096) vars->blockSize = 4096; + HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n", major(hibernate_image_dev), minor(hibernate_image_dev), (long)vars->blockSize, vars->pollers->getCount()); + if (vars->pollers->getCount() < kIOHibernateMinPollersNeeded) + { + err = kIOReturnUnsupported; continue; + } + if (vars->blockSize < sizeof(IOHibernateImageHeader)) + { + err = kIOReturnError; + continue; + } err = IOHibernatePollerProbe(vars, (IOService *) part); - if (kIOReturnSuccess != err) - break; + if (kIOReturnSuccess != err) break; err = IOHibernatePollerOpen(vars, kIOPolledPreflightState, ioBuffer); - if (kIOReturnSuccess != err) - break; + if (kIOReturnSuccess != err) break; vars->media = part; next = part; @@ -1133,7 +1153,7 @@ IOHibernateSystemSleep(void) OSObject 
* obj; OSString * str; OSNumber * num; - bool dsSSD; + bool dsSSD, vmflush; IOHibernateVars * vars; gIOHibernateState = kIOHibernateStateInactive; @@ -1185,7 +1205,7 @@ IOHibernateSystemSleep(void) do { vars->srcBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, - 4 * page_size, page_size); + 2 * page_size + WKdm_SCRATCH_BUF_SIZE, page_size); vars->ioBuffer = IOBufferMemoryDescriptor::withOptions(kIODirectionOutIn, 2 * kDefaultIOSize, page_size); @@ -1218,18 +1238,13 @@ IOHibernateSystemSleep(void) gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags; gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; - dsSSD = (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey)); - clock_get_uptime(&startTime); - err = hibernate_setup(gIOHibernateCurrentHeader, - gIOHibernateFreeRatio, gIOHibernateFreeTime, - dsSSD, - &vars->page_list, &vars->page_list_wired, &vars->page_list_pal); - clock_get_uptime(&endTime); - SUB_ABSOLUTETIME(&endTime, &startTime); - absolutetime_to_nanoseconds(endTime, &nsec); - HIBLOG("hibernate_setup(%d) took %qd ms\n", err, nsec / 1000000ULL); - + vmflush = (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey)); uint64_t setFileSize = 0; + err = hibernate_alloc_page_lists(&vars->page_list, + &vars->page_list_wired, + &vars->page_list_pal); + if (KERN_SUCCESS != err) + break; if (vars->fileMinSize || (kIOHibernateModeFileResize & gIOHibernateMode)) { @@ -1237,6 +1252,7 @@ IOHibernateSystemSleep(void) vars->page_list_wired, vars->page_list_pal, true /* preflight */, + vmflush /* discard */, &pageCount); PE_Video consoleInfo; bzero(&consoleInfo, sizeof(consoleInfo)); @@ -1270,6 +1286,16 @@ IOHibernateSystemSleep(void) break; } + clock_get_uptime(&startTime); + err = hibernate_setup(gIOHibernateCurrentHeader, + gIOHibernateFreeRatio, gIOHibernateFreeTime, + vmflush, + vars->page_list, vars->page_list_wired, vars->page_list_pal); + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &startTime); + absolutetime_to_nanoseconds(endTime, &nsec); + HIBLOG("hibernate_setup(%d) took %qd ms\n", err, nsec / 1000000ULL); + dsSSD = ((0 != (kIOHibernateOptionSSD & vars->fileVars->flags)) && (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey))); if (dsSSD) @@ -1871,7 +1897,7 @@ IOHibernateDone(IOHibernateVars * vars) else gIOOptionsEntry->removeProperty(gIOHibernateBootNextKey); } - gIOOptionsEntry->sync(); + if (kIOHibernateStateWakingFromHibernate != gIOHibernateState) gIOOptionsEntry->sync(); } #endif @@ -1945,11 +1971,16 @@ IOHibernateSystemPostWake(void) if ((fileRef = gIOHibernateFileRef)) { gIOHibernateFileRef = 0; + IOSleep(TRIM_DELAY); kern_close_file_for_direct_io(fileRef, +#if DISABLE_TRIM + 0, 0, 0, 0, 0); +#else 0, (caddr_t) gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader), - sizeof(IOHibernateImageHeader), + 0, gIOHibernateCurrentHeader->imageSize); +#endif } gFSState = kFSIdle; } @@ -1990,6 +2021,23 @@ SYSCTL_STRING(_kern, OID_AUTO, bootsignature, SYSCTL_UINT(_kern, OID_AUTO, hibernatemode, CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, &gIOHibernateMode, 0, ""); +SYSCTL_STRUCT(_kern, OID_AUTO, hibernatestatistics, + CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, + gIOHibernateStats, hibernate_statistics_t, ""); + +SYSCTL_UINT(_kern, OID_AUTO, hibernategraphicsready, + CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY, + &gIOHibernateStats->graphicsReadyTime, 0, 
""); +SYSCTL_UINT(_kern, OID_AUTO, hibernatewakenotification, + CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY, + &gIOHibernateStats->wakeNotificationTime, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, hibernatelockscreenready, + CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY, + &gIOHibernateStats->lockScreenReadyTime, 0, ""); +SYSCTL_UINT(_kern, OID_AUTO, hibernatehidready, + CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_ANYBODY, + &gIOHibernateStats->hidReadyTime, 0, ""); + void IOHibernateSystemInit(IOPMrootDomain * rootDomain) @@ -2009,6 +2057,11 @@ IOHibernateSystemInit(IOPMrootDomain * rootDomain) sysctl_register_oid(&sysctl__kern_hibernatefile); sysctl_register_oid(&sysctl__kern_bootsignature); sysctl_register_oid(&sysctl__kern_hibernatemode); + sysctl_register_oid(&sysctl__kern_hibernatestatistics); + sysctl_register_oid(&sysctl__kern_hibernategraphicsready); + sysctl_register_oid(&sysctl__kern_hibernatewakenotification); + sysctl_register_oid(&sysctl__kern_hibernatelockscreenready); + sysctl_register_oid(&sysctl__kern_hibernatehidready); gFSLock = IOLockAlloc(); } @@ -2035,10 +2088,6 @@ no_encrypt_page(vm_offset_t ppnum) return false; } -uint32_t wired_pages_encrypted = 0; -uint32_t dirty_pages_encrypted = 0; -uint32_t wired_pages_clear = 0; - static void hibernate_pal_callback(void *vars_arg, vm_offset_t addr) { @@ -2082,12 +2131,16 @@ hibernate_write_image(void) IOItemCount count; uint8_t * src; uint8_t * data; - IOByteCount pageCompressedSize; + uint8_t * compressed; + uint8_t * scratch; + void * zerosCompressed; + IOByteCount pageCompressedSize, zerosCompressedLen; uint64_t compressedSize, uncompressedSize; uint64_t image1Size = 0; uint32_t bitmap_size; bool iterDone, pollerOpen, needEncrypt; uint32_t restore1Sum, sum, sum1, sum2; + int wkresult; uint32_t tag; uint32_t pageType; uint32_t pageAndCount[2]; @@ -2102,12 +2155,18 @@ hibernate_write_image(void) uint32_t progressStamp; uint32_t blob, lastBlob = (uint32_t) -1L; + uint32_t wiredPagesEncrypted; + uint32_t dirtyPagesEncrypted; + uint32_t wiredPagesClear; + uint32_t zeroPageCount; + hibernate_cryptvars_t _cryptvars; hibernate_cryptvars_t * cryptvars = 0; - wired_pages_encrypted = 0; - dirty_pages_encrypted = 0; - wired_pages_clear = 0; + wiredPagesEncrypted = 0; + dirtyPagesEncrypted = 0; + wiredPagesClear = 0; + zeroPageCount = 0; if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents) return (false /* sleep */ ); @@ -2162,6 +2221,9 @@ hibernate_write_image(void) vars->page_list_wired, vars->page_list_pal, false /* !preflight */, + /* discard_all */ + ((0 == (kIOHibernateModeSleep & gIOHibernateMode)) + && (0 != ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode))), &pageCount); HIBLOG("hibernate_page_list_setall found pageCount %d\n", pageCount); @@ -2185,13 +2247,13 @@ hibernate_write_image(void) clock_get_uptime(&allTime); IOService::getPMRootDomain()->pmStatsRecordEvent( kIOPMStatsHibernateImageWrite | kIOPMStatsEventStartFlag, allTime); - do { compressedSize = 0; uncompressedSize = 0; + zeroPageCount = 0; - IOPolledFileSeek(vars->fileVars, sizeof(IOHibernateImageHeader)); + IOPolledFileSeek(vars->fileVars, vars->fileVars->blockSize); HIBLOG("IOHibernatePollerOpen, ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled()); @@ -2277,9 +2339,12 @@ hibernate_write_image(void) break; } - vars->fileVars->encryptStart = (vars->fileVars->position & ~(AES_BLOCK_SIZE - 1)); - vars->fileVars->encryptEnd = UINT64_MAX; - 
HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart); + if (kIOHibernateModeEncrypt & gIOHibernateMode) + { + vars->fileVars->encryptStart = (vars->fileVars->position & ~(AES_BLOCK_SIZE - 1)); + vars->fileVars->encryptEnd = UINT64_MAX; + HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart); + } // write the preview buffer @@ -2389,7 +2454,17 @@ hibernate_write_image(void) (void)hibernate_pal_callback; src = (uint8_t *) vars->srcBuffer->getBytesNoCopy(); - + compressed = src + page_size; + scratch = compressed + page_size; + + // compress a zero page + bzero(src, page_size); + zerosCompressed = vars->handoffBuffer->getBytesNoCopy(); + zerosCompressedLen = WKdm_compress_new((WK_word*) src, + (WK_word*) zerosCompressed, + (WK_word*) scratch, + page_size - 4); + pagesDone = 0; lastBlob = 0; @@ -2412,9 +2487,12 @@ hibernate_write_image(void) if (kUnwiredEncrypt == pageType) { // start unwired image - vars->fileVars->encryptStart = (vars->fileVars->position & ~(((uint64_t)AES_BLOCK_SIZE) - 1)); - vars->fileVars->encryptEnd = UINT64_MAX; - HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart); + if (kIOHibernateModeEncrypt & gIOHibernateMode) + { + vars->fileVars->encryptStart = (vars->fileVars->position & ~(((uint64_t)AES_BLOCK_SIZE) - 1)); + vars->fileVars->encryptEnd = UINT64_MAX; + HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart); + } bcopy(&cryptvars->aes_iv[0], &gIOHibernateCryptWakeContext.aes_iv[0], sizeof(cryptvars->aes_iv)); @@ -2446,9 +2524,9 @@ hibernate_write_image(void) switch (pageType) { - case kWiredEncrypt: wired_pages_encrypted += count; break; - case kWiredClear: wired_pages_clear += count; break; - case kUnwiredEncrypt: dirty_pages_encrypted += count; break; + case kWiredEncrypt: wiredPagesEncrypted += count; break; + case kWiredClear: wiredPagesClear += count; break; + case kUnwiredEncrypt: dirtyPagesEncrypted += count; break; } if (iterDone && (kWiredEncrypt == pageType)) {/* not yet end of wired list */} @@ -2479,25 +2557,30 @@ hibernate_write_image(void) sum2 += sum; clock_get_uptime(&startTime); + wkresult = WKdm_compress_new((WK_word*) src, + (WK_word*) compressed, + (WK_word*) scratch, + page_size - 4); - pageCompressedSize = WKdm_compress ((WK_word*) src, (WK_word*) (src + page_size), PAGE_SIZE_IN_WORDS); - clock_get_uptime(&endTime); ADD_ABSOLUTETIME(&compTime, &endTime); SUB_ABSOLUTETIME(&compTime, &startTime); + compBytes += page_size; - + pageCompressedSize = (-1 == wkresult) ? 
page_size : wkresult; + + if ((pageCompressedSize == zerosCompressedLen) + && !bcmp(compressed, zerosCompressed, zerosCompressedLen)) + { + pageCompressedSize = 0; + zeroPageCount++; + } + if (kIOHibernateModeEncrypt & gIOHibernateMode) pageCompressedSize = (pageCompressedSize + AES_BLOCK_SIZE - 1) & ~(AES_BLOCK_SIZE - 1); - - if (pageCompressedSize > page_size) - { -// HIBLOG("------------lose: %d\n", pageCompressedSize); - pageCompressedSize = page_size; - } if (pageCompressedSize != page_size) - data = (src + page_size); + data = compressed; else data = src; @@ -2511,8 +2594,7 @@ hibernate_write_image(void) break; compressedSize += pageCompressedSize; - if (pageCompressedSize) - uncompressedSize += page_size; + uncompressedSize += page_size; pagesDone++; if (vars->consoleMapping && (0 == (1023 & pagesDone))) @@ -2545,7 +2627,7 @@ hibernate_write_image(void) if (kIOReturnSuccess != err) break; - if ((kEncrypt & pageType)) + if ((kEncrypt & pageType) && vars->fileVars->encryptStart) { vars->fileVars->encryptEnd = ((vars->fileVars->position + 511) & ~511ULL); HIBLOG("encryptEnd %qx\n", vars->fileVars->encryptEnd); @@ -2599,7 +2681,8 @@ hibernate_write_image(void) header->fileExtentMapSize = sizeof(header->fileExtentMap); bcopy(&fileExtents[0], &header->fileExtentMap[0], count); - header->deviceBase = vars->fileVars->block0; + header->deviceBase = vars->fileVars->block0; + header->deviceBlockSize = vars->fileVars->blockSize; IOPolledFileSeek(vars->fileVars, 0); err = IOPolledFileWrite(vars->fileVars, @@ -2623,8 +2706,7 @@ hibernate_write_image(void) SUB_ABSOLUTETIME(&endTime, &allTime); absolutetime_to_nanoseconds(endTime, &nsec); - HIBLOG("all time: %qd ms, ", - nsec / 1000000ULL); + HIBLOG("all time: %qd ms, ", nsec / 1000000ULL); absolutetime_to_nanoseconds(compTime, &nsec); HIBLOG("comp bytes: %qd time: %qd ms %qd Mb/s, ", @@ -2644,8 +2726,8 @@ hibernate_write_image(void) uncompressedSize ? 
((int) ((compressedSize * 100ULL) / uncompressedSize)) : 0, sum1, sum2); - HIBLOG("wired_pages_encrypted %d, wired_pages_clear %d, dirty_pages_encrypted %d\n", - wired_pages_encrypted, wired_pages_clear, dirty_pages_encrypted); + HIBLOG("zeroPageCount %d, wiredPagesEncrypted %d, wiredPagesClear %d, dirtyPagesEncrypted %d\n", + zeroPageCount, wiredPagesEncrypted, wiredPagesClear, dirtyPagesEncrypted); if (vars->fileVars->io) (void) IOHibernatePollerIODone(vars->fileVars, false); @@ -2663,7 +2745,7 @@ hibernate_write_image(void) gIOHibernateState = kIOHibernateStateInactive; KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 1) | DBG_FUNC_END, - wired_pages_encrypted, wired_pages_clear, dirty_pages_encrypted, 0, 0); + wiredPagesEncrypted, wiredPagesClear, dirtyPagesEncrypted, 0, 0); if (kIOReturnSuccess == err) { @@ -2703,13 +2785,15 @@ hibernate_machine_init(void) uint32_t pagesRead = 0; AbsoluteTime startTime, compTime; AbsoluteTime allTime, endTime; + AbsoluteTime startIOTime, endIOTime; + uint64_t nsec, nsecIO; uint64_t compBytes; - uint64_t nsec; uint32_t lastProgressStamp = 0; uint32_t progressStamp; hibernate_cryptvars_t * cryptvars = 0; IOHibernateVars * vars = &gIOHibernateVars; + bzero(gIOHibernateStats, sizeof(hibernate_statistics_t)); if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents) return; @@ -2717,10 +2801,6 @@ hibernate_machine_init(void) sum = gIOHibernateCurrentHeader->actualImage1Sum; pagesDone = gIOHibernateCurrentHeader->actualUncompressedPages; - HIBLOG("hibernate_machine_init: state %d, image pages %d, sum was %x, image1Size %qx, conflictCount %d, nextFree %x\n", - gIOHibernateState, pagesDone, sum, gIOHibernateCurrentHeader->image1Size, - gIOHibernateCurrentHeader->conflictCount, gIOHibernateCurrentHeader->nextFree); - if (kIOHibernateStateWakingFromHibernate != gIOHibernateState) { HIBLOG("regular wake\n"); @@ -2731,14 +2811,42 @@ hibernate_machine_init(void) gIOHibernateCurrentHeader->diag[0], gIOHibernateCurrentHeader->diag[1], gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]); - HIBLOG("restore times %qd, %qd, %qd ms, tsc 0x%qx scale 0x%x\n", - (((gIOHibernateCurrentHeader->restoreTime1 * pal_rtc_nanotime_info.scale) >> 32) / 1000000), - (((gIOHibernateCurrentHeader->restoreTime2 * pal_rtc_nanotime_info.scale) >> 32) / 1000000), - (((gIOHibernateCurrentHeader->restoreTime3 * pal_rtc_nanotime_info.scale) >> 32) / 1000000), - gIOHibernateCurrentHeader->restoreTime1, pal_rtc_nanotime_info.scale); +#define t40ms(x) (tmrCvt((((uint64_t)(x)) << 8), tscFCvtt2n) / 1000000) +#define tStat(x, y) gIOHibernateStats->x = t40ms(gIOHibernateCurrentHeader->y); + tStat(booterStart, booterStart); + gIOHibernateStats->smcStart = gIOHibernateCurrentHeader->smcStart, + tStat(booterDuration0, booterTime0); + tStat(booterDuration1, booterTime1); + tStat(booterDuration2, booterTime2); + tStat(booterDuration, booterTime); + tStat(booterConnectDisplayDuration, connectDisplayTime); + tStat(booterSplashDuration, splashTime); + tStat(trampolineDuration, trampolineTime); + + gIOHibernateStats->image1Size = gIOHibernateCurrentHeader->image1Size; + gIOHibernateStats->imageSize = gIOHibernateCurrentHeader->imageSize; + gIOHibernateStats->image1Pages = pagesDone; + + HIBLOG("booter start at %d ms smc %d ms, [%d, %d, %d] total %d ms, dsply %d, %d ms, tramp %d ms\n", + gIOHibernateStats->booterStart, + gIOHibernateStats->smcStart, + gIOHibernateStats->booterDuration0, + gIOHibernateStats->booterDuration1, + gIOHibernateStats->booterDuration2, + 
gIOHibernateStats->booterDuration, + gIOHibernateStats->booterConnectDisplayDuration, + gIOHibernateStats->booterSplashDuration, + gIOHibernateStats->trampolineDuration); + + HIBLOG("hibernate_machine_init: state %d, image pages %d, sum was %x, imageSize 0x%qx, image1Size 0x%qx, conflictCount %d, nextFree %x\n", + gIOHibernateState, pagesDone, sum, gIOHibernateStats->imageSize, gIOHibernateStats->image1Size, + gIOHibernateCurrentHeader->conflictCount, gIOHibernateCurrentHeader->nextFree); - if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode) + if ((0 != (kIOHibernateModeSleep & gIOHibernateMode)) + && (0 != ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode))) + { hibernate_page_list_discard(vars->page_list); + } cryptvars = (kIOHibernateModeEncrypt & gIOHibernateMode) ? &gIOHibernateCryptWakeContext : 0; @@ -2825,7 +2933,9 @@ hibernate_machine_init(void) (uint8_t *) vars->videoMapping, 0, kIOHibernateProgressCount); uint8_t * src = (uint8_t *) vars->srcBuffer->getBytesNoCopy(); - uint32_t decoOffset; + uint8_t * compressed = src + page_size; + uint8_t * scratch = compressed + page_size; + uint32_t decoOffset; clock_get_uptime(&allTime); AbsoluteTime_to_scalar(&compTime) = 0; @@ -2833,7 +2943,11 @@ hibernate_machine_init(void) HIBLOG("IOHibernatePollerOpen(), ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled()); err = IOHibernatePollerOpen(vars->fileVars, kIOPolledAfterSleepState, 0); - HIBLOG("IOHibernatePollerOpen(%x)\n", err); + clock_get_uptime(&startIOTime); + endTime = startIOTime; + SUB_ABSOLUTETIME(&endTime, &allTime); + absolutetime_to_nanoseconds(endTime, &nsec); + HIBLOG("IOHibernatePollerOpen(%x) %qd ms\n", err, nsec / 1000000ULL); IOPolledFileSeek(vars->fileVars, gIOHibernateCurrentHeader->image1Size); @@ -2888,39 +3002,30 @@ hibernate_machine_init(void) break; } - if (!compressedSize) - { - ppnum++; - pagesDone++; - continue; - } - - err = IOPolledFileRead(vars->fileVars, src, (compressedSize + 3) & ~3, cryptvars); - if (kIOReturnSuccess != err) - break; - - if (compressedSize < page_size) - { - decoOffset = page_size; - - clock_get_uptime(&startTime); - WKdm_decompress((WK_word*) src, (WK_word*) (src + decoOffset), PAGE_SIZE_IN_WORDS); - clock_get_uptime(&endTime); - ADD_ABSOLUTETIME(&compTime, &endTime); - SUB_ABSOLUTETIME(&compTime, &startTime); - - compBytes += page_size; - } + if (!compressedSize) bzero_phys(ptoa_64(ppnum), page_size); else - decoOffset = 0; - - sum += hibernate_sum_page((src + decoOffset), ppnum); - - err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size); - if (err) { - HIBLOG("IOMemoryDescriptorReadToPhysical [%ld] %x\n", (long)ppnum, err); - break; + err = IOPolledFileRead(vars->fileVars, src, (compressedSize + 3) & ~3, cryptvars); + if (kIOReturnSuccess != err) break; + if (compressedSize < page_size) + { + decoOffset = page_size; + clock_get_uptime(&startTime); + WKdm_decompress_new((WK_word*) src, (WK_word*) compressed, (WK_word*) scratch, page_size); + clock_get_uptime(&endTime); + ADD_ABSOLUTETIME(&compTime, &endTime); + SUB_ABSOLUTETIME(&compTime, &startTime); + compBytes += page_size; + } + else decoOffset = 0; + + sum += hibernate_sum_page((src + decoOffset), ppnum); + err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size); + if (err) + { + HIBLOG("IOMemoryDescriptorReadToPhysical [%ld] %x\n", (long)ppnum, err); + break; + } } ppnum++; @@ -2954,6 +3059,8 @@ 
hibernate_machine_init(void) if (vars->fileVars->io) (void) IOHibernatePollerIODone(vars->fileVars, false); + clock_get_uptime(&endIOTime); + err = IOHibernatePollerClose(vars->fileVars, kIOPolledAfterSleepState); clock_get_uptime(&endTime); @@ -2966,8 +3073,15 @@ hibernate_machine_init(void) SUB_ABSOLUTETIME(&endTime, &allTime); absolutetime_to_nanoseconds(endTime, &nsec); - HIBLOG("hibernate_machine_init pagesDone %d sum2 %x, time: %qd ms, ", - pagesDone, sum, nsec / 1000000ULL); + SUB_ABSOLUTETIME(&endIOTime, &startIOTime); + absolutetime_to_nanoseconds(endIOTime, &nsecIO); + + gIOHibernateStats->kernelImageReadDuration = nsec / 1000000ULL; + gIOHibernateStats->imagePages = pagesDone; + + HIBLOG("hibernate_machine_init pagesDone %d sum2 %x, time: %d ms, disk(0x%x) %qd Mb/s, ", + pagesDone, sum, gIOHibernateStats->kernelImageReadDuration, kDefaultIOSize, + nsecIO ? ((((gIOHibernateCurrentHeader->imageSize - gIOHibernateCurrentHeader->image1Size) * 1000000000ULL) / 1024 / 1024) / nsecIO) : 0); absolutetime_to_nanoseconds(compTime, &nsec); HIBLOG("comp bytes: %qd time: %qd ms %qd Mb/s, ", @@ -2986,6 +3100,16 @@ hibernate_machine_init(void) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +void IOHibernateSetWakeCapabilities(uint32_t capability) +{ + if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) + { + gIOHibernateStats->wakeCapability = capability; + } +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + void IOHibernateSystemRestart(void) { static uint8_t noteStore[32] __attribute__((aligned(32))); diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c index b45b2acd3..daf5d2804 100644 --- a/iokit/Kernel/IOHibernateRestoreKernel.c +++ b/iokit/Kernel/IOHibernateRestoreKernel.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include "IOHibernateInternal.h" #include @@ -396,11 +396,13 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, uint32_t * buffer, uint32_t ppnum) { uint64_t dst = ptoa_64(ppnum); + uint8_t scratch[WKdm_SCRATCH_BUF_SIZE] __attribute__ ((aligned (16))); if (compressedSize != PAGE_SIZE) { dst = pal_hib_map(DEST_COPY_AREA, dst); - WKdm_decompress((WK_word*) src, (WK_word*)(uintptr_t)dst, PAGE_SIZE >> 2); + if (compressedSize) WKdm_decompress_new((WK_word*) src, (WK_word*)(uintptr_t)dst, (WK_word*) &scratch[0], PAGE_SIZE); + else bzero((void *) dst, PAGE_SIZE); } else { @@ -410,21 +412,6 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, return hibernate_sum_page((uint8_t *)(uintptr_t)dst, ppnum); } -// used only for small struct copies -static void -bcopy_internal(const void *src, void *dst, uint32_t len) -{ - const char *s = src; - char *d = dst; - uint32_t idx = 0; - - while (idx < len) - { - d[idx] = s[idx]; - idx++; - } -} - #define C_ASSERT(e) typedef char __C_ASSERT__[(e) ? 
1 : -1] long @@ -436,7 +423,6 @@ hibernate_kernel_entrypoint(uint32_t p1, uint64_t srcPhys; uint64_t imageReadPhys; uint64_t pageIndexPhys; - uint32_t idx; uint32_t * pageIndexSource; hibernate_page_list_t * map; uint32_t stage; @@ -460,7 +446,7 @@ hibernate_kernel_entrypoint(uint32_t p1, uint32_t handoffPages; uint32_t handoffPageCount; - uint64_t timeStart, time; + uint64_t timeStart; timeStart = rdtsc64(); C_ASSERT(sizeof(IOHibernateImageHeader) == 512); @@ -472,9 +458,9 @@ hibernate_kernel_entrypoint(uint32_t p1, debug_code(kIOHibernateRestoreCodeImageStart, headerPhys); - bcopy_internal((void *) pal_hib_map(IMAGE_AREA, headerPhys), - gIOHibernateCurrentHeader, - sizeof(IOHibernateImageHeader)); + memcpy(gIOHibernateCurrentHeader, + (void *) pal_hib_map(IMAGE_AREA, headerPhys), + sizeof(IOHibernateImageHeader)); debug_code(kIOHibernateRestoreCodeSignature, gIOHibernateCurrentHeader->signature); @@ -517,6 +503,7 @@ hibernate_kernel_entrypoint(uint32_t p1, sum = gIOHibernateCurrentHeader->actualRestore1Sum; gIOHibernateCurrentHeader->diag[0] = atop_64(headerPhys); gIOHibernateCurrentHeader->diag[1] = sum; + gIOHibernateCurrentHeader->trampolineTime = 0; uncompressedPages = 0; conflictCount = 0; @@ -625,28 +612,27 @@ hibernate_kernel_entrypoint(uint32_t p1, if (!conflicts) { -// if (compressedSize) - time = rdtsc64(); pageSum = store_one_page(gIOHibernateCurrentHeader->processorFlags, src, compressedSize, 0, ppnum); - gIOHibernateCurrentHeader->restoreTime2 += (rdtsc64() - time); if (stage != 2) sum += pageSum; uncompressedPages++; } else { - uint32_t bufferPage; + uint32_t bufferPage = 0; uint32_t * dst; // debug_code(kIOHibernateRestoreCodeConflictPage, ppnum); // debug_code(kIOHibernateRestoreCodeConflictSource, (uintptr_t) src); - conflictCount++; - - // alloc new buffer page - bufferPage = hibernate_page_list_grab(map, &nextFree); - + if (compressedSize) + { + // alloc new buffer page + bufferPage = hibernate_page_list_grab(map, &nextFree); + dst = (uint32_t *)pal_hib_map(DEST_COPY_AREA, ptoa_64(bufferPage)); + memcpy(dst, src, compressedSize); + } if (copyPageIndex > ((PAGE_SIZE >> 2) - 3)) { // alloc new copy list page @@ -662,15 +648,10 @@ hibernate_kernel_entrypoint(uint32_t p1, copyPageList[1] = 0; copyPageIndex = 2; } - copyPageList[copyPageIndex++] = ppnum; copyPageList[copyPageIndex++] = bufferPage; copyPageList[copyPageIndex++] = (compressedSize | (stage << 24)); copyPageList[0] = copyPageIndex; - - dst = (uint32_t *)pal_hib_map(DEST_COPY_AREA, ptoa_64(bufferPage)); - for (idx = 0; idx < ((compressedSize + 3) >> 2); idx++) - dst[idx] = src[idx]; } srcPhys += ((compressedSize + 3) & ~3); src += ((compressedSize + 3) >> 2); @@ -682,8 +663,6 @@ hibernate_kernel_entrypoint(uint32_t p1, // -- copy back conflicts - time = rdtsc64(); - pageListPage = copyPageListHeadPage; while (pageListPage) { @@ -707,8 +686,6 @@ hibernate_kernel_entrypoint(uint32_t p1, pal_hib_patchup(); - gIOHibernateCurrentHeader->restoreTime3 = (rdtsc64() - time); - // -- image has been destroyed... 
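A note on the zero-page convention this patch threads through both directions: hibernate_write_image() compresses a single all-zero page up front and records a compressed length of 0 for any page whose WKdm output matches that canonical result byte-for-byte, while the restore paths (hibernate_machine_init() earlier and store_one_page() here) treat length 0 as "zero-fill, skip decompression" and a length equal to the page size as "stored raw" (WKdm_compress_new() returns -1 for incompressible pages). A minimal standalone sketch of the decode side, assuming hypothetical names — page_record_t, restore_one_page() and the caller-supplied decompress() stand in for the real structures and WKdm_decompress_new():

#include <stdint.h>
#include <string.h>

#define HIB_PAGE_SIZE 4096

typedef struct {
    uint32_t compressed_size;     /* 0 = all-zero page, HIB_PAGE_SIZE = raw */
    uint8_t  data[HIB_PAGE_SIZE]; /* compressed or raw payload */
} page_record_t;

/* Decode one page record following the three cases used by the patch. */
static void
restore_one_page(const page_record_t *rec, uint8_t *dst,
                 void (*decompress)(const uint8_t *src, uint8_t *dst))
{
    if (rec->compressed_size == 0)
        memset(dst, 0, HIB_PAGE_SIZE);         /* zero page: nothing stored */
    else if (rec->compressed_size == HIB_PAGE_SIZE)
        memcpy(dst, rec->data, HIB_PAGE_SIZE); /* incompressible: stored raw */
    else
        decompress(rec->data, dst);            /* WKdm-compressed payload */
}

Detecting zero pages by comparing compressed output (rather than scanning each source page for zeros) costs nothing extra, since every page is run through the compressor anyway.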
gIOHibernateCurrentHeader->actualImage1Sum = sum; @@ -718,7 +695,7 @@ hibernate_kernel_entrypoint(uint32_t p1, gIOHibernateState = kIOHibernateStateWakingFromHibernate; - gIOHibernateCurrentHeader->restoreTime1 = (rdtsc64() - timeStart); + gIOHibernateCurrentHeader->trampolineTime = (((rdtsc64() - timeStart)) >> 8); #if CONFIG_SLEEP #if defined(__i386__) || defined(__x86_64__) diff --git a/iokit/Kernel/IOInterruptController.cpp b/iokit/Kernel/IOInterruptController.cpp index 1000178ad..95287f584 100644 --- a/iokit/Kernel/IOInterruptController.cpp +++ b/iokit/Kernel/IOInterruptController.cpp @@ -1,5 +1,6 @@ /* - * Copyright (c) 1998-2010 Apple Inc. All rights reserved. + * Copyright (c) 2007-2012 Apple Inc. All rights reserved. + * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -290,7 +291,10 @@ IOReturn IOInterruptController::enableInterrupt(IOService *nub, int source) if (vector->interruptDisabledSoft) { vector->interruptDisabledSoft = 0; - +#if !defined(__i386__) && !defined(__x86_64__) + OSMemoryBarrier(); +#endif + if (!getPlatform()->atInterruptLevel()) { while (vector->interruptActive) {} @@ -318,6 +322,9 @@ IOReturn IOInterruptController::disableInterrupt(IOService *nub, int source) vector = &vectors[vectorNumber]; vector->interruptDisabledSoft = 1; +#if !defined(__i386__) && !defined(__x86_64__) + OSMemoryBarrier(); +#endif if (!getPlatform()->atInterruptLevel()) { while (vector->interruptActive) @@ -411,6 +418,8 @@ IOReturn IOSharedInterruptController::initInterruptController(IOInterruptControl { int cnt, interruptType; IOReturn error; + + reserved = NULL; if (!super::init()) return kIOReturnNoResources; @@ -644,6 +653,10 @@ IOReturn IOSharedInterruptController::disableInterrupt(IOService *nub, interruptState = IOSimpleLockLockDisableInterrupt(controllerLock); if (!vector->interruptDisabledSoft) { vector->interruptDisabledSoft = 1; +#if !defined(__i386__) && !defined(__x86_64__) + OSMemoryBarrier(); +#endif + vectorsEnabled--; } IOSimpleLockUnlockEnableInterrupt(controllerLock, interruptState); @@ -673,6 +686,10 @@ IOReturn IOSharedInterruptController::handleInterrupt(void * /*refCon*/, vector = &vectors[vectorNumber]; vector->interruptActive = 1; +#if !defined(__i386__) && !defined(__x86_64__) + OSMemoryBarrier(); +#endif + if (!vector->interruptDisabledSoft) { // Call the handler if it exists. 
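The OSMemoryBarrier() calls added to IOInterruptController above sit between the store to interruptDisabledSoft (or interruptActive) and the subsequent spin on the other flag. On weakly-ordered CPUs — hence the !__i386__/!__x86_64__ guard — plain stores and loads may be reordered, so without the barrier a disable could return while an in-flight handler still observes the flag clear. A C11 sketch of the same handshake; the names and the seq_cst fence standing in for OSMemoryBarrier() are illustrative, not the IOKit API:

#include <stdatomic.h>

static atomic_int interrupt_disabled_soft;
static atomic_int interrupt_active;

/* Disable path: publish the flag, fence, then wait out in-flight handlers. */
void
disable_interrupt_soft(void)
{
    atomic_store_explicit(&interrupt_disabled_soft, 1, memory_order_relaxed);
    atomic_thread_fence(memory_order_seq_cst); /* the OSMemoryBarrier() slot */
    while (atomic_load_explicit(&interrupt_active, memory_order_relaxed))
        ; /* spin until the handler drains */
}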
diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h index fc4f31b5f..d15549f89 100644 --- a/iokit/Kernel/IOKitKernelInternal.h +++ b/iokit/Kernel/IOKitKernelInternal.h @@ -142,6 +142,7 @@ struct IOMDDMAMapArgs { uint64_t fLength; uint64_t fAlloc; ppnum_t fAllocCount; + uint8_t fMapContig; }; struct IODMACommandInternal @@ -180,6 +181,7 @@ struct IODMACommandInternal // IODMAEventSource use IOReturn fStatus; UInt64 fActualByteCount; + AbsoluteTime fTimeStamp; }; struct IOMemoryDescriptorDevicePager { @@ -196,6 +198,35 @@ struct IOMemoryDescriptorReserved { uint64_t kernReserved[4]; }; +struct iopa_t +{ + IOLock * lock; + queue_head_t list; + vm_size_t pagecount; + vm_size_t bytecount; +}; + +struct iopa_page_t +{ + queue_chain_t link; + uint64_t avail; + uint32_t signature; +}; +typedef struct iopa_page_t iopa_page_t; + +typedef uintptr_t (*iopa_proc_t)(iopa_t * a); + +enum +{ + kIOPageAllocChunkBytes = (PAGE_SIZE / 64), + kIOPageAllocSignature = 'iopa' +}; + +extern "C" void iopa_init(iopa_t * a); +extern "C" uintptr_t iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign); +extern "C" uintptr_t iopa_free(iopa_t * a, uintptr_t addr, vm_size_t bytes); + +extern "C" iopa_t gIOBMDPageAllocator; extern "C" struct timeval gIOLastSleepTime; extern "C" struct timeval gIOLastWakeTime; diff --git a/iokit/Kernel/IOLib.cpp b/iokit/Kernel/IOLib.cpp index 886176acf..45504157a 100644 --- a/iokit/Kernel/IOLib.cpp +++ b/iokit/Kernel/IOLib.cpp @@ -92,6 +92,7 @@ __doprnt( extern void cons_putc_locked(char); extern void bsd_log_lock(void); extern void bsd_log_unlock(void); +extern void logwakeup(); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -109,6 +110,7 @@ void *_giDebugLogDataInternal = NULL; void *_giDebugReserved1 = NULL; void *_giDebugReserved2 = NULL; +iopa_t gIOBMDPageAllocator; /* * Static variables for this module. 
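The iopa_t declarations just added to IOKitKernelInternal.h back a small sub-page allocator: each backing page is carved into PAGE_SIZE/64 chunks (kIOPageAllocChunkBytes), tracked one bit each in the 64-bit avail mask, and the iopa_page_t header lives in the page's last chunk — which is why iopa_alloc() later in this patch seeds avail with -2ULL, permanently reserving the lowest-order bit. The clever part of iopa_allocinpage() is finding a run of count consecutive free chunks by AND-ing the mask with shifted copies of itself, so only O(log count) steps are needed. A standalone restatement of just that reduction, taken from the loop in iopa_allocinpage():

#include <stdint.h>

/*
 * Reduce a free-chunk bitmap (1 = free) so that each surviving bit marks a
 * position covering `count` consecutive free chunks. Each pass ANDs the map
 * with a shifted copy; the shift offsets accumulate to cover the whole run.
 */
static uint64_t
runs_of(uint64_t avail, uint32_t count)
{
    uint32_t n, s;

    for (n = count; n > 1; n -= s)
    {
        s = n >> 1;
        avail &= (avail << s);
    }
    return avail; /* nonzero iff a suitable run exists */
}

iopa_allocinpage() then intersects the survivors with an alignment mask and picks the highest one with __builtin_clzll(), turning the bit position directly into a chunk offset within the page.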
@@ -134,6 +136,8 @@ static struct { lck_mtx_t * lock; } gIOKitPageableSpace; +static iopa_t gIOPageablePageAllocator; + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ void IOLibInit(void) @@ -165,6 +169,9 @@ void IOLibInit(void) gIOMallocContiguousEntriesLock = lck_mtx_alloc_init(IOLockGroup, LCK_ATTR_NULL); queue_init( &gIOMallocContiguousEntries ); + iopa_init(&gIOBMDPageAllocator); + iopa_init(&gIOPageablePageAllocator); + libInitialized = true; } @@ -626,7 +633,7 @@ static kern_return_t IOMallocPageableCallback(vm_map_t map, void * _ref) return( kr ); } -void * IOMallocPageable(vm_size_t size, vm_size_t alignment) +static void * IOMallocPageablePages(vm_size_t size, vm_size_t alignment) { kern_return_t kr = kIOReturnNotReady; struct IOMallocPageableRef ref; @@ -641,13 +648,6 @@ void * IOMallocPageable(vm_size_t size, vm_size_t alignment) if( kIOReturnSuccess != kr) ref.address = 0; - if( ref.address) { -#if IOALLOCDEBUG - debug_iomallocpageable_size += round_page(size); -#endif - IOStatisticsAlloc(kIOStatisticsMallocPageable, size); - } - return( (void *) ref.address ); } @@ -669,19 +669,206 @@ vm_map_t IOPageableMapForAddress( uintptr_t address ) return( map ); } -void IOFreePageable(void * address, vm_size_t size) +static void IOFreePageablePages(void * address, vm_size_t size) { vm_map_t map; map = IOPageableMapForAddress( (vm_address_t) address); if( map) kmem_free( map, (vm_offset_t) address, size); +} +static uintptr_t IOMallocOnePageablePage(iopa_t * a) +{ + return ((uintptr_t) IOMallocPageablePages(page_size, page_size)); +} + +void * IOMallocPageable(vm_size_t size, vm_size_t alignment) +{ + void * addr; + + if (size >= (page_size - 4*kIOPageAllocChunkBytes)) addr = IOMallocPageablePages(size, alignment); + else addr = ((void * ) iopa_alloc(&gIOPageablePageAllocator, &IOMallocOnePageablePage, size, alignment)); + + if (addr) { #if IOALLOCDEBUG - debug_iomallocpageable_size -= round_page(size); + debug_iomallocpageable_size += size; #endif + IOStatisticsAlloc(kIOStatisticsMallocPageable, size); + } + + return (addr); +} +void IOFreePageable(void * address, vm_size_t size) +{ +#if IOALLOCDEBUG + debug_iomallocpageable_size -= size; +#endif IOStatisticsAlloc(kIOStatisticsFreePageable, size); + + if (size < (page_size - 4*kIOPageAllocChunkBytes)) + { + address = (void *) iopa_free(&gIOPageablePageAllocator, (uintptr_t) address, size); + size = page_size; + } + if (address) IOFreePageablePages(address, size); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#if 0 +#undef assert +#define assert(ex) \ + ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) +#endif + +typedef char iopa_page_t_assert[(sizeof(iopa_page_t) <= kIOPageAllocChunkBytes) ? 
1 : -1]; + +extern "C" void +iopa_init(iopa_t * a) +{ + bzero(a, sizeof(*a)); + a->lock = IOLockAlloc(); + queue_init(&a->list); +} + +static uintptr_t +iopa_allocinpage(iopa_page_t * pa, uint32_t count, uint64_t align) +{ + uint32_t n, s; + uint64_t avail = pa->avail; + + assert(avail); + + // find strings of count 1 bits in avail + for (n = count; n > 1; n -= s) + { + s = n >> 1; + avail = avail & (avail << s); + } + // and aligned + avail &= align; + + if (avail) + { + n = __builtin_clzll(avail); + pa->avail &= ~((-1ULL << (64 - count)) >> n); + if (!pa->avail && pa->link.next) + { + remque(&pa->link); + pa->link.next = 0; + } + return (n * kIOPageAllocChunkBytes + trunc_page((uintptr_t) pa)); + } + + return (0); +} + +static uint32_t +log2up(uint32_t size) +{ + if (size <= 1) size = 0; + else size = 32 - __builtin_clz(size - 1); + return (size); +} + +uintptr_t +iopa_alloc(iopa_t * a, iopa_proc_t alloc, vm_size_t bytes, uint32_t balign) +{ + static const uint64_t align_masks[] = { + 0xFFFFFFFFFFFFFFFF, + 0xAAAAAAAAAAAAAAAA, + 0x8888888888888888, + 0x8080808080808080, + 0x8000800080008000, + 0x8000000080000000, + 0x8000000000000000, + }; + iopa_page_t * pa; + uintptr_t addr = 0; + uint32_t count; + uint64_t align; + + if (!bytes) bytes = 1; + count = (bytes + kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes; + align = align_masks[log2up((balign + kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes)]; + + IOLockLock(a->lock); + pa = (typeof(pa)) queue_first(&a->list); + while (!queue_end(&a->list, &pa->link)) + { + addr = iopa_allocinpage(pa, count, align); + if (addr) + { + a->bytecount += bytes; + break; + } + pa = (typeof(pa)) queue_next(&pa->link); + } + IOLockUnlock(a->lock); + + if (!addr) + { + addr = alloc(a); + if (addr) + { + pa = (typeof(pa)) (addr + page_size - kIOPageAllocChunkBytes); + pa->signature = kIOPageAllocSignature; + pa->avail = -2ULL; + + addr = iopa_allocinpage(pa, count, align); + IOLockLock(a->lock); + if (pa->avail) enqueue_head(&a->list, &pa->link); + a->pagecount++; + if (addr) a->bytecount += bytes; + IOLockUnlock(a->lock); + } + } + + assert((addr & ((1 << log2up(balign)) - 1)) == 0); + return (addr); +} + +uintptr_t +iopa_free(iopa_t * a, uintptr_t addr, vm_size_t bytes) +{ + iopa_page_t * pa; + uint32_t count; + uintptr_t chunk; + + if (!bytes) bytes = 1; + + chunk = (addr & page_mask); + assert(0 == (chunk & (kIOPageAllocChunkBytes - 1))); + + pa = (typeof(pa)) (addr | (page_size - kIOPageAllocChunkBytes)); + assert(kIOPageAllocSignature == pa->signature); + + count = (bytes + kIOPageAllocChunkBytes - 1) / kIOPageAllocChunkBytes; + chunk /= kIOPageAllocChunkBytes; + + IOLockLock(a->lock); + if (!pa->avail) + { + assert(!pa->link.next); + enqueue_tail(&a->list, &pa->link); + } + pa->avail |= ((-1ULL << (64 - count)) >> chunk); + if (pa->avail != -2ULL) pa = 0; + else + { + remque(&pa->link); + pa->link.next = 0; + pa->signature = 0; + a->pagecount--; + // page to free + pa = (typeof(pa)) trunc_page(pa); + } + a->bytecount -= bytes; + IOLockUnlock(a->lock); + + return ((uintptr_t) pa); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -797,6 +984,7 @@ void IOLogv(const char *format, va_list ap) bsd_log_lock(); __doprnt(format, ap, _iolog_logputc, NULL, 16); bsd_log_unlock(); + logwakeup(); __doprnt(format, ap2, _iolog_consputc, NULL, 16); } diff --git a/iokit/Kernel/IOMapper.cpp b/iokit/Kernel/IOMapper.cpp index 6abcb5d1a..294e13822 100644 --- a/iokit/Kernel/IOMapper.cpp +++ b/iokit/Kernel/IOMapper.cpp @@ -142,28 
+142,47 @@ void IOMapper::waitForSystemMapper() IOMapper * IOMapper::copyMapperForDevice(IOService * device) { + return copyMapperForDeviceWithIndex(device, 0); +} + +IOMapper * IOMapper::copyMapperForDeviceWithIndex(IOService * device, unsigned int index) +{ + OSData *data; OSObject * obj; - IOMapper * mapper; + IOMapper * mapper = NULL; OSDictionary * matching; obj = device->copyProperty("iommu-parent"); if (!obj) - return (NULL); + return (NULL); if ((mapper = OSDynamicCast(IOMapper, obj))) - return (mapper); + return (mapper); - matching = IOService::propertyMatching(gIOMapperIDKey, obj); - if (matching) + if ((data = OSDynamicCast(OSData, obj))) { - mapper = OSDynamicCast(IOMapper, IOService::waitForMatchingService(matching)); - matching->release(); + if (index >= data->getLength() / sizeof(UInt32)) + goto done; + + data = OSData::withBytesNoCopy((UInt32 *)data->getBytesNoCopy() + index, sizeof(UInt32)); + if (!data) + goto done; + + matching = IOService::propertyMatching(gIOMapperIDKey, data); + data->release(); } - if (mapper) - device->setProperty("iommu-parent", mapper); else - obj->release(); - + matching = IOService::propertyMatching(gIOMapperIDKey, obj); + + if (matching) + { + mapper = OSDynamicCast(IOMapper, IOService::waitForMatchingService(matching)); + matching->release(); + } + +done: + if (obj) + obj->release(); return (mapper); } diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 8d49aeebe..3eee2e740 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -88,14 +88,10 @@ unsigned int IOTranslateCacheBits(struct phys_entry *pp); __END_DECLS -#define kIOMaximumMappedIOByteCount (512*1024*1024) - #define kIOMapperWaitSystem ((IOMapper *) 1) static IOMapper * gIOSystemMapper = NULL; -static ppnum_t gIOMaximumMappedIOPageCount = atop_32(kIOMaximumMappedIOByteCount); - ppnum_t gIOLastPage; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -156,9 +152,10 @@ struct ioGMDData { addr64_t fMappedBase; uint64_t fPreparationID; unsigned int fPageCnt; + unsigned char fDiscontig; #if __LP64__ // align arrays to 8 bytes so following macros work - unsigned int fPad; + unsigned char fPad[3]; #endif upl_page_info_t fPageList[1]; /* variable length */ ioPLBlock fBlocks[1]; /* variable length */ @@ -684,13 +681,17 @@ IOGeneralMemoryDescriptor::initWithOptions(void * buffers, IODelete(_ranges.v, IOVirtualRange, _rangesCount); } - if (_memEntry) + options |= (kIOMemoryRedirected & _flags); + if (!(kIOMemoryRedirected & options)) { - ipc_port_release_send((ipc_port_t) _memEntry); - _memEntry = 0; + if (_memEntry) + { + ipc_port_release_send((ipc_port_t) _memEntry); + _memEntry = 0; + } + if (_mappings) + _mappings->flushCollection(); } - if (_mappings) - _mappings->flushCollection(); } else { if (!super::init()) @@ -1203,19 +1204,16 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * if (_memoryEntries && data->fMapper) { - bool remap = false; + bool remap; bool whole = ((data->fOffset == 0) && (data->fLength == _length)); dataP = getDataP(_memoryEntries); - if (data->fMapSpec.numAddressBits < dataP->fDMAMapNumAddressBits) - { - dataP->fDMAMapNumAddressBits = data->fMapSpec.numAddressBits; - remap = ((dataP->fMappedBase + _length) > (1ULL << dataP->fDMAMapNumAddressBits)); - } - if (data->fMapSpec.alignment > dataP->fDMAMapAlignment) - { - dataP->fDMAMapAlignment = data->fMapSpec.alignment; - remap |= (dataP->fDMAMapAlignment > page_size); - } + + if 
(data->fMapSpec.numAddressBits < dataP->fDMAMapNumAddressBits) dataP->fDMAMapNumAddressBits = data->fMapSpec.numAddressBits; + if (data->fMapSpec.alignment > dataP->fDMAMapAlignment) dataP->fDMAMapAlignment = data->fMapSpec.alignment; + + remap = (dataP->fDMAMapNumAddressBits < 64) + && ((dataP->fMappedBase + _length) > (1ULL << dataP->fDMAMapNumAddressBits)); + remap |= (dataP->fDMAMapAlignment > page_size); remap |= (!whole); if (remap || !dataP->fMappedBase) { @@ -1232,6 +1230,7 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * data->fAlloc = dataP->fMappedBase; data->fAllocCount = 0; // IOMD owns the alloc } + data->fMapContig = !dataP->fDiscontig; } return (err); @@ -1774,6 +1773,7 @@ IOMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void *vData, UInt data if (params) panic("class %s does not support IODMACommand::kIterateOnly", getMetaClass()->getClassName()); + data->fMapContig = true; err = md->dmaMap(data->fMapper, &data->fMapSpec, data->fOffset, data->fLength, &data->fAlloc, &data->fAllocCount); return (err); } @@ -1788,7 +1788,10 @@ purgeableControlBits(IOOptionBits newState, vm_purgable_t * control, int * state IOReturn err = kIOReturnSuccess; *control = VM_PURGABLE_SET_STATE; - switch (newState) + + enum { kIOMemoryPurgeableControlMask = 15 }; + + switch (kIOMemoryPurgeableControlMask & newState) { case kIOMemoryPurgeableKeepCurrent: *control = VM_PURGABLE_GET_STATE; @@ -1798,7 +1801,7 @@ purgeableControlBits(IOOptionBits newState, vm_purgable_t * control, int * state *state = VM_PURGABLE_NONVOLATILE; break; case kIOMemoryPurgeableVolatile: - *state = VM_PURGABLE_VOLATILE; + *state = VM_PURGABLE_VOLATILE | (newState & ~kIOMemoryPurgeableControlMask); break; case kIOMemoryPurgeableEmpty: *state = VM_PURGABLE_EMPTY; @@ -1815,7 +1818,7 @@ purgeableStateBits(int * state) { IOReturn err = kIOReturnSuccess; - switch (*state) + switch (VM_PURGABLE_STATE_MASK & *state) { case VM_PURGABLE_NONVOLATILE: *state = kIOMemoryPurgeableNonVolatile; @@ -1859,6 +1862,11 @@ IOGeneralMemoryDescriptor::setPurgeable( IOOptionBits newState, err = kIOReturnNotReady; break; } + else if (!_task) + { + err = kIOReturnUnsupported; + break; + } else curMap = get_task_map(_task); @@ -1927,6 +1935,43 @@ IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, return (err); } + +IOReturn IOMemoryDescriptor::getPageCounts( IOByteCount * residentPageCount, + IOByteCount * dirtyPageCount ) +{ + IOReturn err = kIOReturnSuccess; + unsigned int _residentPageCount, _dirtyPageCount; + + if (kIOMemoryThreadSafe & _flags) LOCK; + + do + { + if (!_memEntry) + { + err = kIOReturnNotReady; + break; + } + if ((residentPageCount == NULL) && (dirtyPageCount == NULL)) + { + err = kIOReturnBadArgument; + break; + } + + err = mach_memory_entry_get_page_counts((ipc_port_t) _memEntry, + residentPageCount ? &_residentPageCount : NULL, + dirtyPageCount ? 
&_dirtyPageCount : NULL); + if (kIOReturnSuccess != err) break; + if (residentPageCount) *residentPageCount = _residentPageCount; + if (dirtyPageCount) *dirtyPageCount = _dirtyPageCount; + } + while (false); + + if (kIOMemoryThreadSafe & _flags) UNLOCK; + + return (err); +} + + extern "C" void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count); extern "C" void dcache_incoherent_io_store64(addr64_t pa, unsigned int count); @@ -2065,22 +2110,13 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) IOReturn error = kIOReturnCannotWire; ioGMDData *dataP; upl_page_info_array_t pageInfo; - ppnum_t mapBase = 0; - ipc_port_t sharedMem = (ipc_port_t) _memEntry; + ppnum_t mapBase; + ipc_port_t sharedMem; - assert(!_wireCount); assert(kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type); - if (_pages > gIOMaximumMappedIOPageCount) - return kIOReturnNoResources; - - dataP = getDataP(_memoryEntries); - IOMapper *mapper; - mapper = dataP->fMapper; - dataP->fMappedBase = 0; - - if (forDirection == kIODirectionNone) - forDirection = getDirection(); + if ((kIODirectionOutIn & forDirection) == kIODirectionNone) + forDirection = (IODirection) (forDirection | getDirection()); int uplFlags; // This Mem Desc's default flags for upl creation switch (kIODirectionOutIn & forDirection) @@ -2088,7 +2124,6 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) case kIODirectionOut: // Pages do not need to be marked as dirty on commit uplFlags = UPL_COPYOUT_FROM; - _flags |= kIOMemoryPreparedReadOnly; break; case kIODirectionIn: @@ -2096,15 +2131,34 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) uplFlags = 0; // i.e. ~UPL_COPYOUT_FROM break; } - uplFlags |= UPL_SET_IO_WIRE | UPL_SET_LITE; -#ifdef UPL_NEED_32BIT_ADDR + if (_wireCount) + { + if ((kIOMemoryPreparedReadOnly & _flags) && !(UPL_COPYOUT_FROM & uplFlags)) + { + OSReportWithBacktrace("IOMemoryDescriptor 0x%lx prepared read only", VM_KERNEL_ADDRPERM(this)); + error = kIOReturnNotWritable; + } + else error = kIOReturnSuccess; + return (error); + } + + dataP = getDataP(_memoryEntries); + IOMapper *mapper; + mapper = dataP->fMapper; + dataP->fMappedBase = 0; + + uplFlags |= UPL_SET_IO_WIRE | UPL_SET_LITE; if (kIODirectionPrepareToPhys32 & forDirection) { if (!mapper) uplFlags |= UPL_NEED_32BIT_ADDR; if (dataP->fDMAMapNumAddressBits > 32) dataP->fDMAMapNumAddressBits = 32; } -#endif + if (kIODirectionPrepareNoFault & forDirection) uplFlags |= UPL_REQUEST_NO_FAULT; + if (kIODirectionPrepareNoZeroFill & forDirection) uplFlags |= UPL_NOZEROFILLIO; + + mapBase = 0; + sharedMem = (ipc_port_t) _memEntry; // Note that appendBytes(NULL) zeros the data up to the desired length. 
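The getPageCounts() method added above uses the standard optional-out-parameter idiom for Mach calls: hand mach_memory_entry_get_page_counts() either a local temporary or NULL for each counter, and copy back only what the caller requested. A self-contained sketch of the idiom; backend_counts() is a hypothetical stand-in for the Mach call:

#include <stddef.h>

typedef int kern_return_t;
enum { KERN_SUCCESS = 0, KERN_INVALID_ARGUMENT = 4 };

/* Hypothetical backend; NULL for a counter means "don't compute it". */
kern_return_t backend_counts(unsigned int *resident, unsigned int *dirty);

kern_return_t
get_page_counts(size_t *resident_out, size_t *dirty_out)
{
    unsigned int res = 0, dirty = 0;
    kern_return_t kr;

    if (!resident_out && !dirty_out)
        return KERN_INVALID_ARGUMENT; /* nothing to report */

    kr = backend_counts(resident_out ? &res   : NULL,
                        dirty_out    ? &dirty : NULL);
    if (kr != KERN_SUCCESS)
        return kr;

    if (resident_out) *resident_out = res;
    if (dirty_out)    *dirty_out    = dirty;
    return KERN_SUCCESS;
}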
_memoryEntries->appendBytes(0, dataP->fPageCnt * sizeof(upl_page_info_t)); @@ -2119,9 +2173,9 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) // Iterate over the vector of virtual ranges Ranges vec = _ranges; - unsigned int pageIndex = 0; - IOByteCount mdOffset = 0; - ppnum_t highestPage = 0; + unsigned int pageIndex = 0; + IOByteCount mdOffset = 0; + ppnum_t highestPage = 0; for (UInt range = 0; range < _rangesCount; range++) { ioPLBlock iopl; @@ -2203,7 +2257,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) if (baseInfo->device) { numPageInfo = 1; - iopl.fFlags = kIOPLOnDevice; + iopl.fFlags = kIOPLOnDevice; } else { iopl.fFlags = 0; @@ -2211,6 +2265,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) iopl.fIOMDOffset = mdOffset; iopl.fPageInfo = pageIndex; + if (mapper && pageIndex && (page_mask & (mdOffset + iopl.fPageOffset))) dataP->fDiscontig = true; #if 0 // used to remove the upl for auto prepares here, for some errant code @@ -2252,6 +2307,8 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) _highestPage = highestPage; + if (UPL_COPYOUT_FROM & uplFlags) _flags |= kIOMemoryPreparedReadOnly; + return kIOReturnSuccess; abortExit: @@ -2272,6 +2329,8 @@ abortExit: if (error == KERN_FAILURE) error = kIOReturnCannotWire; + else if (error == KERN_MEMORY_ERROR) + error = kIOReturnNoResources; return error; } @@ -2302,6 +2361,7 @@ bool IOGeneralMemoryDescriptor::initMemoryEntries(size_t size, IOMapper * mapper dataP->fDMAMapNumAddressBits = 64; dataP->fDMAMapAlignment = 0; dataP->fPreparationID = kIOPreparationIDUnprepared; + dataP->fDiscontig = false; return (true); } @@ -2459,9 +2519,9 @@ IOReturn IOGeneralMemoryDescriptor::prepare(IODirection forDirection) if (_prepareLock) IOLockLock(_prepareLock); - if (!_wireCount - && (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) ) { - error = wireVirtual(forDirection); + if (kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type) + { + error = wireVirtual(forDirection); } if (kIOReturnSuccess == error) @@ -3347,8 +3407,22 @@ IOReturn IOMemoryDescriptor::redirect( task_t safeTask, bool doRedirect ) do { if( (iter = OSCollectionIterator::withCollection( _mappings))) { + + memory_object_t pager; + + if( reserved) + pager = (memory_object_t) reserved->dp.devicePager; + else + pager = MACH_PORT_NULL; + while( (mapping = (IOMemoryMap *) iter->getNextObject())) + { mapping->redirect( safeTask, doRedirect ); + if (!doRedirect && !safeTask && pager && (kernel_map == mapping->fAddressMap)) + { + err = handleFault( pager, mapping->fAddressMap, mapping->fAddress, mapping->fOffset, mapping->fLength, kIOMapDefaultCache ); + } + } iter->release(); } @@ -3428,6 +3502,8 @@ IOReturn IOMemoryMap::unmap( void ) if( fAddress && fAddressMap && (0 == fSuperMap) && fMemory && (0 == (fOptions & kIOMapStatic))) { + vm_map_iokit_unmapped_region(fAddressMap, fLength); + err = fMemory->doUnmap(fAddressMap, (IOVirtualAddress) this, 0); } else @@ -3656,12 +3732,7 @@ void IOMemoryDescriptor::initialize( void ) if( 0 == gIOMemoryLock) gIOMemoryLock = IORecursiveLockAlloc(); - IORegistryEntry::getRegistryRoot()->setProperty(kIOMaximumMappedIOByteCountKey, - ptoa_64(gIOMaximumMappedIOPageCount), 64); gIOLastPage = IOGetLastPageNumber(); - - gIOPageAllocLock = IOSimpleLockAlloc(); - queue_init(&gIOPageAllocList); } void IOMemoryDescriptor::free( void ) @@ -3913,6 +3984,10 @@ IOMemoryMap * 
IOMemoryDescriptor::makeMapping( kr = mapDesc->doMap( 0, (IOVirtualAddress *) &mapping, options, 0, 0 ); if (kIOReturnSuccess == kr) { + if (0 == (mapping->fOptions & kIOMapStatic)) { + vm_map_iokit_mapped_region(mapping->fAddressMap, length); + } + result = mapping; mapDesc->addMapping(result); result->setMemoryDescriptor(mapDesc, offset); diff --git a/iokit/Kernel/IOMultiMemoryDescriptor.cpp b/iokit/Kernel/IOMultiMemoryDescriptor.cpp index 262680dc8..6d209ab1f 100644 --- a/iokit/Kernel/IOMultiMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMultiMemoryDescriptor.cpp @@ -76,7 +76,6 @@ bool IOMultiMemoryDescriptor::initWithDescriptors( // assert(descriptors); - assert(withCount); // Release existing descriptors, if any if ( _descriptors ) @@ -123,7 +122,8 @@ bool IOMultiMemoryDescriptor::initWithDescriptors( descriptors[index]->retain(); _length += descriptors[index]->getLength(); if ( _tag == 0 ) _tag = descriptors[index]->getTag(); - assert(descriptors[index]->getDirection() == withDirection); + assert(descriptors[index]->getDirection() == + (withDirection & kIOMemoryDirectionMask)); } return true; diff --git a/iokit/Kernel/IONVRAM.cpp b/iokit/Kernel/IONVRAM.cpp index 51a72cfb1..7b11cb19c 100644 --- a/iokit/Kernel/IONVRAM.cpp +++ b/iokit/Kernel/IONVRAM.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 1998-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2007-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -63,10 +64,55 @@ bool IODTNVRAM::init(IORegistryEntry *old, const IORegistryPlane *plane) _registryPropertiesKey = OSSymbol::withCStringNoCopy("aapl,pci"); if (_registryPropertiesKey == 0) return false; + // race condition possible between + // IODTNVRAM and IONVRAMController (restore loses boot-args) + initProxyData(); + return true; } +void IODTNVRAM::initProxyData(void) +{ + IORegistryEntry *entry; + const char *key = "nvram-proxy-data"; + OSObject *prop; + OSData *data; + const void *bytes; + + entry = IORegistryEntry::fromPath("/chosen", gIODTPlane); + if (entry != 0) { + prop = entry->getProperty(key); + if (prop != 0) { + data = OSDynamicCast(OSData, prop); + if (data != 0) { + bytes = data->getBytesNoCopy(); + if (bytes != 0) { + bcopy(bytes, _nvramImage, data->getLength()); + initNVRAMImage(); + _isProxied = true; + } + } + } + entry->removeProperty(key); + entry->release(); + } +} + void IODTNVRAM::registerNVRAMController(IONVRAMController *nvram) +{ + if (_nvramController != 0) return; + + _nvramController = nvram; + + // race condition possible between + // IODTNVRAM and IONVRAMController (restore loses boot-args) + if (!_isProxied) { + _nvramController->read(0, _nvramImage, kIODTNVRAMImageSize); + initNVRAMImage(); + } +} + +void IODTNVRAM::initNVRAMImage(void) { char partitionID[18]; UInt32 partitionOffset, partitionLength; @@ -74,12 +120,6 @@ void IODTNVRAM::registerNVRAMController(IONVRAMController *nvram) UInt32 currentLength, currentOffset = 0; OSNumber *partitionOffsetNumber, *partitionLengthNumber; - if (_nvramController != 0) return; - - _nvramController = nvram; - - _nvramController->read(0, _nvramImage, kIODTNVRAMImageSize); - // Find the offsets for the OF, XPRAM, NameRegistry and PanicInfo partitions. _ofPartitionOffset = 0xFFFFFFFF; _xpramPartitionOffset = 0xFFFFFFFF; @@ -217,6 +257,9 @@ void IODTNVRAM::sync(void) // Don't try to sync OF Variables if the system has already paniced. if (!_systemPaniced) syncOFVariables(); + // Don't try to perform controller operations if none has been registered. 
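(The guard that follows this comment, together with the initProxyData()/registerNVRAMController() split above, serves the ordering race the comments name: IODTNVRAM::init() can run before any IONVRAMController registers, so the booter stashes a copy of the NVRAM image under nvram-proxy-data in /chosen, init() adopts it, and the later hardware read is skipped so restored boot-args are not clobbered.) A rough sketch of that flow under assumed names — nvram_t, nvram_init(), nvram_register_controller() and the image size are all illustrative:

#include <stddef.h>
#include <string.h>

#define NVRAM_IMAGE_SIZE 0x2000 /* assumed; the real size is kIODTNVRAMImageSize */

typedef struct {
    unsigned char image[NVRAM_IMAGE_SIZE];
    int           proxied;    /* image adopted from /chosen's nvram-proxy-data */
    void         *controller; /* NULL until a controller registers */
} nvram_t;

/* Early init: adopt the booter's proxy copy of the image if one exists. */
void
nvram_init(nvram_t *nv, const void *proxy, size_t len)
{
    if (proxy && len <= sizeof(nv->image)) {
        memcpy(nv->image, proxy, len);
        nv->proxied = 1; /* a later hardware read must not clobber this */
    }
}

/* Later: the controller registers; read hardware only if we had no proxy. */
void
nvram_register_controller(nvram_t *nv, void *ctrl,
                          void (*read)(void *c, void *buf, size_t len))
{
    if (nv->controller)
        return;
    nv->controller = ctrl;
    if (!nv->proxied)
        read(ctrl, nv->image, sizeof(nv->image));
}

sync(), correspondingly, must bail out while the controller is still NULL — which is exactly what the added check below this note does.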
+ if (_nvramController == 0) return; + _nvramController->write(0, _nvramImage, kIODTNVRAMImageSize); _nvramController->sync(); @@ -948,9 +991,6 @@ OFVariable gOFVariables[] = { {"security-password", kOFVariableTypeData, kOFVariablePermRootOnly, -1}, {"boot-image", kOFVariableTypeData, kOFVariablePermUserWrite, -1}, {"com.apple.System.fp-state", kOFVariableTypeData, kOFVariablePermKernelOnly, -1}, -#if CONFIG_EMBEDDED - {"backlight-level", kOFVariableTypeData, kOFVariablePermUserWrite, -1}, -#endif {0, kOFVariableTypeData, kOFVariablePermUserRead, -1} }; diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 742b1df58..d0c2d7d2c 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -40,16 +40,22 @@ #include #include #include +#include #include "RootDomainUserClient.h" #include "IOKit/pwr_mgt/IOPowerConnection.h" #include "IOPMPowerStateQueue.h" #include +#include #if HIBERNATION #include #endif #include #include #include +#include +#include +#include + #include #include "IOServicePrivate.h" // _IOServiceInterestNotifier #include "IOServicePMPrivate.h" @@ -67,6 +73,8 @@ __END_DECLS #define kIOPMrootDomainClass "IOPMrootDomain" #define LOG_PREFIX "PMRD: " +#define OBFUSCATE(x) ((void *)(VM_KERNEL_ADDRPERM(x))) + #define MSG(x...) \ do { kprintf(LOG_PREFIX x); IOLog(x); } while (false) @@ -75,11 +83,11 @@ __END_DECLS #define DLOG(x...) do { \ if (kIOLogPMRootDomain & gIOKitDebug) \ - kprintf(LOG_PREFIX x); } while (false) + kprintf(LOG_PREFIX x); \ + gRootDomain->sleepWakeDebugLog(x);} while (false) #define _LOG(x...) -#define DARK_WAKE_DEBUG 1 #define SUSPEND_PM_NOTIFICATIONS_DEBUG 1 #define CHECK_THREAD_CONTEXT @@ -112,7 +120,9 @@ do { \ #define CAP_HIGHEST(c) \ ((_highestCapability & (c)) != 0) -#define DARK_TO_FULL_EVALUATE_CLAMSHELL 0 +#if defined(__i386__) || defined(__x86_64__) +#define DARK_TO_FULL_EVALUATE_CLAMSHELL 1 +#endif // Event types for IOPMPowerStateQueue::submitPowerEvent() enum { @@ -129,7 +139,8 @@ enum { kPowerEventAssertionSetLevel, // 11 kPowerEventQueueSleepWakeUUID, // 12 kPowerEventPublishSleepWakeUUID, // 13 - kPowerEventSuspendClient // 14 + kPowerEventSuspendClient, // 14 + kPowerEventSetDisplayPowerOn // 15 }; // For evaluatePolicy() @@ -144,12 +155,17 @@ enum { kStimulusDarkWakeEntry, // 6 kStimulusDarkWakeReentry, // 7 kStimulusDarkWakeEvaluate, // 8 - kStimulusNoIdleSleepPreventers // 9 + kStimulusNoIdleSleepPreventers, // 9 + kStimulusUserIsActive, // 10 + kStimulusUserIsInactive // 11 }; extern "C" { IOReturn OSKextSystemSleepOrWake( UInt32 ); } +extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); +extern "C" addr64_t kvtophys(vm_offset_t va); +extern "C" int stack_snapshot_from_kernel(pid_t pid, void *buf, uint32_t size, uint32_t flags, unsigned *bytesTraced); static void idleSleepTimerExpired( thread_call_param_t, thread_call_param_t ); static void notifySystemShutdown( IOService * root, unsigned long event ); @@ -163,6 +179,11 @@ static const OSSymbol *sleepMessagePEFunction = NULL; #define kIOSleepSupportedKey "IOSleepSupported" #define kIOPMSystemCapabilitiesKey "System Capabilities" +#define kIORequestWranglerIdleKey "IORequestIdle" +#define kDefaultWranglerIdlePeriod 25 // in milliseconds + +#define kIOSleepWakeDebugKey "Persistent-memory-note" + #define kRD_AllPowerSources (kIOPMSupportedOnAC \ | kIOPMSupportedOnBatt \ | kIOPMSupportedOnUPS) @@ -177,6 +198,13 @@ enum #define kLocalEvalClamshellCommand (1 << 15) #define kIdleSleepRetryInterval (3 * 60) +enum { + 
kWranglerPowerStateMin = 0, + kWranglerPowerStateSleep = 2, + kWranglerPowerStateDim = 3, + kWranglerPowerStateMax = 4 +}; + enum { OFF_STATE = 0, RESTART_STATE = 1, @@ -205,13 +233,14 @@ static IOPMPowerState ourPowerStates[NUM_POWER_STATES] = #define kIOPMRootDomainWakeTypeAlarm "Alarm" #define kIOPMRootDomainWakeTypeNetwork "Network" #define kIOPMRootDomainWakeTypeHIDActivity "HID Activity" +#define kIOPMRootDomainWakeTypeNotification "Notification" // Special interest that entitles the interested client from receiving // all system messages. Only used by powerd. // #define kIOPMSystemCapabilityInterest "IOPMSystemCapabilityInterest" -#define kPMSuspendedNotificationClients "PMSuspendedNotificationClients" +#define kPMSuspendedNotificationClients "PMSuspendedNotificationClients" /* * Aggressiveness @@ -280,7 +309,11 @@ static UInt32 gWillShutdown = 0; static UInt32 gPagingOff = 0; static UInt32 gSleepWakeUUIDIsSet = false; static uint32_t gAggressivesState = 0; + +uuid_string_t bootsessionuuid_string; + static uint32_t gDarkWakeFlags = kDarkWakeFlagHIDTickleNone | kDarkWakeFlagIgnoreDiskIOAlways; + static PMStatsStruct gPMStats; #if HIBERNATION @@ -463,6 +496,7 @@ public: IOPMTracePointHandler tracePointHandler; void * tracePointTarget; + uint64_t getPMStatusCode(); private: IOPMrootDomain *owner; IOLock *pciMappingLock; @@ -508,6 +542,7 @@ private: uint64_t modifiedTime; const OSSymbol *ownerString; IOService *ownerService; + uint64_t registryEntryID; IOPMDriverAssertionLevel level; } PMAssertStruct; @@ -610,9 +645,7 @@ extern "C" IOPMRootDomainWillShutdown(); if (OSCompareAndSwap(0, 1, &gPagingOff)) { -#if !CONFIG_EMBEDDED gRootDomain->handlePlatformHaltRestart(kPEPagingOff); -#endif } } @@ -686,9 +719,6 @@ static void disk_sync_callout( thread_call_param_t p0, thread_call_param_t p1 ) if (ON_STATE == powerState) { -#if HIBERNATION - IOHibernateSystemSleep(); -#endif sync_internal(); } #if HIBERNATION @@ -770,7 +800,6 @@ static SYSCTL_PROC(_kern, OID_AUTO, willshutdown, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_willshutdown, "I", ""); -#if !CONFIG_EMBEDDED static int sysctl_progressmeterenable @@ -810,7 +839,6 @@ static SYSCTL_PROC(_kern, OID_AUTO, progressmeter, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN | CTLFLAG_LOCKED, 0, 0, sysctl_progressmeter, "I", ""); -#endif static SYSCTL_INT(_debug, OID_AUTO, darkwake, CTLFLAG_RW, &gDarkWakeFlags, 0, ""); @@ -820,6 +848,8 @@ static const OSSymbol * gIOPMSettingDebugWakeRelativeKey; static const OSSymbol * gIOPMSettingMaintenanceWakeCalendarKey; static const OSSymbol * gIOPMSettingSleepServiceWakeCalendarKey; static const OSSymbol * gIOPMSettingSilentRunningKey; +static const OSSymbol * gIOPMUserTriggeredFullWakeKey; +static const OSSymbol * gIOPMUserIsActiveKey; //****************************************************************************** // start @@ -833,6 +863,9 @@ bool IOPMrootDomain::start( IOService * nub ) OSIterator *psIterator; OSDictionary *tmpDict; IORootParent * patriarch; +#if defined(__i386__) || defined(__x86_64__) + IONotifier * notifier; +#endif super::start(nub); @@ -843,6 +876,8 @@ bool IOPMrootDomain::start( IOService * nub ) gIOPMSettingMaintenanceWakeCalendarKey = OSSymbol::withCString(kIOPMSettingMaintenanceWakeCalendarKey); gIOPMSettingSleepServiceWakeCalendarKey = OSSymbol::withCString(kIOPMSettingSleepServiceWakeCalendarKey); gIOPMSettingSilentRunningKey = OSSymbol::withCStringNoCopy(kIOPMSettingSilentRunningKey); + gIOPMUserTriggeredFullWakeKey = 
OSSymbol::withCStringNoCopy(kIOPMUserTriggeredFullWakeKey); + gIOPMUserIsActiveKey = OSSymbol::withCStringNoCopy(kIOPMUserIsActiveKey); gIOPMStatsApplicationResponseTimedOut = OSSymbol::withCString(kIOPMStatsResponseTimedOut); gIOPMStatsApplicationResponseCancel = OSSymbol::withCString(kIOPMStatsResponseCancel); @@ -890,7 +925,17 @@ bool IOPMrootDomain::start( IOService * nub ) diskSyncCalloutEntry = thread_call_allocate( &disk_sync_callout, (thread_call_param_t) this); - + + stackshotOffloader = thread_call_allocate(&saveTimeoutAppStackShot, + (thread_call_param_t) this); + +#if DARK_TO_FULL_EVALUATE_CLAMSHELL + fullWakeThreadCall = thread_call_allocate( + OSMemberFunctionCast(thread_call_func_t, this, + &IOPMrootDomain::fullWakeDelayedWork), + (thread_call_param_t) this); +#endif + setProperty(kIOSleepSupportedKey, true); bzero(&gPMStats, sizeof(gPMStats)); @@ -910,6 +955,11 @@ bool IOPMrootDomain::start( IOService * nub ) acAdaptorConnected = true; clamshellSleepDisabled = false; + // User active state at boot + fullWakeReason = kFullWakeReasonLocalUser; + userIsActive = userWasActive = true; + setProperty(gIOPMUserIsActiveKey, kOSBooleanTrue); + // Set the default system capabilities at boot. _currentCapability = kIOPMSystemCapabilityCPU | kIOPMSystemCapabilityGraphics | @@ -922,12 +972,15 @@ bool IOPMrootDomain::start( IOService * nub ) setProperty(kIOPMSystemCapabilitiesKey, _currentCapability, 64); queuedSleepWakeUUIDString = NULL; + initializeBootSessionUUID(); pmStatsAppResponses = OSArray::withCapacity(5); _statsNameKey = OSSymbol::withCString(kIOPMStatsNameKey); _statsPIDKey = OSSymbol::withCString(kIOPMStatsPIDKey); _statsTimeMSKey = OSSymbol::withCString(kIOPMStatsTimeMSKey); _statsResponseTypeKey = OSSymbol::withCString(kIOPMStatsApplicationResponseTypeKey); _statsMessageTypeKey = OSSymbol::withCString(kIOPMStatsMessageTypeKey); + _statsPowerCapsKey = OSSymbol::withCString(kIOPMStatsPowerCapabilityKey); + noAckApps = OSOrderedSet::withCapacity(16); idxPMCPUClamshell = kCPUUnknownIndex; idxPMCPULimitedPower = kCPUUnknownIndex; @@ -979,7 +1032,7 @@ bool IOPMrootDomain::start( IOService * nub ) if (gIOKitDebug & (kIOLogDriverPower1 | kIOLogDriverPower2)) { // Setup our PM logging & recording code - timeline = IOPMTimeline::timeline(this); + timeline = IOPMTimeline::timeline(this); if (timeline) { OSDictionary *tlInfo = timeline->copyInfoDictionary(); @@ -1006,6 +1059,30 @@ bool IOPMrootDomain::start( IOService * nub ) } #endif +#if defined(__i386__) || defined(__x86_64__) + + if ((tmpDict = serviceMatching("IODTNVRAM"))) + { + notifier = addMatchingNotification( + gIOFirstPublishNotification, tmpDict, + (IOServiceMatchingNotificationHandler) &IONVRAMMatchPublished, + this, 0); + tmpDict->release(); + } + + wranglerIdleSettings = NULL; + OSNumber * wranglerIdlePeriod = NULL; + wranglerIdleSettings = OSDictionary::withCapacity(1); + wranglerIdlePeriod = OSNumber::withNumber(kDefaultWranglerIdlePeriod, 32); + + if(wranglerIdleSettings && wranglerIdlePeriod) + wranglerIdleSettings->setObject(kIORequestWranglerIdleKey, + wranglerIdlePeriod); + + if(wranglerIdlePeriod) + wranglerIdlePeriod->release(); +#endif + const OSSymbol *ucClassName = OSSymbol::withCStringNoCopy("RootDomainUserClient"); setProperty(gIOUserClientClassKey, (OSObject *) ucClassName); ucClassName->release(); @@ -1032,10 +1109,8 @@ bool IOPMrootDomain::start( IOService * nub ) sysctl_register_oid(&sysctl__kern_sleeptime); sysctl_register_oid(&sysctl__kern_waketime); sysctl_register_oid(&sysctl__kern_willshutdown); 
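The sysctls registered on either side of this point (kern.willshutdown above, kern.progressmeterenable and kern.progressmeter just below, now registered unconditionally) are reachable through the ordinary sysctl interface. A minimal user-space sketch of querying one of them; the OID name is derived from the SYSCTL_PROC declarations in an earlier hunk and should be treated as an assumption:

    // Illustrative query of kern.progressmeterenable from user space.
    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int main(void)
    {
        int enabled = 0;
        size_t len = sizeof(enabled);
        if (sysctlbyname("kern.progressmeterenable", &enabled, &len, NULL, 0) == 0)
            printf("kern.progressmeterenable = %d\n", enabled);
        else
            perror("sysctlbyname");
        return 0;
    }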
-#if !CONFIG_EMBEDDED sysctl_register_oid(&sysctl__kern_progressmeterenable); sysctl_register_oid(&sysctl__kern_progressmeter); -#endif /* !CONFIG_EMBEDDED */ #if HIBERNATION IOHibernateSystemInit(this); @@ -1161,7 +1236,7 @@ bool IOPMrootDomain::pmNotificationIsSuspended(uint32_t pid) void IOPMrootDomain::suspendPMNotificationsForPID(uint32_t pid, bool doSuspend) { if(pmPowerStateQueue) { - pmPowerStateQueue->submitPowerEvent(kPowerEventSuspendClient, (void *)pid, (uint64_t)doSuspend ); + pmPowerStateQueue->submitPowerEvent(kPowerEventSuspendClient, (void *)(uintptr_t)pid, (uint64_t)doSuspend ); } return; } @@ -1600,7 +1675,7 @@ IOReturn IOPMrootDomain::joinAggressiveness( if (!service || (service == this)) return kIOReturnBadArgument; - DLOG("joinAggressiveness %s %p\n", service->getName(), service); + DLOG("joinAggressiveness %s %p\n", service->getName(), OBFUSCATE(service)); request = IONew(AggressivesRequest, 1); if (!request) @@ -1993,6 +2068,53 @@ void IOPMrootDomain::handleSleepTimerExpiration( void ) adjustPowerState(true); } +//****************************************************************************** +// getTimeToIdleSleep +// +// Returns number of seconds left before going into idle sleep. +// Caller has to make sure that idle sleep is allowed at the time of calling +// this function +//****************************************************************************** + +uint32_t IOPMrootDomain::getTimeToIdleSleep( void ) +{ + + AbsoluteTime now, lastActivityTime; + uint64_t nanos; + uint32_t minutesSinceUserInactive = 0; + uint32_t sleepDelay = 0; + + if (sleepSlider == 0) + return 0xffffffff; + + if (userActivityTime) + lastActivityTime = userActivityTime; + else + lastActivityTime = userBecameInactiveTime; + + clock_get_uptime(&now); + if (CMP_ABSOLUTETIME(&now, &lastActivityTime) > 0) + { + SUB_ABSOLUTETIME(&now, &lastActivityTime); + absolutetime_to_nanoseconds(now, &nanos); + minutesSinceUserInactive = nanos / (60000000000ULL); + + if (minutesSinceUserInactive >= sleepSlider) + sleepDelay = 0; + else + sleepDelay = sleepSlider - minutesSinceUserInactive; + } + else + { + sleepDelay = sleepSlider; + } + + DLOG("user inactive %u min, time to idle sleep %u min\n", + minutesSinceUserInactive, sleepDelay); + + return (sleepDelay * 60); +} + //****************************************************************************** // setQuickSpinDownTimeout // @@ -2031,6 +2153,8 @@ IOReturn IOPMrootDomain::sleepSystem( void ) /* private */ IOReturn IOPMrootDomain::sleepSystemOptions( OSDictionary *options ) { + OSObject *obj = NULL; + OSString *reason = NULL; /* sleepSystem is a public function, and may be called by any kernel driver. * And that's bad - drivers should sleep the system by calling * receivePowerNotification() instead. Drivers should not use sleepSystem. @@ -2038,122 +2162,42 @@ IOReturn IOPMrootDomain::sleepSystemOptions( OSDictionary *options ) * Note that user space app calls to IOPMSleepSystem() will also travel * this code path and thus be correctly identified as software sleeps. 
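The minute math in getTimeToIdleSleep() above truncates the inactive time to whole minutes before comparing against sleepSlider, so a caller can observe up to 59 extra seconds of delay. A self-contained sketch of the same computation with sample values (10-minute slider, 4 minutes inactive); the numbers are hypothetical:

    // Standalone illustration of getTimeToIdleSleep()'s arithmetic; the
    // kernel clock reads are replaced by a constant for clarity.
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint32_t sleepSlider = 10;                          // minutes
        const uint64_t nanosInactive = 4ULL * 60 * 1000000000ULL; // 4 minutes
        uint32_t minutesInactive = (uint32_t)(nanosInactive / 60000000000ULL);
        uint32_t sleepDelay = (minutesInactive >= sleepSlider)
                                  ? 0 : (sleepSlider - minutesInactive);
        printf("time to idle sleep: %u s\n", sleepDelay * 60);    // prints 360
        return 0;
    }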
*/ - + if (options && options->getObject("OSSwitch")) { // Log specific sleep cause for OS Switch hibernation return privateSleepSystem( kIOPMSleepReasonOSSwitchHibernate); - } else { - return privateSleepSystem( kIOPMSleepReasonSoftware); } + + if (options && (obj = options->getObject("Sleep Reason"))) + { + reason = OSDynamicCast(OSString, obj); + if (reason && reason->isEqualTo(kIOPMDarkWakeThermalEmergencyKey)) + return privateSleepSystem(kIOPMSleepReasonDarkWakeThermalEmergency); + } + + return privateSleepSystem( kIOPMSleepReasonSoftware); } /* private */ IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason ) { - static const char * IOPMSleepReasons[] = { - "", - kIOPMClamshellSleepKey, - kIOPMPowerButtonSleepKey, - kIOPMSoftwareSleepKey, - kIOPMOSSwitchHibernationKey, - kIOPMIdleSleepKey, - kIOPMLowPowerSleepKey, - kIOPMThermalEmergencySleepKey, - kIOPMMaintenanceSleepKey, - kIOPMSleepServiceExitKey, - kIOPMDarkWakeThermalEmergencyKey - }; - - PMEventDetails *details; + /* Called from both gated and non-gated context */ - if (!checkSystemCanSleep()) + if (!checkSystemSleepEnabled() || !pmPowerStateQueue) { - // Record why the system couldn't sleep - details = PMEventDetails::eventDetails(kIOPMEventTypeSleep, NULL, - sleepReason, kIOReturnNotPermitted); - - recordAndReleasePMEvent( details ); - return kIOReturnNotPermitted; - } - - if (kIOPMSleepReasonDarkWakeThermalEmergency == sleepReason) - messageClients(kIOPMMessageDarkWakeThermalEmergency); + recordPMEvent(kIOPMEventTypeSleep, NULL, + sleepReason, kIOReturnNotPermitted); - if (timeline) - timeline->setSleepCycleInProgressFlag(true); - - // Time to publish a UUID for the Sleep --> Wake cycle - if(pmPowerStateQueue) { - pmPowerStateQueue->submitPowerEvent(kPowerEventPublishSleepWakeUUID, (void *)true); - } - - // Log the beginning of system sleep. - details = PMEventDetails::eventDetails(kIOPMEventTypeSleep, NULL, - sleepReason, kIOReturnSuccess); - - recordAndReleasePMEvent( details ); - - // Record sleep cause in IORegistry - lastSleepReason = sleepReason; - sleepReason -= (kIOPMSleepReasonClamshell - 1); - if (sleepReason && (sleepReason < sizeof(IOPMSleepReasons)/sizeof(IOPMSleepReasons[0]))) { - setProperty(kRootDomainSleepReasonKey, IOPMSleepReasons[sleepReason]); + return kIOReturnNotPermitted; } - if (pmPowerStateQueue) - pmPowerStateQueue->submitPowerEvent( + pmPowerStateQueue->submitPowerEvent( kPowerEventPolicyStimulus, - (void *) kStimulusDemandSystemSleep ); - - return kIOReturnSuccess; -} + (void *) kStimulusDemandSystemSleep, + sleepReason); -IOReturn IOPMrootDomain::recordPMEventGated(PMEventDetails *record) -{ - // If we don't have a place to log to, we can't actually - // log anything. Chances are, the person who is asking us to do - // the PM logging has forgotten to set the right bootflags - if(!timeline) return kIOReturnSuccess; - - if(gIOPMWorkLoop->inGate() == false) { - - IOReturn ret = gIOPMWorkLoop->runAction( - OSMemberFunctionCast(IOWorkLoop::Action, this, &IOPMrootDomain::recordPMEventGated), - (OSObject *)this, - (void *)record); - - return ret; - } - else { - // Now that we're guaranteed to be running in gate ... 
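The "Sleep Reason" option consumed at the top of sleepSystemOptions() above gives kernel callers a way to request a specific sleep cause. A hedged sketch of how such an options dictionary might be built; only the key string and the thermal-emergency mapping come from this hunk, the rest is illustrative:

    // Kernel-side sketch (not part of the patch): build the dictionary
    // that sleepSystemOptions() inspects for a dark-wake thermal emergency.
    OSDictionary * options = OSDictionary::withCapacity(1);
    OSString *     reason  = OSString::withCString(kIOPMDarkWakeThermalEmergencyKey);

    if (options && reason)
    {
        options->setObject("Sleep Reason", reason);
        // A caller would then pass 'options' to sleepSystemOptions(), which
        // maps this string to kIOPMSleepReasonDarkWakeThermalEmergency.
    }
    if (reason)  reason->release();
    if (options) options->release();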
- - // Check the validity of the argument we are given - if(!record) - return kIOReturnBadArgument; - - // Record a driver event, or a system event - if(record->eventClassifier == kIOPMEventClassDriverEvent - || record->eventClassifier == kIOPMEventClassSystemEvent) - return this->recordPMEvent(record); - - else - return kIOReturnBadArgument; - } -} - -IOReturn IOPMrootDomain::recordAndReleasePMEventGated(PMEventDetails *record) -{ - IOReturn ret = kIOReturnBadArgument; - - if (record) - { - ret = recordPMEventGated(record); - record->release(); - } - - return ret; } //****************************************************************************** @@ -2164,8 +2208,6 @@ IOReturn IOPMrootDomain::recordAndReleasePMEventGated(PMEventDetails *record) void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) { - PMEventDetails *details; - ASSERT_GATED(); DLOG("PowerChangeDone: %u->%u\n", (uint32_t) previousPowerState, (uint32_t) getPowerState()); @@ -2176,13 +2218,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) if (previousPowerState != ON_STATE) break; - details = PMEventDetails::eventDetails( - kIOPMEventTypeSleepDone, - NULL, - NULL, - kIOReturnSuccess); - - recordAndReleasePMEvent( details ); + recordPMEvent(kIOPMEventTypeSleepDone, NULL, 0, kIOReturnSuccess); // re-enable this timer for next sleep cancelIdleSleepTimer(); @@ -2206,6 +2242,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) LOG("System Sleep\n"); #endif + ((IOService *)this)->stop_watchdog_timer(); //14456299 getPlatform()->sleepKernel(); // The CPU(s) are off at this point, @@ -2214,6 +2251,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) clock_get_uptime(&systemWakeTime); _highestCapability = 0; + ((IOService *)this)->start_watchdog_timer(); //14456299 #if HIBERNATION IOHibernateSystemWake(); #endif @@ -2239,28 +2277,23 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) // And start logging the wake event here // TODO: Publish the wakeReason string as an integer - details = PMEventDetails::eventDetails( - kIOPMEventTypeWake, - NULL, - 0, - kIOReturnSuccess); - - recordAndReleasePMEvent( details ); - + recordPMEvent(kIOPMEventTypeWake, NULL, 0, kIOReturnSuccess); + #ifndef __LP64__ systemWake(); #endif #if defined(__i386__) || defined(__x86_64__) - wranglerTickled = false; - graphicsSuppressed = false; - darkWakePostTickle = false; - logGraphicsClamp = true; - logWranglerTickle = true; + wranglerTickled = false; + graphicsSuppressed = false; + darkWakePostTickle = false; + darkWakeToSleepASAP = true; + logGraphicsClamp = true; sleepTimerMaintenance = false; + sleepToStandby = false; wranglerTickleLatched = false; - darkWakeThermalAlarm = false; - darkWakeThermalEmergency = false; + userWasActive = false; + fullWakeReason = kFullWakeReasonNone; OSString * wakeType = OSDynamicCast( OSString, getProperty(kIOPMRootDomainWakeTypeKey)); @@ -2271,7 +2304,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) { lowBatteryCondition = true; darkWakeMaintenance = true; - darkWakeToSleepASAP = true; } else if ((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) != 0) { @@ -2283,6 +2315,9 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) { // Hibernate aborted, or EFI brought up graphics wranglerTickled = true; + DLOG("hibernation aborted %d, options 0x%x\n", + hibernateAborted, + hibOptions ? 
hibOptions->unsigned32BitValue() : 0); } else if (wakeType && ( @@ -2298,7 +2333,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) { // SMC standby timer trumps SleepX darkWakeMaintenance = true; - darkWakeToSleepASAP = true; sleepTimerMaintenance = true; } else @@ -2313,15 +2347,16 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance)) { darkWakeMaintenance = true; - darkWakeToSleepASAP = true; } else if (wakeType && wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepService)) { - darkWakeToSleepASAP = true; -// darkWakeMaintenance = true; // ???? + darkWakeMaintenance = true; darkWakeSleepService = true; + if (kIOHibernateStateWakingFromHibernate == gIOHibernateState) { + sleepToStandby = true; + } } else { @@ -2331,8 +2366,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) if (_lastDebugWakeSeconds && (!wakeReason || wakeReason->isEqualTo(""))) wranglerTickled = true; - else - darkWakeToSleepASAP = true; } } else @@ -2341,7 +2374,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer)) { darkWakeMaintenance = true; - darkWakeToSleepASAP = true; sleepTimerMaintenance = true; } else if (hibernateAborted || !wakeType || @@ -2354,12 +2386,15 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) else { darkWakeMaintenance = true; - darkWakeToSleepASAP = true; } } if (wranglerTickled) + { + darkWakeToSleepASAP = false; + fullWakeReason = kFullWakeReasonLocalUser; reportUserInput(); + } else if (!darkWakeMaintenance) { // Early/late tickle for non-maintenance wake. @@ -2374,8 +2409,10 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) #else /* !__i386__ && !__x86_64__ */ // stay awake for at least 30 seconds wranglerTickled = true; + fullWakeReason = kFullWakeReasonLocalUser; startIdleSleepTimer(30); #endif + sleepCnt++; changePowerStateToPriv(ON_STATE); } break; @@ -2383,13 +2420,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) case ON_STATE: { if (previousPowerState != ON_STATE) { - details = PMEventDetails::eventDetails( - kIOPMEventTypeWakeDone, - NULL, - 0, - kIOReturnSuccess); - - recordAndReleasePMEvent( details ); + recordPMEvent(kIOPMEventTypeWakeDone, NULL, 0, kIOReturnSuccess); } } break; } @@ -2416,24 +2447,26 @@ IOReturn IOPMrootDomain::requestPowerDomainState ( // updatePreventIdleSleepList // // Called by IOService on PM work loop. +// Returns true if PM policy recognized the driver's desire to prevent idle +// sleep and updated the list of idle sleep preventers. 
Returns false otherwise. //****************************************************************************** -void IOPMrootDomain::updatePreventIdleSleepList( +bool IOPMrootDomain::updatePreventIdleSleepList( IOService * service, bool addNotRemove ) { unsigned int oldCount, newCount; ASSERT_GATED(); +#if defined(__i386__) || defined(__x86_64__) // Disregard disk I/O (anything besides the display wrangler) // as a factor preventing idle sleep,except in the case of legacy disk I/O - if ((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOAlways) && addNotRemove && (service != wrangler) && (service != this)) { - return; + return false; } - +#endif oldCount = preventIdleSleepList->getCount(); if (addNotRemove) { @@ -2465,6 +2498,15 @@ void IOPMrootDomain::updatePreventIdleSleepList( changePowerStateTo(SLEEP_STATE); evaluatePolicy( kStimulusNoIdleSleepPreventers ); } + +#if defined(__i386__) || defined(__x86_64__) + if (addNotRemove && (service == wrangler) && !checkSystemCanSustainFullWake()) + { + return false; + } +#endif + + return true; } //****************************************************************************** @@ -2517,6 +2559,7 @@ bool IOPMrootDomain::tellChangeDown( unsigned long stateNum ) if (SLEEP_STATE == stateNum) { + // Legacy apps were already told in the full->dark transition if (!ignoreTellChangeDown) tracePoint( kIOPMTracePointSleepApplications ); else @@ -2565,8 +2608,25 @@ bool IOPMrootDomain::askChangeDown( unsigned long stateNum ) //****************************************************************************** // askChangeDownDone // -// Called by PM after all apps have responded to kIOMessageCanSystemSleep. -// pmconfigd may create a deny sleep assertion before ack'ing. +// An opportunity for root domain to cancel the power transition, +// possibly due to an assertion created by powerd in response to +// kIOMessageCanSystemSleep. +// +// Idle sleep: +// full -> dark wake transition +// 1. Notify apps and powerd with kIOMessageCanSystemSleep +// 2. askChangeDownDone() +// dark -> sleep transition +// 1. Notify powerd with kIOMessageCanSystemSleep +// 2. askChangeDownDone() +// +// Demand sleep: +// full -> dark wake transition +// 1. Notify powerd with kIOMessageCanSystemSleep +// 2. askChangeDownDone() +// dark -> sleep transition +// 1. Notify powerd with kIOMessageCanSystemSleep +// 2. askChangeDownDone() //****************************************************************************** void IOPMrootDomain::askChangeDownDone( @@ -2581,9 +2641,9 @@ void IOPMrootDomain::askChangeDownDone( { // Dark->Sleep transition. // Check if there are any deny sleep assertions. - // Full->Dark transition is never cancelled. + // lastSleepReason already set by handleOurPowerChangeStart() - if (!checkSystemCanSleep(true)) + if (!checkSystemCanSleep(lastSleepReason)) { // Cancel dark wake to sleep transition. // Must re-scan assertions upon entering dark wake. @@ -2622,7 +2682,7 @@ void IOPMrootDomain::tellNoChangeDown( unsigned long stateNum ) startIdleSleepTimer(idleSeconds); } } - else if (sleepSlider && wranglerAsleep) + else if (sleepSlider && !userIsActive) { // Display wrangler is already asleep, it won't trigger the next // idle sleep attempt.
Schedule a future idle sleep attempt, and @@ -2667,6 +2727,7 @@ void IOPMrootDomain::tellChangeUp( unsigned long stateNum ) if (getPowerState() == ON_STATE) { // this is a quick wake from aborted sleep + ignoreIdleSleepTimer = false; if (idleSeconds && !wrangler) { // stay awake for at least idleSeconds @@ -2678,12 +2739,15 @@ void IOPMrootDomain::tellChangeUp( unsigned long stateNum ) tracePoint( kIOPMTracePointWakeApplications ); - if (pmStatsAppResponses) + +#if defined(__i386__) || defined(__x86_64__) + if (spindumpDesc) { - setProperty(kIOPMSleepStatisticsAppsKey, pmStatsAppResponses); - pmStatsAppResponses->release(); - pmStatsAppResponses = OSArray::withCapacity(5); + AbsoluteTime deadline; + clock_interval_to_deadline( 30, kSecondScale, &deadline ); + thread_call_enter_delayed(stackshotOffloader, deadline); } +#endif tellClients( kIOMessageSystemHasPoweredOn ); } @@ -2700,7 +2764,7 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( UInt32 messageType, IOService * service, void * messageArgs, vm_size_t argSize ) { - IOReturn ret; + IOReturn ret = 0; DLOG("sysPowerDownHandler message %s\n", getIOMessageString(messageType)); @@ -2737,7 +2801,8 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( { params->maxWaitForReply = kCapabilityClientMaxWait; } - DLOG("sysPowerDownHandler timeout %d s\n", (int) (params->maxWaitForReply / 1000 / 1000)); + DLOG("sysPowerDownHandler max wait %d s\n", + (int) (params->maxWaitForReply / 1000 / 1000)); #endif // Notify platform that sleep has begun, after the early @@ -2756,28 +2821,28 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( clock_interval_to_deadline( 30, kSecondScale, &deadline ); thread_call_enter1_delayed( gRootDomain->diskSyncCalloutEntry, - (thread_call_param_t) params->notifyRef, + (thread_call_param_t)(uintptr_t) params->notifyRef, deadline ); } else thread_call_enter1( gRootDomain->diskSyncCalloutEntry, - (thread_call_param_t) params->notifyRef); + (thread_call_param_t)(uintptr_t) params->notifyRef); } -#if HIBERNATION else if ((params->changeFlags & kIOPMSystemCapabilityDidChange) && (params->toCapabilities & kIOPMSystemCapabilityCPU) && (params->fromCapabilities & kIOPMSystemCapabilityCPU) == 0) { +#if HIBERNATION // We will ack within 110 seconds params->maxWaitForReply = 110 * 1000 * 1000; thread_call_enter1( gRootDomain->diskSyncCalloutEntry, - (thread_call_param_t) params->notifyRef); - } + (thread_call_param_t)(uintptr_t) params->notifyRef); #endif + } ret = kIOReturnSuccess; } @@ -2898,6 +2963,24 @@ void IOPMrootDomain::handlePublishSleepWakeUUID( bool shouldPublish ) } } +//****************************************************************************** +// initializeBootSessionUUID +// +// Initialize the boot session UUID at boot and set it in the registry.
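Because initializeBootSessionUUID() (introduced here, body in the next hunk) publishes the UUID as a root-domain registry property, user space can read it back with plain IORegistry calls. A sketch, assuming kIOPMBootSessionUUIDKey expands to "BootSessionUUID"; the key's definition is outside this patch:

    // User-space sketch: read the boot session UUID property.
    #include <IOKit/IOKitLib.h>
    #include <CoreFoundation/CoreFoundation.h>

    int main(void)
    {
        io_service_t root = IOServiceGetMatchingService(kIOMasterPortDefault,
                                IOServiceMatching("IOPMrootDomain"));
        if (!root) return 1;

        CFTypeRef uuid = IORegistryEntryCreateCFProperty(root,
                             CFSTR("BootSessionUUID"), kCFAllocatorDefault, 0);
        if (uuid) {
            CFShow(uuid);   // uppercase UUID string, per uuid_unparse_upper()
            CFRelease(uuid);
        }
        IOObjectRelease(root);
        return 0;
    }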
+//****************************************************************************** + +void IOPMrootDomain::initializeBootSessionUUID(void) +{ + uuid_t new_uuid; + uuid_string_t new_uuid_string; + + uuid_generate(new_uuid); + uuid_unparse_upper(new_uuid, new_uuid_string); + memcpy(bootsessionuuid_string, new_uuid_string, sizeof(uuid_string_t)); + + setProperty(kIOPMBootSessionUUIDKey, new_uuid_string); +} + //****************************************************************************** // changePowerStateTo & changePowerStateToPriv // @@ -2955,6 +3038,26 @@ hibernate_should_abort(void) return (0); } +//****************************************************************************** +// willNotifyPowerChildren +// +// Called after all interested drivers have acknowledged the power change, +// but before any power child is informed. Dispatched through a thread call, +// so it is safe to perform work that might block on a sleeping disk. PM state +// machine (not thread) will block w/o timeout until this function returns. +//****************************************************************************** + +void IOPMrootDomain::willNotifyPowerChildren( IOPMPowerStateIndex newPowerState ) +{ +#if HIBERNATION + if (SLEEP_STATE == newPowerState) + { + IOHibernateSystemSleep(); + IOHibernateIOKitSleep(); + } +#endif +} + //****************************************************************************** // sleepOnClamshellClosed // @@ -2989,7 +3092,7 @@ void IOPMrootDomain::sendClientClamshellNotification( void ) * ( kClamshellStateBit | kClamshellSleepBit ) */ messageClients(kIOPMMessageClamshellStateChange, - (void *) ( (clamshellClosed ? kClamshellStateBit : 0) + (void *)(uintptr_t) ( (clamshellClosed ? kClamshellStateBit : 0) | ( shouldSleepOnClamshellClosed() ? kClamshellSleepBit : 0)) ); } @@ -3769,6 +3872,8 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( // Current factors based on environment and assertions if (sleepTimerMaintenance) currentFactors |= kIOPMSleepFactorSleepTimerWake; + if (standbyEnabled && sleepToStandby) + currentFactors |= kIOPMSleepFactorSleepTimerWake; if (!clamshellClosed) currentFactors |= kIOPMSleepFactorLidOpen; if (acAdaptorConnected) @@ -3796,10 +3901,17 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( if (getPMAssertionLevel(kIOPMDriverAssertionMagicPacketWakeEnabledBit) != kIOPMDriverAssertionLevelOff) currentFactors |= kIOPMSleepFactorMagicPacketWakeEnabled; +#if TCPKEEPALIVE + if (getPMAssertionLevel(kIOPMDriverAssertionNetworkKeepAliveActiveBit) != + kIOPMDriverAssertionLevelOff) + currentFactors |= kIOPMSleepFactorNetworkKeepAliveActive; +#endif if (!powerOffEnabled) currentFactors |= kIOPMSleepFactorAutoPowerOffDisabled; if (desktopMode) currentFactors |= kIOPMSleepFactorExternalDisplay; + if (userWasActive) + currentFactors |= kIOPMSleepFactorLocalUserActivity; DLOG("sleep factors 0x%llx\n", currentFactors); @@ -3978,7 +4090,7 @@ void IOPMrootDomain::evaluateSystemSleepPolicyEarly( void ) if (evaluateSystemSleepPolicy(&gEarlySystemSleepParams, kIOPMSleepPhase1, &hibernateMode)) { - if (!hibernateNoDefeat && + if (!hibernateRetry && ((getSleepTypeAttributes(gEarlySystemSleepParams.sleepType) & kIOPMSleepAttributeHibernateSetup) == 0)) { @@ -4024,19 +4136,21 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void ) kIOPMSleepAttributeHibernateSetup)) { // Final evaluation picked a state requiring hibernation, - // but hibernate setup was skipped. Retry using the early - // sleep parameters. + // but hibernate setup was skipped.
Arm a short sleep using + // the early non-hibernate sleep parameters. + // Set hibernateRetry flag to force hibernate setup on the + // next sleep. bcopy(&gEarlySystemSleepParams, ¶ms, sizeof(params)); params.sleepType = kIOPMSleepTypeAbortedSleep; params.ecWakeTimer = 1; - hibernateNoDefeat = true; + hibernateRetry = true; DLOG("wake in %u secs for hibernateDisabled %d, hibernateAborted %d\n", params.ecWakeTimer, hibernateDisabled, hibernateAborted); } else { - hibernateNoDefeat = false; + hibernateRetry = false; } paramsData = OSData::withBytes(¶ms, sizeof(params)); @@ -4167,7 +4281,7 @@ platformHaltRestartApplier( OSObject * object, void * context ) ctx = (HaltRestartApplierContext *) context; memset(¬ify, 0, sizeof(notify)); - notify.powerRef = (void *)ctx->Counter; + notify.powerRef = (void *)(uintptr_t)ctx->Counter; notify.returnValue = 0; notify.stateNumber = ctx->PowerState; notify.stateFlags = ctx->PowerFlags; @@ -4190,7 +4304,7 @@ platformHaltRestartApplier( OSObject * object, void * context ) LOG("%s handler %p took %u ms\n", (ctx->MessageType == kIOMessageSystemWillPowerOff) ? "PowerOff" : (ctx->MessageType == kIOMessageSystemPagingOff) ? "PagingOff" : "Restart", - notifier->handler, (uint32_t) deltaTime ); + OBFUSCATE(notifier->handler), (uint32_t) deltaTime ); } } @@ -4380,6 +4494,11 @@ void IOPMrootDomain::tagPowerPlaneService( OSMemberFunctionCast( IOPMActionActivityTickle, this, &IOPMrootDomain::handleActivityTickleForDisplayWrangler); + + actions->actionUpdatePowerClient = + OSMemberFunctionCast( + IOPMActionUpdatePowerClient, this, + &IOPMrootDomain::handleUpdatePowerClientForDisplayWrangler); } return; } @@ -4432,45 +4551,36 @@ void IOPMrootDomain::tagPowerPlaneService( //****************************************************************************** void IOPMrootDomain::overrideOurPowerChange( - IOService * service, - IOPMActions * actions, - unsigned long * inOutPowerState, - uint32_t * inOutChangeFlags ) + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex * inOutPowerState, + IOPMPowerChangeFlags * inOutChangeFlags, + IOPMRequestTag requestTag ) { - uint32_t powerState = (uint32_t) *inOutPowerState; - uint32_t changeFlags = *inOutChangeFlags; - uint32_t currentPowerState = (uint32_t) getPowerState(); + uint32_t powerState = (uint32_t) *inOutPowerState; + uint32_t changeFlags = *inOutChangeFlags; + uint32_t currentPowerState = (uint32_t) getPowerState(); if (changeFlags & kIOPMParentInitiated) { - // FIXME: cancel any parent change (unexpected) // Root parent is permanently pegged at max power, - // kIOPMParentInitiated is unexpected. + // a parent initiated power change is unexpected. + *inOutChangeFlags |= kIOPMNotDone; return; } if (powerState < currentPowerState) { - if ((changeFlags & kIOPMSkipAskPowerDown) == 0) - { - /* Convenient place to run any code at idle sleep time - * IOPMrootDomain initiates an idle sleep here - * - * Set last sleep cause accordingly. - */ - pmPowerStateQueue->submitPowerEvent(kPowerEventPublishSleepWakeUUID, (void *)true); - - lastSleepReason = kIOPMSleepReasonIdle; - setProperty(kRootDomainSleepReasonKey, kIOPMIdleSleepKey); - } if (CAP_CURRENT(kIOPMSystemCapabilityGraphics)) { // Root domain is dropping power state ON->SLEEP. - // If system is in full wake, first drop to dark wake. + // If system is in full wake, first drop to dark wake by + // converting the power state transitions to a capability + // change transition. darkWakeToSleepASAP = true; - // Drop graphics capability. 
+ // Drop graphics and audio capability. // No transition if system is already in dark wake. _desiredCapability &= ~( @@ -4485,7 +4595,7 @@ void IOPMrootDomain::overrideOurPowerChange( } else { - // Broadcast power down + // Broadcast root power down *inOutChangeFlags |= kIOPMRootChangeDown; } } @@ -4501,13 +4611,16 @@ void IOPMrootDomain::overrideOurPowerChange( } void IOPMrootDomain::handleOurPowerChangeStart( - IOService * service, - IOPMActions * actions, - uint32_t powerState, - uint32_t * inOutChangeFlags ) + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex powerState, + IOPMPowerChangeFlags * inOutChangeFlags, + IOPMRequestTag requestTag ) { - uint32_t changeFlags = *inOutChangeFlags; - uint32_t currentPowerState = (uint32_t) getPowerState(); + uint32_t changeFlags = *inOutChangeFlags; + uint32_t currentPowerState = (uint32_t) getPowerState(); + uint32_t sleepReason = requestTag ? requestTag : kIOPMSleepReasonIdle; + bool publishSleepReason = false; _systemTransitionType = kSystemTransitionNone; _systemMessageClientMask = 0; @@ -4541,13 +4654,16 @@ void IOPMrootDomain::handleOurPowerChangeStart( _desiredCapability = kIOPMSystemCapabilityCPU | kIOPMSystemCapabilityNetwork; - // Check for early HID events (e.g. LID open) - if (wranglerTickled) + // Early exit from dark wake to full (e.g. LID open) + if (kFullWakeReasonNone != fullWakeReason) { _desiredCapability |= ( kIOPMSystemCapabilityGraphics | kIOPMSystemCapabilityAudio ); } +#if HIBERNATION + IOHibernateSetWakeCapabilities(_desiredCapability); +#endif } // Update pending wake capability at the beginning of every @@ -4578,8 +4694,6 @@ void IOPMrootDomain::handleOurPowerChangeStart( if (__builtin_popcount(_pendingCapability) < __builtin_popcount(_currentCapability)) capabilityLoss = true; - if (CAP_LOSS(kIOPMSystemCapabilityGraphics)) - rejectWranglerTickle = true; } // 1. Capability change. @@ -4590,15 +4704,15 @@ void IOPMrootDomain::handleOurPowerChangeStart( if (CAP_GAIN(kIOPMSystemCapabilityGraphics)) { tracePoint( kIOPMTracePointDarkWakeExit ); - wranglerSleepIgnored = false; - sleepTimerMaintenance = false; - hibernateNoDefeat = false; - _systemMessageClientMask = kSystemMessageClientUser; - if ((_highestCapability & kIOPMSystemCapabilityGraphics) == 0) - _systemMessageClientMask |= kSystemMessageClientKernel; - IOService::setAdvisoryTickleEnable( true ); - tellClients(kIOMessageSystemWillPowerOn); + if (pmStatsAppResponses) + { + setProperty(kIOPMSleepStatisticsAppsKey, pmStatsAppResponses); + pmStatsAppResponses->release(); + pmStatsAppResponses = OSArray::withCapacity(5); + } + + willEnterFullWake(); } // Full to Dark transition. @@ -4606,8 +4720,16 @@ void IOPMrootDomain::handleOurPowerChangeStart( { tracePoint( kIOPMTracePointDarkWakeEntry ); *inOutChangeFlags |= kIOPMSyncTellPowerDown; - _systemMessageClientMask = kSystemMessageClientUser; + _systemMessageClientMask = kSystemMessageClientPowerd | + kSystemMessageClientLegacyApp; IOService::setAdvisoryTickleEnable( false ); + + // Publish the sleep reason for full to dark wake + publishSleepReason = true; + lastSleepReason = fullToDarkReason = sleepReason; + + // Publish a UUID for the Sleep --> Wake cycle + handlePublishSleepWakeUUID(true); } } @@ -4617,43 +4739,85 @@ void IOPMrootDomain::handleOurPowerChangeStart( { // Beginning of a system sleep transition. // Cancellation is still possible. 
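The capabilityLoss test above works because each system capability occupies a single bit, so comparing population counts of the pending and current masks distinguishes full-to-dark from dark-to-full transitions. A compact illustration with stand-in bit values (the real kIOPMSystemCapability* constants are defined elsewhere):

    // Illustrative only: classify a capability transition the way
    // handleOurPowerChangeStart() does. Bit assignments are assumptions.
    #include <stdint.h>
    #include <stdio.h>

    enum {
        kCapCPU      = 1 << 0,
        kCapGraphics = 1 << 1,
        kCapAudio    = 1 << 2,
        kCapNetwork  = 1 << 3
    };

    int main(void)
    {
        uint32_t current = kCapCPU | kCapGraphics | kCapAudio | kCapNetwork;
        uint32_t pending = kCapCPU | kCapNetwork;   // graphics and audio lost
        int capabilityLoss =
            __builtin_popcount(pending) < __builtin_popcount(current);
        printf("full -> dark: capabilityLoss = %d\n", capabilityLoss); // 1
        return 0;
    }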
- tracePoint( kIOPMTracePointSleepStarted, lastSleepReason ); + tracePoint( kIOPMTracePointSleepStarted, sleepReason ); _systemMessageClientMask = kSystemMessageClientAll; if ((_currentCapability & kIOPMSystemCapabilityGraphics) == 0) - _systemMessageClientMask &= ~kSystemMessageClientApp; + _systemMessageClientMask &= ~kSystemMessageClientLegacyApp; if ((_highestCapability & kIOPMSystemCapabilityGraphics) == 0) _systemMessageClientMask &= ~kSystemMessageClientKernel; + // Record the reason for dark wake back to sleep + // System may not have ever achieved full wake + + publishSleepReason = true; + lastSleepReason = sleepReason; + + if (timeline) + timeline->setSleepCycleInProgressFlag(true); + + recordPMEvent(kIOPMEventTypeSleep, NULL, sleepReason, kIOReturnSuccess); + // Optimization to ignore wrangler power down thus skipping // the disk spindown and arming the idle timer for demand sleep. if (changeFlags & kIOPMIgnoreChildren) { - wranglerSleepIgnored = true; + ignoreIdleSleepTimer = true; } - - logWranglerTickle = false; } // 3. System wake. else if (kSystemTransitionWake == _systemTransitionType) { - wranglerSleepIgnored = false; + ignoreIdleSleepTimer = false; + tracePoint( kIOPMTracePointWakeWillPowerOnClients ); + if (pmStatsAppResponses) + { + setProperty(kIOPMSleepStatisticsAppsKey, pmStatsAppResponses); + pmStatsAppResponses->release(); + pmStatsAppResponses = OSArray::withCapacity(5); + } if (_pendingCapability & kIOPMSystemCapabilityGraphics) { - _systemMessageClientMask = kSystemMessageClientAll; - IOService::setAdvisoryTickleEnable( true ); + willEnterFullWake(); } else { - _systemMessageClientMask = kSystemMessageClientConfigd; + // Message powerd only + _systemMessageClientMask = kSystemMessageClientPowerd; + tellClients(kIOMessageSystemWillPowerOn); } + } - tracePoint( kIOPMTracePointWakeWillPowerOnClients ); - tellClients(kIOMessageSystemWillPowerOn); + // The only location where the sleep reason is published. At this point + // sleep can still be cancelled, but sleep reason should be published + // early for logging purposes. 
+ + if (publishSleepReason) + { + static const char * IOPMSleepReasons[] = + { + kIOPMClamshellSleepKey, + kIOPMPowerButtonSleepKey, + kIOPMSoftwareSleepKey, + kIOPMOSSwitchHibernationKey, + kIOPMIdleSleepKey, + kIOPMLowPowerSleepKey, + kIOPMThermalEmergencySleepKey, + kIOPMMaintenanceSleepKey, + kIOPMSleepServiceExitKey, + kIOPMDarkWakeThermalEmergencyKey + }; + + // Record sleep cause in IORegistry + uint32_t reasonIndex = sleepReason - kIOPMSleepReasonClamshell; + if (reasonIndex < sizeof(IOPMSleepReasons)/sizeof(IOPMSleepReasons[0])) { + DLOG("sleep reason %s\n", IOPMSleepReasons[reasonIndex]); + setProperty(kRootDomainSleepReasonKey, IOPMSleepReasons[reasonIndex]); + } } if ((kSystemTransitionNone != _systemTransitionType) && @@ -4664,7 +4828,7 @@ void IOPMrootDomain::handleOurPowerChangeStart( DLOG("=== START (%u->%u, 0x%x) type %u, gen %u, msg %x, " "dcp %x:%x:%x\n", - currentPowerState, powerState, *inOutChangeFlags, + currentPowerState, (uint32_t) powerState, *inOutChangeFlags, _systemTransitionType, _systemStateGeneration, _systemMessageClientMask, _desiredCapability, _currentCapability, _pendingCapability); @@ -4672,10 +4836,11 @@ void IOPMrootDomain::handleOurPowerChangeStart( } void IOPMrootDomain::handleOurPowerChangeDone( - IOService * service, - IOPMActions * actions, - uint32_t powerState, - uint32_t changeFlags ) + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex powerState, + IOPMPowerChangeFlags changeFlags, + IOPMRequestTag requestTag __unused ) { if (kSystemTransitionNewCapClient == _systemTransitionType) { @@ -4693,13 +4858,13 @@ void IOPMrootDomain::handleOurPowerChangeDone( _pendingCapability = _currentCapability; lastSleepReason = 0; - if (((_currentCapability & kIOPMSystemCapabilityGraphics) == 0) && - (_currentCapability & kIOPMSystemCapabilityCPU)) + if (!CAP_CURRENT(kIOPMSystemCapabilityGraphics) && + CAP_CURRENT(kIOPMSystemCapabilityCPU)) { pmPowerStateQueue->submitPowerEvent( kPowerEventPolicyStimulus, (void *) kStimulusDarkWakeReentry, - _systemStateGeneration ); + _systemStateGeneration ); } // Revert device desire to max. @@ -4714,20 +4879,16 @@ void IOPMrootDomain::handleOurPowerChangeDone( { if (CAP_GAIN(kIOPMSystemCapabilityGraphics)) { + lastSleepReason = 0; // stop logging wrangler tickles tellClients(kIOMessageSystemHasPoweredOn); -#if DARK_TO_FULL_EVALUATE_CLAMSHELL - // Re-evaluate clamshell state ourselves when graphics - // will not get kIOMessageSystemHasPoweredOn. - - if (clamshellClosed && - ((_systemMessageClientMask & kSystemMessageClientKernel) == 0)) - { - receivePowerNotification( kLocalEvalClamshellCommand ); - } -#endif } if (CAP_LOSS(kIOPMSystemCapabilityGraphics)) + { + // Going dark, reset full wake state + // userIsActive will be cleared by wrangler powering down wranglerTickled = false; + fullWakeReason = kFullWakeReasonNone; + } } // Reset state after exiting from dark wake. @@ -4738,8 +4899,16 @@ void IOPMrootDomain::handleOurPowerChangeDone( darkWakeMaintenance = false; darkWakeToSleepASAP = false; pciCantSleepValid = false; - rejectWranglerTickle = false; darkWakeSleepService = false; + + if (CAP_LOSS(kIOPMSystemCapabilityCPU)) + { + // Remove the influence of display power assertion + // before next system wake. + if (wrangler) wrangler->changePowerStateForRootDomain( + kWranglerPowerStateMin ); + removeProperty(gIOPMUserTriggeredFullWakeKey); + } } // Entered dark mode. 
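Compared with the table removed from privateSleepSystem() earlier in this patch, the IOPMSleepReasons array above drops the dummy first entry, so the index is now sleepReason - kIOPMSleepReasonClamshell rather than the old off-by-one form. A worked example; the concrete enum and string values are assumptions, only the contiguous ordering is implied by the table:

    // Illustration of the new sleep-reason indexing with assumed values.
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint32_t kReasonClamshell = 101;               // assumed base
        const uint32_t kReasonIdle      = kReasonClamshell + 4;
        static const char * reasons[] = {
            "Clamshell Sleep", "Power Button Sleep", "Software Sleep",
            "OS Switch Sleep", "Idle Sleep" /* ... */
        };
        uint32_t idx = kReasonIdle - kReasonClamshell;        // == 4
        if (idx < sizeof(reasons) / sizeof(reasons[0]))
            printf("registry string: %s\n", reasons[idx]);    // "Idle Sleep"
        return 0;
    }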
@@ -4747,6 +4916,7 @@ void IOPMrootDomain::handleOurPowerChangeDone( if (((_pendingCapability & kIOPMSystemCapabilityGraphics) == 0) && (_pendingCapability & kIOPMSystemCapabilityCPU)) { +#if DISABLE_SLEEP_ASAP_FOR_NETWORK_WAKE if (((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOInDark) == 0) && (kSystemTransitionWake == _systemTransitionType) && (_lastDebugWakeSeconds == 0)) @@ -4768,7 +4938,7 @@ void IOPMrootDomain::handleOurPowerChangeDone( prop->release(); } } - +#endif // Queue an evaluation of whether to remain in dark wake, // and for how long. This serves the purpose of draining // any assertions from the queue. @@ -4782,14 +4952,33 @@ void IOPMrootDomain::handleOurPowerChangeDone( DLOG("=== FINISH (%u->%u, 0x%x) type %u, gen %u, msg %x, " "dcp %x:%x:%x, dbgtimer %u\n", - currentPowerState, powerState, changeFlags, + currentPowerState, (uint32_t) powerState, changeFlags, _systemTransitionType, _systemStateGeneration, _systemMessageClientMask, _desiredCapability, _currentCapability, _pendingCapability, _lastDebugWakeSeconds); - // Update current system capability. + if (_pendingCapability & kIOPMSystemCapabilityGraphics) + { + displayWakeCnt++; +#if DARK_TO_FULL_EVALUATE_CLAMSHELL + if (clamshellExists && fullWakeThreadCall && + CAP_HIGHEST(kIOPMSystemCapabilityGraphics)) + { + // Not the initial graphics full power, graphics won't + // send a power notification to trigger a lid state + // evaluation. + AbsoluteTime deadline; + clock_interval_to_deadline(45, kSecondScale, &deadline); + thread_call_enter_delayed(fullWakeThreadCall, deadline); + } +#endif + } + else if (CAP_GAIN(kIOPMSystemCapabilityCPU)) + darkWakeCnt++; + + // Update current system capability. if (_currentCapability != _pendingCapability) _currentCapability = _pendingCapability; @@ -4816,20 +5005,6 @@ void IOPMrootDomain::handleOurPowerChangeDone( { setProperty(kIOPMSystemCapabilitiesKey, _currentCapability, 64); tracePoint( kIOPMTracePointSystemUp, 0 ); - - // kIOPMDWOverTemp notification handling was postponed - if (darkWakeThermalAlarm) - { - if (!wranglerTickled && !darkWakeThermalEmergency && - CAP_CURRENT(kIOPMSystemCapabilityCPU) && - !CAP_CURRENT(kIOPMSystemCapabilityGraphics)) - { - darkWakeThermalEmergency = true; - privateSleepSystem(kIOPMSleepReasonDarkWakeThermalEmergency); - MSG("DarkWake thermal limits breached. 
Going to sleep!\n"); - } - darkWakeThermalAlarm = false; - } } _systemTransitionType = kSystemTransitionNone; @@ -4844,10 +5019,10 @@ void IOPMrootDomain::handleOurPowerChangeDone( //****************************************************************************** void IOPMrootDomain::overridePowerChangeForUIService( - IOService * service, - IOPMActions * actions, - unsigned long * inOutPowerState, - uint32_t * inOutChangeFlags ) + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex * inOutPowerState, + IOPMPowerChangeFlags * inOutChangeFlags ) { uint32_t powerState = (uint32_t) *inOutPowerState; uint32_t changeFlags = (uint32_t) *inOutChangeFlags; @@ -4885,7 +5060,7 @@ void IOPMrootDomain::overridePowerChangeForUIService( if (actions->parameter & kPMActionsFlagLimitPower) { DLOG("+ plimit %s %p\n", - service->getName(), service); + service->getName(), OBFUSCATE(service)); } } else @@ -4908,7 +5083,7 @@ void IOPMrootDomain::overridePowerChangeForUIService( if ((actions->parameter & kPMActionsFlagLimitPower) == 0) { DLOG("- plimit %s %p\n", - service->getName(), service); + service->getName(), OBFUSCATE(service)); } } @@ -4921,18 +5096,21 @@ void IOPMrootDomain::overridePowerChangeForUIService( // Enforce limit for system power/cap transitions. maxPowerState = 0; - if ((actions->parameter & kPMActionsFlagIsDisplayWrangler) && - (service->getPowerState() > 0)) + if ((service->getPowerState() > maxPowerState) && + (actions->parameter & kPMActionsFlagIsDisplayWrangler)) { - // Forces a 3->1 transition sequence - if (changeFlags & kIOPMDomainWillChange) - maxPowerState = 3; - else - maxPowerState = 1; + maxPowerState++; + + // Remove lingering effects of any tickle before entering + // dark wake. It will take a new tickle to return to full + // wake, so the existing tickle state is useless. 
+ + if (changeFlags & kIOPMDomainDidChange) + *inOutChangeFlags |= kIOPMExpireIdleTimer; } else if (actions->parameter & kPMActionsFlagIsGraphicsDevice) { - maxPowerState = 1; + maxPowerState++; } } else @@ -4946,7 +5124,7 @@ void IOPMrootDomain::overridePowerChangeForUIService( if (powerState > maxPowerState) { DLOG("> plimit %s %p (%u->%u, 0x%x)\n", - service->getName(), service, powerState, maxPowerState, + service->getName(), OBFUSCATE(service), powerState, maxPowerState, changeFlags); *inOutPowerState = maxPowerState; @@ -4971,8 +5149,9 @@ void IOPMrootDomain::overridePowerChangeForUIService( clock_get_uptime(&now); SUB_ABSOLUTETIME(&now, &systemWakeTime); absolutetime_to_nanoseconds(now, &nsec); - MSG("Graphics suppressed %u ms\n", - ((int)((nsec) / 1000000ULL))); + if (kIOLogPMRootDomain & gIOKitDebug) + MSG("Graphics suppressed %u ms\n", + ((int)((nsec) / 1000000ULL))); } graphicsSuppressed = true; } @@ -4989,14 +5168,13 @@ void IOPMrootDomain::handleActivityTickleForDisplayWrangler( assert(service == wrangler); - if (service == wrangler) - { - bool aborting = ((lastSleepReason == kIOPMSleepReasonIdle) - || (lastSleepReason == kIOPMSleepReasonMaintenance)); - if (aborting) { - userActivityCount++; - DLOG("display wrangler tickled1 %d lastSleepReason %d\n", userActivityCount, lastSleepReason); - } + clock_get_uptime(&userActivityTime); + bool aborting = ((lastSleepReason == kIOPMSleepReasonIdle) + || (lastSleepReason == kIOPMSleepReasonMaintenance)); + if (aborting) { + userActivityCount++; + DLOG("display wrangler tickled1 %d lastSleepReason %d\n", + userActivityCount, lastSleepReason); } if (!wranglerTickled && @@ -5015,6 +5193,49 @@ void IOPMrootDomain::handleActivityTickleForDisplayWrangler( } } +void IOPMrootDomain::handleUpdatePowerClientForDisplayWrangler( + IOService * service, + IOPMActions * actions, + const OSSymbol * powerClient, + IOPMPowerStateIndex oldPowerState, + IOPMPowerStateIndex newPowerState ) +{ + assert(service == wrangler); + + // This function implements half of the user activity detection. + // User is active if: + // 1. DeviceDesire increases to max, + // and wrangler already in max power state + // (no power state change, caught by this routine) + // + // 2. Power change to max, and DeviceDesire is at max. + // (wrangler must reset DeviceDesire before system sleep) + // + // User is inactive if: + // 1. DeviceDesire drops to sleep state or below + + DLOG("wrangler %s (%u, %u->%u)\n", + powerClient->getCStringNoCopy(), + (uint32_t) service->getPowerState(), + (uint32_t) oldPowerState, (uint32_t) newPowerState); + + if (powerClient == gIOPMPowerClientDevice) + { + if ((newPowerState > oldPowerState) && + (newPowerState == kWranglerPowerStateMax) && + (service->getPowerState() == kWranglerPowerStateMax)) + { + evaluatePolicy( kStimulusUserIsActive ); + } + else + if ((newPowerState < oldPowerState) && + (newPowerState <= kWranglerPowerStateSleep)) + { + evaluatePolicy( kStimulusUserIsInactive ); + } + } +} + //****************************************************************************** // Approve usage of delayed child notification by PM. 
//****************************************************************************** @@ -5023,7 +5244,7 @@ bool IOPMrootDomain::shouldDelayChildNotification( IOService * service ) { if (((gDarkWakeFlags & kDarkWakeFlagHIDTickleMask) != 0) && - !wranglerTickled && + (kFullWakeReasonNone == fullWakeReason) && (kSystemTransitionWake == _systemTransitionType)) { DLOG("%s: delay child notify\n", service->getName()); @@ -5037,10 +5258,10 @@ bool IOPMrootDomain::shouldDelayChildNotification( //****************************************************************************** void IOPMrootDomain::handlePowerChangeStartForPCIDevice( - IOService * service, - IOPMActions * actions, - uint32_t powerState, - uint32_t * inOutChangeFlags ) + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex powerState, + IOPMPowerChangeFlags * inOutChangeFlags ) { pmTracer->tracePCIPowerChange( PMTraceWorker::kPowerChangeStart, @@ -5049,10 +5270,10 @@ void IOPMrootDomain::handlePowerChangeStartForPCIDevice( } void IOPMrootDomain::handlePowerChangeDoneForPCIDevice( - IOService * service, - IOPMActions * actions, - uint32_t powerState, - uint32_t changeFlags ) + IOService * service, + IOPMActions * actions, + IOPMPowerStateIndex powerState, + IOPMPowerChangeFlags changeFlags ) { pmTracer->tracePCIPowerChange( PMTraceWorker::kPowerChangeCompleted, @@ -5180,7 +5401,9 @@ bool IOPMrootDomain::systemMessageFilter( } // Capability client will always see kIOMessageCanSystemSleep, - // even for demand sleep. + // even for demand sleep. It will also have a chance to veto + // sleep one last time after all clients have responded to + // kIOMessageSystemWillSleep if ((kIOMessageCanSystemSleep == context->messageType) || (kIOMessageSystemWillNotSleep == context->messageType)) @@ -5198,7 +5421,16 @@ bool IOPMrootDomain::systemMessageFilter( } } - // Reject capability change messages for legacy clients. + if (kIOPMMessageLastCallBeforeSleep == context->messageType) + { + if ((object == (OSObject *) systemCapabilityNotifier) && + CAP_HIGHEST(kIOPMSystemCapabilityGraphics) && + (fullToDarkReason == kIOPMSleepReasonIdle)) + allow = true; + break; + } + + // Reject capability change messages for legacy clients. // Reject legacy system sleep messages for capability client. if (isCapMsg || (object == (OSObject *) systemCapabilityNotifier)) @@ -5209,7 +5441,7 @@ bool IOPMrootDomain::systemMessageFilter( // Filter system sleep messages. if ((context->notifyType == kNotifyApps) && - (_systemMessageClientMask & kSystemMessageClientApp)) + (_systemMessageClientMask & kSystemMessageClientLegacyApp)) { allow = true; } @@ -5245,7 +5477,7 @@ IOReturn IOPMrootDomain::setMaintenanceWakeCalendar( const IOPMCalendarStruct * calendar ) { OSData * data; - IOReturn ret; + IOReturn ret = 0; if (!calendar) return kIOReturnBadArgument; @@ -5300,35 +5532,38 @@ IOReturn IOPMrootDomain::displayWranglerNotification( displayPowerState = params->stateNumber; DLOG("DisplayWrangler message 0x%x, power state %d\n", - (uint32_t) messageType, displayPowerState); + (uint32_t) messageType, displayPowerState); switch (messageType) { case kIOMessageDeviceWillPowerOff: - // Display wrangler has dropped power due to display idle // or force system sleep. 
// - // 4 Display ON - // 3 Display Dim - // 2 Display Sleep + // 4 Display ON kWranglerPowerStateMax + // 3 Display Dim kWranglerPowerStateDim + // 2 Display Sleep kWranglerPowerStateSleep // 1 Not visible to user - // 0 Not visible to user - - if (displayPowerState > 2) - break; + // 0 Not visible to user kWranglerPowerStateMin - gRootDomain->evaluatePolicy( kStimulusDisplayWranglerSleep ); + if (displayPowerState <= kWranglerPowerStateSleep) + gRootDomain->evaluatePolicy( kStimulusDisplayWranglerSleep ); break; case kIOMessageDeviceHasPoweredOn: - // Display wrangler has powered on due to user activity // or wake from sleep. - if ( 4 != displayPowerState ) - break; + if (kWranglerPowerStateMax == displayPowerState) + { + gRootDomain->evaluatePolicy( kStimulusDisplayWranglerWake ); - gRootDomain->evaluatePolicy( kStimulusDisplayWranglerWake ); + // See comment in handleUpdatePowerClientForDisplayWrangler + if (service->getPowerStateForClient(gIOPMPowerClientDevice) == + kWranglerPowerStateMax) + { + gRootDomain->evaluatePolicy( kStimulusUserIsActive ); + } + } break; } #endif @@ -5359,6 +5594,42 @@ bool IOPMrootDomain::displayWranglerMatchPublished( return true; } +#if defined(__i386__) || defined(__x86_64__) + +bool IOPMrootDomain::IONVRAMMatchPublished( + void * target, + void * refCon, + IOService * newService, + IONotifier * notifier) +{ + unsigned int len = 0; + IOPMrootDomain *rd = (IOPMrootDomain *)target; + + if (PEReadNVRAMProperty(kIOSleepWakeDebugKey, NULL, &len)) + { + rd->swd_flags |= SWD_BOOT_BY_WDOG; + MSG("System was rebooted due to Sleep/Wake failure\n"); + + if ( (rd->swd_logBufMap = rd->sleepWakeDebugRetrieve()) != NULL) { + rd->swd_flags |= SWD_VALID_LOGS; + } + } + if (notifier) notifier->remove(); + return true; +} + +#else +bool IOPMrootDomain::IONVRAMMatchPublished( + void * target, + void * refCon, + IOService * newService, + IONotifier * notifier __unused) +{ + return false; +} + +#endif + //****************************************************************************** // reportUserInput // @@ -5385,7 +5656,7 @@ void IOPMrootDomain::reportUserInput( void ) } //****************************************************************************** -// blockDisplayWranglerTickle +// latchDisplayWranglerTickle //****************************************************************************** bool IOPMrootDomain::latchDisplayWranglerTickle( bool latch ) @@ -5420,6 +5691,21 @@ bool IOPMrootDomain::latchDisplayWranglerTickle( bool latch ) #endif } +//****************************************************************************** +// setDisplayPowerOn +// +// For root domain user client +//****************************************************************************** + +void IOPMrootDomain::setDisplayPowerOn( uint32_t options ) +{ + if (checkSystemCanSustainFullWake()) + { + pmPowerStateQueue->submitPowerEvent( kPowerEventSetDisplayPowerOn, + (void *) 0, options ); + } +} + // MARK: - // MARK: Battery @@ -5449,11 +5735,12 @@ bool IOPMrootDomain::batteryPublished( // MARK: System PM Policy //****************************************************************************** -// checkSystemCanSleep +// checkSystemSleepAllowed // //****************************************************************************** -bool IOPMrootDomain::checkSystemCanSleep( IOOptionBits options ) +bool IOPMrootDomain::checkSystemSleepAllowed( IOOptionBits options, + uint32_t sleepReason ) { int err = 0; @@ -5466,7 +5753,7 @@ bool IOPMrootDomain::checkSystemCanSleep( IOOptionBits options ) break; } - if 
(systemBooting || systemShutdown) + if (systemBooting || systemShutdown || gWillShutdown) { err = 2; // 2. restart or shutdown in progress break; } @@ -5489,7 +5776,7 @@ bool IOPMrootDomain::checkSystemSleepAllowed( IOOptionBits options, break; // always sleep on low battery } - if(darkWakeThermalEmergency) + if (sleepReason == kIOPMSleepReasonDarkWakeThermalEmergency) { break; // always sleep on dark wake thermal emergencies } @@ -5515,7 +5802,7 @@ bool IOPMrootDomain::checkSystemSleepAllowed( IOOptionBits options, } else if (sleepSupportedPEFunction && CAP_HIGHEST(kIOPMSystemCapabilityGraphics)) - { + { IOReturn ret; OSBitAndAtomic(~kPCICantSleep, &platformSleepSupport); ret = getPlatform()->callPlatformFunction( @@ -5542,6 +5829,17 @@ bool IOPMrootDomain::checkSystemSleepAllowed( IOOptionBits options, return true; } +bool IOPMrootDomain::checkSystemSleepEnabled( void ) +{ + return checkSystemSleepAllowed(0, 0); +} + +bool IOPMrootDomain::checkSystemCanSleep( uint32_t sleepReason ) +{ + ASSERT_GATED(); + return checkSystemSleepAllowed(1, sleepReason); +} + //****************************************************************************** // checkSystemCanSustainFullWake //****************************************************************************** @@ -5556,11 +5854,21 @@ bool IOPMrootDomain::checkSystemCanSustainFullWake( void ) return false; } - if (clamshellExists && clamshellClosed && !acAdaptorConnected && - !clamshellSleepDisabled) + if (clamshellExists && clamshellClosed && !clamshellSleepDisabled) { - // Lid closed on battery power - return false; + if (!acAdaptorConnected) + { + DLOG("full wake check: no AC\n"); + return false; + } + + if (CAP_CURRENT(kIOPMSystemCapabilityGraphics) && + !desktopMode && !clamshellDisabled) + { + // No external display + DLOG("full wake check: no ext display\n"); + return false; + } } #endif return true; @@ -5583,7 +5891,7 @@ void IOPMrootDomain::adjustPowerState( bool sleepASAP ) ASSERT_GATED(); - if ((sleepSlider == 0) || !checkSystemCanSleep()) + if ((sleepSlider == 0) || !checkSystemSleepEnabled()) { changePowerStateToPriv(ON_STATE); } @@ -5630,6 +5938,15 @@ void IOPMrootDomain::dispatchPowerEvent( break; } + if (swd_flags & SWD_VALID_LOGS) { + sleepWakeDebugDump(swd_logBufMap); + swd_logBufMap->release(); + swd_logBufMap = 0; + } + else if (swd_flags & SWD_BOOT_BY_WDOG) { + // If logs are invalid, write the failure code + sleepWakeDebugDump(NULL); + } // If lid is closed, re-send lid closed notification // now that booting is complete. if ( clamshellClosed ) { handlePowerNotification(kLocalEvalClamshellCommand); } evaluatePolicy( kStimulusAllowSystemSleepChanged ); + } break; @@ -5730,6 +6048,36 @@ void IOPMrootDomain::dispatchPowerEvent( case kPowerEventSuspendClient: handleSuspendPMNotificationClient((uintptr_t)arg0, (bool)arg1); break; + + case kPowerEventSetDisplayPowerOn: + if (!wrangler) break; + if (arg1 != 0) + { + // Force wrangler to max power state. If system is in dark wake + // this alone won't raise the wrangler's power state. + + wrangler->changePowerStateForRootDomain(kWranglerPowerStateMax); + + // System in dark wake, always requesting full wake should + // not have any bad side-effects, even if the request fails. + + if (!CAP_CURRENT(kIOPMSystemCapabilityGraphics)) + { + setProperty(kIOPMRootDomainWakeTypeKey, kIOPMRootDomainWakeTypeNotification); + requestFullWake( kFullWakeReasonDisplayOn ); + } + } + else + { + // Relinquish desire to power up display.
+ // Must first transition to state 1 since wrangler doesn't + // power off the displays at state 0. At state 0 the root + // domain is removed from the wrangler's power client list. + + wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin + 1); + wrangler->changePowerStateForRootDomain(kWranglerPowerStateMin); + } + break; } } @@ -5821,7 +6169,7 @@ exit: IOReturn IOPMrootDomain::receivePowerNotification( UInt32 msg ) { pmPowerStateQueue->submitPowerEvent( - kPowerEventReceivedPowerNotification, (void *) msg ); + kPowerEventReceivedPowerNotification, (void *)(uintptr_t) msg ); return kIOReturnSuccess; } @@ -5848,25 +6196,16 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) privateSleepSystem (kIOPMSleepReasonThermalEmergency); } + /* + * Sleep if system is in dark wake + */ if (msg & kIOPMDWOverTemp) { - if (!CAP_CURRENT(kIOPMSystemCapabilityCPU) || - (_systemTransitionType == kSystemTransitionSleep) || - (_systemTransitionType == kSystemTransitionWake) || - (_systemTransitionType == kSystemTransitionCapability)) - { - // During early wake or when system capability is changing, - // set flag and take action at end of transition. - darkWakeThermalAlarm = true; - } - else if (!wranglerTickled && !darkWakeThermalEmergency && - !CAP_CURRENT(kIOPMSystemCapabilityGraphics)) - { - // System in steady state and in dark wake - darkWakeThermalEmergency = true; - privateSleepSystem(kIOPMSleepReasonDarkWakeThermalEmergency); - MSG("DarkWake thermal limits breached. Going to sleep!\n"); - } + DLOG("DarkWake thermal limits message received!\n"); + + // Inform cap client that we're going to sleep + messageClients(kIOPMMessageDarkWakeThermalEmergency); + } /* @@ -5950,10 +6289,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) sendClientClamshellNotification(); // Re-evaluate the lid state - if( clamshellClosed ) - { - eval_clamshell = true; - } + eval_clamshell = true; } /* @@ -5976,10 +6312,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) sendClientClamshellNotification(); // Re-evaluate the lid state - if( clamshellClosed ) - { - eval_clamshell = true; - } + eval_clamshell = true; // Lack of AC may have latched a display wrangler tickle. // This mirrors the hardware's USB wake event latch, where a latched // @@ -5990,6 +6323,10 @@ // AC presence will reset the standy timer delay adjustment. _standbyTimerResetSeconds = 0; #endif + if (!userIsActive) { + // Reset userActivityTime when power supply is changed (rdr 13789330) + clock_get_uptime(&userActivityTime); + } } /* @@ -6002,13 +6339,12 @@ // Re-evaluate the lid state // System should sleep on external display disappearance // in lid closed operation.
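Clients observing the clamshell transitions handled here receive kIOPMMessageClamshellStateChange, whose pointer-sized argument packs the two flags built in sendClientClamshellNotification() (quoted in an earlier hunk). A decoding sketch; the bit positions are assumptions, since the kClamshellStateBit / kClamshellSleepBit definitions are outside this patch:

    // Illustrative decode of the clamshell state-change message argument.
    #include <stdint.h>

    enum {
        kClamshellStateBitAssumed = 1 << 0,  // lid is currently closed
        kClamshellSleepBitAssumed = 1 << 1   // closing the lid would sleep
    };

    static void handleClamshellMessage(void * messageArgument)
    {
        uintptr_t bits = (uintptr_t) messageArgument;
        int lidClosed  = (bits & kClamshellStateBitAssumed) != 0;
        int wouldSleep = (bits & kClamshellSleepBitAssumed) != 0;
        (void) lidClosed;
        (void) wouldSleep;
    }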
- if( clamshellClosed && (true == clamshellDisabled) ) + if (true == clamshellDisabled) { eval_clamshell = true; } clamshellDisabled = false; - sendClientClamshellNotification(); } @@ -6020,22 +6356,18 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) if (msg & kIOPMDisableClamshell) { clamshellDisabled = true; - sendClientClamshellNotification(); } /* * Evaluate clamshell and SLEEP if appropriate */ - if ( eval_clamshell && shouldSleepOnClamshellClosed() ) - { - - - privateSleepSystem (kIOPMSleepReasonClamshell); - } - else if ( eval_clamshell ) + if (eval_clamshell && clamshellClosed) { - evaluatePolicy( kStimulusDarkWakeEvaluate ); + if (shouldSleepOnClamshellClosed()) + privateSleepSystem (kIOPMSleepReasonClamshell); + else + evaluatePolicy( kStimulusDarkWakeEvaluate ); } /* @@ -6073,6 +6405,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) int sleepDelayChanged : 1; int evaluateDarkWake : 1; int adjustPowerState : 1; + int userBecameInactive : 1; } bit; uint32_t u32; } flags; @@ -6087,17 +6420,45 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) case kStimulusDisplayWranglerSleep: if (!wranglerAsleep) { + // first transition to wrangler sleep or lower wranglerAsleep = true; - clock_get_uptime(&wranglerSleepTime); flags.bit.displaySleep = true; } break; case kStimulusDisplayWranglerWake: + displayIdleForDemandSleep = false; wranglerAsleep = false; + break; + + case kStimulusUserIsActive: + if (!userIsActive) + { + userIsActive = true; + userWasActive = true; + + // Stay awake after dropping demand for display power on + if (kFullWakeReasonDisplayOn == fullWakeReason) + fullWakeReason = fFullWakeReasonDisplayOnAndLocalUser; + + setProperty(gIOPMUserIsActiveKey, kOSBooleanTrue); + messageClients(kIOPMMessageUserIsActiveChanged); + } flags.bit.idleSleepDisabled = true; break; + case kStimulusUserIsInactive: + if (userIsActive) + { + userIsActive = false; + clock_get_uptime(&userBecameInactiveTime); + flags.bit.userBecameInactive = true; + + setProperty(gIOPMUserIsActiveKey, kOSBooleanFalse); + messageClients(kIOPMMessageUserIsActiveChanged); + } + break; + case kStimulusAggressivenessChanged: { unsigned long minutesToIdleSleep = 0; @@ -6123,7 +6484,7 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) if ( minutesToIdleSleep > minutesToDisplayDim ) minutesDelta = minutesToIdleSleep - minutesToDisplayDim; - else if( minutesToIdleSleep <= minutesToDisplayDim ) + else if ( minutesToIdleSleep == minutesToDisplayDim ) minutesDelta = 1; if ((sleepSlider == 0) && (minutesToIdleSleep != 0)) flags.bit.idleSleepEnabled = true; @@ -6132,7 +6493,8 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) if ((sleepSlider != 0) && (minutesToIdleSleep == 0)) flags.bit.idleSleepDisabled = true; - if ((minutesDelta != extraSleepDelay) && + if (((minutesDelta != extraSleepDelay) || + (userActivityTime != userActivityTime_prev)) && !flags.bit.idleSleepEnabled && !flags.bit.idleSleepDisabled) flags.bit.sleepDelayChanged = true; @@ -6145,10 +6507,23 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) sleepSlider = minutesToIdleSleep; extraSleepDelay = minutesDelta; + userActivityTime_prev = userActivityTime; } break; case kStimulusDemandSystemSleep: - changePowerStateWithOverrideTo( SLEEP_STATE ); + displayIdleForDemandSleep = true; + if(wrangler && wranglerIdleSettings) + { + // Request wrangler idle only when demand sleep is triggered + // from full wake. 
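// evaluatePolicy() relies on the flags union declared in this hunk: one
// switch statement accumulates per-stimulus side effects as bits, and a
// single tail section acts on whatever was set. A reduced sketch of that
// pattern; the stimulus values and bit names here are invented, only the
// shape matches the kernel code.

#include <stdint.h>

union PolicyFlags {
    struct {
        unsigned idleSleepEnabled   : 1;
        unsigned idleSleepDisabled  : 1;
        unsigned userBecameInactive : 1;
    } bit;
    uint32_t u32;
};

static void evaluatePolicySketch(int stimulus)
{
    PolicyFlags flags;
    flags.u32 = 0;                       // clear every bit at once

    switch (stimulus) {
    case 0: flags.bit.userBecameInactive = 1; break;
    case 1: flags.bit.idleSleepDisabled  = 1; break;
    }

    // Tail section: resolve conflicts once, then act. Disabling idle
    // sleep drops any other pending bits, mirroring this hunk.
    if (flags.bit.idleSleepDisabled)
        flags.u32 = 0;
    if (flags.bit.userBecameInactive) {
        /* arm or re-arm the idle sleep timer here */
    }
}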
+ if(CAP_CURRENT(kIOPMSystemCapabilityGraphics)) + { + wrangler->setProperties(wranglerIdleSettings); + DLOG("Requested wrangler idle\n"); + } + } + // arg = sleepReason + changePowerStateWithOverrideTo( SLEEP_STATE, arg ); break; case kStimulusAllowSystemSleepChanged: @@ -6158,85 +6533,15 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) case kStimulusDarkWakeActivityTickle: if (false == wranglerTickled) { - uint32_t options = 0; - IOService * pciRoot = 0; - - if (rejectWranglerTickle) - { - DLOG("rejected tickle, type %u capability %x:%x\n", - _systemTransitionType, - _currentCapability, _pendingCapability); - break; - } - if (latchDisplayWranglerTickle(true)) { DLOG("latched tickle\n"); break; } - _desiredCapability |= - (kIOPMSystemCapabilityGraphics | - kIOPMSystemCapabilityAudio); - - if ((kSystemTransitionWake == _systemTransitionType) && - !(_pendingCapability & kIOPMSystemCapabilityGraphics) && - !graphicsSuppressed) - { - DLOG("Promoting to full wake\n"); - - // Elevate to full wake while waking up to dark wake. - // PM will hold off notifying the graphics subsystem about - // system wake as late as possible, so if a HID event does - // arrive, we can turn on graphics on this wake cycle, and - // not have to wait till the following cycle. That latency - // can be huge on some systems. However, once any graphics - // suppression has taken effect, it is too late. All other - // graphics devices must be similarly suppressed. But the - // delay till the following cycle should be very short. - - _pendingCapability |= - (kIOPMSystemCapabilityGraphics | - kIOPMSystemCapabilityAudio); - - // Immediately bring up audio and graphics. - pciRoot = pciHostBridgeDriver; - - // Notify clients about full wake. - _systemMessageClientMask = kSystemMessageClientAll; - IOService::setAdvisoryTickleEnable( true ); - tellClients(kIOMessageSystemWillPowerOn); - } - - // Unsafe to cancel once graphics was powered. - // If system woke from dark wake, the return to sleep can - // be cancelled. But "awake -> dark -> sleep" transition - // cannot be cancelled. - - if (!CAP_HIGHEST(kIOPMSystemCapabilityGraphics)) { - options |= kIOPMSyncCancelPowerDown; - } - - synchronizePowerTree( options, pciRoot ); wranglerTickled = true; - // IOGraphics doesn't lit the display even though graphics - // is enanbled in kIOMessageSystemCapabilityChange message(radar 9502104) - // So, do an explicit activity tickle - if(wrangler) - wrangler->activityTickle(0,0); - - if (logWranglerTickle) - { - AbsoluteTime now; - uint64_t nsec; - - clock_get_uptime(&now); - SUB_ABSOLUTETIME(&now, &systemWakeTime); - absolutetime_to_nanoseconds(now, &nsec); - MSG("HID tickle %u ms\n", - ((int)((nsec) / 1000000ULL))); - logWranglerTickle = false; - } + DLOG("Requesting full wake after dark wake activity tickle\n"); + requestFullWake( kFullWakeReasonLocalUser ); } break; @@ -6249,15 +6554,22 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) { DLOG("dark wake entry\n"); systemDarkWake = true; - wranglerAsleep = true; - clock_get_uptime(&wranglerSleepTime); + + // Keep wranglerAsleep an invariant when wrangler is absent + if (wrangler) + wranglerAsleep = true; + + if (kStimulusDarkWakeEntry == stimulus) + { + clock_get_uptime(&userBecameInactiveTime); + flags.bit.evaluateDarkWake = true; + } // Always accelerate disk spindown while in dark wake, // even if system does not support/allow sleep. 
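// Dark-wake entry above timestamps the transition with clock_get_uptime(),
// and the full-wake path later in this diff converts such deltas with
// SUB_ABSOLUTETIME()/absolutetime_to_nanoseconds() before logging. A
// user-space analogue of that measurement idiom, using the public
// mach_absolute_time() API; illustrative only, not kernel code.

#include <mach/mach_time.h>
#include <cstdint>
#include <cstdio>

int main()
{
    mach_timebase_info_data_t tb;
    mach_timebase_info(&tb);                  // tick -> nanosecond ratio

    uint64_t start = mach_absolute_time();    // kernel: clock_get_uptime()
    /* ... interval being measured ... */
    uint64_t ticks = mach_absolute_time() - start;

    uint64_t nsec = ticks * tb.numer / tb.denom;
    printf("elapsed %u ms\n", (unsigned)(nsec / 1000000ULL));
    return 0;
}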
cancelIdleSleepTimer(); setQuickSpinDownTimeout(); - flags.bit.evaluateDarkWake = true; } break; @@ -6266,17 +6578,6 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) { flags.bit.evaluateDarkWake = true; } -#if !DARK_TO_FULL_EVALUATE_CLAMSHELL - else - { - // Not through kLocalEvalClamshellCommand to avoid loop. - if (clamshellClosed && shouldSleepOnClamshellClosed() && - checkSystemCanSleep(true)) - { - privateSleepSystem( kIOPMSleepReasonClamshell ); - } - } -#endif break; case kStimulusNoIdleSleepPreventers: @@ -6285,42 +6586,41 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) } /* switch(stimulus) */ - if (flags.bit.evaluateDarkWake && !wranglerTickled) + if (flags.bit.evaluateDarkWake && (kFullWakeReasonNone == fullWakeReason)) { if (darkWakeToSleepASAP || (clamshellClosed && !(desktopMode && acAdaptorConnected))) { - // System currently in dark wake, and no children and - // assertion prevent system sleep. + uint32_t newSleepReason; - if (checkSystemCanSleep(true)) + if (CAP_HIGHEST(kIOPMSystemCapabilityGraphics)) { + // System was previously in full wake. Sleep reason from + // full to dark already recorded in fullToDarkReason. + if (lowBatteryCondition) - { - lastSleepReason = kIOPMSleepReasonLowPower; - setProperty(kRootDomainSleepReasonKey, kIOPMLowPowerSleepKey); - } - else if (darkWakeMaintenance) - { - lastSleepReason = kIOPMSleepReasonMaintenance; - setProperty(kRootDomainSleepReasonKey, kIOPMMaintenanceSleepKey); - } - else if (darkWakeSleepService) - { - lastSleepReason = kIOPMSleepReasonSleepServiceExit; - setProperty(kRootDomainSleepReasonKey, kIOPMSleepServiceExitKey); - } - changePowerStateWithOverrideTo( SLEEP_STATE ); + newSleepReason = kIOPMSleepReasonLowPower; + else + newSleepReason = fullToDarkReason; } else { - // Parked in dark wake, a tickle will return to full wake - rejectWranglerTickle = false; + // In dark wake from system sleep. + + if (darkWakeSleepService) + newSleepReason = kIOPMSleepReasonSleepServiceExit; + else + newSleepReason = kIOPMSleepReasonMaintenance; + } + + if (checkSystemCanSleep(newSleepReason)) + { + privateSleepSystem(newSleepReason); } } else // non-maintenance (network) dark wake { - if (checkSystemCanSleep(true)) + if (checkSystemCanSleep(kIOPMSleepReasonIdle)) { // Release power clamp, and wait for children idle. adjustPowerState(true); @@ -6329,7 +6629,6 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) { changePowerStateToPriv(ON_STATE); } - rejectWranglerTickle = false; } } @@ -6339,40 +6638,35 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) flags.u32 = 0; } - if (flags.bit.displaySleep || flags.bit.sleepDelayChanged) + if ((flags.bit.displaySleep) && + (kFullWakeReasonDisplayOn == fullWakeReason)) + { + // kIOPMSleepReasonMaintenance? + changePowerStateWithOverrideTo( SLEEP_STATE, kIOPMSleepReasonMaintenance ); + } + + if (flags.bit.userBecameInactive || flags.bit.sleepDelayChanged) { bool cancelQuickSpindown = false; if (flags.bit.sleepDelayChanged) { + // Cancel existing idle sleep timer and quick disk spindown. + // New settings will be applied by the idleSleepEnabled flag + // handler below if idle sleep is enabled. 
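// The rewritten timer logic below drops the old display-sleep-relative
// arithmetic in favor of a single helper, getTimeToIdleSleep(), that
// derives the interval from the last user activity. A hypothetical
// sketch of such a helper; this is a guess at the shape (sleep budget
// in minutes, as elsewhere in this file), not the actual xnu body.

#include <stdint.h>

static uint32_t timeToIdleSleepSketch(uint64_t nowNs,
                                      uint64_t lastActivityNs,
                                      uint32_t sleepSliderMinutes)
{
    const uint64_t budgetNs =
        (uint64_t)sleepSliderMinutes * 60ULL * 1000000000ULL;
    const uint64_t idleNs =
        (nowNs > lastActivityNs) ? (nowNs - lastActivityNs) : 0;

    if (idleNs >= budgetNs)
        return 1;       // already overdue: fire the timer almost immediately
    return (uint32_t)((budgetNs - idleNs) / 1000000000ULL);  // seconds left
}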
+ DLOG("extra sleep timer changed\n"); cancelIdleSleepTimer(); cancelQuickSpindown = true; } else { - DLOG("display sleep\n"); + DLOG("user inactive\n"); } - if (wranglerAsleep && !wranglerSleepIgnored) + if (!userIsActive && !ignoreIdleSleepTimer && sleepSlider) { - if ( extraSleepDelay ) - { - // Start a timer here if the System Sleep timer is greater - // than the Display Sleep timer. - - startIdleSleepTimer(gRootDomain->extraSleepDelay * 60); - } - else if ( sleepSlider ) - { - // Accelerate disk spindown if system sleep and display sleep - // sliders are set to the same value (e.g. both set to 5 min), - // and display is about to go dark. Check the system sleep is - // not set to never sleep. Disk sleep setting is ignored. - - setQuickSpinDownTimeout(); - cancelQuickSpindown = false; - } + startIdleSleepTimer(getTimeToIdleSleep()); } if (cancelQuickSpindown) @@ -6392,33 +6686,13 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) } else { - // Start idle sleep timer if wrangler went to sleep - // while system sleep was disabled. Disk spindown is + // Start idle timer if prefs now allow system sleep + // and user is already inactive. Disk spindown is // accelerated upon timer expiration. - if (wranglerAsleep) + if (!userIsActive) { - AbsoluteTime now; - uint64_t nanos; - uint32_t minutesSinceDisplaySleep = 0; - uint32_t sleepDelay = 0; - - clock_get_uptime(&now); - if (CMP_ABSOLUTETIME(&now, &wranglerSleepTime) > 0) - { - SUB_ABSOLUTETIME(&now, &wranglerSleepTime); - absolutetime_to_nanoseconds(now, &nanos); - minutesSinceDisplaySleep = nanos / (60000000000ULL); - } - - if (extraSleepDelay > minutesSinceDisplaySleep) - { - sleepDelay = extraSleepDelay - minutesSinceDisplaySleep; - } - - startIdleSleepTimer(sleepDelay * 60); - DLOG("display slept %u min, set idle timer to %u min\n", - minutesSinceDisplaySleep, sleepDelay); + startIdleSleepTimer(getTimeToIdleSleep()); } } } @@ -6451,16 +6725,146 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) sleepASAP = true; } } - if(sleepASAP) - { - lastSleepReason = kIOPMSleepReasonIdle; - setProperty(kRootDomainSleepReasonKey, kIOPMIdleSleepKey); - } adjustPowerState(sleepASAP); } } +//****************************************************************************** +// requestFullWake +// +// Request transition from dark wake to full wake +//****************************************************************************** + +void IOPMrootDomain::requestFullWake( FullWakeReason reason ) +{ + uint32_t options = 0; + IOService * pciRoot = 0; + + // System must be in dark wake and a valid reason for entering full wake + if ((kFullWakeReasonNone == reason) || + (kFullWakeReasonNone != fullWakeReason) || + (CAP_CURRENT(kIOPMSystemCapabilityGraphics))) + { + return; + } + + // Will clear reason upon exit from full wake + fullWakeReason = reason; + + _desiredCapability |= (kIOPMSystemCapabilityGraphics | + kIOPMSystemCapabilityAudio); + + if ((kSystemTransitionWake == _systemTransitionType) && + !(_pendingCapability & kIOPMSystemCapabilityGraphics) && + !graphicsSuppressed) + { + DLOG("promote to full wake\n"); + + // Promote to full wake while waking up to dark wake due to tickle. + // PM will hold off notifying the graphics subsystem about system wake + // as late as possible, so if a HID tickle does arrive, graphics can + // power up on this same wake cycle. The latency to power up graphics + // on the next cycle can be huge on some systems. However, once any + // graphics suppression has taken effect, it is too late. 
All other + // graphics devices must be similarly suppressed. But the delay till + // the following cycle should be short. + + _pendingCapability |= (kIOPMSystemCapabilityGraphics | + kIOPMSystemCapabilityAudio); + + // Immediately bring up audio and graphics + pciRoot = pciHostBridgeDriver; + willEnterFullWake(); + } + + // Unsafe to cancel once graphics was powered. + // If system woke from dark wake, the return to sleep can + // be cancelled. "awake -> dark -> sleep" transition + // can be canceled also, during the "dark --> sleep" phase + // *prior* to driver power down. + if (!CAP_HIGHEST(kIOPMSystemCapabilityGraphics) || + _pendingCapability == 0) { + options |= kIOPMSyncCancelPowerDown; + } + + synchronizePowerTree(options, pciRoot); + if (kFullWakeReasonLocalUser == fullWakeReason) + { + // IOGraphics doesn't light the display even though graphics is + // enabled in kIOMessageSystemCapabilityChange message (radar 9502104) + // So, do an explicit activity tickle + if (wrangler) + wrangler->activityTickle(0,0); + } + + if (options & kIOPMSyncCancelPowerDown) + { + AbsoluteTime now; + uint64_t nsec; + + // Log a timestamp for the initial full wake + clock_get_uptime(&now); + SUB_ABSOLUTETIME(&now, &systemWakeTime); + absolutetime_to_nanoseconds(now, &nsec); + MSG("full wake (reason %u) %u ms\n", + fullWakeReason, ((int)((nsec) / 1000000ULL))); + } +} + +//****************************************************************************** +// willEnterFullWake +// +// System will enter full wake from sleep, from dark wake, or from dark +// wake promotion. This function aggregates the work common to +// all three full wake transitions. +// +// Assumptions: fullWakeReason was updated +//****************************************************************************** + +void IOPMrootDomain::willEnterFullWake( void ) +{ + hibernateRetry = false; + ignoreIdleSleepTimer = false; + sleepTimerMaintenance = false; + sleepToStandby = false; + + _systemMessageClientMask = kSystemMessageClientPowerd | + kSystemMessageClientLegacyApp; + + if ((_highestCapability & kIOPMSystemCapabilityGraphics) == 0) + { + // Initial graphics full power + _systemMessageClientMask |= kSystemMessageClientKernel; + + // Set kIOPMUserTriggeredFullWakeKey before full wake for IOGraphics + setProperty(gIOPMUserTriggeredFullWakeKey, + (kFullWakeReasonLocalUser == fullWakeReason) ? + kOSBooleanTrue : kOSBooleanFalse); + } + + IOService::setAdvisoryTickleEnable( true ); + tellClients(kIOMessageSystemWillPowerOn); +} + +//****************************************************************************** +// fullWakeDelayedWork +// +// System has already entered full wake. Invoked by a delayed thread call. +//****************************************************************************** + +void IOPMrootDomain::fullWakeDelayedWork( void ) +{ +#if DARK_TO_FULL_EVALUATE_CLAMSHELL + // Not gated, don't modify state + if ((kSystemTransitionNone == _systemTransitionType) && + CAP_CURRENT(kIOPMSystemCapabilityGraphics)) + { + receivePowerNotification( kLocalEvalClamshellCommand ); + } +#endif +} + //****************************************************************************** // evaluateAssertions // @@ -6474,8 +6878,8 @@ void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, I if (changedBits & kIOPMDriverAssertionPreventDisplaySleepBit) { if (wrangler) { - bool value = (newAssertions & kIOPMDriverAssertionPreventDisplaySleepBit) ? 
true : false; + DLOG("wrangler->setIgnoreIdleTimer(%d)\n", value); wrangler->setIgnoreIdleTimer( value ); } @@ -6494,7 +6898,6 @@ void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, I DLOG("Driver assertion ReservedBit7 dropped\n"); updatePreventIdleSleepList(this, false); } - } } @@ -6564,12 +6967,19 @@ void IOPMrootDomain::pmStatsRecordApplicationResponse( { OSDictionary *responseDescription = NULL; OSNumber *delayNum = NULL; + OSNumber *powerCaps = NULL; OSNumber *pidNum = NULL; OSNumber *msgNum = NULL; const OSSymbol *appname; const OSSymbol *entryName; OSObject *entryType; int i; +#if defined(__i386__) || defined(__x86_64__) + swd_hdr *hdr = NULL; + OSString *UUIDstring = NULL; + uint32_t spindumpSize = 0; + const OSSymbol *namesym = NULL; +#endif if (!pmStatsAppResponses || pmStatsAppResponses->getCount() > 50) return; @@ -6579,7 +6989,9 @@ void IOPMrootDomain::pmStatsRecordApplicationResponse( { entryType = responseDescription->getObject(_statsResponseTypeKey); entryName = (OSSymbol *) responseDescription->getObject(_statsNameKey); - if (entryName && (entryType == response) && entryName->isEqualTo(name)) + powerCaps = (OSNumber *) responseDescription->getObject(_statsPowerCapsKey); + if (entryName && (entryType == response) && entryName->isEqualTo(name) && + (powerCaps->unsigned32BitValue() == _pendingCapability)) { OSNumber * entryValue; entryValue = (OSNumber *) responseDescription->getObject(_statsTimeMSKey); @@ -6627,12 +7039,80 @@ void IOPMrootDomain::pmStatsRecordApplicationResponse( delayNum->release(); } + powerCaps = OSNumber::withNumber(_pendingCapability, 32); + if (powerCaps) { + responseDescription->setObject(_statsPowerCapsKey, powerCaps); + powerCaps->release(); + } + + if (pmStatsAppResponses) { pmStatsAppResponses->setObject(responseDescription); } responseDescription->release(); } + +#if defined(__i386__) || defined(__x86_64__) + if ((gIOKitDebug & kIOAppRespStacksOn) == 0) + goto done; + + if (!name || name[0] == '\0' || + !response->isEqualTo(gIOPMStatsApplicationResponseTimedOut)) + goto done; + + namesym = OSSymbol::withCString(name); + + // Skip stackshots of previous offenders + if (noAckApps->containsObject(namesym)) + goto done; + + if (noAckApps->getCount() == noAckApps->getCapacity()) { + // Remove oldest entry from overflowing list + noAckApps->removeObject(noAckApps->getFirstObject()); + } + noAckApps->setLastObject(namesym); + + if (spindumpDesc != NULL) { + /* Add name of this new process in the header */ + hdr = (swd_hdr *)spindumpDesc->getBytesNoCopy(); + if (!hdr) goto done; + + snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "%s,%s", hdr->PMStatusCode, name); + goto done; + } + + spindumpSize = 256*1024; + spindumpDesc = IOBufferMemoryDescriptor::inTaskWithOptions( + kernel_task, kIODirectionIn | kIOMemoryMapperNone, spindumpSize); + + if (!spindumpDesc) + goto done; + + hdr = (swd_hdr *)spindumpDesc->getBytesNoCopy(); + memset(hdr, 0, sizeof(swd_hdr)); + if ((UUIDstring = OSDynamicCast(OSString, + getProperty(kIOPMSleepWakeUUIDKey))) != NULL ) { + snprintf(hdr->UUID, sizeof(hdr->UUID), "UUID: %s\n", UUIDstring->getCStringNoCopy()); + } + snprintf(hdr->cps, sizeof(hdr->cps), "caps: %d\n", _pendingCapability); + snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "Process: %s", name); + snprintf(hdr->reason, sizeof(hdr->reason), "\nStackshot reason: App Response Timeout\n"); + + hdr->spindump_offset = sizeof(swd_hdr); + + stack_snapshot_from_kernel(-1, (char*)hdr+hdr->spindump_offset, + spindumpSize - 
hdr->spindump_offset, + STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO, + &hdr->spindump_size); + if (hdr->spindump_size == 0) { + spindumpDesc->release(); + spindumpDesc = NULL; + } +done: + if (namesym) namesym->release(); +#endif + return; } @@ -6700,10 +7180,6 @@ void IOPMrootDomain::tracePoint( uint8_t point ) PMDebug(kPMLogSleepWakeTracePoint, point, 0); pmTracer->tracePoint(point); - -#if HIBERNATION - if (kIOPMTracePointSleepPowerPlaneDrivers == point) IOHibernateIOKitSleep(); -#endif } void IOPMrootDomain::tracePoint( uint8_t point, uint8_t data ) @@ -6720,6 +7196,68 @@ void IOPMrootDomain::traceDetail( uint32_t detail ) pmTracer->traceDetail( detail ); } + +IOReturn IOPMrootDomain::configureReport(IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination) +{ + unsigned cnt; + if (action != kIOReportGetDimensions) goto exit; + + for (cnt = 0; cnt < channelList->nchannels; cnt++) { + if ( (channelList->channels[cnt].channel_id == kSleepCntChID) || + (channelList->channels[cnt].channel_id == kDarkWkCntChID) || + (channelList->channels[cnt].channel_id == kUserWkCntChID) ) { + SIMPLEREPORT_UPDATERES(kIOReportGetDimensions, result); + } + } + +exit: + return super::configureReport(channelList, action, result, destination); +} + + +IOReturn IOPMrootDomain::updateReport(IOReportChannelList *channelList, + IOReportUpdateAction action, + void *result, + void *destination) +{ + uint32_t size2cpy; + void *data2cpy; + uint8_t buf[SIMPLEREPORT_BUFSIZE]; + IOBufferMemoryDescriptor *dest = OSDynamicCast(IOBufferMemoryDescriptor, (OSObject *)destination); + unsigned cnt; + uint64_t ch_id; + + if (action != kIOReportCopyChannelData) goto exit; + + for (cnt = 0; cnt < channelList->nchannels; cnt++) { + ch_id = channelList->channels[cnt].channel_id ; + + if ((ch_id == kSleepCntChID) || + (ch_id == kDarkWkCntChID) || (ch_id == kUserWkCntChID)) { + SIMPLEREPORT_INIT(buf, sizeof(buf), getRegistryEntryID(), ch_id, kIOReportCategoryPower); + } + else continue; + + if (ch_id == kSleepCntChID) + SIMPLEREPORT_SETVALUE(buf, sleepCnt); + else if (ch_id == kDarkWkCntChID) + SIMPLEREPORT_SETVALUE(buf, darkWakeCnt); + else if (ch_id == kUserWkCntChID) + SIMPLEREPORT_SETVALUE(buf, displayWakeCnt); + + SIMPLEREPORT_UPDATEPREP(buf, data2cpy, size2cpy); + SIMPLEREPORT_UPDATERES(kIOReportCopyChannelData, result); + dest->appendBytes(data2cpy, size2cpy); + } + +exit: + return super::updateReport(channelList, action, result, destination); +} + + //****************************************************************************** // PMTraceWorker Class // @@ -6742,7 +7280,7 @@ PMTraceWorker *PMTraceWorker::tracer(IOPMrootDomain *owner) return NULL; } - DLOG("PMTraceWorker %p\n", me); + DLOG("PMTraceWorker %p\n", OBFUSCATE(me)); // Note that we cannot instantiate the PCI device -> bit mappings here, since // the IODeviceTree has not yet been created by IOPlatformExpert. 
We create @@ -6899,10 +7437,18 @@ void PMTraceWorker::tracePCIPowerChange( service->getName(), bitNum, bitMask, traceData32); } + DLOG("trace point 0x%02x detail 0x%08x\n", tracePhase, traceData32); RTC_TRACE(); } } +uint64_t PMTraceWorker::getPMStatusCode( ) +{ + return (((uint64_t)traceData32 << 32) | (tracePhase << 24) | + (loginWindowPhase << 16) | (traceData8 << 8)); + +} + // MARK: - // MARK: PMHaltWorker @@ -6933,7 +7479,7 @@ PMHaltWorker * PMHaltWorker::worker( void ) if (!me->lock) break; - DLOG("PMHaltWorker %p\n", me); + DLOG("PMHaltWorker %p\n", OBFUSCATE(me)); me->retain(); // thread holds extra retain if (KERN_SUCCESS != kernel_thread_start(&PMHaltWorker::main, (void *) me, &thread)) { @@ -6951,7 +7497,7 @@ PMHaltWorker * PMHaltWorker::worker( void ) void PMHaltWorker::free( void ) { - DLOG("PMHaltWorker free %p\n", this); + DLOG("PMHaltWorker free %p\n", OBFUSCATE(this)); if (lock) { IOLockFree(lock); @@ -6996,7 +7542,7 @@ void PMHaltWorker::main( void * arg, wait_result_t waitResult ) } // No more work to do, terminate thread - DLOG("All done for worker: %p (visits = %u)\n", me, me->visits); + DLOG("All done for worker: %p (visits = %u)\n", OBFUSCATE(me), me->visits); thread_wakeup( &gPMHaltDepth ); me->release(); } @@ -7061,7 +7607,7 @@ void PMHaltWorker::work( PMHaltWorker * me ) LOG("%s driver %s (%p) took %u ms\n", (gPMHaltEvent == kIOMessageSystemWillPowerOff) ? "PowerOff" : "Restart", - service->getName(), service, + service->getName(), OBFUSCATE(service), (uint32_t) deltaTime ); } @@ -7293,7 +7839,7 @@ notifySystemShutdown( IOService * root, unsigned long event ) if (numWorkers > kPMHaltMaxWorkers) numWorkers = kPMHaltMaxWorkers; - DLOG("PM nodes = %u, maxDepth = %u, workers = %u\n", + DLOG("PM nodes %u, maxDepth %u, workers %u\n", totalNodes, gPMHaltArray->getCount(), numWorkers); for (unsigned int i = 0; i < numWorkers; i++) @@ -7338,6 +7884,9 @@ done: return; } +// MARK: - +// MARK: Sleep/Wake Logging + //********************************************************************************* // Sleep/Wake logging // @@ -7356,47 +7905,59 @@ IOReturn IOPMrootDomain::recordPMEvent(PMEventDetails *details) { if (timeline && details) { - IOReturn rc; - - // Record a detailed driver power change event, or... - if(details->eventClassifier == kIOPMEventClassDriverEvent) { - rc = timeline->recordDetailedPowerEvent( details ); - } - - // Record a system power management event - else if(details->eventClassifier == kIOPMEventClassSystemEvent) { - rc = timeline->recordSystemPowerEvent( details ); - } - else { - return kIOReturnBadArgument; - } + IOReturn rc; + + // Record a detailed driver power change event, or... 
+ if(details->eventClassifier == kIOPMEventClassDriverEvent) { + rc = timeline->recordDetailedPowerEvent( details ); + } + + // Record a system power management event + else if(details->eventClassifier == kIOPMEventClassSystemEvent) { + rc = timeline->recordSystemPowerEvent( details ); + } + else { + return kIOReturnBadArgument; + } - // If we get to record this message, then we've reached the - // end of another successful Sleep --> Wake cycle - // At this point, we pat ourselves in the back and allow - // our Sleep --> Wake UUID to be published - if(details->eventType == kIOPMEventTypeWakeDone) { - timeline->setSleepCycleInProgressFlag(false); - } + // If we get to record this message, then we've reached the + // end of another successful Sleep --> Wake cycle + // At this point, we pat ourselves on the back and allow + // our Sleep --> Wake UUID to be published + if(details->eventType == kIOPMEventTypeWakeDone) { + timeline->setSleepCycleInProgressFlag(false); + } /* - // Check if its time to clear the timeline buffer - if(getProperty(kIOPMSleepWakeUUIDKey) - && timeline->isSleepCycleInProgress() == false - && timeline->getNumEventsLoggedThisPeriod() > 500) { + // Check if it's time to clear the timeline buffer + if(getProperty(kIOPMSleepWakeUUIDKey) + && timeline->isSleepCycleInProgress() == false + && timeline->getNumEventsLoggedThisPeriod() > 500) { - // Clear the old UUID + // Clear the old UUID if(pmPowerStateQueue) { pmPowerStateQueue->submitPowerEvent(kPowerEventPublishSleepWakeUUID, (void *)false ); } - } */ - return rc; + return rc; } else return kIOReturnNotReady; } +void IOPMrootDomain::recordPMEvent( uint32_t type, + const char *uuid, + uint32_t reason, + uint32_t result ) +{ + PMEventDetails *details = PMEventDetails::eventDetails(type, uuid, reason, result); + if (details) + { + recordPMEvent(details); + details->release(); + } +} + IOReturn IOPMrootDomain::recordAndReleasePMEvent(PMEventDetails *details) { IOReturn ret = kIOReturnBadArgument; @@ -7410,6 +7971,9 @@ IOReturn IOPMrootDomain::recordAndReleasePMEvent(PMEventDetails *details) return ret; } +// MARK: - +// MARK: Kernel Assertion + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ IOPMDriverAssertionID IOPMrootDomain::createPMAssertion( @@ -7423,7 +7987,7 @@ IOPMDriverAssertionID IOPMrootDomain::createPMAssertion( if (!pmAssertions) return 0; - + ret = pmAssertions->createAssertion(whichAssertionBits, assertionLevel, ownerService, ownerDescription, &newAssertion); if (kIOReturnSuccess == ret) @@ -7481,6 +8045,55 @@ bool IOPMrootDomain::serializeProperties( OSSerialize * s ) const return( IOService::serializeProperties(s) ); } +OSObject * IOPMrootDomain::copyProperty( const char * aKey) const +{ + OSObject *obj = NULL; + obj = IOService::copyProperty(aKey); + + if (obj) return obj; + + if (!strncmp(aKey, kIOPMSleepWakeWdogRebootKey, + sizeof(kIOPMSleepWakeWdogRebootKey))) { + if (swd_flags & SWD_BOOT_BY_WDOG) + return OSBoolean::withBoolean(true); + else + return OSBoolean::withBoolean(false); + + } + + if (!strncmp(aKey, kIOPMSleepWakeWdogLogsValidKey, + sizeof(kIOPMSleepWakeWdogLogsValidKey))) { + if (swd_flags & SWD_VALID_LOGS) + return OSBoolean::withBoolean(true); + else + return OSBoolean::withBoolean(false); + + } + + /* + * XXX: We should get rid of "DesktopMode" property when 'kAppleClamshellCausesSleepKey' + * is set properly in darkwake from sleep. For that, kIOPMEnableClamshell msg has to be + * issued by DisplayWrangler on darkwake. 
+ */ + if (!strcmp(aKey, "DesktopMode")) { + if (desktopMode) + return OSBoolean::withBoolean(true); + else + return OSBoolean::withBoolean(false); + } + if (!strcmp(aKey, "DisplayIdleForDemandSleep")) { + if (displayIdleForDemandSleep) { + return OSBoolean::withBoolean(true); + } + else { + return OSBoolean::withBoolean(false); + } + } + return NULL; + + +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ // MARK: - @@ -8141,8 +8754,9 @@ IOReturn PMAssertionsTracker::createAssertion( track.id = OSIncrementAtomic64((SInt64*) &issuingUniqueID); track.level = level; track.assertionBits = which; - track.ownerString = whoItIs ? OSSymbol::withCString(whoItIs) : 0; + track.ownerString = whoItIs ? OSSymbol::withCString(whoItIs):0; track.ownerService = serviceID; + track.registryEntryID = serviceID ? serviceID->getRegistryEntryID():0; track.modifiedTime = 0; pmEventTimeStamp(&track.createdTime); @@ -8233,7 +8847,7 @@ IOReturn PMAssertionsTracker::setAssertionLevel( { if (owner && owner->pmPowerStateQueue) { owner->pmPowerStateQueue->submitPowerEvent(kPowerEventAssertionSetLevel, - (void *)_level, _id); + (void *)(uintptr_t)_level, _id); } return kIOReturnSuccess; @@ -8318,9 +8932,9 @@ OSArray *PMAssertionsTracker::copyAssertionsArray(void) details->setObject(kIOPMDriverAssertionModifiedTimeKey, _n); _n->release(); } - _n = OSNumber::withNumber((uintptr_t)_a->ownerService, 64); + _n = OSNumber::withNumber((uintptr_t)_a->registryEntryID, 64); if (_n) { - details->setObject(kIOPMDriverAssertionOwnerServiceKey, _n); + details->setObject(kIOPMDriverAssertionRegistryEntryIDKey, _n); _n->release(); } _n = OSNumber::withNumber(_a->level, 64); @@ -8450,3 +9064,496 @@ OSObject * IORootParent::copyProperty( const char * aKey) const return (IOService::copyProperty(aKey)); } + +#if defined(__i386__) || defined(__x86_64__) +void IOPMrootDomain::sleepWakeDebugLog(const char *fmt,...) 
+{ + char str[100]; + va_list ap; + int retry = 0; + char *ptr; + swd_hdr *hdr; + uint32_t len = 0; + uint32_t ts; + uint32_t curPos = 0, newPos = 0; + bool reset = false; + + if ( !(kIOPersistentLog & gIOKitDebug) || (swd_buffer == NULL)) + return; + + hdr = (swd_hdr *)swd_buffer; + if (hdr->dlog_size == 0) { + if ((hdr->spindump_size != 0) || !OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) + return; + + hdr->dlog_buf_offset = hdr->dlog_cur_pos = sizeof(swd_hdr); + hdr->dlog_size = SWD_DLOG_SIZE; + hdr->spindump_offset = sizeof(swd_hdr) + hdr->dlog_size; + memset(((char*)hdr)+hdr->dlog_buf_offset, 0, hdr->dlog_size); + gRootDomain->swd_lock = 0; + } + ts = mach_absolute_time() & 0xffffffff; + va_start(ap, fmt); + len = vsnprintf(str, sizeof(str), fmt, ap)+1; + va_end(ap); + if (len > sizeof(str)) len = sizeof(str); + len += 10; // 10 bytes for the "%08x: " time stamp prefix + + do { + curPos = hdr->dlog_cur_pos; + newPos = curPos+len; + if (newPos >= (hdr->dlog_buf_offset+hdr->dlog_size)) { + newPos = hdr->dlog_buf_offset+len; + reset = true; + } + else + reset = false; + if (retry++ == 3) return; // Don't try too hard + } while (!OSCompareAndSwap(curPos, newPos, &hdr->dlog_cur_pos)); + + if (reset) curPos = hdr->dlog_buf_offset; + ptr = (char*)hdr+curPos; + snprintf(ptr, len, "%08x: %s", ts, str); + +} + +void IOPMrootDomain::sleepWakeDebugTrig(bool wdogTrigger) +{ + swd_hdr * hdr = NULL; + addr64_t data[3]; + uint32_t wdog_panic = 0; + + char * dstAddr; + uint32_t bytesRemaining; + unsigned int len; + OSString * UUIDstring = NULL; + uint64_t code; + IOMemoryMap * logBufMap = NULL; + + if ( kIOSleepWakeWdogOff & gIOKitDebug ) + return; + + if (wdogTrigger) { + if (PE_parse_boot_argn("swd_panic", &wdog_panic, sizeof(wdog_panic)) && + (wdog_panic == 1)) { + // If boot-arg is set to panic on sleep/wake hang, call panic + panic("Sleep/Wake hang detected\n"); + return; + } + else if (swd_flags & SWD_BOOT_BY_WDOG) { + // If the current boot is due to a watchdog-triggered restart in the previous boot, + // then don't trigger again until at least 1 successful sleep & wake. + if (!(sleepCnt && displayWakeCnt)) { + IOLog("Shutting down due to repeated Sleep/Wake failures\n"); + PEHaltRestart(kPERestartCPU); + return; + } + } + + } + + if (swd_buffer == NULL) { + sleepWakeDebugMemAlloc(); + if (swd_buffer == NULL) return; + } + + if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) + return; + + + hdr = (swd_hdr *)swd_buffer; + if ((UUIDstring = OSDynamicCast(OSString, getProperty(kIOPMSleepWakeUUIDKey))) != NULL ) { + + if (wdogTrigger || (!UUIDstring->isEqualTo(hdr->UUID))) { + const char *str = UUIDstring->getCStringNoCopy(); + snprintf(hdr->UUID, sizeof(hdr->UUID), "UUID: %s\n", str); + } + else { + DLOG("Data for current UUID already exists\n"); + goto exit; + } + } + + dstAddr = (char*)hdr + hdr->spindump_offset; + bytesRemaining = SWD_BUF_SIZE - hdr->spindump_offset; + + + DLOG("Taking snapshot. 
bytesRemaining: %d\n", bytesRemaining); + stack_snapshot_from_kernel(-1, dstAddr, bytesRemaining, + STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO|STACKSHOT_SAVE_KERNEL_FRAMES_ONLY, + &hdr->spindump_size); + if (hdr->spindump_size != 0) { + DLOG("Traced %d bytes of snapshot\n", hdr->spindump_size); + dstAddr += hdr->spindump_size; + bytesRemaining -= hdr->spindump_size; + } + else { + DLOG("Failed to get spindump\n"); + hdr->spindump_size = 0; + } + + snprintf(hdr->cps, sizeof(hdr->cps), "cps: %d\n", ((IOService*)this)->getPowerState()); + code = pmTracer->getPMStatusCode(); + snprintf(hdr->PMStatusCode, sizeof(hdr->PMStatusCode), "Code: %08x %08x\n", + (uint32_t)((code >> 32) & 0xffffffff), (uint32_t)(code & 0xffffffff)); + snprintf(hdr->reason, sizeof(hdr->reason), "Stackshot reason: Watchdog\n"); + + + data[0] = sizeof(swd_hdr) + hdr->spindump_size + hdr->dlog_size; + /* Header & rootdomain log is constantly changing and is not covered by CRC */ + data[1] = crc32(0, ((char*)swd_buffer+hdr->spindump_offset), hdr->spindump_size); + data[2] = kvtophys((vm_offset_t)swd_buffer); + len = sizeof(addr64_t)*3; + DLOG("bytes: 0x%llx crc:0x%llx paddr:0x%llx\n", + data[0], data[1], data[2]); + + if (PEWriteNVRAMProperty(kIOSleepWakeDebugKey, data, len) == false) + { + DLOG("Failed to update nvram boot-args\n"); + goto exit; + } + +exit: + + gRootDomain->swd_lock = 0; + + if (wdogTrigger) { + IOLog("Restarting to collect Sleep wake debug logs\n"); + PEHaltRestart(kPERestartCPU); + } + else { + logBufMap = sleepWakeDebugRetrieve(); + if (logBufMap) { + sleepWakeDebugDump(logBufMap); + logBufMap->release(); + logBufMap = 0; + } + } +} + +void IOPMrootDomain::sleepWakeDebugMemAlloc( ) +{ + vm_size_t size = SWD_BUF_SIZE; + + swd_hdr *hdr = NULL; + + IOBufferMemoryDescriptor *memDesc = NULL; + + + if ( kIOSleepWakeWdogOff & gIOKitDebug ) + return; + + if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) + return; + + // Try allocating above 4GB. 
If that fails, try at 2GB + memDesc = IOBufferMemoryDescriptor::inTaskWithPhysicalMask( + kernel_task, kIOMemoryPhysicallyContiguous|kIOMemoryMapperNone, + size, 0xFFFFFFFF00000000ULL); + if (!memDesc) { + memDesc = IOBufferMemoryDescriptor::inTaskWithPhysicalMask( + kernel_task, kIOMemoryPhysicallyContiguous|kIOMemoryMapperNone, + size, 0xFFFFFFFF10000000ULL); + } + + if (memDesc == NULL) + { + DLOG("Failed to allocate Memory descriptor for sleepWake debug\n"); + goto exit; + } + + + hdr = (swd_hdr *)memDesc->getBytesNoCopy(); + memset(hdr, 0, sizeof(swd_hdr)); + + hdr->version = 1; + hdr->alloc_size = size; + + if (kIOPersistentLog & gIOKitDebug) { + hdr->dlog_buf_offset = hdr->dlog_cur_pos = sizeof(swd_hdr); + hdr->dlog_size = SWD_DLOG_SIZE; + memset(((char*)hdr)+hdr->dlog_buf_offset, 0, hdr->dlog_size); + } + hdr->spindump_offset = sizeof(swd_hdr) + hdr->dlog_size; + + swd_buffer = (void *)hdr; + DLOG("SleepWake debug buffer size:0x%x\n", hdr->alloc_size); + DLOG("DLOG offset: 0x%x size:0x%x spindump offset:0x%x\n", + hdr->dlog_buf_offset, hdr->dlog_size, hdr->spindump_offset); + +exit: + gRootDomain->swd_lock = 0; +} + +void IOPMrootDomain::sleepWakeDebugEnableWdog() +{ + swd_flags |= SWD_WDOG_ENABLED; + if (!swd_buffer) + sleepWakeDebugMemAlloc(); +} + +bool IOPMrootDomain::sleepWakeDebugIsWdogEnabled() +{ + return ((swd_flags & SWD_WDOG_ENABLED) && + !systemBooting && !systemShutdown); +} + +errno_t IOPMrootDomain::sleepWakeDebugSaveFile(const char *name, char *buf, int len) +{ + struct vnode *vp = NULL; + vfs_context_t ctx = vfs_context_current(); + kauth_cred_t cred = vfs_context_ucred(ctx); + struct vnode_attr va; + errno_t error = EIO; + + if (vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), + S_IRUSR|S_IRGRP|S_IROTH, VNODE_LOOKUP_NOFOLLOW, &vp, ctx) != 0) + { + IOLog("Failed to open the file %s\n", name); + goto exit; + } + VATTR_INIT(&va); + VATTR_WANTED(&va, va_nlink); + /* Don't dump to non-regular files or files with links. */ + if (vp->v_type != VREG || + vnode_getattr(vp, &va, ctx) || va.va_nlink != 1) { + IOLog("Bailing as this is not a regular file\n"); + goto exit; + } + VATTR_INIT(&va); + VATTR_SET(&va, va_data_size, 0); + vnode_setattr(vp, &va, ctx); + + + error = vn_rdwr(UIO_WRITE, vp, buf, len, 0, + UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) 0, vfs_context_proc(ctx)); + if (error != 0) + IOLog("Failed to save sleep wake log. 
err 0x%x\n", error); + else + DLOG("Saved %d bytes to file %s\n",len, name); + +exit: + if (vp) vnode_close(vp, FWRITE, ctx); + + return error; + +} +void IOPMrootDomain::sleepWakeDebugDump(IOMemoryMap *logBufMap) +{ + IOVirtualAddress srcBuf = NULL; + char *stackBuf = NULL, *logOffset = NULL; + int logSize = 0; + + errno_t error = EIO; + uint64_t bufSize = 0; + swd_hdr *hdr = NULL; + char PMStatusCode[100]; + OSNumber *failStat = NULL; + + if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) + return; + + if ((logBufMap == 0) || ( (srcBuf = logBufMap->getVirtualAddress()) == 0) ) + { + DLOG("Nothing saved to dump to file\n"); + goto exit; + } + + hdr = (swd_hdr *)srcBuf; + bufSize = logBufMap->getLength(); + if (bufSize <= sizeof(swd_hdr)) + { + IOLog("SleepWake log buffer contents are invalid\n"); + goto exit; + } + + stackBuf = (char*)hdr+hdr->spindump_offset; + + error = sleepWakeDebugSaveFile("/var/tmp/SleepWakeStacks.dump", stackBuf, hdr->spindump_size); + if (error) goto exit; + + logOffset = (char*)hdr+offsetof(swd_hdr, UUID); + logSize = sizeof(swd_hdr)-offsetof(swd_hdr, UUID); + if ((hdr->dlog_buf_offset == sizeof(swd_hdr)) && (hdr->dlog_size == SWD_DLOG_SIZE)) + { + logSize += hdr->dlog_size; + } + error = sleepWakeDebugSaveFile("/var/tmp/SleepWakeLog.dump", logOffset, logSize); + if (error) goto exit; + + hdr->spindump_size = 0; + error = 0; + +exit: + if (error) { + // Write just the SleepWakeLog.dump with failure code + if ((failStat = OSDynamicCast(OSNumber, getProperty(kIOPMSleepWakeFailureCodeKey))) != NULL) { + memset(PMStatusCode, 0x20, sizeof(PMStatusCode)); // Fill with spaces + PMStatusCode[sizeof(PMStatusCode)-1] = 0xa; // And an end-of-line at the end + const uint64_t fcode = failStat->unsigned64BitValue(); + snprintf(PMStatusCode, sizeof(PMStatusCode)-1, "Code: 0x%llx", fcode); + sleepWakeDebugSaveFile("/var/tmp/SleepWakeLog.dump", PMStatusCode, sizeof(PMStatusCode)); + } + } + gRootDomain->swd_lock = 0; +} + +IOMemoryMap *IOPMrootDomain::sleepWakeDebugRetrieve( ) +{ + IOVirtualAddress vaddr = NULL; + IOMemoryDescriptor * desc = NULL; + IOMemoryMap * logBufMap = NULL; + + uint32_t len; + addr64_t data[3]; + uint64_t bufSize = 0; + uint64_t crc = 0; + uint64_t newcrc = 0; + uint64_t paddr = 0; + swd_hdr *hdr = NULL; + bool ret = false; + + + if (!OSCompareAndSwap(0, 1, &gRootDomain->swd_lock)) + return NULL; + + len = sizeof(addr64_t)*3; + if (!PEReadNVRAMProperty(kIOSleepWakeDebugKey, data, &len) || (len != sizeof(addr64_t)*3) ) + { + DLOG("No sleepWakeDebug note to read\n"); + return NULL; + } + PERemoveNVRAMProperty(kIOSleepWakeDebugKey); + + + bufSize = data[0]; + crc = data[1]; + paddr = data[2]; + if ( (bufSize <= sizeof(swd_hdr)) ||(bufSize > SWD_BUF_SIZE) || (crc == 0) ) + { + IOLog("SleepWake log buffer contents are invalid\n"); + return NULL; + } + + DLOG("size:0x%llx crc:0x%llx paddr:0x%llx\n", + bufSize, crc, paddr); + + + desc = IOMemoryDescriptor::withAddressRange( paddr, bufSize, + kIODirectionOutIn | kIOMemoryMapperNone, NULL); + if (desc == NULL) + { + IOLog("Fail to map SleepWake log buffer\n"); + goto exit; + } + + logBufMap = desc->map(); + + vaddr = logBufMap->getVirtualAddress(); + + + if ( (logBufMap->getLength() <= sizeof(swd_hdr)) || (vaddr == NULL) ) { + IOLog("Fail to map SleepWake log buffer\n"); + goto exit; + } + + hdr = (swd_hdr *)vaddr; + if (hdr->spindump_offset+hdr->spindump_size > bufSize) + { + IOLog("SleepWake log buffer contents are invalid\n"); + goto exit; + } + + hdr->crc = crc; + newcrc = crc32(0, (void 
*)((char*)vaddr+hdr->spindump_offset), + hdr->spindump_size); + if (newcrc != crc) { + IOLog("SleepWake log buffer contents are invalid\n"); + goto exit; + } + + ret = true; + + +exit: + if (!ret) { + if (logBufMap) logBufMap->release(); + logBufMap = 0; + } + if (desc) desc->release(); + gRootDomain->swd_lock = 0; + + return logBufMap; +} + +void IOPMrootDomain::saveTimeoutAppStackShot(void *p0, void *p1) +{ + IOPMrootDomain *rd = (IOPMrootDomain *)p0; + IOBufferMemoryDescriptor *spindumpDesc; + errno_t error = EIO; + swd_hdr *hdr; + + if (rd && rd->spindumpDesc) + { + spindumpDesc = rd->spindumpDesc; + + hdr = (swd_hdr*)spindumpDesc->getBytesNoCopy(); + error = rd->sleepWakeDebugSaveFile("/var/tmp/SleepWakeTimeoutStacks.dump", + (char*)hdr+hdr->spindump_offset, hdr->spindump_size); + if (error) goto done; + + error = rd->sleepWakeDebugSaveFile("/var/tmp/SleepWakeTimeoutLog.dump", + (char*)hdr+offsetof(swd_hdr, UUID), + sizeof(swd_hdr)-offsetof(swd_hdr, UUID)); + + done: + spindumpDesc->release(); + rd->spindumpDesc = 0; + + } + + +} + +#else + +void IOPMrootDomain::sleepWakeDebugLog(const char *fmt,...) +{ +} + +void IOPMrootDomain::sleepWakeDebugTrig(bool restart) +{ +} + +void IOPMrootDomain::sleepWakeDebugMemAlloc( ) +{ +} + +void IOPMrootDomain::sleepWakeDebugDump(IOMemoryMap *map) +{ +} + +IOMemoryMap *IOPMrootDomain::sleepWakeDebugRetrieve( ) +{ + return NULL; +} + +void IOPMrootDomain::sleepWakeDebugEnableWdog() +{ +} + +bool IOPMrootDomain::sleepWakeDebugIsWdogEnabled() +{ + return false; +} + +errno_t IOPMrootDomain::sleepWakeDebugSaveFile(const char *name, char *buf, int len) +{ + return 0; +} + +void IOPMrootDomain::saveTimeoutAppStackShot(void *p0, void *p1) +{ +} +#endif + diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index 29c286ea6..494f1916b 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -715,16 +715,10 @@ static void IOShutdownNotificationsTimedOut( thread_call_param_t p0, thread_call_param_t p1) { -#ifdef CONFIG_EMBEDDED - /* 30 seconds has elapsed - panic */ - panic("Halt/Restart Timed Out"); - -#else /* ! CONFIG_EMBEDDED */ int type = (int)(long)p0; /* 30 seconds has elapsed - resume shutdown */ if(gIOPlatform) gIOPlatform->haltRestart(type); -#endif /* CONFIG_EMBEDDED */ } @@ -777,7 +771,7 @@ int PEHaltRestart(unsigned int type) the timer expires. 
*/ shutdown_hang = thread_call_allocate( &IOShutdownNotificationsTimedOut, - (thread_call_param_t) type); + (thread_call_param_t)(uintptr_t) type); clock_interval_to_deadline( 30, kSecondScale, &deadline ); thread_call_enter1_delayed( shutdown_hang, 0, deadline ); @@ -864,7 +858,7 @@ boolean_t PEReadNVRAMProperty(const char *symbol, void *value, *len = data->getLength(); vlen = min(vlen, *len); - if (vlen) + if (value && vlen) memcpy((void *) value, data->getBytesNoCopy(), vlen); return TRUE; @@ -911,6 +905,32 @@ err: } +boolean_t PERemoveNVRAMProperty(const char *symbol) +{ + const OSSymbol *sym; + + if (!symbol) + goto err; + + if (init_gIOOptionsEntry() < 0) + goto err; + + sym = OSSymbol::withCStringNoCopy(symbol); + if (!sym) + goto err; + + gIOOptionsEntry->removeProperty(sym); + + sym->release(); + + gIOOptionsEntry->sync(); + return TRUE; + +err: + return FALSE; + +} + long PEGetGMTTimeOfDay(void) { long result = 0; @@ -934,41 +954,6 @@ void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller) OSString * string = 0; uuid_string_t uuid; -#if CONFIG_EMBEDDED - entry = IORegistryEntry::fromPath( "/chosen", gIODTPlane ); - if ( entry ) - { - OSData * data1; - - data1 = OSDynamicCast( OSData, entry->getProperty( "unique-chip-id" ) ); - if ( data1 && data1->getLength( ) == 8 ) - { - OSData * data2; - - data2 = OSDynamicCast( OSData, entry->getProperty( "chip-id" ) ); - if ( data2 && data2->getLength( ) == 4 ) - { - SHA1_CTX context; - uint8_t digest[ SHA_DIGEST_LENGTH ]; - const uuid_t space = { 0xA6, 0xDD, 0x4C, 0xCB, 0xB5, 0xE8, 0x4A, 0xF5, 0xAC, 0xDD, 0xB6, 0xDC, 0x6A, 0x05, 0x42, 0xB8 }; - - SHA1Init( &context ); - SHA1Update( &context, space, sizeof( space ) ); - SHA1Update( &context, data1->getBytesNoCopy( ), data1->getLength( ) ); - SHA1Update( &context, data2->getBytesNoCopy( ), data2->getLength( ) ); - SHA1Final( digest, &context ); - - digest[ 6 ] = ( digest[ 6 ] & 0x0F ) | 0x50; - digest[ 8 ] = ( digest[ 8 ] & 0x3F ) | 0x80; - - uuid_unparse( digest, uuid ); - string = OSString::withCString( uuid ); - } - } - - entry->release( ); - } -#else /* !CONFIG_EMBEDDED */ entry = IORegistryEntry::fromPath( "/efi/platform", gIODTPlane ); if ( entry ) { @@ -993,7 +978,6 @@ void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller) entry->release( ); } -#endif /* !CONFIG_EMBEDDED */ if ( string == 0 ) { @@ -1380,6 +1364,7 @@ IOPlatformExpertDevice::initWithArgs( if( !ok) return( false); + reserved = NULL; workLoop = IOWorkLoop::workLoop(); if (!workLoop) return false; diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp index 7d0b01167..faef5cb63 100644 --- a/iokit/Kernel/IORegistryEntry.cpp +++ b/iokit/Kernel/IORegistryEntry.cpp @@ -337,7 +337,7 @@ bool IORegistryEntry::init( IORegistryEntry * old, fPropertyTable->retain(); #ifdef IOREGSPLITTABLES fRegistryTable = old->fRegistryTable; - old->fRegistryTable = OSDictionary::withDictionary( fRegistryTable ); + old->fRegistryTable = (OSDictionary *) fRegistryTable->copyCollection(); #endif /* IOREGSPLITTABLES */ old->registryTable()->removeObject( plane->keys[ kParentSetIndex ] ); @@ -1769,8 +1769,11 @@ void IORegistryEntry::detachAbove( const IORegistryPlane * plane ) IORegistryEntry * parent; retain(); - while( (parent = getParentEntry( plane ))) + while( (parent = copyParentEntry( plane ))) + { detachFromParent( parent, plane ); + parent->release(); + } release(); } diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp index f35c6b6d8..d4ad9ce51 100644 --- 
a/iokit/Kernel/IOService.cpp +++ b/iokit/Kernel/IOService.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2009 Apple Inc. All rights reserved. + * Copyright (c) 1998-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -57,6 +57,7 @@ #define LOG kprintf //#define LOG IOLog #define MATCH_DEBUG 0 +#define OBFUSCATE(x) ((void *)(VM_KERNEL_ADDRPERM(x))) #include "IOServicePrivate.h" #include "IOKitKernelInternal.h" @@ -124,7 +125,9 @@ const OSSymbol * gIOConsoleSessionScreenLockedTimeKey; clock_sec_t gIOConsoleLockTime; static bool gIOConsoleLoggedIn; +#if HIBERNATION static uint32_t gIOScreenLockState; +#endif static IORegistryEntry * gIOChosenEntry; static int gIOResourceGenerationCount; @@ -616,12 +619,12 @@ void IOService::startMatching( IOOptionBits options ) else __state[1] &= ~kIOServiceSynchronousState; + if( needConfig) prevBusy = _adjustBusy( 1 ); + unlockForArbitration(); if( needConfig) { - prevBusy = _adjustBusy( 1 ); - if( needWake) { IOLockLock( gIOServiceBusyLock ); thread_wakeup( (event_t) this/*&__state[1]*/ ); @@ -1724,7 +1727,9 @@ bool IOService::terminatePhase1( IOOptionBits options ) // -- compat if( options & kIOServiceRecursing) { + lockForArbitration(); __state[1] |= kIOServiceRecursing; + unlockForArbitration(); return( true ); } // -- @@ -1832,6 +1837,24 @@ bool IOService::terminatePhase1( IOOptionBits options ) return( true ); } +void IOService::setTerminateDefer(IOService * provider, bool defer) +{ + lockForArbitration(); + if (defer) __state[1] |= kIOServiceStartState; + else __state[1] &= ~kIOServiceStartState; + unlockForArbitration(); + + if (provider && !defer) + { + provider->lockForArbitration(); + if (provider->__state[0] & kIOServiceInactiveState) + { + provider->scheduleTerminatePhase2(); + } + provider->unlockForArbitration(); + } +} + void IOService::scheduleTerminatePhase2( IOOptionBits options ) { AbsoluteTime deadline; @@ -1886,7 +1909,7 @@ void IOService::scheduleTerminatePhase2( IOOptionBits options ) gIOTerminatePhase2List->setObject( this ); if( 0 == gIOTerminateWork++) { if( !gIOTerminateThread) - kernel_thread_start(&terminateThread, (void *) options, &gIOTerminateThread); + kernel_thread_start(&terminateThread, (void *)(uintptr_t) options, &gIOTerminateThread); else IOLockWakeup(gJobsLock, (event_t) &gIOTerminateWork, /* one-thread */ false ); } @@ -1976,7 +1999,7 @@ bool IOService::didTerminate( IOService * provider, IOOptionBits options, bool * scheduleStop( provider ); // -- compat else { - message( kIOMessageServiceIsRequestingClose, provider, (void *) options ); + message( kIOMessageServiceIsRequestingClose, provider, (void *)(uintptr_t) options ); if( false == provider->handleIsOpen( this )) scheduleStop( provider ); } @@ -2027,7 +2050,7 @@ void IOService::actionDidTerminate( IOService * victim, IOOptionBits options, IOService * client; bool defer = false; - victim->messageClients( kIOMessageServiceIsTerminated, (void *) options ); + victim->messageClients( kIOMessageServiceIsTerminated, (void *)(uintptr_t) options ); iter = victim->getClientIterator(); if( iter) { @@ -2096,6 +2119,7 @@ void IOService::terminateWorker( IOOptionBits options ) OSArray * doPhase2List; OSArray * didPhase2List; OSSet * freeList; + OSIterator * iter; UInt32 workDone; IOService * victim; IOService * client; @@ -2130,6 +2154,13 @@ void IOService::terminateWorker( IOOptionBits options ) if( doPhase2) { doPhase2 = (0 == (victim->__state[1] & kIOServiceTermPhase2State)) && (0 == (victim->__state[1] & kIOServiceConfigState)); + + if 
(doPhase2 && (iter = victim->getClientIterator())) { + while (doPhase2 && (client = (IOService *) iter->getNextObject())) { + doPhase2 = (0 == (client->__state[1] & kIOServiceStartState)); + } + iter->release(); + } if( doPhase2) victim->__state[1] |= kIOServiceTermPhase2State; } @@ -2143,7 +2174,7 @@ void IOService::terminateWorker( IOOptionBits options ) IOLockUnlock( gJobsLock ); } else { _workLoopAction( (IOWorkLoop::Action) &actionWillTerminate, - victim, (void *) options, (void *) doPhase2List ); + victim, (void *)(uintptr_t) options, (void *)(uintptr_t) doPhase2List ); } didPhase2List->headQ( victim ); } @@ -2162,7 +2193,7 @@ void IOService::terminateWorker( IOOptionBits options ) victim->unlockForArbitration(); } _workLoopAction( (IOWorkLoop::Action) &actionDidTerminate, - victim, (void *) options ); + victim, (void *)(uintptr_t) options ); didPhase2List->removeObject(0); } IOLockLock( gJobsLock ); @@ -2176,7 +2207,7 @@ void IOService::terminateWorker( IOOptionBits options ) IOLockUnlock( gJobsLock ); _workLoopAction( (IOWorkLoop::Action) &actionFinalize, - victim, (void *) options ); + victim, (void *)(uintptr_t) options ); IOLockLock( gJobsLock ); // hold off free freeList->setObject( victim ); @@ -2345,7 +2376,7 @@ static void serviceOpenMessageApplier( OSObject * object, void * ctx ) ServiceOpenMessageContext * context = (ServiceOpenMessageContext *) ctx; if( object != context->excludeClient) - context->service->messageClient( context->type, object, (void *) context->options ); + context->service->messageClient( context->type, object, (void *)(uintptr_t) context->options ); } bool IOService::open( IOService * forClient, @@ -2434,7 +2465,7 @@ bool IOService::handleOpen( IOService * forClient, else if( options & kIOServiceSeize ) { ok = (kIOReturnSuccess == messageClient( kIOMessageServiceIsRequestingClose, - __owner, (void *) options )); + __owner, (void *)(uintptr_t) options )); if( ok && (0 == __owner )) __owner = forClient; else @@ -2656,7 +2687,7 @@ void IOService::probeCandidates( OSOrderedSet * matches ) if( (client = copyClientWithCategory(category)) ) { #if IOMATCHDEBUG - if( debugFlags & kIOLogMatch) + if( (debugFlags & kIOLogMatch) && (this != gIOResources)) LOG("%s: match category %s exists\n", getName(), category->getCStringNoCopy()); #endif @@ -2729,7 +2760,7 @@ void IOService::probeCandidates( OSOrderedSet * matches ) if( !symbol) continue; - //IOLog("%s alloc (symbol %p props %p)\n", symbol->getCStringNoCopy(), symbol, props); + //IOLog("%s alloc (symbol %p props %p)\n", symbol->getCStringNoCopy(), OBFUSCATE(symbol), OBFUSCATE(props)); // alloc the driver instance inst = (IOService *) OSMetaClass::allocClassWithName( symbol); @@ -3022,12 +3053,12 @@ bool IOService::checkResource( OSObject * matching ) } if( gIOKitDebug & kIOLogConfig) - LOG("config(%p): stalling %s\n", IOThreadSelf(), getName()); + LOG("config(%p): stalling %s\n", OBFUSCATE(IOThreadSelf()), getName()); waitForService( table ); if( gIOKitDebug & kIOLogConfig) - LOG("config(%p): waking\n", IOThreadSelf() ); + LOG("config(%p): waking\n", OBFUSCATE(IOThreadSelf()) ); return( true ); } @@ -3330,7 +3361,13 @@ IOReturn IOService::waitForState( UInt32 mask, UInt32 value, IOReturn IOService::waitQuiet( uint64_t timeout ) { - return( waitForState( kIOServiceBusyStateMask, 0, timeout )); + IOReturn ret; + ret = waitForState( kIOServiceBusyStateMask, 0, timeout ); + if ((kIOReturnTimeout == ret) && (timeout >= 30000000000) && (kIOWaitQuietPanics & gIOKitDebug)) + { + panic("IOService 0x%llx (%s) busy 
timeout", getRegistryEntryID(), getName()); + } + return (ret); } IOReturn IOService::waitQuiet( mach_timespec_t * timeout ) @@ -3346,7 +3383,7 @@ IOReturn IOService::waitQuiet( mach_timespec_t * timeout ) else timeoutNS = UINT64_MAX; - return( waitForState( kIOServiceBusyStateMask, 0, timeoutNS )); + return (waitQuiet(timeoutNS)); } bool IOService::serializeProperties( OSSerialize * s ) const @@ -3398,7 +3435,7 @@ void _IOConfigThread::main(void * arg, wait_result_t result) if( gIOKitDebug & kIOLogConfig) LOG("config(%p): starting on %s, %d\n", - IOThreadSelf(), job->nub->getName(), job->type); + OBFUSCATE(IOThreadSelf()), job->nub->getName(), job->type); switch( job->type) { @@ -3408,7 +3445,7 @@ void _IOConfigThread::main(void * arg, wait_result_t result) default: LOG("config(%p): strange type (%d)\n", - IOThreadSelf(), job->type ); + OBFUSCATE(IOThreadSelf()), job->type ); break; } @@ -3432,7 +3469,7 @@ void _IOConfigThread::main(void * arg, wait_result_t result) } while( alive ); if( gIOKitDebug & kIOLogConfig) - LOG("config(%p): terminating\n", IOThreadSelf() ); + LOG("config(%p): terminating\n", OBFUSCATE(IOThreadSelf()) ); self->release(); } @@ -3686,7 +3723,8 @@ OSObject * IOService::copyExistingServices( OSDictionary * matching, OSSerialize * s2 = OSSerialize::withCapacity(128); current->serialize(s1); _current->serialize(s2); - kprintf("**mismatch** %p %p\n%s\n%s\n%s\n", current, _current, s->text(), s1->text(), s2->text()); + kprintf("**mismatch** %p %p\n%s\n%s\n%s\n", OBFUSCATE(current), + OBFUSCATE(_current), s->text(), s1->text(), s2->text()); s1->release(); s2->release(); } @@ -4348,7 +4386,7 @@ bool IOResources::matchPropertyTable( OSDictionary * table ) OSString * str; OSSet * set; OSIterator * iter; - bool ok = false; + bool ok = true; prop = table->getObject( gIOResourceMatchKey ); str = OSDynamicCast( OSString, prop ); @@ -4457,14 +4495,10 @@ void IOService::updateConsoleUsers(OSArray * consoleUsers, IOMessage systemMessa #if HIBERNATION if (gIOChosenEntry) { - uint32_t screenLockState; - - if (locked == kOSBooleanTrue) screenLockState = kIOScreenLockLocked; - else if (gIOConsoleLockTime) screenLockState = kIOScreenLockUnlocked; - else screenLockState = kIOScreenLockNoLock; - - if (screenLockState != gIOScreenLockState) gIOChosenEntry->setProperty(kIOScreenLockStateKey, &screenLockState, sizeof(screenLockState)); - gIOScreenLockState = screenLockState; + if (locked == kOSBooleanTrue) gIOScreenLockState = kIOScreenLockLocked; + else if (gIOConsoleLockTime) gIOScreenLockState = kIOScreenLockUnlocked; + else gIOScreenLockState = kIOScreenLockNoLock; + gIOChosenEntry->setProperty(kIOScreenLockStateKey, &gIOScreenLockState, sizeof(gIOScreenLockState)); } #endif /* HIBERNATION */ @@ -5008,6 +5042,9 @@ const char * IOService::stringFromReturn( IOReturn rtn ) */ int IOService::errnoFromReturn( IOReturn rtn ) { + if (unix_err(err_get_code(rtn)) == rtn) + return err_get_code(rtn); + switch(rtn) { // (obvious match) case kIOReturnSuccess: @@ -5515,13 +5552,57 @@ IOReturn IOService::causeInterrupt(int source) return interruptController->causeInterrupt(this, source); } +IOReturn IOService::configureReport(IOReportChannelList *channelList, + IOReportConfigureAction action, + void *result, + void *destination) +{ + unsigned cnt; + + for (cnt = 0; cnt < channelList->nchannels; cnt++) { + if ( channelList->channels[cnt].channel_id == kPMPowerStatesChID ) { + if (pwrMgt) configurePowerStatesReport(action, result); + else return kIOReturnUnsupported; + } + else if ( 
channelList->channels[cnt].channel_id == kPMCurrStateChID ) { + if (pwrMgt) configureSimplePowerReport(action, result); + else return kIOReturnUnsupported; + } + } + + return kIOReturnSuccess; +} + +IOReturn IOService::updateReport(IOReportChannelList *channelList, + IOReportUpdateAction action, + void *result, + void *destination) +{ + unsigned cnt; + + for (cnt = 0; cnt < channelList->nchannels; cnt++) { + if ( channelList->channels[cnt].channel_id == kPMPowerStatesChID ) { + if (pwrMgt) updatePowerStatesReport(action, result, destination); + else return kIOReturnUnsupported; + } + else if ( channelList->channels[cnt].channel_id == kPMCurrStateChID ) { + if (pwrMgt) updateSimplePowerReport(action, result, destination); + else return kIOReturnUnsupported; + } + } + + return kIOReturnSuccess; +} + #if __LP64__ -OSMetaClassDefineReservedUnused(IOService, 0); -OSMetaClassDefineReservedUnused(IOService, 1); +OSMetaClassDefineReservedUsed(IOService, 0); +OSMetaClassDefineReservedUsed(IOService, 1); OSMetaClassDefineReservedUnused(IOService, 2); OSMetaClassDefineReservedUnused(IOService, 3); OSMetaClassDefineReservedUnused(IOService, 4); OSMetaClassDefineReservedUnused(IOService, 5); +OSMetaClassDefineReservedUnused(IOService, 6); +OSMetaClassDefineReservedUnused(IOService, 7); #else OSMetaClassDefineReservedUsed(IOService, 0); OSMetaClassDefineReservedUsed(IOService, 1); @@ -5529,9 +5610,9 @@ OSMetaClassDefineReservedUsed(IOService, 2); OSMetaClassDefineReservedUsed(IOService, 3); OSMetaClassDefineReservedUsed(IOService, 4); OSMetaClassDefineReservedUsed(IOService, 5); +OSMetaClassDefineReservedUsed(IOService, 6); +OSMetaClassDefineReservedUsed(IOService, 7); #endif -OSMetaClassDefineReservedUnused(IOService, 6); -OSMetaClassDefineReservedUnused(IOService, 7); OSMetaClassDefineReservedUnused(IOService, 8); OSMetaClassDefineReservedUnused(IOService, 9); OSMetaClassDefineReservedUnused(IOService, 10); diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index 4d35ed45b..3ae21d60f 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -50,12 +51,14 @@ #include #include #include +#include // Required for notification instrumentation #include "IOServicePrivate.h" #include "IOServicePMPrivate.h" #include "IOKitKernelInternal.h" + static void settle_timer_expired(thread_call_param_t, thread_call_param_t); static void idle_timer_expired(thread_call_param_t, thread_call_param_t); static void tellKernelClientApplier(OSObject * object, void * arg); @@ -96,13 +99,15 @@ static IOPMRequest * gIOPMRequest = 0; static IOService * gIOPMRootNode = 0; static IOPlatformExpert * gPlatform = 0; -static const OSSymbol * gIOPMPowerClientDevice = 0; -static const OSSymbol * gIOPMPowerClientDriver = 0; -static const OSSymbol * gIOPMPowerClientChildProxy = 0; -static const OSSymbol * gIOPMPowerClientChildren = 0; +const OSSymbol * gIOPMPowerClientDevice = 0; +const OSSymbol * gIOPMPowerClientDriver = 0; +const OSSymbol * gIOPMPowerClientChildProxy = 0; +const OSSymbol * gIOPMPowerClientChildren = 0; +const OSSymbol * gIOPMPowerClientRootDomain = 0; static const OSSymbol * gIOPMPowerClientAdvisoryTickle = 0; static bool gIOPMAdvisoryTickleEnabled = true; +static thread_t gIOPMWatchDogThread = NULL; static uint32_t getPMRequestType( void ) { @@ -112,11 +117,26 @@ static uint32_t getPMRequestType( void ) return type; } +static IOPMRequestTag getPMRequestTag( void ) +{ + IOPMRequestTag tag = 0; + if 
(gIOPMRequest && + (gIOPMRequest->getType() == kIOPMRequestTypeRequestPowerStateOverride)) + { + tag = gIOPMRequest->fRequestTag; + } + return tag; +} + //****************************************************************************** // Macros //****************************************************************************** -#define PM_ERROR(x...) do { kprintf(x); IOLog(x); } while (false) +#define OBFUSCATE(x) ((void *)(VM_KERNEL_ADDRPERM(x))) + +#define PM_ERROR(x...) do { kprintf(x);IOLog(x); \ + IOService::getPMRootDomain()->sleepWakeDebugLog(x); \ + } while (false) #define PM_LOG(x...) do { kprintf(x); } while (false) #define PM_LOG1(x...) do { \ @@ -135,9 +155,10 @@ static uint32_t getPMRequestType( void ) #define RD_LOG(x...) do { \ if ((kIOLogPMRootDomain & gIOKitDebug) && \ - (getPMRootDomain() == this)) \ - kprintf("PMRD: " x); } while (false) - + (getPMRootDomain() == this)) { \ + kprintf("PMRD: " x); \ + getPMRootDomain()->sleepWakeDebugLog(x); \ + }} while (false) #define PM_ASSERT_IN_GATE(x) \ do { \ assert(gIOPMWorkLoop->inGate()); \ @@ -165,19 +186,17 @@ do { \ #define NS_TO_MS(nsec) ((int)((nsec) / 1000000ULL)) #define NS_TO_US(nsec) ((int)((nsec) / 1000ULL)) -#if CONFIG_EMBEDDED #define SUPPORT_IDLE_CANCEL 1 -#endif -#define kIOPMPowerStateMax 0xFFFFFFFF -#define kInvalidTicklePowerState (-1) +#define kIOPMPowerStateMax 0xFFFFFFFF +#define kInvalidTicklePowerState kIOPMPowerStateMax #define kNoTickleCancelWindow (60ULL * 1000ULL * 1000ULL * 1000ULL) #define IS_PM_ROOT (this == gIOPMRootNode) #define IS_ROOT_DOMAIN (getPMRootDomain() == this) -#define IS_POWER_DROP (fHeadNotePowerState < fCurrentPowerState) -#define IS_POWER_RISE (fHeadNotePowerState > fCurrentPowerState) +#define IS_POWER_DROP (StateOrder(fHeadNotePowerState) < StateOrder(fCurrentPowerState)) +#define IS_POWER_RISE (StateOrder(fHeadNotePowerState) > StateOrder(fCurrentPowerState)) // log setPowerStates longer than (ns): #define LOG_SETPOWER_TIMES (50ULL * 1000ULL * 1000ULL) @@ -207,9 +226,15 @@ enum { #define PM_ACTION_2(a, x, y) \ do { if (fPMActions.a) { \ - (fPMActions.a)(fPMActions.target, this, &fPMActions, x, y); } \ + (fPMActions.a)(fPMActions.target, this, &fPMActions, x, y, \ + getPMRequestTag()); } \ + } while (false) + +#define PM_ACTION_3(a, x, y, z) \ + do { if (fPMActions.a) { \ + (fPMActions.a)(fPMActions.target, this, &fPMActions, x, y, z); } \ } while (false) - + static OSNumber * copyClientIDForNotification( OSObject *object, IOPMInterestContext *context); @@ -229,13 +254,14 @@ enum { kIOPM_Finished = 0, kIOPM_OurChangeTellClientsPowerDown = 1, - kIOPM_OurChangeTellPriorityClientsPowerDown = 2, - kIOPM_OurChangeNotifyInterestedDriversWillChange = 3, - kIOPM_OurChangeSetPowerState = 4, - kIOPM_OurChangeWaitForPowerSettle = 5, - kIOPM_OurChangeNotifyInterestedDriversDidChange = 6, - kIOPM_OurChangeTellCapabilityDidChange = 7, - kIOPM_OurChangeFinish = 8, + kIOPM_OurChangeTellUserPMPolicyPowerDown = 2, + kIOPM_OurChangeTellPriorityClientsPowerDown = 3, + kIOPM_OurChangeNotifyInterestedDriversWillChange = 4, + kIOPM_OurChangeSetPowerState = 5, + kIOPM_OurChangeWaitForPowerSettle = 6, + kIOPM_OurChangeNotifyInterestedDriversDidChange = 7, + kIOPM_OurChangeTellCapabilityDidChange = 8, + kIOPM_OurChangeFinish = 9, kIOPM_ParentChangeTellPriorityClientsPowerDown = 10, kIOPM_ParentChangeNotifyInterestedDriversWillChange = 11, @@ -260,180 +286,13 @@ enum { kIOPM_BadMachineState = 0xFFFFFFFF }; - - /* - Power Management defines a few roles that drivers can play in their own, - and other drivers', 
power management. We briefly define those here. - - Many drivers implement their policy maker and power controller within the same - IOService object, but that is not required. - -== Policy Maker == - * Virtual IOService PM methods a "policy maker" may implement - * maxCapabilityForDomainState() - * initialPowerStateForDomainState() - * powerStateForDomainState() - - * Virtual IOService PM methods a "policy maker" may CALL - * PMinit() - -== Power Controller == - * Virtual IOService PM methods a "power controller" may implement - * setPowerState() - - * Virtual IOService PM methods a "power controller" may CALL - * joinPMtree() - * registerPowerDriver() - -======================= - There are two different kinds of power state changes. - * One is initiated by a subclassed device object which has either decided - to change power state, or its controlling driver has suggested it, or - some other driver wants to use the idle device and has asked it to become - usable. - * The second kind of power state change is initiated by the power domain - parent. - The two are handled through different code paths. - - We maintain a queue of "change notifications," or change notes. - * Usually the queue is empty. - * When it isn't, usually there is one change note in it - * It's possible to have more than one power state change pending at one - time, so a queue is implemented. - Example: - * The subclass device decides it's idle and initiates a change to a lower - power state. This causes interested parties to be notified, but they - don't all acknowledge right away. This causes the change note to sit - in the queue until all the acks are received. During this time, the - device decides it isn't idle anymore and wants to raise power back up - again. This change can't be started, however, because the previous one - isn't complete yet, so the second one waits in the queue. During this - time, the parent decides to lower or raise the power state of the entire - power domain and notifies the device, and that notification goes into - the queue, too, and can't be actioned until the others are. - - == SelfInitiated == - This is how a power change initiated by the subclass device is handled: - -> First, all interested parties are notified of the change via their - powerStateWillChangeTo method. If they all don't acknowledge via return - code, then we have to wait. If they do, or when they finally all - acknowledge via our acknowledgePowerChange method, then we can continue. - -> We call the controlling driver, instructing it to change to the new state - -> Then we wait for power to settle. If there is no settling-time, or after - it has passed, - -> we notify interested parties again, this time via their - powerStateDidChangeTo methods. - -> When they have all acked, we're done. - If we lowered power and don't need the power domain to be in its current power - state, we suggest to the parent that it lower the power domain state. - - == PowerDomainDownInitiated == -How a change to a lower power domain state initiated by the parent is handled: - -> First, we figure out what power state we will be in when the new domain - state is reached. - -> Then all interested parties are notified that we are moving to that new - state. - -> When they have acknowledged, we call the controlling driver to assume - that state and we wait for power to settle. - -> Then we acknowledge our preparedness to our parent. 
When all its - interested parties have acknowledged, - -> it lowers power and then notifies its interested parties again. - -> When we get this call, we notify our interested parties that the power - state has changed, and when they have all acknowledged, we're done. - - == PowerDomainUpInitiated == -How a change to a higher power domain state initiated by the parent is handled: - -> We figure out what power state we will be in when the new domain state is - reached. - -> If it is different from our current state we acknowledge the parent. - -> When all the parent's interested parties have acknowledged, it raises - power in the domain and waits for power to settle. - -> Then it notifies everyone that the new state has been reached. - -> When we get this call, we call the controlling driver, instructing it to - assume the new state, and wait for power to settle. - -> Then we notify our interested parties. When they all acknowledge we are - done. - - In either of the two power domain state cases above, it is possible that we - will not be changing state even though the domain is. - Examples: - * A change to a lower domain state may not affect us because we are already - in a low enough state, - * We will not take advantage of a change to a higher domain state, because - we have no need of the higher power. In such cases, there is nothing to - do but acknowledge the parent. So when the parent calls our - powerDomainWillChange method, and we decide that we will not be changing - state, we merely acknowledge the parent, via return code, and wait. - When the parent subsequently calls powerStateDidChange, we acknowledge again - via return code, and the change is complete. - - == 4 Paths Through State Machine == - Power state changes are processed in a state machine, and since there are four - varieties of power state changes, there are four major paths through the state - machine. - - == 5. No Need To change == - The fourth is nearly trivial. In this path, the parent is changing the domain - state, but we are not changing the device state. The change starts when the - parent calls powerDomainWillChange. All we do is acknowledge the parent. When - the parent calls powerStateDidChange, we acknowledge the parent again, and - we're done. - - == 1. OurChange Down == XXX gvdl - The first is fairly simple. It starts: - * when a power domain child calls requestPowerDomainState and we decide to - change power states to accomodate the child, - * or if our power-controlling driver calls changePowerStateTo, - * or if some other driver which is using our device calls makeUsable, - * or if a subclassed object calls changePowerStateToPriv. - These are all power changes initiated by us, not forced upon us by the parent. - - -> We start by notifying interested parties. - -> If they all acknowledge via return code, we can go on to state - "msSetPowerState". - -> Otherwise, we start the ack timer and wait for the stragglers to - acknowlege by calling acknowledgePowerChange. - -> We move on to state "msSetPowerState" when all the - stragglers have acknowledged, or when the ack timer expires on - all those which didn't acknowledge. - In "msSetPowerState" we call the power-controlling driver to change the - power state of the hardware. - -> If it returns saying it has done so, we go on to state - "msWaitForPowerSettle". - -> Otherwise, we have to wait for it, so we set the ack timer and wait. - -> When it calls acknowledgeSetPowerState, or when the ack timer - expires, we go on. 
- In "msWaitForPowerSettle", we look in the power state array to see if - there is any settle time required when changing from our current state to the - new state. - -> If not, we go right away to "msNotifyInterestedDriversDidChange". - -> Otherwise, we set the settle timer and wait. When it expires, we move on. - In "msNotifyInterestedDriversDidChange" state, we notify all our - interested parties via their powerStateDidChange methods that we have finished - changing power state. - -> If they all acknowledge via return code, we move on to "msFinish". - -> Otherwise we set the ack timer and wait. When they have all - acknowledged, or when the ack timer has expired for those that didn't, - we move on to "msFinish". - In "msFinish" we remove the used change note from the head of the queue - and start the next one if one exists. - - == 2. Parent Change Down == - Start at Stage 2 of OurChange Down XXX gvdl - - == 3. Change Up == - Start at Stage 4 of OurChange Down XXX gvdl - -Note all parent requested changes need to acknowledge the power has changed to the parent when done. - */ - //********************************************************************************* // [public] PMinit // // Initialize power management. //********************************************************************************* -void IOService::PMinit ( void ) +void IOService::PMinit( void ) { if ( !initialized ) { @@ -504,6 +363,9 @@ void IOService::PMinit ( void ) gIOPMPowerClientAdvisoryTickle = OSSymbol::withCStringNoCopy( "AdvisoryTicklePowerState" ); + + gIOPMPowerClientRootDomain = + OSSymbol::withCStringNoCopy( "RootDomainPower" ); } if (gIOPMRequestQueue && gIOPMReplyQueue && gIOPMFreeQueue) @@ -524,15 +386,15 @@ void IOService::PMinit ( void ) fPMLock = IOLockAlloc(); fInterestedDrivers = new IOPMinformeeList; fInterestedDrivers->initialize(); - fDesiredPowerState = 0; - fDeviceDesire = 0; + fDesiredPowerState = kPowerStateZero; + fDeviceDesire = kPowerStateZero; fInitialPowerChange = true; fInitialSetPowerState = true; fPreviousRequestPowerFlags = 0; fDeviceOverrideEnabled = false; fMachineState = kIOPM_Finished; fSavedMachineState = kIOPM_BadMachineState; - fIdleTimerMinPowerState = 0; + fIdleTimerMinPowerState = kPowerStateZero; fActivityLock = IOLockAlloc(); fStrictTreeOrder = false; fActivityTicklePowerState = kInvalidTicklePowerState; @@ -540,9 +402,9 @@ void IOService::PMinit ( void ) fControllingDriver = NULL; fPowerStates = NULL; fNumberOfPowerStates = 0; - fCurrentPowerState = 0; + fCurrentPowerState = kPowerStateZero; fParentsCurrentPowerFlags = 0; - fMaxPowerState = 0; + fMaxPowerState = kPowerStateZero; fName = getName(); fParentsKnowState = false; fSerialNumber = 0; @@ -561,6 +423,12 @@ void IOService::PMinit ( void ) fResetPowerStateOnWake = true; } + if (IS_ROOT_DOMAIN) + { + fWatchdogTimer = thread_call_allocate( + &IOService::watchdog_timer_expired, (thread_call_param_t)this); + } + fAckTimer = thread_call_allocate( &IOService::ack_timer_expired, (thread_call_param_t)this); fSettleTimer = thread_call_allocate( @@ -604,7 +472,7 @@ void IOService::PMinit ( void ) // Free the data created by PMinit. Only called from IOService::free(). 
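The PMinit() hunk above adds a root-domain watchdog thread call alongside the existing ack and settle timers, but the driver-facing bring-up contract is untouched. As a reference point for the registerPowerDriver() validation changes further down, a minimal sketch of that contract follows; it compiles only in a kext against the kernel headers, "MyDriver" is a hypothetical policy maker, and the table uses the version-1 layout (state order implicitly equal to array index):

#include <IOKit/IOService.h>
#include <IOKit/pwr_mgt/IOPM.h>

// Two states, off and usable; 12 fields per IOPMPowerState entry.
// (Class boilerplate - OSDeclareDefaultStructors etc. - omitted.)
static IOPMPowerState gMyPowerStates[2] = {
    { kIOPMPowerStateVersion1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
    { kIOPMPowerStateVersion1, IOPMDeviceUsable, kIOPMPowerOn, kIOPMPowerOn,
      0, 0, 0, 0, 0, 0, 0, 0 }
};

bool MyDriver::start(IOService * provider)
{
    if (!IOService::start(provider))
        return false;
    PMinit();                                     // sets up the PM state torn down in PMfree()
    provider->joinPMtree(this);                   // attach into the power plane
    registerPowerDriver(this, gMyPowerStates, 2); // numberOfStates < 2 is rejected above
    return true;
}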
//********************************************************************************* -void IOService::PMfree ( void ) +void IOService::PMfree( void ) { initialized = false; pm_vars = 0; @@ -618,6 +486,12 @@ void IOService::PMfree ( void ) assert(queue_empty(&pwrMgt->RequestHead)); assert(queue_empty(&fPMDriverCallQueue)); + if (fWatchdogTimer) { + thread_call_cancel(fWatchdogTimer); + thread_call_free(fWatchdogTimer); + fWatchdogTimer = NULL; + } + if ( fSettleTimer ) { thread_call_cancel(fSettleTimer); thread_call_free(fSettleTimer); @@ -703,7 +577,7 @@ void IOService::PMDebug( uint32_t event, uintptr_t param1, uintptr_t param2 ) // meaning it may not be initialized for power management. //********************************************************************************* -void IOService::joinPMtree ( IOService * driver ) +void IOService::joinPMtree( IOService * driver ) { IOPlatformExpert * platform; @@ -719,7 +593,7 @@ void IOService::joinPMtree ( IOService * driver ) // Power Managment is informing us that we are the root power domain. //********************************************************************************* -IOReturn IOService::youAreRoot ( void ) +IOReturn IOService::youAreRoot( void ) { return IOPMNoErr; } @@ -732,7 +606,7 @@ IOReturn IOService::youAreRoot ( void ) // from power plane. //********************************************************************************* -void IOService::PMstop ( void ) +void IOService::PMstop( void ) { IOPMRequest * request; @@ -760,7 +634,7 @@ void IOService::PMstop ( void ) request = acquirePMRequest( this, kIOPMRequestTypePMStop ); if (request) { - PM_LOG2("%s: %p PMstop\n", getName(), this); + PM_LOG2("%s: %p PMstop\n", getName(), OBFUSCATE(this)); submitPMRequest( request ); } } @@ -771,7 +645,7 @@ void IOService::PMstop ( void ) // Disconnect the node from all parents and children in the power plane. //********************************************************************************* -void IOService::handlePMstop ( IOPMRequest * request ) +void IOService::handlePMstop( IOPMRequest * request ) { OSIterator * iter; OSObject * next; @@ -780,7 +654,7 @@ void IOService::handlePMstop ( IOPMRequest * request ) IOService * theParent; PM_ASSERT_IN_GATE(); - PM_LOG2("%s: %p %s start\n", getName(), this, __FUNCTION__); + PM_LOG2("%s: %p %s start\n", getName(), OBFUSCATE(this), __FUNCTION__); // remove driver from prevent system sleep lists getPMRootDomain()->updatePreventIdleSleepList(this, false); @@ -864,7 +738,7 @@ void IOService::handlePMstop ( IOPMRequest * request ) if (fIdleTimer && thread_call_cancel(fIdleTimer)) release(); - PM_LOG2("%s: %p %s done\n", getName(), this, __FUNCTION__); + PM_LOG2("%s: %p %s done\n", getName(), OBFUSCATE(this), __FUNCTION__); } //********************************************************************************* @@ -873,7 +747,7 @@ void IOService::handlePMstop ( IOPMRequest * request ) // Power Management is informing us who our children are. 
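handlePMstop() above is the queue-side half of teardown: it pulls the service off the prevent-idle and prevent-sleep lists and detaches it from gIOPowerPlane. The driver-side half is simply a PMstop() in stop(), mirroring the PMinit() in start(); a short sketch with the hypothetical MyDriver again:

void MyDriver::stop(IOService * provider)
{
    // Queues kIOPMRequestTypePMStop; handlePMstop() then detaches this
    // service from all power-plane parents and children before PMfree().
    PMstop();
    IOService::stop(provider);
}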
//********************************************************************************* -IOReturn IOService::addPowerChild ( IOService * child ) +IOReturn IOService::addPowerChild( IOService * child ) { IOPowerConnection * connection = 0; IOPMRequest * requests[3] = {0, 0, 0}; @@ -911,7 +785,7 @@ IOReturn IOService::addPowerChild ( IOService * child ) if (!ok) { PM_LOG("%s: %s (%p) is already a child\n", - getName(), child->getName(), child); + getName(), child->getName(), OBFUSCATE(child)); break; } @@ -981,9 +855,9 @@ IOReturn IOService::addPowerChild ( IOService * child ) // Step 1/3 of adding a power child. Called on the power parent. //********************************************************************************* -void IOService::addPowerChild1 ( IOPMRequest * request ) +void IOService::addPowerChild1( IOPMRequest * request ) { - unsigned long tempDesire = 0; + IOPMPowerStateIndex tempDesire = kPowerStateZero; // Make us temporary usable before adding the child. @@ -992,10 +866,11 @@ void IOService::addPowerChild1 ( IOPMRequest * request ) if (fControllingDriver && inPlane(gIOPowerPlane) && fParentsKnowState) { - tempDesire = fNumberOfPowerStates - 1; + tempDesire = fHighestPowerState; } - if (tempDesire && (IS_PM_ROOT || (fMaxPowerState >= tempDesire))) + if ((tempDesire != kPowerStateZero) && + (IS_PM_ROOT || (StateOrder(fMaxPowerState) >= StateOrder(tempDesire)))) { adjustPowerState(tempDesire); } @@ -1008,7 +883,7 @@ void IOService::addPowerChild1 ( IOPMRequest * request ) // Execution blocked behind addPowerChild1. //********************************************************************************* -void IOService::addPowerChild2 ( IOPMRequest * request ) +void IOService::addPowerChild2( IOPMRequest * request ) { IOPowerConnection * connection = (IOPowerConnection *) request->fArg0; IOService * parent; @@ -1064,7 +939,7 @@ void IOService::addPowerChild2 ( IOPMRequest * request ) // Execution blocked behind addPowerChild2. //********************************************************************************* -void IOService::addPowerChild3 ( IOPMRequest * request ) +void IOService::addPowerChild3( IOPMRequest * request ) { IOPowerConnection * connection = (IOPowerConnection *) request->fArg0; IOService * child; @@ -1102,7 +977,7 @@ void IOService::addPowerChild3 ( IOPMRequest * request ) // to assume that state. //********************************************************************************* -IOReturn IOService::setPowerParent ( +IOReturn IOService::setPowerParent( IOPowerConnection * theParent, bool stateKnown, IOPMPowerFlags powerFlags ) { return kIOReturnUnsupported; @@ -1115,7 +990,7 @@ IOReturn IOService::setPowerParent ( // Called on a parent whose child is being removed by PMstop(). //********************************************************************************* -IOReturn IOService::removePowerChild ( IOPowerConnection * theNub ) +IOReturn IOService::removePowerChild( IOPowerConnection * theNub ) { IORegistryEntry * theChild; @@ -1181,18 +1056,19 @@ IOReturn IOService::removePowerChild ( IOPowerConnection * theNub ) // A driver has called us volunteering to control power to our device. 
//********************************************************************************* -IOReturn IOService::registerPowerDriver ( +IOReturn IOService::registerPowerDriver( IOService * powerDriver, IOPMPowerState * powerStates, unsigned long numberOfStates ) { - IOPMRequest * request; - IOPMPSEntry * powerStatesCopy = 0; + IOPMRequest * request; + IOPMPSEntry * powerStatesCopy = 0; + IOPMPowerStateIndex stateOrder; + IOReturn error = kIOReturnSuccess; if (!initialized) return IOPMNotYetInitialized; - // Validate arguments. if (!powerStates || (numberOfStates < 2)) { OUR_PMLog(kPMLogControllingDriverErr5, numberOfStates, 0); @@ -1205,7 +1081,7 @@ IOReturn IOService::registerPowerDriver ( return kIOReturnBadArgument; } - if (powerStates[0].version != kIOPMPowerStateVersion1) + if (powerStates[0].version > kIOPMPowerStateVersion2) { OUR_PMLog(kPMLogControllingDriverErr1, powerStates[0].version, 0); return kIOReturnBadArgument; @@ -1215,7 +1091,14 @@ IOReturn IOService::registerPowerDriver ( // Make a copy of the supplied power state array. powerStatesCopy = IONew(IOPMPSEntry, numberOfStates); if (!powerStatesCopy) + { + error = kIOReturnNoMemory; break; + } + + // Initialize to bogus values + for (IOPMPowerStateIndex i = 0; i < numberOfStates; i++) + powerStatesCopy[i].stateOrderToIndex = kIOPMPowerStateMax; for (uint32_t i = 0; i < numberOfStates; i++) { @@ -1225,11 +1108,36 @@ IOReturn IOService::registerPowerDriver ( powerStatesCopy[i].staticPower = powerStates[i].staticPower; powerStatesCopy[i].settleUpTime = powerStates[i].settleUpTime; powerStatesCopy[i].settleDownTime = powerStates[i].settleDownTime; + if (powerStates[i].version >= kIOPMPowerStateVersion2) + stateOrder = powerStates[i].stateOrder; + else + stateOrder = i; + + if (stateOrder < numberOfStates) + { + powerStatesCopy[i].stateOrder = stateOrder; + powerStatesCopy[stateOrder].stateOrderToIndex = i; + } } + for (IOPMPowerStateIndex i = 0; i < numberOfStates; i++) + { + if (powerStatesCopy[i].stateOrderToIndex == kIOPMPowerStateMax) + { + // power state order missing + error = kIOReturnBadArgument; + break; + } + } + if (kIOReturnSuccess != error) + break; + request = acquirePMRequest( this, kIOPMRequestTypeRegisterPowerDriver ); if (!request) + { + error = kIOReturnNoMemory; break; + } powerDriver->retain(); request->fArg0 = (void *) powerDriver; @@ -1237,25 +1145,27 @@ IOReturn IOService::registerPowerDriver ( request->fArg2 = (void *) numberOfStates; submitPMRequest( request ); - return kIOReturnSuccess; + return kIOReturnSuccess; } while (false); if (powerStatesCopy) IODelete(powerStatesCopy, IOPMPSEntry, numberOfStates); - return kIOReturnNoMemory; + + return error; } //********************************************************************************* // [private] handleRegisterPowerDriver //********************************************************************************* -void IOService::handleRegisterPowerDriver ( IOPMRequest * request ) +void IOService::handleRegisterPowerDriver( IOPMRequest * request ) { IOService * powerDriver = (IOService *) request->fArg0; IOPMPSEntry * powerStates = (IOPMPSEntry *) request->fArg1; unsigned long numberOfStates = (unsigned long) request->fArg2; - unsigned long i; + unsigned long i, stateIndex; + unsigned long lowestPowerState; IOService * root; OSIterator * iter; @@ -1275,18 +1185,26 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request ) fControllingDriver = powerDriver; fCurrentCapabilityFlags = fPowerStates[0].capabilityFlags; - // make a mask of all the character bits 
we know about - fOutputPowerCharacterFlags = 0; - for ( i = 0; i < numberOfStates; i++ ) { - fOutputPowerCharacterFlags |= fPowerStates[i].outputPowerFlags; - if (!fDeviceUsablePowerState && - (fPowerStates[i].capabilityFlags & IOPMDeviceUsable)) + lowestPowerState = fPowerStates[0].stateOrderToIndex; + fHighestPowerState = fPowerStates[numberOfStates - 1].stateOrderToIndex; + + // OR'ing all the output power flags + fMergedOutputPowerFlags = 0; + fDeviceUsablePowerState = lowestPowerState; + for ( i = 0; i < numberOfStates; i++ ) + { + fMergedOutputPowerFlags |= fPowerStates[i].outputPowerFlags; + + stateIndex = fPowerStates[i].stateOrderToIndex; + assert(stateIndex < numberOfStates); + if ((fDeviceUsablePowerState == lowestPowerState) && + (fPowerStates[stateIndex].capabilityFlags & IOPMDeviceUsable)) { // The minimum power state that the device is usable - fDeviceUsablePowerState = i; + fDeviceUsablePowerState = stateIndex; } } - + // Register powerDriver as interested, unless already done. // We don't want to register the default implementation since // it does nothing. One ramification of not always registering @@ -1314,27 +1232,24 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request ) // Examine all existing power clients and perform limit check. - if (fPowerClients) + if (fPowerClients && + (iter = OSCollectionIterator::withCollection(fPowerClients))) { - iter = OSCollectionIterator::withCollection(fPowerClients); - if (iter) + const OSSymbol * client; + while ((client = (const OSSymbol *) iter->getNextObject())) { - const OSSymbol * client; - while ((client = (const OSSymbol *) iter->getNextObject())) + IOPMPowerStateIndex powerState = getPowerStateForClient(client); + if (powerState >= numberOfStates) { - uint32_t powerState = getPowerStateForClient(client); - if (powerState >= numberOfStates) - { - updatePowerClient(client, numberOfStates - 1); - } + updatePowerClient(client, fHighestPowerState); } - iter->release(); } + iter->release(); } if ( inPlane(gIOPowerPlane) && fParentsKnowState ) { - unsigned long tempDesire; + IOPMPowerStateIndex tempDesire; fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fParentsCurrentPowerFlags); // initially change into the state we are already in tempDesire = fControllingDriver->initialPowerStateForDomainState(fParentsCurrentPowerFlags); @@ -1359,7 +1274,7 @@ void IOService::handleRegisterPowerDriver ( IOPMRequest * request ) // out what the current power state of the device is. //********************************************************************************* -IOPMPowerFlags IOService::registerInterestedDriver ( IOService * driver ) +IOPMPowerFlags IOService::registerInterestedDriver( IOService * driver ) { IOPMRequest * request; bool signal; @@ -1397,7 +1312,7 @@ IOPMPowerFlags IOService::registerInterestedDriver ( IOService * driver ) // [public] deRegisterInterestedDriver //********************************************************************************* -IOReturn IOService::deRegisterInterestedDriver ( IOService * driver ) +IOReturn IOService::deRegisterInterestedDriver( IOService * driver ) { IOPMinformeeList * list; IOPMinformee * item; @@ -1505,7 +1420,7 @@ void IOService::handleInterestChanged( IOPMRequest * request ) // of a "current change note".) 
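The stateOrderToIndex bookkeeping above is the heart of the version-2 change: a power state array may now be declared in any index order, with each entry's stateOrder giving its rank. A standalone user-space model (plain C++, not kernel code; the names are local stand-ins) of the map construction and the kIOPMPowerStateMax sentinel check that rejects duplicate or missing orders:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

static const uint32_t kOrderSentinel = UINT32_MAX;   // models kIOPMPowerStateMax

struct PSEntry {
    uint32_t stateOrder;        // rank supplied by the driver (version 2)
    uint32_t stateOrderToIndex; // filled in: which index holds this rank
};

// Mirrors the registerPowerDriver() loop: seed with the sentinel, invert the
// order mapping, then fail if any rank was never claimed (duplicate/missing).
static bool buildOrderMaps(std::vector<PSEntry> & states)
{
    const uint32_t n = (uint32_t) states.size();
    for (auto & s : states)
        s.stateOrderToIndex = kOrderSentinel;
    for (uint32_t i = 0; i < n; i++) {
        if (states[i].stateOrder < n)
            states[states[i].stateOrder].stateOrderToIndex = i;
    }
    for (const auto & s : states)
        if (s.stateOrderToIndex == kOrderSentinel)
            return false;   // power state order missing -> kIOReturnBadArgument
    return true;
}

int main()
{
    // Three states declared out of rank order: index 0 carries rank 1.
    std::vector<PSEntry> states = { {1, 0}, {0, 0}, {2, 0} };
    assert(buildOrderMaps(states));
    printf("lowest-ranked state is index %u, highest is index %u\n",
           states[0].stateOrderToIndex,                  // lowestPowerState above
           states[states.size() - 1].stateOrderToIndex); // fHighestPowerState
    return 0;
}

This prints "lowest-ranked state is index 1, highest is index 2", matching what the kernel caches as lowestPowerState and fHighestPowerState.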
//********************************************************************************* -IOReturn IOService::acknowledgePowerChange ( IOService * whichObject ) +IOReturn IOService::acknowledgePowerChange( IOService * whichObject ) { IOPMRequest * request; @@ -1529,7 +1444,7 @@ IOReturn IOService::acknowledgePowerChange ( IOService * whichObject ) // [private] handleAcknowledgePowerChange //********************************************************************************* -bool IOService::handleAcknowledgePowerChange ( IOPMRequest * request ) +bool IOService::handleAcknowledgePowerChange( IOPMRequest * request ) { IOPMinformee * informee; unsigned long childPower = kIOPMUnknown; @@ -1575,7 +1490,7 @@ bool IOService::handleAcknowledgePowerChange ( IOPMRequest * request ) PM_LOG("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) async took %d ms\n", informee->whatObject->getName(), (fDriverCallReason == kDriverCallInformPreChange) ? "Will" : "Did", - informee->whatObject, + OBFUSCATE(informee->whatObject), fName, fCurrentPowerState, fHeadNotePowerState, NS_TO_US(nsec)); uint16_t logType = (fDriverCallReason == kDriverCallInformPreChange) @@ -1590,7 +1505,7 @@ bool IOService::handleAcknowledgePowerChange ( IOPMRequest * request ) 0, 0, 0, NS_TO_MS(nsec)); - getPMRootDomain()->recordAndReleasePMEventGated( details ); + getPMRootDomain()->recordAndReleasePMEvent( details ); } #endif // mark it acked @@ -1652,7 +1567,7 @@ no_err: // We continue to process the power state change. //********************************************************************************* -IOReturn IOService::acknowledgeSetPowerState ( void ) +IOReturn IOService::acknowledgeSetPowerState( void ) { IOPMRequest * request; @@ -1671,7 +1586,7 @@ IOReturn IOService::acknowledgeSetPowerState ( void ) // [private] adjustPowerState //********************************************************************************* -void IOService::adjustPowerState ( uint32_t clamp ) +void IOService::adjustPowerState( uint32_t clamp ) { PM_ASSERT_IN_GATE(); computeDesiredState(clamp, false); @@ -1699,7 +1614,7 @@ void IOService::adjustPowerState ( uint32_t clamp ) // [public] synchronizePowerTree //********************************************************************************* -IOReturn IOService::synchronizePowerTree ( +IOReturn IOService::synchronizePowerTree( IOOptionBits options, IOService * notifyRoot ) { @@ -1711,6 +1626,8 @@ IOReturn IOService::synchronizePowerTree ( if (!initialized) return kIOPMNotYetInitialized; + OUR_PMLog(kPMLogCSynchronizePowerTree, options, (notifyRoot != 0)); + if (notifyRoot) { IOPMRequest * nr; @@ -1749,11 +1666,11 @@ error_no_memory: // [private] handleSynchronizePowerTree //********************************************************************************* -void IOService::handleSynchronizePowerTree ( IOPMRequest * request ) +void IOService::handleSynchronizePowerTree( IOPMRequest * request ) { PM_ASSERT_IN_GATE(); if (fControllingDriver && fParentsKnowState && inPlane(gIOPowerPlane) && - (fCurrentPowerState == fNumberOfPowerStates - 1)) + (fCurrentPowerState == fHighestPowerState)) { IOOptionBits options = (uintptr_t) request->fArg0; @@ -1778,7 +1695,7 @@ void IOService::handleSynchronizePowerTree ( IOPMRequest * request ) // kind of change is occuring in the domain. 
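acknowledgeSetPowerState() above exists for controllers whose hardware cannot switch synchronously. Per the ack-timer machinery this file maintains, setPowerState() may return a maximum settle time in microseconds instead of IOPMAckImplied, and the controller then acks from its own completion path. A hedged sketch; MyDriver and startHardwareTransition() are hypothetical:

IOReturn MyDriver::setPowerState(unsigned long powerStateOrdinal,
                                 IOService * whatDevice)
{
    if (startHardwareTransition(powerStateOrdinal))
        return IOPMAckImplied;   // finished synchronously
    return 5 * 1000 * 1000;      // ack expected within 5 s; the ack timer is armed
}

void MyDriver::hardwareTransitionDone()
{
    acknowledgeSetPowerState();  // cancels the ack timer, resumes the state machine
}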
//********************************************************************************* -IOReturn IOService::powerDomainWillChangeTo ( +IOReturn IOService::powerDomainWillChangeTo( IOPMPowerFlags newPowerFlags, IOPowerConnection * whichParent ) { @@ -1791,7 +1708,7 @@ IOReturn IOService::powerDomainWillChangeTo ( // [private] handlePowerDomainWillChangeTo //********************************************************************************* -void IOService::handlePowerDomainWillChangeTo ( IOPMRequest * request ) +void IOService::handlePowerDomainWillChangeTo( IOPMRequest * request ) { IOPMPowerFlags parentPowerFlags = (IOPMPowerFlags) request->fArg0; IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; @@ -1901,7 +1818,7 @@ exit_no_ack: // kind of change is occuring in the domain. //********************************************************************************* -IOReturn IOService::powerDomainDidChangeTo ( +IOReturn IOService::powerDomainDidChangeTo( IOPMPowerFlags newPowerFlags, IOPowerConnection * whichParent ) { @@ -1914,14 +1831,14 @@ IOReturn IOService::powerDomainDidChangeTo ( // [private] handlePowerDomainDidChangeTo //********************************************************************************* -void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request ) +void IOService::handlePowerDomainDidChangeTo( IOPMRequest * request ) { IOPMPowerFlags parentPowerFlags = (IOPMPowerFlags) request->fArg0; IOPowerConnection * whichParent = (IOPowerConnection *) request->fArg1; IOPMPowerChangeFlags parentChangeFlags = (IOPMPowerChangeFlags)(uintptr_t) request->fArg2; IOPMPowerChangeFlags myChangeFlags; IOPMPowerStateIndex maxPowerState; - IOPMPowerStateIndex initialDesire = 0; + IOPMPowerStateIndex initialDesire = kPowerStateZero; bool computeDesire = false; bool desireChanged = false; bool savedParentsKnowState; @@ -1976,7 +1893,7 @@ void IOService::handlePowerDomainDidChangeTo ( IOPMRequest * request ) // fDesiredPowerState was adjusted before going to sleep // with fDeviceDesire at min. - if (wakePowerState > fDesiredPowerState) + if (StateOrder(wakePowerState) > StateOrder(fDesiredPowerState)) { // Must schedule a power adjustment if we changed the // device desire. That will update the desired domain @@ -2043,7 +1960,7 @@ exit_no_ack: // data together. 
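The IS_POWER_DROP/IS_POWER_RISE macros and the wake-desire check in handlePowerDomainDidChangeTo() now compare state order rather than raw index, so out-of-order state tables behave correctly. A standalone model (plain C++; StateOrder here reads a local table where the kernel reads the per-state rank recorded at registration):

#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<uint32_t> gStateOrder = { 1, 0, 2 }; // rank of each state index

static uint32_t StateOrder(uint32_t idx) { return gStateOrder[idx]; }
static uint32_t StateMax(uint32_t a, uint32_t b) { return StateOrder(a) < StateOrder(b) ? b : a; }
static uint32_t StateMin(uint32_t a, uint32_t b) { return StateOrder(a) < StateOrder(b) ? a : b; }

int main()
{
    // Index 0 ranks above index 1 here, so a head-note change from state 0
    // to state 1 is a power drop even though the index went up.
    uint32_t current = 0, headNote = 1;
    printf("IS_POWER_DROP: %d\n", StateOrder(headNote) < StateOrder(current)); // 1
    printf("StateMax(0,2) = %u, StateMin(0,2) = %u\n", StateMax(0, 2), StateMin(0, 2));
    return 0;
}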
//********************************************************************************* -void IOService::setParentInfo ( +void IOService::setParentInfo( IOPMPowerFlags newPowerFlags, IOPowerConnection * whichParent, bool knowsState ) @@ -2100,8 +2017,12 @@ void IOService::trackSystemSleepPreventers( if ((oldCapability ^ newCapability) & kIOPMPreventIdleSleep) { + bool idleCancelAllowed = getPMRootDomain()->updatePreventIdleSleepList(this, + ((oldCapability & kIOPMPreventIdleSleep) == 0)); + if(!idleCancelAllowed) + PM_LOG2("Idle cancel was disallowed for %s\n", getName()); #if SUPPORT_IDLE_CANCEL - if ((oldCapability & kIOPMPreventIdleSleep) == 0) + if (idleCancelAllowed && (oldCapability & kIOPMPreventIdleSleep) == 0) { IOPMRequest * cancelRequest; @@ -2113,8 +2034,6 @@ void IOService::trackSystemSleepPreventers( } #endif - getPMRootDomain()->updatePreventIdleSleepList(this, - ((oldCapability & kIOPMPreventIdleSleep) == 0)); } if ((oldCapability ^ newCapability) & kIOPMPreventSystemSleep) @@ -2136,7 +2055,7 @@ IOReturn IOService::requestPowerDomainState( IOPowerConnection * childConnection, unsigned long specification ) { - IOPMPowerStateIndex ps; + IOPMPowerStateIndex order, powerState; IOPMPowerFlags outputPowerFlags; IOService * child; IOPMRequest * subRequest; @@ -2162,6 +2081,9 @@ IOReturn IOService::requestPowerDomainState( child = (IOService *) childConnection->getChildEntry(gIOPowerPlane); assert(child); + // Remove flags from child request which we can't possibly supply + childRequestPowerFlags &= fMergedOutputPowerFlags; + // Merge in the power flags contributed by this power parent // at its current or impending power state. @@ -2170,28 +2092,28 @@ IOReturn IOService::requestPowerDomainState( { if (IS_POWER_DROP && !IS_ROOT_DOMAIN) { - // Use the lower power state when dropping power. - // Must be careful since a power drop can be canceled - // from the following states: - // - kIOPM_OurChangeTellClientsPowerDown - // - kIOPM_OurChangeTellPriorityClientsPowerDown - // - // The child must not wait for this parent to raise power - // if the power drop was cancelled. The solution is to cancel - // the power drop if possible, then schedule an adjustment to - // re-evaluate the parent's power state. - // - // Root domain is excluded to avoid idle sleep issues. And permit - // root domain children to pop up when system is going to sleep. - - if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown) || - (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown)) - { - fDoNotPowerDown = true; // cancel power drop - adjustPower = true; // schedule an adjustment - PM_LOG1("%s: power drop cancelled in state %u by %s\n", - getName(), fMachineState, child->getName()); - } + // Use the lower power state when dropping power. + // Must be careful since a power drop can be cancelled + // from the following states: + // - kIOPM_OurChangeTellClientsPowerDown + // - kIOPM_OurChangeTellPriorityClientsPowerDown + // + // The child must not wait for this parent to raise power + // if the power drop was cancelled. The solution is to cancel + // the power drop if possible, then schedule an adjustment to + // re-evaluate the parent's power state. + // + // Root domain is excluded to avoid idle sleep issues. And allow + // root domain children to pop up when system is going to sleep. 
+ + if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown) || + (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown)) + { + fDoNotPowerDown = true; // cancel power drop + adjustPower = true; // schedule an adjustment + PM_LOG1("%s: power drop cancelled in state %u by %s\n", + getName(), fMachineState, child->getName()); + } else { // Beyond cancellation point, report the impending state. @@ -2216,15 +2138,16 @@ IOReturn IOService::requestPowerDomainState( // Map child's requested power flags to one of our power state. - for (ps = 0; ps < fNumberOfPowerStates; ps++) + for (order = 0; order < fNumberOfPowerStates; order++) { - if ((fPowerStates[ps].outputPowerFlags & childRequestPowerFlags) == - (fOutputPowerCharacterFlags & childRequestPowerFlags)) + powerState = fPowerStates[order].stateOrderToIndex; + if ((fPowerStates[powerState].outputPowerFlags & childRequestPowerFlags) + == childRequestPowerFlags) break; } - if (ps >= fNumberOfPowerStates) + if (order >= fNumberOfPowerStates) { - ps = 0; // should never happen + powerState = kPowerStateZero; } // Conditions that warrants a power adjustment on this parent. @@ -2232,7 +2155,7 @@ IOReturn IOService::requestPowerDomainState( // prevent idle/sleep flags towards the root domain. if (!childConnection->childHasRequestedPower() || - (ps != childConnection->getDesiredDomainState())) + (powerState != childConnection->getDesiredDomainState())) adjustPower = true; #if ENABLE_DEBUG_LOGS @@ -2242,13 +2165,13 @@ IOReturn IOService::requestPowerDomainState( getName(), child->getName(), !childConnection->childHasRequestedPower(), (uint32_t) childConnection->getDesiredDomainState(), - (uint32_t) ps); + (uint32_t) powerState); } #endif // Record the child's desires on the connection. childConnection->setChildHasRequestedPower(); - childConnection->setDesiredDomainState( ps ); + childConnection->setDesiredDomainState( powerState ); // Schedule a request to re-evaluate all children desires and // adjust power state. Submit a request if one wasn't pending, @@ -2278,7 +2201,7 @@ IOReturn IOService::requestPowerDomainState( // We enter the highest state until addPowerChild is called. //********************************************************************************* -IOReturn IOService::temporaryPowerClampOn ( void ) +IOReturn IOService::temporaryPowerClampOn( void ) { return requestPowerState( gIOPMPowerClientChildProxy, kIOPMPowerStateMax ); } @@ -2295,7 +2218,7 @@ IOReturn IOService::temporaryPowerClampOn ( void ) // highest power state. //********************************************************************************* -IOReturn IOService::makeUsable ( void ) +IOReturn IOService::makeUsable( void ) { OUR_PMLog(kPMLogMakeUsable, 0, 0); return requestPowerState( gIOPMPowerClientDevice, kIOPMPowerStateMax ); @@ -2305,7 +2228,7 @@ IOReturn IOService::makeUsable ( void ) // [public] currentCapability //********************************************************************************* -IOPMPowerFlags IOService::currentCapability ( void ) +IOPMPowerFlags IOService::currentCapability( void ) { if (!initialized) return IOPMNotPowerManaged; @@ -2321,7 +2244,7 @@ IOPMPowerFlags IOService::currentCapability ( void ) // power states differ, then a power state change is initiated. 
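Two related changes above: requestPowerDomainState() first masks the child's request against fMergedOutputPowerFlags (bits this parent can never supply), then walks its own states from lowest rank upward and picks the first whose outputPowerFlags cover the remaining request. A standalone model of that selection (plain C++, local stand-in types):

#include <cstdint>
#include <cstdio>
#include <vector>

struct PState {
    uint32_t outputPowerFlags;   // what this state supplies to the domain
    uint32_t stateOrderToIndex;  // index of the state holding this rank
};

static uint32_t mapChildRequest(const std::vector<PState> & states,
                                uint32_t mergedOutputFlags,
                                uint32_t childRequestFlags)
{
    childRequestFlags &= mergedOutputFlags;  // drop unsatisfiable bits
    for (uint32_t order = 0; order < states.size(); order++) {
        uint32_t idx = states[order].stateOrderToIndex;
        if ((states[idx].outputPowerFlags & childRequestFlags) == childRequestFlags)
            return idx;                      // lowest-ranked state that suffices
    }
    return 0;                                // kPowerStateZero fallback
}

int main()
{
    // Identity-ordered two-state parent; only state 1 outputs bit 0x2.
    std::vector<PState> states = { { 0x0, 0 }, { 0x2, 1 } };
    printf("want 0x2 -> state %u\n", mapChildRequest(states, 0x2, 0x2)); // 1
    printf("want 0x4 -> state %u\n", mapChildRequest(states, 0x2, 0x4)); // 0: masked away
    return 0;
}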
//********************************************************************************* -IOReturn IOService::changePowerStateTo ( unsigned long ordinal ) +IOReturn IOService::changePowerStateTo( unsigned long ordinal ) { OUR_PMLog(kPMLogChangeStateTo, ordinal, 0); return requestPowerState( gIOPMPowerClientDriver, ordinal ); @@ -2335,14 +2258,14 @@ IOReturn IOService::changePowerStateTo ( unsigned long ordinal ) // power states differ, then a power state change is initiated. //********************************************************************************* -IOReturn IOService::changePowerStateToPriv ( unsigned long ordinal ) +IOReturn IOService::changePowerStateToPriv( unsigned long ordinal ) { OUR_PMLog(kPMLogChangeStateToPriv, ordinal, 0); return requestPowerState( gIOPMPowerClientDevice, ordinal ); } //********************************************************************************* -// [protected] changePowerStateWithOverrideTo +// [public] changePowerStateWithOverrideTo // // Called by our driver subclass to change power state. The new desired power // state is computed and compared against the current power state. If those @@ -2350,7 +2273,8 @@ IOReturn IOService::changePowerStateToPriv ( unsigned long ordinal ) // Override enforced - Children and Driver desires are ignored. //********************************************************************************* -IOReturn IOService::changePowerStateWithOverrideTo ( unsigned long ordinal ) +IOReturn IOService::changePowerStateWithOverrideTo( IOPMPowerStateIndex ordinal, + IOPMRequestTag tag ) { IOPMRequest * request; @@ -2364,6 +2288,7 @@ IOReturn IOService::changePowerStateWithOverrideTo ( unsigned long ordinal ) return kIOReturnNoMemory; gIOPMPowerClientDevice->retain(); + request->fRequestTag = tag; request->fArg0 = (void *) ordinal; request->fArg1 = (void *) gIOPMPowerClientDevice; request->fArg2 = 0; @@ -2377,7 +2302,7 @@ IOReturn IOService::changePowerStateWithOverrideTo ( unsigned long ordinal ) if (gIOPMWorkLoop->inGate() && (ordinal < fNumberOfPowerStates)) { - fTempClampPowerState = max(fTempClampPowerState, ordinal); + fTempClampPowerState = StateMax(fTempClampPowerState, ordinal); fTempClampCount++; fOverrideMaxPowerState = ordinal; request->fArg2 = (void *) (uintptr_t) true; @@ -2387,11 +2312,23 @@ IOReturn IOService::changePowerStateWithOverrideTo ( unsigned long ordinal ) return IOPMNoErr; } +//********************************************************************************* +// [public] changePowerStateForRootDomain +// +// Adjust the root domain's power desire on the target +//********************************************************************************* + +IOReturn IOService::changePowerStateForRootDomain( IOPMPowerStateIndex ordinal ) +{ + OUR_PMLog(kPMLogChangeStateForRootDomain, ordinal, 0); + return requestPowerState( gIOPMPowerClientRootDomain, ordinal ); +} + //********************************************************************************* // [private] requestPowerState //********************************************************************************* -IOReturn IOService::requestPowerState ( +IOReturn IOService::requestPowerState( const OSSymbol * client, uint32_t state ) { @@ -2407,8 +2344,8 @@ IOReturn IOService::requestPowerState ( return kIOReturnNoMemory; client->retain(); - request->fArg0 = (void *) state; - request->fArg1 = (void *) client; + request->fArg0 = (void *)(uintptr_t) state; + request->fArg1 = (void *) client; request->fArg2 = 0; #if NOT_READY if (action) @@ -2420,7 +2357,7 @@ IOReturn 
IOService::requestPowerState ( if (gIOPMWorkLoop->inGate() && (state < fNumberOfPowerStates)) { - fTempClampPowerState = max(fTempClampPowerState, state); + fTempClampPowerState = StateMax(fTempClampPowerState, state); fTempClampCount++; request->fArg2 = (void *) (uintptr_t) true; } @@ -2433,7 +2370,7 @@ IOReturn IOService::requestPowerState ( // [private] handleRequestPowerState //********************************************************************************* -void IOService::handleRequestPowerState ( IOPMRequest * request ) +void IOService::handleRequestPowerState( IOPMRequest * request ) { const OSSymbol * client = (const OSSymbol *) request->fArg1; uint32_t state = (uint32_t)(uintptr_t) request->fArg0; @@ -2443,11 +2380,11 @@ void IOService::handleRequestPowerState ( IOPMRequest * request ) { assert(fTempClampCount != 0); if (fTempClampCount) fTempClampCount--; - if (!fTempClampCount) fTempClampPowerState = 0; + if (!fTempClampCount) fTempClampPowerState = kPowerStateZero; } if (fNumberOfPowerStates && (state >= fNumberOfPowerStates)) - state = fNumberOfPowerStates - 1; + state = fHighestPowerState; // The power suppression due to changePowerStateWithOverrideTo() expires // upon the next "device" power request - changePowerStateToPriv(). @@ -2456,7 +2393,7 @@ void IOService::handleRequestPowerState ( IOPMRequest * request ) (client == gIOPMPowerClientDevice)) fOverrideMaxPowerState = kIOPMPowerStateMax; - if ((state == 0) && + if ((state == kPowerStateZero) && (client != gIOPMPowerClientDevice) && (client != gIOPMPowerClientDriver) && (client != gIOPMPowerClientChildProxy)) @@ -2474,13 +2411,18 @@ void IOService::handleRequestPowerState ( IOPMRequest * request ) void IOService::updatePowerClient( const OSSymbol * client, uint32_t powerState ) { + IOPMPowerStateIndex oldPowerState = kPowerStateZero; + if (!fPowerClients) fPowerClients = OSDictionary::withCapacity(4); if (fPowerClients && client) { OSNumber * num = (OSNumber *) fPowerClients->getObject(client); if (num) + { + oldPowerState = num->unsigned32BitValue(); num->setValue(powerState); + } else { num = OSNumber::withNumber(powerState, 32); @@ -2490,6 +2432,8 @@ void IOService::updatePowerClient( const OSSymbol * client, uint32_t powerState num->release(); } } + + PM_ACTION_3(actionUpdatePowerClient, client, oldPowerState, powerState); } } @@ -2501,7 +2445,7 @@ void IOService::removePowerClient( const OSSymbol * client ) uint32_t IOService::getPowerStateForClient( const OSSymbol * client ) { - uint32_t powerState = 0; + uint32_t powerState = kPowerStateZero; if (fPowerClients && client) { @@ -2515,7 +2459,7 @@ uint32_t IOService::getPowerStateForClient( const OSSymbol * client ) // [protected] powerOverrideOnPriv //********************************************************************************* -IOReturn IOService::powerOverrideOnPriv ( void ) +IOReturn IOService::powerOverrideOnPriv( void ) { IOPMRequest * request; @@ -2540,7 +2484,7 @@ IOReturn IOService::powerOverrideOnPriv ( void ) // [protected] powerOverrideOffPriv //********************************************************************************* -IOReturn IOService::powerOverrideOffPriv ( void ) +IOReturn IOService::powerOverrideOffPriv( void ) { IOPMRequest * request; @@ -2565,7 +2509,7 @@ IOReturn IOService::powerOverrideOffPriv ( void ) // [private] handlePowerOverrideChanged //********************************************************************************* -void IOService::handlePowerOverrideChanged ( IOPMRequest * request ) +void 
IOService::handlePowerOverrideChanged( IOPMRequest * request ) { PM_ASSERT_IN_GATE(); if (request->getType() == kIOPMRequestTypePowerOverrideOnPriv) @@ -2591,16 +2535,15 @@ void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly OSIterator * iter; OSObject * next; IOPowerConnection * connection; - uint32_t desiredState = 0; - uint32_t newPowerState = 0; + uint32_t desiredState = kPowerStateZero; + uint32_t newPowerState = kPowerStateZero; bool hasChildren = false; // Desired power state is always 0 without a controlling driver. if (!fNumberOfPowerStates) { - fDesiredPowerState = 0; - //PM_LOG("%s::%s no controlling driver\n", getName(), __FUNCTION__); + fDesiredPowerState = kPowerStateZero; return; } @@ -2621,8 +2564,7 @@ void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly } if (connection->childHasRequestedPower()) hasChildren = true; - if (connection->getDesiredDomainState() > desiredState) - desiredState = connection->getDesiredDomainState(); + desiredState = StateMax(connection->getDesiredDomainState(), desiredState); } } iter->release(); @@ -2661,7 +2603,7 @@ void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly PM_LOG1(" %u %s\n", desiredState, client->getCStringNoCopy()); - newPowerState = max(newPowerState, desiredState); + newPowerState = StateMax(newPowerState, desiredState); if (client == gIOPMPowerClientDevice) fDeviceDesire = desiredState; @@ -2671,17 +2613,17 @@ void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly // Factor in the temporary power desires. - newPowerState = max(newPowerState, localClamp); - newPowerState = max(newPowerState, fTempClampPowerState); + newPowerState = StateMax(newPowerState, localClamp); + newPowerState = StateMax(newPowerState, fTempClampPowerState); // Limit check against max power override. - newPowerState = min(newPowerState, fOverrideMaxPowerState); + newPowerState = StateMin(newPowerState, fOverrideMaxPowerState); // Limit check against number of power states. if (newPowerState >= fNumberOfPowerStates) - newPowerState = fNumberOfPowerStates - 1; + newPowerState = fHighestPowerState; fDesiredPowerState = newPowerState; @@ -2721,7 +2663,7 @@ void IOService::computeDesiredState( unsigned long localClamp, bool computeOnly // //********************************************************************************* -unsigned long IOService::currentPowerConsumption ( void ) +unsigned long IOService::currentPowerConsumption( void ) { if (!initialized) return kIOPMUnknown; @@ -2733,7 +2675,7 @@ unsigned long IOService::currentPowerConsumption ( void ) // [deprecated] getPMworkloop //********************************************************************************* -IOWorkLoop * IOService::getPMworkloop ( void ) +IOWorkLoop * IOService::getPMworkloop( void ) { return gIOPMWorkLoop; } @@ -2833,7 +2775,7 @@ void IOService::setAdvisoryTickleEnable( bool enable ) // should be intercepted by a subclass. 
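computeDesiredState() above folds every recorded power client into one desire: the maximum (by state order) across children, device, driver, and the new root-domain client, raised to any temporary clamp and then capped by the override ceiling and the highest valid state. A standalone model with identity ordering for brevity; the client names below are illustrative, not the kernel's OSSymbol strings:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

int main()
{
    std::map<std::string, uint32_t> clients = {
        { "device",   2 },   // changePowerStateToPriv()
        { "driver",   1 },   // changePowerStateTo()
        { "children", 3 },   // highest desired domain state among children
    };
    uint32_t tempClamp      = 0;  // fTempClampPowerState
    uint32_t overrideMax    = 2;  // fOverrideMaxPowerState, set by override requests
    uint32_t numberOfStates = 4;

    uint32_t desired = 0;
    for (const auto & kv : clients)
        desired = std::max(desired, kv.second);      // StateMax in the kernel
    desired = std::max(desired, tempClamp);
    desired = std::min(desired, overrideMax);        // StateMin against the override
    desired = std::min(desired, numberOfStates - 1); // clamp to the highest state
    printf("desired = %u\n", desired);               // 2
    return 0;
}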
//********************************************************************************* -bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) +bool IOService::activityTickle( unsigned long type, unsigned long stateNumber ) { IOPMRequest * request; bool noPowerChange = true; @@ -2842,7 +2784,7 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) if (!initialized) return true; // no power change - if ((type == kIOPMSuperclassPolicy1) && stateNumber) + if ((type == kIOPMSuperclassPolicy1) && StateOrder(stateNumber)) { IOLockLock(fActivityLock); @@ -2858,7 +2800,8 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) // This helps to filter out redundant tickles as // this function may be called from the data path. - if (fActivityTicklePowerState < (int)stateNumber) + if ((fActivityTicklePowerState == kInvalidTicklePowerState) + || StateOrder(fActivityTicklePowerState) < StateOrder(stateNumber)) { fActivityTicklePowerState = stateNumber; noPowerChange = false; @@ -2867,9 +2810,9 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); if (request) { - request->fArg0 = (void *) stateNumber; - request->fArg1 = (void *) tickleFlags; - request->fArg2 = (void *) gIOPMTickleGeneration; + request->fArg0 = (void *) stateNumber; + request->fArg1 = (void *)(uintptr_t) tickleFlags; + request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration; submitPMRequest(request); } } @@ -2878,13 +2821,13 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) } else if ((type == kIOPMActivityTickleTypeAdvisory) && - ((stateNumber = fDeviceUsablePowerState))) + ((stateNumber = fDeviceUsablePowerState) != kPowerStateZero)) { IOLockLock(fActivityLock); fAdvisoryTickled = true; - if (fAdvisoryTicklePowerState != (int)stateNumber) + if (fAdvisoryTicklePowerState != stateNumber) { fAdvisoryTicklePowerState = stateNumber; noPowerChange = false; @@ -2893,9 +2836,9 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); if (request) { - request->fArg0 = (void *) stateNumber; - request->fArg1 = (void *) tickleFlags; - request->fArg2 = (void *) gIOPMTickleGeneration; + request->fArg0 = (void *) stateNumber; + request->fArg1 = (void *)(uintptr_t) tickleFlags; + request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration; submitPMRequest(request); } } @@ -2913,7 +2856,7 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) // [private] handleActivityTickle //********************************************************************************* -void IOService::handleActivityTickle ( IOPMRequest * request ) +void IOService::handleActivityTickle( IOPMRequest * request ) { uint32_t ticklePowerState = (uint32_t)(uintptr_t) request->fArg0; uint32_t tickleFlags = (uint32_t)(uintptr_t) request->fArg1; @@ -2935,9 +2878,11 @@ void IOService::handleActivityTickle ( IOPMRequest * request ) if (tickleFlags & kTickleTypeActivity) { + IOPMPowerStateIndex deviceDesireOrder = StateOrder(fDeviceDesire); + if (tickleFlags & kTickleTypePowerRise) { - if ((ticklePowerState > fDeviceDesire) && + if ((StateOrder(ticklePowerState) > deviceDesireOrder) && (ticklePowerState < fNumberOfPowerStates)) { fIdleTimerMinPowerState = ticklePowerState; @@ -2945,7 +2890,7 @@ void IOService::handleActivityTickle ( IOPMRequest * request ) 
adjustPower = true; } } - else if (fDeviceDesire > fIdleTimerMinPowerState) + else if (deviceDesireOrder > StateOrder(fIdleTimerMinPowerState)) { // Power drop due to idle timer expiration. // Do not allow idle timer to reduce power below tickle power. @@ -2953,10 +2898,14 @@ void IOService::handleActivityTickle ( IOPMRequest * request ) // to zero and cancelling the effect of a pre-sleep tickle when // system wakes up to doze state, while the device is unable to // raise its power state to satisfy the tickle. - - ticklePowerState = fDeviceDesire - 1; - updatePowerClient(gIOPMPowerClientDevice, ticklePowerState); - adjustPower = true; + + deviceDesireOrder--; + if (deviceDesireOrder < fNumberOfPowerStates) + { + ticklePowerState = fPowerStates[deviceDesireOrder].stateOrderToIndex; + updatePowerClient(gIOPMPowerClientDevice, ticklePowerState); + adjustPower = true; + } } } else // advisory tickle @@ -2999,7 +2948,7 @@ void IOService::handleActivityTickle ( IOPMRequest * request ) // Start the idle timer. Period is in seconds. //****************************************************************************** -IOReturn IOService::setIdleTimerPeriod ( unsigned long period ) +IOReturn IOService::setIdleTimerPeriod( unsigned long period ) { if (!initialized) return IOPMNotYetInitialized; @@ -3073,7 +3022,7 @@ SInt32 IOService::nextIdleTimeout( // [public] start_PM_idle_timer //********************************************************************************* -void IOService::start_PM_idle_timer ( void ) +void IOService::start_PM_idle_timer( void ) { static const int maxTimeout = 100000; static const int minTimeout = 1; @@ -3117,7 +3066,7 @@ void IOService::start_PM_idle_timer ( void ) void IOService::restartIdleTimer( void ) { - if (fDeviceDesire != 0) + if (fDeviceDesire != kPowerStateZero) { fIdleTimerStopped = false; fActivityTickleCount = 0; @@ -3140,7 +3089,7 @@ void IOService::restartIdleTimer( void ) //********************************************************************************* static void -idle_timer_expired ( +idle_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ) { IOService * me = (IOService *) arg0; @@ -3185,19 +3134,20 @@ void IOService::idleTimerExpired( void ) { // No device activity - drop power state by one level. // Decrement the cached tickle power state when possible. - // This value may be (-1) before activityTickle() is called, - // but the power drop request must be issued regardless. + // This value may be kInvalidTicklePowerState before activityTickle() + // is called, but the power drop request must be issued regardless. 
- if (fActivityTicklePowerState > 0) + if ((fActivityTicklePowerState != kInvalidTicklePowerState) && + (fActivityTicklePowerState != kPowerStateZero)) fActivityTicklePowerState--; tickleFlags = kTickleTypeActivity | kTickleTypePowerDrop; request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); if (request) { - request->fArg0 = (void *) 0; // irrelevant - request->fArg1 = (void *) tickleFlags; - request->fArg2 = (void *) gIOPMTickleGeneration; + request->fArg0 = (void *) kPowerStateZero; // irrelevant + request->fArg1 = (void *)(uintptr_t) tickleFlags; + request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration; submitPMRequest( request ); // Do not restart timer until after the tickle request has been @@ -3220,9 +3170,9 @@ void IOService::idleTimerExpired( void ) request = acquirePMRequest( this, kIOPMRequestTypeActivityTickle ); if (request) { - request->fArg0 = (void *) 0; // irrelevant - request->fArg1 = (void *) tickleFlags; - request->fArg2 = (void *) gIOPMTickleGeneration; + request->fArg0 = (void *) kPowerStateZero; // irrelevant + request->fArg1 = (void *)(uintptr_t) tickleFlags; + request->fArg2 = (void *)(uintptr_t) gIOPMTickleGeneration; submitPMRequest( request ); // Do not restart timer until after the tickle request has been @@ -3243,7 +3193,7 @@ void IOService::idleTimerExpired( void ) // [deprecated] PM_idle_timer_expiration //********************************************************************************* -void IOService::PM_idle_timer_expiration ( void ) +void IOService::PM_idle_timer_expiration( void ) { } @@ -3251,7 +3201,7 @@ void IOService::PM_idle_timer_expiration ( void ) // [deprecated] command_received //********************************************************************************* -void IOService::command_received ( void *statePtr , void *, void * , void * ) +void IOService::command_received( void *statePtr , void *, void * , void * ) { } #endif /* !__LP64__ */ @@ -3263,7 +3213,7 @@ void IOService::command_received ( void *statePtr , void *, void * , void * ) // power domains will pass it on to their children, etc. //********************************************************************************* -IOReturn IOService::setAggressiveness ( unsigned long type, unsigned long newLevel ) +IOReturn IOService::setAggressiveness( unsigned long type, unsigned long newLevel ) { return kIOReturnSuccess; } @@ -3274,7 +3224,7 @@ IOReturn IOService::setAggressiveness ( unsigned long type, unsigned long newLev // Called by the user client. //********************************************************************************* -IOReturn IOService::getAggressiveness ( unsigned long type, unsigned long * currentLevel ) +IOReturn IOService::getAggressiveness( unsigned long type, unsigned long * currentLevel ) { IOPMrootDomain * rootDomain = getPMRootDomain(); @@ -3289,10 +3239,10 @@ IOReturn IOService::getAggressiveness ( unsigned long type, unsigned long * curr // //********************************************************************************* -UInt32 IOService::getPowerState ( void ) +UInt32 IOService::getPowerState( void ) { if (!initialized) - return 0; + return kPowerStateZero; return fCurrentPowerState; } @@ -3305,7 +3255,7 @@ UInt32 IOService::getPowerState ( void ) // power domains will pass it on to their children, etc. 
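//*********************************************************************************
// Illustration (not part of the patch): the (void *)(uintptr_t) casts added
// throughout these hunks avoid implicit integer-to-pointer conversion warnings
// when a 32-bit value rides through the opaque request arguments on LP64.
// A minimal sketch of the round trip used by the tickle requests above:
//
//    uint32_t tickleFlags = kTickleTypeActivity | kTickleTypePowerDrop;
//    request->fArg1 = (void *)(uintptr_t) tickleFlags;       // widen, then erase type
//
//    // consumer side (see handleActivityTickle):
//    uint32_t flags = (uint32_t)(uintptr_t) request->fArg1;  // narrow back
//*********************************************************************************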
//********************************************************************************* -IOReturn IOService::systemWake ( void ) +IOReturn IOService::systemWake( void ) { OSIterator * iter; OSObject * next; @@ -3352,7 +3302,7 @@ IOReturn IOService::systemWake ( void ) // [deprecated] temperatureCriticalForZone //********************************************************************************* -IOReturn IOService::temperatureCriticalForZone ( IOService * whichZone ) +IOReturn IOService::temperatureCriticalForZone( IOService * whichZone ) { IOService * theParent; IOService * theNub; @@ -3403,6 +3353,31 @@ IOReturn IOService::startPowerChange( fIsPreChange = true; PM_ACTION_2(actionPowerChangeOverride, &powerState, &changeFlags); + if (changeFlags & kIOPMExpireIdleTimer) + { + // Root domain requested removal of tickle influence + if (StateOrder(fDeviceDesire) > StateOrder(powerState)) + { + // Reset device desire down to the clamped power state + updatePowerClient(gIOPMPowerClientDevice, powerState); + computeDesiredState(kPowerStateZero, true); + + // Invalidate tickle cache so the next tickle will issue a request + IOLockLock(fActivityLock); + fDeviceWasActive = false; + fActivityTicklePowerState = kInvalidTicklePowerState; + IOLockUnlock(fActivityLock); + + fIdleTimerMinPowerState = kPowerStateZero; + } + } + + // Root domain's override handler may cancel the power change by + // setting the kIOPMNotDone flag. + + if (changeFlags & kIOPMNotDone) + return IOPMAckImplied; + // Forks to either Driver or Parent initiated power change paths. fHeadNoteChangeFlags = changeFlags; @@ -3432,7 +3407,7 @@ IOReturn IOService::startPowerChange( // [private] notifyInterestedDrivers //********************************************************************************* -bool IOService::notifyInterestedDrivers ( void ) +bool IOService::notifyInterestedDrivers( void ) { IOPMinformee * informee; IOPMinformeeList * list = fInterestedDrivers; @@ -3502,7 +3477,7 @@ done: // [private] notifyInterestedDriversDone //********************************************************************************* -void IOService::notifyInterestedDriversDone ( void ) +void IOService::notifyInterestedDriversDone( void ) { IOPMinformee * informee; IOItemCount count; @@ -3568,30 +3543,60 @@ void IOService::notifyInterestedDriversDone ( void ) } } - MS_POP(); // pushed by notifyAll() + MS_POP(); // pop the machine state passed to notifyAll() - // If interest acks are outstanding, wait for fHeadNotePendingAcks to become - // zero before notifying children. This enforces the children after interest - // ordering even for async interest clients. + // If interest acks are outstanding, block the state machine until + // fHeadNotePendingAcks drops to zero before notifying root domain. + // Otherwise notify root domain directly. 
if (!fHeadNotePendingAcks) { - notifyChildren(); + notifyRootDomain(); } else { MS_PUSH(fMachineState); fMachineState = kIOPM_NotifyChildrenStart; - PM_LOG2("%s: %u outstanding async interest\n", - getName(), fHeadNotePendingAcks); } } +//********************************************************************************* +// [private] notifyRootDomain +//********************************************************************************* + +void IOService::notifyRootDomain( void ) +{ + assert( fDriverCallBusy == false ); + + // Only for root domain in the will-change phase + if (!IS_ROOT_DOMAIN || (fMachineState != kIOPM_OurChangeSetPowerState)) + { + notifyChildren(); + return; + } + + MS_PUSH(fMachineState); // push notifyAll() machine state + fMachineState = kIOPM_DriverThreadCallDone; + + fDriverCallReason = kRootDomainInformPreChange; + fDriverCallBusy = true; + thread_call_enter( fDriverCallEntry ); +} + +void IOService::notifyRootDomainDone( void ) +{ + assert( fDriverCallBusy == false ); + assert( fMachineState == kIOPM_DriverThreadCallDone ); + + MS_POP(); // pop notifyAll() machine state + notifyChildren(); +} + //********************************************************************************* // [private] notifyChildren //********************************************************************************* -void IOService::notifyChildren ( void ) +void IOService::notifyChildren( void ) { OSIterator * iter; OSObject * next; @@ -3690,7 +3695,7 @@ void IOService::notifyChildren ( void ) // [private] notifyChildrenOrdered //********************************************************************************* -void IOService::notifyChildrenOrdered ( void ) +void IOService::notifyChildrenOrdered( void ) { PM_ASSERT_IN_GATE(); assert(fNotifyChildArray); @@ -3725,7 +3730,7 @@ void IOService::notifyChildrenOrdered ( void ) // [private] notifyChildrenDelayed //********************************************************************************* -void IOService::notifyChildrenDelayed ( void ) +void IOService::notifyChildrenDelayed( void ) { IOPowerConnection * connection; @@ -3757,9 +3762,9 @@ void IOService::notifyChildrenDelayed ( void ) // [private] notifyAll //********************************************************************************* -IOReturn IOService::notifyAll ( uint32_t nextMS ) +IOReturn IOService::notifyAll( uint32_t nextMS ) { - // Save the next machine_state to be restored by notifyInterestedDriversDone() + // Save the machine state to be restored by notifyInterestedDriversDone() PM_ASSERT_IN_GATE(); MS_PUSH(nextMS); @@ -3779,7 +3784,7 @@ IOReturn IOService::notifyAll ( uint32_t nextMS ) // Thread call context //********************************************************************************* -IOReturn IOService::actionDriverCalloutDone ( +IOReturn IOService::actionDriverCalloutDone( OSObject * target, void * arg0, void * arg1, void * arg2, void * arg3 ) @@ -3795,7 +3800,7 @@ IOReturn IOService::actionDriverCalloutDone ( return kIOReturnSuccess; } -void IOService::pmDriverCallout ( IOService * from ) +void IOService::pmDriverCallout( IOService * from ) { assert(from); switch (from->fDriverCallReason) @@ -3809,6 +3814,10 @@ void IOService::pmDriverCallout ( IOService * from ) from->driverInformPowerChange(); break; + case kRootDomainInformPreChange: + getPMRootDomain()->willNotifyPowerChildren(from->fHeadNotePowerState); + break; + default: panic("IOService::pmDriverCallout bad machine state %x", from->fDriverCallReason); @@ -3825,7 +3834,7 @@ void IOService::pmDriverCallout ( IOService 
* from ) // Thread call context //********************************************************************************* -void IOService::driverSetPowerState ( void ) +void IOService::driverSetPowerState( void ) { IOPMPowerStateIndex powerState; DriverCallParam * param; @@ -3878,7 +3887,7 @@ void IOService::driverSetPowerState ( void ) 0, // result NS_TO_US(nsec)); // usec completion time - getPMRootDomain()->recordAndReleasePMEventGated( details ); + getPMRootDomain()->recordAndReleasePMEvent( details ); } #endif } @@ -3894,7 +3903,7 @@ void IOService::driverSetPowerState ( void ) // Thread call context //********************************************************************************* -void IOService::driverInformPowerChange ( void ) +void IOService::driverInformPowerChange( void ) { IOPMinformee * informee; IOService * driver; @@ -3969,7 +3978,7 @@ void IOService::driverInformPowerChange ( void ) 0, // result NS_TO_US(nsec)); // usec completion time - getPMRootDomain()->recordAndReleasePMEventGated( details ); + getPMRootDomain()->recordAndReleasePMEvent( details ); } #endif } @@ -3988,7 +3997,7 @@ void IOService::driverInformPowerChange ( void ) // If the object acknowledges the current change, we return TRUE. //********************************************************************************* -bool IOService::notifyChild ( IOPowerConnection * theNub ) +bool IOService::notifyChild( IOPowerConnection * theNub ) { IOReturn ret = IOPMAckImplied; unsigned long childPower; @@ -4010,7 +4019,7 @@ bool IOService::notifyChild ( IOPowerConnection * theNub ) theNub->setAwaitingAck(true); requestArg2 = fHeadNoteChangeFlags; - if (fHeadNotePowerState < fCurrentPowerState) + if (StateOrder(fHeadNotePowerState) < StateOrder(fCurrentPowerState)) requestArg2 |= kIOPMDomainPowerDrop; requestType = fIsPreChange ? @@ -4021,9 +4030,9 @@ bool IOService::notifyChild ( IOPowerConnection * theNub ) if (childRequest) { theNub->retain(); - childRequest->fArg0 = (void *) fHeadNotePowerArrayEntry->outputPowerFlags; - childRequest->fArg1 = (void *) theNub; - childRequest->fArg2 = (void *) requestArg2; + childRequest->fArg0 = (void *) fHeadNotePowerArrayEntry->outputPowerFlags; + childRequest->fArg1 = (void *) theNub; + childRequest->fArg2 = (void *)(uintptr_t) requestArg2; theChild->submitPMRequest( childRequest ); ret = IOPMWillAckLater; } @@ -4050,7 +4059,7 @@ bool IOService::notifyChild ( IOPowerConnection * theNub ) // [private] notifyControllingDriver //********************************************************************************* -bool IOService::notifyControllingDriver ( void ) +bool IOService::notifyControllingDriver( void ) { DriverCallParam * param; @@ -4156,13 +4165,14 @@ void IOService::notifyControllingDriverDone( void ) // A power change is done. //********************************************************************************* -void IOService::all_done ( void ) +void IOService::all_done( void ) { IOPMPowerStateIndex prevPowerState; const IOPMPSEntry * powerStatePtr; IOPMDriverCallEntry callEntry; uint32_t prevMachineState = fMachineState; bool callAction = false; + uint64_t ts; fMachineState = kIOPM_Finished; @@ -4219,7 +4229,7 @@ void IOService::all_done ( void ) requestDomainPower(fHeadNotePowerState); // yes, did power raise? 
- if ( fCurrentPowerState < fHeadNotePowerState ) + if ( StateOrder(fCurrentPowerState) < StateOrder(fHeadNotePowerState) ) { // yes, inform clients and apps tellChangeUp (fHeadNotePowerState); @@ -4227,6 +4237,12 @@ void IOService::all_done ( void ) prevPowerState = fCurrentPowerState; // either way fCurrentPowerState = fHeadNotePowerState; + PM_LOCK(); + if (fReportBuf) { + ts = mach_absolute_time(); + STATEREPORT_SETSTATE(fReportBuf, fCurrentPowerState, ts); + } + PM_UNLOCK(); #if PM_VARS_SUPPORT fPMVars->myCurrentState = fCurrentPowerState; #endif @@ -4268,15 +4284,15 @@ void IOService::all_done ( void ) ParentChangeRootChangeDown(); if (((fHeadNoteChangeFlags & kIOPMDomainWillChange) && - (fCurrentPowerState >= fHeadNotePowerState)) || + (StateOrder(fCurrentPowerState) >= StateOrder(fHeadNotePowerState))) || ((fHeadNoteChangeFlags & kIOPMDomainDidChange) && - (fCurrentPowerState < fHeadNotePowerState))) + (StateOrder(fCurrentPowerState) < StateOrder(fHeadNotePowerState)))) { trackSystemSleepPreventers( fCurrentPowerState, fHeadNotePowerState, fHeadNoteChangeFlags); // did power raise? - if ( fCurrentPowerState < fHeadNotePowerState ) + if ( StateOrder(fCurrentPowerState) < StateOrder(fHeadNotePowerState) ) { // yes, inform clients and apps tellChangeUp (fHeadNotePowerState); @@ -4284,6 +4300,12 @@ void IOService::all_done ( void ) // either way prevPowerState = fCurrentPowerState; fCurrentPowerState = fHeadNotePowerState; + PM_LOCK(); + if (fReportBuf) { + ts = mach_absolute_time(); + STATEREPORT_SETSTATE(fReportBuf, fCurrentPowerState, ts); + } + PM_UNLOCK(); #if PM_VARS_SUPPORT fPMVars->myCurrentState = fCurrentPowerState; #endif @@ -4312,9 +4334,9 @@ void IOService::all_done ( void ) // When power rises enough to satisfy the tickle's desire for more power, // the condition preventing idle-timer from dropping power is removed. - if (fCurrentPowerState >= fIdleTimerMinPowerState) + if (StateOrder(fCurrentPowerState) >= StateOrder(fIdleTimerMinPowerState)) { - fIdleTimerMinPowerState = 0; + fIdleTimerMinPowerState = kPowerStateZero; } if (!callAction) @@ -4333,7 +4355,7 @@ void IOService::all_done ( void ) // Begin the processing of a power change initiated by us. //********************************************************************************* -void IOService::OurChangeStart ( void ) +void IOService::OurChangeStart( void ) { PM_ASSERT_IN_GATE(); OUR_PMLog( kPMLogStartDeviceChange, fHeadNotePowerState, fCurrentPowerState ); @@ -4342,7 +4364,7 @@ void IOService::OurChangeStart ( void ) // power state of our parents. If we are trying to raise power beyond the // maximum, send an async request for more power to all parents. - if (!IS_PM_ROOT && (fMaxPowerState < fHeadNotePowerState)) + if (!IS_PM_ROOT && (StateOrder(fMaxPowerState) < StateOrder(fHeadNotePowerState))) { fHeadNoteChangeFlags |= kIOPMNotDone; requestDomainPower(fHeadNotePowerState); @@ -4495,7 +4517,7 @@ IOReturn IOService::requestDomainPower( maxPowerState = fControllingDriver->maxCapabilityForDomainState( fHeadNoteDomainTargetFlags ); - if (maxPowerState < ourPowerState) + if (StateOrder(maxPowerState) < StateOrder(ourPowerState)) { PM_LOG1("%s: power desired %u:0x%x got %u:0x%x\n", getName(), @@ -4513,7 +4535,7 @@ done: // [private] OurSyncStart //********************************************************************************* -void IOService::OurSyncStart ( void ) +void IOService::OurSyncStart( void ) { PM_ASSERT_IN_GATE(); @@ -4551,12 +4573,32 @@ void IOService::OurSyncStart ( void ) // power. 
Here we notify them that we will lower the power and wait for acks. //********************************************************************************* -void IOService::OurChangeTellClientsPowerDown ( void ) +void IOService::OurChangeTellClientsPowerDown( void ) { - fMachineState = kIOPM_OurChangeTellPriorityClientsPowerDown; + if(!IS_ROOT_DOMAIN) + fMachineState = kIOPM_OurChangeTellPriorityClientsPowerDown; + else + { + fMachineState = kIOPM_OurChangeTellUserPMPolicyPowerDown; + } tellChangeDown1(fHeadNotePowerState); } +//********************************************************************************* +// [private] OurChangeTellUserPMPolicyPowerDown +// +// All applications and kernel clients have acknowledged our permission to drop +// power. Here we notify power management policy in user-space and wait for acks +// one last time before we lower power +//********************************************************************************* +void IOService::OurChangeTellUserPMPolicyPowerDown ( void ) +{ + fMachineState = kIOPM_OurChangeTellPriorityClientsPowerDown; + fOutOfBandParameter = kNotifyApps; + + tellClientsWithResponse(kIOPMMessageLastCallBeforeSleep); +} + //********************************************************************************* // [private] OurChangeTellPriorityClientsPowerDown // @@ -4564,7 +4606,7 @@ void IOService::OurChangeTellClientsPowerDown ( void ) // power. Here we notify "priority" clients that we are lowering power. //********************************************************************************* -void IOService::OurChangeTellPriorityClientsPowerDown ( void ) +void IOService::OurChangeTellPriorityClientsPowerDown( void ) { fMachineState = kIOPM_OurChangeNotifyInterestedDriversWillChange; tellChangeDown2(fHeadNotePowerState); @@ -4577,7 +4619,7 @@ void IOService::OurChangeTellPriorityClientsPowerDown ( void ) // system capability change when raising power state. //********************************************************************************* -void IOService::OurChangeTellCapabilityWillChange ( void ) +void IOService::OurChangeTellCapabilityWillChange( void ) { if (!IS_ROOT_DOMAIN) return OurChangeNotifyInterestedDriversWillChange(); @@ -4592,7 +4634,7 @@ void IOService::OurChangeTellCapabilityWillChange ( void ) // Here we notify interested drivers pre-change. //********************************************************************************* -void IOService::OurChangeNotifyInterestedDriversWillChange ( void ) +void IOService::OurChangeNotifyInterestedDriversWillChange( void ) { IOPMrootDomain * rootDomain; if ((rootDomain = getPMRootDomain()) == this) @@ -4606,7 +4648,7 @@ void IOService::OurChangeNotifyInterestedDriversWillChange ( void ) NULL, 100, kIOReturnSuccess); - rootDomain->recordAndReleasePMEventGated( details ); + rootDomain->recordAndReleasePMEvent( details ); } else rootDomain->tracePoint( kIOPMTracePointWakeWillChangeInterests ); @@ -4622,7 +4664,7 @@ void IOService::OurChangeNotifyInterestedDriversWillChange ( void ) // change. Wait for async completions. //********************************************************************************* -void IOService::OurChangeSetPowerState ( void ) +void IOService::OurChangeSetPowerState( void ) { MS_PUSH( kIOPM_OurChangeWaitForPowerSettle ); fMachineState = kIOPM_DriverThreadCallDone; @@ -4639,7 +4681,7 @@ void IOService::OurChangeSetPowerState ( void ) // Wait for the driver specified settle time to expire. 
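//*********************************************************************************
// Illustration (not part of the patch): OurChangeSetPowerState() above uses
// the MS_PUSH/MS_POP idiom this file relies on for asynchronous steps. A
// sketch of the pattern (macro internals elided; the continuation stack lives
// in the IOServicePM private data):
//
//    MS_PUSH( kIOPM_OurChangeWaitForPowerSettle );   // continuation to resume at
//    fMachineState = kIOPM_DriverThreadCallDone;     // park the state machine
//    thread_call_enter( fDriverCallEntry );         // run the driver callout async
//
//    // When the callout completes, the work loop re-enters the machine at
//    // kIOPM_DriverThreadCallDone and MS_POP() restores fMachineState to the
//    // pushed continuation.
//*********************************************************************************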
//********************************************************************************* -void IOService::OurChangeWaitForPowerSettle ( void ) +void IOService::OurChangeWaitForPowerSettle( void ) { fMachineState = kIOPM_OurChangeNotifyInterestedDriversDidChange; startSettleTimer(); @@ -4652,7 +4694,7 @@ void IOService::OurChangeWaitForPowerSettle ( void ) // all our interested drivers post-change. //********************************************************************************* -void IOService::OurChangeNotifyInterestedDriversDidChange ( void ) +void IOService::OurChangeNotifyInterestedDriversDidChange( void ) { IOPMrootDomain * rootDomain; if ((rootDomain = getPMRootDomain()) == this) @@ -4671,7 +4713,7 @@ void IOService::OurChangeNotifyInterestedDriversDidChange ( void ) // For root domain to notify capability power-change. //********************************************************************************* -void IOService::OurChangeTellCapabilityDidChange ( void ) +void IOService::OurChangeTellCapabilityDidChange( void ) { if (!IS_ROOT_DOMAIN) return OurChangeFinish(); @@ -4689,7 +4731,7 @@ void IOService::OurChangeTellCapabilityDidChange ( void ) // Done with this self-induced power state change. //********************************************************************************* -void IOService::OurChangeFinish ( void ) +void IOService::OurChangeFinish( void ) { all_done(); } @@ -4703,7 +4745,7 @@ void IOService::OurChangeFinish ( void ) // Here we begin the processing of a power change initiated by our parent. //********************************************************************************* -IOReturn IOService::ParentChangeStart ( void ) +IOReturn IOService::ParentChangeStart( void ) { PM_ASSERT_IN_GATE(); OUR_PMLog( kPMLogStartParentChange, fHeadNotePowerState, fCurrentPowerState ); @@ -4720,7 +4762,7 @@ IOReturn IOService::ParentChangeStart ( void ) } // Power domain is forcing us to lower power - if ( fHeadNotePowerState < fCurrentPowerState ) + if ( StateOrder(fHeadNotePowerState) < StateOrder(fCurrentPowerState) ) { PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags); @@ -4731,12 +4773,12 @@ IOReturn IOService::ParentChangeStart ( void ) return IOPMWillAckLater; } - // Power domain is raising power - if ( fHeadNotePowerState > fCurrentPowerState ) + // Power domain is allowing us to raise power up to fHeadNotePowerState + if ( StateOrder(fHeadNotePowerState) > StateOrder(fCurrentPowerState) ) { - if ( fDesiredPowerState > fCurrentPowerState ) + if ( StateOrder(fDesiredPowerState) > StateOrder(fCurrentPowerState) ) { - if ( fDesiredPowerState < fHeadNotePowerState ) + if ( StateOrder(fDesiredPowerState) < StateOrder(fHeadNotePowerState) ) { // We power up, but not all the way fHeadNotePowerState = fDesiredPowerState; @@ -4753,7 +4795,7 @@ IOReturn IOService::ParentChangeStart ( void ) if ( fHeadNoteChangeFlags & kIOPMDomainDidChange ) { - if ( fHeadNotePowerState > fCurrentPowerState ) + if ( StateOrder(fHeadNotePowerState) > StateOrder(fCurrentPowerState) ) { PM_ACTION_2(actionPowerChangeStart, fHeadNotePowerState, &fHeadNoteChangeFlags); @@ -4803,10 +4845,10 @@ void IOService::ParentChangeRootChangeDown( void ) // Advisory tickle desire is intentionally untouched since // it has no effect until system is promoted to full wake. 
- if (fDeviceDesire != 0) + if (fDeviceDesire != kPowerStateZero) { - updatePowerClient(gIOPMPowerClientDevice, 0); - computeDesiredState(0, true); + updatePowerClient(gIOPMPowerClientDevice, kPowerStateZero); + computeDesiredState(kPowerStateZero, true); PM_LOG1("%s: tickle desire removed\n", fName); } @@ -4816,7 +4858,7 @@ void IOService::ParentChangeRootChangeDown( void ) fActivityTicklePowerState = kInvalidTicklePowerState; IOLockUnlock(fActivityLock); - fIdleTimerMinPowerState = 0; + fIdleTimerMinPowerState = kPowerStateZero; } else if (fAdvisoryTickleUsed) { @@ -4829,8 +4871,8 @@ void IOService::ParentChangeRootChangeDown( void ) bool dropTickleDesire = false; if (fIdleTimerPeriod && !fIdleTimerIgnored && - (fIdleTimerMinPowerState == 0) && - (fDeviceDesire != 0)) + (fIdleTimerMinPowerState == kPowerStateZero) && + (fDeviceDesire != kPowerStateZero)) { IOLockLock(fActivityLock); @@ -4868,8 +4910,8 @@ void IOService::ParentChangeRootChangeDown( void ) // Advisory tickle desire is intentionally untouched since // it has no effect until system is promoted to full wake. - updatePowerClient(gIOPMPowerClientDevice, 0); - computeDesiredState(0, true); + updatePowerClient(gIOPMPowerClientDevice, kPowerStateZero); + computeDesiredState(kPowerStateZero, true); PM_LOG1("%s: tickle desire dropped\n", fName); } } @@ -4882,7 +4924,7 @@ void IOService::ParentChangeRootChangeDown( void ) // power. Here we notify "priority" clients that we are lowering power. //********************************************************************************* -void IOService::ParentChangeTellPriorityClientsPowerDown ( void ) +void IOService::ParentChangeTellPriorityClientsPowerDown( void ) { fMachineState = kIOPM_ParentChangeNotifyInterestedDriversWillChange; tellChangeDown2(fHeadNotePowerState); @@ -4895,7 +4937,7 @@ void IOService::ParentChangeTellPriorityClientsPowerDown ( void ) // root domain to notify apps and drivers about the system capability change. //********************************************************************************* -void IOService::ParentChangeTellCapabilityWillChange ( void ) +void IOService::ParentChangeTellCapabilityWillChange( void ) { if (!IS_ROOT_DOMAIN) return ParentChangeNotifyInterestedDriversWillChange(); @@ -4910,7 +4952,7 @@ void IOService::ParentChangeTellCapabilityWillChange ( void ) // Here we notify interested drivers pre-change. //********************************************************************************* -void IOService::ParentChangeNotifyInterestedDriversWillChange ( void ) +void IOService::ParentChangeNotifyInterestedDriversWillChange( void ) { notifyAll( kIOPM_ParentChangeSetPowerState ); } @@ -4922,7 +4964,7 @@ void IOService::ParentChangeNotifyInterestedDriversWillChange ( void ) // change. Wait for async completions. //********************************************************************************* -void IOService::ParentChangeSetPowerState ( void ) +void IOService::ParentChangeSetPowerState( void ) { MS_PUSH( kIOPM_ParentChangeWaitForPowerSettle ); fMachineState = kIOPM_DriverThreadCallDone; @@ -4939,7 +4981,7 @@ void IOService::ParentChangeSetPowerState ( void ) // parent. Wait for the driver specified settle time to expire. 
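//*********************************************************************************
// Illustration (not part of the patch): the StateOrder()/stateOrderToIndex
// conversions that replace raw index comparisons throughout these hunks. A
// power state keeps its registered array *index* (the value drivers exchange),
// but comparisons are made on its *order*, its rank from lowest to highest
// power. A worked example with a hypothetical table registered as
// { Off, On, Doze }, where Doze ranks between Off and On:
//
//    index: 0=Off  1=On  2=Doze        order: Off=0  Doze=1  On=2
//
//    StateOrder(2 /* Doze */) == 1;             // Doze outranks only Off
//    fPowerStates[1].stateOrderToIndex == 2;    // rank 1 maps back to the Doze entry
//
//    // "one state below On", as computed by handleActivityTickle():
//    //     order = StateOrder(kOnIndex) - 1;                // == 1
//    //     index = fPowerStates[order].stateOrderToIndex;   // == 2 (Doze)
//*********************************************************************************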
//********************************************************************************* -void IOService::ParentChangeWaitForPowerSettle ( void ) +void IOService::ParentChangeWaitForPowerSettle( void ) { fMachineState = kIOPM_ParentChangeNotifyInterestedDriversDidChange; startSettleTimer(); @@ -4952,7 +4994,7 @@ void IOService::ParentChangeWaitForPowerSettle ( void ) // all our interested drivers post-change. //********************************************************************************* -void IOService::ParentChangeNotifyInterestedDriversDidChange ( void ) +void IOService::ParentChangeNotifyInterestedDriversDidChange( void ) { notifyAll( kIOPM_ParentChangeTellCapabilityDidChange ); } @@ -4963,7 +5005,7 @@ void IOService::ParentChangeNotifyInterestedDriversDidChange ( void ) // For root domain to notify capability power-change. //********************************************************************************* -void IOService::ParentChangeTellCapabilityDidChange ( void ) +void IOService::ParentChangeTellCapabilityDidChange( void ) { if (!IS_ROOT_DOMAIN) return ParentChangeAcknowledgePowerChange(); @@ -4977,7 +5019,7 @@ void IOService::ParentChangeTellCapabilityDidChange ( void ) // Acknowledge our power parent that our power change is done. //********************************************************************************* -void IOService::ParentChangeAcknowledgePowerChange ( void ) +void IOService::ParentChangeAcknowledgePowerChange( void ) { IORegistryEntry * nub; IOService * parent; @@ -5045,30 +5087,36 @@ void IOService::startSettleTimer( void ) // from settleUpTime and settleDownTime in the power state table. AbsoluteTime deadline; - IOPMPowerStateIndex i; + IOPMPowerStateIndex stateIndex; + IOPMPowerStateIndex currentOrder, newOrder, i; uint32_t settleTime = 0; boolean_t pending; PM_ASSERT_IN_GATE(); - i = fCurrentPowerState; + currentOrder = StateOrder(fCurrentPowerState); + newOrder = StateOrder(fHeadNotePowerState); + + i = currentOrder; // lowering power - if ( fHeadNotePowerState < fCurrentPowerState ) + if ( newOrder < currentOrder ) { - while ( i > fHeadNotePowerState ) + while ( i > newOrder ) { - settleTime += (uint32_t) fPowerStates[i].settleDownTime; + stateIndex = fPowerStates[i].stateOrderToIndex; + settleTime += (uint32_t) fPowerStates[stateIndex].settleDownTime; i--; } } // raising power - if ( fHeadNotePowerState > fCurrentPowerState ) + if ( newOrder > currentOrder ) { - while ( i < fHeadNotePowerState ) + while ( i < newOrder ) { - settleTime += (uint32_t) fPowerStates[i+1].settleUpTime; + stateIndex = fPowerStates[i+1].stateOrderToIndex; + settleTime += (uint32_t) fPowerStates[stateIndex].settleUpTime; i++; } } @@ -5126,7 +5174,7 @@ bool IOService::ackTimerTick( void ) OUR_PMLog(kPMLogCtrlDriverTardy, 0, 0); setProperty(kIOPMTardyAckSPSKey, kOSBooleanTrue); PM_ERROR("%s::setPowerState(%p, %lu -> %lu) timed out after %d ms\n", - fName, this, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); + fName, OBFUSCATE(this), fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); #if LOG_SETPOWER_TIMES PMEventDetails *details = PMEventDetails::eventDetails( @@ -5139,7 +5187,7 @@ bool IOService::ackTimerTick( void ) kIOReturnTimeout, // result NS_TO_US(nsec)); // usec completion time - getPMRootDomain()->recordAndReleasePMEventGated( details ); + getPMRootDomain()->recordAndReleasePMEvent( details ); #endif if (gIOKitDebug & kIOLogDebugPower) @@ -5180,7 +5228,7 @@ bool IOService::ackTimerTick( void ) PM_ERROR("%s::powerState%sChangeTo(%p, %s, %lu -> %lu) timed out 
after %d ms\n", nextObject->whatObject->getName(), (fDriverCallReason == kDriverCallInformPreChange) ? "Will" : "Did", - nextObject->whatObject, fName, fCurrentPowerState, fHeadNotePowerState, + OBFUSCATE(nextObject->whatObject), fName, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec)); #if LOG_SETPOWER_TIMES @@ -5198,7 +5246,7 @@ bool IOService::ackTimerTick( void ) kIOReturnTimeout, // result NS_TO_US(nsec)); // usec completion time - getPMRootDomain()->recordAndReleasePMEventGated( details ); + getPMRootDomain()->recordAndReleasePMEvent( details ); #endif // Pretend driver has acked. @@ -5222,6 +5270,7 @@ bool IOService::ackTimerTick( void ) // TODO: aggreggate this case kIOPM_OurChangeTellClientsPowerDown: + case kIOPM_OurChangeTellUserPMPolicyPowerDown: case kIOPM_OurChangeTellPriorityClientsPowerDown: case kIOPM_OurChangeNotifyInterestedDriversWillChange: case kIOPM_ParentChangeTellPriorityClientsPowerDown: @@ -5245,37 +5294,117 @@ bool IOService::ackTimerTick( void ) return done; } +//********************************************************************************* +// [private] start_watchdog_timer +//********************************************************************************* +void IOService::start_watchdog_timer( void ) +{ + AbsoluteTime deadline; + boolean_t pending; + + if (!fWatchdogTimer || (kIOSleepWakeWdogOff & gIOKitDebug) || + (getPMRootDomain()->sleepWakeDebugIsWdogEnabled() == false)) + return; + + if (thread_call_isactive(fWatchdogTimer)) return; + + clock_interval_to_deadline(WATCHDOG_TIMER_PERIOD, kSecondScale, &deadline); + + retain(); + pending = thread_call_enter_delayed(fWatchdogTimer, deadline); + if (pending) release(); + +} + +//********************************************************************************* +// [private] stop_watchdog_timer +// Returns true if watchdog was enabled and stopped now +//********************************************************************************* + +bool IOService::stop_watchdog_timer( void ) +{ + boolean_t pending; + + if (!fWatchdogTimer || (kIOSleepWakeWdogOff & gIOKitDebug) || + (getPMRootDomain()->sleepWakeDebugIsWdogEnabled() == false)) + return false; + + pending = thread_call_cancel(fWatchdogTimer); + if (pending) release(); + + return pending; +} + +//********************************************************************************* +// reset_watchdog_timer +//********************************************************************************* + +void IOService::reset_watchdog_timer( void ) +{ + if (stop_watchdog_timer()) + start_watchdog_timer(); +} + + +//********************************************************************************* +// [static] watchdog_timer_expired +// +// Inside PM work loop's gate. 
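//*********************************************************************************
// Illustration (not part of the patch): the retain/release protocol shared by
// start_watchdog_timer(), start_ack_timer() and their handlers above. The
// service must stay referenced while a callout is pending:
//
//    retain();                                         // ref held for the callout
//    pending = thread_call_enter_delayed( fWatchdogTimer, deadline );
//    if (pending) release();        // a prior arming already holds the ref
//
//    // thread_call_cancel() returns true only when a pending callout was
//    // dequeued, i.e. when the handler will never run to drop its reference:
//    pending = thread_call_cancel( fWatchdogTimer );
//    if (pending) release();
//
//    // The handler itself ends with me->release() to balance the arming retain.
//*********************************************************************************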
+//********************************************************************************* + +void +IOService::watchdog_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ) +{ + IOService * me = (IOService *) arg0; + + + gIOPMWatchDogThread = current_thread(); + getPMRootDomain()->sleepWakeDebugTrig(true); + gIOPMWatchDogThread = 0; + me->release(); + + return ; +} + + //********************************************************************************* // [private] start_ack_timer //********************************************************************************* -void IOService::start_ack_timer ( void ) +void IOService::start_ack_timer( void ) { start_ack_timer( ACK_TIMER_PERIOD, kNanosecondScale ); } void IOService::start_ack_timer ( UInt32 interval, UInt32 scale ) { - AbsoluteTime deadline; - boolean_t pending; + AbsoluteTime deadline; + boolean_t pending; clock_interval_to_deadline(interval, scale, &deadline); - retain(); + retain(); pending = thread_call_enter_delayed(fAckTimer, deadline); - if (pending) release(); + if (pending) release(); + + // Stop watchdog if ack is delayed by more than a sec + if (interval * scale > kSecondScale) { + stop_watchdog_timer(); + } } //********************************************************************************* // [private] stop_ack_timer //********************************************************************************* -void IOService::stop_ack_timer ( void ) +void IOService::stop_ack_timer( void ) { - boolean_t pending; + boolean_t pending; pending = thread_call_cancel(fAckTimer); - if (pending) release(); + if (pending) release(); + + start_watchdog_timer(); } //********************************************************************************* @@ -5285,22 +5414,25 @@ void IOService::stop_ack_timer ( void ) //********************************************************************************* IOReturn -IOService::actionAckTimerExpired ( - OSObject * target, - void * arg0, void * arg1, - void * arg2, void * arg3 ) +IOService::actionAckTimerExpired( + OSObject * target, + void * arg0, void * arg1, + void * arg2, void * arg3 ) { - IOService * me = (IOService *) target; - bool done; + IOService * me = (IOService *) target; + bool done; - // done will be true if the timer tick unblocks the machine state, - // otherwise no need to signal the work loop. + // done will be true if the timer tick unblocks the machine state, + // otherwise no need to signal the work loop. 
- done = me->ackTimerTick(); - if (done && gIOPMWorkQueue) - gIOPMWorkQueue->signalWorkAvailable(); + done = me->ackTimerTick(); + if (done && gIOPMWorkQueue) + { + gIOPMWorkQueue->signalWorkAvailable(); + me->start_watchdog_timer(); + } - return kIOReturnSuccess; + return kIOReturnSuccess; } //********************************************************************************* @@ -5310,7 +5442,7 @@ IOService::actionAckTimerExpired ( //********************************************************************************* void -IOService::ack_timer_expired ( thread_call_param_t arg0, thread_call_param_t arg1 ) +IOService::ack_timer_expired( thread_call_param_t arg0, thread_call_param_t arg1 ) { IOService * me = (IOService *) arg0; @@ -5360,7 +5492,7 @@ void IOService::tellSystemCapabilityChange( uint32_t nextMS ) // Return true if we don't have to wait for acknowledgements //********************************************************************************* -bool IOService::askChangeDown ( unsigned long stateNum ) +bool IOService::askChangeDown( unsigned long stateNum ) { return tellClientsWithResponse( kIOMessageCanDevicePowerOff ); } @@ -5374,7 +5506,7 @@ bool IOService::askChangeDown ( unsigned long stateNum ) // Return true if we don't have to wait for acknowledgements //********************************************************************************* -bool IOService::tellChangeDown1 ( unsigned long stateNum ) +bool IOService::tellChangeDown1( unsigned long stateNum ) { fOutOfBandParameter = kNotifyApps; return tellChangeDown(stateNum); @@ -5388,7 +5520,7 @@ bool IOService::tellChangeDown1 ( unsigned long stateNum ) // Return true if we don't have to wait for acknowledgements //********************************************************************************* -bool IOService::tellChangeDown2 ( unsigned long stateNum ) +bool IOService::tellChangeDown2( unsigned long stateNum ) { fOutOfBandParameter = kNotifyPriority; return tellChangeDown(stateNum); @@ -5406,7 +5538,7 @@ bool IOService::tellChangeDown2 ( unsigned long stateNum ) // Return true if we don't have to wait for acknowledgements //********************************************************************************* -bool IOService::tellChangeDown ( unsigned long stateNum ) +bool IOService::tellChangeDown( unsigned long stateNum ) { return tellClientsWithResponse( kIOMessageDeviceWillPowerOff ); } @@ -5416,11 +5548,13 @@ bool IOService::tellChangeDown ( unsigned long stateNum ) // //********************************************************************************* -static void logAppTimeouts ( OSObject * object, void * arg ) +static void logAppTimeouts( OSObject * object, void * arg ) { IOPMInterestContext * context = (IOPMInterestContext *) arg; OSObject * flag; unsigned int clientIndex; + int pid = -1; + char name[128]; if (OSDynamicCast(_IOServiceInterestNotifier, object)) { @@ -5434,29 +5568,27 @@ static void logAppTimeouts ( OSObject * object, void * arg ) (flag = context->responseArray->getObject(clientIndex)) && (flag != kOSBooleanTrue)) { - OSString *logClientID = NULL; OSNumber *clientID = copyClientIDForNotification(object, context); + name[0] = '\0'; if (clientID) { - logClientID = IOCopyLogNameForPID(clientID->unsigned32BitValue()); + pid = clientID->unsigned32BitValue(); + proc_name(pid, name, sizeof(name)); clientID->release(); } - PM_ERROR(context->errorLog, logClientID ? 
logClientID->getCStringNoCopy() : ""); + PM_ERROR(context->errorLog, pid, name); // TODO: record message type if possible IOService::getPMRootDomain()->pmStatsRecordApplicationResponse( gIOPMStatsApplicationResponseTimedOut, - logClientID ? logClientID->getCStringNoCopy() : "", - 0, (30*1000), -1); + name, 0, (30*1000), pid); - if (logClientID) - logClientID->release(); } } } -void IOService::cleanClientResponses ( bool logErrors ) +void IOService::cleanClientResponses( bool logErrors ) { if (logErrors && fResponseArray) { @@ -5479,7 +5611,7 @@ void IOService::cleanClientResponses ( bool logErrors ) context.stateNumber = fHeadNotePowerState; context.stateFlags = fHeadNotePowerArrayEntry->capabilityFlags; context.changeFlags = fHeadNoteChangeFlags; - context.errorLog = "PM notification timeout (%s)\n"; + context.errorLog = "PM notification timeout (pid %d, %s)\n"; applyToInterested(gIOAppPowerStateInterest, logAppTimeouts, (void *) &context); } @@ -5518,7 +5650,7 @@ void IOService::cleanClientResponses ( bool logErrors ) // Return true if we don't have to wait for acknowledgements //********************************************************************************* -bool IOService::tellClientsWithResponse ( int messageType ) +bool IOService::tellClientsWithResponse( int messageType ) { IOPMInterestContext context; bool isRootDomain = IS_ROOT_DOMAIN; @@ -5527,8 +5659,12 @@ bool IOService::tellClientsWithResponse ( int messageType ) assert( fResponseArray == NULL ); assert( fNotifyClientArray == NULL ); - RD_LOG("tellClientsWithResponse( %s, %d )\n", - getIOMessageString(messageType), fOutOfBandParameter); + if(messageType == (int)kIOPMMessageLastCallBeforeSleep) + RD_LOG("tellClientsWithResponse( kIOPMMessageLastCallBeforeSleep, %d )\n", + fOutOfBandParameter); + else + RD_LOG("tellClientsWithResponse( %s, %d )\n", + getIOMessageString(messageType), fOutOfBandParameter); fResponseArray = OSArray::withCapacity( 1 ); if (!fResponseArray) @@ -5563,7 +5699,8 @@ bool IOService::tellClientsWithResponse ( int messageType ) if (isRootDomain && (fMachineState != kIOPM_OurChangeTellClientsPowerDown) && - (fMachineState != kIOPM_SyncTellClientsPowerDown)) + (fMachineState != kIOPM_SyncTellClientsPowerDown) && + (context.messageType != kIOPMMessageLastCallBeforeSleep)) { // Notify capability app for tellChangeDown1() // but not for askChangeDown(). @@ -5616,7 +5753,7 @@ bool IOService::tellClientsWithResponse ( int messageType ) OUR_PMLog(kPMLogStartAckTimer, context.maxTimeRequested, 0); if (context.enableTracing) getPMRootDomain()->traceDetail( context.maxTimeRequested / 1000 ); - start_ack_timer( context.maxTimeRequested / 1000, kMillisecondScale ); + start_ack_timer( context.maxTimeRequested / 1000, kMillisecondScale ); return false; } @@ -5643,7 +5780,7 @@ exit: // cookie we can identify the response with. 
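//*********************************************************************************
// Illustration (not part of the patch): the receiving end of
// tellClientsWithResponse(). A minimal, hypothetical kernel client registered
// via registerInterest(gIOGeneralInterest); the handler signature and
// IOPowerStateChangeNotification are the real types messaged by
// pmTellClientWithResponse() below, while MyClient and its policy are invented
// for the sketch:
//
//    IOReturn MyClient::interestHandler( void * target, void * refCon,
//        UInt32 messageType, IOService * provider,
//        void * messageArgument, vm_size_t argSize )
//    {
//        IOPowerStateChangeNotification * note =
//            (IOPowerStateChangeNotification *) messageArgument;
//
//        if (messageType == kIOMessageDeviceWillPowerOff)
//        {
//            // Either finish synchronously and return success, or defer:
//            // record note->powerRef, set note->returnValue to a nonzero
//            // wait hint, and acknowledge later through the provider's
//            // allowPowerChange()/cancelPowerChange() using that cookie.
//        }
//        return kIOReturnSuccess;
//    }
//*********************************************************************************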
//********************************************************************************* -void IOService::pmTellAppWithResponse ( OSObject * object, void * arg ) +void IOService::pmTellAppWithResponse( OSObject * object, void * arg ) { IOPMInterestContext * context = (IOPMInterestContext *) arg; IOServicePM * pwrMgt = context->us->pwrMgt; @@ -5721,7 +5858,7 @@ void IOService::pmTellAppWithResponse ( OSObject * object, void * arg ) if (context->notifyClients) context->notifyClients->setObject(msgIndex, object); - context->us->messageClient(msgType, object, (void *) msgRef); + context->us->messageClient(msgType, object, (void *)(uintptr_t) msgRef); } //********************************************************************************* @@ -5731,7 +5868,7 @@ void IOService::pmTellAppWithResponse ( OSObject * object, void * arg ) // so we compute a cookie we can identify the response with. //********************************************************************************* -void IOService::pmTellClientWithResponse ( OSObject * object, void * arg ) +void IOService::pmTellClientWithResponse( OSObject * object, void * arg ) { IOPowerStateChangeNotification notify; IOPMInterestContext * context = (IOPMInterestContext *) arg; @@ -5750,7 +5887,7 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * arg ) PM_LOG("%s DROP Client %s, notifier %p, handler %p\n", context->us->getName(), getIOMessageString(context->messageType), - object, n->handler); + OBFUSCATE(object), OBFUSCATE(n->handler)); } return; } @@ -5776,7 +5913,7 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * arg ) PM_LOG("%s MESG Client %s, notifier %p, handler %p\n", context->us->getName(), getIOMessageString(msgType), - object, notifier->handler); + OBFUSCATE(object), OBFUSCATE(notifier->handler)); } notify.powerRef = (void *)(uintptr_t) msgRef; @@ -5807,7 +5944,7 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * arg ) context->maxTimeRequested = kPriorityClientMaxWait; PM_ERROR("%s: client %p returned %llu for %s\n", context->us->getName(), - notifier ? (void *) notifier->handler : object, + notifier ? (void *) OBFUSCATE(notifier->handler) : OBFUSCATE(object), (uint64_t) notify.returnValue, getIOMessageString(msgType)); } @@ -5828,7 +5965,7 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * arg ) // [static private] pmTellCapabilityAppWithResponse //********************************************************************************* -void IOService::pmTellCapabilityAppWithResponse ( OSObject * object, void * arg ) +void IOService::pmTellCapabilityAppWithResponse( OSObject * object, void * arg ) { IOPMSystemCapabilityChangeParameters msgArg; IOPMInterestContext * context = (IOPMInterestContext *) arg; @@ -5935,7 +6072,7 @@ void IOService::pmTellCapabilityClientWithResponse( PM_LOG("%s DROP Client %s, notifier %p, handler %p\n", context->us->getName(), getIOMessageString(context->messageType), - object, n->handler); + OBFUSCATE(object), OBFUSCATE(n->handler)); } return; } @@ -5961,7 +6098,7 @@ void IOService::pmTellCapabilityClientWithResponse( PM_LOG("%s MESG Client %s, notifier %p, handler %p\n", context->us->getName(), getIOMessageString(msgType), - object, notifier->handler); + OBFUSCATE(object), OBFUSCATE(notifier->handler)); } msgArg.notifyRef = msgRef; @@ -5995,7 +6132,7 @@ void IOService::pmTellCapabilityClientWithResponse( context->maxTimeRequested = kCapabilityClientMaxWait; PM_ERROR("%s: client %p returned %u for %s\n", context->us->getName(), - notifier ? 
(void *) notifier->handler : object, + notifier ? (void *) OBFUSCATE(notifier->handler) : OBFUSCATE(object), msgArg.maxWaitForReply, getIOMessageString(msgType)); } @@ -6024,7 +6161,7 @@ void IOService::pmTellCapabilityClientWithResponse( // the aborted destination state number. //********************************************************************************* -void IOService::tellNoChangeDown ( unsigned long ) +void IOService::tellNoChangeDown( unsigned long ) { return tellClients( kIOMessageDeviceWillNotPowerOff ); } @@ -6038,7 +6175,7 @@ void IOService::tellNoChangeDown ( unsigned long ) // the aborted destination state number. //********************************************************************************* -void IOService::tellChangeUp ( unsigned long ) +void IOService::tellChangeUp( unsigned long ) { return tellClients( kIOMessageDeviceHasPoweredOn ); } @@ -6049,7 +6186,7 @@ void IOService::tellChangeUp ( unsigned long ) // Notify registered applications and kernel clients of something. //********************************************************************************* -void IOService::tellClients ( int messageType ) +void IOService::tellClients( int messageType ) { IOPMInterestContext context; @@ -6086,7 +6223,7 @@ void IOService::tellClients ( int messageType ) // Message a kernel client. //********************************************************************************* -static void tellKernelClientApplier ( OSObject * object, void * arg ) +static void tellKernelClientApplier( OSObject * object, void * arg ) { IOPowerStateChangeNotification notify; IOPMInterestContext * context = (IOPMInterestContext *) arg; @@ -6101,7 +6238,7 @@ static void tellKernelClientApplier ( OSObject * object, void * arg ) PM_LOG("%s DROP Client %s, notifier %p, handler %p\n", context->us->getName(), IOService::getIOMessageString(context->messageType), - object, n->handler); + OBFUSCATE(object), OBFUSCATE(n->handler)); } return; } @@ -6120,7 +6257,7 @@ static void tellKernelClientApplier ( OSObject * object, void * arg ) PM_LOG("%s MESG Client %s, notifier %p, handler %p\n", context->us->getName(), IOService::getIOMessageString(context->messageType), - object, n->handler); + OBFUSCATE(object), OBFUSCATE(n->handler)); } } @@ -6161,8 +6298,7 @@ static void logClientIDForNotification( return; } - -static void tellAppClientApplier ( OSObject * object, void * arg ) +static void tellAppClientApplier( OSObject * object, void * arg ) { IOPMInterestContext * context = (IOPMInterestContext *) arg; OSNumber * clientID = NULL; @@ -6213,7 +6349,7 @@ static void tellAppClientApplier ( OSObject * object, void * arg ) // [private] checkForDone //********************************************************************************* -bool IOService::checkForDone ( void ) +bool IOService::checkForDone( void ) { int i = 0; OSObject * theFlag; @@ -6240,7 +6376,7 @@ bool IOService::checkForDone ( void ) // [public] responseValid //********************************************************************************* -bool IOService::responseValid ( uint32_t refcon, int pid ) +bool IOService::responseValid( uint32_t refcon, int pid ) { UInt16 serialComponent; UInt16 ordinalComponent; @@ -6273,8 +6409,10 @@ bool IOService::responseValid ( uint32_t refcon, int pid ) AbsoluteTime now; AbsoluteTime start; uint64_t nsec; - OSString *name = IOCopyLogNameForPID(pid); + char name[128]; + name[0] = '\0'; + proc_name(pid, name, sizeof(name)); clock_get_uptime(&now); AbsoluteTime_to_scalar(&start) = num->unsigned64BitValue(); 
SUB_ABSOLUTETIME(&now, &start); @@ -6282,7 +6420,7 @@ bool IOService::responseValid ( uint32_t refcon, int pid ) PMEventDetails *details = PMEventDetails::eventDetails( kIOPMEventTypeAppResponse, // type - name ? name->getCStringNoCopy() : "", // who + name, // who (uintptr_t)pid, // owner unique NULL, // interest name 0, // old @@ -6290,7 +6428,7 @@ bool IOService::responseValid ( uint32_t refcon, int pid ) 0, // result NS_TO_US(nsec)); // usec completion time - getPMRootDomain()->recordAndReleasePMEventGated( details ); + getPMRootDomain()->recordAndReleasePMEvent( details ); if (kIOLogDebugPower & gIOKitDebug) { @@ -6302,21 +6440,18 @@ bool IOService::responseValid ( uint32_t refcon, int pid ) // > 100 ms if (nsec > LOG_APP_RESPONSE_TIMES) { - PM_LOG("PM response took %d ms (%s)\n", NS_TO_MS(nsec), - name ? name->getCStringNoCopy() : ""); + PM_LOG("PM response took %d ms (%d, %s)\n", NS_TO_MS(nsec), + pid, name); if (nsec > LOG_APP_RESPONSE_MSG_TRACER) { // TODO: populate the messageType argument getPMRootDomain()->pmStatsRecordApplicationResponse( gIOPMStatsApplicationResponseSlow, - name ? name->getCStringNoCopy() : "", 0, - NS_TO_MS(nsec), pid); + name, 0, NS_TO_MS(nsec), pid); } } - if (name) - name->release(); #endif theFlag = kOSBooleanFalse; } @@ -6337,7 +6472,7 @@ bool IOService::responseValid ( uint32_t refcon, int pid ) // so, and all acknowledgements are positive, we continue with the power change. //********************************************************************************* -IOReturn IOService::allowPowerChange ( unsigned long refcon ) +IOReturn IOService::allowPowerChange( unsigned long refcon ) { IOPMRequest * request; @@ -6351,16 +6486,16 @@ IOReturn IOService::allowPowerChange ( unsigned long refcon ) if (!request) return kIOReturnNoMemory; - request->fArg0 = (void *) refcon; - request->fArg1 = (void *) proc_selfpid(); - request->fArg2 = (void *) 0; + request->fArg0 = (void *) refcon; + request->fArg1 = (void *)(uintptr_t) proc_selfpid(); + request->fArg2 = (void *) 0; submitPMRequest( request ); return kIOReturnSuccess; } #ifndef __LP64__ -IOReturn IOService::serializedAllowPowerChange2 ( unsigned long refcon ) +IOReturn IOService::serializedAllowPowerChange2( unsigned long refcon ) { // [deprecated] public return kIOReturnUnsupported; @@ -6375,10 +6510,11 @@ IOReturn IOService::serializedAllowPowerChange2 ( unsigned long refcon ) // client to respond, we abandon the power change. //********************************************************************************* -IOReturn IOService::cancelPowerChange ( unsigned long refcon ) +IOReturn IOService::cancelPowerChange( unsigned long refcon ) { IOPMRequest * request; - OSString * name; + char name[128]; + pid_t pid = proc_selfpid(); if ( !initialized ) { @@ -6386,27 +6522,26 @@ IOReturn IOService::cancelPowerChange ( unsigned long refcon ) return kIOReturnSuccess; } - name = IOCopyLogNameForPID(proc_selfpid()); - PM_ERROR("PM notification cancel (%s)\n", name ? 
name->getCStringNoCopy() : ""); + name[0] = '\0'; + proc_name(pid, name, sizeof(name)); + PM_ERROR("PM notification cancel (pid %d, %s)\n", pid, name); request = acquirePMRequest( this, kIOPMRequestTypeCancelPowerChange ); if (!request) { - if (name) - name->release(); return kIOReturnNoMemory; } - request->fArg0 = (void *) refcon; - request->fArg1 = (void *) proc_selfpid(); - request->fArg2 = (void *) name; + request->fArg0 = (void *) refcon; + request->fArg1 = (void *)(uintptr_t) proc_selfpid(); + request->fArg2 = (void *) OSString::withCString(name); submitPMRequest( request ); return kIOReturnSuccess; } #ifndef __LP64__ -IOReturn IOService::serializedCancelPowerChange2 ( unsigned long refcon ) +IOReturn IOService::serializedCancelPowerChange2( unsigned long refcon ) { // [deprecated] public return kIOReturnUnsupported; @@ -6418,7 +6553,7 @@ IOReturn IOService::serializedCancelPowerChange2 ( unsigned long refcon ) // called when clamp timer expires...set power state to 0. //********************************************************************************* -void IOService::PM_Clamp_Timer_Expired ( void ) +void IOService::PM_Clamp_Timer_Expired( void ) { } @@ -6428,11 +6563,233 @@ void IOService::PM_Clamp_Timer_Expired ( void ) // Set to highest available power state for a minimum of duration milliseconds //********************************************************************************* -void IOService::clampPowerOn ( unsigned long duration ) +void IOService::clampPowerOn( unsigned long duration ) { } #endif /* !__LP64__ */ +//********************************************************************************* +// configurePowerStatesReport +// +// Configures the IOStateReport for the kPMPowerStatesChID channel +//********************************************************************************* +IOReturn IOService::configurePowerStatesReport( IOReportConfigureAction action, void *result ) +{ + + IOReturn rc = kIOReturnSuccess; + size_t reportSize; + unsigned long i; + uint64_t ts; + + if (!pwrMgt) + return kIOReturnUnsupported; + + if (!fNumberOfPowerStates) + return kIOReturnSuccess; // For drivers which are in power plane, but haven't called registerPowerDriver() + PM_LOCK(); + + switch (action) + { + case kIOReportEnable: + if (fReportBuf) + { + fReportClientCnt++; + break; + } + reportSize = STATEREPORT_BUFSIZE(fNumberOfPowerStates); + fReportBuf = IOMalloc(reportSize); + if (!fReportBuf) { + rc = kIOReturnNoMemory; + break; + } + memset(fReportBuf, 0, reportSize); + + STATEREPORT_INIT(fNumberOfPowerStates, fReportBuf, reportSize, + getRegistryEntryID(), kPMPowerStatesChID, kIOReportCategoryPower); + + for (i = 0; i < fNumberOfPowerStates; i++) { + unsigned bits = 0; + + if (fPowerStates[i].capabilityFlags & kIOPMPowerOn) + bits |= kPMReportPowerOn; + if (fPowerStates[i].capabilityFlags & kIOPMDeviceUsable) + bits |= kPMReportDeviceUsable; + if (fPowerStates[i].capabilityFlags & kIOPMLowPower) + bits |= kPMReportLowPower; + + STATEREPORT_SETSTATEID(fReportBuf, i, ((bits & 0xff) << 8) | + ((StateOrder(fMaxPowerState) & 0xf) << 4) | (StateOrder(i) & 0xf)); + } + ts = mach_absolute_time(); + STATEREPORT_SETSTATE(fReportBuf, fCurrentPowerState, ts); + break; + + case kIOReportDisable: + if (fReportClientCnt == 0) { + rc = kIOReturnBadArgument; + break; + } + if (fReportClientCnt == 1) + { + IOFree(fReportBuf, STATEREPORT_BUFSIZE(fNumberOfPowerStates)); + fReportBuf = NULL; + } + fReportClientCnt--; + break; + + case kIOReportGetDimensions: + if (fReportBuf) + STATEREPORT_UPDATERES(fReportBuf,
kIOReportGetDimensions, result); + break; + } + + PM_UNLOCK(); + + return rc; +} + +//********************************************************************************* +// updatePowerStatesReport +// +// Updates the IOStateReport for the kPMPowerStatesChID channel +//********************************************************************************* +IOReturn IOService::updatePowerStatesReport( IOReportConfigureAction action, void *result, void *destination ) +{ + uint32_t size2cpy; + void *data2cpy; + uint64_t ts; + IOReturn rc = kIOReturnSuccess; + IOBufferMemoryDescriptor *dest = OSDynamicCast(IOBufferMemoryDescriptor, (OSObject *)destination); + + + if (!pwrMgt) + return kIOReturnUnsupported; + if (!fNumberOfPowerStates) + return kIOReturnSuccess; + + if ( !result || !dest ) return kIOReturnBadArgument; + PM_LOCK(); + + switch (action) { + case kIOReportCopyChannelData: + if ( !fReportBuf ) { + rc = kIOReturnNotOpen; + break; + } + + ts = mach_absolute_time(); + STATEREPORT_UPDATEPREP(fReportBuf, ts, data2cpy, size2cpy); + if (size2cpy > (dest->getCapacity() - dest->getLength()) ) { + rc = kIOReturnOverrun; + break; + } + + STATEREPORT_UPDATERES(fReportBuf, kIOReportCopyChannelData, result); + dest->appendBytes(data2cpy, size2cpy); + + default: + break; + + } + + PM_UNLOCK(); + + return rc; + +} + +//********************************************************************************* +// configureSimplePowerReport +// +// Configures the IOSimpleReport for the given channel id +//********************************************************************************* +IOReturn IOService::configureSimplePowerReport(IOReportConfigureAction action, void *result ) +{ + + IOReturn rc = kIOReturnSuccess; + + if ( !pwrMgt ) + return kIOReturnUnsupported; + + if ( !fNumberOfPowerStates ) + return rc; + + switch (action) + { + case kIOReportEnable: + case kIOReportDisable: + break; + + case kIOReportGetDimensions: + SIMPLEREPORT_UPDATERES(kIOReportGetDimensions, result); + break; + } + + + return rc; +} + +//********************************************************************************* +// updateSimplePowerReport +// +// Updates the IOSimpleReport for the given channel id +//********************************************************************************* +IOReturn IOService::updateSimplePowerReport( IOReportConfigureAction action, void *result, void *destination ) +{ + uint32_t size2cpy; + void *data2cpy; + uint64_t buf[SIMPLEREPORT_BUFSIZE/sizeof(uint64_t)+1]; // Force an 8-byte alignment + IOBufferMemoryDescriptor *dest = OSDynamicCast(IOBufferMemoryDescriptor, (OSObject *)destination); + IOReturn rc = kIOReturnSuccess; + unsigned bits = 0; + + + if ( !pwrMgt ) + return kIOReturnUnsupported; + if ( !result || !dest ) return kIOReturnBadArgument; + + if ( !fNumberOfPowerStates ) + return rc; + PM_LOCK(); + + switch (action) { + case kIOReportCopyChannelData: + + SIMPLEREPORT_INIT(buf, sizeof(buf), getRegistryEntryID(), kPMCurrStateChID, kIOReportCategoryPower); + + if (fPowerStates[fCurrentPowerState].capabilityFlags & kIOPMPowerOn) + bits |= kPMReportPowerOn; + if (fPowerStates[fCurrentPowerState].capabilityFlags & kIOPMDeviceUsable) + bits |= kPMReportDeviceUsable; + if (fPowerStates[fCurrentPowerState].capabilityFlags & kIOPMLowPower) + bits |= kPMReportLowPower; + + + SIMPLEREPORT_SETVALUE(buf, ((bits & 0xff) << 8) | ((StateOrder(fMaxPowerState) & 0xf) << 4) | + (StateOrder(fCurrentPowerState) & 0xf)); + + SIMPLEREPORT_UPDATEPREP(buf, data2cpy, size2cpy); + if (size2cpy > (dest->getCapacity() - dest->getLength()))
+            {
+                rc = kIOReturnOverrun;
+                break;
+            }
+
+            SIMPLEREPORT_UPDATERES(kIOReportCopyChannelData, result);
+            dest->appendBytes(data2cpy, size2cpy);
+
+        default:
+            break;
+
+    }
+
+    PM_UNLOCK();
+
+    return rc;
+
+}
+
+
 // MARK: -
 // MARK: Driver Overrides

@@ -6442,7 +6799,7 @@ void IOService::clampPowerOn ( unsigned long duration )
 // Does nothing here. This should be implemented in a subclass driver.
 //*********************************************************************************

-IOReturn IOService::setPowerState (
+IOReturn IOService::setPowerState(
     unsigned long powerStateOrdinal, IOService * whatDevice )
 {
     return IOPMNoErr;
@@ -6451,88 +6808,62 @@ IOReturn IOService::setPowerState (
 //*********************************************************************************
 // [public] maxCapabilityForDomainState
 //
-// Finds the highest power state in the array whose input power
-// requirement is equal to the input parameter. Where a more intelligent
-// decision is possible, override this in the subclassed driver.
+// Finds the highest power state in the array whose input power requirement
+// is equal to the input parameter. Where a more intelligent decision is
+// possible, override this in the subclassed driver.
 //*********************************************************************************

-unsigned long IOService::maxCapabilityForDomainState ( IOPMPowerFlags domainState )
+IOPMPowerStateIndex IOService::getPowerStateForDomainFlags( IOPMPowerFlags flags )
 {
-    int i;
+    IOPMPowerStateIndex stateIndex;
+
+    if (!fNumberOfPowerStates)
+        return kPowerStateZero;
+
+    for ( int order = fNumberOfPowerStates - 1; order >= 0; order-- )
+    {
+        stateIndex = fPowerStates[order].stateOrderToIndex;
+
+        if ( (flags & fPowerStates[stateIndex].inputPowerFlags) ==
+             fPowerStates[stateIndex].inputPowerFlags )
+        {
+            return stateIndex;
+        }
+    }
+    return kPowerStateZero;
+}

-    if (fNumberOfPowerStates == 0 )
-    {
-        return 0;
-    }
-    for ( i = fNumberOfPowerStates - 1; i >= 0; i-- )
-    {
-        if ( (domainState & fPowerStates[i].inputPowerFlags) ==
-             fPowerStates[i].inputPowerFlags )
-        {
-            return i;
-        }
-    }
-    return 0;
+unsigned long IOService::maxCapabilityForDomainState( IOPMPowerFlags domainState )
+{
+    return getPowerStateForDomainFlags(domainState);
 }

 //*********************************************************************************
 // [public] initialPowerStateForDomainState
 //
-// Finds the highest power state in the array whose input power
-// requirement is equal to the input parameter. Where a more intelligent
-// decision is possible, override this in the subclassed driver.
+// Called to query the power state for the initial power transition.
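/*
 * Illustrative sketch only -- not part of the patch. The refactor above folds
 * three identical scans into getPowerStateForDomainFlags(): walk the states
 * from highest order to lowest, map each order slot back to an array index
 * through stateOrderToIndex, and return the first state whose required input
 * power flags are all offered. Entry, kCount and the sample table are invented
 * stand-ins for IOPMPSEntry and fPowerStates.
 */
#include <stdint.h>

struct Entry {
    uint64_t inputPowerFlags;    // power this state requires from the parent
    unsigned stateOrderToIndex;  // order slot -> index into the state array
};

static const unsigned kCount = 3;
static const Entry kStates[kCount] = {
    { 0x0, 0 },   // off:  needs nothing
    { 0x2, 1 },   // doze: needs one input flag
    { 0x6, 2 },   // on:   needs two input flags
};

// Mirrors the loop in the patch: highest order wins, an index is returned.
static unsigned stateForDomainFlags(uint64_t offered)
{
    for (int order = kCount - 1; order >= 0; order--) {
        unsigned idx = kStates[order].stateOrderToIndex;
        if ((offered & kStates[idx].inputPowerFlags) == kStates[idx].inputPowerFlags)
            return idx;
    }
    return 0;  // analogous to kPowerStateZero
}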
 //*********************************************************************************

-unsigned long IOService::initialPowerStateForDomainState ( IOPMPowerFlags domainState )
+unsigned long IOService::initialPowerStateForDomainState( IOPMPowerFlags domainState )
 {
-    int i;
-
     if (fResetPowerStateOnWake && (domainState & kIOPMRootDomainState))
     {
         // Return lowest power state for any root power domain changes
-        return 0;
+        return kPowerStateZero;
     }

-    if (fNumberOfPowerStates == 0 )
-    {
-        return 0;
-    }
-    for ( i = fNumberOfPowerStates - 1; i >= 0; i-- )
-    {
-        if ( (domainState & fPowerStates[i].inputPowerFlags) ==
-             fPowerStates[i].inputPowerFlags )
-        {
-            return i;
-        }
-    }
-    return 0;
+    return getPowerStateForDomainFlags(domainState);
 }

 //*********************************************************************************
 // [public] powerStateForDomainState
 //
-// Finds the highest power state in the array whose input power
-// requirement is equal to the input parameter. Where a more intelligent
-// decision is possible, override this in the subclassed driver.
+// This method is not called from PM.
 //*********************************************************************************

-unsigned long IOService::powerStateForDomainState ( IOPMPowerFlags domainState )
+unsigned long IOService::powerStateForDomainState( IOPMPowerFlags domainState )
 {
-    int i;
-
-    if (fNumberOfPowerStates == 0 )
-    {
-        return 0;
-    }
-    for ( i = fNumberOfPowerStates - 1; i >= 0; i-- )
-    {
-        if ( (domainState & fPowerStates[i].inputPowerFlags) ==
-             fPowerStates[i].inputPowerFlags )
-        {
-            return i;
-        }
-    }
-    return 0;
+    return getPowerStateForDomainFlags(domainState);
 }

 #ifndef __LP64__
@@ -6542,7 +6873,7 @@ unsigned long IOService::powerStateForDomainState ( IOPMPowerFlags domainState )
 // Does nothing here. This should be implemented in a subclass driver.
 //*********************************************************************************

-bool IOService::didYouWakeSystem ( void )
+bool IOService::didYouWakeSystem( void )
 {
     return false;
 }
@@ -6554,7 +6885,7 @@ bool IOService::didYouWakeSystem ( void )
 // Does nothing here. This should be implemented in a subclass driver.
 //*********************************************************************************

-IOReturn IOService::powerStateWillChangeTo ( IOPMPowerFlags, unsigned long, IOService * )
+IOReturn IOService::powerStateWillChangeTo( IOPMPowerFlags, unsigned long, IOService * )
 {
     return kIOPMAckImplied;
 }
@@ -6565,7 +6896,7 @@ IOReturn IOService::powerStateWillChangeTo ( IOPMPowerFlags, unsigned long, IOSe
 // Does nothing here. This should be implemented in a subclass driver.
 //*********************************************************************************

-IOReturn IOService::powerStateDidChangeTo ( IOPMPowerFlags, unsigned long, IOService * )
+IOReturn IOService::powerStateDidChangeTo( IOPMPowerFlags, unsigned long, IOService * )
 {
     return kIOPMAckImplied;
 }
@@ -6577,7 +6908,7 @@ IOReturn IOService::powerStateDidChangeTo ( IOPMPowerFlags, unsigned long, IOSer
 // Does nothing here. This should be implemented in a subclass policy-maker.
 //*********************************************************************************

-void IOService::powerChangeDone ( unsigned long )
+void IOService::powerChangeDone( unsigned long )
 {
 }
@@ -6588,7 +6919,7 @@ void IOService::powerChangeDone ( unsigned long )
 // Does nothing here. This should be implemented in a subclass driver.
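/*
 * Illustrative sketch only -- not from the patch. The default implementations
 * above return kIOPMAckImplied, i.e. "acknowledged synchronously". Under the
 * IOKit PM contract a driver may instead return the maximum time it needs, in
 * microseconds, and acknowledge later with acknowledgePowerChange(). MyDriver
 * and prepareForSleep() below are hypothetical names.
 */
IOReturn MyDriver::powerStateWillChangeTo(IOPMPowerFlags capabilities,
                                          unsigned long stateNumber,
                                          IOService * whatDevice)
{
    if ((capabilities & kIOPMPowerOn) == 0) {
        prepareForSleep();       // hypothetical helper: quiesce DMA, save state
    }
    return kIOPMAckImplied;      // handled synchronously, no deferred ack
}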
 //*********************************************************************************

-IOReturn IOService::newTemperature ( long currentTemp, IOService * whichZone )
+IOReturn IOService::newTemperature( long currentTemp, IOService * whichZone )
 {
     return IOPMNoErr;
 }
@@ -6664,9 +6995,10 @@ void IOService::submitPMRequest( IOPMRequest * request )
     assert( gIOPMRequestQueue );

     PM_LOG1("[+ %02lx] %p [%p %s] %p %p %p\n",
-        (long)request->getType(), request,
-        request->getTarget(), request->getTarget()->getName(),
-        request->fArg0, request->fArg1, request->fArg2);
+        (long)request->getType(), OBFUSCATE(request),
+        OBFUSCATE(request->getTarget()), request->getTarget()->getName(),
+        OBFUSCATE(request->fArg0),
+        OBFUSCATE(request->fArg1), OBFUSCATE(request->fArg2));

     if (request->isReplyType())
         gIOPMReplyQueue->queuePMRequest( request );
@@ -6684,9 +7016,10 @@ void IOService::submitPMRequest( IOPMRequest ** requests, IOItemCount count )
     {
         IOPMRequest * req = requests[i];
         PM_LOG1("[+ %02lx] %p [%p %s] %p %p %p\n",
-            (long)req->getType(), req,
-            req->getTarget(), req->getTarget()->getName(),
-            req->fArg0, req->fArg1, req->fArg2);
+            (long)req->getType(), OBFUSCATE(req),
+            OBFUSCATE(req->getTarget()), req->getTarget()->getName(),
+            OBFUSCATE(req->fArg0),
+            OBFUSCATE(req->fArg1), OBFUSCATE(req->fArg2));
     }

     gIOPMRequestQueue->queuePMRequestChain( requests, count );
@@ -6756,7 +7089,8 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
     assert(request && queue);

     PM_LOG1("[- %02x] %p [%p %s] state %d, busy %d\n",
-        request->getType(), request, this, getName(),
+        request->getType(), OBFUSCATE(request),
+        OBFUSCATE(this), getName(),
         fMachineState, gIOPMBusyCount);

     // Catch requests created by idleTimerExpired().
@@ -6780,7 +7114,7 @@ bool IOService::retirePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 // Check if machine state transition is blocked.
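/*
 * Illustrative sketch only -- not from the patch. The PM_LOG1 hunks above wrap
 * every raw pointer in OBFUSCATE() so the log keeps one stable token per
 * object without disclosing slid kernel addresses. A minimal model of that
 * kind of address permutation (kAddrPerm stands in for a boot-time random
 * value and is invented here):
 */
#include <stdint.h>

static const uintptr_t kAddrPerm = 0x5A5A1000;  // hypothetical boot-time constant

static inline const void * obfuscate_for_log(const void *p)
{
    // NULL stays NULL so "no object" is still recognizable in a log line.
    return p ? (const void *)((uintptr_t)p + kAddrPerm) : p;
}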
 //*********************************************************************************

-bool IOService::isPMBlocked ( IOPMRequest * request, int count )
+bool IOService::isPMBlocked( IOPMRequest * request, int count )
 {
     int reason = 0;

@@ -6793,6 +7127,7 @@ bool IOService::isPMBlocked ( IOPMRequest * request, int count )
             // 5 = kDriverCallInformPreChange
             // 6 = kDriverCallInformPostChange
             // 7 = kDriverCallSetPowerState
+            // 8 = kRootDomainInformPreChange
             if (fDriverCallBusy)
                 reason = 5 + fDriverCallReason;
             break;
@@ -6830,7 +7165,8 @@ bool IOService::isPMBlocked ( IOPMRequest * request, int count )
     if (count)
     {
         PM_LOG1("[B %02x] %p [%p %s] state %d, reason %d\n",
-            request->getType(), request, this, getName(),
+            request->getType(), OBFUSCATE(request),
+            OBFUSCATE(this), getName(),
             fMachineState, reason);
     }

@@ -6856,7 +7192,8 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
     while (isPMBlocked(request, loop++) == false)
     {
         PM_LOG1("[W %02x] %p [%p %s] state %d\n",
-            request->getType(), request, this, getName(), fMachineState);
+            request->getType(), OBFUSCATE(request),
+            OBFUSCATE(this), getName(), fMachineState);

         gIOPMRequest = request;
         gIOPMWorkCount++;
@@ -6866,6 +7203,8 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
         switch ( fMachineState )
         {
             case kIOPM_Finished:
+                start_watchdog_timer();
+
                 executePMRequest( request );
                 break;
@@ -6889,7 +7228,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                                           0, 0);

-                        getPMRootDomain()->recordAndReleasePMEventGated( details );
+                        getPMRootDomain()->recordAndReleasePMEvent( details );
                     }

                     // no, we can continue
@@ -6904,11 +7243,11 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                                           1,                  /* reason: 1 == Ask clients succeeded */
                                           kIOReturnAborted);  /* result */

-                        getPMRootDomain()->recordAndReleasePMEventGated( details );
+                        getPMRootDomain()->recordAndReleasePMEvent( details );
                     }

                     OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
-                    PM_ERROR("%s: idle cancel\n", fName);
+                    PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState);
                     // yes, rescind the warning
                     tellNoChangeDown(fHeadNotePowerState);
                     // mark the change note un-actioned
@@ -6918,8 +7257,26 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                 }
                 break;

+            case kIOPM_OurChangeTellUserPMPolicyPowerDown:
+                // PMRD: tellChangeDown/kNotifyApps done, was it cancelled?
+                if (fDoNotPowerDown)
+                {
+                    OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
+                    PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState);
+                    // yes, rescind the warning
+                    tellNoChangeDown(fHeadNotePowerState);
+                    // mark the change note un-actioned
+                    fHeadNoteChangeFlags |= kIOPMNotDone;
+                    // and we're done
+                    OurChangeFinish();
+                }
+                else
+                    OurChangeTellUserPMPolicyPowerDown();
+                break;
+
             case kIOPM_OurChangeTellPriorityClientsPowerDown:
-                // tellChangeDown(kNotifyApps) done, was it cancelled?
+                // PMRD: LastCallBeforeSleep notify done
+                // Non-PMRD: tellChangeDown/kNotifyApps done
                 if (fDoNotPowerDown)
                 {
                     if (IS_ROOT_DOMAIN) {
@@ -6929,10 +7286,10 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                                           2,                  /* reason: 2 == Client cancelled wake */
                                           kIOReturnAborted);  /* result */

-                        getPMRootDomain()->recordAndReleasePMEventGated( details );
+                        getPMRootDomain()->recordAndReleasePMEvent( details );
                     }
                     OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
-                    PM_ERROR("%s: idle revert\n", fName);
+                    PM_ERROR("%s: idle revert, state %u\n", fName, fMachineState);
                     // no, tell clients we're back in the old state
                     tellChangeUp(fCurrentPowerState);
                     // mark the change note un-actioned
@@ -6949,7 +7306,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                                           2,                  /* reason: 2 == TellPriorityClientsDone */
                                           kIOReturnSuccess);  /* result */

-                        getPMRootDomain()->recordAndReleasePMEventGated( details );
+                        getPMRootDomain()->recordAndReleasePMEvent( details );
                     }
                     // yes, we can continue
                     OurChangeTellPriorityClientsPowerDown();
@@ -7009,10 +7366,22 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                 break;

             case kIOPM_DriverThreadCallDone:
-                if (fDriverCallReason == kDriverCallSetPowerState)
-                    notifyControllingDriverDone();
-                else
-                    notifyInterestedDriversDone();
+                switch (fDriverCallReason)
+                {
+                    case kDriverCallInformPreChange:
+                    case kDriverCallInformPostChange:
+                        notifyInterestedDriversDone();
+                        break;
+                    case kDriverCallSetPowerState:
+                        notifyControllingDriverDone();
+                        break;
+                    case kRootDomainInformPreChange:
+                        notifyRootDomainDone();
+                        break;
+                    default:
+                        panic("%s: bad call reason %x",
+                            getName(), fDriverCallReason);
+                }
                 break;

             case kIOPM_NotifyChildrenOrdered:
@@ -7024,9 +7393,9 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                 break;

             case kIOPM_NotifyChildrenStart:
-                PM_LOG2("%s: kIOPM_NotifyChildrenStart done\n", getName());
-                MS_POP();   // from notifyInterestedDriversDone()
-                notifyChildren();
+                // pop notifyAll() state saved by notifyInterestedDriversDone()
+                MS_POP();
+                notifyRootDomain();
                 break;

             case kIOPM_SyncTellClientsPowerDown:
@@ -7037,7 +7406,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                     getPMRootDomain()->askChangeDownDone(
                         &fHeadNoteChangeFlags, &cancel);
                     fDoNotPowerDown = cancel;
-                }
+                }
                 if (!fDoNotPowerDown)
                 {
                     fMachineState = kIOPM_SyncTellPriorityClientsPowerDown;
@@ -7046,8 +7415,10 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                 }
                 else
                 {
+                    // Cancelled by IOPMrootDomain::askChangeDownDone() or
+                    // askChangeDown/kNotifyApps
                     OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
-                    PM_ERROR("%s: idle cancel\n", fName);
+                    PM_ERROR("%s: idle cancel, state %u\n", fName, fMachineState);
                     tellNoChangeDown(fHeadNotePowerState);
                     fHeadNoteChangeFlags |= kIOPMNotDone;
                     OurChangeFinish();
@@ -7055,6 +7426,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                 break;

             case kIOPM_SyncTellPriorityClientsPowerDown:
+                // PMRD: tellChangeDown/kNotifyApps done, was it cancelled?
                 if (!fDoNotPowerDown)
                 {
                     fMachineState = kIOPM_SyncNotifyWillChange;
@@ -7064,7 +7436,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                 else
                 {
                     OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
-                    PM_ERROR("%s: idle revert\n", fName);
+                    PM_ERROR("%s: idle revert, state %u\n", fName, fMachineState);
                     tellChangeUp(fCurrentPowerState);
                     fHeadNoteChangeFlags |= kIOPMNotDone;
                     OurChangeFinish();
@@ -7141,6 +7513,7 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
             if (fMachineState == kIOPM_Finished)
             {
+                stop_watchdog_timer();
                 done = true;
                 break;
             }
@@ -7239,7 +7612,8 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
     assert( request->isReplyType() );

     PM_LOG1("[A %02x] %p [%p %s] state %d\n",
-        request->getType(), request, this, getName(), fMachineState);
+        request->getType(), OBFUSCATE(request),
+        OBFUSCATE(this), getName(), fMachineState);

     switch ( request->getType() )
     {
@@ -7253,8 +7627,13 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
             {
                 // Clients are not allowed to cancel when kIOPMSkipAskPowerDown
                 // flag is set. Only root domain will set this flag.
+                // However, there is one exception to this rule. User-space PM
+                // policy may choose to cancel sleep even after all clients have
+                // been notified that we will lower power.

-                if ((fHeadNoteChangeFlags & kIOPMSkipAskPowerDown) == 0)
+                if ((fMachineState == kIOPM_OurChangeTellUserPMPolicyPowerDown)
+                 || (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown)
+                 || ((fHeadNoteChangeFlags & kIOPMSkipAskPowerDown) == 0))
                 {
                     fDoNotPowerDown = true;
@@ -7303,7 +7682,7 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
                 uint64_t nsec = computeTimeDeltaNS(&fDriverCallStartTime);
                 if (nsec > LOG_SETPOWER_TIMES)
                     PM_LOG("%s::setPowerState(%p, %lu -> %lu) async took %d ms\n",
-                        fName, this, fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec));
+                        fName, OBFUSCATE(this), fCurrentPowerState, fHeadNotePowerState, NS_TO_MS(nsec));

                 PMEventDetails *details = PMEventDetails::eventDetails(
                                             kIOPMEventTypeSetPowerStateDelayed,   // type
@@ -7315,7 +7694,7 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
                                             0,                  // result
                                             NS_TO_US(nsec));    // usec completion time

-                getPMRootDomain()->recordAndReleasePMEventGated( details );
+                getPMRootDomain()->recordAndReleasePMEvent( details );
 #endif
                 OUR_PMLog(kPMLogDriverAcknowledgeSet, (uintptr_t) this, fDriverTimer);
                 fDriverTimer = 0;
@@ -7334,7 +7713,8 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
             break;

         case kIOPMRequestTypeIdleCancel:
-            if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown)
+            if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown)
+                || (fMachineState == kIOPM_OurChangeTellUserPMPolicyPowerDown)
                 || (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown)
                 || (fMachineState == kIOPM_SyncTellClientsPowerDown)
                 || (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown))
@@ -7345,7 +7725,8 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
                 fDoNotPowerDown = true;

                 // Stop waiting for app replys.
                 if ((fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown) ||
-                    (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown))
+                    (fMachineState == kIOPM_OurChangeTellUserPMPolicyPowerDown) ||
+                    (fMachineState == kIOPM_SyncTellPriorityClientsPowerDown))
                     cleanClientResponses(false);
                 more = true;
             }
@@ -7496,7 +7877,7 @@ void IOService::waitForPMDriverCall( IOService * target )

 const char * IOService::getIOMessageString( uint32_t msg )
 {
-#define MSG_ENTRY(x)    {x, #x}
+#define MSG_ENTRY(x)    {(int) x, #x}

     static const IONamedValue msgNames[] = {
         MSG_ENTRY( kIOMessageCanDevicePowerOff ),
@@ -7512,12 +7893,14 @@ const char * IOService::getIOMessageString( uint32_t msg )
         MSG_ENTRY( kIOMessageSystemHasPoweredOn ),
         MSG_ENTRY( kIOMessageSystemWillRestart ),
         MSG_ENTRY( kIOMessageSystemWillPowerOn ),
-        MSG_ENTRY( kIOMessageSystemCapabilityChange )
+        MSG_ENTRY( kIOMessageSystemCapabilityChange ),
+        MSG_ENTRY( kIOPMMessageLastCallBeforeSleep )
     };

     return IOFindNameForValue(msg, msgNames);
 }

+
 // MARK: -
 // MARK: IOPMRequest

@@ -7547,7 +7930,9 @@ bool IOPMRequest::init( IOService * target, IOOptionBits type )

     fType             = type;
     fTarget           = target;
+#if NOT_READY
     fCompletionStatus = kIOReturnSuccess;
+#endif

     if (fTarget)
         fTarget->retain();
@@ -7591,7 +7976,7 @@ bool IOPMRequest::attachNextRequest( IOPMRequest * next )
         fRequestNext->fWorkWaitCount++;
 #if LOG_REQUEST_ATTACH
         PM_LOG("Attached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
-            this, (uint32_t) fType, fRequestNext,
+            OBFUSCATE(this), (uint32_t) fType, OBFUSCATE(fRequestNext),
             (uint32_t) fRequestNext->fType,
             (uint32_t) fRequestNext->fWorkWaitCount,
             fTarget->getName());
@@ -7612,7 +7997,7 @@ bool IOPMRequest::detachNextRequest( void )
         fRequestNext->fWorkWaitCount--;
 #if LOG_REQUEST_ATTACH
         PM_LOG("Detached next: %p [0x%x] -> %p [0x%x, %u] %s\n",
-            this, (uint32_t) fType, fRequestNext,
+            OBFUSCATE(this), (uint32_t) fType, OBFUSCATE(fRequestNext),
             (uint32_t) fRequestNext->fType,
             (uint32_t) fRequestNext->fWorkWaitCount,
             fTarget->getName());
@@ -7635,7 +8020,7 @@ bool IOPMRequest::attachRootRequest( IOPMRequest * root )
         fRequestRoot->fFreeWaitCount++;
 #if LOG_REQUEST_ATTACH
         PM_LOG("Attached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
-            this, (uint32_t) fType, fRequestRoot,
+            OBFUSCATE(this), (uint32_t) fType, OBFUSCATE(fRequestRoot),
             (uint32_t) fRequestRoot->fType,
             (uint32_t) fRequestRoot->fFreeWaitCount,
             fTarget->getName());
@@ -7656,7 +8041,7 @@ bool IOPMRequest::detachRootRequest( void )
         fRequestRoot->fFreeWaitCount--;
 #if LOG_REQUEST_ATTACH
         PM_LOG("Detached root: %p [0x%x] -> %p [0x%x, %u] %s\n",
-            this, (uint32_t) fType, fRequestRoot,
+            OBFUSCATE(this), (uint32_t) fType, OBFUSCATE(fRequestRoot),
             (uint32_t) fRequestRoot->fType,
             (uint32_t) fRequestRoot->fFreeWaitCount,
             fTarget->getName());
@@ -7827,7 +8212,7 @@ bool IOPMWorkQueue::queuePMRequest( IOPMRequest * request, IOServicePM * pwrMgt
         queue_enter(&fWorkQueue, pwrMgt, IOServicePM *, WorkChain);
         fQueueLength++;
         PM_LOG3("IOPMWorkQueue: [%u] added %s@%p to queue\n",
-            fQueueLength, pwrMgt->Name, pwrMgt);
+            fQueueLength, pwrMgt->Name, OBFUSCATE(pwrMgt));
     }
 }

@@ -7919,7 +8304,7 @@ bool IOPMWorkQueue::checkForWork( void )
             assert(fQueueLength);
             if (fQueueLength) fQueueLength--;
             PM_LOG3("IOPMWorkQueue: [%u] removed %s@%p from queue\n",
-                fQueueLength, entry->Name, entry);
+                fQueueLength, entry->Name, OBFUSCATE(entry));
             queue_remove(&fWorkQueue, entry, IOServicePM *, WorkChain);
         }
         entry = next;
@@ -8033,15 +8418,25 @@ setPMProperty( OSDictionary * dict, const char * key, uint64_t value )
     }
 }

-IOReturn IOServicePM::gatedSerialize( OSSerialize * s )
+IOReturn IOServicePM::gatedSerialize( OSSerialize * s ) const
 {
     OSDictionary * dict;
     bool           ok = false;
+    int            powerClamp = -1;
     int            dictSize = 5;

     if (IdleTimerPeriod)
         dictSize += 4;

+    if (PMActions.parameter & kPMActionsFlagLimitPower)
+    {
+        dictSize += 1;
+        powerClamp = 0;
+        if (PMActions.parameter &
+            (kPMActionsFlagIsDisplayWrangler | kPMActionsFlagIsGraphicsDevice))
+            powerClamp++;
+    }
+
 #if WORK_QUEUE_STATS
     if (gIOPMRootNode == ControllingDriver)
         dictSize += 4;
@@ -8064,6 +8459,8 @@ IOReturn IOServicePM::gatedSerialize( OSSerialize * s )
         setPMProperty(dict, "MachineState", MachineState);
         if (DeviceOverrideEnabled)
             dict->setObject("PowerOverrideOn", kOSBooleanTrue);
+        if (powerClamp >= 0)
+            setPMProperty(dict, "PowerClamp", powerClamp);

         if (IdleTimerPeriod)
         {
@@ -8129,7 +8526,13 @@ bool IOServicePM::serialize( OSSerialize * s ) const
 {
     IOReturn ret = kIOReturnNotReady;

-    if (gIOPMWorkLoop)
+    if (gIOPMWatchDogThread == current_thread())
+    {
+        // Calling without lock as this data is collected for debug purpose, before reboot.
+        // The workloop is probably already hung in state machine.
+        ret = gatedSerialize(s);
+    }
+    else if (gIOPMWorkLoop)
     {
         ret = gIOPMWorkLoop->runAction(
                 OSMemberFunctionCast(IOWorkLoop::Action, this, &IOServicePM::gatedSerialize),
diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h
index 8651e6af8..40baaa62c 100644
--- a/iokit/Kernel/IOServicePMPrivate.h
+++ b/iokit/Kernel/IOServicePMPrivate.h
@@ -75,33 +75,46 @@ struct IOPMActions;

 typedef void
 (*IOPMActionPowerChangeStart)(
-    void *          target,
-    IOService *     service,
-    IOPMActions *   actions,
-    uint32_t        powerState,
-    uint32_t *      changeFlags );
+    void *                  target,
+    IOService *             service,
+    IOPMActions *           actions,
+    IOPMPowerStateIndex     powerState,
+    IOPMPowerChangeFlags *  changeFlags,
+    IOPMRequestTag          requestTag );

 typedef void
 (*IOPMActionPowerChangeDone)(
-    void *          target,
-    IOService *     service,
-    IOPMActions *   actions,
-    uint32_t        powerState,
-    uint32_t        changeFlags );
+    void *                  target,
+    IOService *             service,
+    IOPMActions *           actions,
+    IOPMPowerStateIndex     powerState,
+    IOPMPowerChangeFlags    changeFlags,
+    IOPMRequestTag          requestTag );

 typedef void
 (*IOPMActionPowerChangeOverride)(
-    void *          target,
-    IOService *     service,
-    IOPMActions *   actions,
-    unsigned long * powerState,
-    uint32_t *      changeFlags );
+    void *                  target,
+    IOService *             service,
+    IOPMActions *           actions,
+    IOPMPowerStateIndex *   powerState,
+    IOPMPowerChangeFlags *  changeFlags,
+    IOPMRequestTag          requestTag );

 typedef void
 (*IOPMActionActivityTickle)(
-    void *          target,
-    IOService *     service,
-    IOPMActions *   actions );
+    void *                  target,
+    IOService *             service,
+    IOPMActions *           actions );
+
+typedef void
+(*IOPMActionUpdatePowerClient)(
+    void *                  target,
+    IOService *             service,
+    IOPMActions *           actions,
+    const OSSymbol *        powerClient,
+    IOPMPowerStateIndex     oldPowerState,
+    IOPMPowerStateIndex     newPowerState
+);

 struct IOPMActions {
     void *                          target;
@@ -110,6 +123,16 @@ struct IOPMActions {
     IOPMActionPowerChangeDone       actionPowerChangeDone;
     IOPMActionPowerChangeOverride   actionPowerChangeOverride;
     IOPMActionActivityTickle       actionActivityTickle;
+    IOPMActionUpdatePowerClient     actionUpdatePowerClient;
+};
+
+// IOPMActions parameter flags
+enum {
+    kPMActionsFlagIsDisplayWrangler = 0x00000100,
+    kPMActionsFlagIsGraphicsDevice  = 0x00000200,
+    kPMActionsFlagIsAudioDevice     = 0x00000400,
+    kPMActionsFlagLimitPower        = 0x00000800,
+    kPMActionsPCIBitNumberMask      = 0x000000ff
 };
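/*
 * Illustrative sketch only -- not from the patch. The flag bits above share
 * one 32-bit parameter word with a PCI bit number in the low byte
 * (kPMActionsPCIBitNumberMask). The gatedSerialize() hunk earlier derives its
 * "PowerClamp" property the same way this standalone model does; powerClampFor
 * and the kFlag names are invented stand-ins.
 */
#include <stdint.h>

enum {
    kFlagIsDisplayWrangler = 0x00000100,
    kFlagIsGraphicsDevice  = 0x00000200,
    kFlagLimitPower        = 0x00000800,
};

static int powerClampFor(uint32_t parameter)
{
    if (!(parameter & kFlagLimitPower))
        return -1;                           // no clamp is published
    int clamp = 0;
    if (parameter & (kFlagIsDisplayWrangler | kFlagIsGraphicsDevice))
        clamp++;                             // graphics-class devices clamp at 1
    return clamp;
}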
 //******************************************************************************
@@ -126,43 +149,45 @@ class PMEventDetails : public OSObject
     friend class IOPMrootDomain;
     friend class IOPMTimeline;
 public:
-  static PMEventDetails *eventDetails(uint32_t   type,
-                                      const char *ownerName,
-                                      uintptr_t  ownerUnique,
-                                      const char *interestName,
-                                      uint8_t    oldState,
-                                      uint8_t    newState,
-                                      uint32_t   result,
-                                      uint32_t   elapsedTimeUS);
-
-  static PMEventDetails *eventDetails(uint32_t   type,
-                                      const char *uuid,
-                                      uint32_t   reason,
-                                      uint32_t   result);
+    static PMEventDetails *eventDetails(uint32_t   type,
+                                        const char *ownerName,
+                                        uintptr_t  ownerUnique,
+                                        const char *interestName,
+                                        uint8_t    oldState,
+                                        uint8_t    newState,
+                                        uint32_t   result,
+                                        uint32_t   elapsedTimeUS);
+
+    static PMEventDetails *eventDetails(uint32_t   type,
+                                        const char *uuid,
+                                        uint32_t   reason,
+                                        uint32_t   result);
 private:
-  uint8_t      eventClassifier;
-  uint32_t     eventType;
-  const char   *ownerName;
-  uintptr_t    ownerUnique;
-  const char   *interestName;
-  uint8_t      oldState;
-  uint8_t      newState;
-  uint32_t     result;
-  uint32_t     elapsedTimeUS;
-
-  const char   *uuid;
-  uint32_t     reason;
+    uint8_t      eventClassifier;
+    uint32_t     eventType;
+    const char   *ownerName;
+    uintptr_t    ownerUnique;
+    const char   *interestName;
+    uint8_t      oldState;
+    uint8_t      newState;
+    uint32_t     result;
+    uint32_t     elapsedTimeUS;
+
+    const char   *uuid;
+    uint32_t     reason;
 };

 // Internal concise representation of IOPMPowerState
 struct IOPMPSEntry
 {
-    IOPMPowerFlags  capabilityFlags;
-    IOPMPowerFlags  outputPowerFlags;
-    IOPMPowerFlags  inputPowerFlags;
-    uint32_t        staticPower;
-    uint32_t        settleUpTime;
-    uint32_t        settleDownTime;
+    IOPMPowerFlags      capabilityFlags;
+    IOPMPowerFlags      outputPowerFlags;
+    IOPMPowerFlags      inputPowerFlags;
+    uint32_t            staticPower;
+    uint32_t            settleUpTime;
+    uint32_t            settleDownTime;
+    IOPMPowerStateIndex stateOrder;
+    IOPMPowerStateIndex stateOrderToIndex;
 };

 //******************************************************************************
@@ -198,6 +223,7 @@ private:
     thread_call_t           AckTimer;
     thread_call_t           SettleTimer;
     thread_call_t           IdleTimer;
+    thread_call_t           WatchdogTimer;

     // Settle time after changing power state.
     uint32_t                SettleTimeUS;
@@ -272,6 +298,9 @@ private:
     // Number of power states in the power array.
     IOPMPowerStateIndex     NumberOfPowerStates;

+    // Ordered highest power state in the power array.
+    IOPMPowerStateIndex     HighestPowerState;
+
     // Power state array.
     IOPMPSEntry *           PowerStates;

@@ -287,8 +316,8 @@ private:
     // The highest power state we can achieve in current power domain.
     IOPMPowerStateIndex     MaxPowerState;

-    // Logical OR of all output power character flags in the array.
-    IOPMPowerFlags          OutputPowerCharacterFlags;
+    // Logical OR of all output power flags in the power state array.
+    IOPMPowerFlags          MergedOutputPowerFlags;

     // OSArray which manages responses from notified apps and clients.
     OSArray *               ResponseArray;
@@ -318,8 +347,8 @@ private:
     uint32_t                DeviceUsablePowerState;

     // Protected by ActivityLock - BEGIN
-    int                     ActivityTicklePowerState;
-    int                     AdvisoryTicklePowerState;
+    IOPMPowerStateIndex     ActivityTicklePowerState;
+    IOPMPowerStateIndex     AdvisoryTicklePowerState;
     uint32_t                ActivityTickleCount;
     uint32_t                DeviceWasActive     : 1;
     uint32_t                AdvisoryTickled     : 1;
@@ -338,8 +367,14 @@ private:
     queue_head_t            PMDriverCallQueue;
     OSSet *                 InsertInterestSet;
     OSSet *                 RemoveInterestSet;
+
+
+    // IOReporter Data
+    uint32_t                ReportClientCnt;
+    void *                  ReportBuf;
     // Protected by PMLock - END

+
 #if PM_VARS_SUPPORT
     IOPMprot *              PMVars;
 #endif
@@ -347,7 +382,7 @@ private:
     IOPMActions             PMActions;

     // Serialize IOServicePM state for debug output.
-    IOReturn gatedSerialize( OSSerialize * s );
+    IOReturn gatedSerialize( OSSerialize * s ) const;
     virtual bool serialize( OSSerialize * s ) const;

     // PM log and trace
@@ -362,6 +397,7 @@ private:
 #define fAckTimer                   pwrMgt->AckTimer
 #define fSettleTimer                pwrMgt->SettleTimer
 #define fIdleTimer                  pwrMgt->IdleTimer
+#define fWatchdogTimer              pwrMgt->WatchdogTimer
 #define fSettleTimeUS               pwrMgt->SettleTimeUS
 #define fHeadNoteChangeFlags        pwrMgt->HeadNoteChangeFlags
 #define fHeadNotePowerState         pwrMgt->HeadNotePowerState
@@ -397,12 +433,13 @@ private:
 #define fPreviousRequestPowerFlags  pwrMgt->PreviousRequestPowerFlags
 #define fName                       pwrMgt->Name
 #define fNumberOfPowerStates        pwrMgt->NumberOfPowerStates
+#define fHighestPowerState          pwrMgt->HighestPowerState
 #define fPowerStates                pwrMgt->PowerStates
 #define fControllingDriver          pwrMgt->ControllingDriver
 #define fCurrentPowerState          pwrMgt->CurrentPowerState
 #define fParentsCurrentPowerFlags   pwrMgt->ParentsCurrentPowerFlags
 #define fMaxPowerState              pwrMgt->MaxPowerState
-#define fOutputPowerCharacterFlags  pwrMgt->OutputPowerCharacterFlags
+#define fMergedOutputPowerFlags     pwrMgt->MergedOutputPowerFlags
 #define fResponseArray              pwrMgt->ResponseArray
 #define fNotifyClientArray          pwrMgt->NotifyClientArray
 #define fSerialNumber               pwrMgt->SerialNumber
@@ -434,9 +471,19 @@ private:
 #define fPMDriverCallQueue          pwrMgt->PMDriverCallQueue
 #define fInsertInterestSet          pwrMgt->InsertInterestSet
 #define fRemoveInterestSet          pwrMgt->RemoveInterestSet
+#define fReportClientCnt            pwrMgt->ReportClientCnt
+#define fReportBuf                  pwrMgt->ReportBuf
 #define fPMVars                     pwrMgt->PMVars
 #define fPMActions                  pwrMgt->PMActions

+#define StateOrder(state)           (((state) < fNumberOfPowerStates)   \
+                                    ? pwrMgt->PowerStates[(state)].stateOrder \
+                                    : (state))
+#define StateMax(a,b)               (StateOrder((a)) < StateOrder((b)) ? (b) : (a))
+#define StateMin(a,b)               (StateOrder((a)) < StateOrder((b)) ? (a) : (b))
+
+#define kPowerStateZero             (0)
+
 /*
 When an IOService is waiting for acknowledgement to a power change
 notification from an interested driver or the controlling driver,
@@ -445,6 +492,8 @@ the ack timer is ticking every tenth of a second.
 */
 #define ACK_TIMER_PERIOD            100000000

+#define WATCHDOG_TIMER_PERIOD       (300)   // 300 secs
+
 // Max wait time in microseconds for kernel priority and capability clients
 // with async message handlers to acknowledge.
 //
@@ -454,8 +503,8 @@ the ack timer is ticking every tenth of a second.
 // Attributes describing a power state change.
 // See IOPMPowerChangeFlags data type.
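/*
 * Illustrative sketch only -- not from the patch. With stateOrder added to
 * IOPMPSEntry, the StateOrder()/StateMax()/StateMin() macros above compare
 * power states by *order* while still passing around array indices. A
 * standalone model (the three-entry table is invented, deliberately out of
 * order):
 */
#include <stdint.h>

static const uint32_t kOrderOf[3] = { 0, 2, 1 };   // index -> order

static uint32_t orderOf(uint32_t s) { return (s < 3) ? kOrderOf[s] : s; }

// Returns the *index* of whichever state ranks higher by order.
static uint32_t stateMax(uint32_t a, uint32_t b)
{
    return (orderOf(a) < orderOf(b)) ? b : a;
}
// Here stateMax(1, 2) == 1, because index 1 carries the highest order (2).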
 //
-#define kIOPMParentInitiated        0x0001  // this power change initiated by our  parent
-#define kIOPMSelfInitiated          0x0002  // this power change initiated by this device
+#define kIOPMParentInitiated        0x0001  // power change initiated by our parent
+#define kIOPMSelfInitiated          0x0002  // power change initiated by this device
 #define kIOPMNotDone                0x0004  // we couldn't make this change
 #define kIOPMDomainWillChange       0x0008  // change started by PowerDomainWillChangeTo
 #define kIOPMDomainDidChange        0x0010  // change started by PowerDomainDidChangeTo
@@ -469,6 +518,7 @@ the ack timer is ticking every tenth of a second.
 #define kIOPMInitialPowerChange     0x1000  // set for initial power change
 #define kIOPMRootChangeUp           0x2000  // Root power domain change up
 #define kIOPMRootChangeDown         0x4000  // Root power domain change down
+#define kIOPMExpireIdleTimer        0x8000  // Accelerate idle timer expiration

 #define kIOPMRootBroadcastFlags     (kIOPMSynchronize  | \
                                      kIOPMRootChangeUp | kIOPMRootChangeDown)
@@ -482,7 +532,8 @@ the ack timer is ticking every tenth of a second.
 enum {
     kDriverCallInformPreChange,
     kDriverCallInformPostChange,
-    kDriverCallSetPowerState
+    kDriverCallSetPowerState,
+    kRootDomainInformPreChange
 };

 struct DriverCallParam {
@@ -550,12 +601,15 @@ protected:
     IOItemCount          fFreeWaitCount;    // completion blocked if non-zero
     uint32_t             fType;             // request type

+#if NOT_READY
     IOPMCompletionAction fCompletionAction;
     void *               fCompletionTarget;
     void *               fCompletionParam;
     IOReturn             fCompletionStatus;
+#endif

 public:
+    uint32_t             fRequestTag;
     void *               fArg0;
     void *               fArg1;
     void *               fArg2;
@@ -578,7 +632,9 @@ public:
     inline IOPMRequest * getRootRequest( void ) const
     {
         if (fRequestRoot) return fRequestRoot;
+#if NOT_READY
         if (fCompletionAction) return (IOPMRequest *) this;
+#endif
         return 0;
     }

@@ -597,6 +653,7 @@ public:
         return fTarget;
     }

+#if NOT_READY
     inline bool isCompletionInstalled( void )
     {
         return (fCompletionAction != 0);
@@ -611,6 +668,7 @@ public:
         fCompletionTarget = target;
         fCompletionParam  = param;
     }
+#endif /* NOT_READY */

     static IOPMRequest * create( void );
     bool   init( IOService * owner, IOOptionBits type );
diff --git a/iokit/Kernel/IOServicePrivate.h b/iokit/Kernel/IOServicePrivate.h
index cba83742d..4fc6f9170 100644
--- a/iokit/Kernel/IOServicePrivate.h
+++ b/iokit/Kernel/IOServicePrivate.h
@@ -56,14 +56,11 @@ enum {
     kIOServiceSyncPubState      = 0x08000000,
     kIOServiceConfigState       = 0x04000000,
+    kIOServiceStartState        = 0x02000000,
     kIOServiceTermPhase2State   = 0x01000000,
     kIOServiceTermPhase3State   = 0x00800000,
     kIOServiceTermPhase1State   = 0x00400000,
-    kIOServiceTerm1WaiterState  = 0x00200000
-};
-
-// options for terminate()
-enum {
+    kIOServiceTerm1WaiterState  = 0x00200000,
     kIOServiceRecursing         = 0x00100000,
 };
diff --git a/iokit/Kernel/IOStartIOKit.cpp b/iokit/Kernel/IOStartIOKit.cpp
index 49397d4cb..4a218304d 100644
--- a/iokit/Kernel/IOStartIOKit.cpp
+++ b/iokit/Kernel/IOStartIOKit.cpp
@@ -51,7 +51,7 @@ extern "C" {

 extern void OSlibkernInit (void);

-void iokit_post_constructor_init(void) __attribute__((section("__TEXT, initcode")));
+void iokit_post_constructor_init(void);

 #include
 #include
diff --git a/iokit/Kernel/IOStatistics.cpp b/iokit/Kernel/IOStatistics.cpp
index def362e60..6e72eb495 100644
--- a/iokit/Kernel/IOStatistics.cpp
+++ b/iokit/Kernel/IOStatistics.cpp
@@ -28,6 +28,7 @@
 #include
 #include
+#include

 #include
 #include
@@ -259,10 +260,10 @@ void IOStatistics::onKextUnload(OSKext *kext)
         IOWorkLoopCounter *wlc;
         IOUserClientProcessEntry *uce;

-        /* Free up the list of counters */
+        /* Disconnect workloop counters; cleanup takes place in unregisterWorkLoop() */
         while ((wlc = SLIST_FIRST(&found->workLoopList))) {
             SLIST_REMOVE_HEAD(&found->workLoopList, link);
-            kfree(wlc, sizeof(IOWorkLoopCounter));
+            wlc->parentKext = NULL;
         }

         /* Free up the user client list */
@@ -520,8 +521,9 @@ void IOStatistics::unregisterWorkLoop(IOWorkLoopCounter *counter)
     }

     IORWLockWrite(lock);
-
-    SLIST_REMOVE(&counter->parentKext->workLoopList, counter, IOWorkLoopCounter, link);
+    if (counter->parentKext) {
+        SLIST_REMOVE(&counter->parentKext->workLoopList, counter, IOWorkLoopCounter, link);
+    }
     kfree(counter, sizeof(IOWorkLoopCounter));
     registeredWorkloops--;

@@ -1213,8 +1215,13 @@ KextNode *IOStatistics::getKextNodeFromBacktrace(boolean_t write) {
     vm_offset_t *scanAddr = NULL;
     uint32_t i;
     KextNode *found = NULL, *ke = NULL;
-
-    btCount = OSBacktrace(bt, btCount);
+
+    /*
+     * Gathering the backtrace is a significant source of
+     * overhead. OSBacktrace does many safety checks that
+     * are not needed in this situation.
+     */
+    btCount = fastbacktrace((uintptr_t*)bt, btCount);

     if (write) {
         IORWLockWrite(lock);
diff --git a/iokit/Kernel/IOTimerEventSource.cpp b/iokit/Kernel/IOTimerEventSource.cpp
index e42fa436c..df939da91 100644
--- a/iokit/Kernel/IOTimerEventSource.cpp
+++ b/iokit/Kernel/IOTimerEventSource.cpp
@@ -41,7 +41,6 @@ __END_DECLS
 #include
 #include
-
 #if CONFIG_DTRACE
 #include
 #endif
@@ -118,7 +117,7 @@ void IOTimerEventSource::timeout(void *self)
                 if (trace)
                     IOTimeStampStartConstant(IODBG_TIMES(IOTIMES_ACTION),
-                                             (uintptr_t) doit, (uintptr_t) me->owner);
+                                             VM_KERNEL_UNSLIDE(doit), (uintptr_t) me->owner);

                 (*doit)(me->owner, me);
 #if CONFIG_DTRACE
@@ -127,7 +126,7 @@ void IOTimerEventSource::timeout(void *self)
                 if (trace)
                     IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION),
-                                           (uintptr_t) doit, (uintptr_t) me->owner);
+                                           VM_KERNEL_UNSLIDE(doit), (uintptr_t) me->owner);
             }
             IOStatisticsOpenGate();
             wl->openGate();
@@ -160,7 +159,7 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c)
                 if (trace)
                     IOTimeStampStartConstant(IODBG_TIMES(IOTIMES_ACTION),
-                                             (uintptr_t) doit, (uintptr_t) me->owner);
+                                             VM_KERNEL_UNSLIDE(doit), (uintptr_t) me->owner);

                 (*doit)(me->owner, me);
 #if CONFIG_DTRACE
@@ -169,7 +168,7 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c)
                 if (trace)
                     IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION),
-                                           (uintptr_t) doit, (uintptr_t) me->owner);
+                                           VM_KERNEL_UNSLIDE(doit), (uintptr_t) me->owner);
             }
             IOStatisticsOpenGate();
             wl->openGate();
@@ -370,7 +369,7 @@ IOReturn IOTimerEventSource::wakeAtTime(AbsoluteTime inAbstime)
         reserved->workLoop = workLoop;
         reserved->calloutGeneration++;
         if (thread_call_enter1_delayed((thread_call_t) calloutEntry,
-                (void *) reserved->calloutGeneration, inAbstime))
+                (void *)(uintptr_t) reserved->calloutGeneration, inAbstime))
         {
             release();
             workLoop->release();
diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp
index 29fb9577f..dcc6e69e2 100644
--- a/iokit/Kernel/IOUserClient.cpp
+++ b/iokit/Kernel/IOUserClient.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2011 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 *
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -61,7 +62,7 @@ extern "C" {
 #define SCALAR64(x)             ((io_user_scalar_t)((unsigned int)x))
 #define SCALAR32(x)             ((uint32_t )x)
-#define ARG32(x)                ((void *)SCALAR32(x))
+#define ARG32(x)                ((void *)(uintptr_t)SCALAR32(x))
 #define REF64(x)                ((io_user_reference_t)((UInt64)(x)))
 #define REF32(x)                ((int)(x))
@@ -674,8 +675,10 @@ bool IOServiceUserNotification::handler( void * ref,
     else
         pingMsg->msgHdr.msgh_local_port = NULL;

-    kr = mach_msg_send_from_kernel_proper( &pingMsg->msgHdr,
-                                           pingMsg->msgHdr.msgh_size);
+    kr = mach_msg_send_from_kernel_with_options( &pingMsg->msgHdr,
+                                                 pingMsg->msgHdr.msgh_size,
+                                                 (MACH_SEND_MSG | MACH_SEND_ALWAYS | MACH_SEND_IMPORTANCE),
+                                                 0);
     if( port)
         iokit_release_port( port );
@@ -831,8 +834,10 @@ IOReturn IOServiceMessageUserNotification::handler( void * ref,
     pingMsg->ports[0].name = providerPort;
     thisPort = iokit_port_for_object( this, IKOT_IOKIT_OBJECT );
     pingMsg->msgHdr.msgh_local_port = thisPort;
-    kr = mach_msg_send_from_kernel_proper( &pingMsg->msgHdr,
-                                           pingMsg->msgHdr.msgh_size);
+    kr = mach_msg_send_from_kernel_with_options( &pingMsg->msgHdr,
+                                                 pingMsg->msgHdr.msgh_size,
+                                                 (MACH_SEND_MSG | MACH_SEND_ALWAYS | MACH_SEND_IMPORTANCE),
+                                                 0);
     if( thisPort)
         iokit_release_port( thisPort );
     if( providerPort)
@@ -882,6 +887,16 @@ void IOUserClient::setAsyncReference64(OSAsyncReference64 asyncRef,
     asyncRef[kIOAsyncCalloutRefconIndex] = refcon;
 }

+void IOUserClient::setAsyncReference64(OSAsyncReference64 asyncRef,
+                                       mach_port_t wakePort,
+                                       mach_vm_address_t callback, io_user_reference_t refcon, task_t task)
+{
+    setAsyncReference64(asyncRef, wakePort, callback, refcon);
+    if (vm_map_is_64bit(get_task_map(task))) {
+        asyncRef[kIOAsyncReservedIndex] |= kIOUCAsync64Flag;
+    }
+}
+
 static OSDictionary * CopyConsoleUser(UInt32 uid)
 {
     OSArray * array;
@@ -944,7 +959,7 @@ IOReturn IOUserClient::clientHasPrivilege( void * securityToken,
                 sizeof(kIOClientPrivilegeForeground)))
     {
         /* is graphics access denied for current task? */
-        if (proc_get_task_selfgpuacc_deny() != 0)
+        if (proc_get_effective_task_policy(current_task(), TASK_POLICY_GPU_DENY) != 0)
             return (kIOReturnNotPrivileged);
         else
             return (kIOReturnSuccess);
@@ -1064,16 +1079,16 @@ bool IOUserClient::initWithTask(task_t owningTask,

 bool IOUserClient::reserve()
 {
-    if(!reserved) {
-        reserved = IONew(ExpansionData, 1);
-        if (!reserved) {
-            return false;
-        }
+	if(!reserved) {
+		reserved = IONew(ExpansionData, 1);
+		if (!reserved) {
+			return false;
 		}
-
-    IOStatisticsRegisterCounter();
-
-    return true;
+	}
+	setTerminateDefer(NULL, true);
+	IOStatisticsRegisterCounter();
+
+	return true;
 }

 void IOUserClient::free()
@@ -1179,7 +1194,6 @@ IOReturn IOUserClient::exportObjectToClient(task_t task,
     mach_port_name_t	name;

     name = IOMachPort::makeSendRightForTask( task, obj, IKOT_IOKIT_OBJECT );
-    assert( name );

     *(mach_port_name_t *)clientObj = name;
     return kIOReturnSuccess;
@@ -1273,8 +1287,20 @@ IOReturn IOUserClient::sendAsyncResult(OSAsyncReference reference,
     return (sendAsyncResult64(reference64, result, args64, numArgs));
 }

+IOReturn IOUserClient::sendAsyncResult64WithOptions(OSAsyncReference64 reference,
+        IOReturn result, io_user_reference_t args[], UInt32 numArgs, IOOptionBits options)
+{
+    return _sendAsyncResult64(reference, result, args, numArgs, options);
+}
+
 IOReturn IOUserClient::sendAsyncResult64(OSAsyncReference64 reference,
         IOReturn result, io_user_reference_t args[], UInt32 numArgs)
+{
+    return _sendAsyncResult64(reference, result, args, numArgs, 0);
+}
+
+IOReturn IOUserClient::_sendAsyncResult64(OSAsyncReference64 reference,
+        IOReturn result, io_user_reference_t args[], UInt32 numArgs, IOOptionBits options)
 {
     struct ReplyMsg
     {
@@ -1347,9 +1373,15 @@ IOReturn IOUserClient::sendAsyncResult64(OSAsyncReference64 reference,
             replyMsg.m.msg32.args[idx] = REF32(args[idx]);
     }

-    kr = mach_msg_send_from_kernel_proper( &replyMsg.msgHdr,
-                                           replyMsg.msgHdr.msgh_size);
-    if( KERN_SUCCESS != kr)
+    if ((options & kIOUserNotifyOptionCanDrop) != 0) {
+        kr = mach_msg_send_from_kernel_with_options( &replyMsg.msgHdr,
+                    replyMsg.msgHdr.msgh_size, MACH_SEND_TIMEOUT, MACH_MSG_TIMEOUT_NONE);
+    } else {
+        /* Fail on full queue. */
+        kr = mach_msg_send_from_kernel_proper( &replyMsg.msgHdr,
+                    replyMsg.msgHdr.msgh_size);
+    }
+    if ((KERN_SUCCESS != kr) && (MACH_SEND_TIMED_OUT != kr))
         IOLog("%s: mach_msg_send_from_kernel_proper {%x}\n", __FILE__, kr );
     return kr;
 }
@@ -1954,7 +1986,7 @@ kern_return_t is_io_service_acknowledge_notification(
 {
     CHECK( IOService, _service, service );

-    return( service->acknowledgeNotification( (IONotificationRef) notify_ref,
+    return( service->acknowledgeNotification( (IONotificationRef)(uintptr_t) notify_ref,
                                               (IOOptionBits) response ));
 }
@@ -2368,13 +2400,16 @@ kern_return_t is_io_registry_entry_set_properties

     CHECK( IORegistryEntry, registry_entry, entry );

+    if( propertiesCnt > sizeof(io_struct_inband_t) * 1024)
+        return( kIOReturnMessageTooLarge);
+
     err = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) properties );
     data = CAST_DOWN(vm_offset_t, map_data);

     if( KERN_SUCCESS == err) {

         // must return success after vm_map_copyout() succeeds
-        obj = OSUnserializeXML( (const char *) data );
+        obj = OSUnserializeXML( (const char *) data, propertiesCnt );
         vm_deallocate( kernel_map, data, propertiesCnt );

         if (!obj)
@@ -2505,13 +2540,16 @@ kern_return_t is_io_service_open_extended(
             vm_offset_t data;
             vm_map_offset_t map_data;

+            if( propertiesCnt > sizeof(io_struct_inband_t))
+                return( kIOReturnMessageTooLarge);
+
             err = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t) properties );
             res = err;
             data = CAST_DOWN(vm_offset_t, map_data);
             if (KERN_SUCCESS == err)
             {
                 // must return success after vm_map_copyout() succeeds
-                obj = OSUnserializeXML( (const char *) data );
+                obj = OSUnserializeXML( (const char *) data, propertiesCnt );
                 vm_deallocate( kernel_map, data, propertiesCnt );
                 propertiesDict = OSDynamicCast(OSDictionary, obj);
                 if (!propertiesDict)
@@ -2572,6 +2610,7 @@ kern_return_t is_io_service_open_extended(
                 client->setProperty(kIOUserClientCreatorKey, creatorName);
                 creatorName->release();
             }
+            client->setTerminateDefer(service, false);
         }
     }
     while (false);
@@ -2675,7 +2714,6 @@ kern_return_t is_io_connect_map_memory_into_task
             mach_port_name_t name __unused =
                 IOMachPort::makeSendRightForTask(
                                     into_task, map, IKOT_IOKIT_OBJECT );
-            assert( name );

         } else {
             // keep it with the user client
@@ -3679,25 +3717,25 @@ kern_return_t shim_io_connect_method_scalarI_structureI(
             case 4:
                 err = (object->*func)(  ARG32(input[0]), ARG32(input[1]),
                                         (void *) input[2], ARG32(input[3]),
-                                        inputStruct, (void *)inputStructCount );
+                                        inputStruct, (void *)(uintptr_t)inputStructCount );
                 break;
             case 3:
                 err = (object->*func)(  ARG32(input[0]), ARG32(input[1]),
                                         ARG32(input[2]),
-                                        inputStruct, (void *)inputStructCount,
+                                        inputStruct, (void *)(uintptr_t)inputStructCount,
                                         0 );
                 break;
             case 2:
                 err = (object->*func)(  ARG32(input[0]), ARG32(input[1]),
-                                        inputStruct, (void *)inputStructCount,
+                                        inputStruct, (void *)(uintptr_t)inputStructCount,
                                         0, 0 );
                 break;
             case 1:
                 err = (object->*func)(  ARG32(input[0]),
-                                        inputStruct, (void *)inputStructCount,
+                                        inputStruct, (void *)(uintptr_t)inputStructCount,
                                         0, 0, 0 );
                 break;
             case 0:
-                err = (object->*func)(  inputStruct, (void *)inputStructCount,
+                err = (object->*func)(  inputStruct, (void *)(uintptr_t)inputStructCount,
                                         0, 0, 0, 0 );
                 break;
@@ -3758,29 +3796,29 @@ kern_return_t shim_io_async_method_scalarI_structureI(
                 err = (object->*func)(  reference,
                                         ARG32(input[0]), ARG32(input[1]),
                                         ARG32(input[2]), ARG32(input[3]),
-                                        inputStruct, (void *)inputStructCount );
+                                        inputStruct, (void *)(uintptr_t)inputStructCount );
                 break;
             case 3:
                 err = (object->*func)(  reference,
                                         ARG32(input[0]),
                                         ARG32(input[1]), ARG32(input[2]),
-                                        inputStruct, (void *)inputStructCount,
+                                        inputStruct, (void *)(uintptr_t)inputStructCount,
                                         0 );
                 break;
             case 2:
                 err = (object->*func)(  reference,
                                         ARG32(input[0]), ARG32(input[1]),
-                                        inputStruct, (void *)inputStructCount,
+                                        inputStruct, (void *)(uintptr_t)inputStructCount,
                                         0, 0 );
                 break;
             case 1:
                 err = (object->*func)(  reference,
                                         ARG32(input[0]),
-                                        inputStruct, (void *)inputStructCount,
+                                        inputStruct, (void *)(uintptr_t)inputStructCount,
                                         0, 0, 0 );
                 break;
             case 0:
                 err = (object->*func)(  reference,
-                                        inputStruct, (void *)inputStructCount,
+                                        inputStruct, (void *)(uintptr_t)inputStructCount,
                                         0, 0, 0, 0 );
                 break;
@@ -3845,12 +3883,12 @@ kern_return_t shim_io_connect_method_structureI_structureO(
         if( method->count1) {
             if( method->count0) {
                 err = (object->*func)( input, output,
-                                        (void *)inputCount, outputCount, 0, 0 );
+                                        (void *)(uintptr_t)inputCount, outputCount, 0, 0 );
             } else {
                 err = (object->*func)( output, outputCount, 0, 0, 0, 0 );
             }
         } else {
-                err = (object->*func)( input, (void *)inputCount, 0, 0, 0, 0 );
+                err = (object->*func)( input, (void *)(uintptr_t)inputCount, 0, 0, 0, 0 );
         }
     }
     while( false);
@@ -3900,14 +3938,14 @@ kern_return_t shim_io_async_method_structureI_structureO(
             if( method->count0) {
                 err = (object->*func)( reference,
                                        input, output,
-                                       (void *)inputCount, outputCount, 0, 0 );
+                                       (void *)(uintptr_t)inputCount, outputCount, 0, 0 );
             } else {
                 err = (object->*func)( reference,
                                        output, outputCount, 0, 0, 0, 0 );
             }
         } else {
             err = (object->*func)( reference,
-                                   input, (void *)inputCount, 0, 0, 0, 0 );
+                                   input, (void *)(uintptr_t)inputCount, 0, 0, 0, 0 );
         }
     }
     while( false);
@@ -3943,6 +3981,9 @@ kern_return_t is_io_catalog_send_data(
     if (inData) {
         vm_map_offset_t map_data;

+        if( inDataCount > sizeof(io_struct_inband_t) * 1024)
+            return( kIOReturnMessageTooLarge);
+
         kr = vm_map_copyout( kernel_map, &map_data, (vm_map_copy_t)inData);
         data = CAST_DOWN(vm_offset_t, map_data);

@@ -3952,7 +3993,7 @@ kern_return_t is_io_catalog_send_data(
         // must return success after vm_map_copyout() succeeds
         if( inDataCount ) {
-            obj = (OSObject *)OSUnserializeXML((const char *)data);
+            obj = (OSObject *)OSUnserializeXML((const char *)data, inDataCount);
             vm_deallocate( kernel_map, data, inDataCount );
             if( !obj) {
                 *result = kIOReturnNoMemory;
@@ -3968,7 +4009,7 @@ kern_return_t is_io_catalog_send_data(
             array = OSDynamicCast(OSArray, obj);
             if (array) {
-                if ( !gIOCatalogue->resetAndAddDrivers(array,
+                if ( !gIOCatalogue->resetAndAddDrivers(array,
                     flag == kIOCatalogResetDrivers) ) {

                     kr = kIOReturnError;
@@ -4327,7 +4368,7 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume
         if (kIOUCForegroundOnly & method->flags)
         {
             /* is graphics access denied for current task? */
-            if (proc_get_task_selfgpuacc_deny() != 0)
+            if (proc_get_effective_task_policy(current_task(), TASK_POLICY_GPU_DENY) != 0)
                 return (kIOReturnNotPermitted);
         }
@@ -4377,7 +4418,7 @@ IOReturn IOUserClient::externalMethod( uint32_t selector, IOExternalMethodArgume
         if (kIOUCForegroundOnly & method->flags)
         {
             /* is graphics access denied for current task? */
-            if (proc_get_task_selfgpuacc_deny() != 0)
+            if (proc_get_effective_task_policy(current_task(), TASK_POLICY_GPU_DENY) != 0)
                 return (kIOReturnNotPermitted);
         }

diff --git a/iokit/Kernel/RootDomainUserClient.cpp b/iokit/Kernel/RootDomainUserClient.cpp
index c3835903a..cfc365b51 100644
--- a/iokit/Kernel/RootDomainUserClient.cpp
+++ b/iokit/Kernel/RootDomainUserClient.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -37,6 +37,7 @@
 #include "RootDomainUserClient.h"
 #include
 #include
+#include

 #define super IOUserClient

@@ -79,7 +80,7 @@ IOReturn RootDomainUserClient::secureSleepSystem( uint32_t *return_code )

 IOReturn RootDomainUserClient::secureSleepSystemOptions(
     const void      *inOptions,
-    IOByteCount     inOptionsSize __unused,
+    IOByteCount     inOptionsSize,
     uint32_t        *returnCode)
 {
@@ -99,7 +100,7 @@ IOReturn RootDomainUserClient::secureSleepSystemOptions(
     if (inOptions)
     {
         unserializedOptions = OSDynamicCast( OSDictionary,
-            OSUnserializeXML((const char *)inOptions, &unserializeErrorString));
+            OSUnserializeXML((const char *)inOptions, inOptionsSize, &unserializeErrorString));

         if (!unserializedOptions) {
             IOLog("IOPMRootDomain SleepSystem unserialization failure: %s\n",
@@ -107,9 +108,14 @@ IOReturn RootDomainUserClient::secureSleepSystemOptions(
         }
     }

-    if ( (local_priv || admin_priv)
-        && fOwner )
+    if ( (local_priv || admin_priv) && fOwner )
     {
+        proc_t p;
+        p = (proc_t)get_bsdtask_info(fOwningTask);
+        if (p) {
+            fOwner->setProperty("SleepRequestedByPID", proc_pid(p), 32);
+        }
+
         if (unserializedOptions)
         {
             // Publish Sleep Options in registry under root_domain
@@ -348,6 +354,28 @@ IOReturn RootDomainUserClient::externalMethod(
                         (uint32_t *) &arguments->scalarOutput[0]);
         }
         break;
+
+    case kPMSleepWakeWatchdogEnable:
+        ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator);
+        if (ret == kIOReturnSuccess)
+            fOwner->sleepWakeDebugEnableWdog();
+        break;
+
+
+    case kPMSleepWakeDebugTrig:
+        ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator);
+        if (ret == kIOReturnSuccess)
+            fOwner->sleepWakeDebugTrig(false);
+        break;
+
+    case kPMSetDisplayPowerOn:
+        if (1 == arguments->scalarInputCount)
+        {
+            ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator);
+            if (ret == kIOReturnSuccess)
+                fOwner->setDisplayPowerOn((uint32_t)arguments->scalarInput[0]);
+        }
+        break;

 /*
     case kPMMethodCopySystemTimeline:
         // intentional fallthrough
diff --git a/iokit/Kernel/i386/IOKeyStoreHelper.cpp b/iokit/Kernel/i386/IOKeyStoreHelper.cpp
index fb09d9c2e..a1d41b8d0 100644
--- a/iokit/Kernel/i386/IOKeyStoreHelper.cpp
+++ b/iokit/Kernel/i386/IOKeyStoreHelper.cpp
@@ -90,7 +90,7 @@ IOGetBootKeyStoreData(void)
     ranges.address = args->keyStoreDataStart;
     ranges.length  = args->keyStoreDataSize;

-    options = kIODirectionInOut | kIOMemoryTypePhysical64;
+    options = kIODirectionInOut | kIOMemoryTypePhysical64 | kIOMemoryMapperNone;

     memoryDescriptor = IOMemoryDescriptor::withOptions(&ranges,
                                                        1,
diff --git a/iokit/KernelConfigTables.cpp b/iokit/KernelConfigTables.cpp
index ee06e47e5..de08bbf4b 100644
--- a/iokit/KernelConfigTables.cpp
+++ b/iokit/KernelConfigTables.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -46,6 +46,3 @@ const char * gIOKernelConfigTables =
 * See libkern's OSKext.cpp for other symbols, which have been moved
 * there for sanity.
 */
-#if __i386__
-const char * gIOKernelKmods = "";
-#endif /* __i386__ */
diff --git a/iokit/Makefile b/iokit/Makefile
index 498a2540a..2c53d494f 100644
--- a/iokit/Makefile
+++ b/iokit/Makefile
@@ -8,23 +8,11 @@ include $(MakeInc_cmd)
 include $(MakeInc_def)

 INSTINC_SUBDIRS = IOKit
-INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS}
-INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS}
-INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS}

 EXPINC_SUBDIRS = IOKit
-EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS}
-EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS}
-EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS}
-
-
-SETUP_SUBDIRS =

 COMP_SUBDIRS = conf

-INST_SUBDIRS = User
-
-
 include $(MakeInc_rule)
 include $(MakeInc_dir)
diff --git a/iokit/User/Makefile b/iokit/User/Makefile
deleted file mode 100644
index e1c339083..000000000
--- a/iokit/User/Makefile
+++ /dev/null
@@ -1,42 +0,0 @@
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)
-else
-export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
-endif
-
-do_all:
-	@echo "[ $(SOURCE) ] Starting do_all $(COMPONENT) $(MACH_KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; \
-	($(MKDIR) $(COMPOBJROOT)/User; \
-	cd $(COMPOBJROOT)/User; \
-	${MAKE} MAKEFILES=$(SOURCE)/Makefile.user \
-		TARGET=$(TARGET) \
-		do_build_all \
-	); \
-	echo "[ $(SOURCE) ] Returning do_all $(COMPONENT) $(MACH_KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)";
-
-
-do_build_all: do_all
-
-do_install:
-	@echo "[ $(SOURCE) ] Starting do_install $(COMPONENT) $(MACH_KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"; \
-	($(MKDIR) $(COMPOBJROOT)/User; \
-	cd $(COMPOBJROOT)/User; \
-	${MAKE} MAKEFILES=$(SOURCE)/Makefile.user \
-		TARGET=$(TARGET) \
-		do_build_install \
-	); \
-	echo "[ $(SOURCE) ] Returning do_install $(COMPONENT) $(MACH_KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)";
-
-do_build_install: do_install
-
-include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/iokit/User/Makefile.user b/iokit/User/Makefile.user
deleted file mode 100644
index b8af24563..000000000
--- a/iokit/User/Makefile.user
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# User level code makefile
-#
-
-export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
-export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
-export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
-export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
-include $(MakeInc_cmd)
-include $(MakeInc_def)
-
-INSTALL_DIR=	/usr/lib
-LIB_NAME=	libIOKit
-BUILD_VERS=	.A
-COMPAT_VERS=	1
-LIB_INSTALL_FLAGS=	-c -m 555 -S "-S"
-BUILD_NAME=	$(LIB_NAME)$(BUILD_VERS).dylib
-FRAMEWORK_NAME=	/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit
-
-# ALLARCHLIBS = $(foreach archlib, $(INSTALL_ARCHS), $(OBJROOT)/$(KERNEL_CONFIG)_$(archlib)/$(COMPONENT)/User/$(BUILD_NAME))
-
-do_build_all:
-
-$(DSTROOT)$(INSTALL_DIR)/$(BUILD_NAME):
-	${MKDIR} $(DSTROOT)$(INSTALL_DIR); \
-	(cd $(DSTROOT)$(INSTALL_DIR); \
-	if [ ! -h $(BUILD_NAME) ]; then \
-		$(LN) $(FRAMEWORK_NAME) $(BUILD_NAME); \
-	fi );
-
-$(DSTROOT)$(INSTALL_DIR)/$(LIB_NAME).dylib: $(DSTROOT)$(INSTALL_DIR)/$(BUILD_NAME)
-	(cd $(DSTROOT)$(INSTALL_DIR); \
-	if [ ! -h $(LIB_NAME).dylib ]; then \
-		$(LN) $(BUILD_NAME) $(LIB_NAME).dylib; \
-	fi );
-
-do_build_install: $(DSTROOT)$(INSTALL_DIR)/$(LIB_NAME).dylib
-
-# include $(MakeInc_rule)
-include $(MakeInc_dir)
diff --git a/iokit/bsddev/IOKitBSDInit.cpp b/iokit/bsddev/IOKitBSDInit.cpp
index 35ca86d8d..ac28f446b 100644
--- a/iokit/bsddev/IOKitBSDInit.cpp
+++ b/iokit/bsddev/IOKitBSDInit.cpp
@@ -264,7 +264,7 @@ static bool IORegisterNetworkInterface( IOService * netif )

 OSDictionary * IOOFPathMatching( const char * path, char * buf, int maxLen )
 {
-    OSDictionary * matching;
+    OSDictionary * matching = NULL;
     OSString *     str;
     char *         comp;
     int            len;
@@ -306,6 +306,7 @@ OSDictionary * IOOFPathMatching( const char * path, char * buf, int maxLen )
 }

 static int didRam = 0;
+enum { kMaxPathBuf = 512, kMaxBootVar = 128 };

 kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
 				dev_t * root, u_int32_t * oflags )
@@ -322,7 +323,6 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
     int			mnr, mjr;
     const char  *	mediaProperty = 0;
     char		*rdBootVar;
-    enum {		kMaxPathBuf = 512, kMaxBootVar = 128 };
     char		* str;
     const char	* look = 0;
     int			len;
@@ -389,8 +389,8 @@ kern_return_t IOFindBSDRoot( char * rootName, unsigned int rootNameSize,
 	if((regEntry = IORegistryEntry::fromPath( "/chosen/memory-map", gIODTPlane ))) {	/* Find the map node */
 	    data = (OSData *)regEntry->getProperty("RAMDisk");	/* Find the ram disk, if there */
 	    if(data) {	/* We found one */
-		UInt32 *ramdParms = 0;
-		ramdParms = (UInt32 *)data->getBytesNoCopy();	/* Point to the ram disk base and size */
+		uintptr_t *ramdParms;
+		ramdParms = (uintptr_t *)data->getBytesNoCopy();	/* Point to the ram disk base and size */
 		(void)mdevadd(-1, ml_static_ptovirt(ramdParms[0]) >> 12, ramdParms[1] >> 12, 0);	/* Initialize it and pass back the device number */
 	    }
 	    regEntry->release();	/* Toss the entry */
@@ -582,23 +582,20 @@ iofrootx:
     return( kIOReturnSuccess );
 }

+bool IORamDiskBSDRoot(void)
+{
+    char rdBootVar[kMaxBootVar];
+    if (PE_parse_boot_argn("rd", rdBootVar, kMaxBootVar )
+        || PE_parse_boot_argn("rootdev", rdBootVar, kMaxBootVar )) {
+        if((rdBootVar[0] == 'm') && (rdBootVar[1] == 'd') && (rdBootVar[3] == 0)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 void IOSecureBSDRoot(const char * rootName)
 {
-#if CONFIG_EMBEDDED
-    IOReturn result;
-    IOPlatformExpert *pe;
-    const OSSymbol *functionName = OSSymbol::withCStringNoCopy("SecureRootName");
-
-    while ((pe = IOService::getPlatform()) == 0) IOSleep(1 * 1000);
-
-    // Returns kIOReturnNotPrivileged is the root device is not secure.
-    // Returns kIOReturnUnsupported if "SecureRootName" is not implemented.
- result = pe->callPlatformFunction(functionName, false, (void *)rootName, (void *)0, (void *)0, (void *)0); - - functionName->release(); - - if (result == kIOReturnNotPrivileged) mdevremoveall(); -#endif } void * @@ -677,47 +674,6 @@ kern_return_t IOBSDGetPlatformSerialNumber( char *serial_number_str, u_int32_t l return KERN_SUCCESS; } -dev_t IOBSDGetMediaWithUUID( const char *uuid_cstring, char *bsd_name, int bsd_name_len, int timeout) -{ - dev_t dev = 0; - OSDictionary *dictionary; - OSString *uuid_string; - - if (bsd_name_len < 1) { - return 0; - } - bsd_name[0] = '\0'; - - dictionary = IOService::serviceMatching( "IOMedia" ); - if( dictionary ) { - uuid_string = OSString::withCString( uuid_cstring ); - if( uuid_string ) { - IOService *service; - mach_timespec_t tv = { timeout, 0 }; // wait up to "timeout" seconds for the device - - dictionary->setObject( "UUID", uuid_string ); - dictionary->retain(); - service = IOService::waitForService( dictionary, &tv ); - if( service ) { - OSNumber *dev_major = (OSNumber *) service->getProperty( kIOBSDMajorKey ); - OSNumber *dev_minor = (OSNumber *) service->getProperty( kIOBSDMinorKey ); - OSString *iostr = (OSString *) service->getProperty( kIOBSDNameKey ); - - if( iostr) - strlcpy( bsd_name, iostr->getCStringNoCopy(), bsd_name_len ); - - if ( dev_major && dev_minor ) - dev = makedev( dev_major->unsigned32BitValue(), dev_minor->unsigned32BitValue() ); - } - uuid_string->release(); - } - dictionary->release(); - } - - return dev; -} - - void IOBSDIterateMediaWithContent(const char *content_uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg) { OSDictionary *dictionary; diff --git a/iokit/conf/MASTER b/iokit/conf/MASTER index 3eff425fe..a0558a788 100644 --- a/iokit/conf/MASTER +++ b/iokit/conf/MASTER @@ -91,21 +91,14 @@ options CONFIG_NO_PANIC_STRINGS # options CONFIG_NO_PRINTF_STRINGS # options CONFIG_NO_KPRINTF_STRINGS # -# configurable kernel - general switch to say we are building for an -# embedded device -# -options CONFIG_EMBEDDED # - # secure_kernel - secure kernel from user programs options SECURE_KERNEL # options MACH_ASSERT # -# -# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and -# security/conf MASTER files. 
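
For reference, the IOBSDGetMediaWithUUID() implementation deleted above combined three standard moves: an IOService::serviceMatching("IOMedia") dictionary with the UUID string added, IOService::waitForService() bounded by a mach_timespec_t of "timeout" seconds, and a dev_t assembled from the matched service's BSD major/minor properties. That last step is plain makedev(); a hedged userland illustration, with hard-coded numbers standing in for the kIOBSDMajorKey/kIOBSDMinorKey property values:

#include <sys/types.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical stand-ins for the IOMedia major/minor properties the
     * removed code fetched with getProperty(). */
    unsigned dev_major = 1, dev_minor = 4;

    dev_t dev = makedev(dev_major, dev_minor);
    printf("dev 0x%x -> major %u, minor %u\n",
           (unsigned)dev, (unsigned)major(dev), (unsigned)minor(dev));
    return 0;
}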
-# -options CONFIG_MACF # Mandatory Access Control Framework - options DEVELOPMENT # options DEBUG # + +options CONFIG_MEMORYSTATUS # +options CONFIG_JETSAM # +options CONFIG_FREEZE # diff --git a/iokit/conf/MASTER.i386 b/iokit/conf/MASTER.i386 deleted file mode 100644 index b75268921..000000000 --- a/iokit/conf/MASTER.i386 +++ /dev/null @@ -1,20 +0,0 @@ -###################################################################### -# -# Standard Apple Mac OS Configurations: -# -------- ----- ------ --------------- -# -# RELEASE = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats vm_pressure_events ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug ] -# -# EMBEDDED = [ intel mach iokitcpp hibernation no_kextd bsmall crypto ] -# DEVELOPMENT = [ EMBEDDED config_dtrace development] -# -###################################################################### - -machine "i386" # -cpu "i386" # - -options NO_KEXTD # - -options NO_NESTED_PMAP # diff --git a/iokit/conf/MASTER.x86_64 b/iokit/conf/MASTER.x86_64 index b1fceabab..72f503441 100644 --- a/iokit/conf/MASTER.x86_64 +++ b/iokit/conf/MASTER.x86_64 @@ -3,7 +3,7 @@ # Standard Apple Mac OS Configurations: # -------- ----- ------ --------------- # -# RELEASE = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats vm_pressure_events ] +# RELEASE = [ intel mach iokitcpp hibernation medium crypto config_dtrace config_sleep iokitstats vm_pressure_events memorystatus ] # PROFILE = [ RELEASE profile ] # DEBUG = [ RELEASE debug mach_assert ] # @@ -15,6 +15,12 @@ machine "x86_64" # cpu "x86_64" # +# +# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and +# security/conf MASTER files. +# +options CONFIG_MACF # Mandatory Access Control Framework + options NO_KEXTD # options NO_NESTED_PMAP # diff --git a/iokit/conf/Makefile b/iokit/conf/Makefile index 868b1422b..25a42ef5e 100644 --- a/iokit/conf/Makefile +++ b/iokit/conf/Makefile @@ -3,56 +3,37 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = - -COMP_SUBDIRS = - -INST_SUBDIRS = - -ifndef IOKIT_KERNEL_CONFIG -export IOKIT_KERNEL_CONFIG = $(KERNEL_CONFIG) -endif +MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) -ifneq ($(MACHINE_CONFIG), DEFAULT) -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT) -else -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT) -endif - -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC) - -$(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(ARCH_CONFIG_LC) - $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - $(MKDIR) $${doconf_target}; \ - cd $${doconf_target}; \ +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ + $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/Makefile.template \ + $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/files \ + $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) + $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ + cd $(addsuffix /conf, $(TARGET)); \ rm -f $(notdir $?); \ - cp $? 
$${doconf_target}; \ - if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d $(TARGET)/$(IOKIT_KERNEL_CONFIG) $(IOKIT_KERNEL_CONFIG); \ - ); - -do_all: $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG)/Makefile - $(_v)next_source=$(subst conf/,,$(SOURCE)); \ - next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH)); \ - ${MAKE} -C $(COMPOBJROOT)/$(IOKIT_KERNEL_CONFIG) \ - MAKEFILES=$(TARGET)/$(IOKIT_KERNEL_CONFIG)/Makefile \ - SOURCE=$${next_source} \ - RELATIVE_SOURCE_PATH=$${next_relsource} \ - TARGET=$(TARGET) \ - INCL_MAKEDEP=FALSE \ - KERNEL_CONFIG=$(IOKIT_KERNEL_CONFIG) \ + cp $? .; \ + if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ + $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); + +do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile + $(_v)${MAKE} \ + -C $(TARGET)/$(CURRENT_KERNEL_CONFIG) \ + -f $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile \ + CURRENT_KERNEL_CONFIG=${CURRENT_KERNEL_CONFIG} \ + CURRENT_ARCH_CONFIG=${CURRENT_ARCH_CONFIG} \ + CURRENT_MACHINE_CONFIG=${CURRENT_MACHINE_CONFIG} \ + SOURCE=$(subst conf/,,$(SOURCE)) \ + TARGET=${TARGET} \ + OBJPATH=${OBJPATH} \ build_all; -do_build_all: do_all +do_build_all:: do_all include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/iokit/conf/Makefile.i386 b/iokit/conf/Makefile.i386 deleted file mode 100644 index df2fbb323..000000000 --- a/iokit/conf/Makefile.i386 +++ /dev/null @@ -1,16 +0,0 @@ -###################################################################### -#BEGIN Machine dependent Makefile fragment for i386 -###################################################################### - -# Files that must go in the __HIB segment: -UNCONFIGURED_HIB_FILES= \ - IOHibernateRestoreKernel.o - -HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS)) - -IOHibernateRestoreKernel.o_CFLAGS_ADD += -fno-stack-protector - -###################################################################### -#END Machine dependent Makefile fragment for i386 -###################################################################### - diff --git a/iokit/conf/Makefile.template b/iokit/conf/Makefile.template index 96fe217a1..4a72c656f 100644 --- a/iokit/conf/Makefile.template +++ b/iokit/conf/Makefile.template @@ -5,16 +5,6 @@ # the terms and conditions for use and redistribution. 
# -# -# Export IDENT for sub-makefiles -# -export IDENT - -# -# XXX: INCFLAGS -# -INCFLAGS_MAKEFILE= $(INCFLAGS_POSIX) -I$(SOURCE) -I$(SOURCE)include - export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule @@ -27,18 +17,30 @@ include $(MakeInc_def) # XXX: CFLAGS # CFLAGS+= -include meta_features.h -DDRIVER_PRIVATE \ - -DIOMATCHDEBUG=1 -DIOALLOCDEBUG=1 \ - -include meta_features.h $(CFLAGS_INLINE_CONFIG) + -DIOKIT_KERNEL_PRIVATE -DIOMATCHDEBUG=1 -DIOALLOCDEBUG=1 #-DIOKITDEBUG=-1 CWARNFLAGS = $(CWARNFLAGS_STD) -Wno-unused-parameter CXXWARNFLAGS = $(CXXWARNFLAGS_STD) -Wno-unused-parameter -Wno-cast-qual -Wno-shadow +# Objects that don't want -Wcast-align warning (8474835) +IOHibernateRestoreKernel.o_CWARNFLAGS_ADD = -Wno-cast-align +CXXOBJS_NO_CAST_ALIGN = \ + IODMACommand.cpo \ + IODataQueue.cpo \ + IOHibernateIO.cpo \ + IOMemoryDescriptor.cpo \ + IONVRAM.cpo \ + IOPMrootDomain.cpo \ + IOSharedDataQueue.cpo \ + IOUserClient.cpo + +$(foreach file,$(CXXOBJS_NO_CAST_ALIGN),$(eval $(call add_perfile_cxxflags,$(file),-Wno-cast-align))) + CFLAGS_RELEASE += -DIOASSERT=0 +CFLAGS_DEVELOPMENT += -DIOASSERT=1 CFLAGS_DEBUG += -DIOASSERT=1 -SFLAGS+= -DKERNEL - # # Directories for mig generated files # @@ -62,13 +64,8 @@ COMP_SUBDIRS = %CFILES -%MFILES - %SFILES -%BFILES - -%ORDERED %MACHDEP # @@ -78,31 +75,22 @@ COMP_SUBDIRS = # ${OBJS}: ${OBJSDEPS} - -%LOAD - LDOBJS = $(OBJS) $(COMPONENT).filelist: $(LDOBJS) - $(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \ - for hib_file in ${HIB_FILES}; \ + $(_v)for hib_file in ${HIB_FILES}; \ do \ - $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \ + $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \ mv $${hib_file}__ $${hib_file} ; \ - done; \ - fi + done @echo LDFILELIST $(COMPONENT) $(_v)( for obj in ${LDOBJS}; do \ - echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist -do_depend: do_all - $(_v)${MD} -u Makedep -f -d `ls *.d` - - do_all: $(COMPONENT).filelist -do_build_all: do_depend +do_build_all:: do_all %RULES diff --git a/iokit/conf/Makefile.x86_64 b/iokit/conf/Makefile.x86_64 index 39d2cc065..89d432cc6 100644 --- a/iokit/conf/Makefile.x86_64 +++ b/iokit/conf/Makefile.x86_64 @@ -8,7 +8,8 @@ UNCONFIGURED_HIB_FILES= \ HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS)) -IOHibernateRestoreKernel.o_CFLAGS_ADD += -fno-stack-protector +# Unconfigured __HIB files must be Mach-O for "setsegname" +IOHibernateRestoreKernel.o_CFLAGS_ADD += -fno-stack-protector $(CFLAGS_NOLTO_FLAG) ###################################################################### #END Machine dependent Makefile fragment for x86_64 diff --git a/iokit/conf/files.i386 b/iokit/conf/files.i386 deleted file mode 100644 index 5f337b90a..000000000 --- a/iokit/conf/files.i386 +++ /dev/null @@ -1,15 +0,0 @@ - -# libIOKit - -iokit/Kernel/IOSyncer.cpp optional iokitcpp - -# Shared lock - -iokit/Kernel/i386/IOSharedLock.s standard -iokit/Kernel/i386/IOAsmSupport.s standard - -# Power Domains -iokit/Kernel/IOPMrootDomain.cpp optional iokitcpp - -# Key Store helper -iokit/Kernel/i386/IOKeyStoreHelper.cpp standard diff --git a/kgmacros b/kgmacros deleted file mode 100644 index 6a12bb11c..000000000 --- a/kgmacros +++ /dev/null @@ -1,13670 +0,0 @@ - -# Kernel gdb macros -# -# These gdb macros should be useful during kernel development in -# determining what's going on in the kernel. 
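
On the -Wno-cast-align additions in the Makefile.template hunk above (applied per file through add_perfile_cxxflags, with radar 8474835 cited in the comment): clang's -Wcast-align fires whenever a pointer cast increases the alignment the pointee type requires, which descriptor- and queue-walking code does deliberately all the time. A short C illustration of the diagnostic being opted out of:

#include <stdint.h>

/* Compiled with clang -Wcast-align this warns: a uint8_t * (1-byte
 * alignment) is cast to uint32_t * (4-byte alignment). The hunk above
 * silences exactly this class of warning for the listed IOKit objects. */
uint32_t read32(const uint8_t *p)
{
    return *(const uint32_t *)p;
}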
-# -# All the convenience variables used by these macros begin with $kgm_ - -set print asm-demangle on -set cp-abi gnu-v2 - -# This option tells gdb to relax its stack tracing heuristics -# Useful for debugging across stack switches -# (to the interrupt stack, for instance). Requires gdb-675 or greater. -set backtrace sanity-checks off - -echo Loading Kernel GDB Macros package. Type "help kgm" for more info.\n - -define kgm -printf "" -echo These are the gdb macros for kernel debugging. Type "help kgm" for more info.\n -end - -document kgm -| These are the kernel gdb macros. These gdb macros are intended to be -| used when debugging a remote kernel via the kdp protocol. Typically, you -| would connect to your remote target like so: -| (gdb) target remote-kdp -| (gdb) attach -| -| The following macros are available in this package: -| showversion Displays a string describing the remote kernel version -| -| showalltasks Display a summary listing of all tasks -| showallthreads Display info about all threads in the system -| showallstacks Display the stack for each thread in the system -| showcurrentthreads Display info about the thread running on each cpu -| showcurrentstacks Display the stack for the thread running on each cpu -| showallvm Display a summary listing of all the vm maps -| showallvme Display a summary listing of all the vm map entries -| showallipc Display a summary listing of all the ipc spaces -| showipcsummary Display a summary listing of the ipc spaces of all tasks -| showallrights Display a summary listing of all the ipc rights -| showallkexts Display a summary listing of all loaded kexts (alias: showallkmods) -| showallknownkexts Display a summary listing of all kexts, loaded or not -| showallbusyports Display a listing of all ports with unread messages -| showallprocessors Display a listing of all psets and processors -| -| showallclasses Display info about all OSObject subclasses in the system -| showobject Show info about an OSObject - its vtable ptr and retain count, & more info for simple container classes. 
-| showregistry Show info about all registry entries in the current plane -| showregistryprops Show info about all registry entries in the current plane, and their properties -| showregistryentry Show info about a registry entry; its properties and descendants in the current plane -| setregistryplane Set the plane to be used for the iokit registry macros (pass zero for list) -| -| setfindregistrystr Set the encoded string for matching with -| findregistryentry or findregistryprop (created from -| strcmp_arg_pack64) -| findregistryentry Find a registry entry that matches the encoded string -| findregistryentries Find all the registry entries that match the encoded string -| findregistryprop Search the registry entry for a property that matches -| the encoded string -| -| showtask Display info about the specified task -| showtaskthreads Display info about the threads in the task -| showtaskstacks Display the stack for each thread in the task -| showtaskvm Display info about the specified task's vm_map -| showtaskvme Display info about the task's vm_map entries -| showtaskipc Display info about the specified task's ipc space -| showtaskrights Display info about the task's ipc space entries -| showtaskrightsbt Display info about the task's ipc space entries with back traces -| showtaskbusyports Display all of the task's ports with unread messages -| -| showact Display info about a thread specified by activation -| showactstack Display the stack for a thread specified by activation -| -| showmap Display info about the specified vm_map -| showmapvme Display a summary list of the specified vm_map's entries -| -| showipc Display info about the specified ipc space -| showrights Display a summary list of all the rights in an ipc space -| -| showpid Display info about the process identified by pid -| showproc Display info about the process identified by proc struct -| showprocinfo Display detailed info about the process identified by proc struct -| showprocfiles Given a proc_t pointer, display the list of open file descriptors -| showproclocks Given a proc_t pointer, display the list of advisory file locks -| zombproc Print out all procs in the zombie list -| showproctree Show all the processes in a hierarchical tree form -| allproc Print out all process in the system not in the zombie list -| zombstacks Print out all stacks of tasks that are exiting -| -| showinitchild Print out all processes in the system which are children of init process -| -| showkext Display info about a kext (alias: showkmod) -| showkextaddr Given an address, display the kext and offset (alias: showkmodaddr) -| -| dumpcallqueue Dump out all the entries given a queue head -| -| showallmtx Display info about mutexes usage -| showallrwlck Display info about reader/writer locks usage -| -| zprint Display info about the memory zones -| showioalloc Display info about iokit allocations -| paniclog Display the panic log info -| -| switchtoact Switch to different context specified by activation -| switchtoctx Switch to different context -| showuserstack Display numeric backtrace of the user stack for an -| activation -| showtaskuserstacks Display user stacks for a specified task -| showuserregisters Display user registers for the specified thread -| showtaskuserregisters Display user registers for the specified task -| -| switchtouserthread Switch to the user context of the specified thread -| resetstacks Return to the original kernel context -| -| resetctx Reset context -| resume_on Resume when detaching from gdb -| resume_off Don't 
resume when detaching from gdb -| -| sendcore Configure kernel to send a coredump to the specified IP -| sendsyslog Configure kernel to send a system log to the specified IP -| sendpaniclog Configure kernel to send a panic log to the specified IP -| disablecore Configure the kernel to disable coredump transmission -| getdumpinfo Retrieve the current remote dump parameters -| setdumpinfo Configure the remote dump parameters -| -| switchtocorethread Corefile version of "switchtoact" -| resetcorectx Corefile version of "resetctx" -| -| readphys8 Reads the specified untranslated address (8-bit read) -| readphys16 Reads the specified untranslated address (16-bit read) -| readphys32 Reads the specified untranslated address (32-bit read) -| readphys64 Reads the specified untranslated address (64-bit read) -| writephys8 Writes to the specified untranslated address (8-bit write) -| writephys16 Writes to the specified untranslated address (16-bit write) -| writephys32 Writes to the specified untranslated address (32-bit write) -| writephys64 Writes to the specified untranslated address (64-bit write) -| -| readioport8 Read 8-bits from the specified I/O Port -| readioport16 Read 16-bits from the specified I/O Port -| readioport32 Read 32-bits from the specified I/O Port -| writeioport8 Write 8-bits into the specified I/O Port -| writeioport16 Write 16-bits into the specified I/O Port -| writeioport32 Write 32-bits into the specified I/O Port -| -| readmsr64 Read 64-bits from the specified MSR -| writemsr64 Write 64-bits into the specified MSR -| -| rtentry_showdbg Print the debug information of a route entry -| rtentry_trash Walk the list of trash route entries -| -| inifa_showdbg Print the debug information of an IPv4 interface address -| in6ifa_showdbg Print the debug information of an IPv6 interface address -| inm_showdbg Print the debug information of an IPv4 multicast address -| ifma_showdbg Print the debug information of a link multicast address -| ifpref_showdbg Print the debug information of an interface ref count -| -| ndpr_showdbg Print the debug information of a nd_prefix structure -| nddr_showdbg Print the debug information of a nd_defrouter structure -| -| imo_showdbg Print the debug information of a ip_moptions structure -| im6o_showdbg Print the debug information of a ip6_moptions structure -| -| inifa_trash Walk the list of trash in_ifaddr entries -| in6ifa_trash Walk the list of trash in6_ifaddr entries -| inm_trash Walk the list of trash in_multi entries -| in6m_trash Walk the list of trash in6_multi entries -| ifma_trash Walk the list of trash ifmultiaddr entries -| -| mbuf_walkpkt Walk the mbuf packet chain (m_nextpkt) -| mbuf_walk Walk the mbuf chain (m_next) -| mbuf_buf2slab Find the slab structure of the corresponding buffer -| mbuf_buf2mca Find the mcache audit structure of the corresponding mbuf -| mbuf_showmca Print the contents of an mbuf mcache audit structure -| mbuf_showactive Print all active/in-use mbuf objects -| mbuf_showinactive Print all freed/in-cache mbuf objects -| mbuf_showall Print all mbuf objects -| mbuf_slabs Print all slabs in the group -| mbuf_slabstbl Print slabs table -| mbuf_stat Print extended mbuf allocator statistics -| mbuf_countchain Count the length of an mbuf chain -| mbuf_topleak Print the top suspected mbuf leakers -| mbuf_traceleak Print the leak information for a given leak address -| -| mcache_walkobj Walk the mcache object chain (obj_next) -| mcache_stat Print all mcaches in the system -| mcache_showcache Display the number of objects in 
the cache -| -| showbootargs Display boot arguments passed to the target kernel -| showbootermemorymap Dump phys memory map from EFI -| -| systemlog Display the kernel's printf ring buffer -| -| hexdump Show the contents of memory as a hex/ASCII dump -| -| showvnodepath Print the path for a vnode -| showvnodelocks Display list of advisory locks held/blocked on a vnode -| showvnodedev Display information about a device vnode -| showtty Display information about a struct tty -| showallvols Display a summary of mounted volumes -| showvnode Display info about one vnode -| showvolvnodes Display info about all vnodes of a given volume -| showvolbusyvnodes Display info about busy (iocount!=0) vnodes of a given volume -| showallbusyvnodes Display info about all busy (iocount!=0) vnodes -| showallvnodes Display info about all vnodes -| print_vnode Print out the fields of a vnode struct -| showprocvnodes Print out all the open fds which are vnodes in a process -| showallprocvnodes Print out all the open fds which are vnodes in any process -| showmountvnodes Print the vnode list -| showmountallvnodes Print the vnode inactive list -| showworkqvnodes Print the vnode worker list -| shownewvnodes Print the new vnode list -| -| ifconfig display ifconfig-like output -| showifnets show the list of attached and detached interfaces -| showifaddrs show the list of addresses for the given ifp -| showifmultiaddrs show the list of multicast addresses for the given ifp -| showinmultiaddrs show the list of IPv4 multicast addresses records -| showin6multiaddrs show the list of IPv6 multicast addresses records -| -| showsocket Display information about a socket -| showprocsockets Given a proc_t pointer, display information about its sockets -| showallprocsockets Display information about the sockets of all the processes -| -| show_tcp_pcbinfo Display the list of the TCP protocol control blocks -| show_tcp_timewaitslots Display the list of the TCP protocol control blocks in TIMEWAIT -| show_udp_pcbinfo Display the list of UDP protocol control blocks -| -| show_rt_inet Display the IPv4 routing table -| show_rt_inet6 Display the IPv6 routing table -| -| showpmworkqueue Display the IOPMWorkQueue object -| showregistrypmstate Display power management state for all IOPower registry entries -| showioservicepm Display the IOServicePM object -| showstacksaftertask showallstacks starting after a given task -| showstacksafterthread showallstacks starting after a given thread -| -| showMCAstate Print machine-check register state after MC exception. -| -| showallgdbstacks Cause GDB to trace all thread stacks -| showallgdbcorestacks Corefile equivalent of "showallgdbstacks" -| kdp-reenter Schedule reentry into the debugger and continue. 
-| kdp-reboot Restart remote target -| kdp-version Get KDP version number -| -| zstack Print zalloc caller stack (zone leak debugging) -| findoldest Find oldest zone leak debugging record -| countpcs Print how often a pc occurs in the zone leak log -| -| showtopztrace Print the ztrace with the most outstanding allocated memory -| showztrace Print a backtrace record given its index -| showzalloc Print an allocation record + stacktrace at index -| showztraceaddr Print a backtrace record given its address -| showztracesabove Print all the backtrace records with a size bigger than X -| showzstacktrace Symbolicate and print a stored OSBacktrace -| -| showztraces Finds all in-use traces in the ztraces table -| showzallocs Finds all in-use allocations in the zallocs table -| showzstats Shows the statistics gathered about the hash tables -| -| showzallocsfortrace Print all the allocations that refer to a trace -| showztracehistogram Prints a histogram of the ztraces table -| showzallochistogram Prints a histogram of the zallocs table -| -| pmap_walk Perform a page-table walk -| pmap_vtop Translate a virtual address to physical address -| -| showuserdyldinfo Show dyld information and error messages -| in the target task -| showuserlibraries Show binary images known by dyld in the -| target task -| showallvmstats Prints a summary of vm statistics in a table format -| memstats Displays memory statistics in a table format -| -| showthreadfortid Displays the address of the thread structure -| for a given thread_id value. -| -| strcmp_nomalloc A version of strcmp that avoids the use of malloc -| through the use of encoded strings created via -| strcmp_arg_pack64. -| strcmp_arg_pack64 Pack a string into a 64-bit quantity for use by -| strcmp_nomalloc -| -| pci_cfg_read8 Read 8-bits from a PCI config space register -| pci_cfg_read16 Read 16-bits from a PCI config space register -| pci_cfg_read32 Read 32-bits from a PCI config space register -| pci_cfg_write8 Write 8-bits into a PCI config space register -| pci_cfg_write16 Write 16-bits into a PCI config space register -| pci_cfg_write32 Write 32-bits into a PCI config space register -| pci_cfg_dump Dump entire config space for a PCI device -| pci_cfg_scan Perform a scan for PCI devices -| pci_cfg_dump_all Dump config spaces for all detected PCI devices -| -| lapic_read32 Read APIC entry -| lapic_write32 Write APIC entry -| lapic_dump Dump APIC entries -| -| ioapic_read32 Read IOAPIC entry -| ioapic_write32 Write IOAPIC entry -| ioapic_dump Dump IOAPIC entries -| -| showallproviders Display summary listing of all dtrace_providers -| showallmodctls Display summary listing of all dtrace modctls -| showmodctl Display info about a dtrace modctl -| showfbtprobe Display info about an fbt probe given an id (traverses fbt_probetab) -| processortimers Display all processor timers, noting any inconsistencies -| -| maplocalcache Enable local caching in GDB for improved debug speeds -| flushlocalcahe Disable local caching in GDB (deletes all memory regions) -| -| Type "help " for more specific help on a particular macro. -| Type "show user " to see what the macro is really doing. -end - -# This macro should appear before any symbol references, to facilitate -# a gdb "source" without a loaded symbol file. -define showversion - kdp-kernelversion -end - -document showversion -Syntax: showversion -| Read the kernel version string from a fixed address in low -| memory. Useful if you don't know which kernel is on the other end, -| and need to find the appropriate symbols. 
Beware that if you've -| loaded a symbol file, but aren't connected to a remote target, -| the version string from the symbol file will be displayed instead. -| This macro expects to be connected to the remote kernel to function -| correctly. -end - -set $kgm_mtype_ppc = 0x00000012 -set $kgm_mtype_arm = 0x0000000C - -set $kgm_mtype_i386 = 0x00000007 -set $kgm_mtype_x86_64 = 0x01000007 -set $kgm_mtype_x86_any = $kgm_mtype_i386 -set $kgm_mtype_x86_mask = 0xFEFFFFFF - -set $kgm_mtype = ((unsigned int *)&_mh_execute_header)[1] -set $kgm_lp64 = $kgm_mtype & 0x01000000 - -set $kgm_manual_pkt_ppc = 0x549C -set $kgm_manual_pkt_i386 = 0x249C -set $kgm_manual_pkt_x86_64 = 0xFFFFFF8000002930 -set $kgm_manual_pkt_arm = 0xFFFF04A0 - -set $kgm_kdp_pkt_data_len = 128 - -# part of data packet -set $kgm_kdp_pkt_hdr_req_off = 0 -set $kgm_kdp_pkt_hdr_seq_off = 1 -set $kgm_kdp_pkt_hdr_len_off = 2 -set $kgm_kdp_pkt_hdr_key_off = 4 - -# after data packet -set $kgm_kdp_pkt_len_off = $kgm_kdp_pkt_data_len -set $kgm_kdp_pkt_input_off = $kgm_kdp_pkt_data_len + 4 - -set $kgm_kdp_pkt_hostreboot = 0x13 -set $kgm_kdp_pkt_hdr_size = 8 - - -set $kgm_readphys_force_kdp = 0 -set $kgm_readphys_force_physmap = 0 - -set $kgm_lcpu_self = 0xFFFE - -set $kgm_reg_depth = 0 -set $kgm_reg_depth_max = 0xFFFF -set $kgm_reg_plane = (IORegistryPlane *) gIOServicePlane -set $kgm_namekey = (OSSymbol *) 0 -set $kgm_childkey = (OSSymbol *) 0 - -set $kgm_show_object_addrs = 0 -set $kgm_show_object_retain = 0 -set $kgm_show_props = 0 -set $kgm_show_data_alwaysbytes = 0 - -set $kgm_show_kmod_syms = 0 - -# send a manual packet header that doesn't require knowing the location -# of everything. -define manualhdrint - set $req = $arg0 - - set $hdrp = (uint32_t *) $kgm_manual_pkt_i386 - if ($kgm_mtype == $kgm_mtype_ppc) - set $hdrp = (uint32_t *) $kgm_manual_pkt_ppc - set $req = $req << 1 # shift to deal with endiannness - end - if ($kgm_mtype == $kgm_mtype_x86_64) - set $hdrp = (uint64_t *) $kgm_manual_pkt_x86_64 - end - if ($kgm_mtype == $kgm_mtype_arm) - set $hdrp = (uint32_t *) $kgm_manual_pkt_arm - end - - set $pkt_hdr = *$hdrp - set *((uint8_t *) ($pkt_hdr + $kgm_kdp_pkt_input_off)) = 0 - set *((uint32_t *) ($pkt_hdr + $kgm_kdp_pkt_len_off)) = $kgm_kdp_pkt_hdr_size - - set *((uint8_t *) ($pkt_hdr + $kgm_kdp_pkt_hdr_req_off)) = $req - set *((uint8_t *) ($pkt_hdr + $kgm_kdp_pkt_hdr_seq_off)) = 0 - set *((uint16_t *) ($pkt_hdr + $kgm_kdp_pkt_hdr_len_off)) = $kgm_kdp_pkt_hdr_size - set *((uint32_t *) ($pkt_hdr + $kgm_kdp_pkt_hdr_key_off)) = 0 - set *((uint8_t *) ($pkt_hdr + $kgm_kdp_pkt_input_off)) = 1 - - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header -end - -# Print a pointer -define showptr - if $kgm_lp64 - printf "0x%016llx", $arg0 - else - printf "0x%08x", $arg0 - end -end - -# for headers, leave 8 chars for LP64 pointers -define showptrhdrpad - if $kgm_lp64 - printf " " - end -end - -# Print a userspace pointer, using $kgm_tasp -define showuserptr - set $kgm_userptr_task_64 = ( $kgm_taskp->taskFeatures[0] & 0x80000000) - if $kgm_userptr_task_64 - printf "0x%016llx", $arg0 - else - printf "0x%08x", $arg0 - end -end - -define showkmodheader - printf "kmod_info " - showptrhdrpad - printf " address " - showptrhdrpad - printf " size " - showptrhdrpad - printf " id refs version name\n" -end - -define showkmodint - set $kgm_kmodp = (struct kmod_info *)$arg0 - showptr $kgm_kmodp - printf " " - showptr $kgm_kmodp->address - printf " " - showptr $kgm_kmodp->size - printf " " - printf "%3d ", $kgm_kmodp->id - 
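
The manualhdrint macro above writes a KDP request directly into the kernel's manual-packet buffer at a fixed per-architecture address ($kgm_manual_pkt_*), so the debugger can issue requests before any symbols are loaded. Reconstructed from the $kgm_kdp_pkt_* offsets it uses, the assumed layout looks roughly like this C picture (the struct and field names are mine, not xnu's):

#include <stdint.h>

#define KDP_PKT_DATA_LEN 128                /* $kgm_kdp_pkt_data_len */

struct kdp_manual_pkt {                     /* hypothetical names */
    struct {
        uint8_t  request;                   /* offset 0, $kgm_kdp_pkt_hdr_req_off  */
        uint8_t  seq;                       /* offset 1, $kgm_kdp_pkt_hdr_seq_off  */
        uint16_t len;                       /* offset 2, set to the header size, 8 */
        uint32_t key;                       /* offset 4, $kgm_kdp_pkt_hdr_key_off  */
        uint8_t  payload[KDP_PKT_DATA_LEN - 8];
    } data;                                 /* 128-byte data area                  */
    uint32_t pkt_len;                       /* offset 128, $kgm_kdp_pkt_len_off    */
    uint8_t  input;                         /* offset 132, $kgm_kdp_pkt_input_off  */
};

The protocol is visible in the macro body: clear input, fill in the header and lengths, then store 1 to input to hand the packet to the kernel (on big-endian ppc the request value is first shifted left by one, per the endianness comment).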
printf "%5d ", $kgm_kmodp->reference_count - printf "%10s ", $kgm_kmodp->version - printf "%s\n", $kgm_kmodp->name -end - -# cached info of the last kext found, to speed up subsequent lookups -set $kgm_pkmod = 0 -set $kgm_pkmodst = 0 -set $kgm_pkmoden = 0 - -define showkmodaddrint - showptr $arg0 - if ((unsigned long)$arg0 >= (unsigned long)$kgm_pkmodst) && ((unsigned long)$arg0 < (unsigned long)$kgm_pkmoden) - set $kgm_off = ((unsigned long)$arg0 - (unsigned long)$kgm_pkmodst) - printf " <%s + 0x%x>", $kgm_pkmod->name, $kgm_off - else - set $kgm_kmodp = (struct kmod_info *)kmod - if ($kgm_mtype == $kgm_mtype_x86_64) && ($arg0 >= (unsigned long)&_mh_execute_header) - # kexts are loaded below the kernel for x86_64 - set $kgm_kmodp = 0 - end - while $kgm_kmodp - set $kgm_off = ((unsigned long)$arg0 - (unsigned long)$kgm_kmodp->address) - if ($kgm_kmodp->address <= $arg0) && ($kgm_off < $kgm_kmodp->size) - printf " <%s + 0x%x>", $kgm_kmodp->name, $kgm_off - set $kgm_pkmod = $kgm_kmodp - set $kgm_pkmodst = $kgm_kmodp->address - set $kgm_pkmoden = $kgm_pkmodst + $kgm_kmodp->size - set $kgm_kmodp = 0 - else - set $kgm_kmodp = $kgm_kmodp->next - end - end - end -end - -define showkmodaddr - showkmodaddrint $arg0 -end -document showkmodaddr -Syntax: (gdb) showkmodaddr -| Given an address, print the offset and name for the kmod containing it -end - -define showkmod - showkmodheader - showkmodint $arg0 -end -document showkmod -Syntax: (gdb) showkmod -| Routine to print info about a kext -end - -define showkext - showkmod $arg0 -end -document showkext -Syntax: (gdb) showkext -| Routine to print info about a kext -end - -define showallkmods - showkmodheader - set $kgm_kmodp = (struct kmod_info *)kmod - while $kgm_kmodp - showkmodint $kgm_kmodp - set $kgm_kmodp = $kgm_kmodp->next - end -end -document showallkmods -Syntax: (gdb) showallkmods -| Routine to print a summary listing of all loaded kexts -end - -define showallkexts - showallkmods -end -document showallkexts -Syntax: (gdb) showallkexts -| Routine to print a summary listing of all loaded kexts -end - -# See OSKextVersion.c for the C code this is based on -# -set $KGM_OSKEXT_VERS_MAJ_MULT = 100000000 -set $KGM_OSKEXT_VERS_MIN_MULT = 1000000 -set $KGM_OSKEXT_VERS_REV_MULT = 10000 -set $KGM_OSKEXT_VERS_STAGE_MULT = 1000 - -define printoskextversion - set $vers_scratch = $arg0 - - if ($vers_scratch == -1) - printf "(invalid)" - else - - set $vers_major = $vers_scratch / $KGM_OSKEXT_VERS_MAJ_MULT - - set $vers_scratch = $vers_scratch - ($vers_major * $KGM_OSKEXT_VERS_MAJ_MULT) - set $vers_minor = $vers_scratch / $KGM_OSKEXT_VERS_MIN_MULT - - set $vers_scratch = $vers_scratch - ( $vers_minor * $KGM_OSKEXT_VERS_MIN_MULT) - set $vers_revision = $vers_scratch / $KGM_OSKEXT_VERS_REV_MULT - - set $vers_scratch = $vers_scratch - ( $vers_revision * $KGM_OSKEXT_VERS_REV_MULT) - set $vers_stage = $vers_scratch / $KGM_OSKEXT_VERS_STAGE_MULT - - set $vers_scratch = $vers_scratch - ( $vers_stage * $KGM_OSKEXT_VERS_STAGE_MULT) - set $vers_stagelevel = $vers_scratch - - printf "%d.%d", $vers_major, $vers_minor - if ($vers_revision > 0) - printf ".%d", $vers_revision - end - - if ($vers_stage == 1) - printf "d" - end - if ($vers_stage == 3) - printf "a" - end - if ($vers_stage == 5) - printf "b" - end - if ($vers_stage == 7) - printf "fc" - end - if ($vers_stage == 1 || $vers_stage == 3 || $vers_stage == 5 || $vers_stage == 7) - printf "%d", $vers_stagelevel - end - end -end - -define showallknownkexts - set $kext_count = sKextsByID->count - set $kext_index = 0 - 
printf "%d kexts in sKextsByID:\n", $kext_count - - printf "OSKext * " - showptrhdrpad - printf "load_addr " - showptrhdrpad - - printf " id name (version)\n" - - while $kext_index < $kext_count - set $kext_id = sKextsByID->dictionary[$kext_index].key->string - set $oskext = (OSKext *)sKextsByID->dictionary[$kext_index].value - - showptr $oskext - printf " " - - if ($oskext->flags.loaded) - showptr $oskext->kmod_info - printf " " - printf "%3d", $oskext->loadTag - else - showptrhdrpad - printf " -------- " - printf " " - printf " --" - end - printf " " - - printf "%.64s (", $kext_id - printoskextversion (uint64_t)$oskext->version - printf ")\n" - set $kext_index = $kext_index + 1 - end -end -document showallknownkexts -Syntax: (gdb) showallknownkexts -| Routine to print a summary listing of all kexts, loaded or not -end - -define showactheader - printf " " - showptrhdrpad - printf " thread " - showptrhdrpad - printf " thread_id " - showptrhdrpad - printf " processor " - showptrhdrpad - printf " pri io_policy state wait_queue" - showptrhdrpad - printf " wait_event\n" -end - - -define showactint - printf " " - showptrhdrpad - set $kgm_thread = *(struct thread *)$arg0 - showptr $arg0 - if ($kgm_thread.static_param) - printf "[WQ]" - else - printf " " - end - printf " 0x%llx ", $kgm_thread.thread_id - showptr $kgm_thread.last_processor - printf " %3d ", $kgm_thread.sched_pri - if ($kgm_thread.uthread != 0) - set $kgm_printed = 0 - set $kgm_uthread = (struct uthread *)$kgm_thread.uthread - if ($kgm_uthread->uu_flag & 0x400) - printf "RAGE " - else - printf " " - end - set $diskpolicy = 0 - if ($kgm_thread->ext_appliedstate.hw_disk != 0) - set $diskpolicy = $kgm_thread->ext_appliedstate.hw_disk - else - if ($kgm_thread->appliedstate.hw_disk != 0) - set $diskpolicy = $kgm_thread->appliedstate.hw_disk - end - end - if ($kgm_thread->ext_appliedstate.hw_bg != 0) - set $diskpolicy = 5 - end - if ($kgm_thread->appliedstate.hw_bg != 0) - set $diskpolicy = 4 - end - if ($diskpolicy == 2) - printf "PASS " - set $kgm_printed = 1 - end - if ($diskpolicy == 3) - printf "THROT " - set $kgm_printed = 1 - end - if ($diskpolicy == 4) - printf "BG_THRT " - set $kgm_printed = 1 - end - if ($diskpolicy == 5) - printf "EBG_THRT" - set $kgm_printed = 1 - end - if ($kgm_printed == 0) - printf " " - end - end - set $kgm_state = $kgm_thread.state - if $kgm_state & 0x80 - printf "I" - end - if $kgm_state & 0x40 - printf "P" - end - if $kgm_state & 0x20 - printf "A" - end - if $kgm_state & 0x10 - printf "H" - end - if $kgm_state & 0x08 - printf "U" - end - if $kgm_state & 0x04 - printf "R" - end - if $kgm_state & 0x02 - printf "S" - end - if $kgm_state & 0x01 - printf "W" - printf "\t " - showptr $kgm_thread.wait_queue - printf " " - if (((unsigned long)$kgm_thread.wait_event > (unsigned long)&last_kernel_symbol) \ - && ($arg1 != 2) && ($kgm_show_kmod_syms == 0)) - showkmodaddr $kgm_thread.wait_event - else - output /a $kgm_thread.wait_event - end - if ($kgm_thread.uthread != 0) - set $kgm_uthread = (struct uthread *)$kgm_thread.uthread - if ($kgm_uthread->uu_wmesg != 0) - printf "\t \"%s\"", $kgm_uthread->uu_wmesg - end - end - end - if ($kgm_thread.uthread != 0) - set $kgm_uthread = (struct uthread *)$kgm_thread.uthread - if ($kgm_uthread->pth_name && $kgm_uthread->pth_name[0]) - printf "\n\t\tThread Name: %s", $kgm_uthread->pth_name - end - end - if $arg1 != 0 - if ($kgm_thread.kernel_stack != 0) - if ($kgm_thread.uthread != 0) - printf "\n " - set $kgm_uthread = (struct uthread *)$kgm_thread.uthread - if 
($kgm_uthread->uu_kwe.kwe_kwqqueue != 0) - set $kwq = (ksyn_wait_queue_t)$kgm_uthread->uu_kwe.kwe_kwqqueue - printf " kwq_lockcount:0x%x; kwq_retval:0x%x", $kgm_uthread->uu_kwe.kwe_lockseq, $kgm_uthread->uu_kwe.kwe_psynchretval - printf "\n " - show_kwq $kwq - printf " " - end - end - if ($kgm_thread.reserved_stack != 0) - printf "\n " - showptrhdrpad - printf " reserved_stack=" - showptr $kgm_thread.reserved_stack - end - printf "\n " - showptrhdrpad - printf " kernel_stack=" - showptr $kgm_thread.kernel_stack - if ($kgm_mtype == $kgm_mtype_ppc) - set $mysp = $kgm_thread.machine.pcb->save_r1 - end - if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) - set $kgm_statep = (struct x86_kernel_state *) \ - ($kgm_thread->kernel_stack + kernel_stack_size \ - - sizeof(struct x86_kernel_state)) - if ($kgm_mtype == $kgm_mtype_i386) - set $mysp = $kgm_statep->k_ebp - else - set $mysp = $kgm_statep->k_rbp - end - end - if ($kgm_mtype == $kgm_mtype_arm) - if (((unsigned long)$r7 < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \ - && ((unsigned long)$r7 > (unsigned long) ($kgm_thread->kernel_stack))) - set $mysp = $r7 - else - set $kgm_statep = (struct arm_saved_state *)$kgm_thread.machine.kstackptr - set $mysp = $kgm_statep->r[7] - end - end - set $prevsp = $mysp - 16 - printf "\n " - showptrhdrpad - printf " stacktop=" - showptr $mysp - if ($kgm_mtype == $kgm_mtype_ppc) - set $stkmask = 0xf - else - set $stkmask = 0x3 - end - set $kgm_return = 0 - set $kgm_actint_framecount = 0 - while ($mysp != 0) && (($mysp & $stkmask) == 0) \ - && ($mysp != $prevsp) \ - && ((((unsigned long) $mysp - (unsigned long) $prevsp) < 0x4000) \ - || (((unsigned long)$mysp < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \ - && ((unsigned long)$mysp > (unsigned long) ($kgm_thread->kernel_stack)))) \ - && ($kgm_actint_framecount < 128) - printf "\n " - set $kgm_actint_framecount = $kgm_actint_framecount + 1 - showptrhdrpad - printf " " - showptr $mysp - printf " " - if ($kgm_mtype == $kgm_mtype_ppc) - set $kgm_return = *($mysp + 8) - end - if ($kgm_mtype == $kgm_mtype_i386) - set $kgm_return = *($mysp + 4) - end - if ($kgm_mtype == $kgm_mtype_x86_64) - set $kgm_return = *(unsigned long *)($mysp + 8) - end - if ($kgm_mtype == $kgm_mtype_arm) - set $kgm_return = *($mysp + 4) - end - if (((unsigned long) $kgm_return < (unsigned long) &_mh_execute_header || \ - (unsigned long) $kgm_return >= (unsigned long) &last_kernel_symbol ) \ - && ($kgm_show_kmod_syms == 0)) - showkmodaddr $kgm_return - else - output /a $kgm_return - end - set $prevsp = $mysp - set $mysp = *(unsigned long *)$mysp - end - set $kgm_return = 0 - printf "\n " - showptrhdrpad - printf " stackbottom=" - showptr $prevsp - else - printf "\n " - showptrhdrpad - printf " continuation=" - output /a $kgm_thread.continuation - end - printf "\n" - else - printf "\n" - end -end - -define showact - showactheader - showactint $arg0 0 -end -document showact -Syntax: (gdb) showact -| Routine to print out the state of a specific thread. -end - - -define showactstack - showactheader - showactint $arg0 1 -end -document showactstack -Syntax: (gdb) showactstack -| Routine to print out the stack of a specific thread. 
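
The stack walk in showactint above is a frame-pointer chase: on x86_64 it seeds the walk from k_rbp in the thread's saved x86_kernel_state, reads the return address at FP + 8, follows *FP to the caller's frame, and applies sanity checks along the way (pointer alignment against $stkmask, plausible stack bounds, at most 128 frames). A hedged C sketch of the same loop, with the bounds check simplified and all names mine:

#include <stdint.h>
#include <stdio.h>

struct frame {
    struct frame *fp;   /* saved frame pointer, *FP            */
    uintptr_t     ret;  /* return address, *(FP + 8) on x86_64 */
};

static void walk_stack(struct frame *fp, uintptr_t stack_lo, uintptr_t stack_hi)
{
    int depth = 0;
    while (fp != NULL &&
           ((uintptr_t)fp & 0x3) == 0 &&    /* same mask as $stkmask */
           (uintptr_t)fp >= stack_lo &&
           (uintptr_t)fp <  stack_hi &&     /* stay on this kstack   */
           depth++ < 128) {                 /* the macro's frame cap */
        printf("FP %p  return %p\n", (void *)fp, (void *)fp->ret);
        fp = fp->fp;
    }
}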
-end - - -define showallthreads - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - showactheader - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - showactint $kgm_actp 0 - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end -end -document showallthreads -Syntax: (gdb) showallthreads -| Routine to print out info about all threads in the system. -end - -define showprocessorint - set $kgm_processor_int = (struct processor *)$arg0 - printf "Processor " - showptr $kgm_processor_int - printf " State %d (cpu_id 0x%x)\n", ($kgm_processor_int)->state, ($kgm_processor_int)->cpu_id -end - -define showcurrentthreads - set $kgm_prp = (struct processor *)processor_list - while $kgm_prp != 0 - showprocessorint $kgm_prp - if ($kgm_prp)->active_thread != 0 - set $kgm_actp = ($kgm_prp)->active_thread - showtaskheader - showtaskint ($kgm_actp)->task - showactheader - showactint $kgm_actp 0 - printf "\n" - end - set $kgm_prp = ($kgm_prp)->processor_list - end -end -document showcurrentthreads -Syntax: (gdb) showcurrentthreads -| Routine to print out info about the thread running on each cpu. -end - - -define _showrunqint - set $kgm_runq = (struct run_queue *)$arg0 - - printf " Priority Run Queue Info: Count %d\n", $kgm_runq->count - set $kgm_runq_queue_i = 0 - set $kgm_runq_queue_count = sizeof($kgm_runq->queues)/sizeof($kgm_runq->queues[0]) - while $kgm_runq->count && $kgm_runq_queue_i < $kgm_runq_queue_count - set $kgm_runq_queue_head = &$kgm_runq->queues[$kgm_runq_queue_i] - set $kgm_runq_queue_p = $kgm_runq_queue_head->next - if $kgm_runq_queue_p != $kgm_runq_queue_head - set $kgm_runq_queue_this_count = 0 - while $kgm_runq_queue_p != $kgm_runq_queue_head - set $kgm_runq_queue_this_count = $kgm_runq_queue_this_count + 1 - showtask ((thread_t)$kgm_runq_queue_p)->task - showactstack $kgm_runq_queue_p - set $kgm_runq_queue_p = $kgm_runq_queue_p->next - end - printf " Queue Priority %3d [", $kgm_runq_queue_i - showptr $kgm_runq_queue_head - printf "] Count %d\n", $kgm_runq_queue_this_count - end - set $kgm_runq_queue_i = $kgm_runq_queue_i + 1 - end - -end - -define _showgrrrint - set $kgm_grrr_runq = $arg0 - - printf " GRRR Info: Count %d Weight %d Current Group ", $kgm_grrr_runq->count, $kgm_grrr_runq->weight - showptr $kgm_grrr_runq->current_group - printf "\n" - set $kgm_grrr_group_i = 0 - set $kgm_grrr_group_count = sizeof($kgm_grrr_runq->groups)/sizeof($kgm_grrr_runq->groups[0]) - while $kgm_grrr_runq->count && $kgm_grrr_group_i < $kgm_grrr_group_count - set $kgm_grrr_group = &$kgm_grrr_runq->groups[$kgm_grrr_group_i] - if $kgm_grrr_group->count > 0 - printf " Group %3d [", $kgm_grrr_group->index - showptr $kgm_grrr_group - printf "] Count %d Weight %d\n", $kgm_grrr_group->count, $kgm_grrr_group->weight - set $kgm_grrr_group_client_head = &$kgm_grrr_group->clients - set $kgm_grrr_group_client = $kgm_grrr_group_client_head->next - while $kgm_grrr_group_client != $kgm_grrr_group_client_head - # showtask ((thread_t)$kgm_grrr_group_client)->task - # showactstack $kgm_grrr_group_client - set $kgm_grrr_group_client = $kgm_grrr_group_client->next - end - end - set $kgm_grrr_group_i = $kgm_grrr_group_i + 1 - end -end - -define showallprocessors - set $kgm_pset = &pset0 - - set $kgm_show_grrr = 0 - set 
$kgm_show_priority_runq = 0 - set $kgm_show_priority_pset_runq = 0 - set $kgm_show_fairshare_grrr = 0 - set $kgm_show_fairshare_list = 0 - - if _sched_enum == 1 - set $kgm_show_priority_runq = 1 - set $kgm_show_fairshare_list = 1 - end - if _sched_enum == 2 - set $kgm_show_priority_pset_runq = 1 - set $kgm_show_fairshare_list = 1 - end - if _sched_enum == 4 - set $kgm_show_grrr = 1 - set $kgm_show_fairshare_grrr = 1 - end - if _sched_enum == 5 - set $kgm_show_priority_runq = 1 - set $kgm_show_fairshare_list = 1 - end - if _sched_enum == 6 - set $kgm_show_priority_pset_runq = 1 - set $kgm_show_fairshare_list = 1 - end - - while $kgm_pset != 0 - printf "Processor Set " - showptr $kgm_pset - printf " Count %d (cpu_id 0x%x-0x%x)\n", ($kgm_pset)->cpu_set_count, ($kgm_pset)->cpu_set_low, ($kgm_pset)->cpu_set_hi - printf " Active Processors:\n" - set $kgm_active_queue_head = &($kgm_pset)->active_queue - set $kgm_active_elt = $kgm_active_queue_head->next - while $kgm_active_elt != $kgm_active_queue_head - set $kgm_processor = (processor_t)$kgm_active_elt - printf " " - showprocessorint $kgm_processor - - if $kgm_show_priority_runq - set $kgm_runq = &$kgm_processor->runq - _showrunqint $kgm_runq - end - if $kgm_show_grrr - set $kgm_grrr_runq = &$kgm_processor->grrr_runq - _showgrrrint $kgm_grrr_runq - end - - if $kgm_processor->processor_meta != 0 && $kgm_processor->processor_meta->primary == $kgm_processor - set $kgm_processor_meta_idle_head = &$kgm_processor->processor_meta->idle_queue - set $kgm_processor_meta_idle = $kgm_processor_meta_idle_head->next - while $kgm_processor_meta_idle != $kgm_processor_meta_idle_head - printf " Idle Meta Processor: " - showprocessorint $kgm_processor_meta_idle - set $kgm_processor_meta_idle = $kgm_processor_meta_idle->next - end - end - - set $kgm_active_elt = $kgm_active_elt->next - end - printf " Idle Processors:\n" - set $kgm_idle_queue_head = &($kgm_pset)->idle_queue - set $kgm_idle_elt = $kgm_idle_queue_head->next - while $kgm_idle_elt != $kgm_idle_queue_head - set $kgm_processor = (processor_t)$kgm_idle_elt - printf " " - showprocessorint $kgm_processor - - if $kgm_processor->processor_meta != 0 && $kgm_processor->processor_meta->primary == $kgm_processor - set $kgm_processor_meta_idle_head = &$kgm_processor->processor_meta->idle_queue - set $kgm_processor_meta_idle = $kgm_processor_meta_idle_head->next - while $kgm_processor_meta_idle != $kgm_processor_meta_idle_head - printf " Idle Meta Processor: " - showprocessorint $kgm_processor_meta_idle - set $kgm_processor_meta_idle = $kgm_processor_meta_idle->next - end - end - - set $kgm_idle_elt = $kgm_idle_elt->next - end - - if $kgm_show_priority_pset_runq - set $kgm_runq = &$kgm_pset->pset_runq - printf "\n" - _showrunqint $kgm_runq - end - set $kgm_pset = ($kgm_pset)->pset_list - end - - printf "\n" - printf "Realtime Queue Count %d\n", rt_runq.count - set $kgm_rt_runq_head = &rt_runq.queue - set $kgm_rt_runq = $kgm_rt_runq_head->next - while $kgm_rt_runq != $kgm_rt_runq_head - showtask ((thread_t)$kgm_rt_runq)->task - showact $kgm_rt_runq - set $kgm_rt_runq = $kgm_rt_runq->next - end - - printf "\n" - if $kgm_show_fairshare_list - printf "Fair Share Queue Count %d\n", fs_runq.count - set $kgm_fs_runq_head = &fs_runq.queue - set $kgm_fs_runq = $kgm_fs_runq_head->next - while $kgm_fs_runq != $kgm_fs_runq_head - showtask ((thread_t)$kgm_fs_runq)->task - showact $kgm_fs_runq - set $kgm_fs_runq = $kgm_fs_runq->next - end - end - if $kgm_show_fairshare_grrr - printf "Fair Share Queue Count %d\n", 
fs_grrr_runq.count - set $kgm_fs_grrr = &fs_grrr_runq - _showgrrrint $kgm_fs_grrr - end -end -document showallprocessors -Syntax: (gdb) showallprocessors -| Routine to print out info about all psets and processors -end - -set $decode_wait_events = 0 -define showallstacks - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - showactheader - if ($decode_wait_events > 0) - showactint $kgm_actp 1 - else - showactint $kgm_actp 2 - end - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end - - printf "\nZombie Processes:\n" - zombstacks -end - -document showallstacks -Syntax: (gdb) showallstacks -| Routine to print out the stack for each thread in the system. -| If the variable $decode_wait_events is non-zero, the routine attempts to -| interpret thread wait_events as kernel module offsets, which can add to -| processing time. -end - -define showcurrentstacks - set $kgm_prp = processor_list - while $kgm_prp != 0 - showprocessorint $kgm_prp - if ($kgm_prp)->active_thread != 0 - set $kgm_actp = ($kgm_prp)->active_thread - showtaskheader - showtaskint ($kgm_actp)->task - showactheader - showactint $kgm_actp 1 - printf "\n" - end - set $kgm_prp = ($kgm_prp)->processor_list - end -end - -document showcurrentstacks -Syntax: (gdb) showcurrentstacks -| Routine to print out the thread running on each cpu (incl. its stack) -end - -define showwaiterheader - printf "waiters thread " - printf "processor pri state wait_queue wait_event\n" -end - -define showwaitqwaiters - set $kgm_w_waitqp = (WaitQueue*)$arg0 - set $kgm_w_linksp = &($kgm_w_waitqp->wq_queue) - set $kgm_w_wqe = (WaitQueueElement *)$kgm_w_linksp->next - set $kgm_w_found = 0 - while ( (queue_entry_t)$kgm_w_wqe != (queue_entry_t)$kgm_w_linksp) - if ($kgm_w_wqe->wqe_type != &_wait_queue_link) - if !$kgm_w_found - set $kgm_w_found = 1 - showwaiterheader - end - set $kgm_w_shuttle = (struct thread *)$kgm_w_wqe - showactint $kgm_w_shuttle 0 - end - set $kgm_w_wqe = (WaitQueueElement *)$kgm_w_wqe->wqe_links.next - end -end - -define showwaitqwaitercount - set $kgm_wc_waitqp = (WaitQueue*)$arg0 - set $kgm_wc_linksp = &($kgm_wc_waitqp->wq_queue) - set $kgm_wc_wqe = (WaitQueueElement *)$kgm_wc_linksp->next - set $kgm_wc_count = 0 - while ( (queue_entry_t)$kgm_wc_wqe != (queue_entry_t)$kgm_wc_linksp) - if ($kgm_wc_wqe->wqe_type != &_wait_queue_link) - set $kgm_wc_count = $kgm_wc_count + 1 - end - set $kgm_wc_wqe = (WaitQueueElement *)$kgm_wc_wqe->wqe_links.next - end - printf "0x%08x ", $kgm_wc_count -end - -define showwaitqmembercount - set $kgm_mc_waitqsetp = (WaitQueueSet*)$arg0 - set $kgm_mc_setlinksp = &($kgm_mc_waitqsetp->wqs_setlinks) - set $kgm_mc_wql = (WaitQueueLink *)$kgm_mc_setlinksp->next - set $kgm_mc_count = 0 - while ( (queue_entry_t)$kgm_mc_wql != (queue_entry_t)$kgm_mc_setlinksp) - set $kgm_mc_count = $kgm_mc_count + 1 - set $kgm_mc_wql = (WaitQueueLink *)$kgm_mc_wql->wql_setlinks.next - end - printf "0x%08x ", $kgm_mc_count -end - - -define showwaitqmemberheader - printf "set-members wait_queue interlock " - printf "pol type member_cnt waiter_cnt\n" -end - -define showwaitqmemberint - set $kgm_m_waitqp = (WaitQueue*)$arg0 - printf " 0x%08x ", $kgm_m_waitqp - printf "0x%08x ", 
$kgm_m_waitqp->wq_interlock.lock_data - if ($kgm_m_waitqp->wq_fifo) - printf "Fifo " - else - printf "Prio " - end - if ($kgm_m_waitqp->wq_type == 0xf1d1) - printf "Set " - showwaitqmembercount $kgm_m_waitqp - else - printf "Que 0x00000000 " - end - showwaitqwaitercount $kgm_m_waitqp - printf "\n" -end - - -define showwaitqmemberofheader - printf "member-of wait_queue interlock " - printf "pol type member_cnt waiter_cnt\n" -end - -define showwaitqmemberof - set $kgm_mo_waitqp = (WaitQueue*)$arg0 - set $kgm_mo_linksp = &($kgm_mo_waitqp->wq_queue) - set $kgm_mo_wqe = (WaitQueueElement *)$kgm_mo_linksp->next - set $kgm_mo_found = 0 - while ( (queue_entry_t)$kgm_mo_wqe != (queue_entry_t)$kgm_mo_linksp) - if ($kgm_mo_wqe->wqe_type == &_wait_queue_link) - if !$kgm_mo_found - set $kgm_mo_found = 1 - showwaitqmemberofheader - end - set $kgm_mo_wqlp = (WaitQueueLink *)$kgm_mo_wqe - set $kgm_mo_wqsetp = (WaitQueue*)($kgm_mo_wqlp->wql_setqueue) - showwaitqmemberint $kgm_mo_wqsetp - end - set $kgm_mo_wqe = (WaitQueueElement *)$kgm_mo_wqe->wqe_links.next - end -end - -define showwaitqmembers - set $kgm_ms_waitqsetp = (WaitQueueSet*)$arg0 - set $kgm_ms_setlinksp = &($kgm_ms_waitqsetp->wqs_setlinks) - set $kgm_ms_wql = (WaitQueueLink *)$kgm_ms_setlinksp->next - set $kgm_ms_found = 0 - while ( (queue_entry_t)$kgm_ms_wql != (queue_entry_t)$kgm_ms_setlinksp) - set $kgm_ms_waitqp = $kgm_ms_wql->wql_element.wqe_queue - if !$kgm_ms_found - showwaitqmemberheader - set $kgm_ms_found = 1 - end - showwaitqmemberint $kgm_ms_waitqp - set $kgm_ms_wql = (WaitQueueLink *)$kgm_ms_wql->wql_setlinks.next - end -end - -define showwaitqheader - printf "wait_queue ref_count interlock " - printf "pol type member_cnt waiter_cnt\n" -end - -define showwaitqint - set $kgm_waitqp = (WaitQueue*)$arg0 - printf "0x%08x ", $kgm_waitqp - if ($kgm_waitqp->wq_type == 0xf1d1) - printf "0x%08x ", ((WaitQueueSet*)$kgm_waitqp)->wqs_refcount - else - printf "0x00000000 " - end - printf "0x%08x ", $kgm_waitqp->wq_interlock.lock_data - if ($kgm_waitqp->wq_fifo) - printf "Fifo " - else - printf "Prio " - end - if ($kgm_waitqp->wq_type == 0xf1d1) - printf "Set " - showwaitqmembercount $kgm_waitqp - else - printf "Que 0x00000000 " - end - showwaitqwaitercount $kgm_waitqp - printf "\n" -end - -define showwaitq - set $kgm_waitq1p = (WaitQueue*)$arg0 - showwaitqheader - showwaitqint $kgm_waitq1p - if ($kgm_waitq1p->wq_type == 0xf1d1) - showwaitqmembers $kgm_waitq1p - else - showwaitqmemberof $kgm_waitq1p - end - showwaitqwaiters $kgm_waitq1p -end - -define showmapheader - printf "vm_map " - showptrhdrpad - printf " pmap " - showptrhdrpad - printf " vm_size " - showptrhdrpad - printf " #ents rpage hint " - showptrhdrpad - printf " first_free\n" -end - -define showvmeheader - printf " entry " - showptrhdrpad - printf " start prot #page object " - showptrhdrpad - printf " offset\n" -end - -define showvmint - set $kgm_mapp = (vm_map_t)$arg0 - set $kgm_map = *$kgm_mapp - showptr $arg0 - printf " " - showptr $kgm_map.pmap - printf " " - showptr $kgm_map.size - printf " %3d ", $kgm_map.hdr.nentries - if $kgm_map.pmap - printf "%5d ", $kgm_map.pmap->stats.resident_count - else - printf " " - end - showptr $kgm_map.hint - printf " " - showptr $kgm_map.first_free - printf "\n" - if $arg1 != 0 - showvmeheader - set $kgm_head_vmep = &($kgm_mapp->hdr.links) - set $kgm_vmep = $kgm_map.hdr.links.next - while (($kgm_vmep != 0) && ($kgm_vmep != $kgm_head_vmep)) - set $kgm_vme = *$kgm_vmep - printf " " - showptr $kgm_vmep - printf " 0x%016llx ", $kgm_vme.links.start - 
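
A key to the vm-map entry columns printed by showvmint (header above, per-entry formatting continuing below): prot shows protection and max_protection as one hex rwx digit each, inheritance renders as S(hare)/C(opy)/-(none)/D(onate), and #page is (end - start) >> 12, i.e. the entry size in 4 KB pages. A quick check of that arithmetic, with hypothetical entry bounds:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t start = 0x100000000ULL;  /* hypothetical entry bounds */
    uint64_t end   = 0x100012000ULL;
    unsigned prot  = 0x5;             /* r-x: VM_PROT_READ | VM_PROT_EXECUTE */

    printf("%1x%1x ", prot, 0x7u);    /* cur/max protection nibbles: "57" */
    printf("%6llu pages\n",
           (unsigned long long)((end - start) >> 12));  /* 18 */
    return 0;
}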
printf "%1x", $kgm_vme.protection - printf "%1x", $kgm_vme.max_protection - if $kgm_vme.inheritance == 0x0 - printf "S" - end - if $kgm_vme.inheritance == 0x1 - printf "C" - end - if $kgm_vme.inheritance == 0x2 - printf "-" - end - if $kgm_vme.inheritance == 0x3 - printf "D" - end - if $kgm_vme.is_sub_map - printf "s " - else - if $kgm_vme.needs_copy - printf "n " - else - printf " " - end - end - printf "%6d ",($kgm_vme.links.end - $kgm_vme.links.start) >> 12 - showptr $kgm_vme.object.vm_object - printf " 0x%016llx\n", $kgm_vme.offset - set $kgm_vmep = $kgm_vme.links.next - end - end - printf "\n" -end - - -define showmapwiredp - set $kgm_mapp = (vm_map_t)$arg0 - set $kgm_map = *$kgm_mapp - set $kgm_head_vmep = &($kgm_mapp->hdr.links) - set $kgm_vmep = $kgm_map.hdr.links.next - set $kgm_objp_prev = (struct vm_object *)0 - if $arg1 == 0 - set $kgm_saw_kernel_obj = 0 - set $kgm_wired_count = 0 - set $kgm_objp_print_space = 1 - else - set $kgm_objp_print_space = 0 - end - while (($kgm_vmep != 0) && ($kgm_vmep != $kgm_head_vmep)) - set $kgm_vme = *$kgm_vmep - set $kgm_objp = $kgm_vme.object.vm_object - if $kgm_vme.is_sub_map - if $arg1 == 0 - set $kgm_mapp_orig = $kgm_mapp - set $kgm_vmep_orig = $kgm_vmep - set $kgm_vme_orig = $kgm_vme - set $kgm_head_vmep_orig = $kgm_head_vmep - printf "\n****" - showptr $kgm_objp - showmapwiredp $kgm_objp 1 - set $kgm_vme = $kgm_vme_orig - set $kgm_vmep = $kgm_vmep_orig - set $kgm_mapp = $kgm_mapp_orig - set $kgm_head_vmep = $kgm_head_vmep_orig - set $kgm_objp = (struct vm_object *)0 - else - printf "\n????" - showptr $kgm_mapp - printf " " - showptr $kgm_vmep - set $kgm_objp = (struct vm_object *)0 - printf "\n" - end - end - if ($kgm_objp == $kgm_objp_prev) - set $kgm_objp = (struct vm_object *)0 - end - if $kgm_objp == kernel_object - if $kgm_saw_kernel_obj - set $kgm_objp = (struct vm_object *)0 - end - set $kgm_saw_kernel_obj = 1 - end - if $kgm_objp && $kgm_objp->wired_page_count - if $kgm_objp_print_space == 1 - printf " " - showptr $kgm_mapp - end - set $kgm_objp_print_space = 1 - printf " " - showptr $kgm_vmep - printf " 0x%016llx ", $kgm_vme.links.start - printf "%5d", $kgm_vme.alias - printf "%6d ",($kgm_vme.links.end - $kgm_vme.links.start) >> 12 - showptr $kgm_objp - printf "[%3d]", $kgm_objp->ref_count - printf "%7d\n", $kgm_objp->wired_page_count - set $kgm_wired_count = $kgm_wired_count + $kgm_objp->wired_page_count - set $kgm_objp_prev = $kgm_objp - end - set $kgm_vmep = $kgm_vme.links.next - end - if $arg1 == 0 - printf "total wired count = %d\n", $kgm_wired_count - end -end - -define showmapwired - printf " map " - showptrhdrpad - printf " entry " - showptrhdrpad - printf " start alias #page object " - showptrhdrpad - printf " wired\n" - showmapwiredp $arg0 0 -end -document showmapwired -Syntax: (gdb) showmapwired -| Routine to print out a summary listing of all the entries with wired pages in a vm_map -end - -define showmapvme - showmapheader - showvmint $arg0 1 -end -document showmapvme -Syntax: (gdb) showmapvme -| Routine to print out a summary listing of all the entries in a vm_map -end - - -define showmap - showmapheader - showvmint $arg0 0 -end -document showmap -Syntax: (gdb) showmap -| Routine to print out info about the specified vm_map -end - -define showallvm - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showmapheader - showtaskint $kgm_taskp - showvmint $kgm_taskp->map 0 - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) 
- end -end -document showallvm -Syntax: (gdb) showallvm -| Routine to print a summary listing of all the vm maps -end - - -define showallvme - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showmapheader - showtaskint $kgm_taskp - showvmint $kgm_taskp->map 1 - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end -end -document showallvme -Syntax: (gdb) showallvme -| Routine to print a summary listing of all the vm map entries -end - - -define showipcheader - printf "ipc_space " - showptrhdrpad - printf " is_task " - showptrhdrpad - printf " is_table " - showptrhdrpad - printf " flags ports table_next " - showptrhdrpad - printf " low_mod high_mod\n" -end - -define showipceheader - printf " " - showptrhdrpad - printf "object " - showptrhdrpad - showptrhdrpad - printf "name rite urefs destname " - showptrhdrpad - printf "destination\n" -end - -define showipceint - set $kgm_ie = *(ipc_entry_t)$arg0 - printf " " - showptrhdrpad - showptr $kgm_ie.ie_object - showptrhdrpad - printf " 0x%08x ", $arg1 - if $kgm_ie.ie_bits & 0x00100000 - printf "Dead " - printf "%5d\n", $kgm_ie.ie_bits & 0xffff - else - if $kgm_ie.ie_bits & 0x00080000 - printf "SET " - printf "%5d\n", $kgm_ie.ie_bits & 0xffff - else - if $kgm_ie.ie_bits & 0x00010000 - if $kgm_ie.ie_bits & 0x00020000 - printf " SR" - else - printf " S" - end - else - if $kgm_ie.ie_bits & 0x00020000 - printf " R" - end - end - if $kgm_ie.ie_bits & 0x00040000 - printf " O" - end - if $kgm_ie.index.request - set $kgm_port = (ipc_port_t)$kgm_ie.ie_object - set $kgm_requests = $kgm_port->ip_requests - set $kgm_req_soright = $kgm_requests[$kgm_ie.index.request].notify.port - if $kgm_req_soright -# Armed send-possible notification? - if (uintptr_t)$kgm_req_soright & 0x1 - printf "s" - else -# Delayed send-possible notification? - if (uintptr_t)$kgm_req_soright & 0x2 - printf "d" - else -# Dead-name notification - printf "n" - end - end - else - printf " " - end - else - printf " " - end -# Collision (with tree)? 
- if $kgm_ie.ie_bits & 0x00800000 - printf "c" - else - printf " " - end - printf "%5d ", $kgm_ie.ie_bits & 0xffff - showportdest $kgm_ie.ie_object - end - end -end - -define showipcint - set $kgm_isp = (ipc_space_t)$arg0 - set $kgm_is = *$kgm_isp - showptr $arg0 - printf " " - showptr $kgm_is.is_task - printf " " - showptr $kgm_is.is_table - printf " " - if ($kgm_is.is_bits & 0x40000000) == 0 - printf "A" - else - printf " " - end - if ($kgm_is.is_bits & 0x20000000) != 0 - printf "G " - else - printf " " - end - printf "%5d ", $kgm_is.is_table_size - showptr $kgm_is.is_table_next - printf " " - printf "%10d ", $kgm_is.is_low_mod - printf "%10d", $kgm_is.is_high_mod - printf "\n" - if $arg1 != 0 - showipceheader - set $kgm_iindex = 0 - set $kgm_iep = $kgm_is.is_table - set $kgm_destspacep = (ipc_space_t)0 - while ( $kgm_iindex < $kgm_is.is_table_size ) - set $kgm_ie = *$kgm_iep - if $kgm_ie.ie_bits & 0x001f0000 - set $kgm_name = (($kgm_iindex << 8)|($kgm_ie.ie_bits >> 24)) - showipceint $kgm_iep $kgm_name - if $arg2 != 0 - if $kgm_ie.ie_object != 0 && ($kgm_ie.ie_bits & 0x00070000) && ((ipc_port_t) $kgm_ie.ie_object)->ip_callstack[0] != 0 - printf " user bt: " - showportbt $kgm_ie.ie_object $kgm_is.is_task - end - end - end - set $kgm_iindex = $kgm_iindex + 1 - set $kgm_iep = &($kgm_is.is_table[$kgm_iindex]) - end - end - printf "\n" -end - - -define showipc - set $kgm_isp = (ipc_space_t)$arg0 - showipcheader - showipcint $kgm_isp 0 0 -end -document showipc -Syntax: (gdb) showipc <ipc_space> -| Routine to print the status of the specified ipc space -end - -define showrights - set $kgm_isp = (ipc_space_t)$arg0 - showipcheader - showipcint $kgm_isp 1 0 -end -document showrights -Syntax: (gdb) showrights <ipc_space> -| Routine to print a summary list of all the rights in a specified ipc space -end - - -define showtaskipc - set $kgm_taskp = (task_t)$arg0 - showtaskheader - showtaskint $kgm_taskp - showipcheader - showipcint $kgm_taskp->itk_space 0 0 -end -document showtaskipc -Syntax: (gdb) showtaskipc <task> -| Routine to print info about the ipc space for a task -end - - -define showtaskrights - set $kgm_taskp = (task_t)$arg0 - showtaskheader - showtaskint $kgm_taskp - showipcheader - showipcint $kgm_taskp->itk_space 1 0 -end -document showtaskrights -Syntax: (gdb) showtaskrights <task> -| Routine to print info about the ipc rights for a task -end - -define showtaskrightsbt - set $kgm_taskp = (task_t)$arg0 - showtaskheader - showtaskint $kgm_taskp - showipcheader - showipcint $kgm_taskp->itk_space 1 1 -end -document showtaskrightsbt -Syntax: (gdb) showtaskrightsbt <task> -| Routine to print info about the ipc rights for a task with backtraces -end - -define showallipc - set $kgm_head_taskp = &tasks - set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_cur_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_cur_taskp - showipcheader - showipcint $kgm_cur_taskp->itk_space 0 0 - set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next) - end -end -document showallipc -Syntax: (gdb) showallipc -| Routine to print a summary listing of all the ipc spaces -end - -define showipcsumheader - printf "task " - showptrhdrpad - printf " pid " - printf " #acts " - printf " tsize " - printf "command\n" -end - -define showipcsummaryint - set $kgm_taskp = (struct task *)$arg0 - showptr $arg0 - printf "%7d", ((struct proc *)$kgm_taskp->bsd_info)->p_pid - printf "%15d", $kgm_taskp->thread_count - printf "%15d", $kgm_taskp->itk_space.is_table_size - printf " %s\n", ((struct proc *)$kgm_taskp->bsd_info)->p_comm -end
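-
-# A sketch, not part of the original macro set: combine the pid-lookup
-# pattern from "showpid" with "showtaskrights" above, so the ipc entries
-# of a process can be dumped by pid. Assumes only globals already used
-# in this file (the "tasks" queue and the task/proc layout).
-define showpidrights
-    set $kgm_pr_headp = &tasks
-    set $kgm_pr_taskp = (struct task *)($kgm_pr_headp->next)
-    while $kgm_pr_taskp != $kgm_pr_headp
-        set $kgm_pr_procp = (struct proc *)$kgm_pr_taskp->bsd_info
-        if (($kgm_pr_procp != 0) && ($kgm_pr_procp->p_pid == $arg0))
-            showtaskrights $kgm_pr_taskp
-            set $kgm_pr_taskp = $kgm_pr_headp
-        else
-            set $kgm_pr_taskp = (struct task *)($kgm_pr_taskp->tasks.next)
-        end
-    end
-end
-document showpidrights
-Syntax: (gdb) showpidrights <pid>
-| Example wrapper (sketch): print the ipc rights of the process with the
-| given pid, using the same task-list walk as "showpid".
-end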
- -define showipcsummary - showipcsumheader - set $kgm_head_taskp = &tasks - set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_cur_taskp != $kgm_head_taskp - showipcsummaryint $kgm_cur_taskp - set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next) - end -end - -document showipcsummary -Syntax: (gdb) showipcsummary -| Summarizes the IPC state of all tasks. This is a convenient way to dump -| some basic clues about IPC messaging. You can use the output to determine -| tasks that are candidates for further investigation. -end - - -define showallrights - set $kgm_head_taskp = &tasks - set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_cur_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_cur_taskp - showipcheader - showipcint $kgm_cur_taskp->itk_space 1 0 - set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next) - end -end -document showallrights -Syntax: (gdb) showallrights -| Routine to print a summary listing of all the ipc rights -end - - -define showtaskvm - set $kgm_taskp = (task_t)$arg0 - showtaskheader - showmapheader - showtaskint $kgm_taskp - showvmint $kgm_taskp->map 0 -end -document showtaskvm -Syntax: (gdb) showtaskvm <task> -| Routine to print out info about a task's vm_map -end - -define showtaskvme - set $kgm_taskp = (task_t)$arg0 - showtaskheader - showtaskint $kgm_taskp - showmapheader - showvmint $kgm_taskp->map 1 -end -document showtaskvme -Syntax: (gdb) showtaskvme <task> -| Routine to print out info about a task's vm_map_entries -end - - -define showtaskheader - printf "task " - showptrhdrpad - printf " vm_map " - showptrhdrpad - printf " ipc_space " - showptrhdrpad - printf " #acts " - showprocheader -end - - -define showtaskint - set $kgm_taskp = (struct task *)$arg0 - showptr $arg0 - printf " " - showptr $kgm_taskp->map - printf " " - showptr $kgm_taskp->itk_space - printf " %5d ", $kgm_taskp->thread_count - showprocint $kgm_taskp->bsd_info -end - -define showtask - showtaskheader - showtaskint $arg0 -end -document showtask -Syntax: (gdb) showtask <task> -| Routine to print out info about a task. -end - - -define showtaskthreads - showtaskheader - set $kgm_taskp = (struct task *)$arg0 - showtaskint $kgm_taskp - showactheader - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - showactint $kgm_actp 0 - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end -end -document showtaskthreads -Syntax: (gdb) showtaskthreads <task> -| Routine to print info about the threads in a task. -end - - -define showtaskstacks - showtaskheader - set $kgm_taskp = (struct task *)$arg0 - showtaskint $kgm_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - showactheader - showactint $kgm_actp 1 - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end -end -document showtaskstacks -Syntax: (gdb) showtaskstacks <task> -| Routine to print out the stack for each thread in a task.
-end - -define showqueue_elems - set $queue_head = (struct queue_entry *)($arg0) - set $queue = (struct queue_entry *)($queue_head->next) - while $queue != $queue_head - showptr $queue - printf " " - set $thread = (struct thread *)$queue - set $task = (struct task *)$thread->task - set $bsd = (struct proc *)$task->bsd_info - set $guy = (char *)$bsd->p_comm - showptr $thread - printf " " - showptr $task - printf " " - showptr $bsd - printf " " - showptr $guy - #printf " %s\n", $kgm_procp->p_comm - printf "\n" - set $queue = (struct queue_entry *)($queue->next) - end -end - -define showalltasks - showtaskheader - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - showtaskint $kgm_taskp - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end -end -document showalltasks -Syntax: (gdb) showalltasks -| Routine to print a summary listing of all the tasks -| wq_state -> reports "number of workq threads", "number of scheduled workq threads", "number of pending work items" -| if "number of pending work items" seems stuck at non-zero, it may indicate that the workqueue mechanism is hung -| io_policy -> RAGE - rapid aging of vnodes requested -| NORM - normal I/O explicitly requested (this is the default) -| PASS - passive I/O requested (i.e. I/Os do not affect throttling decisions) -| THROT - throttled I/O requested (i.e. thread/task may be throttled after each I/O completes) -end - -define showprocheader - printf " pid process " - showptrhdrpad - printf "io_policy wq_state command\n" -end - -define showprocint - set $kgm_procp = (struct proc *)$arg0 - if $kgm_procp != 0 - set $kgm_printed = 0 - printf "%5d ", $kgm_procp->p_pid - showptr $kgm_procp - if ($kgm_procp->p_lflag & 0x400000) - printf " RAGE " - else - printf " " - end - set $ptask = (struct task *)$kgm_procp->task - set $diskpolicy = 0 - if ($ptask->ext_appliedstate.hw_disk != 0) - set $diskpolicy = $ptask->ext_appliedstate.hw_disk - else - if ($ptask->appliedstate.hw_disk != 0) - set $diskpolicy = $ptask->appliedstate.hw_disk - end - end - if ($ptask->ext_appliedstate.hw_bg != 0) - set $diskpolicy = 5 - end - if ($ptask->appliedstate.hw_bg != 0) - set $diskpolicy = 4 - end - if ($ptask->ext_appliedstate.apptype == 2) - set $diskpolicy = 6 - end - if ($diskpolicy == 2) - printf "PASS " - set $kgm_printed = 1 - end - if ($diskpolicy == 3) - printf "THROT " - set $kgm_printed = 1 - end - if ($diskpolicy == 4) - printf "BG_THRT " - set $kgm_printed = 1 - end - if ($diskpolicy == 5) - printf "EBG_THRT" - set $kgm_printed = 1 - end - if ($diskpolicy == 6) - printf "APD_THRT" - set $kgm_printed = 1 - end - if ($kgm_printed == 0) - printf " " - end - set $kgm_wqp = (struct workqueue *)$kgm_procp->p_wqptr - if $kgm_wqp != 0 - printf " %2d %2d %2d ", $kgm_wqp->wq_nthreads, $kgm_wqp->wq_thidlecount, $kgm_wqp->wq_reqcount - else - printf " " - end - printf " %s\n", $kgm_procp->p_comm - else - printf " *0* " - showptr 0 - printf " --\n" - end -end - -define showpid - showtaskheader - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - set $kgm_procp = (struct proc *)$kgm_taskp->bsd_info - if (($kgm_procp != 0) && ($kgm_procp->p_pid == $arg0)) - showtaskint $kgm_taskp - set $kgm_taskp = $kgm_head_taskp - else - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end - end -end -document showpid -Syntax: (gdb) showpid -| Routine to print a single process by pid -end - -define showproc - 
showtaskheader - set $kgm_procp = (struct proc *)$arg0 - showtaskint $kgm_procp->task -end - - -define kdb - set switch_debugger=1 - continue -end -document kdb -| kdb - Switch to the inline kernel debugger -| -| usage: kdb -| -| The kdb macro allows you to invoke the inline kernel debugger. -end - -define showpsetheader - printf "portset " - showptrhdrpad - printf "waitqueue " - showptrhdrpad - showptrhdrpad - printf "recvname flags refs recvname " - showptrhdrpad - printf "process\n" -end - -define showportheader - printf "port " - showptrhdrpad - printf "mqueue " - showptrhdrpad - showptrhdrpad - printf "recvname flags refs recvname " - showptrhdrpad - printf "dest\n" -end - -define showportmemberheader - printf "members " - showptrhdrpad - printf "port " - showptrhdrpad - showptrhdrpad - printf "recvname " - printf "flags refs mqueue " - showptrhdrpad - printf "msgcount\n" -end - -define showkmsgheader - printf "dest-port " - showptrhdrpad - printf "kmsg " - showptrhdrpad - showptrhdrpad - printf "msgid " - printf "disp size " - printf "reply-port " - showptrhdrpad - printf "source\n" -end - -define showkmsgsrcint - set $kgm_kmsgsrchp = ((ipc_kmsg_t)$arg0)->ikm_header -# set $kgm_kmsgsrctp = (mach_msg_audit_trailer_t *)((uintptr_t)$kgm_kmsgsrchp + $kgm_kmsgsrchp->msgh_size) -# set $kgm_kmsgpid = $kgm_kmsgsrctp->msgh_audit.val[5] - set $kgm_kmsgpid = (pid_t)((uint *)((uintptr_t)$kgm_kmsgsrchp + $kgm_kmsgsrchp->msgh_size))[10] -# compare against a well-known or cached value as this may be slow - if ($kgm_kmsgpid == 0) - set $kgm_kmsgsrcpid = (pid_t)0 - set $kgm_kmsgsrcprocp = (struct proc *)kernel_task->bsd_info - else - if ($kgm_kmsgpid != $kgm_kmsgsrcpid) - set $kgm_kmsgsrchead_taskp = &tasks - set $kgm_kmsgsrctaskp = (struct task *)($kgm_kmsgsrchead_taskp->next) - while $kgm_kmsgsrctaskp != $kgm_kmsgsrchead_taskp - set $kgm_kmsgsrcprocp = (struct proc *)$kgm_kmsgsrctaskp->bsd_info - set $kgm_kmsgsrcpid = $kgm_kmsgsrcprocp->p_pid - if (($kgm_kmsgsrcprocp != 0) && ($kgm_kmsgsrcprocp->p_pid == $kgm_kmsgpid)) - set $kgm_kmsgsrctaskp = $kgm_kmsgsrchead_taskp - else - set $kgm_kmsgsrctaskp = (struct task *)($kgm_kmsgsrctaskp->tasks.next) - end - end - end - end - if ($kgm_kmsgsrcprocp->p_pid == $kgm_kmsgpid) - printf "%s(%d)\n", $kgm_kmsgsrcprocp->p_comm, $kgm_kmsgpid - else - printf "unknown(%d)\n", $kgm_kmsgpid - end -end - -define showkmsgint - set $kgm_kmsghp = ((ipc_kmsg_t)$arg0)->ikm_header - set $kgm_kmsgh = *$kgm_kmsghp - if ($arg1 != 0) - printf " " - showptrhdrpad - else - showptr $kgm_kmsgh.msgh_remote_port - end - showptr $arg0 - showptrhdrpad - printf " 0x%08x ", $kgm_kmsgh.msgh_id - if (($kgm_kmsgh.msgh_bits & 0xff) == 19) - printf "rC" - else - printf "rM" - end - if (($kgm_kmsgh.msgh_bits & 0xff00) == (19 << 8)) - printf "lC" - else - printf "lM" - end - if ($kgm_kmsgh.msgh_bits & 0xf0000000) - printf "c" - else - printf "s" - end - printf "%5d ", $kgm_kmsgh.msgh_size - showptr $kgm_kmsgh.msgh_local_port - printf " " - set $kgm_kmsgsrcpid = (pid_t)0 - showkmsgsrcint $arg0 -end - -define showkmsg - showkmsgint $arg0 0 -end - -define showkobject - set $kgm_portp = (struct ipc_port *)$arg0 - showptr $kgm_portp->ip_kobject - printf " kobject(" - set $kgm_kotype = ($kgm_portp->ip_object.io_bits & 0x00000fff) - if ($kgm_kotype == 1) - printf "THREAD" - end - if ($kgm_kotype == 2) - printf "TASK" - end - if ($kgm_kotype == 3) - printf "HOST" - end - if ($kgm_kotype == 4) - printf "HOST_PRIV" - end - if ($kgm_kotype == 5) - printf "PROCESSOR" - end - if ($kgm_kotype == 6) - printf 
"PSET" - end - if ($kgm_kotype == 7) - printf "PSET_NAME" - end - if ($kgm_kotype == 8) - printf "TIMER" - end - if ($kgm_kotype == 9) - printf "PAGER_REQ" - end - if ($kgm_kotype == 10) - printf "DEVICE" - end - if ($kgm_kotype == 11) - printf "XMM_OBJECT" - end - if ($kgm_kotype == 12) - printf "XMM_PAGER" - end - if ($kgm_kotype == 13) - printf "XMM_KERNEL" - end - if ($kgm_kotype == 14) - printf "XMM_REPLY" - end - if ($kgm_kotype == 15) - printf "NOTDEF 15" - end - if ($kgm_kotype == 16) - printf "NOTDEF 16" - end - if ($kgm_kotype == 17) - printf "HOST_SEC" - end - if ($kgm_kotype == 18) - printf "LEDGER" - end - if ($kgm_kotype == 19) - printf "MASTER_DEV" - end - if ($kgm_kotype == 20) - printf "ACTIVATION" - end - if ($kgm_kotype == 21) - printf "SUBSYSTEM" - end - if ($kgm_kotype == 22) - printf "IO_DONE_QUE" - end - if ($kgm_kotype == 23) - printf "SEMAPHORE" - end - if ($kgm_kotype == 24) - printf "LOCK_SET" - end - if ($kgm_kotype == 25) - printf "CLOCK" - end - if ($kgm_kotype == 26) - printf "CLOCK_CTRL" - end - if ($kgm_kotype == 27) - printf "IOKIT_SPARE" - end - if ($kgm_kotype == 28) - printf "NAMED_MEM" - end - if ($kgm_kotype == 29) - printf "IOKIT_CON" - end - if ($kgm_kotype == 30) - printf "IOKIT_OBJ" - end - if ($kgm_kotype == 31) - printf "UPL" - end - if ($kgm_kotype == 34) - printf "FD" - end - printf ")\n" -end - -define showportdestproc - set $kgm_portp = (struct ipc_port *)$arg0 - set $kgm_spacep = $kgm_portp->data.receiver -# check against the previous cached value - this is slow - if ($kgm_spacep != $kgm_destspacep) - set $kgm_destprocp = (struct proc *)0 - set $kgm_head_taskp = &tasks - set $kgm_desttaskp = (struct task *)($kgm_head_taskp->next) - while (($kgm_destprocp == 0) && ($kgm_desttaskp != $kgm_head_taskp)) - set $kgm_destspacep = $kgm_desttaskp->itk_space - if ($kgm_destspacep == $kgm_spacep) - set $kgm_destprocp = (struct proc *)$kgm_desttaskp->bsd_info - else - set $kgm_desttaskp = (struct task *)($kgm_desttaskp->tasks.next) - end - end - end - if $kgm_destprocp != 0 - printf "%s(%d)\n", $kgm_destprocp->p_comm, $kgm_destprocp->p_pid - else - printf "task " - showptr $kgm_desttaskp - printf "\n" - end -end - -define showportdest - set $kgm_portp = (struct ipc_port *)$arg0 - set $kgm_spacep = $kgm_portp->data.receiver - if ((uintptr_t)$kgm_spacep == (uintptr_t)ipc_space_kernel) - showkobject $kgm_portp - else - if ($kgm_portp->ip_object.io_bits & 0x80000000) - showptr $kgm_portp->ip_messages.data.port.receiver_name - printf " " - showportdestproc $kgm_portp - else - showptr $kgm_portp - printf " inactive-port\n" - end - end -end - -define showportmember - printf " " - showptrhdrpad - showptr $arg0 - showptrhdrpad - set $kgm_portp = (struct ipc_port *)$arg0 - printf " 0x%08x ", $kgm_portp->ip_messages.data.port.receiver_name - if ($kgm_portp->ip_object.io_bits & 0x80000000) - printf "A" - else - printf " " - end - printf "Port" - printf "%5d ", $kgm_portp->ip_object.io_references - showptr &($kgm_portp->ip_messages) - printf " 0x%08x\n", $kgm_portp->ip_messages.data.port.msgcount -end - -define showportbt - set $kgm_iebt = ((ipc_port_t) $arg0)->ip_callstack - set $kgm_iepid = ((ipc_port_t) $arg0)->ip_spares[0] - set $kgm_procpid = ((proc_t) (((task_t) $arg1)->bsd_info))->p_pid - if $kgm_iebt[0] != 0 - showptr $kgm_iebt[0] - set $kgm_iebt_loop_ctr = 1 - while ($kgm_iebt_loop_ctr < 16 && $kgm_iebt[$kgm_iebt_loop_ctr]) - printf " " - showptr $kgm_iebt[$kgm_iebt_loop_ctr] - set $kgm_iebt_loop_ctr = $kgm_iebt_loop_ctr + 1 - end - if $kgm_iepid != 
$kgm_procpid - printf " (%d)", $kgm_iepid - end - printf "\n" - end -end - -define showportint - showptr $arg0 - printf " " - set $kgm_portp = (struct ipc_port *)$arg0 - showptr &($kgm_portp->ip_messages) - showptrhdrpad - printf " 0x%08x ", $kgm_portp->ip_messages.data.port.receiver_name - if ($kgm_portp->ip_object.io_bits & 0x80000000) - printf "A" - else - printf "D" - end - printf "Port" - printf "%5d ", $kgm_portp->ip_object.io_references - set $kgm_destspacep = (struct ipc_space *)0 - showportdest $kgm_portp - set $kgm_kmsgp = (ipc_kmsg_t)$kgm_portp->ip_messages.data.port.messages.ikmq_base - if $arg1 && $kgm_kmsgp - showkmsgheader - showkmsgint $kgm_kmsgp 1 - set $kgm_kmsgheadp = $kgm_kmsgp - set $kgm_kmsgp = $kgm_kmsgp->ikm_next - while $kgm_kmsgp != $kgm_kmsgheadp - showkmsgint $kgm_kmsgp 1 - set $kgm_kmsgp = $kgm_kmsgp->ikm_next - end - end -end - -define showpsetint - showptr $arg0 - printf " " - set $kgm_psetp = (struct ipc_pset *)$arg0 - showptr &($kgm_psetp->ips_messages) - showptrhdrpad - printf " 0x%08x ", $kgm_psetp->ips_messages.data.pset.local_name - if ($kgm_psetp->ips_object.io_bits & 0x80000000) - printf "A" - else - printf "D" - end - printf "Set " - printf "%5d ", $kgm_psetp->ips_object.io_references - showptr $kgm_psetp->ips_messages.data.pset.local_name - printf " " - set $kgm_setlinksp = &($kgm_psetp->ips_messages.data.pset.set_queue.wqs_setlinks) - set $kgm_wql = (WaitQueueLink *)$kgm_setlinksp->next - set $kgm_found = 0 - while ( (queue_entry_t)$kgm_wql != (queue_entry_t)$kgm_setlinksp) - set $kgm_portp = (struct ipc_port *)((uintptr_t)($kgm_wql->wql_element->wqe_queue) - (uintptr_t)$kgm_portoff) - if !$kgm_found - set $kgm_destspacep = (struct ipc_space *)0 - showportdestproc $kgm_portp - showportmemberheader - set $kgm_found = 1 - end - showportmember $kgm_portp 0 - set $kgm_wql = (WaitQueueLink *)$kgm_wql->wql_setlinks.next - end - if !$kgm_found - printf "--n/e--\n" - end -end - -define showpset - set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages) - showpsetheader - showpsetint $arg0 1 -end - -define showport - showportheader - showportint $arg0 1 -end - -define showipcobject - set $kgm_objectp = (ipc_object_t)$arg0 - if ($kgm_objectp->io_bits & 0x7fff0000) - set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages) - showpset $kgm_objectp - else - showport $kgm_objectp - end -end - -define showmqueue - set $kgm_mqueue = *(struct ipc_mqueue *)$arg0 - if ($kgm_mqueue.data.pset.set_queue.wqs_wait_queue.wq_type == 0xf1d1) - set $kgm_psetoff = &(((struct ipc_pset *)0)->ips_messages) - set $kgm_pset = (((long)$arg0) - ((long)$kgm_psetoff)) - showpsetheader - showpsetint $kgm_pset 1 - end - if ($kgm_mqueue.data.pset.set_queue.wqs_wait_queue.wq_type == 0xf1d0) - set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages) - set $kgm_port = (((long)$arg0) - ((long)$kgm_portoff)) - showportheader - showportint $kgm_port 1 - end -end - -define zprint_one - set $kgm_zone = (struct zone *)$arg0 - - showptr $kgm_zone - printf " %8d ",$kgm_zone->count - printf "%8x ",$kgm_zone->cur_size - printf "%8x ",$kgm_zone->max_size - printf "%8d ",$kgm_zone->elem_size - printf "%8x ",$kgm_zone->alloc_size - if ($kgm_mtype != $kgm_mtype_arm) - printf " %16ld ",$kgm_zone->num_allocs - printf "%16ld ",$kgm_zone->num_frees - end - printf "%s ",$kgm_zone->zone_name - - if ($kgm_zone->exhaustible) - printf "H" - end - if ($kgm_zone->collectable) - printf "C" - end - if ($kgm_zone->expandable) - printf "X" - end - if ($kgm_zone->noencrypt) - printf "$" - end - printf "\n" -end - - 
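-
-# Example usage of zprint_one (a sketch): print one zone, then step to the
-# next. gdb splits user-defined command arguments on whitespace, so stage
-# the pointer in a convenience variable instead of passing a cast inline;
-# first_zone/next_zone are the same globals "zprint" below walks:
-#   (gdb) set $kgm_zp = (struct zone *)first_zone
-#   (gdb) zprint_one $kgm_zp
-#   (gdb) set $kgm_zp = $kgm_zp->next_zone
-#   (gdb) zprint_one $kgm_zp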
-define zprint - printf "ZONE " - showptrhdrpad - printf " COUNT TOT_SZ MAX_SZ ELT_SZ ALLOC_SZ TOT_ALLOC TOT_FREE NAME\n" - set $kgm_zone_ptr = (struct zone *)first_zone - while ($kgm_zone_ptr != 0) - zprint_one $kgm_zone_ptr - set $kgm_zone_ptr = $kgm_zone_ptr->next_zone - end - printf "\n" -end -document zprint -Syntax: (gdb) zprint -| Routine to print a summary listing of all the kernel zones -end - -define showmtxgrp - set $kgm_mtxgrp = (struct _lck_grp_ *)$arg0 - - if ($kgm_mtxgrp->lck_grp_mtxcnt) - showptr $kgm_mtxgrp - printf " %8d ",$kgm_mtxgrp->lck_grp_mtxcnt - printf "%12u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt - printf "%8u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt - printf "%8u ",$kgm_mtxgrp->lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt - printf "%s ",&$kgm_mtxgrp->lck_grp_name - printf "\n" - end -end - - -define showallmtx - printf "LCK GROUP " - showptrhdrpad - printf " CNT UTIL MISS WAIT NAME\n" - set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)&lck_grp_queue - set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)$kgm_mtxgrp_ptr->lck_grp_link.next - while ($kgm_mtxgrp_ptr != (struct _lck_grp_ *)&lck_grp_queue) - showmtxgrp $kgm_mtxgrp_ptr - set $kgm_mtxgrp_ptr = (struct _lck_grp_ *)$kgm_mtxgrp_ptr->lck_grp_link.next - end - printf "\n" -end -document showallmtx -Syntax: (gdb) showallmtx -| Routine to print a summary listing of all mutexes -end - -define showrwlckgrp - set $kgm_rwlckgrp = (struct _lck_grp_ *)$arg0 - - if ($kgm_rwlckgrp->lck_grp_rwcnt) - showptr $kgm_rwlckgrp - printf " %8d ",$kgm_rwlckgrp->lck_grp_rwcnt - printf "%12u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt - printf "%8u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt - printf "%8u ",$kgm_rwlckgrp->lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt - printf "%s ",&$kgm_rwlckgrp->lck_grp_name - printf "\n" - end -end - - -define showallrwlck - printf "LCK GROUP " - showptrhdrpad - printf " CNT UTIL MISS WAIT NAME\n" - set $kgm_rwlckgrp_ptr = (struct _lck_grp_ *)&lck_grp_queue - set $kgm_rwlckgrp_ptr = (struct _lck_grp_ *)$kgm_rwlckgrp_ptr->lck_grp_link.next - while ($kgm_rwlckgrp_ptr != (struct _lck_grp_ *)&lck_grp_queue) - showrwlckgrp $kgm_rwlckgrp_ptr - set $kgm_rwlckgrp_ptr = (struct _lck_grp_ *)$kgm_rwlckgrp_ptr->lck_grp_link.next - end - printf "\n" -end -document showallrwlck -Syntax: (gdb) showallrwlck -| Routine to print a summary listing of all read/writer locks -end - -set $kdp_act_counter = 0 -set $kdp_arm_act_counter = 0 - -set $r0_save = 0 -set $r1_save = 0 -set $r2_save = 0 -set $r3_save = 0 -set $r4_save = 0 -set $r5_save = 0 -set $r6_save = 0 -set $r7_save = 0 -set $r8_save = 0 -set $r9_save = 0 -set $r10_save = 0 -set $r11_save = 0 -set $r12_save = 0 -set $sp_save = 0 -set $lr_save = 0 -set $pc_save = 0 - -define showcontext_int - echo Context switched, current instruction pointer: - output/a $pc - echo \n -end - -define switchtoact - set $newact = (struct thread *) $arg0 - select 0 - if ($newact->kernel_stack == 0) - echo This activation does not have a stack.\n - echo continuation: - output/a (unsigned) $newact.continuation - echo \n - else - if ($kgm_mtype == $kgm_mtype_ppc) - if ($kdp_act_counter == 0) - set $kdpstate = (struct savearea *) kdp.saved_state - end - set $kdp_act_counter = $kdp_act_counter + 1 - set (struct savearea *) kdp.saved_state=$newact->machine->pcb - flushregs - flushstack - set $pc=$newact->machine->pcb.save_srr0 - update - end - if ($kgm_mtype == $kgm_mtype_i386) - set $kdpstatep = 
(struct x86_saved_state32 *) kdp.saved_state - if ($kdp_act_counter == 0) - set $kdpstate = *($kdpstatep) - end - set $kdp_act_counter = $kdp_act_counter + 1 - - set $kgm_statep = (struct x86_kernel_state *) \ - ($newact->kernel_stack + kernel_stack_size \ - - sizeof(struct x86_kernel_state)) - set $kdpstatep->ebx = $kgm_statep->k_ebx - set $kdpstatep->ebp = $kgm_statep->k_ebp - set $kdpstatep->edi = $kgm_statep->k_edi - set $kdpstatep->esi = $kgm_statep->k_esi - set $kdpstatep->eip = $kgm_statep->k_eip - flushregs - flushstack - set $pc = $kgm_statep->k_eip - update - end - if ($kgm_mtype == $kgm_mtype_x86_64) - set $kdpstatep = (struct x86_saved_state64 *) kdp.saved_state - if ($kdp_act_counter == 0) - set $kdpstate = *($kdpstatep) - end - set $kdp_act_counter = $kdp_act_counter + 1 - - set $kgm_statep = (struct x86_kernel_state *) \ - ($newact->kernel_stack + kernel_stack_size \ - - sizeof(struct x86_kernel_state)) - set $kdpstatep->rbx = $kgm_statep->k_rbx - set $kdpstatep->rbp = $kgm_statep->k_rbp - set $kdpstatep->r12 = $kgm_statep->k_r12 - set $kdpstatep->r13 = $kgm_statep->k_r13 - set $kdpstatep->r14 = $kgm_statep->k_r14 - set $kdpstatep->r15 = $kgm_statep->k_r15 - set $kdpstatep->isf.rsp = $kgm_statep->k_rsp - flushregs - flushstack - set $pc = $kgm_statep->k_rip - update - end - if ($kgm_mtype == $kgm_mtype_arm) - set $kdp_arm_act_counter = $kdp_arm_act_counter + 1 - if ($kdp_arm_act_counter == 1) - set $r0_save = $r0 - set $r1_save = $r1 - set $r2_save = $r2 - set $r3_save = $r3 - set $r4_save = $r4 - set $r5_save = $r5 - set $r6_save = $r6 - set $r7_save = $r7 - set $r8_save = $r8 - set $r9_save = $r9 - set $r10_save = $r10 - set $r11_save = $r11 - set $r12_save = $r12 - set $sp_save = $sp - set $lr_save = $lr - set $pc_save = $pc - end - set $pc_ctx = load_reg+8 - set $kgm_statep = (struct arm_saved_state *)((struct thread*)$arg0)->machine.kstackptr - set $r0 = $kgm_statep->r[0] - set $r1 = $kgm_statep->r[1] - set $r2 = $kgm_statep->r[2] - set $r3 = $kgm_statep->r[3] - set $r4 = $kgm_statep->r[4] - set $r5 = $kgm_statep->r[5] - set $r6 = $kgm_statep->r[6] - set $r8 = $kgm_statep->r[8] - set $r9 = $kgm_statep->r[9] - set $r10 = $kgm_statep->r[10] - set $r11 = $kgm_statep->r[11] - set $r12 = $kgm_statep->r[12] - set $sp = $kgm_statep->sp - set $lr = $kgm_statep->lr - set $pc = $pc_ctx - set $r7 = $kgm_statep->r[7] - flushregs - flushstack - end - end - showcontext_int -end - -document switchtoact -Syntax: switchtoact
<address of activation>
-| This command allows gdb to examine the execution context and call -| stack for the specified activation. For example, to view the backtrace -| for an activation, issue "switchtoact <address of activation>
", followed by "bt". -| Before resuming execution, issue a "resetctx" command, to -| return to the original execution context. -end - -define switchtoctx - select 0 - if ($kgm_mtype == $kgm_mtype_ppc) - if ($kdp_act_counter == 0) - set $kdpstate = (struct savearea *) kdp.saved_state - end - set $kdp_act_counter = $kdp_act_counter + 1 - set (struct savearea *) kdp.saved_state=(struct savearea *) $arg0 - flushregs - flushstack - set $pc=((struct savearea *) $arg0)->save_srr0 - update - else - if ($kgm_mtype == $kgm_mtype_arm) - select 0 - set $kdp_arm_act_counter = $kdp_arm_act_counter + 1 - if ($kdp_arm_act_counter == 1) - set $r0_save = $r0 - set $r1_save = $r1 - set $r2_save = $r2 - set $r3_save = $r3 - set $r4_save = $r4 - set $r5_save = $r5 - set $r6_save = $r6 - set $r7_save = $r7 - set $r8_save = $r8 - set $r9_save = $r9 - set $r10_save = $r10 - set $r11_save = $r11 - set $r12_save = $r12 - set $sp_save = $sp - set $lr_save = $lr - set $pc_save = $pc - end - set $kgm_statep = (struct arm_saved_state *)$arg0 - set $r0 = $kgm_statep->r[0] - set $r1 = $kgm_statep->r[1] - set $r2 = $kgm_statep->r[2] - set $r3 = $kgm_statep->r[3] - set $r4 = $kgm_statep->r[4] - set $r5 = $kgm_statep->r[5] - set $r6 = $kgm_statep->r[6] - set $r8 = $kgm_statep->r[8] - set $r9 = $kgm_statep->r[9] - set $r10 = $kgm_statep->r[10] - set $r11 = $kgm_statep->r[11] - set $r12 = $kgm_statep->r[12] - set $sp = $kgm_statep->sp - set $lr = $kgm_statep->lr - set $r7 = $kgm_statep->r[7] - set $pc = $kgm_statep->pc - flushregs - flushstack - update - else - echo switchtoctx not implemented for this architecture.\n - end - end -end - - -document switchtoctx -Syntax: switchtoctx
-| This command allows gdb to examine an execution context and dump the -| backtrace for this execution context. -| Before resuming execution, issue a "resetctx" command, to -| return to the original execution context. -end - -define resetctx - select 0 - if ($kdp_act_counter != 0) - if ($kgm_mtype == $kgm_mtype_ppc) - set (struct savearea *)kdp.saved_state=$kdpstate - flushregs - flushstack - set $pc=((struct savearea *) kdp.saved_state)->save_srr0 - update - set $kdp_act_counter = 0 - end - if ($kgm_mtype == $kgm_mtype_i386) - set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state - set *($kdpstatep)=$kdpstate - flushregs - flushstack - set $pc=$kdpstatep->eip - update - set $kdp_act_counter = 0 - end - if ($kgm_mtype == $kgm_mtype_x86_64) - set $kdpstatep = (struct x86_saved_state64 *) kdp.saved_state - set *($kdpstatep)=$kdpstate - flushregs - flushstack - set $pc=$kdpstatep->isf.rip - update - set $kdp_act_counter = 0 - end - showcontext_int - end - if ($kgm_mtype == $kgm_mtype_arm && $kdp_arm_act_counter != 0) - echo Restoring context\n - set $r0 = $r0_save - flushregs - set $r1 = $r1_save - flushregs - set $r2 = $r2_save - flushregs - set $r3 = $r3_save - flushregs - set $r4 = $r4_save - flushregs - set $r5 = $r5_save - flushregs - set $r6 = $r6_save - flushregs - set $r8 = $r8_save - flushregs - set $r9 = $r9_save - flushregs - set $r10 = $r10_save - flushregs - set $r11 = $r11_save - flushregs - set $r12 = $r12_save - flushregs - set $sp = $sp_save - flushregs - set $lr = $lr_save - flushregs - set $pc = $pc_save - flushregs - set $r7 = $r7_save - flushregs - flushstack - update - set $kdp_arm_act_counter = 0 - end -end - -document resetctx -| Syntax: resetctx -| Returns to the original execution context. This command should be -| issued if you wish to resume execution after using the "switchtoact" -| or "switchtoctx" commands. -end - -# This is a pre-hook for the continue command, to prevent inadvertent attempts -# to resume from the context switched to for examination. -define hook-continue - resetctx -end - -# This is a pre-hook for the detach command, to prevent inadvertent attempts -# to resume from the context switched to for examination. -define hook-detach - resetctx -end - -define resume_on - set $resume = KDP_DUMPINFO_SETINFO | KDP_DUMPINFO_RESUME - dumpinfoint $resume -end - -document resume_on -| Syntax: resume_on -| The target system will resume when detaching or exiting from gdb. -| This is the default behavior. 
-end - -define resume_off - set $noresume = KDP_DUMPINFO_SETINFO | KDP_DUMPINFO_NORESUME - dumpinfoint $noresume -end - -document resume_off -| Syntax: resume_off -| The target system won't resume after detaching from gdb, and -| can be attached to by a new gdb session. -end - -define paniclog - set $kgm_panic_bufptr = debug_buf - set $kgm_panic_bufptr_max = debug_buf_ptr - while $kgm_panic_bufptr < $kgm_panic_bufptr_max - if *(char *)$kgm_panic_bufptr == 10 - printf "\n" - else - printf "%c", *(char *)$kgm_panic_bufptr - end - set $kgm_panic_bufptr= (char *)$kgm_panic_bufptr + 1 - end -end - -document paniclog -| Syntax: paniclog -| Display the panic log information -| -end - -define dumpcallqueue - set $kgm_callhead = $arg0 - set $kgm_callentry = $kgm_callhead->next - set $kgm_i = 0 - while $kgm_callentry != $kgm_callhead - set $kgm_call = (struct call_entry *)$kgm_callentry - showptr $kgm_call - printf "0x%lx 0x%lx ", $kgm_call->param0, $kgm_call->param1 - output $kgm_call->deadline - printf "\t" - output $kgm_call->func - printf "\n" - set $kgm_i = $kgm_i + 1 - set $kgm_callentry = $kgm_callentry->next - end - printf "%d entries\n", $kgm_i -end - -document dumpcallqueue -| Syntax: dumpcallqueue <queue head> -| Displays the contents of the specified call_entry queue. -end - -define showtaskacts -showtaskthreads $arg0 -end -document showtaskacts -| See help showtaskthreads. -end - -define showallacts -showallthreads -end -document showallacts -| See help showallthreads. -end - - -define resetstacks - _kgm_flush_loop - set kdp_pmap = 0 - _kgm_flush_loop - resetctx - _kgm_flush_loop - _kgm_update_loop - resetctx - _kgm_update_loop -end - -document resetstacks -| Syntax: resetstacks -| Internal kgmacro routine used by the "showuserstack" macro -| to reset the target pmap to the kernel pmap. -end - -#Barely effective hacks to work around bugs in the "flush" and "update" -#gdb commands in Tiger (up to 219); these aren't necessary with Panther -#gdb, but do no harm.
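-
-# Example (a sketch of the pattern "resetstacks" above automates): after
-# examining a user-space context, restore reads to the kernel pmap and
-# force gdb to drop its cached register/stack state before continuing:
-#   (gdb) set kdp_pmap = 0
-#   (gdb) flushregs
-#   (gdb) flushstack
-#   (gdb) update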
-define _kgm_flush_loop - set $kgm_flush_loop_ctr = 0 - while ($kgm_flush_loop_ctr < 30) - flushregs - flushstack - set $kgm_flush_loop_ctr = $kgm_flush_loop_ctr + 1 - end -end - -define _kgm_update_loop - set $kgm_update_loop_ctr = 0 - while ($kgm_update_loop_ctr < 30) - update - set $kgm_update_loop_ctr = $kgm_update_loop_ctr + 1 - end -end -# Internal routine used by "_loadfrom" to read from 64-bit addresses -# on 32-bit kernels -define _loadk32m64 - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_readmem64_req_t) - set $kgm_pkt = (kdp_readmem64_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_READMEM64 - set $kgm_pkt->hdr.len = sizeof(kdp_readmem64_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->address = (uint64_t)$arg0 - set $kgm_pkt->nbytes = sizeof(uint64_t) - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - set $kgm_pkt = (kdp_readmem64_reply_t *)&manual_pkt.data - if ($kgm_pkt->error == 0) - set $kgm_k32read64 = *(uint64_t *)$kgm_pkt->data - else - set $kgm_k32read64 = 0 - end -end - -# Internal routine used by "showx86backtrace" to abstract possible loads from -# user space -define _loadfrom - if (kdp_pmap == 0) - set $kgm_loadval = *(uintptr_t *)$arg0 - else - if ($kgm_x86_abi == 0xe) - set $kgm_loadval = *(uint32_t *)$arg0 - else - if ($kgm_x86_abi == 0xf) - if ($kgm_mtype == $kgm_mtype_i386) - _loadk32m64 $arg0 - set $kgm_loadval = $kgm_k32read64 - else - set $kgm_loadval = *(uint64_t *)$arg0 - end - end - end -end -end - - -#This is necessary since gdb often doesn't do backtraces on x86 correctly -#in the absence of symbols. The code below in showuserstack and -#showx86backtrace also contains several workarounds for the gdb bug where -#gdb stops macro evaluation because of spurious "Cannot read memory" -#errors on x86. These errors appear on ppc as well, but they don't -#always stop macro evaluation.
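-
-# Example (a sketch): follow one saved-frame link by hand the way
-# "showx86backtrace" below does, assuming $kgm_cur_frame and $kgm_ret_off
-# have been set as in that macro. _loadfrom honors kdp_pmap and the
-# 32-bit/64-bit user ABI cases, unlike a raw "x" command:
-#   (gdb) _loadfrom ($kgm_cur_frame)
-#   (gdb) set $kgm_prev_frame = $kgm_loadval
-#   (gdb) _loadfrom ($kgm_cur_frame+$kgm_ret_off)
-#   (gdb) set $kgm_prev_pc = $kgm_loadval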
- -set $kgm_cur_frame = 0 -set $kgm_cur_pc = 0 -set $kgm_x86_abi = 0 -define showx86backtrace - if ($kgm_mtype == $kgm_mtype_i386) - set $kgm_frame_reg = $ebp - set $kgm_pc = $eip - set $kgm_ret_off = 4 - end - if ($kgm_mtype == $kgm_mtype_x86_64) - set $kgm_frame_reg = $rbp - set $kgm_pc = $rip - set $kgm_ret_off = 8 - end - - if ($kgm_x86_abi == 0xe) - set $kgm_ret_off = 4 - end - if ($kgm_x86_abi == 0xf) - set $kgm_ret_off = 8 - end - - if ($kgm_cur_frame == 0) - set $kgm_cur_frame = $kgm_frame_reg - end - if ($kgm_cur_pc == 0) - set $kgm_cur_pc = $kgm_pc - end - printf "0: Frame: 0x%016llx PC: 0x%016llx\n", $kgm_cur_frame, $kgm_cur_pc - if (!(($kgm_x86_abi == 0xf) && ($kgm_mtype == $kgm_mtype_i386))) - x/i $kgm_cur_pc - end - set $kgm_tmp_frame = $kgm_cur_frame - set $kgm_cur_frame = 0 - set $kgm_cur_pc = 0 - _loadfrom ($kgm_tmp_frame) - set $kgm_prev_frame = $kgm_loadval - _loadfrom ($kgm_tmp_frame+$kgm_ret_off) - set $kgm_prev_pc = $kgm_loadval - set $kgm_frameno = 1 - while ($kgm_prev_frame != 0) && ($kgm_prev_frame != 0x0000000800000008) - printf "%d: Saved frame: 0x%016llx Saved PC: 0x%016llx\n", $kgm_frameno, $kgm_prev_frame, $kgm_prev_pc - if (!(($kgm_x86_abi == 0xf) && ($kgm_mtype == $kgm_mtype_i386))) - x/i $kgm_prev_pc - end - _loadfrom ($kgm_prev_frame+$kgm_ret_off) - set $kgm_prev_pc = $kgm_loadval - _loadfrom ($kgm_prev_frame) - set $kgm_prev_frame = $kgm_loadval - set $kgm_frameno = $kgm_frameno + 1 - end - set kdp_pmap = 0 - set $kgm_x86_abi = 0 -end - -define showx86backtrace2 - set $kgm_cur_frame = $arg0 - set $kgm_cur_pc = $arg1 - showx86backtrace -end - -define showuserstack - select 0 - if ($kgm_mtype == $kgm_mtype_ppc) - if ($kdp_act_counter == 0) - set $kdpstate = (struct savearea *) kdp.saved_state - end - set $kdp_act_counter = $kdp_act_counter + 1 - set $newact = (struct thread *) $arg0 - _kgm_flush_loop - set $checkpc = $newact->machine->upcb.save_srr0 - if ($checkpc == 0) - echo This activation does not appear to have - echo \20 a valid user context.\n - else - set (struct savearea *) kdp.saved_state=$newact->machine->upcb - set $pc = $checkpc -#flush and update seem to be executed lazily by gdb on Tiger, hence the -#repeated invocations - see 3743135 - _kgm_flush_loop -# This works because the new pmap is used only for reads - set kdp_pmap = $newact->task->map->pmap - _kgm_flush_loop - _kgm_update_loop - bt - resetstacks - _kgm_flush_loop - _kgm_update_loop - resetstacks - _kgm_flush_loop - _kgm_update_loop - end - else - if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) - set $newact = (struct thread *) $arg0 - set $newiss = (x86_saved_state_t *) ($newact->machine->iss) - set $kgm_x86_abi = $newiss.flavor - if ($newiss.flavor == 0xf) - set $checkpc = $newiss.uss.ss_64.isf.rip - set $checkframe = $newiss.uss.ss_64.rbp - - else - set $checkpc = $newiss.uss.ss_32.eip - set $checkframe = $newiss.uss.ss_32.ebp - end - - if ($checkpc == 0) - echo This activation does not appear to have - echo \20 a valid user context.\n - else - set $kgm_cur_frame = $checkframe - set $kgm_cur_pc = $checkpc -# When more than one argument is present, don't print usage - if ( $argc == 1 ) - printf "You may now issue the showx86backtrace command to see the user space backtrace for this thread (" - showptr $arg0 - printf "); you can also examine memory locations in this address space (pmap " - showptr $newact->task->map->pmap - printf ") before issuing the backtrace.
This two-step process is necessary to work around various bugs in x86 gdb, which cause it to stop memory evaluation on spurious memory read errors. Additionally, you may need to issue a set kdp_pmap = 0 command after the showx86backtrace completes, to resume reading from the kernel address space.\n" - end - set kdp_pmap = $newact->task->map->pmap - _kgm_flush_loop - _kgm_update_loop - end - else - if ($kgm_mtype == $kgm_mtype_arm) - if (kdp->is_conn > 0) - set $kgm_threadp = (struct thread *)$arg0 - set $kgm_saved_pmap = kdp_pmap - showactheader - showactint $kgm_threadp 0 - set $kgm_thread_pmap = $kgm_threadp->task->map->pmap - set $kgm_thread_sp = $kgm_threadp.machine->PcbData.r[7] - showptrhdrpad - printf " " - showptr 0 - printf " " - showptr $kgm_threadp.machine->PcbData.pc - printf "\n" - set kdp_pmap = $kgm_thread_pmap - while ($kgm_thread_sp != 0) - set $link_register = *($kgm_thread_sp + 4) - showptrhdrpad - printf " " - showptr $kgm_thread_sp - printf " " - showptr $link_register - printf "\n" - set $kgm_thread_sp = *$kgm_thread_sp - end - set kdp_pmap = $kgm_saved_pmap - else - set $kgm_threadp = (struct thread *)$arg0 - showactheader - showactint $kgm_threadp 0 - set $kgm_thread_sp = $kgm_threadp.machine->PcbData.r[7] - while ($kgm_thread_sp != 0) - _map_user_data_from_task $kgm_threadp->task $kgm_thread_sp 8 - set $kgm_thread_sp_window = (int *)$kgm_map_user_window - set $link_register = *($kgm_thread_sp_window + 1) - showptrhdrpad - printf " " - showptr $kgm_thread_sp - printf " " - showptr $link_register - printf "\n" - set $kgm_thread_sp = *$kgm_thread_sp_window - _unmap_user_data_from_task - end - end - else - echo showuserstack not supported on this architecture\n - end - end - end -end -document showuserstack -Syntax: showuserstack
<address of thread activation>
-|This command displays a numeric backtrace for the user space stack of -|the given thread activation. It may, of course, fail to display a -|complete backtrace if portions of the user stack are not mapped in. -|Symbolic backtraces can be obtained either by running gdb on the -|user space binary, or by using a tool such as "symbolicate". -|Note that while this command works on Panther's gdb, an issue -|with Tiger gdb (3743135) appears to hamper the evaluation of this -|macro in some cases. -end - -define showtaskuserstacks - set $kgm_taskp = (struct task *)$arg0 - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - printf "For thread " - showptr $kgm_actp - printf "\n" - showuserstack $kgm_actp quiet - if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) - showx86backtrace - end - set kdp_pmap=0 - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - printf "\n" - end - showuserlibraries $kgm_taskp -end -document showtaskuserstacks -Syntax: (gdb) showtaskuserstacks <task address> -| Print out the user stack for each thread in a task, followed by the user libraries. -end - - -define showuserregisters - set $kgm_threadp = (struct thread *)$arg0 - set $kgm_taskp = $kgm_threadp->task - if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) - set $newiss = (x86_saved_state_t *) ($kgm_threadp->machine.iss) - set $kgm_x86_abi = $newiss.flavor - if ($newiss.flavor == 0xf) - printf "X86 Thread State (64-bit):\n" - set $kgm_ss64 = $newiss.uss.ss_64 - - printf " rax: " - showuserptr $kgm_ss64.rax - printf " rbx: " - showuserptr $kgm_ss64.rbx - printf " rcx: " - showuserptr $kgm_ss64.rcx - printf " rdx: " - showuserptr $kgm_ss64.rdx - printf "\n" - - printf " rdi: " - showuserptr $kgm_ss64.rdi - printf " rsi: " - showuserptr $kgm_ss64.rsi - printf " rbp: " - showuserptr $kgm_ss64.rbp - printf " rsp: " - showuserptr $kgm_ss64.isf.rsp - printf "\n" - - printf " r8: " - showuserptr $kgm_ss64.r8 - printf " r9: " - showuserptr $kgm_ss64.r9 - printf " r10: " - showuserptr $kgm_ss64.r10 - printf " r11: " - showuserptr $kgm_ss64.r11 - printf "\n" - - printf " r12: " - showuserptr $kgm_ss64.r12 - printf " r13: " - showuserptr $kgm_ss64.r13 - printf " r14: " - showuserptr $kgm_ss64.r14 - printf " r15: " - showuserptr $kgm_ss64.r15 - printf "\n" - - printf " rip: " - showuserptr $kgm_ss64.isf.rip - printf " rfl: " - showuserptr $kgm_ss64.isf.rflags - printf " cr2: " - showuserptr $kgm_ss64.cr2 - printf "\n" - else - printf "X86 Thread State (32-bit):\n" - set $kgm_ss32 = $newiss.uss.ss_32 - - printf " eax: " - showuserptr $kgm_ss32.eax - printf " ebx: " - showuserptr $kgm_ss32.ebx - printf " ecx: " - showuserptr $kgm_ss32.ecx - printf " edx: " - showuserptr $kgm_ss32.edx - printf "\n" - - printf " edi: " - showuserptr $kgm_ss32.edi - printf " esi: " - showuserptr $kgm_ss32.esi - printf " ebp: " - showuserptr $kgm_ss32.ebp - printf " esp: " - showuserptr $kgm_ss32.uesp - printf "\n" - - printf " ss: " - showuserptr $kgm_ss32.ss - printf " efl: " - showuserptr $kgm_ss32.efl - printf " eip: " - showuserptr $kgm_ss32.eip - printf " cs: " - showuserptr $kgm_ss32.cs - printf "\n" - - printf " ds: " - showuserptr $kgm_ss32.ds - printf " es: " - showuserptr $kgm_ss32.es - printf " fs: " - showuserptr $kgm_ss32.fs - printf " gs: " - showuserptr $kgm_ss32.gs - printf "\n" - - printf " cr2: " - showuserptr $kgm_ss32.cr2 - printf "\n" - end - else - if ($kgm_mtype == $kgm_mtype_arm) - printf "ARM Thread State:\n" - set $kgm_pcb =
(arm_saved_state_t *) (&$kgm_threadp->machine.PcbData) - - printf " r0: " - showuserptr $kgm_pcb.r[0] - printf " r1: " - showuserptr $kgm_pcb.r[1] - printf " r2: " - showuserptr $kgm_pcb.r[2] - printf " r3: " - showuserptr $kgm_pcb.r[3] - printf "\n" - - printf " r4: " - showuserptr $kgm_pcb.r[4] - printf " r5: " - showuserptr $kgm_pcb.r[5] - printf " r6: " - showuserptr $kgm_pcb.r[6] - printf " r7: " - showuserptr $kgm_pcb.r[7] - printf "\n" - - printf " r8: " - showuserptr $kgm_pcb.r[8] - printf " r9: " - showuserptr $kgm_pcb.r[9] - printf " r10: " - showuserptr $kgm_pcb.r[10] - printf " r11: " - showuserptr $kgm_pcb.r[11] - printf "\n" - - printf " ip: " - showuserptr $kgm_pcb.r[12] - printf " sp: " - showuserptr $kgm_pcb.sp - printf " lr: " - showuserptr $kgm_pcb.lr - printf " pc: " - showuserptr $kgm_pcb.pc - printf "\n" - - printf " cpsr: " - showuserptr $kgm_pcb.cpsr - printf "\n" - else - echo showuserregisters not supported on this architecture\n - end - end -end -document showuserregisters -Syntax: showuserregisters <address of thread activation>
-|This command displays the last known user register state -|for the thread. This may not be correct for cases where -|the thread is currently executing in userspace. However, -|for threads that have entered the kernel (either explicitly -|with a system call or implicitly with a fault), it should -|be accurate. -end - -define showtaskuserregisters - set $kgm_taskp = (struct task *)$arg0 - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - printf "For thread " - showptr $kgm_actp - printf "\n" - showuserregisters $kgm_actp - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - printf "\n" - end -end -document showtaskuserregisters -Syntax: (gdb) showtaskuserregisters <task address> -| Print out the user registers for each thread in a task -end - -define kdp-reboot -# Alternatively, set *(*(unsigned **) 0x2498) = 1 -# (or 0x5498 on PPC, 0xffffff8000002928 on x86_64, 0xffff049c on arm) - manualhdrint $kgm_kdp_pkt_hostreboot - detach -end - -document kdp-reboot -Syntax: kdp-reboot -|Reboot the remote target machine; not guaranteed to succeed. -end - -define kdpversionint - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_version_req_t) - set $kgm_pkt = (kdp_version_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_VERSION - set $kgm_pkt->hdr.len = sizeof(kdp_version_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - set $kgm_pkt = (kdp_version_reply_t *)&manual_pkt.data - set $kgm_kdp_version = $kgm_pkt->version - set $kgm_kdp_feature = $kgm_pkt->feature -end - -define kdp-version - kdpversionint - printf "KDP VERSION = %d, FEATURE = 0x%x\n", $kgm_kdp_version, $kgm_kdp_feature -end - -document kdp-version -Syntax: kdp-version -|Get the KDP protocol version being used by the kernel. -end - -define dumpinfoint - # set up the manual KDP packet - set manual_pkt.input = 0 - - set manual_pkt.len = sizeof(kdp_dumpinfo_req_t) - set $kgm_pkt = (kdp_dumpinfo_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_DUMPINFO - set $kgm_pkt->hdr.len = sizeof(kdp_dumpinfo_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->type = $arg0 - set $kgm_pkt->name = "" - set $kgm_pkt->destip = "" - set $kgm_pkt->routerip = "" - set $kgm_pkt->port = 0 - - if $argc > 1 - set $kgm_pkt->name = "$arg1" - end - if $argc > 2 - set $kgm_pkt->destip = "$arg2" - end - if $argc > 3 - set $kgm_pkt->routerip = "$arg3" - end - if $argc > 4 - set $kgm_pkt->port = $arg4 - end - - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header -end - -define sendcore - if $argc > 1 - dumpinfoint KDP_DUMPINFO_CORE $arg1 $arg0 - else - dumpinfoint KDP_DUMPINFO_CORE \0 $arg0 - end -end - -document sendcore -Syntax: sendcore <IP address> [filename] -|Configure the kernel to transmit a kernel coredump to a server (kdumpd) -|at the specified IP address. This is useful when the remote target has -|not been previously configured to transmit coredumps, and you wish to -|preserve kernel state for later examination. NOTE: You must issue a "continue" -|command after using this macro to trigger the kernel coredump. The kernel -|will resume waiting in the debugger after completion of the coredump.
You -|may disable coredumps by executing the "disablecore" macro. You can -|optionally specify the filename to be used for the generated core file. -end - -define sendsyslog - if $argc > 1 - dumpinfoint KDP_DUMPINFO_SYSTEMLOG $arg1 $arg0 - else - dumpinfoint KDP_DUMPINFO_SYSTEMLOG \0 $arg0 - end -end - -document sendsyslog -Syntax: sendsyslog <IP address> [filename] -|Configure the kernel to transmit a kernel system log to a server (kdumpd) -|at the specified IP address. NOTE: You must issue a "continue" -|command after using this macro to trigger the kernel system log. The kernel -|will resume waiting in the debugger after completion. You can optionally -|specify the name to be used for the generated system log. -end - -define sendpaniclog - if panicstr - if $argc > 1 - dumpinfoint KDP_DUMPINFO_PANICLOG $arg1 $arg0 - else - dumpinfoint KDP_DUMPINFO_PANICLOG \0 $arg0 - end - else - printf "No panic log available.\n" - end -end - -document sendpaniclog -Syntax: sendpaniclog <IP address> [filename] -|Configure the kernel to transmit a kernel paniclog to a server (kdumpd) -|at the specified IP address. NOTE: You must issue a "continue" -|command after using this macro to trigger the kernel panic log. The kernel -|will resume waiting in the debugger after completion. You can optionally -|specify the name to be used for the generated panic log. -end - -define getdumpinfo - dumpinfoint KDP_DUMPINFO_GETINFO - set $kgm_dumpinfo = (kdp_dumpinfo_reply_t *) manual_pkt.data - if $kgm_dumpinfo->type & KDP_DUMPINFO_REBOOT - printf "System will reboot after kernel info gets dumped.\n" - else - printf "System will not reboot after kernel info gets dumped.\n" - end - if $kgm_dumpinfo->type & KDP_DUMPINFO_NORESUME - printf "System will allow a re-attach after a KDP disconnect.\n" - else - printf "System will resume after a KDP disconnect.\n" - end - set $kgm_dumpinfo_type = $kgm_dumpinfo->type & KDP_DUMPINFO_MASK - if $kgm_dumpinfo_type == KDP_DUMPINFO_DISABLE - printf "Kernel not setup for remote dumps.\n" - else - printf "Remote dump type: " - if $kgm_dumpinfo_type == KDP_DUMPINFO_CORE - printf "Core file\n" - end - if $kgm_dumpinfo_type == KDP_DUMPINFO_PANICLOG - printf "Panic log\n" - end - if $kgm_dumpinfo_type == KDP_DUMPINFO_SYSTEMLOG - printf "System log\n" - end - - printf "Name: " - if $kgm_dumpinfo->name[0] == '\0' - printf "(autogenerated)\n" - else - printf "%s\n", $kgm_dumpinfo->name - end - - printf "Network Info: %s[%d] ", $kgm_dumpinfo->destip, $kgm_dumpinfo->port - if $kgm_dumpinfo->routerip[0] == '\0' - printf "\n" - else - printf "Router: %s\n", $kgm_dumpinfo->routerip - end - end -end - -document getdumpinfo -Syntax: getdumpinfo -|Retrieve the current remote dump settings. -end - -define setdumpinfo - dumpinfoint KDP_DUMPINFO_SETINFO $arg0 $arg1 $arg2 $arg3 -end - -document setdumpinfo -Syntax: setdumpinfo <filename> <ip> <router> <port> -|Configure the current remote dump settings. Specify \0 if you -|want to use the defaults (filename) or previously configured -|settings (ip/router). Specify 0 for the port if you wish to -|use the previously configured/default setting for that. -end - -define disablecore - dumpinfoint KDP_DUMPINFO_DISABLE -end - -document disablecore -Syntax: disablecore -|Reconfigures the kernel so that it no longer transmits kernel coredumps. This -|complements the "sendcore" macro, but it may be used if the kernel has been -|configured to transmit coredumps through boot-args as well.
-end - -define switchtocorethread - set $newact = (struct thread *) $arg0 - select 0 - if ($newact->kernel_stack == 0) - echo This thread does not have a stack.\n - echo continuation: - output/a (unsigned) $newact.continuation - echo \n - else - if ($kgm_mtype == $kgm_mtype_ppc) - loadcontext $newact->machine->pcb - flushstack - set $pc = $newact->machine->pcb.save_srr0 - else - if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) - set $kgm_cstatep = (struct x86_kernel_state *) \ - ($newact->kernel_stack + kernel_stack_size \ - - sizeof(struct x86_kernel_state)) - loadcontext $kgm_cstatep - flushstack - else - echo switchtocorethread not supported on this architecture\n - end - end - showcontext_int - end -end - -document switchtocorethread -Syntax: switchtocorethread
-| The corefile equivalent of "switchtoact". When debugging a kernel coredump -| file, this command can be used to examine the execution context and stack -| trace for a given thread activation. For example, to view the backtrace -| for a thread, issue "switchtocorethread <address of thread activation>
", followed by "bt". -| Before resuming execution, issue a "resetcorectx" command, to -| return to the original execution context. Note that this command -| requires gdb support, as documented in Radar 3401283. -end - -define loadcontext - select 0 - if ($kgm_mtype == $kgm_mtype_ppc) - set $kgm_contextp = (struct savearea *) $arg0 - set $pc = $kgm_contextp.save_srr0 - set $r1 = $kgm_contextp.save_r1 - set $lr = $kgm_contextp.save_lr - - set $r2 = $kgm_contextp.save_r2 - set $r3 = $kgm_contextp.save_r3 - set $r4 = $kgm_contextp.save_r4 - set $r5 = $kgm_contextp.save_r5 - set $r6 = $kgm_contextp.save_r6 - set $r7 = $kgm_contextp.save_r7 - set $r8 = $kgm_contextp.save_r8 - set $r9 = $kgm_contextp.save_r9 - set $r10 = $kgm_contextp.save_r10 - set $r11 = $kgm_contextp.save_r11 - set $r12 = $kgm_contextp.save_r12 - set $r13 = $kgm_contextp.save_r13 - set $r14 = $kgm_contextp.save_r14 - set $r15 = $kgm_contextp.save_r15 - set $r16 = $kgm_contextp.save_r16 - set $r17 = $kgm_contextp.save_r17 - set $r18 = $kgm_contextp.save_r18 - set $r19 = $kgm_contextp.save_r19 - set $r20 = $kgm_contextp.save_r20 - set $r21 = $kgm_contextp.save_r21 - set $r22 = $kgm_contextp.save_r22 - set $r23 = $kgm_contextp.save_r23 - set $r24 = $kgm_contextp.save_r24 - set $r25 = $kgm_contextp.save_r25 - set $r26 = $kgm_contextp.save_r26 - set $r27 = $kgm_contextp.save_r27 - set $r28 = $kgm_contextp.save_r28 - set $r29 = $kgm_contextp.save_r29 - set $r30 = $kgm_contextp.save_r30 - set $r31 = $kgm_contextp.save_r31 - - set $cr = $kgm_contextp.save_cr - set $ctr = $kgm_contextp.save_ctr - else - if ($kgm_mtype == $kgm_mtype_i386) - set $kgm_contextp = (struct x86_kernel_state *) $arg0 - set $ebx = $kgm_contextp->k_ebx - set $ebp = $kgm_contextp->k_ebp - set $edi = $kgm_contextp->k_edi - set $esi = $kgm_contextp->k_esi - set $eip = $kgm_contextp->k_eip - set $pc = $kgm_contextp->k_eip - else - if ($kgm_mtype == $kgm_mtype_x86_64) - set $kgm_contextp = (struct x86_kernel_state *) $arg0 - set $rbx = $kgm_contextp->k_rbx - set $rbp = $kgm_contextp->k_rbp - set $r12 = $kgm_contextp->k_r12 - set $r13 = $kgm_contextp->k_r13 - set $r14 = $kgm_contextp->k_r14 - set $r15 = $kgm_contextp->k_r15 - set $rip = $kgm_contextp->k_rip - set $pc = $kgm_contextp->k_rip - else - echo loadcontext not supported on this architecture\n - end - end - end -end - -define resetcorectx - select 0 - if ($kgm_mtype == $kgm_mtype_ppc) - set $kgm_corecontext = (struct savearea *) kdp.saved_state - loadcontext $kgm_corecontext - else - if ($kgm_mtype == $kgm_mtype_i386) - set $kdpstatep = (struct x86_saved_state32 *) kdp.saved_state - set $ebx = $kdpstatep->ebx - set $ebp = $kdpstatep->ebp - set $edi = $kdpstatep->edi - set $esi = $kdpstatep->esi - set $eip = $kdpstatep->eip - set $eax = $kdpstatep->eax - set $ecx = $kdpstatep->ecx - set $edx = $kdpstatep->edx - flushregs - flushstack - set $pc = $kdpstatep->eip - update - else - echo resetcorectx not supported on this architecture\n - end - end - showcontext_int -end - -document resetcorectx -Syntax: resetcorectx -| The corefile equivalent of "resetctx". Returns to the original -| execution context (that of the active thread at the time of the NMI or -| panic). This command should be issued if you wish to resume -| execution after using the "switchtocorethread" command. 
-end - -#Helper function for "showallgdbstacks" - -define showgdbthread - printf " 0x%08x ", $arg0 - set $kgm_thread = *(struct thread *)$arg0 - printf "0x%08x ", $arg0 - printf "%3d ", $kgm_thread.sched_pri - set $kgm_state = $kgm_thread.state - if $kgm_state & 0x80 - printf "I" - end - if $kgm_state & 0x40 - printf "P" - end - if $kgm_state & 0x20 - printf "A" - end - if $kgm_state & 0x10 - printf "H" - end - if $kgm_state & 0x08 - printf "U" - end - if $kgm_state & 0x04 - printf "R" - end - if $kgm_state & 0x02 - printf "S" - end - if $kgm_state & 0x01 - printf "W\t" - printf "0x%08x ", $kgm_thread.wait_queue - output /a (unsigned) $kgm_thread.wait_event - if ($kgm_thread.uthread != 0) - set $kgm_uthread = (struct uthread *)$kgm_thread.uthread - if ($kgm_uthread->uu_wmesg != 0) - printf " \"%s\"", $kgm_uthread->uu_wmesg - end - end - end - if $arg1 != 0 - if ($kgm_thread.kernel_stack != 0) - if ($kgm_thread.reserved_stack != 0) - printf "\n\t\treserved_stack=0x%08x", $kgm_thread.reserved_stack - end - printf "\n\t\tkernel_stack=0x%08x", $kgm_thread.kernel_stack - if ($kgm_mtype == $kgm_mtype_ppc) - set $mysp = $kgm_thread.machine.pcb->save_r1 - end - if ($kgm_mtype == $kgm_mtype_i386) - set $kgm_statep = (struct x86_kernel_state *) \ - ($kgm_thread->kernel_stack + kernel_stack_size \ - - sizeof(struct x86_kernel_state)) - set $mysp = $kgm_statep->k_ebp - end - if ($kgm_mtype == $kgm_mtype_arm) - if (((unsigned long)$r7 < ((unsigned long) ($kgm_thread->kernel_stack+kernel_stack_size))) \ - && ((unsigned long)$r7 > (unsigned long) ($kgm_thread->kernel_stack))) - set $mysp = $r7 - else - set $kgm_statep = (struct arm_saved_state *)$kgm_thread.machine.kstackptr - set $mysp = $kgm_statep->r[7] - end - end - set $prevsp = 0 - printf "\n\t\tstacktop=0x%08x", $mysp - if ($arg2 == 0) - switchtoact $arg0 - else - switchtocorethread $arg0 - end - bt - else - printf "\n\t\t\tcontinuation=" - output /a (unsigned) $kgm_thread.continuation - end - printf "\n" - else - printf "\n" - end -end - -#Use of this macro is currently (8/04) blocked by the fact that gdb -#stops evaluating macros when encountering an error, such as a failure -#to read memory from a certain location. Until this issue (described in -#3758949) is addressed, evaluation of this macro may stop upon -#encountering such an error. - -define showallgdbstacks - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - showactheader - showgdbthread $kgm_actp 1 0 - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end - resetctx -end - -document showallgdbstacks -Syntax: showallgdbstacks -| An alternative to "showallstacks". Iterates through the task list and -| displays a gdb generated backtrace for each kernel thread. It is -| advantageous in that it is much faster than "showallstacks", and -| decodes function call arguments and displays source level traces, but -| it has the drawback that it doesn't determine if frames belong to -| functions from kernel extensions, as with "showallstacks". -| This command may terminate prematurely because of a gdb bug -| (Radar 3758949), which stops macro evaluation on memory read -| errors. 
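-| Typical use after attaching to a target is simply:
-| (gdb) showallgdbstacks
-| For corefiles, use the "showallgdbcorestacks" variant instead.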
-end - -define showallgdbcorestacks - select 0 - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - showactheader - showgdbthread $kgm_actp 1 1 - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end - resetcorectx -end - - -document showallgdbcorestacks -Syntax: showallgdbcorestacks -|Corefile version of "showallgdbstacks" -end - - -define switchtouserthread - select 0 - if ($kgm_mtype == $kgm_mtype_ppc) - if ($kdp_act_counter == 0) - set $kdpstate = (struct savearea *) kdp.saved_state - end - set $kdp_act_counter = $kdp_act_counter + 1 - set $newact = (struct thread *) $arg0 - _kgm_flush_loop - set $checkpc = $newact->machine->upcb.save_srr0 - if ($checkpc == 0) - echo This activation does not appear to have - echo \20 a valid user context.\n - else - set (struct savearea *) kdp.saved_state=$newact->machine->upcb - set $pc = $checkpc -#flush and update seem to be executed lazily by gdb on Tiger, hence the -#repeated invocations - see 3743135 - _kgm_flush_loop -# This works because the new pmap is used only for reads - set kdp_pmap = $newact->task->map->pmap - _kgm_flush_loop - _kgm_update_loop - end - else - echo switchtouserthread not implemented for this architecture.\n - end -end - -document switchtouserthread -Syntax: switchtouserthread
-| Analogous to switchtoact, but switches to the user context of a -| specified thread address. Similar to the "showuserstack" -| command, but this command does not return gdb to the kernel context -| immediately. This is to assist with the following (rather risky) -| manoeuvre - upon switching to the user context and virtual address -| space, the user may choose to call remove-symbol-file on the -| mach_kernel symbol file, and then add-symbol-file on the user space -| binary's symfile. gdb can then generate symbolic backtraces -| for the user space thread. To return to the -| kernel context and virtual address space, the process must be -| reversed, i.e. call remove-symbol-file on the user space symbols, and -| then add-symbol-file on the appropriate mach_kernel, and issue the -| "resetstacks" command. Note that gdb may not react kindly to all these -| symbol file switches. The same restrictions that apply to "showuserstack" -| apply here - pages that have been paged out cannot be read while in the -| debugger context, so backtraces may terminate early. -| If the virtual addresses in the stack trace do not conflict with those -| of symbols in the kernel's address space, it may be sufficient to -| just do an add-symbol-file on the user space binary's symbol file. -| Note that while this command works on Panther's gdb, an issue -| with Tiger gdb (3743135) appears to hamper the evaluation of this -| macro in some cases. -end - -define showmetaclass - set $kgm_metaclassp = (OSMetaClass *)$arg0 - printf "%-5d", $kgm_metaclassp->instanceCount - printf "x %5d bytes", $kgm_metaclassp->classSize - printf " %s\n", $kgm_metaclassp->className->string -end - -define showstring - printf "\"%s\"", ((OSString *)$arg0)->string -end - -define shownumber - printf "%lld", ((OSNumber *)$arg0)->value -end - -define showboolean - if ($arg0 == gOSBooleanFalse) - printf "No" - else - printf "Yes" - end -end - -define showdatabytes - set $kgm_data = (OSData *)$arg0 - - printf "<" - set $kgm_datap = (const unsigned char *) $kgm_data->data - set $kgm_idx = 0 - while ( $kgm_idx < $kgm_data->length ) - printf "%02X", *$kgm_datap - set $kgm_datap = $kgm_datap + 1 - set $kgm_idx = $kgm_idx + 1 - end - printf ">\n" -end - -define showdata - set $kgm_data = (OSData *)$arg0 - - printf "<" - set $kgm_datap = (const unsigned char *) $kgm_data->data - - set $kgm_printstr = 0 - if (0 == (3 & (unsigned int)$kgm_datap) && ($kgm_data->length >= 3)) - set $kgm_bytes = *(unsigned int *) $kgm_datap - if (0xffff0000 & $kgm_bytes) - set $kgm_idx = 0 - set $kgm_printstr = 1 - while ($kgm_idx++ < 4) - set $kgm_bytes = $kgm_bytes >> 8 - set $kgm_char = 0xff & $kgm_bytes - if ($kgm_char && (($kgm_char < 0x20) || ($kgm_char > 0x7e))) - set $kgm_printstr = 0 - end - end - end - end - - set $kgm_idx = 0 - if ($kgm_printstr) - set $kgm_quoted = 0 - while ($kgm_idx < $kgm_data->length) - set $kgm_char = $kgm_datap[$kgm_idx++] - if ($kgm_char) - if (0 == $kgm_quoted) - set $kgm_quoted = 1 - if ($kgm_idx > 1) - printf ",\"" - else - printf "\"" - end - end - printf "%c", $kgm_char - else - if ($kgm_quoted) - set $kgm_quoted = 0 - printf "\"" - end - end - end - if ($kgm_quoted) - printf "\"" - end - else - if (0 == (3 & (unsigned int)$kgm_datap)) - while (($kgm_idx + 3) <= $kgm_data->length) - printf "%08x", *(unsigned int *) &$kgm_datap[$kgm_idx] - set $kgm_idx = $kgm_idx + 4 - end - end - while ($kgm_idx < $kgm_data->length) - printf "%02x", $kgm_datap[$kgm_idx++] - end - end - printf ">" -end - -define showdictionaryint - set $kgm$arg0_dict = 
(OSDictionary *)$arg1 - - printf "{" - set $kgm$arg0_idx = 0 - while ($kgm$arg0_idx < $kgm$arg0_dict->count) - set $kgm_obj = $kgm$arg0_dict->dictionary[$kgm$arg0_idx].key - showobjectint _$arg0 $kgm_obj - printf "=" - set $kgm_obj = $kgm$arg0_dict->dictionary[$kgm$arg0_idx++].value - showobjectint _$arg0 $kgm_obj - if ($kgm$arg0_idx < $kgm$arg0_dict->count) - printf "," - end - end - printf "}" -end - -define indent - set $kgm_idx = 0 - while ($kgm_idx < $arg0) - if ($arg1 & (1 << $kgm_idx++)) - printf "| " - else - printf " " - end - end -end - -define showregdictionary - indent $kgm_reg_depth+2 $arg1 - printf "{\n" - - set $kgm_reg_idx = 0 - while ($kgm_reg_idx < $arg0->count) - indent $kgm_reg_depth+2 $arg1 - printf " " - set $kgm_obj = $arg0->dictionary[$kgm_reg_idx].key - showobjectint _ $kgm_obj - printf " = " - - set $kgm_obj = $arg0->dictionary[$kgm_reg_idx++].value - showobjectint _ $kgm_obj - printf "\n" - end - indent $kgm_reg_depth+2 $arg1 - printf "}\n" -end - - -define showorderedsetarrayint - set $kgm$arg0_array = (_Element *)$arg1 - set $kgm$arg0_count = $arg2 - - set $kgm$arg0_idx = 0 - while ($kgm$arg0_idx < $kgm$arg0_count) - set $kgm_obj = $kgm$arg0_array[$kgm$arg0_idx++] - showobjectint _$arg0 $kgm_obj - if ($kgm$arg0_idx < $kgm$arg0_count) - printf "," - end - end -end - -define showorderedsetint - set $kgm_array = ((OSOrderedSet *)$arg1)->array - set $count = ((OSOrderedSet *)$arg1)->count - printf "[" - showorderedsetarrayint $arg0 $kgm_array $count - printf "]" -end - -define showarraysetint - set $kgm$arg0_array = (OSArray *)$arg1 - - set $kgm$arg0_idx = 0 - while ($kgm$arg0_idx < $kgm$arg0_array->count) - set $kgm_obj = $kgm$arg0_array->array[$kgm$arg0_idx++] - showobjectint _$arg0 $kgm_obj - if ($kgm$arg0_idx < $kgm$arg0_array->count) - printf "," - end - end -end - -define showarrayint - printf "(" - showarraysetint $arg0 $arg1 - printf ")" -end - -define showsetint - set $kgm_array = ((OSSet *)$arg1)->members - printf "[" - showarraysetint $arg0 $kgm_array - printf "]" -end - - -define showobjectint - set $kgm_obj = (OSObject *) $arg1 - set $kgm_vt = *((void **) $arg1) - - if ($kgm_lp64 || $kgm_mtype == $kgm_mtype_arm) - set $kgm_vt = $kgm_vt - 2 * sizeof(void *) - end - - if ($kgm_show_object_addrs) - printf "`object " - showptr $arg1 - printf ", vt " - output /a (unsigned long) $kgm_vt - if ($kgm_show_object_retain) - printf ", retain count %d, container retain %d", (0xffff & $kgm_obj->retainCount), $kgm_obj->retainCount >> 16 - end - printf "` " - end - - # No multiple-inheritance - set $kgm_shown = 0 - if ($kgm_vt == &_ZTV8OSString) - showstring $arg1 - set $kgm_shown = 1 - end - if ($kgm_vt == &_ZTV8OSSymbol) - showstring $arg1 - set $kgm_shown = 1 - end - if ($kgm_vt == &_ZTV8OSNumber) - shownumber $arg1 - set $kgm_shown = 1 - end - if ($kgm_vt == &_ZTV6OSData) - if $kgm_show_data_alwaysbytes == 1 - showdatabytes $arg1 - else - showdata $arg1 - end - set $kgm_shown = 1 - end - if ($kgm_vt == &_ZTV9OSBoolean) - showboolean $arg1 - set $kgm_shown = 1 - end - if ($kgm_vt == &_ZTV12OSDictionary) - showdictionaryint _$arg0 $arg1 - set $kgm_shown = 1 - end - if ($kgm_vt == &_ZTV7OSArray) - showarrayint _$arg0 $arg1 - set $kgm_shown = 1 - end - if ($kgm_vt == &_ZTV5OSSet) - showsetint _$arg0 $arg1 - set $kgm_shown = 1 - end - if ($kgm_vt == &_ZTV12OSOrderedSet) - showorderedsetint _$arg0 $arg1 - set $kgm_shown = 1 - end - - if ($kgm_shown != 1) - if ($kgm_show_object_addrs == 0) - printf "`object " - showptr $arg1 - printf ", vt " - output /a (unsigned long) 
$kgm_vt - printf "`" - end - end -end - -define showobject - set $kgm_save = $kgm_show_object_addrs - set $kgm_show_object_addrs = 1 - set $kgm_show_object_retain = 1 - showobjectint _ $arg0 - set $kgm_show_object_addrs = $kgm_save - set $kgm_show_object_retain = 0 - printf "\n" -end -document showobject -Syntax: (gdb) showobject -| Show info about an OSObject - its vtable ptr and retain count. -| If the object is a simple container class, more info will be shown. -end - -define dictget - set $kgm_dictp = (OSDictionary *)$arg0 - set $kgm_keyp = (const OSSymbol *)$arg1 - set $kgm_idx = 0 - set $kgm_result = 0 - while (($kgm_idx < $kgm_dictp->count) && ($kgm_result == 0)) - if ($kgm_keyp == $kgm_dictp->dictionary[$kgm_idx].key) - set $kgm_result = $kgm_dictp->dictionary[$kgm_idx].value - end - set $kgm_idx = $kgm_idx + 1 - end -end - - -define _registryentryrecurseinit - set $kgm_re = (IOService *)$arg1 - set $kgm$arg0_stack = (unsigned long long) $arg2 - - if ($arg3) - set $kgm$arg0_stack = $kgm$arg0_stack | (1ULL << $kgm_reg_depth) - else - set $kgm$arg0_stack = $kgm$arg0_stack & ~(1ULL << $kgm_reg_depth) - end - - dictget $kgm_re->fRegistryTable $kgm_childkey - set $kgm$arg0_child_array = (OSArray *) $kgm_result - - if ($kgm$arg0_child_array) - set $kgm$arg0_child_count = $kgm$arg0_child_array->count - else - set $kgm$arg0_child_count = 0 - end - - if ($kgm$arg0_child_count) - set $kgm$arg0_stack = $kgm$arg0_stack | (2ULL << $kgm_reg_depth) - else - set $kgm$arg0_stack = $kgm$arg0_stack & ~(2ULL << $kgm_reg_depth) - end -end - -define findregistryentryrecurse - set $kgm_registry_entry = 0 - _registryentryrecurseinit $arg0 $arg1 $arg2 $arg3 - - dictget $kgm_re->fRegistryTable $kgm_namekey - if ($kgm_result == 0) - dictget $kgm_re->fRegistryTable gIONameKey - end - if ($kgm_result == 0) - dictget $kgm_re->fPropertyTable gIOClassKey - end - - if ($kgm_result != 0) - set $str = ((OSString *) $kgm_result)->string - strcmp_nomalloc $str $kgm_reg_find_str0 $kgm_reg_find_str1 $kgm_reg_find_str2 $kgm_reg_find_str3 $kgm_reg_find_str4 $kgm_reg_find_str5 $kgm_reg_find_str6 $kgm_reg_find_str7 $kgm_reg_find_str8 - if $kgm_findregistry_verbose - echo . 
- end - - if $kgm_strcmp_result == 0 - if $kgm_findregistry_verbose - printf "\n%s:\n | ", ((OSString *) $kgm_result)->string - showobject $kgm_re - printf " | " - print $kgm_re - end - - # don't populate $kgm_registry_entry if we want to show everything - if !$kgm_findregistry_continue - set $kgm_registry_entry = $kgm_re - end - end - end - - # recurse - if (!$kgm_registry_entry && ($kgm$arg0_child_count != 0)) - set $kgm_reg_depth = $kgm_reg_depth + 1 - set $kgm$arg0_child_idx = 0 - - while ($kgm$arg0_child_idx < $kgm$arg0_child_count) - set $kgm_re = $kgm$arg0_child_array->array[$kgm$arg0_child_idx++] - set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) - if $kgm_reg_depth >= $kgm_reg_depth_max + 1 - loop_break - end - findregistryentryrecurse _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib - if $kgm_registry_entry - loop_break - end - end - set $kgm_reg_depth = $kgm_reg_depth - 1 - end -end - -define findregdictvalue - set $kgm_registry_value = 0 - set $kgm_reg_idx = 0 - while ($kgm_reg_idx < $arg0->count) - set $kgm_obj = $arg0->dictionary + $kgm_reg_idx - set $str = ((OSString *)$kgm_obj->key)->string - strcmp_nomalloc $str $kgm_reg_find_str0 $kgm_reg_find_str1 $kgm_reg_find_str2 $kgm_reg_find_str3 $kgm_reg_find_str4 $kgm_reg_find_str5 $kgm_reg_find_str6 $kgm_reg_find_str7 $kgm_reg_find_str8 - - if $kgm_strcmp_result == 0 - set $kgm_registry_value = $kgm_obj->value - if $kgm_findregistry_verbose - showobject $kgm_registry_value - print $kgm_registry_value - end - loop_break - end - set $kgm_reg_idx = $kgm_reg_idx + 1 - end -end - -define setfindregistrystr - set $kgm_reg_find_str0 = 0 - set $kgm_reg_find_str1 = 0 - set $kgm_reg_find_str2 = 0 - set $kgm_reg_find_str3 = 0 - set $kgm_reg_find_str4 = 0 - set $kgm_reg_find_str5 = 0 - set $kgm_reg_find_str6 = 0 - set $kgm_reg_find_str7 = 0 - set $kgm_reg_find_str8 = 0 - - if $argc > 0 - set $kgm_reg_find_str0 = $arg0 - end - if $argc > 1 - set $kgm_reg_find_str1 = $arg1 - end - if $argc > 2 - set $kgm_reg_find_str2 = $arg2 - end - if $argc > 3 - set $kgm_reg_find_str3 = $arg3 - end - if $argc > 4 - set $kgm_reg_find_str4 = $arg4 - end - if $argc > 5 - set $kgm_reg_find_str5 = $arg5 - end - if $argc > 6 - set $kgm_reg_find_str6 = $arg6 - end - if $argc > 7 - set $kgm_reg_find_str7 = $arg7 - end - if $argc > 8 - set $kgm_reg_find_str8 = $arg8 - end -end - -document setfindregistrystr -Syntax: (gdb) setfindregistrystr [a] [b] [c] [d] [e] [f] [g] [h] [i] -| Store an encoded string into up to 9 arguments for use by -| findregistryprop or findregistryentry. The arguments are created -| through calls to strcmp_arg_pack64 -end - -define _findregistryprop - set $reg = (IOService *) $arg0 - set $kgm_props = $reg->fPropertyTable - set $kgm_findregistry_verbose = 0 - - findregdictvalue $kgm_props -end - -define findregistryprop - set $reg = (IOService *) $arg0 - set $kgm_props = $reg->fPropertyTable - - set $kgm_findregistry_verbose = 1 - findregdictvalue $kgm_props -end - -document findregistryprop -Syntax: (gdb) findregistryprop -| Given a registry entry, print out the contents for the property that matches -| the encoded string specified via setfindregistrystr. 
-| -| For example, the following will print out the "intel-pic" property stored in -| the AppleACPIPlatformExpert registry entry $pe_entry: -| strcmp_arg_pack64 'i' 'n' 't' 'e' 'l' '-' 'p' 'i' -| set $intel_pi = $kgm_strcmp_arg -| strcmp_arg_pack64 'c' 0 0 0 0 0 0 0 -| set $c = $kgm_strcmp_arg -| setfindregistrystr $intel_pi $c -| findregistryprop $pe_entry -end - -define findregistryentryint - if !$kgm_reg_plane - set $kgm_reg_plane = (IORegistryPlane *) gIOServicePlane - end - - if !$kgm_reg_plane - printf "Please load kgmacros after KDP attaching to the target.\n" - else - set $kgm_namekey = (OSSymbol *) $kgm_reg_plane->nameKey - set $kgm_childkey = (OSSymbol *) $kgm_reg_plane->keys[1] - if $kgm_findregistry_verbose - printf "Searching" - end - findregistryentryrecurse _ $arg0 0 0 - end -end - -define _findregistryentry - set $kgm_findregistry_verbose = 0 - set $kgm_findregistry_continue = 0 - set $kgm_reg_depth = 0 - - findregistryentryint gRegistryRoot -end - -define findregistryentry - set $kgm_findregistry_verbose = 1 - set $kgm_findregistry_continue = 0 - set $kgm_reg_depth = 0 - - findregistryentryint gRegistryRoot -end - -define findregistryentries - set $kgm_findregistry_verbose = 1 - set $kgm_findregistry_continue = 1 - set $kgm_reg_depth = 0 - - findregistryentryint gRegistryRoot -end - -document findregistryentry -Syntax: (gdb) findregistryentry -| Search for a registry entry that matches the encoded string specified through -| setfindregistrystr. You can alter the search depth through use of -| $kgm_reg_depth_max. -| -| For example, the following will pull out the AppleACPIPlatformExpert registry -| entry: -| strcmp_arg_pack64 'A' 'p' 'p' 'l' 'e' 'A' 'C' 'P' -| set $AppleACP = $kgm_strcmp_arg -| strcmp_arg_pack64 'I' 'P' 'l' 'a' 't' 'f' 'o' 'r' -| set $IPlatfor = $kgm_strcmp_arg -| strcmp_arg_pack64 'm' 'E' 'x' 'p' 'e' 'r' 't' 0 -| set $mExpert = $kgm_strcmp_arg -| setfindregistrystr $AppleACP $IPlatfor $mExpert -| findregistryentry -end - -document findregistryentries -Syntax: (gdb) findregistryentries -| Search for all registry entries that match the encoded string specified through -| setfindregistrystr. You can alter the search depth through use of -| $kgm_reg_depth_max. See findregistryentry for an example of how to encode a string. -end - - -define showregistryentryrecurse - _registryentryrecurseinit $arg0 $arg1 $arg2 $arg3 - - indent $kgm_reg_depth $kgm$arg0_stack - printf "+-o " - - dictget $kgm_re->fRegistryTable $kgm_namekey - if ($kgm_result == 0) - dictget $kgm_re->fRegistryTable gIONameKey - end - if ($kgm_result == 0) - dictget $kgm_re->fPropertyTable gIOClassKey - end - - if ($kgm_result != 0) - printf "%s", ((OSString *)$kgm_result)->string - else - if (((IOService*)$kgm_re)->pwrMgt && ((IOService*)$kgm_re)->pwrMgt->Name) - printf "%s", ((IOService*)$kgm_re)->pwrMgt->Name - else -# printf ", guessclass " -# guessclass $kgm_re - printf "??" - end - end - - - printf " <object " - showptr $kgm_re - printf ", id 0x%llx, ", $kgm_re->IORegistryEntry::reserved->fRegistryEntryID - printf "vtable " - set $kgm_vt = (unsigned long) *(void**) $kgm_re - if ($kgm_lp64 || $kgm_mtype == $kgm_mtype_arm) - set $kgm_vt = $kgm_vt - 2 * sizeof(void *) - end - output /a $kgm_vt - - if ($kgm_vt != &_ZTV15IORegistryEntry) - printf ", " - set $kgm_state = $kgm_re->__state[0] - # kIOServiceRegisteredState - if (0 == ($kgm_state & 2)) - printf "!" - end - printf "registered, " - # kIOServiceMatchedState - if (0 == ($kgm_state & 4)) - printf "!"
- end - printf "matched, " - # kIOServiceInactiveState - if ($kgm_state & 1) - printf "in" - end - printf "active, busy %d, retain count %d", (0xff & $kgm_re->__state[1]), (0xffff & $kgm_re->retainCount) - end - printf ">\n" - - if ($kgm_show_props) - set $kgm_props = $kgm_re->fPropertyTable - showregdictionary $kgm_props $kgm$arg0_stack - end - - # recurse - if ($kgm$arg0_child_count != 0) - - set $kgm_reg_depth = $kgm_reg_depth + 1 - set $kgm$arg0_child_idx = 0 - - while ($kgm$arg0_child_idx < $kgm$arg0_child_count) - set $kgm_re = $kgm$arg0_child_array->array[$kgm$arg0_child_idx++] - set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) - if $kgm_reg_depth >= $kgm_reg_depth_max + 1 - loop_break - end - showregistryentryrecurse _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib - end - - set $kgm_reg_depth = $kgm_reg_depth - 1 - end -end - -define showregistryentryint - if !$kgm_reg_plane - set $kgm_reg_plane = (IORegistryPlane *) gIOServicePlane - end - - if !$kgm_reg_plane - printf "Please load kgmacros after KDP attaching to the target.\n" - else - set $kgm_namekey = (OSSymbol *) $kgm_reg_plane->nameKey - set $kgm_childkey = (OSSymbol *) $kgm_reg_plane->keys[1] - showregistryentryrecurse _ $arg0 0 0 - end -end - -define showregistry - set $kgm_reg_depth = 0 - set $kgm_show_props = 0 - showregistryentryint gRegistryRoot -end -document showregistry -Syntax: (gdb) showregistry -| Show info about all registry entries in the current plane. You can specify the maximum -| display depth with $kgm_reg_depth_max. -end - -define showregistryprops - set $kgm_reg_depth = 0 - set $kgm_show_props = 1 - showregistryentryint gRegistryRoot -end -document showregistryprops -Syntax: (gdb) showregistryprops -| Show info about all registry entries in the current plane, and their properties. -| set $kgm_show_object_addrs = 1 and/or set $kgm_show_object_retain = 1 will display -| more verbose information -end - -define showregistryentry - set $kgm_reg_depth = 0 - set $kgm_show_props = 1 - showregistryentryint $arg0 -end -document showregistryentry -Syntax: (gdb) showregistryentry -| Show info about a registry entry; its properties and descendants in the current plane. -end - -define setregistryplane - if ($arg0 != 0) - set $kgm_reg_plane = (IORegistryPlane *) $arg0 - else - showobjectint _ gIORegistryPlanes - printf "\n" - end -end -document setregistryplane -Syntax: (gdb) setregistryplane -| Set the plane to be used for the iokit registry macros. An argument of zero will -| display known planes. -end - -define guessclass - set $kgm_classidx = 0 - set $kgm_lookvt = *((void **) $arg0) - set $kgm_bestvt = (void *) 0 - set $kgm_bestidx = 0 - - while $kgm_classidx < sAllClassesDict->count - set $kgm_meta = (OSMetaClass *) sAllClassesDict->dictionary[$kgm_classidx].value - - set $kgm_vt = *((void **) $kgm_meta) - - if (($kgm_vt > $kgm_bestvt) && ($kgm_vt < $kgm_lookvt)) - set $kgm_bestvt = $kgm_vt - set $kgm_bestidx = $kgm_classidx - end - set $kgm_classidx = $kgm_classidx + 1 - end - printf "%s", sAllClassesDict->dictionary[$kgm_bestidx].key->string -end - -define showallclasses - set $kgm_classidx = 0 - while $kgm_classidx < sAllClassesDict->count - set $kgm_meta = (OSMetaClass *) sAllClassesDict->dictionary[$kgm_classidx++].value - showmetaclass $kgm_meta - end -end - -document showallclasses -Syntax: (gdb) showallclasses -| Show the instance counts and ivar size of all OSObject subclasses. See ioclasscount man page for details. 
-end - -define showioalloc - printf " Instance allocation = 0x%08lx = %4ld K\n", (int) debug_ivars_size, ((int) debug_ivars_size) / 1024 - printf "Container allocation = 0x%08lx = %4ld K\n", (int) debug_container_malloc_size, ((int) debug_container_malloc_size) / 1024 - printf " IOMalloc allocation = 0x%08lx = %4ld K\n", (int) debug_iomalloc_size, ((int) debug_iomalloc_size) / 1024 - printf " Pageable allocation = 0x%08lx = %4ld K\n", (vm_size_t) debug_iomallocpageable_size, ((vm_size_t) debug_iomallocpageable_size) / 1024 -end - -document showioalloc -Syntax: (gdb) showioalloc -| Show some accounting of memory allocated by IOKit allocators. See ioalloccount man page for details. -end - -define showosobjecttracking - set $kgm_next = (OSObjectTracking *) gOSObjectTrackList.next - while $kgm_next != &gOSObjectTrackList - set $obj = (OSObject *) ($kgm_next+1) - showobject $obj - set $kgm_idx = 0 - while $kgm_idx < (sizeof($kgm_next->bt) / sizeof($kgm_next->bt[0])) - if ((unsigned long) $kgm_next->bt[$kgm_idx] > (unsigned long) &last_kernel_symbol) - showkmodaddr $kgm_next->bt[$kgm_idx] - printf "\n" - else - if ((unsigned long) $kgm_next->bt[$kgm_idx] > 0) - output /a $kgm_next->bt[$kgm_idx] - printf "\n" - end - end - set $kgm_idx = $kgm_idx + 1 - end - printf "\n" - set $kgm_next = (OSObjectTracking *) $kgm_next->link.next - end -end - -document showosobjecttracking -Syntax: (gdb) showosobjecttracking -| Show the list of tracked OSObject allocations with backtraces. -| Boot with the kOSTraceObjectAlloc (0x00400000) io debug flag set. -| Set gOSObjectTrackThread to 1 or a thread_t to capture new OSObjects allocated by a thread or all threads. -end - -# $kgm_readphys_force_kdp and $kgm_readphys_force_physmap -# can respectively cause physical memory access to use -# a KDP manual packet or the physical memory mapping -# even if the default behavior would be otherwise. -define readphysint - set $kgm_readphysint_result = 0xBAD10AD - - if ($kgm_readphys_force_kdp != 0) - set $kgm_readphys_use_kdp = 1 - else - if ($kgm_readphys_force_physmap) - set $kgm_readphys_use_kdp = 0 - else - set $kgm_readphys_use_kdp = ( kdp->is_conn > 0 ) - end - end - - if ($kgm_readphys_use_kdp) - - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_readphysmem64_req_t) - set $kgm_pkt = (kdp_readphysmem64_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_READPHYSMEM64 - set $kgm_pkt->hdr.len = sizeof(kdp_readphysmem64_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->address = (uint64_t)$arg0 - set $kgm_pkt->nbytes = $arg1 >> 3 - set $kgm_pkt->lcpu = $arg2 - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - set $kgm_pkt = (kdp_readphysmem64_reply_t *)&manual_pkt.data - if ($kgm_pkt->error == 0) - if $arg1 == 8 - set $kgm_readphysint_result = *((uint8_t *)$kgm_pkt->data) - end - if $arg1 == 16 - set $kgm_readphysint_result = *((uint16_t *)$kgm_pkt->data) - end - if $arg1 == 32 - set $kgm_readphysint_result = *((uint32_t *)$kgm_pkt->data) - end - if $arg1 == 64 - set $kgm_readphysint_result = *((uint64_t *)$kgm_pkt->data) - end - end - - else - # No KDP. 
Attempt to use physical memory mapping - - if ($kgm_mtype == $kgm_mtype_x86_64) - set $kgm_readphys_paddr_in_kva = (unsigned long long)$arg0 + physmap_base - else - if ($kgm_mtype == $kgm_mtype_arm) - set $kgm_readphys_paddr_in_kva = (unsigned long long)$arg0 - gPhysBase + gVirtBase - else - printf "readphys not available for current architecture.\n" - set $kgm_readphys_paddr_in_kva = 0 - end - end - if $kgm_readphys_paddr_in_kva - if $arg1 == 8 - set $kgm_readphysint_result = *((uint8_t *)$kgm_readphys_paddr_in_kva) - end - if $arg1 == 16 - set $kgm_readphysint_result = *((uint16_t *)$kgm_readphys_paddr_in_kva) - end - if $arg1 == 32 - set $kgm_readphysint_result = *((uint32_t *)$kgm_readphys_paddr_in_kva) - end - if $arg1 == 64 - set $kgm_readphysint_result = *((uint64_t *)$kgm_readphys_paddr_in_kva) - end - end - end -end - -define readphys8 - readphysint $arg0 8 $kgm_lcpu_self - output /a $arg0 - printf ":\t0x%02hhx\n", $kgm_readphysint_result - set $kgm_readphys_result = (uint64_t)$kgm_readphysint_result -end - -define readphys16 - readphysint $arg0 16 $kgm_lcpu_self - output /a $arg0 - printf ":\t0x%04hx\n", $kgm_readphysint_result - set $kgm_readphys_result = (uint64_t)$kgm_readphysint_result -end - -define readphys32 - readphysint $arg0 32 $kgm_lcpu_self - output /a $arg0 - printf ":\t0x%08x\n", $kgm_readphysint_result - set $kgm_readphys_result = (uint64_t)$kgm_readphysint_result -end - -define readphys64 - readphysint $arg0 64 $kgm_lcpu_self - output /a $arg0 - printf ":\t0x%016llx\n", $kgm_readphysint_result - set $kgm_readphys_result = (uint64_t)$kgm_readphysint_result -end - -define readphys - readphys32 $arg0 -end - -document readphys8 -| See readphys64 -end - -document readphys16 -| See readphys64 -end - -document readphys32 -| See readphys64 -end - -document readphys64 -| The argument is interpreted as a physical address, and the 64-bit word -| addressed is displayed. Saves 64-bit result in $kgm_readphys_result. 
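-| For example (the physical address is illustrative):
-| (gdb) readphys64 0x100000
-| The readphys8/readphys16/readphys32 variants take the same argument
-| and differ only in the access width.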
-end - -define writephysint - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_writephysmem64_req_t) - set $kgm_pkt = (kdp_writephysmem64_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_WRITEPHYSMEM64 - set $kgm_pkt->hdr.len = sizeof(kdp_writephysmem64_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->address = (uint64_t)$arg0 - set $kgm_pkt->nbytes = $arg1 >> 3 - set $kgm_pkt->lcpu = $arg3 - if $arg1 == 8 - set *(uint8_t *)$kgm_pkt->data = (uint8_t)$arg2 - end - if $arg1 == 16 - set *(uint16_t *)$kgm_pkt->data = (uint16_t)$arg2 - end - if $arg1 == 32 - set *(uint32_t *)$kgm_pkt->data = (uint32_t)$arg2 - end - if $arg1 == 64 - set *(uint64_t *)$kgm_pkt->data = (uint64_t)$arg2 - end - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - set $kgm_pkt = (kdp_writephysmem64_reply_t *)&manual_pkt.data - set $kgm_writephysint_result = $kgm_pkt->error -end - -define writephys8 - writephysint $arg0 8 $arg1 $kgm_lcpu_self -end - -define writephys16 - writephysint $arg0 16 $arg1 $kgm_lcpu_self -end - -define writephys32 - writephysint $arg0 32 $arg1 $kgm_lcpu_self -end - -define writephys64 - writephysint $arg0 64 $arg1 $kgm_lcpu_self -end - -document writephys8 -| See writephys64 -end - -document writephys16 -| See writephys64 -end - -document writephys32 -| See writephys64 -end - -document writephys64 -| The argument is interpreted as a physical address, and the second argument is -| written to that address as a 64-bit word. -end - -define addkextsyms - if ($argc <= 1) - if ($argc == 0) - printf "Adding kext symbols from in-kernel summary data.\n" - add-all-kexts - else - printf "Adding kext symbols from $arg0.\n" - shell echo cd `pwd` > /tmp/gdb-cd - cd $arg0 - source kcbmacros - source /tmp/gdb-cd - end - set $kgm_show_kmod_syms = 1 - else - printf "| Usage:\n|\n" - help addkextsyms - end -end - -document addkextsyms -| If specified without an argument, uses gdb's add-all-kexts command to load -| kext symbols. Otherwise, takes a directory of kext symbols generated with -| kextcache -y or kcgen and loads them into gdb. -| (gdb) addkextsyms -| - or - -| (gdb) addkextsyms /path/to/symboldir -end - -define showprocfiles - if ($argc == 1) - _showprocheader - _showprocfiles $arg0 - else - printf "| Usage:\n|\n" - help showprocfiles - end -end -document showprocfiles -Syntax: (gdb) showprocfiles -| Given a proc_t pointer, display the list of open file descriptors for the -| referenced process. 
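-| For example (the proc_t address is illustrative; one can be obtained
-| from "allproc" or "showprocinfo" output):
-| (gdb) showprocfiles 0xffffff800d24bd20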
-end - -define _showprocheader - printf "fd fileglob " - showptrhdrpad - printf " fg flags fg type fg data " - showptrhdrpad - printf " info\n" - printf "----- ----------" - if $kgm_lp64 - printf "--------" - end - printf " ---------- -------- ----------" - if $kgm_lp64 - printf "--------" - end - printf " -------------------\n" -end - -define _showprocfiles - set $kgm_spf_filedesc = ((proc_t)$arg0)->p_fd - set $kgm_spf_last = $kgm_spf_filedesc->fd_lastfile - set $kgm_spf_ofiles = $kgm_spf_filedesc->fd_ofiles - set $kgm_spf_count = 0 - while ($kgm_spf_count <= $kgm_spf_last) - if ($kgm_spf_ofiles[$kgm_spf_count] == 0) - # DEBUG: For files that were open, but are now closed - # printf "%-5d FILEPROC_NULL\n", $kgm_spf_count - else - # display fd #, fileglob address, fileglob flags - set $kgm_spf_flags = $kgm_spf_ofiles[$kgm_spf_count].f_flags - set $kgm_spf_fg = $kgm_spf_ofiles[$kgm_spf_count].f_fglob - printf "%-5d ", $kgm_spf_count - showptr $kgm_spf_fg - printf " 0x%08x ", $kgm_spf_flags - # decode fileglob type - set $kgm_spf_fgt = $kgm_spf_fg->fg_type - if ($kgm_spf_fgt == 1) - printf "VNODE " - end - if ($kgm_spf_fgt == 2) - printf "SOCKET " - end - if ($kgm_spf_fgt == 3) - printf "PSXSHM " - end - if ($kgm_spf_fgt == 4) - printf "PSXSEM " - end - if ($kgm_spf_fgt == 5) - printf "KQUEUE " - end - if ($kgm_spf_fgt == 6) - printf "PIPE " - end - if ($kgm_spf_fgt == 7) - printf "FSEVENTS" - end - if ($kgm_spf_fgt < 1 || $kgm_spf_fgt > 7) - printf "?: %-5d", $kgm_spf_fgt - end - - # display fileglob data address and decode interesting fact(s) - # about data, if we know any - set $kgm_spf_fgd = $kgm_spf_fg->fg_data - printf " " - showptr $kgm_spf_fgd - printf " " - if ($kgm_spf_fgt == 1) - set $kgm_spf_name = ((struct vnode *)$kgm_spf_fgd)->v_name - if ($kgm_spf_name == 0) - printf "(null)" - else - printf "%s", $kgm_spf_name - end - end - printf "\n" - end - set $kgm_spf_count = $kgm_spf_count + 1 - end -end - -# -# Show all the advisory file locks held by a process for each of the vnode -# type files that it has open; do this by walking the per process open file -# table and looking at any vnode type fileglob that has a non-NULL lock list -# associated with it. -# -define showproclocks - if ($argc == 1) - _showproclocks $arg0 - else - printf "| Usage:\n|\n" - help showproclocks - end -end -document showproclocks -Syntax: (gdb) showproclocks -| Given a proc_t pointer, display the list of advisory file locks held by the -| referenced process. 
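-| For example (the proc_t address is illustrative):
-| (gdb) showproclocks 0xffffff800d24bd20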
-end - -define _showproclocks - set $kgm_spl_filedesc = ((proc_t)$arg0)->p_fd - set $kgm_spl_last = $kgm_spl_filedesc->fd_lastfile - set $kgm_spl_ofiles = $kgm_spl_filedesc->fd_ofiles - set $kgm_spl_count = 0 - set $kgm_spl_seen = 0 - while ($kgm_spl_count <= $kgm_spl_last) - if ($kgm_spl_ofiles[$kgm_spl_count] == 0) - # DEBUG: For files that were open, but are now closed - # printf "%-5d FILEPROC_NULL\n", $kgm_spl_count - else - set $kgm_spl_fg = $kgm_spl_ofiles[$kgm_spl_count].f_fglob - # decode fileglob type - set $kgm_spl_fgt = $kgm_spl_fg->fg_type - if ($kgm_spl_fgt == 1) - set $kgm_spl_fgd = $kgm_spl_fg->fg_data - set $kgm_spl_name = ((struct vnode *)$kgm_spl_fgd)->v_name - set $kgm_spl_vnode = ((vnode_t)$kgm_spl_fgd) - set $kgm_spl_lockiter = $kgm_spl_vnode->v_lockf - if ($kgm_spl_lockiter != 0) - if ($kgm_spl_seen == 0) - _showvnodelockheader - end - set $kgm_spl_seen = $kgm_spl_seen + 1 - printf "( fd %d, name ", $kgm_spl_count - if ($kgm_spl_name == 0) - printf "(null) )\n" - else - printf "%s )\n", $kgm_spl_name - end - _showvnodelocks $kgm_spl_fgd - end - end - end - set $kgm_spl_count = $kgm_spl_count + 1 - end - printf "%d total locks for ", $kgm_spl_seen - showptr $arg0 - printf "\n" -end - -define showprocinfo - set $kgm_spi_proc = (proc_t)$arg0 - printf "Process " - showptr $kgm_spi_proc - printf "\n" - printf " name %s\n", $kgm_spi_proc->p_comm - printf " pid:%.8d", $kgm_spi_proc->p_pid - printf " task:" - showptr $kgm_spi_proc->task - printf " p_stat:%.1d", $kgm_spi_proc->p_stat - printf " parent pid:%.8d", $kgm_spi_proc->p_ppid - printf "\n" - # decode part of credential - set $kgm_spi_cred = $kgm_spi_proc->p_ucred - if ($kgm_spi_cred != 0) - printf "Cred: euid %d ruid %d svuid %d\n", $kgm_spi_cred->cr_posix.cr_uid, $kgm_spi_cred->cr_posix.cr_ruid, $kgm_spi_cred->cr_posix.cr_svuid - else - printf "Cred: (null)\n" - end - # decode flags - set $kgm_spi_flag = $kgm_spi_proc->p_flag - printf "Flags: 0x%08x\n", $kgm_spi_flag - if ($kgm_spi_flag & 0x00000001) - printf " 0x00000001 - may hold advisory locks\n" - end - if ($kgm_spi_flag & 0x00000002) - printf " 0x00000002 - has a controlling tty\n" - end - if ($kgm_spi_flag & 0x00000004) - printf " 0x00000004 - process is 64 bit\n" - else - printf " !0x00000004 - process is 32 bit\n" - end - if ($kgm_spi_flag & 0x00000008) - printf " 0x00000008 - no SIGCHLD on child stop\n" - end - if ($kgm_spi_flag & 0x00000010) - printf " 0x00000010 - waiting for child exec/exit\n" - end - if ($kgm_spi_flag & 0x00000020) - printf " 0x00000020 - has started profiling\n" - end - if ($kgm_spi_flag & 0x00000040) - printf " 0x00000040 - in select; wakeup/waiting danger\n" - end - if ($kgm_spi_flag & 0x00000080) - printf " 0x00000080 - was stopped and continued\n" - end - if ($kgm_spi_flag & 0x00000100) - printf " 0x00000100 - has set privileges since exec\n" - end - if ($kgm_spi_flag & 0x00000200) - printf " 0x00000200 - system process: no signals, stats, or swap\n" - end - if ($kgm_spi_flag & 0x00000400) - printf " 0x00000400 - timing out during a sleep\n" - end - if ($kgm_spi_flag & 0x00000800) - printf " 0x00000800 - debugged process being traced\n" - end - if ($kgm_spi_flag & 0x00001000) - printf " 0x00001000 - debugging process has waited for child\n" - end - if ($kgm_spi_flag & 0x00002000) - printf " 0x00002000 - exit in progress\n" - end - if ($kgm_spi_flag & 0x00004000) - printf " 0x00004000 - process has called exec\n" - end - if ($kgm_spi_flag & 0x00008000) - printf " 0x00008000 - owe process an addupc() XXX\n" - end - if ($kgm_spi_flag &
0x00010000) - printf " 0x00010000 - affinity for Rosetta children\n" - end - if ($kgm_spi_flag & 0x00020000) - printf " 0x00020000 - wants to run Rosetta\n" - end - if ($kgm_spi_flag & 0x00040000) - printf " 0x00040000 - has wait() in progress\n" - end - if ($kgm_spi_flag & 0x00080000) - printf " 0x00080000 - kdebug tracing on for this process\n" - end - if ($kgm_spi_flag & 0x00100000) - printf " 0x00100000 - blocked due to SIGTTOU or SIGTTIN\n" - end - if ($kgm_spi_flag & 0x00200000) - printf " 0x00200000 - has called reboot()\n" - end - if ($kgm_spi_flag & 0x00400000) - printf " 0x00400000 - is TBE state\n" - end - if ($kgm_spi_flag & 0x00800000) - printf " 0x00800000 - signal exceptions\n" - end - if ($kgm_spi_flag & 0x01000000) - printf " 0x01000000 - has thread cwd\n" - end - if ($kgm_spi_flag & 0x02000000) - printf " 0x02000000 - has vfork() children\n" - end - if ($kgm_spi_flag & 0x04000000) - printf " 0x04000000 - not allowed to attach\n" - end - if ($kgm_spi_flag & 0x08000000) - printf " 0x08000000 - vfork() in progress\n" - end - if ($kgm_spi_flag & 0x10000000) - printf " 0x10000000 - no shared libraries\n" - end - if ($kgm_spi_flag & 0x20000000) - printf " 0x20000000 - force quota for root\n" - end - if ($kgm_spi_flag & 0x40000000) - printf " 0x40000000 - no zombies when children exit\n" - end - if ($kgm_spi_flag & 0x80000000) - printf " 0x80000000 - don't hang on remote FS ops\n" - end - # decode state - set $kgm_spi_state = $kgm_spi_proc->p_stat - printf "State: " - if ($kgm_spi_state == 1) - printf "Idle\n" - end - if ($kgm_spi_state == 2) - printf "Run\n" - end - if ($kgm_spi_state == 3) - printf "Sleep\n" - end - if ($kgm_spi_state == 4) - printf "Stop\n" - end - if ($kgm_spi_state == 5) - printf "Zombie\n" - end - if ($kgm_spi_state == 6) - printf "Reaping\n" - end - if ($kgm_spi_state < 1 || $kgm_spi_state > 6) - printf "(Unknown)\n" - end -end - -document showprocinfo -Syntax: (gdb) showprocinfo -| Displays name, pid, parent and task for a proc_t. Decodes cred, flag and p_stat fields. 
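-| For example (the proc_t address is illustrative):
-| (gdb) showprocinfo 0xffffff800d24bd20
-| To decode every process in the system, see "allproc" and "zombproc".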
-end - -# -# dump the zombprocs -# -define zombproc - set $basep = (struct proc *)zombproc->lh_first - set $pp = $basep - while $pp - showprocinfo $pp - set $pp = $pp->p_list.le_next - end -end - -document zombproc -Syntax: (gdb) zombproc -| Routine to print out all procs in the zombie list -end - -# -# dump the zombstacks -# -define zombstacks - set $basep = (struct proc *)zombproc->lh_first - set $pp = $basep - while $pp - if $pp->p_stat != 5 - showtaskstacks $pp->task - end - set $pp = $pp->p_list.le_next - end -end - -document zombstacks -Syntax: (gdb) zombstacks -| Routine to print out all stacks of tasks that are exiting -end - - -# -# dump the allprocs -# -define allproc - set $basep = (struct proc *)allproc->lh_first - set $pp = $basep - while $pp - showprocinfo $pp - set $pp = $pp->p_list.le_next - end -end - -document allproc -Syntax: (gdb) allproc -| Routine to print out all process in the system -| which are not in the zombie list -end -define showprocsiblingint - set $kgm_sibling_ptr = (struct proc *)$arg0 - set $kgm_lx = $arg1 - while $kgm_lx - printf "| " - set $kgm_lx = $kgm_lx-3 - end - printf "|--%d %s [ 0x%llx ]\n", $kgm_sibling_ptr->p_pid, $kgm_sibling_ptr->p_comm, $kgm_sibling_ptr -end -define showproctreeint -#Initialize all the set variables used in this macro - set $kgm_basep1 = 0 - set $kgm_sibling_ptr = 0 - set $kgm_lx = 0 - set $kgm_tmp_base = 0 - set $kgm_head_ptr = 0 - set $kgm_search_pid = 0 - set $kgm_rev = 0 - set $kgm_x = 0 - - set $kgm_basep1 = (struct proc *)allproc->lh_first - if ($arg0 == 0) - set $kgm_head_ptr = (struct proc *)initproc - end - if ($arg0 > 0) - set $kgm_tmp_base = (struct proc *)allproc->lh_first - set $kgm_search_pid = $arg0 - while $kgm_tmp_base - if ( $kgm_tmp_base->p_pid == $kgm_search_pid) - if ($kgm_tmp_base->p_childrencnt > 0) - set $kgm_head_ptr = $kgm_tmp_base->p_children.lh_first - else - set $kgm_head_ptr = 0 - printf "No children present for PID=%d", $kgm_search_pid - end - loop_break - end - set $kgm_tmp_base = $kgm_tmp_base->p_list.le_next - end - end - set $kgm_rev = 0 - set $kgm_x = 0 - if ($kgm_head_ptr) - printf "PID PROCESS POINTER]\n" - printf "=== ======= =======\n" - printf "%d %s [ 0x%llx ]\n", $kgm_head_ptr->p_ppid, $kgm_head_ptr->p_pptr->p_comm, $kgm_head_ptr - printf "|--%d %s [ 0x%llx ]\n", $kgm_head_ptr->p_pid, $kgm_head_ptr->p_comm, $kgm_head_ptr - end - while ($kgm_head_ptr) - #Is childrencnt = 0? YES {=> no children} - if ($kgm_head_ptr->p_childrencnt == 0) - # Does it have sibling? - if($kgm_head_ptr->p_sibling.le_next == 0) - #No, it does not have sibling, so go back to its parent which will go to its sibling - if($kgm_head_ptr == $kgm_head_ptr->p_pptr) - loop_break - end - set $kgm_head_ptr = $kgm_head_ptr->p_pptr - if ($kgm_head_ptr == $kgm_tmp_base) - loop_break - end - if ($kgm_x > 3) - set $kgm_x = $kgm_x - 3 - end - set $kgm_rev = 1 - end - if($kgm_head_ptr->p_sibling.le_next != 0) - # Yes, it has sibling. 
So print sibling - set $kgm_rev = 0 - showprocsiblingint $kgm_head_ptr->p_sibling.le_next $kgm_x - set $kgm_head_ptr = $kgm_head_ptr->p_sibling.le_next - end - # childrencnt != 0 {=> it has children} - else - if ($kgm_rev == 1) - if($kgm_head_ptr->p_sibling.le_next == 0) - #No, it does not have sibling, so go back to its parent which will go to its sibling - if($kgm_head_ptr == $kgm_head_ptr->p_pptr) - loop_break - end - set $kgm_head_ptr = $kgm_head_ptr->p_pptr - if ($kgm_head_ptr == $kgm_tmp_base) - loop_break - end - - if ($kgm_x > 3) - set $kgm_x = $kgm_x - 3 - end - set $kgm_rev = 1 - end - if($kgm_head_ptr->p_sibling.le_next != 0) - set $kgm_rev = 0 - # Yes, it has sibling. So print sibling - showprocsiblingint $kgm_head_ptr->p_sibling.le_next $kgm_x - set $kgm_head_ptr = $kgm_head_ptr->p_sibling.le_next - end - else - set $kgm_head_ptr = $kgm_head_ptr->p_children.lh_first - set $kgm_x = $kgm_x + 3 - set $kgm_lx = $kgm_x - while $kgm_lx - printf "| " - set $kgm_lx = $kgm_lx-3 - end - printf "|--%d %s [ 0x%llx ] \n", $kgm_head_ptr->p_pid, $kgm_head_ptr->p_comm, $kgm_head_ptr - end - end - end - printf "\n" -#Unset all the set variables used in this macro - set $kgm_basep1 = 0 - set $kgm_sibling_ptr = 0 - set $kgm_lx = 0 - set $kgm_tmp_base = 0 - set $kgm_head_ptr = 0 - set $kgm_search_pid = 0 - set $kgm_rev = 0 - set $kgm_x = 0 -end -define showproctree - if ($argc > 0) - showproctreeint $arg0 - else - showproctreeint 0 - end -end -document showproctree -Syntax: (gdb) showproctree -| Routine to print the processes in the system in a hierarchical tree form. This routine does not print zombie processes. -| If no argument is given, showproctree will print all the processes in the system. -| If pid is specified, showproctree prints all the descendants of the indicated process -end - - -define print_vnode - set $vp = (struct vnode *)$arg0 - printf " " - printf " vp " - showptr $vp - printf " use %d", $vp->v_usecount - printf " io %d", $vp->v_iocount - printf " kuse %d", $vp->v_kusecount - printf " type %d", $vp->v_type - printf " flg 0x%.8x", $vp->v_flag - printf " lflg 0x%.8x", $vp->v_lflag - printf " par " - showptr $vp->v_parent - set $_name = (char *)$vp->v_name - if ($_name != 0) - printf " %s", $_name - end - if ($vp->v_type == VREG) && ($vp->v_un.vu_ubcinfo != 0) - printf " mapped %d", ($vp->v_un.vu_ubcinfo.ui_flags & 0x08) ? 
1 : 0 - end - printf "\n" -end - -document print_vnode -Syntax: (gdb) print_vnode -| Prints out the fields of a vnode struct -end - -define showprocvnodes - set $pp = (struct proc *)$arg0 - set $fdp = (struct filedesc *)$pp->p_fd - set $cvp = $fdp->fd_cdir - set $rvp = $fdp->fd_rdir - if $cvp - printf "Current Working Directory \n" - print_vnode $cvp - printf "\n" - end - if $rvp - printf "Current Root Directory \n" - print_vnode $rvp - printf "\n" - end - set $count = 0 - set $fpp = (struct fileproc **)($fdp->fd_ofiles) - set $fpo = (char)($fdp->fd_ofileflags[0]) - while $count < $fdp->fd_nfiles - #printf"fpp %x ", *$fpp - if *$fpp - set $fg =(struct fileglob *)((**$fpp)->f_fglob) - if $fg && (($fg)->fg_type == 1) - if $fdp->fd_ofileflags[$count] & 4 - printf "U: " - else - printf " " - end - printf "fd = %d ", $count - print_vnode $fg->fg_data - end - end - set $fpp = $fpp + 1 - set $count = $count + 1 - end -end - -document showprocvnodes -Syntax: (gdb) showprocvnodes -| Routine to print out all the open fds -| which are vnodes in a process -end - -define showallprocvnodes - set $basep = (struct proc *)allproc->lh_first - set $pp = $basep - while $pp - printf "============================================ \n" - showprocinfo $pp - showprocvnodes $pp - set $pp = $pp->p_list.le_next - end -end - -document showallprocvnodes -Syntax: (gdb) showallprocvnodes -| Routine to print out all the open fds -| which are vnodes -end - - -# -# dump the childrent of a proc -# -define showinitchild - set $basep = (struct proc *)initproc->p_children.lh_first - set $pp = $basep - while $pp - showprocinfo $pp - set $pp = $pp->p_sibling.le_next - end -end - -document showinitchild -Syntax: (gdb) showinitchild -| Routine to print out all processes in the system -| which are children of init process -end - - -define showmountallvnodes - set $mp = (struct mount *)$arg0 - set $basevp = (struct vnode *)$mp->mnt_vnodelist.tqh_first - set $vp = $basevp - printf "____________________ Vnode list Queue ---------------\n" - while $vp - print_vnode $vp - set $vp = $vp->v_mntvnodes->tqe_next - end - set $basevp = (struct vnode *)$mp->mnt_workerqueue.tqh_first - set $vp = $basevp - printf "____________________ Worker Queue ---------------\n" - while $vp - print_vnode $vp - set $vp = $vp->v_mntvnodes->tqe_next - end - set $basevp = (struct vnode *)$mp->mnt_newvnodes.tqh_first - set $vp = $basevp - printf "____________________ New vnodes Queue ---------------\n" - while $vp - print_vnode $vp - set $vp = $vp->v_mntvnodes->tqe_next - end -end -document showmountallvnodes -Syntax: showmountallvnodes -| Print the vnode inactive list -end - - -define showmountvnodes - set $mp = (struct mount *)$arg0 - set $basevp = (struct vnode *)$mp->mnt_vnodelist.tqh_first - set $vp = $basevp - printf "____________________ Vnode list Queue ---------------\n" - while $vp - print_vnode $vp - set $vp = $vp->v_mntvnodes->tqe_next - end -end -document showmountvnodes -Syntax: showmountvnodes -| Print the vnode list -end - - - -define showworkqvnodes - set $mp = (struct mount *)$arg0 - set $basevp = (struct vnode *)$mp->mnt_workerqueue.tqh_first - set $vp = $basevp - printf "____________________ Worker Queue ---------------\n" - while $vp - print_vnode $vp - set $vp = $vp->v_mntvnodes->tqe_next - end -end -document showworkqvnodes -Syntax: showworkqvnodes -| Print the vnode worker list -end - - -define shownewvnodes - set $mp = (struct mount *)$arg0 - set $basevp = (struct vnode *)$mp->mnt_newvnodes.tqh_first - set $vp = $basevp - printf 
"____________________ New vnodes Queue ---------------\n" - while $vp - print_vnode $vp - set $vp = $vp->v_mntvnodes->tqe_next - end -end - -document shownewvnodes -Syntax: shownewvnodes -| Print the new vnode list -end - - -# -# print mount point info -define print_mount - set $mp = (struct mount *)$arg0 - printf " " - printf " mp " - showptr $mp - printf " flag %x", $mp->mnt_flag - printf " kern_flag %x", $mp->mnt_kern_flag - printf " lflag %x", $mp->mnt_lflag - printf " type: %s", $mp->mnt_vfsstat.f_fstypename - printf " mnton: %s", $mp->mnt_vfsstat.f_mntonname - printf " mntfrom: %s", $mp->mnt_vfsstat.f_mntfromname - printf "\n" -end - -define showallmounts - set $mp=(struct mount *)mountlist.tqh_first - while $mp - print_mount $mp - set $mp = $mp->mnt_list.tqe_next - end -end - -document showallmounts -Syntax: showallmounts -| Print all mount points -end - -define pcprint - if (((unsigned long) $arg0 < (unsigned long) &_mh_execute_header || \ - (unsigned long) $arg0 >= (unsigned long) &last_kernel_symbol )) - showkmodaddr $arg0 - else - output /a $arg0 - end -end - -define mbuf_walkpkt - set $mp = (struct mbuf *)$arg0 - set $cnt = 1 - set $tot = 0 - while $mp - printf "%4d: %p [len %4d, type %2d, ", $cnt, $mp, \ - $mp->m_hdr.mh_len, $mp->m_hdr.mh_type - if mclaudit != 0 - mbuf_buf2mca $mp - printf ", " - end - set $tot = $tot + $mp->m_hdr.mh_len - printf "total %d]\n", $tot - set $mp = $mp->m_hdr.mh_nextpkt - set $cnt = $cnt + 1 - end -end - -document mbuf_walkpkt -Syntax: (gdb) mbuf_walkpkt -| Given an mbuf address, walk its m_nextpkt pointer -end - -define mbuf_walk - set $mp = (struct mbuf *)$arg0 - set $cnt = 1 - set $tot = 0 - while $mp - printf "%4d: %p [len %4d, type %2d, ", $cnt, $mp, \ - $mp->m_hdr.mh_len, $mp->m_hdr.mh_type - if mclaudit != 0 - mbuf_buf2mca $mp - printf ", " - end - set $tot = $tot + $mp->m_hdr.mh_len - printf "total %d]\n", $tot - set $mp = $mp->m_hdr.mh_next - set $cnt = $cnt + 1 - end -end - -document mbuf_walk -Syntax: (gdb) mbuf_walk -| Given an mbuf address, walk its m_next pointer -end - -define mbuf_buf2slab - set $addr = $arg0 - set $gix = ((char *)$addr - (char *)mbutl) >> 20 - set $ix = ((char *)$addr - (char *)slabstbl[$gix].slg_slab[0].sl_base) >> 12 - set $slab = &slabstbl[$gix].slg_slab[$ix] - if $kgm_lp64 - printf "0x%-16llx", $slab - else - printf "0x%-8x", $slab - end -end - -document mbuf_buf2slab -| Given an mbuf object, find its corresponding slab address. -end - -define mbuf_buf2mca - set $addr = $arg0 - set $ix = ((char *)$addr - (char *)mbutl) >> 12 - set $clbase = ((union mbigcluster *)mbutl) + $ix - set $mclidx = (((char *)$addr - (char *)$clbase) >> 8) - set $mca = mclaudit[$ix].cl_audit[$mclidx] - if $kgm_lp64 - printf "mca: 0x%-16llx", $mca - else - printf "mca: 0x%-8x", $mca - end -end - -document mbuf_buf2mca -Syntax: (gdb) mbuf_buf2mca -| Given an mbuf object, find its buffer audit structure address. -| This requires mbuf buffer auditing to be turned on, by setting -| the appropriate flags to the "mbuf_debug" boot-args parameter. 
-end - -define mbuf_showmca - set language c - set $mca = (mcache_audit_t *)$arg0 - set $cp = (mcache_t *)$mca->mca_cache - printf "object type:\t\t" - mbuf_mca_ctype $mca 1 - printf "\ncontrolling mcache:\t%p (%s)\n", $mca->mca_cache, $cp->mc_name - if $mca->mca_uflags & $MB_SCVALID - set $ix = ((char *)$mca->mca_addr - (char *)mbutl) >> 12 - set $clbase = ((union mbigcluster *)mbutl) + $ix - set $mclidx = (((char *)$mca->mca_addr - (char *)$clbase) >> 8) - printf "mbuf obj:\t\t%p\n", $mca->mca_addr - printf "mbuf index:\t\t%d (out of 16) in cluster base %p\n", \ - $mclidx + 1, $clbase - if $mca->mca_uptr != 0 - set $peer_mca = (mcache_audit_t *)$mca->mca_uptr - printf "paired cluster obj:\t%p (mca %p)\n", \ - $peer_mca->mca_addr, $peer_mca - end - printf "saved contents:\t\t%p (%d bytes)\n", \ - $mca->mca_contents, $mca->mca_contents_size - else - printf "cluster obj:\t\t%p\n", $mca->mca_addr - if $mca->mca_uptr != 0 - set $peer_mca = (mcache_audit_t *)$mca->mca_uptr - printf "paired mbuf obj:\t%p (mca %p)\n", \ - $peer_mca->mca_addr, $peer_mca - end - end - printf "recent transaction for this buffer (thread %p):\n", \ - $mca->mca_thread - set $cnt = 0 - while $cnt < $mca->mca_depth - set $kgm_pc = $mca->mca_stack[$cnt] - printf "%4d: ", $cnt + 1 - pcprint $kgm_pc - printf "\n" - set $cnt = $cnt + 1 - end - if $mca->mca_pdepth > 0 - printf "previous transaction for this buffer (thread %p):\n", \ - $mca->mca_pthread - end - set $cnt = 0 - while $cnt < $mca->mca_pdepth - set $kgm_pc = $mca->mca_pstack[$cnt] - printf "%4d: ", $cnt + 1 - pcprint $kgm_pc - printf "\n" - set $cnt = $cnt + 1 - end - set language auto -end - -document mbuf_showmca -Syntax: (gdb) mbuf_showmca -| Given an mbuf/cluster buffer audit structure address, print the audit -| records including the stack trace of the last buffer transaction. 
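-| For example (the mca address is illustrative, e.g. as printed by
-| mbuf_buf2mca):
-| (gdb) mbuf_showmca 0xffffff80e89fc400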
-end - -define mbuf_topleak - set language c - set $topcnt = 0 - if $arg0 < 5 - set $maxcnt = $arg0 - else - set $maxcnt = 5 - end - while $topcnt < $maxcnt - mbuf_traceleak mleak_top_trace[$topcnt] - set $topcnt = $topcnt + 1 - end - set language auto -end - -document mbuf_topleak -Syntax: (gdb) mbuf_topleak <num> -| Prints information about the top suspected mbuf leakers -| where <num> is a value <= 5 -end - -define mbuf_traceleak - set language c - set $trace = (struct mtrace *) $arg0 - if $trace->allocs != 0 - printf "%p:%d outstanding allocs\n", $trace, $trace->allocs - printf "backtrace saved %d deep:\n", $trace->depth - if $trace->depth != 0 - set $cnt = 0 - while $cnt < $trace->depth - printf "%4d: ", $cnt + 1 - pcprint $trace->addr[$cnt] - printf "\n" - set $cnt = $cnt + 1 - end - end - end - set language auto -end - -document mbuf_traceleak -Syntax: (gdb) mbuf_traceleak <addr> -| Given an mbuf leak trace (mtrace) structure address, print out the -| stored information associated with that trace -end - -set $MCF_NOCPUCACHE = 0x10 - -define mcache_stat - set $head = (mcache_t *)mcache_head - set $mc = $head - - if $kgm_lp64 - printf "cache cache cache buf buf backing (# of retries) bufs\n" - printf "name state addr size align zone wait nowait failed incache\n" - printf "------------------------- -------- ------------------ ------ ----- ------------------ -------------------------- --------\n" - else - printf "cache cache cache buf buf backing (# of retries) bufs\n" - printf "name state addr size align zone wait nowait failed incache\n" - printf "------------------------- -------- ---------- ------ ----- ---------- -------------------------- --------\n" - end - while $mc != 0 - set $bktsize = $mc->mc_cpu.cc_bktsize - printf "%-25s ", $mc->mc_name - if ($mc->mc_flags & $MCF_NOCPUCACHE) - printf "disabled" - else - if $mc->mc_purge_cnt > 0 - printf " purging" - else - if $bktsize == 0 - printf " offline" - else - printf " online" - end - end - end - printf " %p %6d %5d ",$mc, \ - $mc->mc_bufsize, $mc->mc_align - if $mc->mc_slab_zone != 0 - printf "%p", $mc->mc_slab_zone - else - if $kgm_lp64 - printf " custom" - else - printf " custom" - end - end - set $tot = 0 - set $tot += $mc->mc_full.bl_total * $bktsize - set $ccp = (mcache_cpu_t *)$mc->mc_cpu - set $n = 0 - while $n < ncpu - if $ccp->cc_objs > 0 - set $tot += $ccp->cc_objs - end - if $ccp->cc_pobjs > 0 - set $tot += $ccp->cc_pobjs - end - set $n += 1 - set $ccp += 1 - end - printf " %8d %8d %8d %8d", $mc->mc_wretry_cnt, \ - $mc->mc_nwretry_cnt, $mc->mc_nwfail_cnt, $tot - printf "\n" - set $mc = (mcache_t *)$mc->mc_list.le_next - end -end - -document mcache_stat -Syntax: (gdb) mcache_stat -| Print all mcaches in the system. -end - -define mcache_showzone - set $mc = (mcache_t *)$arg0 - if $mc->mc_slab_zone != 0 - printf "%p", $mc->mc_slab_zone - else - printf " custom" - end -end - -document mcache_showzone -Syntax: (gdb) mcache_showzone <addr> -| Print the type of backend (custom or zone) of a mcache.
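-| For example (the mcache_t address is illustrative; one can be taken
-| from the "cache addr" column of mcache_stat output):
-| (gdb) mcache_showzone 0xffffff80e8003300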
-end
-
-define mcache_walkobj
-    set $p = (mcache_obj_t *)$arg0
-    set $cnt = 1
-    while $p
-        printf "%4d: %p\n", $cnt, $p
-        set $p = $p->obj_next
-        set $cnt = $cnt + 1
-    end
-end
-
-document mcache_walkobj
-Syntax: (gdb) mcache_walkobj <addr>
-| Given an mcache object address, walk its obj_next pointer chain
-end
-
-define mcache_showcache
-    set $cp = (mcache_t *)$arg0
-    set $ccp = (mcache_cpu_t *)$cp->mc_cpu
-    set $bktsize = $cp->mc_cpu.cc_bktsize
-    set $cnt = 0
-    set $tot = 0
-    printf "Showing cache '%s':\n\n", $cp->mc_name
-    printf " CPU  cc_objs cc_pobjs    total\n"
-    printf "---- -------- -------- --------\n"
-    while $cnt < ncpu
-        set $objs = $ccp->cc_objs
-        if $objs <= 0
-            set $objs = 0
-        end
-        set $pobjs = $ccp->cc_pobjs
-        if $pobjs <= 0
-            set $pobjs = 0
-        end
-        set $tot_cpu = $objs + $pobjs
-        set $tot += $tot_cpu
-        printf "%4d %8d %8d %8d\n", $cnt, $objs, $pobjs, $tot_cpu
-        set $ccp += 1
-        set $cnt += 1
-    end
-    printf "                       ========\n"
-    printf "                       %8d\n", $tot
-    printf "\n"
-    set $tot += $cp->mc_full.bl_total * $bktsize
-    printf "Total # of full buckets (%d objs/bkt):\t%-8d\n", \
-        $bktsize, $cp->mc_full.bl_total
-    printf "Total # of objects cached:\t\t%-8d\n", $tot
-end
-
-document mcache_showcache
-Syntax: (gdb) mcache_showcache <addr>
-| Display the number of objects in the cache
-end
-
-set $NSLABSPMB = sizeof(mcl_slabg_t)/sizeof(mcl_slab_t)
-
-define mbuf_slabstbl
-    set $x = 0
-
-    if $kgm_lp64
-        printf "slot slabg              slabs range\n"
-        printf "---- ------------------ -------------------------------------------\n"
-    else
-        printf "slot slabg      slabs range\n"
-        printf "---- ---------- ---------------------------\n"
-    end
-    while $x < maxslabgrp
-        set $slg = slabstbl[$x]
-        printf "%3d: ", $x
-        if $slg == 0
-            printf "-\n"
-        else
-            if $kgm_lp64
-                printf "0x%-16llx [ 0x%-16llx - 0x%-16llx ]\n", $slg, &$slg->slg_slab[0], \
-                    &$slg->slg_slab[$NSLABSPMB-1]
-            else
-                printf "0x%-8x [ 0x%-8x - 0x%-8x ]\n", $slg, &$slg->slg_slab[0], \
-                    &$slg->slg_slab[$NSLABSPMB-1]
-            end
-        end
-        set $x += 1
-    end
-end
-
-document mbuf_slabstbl
-Syntax: (gdb) mbuf_slabstbl
-| Display the mbuf slabs table
-end
-
-set $SLF_MAPPED=0x0001
-set $SLF_PARTIAL=0x0002
-set $SLF_DETACHED=0x0004
-
-define mbuf_slabs
-    set $slg = (mcl_slabg_t *)$arg0
-    set $x = 0
-
-    if $kgm_lp64
-        printf "slot slab               next               obj                mca                 C  R  N   size flags\n"
-        printf "---- ------------------ ------------------ ------------------ ------------------ -- -- -- ------ -----\n"
-    else
-        printf "slot slab       next       obj        mca         C  R  N   size flags\n"
-        printf "---- ---------- ---------- ---------- ---------- -- -- -- ------ -----\n"
-    end
-    while $x < $NSLABSPMB
-        set $sl = &$slg->slg_slab[$x]
-        set $mca = 0
-        set $obj = $sl->sl_base
-
-        if mclaudit != 0
-            set $ix = ((char *)$obj - (char *)mbutl) >> 12
-            set $clbase = ((union mbigcluster *)mbutl) + $ix
-            set $mclidx = (((char *)$obj - (char *)$clbase) >> 8)
-            set $mca = mclaudit[$ix].cl_audit[$mclidx]
-        end
-
-        if $kgm_lp64
-            printf "%3d: 0x%-16llx 0x%-16llx 0x%-16llx 0x%-16llx %2d %2d %2d %6d 0x%04x ", \
-                $x + 1, $sl, $sl->sl_next, $obj, $mca, $sl->sl_class, \
-                $sl->sl_refcnt, $sl->sl_chunks, $sl->sl_len, \
-                $sl->sl_flags
-        else
-            printf "%3d: 0x%-8x 0x%-8x 0x%-8x 0x%-8x %2d %2d %2d %6d 0x%04x ", \
-                $x + 1, $sl, $sl->sl_next, $obj, $mca, $sl->sl_class, \
-                $sl->sl_refcnt, $sl->sl_chunks, $sl->sl_len, \
-                $sl->sl_flags
-        end
-        if $sl->sl_flags != 0
-            printf "<"
-            if $sl->sl_flags & $SLF_MAPPED
-                printf "mapped"
-            end
-            if $sl->sl_flags & $SLF_PARTIAL
-                printf ",partial"
-            end
-            if $sl->sl_flags & $SLF_DETACHED
-                printf ",detached"
-            end
-            printf ">"
-        end
-        printf "\n"
-
-        if $sl->sl_chunks >
1 - set $z = 1 - set $c = $sl->sl_len / $sl->sl_chunks - - while $z < $sl->sl_chunks - set $obj = $sl->sl_base + ($c * $z) - set $mca = 0 - - if mclaudit != 0 - set $ix = ((char *)$obj - (char *)mbutl) >> 12 - set $clbase = ((union mbigcluster *)mbutl) + $ix - set $mclidx = (((char *)$obj - (char *)$clbase) >> 8) - set $mca = mclaudit[$ix].cl_audit[$mclidx] - end - - if $kgm_lp64 - printf " 0x%-16llx 0x%-16llx\n", $obj, $mca - else - printf " 0x%-8x 0x%-8x\n", $obj, $mca - end - set $z += 1 - end - end - - set $x += 1 - end -end - -document mbuf_slabs -| Display all mbuf slabs in the group -end - -define mbuf_stat - set $x = 0 - - printf "class total cached uncached inuse failed waiter notified purge\n" - printf "name objs objs objs / slabs objs alloc count count count count\n" - printf "---------------- -------- -------- ------------------- -------- ---------------- -------- -------- --------\n" - while $x < (sizeof(mbuf_table) / sizeof(mbuf_table[0])) - set $mbt = mbuf_table[$x] - set $mcs = (mb_class_stat_t *)mbuf_table[$x].mtbl_stats - set $tot = 0 - set $mc = $mbt->mtbl_cache - set $bktsize = $mc->mc_cpu.cc_bktsize - set $tot += $mc->mc_full.bl_total * $bktsize - set $ccp = (mcache_cpu_t *)$mc->mc_cpu - set $n = 0 - while $n < ncpu - if $ccp->cc_objs > 0 - set $tot += $ccp->cc_objs - end - if $ccp->cc_pobjs > 0 - set $tot += $ccp->cc_pobjs - end - set $n += 1 - set $ccp += 1 - end - - printf "%-16s %8d %8d %8d / %-8d %8d %16llu %8d %8llu %8llu", \ - $mcs->mbcl_cname, $mcs->mbcl_total, $tot, \ - $mcs->mbcl_infree, $mcs->mbcl_slab_cnt, \ - ($mcs->mbcl_total - $tot - $mcs->mbcl_infree), \ - $mcs->mbcl_fail_cnt, $mc->mc_waiter_cnt, \ - $mcs->mbcl_notified, $mcs->mbcl_purge_cnt - printf "\n" - set $x += 1 - end -end - -document mbuf_stat -| Print extended mbuf allocator statistics. 
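-| Example: (gdb) mbuf_stat
-| Note that the "uncached" column is derived rather than stored: it is
-| mbcl_total minus the cached count (full buckets times cc_bktsize plus
-| each CPU's cc_objs and cc_pobjs) minus mbcl_infree.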
-end
-
-set $MB_INUSE = 0x1
-set $MB_COMP_INUSE = 0x2
-set $MB_SCVALID = 0x4
-
-set $MCLBYTES = 2048
-set $MSIZE = 256
-set $NBPG = 4096
-set $M16KCLBYTES = 16384
-
-define mbuf_mca_ctype
-    set $mca = (mcache_audit_t *)$arg0
-    set $vopt = $arg1
-    set $cp = $mca->mca_cache
-    set $class = (unsigned int)$cp->mc_private
-    set $csize = mbuf_table[$class].mtbl_stats->mbcl_size
-    set $done = 0
-    if $csize == $MSIZE
-        if $vopt
-            printf "M (mbuf) "
-        else
-            printf "M "
-        end
-        set $done = 1
-    end
-    if !$done && $csize == $MCLBYTES
-        if $vopt
-            printf "CL (2K cluster) "
-        else
-            printf "CL "
-        end
-        set $done = 1
-    end
-    if !$done && $csize == $NBPG
-        if $vopt
-            printf "BCL (4K cluster) "
-        else
-            printf "BCL "
-        end
-        set $done = 1
-    end
-    if !$done && $csize == $M16KCLBYTES
-        if $vopt
-            printf "JCL (16K cluster) "
-        else
-            printf "JCL "
-        end
-        set $done = 1
-    end
-    if !$done && $csize == ($MSIZE+$MCLBYTES)
-        if $mca->mca_uflags & $MB_SCVALID
-            if $mca->mca_uptr
-                printf "M+CL "
-                if $vopt
-                    printf "(paired mbuf, 2K cluster) "
-                end
-            else
-                printf "M-CL "
-                if $vopt
-                    printf "(unpaired mbuf, 2K cluster) "
-                end
-            end
-        else
-            if $mca->mca_uptr
-                printf "CL+M "
-                if $vopt
-                    printf "(paired 2K cluster, mbuf) "
-                end
-            else
-                printf "CL-M "
-                if $vopt
-                    printf "(unpaired 2K cluster, mbuf) "
-                end
-            end
-        end
-        set $done = 1
-    end
-    if !$done && $csize == ($MSIZE+$NBPG)
-        if $mca->mca_uflags & $MB_SCVALID
-            if $mca->mca_uptr
-                printf "M+BCL "
-                if $vopt
-                    printf "(paired mbuf, 4K cluster) "
-                end
-            else
-                printf "M-BCL "
-                if $vopt
-                    printf "(unpaired mbuf, 4K cluster) "
-                end
-            end
-        else
-            if $mca->mca_uptr
-                printf "BCL+M "
-                if $vopt
-                    printf "(paired 4K cluster, mbuf) "
-                end
-            else
-                printf "BCL-M "
-                if $vopt
-                    printf "(unpaired 4K cluster, mbuf) "
-                end
-            end
-        end
-        set $done = 1
-    end
-    if !$done && $csize == ($MSIZE+$M16KCLBYTES)
-        if $mca->mca_uflags & $MB_SCVALID
-            if $mca->mca_uptr
-                printf "M+JCL "
-                if $vopt
-                    printf "(paired mbuf, 16K cluster) "
-                end
-            else
-                printf "M-JCL "
-                if $vopt
-                    printf "(unpaired mbuf, 16K cluster) "
-                end
-            end
-        else
-            if $mca->mca_uptr
-                printf "JCL+M "
-                if $vopt
-                    printf "(paired 16K cluster, mbuf) "
-                end
-            else
-                printf "JCL-M "
-                if $vopt
-                    printf "(unpaired 16K cluster, mbuf) "
-                end
-            end
-        end
-        set $done = 1
-    end
-    if !$done
-        printf "unknown: %s ", $cp->mc_name
-    end
-end
-
-document mbuf_mca_ctype
-| This is a helper macro for mbuf_show{active,inactive,all} that prints
-| out the mbuf object type represented by a given mcache audit structure.
-end
-
-define mbuf_showactive
-    if $argc == 0
-        mbuf_walkallslabs 1 0
-    else
-        mbuf_walkallslabs 1 0 $arg0
-    end
-end
-
-document mbuf_showactive
-Syntax: (gdb) mbuf_showactive
-| Walk the mbuf objects pool and print only the active ones; this
-| requires mbuf debugging to be turned on, by setting the appropriate flags
-| to the "mbuf_debug" boot-args parameter.  Active objects are those that
-| are outstanding (have not returned to the mbuf slab layer) and in use
-| by the client (have not been freed).
-end
-
-define mbuf_showinactive
-    mbuf_walkallslabs 0 1
-end
-
-document mbuf_showinactive
-Syntax: (gdb) mbuf_showinactive
-| Walk the mbuf objects pool and print only the inactive ones; this
-| requires mbuf debugging to be turned on, by setting the appropriate flags
-| to the "mbuf_debug" boot-args parameter.  Inactive objects are those that
-| are outstanding (have not returned to the mbuf slab layer) but have been
-| freed by the client, i.e.
they still reside in the mcache layer ready to -| be used for subsequent allocation requests. -end - -define mbuf_showall - mbuf_walkallslabs 1 1 -end - -document mbuf_showall -Syntax: (gdb) mbuf_showall -| Walk the mbuf objects pool and print them all; this requires -| mbuf debugging to be turned on, by setting the appropriate flags to the -| "mbuf_debug" boot-args parameter. -end - -define mbuf_mcaobjs -end - -define mbuf_walkallslabs - set $show_a = $arg0 - set $show_f = $arg1 - if $argc == 3 - set $show_tr = $arg2 - else - set $show_tr = 0 - end - set $x = 0 - set $total = 0 - set $total_a = 0 - set $total_f = 0 - - printf "(" - if $show_a && !$show_f - printf "Searching only for active " - end - if !$show_a && $show_f - printf "Searching only for inactive " - end - if $show_a && $show_f - printf "Displaying all " - end - printf "objects; this may take a while ...)\n\n" - - if $kgm_lp64 - printf " slab mca obj allocation\n" - printf "slot idx address address address type state\n" - printf "---- ---- ------------------ ------------------ ------------------ ----- -----------\n" - else - printf " slab mca obj allocation\n" - printf "slot idx address address address type state\n" - printf "---- ---- ---------- ---------- ---------- ----- -----------\n" - end - - while $x < slabgrp - set $slg = slabstbl[$x] - set $y = 0 - set $stop = 0 - while $y < $NSLABSPMB && $stop == 0 - set $sl = &$slg->slg_slab[$y] - set $base = (char *)$sl->sl_base - set $ix = ($base - (char *)mbutl) >> 12 - set $clbase = ((union mbigcluster *)mbutl) + $ix - set $mclidx = ($base - (char *)$clbase) >> 8 - set $mca = mclaudit[$ix].cl_audit[$mclidx] - set $first = 1 - - while $mca != 0 && $mca->mca_addr != 0 - set $printmca = 0 - if $mca->mca_uflags & ($MB_INUSE|$MB_COMP_INUSE) - set $total_a = $total_a + 1 - set $printmca = $show_a - else - set $total_f = $total_f + 1 - set $printmca = $show_f - end - - if $printmca != 0 - if $first == 1 - if $kgm_lp64 - printf "%4d %4d 0x%-16llx ", $x, $y, $sl - else - printf "%4d %4d 0x%-8x ", $x, $y, $sl - end - else - if $kgm_lp64 - printf " " - else - printf " " - end - end - - if $kgm_lp64 - printf "0x%-16llx 0x%-16llx ", $mca, $mca->mca_addr - else - printf "0x%-8x 0x%-8x ", $mca, $mca->mca_addr - end - - mbuf_mca_ctype $mca 0 - if $mca->mca_uflags & ($MB_INUSE|$MB_COMP_INUSE) - printf "active " - else - printf " freed " - end - if $first == 1 - set $first = 0 - end - printf "\n" - set $total = $total + 1 - - if $show_tr != 0 - printf "recent transaction for this buffer (thread %p):\n", \ - $mca->mca_thread - set $cnt = 0 - while $cnt < $mca->mca_depth - set $kgm_pc = $mca->mca_stack[$cnt] - printf "%4d: ", $cnt + 1 - pcprint $kgm_pc - printf "\n" - set $cnt = $cnt + 1 - end - end - end - - set $mca = $mca->mca_next - end - set $y += 1 - if $slg->slg_slab[$y].sl_base == 0 - set $stop = 1 - end - end - set $x += 1 - end - if $total && $show_a && $show_f - printf "\ntotal objects:\t%d\n", $total - printf "active/unfreed:\t%d\n", $total_a - printf "freed/in_cache:\t%d\n", $total_f - end -end - -document mbuf_walkallslabs -| Walk the mbuf objects pool; this requires mbuf debugging to be -| turned on, by setting the appropriate flags to the "mbuf_debug" boot-args -| parameter. This is a backend routine for mbuf_show{active,inactive,all}. 
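-| For example, mbuf_showactive forwards an optional argument here as
-| $show_tr, so a session could run:
-|     (gdb) mbuf_showactive 1
-| to also dump the most recent transaction backtrace for every active
-| object it finds.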
-end - -define mbuf_countchain - set $mp = (struct mbuf *)$arg0 - - set $pkt = 0 - set $nxt = 0 - - while $mp != 0 - set $pkt = $pkt + 1 - - set $mn = (struct mbuf *)$mp->m_hdr.mh_next - while $mn != 0 - set $nxt = $nxt + 1 - - set $mn = (struct mbuf *)$mn->m_hdr.mh_next - end - - set $mp = $mp->m_hdr.mh_nextpkt - - if (($pkt + $nxt) % 50) == 0 - printf "... %d\n", $pkt + $nxt - end - end - - printf "\ntotal: %d (via m_next: %d)\n", $pkt + $nxt, $nxt -end - -document mbuf_countchain -Syntax: mbuf_countchain -| Count the total number of mbufs chained from the given the address of an mbuf. -| The routine follows both the m_next pointers and m_nextpkt pointers. -end - -set $RTF_UP = 0x1 -set $RTF_GATEWAY = 0x2 -set $RTF_HOST = 0x4 -set $RTF_REJECT = 0x8 -set $RTF_DYNAMIC = 0x10 -set $RTF_MODIFIED = 0x20 -set $RTF_DONE = 0x40 -set $RTF_DELCLONE = 0x80 -set $RTF_CLONING = 0x100 -set $RTF_XRESOLVE = 0x200 -set $RTF_LLINFO = 0x400 -set $RTF_STATIC = 0x800 -set $RTF_BLACKHOLE = 0x1000 -set $RTF_PROTO2 = 0x4000 -set $RTF_PROTO1 = 0x8000 -set $RTF_PRCLONING = 0x10000 -set $RTF_WASCLONED = 0x20000 -set $RTF_PROTO3 = 0x40000 -set $RTF_PINNED = 0x100000 -set $RTF_LOCAL = 0x200000 -set $RTF_BROADCAST = 0x400000 -set $RTF_MULTICAST = 0x800000 -set $RTF_IFSCOPE = 0x1000000 -set $RTF_CONDEMNED = 0x2000000 -set $RTF_IFREF = 0x4000000 -set $RTF_PROXY = 0x8000000 -set $RTF_ROUTER = 0x10000000 - -set $AF_INET = 2 -set $AF_INET6 = 30 -set $AF_LINK = 18 - -define rtentry_prdetails - set $rt = (struct rtentry *)$arg0 - set $is_v6 = 0 - - set $dst = (struct sockaddr *)$rt->rt_nodes->rn_u.rn_leaf.rn_Key - if $dst->sa_family == $AF_INET - showsockaddr_in $dst - printf " " - else - if $dst->sa_family == $AF_INET6 - showsockaddr_in6 $dst - printf " " - set $is_v6 = 1 - else - if $dst->sa_family == $AF_LINK - showsockaddr_dl $dst - printf " " - else - showsockaddr_unspec $dst - end - end - end - - set $dst = (struct sockaddr *)$rt->rt_gateway - if $dst->sa_family == $AF_INET - showsockaddr_in $dst - printf " " - else - if $dst->sa_family == $AF_INET6 - set $is_v6 = 1 - showsockaddr_in6 $dst - printf " " - else - if $dst->sa_family == $AF_LINK - showsockaddr_dl $dst - if $is_v6 - printf " " - else - printf " " - end - else - showsockaddr_unspec $dst - end - end - end - - if $rt->rt_flags & $RTF_WASCLONED - if $kgm_lp64 - printf "%18p ", $rt->rt_parent - else - printf "%10p ", $rt->rt_parent - end - else - if $kgm_lp64 - printf " " - else - printf " " - end - end - - printf "%6u %8u ", $rt->rt_refcnt, $rt->rt_rmx.rmx_pksent - - if $rt->rt_flags & $RTF_UP - printf "U" - end - if $rt->rt_flags & $RTF_GATEWAY - printf "G" - end - if $rt->rt_flags & $RTF_HOST - printf "H" - end - if $rt->rt_flags & $RTF_REJECT - printf "R" - end - if $rt->rt_flags & $RTF_DYNAMIC - printf "D" - end - if $rt->rt_flags & $RTF_MODIFIED - printf "M" - end - if $rt->rt_flags & $RTF_CLONING - printf "C" - end - if $rt->rt_flags & $RTF_PRCLONING - printf "c" - end - if $rt->rt_flags & $RTF_LLINFO - printf "L" - end - if $rt->rt_flags & $RTF_STATIC - printf "S" - end - if $rt->rt_flags & $RTF_PROTO1 - printf "1" - end - if $rt->rt_flags & $RTF_PROTO2 - printf "2" - end - if $rt->rt_flags & $RTF_PROTO3 - printf "3" - end - if $rt->rt_flags & $RTF_WASCLONED - printf "W" - end - if $rt->rt_flags & $RTF_BROADCAST - printf "b" - end - if $rt->rt_flags & $RTF_MULTICAST - printf "m" - end - if $rt->rt_flags & $RTF_XRESOLVE - printf "X" - end - if $rt->rt_flags & $RTF_BLACKHOLE - printf "B" - end - if $rt->rt_flags & $RTF_IFSCOPE - printf "I" - end - if 
$rt->rt_flags & $RTF_CONDEMNED - printf "Z" - end - if $rt->rt_flags & $RTF_IFREF - printf "i" - end - if $rt->rt_flags & $RTF_PROXY - printf "Y" - end - if $rt->rt_flags & $RTF_ROUTER - printf "r" - end - - printf "/%s%d", $rt->rt_ifp->if_name, $rt->rt_ifp->if_unit -end - -set $RNF_ROOT = 2 - -define _rttable_dump - set $rnh = $arg0 - set $rn = (struct radix_node *)$rnh->rnh_treetop - set $rnh_cnt = $rnh->rnh_cnt - - while $rn->rn_bit >= 0 - set $rn = $rn->rn_u.rn_node.rn_L - end - - while 1 - set $base = (struct radix_node *)$rn - while ($rn->rn_parent->rn_u.rn_node.rn_R == $rn) && ($rn->rn_flags & $RNF_ROOT) == 0 - set $rn = $rn->rn_parent - end - set $rn = $rn->rn_parent->rn_u.rn_node.rn_R - while $rn->rn_bit >= 0 - set $rn = $rn->rn_u.rn_node.rn_L - end - set $next = $rn - while $base != 0 - set $rn = $base - set $base = $rn->rn_u.rn_leaf.rn_Dupedkey - if ($rn->rn_flags & $RNF_ROOT) == 0 - - set $rt = (struct rtentry *)$rn - - if $kgm_lp64 - printf "%18p ", $rt - else - printf "%10p ", $rt - end - rtentry_prdetails $rt - printf "\n" - - end - end - set $rn = $next - if ($rn->rn_flags & $RNF_ROOT) != 0 - loop_break - end - end -end - - -define show_rt_inet - if $kgm_lp64 - printf " rtentry dst gw parent Refs Use flags/if\n" - printf " ----------------- --------------- ----------------- ------------------ ------ -------- -----------\n" - else - printf " rtentry dst gw parent Refs Use flags/if\n" - printf " --------- --------------- ----------------- ---------- ------ -------- -----------\n" - end - _rttable_dump rt_tables[2] -end - -document show_rt_inet -Syntax: (gdb) show_rt_inet -| Show the entries of the IPv4 routing table. -end - -define show_rt_inet6 - if $kgm_lp64 - printf " rtentry dst gw parent Refs Use flags/if\n" - printf " ----------------- --------------------------------------- --------------------------------------- ------------------ ------ -------- -----------\n" - else - printf " rtentry dst gw parent Refs Use flags/if\n" - printf " --------- --------------------------------------- --------------------------------------- ---------- ------ -------- -----------\n" - end - _rttable_dump rt_tables[30] -end - -document show_rt_inet6 -Syntax: (gdb) show_rt_inet6 -| Show the entries of the IPv6 routing table. -end - -define rtentry_trash - set $rtd = (struct rtentry_dbg *)rttrash_head.tqh_first - set $cnt = 0 - while $rtd != 0 - if $cnt == 0 - if $kgm_lp64 - printf " rtentry ref hold rele dst gw parent flags/if\n" - printf " ----------------- --- ------ ------ --------------- ----- ------------------ -----------\n" - else - printf " rtentry ref hold rele dst gw parent flags/if\n" - printf " --------- --- ------ ------ --------------- ----- ---------- -----------\n" - end - end - printf "%4d: %p %3d %6d %6d ", $cnt + 1, $rtd, \ - $rtd->rtd_refhold_cnt - $rtd->rtd_refrele_cnt, \ - $rtd->rtd_refhold_cnt, $rtd->rtd_refrele_cnt - rtentry_prdetails $rtd - printf "\n" - set $rtd = $rtd->rtd_trash_link.tqe_next - set $cnt = $cnt + 1 - end -end - -document rtentry_trash -Syntax: (gdb) rtentry_trash -| Walk the list of trash route entries; this requires route entry -| debugging to be turned on, by setting the appropriate flags to the -| "rte_debug" boot-args parameter. 
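-| Example, assuming "rte_debug" was set in boot-args:
-|     (gdb) rtentry_trash
-| The "ref" column is rtd_refhold_cnt - rtd_refrele_cnt; a nonzero
-| value on an entry in the trash list may point at a refcount imbalance.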
-end - -set $CTRACE_STACK_SIZE = ctrace_stack_size -set $CTRACE_HIST_SIZE = ctrace_hist_size - -define rtentry_showdbg - set $rtd = (struct rtentry_dbg *)$arg0 - set $cnt = 0 - - printf "Total holds:\t%d\n", $rtd->rtd_refhold_cnt - printf "Total releases:\t%d\n", $rtd->rtd_refrele_cnt - - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $rtd->rtd_alloc.pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nAlloc (thread %p):\n", \ - $rtd->rtd_alloc.th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $rtd->rtd_free.pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nFree: (thread %p)\n", \ - $rtd->rtd_free.th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - while $cnt < $CTRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $rtd->rtd_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, $rtd->rtd_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $CTRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $rtd->rtd_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, $rtd->rtd_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - - printf "\nTotal locks:\t%d\n", $rtd->rtd_lock_cnt - printf "Total unlocks:\t%d\n", $rtd->rtd_unlock_cnt - - set $cnt = 0 - while $cnt < $CTRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $rtd->rtd_lock[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nLock [%d] (thread %p):\n",\ - $cnt, $rtd->rtd_lock[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $CTRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $rtd->rtd_unlock[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nUnlock [%d] (thread %p):\n",\ - $cnt, $rtd->rtd_unlock[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document rtentry_showdbg -Syntax: (gdb) rtentry_showdbg -| Given a route entry structure address, print the debug information -| related to it. This requires route entry debugging to be turned -| on, by setting the appropriate flags to the "rte_debug" boot-args -| parameter. 
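-| A possible invocation (the address is hypothetical, e.g. one of the
-| entries listed by rtentry_trash):
-|     (gdb) rtentry_showdbg 0xffffff8032a4f000
-| Holds and releases are kept in ring buffers of ctrace_hist_size
-| records, each holding up to ctrace_stack_size saved PCs.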
-end - -set $INIFA_TRACE_HIST_SIZE = inifa_trace_hist_size - -define inifa_showdbg - set $inifa = (struct in_ifaddr_dbg *)$arg0 - set $cnt = 0 - - printf "Total holds:\t%d\n", $inifa->inifa_refhold_cnt - printf "Total releases:\t%d\n", $inifa->inifa_refrele_cnt - - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $inifa->inifa_alloc.pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nAlloc (thread %p):\n", \ - $inifa->inifa_alloc.th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $inifa->inifa_free.pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nFree: (thread %p)\n", \ - $inifa->inifa_free.th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - while $cnt < $INIFA_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $inifa->inifa_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, $inifa->inifa_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $INIFA_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $inifa->inifa_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, $inifa->inifa_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document inifa_showdbg -Syntax: (gdb) inifa_showdbg -| Given an IPv4 interface structure address, print the debug information -| related to it. This requires interface address debugging to be turned -| on, by setting the appropriate flags to the "ifa_debug" boot-args -| parameter. 
-end - -set $IN6IFA_TRACE_HIST_SIZE = in6ifa_trace_hist_size - -define in6ifa_showdbg - set $in6ifa = (struct in6_ifaddr_dbg *)$arg0 - set $cnt = 0 - - printf "Total holds:\t%d\n", $in6ifa->in6ifa_refhold_cnt - printf "Total releases:\t%d\n", $in6ifa->in6ifa_refrele_cnt - - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $in6ifa->in6ifa_alloc.pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nAlloc (thread %p):\n", \ - $in6ifa->in6ifa_alloc.th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $in6ifa->in6ifa_free.pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nFree: (thread %p)\n", \ - $in6ifa->in6ifa_free.th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - while $cnt < $IN6IFA_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $in6ifa->in6ifa_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, $in6ifa->in6ifa_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $IN6IFA_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $in6ifa->in6ifa_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, $in6ifa->in6ifa_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document in6ifa_showdbg -Syntax: (gdb) in6ifa_showdbg -| Given an IPv6 interface structure address, print the debug information -| related to it. This requires interface address debugging to be turned -| on, by setting the appropriate flags to the "ifa_debug" boot-args -| parameter. -end - -set $IFMA_TRACE_HIST_SIZE = ifma_trace_hist_size - -define ifma_showdbg - set $ifma = (struct ifmultiaddr_dbg *)$arg0 - set $cnt = 0 - - printf "Total holds:\t%d\n", $ifma->ifma_refhold_cnt - printf "Total releases:\t%d\n", $ifma->ifma_refrele_cnt - - while $cnt < $IFMA_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $ifma->ifma_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, $ifma->ifma_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $IFMA_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $ifma->ifma_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, $ifma->ifma_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document ifma_showdbg -Syntax: (gdb) ifma_showdbg -| Given a link multicast structure address, print the debug information -| related to it. This requires interface address debugging to be turned -| on, by setting the appropriate flags to the "ifa_debug" boot-args -| parameter. 
-end - -set $INM_TRACE_HIST_SIZE = inm_trace_hist_size - -define inm_showdbg - set $inm = (struct in_multi_dbg *)$arg0 - set $cnt = 0 - - printf "Total holds:\t%d\n", $inm->inm_refhold_cnt - printf "Total releases:\t%d\n", $inm->inm_refrele_cnt - - while $cnt < $INM_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $inm->inm_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, $inm->inm_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $INM_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $inm->inm_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, $inm->inm_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document inm_showdbg -Syntax: (gdb) inm_showdbg -| Given an IPv4 multicast structure address, print the debug information -| related to it. This requires interface address debugging to be turned -| on, by setting the appropriate flags to the "ifa_debug" boot-args -| parameter. -end - -set $IF_REF_TRACE_HIST_SIZE = if_ref_trace_hist_size - -define ifpref_showdbg - set $dl_if = (struct dlil_ifnet_dbg *)$arg0 - set $cnt = 0 - - printf "Total references:\t%d\n", $dl_if->dldbg_if_refhold_cnt - printf "Total releases:\t\t%d\n", $dl_if->dldbg_if_refrele_cnt - - while $cnt < $IF_REF_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $dl_if->dldbg_if_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, \ - $dl_if->dldbg_if_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $IF_REF_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $dl_if->dldbg_if_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, \ - $dl_if->dldbg_if_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document ifpref_showdbg -Syntax: (gdb) ifpref_showdbg -| Given an ifnet structure address, print the debug information -| related to its refcnt. This requires ifnet debugging to be turned -| on, by setting the appropriate flags to the "ifnet_debug" boot-args -| parameter. 
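-| A possible invocation (hypothetical ifnet address, e.g. one of the
-| pointers printed by ifconfig or showifnets):
-|     (gdb) ifpref_showdbg 0xffffff8029f44000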
-end - -define in6ifa_trash - set $ifa = (struct in6_ifaddr_dbg *)in6ifa_trash_head.tqh_first - set $cnt = 0 - while $ifa != 0 - if $cnt == 0 - if $kgm_lp64 - printf " in6_ifa ref hold rele\n" - printf " ----------------- --- ------ ------\n" - else - printf " in6_ifa ref hold rele\n" - printf " --------- --- ------ ------\n" - end - end - printf "%4d: %p %3d %6d %6d ", $cnt + 1, $ifa, \ - $ifa->in6ifa_refhold_cnt - $ifa->in6ifa_refrele_cnt, \ - $ifa->in6ifa_refhold_cnt, $ifa->in6ifa_refrele_cnt - showsockaddr_in6 $ifa->in6ifa.ia_ifa.ifa_addr - printf "\n" - set $ifa = $ifa->in6ifa_trash_link.tqe_next - set $cnt = $cnt + 1 - end -end - -set $NDPR_TRACE_HIST_SIZE = ndpr_trace_hist_size - -define ndpr_showdbg - set $ndpr = (struct nd_prefix_dbg *)$arg0 - set $cnt = 0 - - printf "Total references:\t%d\n", $ndpr->ndpr_refhold_cnt - printf "Total releases:\t\t%d\n", $ndpr->ndpr_refrele_cnt - - while $cnt < $NDPR_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $ndpr->ndpr_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, \ - $ndpr->ndpr_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $NDPR_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $ndpr->ndpr_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, \ - $ndpr->ndpr_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document ndpr_showdbg -Syntax: (gdb) ndpr_showdbg -| Given a nd_prefix structure address, print the debug information -| related to its refcnt. This requires the interface address debugging -| to be turned on, by setting the appropriate flags to the "ifa_debug" -| boot-args parameter. -end - -set $NDDR_TRACE_HIST_SIZE = nddr_trace_hist_size - -define nddr_showdbg - set $nddr = (struct nd_defrouter_dbg *)$arg0 - set $cnt = 0 - - printf "Total references:\t%d\n", $nddr->nddr_refhold_cnt - printf "Total releases:\t\t%d\n", $nddr->nddr_refrele_cnt - - while $cnt < $NDDR_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $nddr->nddr_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, \ - $nddr->nddr_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $NDDR_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $nddr->nddr_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, \ - $nddr->nddr_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document nddr_showdbg -Syntax: (gdb) nddr_showdbg -| Given a nd_defrouter structure address, print the debug information -| related to its refcnt. This requires the interface address debugging -| to be turned on, by setting the appropriate flags to the "ifa_debug" -| boot-args parameter. 
-end -set $IMO_TRACE_HIST_SIZE = imo_trace_hist_size - -define imo_showdbg - set $imo = (struct ip_moptions_dbg *)$arg0 - set $cnt = 0 - - printf "Total references:\t%d\n", $imo->imo_refhold_cnt - printf "Total releases:\t\t%d\n", $imo->imo_refrele_cnt - - while $cnt < $IMO_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $imo->imo_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, \ - $imo->imo_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $IMO_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $imo->imo_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, \ - $imo->imo_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document imo_showdbg -Syntax: (gdb) imo_showdbg -| Given a ip_moptions structure address, print the debug information -| related to its refcnt. This requires the interface address debugging -| to be turned on, by setting the appropriate flags to the "ifa_debug" -| boot-args parameter. -end - -set $IM6O_TRACE_HIST_SIZE = im6o_trace_hist_size - -define im6o_showdbg - set $im6o = (struct ip6_moptions_dbg *)$arg0 - set $cnt = 0 - - printf "Total references:\t%d\n", $im6o->im6o_refhold_cnt - printf "Total releases:\t\t%d\n", $im6o->im6o_refrele_cnt - - while $cnt < $IM6O_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $im6o->im6o_refhold[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nHold [%d] (thread %p):\n", \ - $cnt, \ - $im6o->im6o_refhold[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end - set $cnt = 0 - while $cnt < $IM6O_TRACE_HIST_SIZE - set $ix = 0 - while $ix < $CTRACE_STACK_SIZE - set $kgm_pc = $im6o->im6o_refrele[$cnt].pc[$ix] - if $kgm_pc != 0 - if $ix == 0 - printf "\nRelease [%d] (thread %p):\n",\ - $cnt, \ - $im6o->im6o_refrele[$cnt].th - end - printf "%4d: ", $ix + 1 - pcprint $kgm_pc - printf "\n" - end - set $ix = $ix + 1 - end - set $cnt = $cnt + 1 - end -end - -document im6o_showdbg -Syntax: (gdb) im6o_showdbg -| Given a ip6_moptions structure address, print the debug information -| related to its refcnt. This requires the interface address debugging -| to be turned on, by setting the appropriate flags to the "ifa_debug" -| boot-args parameter. -end - -document in6ifa_trash -Syntax: (gdb) in6ifa_trash -| Walk the list of trash in6_ifaddr entries; this requires interface -| address debugging to be turned on, by setting the appropriate flags -| to the "ifa_debug" boot-args parameter. 
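-| Example: (gdb) in6ifa_trash
-| As with rtentry_trash, the "ref" column is the hold count minus the
-| release count for each trashed in6_ifaddr.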
-end
-
-define inifa_trash
-    set $ifa = (struct in_ifaddr_dbg *)inifa_trash_head.tqh_first
-    set $cnt = 0
-    while $ifa != 0
-        if $cnt == 0
-            if $kgm_lp64
-                printf "             in_ifa ref   hold   rele\n"
-                printf " ----------------- --- ------ ------\n"
-            else
-                printf "    in_ifa ref   hold   rele\n"
-                printf " --------- --- ------ ------\n"
-            end
-        end
-        printf "%4d: %p %3d %6d %6d ", $cnt + 1, $ifa, \
-            $ifa->inifa_refhold_cnt - $ifa->inifa_refrele_cnt, \
-            $ifa->inifa_refhold_cnt, $ifa->inifa_refrele_cnt
-        showsockaddr_in $ifa->inifa.ia_ifa.ifa_addr
-        printf "\n"
-        set $ifa = $ifa->inifa_trash_link.tqe_next
-        set $cnt = $cnt + 1
-    end
-end
-
-document inifa_trash
-Syntax: (gdb) inifa_trash
-| Walk the list of trash in_ifaddr entries; this requires interface
-| address debugging to be turned on, by setting the appropriate flags
-| to the "ifa_debug" boot-args parameter.
-end
-
-define ifma_trash
-    set $ifma = (struct ifmultiaddr_dbg *)ifma_trash_head.tqh_first
-    set $cnt = 0
-    while $ifma != 0
-        if $cnt == 0
-            if $kgm_lp64
-                printf "               ifma ref   hold   rele\n"
-                printf " ----------------- --- ------ ------\n"
-            else
-                printf "      ifma ref   hold   rele\n"
-                printf " --------- --- ------ ------\n"
-            end
-        end
-        printf "%4d: %p %3d %6d %6d ", $cnt + 1, $ifma, \
-            $ifma->ifma_refhold_cnt - $ifma->ifma_refrele_cnt, \
-            $ifma->ifma_refhold_cnt, $ifma->ifma_refrele_cnt
-        showsockaddr $ifma->ifma.ifma_addr
-        printf " @ %s%d", $ifma->ifma.ifma_ifp->if_name, \
-            $ifma->ifma.ifma_ifp->if_unit
-        printf "\n"
-        set $ifma = $ifma->ifma_trash_link.tqe_next
-        set $cnt = $cnt + 1
-    end
-end
-
-document ifma_trash
-Syntax: (gdb) ifma_trash
-| Walk the list of trash ifmultiaddr entries; this requires interface
-| address debugging to be turned on, by setting the appropriate flags
-| to the "ifa_debug" boot-args parameter.
-end
-
-define inm_trash
-    set $inm = (struct in_multi_dbg *)inm_trash_head.tqh_first
-    set $cnt = 0
-    while $inm != 0
-        if $cnt == 0
-            if $kgm_lp64
-                printf "                inm ref   hold   rele\n"
-                printf " ----------------- --- ------ ------\n"
-            else
-                printf "       inm ref   hold   rele\n"
-                printf " --------- --- ------ ------\n"
-            end
-        end
-        printf "%4d: %p %3d %6d %6d ", $cnt + 1, $inm, \
-            $inm->inm_refhold_cnt - $inm->inm_refrele_cnt, \
-            $inm->inm_refhold_cnt, $inm->inm_refrele_cnt
-        show_in_addr &($inm->inm.inm_addr)
-        printf "\n"
-        set $inm = $inm->inm_trash_link.tqe_next
-        set $cnt = $cnt + 1
-    end
-end
-
-document inm_trash
-Syntax: (gdb) inm_trash
-| Walk the list of trash in_multi entries; this requires interface
-| address debugging to be turned on, by setting the appropriate flags
-| to the "ifa_debug" boot-args parameter.
-end
-
-define in6m_trash
-    set $in6m = (struct in6_multi_dbg *)in6m_trash_head.tqh_first
-    set $cnt = 0
-    while $in6m != 0
-        if $cnt == 0
-            if $kgm_lp64
-                printf "               in6m ref   hold   rele\n"
-                printf " ----------------- --- ------ ------\n"
-            else
-                printf "      in6m ref   hold   rele\n"
-                printf " --------- --- ------ ------\n"
-            end
-        end
-        printf "%4d: %p %3d %6d %6d ", $cnt + 1, $in6m, \
-            $in6m->in6m_refhold_cnt - $in6m->in6m_refrele_cnt, \
-            $in6m->in6m_refhold_cnt, $in6m->in6m_refrele_cnt
-        show_in6_addr &($in6m->in6m.in6m_addr)
-        printf "\n"
-        set $in6m = $in6m->in6m_trash_link.tqe_next
-        set $cnt = $cnt + 1
-    end
-end
-
-document in6m_trash
-Syntax: (gdb) in6m_trash
-| Walk the list of trash in6_multi entries; this requires interface
-| address debugging to be turned on, by setting the appropriate flags
-| to the "ifa_debug" boot-args parameter.
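-| Example: (gdb) in6m_trash
-| The same hold/release accounting applies; a nonzero "ref" on a
-| trashed in6_multi may indicate a missing release somewhere.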
-end
-
-#
-# print all OSMalloc stats
-
-define ostag_print
-    set $kgm_tagp = (OSMallocTag)$arg0
-    printf "0x%08x: ", $kgm_tagp
-    printf "%8d ", $kgm_tagp->OSMT_refcnt
-    printf "%8x ", $kgm_tagp->OSMT_state
-    printf "%8x ", $kgm_tagp->OSMT_attr
-    printf "%s ", $kgm_tagp->OSMT_name
-    printf "\n"
-end
-
-define showosmalloc
-    printf "TAG            COUNT    STATE     ATTR NAME\n"
-    set $kgm_tagheadp = (struct _OSMallocTag_ *)&OSMalloc_tag_list
-    set $kgm_tagptr = (struct _OSMallocTag_ *)($kgm_tagheadp->OSMT_link.next)
-    while $kgm_tagptr != $kgm_tagheadp
-        ostag_print $kgm_tagptr
-        set $kgm_tagptr = (struct _OSMallocTag_ *)$kgm_tagptr->OSMT_link.next
-    end
-    printf "\n"
-end
-document showosmalloc
-Syntax: (gdb) showosmalloc
-| Print the outstanding allocation count by OSMallocTags.
-end
-
-define systemlog
-    if msgbufp->msg_bufc[msgbufp->msg_bufx] == 0 \
-       && msgbufp->msg_bufc[0] != 0
-        # The buffer hasn't wrapped, so take the easy (and fast!) path
-        printf "%s", msgbufp->msg_bufc
-    else
-        set $kgm_msgbuf = *msgbufp
-        set $kgm_syslog_bufsize = $kgm_msgbuf.msg_size
-        set $kgm_syslog_bufend = $kgm_msgbuf.msg_bufx
-        if $kgm_syslog_bufend >= $kgm_syslog_bufsize
-            set $kgm_syslog_bufend = 0
-        end
-
-        # print older messages from msg_bufx to end of buffer
-        set $kgm_i = $kgm_syslog_bufend
-        while $kgm_i < $kgm_syslog_bufsize
-            set $kgm_syslog_char = $kgm_msgbuf.msg_bufc[$kgm_i]
-            if $kgm_syslog_char == 0
-                # break out of loop
-                set $kgm_i = $kgm_syslog_bufsize
-            else
-                printf "%c", $kgm_syslog_char
-            end
-            set $kgm_i = $kgm_i + 1
-        end
-
-        # print newer messages from start of buffer to msg_bufx
-        set $kgm_i = 0
-        while $kgm_i < $kgm_syslog_bufend
-            set $kgm_syslog_char = $kgm_msgbuf.msg_bufc[$kgm_i]
-            if $kgm_syslog_char != 0
-                printf "%c", $kgm_syslog_char
-            end
-            set $kgm_i = $kgm_i + 1
-        end
-    end
-    printf "\n"
-end
-document systemlog
-| Syntax: systemlog
-| Display the kernel's printf ring buffer
-end
-
-define hexdump
-    set $kgm_addr = (unsigned char *)$arg0
-    set $kgm_len = $arg1
-    while $kgm_len > 0
-        showptr $kgm_addr
-        printf ": "
-        set $kgm_i = 0
-        while $kgm_i < 16
-            printf "%02x ", *($kgm_addr+$kgm_i)
-            set $kgm_i += 1
-        end
-        printf " |"
-        set $kgm_i = 0
-        while $kgm_i < 16
-            set $kgm_temp = *($kgm_addr+$kgm_i)
-            if $kgm_temp < 32 || $kgm_temp >= 127
-                printf "."
-            else
-                printf "%c", $kgm_temp
-            end
-            set $kgm_i += 1
-        end
-        printf "|\n"
-        set $kgm_addr += 16
-        set $kgm_len -= 16
-    end
-end
-document hexdump
-| Show the contents of memory as a hex/ASCII dump
-| The following is the syntax:
-| (gdb) hexdump <address> <length>
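-| For example, to dump 64 bytes starting at a hypothetical address:
-|     (gdb) hexdump 0xffffff80003c1000 64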
-end - - -define printcolonhex - if ($argc == 2) - set $addr = $arg0 - set $count = $arg1 - set $li = 0 - while ($li < $count) - if ($li == 0) - printf "%02x", (u_char)$addr[$li] - end - if ($li != 0) - printf ":%02x", (u_char)$addr[$li] - end - set $li = $li + 1 - end - end -end - -define showsockaddr_dl - set $sdl = (struct sockaddr_dl *)$arg0 - if ($sdl == 0) - printf "(null) " - else - if $sdl->sdl_nlen == 0 && $sdl->sdl_alen == 0 && $sdl->sdl_slen == 0 - printf "link#%3d ", $sdl->sdl_index - else - set $addr = $sdl->sdl_data + $sdl->sdl_nlen - set $count = $sdl->sdl_alen - printcolonhex $addr $count - end - end -end - -define showsockaddr_unspec - set $sockaddr = (struct sockaddr *)$arg0 - set $addr = $sockaddr->sa_data - set $count = $sockaddr->sa_len - 2 - printcolonhex $addr $count -end - -define showsockaddr_at - set $sockaddr = (struct sockaddr *)$arg0 - set $addr = $sockaddr->sa_data - set $count = $sockaddr->sa_len - 2 - printcolonhex $addr $count -end - -define show_in_addr - set $ia = (unsigned char *)$arg0 - printf "%3u.%03u.%03u.%03u", $ia[0], $ia[1], $ia[2], $ia[3] -end - -define showsockaddr_in - set $sin = (struct sockaddr_in *)$arg0 - set $sa_bytes = (unsigned char *)&($sin->sin_addr) - show_in_addr $sa_bytes -end - -define show_in6_addr - set $ia = (unsigned char *)$arg0 - printf "%2x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x", \ - $ia[0], $ia[1], $ia[2], $ia[3], $ia[4], $ia[5], $ia[6], $ia[7], $ia[8], $ia[9], $ia[10], $ia[11], $ia[12], $ia[13], $ia[14], $ia[15] -end - -define showsockaddr_in6 - set $sin6 = (struct sockaddr_in6 *)$arg0 - set $sa_bytes = $sin6->sin6_addr.__u6_addr.__u6_addr8 - show_in6_addr $sa_bytes -end - -define showsockaddr_un - set $sun = (struct sockaddr_un *)$arg0 - if $sun == 0 - printf "(null)" - else - if $sun->sun_path[0] == 0 - printf "\"\"" - else - printf "%s", $sun->sun_path - end - end -end - -define showifmultiaddrs - set $ifp = (struct ifnet *)$arg0 - set $if_multi = (struct ifmultiaddr *)$ifp->if_multiaddrs->lh_first - set $mymulti = $if_multi - set $myi = 0 - while ($mymulti != 0) - printf "%2d. %p ", $myi, $mymulti - set $sa_family = $mymulti->ifma_addr.sa_family - if ($sa_family == 2) - if ($mymulti->ifma_ll != 0) - showsockaddr_dl $mymulti->ifma_ll->ifma_addr - printf " " - end - showsockaddr_in $mymulti->ifma_addr - end - if ($sa_family == 30) - if ($mymulti->ifma_ll != 0) - showsockaddr_dl $mymulti->ifma_ll->ifma_addr - printf " " - end - showsockaddr_in6 $mymulti->ifma_addr - end - if ($sa_family == 18) - showsockaddr_dl $mymulti->ifma_addr - end - if ($sa_family == 0) - showsockaddr_unspec $mymulti->ifma_addr 6 - end - printf " [%d]", $mymulti->ifma_refcount - printf "\n" - set $mymulti = $mymulti->ifma_link.le_next - set $myi = $myi + 1 - end -end - -document showifmultiaddrs -Syntax showifmultiaddrs -| show the (struct ifnet).if_multiaddrs list of multicast addresses for the given ifp -end - -define showinmultiaddrs - set $in_multi = (struct in_multi *)(in_multihead->lh_first) - set $mymulti = $in_multi - set $myi = 0 - while ($mymulti != 0) - set $ifp = (struct ifnet *)$mymulti->inm_ifp - printf "%2d. 
%p ", $myi, $mymulti - show_in_addr &($mymulti->inm_addr) - printf " (ifp %p [%s%d] ifma %p) ", $ifp, $ifp->if_name, \ - $ifp->if_unit, $mymulti->inm_ifma - printf "\n" - set $mymulti = $mymulti->inm_link.le_next - set $myi = $myi + 1 - end -end - -document showinmultiaddrs -Syntax showinmultiaddrs -| show the contents of IPv4 multicast address records -end - -define showin6multiaddrs - set $in6_multi = (struct in6_multi *)(in6_multihead->lh_first) - set $mymulti = $in6_multi - set $myi = 0 - while ($mymulti != 0) - set $ifp = (struct ifnet *)$mymulti->in6m_ifp - printf "%2d. %p ", $myi, $mymulti - show_in6_addr &($mymulti->in6m_addr) - printf " (ifp %p [%s%d] ifma %p) ", $ifp, $ifp->if_name, \ - $ifp->if_unit, $mymulti->in6m_ifma - printf "\n" - set $mymulti = $mymulti->in6m_entry.le_next - set $myi = $myi + 1 - end -end - -document showin6multiaddrs -Syntax showin6multiaddrs -| show the contents of IPv6 multicast address records -end - -define showsockaddr - set $mysock = (struct sockaddr *)$arg0 - set $showsockaddr_handled = 0 - if ($mysock == 0) - printf "(null)" - else - if ($mysock->sa_family == 0) - printf "UNSPC" - showsockaddr_unspec $mysock - set $showsockaddr_handled = 1 - end - if ($mysock->sa_family == 1) - printf "UNIX " - showsockaddr_un $mysock - set $showsockaddr_handled = 1 - end - if ($mysock->sa_family == 2) - printf "INET " - showsockaddr_in $mysock - set $showsockaddr_handled = 1 - end - if ($mysock->sa_family == 30) - printf "INET6 " - showsockaddr_in6 $mysock - set $showsockaddr_handled = 1 - end - if ($mysock->sa_family == 18) - printf "LINK " - showsockaddr_dl $mysock - set $showsockaddr_handled = 1 - end - if ($mysock->sa_family == 16) - printf "ATLK " - showsockaddr_at $mysock - set $showsockaddr_handled = 1 - end - if ($showsockaddr_handled == 0) - printf "FAM %d ", $mysock->sa_family - set $addr = $mysock->sa_data - set $count = $mysock->sa_len - printcolonhex $addr $count - end - end -end - -define showifflags - set $flags = (u_short)$arg0 - set $first = 1 - printf "<" - if ($flags & 0x1) - printf "UP" - set $first = 0 - end - if ($flags & 0x2) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "BROADCAST" - end - if ($flags & 0x4) - printf "DEBUG" - end - if ($flags & 0x8) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "LOOPBACK" - end - if ($flags & 0x10) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "POINTTOPOINT" - end -## if ($flags & 0x20) -## if ($first == 1) -# set $first = 0 -## else -# printf "," -# end -# printf "NOTRAILERS" -# end - if ($flags & 0x40) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "RUNNING" - end - if ($flags & 0x80) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "NOARP" - end - if ($flags & 0x100) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "PROMISC" - end - if ($flags & 0x200) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "ALLMULTI" - end - if ($flags & 0x400) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "OACTIVE" - end - if ($flags & 0x800) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "SIMPLEX" - end - if ($flags & 0x1000) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "LINK0" - end - if ($flags & 0x2000) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "LINK1" - end - if ($flags & 0x4000) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf 
"LINK2-ALTPHYS" - end - if ($flags & 0x8000) - if ($first == 1) - set $first = 0 - else - printf "," - end - printf "MULTICAST" - end - printf ">" -end - -define showifaddrs - set $ifp = (struct ifnet *)$arg0 - set $myifaddr = (struct ifaddr *)$ifp->if_addrhead->tqh_first - set $myi = 0 - while ($myifaddr != 0) - printf "\t%d. %p ", $myi, $myifaddr - showsockaddr $myifaddr->ifa_addr - printf " [%d]\n", $myifaddr->ifa_refcnt - set $myifaddr = $myifaddr->ifa_link->tqe_next - set $myi = $myi + 1 - end -end - -document showifaddrs -Syntax: showifaddrs -| show the (struct ifnet).if_addrhead list of addresses for the given ifp -end - -define ifconfig - set $ifconfig_all = 0 - if ($argc == 1) - set $ifconfig_all = 1 - end - set $ifp = (struct ifnet *)(ifnet_head->tqh_first) - while ($ifp != 0) - printf "%s%d: flags=%hx", $ifp->if_name, $ifp->if_unit, (u_short)$ifp->if_flags - showifflags $ifp->if_flags - printf " index %d", $ifp->if_index - printf " mtu %d\n", $ifp->if_data.ifi_mtu - printf "\t(struct ifnet *)" - showptr $ifp - printf "\n" - if ($ifconfig_all == 1) - showifaddrs $ifp - end - set $ifp = $ifp->if_link->tqe_next - end -end -document ifconfig -Syntax: (gdb) ifconfig -| display ifconfig-like output, and print the (struct ifnet *) pointers for further inspection -end - -set $DLIF_INUSE = 0x1 -set $DLIF_REUSE = 0x2 - -define showifnets - set $all = 0 - if ($argc == 1) - set $all = 1 - end - set $dlifp = (struct dlil_ifnet *)(dlil_ifnet_head->tqh_first) - while ($dlifp != 0) - set $ifp = (struct ifnet *)$dlifp - if ($dlifp->dl_if_flags & $DLIF_REUSE) - printf "*" - end - if ($dlifp->dl_if_flags & $DLIF_INUSE) - printf "%s%d: ", $ifp->if_name, $ifp->if_unit - else - printf "[%s%d]: ", $ifp->if_name, $ifp->if_unit - end - printf "flags=%hx", (u_short)$ifp->if_flags - showifflags $ifp->if_flags - printf " index %d", $ifp->if_index - printf " mtu %d\n", $ifp->if_data.ifi_mtu - printf "\t(struct ifnet *)" - showptr $ifp - printf "\n" - if ($all == 1) - showifaddrs $ifp - end - set $dlifp = $dlifp->dl_if_link->tqe_next - end -end - -document showifnets -Syntax: (gdb) showifnets -| Display ifconfig-like output for all attached and detached interfaces -end - -define _show_unix_domain_socket - set $so = (struct socket *)$arg0 - set $pcb = (struct unpcb *)$so->so_pcb - if $pcb == 0 - printf "unpcb: (null) " - else - printf "unpcb: %p ", $pcb - printf "unp_vnode: %p ", $pcb->unp_vnode - printf "unp_conn: %p ", $pcb->unp_conn - printf "unp_addr: " - showsockaddr_un $pcb->unp_addr - end -end - -define _show_in_port - set $str = (unsigned char *)$arg0 - set $port = *(unsigned short *)$arg0 - - if (((($port & 0xff00) >> 8) == $str[0])) && ((($port & 0x00ff) == $str[1])) - #printf "big endian " - printf ":%d ", $port - else - #printf "little endian " - printf ":%d ", (($port & 0xff00) >> 8) | (($port & 0x00ff) << 8) - end -end - -define _show_in_addr_4in6 - set $ia = (unsigned char *)$arg0 - if $ia - printf "%3u.%03u.%03u.%03u", $ia[0], $ia[1], $ia[2], $ia[3] - end -end - -define _show_in6_addr - set $ia = (unsigned char *)$arg0 - if $ia - printf "%2x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x", \ - $ia[0], $ia[1], $ia[2], $ia[3], $ia[4], $ia[5], $ia[6], $ia[7], \ - $ia[8], $ia[9], $ia[10], $ia[11], $ia[12], $ia[13], $ia[14], $ia[15] - end -end - -define _showtcpstate - set $tp = (struct tcpcb *)$arg0 - if $tp - if $tp->t_state == 0 - printf "CLOSED " - end - if $tp->t_state == 1 - printf "LISTEN " - end - if $tp->t_state == 2 - printf "SYN_SENT " - end - if $tp->t_state == 3 - 
printf "SYN_RCVD " - end - if $tp->t_state == 4 - printf "ESTABLISHED " - end - if $tp->t_state == 5 - printf "CLOSE_WAIT " - end - if $tp->t_state == 6 - printf "FIN_WAIT_1 " - end - if $tp->t_state == 7 - printf "CLOSING " - end - if $tp->t_state == 8 - printf "LAST_ACK " - end - if $tp->t_state == 9 - printf "FIN_WAIT_2 " - end - if $tp->t_state == 10 - printf "TIME_WAIT " - end - end -end - -define _showsockprotocol - set $so = (struct socket *)$arg0 - set $inpcb = (struct inpcb *)$so->so_pcb - - if $so->so_proto->pr_protocol == 6 - printf "TCP " - _showtcpstate $inpcb->inp_ppcb - end - if $so->so_proto->pr_protocol == 17 - printf "UDP " - end - if $so->so_proto->pr_protocol == 1 - printf "ICMP " - end - if $so->so_proto->pr_protocol == 254 - printf "DIVERT " - end - if $so->so_proto->pr_protocol == 255 - printf "RAW " - end -end - -define _show_ipv4_socket - set $so = (struct socket *)$arg0 - set $inpcb = (struct inpcb *)$so->so_pcb - if $inpcb == 0 - printf "inpcb: (null) " - else - printf "inpcb: %p ", $inpcb - - _showsockprotocol $so - - _show_in_addr_4in6 &$inpcb->inp_dependladdr.inp46_local - _show_in_port &$inpcb->inp_lport - printf "-> " - _show_in_addr_4in6 &$inpcb->inp_dependfaddr.inp46_foreign - _show_in_port &$inpcb->inp_fport - end -end - -define _show_ipv6_socket - set $so = (struct socket *)$arg0 - set $pcb = (struct inpcb *)$so->so_pcb - if $pcb == 0 - printf "inpcb: (null) " - else - printf "inpcb: %p ", $pcb - - _showsockprotocol $so - - _show_in6_addr &$pcb->inp_dependladdr.inp6_local - _show_in_port &$pcb->inp_lport - printf "-> " - _show_in6_addr &$pcb->inp_dependfaddr.inp6_foreign - _show_in_port &$pcb->inp_fport - end -end - - -define showsocket - set $so = (struct socket *)$arg0 - if $so == 0 - printf "so: (null) " - else - printf "so: %p ", $so - if $so && $so->so_proto && $so->so_proto->pr_domain - set $domain = (struct domain *) $so->so_proto->pr_domain - - printf "%s ", $domain->dom_name - if $domain->dom_family == 1 - _show_unix_domain_socket $so - end - if $domain->dom_family == 2 - _show_ipv4_socket $so - end - if $domain->dom_family == 30 - _show_ipv6_socket $so - end - end - end - printf "\n" -end -document showsocket -Syntax: (gdb) showsocket -| Routine to print out a socket -end - -define showprocsockets - set $pp = (struct proc *)$arg0 - set $fdp = (struct filedesc *)$pp->p_fd - - set $count = 0 - set $fpp = (struct fileproc **)($fdp->fd_ofiles) - set $fpo = (char)($fdp->fd_ofileflags[0]) - while $count < $fdp->fd_nfiles - if *$fpp - set $fg =(struct fileglob *)((**$fpp)->f_fglob) - if $fg && (($fg)->fg_type == 2) - if $fdp->fd_ofileflags[$count] & 4 - printf "U: " - else - printf " " - end - printf "fd = %d ", $count - if $fg->fg_data - showsocket $fg->fg_data - else - printf "\n" - end - end - end - set $fpp = $fpp + 1 - set $count = $count + 1 - end -end -document showprocsockets -Syntax: (gdb) showprocsockets -| Routine to print out all the open fds -| which are sockets in a process -end - -define showallprocsockets - set $basep = (struct proc *)allproc->lh_first - set $pp = $basep - while $pp - printf "============================================ \n" - showproc $pp - showprocsockets $pp - set $pp = $pp->p_list.le_next - end -end -document showallprocsockets -Syntax: (gdb) showallprocsockets -| Routine to print out all the open fds -| which are sockets -end - -define _print_ntohs - set $port = (unsigned short)$arg0 - set $port = (unsigned short)((($arg0 & 0xff00) >> 8) & 0xff) - set $port |= (unsigned short)(($arg0 & 0xff) << 8) - printf "%5d", 
$port -end - -set $INPCB_STATE_INUSE=0x1 -set $INPCB_STATE_CACHED=0x2 -set $INPCB_STATE_DEAD=0x3 - -set $INP_RECVOPTS=0x01 -set $INP_RECVRETOPTS=0x02 -set $INP_RECVDSTADDR=0x04 -set $INP_HDRINCL=0x08 -set $INP_HIGHPORT=0x10 -set $INP_LOWPORT=0x20 -set $INP_ANONPORT=0x40 -set $INP_RECVIF=0x80 -set $INP_MTUDISC=0x100 -set $INP_STRIPHDR=0x200 -set $INP_RECV_ANYIF=0x400 -set $INP_INADDR_ANY=0x800 -set $INP_RECVTTL=0x1000 -set $INP_UDP_NOCKSUM=0x2000 -set $IN6P_IPV6_V6ONLY=0x008000 -set $IN6P_PKTINFO=0x010000 -set $IN6P_HOPLIMIT=0x020000 -set $IN6P_HOPOPTS=0x040000 -set $IN6P_DSTOPTS=0x080000 -set $IN6P_RTHDR=0x100000 -set $IN6P_RTHDRDSTOPTS=0x200000 -set $IN6P_AUTOFLOWLABEL=0x800000 -set $IN6P_BINDV6ONLY=0x10000000 - -set $INP_IPV4=0x1 -set $INP_IPV6=0x2 - -set $IPPROTO_TCP=6 -set $IPPROTO_UDP=17 - -define _dump_inpcb - set $pcb = (struct inpcb *)$arg0 - if $kgm_lp64 - printf "%18p", $pcb - else - printf "%10p ", $pcb - end - if $arg1 == $IPPROTO_TCP - printf "tcp" - else - if $arg1 == $IPPROTO_UDP - printf "udp" - else - printf "%2d.", $arg1 - end - end - if ($pcb->inp_vflag & $INP_IPV4) - printf "4 " - end - if ($pcb->inp_vflag & $INP_IPV6) - printf "6 " - end - - if ($pcb->inp_vflag & $INP_IPV4) - printf " " - _show_in_addr &$pcb->inp_dependladdr.inp46_local.ia46_addr4 - else - _show_in6_addr &$pcb->inp_dependladdr.inp6_local - end - printf " " - _print_ntohs $pcb->inp_lport - printf " " - if ($pcb->inp_vflag & $INP_IPV4) - printf " " - _show_in_addr &($pcb->inp_dependfaddr.inp46_foreign.ia46_addr4) - else - _show_in6_addr &($pcb->inp_dependfaddr.inp6_foreign) - end - printf " " - _print_ntohs $pcb->inp_fport - printf " " - - if $arg1 == $IPPROTO_TCP - _showtcpstate $pcb->inp_ppcb - end - -# printf "phd " -# set $phd = $pcb->inp_phd -# while $phd != 0 -# printf " " -# _print_ntohs $phd->phd_port -# set $phd = $phd->phd_hash.le_next -# end -# printf ", " - if ($pcb->inp_flags & $INP_RECVOPTS) - printf "recvopts " - end - if ($pcb->inp_flags & $INP_RECVRETOPTS) - printf "recvretopts " - end - if ($pcb->inp_flags & $INP_RECVDSTADDR) - printf "recvdstaddr " - end - if ($pcb->inp_flags & $INP_HDRINCL) - printf "hdrincl " - end - if ($pcb->inp_flags & $INP_HIGHPORT) - printf "highport " - end - if ($pcb->inp_flags & $INP_LOWPORT) - printf "lowport " - end - if ($pcb->inp_flags & $INP_ANONPORT) - printf "anonport " - end - if ($pcb->inp_flags & $INP_RECVIF) - printf "recvif " - end - if ($pcb->inp_flags & $INP_MTUDISC) - printf "mtudisc " - end - if ($pcb->inp_flags & $INP_STRIPHDR) - printf "striphdr " - end - if ($pcb->inp_flags & $INP_RECV_ANYIF) - printf "recv_anyif " - end - if ($pcb->inp_flags & $INP_INADDR_ANY) - printf "inaddr_any " - end - if ($pcb->inp_flags & $INP_RECVTTL) - printf "recvttl " - end - if ($pcb->inp_flags & $INP_UDP_NOCKSUM) - printf "nocksum " - end - if ($pcb->inp_flags & $IN6P_IPV6_V6ONLY) - printf "v6only " - end - if ($pcb->inp_flags & $IN6P_PKTINFO) - printf "pktinfo " - end - if ($pcb->inp_flags & $IN6P_HOPLIMIT) - printf "hoplimit " - end - if ($pcb->inp_flags & $IN6P_HOPOPTS) - printf "hopopts " - end - if ($pcb->inp_flags & $IN6P_DSTOPTS) - printf "dstopts " - end - if ($pcb->inp_flags & $IN6P_RTHDR) - printf "rthdr " - end - if ($pcb->inp_flags & $IN6P_RTHDRDSTOPTS) - printf "rthdrdstopts " - end - if ($pcb->inp_flags & $IN6P_AUTOFLOWLABEL) - printf "autoflowlabel " - end - if ($pcb->inp_flags & $IN6P_BINDV6ONLY) - printf "bindv6only " - end - set $so = (struct socket *)$pcb->inp_socket - if $so != 0 - printf "[so=%p s=%ld r=%ld usecnt=%ld] ", $so, 
$so->so_snd.sb_cc, \ - $so->so_rcv.sb_cc, $so->so_usecount - end - if ($pcb->inp_state == 0 || $pcb->inp_state == $INPCB_STATE_INUSE) - printf "inuse, " - else - if ($pcb->inp_state == $INPCB_STATE_CACHED) - printf "cached, " - else - if ($pcb->inp_state == $INPCB_STATE_DEAD) - printf "dead, " - else - printf "unknown (%d), ", $pcb->inp_state - end - end - end -end - -define _dump_inpcbport - set $ppcb = (struct inpcbport *)$arg0 - printf "%p: lport ", $ppcb - _print_ntohs $ppcb->phd_port -end - -set $UDBHASHSIZE=16 - -define _dump_pcbinfo - set $snd_cc = 0 - set $snd_buf = (unsigned int)0 - set $rcv_cc = 0 - set $rcv_buf = (unsigned int)0 - set $pcbseen = 0 - set $pcbi = (struct inpcbinfo *)$arg0 - printf "lastport %d lastlow %d lasthi %d\n", \ - $pcbi->lastport, $pcbi->lastlow, $pcbi->lasthi - printf "active pcb count is %d\n", $pcbi->ipi_count - set $hashsize = $pcbi->hashmask + 1 - printf "hash size is %d\n", $hashsize - printf "hash base %p has the following inpcb(s):\n", $pcbi->hashbase - if $kgm_lp64 - printf "pcb prot source address port destination address port\n" - printf "------------------ ---- --------------------------------------- ----- --------------------------------------- -----\n" - else - printf "pcb prot source address port destination address port\n" - printf "---------- ---- --------------------------------------- ----- --------------------------------------- -----\n" - end - set $i = 0 - set $hashbase = $pcbi->hashbase - set $head = *(uintptr_t *)$hashbase - while $i < $hashsize - if $head != 0 - set $pcb0 = (struct inpcb *)$head - while $pcb0 != 0 - set $pcbseen += 1 - _dump_inpcb $pcb0 $arg1 - set $so = (struct socket *)$pcb->inp_socket - if $so != 0 - set $snd_cc += $so->so_snd.sb_cc - set $mp = $so->so_snd.sb_mb - while $mp - set $snd_buf += 256 - if ($mp->m_hdr.mh_flags & 0x01) - set $snd_buf += $mp->M_dat.MH.MH_dat.MH_ext.ext_size - end - set $mp = $mp->m_hdr.mh_next - end - set $rcv_cc += $so->so_rcv.sb_cc - set $mp = $so->so_rcv.sb_mb - while $mp - set $rcv_buf += 256 - if ($mp->m_hdr.mh_flags & 0x01) - set $rcv_buf += $mp->M_dat.MH.MH_dat.MH_ext.ext_size - end - set $mp = $mp->m_hdr.mh_next - end - end - set $pcb0 = $pcb0->inp_hash.le_next - printf "\n" - end - end - set $i += 1 - set $hashbase += 1 - set $head = *(uintptr_t *)$hashbase - end - printf "total seen %ld snd_cc %ld rcv_cc %ld\n", $pcbseen, $snd_cc, $rcv_cc - printf "total snd_buf %u rcv_buf %u \n", (unsigned int)$snd_buf, (unsigned int)$rcv_buf - printf "port hash base is %p\n", $pcbi->porthashbase - set $i = 0 - set $hashbase = $pcbi->porthashbase - set $head = *(uintptr_t *)$hashbase - while $i < $hashsize - if $head != 0 - set $pcb0 = (struct inpcbport *)$head - while $pcb0 != 0 - printf "\t" - _dump_inpcbport $pcb0 - printf "\n" - set $pcb0 = $pcb0->phd_hash.le_next - end - end - set $i += 1 - set $hashbase += 1 - set $head = *(uintptr_t *)$hashbase - end -end - -set $N_TIME_WAIT_SLOTS=128 - -define show_tcp_timewaitslots - set $slot = -1 - set $all = 0 - if $argc == 1 - if (int)$arg0 == -1 - set $all = 1 - else - set $slot = (int)$arg0 - end - end - printf "time wait slot size %d cur_tw_slot %ld\n", $N_TIME_WAIT_SLOTS, cur_tw_slot - set $i = 0 - while $i < $N_TIME_WAIT_SLOTS - set $perslot = 0 - set $head = (uintptr_t *)time_wait_slots[$i] - if $i == $slot || $slot == -1 - if $head != 0 - set $pcb0 = (struct inpcb *)$head - while $pcb0 != 0 - set $perslot += 1 - set $pcb0 = $pcb0->inp_list.le_next - end - end - printf " slot %ld count %ld\n", $i, $perslot - end - if $all || $i == $slot - 
if $head != 0 - set $pcb0 = (struct inpcb *)$head - while $pcb0 != 0 - printf "\t" - _dump_inpcb $pcb0 $IPPROTO_TCP - printf "\n" - set $pcb0 = $pcb0->inp_list.le_next - end - end - end - set $i += 1 - end -end -document show_tcp_timewaitslots -Syntax: (gdb) show_tcp_timewaitslots -| Print the list of TCP protocol control blocks in the TIMEWAIT state -| Pass -1 to see the list of PCBs for each slot -| Pass a slot number to see information for that slot with its list of PCBs -end - -define show_tcp_pcbinfo - _dump_pcbinfo &tcbinfo $IPPROTO_TCP -end -document show_tcp_pcbinfo -Syntax: (gdb) show_tcp_pcbinfo -| Print information for every TCP protocol control block -end - - -define show_udp_pcbinfo - _dump_pcbinfo &udbinfo $IPPROTO_UDP -end -document show_udp_pcbinfo -Syntax: (gdb) show_udp_pcbinfo -| Print information for every UDP protocol control block -end - -define showbpfdtab - set $myi = 0 - while ($myi < bpf_dtab_size) - if (bpf_dtab[$myi] != 0) - printf "Address 0x%x, bd_next 0x%x\n", bpf_dtab[$myi], bpf_dtab[$myi]->bd_next - print *bpf_dtab[$myi] - end - set $myi = $myi + 1 - end -end - -define printvnodepathint_recur - if $arg0 != 0 - if ($arg0->v_flag & 0x000001) && ($arg0->v_mount != 0) - if $arg0->v_mount->mnt_vnodecovered != 0 - printvnodepathint_recur $arg0->v_mount->mnt_vnodecovered $arg0->v_mount->mnt_vnodecovered->v_name - end - else - printvnodepathint_recur $arg0->v_parent $arg0->v_parent->v_name - printf "/%s", $arg1 - end - end -end - -define showvnodepath - set $vp = (struct vnode *)$arg0 - if $vp != 0 - if ($vp->v_flag & 0x000001) && ($vp->v_mount != 0) && ($vp->v_mount->mnt_flag & 0x00004000) - printf "/" - else - printvnodepathint_recur $vp $vp->v_name - end - end - printf "\n" -end - -document showvnodepath -Syntax: (gdb) showvnodepath -| Prints the path for a vnode -end - -define showallvols - printf "volume " - showptrhdrpad - printf " mnt_data " - showptrhdrpad - printf " mnt_devvp " - showptrhdrpad - printf " typename mountpoint\n" - set $kgm_vol = (mount_t) mountlist.tqh_first - while $kgm_vol - showptr $kgm_vol - printf " " - showptr $kgm_vol->mnt_data - printf " " - showptr $kgm_vol->mnt_devvp - printf " " - if ($kgm_vol->mnt_vtable->vfc_name[0] == 'h') && \ - ($kgm_vol->mnt_vtable->vfc_name[1] == 'f') && \ - ($kgm_vol->mnt_vtable->vfc_name[2] == 's') && \ - ($kgm_vol->mnt_vtable->vfc_name[3] == '\0') - set $kgm_hfsmount = \ - (struct hfsmount *) $kgm_vol->mnt_data - if $kgm_hfsmount->hfs_freezing_proc != 0 - printf "FROZEN hfs " - else - printf "hfs " - end - else - printf "%-10s ", $kgm_vol->mnt_vtable->vfc_name - end - printf "%s\n", $kgm_vol->mnt_vfsstat.f_mntonname - - set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next - end -end - -document showallvols -Syntax: (gdb) showallvols -| Display a summary of mounted volumes -end - -define showvnodeheader - printf "vnode " - showptrhdrpad - printf " usecount iocount v_data " - showptrhdrpad - printf " vtype parent " - showptrhdrpad - printf " name\n" -end - -define showvnodeint - set $kgm_vnode = (vnode_t) $arg0 - showptr $kgm_vnode - printf " %8d ", $kgm_vnode->v_usecount - printf "%7d ", $kgm_vnode->v_iocount -# print information about clean/dirty blocks? 
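-# (roughly: v_usecount tracks long-lived references to the vnode, while v_iocount tracks transient I/O references held across vnode_get/vnode_put)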
- showptr $kgm_vnode->v_data - printf " " - # print the vtype, using the enum tag - set $kgm_vtype = $kgm_vnode->v_type - if $kgm_vtype == VNON - printf "VNON " - end - if $kgm_vtype == VREG - printf "VREG " - end - if $kgm_vtype == VDIR - printf "VDIR " - end - if $kgm_vtype == VBLK - printf "VBLK " - end - if $kgm_vtype == VCHR - printf "VCHR " - end - if $kgm_vtype == VLNK - printf "VLNK " - end - if $kgm_vtype == VSOCK - printf "VSOCK " - end - if $kgm_vtype == VFIFO - printf "VFIFO " - end - if $kgm_vtype == VBAD - printf "VBAD " - end - if ($kgm_vtype < VNON) || ($kgm_vtype > VBAD) - printf "%5d ", $kgm_vtype - end - - showptr $kgm_vnode->v_parent - printf " " - if ($kgm_vnode->v_name != 0) - printf "%s\n", $kgm_vnode->v_name - else - # If this is HFS vnode, get name from the cnode - if ($kgm_vnode->v_tag == 16) - set $kgm_cnode = (struct cnode *)$kgm_vnode->v_data - printf "hfs: %s\n", (char *)$kgm_cnode->c_desc->cd_nameptr - else - printf "\n" - end - end -end - -define showvnode - showvnodeheader - showvnodeint $arg0 -end - -document showvnode -Syntax: (gdb) showvnode -| Display info about one vnode -end - -define showvolvnodes - showvnodeheader - set $kgm_vol = (mount_t) $arg0 - set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first - while $kgm_vnode - showvnodeint $kgm_vnode - set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next - end -end - -document showvolvnodes -Syntax: (gdb) showvolvnodes -| Display info about all vnodes of a given mount_t -end - -define showvolbusyvnodes - showvnodeheader - set $kgm_vol = (mount_t) $arg0 - set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first - while $kgm_vnode - if $kgm_vnode->v_iocount != 0 - showvnodeint $kgm_vnode - end - set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next - end -end - -document showvolbusyvnodes -Syntax: (gdb) showvolbusyvnodes -| Display info about busy (iocount!=0) vnodes of a given mount_t -end - -define showallbusyvnodes - showvnodeheader - set $kgm_vol = (mount_t) mountlist.tqh_first - while $kgm_vol - set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first - while $kgm_vnode - if $kgm_vnode->v_iocount != 0 - showvnodeint $kgm_vnode - end - set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next - end - set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next - end -end - -document showallbusyvnodes -Syntax: (gdb) showallbusyvnodes -| Display info about all busy (iocount!=0) vnodes -end - -define showallvnodes - showvnodeheader - set $kgm_vol = (mount_t) mountlist.tqh_first - while $kgm_vol - set $kgm_vnode = (vnode_t) $kgm_vol.mnt_vnodelist.tqh_first - while $kgm_vnode - showvnodeint $kgm_vnode - set $kgm_vnode = (vnode_t) $kgm_vnode->v_mntvnodes.tqe_next - end - set $kgm_vol = (mount_t) $kgm_vol->mnt_list.tqe_next - end -end - -document showallvnodes -Syntax: (gdb) showallvnodes -| Display info about all vnodes -end - -define _showvnodelockheader - printf "* type W held by lock type start end\n" - printf "- ----- - ------------- --------- ------------------ ------------------\n" -end - -define _showvnodelock - set $kgm_svl_lock = ((struct lockf *)$arg0) - - # decode flags - set $kgm_svl_flags = $kgm_svl_lock->lf_flags - set $kgm_svl_type = $kgm_svl_lock->lf_type - if ($kgm_svl_flags & 0x20) - printf "flock" - end - if ($kgm_svl_flags & 0x40) - printf "posix" - end - if ($kgm_svl_flags & 0x80) - printf "prov " - end - if ($kgm_svl_flags & 0x10) - printf " W " - else - printf " . " - end - - # POSIX file vs. 
advisory range locks - if ($kgm_svl_flags & 0x40) - set $kgm_svl_proc = (proc_t)$kgm_svl_lock->lf_id - printf "PID %8d ", $kgm_svl_proc->p_pid - else - printf "ID 0x%08x ", $kgm_svl_lock->lf_id - end - - # lock type - if ($kgm_svl_type == 1) - printf "shared " - else - if ($kgm_svl_type == 3) - printf "exclusive " - else - if ($kgm_svl_type == 2) - printf "unlock " - else - printf "unknown " - end - end - end - - # start and stop - printf "0x%016x..", $kgm_svl_lock->lf_start - printf "0x%016x ", $kgm_svl_lock->lf_end - printf "\n" -end -# Body of showvnodelocks, not including header -define _showvnodelocks - set $kgm_svl_vnode = ((vnode_t)$arg0) - set $kgm_svl_lockiter = $kgm_svl_vnode->v_lockf - while ($kgm_svl_lockiter != 0) - # locks that are held - printf "H " - _showvnodelock $kgm_svl_lockiter - - # and any locks blocked by them - set $kgm_svl_blocker = $kgm_svl_lockiter->lf_blkhd.tqh_first - while ($kgm_svl_blocker != 0) - printf "> " - _showvnodelock $kgm_svl_blocker - set $kgm_svl_blocker = $kgm_svl_blocker->lf_block.tqe_next - end - - # and on to the next one... - set $kgm_svl_lockiter = $kgm_svl_lockiter->lf_next - end -end - - -define showvnodelocks - if ($argc == 1) - _showvnodelockheader - _showvnodelocks $arg0 - else - printf "| Usage:\n|\n" - help showvnodelocks - end -end - -document showvnodelocks -Syntax: (gdb) showvnodelocks -| Given a vnode_t pointer, display the list of advisory record locks for the -| referenced vnode -end - -define showbootargs - printf "%s\n", (char*)((boot_args*)PE_state.bootArgs).CommandLine -end - -document showbootargs -Syntax: showbootargs -| Display boot arguments passed to the target kernel -end - -define showbootermemorymap - if ($kgm_mtype == $kgm_mtype_i386) - set $kgm_voffset = 0 - else - if ($kgm_mtype == $kgm_mtype_x86_64) - set $kgm_voffset = 0xFFFFFF8000000000ULL - else - echo showbootermemorymap not supported on this architecture - end - end - - set $kgm_boot_args = kernelBootArgs - set $kgm_msize = kernelBootArgs->MemoryMapDescriptorSize - set $kgm_mcount = kernelBootArgs->MemoryMapSize / $kgm_msize - set $kgm_i = 0 - - printf "Type Physical Start Number of Pages Virtual Start Attributes\n" - while $kgm_i < $kgm_mcount - set $kgm_mptr = (EfiMemoryRange *)((unsigned long)kernelBootArgs->MemoryMap + $kgm_voffset + $kgm_i * $kgm_msize) -# p/x *$kgm_mptr - if $kgm_mptr->Type == 0 - printf "Reserved " - end - if $kgm_mptr->Type == 1 - printf "LoaderCode" - end - if $kgm_mptr->Type == 2 - printf "LoaderData" - end - if $kgm_mptr->Type == 3 - printf "BS_code " - end - if $kgm_mptr->Type == 4 - printf "BS_data " - end - if $kgm_mptr->Type == 5 - printf "RT_code " - end - if $kgm_mptr->Type == 6 - printf "RT_data " - end - if $kgm_mptr->Type == 7 - printf "Convention" - end - if $kgm_mptr->Type == 8 - printf "Unusable " - end - if $kgm_mptr->Type == 9 - printf "ACPI_recl " - end - if $kgm_mptr->Type == 10 - printf "ACPI_NVS " - end - if $kgm_mptr->Type == 11 - printf "MemMapIO " - end - if $kgm_mptr->Type == 12 - printf "MemPortIO " - end - if $kgm_mptr->Type == 13 - printf "PAL_code " - end - if $kgm_mptr->Type > 13 - printf "UNKNOWN " - end - - printf " %016llx %016llx", $kgm_mptr->PhysicalStart, $kgm_mptr->NumberOfPages - if $kgm_mptr->VirtualStart != 0 - printf " %016llx", $kgm_mptr->VirtualStart - else - printf " " - end - printf " %016llx\n", $kgm_mptr->Attribute - set $kgm_i = $kgm_i + 1 - end -end - -document showbootermemorymap -Syntax: (gdb) showbootermemorymap -| Prints out the phys memory map from kernelBootArgs -end - -define 
showstacksaftertask - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)$arg0 - set $kgm_taskp = (struct task *)$kgm_taskp->tasks.next - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - showactheader - if ($decode_wait_events > 0) - showactint $kgm_actp 1 - else - showactint $kgm_actp 2 - end - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end -end -document showstacksaftertask -Syntax: (gdb) showstacksaftertask -| Routine to print out all stacks (as in showallstacks) starting after a given task -| Useful if gdb refuses to print a certain task's stack. -end - -define showpmworkqueueint - set $kgm_pm_workqueue = (IOPMWorkQueue *)$arg0 - set $kgm_pm_wq = &($kgm_pm_workqueue->fWorkQueue) - set $kgm_pm_wqe = (IOServicePM *)$kgm_pm_wq->next - while ((queue_entry_t) $kgm_pm_wqe != (queue_entry_t) $kgm_pm_wq) - printf "service " - showptrhdrpad - printf " ps ms wr name\n" - showptr $kgm_pm_wqe->Owner - printf " " - printf "%02d ", $kgm_pm_wqe->CurrentPowerState - printf "%02d ", $kgm_pm_wqe->MachineState - printf "%02d ", $kgm_pm_wqe->WaitReason - printf "%s\n", $kgm_pm_wqe->Name - printf "request " - showptrhdrpad - printf " type next " - showptrhdrpad - printf " root " - showptrhdrpad - printf " work_wait free_wait\n" - set $kgm_pm_rq = &($kgm_pm_wqe->RequestHead) - set $kgm_pm_rqe = (IOPMRequest *)$kgm_pm_rq->next - while ((queue_entry_t) $kgm_pm_rqe != (queue_entry_t) $kgm_pm_rq) - showptr $kgm_pm_rqe - printf " 0x%02x ", $kgm_pm_rqe->fType - showptr $kgm_pm_rqe->fRequestNext - printf " " - showptr $kgm_pm_rqe->fRequestRoot - printf " 0x%08x 0x%08x\n", $kgm_pm_rqe->fWorkWaitCount, $kgm_pm_rqe->fFreeWaitCount - showptrhdrpad - printf " args " - showptr $kgm_pm_rqe->fArg0 - printf " " - showptr $kgm_pm_rqe->fArg1 - printf " " - showptr $kgm_pm_rqe->fArg2 - printf "\n" - set $kgm_pm_rqe = (IOPMRequest *)$kgm_pm_rqe->fCommandChain.next - end - printf "\n" - set $kgm_pm_wqe = (IOServicePM *)$kgm_pm_wqe->WorkChain.next - end -end - -define showpmworkqueue - printf "IOPMWorkQueue " - showptr gIOPMWorkQueue - printf " length " - printf "%u", gIOPMWorkQueue->fQueueLength - printf "\n" - if (gIOPMWorkQueue->fQueueLength > 0) - showpmworkqueueint gIOPMWorkQueue - end -end - -document showpmworkqueue -Syntax: (gdb) showpmworkqueue -| Display the IOPMWorkQueue object -end - -define showioservicepm - set $kgm_iopmpriv = (IOServicePM *)$arg0 - printf "{ " - printf "MachineState = %d (", $kgm_iopmpriv->MachineState - if ( $kgm_iopmpriv->MachineState == 0 ) - printf "kIOPM_Finished" - else - if ( $kgm_iopmpriv->MachineState == 1 ) - printf "kIOPM_OurChangeTellClientsPowerDown" - else - if ( $kgm_iopmpriv->MachineState == 2 ) - printf "kIOPM_OurChangeTellPriorityClientsPowerDown" - else - if ( $kgm_iopmpriv->MachineState == 3 ) - printf "kIOPM_OurChangeNotifyInterestedDriversWillChange" - else - if ( $kgm_iopmpriv->MachineState == 4 ) - printf "kIOPM_OurChangeSetPowerState" - else - if ( $kgm_iopmpriv->MachineState == 5 ) - printf "kIOPM_OurChangeWaitForPowerSettle" - else - if ( $kgm_iopmpriv->MachineState == 6 ) - printf "kIOPM_OurChangeNotifyInterestedDriversDidChange" - else - if ( $kgm_iopmpriv->MachineState == 7 ) - printf "kIOPM_OurChangeTellCapabilityDidChange" - else - if ( $kgm_iopmpriv->MachineState == 8 ) - 
printf "kIOPM_OurChangeFinish" - else - if ( $kgm_iopmpriv->MachineState == 9 ) - printf "Unused_MachineState_9" - else - if ( $kgm_iopmpriv->MachineState == 10 ) - printf "kIOPM_ParentChangeTellPriorityClientsPowerDown" - else - if ( $kgm_iopmpriv->MachineState == 11 ) - printf "kIOPM_ParentChangeNotifyInterestedDriversWillChange" - else - if ( $kgm_iopmpriv->MachineState == 12 ) - printf "kIOPM_ParentChangeSetPowerState" - else - if ( $kgm_iopmpriv->MachineState == 13 ) - printf "kIOPM_ParentChangeWaitForPowerSettle" - else - if ( $kgm_iopmpriv->MachineState == 14) - printf "kIOPM_ParentChangeNotifyInterestedDriversDidChange" - else - if ( $kgm_iopmpriv->MachineState == 15) - printf "kIOPM_ParentChangeTellCapabilityDidChange" - else - if ( $kgm_iopmpriv->MachineState == 16) - printf "kIOPM_ParentChangeAcknowledgePowerChange" - else - if ( $kgm_iopmpriv->MachineState == 17) - printf "kIOPM_NotifyChildrenStart" - else - if ( $kgm_iopmpriv->MachineState == 18) - printf "kIOPM_NotifyChildrenOrdered" - else - if ( $kgm_iopmpriv->MachineState == 19) - printf "kIOPM_NotifyChildrenDelayed" - else - if ( $kgm_iopmpriv->MachineState == 20) - printf "kIOPM_SyncTellClientsPowerDown" - else - if ( $kgm_iopmpriv->MachineState == 21) - printf "kIOPM_SyncTellPriorityClientsPowerDown" - else - if ( $kgm_iopmpriv->MachineState == 22) - printf "kIOPM_SyncNotifyWillChange" - else - if ( $kgm_iopmpriv->MachineState == 23) - printf "kIOPM_SyncNotifyDidChange" - else - if ( $kgm_iopmpriv->MachineState == 24) - printf "kIOPM_SyncTellCapabilityDidChange" - else - if ( $kgm_iopmpriv->MachineState == 25) - printf "kIOPM_SyncFinish" - else - if ( $kgm_iopmpriv->MachineState == 26) - printf "kIOPM_TellCapabilityChangeDone" - else - if ( $kgm_iopmpriv->MachineState == 27) - printf "kIOPM_DriverThreadCallDone" - else - printf "Unknown_MachineState" - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - end - printf "), " - - if ( $kgm_iopmpriv->MachineState != 20 ) - printf "DriverTimer = %d, ",(unsigned int)$kgm_iopmpriv->DriverTimer - printf "SettleTime = %d, ",(unsigned int)$kgm_iopmpriv->SettleTimeUS - printf "HeadNoteFlags = %08x, ",(unsigned int)$kgm_iopmpriv->HeadNoteChangeFlags - printf "HeadNotePendingAcks = %x, ",(unsigned int)$kgm_iopmpriv->HeadNotePendingAcks - end - - if ( $kgm_iopmpriv->DeviceOverrideEnabled != 0 ) - printf"DeviceOverrides, " - end - - printf "DeviceDesire = %d, ",(unsigned int)$kgm_iopmpriv->DeviceDesire - printf "DesiredPowerState = %d, ",(unsigned int)$kgm_iopmpriv->DesiredPowerState - printf "PreviousRequest = %d }\n",(unsigned int)$kgm_iopmpriv->PreviousRequestPowerFlags -end - -document showioservicepm -Syntax: (gdb) showioservicepm -| Routine to dump the IOServicePM object -end - -define showregistryentryrecursepmstate - set $kgm_re = (IOService *)$arg1 - set $kgm$arg0_stack = (unsigned long long) $arg2 - - if ($arg3) - set $kgm$arg0_stack = $kgm$arg0_stack | (1ULL << $kgm_reg_depth) - else - set $kgm$arg0_stack = $kgm$arg0_stack & ~(1ULL << $kgm_reg_depth) - end - - dictget $kgm_re->fRegistryTable $kgm_childkey - set $kgm$arg0_child_array = (OSArray *) $kgm_result - - if ($kgm$arg0_child_array) - set $kgm$arg0_child_count = $kgm$arg0_child_array->count - else - set $kgm$arg0_child_count = 0 - end - - if ($kgm$arg0_child_count) - set $kgm$arg0_stack = $kgm$arg0_stack | (2ULL << $kgm_reg_depth) - else - set $kgm$arg0_stack = $kgm$arg0_stack & ~(2ULL << $kgm_reg_depth) - end 
- - indent $kgm_reg_depth $kgm$arg0_stack - printf "+-o " - - dictget $kgm_re->fRegistryTable $kgm_namekey - if ($kgm_result == 0) - dictget $kgm_re->fRegistryTable gIONameKey - end - if ($kgm_result == 0) - dictget $kgm_re->fPropertyTable gIOClassKey - end - - if ($kgm_result != 0) - printf "%s <%p>", ((OSString *)$kgm_result)->string, $kgm_re - else - if (((IOService*)$kgm_re)->pwrMgt && ((IOService*)$kgm_re)->pwrMgt->Name) - printf "%s <", ((IOService*)$kgm_re)->pwrMgt->Name - showptr $kgm_re - printf ">" - else - printf "?? <" - showptr $kgm_re - printf ">" - end - end - - if (((IOService*)$kgm_re)->pwrMgt ) - printf " Current Power State: %ld ", ((IOService*)$kgm_re)->pwrMgt->CurrentPowerState - #printf " Mach State %ld", ((IOService*)$kgm_re)->pwrMgt->MachineState - showioservicepm ((IOService*)$kgm_re)->pwrMgt - end - printf "\n" - - - # recurse - if ($kgm$arg0_child_count != 0) - - set $kgm_reg_depth = $kgm_reg_depth + 1 - set $kgm$arg0_child_idx = 0 - - while ($kgm$arg0_child_idx < $kgm$arg0_child_count) - set $kgm_re = $kgm$arg0_child_array->array[$kgm$arg0_child_idx++] - set $kgm_more_sib = ($kgm$arg0_child_idx < $kgm$arg0_child_count) - if $kgm_reg_depth >= $kgm_reg_depth_max + 1 - loop_break - end - showregistryentryrecursepmstate _$arg0 $kgm_re $kgm$arg0_stack $kgm_more_sib - end - - set $kgm_reg_depth = $kgm_reg_depth - 1 - end -end - -define showregistryentryintpmstate - if !$kgm_reg_plane - set $kgm_reg_plane = (IORegistryPlane *) gIOServicePlane - end - - if !$kgm_reg_plane - printf "Please load kgmacros after KDP attaching to the target.\n" - else - set $kgm_namekey = (OSSymbol *) $kgm_reg_plane->nameKey - set $kgm_childkey = (OSSymbol *) $kgm_reg_plane->keys[1] - showregistryentryrecursepmstate _ $arg0 0 0 - end -end - -define showregistrypmstate -# setregistryplane gIOPowerPlane - set $kgm_reg_depth = 0 - set $kgm_show_props = 1 - showregistryentryintpmstate gRegistryRoot -end - -document showregistrypmstate -Syntax: (gdb) showregistrypmstate -| Routine to dump the PM state of each IOPower registry entry -end - -define showstacksafterthread - set $kgm_head_taskp = &tasks - set $kgm_actp = (struct thread *)$arg0 - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - set $kgm_taskp = (struct task *)$kgm_actp->task - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - if $kgm_actp == 0 - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - end - while $kgm_actp != $kgm_head_actp - showactheader - if ($decode_wait_events > 0) - showactint $kgm_actp 1 - else - showactint $kgm_actp 2 - end - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - set $kgm_actp = 0 - end -end - -document showstacksafterthread -Syntax: (gdb) showstacksafterthread -| Routine to print out all stacks (as in showallstacks) starting after a given thread -| Useful if gdb refuses to print a certain task's stack. -end - -define kdp-reenter - set kdp_reentry_deadline = ((unsigned) $arg0)*1000 - continue -end - -document kdp-reenter -Syntax: (gdb) kdp-reenter <seconds> -| Schedules reentry into the debugger after <seconds> seconds, and resumes -| the target system. 
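-| For example, "kdp-reenter 15" resumes the target and re-enters the debugger after roughly 15 seconds (the macro scales the argument to the millisecond-based kdp_reentry_deadline).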
-end - -define _if_present - if (!$arg0) - printf " not" - end - printf " present" -end - -define showMCAstate - if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) - printf "Not available for current architecture.\n" - else - printf "MCA" - _if_present mca_MCA_present - printf ", control MSR" - _if_present mca_control_MSR_present - printf ", threshold status" - _if_present mca_threshold_status_present - printf "\n%d error banks, ", mca_error_bank_count - printf "family code 0x%x, ", mca_family - printf "machine-check dump state: %d\n", mca_dump_state - set $kgm_cpu = 0 - while cpu_data_ptr[$kgm_cpu] != 0 - set $kgm_mcp = cpu_data_ptr[$kgm_cpu]->cpu_mca_state - if $kgm_mcp - printf "CPU %d:", $kgm_cpu - printf " mca_mcg_ctl: 0x%016llx", $kgm_mcp->mca_mcg_ctl - printf " mca_mcg_status: 0x%016llx\n", $kgm_mcp->mca_mcg_status.u64 - printf "bank " - printf "mca_mci_ctl " - printf "mca_mci_status " - printf "mca_mci_addr " - printf "mca_mci_misc\n" - set $kgm_bank = 0 - while $kgm_bank < mca_error_bank_count - set $kgm_bp = &$kgm_mcp->mca_error_bank[$kgm_bank] - printf " %2d:", $kgm_bank - printf " 0x%016llx", $kgm_bp->mca_mci_ctl - printf " 0x%016llx", $kgm_bp->mca_mci_status.u64 - printf " 0x%016llx", $kgm_bp->mca_mci_addr - printf " 0x%016llx\n", $kgm_bp->mca_mci_misc - set $kgm_bank = $kgm_bank + 1 - end - end - set $kgm_cpu = $kgm_cpu + 1 - end - end -end - -document showMCAstate -Syntax: showMCAstate -| Print machine-check register state after MC exception. -end - -define _pt_step - # - # Step to lower-level page table and print attributes - # $kgm_pt_paddr: current page table entry physical address - # $kgm_pt_index: current page table entry index (0..511) - # returns - # $kgm_pt_paddr: next level page table entry physical address - # or null if invalid - # $kgm_pt_valid: 1 if $kgm_pt_paddr is valid, 0 if the walk - # should be aborted - # $kgm_pt_large: 1 if kgm_pt_paddr is a page frame address - # of a large page and not another page table entry - # For $kgm_pt_verbose = 0 or 1: print nothing - # 2: print the decoded entry attributes - # 3: print the decoded entry and hex dump the full table - # - set $kgm_entryp = $kgm_pt_paddr + 8*$kgm_pt_index - readphysint $kgm_entryp 64 $kgm_lcpu_self - set $entry = $kgm_readphysint_result - if $kgm_pt_verbose >= 3 - set $kgm_pte_loop = 0 - while $kgm_pte_loop < 512 - set $kgm_pt_paddr_tmp = $kgm_pt_paddr + $kgm_pte_loop*8 - readphys64 $kgm_pt_paddr_tmp - set $kgm_pte_loop = $kgm_pte_loop + 1 - end - end - set $kgm_paddr_mask = ~((0xfffULL<<52) | 0xfffULL) - set $kgm_paddr_largemask = ~((0xfffULL<<52) | 0x1fffffULL) - if $kgm_pt_verbose < 2 - if $entry & (0x1 << 0) - set $kgm_pt_valid = 1 - if $entry & (0x1 << 7) - set $kgm_pt_large = 1 - set $kgm_pt_paddr = $entry & $kgm_paddr_largemask - else - set $kgm_pt_large = 0 - set $kgm_pt_paddr = $entry & $kgm_paddr_mask - end - else - set $kgm_pt_valid = 0 - set $kgm_pt_large = 0 - set $kgm_pt_paddr = 0 - end - else - printf "0x%016llx:\n\t0x%016llx\n\t", $kgm_entryp, $entry - if $entry & (0x1 << 0) - printf "valid" - set $kgm_pt_paddr = $entry & $kgm_paddr_mask - set $kgm_pt_valid = 1 - else - printf "invalid" - set $kgm_pt_paddr = 0 - set $kgm_pt_valid = 0 - # stop decoding other bits - set $entry = 0 - end - if $entry & (0x1 << 1) - printf " writeable" - else - printf " read-only" - end - if $entry & (0x1 << 2) - printf " user" - else - printf " supervisor" - end - if $entry & (0x1 << 3) - printf " PWT" - end - if $entry & (0x1 << 4) - printf " PCD" - end - if $entry & (0x1 << 5) - printf " accessed" - end - if 
$entry & (0x1 << 6) - printf " dirty" - end - if $entry & (0x1 << 7) - printf " large" - set $kgm_pt_large = 1 - else - set $kgm_pt_large = 0 - end - if $entry & (0x1 << 8) - printf " global" - end - if $entry & (0x3 << 9) - printf " avail:0x%x", ($entry >> 9) & 0x3 - end - if $entry & (0x1ULL << 63) - printf " noexec" - end - printf "\n" - end -end - -define _pml4_walk - set $kgm_pt_paddr = $arg0 - set $kgm_vaddr = $arg1 - set $kgm_pt_valid = $kgm_pt_paddr != 0 - set $kgm_pt_large = 0 - set $kgm_pframe_offset = 0 - if $kgm_pt_valid && cpu_64bit - # Look up bits 47:39 of the linear address in PML4T - set $kgm_pt_index = ($kgm_vaddr >> 39) & 0x1ffULL - set $kgm_pframe_offset = $kgm_vaddr & 0x7fffffffffULL - if $kgm_pt_verbose >= 2 - printf "pml4 (index %d):\n", $kgm_pt_index - end - _pt_step - end - if $kgm_pt_valid - # Look up bits 38:30 of the linear address in PDPT - set $kgm_pt_index = ($kgm_vaddr >> 30) & 0x1ffULL - set $kgm_pframe_offset = $kgm_vaddr & 0x3fffffffULL - if $kgm_pt_verbose >= 2 - printf "pdpt (index %d):\n", $kgm_pt_index - end - _pt_step - end - if $kgm_pt_valid && !$kgm_pt_large - # Look up bits 29:21 of the linear address in PDT - set $kgm_pt_index = ($kgm_vaddr >> 21) & 0x1ffULL - set $kgm_pframe_offset = $kgm_vaddr & 0x1fffffULL - if $kgm_pt_verbose >= 2 - printf "pdt (index %d):\n", $kgm_pt_index - end - _pt_step - end - if $kgm_pt_valid && !$kgm_pt_large - # Look up bits 20:12 of the linear address in PT - set $kgm_pt_index = ($kgm_vaddr >> 12) & 0x1ffULL - set $kgm_pframe_offset = $kgm_vaddr & 0xfffULL - if $kgm_pt_verbose >= 2 - printf "pt (index %d):\n", $kgm_pt_index - end - _pt_step - end - - if $kgm_pt_valid - set $kgm_paddr = $kgm_pt_paddr + $kgm_pframe_offset - set $kgm_paddr_isvalid = 1 - else - set $kgm_paddr = 0 - set $kgm_paddr_isvalid = 0 - end - - if $kgm_pt_verbose >= 1 - if $kgm_paddr_isvalid - readphysint $kgm_paddr 32 $kgm_lcpu_self - set $kgm_value = $kgm_readphysint_result - printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value - else - printf "(no translation)\n" - end - end -end - -define _pmap_walk_x86 - set $kgm_pmap = (pmap_t) $arg0 - _pml4_walk $kgm_pmap->pm_cr3 $arg1 -end - -define _pmap_walk_arm_level1_section - set $kgm_tte_p = $arg0 - set $kgm_tte = *$kgm_tte_p - set $kgm_vaddr = $arg1 - - # Supersection or just section? 
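-# (bit 18 of the TTE distinguishes the two: set means a 16MB supersection, clear means a 1MB section; the physical base and offset masks used below differ accordingly)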
- if (($kgm_tte & 0x00040000) == 0x00040000) - set $kgm_paddr = ($kgm_tte & 0xFF000000) | ($kgm_vaddr & 0x00FFFFFF) - set $kgm_paddr_isvalid = 1 - else - set $kgm_paddr = ($kgm_tte & 0xFFF00000) | ($kgm_vaddr & 0x000FFFFF) - set $kgm_paddr_isvalid = 1 - end - - if $kgm_pt_verbose >= 2 - printf "0x%08x\n\t0x%08x\n\t", (unsigned long)$kgm_tte_p, $kgm_tte - - # bit [1:0] evaluated in _pmap_walk_arm - - # B bit 2 - set $kgm_b_bit = (($kgm_tte & 0x00000004) >> 2) - - # C bit 3 - set $kgm_c_bit = (($kgm_tte & 0x00000008) >> 3) - - # XN bit 4 - if ($kgm_tte & 0x00000010) - printf "no-execute" - else - printf "execute" - end - - # Domain bit [8:5] if not supersection - if (($kgm_tte & 0x00040000) == 0x00000000) - printf " domain(%d)", (($kgm_tte & 0x000001e0) >> 5) - end - - # IMP bit 9 - printf " imp(%d)", (($kgm_tte & 0x00000200) >> 9) - - # AP bit 15 and [11:10], merged to a single 3-bit value - set $kgm_access = (($kgm_tte & 0x00000c00) >> 10) | (($kgm_tte & 0x00008000) >> 13) - if ($kgm_access == 0x0) - printf " noaccess" - end - if ($kgm_access == 0x1) - printf " supervisor(readwrite) user(noaccess)" - end - if ($kgm_access == 0x2) - printf " supervisor(readwrite) user(readonly)" - end - if ($kgm_access == 0x3) - printf " supervisor(readwrite) user(readwrite)" - end - if ($kgm_access == 0x4) - printf " noaccess(reserved)" - end - if ($kgm_access == 0x5) - printf " supervisor(readonly) user(noaccess)" - end - if ($kgm_access == 0x6) - printf " supervisor(readonly) user(readonly)" - end - if ($kgm_access == 0x7) - printf " supervisor(readonly) user(readonly)" - end - - # TEX bit [14:12] - set $kgm_tex_bits = (($kgm_tte & 0x00007000) >> 12) - - # Print TEX, C, B all together - printf " TEX:C:B(%d%d%d:%d:%d)", ($kgm_tex_bits & 0x4 ? 1 : 0), ($kgm_tex_bits & 0x2 ? 1 : 0), ($kgm_tex_bits & 0x1 ? 
1 : 0), $kgm_c_bit, $kgm_b_bit - - # S bit 16 - if ($kgm_tte & 0x00010000) - printf " shareable" - else - printf " not-shareable" - end - - # nG bit 17 - if ($kgm_tte & 0x00020000) - printf " not-global" - else - printf " global" - end - - # Supersection bit 18 - if ($kgm_tte & 0x00040000) - printf " supersection" - else - printf " section" - end - - # NS bit 19 - if ($kgm_tte & 0x00080000) - printf " no-secure" - else - printf " secure" - end - - printf "\n" - end -end - -define _pmap_walk_arm_level2 - set $kgm_tte_p = $arg0 - set $kgm_tte = *$kgm_tte_p - set $kgm_vaddr = $arg1 - - set $kgm_pte_pbase = (($kgm_tte & 0xFFFFFC00) - gPhysBase + gVirtBase) - set $kgm_pte_index = ($kgm_vaddr >> 12) & 0x000000FF - set $kgm_pte_p = &((pt_entry_t *)$kgm_pte_pbase)[$kgm_pte_index] - set $kgm_pte = *$kgm_pte_p - - # Print first level symbolically - if $kgm_pt_verbose >= 2 - printf "0x%08x\n\t0x%08x\n\t", (unsigned long)$kgm_tte_p, $kgm_tte - - # bit [1:0] evaluated in _pmap_walk_arm - - # NS bit 3 - if ($kgm_tte & 0x00000008) - printf "no-secure" - else - printf "secure" - end - - # Domain bit [8:5] - printf " domain(%d)", (($kgm_tte & 0x000001e0) >> 5) - - # IMP bit 9 - printf " imp(%d)", (($kgm_tte & 0x00000200) >> 9) - - printf "\n" - end - - if $kgm_pt_verbose >= 2 - printf "second-level table (index %d):\n", $kgm_pte_index - end - if $kgm_pt_verbose >= 3 - set $kgm_pte_loop = 0 - while $kgm_pte_loop < 256 - set $kgm_pte_p_tmp = &((pt_entry_t *)$kgm_pte_pbase)[$kgm_pte_loop] - printf "0x%08x:\t0x%08x\n", (unsigned long)$kgm_pte_p_tmp, *$kgm_pte_p_tmp - set $kgm_pte_loop = $kgm_pte_loop + 1 - end - end - - if ($kgm_pte & 0x00000003) - set $kgm_pve_p = (pv_entry_t *)($kgm_pte_pbase + 0x100*sizeof(pt_entry_t) + $kgm_pte_index*sizeof(pv_entry_t)) - if ($kgm_pve_p->shadow != 0) - set $kgm_spte = $kgm_pve_p->shadow ^ ($kgm_vaddr & ~0xFFF) - set $kgm_paddr = ($kgm_spte & 0xFFFFF000) | ($kgm_vaddr & 0xFFF) - set $kgm_paddr_isvalid = 1 - else - set $kgm_paddr = (*$kgm_pte_p & 0xFFFFF000) | ($kgm_vaddr & 0xFFF) - set $kgm_paddr_isvalid = 1 - end - else - set $kgm_paddr = 0 - set $kgm_paddr_isvalid = 0 - end - - if $kgm_pt_verbose >= 2 - printf "0x%08x\n\t0x%08x\n\t", (unsigned long)$kgm_pte_p, $kgm_pte - if (($kgm_pte & 0x00000003) == 0x00000000) - printf "invalid" - else - if (($kgm_pte & 0x00000003) == 0x00000001) - printf "large" - - # XN bit 15 - if ($kgm_pte & 0x00008000) == 0x00008000 - printf " no-execute" - else - printf " execute" - end - else - printf "small" - - # XN bit 0 - if ($kgm_pte & 0x00000001) == 0x00000001 - printf " no-execute" - else - printf " execute" - end - end - - # B bit 2 - set $kgm_b_bit = (($kgm_pte & 0x00000004) >> 2) - - # C bit 3 - set $kgm_c_bit = (($kgm_pte & 0x00000008) >> 3) - - # AP bit 9 and [5:4], merged to a single 3-bit value - set $kgm_access = (($kgm_pte & 0x00000030) >> 4) | (($kgm_pte & 0x00000200) >> 7) - if ($kgm_access == 0x0) - printf " noaccess" - end - if ($kgm_access == 0x1) - printf " supervisor(readwrite) user(noaccess)" - end - if ($kgm_access == 0x2) - printf " supervisor(readwrite) user(readonly)" - end - if ($kgm_access == 0x3) - printf " supervisor(readwrite) user(readwrite)" - end - if ($kgm_access == 0x4) - printf " noaccess(reserved)" - end - if ($kgm_access == 0x5) - printf " supervisor(readonly) user(noaccess)" - end - if ($kgm_access == 0x6) - printf " supervisor(readonly) user(readonly)" - end - if ($kgm_access == 0x7) - printf " supervisor(readonly) user(readonly)" - end - - # TEX bit [14:12] for large, [8:6] for small - if (($kgm_pte & 
0x00000003) == 0x00000001) - set $kgm_tex_bits = (($kgm_pte & 0x00007000) >> 12) - else - set $kgm_tex_bits = (($kgm_pte & 0x000001c0) >> 6) - end - - # Print TEX, C, B all together - printf " TEX:C:B(%d%d%d:%d:%d)", ($kgm_tex_bits & 0x4 ? 1 : 0), ($kgm_tex_bits & 0x2 ? 1 : 0), ($kgm_tex_bits & 0x1 ? 1 : 0), $kgm_c_bit, $kgm_b_bit - - # S bit 10 - if ($kgm_pte & 0x00000400) - printf " shareable" - else - printf " not-shareable" - end - - # nG bit 11 - if ($kgm_pte & 0x00000800) - printf " not-global" - else - printf " global" - end - - end - printf "\n" - end -end - -# See ARM ARM Section B3.3 -define _pmap_walk_arm - set $kgm_pmap = (pmap_t) $arg0 - set $kgm_vaddr = $arg1 - set $kgm_paddr = 0 - set $kgm_paddr_isvalid = 0 - - # Shift by TTESHIFT (20) to get tte index - set $kgm_tte_index = (($kgm_vaddr - $kgm_pmap->min) >> 20) - set $kgm_tte_p = &$kgm_pmap->tte[$kgm_tte_index] - set $kgm_tte = *$kgm_tte_p - if $kgm_pt_verbose >= 2 - printf "first-level table (index %d):\n", $kgm_tte_index - end - if $kgm_pt_verbose >= 3 - set $kgm_tte_loop = 0 - while $kgm_tte_loop < 4096 - set $kgm_tte_p_tmp = &$kgm_pmap->tte[$kgm_tte_loop] - printf "0x%08x:\t0x%08x\n", (unsigned long)$kgm_tte_p_tmp, *$kgm_tte_p_tmp - set $kgm_tte_loop = $kgm_tte_loop + 1 - end - end - - if (($kgm_tte & 0x00000003) == 0x00000001) - _pmap_walk_arm_level2 $kgm_tte_p $kgm_vaddr - else - if (($kgm_tte & 0x00000003) == 0x00000002) - _pmap_walk_arm_level1_section $kgm_tte_p $kgm_vaddr - else - set $kgm_paddr = 0 - set $kgm_paddr_isvalid = 0 - if $kgm_pt_verbose >= 2 - printf "Invalid First-Level Translation Table Entry: 0x%08x\n", $kgm_tte - end - end - end - - if $kgm_pt_verbose >= 1 - if $kgm_paddr_isvalid - readphysint $kgm_paddr 32 $kgm_lcpu_self - set $kgm_value = $kgm_readphysint_result - printf "phys 0x%016llx: 0x%08x\n", $kgm_paddr, $kgm_value - else - printf "(no translation)\n" - end - end -end - -define pmap_walk - if $argc != 2 - printf "pmap_walk <pmap> <virtual address>\n" - else - if !$kgm_pt_verbose - set $kgm_pt_verbose = 2 - else - if $kgm_pt_verbose > 3 - set $kgm_pt_verbose = 2 - end - end - if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) - _pmap_walk_x86 $arg0 $arg1 - else - if ($kgm_mtype == $kgm_mtype_arm) - _pmap_walk_arm $arg0 $arg1 - else - printf "Not available for current architecture.\n" - end - end - end -end - -document pmap_walk -Syntax: (gdb) pmap_walk <pmap> <virtual address> -| Perform a page-table walk in <pmap> for <virtual address>. -| Set: -| $kgm_pt_verbose=0 for no output, $kgm_paddr will be set -| if $kgm_paddr_isvalid is 1 -| $kgm_pt_verbose=1 for final physical address -| $kgm_pt_verbose=2 for dump of page table entry. -| $kgm_pt_verbose=3 for full hex dump of page tables. -end - -define pmap_vtop - if $argc != 2 - printf "pmap_vtop <pmap> <virtual address>\n" - else - set $kgm_pt_verbose = 1 - if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) - _pmap_walk_x86 $arg0 $arg1 - else - if ($kgm_mtype == $kgm_mtype_arm) - _pmap_walk_arm $arg0 $arg1 - else - printf "Not available for current architecture.\n" - end - end - end -end - -document pmap_vtop -Syntax: (gdb) pmap_vtop <pmap> <virtual address> -| For page-tables in <pmap>, translate <virtual address> to a physical address. -end - -define zstack - set $index = $arg0 - - if (log_records == 0) - set $count = 0 - printf "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args.\n"
- else - if ($argc == 2) - set $count = $arg1 - else - set $count = 1 - end - end - - while ($count) - printf "\n--------------- " - - if (zrecords[$index].z_opcode == 1) - printf "ALLOC " - else - printf "FREE " - end - showptr zrecords[$index].z_element - printf " : index %d : ztime %d -------------\n", $index, zrecords[$index].z_time - - set $frame = 0 - - while ($frame < 15) - set $frame_pc = zrecords[$index].z_pc[$frame] - - if ($frame_pc == 0) - loop_break - end - - x/i $frame_pc - set $frame = $frame + 1 - end - - set $index = $index + 1 - set $count = $count - 1 - end -end - -document zstack -Syntax: (gdb) zstack <index> [<count>] -| Zone leak debugging: print the stack trace of log element at <index>. -| If a <count> is supplied, it prints <count> log elements starting at <index>. -| -| The suggested usage is to look at indexes below zcurrent and look for common stack traces. -| The stack trace that occurs the most is probably the cause of the leak. Find the pc of the -| function calling into zalloc and use the countpcs kgmacro to find out how often that pc occurs in the log. -| The pc occurring in a high percentage of records is most likely the source of the leak. -| -| The findoldest kgmacro is also useful for leak debugging since it identifies the oldest record -| in the log, which may indicate the leaker. -end - -define findoldest - set $index = 0 - set $count = log_records - set $cur_min = 2000000000 - set $cur_index = 0 - - if (log_records == 0) - printf "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args.\n" - else - - while ($count) - if (zrecords[$index].z_element && zrecords[$index].z_time < $cur_min) - set $cur_index = $index - set $cur_min = zrecords[$index].z_time - end - - set $count = $count - 1 - set $index = $index + 1 - end - - printf "oldest record is at log index %d:\n", $cur_index - zstack $cur_index - end -end - -document findoldest -Syntax: (gdb) findoldest -| Zone leak debugging: find and print the oldest record in the log. Note that this command -| can take several minutes to run since it uses linear search. -| -| Once it prints a stack trace, find the pc of the caller above all the zalloc, kalloc and -| IOKit layers. Then use the countpcs kgmacro to see how often this caller has allocated -| memory. A caller with a high percentage of records in the log is probably the leaker. -end - -define countpcs - set $target_pc = $arg0 - set $index = 0 - set $count = log_records - set $found = 0 - - if (log_records == 0) - printf "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args.\n" - else - - while ($count) - set $frame = 0 - - if (zrecords[$index].z_element != 0) - while ($frame < 15) - if (zrecords[$index].z_pc[$frame] == $target_pc) - set $found = $found + 1 - set $frame = 15 - end - - set $frame = $frame + 1 - end - end - - set $index = $index + 1 - set $count = $count - 1 - end - - printf "occurred %d times in log (%d%c of records)\n", $found, ($found * 100) / zrecorded, '%' - end -end - -document countpcs -Syntax: (gdb) countpcs <pc> -| Zone leak debugging: search the log and print a count of all log entries that contain the given <pc> -| in the stack trace. This is useful for verifying a suspected <pc> as being the source of -| the leak. If a high percentage of the log entries contain the given <pc>, then it's most -| likely the source of the leak. Note that this command can take several minutes to run. -end - -define findelem - set $fe_index = zcurrent - set $fe_count = log_records - set $fe_elem = $arg0 - set $fe_prev_op = -1 - - if (log_records == 0) - printf "Zone logging not enabled. 
Add 'zlog=' to boot-args.\n" - end - - while ($fe_count) - if (zrecords[$fe_index].z_element == $fe_elem) - zstack $fe_index - - if (zrecords[$fe_index].z_opcode == $fe_prev_op) - printf "*************** DOUBLE OP! *********************\n" - end - - set $fe_prev_op = zrecords[$fe_index].z_opcode - end - - set $fe_count = $fe_count - 1 - set $fe_index = $fe_index + 1 - - if ($fe_index >= log_records) - set $fe_index = 0 - end - end -end - -document findelem -Syntax: (gdb) findelem -| Zone corruption debugging: search the log and print out the stack traces for all log entries that -| refer to the given zone element. When the kernel panics due to a corrupted zone element, get the -| element address and use this macro. This will show you the stack traces of all logged zalloc and -| zfree operations which tells you who touched the element in the recent past. This also makes -| double-frees readily apparent. -end - - -# This implements a shadowing scheme in kgmacros. If the -# current user data can be accessed by simply changing kdp_pmap, -# that is used. Otherwise, we copy data into a temporary buffer -# in the kernel's address space and use that instead. Don't rely on -# kdp_pmap between invocations of map/unmap. Since the shadow -# codepath uses a manual KDP packet, request no more than 128 bytes. -# Uses $kgm_lp64 for kernel address space size, and -# $kgm_readphys_use_kdp/$kgm_readphys_force_physmap to override -# how the user pages are accessed ($kgm_readphys_force_physmap -# implies walking the user task's pagetables to get a physical -# address and then shadowing data from there using the -# physical mapping of memory). -define _map_user_data_from_task - set $kgm_map_user_taskp = (task_t)$arg0 - set $kgm_map_user_map = $kgm_map_user_taskp->map - set $kgm_map_user_pmap = $kgm_map_user_map->pmap - set $kgm_map_user_task_64 = ( $kgm_map_user_taskp->taskFeatures[0] & 0x80000000) - set $kgm_map_user_window = 0 - set $kgm_map_switch_map = 0 - - if ($kgm_readphys_force_kdp != 0) - set $kgm_readphys_use_kdp = 1 - else - if ($kgm_readphys_force_physmap) - set $kgm_readphys_use_kdp = 0 - else - set $kgm_readphys_use_kdp = ( kdp->is_conn > 0 ) - end - end - - if ($kgm_readphys_use_kdp) - - if $kgm_lp64 - set $kgm_map_switch_map = 1 - else - if !$kgm_map_user_task_64 - set $kgm_map_switch_map = 1 - end - end - - if ($kgm_map_switch_map) - # switch the map safely - set $kgm_map_user_window = $arg1 - set kdp_pmap = $kgm_map_user_pmap - else - # requires shadowing/copying - - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_readmem64_req_t) - set $kgm_pkt = (kdp_readmem64_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_READMEM64 - set $kgm_pkt->hdr.len = sizeof(kdp_readmem64_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->address = (uint64_t)$arg1 - set $kgm_pkt->nbytes = (uint32_t)$arg2 - - set kdp_pmap = $kgm_map_user_pmap - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - # Go back to kernel map so that we can access buffer directly - set kdp_pmap = 0 - - set $kgm_pkt = (kdp_readmem64_reply_t *)&manual_pkt.data - if ($kgm_pkt->error == 0) - set $kgm_map_user_window = $kgm_pkt->data - else - set $kgm_map_user_window = 0 - end - end - - else - # without the benefit of a KDP stub on the target, try to - # find the user task's physical mapping and memcpy the data. 
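-# (i.e., translate the user virtual address with the user pmap at $kgm_pt_verbose=0, then read through the kernel's physical mapping; see the _pmap_walk_x86/_pmap_walk_arm calls below)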
- # If it straddles a page boundary, copy in two passes - set $kgm_vaddr_range1_start = (unsigned long long)$arg1 - set $kgm_vaddr_range1_count = (unsigned long long)$arg2 - if (($kgm_vaddr_range1_start + $kgm_vaddr_range1_count) & 0xFFF) < $kgm_vaddr_range1_count - set $kgm_vaddr_range2_start = ($kgm_vaddr_range1_start + $kgm_vaddr_range1_count) & ~((unsigned long long)0xFFF) - set $kgm_vaddr_range2_count = $kgm_vaddr_range1_start + $kgm_vaddr_range1_count - $kgm_vaddr_range2_start - set $kgm_vaddr_range1_count = $kgm_vaddr_range2_start - $kgm_vaddr_range1_start - else - set $kgm_vaddr_range2_start = 0 - set $kgm_vaddr_range2_count = 0 - end - set $kgm_paddr_range1_in_kva = 0 - set $kgm_paddr_range2_in_kva = 0 - - if ($kgm_mtype == $kgm_mtype_x86_64) - set $kgm_pt_verbose = 0 - _pmap_walk_x86 $kgm_map_user_pmap $kgm_vaddr_range1_start - if $kgm_paddr_isvalid - set $kgm_paddr_range1_in_kva = $kgm_paddr + physmap_base - end - if $kgm_vaddr_range2_start - _pmap_walk_x86 $kgm_map_user_pmap $kgm_vaddr_range2_start - if $kgm_paddr_isvalid - set $kgm_paddr_range2_in_kva = $kgm_paddr + physmap_base - end - end - else - if ($kgm_mtype == $kgm_mtype_arm) - set $kgm_pt_verbose = 0 - _pmap_walk_arm $kgm_map_user_pmap $kgm_vaddr_range1_start - if $kgm_paddr_isvalid - set $kgm_paddr_range1_in_kva = $kgm_paddr - gPhysBase + gVirtBase - end - if $kgm_vaddr_range2_start - _pmap_walk_arm $kgm_map_user_pmap $kgm_vaddr_range2_start - if $kgm_paddr_isvalid - set $kgm_paddr_range2_in_kva = $kgm_paddr - gPhysBase + gVirtBase - end - end - else - printf "Not available for current architecture.\n" - set $kgm_paddr_isvalid = 0 - end - end - if $kgm_paddr_range1_in_kva - set $kgm_pkt = (kdp_readmem64_reply_t *)&manual_pkt.data - memcpy $kgm_pkt->data $kgm_paddr_range1_in_kva $kgm_vaddr_range1_count - if $kgm_paddr_range2_in_kva - memcpy &$kgm_pkt->data[$kgm_vaddr_range1_count] $kgm_paddr_range2_in_kva $kgm_vaddr_range2_count - end - set $kgm_map_user_window = $kgm_pkt->data - else - set $kgm_map_user_window = 0 - end - end -end - -define _unmap_user_data_from_task - set kdp_pmap = 0 -end - -# uses $kgm_taskp. Maps 32 bytes at a time and prints it -define _print_path_for_image - set $kgm_print_path_address = (unsigned long long)$arg0 - set $kgm_path_str_notdone = 1 - - if ($kgm_print_path_address == 0) - set $kgm_path_str_notdone = 0 - end - - while $kgm_path_str_notdone - _map_user_data_from_task $kgm_taskp $kgm_print_path_address 32 - - set $kgm_print_path_ptr = (char *)$kgm_map_user_window - set $kgm_path_i = 0 - while ($kgm_path_i < 32 && $kgm_print_path_ptr[$kgm_path_i] != '\0') - set $kgm_path_i = $kgm_path_i + 1 - end - printf "%.32s", $kgm_print_path_ptr - - _unmap_user_data_from_task $kgm_taskp - - # break out if we terminated on NUL - if $kgm_path_i < 32 - set $kgm_path_str_notdone = 0 - else - set $kgm_print_path_address = $kgm_print_path_address + 32 - end - end -end - -# uses $kgm_taskp and $kgm_task_64. 
May modify $kgm_dyld_load_path -define _print_image_info - set $kgm_mh_image_address = (unsigned long long)$arg0 - set $kgm_mh_path_address = (unsigned long long)$arg1 - - # 32 bytes enough for mach_header/mach_header_64 - _map_user_data_from_task $kgm_taskp $kgm_mh_image_address 32 - - set $kgm_mh_ptr = (unsigned int*)$kgm_map_user_window - set $kgm_mh_magic = $kgm_mh_ptr[0] - set $kgm_mh_cputype = $kgm_mh_ptr[1] - set $kgm_mh_cpusubtype = $kgm_mh_ptr[2] - set $kgm_mh_filetype = $kgm_mh_ptr[3] - set $kgm_mh_ncmds = $kgm_mh_ptr[4] - set $kgm_mh_sizeofcmds = $kgm_mh_ptr[5] - set $kgm_mh_flags = $kgm_mh_ptr[6] - - _unmap_user_data_from_task $kgm_taskp - - if $kgm_mh_magic == 0xfeedfacf - set $kgm_mh_64 = 1 - set $kgm_lc_address = $kgm_mh_image_address + 32 - else - set $kgm_mh_64 = 0 - set $kgm_lc_address = $kgm_mh_image_address + 28 - end - - set $kgm_lc_idx = 0 - set $kgm_uuid_data = 0 - while $kgm_lc_idx < $kgm_mh_ncmds - - # 24 bytes is size of uuid_command - _map_user_data_from_task $kgm_taskp $kgm_lc_address 24 - - set $kgm_lc_ptr = (unsigned int *)$kgm_map_user_window - set $kgm_lc_cmd = $kgm_lc_ptr[0] - set $kgm_lc_cmd_size = $kgm_lc_ptr[1] - set $kgm_lc_data = (unsigned char *)$kgm_lc_ptr + 8 - - if $kgm_lc_cmd == 0x1b - set $kgm_uuid_data = $kgm_lc_data - if $kgm_mh_64 - printf "0x%016llx ", $kgm_mh_image_address - else - printf "0x%08x ", $kgm_mh_image_address - end - - set $kgm_printed_type = 0 - if $kgm_mh_filetype == 0x2 - printf "MH_EXECUTE " - set $kgm_printed_type = 1 - end - if $kgm_mh_filetype == 0x6 - printf "MH_DYLIB " - set $kgm_printed_type = 1 - end - if $kgm_mh_filetype == 0x7 - printf "MH_DYLINKER " - set $kgm_printed_type = 1 - end - if $kgm_mh_filetype == 0x8 - printf "MH_BUNDLE " - set $kgm_printed_type = 1 - end - if !$kgm_printed_type - printf "UNKNOWN " - end - printf "%02.2X%02.2X%02.2X%02.2X-", $kgm_uuid_data[0], $kgm_uuid_data[1], $kgm_uuid_data[2], $kgm_uuid_data[3] - printf "%02.2X%02.2X-", $kgm_uuid_data[4], $kgm_uuid_data[5] - printf "%02.2X%02.2X-", $kgm_uuid_data[6], $kgm_uuid_data[7] - printf "%02.2X%02.2X-", $kgm_uuid_data[8], $kgm_uuid_data[9] - printf "%02.2X%02.2X%02.2X%02.2X%02.2X%02.2X", $kgm_uuid_data[10], $kgm_uuid_data[11], $kgm_uuid_data[12], $kgm_uuid_data[13], $kgm_uuid_data[14], $kgm_uuid_data[15] - - _unmap_user_data_from_task $kgm_taskp - - printf " " - _print_path_for_image $kgm_mh_path_address - printf "\n" - - loop_break - else - if $kgm_lc_cmd == 0xe - set $kgm_load_dylinker_data = $kgm_lc_data - set $kgm_dyld_load_path = $kgm_lc_address + *((unsigned int *)$kgm_load_dylinker_data) - end - _unmap_user_data_from_task $kgm_taskp - end - - set $kgm_lc_address = $kgm_lc_address + $kgm_lc_cmd_size - set $kgm_lc_idx = $kgm_lc_idx + 1 - end - - if (!$kgm_uuid_data) - # didn't find LC_UUID, for a dylib, just print out basic info - if $kgm_mh_64 - printf "0x%016llx ", $kgm_mh_image_address - else - printf "0x%08x ", $kgm_mh_image_address - end - set $kgm_printed_type = 0 - if $kgm_mh_filetype == 0x2 - printf "MH_EXECUTE " - set $kgm_printed_type = 1 - end - if $kgm_mh_filetype == 0x6 - printf "MH_DYLIB " - set $kgm_printed_type = 1 - end - if $kgm_mh_filetype == 0x7 - printf "MH_DYLINKER " - set $kgm_printed_type = 1 - end - if $kgm_mh_filetype == 0x8 - printf "MH_BUNDLE " - set $kgm_printed_type = 1 - end - if !$kgm_printed_type - printf "UNKNOWN " - end - printf " ", - - printf " " - _print_path_for_image $kgm_mh_path_address - printf "\n" - - end - -end - -define _print_images_for_dyld_image_info - set $kgm_taskp = $arg0 - set 
$kgm_task_64 = $arg1 - set $kgm_dyld_all_image_infos_address = (unsigned long long)$arg2 - - _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 112 - - set $kgm_dyld_all_image_infos = (unsigned int *)$kgm_map_user_window - set $kgm_dyld_all_image_infos_version = $kgm_dyld_all_image_infos[0] - if ($kgm_dyld_all_image_infos_version > 12) - printf "Unknown dyld all_image_infos version number %d\n", $kgm_dyld_all_image_infos_version - end - set $kgm_image_info_count = $kgm_dyld_all_image_infos[1] - - set $kgm_dyld_load_path = 0 - if $kgm_task_64 - set $kgm_image_info_size = 24 - set $kgm_image_info_array_address = ((unsigned long long *)$kgm_dyld_all_image_infos)[1] - set $kgm_dyld_load_address = ((unsigned long long *)$kgm_dyld_all_image_infos)[4] - set $kgm_dyld_all_image_infos_address_from_struct = ((unsigned long long *)$kgm_dyld_all_image_infos)[13] - else - set $kgm_image_info_size = 12 - set $kgm_image_info_array_address = ((unsigned int *)$kgm_dyld_all_image_infos)[2] - set $kgm_dyld_load_address = ((unsigned int *)$kgm_dyld_all_image_infos)[5] - set $kgm_dyld_all_image_infos_address_from_struct = ((unsigned int *)$kgm_dyld_all_image_infos)[14] - end - - _unmap_user_data_from_task $kgm_taskp - - # Account for ASLR slide before dyld can fix the structure - set $kgm_dyld_load_address = $kgm_dyld_load_address + ($kgm_dyld_all_image_infos_address - $kgm_dyld_all_image_infos_address_from_struct) - - set $kgm_image_info_i = 0 - while $kgm_image_info_i < $kgm_image_info_count - - set $kgm_image_info_address = $kgm_image_info_array_address + $kgm_image_info_size*$kgm_image_info_i - - _map_user_data_from_task $kgm_taskp $kgm_image_info_address $kgm_image_info_size - if $kgm_task_64 - set $kgm_image_info_addr = ((unsigned long long *)$kgm_map_user_window)[0] - set $kgm_image_info_path = ((unsigned long long *)$kgm_map_user_window)[1] - else - set $kgm_image_info_addr = ((unsigned int *)$kgm_map_user_window)[0] - set $kgm_image_info_path = ((unsigned int *)$kgm_map_user_window)[1] - end - _unmap_user_data_from_task $kgm_taskp - - # printf "[%d] = image address %llx path address %llx\n", $kgm_image_info_i, $kgm_image_info_addr, $kgm_image_info_path - _print_image_info $kgm_image_info_addr $kgm_image_info_path - - set $kgm_image_info_i = $kgm_image_info_i + 1 - end - - # $kgm_dyld_load_path may get set when the main executable is processed - # printf "[dyld] = image address %llx path address %llx\n", $kgm_dyld_load_address, $kgm_dyld_load_path - _print_image_info $kgm_dyld_load_address $kgm_dyld_load_path - -end - -define showuserlibraries - set $kgm_taskp = (task_t)$arg0 - set $kgm_dyld_image_info = $kgm_taskp->all_image_info_addr - - set $kgm_map = $kgm_taskp->map - set $kgm_task_64 = ( $kgm_taskp->taskFeatures[0] & 0x80000000) - - if ($kgm_dyld_image_info != 0) - printf "address " - if $kgm_task_64 - printf " " - end - printf " type " - printf " uuid " - printf "path\n" - - _print_images_for_dyld_image_info $kgm_taskp $kgm_task_64 $kgm_dyld_image_info - else - printf "No dyld shared library information available for task\n" - end -end -document showuserlibraries -Syntax: (gdb) showuserlibraries -| For a given user task, inspect the dyld shared library state and print -| information about all Mach-O images. 
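-| For example, "showuserlibraries 0xffffff8012345678" (an illustrative task_t address) prints one line per image with its load address, Mach-O filetype, UUID, and path.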
-end - -define showuserdyldinfo - set $kgm_taskp = (task_t)$arg0 - set $kgm_dyld_all_image_infos_address = (unsigned long long)$kgm_taskp->all_image_info_addr - - set $kgm_map = $kgm_taskp->map - set $kgm_task_64 = ( $kgm_taskp->taskFeatures[0] & 0x80000000) - - if ($kgm_dyld_all_image_infos_address != 0) - - _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 112 - - set $kgm_dyld_all_image_infos = (unsigned char *)$kgm_map_user_window - set $kgm_dyld_all_image_infos_version = ((unsigned int *)$kgm_dyld_all_image_infos)[0] - if ($kgm_dyld_all_image_infos_version > 12) - printf "Unknown dyld all_image_infos version number %d\n", $kgm_dyld_all_image_infos_version - end - - # Find fields by byte offset. We assume at least version 9 is supported - if $kgm_task_64 - set $kgm_dyld_all_image_infos_infoArrayCount = *(unsigned int *)(&$kgm_dyld_all_image_infos[4]) - set $kgm_dyld_all_image_infos_infoArray = *(unsigned long long *)(&$kgm_dyld_all_image_infos[8]) - set $kgm_dyld_all_image_infos_notification = *(unsigned long long *)(&$kgm_dyld_all_image_infos[16]) - set $kgm_dyld_all_image_infos_processDetachedFromSharedRegion = *(unsigned char *)(&$kgm_dyld_all_image_infos[24]) - set $kgm_dyld_all_image_infos_libSystemInitialized = *(unsigned char *)(&$kgm_dyld_all_image_infos[25]) - set $kgm_dyld_all_image_infos_dyldImageLoadAddress = *(unsigned long long *)(&$kgm_dyld_all_image_infos[32]) - set $kgm_dyld_all_image_infos_jitInfo = *(unsigned long long *)(&$kgm_dyld_all_image_infos[40]) - set $kgm_dyld_all_image_infos_dyldVersion = *(unsigned long long *)(&$kgm_dyld_all_image_infos[48]) - set $kgm_dyld_all_image_infos_errorMessage = *(unsigned long long *)(&$kgm_dyld_all_image_infos[56]) - set $kgm_dyld_all_image_infos_terminationFlags = *(unsigned long long *)(&$kgm_dyld_all_image_infos[64]) - set $kgm_dyld_all_image_infos_coreSymbolicationShmPage = *(unsigned long long *)(&$kgm_dyld_all_image_infos[72]) - set $kgm_dyld_all_image_infos_systemOrderFlag = *(unsigned long long *)(&$kgm_dyld_all_image_infos[80]) - set $kgm_dyld_all_image_infos_uuidArrayCount = *(unsigned long long *)(&$kgm_dyld_all_image_infos[88]) - set $kgm_dyld_all_image_infos_uuidArray = *(unsigned long long *)(&$kgm_dyld_all_image_infos[96]) - set $kgm_dyld_all_image_infos_dyldAllImageInfosAddress = *(unsigned long long *)(&$kgm_dyld_all_image_infos[104]) - else - set $kgm_dyld_all_image_infos_infoArrayCount = *(unsigned int *)(&$kgm_dyld_all_image_infos[4]) - set $kgm_dyld_all_image_infos_infoArray = *(unsigned int *)(&$kgm_dyld_all_image_infos[8]) - set $kgm_dyld_all_image_infos_notification = *(unsigned int *)(&$kgm_dyld_all_image_infos[12]) - set $kgm_dyld_all_image_infos_processDetachedFromSharedRegion = *(unsigned char *)(&$kgm_dyld_all_image_infos[16]) - set $kgm_dyld_all_image_infos_libSystemInitialized = *(unsigned char *)(&$kgm_dyld_all_image_infos[17]) - set $kgm_dyld_all_image_infos_dyldImageLoadAddress = *(unsigned int *)(&$kgm_dyld_all_image_infos[20]) - set $kgm_dyld_all_image_infos_jitInfo = *(unsigned int *)(&$kgm_dyld_all_image_infos[24]) - set $kgm_dyld_all_image_infos_dyldVersion = *(unsigned int *)(&$kgm_dyld_all_image_infos[28]) - set $kgm_dyld_all_image_infos_errorMessage = *(unsigned int *)(&$kgm_dyld_all_image_infos[32]) - set $kgm_dyld_all_image_infos_terminationFlags = *(unsigned int *)(&$kgm_dyld_all_image_infos[36]) - set $kgm_dyld_all_image_infos_coreSymbolicationShmPage = *(unsigned int *)(&$kgm_dyld_all_image_infos[40]) - set $kgm_dyld_all_image_infos_systemOrderFlag = *(unsigned 
int *)(&$kgm_dyld_all_image_infos[44]) - set $kgm_dyld_all_image_infos_uuidArrayCount = *(unsigned int *)(&$kgm_dyld_all_image_infos[48]) - set $kgm_dyld_all_image_infos_uuidArray = *(unsigned int *)(&$kgm_dyld_all_image_infos[52]) - set $kgm_dyld_all_image_infos_dyldAllImageInfosAddress = *(unsigned int *)(&$kgm_dyld_all_image_infos[56]) - end - - _unmap_user_data_from_task $kgm_taskp - - set $kgm_dyld_all_imfo_infos_slide = ( $kgm_dyld_all_image_infos_address - $kgm_dyld_all_image_infos_dyldAllImageInfosAddress ) - set $kgm_dyld_all_image_infos_dyldVersion_postslide = ( $kgm_dyld_all_image_infos_dyldVersion + $kgm_dyld_all_imfo_infos_slide ) - - printf " version %u\n", $kgm_dyld_all_image_infos_version - printf " infoArrayCount %u\n", $kgm_dyld_all_image_infos_infoArrayCount - printf " infoArray " - showuserptr $kgm_dyld_all_image_infos_infoArray - printf "\n" - printf " notification " - showuserptr $kgm_dyld_all_image_infos_notification - printf "\n" - printf "processDetachedFromSharedRegion %d\n", $kgm_dyld_all_image_infos_processDetachedFromSharedRegion - printf " libSystemInitialized %d\n", $kgm_dyld_all_image_infos_libSystemInitialized - printf " dyldImageLoadAddress " - showuserptr $kgm_dyld_all_image_infos_dyldImageLoadAddress - printf "\n" - printf " jitInfo " - showuserptr $kgm_dyld_all_image_infos_jitInfo - printf "\n" - printf " dyldVersion " - showuserptr $kgm_dyld_all_image_infos_dyldVersion - printf "\n" - printf " " - _print_path_for_image $kgm_dyld_all_image_infos_dyldVersion_postslide - if ($kgm_dyld_all_imfo_infos_slide != 0) - printf " (currently " - showuserptr $kgm_dyld_all_image_infos_dyldVersion_postslide - printf ")" - end - printf "\n" - - printf " errorMessage " - showuserptr $kgm_dyld_all_image_infos_errorMessage - printf "\n" - if $kgm_dyld_all_image_infos_errorMessage != 0 - printf " " - _print_path_for_image $kgm_dyld_all_image_infos_errorMessage - printf "\n" - end - - printf " terminationFlags " - showuserptr $kgm_dyld_all_image_infos_terminationFlags - printf "\n" - printf " coreSymbolicationShmPage " - showuserptr $kgm_dyld_all_image_infos_coreSymbolicationShmPage - printf "\n" - printf " systemOrderFlag " - showuserptr $kgm_dyld_all_image_infos_systemOrderFlag - printf "\n" - printf " uuidArrayCount " - showuserptr $kgm_dyld_all_image_infos_uuidArrayCount - printf "\n" - printf " uuidArray " - showuserptr $kgm_dyld_all_image_infos_uuidArray - printf "\n" - printf " dyldAllImageInfosAddress " - showuserptr $kgm_dyld_all_image_infos_dyldAllImageInfosAddress - printf "\n" - printf " (currently " - showuserptr $kgm_dyld_all_image_infos_address - printf ")\n" - - if $kgm_task_64 - set $kgm_dyld_all_image_infos_address = $kgm_dyld_all_image_infos_address + 112 - _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 64 - set $kgm_dyld_all_image_infos_v10 = (unsigned char *)$kgm_map_user_window - set $kgm_dyld_all_image_infos_initialImageCount = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[112-112]) - set $kgm_dyld_all_image_infos_errorKind = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[120-112]) - set $kgm_dyld_all_image_infos_errorClientOfDylibPath = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[128-112]) - set $kgm_dyld_all_image_infos_errorTargetDylibPath = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[136-112]) - set $kgm_dyld_all_image_infos_errorSymbol = *(unsigned long long *)(&$kgm_dyld_all_image_infos_v10[144-112]) - set $kgm_dyld_all_image_infos_sharedCacheSlide = *(unsigned long long 
*)(&$kgm_dyld_all_image_infos_v10[152-112]) - - _unmap_user_data_from_task $kgm_taskp - else - set $kgm_dyld_all_image_infos_address = $kgm_dyld_all_image_infos_address + 60 - _map_user_data_from_task $kgm_taskp $kgm_dyld_all_image_infos_address 64 - set $kgm_dyld_all_image_infos_v10 = (unsigned char *)$kgm_map_user_window - set $kgm_dyld_all_image_infos_initialImageCount = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[60-60]) - set $kgm_dyld_all_image_infos_errorKind = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[64-60]) - set $kgm_dyld_all_image_infos_errorClientOfDylibPath = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[68-60]) - set $kgm_dyld_all_image_infos_errorTargetDylibPath = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[72-60]) - set $kgm_dyld_all_image_infos_errorSymbol = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[76-60]) - set $kgm_dyld_all_image_infos_sharedCacheSlide = *(unsigned int *)(&$kgm_dyld_all_image_infos_v10[80-60]) - _unmap_user_data_from_task $kgm_taskp - end - - if $kgm_dyld_all_image_infos_version >= 10 - printf " initialImageCount " - showuserptr $kgm_dyld_all_image_infos_initialImageCount - printf "\n" - end - - if $kgm_dyld_all_image_infos_version >= 11 - printf " errorKind " - showuserptr $kgm_dyld_all_image_infos_errorKind - printf "\n" - printf " errorClientOfDylibPath " - showuserptr $kgm_dyld_all_image_infos_errorClientOfDylibPath - printf "\n" - if $kgm_dyld_all_image_infos_errorClientOfDylibPath != 0 - printf " " - _print_path_for_image $kgm_dyld_all_image_infos_errorClientOfDylibPath - printf "\n" - end - printf " errorTargetDylibPath " - showuserptr $kgm_dyld_all_image_infos_errorTargetDylibPath - printf "\n" - if $kgm_dyld_all_image_infos_errorTargetDylibPath != 0 - printf " " - _print_path_for_image $kgm_dyld_all_image_infos_errorTargetDylibPath - printf "\n" - end - printf " errorSymbol " - showuserptr $kgm_dyld_all_image_infos_errorSymbol - printf "\n" - if $kgm_dyld_all_image_infos_errorSymbol != 0 - printf " " - _print_path_for_image $kgm_dyld_all_image_infos_errorSymbol - printf "\n" - end - end - - if $kgm_dyld_all_image_infos_version >= 12 - printf " sharedCacheSlide " - showuserptr $kgm_dyld_all_image_infos_sharedCacheSlide - printf "\n" - end - - else - printf "No dyld information available for task\n" - end -end -document showuserdyldinfo -Syntax: (gdb) showuserdyldinfo -| For a given user task, inspect the dyld global info and print -| out all fields, including error messages. 
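-| A hypothetical invocation (the task pointer below is illustrative only):
-| (gdb) showuserdyldinfo (task_t)0xffffff80252e1a80
-| The byte offsets decoded above follow the dyld_all_image_infos layout;
-| the fields read past offset 112 (64-bit) or 60 (32-bit) are only printed
-| when the reported version is >= 10.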
-end
-
-define showkerneldebugheader
-    printf "kd_buf "
-    showptrhdrpad
-    printf "CPU Thread "
-    showptrhdrpad
-    printf "Timestamp S/E Class Sub Code Code Specific Info\n"
-end
-
-define _printevflags
-    if $arg0 & 1
-        printf "EV_RE "
-    end
-    if $arg0 & 2
-        printf "EV_WR "
-    end
-    if $arg0 & 4
-        printf "EV_EX "
-    end
-    if $arg0 & 8
-        printf "EV_RM "
-    end
-
-    if $arg0 & 0x00100
-        printf "EV_RBYTES "
-    end
-    if $arg0 & 0x00200
-        printf "EV_WBYTES "
-    end
-    if $arg0 & 0x00400
-        printf "EV_RCLOSED "
-    end
-    if $arg0 & 0x00800
-        printf "EV_RCONN "
-    end
-    if $arg0 & 0x01000
-        printf "EV_WCLOSED "
-    end
-    if $arg0 & 0x02000
-        printf "EV_WCONN "
-    end
-    if $arg0 & 0x04000
-        printf "EV_OOB "
-    end
-    if $arg0 & 0x08000
-        printf "EV_FIN "
-    end
-    if $arg0 & 0x10000
-        printf "EV_RESET "
-    end
-    if $arg0 & 0x20000
-        printf "EV_TIMEOUT "
-    end
-end
-
-define showkerneldebugbufferentry
-    set $kgm_kdebug_entry = (kd_buf *) $arg0
-
-    set $kgm_debugid = $kgm_kdebug_entry->debugid
-    set $kgm_kdebug_arg1 = $kgm_kdebug_entry->arg1
-    set $kgm_kdebug_arg2 = $kgm_kdebug_entry->arg2
-    set $kgm_kdebug_arg3 = $kgm_kdebug_entry->arg3
-    set $kgm_kdebug_arg4 = $kgm_kdebug_entry->arg4
-
-    if $kgm_lp64
-        set $kgm_kdebug_cpu = $kgm_kdebug_entry->cpuid
-        set $kgm_ts_hi = ($kgm_kdebug_entry->timestamp >> 32) & 0xFFFFFFFF
-        set $kgm_ts_lo = $kgm_kdebug_entry->timestamp & 0xFFFFFFFF
-    else
-        set $kgm_kdebug_cpu = ($kgm_kdebug_entry->timestamp >> 56)
-        set $kgm_ts_hi = ($kgm_kdebug_entry->timestamp >> 32) & 0x00FFFFFF
-        set $kgm_ts_lo = $kgm_kdebug_entry->timestamp & 0xFFFFFFFF
-    end
-
-    set $kgm_kdebug_class = ($kgm_debugid >> 24) & 0x000FF
-    set $kgm_kdebug_subclass = ($kgm_debugid >> 16) & 0x000FF
-    set $kgm_kdebug_code = ($kgm_debugid >> 2) & 0x03FFF
-    set $kgm_kdebug_qual = ($kgm_debugid ) & 0x00003
-
-    if $kgm_kdebug_qual == 0
-        set $kgm_kdebug_qual = '-'
-    else
-    if $kgm_kdebug_qual == 1
-        set $kgm_kdebug_qual = 'S'
-    else
-    if $kgm_kdebug_qual == 2
-        set $kgm_kdebug_qual = 'E'
-    else
-    if $kgm_kdebug_qual == 3
-        set $kgm_kdebug_qual = '?'
-    end
-    end
-    end
-    end
-
-    # preamble and qual
-
-    showptr $kgm_kdebug_entry
-    printf " %d ", $kgm_kdebug_cpu
-    showptr $kgm_kdebug_entry->arg5
-    printf " 0x%08X%08X %c ", $kgm_ts_hi, $kgm_ts_lo, $kgm_kdebug_qual
-
-    # class
-
-    if $kgm_kdebug_class == 1
-        printf "MACH"
-    else
-    if $kgm_kdebug_class == 2
-        printf "NET "
-    else
-    if $kgm_kdebug_class == 3
-        printf "FS "
-    else
-    if $kgm_kdebug_class == 4
-        printf "BSD "
-    else
-    if $kgm_kdebug_class == 5
-        printf "IOK "
-    else
-    if $kgm_kdebug_class == 6
-        printf "DRVR"
-    else
-    if $kgm_kdebug_class == 7
-        printf "TRAC"
-    else
-    if $kgm_kdebug_class == 8
-        printf "DLIL"
-    else
-    if $kgm_kdebug_class == 9
-        printf "SEC "
-    else
-    if $kgm_kdebug_class == 20
-        printf "MISC"
-    else
-    if $kgm_kdebug_class == 31
-        printf "DYLD"
-    else
-    if $kgm_kdebug_class == 32
-        printf "QT "
-    else
-    if $kgm_kdebug_class == 33
-        printf "APPS"
-    else
-    if $kgm_kdebug_class == 255
-        printf "MIG "
-    else
-        printf "0x%02X", $kgm_kdebug_class
-    end
-    end
-    end
-    end
-    end
-    end
-    end
-    end
-    end
-    end
-    end
-    end
-    end
-    end
-
-    # subclass and code
-
-    printf " 0x%02X %5d ", $kgm_kdebug_subclass, $kgm_kdebug_code
-
-    # space for debugid-specific processing
-
-    # EVPROC from bsd/kern/sys_generic.c
-
-    # MISCDBG_CODE(DBG_EVENT,DBG_WAIT)
-    if $kgm_debugid == 0x14100048
-        printf "waitevent "
-        if $kgm_kdebug_arg1 == 1
-            printf "before sleep"
-        else
-        if $kgm_kdebug_arg1 == 2
-            printf "after sleep"
-        else
-            printf "????????????"
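-            # Worked example of the debugid split above (a sketch):
-            # 0x14100048 decodes as class 0x14 (printed as MISC),
-            # subclass 0x10 (DBG_EVENT), code (0x48 >> 2) & 0x3FFF = 0x12
-            # (DBG_WAIT), and qualifier 0x48 & 3 = 0 (no start/end bit),
-            # i.e. the MISCDBG_CODE(DBG_EVENT,DBG_WAIT) case handled here.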
- end - end - printf " chan=0x%08X ", $kgm_kdebug_arg2 - else - # MISCDBG_CODE(DBG_EVENT,DBG_WAIT|DBG_FUNC_START) - if $kgm_debugid == 0x14100049 - printf "waitevent " - else - # MISCDBG_CODE(DBG_EVENT,DBG_WAIT|DBG_FUNC_END) - if $kgm_debugid == 0x1410004a - printf "waitevent error=%d ", $kgm_kdebug_arg1 - printf "eqp=0x%08X ", $kgm_kdebug_arg4 - _printevflags $kgm_kdebug_arg3 - printf "er_handle=%d ", $kgm_kdebug_arg2 - else - # MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE|DBG_FUNC_START) - if $kgm_debugid == 0x14100059 - printf "evprocdeque proc=0x%08X ", $kgm_kdebug_arg1 - if $kgm_kdebug_arg2 == 0 - printf "remove first " - else - printf "remove 0x%08X ", $kgm_kdebug_arg2 - end - else - # MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE|DBG_FUNC_END) - if $kgm_debugid == 0x1410005a - printf "evprocdeque " - if $kgm_kdebug_arg1 == 0 - printf "result=NULL " - else - printf "result=0x%08X ", $kgm_kdebug_arg1 - end - else - # MISCDBG_CODE(DBG_EVENT,DBG_POST|DBG_FUNC_START) - if $kgm_debugid == 0x14100041 - printf "postevent " - _printevflags $kgm_kdebug_arg1 - else - # MISCDBG_CODE(DBG_EVENT,DBG_POST) - if $kgm_debugid == 0x14100040 - printf "postevent " - printf "evq=0x%08X ", $kgm_kdebug_arg1 - printf "er_eventbits=" - _printevflags $kgm_kdebug_arg2 - printf "mask=" - _printevflags $kgm_kdebug_arg3 - else - # MISCDBG_CODE(DBG_EVENT,DBG_POST|DBG_FUNC_END) - if $kgm_debugid == 0x14100042 - printf "postevent " - else - # MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE|DBG_FUNC_START) - if $kgm_debugid == 0x14100055 - printf "evprocenque eqp=0x%08d ", $kgm_kdebug_arg1 - if $kgm_kdebug_arg2 & 1 - printf "EV_QUEUED " - end - _printevflags $kgm_kdebug_arg3 - else - - # MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP) - if $kgm_debugid == 0x14100050 - printf "evprocenque before wakeup eqp=0x%08d ", $kgm_kdebug_arg4 - else - # MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE|DBG_FUNC_END) - if $kgm_debugid == 0x14100056 - printf "evprocenque " - else - # MISCDBG_CODE(DBG_EVENT,DBG_MOD|DBG_FUNC_START) - if $kgm_debugid == 0x1410004d - printf "modwatch " - else - # MISCDBG_CODE(DBG_EVENT,DBG_MOD) - if $kgm_debugid == 0x1410004c - printf "modwatch er_handle=%d ", $kgm_kdebug_arg1 - _printevflags $kgm_kdebug_arg2 - printf "evq=0x%08X ", $kgm_kdebug_arg3 - else - # MISCDBG_CODE(DBG_EVENT,DBG_MOD|DBG_FUNC_END) - if $kgm_debugid == 0x1410004e - printf "modwatch er_handle=%d ", $kgm_kdebug_arg1 - printf "ee_eventmask=" - _printevflags $kgm_kdebug_arg2 - printf "sp=0x%08X ", $kgm_kdebug_arg3 - printf "flag=" - _printevflags $kgm_kdebug_arg4 - else - printf "arg1=0x%08X ", $kgm_kdebug_arg1 - printf "arg2=0x%08X ", $kgm_kdebug_arg2 - printf "arg3=0x%08X ", $kgm_kdebug_arg3 - printf "arg4=0x%08X ", $kgm_kdebug_arg4 - end - end - end - end - end - end - end - end - end - end - end - end - end - end - - # finish up - - printf "\n" -end - -define showkerneldebugbuffercpu - set $kgm_cpu_number = (int) $arg0 - set $kgm_entry_count = (int) $arg1 - set $kgm_debugentriesfound = 0 - # 0x80000000 == KDBG_BFINIT - if (kd_ctrl_page.kdebug_flags & 0x80000000) - showkerneldebugheader - - if $kgm_entry_count == 0 - printf " is 0, dumping 50 entries\n" - set $kgm_entry_count = 50 - end - - if $kgm_cpu_number >= kd_cpus - printf "cpu number too big\n" - else - set $kgm_kdbp = &kdbip[$kgm_cpu_number] - set $kgm_kdsp = $kgm_kdbp->kd_list_head - while (($kgm_kdsp.raw != 0) && ($kgm_entry_count > 0)) - set $kgm_kdsp_actual = &kd_bufs[$kgm_kdsp.buffer_index].kdsb_addr[$kgm_kdsp.offset] - if $kgm_kdsp_actual->kds_readlast != $kgm_kdsp_actual->kds_bufindx - set $kgm_kds_bufptr = 
&$kgm_kdsp_actual->kds_records[$kgm_kdsp_actual->kds_bufindx] - while (($kgm_kds_bufptr > &$kgm_kdsp_actual->kds_records[$kgm_kdsp_actual->kds_readlast]) && ($kgm_entry_count > 0)) - set $kgm_kds_bufptr = $kgm_kds_bufptr - 1 - set $kgm_entry_count = $kgm_entry_count - 1 - showkerneldebugbufferentry $kgm_kds_bufptr - end - end - set $kgm_kdsp = $kgm_kdsp_actual->kds_next - end - end - else - printf "Trace buffer not enabled\n" - end -end - -document showkerneldebugbuffercpu -Syntax: showkerneldebugbuffercpu -| Prints the last N entries in the kernel debug buffer for CPU x. -end - -define showkerneldebugbuffer - # 0x80000000 == KDBG_BFINIT - if (kd_ctrl_page.kdebug_flags & 0x80000000) - - set $kgm_entrycount = (int) $arg0 - - if $kgm_entrycount == 0 - printf " is 0, dumping 50 entries per cpu\n" - set $kgm_entrycount = 50 - end - - set $kgm_cpu = (int) 0 - - while $kgm_cpu < kd_cpus - showkerneldebugbuffercpu $kgm_cpu $kgm_entrycount - set $kgm_cpu = $kgm_cpu + 1 - end - else - printf "Trace buffer not enabled\n" - end -end - -document showkerneldebugbuffer -Syntax: showkerneldebugbuffer -| Prints the last N entries in the kernel debug buffer per cpu. i.e. showkerneldebugbuffer 50 will -| display the last 50 entries in each CPU's debug buffer. -end - -define showallvmstats - printf " pid command #ents wired vsize rsize max rsize\n" - printf " (pages) (pages) (pages) (pages)\n" - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - set $kgm_procp = (struct proc *)($kgm_taskp->bsd_info) - set $kgm_mapp = (struct _vm_map *)($kgm_taskp->map) - printf "%8d %17s %8d %15d %15d %15d %15d\n", $kgm_procp->p_pid, $kgm_procp->p_comm, $kgm_mapp->hdr.nentries, $kgm_mapp->pmap->stats.wired_count, $kgm_mapp->size >> 12, $kgm_mapp->pmap->stats.resident_count, $kgm_mapp->pmap->stats.resident_max - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end -end - -document showallvmstats -Syntax: showallvmstats -| prints a summary of vm statistics in a table format -end - -define memstats - if ($kgm_mtype == $kgm_mtype_arm) - printf "kern_memorystatus_level: %8d\n", kern_memorystatus_level - end - printf "vm_page_throttled_count: %8d\n", vm_page_throttled_count - printf "vm_page_active_count: %8d\n", vm_page_active_count - printf "vm_page_inactive_count: %8d\n", vm_page_inactive_count - printf "vm_page_wire_count: %8d\n", vm_page_wire_count - printf "vm_page_free_count: %8d\n", vm_page_free_count - printf "vm_page_purgeable_count: %8d\n", vm_page_purgeable_count - printf "vm_page_inactive_target: %8d\n", vm_page_inactive_target - printf "vm_page_free_target: %8d\n", vm_page_free_target - printf "inuse_ptepages_count: %8d\n", inuse_ptepages_count - printf "vm_page_free_reserved: %8d\n", vm_page_free_reserved -end - -document memstats -Syntax: (gdb) memstats -| Prints out a summary of various memory statistics. In particular vm_page_wire_count should -| be greater than 2K or you are under memory pressure. -end - -define show_user_registers - showuserregisters $arg0 -end - -document show_user_registers -Syntax: show_user_registers -| Display user registers associated with a kernel thread -| properly displays the 32 bit or 64 bit registers for intel architecture -end - -define _cmp - set $cmp0 = $arg0 - set $cmp1 = $arg1 - - # check for end of string. cmp0 can be longer than cmp1. it - # can't be shorter. 
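-    # A worked packing example (verifiable by hand): the packed form consumed
-    # here is built by strcmp_arg_pack64 (defined below), so
-    # strcmp_arg_pack64 'D' 'a' 'r' 'w' 'i' 'n' ' ' 'K'
-    # yields $kgm_strcmp_arg == 0x4B206E6977726144, with arg0 ('D') in the
-    # low byte; _cmp_arg64 then unpacks and compares it 8 bits at a time.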
- if $cmp1 == '\0' - set $kgm_strcmp_result = 0 - set $kgm_strcmp_done = 1 - end - - if !$kgm_strcmp_done && $cmp0 == '\0' - set $kgm_strcmp_result = -1 - set $kgm_strcmp_done = 1 - end - - # do they match? - if !$kgm_strcmp_done - set $kgm_strcmp_result = (uint8_t) $cmp0 - (uint8_t) $cmp1 - if $kgm_strcmp_result != 0 - set $kgm_strcmp_done = 1 - end - end -end - -define _cmp_arg64 - set $cmp = $arg1 - set $masked = $cmp & 0xFF - _cmp $arg0[0] $masked - - if !$kgm_strcmp_done - set $cmp = $cmp >> 8 - set $masked = $cmp & 0xFF - _cmp $arg0[1] $masked - end - if !$kgm_strcmp_done - set $cmp = $cmp >> 8 - set $masked = $cmp & 0xFF - _cmp $arg0[2] $masked - end - if !$kgm_strcmp_done - set $cmp = $cmp >> 8 - set $masked = $cmp & 0xFF - _cmp $arg0[3] $masked - end - if !$kgm_strcmp_done - set $cmp = $cmp >> 8 - set $masked = $cmp & 0xFF - _cmp $arg0[4] $masked - end - if !$kgm_strcmp_done - set $cmp = $cmp >> 8 - set $masked = $cmp & 0xFF - _cmp $arg0[5] $masked - end - if !$kgm_strcmp_done - set $cmp = $cmp >> 8 - set $masked = $cmp & 0xFF - _cmp $arg0[6] $masked - end - if !$kgm_strcmp_done - set $cmp = $cmp >> 8 - set $masked = $cmp & 0xFF - _cmp $arg0[7] $masked - end -end - -define strcmp_arg_pack64 - set $kgm_strcmp_arg = ((((((((((((((uint64_t) $arg7 << 8) | $arg6) << 8) | $arg5) << 8) | $arg4) << 8) | $arg3) << 8) | $arg2) << 8) | $arg1) << 8) | $arg0 -end - -document strcmp_arg_pack64 -Syntax: strcmp_arg_pack64 -| Packs a string given as 8 character arguments into a 64-bit int stored in -| $kgm_strcmp_arg. Use 0 or '\0' for unused arguments. The encoded string -| is suitable for use by strcmp_nomalloc and setfindregistrystr. -| e.g., strcmp_arg_pack64 'H' 'e' 'l' 'l' 'o' 0 0 0 -| packs "Hello" into $kgm_strcmp_arg. -| -end - -define strcmp_nomalloc - set $str = $arg0 - set $count = $argc - 1 - - set $kgm_strcmp_result = 0 - set $kgm_strcmp_done = 0 - - if $count > 0 - _cmp_arg64 $str $arg1 - end - if !$kgm_strcmp_done && $count > 1 - set $str = $str + 8 - _cmp_arg64 $str $arg2 - end - if !$kgm_strcmp_done && $count > 2 - set $str = $str + 8 - _cmp_arg64 $str $arg3 - end - if !$kgm_strcmp_done && $count > 3 - set $str = $str + 8 - _cmp_arg64 $str $arg4 - end - if !$kgm_strcmp_done && $count > 4 - set $str = $str + 8 - _cmp_arg64 $str $arg5 - end - if !$kgm_strcmp_done && $count > 5 - set $str = $str + 8 - _cmp_arg64 $str $arg6 - end - if !$kgm_strcmp_done && $count > 6 - set $str = $str + 8 - _cmp_arg64 $str $arg7 - end - if !$kgm_strcmp_done && $count > 7 - set $str = $str + 8 - _cmp_arg64 $str $arg8 - end - if !$kgm_strcmp_done && $count > 8 - set $str = $str + 8 - _cmp_arg64 $str $arg9 - end -end - -document strcmp_nomalloc -Syntax: strcmp_nomalloc [b] [c] [d] [e] [f] [g] [h] [i] -| Given a pre-allocated , perform a string compare with the -| encoded string stored in arguments a - i. The result is stored in -| $kgm_strcmp_result. 
-| -| For example, the following will result in $kgm_strcmp_result == 0: -| strcmp_arg_pack64 'D' 'a' 'r' 'w' 'i' 'n' ' ' 'K' -| strcmp_nomalloc version $kgm_strcmp_arg -end - -define memcpy - set $kgm_dst = (unsigned char *)$arg0 - set $kgm_src = (unsigned char *)$arg1 - set $kgm_count = $arg2 - - # printf "src %p dst %p len %d\n", $kgm_src, $kgm_dst, $kgm_count - - while ($kgm_count >= 8) - set *(unsigned long long *)$kgm_dst = *(unsigned long long *)$kgm_src - - set $kgm_dst = $kgm_dst + 8 - set $kgm_src = $kgm_src + 8 - set $kgm_count = $kgm_count - 8 - end - while ($kgm_count > 0) - set *$kgm_dst = *$kgm_src - - set $kgm_dst = $kgm_dst + 1 - set $kgm_src = $kgm_src + 1 - set $kgm_count = $kgm_count - 1 - end -end - -document memcpy -Syntax: memcpy -| Given two addresses that are accessible by the debugger, perform -| a memory copy of bytes from to -end - -# _pci_cfg_addr_value $addr $size -define _pci_cfg_addr_value - readphysint $arg0 $arg1 $kgm_lcpu_self - set $kgm_pci_cfg_value = $kgm_readphysint_result -end - - -set $kgm_pci_cfg_init = 0 -define _pci_cfg_init - # get this from the registry if it exists there - if $kgm_pci_cfg_init == 0 - strcmp_arg_pack64 'A' 'p' 'p' 'l' 'e' 'A' 'C' 'P' - set $AppleACP = $kgm_strcmp_arg - strcmp_arg_pack64 'I' 'P' 'l' 'a' 't' 'f' 'o' 'r' - set $IPlatfor = $kgm_strcmp_arg - strcmp_arg_pack64 'm' 'E' 'x' 'p' 'e' 'r' 't' 0 - set $mExpert = $kgm_strcmp_arg - setfindregistrystr $AppleACP $IPlatfor $mExpert - - set $olddepth = $kgm_reg_depth_max - set $kgm_reg_depth_max = 2 - _findregistryentry - set $kgm_reg_depth_max = $olddepth - - if $kgm_registry_entry - strcmp_arg_pack64 'a' 'c' 'p' 'i' '-' 'm' 'm' 'c' - set $acpi_mmc = $kgm_strcmp_arg - strcmp_arg_pack64 'f' 'g' '-' 's' 'e' 'g' '0' 0 - set $fg_seg0 = $kgm_strcmp_arg - setfindregistrystr $acpi_mmc $fg_seg0 - - _findregistryprop $kgm_registry_entry - if $kgm_registry_value - set $kgm_pci_cfg_base = ((OSNumber *) $kgm_registry_value)->value - set $kgm_pci_cfg_init = 1 - end - end - end - - # search for 0:0:0 in likely places if the above fails - if $kgm_pci_cfg_init == 0 - set $kgm_pci_cfg_base = 0xF0000000 - while $kgm_pci_cfg_init == 0 && $kgm_pci_cfg_base > 0xA0000000 - _pci_cfg_addr_value $kgm_pci_cfg_base 8 - if $kgm_pci_cfg_value > 0x0 && $kgm_pci_cfg_value < 0xFF - set $kgm_pci_cfg_init = 1 - else - set $kgm_pci_cfg_base = $kgm_pci_cfg_base - 0x10000000 - end - end - end -end - -# _pci_cfg_addr $bus $dev $fcn $off -define _pci_cfg_addr - set $bus = $arg0 - set $dev = $arg1 - set $fcn = $arg2 - set $off = $arg3 - - _pci_cfg_init - set $kgm_pci_cfg_addr = $kgm_pci_cfg_base | ($bus << 20) | ($dev << 15) | ($fcn << 12) | $off -end - -define _pci_cfg_value - _pci_cfg_addr $arg0 $arg1 $arg2 $arg3 - _pci_cfg_addr_value $kgm_pci_cfg_addr $arg4 -end - -define pci_cfg_read8 - _pci_cfg_value $arg0 $arg1 $arg2 $arg3 8 - printf "%08X: %02X\n", $kgm_pci_cfg_addr, $kgm_pci_cfg_value -end - -define pci_cfg_read16 - _pci_cfg_value $arg0 $arg1 $arg2 $arg3 16 - printf "%08X: %04X\n", $kgm_pci_cfg_addr, $kgm_pci_cfg_value -end - -define pci_cfg_read32 - _pci_cfg_value $arg0 $arg1 $arg2 $arg3 32 - printf "%08X: %08X\n", $kgm_pci_cfg_addr, $kgm_pci_cfg_value -end - -document pci_cfg_read8 -Syntax: (gdb) pci_cfg_read8 -| read 8 bits for the given of the pci device located at -| ::. -end - -document pci_cfg_read16 -Syntax: (gdb) pci_cfg_read -| read 16 bits for the given of the pci device located at -| ::. 
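-| Example (a sketch; bus/device/function values are illustrative):
-| (gdb) pci_cfg_read16 0 0 0 0x02
-| reads the 16-bit device ID (config offset 2) of bus 0, device 0, function 0.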
-end - -document pci_cfg_read32 -Syntax: (gdb) pci_cfg_read -| read 32 bits for the given of the pci device located at -| ::. -end - -define pci_cfg_write8 - _pci_cfg_addr $arg0 $arg1 $arg2 $arg3 - writephysint $kgm_pci_cfg_addr 8 $arg4 $kgm_lcpu_self -end - -define pci_cfg_write16 - _pci_cfg_addr $arg0 $arg1 $arg2 $arg3 - writephysint $kgm_pci_cfg_addr 16 $arg4 $kgm_lcpu_self -end - -define pci_cfg_write32 - _pci_cfg_addr $arg0 $arg1 $arg2 $arg3 - writephysint $kgm_pci_cfg_addr 32 $arg4 $kgm_lcpu_self -end - -document pci_cfg_write8 -Syntax: (gdb) pci_cfg_write8 -| write an 8-bit into the given of the pci device located at -| ::. -end - -document pci_cfg_write16 -Syntax: (gdb) pci_cfg_write16 -| write a 16-bit into the given of the pci device located at -| ::. -end - -document pci_cfg_write32 -Syntax: (gdb) pci_cfg_write32 -| write a 32-bit into the given of the pci device located at -| ::. -end - - -define pci_cfg_dump - set $bus = $arg0 - set $dev = $arg1 - set $fcn = $arg2 - set $off = 0 - - # check for a valid pci device - _pci_cfg_value $bus $dev $fcn $off 8 - if $kgm_pci_cfg_value > 0x0 && $kgm_pci_cfg_value < 0xff - printf " address: 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F\n" - printf "---------------------------------------------------------" - - while $off < 256 - _pci_cfg_value $bus $dev $fcn $off 32 - if ($off & 0xF) == 0 - printf "\n%08X: ", $kgm_pci_cfg_addr - end - printf "%02X %02X %02X %02X ", $kgm_pci_cfg_value & 0xFF, ($kgm_pci_cfg_value >> 8) & 0xFF, ($kgm_pci_cfg_value >> 16) & 0xFF, ($kgm_pci_cfg_value >> 24) & 0xFF - set $off = $off + 4 - end - printf "\n" - - # check for pcie extended capability config space - _pci_cfg_value $bus $dev $fcn $off 8 - if $kgm_pci_cfg_value < 0xff - while $off < 4096 - _pci_cfg_value $bus $dev $fcn $off 32 - if ($off & 0xF) == 0 - printf "\n%08X: ", $kgm_pci_cfg_addr - end - printf "%02X %02X %02X %02X ", $kgm_pci_cfg_value & 0xFF, ($kgm_pci_cfg_value >> 8) & 0xFF, ($kgm_pci_cfg_value >> 16) & 0xFF, ($kgm_pci_cfg_value >> 24) & 0xFF - set $off = $off + 4 - end - printf "\n" - end - end -end - -document pci_cfg_dump -Syntax: (gdb) pci_cfg_dump -| dump config space for the pci device located at :: -| if you specify an invalid/inaccessible pci device, nothing will be -| printed out. 
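-| Example (hypothetical device address): (gdb) pci_cfg_dump 0 0 0
-| dumps the 256-byte config space of bus 0, device 0, function 0, plus the
-| PCIe extended space when one is present.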
-end - -set $kgm_pci_cfg_bus_start = 0 -set $kgm_pci_cfg_bus_max = 8 -set $kgm_pci_cfg_device_max = 32 -set $kgm_pci_cfg_function_max = 8 -define _pci_cfg_scan - set $dump = $arg0 - - set $bus = $kgm_pci_cfg_bus_start - while $bus < $kgm_pci_cfg_bus_max - # check for bus:0:0 to see if we should - # probe this bus further - _pci_cfg_value $bus 0x0 0x0 0x0 32 - if $kgm_pci_cfg_value > 0 && $kgm_pci_cfg_value < 0xFFFFFFFF - - set $dev = 0 - while $dev < $kgm_pci_cfg_device_max - - set $fcn = 0 - while $fcn < $kgm_pci_cfg_function_max - _pci_cfg_value $bus $dev $fcn 0x0 32 - if $kgm_pci_cfg_value > 0 && $kgm_pci_cfg_value < 0xFFFFFFFF - if $dump == 0 - printf "%03X:%03X:%03X: %02X%02X %02X%02X", $bus, $dev, $fcn, ($kgm_pci_cfg_value >> 8) & 0xFF, $kgm_pci_cfg_value & 0xFF, ($kgm_pci_cfg_value >> 24) & 0xFF, ($kgm_pci_cfg_value >> 16) & 0xFF - _pci_cfg_value $bus $dev $fcn 0x8 32 - printf " %02X | %02X%02X%02X\n", $kgm_pci_cfg_value & 0xFF, ($kgm_pci_cfg_value >> 24) & 0xFF, ($kgm_pci_cfg_value >> 16) & 0xFF, ($kgm_pci_cfg_value >> 8) & 0xFF - else - printf " device: %03X:%03X:%03X\n", $bus, $dev, $fcn - pci_cfg_dump $bus $dev $fcn - printf "\n" - end - end - set $fcn = $fcn + 1 - end - set $dev = $dev + 1 - end - end - set $bus = $bus + 1 - end -end - -define pci_cfg_dump_all - _pci_cfg_scan 1 -end - -document pci_cfg_dump_all -Syntax: (gdb) pci_cfg_dump_all -| dump config spaces for scanned pci devices. the number of busses to scan -| is stored in $kgm_pci_cfg_bus_max. the default for that is 8. you can also -| specify the starting bus with $kgm_pci_cfg_bus_start. -end - -define pci_cfg_scan - printf "bus:dev:fcn: vendor device rev | class\n" - printf "---------------------------------------\n" - _pci_cfg_scan 0 -end - -document pci_cfg_scan -Syntax: (gdb) pci_cfg_scan -| scan for pci devices. the number of busses to scan is stored in -| $kgm_pci_cfg_bus_max. the default for that is 8. you can also specify the -| starting bus with $kgm_pci_cfg_bus_start. 
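-| Sample output line (values are illustrative only):
-| 000:000:000: 8086 0104 09 | 060000
-| i.e. vendor 0x8086, device 0x0104, revision 0x09, class 0x060000 (host bridge).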
-end - -define readioportint - set $kgm_readioportint_result = 0xBAD10AD - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_readioport_req_t) - set $kgm_pkt = (kdp_readioport_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_READIOPORT - set $kgm_pkt->hdr.len = sizeof(kdp_readioport_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->address = (uint16_t)$arg0 - set $kgm_pkt->nbytes = $arg1 >> 3 - set $kgm_pkt->lcpu = (uint16_t)$arg2 - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - set $kgm_pkt = (kdp_readioport_reply_t *)&manual_pkt.data - if ($kgm_pkt->error == 0) - if $arg1 == 8 - set $kgm_readioportint_result = *((uint8_t *) $kgm_pkt->data) - end - if $arg1 == 16 - set $kgm_readioportint_result = *((uint16_t *) $kgm_pkt->data) - end - if $arg1 == 32 - set $kgm_readioportint_result = *((uint32_t *) $kgm_pkt->data) - end - end -end - -define readioport8 - set $lcpu = $kgm_lcpu_self - if $argc > 1 - set $lcpu = $arg1 - end - readioportint $arg0 8 $lcpu - output /a $arg0 - printf ":\t0x%02hhx\n", $kgm_readioportint_result -end - -define readioport16 - set $lcpu = $kgm_lcpu_self - if $argc > 1 - set $lcpu = $arg1 - end - readioportint $arg0 16 $lcpu - output /a $arg0 - printf ":\t0x%04hx\n", $kgm_readioportint_result -end - -define readioport32 - set $lcpu = $kgm_lcpu_self - if $argc > 1 - set $lcpu = $arg1 - end - readioportint $arg0 32 $lcpu - output /a $arg0 - printf ":\t0x%08x\n", $kgm_readioportint_result -end - -document readioport8 -| See readioport32. -end - -document readioport16 -| See readioport32. -end - -document readioport32 -Syntax: (gdb) readioport32 [lcpu (kernel's numbering convention)] -| Read value stored in the specified IO port. The CPU can be optionally -| specified as well. -end - -define writeioportint - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_writeioport_req_t) - set $kgm_pkt = (kdp_writeioport_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_WRITEIOPORT - set $kgm_pkt->hdr.len = sizeof(kdp_writeioport_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->address = (uint16_t)$arg0 - set $kgm_pkt->nbytes = $arg1 >> 3 - set $kgm_pkt->lcpu = (uint16_t)$arg3 - if $arg1 == 8 - set *(uint8_t *)$kgm_pkt->data = (uint8_t)$arg2 - end - if $arg1 == 16 - set *(uint16_t *)$kgm_pkt->data = (uint16_t)$arg2 - end - if $arg1 == 32 - set *(uint32_t *)$kgm_pkt->data = (uint32_t)$arg2 - end - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - set $kgm_pkt = (kdp_writeioport_reply_t *)&manual_pkt.data - set $kgm_writeioportint_result = $kgm_pkt->error -end - -define writeioport8 - set $lcpu = $kgm_lcpu_self - if $argc > 2 - set $lcpu = $arg2 - end - writeioportint $arg0 8 $arg1 $lcpu -end - -define writeioport16 - set $lcpu = $kgm_lcpu_self - if $argc > 2 - set $lcpu = $arg2 - end - writeioportint $arg0 16 $arg1 $lcpu -end - -define writeioport32 - set $lcpu = $kgm_lcpu_self - if $argc > 2 - set $lcpu = $arg2 - end - writeioportint $arg0 32 $arg1 $lcpu -end - -document writeioport8 -| See writeioport32. -end - -document writeioport16 -| See writeioport32. -end - -document writeioport32 -Syntax: (gdb) writeioport32 [lcpu (kernel's numbering convention)] -| Write the value to the specified IO port. 
The size of the value is -| determined by the name of the command. The CPU used can be optionally -| specified. -end - -define readmsr64int - set $kgm_readmsr64int_result = 0xBAD10AD - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_readmsr64_req_t) - set $kgm_pkt = (kdp_readmsr64_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_READMSR64 - set $kgm_pkt->hdr.len = sizeof(kdp_readmsr64_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->address = (uint32_t)$arg0 - set $kgm_pkt->lcpu = (uint16_t)$arg1 - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - set $kgm_pkt = (kdp_readmsr64_reply_t *)&manual_pkt.data - if ($kgm_pkt->error == 0) - set $kgm_readmsr64int_result = *((uint64_t *) $kgm_pkt->data) - end -end - -define readmsr64 - set $lcpu = $kgm_lcpu_self - if $argc > 1 - set $lcpu = $arg1 - end - readmsr64int $arg0 $lcpu - output /a $arg0 - printf ":\t0x%016llx\n", $kgm_readmsr64int_result -end - -define writemsr64int - # set up the manual KDP packet - set manual_pkt.input = 0 - set manual_pkt.len = sizeof(kdp_writemsr64_req_t) - set $kgm_pkt = (kdp_writemsr64_req_t *)&manual_pkt.data - set $kgm_pkt->hdr.request = KDP_WRITEMSR64 - set $kgm_pkt->hdr.len = sizeof(kdp_writemsr64_req_t) - set $kgm_pkt->hdr.is_reply = 0 - set $kgm_pkt->hdr.seq = 0 - set $kgm_pkt->hdr.key = 0 - set $kgm_pkt->address = (uint32_t)$arg0 - set $kgm_pkt->lcpu = (uint16_t)$arg2 - set *(uint64_t *)$kgm_pkt->data = (uint64_t)$arg1 - set manual_pkt.input = 1 - # dummy to make sure manual packet is executed - set $kgm_dummy = &_mh_execute_header - set $kgm_pkt = (kdp_writemsr64_reply_t *)&manual_pkt.data - set $kgm_writemsr64int_result = $kgm_pkt->error -end - -define writemsr64 - set $lcpu = $kgm_lcpu_self - if $argc > 2 - set $lcpu = $arg2 - end - writemsr64int $arg0 $arg1 $lcpu -end - -document writemsr64 -Syntax: (gdb) writemsr64 [lcpu (kernel's numbering convention)] -| Write to the specified MSR. The CPU can be optionally specified. -end - -document readmsr64 -Syntax: (gdb) readmsr64 [lcpu (kernel's numbering convention)] -| Read the specified MSR. The CPU can be optionally specified. 
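-| Example: (gdb) readmsr64 0x1B reads IA32_APIC_BASE on the current CPU,
-| the same MSR the APIC macros below use to detect x2APIC mode.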
-end - -# default if we can't find a registry entry -set $kgm_ioapic_addr = 0xFEC00000 -set $kgm_ioapic_init = 0 - -set $_ioapic_index_off = 0x00 -set $_ioapic_data_off = 0x10 -set $_ioapic_eoi_off = 0x40 - -set $_ioapic_index_id = 0x00 -set $_ioapic_index_ver = 0x01 -set $_ioapic_index_redir_base = 0x10 - -set $_apic_vector_mask = 0xFF -set $_apic_timer_tsc_deadline = 0x40000 -set $_apic_timer_periodic = 0x20000 -set $_apic_masked = 0x10000 -set $_apic_trigger_level = 0x08000 -set $_apic_polarity_high = 0x02000 -set $_apic_pending = 0x01000 - -define _ioapic_init - if $kgm_ioapic_init == 0 - strcmp_arg_pack64 'i' 'o' '-' 'a' 'p' 'i' 'c' 0 - setfindregistrystr $kgm_strcmp_arg - - set $olddepth = $kgm_reg_depth_max - set $kgm_reg_depth_max = 3 - _findregistryentry - set $kgm_reg_depth_max = $olddepth - - if $kgm_registry_entry - strcmp_arg_pack64 'P' 'h' 'y' 's' 'i' 'c' 'a' 'l' - set $Physical = $kgm_strcmp_arg - strcmp_arg_pack64 ' ' 'A' 'd' 'd' 'r' 'e' 's' 's' - set $_Address = $kgm_strcmp_arg - setfindregistrystr $Physical $_Address - - _findregistryprop $kgm_registry_entry - if $kgm_registry_value - set $kgm_ioapic_addr = ((OSNumber *) $kgm_registry_value)->value - end - end - set $kgm_ioapic_index_addr = $kgm_ioapic_addr + $_ioapic_index_off - set $kgm_ioapic_data_addr = $kgm_ioapic_addr + $_ioapic_data_off - set $kgm_ioapic_init = 1 - end -end - -define _ioapic_addr_value - _ioapic_init - writephysint $kgm_ioapic_index_addr 8 $arg0 $kgm_lcpu_self - if $argc > 1 - writephysint $kgm_ioapic_data_addr 32 $arg1 $kgm_lcpu_self - else - readphysint $kgm_ioapic_data_addr 32 $kgm_lcpu_self - set $kgm_ioapic_value = $kgm_readphysint_result - end -end - -define _apic_print - set $value = $arg0 - - printf "[VEC=%3d", $value & $_apic_vector_mask - if $value & $_apic_masked - printf " MASK=yes" - else - printf " MASK=no " - end - - if $value & $_apic_trigger_level - printf " TRIG=level" - else - printf " TRIG=edge " - end - - if $value & $_apic_polarity_high - printf " POL=high" - else - printf " POL=low " - end - - if $value & $_apic_pending - printf " PEND=yes" - else - printf " PEND=no " - end - - if $value & $_apic_timer_periodic - printf " PERIODIC" - end - if $value & $_apic_timer_tsc_deadline - printf " TSC_DEADLINE" - end - - printf "]\n" -end - -define ioapic_read32 - if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) - printf "ioapic_read32 not supported on this architecture.\n" - else - _ioapic_addr_value $arg0 - printf "IOAPIC[0x%02X]: 0x%08X\n", $arg0, $kgm_ioapic_value - end -end - -document ioapic_read32 -Syntax: (gdb) ioapic_read -| Read the IOAPIC register at the offset specified. -end - -define ioapic_write32 - if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) - printf "ioapic_write32 not supported on this architecture.\n" - else - _ioapic_addr_value $arg0 $arg1 - end -end - -document ioapic_write32 -Syntax: (gdb) ioapic_write32 -| Write the IOAPIC register at the offset specified. 
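-| Offsets 0x10 and up address the redirection table: entry N occupies index
-| 0x10 + 2*N (low half) and 0x10 + 2*N + 1 (high half), as walked by
-| ioapic_dump below.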
-end - -define ioapic_dump - if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) - printf "ioapic_dump not supported on this architecture.\n" - else - # id - _ioapic_addr_value $_ioapic_index_id - printf "IOAPIC[0x%02X] ID: 0x%08X\n", $_ioapic_index_id, $kgm_ioapic_value - - # version - _ioapic_addr_value $_ioapic_index_ver - set $maxredir = (($kgm_ioapic_value & 0xFF0000) >> 16) + 1 - - printf "IOAPIC[0x%02X] VERSION: 0x%08X [", $_ioapic_index_ver, $kgm_ioapic_value - printf "MAXREDIR=%02d PRQ=%d VERSION=0x%02X]\n", $maxredir, ($kgm_ioapic_value >> 15) & 0x1, $kgm_ioapic_value & 0xFF - - # all the redir entries - set $i = 0 - while $i < $maxredir - set $addr0 = $_ioapic_index_redir_base + ($i << 1) - set $addr1 = $addr0 + 1 - _ioapic_addr_value $addr1 - printf "IOAPIC[0x%02X] IOREDIR%02d: 0x%08X", $addr0, $i, $kgm_ioapic_value - - _ioapic_addr_value $addr0 - printf "%08X ", $kgm_ioapic_value - _apic_print $kgm_ioapic_value - set $i = $i + 1 - end - end -end - -document ioapic_dump -Syntax: (gdb) ioapic_dump -| Dump all the IOAPIC entries. -end - - -set $_lapic_base_addr = 0xFEE00000 -set $_lapic_id = 0x20 -set $_lapic_version = 0x30 -set $_lapic_tpr = 0x80 -set $_lapic_apr = 0x90 -set $_lapic_ppr = 0xA0 -set $_lapic_eoi = 0xB0 -set $_lapic_ldr = 0xD0 -set $_lapic_dfr = 0xE0 -set $_lapic_sivr = 0xF0 - -set $_lapic_isr_size = 0x10 -set $_lapic_isr_num = 8 -set $_lapic_isr0 = 0x100 -set $_lapic_tmr0 = 0x180 -set $_lapic_irr0 = 0x200 - -set $_lapic_esr = 0x280 -set $_lapic_esr_register = 0x80 -set $_lapic_esr_recv_vect = 0x40 -set $_lapic_esr_send_vect = 0x20 - -set $_lapic_icr0 = 0x300 -set $_lapic_icr1 = 0x310 - -set $_lapic_lvt_timer = 0x320 -set $_lapic_lvt_thermal = 0x330 -set $_lapic_lvt_pmcr = 0x340 -set $_lapic_lvt_lint0 = 0x350 -set $_lapic_lvt_lint1 = 0x360 -set $_lapic_lvt_error = 0x370 - -set $_lapic_icr = 0x380 -set $_lapic_ccr = 0x390 -set $_lapic_dcr = 0x3E0 - -set $_apic_cfg_msr = 0x1B -set $_apic_cfg_msr_x2EN = 0x00000C00 -set $_x2apic_enabled = -1 - -# _lapic_addr $offset returns the actual address to use -define _lapic_addr - if $_x2apic_enabled < 0 - readmsr64int $_apic_cfg_msr $kgm_lcpu_self - if ($kgm_readmsr64int_result & $_apic_cfg_msr_x2EN) == $_apic_cfg_msr_x2EN - set $_x2apic_enabled = 1 - else - set $_x2apic_enabled = 0 - end - end - - if $_x2apic_enabled - # x2APIC addresses are MSRs that use xAPIC offsets that - # are 4-bit shifted - set $kgm_lapic_addr = $arg0 >> 4 - else - set $kgm_lapic_addr = $_lapic_base_addr + $arg0 - end -end - -# _lapic_addr_value $offset $lcpu -define _lapic_addr_value - _lapic_addr $arg0 - if $_x2apic_enabled - readmsr64int $kgm_lapic_addr $arg1 - set $kgm_lapic_value = $kgm_readmsr64int_result - else - readphysint $kgm_lapic_addr 32 $arg1 - set $kgm_lapic_value = $kgm_readphysint_result - end -end - -# lapic_read32 $offset [$lcpu] -define lapic_read32 - if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any) - printf "lapic_read32 not supported on this architecture.\n" - else - set $lcpu = $kgm_lcpu_self - if $argc > 1 - set $lcpu = $arg1 - end - _lapic_addr_value $arg0 $lcpu - printf "LAPIC[0x%03X]: 0x%08X\n", $arg0, $kgm_lapic_value - end -end - -document lapic_read32 -Syntax: (gdb) apic_read32_cpu [lcpu (kernel's numbering convention)] -| Read the LAPIC register at the offset specified. The CPU can be optionally -| specified. 
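-| Example: (gdb) lapic_read32 0x20 reads the local APIC ID register
-| (offset 0x20, per the $_lapic_id constant above).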
-end
-
-# lapic_write32 $offset $value [$lcpu]
-define lapic_write32
-    if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any)
-        printf "lapic_write32 not supported on this architecture.\n"
-    else
-        set $lcpu = $kgm_lcpu_self
-        if $argc > 2
-            set $lcpu = $arg2
-        end
-
-        _lapic_addr $arg0
-        if $_x2apic_enabled
-            writemsr64int $kgm_lapic_addr $arg1 $lcpu
-        else
-            writephysint $kgm_lapic_addr 32 $arg1 $lcpu
-        end
-    end
-end
-
-document lapic_write32
-Syntax: (gdb) lapic_write32 <offset> <value> [lcpu (kernel's numbering convention)]
-| Write the LAPIC register at the offset specified. The CPU can be optionally
-| specified.
-end
-
-# lapic_dump [lcpu]
-define lapic_dump
-    if (($kgm_mtype & $kgm_mtype_x86_mask) != $kgm_mtype_x86_any)
-        printf "lapic_dump not supported on this architecture.\n"
-    else
-        set $lcpu = $kgm_lcpu_self
-        if $argc > 0
-            set $lcpu = $arg0
-        end
-
-        _lapic_addr_value $_lapic_id $lcpu
-
-        # the above also figures out if we're using an xAPIC or an x2APIC
-        printf "LAPIC operating mode: "
-        if $_x2apic_enabled
-            printf " x2APIC\n"
-        else
-            printf " xAPIC\n"
-        end
-
-        printf "LAPIC[0x%03X] ID: 0x%08X\n", $_lapic_id, $kgm_lapic_value
-
-        _lapic_addr_value $_lapic_version $lcpu
-        set $lvt_num = ($kgm_lapic_value >> 16) + 1
-        printf "LAPIC[0x%03X] VERSION: 0x%08X [VERSION=%d MaxLVT=%d]\n", $_lapic_version, $kgm_lapic_value, $kgm_lapic_value & 0xFF, $lvt_num
-
-        _lapic_addr_value $_lapic_tpr $lcpu
-        printf "LAPIC[0x%03X] TASK PRIORITY: 0x%08X\n", $_lapic_tpr, $kgm_lapic_value
-
-        _lapic_addr_value $_lapic_ppr $lcpu
-        printf "LAPIC[0x%03X] PROCESSOR PRIORITY: 0x%08X\n", $_lapic_ppr, $kgm_lapic_value
-
-        _lapic_addr_value $_lapic_ldr $lcpu
-        printf "LAPIC[0x%03X] LOGICAL DEST: 0x%08X\n", $_lapic_ldr, $kgm_lapic_value
-
-        _lapic_addr_value $_lapic_dfr $lcpu
-        printf "LAPIC[0x%03X] DEST FORMAT: 0x%08X\n", $_lapic_dfr, $kgm_lapic_value
-
-        _lapic_addr_value $_lapic_sivr $lcpu
-        printf "LAPIC[0x%03X] SPURIOUS VECTOR: 0x%08X [VEC=%3d ENABLED=%d]\n", $_lapic_sivr, $kgm_lapic_value, $kgm_lapic_value & $_apic_vector_mask, ($kgm_lapic_value & 0x100) >> 8
-
-        set $i = 0
-        while $i < $_lapic_isr_num
-            set $addr = $_lapic_isr0 + $i * $_lapic_isr_size
-            _lapic_addr_value $addr $lcpu
-            printf "LAPIC[0x%03X] ISR[%03d:%03d]: 0x%08X\n", $addr, 32*($i + 1) - 1, 32*$i, $kgm_lapic_value
-            set $i = $i + 1
-        end
-
-        set $i = 0
-        while $i < $_lapic_isr_num
-            set $addr = $_lapic_tmr0 + $i * $_lapic_isr_size
-            _lapic_addr_value $addr $lcpu
-            printf "LAPIC[0x%03X] TMR[%03d:%03d]: 0x%08X\n", $addr, 32*($i + 1) - 1, 32*$i, $kgm_lapic_value
-            set $i = $i + 1
-        end
-
-        set $i = 0
-        while $i < $_lapic_isr_num
-            set $addr = $_lapic_irr0 + $i * $_lapic_isr_size
-            _lapic_addr_value $addr $lcpu
-            printf "LAPIC[0x%03X] IRR[%03d:%03d]: 0x%08X\n", $addr, 32*($i + 1) - 1, 32*$i, $kgm_lapic_value
-            set $i = $i + 1
-        end
-
-        _lapic_addr_value $_lapic_esr $lcpu
-        printf "LAPIC[0x%03X] ERROR STATUS: 0x%08X ", $_lapic_esr, $kgm_lapic_value
-        if $kgm_lapic_value
-            printf "["
-        end
-        if $kgm_lapic_value & $_lapic_esr_register
-            printf "Register "
-        end
-        if $kgm_lapic_value & $_lapic_esr_recv_vect
-            printf "Received Vector "
-        end
-        if $kgm_lapic_value & $_lapic_esr_send_vect
-            printf "Send Vector"
-        end
-        if $kgm_lapic_value
-            printf "]"
-        end
-        printf "\n"
-
-        _lapic_addr_value $_lapic_icr1 $lcpu
-        printf "LAPIC[0x%03X] Interrupt Command: 0x%08X [DEST=%d]\n", $_lapic_icr0, $kgm_lapic_value, $kgm_lapic_value >> 24
-        _lapic_addr_value $_lapic_icr0 $lcpu
-        printf " 0x%08X ", $kgm_lapic_value
-        _apic_print $kgm_lapic_value
-
-        if
$lvt_num > 0 - _lapic_addr_value $_lapic_lvt_timer $lcpu - printf "LAPIC[0x%03X] LVT Timer: 0x%08X ", $_lapic_lvt_timer, $kgm_lapic_value - _apic_print $kgm_lapic_value - end - - if $lvt_num > 1 - _lapic_addr_value $_lapic_lvt_lint0 $lcpu - printf "LAPIC[0x%03X] LVT LINT0: 0x%08X ", $_lapic_lvt_lint0, $kgm_lapic_value - _apic_print $kgm_lapic_value - end - - if $lvt_num > 2 - _lapic_addr_value $_lapic_lvt_lint1 $lcpu - printf "LAPIC[0x%03X] LVT LINT1: 0x%08X ", $_lapic_lvt_lint1, $kgm_lapic_value - _apic_print $kgm_lapic_value - end - - if $lvt_num > 3 - _lapic_addr_value $_lapic_lvt_error $lcpu - printf "LAPIC[0x%03X] LVT Error: 0x%08X ", $_lapic_lvt_error, $kgm_lapic_value - _apic_print $kgm_lapic_value - end - - if $lvt_num > 4 - _lapic_addr_value $_lapic_lvt_pmcr $lcpu - printf "LAPIC[0x%03X] LVT PerfMon: 0x%08X ", $_lapic_lvt_pmcr, $kgm_lapic_value - _apic_print $kgm_lapic_value - end - - if $lvt_num > 5 - _lapic_addr_value $_lapic_lvt_thermal $lcpu - printf "LAPIC[0x%03X] LVT Thermal: 0x%08X ", $_lapic_lvt_thermal, $kgm_lapic_value - _apic_print $kgm_lapic_value - end - - _lapic_addr_value $_lapic_dcr $lcpu - printf "LAPIC[0x%03X] Timer Divide: 0x%08X [Divide by ", $_lapic_dcr, $kgm_lapic_value - set $kgm_lapic_value = ($kgm_lapic_value & 0x8) >> 1 | $kgm_lapic_value & 0x3 - if $kgm_lapic_value == 0x7 - printf "1]\n" - else - printf "%d]\n", 2 << $kgm_lapic_value - end - - _lapic_addr_value $_lapic_icr $lcpu - printf "LAPIC[0x%03X] Timer Init Count: 0x%08X\n", $_lapic_icr, $kgm_lapic_value - - _lapic_addr_value $_lapic_ccr $lcpu - printf "LAPIC[0x%03X] Timer Cur Count: 0x%08X\n", $_lapic_ccr, $kgm_lapic_value - end -end - -document lapic_dump -Syntax: (gdb) lapic_dump [lcpu (kernel's numbering convention)] -| Dump all the LAPIC entries. The CPU can be optionally specified. 
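-| The macro auto-detects the APIC mode: if IA32_APIC_BASE (MSR 0x1B) has
-| bits 10-11 set, registers are read as x2APIC MSRs at (offset >> 4);
-| otherwise they are read as xAPIC MMIO at 0xFEE00000 + offset.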
-end - -define showknoteheader - printf " knote filter ident kn_ptr status\n" -end - -define showknoteint - set $kgm_knotep = ((struct knote *) $arg0) - printf " " - showptr $kgm_knotep - printf " " - set $kgm_filt = -$kgm_knotep->kn_kevent.filter - if ($kgm_filt == 1) - printf "EVFILT_READ " - end - if ($kgm_filt == 2) - printf "EVFILT_WRITE " - end - if ($kgm_filt == 3) - printf "EVFILT_AIO " - end - if ($kgm_filt == 4) - printf "EVFILT_VNODE " - end - if ($kgm_filt == 5) - printf "EVFILT_PROC " - end - if ($kgm_filt == 6) - printf "EVFILT_SIGNAL " - end - if ($kgm_filt == 7) - printf "EVFILT_TIMER " - end - if ($kgm_filt == 8) - printf "EVFILT_MACHPORT" - end - if ($kgm_filt == 9) - printf "EVFILT_FS " - end - if ($kgm_filt == 10) - printf "EVFILT_USER " - end - if ($kgm_filt == 11) - printf "EVFILT_SESSION " - end - printf "%7d ", $kgm_knotep->kn_kevent.ident - showptr $kgm_knotep->kn_ptr.p_fp - printf " " - if ($kgm_knotep->kn_status == 0) - printf "-" - else - if ($kgm_knotep->kn_status & 0x01) - printf "A" - end - if ($kgm_knotep->kn_status & 0x02) - printf "Q" - end - if ($kgm_knotep->kn_status & 0x04) - printf "Dis" - end - if ($kgm_knotep->kn_status & 0x08) - printf "Dr" - end - if ($kgm_knotep->kn_status & 0x10) - printf "Uw" - end - if ($kgm_knotep->kn_status & 0x20) - printf "Att" - end - if ($kgm_knotep->kn_status & 0x40) - printf "Stq" - end - end - printf "\n" -end - -define showprocknotes - showknoteheader - set $kgm_fdp = ((proc_t)$arg0)->p_fd - set $kgm_knlist = $kgm_fdp->fd_knlist - set $i = 0 - while (($i < $kgm_fdp->fd_knlistsize) && ($kgm_knlist != 0)) - set $kgm_kn = ((struct knote *)$kgm_knlist[$i].slh_first) - while ($kgm_kn != 0) - showknoteint $kgm_kn - set $kgm_kn = ((struct knote *)$kgm_kn->kn_link.sle_next) - end - set $i = $i + 1 - end - set $kgm_knhash = $kgm_fdp->fd_knhash - set $i = 0 - while (($i < $kgm_fdp->fd_knhashmask + 1) && ($kgm_knhash != 0)) - set $kgm_kn = ((struct knote *)$kgm_knhash[$i].slh_first) - while ($kgm_kn != 0) - showknoteint $kgm_kn - set $kgm_kn = ((struct knote *)$kgm_kn->kn_link.sle_next) - end - set $i = $i + 1 - end -end - -define showallknotes - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - showtaskheader - showtaskint $kgm_taskp - showprocknotes $kgm_taskp->bsd_info - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end -end -document showprocknotes -Syntax: showprocknotes -| Displays filter and status information for every kevent registered for -| the process. 
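-| The status letters printed above are a best-effort decode of kn_status
-| bits: A=active, Q=queued, Dis=disabled, Dr=dropping, Uw=use-wait,
-| Att=attaching, Stq=stay-queued.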
-end - -# -# Device node related debug macros -# - -define _showtty - set $kgm_tty = (struct tty *) $arg0 - printf "tty struct at " - showptr $kgm_tty - printf "\n" - printf "-last input to raw queue:\n" - p $kgm_tty->t_rawq->c_cs - printf "-last input to canonical queue:\n" - p $kgm_tty->t_canq->c_cs - printf "-last output data:\n" - p $kgm_tty->t_outq->c_cs - printf "state:\n" - if ($kgm_tty->t_state & 0x00000001) - printf " TS_SO_OLOWAT (Wake up when output <= low water)\n" - end - if ($kgm_tty->t_state & 0x00000002) - printf " TS_ASYNC (async I/O mode)\n" - else - printf " - (synchronous I/O mode)\n" - end - if ($kgm_tty->t_state & 0x00000004) - printf " TS_BUSY (Draining output)\n" - end - if ($kgm_tty->t_state & 0x00000008) - printf " TS_CARR_ON (Carrier is present)\n" - else - printf " - (Carrier is NOT present)\n" - end - if ($kgm_tty->t_state & 0x00000010) - printf " TS_FLUSH (Outq has been flushed during DMA)\n" - end - if ($kgm_tty->t_state & 0x00000020) - printf " TS_ISOPEN (Open has completed)\n" - else - printf " - (Open has NOT completed)\n" - end - if ($kgm_tty->t_state & 0x00000040) - printf " TS_TBLOCK (Further input blocked)\n" - end - if ($kgm_tty->t_state & 0x00000080) - printf " TS_TIMEOUT (Wait for output char processing)\n" - end - if ($kgm_tty->t_state & 0x00000100) - printf " TS_TTSTOP (Output paused)\n" - end - if ($kgm_tty->t_state & 0x00000200) - printf " TS_WOPEN (Open in progress)\n" - end - if ($kgm_tty->t_state & 0x00000400) - printf " TS_XCLUDE (Tty requires exclusivity)\n" - end - if ($kgm_tty->t_state & 0x00000800) - printf " TS_BKSL (State for lowercase \\ work)\n" - end - if ($kgm_tty->t_state & 0x00001000) - printf " TS_CNTTB (Counting tab width, ignore FLUSHO)\n" - end - if ($kgm_tty->t_state & 0x00002000) - printf " TS_ERASE (Within a \\.../ for PRTRUB)\n" - end - if ($kgm_tty->t_state & 0x00004000) - printf " TS_LNCH (Next character is literal)\n" - end - if ($kgm_tty->t_state & 0x00008000) - printf " TS_TYPEN (Retyping suspended input (PENDIN))\n" - end - if ($kgm_tty->t_state & 0x00010000) - printf " TS_CAN_BYPASS_L_RINT (Device in "raw" mode)\n" - end - if ($kgm_tty->t_state & 0x00020000) - printf " TS_CONNECTED (Connection open)\n" - else - printf " - (Connection NOT open)\n" - end - if ($kgm_tty->t_state & 0x00040000) - printf " TS_SNOOP (Device is being snooped on)\n" - end - if ($kgm_tty->t_state & 0x80000) - printf " TS_SO_OCOMPLETE (Wake up when output completes)\n" - end - if ($kgm_tty->t_state & 0x00100000) - printf " TS_ZOMBIE (Connection lost)\n" - end - if ($kgm_tty->t_state & 0x00200000) - printf " TS_CAR_OFLOW (For MDMBUF - handle in driver)\n" - end - if ($kgm_tty->t_state & 0x00400000) - printf " TS_CTS_OFLOW (For CCTS_OFLOW - handle in driver)\n" - end - if ($kgm_tty->t_state & 0x00800000) - printf " TS_DSR_OFLOW (For CDSR_OFLOW - handle in driver)\n" - end - # xxx todo: do we care about decoding flags? 
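-    # Illustrative t_state value (a sketch, not taken from a live system):
-    # a healthy open tty typically carries TS_CARR_ON|TS_ISOPEN|TS_CONNECTED,
-    # i.e. t_state == 0x00020028 given the bit definitions decoded above.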
- printf "flags: 0x%08x\n", $kgm_tty->t_flags - printf "foreground process group: " - showptr $kgm_tty->t_pgrp - printf "\n" - printf "enclosing session: " - showptr $kgm_tty->t_session - printf "\n" - printf "Termios:\n" - # XXX todo: decode these flags, someday - printf " Input flags: 0x%08x\n", $kgm_tty->t_termios.c_iflag - printf " Output flags: 0x%08x\n", $kgm_tty->t_termios.c_oflag - printf " Control flags: 0x%08x\n", $kgm_tty->t_termios.c_cflag - printf " Local flags: 0x%08x\n", $kgm_tty->t_termios.c_lflag - printf " Input speed: %d\n", $kgm_tty->t_termios.c_ispeed - printf " Output speed: %d\n", $kgm_tty->t_termios.c_ospeed - # XXX todo: useful to decode t_winsize? t_iokit? c_cc? anything else? - printf "high watermark: %d bytes\n", $kgm_tty->t_hiwat - printf "low watermark: %d bytes\n", $kgm_tty->t_lowat -end - -define _showwhohas - # _showwhohas - printf "fd " - printf "fileglob " -showptrhdrpad - printf "vnode " -showptrhdrpad - printf "process " -showptrhdrpad - printf "name\n" - - set $kgm_swh_devnode_dev = (((int) $arg0) << 24) | (int) $arg1 - # iterate all tasks to iterate all processes to iterate all - # open files in each process to see who has a given major/minor - # device open - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - set $kgm_procp = (proc_t) $kgm_taskp->bsd_info - set $kgm_spf_filedesc = $kgm_procp->p_fd - set $kgm_spf_last = $kgm_spf_filedesc->fd_lastfile - set $kgm_spf_ofiles = $kgm_spf_filedesc->fd_ofiles - set $kgm_spf_count = 0 - while (($kgm_spf_ofiles != 0) && ($kgm_spf_count <= $kgm_spf_last)) - # only files currently open - if ($kgm_spf_ofiles[$kgm_spf_count] != 0) - set $kgm_spf_fg = $kgm_spf_ofiles[$kgm_spf_count].f_fglob - if ($kgm_spf_fg->fg_type == 1) - # display fd #, fileglob & vnode address, proc name - set $kgm_swh_m_vnode = (vnode_t) $kgm_spf_fg->fg_data - set $kgm_swh_m_vtype = (enum vtype) $kgm_swh_m_vnode->v_type - if (($kgm_swh_m_vtype == VBLK) || ($kgm_swh_m_vtype == VCHR)) && ((((devnode_t *)$kgm_swh_m_vnode->v_data)->dn_typeinfo.dev) == $kgm_swh_devnode_dev) - printf "%-5d ", $kgm_spf_count - showptr $kgm_spf_fg - printf " " - showptr $kgm_swh_m_vnode - printf " " - showptr $kgm_procp - printf " %s\n", $kgm_procp->p_comm - end - end - end - set $kgm_spf_count = $kgm_spf_count + 1 - end - - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end -end - -define _showvnodedev_cpty - set $kgm_ptmx_major = (int) $arg0 - set $kgm_ptmx_minor = (int) $arg1 - set $kgm_ptmx_ioctl = _state.pis_ioctl_list[$kgm_ptmx_minor] - set $kgm_ptmx_ioctl = _state.pis_ioctl_list[$kgm_ptmx_minor] - printf " ptmx_ioctl struct at " - showptr $kgm_ptmx_ioctl - printf "\n" - printf " flags:\n" - if ($kgm_ptmx_ioctl->pt_flags & 0x0008) - printf " PF_PKT (packet mode)\n" - end - if ($kgm_ptmx_ioctl->pt_flags & 0x0010) - printf " PF_STOPPED (user told stopped)\n" - end - if ($kgm_ptmx_ioctl->pt_flags & 0x0020) - printf " PF_REMOTE (remote and flow controlled input)\n" - end - if ($kgm_ptmx_ioctl->pt_flags & 0x0040) - printf " PF_NOSTOP" - end - if ($kgm_ptmx_ioctl->pt_flags & 0x0080) - printf " PF_UCNTL (user control mode)\n" - end - if ($kgm_ptmx_ioctl->pt_flags & 0x0100) - printf " PF_UNLOCKED (slave unlock - master open resets)\n" - end - if ($kgm_ptmx_ioctl->pt_flags & 0x0200) - printf " PF_OPEN_M (master is open)\n" - # XXX we should search for who has the master open, but - # XXX each master gets the same minor, even though it - # XXX gets a different vnode. 
we chold probably change - # XXX this, but to do it we would need some way of - # XXX expressing the information in the vnode structure - # XXX somewhere. If we *did* change it, it would buy us - # XXX the ability to determine who has the corresponding - # XXX master end of the pty open - else - printf " PF_OPEN_M (master is closed)\n" - end - if ($kgm_ptmx_ioctl->pt_flags & 0x0400) - printf " PF_OPEN_S (slave is open)\n" - printf "---vvvvv--- fds open on this device ---vvvvv---\n" - _showwhohas ($kgm_ptmx_major) ($kgm_ptmx_minor) - printf "---^^^^^--- fds open on this device ---^^^^^---\n" - else - printf " - (slave is closed)\n" - end - printf "TTY Specific Information\n" - _showtty $kgm_ptmx_ioctl->pt_tty -end - -define showvnodedev - if ($argc == 1) - set $kgm_vnode = (vnode_t) $arg0 - set $kgm_vtype = (enum vtype) $kgm_vnode->v_type - if (($kgm_vtype == VBLK) || ($kgm_vtype == VCHR)) - set $kgm_devnode = (devnode_t *) $kgm_vnode->v_data - set $kgm_devnode_dev = $kgm_devnode->dn_typeinfo.dev - set $kgm_devnode_major = ($kgm_devnode_dev >> 24) & 0xff - set $kgm_devnode_minor = $kgm_devnode_dev & 0x00ffffff - - # boilerplate device information for a vnode - printf "Device Info:\n" - printf " vnode: " - showptr $kgm_vnode - printf "\n" - printf " type: " - if ($kgm_vtype == VBLK) - printf "VBLK " - end - if ($kgm_vtype == VCHR) - printf "VCHR" - end - printf "\n" - printf " name: %s\n", $kgm_vnode->v_name - printf " major, minor: %d, %d\n", $kgm_devnode_major, $kgm_devnode_minor - printf " mode 0%o\n", $kgm_devnode->dn_mode - printf " owner (u,g): %d %d", $kgm_devnode->dn_uid, $kgm_devnode->dn_gid - printf "\n" - - # decode device specific data - printf "Device Specific Information: " - if ($kgm_vtype == VBLK) - printf " Sorry, I do not know how to decode block devices yet!\n" - printf " Maybe you can write me!" 
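-                # dev_t layout used by this macro: major = (dev >> 24) & 0xff,
-                # minor = dev & 0x00ffffff; e.g. a hypothetical dev of
-                # 0x01000002 decodes as major 1, minor 2.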
- end - if ($kgm_vtype == VCHR) - # Device information; this is scanty - # range check - if ($kgm_devnode_major > 42) || ($kgm_devnode_major < 0) - printf "Invalid major #\n" - else - # static assignments in conf - if ($kgm_devnode_major == 0) - printf "Console mux device\n" - else - if ($kgm_devnode_major == 2) - printf "Current tty alias\n" - else - if ($kgm_devnode_major == 3) - printf "NULL device\n" - else - if ($kgm_devnode_major == 4) - printf "Old pty slave\n" - else - if ($kgm_devnode_major == 5) - printf "Old pty master\n" - else - if ($kgm_devnode_major == 6) - printf "Kernel log\n" - else - if ($kgm_devnode_major == 12) - printf "Memory devices\n" - else - # Statically linked dynamic assignments - if cdevsw[$kgm_devnode_major].d_open == ptmx_open - printf "Cloning pty master\n" - _showvnodedev_cpty ($kgm_devnode_major) ($kgm_devnode_minor) - else - if cdevsw[$kgm_devnode_major].d_open == ptsd_open - printf "Cloning pty slave\n" - _showvnodedev_cpty ($kgm_devnode_major) ($kgm_devnode_minor) - else - printf "RESERVED SLOT\n" - end - end - end - end - end - end - end - end - end - end - end - else - showptr $kgm_vnode - printf " is not a device\n" - end - else - printf "| Usage:\n|\n" - help showvnodedev - end -end -document showvnodedev -Syntax: (gdb) showvnodedev -| showvnodedev Display information about a device vnode -end - -define showtty - if ($argc == 1) - _showtty $arg0 - else - printf "| Usage:\n|\n" - help showtty - end -end -document showtty -Syntax: (gdb) showtty -| showtty Display information about a struct tty -end - -define showeventsourceobject - set $kgm_vt = *((void **) $arg1) - if $kgm_lp64 - set $kgm_vt = $kgm_vt - 16 - end - pcprint $kgm_vt -end -document showeventsourceobject -Syntax: (gdb) showeventsourceobject -| Routine to display information about an IOEventSource subclass. -end - -define showworkloopallocator - set $kgm_workloop = (struct IOWorkLoop*)$arg0 - set $kgm_bt = (void**)$kgm_workloop->reserved->allocationBacktrace - set $kgm_bt_count = 0 - while $kgm_bt_count != (sizeof(IOWorkLoop::ExpansionData.allocationBacktrace) / sizeof(IOWorkLoop::ExpansionData.allocationBacktrace[0])) - set $kgm_frame_address = (void*)$kgm_bt[$kgm_bt_count] - if $kgm_frame_address != 0 - if (((unsigned long) $kgm_frame_address < (unsigned long) &_mh_execute_header || \ - (unsigned long) $kgm_frame_address >= (unsigned long) &last_kernel_symbol ) \ - && ($kgm_show_kmod_syms == 0)) - showkmodaddr $kgm_frame_address - else - output /a $kgm_frame_address - end - printf "\n" - end - set $kgm_bt_count = $kgm_bt_count + 1 - end -end -document showworkloopallocator -Syntax: (gdb) showworkloopallocator -| Routine to display the backtrace of the thread which allocated the workloop in question. Only -| valid on DEBUG kernels. 
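-| Example (the workloop address is hypothetical, for illustration only):
-| (gdb) showworkloopallocator (struct IOWorkLoop *)0xffffff8035b1c000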
-end - -define showworkloopeventsources - set $kgm_eventsource = (struct IOEventSource*)$arg0 - while $kgm_eventsource != 0 - printf " " - printf "EventSource:\t" - showptr $kgm_eventsource - printf " Description: " - showeventsourceobject _ $kgm_eventsource - printf "\n" - if $kgm_eventsource->action != 0 - printf " " - printf "Action: \t" - pcprint $kgm_eventsource->action - printf "\n" - end - if $kgm_eventsource->owner != 0 - printf " " - printf "Owner: \t" - showptr $kgm_eventsource->owner - printf " Description: " - showeventsourceobject _ $kgm_eventsource->owner - printf "\n" - end - set $kgm_eventsource = $kgm_eventsource->eventChainNext - printf "\n" - end -end -document showworkloopeventsources -Syntax: (gdb) showworkloopeventsources -| Routine to walk an IOEventSource chain associated with an IOWorkLoop and print information -| about each event source in the chain. -end - -define showworkloopheader - printf "thread " - showptrhdrpad - printf " workloop " - showptrhdrpad - printf " pri state\tLockGroupName\n" -end -document showworkloopheader -Syntax: (gdb) showworkloopheader -| Routine to print out header info about an IOKit workloop. -end - -define showworkloop - set $kgm_workloopthread = (struct thread*)$arg0 - set $kgm_workloop = (struct IOWorkLoop*)$arg1 - showptr $kgm_workloopthread - printf " " - showptr $kgm_workloop - printf " %3d ", $kgm_workloopthread.sched_pri - set $kgm_state = $kgm_workloopthread.state - if $kgm_state & 0x80 - printf "I" - end - if $kgm_state & 0x40 - printf "P" - end - if $kgm_state & 0x20 - printf "A" - end - if $kgm_state & 0x10 - printf "H" - end - if $kgm_state & 0x08 - printf "U" - end - if $kgm_state & 0x04 - printf "R" - end - if $kgm_state & 0x02 - printf "S" - end - if $kgm_state & 0x01 - printf "W" - end - printf "\t\t" - set $kgm_gateLock = ( struct _IORecursiveLock *)$kgm_workloop->gateLock - if $kgm_gateLock != 0 - set $kgm_lockGroup = (struct _lck_grp_*)($kgm_gateLock->group) - printf "%s", $kgm_lockGroup->lck_grp_name - else - printf "No WorkLoop Lock found" - end - printf "\n\n" - - #Allocation backtrace is only valid on DEBUG kernels. - #printf "Allocation path:\n\n" - #showworkloopallocator $kgm_workloop - #printf "\n\n" - - if $kgm_workloop->eventChain != 0 - printf "Active event sources:\n\n" - showworkloopeventsources $kgm_workloop->eventChain - end - if $kgm_workloop->reserved->passiveEventChain != 0 - printf "Passive event sources:\n" - showworkloopeventsources $kgm_workloop->reserved->passiveEventChain - end -end -document showworkloop -Syntax: (gdb) showworkloop -| Routine to print out info about an IOKit workloop. -end - -define showallworkloopthreads - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - if ($kgm_actp->continuation == _ZN10IOWorkLoop10threadMainEv) - showworkloopheader - showworkloop $kgm_actp $kgm_actp->parameter - else - if ($kgm_actp->kernel_stack != 0) - if ($kgm_mtype == $kgm_mtype_x86_64) - #Warning: Grokking stack looking for hopeful workloops until we squirrel some info in thread_t. 
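# In C terms, the peek below amounts to this heuristic (0xB8 and 0x3C are
# offsets from the stack top that happen to hold a candidate IOWorkLoop*
# on this kernel; the -16 matches the LP64 vtable adjustment used
# elsewhere in these macros; a sketch only, not a stable interface):
#
#     void *cand = *(void **)(kstack + kstack_size - 0xB8);  /* x86_64 */
#     void *vt = *(void **)cand;        /* first word: vtable pointer */
#     if (lp64)
#         vt = (char *)vt - 16;
#     bool looks_like_workloop = (vt == &_ZTV10IOWorkLoop);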
- set $kgm_workloop = *((struct IOWorkLoop **)($kgm_actp->kernel_stack + kernel_stack_size - 0xB8)) - else - if ($kgm_mtype == $kgm_mtype_i386) - set $kgm_workloop = *((struct IOWorkLoop **)($kgm_actp->kernel_stack + kernel_stack_size - 0x3C)) - end - end - if ($kgm_workloop != 0) - set $kgm_vt = *((void **) $kgm_workloop) - if $kgm_lp64 - set $kgm_vt = $kgm_vt - 16 - end - if ($kgm_vt == &_ZTV10IOWorkLoop) - showworkloopheader - showworkloop $kgm_actp $kgm_workloop - end - end - end - end - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - printf "\n" -end -document showallworkloopthreads -Syntax: (gdb) showallworkloopthreads -| Routine to print out info about all IOKit workloop threads in the system. This macro will find -| all IOWorkLoop threads blocked in continuations and on i386 and x86_64 systems will make a -| best-effort guess to find any workloops that are actually not blocked in a continuation. For a -| complete list, it is best to compare the output of this macro against the output of 'showallstacks'. -end - -define showthreadfortid - set $kgm_id_found = 0 - - set $kgm_head_taskp = &tasks - set $kgm_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_taskp != $kgm_head_taskp - set $kgm_head_actp = &($kgm_taskp->threads) - set $kgm_actp = (struct thread *)($kgm_taskp->threads.next) - while $kgm_actp != $kgm_head_actp - set $kgm_thread = *(struct thread *)$kgm_actp - set $kgm_thread_id = $kgm_thread.thread_id - if ($kgm_thread_id == $arg0) - showptr $kgm_actp - printf "\n" - set $kgm_id_found = 1 - loop_break - end - set $kgm_actp = (struct thread *)($kgm_actp->task_threads.next) - end - if ($kgm_id_found == 1) - loop_break - end - set $kgm_taskp = (struct task *)($kgm_taskp->tasks.next) - end - if ($kgm_id_found == 0) - printf "Not a valid thread_id\n" - end -end - -document showthreadfortid -Syntax: showthreadfortid -|The thread structure contains a unique thread_id value for each thread. -|This command is used to retrieve the address of the thread structure(thread_t) -|corresponding to a given thread_id. -end - -define showtaskbusyportsint - set $kgm_isp = ((task_t)$arg0)->itk_space - set $kgm_iindex = 0 - while ( $kgm_iindex < $kgm_isp->is_table_size ) - set $kgm_iep = &($kgm_isp->is_table[$kgm_iindex]) - if $kgm_iep->ie_bits & 0x00020000 - set $kgm_port = ((ipc_port_t)$kgm_iep->ie_object) - if $kgm_port->ip_messages.data.port.msgcount > 0 - showport $kgm_port - end - end - set $kgm_iindex = $kgm_iindex + 1 - end -end - -define showtaskbusyports - showtaskbusyportsint $arg0 -end - -document showtaskbusyports -Syntax: showtaskbusyports -|Routine to print information about receive rights belonging to this task that -|have enqueued messages. This is often a sign of a blocked or hung process. -end - -define showallbusyports - set $kgm_head_taskp = &tasks - set $kgm_cur_taskp = (struct task *)($kgm_head_taskp->next) - while $kgm_cur_taskp != $kgm_head_taskp - showtaskbusyportsint $kgm_cur_taskp - set $kgm_cur_taskp = (struct task *)($kgm_cur_taskp->tasks.next) - end -end - -document showallbusyports -Syntax: showallbusyports -|Routine to print information about all receive rights on the system that -|have enqueued messages. 
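|
| Sketched in C, the scan behind these macros looks like this (0x00020000
| is the receive-right bit tested in ie_bits; field names follow the macro
| and are not a stable interface):
|
|     for (i = 0; i < space->is_table_size; i++) {
|         ipc_entry_t entry = &space->is_table[i];
|         if ((entry->ie_bits & 0x00020000) == 0)
|             continue;                        /* not a receive right */
|         ipc_port_t port = (ipc_port_t)entry->ie_object;
|         if (port->ip_messages.data.port.msgcount > 0)
|             show_port(port);                 /* messages are queued */
|     }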
-end
-
-define showallproviders
-    set $kgm_providerp = dtrace_provider
-    while $kgm_providerp
-        p *(dtrace_provider_t *)$kgm_providerp
-        printf "\n"
-        set $kgm_providerp = (dtrace_provider_t *)($kgm_providerp->dtpv_next)
-    end
-end
-
-document showallproviders
-Syntax: showallproviders
-| Display summary listing of all dtrace_providers
-end
-
-define showmodctlheader
-    printf "modctl "
-    showptrhdrpad
-    printf " stale "
-    showptrhdrpad
-    printf " symbols "
-    showptrhdrpad
-    printf " address "
-    showptrhdrpad
-    printf " size "
-    showptrhdrpad
-    printf " loadid loaded nenabled flags name\n"
-end
-
-define showmodctlint
-    set $kgm_modctlp = (struct modctl *)$arg0
-    showptr $kgm_modctlp
-    printf " "
-    showptr $kgm_modctlp->mod_stale
-    printf " "
-    showptr $kgm_modctlp->mod_user_symbols
-    printf " "
-    showptr $kgm_modctlp->mod_address
-    printf " "
-    showptr $kgm_modctlp->mod_size
-    printf " "
-    printf "%6d ", $kgm_modctlp->mod_loadcnt
-    printf "%6d ", $kgm_modctlp->mod_loaded
-    printf "%6d ", $kgm_modctlp->mod_nenabled
-    printf " 0x%x ", $kgm_modctlp->mod_flags
-    printf "%s\n", $kgm_modctlp->mod_modname
-end
-
-define showmodctl
-    showmodctlheader
-    showmodctlint $arg0
-end
-document showmodctl
-Syntax: (gdb) showmodctl <address>
-| Display info about a dtrace modctl
-end
-
-define showallmodctls
-    showmodctlheader
-    set $kgm_modctlp = (struct modctl *)dtrace_modctl_list
-    while $kgm_modctlp
-        showmodctlint $kgm_modctlp
-        set $kgm_modctlp = $kgm_modctlp->mod_next
-    end
-end
-document showallmodctls
-Syntax: (gdb) showallmodctls
-| Display summary listing of all dtrace modctls
-end
-
-define showfbtprobe
-    printf "Be very patient, this traverses a large list \n"
-    set $kgm_indx = 0
-    set $kgm_found = 0
-    set $kgm_depth = 0
-    while $kgm_indx < fbt_probetab_size && !$kgm_found
-        set $kgm_fbt_probep = (struct fbt_probe *)fbt_probetab[$kgm_indx]
-        set $kgm_depth = 0
-        if $kgm_fbt_probep
-            set $kgm_probeid = (struct fbt_probe *)$kgm_fbt_probep->fbtp_id
-            if $kgm_probeid == $arg0
-                set $kgm_found = 1
-                loop_break
-            else
-                set $kgm_fbt_probep = $kgm_fbt_probep->fbtp_hashnext
-                while $kgm_fbt_probep
-                    set $kgm_depth++
-                    set $kgm_probeid = (struct fbt_probe *)$kgm_fbt_probep->fbtp_id
-                    if $kgm_probeid == $arg0
-                        set $kgm_found = 1
-                        loop_break
-                    else
-                        set $kgm_fbt_probep = $kgm_fbt_probep->fbtp_hashnext
-                    end
-                end
-            end
-        end
-        if !$kgm_found
-            set $kgm_indx++
-        else
-            printf "fbt_probetab[index=%d], depth=%d, 0x%x\n", $kgm_indx, $kgm_depth, $kgm_fbt_probep
-            printf "(gdb) p *(struct fbt_probe *)0x%x\n", $kgm_fbt_probep
-            p *(struct fbt_probe *)$kgm_fbt_probep
-            set $kgm_fbtp_ctl = (struct fbt_probe *)$kgm_fbt_probep->fbtp_ctl
-            showmodctl $kgm_fbtp_ctl
-            loop_break
-        end
-    end
-end
-document showfbtprobe
-Syntax: (gdb) showfbtprobe <id>
-| Display info about an fbt probe given an id.
-| Traverses fbt_probetab and matches with fbtp_id.
-| The <id> is found using dtrace -l
-end
-
-define showzstacktrace
-    set $kgm_trace = (void*)$arg0
-    if ($argc == 1)
-        set $kgm_trace_size = 15
-    end
-    if ($argc == 2)
-        set $kgm_trace_size = $arg1
-    end
-    set $kgm_trace_current = 0
-    while ($kgm_trace_current < $kgm_trace_size)
-        set $kgm_trace_addr = (void**)$kgm_trace + $kgm_trace_current
-        set $kgm_trace_value = *((void**)$kgm_trace_addr)
-        #printf "\t\t"
-        output /a $kgm_trace_value
-        set $kgm_trace_current = $kgm_trace_current + 1
-        printf "\n"
-    end
-end
-
-document showzstacktrace
-Syntax: showzstacktrace <trace> [size]
-| Routine to print a stacktrace stored by OSBacktrace.
-| size is optional, defaults to 15.
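|
| In C, the walk reduces to the following (15 frames unless a size is
| given; printf("%p") stands in for gdb's symbolicating "output /a"):
|
|     void **frames = (void **)trace;
|     for (int i = 0; i < size; i++)
|         printf("%p\n", frames[i]);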
-end - -define showzalloc - set $kgm_zallocation = zallocations[$arg0] - print $kgm_zallocation - showztrace $kgm_zallocation->za_trace_index -end - -document showzalloc -Syntax: showzalloc -| Prints a zallocation from the zallocations array based off its index, -| and prints the associated symbolicated backtrace. -end - -define showztrace - set $kgm_ztrace = &ztraces[$arg0] - showztraceaddr $kgm_ztrace -end - -document showztrace -Syntax: showztrace -| Prints the backtrace from the ztraces array at index -end - -define showztraceaddr - print *$arg0 - showzstacktrace $arg0->zt_stack ($arg0)->zt_depth -end - -document showztraceaddr -Syntax: showztraceaddr -| Prints the struct ztrace passed in -end - -#TODO: Iterate through the hash table, or make top_ztrace accurate in the face of deallocations (better idea). -define showtopztrace - set $kgm_top_ztrace = top_ztrace - printf "Index: %d\n", (top_ztrace - ztraces) - showztraceaddr $kgm_top_ztrace -end - -document showtopztrace -Syntax: showtopztrace -| Shows the ztrace with the biggest size. (according to top_ztrace, not by iterating through the hash table) -end - -define showzallocs - set $kgm_zallocation_current_index = 0 - set $kgm_zallocations_count = 0 - set $kgm_max_zallocation = zleak_alloc_buckets - printf "INDEX ADDRESS " - if $kgm_lp64 - printf " " - end - printf "TRACE SIZE\n" - while ($kgm_zallocation_current_index < $kgm_max_zallocation) - set $kgm_zallocation_current = zallocations[$kgm_zallocation_current_index] - if ($kgm_zallocation_current->element != 0) - printf "%5d %p ", $kgm_zallocation_current_index, $kgm_zallocation_current->za_element - printf "%5d %6lu\n", $kgm_zallocation_current->za_trace_index, $kgm_zallocation_current->za_size - set $kgm_zallocations_count = $kgm_zallocations_count + 1 - end - set $kgm_zallocation_current_index = $kgm_zallocation_current_index + 1 - end - printf "Total allocations: %d\n", $kgm_zallocations_count -end - -document showzallocs -Syntax: showzallocs -| Prints all allocations in the zallocations table -end - -define showzallocsfortrace - set $kgm_zallocation_current_index = 0 - set $kgm_zallocations_count = 0 - set $kgm_max_zallocation = zleak_alloc_buckets - printf "INDEX ADDRESS " - if $kgm_lp64 - printf " " - end - printf "SIZE\n" - while ($kgm_zallocation_current_index < $kgm_max_zallocation) - set $kgm_zallocation_current = zallocations[$kgm_zallocation_current_index] - if ($kgm_zallocation_current->element != 0 && $kgm_zallocation_current->za_trace_index == $arg0) - printf "%5d %p ", $kgm_zallocation_current_index, $kgm_zallocation_current->za_element - printf "%6lu\n", $kgm_zallocation_current->size - set $kgm_zallocations_count = $kgm_zallocations_count + 1 - end - set $kgm_zallocation_current_index = $kgm_zallocation_current_index + 1 - end - printf "Total allocations: %d\n", $kgm_zallocations_count -end - -document showzallocsfortrace -Syntax: showzallocsfortrace -| Prints all allocations pointing to the passed in trace's index into ztraces by looking through zallocations table -end - -define showztraces - showztracesabove 0 -end - -document showztraces -Syntax: showztraces -| Prints all traces with size > 0 -end - -define showztracesabove - set $kgm_ztrace_current_index = 0 - set $kgm_ztrace_count = 0 - set $kgm_max_ztrace = zleak_trace_buckets - printf "INDEX SIZE\n" - while ($kgm_ztrace_current_index < $kgm_max_ztrace) - set $kgm_ztrace_current = ztraces[$kgm_ztrace_current_index] - if ($kgm_ztrace_current->zt_size > $arg0) - printf "%5d %6lu\n", $kgm_ztrace_current_index, 
$kgm_ztrace_current->zt_size - set $kgm_ztrace_count = $kgm_ztrace_count + 1 - end - set $kgm_ztrace_current_index = $kgm_ztrace_current_index + 1 - end - printf "Total traces: %d\n", $kgm_ztrace_count -end - -document showztracesabove -Syntax: showztracesabove -| Prints all traces with size greater than X -end - -define showztracehistogram - set $kgm_ztrace_current_index = 0 - set $kgm_ztrace_count = 0 - set $kgm_max_ztrace = zleak_trace_buckets - printf "INDEX HIT_COUNT COLLISIONS\n" - while ($kgm_ztrace_current_index < $kgm_max_ztrace) - set $kgm_ztrace_current = ztraces[$kgm_ztrace_current_index] - if ($kgm_ztrace_current->zt_hit_count != 0) - printf "%5d %5d %5d\n", $kgm_ztrace_current_index, $kgm_ztrace_current->zt_hit_count, $kgm_ztrace_current->zt_collisions - set $kgm_ztrace_count = $kgm_ztrace_count + 1 - end - set $kgm_ztrace_current_index = $kgm_ztrace_current_index + 1 - end - printf "Total traces: %d\n", $kgm_ztrace_count -end - -document showztracehistogram -Syntax: showztracehistogram -| Prints the histogram of the ztrace table -end - -define showzallochistogram - set $kgm_zallocation_current_index = 0 - set $kgm_zallocations_count = 0 - set $kgm_max_zallocation = zleak_alloc_buckets - printf "INDEX HIT_COUNT\n" - while ($kgm_zallocation_current_index < $kgm_max_zallocation) - set $kgm_zallocation_current = zallocations[$kgm_zallocation_current_index] - if ($kgm_zallocation_current->za_hit_count != 0) - printf "%5d %5d\n", $kgm_zallocation_current_index, $kgm_zallocation_current->za_hit_count - set $kgm_zallocations_count = $kgm_zallocations_count + 1 - end - set $kgm_zallocation_current_index = $kgm_zallocation_current_index + 1 - end - printf "Total allocations: %d\n", $kgm_zallocations_count -end - -document showzallochistogram -Syntax: showzallochistogram -| Prints the histogram for the zalloc table -end - -define showzstats - printf "z_alloc_collisions: %u, z_trace_collisions: %u\n", z_alloc_collisions, z_trace_collisions - printf "z_alloc_overwrites: %u, z_trace_overwrites: %u\n", z_alloc_overwrites, z_trace_overwrites - printf "z_alloc_recorded: %u, z_trace_recorded: %u\n", z_alloc_recorded, z_trace_recorded -end - -document showzstats -Syntax: showzstats -| Prints the zone leak detection stats -end - - -set $kgm_au_sentry_hash_table_size = 97 - -define showsession1 - set $p = (struct au_sentry *)$arg0 - showptr $p - printf " 0x%08x 0x%08x 0x%016x", $p->se_auinfo.ai_asid, $p->se_auinfo.ai_auid, $p->se_auinfo.ai_flags - printf " %3ld %3ld", $p->se_refcnt, $p->se_procnt - printf "\n" -end - -define showsessionhdr - printf "au_sentry " - showptrhdrpad - printf " ASID AUID FLAGS C P\n" -end - -define showsession - showsessionhdr - showsession1 $arg0 -end - -document showsession -Syntax: showsession -| Display info about a specified audit session -end - -define showallsessions - showsessionhdr - set $kgm_au_sentry_hash_table = au_sentry_bucket - set $i = $kgm_au_sentry_hash_table_size - 1 - while $i >= 0 - set $p = $kgm_au_sentry_hash_table[$i].lh_first - while $p != 0 - showsession1 $p - set $p = $p->se_link.le_next - end - set $i = $i - 1 - end -end - -document showallsessions -Syntax: showallsessions -| Prints the audit sessions in the global hash table -end - -define showauhistorystack - set $ii = $arg0 - set $pp = (void **)$arg1 - while $ii > 0 - printf " " - x/i $pp[$ii-1] - set $ii = $ii - 1 - end -end - -define showauhistory1 - set $p = (struct au_history *)$arg0 - set $stack_depth = $p->stack_depth - set $stack = $p->stack - showptr $p->ptr - if $p->event == 1 - 
printf " REF" - end - if $p->event == 2 - printf " UNREF" - end - if $p->event == 3 - printf " BIRTH" - end - if $p->event == 4 - printf " DEATH" - end - if $p->event == 5 - printf " FIND" - end - set $p = &$p->se - printf " 0x%08x 0x%08x 0x%016x", $p->se_auinfo.ai_asid, $p->se_auinfo.ai_auid, $p->se_auinfo.ai_flags - printf " %3ld %3ld", $p->se_refcnt, $p->se_procnt - printf "\n" - showauhistorystack $stack_depth $stack -end - -define showauhistory - set $i = (au_history_index-1) % au_history_size - if au_history_index >= au_history_size - set $n = au_history_size - else - set $n = au_history_index - end - while $n > 0 - if au_history[$i].ptr != 0 && (0 == $arg0 || au_history[$i].ptr == $arg0) - printf "[% 4d] ", $i - showauhistory1 &au_history[$i] - end - set $n = $n - 1 - set $i = ($i - 1) % au_history_size - end -end - -define showallauhistory - showauhistory 0 -end - -define showkwqheader - printf " kwq " - showptrhdrpad - printf " kwqaddr " - showptrhdrpad - printf " inqueue fakecount highseq lowseq flags lastunlock p_rwwc" - printf "\n " -end - -define showkwqint - printf " " - set $kgm_kwq = (ksyn_wait_queue_t)$arg0 - showptr $kgm_kwq - printf " " - showptr $kgm_kwq->kw_addr - printf " " - printf " %d ", $kgm_kwq->kw_inqueue - printf " %d ", $kgm_kwq->kw_fakecount - printf " 0x%x ", $kgm_kwq->kw_highseq - printf " 0x%x ", $kgm_kwq->kw_lowseq - printf " 0x%x ", $kgm_kwq->kw_flags - printf " 0x%x ", $kgm_kwq->kw_lastunlockseq - printf " 0x%x ", $kgm_kwq->kw_pre_rwwc - printf "\n" -end - -define show_kwq - showkwqheader - showkwqint $arg0 -end - -document show_kwq -Syntax: (gdb) show_kwq -| Display info about one ksyn_wait_queue -end - -# Internal routine used by "showpthread_mutex" to abstract possible loads from -# user space -define _loadfrommutex - if (kdp_pmap == 0) - set $kgm_loadval = *(uintptr_t *)$arg0 - else - if ($kgm_x86_abi == 0xe) - set $kgm_loadval = *(uint32_t *)$arg0 - else - if ($kgm_x86_abi == 0xf) - if ($kgm_mtype == $kgm_mtype_i386) - _loadk32m64 $arg0 - set $kgm_loadval = $kgm_k32read64 - else - set $kgm_loadval = *(uint32_t *)$arg0 - end - end - end -end -end - -define show_pthreadmutex - set $newact = (struct thread *) $arg0 - set $ourtask = (struct task *)($newact->task) - set $our_user_is64 = ($ourtask->taskFeatures[0] & 0x80000000) - _kgm_flush_loop - set $mutex = (void *)$arg1 - set kdp_pmap = $newact->task->map->pmap - _kgm_flush_loop - _kgm_update_loop - set $newiss = (x86_saved_state_t *) ($newact->machine.pcb->iss) - set $kgm_x86_abi = $newiss.flavor - if ($our_user_is64 != 0) - printf "\tUser 64Bit\n " - printf "\tSignature: " - set $nextval = $mutex - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tflags: " - set $nextval = $mutex + 12 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tSeqs: " - set $nextval = $mutex + 20 - _loadfrommutex $nextval - printf "0x%x ",$kgm_loadval - set $nextval = $mutex + 24 - _loadfrommutex $nextval - printf "0x%x ",$kgm_loadval - set $nextval = $mutex + 28 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\ttid[0]: " - set $nextval = $mutex + 32 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\ttid[1]: " - set $nextval = $mutex + 36 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - else - printf "\tUser 32Bit\n " - printf "\tSignature: " - set $nextval = $mutex - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tflags: " - set $nextval = $mutex + 8 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tSeqs: 
" - set $nextval = $mutex + 16 - _loadfrommutex $nextval - printf "0x%x ",$kgm_loadval - set $nextval = $mutex + 20 - _loadfrommutex $nextval - printf "0x%x ",$kgm_loadval - set $nextval = $mutex + 24 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\ttid[0]: " - set $nextval = $mutex + 32 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\ttid[1]: " - set $nextval = $mutex + 36 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - end - printf "\n" - resetstacks -end - - -document show_pthreadmutex -Syntax: (gdb) show_pthreadmutex -| Display the mutex contents from userspace. -end - - -define show_pthreadcondition - set $newact = (struct thread *) $arg0 - set $ourtask = (struct task *)($newact->task) - set $our_user_is64 = ($ourtask->taskFeatures[0] & 0x80000000) - _kgm_flush_loop - set $cond = (void *)$arg1 - set kdp_pmap = $newact->task->map->pmap - _kgm_flush_loop - _kgm_update_loop - set $newiss = (x86_saved_state_t *) ($newact->machine.pcb->iss) - set $kgm_x86_abi = $newiss.flavor - if ($our_user_is64 != 0) - printf "\tUser 64Bit\n " - printf "\tSignature: " - set $nextval = $cond - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tflags: " - set $nextval = $cond + 12 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tSeqs: " - set $nextval = $cond + 24 - _loadfrommutex $nextval - printf "0x%x ",$kgm_loadval - set $nextval = $cond + 28 - _loadfrommutex $nextval - printf "0x%x ",$kgm_loadval - set $nextval = $cond + 32 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tMutex lowaddr: " - set $nextval = $cond + 16 - _loadfrommutex $nextval - printf "0x%08x\n",$kgm_loadval - printf "\tMutex highaddr: " - set $nextval = $cond + 20 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - else - printf "\tUser 32Bit\n " - printf "\tSignature: " - set $nextval = $cond - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tflags: " - set $nextval = $cond + 8 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tSeqs: " - set $nextval = $cond + 16 - _loadfrommutex $nextval - printf "0x%x ",$kgm_loadval - set $nextval = $cond + 20 - _loadfrommutex $nextval - printf "0x%x ",$kgm_loadval - set $nextval = $cond + 24 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - printf "\tMutex addr: " - set $nextval = $cond + 12 - _loadfrommutex $nextval - printf "0x%x\n",$kgm_loadval - end - printf "\n" - resetstacks -end - - -document show_pthreadcondition -Syntax: (gdb) show_pthreadcondition -| Display the condition variable contents from userspace. -end - -define processortimers - set $kgm_p = processor_list - printf "Processor\t\t\t Last dispatch\t\t Next deadline\t\t difference\n" - while $kgm_p - printf "Processor %d: %p\t", $kgm_p->cpu_id, $kgm_p - printf " 0x%016llx\t", $kgm_p->last_dispatch - set $kgm_rt_timer = &(cpu_data_ptr[$kgm_p->cpu_id].rtclock_timer) - printf " 0x%016llx \t", $kgm_rt_timer->deadline - set $kgm_rt_diff = ((long long)$kgm_p->last_dispatch) - ((long long)$kgm_rt_timer->deadline) - printf " 0x%016llx ", $kgm_rt_diff -# normally the $kgm_rt_diff will be close to the last dispatch time, or negative -# When it isn't, mark the result as bad. 
This is a suggestion, not an absolute - if ( ($kgm_rt_diff > 0) && ((long long)$kgm_p->last_dispatch) - ($kgm_rt_diff + 1) > 0 ) - printf "probably BAD\n" - else - printf "(ok)\n" - end - # dump the call entries (Intel only) - if (($kgm_mtype & $kgm_mtype_x86_mask) == $kgm_mtype_x86_any) - printf "Next deadline set at: 0x%016llx. Timer call list:", $kgm_rt_timer->when_set - set $kgm_entry = (queue_t *)$kgm_rt_timer->queue - if ($kgm_entry == $kgm_rt_timer) - printf " (empty)\n" - else - printf "\n entry: " - showptrhdrpad - printf "deadline soft_deadline delta (*func)(param0,param1)\n" - while $kgm_entry != $kgm_rt_timer - set $kgm_timer_call = (timer_call_t) $kgm_entry - set $kgm_call_entry = (struct call_entry *) $kgm_entry - printf " " - showptr $kgm_entry - printf ": 0x%016llx 0x%016llx 0x%08x (%p)(%p,%p)\n", \ - $kgm_call_entry->deadline, \ - $kgm_timer_call->soft_deadline, \ - ($kgm_call_entry->deadline - $kgm_timer_call->soft_deadline), \ - $kgm_call_entry->func, \ - $kgm_call_entry->param0, $kgm_call_entry->param1 - set $kgm_entry = $kgm_entry->next - end - end - end - set $kgm_p = $kgm_p->processor_list - end - printf "\n" -end - -document processortimers -Syntax: (gdb) processortimers -| Print details of processor timers, noting any timer which might be suspicious -end - -define maplocalcache - if ($kgm_mtype == $kgm_mtype_arm) - mem 0x80000000 0xefffffff cache - set dcache-linesize-power 9 - printf "GDB memory caching enabled. Be sure to disable by calling flushlocalcache before detaching or connecting to a new device\n" - end -end - -document maplocalcache -Syntax: (gdb) maplocalcache -| Sets up memory regions for GDB to cache on read. Significantly increases debug speed over KDP -end - -define flushlocalcache - if ($kgm_mtype == $kgm_mtype_arm) - delete mem - printf "GDB memory caching disabled.\n" - end -end - -document flushlocalcache -Syntax: (gdb) flushlocalcache -| Clears all memory regions -end diff --git a/libkern/Makefile b/libkern/Makefile index 67e6f4c99..6f52aee26 100644 --- a/libkern/Makefile +++ b/libkern/Makefile @@ -7,27 +7,16 @@ include $(MakeInc_cmd) include $(MakeInc_def) INSTINC_SUBDIRS = \ - libkern \ - uuid -INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} -INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} -INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} + libkern +INSTINC_SUBDIRS_X86_64 = libkern +INSTINC_SUBDIRS_ARM = libkern EXPINC_SUBDIRS = \ - libkern \ - uuid -EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} -EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS} -EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS} + libkern +EXPINC_SUBDIRS_X86_64 = libkern +EXPINC_SUBDIRS_ARM = libkern -SETUP_SUBDIRS = - -COMP_SUBDIRS_I386 = conf -COMP_SUBDIRS_X86_64 = conf -COMP_SUBDIRS_ARM = conf - - -INST_SUBDIRS = +COMP_SUBDIRS = conf include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libkern/OSKextLib.cpp b/libkern/OSKextLib.cpp index 73c216fcf..9dbc988f8 100644 --- a/libkern/OSKextLib.cpp +++ b/libkern/OSKextLib.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -339,10 +339,12 @@ finish: (void)vm_deallocate(kernel_map, (vm_offset_t)request, requestLengthIn); } if (response) { - kmem_free(kernel_map, (vm_offset_t)response, responseLength); + /* 11981737 - clear uninitialized data in last page */ + kmem_free(kernel_map, (vm_offset_t)response, round_page(responseLength)); } if (logData) { - kmem_free(kernel_map, (vm_offset_t)logData, logDataLength); + /* 11981737 - clear uninitialized data in last page */ + kmem_free(kernel_map, (vm_offset_t)logData, round_page(logDataLength)); } return result; @@ -458,15 +460,6 @@ kmod_dump_log( * Compatibility implementation for kmod_get_info() host_priv routine. * Only supported on old 32-bit architectures. *********************************************************************/ -#if __i386__ -kern_return_t -kext_get_kmod_info( - kmod_info_array_t * kmod_list, - mach_msg_type_number_t * kmodCount) -{ - return OSKext::getKmodInfo(kmod_list, kmodCount); -} -#endif /* __i386__ */ #if PRAGMA_MARK #pragma mark Loaded Kext Summary diff --git a/libkern/c++/OSData.cpp b/libkern/c++/OSData.cpp index 61b4342d2..295193465 100644 --- a/libkern/c++/OSData.cpp +++ b/libkern/c++/OSData.cpp @@ -447,8 +447,8 @@ void OSData::setDeallocFunction(DeallocFunction func) if (!reserved) { reserved = (typeof(reserved)) kalloc(sizeof(ExpansionData)); - if (!reserved) return; - bzero(reserved, sizeof(ExpansionData)); + if (!reserved) return; + bzero(reserved, sizeof(ExpansionData)); } reserved->deallocFunction = func; } diff --git a/libkern/c++/OSKext.cpp b/libkern/c++/OSKext.cpp index 73d92b9f9..2e15a06e7 100644 --- a/libkern/c++/OSKext.cpp +++ b/libkern/c++/OSKext.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2009 Apple Inc. All rights reserved. + * Copyright (c) 2008-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -45,6 +45,11 @@ extern "C" { #include // 04/18/11 - gab: #include + +#if CONFIG_MACF +#include +#include +#endif }; #include @@ -226,6 +231,7 @@ static bool sKeepSymbols = false; static IORecursiveLock * sKextLock = NULL; static OSDictionary * sKextsByID = NULL; +static OSDictionary * sExcludeListByID = NULL; static OSArray * sLoadedKexts = NULL; static OSArray * sUnloadedPrelinkedKexts = NULL; @@ -343,11 +349,16 @@ static AbsoluteTime sLastWakeTime; // last time we * * gLoadedKextSummaries is accessed by other modules, but only during * a panic so the lock isn't needed then. +* +* gLoadedKextSummaries has the "used" attribute in order to ensure +* that it remains visible even when we are performing extremely +* aggressive optimizations, as it is needed to allow the debugger +* to automatically parse the list of loaded kexts. 
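*
* Concretely, the declaration below takes the form
*
*     OSKextLoadedKextSummaryHeader * gLoadedKextSummaries
*         __attribute__((used)) = NULL;
*
* where "used" marks the symbol as referenced even when the compiler can
* see no use of it, so it cannot be discarded or stripped.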
**********/ static IOLock * sKextSummariesLock = NULL; void (*sLoadedKextSummariesUpdated)(void) = OSKextLoadedKextSummariesUpdated; -OSKextLoadedKextSummaryHeader * gLoadedKextSummaries = NULL; +OSKextLoadedKextSummaryHeader * gLoadedKextSummaries __attribute__((used)) = NULL; static size_t sLoadedKextSummariesAllocSize = 0; OSKextLoadedKextSummaryHeader * sPrevLoadedKextSummaries = NULL; static size_t sPrevLoadedKextSummariesAllocSize = 0; @@ -362,7 +373,7 @@ static const OSKextLogSpec kDefaultKernelLogFilter = kOSKextLogBasicLevel | kOSKextLogVerboseFlagsMask; static OSKextLogSpec sKernelLogFilter = kDefaultKernelLogFilter; static bool sBootArgLogFilterFound = false; -SYSCTL_INT(_debug, OID_AUTO, kextlog, CTLFLAG_RW | CTLFLAG_LOCKED, &sKernelLogFilter, +SYSCTL_UINT(_debug, OID_AUTO, kextlog, CTLFLAG_RW | CTLFLAG_LOCKED, &sKernelLogFilter, sKernelLogFilter, "kernel kext logging"); static OSKextLogSpec sUserSpaceKextLogFilter = kOSKextLogSilentFilter; @@ -444,7 +455,6 @@ kern_allocate( goto finish; } linkBuffer->setDeallocFunction(osdata_kext_free); - OSKextLog(theKext, kOSKextLogProgressLevel | kOSKextLogLoadFlag | kOSKextLogLinkFlag, @@ -1474,7 +1484,6 @@ OSKext::initWithPrelinkedInfoDict( prelinkedExecutable->setDeallocFunction(osdata_phys_free); #endif setLinkedExecutable(prelinkedExecutable); - addressNum = OSDynamicCast(OSNumber, anInfoDict->getObject(kPrelinkKmodInfoKey)); if (!addressNum) { @@ -2042,6 +2051,15 @@ OSKext::setInfoDictionaryAndPath( goto finish; } } + + /* Check to see if this kext is in exclude list */ + if ( isInExcludeList() ) { + OSKextLog(this, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag, + "Kext %s is in exclude list, not loadable", + getIdentifierCString()); + goto finish; + } /* Set flags for later use if the infoDict gets flushed. We only * check for true values, not false ones(!) @@ -3393,7 +3411,7 @@ OSKext::serializeLogInfo( logInfo = serializer->text(); logInfoLength = serializer->getLength(); - kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, logInfoLength); + kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, round_page(logInfoLength)); if (kmem_result != KERN_SUCCESS) { OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | @@ -3402,6 +3420,9 @@ OSKext::serializeLogInfo( /* Incidental error; we're going to (try to) allow the request * to succeed. 
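*
* (The else path below is the 11981737 pattern used throughout this patch:
* allocate at round_page(length), zero the slack between the payload and
* the end of the last page, then copy the payload in, so stale kernel
* memory never reaches user space. Sketch, lengths illustrative:
*
*     kmem_alloc(kernel_map, &buf, round_page(len));
*     bzero((void *)(buf + len), round_page(len) - len);
*     memcpy((void *)buf, payload, len);
* )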
*/ } else { + /* 11981737 - clear uninitialized data in last page */ + bzero((void *)(buffer + logInfoLength), + (round_page(logInfoLength) - logInfoLength)); memcpy(buffer, logInfo, logInfoLength); *logInfoOut = buffer; *logInfoLengthOut = logInfoLength; @@ -3533,6 +3554,10 @@ OSKext::removeKext( { OSReturn result = kOSKextReturnInUse; OSKext * checkKext = NULL; // do not release +#if CONFIG_MACF + int macCheckResult = 0; + kauth_cred_t cred = NULL; +#endif IORecursiveLockLock(sKextLock); @@ -3552,6 +3577,23 @@ OSKext::removeKext( } if (aKext->isLoaded()) { +#if CONFIG_MACF + if (current_task() != kernel_task) { + cred = kauth_cred_get_with_ref(); + macCheckResult = mac_kext_check_unload(cred, aKext->getIdentifierCString()); + kauth_cred_unref(&cred); + } + + if (macCheckResult != 0) { + result = kOSReturnError; + OSKextLog(aKext, + kOSKextLogErrorLevel | + kOSKextLogKextBookkeepingFlag, + "Failed to remove kext %s (MAC policy error 0x%x).", + aKext->getIdentifierCString(), macCheckResult); + goto finish; + } +#endif /* If we are terminating, send the request to the IOCatalogue * (which will actually call us right back but that's ok we have @@ -3689,7 +3731,172 @@ OSKext::copyKexts(void) return result; } - + +/********************************************************************* + *********************************************************************/ +#define BOOTER_KEXT_PREFIX "Driver-" + +typedef struct _DeviceTreeBuffer { + uint32_t paddr; + uint32_t length; +} _DeviceTreeBuffer; + +/********************************************************************* + * Create a dictionary of excluded kexts from the given booter data. + *********************************************************************/ +/* static */ +void +OSKext::createExcludeListFromBooterData( + OSDictionary * theDictionary, + OSCollectionIterator * theIterator ) +{ + OSString * deviceTreeName = NULL; // do not release + const _DeviceTreeBuffer * deviceTreeBuffer = NULL; // do not release + char * booterDataPtr = NULL; // do not release + _BooterKextFileInfo * kextFileInfo = NULL; // do not release + char * infoDictAddr = NULL; // do not release + OSObject * parsedXML = NULL; // must release + OSDictionary * theInfoDict = NULL; // do not release + + theIterator->reset(); + + /* look for AppleKextExcludeList.kext */ + while ( (deviceTreeName = + OSDynamicCast(OSString, theIterator->getNextObject())) ) { + + const char * devTreeNameCString; + OSData * deviceTreeEntry; + OSString * myBundleID; // do not release + + OSSafeReleaseNULL(parsedXML); + + deviceTreeEntry = + OSDynamicCast(OSData, theDictionary->getObject(deviceTreeName)); + if (!deviceTreeEntry) { + continue; + } + + /* Make sure it is a kext */ + devTreeNameCString = deviceTreeName->getCStringNoCopy(); + if (strncmp(devTreeNameCString, BOOTER_KEXT_PREFIX, + (sizeof(BOOTER_KEXT_PREFIX) - 1)) != 0) { + OSKextLog(NULL, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag, + "\"%s\" not a kext", + devTreeNameCString); + continue; + } + + deviceTreeBuffer = (const _DeviceTreeBuffer *) + deviceTreeEntry->getBytesNoCopy(0, sizeof(deviceTreeBuffer)); + if (!deviceTreeBuffer) { + continue; + } + + booterDataPtr = (char *)ml_static_ptovirt(deviceTreeBuffer->paddr); + if (!booterDataPtr) { + continue; + } + + kextFileInfo = (_BooterKextFileInfo *) booterDataPtr; + if (!kextFileInfo->infoDictPhysAddr || + !kextFileInfo->infoDictLength) { + continue; + } + + infoDictAddr = (char *) + ml_static_ptovirt(kextFileInfo->infoDictPhysAddr); + if (!infoDictAddr) { + continue; + } + + parsedXML 
= OSUnserializeXML(infoDictAddr); + if (!parsedXML) { + continue; + } + + theInfoDict = OSDynamicCast(OSDictionary, parsedXML); + if (!theInfoDict) { + continue; + } + + myBundleID = + OSDynamicCast(OSString, + theInfoDict->getObject(kCFBundleIdentifierKey)); + if ( myBundleID && + strcmp( myBundleID->getCStringNoCopy(), "com.apple.driver.KextExcludeList" ) == 0 ) { + + /* get copy of exclusion list dictionary */ + OSDictionary * myTempDict; // do not free + + myTempDict = OSDynamicCast( + OSDictionary, + theInfoDict->getObject("OSKextExcludeList")); + if ( myTempDict ) { + IORecursiveLockLock(sKextLock); + + /* get rid of old exclusion list */ + if (sExcludeListByID) { + sExcludeListByID->flushCollection(); + OSSafeRelease(sExcludeListByID); + } + sExcludeListByID = OSDictionary::withDictionary(myTempDict, 0); + IORecursiveLockUnlock(sKextLock); + } + break; + } + + } // while ( (deviceTreeName = ...) ) + + OSSafeReleaseNULL(parsedXML); + return; +} + +/********************************************************************* + * Create a dictionary of excluded kexts from the given prelink + * info (kernelcache). + *********************************************************************/ +/* static */ +void +OSKext::createExcludeListFromPrelinkInfo( OSArray * theInfoArray ) +{ + OSDictionary * myInfoDict = NULL; // do not release + OSString * myBundleID; // do not release + u_int i; + + /* Find com.apple.driver.KextExcludeList. */ + for (i = 0; i < theInfoArray->getCount(); i++) { + myInfoDict = OSDynamicCast(OSDictionary, theInfoArray->getObject(i)); + if (!myInfoDict) { + continue; + } + myBundleID = + OSDynamicCast(OSString, + myInfoDict->getObject(kCFBundleIdentifierKey)); + if ( myBundleID && + strcmp( myBundleID->getCStringNoCopy(), "com.apple.driver.KextExcludeList" ) == 0 ) { + // get copy of exclude list dictionary + OSDictionary * myTempDict; // do not free + myTempDict = OSDynamicCast(OSDictionary, + myInfoDict->getObject("OSKextExcludeList")); + if ( myTempDict ) { + IORecursiveLockLock(sKextLock); + // get rid of old exclude list + if (sExcludeListByID) { + sExcludeListByID->flushCollection(); + OSSafeRelease(sExcludeListByID); + } + + sExcludeListByID = OSDictionary::withDictionary(myTempDict, 0); + IORecursiveLockUnlock(sKextLock); + } + break; + } + } // for (i = 0; i < theInfoArray->getCount()... + + return; +} + #if PRAGMA_MARK #pragma mark Accessors #endif @@ -4012,9 +4219,7 @@ finish: /********************************************************************* *********************************************************************/ -#if defined (__i386__) -#define ARCHNAME "i386" -#elif defined (__x86_64__) +#if defined (__x86_64__) #define ARCHNAME "x86_64" #else #error architecture not supported @@ -4088,6 +4293,112 @@ finish: #if PRAGMA_MARK #pragma mark Load/Start/Stop/Unload #endif + +#define isWhiteSpace(c) ((c) == ' ' || (c) == '\t' || (c) == '\r' || (c) == ',' || (c) == '\n') + +/********************************************************************* + * sExcludeListByID is a dictionary with keys / values of: + * key = bundleID string of kext we will not allow to load + * value = version string(s) of the kext that is to be denied loading. + * The version strings can be comma delimited. For example if kext + * com.foocompany.fookext has two versions that we want to deny + * loading then the version strings might look like: + * 1.0.0, 1.0.1 + * If the current fookext has a version of 1.0.0 OR 1.0.1 we will + * not load the kext. 
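 *
 * (Worked example of the rule just stated: with the value "1.0.0, 1.0.1",
 * a fookext at version 1.0.0 or 1.0.1 is refused, while 0.9.9 or 1.0.2
 * would still load.)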
+ * + * Value may also be in the form of "LE 2.0.0" (version numbers + * less than or equal to 2.0.0 will not load) or "LT 2.0.0" (version + * number less than 2.0.0 will not load) + * + * NOTE - we cannot use the characters "<=" or "<" because we have code + * that serializes plists and treats '<' as a special character. + *********************************************************************/ +bool +OSKext::isInExcludeList(void) +{ + OSString * versionString = NULL; // do not release + char * versionCString = NULL; // do not free + size_t i; + boolean_t wantLessThan = false; + boolean_t wantLessThanEqualTo = false; + char myBuffer[32]; + + if (!sExcludeListByID) { + return(false); + } + /* look up by bundleID in our exclude list and if found get version + * string (or strings) that we will not allow to load + */ + versionString = OSDynamicCast(OSString, sExcludeListByID->getObject(bundleID)); + if (!versionString) { + return(false); + } + + /* parse version strings */ + versionCString = (char *) versionString->getCStringNoCopy(); + + /* look for "LT" or "LE" form of version string, must be in first two + * positions. + */ + if (*versionCString == 'L' && *(versionCString + 1) == 'T') { + wantLessThan = true; + versionCString +=2; + } + else if (*versionCString == 'L' && *(versionCString + 1) == 'E') { + wantLessThanEqualTo = true; + versionCString +=2; + } + + for (i = 0; *versionCString != 0x00; versionCString++) { + /* skip whitespace */ + if (isWhiteSpace(*versionCString)) { + continue; + } + + /* peek ahead for version string separator or null terminator */ + if (*(versionCString + 1) == ',' || *(versionCString + 1) == 0x00) { + + /* OK, we have a version string */ + myBuffer[i++] = *versionCString; + myBuffer[i] = 0x00; + + OSKextVersion excludeVers; + excludeVers = OSKextParseVersionString(myBuffer); + + if (wantLessThanEqualTo) { + if (version <= excludeVers) { + return(true); + } + } + else if (wantLessThan) { + if (version < excludeVers) { + return(true); + } + } + else if ( version == excludeVers ) { + return(true); + } + + /* reset for the next (if any) version string */ + i = 0; + wantLessThan = false; + wantLessThanEqualTo = false; + } + else { + /* save valid version character */ + myBuffer[i++] = *versionCString; + + /* make sure bogus version string doesn't overrun local buffer */ + if ( i >= sizeof(myBuffer) ) { + break; + } + } + } + + return(false); +} + /********************************************************************* *********************************************************************/ /* static */ @@ -4133,6 +4444,10 @@ OSKext::loadKextWithIdentifier( OSKext * theKext = NULL; // do not release OSDictionary * loadRequest = NULL; // must release const OSSymbol * kextIdentifierSymbol = NULL; // must release +#if CONFIG_MACF + int macCheckResult = 0; + kauth_cred_t cred = NULL; +#endif IORecursiveLockLock(sKextLock); @@ -4210,6 +4525,26 @@ OSKext::loadKextWithIdentifier( goto finish; } +#if CONFIG_MACF + if (current_task() != kernel_task) { + cred = kauth_cred_get_with_ref(); + macCheckResult = mac_kext_check_load(cred, kextIdentifier->getCStringNoCopy()); + kauth_cred_unref(&cred); + } + + if (macCheckResult != 0) { + result = kOSReturnError; + + OSKextLog(theKext, + kOSKextLogErrorLevel | + kOSKextLogLoadFlag, + "Failed to load kext %s (MAC policy error 0x%x).", + kextIdentifier->getCStringNoCopy(), macCheckResult); + + goto finish; + } +#endif + result = theKext->load(startOpt, startMatchingOpt, personalityNames); if (result != kOSReturnSuccess) { @@ -4305,6 
+4640,17 @@ OSKext::load( Boolean alreadyLoaded = false; OSKext * lastLoadedKext = NULL; + if (isInExcludeList()) { + OSKextLog(this, + kOSKextLogErrorLevel | kOSKextLogGeneralFlag | + kOSKextLogLoadFlag, + "Kext %s is in exclude list, not loadable", + getIdentifierCString()); + + result = kOSKextReturnNotLoadable; + goto finish; + } + if (isLoaded()) { alreadyLoaded = true; result = kOSReturnSuccess; @@ -4615,6 +4961,9 @@ OSKext::slidePrelinkedExecutable() mh = (kernel_mach_header_t *)linkedExecutable->getBytesNoCopy(); for (seg = firstsegfromheader(mh); seg != NULL; seg = nextsegfromheader(mh, seg)) { + if (!seg->vmaddr) { + continue; + } seg->vmaddr += vm_kernel_slide; #if KASLR_KEXT_DEBUG @@ -4712,9 +5061,6 @@ OSKext::slidePrelinkedExecutable() if ( reloc[i].r_extern != 0 || reloc[i].r_type != 0 || reloc[i].r_length != (sizeof(void *) == 8 ? 3 : 2) -#if __i386__ - || (reloc[i].r_address & R_SCATTERED) -#endif ) { OSKextLog(this, kOSKextLogErrorLevel | kOSKextLogLoadFlag | @@ -4779,6 +5125,11 @@ OSKext::slidePrelinkedExecutable() /* Fix up kmod info and linkedExecutable. */ kmod_info->size = new_kextsize; +#if VM_MAPPED_KEXTS + new_osdata->setDeallocFunction(osdata_kext_free); +#else + new_osdata->setDeallocFunction(osdata_phys_free); +#endif linkedExecutable->setDeallocFunction(NULL); linkedExecutable->release(); linkedExecutable = new_osdata; @@ -5200,6 +5551,12 @@ OSKext::jettisonLinkeditSegment(void) /* Fix the kmod info and linkedExecutable. */ kmod_info->size = kextsize; + +#if VM_MAPPED_KEXTS + data->setDeallocFunction(osdata_kext_free); +#else + data->setDeallocFunction(osdata_phys_free); +#endif linkedExecutable->setDeallocFunction(NULL); linkedExecutable->release(); linkedExecutable = data; @@ -7377,14 +7734,16 @@ OSKext::handleRequest( kOSKextLogIPCFlag, "Received '%s' request from user space.", predicate->getCStringNoCopy()); - + result = kOSKextReturnNotPrivileged; if (hostPriv == HOST_PRIV_NULL) { if (sPrelinkBoot) { hideTheSlide = true; - /* must be root to use these kext requests */ - if (predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress) ) { + if (predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress) || + predicate->isEqualTo(kKextRequestPredicateUnload) || + predicate->isEqualTo(kKextRequestPredicateStart) || + predicate->isEqualTo(kKextRequestPredicateStop) ) { OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | kOSKextLogIPCFlag, @@ -7516,9 +7875,10 @@ OSKext::handleRequest( "Returning loaded kext info."); result = kOSReturnSuccess; } - +#if !SECURE_KERNEL } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelLoadAddress)) { OSNumber * addressNum = NULL; // released as responseObject + unsigned long long unslid_addr = 0; kernel_segment_command_t * textseg = getsegbyname("__TEXT"); if (!textseg) { @@ -7530,17 +7890,19 @@ OSKext::handleRequest( goto finish; } + unslid_addr = VM_KERNEL_UNSLIDE(textseg->vmaddr); + OSKextLog(/* kext */ NULL, kOSKextLogDebugLevel | kOSKextLogIPCFlag, "Returning kernel load address 0x%llx.", - (unsigned long long) textseg->vmaddr ); + (unsigned long long) unslid_addr); - addressNum = OSNumber::withNumber((long long unsigned int)textseg->vmaddr, + addressNum = OSNumber::withNumber((long long unsigned int) unslid_addr, 8 * sizeof(long long unsigned int)); responseObject = addressNum; result = kOSReturnSuccess; - +#endif } else if (predicate->isEqualTo(kKextRequestPredicateGetKernelRequests)) { /* Hand the current sKernelRequests array to the caller @@ -7566,6 +7928,14 @@ OSKext::handleRequest( "Returning load 
requests."); result = kOSReturnSuccess; } + else { + OSKextLog(/* kext */ NULL, + kOSKextLogDebugLevel | + kOSKextLogIPCFlag, + "Received '%s' invalid request from user space.", + predicate->getCStringNoCopy()); + goto finish; + } /********** * Now we have handle the request, or not. Gather up the response & logging @@ -7621,7 +7991,7 @@ OSKext::handleRequest( /* This kmem_alloc sets the return value of the function. */ kmem_result = kmem_alloc(kernel_map, (vm_offset_t *)&buffer, - responseLength); + round_page(responseLength)); if (kmem_result != KERN_SUCCESS) { OSKextLog(/* kext */ NULL, kOSKextLogErrorLevel | @@ -7630,6 +8000,9 @@ OSKext::handleRequest( result = kmem_result; goto finish; } else { + /* 11981737 - clear uninitialized data in last page */ + bzero((void *)(buffer + responseLength), + (round_page(responseLength) - responseLength)); memcpy(buffer, response, responseLength); *responseOut = buffer; *responseLengthOut = responseLength; @@ -7844,11 +8217,11 @@ OSKext::copyInfo(OSArray * infoKeys) segp->vmsize, segp->nsects); #endif segp->vmaddr = VM_KERNEL_UNSLIDE(segp->vmaddr); - + for (secp = firstsect(segp); secp != NULL; secp = nextsect(segp, secp)) { secp->addr = VM_KERNEL_UNSLIDE(secp->addr); } - } + } lcp = (struct load_command *)((caddr_t)lcp + lcp->cmdsize); } result->setObject(kOSBundleMachOHeadersKey, headerData); @@ -10305,6 +10678,8 @@ OSKext::updateLoadedKextSummaries(void) start = (vm_map_offset_t) summaryHeader; end = start + summarySize; + result = vm_map_protect(kernel_map, start, end, VM_PROT_DEFAULT, FALSE); + if (result != KERN_SUCCESS) goto finish; } /* Populate the summary header. @@ -10331,6 +10706,8 @@ OSKext::updateLoadedKextSummaries(void) start = (vm_map_offset_t) summaryHeader; end = start + summarySize; + result = vm_map_protect(kernel_map, start, end, VM_PROT_READ, FALSE); + if (result != KERN_SUCCESS) goto finish; sPrevLoadedKextSummaries = gLoadedKextSummaries; sPrevLoadedKextSummariesAllocSize = sLoadedKextSummariesAllocSize; @@ -10386,108 +10763,6 @@ OSKext::updateLoadedKextSummary(OSKextLoadedKextSummary *summary) /********************************************************************* *********************************************************************/ -#if __i386__ -/* static */ -kern_return_t -OSKext::getKmodInfo( - kmod_info_array_t * kmodList, - mach_msg_type_number_t * kmodCount) -{ - kern_return_t result = KERN_FAILURE; - vm_offset_t data = 0; - kmod_info_t * k, * kmod_info_scan_ptr; - kmod_reference_t * r, * ref_scan_ptr; - int ref_count; - unsigned size = 0; - - *kmodList = (kmod_info_t *)0; - *kmodCount = 0; - - IORecursiveLockLock(sKextLock); - - k = kmod; - while (k) { - size += sizeof(kmod_info_t); - r = k->reference_list; - while (r) { - size +=sizeof(kmod_reference_t); - r = r->next; - } - k = k->next; - } - if (!size) { - result = KERN_SUCCESS; - goto finish; - } - - result = kmem_alloc(kernel_map, &data, size); - if (result != KERN_SUCCESS) { - goto finish; - } - - /* Copy each kmod_info struct sequentially into the data buffer. - * Set each struct's nonzero 'next' pointer back to itself as a sentinel; - * the kernel space address is used to match refs, and a zero 'next' flags - * the end of kmod_infos in the data buffer and the beginning of references. - */ - k = kmod; - kmod_info_scan_ptr = (kmod_info_t *)data; - while (k) { - *kmod_info_scan_ptr = *k; - if (k->next) { - kmod_info_scan_ptr->next = k; - } - kmod_info_scan_ptr++; - k = k->next; - } - - /* Now add references after the kmod_info structs in the same buffer. 
- * Update each kmod_info with the ref_count so we can associate - * references with kmod_info structs. - */ - k = kmod; - ref_scan_ptr = (kmod_reference_t *)kmod_info_scan_ptr; - kmod_info_scan_ptr = (kmod_info_t *)data; - while (k) { - r = k->reference_list; - ref_count = 0; - while (r) { - /* Note the last kmod_info in the data buffer has its next == 0. - * Since there can only be one like that, - * this case is handled by the caller. - */ - *ref_scan_ptr = *r; - ref_scan_ptr++; - r = r->next; - ref_count++; - } - /* Stuff the # of refs into the 'reference_list' field of the kmod_info - * struct for the client to interpret. - */ - kmod_info_scan_ptr->reference_list = (kmod_reference_t *)(long)ref_count; - kmod_info_scan_ptr++; - k = k->next; - } - - result = vm_map_copyin(kernel_map, data, size, TRUE, (vm_map_copy_t *)kmodList); - if (result != KERN_SUCCESS) { - goto finish; - } - - *kmodCount = size; - result = KERN_SUCCESS; - -finish: - IORecursiveLockUnlock(sKextLock); - - if (result != KERN_SUCCESS && data) { - kmem_free(kernel_map, data, size); - *kmodList = (kmod_info_t *)0; - *kmodCount = 0; - } - return result; -} -#endif /* __i386__ */ #if CONFIG_KEC_FIPS diff --git a/libkern/c++/OSSymbol.cpp b/libkern/c++/OSSymbol.cpp index d2eca1bf7..c90107c1b 100644 --- a/libkern/c++/OSSymbol.cpp +++ b/libkern/c++/OSSymbol.cpp @@ -40,7 +40,7 @@ __END_DECLS #define super OSString -typedef struct { int i, j; } OSSymbolPoolState; +typedef struct { unsigned int i, j; } OSSymbolPoolState; #if OSALLOCDEBUG extern "C" { @@ -172,6 +172,13 @@ OSSymbolPool::OSSymbolPool(const OSSymbolPool *old) OSSymbolPool::~OSSymbolPool() { if (buckets) { + Bucket *thisBucket; + for (thisBucket = &buckets[0]; thisBucket < &buckets[nBuckets]; thisBucket++) { + if (thisBucket->count > 1) { + kfree(thisBucket->symbolP, thisBucket->count * sizeof(OSSymbol *)); + ACCUMSIZE(-(thisBucket->count * sizeof(OSSymbol *))); + } + } kfree(buckets, nBuckets * sizeof(Bucket)); ACCUMSIZE(-(nBuckets * sizeof(Bucket))); } @@ -363,7 +370,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) if (!j) { // couldn't find the symbol; probably means string hash changed - panic("removeSymbol"); + panic("removeSymbol %s count %d ", sym->string ? sym->string : "no string", count); return; } @@ -378,7 +385,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) return; } // couldn't find the symbol; probably means string hash changed - panic("removeSymbol"); + panic("removeSymbol %s count %d ", sym->string ? sym->string : "no string", count); return; } @@ -405,7 +412,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) return; } // couldn't find the symbol; probably means string hash changed - panic("removeSymbol"); + panic("removeSymbol %s count %d ", sym->string ? sym->string : "no string", count); return; } @@ -432,7 +439,7 @@ void OSSymbolPool::removeSymbol(OSSymbol *sym) } } // couldn't find the symbol; probably means string hash changed - panic("removeSymbol"); + panic("removeSymbol %s count %d ", sym->string ? sym->string : "no string", count); } /* diff --git a/libkern/c++/OSUnserializeXML.cpp b/libkern/c++/OSUnserializeXML.cpp index 7a424634b..e5a692141 100644 --- a/libkern/c++/OSUnserializeXML.cpp +++ b/libkern/c++/OSUnserializeXML.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2009 Apple Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -159,6 +159,8 @@ #include #include +#define MAX_OBJECTS 65535 + #define YYSTYPE object_t * #define YYPARSE_PARAM state #define YYLEX_PARAM (parser_state_t *)state @@ -170,7 +172,7 @@ typedef struct object { struct object *free; struct object *elements; OSObject *object; - OSString *key; // for dictionary + OSSymbol *key; // for dictionary int size; void *data; // for data char *string; // for string & symbol @@ -189,6 +191,7 @@ typedef struct parser_state { OSDictionary *tags; // used to remember "ID" tags OSString **errorString; // parse error with line OSObject *parsedObject; // resultant object of parsed text + int parsedObjectCount; } parser_state_t; #define STATE ((parser_state_t *)state) @@ -209,6 +212,7 @@ static object_t *buildDictionary(parser_state_t *state, object_t *o); static object_t *buildArray(parser_state_t *state, object_t *o); static object_t *buildSet(parser_state_t *state, object_t *o); static object_t *buildString(parser_state_t *state, object_t *o); +static object_t *buildSymbol(parser_state_t *state, object_t *o); static object_t *buildData(parser_state_t *state, object_t *o); static object_t *buildNumber(parser_state_t *state, object_t *o); static object_t *buildBoolean(parser_state_t *state, object_t *o); @@ -257,7 +261,7 @@ typedef int YYSTYPE; /* Line 216 of yacc.c. */ -#line 211 "OSUnserializeXML.tab.c" +#line 215 "OSUnserializeXML.tab.c" #ifdef short # undef short @@ -547,12 +551,12 @@ static const yytype_int8 yyrhs[] = }; /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ -static const yytype_uint8 yyrline[] = +static const yytype_uint16 yyrline[] = { - 0, 145, 145, 148, 153, 158, 159, 160, 161, 162, - 163, 164, 165, 178, 181, 184, 187, 188, 193, 202, - 207, 210, 213, 216, 219, 222, 225, 228, 235, 238, - 241, 244, 247 + 0, 149, 149, 152, 157, 162, 170, 178, 186, 194, + 202, 210, 218, 237, 240, 243, 246, 247, 262, 271, + 283, 286, 289, 292, 295, 298, 301, 304, 311, 314, + 317, 320, 323 }; #endif @@ -1490,14 +1494,14 @@ yyreduce: switch (yyn) { case 2: -#line 145 "OSUnserializeXML.y" +#line 149 "OSUnserializeXML.y" { yyerror("unexpected end of buffer"); YYERROR; ;} break; case 3: -#line 148 "OSUnserializeXML.y" +#line 152 "OSUnserializeXML.y" { STATE->parsedObject = (yyvsp[(1) - (1)])->object; (yyvsp[(1) - (1)])->object = 0; freeObject(STATE, (yyvsp[(1) - (1)])); @@ -1506,49 +1510,98 @@ yyreduce: break; case 4: -#line 153 "OSUnserializeXML.y" +#line 157 "OSUnserializeXML.y" { yyerror("syntax error"); YYERROR; ;} break; case 5: -#line 158 "OSUnserializeXML.y" - { (yyval) = buildDictionary(STATE, (yyvsp[(1) - (1)])); ;} +#line 162 "OSUnserializeXML.y" + { (yyval) = buildDictionary(STATE, (yyvsp[(1) - (1)])); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + ;} break; case 6: -#line 159 "OSUnserializeXML.y" - { (yyval) = buildArray(STATE, (yyvsp[(1) - (1)])); ;} +#line 170 "OSUnserializeXML.y" + { (yyval) = buildArray(STATE, (yyvsp[(1) - (1)])); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + ;} break; case 7: -#line 160 "OSUnserializeXML.y" - { (yyval) = buildSet(STATE, (yyvsp[(1) - (1)])); ;} +#line 178 "OSUnserializeXML.y" + { (yyval) = buildSet(STATE, (yyvsp[(1) - (1)])); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + ;} break; case 8: -#line 161 
"OSUnserializeXML.y" - { (yyval) = buildString(STATE, (yyvsp[(1) - (1)])); ;} +#line 186 "OSUnserializeXML.y" + { (yyval) = buildString(STATE, (yyvsp[(1) - (1)])); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + ;} break; case 9: -#line 162 "OSUnserializeXML.y" - { (yyval) = buildData(STATE, (yyvsp[(1) - (1)])); ;} +#line 194 "OSUnserializeXML.y" + { (yyval) = buildData(STATE, (yyvsp[(1) - (1)])); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + ;} break; case 10: -#line 163 "OSUnserializeXML.y" - { (yyval) = buildNumber(STATE, (yyvsp[(1) - (1)])); ;} +#line 202 "OSUnserializeXML.y" + { (yyval) = buildNumber(STATE, (yyvsp[(1) - (1)])); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + ;} break; case 11: -#line 164 "OSUnserializeXML.y" - { (yyval) = buildBoolean(STATE, (yyvsp[(1) - (1)])); ;} +#line 210 "OSUnserializeXML.y" + { (yyval) = buildBoolean(STATE, (yyvsp[(1) - (1)])); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + ;} break; case 12: -#line 165 "OSUnserializeXML.y" +#line 218 "OSUnserializeXML.y" { (yyval) = retrieveObject(STATE, (yyvsp[(1) - (1)])->idref); if ((yyval)) { (yyval)->object->retain(); @@ -1557,34 +1610,50 @@ yyreduce: YYERROR; } freeObject(STATE, (yyvsp[(1) - (1)])); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } ;} break; case 13: -#line 178 "OSUnserializeXML.y" +#line 237 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (2)]); (yyval)->elements = NULL; ;} break; case 14: -#line 181 "OSUnserializeXML.y" +#line 240 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (3)]); (yyval)->elements = (yyvsp[(2) - (3)]); ;} break; case 17: -#line 188 "OSUnserializeXML.y" +#line 247 "OSUnserializeXML.y" { (yyval) = (yyvsp[(2) - (2)]); (yyval)->next = (yyvsp[(1) - (2)]); + + object_t *o; + o = (yyval)->next; + while (o) { + if (o->key == (yyval)->key) { + yyerror("duplicate dictionary key"); + YYERROR; + } + o = o->next; + } ;} break; case 18: -#line 193 "OSUnserializeXML.y" +#line 262 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (2)]); - (yyval)->key = (OSString *)(yyval)->object; + (yyval)->key = (OSSymbol *)(yyval)->object; (yyval)->object = (yyvsp[(2) - (2)])->object; (yyval)->next = NULL; (yyvsp[(2) - (2)])->object = 0; @@ -1593,47 +1662,54 @@ yyreduce: break; case 19: -#line 202 "OSUnserializeXML.y" - { (yyval) = buildString(STATE, (yyvsp[(1) - (1)])); ;} +#line 271 "OSUnserializeXML.y" + { (yyval) = buildSymbol(STATE, (yyvsp[(1) - (1)])); + +// STATE->parsedObjectCount++; +// if (STATE->parsedObjectCount > MAX_OBJECTS) { +// yyerror("maximum object count"); +// YYERROR; +// } + ;} break; case 20: -#line 207 "OSUnserializeXML.y" +#line 283 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (2)]); (yyval)->elements = NULL; ;} break; case 21: -#line 210 "OSUnserializeXML.y" +#line 286 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (3)]); (yyval)->elements = (yyvsp[(2) - (3)]); ;} break; case 23: -#line 216 "OSUnserializeXML.y" +#line 292 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (2)]); (yyval)->elements = NULL; ;} break; case 24: -#line 219 "OSUnserializeXML.y" +#line 295 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (3)]); (yyval)->elements = (yyvsp[(2) - (3)]); ;} break; 
case 26: -#line 225 "OSUnserializeXML.y" +#line 301 "OSUnserializeXML.y" { (yyval) = (yyvsp[(1) - (1)]); (yyval)->next = NULL; ;} break; case 27: -#line 228 "OSUnserializeXML.y" +#line 304 "OSUnserializeXML.y" { (yyval) = (yyvsp[(2) - (2)]); (yyval)->next = (yyvsp[(1) - (2)]); ;} @@ -1641,7 +1717,7 @@ yyreduce: /* Line 1267 of yacc.c. */ -#line 1595 "OSUnserializeXML.tab.c" +#line 1671 "OSUnserializeXML.tab.c" default: break; } YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); @@ -1855,7 +1931,7 @@ yyreturn: } -#line 250 "OSUnserializeXML.y" +#line 326 "OSUnserializeXML.y" int @@ -2656,6 +2732,21 @@ buildString(parser_state_t *state, object_t *o) return o; }; +object_t * +buildSymbol(parser_state_t *state, object_t *o) +{ + OSSymbol *symbol; + + symbol = (OSSymbol *)OSSymbol::withCString(o->string); + if (o->idref >= 0) rememberObject(state, o->idref, symbol); + + free(o->string); + o->string = 0; + o->object = symbol; + + return o; +}; + object_t * buildData(parser_state_t *state, object_t *o) { @@ -2712,6 +2803,7 @@ OSUnserializeXML(const char *buffer, OSString **errorString) state->tags = OSDictionary::withCapacity(128); state->errorString = errorString; state->parsedObject = 0; + state->parsedObjectCount = 0; (void)yyparse((void *)state); @@ -2724,6 +2816,17 @@ OSUnserializeXML(const char *buffer, OSString **errorString) return object; } +OSObject* +OSUnserializeXML(const char *buffer, size_t bufferSize, OSString **errorString) +{ + if ((!buffer) || (!bufferSize)) return 0; + + // XML must be null terminated + if (buffer[bufferSize - 1] || strnlen(buffer, bufferSize) == bufferSize) return 0; + + return OSUnserializeXML(buffer, errorString); +} + // // diff --git a/libkern/c++/OSUnserializeXML.y b/libkern/c++/OSUnserializeXML.y index abcc20484..10465a9aa 100644 --- a/libkern/c++/OSUnserializeXML.y +++ b/libkern/c++/OSUnserializeXML.y @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2009 Apple Inc. All rights reserved. + * Copyright (c) 1999-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,6 +64,8 @@ #include #include +#define MAX_OBJECTS 65535 + #define YYSTYPE object_t * #define YYPARSE_PARAM state #define YYLEX_PARAM (parser_state_t *)state @@ -75,7 +77,7 @@ typedef struct object { struct object *free; struct object *elements; OSObject *object; - OSString *key; // for dictionary + OSSymbol *key; // for dictionary int size; void *data; // for data char *string; // for string & symbol @@ -94,6 +96,7 @@ typedef struct parser_state { OSDictionary *tags; // used to remember "ID" tags OSString **errorString; // parse error with line OSObject *parsedObject; // resultant object of parsed text + int parsedObjectCount; } parser_state_t; #define STATE ((parser_state_t *)state) @@ -114,6 +117,7 @@ static object_t *buildDictionary(parser_state_t *state, object_t *o); static object_t *buildArray(parser_state_t *state, object_t *o); static object_t *buildSet(parser_state_t *state, object_t *o); static object_t *buildString(parser_state_t *state, object_t *o); +static object_t *buildSymbol(parser_state_t *state, object_t *o); static object_t *buildData(parser_state_t *state, object_t *o); static object_t *buildNumber(parser_state_t *state, object_t *o); static object_t *buildBoolean(parser_state_t *state, object_t *o); @@ -155,13 +159,62 @@ input: /* empty */ { yyerror("unexpected end of buffer"); } ; -object: dict { $$ = buildDictionary(STATE, $1); } - | array { $$ = buildArray(STATE, $1); } - | set { $$ = buildSet(STATE, $1); } - | string { $$ = buildString(STATE, $1); } - | data { $$ = buildData(STATE, $1); } - | number { $$ = buildNumber(STATE, $1); } - | boolean { $$ = buildBoolean(STATE, $1); } +object: dict { $$ = buildDictionary(STATE, $1); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + } + | array { $$ = buildArray(STATE, $1); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + } + | set { $$ = buildSet(STATE, $1); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + } + | string { $$ = buildString(STATE, $1); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + } + | data { $$ = buildData(STATE, $1); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + } + | number { $$ = buildNumber(STATE, $1); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + } + | boolean { $$ = buildBoolean(STATE, $1); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } + } | idref { $$ = retrieveObject(STATE, $1->idref); if ($$) { $$->object->retain(); @@ -170,6 +223,12 @@ object: dict { $$ = buildDictionary(STATE, $1); } YYERROR; } freeObject(STATE, $1); + + STATE->parsedObjectCount++; + if (STATE->parsedObjectCount > MAX_OBJECTS) { + yyerror("maximum object count"); + YYERROR; + } } ; @@ -187,11 +246,21 @@ dict: '{' '}' { $$ = $1; pairs: pair | pairs pair { $$ = $2; $$->next = $1; + + object_t *o; + o = $$->next; + while (o) { + if (o->key == $$->key) { + yyerror("duplicate dictionary key"); + YYERROR; + } + o = o->next; + } } ; pair: key object { $$ = $1; - $$->key = (OSString *)$$->object; + 
$$->key = (OSSymbol *)$$->object; $$->object = $2->object; $$->next = NULL; $2->object = 0; @@ -199,7 +268,14 @@ pair: key object { $$ = $1; } ; -key: KEY { $$ = buildString(STATE, $1); } +key: KEY { $$ = buildSymbol(STATE, $1); + +// STATE->parsedObjectCount++; +// if (STATE->parsedObjectCount > MAX_OBJECTS) { +// yyerror("maximum object count"); +// YYERROR; +// } + } ; //------------------------------------------------------------------------------ @@ -1047,6 +1123,21 @@ buildString(parser_state_t *state, object_t *o) return o; }; +object_t * +buildSymbol(parser_state_t *state, object_t *o) +{ + OSSymbol *symbol; + + symbol = (OSSymbol *)OSSymbol::withCString(o->string); + if (o->idref >= 0) rememberObject(state, o->idref, symbol); + + free(o->string); + o->string = 0; + o->object = symbol; + + return o; +}; + object_t * buildData(parser_state_t *state, object_t *o) { @@ -1103,6 +1194,7 @@ OSUnserializeXML(const char *buffer, OSString **errorString) state->tags = OSDictionary::withCapacity(128); state->errorString = errorString; state->parsedObject = 0; + state->parsedObjectCount = 0; (void)yyparse((void *)state); @@ -1115,6 +1207,17 @@ OSUnserializeXML(const char *buffer, OSString **errorString) return object; } +OSObject* +OSUnserializeXML(const char *buffer, size_t bufferSize, OSString **errorString) +{ + if ((!buffer) || (!bufferSize)) return 0; + + // XML must be null terminated + if (buffer[bufferSize - 1] || strnlen(buffer, bufferSize) == bufferSize) return 0; + + return OSUnserializeXML(buffer, errorString); +} + // // diff --git a/libkern/conf/MASTER b/libkern/conf/MASTER index 1e25a04c3..edcb732a6 100644 --- a/libkern/conf/MASTER +++ b/libkern/conf/MASTER @@ -81,11 +81,6 @@ options CONFIG_KEC_FIPS # Kernel External Components for FIPS compliance (KEC_ # libkern/kmod/cplus_{start.c,stop.c}. 
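The new size-checked OSUnserializeXML(buffer, bufferSize, errorString) overload above refuses any buffer that is not NUL-terminated within bufferSize, so the parser can never scan past the end of an untrusted allocation. A hedged caller sketch (data/dataSize are placeholders for untrusted input, not identifiers from this tree):

    OSString *errorString = NULL;
    OSObject *obj = OSUnserializeXML(data, dataSize, &errorString);
    if (!obj) {
        // NULL covers an empty buffer, a buffer with no embedded NUL,
        // a syntax error, a duplicate dictionary key, or more than
        // MAX_OBJECTS (65535) objects in one plist.
        if (errorString) errorString->release();
    }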
options CONFIG_STATIC_CPPINIT # Static library initializes kext cpp runtime # -# configurable kernel - general switch to say we are building for an -# embedded device -# -options CONFIG_EMBEDDED # - # secure_kernel - secure kernel from user programs options SECURE_KERNEL # diff --git a/libkern/conf/MASTER.i386 b/libkern/conf/MASTER.i386 deleted file mode 100644 index fab8b50c8..000000000 --- a/libkern/conf/MASTER.i386 +++ /dev/null @@ -1,15 +0,0 @@ -###################################################################### -# -# RELEASE = [ intel mach libkerncpp hibernation networking config_dtrace crypto allcrypto zlib config_kxld config_static_cppinit iokitstats vm_pressure_events ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug ] -# -# EMBEDDED = [ intel mach libkerncpp networking crypto zlib ] -# DEVELOPMENT = [ EMBEDDED config_dtrace ] -# -###################################################################### - -machine "i386" # -cpu "i386" # - -options NO_NESTED_PMAP # diff --git a/libkern/conf/MASTER.x86_64 b/libkern/conf/MASTER.x86_64 index 311403c6f..0813228a0 100644 --- a/libkern/conf/MASTER.x86_64 +++ b/libkern/conf/MASTER.x86_64 @@ -13,3 +13,5 @@ machine "x86_64" # cpu "x86_64" # options NO_NESTED_PMAP # + +options CONFIG_MACF # Mandatory Access Control Framework diff --git a/libkern/conf/Makefile b/libkern/conf/Makefile index 65190ee61..25a42ef5e 100644 --- a/libkern/conf/Makefile +++ b/libkern/conf/Makefile @@ -3,56 +3,37 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = - -COMP_SUBDIRS = - -INST_SUBDIRS = - -ifndef LIBKERN_KERNEL_CONFIG -export LIBKERN_KERNEL_CONFIG = $(KERNEL_CONFIG) -endif +MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) -ifneq ($(MACHINE_CONFIG), DEFAULT) -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT) -else -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT) -endif - -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC) - -$(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(ARCH_CONFIG_LC) - $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - $(MKDIR) $${doconf_target}; \ - cd $${doconf_target}; \ +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ + $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/Makefile.template \ + $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/files \ + $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) + $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ + cd $(addsuffix /conf, $(TARGET)); \ rm -f $(notdir $?); \ - cp $? 
$${doconf_target}; \ - if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d $(TARGET)/$(LIBKERN_KERNEL_CONFIG) $(LIBKERN_KERNEL_CONFIG); \ - ); - -do_all: $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG)/Makefile - $(_v)next_source=$(subst conf/,,$(SOURCE)); \ - next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH)); \ - ${MAKE} -C $(COMPOBJROOT)/$(LIBKERN_KERNEL_CONFIG) \ - MAKEFILES=$(TARGET)/$(LIBKERN_KERNEL_CONFIG)/Makefile \ - SOURCE=$${next_source} \ - RELATIVE_SOURCE_PATH=$${next_relsource} \ - TARGET=$(TARGET) \ - INCL_MAKEDEP=FALSE \ - KERNEL_CONFIG=$(LIBKERN_KERNEL_CONFIG) \ + cp $? .; \ + if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ + $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); + +do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile + $(_v)${MAKE} \ + -C $(TARGET)/$(CURRENT_KERNEL_CONFIG) \ + -f $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile \ + CURRENT_KERNEL_CONFIG=${CURRENT_KERNEL_CONFIG} \ + CURRENT_ARCH_CONFIG=${CURRENT_ARCH_CONFIG} \ + CURRENT_MACHINE_CONFIG=${CURRENT_MACHINE_CONFIG} \ + SOURCE=$(subst conf/,,$(SOURCE)) \ + TARGET=${TARGET} \ + OBJPATH=${OBJPATH} \ build_all; -do_build_all: do_all +do_build_all:: do_all include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libkern/conf/Makefile.i386 b/libkern/conf/Makefile.i386 deleted file mode 100644 index d75614478..000000000 --- a/libkern/conf/Makefile.i386 +++ /dev/null @@ -1,17 +0,0 @@ -###################################################################### -#BEGIN Machine dependent Makefile fragment for i386 -###################################################################### - -# sha1 Files to build with -DSHA1_USE_ASSEMBLY=1 -sha1.o_CFLAGS_ADD += -DSHA1_USE_ASSEMBLY=1 - -# Files that must go in the __HIB segment: -UNCONFIGURED_HIB_FILES= \ - WKdmDecompress.o - -HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS)) - - -###################################################################### -#END Machine dependent Makefile fragment for i386 -###################################################################### diff --git a/libkern/conf/Makefile.template b/libkern/conf/Makefile.template index 7d1848535..a9b7a9af7 100644 --- a/libkern/conf/Makefile.template +++ b/libkern/conf/Makefile.template @@ -5,16 +5,6 @@ # the terms and conditions for use and redistribution. 
# -# -# Export IDENT for sub-makefiles -# -export IDENT - -# -# INCFLAGS -# -INCFLAGS_MAKEFILE= - export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule @@ -26,8 +16,15 @@ include $(MakeInc_def) # # CFLAGS # -CFLAGS+= -include meta_features.h -DLIBKERN_KERNEL_PRIVATE -DOSALLOCDEBUG=1 \ - $(CFLAGS_INLINE_CONFIG) +CFLAGS+= -include meta_features.h -DLIBKERN_KERNEL_PRIVATE -DOSALLOCDEBUG=1 + +# Objects that don't want -Wcast-align warning (8474835) +OSKextLib.cpo_CXXWARNFLAGS_ADD = -Wno-cast-align +OSKext.cpo_CXXWARNFLAGS_ADD = -Wno-cast-align +OSMetaClass.cpo_CXXWARNFLAGS_ADD = -Wno-cast-align +OSUnserialize.cpo_CXXWARNFLAGS_ADD = -Wno-cast-align +corecrypto_md5.o_CWARNFLAGS_ADD = -Wno-cast-align +corecrypto_sha1.o_CWARNFLAGS_ADD = -Wno-cast-align # zlib is 3rd party source compress.o_CWARNFLAGS_ADD = -Wno-cast-qual @@ -67,15 +64,26 @@ COMP_SUBDIRS = %CFILES -%MFILES - %SFILES -%BFILES - -%ORDERED %MACHDEP +# +# Machine-independent per-file flags +# + +# zlib is 3rd party source +compress.o_CWARNFLAGS_ADD = -Wno-cast-qual +deflate.o_CWARNFLAGS_ADD = -Wno-cast-qual +infback.o_CWARNFLAGS_ADD = -Wno-cast-qual +inffast.o_CWARNFLAGS_ADD = -Wno-cast-qual +inflate.o_CWARNFLAGS_ADD = -Wno-cast-qual +trees.o_CWARNFLAGS_ADD = -Wno-cast-qual +uncompr.o_CWARNFLAGS_ADD = -Wno-cast-qual + +# warnings in bison-generated code +OSUnserializeXML.cpo_CXXWARNFLAGS_ADD = -Wno-uninitialized + # # OBJSDEPS is the set of files (defined in the machine dependent # template if necessary) which all objects depend on (such as an @@ -83,30 +91,22 @@ COMP_SUBDIRS = # ${OBJS}: ${OBJSDEPS} - -%LOAD - LDOBJS = $(OBJS) $(COMPONENT).filelist: $(LDOBJS) - $(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \ - for hib_file in ${HIB_FILES}; \ + $(_v)for hib_file in ${HIB_FILES}; \ do \ - $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \ + $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \ mv $${hib_file}__ $${hib_file} ; \ - done; \ - fi + done @echo LDFILELIST $(COMPONENT) $(_v)( for obj in ${LDOBJS}; do \ - echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist do_all: $(COMPONENT).filelist -do_depend: do_all - $(_v)${MD} -u Makedep -f -d `ls *.d` - -do_build_all: do_depend +do_build_all:: do_all %RULES diff --git a/libkern/conf/Makefile.x86_64 b/libkern/conf/Makefile.x86_64 index 719fd1d29..7b0de925d 100644 --- a/libkern/conf/Makefile.x86_64 +++ b/libkern/conf/Makefile.x86_64 @@ -2,15 +2,6 @@ #BEGIN Machine dependent Makefile fragment for x86_64 ###################################################################### -# sha1 Files to build with -DSHA1_USE_ASSEMBLY=1 -sha1.o_CFLAGS_ADD += -DSHA1_USE_ASSEMBLY=1 - -# Files that must go in the __HIB segment: -UNCONFIGURED_HIB_FILES= \ - WKdmDecompress.o - -HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS)) - ###################################################################### #END Machine dependent Makefile fragment for x86_64 ###################################################################### diff --git a/libkern/conf/files b/libkern/conf/files index 87c3c331d..3eaf35007 100644 --- a/libkern/conf/files +++ b/libkern/conf/files @@ -41,6 +41,11 @@ libkern/OSKextLib.cpp standard libkerncpp libkern/mkext.c standard libkern/OSKextVersion.c standard +libkern/net/inet_aton.c standard +libkern/net/inet_ntoa.c standard +libkern/net/inet_ntop.c standard +libkern/net/inet_pton.c standard + 
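The per-object -Wno-cast-align overrides above (keyed to the bug number 8474835 in the comment) silence a clang diagnostic for those files rather than rewriting every cast site. For reference, this is the shape of code the warning fires on; a sketch, not a quote from those sources:

    static uint32_t load32(const uint8_t *p)
    {
        // clang: cast from 'const uint8_t *' to 'const uint32_t *'
        // increases required alignment from 1 to 4 [-Wcast-align]
        return *(const uint32_t *)p;
    }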
libkern/stdio/scanf.c standard libkern/uuid/uuid.c standard @@ -61,7 +66,7 @@ libkern/zlib/uncompr.c optional zlib libkern/zlib/zutil.c optional zlib libkern/crypto/register_crypto.c optional crypto -libkern/crypto/corecrypto_sha2.c optional crypto allcrypto +libkern/crypto/corecrypto_sha2.c optional crypto allcrypto libkern/crypto/corecrypto_sha1.c optional crypto libkern/crypto/corecrypto_sha1.c optional ipsec libkern/crypto/corecrypto_md5.c optional crypto diff --git a/libkern/conf/files.i386 b/libkern/conf/files.i386 deleted file mode 100644 index 2982431f7..000000000 --- a/libkern/conf/files.i386 +++ /dev/null @@ -1,7 +0,0 @@ -libkern/i386/OSAtomic.s standard -libkern/zlib/intel/inffastS.s optional zlib -libkern/zlib/intel/adler32vec.s optional zlib - -# Optimized WKdm compressor -libkern/kxld/i386/WKdmCompress.s optional hibernation -libkern/kxld/i386/WKdmDecompress.s optional hibernation diff --git a/libkern/conf/files.x86_64 b/libkern/conf/files.x86_64 index b1f7e44fa..51d4530fe 100644 --- a/libkern/conf/files.x86_64 +++ b/libkern/conf/files.x86_64 @@ -1,7 +1,2 @@ libkern/x86_64/OSAtomic.s standard -libkern/zlib/intel/inffastS.s optional zlib -libkern/zlib/intel/adler32vec.s optional zlib -# Optimized WKdm compressor -libkern/kxld/i386/WKdmCompress.s optional hibernation -libkern/kxld/i386/WKdmDecompress.s optional hibernation diff --git a/libkern/crypto/corecrypto_aes.c b/libkern/crypto/corecrypto_aes.c index 161715af1..f70fc0cb8 100644 --- a/libkern/crypto/corecrypto_aes.c +++ b/libkern/crypto/corecrypto_aes.c @@ -57,11 +57,13 @@ aes_rval aes_encrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv return aes_good; } +#if defined (__i386__) || defined (__x86_64__) /* This does one block of ECB, using the CBC implementation - this allow to use the same context for both CBC and ECB */ aes_rval aes_encrypt(const unsigned char *in_blk, unsigned char *out_blk, aes_encrypt_ctx cx[1]) { return aes_encrypt_cbc(in_blk, NULL, 1, out_blk, cx); } +#endif aes_rval aes_decrypt_key(const unsigned char *key, int key_len, aes_decrypt_ctx cx[1]) { @@ -88,11 +90,13 @@ aes_rval aes_decrypt_cbc(const unsigned char *in_blk, const unsigned char *in_iv return aes_good; } +#if defined (__i386__) || defined (__x86_64__) /* This does one block of ECB, using the CBC implementation - this allow to use the same context for both CBC and ECB */ aes_rval aes_decrypt(const unsigned char *in_blk, unsigned char *out_blk, aes_decrypt_ctx cx[1]) { return aes_decrypt_cbc(in_blk, NULL, 1, out_blk, cx); } +#endif aes_rval aes_encrypt_key128(const unsigned char *key, aes_encrypt_ctx cx[1]) { diff --git a/libkern/crypto/intel/sha1edp.h b/libkern/crypto/intel/sha1edp.h deleted file mode 100644 index ba90122fd..000000000 --- a/libkern/crypto/intel/sha1edp.h +++ /dev/null @@ -1,51 +0,0 @@ -#if !defined sha1edp_h -#define sha1edp_h - - -/* This file is included in sha1edpLittleEndian.s and sha1edpBigEndian.s to - define the symbols below for use in assembly code. - - It is also included in sha1_locl.h and compiled in C to test that the - hard-coded values here match the values used in C. CC_SHA1_BLOCK_BYTES - is defined in another header, so an error will be generated if its - definition here conflicts. The other symbols are tested below, with - the CheckAssertion definition. -*/ - - -// Number of bytes in a SHA-1 block. -#define CC_SHA1_BLOCK_BYTES 64 - -// Offset of h0 to h4 members in SHA-1 context structure. 
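The newly #if-guarded aes_encrypt()/aes_decrypt() wrappers above rest on the identity their comments state: one block of ECB is CBC over a single block with no IV, so the same key context and the same CBC routine serve both modes. A minimal usage sketch under that reading (key/in/out are placeholders):

    aes_encrypt_ctx ctx;
    aes_encrypt_key128(key, &ctx);
    // in_iv == NULL and num_blk == 1 reduce CBC to one-block ECB
    aes_encrypt_cbc(in, NULL, 1, out, &ctx);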
-#define Contexth0 (0*4) -#define Contexth1 (1*4) -#define Contexth2 (2*4) -#define Contexth3 (3*4) -#define Contexth4 (4*4) - - -#if !defined __ASSEMBLER__ - - #include // Get offsetof macro. - - /* Declare CheckAssertion so that if any of the declarations below - differ from it, the compiler will report an error. - */ - extern char CheckAssertion[1]; - - /* Ensure that Contexth0 through Contexth4 are the byte offsets of the - h0 through h4 members of the SHA-1 context structure. - */ - extern char CheckAssertion[Contexth0 == offsetof(SHA_CTX, h0)]; - extern char CheckAssertion[Contexth1 == offsetof(SHA_CTX, h1)]; - extern char CheckAssertion[Contexth2 == offsetof(SHA_CTX, h2)]; - extern char CheckAssertion[Contexth3 == offsetof(SHA_CTX, h3)]; - extern char CheckAssertion[Contexth4 == offsetof(SHA_CTX, h4)]; - /* If these assertions fail, change the definitions of Contexth0 to - Contexth4 to match the offsets of the members. - */ - -#endif // !defined __ASSEMBLER__ - - -#endif // !defined sha1edp_h diff --git a/libkern/crypto/intel/sha1edp.s b/libkern/crypto/intel/sha1edp.s deleted file mode 100644 index 8c52a5e7b..000000000 --- a/libkern/crypto/intel/sha1edp.s +++ /dev/null @@ -1,1495 +0,0 @@ -/* sha1edp.s : this file provides optimized x86_64 and i386 implementation of the sha1 function - CoreOS - vector and numerics group - cclee 6-21-10 - - The implementation is based on the principle described in an Intel online article - "Improving the Performance of the Secure Hash Algorithm (SHA-1)" - http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/ - - - Update HASH[] by processing a one 64-byte block in MESSAGE[] can be represented by the following C function - -void SHA1( int HASH[], int MESSAGE[] ) -{ - int A[81], B[81], C[81], D[81], E[81]; - int W[80]; - - int i, FN; - - A[0] = HASH[0]; - B[0] = HASH[1]; - C[0] = HASH[2]; - D[0] = HASH[3]; - E[0] = HASH[4]; - - for ( i=0; i<80; ++i ) - { - if ( i < 16 ) - W[i] = BIG_ENDIAN_LOAD( MESSAGE[i] ); - else - W[i] = ROTATE_LEFT( W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1 ); - - FN = F( i, B[i], C[i], D[i] ); - - A[i+1] = FN + E[i] + ROTATE_LEFT( A[i], 5 ) + W[i] + K(i); - B[i+1] = A[i]; - C[i+1] = ROTATE_LEFT( B[i], 30 ); - D[i+1] = C[i]; - E[i+1] = D[i]; - } - - HASH[0] += A[80]; - HASH[1] += B[80]; - HASH[2] += C[80]; - HASH[3] += D[80]; - HASH[4] += E[80]; -} - - For i=0:15, W[i] is simply big-endian loading of MESSAGE[i]. For i=16:79, W[i] is updated according to W[i] = ROTATE_LEFT( W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1 ); - - The approach (by Dean Gaudet) can be used to vectorize the computation of W[i] for i=16:79, - - 1. done on 4 consequtive W[i] values in a single XMM register - W[i ] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1 - W[i+1] = (W[i-2] ^ W[i-7] ^ W[i-13] ^ W[i-15]) rol 1 - W[i+2] = (W[i-1] ^ W[i-6] ^ W[i-12] ^ W[i-14]) rol 1 - W[i+3] = ( 0 ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1 - - 2. this additional calculation unfortunately requires many additional operations - W[i+3] ^= W[i] rol 1 - - 3. once we have 4 W[i] values in XMM we can also add four K values with one instruction - W[i:i+3] += {K,K,K,K} - - Let W0 = {W[i] W[i+1] W[i+2] W[i+3]} be the current W-vector to be computed, W4 = {W[i-4] W[i-3] W[i-2] W[i-1]} be the previous vector, and so on - The Dean Gaudet approach can be expressed as - - 1. W0 = rotate_left(left_shift(W4,32) ^ W8 ^ left_shift(concatenate(W16,W12),64) ^ W16,1); - 2. W[i+3] ^= W[i] rol 1 - 3. 
W0 += {K,K,K,K} - - For i>=32, the Intel online article suggests that (using a basic identity (X rol 1) rol 1 = X rol 2) the update equation is equivalent to - - 1. W0 = rotate_left(left_shift(concatenate(W8,W4),64) ^ W16 ^ W28 ^ W32, 2); - - Note: - 1. In total, we need 8 16-byte registers or memory for W0,W4,...,W28. W0 and W32 can be the same register or memory. - 2. The registers are used in a circular buffering mode. For example, we start with W28,W24,...,W0 (with W0 indicating the most recent 16-byte) - i=0, W28,W24,...,W0 - i=4, W24,W20,...,W28 - i=8, W20,W16,...,W24 - . - . - and so forth. - 3. 2 ssse3 instructions are used in the Intel article, pshufb and palignr. - a. pshufb is used to simplify the BIG_ENDIAN_LOAD operation - b. palignr is used to simplify the computation of left_shift(concatenate(W12,W8),64) - 4. we probe __cpu_capabilities to detect ssse3 support and dispatch code with ssse3 support when available. - If ssse3 is not supported, a suboptimal code (pshufb and palignr workaround) is dispatched. - -*/ - -/* the code can be compiled into single block (64 bytes) per call mode by setting Multiple_blocks to 0 */ -#define Multiple_Blocks 1 - -#if defined (__x86_64__) || defined(__i386__) // x86_64 or i386 architectures - -#if defined(__x86_64__) - - // set up for x86_64 -#define stack_size (8+16*11+16*4) // 8 (alignedment) + x0-x10 + 4 128-bits for intermediate WK(t) storage -#define sp %rsp // unifying architectural stack pointer representation -#define ctx %rdi // 1st input argument, will move to HASH_PTR (%r9) -#define buf %rsi // 2nd input argument, will move to BUFFER_PTR (%r10) -#define cnt %r11 // will copy from the 3rd input argument (%rdx) -#define K_BASE %r8 // an aligned pointer to point to shufb reference numbers of table of K values -#define HASH_PTR %r9 // pointer to Hash values (A,B,C,D,E) -#define BUFFER_PTR %r10 // pointer to input blocks - -#else // !__x86_64__ - - // set up for i386 -#define stack_size (12+16*2+16*11+16*4) // 12-bytes (alignment) + extra 2 + 3 (W24/W28/XMM_SHUFB_BSWAP) + 8 (xmm0-xmm7) + 4 (WK(t)) -#define sp %esp // unifying architectural stack pointer representation -#define HASH_PTR stack_size+16+4(sp) // use 1st input argument from caller function, 16 for (esi/edi/ebx/ebp) -#define BUFFER_PTR stack_size+16+8(sp) // use 2nd input argument from caller function -#define cnt stack_size+16+12(sp) // use 3rd input argument from caller function -#define K_BASE stack_size-4(sp) // use for K_BASE - -#endif // __x86_64__ - -// symbolizing registers or stack memory with algorithmic variables W0,W4,...,W28 + W_TMP, W_TMP2, and XMM_SHUFB_BSWAP for code with ssse3 support - -#define W_TMP %xmm0 -#define W_TMP2 %xmm1 -#define W0 %xmm2 -#define W4 %xmm3 -#define W8 %xmm4 -#define W12 %xmm5 -#define W16 %xmm6 -#define W20 %xmm7 -#if defined(__x86_64__) -#define W24 %xmm8 -#define W28 %xmm9 -#define XMM_SHUFB_BSWAP %xmm10 // used only when ssse3 is supported -#else // defined (__i386__) -#define W24 12*16(sp) -#define W28 13*16(sp) -#define XMM_SHUFB_BSWAP 14*16(sp) // used only when ssse3 is supported -#endif - -#define xmov movaps // aligned 16-byte move -#define xmovu movups // unaligned 16-byte move - -// intermediate hash variables -#define A %ecx -#define B %esi -#define C %edi -#define D %ebp -#define E %edx - -// temp variables -#define T1 %eax -#define T2 %ebx - -#define WK(t) (t&15)*4(sp) - - // int F1(int B, int C, int D) { return (D ^ ( B & (C ^ D)); } - // result in T1 - .macro F1 - mov $1, T1 - xor $2, T1 - and $0, T1 - xor $2, T1 - 
.endm - - // int F2(int B, int C, int D) { return (D ^ B ^ C); } - // result in T1 - .macro F2 - mov $2, T1 - xor $1, T1 - xor $0, T1 - .endm - - // int F3(int B, int C, int D) { return (B & C) | (D & (B ^ C)); } - // result in T1 - .macro F3 - mov $1, T1 - mov $0, T2 - or $0, T1 - and $1, T2 - and $2, T1 - or T2, T1 - .endm - - // for i=60:79, F4 is identical to F2 - #define F4 F2 - - - /* - i=0:15, W[i] = BIG_ENDIAN_LOAD(MESSAGE[i]); - - with ssse3 support, this is achived via - for (i=0;i<16;i+=4) { - 1. W_TMP = new 16 bytes from MESSAGE[] - 2. W_TMP = pshufb(W_TMP, XMM_SHUFB_BSWAP); save to W circular buffer for updating W - 3. WTMP += {K,K,K,K}; - 4. save quadruple W[i]+K[i] = W_TMP in the stack memory; - } - - each step is represented in one of the following 4 macro definitions - - */ - - .macro W_PRECALC_00_15_0_ssse3 // input argument $0 : 0/4/8/12 -#if defined (__x86_64__) // BUFFER_PTR is already an address register in x86_64 - xmovu $0*4(BUFFER_PTR), W_TMP // read 16-bytes into W_TMP, BUFFER_PTR possibly not 16-byte aligned -#else // BUFFER_PTR is from the argument set up in the caller - mov BUFFER_PTR, T1 // T1 = BUFFER_PTR - xmovu $0*4(T1), W_TMP // read 16-bytes into W_TMP, BUFFER_PTR possibly not 16-byte aligned -#endif - .endm - - .macro W_PRECALC_00_15_1_ssse3 // input argument $0 : current 16-bytes in the circular buffer, one of W0,W4,W8,...,W28 - pshufb XMM_SHUFB_BSWAP, W_TMP // convert W_TMP from little-endian into big-endian - xmov W_TMP, $0 // save W_TMP in the circular buffer - .endm - - .macro W_PRECALC_00_15_2 // K_BASE points to the current K quadruple. -#if defined (__x86_64__) // K_BASE is already an address register in x86_64 - paddd (K_BASE), W_TMP // W_TMP += {K,K,K,K}; -#else // K_BASE is previously set up in the stack memory - mov K_BASE, T1 // T1 = K_BASE - paddd (T1), W_TMP // W_TMP += {K,K,K,K}; -#endif - .endm - - .macro W_PRECALC_00_15_3 - xmov W_TMP, WK($0&~3) // save quadruple W[i]+K in the stack memory, which would be used later for updating the hashes A/B/C/D/E - .endm - - /* - without ssse3 support, steps 1 and 2 need to be modified - 1. sequentially load 4 words into T1, bswap T1, and save it to 4-bytes in the stack space - 2. 
load the 16-bytes from the aligned stack memory into W_TMP - */ - - .macro W_PRECALC_00_15_0_nossse3 // input argument $0 : 0/4/8/12 - -#if defined (__x86_64__) - #define BUFFERP BUFFER_PTR -#else - mov BUFFER_PTR, T2 // copy BUFFER_PTR (from caller 2nd argument) to T2 - #define BUFFERP T2 -#endif - - // load 1st word, bswap it, save it to stack - mov $0*4(BUFFERP), T1 - bswap T1 - mov T1, 14*16(sp) - - // load 2nd word, bswap it, save it to stack - mov 4+$0*4(BUFFERP), T1 - bswap T1 - mov T1, 4+14*16(sp) - - // load 3rd word, bswap it, save it to stack - mov 8+$0*4(BUFFERP), T1 - bswap T1 - mov T1, 8+14*16(sp) - - // load 4th word, bswap it, save it to stack - mov 12+$0*4(BUFFERP), T1 - bswap T1 - mov T1, 12+14*16(sp) - .endm - - .macro W_PRECALC_00_15_1_nossse3 // input argument $0 : current 16-bytes in the circular buffer, one of W0,W4,W8,...,W28 - xmov 14*16(sp), W_TMP // load the bswapped 16-bytes from the aligned stack memory - xmov W_TMP, $0 // save W = W_TMP in the circular buffer - .endm - - // rounds 16-31 compute W[0] using the vectorization approach by Dean Gaudet - /* - W[i ] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1 - W[i+1] = (W[i-2] ^ W[i-7] ^ W[i-13] ^ W[i-15]) rol 1 - W[i+2] = (W[i-1] ^ W[i-6] ^ W[i-12] ^ W[i-14]) rol 1 - W[i+3] = ( 0 ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1 - - W[i+3] ^= W[i] rol 1; // this W[i] is already rol by 1, if we are taking from the intial W before rol 1, we should rol this by 2 - - The operation (updating W and W+K) is scheduled as and divided into 4 steps - - 0. W_tmp = W3; W = W14 ^ W8 - 1. W = W3 ^ W8 ^ W14 ^ W16; W_TMP = W; W_TMP2 = (W[i] 0 0 0); - 2. W_TMP = (W3 ^ W8 ^ W14 ^ W16) rol 1; split (W[i] 0 0 0) rol 2 in W_TMP2 and W - 3. W = W_TMP = W_TMP ^ W_TMP2 ^ W = (W3 ^ W8 ^ W14 ^ W16) rol 1 ^ (W[i] 0 0 0) rol 2; WK = W _TMP+K; - - */ - - .macro W_PRECALC_16_31_0_ssse3 // input arguments : W16,W12,W8,W4,W - xmov $1, $4 // W = W12 - palignr $$8, $0, $4 // W = W14 - xmov $3, W_TMP // W_TMP = W4 - psrldq $$4, W_TMP // W_TMP = W3 - pxor $2, $4 // W = W8 ^ W14 - .endm - - .macro W_PRECALC_16_31_1 // input arguments : W16,W - pxor $0, W_TMP // W_TMP = W3 ^ W16 - pxor W_TMP, $1 // W = W3 ^ W16 ^ W8 ^ W14 - xmov $1, W_TMP2 // W_TMP2 = W3 ^ W16 ^ W8 ^ W14 - xmov $1, W_TMP // W_TMP = W3 ^ W16 ^ W8 ^ W14 - pslldq $$12, W_TMP2 // W_TMP2 = (W[i] 0 0 0) - .endm - - .macro W_PRECALC_16_31_2 // input argument : W - psrld $$31, $0 // (W3 ^ W16 ^ W8 ^ W14)>>31 - pslld $$1, W_TMP // (W3 ^ W16 ^ W8 ^ W14)<<1 - por $0, W_TMP // W_TMP = (W3 ^ W16 ^ W8 ^ W14) rol 1 - xmov W_TMP2, $0 // copy W[i] at location of W[i+3] - psrld $$30, W_TMP2 // W_TMP2 = W[i] lower 2 bits after rol 2 - pslld $$2, $0 // W = W[i] higher 30 bits after rol 2 - .endm - - .macro W_PRECALC_16_31_3 // input arguments: W, i, K_XMM -#if defined (__i386__) - mov K_BASE, T1 // K_BASE is store in the stack memory for i386 -#endif - pxor $0, W_TMP - pxor W_TMP2, W_TMP // W_TMP = (W3 ^ W16 ^ W8 ^ W14) rol 1 ^ (W[i] 0 0 0) rol 2 - xmov W_TMP, $0 // save W = W_TMP in the W circular buffer -#if defined (__x86_64__) - paddd $2(K_BASE), W_TMP // W+K -#else - paddd $2(T1), W_TMP // W+K -#endif - xmov W_TMP, WK($1&~3) // save WK = W+K for later update of the hashes A/B/C/D/E - .endm - - // the following is a variant of W_PRECALC_16_31_0_ssse3 to be used for system without ssse3, palignr is replaced with 4 instructions - - .macro W_PRECALC_16_31_0_nossse3 // input arguments : W16,W12,W8,W4,W - xmov $1, $4 // W = W12 = (w9 w10 w11 w12) - - // the following is a wrokaround for palignr - xmov $0, W_TMP // W16 
= (w13 w14 w15 w16) - pslldq $$8, $4 // shift left to make (w11 w12 0 0) - psrldq $$8, W_TMP // shift right to make (0 0 w13 w14) - por W_TMP, $4 // W = W14 = (w11 w12 w13 w14) - - xmov $3, W_TMP // W_TMP = W4 = (w1 w2 w3 w4) - psrldq $$4, W_TMP // W_TMP = W3 = (0 w1 w2 w3) - pxor $2, $4 // W = W8 ^ W14 - .endm - - /* rounds 32-79 compute W und W+K iusing the vectorization approach from the Intel article - - W = rotate_left(left_shift(concatenate(W8,W4),64) ^ W16 ^ W28 ^ W32, 2); - - where left_shift(concatenate(W8,W4),64) is equivalent to W6. Note also that W32 and W use the same register. - - - 0. W_tmp = W6; W = W28 ^ W32; - 1. W = W_tmp = W6 ^ W16 ^ W28 ^ W32; - 2. W_tmp = (W6 ^ W16 ^ W28 ^ W32) rol 2; - 3. W = W_Tmp; WK = W_tmp + K; - - */ - - - .macro W_PRECALC_32_79_0_ssse3 // inputr arguments : W28,W8,W4,W - xmov $2, W_TMP // (w1 w2 w3 w4) - pxor $0, $3 // W = W28 ^ W32; - palignr $$8, $1, W_TMP // W_tmp = (w3 w4 w5 w6) = W6; - .endm - - // the following is a variant and will be used for system without ssse3 support - .macro W_PRECALC_32_79_0_nossse3 // input arguments : W28,W8,W4,W - xmov $2, W_TMP // (w1 w2 w3 w4) - xmov $1, W_TMP2 // (w5 w6 w7 w8) - pxor $0, $3 // W = W28 ^ W32 - pslldq $$8, W_TMP // (w3 w4 0 0) - psrldq $$8, W_TMP2 // (0 0 w5 w6) - por W_TMP2, W_TMP // W_tmp = (w3 w4 w5 w6) = W6 - .endm - - // this is a variant of W_PRECALC_32_79_0_ssse3 for i386 (as W24/W28 are stored in memory, not in registers) - .macro W_PRECALC_32_79_0_i386_ssse3 // input arguments : W28,W8,W4,W - xmov $3, W_TMP // W32 - pxor $0, W_TMP // W28 ^ W32 - xmov W_TMP, $3 // W = W28 ^ W32; - xmov $2, W_TMP // W4 - palignr $$8, $1, W_TMP // W_tmp = (w3 w4 w5 w6) = W6; - .endm - - // this is a variant of W_PRECALC_32_79_0_nossse3 for i386 (as W24/W28 are stored in memory, not in registers) - .macro W_PRECALC_32_79_0_i386_nossse3 // input arguments : W28,W8,W4,W - xmov $3, W_TMP // W32 - pxor $0, W_TMP // W28 ^ W32 - xmov W_TMP, $3 // W = W28 ^ W32 - xmov $2, W_TMP // W4 = (w1 w2 w3 w4) - xmov $1, W_TMP2 // W8 = (w5 w6 w7 w8) - pslldq $$8, W_TMP // (w3 w4 0 0) - psrldq $$8, W_TMP2 // (0 0 w5 w6) - por W_TMP2, W_TMP // W_tmp = (w3 w4 w5 w6) = W6 - .endm - - .macro W_PRECALC_32_79_1 // input arguments : W16,W - pxor $0, W_TMP // W_tmp = W6 ^ W16 - pxor $1, W_TMP // W_tmp = W6 ^ W16 ^ W28 ^ W32 - xmov W_TMP, $1 // W = W_tmp = W6 ^ W16 ^ W28 ^ W32 - .endm - - .macro W_PRECALC_32_79_2 // input argument : W - psrld $$30, $0 // W >> 30 - pslld $$2, W_TMP // W << 2 - por $0, W_TMP // W_tmp = (W6 ^ W16 ^ W28 ^ W32) rol 2 - .endm - - // this is a variant of W_PRECALC_32_79_2 for i386 (as W24/W28 are stored in memory, not in registers) - // this should be used when the input is either W24 or W28 on i386 architecture - .macro W_PRECALC_32_79_2_i386 // input argument : W - xmov $0, W_TMP2 // W - psrld $$30, W_TMP2 // W >> 30 - xmov W_TMP2, $0 // save (W >> 30) at W - pslld $$2, W_TMP // W_tmp << 2 - por $0, W_TMP // W_tmp = (W6 ^ W16 ^ W28 ^ W32) rol 2 - .endm - - .macro W_PRECALC_32_79_3 // input argument W, i, K_XMM -#if defined (__x86_64__) - xmov W_TMP, $0 // W = (W6 ^ W16 ^ W28 ^ W32) rol 2 - paddd $2(K_BASE), W_TMP // W + K - xmov W_TMP, WK($1&~3) // write W+K -#else - mov K_BASE, T1 // T1 = K_BASE (which is in the caller argument) - xmov W_TMP, $0 // W = (W6 ^ W16 ^ W28 ^ W32) rol 2 - paddd $2(T1), W_TMP // W_tmp = W + K - xmov W_TMP, WK($1&~3) // write WK -#endif - .endm - - - /* The hash update operation is completed by the following statements. 
- - A[i+1] = FN + E[i] + ROTATE_LEFT( A[i], 5 ) + WK(i); - B[i+1] = A[i]; - C[i+1] = ROTATE_LEFT( B[i], 30 ); - D[i+1] = C[i]; - E[i+1] = D[i]; - - Suppose we start with A0,B0,C0,D0,E0. The 1st iteration can be expressed as follows: - - A1 = FN + E0 + rol(A0,5) + WK; - B1 = A0; - C1 = rol(B0, 30); - D1 = C0; - E1 = D0; - - to avoid excessive memory movement between registers, - 1. A1 = FN + E0 + rol(A0,5) + WK; can be temporarily saved in E0, - 2. C1 = rol(B0,30) can be temporarily saved in B0. - - Therefore, ignoring the time index, the update operation is equivalent to - 1. E = FN(B,C,D) + E + rol(A,5) + WK(i) - 2. B = rol(B,30) - 3. the hashes are now stored in the order of E,A,B,C,D - - - To pack 2 hash update operations in 1 iteration, starting with A,B,C,D,E - 1. E = FN(B,C,D) + E + rol(A,5) + WK(i) - 2. B = rol(B,30) - // now the hashes are in the order of E,A,B,C,D - 3. D = FN(A,B,C) + D + rol(E,5) + WK(i+1) - 4. A = rol(A,30) - // now the hashes are in the order of D,E,A,B,C - - These operations are distributed into the following 2 macro definitions RR0 and RR1. - - */ - - .macro RR0 // input arguments : FN, A, B, C, D, E, i - $0 $2, $3, $4 // T1 = FN(B,C,D) - add WK($6), $5 // E + WK(i) - rol $$30, $2 // B = rol(B,30) - mov $1, T2 // T2 = A - add WK($6+1), $4 // D + WK(i+1) - rol $$5, T2 // rol(A,5) - add T1, $5 // E = FN(B,C,D) + E + WK(i) - .endm - - .macro RR1 - add $5, T2 // T2 = FN(B,C,D) + E + rol(A,5) + WK(i) - mov T2, $5 // E = FN(B,C,D) + E + rol(A,5) + WK(i) - rol $$5, T2 // rol(E,5) - add T2, $4 // D + WK(i+1) + rol(E,5) - $0 $1, $2, $3 // FN(A,B,C) - add T1, $4 // D = FN(A,B,C) + D + rol(E,5) + WK(i+1) - rol $$30, $1 // A = rol(A,30) - .endm - - - - /* - - The following macro definitions are used to expand code for the per-block sha1 operation. - - INITIAL_W_PRECALC_ssse3 : BIG_ENDIAN_LOAD(64 bytes block) into W (i=0:15) and store W+K into the stack memory - INTERNAL_ssse3 : updating W (16:79) and update the digests A/B/C/D/E (i=0:63, based on W+K stored in the stack memory) - ENDING : finishing up update the digests A/B/C/D/E (i=64:79) - - For multiple-block sha1 operation (Multiple_Blocks = 1), INITIAL_W_PRECALC_ssse3 and ENDING are combined - into 1 macro definition for software pipeling. 
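Restated in scalar C, the two rounds that one RR0/RR1 pair retires look like this (rol() is a 32-bit left rotate, F is whichever of F1..F4 the round group uses, and WK[] is the precomputed W+K array on the stack; a sketch of the schedule, not a drop-in routine):

    E += rol(A, 5) + F(B, C, D) + WK[i];      B = rol(B, 30);
    // the working values are now ordered E,A,B,C,D
    D += rol(E, 5) + F(A, B, C) + WK[i + 1];  A = rol(A, 30);
    // now ordered D,E,A,B,C: two rounds retired with no register moves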
- - SOFTWARE_PIPELINING_ssse3 : BIG_ENDIAN_LOAD(64 bytes block) into W (i=0:15) and store W+K into the stack, and finishing up update the digests A/B/C/D/E (i=64:79) - - assume cnt (the number of blocks) >= 1, the main code body should look like - - INITIAL_W_PRECALC_ssse3 // W = big_endian_load and pre-compute W+K (i=0:15) - do { - INTERNAL_ssse3 // update W(i=16:79), and update hash digests A/B/C/D/E (i=0:63) - cnt--; - if (cnt==0) break; - BUFFER_PTR += 64; - SOFTWARE_PIPELINING_ssse3; // update hash digests A/B/C/D/E (i=64:79) + W = big_endian_load and pre-compute W+K (i=0:15) - } - ENDING // update hash digests A/B/C/D/E (i=64:79) - - */ - - #define W_PRECALC_00_15_0 W_PRECALC_00_15_0_ssse3 - #define W_PRECALC_00_15_1 W_PRECALC_00_15_1_ssse3 - #define W_PRECALC_16_31_0 W_PRECALC_16_31_0_ssse3 - #define W_PRECALC_32_79_0 W_PRECALC_32_79_0_ssse3 - #define W_PRECALC_32_79_0_i386 W_PRECALC_32_79_0_i386_ssse3 - - - .macro INITIAL_W_PRECALC_ssse3 // BIG_ENDIAN_LOAD(64 bytes block) into W (i=0:15) and store W+K into the stack memory - - // i=0 : W28,W24,W20,W16,W12,W8,W4,W0 - W_PRECALC_00_15_0 0 // W_TMP = (BUFFER_PTR) - W_PRECALC_00_15_1 W0 // convert W_TMP to big-endian, and save W0 = W_TMP - W_PRECALC_00_15_2 // W_TMP = W0 + K - W_PRECALC_00_15_3 3 // (sp) = W_TMP = W0 + K - - // i=4 : W24,W20,W16,W12,W8,W4,W0,W28 - W_PRECALC_00_15_0 4 // W_TMP = 16(BUFFER_PTR) - W_PRECALC_00_15_1 W28 // convert W_TMP to big-endian, and save W28 = W_TMP - W_PRECALC_00_15_2 // W_TMP = W28 + K - W_PRECALC_00_15_3 7 // 16(sp) = W_TMP = W28 + K - - // i=8 : W20,W16,W12,W8,W4,W0,W28,W24 - W_PRECALC_00_15_0 8 // W_TMP = 32(BUFFER_PTR) - W_PRECALC_00_15_1 W24 // convert W_TMP to big-endian, and save W24 = W_TMP - W_PRECALC_00_15_2 // W_TMP = W24 + K - W_PRECALC_00_15_3 11 // 32(sp) = W_TMP = W24 + K - - // i=12 : W16,W12,W8,W4,W0,W28,W24,W20 - W_PRECALC_00_15_0 12 // W_TMP = 48(BUFFER_PTR) - W_PRECALC_00_15_1 W20 // convert W_TMP to big-endian, and save W20 = W_TMP - W_PRECALC_00_15_2 // W_TMP = W20 + K - W_PRECALC_00_15_3 15 // 48(sp) = W_TMP = W20 + K - - .endm - - - .macro INTERNAL_ssse3 // updating W (16:79) and update the digests A/B/C/D/E (i=0:63, based on W+K stored in the stack memory) - - // i=16 : W12,W8,W4,W0,W28,W24,W20,W16 - W_PRECALC_16_31_0 W0,W28,W24,W20,W16 - RR0 F1,A,B,C,D,E,0 - W_PRECALC_16_31_1 W0,W16 - RR1 F1,A,B,C,D,E,0 - W_PRECALC_16_31_2 W16 - RR0 F1,D,E,A,B,C,2 - W_PRECALC_16_31_3 W16, 2, 0 - RR1 F1,D,E,A,B,C,2 - - // i=20 : W8,W4,W0,W28,W24,W20,W16,W12 - W_PRECALC_16_31_0 W28,W24,W20,W16,W12 - RR0 F1,B,C,D,E,A,4 - W_PRECALC_16_31_1 W28,W12 - RR1 F1,B,C,D,E,A,4 - W_PRECALC_16_31_2 W12 - RR0 F1,E,A,B,C,D,6 - W_PRECALC_16_31_3 W12, 6, 16 - RR1 F1,E,A,B,C,D,6 - - // i=24 : W4,W0,W28,W24,W20,W16,W12,W8 - W_PRECALC_16_31_0 W24,W20,W16,W12,W8 - RR0 F1,C,D,E,A,B,8 - W_PRECALC_16_31_1 W24,W8 - RR1 F1,C,D,E,A,B,8 - W_PRECALC_16_31_2 W8 - RR0 F1,A,B,C,D,E,10 - W_PRECALC_16_31_3 W8,10,16 - RR1 F1,A,B,C,D,E,10 - - // i=28 : W0,W28,W24,W20,W16,W12,W8,W4 - W_PRECALC_16_31_0 W20,W16,W12,W8,W4 - RR0 F1,D,E,A,B,C,12 - W_PRECALC_16_31_1 W20,W4 - RR1 F1,D,E,A,B,C,12 - W_PRECALC_16_31_2 W4 - RR0 F1,B,C,D,E,A,14 - W_PRECALC_16_31_3 W4,14,16 - RR1 F1,B,C,D,E,A,14 - - // i=32 : W28,W24,W20,W16,W12,W8,W4,W0 - W_PRECALC_32_79_0 W28,W8,W4,W0 - RR0 F1,E,A,B,C,D,16 - W_PRECALC_32_79_1 W16,W0 - RR1 F1,E,A,B,C,D,16 - W_PRECALC_32_79_2 W0 - RR0 F1,C,D,E,A,B,18 - W_PRECALC_32_79_3 W0,18,16 - RR1 F1,C,D,E,A,B,18 - - // starting using F2 - - // i=36 : W24,W20,W16,W12,W8,W4,W0,W28 -#if defined (__x86_64__) - 
W_PRECALC_32_79_0 W24,W4,W0,W28 -#else - W_PRECALC_32_79_0_i386 W24,W4,W0,W28 -#endif - RR0 F2,A,B,C,D,E,20 - W_PRECALC_32_79_1 W12,W28 - RR1 F2,A,B,C,D,E,20 -#if defined (__x86_64__) - W_PRECALC_32_79_2 W28 -#else - W_PRECALC_32_79_2_i386 W28 -#endif - RR0 F2,D,E,A,B,C,22 - W_PRECALC_32_79_3 W28,22,16 - RR1 F2,D,E,A,B,C,22 - - // i=40 : W20,W16,W12,W8,W4,W0,W28,W24 - #undef K_XMM - #define K_XMM 32 -#if defined (__x86_64__) - W_PRECALC_32_79_0 W20,W0,W28,W24 -#else - W_PRECALC_32_79_0_i386 W20,W0,W28,W24 -#endif - RR0 F2,B,C,D,E,A,24 - W_PRECALC_32_79_1 W8,W24 - RR1 F2,B,C,D,E,A,24 -#if defined (__x86_64__) - W_PRECALC_32_79_2 W24 -#else - W_PRECALC_32_79_2_i386 W24 -#endif - RR0 F2,E,A,B,C,D,26 - W_PRECALC_32_79_3 W24,26,K_XMM - RR1 F2,E,A,B,C,D,26 - - // i=44 : W16,W12,W8,W4,W0,W28,W24,W20 - W_PRECALC_32_79_0 W16,W28,W24,W20 - RR0 F2,C,D,E,A,B,28 - W_PRECALC_32_79_1 W4,W20 - RR1 F2,C,D,E,A,B,28 - W_PRECALC_32_79_2 W20 - RR0 F2,A,B,C,D,E,30 - W_PRECALC_32_79_3 W20,30,K_XMM - RR1 F2,A,B,C,D,E,30 - - // i=48 : W12,W8,W4,W0,W28,W24,W20,W16 - W_PRECALC_32_79_0 W12,W24,W20,W16 - RR0 F2,D,E,A,B,C,32 - W_PRECALC_32_79_1 W0,W16 - RR1 F2,D,E,A,B,C,32 - W_PRECALC_32_79_2 W16 - RR0 F2,B,C,D,E,A,34 - W_PRECALC_32_79_3 W16,34,K_XMM - RR1 F2,B,C,D,E,A,34 - - // i=52 : W8,W4,W0,W28,W24,W20,W16,W12 - W_PRECALC_32_79_0 W8,W20,W16,W12 - RR0 F2,E,A,B,C,D,36 - W_PRECALC_32_79_1 W28,W12 - RR1 F2,E,A,B,C,D,36 - W_PRECALC_32_79_2 W12 - RR0 F2,C,D,E,A,B,38 - W_PRECALC_32_79_3 W12,38,K_XMM - RR1 F2,C,D,E,A,B,38 - - // starting using F3 - - // i=56 : W4,W0,W28,W24,W20,W16,W12,W8 - W_PRECALC_32_79_0 W4,W16,W12,W8 - RR0 F3,A,B,C,D,E,40 - W_PRECALC_32_79_1 W24,W8 - RR1 F3,A,B,C,D,E,40 - W_PRECALC_32_79_2 W8 - RR0 F3,D,E,A,B,C,42 - W_PRECALC_32_79_3 W8,42,K_XMM - RR1 F3,D,E,A,B,C,42 - - // i=60 : W0,W28,W24,W20,W16,W12,W8,W4 - #undef K_XMM - #define K_XMM 48 - W_PRECALC_32_79_0 W0,W12,W8,W4 - RR0 F3,B,C,D,E,A,44 - W_PRECALC_32_79_1 W20,W4 - RR1 F3,B,C,D,E,A,44 - W_PRECALC_32_79_2 W4 - RR0 F3,E,A,B,C,D,46 - W_PRECALC_32_79_3 W4,46,K_XMM - RR1 F3,E,A,B,C,D,46 - - // i=64 : W28,W24,W20,W16,W12,W8,W4,W0 - W_PRECALC_32_79_0 W28,W8,W4,W0 - RR0 F3,C,D,E,A,B,48 - W_PRECALC_32_79_1 W16,W0 - RR1 F3,C,D,E,A,B,48 - W_PRECALC_32_79_2 W0 - RR0 F3,A,B,C,D,E,50 - W_PRECALC_32_79_3 W0,50,K_XMM - RR1 F3,A,B,C,D,E,50 - - // i=68 : W24,W20,W16,W12,W8,W4,W0,W28 -#if defined (__x86_64__) - W_PRECALC_32_79_0 W24,W4,W0,W28 -#else - W_PRECALC_32_79_0_i386 W24,W4,W0,W28 -#endif - RR0 F3,D,E,A,B,C,52 - W_PRECALC_32_79_1 W12,W28 - RR1 F3,D,E,A,B,C,52 -#if defined (__x86_64__) - W_PRECALC_32_79_2 W28 -#else - W_PRECALC_32_79_2_i386 W28 -#endif - RR0 F3,B,C,D,E,A,54 - W_PRECALC_32_79_3 W28,54,K_XMM - RR1 F3,B,C,D,E,A,54 - - // i=72 : W20,W16,W12,W8,W4,W0,W28,W24 -#if defined (__x86_64__) - W_PRECALC_32_79_0 W20,W0,W28,W24 -#else - W_PRECALC_32_79_0_i386 W20,W0,W28,W24 -#endif - RR0 F3,E,A,B,C,D,56 - W_PRECALC_32_79_1 W8,W24 - RR1 F3,E,A,B,C,D,56 -#if defined (__x86_64__) - W_PRECALC_32_79_2 W24 -#else - W_PRECALC_32_79_2_i386 W24 -#endif - RR0 F3,C,D,E,A,B,58 - W_PRECALC_32_79_3 W24,58,K_XMM - RR1 F3,C,D,E,A,B,58 - - // starting using F4 - - // i=76 : W16,W12,W8,W4,W0,W28,W24,W20 - W_PRECALC_32_79_0 W16,W28,W24,W20 - RR0 F4,A,B,C,D,E,60 - W_PRECALC_32_79_1 W4,W20 - RR1 F4,A,B,C,D,E,60 - W_PRECALC_32_79_2 W20 - RR0 F4,D,E,A,B,C,62 - W_PRECALC_32_79_3 W20,62,K_XMM - RR1 F4,D,E,A,B,C,62 - - .endm - - .macro SOFTWARE_PIPELINING_ssse3 - // i=0 : W28,W24,W20,W16,W12,W8,W4,W0 - W_PRECALC_00_15_0 0 // W_TMP = (BUFFER_PTR) - RR0 F4,B,C,D,E,A,64 - 
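Between the RR0/RR1 pairs here, each interleaved W_PRECALC_00_15_* group is doing scalar work no more complicated than this per four message words (a sketch; M[] stands for the 16-word input block and K for the round constant covering rounds i..i+3):

    for (int j = 0; j < 4; ++j)
        WK[i + j] = OSSwapBigToHostInt32(M[i + j]) + K;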
W_PRECALC_00_15_1 W0 // convert W_TMP to big-endian, and save W0 = W_TMP - RR1 F4,B,C,D,E,A,64 - W_PRECALC_00_15_2 // W_TMP = W0 + K - RR0 F4,E,A,B,C,D,66 - W_PRECALC_00_15_3 3 // (sp) = W_TMP = W0 + K - RR1 F4,E,A,B,C,D,66 - - // i=4 : W24,W20,W16,W12,W8,W4,W0,W28 - W_PRECALC_00_15_0 4 // W_TMP = 16(BUFFER_PTR) - RR0 F4,C,D,E,A,B,68 - W_PRECALC_00_15_1 W28 // convert W_TMP to big-endian, and save W28 = W_TMP - RR1 F4,C,D,E,A,B,68 - W_PRECALC_00_15_2 // W_TMP = W28 + K - RR0 F4,A,B,C,D,E,70 - W_PRECALC_00_15_3 7 // 16(sp) = W_TMP = W28 + K[0] - RR1 F4,A,B,C,D,E,70 - - // i=8 : W20,W16,W12,W8,W4,W0,W28,W24 - W_PRECALC_00_15_0 8 // W_TMP = 32(BUFFER_PTR) - RR0 F4,D,E,A,B,C,72 - W_PRECALC_00_15_1 W24 // convert W_TMP to big-endian, and save W24 = W_TMP - RR1 F4,D,E,A,B,C,72 - W_PRECALC_00_15_2 // W_TMP = W24 + K - RR0 F4,B,C,D,E,A,74 - W_PRECALC_00_15_3 11 // 32(sp) = W_TMP = W24 + K - RR1 F4,B,C,D,E,A,74 - - // i=12 : W16,W12,W8,W4,W0,W28,W24,W20 - W_PRECALC_00_15_0 12 // W_TMP = 48(BUFFER_PTR) - RR0 F4,E,A,B,C,D,76 - W_PRECALC_00_15_1 W20 // convert W_TMP to big-endian, and save W20 = W_TMP - RR1 F4,E,A,B,C,D,76 - W_PRECALC_00_15_2 // W_TMP = W20 + K - RR0 F4,C,D,E,A,B,78 - W_PRECALC_00_15_3 15 // 48(sp) = W_TMP = W20 + K - RR1 F4,C,D,E,A,B,78 - .endm - - - #undef W_PRECALC_00_15_0 - #undef W_PRECALC_00_15_1 - #undef W_PRECALC_16_31_0 - #undef W_PRECALC_32_79_0 - #undef W_PRECALC_32_79_0_i386 - - - - /* - - The following are 3 macro definitions that are no-ssse3 variants of the previous 3 macro definitions. - - INITIAL_W_PRECALC_nossse3 - INTERNAL_nossse3 - SOFTWARE_PIPELINING_nossse3 - - They will be used in a sha1 code main body definition that will be used for system without ssse3 support. - - */ - - #define W_PRECALC_00_15_0 W_PRECALC_00_15_0_nossse3 - #define W_PRECALC_00_15_1 W_PRECALC_00_15_1_nossse3 - #define W_PRECALC_16_31_0 W_PRECALC_16_31_0_nossse3 - #define W_PRECALC_32_79_0 W_PRECALC_32_79_0_nossse3 - #define W_PRECALC_32_79_0_i386 W_PRECALC_32_79_0_i386_nossse3 - - - .macro INITIAL_W_PRECALC_nossse3 - - // i=0 : W28,W24,W20,W16,W12,W8,W4,W0 - W_PRECALC_00_15_0 0 // W_TMP = (BUFFER_PTR) - W_PRECALC_00_15_1 W0 // convert W_TMP to big-endian, and save W0 = W_TMP - W_PRECALC_00_15_2 // W_TMP = W0 + K - W_PRECALC_00_15_3 3 // (sp) = W_TMP = W0 + K - - // i=4 : W24,W20,W16,W12,W8,W4,W0,W28 - W_PRECALC_00_15_0 4 // W_TMP = 16(BUFFER_PTR) - W_PRECALC_00_15_1 W28 // convert W_TMP to big-endian, and save W28 = W_TMP - W_PRECALC_00_15_2 // W_TMP = W28 + K - W_PRECALC_00_15_3 7 // 16(sp) = W_TMP = W28 + K - - // i=8 : W20,W16,W12,W8,W4,W0,W28,W24 - W_PRECALC_00_15_0 8 // W_TMP = 32(BUFFER_PTR) - W_PRECALC_00_15_1 W24 // convert W_TMP to big-endian, and save W24 = W_TMP - W_PRECALC_00_15_2 // W_TMP = W24 + K - W_PRECALC_00_15_3 11 // 32(sp) = W_TMP = W24 + K - - // i=12 : W16,W12,W8,W4,W0,W28,W24,W20 - W_PRECALC_00_15_0 12 // W_TMP = 48(BUFFER_PTR) - W_PRECALC_00_15_1 W20 // convert W_TMP to big-endian, and save W20 = W_TMP - W_PRECALC_00_15_2 // W_TMP = W20 + K - W_PRECALC_00_15_3 15 // 48(sp) = W_TMP = W20 + K - - .endm - - - .macro INTERNAL_nossse3 - // i=16 - // circular buffer : W12,W8,W4,W0,W28,W24,W20,W16 - W_PRECALC_16_31_0 W0,W28,W24,W20,W16 - RR0 F1,A,B,C,D,E,0 - W_PRECALC_16_31_1 W0,W16 - RR1 F1,A,B,C,D,E,0 - W_PRECALC_16_31_2 W16 - RR0 F1,D,E,A,B,C,2 - W_PRECALC_16_31_3 W16, 2, 0 - RR1 F1,D,E,A,B,C,2 - - // i=20, - // W8,W4,W0,W28,W24,W20,W16,W12 - W_PRECALC_16_31_0 W28,W24,W20,W16,W12 - RR0 F1,B,C,D,E,A,4 - W_PRECALC_16_31_1 W28,W12 - RR1 F1,B,C,D,E,A,4 - - W_PRECALC_16_31_2 
W12 - RR0 F1,E,A,B,C,D,6 - W_PRECALC_16_31_3 W12, 6, 16 - RR1 F1,E,A,B,C,D,6 - - // i=24, - // W4,W0,W28,W24,W20,W16,W12,W8 - W_PRECALC_16_31_0 W24,W20,W16,W12,W8 - RR0 F1,C,D,E,A,B,8 - W_PRECALC_16_31_1 W24,W8 - RR1 F1,C,D,E,A,B,8 - - W_PRECALC_16_31_2 W8 - RR0 F1,A,B,C,D,E,10 - W_PRECALC_16_31_3 W8,10,16 - RR1 F1,A,B,C,D,E,10 - - // i=28 - // W0,W28,W24,W20,W16,W12,W8,W4 - W_PRECALC_16_31_0 W20,W16,W12,W8,W4 - RR0 F1,D,E,A,B,C,12 - W_PRECALC_16_31_1 W20,W4 - RR1 F1,D,E,A,B,C,12 - - W_PRECALC_16_31_2 W4 - RR0 F1,B,C,D,E,A,14 - W_PRECALC_16_31_3 W4,14,16 - RR1 F1,B,C,D,E,A,14 - - //i=32 - // W28,W24,W20,W16,W12,W8,W4,W0 - W_PRECALC_32_79_0 W28,W8,W4,W0 - RR0 F1,E,A,B,C,D,16 - W_PRECALC_32_79_1 W16,W0 - RR1 F1,E,A,B,C,D,16 - W_PRECALC_32_79_2 W0 - RR0 F1,C,D,E,A,B,18 - W_PRECALC_32_79_3 W0,18,16 - RR1 F1,C,D,E,A,B,18 - - //i=36 - // W24,W20,W16,W12,W8,W4,W0,W28 -#if defined (__x86_64__) - W_PRECALC_32_79_0 W24,W4,W0,W28 -#else - W_PRECALC_32_79_0_i386 W24,W4,W0,W28 -#endif - RR0 F2,A,B,C,D,E,20 - W_PRECALC_32_79_1 W12,W28 - RR1 F2,A,B,C,D,E,20 -#if defined (__x86_64__) - W_PRECALC_32_79_2 W28 -#else - W_PRECALC_32_79_2_i386 W28 -#endif - RR0 F2,D,E,A,B,C,22 - W_PRECALC_32_79_3 W28,22,16 - RR1 F2,D,E,A,B,C,22 - - //i=40 - #undef K_XMM - #define K_XMM 32 - // W20,W16,W12,W8,W4,W0,W28,W24 -#if defined (__x86_64__) - W_PRECALC_32_79_0 W20,W0,W28,W24 -#else - W_PRECALC_32_79_0_i386 W20,W0,W28,W24 -#endif - RR0 F2,B,C,D,E,A,24 - W_PRECALC_32_79_1 W8,W24 - RR1 F2,B,C,D,E,A,24 -#if defined (__x86_64__) - W_PRECALC_32_79_2 W24 -#else - W_PRECALC_32_79_2_i386 W24 -#endif - RR0 F2,E,A,B,C,D,26 - W_PRECALC_32_79_3 W24,26,K_XMM - RR1 F2,E,A,B,C,D,26 - - //i=44 - // W16,W12,W8,W4,W0,W28,W24,W20 - W_PRECALC_32_79_0 W16,W28,W24,W20 - RR0 F2,C,D,E,A,B,28 - W_PRECALC_32_79_1 W4,W20 - RR1 F2,C,D,E,A,B,28 - W_PRECALC_32_79_2 W20 - RR0 F2,A,B,C,D,E,30 - W_PRECALC_32_79_3 W20,30,K_XMM - RR1 F2,A,B,C,D,E,30 - - //i=48 - // W12,W8,W4,W0,W28,W24,W20,W16 - W_PRECALC_32_79_0 W12,W24,W20,W16 - RR0 F2,D,E,A,B,C,32 - W_PRECALC_32_79_1 W0,W16 - RR1 F2,D,E,A,B,C,32 - W_PRECALC_32_79_2 W16 - RR0 F2,B,C,D,E,A,34 - W_PRECALC_32_79_3 W16,34,K_XMM - RR1 F2,B,C,D,E,A,34 - - //i=52 - // W8,W4,W0,W28,W24,W20,W16,W12 - W_PRECALC_32_79_0 W8,W20,W16,W12 - RR0 F2,E,A,B,C,D,36 - W_PRECALC_32_79_1 W28,W12 - RR1 F2,E,A,B,C,D,36 - W_PRECALC_32_79_2 W12 - RR0 F2,C,D,E,A,B,38 - W_PRECALC_32_79_3 W12,38,K_XMM - RR1 F2,C,D,E,A,B,38 - - //i=56 - // W4,W0,W28,W24,W20,W16,W12,W8 - W_PRECALC_32_79_0 W4,W16,W12,W8 - RR0 F3,A,B,C,D,E,40 - W_PRECALC_32_79_1 W24,W8 - RR1 F3,A,B,C,D,E,40 - W_PRECALC_32_79_2 W8 - RR0 F3,D,E,A,B,C,42 - W_PRECALC_32_79_3 W8,42,K_XMM - RR1 F3,D,E,A,B,C,42 - - //i=60 - #undef K_XMM - #define K_XMM 48 - // W0,W28,W24,W20,W16,W12,W8,W4 - W_PRECALC_32_79_0 W0,W12,W8,W4 - RR0 F3,B,C,D,E,A,44 - W_PRECALC_32_79_1 W20,W4 - RR1 F3,B,C,D,E,A,44 - W_PRECALC_32_79_2 W4 - RR0 F3,E,A,B,C,D,46 - W_PRECALC_32_79_3 W4,46,K_XMM - RR1 F3,E,A,B,C,D,46 - - //i=64 - // W28,W24,W20,W16,W12,W8,W4,W0 - W_PRECALC_32_79_0 W28,W8,W4,W0 - RR0 F3,C,D,E,A,B,48 - W_PRECALC_32_79_1 W16,W0 - RR1 F3,C,D,E,A,B,48 - W_PRECALC_32_79_2 W0 - RR0 F3,A,B,C,D,E,50 - W_PRECALC_32_79_3 W0,50,K_XMM - RR1 F3,A,B,C,D,E,50 - - //i=68 - // W24,W20,W16,W12,W8,W4,W0,W28 -#if defined (__x86_64__) - W_PRECALC_32_79_0 W24,W4,W0,W28 -#else - W_PRECALC_32_79_0_i386 W24,W4,W0,W28 -#endif - RR0 F3,D,E,A,B,C,52 - W_PRECALC_32_79_1 W12,W28 - RR1 F3,D,E,A,B,C,52 -#if defined (__x86_64__) - W_PRECALC_32_79_2 W28 -#else - W_PRECALC_32_79_2_i386 W28 -#endif - RR0 F3,B,C,D,E,A,54 - 
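The F1/F2/F3 selectors cycling through these rounds are, per the macro comments earlier in the file, the standard SHA-1 round functions; restated in C for reference:

    static uint32_t F1(uint32_t B, uint32_t C, uint32_t D)
        { return D ^ (B & (C ^ D)); }           // "choose", rounds 0-19
    static uint32_t F2(uint32_t B, uint32_t C, uint32_t D)
        { return B ^ C ^ D; }                   // parity, rounds 20-39
    static uint32_t F3(uint32_t B, uint32_t C, uint32_t D)
        { return (B & C) | (D & (B ^ C)); }     // majority, rounds 40-59
    #define F4 F2                               // rounds 60-79 reuse parity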
W_PRECALC_32_79_3 W28,54,K_XMM - RR1 F3,B,C,D,E,A,54 - - //i=72 - // W20,W16,W12,W8,W4,W0,W28,W24 -#if defined (__x86_64__) - W_PRECALC_32_79_0 W20,W0,W28,W24 -#else - W_PRECALC_32_79_0_i386 W20,W0,W28,W24 -#endif - RR0 F3,E,A,B,C,D,56 - W_PRECALC_32_79_1 W8,W24 - RR1 F3,E,A,B,C,D,56 -#if defined (__x86_64__) - W_PRECALC_32_79_2 W24 -#else - W_PRECALC_32_79_2_i386 W24 -#endif - RR0 F3,C,D,E,A,B,58 - W_PRECALC_32_79_3 W24,58,K_XMM - RR1 F3,C,D,E,A,B,58 - - // starting using F4 - - //i=76 - // W16,W12,W8,W4,W0,W28,W24,W20 - W_PRECALC_32_79_0 W16,W28,W24,W20 - RR0 F4,A,B,C,D,E,60 - W_PRECALC_32_79_1 W4,W20 - RR1 F4,A,B,C,D,E,60 - W_PRECALC_32_79_2 W20 - RR0 F4,D,E,A,B,C,62 - W_PRECALC_32_79_3 W20,62,K_XMM - RR1 F4,D,E,A,B,C,62 - - .endm - - .macro SOFTWARE_PIPELINING_nossse3 - // i=0 : W28,W24,W20,W16,W12,W8,W4,W0 - W_PRECALC_00_15_0 0 // W_TMP = (BUFFER_PTR) - RR0 F4,B,C,D,E,A,64 - W_PRECALC_00_15_1 W0 // convert W_TMP to big-endian, and save W0 = W_TMP - RR1 F4,B,C,D,E,A,64 - W_PRECALC_00_15_2 // W_TMP = W0 + K - RR0 F4,E,A,B,C,D,66 - W_PRECALC_00_15_3 3 // (sp) = W_TMP = W0 + K - RR1 F4,E,A,B,C,D,66 - - // i=4 : W24,W20,W16,W12,W8,W4,W0,W28 - W_PRECALC_00_15_0 4 // W_TMP = 16(BUFFER_PTR) - RR0 F4,C,D,E,A,B,68 - W_PRECALC_00_15_1 W28 // convert W_TMP to big-endian, and save W28 = W_TMP - RR1 F4,C,D,E,A,B,68 - W_PRECALC_00_15_2 // W_TMP = W28 + K - RR0 F4,A,B,C,D,E,70 - W_PRECALC_00_15_3 7 // 16(sp) = W_TMP = W28 + K[0] - RR1 F4,A,B,C,D,E,70 - - // i=8 : W20,W16,W12,W8,W4,W0,W28,W24 - W_PRECALC_00_15_0 8 // W_TMP = 32(BUFFER_PTR) - RR0 F4,D,E,A,B,C,72 - W_PRECALC_00_15_1 W24 // convert W_TMP to big-endian, and save W24 = W_TMP - RR1 F4,D,E,A,B,C,72 - W_PRECALC_00_15_2 // W_TMP = W24 + K - RR0 F4,B,C,D,E,A,74 - W_PRECALC_00_15_3 11 // 32(sp) = W_TMP = W24 + K - RR1 F4,B,C,D,E,A,74 - - // i=12 : W16,W12,W8,W4,W0,W28,W24,W20 - W_PRECALC_00_15_0 12 // W_TMP = 48(BUFFER_PTR) - RR0 F4,E,A,B,C,D,76 - W_PRECALC_00_15_1 W20 // convert W_TMP to big-endian, and save W20 = W_TMP - RR1 F4,E,A,B,C,D,76 - W_PRECALC_00_15_2 // W_TMP = W20 + K - RR0 F4,C,D,E,A,B,78 - W_PRECALC_00_15_3 15 // 48(sp) = W_TMP = W20 + K - RR1 F4,C,D,E,A,B,78 - .endm - - .macro ENDING // finish up updating hash digests (i=64:79) - //i=80 - RR0 F4,B,C,D,E,A,64 - RR1 F4,B,C,D,E,A,64 - RR0 F4,E,A,B,C,D,66 - RR1 F4,E,A,B,C,D,66 - - //i=84 - RR0 F4,C,D,E,A,B,68 - RR1 F4,C,D,E,A,B,68 - RR0 F4,A,B,C,D,E,70 - RR1 F4,A,B,C,D,E,70 - - //i=88 - RR0 F4,D,E,A,B,C,72 - RR1 F4,D,E,A,B,C,72 - RR0 F4,B,C,D,E,A,74 - RR1 F4,B,C,D,E,A,74 - - //i=92 - RR0 F4,E,A,B,C,D,76 - RR1 F4,E,A,B,C,D,76 - RR0 F4,C,D,E,A,B,78 - RR1 F4,C,D,E,A,B,78 - .endm - - // load hash digests A,B,C,D,E from memory into registers - .macro LOAD_HASH -#if defined (__x86_64__) - mov (HASH_PTR), A - mov 4(HASH_PTR), B - mov 8(HASH_PTR), C - mov 12(HASH_PTR), D - mov 16(HASH_PTR), E -#else - mov HASH_PTR, T1 - mov (T1), A - mov 4(T1), B - mov 8(T1), C - mov 12(T1), D - mov 16(T1), E -#endif - .endm - - .macro UPDATE_HASH - add $0, $1 - mov $1, $0 - .endm - - .macro UPDATE_ALL_HASH -#if defined (__x86_64__) - UPDATE_HASH (HASH_PTR), A - UPDATE_HASH 4(HASH_PTR), B - UPDATE_HASH 8(HASH_PTR), C - UPDATE_HASH 12(HASH_PTR), D - UPDATE_HASH 16(HASH_PTR), E -#else - mov HASH_PTR, T1 - UPDATE_HASH (T1), A - UPDATE_HASH 4(T1), B - UPDATE_HASH 8(T1), C - UPDATE_HASH 12(T1), D - UPDATE_HASH 16(T1), E -#endif - .endm - - - /* - main sha1 code for system without ssse3 support - */ - - .macro SHA1_PIPELINED_MAIN_BODY_nossse3 - LOAD_HASH // load initial hashes into A,B,C,D,E (registers) - 
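The control flow this main-body macro expands to, rendered as C for orientation (a sketch: the uppercase calls stand for the macro groups defined above, cnt and buffer for the count and pointer registers):

    LOAD_HASH();               // A..E = hash[0..4]
    INITIAL_W_PRECALC();       // WK[0..15] = bswap(block 0) + K
    for (;;) {
        INTERNAL();            // expand W[16..79]; rounds 0..63
        if (--cnt == 0) break;
        buffer += 64;
        SOFTWARE_PIPELINING(); // rounds 64..79 + next block's WK[0..15]
        UPDATE_ALL_HASH();     // hash[0..4] += A..E, then reload A..E
    }
    ENDING();                  // rounds 64..79 of the final block
    UPDATE_ALL_HASH();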
INITIAL_W_PRECALC_nossse3 // big_endian_load(W) and W+K (i=0:15) - .align 4,0x90 -0: - INTERNAL_nossse3 // update W (i=16:79) and update ABCDE (i=0:63) -#if Multiple_Blocks -#if defined(__x86_64__) - add $$64, BUFFER_PTR // BUFFER_PTR+=64; - sub $$1, cnt // pre-decrement cnt by 1 -#else - addl $$64, BUFFER_PTR // BUFFER_PTR+=64; - subl $$1, cnt // pre-decrement cnt by 1 -#endif - jbe 1f // if cnt <= 0, branch to finish off - SOFTWARE_PIPELINING_nossse3 // update ABCDE (i=64:79) || big_endian_load(W) and W+K (i=0:15) - UPDATE_ALL_HASH // update output hashes - jmp 0b // repeat for next block - .align 4,0x90 -1: -#endif - ENDING // update ABCDE (i=64:79) - UPDATE_ALL_HASH // update output hashes - .endm - - /* - main sha1 code for system with ssse3 support - */ - - .macro SHA1_PIPELINED_MAIN_BODY_ssse3 - LOAD_HASH // load initial hashes into A,B,C,D,E - INITIAL_W_PRECALC_ssse3 // big_endian_load(W) and W+K (i=0:15) - .align 4,0x90 -0: - INTERNAL_ssse3 // update W (i=16:79) and update ABCDE (i=0:63) -#if Multiple_Blocks -#if defined(__x86_64__) - add $$64, BUFFER_PTR // BUFFER_PTR+=64; - sub $$1, cnt // pre-decrement cnt by 1 -#else - addl $$64, BUFFER_PTR // BUFFER_PTR+=64; - subl $$1, cnt // pre-decrement cnt by 1 -#endif - jbe 1f // if cnt <= 0, branch to finish off - SOFTWARE_PIPELINING_ssse3 // update ABCDE (i=64:79) || big_endian_load(W) and W+K (i=0:15) - UPDATE_ALL_HASH // update output hashes - jmp 0b // repeat for next block - .align 4,0x90 -1: -#endif - ENDING // update ABCDE (i=64:79) - UPDATE_ALL_HASH // update output hashes - .endm - -#ifdef KERNEL -#include -#else -#include -#endif - - .text - - .globl _SHA1Transform - //.private_extern _SHA1Transform -_SHA1Transform: - - // detect SSSE3 and dispatch appropriate code branch - #if defined __x86_64__ - movq __cpu_capabilities@GOTPCREL(%rip), %rax // %rax -> __cpu_capabilities - mov (%rax), %eax // %eax = __cpu_capabilities - #else // i386 - #if defined KERNEL - leal __cpu_capabilities, %eax // %eax -> __cpu_capabilities - mov (%eax), %eax // %eax = __cpu_capabilities - #else - mov _COMM_PAGE_CPU_CAPABILITIES, %eax - #endif - #endif - test $(kHasSupplementalSSE3), %eax - je _SHA1Transform_nossse3 // branch to no-ssse3 code - - - // start the sha1 code with ssse3 support - - // save callee-save registers -#if defined (__x86_64__) - push %rbx - push %rbp -#else - push %ebx - push %ebp - push %esi - push %edi -#endif - - sub $stack_size, sp // allocate stack memory for use - - // save used xmm register if this is for kernel -#if KERNEL - xmov %xmm0, 4*16(sp) - xmov %xmm1, 5*16(sp) - xmov %xmm2, 6*16(sp) - xmov %xmm3, 7*16(sp) - xmov %xmm4, 8*16(sp) - xmov %xmm5, 9*16(sp) - xmov %xmm6, 10*16(sp) - xmov %xmm7, 11*16(sp) -#if defined (__x86_64__) - xmov %xmm8, 12*16(sp) - xmov %xmm9, 13*16(sp) - xmov %xmm10, 14*16(sp) -#endif -#endif - -#if defined (__x86_64__) - - // set up registers to free %edx/%edi/%esi for other use (ABCDE) - mov ctx, HASH_PTR - mov buf, BUFFER_PTR -#if Multiple_Blocks - mov %rdx, cnt -#endif - lea K_XMM_AR(%rip), K_BASE - xmov 0x40(K_BASE), XMM_SHUFB_BSWAP - -#else // __i386__ - -#if KERNEL - lea K_XMM_AR, %eax -#else - // Get address of 0 in R. - call 0f // Push program counter onto stack. - 0: pop %eax // Get program counter. 
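The probe of __cpu_capabilities at the top of _SHA1Transform amounts to the following dispatch, written as C for clarity (the two function names are illustrative only; in the assembly both bodies sit behind the one entry symbol, and _get_cpu_capabilities() is xnu's accessor for the same capability word):

    if (_get_cpu_capabilities() & kHasSupplementalSSE3)
        sha1_blocks_ssse3(hash, buf, nblocks);    // pshufb/palignr path
    else
        sha1_blocks_nossse3(hash, buf, nblocks);  // shift/or workarounds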
- lea K_XMM_AR-0b(%eax), %eax -#endif - mov %eax, K_BASE - xmov 0x40(%eax), %xmm0 - xmov %xmm0, XMM_SHUFB_BSWAP - -#endif - - SHA1_PIPELINED_MAIN_BODY_ssse3 - - // restore used xmm registers if this is for kernel -#if KERNEL - xmov 4*16(sp), %xmm0 - xmov 5*16(sp), %xmm1 - xmov 6*16(sp), %xmm2 - xmov 7*16(sp), %xmm3 - xmov 8*16(sp), %xmm4 - xmov 9*16(sp), %xmm5 - xmov 10*16(sp), %xmm6 - xmov 11*16(sp), %xmm7 -#if defined (__x86_64__) - xmov 12*16(sp), %xmm8 - xmov 13*16(sp), %xmm9 - xmov 14*16(sp), %xmm10 -#endif -#endif - - add $stack_size, sp // deallocate stack memory - - // restore callee-save registers -#if defined (__x86_64__) - pop %rbp - pop %rbx -#else - pop %edi - pop %esi - pop %ebp - pop %ebx -#endif - - ret // return - - // this is equivalent to the above function _SHA1Transform, but it does not use ssse3 instructions - - .globl _SHA1Transform_nossse3 - .private_extern _SHA1Transform_nossse3 -_SHA1Transform_nossse3: - - // push callee-save registers -#if defined (__x86_64__) - push %rbx - push %rbp -#else - push %ebx - push %ebp - push %esi - push %edi -#endif - - sub $stack_size, sp // allocate stack memory for local use - - // save used xmm registers if this is for kernel -#if KERNEL - xmov %xmm0, 4*16(sp) - xmov %xmm1, 5*16(sp) - xmov %xmm2, 6*16(sp) - xmov %xmm3, 7*16(sp) - xmov %xmm4, 8*16(sp) - xmov %xmm5, 9*16(sp) - xmov %xmm6, 10*16(sp) - xmov %xmm7, 11*16(sp) -#if defined (__x86_64__) - xmov %xmm8, 12*16(sp) - xmov %xmm9, 13*16(sp) -#endif -#endif - -#if defined (__x86_64__) - - // set up registers to free %edx/%edi/%esi for other use (ABCDE) - mov ctx, HASH_PTR - mov buf, BUFFER_PTR -#if Multiple_Blocks - mov %rdx, cnt -#endif - lea K_XMM_AR(%rip), K_BASE - -#else // __i386__ - -#if KERNEL - lea K_XMM_AR, %eax -#else - // Get address of 0 in R. - call 0f // Push program counter onto stack. - 0: pop %eax // Get program counter. 
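// (Same call/pop position-independence trick as in the ssse3 path above:
// %eax holds the runtime address of label 0, which the next lea rebases
// to K_XMM_AR.)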
- lea K_XMM_AR-0b(%eax), %eax -#endif - mov %eax, K_BASE - -#endif - - SHA1_PIPELINED_MAIN_BODY_nossse3 - - // restore used xmm registers if this is for kernel -#if KERNEL - xmov 4*16(sp), %xmm0 - xmov 5*16(sp), %xmm1 - xmov 6*16(sp), %xmm2 - xmov 7*16(sp), %xmm3 - xmov 8*16(sp), %xmm4 - xmov 9*16(sp), %xmm5 - xmov 10*16(sp), %xmm6 - xmov 11*16(sp), %xmm7 -#if defined (__x86_64__) - xmov 12*16(sp), %xmm8 - xmov 13*16(sp), %xmm9 -#endif -#endif - - add $stack_size, sp // deallocate stack memory - - // restore callee-save registers -#if defined (__x86_64__) - pop %rbp - pop %rbx -#else - pop %edi - pop %esi - pop %ebp - pop %ebx -#endif - - ret // return - - .const - .align 4, 0x90 - -#define K1 0x5a827999 -#define K2 0x6ed9eba1 -#define K3 0x8f1bbcdc -#define K4 0xca62c1d6 - -K_XMM_AR: - .long K1 - .long K1 - .long K1 - .long K1 - .long K2 - .long K2 - .long K2 - .long K2 - .long K3 - .long K3 - .long K3 - .long K3 - .long K4 - .long K4 - .long K4 - .long K4 -// bswap_shufb_ctl: invoked thru 0x40(K_XMM_AR) - .long 0x00010203 - .long 0x04050607 - .long 0x08090a0b - .long 0x0c0d0e0f - - - -#endif // architecture x86_64 or i386 diff --git a/libkern/gen/OSAtomicOperations.c b/libkern/gen/OSAtomicOperations.c index cfb15c5c1..25ff477ff 100644 --- a/libkern/gen/OSAtomicOperations.c +++ b/libkern/gen/OSAtomicOperations.c @@ -103,6 +103,7 @@ UInt32 OSBitXorAtomic(UInt32 mask, volatile UInt32 * value) return OSBitwiseAtomic((UInt32) -1, 0, mask, value); } +#if defined(__i386__) || defined(__x86_64__) static Boolean OSCompareAndSwap8(UInt8 oldValue8, UInt8 newValue8, volatile UInt8 * value8) { UInt32 mask = 0x000000ff; @@ -121,6 +122,7 @@ static Boolean OSCompareAndSwap8(UInt8 oldValue8, UInt8 newValue8, volatile UInt return OSCompareAndSwap(oldValue, newValue, value32); } +#endif static Boolean OSTestAndSetClear(UInt32 bit, Boolean wantSet, volatile UInt8 * startAddress) { @@ -166,6 +168,7 @@ SInt8 OSDecrementAtomic8(volatile SInt8 * value) return OSAddAtomic8(-1, value); } +#if defined(__i386__) || defined(__x86_64__) SInt8 OSAddAtomic8(SInt32 amount, volatile SInt8 * value) { SInt8 oldValue; @@ -178,6 +181,7 @@ SInt8 OSAddAtomic8(SInt32 amount, volatile SInt8 * value) return oldValue; } +#endif static UInt8 OSBitwiseAtomic8(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt8 * value) { @@ -207,6 +211,7 @@ UInt8 OSBitXorAtomic8(UInt32 mask, volatile UInt8 * value) return OSBitwiseAtomic8((UInt32) -1, 0, mask, value); } +#if defined(__i386__) || defined(__x86_64__) static Boolean OSCompareAndSwap16(UInt16 oldValue16, UInt16 newValue16, volatile UInt16 * value16) { UInt32 mask = 0x0000ffff; @@ -225,6 +230,7 @@ static Boolean OSCompareAndSwap16(UInt16 oldValue16, UInt16 newValue16, volatile return OSCompareAndSwap(oldValue, newValue, value32); } +#endif SInt16 OSIncrementAtomic16(volatile SInt16 * value) { @@ -236,6 +242,7 @@ SInt16 OSDecrementAtomic16(volatile SInt16 * value) return OSAddAtomic16(-1, value); } +#if defined(__i386__) || defined(__x86_64__) SInt16 OSAddAtomic16(SInt32 amount, volatile SInt16 * value) { SInt16 oldValue; @@ -248,6 +255,7 @@ SInt16 OSAddAtomic16(SInt32 amount, volatile SInt16 * value) return oldValue; } +#endif static UInt16 OSBitwiseAtomic16(UInt32 and_mask, UInt32 or_mask, UInt32 xor_mask, volatile UInt16 * value) { diff --git a/libkern/gen/OSDebug.cpp b/libkern/gen/OSDebug.cpp index 14ee32621..f5bbec683 100644 --- a/libkern/gen/OSDebug.cpp +++ b/libkern/gen/OSDebug.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 
+ * Copyright (c) 2005-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -106,8 +106,11 @@ OSReportWithBacktrace(const char *str, ...) lck_mtx_lock(sOSReportLock); { - printf("%s\nBacktrace %p %p %p %p %p %p %p\n", - buf, bt[2], bt[3], bt[4], bt[5], bt[6], bt[7], bt[8]); + printf("%s\nBacktrace 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", buf, + (unsigned long) VM_KERNEL_UNSLIDE(bt[2]), (unsigned long) VM_KERNEL_UNSLIDE(bt[3]), + (unsigned long) VM_KERNEL_UNSLIDE(bt[4]), (unsigned long) VM_KERNEL_UNSLIDE(bt[5]), + (unsigned long) VM_KERNEL_UNSLIDE(bt[6]), (unsigned long) VM_KERNEL_UNSLIDE(bt[7]), + (unsigned long) VM_KERNEL_UNSLIDE(bt[8])); kmod_dump_log((vm_offset_t *) &bt[2], cnt - 2); } lck_mtx_unlock(sOSReportLock); @@ -116,38 +119,6 @@ OSReportWithBacktrace(const char *str, ...) static vm_offset_t minstackaddr = min_valid_stack_address(); static vm_offset_t maxstackaddr = max_valid_stack_address(); -#if __i386__ -#define i386_RETURN_OFFSET 4 - -static unsigned int -i386_validate_stackptr(vm_offset_t stackptr) -{ - /* Existence and alignment check - */ - if (!stackptr || (stackptr & 0x3)) - return 0; - - /* Is a virtual->physical translation present? - */ - if (!kvtophys(stackptr)) - return 0; - - /* Check if the return address lies on the same page; - * If not, verify that a translation exists. - */ - if (((PAGE_SIZE - (stackptr & PAGE_MASK)) < i386_RETURN_OFFSET) && - !kvtophys(stackptr + i386_RETURN_OFFSET)) - return 0; - return 1; -} - -static unsigned int -i386_validate_raddr(vm_offset_t raddr) -{ - return ((raddr > VM_MIN_KERNEL_AND_KEXT_ADDRESS) && - (raddr < VM_MAX_KERNEL_ADDRESS)); -} -#endif #if __x86_64__ #define x86_64_RETURN_OFFSET 8 @@ -196,49 +167,7 @@ unsigned OSBacktrace(void **bt, unsigned maxAddrs) { unsigned frame; -#if __i386__ -#define SANE_i386_FRAME_SIZE (kernel_stack_size >> 1) - vm_offset_t stackptr, stackptr_prev, raddr; - unsigned frame_index = 0; -/* Obtain current frame pointer */ - __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); - - if (!i386_validate_stackptr(stackptr)) - goto pad; - - raddr = *((vm_offset_t *) (stackptr + i386_RETURN_OFFSET)); - - if (!i386_validate_raddr(raddr)) - goto pad; - - bt[frame_index++] = (void *) raddr; - - for ( ; frame_index < maxAddrs; frame_index++) { - stackptr_prev = stackptr; - stackptr = *((vm_offset_t *) stackptr_prev); - - if (!i386_validate_stackptr(stackptr)) - break; - /* Stack grows downwards */ - if (stackptr < stackptr_prev) - break; - - if ((stackptr - stackptr_prev) > SANE_i386_FRAME_SIZE) - break; - - raddr = *((vm_offset_t *) (stackptr + i386_RETURN_OFFSET)); - - if (!i386_validate_raddr(raddr)) - break; - - bt[frame_index] = (void *) raddr; - } -pad: - frame = frame_index; - - for ( ; frame_index < maxAddrs; frame_index++) - bt[frame_index] = (void *) 0; -#elif __x86_64__ +#if __x86_64__ #define SANE_x86_64_FRAME_SIZE (kernel_stack_size >> 1) vm_offset_t stackptr, stackptr_prev, raddr; unsigned frame_index = 0; diff --git a/libkern/i386/OSAtomic.s b/libkern/i386/OSAtomic.s deleted file mode 100644 index 72adbb29d..000000000 --- a/libkern/i386/OSAtomic.s +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#;*************************************************************************** -#;* Boolean OSCompareAndSwap(SInt32 oldValue, SInt32 newValue, SInt32 *ptr) * -#;*************************************************************************** - - .globl _OSCompareAndSwap - .globl _OSCompareAndSwapPtr - -_OSCompareAndSwap: -_OSCompareAndSwapPtr: - movl 4(%esp), %eax #; oldValue - movl 8(%esp), %edx #; newValue - movl 12(%esp), %ecx #; ptr - lock - cmpxchgl %edx, 0(%ecx) #; CAS (eax is an implicit operand) - sete %al #; did CAS succeed? (TZ=1) - movzbl %al, %eax #; clear out the high bytes - ret - -#;***************************************************************************** -#;* Boolean OSCompareAndSwap64(SInt64 oldValue, SInt64 newValue, SInt64 *ptr) * -#;***************************************************************************** - - .globl _OSCompareAndSwap64 - -_OSCompareAndSwap64: - pushl %edi - pushl %ebx - - movl 4+8(%esp), %eax #; low 32-bits of oldValue - movl 8+8(%esp), %edx #; high 32-bits of oldValue - movl 12+8(%esp), %ebx #; low 32-bits of newValue - movl 16+8(%esp), %ecx #; high 32-bits of newValue - movl 20+8(%esp), %edi #; ptr - lock - cmpxchg8b 0(%edi) #; CAS (eax:edx, ebx:ecx implicit) - sete %al #; did CAS succeed? 
(TZ=1) - movzbl %al, %eax #; clear out the high bytes - - popl %ebx - popl %edi - ret - -#;******************************************************* -#;* SInt64 OSAddAtomic64(SInt64 theAmount, SInt64 *ptr) * -#;******************************************************* - - .globl _OSAddAtomic64 -_OSAddAtomic64: - pushl %edi - pushl %ebx - - movl 12+8(%esp), %edi #; ptr - movl 0(%edi), %eax #; load low 32-bits of *ptr - movl 4(%edi), %edx #; load high 32-bits of *ptr -1: - movl %eax, %ebx - movl %edx, %ecx #; ebx:ecx := *ptr - addl 4+8(%esp), %ebx - adcl 8+8(%esp), %ecx #; ebx:ecx := *ptr + theAmount - lock - cmpxchg8b 0(%edi) #; CAS (eax:edx, ebx:ecx implicit) - jnz 1b #; - failure: eax:edx re-loaded, retry - #; - success: old value in eax:edx - popl %ebx - popl %edi - ret - -#;******************************************************* -#; SInt32 OSAddAtomic(SInt32 delta, SInt32 *address) -#;******************************************************* - - .globl _OSAddAtomic - .globl _OSAddAtomicLong -_OSAddAtomic: -_OSAddAtomicLong: - movl 4(%esp), %eax #; Load addend - movl 8(%esp), %ecx #; Load address of operand - lock - xaddl %eax, 0(%ecx) #; Atomic exchange and add - ret diff --git a/libkern/kmod/cplus_start.c b/libkern/kmod/cplus_start.c index 1a2f3b9a0..eb77e72a9 100644 --- a/libkern/kmod/cplus_start.c +++ b/libkern/kmod/cplus_start.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000,2008-2009 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000,2008-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,59 +46,3 @@ /* The following preprocessor test must match exactly with the architectures * that define the CONFIG_STATIC_CPPINIT config option. */ -#if __i386__ - -#include -#include - -asm(".constructors_used = 0"); -asm(".private_extern .constructors_used"); - -// This global symbols will be defined by CreateInfo script's info.c file. -extern kmod_start_func_t *_realmain; -extern kmod_info_t KMOD_INFO_NAME; - -// Functions defined in libkern/c++/OSRuntime.cpp -extern kern_return_t OSRuntimeInitializeCPP(kmod_info_t *ki, void *data); -extern kern_return_t OSRuntimeFinalizeCPP(kmod_info_t *ki, void *data); - -/********************************************************************* -*********************************************************************/ -__private_extern__ kern_return_t _start(kmod_info_t *ki, void *data) -{ - kern_return_t result = OSRuntimeInitializeCPP(ki, data); - - if ((result == KERN_SUCCESS) && _realmain) { - result = (*_realmain)(ki, data); - - /* If _realmain failed, tear down C++. 
- */ - if (result != KERN_SUCCESS) { - (void)OSRuntimeFinalizeCPP(ki, data); - } - } - - return result; -} - -/********************************************************************* -*********************************************************************/ -__private_extern__ const char * OSKextGetCurrentIdentifier(void) -{ - return KMOD_INFO_NAME.name; -} - -/********************************************************************* -*********************************************************************/ -__private_extern__ const char * OSKextGetCurrentVersionString(void) -{ - return KMOD_INFO_NAME.version; -} - -/********************************************************************* -*********************************************************************/ -__private_extern__ OSKextLoadTag OSKextGetCurrentLoadTag(void) -{ - return (OSKextLoadTag)KMOD_INFO_NAME.id; -} -#endif diff --git a/libkern/kmod/cplus_stop.c b/libkern/kmod/cplus_stop.c index 2b2bdc688..eb77e72a9 100644 --- a/libkern/kmod/cplus_stop.c +++ b/libkern/kmod/cplus_stop.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000,2008-2009 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000,2008-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,31 +46,3 @@ /* The following preprocessor test must match exactly with the architectures * that define the CONFIG_STATIC_CPPINIT config option. */ -#if __i386__ - -#include - -asm(".destructors_used = 0"); -asm(".private_extern .destructors_used"); - -// Functions defined in libkern/c++/OSRuntime.cpp -extern kern_return_t OSRuntimeFinalizeCPP(kmod_info_t *ki, void *data); - -// This global symbols will be defined by CreateInfo script's info.c file. -extern kmod_stop_func_t *_antimain; - -__private_extern__ kern_return_t _stop(kmod_info_t *ki, void *data) -{ - kern_return_t result = KERN_SUCCESS; - - if (_antimain) { - result = (*_antimain)(ki, data); - } - - if (result == KERN_SUCCESS) { - result = OSRuntimeFinalizeCPP(ki, data); - } - - return result; -} -#endif diff --git a/libkern/kxld/Makefile b/libkern/kxld/Makefile index 8c322053c..6b9ec5a38 100644 --- a/libkern/kxld/Makefile +++ b/libkern/kxld/Makefile @@ -31,7 +31,7 @@ endif ifdef RC_CFLAGS ARCHS=$(addprefix -arch , $(RC_ARCHS)) else -ARCHS=-arch i386 -arch x86_64 +ARCHS=-arch x86_64 RC_CFLAGS=$(ARCHS) -pipe endif ifdef INSTALL_LOCATION @@ -57,6 +57,7 @@ TESTDST=./BUILD/tests # Flags SDKROOT ?= / +DEFINES = -DPRIVATE CFLAGS=-std=c99 -Wall -Wextra -Werror -pedantic -Wformat=2 -Wcast-align \ -Wwrite-strings -Wshorten-64-to-32 -Wshadow -Winit-self -Wpointer-arith \ -Wno-format-y2k -W -Wstrict-prototypes -Wmissing-prototypes -Wreturn-type \ @@ -65,7 +66,7 @@ CFLAGS=-std=c99 -Wall -Wextra -Werror -pedantic -Wformat=2 -Wcast-align \ LDFLAGS=$(ARCHS) -dynamiclib -install_name $(LIBKXLD_INSTALLNAME) \ -compatibility_version $(COMPATIBILITY_VERSION) \ -current_version $(CURRENT_VERSION) -lstdc++ -INCLUDES=-I$(HDRSRC) -I$(EXTHDRSRC) +INCLUDES=-I$(HDRSRC) -isystem $(EXTHDRSRC) ifneq ($(SDKROOT),/) CFLAGS += -isysroot $(SDKROOT) @@ -79,11 +80,11 @@ LIBTOOL = xcrun -sdk $(SDKROOT) libtool STRIP = xcrun -sdk $(SDKROOT) strip # Files -HDR_NAMES=kxld.h kxld_types.h WKdm.h +HDR_NAMES=kxld.h kxld_types.h OBJ_NAMES=kxld.o kxld_array.o kxld_copyright.o kxld_demangle.o kxld_dict.o \ kxld_kext.o kxld_object.o kxld_reloc.o kxld_sect.o kxld_seg.o \ kxld_srcversion.o kxld_sym.o kxld_symtab.o kxld_util.o kxld_uuid.o \ - kxld_vtable.o kxld_versionmin.o WKdmCompress.o WKdmDecompress.o + kxld_vtable.o kxld_versionmin.o 
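# Two quiet but consequential flag changes in this hunk: the external
# headers are now pulled in with -isystem rather than -I, which makes the
# compiler treat them as system headers and suppress warnings originating
# in them (important under -Wall -Wextra -pedantic -Werror), and the new
# $(DEFINES) = -DPRIVATE is applied to both the library and test compile
# rules, presumably so both builds see the same conditionally-exposed
# declarations.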
HDRS=$(addprefix $(HDRSRC)/, $(HDR_NAMES)) OBJS=$(addprefix $(OBJROOT)/, $(OBJ_NAMES)) @@ -93,9 +94,9 @@ $(shell [ -d $(OBJROOT) ] || mkdir -p $(OBJROOT)) # Implicit rules %.o : %.c $(OBJROOT)/%.o : $(OBJSRC)/%.c - $(CC) $(RC_CFLAGS) $(CFLAGS) $(OPTIM) $(INCLUDES) -c $< -o $@ + $(CC) $(RC_CFLAGS) $(CFLAGS) $(DEFINES) $(OPTIM) $(INCLUDES) -c $< -o $@ $(OBJROOT)/%.o : $(TESTSRC)/%.c - $(CC) $(RC_CFLAGS) $(CFLAGS) -O0 -DDEBUG $(INCLUDES) -I $(SRCROOT) -c $< -o $@ + $(CC) $(RC_CFLAGS) $(CFLAGS) $(DEFINES) -O0 -DDEBUG $(INCLUDES) -I $(SRCROOT) -c $< -o $@ SRCROOTESC=$(subst /,\/,$(SRCROOT)) OBJROOTESC=$(subst /,\/,$(OBJROOT)) diff --git a/libkern/kxld/i386/WKdmCompress.s b/libkern/kxld/i386/WKdmCompress.s deleted file mode 100644 index f7d98440c..000000000 --- a/libkern/kxld/i386/WKdmCompress.s +++ /dev/null @@ -1,597 +0,0 @@ -// $Id: WKdmCompress.intel.s,v 1.1 2010/01/28 22:33:24 cclee Exp cclee $ -// -// This file contains i386 and x86_64 (no SSE) optimized implementation of WKdm Compressor. The function prototype is -// -// unsigned int WKdm_compress (WK_word* src_buf, WK_word* dest_buf, unsigned int num_input_words); -// -// The implementation assumes the input buffer is a memory page (4096 bytes or 1024 words), or something less than 4KB. -// -// WKdm Compression algorithm is briefly stated as follows: -// -// There is a dynamically updated dictionary of 16 words, each initialized with "1". -// -// the dictionary is indexed as follows, -// 0, x = input_word -// 1, hash_index = (x>>10)&255 -// 2, dict_location = &dictionary[hash_index] -// 3, dict_word = *dict_location -// -// Sequentially for each input word, it is classified/tagged into 4 classes -// 0 : if the input word is 0 -// 1 : the higher 22 bits of the input word is identically to the higher bits from the dictionary (hash table indexed) -// 2 : the above condition (partially 22 higher bits matched) is not met, a dictionary miss condition -// 3 : the input word is exactly matched to the word from the dictionary (hash table index) -// -// after each input word is classified, each tag is represented by 2 bits. Furthermore, for each class -// 0 : no further info is needed -// 1 : the hash_index is represented by 4-bits (8 packed into a word), -// the lower 10-bits is sent to the decompressor (3 packed into a word) -// 2 : the 32-bit word is sent to the decompressor -// 3 : the hash_index is represented by 4-bits (8 packed into a word) -// -// for classes 1 and 2, the input word is used to update the dictionary after it is classified/tagged -// -// the following implementation was started from compiling (gcc -O3) the original C code (WKdmCompress.c) -// and then subsequentially improved and documented. 
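// As a hedged C rendering of the classification just described (names
// such as dictionary, dict_location and hashLookupTable appear in this
// file; the emit_*() helpers are hypothetical stand-ins for the
// next_qp / next_low_bits / next_full_patt stores):
//
//   unsigned hash = (input_word >> 10) & 0xff;           // 8-bit index
//   WK_word *dict_location =
//       (WK_word *)((char *)dictionary + hashLookupTable[hash]);
//   WK_word dict_word = *dict_location;
//
//   if (input_word == 0)                tag = 0;                 // ZERO
//   else if (input_word == dict_word) { tag = 3; emit_qpos(); }  // EXACT
//   else if ((input_word >> 10) == (dict_word >> 10)) {          // PARTIAL
//       tag = 1; emit_qpos(); emit_low10(input_word & 0x3ff);
//       *dict_location = input_word;
//   } else {                                                     // MISS
//       tag = 2; emit_full(input_word);
//       *dict_location = input_word;
//   }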
-// For i386, it speeds up ~ 1.5 times -// For x86_64, it speeds up ~ 1.3 times -// -// cclee, 1/28/10 - -#if !(defined __i386__ || defined __x86_64__) - -typedef char DummyDefinition; - -#else // i386 or x86_64 architectures - -#if defined __i386__ // 32-bit implementation - - .text - .align 4,0x90 - -.globl _WKdm_compress -_WKdm_compress: - - pushl %ebp - movl %esp, %ebp - - pushl %edi - pushl %esi - pushl %ebx - - // allocate stack memory for local variables - - subl $6316, %esp - - leal _hashLookupTable, %ebx // hashTable - - movl 8(%ebp), %edx // %edx = src_buf - movl 12(%ebp), %esi // %esi = dest_buf - movl 16(%ebp), %eax // %eax = num_input_words - - leal -1112(%ebp), %ecx // tempTagsArray - movl %ecx, -6272(%ebp) // a copy of char* next_tag = (char *) tempTagsArray; - - leal -2136(%ebp), %ecx // tempQPosArray - movl %ecx, -6264(%ebp) // char* next_qp = (char *) tempQPosArray; - movl %ecx, -6252(%ebp) - - leal (%edx,%eax,4), %ecx // src_buf + num_input_words*4 - movl %ecx, -6244(%ebp) // end_of_input = src_buf + num_input_words; - - // PRELOAD_DICTIONARY; - movl $1, -88(%ebp) - movl $1, -84(%ebp) - movl $1, -80(%ebp) - movl $1, -76(%ebp) - movl $1, -72(%ebp) - movl $1, -68(%ebp) - movl $1, -64(%ebp) - movl $1, -60(%ebp) - movl $1, -56(%ebp) - movl $1, -52(%ebp) - movl $1, -48(%ebp) - movl $1, -44(%ebp) - movl $1, -40(%ebp) - movl $1, -36(%ebp) - movl $1, -32(%ebp) - movl $1, -28(%ebp) - - shrl $4, %eax // (num_input_words / 16) - leal 16(%esi,%eax,4), %eax // dest_buf + [TAGS_AREA_OFFSET + (num_input_words / 16)]*4 - movl %eax, -6256(%ebp) // next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16); - - leal -6232(%ebp), %eax // &tempLowBitsArray[0] - movl %eax, -6260(%ebp) // save a copy of &tempLowBitsArray[0] - movl %eax, -6248(%ebp) // save a copy of &tempLowBitsArray[0] - - cmpl %ecx, %edx // next_input_word (%edx) vs end_of_input (%ecx) - jae L_done_search // if (next_input_word >= end_of_input) skip the following search loop - - leal -1111(%ebp), %esi // &next_tag[1] - leal -88(%ebp), %ebp // dictionary - - movl %edx, %edi // next_input_word - - #define next_input_word %edi - #define dictionary %ebp - #define next_tag %esi - - jmp L5 - - .align 4,0x90 -L_RECORD_ZERO: - movb $0, -1(next_tag) // *next_tag = ZERO; -L8: - addl $4, next_input_word // next_input_word++; - incl next_tag // next_tag++ - cmpl next_input_word, 84(%esp) // end_of_input vs next_input_word - jbe L_done_search // if (next_input_word>=end_of_input), skip to L_done_search -L5: - movl (next_input_word), %ecx // input_word = *next_input_word; - movl %ecx, %eax // a copy of input_word - testl %ecx, %ecx // input_word - je L_RECORD_ZERO // if (input_word==0) RECORD_ZERO - shrl $10, %eax // input_high_bits = HIGH_BITS(input_word); - movl %eax, (%esp) // save a copy of input_high_bits; - andl $255, %eax // 8 bits index to Hash Table - movsbl (%ebx,%eax),%edx // HASH_TO_DICT_BYTE_OFFSET(input_word) - addl dictionary, %edx // ((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word)); - movl (%edx), %eax // dict_word = *dict_location; - cmpl %eax, %ecx // cmp input_word vs dict_word - je L_RECORD_EXACT - shrl $10, %eax // HIGH_BITS(dict_word) - cmpl %eax, (%esp) // input_high_bits vs HIGH_BITS(dict_word) - je L_RECORD_PARTIAL // if (input_high_bits == HIGH_BITS(dict_word)) RECORD_PARTIAL - -L_RECORD_MISS: - movb $2, -1(next_tag) // *next_tag = 2 for miss - movl 72(%esp), %eax // next_full_patt - movl %ecx, (%eax) // *next_full_patt = input_word; - addl $4, %eax // next_full_patt++; - movl %eax, 
72(%esp) // save next_full_patt - movl %ecx, (%edx) // *dict_location = input_word - jmp L8 - - .align 4,0x90 -L_RECORD_EXACT: - movb $3, -1(next_tag) // *next_tag = 3 for exact - subl dictionary, %edx // dict_location - dictionary - sarl $2, %edx // divide by 4 for word offset - movl 76(%esp), %eax // next_qp - movb %dl, (%eax) // *next_qp = word offset (4-bit) - incl %eax // next_qp++ - movl %eax, 76(%esp) // save next_qp - jmp L8 - -L_done_search: - - // restore %ebp as normal use (was used as dictionary) - movl %esp, %ebp - addl $6328, %ebp - - // SET_QPOS_AREA_START(dest_buf,next_full_patt); - movl -6256(%ebp), %edi // next_full_patt - subl 12(%ebp), %edi // next_full_patt - dest_buf - movl %edi, %eax // next_full_patt - dest_buf - sarl $2, %eax // in 4-byte words - movl %eax, -6240(%ebp) // save (next_full_patt - dest_buf) in words - movl 12(%ebp), %edx // dest_buf - movl %eax, 4(%edx) // dest_buf[1] = next_full_patt - dest_buf - - movl -6272(%ebp), %ecx // &tempTagsArray[0] - decl next_tag - cmpl next_tag, %ecx // next_tag vs &tempTagsArray[0] - jae L13 // if &tempTagsArray[0] >= next_tag, skip the following WK_pack_2bits - - movl %edx, %ebx // a copy of dest_buf - - // boundary_tmp = WK_pack_2bits(tempTagsArray, (WK_word *) next_tag, dest_buf + HEADER_SIZE_IN_WORDS); - - .align 4,0x90 -L_WK_pack_2bits: - movl 4(%ecx), %eax // w1 - sall $2, %eax // w1 << 2 - movl 8(%ecx), %edx // w2 - sall $4, %edx // w2 << 4 - orl %edx, %eax // (w1<<2) | (w2<<4) - orl (%ecx), %eax // (w0) | (w1<<2) | (w2<<4) - movl 12(%ecx), %edx // w3 - sall $6, %edx // (w3<<6) - orl %edx, %eax // (w0) | (w1<<2) | (w2<<4) | (w3<<6) - movl %eax, 16(%ebx) // save at *(dest_buf + HEADER_SIZE_IN_WORDS) - addl $16, %ecx // tempTagsArray += 16; - addl $4, %ebx // dest_buf += 4; - cmpl %ecx, next_tag // cmp next_tag vs dest_buf - ja L_WK_pack_2bits // if (next_tag > dest_buf) repeat L_WK_pack_2bits - - /* Pack the queue positions into the area just after the full words. 
*/ -L13: - movl -6252(%ebp), %eax // next_qp - movl -6264(%ebp), %ecx // (char *) tempQPosArray - movl %eax, %esi // next_qp - subl %ecx, %eax // num_bytes_to_pack = next_qp - (char *) tempQPosArray; - addl $7, %eax // num_bytes_to_pack + 7 - andl $-8, %eax // clear lower 3 bits, (num_packed_words<<3) - addl %eax, %ecx // endQPosArray = tempQPosArray + num_source_words; - cmpl %ecx, %esi // next_qp vs endQPosArray - jae L16 - .align 4,0x90 -L30: - movb $0, (%esi) // *next_qp = 0; - incl %esi // next_qp++ - cmpl %ecx, %esi // next_qp vs endQPosArray - jne L30 // - -L16: - movl -6256(%ebp), %ebx // next_full_patt - cmpl -6264(%ebp), %ecx // endQPosArray vs tempQPosArray - jbe L20 // if (endQPosArray<=tempQPosArray) skip L_WK_pack_4bits - movl -6264(%ebp), %edx // tempQPosArray - - - // boundary_tmp = WK_pack_4bits(tempQPosArray, endQPosArray, next_full_patt); - - .align 4,0x90 -L21: - movl 4(%edx), %eax // src_next[1] - sall $4, %eax // (src_next[1] << 4) - orl (%edx), %eax // temp = src_next[0] | (src_next[1] << 4) - movl %eax, (%ebx) // dest_next[0] = temp; - addl $4, %ebx // dest_next++; - addl $8, %edx // src_next += 2; - cmpl %edx, %ecx // source_end vs src_next - ja L21 // while (src_next < source_end) repeat the loop - - movl %ebx, %edi // boundary_tmp - - subl 12(%ebp), %edi // boundary_tmp - dest_buf - movl %edi, %eax // boundary_tmp - dest_buf - sarl $2, %eax // translate into word offset - - movl %eax, -6240(%ebp) // save (next_full_patt - dest_buf) in words - -L20: - // SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp); - movl -6240(%ebp), %ecx // boundary_tmp - dest_buf - movl 12(%ebp), %edx // dest_buf - movl %ecx, 8(%edx) // dest_buf[2] = boundary_tmp - dest_buf - - movl -6260(%ebp), %ecx // tempLowBitsArray - movl -6248(%ebp), %edx // next_low_bits - subl %ecx, %edx // next_low_bits - tempLowBitsArray - sarl $2, %edx // num_tenbits_to_pack - - subl $3, %edx // pre-decrement num_tenbits_to_pack by 3 - jl 1f // if num_tenbits_to_pack < 3, skip the following loop - .align 4,0x90 -0: - movl 4(%ecx), %eax // w1 - sall $10, %eax // w1<<10 - movl 8(%ecx), %esi // w2 - sall $20, %esi // w2<<20 - orl %esi, %eax // (w1<<10) | (w2<<20) - orl (%ecx), %eax // (w0) | (w1<<10) | (w2<<20) - movl %eax, (%ebx) // pack w0,w1,w2 into 1 dest_buf word - addl $4, %ebx // dest_buf++ - addl $12, %ecx // next w0/w1/w2 triplet - subl $3, %edx // num_tenbits_to_pack-=3 - jge 0b // if no less than 3 elements, back to loop head - -1: addl $3, %edx // post-increment num_tenbits_to_pack by 3 - je 3f // if num_tenbits_to_pack is a multiple of 3, skip the following - movl (%ecx), %eax // w0 - subl $1, %edx // num_tenbits_to_pack -- - je 2f // - movl 4(%ecx), %esi // w1 - sall $10, %esi // w1<<10 - orl %esi, %eax -2: - movl %eax, (%ebx) // write the final dest_buf word - addl $4, %ebx // dest_buf++ -3: - movl %ebx, %eax // boundary_tmp - subl 12(%ebp), %eax // boundary_tmp - dest_buf - sarl $2, %eax // boundary_tmp - dest_buf in terms of words - movl 12(%ebp), %esi // dest_buf - movl %eax, 12(%esi) // SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp); - sall $2, %eax // boundary_tmp - dest_buf in terms of bytes - addl $6316, %esp // pop out stack memory - popl %ebx - popl %esi - popl %edi - leave - ret - - .align 4,0x90 - -L_RECORD_PARTIAL: - movb $1, -1(next_tag) // *next_tag = 1 for partial matched - movl %edx, %eax // dict_location - subl dictionary, %eax // %eax = dict_location - dictionary - movl %ecx, (%edx) // *dict_location = input_word; - sarl $2, %eax // offset in 32-bit word - movl 76(%esp), %edx // next_qp 
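// (A partial match records three things: the input word has already been
// written back into its dictionary slot above; the 4-bit slot index is
// stored through next_qp below; and the low 10 bits of the word are
// appended via next_low_bits, so the decompressor can splice them onto
// the matched high 22 bits.)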
- movb %al, (%edx) // update *next_qp - incl %edx // next_qp++ - movl %edx, 76(%esp) // save next_qp - movl %ecx, %eax // a copy of input_word - andl $1023, %eax // lower 10 bits - movl 80(%esp), %edx // next_low_bits - movl %eax, (%edx) // EMIT_WORD(next_low_bits,(low_bits_pattern)) - addl $4, %edx // next_low_bits++ - movl %edx, 80(%esp) // save next_low_bits - jmp L8 - -#endif // i386 architectures - -#if defined __x86_64__ // 64-bit implementation - .text - .align 4,0x90 - -.globl _WKdm_compress -_WKdm_compress: - pushq %rbp - movq %rsp, %rbp - pushq %r15 - pushq %r14 - pushq %r13 - pushq %r12 - pushq %rbx - subq $6112, %rsp - - #define tempTagsArray -6264(%rbp) - #define tempLowBitsArray -6272(%rbp) - #define next_tag %r8 - #define next_input_word %rdi - #define end_of_input %r13 - #define next_full_patt %rbx - #define dict_location %rcx - #define next_qp %r10 - #define dictionary %r11 - #define dest_buf %r12 - #define hashTable %r14 - #define tempQPosArray %r15 - #define next_low_bits %rsi - - movq %rsi, %r12 // dest_buf - - leaq -1136(%rbp), %rax // &tempTagsArray[0] - movq %rax, tempTagsArray - leaq 1(%rax), next_tag // next_tag always points to the one following the current tag - - leaq -2160(%rbp), %r15 // &tempQPosArray[0] - movq %r15, next_qp // next_qp - - mov %edx, %eax // num_input_words - leaq (%rdi,%rax,4), end_of_input // end_of_input = src_buf + num_input_words - - // PRELOAD_DICTIONARY; - movl $1, -112(%rbp) - movl $1, -108(%rbp) - movl $1, -104(%rbp) - movl $1, -100(%rbp) - movl $1, -96(%rbp) - movl $1, -92(%rbp) - movl $1, -88(%rbp) - movl $1, -84(%rbp) - movl $1, -80(%rbp) - movl $1, -76(%rbp) - movl $1, -72(%rbp) - movl $1, -68(%rbp) - movl $1, -64(%rbp) - movl $1, -60(%rbp) - movl $1, -56(%rbp) - movl $1, -52(%rbp) - - shrl $4, %edx // (num_input_words / 16) - mov %edx, %edx // sign extension into quad word - leaq 16(%rsi,%rdx,4), %rbx // dest_buf + [TAGS_AREA_OFFSET + (num_input_words / 16)]*4 - - leaq -6256(%rbp), %rax // &tempLowBitsArray[0] - movq %rax, tempLowBitsArray // save for later reference - movq %rax, next_low_bits // next_low_bits - - cmpq end_of_input, next_input_word // next_input_word vs end_of_input - jae L_done_search // if (next_input_word>=end_of_input) no work to do in search - leaq -112(%rbp), dictionary // dictionary - leaq _hashLookupTable(%rip), hashTable // hash look up table - jmp L5 - - .align 4,0x90 -L_RECORD_ZERO: - movb $0, -1(next_tag) // *next_tag = ZERO; -L8: - addq $4, next_input_word // next_input_word++; - incq next_tag // next_tag++ - cmpq next_input_word, end_of_input // end_of_input vs next_input_word - jbe L_done_search -L5: - movl (next_input_word), %edx // input_word = *next_input_word; - movl %edx, %r9d // a copy of input_word - testl %edx, %edx // input_word - je L_RECORD_ZERO // if (input_word==0) RECORD_ZERO - shrl $10, %r9d // input_high_bits = HIGH_BITS(input_word); - movzbl %r9b, %eax // 8-bit index to the Hash Table - movsbq (hashTable,%rax),%rax // HASH_TO_DICT_BYTE_OFFSET(input_word) - leaq (dictionary, %rax), dict_location // ((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word)); - movl (dict_location), %eax // dict_word = *dict_location; - cmpl %eax, %edx // dict_word vs input_word - je L_RECORD_EXACT // if identical, RECORD_EXACT - shrl $10, %eax // HIGH_BITS(dict_word) - cmpl %eax, %r9d // input_high_bits vs HIGH_BITS(dict_word) - je L_RECORD_PARTIAL // if identical, RECORD_PARTIAL - -L_RECORD_MISS: - movb $2, -1(next_tag) // *next_tag = 2 for miss - movl %edx, (next_full_patt) // *next_full_patt = 
input_word; - addq $4, next_full_patt // next_full_patt++ - movl %edx, (dict_location) // *dict_location = input_word - addq $4, next_input_word // next_input_word++ - incq next_tag // next_tag++ - cmpq next_input_word, end_of_input // end_of_input vs next_input_word - ja L5 // if (end_of_input>next_input_word) repeat from L5 - -L_done_search: - - // SET_QPOS_AREA_START(dest_buf,next_full_patt); - //movq next_full_patt, %r11 // next_full_patt - movq next_full_patt, %rax // next_full_patt - subq dest_buf, %rax // next_full_patt - dest_buf - sarq $2, %rax // offset in 4-bytes - movl %eax, %r13d // r13d = (next_full_patt - dest_buf) - movl %eax, 4(dest_buf) // dest_buf[1] = next_full_patt - dest_buf - - decq next_tag - cmpq next_tag, tempTagsArray // &tempTagsArray[0] vs next_tag - jae L13 // if (&tempTagsArray[0] >= next_tag), skip the following - - // boundary_tmp = WK_pack_2bits(tempTagsArray, (WK_word *) next_tag, dest_buf + HEADER_SIZE_IN_WORDS); - - movq dest_buf, %rdi // dest_buf - movq tempTagsArray, %rcx // &tempTagsArray[0] - - .align 4,0x90 -L_pack_2bits: - movl 4(%rcx), %eax // w1 - sall $2, %eax // w1 << 2 - movl 8(%rcx), %edx // w2 - sall $4, %edx // w2 << 4 - orl %edx, %eax // (w1<<2) | (w2<<4) - orl (%rcx), %eax // (w0) | (w1<<2) | (w2<<4) - movl 12(%rcx), %edx // w3 - sall $6, %edx // w3 << 6 - orl %edx, %eax // (w0) | (w1<<2) | (w2<<4) | (w3<<6) - movl %eax, 16(%rdi) // save at *(dest_buf + HEADER_SIZE_IN_WORDS) - addq $16, %rcx // tempTagsArray += 16; - addq $4, %rdi // dest_buf += 4; - cmpq %rcx, next_tag // cmp next_tag vs dest_buf - ja L_pack_2bits // if (next_tag > dest_buf) repeat L_pack_2bits - - /* Pack the queue positions into the area just after the full words. */ - -L13: - movl %r10d, %eax // next_qp - subl %r15d, %eax // num_bytes_to_pack = next_qp - (char *) tempQPosArray; - addl $7, %eax // num_bytes_to_pack+7 - shrl $3, %eax // num_packed_words = (num_bytes_to_pack + 7) >> 3 - addl %eax, %eax // num_source_words = num_packed_words * 2; - mov %eax, %eax - leaq (tempQPosArray,%rax,4), %rcx // endQPosArray = tempQPosArray + num_source_words - cmpq %rcx, %r10 // next_qp vs endQPosArray - jae L16 // if (next_qp >= endQPosArray) skip the following zero paddings - .align 4,0x90 -L30: - movb $0, (next_qp) // *next_qp = 0 - incq next_qp // next_qp++ - cmpq %rcx, next_qp // next_qp vs endQPosArray - jne L30 // repeat while next_qp < endQPosArray -L16: - movq %rbx, %rdi // next_full_patt - cmpq tempQPosArray, %rcx // endQPosArray vs tempQPosArray - jbe L20 // if (endQPosArray <= tempQPosArray) skip the following - movq tempQPosArray, %rdx // tempQPosArray - - .align 4,0x90 -L_pack_4bits: - movl 4(%rdx), %eax // src_next[1] - sall $4, %eax // (src_next[1] << 4) - orl (%rdx), %eax // temp = src_next[0] | (src_next[1] << 4) - movl %eax, (%rdi) // dest_next[0] = temp; - addq $4, %rdi // dest_next++; - addq $8, %rdx // src_next += 2; - cmpq %rdx, %rcx // source_end vs src_next - ja L_pack_4bits // while (src_next < source_end) repeat the loop - - // SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp); - //movq %rdi, %r11 // boundary_tmp - movq %rdi, %rax // boundary_tmp - subq dest_buf, %rax // boundary_tmp - dest_buf - movq %rax, %r13 // boundary_tmp - dest_buf - shrq $2, %r13 // boundary_tmp - dest_buf in words -L20: - movl %r13d, 8(dest_buf) // dest_buf[2] = boundary_tmp - dest_buf - - movq tempLowBitsArray, %rcx // tempLowBitsArray - movq next_low_bits, %rbx // next_low_bits - subq %rcx, %rbx // next_low_bits - tempLowBitsArray (in bytes) - sarq $2, %rbx // num_tenbits_to_pack 
(in words) - - #define size %ebx - - subl $3, size // pre-decrement num_tenbits_to_pack by 3 - jl 1f // if num_tenbits_to_pack < 3, skip the following loop - - .align 4,0x90 -0: - movl 4(%rcx), %eax // w1 - sall $10, %eax // w1 << 10 - movl 8(%rcx), %edx // w2 - sall $20, %edx // w2 << 20 - orl %edx, %eax // (w1<<10) | (w2<<20) - orl (%rcx), %eax // (w0) | (w1<<10) | (w2<<20) - movl %eax, (%rdi) // pack w0,w1,w2 into 1 dest_buf word - addq $4, %rdi // dest_buf++ - addq $12, %rcx // next w0/w1/w2 triplet - subl $3, size // num_tenbits_to_pack-=3 - jge 0b // if no less than 3 elements, back to loop head - -1: addl $3, size // post-increment num_tenbits_to_pack by 3 - je 3f // if num_tenbits_to_pack is a multiple of 3, skip the following - movl (%rcx), %eax // w0 - subl $1, size // num_tenbits_to_pack-- - je 2f // - movl 4(%rcx), %edx // w1 - sall $10, %edx // w1 << 10 - orl %edx, %eax // w0 | (w1<<10) - -2: movl %eax, (%rdi) // write the final dest_buf word - addq $4, %rdi // dest_buf++ - -3: movq %rdi, %rax // boundary_tmp - subq dest_buf, %rax // boundary_tmp - dest_buf - shrq $2, %rax // boundary_tmp - dest_buf in terms of words - movl %eax, 12(dest_buf) // SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp) - shlq $2, %rax // boundary_tmp - dest_buf in terms of bytes - - // restore registers and return - addq $6112, %rsp - popq %rbx - popq %r12 - popq %r13 - popq %r14 - popq %r15 - leave - ret - - .align 4,0x90 -L_RECORD_EXACT: - movb $3, -1(next_tag) // *next_tag = 3 for exact - subq dictionary, %rcx // dict_location - dictionary - sarq $2, %rcx // divide by 4 for word offset - movb %cl, (next_qp) // *next_qp = word offset (4-bit) - incq next_qp // next_qp++ - jmp L8 - - .align 4,0x90 -L_RECORD_PARTIAL: - movb $1, -1(next_tag) // *next_tag = 1 for partial matched - movq %rcx, %rax // dict_location - subq dictionary, %rax // dict_location - dictionary - movl %edx, (%rcx) // *dict_location = input_word; - sarq $2, %rax // offset in 32-bit word - movb %al, (next_qp) // update *next_qp - incq next_qp // next_qp++ - andl $1023, %edx // lower 10 bits - movl %edx, (next_low_bits) // save next_low_bits - addq $4, next_low_bits // next_low_bits++ - jmp L8 - - // for some reason, keeping the following never executed code yields a better performance -L41: - leaq -6256(%rbp), %rax - movq %rax, -6272(%rbp) - movq %rax, %rsi - jmp L_done_search -#endif // x86_64 architectures -#endif // i386 or x86_64 architectures diff --git a/libkern/kxld/i386/WKdmDecompress.s b/libkern/kxld/i386/WKdmDecompress.s deleted file mode 100644 index c2e6e9345..000000000 --- a/libkern/kxld/i386/WKdmDecompress.s +++ /dev/null @@ -1,675 +0,0 @@ -// $Id: WKdmDecompress.intel.s,v 1.1 2010/01/30 00:39:21 cclee Exp cclee $ - -// This file contains i386 and x86_64 (no SSE) optimized implementation of WKdm Decompressor. -// The implementation is derived by compiling (gcc -O3) the original C code (WKdmDecompress.c) -// followed by hand tweaking of the compiled assembly code. 
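// In outline, decompression mirrors the compressor's output format; a
// hedged C-style sketch of the phases implemented below (hash(w)
// abbreviates the hashLookupTable lookup on (w>>10)&0xff):
//
//   unpack_2bits(tags area    -> tempTagsArray);      // 16 tags/word
//   unpack_4bits(qpos area    -> tempQPosArray);      // 8 indices/word
//   unpack_3_tenbits(low bits -> tempLowBitsArray);   // 3 fields/word
//   for each tag:
//       case 0: *dest++ = 0;                                    // ZERO
//       case 1: w = (dict[qpos] & ~0x3ff) | *next_low_bits++;   // PARTIAL
//               dict[qpos] = w; *dest++ = w;
//       case 2: w = *next_full_patt++;                          // MISS
//               dict[hash(w)] = w; *dest++ = w;
//       case 3: *dest++ = dict[qpos];                           // EXACT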
-// cclee, 1/29/10 - -#if defined __i386__ - .text - .align 4,0x90 - - .globl _WKdm_decompress -_WKdm_decompress: - - // save registers, set up base pointer %ebp, and allocate stack memory for local veriables - - pushl %ebp - movl %esp, %ebp - pushl %edi - pushl %esi - pushl %ebx - subl $7324, %esp - - // PRELOAD_DICTIONARY; dictionary starting address : -88(%ebp) - movl $1, -88(%ebp) - movl $1, -84(%ebp) - movl $1, -80(%ebp) - movl $1, -76(%ebp) - movl $1, -72(%ebp) - movl $1, -68(%ebp) - movl $1, -64(%ebp) - movl $1, -60(%ebp) - movl $1, -56(%ebp) - movl $1, -52(%ebp) - movl $1, -48(%ebp) - movl $1, -44(%ebp) - movl $1, -40(%ebp) - movl $1, -36(%ebp) - movl $1, -32(%ebp) - movl $1, -28(%ebp) - - #define dictionary_addr -88(%ebp) - #define TAGS_AREA_END -7292(%ebp) - #define tempTagsArray -7300(%ebp) - #define tempQPosArray -2488(%ebp) - #define tempLowBitsArray -7288(%ebp) - #define next_low_bits -7296(%ebp) - #define dictionary -7308(%ebp) - #define tag_area_end -7304(%ebp) - - // WK_unpack_2bits(TAGS_AREA_START(src_buf), TAGS_AREA_END(src_buf), tempTagsArray); - - movl 8(%ebp), %eax // src_buf - addl $272, %eax // src_buf + 16 (WKdm Header) + 256 (Tags) - movl %eax, TAGS_AREA_END // TAGS_AREA_END(src_buf) - movl 8(%ebp), %eax // src_buf - movl %eax, %edi // src_buf - addl $16, %eax // TAGS_AREA_START(src_buf) = src_buf + 16 (WKdm Header) - leal -1288(%ebp), %edx // tempTagsArray - movl %edx, tempTagsArray // save a copy of tempTagsArray[] at the said location - cmpl %eax, TAGS_AREA_END // TAGS_AREA_END vs TAGS_AREA_START - jbe 1f // if TAGS_AREA_END<=TAGS_AREA_START, no need for WK_unpack_2bits - movl %edx, %ecx // %ecx -> tempTagsArray[0] - xorl %esi, %esi // i=0 - movl $50529027, %ebx // 0x03030303, mask to extract 4 2-bit tags - .align 4,0x90 -L_WK_unpack_2bits: - movl 16(%edi,%esi,4), %edx // src_buf[i] for 16 tags, 16 (WKdm header) - movl %edx, %eax // w = src_buf[i] - andl %ebx, %eax // 1st 4 tags, each in bytes - movl %eax, (%ecx) // save 1st 4 tags - movl %edx, %eax // w = src_buf[i] - shrl $2, %eax // shift down 2 bits - andl %ebx, %eax // 2nd 4 tags, each in bytes - movl %eax, 4(%ecx) // save 2nd 4 tags - shrl $4, %edx // shift down w by 4 bits - movl %edx, %eax // w>>4 - andl %ebx, %eax // 3rd 4 tags - movl %eax, 8(%ecx) // save 3rd 4 tags - shrl $2, %edx // w>>6 - andl %ebx, %edx // 4th 4 tags - movl %edx, 12(%ecx) // save 4th 4 tags - addl $16, %ecx // point to next tempTagsArray[i*16] - incl %esi // i++ - cmpl $64, %esi // i vs 64 - jne L_WK_unpack_2bits // repeat the loop until i==64 -1: - - // WK_unpack_4bits(QPOS_AREA_START(src_buf), QPOS_AREA_END(src_buf), tempQPosArray); - - movl 8(%edi), %eax // WKdm header qpos end - leal (%edi,%eax,4), %esi // QPOS_AREA_END - movl 4(%edi), %eax // WKdm header qpos start - leal (%edi,%eax,4), %ecx // QPOS_AREA_START - cmpl %ecx, %esi // QPOS_AREA_END vs QPOS_AREA_START - jbe 1f // if QPOS_AREA_END <= QPOS_AREA_START, skip WK_unpack_4bits - leal tempQPosArray, %edi // tempQPosArray - movl $252645135, %ebx // 0x0f0f0f0f : mask to extract 4 4-bit qpos -L_WK_unpack_4bits: - movl (%ecx), %eax // w - movl %eax, %edx // w - andl %ebx, %edx // 1st 4 qpos - movl %edx, (%edi) // save 1st 4 qpos - shrl $4, %eax // w>>4 - andl %ebx, %eax // 2nd 4 qpos - movl %eax, 4(%edi) // save 2nd 4 qpos - addl $4, %ecx // point to next word w - addl $8, %edi // qpos += 8 - cmpl %ecx, %esi // QPOS_AREA_END vs qpos_pointer - ja L_WK_unpack_4bits // repeat until qpos_pointer >= QPOS_AREA_END - - // WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf), 
LOW_BITS_AREA_END(src_buf), tempLowBitsArray); - -1: - movl 8(%ebp), %edx // src_buf - movl 12(%edx), %eax // LOW_BITS_AREA_END offset - leal (%edx,%eax,4), %edi // LOW_BITS_AREA_END - cmpl %edi, %esi // LOW_BITS_AREA_START(=QPOS_AREA_END) vs LOW_BITS_AREA_END - jae 1f // if (LOW_BITS_AREA_START>=LOW_BITS_AREA_END) skip unpack_3_tenbits - leal tempLowBitsArray, %ecx // tempLowBitsArray - movl $1023, %ebx // 0x03ff to extact lower 10-bits - - .align 4,0x90 -L_WK_unpack_3_tenbits: - movl (%esi), %eax // w = *next_low_bits - movl %eax, %edx // w - andl %ebx, %edx // 1st 10-bit - movl %edx, (%ecx) // save 1st 10-bit - shrl $10, %eax // (w>>10) - movl %eax, %edx // (w>>10) - andl %ebx, %edx // 2nd 10-bit - movl %edx, 4(%ecx) // save 2nd 10-bit - shrl $10, %eax // (w>>20), no need to and with mask, the top 2 bits should be zero - movl %eax, 8(%ecx) // save 3rd 10-bits - addl $4, %esi // point to next w - addl $12, %ecx // tempLowBitsArray += 3; - cmpl %esi, %edi // LOW_BITS_AREA_END vs next_low_bits - ja L_WK_unpack_3_tenbits // repeat until next_low_bits>=LOW_BITS_AREA_END -1: - call Lhash -Lhash: - popl %ebx // set up %ebx for use in Hash Table loopup[ - - #define next_tag %esi - #define next_qpos %edi - - movl tempTagsArray, next_tag // next_tag = tempTagsArray - leal tempQPosArray, next_qpos // next_qpos = tempQPosArray - movl 12(%ebp), %ecx // dest_buf - addl $4, %ecx // for some reason, performance is better if we points to the next one - leal tempLowBitsArray, %eax // tempLowBitsArray - movl %eax, next_low_bits // next_low_bits = next_low_bits; - leal -264(%ebp), %edx - movl %edx, tag_area_end // tag_area_end - leal dictionary_addr, %eax // dictionary starting address - movl %eax, dictionary // dictionary - jmp L11 - .align 4,0x90 -L29: - jle L_ZERO_TAG - cmpb $2, %al // MISS_TAG - je L_MISS_TAG -L_EXACT_TAG: - movsbl (next_qpos),%eax // qpos = *next_qpos - incl next_qpos // next_qpos++ - movl dictionary, %edx // dictionary - movl (%edx,%eax,4), %eax // w = dictionary[qpos] - movl %eax, -4(%ecx) // *dest_buf = w - .align 4,0x90 -L_next: - incl next_tag // next_tag++ - addl $4, %ecx // dest_buf++ - cmpl tag_area_end, next_tag // next_tag vs tag_area_end - jae L_done // if (next_tag>=tag_area_end) -L11: - movzbl (next_tag), %eax // tag = *next_tag - cmpb $1, %al // Partial match? 
- jne L29 -L_PARTIAL_TAG: - movsbl (next_qpos),%edx // qpos = *next_qpos - movl dictionary, %eax // dictionary - leal (%eax,%edx,4), %edx // dict_location = &dictionary[qpos] - movl %edx, -7324(%ebp) // save dict_location to release %edx - incl next_qpos // next_qpos++ - movl (%edx), %eax // read dictionary word - andl $-1024, %eax // keep only higher 22-bits - movl next_low_bits, %edx // low_bits = *next_low_bits - orl (%edx), %eax // construct the new partially matched word - addl $4, %edx // - movl %edx, next_low_bits // next_low_bits++ - movl -7324(%ebp), %edx // dict_location - movl %eax, (%edx) // update *dict_location with the newly constructed word - movl %eax, -4(%ecx) // *dest_buf = the newly constructed word - incl next_tag // next_tag++ - addl $4, %ecx // dest_buf++ - cmpl tag_area_end, next_tag // next_tag vs tag_area_end - jb L11 // if next_tag < tag_area_end, repeat the loop -L_done: - - // release stack memory, restore registers, and return - addl $7324, %esp - popl %ebx - popl %esi - popl %edi - leave - ret - - #define next_full_patt -7292(%ebp) /* next_full_patt starts with initial value of TAGS_AREA_END */ - - .align 4,0x90 -L_MISS_TAG: - movl next_full_patt, %edx // next_full_patt - movl (%edx), %eax // word = *next_full_patt - addl $4, %edx // next_full_patt++ - movl %edx, next_full_patt // save next_full_patt - movl %eax, %edx // word - shrl $10, %edx // word>>10 - andl $255, %edx // 8-bit hash table index - movsbl _hashLookupTable-Lhash(%ebx,%edx),%edx // qpos - movl %eax, -88(%ebp,%edx) // dictionary[qpos] = word - movl %eax, -4(%ecx) // *dest_buf = word - jmp L_next // repeat the loop - - .align 4,0x90 -L_ZERO_TAG: - movl $0, -4(%ecx) // *dest_buf = 0 - jmp L_next // repeat the loop - -#endif // __i386__ - -#if defined __x86_64__ - - - .text - .align 4,0x90 - - .globl _WKdm_decompress -_WKdm_decompress: - - // save registers, and allocate stack memory for local variables - - pushq %rbp - movq %rsp, %rbp - pushq %r12 - pushq %rbx - subq $7144, %rsp - - movq %rsi, %r12 // dest_buf - - // PRELOAD_DICTIONARY; dictionary starting address : starting address -80(%rpb) - movl $1, -80(%rbp) - movl $1, -76(%rbp) - movl $1, -72(%rbp) - movl $1, -68(%rbp) - movl $1, -64(%rbp) - movl $1, -60(%rbp) - movl $1, -56(%rbp) - movl $1, -52(%rbp) - movl $1, -48(%rbp) - movl $1, -44(%rbp) - movl $1, -40(%rbp) - movl $1, -36(%rbp) - movl $1, -32(%rbp) - movl $1, -28(%rbp) - movl $1, -24(%rbp) - movl $1, -20(%rbp) - - // WK_unpack_2bits(TAGS_AREA_START(src_buf), TAGS_AREA_END(src_buf), tempTagsArray); - leaq 272(%rdi), %r10 // TAGS_AREA_END - leaq 16(%rdi), %rax // TAGS_AREA_START - leaq -1280(%rbp), %rsi // tempTagsArray - cmpq %rax, %r10 // TAGS_AREA_END vs TAGS_AREA_START - jbe 1f // if TAGS_AREA_END <= TAGS_AREA_START, skip L_WK_unpack_2bits - movq %rsi, %rcx // next_word - xorl %r8d, %r8d // i = 0 - .align 4,0x90 -L_WK_unpack_2bits: - movl 16(%rdi,%r8,4), %edx // w = *next_word - movl %edx, %eax // w - andl $50529027, %eax // 1st 4 tags - movl %eax, (%rcx) // write 1st 4 tags - movl %edx, %eax // w - shrl $2, %eax // w>>2 - andl $50529027, %eax // 2nd 4 tags - movl %eax, 4(%rcx) // write 2nd 4 tags - shrl $4, %edx // w>>4 - movl %edx, %eax // w>>4 - andl $50529027, %eax // 3rd 4 tags - movl %eax, 8(%rcx) // write 3rd 4 tags - shrl $2, %edx // w>>6 - andl $50529027, %edx // 4th 4 tags - movl %edx, 12(%rcx) // write 4th 4 tags - addq $16, %rcx // next_tags += 16 - incq %r8 // i++ - cmpq $64, %r8 // i vs 64 - jne L_WK_unpack_2bits // repeat loop until i==64 -1: - - // 
WK_unpack_4bits(QPOS_AREA_START(src_buf), QPOS_AREA_END(src_buf), tempQPosArray); - - mov 8(%rdi), %eax // WKdm header qpos end - leaq (%rdi,%rax,4), %r9 // QPOS_AREA_END - mov 4(%rdi), %eax // WKdm header qpos start - leaq (%rdi,%rax,4), %r8 // QPOS_AREA_START - leaq -2480(%rbp), %rbx // tempQPosArray - cmpq %r8, %r9 // QPOS_AREA_END vs QPOS_AREA_START - jbe 1f // if QPOS_AREA_END <= QPOS_AREA_START, skip L_WK_unpack_4bits - leaq 8(%rbx), %rcx // next_qpos -L_WK_unpack_4bits: - movl (%r8), %eax // w = *next_word - movl %eax, %edx // w - andl $252645135, %edx // 1st 4 qpos - movl %edx, -8(%rcx) // write 1st 4 qpos - shrl $4, %eax // w>>4 - andl $252645135, %eax // 2nd 4 qpos - movl %eax, -4(%rcx) // write 2nd 4 qpos - addq $4, %r8 // next_word++ - addq $8, %rcx // next_qpos+=8 - cmpq %r8, %r9 // QPOS_AREA_END vs QPOS_AREA_START - ja L_WK_unpack_4bits // repeat loop until QPOS_AREA_END <= QPOS_AREA_START -1: - - // WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf), LOW_BITS_AREA_END(src_buf), tempLowBitsArray); - - mov 12(%rdi), %eax // LOW_BITS_AREA_END offset - leaq (%rdi,%rax,4), %rdi // LOW_BITS_AREA_END - leaq -7280(%rbp), %r11 // tempLowBitsArray - cmpq %rdi, %r9 // LOW_BITS_AREA_START vs LOW_BITS_AREA_END - jae 1f // if START>=END, skip L_WK_unpack_3_tenbits - leaq 12(%r11), %rcx // next_low_bits -L_WK_unpack_3_tenbits: - movl (%r9), %eax // w = *next_word - movl %eax, %edx // w - andl $1023, %edx // 1st tenbits - movl %edx, -12(%rcx) // write 1st tenbits - shrl $10, %eax // w >> 10 - movl %eax, %edx // w >> 10 - andl $1023, %edx // 2nd tenbits - movl %edx, -8(%rcx) // write 2nd tenbits - shrl $10, %eax // w >> 20, 3rd tenbits - movl %eax, -4(%rcx) // write 3rd tenbits - addq $4, %r9 // next_word++ - addq $12, %rcx // next_low_bits += 3 - cmpq %r9, %rdi // LOW_BITS_AREA_END vs next_word - ja L_WK_unpack_3_tenbits // repeat loop if LOW_BITS_AREA_END > next_word -1: - movq %rsi, %rdi // next_tag - movq %rbx, %r8 // next_qpos - leaq 4(%r12), %rcx // dest_buf - movq %r11, %r9 // next_low_bits - leaq -80(%rbp), %r11 // dictionary - leaq _hashLookupTable(%rip), %rbx // hash look up table - leaq 1024(%rsi), %rsi // tag_area_end - - jmp L11 - .align 4,0x90 -L31: - jle L_ZERO_TAG - cmpb $2, %al // MISS_TAG - je L_MISS_TAG -L_EXACT_TAG: - movsbq (%r8),%rax // qpos = *next_qpos - incq %r8 // next_qpos++ - movl (%r11,%rax,4), %eax // w = dictionary[qpos] - movl %eax, -4(%rcx) // *dest_buf = w - .align 4,0x90 -L_next: - incq %rdi // next_tag++ - addq $4, %rcx // dest_buf++ - cmpq %rsi, %rdi // next_tag vs tag_area_end - jae L_done // if next_tag >= tag_area_end, we're done -L11: - movzbl (%rdi), %eax // tag = *next_tag - cmpb $1, %al // partial match tag ? 
- jne L31 -L_PARTIAL_TAG: - movsbq (%r8),%rdx // qpos = *next_qpos - leaq (%r11,%rdx,4), %rdx // dict_location = &dictionary[qpos] - incq %r8 // next_qpos++ - movl (%rdx), %eax // read dictionary word - andl $-1024, %eax // clear lower 10 bits - orl (%r9), %eax // pad the lower 10-bits from *next_low_bits - addq $4, %r9 // next_low_bits++ - movl %eax, (%rdx) // *dict_location = newly formed word - movl %eax, -4(%rcx) // *dest_buf = newly formed word - cmpq %rsi, %rdi // compare next_tag vs tag_area_end - jne L_next // repeat loop until next_tag==tag_area_end -L_done: - - // release stack memory, restore registers, and return - addq $7144, %rsp - popq %rbx - popq %r12 - leave - ret - - .align 4,0x90 -L_MISS_TAG: - movl (%r10), %eax // w = *next_full_patt - addq $4, %r10 // next_full_patt++ - movl %eax, %edx // w - shrl $10, %edx // w>>10 - movzbl %dl, %edx // 8-bit hash table index - movsbq (%rbx,%rdx),%rdx // qpos - movl %eax, -80(%rbp,%rdx) // dictionary[qpos] = word - movl %eax, -4(%rcx) // *dest_buf = word - jmp L_next // repeat the loop - - .align 4,0x90 -L_ZERO_TAG: - movl $0, -4(%rcx) // *dest_buf = 0 - jmp L_next // repeat the loop - -#endif // --X86_64__ - -.globl _hashLookupTable - .const - .align 5 -_hashLookupTable: - .byte 0 - .byte 52 - .byte 8 - .byte 56 - .byte 16 - .byte 12 - .byte 28 - .byte 20 - .byte 4 - .byte 36 - .byte 48 - .byte 24 - .byte 44 - .byte 40 - .byte 32 - .byte 60 - .byte 8 - .byte 12 - .byte 28 - .byte 20 - .byte 4 - .byte 60 - .byte 16 - .byte 36 - .byte 24 - .byte 48 - .byte 44 - .byte 32 - .byte 52 - .byte 56 - .byte 40 - .byte 12 - .byte 8 - .byte 48 - .byte 16 - .byte 52 - .byte 60 - .byte 28 - .byte 56 - .byte 32 - .byte 20 - .byte 24 - .byte 36 - .byte 40 - .byte 44 - .byte 4 - .byte 8 - .byte 40 - .byte 60 - .byte 32 - .byte 20 - .byte 44 - .byte 4 - .byte 36 - .byte 52 - .byte 24 - .byte 16 - .byte 56 - .byte 48 - .byte 12 - .byte 28 - .byte 16 - .byte 8 - .byte 40 - .byte 36 - .byte 28 - .byte 32 - .byte 12 - .byte 4 - .byte 44 - .byte 52 - .byte 20 - .byte 24 - .byte 48 - .byte 60 - .byte 56 - .byte 40 - .byte 48 - .byte 8 - .byte 32 - .byte 28 - .byte 36 - .byte 4 - .byte 44 - .byte 20 - .byte 56 - .byte 60 - .byte 24 - .byte 52 - .byte 16 - .byte 12 - .byte 12 - .byte 4 - .byte 48 - .byte 20 - .byte 8 - .byte 52 - .byte 16 - .byte 60 - .byte 24 - .byte 36 - .byte 44 - .byte 28 - .byte 56 - .byte 40 - .byte 32 - .byte 36 - .byte 20 - .byte 24 - .byte 60 - .byte 40 - .byte 44 - .byte 52 - .byte 16 - .byte 32 - .byte 4 - .byte 48 - .byte 8 - .byte 28 - .byte 56 - .byte 12 - .byte 28 - .byte 32 - .byte 40 - .byte 52 - .byte 36 - .byte 16 - .byte 20 - .byte 48 - .byte 8 - .byte 4 - .byte 60 - .byte 24 - .byte 56 - .byte 44 - .byte 12 - .byte 8 - .byte 36 - .byte 24 - .byte 28 - .byte 16 - .byte 60 - .byte 20 - .byte 56 - .byte 32 - .byte 40 - .byte 48 - .byte 12 - .byte 4 - .byte 44 - .byte 52 - .byte 44 - .byte 40 - .byte 12 - .byte 56 - .byte 8 - .byte 36 - .byte 24 - .byte 60 - .byte 28 - .byte 48 - .byte 4 - .byte 32 - .byte 20 - .byte 16 - .byte 52 - .byte 60 - .byte 12 - .byte 24 - .byte 36 - .byte 8 - .byte 4 - .byte 16 - .byte 56 - .byte 48 - .byte 44 - .byte 40 - .byte 52 - .byte 32 - .byte 20 - .byte 28 - .byte 32 - .byte 12 - .byte 36 - .byte 28 - .byte 24 - .byte 56 - .byte 40 - .byte 16 - .byte 52 - .byte 44 - .byte 4 - .byte 20 - .byte 60 - .byte 8 - .byte 48 - .byte 48 - .byte 52 - .byte 12 - .byte 20 - .byte 32 - .byte 44 - .byte 36 - .byte 28 - .byte 4 - .byte 40 - .byte 24 - .byte 8 - .byte 56 - .byte 60 - .byte 16 - .byte 36 - 
.byte 32 - .byte 8 - .byte 40 - .byte 4 - .byte 52 - .byte 24 - .byte 44 - .byte 20 - .byte 12 - .byte 28 - .byte 48 - .byte 56 - .byte 16 - .byte 60 - .byte 4 - .byte 52 - .byte 60 - .byte 48 - .byte 20 - .byte 16 - .byte 56 - .byte 44 - .byte 24 - .byte 8 - .byte 40 - .byte 12 - .byte 32 - .byte 28 - .byte 36 - .byte 24 - .byte 32 - .byte 12 - .byte 4 - .byte 20 - .byte 16 - .byte 60 - .byte 36 - .byte 28 - .byte 8 - .byte 52 - .byte 40 - .byte 48 - .byte 44 - .byte 56 diff --git a/libkern/kxld/kxld_kext.c b/libkern/kxld/kxld_kext.c index a9ef47798..6b09346d2 100644 --- a/libkern/kxld/kxld_kext.c +++ b/libkern/kxld/kxld_kext.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008, 2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -738,7 +738,7 @@ resolve_symbols(KXLDKext *kext, const KXLDDict *defined_symbols, kxld_log(kKxldLogLinking, kKxldLogErr, "This kext has weak references but does not test for " "them. Test for weak references with " - "OSKextIsSymbolResolved().")); + "OSKextSymbolIsResolved(). (found in )")); #if KERNEL /* Get the address of the default weak address. diff --git a/libkern/kxld/kxld_object.c b/libkern/kxld/kxld_object.c index 752518b7a..1995cb88c 100644 --- a/libkern/kxld/kxld_object.c +++ b/libkern/kxld/kxld_object.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2011-2012 Apple Inc. All rights reserved. + * Copyright (c) 2009-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -327,11 +327,7 @@ get_target_machine_info(KXLDObject *object, cpu_type_t cputype __unused, check(object); -#if defined(__i386__) - object->cputype = CPU_TYPE_I386; - object->cpusubtype = CPU_SUBTYPE_I386_ALL; - return KERN_SUCCESS; -#elif defined(__x86_64__) +#if defined(__x86_64__) object->cputype = CPU_TYPE_X86_64; object->cpusubtype = CPU_SUBTYPE_X86_64_ALL; return KERN_SUCCESS; @@ -380,6 +376,7 @@ get_target_machine_info(KXLDObject *object, cpu_type_t cputype __unused, break; default: object->cpusubtype = 0; + break; } } @@ -656,6 +653,8 @@ init_from_final_linked_image(KXLDObject *object, u_int *filetype_out, kxld_log(kKxldLogLinking, kKxldLogErr, kKxldLogMalformedMachO "LC_UNIXTHREAD/LC_MAIN segment is not valid in a kext.")); break; + case LC_SEGMENT_SPLIT_INFO: + /* To be implemented later; treat as uninteresting for now */ case LC_CODE_SIGNATURE: case LC_DYLD_INFO: case LC_DYLD_INFO_ONLY: @@ -1478,7 +1477,8 @@ static boolean_t target_supports_protected_segments(const KXLDObject *object) { return (object->is_final_image && - object->cputype == CPU_TYPE_X86_64); + (object->cputype == CPU_TYPE_X86_64 || + object->cputype == CPU_TYPE_ARM)); } /******************************************************************************* @@ -2333,8 +2333,6 @@ target_supports_slideable_kexts(const KXLDObject *object) { check(object); - return ( object->cputype != CPU_TYPE_I386 - && object->include_kaslr_relocs - ); + return (object->cputype != CPU_TYPE_I386 && object->include_kaslr_relocs); } #endif /* KXLD_PIC_KEXTS */ diff --git a/libkern/kxld/kxld_reloc.c b/libkern/kxld/kxld_reloc.c index 583b5bc5f..5f41b511c 100644 --- a/libkern/kxld/kxld_reloc.c +++ b/libkern/kxld/kxld_reloc.c @@ -227,6 +227,7 @@ kxld_relocator_init(KXLDRelocator *relocator, u_char *file, relocator->may_scatter = FALSE; break; #endif /* KXLD_USER_OR_ARM */ + default: rval = KERN_FAILURE; kxld_log(kKxldLogLinking, kKxldLogErr, diff --git a/libkern/kxld/kxld_seg.c b/libkern/kxld/kxld_seg.c index ca3d2fb4f..7160f7e7d 100644 
--- a/libkern/kxld/kxld_seg.c +++ b/libkern/kxld/kxld_seg.c @@ -238,7 +238,6 @@ finish: * * Kernel sections: * __TEXT,__text -* __TEXT,__initcode * __TEXT,__const * __DATA,__data * @@ -755,9 +754,6 @@ kxld_seg_finish_init(KXLDSeg *seg) } } - /* XXX Cross architecture linking will fail if the page size ever differs - * from 4096. (As of this writing, we're fine on i386, x86_64, and arm). - */ seg->vmsize = round_page(maxaddr + maxsize - seg->base_addr); } @@ -781,7 +777,7 @@ kxld_seg_set_vm_protections(KXLDSeg *seg, boolean_t strict_protections) if (strict_protections) { if (streq_safe(seg->segname, SEG_TEXT, const_strlen(SEG_TEXT))) { seg->initprot = TEXT_SEG_PROT; - seg->maxprot = VM_PROT_ALL; + seg->maxprot = TEXT_SEG_PROT; } else { seg->initprot = DATA_SEG_PROT; seg->maxprot = DATA_SEG_PROT; diff --git a/libkern/libkern/Makefile b/libkern/libkern/Makefile index 55954e985..222b91341 100644 --- a/libkern/libkern/Makefile +++ b/libkern/libkern/Makefile @@ -3,7 +3,6 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) @@ -11,14 +10,11 @@ INSTINC_SUBDIRS = \ machine \ c++ \ crypto -INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} \ - i386 -INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} \ +INSTINC_SUBDIRS_X86_64 = \ i386 EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} DATAFILES = \ @@ -41,8 +37,7 @@ PRIVATE_DATAFILES = \ OSKextLibPrivate.h \ kext_request_keys.h \ mkext.h \ - prelink.h \ - WKdm.h + prelink.h INSTALL_MI_LIST = \ OSByteOrder.h \ @@ -61,8 +56,6 @@ INSTALL_MI_LCL_LIST = \ kext_panic_report.h \ OSCrossEndian.h - - INSTALL_KF_MI_LIST = \ ${DATAFILES} diff --git a/libkern/libkern/OSAtomic.h b/libkern/libkern/OSAtomic.h index 98e0eb99b..c802adeff 100644 --- a/libkern/libkern/OSAtomic.h +++ b/libkern/libkern/OSAtomic.h @@ -1,5 +1,6 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2007-2012 Apple Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -65,8 +66,6 @@ extern "C" { * reading and updating of values. */ -#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) - /*! * @function OSCompareAndSwap64 * @@ -83,8 +82,6 @@ extern Boolean OSCompareAndSwap64( #define OSCompareAndSwap64(a, b, c) \ (OSCompareAndSwap64(a, b, __SAFE_CAST_PTR(volatile UInt64*,c))) -#endif /* defined(__i386__) || defined(__x86_64__) */ - /*! * @function OSAddAtomic64 * @@ -174,6 +171,9 @@ inline static long OSDecrementAtomicLong(volatile long * address) } #endif /* XNU_KERNEL_PRIVATE */ +#if XNU_KERNEL_PRIVATE +#endif /* XNU_KERNEL_PRIVATE */ + /*! * @function OSCompareAndSwap * @@ -228,7 +228,7 @@ extern Boolean OSCompareAndSwapPtr( * @discussion * The OSAddAtomic function adds the specified amount to the value at the specified address and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * This function guarantees atomicity only with main system memory. 
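/*
 * OSCompareAndSwap64 is now declared for every architecture (the
 * i386/x86_64/arm guard above was removed). A sketch of the classic
 * compare-and-swap retry loop it enables; fetch_and_add64 is a
 * hypothetical helper, not part of this header:
 */
#include <libkern/OSAtomic.h>

static UInt64 fetch_and_add64(volatile UInt64 *address, UInt64 delta)
{
    UInt64 old;

    do {
        old = *address;                        /* snapshot current value */
    } while (!OSCompareAndSwap64(old, old + delta, address));
    return old;                                /* value before the addition */
}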
It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers. * @param amount The amount to add. * @param address The 4-byte aligned address of the value to update atomically. * @result The value before the addition @@ -248,8 +248,7 @@ extern SInt32 OSAddAtomic( * @discussion * The OSAddAtomic16 function adds the specified amount to the value at the specified address and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. - * @param amount The amount to add. + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers. * @param address The 2-byte aligned address of the value to update atomically. * @result The value before the addition */ @@ -266,7 +265,7 @@ extern SInt16 OSAddAtomic16( * @discussion * The OSAddAtomic8 function adds the specified amount to the value at the specified address and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers. * @param amount The amount to add. * @param address The address of the value to update atomically. * @result The value before the addition. @@ -301,7 +300,7 @@ extern SInt32 OSIncrementAtomic(volatile SInt32 * address); * @discussion * The OSIncrementAtomic16 function increments the value at the specified address by one and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. 
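/*
 * The revised discussion text above and below makes one point repeatedly:
 * these read-modify-write primitives no longer imply a memory barrier.
 * Where ordering matters (for instance, publishing data through a counter),
 * the fence must now be explicit. A sketch, assuming an x86
 * XNU_KERNEL_PRIVATE build where OSMemoryBarrier() is available (see the
 * hunk later in this header); payload/ready are purely illustrative:
 */
#include <libkern/OSAtomic.h>

static SInt32 payload;                  /* data being published */
static volatile SInt32 ready;           /* consumer polls this counter */

static void publish(SInt32 value)
{
    payload = value;                    /* 1: write the data */
    OSMemoryBarrier();                  /* 2: order the store before the flag */
    OSIncrementAtomic(&ready);          /* 3: atomic, but not a barrier itself */
}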
Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers. * @param address The 2-byte aligned address of the value to update atomically. * @result The value before the increment. */ @@ -316,7 +315,7 @@ extern SInt16 OSIncrementAtomic16(volatile SInt16 * address); * @discussion * The OSIncrementAtomic8 function increments the value at the specified address by one and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers. * @param address The address of the value to update atomically. * @result The value before the increment. */ @@ -331,7 +330,7 @@ extern SInt8 OSIncrementAtomic8(volatile SInt8 * address); * @discussion * The OSDecrementAtomic function decrements the value at the specified address by one and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers. * @param address The 4-byte aligned address of the value to update atomically. * @result The value before the decrement. */ @@ -348,7 +347,7 @@ extern SInt32 OSDecrementAtomic(volatile SInt32 * address); * @discussion * The OSDecrementAtomic16 function decrements the value at the specified address by one and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers. * @param address The 2-byte aligned address of the value to update atomically. 
* @result The value before the decrement. */ @@ -363,7 +362,7 @@ extern SInt16 OSDecrementAtomic16(volatile SInt16 * address); * @discussion * The OSDecrementAtomic8 function decrements the value at the specified address by one and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers. * @param address The address of the value to update atomically. * @result The value before the decrement. */ @@ -378,7 +377,7 @@ extern SInt8 OSDecrementAtomic8(volatile SInt8 * address); * @discussion * The OSBitAndAtomic function logically ands the bits of the specified mask into the value at the specified address and returns the original value. * - * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Additionally, this function incorporates a memory barrier on systems with weakly-ordered memory architectures. + * This function guarantees atomicity only with main system memory. It is specifically unsuitable for use on noncacheable memory such as that in devices; this function cannot guarantee atomicity, for example, on memory mapped from a PCI device. Previous incarnations of this function incorporated a memory barrier on systems with weakly-ordered memory architectures, but current versions contain no barriers.. * @param mask The mask to logically and with the value. * @param address The 4-byte aligned address of the value to update atomically. * @result The value before the bitwise operation @@ -634,13 +633,19 @@ extern void OSSpinLockUnlock(volatile OSSpinLock * lock); static __inline__ void OSSynchronizeIO(void) { } -#if defined(XNU_KERNEL_PRIVATE) + +#if defined(KERNEL_PRIVATE) + #if defined(__i386__) || defined(__x86_64__) +#if defined(XNU_KERNEL_PRIVATE) static inline void OSMemoryBarrier(void) { __asm__ volatile("mfence" ::: "memory"); } +#endif /* XNU_KERNEL_PRIVATE */ #endif -#endif /*XNU_KERNEL_PRIVATE */ + +#endif /* KERNEL_PRIVATE */ + #if defined(__cplusplus) } #endif diff --git a/libkern/libkern/OSKextLibPrivate.h b/libkern/libkern/OSKextLibPrivate.h index 15f85461d..06a7fe8d3 100644 --- a/libkern/libkern/OSKextLibPrivate.h +++ b/libkern/libkern/OSKextLibPrivate.h @@ -79,6 +79,13 @@ typedef uint8_t OSKextExcludeLevel; */ #define kOSBundleDeveloperOnlyKey "OSBundleDeveloperOnly" +/*! + * @define kOSBundleRamDiskOnlyKey + * @abstract A boolean value indicating whether the kext should only load when + * booted from a ram disk. + */ +#define kOSBundleRamDiskOnlyKey "OSBundleRamDiskOnly" + /*! 
* @define kAppleSecurityExtensionKey diff --git a/libkern/libkern/OSTypes.h b/libkern/libkern/OSTypes.h index 0945952bb..1119f7303 100644 --- a/libkern/libkern/OSTypes.h +++ b/libkern/libkern/OSTypes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1999-2012 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,12 +25,10 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * Copyright (c) 1999 Apple Computer, Inc. All rights reserved. - * - * HISTORY - * - */ + +#if !defined(KERNEL) +#include +#endif /* !KERNEL */ #ifndef _OS_OSTYPES_H #define _OS_OSTYPES_H @@ -40,8 +38,7 @@ typedef unsigned int UInt; typedef signed int SInt; -#ifndef __MACTYPES__ /* CF MacTypes.h */ -#ifndef __TYPES__ /* guess... Mac Types.h */ +#if defined(KERNEL) typedef unsigned char UInt8; typedef unsigned short UInt16; @@ -55,12 +52,12 @@ typedef unsigned long long UInt64; typedef struct UnsignedWide { UInt32 hi; UInt32 lo; -} UnsignedWide; +} UnsignedWide __attribute__((deprecated)); #elif defined(__LITTLE_ENDIAN__) typedef struct UnsignedWide { UInt32 lo; UInt32 hi; -} UnsignedWide; +} UnsignedWide __attribute__((deprecated)); #else #error Unknown endianess. #endif @@ -73,34 +70,17 @@ typedef signed int SInt32; typedef signed long SInt32; #endif typedef signed long long SInt64; -#if defined(__BIG_ENDIAN__) -typedef struct wide { - SInt32 hi; - UInt32 lo; -} wide; -#elif defined(__LITTLE_ENDIAN__) -typedef struct wide { - UInt32 lo; - SInt32 hi; -} wide; -#else -#error Unknown endianess. -#endif typedef SInt32 OSStatus; -#if (defined(__LP64__) || defined (__arm__)) && defined(KERNEL) #ifndef ABSOLUTETIME_SCALAR_TYPE #define ABSOLUTETIME_SCALAR_TYPE 1 #endif typedef UInt64 AbsoluteTime; -#else -typedef UnsignedWide AbsoluteTime; -#endif -typedef UInt32 OptionBits; +typedef UInt32 OptionBits __attribute__((deprecated)); -#if defined(KERNEL) && defined(__LP64__) +#if defined(__LP64__) /* * Use intrinsic boolean types for the LP64 kernel, otherwise maintain * source and binary backward compatibility. 
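/*
 * With the OSTypes.h hunk above, AbsoluteTime is a scalar UInt64 in every
 * configuration (ABSOLUTETIME_SCALAR_TYPE is always 1) and the UnsignedWide
 * representation is deprecated. Interval math is therefore plain integer
 * arithmetic; a minimal sketch:
 */
#include <libkern/OSTypes.h>

static AbsoluteTime elapsed(AbsoluteTime start, AbsoluteTime end)
{
    return end - start;                 /* no hi/lo carry handling required */
}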
This attempts to resolve @@ -117,15 +97,12 @@ typedef _Bool Boolean; typedef unsigned char Boolean; #endif /* !c99 */ #endif /* !__cplusplus */ -#else /* !(KERNEL && __LP64__) */ +#else /* !__LP64__ */ typedef unsigned char Boolean; -#endif /* !(KERNEL && __LP64__) */ +#endif /* !__LP64__ */ -#endif /* __TYPES__ */ -#endif /* __MACTYPES__ */ +#endif /* KERNEL */ -#if !defined(OS_INLINE) -# define OS_INLINE static inline -#endif +#include #endif /* _OS_OSTYPES_H */ diff --git a/libkern/libkern/c++/Makefile b/libkern/libkern/c++/Makefile index 9b7738bd7..e3d245ce1 100644 --- a/libkern/libkern/c++/Makefile +++ b/libkern/libkern/c++/Makefile @@ -3,23 +3,10 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_I386 = - -INSTINC_SUBDIRS_X86_64 = - - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} - - DATAFILES = \ OSArray.h \ OSBoolean.h \ @@ -43,13 +30,13 @@ DATAFILES = \ OSSymbol.h \ OSUnserialize.h -INSTALL_MD_LIST = +INSTALL_MI_LIST = -INSTALL_MD_DIR = libkern/c++ +INSTALL_MI_DIR = libkern/c++ -EXPORT_MD_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${DATAFILES} -EXPORT_MD_DIR = libkern/c++ +EXPORT_MI_DIR = libkern/c++ include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libkern/libkern/c++/OSBoolean.h b/libkern/libkern/c++/OSBoolean.h index cd99b47c9..039e8f609 100644 --- a/libkern/libkern/c++/OSBoolean.h +++ b/libkern/libkern/c++/OSBoolean.h @@ -237,34 +237,34 @@ public: }; /*! - * @const kOSBooleantrue + * @const kOSBooleanTrue * * @abstract * The OSBoolean constant for true. * * @discussion - * The OSBoolean constant for true. + * kOSBooleanTrue is the OSBoolean constant for true. * This object does not need to be retained or released (but it can be). * Comparisons of the form - * booleanObject == kOSBooleanTrue are acceptable - * and are equivalent to + * booleanObject == kOSBooleanTrue + * are acceptable and are equivalent to * booleanObject->getValue() == true. */ extern OSBoolean * const & kOSBooleanTrue; /*! - * @const kOSBooleanfalse + * @const kOSBooleanFalse * * @abstract * The OSBoolean constant for false. * * @discussion - * The OSBoolean constant for false. + * kOSBooleanFalse is the OSBoolean constant for false. * This object does not need to be retained or released (but it can be). * Comparisons of the form * booleanObject == kOSBooleanFalse * are acceptable and are equivalent to - * booleanObject->getValue() == false. + * booleanObject->getValue() == false. */ extern OSBoolean * const & kOSBooleanFalse; diff --git a/libkern/libkern/c++/OSKext.h b/libkern/libkern/c++/OSKext.h index c8d5edd0c..a054c0281 100644 --- a/libkern/libkern/c++/OSKext.h +++ b/libkern/libkern/c++/OSKext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2009 Apple Inc. All rights reserved. + * Copyright (c) 2008-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -97,11 +97,6 @@ kern_return_t is_io_catalog_send_data( void kmod_dump_log(vm_offset_t*, unsigned int); -#if __i386__ -kern_return_t kext_get_kmod_info( - kmod_info_array_t * kmod_list, - mach_msg_type_number_t * kmodCount); -#endif /* __i386__ */ #endif /* XNU_KERNEL_PRIVATE */ }; @@ -183,11 +178,6 @@ class OSKext : public OSObject friend void kmod_dump_log(vm_offset_t*, unsigned int); friend void kext_dump_panic_lists(int (*printf_func)(const char * fmt, ...)); -#if __i386__ - friend kern_return_t kext_get_kmod_info( - kmod_info_array_t * kmod_list, - mach_msg_type_number_t * kmodCount); -#endif /* __i386__ */ #endif /* XNU_KERNEL_PRIVATE */ @@ -314,6 +304,8 @@ private: OSKext * aKext, bool terminateServicesAndRemovePersonalitiesFlag = false); + virtual bool isInExcludeList(void); + /* Mkexts. */ static OSReturn readMkextArchive( @@ -514,13 +506,6 @@ private: */ virtual void setCPPInitialized(bool initialized=true); -#if __i386__ - /* Backward compatibility for kmod_get_info() MIG call. - */ - static kern_return_t getKmodInfo( - kmod_info_array_t * kmodList, - mach_msg_type_number_t * kmodCount); -#endif /* __i386__ */ #if PRAGMA_MARK @@ -573,6 +558,10 @@ public: static void setKextdActive(Boolean active = true); static void setDeferredLoadSucceeded(Boolean succeeded = true); static void considerRebuildOfPrelinkedKernel(void); + static void createExcludeListFromBooterData( + OSDictionary * theDictionary, + OSCollectionIterator * theIterator); + static void createExcludeListFromPrelinkInfo(OSArray * theInfoArray); virtual bool setAutounloadEnabled(bool flag); diff --git a/libkern/libkern/c++/OSMetaClass.h b/libkern/libkern/c++/OSMetaClass.h index fe211c724..a16e072e7 100644 --- a/libkern/libkern/c++/OSMetaClass.h +++ b/libkern/libkern/c++/OSMetaClass.h @@ -59,12 +59,8 @@ class OSOrderedSet; #ifdef XNU_KERNEL_PRIVATE -#ifdef CONFIG_EMBEDDED -#define APPLE_KEXT_VTABLE_PADDING 0 -#else /* CONFIG_EMBEDDED */ /*! @parseOnly */ #define APPLE_KEXT_VTABLE_PADDING 1 -#endif /* CONFIG_EMBEDDED */ #else /* XNU_KERNEL_PRIVATE */ #include diff --git a/libkern/libkern/c++/OSUnserialize.h b/libkern/libkern/c++/OSUnserialize.h index 073fb86c6..c01f07d93 100644 --- a/libkern/libkern/c++/OSUnserialize.h +++ b/libkern/libkern/c++/OSUnserialize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -31,6 +31,7 @@ #define _OS_OSUNSERIALIZE_H #include +#include class OSObject; class OSString; @@ -63,10 +64,37 @@ class OSString; * @discussion * Not safe to call in a primary interrupt handler. */ -extern OSObject * OSUnserializeXML( +extern "C++" OSObject * OSUnserializeXML( const char * buffer, OSString ** errorString = 0); +/*! + * @function OSUnserializeXML + * + * @abstract + * Recreates an OSContainer object + * from its previously serialized OSContainer class instance data. + * + * @param buffer A buffer containing nul-terminated XML data + * representing the object to be recreated. + * @param bufferSize The size of the block of memory. The function + * never scans beyond the first bufferSize bytes. + * @param errorString If non-NULL, and the XML parser + * finds an error in buffer, + * *errorString indicates the line number + * and type of error encountered. + * + * @result + * The recreated object, or NULL on failure. + * + * @discussion + * Not safe to call in a primary interrupt handler. 
+ */ +extern "C++" OSObject * OSUnserializeXML( + const char * buffer, + size_t bufferSize, + OSString ** errorString = 0); + #ifdef __APPLE_API_OBSOLETE extern OSObject* OSUnserialize(const char *buffer, OSString **errorString = 0); #endif /* __APPLE_API_OBSOLETE */ diff --git a/libkern/libkern/crypto/Makefile b/libkern/libkern/crypto/Makefile index 1b8cc587c..0c703fd95 100644 --- a/libkern/libkern/crypto/Makefile +++ b/libkern/libkern/crypto/Makefile @@ -3,17 +3,10 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - DATAFILES = md5.h sha1.h PRIVATE_DATAFILES = register_crypto.h sha2.h des.h aes.h aesxts.h diff --git a/libkern/libkern/crypto/sha2.h b/libkern/libkern/crypto/sha2.h index 7908f7ef8..af660ec9e 100644 --- a/libkern/libkern/crypto/sha2.h +++ b/libkern/libkern/crypto/sha2.h @@ -40,10 +40,10 @@ extern "C" { #define SHA256_DIGEST_LENGTH CCSHA256_OUTPUT_SIZE #define SHA256_DIGEST_STRING_LENGTH (SHA256_DIGEST_LENGTH * 2 + 1) #define SHA384_BLOCK_LENGTH CCSHA512_BLOCK_SIZE -#define SHA384_DIGEST_LENGTH CCSHA384_OUTPUT_SIZE +#define SHA384_DIGEST_LENGTH CCSHA512_OUTPUT_SIZE #define SHA384_DIGEST_STRING_LENGTH (SHA384_DIGEST_LENGTH * 2 + 1) #define SHA512_BLOCK_LENGTH CCSHA512_BLOCK_SIZE -#define SHA512_DIGEST_LENGTH CCSHA384_OUTPUT_SIZE +#define SHA512_DIGEST_LENGTH CCSHA512_OUTPUT_SIZE #define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1) typedef struct { @@ -51,7 +51,7 @@ typedef struct { } SHA256_CTX; typedef struct SHA512_CTX { - ccdigest_ctx_decl(CCSHA256_STATE_SIZE, CCSHA256_BLOCK_SIZE, ctx); + ccdigest_ctx_decl(CCSHA512_STATE_SIZE, CCSHA512_BLOCK_SIZE, ctx); } SHA512_CTX; typedef SHA512_CTX SHA384_CTX; diff --git a/libkern/libkern/i386/Makefile b/libkern/libkern/i386/Makefile index 728ba4601..40f5b0fc0 100644 --- a/libkern/libkern/i386/Makefile +++ b/libkern/libkern/i386/Makefile @@ -3,21 +3,10 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_I386 = - -INSTINC_SUBDIRS_X86_64 = - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} - DATAFILES = \ OSByteOrder.h \ _OSByteOrder.h diff --git a/libkern/libkern/i386/OSByteOrder.h b/libkern/libkern/i386/OSByteOrder.h index 7f197367c..f38e0652e 100644 --- a/libkern/libkern/i386/OSByteOrder.h +++ b/libkern/libkern/i386/OSByteOrder.h @@ -31,10 +31,7 @@ #include #include - -#if !defined(OS_INLINE) -#define OS_INLINE __DARWIN_OS_INLINE -#endif +#include /* Functions for byte reversed loads. */ diff --git a/libkern/libkern/i386/_OSByteOrder.h b/libkern/libkern/i386/_OSByteOrder.h index 51477bb61..e60b4fd97 100644 --- a/libkern/libkern/i386/_OSByteOrder.h +++ b/libkern/libkern/i386/_OSByteOrder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -47,7 +47,7 @@ _OSSwapInt16( __uint16_t _data ) { - return ((_data << 8) | (_data >> 8)); + return ((__uint16_t)((_data << 8) | (_data >> 8))); } __DARWIN_OS_INLINE diff --git a/libkern/libkern/kxld_types.h b/libkern/libkern/kxld_types.h index 0aad7abbe..8e5fac9e7 100644 --- a/libkern/libkern/kxld_types.h +++ b/libkern/libkern/kxld_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2008, 2012 Apple Inc. All rights reserved. + * Copyright (c) 2007-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -68,9 +68,7 @@ #endif /* For linking code specific to architectures that support strict patching */ -#if (!KERNEL || !__i386__) #define KXLD_USER_OR_STRICT_PATCHING 1 -#endif /* For linking code specific to architectures that use MH_OBJECT */ #if (!KERNEL || __i386__) diff --git a/libkern/libkern/machine/Makefile b/libkern/libkern/machine/Makefile index c5f944fa0..f89b1afaf 100644 --- a/libkern/libkern/machine/Makefile +++ b/libkern/libkern/machine/Makefile @@ -3,33 +3,20 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_I386 = - -INSTINC_SUBDIRS_X86_64 = - - -EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} - -EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} - - DATAFILES = \ OSByteOrder.h -INSTALL_MD_LIST = ${DATAFILES} +INSTALL_MI_LIST = ${DATAFILES} -INSTALL_MD_DIR = libkern/machine +INSTALL_MI_DIR = libkern/machine -EXPORT_MD_LIST = ${DATAFILES} +EXPORT_MI_LIST = ${DATAFILES} -EXPORT_MD_DIR = libkern/machine +EXPORT_MI_DIR = libkern/machine include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libkern/mkext.c b/libkern/mkext.c index 86238fc35..6dba0e0aa 100644 --- a/libkern/mkext.c +++ b/libkern/mkext.c @@ -46,22 +46,6 @@ mkext_adler32(uint8_t *buf, int32_t len) unsigned long s2 = 0; // (adler >> 16) & 0xffff; int k; -#if defined _ARM_ARCH_6 - - /* align buf to 16-byte boundary */ - while ((((uintptr_t)buf)&15)&&(len>0)) { /* not on a 16-byte boundary */ - len--; - s1 += *buf++; - s2 += s1; - if (s1 >= BASE) s1 -= BASE; - } - s2 %= BASE; - - if (len>=16) { - return adler32_vec(s1, s2, buf, len); - } - -#endif while (len > 0) { k = len < NMAX ? len : NMAX; diff --git a/libkern/net/inet_aton.c b/libkern/net/inet_aton.c new file mode 100644 index 000000000..9c0d94ae3 --- /dev/null +++ b/libkern/net/inet_aton.c @@ -0,0 +1,138 @@ +/*- + * Copyright (c) 2001 Charles Mott + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include + +#include + +/* XXX ctype.h is missing, see libkern/stdio/scanf.c */ +#if 1 +static inline int +isspace(char c) +{ + return (c == ' ' || c == '\t' || c == '\n' || c == '\12'); +} +#endif + +int +inet_aton(const char *cp, struct in_addr *addr) +{ + u_long parts[4]; + in_addr_t val; + const char *c; + char *endptr; + int gotend, n; + + c = (const char *)cp; + n = 0; + + /* + * Run through the string, grabbing numbers until + * the end of the string, or some error + */ + gotend = 0; + while (!gotend) { + unsigned long l; + + l = strtoul(c, &endptr, 0); + + if (l == ULONG_MAX || (l == 0 && endptr == c)) + return (0); + + val = (in_addr_t)l; + + /* + * If the whole string is invalid, endptr will equal + * c.. this way we can make sure someone hasn't + * gone '.12' or something which would get past + * the next check. + */ + if (endptr == c) + return (0); + parts[n] = val; + c = endptr; + + /* Check the next character past the previous number's end */ + switch (*c) { + case '.' : + + /* Make sure we only do 3 dots .. */ + if (n == 3) /* Whoops. Quit. */ + return (0); + n++; + c++; + break; + + case '\0': + gotend = 1; + break; + + default: + if (isspace((unsigned char)*c)) { + gotend = 1; + break; + } else { + + /* Invalid character, then fail. */ + return (0); + } + } + + } + + /* Concoct the address according to the number of parts specified. */ + switch (n) { + case 0: /* a -- 32 bits */ + + /* + * Nothing is necessary here. Overflow checking was + * already done in strtoul(). + */ + break; + case 1: /* a.b -- 8.24 bits */ + if (val > 0xffffff || parts[0] > 0xff) + return (0); + val |= parts[0] << 24; + break; + + case 2: /* a.b.c -- 8.8.16 bits */ + if (val > 0xffff || parts[0] > 0xff || parts[1] > 0xff) + return (0); + val |= (parts[0] << 24) | (parts[1] << 16); + break; + + case 3: /* a.b.c.d -- 8.8.8.8 bits */ + if (val > 0xff || parts[0] > 0xff || parts[1] > 0xff || + parts[2] > 0xff) + return (0); + val |= (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8); + break; + } + + if (addr != NULL) + addr->s_addr = htonl(val); + return (1); +} diff --git a/libkern/net/inet_ntoa.c b/libkern/net/inet_ntoa.c new file mode 100644 index 000000000..d912bdaeb --- /dev/null +++ b/libkern/net/inet_ntoa.c @@ -0,0 +1,59 @@ +/*- + * Copyright 1994, 1995 Massachusetts Institute of Technology + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby + * granted, provided that both the above copyright notice and this + * permission notice appear in all copies, that both the above + * copyright notice and this permission notice appear in all + * supporting documentation, and that the name of M.I.T. not be used + * in advertising or publicity pertaining to distribution of the + * software without specific, written prior permission. M.I.T. makes + * no representations about the suitability of this software for any + * purpose. 
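/*
 * Usage sketch for the inet_aton() added above, assuming its declaration is
 * visible to the caller. The return value is 1 on success, 0 on malformed
 * input; because the parser uses strtoul() with base 0, hexadecimal and
 * octal parts are accepted. The multi-part forms follow the classic BSD
 * semantics implemented above:
 */
#include <netinet/in.h>

static void inet_aton_examples(void)
{
    struct in_addr a;

    (void)inet_aton("127.0.0.1", &a);   /* a.b.c.d: four 8-bit parts */
    (void)inet_aton("127.1", &a);       /* a.b: 8.24 bits -> 127.0.0.1 */
    (void)inet_aton("0x7f000001", &a);  /* one part: whole 32-bit value */
    (void)inet_aton("1.2.3.4.5", &a);   /* too many dots: returns 0 */
}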
It is provided "as is" without express or implied + * warranty. + * + * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS + * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT + * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#include +#include + +#include + +char * +inet_ntoa(struct in_addr ina) +{ + static char buf[4*sizeof "123"]; + unsigned char *ucp = (unsigned char *)&ina; + + snprintf(buf, sizeof(buf), "%d.%d.%d.%d", + ucp[0] & 0xff, + ucp[1] & 0xff, + ucp[2] & 0xff, + ucp[3] & 0xff); + return buf; +} + +char * +inet_ntoa_r(struct in_addr ina, char *buf, size_t buflen) +{ + unsigned char *ucp = (unsigned char *)&ina; + + snprintf(buf, buflen, "%d.%d.%d.%d", + ucp[0] & 0xff, + ucp[1] & 0xff, + ucp[2] & 0xff, + ucp[3] & 0xff); + return buf; +} diff --git a/libkern/net/inet_ntop.c b/libkern/net/inet_ntop.c new file mode 100644 index 000000000..309d35bb9 --- /dev/null +++ b/libkern/net/inet_ntop.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC") + * Copyright (c) 1996-1999 by Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static const char rcsid[] = "$Id: inet_ntop.c,v 1.3.18.2 2005/11/03 23:02:22 marka Exp $"; +#endif /* LIBC_SCCS and not lint */ + +#include +#include +#include + +#include + +/*% + * WARNING: Don't even consider trying to compile this on a system where + * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX. + */ + +static char *inet_ntop4(const u_char *src, char *dst, socklen_t size); +static char *inet_ntop6(const u_char *src, char *dst, socklen_t size); + +/* char * + * inet_ntop(af, src, dst, size) + * convert a network format address to presentation format. + * return: + * pointer to presentation format address (`dst'), or NULL (see errno). + * author: + * Paul Vixie, 1996. 
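/*
 * inet_ntoa() above formats into a single static buffer, so a second call
 * clobbers the first result; inet_ntoa_r() is the reentrant form. A sketch
 * (16 bytes covers "255.255.255.255" plus the terminator):
 */
#include <netinet/in.h>

static void show_two(struct in_addr a, struct in_addr b)
{
    char b1[16], b2[16];

    inet_ntoa_r(a, b1, sizeof(b1));
    inet_ntoa_r(b, b2, sizeof(b2));
    /* b1 and b2 remain independent; with inet_ntoa() both return values
     * would alias the same static buffer */
}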
+ */ +const char * +inet_ntop(int af, const void *src, char *dst, socklen_t size) +{ + switch (af) { + case AF_INET: + return (inet_ntop4(src, dst, size)); + case AF_INET6: + return (inet_ntop6(src, dst, size)); + default: + return (NULL); + } + /* NOTREACHED */ +} + +/* const char * + * inet_ntop4(src, dst, size) + * format an IPv4 address + * return: + * `dst' (as a const) + * notes: + * (1) uses no statics + * (2) takes a u_char* not an in_addr as input + * author: + * Paul Vixie, 1996. + */ +static char * +inet_ntop4(const u_char *src, char *dst, socklen_t size) +{ + static const char fmt[] = "%u.%u.%u.%u"; + char tmp[sizeof "255.255.255.255"]; + int l; + + l = snprintf(tmp, sizeof(tmp), fmt, src[0], src[1], src[2], src[3]); + if (l <= 0 || (socklen_t) l >= size) { + return (NULL); + } + strlcpy(dst, tmp, size); + return (dst); +} + +/* const char * + * inet_ntop6(src, dst, size) + * convert IPv6 binary address into presentation (printable) format + * author: + * Paul Vixie, 1996. + */ +static char * +inet_ntop6(const u_char *src, char *dst, socklen_t size) +{ + /* + * Note that int32_t and int16_t need only be "at least" large enough + * to contain a value of the specified size. On some systems, like + * Crays, there is no such thing as an integer variable with 16 bits. + * Keep this in mind if you think this function should have been coded + * to use pointer overlays. All the world's not a VAX. + */ + char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"], *tp; + struct { int base, len; } best, cur; +#define NS_IN6ADDRSZ 16 +#define NS_INT16SZ 2 + u_int words[NS_IN6ADDRSZ / NS_INT16SZ]; + int i; + + /* + * Preprocess: + * Copy the input (bytewise) array into a wordwise array. + * Find the longest run of 0x00's in src[] for :: shorthanding. + */ + memset(words, '\0', sizeof words); + for (i = 0; i < NS_IN6ADDRSZ; i++) + words[i / 2] |= (src[i] << ((1 - (i % 2)) << 3)); + best.base = -1; + best.len = 0; + cur.base = -1; + cur.len = 0; + for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) { + if (words[i] == 0) { + if (cur.base == -1) + cur.base = i, cur.len = 1; + else + cur.len++; + } else { + if (cur.base != -1) { + if (best.base == -1 || cur.len > best.len) + best = cur; + cur.base = -1; + } + } + } + if (cur.base != -1) { + if (best.base == -1 || cur.len > best.len) + best = cur; + } + if (best.base != -1 && best.len < 2) + best.base = -1; + + /* + * Format the result. + */ + tp = tmp; + for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) { + /* Are we inside the best run of 0x00's? */ + if (best.base != -1 && i >= best.base && + i < (best.base + best.len)) { + if (i == best.base) + *tp++ = ':'; + continue; + } + /* Are we following an initial run of 0x00s or any real hex? */ + if (i != 0) + *tp++ = ':'; + /* Is this address an encapsulated IPv4? */ + if (i == 6 && best.base == 0 && (best.len == 6 || + (best.len == 7 && words[7] != 0x0001) || + (best.len == 5 && words[5] == 0xffff))) { + if (!inet_ntop4(src+12, tp, sizeof tmp - (tp - tmp))) + return (NULL); + tp += strlen(tp); + break; + } + tp += snprintf(tp, sizeof(tmp), "%x", words[i]); + } + /* Was it a trailing run of 0x00's? */ + if (best.base != -1 && (best.base + best.len) == + (NS_IN6ADDRSZ / NS_INT16SZ)) + *tp++ = ':'; + *tp++ = '\0'; + + /* + * Check for overflow, copy, and we're done. + */ + if ((socklen_t)(tp - tmp) > size) { + return (NULL); + } + strlcpy(dst, tmp, size); + return (dst); +} + +/*! 
\file */ diff --git a/libkern/net/inet_pton.c b/libkern/net/inet_pton.c new file mode 100644 index 000000000..417326d3d --- /dev/null +++ b/libkern/net/inet_pton.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC") + * Copyright (c) 1996,1999 by Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#if defined(LIBC_SCCS) && !defined(lint) +static const char rcsid[] = "$Id: inet_pton.c,v 1.3.18.2 2005/07/28 07:38:07 marka Exp $"; +#endif /* LIBC_SCCS and not lint */ + +#include + +#include +#include +#include + +#include + +/*% + * WARNING: Don't even consider trying to compile this on a system where + * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX. + */ + +static int inet_pton4(const char *src, u_char *dst); +static int inet_pton6(const char *src, u_char *dst); + +/* int + * inet_pton(af, src, dst) + * convert from presentation format (which usually means ASCII printable) + * to network format (which is usually some kind of binary format). + * return: + * 1 if the address was valid for the specified address family + * 0 if the address wasn't valid (`dst' is untouched in this case) + * -1 if some other error occurred (`dst' is untouched in this case, too) + * author: + * Paul Vixie, 1996. + */ +int +inet_pton(int af, const char *src, void *dst) +{ + switch (af) { + case AF_INET: + return (inet_pton4(src, dst)); + case AF_INET6: + return (inet_pton6(src, dst)); + default: + return (-1); + } + /* NOTREACHED */ +} + +/* int + * inet_pton4(src, dst) + * like inet_aton() but without all the hexadecimal and shorthand. + * return: + * 1 if `src' is a valid dotted quad, else 0. + * notice: + * does not touch `dst' unless it's returning 1. + * author: + * Paul Vixie, 1996. + */ +static int +inet_pton4(const char *src, u_char *dst) +{ + static const char digits[] = "0123456789"; + int saw_digit, octets, ch; +#define NS_INADDRSZ 4 + u_char tmp[NS_INADDRSZ], *tp; + + saw_digit = 0; + octets = 0; + *(tp = tmp) = 0; + while ((ch = *src++) != '\0') { + const char *pch; + + if ((pch = strchr(digits, ch)) != NULL) { + u_int new = *tp * 10 + (pch - digits); + + if (saw_digit && *tp == 0) + return (0); + if (new > 255) + return (0); + *tp = new; + if (!saw_digit) { + if (++octets > 4) + return (0); + saw_digit = 1; + } + } else if (ch == '.' && saw_digit) { + if (octets == 4) + return (0); + *++tp = 0; + saw_digit = 0; + } else + return (0); + } + if (octets < 4) + return (0); + memcpy(dst, tmp, NS_INADDRSZ); + return (1); +} + +/* int + * inet_pton6(src, dst) + * convert presentation level address to network order binary form. + * return: + * 1 if `src' is a valid [RFC1884 2.2] address, else 0. + * notice: + * (1) does not touch `dst' unless it's returning 1. + * (2) :: in a full address is silently ignored. + * credit: + * inspired by Mark Andrews. 
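/*
 * Round-trip sketch for the inet_pton()/inet_ntop() pair added in these
 * files. inet_ntop6() canonicalizes its output: the longest run of two or
 * more zero groups collapses to "::". Buffer size 46 matches the usual
 * INET6_ADDRSTRLEN:
 */
#include <sys/socket.h>
#include <netinet/in.h>

static void roundtrip(void)
{
    struct in6_addr a6;
    char text[46];

    if (inet_pton(AF_INET6, "2001:db8:0:0:0:0:0:1", &a6) == 1 &&
        inet_ntop(AF_INET6, &a6, text, sizeof(text)) != NULL) {
        /* text now reads "2001:db8::1" */
    }
}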
+ * author: + * Paul Vixie, 1996. + */ +static int +inet_pton6(const char *src, u_char *dst) +{ + static const char xdigits_l[] = "0123456789abcdef", + xdigits_u[] = "0123456789ABCDEF"; +#define NS_IN6ADDRSZ 16 +#define NS_INT16SZ 2 + u_char tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp; + const char *xdigits, *curtok; + int ch, seen_xdigits; + u_int val; + + memset((tp = tmp), '\0', NS_IN6ADDRSZ); + endp = tp + NS_IN6ADDRSZ; + colonp = NULL; + /* Leading :: requires some special handling. */ + if (*src == ':') + if (*++src != ':') + return (0); + curtok = src; + seen_xdigits = 0; + val = 0; + while ((ch = *src++) != '\0') { + const char *pch; + + if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL) + pch = strchr((xdigits = xdigits_u), ch); + if (pch != NULL) { + val <<= 4; + val |= (pch - xdigits); + if (++seen_xdigits > 4) + return (0); + continue; + } + if (ch == ':') { + curtok = src; + if (!seen_xdigits) { + if (colonp) + return (0); + colonp = tp; + continue; + } else if (*src == '\0') { + return (0); + } + if (tp + NS_INT16SZ > endp) + return (0); + *tp++ = (u_char) (val >> 8) & 0xff; + *tp++ = (u_char) val & 0xff; + seen_xdigits = 0; + val = 0; + continue; + } + if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) && + inet_pton4(curtok, tp) > 0) { + tp += NS_INADDRSZ; + seen_xdigits = 0; + break; /*%< '\\0' was seen by inet_pton4(). */ + } + return (0); + } + if (seen_xdigits) { + if (tp + NS_INT16SZ > endp) + return (0); + *tp++ = (u_char) (val >> 8) & 0xff; + *tp++ = (u_char) val & 0xff; + } + if (colonp != NULL) { + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. + */ + const int n = tp - colonp; + int i; + + if (tp == endp) + return (0); + for (i = 1; i <= n; i++) { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp) + return (0); + memcpy(dst, tmp, NS_IN6ADDRSZ); + return (1); +} diff --git a/libkern/uuid/Makefile b/libkern/uuid/Makefile deleted file mode 100644 index f20633ddb..000000000 --- a/libkern/uuid/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - - - -# uuid.h is now installed by bsd/uuid/Makefile -DATAFILES = \ - -INSTALL_MI_LIST = \ - -INSTALL_MI_DIR = \ - -EXPORT_MI_LIST = ${DATAFILES} - -EXPORT_MI_DIR = - - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/libkern/zlib/adler32.c b/libkern/zlib/adler32.c index 00214cd2e..e4b6756e9 100644 --- a/libkern/zlib/adler32.c +++ b/libkern/zlib/adler32.c @@ -40,10 +40,6 @@ #include "zlib.h" #endif /* KERNEL */ -#if defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6 -#include // For uintptr_t. 
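/*
 * The vector dispatch being deleted here (and from mkext.c earlier in this
 * patch) followed one pattern: consume bytes scalar-fashion until buf hits a
 * 16-byte boundary, then hand the aligned remainder to adler32_vec(). A
 * reconstruction of that prologue, per the removed code:
 */
#include <stdint.h>

#define BASE 65521UL                    /* largest prime smaller than 65536 */

static void align_prologue(const unsigned char **buf, unsigned int *len,
                           unsigned long *adler, unsigned long *sum2)
{
    while ((((uintptr_t)*buf) & 15) && *len > 0) {  /* not 16-byte aligned */
        (*len)--;
        *adler += *(*buf)++;
        *sum2 += *adler;
        if (*adler >= BASE)
            *adler -= BASE;             /* keep adler reduced */
    }
    /* the removed callers then reduced sum2 and invoked
     * adler32_vec(adler, sum2, buf, len) on the aligned bulk */
}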
- extern uLong adler32_vec(uLong adler, uLong sum2, const Bytef *buf, uInt len); -#endif #define BASE 65521UL /* largest prime smaller than 65536 */ #define NMAX 5552 @@ -130,22 +126,6 @@ uLong ZEXPORT adler32(adler, buf, len) return adler | (sum2 << 16); } -#if defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6 - - if (len>=32000) { /* use vector code only if len is sufficiently large to compensate registers save/restore */ - /* align buf to 16-byte boundary */ - while (((uintptr_t)buf)&15) { /* not on a 16-byte boundary */ - len--; - adler += *buf++; - sum2 += adler; - if (adler >= BASE) adler -= BASE; - MOD4(sum2); /* only added so many BASE's */ - } - - return adler32_vec(adler, sum2, buf, len); // x86_64 or i386 (up to SSE3) or armv6 or up - } - -#endif // defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6 /* do length NMAX blocks -- requires just one modulo operation */ while (len >= NMAX) { diff --git a/libkern/zlib/crc32.c b/libkern/zlib/crc32.c index d707bdc5a..4cafa3157 100644 --- a/libkern/zlib/crc32.c +++ b/libkern/zlib/crc32.c @@ -46,6 +46,7 @@ one thread to use crc32(). */ + #ifdef MAKECRCH # include # ifndef DYNAMIC_CRC_TABLE diff --git a/libkern/zlib/inffast.c b/libkern/zlib/inffast.c index 8be51094c..cb93ddc5d 100644 --- a/libkern/zlib/inffast.c +++ b/libkern/zlib/inffast.c @@ -31,14 +31,6 @@ */ -#if defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6 - - // dummy definition, for x86_64 or i386 or armv6 or up, compile code from inffastS.s - typedef char DummyDefinition; - -#else // architecture - - #include "zutil.h" #include "inftrees.h" #include "inflate.h" @@ -353,4 +345,3 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ #endif /* !ASMINF */ -#endif // architecture diff --git a/libkern/zlib/intel/adler32vec.s b/libkern/zlib/intel/adler32vec.s deleted file mode 100644 index df9dcf328..000000000 --- a/libkern/zlib/intel/adler32vec.s +++ /dev/null @@ -1,1050 +0,0 @@ -/* Apple Copyright 2009 - CoreOS - vector & Numerics, cclee 10-22-09 - - This following source code implements a vectorized version of adler32 computation that is defined in zlib. - The target architectures are x86_64 and i386. - - Given 2 unsigned 32-bit alder and sum2 (both pre-modulo by BASE=65521) and a sequence of input bytes x[0],...x[N-1]. 
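/*
 * The file-header comment that follows lost several spans at '<' characters
 * during extraction. The computation it describes is the standard zlib
 * Adler-32 recurrence over bytes x[0..N-1], with both sums kept reduced
 * modulo BASE. The vectorized DO16/DO32 macros below compute the same thing
 * blockwise: for each 16-byte chunk, sum2 grows by 16*adler plus the
 * 16..1-weighted byte sum (hence the coefficient tables). Reference form
 * (adler seeds to 1 in plain zlib; the assembly instead takes both sums,
 * already reduced, as arguments):
 */
#define BASE 65521UL

static unsigned long adler32_ref(const unsigned char *x, unsigned long N)
{
    unsigned long adler = 1, sum2 = 0, i;

    for (i = 0; i < N; i++) {
        adler = (adler + x[i]) % BASE;  /* running byte sum */
        sum2 = (sum2 + adler) % BASE;   /* running sum of adler values */
    }
    return (sum2 << 16) | adler;        /* packed 32-bit checksum */
}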
- The adler-sum2 pair is updated according to - - for (i=0;i= NMAX) { -// len -= NMAX; -// n = NMAX / 16; /* NMAX is divisible by 16 */ -// do { -// DO16(buf); /* 16 sums unrolled */ -// buf += 16; -// } while (--n); -// MOD(adler); -// MOD(sum2); -// } -// if (len) { /* avoid modulos if none remaining */ -// while (len >= 16) { -// len -= 16; -// DO16(buf); -// buf += 16; -// } -// while (len--) { -// adler += *buf++; -// sum2 += adler; -// } -// MOD(adler); -// MOD(sum2); -// } -// return adler | (sum2 << 16); -// } - -#if (defined __i386__ || defined __x86_64__) - -#include - - .text - .align 4,0x90 -.globl _adler32_vec -_adler32_vec: - -#if (defined __i386__) - - pushl %ebp - movl %esp, %ebp - - pushl %ebx - pushl %edi - pushl %esi - -#ifdef KERNEL // if this is for kernel, need to save xmm registers - subl $140, %esp // to save %xmm0-%xmm7 into stack, extra 12 to align %esp to 16-byte boundary - movaps %xmm0, 0(%esp) // save xmm0, offset -12 for ebx/edi/esi - movaps %xmm1, 16(%esp) // save xmm1 - movaps %xmm2, 32(%esp) // save xmm2 - movaps %xmm3, 48(%esp) // save xmm3 - movaps %xmm4, 64(%esp) // save xmm4 - movaps %xmm5, 80(%esp) // save xmm5 - movaps %xmm6, 96(%esp) // save xmm6 - movaps %xmm7, 112(%esp) // save xmm7, if this is for SSSE3 or above -#endif - - #define adler %edi // 8(%ebp) - #define sum2 %esi // 12(%ebp) - #define buf %ecx // 16(%ebp) - #define len %ebx // 20(%ebp) - #define zero %xmm0 - #define ones %xmm5 - - movl 8(%ebp), adler - movl 12(%ebp), sum2 - movl 16(%ebp), buf // use ecx as buf pointer - movl 20(%ebp), len - - .macro modulo_BASE - movl $$-2146992015, %eax // 1/BASE in Q47 - mull adler // edx:eax = adler divided by BASE in Q47 - shrl $$15, %edx // edx is now the floor integer of adler and BASE - imull $$BASE, %edx, %edx // edx * BASE - subl %edx, adler // adler -= edx*BASE - movl $$-2146992015, %eax // 1/BASE in Q47 - mull sum2 // edx:eax = sum2 divided by BASE in Q47 - shrl $$15, %edx // edx is now the floor integer of sum2 and BASE - imull $$BASE, %edx, %eax // eax = edx * BASE - subl %eax, sum2 // sum2 -= sdx*BASE - .endmacro - - // update adler/sum2 according to a new 16-byte vector - .macro DO16 - movaps (buf), %xmm1 // 16 bytes vector, in xmm1 - movaps %xmm1, %xmm3 // a copy of the vector, used for unsigned byte in the destination of pmaddubsw - addl $$16, buf // buf -> next vector - psadbw zero, %xmm1 // 2 16-bit words to be added for adler in xmm1 - pmaddubsw %xmm4, %xmm3 // 8 16-bit words to be added for sum2 in xmm3 - imull $$16, adler, %edx // edx = 16*adler; - movhlps %xmm1, %xmm2 // higher 16-bit word (for adler) in xmm2 - pmaddwd ones, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - paddq %xmm2, %xmm1 // xmm1 lower 32-bit to be added to adler - addl %edx, sum2 // sum2 += adler*16; - movhlps %xmm3, %xmm2 // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements - movd %xmm1, %edx // to be added to adler - paddd %xmm2, %xmm3 // 2 32-bits elements in xmm3 to be added to sum2 - addl %edx, adler // update adler - movd %xmm3, %edx // to be added to sum2 - psrlq $$32, %xmm3 // another 32-bit to be added to sum2 - addl %edx, sum2 // sum2 += 1st half of update - movd %xmm3, %edx // to be added to sum2 - addl %edx, sum2 // sum2 += 2nd half of update - .endm - - // update adler/sum2 according to a new 32-byte vector - .macro DO32 - imull $$32, adler, %edx // edx = 32*adler - movaps (buf), %xmm1 // 1st 16 bytes vector - movaps 16(buf), %xmm7 // 2nd 16 bytes vector - movaps %xmm1, %xmm3 // a copy of 1st vector, used for unsigned 
byte in the destination of pmaddubsw - movaps %xmm7, %xmm2 // a copy of 2nd vector, used for unsigned byte in the destination of pmaddubsw - psadbw zero, %xmm1 // 2 16-bit words to be added for adler in xmm1 - psadbw zero, %xmm7 // 2 16-bit words to be added for adler in xmm7 - addl %edx, sum2 // sum2 += adler*32; - pmaddubsw %xmm6, %xmm3 // 8 16-bit words to be added for sum2 in xmm3 - pmaddubsw %xmm4, %xmm2 // 8 16-bit words to be added for sum2 in xmm2 - paddd %xmm7, %xmm1 // 2 16-bit words to be added for adler in xmm1 - paddd %xmm2, %xmm3 // 8 16-bit words to be added for sum2 in xmm3 - addl $$32, buf // buf -> vector for next iteration - movhlps %xmm1, %xmm2 // higher 16-bit word (for adler) in xmm2 - pmaddwd ones, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - paddq %xmm2, %xmm1 // xmm1 lower 32-bit to be added to adler - movhlps %xmm3, %xmm2 // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements - movd %xmm1, %edx // to be added to adler - paddd %xmm2, %xmm3 // 2 32-bits elements in xmm3 to be added to sum2 - addl %edx, adler // update adler - movd %xmm3, %edx // to be added to sum2 - psrlq $$32, %xmm3 // another 32-bit to be added to sum2 - addl %edx, sum2 // sum2 += 1st half of update - movd %xmm3, %edx // to be added to sum2 - addl %edx, sum2 // sum2 += 2nd half of update - .endm - - // this defines the macro DO16 for SSSE3 not supported - .macro DO16_nossse3 - movaps (buf), %xmm1 // 16 bytes vector - movaps %xmm1, %xmm3 // a copy of the vector, the lower 8 bytes to be shuffled into 8 words - movaps %xmm1, %xmm2 // a copy of the vector, the higher 8 bytes to be shuffled into 8 words - psrldq $$8, %xmm2 // shift down 8 bytes, to reuse the shuffle vector - punpcklbw zero, %xmm3 // convert lower 8 bytes into 8 words - punpcklbw zero, %xmm2 // convert higher 8 bytes into 8 words - pmullw %xmm6, %xmm3 // lower 8 words * 16:9 - pmullw %xmm4, %xmm2 // higher 8 words * 8:1 - addl $$16, buf // buf -> next vector - psadbw zero, %xmm1 // 2 16-bit words to be added for adler in xmm1 - paddw %xmm2, %xmm3 // 8 16-bit words to be added for sum2 in xmm3 - imull $$16, adler, %edx // edx = 16*adler; - movhlps %xmm1, %xmm2 // higher 16-bit word (for adler) in xmm2 - pmaddwd ones, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - paddq %xmm2, %xmm1 // xmm1 lower 32-bit to be added to adler - addl %edx, sum2 // sum2 += adler*16; - movhlps %xmm3, %xmm2 // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements - movd %xmm1, %edx // to be added to adler - paddd %xmm2, %xmm3 // 2 32-bits elements in xmm3 to be added to sum2 - addl %edx, adler // update adler - movd %xmm3, %edx // to be added to sum2 - psrlq $$32, %xmm3 // another 32-bit to be added to sum2 - addl %edx, sum2 // sum2 += 1st half of update - movd %xmm3, %edx // to be added to sum2 - addl %edx, sum2 // sum2 += 2nd half of update - .endm - -#ifdef KERNEL - leal __cpu_capabilities, %eax // %eax -> __cpu_capabilities - mov (%eax), %eax // %eax = __cpu_capabilities -#else - mov _COMM_PAGE_CPU_CAPABILITIES, %eax -#endif - test $(kHasSupplementalSSE3), %eax // __cpu_capabilities & kHasAES - je L_no_ssse3 - - // i386 adler32 with ssse3 - - // need to fill up xmm4/xmm5/xmm6 only if len>=16 - cmpl $16, len - jl L_skip_loading_tables - - // set up table starting address to %eax - leal sum2_coefficients, %eax - - // reading coefficients - pxor zero, zero - movaps (%eax), %xmm6 // coefficients for computing sum2 : pmaddubsw 32:17 - movaps 16(%eax), %xmm4 // coefficients for computing sum2 : 
pmaddubsw 16:1 - movaps 32(%eax), ones // coefficients for computing sum2 : pmaddwd 1,1,...,1 - -L_skip_loading_tables: - - cmpl $NMAX, len // len vs NMAX - jl len_lessthan_NMAX // if (len < NMAX), skip the following NMAX batches processing - -len_ge_NMAX_loop: // while (len>=NMAX) { - - subl $NMAX, len // len -= NMAX - movl $(NMAX/32), %eax // n = NMAX/32 - -n_loop: // do { - DO32 // update adler/sum2 for a 32-byte input - decl %eax // n--; - jg n_loop // } while (n); - DO16 // update adler/sum2 for a 16-byte input - modulo_BASE // (adler/sum2) modulo BASE; - cmpl $NMAX, len // - jge len_ge_NMAX_loop // } /* len>=NMAX */ - -len_lessthan_NMAX: - - subl $32, len // pre-decrement len by 32 - jl len_lessthan_32 // if len < 32, skip the 32-vector code -len32_loop: // while (len>=32) { - DO32 // update adler/sum2 for a 32-byte input - subl $32, len // len -= 32; - jge len32_loop // } - -len_lessthan_32: - - addl $(32-16), len // post-increment by 32 + pre-decrement by 16 on len - jl L_len_lessthan_16 // if len < 16, skip the 16-vector code - DO16 // update adler/sum2 for a 16-byte input - subl $16, len // len -= 16; - -L_len_lessthan_16: - addl $16, len // post-increment len by 16 - jz len_is_zero // if len==0, branch over scalar processing - -0: // while (len) { - movzbl (buf), %edx // new input byte - incl buf // buf++ - addl %edx, adler // adler += *buf - addl adler, sum2 // sum2 += adler - subl $1, len // len-- - jg 0b // } - -len_is_zero: - - modulo_BASE // (adler/sum2) modulo BASE; - - // construct 32-bit (sum2<<16 | adler) to be returned - - sall $16, sum2 // sum2 <<16 - movl adler, %eax // adler - orl sum2, %eax // sum2<<16 | adler - - -#ifdef KERNEL // if this is for kernel code, need to restore xmm registers - movaps (%esp), %xmm0 // restore xmm0, offset -12 for ebx/edi/esi - movaps 16(%esp), %xmm1 // restore xmm1 - movaps 32(%esp), %xmm2 // restore xmm2 - movaps 48(%esp), %xmm3 // restore xmm3 - movaps 64(%esp), %xmm4 // restore xmm4 - movaps 80(%esp), %xmm5 // restore xmm5 - movaps 96(%esp), %xmm6 // restore xmm6 - movaps 112(%esp), %xmm7 // restore xmm7, if this is for SSSE3 or above - addl $140, %esp // we've already restored %xmm0-%xmm7 from stack -#endif - - popl %esi - popl %edi - popl %ebx - leave // pop ebp out from stack - ret - - -L_no_ssse3: - - // i386 adler32 without ssse3 - - // need to fill up xmm4/xmm5/xmm6 only if len>=16 - cmpl $16, len - jl 2f - - // set up table starting address to %eax - leal sum2_coefficients, %eax - - // reading coefficients - pxor zero, zero - movaps 48(%eax), %xmm6 // coefficients for computing sum2 : pmaddubsw 16:9 - movaps 64(%eax), %xmm4 // coefficients for computing sum2 : pmaddubsw 8:1 - movaps 80(%eax), ones // coefficients for computing sum2 : pmaddwd 1,1,...,1 - -2: - - cmpl $NMAX, len // len vs NMAX - jl 3f // if (len < NMAX), skip the following NMAX batches processing - -0: // while (len>=NMAX) { - - subl $NMAX, len // len -= NMAX - movl $(NMAX/16), %eax // n = NMAX/16 - -1: // do { - DO16_nossse3 // update adler/sum2 for a 16-byte input - decl %eax // n--; - jg 1b // } while (n); - - modulo_BASE // (adler/sum2) modulo BASE; - - cmpl $NMAX, len // - jge 0b // } /* len>=NMAX */ - -3: - - subl $16, len // pre-decrement len by 16 - jl L_len_lessthan_16 // if len < 16, skip the 16-vector code - DO16_nossse3 // update adler/sum2 for a 16-byte input - subl $16, len // len -= 16; - jmp L_len_lessthan_16 - - - .const - .align 4 -sum2_coefficients: // used for vectorizing adler32 computation - - .byte 32 - .byte 31 - .byte 30 - .byte 29 - 
.byte 28 - .byte 27 - .byte 26 - .byte 25 - .byte 24 - .byte 23 - .byte 22 - .byte 21 - .byte 20 - .byte 19 - .byte 18 - .byte 17 - .byte 16 - .byte 15 - .byte 14 - .byte 13 - .byte 12 - .byte 11 - .byte 10 - .byte 9 - .byte 8 - .byte 7 - .byte 6 - .byte 5 - .byte 4 - .byte 3 - .byte 2 - .byte 1 - - // coefficients for pmaddwd, to combine into 4 32-bit elements for sum2 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - - - // data for without ssse3 - - .word 16 - .word 15 - .word 14 - .word 13 - .word 12 - .word 11 - .word 10 - .word 9 - .word 8 - .word 7 - .word 6 - .word 5 - .word 4 - .word 3 - .word 2 - .word 1 - - // coefficients for pmaddwd, to combine into 4 32-bit elements for sum2 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - -#else // (defined __x86_64__) - - movq __cpu_capabilities@GOTPCREL(%rip), %rax // %rax -> __cpu_capabilities - mov (%rax), %eax // %eax = __cpu_capabilities - test $(kHasSupplementalSSE3), %eax // __cpu_capabilities & kHasSupplementalSSE3 - jne L_has_ssse3 - - // ---------------------------------------------------------------------------------- - // the following is added for x86_64 without SSSE3 support - // it is essentially a translation of the i386 non-SSSE3 code above - // ---------------------------------------------------------------------------------- - - // input : - // adler : rdi - // sum2 : rsi - // buf : rdx - // len : rcx - - pushq %rbp - movq %rsp, %rbp - pushq %rbx - -#ifdef KERNEL // if for kernel, save %xmm0-%xmm6 - subq $200, %rsp // allocate 200 bytes of stack (only %xmm0-%xmm6 are saved in this path), extra 8 to align %rsp to 16-byte boundary - movaps %xmm0, -32(%rbp) - movaps %xmm1, -48(%rbp) - movaps %xmm2, -64(%rbp) - movaps %xmm3, -80(%rbp) - movaps %xmm4, -96(%rbp) - movaps %xmm5, -112(%rbp) - movaps %xmm6, -128(%rbp) -#endif - - #define adler %rdi // 16(%rbp) - #define sum2 %rsi // 24(%rbp) - #define buf %rcx // 32(%rbp) - #define len %rbx // 40(%rbp) - #define zero %xmm0 - #define ones %xmm5 - - movq %rcx, len - movq %rdx, buf - - .macro modulo_BASE - movl $$-2146992015, %eax // 1/BASE in Q47 - mull %edi // edx:eax = adler divided by BASE in Q47 - shrl $$15, %edx // edx is now floor(adler/BASE) - imull $$BASE, %edx, %edx // edx * BASE - subq %rdx, adler // adler -= edx*BASE - movl $$-2146992015, %eax // 1/BASE in Q47 - mull %esi // edx:eax = sum2 divided by BASE in Q47 - shrl $$15, %edx // edx is now floor(sum2/BASE) - imull $$BASE, %edx, %eax // eax = edx * BASE - subq %rax, sum2 // sum2 -= edx*BASE - .endmacro
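For reference, modulo_BASE reduces both sums without a divide: -2146992015 is 0x80078071 as an unsigned constant, which is ceil(2^47/65521), i.e. a Q47 fixed-point reciprocal of BASE. A scalar C sketch of the same reduction (mod_base is an illustrative name, not part of this source):

    #include <stdint.h>

    #define BASE 65521u   /* largest prime smaller than 65536 */

    /* q = (x * ceil(2^47/BASE)) >> 47 equals x / BASE for every 32-bit x,
       so the remainder falls out with one multiply and one subtract. */
    static uint32_t mod_base(uint32_t x)
    {
        uint32_t q = (uint32_t)(((uint64_t)x * 0x80078071u) >> 47);
        return x - q * BASE;
    }

In the macro, mull leaves the high 32 bits of that product in %edx (a shift by 32), and the shrl $$15 supplies the remaining shift to make 47.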
// update adler/sum2 according to a new 16-byte vector, no ssse3 - .macro DO16_nossse3 - movaps (buf), %xmm1 // 16 bytes vector - movaps %xmm1, %xmm3 // a copy of the vector, the lower 8 bytes to be shuffled into 8 words - movaps %xmm1, %xmm2 // a copy of the vector, the higher 8 bytes to be shuffled into 8 words - psrldq $$8, %xmm2 // shift down 8 bytes, to reuse the shuffle vector - punpcklbw zero, %xmm3 // convert lower 8 bytes into 8 words - punpcklbw zero, %xmm2 // convert higher 8 bytes into 8 words - pmullw %xmm6, %xmm3 // lower 8 words * 16:9 - pmullw %xmm4, %xmm2 // higher 8 words * 8:1 - add $$16, buf // buf -> next vector - psadbw zero, %xmm1 // 2 16-bit words to be added for adler in xmm1 - paddw %xmm2, %xmm3 // 8 16-bit words to be added for sum2 in xmm3 - imulq $$16, adler, %rdx // rdx = 16*adler; - movhlps %xmm1, %xmm2 // higher 16-bit word (for adler) in xmm2 - pmaddwd ones, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - paddq %xmm2, %xmm1 // xmm1 lower 32-bit to be added to adler - add %rdx, sum2 // sum2 += adler*16; - movhlps %xmm3, %xmm2 // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements - movd %xmm1, %edx // to be added to adler - paddd %xmm2, %xmm3 // 2 32-bits elements in xmm3 to be added to sum2 - addq %rdx, adler // update adler - movd %xmm3, %edx // to be added to sum2 - psrlq $$32, %xmm3 // another 32-bit to be added to sum2 - addq %rdx, sum2 // sum2 += 1st half of update - movd %xmm3, %edx // to be added to sum2 - addq %rdx, sum2 // sum2 += 2nd half of update - .endm - - // need to fill up xmm4/xmm5/xmm6 only if len>=16 - cmpq $16, len - jl 0f - - // set up table starting address to %eax - leaq sum2_coefficients_nossse3(%rip), %rax - - // reading coefficients - pxor zero, zero - movaps (%rax), %xmm6 // coefficients for computing sum2 : pmaddubsw 16:9 - movaps 16(%rax), %xmm4 // coefficients for computing sum2 : pmaddubsw 8:1 - movaps 32(%rax), ones // coefficients for computing sum2 : pmaddwd 1,1,...,1 -0: - - cmp $NMAX, len // len vs NMAX - jl 3f // if (len < NMAX), skip the following NMAX batches processing - -0: // while (len>=NMAX) { - - sub $NMAX, len // len -= NMAX - mov $(NMAX/16), %eax // n = NMAX/16 - -1: // do { - DO16_nossse3 // update adler/sum2 for a 16-byte input - decl %eax // n--; - jg 1b // } while (n); - - modulo_BASE // (adler/sum2) modulo BASE; - - cmp $NMAX, len // - jge 0b // } /* len>=NMAX */ - -3: - - sub $16, len // pre-decrement len by 16 - jl 2f // if len < 16, skip the 16-vector code - DO16_nossse3 // update adler/sum2 for a 16-byte input - sub $16, len // len -= 16; - -2: - add $16, len // post-increment len by 16 - jz 1f // if len==0, branch over scalar processing - -0: // while (len) { - movzbq (buf), %rdx // new input byte - incq buf // buf++ - addq %rdx, adler // adler += *buf - addq adler, sum2 // sum2 += adler - decq len // len-- - jg 0b // } - -1: - - modulo_BASE // (adler/sum2) modulo BASE; - - // construct 32-bit (sum2<<16 | adler) to be returned - - salq $16, sum2 // sum2 <<16 - movq adler, %rax // adler - orq sum2, %rax // sum2<<16 | adler - -#ifdef KERNEL // if this is for kernel code, need to restore xmm registers - movaps -32(%rbp), %xmm0 - movaps -48(%rbp), %xmm1 - movaps -64(%rbp), %xmm2 - movaps -80(%rbp), %xmm3 - movaps -96(%rbp), %xmm4 - movaps -112(%rbp), %xmm5 - movaps -128(%rbp), %xmm6 - addq $200, %rsp // we've already restored %xmm0-%xmm6 from stack -#endif - - popq %rbx - leave - ret - - - - .const - .align 4 -sum2_coefficients_nossse3: // used for vectorizing adler32 computation - - // data for without ssse3 - - .word 16 - .word 15 - .word 14 - .word 13 - .word 12 - .word 11 - .word 10 - .word 9 - .word 8 - .word 7 - .word 6 - .word 5 - .word 4 - .word 3 - .word 2 - .word 1 - - // coefficients for pmaddwd, to combine into 4 32-bit elements for sum2 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - - - .text - - // ---------------------------------------------------------------------------------- - // the following is the original x86_64 adler32_vec code that uses SSSE3 instructions - // ---------------------------------------------------------------------------------- - -L_has_ssse3: - - // input : - // adler : rdi - // sum2 : rsi - // buf : rdx - // len : rcx - - pushq %rbp - movq %rsp, %rbp - pushq %rbx - -#ifdef KERNEL // if for kernel, save %xmm0-%xmm11 - subq $200, %rsp // allocate for %xmm0-%xmm11 (192 bytes), extra 8 to align %rsp to 16-byte boundary - movaps %xmm0,
-32(%rbp) - movaps %xmm1, -48(%rbp) - movaps %xmm2, -64(%rbp) - movaps %xmm3, -80(%rbp) - movaps %xmm4, -96(%rbp) - movaps %xmm5, -112(%rbp) - movaps %xmm6, -128(%rbp) - movaps %xmm7, -144(%rbp) - movaps %xmm8, -160(%rbp) - movaps %xmm9, -176(%rbp) - movaps %xmm10, -192(%rbp) - movaps %xmm11, -208(%rbp) -#endif - - #define adler %rdi // 16(%rbp) - #define sum2 %rsi // 24(%rbp) - #define buf %rcx // 32(%rbp) - #define len %rbx // 40(%rbp) - #define zero %xmm0 - #define ones %xmm5 - - movq %rcx, len - movq %rdx, buf - - // update adler/sum2 according to a new 16-byte vector - .macro DO16 - movaps (buf), %xmm1 // 16 bytes vector - movaps %xmm1, %xmm3 // a copy of the vector, used for unsigned byte in the destination of pmaddubsw - addq $$16, buf // buf -> next vector - psadbw zero, %xmm1 // 2 16-bit words to be added for adler in xmm1 - pmaddubsw %xmm4, %xmm3 // 8 16-bit words to be added for sum2 in xmm3 - imulq $$16, adler, %rdx // rdx = 16*adler; - movhlps %xmm1, %xmm2 // higher 16-bit word (for adler) in xmm2 - pmaddwd ones, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - paddq %xmm2, %xmm1 // xmm1 lower 32-bit to be added to adler - addq %rdx, sum2 // sum2 += adler*16; - movhlps %xmm3, %xmm2 // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements - movd %xmm1, %edx // to be added to adler - paddd %xmm2, %xmm3 // 2 32-bits elements in xmm3 to be added to sum2 - addq %rdx, adler // update adler - movd %xmm3, %edx // to be added to sum2 - psrlq $$32, %xmm3 // another 32-bit to be added to sum2 - addq %rdx, sum2 // sum2 += 1st half of update - movd %xmm3, %edx // to be added to sum2 - addq %rdx, sum2 // sum2 += 2nd half of update - .endm - - // update adler/sum2 according to a new 32-byte vector - .macro DO32 - imulq $$32, adler, %rdx // rdx = 32*adler - movaps (buf), %xmm1 // 1st 16 bytes vector - movaps 16(buf), %xmm7 // 2nd 16 bytes vector - movaps %xmm1, %xmm3 // a copy of 1st vector, used for unsigned byte in the destination of pmaddubsw - movaps %xmm7, %xmm2 // a copy of 2nd vector, used for unsigned byte in the destination of pmaddubsw - psadbw zero, %xmm1 // 2 16-bit words to be added for adler in xmm1 - psadbw zero, %xmm7 // 2 16-bit words to be added for adler in xmm7 - addq %rdx, sum2 // sum2 += adler*32; - pmaddubsw %xmm6, %xmm3 // 8 16-bit words to be added for sum2 in xmm3 - pmaddubsw %xmm4, %xmm2 // 8 16-bit words to be added for sum2 in xmm2 - paddd %xmm7, %xmm1 // 2 16-bit words to be added for adler in xmm1 - paddw %xmm2, %xmm3 // 8 16-bit words to be added for sum2 in xmm3 - addq $$32, buf // buf -> vector for next iteration - movhlps %xmm1, %xmm2 // higher 16-bit word (for adler) in xmm2 - pmaddwd ones, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - paddq %xmm2, %xmm1 // xmm1 lower 32-bit to be added to adler - movhlps %xmm3, %xmm2 // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements - movd %xmm1, %edx // to be added to adler - paddd %xmm2, %xmm3 // 2 32-bits elements in xmm3 to be added to sum2 - addq %rdx, adler // update adler - movd %xmm3, %edx // to be added to sum2 - psrlq $$32, %xmm3 // another 32-bit to be added to sum2 - addq %rdx, sum2 // sum2 += 1st half of update - movd %xmm3, %edx // to be added to sum2 - addq %rdx, sum2 // sum2 += 2nd half of update - .endm
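The DO16/DO32/DO48/DO64 macros all exploit the same identity: n sequential Adler-32 steps collapse into one batched update, with psadbw producing the plain byte sum and pmaddubsw (against the n..1 coefficient tables loaded further down) producing the weighted sum. A scalar C sketch of one 16-byte batch (do16_scalar is an illustrative name):

    #include <stdint.h>

    /* Equivalent to 16 iterations of: adler += buf[i]; sum2 += adler; */
    static void do16_scalar(uint32_t *adler, uint32_t *sum2,
                            const unsigned char buf[16])
    {
        uint32_t s = 0, w = 0;
        for (int i = 0; i < 16; i++) {
            s += buf[i];                       /* what psadbw sums     */
            w += (uint32_t)(16 - i) * buf[i];  /* pmaddubsw with 16..1 */
        }
        *sum2  += 16 * *adler + w;             /* the imulq term plus the vector reduction */
        *adler += s;
    }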
// update adler/sum2 according to a new 48-byte vector - - .macro DO48 - imulq $$48, adler, %rdx // rdx = 48*adler - - movaps (buf), %xmm7 // 1st 16 bytes vector - movaps 16(buf), %xmm10 // 2nd 16 bytes vector - movaps 32(buf), %xmm11 // 3rd 16 bytes vector - - movaps %xmm7, %xmm1 // 1st vector - movaps %xmm10, %xmm2 // 2nd vector - movaps %xmm11, %xmm3 // 3rd vector - - psadbw zero, %xmm7 // 1st vector for adler - psadbw zero, %xmm10 // 2nd vector for adler - psadbw zero, %xmm11 // 3rd vector for adler - - addq %rdx, sum2 // sum2 += adler*48; - - pmaddubsw %xmm9, %xmm1 // 8 16-bit words to be added for sum2 : 1st vector - pmaddubsw %xmm6, %xmm2 // 8 16-bit words to be added for sum2 : 2nd vector - pmaddubsw %xmm4, %xmm3 // 8 16-bit words to be added for sum2 : 3rd vector - - pmaddwd ones, %xmm1 // 4 32-bit elements to be added for sum2 in xmm1 - pmaddwd ones, %xmm2 // 4 32-bit elements to be added for sum2 in xmm2 - pmaddwd ones, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - - paddd %xmm10, %xmm7 // 2 16-bit words to be added for adler - paddd %xmm11, %xmm7 // 2 16-bit words to be added for adler - - paddd %xmm1, %xmm3 // 4 32-bit elements to be added for sum2 - paddd %xmm2, %xmm3 // 4 32-bit elements to be added for sum2 - - addq $$48, buf // buf -> vector for next iteration - - movhlps %xmm7, %xmm2 // higher 16-bit word (for adler) in xmm2 - paddq %xmm2, %xmm7 // xmm7 lower 32-bit to be added to adler - - movhlps %xmm3, %xmm2 // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements - movd %xmm7, %edx // to be added to adler - paddd %xmm2, %xmm3 // 2 32-bits elements in xmm3 to be added to sum2 - addq %rdx, adler // update adler - movd %xmm3, %edx // to be added to sum2 - psrlq $$32, %xmm3 // another 32-bit to be added to sum2 - addq %rdx, sum2 // sum2 += 1st half of update - movd %xmm3, %edx // to be added to sum2 - addq %rdx, sum2 // sum2 += 2nd half of update - .endm - - // update adler/sum2 according to a new 64-byte vector - .macro DO64 - imulq $$64, adler, %rdx // rdx = 64*adler - - movaps (buf), %xmm1 // 1st 16 bytes vector - movaps 16(buf), %xmm7 // 2nd 16 bytes vector - movaps 32(buf), %xmm10 // 3rd 16 bytes vector - movaps 48(buf), %xmm11 // 4th 16 bytes vector - - movaps %xmm1, %xmm3 // 1st vector - movaps %xmm11, %xmm2 // 4th vector - psadbw zero, %xmm1 // 1st vector for adler - psadbw zero, %xmm11 // 4th vector for adler - - addq %rdx, sum2 // sum2 += adler*64; - - pmaddubsw %xmm8, %xmm3 // 8 16-bit words to be added for sum2 : 1st vector - pmaddubsw %xmm4, %xmm2 // 8 16-bit words to be added for sum2 : 4th vector - pmaddwd ones, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - pmaddwd ones, %xmm2 // 4 32-bit elements to be added for sum2 in xmm2 - - paddd %xmm11, %xmm1 // 2 16-bit words to be added for adler in xmm1 - paddd %xmm2, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - - movaps %xmm7, %xmm2 // 2nd vector - movaps %xmm10, %xmm11 // 3rd vector - - psadbw zero, %xmm7 // 2nd vector for adler - psadbw zero, %xmm10 // 3rd vector for adler - - pmaddubsw %xmm9, %xmm2 // 8 16-bit words to be added for sum2 : 2nd vector - pmaddubsw %xmm6, %xmm11 // 8 16-bit words to be added for sum2 : 3rd vector - pmaddwd ones, %xmm2 // 4 32-bit elements to be added for sum2 in xmm2 - pmaddwd ones, %xmm11 // 4 32-bit elements to be added for sum2 in xmm11 - - paddd %xmm7, %xmm1 // 2 16-bit words to be added for adler in xmm1 - paddd %xmm10, %xmm1 // 2 16-bit words to be added for adler in xmm1 - - paddd %xmm2, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - paddd %xmm11, %xmm3 // 4 32-bit elements to be added for sum2 in xmm3 - - addq $$64, buf // buf -> vector for next iteration - - movhlps %xmm1, %xmm2 // higher 16-bit word (for adler) in xmm2 - paddq %xmm2, %xmm1 // xmm1 lower 32-bit to be added to adler - movhlps %xmm3, %xmm2 // 2 higher 32-bit elements of xmm3 to be added to lower 2 32-bit elements - movd %xmm1, %edx // to be added to adler - paddd %xmm2, %xmm3 // 2 32-bits elements in xmm3 to be added to sum2 - addq %rdx, adler // update adler - movd %xmm3, %edx // to be added to sum2 - psrlq $$32, %xmm3 // another 32-bit to be added to sum2 - addq %rdx, sum2 // sum2 += 1st half of update - movd %xmm3, %edx // to be added to sum2 - addq %rdx, sum2 // sum2 += 2nd half of update - .endm
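The NMAX batching below bounds how long the 32-bit accumulators may run between reductions. zlib defines NMAX = 5552 as the largest n such that 255*n*(n+1)/2 + (n+1)*(BASE-1) still fits in 32 bits, and 5552 = 86*64 + 48, which is why each batch is 86 DO64 iterations followed by a single DO48. A small check of that bound (nmax_ok is an illustrative name):

    #include <stdint.h>

    #define BASE 65521u

    /* Worst-case sum2 after n unreduced byte updates; a nonzero result
       means no 32-bit overflow: nmax_ok(5552) passes, nmax_ok(5553) fails. */
    static int nmax_ok(uint32_t n)
    {
        uint64_t worst = 255ull * n * (n + 1) / 2
                       + (uint64_t)(n + 1) * (BASE - 1);
        return worst <= 0xffffffffull;
    }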
// need to fill up xmm4/xmm5/xmm6 only if len>=16 - cmpq $16, len - jl skip_loading_tables - - // set up table starting address to %eax - leaq sum2_coefficients(%rip), %rax - - // reading coefficients - pxor zero, zero - movaps (%rax), %xmm8 // coefficients for computing sum2 : pmaddubsw 64:49 - movaps 16(%rax), %xmm9 // coefficients for computing sum2 : pmaddubsw 48:33 - movaps 32(%rax), %xmm6 // coefficients for computing sum2 : pmaddubsw 32:17 - movaps 48(%rax), %xmm4 // coefficients for computing sum2 : pmaddubsw 16:1 - movaps 64(%rax), ones // coefficients for computing sum2 : pmaddwd 1,1,...,1 - -skip_loading_tables: - - - cmpq $NMAX, len // len vs NMAX - jl len_lessthan_NMAX // if (len < NMAX), skip the following NMAX batches processing - -len_ge_NMAX_loop: // while (len>=NMAX) { - - subq $NMAX, len // len -= NMAX - movq $(NMAX/64), %rax // n = NMAX/64 - -n_loop: // do { - DO64 // update adler/sum2 for a 64-byte input - decq %rax // n--; - jg n_loop // } while (n); - - DO48 // update adler/sum2 for a 48-byte input - - modulo_BASE // (adler/sum2) modulo BASE; - - cmpq $NMAX, len // - jge len_ge_NMAX_loop // } /* len>=NMAX */ - -len_lessthan_NMAX: - - subq $64, len // pre-decrement len by 64 - jl len_lessthan_64 // if len < 64, skip the 64-vector code -len64_loop: // while (len>=64) { - DO64 // update adler/sum2 for a 64-byte input - subq $64, len // len -= 64; - jge len64_loop // } - -len_lessthan_64: - addq $(64-32), len // post-increment 64 + pre-decrement 32 of len - jl len_lessthan_32 // if len < 32, skip the 32-vector code - DO32 // update adler/sum2 for a 32-byte input - subq $32, len // len -= 32; - -len_lessthan_32: - - addq $(32-16), len // post-increment by 32 + pre-decrement by 16 on len - jl len_lessthan_16 // if len < 16, skip the 16-vector code - DO16 // update adler/sum2 for a 16-byte input - subq $16, len // len -= 16; - -len_lessthan_16: - addq $16, len // post-increment len by 16 - jz len_is_zero // if len==0, branch over scalar processing - -scalar_loop: // while (len) { - movzbq (buf), %rdx // new input byte - incq buf // buf++ - addq %rdx, adler // adler += *buf - addq adler, sum2 // sum2 += adler - decq len // len-- - jg scalar_loop // } - -len_is_zero: - - modulo_BASE // (adler/sum2) modulo BASE; - - // construct 32-bit (sum2<<16 | adler) to be returned - - salq $16, sum2 // sum2 <<16 - movq adler, %rax // adler - orq sum2, %rax // sum2<<16 | adler - - -#ifdef KERNEL // if for kernel, restore %xmm0-%xmm11 - movaps -32(%rbp), %xmm0 - movaps -48(%rbp), %xmm1 - movaps -64(%rbp), %xmm2 - movaps -80(%rbp), %xmm3 - movaps -96(%rbp), %xmm4 - movaps -112(%rbp), %xmm5 - movaps -128(%rbp), %xmm6 - movaps -144(%rbp), %xmm7 - movaps -160(%rbp), %xmm8 - movaps -176(%rbp), %xmm9 - movaps -192(%rbp), %xmm10 - movaps -208(%rbp), %xmm11 - addq $200, %rsp // we've already restored %xmm0-%xmm11 from stack -#endif - - popq %rbx - leave // pop rbp out from stack - ret - - - .const - .align 4 -sum2_coefficients: // used for vectorizing adler32
computation - - // coefficients for pmaddubsw instruction, used to generate 16-bit elements for sum2 - - .byte 64 - .byte 63 - .byte 62 - .byte 61 - .byte 60 - .byte 59 - .byte 58 - .byte 57 - .byte 56 - .byte 55 - .byte 54 - .byte 53 - .byte 52 - .byte 51 - .byte 50 - .byte 49 - .byte 48 - .byte 47 - .byte 46 - .byte 45 - .byte 44 - .byte 43 - .byte 42 - .byte 41 - .byte 40 - .byte 39 - .byte 38 - .byte 37 - .byte 36 - .byte 35 - .byte 34 - .byte 33 - .byte 32 - .byte 31 - .byte 30 - .byte 29 - .byte 28 - .byte 27 - .byte 26 - .byte 25 - .byte 24 - .byte 23 - .byte 22 - .byte 21 - .byte 20 - .byte 19 - .byte 18 - .byte 17 - .byte 16 - .byte 15 - .byte 14 - .byte 13 - .byte 12 - .byte 11 - .byte 10 - .byte 9 - .byte 8 - .byte 7 - .byte 6 - .byte 5 - .byte 4 - .byte 3 - .byte 2 - .byte 1 - - // coefficients for pmaddwd, to combine into 4 32-bit elements for sum2 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - .word 1 - -#endif // (defined __i386__) - -#endif // (defined __i386__ || defined __x86_64__) diff --git a/libkern/zlib/intel/inffastS.s b/libkern/zlib/intel/inffastS.s deleted file mode 100644 index 4252121bf..000000000 --- a/libkern/zlib/intel/inffastS.s +++ /dev/null @@ -1,1179 +0,0 @@ -#if (defined __i386__) - -/* this assembly was 1st compiled from inffast.c (assuming POSTINC defined, OFF=0) and then hand optimized */ - - .cstring -LC0: - .ascii "invalid distance too far back\0" -LC1: - .ascii "invalid distance code\0" -LC2: - .ascii "invalid literal/length code\0" - .text - .align 4,0x90 - - -#ifdef INFLATE_STRICT - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 -#endif -.globl _inflate_fast -_inflate_fast: - - // set up ebp to refer to arguments strm and start - pushl %ebp - movl %esp, %ebp - - // push edi/esi/ebx into stack - pushl %edi - pushl %esi - pushl %ebx - - // allocate for local variables 92-12=80, + 12 to align %esp to 16-byte boundary - subl $92, %esp - movl 8(%ebp), %ebx - - /* definitions to help code readability */ - - #define bits %edi - #define strm %ebx - #define state 28(strm) // state = (struct inflate_state FAR *)strm->state; - #define in -84(%ebp) // in = strm->next_in - OFF; OFF=0 - #define last -80(%ebp) // last = in + (strm->avail_in - 5); - #define out -28(%ebp) // out = strm->next_out - OFF; - #define beg -76(%ebp) // beg = out - (start - strm->avail_out); - #define end -72(%ebp) // end = out + (strm->avail_out - 257); - #define wsize -68(%ebp) // wsize = state->wsize; - #define whave -64(%ebp) // whave = state->whave; - #define write -60(%ebp) // write = state->write; - #define window -56(%ebp) // window = state->window; - #define hold -52(%ebp) // hold = state->hold; - #define lcode -48(%ebp) // lcode = state->lencode; - #define dcode -44(%ebp) // dcode = state->distcode; - #define lmask -40(%ebp) // lmask = (1U << state->lenbits) - 1; - #define dmask -36(%ebp) // dmask = (1U << state->distbits) - 1; - #define len -32(%ebp) - #define dmax -20(%ebp) - #define dist -16(%ebp) // dist - #define write_wsize -24(%ebp) // write+wsize - #define write_1 -88(%ebp) // write-1 - #define op -92(%ebp) // op - - movl (strm), %eax // strm->next_in - movl %eax, in // in = strm->next_in - OFF; OFF=0 - - subl $5, %eax // in - 5; - movl 4(strm), %ecx // strm->avail_in - addl %ecx, %eax // in + (strm->avail_in - 5); - movl %eax, last // last = in + (strm->avail_in - 5); - - movl 12(strm), %esi // strm->next_out - movl %esi, out // out = strm->next_out - OFF; - - movl 16(strm), %ecx // 
strm->avail_out - movl %esi, %eax // out - subl 12(%ebp), %eax // out - start - addl %ecx, %eax // out - (start - strm->avail_out); - movl %eax, beg // beg = out - (start - strm->avail_out); - - leal -257(%esi,%ecx), %ecx // out + (strm->avail_out - 257); - movl %ecx, end // end = out + (strm->avail_out - 257); - - movl state, %edx - -#ifdef INFLATE_STRICT - movl 20(%edx), %ecx // state->dmax - movl %ecx, dmax // dmax = state->dmax; -#endif - - movl 40(%edx), %ecx // state->wsize - movl %ecx, wsize // wsize = state->wsize; - - movl 44(%edx), %ecx // state->whave - movl %ecx, whave // whave = state->whave; - - movl 48(%edx), %esi // state->write - movl %esi, write // write = state->write; - - movl 52(%edx), %eax // state->window - movl %eax, window // window = state->window; - - - movl 56(%edx), %ecx // state->hold - movl %ecx, hold // hold = state->hold - - movl 60(%edx), bits // bits = state->bits; - - movl 76(%edx), %esi // state->lencode - movl %esi, lcode // lcode = state->lencode; - - movl 80(%edx), %eax // state->distcode - movl %eax, dcode // dcode = state->distcode; - - movl 84(%edx), %ecx // state->lenbits - movl $1, %eax - movl %eax, %esi // a copy of 1 - sall %cl, %esi // 1 << state->lenbits - decl %esi // (1U << state->lenbits) - 1; - movl %esi, lmask // lmask = (1U << state->lenbits) - 1; - - movl 88(%edx), %ecx // state->distbits - sall %cl, %eax // 1 << state->distbits - decl %eax // (1U << state->distbits) - 1; - movl %eax, dmask // dmask = (1U << state->distbits) - 1; - - - // these 2 might be used often, precomputed and saved in stack - movl write, %eax - addl wsize, %eax - movl %eax, write_wsize // write+wsize - - movl write, %edx - decl %edx - movl %edx, write_1 // write-1 - -L_do_while_loop: // do { - - cmpl $15, bits - jae bits_ge_15 // if (bits < 15) { -#if 0 - leal 8(bits), %esi // esi = bits+8 - movl in, %eax // eax = in - movzbl (%eax), %edx // edx = *in++ - movl bits, %ecx // cl = bits - sall %cl, %edx // 1st *in << bits - addl hold, %edx // hold += 1st *in << bits - movzbl 1(%eax), %eax // 2nd *in - movl %esi, %ecx // cl = bits+8 - sall %cl, %eax // 2nd *in << (bits+8) - addl %eax, %edx // hold += 2nd *in << (bits+8) - movl %edx, hold // update hold - addl $2, in // in += 2 - addl $16, bits // bits += 16; -#else - /* from simulation, this code segment performs better than the other case - possibly, we are more often hit with aligned memory access */ - movl in, %ecx // unsigned short *inp = (unsigned short *) (in+OFF); - movzwl (%ecx), %eax // *((unsigned short *) in); - movl bits, %ecx // bits - sall %cl, %eax // *((unsigned short *) in) << bits - addl %eax, hold // hold += (unsigned long) *((unsigned short *) in) << bits; - addl $2, in // in += 2; - addl $16, bits // bits += 16; -#endif - -bits_ge_15: // } /* bits < 15 */ - - movl hold, %eax // hold - andl lmask, %eax // hold & lmask; - movl lcode, %esi // lcode[] : 4-byte aligned - movl (%esi,%eax,4), %eax // this = lcode[hold&lmask]; - jmp dolen - .align 4,0x90 -op_nonzero: - movzbl %al, %ecx // a copy of op to cl - testb $16, %cl // if op&16 - jne Llength_base // branch to length_base - - testb $64, %cl // elif op&64 - jne length_2nd_level_else // branch to 2nd level length code else conditions - - // 2nd level length code - - movl $1, %eax - sall %cl, %eax // 1 << op - decl %eax // ((1<<op) - 1) - andl hold, %eax // (hold & ((1U << op) - 1)) - addl %esi, %eax // this.val + (hold & ((1U << op) - 1)) - movl lcode, %edx // lcode[] : 4-byte aligned - movl (%edx,%eax,4), %eax // this = lcode[this.val + (hold & ((1U << op) - 1))]; - -dolen: - movl %eax, %esi // a copy of this - shrl $16, %esi // esi = this.val; - movzbl %ah, %ecx // cl = op = this.bits - shrl %cl, hold // hold >>= op; - subl %ecx, bits // bits -= op; - testb %al, %al // op = (unsigned)(this.op); - jne op_nonzero // if op!=0, branch to op_nonzero
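The refill at the top of this loop keeps at least 15 valid bits in hold, enough to decode any one length/literal code without further checks. inffast.c writes the same step as two byte loads; the assembly's preferred path folds them into one 16-bit load. A C sketch (refill15 is an illustrative name; hold/bits/in as in the code above):

    static void refill15(const unsigned char **in, unsigned long *hold,
                         unsigned *bits)
    {
        if (*bits < 15) {
            *hold += (unsigned long)(*(*in)++) << *bits;
            *bits += 8;
            *hold += (unsigned long)(*(*in)++) << *bits;
            *bits += 8;
        }
    }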
movl %esi, %ecx // this.val; - movl out, %eax - movb %cl, (%eax) // PUP(out) = (unsigned char)(this.val); - incl %eax // out++; - movl %eax, out // save out - -L_tst_do_while_loop_end: - movl last, %eax // last - cmpl %eax, in // in vs last - jae return_unused_bytes // branch to return_unused_bytes if in >= last - movl end, %edx // end - cmpl %edx, out // out vs end - jb L_do_while_loop // branch to do loop if out < end - -return_unused_bytes: - - movl bits, %eax // bits - shrl $3, %eax // len = bits >> 3 - movl in, %edx // in - subl %eax, %edx // in -= len - sall $3, %eax // len << 3 - movl bits, %ecx // bits - subl %eax, %ecx // bits -= len << 3 - - movl %edx, (strm) // strm->next_in = in + OFF; - movl out, %eax - movl %eax, 12(strm) // strm->next_out = out + OFF; - - cmpl %edx, last // last vs in - jbe L67 // if (last <= in) branch to L67 and return to L69 - movl last, %eax // last - addl $5, %eax // 5 + last - subl %edx, %eax // 5 + last - in -L69: - movl %eax, 4(strm) // update strm->avail_in - - movl end, %eax - cmpl %eax, out // out vs end - jae L70 // if (out>=end) branch to L70, and return to L72 - addl $257, %eax // 257 + end - subl out, %eax // 257 + end - out -L72: - movl %eax, 16(strm) // update strm->avail_out - - movl $1, %eax - sall %cl, %eax // 1 << bits - decl %eax // (1 << bits) -1 - andl hold, %eax // hold &= (1U << bits) - 1; - movl state, %esi - movl %eax, 56(%esi) // state->hold = hold; - movl %ecx, 60(%esi) // state->bits = bits; - - addl $92, %esp // pop locals off the stack - - // restore saved registers and return - popl %ebx - popl %esi - popl %edi - leave - ret - - // this code segment is branched in from op_nonzero, with op in cl and this.value in esi -Llength_base: - movzwl %si, %esi // this instruction might not be needed, pad here to give better performance - movl %esi, len // len = (unsigned)(this.val); - - movl %ecx, %esi // leave a copy of op at ecx - andl $15, %esi // op&=15; - je Lop_is_zero // if (op) { - cmpl bits, %esi // op vs bits - jbe Lop_be_bits // if (bits < op) { - movl in, %edx // in - movzbl (%edx), %eax // *in - movl bits, %ecx // bits - sall %cl, %eax // *in << bits - addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; - incl %edx // in++ - movl %edx, in // update in - addl $8, bits // bits += 8 -Lop_be_bits: // } - movl $1, %eax // 1 - movl %esi, %ecx // op - sall %cl, %eax // 1 << op - decl %eax // (1<<op) - 1 - andl hold, %eax // (unsigned)hold & ((1U << op) - 1) - addl %eax, len // len += (unsigned)hold & ((1U << op) - 1); - shrl %cl, hold // hold >>= op; - subl %esi, bits // bits -= op; -Lop_is_zero: // } - cmpl $14, bits // if (bits < 15) { - jbe bits_le_14 // branch to refill 16-bit into hold, and branch back to next -L19: // } - movl hold, %eax // hold - andl dmask, %eax // hold&dmask - movl dcode, %esi // dcode[] : 4-byte aligned - movl (%esi,%eax,4), %eax // this = dcode[hold & dmask]; - jmp dodist
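Llength_base above is the assembly form of the length decode in inffast.c: start from the table entry's base value, then consume op&15 extra bits from hold, refilling one byte first if too few bits remain. A C sketch under those assumptions (decode_length is an illustrative name; val and op come from the decoded table entry):

    static unsigned decode_length(unsigned val, unsigned op,
                                  const unsigned char **in,
                                  unsigned long *hold, unsigned *bits)
    {
        unsigned len = val;
        op &= 15;                              /* count of extra bits */
        if (op) {
            if (*bits < op) {                  /* refill one byte if short */
                *hold += (unsigned long)(*(*in)++) << *bits;
                *bits += 8;
            }
            len += (unsigned)*hold & ((1U << op) - 1);
            *hold >>= op;
            *bits -= op;
        }
        return len;
    }

The distance decode at Ldistance_base below follows the same pattern, except that up to two refill bytes may be needed.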
Lop_16_zero: - testb $64, %cl // op&64 - jne Linvalid_distance_code // if (op&64)!=0, branch to invalid distance code - movl $1, %eax // 1 - sall %cl, %eax // 1 << op - decl %eax // (1<<op) - 1 - andl hold, %eax // (hold & ((1U << op) - 1)) - addl %edx, %eax // this.val + (hold & ((1U << op) - 1)) - movl dcode, %esi // dcode[] : 4-byte aligned - movl (%esi,%eax,4), %eax // this = dcode[this.val + (hold & ((1U << op) - 1))]; - -dodist: - movl %eax, %edx // a copy of this - shrl $16, %edx // edx = dist = this.val; - movzbl %ah, %ecx // cl = op = this.bits - shrl %cl, hold // hold >>= op; - subl %ecx, bits // bits -= op; - movzbl %al, %ecx // op = (unsigned)(this.op); - testb $16, %cl // op & 16 - je Lop_16_zero // if (op&16)==0 goto test op&64 - -Ldistance_base: // if (op&16) { /* distance base */ - andl $15, %ecx // op &= 15; edx = dist = this.val; - movl %ecx, op // save a copy of op - cmpl bits, %ecx // op vs bits - jbe 0f // if (bits < op) { - movl in, %ecx // in - movzbl (%ecx), %eax // *in - movl bits, %ecx // bits - sall %cl, %eax // *in << bits - addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; - incl in // in++ - addl $8, bits // bits += 8 - cmpl bits, op // op vs bits - jbe 0f // if (bits < op) { - movl in, %esi // in - movzbl (%esi), %eax // *in - movl bits, %ecx // cl = bits - sall %cl, %eax // *in << bits - addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; - incl %esi // in++ - movl %esi, in // update in - addl $8, bits // bits += 8 -0: // } } - - movzwl %dx, %edx // dist = (unsigned)(this.val); - movl $1, %eax // 1 - movzbl op, %ecx // cl = op - sall %cl, %eax // 1 << op - decl %eax // ((1U << op) - 1) - andl hold, %eax // (unsigned)hold & ((1U << op) - 1) - addl %edx, %eax // dist += (unsigned)hold & ((1U << op) - 1); - -#ifdef INFLATE_STRICT - - cmpl dmax, %eax // dist vs dmax - ja Linvalid_distance_too_far_back // if (dist > dmax) break for invalid distance too far back - -#endif - - movl %eax, dist // save a copy of dist in stack - shrl %cl, hold // hold >>= op; - subl %ecx, bits // bits -= op; - - movl out, %eax - subl beg, %eax // eax = op = out - beg - cmpl %eax, dist // dist vs op - jbe Lcopy_direct_from_output // if (dist <= op) branch to copy direct from output - - // if (dist > op) { - movl dist, %ecx // dist - subl %eax, %ecx // ecx = op = dist - op; - cmpl %ecx, whave // whave vs op - jb Linvalid_distance_too_far_back // if (op > whave) break for error; - - movl write, %edx - testl %edx, %edx - jne Lwrite_non_zero // if (write==0) { - movl wsize, %eax // wsize - subl %ecx, %eax // wsize-op - movl window, %esi // from=window-OFF - addl %eax, %esi // from += wsize-op - movl out, %edx // out - cmpl %ecx, len // len vs op - jbe L38 // if !(op < len) skip - subl %ecx, len // len -= op -0: // do { - movzbl (%esi), %eax // - movb %al, (%edx) // - incl %edx // - incl %esi // PUP(out) = PUP(from); - decl %ecx // --op; - jne 0b // } while (op); - - movl %edx, out // update out - movl %edx, %esi // out - subl dist, %esi // esi = from = out - dist; - -L38: /* copy from output */ - - // while (len > 2) { - // PUP(out) = PUP(from); - // PUP(out) = PUP(from); - // PUP(out) = PUP(from); - // len -= 3; - // } - // if (len) { - // PUP(out) = PUP(from); - // if (len > 1) - // PUP(out) = PUP(from); - // } - - movl len, %ecx // len - movl out, %edx // out - subl $3, %ecx // pre-decrement len by 3 - jl 1f // if len < 3, branch to 1f for remaining processing -0: // while (len>2) { - movzbl (%esi), %eax - movb %al, (%edx) // PUP(out) = PUP(from); - movzbl 1(%esi), %eax - movb %al, 1(%edx) // PUP(out) = PUP(from); - movzbl 2(%esi), %eax - movb %al, 2(%edx) // PUP(out) = PUP(from); - addl $3, %esi // from += 3; - addl $3, %edx // out += 3; - subl $3, %ecx // len -= 3; - jge 0b // } - movl %edx, out // update out, in case len == 0 -1: - addl $3, %ecx // post-increment len by 3 - je L_tst_do_while_loop_end // if (len) { - movzbl (%esi), %eax // - movb %al, (%edx) // PUP(out) = PUP(from); - incl %edx // out++ - movl %edx, out // update out, in case len == 1 - cmpl $2, %ecx // - jne L_tst_do_while_loop_end // if len==1, break - movzbl 1(%esi), %eax - movb %al, (%edx) // PUP(out) = PUP(from); - incl %edx // out++ - movl %edx, out // update out - jmp L_tst_do_while_loop_end // } - - .align 4,0x90 -length_2nd_level_else: - andl $32, %ecx // test end-of-block - je invalid_literal_length_code // if (op&32)==0, branch for invalid literal/length code break - movl state, %edx // if (op&32), end-of-block is detected - movl $11, (%edx) // state->mode = TYPE - jmp return_unused_bytes - -L70: - movl out, %edx // out - subl %edx, end // (end-out) - movl end, %esi // %esi = (end-out) = -(out - end); - leal 257(%esi), %eax // %eax = 257 + %esi = 257 - (out -end) - jmp L72 // return to update state and return
L67: // %edx = in, to return 5 - (in - last) in %eax - subl %edx, last // last - in - movl last, %edx // %edx = last - in = - (in - last); - leal 5(%edx), %eax // %eax = 5 + %edx = 5 - (in - last); - jmp L69 // return to update state and return - -bits_le_14: -#if 1 - leal 8(bits), %esi // esi = bits+8 - movl in, %eax // eax = in - movzbl (%eax), %edx // edx = *in++ - movl bits, %ecx // cl = bits - sall %cl, %edx // 1st *in << bits - addl hold, %edx // hold += 1st *in << bits - movzbl 1(%eax), %eax // 2nd *in - movl %esi, %ecx // cl = bits+8 - sall %cl, %eax // 2nd *in << (bits+8) - addl %eax, %edx // hold += 2nd *in << (bits+8) - movl %edx, hold // update hold - addl $2, in // in += 2 - addl $16, bits // bits += 16; - jmp L19 -#else - /* this code segment does not run as fast as the other original code segment, possibly the processor - need extra time to handle unaligned short access */ - movl in, %edx // unsigned short *inp = (unsigned short *) (in+OFF); - movzwl (%edx), %eax // *((unsigned short *) in); - movl bits, %ecx // bits - sall %cl, %eax // *((unsigned short *) in) << bits - addl %eax, hold // hold += (unsigned long) *((unsigned short *) in) << bits; - addl $2, %edx // in += 2; - addl $16, %ecx // bits += 16; - movl %edx, in - movl %ecx, bits - jmp L19 -#endif -invalid_literal_length_code: - call 0f -0: popl %eax - leal LC2-0b(%eax), %eax - movl %eax, 24(strm) // strm->msg = (char *)"invalid literal/length code"; - movl state, %esi - movl $27, (%esi) // state->mode = BAD - jmp return_unused_bytes -Linvalid_distance_code: - call 0f -0: popl %eax - leal LC1-0b(%eax), %eax - movl %eax, 24(strm) // strm->msg = (char *)"invalid distance code"; - movl state, %eax - movl $27, (%eax) // state->mode = BAD - jmp return_unused_bytes - -#ifdef INFLATE_STRICT - .align 4,0x90 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 -#endif -Lcopy_direct_from_output: - movl out, %edx // out - subl dist, %edx // from = out - dist - movl out, %ecx // out - movl len, %esi // len - subl $3, %esi // pre-decrement len by 3 -0: // do { - movzbl (%edx), %eax - movb %al, (%ecx) // PUP(out) = PUP(from); - movzbl 1(%edx), %eax - movb %al, 1(%ecx) // PUP(out) = PUP(from); - movzbl 2(%edx), %eax - movb %al, 2(%ecx) // PUP(out) = PUP(from); - addl $3, %edx // from += 3 - addl $3, %ecx // out += 3 - subl $3, %esi // len -= 3 - jge 0b // } while (len > 2); - movl %ecx, out // update out in case len == 0 - addl $3, %esi // post-increment len by 3 - je L_tst_do_while_loop_end // if (len) { - movzbl (%edx), %eax - movb %al, (%ecx) // PUP(out) = PUP(from); - incl %ecx - movl %ecx, out // out++ - cmpl $2, %esi // - jne L_tst_do_while_loop_end // if len==1, break - movzbl 1(%edx), %eax - movb %al, (%ecx) // PUP(out) = PUP(from); - incl %ecx - movl %ecx, out // out++ - jmp L_tst_do_while_loop_end // } - - .align 4,0x90 -Lwrite_non_zero: // %edx = write, %ecx = op - movl window, %esi // from = window - OFF; - cmp %ecx, %edx // write vs op, test for wrap around window or contiguous in window - jae Lcontiguous_in_window // if (write >= op) branch to contiguous in window - -Lwrap_around_window: // wrap around window - addl write_wsize, %esi // from += write+wsize - subl %ecx, %esi // from += wsize + write - op; - subl %edx, %ecx // op -= write - cmpl %ecx, len // len vs op - jbe L38 // if (len <= op) break to copy from output - subl %ecx, len // len -= op; - movl out, %edx // out -0: // do { - movzbl (%esi), %eax // *from - movb %al, (%edx) // *out - incl %esi // from++ - incl %edx // out++ - decl %ecx // --op - jne 0b // } while (op); - - movl %edx, out // save out in case we need to break to L38 - movl window, %esi // from = window - OFF; - movl len, %eax
// len - cmpl %eax, write // write vs len - jae L38 // if (write >= len) break to L38 - - movl write, %ecx // op = write - subl %ecx, len // len -= op; -0: // do { - movzbl (%esi), %eax // *from - movb %al, (%edx) // *out - incl %esi // from++ - incl %edx // out++ - decl %ecx // --op - jne 0b // } while (op); - - movl %edx, %esi // from = out - movl %edx, out // save a copy of out - subl dist, %esi // from = out - dist; - jmp L38 // break to copy from output - -Lcontiguous_in_window: // contiguous in window, edx = write, %ecx = op - subl %ecx, %edx // write - op - addl %edx, %esi // from += write - op; - cmpl %ecx, len // len vs op - jbe L38 // if (len <= op) break to copy from output - movl out, %edx // out - subl %ecx, len // len -= op; - -0: // do { - movzbl (%esi), %eax // *from - movb %al, (%edx) // *out - incl %esi // from++ - incl %edx // out++ - decl %ecx // op-- - jne 0b // } while (op); - - movl %edx, out // update out - movl %edx, %esi // from = out - subl dist, %esi // from = out - dist; - jmp L38 - -Linvalid_distance_too_far_back: - call 0f -0: popl %eax - leal LC0-0b(%eax), %eax - movl %eax, 24(strm) - movl state, %ecx - movl $27, (%ecx) - jmp return_unused_bytes - -#endif - -#if (defined __x86_64__) - .cstring -LC0: - .ascii "invalid distance too far back\0" -LC1: - .ascii "invalid distance code\0" -LC2: - .ascii "invalid literal/length code\0" - .text - .align 4,0x90 - -#ifdef INFLATE_STRICT - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 - .byte 0 -#endif - -.globl _inflate_fast -_inflate_fast: - - // set up rbp - pushq %rbp - movq %rsp, %rbp - - // save registers in stack - pushq %r15 - pushq %r14 - pushq %r13 - pushq %r12 - pushq %rbx - - #define strm %r13 - #define state %rdi - #define in %r12 - #define in_d %r12d - #define out %r10 - #define out_d %r10d - #define write %r15d - #define hold %r9 - #define holdd %r9d - #define bits %r8d - #define lcode %r14 - #define len %ebx - #define from %rcx - #define dmax %r11d - - #define last -104(%rbp) - #define beg -96(%rbp) - #define end -88(%rbp) - #define wsize -80(%rbp) - #define whave -76(%rbp) - #define window -72(%rbp) - #define dcode -64(%rbp) - #define lmask -56(%rbp) - #define dmask -112(%rbp) - #define wsize_write -116(%rbp) - #define write_1 -128(%rbp) - #define dist -44(%rbp) - - // reserve stack memory for local variables 128-40=88 - subq $88, %rsp - - movq %rdi, strm - movq 56(%rdi), state // state = (struct inflate_state FAR *)strm->state; - movq (strm), in // in = strm->next_in - OFF; - movl 8(strm), %eax // strm->avail_in - subl $5, %eax // (strm->avail_in - 5) - addq in, %rax // in + (strm->avail_in - 5) - movq %rax, last // last = in + (strm->avail_in - 5) - movq 24(strm), out // out = strm->next_out - movl 32(strm), %eax // strm->avail_out - subl %eax, %esi // (start - strm->avail_out); - movq out, %rdx // strm->next_out - subq %rsi, %rdx // out - (start - strm->avail_out); - movq %rdx, beg // beg = out - (start - strm->avail_out); - subl $257, %eax // (strm->avail_out - 257) - addq out, %rax // out + (strm->avail_out - 257); - movq %rax, end // end = out + (strm->avail_out - 257); - -#ifdef INFLATE_STRICT - movl 20(state), dmax // dmax = state->dmax; -#endif - - movl 52(state), %ecx // state->wsize - movl %ecx, wsize // wsize = state->wsize; - movl 56(state), %ebx // state->whave; - movl %ebx, whave // whave = state->whave; - movl 60(state), write // write = state->write; - movq 64(state), %rax // state->window - movq %rax, window // window = 
state->window; - movq 72(state), hold // hold = state->hold; - movl 80(state), bits // bits = state->bits; - - movq 96(state), lcode // lcode = state->lencode; - movq 104(state), %rdx // state->distcode; - movq %rdx, dcode // dcode = state->distcode; - - movl 116(state), %ecx // state->distbits - movl $1, %eax - movl %eax, %edx // 1 - sall %cl, %edx // (1U << state->distbits) - movl 112(state), %ecx // state->lenbits - sall %cl, %eax // (1U << state->lenbits) - decl %eax // (1U << state->lenbits) - 1 - movq %rax, lmask // lmask = (1U << state->lenbits) - 1 - decl %edx // (1U << state->distbits) - 1 - movq %rdx, dmask // dmask = (1U << state->distbits) - 1 - - movl wsize, %ecx // wsize - addl write, %ecx // wsize + write - movl %ecx, wsize_write // wsize_write = wsize + write - - leal -1(%r15), %ebx // write - 1 - movq %rbx, write_1 // write_1 = write - 1 - -L_do_while_loop: - cmpl $14, bits // bits vs 14 - ja 0f // if (bits < 15) { - movzwl (in), %eax // read 2 bytes from in - movl bits, %ecx // set up cl = bits - salq %cl, %rax // (*in) << bits - addq %rax, hold // hold += (*in) << bits - addq $2, in // in += 2 - addl $16, bits // bits += 16 -0: // } - movq lmask, %rax // lmask - andq hold, %rax // hold & lmask - jmp 1f - .align 4,0x90 -Lop_nonzero: - movzbl %al, %ecx // op in al and cl - testb $16, %cl // check for length base processing (op&16) - jne L_length_base // if (op&16) branch to length base processing - testb $64, %cl // check for 2nd level length code (op&64==0) - jne L_end_of_block // if (op&64)!=0, branch for end-of-block processing - - /* 2nd level length code : (op&64) == 0*/ -L_2nd_level_length_code: - movl $1, %eax // 1 - sall %cl, %eax // 1 << op - decl %eax // ((1U << op) - 1) - andq hold, %rax // (hold & ((1U << op) - 1)) - movzwl %dx, %edx - addq %rdx, %rax // this = lcode[this.val + (hold & ((1U << op) - 1))]; -1: - movl (lcode,%rax,4), %eax // this = lcode[hold & lmask]; -Ldolen: - movl %eax, %edx // a copy of this - shrl $16, %edx // edx = this.val; - movzbl %ah, %ecx // op = this.bits - shrq %cl, hold // hold >>= op; - subl %ecx, bits // bits -= op; - testb %al, %al // op = (unsigned)(this.op); - jne Lop_nonzero // if (op!=0) branch for copy operation -L_literal: - movb %dl, (out) // *out = this.val - incq out // out++ -L_do_while_loop_check: - cmpq last, in // in vs last - jae L_return_unused_byte // if in >= last, break to return unused byte processing - cmpq end, out // out vs end - jb L_do_while_loop // back to do_while_loop if out < end - - /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ - -L_return_unused_byte: - movl out_d, %esi - jmp L34 - -L_length_base: /* al = cl = op, edx = this.val, op&16 = 16 */ - movzwl %dx, len // len = (unsigned)(this.val); - movl %ecx, %edx // op - andl $15, %edx // op &= 15; - je 1f // if (op) { - cmpl bits, %edx // op vs bits - jbe 0f // if (bits < op) { - movzbl (in), %eax // *in - movl bits, %ecx // cl = bits - salq %cl, %rax // *in << bits - addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits; - incq in // in++ - addl $8, bits // bits += 8 -0: // } - movl $1, %eax // 1 - movl %edx, %ecx // cl = op - sall %cl, %eax // 1 << op - decl %eax // (1 << op) - 1 - andl holdd, %eax // (unsigned)hold & ((1U << op) - 1); - addl %eax, len // len += (unsigned)hold & ((1U << op) - 1); - shrq %cl, hold // hold >>= op; - subl %edx, bits // bits -= op; -1: // } - cmpl $14, bits // bits vs 14 - jbe L99 // if (bits < 15) go to loading to hold and return to L19
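Ldolen above unpacks one of zlib's 4-byte Huffman table entries, loaded as a single 32-bit word: on little-endian x86 the op field lands in %al, bits in %ah, and val in the upper 16 bits. The layout, as declared in zlib's inftrees.h:

    /* op flags match the tests above: &16 = base value plus extra bits,
       &32 = end-of-block, &64 = invalid code; 0 = literal; any other
       nonzero op marks a link to a second-level table. */
    typedef struct {
        unsigned char op;     /* operation, extra bits, table bits */
        unsigned char bits;   /* bits consumed by this code */
        unsigned short val;   /* literal, base length/distance, or table offset */
    } code;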
L19: // } - movq dmask, %rax // dmask - andq hold, %rax // hold & dmask - movq dcode, %rdx // dcode[] - movl (%rdx,%rax,4), %eax // this = dcode[hold & dmask]; - jmp L_dodist - .align 4,0x90 -0: // op&16 == 0, test (op&64)==0 for 2nd level distance code - testb $64, %cl // op&64 - jne L_invalid_distance_code // if (op&64)!=0, branch to invalid distance code; fall through for 2nd level distance code - movl $1, %eax // 1 - sall %cl, %eax // 1 << op - decl %eax // (1 << op) - 1 - andq hold, %rax // (hold & ((1U << op) - 1)) - movzwl %dx, %edx // this.val - addq %rdx, %rax // this.val + (hold & ((1U << op) - 1)) - movq dcode, %rcx // dcode[] - movl (%rcx,%rax,4), %eax // this = dcode[this.val + (hold & ((1U << op) - 1))]; -L_dodist: - movl %eax, %edx // this - shrl $16, %edx // dist = (unsigned)(this.val); - movzbl %ah, %ecx // cl = op = this.bits - shrq %cl, hold // hold >>= op; - subl %ecx, bits // bits -= op; - movzbl %al, %ecx // op = (unsigned)(this.op); - testb $16, %cl // (op & 16) test for distance base - je 0b // if (op&16) == 0, branch to check for 2nd level distance code - -L_distance_base: /* distance base */ - - movl %ecx, %esi // op - andl $15, %esi // op&=15 - cmpl bits, %esi // op vs bits - jbe 1f // if (bits < op) { - movzbl (in), %eax // *in - movl bits, %ecx // cl = bits - salq %cl, %rax // *in << bits - addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits; - incq in // in++ - addl $8, bits // bits += 8 - cmpl bits, %esi // op vs bits - jbe 1f // if (bits < op) { - movzbl (in), %eax // *in - movl bits, %ecx // cl = bits - salq %cl, %rax // *in << bits - addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits; - incq in // in++ - addl $8, bits // bits += 8 -1: // } } - - movzwl %dx, %edx // dist - movl $1, %eax // 1 - movl %esi, %ecx // cl = op - sall %cl, %eax // (1 << op) - decl %eax // (1 << op) - 1 - andl holdd, %eax // (unsigned)hold & ((1U << op) - 1) - addl %edx, %eax // dist += (unsigned)hold & ((1U << op) - 1); - movl %eax, dist // save a copy of dist in stack - -#ifdef INFLATE_STRICT - cmp %eax, dmax // dmax vs dist - jb L_invalid_distance_too_far_back // if (dmax < dist) break for invalid distance too far back -#endif - - shrq %cl, hold // hold >>= op; - subl %esi, bits // bits -= op; - movl out_d, %esi // out - movl out_d, %eax // out - subl beg, %eax // op = out - beg - cmpl %eax, dist // dist vs op, /* see if copy from window */ - jbe L_copy_direct_from_output // if (dist <= op) branch to copy direct from output - -L_distance_back_in_window: - - movl dist, %edx // dist - subl %eax, %edx // op = dist - op; /* distance back in window */ - - cmpl %edx, whave // whave vs op - jb L_invalid_distance_too_far_back // if (op > whave), break for invalid distance too far back - - testl write, write // if (write!=0) - jne L_wrap_around_window // branch to wrap around window - -L_very_common_case: - - movl wsize, %eax // wsize - subl %edx, %eax // wsize - op - movq window, from // from = window - OFF; - addq %rax, from // from += wsize - op; - - movl %edx, %esi // op - cmpl %edx, len // len vs op - ja L_some_from_window // if (len > op), branch for aligned code block L_some_from_window
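The window logic above (and the wrap/contiguous cases further down) mirrors inffast.c: op is how far the match reaches back past the data already produced in this output buffer, and the source pointer is formed from the saved window in one of three ways. Roughly (window_from is an illustrative name):

    /* returns where copying starts inside the window */
    static const unsigned char *window_from(const unsigned char *window,
                                            unsigned wsize, unsigned write,
                                            unsigned op)
    {
        if (write == 0)                   /* very common case */
            return window + (wsize - op);
        if (write < op)                   /* wrap around window */
            return window + (wsize + write - op);
        return window + (write - op);     /* contiguous in window */
    }

In each case, if the match is longer than what the window segment holds, the remainder is copied from the output buffer itself (from = out - dist), as the code at L38 does.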
L38: - subl $3, len // pre-decrement len by 3 - jge 0f // if len >= 3, branch to the aligned code block -1: addl $3, len // post-increment len by 3 - je L_do_while_loop_check // if (len==0) break to L_do_while_loop_check - movzbl (from), %eax // *from - movb %al, (out) // *out - incq out // out++ - cmpl $2, len // len vs 2 - jne L_do_while_loop_check // if len!=2 break to L_do_while_loop_check - movzbl 1(from), %eax // *from - movb %al, (out) // *out - incq out // out++ - jmp L_do_while_loop_check // break to L_do_while_loop_check - - .align 4,0x90 -0: // do { - movzbl (from), %eax // *from - movb %al, (out) // *out - movzbl 1(from), %eax // *from - movb %al, 1(out) // *out - movzbl 2(from), %eax // *from - movb %al, 2(out) // *out - addq $3, out // out += 3 - addq $3, from // from += 3 - subl $3, len // len -= 3 - jge 0b // } while (len>=0); - jmp 1b // branch back to the possibly unaligned code - - .align 4,0x90 -L_end_of_block: - andl $32, %ecx // op & 32 - jne L101 // if (op&32) branch to end-of-block break - leaq LC2(%rip), from - movq from, 48(strm) // strm->msg = (char *)"invalid literal/length code"; - movl $27, (state) // state->mode = BAD; - movl out_d, %esi - -L34: - movl bits, %eax // bits - shrl $3, %eax // len = bits >> 3; - mov %eax, %edx // len - subq %rdx, in // in -= len - sall $3, %eax // len << 3 - movl bits, %ecx // bits - subl %eax, %ecx // bits -= len << 3 - movq in, (strm) // strm->next_in = in + OFF; - movq out, 24(strm) // strm->next_out = out + OFF; - cmpq in, last // last vs in - jbe L67 // if (last <= in) branch to L67 and return to L69 - movl last, %eax // last - addl $5, %eax // last + 5 - subl in_d, %eax // 5 + last - in -L69: - movl %eax, 8(strm) // update strm->avail_in - - cmpq end, out // out vs end - jae L70 // if out>=end branch to L70 and return to L72 - movl end, %eax // end - addl $257, %eax // 257 + end - subl %esi, %eax // 257 + end - out; -L72: - movl %eax, 32(strm) // update strm->avail_out - - movl $1, %eax // 1 - sall %cl, %eax // 1 << bits - decl %eax // (1U << bits) - 1 - andq hold, %rax // hold &= (1U << bits) - 1; - movq %rax, 72(state) // state->hold = hold; - movl %ecx, 80(state) // state->bits = bits; - - // clear stack memory for local variables - addq $88, %rsp - - // restore registers from stack - popq %rbx - popq %r12 - popq %r13 - popq %r14 - popq %r15 - - // return to caller - leave - ret - - .align 4,0x90 -L99: - leal 8(bits), %esi // esi = bits+8 - movzbl (in), %edx // 1st *in - movl bits, %ecx // cl = bits - salq %cl, %rdx // 1st *in << bits - addq %rdx, hold // 1st hold += (unsigned long)(PUP(in)) << bits; - movzbl 1(in), %eax // 2nd *in - movl %esi, %ecx // cl = bits + 8 - salq %cl, %rax // 2nd *in << (bits+8) - addq %rax, hold // 2nd hold += (unsigned long)(PUP(in)) << bits; - addq $2, in // in += 2 - addl $16, bits // bits += 16 - jmp L19 - -L101: - movl $11, (state) // state->mode = TYPE - movl out_d, %esi - jmp L34 - .align 4,0x90 -L70: - movl end, %eax // end - subl %esi, %eax // end - out - addl $257, %eax // 257 + end - out - jmp L72 - .align 4,0x90 -L67: - movl last, %eax // last - subl in_d, %eax // last - in - addl $5, %eax // 5 + last - in - jmp L69 - - - .align 4,0x90 - - // stuffing the following 4 bytes to align the major loop to a 16-byte boundary to give better performance - .byte 0 - .byte 0 - .byte 0 - .byte 0 -L_copy_direct_from_output: - mov dist, %eax // dist - movq out, %rdx // out - subq %rax, %rdx // from = out - dist; - subl $3, len // pre-decrement len by 3 - // do { -0: movzbl (%rdx), %eax // *from - movb %al, (out) // *out - movzbl 1(%rdx), %eax // *from - movb %al, 1(out) // *out - movzbl 2(%rdx), %eax // *from - movb %al, 2(out) // *out - addq $3, out // out+=3 - addq $3, %rdx // from+=3 - subl $3, len // len-=3 - jge 0b // } while (len>=0); -1: addl $3, len // post-increment len by 3 - je L_do_while_loop_check // if len==0, branch to do_while_loop_check - - movzbl (%rdx), %eax // *from - movb %al, (out) // *out - incq out // out++ - cmpl $2, len // len == 2 ?
- jne L_do_while_loop_check // if len==1, branch to do_while_loop_check - - movzbl 1(%rdx), %eax // *from - movb %al, (out) // *out - incq out // out++ - jmp L_do_while_loop_check // branch to do_while_loop_check - - .align 4,0x90 -L_some_from_window: // from : from, out, %esi/%edx = op - // do { - movzbl (from), %eax // *from - movb %al, (out) // *out - incq from // from++ - incq out // out++ - decl %esi // --op - jne L_some_from_window // } while (op); - subl %edx, len // len -= op; - mov dist, %eax // dist - movq out, from // out - subq %rax, from // from = out - dist; - jmp L38 // copy from output - - .align 4,0x90 -L_wrap_around_window: - cmpl %edx, write // write vs op - jae L_contiguous_in_window // if (write >= op) branch to contiguous in window - movl wsize_write, %eax // wsize+write - subl %edx, %eax // wsize+write-op - movq window, from // from = window - OFF - addq %rax, from // from += wsize+write-op - subl write, %edx // op -= write - cmpl %edx, len // len vs op - jbe L38 // if (len<=op) branch to copy from output - - subl %edx, len // len -= op; -0: // do { - movzbl (from), %eax // *from - movb %al, (out) // *out - incq from // from++ - incq out // out++ - decl %edx // op-- - jne 0b // } while (op); - movq window, from - - cmpl len, write // write vs len - jae L38 // if (write >= len) branch to copy from output - movl write, %esi // op = write - subl write, len // len -= op -1: // do { - movzbl (from), %eax // *from - movb %al, (out) // *out - incq from // from++ - incq out // out++ - decl %esi // op-- - jne 1b // } while (op); - mov dist, %eax // dist - movq out, from // out - subq %rax, from // from = out - dist; - jmp L38 - - .align 4,0x90 -L_contiguous_in_window: - movl write, %eax // write - subl %edx, %eax // write - op - movq window, from // from = window - OFF - addq %rax, from // from += write - op - cmpl %edx, len // len vs op - jbe L38 // if (len <= op) branch to copy from output - subl %edx, len // len -= op; -2: // do { - movzbl (from), %eax // *from - movb %al, (out) // *out - incq from // from++ - incq out // out++ - decl %edx // op-- - jne 2b // } while (op); - - mov dist, %eax // dist - movq out, from // out - subq %rax, from // from = out - dist; - jmp L38 // copy from output - - .align 4,0x90 -L_invalid_distance_code: - leaq LC1(%rip), %rdx - movq %rdx, 48(strm) - movl $27, (state) - movl out_d, %esi - jmp L34 - -L_invalid_distance_too_far_back: - leaq LC0(%rip), %rbx - movq %rbx, 48(strm) // error message - movl $27, (state) // state->mode = BAD - jmp L34 - -#endif diff --git a/libsa/Makefile b/libsa/Makefile index 3815c667f..4e8c0e8f1 100644 --- a/libsa/Makefile +++ b/libsa/Makefile @@ -7,25 +7,8 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = libsa -INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} -INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} -INSTINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS} - - -EXPINC_SUBDIRS = libsa -EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} -EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS} -EXPINC_SUBDIRS_ARM = ${EXPINC_SUBDIRS} - - -SETUP_SUBDIRS = - COMP_SUBDIRS = conf -INST_SUBDIRS = - - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/libsa/bootstrap.cpp b/libsa/bootstrap.cpp index f24547c95..c36e98fde 100644 --- a/libsa/bootstrap.cpp +++ b/libsa/bootstrap.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -71,6 +71,10 @@ static void bootstrapRecordStartupExtensions(void);
 static void bootstrapLoadSecurityExtensions(void);
 
+#if NO_KEXTD
+extern "C" bool IORamDiskBSDRoot(void);
+#endif
+
 #if PRAGMA_MARK
 #pragma mark Macros
 #endif
@@ -226,11 +230,6 @@ KLDBootstrap::readPrelinkedExtensions(
     void * prelinkData = NULL;  // see code
     vm_size_t prelinkLength = 0;
-#if __i386__
-    vm_map_offset_t prelinkDataMapOffset = 0;
-    void * prelinkCopy = NULL;  // see code
-    kern_return_t mem_result = KERN_SUCCESS;
-#endif
     OSDictionary * infoDict = NULL;  // do not release
@@ -239,7 +238,9 @@ KLDBootstrap::readPrelinkedExtensions(
     u_int i = 0;
 
 #if NO_KEXTD
+    bool ramDiskBoot;
     bool developerDevice;
+    bool dontLoad;
 #endif
 
     OSKextLog(/* kext */ NULL,
@@ -302,67 +303,6 @@ KLDBootstrap::readPrelinkedExtensions(
     prelinkData = (void *) prelinkTextSegment->vmaddr;
     prelinkLength = prelinkTextSegment->vmsize;
 
-#if __i386__
-    /* To enable paging and write/execute protections on the kext
-     * executables, we need to copy them out of the booter-created
-     * memory, reallocate that space with VM, then prelinkCopy them back in.
-     *
-     * This isn't necessary on x86_64 because kexts have their own VM
-     * region for that architecture.
-     *
-     * XXX: arm's pmap implementation doesn't seem to let us do this.
-     */
-
-    mem_result = kmem_alloc(kernel_map, (vm_offset_t *)&prelinkCopy,
-        prelinkLength);
-    if (mem_result != KERN_SUCCESS) {
-        OSKextLog(/* kext */ NULL,
-            kOSKextLogErrorLevel |
-            kOSKextLogGeneralFlag | kOSKextLogArchiveFlag,
-            "Can't copy prelinked kexts' text for VM reassign.");
-        goto finish;
-    }
-
-    /* Copy it out.
-     */
-    memcpy(prelinkCopy, prelinkData, prelinkLength);
-
-    /* Dump the booter memory.
-     */
-    ml_static_mfree((vm_offset_t)prelinkData, prelinkLength);
-
-    /* Set up the VM region.
-     */
-    prelinkDataMapOffset = (vm_map_offset_t)(uintptr_t)prelinkData;
-    mem_result = vm_map_enter_mem_object(
-        kernel_map,
-        &prelinkDataMapOffset,
-        prelinkLength, /* mask */ 0,
-        VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
-        (ipc_port_t)NULL,
-        (vm_object_offset_t) 0,
-        /* copy */ FALSE,
-        /* cur_protection */ VM_PROT_ALL,
-        /* max_protection */ VM_PROT_ALL,
-        /* inheritance */ VM_INHERIT_DEFAULT);
-    if ((mem_result != KERN_SUCCESS) ||
-        (prelinkTextSegment->vmaddr != prelinkDataMapOffset))
-    {
-        OSKextLog(/* kext */ NULL,
-            kOSKextLogErrorLevel |
-            kOSKextLogGeneralFlag | kOSKextLogArchiveFlag,
-            "Can't create kexts' text VM entry at 0x%llx, length 0x%x (error 0x%x).",
-            (unsigned long long) prelinkDataMapOffset, prelinkLength, mem_result);
-        goto finish;
-    }
-    prelinkData = (void *)(uintptr_t)prelinkDataMapOffset;
-
-    /* And copy it back.
-     */
-    memcpy(prelinkData, prelinkCopy, prelinkLength);
-
-    kmem_free(kernel_map, (vm_offset_t)prelinkCopy, prelinkLength);
-#endif /* __i386__ */
 
     /* Unserialize the info dictionary from the prelink info section.
      */
@@ -385,19 +325,13 @@ KLDBootstrap::readPrelinkedExtensions(
     }
 
 #if NO_KEXTD
-    /* Check if we should keep developer kexts around. Default:
-     *  Release: No
-     *  Development: Yes
-     *  Debug : Yes
+    /* Check if we should keep developer kexts around.
      * TODO: Check DeviceTree instead of a boot-arg
      */
-#if DEVELOPMENT
     developerDevice = true;
-#else
-    developerDevice = false;
-#endif
-    PE_parse_boot_argn("developer", &developerDevice, sizeof(developerDevice));
+
+    ramDiskBoot = IORamDiskBSDRoot();
 #endif /* NO_KEXTD */
 
     infoDictArray = OSDynamicCast(OSArray,
@@ -407,9 +341,13 @@ KLDBootstrap::readPrelinkedExtensions(
             "The prelinked kernel has no kext info dictionaries");
         goto finish;
     }
+
+    /* Create dictionary of excluded kexts
+     */
+    OSKext::createExcludeListFromPrelinkInfo(infoDictArray);
 
-    /* Create OSKext objects for each info dictionary.
-     */
+    /* Create OSKext objects for each info dictionary.
+     */
     for (i = 0; i < infoDictArray->getCount(); ++i) {
         infoDict = OSDynamicCast(OSDictionary, infoDictArray->getObject(i));
         if (!infoDict) {
@@ -421,30 +359,46 @@ KLDBootstrap::readPrelinkedExtensions(
         }
 
 #if NO_KEXTD
+        dontLoad = false;
+
         /* If we're not on a developer device, skip and free developer kexts.
          */
         if (developerDevice == false) {
            OSBoolean *devOnlyBool = OSDynamicCast(OSBoolean,
                infoDict->getObject(kOSBundleDeveloperOnlyKey));
            if (devOnlyBool == kOSBooleanTrue) {
-                OSString *bundleID = OSDynamicCast(OSString,
-                    infoDict->getObject(kCFBundleIdentifierKey));
-                if (bundleID) {
-                    OSKextLog(NULL, kOSKextLogWarningLevel | kOSKextLogGeneralFlag,
-                        "Kext %s not loading on non-dev device.", bundleID->getCStringNoCopy());
-                }
-
-                OSNumber *addressNum = OSDynamicCast(OSNumber,
-                    infoDict->getObject(kPrelinkExecutableLoadKey));
-                OSNumber *lengthNum = OSDynamicCast(OSNumber,
-                    infoDict->getObject(kPrelinkExecutableSizeKey));
-                if (addressNum && lengthNum) {
-#error Pick the right way to free prelinked data on this arch
-                }
+                dontLoad = true;
+            }
+        }
 
-                infoDictArray->removeObject(i--);
-                continue;
+        /* Skip and free kexts that are only needed when booted from a ram disk.
+         */
+        if (ramDiskBoot == false) {
+            OSBoolean *ramDiskOnlyBool = OSDynamicCast(OSBoolean,
+                infoDict->getObject(kOSBundleRamDiskOnlyKey));
+            if (ramDiskOnlyBool == kOSBooleanTrue) {
+                dontLoad = true;
+            }
+        }
+
+        if (dontLoad == true) {
+            OSString *bundleID = OSDynamicCast(OSString,
+                infoDict->getObject(kCFBundleIdentifierKey));
+            if (bundleID) {
+                OSKextLog(NULL, kOSKextLogWarningLevel | kOSKextLogGeneralFlag,
+                    "Kext %s not loading.", bundleID->getCStringNoCopy());
+            }
+
+            OSNumber *addressNum = OSDynamicCast(OSNumber,
+                infoDict->getObject(kPrelinkExecutableLoadKey));
+            OSNumber *lengthNum = OSDynamicCast(OSNumber,
+                infoDict->getObject(kPrelinkExecutableSizeKey));
+            if (addressNum && lengthNum) {
+#error Pick the right way to free prelinked data on this arch
             }
+
+            infoDictArray->removeObject(i--);
+            continue;
         }
 #endif /* NO_KEXTD */
@@ -558,6 +512,11 @@ KLDBootstrap::readBooterExtensions(void)
         goto finish;
     }
 
+    /* Create dictionary of excluded kexts
+     */
+    OSKext::createExcludeListFromBooterData(propertyDict, keyIterator);
+    keyIterator->reset();
+
     while ( ( deviceTreeName =
         OSDynamicCast(OSString, keyIterator->getNextObject() ))) {
diff --git a/libsa/conf/MASTER b/libsa/conf/MASTER
index a47e74813..5f90761b8 100644
--- a/libsa/conf/MASTER
+++ b/libsa/conf/MASTER
@@ -71,11 +71,6 @@ options		CONFIG_KXLD	# kxld/runtime linking of kexts	#
 options		DEVELOPMENT	# dev kernel	#
 
-# configurable kernel - general switch to say we are building for an
-# embedded device
-#
-options		CONFIG_EMBEDDED	#
-
 # CONFIG_KEXT_BASEMENT - alloc post boot loaded kexts after prelinked kexts
 #
 options		CONFIG_KEXT_BASEMENT	#
 #
diff --git a/libsa/conf/MASTER.i386 b/libsa/conf/MASTER.i386
deleted file mode 100644
index 8e7b8008c..000000000
--- a/libsa/conf/MASTER.i386
+++ /dev/null
@@ -1,15 +0,0 @@
-######################################################################
-#
-#  RELEASE = [ intel mach libkerncpp config_dtrace config_kxld vm_pressure_events ]
-#  PROFILE = [ RELEASE profile ]
-#  DEBUG = [ RELEASE debug ]
-#
-#  EMBEDDED = [ intel mach libkerncpp ]
-#  DEVELOPMENT = [ EMBEDDED config_dtrace ]
-#
-######################################################################
-
-machine		"i386"		#
-cpu		"i386"		#
-
-options		NO_NESTED_PMAP	#
diff --git a/libsa/conf/Makefile b/libsa/conf/Makefile
index 45981d362..25a42ef5e 100644
--- a/libsa/conf/Makefile
+++ b/libsa/conf/Makefile
@@ -3,57 +3,37 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
 export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
-
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-SETUP_SUBDIRS =
-
-COMP_SUBDIRS =
-
-INST_SUBDIRS =
-
-ifndef LIBSA_KERNEL_CONFIG
-export LIBSA_KERNEL_CONFIG = $(KERNEL_CONFIG)
-endif
+MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC)
 
-ifneq ($(MACHINE_CONFIG), DEFAULT)
-export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)
-else
-export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT)
-endif
-
-MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC)
-
-$(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \
-	$(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \
-	$(SOURCE)/Makefile.template \
-	$(SOURCE)/Makefile.$(ARCH_CONFIG_LC) \
-	$(SOURCE)/files \
-	$(SOURCE)/files.$(ARCH_CONFIG_LC)
-	$(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \
-	$(MKDIR) $${doconf_target}; \
-	cd $${doconf_target}; \
+$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \
+	$(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \
+	$(SOURCE)/Makefile.template \
+	$(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \
+	$(SOURCE)/files \
+	$(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC)
+	$(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \
+	cd $(addsuffix /conf, $(TARGET)); \
 	rm -f $(notdir $?); \
-	cp $? $${doconf_target}; \
-	if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \
-	$(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d $(TARGET)/$(LIBSA_KERNEL_CONFIG) $(LIBSA_KERNEL_CONFIG); \
-	);
-
-do_all: $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG)/Makefile
-	$(_v)next_source=$(subst conf/,,$(SOURCE)); \
-	next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH)); \
-	${MAKE} -C $(COMPOBJROOT)/$(LIBSA_KERNEL_CONFIG) \
-		MAKEFILES=$(TARGET)/$(LIBSA_KERNEL_CONFIG)/Makefile \
-		SOURCE=$${next_source} \
-		RELATIVE_SOURCE_PATH=$${next_relsource} \
-		TARGET=$(TARGET) \
-		INCL_MAKEDEP=FALSE \
-		KERNEL_CONFIG=$(LIBSA_KERNEL_CONFIG) \
+	cp $? .; \
+	if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \
+	$(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG);
+
+do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile
+	$(_v)${MAKE} \
+		-C $(TARGET)/$(CURRENT_KERNEL_CONFIG) \
+		-f $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile \
+		CURRENT_KERNEL_CONFIG=${CURRENT_KERNEL_CONFIG} \
+		CURRENT_ARCH_CONFIG=${CURRENT_ARCH_CONFIG} \
+		CURRENT_MACHINE_CONFIG=${CURRENT_MACHINE_CONFIG} \
+		SOURCE=$(subst conf/,,$(SOURCE)) \
+		TARGET=${TARGET} \
+		OBJPATH=${OBJPATH} \
 		build_all;
 
-do_build_all: do_all
+do_build_all:: do_all
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
-
diff --git a/libsa/conf/Makefile.i386 b/libsa/conf/Makefile.i386
deleted file mode 100644
index b89fdd145..000000000
--- a/libsa/conf/Makefile.i386
+++ /dev/null
@@ -1,8 +0,0 @@
-######################################################################
-#BEGIN Machine dependent Makefile fragment for i386
-######################################################################
-
-
-######################################################################
-#END Machine dependent Makefile fragment for i386
-######################################################################
diff --git a/libsa/conf/Makefile.template b/libsa/conf/Makefile.template
index 26aede6b2..88f2eb5a3 100644
--- a/libsa/conf/Makefile.template
+++ b/libsa/conf/Makefile.template
@@ -5,17 +5,6 @@
 #  the terms and conditions for use and redistribution.
 #
-#
-# Export IDENT for sub-makefiles
-#
-export IDENT
-
-#
-# INCFLAGS
-#
-INCFLAGS_MAKEFILE=
-
-
 export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd
 export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
@@ -27,8 +16,7 @@ include $(MakeInc_def)
 #
 # CFLAGS
 #
-CFLAGS+= -include meta_features.h -DLIBSA_KERNEL_PRIVATE \
-	$(CFLAGS_INLINE_CONFIG)
+CFLAGS+= -include meta_features.h -DLIBSA_KERNEL_PRIVATE
 
 #
 # Directories for mig generated files
@@ -53,13 +41,8 @@ COMP_SUBDIRS =
 
 %CFILES
 
-%MFILES
-
 %SFILES
 
-%BFILES
-
-%ORDERED
 %MACHDEP
 
 #
@@ -69,33 +52,24 @@ COMP_SUBDIRS =
 #
 ${OBJS}: ${OBJSDEPS}
-
-%LOAD
-
 LDOBJS = $(OBJS)
 
 $(COMPONENT).filelist: $(LDOBJS)
-	$(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \
-	for kld_file in ${LDOBJS}; do \
-		$(SEG_HACK) __KLD $${kld_file} -o $${kld_file}__; \
+	$(_v)for kld_file in ${LDOBJS}; do \
+		$(SEG_HACK) -n __KLD -o $${kld_file}__ $${kld_file} ; \
 		mv $${kld_file}__ $${kld_file} ; \
-	done; \
-	fi
+	done
 	@echo LDFILELIST $(COMPONENT)
 	$(_v)( for obj in ${LDOBJS}; do \
-		echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \
+		echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \
 	done; ) > $(COMPONENT).filelist
 
-do_all: $(COMPONENT).filelist
-do_depend: do_all
-	$(_v)${MD} -u Makedep -f -d `ls *.d`
-
-do_build_all: do_depend
+do_all: $(COMPONENT).filelist
+do_build_all:: do_all
 
 %RULES
 
 include $(MakeInc_rule)
 include $(MakeInc_dir)
-
diff --git a/libsa/conf/Makefile.x86_64 b/libsa/conf/Makefile.x86_64
index d7024f6c7..50809235f 100644
--- a/libsa/conf/Makefile.x86_64
+++ b/libsa/conf/Makefile.x86_64
@@ -3,6 +3,9 @@
 ######################################################################
 
+# Bootstrap __KLD files must be Mach-O for "setsegname"
+$(foreach file,$(OBJS),$(eval $(file)_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG)))
+
 ######################################################################
 #END Machine dependent Makefile fragment for x86_64
 ######################################################################
diff --git a/libsa/lastkernelconstructor.c b/libsa/lastkernelconstructor.c
index 5b62f3fe6..aeffadaba 100644
--- a/libsa/lastkernelconstructor.c
+++ b/libsa/lastkernelconstructor.c
@@ -28,11 +28,13 @@
 extern void iokit_post_constructor_init(void);
 
-static void last_kernel_constructor(void) __attribute__ ((constructor,section("__TEXT, initcode")));
+static void last_kernel_constructor(void) __attribute__ ((constructor));
 static void last_kernel_constructor(void)
 {
     iokit_post_constructor_init();
 }
 
+__asm__(".globl _last_kernel_symbol");
 __asm__(".zerofill __LAST, __last, _last_kernel_symbol, 0");
+
diff --git a/libsa/conf/files.i386 b/libsyscall/Libsyscall.aliases
similarity index 100%
rename from libsa/conf/files.i386
rename to libsyscall/Libsyscall.aliases
diff --git a/libsyscall/Libsyscall.xcconfig b/libsyscall/Libsyscall.xcconfig
index 3f9e34bdb..4fa5f1097 100644
--- a/libsyscall/Libsyscall.xcconfig
+++ b/libsyscall/Libsyscall.xcconfig
@@ -1,5 +1,6 @@
 #include "/Makefiles/CoreOS/Xcode/BSD.xcconfig"
 BUILD_VARIANTS = normal
+SUPPORTED_PLATFORMS = macosx iphoneos
 ONLY_ACTIVE_ARCH = NO
 DEBUG_INFORMATION_FORMAT = dwarf-with-dsym
 INSTALL_PATH = /usr/lib/system
@@ -7,30 +8,34 @@ INSTALL_PATH[sdk=iphoneos*] = /usr/lib/system
 INSTALL_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/lib/system
 INSTALL_PATH[sdk=macosx*] = /usr/lib/system
 PUBLIC_HEADERS_FOLDER_PATH = /usr/include/mach
-PUBLIC_HEADERS_FOLDER_PATH[sdk=iphoneos*] = /usr/include/mach
-PUBLIC_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/include/mach
-PUBLIC_HEADERS_FOLDER_PATH[sdk=macosx*] = /usr/include/mach
+PUBLIC_HEADERS_FOLDER_PATH[sdk=iphoneos*] = /usr/include
+PUBLIC_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/include
+PUBLIC_HEADERS_FOLDER_PATH[sdk=macosx*] = /usr/include
 PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include
 PRIVATE_HEADERS_FOLDER_PATH[sdk=iphoneos*] = /usr/local/include
 PRIVATE_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/local/include
 PRIVATE_HEADERS_FOLDER_PATH[sdk=macosx*] = /usr/local/include
+OS_PRIVATE_HEADERS_FOLDER_PATH = /usr/local/include/os
+OS_PRIVATE_HEADERS_FOLDER_PATH[sdk=iphonesimulator*] = $(SDKROOT)/usr/local/include/os
 EXECUTABLE_PREFIX = libsystem_
 PRODUCT_NAME = kernel
 ALWAYS_SEARCH_USER_PATHS = NO
 ORDER_FILE[sdk=iphoneos*] = $(SDKROOT)/$(APPLE_INTERNAL_DIR)/OrderFiles/libsystem_kernel.order
-OTHER_CFLAGS = -fdollars-in-identifiers -no-cpp-precomp -fno-common -fno-stack-protector -pipe -DLIBSYSCALL_INTERFACE -D__DARWIN_VERS_1050=1
+OTHER_CFLAGS = -fdollars-in-identifiers -no-cpp-precomp -fno-common -fno-stack-protector -momit-leaf-frame-pointer -DLIBSYSCALL_INTERFACE -D__DARWIN_VERS_1050=1
 OTHER_CFLAGS[sdk=macosx*] = $(inherited) -DSYSCALL_PRE1050
 OTHER_CFLAGS[sdk=macosx*][arch=x86_64] = $(inherited) -DNO_SYSCALL_LEGACY
 OTHER_CFLAGS[sdk=iphoneos*] = $(inherited) -DNO_SYSCALL_LEGACY
 GCC_PREPROCESSOR_DEFINITIONS = CF_OPEN_SOURCE CF_EXCLUDE_CSTD_HEADERS DEBUG _FORTIFY_SOURCE=0
-HEADER_SEARCH_PATHS = $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders $(PROJECT_DIR)/mach $(PROJECT_DIR)/wrappers
+HEADER_SEARCH_PATHS = $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders $(PROJECT_DIR)/mach $(PROJECT_DIR)/os $(PROJECT_DIR)/wrappers $(PROJECT_DIR)/wrappers/string $(PROJECT_DIR)/wrappers/libproc $(PROJECT_DIR)/wrappers/libproc/spawn
 WARNING_CFLAGS = -Wmost
 GCC_TREAT_WARNINGS_AS_ERRORS = YES
 GCC_WARN_ABOUT_MISSING_NEWLINE = YES
 CODE_SIGN_IDENTITY = -
 DYLIB_CURRENT_VERSION = $(RC_ProjectSourceVersion)
+DYLIB_LDFLAGS = -umbrella System -all_load -Wl,-alias_list,$(SRCROOT)/Libsyscall.aliases
 OTHER_LDFLAGS =
 INSTALLHDRS_SCRIPT_PHASE = YES
+INSTALLHDRS_COPY_PHASE = YES
 USE_HEADERMAP = NO
 LINK_WITH_STANDARD_LIBRARIES = NO
 ALWAYS_SEARCH_USER_PATHS = YES
diff --git a/libsyscall/Libsyscall.xcodeproj/project.pbxproj b/libsyscall/Libsyscall.xcodeproj/project.pbxproj
index c391bcf32..f2e78e2e1 100644
--- a/libsyscall/Libsyscall.xcodeproj/project.pbxproj
+++ b/libsyscall/Libsyscall.xcodeproj/project.pbxproj
@@ -3,7 +3,7 @@
 	archiveVersion = 1;
 	classes = {
 	};
-	objectVersion = 45;
+	objectVersion = 46;
 	objects = {
 
 /* Begin PBXAggregateTarget section */
@@ -89,8 +89,39 @@
 		24E4782712088267009A384D /* _libc_funcptr.c in Sources */ = {isa = PBXBuildFile; fileRef = 24E47824120881DF009A384D /* _libc_funcptr.c */; };
 		291D3C281354FDD100D46061 /* mach_port.c in Sources */ = {isa = PBXBuildFile; fileRef = 291D3C261354FDD100D46061 /* mach_port.c */; };
 		291D3C291354FDD100D46061 /* mach_vm.c in Sources */ = {isa = PBXBuildFile; fileRef = 291D3C271354FDD100D46061 /* mach_vm.c */; };
+		467DAFD4157E8AF200CE68F0 /* guarded_open_np.c in Sources */ = {isa = PBXBuildFile; fileRef = 467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */; };
+		729B7D0A15C8938C000E2501 /* carbon_delete.c in Sources */ = {isa = PBXBuildFile; fileRef = FB50F1B315AB7DE700F814BA /* carbon_delete.c */; };
+		7466C924170CBA53004557CC /* vm_page_size.h in Headers */ = {isa = PBXBuildFile; fileRef = 7466C923170CB99B004557CC /* vm_page_size.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		A59CB95616669EFB00B064B3 /* stack_logging_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = A59CB95516669DB700B064B3 /* stack_logging_internal.h */; };
+		A59CB9581666A1A200B064B3 /* munmap.c in Sources */ = {isa = PBXBuildFile; fileRef = A59CB9571666A1A200B064B3 /* munmap.c */; };
+		C639F0E51741C25800A39F47 /* gethostuuid.h in Headers */ = {isa = PBXBuildFile; fileRef = C639F0E41741C09A00A39F47 /* gethostuuid.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C6AB38DB174202C10036DD9F /* gethostuuid.h in Headers */ = {isa = PBXBuildFile; fileRef = C639F0E41741C09A00A39F47 /* gethostuuid.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C6C40122174155E3000AE69F /* gethostuuid_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C6C40121174154D9000AE69F /* gethostuuid_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		C6C4012317415637000AE69F /* gethostuuid.c in Sources */ = {isa = PBXBuildFile; fileRef = C6C4012017415384000AE69F /* gethostuuid.c */; };
+		C6C401241741566D000AE69F /* gethostuuid_private.h in Headers */ = {isa = PBXBuildFile; fileRef = C6C40121174154D9000AE69F /* gethostuuid_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		C6D3EFB616542C510052CF30 /* errorlib.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCD1114B00600000D8B9 /* errorlib.h */; };
+		C6D3EFB716542C510052CF30 /* externs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCD6114B00600000D8B9 /* externs.h */; };
+		C6D3EFB816542C510052CF30 /* errorlib.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCD9114B00600000D8B9 /* errorlib.h */; };
+		C6D3EFB916542C510052CF30 /* mach.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDA114B00600000D8B9 /* mach.h */; };
+		C6D3EFBA16542C510052CF30 /* mach_error.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDB114B00600000D8B9 /* mach_error.h */; };
+		C6D3EFBB16542C510052CF30 /* mach_init.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDC114B00600000D8B9 /* mach_init.h */; };
+		C6D3EFBC16542C510052CF30 /* mach_interface.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDD114B00600000D8B9 /* mach_interface.h */; };
+		C6D3EFBD16542C510052CF30 /* port_obj.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDF114B00600000D8B9 /* port_obj.h */; };
+		C6D3EFBE16542C510052CF30 /* sync.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE0114B00600000D8B9 /* sync.h */; };
+		C6D3EFC116542C510052CF30 /* vm_task.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE3114B00600000D8B9 /* vm_task.h */; };
+		C6D3EFC216542C510052CF30 /* key_defs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BD08114B00600000D8B9 /* key_defs.h */; };
+		C6D3EFC316542C510052CF30 /* ls_defs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BD09114B00600000D8B9 /* ls_defs.h */; };
+		C6D3EFC416542C510052CF30 /* netname_defs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BD0C114B00600000D8B9 /* netname_defs.h */; };
+		C6D3EFC516542C510052CF30 /* nm_defs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BD0D114B00600000D8B9 /* nm_defs.h */; };
+		C6D3EFC616542C510052CF30 /* SYS.h in Headers */ = {isa = PBXBuildFile; fileRef = 24D1157411E671B20063D54D /* SYS.h */; };
+		C6D3EFC716542C510052CF30 /* abort.h in Headers */ = {isa = PBXBuildFile; fileRef = 247A08FF11F8E18000E4693F /* abort.h */; };
+		C6D3EFC816542C510052CF30 /* exc_catcher.h in Headers */ = {isa = PBXBuildFile; fileRef = 247A091611F8E7A800E4693F /* exc_catcher.h */; };
+		C6D3EFC916542C510052CF30 /* _libkernel_init.h in Headers */ = {isa = PBXBuildFile; fileRef = 247A08B211F8B05900E4693F /* _libkernel_init.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		C6D3F03016542C980052CF30 /* dummy.c in Sources */ = {isa = PBXBuildFile; fileRef = C6D3F02F16542C980052CF30 /* dummy.c */; };
 		C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */ = {isa = PBXBuildFile; fileRef = C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */; };
 		C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */ = {isa = PBXBuildFile; fileRef = C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */; };
+		C9A3D6EB1672AD1000A5CAA3 /* tsd.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = C9EE57F51669673D00337E4B /* tsd.h */; };
+		C9B6A5ED153795DE00749EBA /* alloc_once.c in Sources */ = {isa = PBXBuildFile; fileRef = C9C1824F15338C0B00933F23 /* alloc_once.c */; };
 		C9D9BD17114B00600000D8B9 /* vm_map_compat.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */; };
 		C9D9BD19114B00600000D8B9 /* clock_priv.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC5114B00600000D8B9 /* clock_priv.defs */; };
 		C9D9BD1A114B00600000D8B9 /* clock_reply.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCC6114B00600000D8B9 /* clock_reply.defs */; };
@@ -105,15 +136,13 @@
 		C9D9BD23114B00600000D8B9 /* externs.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCD6114B00600000D8B9 /* externs.h */; };
 		C9D9BD24114B00600000D8B9 /* fprintf_stderr.c in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCD7114B00600000D8B9 /* fprintf_stderr.c */; };
 		C9D9BD25114B00600000D8B9 /* errorlib.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCD9114B00600000D8B9 /* errorlib.h */; };
-		C9D9BD26114B00600000D8B9 /* mach.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDA114B00600000D8B9 /* mach.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		C9D9BD27114B00600000D8B9 /* mach_error.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDB114B00600000D8B9 /* mach_error.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		C9D9BD28114B00600000D8B9 /* mach_init.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDC114B00600000D8B9 /* mach_init.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		C9D9BD29114B00600000D8B9 /* mach_interface.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDD114B00600000D8B9 /* mach_interface.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		C9D9BD2B114B00600000D8B9 /* port_obj.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDF114B00600000D8B9 /* port_obj.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		C9D9BD2C114B00600000D8B9 /* sync.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE0114B00600000D8B9 /* sync.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		C9D9BD2D114B00600000D8B9 /* task.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE1114B00600000D8B9 /* task.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		C9D9BD2E114B00600000D8B9 /* thread_act.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE2114B00600000D8B9 /* thread_act.h */; settings = {ATTRIBUTES = (Public, ); }; };
-		C9D9BD2F114B00600000D8B9 /* vm_task.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE3114B00600000D8B9 /* vm_task.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		C9D9BD26114B00600000D8B9 /* mach.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDA114B00600000D8B9 /* mach.h */; };
+		C9D9BD27114B00600000D8B9 /* mach_error.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDB114B00600000D8B9 /* mach_error.h */; };
+		C9D9BD28114B00600000D8B9 /* mach_init.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDC114B00600000D8B9 /* mach_init.h */; };
+		C9D9BD29114B00600000D8B9 /* mach_interface.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDD114B00600000D8B9 /* mach_interface.h */; };
+		C9D9BD2B114B00600000D8B9 /* port_obj.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCDF114B00600000D8B9 /* port_obj.h */; };
+		C9D9BD2C114B00600000D8B9 /* sync.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE0114B00600000D8B9 /* sync.h */; };
+		C9D9BD2F114B00600000D8B9 /* vm_task.h in Headers */ = {isa = PBXBuildFile; fileRef = C9D9BCE3114B00600000D8B9 /* vm_task.h */; };
 		C9D9BD30114B00600000D8B9 /* host_priv.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCE4114B00600000D8B9 /* host_priv.defs */; };
 		C9D9BD31114B00600000D8B9 /* host_security.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCE5114B00600000D8B9 /* host_security.defs */; };
 		C9D9BD35114B00600000D8B9 /* lock_set.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BCEA114B00600000D8B9 /* lock_set.defs */; };
@@ -145,6 +174,30 @@
 		C9D9BD57114B00600000D8B9 /* task.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD0F114B00600000D8B9 /* task.defs */; };
 		C9D9BD58114B00600000D8B9 /* thread_act.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD10114B00600000D8B9 /* thread_act.defs */; };
 		C9D9BD59114B00600000D8B9 /* vm_map.defs in Sources */ = {isa = PBXBuildFile; fileRef = C9D9BD11114B00600000D8B9 /* vm_map.defs */; };
+		C9FD8508166D6BD400963B73 /* tsd.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = C9EE57F51669673D00337E4B /* tsd.h */; };
+		E453AF351700FD3C00F2C94C /* getiopolicy_np.c in Sources */ = {isa = PBXBuildFile; fileRef = E453AF341700FD3C00F2C94C /* getiopolicy_np.c */; };
+		E453AF3617013CBF00F2C94C /* libproc.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C2B16F868ED0002AF25 /* libproc.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		E453AF3717013CC200F2C94C /* libproc_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C2C16F868ED0002AF25 /* libproc_internal.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		E453AF3817013F1400F2C94C /* spawn.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C3D16FB20970002AF25 /* spawn.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		E453AF3917013F1B00F2C94C /* spawn_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C3E16FB20970002AF25 /* spawn_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		E453AF3A17013F4C00F2C94C /* stack_logging_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = A59CB95516669DB700B064B3 /* stack_logging_internal.h */; };
+		E4D45C2416F856900002AF25 /* __commpage_gettimeofday.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D45C2116F856900002AF25 /* __commpage_gettimeofday.c */; };
+		E4D45C2516F856900002AF25 /* __commpage_gettimeofday.s in Sources */ = {isa = PBXBuildFile; fileRef = E4D45C2216F856900002AF25 /* __commpage_gettimeofday.s */; };
+		E4D45C2616F856900002AF25 /* mach_absolute_time.s in Sources */ = {isa = PBXBuildFile; fileRef = E4D45C2316F856900002AF25 /* mach_absolute_time.s */; };
+		E4D45C2E16F868ED0002AF25 /* libproc.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D45C2A16F868ED0002AF25 /* libproc.c */; };
+		E4D45C2F16F868ED0002AF25 /* libproc.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C2B16F868ED0002AF25 /* libproc.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		E4D45C3016F868ED0002AF25 /* libproc_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C2C16F868ED0002AF25 /* libproc_internal.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		E4D45C3116F868ED0002AF25 /* proc_listpidspath.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D45C2D16F868ED0002AF25 /* proc_listpidspath.c */; };
+		E4D45C3616F86BD80002AF25 /* posix_spawn.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D45C3316F86BD80002AF25 /* posix_spawn.c */; };
+		E4D45C3F16FB20D30002AF25 /* spawn.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C3D16FB20970002AF25 /* spawn.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		E4D45C4016FB20DC0002AF25 /* spawn_private.h in Headers */ = {isa = PBXBuildFile; fileRef = E4D45C3E16FB20970002AF25 /* spawn_private.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		E4D7E55C16F8776300F92D8D /* index.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55316F8776300F92D8D /* index.c */; };
+		E4D7E55E16F8776300F92D8D /* memset.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55516F8776300F92D8D /* memset.c */; };
+		E4D7E55F16F8776300F92D8D /* strcmp.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55616F8776300F92D8D /* strcmp.c */; };
+		E4D7E56016F8776300F92D8D /* strcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55716F8776300F92D8D /* strcpy.c */; };
+		E4D7E56116F8776300F92D8D /* strlcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55916F8776300F92D8D /* strlcpy.c */; };
+		E4D7E56216F8776300F92D8D /* strlen.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55A16F8776300F92D8D /* strlen.c */; };
+		E4D7E56316F8776300F92D8D /* strsep.c in Sources */ = {isa = PBXBuildFile; fileRef = E4D7E55B16F8776300F92D8D /* strsep.c */; };
 		EE3F605A149A6D66003BAEBA /* getaudit.c in Sources */ = {isa = PBXBuildFile; fileRef = EE3F6059149A6D66003BAEBA /* getaudit.c */; };
 /* End PBXBuildFile section */
@@ -172,6 +225,29 @@
 		};
 /* End PBXContainerItemProxy section */
 
+/* Begin PBXCopyFilesBuildPhase section */
+		C63F480B1654203800A1F78F /* CopyFiles */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 8;
+			dstPath = "$(OS_PRIVATE_HEADERS_FOLDER_PATH)";
+			dstSubfolderSpec = 0;
+			files = (
+				C9FD8508166D6BD400963B73 /* tsd.h in CopyFiles */,
+			);
+			runOnlyForDeploymentPostprocessing = 1;
+		};
+		C6D3EFCA16542C510052CF30 /* CopyFiles */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 8;
+			dstPath = "$(OS_PRIVATE_HEADERS_FOLDER_PATH)";
+			dstSubfolderSpec = 0;
+			files = (
+				C9A3D6EB1672AD1000A5CAA3 /* tsd.h in CopyFiles */,
+			);
+			runOnlyForDeploymentPostprocessing = 1;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
 /* Begin PBXFileReference section */
 		030B179A135377B400DAD1F0 /* open_dprotected_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = open_dprotected_np.c; sourceTree = "<group>"; };
 		240D716711933ED300556E97 /* mach_install_mig.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = mach_install_mig.sh; sourceTree = "<group>"; };
@@ -180,7 +256,6 @@
 		242AB66511EBDC1200107336 /* errno.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = errno.c; sourceTree = "<group>"; };
 		24484A7311F51E9800E10CD2 /* string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = string.h; sourceTree = "<group>"; };
 		24484A7411F51E9800E10CD2 /* string.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = string.c; sourceTree = "<group>"; };
-		24484A9211F61D1900E10CD2 /* mig_reply_port.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mig_reply_port.h; sourceTree = "<group>"; };
 		24484A9311F61D1900E10CD2 /* mig_reply_port.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mig_reply_port.c; sourceTree = "<group>"; };
 		24614EA111E7A2ED00E78584 /* compile-syscalls.pl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.perl; path = "compile-syscalls.pl"; sourceTree = "<group>"; };
 		24614F0311E7CB5B00E78584 /* syscalls.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = syscalls.a; path = $BUILD_ROOT/syscalls.a; sourceTree = "<group>"; };
@@ -239,16 +314,14 @@
 		24D1156811E671B20063D54D /* __gettimeofday.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __gettimeofday.s; sourceTree = "<group>"; };
 		24D1156911E671B20063D54D /* __lseek.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __lseek.s; sourceTree = "<group>"; };
 		24D1156A11E671B20063D54D /* __pipe.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __pipe.s; sourceTree = "<group>"; };
-		24D1156B11E671B20063D54D /* __psynch_cvbroad.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __psynch_cvbroad.s; sourceTree = "<group>"; };
-		24D1156C11E671B20063D54D /* __psynch_cvwait.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __psynch_cvwait.s; sourceTree = "<group>"; };
 		24D1156D11E671B20063D54D /* __ptrace.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __ptrace.s; sourceTree = "<group>"; };
 		24D1156E11E671B20063D54D /* __sigaltstack.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __sigaltstack.s; sourceTree = "<group>"; };
 		24D1156F11E671B20063D54D /* __sigreturn.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __sigreturn.s; sourceTree = "<group>"; };
 		24D1157011E671B20063D54D /* __syscall.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __syscall.s; sourceTree = "<group>"; };
 		24D1157111E671B20063D54D /* __thread_selfid.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __thread_selfid.s; sourceTree = "<group>"; };
-		24D1157211E671B20063D54D /* __vfork.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __vfork.s; sourceTree = "<group>"; };
-		24D1157311E671B20063D54D /* custom.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = custom.s; sourceTree = "<group>"; };
-		24D1157411E671B20063D54D /* SYS.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SYS.h; sourceTree = "<group>"; };
+		24D1157211E671B20063D54D /* __vfork.s */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 8; lastKnownFileType = sourcecode.asm; path = __vfork.s; sourceTree = "<group>"; tabWidth = 8; };
+		24D1157311E671B20063D54D /* custom.s */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 8; lastKnownFileType = sourcecode.asm; path = custom.s; sourceTree = "<group>"; tabWidth = 8; };
+		24D1157411E671B20063D54D /* SYS.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 8; lastKnownFileType = sourcecode.c.h; path = SYS.h; sourceTree = "<group>"; tabWidth = 8; };
 		24D1158C11E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
 		24D1158F11E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
 		24D1159111E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
@@ -256,10 +329,20 @@
 		24D1159811E672270063D54D /* syscall.map */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = syscall.map; sourceTree = "<group>"; };
 		24D1159911E6723E0063D54D /* create-syscalls.pl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.perl; path = "create-syscalls.pl"; sourceTree = "<group>"; };
 		24E47824120881DF009A384D /* _libc_funcptr.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = _libc_funcptr.c; sourceTree = "<group>"; };
-		291D3C261354FDD100D46061 /* mach_port.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = mach_port.c; path = mach/mach_port.c; sourceTree = "<group>"; };
-		291D3C271354FDD100D46061 /* mach_vm.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = mach_vm.c; path = mach/mach_vm.c; sourceTree = "<group>"; };
+		291D3C261354FDD100D46061 /* mach_port.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_port.c; sourceTree = "<group>"; };
+		291D3C271354FDD100D46061 /* mach_vm.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = mach_vm.c; sourceTree = "<group>"; };
+		467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = guarded_open_np.c; sourceTree = "<group>"; };
+		7466C923170CB99B004557CC /* vm_page_size.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = vm_page_size.h; sourceTree = "<group>"; };
+		A59CB95516669DB700B064B3 /* stack_logging_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stack_logging_internal.h; sourceTree = "<group>"; };
+		A59CB9571666A1A200B064B3 /* munmap.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = munmap.c; sourceTree = "<group>"; };
+		C639F0E41741C09A00A39F47 /* gethostuuid.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = gethostuuid.h; sourceTree = "<group>"; };
+		C6C4012017415384000AE69F /* gethostuuid.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = gethostuuid.c; sourceTree = "<group>"; };
+		C6C40121174154D9000AE69F /* gethostuuid_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = gethostuuid_private.h; sourceTree = "<group>"; };
+		C6D3F02E16542C510052CF30 /* libsystem_Libsyscall_headers_Sim.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsystem_Libsyscall_headers_Sim.a; sourceTree = BUILT_PRODUCTS_DIR; };
+		C6D3F02F16542C980052CF30 /* dummy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dummy.c; sourceTree = "<group>"; };
 		C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm; path = __get_cpu_capabilities.s; sourceTree = "<group>"; };
 		C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = init_cpu_capabilities.c; sourceTree = "<group>"; };
+		C9C1824F15338C0B00933F23 /* alloc_once.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = alloc_once.c; sourceTree = "<group>"; };
 		C9D9BCBF114B00600000D8B9 /* .open_source_exclude */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = .open_source_exclude; sourceTree = "<group>"; };
 		C9D9BCC2114B00600000D8B9 /* vm_map_compat.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = vm_map_compat.c; sourceTree = "<group>"; };
 		C9D9BCC5114B00600000D8B9 /* clock_priv.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = clock_priv.defs; sourceTree = "<group>"; };
@@ -288,8 +371,6 @@
 		C9D9BCDD114B00600000D8B9 /* mach_interface.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mach_interface.h; sourceTree = "<group>"; };
 		C9D9BCDF114B00600000D8B9 /* port_obj.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = port_obj.h; sourceTree = "<group>"; };
 		C9D9BCE0114B00600000D8B9 /* sync.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sync.h; sourceTree = "<group>"; };
-		C9D9BCE1114B00600000D8B9 /* task.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = task.h; sourceTree = "<group>"; };
-		C9D9BCE2114B00600000D8B9 /* thread_act.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = thread_act.h; sourceTree = "<group>"; };
 		C9D9BCE3114B00600000D8B9 /* vm_task.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vm_task.h; sourceTree = "<group>"; };
 		C9D9BCE4114B00600000D8B9 /* host_priv.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = host_priv.defs; sourceTree = "<group>"; };
 		C9D9BCE5114B00600000D8B9 /* host_security.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = host_security.defs; sourceTree = "<group>"; };
@@ -323,8 +404,30 @@
 		C9D9BD10114B00600000D8B9 /* thread_act.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = thread_act.defs; sourceTree = "<group>"; };
 		C9D9BD11114B00600000D8B9 /* vm_map.defs */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.mig; path = vm_map.defs; sourceTree = "<group>"; };
 		C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = Libsyscall.xcconfig; sourceTree = "<group>"; };
+		C9EE57F51669673D00337E4B /* tsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tsd.h; sourceTree = "<group>"; };
 		D2AAC0630554660B00DB518D /* libsystem_kernel.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libsystem_kernel.a; sourceTree = BUILT_PRODUCTS_DIR; };
+		E40C845216FAFB3F00C238DD /* Libsyscall.aliases */ = {isa = PBXFileReference; lastKnownFileType = text; path = Libsyscall.aliases; sourceTree = "<group>"; };
+		E453AF341700FD3C00F2C94C /* getiopolicy_np.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getiopolicy_np.c; sourceTree = "<group>"; };
+		E4D45C2116F856900002AF25 /* __commpage_gettimeofday.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = __commpage_gettimeofday.c; sourceTree = "<group>"; };
+		E4D45C2216F856900002AF25 /* __commpage_gettimeofday.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = __commpage_gettimeofday.s; sourceTree = "<group>"; };
+		E4D45C2316F856900002AF25 /* mach_absolute_time.s */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = mach_absolute_time.s; sourceTree = "<group>"; };
+		E4D45C2A16F868ED0002AF25 /* libproc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = libproc.c; sourceTree = "<group>"; };
+		E4D45C2B16F868ED0002AF25 /* libproc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = libproc.h; sourceTree = "<group>"; };
+		E4D45C2C16F868ED0002AF25 /* libproc_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = libproc_internal.h; sourceTree = "<group>"; };
+		E4D45C2D16F868ED0002AF25 /* proc_listpidspath.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = proc_listpidspath.c; sourceTree = "<group>"; };
+		E4D45C3316F86BD80002AF25 /* posix_spawn.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = posix_spawn.c; sourceTree = "<group>"; };
+		E4D45C3D16FB20970002AF25 /* spawn.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = spawn.h; sourceTree = "<group>"; };
+		E4D45C3E16FB20970002AF25 /* spawn_private.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = spawn_private.h; sourceTree = "<group>"; };
+		E4D7E55316F8776300F92D8D /* index.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = index.c; sourceTree = "<group>"; };
+		E4D7E55516F8776300F92D8D /* memset.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = memset.c; sourceTree = "<group>"; };
+		E4D7E55616F8776300F92D8D /* strcmp.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = strcmp.c; sourceTree = "<group>"; };
+		E4D7E55716F8776300F92D8D /* strcpy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = strcpy.c; sourceTree = "<group>"; };
+		E4D7E55816F8776300F92D8D /* strings.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = strings.h; sourceTree = "<group>"; };
+		E4D7E55916F8776300F92D8D /* strlcpy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = strlcpy.c; sourceTree = "<group>"; };
+		E4D7E55A16F8776300F92D8D /* strlen.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = strlen.c; sourceTree = "<group>"; };
+		E4D7E55B16F8776300F92D8D /* strsep.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = strsep.c; sourceTree = "<group>"; };
 		EE3F6059149A6D66003BAEBA /* getaudit.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getaudit.c; sourceTree = "<group>"; };
+		FB50F1B315AB7DE700F814BA /* carbon_delete.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = carbon_delete.c; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -350,18 +453,19 @@
 		08FB7794FE84155DC02AAC07 /* mach */ = {
 			isa = PBXGroup;
 			children = (
-				291D3C261354FDD100D46061 /* mach_port.c */,
-				291D3C271354FDD100D46061 /* mach_vm.c */,
 				C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */,
+				E40C845216FAFB3F00C238DD /* Libsyscall.aliases */,
 				24D1158911E672270063D54D /* Platforms */,
 				24D1156511E671B20063D54D /* custom */,
 				08FB7795FE84155DC02AAC07 /* mach */,
+				C9C1824E15338BEB00933F23 /* os */,
 				247A08B011F8AF1700E4693F /* wrappers */,
 				240D716611933ED300556E97 /* xcodescripts */,
 				1AB674ADFE9D54B511CA2CBB /* Products */,
 			);
 			name = mach;
 			sourceTree = "<group>";
+			usesTabs = 1;
 		};
 		08FB7795FE84155DC02AAC07 /* mach */ = {
 			isa = PBXGroup;
@@ -398,14 +502,15 @@
 				C9D9BCF0114B00600000D8B9 /* mach_init.c */,
 				2485235411582D8F0051B413 /* mach_legacy.c */,
 				C9D9BCF1114B00600000D8B9 /* mach_msg.c */,
+				291D3C261354FDD100D46061 /* mach_port.c */,
 				C9D9BCF2114B00600000D8B9 /* mach_port.defs */,
 				C9D9BCF3114B00600000D8B9 /* mach_traps.s */,
+				291D3C271354FDD100D46061 /* mach_vm.c */,
 				C9D9BCF4114B00600000D8B9 /* mach_vm.defs */,
 				C9D9BCF6114B00600000D8B9 /* mig_allocate.c */,
 				C9D9BCF7114B00600000D8B9 /* mig_deallocate.c */,
 				C9D9BCF8114B00600000D8B9 /* mig_reply_setup.c */,
 				C9D9BCF9114B00600000D8B9 /* mig_strncpy.c */,
-				24484A9211F61D1900E10CD2 /* mig_reply_port.h */,
 				24484A9311F61D1900E10CD2 /* mig_reply_port.c */,
 				C9D9BCFA114B00600000D8B9 /* ms_thread_switch.c */,
 				C9D9BCFB114B00600000D8B9 /* notify.defs */,
@@ -416,6 +521,7 @@
 				C9D9BD06114B00600000D8B9 /* semaphore.c */,
 				C9D9BD07114B00600000D8B9 /* servers */,
 				C9D9BD0E114B00600000D8B9 /* slot_name.c */,
+				A59CB95516669DB700B064B3 /* stack_logging_internal.h */,
 				24484A7311F51E9800E10CD2 /* string.h */,
 				24484A7411F51E9800E10CD2 /* string.c */,
 				C9D9BD0F114B00600000D8B9 /* task.defs */,
@@ -432,6 +538,7 @@
 				24614F0311E7CB5B00E78584 /* syscalls.a */,
 				D2AAC0630554660B00DB518D /* libsystem_kernel.a */,
 				249C60FF1194747600ED73F3 /* libsystem_kernel.dylib */,
+				C6D3F02E16542C510052CF30 /* libsystem_Libsyscall_headers_Sim.a */,
 			);
 			name = Products;
 			sourceTree = "<group>";
@@ -454,6 +561,7 @@
 				248BA01E121C607E008C073F /* fchmod.c */,
 				248BA068121D9E27008C073F /* getrlimit.c */,
 				248BA086121DA72D008C073F /* mmap.c */,
+				A59CB9571666A1A200B064B3 /* munmap.c */,
 				248BA0CC121DEBEF008C073F /* setrlimit.c */,
 			);
 			path = unix03;
@@ -465,22 +573,33 @@
 				248BA04A121C8EE4008C073F /* cancelable */,
 				2419382912135FE1003CDE41 /* unix03 */,
 				24A7C6951200AF8A007669EB /* legacy */,
-				C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */,
+				E4D45C2916F868ED0002AF25 /* libproc */,
+				E4D45C3B16FB20970002AF25 /* spawn */,
+				E4D7E55216F8776300F92D8D /* string */,
 				247A08B211F8B05900E4693F /* _libkernel_init.h */,
 				247A08B311F8B05900E4693F /* _libkernel_init.c */,
-				030B179A135377B400DAD1F0 /* open_dprotected_np.c */,
-				24E47824120881DF009A384D /* _libc_funcptr.c */,
+				E4D45C2116F856900002AF25 /* __commpage_gettimeofday.c */,
+				E4D45C2216F856900002AF25 /* __commpage_gettimeofday.s */,
+				C99A4F4E1305B1B70054B7B7 /* __get_cpu_capabilities.s */,
 				24A7C5CB11FF973C007669EB /* _errno.h */,
+				24E47824120881DF009A384D /* _libc_funcptr.c */,
+				FB50F1B315AB7DE700F814BA /* carbon_delete.c */,
+				E453AF341700FD3C00F2C94C /* getiopolicy_np.c */,
+				467DAFD3157E8AF200CE68F0 /* guarded_open_np.c */,
 				C99A4F511305B43F0054B7B7 /* init_cpu_capabilities.c */,
 				248BA07F121DA36B008C073F /* ioctl.c */,
 				248BA081121DA4F3008C073F /* kill.c */,
-				24B028D511FF4FBB00CA64A9 /* memcpy.c */,
+				E4D45C2316F856900002AF25 /* mach_absolute_time.s */,
 				24B8C2611237F53900D36CC3 /* remove-counter.c */,
+				030B179A135377B400DAD1F0 /* open_dprotected_np.c */,
 				248AA966122C7CDA0085F5B1 /* rename.c */,
 				248AA964122C7C330085F5B1 /* rmdir.c */,
 				248BA090121DDD7F008C073F /* select-base.c */,
 				24B223B3121DFF12007DAEDE /* sigsuspend-base.c */,
 				248AA962122C7B2A0085F5B1 /* unlink.c */,
+				C6C40121174154D9000AE69F /* gethostuuid_private.h */,
+				C639F0E41741C09A00A39F47 /* gethostuuid.h */,
+				C6C4012017415384000AE69F /* gethostuuid.c */,
 			);
 			path = wrappers;
 			sourceTree = "<group>";
@@ -532,13 +651,12 @@
 		24D1156511E671B20063D54D /* custom */ = {
 			isa = PBXGroup;
 			children = (
+				C6D3F02F16542C980052CF30 /* dummy.c */,
 				24D1156611E671B20063D54D /* __fork.s */,
 				24D1156711E671B20063D54D /* __getpid.s */,
 				24D1156811E671B20063D54D /* __gettimeofday.s */,
 				24D1156911E671B20063D54D /* __lseek.s */,
 				24D1156A11E671B20063D54D /* __pipe.s */,
-				24D1156B11E671B20063D54D /* __psynch_cvbroad.s */,
-				24D1156C11E671B20063D54D /* __psynch_cvwait.s */,
 				24D1156D11E671B20063D54D /* __ptrace.s */,
 				24D1156E11E671B20063D54D /* __sigaltstack.s */,
 				24D1156F11E671B20063D54D /* __sigreturn.s */,
@@ -612,6 +730,15 @@
 			path = x86_64;
 			sourceTree = "<group>";
 		};
+		C9C1824E15338BEB00933F23 /* os */ = {
+			isa = PBXGroup;
+			children = (
+				C9C1824F15338C0B00933F23 /* alloc_once.c */,
+				C9EE57F51669673D00337E4B /* tsd.h */,
+			);
+			path = os;
+			sourceTree = "<group>";
+		};
 		C9D9BCBE114B00600000D8B9 /* arm */ = {
 			isa = PBXGroup;
 			children = (
@@ -631,9 +758,8 @@
 				C9D9BCDD114B00600000D8B9 /* mach_interface.h */,
 				C9D9BCDF114B00600000D8B9 /* port_obj.h */,
 				C9D9BCE0114B00600000D8B9 /* sync.h */,
-				C9D9BCE1114B00600000D8B9 /* task.h */,
-				C9D9BCE2114B00600000D8B9 /* thread_act.h */,
 				C9D9BCE3114B00600000D8B9 /* vm_task.h */,
+				7466C923170CB99B004557CC /* vm_page_size.h */,
 			);
 			path = mach;
 			sourceTree = "<group>";
@@ -650,24 +776,94 @@
 			path = servers;
 			sourceTree = "<group>";
 		};
+		E4D45C2916F868ED0002AF25 /* libproc */ = {
+			isa = PBXGroup;
+			children = (
+				E4D45C2A16F868ED0002AF25 /* libproc.c */,
+				E4D45C2D16F868ED0002AF25 /* proc_listpidspath.c */,
+				E4D45C2B16F868ED0002AF25 /* libproc.h */,
+				E4D45C2C16F868ED0002AF25 /* libproc_internal.h */,
+			);
+			path = libproc;
+			sourceTree = "<group>";
+		};
+		E4D45C3B16FB20970002AF25 /* spawn */ = {
+			isa = PBXGroup;
+			children = (
+				E4D45C3316F86BD80002AF25 /* posix_spawn.c */,
+				E4D45C3D16FB20970002AF25 /* spawn.h */,
+				E4D45C3E16FB20970002AF25 /* spawn_private.h */,
+			);
+			path = spawn;
+			sourceTree = "<group>";
+		};
+		E4D7E55216F8776300F92D8D /* string */ = {
+			isa = PBXGroup;
+			children = (
+				E4D7E55316F8776300F92D8D /* index.c */,
+				24B028D511FF4FBB00CA64A9 /* memcpy.c */,
+				E4D7E55516F8776300F92D8D /* memset.c */,
+				E4D7E55616F8776300F92D8D /* strcmp.c */,
+				E4D7E55716F8776300F92D8D /* strcpy.c */,
+				E4D7E55816F8776300F92D8D /* strings.h */,
+				E4D7E55916F8776300F92D8D /* strlcpy.c */,
+				E4D7E55A16F8776300F92D8D /* strlen.c */,
+				E4D7E55B16F8776300F92D8D /* strsep.c */,
+			);
+			path = string;
+			sourceTree = "<group>";
+		};
 /* End PBXGroup section */
 
 /* Begin PBXHeadersBuildPhase section */
+		C6D3EFB516542C510052CF30 /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				C6D3EFB616542C510052CF30 /* errorlib.h in Headers */,
+				C6D3EFB716542C510052CF30 /* externs.h in Headers */,
+				C6AB38DB174202C10036DD9F /* gethostuuid.h in Headers */,
+				C6D3EFB816542C510052CF30 /* errorlib.h in Headers */,
+				C6C401241741566D000AE69F /* gethostuuid_private.h in Headers */,
+				C6D3EFB916542C510052CF30 /* mach.h in Headers */,
+				C6D3EFBA16542C510052CF30 /* mach_error.h in Headers */,
+				C6D3EFBB16542C510052CF30 /* mach_init.h in Headers */,
+				C6D3EFBC16542C510052CF30 /* mach_interface.h in Headers */,
+				C6D3EFBD16542C510052CF30 /* port_obj.h in Headers */,
+				C6D3EFBE16542C510052CF30 /* sync.h in Headers */,
+				C6D3EFC116542C510052CF30 /* vm_task.h in Headers */,
+				C6D3EFC216542C510052CF30 /* key_defs.h in Headers */,
+				C6D3EFC316542C510052CF30 /* ls_defs.h in Headers */,
+				C6D3EFC416542C510052CF30 /* netname_defs.h in Headers */,
+				C6D3EFC516542C510052CF30 /* nm_defs.h in Headers */,
+				C6D3EFC616542C510052CF30 /* SYS.h in Headers */,
+				C6D3EFC716542C510052CF30 /* abort.h in Headers */,
+				C6D3EFC816542C510052CF30 /* exc_catcher.h in Headers */,
+				C6D3EFC916542C510052CF30 /* _libkernel_init.h in Headers */,
+				E453AF3A17013F4C00F2C94C /* stack_logging_internal.h in Headers */,
+				E453AF3817013F1400F2C94C /* spawn.h in Headers */,
+				E453AF3917013F1B00F2C94C /* spawn_private.h in Headers */,
+				E453AF3617013CBF00F2C94C /* libproc.h in Headers */,
+				E453AF3717013CC200F2C94C /* libproc_internal.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 		D2AAC0600554660B00DB518D /* Headers */ = {
 			isa = PBXHeadersBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
 				C9D9BD1E114B00600000D8B9 /* errorlib.h in Headers */,
 				C9D9BD23114B00600000D8B9 /* externs.h in Headers */,
+				C639F0E51741C25800A39F47 /* gethostuuid.h in Headers */,
 				C9D9BD25114B00600000D8B9 /* errorlib.h in Headers */,
 				C9D9BD26114B00600000D8B9 /* mach.h in Headers */,
 				C9D9BD27114B00600000D8B9 /* mach_error.h in Headers */,
 				C9D9BD28114B00600000D8B9 /* mach_init.h in Headers */,
+				C6C40122174155E3000AE69F /* gethostuuid_private.h in Headers */,
 				C9D9BD29114B00600000D8B9 /* mach_interface.h in Headers */,
 				C9D9BD2B114B00600000D8B9 /* port_obj.h in Headers */,
+				7466C924170CBA53004557CC /* vm_page_size.h in Headers */,
 				C9D9BD2C114B00600000D8B9 /* sync.h in Headers */,
-				C9D9BD2D114B00600000D8B9 /* task.h in Headers */,
-				C9D9BD2E114B00600000D8B9 /* thread_act.h in Headers */,
 				C9D9BD2F114B00600000D8B9 /* vm_task.h in Headers */,
 				C9D9BD50114B00600000D8B9 /* key_defs.h in Headers */,
 				C9D9BD51114B00600000D8B9 /* ls_defs.h in Headers */,
@@ -677,15 +873,20 @@
 				247A090011F8E18000E4693F /* abort.h in Headers */,
 				247A091711F8E7A800E4693F /* exc_catcher.h in Headers */,
 				24B028F511FF5C3500CA64A9 /* _libkernel_init.h in Headers */,
+				A59CB95616669EFB00B064B3 /* stack_logging_internal.h in Headers */,
+				E4D45C3F16FB20D30002AF25 /* spawn.h in Headers */,
+				E4D45C4016FB20DC0002AF25 /* spawn_private.h in Headers */,
+				E4D45C2F16F868ED0002AF25 /* libproc.h in Headers */,
+				E4D45C3016F868ED0002AF25 /* libproc_internal.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
 /* End PBXHeadersBuildPhase section */
 
 /* Begin PBXNativeTarget section */
-		249C60FE1194747600ED73F3 /* Libmach Dynamic */ = {
+		249C60FE1194747600ED73F3 /* Libsyscall_dynamic */ = {
 			isa = PBXNativeTarget;
-			buildConfigurationList = 249C6102119474D700ED73F3 /* Build configuration list for PBXNativeTarget "Libmach Dynamic" */;
+			buildConfigurationList = 249C6102119474D700ED73F3 /* Build configuration list for PBXNativeTarget "Libsyscall_dynamic" */;
 			buildPhases = (
 				249C61281194815000ED73F3 /* Sources */,
 				249C60FD1194747600ED73F3 /* Frameworks */,
@@ -696,17 +897,36 @@
 			dependencies = (
 				249C610A1194750700ED73F3 /* PBXTargetDependency */,
 			);
-			name = "Libmach Dynamic";
+			name = Libsyscall_dynamic;
 			productName = Libmach;
 			productReference = 249C60FF1194747600ED73F3 /* libsystem_kernel.dylib */;
 			productType = "com.apple.product-type.library.dynamic";
 		};
-		D2AAC0620554660B00DB518D /* Libmach */ = {
+		C6D3EFB216542C510052CF30 /* Libsyscall_headers_Sim */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = C6D3F02C16542C510052CF30 /* Build configuration list for PBXNativeTarget "Libsyscall_headers_Sim" */;
+			buildPhases = (
+				C6D3EFB516542C510052CF30 /* Headers */,
+				C6D3EFCA16542C510052CF30 /* CopyFiles */,
+				C6D3EFCC16542C510052CF30 /* Install MIG Headers */,
+				C6D3EFCD16542C510052CF30 /* Sources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = Libsyscall_headers_Sim;
+			productName = mach;
+			productReference = C6D3F02E16542C510052CF30 /* libsystem_Libsyscall_headers_Sim.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+		D2AAC0620554660B00DB518D /* Libsyscall_static */ = {
 			isa = PBXNativeTarget;
-			buildConfigurationList = 1DEB914A08733D8E0010E9CD /* Build configuration list for PBXNativeTarget "Libmach" */;
+			buildConfigurationList = 1DEB914A08733D8E0010E9CD /* Build configuration list for PBXNativeTarget "Libsyscall_static" */;
 			buildPhases = (
 				D2AAC0600554660B00DB518D /* Headers */,
-				2487545E11629934000975E0 /* Install Headers */,
+				C63F480B1654203800A1F78F /* CopyFiles */,
+				2487545E11629934000975E0 /* Install MIG Headers */,
 				D2AAC0610554660B00DB518D /* Sources */,
 				D289988505E68E00004EDB86 /* Frameworks */,
 			);
@@ -715,7 +935,7 @@
 			dependencies = (
 				242AB67911ED03ED00107336 /* PBXTargetDependency */,
 			);
-			name = Libmach;
+			name = Libsyscall_static;
 			productName = mach;
 			productReference = D2AAC0630554660B00DB518D /* libsystem_kernel.a */;
 			productType = "com.apple.product-type.library.static";
@@ -725,8 +945,11 @@
 /* Begin PBXProject section */
 		08FB7793FE84155DC02AAC07 /* Project object */ = {
 			isa = PBXProject;
+			attributes = {
+				LastUpgradeCheck = 0500;
+			};
 			buildConfigurationList = 1DEB914E08733D8E0010E9CD /* Build configuration list for PBXProject "Libsyscall" */;
-			compatibilityVersion = "Xcode 3.1";
+			compatibilityVersion = "Xcode 3.2";
 			developmentRegion = English;
 			hasScannedForEncodings = 1;
 			knownRegions = (
@@ -741,8 +964,9 @@
 			targets = (
 				249C61101194755D00ED73F3 /* Build */,
 				24614EF311E7C98600E78584 /* Syscalls */,
-				D2AAC0620554660B00DB518D /* Libmach */,
-				249C60FE1194747600ED73F3 /* Libmach Dynamic */,
+				D2AAC0620554660B00DB518D /* Libsyscall_static */,
+				249C60FE1194747600ED73F3 /* Libsyscall_dynamic */,
+				C6D3EFB216542C510052CF30 /* Libsyscall_headers_Sim */,
 			);
 		};
 /* End PBXProject section */
@@ -774,7 +998,7 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 			shellPath = /bin/sh;
-			shellScript = "set -x\n\nmkdir -p $OBJROOT/sys\n\n$SRCROOT/xcodescripts/create-syscalls.pl \\\n\t$SRCROOT/../bsd/kern/syscalls.master \\\n\t$SRCROOT/custom \\\n\t$SRCROOT/Platforms \\\n\t$MAP_PLATFORM \\\n\t$OBJROOT/sys\n";
+			shellScript = "set -x\n[[ $ACTION == \"installhdrs\" ]] && exit 0\n\nmkdir -p $OBJROOT/sys\n\n$SRCROOT/xcodescripts/create-syscalls.pl \\\n\t$SRCROOT/../bsd/kern/syscalls.master \\\n\t$SRCROOT/custom \\\n\t$SRCROOT/Platforms \\\n\t$MAP_PLATFORM \\\n\t$OBJROOT/sys\n";
 		};
 		24614EF611E7C9A000E78584 /* Compile Syscalls */ = {
 			isa = PBXShellScriptBuildPhase;
@@ -789,16 +1013,30 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 			shellPath = /bin/sh;
-			shellScript = "set -x\n\nmkdir -p $OBJROOT/UninstalledProducts\n\n$SRCROOT/xcodescripts/compile-syscalls.pl \\\n\t$OBJROOT/sys/stubs.list \\\n\t$BUILD_ROOT/syscalls.a";
+			shellScript = "set -x\n[[ $ACTION == \"installhdrs\" ]] && exit 0\n\nmkdir -p $OBJROOT/UninstalledProducts\n\n$SRCROOT/xcodescripts/compile-syscalls.pl \\\n\t$OBJROOT/sys/stubs.list \\\n\t$BUILD_ROOT/syscalls.a";
+		};
+		2487545E11629934000975E0 /* Install MIG Headers */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			name = "Install MIG Headers";
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"$PROJECT_DIR\"/xcodescripts/mach_install_mig.sh";
 		};
-		2487545E11629934000975E0 /* Install Headers */ = {
+		C6D3EFCC16542C510052CF30 /* Install MIG Headers */ = {
 			isa = PBXShellScriptBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
 			);
 			inputPaths = (
 			);
-			name = "Install Headers";
+			name = "Install MIG Headers";
 			outputPaths = (
 			);
 			runOnlyForDeploymentPostprocessing = 0;
@@ -813,11 +1051,26 @@
 			buildActionMask = 2147483647;
 			files = (
 				24E4782712088267009A384D /* _libc_funcptr.c in Sources */,
+				E4D7E56216F8776300F92D8D /* strlen.c in Sources */,
+				E4D7E55C16F8776300F92D8D /* index.c in Sources */,
+				E4D7E55F16F8776300F92D8D /* strcmp.c in Sources */,
+				E4D7E55E16F8776300F92D8D /* memset.c in Sources */,
 				240BAC4C1214770F000A1719 /* memcpy.c in Sources */,
+				E4D7E56316F8776300F92D8D /* strsep.c in Sources */,
+				E4D7E56016F8776300F92D8D /* strcpy.c in Sources */,
+				E4D7E56116F8776300F92D8D /* strlcpy.c in Sources */,
 				249C612F1194828600ED73F3 /* dylib_link.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		C6D3EFCD16542C510052CF30 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				C6D3F03016542C980052CF30 /* dummy.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 		D2AAC0610554660B00DB518D /* Sources */ = {
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
@@ -848,10 +1101,13 @@
 				C9D9BD36114B00600000D8B9 /* mach_error_string.c in Sources */,
 				C9D9BD37114B00600000D8B9 /* mach_error.c in Sources */,
 				C9D9BD3B114B00600000D8B9 /* mach_init.c in Sources */,
+				E4D45C2616F856900002AF25 /* mach_absolute_time.s in Sources */,
 				C9D9BD3C114B00600000D8B9 /* mach_msg.c in Sources */,
 				C9D9BD3E114B00600000D8B9 /* mach_traps.s in Sources */,
 				C9D9BD41114B00600000D8B9 /* mig_allocate.c in Sources */,
+				E4D45C2516F856900002AF25 /* __commpage_gettimeofday.s in Sources */,
 				C9D9BD42114B00600000D8B9 /* mig_deallocate.c in Sources */,
+				E4D45C2416F856900002AF25 /* __commpage_gettimeofday.c in Sources */,
 				C9D9BD43114B00600000D8B9 /* mig_reply_setup.c in Sources */,
 				24484A9411F61D2B00E10CD2 /* mig_reply_port.c in Sources */,
 				C9D9BD44114B00600000D8B9 /* mig_strncpy.c in Sources */,
@@ -861,12 +1117,15 @@
 				C9D9BD4F114B00600000D8B9 /* semaphore.c in Sources */,
 				C9D9BD56114B00600000D8B9 /* slot_name.c in Sources */,
 				24484A7511F6178E00E10CD2 /* string.c in Sources */,
+				E453AF351700FD3C00F2C94C /* getiopolicy_np.c in Sources */,
 				2485235511582D8F0051B413 /* mach_legacy.c in Sources */,
 				C9D9BD17114B00600000D8B9 /* vm_map_compat.c in Sources */,
 				242AB66611EBDC1200107336 /* errno.c in Sources */,
+				E4D45C2E16F868ED0002AF25 /* libproc.c in Sources */,
 				247A08C211F8BDC900E4693F /* _libkernel_init.c in Sources */,
 				24A7C5BC11FF8DA6007669EB /* accept.c in Sources */,
 				24A7C5BD11FF8DA6007669EB /* bind.c in Sources */,
+				C6C4012317415637000AE69F /* gethostuuid.c in Sources */,
 				24A7C5BF11FF8DA6007669EB /* getattrlist.c in Sources */,
 				24A7C5C011FF8DA6007669EB /* getpeername.c in Sources */,
 				24A7C5C111FF8DA6007669EB /* getsockname.c in Sources */,
@@ -881,6 +1140,7 @@
 				2419382B12135FF6003CDE41 /* chmod.c in Sources */,
 				248BA01D121C56BF008C073F /* connect.c in Sources */,
 				248BA01F121C607E008C073F /* fchmod.c in Sources */,
+				E4D45C3616F86BD80002AF25 /* posix_spawn.c in Sources */,
 				248BA04F121C8F06008C073F /* fcntl.c in Sources */,
 				248BA05C121C9649008C073F /* fcntl-cancel.c in Sources */,
 				248BA069121D9E27008C073F /* getrlimit.c in Sources */,
@@ -907,9 +1167,14 @@
 				C99A4F501305B2BD0054B7B7 /* __get_cpu_capabilities.s in Sources */,
 				C99A4F531305B43F0054B7B7 /* init_cpu_capabilities.c in Sources */,
 				030B179B135377B400DAD1F0 /* open_dprotected_np.c in Sources */,
+				E4D45C3116F868ED0002AF25 /* proc_listpidspath.c in Sources */,
 				291D3C281354FDD100D46061 /* mach_port.c in Sources */,
 				291D3C291354FDD100D46061 /* mach_vm.c in Sources */,
 				EE3F605A149A6D66003BAEBA /* getaudit.c in Sources */,
+				C9B6A5ED153795DE00749EBA /* alloc_once.c in Sources */,
+				467DAFD4157E8AF200CE68F0 /* guarded_open_np.c in Sources */,
+				729B7D0A15C8938C000E2501 /* carbon_delete.c in Sources */,
+				A59CB9581666A1A200B064B3 /* munmap.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -923,12 +1188,12 @@
 		};
 		249C610A1194750700ED73F3 /* PBXTargetDependency */ = {
 			isa = PBXTargetDependency;
-			target = D2AAC0620554660B00DB518D /* Libmach */;
+			target = D2AAC0620554660B00DB518D /* Libsyscall_static */;
 			targetProxy = 249C61091194750700ED73F3 /* PBXContainerItemProxy */;
 		};
 		249C61151194756A00ED73F3 /* PBXTargetDependency */ = {
 			isa = PBXTargetDependency;
-			target = 249C60FE1194747600ED73F3 /* Libmach Dynamic */;
+			target = 249C60FE1194747600ED73F3 /* Libsyscall_dynamic */;
 			targetProxy = 249C61141194756A00ED73F3 /* PBXContainerItemProxy */;
 		};
 /* End PBXTargetDependency section */
@@ -943,31 +1208,35 @@
 				"INSTALL_PATH[sdk=iphoneos*]" = /usr/local/lib/dyld;
 				"INSTALL_PATH[sdk=iphonesimulator*]" = "$(SDKROOT)/usr/local/lib/dyld";
 				"INSTALL_PATH[sdk=macosx*]" = /usr/local/lib/dyld;
+				STRIP_INSTALLED_PRODUCT = NO;
 			};
 			name = Release;
 		};
 		1DEB915008733D8E0010E9CD /* Release */ = {
 			isa = XCBuildConfiguration;
+			baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
 			buildSettings = {
 				GCC_C_LANGUAGE_STANDARD = gnu99;
 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_PARAMETER = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
-				PREBINDING = NO;
 			};
 			name = Release;
 		};
 		24614EF411E7C98600E78584 /* Release */ = {
 			isa = XCBuildConfiguration;
+			baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
 			buildSettings = {
 				ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
 				COPY_PHASE_STRIP = YES;
 				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
-				GCC_ENABLE_FIX_AND_CONTINUE = NO;
 				MAP_PLATFORM = "$(MAP_PLATFORM_$(PLATFORM_NAME))";
 				MAP_PLATFORM_iphoneos = iPhoneOS;
 				MAP_PLATFORM_macosx = MacOSX;
 				PRODUCT_NAME = Syscalls;
-				ZERO_LINK = NO;
+				STRIP_STYLE = debugging;
 			};
 			name = Release;
 		};
@@ -975,11 +1244,8 @@
 			isa = XCBuildConfiguration;
 			baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
 			buildSettings = {
-				OTHER_LDFLAGS = (
-					"-umbrella",
-					System,
-					"-all_load",
-				);
+				OTHER_LDFLAGS = "$(DYLIB_LDFLAGS)";
+				STRIP_INSTALLED_PRODUCT = NO;
 				VERSION_INFO_PREFIX = "___";
 			};
 			name = Release;
@@ -989,13 +1255,24 @@
 			baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
 			buildSettings = {
 				PRODUCT_NAME = Build;
+				STRIP_STYLE = debugging;
+			};
+			name = Release;
+		};
+		C6D3F02D16542C510052CF30 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = C9D9BE0F114FFADC0000D8B9 /* Libsyscall.xcconfig */;
+			buildSettings = {
+				COPY_PHASE_STRIP = NO;
+				PRODUCT_NAME = Libsyscall_headers_Sim;
+				SKIP_INSTALL = YES;
 			};
 			name = Release;
 		};
/* End XCBuildConfiguration section */
 
 /* Begin XCConfigurationList section */
-		1DEB914A08733D8E0010E9CD /* Build configuration list for PBXNativeTarget "Libmach" */ = {
+		1DEB914A08733D8E0010E9CD /* Build configuration list for PBXNativeTarget "Libsyscall_static" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
 				1DEB914C08733D8E0010E9CD /* Release */,
@@ -1019,7 +1296,7 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
-		249C6102119474D700ED73F3 /* Build configuration list for PBXNativeTarget "Libmach Dynamic" */ = {
+		249C6102119474D700ED73F3 /* Build configuration list for PBXNativeTarget "Libsyscall_dynamic" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
 				249C61001194747600ED73F3 /* Release */,
@@ -1035,6 +1312,14 @@
+1312,14 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + C6D3F02C16542C510052CF30 /* Build configuration list for PBXNativeTarget "Libsyscall_headers_Sim" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + C6D3F02D16542C510052CF30 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; /* End XCConfigurationList section */ }; rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; diff --git a/libsyscall/Platforms/MacOSX/i386/syscall.map b/libsyscall/Platforms/MacOSX/i386/syscall.map index 656bd8fdb..be628a466 100644 --- a/libsyscall/Platforms/MacOSX/i386/syscall.map +++ b/libsyscall/Platforms/MacOSX/i386/syscall.map @@ -34,9 +34,9 @@ _msgrcv$UNIX2003 ___msgrcv _msgsnd ___msgsnd_nocancel _msgsnd$NOCANCEL$UNIX2003 ___msgsnd_nocancel _msgsnd$UNIX2003 ___msgsnd +_msgsys ___msgsys _msync$NOCANCEL$UNIX2003 ___msync_nocancel _msync$UNIX2003 ___msync -_munmap$UNIX2003 ___munmap _open$NOCANCEL$UNIX2003 ___open_nocancel _open$UNIX2003 ___open _poll ___poll_nocancel @@ -65,6 +65,7 @@ _sem_wait ___sem_wait_nocancel _sem_wait$NOCANCEL$UNIX2003 ___sem_wait_nocancel _sem_wait$UNIX2003 ___sem_wait _semctl$UNIX2003 ___semctl +_semsys ___semsys _sendmsg$NOCANCEL$UNIX2003 ___sendmsg_nocancel _sendmsg$UNIX2003 ___sendmsg _sendto$NOCANCEL$UNIX2003 ___sendto_nocancel @@ -74,6 +75,8 @@ _setpgrp ___setpgid _setregid$UNIX2003 ___setregid _setreuid$UNIX2003 ___setreuid _shmctl$UNIX2003 ___shmctl +_shmsys ___shmsys +_shm_open ___shm_open _socketpair$UNIX2003 ___socketpair _stat$INODE64 ___stat64 _statfs$INODE64 ___statfs64 diff --git a/libsyscall/Platforms/MacOSX/x86_64/syscall.map b/libsyscall/Platforms/MacOSX/x86_64/syscall.map index d717a159c..2769c32a6 100644 --- a/libsyscall/Platforms/MacOSX/x86_64/syscall.map +++ b/libsyscall/Platforms/MacOSX/x86_64/syscall.map @@ -9,6 +9,7 @@ _getfsstat$INODE64 ___getfsstat64 _lstat$INODE64 ___lstat64 _msgrcv$NOCANCEL ___msgrcv_nocancel _msgsnd$NOCANCEL ___msgsnd_nocancel +_msgsys ___msgsys _msync$NOCANCEL ___msync_nocancel _open$NOCANCEL ___open_nocancel _poll$NOCANCEL ___poll_nocancel @@ -21,6 +22,7 @@ _recvmsg$NOCANCEL ___recvmsg_nocancel _select$DARWIN_EXTSN ___select _select$DARWIN_EXTSN$NOCANCEL ___select_nocancel _sem_wait$NOCANCEL ___sem_wait_nocancel +_semsys ___semsys _sendmsg$NOCANCEL ___sendmsg_nocancel _sendto$NOCANCEL ___sendto_nocancel _stat$INODE64 ___stat64 @@ -40,7 +42,6 @@ _listen ___listen _mprotect ___mprotect _msgctl ___msgctl _msync ___msync -_munmap ___munmap _open ___open _recvfrom ___recvfrom _recvmsg ___recvmsg @@ -51,5 +52,7 @@ _sendto ___sendto _setattrlist ___setattrlist _setregid ___setregid _setreuid ___setreuid -_shmctl ___shmctl +_shmctl ___shmctl +_shmsys ___shmsys +_shm_open ___shm_open _socketpair ___socketpair diff --git a/libsyscall/custom/SYS.h b/libsyscall/custom/SYS.h index 675fede15..46e3735d9 100644 --- a/libsyscall/custom/SYS.h +++ b/libsyscall/custom/SYS.h @@ -70,21 +70,21 @@ #define UNIX_SYSCALL_SYSENTER call __sysenter_trap #define UNIX_SYSCALL(name, nargs) \ - .globl cerror ;\ + .globl tramp_cerror ;\ LEAF(_##name, 0) ;\ movl $ SYS_##name, %eax ;\ UNIX_SYSCALL_SYSENTER ;\ jnb 2f ;\ - BRANCH_EXTERN(cerror) ;\ + BRANCH_EXTERN(tramp_cerror) ;\ 2: #define UNIX_SYSCALL_INT(name, nargs) \ - .globl cerror ;\ + .globl tramp_cerror ;\ LEAF(_##name, 0) ;\ movl $ SYS_##name, %eax ;\ UNIX_SYSCALL_TRAP ;\ jnb 2f ;\ - BRANCH_EXTERN(cerror) ;\ + BRANCH_EXTERN(tramp_cerror) ;\ 2: #if defined(__SYSCALL_32BIT_ARG_BYTES) && ((__SYSCALL_32BIT_ARG_BYTES >= 
4) && (__SYSCALL_32BIT_ARG_BYTES <= 20)) @@ -92,31 +92,30 @@ LEAF(_##name, 0) ;\ movl $(SYS_##name | (__SYSCALL_32BIT_ARG_BYTES << I386_SYSCALL_ARG_BYTES_SHIFT)), %eax ;\ UNIX_SYSCALL_SYSENTER ;\ jnb 2f ;\ - BRANCH_EXTERN(cerror) ;\ + BRANCH_EXTERN(tramp_##cerror) ;\ 2: #else /* __SYSCALL_32BIT_ARG_BYTES < 4 || > 20 */ -#define UNIX_SYSCALL_NONAME(name, nargs, cerror) \ - .globl cerror ;\ +#define UNIX_SYSCALL_NONAME(name, nargs, cerror) \ movl $ SYS_##name, %eax ;\ UNIX_SYSCALL_SYSENTER ;\ jnb 2f ;\ - BRANCH_EXTERN(cerror) ;\ + BRANCH_EXTERN(tramp_##cerror) ;\ 2: #endif #define UNIX_SYSCALL_INT_NONAME(name, nargs) \ - .globl cerror ;\ + .globl tramp_cerror_nocancel ;\ movl $ SYS_##name, %eax ;\ UNIX_SYSCALL_TRAP ;\ jnb 2f ;\ - BRANCH_EXTERN(cerror_nocancel) ;\ + BRANCH_EXTERN(tramp_cerror_nocancel) ;\ 2: -#define PSEUDO(pseudo, name, nargs, cerror) \ +#define PSEUDO(pseudo, name, nargs, cerror) \ LEAF(pseudo, 0) ;\ UNIX_SYSCALL_NONAME(name, nargs, cerror) -#define PSEUDO_INT(pseudo, name, nargs) \ +#define PSEUDO_INT(pseudo, name, nargs) \ LEAF(pseudo, 0) ;\ UNIX_SYSCALL_INT_NONAME(name, nargs) @@ -125,7 +124,7 @@ LEAF(pseudo, 0) ;\ ret #define __SYSCALL(pseudo, name, nargs) \ - PSEUDO(pseudo, name, nargs, cerror) ;\ + PSEUDO(pseudo, name, nargs, cerror) ;\ ret #define __SYSCALL_INT(pseudo, name, nargs) \ @@ -141,21 +140,23 @@ LEAF(pseudo, 0) ;\ movq %rcx, %r10 ;\ syscall -#define UNIX_SYSCALL(name, nargs) \ - .globl cerror ;\ -LEAF(_##name, 0) ;\ - movl $ SYSCALL_CONSTRUCT_UNIX(SYS_##name), %eax ;\ - UNIX_SYSCALL_SYSCALL ;\ - jnb 2f ;\ - BRANCH_EXTERN(cerror) ;\ +#define UNIX_SYSCALL(name, nargs) \ + .globl cerror ;\ +LEAF(_##name, 0) ;\ + movl $ SYSCALL_CONSTRUCT_UNIX(SYS_##name), %eax ;\ + UNIX_SYSCALL_SYSCALL ;\ + jnb 2f ;\ + movq %rax, %rdi ;\ + BRANCH_EXTERN(_cerror) ;\ 2: -#define UNIX_SYSCALL_NONAME(name, nargs, cerror) \ - .globl cerror ;\ - movl $ SYSCALL_CONSTRUCT_UNIX(SYS_##name), %eax ;\ - UNIX_SYSCALL_SYSCALL ;\ - jnb 2f ;\ - BRANCH_EXTERN(cerror) ;\ +#define UNIX_SYSCALL_NONAME(name, nargs, cerror) \ + .globl cerror ;\ + movl $ SYSCALL_CONSTRUCT_UNIX(SYS_##name), %eax ;\ + UNIX_SYSCALL_SYSCALL ;\ + jnb 2f ;\ + movq %rax, %rdi ;\ + BRANCH_EXTERN(_##cerror) ;\ 2: #define PSEUDO(pseudo, name, nargs, cerror) \ diff --git a/libsyscall/custom/__fork.s b/libsyscall/custom/__fork.s index 2de3a9a28..5857ab2ad 100644 --- a/libsyscall/custom/__fork.s +++ b/libsyscall/custom/__fork.s @@ -55,7 +55,7 @@ LEAF(___fork, 0) UNIX_SYSCALL_TRAP // do the system call jnc L1 // jump if CF==0 - CALL_EXTERN(cerror) + CALL_EXTERN(tramp_cerror) movl $-1,%eax addl $28, %esp // restore the stack ret @@ -81,7 +81,8 @@ LEAF(___fork, 0) UNIX_SYSCALL_TRAP // do the system call jnc L1 // jump if CF==0 - CALL_EXTERN(cerror) + movq %rax, %rdi + CALL_EXTERN(_cerror) movq $-1, %rax addq $24, %rsp // restore the stack ret diff --git a/libsyscall/custom/__syscall.s b/libsyscall/custom/__syscall.s index 73735bd4b..f00894425 100644 --- a/libsyscall/custom/__syscall.s +++ b/libsyscall/custom/__syscall.s @@ -39,7 +39,7 @@ LEAF(___syscall, 0) movl (%esp),%edx // add one element to stack so pushl %ecx // caller "pop" will work jnb 2f - BRANCH_EXTERN(cerror) + BRANCH_EXTERN(tramp_cerror) 2: END(___syscall) diff --git a/libsyscall/custom/__vfork.s b/libsyscall/custom/__vfork.s index 91408f9c3..8f5cd224d 100644 --- a/libsyscall/custom/__vfork.s +++ b/libsyscall/custom/__vfork.s @@ -72,7 +72,7 @@ LEAF(___vfork, 0) lock incl __current_pid pushl %ecx - BRANCH_EXTERN(cerror) + BRANCH_EXTERN(tramp_cerror) L1: testl %edx, %edx 
// CF=OF=0, ZF set if zero result
@@ -108,13 +108,13 @@ LEAF(___vfork, 0)
 	popq	%rdi		// return address in %rdi
 	movq	$ SYSCALL_CONSTRUCT_UNIX(SYS_vfork), %rax	// code for vfork -> rax
 	UNIX_SYSCALL_TRAP	// do the system call
-	jnb	L1		// jump if CF==0
+	jnb	L1		// jump if CF==0
 	pushq	%rdi		// put return address back on stack for cerror
 	movq	__current_pid@GOTPCREL(%rip), %rcx
 	lock addq	$1, (%rcx)
-	movq	(%rcx), %rdi
-	BRANCH_EXTERN(cerror)
+	movq	%rax, %rdi
+	BRANCH_EXTERN(_cerror)
 L1:
 	testl	%edx, %edx	// CF=OF=0, ZF set if zero result
diff --git a/libsyscall/custom/custom.s b/libsyscall/custom/custom.s
index a6a4f8bb8..b76b96fe7 100644
--- a/libsyscall/custom/custom.s
+++ b/libsyscall/custom/custom.s
@@ -32,24 +32,29 @@
 #if defined(__i386__)
-	.globl	_errno
+/*
+ * i386 needs custom assembly to transform the return from syscalls
+ * into a proper stack for a function call out to cerror{,_nocancel}.
+ */
+
+LABEL(tramp_cerror)
+	mov	%esp, %edx
+	andl	$0xfffffff0, %esp
+	subl	$16, %esp
+	movl	%edx, 4(%esp)
+	movl	%eax, (%esp)
+	CALL_EXTERN(_cerror)
+	movl	4(%esp), %esp
+	ret
-LABEL(cerror)
-	movl	$0,%ecx
-	jmp	1f
-LABEL(cerror_nocancel)
-	movl	$1,%ecx
-1:	REG_TO_EXTERN(%eax, _errno)
-	mov	%esp,%edx
-	andl	$0xfffffff0,%esp
-	subl	$16,%esp
-	movl	%edx,8(%esp)
-	movl	%ecx,4(%esp)
-	movl	%eax,(%esp)
-	CALL_EXTERN(_cthread_set_errno_self)
-	movl	8(%esp),%esp
-	movl	$-1,%eax
-	movl	$-1,%edx	/* in case a 64-bit value is returned */
+LABEL(tramp_cerror_nocancel)
+	mov	%esp, %edx
+	andl	$0xfffffff0, %esp
+	subl	$16, %esp
+	movl	%edx, 4(%esp)
+	movl	%eax, (%esp)
+	CALL_EXTERN(_cerror_nocancel)
+	movl	4(%esp), %esp
 	ret
 LABEL(__sysenter_trap)
@@ -57,45 +62,35 @@ LABEL(__sysenter_trap)
 	movl	%esp, %ecx
 	sysenter
-#elif defined(__x86_64__)
-
-	.globl	_errno
+	.globl	_i386_get_ldt
+	ALIGN
+_i386_get_ldt:
+	movl	$6,%eax
+	MACHDEP_SYSCALL_TRAP
+	jnb	2f
+	jmp	tramp_cerror
+2:	ret
-LABEL(cerror)
-	/* cancelable syscall, for arg1 to _cthread_set_errno_self */
-	movq	$0,%rsi
-	jmp	1f
-LABEL(cerror_nocancel)
-	/* non-cancelable, see above. */
-	movq	$1,%rsi
-1:	PICIFY(_errno)	/* address -> %r11 */
-	movl	%eax,(%r11)
-	mov	%rsp,%rdx
-	andq	$-16,%rsp
-	subq	$16,%rsp
-	// Preserve the original stack
-	movq	%rdx,(%rsp)
-	movq	%rax,%rdi
-	CALL_EXTERN(_cthread_set_errno_self)
-	// Restore the original stack
-	movq	(%rsp),%rsp
-	movq	$-1,%rax
-	movq	$-1,%rdx	/* in case a 128-bit value is returned */
-	ret
-#else
-#error Unsupported architecture
-#endif
+	.globl	_i386_set_ldt
+	ALIGN
+_i386_set_ldt:
+	movl	$5,%eax
+	MACHDEP_SYSCALL_TRAP
+	jnb	2f
+	jmp	tramp_cerror
+2:	ret
-#if defined(__i386__) || defined(__x86_64__)
+#elif defined(__x86_64__)
 	.globl	_i386_get_ldt
 	ALIGN
 _i386_get_ldt:
 	movl	$6,%eax
 	MACHDEP_SYSCALL_TRAP
-	jnb	2f
-	jmp	cerror
+	jnb	2f
+	movq	%rax, %rdi
+	jmp	_cerror
 2:	ret
@@ -104,8 +99,9 @@ _i386_get_ldt:
 _i386_set_ldt:
 	movl	$5,%eax
 	MACHDEP_SYSCALL_TRAP
-	jnb	2f
-	jmp	cerror
+	jnb	2f
+	movq	%rax, %rdi
+	jmp	_cerror
 2:	ret
 #endif
diff --git a/libsyscall/custom/dummy.c b/libsyscall/custom/dummy.c
new file mode 100644
index 000000000..62131d9b9
--- /dev/null
+++ b/libsyscall/custom/dummy.c
@@ -0,0 +1 @@
+/* Empty source file for Libsyscall_headers_Sim */
diff --git a/libsyscall/custom/errno.c b/libsyscall/custom/errno.c
index 58da2c114..640ed14fd 100644
--- a/libsyscall/custom/errno.c
+++ b/libsyscall/custom/errno.c
@@ -26,4 +26,73 @@
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+#include
+#include
+#include
+#include "tsd.h"
+
+/*
+ * cerror takes the return value of the syscall, being non-zero, and
+ * stores it in errno. It needs to return -1 to indicate failure but
+ * 64-bit platforms need to ensure that possible 128-bit wide return
+ * values are also properly set.
+ */
+#ifdef __LP64__
+typedef unsigned __int128 cerror_return_t;
+#else
+typedef uint64_t cerror_return_t;
+#endif
+
+extern void _pthread_exit_if_canceled(int error);
+
+#undef errno
+int errno;
+
+int *
+__error(void)
+{
+	void *ptr = _os_tsd_get_direct(__TSD_ERRNO);
+	if (ptr != NULL) {
+		return (int*)ptr;
+	}
+	return &errno;
+}
+
+__attribute__((noinline))
+cerror_return_t
+cerror_nocancel(int err)
+{
+	errno = err;
+	int *tsderrno = (int*)_os_tsd_get_direct(__TSD_ERRNO);
+	if (tsderrno) {
+		*tsderrno = err;
+	}
+	return -1;
+}
+
+__attribute__((noinline))
+cerror_return_t
+cerror(int err)
+{
+	_pthread_exit_if_canceled(err);
+	return cerror_nocancel(err);
+}
+
+#if !TARGET_OS_EMBEDDED
+
+// Internal symbol no longer used by anybody in Libsystem but required for
+// backwards compatibility with 3rd parties
+
+void
+cthread_set_errno_self(int err, int nocancel)
+{
+	asm(".global $ld$hide$os10.9$_cthread_set_errno_self\n\t"
+	    ".set $ld$hide$os10.9$_cthread_set_errno_self, _cthread_set_errno_self");
+	if (nocancel) {
+		cerror_nocancel(err);
+	} else {
+		cerror(err);
+	}
+}
+
+#endif
diff --git a/libsyscall/mach/abort.h b/libsyscall/mach/abort.h
index 7b99c1cf0..5954a2187 100644
--- a/libsyscall/mach/abort.h
+++ b/libsyscall/mach/abort.h
@@ -30,7 +30,12 @@
 #define __SIGABRT 6
 #define __STDERR_FILENO 2
-int __getpid(void);
-int __kill(int pid, int signum, int posix);
+extern int __getpid(void);
+extern int __kill(int pid, int signum, int posix);
+extern int __exit(int) __attribute__((noreturn));
-#define abort() __kill(__getpid(), __SIGABRT, 0)
+static inline void __attribute__((noreturn))
+abort(void) {
+	(void)__kill(__getpid(), __SIGABRT, 0);
+	__exit(1);
+}
diff --git a/libsyscall/mach/clock_sleep.c b/libsyscall/mach/clock_sleep.c
index dbcca39d2..8cf83d094 100644
--- a/libsyscall/mach/clock_sleep.c
+++ b/libsyscall/mach/clock_sleep.c
@@ -25,6 +25,7
@@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include #include #include #include diff --git a/libsyscall/mach/err_iokit.sub b/libsyscall/mach/err_iokit.sub old mode 100755 new mode 100644 diff --git a/libsyscall/mach/exc_catcher.h b/libsyscall/mach/exc_catcher.h index 28aac2508..a7db99753 100644 --- a/libsyscall/mach/exc_catcher.h +++ b/libsyscall/mach/exc_catcher.h @@ -32,31 +32,31 @@ #include "_libkernel_init.h" typedef kern_return_t (*_libkernel_exc_raise_func_t)(mach_port_t, - mach_port_t, - mach_port_t, - exception_type_t, - exception_data_t, - mach_msg_type_number_t); + mach_port_t, + mach_port_t, + exception_type_t, + exception_data_t, + mach_msg_type_number_t); typedef kern_return_t (*_libkernel_exc_raise_state_func_t)(mach_port_t, - exception_type_t, - exception_data_t, - mach_msg_type_number_t, - int *, - thread_state_t, - mach_msg_type_number_t, - thread_state_t, - mach_msg_type_number_t *); + exception_type_t, + exception_data_t, + mach_msg_type_number_t, + int *, + thread_state_t, + mach_msg_type_number_t, + thread_state_t, + mach_msg_type_number_t *); typedef kern_return_t (*_libkernel_exec_raise_state_identity_t)(mach_port_t, - mach_port_t, mach_port_t, - exception_type_t, - exception_data_t, - mach_msg_type_number_t, - int *, thread_state_t, - mach_msg_type_number_t, - thread_state_t, - mach_msg_type_number_t *); + mach_port_t, mach_port_t, + exception_type_t, + exception_data_t, + mach_msg_type_number_t, + int *, thread_state_t, + mach_msg_type_number_t, + thread_state_t, + mach_msg_type_number_t *); #define RTLD_DEFAULT ((void *) -2) extern void* (*_dlsym)(void*, const char*); diff --git a/libsyscall/mach/mach/mach.h b/libsyscall/mach/mach/mach.h index 0b3b1a0a5..a94230e0b 100644 --- a/libsyscall/mach/mach/mach.h +++ b/libsyscall/mach/mach/mach.h @@ -116,6 +116,12 @@ extern mach_msg_return_t mach_msg_server(boolean_t (*) mach_port_t, mach_msg_options_t); +extern mach_msg_return_t mach_msg_server_importance(boolean_t (*) + (mach_msg_header_t *, + mach_msg_header_t *), + mach_msg_size_t, + mach_port_t, + mach_msg_options_t); /* * Prototypes for compatibility */ diff --git a/bsd/netat/ep.h b/libsyscall/mach/mach/vm_page_size.h similarity index 67% rename from bsd/netat/ep.h rename to libsyscall/mach/mach/vm_page_size.h index 5ab512f3c..4d8f1c0b0 100644 --- a/bsd/netat/ep.h +++ b/libsyscall/mach/mach/vm_page_size.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,29 +22,25 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. 
- * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* - * ORIGINS: 82 - * - * (C) COPYRIGHT Apple Computer, Inc. 1992-1996 - * All Rights Reserved - * - */ -#ifndef _NETAT_EP_H_ -#define _NETAT_EP_H_ -#include +#ifndef _VM_PAGE_SIZE_H_ +#define _VM_PAGE_SIZE_H_ + +#include +#include -#ifdef __APPLE_API_OBSOLETE +__BEGIN_DECLS -#define EP_REQUEST 1 /* Echo request packet */ -#define EP_REPLY 2 /* Echo reply packet */ +extern vm_size_t vm_kernel_page_size __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +extern vm_size_t vm_kernel_page_mask __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +extern int vm_kernel_page_shift __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); -/* Misc. definitions */ +#define trunc_page_kernel(x) ((x) & (~vm_kernel_page_mask)) +#define round_page_kernel(x) trunc_kernel_page((x) + vm_kernel_page_mask) -#define EP_DATA_SIZE 585 /* Maximum size of EP data */ +__END_DECLS -#endif /* __APPLE_API_OBSOLETE */ -#endif /* _NETAT_EP_H_ */ +#endif diff --git a/libsyscall/mach/mach_init.c b/libsyscall/mach/mach_init.c index c2702539e..19e87120e 100644 --- a/libsyscall/mach/mach_init.c +++ b/libsyscall/mach/mach_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2010 Apple Inc. All rights reserved. + * Copyright (c) 1999-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -60,24 +60,29 @@ #include #include #include "externs.h" -#include "mig_reply_port.h" -mach_port_t mach_task_self_ = MACH_PORT_NULL; +mach_port_t bootstrap_port = MACH_PORT_NULL; +mach_port_t mach_task_self_ = MACH_PORT_NULL; #ifdef __i386__ -mach_port_t mach_host_self_ = MACH_PORT_NULL; +mach_port_t mach_host_self_ = MACH_PORT_NULL; #endif +extern mach_port_t _task_reply_port; -vm_size_t vm_page_size = PAGE_SIZE; -vm_size_t vm_page_mask = PAGE_MASK; -int vm_page_shift = PAGE_SHIFT; +vm_size_t vm_kernel_page_size = KERNEL_PAGE_SIZE; +vm_size_t vm_kernel_page_mask = KERNEL_PAGE_MASK; +int vm_kernel_page_shift = KERNEL_PAGE_SHIFT; + +vm_size_t vm_page_size = PAGE_SIZE; +vm_size_t vm_page_mask = PAGE_MASK; +int vm_page_shift = PAGE_SHIFT; int mach_init(void); int _mach_fork_child(void); -static int mach_init_doit(bool forkchild); +static void mach_init_doit(void); extern void _pthread_set_self(void *); -extern void cthread_set_self(void *); +extern void _init_cpu_capabilities(void); kern_return_t host_page_size(__unused host_t host, vm_size_t *out_page_size) @@ -94,72 +99,28 @@ int mach_init(void) { static bool mach_init_inited = false; - - if (mach_init_inited) { - return 0; + if (!mach_init_inited) { + mach_init_doit(); + mach_init_inited = true; } - mach_init_inited = true; - - return mach_init_doit(false); + return 0; } // called by libSystem_atfork_child() int _mach_fork_child(void) { - return mach_init_doit(true); + mach_init_doit(); + return 0; } -int -mach_init_doit(bool forkchild) +void +mach_init_doit(void) { - /* - * Get the important ports into the cached values, - * as required by "mach_init.h". 
- */ + // Initialize cached mach ports defined in mach_init.h mach_task_self_ = task_self_trap(); - - /* - * Initialize the single mig reply port - */ + _task_reply_port = mach_reply_port(); + _init_cpu_capabilities(); _pthread_set_self(0); - _mig_init(0); - -#if WE_REALLY_NEED_THIS_GDB_HACK - /* - * Check to see if GDB wants us to stop - */ - { - task_user_data_data_t user_data; - mach_msg_type_number_t user_data_count = TASK_USER_DATA_COUNT; - - user_data.user_data = 0; - (void)task_info(mach_task_self_, TASK_USER_DATA, - (task_info_t)&user_data, &user_data_count); -#define MACH_GDB_RUN_MAGIC_NUMBER 1 -#ifdef MACH_GDB_RUN_MAGIC_NUMBER - /* This magic number is set in mach-aware gdb - * for RUN command to allow us to suspend user's - * executable (linked with this libmach!) - * with the code below. - * This hack should disappear when gdb improves. - */ - if ((int)user_data.user_data == MACH_GDB_RUN_MAGIC_NUMBER) { - kern_return_t ret; - user_data.user_data = 0; - - ret = task_suspend(mach_task_self_); - if (ret != KERN_SUCCESS) { - while (1) { - (void)task_terminate(mach_task_self_); - } - } - } -#undef MACH_GDB_RUN_MAGIC_NUMBER -#endif /* MACH_GDB_RUN_MAGIC_NUMBER */ - } -#endif /* WE_REALLY_NEED_THIS_GDB_HACK */ - - return 0; } diff --git a/libsyscall/mach/mach_msg.c b/libsyscall/mach/mach_msg.c index 87f6cb573..415d6a70a 100644 --- a/libsyscall/mach/mach_msg.c +++ b/libsyscall/mach/mach_msg.c @@ -58,6 +58,10 @@ #include #include #include +#include + +extern int proc_importance_assertion_begin_with_msg(mach_msg_header_t * msg, mach_msg_trailer_t * trailer, uint64_t * assertion_handlep); +extern int proc_importance_assertion_complete(uint64_t assertion_handle); #define MACH_MSG_TRAP(msg, opt, ssize, rsize, rname, to, not) \ mach_msg_trap((msg), (opt), (ssize), (rsize), (rname), (to), (not)) @@ -632,3 +636,142 @@ mach_msg_server( reply_alloc); return mr; } + +/* + * Routine: mach_msg_server_importance + * Purpose: + * A simple generic server function which handles importance + * promotion assertions for adaptive daemons. + */ +mach_msg_return_t +mach_msg_server_importance( + boolean_t (*demux)(mach_msg_header_t *, mach_msg_header_t *), + mach_msg_size_t max_size, + mach_port_t rcv_name, + mach_msg_options_t options) +{ + mig_reply_error_t *bufRequest, *bufReply; + mach_msg_size_t request_size; + mach_msg_size_t new_request_alloc; + mach_msg_size_t request_alloc; + mach_msg_size_t trailer_alloc; + mach_msg_size_t reply_alloc; + mach_msg_return_t mr; + kern_return_t kr; + mach_port_t self = mach_task_self_; + int retval = 1; + uint64_t token; + + options &= ~(MACH_SEND_MSG|MACH_RCV_MSG|MACH_RCV_OVERWRITE); + + reply_alloc = round_page((options & MACH_SEND_TRAILER) ? + (max_size + MAX_TRAILER_SIZE) : max_size); + + kr = vm_allocate(self, + (vm_address_t *)&bufReply, + reply_alloc, + VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE); + if (kr != KERN_SUCCESS) + return kr; + + request_alloc = 0; + trailer_alloc = REQUESTED_TRAILER_SIZE(options); + new_request_alloc = round_page(max_size + trailer_alloc); + + request_size = (options & MACH_RCV_LARGE) ? 
+ new_request_alloc : max_size + trailer_alloc; + + for (;;) { + if (request_alloc < new_request_alloc) { + request_alloc = new_request_alloc; + kr = vm_allocate(self, + (vm_address_t *)&bufRequest, + request_alloc, + VM_MAKE_TAG(VM_MEMORY_MACH_MSG)|TRUE); + if (kr != KERN_SUCCESS) { + vm_deallocate(self, + (vm_address_t)bufReply, + reply_alloc); + return kr; + } + } + + mr = mach_msg(&bufRequest->Head, MACH_RCV_MSG|options, + 0, request_size, rcv_name, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + + if (mr == MACH_MSG_SUCCESS) { + /* we have another request message */ + + retval = proc_importance_assertion_begin_with_msg(&bufRequest->Head, NULL, &token); + (void) (*demux)(&bufRequest->Head, &bufReply->Head); + + if (!(bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { + if (bufReply->RetCode == MIG_NO_REPLY) + bufReply->Head.msgh_remote_port = MACH_PORT_NULL; + else if ((bufReply->RetCode != KERN_SUCCESS) && + (bufRequest->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX)) { + /* destroy the request - but not the reply port */ + bufRequest->Head.msgh_remote_port = MACH_PORT_NULL; + mach_msg_destroy(&bufRequest->Head); + } + } + + /* + * We don't want to block indefinitely because the client + * isn't receiving messages from the reply port. + * If we have a send-once right for the reply port, then + * this isn't a concern because the send won't block. + * If we have a send right, we need to use MACH_SEND_TIMEOUT. + * To avoid falling off the kernel's fast RPC path, + * we only supply MACH_SEND_TIMEOUT when absolutely necessary. + */ + if (bufReply->Head.msgh_remote_port != MACH_PORT_NULL) { + + mr = mach_msg( + &bufReply->Head, + (MACH_MSGH_BITS_REMOTE(bufReply->Head.msgh_bits) == + MACH_MSG_TYPE_MOVE_SEND_ONCE) ? + MACH_SEND_MSG|options : + MACH_SEND_MSG|MACH_SEND_TIMEOUT|options, + bufReply->Head.msgh_size, 0, MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + + if ((mr != MACH_SEND_INVALID_DEST) && + (mr != MACH_SEND_TIMED_OUT)) { + if (retval == 0) + proc_importance_assertion_complete(token); + continue; + } + mr = MACH_MSG_SUCCESS; + } + if (bufReply->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) + mach_msg_destroy(&bufReply->Head); + if (retval == 0) + proc_importance_assertion_complete(token); + + } /* if (mr == MACH_MSG_SUCCESS) */ + + if ((mr == MACH_RCV_TOO_LARGE) && (options & MACH_RCV_LARGE)) { + new_request_alloc = round_page(bufRequest->Head.msgh_size + + trailer_alloc); + request_size = new_request_alloc; + vm_deallocate(self, + (vm_address_t) bufRequest, + request_alloc); + continue; + } else if (mr == MACH_MSG_SUCCESS) + continue; + else + break; + + } /* for(;;) */ + + (void)vm_deallocate(self, + (vm_address_t) bufRequest, + request_alloc); + (void)vm_deallocate(self, + (vm_address_t) bufReply, + reply_alloc); + return mr; +} diff --git a/libsyscall/mach/mach_port.c b/libsyscall/mach/mach_port.c index 954d45e56..2aadae90d 100644 --- a/libsyscall/mach/mach_port.c +++ b/libsyscall/mach/mach_port.c @@ -163,6 +163,26 @@ mach_port_mod_refs( return (rv); } +kern_return_t +mach_port_peek( + ipc_space_t task, + mach_port_name_t name, + mach_msg_trailer_type_t trailer_type, + mach_port_seqno_t *seqnop, + mach_msg_size_t *msg_sizep, + mach_msg_id_t *msg_idp, + mach_msg_trailer_info_t trailer_infop, + mach_msg_type_number_t *trailer_sizep) +{ + kern_return_t rv; + + rv = _kernelrpc_mach_port_peek(task, name, trailer_type, + seqnop, msg_sizep, msg_idp, + trailer_infop, trailer_sizep); + + return (rv); +} + kern_return_t mach_port_set_mscount( ipc_space_t task, @@ -481,3 +501,74 @@ 
mach_port_kobject( return (rv); } + +kern_return_t +mach_port_construct( + ipc_space_t task, + mach_port_options_t *options, + mach_port_context_t context, + mach_port_name_t *name) +{ + kern_return_t rv; + + rv = _kernelrpc_mach_port_construct_trap(task, options, (uint64_t) context, name); + + if (rv == MACH_SEND_INVALID_DEST) + rv = _kernelrpc_mach_port_construct(task, options, (uint64_t) context, name); + + return (rv); +} + +kern_return_t +mach_port_destruct( + ipc_space_t task, + mach_port_name_t name, + mach_port_delta_t srdelta, + mach_port_context_t guard) +{ + kern_return_t rv; + + rv = _kernelrpc_mach_port_destruct_trap(task, name, srdelta, (uint64_t) guard); + + if (rv == MACH_SEND_INVALID_DEST) + rv = _kernelrpc_mach_port_destruct(task, name, srdelta, (uint64_t) guard); + + return (rv); + +} + +kern_return_t +mach_port_guard( + ipc_space_t task, + mach_port_name_t name, + mach_port_context_t guard, + boolean_t strict) +{ + kern_return_t rv; + + rv = _kernelrpc_mach_port_guard_trap(task, name, (uint64_t) guard, strict); + + if (rv == MACH_SEND_INVALID_DEST) + rv = _kernelrpc_mach_port_guard(task, name, (uint64_t) guard, strict); + + return (rv); + +} + +kern_return_t +mach_port_unguard( + ipc_space_t task, + mach_port_name_t name, + mach_port_context_t guard) +{ + kern_return_t rv; + + rv = _kernelrpc_mach_port_unguard_trap(task, name, (uint64_t) guard); + + if (rv == MACH_SEND_INVALID_DEST) + rv = _kernelrpc_mach_port_unguard(task, name, (uint64_t) guard); + + return (rv); + +} + diff --git a/libsyscall/mach/mach_vm.c b/libsyscall/mach/mach_vm.c index 2db383021..1b6d7f98e 100644 --- a/libsyscall/mach/mach_vm.c +++ b/libsyscall/mach/mach_vm.c @@ -39,12 +39,16 @@ #undef _mach_vm_user_ #include +#include "stack_logging_internal.h" + +malloc_logger_t *__syscall_logger = NULL; // This may get set by Libc's malloc stack logging initialization code. 
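The mach_port_* wrappers above, and the mach_vm_* wrappers that follow, all share a trap-first idiom: issue the dedicated kernel trap, and fall back to the MIG-generated RPC only when the trap cannot service the call, in which case it fails with MACH_SEND_INVALID_DEST (for example, when the target is a port space other than the caller's). A minimal sketch of that shared shape — mach_port_example and the two _kernelrpc_mach_port_example* entry points are placeholder names for illustration, not symbols from this patch:

kern_return_t
mach_port_example(ipc_space_t task, mach_port_name_t name)
{
	kern_return_t rv;

	/* Fast path: dedicated trap, no MIG message round-trip. */
	rv = _kernelrpc_mach_port_example_trap(task, name);

	/* Trap could not handle the request; use the MIG-generated stub. */
	if (rv == MACH_SEND_INVALID_DEST)
		rv = _kernelrpc_mach_port_example(task, name);

	return (rv);
}
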
+ kern_return_t mach_vm_allocate( - mach_port_name_t target, - mach_vm_address_t *address, - mach_vm_size_t size, - int flags) + mach_port_name_t target, + mach_vm_address_t *address, + mach_vm_size_t size, + int flags) { kern_return_t rv; @@ -53,6 +57,11 @@ mach_vm_allocate( if (rv == MACH_SEND_INVALID_DEST) rv = _kernelrpc_mach_vm_allocate(target, address, size, flags); + if (__syscall_logger) { + int userTagFlags = flags & VM_FLAGS_ALIAS_MASK; + __syscall_logger(stack_logging_type_vm_allocate | userTagFlags, (uintptr_t)target, (uintptr_t)size, 0, (uintptr_t)*address, 0); + } + return (rv); } @@ -69,6 +78,10 @@ mach_vm_deallocate( if (rv == MACH_SEND_INVALID_DEST) rv = _kernelrpc_mach_vm_deallocate(target, address, size); + if (__syscall_logger) { + __syscall_logger(stack_logging_type_vm_deallocate, (uintptr_t)target, (uintptr_t)address, size, 0, 0); + } + return (rv); } @@ -140,3 +153,167 @@ vm_protect( return (rv); } + +kern_return_t +mach_vm_map( + mach_port_name_t target, + mach_vm_address_t *address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + mem_entry_name_port_t object, + memory_object_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + kern_return_t rv = MACH_SEND_INVALID_DEST; + + if (object == MEMORY_OBJECT_NULL && max_protection == VM_PROT_ALL && + inheritance == VM_INHERIT_DEFAULT) + rv = _kernelrpc_mach_vm_map_trap(target, address, size, mask, flags, + cur_protection); + + if (rv == MACH_SEND_INVALID_DEST) + rv = _kernelrpc_mach_vm_map(target, address, size, mask, flags, object, + offset, copy, cur_protection, max_protection, inheritance); + + if (__syscall_logger) { + int eventTypeFlags = stack_logging_type_vm_allocate | stack_logging_type_mapped_file_or_shared_mem; + int userTagFlags = flags & VM_FLAGS_ALIAS_MASK; + __syscall_logger(eventTypeFlags | userTagFlags, (uintptr_t)target, (uintptr_t)size, 0, (uintptr_t)*address, 0); + } + + return (rv); +} + +kern_return_t +mach_vm_remap( + mach_port_name_t target, + mach_vm_address_t *address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + mach_port_name_t src_task, + mach_vm_address_t src_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + kern_return_t rv; + + rv = _kernelrpc_mach_vm_remap(target, address, size, mask, flags, + src_task, src_address, copy, cur_protection, max_protection, + inheritance); + + if (__syscall_logger) { + int eventTypeFlags = stack_logging_type_vm_allocate | stack_logging_type_mapped_file_or_shared_mem; + int userTagFlags = flags & VM_FLAGS_ALIAS_MASK; + __syscall_logger(eventTypeFlags | userTagFlags, (uintptr_t)target, (uintptr_t)size, 0, (uintptr_t)*address, 0); + } + + return (rv); +} + +kern_return_t +mach_vm_read( + mach_port_name_t target, + mach_vm_address_t address, + mach_vm_size_t size, + vm_offset_t *data, + mach_msg_type_number_t *dataCnt) +{ + kern_return_t rv; + + rv = _kernelrpc_mach_vm_read(target, address, size, data, dataCnt); + + if (__syscall_logger) { + int eventTypeFlags = stack_logging_type_vm_allocate | stack_logging_type_mapped_file_or_shared_mem; + // The target argument is the remote task from which data is being read, + // so pass mach_task_self() as the destination task receiving the allocation. 
+ __syscall_logger(eventTypeFlags, (uintptr_t)mach_task_self(), (uintptr_t)*dataCnt, 0, *data, 0); + } + + return (rv); +} + +kern_return_t +vm_map( + mach_port_name_t target, + vm_address_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + mem_entry_name_port_t object, + vm_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + kern_return_t rv; + + rv = _kernelrpc_vm_map(target, address, size, mask, flags, object, + offset, copy, cur_protection, max_protection, inheritance); + + if (__syscall_logger) { + int eventTypeFlags = stack_logging_type_vm_allocate | stack_logging_type_mapped_file_or_shared_mem; + int userTagFlags = flags & VM_FLAGS_ALIAS_MASK; + __syscall_logger(eventTypeFlags | userTagFlags, (uintptr_t)target, (uintptr_t)size, 0, (uintptr_t)*address, 0); + } + + return (rv); +} + +kern_return_t +vm_remap( + mach_port_name_t target, + vm_address_t *address, + vm_size_t size, + vm_offset_t mask, + int flags, + mach_port_name_t src_task, + vm_address_t src_address, + boolean_t copy, + vm_prot_t *cur_protection, + vm_prot_t *max_protection, + vm_inherit_t inheritance) +{ + kern_return_t rv; + + rv = _kernelrpc_vm_remap(target, address, size, mask, flags, + src_task, src_address, copy, cur_protection, max_protection, + inheritance); + + if (__syscall_logger) { + int eventTypeFlags = stack_logging_type_vm_allocate | stack_logging_type_mapped_file_or_shared_mem; + int userTagFlags = flags & VM_FLAGS_ALIAS_MASK; + __syscall_logger(eventTypeFlags | userTagFlags, (uintptr_t)target, (uintptr_t)size, 0, (uintptr_t)*address, 0); + } + + return (rv); +} + +kern_return_t +vm_read( + mach_port_name_t target, + vm_address_t address, + vm_size_t size, + vm_offset_t *data, + mach_msg_type_number_t *dataCnt) +{ + kern_return_t rv; + + rv = _kernelrpc_vm_read(target, address, size, data, dataCnt); + + if (__syscall_logger) { + int eventTypeFlags = stack_logging_type_vm_allocate | stack_logging_type_mapped_file_or_shared_mem; + // The target argument is the remote task from which data is being read, + // so pass mach_task_self() as the destination task receiving the allocation. + __syscall_logger(eventTypeFlags, (uintptr_t)mach_task_self(), (uintptr_t)*dataCnt, 0, *data, 0); + } + + return (rv); +} diff --git a/libsyscall/mach/mig_deallocate.c b/libsyscall/mach/mig_deallocate.c index 2b58e2c70..0f406a43d 100644 --- a/libsyscall/mach/mig_deallocate.c +++ b/libsyscall/mach/mig_deallocate.c @@ -59,7 +59,7 @@ void mig_deallocate(vm_address_t addr, vm_size_t size) { - (void) vm_deallocate(mach_task_self_, + (void)vm_deallocate(mach_task_self_, addr, size); } diff --git a/libsyscall/mach/mig_reply_port.c b/libsyscall/mach/mig_reply_port.c index aa2890ac6..934c1aa20 100644 --- a/libsyscall/mach/mig_reply_port.c +++ b/libsyscall/mach/mig_reply_port.c @@ -28,23 +28,21 @@ #include #include +#include +#include "tsd.h" -//extern mach_port_t _pthread_reply_port(pthread_t); -static mach_port_t _task_reply_port = MACH_PORT_NULL; +__XNU_PRIVATE_EXTERN mach_port_t _task_reply_port = MACH_PORT_NULL; -extern mach_port_t _mig_get_reply_port(void); -extern void _mig_set_reply_port(mach_port_t port); +static inline mach_port_t +_mig_get_reply_port() +{ + return _os_tsd_get_direct(__TSD_MIG_REPLY); +} -/* - * Called by mach_init with 0 before cthread_init is - * called and again with 1 at the end of cthread_init. 
- */ -void -_mig_init(int init_done) +static inline void +_mig_set_reply_port(mach_port_t port) { - if (init_done == 0) { - _task_reply_port = mach_reply_port(); - } + _os_tsd_set_direct(__TSD_MIG_REPLY, port); } /* @@ -56,7 +54,7 @@ _mig_init(int init_done) mach_port_t mig_get_reply_port(void) { - register mach_port_t port = _mig_get_reply_port(); + mach_port_t port = _mig_get_reply_port(); if (port == MACH_PORT_NULL) { port = mach_reply_port(); _mig_set_reply_port(port); @@ -71,9 +69,7 @@ mig_get_reply_port(void) void mig_dealloc_reply_port(mach_port_t migport) { - register mach_port_t port; - - port = _mig_get_reply_port(); + mach_port_t port = _mig_get_reply_port(); if (port != MACH_PORT_NULL && port != _task_reply_port) { _mig_set_reply_port(_task_reply_port); (void) mach_port_mod_refs(mach_task_self(), port, MACH_PORT_RIGHT_RECEIVE, -1); @@ -90,6 +86,6 @@ mig_dealloc_reply_port(mach_port_t migport) ***********************************************************/ void -mig_put_reply_port(mach_port_t reply_port) +mig_put_reply_port(mach_port_t reply_port __unused) { } diff --git a/libsyscall/mach/panic.c b/libsyscall/mach/panic.c index 3992225f1..dd7332742 100644 --- a/libsyscall/mach/panic.c +++ b/libsyscall/mach/panic.c @@ -61,7 +61,7 @@ #include "abort.h" #include "string.h" -int write(int fd, const char* cbuf, int nbyte); +extern int write(int fd, const char* cbuf, int nbyte); static mach_port_t master_host_port; diff --git a/libsyscall/mach/port_obj.c b/libsyscall/mach/port_obj.c index b23054a15..788adcee9 100644 --- a/libsyscall/mach/port_obj.c +++ b/libsyscall/mach/port_obj.c @@ -42,8 +42,8 @@ struct port_obj_tentry *port_obj_table; int port_obj_table_size = DEFAULT_TABLE_SIZE; -void port_obj_init( - int maxsize) +void +port_obj_init(int maxsize) { kern_return_t kr; diff --git a/libsyscall/mach/semaphore.c b/libsyscall/mach/semaphore.c index 5d7b45e11..26a88594b 100644 --- a/libsyscall/mach/semaphore.c +++ b/libsyscall/mach/semaphore.c @@ -33,51 +33,48 @@ #include #include -kern_return_t semaphore_signal( - mach_port_t signal_semaphore) +kern_return_t +semaphore_signal(mach_port_t signal_semaphore) { return semaphore_signal_trap(signal_semaphore); } -kern_return_t semaphore_signal_all( - mach_port_t signal_semaphore) +kern_return_t +semaphore_signal_all(mach_port_t signal_semaphore) { return semaphore_signal_all_trap(signal_semaphore); } -kern_return_t semaphore_signal_thread( - mach_port_t signal_semaphore, - mach_port_t thread_act) +kern_return_t +semaphore_signal_thread(mach_port_t signal_semaphore, mach_port_t thread_act) { return semaphore_signal_thread_trap(signal_semaphore, thread_act); } -kern_return_t semaphore_wait ( - mach_port_t wait_semaphore) +kern_return_t +semaphore_wait(mach_port_t wait_semaphore) { return semaphore_wait_trap(wait_semaphore); } -kern_return_t semaphore_timedwait ( - mach_port_t wait_semaphore, - mach_timespec_t wait_time) +kern_return_t +semaphore_timedwait(mach_port_t wait_semaphore, mach_timespec_t wait_time) { return semaphore_timedwait_trap(wait_semaphore, wait_time.tv_sec, wait_time.tv_nsec); } -kern_return_t semaphore_wait_signal ( - mach_port_t wait_semaphore, - mach_port_t signal_semaphore) +kern_return_t +semaphore_wait_signal(mach_port_t wait_semaphore, mach_port_t signal_semaphore) { return semaphore_wait_signal_trap(wait_semaphore, signal_semaphore); } -kern_return_t semaphore_timedwait_signal ( - mach_port_t wait_semaphore, - mach_port_t signal_semaphore, - mach_timespec_t wait_time) +kern_return_t 
+semaphore_timedwait_signal(mach_port_t wait_semaphore, + mach_port_t signal_semaphore, + mach_timespec_t wait_time) { return semaphore_timedwait_signal_trap(wait_semaphore, signal_semaphore, diff --git a/libsyscall/mach/slot_name.c b/libsyscall/mach/slot_name.c index fa733527c..180b97b45 100644 --- a/libsyscall/mach/slot_name.c +++ b/libsyscall/mach/slot_name.c @@ -46,23 +46,28 @@ #include #include -kern_return_t msg_rpc(void) { +kern_return_t +msg_rpc(void) { return KERN_FAILURE; } -kern_return_t msg_send(void) { +kern_return_t +msg_send(void) { return KERN_FAILURE; } -kern_return_t msg_receive(void) { +kern_return_t +msg_receive(void) { return KERN_FAILURE; } -mach_port_t task_self_(void) { +mach_port_t +task_self_(void) { return mach_task_self(); } -mach_port_t host_self(void) { +mach_port_t +host_self(void) { return mach_host_self(); } diff --git a/bsd/netat/adsp_InitGlobals.c b/libsyscall/mach/stack_logging_internal.h similarity index 56% rename from bsd/netat/adsp_InitGlobals.c rename to libsyscall/mach/stack_logging_internal.h index 8aeeac0c9..fdda28e0b 100644 --- a/bsd/netat/adsp_InitGlobals.c +++ b/libsyscall/mach/stack_logging_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,61 +25,21 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* InitGlobals.c - * - * From v01.08 06/06/90 mbs - * Modified for MP, 1996 by Tuyen Nguyen - * Modified, April 9, 1997 by Tuyen Nguyen for MacOSX. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * InitGlobals - * - * INPUTS: - * none - * OUTPUTS: - * none - */ -void InitGlobals() -{ +// These declarations must match those in Libc's stack_logging.h - adspGlobal.lastCID = (random() & 0xffff); - adspGlobal.inTimer = 0; - TimerTick(); /* start the ADSP timer */ +#include // to get VM_FLAGS_ALIAS_MASK -} +#define stack_logging_type_vm_allocate 16 // mach_vm_allocate, mmap, mach_vm_map, mach_vm_remap, etc +#define stack_logging_type_vm_deallocate 32 // mach_vm_deallocate or munmap +#define stack_logging_type_mapped_file_or_shared_mem 128 // a hint that the VM region *might* be a mapped file or shared memory +// For logging VM allocation and deallocation, arg1 here +// is the mach_port_name_t of the target task in which the +// alloc or dealloc is occurring. For example, for mmap() +// that would be mach_task_self(), but for a cross-task-capable +// call such as mach_vm_map(), it is the target task. 
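The comment above pins down arg1 for VM events: it is the port of the task in which the allocation or deallocation occurred. Combined with the mach_vm.c call sites earlier in this patch (size in arg2 and address in result for allocations; address in arg2 and size in arg3 for deallocations), that is enough to write a conforming logger. A minimal sketch — my_vm_logger and its fprintf reporting are illustrative only, not part of Libc's real stack-logging code:

#include <stdint.h>
#include <stdio.h>

static void
my_vm_logger(uint32_t type, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
    uintptr_t result, uint32_t num_hot_frames_to_skip)
{
	(void)num_hot_frames_to_skip;	/* only meaningful to a backtrace recorder */
	if (type & stack_logging_type_vm_allocate) {
		/* arg1 = target task, arg2 = size, result = new address */
		fprintf(stderr, "task %#lx: alloc %lu bytes -> %#lx\n",
		    (unsigned long)arg1, (unsigned long)arg2, (unsigned long)result);
	} else if (type & stack_logging_type_vm_deallocate) {
		/* arg1 = target task, arg2 = address, arg3 = size */
		fprintf(stderr, "task %#lx: free %lu bytes @ %#lx\n",
		    (unsigned long)arg1, (unsigned long)arg3, (unsigned long)arg2);
	}
}

Installing such a hook is then a single assignment matching the malloc_logger_t typedef that follows: __syscall_logger = my_vm_logger;
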
-/* - * CleanupGlobals - * - * INPUTS: - * none - * OUTPUTS: - * none - */ +typedef void (malloc_logger_t)(uint32_t type, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t result, uint32_t num_hot_frames_to_skip); -void CleanupGlobals() -{ - TimerStop(); -} +extern malloc_logger_t *__syscall_logger; diff --git a/libsyscall/mach/string.h b/libsyscall/mach/string.h index 39a02753a..b3c00458e 100644 --- a/libsyscall/mach/string.h +++ b/libsyscall/mach/string.h @@ -31,20 +31,9 @@ #include #include <_types.h> - -#ifndef SIZE_T -#define SIZE_T -typedef __darwin_size_t size_t; -#endif - -#ifndef NULL -#define NULL __DARWIN_NULL -#endif - -#ifndef _UINTPTR_T -#define _UINTPTR_T -typedef unsigned long uintptr_t; -#endif /* _UINTPTR_T */ +#include +#include +#include // We're purposefully called "string.h" in order to superceed any use // of Libc's string.h (which no one should be using bar MIG) in order diff --git a/libsyscall/os/alloc_once.c b/libsyscall/os/alloc_once.c new file mode 100644 index 000000000..13632b5b9 --- /dev/null +++ b/libsyscall/os/alloc_once.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +// Keep in sync with libplatform alloc_once.c +#define OS_ALLOC_ONCE_KEY_MAX 100 + +struct _os_alloc_once_s { + long once; + void *ptr; +}; + +__attribute__((visibility("default"))) +extern struct _os_alloc_once_s _os_alloc_once_table[]; +struct _os_alloc_once_s _os_alloc_once_table[OS_ALLOC_ONCE_KEY_MAX]; diff --git a/libsyscall/os/tsd.h b/libsyscall/os/tsd.h new file mode 100644 index 000000000..279f65d59 --- /dev/null +++ b/libsyscall/os/tsd.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#ifndef OS_TSD_H
+#define OS_TSD_H
+
+#include
+
+/* The low nine slots of the TSD are reserved for libsyscall usage. */
+#define __TSD_RESERVED_BASE 0
+#define __TSD_RESERVED_MAX 9
+
+#define __TSD_THREAD_SELF 0
+#define __TSD_ERRNO 1
+#define __TSD_MIG_REPLY 2
+#define __TSD_SEMAPHORE_CACHE 9
+
+
+__attribute__((always_inline))
+static __inline__ unsigned int
+_os_cpu_number(void)
+{
+	/* Not yet implemented */
+	return 0;
+}
+
+__attribute__((always_inline))
+static __inline__ void*
+_os_tsd_get_direct(unsigned long slot)
+{
+	void *ret;
+#if defined(__i386__) || defined(__x86_64__)
+	__asm__("mov %%gs:%1, %0" : "=r" (ret) : "m" (*(void **)(slot * sizeof(void *))));
+#endif
+
+
+	return ret;
+}
+
+__attribute__((always_inline))
+static __inline__ int
+_os_tsd_set_direct(unsigned long slot, void* val)
+{
+#if defined(__i386__) && defined(__PIC__)
+	__asm__("movl %1, %%gs:%0" : "=m" (*(void **)(slot * sizeof(void *))) : "rn" (val));
+#elif defined(__i386__) && !defined(__PIC__)
+	__asm__("movl %1, %%gs:%0" : "=m" (*(void **)(slot * sizeof(void *))) : "ri" (val));
+#elif defined(__x86_64__)
+	__asm__("movq %1, %%gs:%0" : "=m" (*(void **)(slot * sizeof(void *))) : "rn" (val));
+#endif
+
+	return 0;
+}
+
+#endif
diff --git a/libsyscall/wrappers/__commpage_gettimeofday.c b/libsyscall/wrappers/__commpage_gettimeofday.c
new file mode 100644
index 000000000..0bc34b4ab
--- /dev/null
+++ b/libsyscall/wrappers/__commpage_gettimeofday.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + diff --git a/libsyscall/wrappers/__commpage_gettimeofday.s b/libsyscall/wrappers/__commpage_gettimeofday.s new file mode 100644 index 000000000..da920f28b --- /dev/null +++ b/libsyscall/wrappers/__commpage_gettimeofday.s @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include + +#define NSEC_PER_SEC 1000*1000*1000 +#define NSEC_PER_USEC 1000 + +#if defined(__i386__) + + .align 4 + .globl ___commpage_gettimeofday +___commpage_gettimeofday: + push %ebp + mov %esp,%ebp + push %esi + push %ebx +0: + movl _COMM_PAGE_GTOD_GENERATION,%esi /* get generation (0 if disabled) */ + testl %esi,%esi /* disabled? */ + jz 4f + + call _mach_absolute_time /* get nanotime in %edx:%eax */ + + sub _COMM_PAGE_GTOD_NS_BASE,%eax + sbb _COMM_PAGE_GTOD_NS_BASE+4,%edx + mov _COMM_PAGE_GTOD_SEC_BASE,%ebx /* load all the data before checking generation */ + mov $ NSEC_PER_SEC,%ecx + + cmpl _COMM_PAGE_GTOD_GENERATION,%esi /* has time data changed out from under us? */ + jne 0b + + div %ecx + add %eax,%ebx + + mov $ NSEC_PER_USEC,%ecx + mov %edx,%eax + xor %edx,%edx + div %ecx + + mov 8(%ebp),%ecx + mov %ebx,(%ecx) + mov %eax,4(%ecx) + xor %eax,%eax +3: + pop %ebx + pop %esi + pop %ebp + ret +4: /* fail */ + movl $1,%eax + jmp 3b + +#elif defined(__x86_64__) + + .align 4, 0x90 + .globl ___commpage_gettimeofday +___commpage_gettimeofday: +// %rdi = ptr to timeval + pushq %rbp // set up a frame for backtraces + pushq %r12 // push callee-saved registers we want to use + pushq %r13 + pushq %r14 + subq $8, %rsp + movq %rsp,%rbp + movq %rdi,%r12 // save ptr to timeval + movq $(_COMM_PAGE_TIME_DATA_START),%r13 +0: + movl _GTOD_GENERATION(%r13),%r14d // get generation (0 if disabled) + testl %r14d,%r14d // disabled? + jz 4f + + call _mach_absolute_time // get %rax <- nanotime() + + movl _GTOD_SEC_BASE(%r13),%r8d // get _COMM_PAGE_TIMESTAMP + subq _GTOD_NS_BASE(%r13),%rax // generate nanoseconds since timestamp + cmpl _GTOD_GENERATION(%r13),%r14d // has data changed out from under us? 
+ jne 0b + + movl $ NSEC_PER_SEC,%ecx + movq %rax,%rdx + shrq $32,%rdx // get high half of delta in %edx + divl %ecx // %eax <- seconds since timestamp, %edx <- nanoseconds + addl %eax,%r8d // add seconds elapsed to timestamp seconds + + movl $ NSEC_PER_USEC,%ecx + movl %edx,%eax + xorl %edx,%edx + divl %ecx // divide residual ns by 1000 to get residual us in %eax + + movq %r8,(%r12) // store 64-bit seconds into timeval + movl %eax,8(%r12) // store 32-bit useconds into timeval + xorl %eax,%eax // return 0 for success +3: + addq $8, %rsp + popq %r14 + popq %r13 + popq %r12 + popq %rbp + ret +4: // fail + movl $1,%eax + jmp 3b + +#endif diff --git a/libsyscall/wrappers/_errno.h b/libsyscall/wrappers/_errno.h index 0c3c2da96..792e927c3 100644 --- a/libsyscall/wrappers/_errno.h +++ b/libsyscall/wrappers/_errno.h @@ -27,7 +27,3 @@ */ #include - -extern int* (*_libc_get_errno)(void); -#undef errno -#define errno (*_libc_get_errno()) diff --git a/libsyscall/wrappers/_libc_funcptr.c b/libsyscall/wrappers/_libc_funcptr.c index 60fd52142..8a2ba68c8 100644 --- a/libsyscall/wrappers/_libc_funcptr.c +++ b/libsyscall/wrappers/_libc_funcptr.c @@ -26,47 +26,47 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -#include -#include +#include "_libkernel_init.h" +extern _libkernel_functions_t _libkernel_functions; -extern void (*_libc_set_errno)(int); - -static mach_port_t (*_libc_get_reply_port)(void); -static void (*_libc_set_reply_port)(mach_port_t); +__attribute__((visibility("hidden"))) +void * +malloc(size_t size) +{ + return _libkernel_functions->malloc(size); +} -/* - * Called at Libsystem initialise time, sets up callbacks we - * need to get at thread variables inside of Libc - */ +__attribute__((visibility("hidden"))) void -_mig_reply_port_callbacks(mach_port_t (*get)(void), void (*set)(mach_port_t)) +free(void *ptr) { - _libc_get_reply_port = get; - _libc_set_reply_port = set; + return _libkernel_functions->free(ptr); } -mach_port_t _mig_get_reply_port(void) __attribute__((visibility("hidden"))); -mach_port_t -_mig_get_reply_port() +__attribute__((visibility("hidden"))) +void * +realloc(void *ptr, size_t size) { - return _libc_get_reply_port(); + return _libkernel_functions->realloc(ptr, size); } -void _mig_set_reply_port(mach_port_t port) __attribute__((visibility("hidden"))); -void -_mig_set_reply_port(mach_port_t port) +__attribute__((visibility("hidden"))) +void * +reallocf(void *ptr, size_t size) { - _libc_set_reply_port(port); + void *nptr = realloc(ptr, size); + if (!nptr && ptr) + free(ptr); + return (nptr); } -void cthread_set_errno_self(int errno) __attribute__((visibility("hidden"))); +__attribute__((visibility("hidden"))) void -cthread_set_errno_self(int errno) +_pthread_exit_if_canceled(int error) { - _libc_set_errno(errno); + return _libkernel_functions->_pthread_exit_if_canceled(error); } - -void _pthread_set_self(void* ptr) __attribute__((visibility("hidden"))); +__attribute__((visibility("hidden"))) void -_pthread_set_self(void* ptr) {} +_pthread_set_self(void *ptr __attribute__((__unused__))) {} diff --git a/libsyscall/wrappers/_libkernel_init.c b/libsyscall/wrappers/_libkernel_init.c index 274dbb917..16d7e1917 100644 --- a/libsyscall/wrappers/_libkernel_init.c +++ b/libsyscall/wrappers/_libkernel_init.c @@ -27,24 +27,24 @@ */ #include "_libkernel_init.h" -#include "mig_reply_port.h" -void (*_libc_set_errno)(int) __attribute__((visibility("hidden"))); -int* (*_libc_get_errno)(void) __attribute__((visibility("hidden"))); +extern int mach_init(void); /* dlsym() funcptr is for legacy 
support in exc_catcher */ void* (*_dlsym)(void*, const char*) __attribute__((visibility("hidden"))); +__attribute__((visibility("hidden"))) +_libkernel_functions_t _libkernel_functions; + void -_libkernel_init(_libkernel_functions_t fns) +__libkernel_init(_libkernel_functions_t fns, + const char *envp[] __attribute__((unused)), + const char *apple[] __attribute__((unused)), + const struct ProgramVars *vars __attribute__((unused))) { - /* libc */ - _libc_set_errno = fns.set_errno; - _libc_get_errno = fns.get_errno; - - /* mach */ - _mig_reply_port_callbacks(fns.get_reply_port, fns.set_reply_port); - - /* dlsym */ - _dlsym = fns.dlsym; + _libkernel_functions = fns; + if (fns->dlsym) { + _dlsym = fns->dlsym; + } + mach_init(); } diff --git a/libsyscall/wrappers/_libkernel_init.h b/libsyscall/wrappers/_libkernel_init.h index 609975abd..f5bef896f 100644 --- a/libsyscall/wrappers/_libkernel_init.h +++ b/libsyscall/wrappers/_libkernel_init.h @@ -29,26 +29,21 @@ #ifndef __LIBKERNEL_INIT_H #define __LIBKERNEL_INIT_H -#include +#include -typedef struct _libkernel_functions { - /* for mach dependencies on libc */ - mach_port_t (*get_reply_port)(void); - void (*set_reply_port)(mach_port_t); - - /* dlsym() for looking up catch_exception_raise */ +typedef const struct _libkernel_functions { + /* Structure version 1. Subsequent versions must only add pointers! */ + unsigned long version; void* (*dlsym)(void*, const char*); + void* (*malloc)(size_t); + void (*free)(void*); + void* (*realloc)(void*, size_t); + void (*_pthread_exit_if_canceled)(int); +} *_libkernel_functions_t; - /* placeholders for struct layout compatibility with Libsystem */ - void *_placeholder_1; - void *_placeholder_2; - - /* for setting errno in libc */ - void (*set_errno)(int); - int* (*get_errno)(void); +struct ProgramVars; /* forward reference */ -} _libkernel_functions_t; - -void _libkernel_init(_libkernel_functions_t fns); +void __libkernel_init(_libkernel_functions_t fns, const char *envp[], + const char *apple[], const struct ProgramVars *vars); #endif // __LIBKERNEL_INIT_H` diff --git a/libsyscall/wrappers/cancelable/fcntl-base.c b/libsyscall/wrappers/cancelable/fcntl-base.c index 589af9a69..7a9a6f970 100644 --- a/libsyscall/wrappers/cancelable/fcntl-base.c +++ b/libsyscall/wrappers/cancelable/fcntl-base.c @@ -42,6 +42,7 @@ fcntl(int fd, int cmd, ...) case F_GETLK: case F_SETLK: case F_SETLKW: + case F_SETLKWTIMEOUT: case F_PREALLOCATE: case F_SETSIZE: case F_RDADVISE: @@ -54,6 +55,7 @@ fcntl(int fd, int cmd, ...) case F_UNLINKFROM: case F_ADDSIGS: case F_ADDFILESIGS: + case F_FINDSIGS: arg = va_arg(ap, void *); break; default: diff --git a/libsyscall/wrappers/carbon_delete.c b/libsyscall/wrappers/carbon_delete.c new file mode 100644 index 000000000..8c2e49d68 --- /dev/null +++ b/libsyscall/wrappers/carbon_delete.c @@ -0,0 +1,34 @@ + +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
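/*
 * Sketch of how a hosting library might populate the version-1 function table
 * declared in _libkernel_init.h and hand it to __libkernel_init(). Because
 * later versions may only append pointers, an older libsyscall can consume a
 * newer table by ignoring trailing fields. The my_* callbacks are hypothetical.
 */
#include <stddef.h>

struct libkernel_functions_v1 {     /* mirrors the header's layout */
    unsigned long version;
    void *(*dlsym)(void *, const char *);
    void *(*malloc)(size_t);
    void (*free)(void *);
    void *(*realloc)(void *, size_t);
    void (*_pthread_exit_if_canceled)(int);
};

extern void *my_dlsym(void *, const char *);
extern void *my_malloc(size_t);
extern void  my_free(void *);
extern void *my_realloc(void *, size_t);
extern void  my_exit_if_canceled(int);

static const struct libkernel_functions_v1 fns = {
    .version = 1,
    .dlsym   = my_dlsym,
    .malloc  = my_malloc,
    .free    = my_free,
    .realloc = my_realloc,
    ._pthread_exit_if_canceled = my_exit_if_canceled,
};
/* The host would then pass &fns (cast to _libkernel_functions_t) into
 * __libkernel_init(), after which libsyscall forwards malloc/free/realloc
 * through the table as shown in _libc_funcptr.c above.
 */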
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +void __inc_remove_counter(void); +int __delete(const char *path); + +int +__carbon_delete(const char *path) +{ + int res = __delete(path); + if (res == 0) __inc_remove_counter(); + return res; +} diff --git a/libsyscall/wrappers/gethostuuid.c b/libsyscall/wrappers/gethostuuid.c new file mode 100644 index 000000000..691c4fa86 --- /dev/null +++ b/libsyscall/wrappers/gethostuuid.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include + +#include "gethostuuid_private.h" + +extern int __gethostuuid(uuid_t, const struct timespec *, int); + +static volatile int (*_gethostuuid_callback)(uuid_t) = (void *)0; + +int +gethostuuid(uuid_t uuid, const struct timespec *timeout) +{ + int result; + + result = __gethostuuid(uuid, timeout, 0); + if ((result == -1) && (errno == EPERM)) { + if (_gethostuuid_callback) { + result = _gethostuuid_callback(uuid); + } else { + /* no fallback, return -1/EPERM */ + memset(uuid, 0x00, sizeof(*uuid)); + } + } + + return result; +} + +/* SPI to call gethostuuid syscall directly, without fallback */ +int +_getprivatesystemidentifier(uuid_t uuid, const struct timespec *timeout) +{ + return __gethostuuid(uuid, timeout, 1); +} + +int +_register_gethostuuid_callback(int (*new_callback)(uuid_t)) +{ + + if (__sync_bool_compare_and_swap((void **)&_gethostuuid_callback, (void *)0, (void *)new_callback)) { + return 0; + } else { + return EINVAL; + } +} diff --git a/libsyscall/wrappers/gethostuuid.h b/libsyscall/wrappers/gethostuuid.h new file mode 100644 index 000000000..94808a7f7 --- /dev/null +++ b/libsyscall/wrappers/gethostuuid.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
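/*
 * Usage sketch for the fallback hook above: a single callback may be
 * registered; the compare-and-swap makes registration first-wins, so a second
 * attempt returns EINVAL. The callback body here is hypothetical and simply
 * honors the contract of returning -1 with errno set on failure.
 */
#include <errno.h>
#include <string.h>
#include <uuid/uuid.h>

extern int _register_gethostuuid_callback(int (*)(uuid_t));

static int
fallback_hostuuid(uuid_t out)
{
    /* a real handler might query an entitled daemon for the UUID */
    memset(out, 0, sizeof(uuid_t));
    errno = EPERM;
    return -1;
}

static void
install_fallback(void)
{
    if (_register_gethostuuid_callback(fallback_hostuuid) != 0) {
        /* EINVAL: someone registered a callback before us */
    }
}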
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __GETHOSTUUID_H +#define __GETHOSTUUID_H + +#include +#include +#include + +#if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_7_0) +int gethostuuid(uuid_t, const struct timespec *) __OSX_AVAILABLE_BUT_DEPRECATED_MSG(__MAC_NA, __MAC_NA, __IPHONE_2_0, __IPHONE_5_0, "gethostuuid() is no longer supported"); +#else +int gethostuuid(uuid_t, const struct timespec *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_NA); +#endif + +#endif /* __GETHOSTUUID_H */ diff --git a/libsyscall/wrappers/gethostuuid_private.h b/libsyscall/wrappers/gethostuuid_private.h new file mode 100644 index 000000000..f8c6794ac --- /dev/null +++ b/libsyscall/wrappers/gethostuuid_private.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __GETHOSTUUID_PRIVATE_H +#define __GETHOSTUUID_PRIVATE_H + +#include +#include +#include + +/* SPI prototype, TEMPORARY */ +int _getprivatesystemidentifier(uuid_t uuid, const struct timespec *timeout) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); + +/* internal prototype */ +int gethostuuid(uuid_t, const struct timespec *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +/* Callback should return -1 and set errno on failure */ +int _register_gethostuuid_callback(int (*)(uuid_t)) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); + +#endif /* __GETHOSTUUID_PRIVATE_H */ diff --git a/libsyscall/wrappers/getiopolicy_np.c b/libsyscall/wrappers/getiopolicy_np.c new file mode 100644 index 000000000..146fdac6d --- /dev/null +++ b/libsyscall/wrappers/getiopolicy_np.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include + +extern int __iopolicysys(int, struct _iopol_param_t *); + +int +getiopolicy_np(int iotype, int scope) +{ + int policy, error; + struct _iopol_param_t iop_param; + + if (iotype != IOPOL_TYPE_DISK || + (scope != IOPOL_SCOPE_PROCESS && scope != IOPOL_SCOPE_THREAD)) { + errno = EINVAL; + policy = -1; + goto exit; + } + + iop_param.iop_scope = scope; + iop_param.iop_iotype = iotype; + error = __iopolicysys(IOPOL_CMD_GET, &iop_param); + if (error != 0) { + errno = error; + policy = -1; + goto exit; + } + + policy = iop_param.iop_policy; + + exit: + return policy; +} + +int +setiopolicy_np(int iotype, int scope, int policy) +{ + /* kernel validates the indiv values, no need to repeat it */ + struct _iopol_param_t iop_param; + + iop_param.iop_scope = scope; + iop_param.iop_iotype = iotype; + iop_param.iop_policy = policy; + + return( __iopolicysys(IOPOL_CMD_SET, &iop_param)); +} diff --git a/libsyscall/wrappers/guarded_open_np.c b/libsyscall/wrappers/guarded_open_np.c new file mode 100644 index 000000000..1322e0598 --- /dev/null +++ b/libsyscall/wrappers/guarded_open_np.c @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
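/*
 * Usage sketch for the wrappers above: drop this process's disk I/O to the
 * throttled tier, then read the policy back. The IOPOL_* constants live in
 * <sys/resource.h>; getiopolicy_np() returns -1 with errno set on failure.
 */
#include <stdio.h>
#include <sys/resource.h>

static void
throttle_self(void)
{
    if (setiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS, IOPOL_THROTTLE) != 0)
        perror("setiopolicy_np");

    int pol = getiopolicy_np(IOPOL_TYPE_DISK, IOPOL_SCOPE_PROCESS);
    if (pol == -1)
        perror("getiopolicy_np");
    else
        printf("disk I/O policy is now %d\n", pol);
}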
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include + +int __guarded_open_np(const char *path, + const guardid_t *guard, u_int guardflags, int flags, int mode); + +int +guarded_open_np(const char *path, + const guardid_t *guard, u_int guardflags, int flags, ...) +{ + int mode = 0; + + if (flags & O_CREAT) { + va_list ap; + va_start(ap, flags); + mode = va_arg(ap, int); + va_end(ap); + } + return (__guarded_open_np(path, guard, guardflags, flags, mode)); +} diff --git a/libsyscall/wrappers/legacy/munmap.c b/libsyscall/wrappers/legacy/munmap.c index 24b5b5eaa..3a69297eb 100644 --- a/libsyscall/wrappers/legacy/munmap.c +++ b/libsyscall/wrappers/legacy/munmap.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include "stack_logging_internal.h" /* * Stub function to account for the differences in standard compliance @@ -59,7 +61,13 @@ munmap(void *addr, size_t len) offset = ((uintptr_t) addr) & PAGE_MASK; addr = (void *) (((uintptr_t) addr) & ~PAGE_MASK); len += offset; - return __munmap(addr, len); -} + int result = __munmap(addr, len); + + if (__syscall_logger) { + __syscall_logger(stack_logging_type_vm_deallocate, (uintptr_t)mach_task_self(), (uintptr_t)addr, len, 0, 0); + } + + return result; +} #endif /* NO_SYSCALL_LEGACY */ diff --git a/libsyscall/wrappers/libproc/libproc.c b/libsyscall/wrappers/libproc/libproc.c new file mode 100644 index 000000000..fa0b8c16e --- /dev/null +++ b/libsyscall/wrappers/libproc/libproc.c @@ -0,0 +1,814 @@ +/* + * Copyright (c) 2006, 2010 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
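/*
 * Worked example of the page-alignment fixup in the legacy munmap() above,
 * assuming 4 KiB pages (PAGE_MASK == 0xfff): the address is rounded down to a
 * page boundary and the length grows by the same offset, so the adjusted range
 * still covers every byte of the original one.
 */
#include <stddef.h>
#include <stdint.h>

#define SKETCH_PAGE_MASK 0xfffUL    /* stands in for PAGE_MASK */

static void
round_for_munmap(uintptr_t addr_in, size_t len_in,
                 uintptr_t *addr_out, size_t *len_out)
{
    uintptr_t offset = addr_in & SKETCH_PAGE_MASK;  /* bytes past the boundary */

    *addr_out = addr_in & ~SKETCH_PAGE_MASK;        /* round down to the page */
    *len_out  = len_in + offset;                    /* keep the same last byte */
    /* e.g. addr 0x100c40, len 0x100  ->  addr 0x100000, len 0xd40 */
}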
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#define BUILD_LIBSYSCALL 1 +#include +#include + +#include "libproc_internal.h" + +int __proc_info(int callnum, int pid, int flavor, uint64_t arg, void * buffer, int buffersize); +__private_extern__ int proc_setthreadname(void * buffer, int buffersize); +int __process_policy(int scope, int action, int policy, int policy_subtype, proc_policy_attribute_t * attrp, pid_t target_pid, uint64_t target_threadid); +int proc_rlimit_control(pid_t pid, int flavor, void *arg); + +int +proc_listpids(uint32_t type, uint32_t typeinfo, void *buffer, int buffersize) +{ + int retval; + + if ((type >= PROC_ALL_PIDS) || (type <= PROC_PPID_ONLY)) { + if ((retval = __proc_info(PROC_INFO_CALL_LISTPIDS, type, typeinfo,(uint64_t)0, buffer, buffersize)) == -1) + return(0); + } else { + errno = EINVAL; + retval = 0; + } + return(retval); +} + + +int +proc_listallpids(void * buffer, int buffersize) +{ + int numpids; + numpids = proc_listpids(PROC_ALL_PIDS, (uint32_t)0, buffer, buffersize); + + if (numpids == -1) + return(-1); + else + return(numpids/sizeof(int)); +} + +int +proc_listpgrppids(pid_t pgrpid, void * buffer, int buffersize) +{ + int numpids; + numpids = proc_listpids(PROC_PGRP_ONLY, (uint32_t)pgrpid, buffer, buffersize); + if (numpids == -1) + return(-1); + else + return(numpids/sizeof(int)); +} + +int +proc_listchildpids(pid_t ppid, void * buffer, int buffersize) +{ + int numpids; + numpids = proc_listpids(PROC_PPID_ONLY, (uint32_t)ppid, buffer, buffersize); + if (numpids == -1) + return(-1); + else + return(numpids/sizeof(int)); +} + + +int +proc_pidinfo(int pid, int flavor, uint64_t arg, void *buffer, int buffersize) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_PIDINFO, pid, flavor, arg, buffer, buffersize)) == -1) + return(0); + + return(retval); +} + +int +proc_pid_rusage(int pid, int flavor, rusage_info_t *buffer) +{ + return (__proc_info(PROC_INFO_CALL_PIDRUSAGE, pid, flavor, 0, buffer, 0)); +} + +int +proc_pidfdinfo(int pid, int fd, int flavor, void * buffer, int buffersize) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_PIDFDINFO, pid, flavor, (uint64_t)fd, buffer, buffersize)) == -1) + return(0); + + return (retval); +} + + +int +proc_pidfileportinfo(int pid, uint32_t fileport, int flavor, void *buffer, int buffersize) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_PIDFILEPORTINFO, pid, flavor, (uint64_t)fileport, buffer, buffersize)) == -1) + return (0); + return (retval); +} + + +int +proc_name(int pid, void * buffer, uint32_t buffersize) +{ + int retval = 0, len; + struct proc_bsdinfo pbsd; + + + if (buffersize < sizeof(pbsd.pbi_name)) { + errno = ENOMEM; + return(0); + } + + retval = proc_pidinfo(pid, PROC_PIDTBSDINFO, (uint64_t)0, &pbsd, sizeof(struct proc_bsdinfo)); + if (retval != 0) { + if (pbsd.pbi_name[0]) { + bcopy(&pbsd.pbi_name, buffer, sizeof(pbsd.pbi_name)); + } else { + bcopy(&pbsd.pbi_comm, buffer, sizeof(pbsd.pbi_comm)); + } + len = strlen(buffer); + return(len); + } + return(0); +} + +int +proc_regionfilename(int pid, uint64_t address, void * buffer, uint32_t buffersize) +{ + int retval = 0, len; + struct proc_regionwithpathinfo reginfo; + + if (buffersize < MAXPATHLEN) { + errno = ENOMEM; + return(0); + } + + retval = proc_pidinfo(pid, PROC_PIDREGIONPATHINFO, (uint64_t)address, ®info, sizeof(struct proc_regionwithpathinfo)); + if (retval != -1) { + len = strlen(®info.prp_vip.vip_path[0]); + if (len != 0) { + 
if (len > MAXPATHLEN) + len = MAXPATHLEN; + bcopy(®info.prp_vip.vip_path[0], buffer, len); + return(len); + } + return(0); + } + return(0); + +} + +int +proc_kmsgbuf(void * buffer, uint32_t buffersize) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_KERNMSGBUF, 0, 0, (uint64_t)0, buffer, buffersize)) == -1) + return(0); + return (retval); +} + +int +proc_pidpath(int pid, void * buffer, uint32_t buffersize) +{ + int retval, len; + + if (buffersize < PROC_PIDPATHINFO_SIZE) { + errno = ENOMEM; + return(0); + } + if (buffersize > PROC_PIDPATHINFO_MAXSIZE) { + errno = EOVERFLOW; + return(0); + } + + retval = __proc_info(PROC_INFO_CALL_PIDINFO, pid, PROC_PIDPATHINFO, (uint64_t)0, buffer, buffersize); + if (retval != -1) { + len = strlen(buffer); + return(len); + } + return (0); +} + + +int +proc_libversion(int *major, int * minor) +{ + + if (major != NULL) + *major = 1; + if (minor != NULL) + *minor = 1; + return(0); +} + +int +proc_setpcontrol(const int control) +{ + int retval ; + + if (control < PROC_SETPC_NONE || control > PROC_SETPC_TERMINATE) + return(EINVAL); + + if ((retval = __proc_info(PROC_INFO_CALL_SETCONTROL, getpid(), PROC_SELFSET_PCONTROL, (uint64_t)control, NULL, 0)) == -1) + return(errno); + + return(0); +} + + +__private_extern__ int +proc_setthreadname(void * buffer, int buffersize) +{ + int retval; + + retval = __proc_info(PROC_INFO_CALL_SETCONTROL, getpid(), PROC_SELFSET_THREADNAME, (uint64_t)0, buffer, buffersize); + + if (retval == -1) + return(errno); + else + return(0); +} + +int +proc_track_dirty(pid_t pid, uint32_t flags) +{ + if (__proc_info(PROC_INFO_CALL_DIRTYCONTROL, pid, PROC_DIRTYCONTROL_TRACK, flags, NULL, 0) == -1) { + return errno; + } + + return 0; +} + +int +proc_set_dirty(pid_t pid, bool dirty) +{ + if (__proc_info(PROC_INFO_CALL_DIRTYCONTROL, pid, PROC_DIRTYCONTROL_SET, dirty, NULL, 0) == -1) { + return errno; + } + + return 0; +} + +int +proc_get_dirty(pid_t pid, uint32_t *flags) +{ + int retval; + + if (!flags) { + return EINVAL; + } + + retval = __proc_info(PROC_INFO_CALL_DIRTYCONTROL, pid, PROC_DIRTYCONTROL_GET, 0, NULL, 0); + if (retval == -1) { + return errno; + } + + *flags = retval; + + return 0; +} + +int +proc_terminate(pid_t pid, int *sig) +{ + int retval; + + if (!sig) { + return EINVAL; + } + + retval = __proc_info(PROC_INFO_CALL_TERMINATE, pid, 0, 0, NULL, 0); + if (retval == -1) { + return errno; + } + + *sig = retval; + + return 0; +} + +int +proc_set_cpumon_params(pid_t pid, int percentage, int interval) +{ + proc_policy_cpuusage_attr_t attr; + + attr.ppattr_cpu_attr = PROC_POLICY_RSRCACT_NOTIFY_EXC; + attr.ppattr_cpu_percentage = percentage; + attr.ppattr_cpu_attr_interval = (uint64_t)interval; + attr.ppattr_cpu_attr_deadline = 0; + + return(__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_SET, PROC_POLICY_RESOURCE_USAGE, + PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, 0)); +} + +int +proc_get_cpumon_params(pid_t pid, int *percentage, int *interval) +{ + proc_policy_cpuusage_attr_t attr; + int ret; + + ret = __process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_GET, PROC_POLICY_RESOURCE_USAGE, + PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, 0); + + if ((ret == 0) && (attr.ppattr_cpu_attr == PROC_POLICY_RSRCACT_NOTIFY_EXC)) { + *percentage = attr.ppattr_cpu_percentage; + *interval = attr.ppattr_cpu_attr_interval; + } else { + *percentage = 0; + *interval = 0; + } + + return (ret); +} + +int +proc_set_cpumon_defaults(pid_t pid) +{ + proc_policy_cpuusage_attr_t attr; + + 
attr.ppattr_cpu_attr = PROC_POLICY_RSRCACT_NOTIFY_EXC; + attr.ppattr_cpu_percentage = PROC_POLICY_CPUMON_DEFAULTS; + attr.ppattr_cpu_attr_interval = 0; + attr.ppattr_cpu_attr_deadline = 0; + + return(__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_SET, PROC_POLICY_RESOURCE_USAGE, + PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, 0)); +} + +int +proc_disable_cpumon(pid_t pid) +{ + proc_policy_cpuusage_attr_t attr; + + attr.ppattr_cpu_attr = PROC_POLICY_RSRCACT_NOTIFY_EXC; + attr.ppattr_cpu_percentage = PROC_POLICY_CPUMON_DISABLE; + attr.ppattr_cpu_attr_interval = 0; + attr.ppattr_cpu_attr_deadline = 0; + + return(__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_SET, PROC_POLICY_RESOURCE_USAGE, + PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, 0)); +} + +int +proc_set_wakemon_params(pid_t pid, int rate_hz, int flags __unused) +{ + struct proc_rlimit_control_wakeupmon params; + + params.wm_flags = WAKEMON_ENABLE; + params.wm_rate = rate_hz; + + return (proc_rlimit_control(pid, RLIMIT_WAKEUPS_MONITOR, ¶ms)); +} + +#ifndef WAKEMON_GET_PARAMS +#define WAKEMON_GET_PARAMS 0x4 +#define WAKEMON_SET_DEFAULTS 0x8 +#endif + +int +proc_get_wakemon_params(pid_t pid, int *rate_hz, int *flags) +{ + struct proc_rlimit_control_wakeupmon params; + int error; + + params.wm_flags = WAKEMON_GET_PARAMS; + + if ((error = proc_rlimit_control(pid, RLIMIT_WAKEUPS_MONITOR, ¶ms)) != 0) { + return (error); + } + + *rate_hz = params.wm_rate; + *flags = params.wm_flags; + + return (0); +} + +int +proc_set_wakemon_defaults(pid_t pid) +{ + struct proc_rlimit_control_wakeupmon params; + + params.wm_flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS; + params.wm_rate = -1; + + return (proc_rlimit_control(pid, RLIMIT_WAKEUPS_MONITOR, ¶ms)); +} + +int +proc_disable_wakemon(pid_t pid) +{ + struct proc_rlimit_control_wakeupmon params; + + params.wm_flags = WAKEMON_DISABLE; + params.wm_rate = -1; + + return (proc_rlimit_control(pid, RLIMIT_WAKEUPS_MONITOR, ¶ms)); +} + + +#if TARGET_OS_EMBEDDED + +int +proc_setcpu_percentage(pid_t pid, int action, int percentage) +{ + proc_policy_cpuusage_attr_t attr; + + bzero(&attr, sizeof(proc_policy_cpuusage_attr_t)); + attr.ppattr_cpu_attr = action; + attr.ppattr_cpu_percentage = percentage; + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_RESOURCE_USAGE, PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, (uint64_t)0) != -1) + return(0); + else + return(errno); +} + +int +proc_setcpu_deadline(pid_t pid, int action, uint64_t deadline) +{ + proc_policy_cpuusage_attr_t attr; + + bzero(&attr, sizeof(proc_policy_cpuusage_attr_t)); + attr.ppattr_cpu_attr = action; + attr.ppattr_cpu_attr_deadline = deadline; + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_RESOURCE_USAGE, PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, (uint64_t)0) != -1) + return(0); + else + return(errno); + +} + + +int +proc_setcpu_percentage_withdeadline(pid_t pid, int action, int percentage, uint64_t deadline) +{ + proc_policy_cpuusage_attr_t attr; + + bzero(&attr, sizeof(proc_policy_cpuusage_attr_t)); + attr.ppattr_cpu_attr = action; + attr.ppattr_cpu_percentage = percentage; + attr.ppattr_cpu_attr_deadline = deadline; + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_RESOURCE_USAGE, PROC_POLICY_RUSAGE_CPU, (proc_policy_attribute_t*)&attr, pid, (uint64_t)0) != -1) + return(0); + else + return(errno); +} + +int +proc_clear_cpulimits(pid_t 
pid) +{ + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_RESTORE, PROC_POLICY_RESOURCE_USAGE, PROC_POLICY_RUSAGE_CPU, NULL, pid, (uint64_t)0) != -1) + return(0); + else + return(errno); + + +} + +int +proc_appstate(int pid, int * appstatep) +{ + int state; + + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_GET, PROC_POLICY_APP_LIFECYCLE, PROC_POLICY_APPLIFE_STATE, (proc_policy_attribute_t*)&state, pid, (uint64_t)0) != -1) { + if (appstatep != NULL) + *appstatep = state; + return(0); + } else + return(errno); + +} + + +int +proc_setappstate(int pid, int appstate) +{ + int state = appstate; + + switch (state) { + case PROC_APPSTATE_NONE: + case PROC_APPSTATE_ACTIVE: + case PROC_APPSTATE_INACTIVE: + case PROC_APPSTATE_BACKGROUND: + case PROC_APPSTATE_NONUI: + break; + default: + return(EINVAL); + } + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_APP_LIFECYCLE, PROC_POLICY_APPLIFE_STATE, (proc_policy_attribute_t*)&state, pid, (uint64_t)0) != -1) + return(0); + else + return(errno); +} + +int +proc_devstatusnotify(int devicestatus) +{ + int state = devicestatus; + + switch (devicestatus) { + case PROC_DEVSTATUS_SHORTTERM: + case PROC_DEVSTATUS_LONGTERM: + break; + default: + return(EINVAL); + } + + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_APP_LIFECYCLE, PROC_POLICY_APPLIFE_DEVSTATUS, (proc_policy_attribute_t*)&state, getpid(), (uint64_t)0) != -1) { + return(0); + } else + return(errno); + +} + +int +proc_pidbind(int pid, uint64_t threadid, int bind) +{ + int state = bind; + pid_t passpid = pid; + + switch (bind) { + case PROC_PIDBIND_CLEAR: + passpid = getpid(); /* ignore pid on clear */ + break; + case PROC_PIDBIND_SET: + break; + default: + return(EINVAL); + } + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_APPLY, PROC_POLICY_APP_LIFECYCLE, PROC_POLICY_APPLIFE_PIDBIND, (proc_policy_attribute_t*)&state, passpid, threadid) != -1) + return(0); + else + return(errno); +} +#endif /* TARGET_OS_EMBEDDED */ + + +/* Donate importance to adaptive processes from this process */ +int +proc_donate_importance_boost() +{ + int rval; + +#if TARGET_OS_EMBEDDED + rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, + PROC_POLICY_ACTION_ENABLE, + PROC_POLICY_APPTYPE, + PROC_POLICY_IOS_DONATEIMP, + NULL, getpid(), (uint64_t)0); +#else /* TARGET_OS_EMBEDDED */ + rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, + PROC_POLICY_ACTION_SET, + PROC_POLICY_BOOST, + PROC_POLICY_IMP_DONATION, + NULL, getpid(), 0); +#endif /* TARGET_OS_EMBEDDED */ + + if (rval == 0) + return (0); + else + return (errno); +} + +static __attribute__((noinline)) void +proc_importance_bad_assertion(char *reason) { + (void)reason; +} + +/* + * Use the address of these variables as the token. This way, they can be + * printed in the debugger as useful names. + */ +uint64_t important_boost_assertion_token = 0xfafafafafafafafa; +uint64_t normal_boost_assertion_token = 0xfbfbfbfbfbfbfbfb; +uint64_t non_boost_assertion_token = 0xfcfcfcfcfcfcfcfc; + +/* + * Accept the boost on a message, or request another boost assertion + * if we have already accepted the implicit boost for this message. + * + * Returns EOVERFLOW if an attempt is made to take an extra assertion when not boosted. + * + * Returns EIO if the message was not a boosting message. + * TODO: Return a 'non-boost' token instead. 
+ */ +int +proc_importance_assertion_begin_with_msg(mach_msg_header_t *msg, + __unused mach_msg_trailer_t *trailer, + uint64_t *assertion_token) +{ + int rval = 0; + + if (assertion_token == NULL) + return (EINVAL); + + /* Is this a boosting message? */ + if ((msg->msgh_bits & MACH_MSGH_BITS_RAISEIMP) != 0) { + + /* + * Have we accepted the implicit boost for this message yet? + * If we haven't accepted it yet, no need to call into kernel. + */ + if ((msg->msgh_bits & MACH_MSGH_BITS_IMPHOLDASRT) == 0) { + msg->msgh_bits |= MACH_MSGH_BITS_IMPHOLDASRT; + *assertion_token = (uint64_t) &important_boost_assertion_token; + return (0); + } + + /* Request an additional boost count */ + +#if TARGET_OS_EMBEDDED + rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, + PROC_POLICY_ACTION_ENABLE, + PROC_POLICY_APPTYPE, + PROC_POLICY_IOS_HOLDIMP, + NULL, getpid(), 0); +#else /* TARGET_OS_EMBEDDED */ + rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, + PROC_POLICY_ACTION_HOLD, + PROC_POLICY_BOOST, + PROC_POLICY_IMP_IMPORTANT, + NULL, getpid(), 0); +#endif /* TARGET_OS_EMBEDDED */ + + if (rval == 0) { + *assertion_token = (uint64_t) &important_boost_assertion_token; + return (0); + } else if (errno == EOVERFLOW) { + proc_importance_bad_assertion("Attempted to take assertion while not boosted"); + return (errno); + } else { + return (errno); + } + } + + return (EIO); +} + + +/* + * Drop a boost assertion. + * Returns EOVERFLOW on boost assertion underflow. + */ +int +proc_importance_assertion_complete(uint64_t assertion_token) +{ + int rval = 0; + + if (assertion_token == 0) + return (0); + + if (assertion_token == (uint64_t) &important_boost_assertion_token) { + +#if TARGET_OS_EMBEDDED + rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, + PROC_POLICY_ACTION_ENABLE, + PROC_POLICY_APPTYPE, + PROC_POLICY_IOS_DROPIMP, + NULL, getpid(), 0); +#else /* TARGET_OS_EMBEDDED */ + rval = __process_policy(PROC_POLICY_SCOPE_PROCESS, + PROC_POLICY_ACTION_DROP, + PROC_POLICY_BOOST, + PROC_POLICY_IMP_IMPORTANT, + NULL, getpid(), 0); +#endif /* TARGET_OS_EMBEDDED */ + + if (rval == 0) { + return (0); + } else if (errno == EOVERFLOW) { + proc_importance_bad_assertion("Attempted to drop too many assertions"); + return (errno); + } else { + return (errno); + } + } else { + proc_importance_bad_assertion("Attempted to drop assertion with invalid token"); + return (EIO); + } +} + +#if !TARGET_OS_EMBEDDED + +int +proc_clear_vmpressure(pid_t pid) +{ + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_RESTORE, PROC_POLICY_RESOURCE_STARVATION, PROC_POLICY_RS_VIRTUALMEM, NULL, pid, (uint64_t)0) != -1) + return(0); + else + return(errno); +} + +/* set the current process as one who can resume suspended processes due to low virtual memory. 
Need to be root */ +int +proc_set_owner_vmpressure(void) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_SETCONTROL, getpid(), PROC_SELFSET_VMRSRCOWNER, (uint64_t)0, NULL, 0)) == -1) + return(errno); + + return(0); +} + +/* mark yourself to delay idle sleep on disk IO */ +int +proc_set_delayidlesleep(void) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_SETCONTROL, getpid(), PROC_SELFSET_DELAYIDLESLEEP, (uint64_t)1, NULL, 0)) == -1) + return(errno); + + return(0); +} + +/* Reset yourself to delay idle sleep on disk IO, if already set */ +int +proc_clear_delayidlesleep(void) +{ + int retval; + + if ((retval = __proc_info(PROC_INFO_CALL_SETCONTROL, getpid(), PROC_SELFSET_DELAYIDLESLEEP, (uint64_t)0, NULL, 0)) == -1) + return(errno); + + return(0); +} + +/* disable the launch time backgroudn policy and restore the process to default group */ +int +proc_disable_apptype(pid_t pid, int apptype) +{ + switch (apptype) { + case PROC_POLICY_OSX_APPTYPE_TAL: + case PROC_POLICY_OSX_APPTYPE_DASHCLIENT: + break; + default: + return(EINVAL); + } + + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_DISABLE, PROC_POLICY_APPTYPE, apptype, NULL, pid, (uint64_t)0) != -1) + return(0); + else + return(errno); + +} + +/* re-enable the launch time background policy if it had been disabled. */ +int +proc_enable_apptype(pid_t pid, int apptype) +{ + switch (apptype) { + case PROC_POLICY_OSX_APPTYPE_TAL: + case PROC_POLICY_OSX_APPTYPE_DASHCLIENT: + break; + default: + return(EINVAL); + + } + + if (__process_policy(PROC_POLICY_SCOPE_PROCESS, PROC_POLICY_ACTION_ENABLE, PROC_POLICY_APPTYPE, apptype, NULL, pid, (uint64_t)0) != -1) + return(0); + else + return(errno); + +} + +#if !TARGET_IPHONE_SIMULATOR + +int +proc_suppress(__unused pid_t pid, __unused uint64_t *generation) +{ + return 0; +} + +#endif /* !TARGET_IPHONE_SIMULATOR */ + +#endif /* !TARGET_OS_EMBEDDED */ + + + diff --git a/libsyscall/wrappers/libproc/libproc.h b/libsyscall/wrappers/libproc/libproc.h new file mode 100644 index 000000000..5fda14853 --- /dev/null +++ b/libsyscall/wrappers/libproc/libproc.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2006, 2007, 2010 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _LIBPROC_H_ +#define _LIBPROC_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +/* + * This header file contains private interfaces to obtain process information. + * These interfaces are subject to change in future releases. + */ + +/*! 
+ @define PROC_LISTPIDSPATH_PATH_IS_VOLUME + @discussion This flag indicates that all processes that hold open + file references on the volume associated with the specified + path should be returned. + */ +#define PROC_LISTPIDSPATH_PATH_IS_VOLUME 1 + + +/*! + @define PROC_LISTPIDSPATH_EXCLUDE_EVTONLY + @discussion This flag indicates that file references that were opened + with the O_EVTONLY flag should be excluded from the matching + criteria. + */ +#define PROC_LISTPIDSPATH_EXCLUDE_EVTONLY 2 + +__BEGIN_DECLS + + +/*! + @function proc_listpidspath + @discussion A function which will search through the current + processes looking for open file references which match + a specified path or volume. + @param type types of processes to be searched (see proc_listpids) + @param typeinfo adjunct information for type + @param path file or volume path + @param pathflags flags to control which files should be considered + during the process search. + @param buffer a C array of int-sized values to be filled with + process identifiers that hold an open file reference + matching the specified path or volume. Pass NULL to + obtain the minimum buffer size needed to hold the + currently active processes. + @param buffersize the size (in bytes) of the provided buffer. + @result the number of bytes of data returned in the provided buffer; + -1 if an error was encountered; + */ +int proc_listpidspath(uint32_t type, + uint32_t typeinfo, + const char *path, + uint32_t pathflags, + void *buffer, + int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +int proc_listpids(uint32_t type, uint32_t typeinfo, void *buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int proc_listallpids(void * buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_1); +int proc_listpgrppids(pid_t pgrpid, void * buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_1); +int proc_listchildpids(pid_t ppid, void * buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_1); +int proc_pidinfo(int pid, int flavor, uint64_t arg, void *buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int proc_pidfdinfo(int pid, int fd, int flavor, void * buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int proc_pidfileportinfo(int pid, uint32_t fileport, int flavor, void *buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_3); +int proc_name(int pid, void * buffer, uint32_t buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int proc_regionfilename(int pid, uint64_t address, void * buffer, uint32_t buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int proc_kmsgbuf(void * buffer, uint32_t buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int proc_pidpath(int pid, void * buffer, uint32_t buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int proc_libversion(int *major, int * minor) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +/* + * Return resource usage information for the given pid, which can be a live process or a zombie. + * + * Returns 0 on success; or -1 on failure, with errno set to indicate the specific error. + */ +int proc_pid_rusage(int pid, int flavor, rusage_info_t *buffer) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); + +/* + * A process can use the following api to set its own process control + * state on resoure starvation. 
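/*
 * Usage sketch for proc_listpidspath() as documented above: pass a NULL buffer
 * first to learn the required size, then fetch the PIDs of every process
 * holding a file open on the volume containing the given (illustrative) path.
 */
#include <stdio.h>
#include <stdlib.h>
#include <libproc.h>

static void
list_volume_users(const char *path)
{
    int bytes = proc_listpidspath(PROC_ALL_PIDS, 0, path,
                                  PROC_LISTPIDSPATH_PATH_IS_VOLUME, NULL, 0);
    if (bytes <= 0)
        return;

    int *pids = malloc(bytes);
    if (pids == NULL)
        return;

    bytes = proc_listpidspath(PROC_ALL_PIDS, 0, path,
                              PROC_LISTPIDSPATH_PATH_IS_VOLUME, pids, bytes);
    for (int i = 0; i < bytes / (int)sizeof(int); i++)
        printf("pid %d\n", pids[i]);

    free(pids);
}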
The argument can have one of the PROC_SETPC_XX values + */ +#define PROC_SETPC_NONE 0 +#define PROC_SETPC_THROTTLEMEM 1 +#define PROC_SETPC_SUSPEND 2 +#define PROC_SETPC_TERMINATE 3 + +int proc_setpcontrol(const int control) __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_3_2); +int proc_setpcontrol(const int control); + +int proc_track_dirty(pid_t pid, uint32_t flags); +int proc_set_dirty(pid_t pid, bool dirty); +int proc_get_dirty(pid_t pid, uint32_t *flags); + +int proc_terminate(pid_t pid, int *sig); + +__END_DECLS + +#endif /*_LIBPROC_H_ */ diff --git a/libsyscall/wrappers/libproc/libproc_internal.h b/libsyscall/wrappers/libproc/libproc_internal.h new file mode 100644 index 000000000..f04e9e1a3 --- /dev/null +++ b/libsyscall/wrappers/libproc/libproc_internal.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2010 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#ifndef _LIBPROC_INTERNALH_ +#define _LIBPROC_INTERNALH_ + +#include + +#include +#include + +__BEGIN_DECLS + +#if TARGET_OS_EMBEDDED + +#define PROC_SETCPU_ACTION_NONE 0 +#define PROC_SETCPU_ACTION_THROTTLE 1 +#define PROC_SETCPU_ACTION_SUSPEND 2 +#define PROC_SETCPU_ACTION_TERMINATE 3 +#define PROC_SETCPU_ACTION_NOTIFY 4 + +int proc_setcpu_percentage(pid_t pid, int action, int percentage) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); +int proc_setcpu_deadline(pid_t pid, int action, uint64_t deadline) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); +int proc_setcpu_percentage_withdeadline(pid_t pid, int action, int percentage, uint64_t deadline) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); +int proc_clear_cpulimits(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); + +#define PROC_APPSTATE_NONE 0 +#define PROC_APPSTATE_ACTIVE 1 +#define PROC_APPSTATE_BACKGROUND 2 +#define PROC_APPSTATE_NONUI 3 +#define PROC_APPSTATE_INACTIVE 4 + +int proc_setappstate(int pid, int appstate); +int proc_appstate(int pid, int * appstatep); + +#define PROC_DEVSTATUS_SHORTTERM 1 +#define PROC_DEVSTATUS_LONGTERM 2 + +int proc_devstatusnotify(int devicestatus); + +#define PROC_PIDBIND_CLEAR 0 +#define PROC_PIDBIND_SET 1 +int proc_pidbind(int pid, uint64_t threadid, int bind); + +#else /* TARGET_OS_EMBEDDED */ + +/* resume the process suspend due to low VM resource */ +int proc_clear_vmpressure(pid_t pid); +/* set self as the one who is going to resume suspended processes due to low VM. 
Need to be root */ +int proc_set_owner_vmpressure(void); + +/* mark yourself to delay idle sleep on disk IO */ +int proc_set_delayidlesleep(void); +/* Reset yourself to delay idle sleep on disk IO, if already set */ +int proc_clear_delayidlesleep(void); + + +/* sub policies for PROC_POLICY_APPTYPE */ +#define PROC_POLICY_OSX_APPTYPE_NONE 0 +#define PROC_POLICY_OSX_APPTYPE_TAL 1 /* TAL based launched */ +#define PROC_POLICY_OSX_APPTYPE_WIDGET 2 /* for dashboard client */ +#define PROC_POLICY_OSX_APPTYPE_DASHCLIENT 2 /* rename to move away from widget */ + +/* + * Resumes the backgrounded TAL or dashboard client. Only priv users can disable TAL apps. + * Valid apptype are PROC_POLICY_OSX_APPTYPE_DASHCLIENT and PROC_POLICY_OSX_APPTYPE_TAL. + * Returns 0 on success otherwise appropriate error code. + */ +int proc_disable_apptype(pid_t pid, int apptype); +int proc_enable_apptype(pid_t pid, int apptype); + +#endif /* TARGET_OS_EMBEDDED */ + +/* mark process as importance donating */ +int proc_donate_importance_boost(void); + +/* check the message for an importance boost and take an assertion on it */ +int proc_importance_assertion_begin_with_msg(mach_msg_header_t *msg, + mach_msg_trailer_t *trailer, + uint64_t *assertion_token); + +/* drop an assertion */ +int proc_importance_assertion_complete(uint64_t assertion_handle); + +int proc_set_cpumon_params(pid_t pid, int percentage, int interval) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); +int proc_get_cpumon_params(pid_t pid, int *percentage, int *interval) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); +int proc_set_cpumon_defaults(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); +int proc_disable_cpumon(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); + +int proc_set_wakemon_params(pid_t pid, int rate_hz, int flags) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +int proc_get_wakemon_params(pid_t pid, int *rate_hz, int *flags) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +int proc_set_wakemon_defaults(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +int proc_disable_wakemon(pid_t pid) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); + +#if !TARGET_IPHONE_SIMULATOR + +#define PROC_SUPPRESS_SUCCESS (0) +#define PROC_SUPPRESS_BAD_ARGUMENTS (-1) +#define PROC_SUPPRESS_OLD_GENERATION (-2) +#define PROC_SUPPRESS_ALREADY_SUPPRESSED (-3) + +int proc_suppress(pid_t pid, uint64_t *generation); +#endif /* !TARGET_IPHONE_SIMULATOR */ + +__END_DECLS + +#endif /* _LIBPROC_INTERNALH_ */ + diff --git a/libsyscall/wrappers/libproc/proc_listpidspath.c b/libsyscall/wrappers/libproc/proc_listpidspath.c new file mode 100644 index 000000000..57494deb7 --- /dev/null +++ b/libsyscall/wrappers/libproc/proc_listpidspath.c @@ -0,0 +1,611 @@ +/* + * Copyright (c) 2007, 2008 Apple Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. 
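/*
 * Usage sketch for the boost-assertion pair declared above: after receiving a
 * Mach message, take an assertion if the message carried an importance boost,
 * service the request, then drop the assertion. handle_request() is
 * hypothetical; EIO from the begin call just means the message was not
 * boosting, so the work proceeds without a token.
 */
#include <stdint.h>
#include <mach/message.h>

extern int proc_importance_assertion_begin_with_msg(mach_msg_header_t *,
    mach_msg_trailer_t *, uint64_t *);
extern int proc_importance_assertion_complete(uint64_t);
extern void handle_request(mach_msg_header_t *);

static void
serve_possibly_boosted(mach_msg_header_t *msg)
{
    uint64_t token = 0;
    int err = proc_importance_assertion_begin_with_msg(msg, NULL, &token);

    handle_request(msg);

    if (err == 0)
        (void)proc_importance_assertion_complete(token);
}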
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include + + +typedef struct { + // process IDs + int *pids; + int pids_count; + size_t pids_size; + + // threads + uint64_t *threads; + int thr_count; + size_t thr_size; + + // open file descriptors + struct proc_fdinfo *fds; + int fds_count; + size_t fds_size; + + // file/volume of interest + struct stat match_stat; + + // flags + uint32_t flags; + +} fdOpenInfo, *fdOpenInfoRef; + + +/* + * check_init + */ +static fdOpenInfoRef +check_init(const char *path, uint32_t flags) +{ + fdOpenInfoRef info; + int status; + + info = malloc(sizeof(*info)); + if (!info) + return NULL; + + info->pids = NULL; + info->pids_count = 0; + info->pids_size = 0; + + info->threads = NULL; + info->thr_count = 0; + info->thr_size = 0; + + info->fds = NULL; + info->fds_count = 0; + info->fds_size = 0; + + status = stat(path, &info->match_stat); + if (status == -1) { + goto fail; + } + + info->flags = flags; + + return info; + + fail : + + free(info); + return NULL; +} + + +/* + * check_free + */ +static void +check_free(fdOpenInfoRef info) +{ + if (info->pids != NULL) { + free(info->pids); + } + + if (info->threads != NULL) { + free(info->threads); + } + + if (info->fds != NULL) { + free(info->fds); + } + + free(info); + + return; +} + + +/* + * check_file + * check if a process vnode is of interest + * + * in : vnode stat(2) + * out : -1 if error + * 0 if no match + * 1 if match + */ +static int +check_file(fdOpenInfoRef info, struct vinfo_stat *sb) +{ + if (sb->vst_dev == 0) { + // if no info + return 0; + } + + if (sb->vst_dev != info->match_stat.st_dev) { + // if not the requested filesystem + return 0; + } + + if (!(info->flags & PROC_LISTPIDSPATH_PATH_IS_VOLUME) && + (sb->vst_ino != info->match_stat.st_ino)) { + // if not the requested file + return 0; + } + + return 1; +} + + +/* + * check_process_vnodes + * check [process] current working directory + * check [process] root directory + * + * in : pid + * out : -1 if error + * 0 if no match + * 1 if match + */ +static int +check_process_vnodes(fdOpenInfoRef info, int pid) +{ + int buf_used; + int status; + struct proc_vnodepathinfo vpi; + + buf_used = proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vpi, sizeof(vpi)); + if (buf_used <= 0) { + if (errno == ESRCH) { + // if the process is gone + return 0; + } + return -1; + } else if (buf_used < sizeof(vpi)) { + // if we didn't get enough information + return -1; + } + + // processing current working directory + status = check_file(info, &vpi.pvi_cdir.vip_vi.vi_stat); + if (status != 0) { + // if error or match + return status; + } + + // processing root directory + status = check_file(info, &vpi.pvi_rdir.vip_vi.vi_stat); + if (status != 0) { + // if error or match + return status; + } + + return 0; +} + + +/* + * check_process_text + * check [process] text (memory) + * + * in : pid + * out : -1 if error + * 0 if no match + * 1 if match + */ +static int +check_process_text(fdOpenInfoRef info, int pid) +{ + uint64_t a = 0; + int status; + + while 
(1) { // for all memory regions + int buf_used; + struct proc_regionwithpathinfo rwpi; + + // processing next address + buf_used = proc_pidinfo(pid, PROC_PIDREGIONPATHINFO, a, &rwpi, sizeof(rwpi)); + if (buf_used <= 0) { + if ((errno == ESRCH) || (errno == EINVAL)) { + // if no more text information is available for this process. + break; + } + return -1; + } else if (buf_used < sizeof(rwpi)) { + // if we didn't get enough information + return -1; + } + + status = check_file(info, &rwpi.prp_vip.vip_vi.vi_stat); + if (status != 0) { + // if error or match + return status; + } + + a = rwpi.prp_prinfo.pri_address + rwpi.prp_prinfo.pri_size; + } + + return 0; +} + + +/* + * check_process_fds + * check [process] open file descriptors + * + * in : pid + * out : -1 if error + * 0 if no match + * 1 if match + */ +static int +check_process_fds(fdOpenInfoRef info, int pid) +{ + int buf_used; + int i; + int status; + + // get list of open file descriptors + buf_used = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, NULL, 0); + if (buf_used <= 0) { + return -1; + } + + while (1) { + if (buf_used > info->fds_size) { + // if we need to allocate [more] space + while (buf_used > info->fds_size) { + info->fds_size += (sizeof(struct proc_fdinfo) * 32); + } + + if (info->fds == NULL) { + info->fds = malloc(info->fds_size); + } else { + info->fds = reallocf(info->fds, info->fds_size); + } + if (info->fds == NULL) { + return -1; + } + } + + buf_used = proc_pidinfo(pid, PROC_PIDLISTFDS, 0, info->fds, info->fds_size); + if (buf_used <= 0) { + return -1; + } + + if ((buf_used + sizeof(struct proc_fdinfo)) >= info->fds_size) { + // if not enough room in the buffer for an extra fd + buf_used = info->fds_size + sizeof(struct proc_fdinfo); + continue; + } + + info->fds_count = buf_used / sizeof(struct proc_fdinfo); + break; + } + + // iterate through each file descriptor + for (i = 0; i < info->fds_count; i++) { + struct proc_fdinfo *fdp; + + fdp = &info->fds[i]; + switch (fdp->proc_fdtype) { + case PROX_FDTYPE_VNODE : { + int buf_used; + struct vnode_fdinfo vi; + + buf_used = proc_pidfdinfo(pid, fdp->proc_fd, PROC_PIDFDVNODEINFO, &vi, sizeof(vi)); + if (buf_used <= 0) { + if (errno == ENOENT) { + /* + * The file descriptor's vnode may have been revoked. This is a + * bit of a hack, since an ENOENT error might not always mean the + * descriptor's vnode has been revoked. As the libproc API + * matures, this code may need to be revisited. 
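/*
 * The fd, thread, and pid fetch loops in this file all follow one pattern:
 * size the list, grow the buffer in 32-entry steps, refetch, and retry
 * whenever the result leaves no slack (a new entry may have raced in between
 * the sizing call and the fetch). A generic sketch, with the fetch callback
 * left hypothetical:
 */
#include <stdlib.h>

/* returns bytes written, or <= 0 on error; a NULL buffer sizes the request */
typedef int (*fetch_fn)(void *buf, int bufsize);

static int
fetch_grow_retry(fetch_fn fetch, void **buf, size_t *bufsize, size_t entry_size)
{
    int used = fetch(NULL, 0);
    if (used <= 0)
        return -1;

    for (;;) {
        while ((size_t)used > *bufsize)
            *bufsize += entry_size * 32;        /* grow in 32-entry steps */

        void *p = (*buf == NULL) ? malloc(*bufsize) : realloc(*buf, *bufsize);
        if (p == NULL)
            return -1;
        *buf = p;

        used = fetch(*buf, (int)*bufsize);
        if (used <= 0)
            return -1;

        if ((size_t)used + entry_size < *bufsize)
            break;                              /* slack left: list is complete */
        used = (int)(*bufsize + entry_size);    /* no slack: force another pass */
    }
    return used / (int)entry_size;              /* entry count */
}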
+ */ + continue; + } + return -1; + } else if (buf_used < sizeof(vi)) { + // if we didn't get enough information + return -1; + } + + if ((info->flags & PROC_LISTPIDSPATH_EXCLUDE_EVTONLY) && + (vi.pfi.fi_openflags & O_EVTONLY)) { + // if this file should be excluded + continue; + } + + status = check_file(info, &vi.pvi.vi_stat); + if (status != 0) { + // if error or match + return status; + } + break; + } + default : + break; + } + } + + return 0; +} + + +/* + * check_process_threads + * check [process] thread working directories + * + * in : pid + * out : -1 if error + * 0 if no match + * 1 if match + */ +static int +check_process_threads(fdOpenInfoRef info, int pid) +{ + int buf_used; + int status; + struct proc_taskallinfo tai; + + buf_used = proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &tai, sizeof(tai)); + if (buf_used <= 0) { + if (errno == ESRCH) { + // if the process is gone + return 0; + } + return -1; + } else if (buf_used < sizeof(tai)) { + // if we didn't get enough information + return -1; + } + + // check thread info + if (tai.pbsd.pbi_flags & PROC_FLAG_THCWD) { + int i; + + // get list of threads + buf_used = tai.ptinfo.pti_threadnum * sizeof(uint64_t); + + while (1) { + if (buf_used > info->thr_size) { + // if we need to allocate [more] space + while (buf_used > info->thr_size) { + info->thr_size += (sizeof(uint64_t) * 32); + } + + if (info->threads == NULL) { + info->threads = malloc(info->thr_size); + } else { + info->threads = reallocf(info->threads, info->thr_size); + } + if (info->threads == NULL) { + return -1; + } + } + + buf_used = proc_pidinfo(pid, PROC_PIDLISTTHREADS, 0, info->threads, info->thr_size); + if (buf_used <= 0) { + return -1; + } + + if ((buf_used + sizeof(uint64_t)) >= info->thr_size) { + // if not enough room in the buffer for an extra thread + buf_used = info->thr_size + sizeof(uint64_t); + continue; + } + + info->thr_count = buf_used / sizeof(uint64_t); + break; + } + + // iterate through each thread + for (i = 0; i < info->thr_count; i++) { + uint64_t thr = info->threads[i]; + struct proc_threadwithpathinfo tpi; + + buf_used = proc_pidinfo(pid, PROC_PIDTHREADPATHINFO, thr, &tpi, sizeof(tpi)); + if (buf_used <= 0) { + if ((errno == ESRCH) || (errno == EINVAL)) { + // if the process or thread is gone + continue; + } + } else if (buf_used < sizeof(tai)) { + // if we didn't get enough information + return -1; + } + + status = check_file(info, &tpi.pvip.vip_vi.vi_stat); + if (status != 0) { + // if error or match + return status; + } + } + } + + return 0; +} + + +/* + * check_process + * check [process] current working and root directories + * check [process] text (memory) + * check [process] open file descriptors + * + * in : pid + * out : -1 if error + * 0 if no match + * 1 if match + */ +static int +check_process(fdOpenInfoRef info, int pid) +{ + int status; + + // check root and current working directory + status = check_process_vnodes(info, pid); + if (status != 0) { + // if error or match + return status; + } + + // check process text (memory) + status = check_process_text(info, pid); + if (status != 0) { + // if error or match + return status; + } + + // check open file descriptors + status = check_process_fds(info, pid); + if (status != 0) { + // if error or match + return status; + } + + // check per-thread working directories + status = check_process_threads(info, pid); + if (status != 0) { + // if error or match + return status; + } + + return 0; +} + + +/* + * proc_listpidspath + * + * in : type + * : typeinfo + * : path + * : pathflags + * : 
buffer + * : buffersize + * out : buffer filled with process IDs that have open file + * references that match the specified path or volume; + * return value is the bytes of the returned buffer + * that contains valid information. + */ +int +proc_listpidspath(uint32_t type, + uint32_t typeinfo, + const char *path, + uint32_t pathflags, + void *buffer, + int buffersize) +{ + int buf_used; + int *buf_next = (int *)buffer; + int i; + fdOpenInfoRef info; + int status = -1; + + if (buffer == NULL) { + // if this is a sizing request + return proc_listpids(type, typeinfo, NULL, 0); + } + + buffersize -= (buffersize % sizeof(int)); // make whole number of ints + if (buffersize < sizeof(int)) { + // if we can't even return a single PID + errno = ENOMEM; + return -1; + } + + // init + info = check_init(path, pathflags); + if (info == NULL) { + return -1; + } + + // get list of processes + buf_used = proc_listpids(type, typeinfo, NULL, 0); + if (buf_used <= 0) { + goto done; + } + + while (1) { + if (buf_used > info->pids_size) { + // if we need to allocate [more] space + while (buf_used > info->pids_size) { + info->pids_size += (sizeof(int) * 32); + } + + if (info->pids == NULL) { + info->pids = malloc(info->pids_size); + } else { + info->pids = reallocf(info->pids, info->pids_size); + } + if (info->pids == NULL) { + goto done; + } + } + + buf_used = proc_listpids(type, typeinfo, info->pids, info->pids_size); + if (buf_used <= 0) { + goto done; + } + + if ((buf_used + sizeof(int)) >= info->pids_size) { + // if not enough room in the buffer for an extra pid + buf_used = info->pids_size + sizeof(int); + continue; + } + + info->pids_count = buf_used / sizeof(int); + break; + } + + // iterate through each process + buf_used = 0; + for (i = info->pids_count - 1; i >= 0; i--) { + int pid; + int status; + + pid = info->pids[i]; + if (pid == 0) { + continue; + } + + status = check_process(info, pid); + if (status != 1) { + // if not a match + continue; + } + + *buf_next++ = pid; + buf_used += sizeof(int); + + if (buf_used >= buffersize) { + // if we have filled the buffer + break; + } + } + + status = buf_used; + + done : + + // cleanup + check_free(info); + + return status; +} diff --git a/libsyscall/wrappers/mach_absolute_time.s b/libsyscall/wrappers/mach_absolute_time.s new file mode 100644 index 000000000..603504a9a --- /dev/null +++ b/libsyscall/wrappers/mach_absolute_time.s @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+
+#include
+#include
+
+#if defined(__i386__)
+
+/* return mach_absolute_time in %edx:%eax
+ *
+ * The algorithm we use is:
+ *
+ *	ns = ((((rdtsc - rnt_tsc_base) << rnt_shift) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ *
+ * rnt_shift, a constant computed during initialization, is the smallest
+ * value for which:
+ *
+ *	(tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
+ *
+ * Where SLOW_TSC_THRESHOLD is about 10e9.  Since most processor's tscFreq is greater
+ * than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
+ *
+ *	rnt_tsc_scale = (10e9 * 2**32) / (tscFreq << rnt_shift);
+ */
+	.globl _mach_absolute_time
+_mach_absolute_time:
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	%esi
+	pushl	%ebx
+
+0:
+	movl	_COMM_PAGE_NT_GENERATION,%esi	/* get generation (0 if being changed) */
+	testl	%esi,%esi			/* if being updated, loop until stable */
+	jz	0b
+
+	lfence
+	rdtsc					/* get TSC in %edx:%eax */
+	lfence
+
+	subl	_COMM_PAGE_NT_TSC_BASE,%eax
+	sbbl	_COMM_PAGE_NT_TSC_BASE+4,%edx
+
+	/*
+	 * Prior to supporting "slow" processors, xnu always set _NT_SHIFT to 32.
+	 * Now it defaults to 0, unless the processor is slow.  The shifts
+	 * below implicitly mask the count down to 5 bits, handling either default.
+	 */
+	movl	_COMM_PAGE_NT_SHIFT,%ecx
+	shldl	%cl,%eax,%edx		/* shift %edx left, filling in from %eax */
+	shll	%cl,%eax		/* finish shifting %edx:%eax left by _COMM_PAGE_NT_SHIFT bits */
+
+	movl	_COMM_PAGE_NT_SCALE,%ecx
+
+	movl	%edx,%ebx
+	mull	%ecx
+	movl	%ebx,%eax
+	movl	%edx,%ebx
+	mull	%ecx
+	addl	%ebx,%eax
+	adcl	$0,%edx
+
+	addl	_COMM_PAGE_NT_NS_BASE,%eax
+	adcl	_COMM_PAGE_NT_NS_BASE+4,%edx
+
+	cmpl	_COMM_PAGE_NT_GENERATION,%esi	/* have the parameters changed? */
+	jne	0b				/* yes, loop until stable */
+
+	popl	%ebx
+	popl	%esi
+	popl	%ebp
+	ret
+
+#elif defined(__x86_64__)
+
+/*
+ * 64-bit version _mach_absolute_time.  We return the 64-bit nanotime in %rax.
+ *
+ * The algorithm we use is:
+ *
+ *	ns = ((((rdtsc - rnt_tsc_base) << rnt_shift) * rnt_tsc_scale) / 2**32) + rnt_ns_base;
+ *
+ * rnt_shift, a constant computed during initialization, is the smallest
+ * value for which:
+ *
+ *	(tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
+ *
+ * Where SLOW_TSC_THRESHOLD is about 10e9.  Since most processor's tscFreqs are greater
+ * than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
+ *
+ *	rnt_tsc_scale = (10e9 * 2**32) / (tscFreq << rnt_shift);
+ *
+ */
+	.globl	_mach_absolute_time
+_mach_absolute_time:
+	pushq	%rbp			// set up a frame for backtraces
+	movq	%rsp,%rbp
+	movq	$(_COMM_PAGE_TIME_DATA_START),%rsi
+1:
+	movl	_NT_GENERATION(%rsi),%r8d	// get generation
+	testl	%r8d,%r8d		// if 0, data is being changed...
+	jz	1b			// ...so loop until stable
+	lfence
+	rdtsc				// edx:eax := tsc
+	lfence
+	shlq	$32,%rdx		// rax := ((edx << 32) | eax), ie 64-bit tsc
+	orq	%rdx,%rax
+
+	/*
+	 * Prior to supporting "slow" processors, xnu always set _NT_SHIFT to 32.
+	 * Now it defaults to 0, unless the processor is slow.  In order to maintain
+	 * compatibility with both old and new versions of xnu, we mask the shift
+	 * down to 0x1F, which maps the old default (32) into the new default (0).
+	 */
+	movl	_NT_SHIFT(%rsi),%ecx
+	andl	$0x1F,%ecx		// *** remove this line once 10.9 is GM ***
+	subq	_NT_TSC_BASE(%rsi), %rax	// rax := (tsc - base_tsc)
+	shlq	%cl,%rax		// rax := (tsc - base_tsc) << NT_SHIFT
+	movl	_NT_SCALE(%rsi),%ecx
+	mulq	%rcx			// rdx:rax := ((tsc - base_tsc) << shift) * scale
+	shrdq	$32,%rdx,%rax		// rax := ((tsc - base_tsc) * scale) >> 32
+	addq	_NT_NS_BASE(%rsi),%rax	// rax := (((tsc - base_tsc) * scale) >> 32) + ns_base
+
+	cmpl	_NT_GENERATION(%rsi),%r8d	// did the data change during computation?
+	jne	1b
+	popq	%rbp
+	ret
+
+#else
+#error Unsupported architecture
+#endif
diff --git a/libsyscall/wrappers/spawn/posix_spawn.c b/libsyscall/wrappers/spawn/posix_spawn.c
new file mode 100644
index 000000000..3d8369be0
--- /dev/null
+++ b/libsyscall/wrappers/spawn/posix_spawn.c
@@ -0,0 +1,1520 @@
+/*
+ * Copyright (c) 2006-2012 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * [SPN] Support for _POSIX_SPAWN
+ */
+
+#define CONFIG_MEMORYSTATUS 1 //
+#include	/* for user_size_t */
+#include
+#include
+#include
+#include
+#include
+#include	/* for OPEN_MAX, PATH_MAX */
+#include
+#include
+#include
+#include
+
+#if TARGET_OS_EMBEDDED
+#include
+#endif
+
+/*
+ * posix_spawnattr_init
+ *
+ * Description:	Initialize a spawn attributes object attr with default values
+ *
+ * Parameters:	attr			The spawn attributes object to be
+ *					initialized
+ *
+ * Returns:	0			Success
+ *		ENOMEM			Insufficient memory exists to
+ *					initialize the spawn attributes object.
+ *
+ * Note:	As an implementation detail, the externally visible type
+ *		posix_spawnattr_t is defined to be a void *, and initialization
+ *		involves allocation of a memory object.  Subsequent changes to
+ *		the spawn attributes may result in reallocation under the
+ *		covers.
+ *
+ *		Reinitialization of an already initialized spawn attributes
+ *		object will result in memory being leaked.  Because spawn
+ *		attributes are not required to be used in conjunction with a
+ *		static initializer, there is no way to distinguish a spawn
+ *		attribute with stack garbage from one that's been initialized.
+ *		This is arguably an API design error.
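+ *
+ * Example:	a typical lifecycle pairs each init with a destroy
+ *		(illustrative sketch only; error handling omitted):
+ *
+ *		posix_spawnattr_t attr;
+ *		pid_t child;
+ *		char *const argv[] = { "/bin/ls", NULL };
+ *		char *const envp[] = { NULL };
+ *
+ *		(void)posix_spawnattr_init(&attr);
+ *		(void)posix_spawn(&child, "/bin/ls", NULL, &attr, argv, envp);
+ *		(void)posix_spawnattr_destroy(&attr);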
+ */
+int
+posix_spawnattr_init(posix_spawnattr_t *attr)
+{
+	_posix_spawnattr_t *psattrp = (_posix_spawnattr_t *)attr;
+	int	err = 0;
+
+	if ((*psattrp = (_posix_spawnattr_t)malloc(sizeof(struct _posix_spawnattr))) == NULL) {
+		err = ENOMEM;
+	} else {
+
+		/*
+		 * The default value of this attribute shall be as if no
+		 * flags were set
+		 */
+		(*psattrp)->psa_flags = 0;
+
+		/*
+		 * The default value of this attribute shall be an empty
+		 * signal set
+		 */
+		(*psattrp)->psa_sigdefault = 0;
+
+		/* The default value of this attribute is unspecified */
+		(*psattrp)->psa_sigmask = 0;
+
+		/* The default value of this attribute shall be zero */
+		(*psattrp)->psa_pgroup = 0;	/* doesn't matter */
+
+		/* Default is no binary preferences, i.e. use normal grading */
+		memset((*psattrp)->psa_binprefs, 0,
+		    sizeof((*psattrp)->psa_binprefs));
+
+		/* Default is no port actions to take */
+		(*psattrp)->psa_ports = NULL;
+
+		/*
+		 * The default value of this attribute shall be no
+		 * process control on resource starvation
+		 */
+		(*psattrp)->psa_pcontrol = 0;
+
+		/*
+		 * Initialize the alignment padding fields.
+		 */
+		(*psattrp)->short_padding = 0;
+		(*psattrp)->flags_padding = 0;
+		(*psattrp)->int_padding = 0;
+
+		/*
+		 * The default value of this attribute shall be zero, i.e. no
+		 * special process type or app launch behavior requested
+		 */
+		(*psattrp)->psa_apptype = 0;
+
+		/* Jetsam related */
+		(*psattrp)->psa_jetsam_flags = 0;
+		(*psattrp)->psa_priority = -1;
+		(*psattrp)->psa_high_water_mark = -1;
+
+		/* Default is no CPU usage monitor active. */
+		(*psattrp)->psa_cpumonitor_percent = 0;
+		(*psattrp)->psa_cpumonitor_interval = 0;
+
+		/* Default is no MAC policy extensions. */
+		(*psattrp)->psa_mac_extensions = NULL;
+	}
+
+	return (err);
+}
+
+
+/*
+ * posix_spawnattr_destroy
+ *
+ * Description:	Destroy a spawn attributes object that was previously
+ *		initialized via posix_spawnattr_init() by freeing any
+ *		memory associated with it and setting it to an invalid value.
+ *
+ * Parameters:	attr			The spawn attributes object to be
+ *					destroyed.
+ *
+ * Returns:	0			Success
+ *
+ * Notes:	The destroyed spawn attribute results in the void * pointer
+ *		being set to NULL; subsequent use without reinitialization
+ *		will result in explicit program failure (rather than merely
+ *		"undefined behaviour").
+ *
+ * NOTIMP:	Allowed failures (checking NOT required):
+ *		EINVAL	The value specified by attr is invalid.
+ */
+static int posix_spawn_destroyportactions_np(posix_spawnattr_t *);
+
+int
+posix_spawnattr_destroy(posix_spawnattr_t *attr)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	posix_spawn_destroyportactions_np(attr);
+
+	free(psattr);
+	*attr = NULL;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawnattr_setflags
+ *
+ * Description:	Set the spawn flags attribute for the spawn attribute object
+ *		referred to by 'attr'.
+ *
+ * Parameters:	attr			The spawn attributes object whose flags
+ *					are to be set
+ *		flags			The flags value to set
+ *
+ * Returns:	0			Success
+ *
+ * NOTIMP:	Allowed failures (checking NOT required):
+ *		EINVAL	The value specified by attr is invalid.
+ *		EINVAL	The value of the attribute being set is not valid.
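+ *
+ * Example:	(illustrative sketch; POSIX_SPAWN_SETPGROUP and
+ *		POSIX_SPAWN_SETSIGDEF are standard flags, shown as just one
+ *		possible combination, and 'attr' is assumed initialized)
+ *
+ *		(void)posix_spawnattr_setflags(&attr,
+ *		    POSIX_SPAWN_SETPGROUP | POSIX_SPAWN_SETSIGDEF);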
+ */
+int
+posix_spawnattr_setflags(posix_spawnattr_t *attr, short flags)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	psattr->psa_flags = flags;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawnattr_getflags
+ *
+ * Description:	Retrieve the spawn attributes flags for the spawn attributes
+ *		object referenced by 'attr' and place them in the memory
+ *		location referenced by 'flagsp'
+ *
+ * Parameters:	attr			The spawn attributes object whose flags
+ *					are to be retrieved
+ *		flagsp			A pointer to a short value to receive
+ *					the flags
+ *
+ * Returns:	0			Success
+ *
+ * Implicit Returns:
+ *		*flagsp (modified)	The flags value from the spawn
+ *					attributes object
+ *
+ * NOTIMP:	Allowed failures (checking NOT required):
+ *		EINVAL	The value specified by attr is invalid.
+ *		EINVAL	The value of the attribute being set is not valid.
+ */
+int
+posix_spawnattr_getflags(const posix_spawnattr_t * __restrict attr,
+		short * __restrict flagsp)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	*flagsp = psattr->psa_flags;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawnattr_getsigdefault
+ *
+ * Description:	Retrieve the set of signals to be set to default according to
+ *		the spawn attribute value referenced by 'attr' and place the
+ *		result into the memory containing the sigset_t referenced by
+ *		'sigdefault'
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					signal set for default signals is to
+ *					be retrieved
+ *		sigdefault		A pointer to the sigset_t to receive
+ *					the signal set
+ *
+ * Returns:	0			Success
+ *
+ * Implicit Returns:
+ *		*sigdefault (modified)	The signal set of signals to default
+ *					from the spawn attributes object
+ */
+int
+posix_spawnattr_getsigdefault(const posix_spawnattr_t * __restrict attr,
+		sigset_t * __restrict sigdefault)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	*sigdefault = psattr->psa_sigdefault;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawnattr_getpgroup
+ *
+ * Description:	Obtain the value of the spawn process group attribute from the
+ *		spawn attributes object referenced by 'attr' and place the
+ *		results in the memory location referenced by 'pgroup'
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					process group information is to be
+ *					retrieved
+ *		pgroup			A pointer to the pid_t to receive the
+ *					process group
+ *
+ * Returns:	0			Success
+ *
+ * Implicit Returns:
+ *		*pgroup (modified)	The process group information from the
+ *					spawn attributes object
+ */
+int
+posix_spawnattr_getpgroup(const posix_spawnattr_t * __restrict attr,
+		pid_t * __restrict pgroup)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	*pgroup = psattr->psa_pgroup;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawnattr_getsigmask
+ *
+ * Description:	Obtain the value of the spawn signal mask attribute from the
+ *		spawn attributes object referenced by 'attr' and place the
+ *		result into the memory containing the sigset_t referenced by
+ *		'sigmask'
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					signal set for masked signals is to
+ *					be retrieved
+ *		sigmask			A pointer to the sigset_t to receive
+ *					the signal set
+ *
+ * Returns:	0			Success
+ *
+ * Implicit Returns:
+ *		*sigmask (modified)	The signal set of signals to mask
+ *					from the spawn attributes object
+ */
+int
+posix_spawnattr_getsigmask(const posix_spawnattr_t * __restrict attr,
+		sigset_t * __restrict sigmask)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	*sigmask = psattr->psa_sigmask;
+
+	return (0);
+}
+
+/*
+ * posix_spawnattr_getbinpref_np
+ *
+ * Description:	Obtain the value of the spawn binary preferences attribute from
+ *		the spawn attributes object referenced by 'attr' and place the
+ *		result into the memory referenced by 'pref'.
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					binary preferences are to be retrieved
+ *		count			The size of the cpu_type_t array
+ *		pref			An array of cpu types
+ *		ocount			The actual number copied
+ *
+ * Returns:	0			Success
+ *		EINVAL			The value specified by attr is invalid
+ *
+ * Implicit Returns:
+ *		*pref (modified)	The binary preferences array
+ *					from the spawn attributes object
+ *		*ocount (modified)	The number of cpu types copied over
+ *					(at most 4, the size of the stored
+ *					preferences array)
+ */
+int
+posix_spawnattr_getbinpref_np(const posix_spawnattr_t * __restrict attr,
+		size_t count, cpu_type_t *pref, size_t * __restrict ocount)
+{
+	_posix_spawnattr_t psattr;
+	int i = 0;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	for (i = 0; i < count && i < 4; i++) {
+		pref[i] = psattr->psa_binprefs[i];
+	}
+
+	if (ocount)
+		*ocount = i;
+	return 0;
+}
+
+
+/*
+ * posix_spawnattr_getpcontrol_np
+ *
+ * Description:	Retrieve the process control property set according to
+ *		the spawn attribute value referenced by 'attr' and place the
+ *		result into the memory containing the control referenced by
+ *		'pcontrol'
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					process control info is to
+ *					be retrieved
+ *		pcontrol		A pointer to an int to receive
+ *					the process control info
+ *
+ * Returns:	0			Success
+ *
+ * Implicit Returns:
+ *		*pcontrol (modified)	The process control info from the
+ *					spawn attributes object
+ */
+int
+posix_spawnattr_getpcontrol_np(const posix_spawnattr_t * __restrict attr,
+		int * __restrict pcontrol)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	*pcontrol = psattr->psa_pcontrol;
+
+	return (0);
+}
+
+/*
+ * posix_spawnattr_getprocesstype_np
+ *
+ * Description:	Retrieve the process specific behaviors and app launch type
+ *		spawn attribute value referenced by 'attr' and place the
+ *		result into the memory containing the type referenced by
+ *		'proctype'
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					process type info is to
+ *					be retrieved
+ *		proctype		A pointer to an int to receive
+ *					the process type info
+ *
+ * Returns:	0			Success
+ *
+ * Implicit Returns:
+ *		*proctype (modified)	The process type set to value
+ *					from the spawn attributes object
+ */
+int
+posix_spawnattr_getprocesstype_np(const posix_spawnattr_t * __restrict attr,
+		int * __restrict proctype)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	*proctype = psattr->psa_apptype;
+
+	return (0);
+}
+/*
+ * posix_spawnattr_setsigdefault
+ *
+ * Description:	Set the set of signals to be set to default for the spawn
+ *		attribute value referenced by 'attr' from the memory
+ *		containing the sigset_t referenced by 'sigdefault'
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					signal set for default signals is to
+ *					be set
+ *		sigdefault		A pointer to the sigset_t from which to
+ *					obtain the signal set
+ *
+ * Returns:	0			Success
+ */
+int
+posix_spawnattr_setsigdefault(posix_spawnattr_t * __restrict attr,
+		const sigset_t * __restrict sigdefault)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	psattr->psa_sigdefault = *sigdefault;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawnattr_setpgroup
+ *
+ * Description:	Set the value of the spawn process group attribute for the
+ *		spawn attributes object referenced by 'attr' from the value
+ *		of 'pgroup'
+ *
+ * Parameters:	attr			The spawn attributes object for which
+ *					the process group information is to be
+ *					set
+ *		pgroup			The process group to set
+ *
+ * Returns:	0			Success
+ */
+int
+posix_spawnattr_setpgroup(posix_spawnattr_t * attr, pid_t pgroup)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	psattr->psa_pgroup = pgroup;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawnattr_setsigmask
+ *
+ * Description:	Set the set of signals to be masked for the spawn attribute
+ *		value referenced by 'attr' from the memory containing the
+ *		sigset_t referenced by 'sigmask'
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					signal set for masked signals is to
+ *					be set
+ *		sigmask			A pointer to the sigset_t from which to
+ *					obtain the signal set
+ *
+ * Returns:	0			Success
+ */
+int
+posix_spawnattr_setsigmask(posix_spawnattr_t * __restrict attr,
+		const sigset_t * __restrict sigmask)
+{
+	_posix_spawnattr_t psattr;
+
+	if (attr == NULL || *attr == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	psattr->psa_sigmask = *sigmask;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawnattr_setbinpref_np
+ *
+ * Description:	Set the universal binary preferences for the spawn attribute
+ *		value referenced by 'attr' from the memory containing the
+ *		cpu_type_t array referenced by 'pref', size of 'count'
+ *
+ * Parameters:	attr			The spawn attributes object whose
+ *					binary preferences are to be set
+ *		count			Size of the array pointed to by 'pref'
+ *		pref			cpu_type_t array of binary preferences
+ *		ocount			The actual number copied
+ *
+ * Returns:	0			Success
+ *		EINVAL			The value specified by attr is invalid
+ *
+ * Implicit Returns:
+ *		*ocount (modified)	The number of preferences actually
+ *					copied
+ *
+ * Note:	The posix_spawnattr_t currently only holds four cpu_type_t's.
+ *		If the caller provides more preferences than this limit, they
+ *		will be ignored, as reflected in the count returned via
+ *		'ocount'.
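+ *
+ * Example:	requesting the 64-bit Intel slice of a universal binary
+ *		(illustrative sketch; CPU_TYPE_X86_64 comes from
+ *		<mach/machine.h>, and 'attr' is assumed initialized):
+ *
+ *		cpu_type_t pref[] = { CPU_TYPE_X86_64 };
+ *		size_t copied = 0;
+ *		(void)posix_spawnattr_setbinpref_np(&attr, 1, pref, &copied);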
+ */ +int +posix_spawnattr_setbinpref_np(posix_spawnattr_t * __restrict attr, + size_t count, cpu_type_t *pref, size_t * __restrict ocount) +{ + _posix_spawnattr_t psattr; + int i = 0; + + if (attr == NULL || *attr == NULL) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + for (i = 0; i < count && i < 4; i++) { + psattr->psa_binprefs[i] = pref[i]; + } + + /* return number of binprefs copied over */ + if (ocount) + *ocount = i; + return 0; +} + + +/* + * posix_spawnattr_setpcontrol_np + * + * Description: Set the process control property according to + * attribute value referenced by 'attr' from the memory + * containing the int value 'pcontrol' + * + * Parameters: attr The spawn attributes object whose + * signal set for default signals is to + * be set + * pcontrol An int value of the process control info + * + * Returns: 0 Success + */ +int +posix_spawnattr_setpcontrol_np(posix_spawnattr_t * __restrict attr, + const int pcontrol) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + psattr->psa_pcontrol = pcontrol; + + return (0); +} + + +/* + * posix_spawnattr_setprocesstype_np + * + * Description: Set the process specific behaviors and app launch type + * attribute value referenced by 'attr' from the memory + * containing the int value 'proctype' + * + * Parameters: attr The spawn attributes object whose + * signal set for default signals is to + * be set + * proctype An int value of the process type info + * + * Returns: 0 Success + */ +int +posix_spawnattr_setprocesstype_np(posix_spawnattr_t * __restrict attr, + const int proctype) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + psattr->psa_apptype = proctype; + + return (0); +} + +/* + * posix_spawn_createportactions_np + * Description: create a new posix_spawn_port_actions struct and link + * it into the posix_spawnattr. 
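+ *	The initial allocation holds two actions; when it fills up,
+ *	posix_spawn_growportactions_np() doubles the capacity, so repeated
+ *	appends cost amortized constant time.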
+ */ +static int +posix_spawn_createportactions_np(posix_spawnattr_t *attr) +{ + _posix_spawnattr_t psattr; + _posix_spawn_port_actions_t acts; + + if (attr == NULL || *attr == NULL) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + acts = (_posix_spawn_port_actions_t)malloc(PS_PORT_ACTIONS_SIZE(2)); + if (acts == NULL) + return ENOMEM; + + acts->pspa_alloc = 2; + acts->pspa_count = 0; + + psattr->psa_ports = acts; + return 0; +} + +/* + * posix_spawn_growportactions_np + * Description: Enlarge the size of portactions if necessary + */ +static int +posix_spawn_growportactions_np(posix_spawnattr_t *attr) +{ + _posix_spawnattr_t psattr; + _posix_spawn_port_actions_t acts; + int newnum; + + if (attr == NULL || *attr == NULL) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + acts = psattr->psa_ports; + if (acts == NULL) + return EINVAL; + + /* Double number of port actions allocated for */ + newnum = 2 * acts->pspa_alloc; + acts = realloc(acts, PS_PORT_ACTIONS_SIZE(newnum)); + if (acts == NULL) + return ENOMEM; + + acts->pspa_alloc = newnum; + psattr->psa_ports = acts; + return 0; +} + +/* + * posix_spawn_destroyportactions_np + * Description: clean up portactions struct in posix_spawnattr_t attr + */ +static int +posix_spawn_destroyportactions_np(posix_spawnattr_t *attr) +{ + _posix_spawnattr_t psattr; + _posix_spawn_port_actions_t acts; + + if (attr == NULL || *attr == NULL) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + acts = psattr->psa_ports; + if (acts == NULL) + return EINVAL; + + free(acts); + return 0; +} + +/* + * posix_spawn_appendportaction_np + * Description: append a port action, grow the array if necessary + */ +static int +posix_spawn_appendportaction_np(posix_spawnattr_t *attr, _ps_port_action_t *act) +{ + _posix_spawnattr_t psattr; + _posix_spawn_port_actions_t acts; + + if (attr == NULL || *attr == NULL || act == NULL) { + return EINVAL; + } + + psattr = *(_posix_spawnattr_t *)attr; + acts = psattr->psa_ports; + + // Have any port actions been created yet? + if (acts == NULL) { + int err = posix_spawn_createportactions_np(attr); + if (err) { + return err; + } + acts = psattr->psa_ports; + } + + // Is there enough room? + if (acts->pspa_alloc == acts->pspa_count) { + int err = posix_spawn_growportactions_np(attr); + if (err) { + return err; + } + acts = psattr->psa_ports; + } + + // Add this action to next spot in array + acts->pspa_actions[acts->pspa_count] = *act; + acts->pspa_count++; + + return 0; +} + +/* + * posix_spawnattr_setspecialport_np + * + * Description: Set a new value for a mach special port in the spawned task. + * + * Parameters: attr The spawn attributes object for the + * new process + * new_port The new value for the special port + * which The particular port to be set + * (see task_set_special_port for details) + * + * Returns: 0 Success + * ENOMEM Couldn't allocate memory + */ +int +posix_spawnattr_setspecialport_np( + posix_spawnattr_t *attr, + mach_port_t new_port, + int which) +{ + _ps_port_action_t action = { + .port_type = PSPA_SPECIAL, + .new_port = new_port, + .which = which, + }; + return posix_spawn_appendportaction_np(attr, &action); +} + +/* + * posix_spawnattr_setexceptionports_np + * + * Description: Set a new port for a set of exception ports in the spawned task. 
+ *
+ * Parameters:	attr			The spawn attributes object for the
+ *					new process
+ *		mask			A bitfield indicating which exceptions
+ *					to associate the port with
+ *		new_port		The new value for the exception port
+ *		behavior		The default behavior for the port
+ *		flavor			The default flavor for the port
+ *					(see task_set_exception_ports)
+ *
+ * Returns:	0			Success
+ */
+int
+posix_spawnattr_setexceptionports_np(
+		posix_spawnattr_t *attr,
+		exception_mask_t mask,
+		mach_port_t new_port,
+		exception_behavior_t behavior,
+		thread_state_flavor_t flavor)
+{
+	_ps_port_action_t action = {
+		.port_type = PSPA_EXCEPTION,
+		.mask = mask,
+		.new_port = new_port,
+		.behavior = behavior,
+		.flavor = flavor,
+	};
+	return posix_spawn_appendportaction_np(attr, &action);
+}
+
+/*
+ * posix_spawnattr_setauditsessionport_np
+ *
+ * Description:	Set the audit session port rights attribute in the spawned task.
+ *		This is used to securely set the audit session information for
+ *		the new task.
+ *
+ * Parameters:	attr			The spawn attributes object for the
+ *					new process
+ *		au_sessionport		The audit session send port right
+ *
+ * Returns:	0			Success
+ */
+int
+posix_spawnattr_setauditsessionport_np(
+		posix_spawnattr_t *attr,
+		mach_port_t au_sessionport)
+{
+	_ps_port_action_t action = {
+		.port_type = PSPA_AU_SESSION,
+		.new_port = au_sessionport,
+	};
+	return posix_spawn_appendportaction_np(attr, &action);
+}
+
+
+/*
+ * posix_spawn_file_actions_init
+ *
+ * Description:	Initialize a spawn file actions object attr with default values
+ *
+ * Parameters:	file_actions		The spawn file actions object to be
+ *					initialized
+ *
+ * Returns:	0			Success
+ *		ENOMEM			Insufficient memory exists to
+ *					initialize the spawn file actions
+ *					object.
+ *
+ * Note:	As an implementation detail, the externally visible type
+ *		posix_spawn_file_actions_t is defined to be a void *, and
+ *		initialization involves allocation of a memory object.
+ *		Subsequent changes to the spawn file actions may result in
+ *		reallocation under the covers.
+ *
+ *		Reinitialization of an already initialized spawn file actions
+ *		object will result in memory being leaked.  Because spawn
+ *		file actions are not required to be used in conjunction with a
+ *		static initializer, there is no way to distinguish a spawn
+ *		file actions with stack garbage from one that's been
+ *		initialized.  This is arguably an API design error.
+ */
+int
+posix_spawn_file_actions_init(posix_spawn_file_actions_t *file_actions)
+{
+	_posix_spawn_file_actions_t *psactsp = (_posix_spawn_file_actions_t *)file_actions;
+	int	err = 0;
+
+	if ((*psactsp = (_posix_spawn_file_actions_t)malloc(PSF_ACTIONS_SIZE(PSF_ACTIONS_INIT_COUNT))) == NULL) {
+		err = ENOMEM;
+	} else {
+		(*psactsp)->psfa_act_alloc = PSF_ACTIONS_INIT_COUNT;
+		(*psactsp)->psfa_act_count = 0;
+	}
+
+	return (err);
+}
+
+
+/*
+ * posix_spawn_file_actions_destroy
+ *
+ * Description:	Destroy a spawn file actions object that was previously
+ *		initialized via posix_spawn_file_actions_init() by freeing any
+ *		memory associated with it and setting it to an invalid value.
+ *
+ * Parameters:	attr			The spawn file actions object to be
+ *					destroyed.
+ *
+ * Returns:	0			Success
+ *
+ * Notes:	The destroyed spawn file actions results in the void * pointer
+ *		being set to NULL; subsequent use without reinitialization
+ *		will result in explicit program failure (rather than merely
+ *		"undefined behaviour").
+ *
+ * NOTIMP:	Allowed failures (checking NOT required):
+ *		EINVAL	The value specified by file_actions is invalid.
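+ *
+ * Example:	matched init/destroy around a spawn (illustrative sketch;
+ *		error handling omitted):
+ *
+ *		posix_spawn_file_actions_t fa;
+ *
+ *		(void)posix_spawn_file_actions_init(&fa);
+ *		// ... add open/close/dup2 actions, then posix_spawn() ...
+ *		(void)posix_spawn_file_actions_destroy(&fa);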
+ */
+int
+posix_spawn_file_actions_destroy(posix_spawn_file_actions_t *file_actions)
+{
+	_posix_spawn_file_actions_t psacts;
+
+	if (file_actions == NULL || *file_actions == NULL)
+		return EINVAL;
+
+	psacts = *(_posix_spawn_file_actions_t *)file_actions;
+	free(psacts);
+	*file_actions = NULL;
+
+	return (0);
+}
+
+
+/*
+ * _posix_spawn_file_actions_grow
+ *
+ * Description:	Grow the available list of file actions associated with the
+ *		pointer to the structure provided; replace the contents of the
+ *		pointer as a side effect.
+ *
+ * Parameters:	psactsp			Pointer to _posix_spawn_file_actions_t
+ *					to grow
+ *
+ * Returns:	0			Success
+ *		ENOMEM			Insufficient memory for operation
+ *
+ * Notes:	This code is common to all posix_spawn_file_actions_*()
+ *		functions, since we use a naive data structure implementation
+ *		at present.  Future optimization will likely change this.
+ */
+static int
+_posix_spawn_file_actions_grow(_posix_spawn_file_actions_t *psactsp)
+{
+	int new_alloc = (*psactsp)->psfa_act_alloc * 2;
+	_posix_spawn_file_actions_t new_psacts;
+
+	/*
+	 * XXX may want to impose an administrative limit here; POSIX does
+	 * XXX not provide for an administrative error return in this case,
+	 * XXX so it's probably acceptable to just fail catastrophically
+	 * XXX instead of implementing one.
+	 */
+	if ((new_psacts = (_posix_spawn_file_actions_t)realloc((*psactsp), PSF_ACTIONS_SIZE(new_alloc))) == NULL) {
+		return (ENOMEM);
+	}
+	new_psacts->psfa_act_alloc = new_alloc;
+	*psactsp = new_psacts;
+
+	return (0);
+}
+
+
+/*
+ * posix_spawn_file_actions_addopen
+ *
+ * Description:	Add an open action to the object referenced by 'file_actions'
+ *		that will cause the file named by 'path' to be attempted to be
+ *		opened with flags 'oflag' and mode 'mode', and, if successful,
+ *		return as descriptor 'filedes' to the spawned process.
+ *
+ * Parameters:	file_actions		File action object to augment
+ *		filedes			fd that open is to use
+ *		path			path to file to open
+ *		oflag			open file flags
+ *		mode			open file mode
+ *
+ * Returns:	0			Success
+ *		EBADF			The value specified by fildes is
+ *					negative or greater than or equal to
+ *					{OPEN_MAX}.
+ *		ENOMEM			Insufficient memory exists to add to
+ *					the spawn file actions object.
+ *
+ * NOTIMP:	Allowed failures (checking NOT required):
+ *		EINVAL	The value specified by file_actions is invalid.
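+ *
+ * Example:	sending the child's stdout to a log file (illustrative
+ *		sketch; assumes an initialized file actions object 'fa',
+ *		and the path and mode shown are arbitrary):
+ *
+ *		(void)posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO,
+ *		    "/tmp/child.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);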
+ */ +int +posix_spawn_file_actions_addopen( + posix_spawn_file_actions_t * __restrict file_actions, + int filedes, const char * __restrict path, int oflag, + mode_t mode) +{ + _posix_spawn_file_actions_t *psactsp; + _psfa_action_t *psfileact; + + if (file_actions == NULL || *file_actions == NULL) + return EINVAL; + + psactsp = (_posix_spawn_file_actions_t *)file_actions; + /* Range check; required by POSIX */ + if (filedes < 0 || filedes >= OPEN_MAX) + return (EBADF); + + /* If we do not have enough slots, grow the structure */ + if ((*psactsp)->psfa_act_count == (*psactsp)->psfa_act_alloc) { + /* need to grow file actions structure */ + if (_posix_spawn_file_actions_grow(psactsp)) + return (ENOMEM); + } + + /* + * Allocate next available slot and fill it out + */ + psfileact = &(*psactsp)->psfa_act_acts[(*psactsp)->psfa_act_count++]; + + psfileact->psfaa_type = PSFA_OPEN; + psfileact->psfaa_filedes = filedes; + psfileact->psfaa_openargs.psfao_oflag = oflag; + psfileact->psfaa_openargs.psfao_mode = mode; + strlcpy(psfileact->psfaa_openargs.psfao_path, path, PATH_MAX); + + return (0); +} + + +/* + * posix_spawn_file_actions_addclose + * + * Description: Add a close action to the object referenced by 'file_actions' + * that will cause the file referenced by 'filedes' to be + * attempted to be closed in the spawned process. + * + * Parameters: file_actions File action object to augment + * filedes fd to close + * + * Returns: 0 Success + * EBADF The value specified by fildes is + * negative or greater than or equal to + * {OPEN_MAX}. + * ENOMEM Insufficient memory exists to add to + * the spawn file actions object. + * + * NOTIMP: Allowed failures (checking NOT required): + * EINVAL The value specified by file_actions is invalid. + */ +int +posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *file_actions, + int filedes) +{ + _posix_spawn_file_actions_t *psactsp; + _psfa_action_t *psfileact; + + if (file_actions == NULL || *file_actions == NULL) + return EINVAL; + + psactsp = (_posix_spawn_file_actions_t *)file_actions; + /* Range check; required by POSIX */ + if (filedes < 0 || filedes >= OPEN_MAX) + return (EBADF); + + /* If we do not have enough slots, grow the structure */ + if ((*psactsp)->psfa_act_count == (*psactsp)->psfa_act_alloc) { + /* need to grow file actions structure */ + if (_posix_spawn_file_actions_grow(psactsp)) + return (ENOMEM); + } + + /* + * Allocate next available slot and fill it out + */ + psfileact = &(*psactsp)->psfa_act_acts[(*psactsp)->psfa_act_count++]; + + psfileact->psfaa_type = PSFA_CLOSE; + psfileact->psfaa_filedes = filedes; + + return (0); +} + + +/* + * posix_spawn_file_actions_adddup2 + * + * Description: Add a dup2 action to the object referenced by 'file_actions' + * that will cause the file referenced by 'filedes' to be + * attempted to be dup2'ed to the descriptor 'newfiledes' in the + * spawned process. + * + * Parameters: file_actions File action object to augment + * filedes fd to dup2 + * newfiledes fd to dup2 it to + * + * Returns: 0 Success + * EBADF The value specified by either fildes + * or by newfiledes is negative or greater + * than or equal to {OPEN_MAX}. + * ENOMEM Insufficient memory exists to add to + * the spawn file actions object. + * + * NOTIMP: Allowed failures (checking NOT required): + * EINVAL The value specified by file_actions is invalid. 
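+ *
+ * Example:	the spawn-time equivalent of the shell's "2>&1", making the
+ *		child's stderr a copy of its stdout (illustrative sketch;
+ *		assumes an initialized file actions object 'fa'):
+ *
+ *		(void)posix_spawn_file_actions_adddup2(&fa, STDOUT_FILENO,
+ *		    STDERR_FILENO);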
+ */ +int +posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *file_actions, + int filedes, int newfiledes) +{ + _posix_spawn_file_actions_t *psactsp; + _psfa_action_t *psfileact; + + if (file_actions == NULL || *file_actions == NULL) + return EINVAL; + + psactsp = (_posix_spawn_file_actions_t *)file_actions; + /* Range check; required by POSIX */ + if (filedes < 0 || filedes >= OPEN_MAX || + newfiledes < 0 || newfiledes >= OPEN_MAX) + return (EBADF); + + /* If we do not have enough slots, grow the structure */ + if ((*psactsp)->psfa_act_count == (*psactsp)->psfa_act_alloc) { + /* need to grow file actions structure */ + if (_posix_spawn_file_actions_grow(psactsp)) + return (ENOMEM); + } + + /* + * Allocate next available slot and fill it out + */ + psfileact = &(*psactsp)->psfa_act_acts[(*psactsp)->psfa_act_count++]; + + psfileact->psfaa_type = PSFA_DUP2; + psfileact->psfaa_filedes = filedes; + psfileact->psfaa_openargs.psfao_oflag = newfiledes; + + return (0); +} + +/* + * posix_spawn_file_actions_addinherit_np + * + * Description: Add the "inherit" action to the object referenced by + * 'file_actions' that will cause the file referenced by + * 'filedes' to continue to be available in the spawned + * process via the same descriptor. + * + * Inheritance is the normal default behaviour for + * file descriptors across exec and spawn; but if the + * POSIX_SPAWN_CLOEXEC_DEFAULT flag is set, the usual + * default is reversed for the purposes of the spawn + * invocation. Any pre-existing descriptors that + * need to be made available to the spawned process can + * be marked explicitly as 'inherit' via this interface. + * Otherwise they will be automatically closed. + * + * Note that any descriptors created via the other file + * actions interfaces are automatically marked as 'inherit'. + * + * Parameters: file_actions File action object to augment + * filedes fd to inherit. + * + * Returns: 0 Success + * EBADF The value specified by fildes is + * negative or greater than or equal to + * {OPEN_MAX}. + * ENOMEM Insufficient memory exists to add to + * the spawn file actions object. + * + * NOTIMP: Allowed failures (checking NOT required): + * EINVAL The value specified by file_actions is invalid. 
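+ *
+ * Example:	spawning with a close-everything default while keeping a
+ *		single descriptor open (illustrative sketch; 'sock_fd' is a
+ *		hypothetical pre-existing descriptor, and 'attr'/'fa' are
+ *		assumed initialized):
+ *
+ *		(void)posix_spawnattr_setflags(&attr, POSIX_SPAWN_CLOEXEC_DEFAULT);
+ *		(void)posix_spawn_file_actions_addinherit_np(&fa, sock_fd);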
+ */ +int +posix_spawn_file_actions_addinherit_np(posix_spawn_file_actions_t *file_actions, + int filedes) +{ + _posix_spawn_file_actions_t *psactsp; + _psfa_action_t *psfileact; + + if (file_actions == NULL || *file_actions == NULL) + return (EINVAL); + + psactsp = (_posix_spawn_file_actions_t *)file_actions; + /* Range check; required by POSIX */ + if (filedes < 0 || filedes >= OPEN_MAX) + return (EBADF); + +#if defined(POSIX_SPAWN_CLOEXEC_DEFAULT) // TODO: delete this check + /* If we do not have enough slots, grow the structure */ + if ((*psactsp)->psfa_act_count == (*psactsp)->psfa_act_alloc) { + /* need to grow file actions structure */ + if (_posix_spawn_file_actions_grow(psactsp)) + return (ENOMEM); + } + + /* + * Allocate next available slot and fill it out + */ + psfileact = &(*psactsp)->psfa_act_acts[(*psactsp)->psfa_act_count++]; + + psfileact->psfaa_type = PSFA_INHERIT; + psfileact->psfaa_filedes = filedes; +#endif + return (0); +} + +int +posix_spawnattr_setcpumonitor_default(posix_spawnattr_t * __restrict attr) +{ + return (posix_spawnattr_setcpumonitor(attr, PROC_POLICY_CPUMON_DEFAULTS, 0)); +} + +int +posix_spawnattr_setcpumonitor(posix_spawnattr_t * __restrict attr, + uint64_t percent, uint64_t interval) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) + return (EINVAL); + + psattr = *(_posix_spawnattr_t *)attr; + + psattr->psa_cpumonitor_percent = percent; + psattr->psa_cpumonitor_interval = interval; + + return (0); +} + +int +posix_spawnattr_getcpumonitor(posix_spawnattr_t * __restrict attr, + uint64_t *percent, uint64_t *interval) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) + return (EINVAL); + + psattr = *(_posix_spawnattr_t *)attr; + + *percent = psattr->psa_cpumonitor_percent; + *interval = psattr->psa_cpumonitor_interval; + + return (0); +} + +#if TARGET_OS_EMBEDDED +/* + * posix_spawnattr_setjetsam + * + * Description: Set jetsam attributes for the spawn attribute object + * referred to by 'attr'. + * + * Parameters: flags The flags value to set + * priority Relative jetsam priority + * high_water_mark Value in pages; resident page + * counts above this level can + * result in termination + * + * Returns: 0 Success + */ +int +posix_spawnattr_setjetsam(posix_spawnattr_t * __restrict attr, + short flags, int priority, int high_water_mark) +{ + _posix_spawnattr_t psattr; + + if (attr == NULL || *attr == NULL) + return EINVAL; + + psattr = *(_posix_spawnattr_t *)attr; + + psattr->psa_jetsam_flags = flags; + psattr->psa_jetsam_flags |= POSIX_SPAWN_JETSAM_SET; + psattr->psa_priority = priority; + psattr->psa_high_water_mark = high_water_mark; + + return (0); +} +#endif + + +/* + * posix_spawnattr_set_importancewatch_port_np + * + * Description: Mark ports referred to by these rights + * to boost the new task instead of their current task + * for the spawn attribute object referred to by 'attr'. + * Ports must be valid at posix_spawn time. They will NOT be + * consumed by the kernel, so they must be deallocated after the spawn returns. + * (If you are SETEXEC-ing, they are cleaned up by the exec operation). + * + * The maximum number of watch ports allowed is defined by POSIX_SPAWN_IMPORTANCE_PORT_COUNT. + * + * Parameters: count Number of ports in portarray + * portarray Array of rights + * + * Returns: 0 Success + * EINVAL Bad port count + * ENOMEM Insufficient memory exists to add to + * the spawn port actions object. 
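+ *
+ * Example:	(illustrative sketch; 'client_port' is a hypothetical send
+ *		right already held by the caller, and 'attr' is assumed
+ *		initialized)
+ *
+ *		mach_port_t watch[] = { client_port };
+ *		(void)posix_spawnattr_set_importancewatch_port_np(&attr,
+ *		    1, watch);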
+ */
+int
+posix_spawnattr_set_importancewatch_port_np(posix_spawnattr_t * __restrict attr,
+		int count, mach_port_t portarray[])
+{
+	int err = 0, i;
+
+	if (count < 0 || count > POSIX_SPAWN_IMPORTANCE_PORT_COUNT) {
+		return EINVAL;
+	}
+
+	for (i = 0; i < count; i++) {
+		_ps_port_action_t action = {
+			.port_type = PSPA_IMP_WATCHPORTS,
+			.new_port = portarray[i],
+		};
+		err = posix_spawn_appendportaction_np(attr, &action);
+		if (err) {
+			break;
+		}
+	}
+	return err;
+}
+
+
+
+static
+_ps_mac_policy_extension_t *
+posix_spawnattr_macpolicyinfo_lookup(_posix_spawn_mac_policy_extensions_t psmx, const char *policyname)
+{
+	int i;
+
+	if (psmx == NULL)
+		return NULL;
+
+	for (i = 0; i < psmx->psmx_count; i++) {
+		_ps_mac_policy_extension_t *extension = &psmx->psmx_extensions[i];
+		if (strcmp(extension->policyname, policyname) == 0)
+			return extension;
+	}
+	return NULL;
+}
+
+int
+posix_spawnattr_getmacpolicyinfo_np(const posix_spawnattr_t * __restrict attr,
+		const char *policyname, void **datap, size_t *datalenp)
+{
+	_posix_spawnattr_t psattr;
+	_ps_mac_policy_extension_t *extension;
+
+	if (attr == NULL || *attr == NULL || policyname == NULL || datap == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	extension = posix_spawnattr_macpolicyinfo_lookup(psattr->psa_mac_extensions, policyname);
+	if (extension == NULL)
+		return ESRCH;
+	*datap = (void *)(uintptr_t)extension->data;
+	if (datalenp != NULL)
+		*datalenp = extension->datalen;
+	return 0;
+}
+
+int
+posix_spawnattr_setmacpolicyinfo_np(posix_spawnattr_t * __restrict attr,
+		const char *policyname, void *data, size_t datalen)
+{
+	_posix_spawnattr_t psattr;
+	_posix_spawn_mac_policy_extensions_t psmx;
+	_ps_mac_policy_extension_t *extension;
+
+	if (attr == NULL || *attr == NULL || policyname == NULL)
+		return EINVAL;
+
+	psattr = *(_posix_spawnattr_t *)attr;
+	psmx = psattr->psa_mac_extensions;
+	extension = posix_spawnattr_macpolicyinfo_lookup(psattr->psa_mac_extensions, policyname);
+	if (extension != NULL) {
+		extension->data = (uintptr_t)data;
+		extension->datalen = datalen;
+		return 0;
+	}
+	else if (psmx == NULL) {
+		psmx = psattr->psa_mac_extensions = malloc(PS_MAC_EXTENSIONS_SIZE(PS_MAC_EXTENSIONS_INIT_COUNT));
+		if (psmx == NULL)
+			return ENOMEM;
+		psmx->psmx_alloc = PS_MAC_EXTENSIONS_INIT_COUNT;
+		psmx->psmx_count = 0;
+	}
+	else if (psmx->psmx_count == psmx->psmx_alloc) {
+		psmx = psattr->psa_mac_extensions = reallocf(psmx, PS_MAC_EXTENSIONS_SIZE(psmx->psmx_alloc * 2));
+		if (psmx == NULL)
+			return ENOMEM;
+		psmx->psmx_alloc *= 2;
+	}
+	extension = &psmx->psmx_extensions[psmx->psmx_count];
+	strlcpy(extension->policyname, policyname, sizeof(extension->policyname));
+	extension->data = (uintptr_t)data;
+	extension->datalen = datalen;
+	psmx->psmx_count += 1;
+	return 0;
+}
+
+/*
+ * posix_spawn
+ *
+ * Description:	Create a new process from the process image corresponding to
+ *		the supplied 'path' argument.
+ *
+ * Parameters:	pid			Pointer to pid_t to receive the
+ *					PID of the spawned process, if
+ *					successful and 'pid' != NULL
+ *		path			Path of image file to spawn
+ *		file_actions		spawn file actions object which
+ *					describes file actions to be
+ *					performed during the spawn
+ *		attrp			spawn attributes object which
+ *					describes attributes to be
+ *					applied during the spawn
+ *		argv			argument vector array; NULL
+ *					terminated
+ *		envp			environment vector array; NULL
+ *					terminated
+ *
+ * Returns:	0			Success
+ *		!0			An errno value indicating the
+ *					cause of the failure to spawn
+ *
+ * Notes:	Unlike other system calls, the return value of this system
+ *		call is expected to either be a 0 or an errno, rather than a
+ *		0 or a -1, with the 'errno' variable being set.
+ */
+extern int __posix_spawn(pid_t * __restrict, const char * __restrict,
+		struct _posix_spawn_args_desc *,
+		char *const argv[ __restrict], char *const envp[ __restrict]);
+
+int
+posix_spawn(pid_t * __restrict pid, const char * __restrict path,
+		const posix_spawn_file_actions_t *file_actions,
+		const posix_spawnattr_t * __restrict attrp,
+		char *const argv[ __restrict], char *const envp[ __restrict])
+{
+	int saveerrno = errno;
+	int ret;
+	/*
+	 * Only do extra work if we have file actions or attributes to push
+	 * down.  We use a descriptor to push this information down, since we
+	 * want to have size information, which will let us (1) preallocate a
+	 * single chunk of memory for the copyin(), and (2) allow us to do a
+	 * single copyin() per attributes or file actions as a monolithic block.
+	 *
+	 * Note:	A future implementation may attempt to do the same
+	 *		thing for the argv/envp data, which could potentially
+	 *		result in a performance improvement due to increased
+	 *		kernel efficiency, even though it would mean copying
+	 *		the data in user space.
+	 */
+	if ((file_actions != NULL && (*file_actions != NULL) && (*(_posix_spawn_file_actions_t *)file_actions)->psfa_act_count > 0) || attrp != NULL) {
+		struct _posix_spawn_args_desc	ad;
+
+		memset(&ad, 0, sizeof(ad));
+		if (attrp != NULL && *attrp != NULL) {
+			_posix_spawnattr_t psattr = *(_posix_spawnattr_t *)attrp;
+			ad.attr_size = sizeof(struct _posix_spawnattr);
+			ad.attrp = psattr;
+
+			if (psattr->psa_ports != NULL) {
+				ad.port_actions = psattr->psa_ports;
+				ad.port_actions_size = PS_PORT_ACTIONS_SIZE(
+						ad.port_actions->pspa_count);
+			}
+			if (psattr->psa_mac_extensions != NULL) {
+				ad.mac_extensions = psattr->psa_mac_extensions;
+				ad.mac_extensions_size = PS_MAC_EXTENSIONS_SIZE(
+						ad.mac_extensions->psmx_count);
+			}
+		}
+		if (file_actions != NULL && *file_actions != NULL) {
+			_posix_spawn_file_actions_t psactsp =
+				*(_posix_spawn_file_actions_t *)file_actions;
+
+			if (psactsp->psfa_act_count > 0) {
+				ad.file_actions_size = PSF_ACTIONS_SIZE(psactsp->psfa_act_count);
+				ad.file_actions = psactsp;
+			}
+		}
+
+		ret = __posix_spawn(pid, path, &ad, argv, envp);
+	} else
+		ret = __posix_spawn(pid, path, NULL, argv, envp);
+
+	if (ret < 0)
+		ret = errno;
+	errno = saveerrno;
+	return ret;
+}
+
diff --git a/libsyscall/wrappers/spawn/spawn.h b/libsyscall/wrappers/spawn/spawn.h
new file mode 100644
index 000000000..53b4ecb86
--- /dev/null
+++ b/libsyscall/wrappers/spawn/spawn.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2006, 2010 Apple Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). 
You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +#ifndef _SPAWN_H_ +#define _SPAWN_H_ + +/* + * [SPN] Support for _POSIX_SPAWN + */ + +#include +#include <_types.h> +#include /* shared types */ + +#include + +/* + * [SPN] Inclusion of the header may make visible symbols defined + * in the , , and headers. + */ +#include +#include +#include + +/* + * Opaque types for use with posix_spawn() family functions. Internals are + * not defined, and should not be accessed directly. Types are defined as + * mandated by POSIX. + */ +typedef void *posix_spawnattr_t; +typedef void *posix_spawn_file_actions_t; + +__BEGIN_DECLS +/* + * gcc under c99 mode won't compile "[ __restrict]" by itself. As a workaround, + * a dummy argument name is added. + */ +int posix_spawn(pid_t * __restrict, const char * __restrict, + const posix_spawn_file_actions_t *, + const posix_spawnattr_t * __restrict, + char *const __argv[ __restrict], + char *const __envp[ __restrict]) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnp(pid_t * __restrict, const char * __restrict, + const posix_spawn_file_actions_t *, + const posix_spawnattr_t * __restrict, + char *const __argv[ __restrict], + char *const __envp[ __restrict]) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawn_file_actions_addclose(posix_spawn_file_actions_t *, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawn_file_actions_adddup2(posix_spawn_file_actions_t *, int, + int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawn_file_actions_addopen( + posix_spawn_file_actions_t * __restrict, int, + const char * __restrict, int, mode_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawn_file_actions_destroy(posix_spawn_file_actions_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawn_file_actions_init(posix_spawn_file_actions_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_destroy(posix_spawnattr_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_getsigdefault(const posix_spawnattr_t * __restrict, + sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_getflags(const posix_spawnattr_t * __restrict, + short * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_getpgroup(const posix_spawnattr_t * __restrict, + pid_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_getsigmask(const posix_spawnattr_t * __restrict, + sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_init(posix_spawnattr_t *) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_setsigdefault(posix_spawnattr_t * __restrict, + const sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int 
posix_spawnattr_setflags(posix_spawnattr_t *, short) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_setpgroup(posix_spawnattr_t *, pid_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_setsigmask(posix_spawnattr_t * __restrict, + const sigset_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); + +#if 0 /* _POSIX_PRIORITY_SCHEDULING [PS] : not supported */ +int posix_spawnattr_setschedparam(posix_spawnattr_t * __restrict, + const struct sched_param * __restrict); +int posix_spawnattr_setschedpolicy(posix_spawnattr_t *, int); +int posix_spawnattr_getschedparam(const posix_spawnattr_t * __restrict, + struct sched_param * __restrict); +int posix_spawnattr_getschedpolicy(const posix_spawnattr_t * __restrict, + int * __restrict); +#endif /* 0 */ + +__END_DECLS + +#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE) +/* + * Darwin-specific extensions below + */ +#include +#include +#include + +#include + +__BEGIN_DECLS + +int posix_spawnattr_getbinpref_np(const posix_spawnattr_t * __restrict, + size_t, cpu_type_t *__restrict, size_t *__restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_setauditsessionport_np(posix_spawnattr_t *__restrict, + mach_port_t) __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_3_2); +int posix_spawnattr_setbinpref_np(posix_spawnattr_t * __restrict, + size_t, cpu_type_t *__restrict, size_t *__restrict) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_setexceptionports_np(posix_spawnattr_t *__restrict, + exception_mask_t, mach_port_t, + exception_behavior_t, thread_state_flavor_t) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawnattr_setspecialport_np(posix_spawnattr_t *__restrict, + mach_port_t, int) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0); +int posix_spawn_file_actions_addinherit_np(posix_spawn_file_actions_t *, + int) __OSX_AVAILABLE_STARTING(__MAC_10_7, __IPHONE_4_3); + +__END_DECLS + +#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */ +#endif /* _SPAWN_H_ */ diff --git a/libsyscall/wrappers/spawn/spawn_private.h b/libsyscall/wrappers/spawn/spawn_private.h new file mode 100644 index 000000000..4dd89b859 --- /dev/null +++ b/libsyscall/wrappers/spawn/spawn_private.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2006, 2008 Apple,Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _SPAWN_PRIVATE_H_ +#define _SPAWN_PRIVATE_H_ + +#include +#include +#include +#include + +int posix_spawnattr_getpcontrol_np(const posix_spawnattr_t * __restrict, int * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_3_2); +int posix_spawnattr_setpcontrol_np(posix_spawnattr_t *, const int) __OSX_AVAILABLE_STARTING(__MAC_10_6, __IPHONE_3_2); + +int posix_spawnattr_getprocesstype_np(const posix_spawnattr_t * __restrict, int * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); +int posix_spawnattr_setprocesstype_np(posix_spawnattr_t *, const int) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); + +int posix_spawnattr_setcpumonitor(posix_spawnattr_t * __restrict, uint64_t, uint64_t) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); +int posix_spawnattr_getcpumonitor(posix_spawnattr_t * __restrict, uint64_t *, uint64_t *) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_6_0); +int posix_spawnattr_setcpumonitor_default(posix_spawnattr_t * __restrict) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_6_0); + +#if TARGET_OS_EMBEDDED +int posix_spawnattr_setjetsam(posix_spawnattr_t * __restrict attr, + short flags, int priority, int high_water_mark) __OSX_AVAILABLE_STARTING(__MAC_NA, __IPHONE_5_0); +#endif + +#define POSIX_SPAWN_IMPORTANCE_PORT_COUNT 32 +int posix_spawnattr_set_importancewatch_port_np(posix_spawnattr_t * __restrict attr, + int count, mach_port_t portarray[]) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_6_0); + +#define POSIX_SPAWN_MACPOLICYINFO_WITHSIZE 1 +int posix_spawnattr_getmacpolicyinfo_np(const posix_spawnattr_t * __restrict, const char *, void **, size_t *) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); +int posix_spawnattr_setmacpolicyinfo_np(posix_spawnattr_t * __restrict, const char *, void *, size_t) __OSX_AVAILABLE_STARTING(__MAC_10_9, __IPHONE_7_0); + +#endif /* !defined _SPAWN_PRIVATE_H_*/ diff --git a/libsyscall/wrappers/string/index.c b/libsyscall/wrappers/string/index.c new file mode 100644 index 000000000..8c4d3e05e --- /dev/null +++ b/libsyscall/wrappers/string/index.c @@ -0,0 +1,47 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "strings.h" + +__attribute__((visibility("hidden"))) +char * +index +(const char *p, int ch) +{ + char c; + + c = ch; + for (;; ++p) { + if (*p == c) + return ((char *)p); + if (*p == '\0') + return (NULL); + } + /* NOTREACHED */ +} diff --git a/libsyscall/wrappers/memcpy.c b/libsyscall/wrappers/string/memcpy.c similarity index 96% rename from libsyscall/wrappers/memcpy.c rename to libsyscall/wrappers/string/memcpy.c index 5bce5933a..ef30a90ba 100644 --- a/libsyscall/wrappers/memcpy.c +++ b/libsyscall/wrappers/string/memcpy.c @@ -47,7 +47,7 @@ typedef int word; /* "word" used for optimal copy speed */ * (the portable versions of) bcopy, memcpy, and memmove. */ -__private_extern__ +__attribute__((visibility("hidden"))) void * memcpy(void *dst0, const void *src0, size_t length) { char *dst = dst0; @@ -113,13 +113,15 @@ done: return (dst0); } -__private_extern__ void * +__attribute__((visibility("hidden"))) +void * memmove(void *s1, const void *s2, size_t n) { return memcpy(s1, s2, n); } -__private_extern__ void +__attribute__((visibility("hidden"))) +void bcopy(const void *s1, void *s2, size_t n) { memcpy(s2, s1, n); diff --git a/libsyscall/wrappers/string/memset.c b/libsyscall/wrappers/string/memset.c new file mode 100644 index 000000000..3ae3c7717 --- /dev/null +++ b/libsyscall/wrappers/string/memset.c @@ -0,0 +1,112 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Mike Hibler and Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "strings.h" +#include <limits.h> + +#define wsize sizeof(u_int) +#define wmask (wsize - 1) + +__attribute__((visibility("hidden"))) +void +bzero(void *dst0, size_t length) +{ + return (void)memset(dst0, 0, length); +} + +#define RETURN return (dst0) +#define VAL c0 +#define WIDEVAL c + +__attribute__((visibility("hidden"))) +void * +memset(void *dst0, int c0, size_t length) +{ + size_t t; + u_int c; + u_char *dst; + + dst = dst0; + /* + * If not enough words, just fill bytes. A length >= 2 words + * guarantees that at least one of them is `complete' after + * any necessary alignment. For instance: + * + * |-----------|-----------|-----------| + * |00|01|02|03|04|05|06|07|08|09|0A|0B| + * ^---------------------^ + * dst dst+length-1 + * + * but we use a minimum of 3 here since the overhead of the code + * to do word writes is substantial. + */ + if (length < 3 * wsize) { + while (length != 0) { + *dst++ = VAL; + --length; + } + RETURN; + } + + if ((c = (u_char)c0) != 0) { /* Fill the word. */ + c = (c << 8) | c; /* u_int is 16 bits. */ +#if UINT_MAX > 0xffff + c = (c << 16) | c; /* u_int is 32 bits. */ +#endif +#if UINT_MAX > 0xffffffff + c = (c << 32) | c; /* u_int is 64 bits. */ +#endif + } + /* Align destination by filling in bytes. */ + if ((t = (long)dst & wmask) != 0) { + t = wsize - t; + length -= t; + do { + *dst++ = VAL; + } while (--t != 0); + } + + /* Fill words. Length was >= 2*words so we know t >= 1 here. */ + t = length / wsize; + do { + *(u_int *)dst = WIDEVAL; + dst += wsize; + } while (--t != 0); + + /* Mop up trailing bytes, if any. */ + t = length & wmask; + if (t != 0) + do { + *dst++ = VAL; + } while (--t != 0); + RETURN; +} diff --git a/libsyscall/wrappers/string/strcmp.c b/libsyscall/wrappers/string/strcmp.c new file mode 100644 index 000000000..cffe07883 --- /dev/null +++ b/libsyscall/wrappers/string/strcmp.c @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "string.h" + +/* + * Compare strings. + */ +__attribute__((visibility("hidden"))) +int +strcmp(const char *s1, const char *s2) +{ + while (*s1 == *s2++) + if (*s1++ == '\0') + return (0); + return (*(const unsigned char *)s1 - *(const unsigned char *)(s2 - 1)); +} diff --git a/libsyscall/wrappers/string/strcpy.c b/libsyscall/wrappers/string/strcpy.c new file mode 100644 index 000000000..026d098c7 --- /dev/null +++ b/libsyscall/wrappers/string/strcpy.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 Apple, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include "strings.h" + +__attribute__((visibility("hidden"))) +char * +strcpy(char * restrict dst, const char * restrict src) { + const size_t length = strlen(src); + // The stpcpy() and strcpy() functions copy the string src to dst + // (including the terminating '\0' character). + memcpy(dst, src, length+1); + // The strcpy() and strncpy() functions return dst. + return dst; +} diff --git a/bsd/libkern/locc.c b/libsyscall/wrappers/string/strings.h similarity index 74% rename from bsd/libkern/locc.c rename to libsyscall/wrappers/string/strings.h index 2f84e6976..540711260 100644 --- a/bsd/libkern/locc.c +++ b/libsyscall/wrappers/string/strings.h @@ -1,19 +1,14 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000, 2007, 2010 Apple Inc. All rights reserved. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER @@ -23,10 +18,10 @@ * Please see the License for the specific language governing rights and * limitations under the License. * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + * @APPLE_LICENSE_HEADER_END@ */ /*- - * Copyright (c) 1992, 1993 + * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,21 +52,28 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)locc.c 8.1 (Berkeley) 6/10/93 + * @(#)strings.h 8.1 (Berkeley) 6/2/93 */ -#include +#ifndef _STRINGS_H_ +#define _STRINGS_H_ + +#include <_types.h> +#include + +void *memmove(void *, const void *, size_t); +void *memset(void *, int, size_t); +int strcmp(const char *, const char *); +char *strcpy(char *, const char *); +size_t strlen(const char *); +size_t strlcpy(char *, const char *, size_t); +char *strsep(char **, const char *); + +void bcopy(const void *, void *, size_t); +void bzero(void *, size_t); +char *index(const char *, int); + +#include "string.h" -int -locc(mask0, cp0, size) - int mask0; - char *cp0; - u_int size; -{ - register u_char *cp, *end, mask; +#endif /* _STRINGS_H_ */ - mask = mask0; - cp = (u_char *)cp0; - for (end = &cp[size]; cp < end && *cp != mask; ++cp); - return (end - cp); -} diff --git a/libsyscall/wrappers/string/strlcpy.c b/libsyscall/wrappers/string/strlcpy.c new file mode 100644 index 000000000..72e4b2bd8 --- /dev/null +++ b/libsyscall/wrappers/string/strlcpy.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2011 Apple, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include "strings.h" + +__attribute__((visibility("hidden"))) +size_t +strlcpy(char * restrict dst, const char * restrict src, size_t maxlen) { + const size_t srclen = strlen(src); + if (srclen < maxlen) { + memcpy(dst, src, srclen+1); + } else if (maxlen != 0) { + memcpy(dst, src, maxlen-1); + dst[maxlen-1] = '\0'; + } + return srclen; +} diff --git a/libsyscall/wrappers/string/strlen.c b/libsyscall/wrappers/string/strlen.c new file mode 100644 index 000000000..6854e0343 --- /dev/null +++ b/libsyscall/wrappers/string/strlen.c @@ -0,0 +1,107 @@ +/*- + * Copyright (c) 2009 Xin LI + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "string.h" +#include <limits.h> + +/* + * Portable strlen() for 32-bit and 64-bit systems. + * + * Rationale: it is generally much more efficient to do word length + * operations and avoid branches on modern computer systems, as + * compared to byte-length operations with a lot of branches. + * + * The expression: + * + * ((x - 0x01....01) & ~x & 0x80....80) + * + * would evaluate to a non-zero value iff any of the bytes in the + * original word is zero. However, we can save a further ~1/3 of + * the time if we consider that strlen() usually operates on 7-bit + * ASCII and employ the following expression instead, which allows + * a false positive when a byte has its high bit set and relies on + * the tail case to catch those bytes: + * + * ((x - 0x01....01) & 0x80....80) + * + * This is more than 5.2 times as fast as the raw implementation on + * Intel T7300 under long mode for strings longer than word length. + */ + +/* Magic numbers for the algorithm */ +#if LONG_BIT == 32 +static const unsigned long mask01 = 0x01010101; +static const unsigned long mask80 = 0x80808080; +#elif LONG_BIT == 64 +static const unsigned long mask01 = 0x0101010101010101; +static const unsigned long mask80 = 0x8080808080808080; +#else +#error Unsupported word size +#endif +#define LONGPTR_MASK (sizeof(long) - 1) + +/* + * Helper macro to return string length if we caught the zero + * byte.
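+ * + * For illustration (example values added here, not part of the original + * comment): with 32-bit words, a word containing a zero byte such as + * x = 0x61006263 gives + * + * (x - 0x01010101) & 0x80808080 == 0x5FFF6162 & 0x80808080 == 0x00800000, + * + * which is non-zero, so the word is flagged and the testbyte() cases + * below pinpoint the terminating NUL.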
+ */ +#define testbyte(x) \ + do { \ + if (p[x] == '\0') \ + return (p - str + x); \ + } while (0) + +__attribute__((visibility("hidden"))) +size_t +strlen(const char *str) +{ + const char *p; + const unsigned long *lp; + + /* Skip the first few bytes until we have an aligned p */ + for (p = str; (uintptr_t)p & LONGPTR_MASK; p++) + if (*p == '\0') + return (p - str); + + /* Scan the rest of the string using word sized operation */ + for (lp = (const unsigned long *)p; ; lp++) + if ((*lp - mask01) & mask80) { + p = (const char *)(lp); + testbyte(0); + testbyte(1); + testbyte(2); + testbyte(3); +#if (LONG_BIT >= 64) + testbyte(4); + testbyte(5); + testbyte(6); + testbyte(7); +#endif + } + + /* NOTREACHED */ + return (0); +} + diff --git a/libsyscall/wrappers/string/strsep.c b/libsyscall/wrappers/string/strsep.c new file mode 100644 index 000000000..029d0f3c8 --- /dev/null +++ b/libsyscall/wrappers/string/strsep.c @@ -0,0 +1,69 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "string.h" + +/* + * Get next token from string *stringp, where tokens are possibly-empty + * strings separated by characters from delim. + * + * Writes NULs into the string at *stringp to end tokens. + * delim need not remain constant from call to call. + * On return, *stringp points past the last NUL written (if there might + * be further tokens), or is NULL (if there are definitely no more tokens). + * + * If *stringp is NULL, strsep returns NULL. 
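+ * + * A usage sketch, for illustration (buf, p and token are placeholder + * names, not from the original sources): + * + * char buf[] = "key=value"; + * char *p = buf, *token; + * while ((token = strsep(&p, "=")) != NULL) + * printf("%s\n", token); + * + * prints "key" and then "value", and leaves p set to NULL.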
+ */ +__attribute__((visibility("hidden"))) +char * +strsep(char **stringp, const char *delim) +{ + char *s; + const char *spanp; + int c, sc; + char *tok; + + if ((s = *stringp) == NULL) + return (NULL); + for (tok = s;;) { + c = *s++; + spanp = delim; + do { + if ((sc = *spanp++) == c) { + if (c == 0) + s = NULL; + else + s[-1] = 0; + *stringp = s; + return (tok); + } + } while (sc != 0); + } + /* NOTREACHED */ +} diff --git a/libsyscall/wrappers/unix03/mmap.c b/libsyscall/wrappers/unix03/mmap.c index 60c6bba32..6d52fbe3a 100644 --- a/libsyscall/wrappers/unix03/mmap.c +++ b/libsyscall/wrappers/unix03/mmap.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include "stack_logging_internal.h" void *__mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off); @@ -47,16 +49,28 @@ mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off) * o flags does not contain either MAP_PRIVATE or MAP_SHARED * o len is zero */ - extern void cthread_set_errno_self(int); + extern void cerror_nocancel(int); if ((off & PAGE_MASK) || (((flags & MAP_PRIVATE) != MAP_PRIVATE) && ((flags & MAP_SHARED) != MAP_SHARED)) || (len == 0)) { - cthread_set_errno_self(EINVAL); + cerror_nocancel(EINVAL); return(MAP_FAILED); } - return(__mmap(addr, len, prot, flags, fildes, off)); + void *ptr = __mmap(addr, len, prot, flags, fildes, off); + + if (__syscall_logger) { + int stackLoggingFlags = stack_logging_type_vm_allocate; + if (flags & MAP_ANON) { + stackLoggingFlags |= (fildes & VM_FLAGS_ALIAS_MASK); + } else { + stackLoggingFlags |= stack_logging_type_mapped_file_or_shared_mem; + } + __syscall_logger(stackLoggingFlags, (uintptr_t)mach_task_self(), (uintptr_t)len, 0, (uintptr_t)ptr, 0); + } + + return ptr; } #endif /* __DARWIN_UNIX03 */ diff --git a/iokit/Kernel/i386/IOSharedLock.s b/libsyscall/wrappers/unix03/munmap.c similarity index 57% rename from iokit/Kernel/i386/IOSharedLock.s rename to libsyscall/wrappers/unix03/munmap.c index 9360dce09..b2d1079a8 100644 --- a/iokit/Kernel/i386/IOSharedLock.s +++ b/libsyscall/wrappers/unix03/munmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2010 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -21,44 +21,31 @@ * @APPLE_LICENSE_HEADER_END@ */ -#include +#include - TEXT - -/* - * void - * OSSpinLockUnlock(p) - * int *p; - * - * Unlock the lock pointed to by p. - */ - -LEAF(_OSSpinLockUnlock, 0) -LEAF(_IOSpinUnlock, 0) -LEAF(_ev_unlock, 0) - movl 4(%esp), %ecx - movl $0, (%ecx) -END(_OSSpinLockUnlock) +#if __DARWIN_UNIX03 +#include +#include +#include "stack_logging_internal.h" /* - * int - * OSSpinLockTry(p) - * int *p; + * munmap stub, for stack logging of VM allocations. * - * Try to lock p. Return zero if not successful. + * This is for UNIX03 only. 
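+ * + * When a logging hook has been installed in __syscall_logger (for + * example by malloc stack logging), the wrapper below also reports each + * unmapping as a vm_deallocate-type event once the underlying + * __munmap() syscall has returned.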
*/ +extern int __munmap(void *, size_t); + +int +munmap(void *addr, size_t len) +{ + int result = __munmap(addr, len); + + if (__syscall_logger) { + __syscall_logger(stack_logging_type_vm_deallocate, (uintptr_t)mach_task_self(), (uintptr_t)addr, len, 0, 0); + } + + return result; +} -LEAF(_OSSpinLockTry, 0) -LEAF(_IOTrySpinLock, 0) -LEAF(_ev_try_lock, 0) - movl 4(%esp), %ecx - xorl %eax, %eax - lock - cmpxchgl %ecx, (%ecx) - jne 1f - movl $1, %eax /* yes */ - ret -1: - xorl %eax, %eax /* no */ -END(_OSSpinLockTry) +#endif /* __DARWIN_UNIX03 */ diff --git a/libsyscall/xcodescripts/compile-syscalls.pl b/libsyscall/xcodescripts/compile-syscalls.pl index a75737992..f0c269132 100755 --- a/libsyscall/xcodescripts/compile-syscalls.pl +++ b/libsyscall/xcodescripts/compile-syscalls.pl @@ -62,6 +62,7 @@ chomp(my $CC = `xcrun -sdk "$ENV{'SDKROOT'}" -find cc`); my @CFLAGS = ( "-x assembler-with-cpp", "-c", + "-isysroot", $ENV{'SDKROOT'} || "/", "-I".$ENV{"SDKROOT"}."/System/Library/Frameworks/System.framework/PrivateHeaders", ); diff --git a/libsyscall/xcodescripts/create-syscalls.pl b/libsyscall/xcodescripts/create-syscalls.pl index 85c282f0b..ca2700866 100755 --- a/libsyscall/xcodescripts/create-syscalls.pl +++ b/libsyscall/xcodescripts/create-syscalls.pl @@ -1,6 +1,6 @@ #!/usr/bin/perl # -# Copyright (c) 2006 Apple Computer, Inc. All rights reserved. +# Copyright (c) 2006-2012 Apple Inc. All rights reserved. # # @APPLE_OSREFERENCE_LICENSE_HEADER_START@ # @@ -23,7 +23,7 @@ # ########################################################################## # -# % create-syscalls.pl syscalls.master custom-directory out-directory +# % create-syscalls.pl syscalls.master custom-directory platforms-directory platform-name out-directory # # This script fills the out-directory with a Makefile.inc and *.s # files to create the double-underbar syscall stubs. It reads the # @@ -61,7 +61,9 @@ my $OutDir; # size in bytes of known types (only used for i386) my %TypeBytes = ( 'au_asid_t' => 4, + 'associd_t' => 4, 'caddr_t' => 4, + 'connid_t' => 4, 'gid_t' => 4, 'id_t' => 4, 'idtype_t' => 4, @@ -89,6 +91,7 @@ my %TypeBytes = ( 'user_size_t' => 4, 'user_ssize_t' => 4, 'user_ulong_t' => 4, + 'uuid_t' => 4, ); # Moving towards storing all data in this hash, then we always know @@ -130,14 +133,15 @@ my %Symbols = ( # cancellable version of cerror. my @Cancelable = qw/ accept access aio_suspend - close connect + close connect connectx + disconnectx fcntl fdatasync fpathconf fstat fsync getlogin ioctl link lseek lstat msgrcv msgsnd msync open - pathconf poll posix_spawn pread pwrite + pathconf peeloff poll posix_spawn pread pwrite read readv recvfrom recvmsg rename __semwait_signal __sigwait select sem_wait semop sendmsg sendto sigsuspend stat symlink sync @@ -146,7 +150,7 @@ /; sub usage { - die "Usage: $MyName syscalls.master custom-directory platforms-directory out-directory\n"; + die "Usage: $MyName syscalls.master custom-directory platforms-directory platform-name out-directory\n"; } ########################################################################## @@ -323,6 +327,9 @@ sub writeStubForSymbol { print $f "#define __SYSCALL_32BIT_ARG_BYTES $$symbol{bytes}\n"; print $f "#include \"SYS.h\"\n\n"; if (scalar(@conditions)) { + printf $f "#ifndef SYS_%s\n", $$symbol{syscall}; + printf $f "#error \"SYS_%s not defined. The header files libsyscall is building against do not match syscalls.master.\"\n", $$symbol{syscall}; + printf $f "#endif\n\n"; my $nc = ($is_cancel{$$symbol{syscall}} ?
"cerror" : "cerror_nocancel"); printf $f "#if " . join(" || ", @conditions) . "\n"; printf $f "__SYSCALL2(%s, %s, %d, %s)\n", $$symbol{asm_sym}, $$symbol{syscall}, $$symbol{nargs}, $nc; diff --git a/libsyscall/xcodescripts/mach_install_mig.sh b/libsyscall/xcodescripts/mach_install_mig.sh index 3f98c487d..ac66a4f2d 100755 --- a/libsyscall/xcodescripts/mach_install_mig.sh +++ b/libsyscall/xcodescripts/mach_install_mig.sh @@ -31,7 +31,7 @@ cd $OBJROOT # check if we're building for the simulator -if [ "${RC_ProjectName%_Sim}" != "${RC_ProjectName}" ] ; then +if [ "$PLATFORM_NAME" = "iphonesimulator" ] ; then DSTROOT="${DSTROOT}${SDKROOT}" fi @@ -42,8 +42,15 @@ MIG_DEFINES="-DLIBSYSCALL_INTERFACE" MIG_HEADER_DST="$DSTROOT/usr/include/mach" MIG_PRIVATE_HEADER_DST="$DSTROOT/usr/local/include/mach" SERVER_HEADER_DST="$DSTROOT/usr/include/servers" +MACH_HEADER_DST="$DSTROOT/usr/include/mach" + # from old Libsystem makefiles MACHINE_ARCH=`echo $ARCHS | cut -d' ' -f 1` +if [[ ( "$MACHINE_ARCH" = "x86_64" ) && `echo $ARCHS | wc -w` -gt 1 ]] +then + # MACHINE_ARCH needs to be a 32-bit arch to generate vm_map_internal.h correctly. + MACHINE_ARCH=`echo $ARCHS | cut -d' ' -f 2` +fi SRC="$SRCROOT/mach" MIG_INTERNAL_HEADER_DST="$DERIVED_SOURCES_DIR/mach" MIG_PRIVATE_DEFS_INCFLAGS="-I${SDKROOT}/System/Library/Frameworks/System.framework/PrivateHeaders" @@ -59,23 +66,21 @@ MIGS="clock.defs mach_port.defs processor.defs processor_set.defs + task.defs + thread_act.defs vm_map.defs" MIGS_PRIVATE="" MIGS_DUAL_PUBLIC_PRIVATE="" -if [[ "$PLATFORM" = "iPhoneOS" || "$RC_ProjectName" = "Libsyscall_headers_Sim" ]] +if [[ "$PLATFORM_NAME" = "iphoneos" || "$PLATFORM_NAME" = "iphonesimulator" ]] then MIGS_PRIVATE="mach_vm.defs" else MIGS+=" mach_vm.defs" fi - -MIGS_ARCH="thread_act.defs - task.defs" - MIGS_INTERNAL="mach_port.defs mach_vm.defs vm_map.defs" @@ -85,12 +90,26 @@ SERVER_HDRS="key_defs.h netname_defs.h nm_defs.h" +MACH_HDRS="mach.h + mach_error.h + mach_init.h + mach_interface.h + port_obj.h + sync.h + vm_task.h" + # install /usr/include/server headers mkdir -p $SERVER_HEADER_DST for hdr in $SERVER_HDRS; do install -o 0 -c -m 444 $SRC/servers/$hdr $SERVER_HEADER_DST done +# install /usr/include/mach headers +mkdir -p $MACH_HEADER_DST +for hdr in $MACH_HDRS; do + install -o 0 -c -m 444 $SRC/mach/$hdr $MACH_HEADER_DST +done + # special case because we only have one to do here $MIG -arch $MACHINE_ARCH -header "$SERVER_HEADER_DST/netname.h" $SRC/servers/netname.defs @@ -125,14 +144,3 @@ for mig in $MIGS_INTERNAL; do $MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_INTERNAL_HEADER_DST/${MIG_NAME}_internal.h" $SRC/$mig done -ARCHS=`echo $ARCHS | sed -e 's/armv./arm/g'` -for arch in $ARCHS; do - MIG_ARCH_DST="$MIG_HEADER_DST/$arch" - - mkdir -p $MIG_ARCH_DST - - for mig in $MIGS_ARCH; do - MIG_NAME=`basename $mig .defs` - $MIG -arch $MACHINE_ARCH -cc $MIGCC -header "$MIG_ARCH_DST/$MIG_NAME.h" $MIG_DEFINES $SRC/$mig - done -done diff --git a/lldbmacros.py b/lldbmacros.py deleted file mode 100644 index 9278abb61..000000000 --- a/lldbmacros.py +++ /dev/null @@ -1,184 +0,0 @@ -import lldb -import re -import getopt - -# Note: This module will eventually contain loads of macros. 
So please bear with the Macro/EndMacro comments - - -# Global functions -def findGlobal(variable): - return lldb.target.FindGlobalVariables(variable, 0).GetValueAtIndex(0) - -def findGlobalValue(variable): - return findGlobal(variable).GetValue() - -def readMemberUnsigned(variable,member): - return variable.GetChildMemberWithName(member).GetValueAsUnsigned(0) - -def readMemberSigned(variable,member): - return variable.GetChildMemberWithName(member).GetValueAsSigned() - -def readMemberString(variable,member): - return str(variable.GetChildMemberWithName(member).GetSummary()).strip('"') - - - -class Output : - """ - An output handler for all commands. Use Output.printString to direct all output of a macro via the handler. - Currently this provides these capabilities - -o path/to/filename - The output of this command execution will be saved to file. Parser information or errors will - not be sent to file though. e.g. /tmp/output.txt - -s filter_string - the "filter_string" param is parsed as a python regex expression and each line of output - will be printed/saved only if it matches the expression. - The command header will not be filtered in any case. - """ - STDOUT =1 - FILEOUT =2 - FILTER =False - - def __init__(self): - self.out = Output.STDOUT - self.fname=None - self.fhandle=None - self.FILTER=False - - def printString(self, s): - """ Handler for all commands output. By default just print to stdout """ - if self.FILTER and not self.reg.search(s): return - if self.out == Output.STDOUT: print s - elif self.out == Output.FILEOUT : self.fhandle.write(s+"\n") - - def printHeader(self,s): - if self.out == Output.STDOUT: print s - elif self.out == Output.FILEOUT: self.fhandle.write(s+"\n") - - def done(self): - """ closes any open files. reports on any errors """ - if self.fhandle != None : - self.fhandle.close() - - def setOptions(self,args): - """ parse the arguments passed to the command - param : args => [] of (typically args.split()) - """ - opts=() - try: - opts,args = getopt.getopt(args,'o:s:',[]) - except getopt.GetoptError,err: - print str(err) - #continue with processing - for o,a in opts : - if o == "-o" and len(a) > 0: - self.fname=a.strip() - self.fhandle=open(self.fname,"w") - self.out = Output.FILEOUT - print "saving results in file ",str(a) - elif o == "-s" and len(a) > 0: - self.reg = re.compile(a.strip(),re.MULTILINE|re.DOTALL) - self.FILTER=True - print "showing results for regex:",a.strip() - else : - print "Error: unknown option ",o,a - - -# Interface function for showallkexts command -def showallkexts_command(debugger, args, result, lldb_dict): - kext_summary_header = findGlobal("gLoadedKextSummaries") - result.Printf(_summarizeallkexts(kext_summary_header)) - return None - -# Interface function for loaded kext summary formatter -def showallkexts_summary(kext_summary_header, lldb_dict): - return "\n" + _summarizeallkexts(kext_summary_header) - -# Internal function for walking kext summaries -def _summarizeallkexts(kext_summary_header): - summary = "ID Address Size Version Name\n" - summaries = kext_summary_header.GetChildMemberWithName("summaries") - count = int(kext_summary_header.GetChildMemberWithName("numSummaries").GetValue()) - for i in range(0, count): - summary += summaries.GetChildAtIndex(i, lldb.eNoDynamicValues, True).GetSummary() + "\n" - return summary - -# Macro: memstats -def memstats_command(debugger,args,result,lldb_dict): - stream = Output() - stream.setOptions(args.split()) - memstats(stream) - stream.done() - -def memstats(ostream): - ostream.printString (
"kern_memorystatus_level: {0}".format(findGlobalValue("kern_memorystatus_level")) ) - ostream.printString ( "vm_page_throttled_count: {0}".format(findGlobalValue("vm_page_throttled_count")) ) - ostream.printString ( "vm_page_active_count: {0}".format(findGlobalValue("vm_page_active_count")) ) - ostream.printString ( "vm_page_inactive_count: {0}".format(findGlobalValue("vm_page_inactive_count")) ) - ostream.printString ( "vm_page_wire_count: {0}".format(findGlobalValue("vm_page_wire_count")) ) - ostream.printString ( "vm_page_free_count: {0}".format(findGlobalValue("vm_page_free_count")) ) - ostream.printString ( "vm_page_purgeable_count: {0}".format(findGlobalValue("vm_page_purgeable_count")) ) - ostream.printString ( "vm_page_inactive_target: {0}".format(findGlobalValue("vm_page_inactive_target")) ) - ostream.printString ( "vm_page_free_target: {0}".format(findGlobalValue("vm_page_free_target")) ) - ostream.printString ( "insue_ptepages_count: {0}".format(findGlobalValue("inuse_ptepages_count")) ) - ostream.printString ( "vm_page_free_reserved: {0}".format(findGlobalValue("vm_page_free_reserved")) ) -# EndMacro: memstats - - -# Macro: zprint -def zprint_command(debugger,args,result,lldb_dict): - stream = Output() - stream.setOptions(args.split()) - _zprint(stream) - stream.done() - -def _zprint(ostream): - """Display info about memory zones""" - ostream.printHeader ( "{0: ^20s} {1: >5s} {2: >12s} {3: >12s} {4: >7s} {5: >8s} {6: >9s} {7: >8s} {8: <20s} {9} ".format('ZONE', 'COUNT', 'TOT_SZ', 'MAX_SZ', 'ELT_SZ', 'ALLOC_SZ', 'TOT_ALLOC', 'TOT_FREE', 'NAME','') ) - format_string = '{0: >#020x} {1: >5d} {2: >12d} {3: >12d} {4: >7d} {5: >8d} {6: >9d} {7: >8d} {8: <20s} {9}' - zone_ptr = findGlobal("first_zone"); - - while zone_ptr.GetValueAsUnsigned() != 0 : - addr = zone_ptr.GetValueAsUnsigned() - count = readMemberUnsigned(zone_ptr, "count") - cur_size = readMemberUnsigned(zone_ptr, "cur_size") - max_size = readMemberUnsigned(zone_ptr, "max_size") - elem_size = readMemberUnsigned(zone_ptr, "elem_size") - alloc_size = readMemberUnsigned(zone_ptr, "alloc_size") - num_allocs = readMemberUnsigned(zone_ptr, "num_allocs") - num_frees = readMemberUnsigned(zone_ptr, "num_frees") - name = str(readMemberString(zone_ptr, "zone_name")) - markings="" - if str(zone_ptr.GetChildMemberWithName("exhaustible").GetValue()) == '1' : markings+="H" - if str(zone_ptr.GetChildMemberWithName("collectable").GetValue()) == '1' : markings+="C" - if str(zone_ptr.GetChildMemberWithName("expandable").GetValue()) == '1' : markings+="X" - if str(zone_ptr.GetChildMemberWithName("noencrypt").GetValue()) == '1' : markings+="$" - - ostream.printString(format_string.format(addr, count, cur_size, max_size, elem_size, alloc_size, num_allocs, num_frees, name, markings)) - - zone_ptr = zone_ptr.GetChildMemberWithName("next_zone") - return None -# EndMacro: zprint - - -# Macro: showioalloc -def showioalloc_command(debugger,args,result,lldb_dict): - stream = Output() - stream.setOptions(args.split()) - _showioalloc(stream) - stream.done() - -def _showioalloc(ostream): - ivars_size = findGlobal("debug_ivars_size").GetValueAsUnsigned() - container_malloc_size = findGlobal("debug_container_malloc_size").GetValueAsUnsigned() - iomalloc_size = findGlobal("debug_iomalloc_size").GetValueAsUnsigned() - iomallocpageable_size = findGlobal("debug_iomallocpageable_size").GetValueAsUnsigned() - - ostream.printString("Instance allocation = {0:#0x} = {1:d} K".format(ivars_size, (int)(ivars_size/1024))) - ostream.printString("Container allocation = 
{0:#0x} = {1:d} K".format(container_malloc_size,(int)(container_malloc_size/1024))) - ostream.printString("IOMalloc allocation = {0:#0x} = {1:d} K".format(iomalloc_size,(int)(iomalloc_size/1024))) - ostream.printString("Pageable allocation = {0:#0x} = {1:d} K".format(iomallocpageable_size,(int)(iomallocpageable_size/1024))) - return None -# EndMacro: showioalloc - - diff --git a/makedefs/MakeInc.cmd b/makedefs/MakeInc.cmd index 12f5203b1..e69b52e0c 100644 --- a/makedefs/MakeInc.cmd +++ b/makedefs/MakeInc.cmd @@ -1,3 +1,12 @@ +# -*- mode: makefile;-*- +# +# Copyright (C) 1999-2012 Apple Inc. All rights reserved. +# +# MakeInc.cmd contains command paths for use during +# the build, as well as make fragments and text +# strings that may be evaluated as utility functions. +# + # # Commands for the build environment # @@ -18,58 +27,63 @@ _vstdout = > /dev/null endif ifeq ($(VERBOSE),YES) - XCRUN = /usr/bin/xcrun -verbose -log + XCRUN = /usr/bin/xcrun -verbose else XCRUN = /usr/bin/xcrun endif SDKROOT ?= / -HOST_SDKROOT ?= / +HOST_SDKROOT ?= macosx HOST_SPARSE_SDKROOT ?= / # SDKROOT may be passed as a shorthand like "iphoneos.internal". We # must resolve these to a full path and override SDKROOT. ifeq ($(SDKROOT_RESOLVED),) -ifeq ($(SDKROOT),/) -export SDKROOT_RESOLVED := / -else -export SDKROOT_RESOLVED := $(shell xcodebuild -sdk $(SDKROOT) -version Path | head -1) +export SDKROOT_RESOLVED := $(shell $(XCRUN) -sdk $(SDKROOT) -show-sdk-path) +ifeq ($(strip $(SDKROOT)_$(SDKROOT_RESOLVED)),/_) +export SDKROOT_RESOLVED := / endif endif override SDKROOT = $(SDKROOT_RESOLVED) +ifeq ($(HOST_SDKROOT_RESOLVED),) +export HOST_SDKROOT_RESOLVED := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -show-sdk-path) +endif +override HOST_SDKROOT = $(HOST_SDKROOT_RESOLVED) + ifeq ($(PLATFORM),) - export PLATFORM := $(shell xcodebuild -sdk $(SDKROOT) -version PlatformPath | head -1 | sed 's,^.*/\([^/]*\)\.platform$$,\1,') + export PLATFORMPATH := $(shell $(XCRUN) -sdk $(SDKROOT) -show-sdk-platform-path) + export PLATFORM := $(shell echo $(PLATFORMPATH) | sed 's,^.*/\([^/]*\)\.platform$$,\1,') ifeq ($(PLATFORM),) export PLATFORM := MacOSX endif endif +ifeq ($(SDKVERSION),) + export SDKVERSION := $(shell $(XCRUN) -sdk $(SDKROOT) -show-sdk-version) +endif + ifeq ($(PLATFORM),iPhoneOS) - DEVELOPER_DIR ?= $(shell xcode-select -print-path) - export HOST_SPARSE_SDKROOT := $(DEVELOPER_DIR)/SDKs/iPhoneHostSideTools.sparse.sdk + ifeq ($(HOST_SPARSE_SDKROOT),/) + export HOST_SPARSE_SDKROOT := $(shell $(XCRUN) -sdk iphonehost.internal -show-sdk-path) + endif endif # CC/CXX get defined by make(1) by default, so we can't check them # against the empty string to see if they haven't been set ifeq ($(origin CC),default) -ifneq ($(findstring iPhone,$(PLATFORM)),) export CC := $(shell $(XCRUN) -sdk $(SDKROOT) -find clang) -else - export CC := $(shell $(XCRUN) -sdk $(SDKROOT) -find clang) -endif endif ifeq ($(origin CXX),default) -ifneq ($(findstring iPhone,$(PLATFORM)),) export CXX := $(shell $(XCRUN) -sdk $(SDKROOT) -find clang++) -else - export CXX := $(shell $(XCRUN) -sdk $(SDKROOT) -find clang++) -endif endif ifeq ($(MIG),) export MIG := $(shell $(XCRUN) -sdk $(SDKROOT) -find mig) endif +ifeq ($(MIGCOM),) + export MIGCOM := $(shell $(XCRUN) -sdk $(SDKROOT) -find migcom) +endif ifeq ($(MIGCC),) export MIGCC := $(CC) endif @@ -97,32 +111,31 @@ endif ifeq ($(CTFMERGE),) export CTFMERGE := $(shell $(XCRUN) -sdk $(SDKROOT) -find ctfmerge) endif -ifeq ($(CTFSCRUB),) - export CTFSCRUB := $(shell $(XCRUN) -sdk $(SDKROOT) -find ctfdump) -r +ifeq 
($(CTFINSERT),) + export CTFINSERT := $(shell $(XCRUN) -sdk $(SDKROOT) -find ctf_insert) endif ifeq ($(NMEDIT),) export NMEDIT := $(shell $(XCRUN) -sdk $(SDKROOT) -find nmedit) endif # Platform-specific tools -ifneq ($(findstring iPhone,$(PRODUCT)),) +ifeq (iPhoneOS,$(PLATFORM)) ifeq ($(EMBEDDED_DEVICE_MAP),) export EMBEDDED_DEVICE_MAP := $(shell $(XCRUN) -sdk $(SDKROOT) -find embedded_device_map) endif -ifeq ($(IPHONEOS_OPTIMIZE),) - export IPHONEOS_OPTIMIZE := $(shell $(XCRUN) -sdk $(SDKROOT) -find iphoneos-optimize) -endif +EDM_DBPATH = $(PLATFORMPATH)/usr/local/standalone/firmware/device_map.db endif # Scripts or tools we build ourselves -SEG_HACK := $(OBJROOT)/SETUP/setsegname/setsegname -KEXT_CREATE_SYMBOL_SET := $(OBJROOT)/SETUP/kextsymboltool/kextsymboltool -DECOMMENT := $(OBJROOT)/SETUP/decomment/decomment +SEG_HACK = $(OBJROOT)/SETUP/setsegname/setsegname +KEXT_CREATE_SYMBOL_SET = $(OBJROOT)/SETUP/kextsymboltool/kextsymboltool +DECOMMENT = $(OBJROOT)/SETUP/decomment/decomment NEWVERS = $(SRCROOT)/config/newvers.pl -MD := $(OBJROOT)/SETUP/md/md +INSTALL = $(OBJROOT)/SETUP/installfile/installfile # Standard BSD tools RM = /bin/rm -f +RMDIR = /bin/rmdir CP = /bin/cp MV = /bin/mv LN = /bin/ln -fs @@ -130,29 +143,25 @@ CAT = /bin/cat MKDIR = /bin/mkdir -p FIND = /usr/bin/find XARGS = /usr/bin/xargs -INSTALL = /usr/bin/install TAR = /usr/bin/gnutar BASENAME = /usr/bin/basename +DIRNAME = /usr/bin/dirname TR = /usr/bin/tr - -# Platform-specific tools -ifeq (iPhoneOS,$(PLATFORM)) -ifeq ($(EMBEDDED_DEVICE_MAP),) - export EMBEDDED_DEVICE_MAP := $(shell $(XCRUN) -sdk $(SDKROOT) -find embedded_device_map || echo /usr/bin/true) -endif -ifeq ($(IPHONEOS_OPTIMIZE),) - export IPHONEOS_OPTIMIZE := $(shell $(XCRUN) -sdk $(SDKROOT) -find iphoneos-optimize || echo /usr/bin/true) -endif -endif - -CTFINSERT = $(XCRUN) -sdk $(SDKROOT) ctf_insert +TOUCH = /usr/bin/touch +AWK = /usr/bin/awk +SED = /usr/bin/sed +ECHO = /bin/echo +PLUTIL = /usr/bin/plutil # # Command to generate host binaries. 
Intentionally not # $(CC), which controls the target compiler # +ifeq ($(HOST_OS_VERSION),) + export HOST_OS_VERSION := $(shell sw_vers -productVersion) +endif ifeq ($(HOST_CC),) - export HOST_CC := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find cc) + export HOST_CC := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find clang) endif ifeq ($(HOST_FLEX),) export HOST_FLEX := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find flex) @@ -160,8 +169,101 @@ endif ifeq ($(HOST_BISON),) export HOST_BISON := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find bison) endif +ifeq ($(HOST_GM4),) + export HOST_GM4 := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find gm4) +endif ifeq ($(HOST_CODESIGN),) - export HOST_CODESIGN := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find codesign) + export HOST_CODESIGN := /usr/bin/codesign +endif +ifeq ($(HOST_CODESIGN_ALLOCATE),) + export HOST_CODESIGN_ALLOCATE := $(shell $(XCRUN) -sdk $(HOST_SDKROOT) -find codesign_allocate) endif +# +# The following variables are functions invoked with "call", and thus +# behave similarly to externally compiled commands +# + +# $(1) is an expanded kernel config from a TARGET_CONFIGS_UC tuple +# $(2) is an expanded arch config from a TARGET_CONFIGS_UC tuple +# $(3) is an expanded machine config from a TARGET_CONFIGS_UC tuple +_function_create_build_configs_join = $(strip $(1))^$(strip $(2))^$(strip $(3)) + +# $(1) is an un-expanded kernel config from a TARGET_CONFIGS_UC tuple +# $(2) is an un-expanded arch config from a TARGET_CONFIGS_UC tuple +# $(3) is an un-expanded machine config from a TARGET_CONFIGS_UC tuple +_function_create_build_configs_do_expand = $(call _function_create_build_configs_join, \ + $(if $(filter DEFAULT,$(1)), \ + $(DEFAULT_KERNEL_CONFIG), \ + $(1) \ + ), \ + $(if $(filter DEFAULT,$(2)), \ + $(DEFAULT_ARCH_CONFIG), \ + $(2) \ + ), \ + $(if $(filter DEFAULT,$(3)), \ + $(if $(filter DEFAULT,$(2)), \ + $(DEFAULT_$(DEFAULT_ARCH_CONFIG)_MACHINE_CONFIG), \ + $(DEFAULT_$(strip $(2))_MACHINE_CONFIG) \ + ), \ + $(3) \ + ) \ + ) + +# $(1) is an un-expanded TARGET_CONFIGS_UC list, which must be consumed +# 3 elements at a time +function_create_build_configs = $(sort \ + $(strip \ + $(call _function_create_build_configs_do_expand, \ + $(word 1,$(1)), \ + $(word 2,$(1)), \ + $(word 3,$(1)), \ + ) \ + $(if $(word 4,$(1)), \ + $(call function_create_build_configs, \ + $(wordlist 4,$(words $(1)),$(1)) \ + ), \ + \ + ) \ + ) \ + ) + +# $(1) is a fully-expanded kernel config +# $(2) is a fully-expanded arch config +# $(3) is a fully-expanded machine config. 
"NONE" is not represented in the objdir path +function_convert_target_config_uc_to_objdir = $(if $(filter NONE,$(3)),$(strip $(1))_$(strip $(2)),$(strip $(1))_$(strip $(2))_$(strip $(3))) + +# $(1) is a fully-expanded build config (like "RELEASE^X86_64^NONE") +function_convert_build_config_to_objdir = $(call function_convert_target_config_uc_to_objdir, \ + $(word 1,$(subst ^, ,$(1))), \ + $(word 2,$(subst ^, ,$(1))), \ + $(word 3,$(subst ^, ,$(1))) \ + ) + +# $(1) is a fully-expanded build config (like "RELEASE^X86_64^NONE") +function_extract_kernel_config_from_build_config = $(word 1,$(subst ^, ,$(1))) +function_extract_arch_config_from_build_config = $(word 2,$(subst ^, ,$(1))) +function_extract_machine_config_from_build_config = $(word 3,$(subst ^, ,$(1))) + +# $(1) is an input word +# $(2) is a list of colon-separate potential substitutions like "FOO:BAR BAZ:QUX" +# $(3) is a fallback if no substitutions were made +function_substitute_word_with_replacement = $(strip $(if $(2), \ + $(if $(filter $(word 1,$(subst :, ,$(word 1,$(2)))),$(1)), \ + $(word 2,$(subst :, ,$(word 1,$(2)))), \ + $(call function_substitute_word_with_replacement,$(1),$(wordlist 2,$(words $(2)),$(2)),$(3))), \ + $(3) \ + ) \ + ) + +# You can't assign a variable to an empty space without these +# shenanigans +empty := +space := $(empty) $(empty) + +# Arithmetic +# $(1) is the number to increment +NUM16 = x x x x x x x x x x x x x x x x +increment = $(words x $(wordlist 1,$(1),$(NUM16))) + # vim: set ft=make: diff --git a/makedefs/MakeInc.def b/makedefs/MakeInc.def index 4e49fe6a7..910cc0e46 100644 --- a/makedefs/MakeInc.def +++ b/makedefs/MakeInc.def @@ -1,243 +1,84 @@ -export SOURCE=$(shell /bin/pwd) -export RELATIVE_SOURCE_PATH ?= . - -# -# gnumake 3.77 support +# -*- mode: makefile;-*- # -export USE_APPLE_PB_SUPPORT = all - +# Copyright (C) 1999-2012 Apple Inc. All rights reserved. # -# Incremental Build option -# -ifndef INCR_EXPORTHDRS -ifeq ($(shell test -d $$OBJROOT/EXPORT_HDRS;echo $$?),0) -export INCR_EXPORTHDRS = TRUE -else -export INCR_EXPORTHDRS = FALSE -endif -endif - - -# -# Component List -# -ifndef COMPONENT_LIST -export COMPONENT_LIST = osfmk bsd libkern iokit pexpert libsa security -export COMPONENT_LIST_UC := $(shell printf "%s" "$(COMPONENT_LIST)" | $(TR) a-z A-Z) -endif -ifndef COMPONENT -export COMPONENT := $(firstword $(subst /, ,$(RELATIVE_SOURCE_PATH))) -export COMPONENT_IMPORT_LIST := $(filter-out $(COMPONENT),$(COMPONENT_LIST)) -else -ifeq ($(COMPONENT), .) -export COMPONENT := $(if $(word 2,$(subst /, ,$(RELATIVE_SOURCE_PATH))),$(word 2,$(subst /, ,$(RELATIVE_SOURCE_PATH))),$(firstword $(subst /, ,$(RELATIVE_SOURCE_PATH)))) -export COMPONENT_IMPORT_LIST := $(filter-out $(COMPONENT),$(COMPONENT_LIST)) -endif -endif - -# Architecture options +# MakeInc.def contains global definitions for building, +# linking, and installing files. 
# -ifndef SUPPORTED_ARCH_CONFIGS -export SUPPORTED_ARCH_CONFIGS = I386 X86_64 -endif - -ifndef ARCH_CONFIGS -ifdef RC_ARCHS -export ARCH_CONFIGS := $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z | sed -E 's/ARMV[0-9][A-Z]?/ARM/g') -else -ifeq ($(PLATFORM),iPhoneOS) - export ARCH_CONFIGS := ARM -else - export ARCH_CONFIGS := $(shell arch | $(TR) a-z A-Z | sed -E 's/ARMV[0-9][A-Z]?/ARM/g') -endif -endif -endif -ifdef ARCH_CONFIG -ifndef ARCH_CONFIG_LC -export ARCH_CONFIG_LC := $(shell printf "%s" "$(ARCH_CONFIG)" | $(TR) A-Z a-z) -endif -endif # -# Platform options +# Architecture Configuration options # -ifndef SUPPORTED_PLATFORMS -export SUPPORTED_PLATFORMS = MacOSX iPhoneOS iPhoneSimulator -endif - -# PLATFORM is set earlier in MakeInc.cmd, closer to where decisions about -# platform tools are made +SUPPORTED_ARCH_CONFIGS := X86_64 # # Kernel Configuration options # -ifndef SUPPORTED_KERNEL_CONFIGS -export SUPPORTED_KERNEL_CONFIGS = RELEASE DEVELOPMENT DEBUG PROFILE -endif - -ifndef DEFAULT_KERNEL_CONFIG -ifeq ($(RC_ProjectName),xnu_debug) -export DEFAULT_KERNEL_CONFIG = DEBUG -else -export DEFAULT_KERNEL_CONFIG = RELEASE -endif -endif - -# If KERNEL_CONFIGS is specified it should override KERNEL_CONFIG. -# If KERNEL_CONFIG is specified it will override the default. Will quit with -# error if more than one config is specified. -# If DEFAULT_KERNEL_CONFIG is not specified then it will be built RELEASE. -ifndef KERNEL_CONFIGS - ifndef KERNEL_CONFIG - export KERNEL_CONFIGS = $(DEFAULT_KERNEL_CONFIG) - else - export KERNEL_CONFIGS = $(KERNEL_CONFIG) - endif -endif - -ifndef KERNEL_CONFIG -export KERNEL_CONFIG = $(firstword $(KERNEL_CONFIGS)) -endif - -ifneq ($(words $(KERNEL_CONFIG)), 1) -$(error There were $(words $(KERNEL_CONFIG)) parameters passed to KERNEL_CONFIG = $(KERNEL_CONFG). \ - Are you sure? To specify multiple configurations please use KERNEL_CONFIGS) -endif - -ifndef MACHINE_CONFIG -export MACHINE_CONFIG = DEFAULT -endif - +SUPPORTED_KERNEL_CONFIGS = RELEASE DEVELOPMENT DEBUG PROFILE # # Machine Configuration options # -export SUPPORTED_I386_MACHINE_CONFIGS := DEFAULT -export SUPPORTED_X86_64_MACHINE_CONFIGS := DEFAULT + +SUPPORTED_X86_64_MACHINE_CONFIGS = NONE # -# Target configuration options. NOTE - target configurations will -# override ARCH_CONFIGS and KERNEL_CONFIGS. -# -# Target configs come in groups of three parameters. The first is the -# kernel configuration, the second is the architecture configuration, -# and the third is the machine configuration. You may pass in as -# many groups of configurations as you wish. Each item passed in is -# separated by whitespace. -# -# Example: -# TARGET_CONFIGS="release x86_64 default debug i386 default release arm S5l8920X" -# Parameters may be in upper or lower case (they are converted to upper). -# -# "default" parameter is a special case. It means use the default value for -# that parameter. Here are the default values for each configuration: -# -# default kernel configuration = DEFAULT_KERNEL_CONFIG -# default architecture configuration = system architecture where you are running make. -# default machine configuration for i386 = none at this time. -# default machine configuration for x86_64 = none at this time. -# default machine configuration for arm = "S5L8920X". 
+# Platform options # -ifndef TARGET_CONFIGS_UC -ifdef TARGET_CONFIGS - export TARGET_CONFIGS_UC := $(strip $(shell printf "%s" "$(TARGET_CONFIGS)" | $(TR) a-z A-Z)) - export MACHINE_CONFIG = $(word 3, $(TARGET_CONFIGS_UC)) - export DEFAULT_KERNEL_CONFIG = $(word 1, $(TARGET_CONFIGS_UC)) -else - - ifneq ($(filter %_embedded,$(MAKECMDGOALS)),) -# generate set of standard embedded configs - export TARGET_CONFIGS = $(TARGET_CONFIGS_EMBEDDED) - else ifneq ($(filter %_devicemap,$(MAKECMDGOALS)),) - DEVICEMAP_PLATFORMS = $(shell $(EMBEDDED_DEVICE_MAP) -query SELECT DISTINCT Platform FROM Targets | $(TR) [:lower:] [:upper:]) - export TARGET_CONFIGS = $(foreach my_kernel_config,$(KERNEL_CONFIGS_EMBEDDED),$(foreach my_arch,$(ARCH_CONFIGS),$(foreach my_machine_config,$(filter $(DEVICEMAP_PLATFORMS),$(SUPPORTED_$(my_arch)_MACHINE_CONFIGS)),$(my_kernel_config) $(my_arch) $(my_machine_config) ))) - else -# generate TARGET_CONFIGS using KERNEL_CONFIGS and ARCH_CONFIGS and MACHINE_CONFIG (which defaults to "DEFAULT") - export TARGET_CONFIGS = $(strip $(foreach my_arch_config, $(ARCH_CONFIGS), $(foreach my_kern_config, $(KERNEL_CONFIGS), $(my_kern_config) $(my_arch_config) $(MACHINE_CONFIG)))) - endif - export TARGET_CONFIGS_UC := $(shell printf "%s" "$(TARGET_CONFIGS)" | $(TR) a-z A-Z) - export MACHINE_CONFIG = $(word 3, $(TARGET_CONFIGS_UC)) - export DEFAULT_KERNEL_CONFIG = $(word 1, $(TARGET_CONFIGS_UC)) -endif -endif - -export MACHINE_CONFIG_LC := $(shell printf "%s" "$(MACHINE_CONFIG)" | $(TR) A-Z a-z) -export KERNEL_CONFIG_LC := $(shell printf "%s" "$(KERNEL_CONFIG)" | $(TR) A-Z a-z) +SUPPORTED_PLATFORMS = MacOSX iPhoneOS iPhoneSimulator # -# Validate configuration options +# Set up *_LC variables during recursive invocations # -ifneq ($(ARCH_CONFIG),) -ifeq ($(filter $(ARCH_CONFIG),$(SUPPORTED_ARCH_CONFIGS)),) -$(error Unsupported ARCH_CONFIG $(ARCH_CONFIG)) -endif -endif -ifneq ($(KERNEL_CONFIG),) -ifeq ($(filter $(KERNEL_CONFIG),$(SUPPORTED_KERNEL_CONFIGS)),) -$(error Unsupported KERNEL_CONFIG $(KERNEL_CONFIG)) -endif +ifndef CURRENT_ARCH_CONFIG_LC + export CURRENT_ARCH_CONFIG_LC := $(shell printf "%s" "$(CURRENT_ARCH_CONFIG)" | $(TR) A-Z a-z) endif -ifneq ($(MACHINE_CONFIG),) -ifneq ($(ARCH_CONFIG),) -ifeq ($(filter $(MACHINE_CONFIG),$(SUPPORTED_$(ARCH_CONFIG)_MACHINE_CONFIGS)),) -$(error Unsupported MACHINE_CONFIG $(MACHINE_CONFIG)) -endif -endif +ifndef CURRENT_KERNEL_CONFIG_LC + export CURRENT_KERNEL_CONFIG_LC := $(shell printf "%s" "$(CURRENT_KERNEL_CONFIG)" | $(TR) A-Z a-z) endif -ifneq ($(PLATFORM),) -ifeq ($(filter $(PLATFORM),$(SUPPORTED_PLATFORMS)),) -$(error Unsupported PLATFORM $(PLATFORM)) -endif +ifndef CURRENT_MACHINE_CONFIG_LC + export CURRENT_MACHINE_CONFIG_LC := $(shell printf "%s" "$(CURRENT_MACHINE_CONFIG)" | $(TR) A-Z a-z) endif + # -# Kernel Configuration to install -# -# supported install architecture : I386 X86_64 +# Component List # -export INSTALL_TYPE = $(DEFAULT_KERNEL_CONFIG) - -ifndef INSTALL_ARCHS -export INSTALL_ARCHS = $(strip $(foreach my_config, $(SUPPORTED_ARCH_CONFIGS), $(filter $(TARGET_CONFIGS_UC),$(my_config)))) -export INSTALL_ARCHS_LC := $(shell printf "%s" "$(INSTALL_ARCHS)" | $(TR) A-Z a-z) -endif +COMPONENT_LIST = osfmk bsd libkern iokit pexpert libsa security +COMPONENT = $(if $(word 2,$(subst /, ,$(RELATIVE_SOURCE_PATH))),$(word 2,$(subst /, ,$(RELATIVE_SOURCE_PATH))),$(firstword $(subst /, ,$(RELATIVE_SOURCE_PATH)))) +COMPONENT_IMPORT_LIST = $(filter-out $(COMPONENT),$(COMPONENT_LIST)) -export INSTALL_ARCH_DEFAULT = $(firstword $(INSTALL_ARCHS))
-ifeq ($(INSTALL_ARCH_DEFAULT),) -$(error Could not determine INSTALL_ARCH_DEFAULT) -endif # # Deployment target flag # -ifndef DEPLOYMENT_TARGET_FLAGS -SDKVERSION=$(shell xcodebuild -sdk $(SDKROOT) -version SDKVersion | head -1) ifeq ($(PLATFORM),MacOSX) - export DEPLOYMENT_TARGET_FLAGS := -mmacosx-version-min=$(SDKVERSION) + DEPLOYMENT_TARGET_FLAGS = -mmacosx-version-min=$(SDKVERSION) else ifeq ($(PLATFORM),iPhoneOS) - export DEPLOYMENT_TARGET_FLAGS := -miphoneos-version-min=$(SDKVERSION) + DEPLOYMENT_TARGET_FLAGS = -miphoneos-version-min=$(SDKVERSION) else ifeq ($(PLATFORM),iPhoneSimulator) - export DEPLOYMENT_TARGET_FLAGS := + DEPLOYMENT_TARGET_FLAGS = else - export DEPLOYMENT_TARGET_FLAGS := -endif + DEPLOYMENT_TARGET_FLAGS = endif + # # Standard defines list # -export DEFINES = -DAPPLE -DKERNEL -DKERNEL_PRIVATE -DXNU_KERNEL_PRIVATE \ - -DPRIVATE -D__MACHO__=1 -Dvolatile=__volatile $(IDENT) +DEFINES = -DAPPLE -DKERNEL -DKERNEL_PRIVATE -DXNU_KERNEL_PRIVATE \ + -DPRIVATE -D__MACHO__=1 -Dvolatile=__volatile $(IDENT) \ + $(SEED_DEFINES) # # Compiler command # -KCC := $(CC) -KC++ := $(CXX) +KCC = $(CC) +KC++ = $(CXX) # # Compiler warning flags @@ -249,9 +90,6 @@ CWARNFLAGS_STD = \ -Wwrite-strings -Wswitch -Wshadow -Wcast-align -Wchar-subscripts \ -Winline -Wnested-externs -Wredundant-decls -Wextra-tokens -# Certain warnings are non-fatal (8474835) -CWARNFLAGS_STD += -Wno-error=cast-align - # Can be overridden in Makefile.template or Makefile.$arch export CWARNFLAGS ?= $(CWARNFLAGS_STD) @@ -264,8 +102,8 @@ CXXWARNFLAGS_STD = \ -Wcast-qual -Wwrite-strings -Wswitch -Wcast-align -Wchar-subscripts \ -Wredundant-decls -Wextra-tokens -# Certain warnings are non-fatal (8474835, 9000888) -CXXWARNFLAGS_STD += -Wno-error=cast-align -Wno-error=overloaded-virtual +# overloaded-virtual warnings are non-fatal (9000888) +CXXWARNFLAGS_STD += -Wno-error=overloaded-virtual # Can be overridden in Makefile.template or Makefile.$arch export CXXWARNFLAGS ?= $(CXXWARNFLAGS_STD) @@ -274,17 +112,9 @@ define add_perfile_cxxflags $(1)_CXXWARNFLAGS_ADD += $2 endef -# -# Setup for parallel sub-makes based on 2 times number of logical CPUs -# -ifndef MAKEJOBS -export MAKEJOBS = --jobs=$(shell expr `/usr/sbin//sysctl -n hw.logicalcpu` \* 2) -endif - # # Default ARCH_FLAGS, for use with compiler/linker/assembler/mig drivers -ARCH_FLAGS_I386 = -arch i386 ARCH_FLAGS_X86_64 = -arch x86_64 @@ -292,73 +122,56 @@ ARCH_FLAGS_X86_64 = -arch x86_64 # Default CFLAGS # ifdef RC_CFLAGS -export OTHER_CFLAGS = $(subst $(addprefix -arch ,$(RC_ARCHS)),,$(RC_CFLAGS)) +OTHER_CFLAGS = $(subst $(addprefix -arch ,$(RC_ARCHS)),,$(RC_CFLAGS)) endif -export DSYMRESDIR = ./Contents/Resources/ -export DSYMBUILDDIR = ./Contents/Resources/DWARF/ +# +# Debug info +# +DSYMKERNELSYSDIR = mach_kernel.sys.dSYM +DSYMINFODIR = Contents +DSYMKGMACROSDIR = Contents/Resources +DSYMLLDBMACROSDIR = Contents/Resources/Python +DSYMDWARFDIR = Contents/Resources/DWARF + +DEBUG_CFLAGS := -gdwarf-2 +BUILD_DSYM := 1 # # We must not use -fno-keep-inline-functions, or it will remove the dtrace # probes from the kernel. 
# -export CFLAGS_GEN = $(DEBUG_CFLAGS) -nostdinc \ +CFLAGS_GEN = $(DEBUG_CFLAGS) -nostdinc \ -freorder-blocks -fno-builtin -fno-common \ -fsigned-bitfields $(OTHER_CFLAGS) -ifeq ($(BUILD_STABS),1) -export CFLAGS_GEN += -gstabs+ -export BUILD_DWARF = 0 -export BUILD_STABS = 1 -else -export CFLAGS_GEN += -gdwarf-2 -export BUILD_DWARF = 1 -export BUILD_STABS = 0 -endif - -export CFLAGS_RELEASE = -export CFLAGS_DEVELOPMENT = -export CFLAGS_DEBUG = -fstack-protector-all -export CFLAGS_PROFILE = -pg +CFLAGS_RELEASE = +CFLAGS_DEVELOPMENT = +CFLAGS_DEBUG = +CFLAGS_PROFILE = -pg -export CFLAGS_I386 = -static -Di386 -DI386 -D__I386__ \ - -DPAGE_SIZE_FIXED -msoft-float \ - -integrated-as -export CFLAGS_X86_64 = -Dx86_64 -DX86_64 -D__X86_64__ -DLP64 \ +CFLAGS_X86_64 = -Dx86_64 -DX86_64 -D__X86_64__ -DLP64 \ -DPAGE_SIZE_FIXED -mkernel -msoft-float \ - -integrated-as - - -ifeq (-arch armv7,$(ARCH_FLAGS_ARM)) -CFLAGS_ARM += -mthumb -endif -ifeq (-arch armv6,$(ARCH_FLAGS_ARM)) -CFLAGS_ARM += -mthumb -endif + -fno-limit-debug-info # Workaround for 11076603 -export CFLAGS_RELEASEI386 = -O2 -export CFLAGS_DEVELOPMENTI386 = -O2 -export CFLAGS_DEBUGI386 = -O0 -export CFLAGS_PROFILEI386 = -O2 - -export CFLAGS_RELEASEX86_64 = -O2 -export CFLAGS_DEVELOPMENTX86_64 = -O2 +CFLAGS_RELEASEX86_64 = -O2 +CFLAGS_DEVELOPMENTX86_64 = -O2 # No space optimization for the DEBUG kernel for the benefit of gdb: -export CFLAGS_DEBUGX86_64 = -O0 -export CFLAGS_PROFILEX86_64 = -O2 - -export CFLAGS_RELEASEARM = -O2 -export CFLAGS_DEVELOPMENTARM = -O2 -export CFLAGS_DEBUGARM = -O0 -export CFLAGS_PROFILEARM = -O2 - -export CFLAGS = $(CFLAGS_GEN) \ - $($(addsuffix $(MACHINE_CONFIG),MACHINE_FLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG),CFLAGS_)) \ - $($(addsuffix $(KERNEL_CONFIG),CFLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG), $(addsuffix $(KERNEL_CONFIG),CFLAGS_))) \ +CFLAGS_DEBUGX86_64 = -O0 +CFLAGS_PROFILEX86_64 = -O2 + +CFLAGS_RELEASEARM = -O2 +CFLAGS_DEVELOPMENTARM = -O2 +CFLAGS_DEBUGARM = -O0 +CFLAGS_PROFILEARM = -O2 + +CFLAGS = $(CFLAGS_GEN) \ + $($(addsuffix $(CURRENT_MACHINE_CONFIG),MACHINE_FLAGS_$(CURRENT_ARCH_CONFIG)_)) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG),CFLAGS_)) \ + $($(addsuffix $(CURRENT_KERNEL_CONFIG),CFLAGS_)) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG), $(addsuffix $(CURRENT_KERNEL_CONFIG),CFLAGS_))) \ $(DEPLOYMENT_TARGET_FLAGS) \ $(DEFINES) @@ -370,9 +183,13 @@ OTHER_CXXFLAGS = CXXFLAGS_GEN = -fapple-kext $(OTHER_CXXFLAGS) +# For the moment, do not use gnu++11 +#CXXFLAGS_ARM = -std=gnu++11 + CXXFLAGS = $(CXXFLAGS_GEN) \ - $($(addsuffix $(ARCH_CONFIG),CXXFLAGS_)) \ - $($(addsuffix $(KERNEL_CONFIG),CXXFLAGS_)) + $($(addsuffix $(CURRENT_ARCH_CONFIG),CXXFLAGS_)) \ + $($(addsuffix $(CURRENT_KERNEL_CONFIG),CXXFLAGS_)) + # # Assembler command @@ -383,26 +200,23 @@ S_KCC = $(CC) # # Default SFLAGS # -export SFLAGS_GEN = -D__ASSEMBLER__ $(OTHER_CFLAGS) +SFLAGS_GEN = -D__ASSEMBLER__ $(OTHER_CFLAGS) -export SFLAGS_RELEASE = -export SFLAGS_DEVELOPMENT = -export SFLAGS_DEBUG = -export SFLAGS_PROFILE = +SFLAGS_RELEASE = +SFLAGS_DEVELOPMENT = +SFLAGS_DEBUG = +SFLAGS_PROFILE = -export SFLAGS_I386 = $(CFLAGS_I386) -export SFLAGS_X86_64 = $(CFLAGS_X86_64) +SFLAGS_X86_64 = $(CFLAGS_X86_64) - -export SFLAGS = $(SFLAGS_GEN) \ - $($(addsuffix $(MACHINE_CONFIG),MACHINE_FLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG),SFLAGS_)) \ - $($(addsuffix $(KERNEL_CONFIG),SFLAGS_)) \ +SFLAGS = $(SFLAGS_GEN) \ + 
$($(addsuffix $(CURRENT_MACHINE_CONFIG),MACHINE_FLAGS_$(CURRENT_ARCH_CONFIG)_)) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG),SFLAGS_)) \ + $($(addsuffix $(CURRENT_KERNEL_CONFIG),SFLAGS_)) \ $(DEPLOYMENT_TARGET_FLAGS) \ $(DEFINES) - # # Linker command # @@ -411,12 +225,15 @@ LD = $(KC++) -nostdlib # # Default LDFLAGS # -export LDFLAGS_KERNEL_GEN = \ +# Availability of DWARF allows DTrace CTF (compressed type format) to be constructed. +# ctf_insert creates the CTF section. It needs reserved padding in the +# headers for the load command segment and the CTF section structures. +# +LDFLAGS_KERNEL_GEN = \ -nostdlib \ -fapple-kext \ -Wl,-e,__start \ -Wl,-sectalign,__TEXT,__text,0x1000 \ - -Wl,-sectalign,__TEXT,initcode,0x1000 \ -Wl,-sectalign,__DATA,__common,0x1000 \ -Wl,-sectalign,__DATA,__bss,0x1000 \ -Wl,-sectcreate,__PRELINK_TEXT,__text,/dev/null \ @@ -426,49 +243,30 @@ export LDFLAGS_KERNEL_GEN = \ -Wl,-new_linker \ -Wl,-pagezero_size,0x0 \ -Wl,-version_load_command \ - -Wl,-function_starts + -Wl,-function_starts \ + -Wl,-headerpad,152 -# Availability of DWARF allows DTrace CTF (compressed type format) to be constructed. -# ctf_insert creates the CTF section. It needs reserved padding in the -# headers for the load command segment and the CTF section structures. -ifeq ($(BUILD_DWARF),1) -export LDFLAGS_KERNEL_GEN += \ - -Wl,-headerpad,152 -endif - -export LDFLAGS_KERNEL_RELEASE = -export LDFLAGS_KERNEL_DEVELOPMENT = -export LDFLAGS_KERNEL_DEBUG = -export LDFLAGS_KERNEL_PROFILE = - -export LDFLAGS_KERNEL_RELEASEI386 = \ - -Wl,-segaddr,__INITPT,0x00100000 \ - -Wl,-segaddr,__INITGDT,0x00106000 \ - -Wl,-segaddr,__SLEEP,0x00107000 \ - -Wl,-segaddr,__HIB,0x00108000 \ - -Wl,-image_base,0x200000 \ - -Wl,-seg_page_size,__TEXT,0x200000 - -export LDFLAGS_KERNEL_DEBUGI386 = $(LDFLAGS_KERNEL_RELEASEI386) -export LDFLAGS_KERNEL_DEVELOPMENTI386 = $(LDFLAGS_KERNEL_RELEASEI386) -export LDFLAGS_KERNEL_PROFILEI386 = $(LDFLAGS_KERNEL_RELEASEI386) +LDFLAGS_KERNEL_RELEASE = +LDFLAGS_KERNEL_DEVELOPMENT = +LDFLAGS_KERNEL_DEBUG = +LDFLAGS_KERNEL_PROFILE = # KASLR static slide config: ifndef SLIDE SLIDE=0x00 endif -KERNEL_MIN_ADDRESS := 0xffffff8000000000 -KERNEL_BASE_OFFSET := 0x100000 -KERNEL_STATIC_SLIDE := $(shell printf "0x%016x" \ - $$[ $(SLIDE) << 21 ]) -KERNEL_STATIC_BASE := $(shell printf "0x%016x" \ - $$[ $(KERNEL_MIN_ADDRESS) + $(KERNEL_BASE_OFFSET) ]) -KERNEL_HIB_SECTION_BASE := $(shell printf "0x%016x" \ - $$[ $(KERNEL_STATIC_BASE) + $(KERNEL_STATIC_SLIDE) ]) -KERNEL_TEXT_BASE := $(shell printf "0x%016x" \ - $$[ $(KERNEL_HIB_SECTION_BASE) + 0x100000 ]) - -export LDFLAGS_KERNEL_RELEASEX86_64 = \ +KERNEL_MIN_ADDRESS = 0xffffff8000000000 +KERNEL_BASE_OFFSET = 0x100000 +KERNEL_STATIC_SLIDE = $(shell printf "0x%016x" \ + $$[ $(SLIDE) << 21 ]) +KERNEL_STATIC_BASE = $(shell printf "0x%016x" \ + $$[ $(KERNEL_MIN_ADDRESS) + $(KERNEL_BASE_OFFSET) ]) +KERNEL_HIB_SECTION_BASE = $(shell printf "0x%016x" \ + $$[ $(KERNEL_STATIC_BASE) + $(KERNEL_STATIC_SLIDE) ]) +KERNEL_TEXT_BASE = $(shell printf "0x%016x" \ + $$[ $(KERNEL_HIB_SECTION_BASE) + 0x100000 ]) + +LDFLAGS_KERNEL_RELEASEX86_64 = \ -Wl,-pie \ -Wl,-segaddr,__HIB,$(KERNEL_HIB_SECTION_BASE) \ -Wl,-image_base,$(KERNEL_TEXT_BASE) \ @@ -482,112 +280,103 @@ export LDFLAGS_KERNEL_RELEASEX86_64 = \ -Wl,-sectalign,__HIB,__const,0x1000 \ -Wl,-sectalign,__HIB,__bss,0x1000 \ -Wl,-sectalign,__HIB,__common,0x1000 \ + $(LDFLAGS_NOSTRIP_FLAG) # Define KERNEL_BASE_OFFSET so known at compile time: -export CFLAGS_X86_64 += 
-DKERNEL_BASE_OFFSET=$(KERNEL_BASE_OFFSET) - -export LDFLAGS_KERNEL_DEBUGX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) -export LDFLAGS_KERNEL_DEVELOPMENTX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) -export LDFLAGS_KERNEL_PROFILEX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) +CFLAGS_X86_64 += -DKERNEL_BASE_OFFSET=$(KERNEL_BASE_OFFSET) -export LDFLAGS_KERNEL_RELEASEARM = \ - -Wl,-pie \ - -Wl,-static \ - -Wl,-image_base,0x80001000 \ - -Wl,-exported_symbols_list,$(TARGET)/kernel-kpi.exp - -export LDFLAGS_KERNEL_DEVELOPMENTARM = \ - -Wl,-pie \ - -Wl,-static \ - -Wl,-image_base,0x80001000 +LDFLAGS_KERNEL_DEBUGX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) +LDFLAGS_KERNEL_DEVELOPMENTX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) +LDFLAGS_KERNEL_PROFILEX86_64 = $(LDFLAGS_KERNEL_RELEASEX86_64) -export LDFLAGS_KERNEL_DEBUGARM = $(LDFLAGS_KERNEL_DEVELOPMENTARM) -# Offset image base by page to have iBoot load kernel TEXT correctly. -# First page is used for various purposes : sleep token, reset vector. - -export LDFLAGS_KERNEL = $(LDFLAGS_KERNEL_GEN) \ - $($(addsuffix $(MACHINE_CONFIG),MACHINE_FLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG),LDFLAGS_KERNEL_)) \ - $($(addsuffix $(KERNEL_CONFIG),LDFLAGS_KERNEL_)) \ - $($(addsuffix $(ARCH_CONFIG), $(addsuffix $(KERNEL_CONFIG),LDFLAGS_KERNEL_))) \ +LDFLAGS_KERNEL = $(LDFLAGS_KERNEL_GEN) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG),LDFLAGS_KERNEL_)) \ + $($(addsuffix $(CURRENT_KERNEL_CONFIG),LDFLAGS_KERNEL_)) \ + $($(addsuffix $(CURRENT_ARCH_CONFIG), $(addsuffix $(CURRENT_KERNEL_CONFIG),LDFLAGS_KERNEL_))) \ $(DEPLOYMENT_TARGET_FLAGS) # # Default runtime libraries to be linked with the kernel # -export LD_KERNEL_LIBS = -lcc_kext +LD_KERNEL_LIBS = -lcc_kext +# +# DTrace support +# +ifeq ($(CURRENT_KERNEL_CONFIG),RELEASE) +ifneq ($(filter ARM%,$(CURRENT_ARCH_CONFIG)),) +DO_CTFCONVERT = 0 +DO_CTFMERGE = 0 +DO_CTFMACHO = 0 +else +DO_CTFCONVERT = $(SUPPORTS_CTFCONVERT) +DO_CTFMERGE = 1 +DO_CTFMACHO = $(NEEDS_CTF_MACHOS) +endif +else +DO_CTFCONVERT = $(SUPPORTS_CTFCONVERT) +DO_CTFMERGE = 1 +DO_CTFMACHO = $(NEEDS_CTF_MACHOS) +endif # # Default INCFLAGS # -export INCFLAGS_IMPORT = $(patsubst %, -I$(OBJROOT)/EXPORT_HDRS/%, $(COMPONENT_IMPORT_LIST)) -export INCFLAGS_EXTERN = -I$(OBJROOT)/EXTERN_HDRS -I$(SRCROOT)/EXTERNAL_HEADERS -I$(SRCROOT)/EXTERNAL_HEADERS/bsd -export INCFLAGS_GEN = -I$(SRCROOT)/$(COMPONENT) -I$(OBJROOT)/EXPORT_HDRS/$(COMPONENT) -export INCFLAGS_POSIX = -I$(OBJROOT)/EXPORT_HDRS/bsd -export INCFLAGS_LOCAL = -I. +INCFLAGS_IMPORT = $(patsubst %, -I$(OBJROOT)/EXPORT_HDRS/%, $(COMPONENT_IMPORT_LIST)) +INCFLAGS_EXTERN = -I$(SRCROOT)/EXTERNAL_HEADERS +INCFLAGS_GEN = -I$(SRCROOT)/$(COMPONENT) -I$(OBJROOT)/EXPORT_HDRS/$(COMPONENT) +INCFLAGS_LOCAL = -I. 
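#
# Illustration (not part of the patch): the $(addsuffix ...) indirection
# used by CFLAGS and LDFLAGS_KERNEL above (and by SFLAGS and STRIP_FLAGS)
# composes variable names out of the current build configuration. For
# CURRENT_KERNEL_CONFIG=RELEASE and CURRENT_ARCH_CONFIG=X86_64:
#
#   $($(addsuffix $(CURRENT_ARCH_CONFIG),CFLAGS_))   -> $(CFLAGS_X86_64)
#   $($(addsuffix $(CURRENT_KERNEL_CONFIG),CFLAGS_)) -> $(CFLAGS_RELEASE)
#   $($(addsuffix $(CURRENT_ARCH_CONFIG), $(addsuffix $(CURRENT_KERNEL_CONFIG),CFLAGS_)))
#                                                    -> $(CFLAGS_RELEASEX86_64) = -O2
#
# so every per-arch/per-config knob is just a variable name the
# composition can look up.
#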
-export INCFLAGS = $(INCFLAGS_LOCAL) $(INCFLAGS_GEN) $(INCFLAGS_IMPORT) $(INCFLAGS_EXTERN) $(INCFLAGS_MAKEFILE) +INCFLAGS = $(INCFLAGS_LOCAL) $(INCFLAGS_GEN) $(INCFLAGS_IMPORT) $(INCFLAGS_EXTERN) $(INCFLAGS_MAKEFILE) # # Default MIGFLAGS # -export MIGFLAGS = $(DEFINES) $(INCFLAGS) $($(addsuffix $(ARCH_CONFIG),CFLAGS_)) \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) \ - $(DEPLOYMENT_TARGET_FLAGS) +MIGFLAGS = $(DEFINES) $(INCFLAGS) $($(addsuffix $(CURRENT_ARCH_CONFIG),CFLAGS_)) $($(addsuffix $(CURRENT_ARCH_CONFIG),ARCH_FLAGS_)) \ + $(DEPLOYMENT_TARGET_FLAGS) # # Support for LLVM Link Time Optimization (LTO) # +# CFLAGS_NOLTO_FLAG is needed on a per-file basis (for files +# that deal poorly with LTO, or files that must be machine +# code *.o files for xnu to build (i.e, setsegname runs on +# them). +# +# LDFLAGS_NOSTRIP_FLAG is used to support configurations that +# do not utilize an export list. For these configs to build, +# we need to prevent the LTO logic from dead stripping them. ifeq ($(BUILD_LTO),1) -export CFLAGS_GEN += -flto -export CXXFLAGS_GEN += -flto -export LDFLAGS_KERNEL_GEN += -Wl,-object_path_lto,$(TARGET)/lto.o -export CFLAGS_NOLTO_FLAG = -fno-lto -export BUILD_MACHO_OBJ = 0 -export BUILD_LTO = 1 -else -export CFLAGS_NOLTO_FLAG = -export BUILD_MACHO_OBJ = 1 -export BUILD_LTO = 0 -endif - -# -# Support for LLVM Integrated Assembler with clang driver -# -ifeq ($(BUILD_INTEGRATED_ASSEMBLER),1) -export SFLAGS_GEN += -integrated-as -export CFLAGS_GEN += -integrated-as -export CXXFLAGS_GEN += -integrated-as -export SFLAGS_NOINTEGRATEDAS_FLAGS = -no-integrated-as -export CFLAGS_NOINTEGRATEDAS_FLAGS = -no-integrated-as +CFLAGS_GEN += -flto +CXXFLAGS_GEN += -flto +LDFLAGS_KERNEL_GEN += -Wl,-mllvm,-disable-simplify-libcalls -Wl,-object_path_lto,$(TARGET)/lto.o # -Wl,-mllvm -Wl,-disable-fp-elim +LDFLAGS_NOSTRIP_FLAG = -rdynamic +CFLAGS_NOLTO_FLAG = -fno-lto +SUPPORTS_CTFCONVERT = 0 +NEEDS_CTF_MACHOS = 1 else -export SFLAGS_NOINTEGRATEDAS_FLAGS = -export CFLAGS_NOINTEGRATEDAS_FLAGS = +LDFLAGS_NOSTRIP_FLAG = +CFLAGS_NOLTO_FLAG = +SUPPORTS_CTFCONVERT = 1 +NEEDS_CTF_MACHOS = 0 endif # # Default VPATH # -empty:= -space:= $(empty) $(empty) -export VPATH_IMPORT = $(subst $(space),:,$(patsubst %,$(OBJROOT)/EXPORT_HDRS/%,$(strip $(COMPONENT_IMPORT_LIST)))): -export VPATH_EXTERN = $(OBJROOT)/EXTERN_HDRS: -export VPATH_GEN = .:$(SOURCE): - -export VPATH = $(VPATH_GEN)$(VPATH_IMPORT)$(VPATH_EXTERN)$(VPATH_MAKEFILE) +export VPATH = .:$(SOURCE) # # Macros that control installation of kernel and its header files # # install flags for header files # -INSTALL_FLAGS = -c -m 0444 -FILE_INSTALL_FLAGS = -c -m 0644 -DATA_INSTALL_FLAGS = -c -m 0644 +INSTALL_FLAGS = -c -S -m 0444 +DATA_INSTALL_FLAGS = -c -S -m 0644 +EXEC_INSTALL_FLAGS = -c -S -m 0755 # # Header file destinations @@ -621,54 +410,83 @@ KRESDIR = $(KINCFRAME)/Versions/$(KINCVERS)/Resources XNU_PRIVATE_UNIFDEF = -UMACH_KERNEL_PRIVATE -UBSD_KERNEL_PRIVATE -UIOKIT_KERNEL_PRIVATE -ULIBKERN_KERNEL_PRIVATE -ULIBSA_KERNEL_PRIVATE -UPEXPERT_KERNEL_PRIVATE -UXNU_KERNEL_PRIVATE -PLATFORM_UNIFDEF = $(foreach x,$(SUPPORTED_PLATFORMS),$(if $(filter $(PLATFORM),$(x)),-DPLATFORM_$(x),-UPLATFORM_$(x))) -SPINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -DPRIVATE -U_OPEN_SOURCE_ -SINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UKERNEL -UPRIVATE -D_OPEN_SOURCE_ -KPINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -DKERNEL_PRIVATE -DPRIVATE -DKERNEL -U_OPEN_SOURCE_ 
-KINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) -UKERNEL_PRIVATE -UPRIVATE -DKERNEL -D_OPEN_SOURCE_
+PLATFORM_UNIFDEF = $(foreach x,$(SUPPORTED_PLATFORMS),$(if $(filter $(PLATFORM),$(x)),-DPLATFORM_$(x) $(foreach token,$(PLATFORM_UNIFDEF_BLACKLIST_TOKENS_$(x)),-U$(token)),-UPLATFORM_$(x)))
+
+SPINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) $(SEED_DEFINES) -UKERNEL_PRIVATE -UKERNEL -DPRIVATE -U_OPEN_SOURCE_ -U__OPEN_SOURCE__
+SINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) $(SEED_DEFINES) -UKERNEL_PRIVATE -UKERNEL -UPRIVATE -D_OPEN_SOURCE_ -D__OPEN_SOURCE__
+KPINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) $(SEED_DEFINES) -DKERNEL_PRIVATE -DPRIVATE -DKERNEL -U_OPEN_SOURCE_ -U__OPEN_SOURCE__
+KINCFRAME_UNIFDEF = $(PLATFORM_UNIFDEF) $(XNU_PRIVATE_UNIFDEF) $(SEED_DEFINES) -UKERNEL_PRIVATE -UPRIVATE -DKERNEL -D_OPEN_SOURCE_ -D__OPEN_SOURCE__

 #
 # Component Header file destinations
 #
 EXPDIR = EXPORT_HDRS/$(COMPONENT)

 #
 # Strip Flags
 #
-export STRIP_FLAGS_RELEASE = -S -x
-export STRIP_FLAGS_DEVELOPMENT = -S -x
-export STRIP_FLAGS_DEBUG = -S
-export STRIP_FLAGS_PROFILE = -S -x
+STRIP_FLAGS_RELEASE = -S -x
+STRIP_FLAGS_DEVELOPMENT = -S -x
+STRIP_FLAGS_DEBUG = -S
+STRIP_FLAGS_PROFILE = -S -x

-export STRIP_FLAGS = $($(addsuffix $(KERNEL_CONFIG),STRIP_FLAGS_))
+STRIP_FLAGS = $($(addsuffix $(CURRENT_KERNEL_CONFIG),STRIP_FLAGS_))

 #
 # dsymutil flags
 #
-export DSYMUTIL_FLAGS_I386 = --arch=i386
-export DSYMUTIL_FLAGS_X86_64 = --arch=x86_64
+DSYMUTIL_FLAGS_X86_64 = --arch=x86_64

-export DSYMUTIL_FLAGS = $($(addsuffix $(ARCH_CONFIG),DSYMUTIL_FLAGS_))
+DSYMUTIL_FLAGS = $($(addsuffix $(CURRENT_ARCH_CONFIG),DSYMUTIL_FLAGS_))

 #
 # Man Page destination
 #
-MANDIR = usr/share/man
+MANDIR = /usr/share/man

 #
 # DEBUG alias location
 #
-DEVELOPER_EXTRAS_DIR = AppleInternal/Developer/Extras
+DEVELOPER_EXTRAS_DIR = /AppleInternal/CoreOS/xnu_debug

 #
-# This must be here before any rules are possibly defined by the
-# machine dependent makefile fragment so that a plain "make" command
-# always works. The config program will emit an appropriate rule to
-# cause "all" to depend on every kernel configuration it generates.
+# mach_kernel install location
 #
+INSTALL_KERNEL_DIR = /

-default: all
+#
+# System.kext pseudo-kext install location
+#
+INSTALL_EXTENSIONS_DIR = /System/Library/Extensions
+
+#
+# KDK location for iOS
+#
+INSTALL_KERNEL_SYM_DIR = /System/Library/Extensions/KDK
+
+#
+# Misc. Etc.
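# (Illustration, not part of the patch: with the defaults above, a stock
# build lands at $(DSTROOT)/mach_kernel, man pages go under
# /usr/share/man, and, on iOS, kernel symbols go under
# /System/Library/Extensions/KDK; the xnu_debug build alias below
# redirects the kernel and its debug companions into
# /AppleInternal/CoreOS/xnu_debug instead.)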
+# +INSTALL_SHARE_MISC_DIR = /usr/share/misc +INSTALL_DTRACE_SCRIPTS_DIR = /usr/lib/dtrace + +# +# Overrides for XBS build aliases +# +ifeq ($(RC_ProjectName),xnu_debug) +INSTALL_KERNEL_DIR := $(DEVELOPER_EXTRAS_DIR) +DELETE_KERNEL_FRAMEWORK_AND_MISC = 1 +INSTALL_XNU_DEBUG_FILES = 1 +endif +ifeq ($(PLATFORM),iPhoneOS) +INSTALL_PRIMARY_KERNEL_LIKE_NON_PRIMARY = 1 +INSTALL_KERNEL_SYM_TO_KDK = 1 +USE_BINARY_PLIST = 1 +else ifeq ($(PLATFORM),iPhoneSimulator) +INSTALL_SYSTEM_FRAMEWORK_SYMLINKS = 1 +USE_BINARY_PLIST = 1 +endif # vim: set ft=make: diff --git a/makedefs/MakeInc.dir b/makedefs/MakeInc.dir index 12191a3c2..8b4b92a99 100644 --- a/makedefs/MakeInc.dir +++ b/makedefs/MakeInc.dir @@ -1,820 +1,102 @@ -# -# Install kernel header files -# -.PHONY: installhdrs - -ifeq ($(findstring Libsyscall,$(RC_ProjectName)),Libsyscall) -installhdrs: - cd libsyscall ; \ - sdk="$(SDKROOT)" ; \ - if [ $${sdk} = / ] ; then \ - sdk="" ; \ - fi; \ - xcrun -sdk "$(SDKROOT)" xcodebuild installhdrs \ - "SRCROOT=$(SRCROOT)/libsyscall" \ - "OBJROOT=$(OBJROOT)" \ - "SYMROOT=$(SYMROOT)" \ - "DSTROOT=$(DSTROOT)" \ - "SDKROOT=$${sdk}" -else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld) -installhdrs: - make -C libkern/kxld/ installhdrs -else ifeq ($(RC_ProjectName),libkmod) -installhdrs: -# nothing to do -else # xnu, xnu_debug, or xnu_headers_Sim -installhdrs: exporthdrs installhdrs_mi installhdrs_md setup - @echo "[ $(SRCROOT) ] make installhdrs installing Kernel.framework" - $(_v)kincpath=$(DSTROOT)/$(KINCDIR); \ - krespath=$(DSTROOT)/$(KRESDIR); \ - kframepath=$(DSTROOT)/$(KINCFRAME); \ - [ -d $$krespath ] || $(MKDIR) $$krespath; \ - [ -d $$kincpath ] || $(MKDIR) $$kincpath; \ - cd $(SRCROOT)/EXTERNAL_HEADERS; \ - install $(FILE_INSTALL_FLAGS) Info.plist $$krespath; \ - $(NEWVERS) $${krespath}/Info.plist; \ - cd $$kframepath/Versions; \ - [ -L Current ] || $(LN) $(KINCVERS) Current; \ - cd $$kframepath; \ - [ -L Headers ] || $(LN) Versions/Current/Headers Headers; \ - [ -L Resources ] || $(LN) Versions/Current/Resources Resources; \ - [ -d $(DSTROOT)/$(KPINCDIR) ] || $(MKDIR) $(DSTROOT)/$(KPINCDIR); \ - cd $$kframepath; [ -L PrivateHeaders ] || \ - $(LN) Versions/Current/PrivateHeaders PrivateHeaders; -ifeq ($(RC_ProjectName),xnu_headers_Sim) - @echo "[ $(SRCROOT) ] make installhdrs installing System.framework" - $(_v)spincpath=$(DSTROOT)/$(SPINCDIR); \ - sframepath=$(DSTROOT)/$(SINCFRAME); \ - [ -d $$spincpath ] || $(MKDIR) $$spincpath; \ - cd $$sframepath/Versions; \ - [ -L Current ] || $(LN) $(SINCVERS) Current; \ - cd $$sframepath; [ -L PrivateHeaders ] || \ - $(LN) Versions/Current/PrivateHeaders PrivateHeaders; -endif -ifeq (iPhoneOS,$(PLATFORM)) - $(_v)$(IPHONEOS_OPTIMIZE) $(DSTROOT)/$(KRESDIR)/Info.plist -endif -endif - -.PHONY: installhdrs_embedded installhdrs_devicemap -installhdrs_embedded installhdrs_devicemap: installhdrs +# -*- mode: makefile;-*- +# +# Copyright (C) 1999-2012 Apple Inc. All rights reserved. +# +# MakeInc.dir contains the recursion rules for the build system. +# For instance, the "build_installhdrs_md" target is auto-generated +# such that make(1) recurses into a specified set of subdirectories +# (building "build_installhdrs_md" in turn at each level) before +# building the special "do_installhdrs_md" target at the current +# level. "do_installhdrs_md" is defined in MakeInc.rule. 
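#
# Illustration (not part of the patch): if the top-level Makefile listed
# INSTINC_SUBDIRS = bsd, and bsd/Makefile in turn listed
# INSTINC_SUBDIRS = sys, "make build_installhdrs_mi" would unwind as
#
#   make -C bsd ... build_installhdrs_mi        (recurse first)
#     make -C bsd/sys ... build_installhdrs_mi
#       make ... do_installhdrs_mi              (deepest leaf first)
#     make ... do_installhdrs_mi                (then bsd itself)
#   make ... do_installhdrs_mi                  (top level last)
#
# with SOURCE, TARGET and RELATIVE_SOURCE_PATH rewritten at each level
# by the template below.
#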
+# + +# $(1) is the name of the target to produce that will perform the +# recursive behavior via dependencies +# $(2) is a list of subdirectories to recurse into +# $(3) is the target to build with a sub-make after +# the subdirectories have been recursed into +# $(4) should be "1" if TARGET should be pinned to the per-arch +# build COMPONENT directory, or empty if it should recurse +# in lockstep with the source recursion +define RECURSIVE_BUILD_RULES_template +$(1)_recurse_target_list = $$(addprefix $(1)_recurse_into_,$(2)) + +.PHONY: $$($(1)_recurse_target_list) + +$$($(1)_recurse_target_list): + $$(_v)$$(MKDIR) $$(CURDIR)/$$(patsubst $(1)_recurse_into_%,%,$$@) + $$(_v)$${MAKE} -C $$(CURDIR)/$$(patsubst $(1)_recurse_into_%,%,$$@) \ + -f $$(SOURCE)$$(patsubst $(1)_recurse_into_%,%,$$@)/Makefile \ + CURRENT_KERNEL_CONFIG=$${CURRENT_KERNEL_CONFIG} \ + CURRENT_ARCH_CONFIG=$${CURRENT_ARCH_CONFIG} \ + CURRENT_MACHINE_CONFIG=$${CURRENT_MACHINE_CONFIG} \ + CURRENT_BUILD_CONFIG=$${CURRENT_BUILD_CONFIG} \ + SOURCE=$$(SOURCE)$$(patsubst $(1)_recurse_into_%,%,$$@)/ \ + RELATIVE_SOURCE_PATH=$$(RELATIVE_SOURCE_PATH)/$$(patsubst $(1)_recurse_into_%,%,$$@) \ + TARGET=$(if $(4),$${OBJPATH}/$$(COMPONENT),$$(TARGET)$$(patsubst $(1)_recurse_into_%,%,$$@)/) \ + OBJPATH=$${OBJPATH} \ + $(1); + +.PHONY: $(1) + +$(1): $$($(1)_recurse_target_list) + $$(_v)$${MAKE} -C $$(CURDIR) \ + -f $$(firstword $$(MAKEFILE_LIST)) \ + CURRENT_KERNEL_CONFIG=$${CURRENT_KERNEL_CONFIG} \ + CURRENT_ARCH_CONFIG=$${CURRENT_ARCH_CONFIG} \ + CURRENT_MACHINE_CONFIG=$${CURRENT_MACHINE_CONFIG} \ + CURRENT_BUILD_CONFIG=$${CURRENT_BUILD_CONFIG} \ + SOURCE=$$(SOURCE) \ + RELATIVE_SOURCE_PATH=$$(RELATIVE_SOURCE_PATH) \ + TARGET=$$(TARGET) \ + OBJPATH=$${OBJPATH} \ + $(3); +endef # -# Install header files order -# -.ORDER: installhdrs_mi installhdrs_md -.PHONY: installhdrs_mi installhdrs_md - -# -# Install machine independent header files -# -installhdrs_mi: setup - $(_v)kernel_config=$(INSTALL_TYPE); \ - machine_config=$(MACHINE_CONFIG); \ - arch_config=$(INSTALL_ARCH_DEFAULT); \ - if [ $${arch_config} = ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${arch_config} = L4_ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${machine_config} = DEFAULT ] ; then \ - installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH}; \ - else \ - installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \ - fi; \ - [ -d $${installinc_dir} ] || $(MKDIR) $${installinc_dir}; \ - ${MAKE} ${MAKEJOBS} -C $${installinc_dir} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=. \ - TARGET=$${installinc_dir}/ \ - build_installhdrs_mi; \ - -# -# Install machine dependent kernel header files -# Uses hack for machine_config, which is not threaded through properly. 
+# Setup pass for all architectures for all Configuration/Architecture options # -installhdrs_md: setup - $(_v)kernel_config=$(INSTALL_TYPE); \ - machine_config=$(MACHINE_CONFIG); \ - for arch_config in $(INSTALL_ARCHS); \ - do \ - if [ $${arch_config} = ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${arch_config} = L4_ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${machine_config} = DEFAULT ] ; then \ - installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH}; \ - else \ - installinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \ - fi; \ - [ -d $${installinc_dir} ] || $(MKDIR) $${installinc_dir}; \ - ${MAKE} ${MAKEJOBS} -C $${installinc_dir} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=. \ - TARGET=$${installinc_dir}/ \ - build_installhdrs_md; \ - done; +$(eval $(call RECURSIVE_BUILD_RULES_template,build_setup,$(SETUP_SUBDIRS),do_build_setup,)) # # Install machine independent kernel header files # -do_installhdrs_mi: - -BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS = $(addprefix build_installhdrs_mi_,$(INSTINC_SUBDIRS)) - -.PHONY: $(BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS) - -$(BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS): - $(_v)installinc_subdir="$(patsubst build_installhdrs_mi_%,%,$@)"; \ - [ -d $${installinc_subdir} ] || $(MKDIR) $${installinc_subdir}; \ - ${MAKE} -C $${installinc_subdir} \ - MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile \ - SOURCE=$(SOURCE)$${installinc_subdir}/ \ - RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${installinc_subdir} \ - TARGET=$(TARGET)$${installinc_subdir}/ \ - build_installhdrs_mi; - -build_installhdrs_mi: $(BUILD_INSTALLHDRS_MI_SUBDIRS_TARGETS) - $(_v)${MAKE} do_installhdrs_mi; +$(eval $(call RECURSIVE_BUILD_RULES_template,build_installhdrs_mi,$(INSTINC_SUBDIRS),do_installhdrs_mi,)) # # Install machine dependent kernel header files # -do_installhdrs_md: - -BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS = $(addprefix build_installhdrs_md_,$($(addprefix INSTINC_SUBDIRS_, $(ARCH_CONFIG)))) - -.PHONY: $(BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS) - -$(BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS): - $(_v)installinc_subdir="$(patsubst build_installhdrs_md_%,%,$@)"; \ - [ -d $${installinc_subdir} ] || $(MKDIR) $${installinc_subdir}; \ - ${MAKE} -C $${installinc_subdir} \ - MAKEFILES=$(SOURCE)$${installinc_subdir}/Makefile \ - SOURCE=$(SOURCE)$${installinc_subdir}/ \ - RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${installinc_subdir} \ - TARGET=$(TARGET)$${installinc_subdir}/ \ - build_installhdrs_md; - -build_installhdrs_md: $(BUILD_INSTALLHDRS_MD_SUBDIRS_TARGETS) - $(_v)${MAKE} do_installhdrs_md; - -# -# Install kernel header files -# -.PHONY: exporthdrs - -exporthdrs: exporthdrs_mi exporthdrs_md - -# -# Install header files order -# -.ORDER: exporthdrs_mi exporthdrs_md -.PHONY: exporthdrs_mi exporthdrs_md - -# -# Install machine independent header files -# -do_exporthdrs_mi: - -exporthdrs_mi: - $(_v)kernel_config=$(INSTALL_TYPE); \ - arch_config=$(INSTALL_ARCH_DEFAULT); \ - machine_config=DEFAULT; \ - if [ $${arch_config} = ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${arch_config} = L4_ARM ] ; then \ - if [ 
$${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG);\ - fi; \ - fi; \ - if [ $${machine_config} = DEFAULT ] ; then \ - exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH}; \ - else \ - exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \ - fi; \ - [ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir}; \ - ${MAKE} ${MAKEJOBS} -C $${exportinc_dir} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=. \ - TARGET=$${exportinc_dir}/ \ - build_exporthdrs_mi; \ - -# -# Install machine dependent kernel header files -# -# Note - installation of machine dependent kernel header files only occurs for architecture -# defined in INSTALL_TYPE. We use skipit variable to skip over architectures that are not -# equal to what is in the INSTALL_TYPE variable. -# TARGET_CONFIGS_UC variable holds sets of three configuration options. The first item in the -# set is the kernel configuration. The second item in the set is the architecture and the -# third item is the machine configuration. There may be multiple sets to build. -exporthdrs_md: - $(_v)my_counter=1; \ - for my_config in $(TARGET_CONFIGS_UC); \ - do \ - if [ $${my_counter} -eq 1 ] ; then \ - skipit=0; \ - my_counter=2; \ - kernel_config=$${my_config}; \ - if [ $${kernel_config} = DEFAULT ] ; then \ - kernel_config=$(DEFAULT_KERNEL_CONFIG); \ - fi; \ - if [ $${kernel_config} != $(INSTALL_TYPE) ] ; then \ - skipit=1; \ - fi; \ - elif [ $${my_counter} -eq 2 ] ; then \ - my_counter=3; \ - arch_config=$${my_config}; \ - if [ $${arch_config} = DEFAULT ] ; then \ - arch_config=`arch | $(TR) a-z A-Z`; \ - fi; \ - else \ - my_counter=1; \ - machine_config=$${my_config}; \ - if [ $${skipit} -eq 0 ] ; then \ - if [ $${arch_config} = ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${arch_config} = L4_ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${machine_config} = DEFAULT ] ; then \ - exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH}; \ - else \ - exportinc_dir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \ - fi; \ - [ -d $${exportinc_dir} ] || $(MKDIR) $${exportinc_dir}; \ - ${MAKE} ${MAKEJOBS} -C $${exportinc_dir} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=. 
\ - TARGET=$${exportinc_dir}/ \ - build_exporthdrs_md; \ - fi; \ - fi; \ - done; +$(eval $(call RECURSIVE_BUILD_RULES_template,build_installhdrs_md,$(INSTINC_SUBDIRS_$(CURRENT_ARCH_CONFIG)),do_installhdrs_md,)) # # Install machine independent kernel header files # -do_exporthdrs_mi: - -BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS = $(addprefix build_exporthdrs_mi_,$(EXPINC_SUBDIRS)) - -.PHONY: $(BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS) - -$(BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS): - $(_v)exportinc_subdir="$(patsubst build_exporthdrs_mi_%,%,$@)"; \ - [ -d $${exportinc_subdir} ] || $(MKDIR) $${exportinc_subdir}; \ - ${MAKE} -C $${exportinc_subdir} \ - MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile \ - SOURCE=$(SOURCE)$${exportinc_subdir}/ \ - RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${exportinc_subdir} \ - TARGET=$(TARGET)$${exportinc_subdir}/ \ - build_exporthdrs_mi; - -build_exporthdrs_mi: $(BUILD_EXPORTHDRS_MI_SUBDIRS_TARGETS) - $(_v)${MAKE} do_exporthdrs_mi; +$(eval $(call RECURSIVE_BUILD_RULES_template,build_exporthdrs_mi,$(EXPINC_SUBDIRS),do_exporthdrs_mi,)) # # Install machine dependent kernel header files # -do_exporthdrs_md: - -BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS = $(addprefix build_exporthdrs_md_,$($(addprefix EXPINC_SUBDIRS_, $(ARCH_CONFIG)))) - -.PHONY: $(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS) - -$(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS): - $(_v)exportinc_subdir="$(patsubst build_exporthdrs_md_%,%,$@)"; \ - [ -d $${exportinc_subdir} ] || $(MKDIR) $${exportinc_subdir}; \ - ${MAKE} -C $${exportinc_subdir} \ - MAKEFILES=$(SOURCE)$${exportinc_subdir}/Makefile \ - SOURCE=$(SOURCE)$${exportinc_subdir}/ \ - RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${exportinc_subdir} \ - TARGET=$(TARGET)$${exportinc_subdir}/ \ - build_exporthdrs_md; - -build_exporthdrs_md: $(BUILD_EXPORTHDRS_MD_SUBDIRS_TARGETS) - $(_v)${MAKE} do_exporthdrs_md; - -# -# Setup pass for all architectures for all Configuration/Architecture options -# -.PHONY: setup - -setup: - $(_v)kernel_config=$(INSTALL_TYPE); \ - arch_config=$(INSTALL_ARCH_DEFAULT); \ - setup_subdir=${OBJROOT}/$${RELATIVE_SOURCE_PATH}; \ - [ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir}; \ - ${MAKE} ${MAKEJOBS} -C $${setup_subdir} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=. \ - TARGET=$${setup_subdir}/ \ - build_setup; - -do_build_setup: - -BUILD_SETUP_SUBDIRS_TARGETS = $(addprefix build_setup_,$(SETUP_SUBDIRS) $($(addprefix SETUP_SUBDIRS_, $(ARCH_CONFIG)))) - -.PHONY: $(BUILD_SETUP_SUBDIRS_TARGETS) - -$(BUILD_SETUP_SUBDIRS_TARGETS): - $(_v)setup_subdir="$(patsubst build_setup_%,%,$@)"; \ - [ -d $${setup_subdir} ] || $(MKDIR) $${setup_subdir}; \ - ${MAKE} -C $${setup_subdir} \ - MAKEFILES=${SOURCE}/$${setup_subdir}/Makefile \ - SOURCE=${SOURCE}/$${setup_subdir}/ \ - RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}/$${setup_subdir} \ - TARGET=${TARGET}/$${setup_subdir}/ \ - build_setup; - -build_setup: $(BUILD_SETUP_SUBDIRS_TARGETS) - $(_v)${MAKE} do_build_setup; - - -# -# Build all architectures for all Configuration/Architecture options -# -# Note - TARGET_CONFIGS_UC variable holds sets of three configuration options. The first -# item in the set is the kernel configuration. The second item in the set is the architecture -# and the third item is the machine configuration. There may be multiple sets to build. 
-# -.PHONY: all - -ifeq ($(RC_ProjectName),Libsyscall) -all: - cd libsyscall ; \ - sdk="$(SDKROOT)" ; \ - if [ $${sdk} = / ] ; then \ - sdk="" ; \ - fi; \ - xcrun -sdk "$(SDKROOT)" xcodebuild install \ - "SRCROOT=$(SRCROOT)/libsyscall" \ - "OBJROOT=$(OBJROOT)" \ - "SYMROOT=$(SYMROOT)" \ - "DSTROOT=$(DSTROOT)" \ - "SDKROOT=$${sdk}" -else ifeq ($(RC_ProjectName),libkxld) -all: - make -C libkern/kxld/ install -else ifeq ($(RC_ProjectName),libkxld_host) -all: - make -C libkern/kxld/ install PRODUCT_TYPE=ARCHIVE -else ifeq ($(RC_ProjectName),libkmod) -all: - cd libkern/kmod ; \ - sdk="$(SDKROOT)" ; \ - if [ $${sdk} = / ] ; then \ - sdk="" ; \ - fi; \ - xcrun -sdk "$(SDKROOT)" xcodebuild install \ - "SRCROOT=$(SRCROOT)/libkern/kmod" \ - "OBJROOT=$(OBJROOT)" \ - "SYMROOT=$(SYMROOT)" \ - "DSTROOT=$(DSTROOT)" \ - "SDKROOT=$${sdk}" -else ifeq ($(findstring _headers_Sim,$(RC_ProjectName)),_headers_Sim) # Libsyscall/xnu _headers_Sim -all: exporthdrs -else # xnu or xnu_debug -ifeq ($(COMPONENT), .) -all: exporthdrs setup -else -all: -endif - $(_v)my_counter=1; \ - for my_config in $(TARGET_CONFIGS_UC); \ - do \ - if [ $${my_counter} -eq 1 ] ; then \ - my_counter=2; \ - kernel_config=$${my_config}; \ - if [ $${kernel_config} = DEFAULT ] ; then \ - kernel_config=$(DEFAULT_KERNEL_CONFIG); \ - fi; \ - elif [ $${my_counter} -eq 2 ] ; then \ - my_counter=3; \ - arch_config=$${my_config}; \ - if [ $${arch_config} = DEFAULT ] ; then \ - arch_config=`arch | $(TR) a-z A-Z`; \ - fi; \ - else \ - my_counter=1; \ - machine_config=$${my_config}; \ - if [ $${arch_config} = ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${arch_config} = L4_ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${machine_config} = DEFAULT ] ; then \ - build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH}; \ - else \ - build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \ - fi; \ - [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir}; \ - ${MAKE} ${MAKEJOBS} -C $${build_subdir} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH} \ - build_all; \ - fi; \ - done; -endif - -.PHONY: all_embedded all_devicemap -all_embedded all_devicemap: all +$(eval $(call RECURSIVE_BUILD_RULES_template,build_exporthdrs_md,$(EXPINC_SUBDIRS_$(CURRENT_ARCH_CONFIG)),do_exporthdrs_md,)) # # Build all architectures for all Configuration/Architecture options # -do_build_all: - -BUILD_ALL_SUBDIRS_TARGETS = $(addprefix build_all_,$(COMP_SUBDIRS) $($(addprefix COMP_SUBDIRS_, $(ARCH_CONFIG)))) - -.PHONY: $(BUILD_ALL_SUBDIRS_TARGETS) - -$(BUILD_ALL_SUBDIRS_TARGETS): - $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \ - TARGET=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT); \ - else \ - TARGET="$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)"; \ - fi; \ - comp_subdir="$(patsubst build_all_%,%,$@)"; \ - [ -d $${comp_subdir} ] || $(MKDIR) $${comp_subdir}; \ - ${MAKE} -C $${comp_subdir} \ - MAKEFILES=${SOURCE}/$${comp_subdir}/Makefile \ - SOURCE=${SOURCE}$${comp_subdir}/ \ - RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${comp_subdir} \ - TARGET=$${TARGET} \ - build_all; - -build_all: $(BUILD_ALL_SUBDIRS_TARGETS) - $(_v)if [ 
$(MACHINE_CONFIG) = DEFAULT ] ; then \ - TARGET=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT); \ - else \ - TARGET="$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)"; \ - fi; \ - ${MAKE} INCL_MAKEDEP=TRUE TARGET=$${TARGET} do_build_all; \ - _TMP_comp_subdir="$(CONFIG_SUBDIRS) $($(addprefix CONFIG_SUBDIRS_, $(ARCH_CONFIG)))"; \ - for comp_subdir in $${_TMP_comp_subdir}; \ - do \ - [ -d $${comp_subdir} ] || $(MKDIR) $${comp_subdir}; \ - ${MAKE} -C $${comp_subdir} \ - MAKEFILES=${SOURCE}/$${comp_subdir}/Makefile \ - SOURCE=${SOURCE}$${comp_subdir}/ \ - RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${comp_subdir} \ - TARGET=$${TARGET} \ - build_all; \ - done; - +$(eval $(call RECURSIVE_BUILD_RULES_template,build_all,$(COMP_SUBDIRS) $(COMP_SUBDIRS_$(CURRENT_ARCH_CONFIG)),do_build_all,1)) # -# Build all architectures for all Configuration/Architecture options -# -# Note - TARGET_CONFIGS_UC variable holds sets of three configuration options. The first -# item in the set is the kernel configuration. The second item is the architecture -# and the third item is the machine configuration. There may be multiple sets to build. +# Post-process build results # -mach_kernel: - $(_v)my_counter=1; \ - for my_config in $(TARGET_CONFIGS_UC); \ - do \ - if [ $${my_counter} -eq 1 ] ; then \ - my_counter=2; \ - kernel_config=$${my_config}; \ - if [ $${kernel_config} = DEFAULT ] ; then \ - kernel_config=$(DEFAULT_KERNEL_CONFIG); \ - fi; \ - elif [ $${my_counter} -eq 2 ] ; then \ - my_counter=3; \ - arch_config=$${my_config}; \ - if [ $${arch_config} = DEFAULT ] ; then \ - arch_config=`arch | $(TR) a-z A-Z`; \ - fi; \ - else \ - my_counter=1; \ - machine_config=$${my_config}; \ - if [ $${arch_config} = ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${arch_config} = L4_ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${machine_config} = DEFAULT ] ; then \ - build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}; \ - else \ - build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}; \ - fi; \ - [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir}; \ - ${MAKE} ${MAKEJOBS} -C $${build_subdir} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - TARGET=$${build_subdir}/ \ - build_mach_kernel; \ - fi; \ - done; - -# -# Build all architectures for all Configuration/Architecture options -# -do_build_mach_kernel: - -build_mach_kernel: - $(_v)${MAKE} do_build_mach_kernel; - - -# -# -# Install dependencies order -# -.ORDER: installhdrs exporthdrs all - -# -# Install kernel based on RC_ARCHS for all INSTALL_TYPES -# Install kernel header files based on RC_ARCHS -# -install: installhdrs all installman installmachinekernels -ifeq ($(findstring Libsyscall,$(RC_ProjectName)),Libsyscall) -# nothing to do -else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld) -# nothing to do, work performed in "all" action -else ifeq ($(RC_ProjectName),libkmod) -# nothing to do, work performed in "all" action -else ifeq ($(findstring _headers_Sim,$(RC_ProjectName)),_headers_Sim) -# nothing to do -else # xnu or xnu_debug -# A bit of a hack for machine_config: machine configs aren't really threaded through properly. 
- $(_v)machine_config=$(MACHINE_CONFIG); \ - for kernel_config in $(INSTALL_TYPE); \ - do \ - for arch_config in $(INSTALL_ARCHS); \ - do \ - if [ $${arch_config} = ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${arch_config} = L4_ARM ] ; then \ - if [ $${machine_config} = DEFAULT ] ; then \ - machine_config=$(DEFAULT_L4_ARM_MACHINE_CONFIG); \ - fi; \ - fi; \ - if [ $${machine_config} = DEFAULT ] ; then \ - install_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}/${RELATIVE_SOURCE_PATH}; \ - else \ - install_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}/${RELATIVE_SOURCE_PATH}; \ - fi; \ - [ -d $${install_subdir} ] || $(MKDIR) $${install_subdir}; \ - ${MAKE} ${MAKEJOBS} -C $${install_subdir} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH} \ - build_install; \ - done; \ - done; -ifeq ($(RC_ProjectName),xnu_debug) - $(_v)$(MKDIR) $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) - $(_v)$(MV) $(DSTROOT)/mach_kernel* $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) - $(_v)$(CP) $(SYMROOT)/kgmacros $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) - $(_v)$(CP) -r $(SYMROOT)/System.kext $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) - $(_v)$(CP) -r $(SYMROOT)/mach_kernel.dSYM $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) - $(_v)$(CP) $(SRCROOT)/config/README.DEBUG-kernel.txt $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR) - $(_v)$(MV) $(DSTROOT)/System $(DSTROOT)/usr $(OBJROOT)/ -endif -endif - -.PHONY: install_embedded install_devicemap -install_embedded install_devicemap: install - -installmachinekernels: - @echo "[ $(SOURCE) ] make installmachinekernels"; \ - my_counter=1; \ - for my_config in $(TARGET_CONFIGS_UC); \ - do \ - if [ $${my_counter} -eq 1 ] ; then \ - my_counter=2; \ - kernel_config=$${my_config}; \ - if [ $${kernel_config} = DEFAULT ] ; then \ - kernel_config=$(DEFAULT_KERNEL_CONFIG); \ - fi; \ - elif [ $${my_counter} -eq 2 ] ; then \ - my_counter=3; \ - arch_config=$${my_config}; \ - if [ $${arch_config} = DEFAULT ] ; then \ - arch_config=`arch | $(TR) a-z A-Z`; \ - fi; \ - else \ - my_counter=1; \ - machine_config=$${my_config}; \ - if [ $${machine_config} != DEFAULT ] ; then \ - build_subdir=${OBJROOT}/$${kernel_config}_$${arch_config}_$${machine_config}; \ - install_kernel_file=mach.`printf "%s" "$${kernel_config}" | $(TR) A-Z a-z`.`printf "%s" "$${machine_config}" | $(TR) A-Z a-z`; \ - [ -d $${build_subdir} ] || $(MKDIR) $${build_subdir}; \ - ${MAKE} ${MAKEJOBS} -C $${build_subdir} \ - INSTALL_KERNEL_FILE=$${install_kernel_file} \ - KERNEL_CONFIG=$${kernel_config} \ - ARCH_CONFIG=$${arch_config} \ - MACHINE_CONFIG=$${machine_config} \ - MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH} \ - TARGET=$${build_subdir}/ \ - do_build_install; \ - fi; \ - fi; \ - done; +$(eval $(call RECURSIVE_BUILD_RULES_template,config_all,$(CONFIG_SUBDIRS),do_config_all,1)) # # Install for all architectures for all Configuration/Architecture options # -setup_build_install: - -do_build_install: - -BUILD_INSTALL_SUBDIRS_TARGETS = $(addprefix build_install_,$(INST_SUBDIRS)) - -.PHONY: $(BUILD_INSTALL_SUBDIRS_TARGETS) - -$(BUILD_INSTALL_SUBDIRS_TARGETS): - $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \ - TARGET=${OBJROOT}/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT); \ - else \ - 
TARGET="$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)"; \ - fi; \ - ${MAKE} TARGET=$${TARGET} setup_build_install; \ - kernel_config=$(KERNEL_CONFIG); \ - install_subdir="$(patsubst build_install_%,%,$@)"; \ - [ -d $${install_subdir} ] || $(MKDIR) $${install_subdir}; \ - ${MAKE} -C $${install_subdir} \ - KERNEL_CONFIG=$${kernel_config} \ - MAKEFILES=${SOURCE}/$${install_subdir}/Makefile \ - SOURCE=${SOURCE}$${install_subdir}/ \ - RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}/$${install_subdir} \ - TARGET=$${TARGET} \ - build_install; - -build_install: $(BUILD_INSTALL_SUBDIRS_TARGETS) - $(_v)if [ $(MACHINE_CONFIG) = DEFAULT ] ; then \ - TARGET=${OBJROOT}/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT); \ - else \ - TARGET="$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT)"; \ - fi; \ - ${MAKE} TARGET=$${TARGET} do_build_install; - - -# -# Install source tree -# -.PHONY: installsrc - -installsrc: - $(_v)($(TAR) -c --mode go=r,+X --no-ignore-case --exclude .svn --exclude .git --exclude cscope.\* --exclude BUILD --exclude \*~ -f - .) | (cd $(SRCROOT) && $(TAR) --no-same-owner -xf -) - - -# -# Clean up source tree -# -.PHONY: clean - -clean: - -# -# Build source file list for cscope database and tags -# -cscope.files: - @echo "Building file list for cscope and tags" - @find . -name '*.h' -type f | grep -v ^..BUILD > _cscope.files 2> /dev/null - @find . -name '*.defs' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null - @find . -name '*.c' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null - @find . -name '*.cpp' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null - @find . -name '*.s' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null - @find . -name '*.h.template' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null - @echo -k -q -c > cscope.files 2> /dev/null - @sort -u < _cscope.files >> cscope.files 2> /dev/null - @rm -f _cscope.files _cscope.files2 2> /dev/null - -# -# Build cscope database -# -cscope: cscope.files - @echo "Building cscope database" - @cscope -bvU 2> /dev/null - -# -# Build tags -# - -tags: cscope.files - @echo "Building ctags" - @-sed 1d cscope.files | xargs ctags -dtw 2> /dev/null || \ - echo "Phantom files detected!" 
2>&1 > /dev/null - @-[ -f TAGS ] || ${MAKE} TAGS - -TAGS: cscope.files - @echo "Building etags" - @-cat cscope.files | etags -l auto -S - 2> /dev/null +$(eval $(call RECURSIVE_BUILD_RULES_template,build_install_primary,$(INST_SUBDIRS),do_build_install_primary,1)) +$(eval $(call RECURSIVE_BUILD_RULES_template,build_install_non_primary,$(INST_SUBDIRS),do_build_install_non_primary,1)) # # Install Man Pages # -.PHONY: installman - -installman: -ifeq ($(findstring Libsyscall,$(RC_ProjectName)),Libsyscall) -# nothing to do -else ifeq ($(findstring libkxld,$(RC_ProjectName)),libkxld) -# nothing to do -else ifeq ($(RC_ProjectName),libkmod) -# nothing to do -else ifeq ($(findstring xnu_,$(RC_ProjectName)),xnu_) -installman: -# nothing to do -else # xnu - @echo "[ $(SRCROOT) ] Installing man pages" - $(_v)manpath=$(DSTROOT)/$(MANDIR); \ - [ -d $$manpath ] || $(MKDIR) $$manpath; \ - ${MAKE} ${MAKEJOBS} MAKEFILES=${SOURCE}/Makefile \ - SOURCE=${SOURCE}/ \ - RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH} \ - TARGET=${DSTROOT}/ \ - build_installman - ${SRCROOT}/config/compress-man-pages.pl ${DSTROOT}/${MANDIR} -endif - -do_installman: - -BUILD_INSTALLMAN_SUBDIRS_TARGETS = $(addprefix build_installman_,$(INSTMAN_SUBDIRS)) - -.PHONY: $(BUILD_INSTALLMAN_SUBDIRS_TARGETS) - -$(BUILD_INSTALLMAN_SUBDIRS_TARGETS): - $(_v)installman_subdir="$(patsubst build_installman_%,%,$@)"; \ - ${MAKE} -C $${installman_subdir} -r \ - MAKEFILES=$(SOURCE)$${installman_subdir}/Makefile \ - SOURCE=$(SOURCE)$${installman_subdir}/ \ - RELATIVE_SOURCE_PATH=$(RELATIVE_SOURCE_PATH)/$${installman_subdir} \ - TARGET=$(TARGET)$${installman_subdir}/ \ - build_installman; - -build_installman: $(BUILD_INSTALLMAN_SUBDIRS_TARGETS) - $(_v)if [ -n "$(strip $(INSTALL_MAN_LIST))" ]; then \ - ${MAKE} do_installman; \ - fi +$(eval $(call RECURSIVE_BUILD_RULES_template,build_installman,$(INSTMAN_SUBDIRS),do_installman,)) # vim: set ft=make: diff --git a/makedefs/MakeInc.kernel b/makedefs/MakeInc.kernel new file mode 100644 index 000000000..d44d218ca --- /dev/null +++ b/makedefs/MakeInc.kernel @@ -0,0 +1,390 @@ +# -*- mode: makefile;-*- +# +# Copyright (C) 1999-2012 Apple Inc. All rights reserved. +# +# MakeInc.kernel augments the single-architecture +# recursive build system with rules specific +# to assembling and linking a kernel. 
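#
# Illustration (not part of the patch): with the naming variables
# defined below, a DEVELOPMENT build for a hypothetical machine config
# "D22" produces
#
#   KERNEL_FILE_NAME     = mach_kernel
#   ALT_KERNEL_FILE_NAME = mach.development.d22
#
# while CURRENT_MACHINE_CONFIG=NONE drops the machine suffix, leaving
# mach.development; the primary/non-primary install rules further down
# pick between the two names.
#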
+# + +# +# Validate configuration options +# +ifeq ($(filter $(CURRENT_ARCH_CONFIG),$(SUPPORTED_ARCH_CONFIGS)),) +$(error Unsupported CURRENT_ARCH_CONFIG $(CURRENT_ARCH_CONFIG)) +endif + +ifeq ($(filter $(CURRENT_KERNEL_CONFIG),$(SUPPORTED_KERNEL_CONFIGS)),) +$(error Unsupported CURRENT_KERNEL_CONFIG $(CURRENT_KERNEL_CONFIG)) +endif + +ifeq ($(filter $(CURRENT_MACHINE_CONFIG),$(SUPPORTED_$(CURRENT_ARCH_CONFIG)_MACHINE_CONFIGS)),) +$(error Unsupported CURRENT_MACHINE_CONFIG $(CURRENT_MACHINE_CONFIG)) +endif + +ifeq ($(filter $(PLATFORM),$(SUPPORTED_PLATFORMS)),) +$(error Unsupported PLATFORM $(PLATFORM)) +endif + +STATIC_KMODS = $(SRCROOT)/kmods.a + +# +# File names in DSTROOT +# + +KERNEL_FILE_NAME_SUFFIX = + +KERNEL_FILE_NAME = mach_kernel$(KERNEL_FILE_NAME_SUFFIX) + +ifeq ($(CURRENT_MACHINE_CONFIG),NONE) +ALT_KERNEL_FILE_NAME = mach$(KERNEL_FILE_NAME_SUFFIX).$(CURRENT_KERNEL_CONFIG_LC) +ALT_KERNEL_LLDBBOOTSTRAP_NAME = mach$(KERNEL_FILE_NAME_SUFFIX).py +else +ALT_KERNEL_FILE_NAME = mach$(KERNEL_FILE_NAME_SUFFIX).$(CURRENT_KERNEL_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) +ALT_KERNEL_LLDBBOOTSTRAP_NAME = mach$(KERNEL_FILE_NAME_SUFFIX)_$(CURRENT_KERNEL_CONFIG_LC).py +endif + +# +# Rules for the highly parallel "build" phase, where each build configuration +# writes into their own $(TARGET) independent of other build configs +# + +do_build_all:: do_build_mach_kernel + +.PHONY: do_build_mach_kernel + +do_build_mach_kernel: $(TARGET)/mach_kernel $(TARGET)/mach_kernel.sys + $(_v)if [ $(CURRENT_MACHINE_CONFIG) != NONE ] ; then \ + $(LN) $(call function_convert_build_config_to_objdir,$(CURRENT_BUILD_CONFIG))/mach_kernel $(OBJROOT)/mach.$(CURRENT_KERNEL_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC); \ + fi; + +ifeq ($(BUILD_DSYM),1) +do_build_all:: do_build_mach_kernel_dSYM +endif + +.PHONY: do_build_mach_kernel_dSYM + +do_build_mach_kernel_dSYM: $(TARGET)/mach_kernel.sys.dSYM + @: + +$(TARGET)/mach_kernel: $(TARGET)/mach_kernel.sys + @echo STRIP mach_kernel + $(_v)$(STRIP) $(STRIP_FLAGS) $< -o $@ + $(_v)$(RM) $@.ctfdata +ifeq ($(DO_CTFMERGE),1) + @echo CTFMERGE mach_kernel + $(_v)$(FIND) $(TARGET)/ -name \*.ctf -size +0 | \ + $(XARGS) $(CTFMERGE) -l xnu -o $@ -Z $@.ctfdata || true +endif + $(_v)if [ -s $@.ctfdata ]; then \ + echo CTFINSERT mach_kernel; \ + $(CTFINSERT) $@ $(ARCH_FLAGS_$(CURRENT_ARCH_CONFIG)) \ + $@.ctfdata -o $@; \ + fi; + +$(TARGET)/mach_kernel.sys.dSYM: $(TARGET)/mach_kernel.sys + $(_v)echo DSYMUTIL mach_kernel.sys + $(_v)$(DSYMUTIL) $(DSYMUTIL_FLAGS) $< -o $@ + $(_v)$(TOUCH) $@ + +$(TARGET)/mach_kernel.sys: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST),$(component)/$(CURRENT_KERNEL_CONFIG)/$(component).filelist)) lastkernelconstructor.o $(SRCROOT)/config/version.c $(SRCROOT)/config/MasterVersion + $(_v)${MAKE} -f $(firstword $(MAKEFILE_LIST)) version.o + @echo LD mach_kernel.sys + $(_v)$(CAT) $(filter %.filelist,$+) < /dev/null > link.filelist + $(_v)$(LD) $(LDFLAGS_KERNEL) -filelist link.filelist version.o $(filter %.o,$+) `if [ -e $(STATIC_KMODS) ]; then echo $(STATIC_KMODS); fi` \ + -o $@ $(LD_KERNEL_LIBS) + +version.o: $(OBJPATH)/version.c + ${C_RULE_1A}$< + ${C_RULE_2} + ${C_RULE_4} + +# Always recreate version.sh +$(OBJPATH)/version.c: $(SRCROOT)/config/version.c $(NEWVERS) $(SRCROOT)/config/MasterVersion ALWAYS + $(_v)$(CP) $< $@ + $(_v)$(NEWVERS) $(OBJPATH)/version.c > /dev/null; + +lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c + ${C_RULE_1A}$< $(CFLAGS_NOLTO_FLAG) + ${C_RULE_2} + ${C_RULE_3} + ${C_RULE_4} + $(_v)$(SEG_HACK) -s __DATA -n 
__LAST -o $@__ $@ + $(_v)$(MV) $@__ $@ + +# invalidate current kernel in $(SYMROOT) +do_build_setup:: + $(_v)$(TOUCH) $(OBJROOT)/.mach_kernel.timestamp + $(_v)$(TOUCH) $(OBJROOT)/.symbolset.timestamp + +# +# Install rules. Each build config is classified as "primary" (the first +# config for an architecture) or "non-primary". Primary build configs +# have the semantic of competing to *combine* single-architecture +# files into a multi-architecture output in the DSTROOT, like +# $(DSTROOT)/mach_kernel, and consequently each primary build config +# has its install target run serially with respect to other primary +# build configs. Non-primary build configs will never compete for +# files in the DSTROOT or SYMROOT, and can be installed in parallel +# with other non-primary configs (and even primary configs) +# + +do_build_install_primary:: do_install_kernel_framework + +ifeq ($(INSTALL_PRIMARY_KERNEL_LIKE_NON_PRIMARY),1) +do_build_install_primary:: do_install_machine_specific_kernel +ifeq ($(BUILD_DSYM),1) +do_build_install_primary:: do_install_machine_specific_kernel_dSYM +endif +else +do_build_install_primary:: do_install_mach_kernel +ifeq ($(BUILD_DSYM),1) +do_build_install_primary:: do_install_mach_kernel_dSYM +endif +endif + +do_build_install_non_primary:: do_install_machine_specific_kernel +ifeq ($(BUILD_DSYM),1) +do_build_install_non_primary:: do_install_machine_specific_kernel_dSYM +endif + +ifeq ($(BUILD_DSYM),1) +ifeq ($(INSTALL_KERNEL_SYM_TO_KDK),1) +do_build_install_primary:: do_install_machine_specific_KDK_dSYM +do_build_install_non_primary:: do_install_machine_specific_KDK_dSYM +endif +endif + +ifeq ($(INSTALL_XNU_DEBUG_FILES),1) +do_build_install_primary:: do_install_xnu_debug_files +ifeq ($(BUILD_DSYM),1) +do_build_install_primary:: do_install_xnu_debug_mach_kernel_dSYM +endif +endif + +.PHONY: do_install_mach_kernel do_install_mach_kernel_dSYM do_install_xnu_debug_files do_install_xnu_debug_mach_kernel_dSYM + +do_install_mach_kernel: $(DSTROOT)/$(INSTALL_KERNEL_DIR)/$(KERNEL_FILE_NAME) \ + $(SYMROOT)/$(KERNEL_FILE_NAME) + +do_install_mach_kernel_dSYM: \ + $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ + $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ + $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_FILE_NAME).py \ + $(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) + @: + +do_install_xnu_debug_files: $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/README.DEBUG-kernel.txt + @: + +do_install_xnu_debug_mach_kernel_dSYM: \ + $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ + $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ + $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_FILE_NAME).py \ + $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) + @: + +# +# If the timestamp indicates the DSTROOT kernel is out of +# date, start over. Normal dependencies don't work because we can have +# ( BUILDA, BUILDB, INSTALLB, INSTALLA ) in which case at INSTALLA time +# the timestamps would $(DSTROOT)/mach_kernel is not out of date compared +# to BUILDA. 
So we maintain a separate file at the time make(1) +# was run and use it to determine what actions to take +# + +$(DSTROOT)/$(INSTALL_KERNEL_DIR)/$(KERNEL_FILE_NAME): $(TARGET)/mach_kernel ALWAYS + $(_v)$(MKDIR) $(dir $@) + $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ + echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ + $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ + cmdstatus=$$?; \ + else \ + echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ + $(LIPO) -create $@ $< -output $@; \ + cmdstatus=$$?; \ + fi; \ + exit $$cmdstatus + +$(SYMROOT)/$(KERNEL_FILE_NAME): $(TARGET)/mach_kernel.sys ALWAYS + $(_v)$(MKDIR) $(dir $@) + $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ + echo INSTALLSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ + $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ + cmdstatus=$$?; \ + else \ + echo INSTALLSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))"; \ + $(LIPO) -create $@ $< -output $@; \ + cmdstatus=$$?; \ + fi; \ + exit $$cmdstatus + +$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros + $(_v)$(MKDIR) $(dir $@) + @echo INSTALLMACROS $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" + $(_v)$(CP) -r $< $(dir $@) + $(_v)$(TOUCH) $@ + +$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_FILE_NAME).py $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(KERNEL_FILE_NAME).py: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros/core/xnu_lldb_init.py + $(_v)$(MKDIR) $(dir $@) + @echo INSTALLMACROS $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" + $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ + +$(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/README.DEBUG-kernel.txt: $(SRCROOT)/config/README.DEBUG-kernel.txt + $(_v)$(MKDIR) $(dir $@) + @echo INSTALL $(notdir $@) + $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ + +$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMINFODIR)/Info.plist + $(_v)$(MKDIR) $(dir $@) + @echo INSTALLSYM dSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" + $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ + +$(SYMROOT)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME) $(DSTROOT)/$(DEVELOPER_EXTRAS_DIR)/$(KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(KERNEL_FILE_NAME): $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMDWARFDIR)/mach_kernel.sys ALWAYS + $(_v)$(MKDIR) $(dir $@) + $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ + echo INSTALLSYM dSYM $(notdir $@).dSYM "($(CURRENT_ARCH_CONFIG_LC))"; \ + $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ + cmdstatus=$$?; \ + else \ + echo INSTALLSYM dSYM $(notdir $@).dSYM "($(CURRENT_ARCH_CONFIG_LC))"; \ + $(LIPO) -create $@ $< -output $@; \ + cmdstatus=$$?; \ + fi; \ + exit $$cmdstatus + +.PHONY: do_install_machine_specific_kernel do_install_machine_specific_kernel_dSYM + +do_install_machine_specific_kernel: $(DSTROOT)/$(ALT_KERNEL_FILE_NAME) \ + $(SYMROOT)/$(ALT_KERNEL_FILE_NAME) + @: + +do_install_machine_specific_kernel_dSYM: \ + $(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ + $(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ + $(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(ALT_KERNEL_LLDBBOOTSTRAP_NAME) \ + $(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(ALT_KERNEL_FILE_NAME) + @: + +.PHONY: 
do_install_machine_specific_KDK_dSYM + +do_install_machine_specific_KDK_dSYM: \ + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME) \ + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist \ + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros \ + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(ALT_KERNEL_LLDBBOOTSTRAP_NAME) \ + $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(ALT_KERNEL_FILE_NAME) + @: + +$(DSTROOT)/$(ALT_KERNEL_FILE_NAME): $(TARGET)/mach_kernel ALWAYS + $(_v)$(MKDIR) $(dir $@) + $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ + echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ + $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ + cmdstatus=$$?; \ + else \ + echo INSTALL $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ + $(LIPO) -create $@ $< -output $@; \ + cmdstatus=$$?; \ + fi; \ + exit $$cmdstatus + +$(SYMROOT)/$(ALT_KERNEL_FILE_NAME) $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME): $(TARGET)/mach_kernel.sys ALWAYS + $(_v)$(MKDIR) $(dir $@) + $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ + echo INSTALLSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ + $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ + cmdstatus=$$?; \ + else \ + echo INSTALLSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ + $(LIPO) -create $@ $< -output $@; \ + cmdstatus=$$?; \ + fi; \ + exit $$cmdstatus + +$(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMINFODIR)/Info.plist: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMINFODIR)/Info.plist + $(_v)$(MKDIR) $(dir $@) + @echo INSTALLSYM dSYM $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))" +ifeq ($(USE_BINARY_PLIST),1) + $(_v)$(PLUTIL) -convert binary1 -o $@ $< +else + $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ +endif + +$(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/lldbmacros: $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros + $(_v)$(MKDIR) $(dir $@) + @echo INSTALLMACROS $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC))" + $(_v)$(CP) -r $< $(dir $@) + $(_v)$(TOUCH) $@ + +$(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(ALT_KERNEL_LLDBBOOTSTRAP_NAME) $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMLLDBMACROSDIR)/$(ALT_KERNEL_LLDBBOOTSTRAP_NAME): $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros/core/xnu_lldb_init.py + $(_v)$(MKDIR) $(dir $@) + @echo INSTALLMACROS $(notdir $@) "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))" + $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ + +$(SYMROOT)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(ALT_KERNEL_FILE_NAME) $(DSTROOT)/$(INSTALL_KERNEL_SYM_DIR)/$(ALT_KERNEL_FILE_NAME).dSYM/$(DSYMDWARFDIR)/$(ALT_KERNEL_FILE_NAME): $(TARGET)/$(DSYMKERNELSYSDIR)/$(DSYMDWARFDIR)/mach_kernel.sys ALWAYS + $(_v)$(MKDIR) $(dir $@) + $(_v)if [ $(OBJROOT)/.mach_kernel.timestamp -nt $@ ]; then \ + echo INSTALLSYM dSYM $(notdir $@).dSYM "($(CURRENT_ARCH_CONFIG_LC) $(CURRENT_MACHINE_CONFIG_LC))"; \ + $(INSTALL) $(EXEC_INSTALL_FLAGS) $< $@; \ + cmdstatus=$$?; \ + else \ + echo INSTALLSYM dSYM $(notdir $@).dSYM "($(CURRENT_ARCH_CONFIG_LC) 
$(CURRENT_MACHINE_CONFIG_LC))"; \ + $(LIPO) -create $@ $< -output $@; \ + cmdstatus=$$?; \ + fi; \ + exit $$cmdstatus + +# The $(RM) is needed so that the $(LN) doesn't dereference an existing +# symlink during incremental builds and create a new symlink inside +# the target of the existing symlink +do_installhdrs_mi:: + $(_v)$(MKDIR) $(DSTROOT)/$(KINCFRAME) + $(_v)$(MKDIR) $(DSTROOT)/$(KPINCDIR) + $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/Versions/Current + $(_v)$(LN) $(KINCVERS) $(DSTROOT)/$(KINCFRAME)/Versions/Current + $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/Headers + $(_v)$(LN) Versions/Current/Headers \ + $(DSTROOT)/$(KINCFRAME)/Headers + $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/PrivateHeaders + $(_v)$(LN) Versions/Current/PrivateHeaders \ + $(DSTROOT)/$(KINCFRAME)/PrivateHeaders +ifeq ($(INSTALL_SYSTEM_FRAMEWORK_SYMLINKS),1) + $(_v)$(MKDIR) $(DSTROOT)/$(SINCFRAME)/Versions + $(_v)$(RM) $(DSTROOT)/$(SINCFRAME)/Versions/Current + $(_v)$(LN) $(SINCVERS) $(DSTROOT)/$(SINCFRAME)/Versions/Current + $(_v)$(RM) $(DSTROOT)/$(SINCFRAME)/PrivateHeaders + $(_v)$(LN) Versions/Current/PrivateHeaders \ + $(DSTROOT)/$(SINCFRAME)/PrivateHeaders +endif + +$(DSTROOT)/$(KRESDIR)/Info.plist: $(SOURCE)/EXTERNAL_HEADERS/Info.plist + $(_v)$(MKDIR) $(DSTROOT)/$(KRESDIR) + $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ + $(_v)$(NEWVERS) $@ $(_vstdout) +ifeq ($(USE_BINARY_PLIST),1) + $(_v)$(PLUTIL) -convert binary1 -o $@ $@ +endif + +.PHONY: do_install_kernel_framework + +do_install_kernel_framework: $(DSTROOT)/$(KRESDIR)/Info.plist + $(_v)$(MKDIR) $(DSTROOT)/$(KRESDIR) + $(_v)$(RM) $(DSTROOT)/$(KINCFRAME)/Resources + $(_v)$(LN) Versions/Current/Resources \ + $(DSTROOT)/$(KINCFRAME)/Resources +ifeq ($(DELETE_KERNEL_FRAMEWORK_AND_MISC),1) + $(_v)$(FIND) $(DSTROOT)/$(KINCFRAME) \ + $(DSTROOT)/$(INSTALL_EXTENSIONS_DIR) \ + $(DSTROOT)/$(INSTALL_SHARE_MISC_DIR) \ + $(DSTROOT)/$(INSTALL_DTRACE_SCRIPTS_DIR) \ + \( -type f -o -type l \) -exec $(RM) "{}" \; + $(_v)$(FIND) -d $(DSTROOT)/$(KINCFRAME) \ + $(DSTROOT)/$(INSTALL_EXTENSIONS_DIR) \ + $(DSTROOT)/$(INSTALL_SHARE_MISC_DIR) \ + $(DSTROOT)/$(INSTALL_DTRACE_SCRIPTS_DIR) \ + \( -type d \) -exec $(RMDIR) "{}" \; +endif + +print_exports: + $(_v)printenv | sort diff --git a/makedefs/MakeInc.rule b/makedefs/MakeInc.rule index d4e5e5fee..06fc1bda5 100644 --- a/makedefs/MakeInc.rule +++ b/makedefs/MakeInc.rule @@ -1,3 +1,14 @@ +# -*- mode: makefile;-*- +# +# Copyright (C) 1999-2012 Apple Inc. All rights reserved. +# +# MakeInc.rule defines the targets and rules for +# leaf directories once MakeInc.dir has recursed +# into them. "do_XXX" may be double-colon rules +# to allow the Makefile in the source directory +# to augment the actions that will be performed. 
+# + # # Generic Install rules # @@ -50,461 +61,159 @@ ifndef INSTALL_KF_MD_GEN_LIST INSTALL_KF_MD_GEN_LIST = $(EXPORT_MD_GEN_LIST) endif -ifneq ($(MACHINE_CONFIG), DEFAULT) - export OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG) -else - export OBJPATH = $(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG) -endif +.PHONY: ALWAYS -INSTALL_MI_GEN_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_GEN_LIST)) - -$(INSTALL_MI_GEN_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/% : % - @true echo Installing $< in $(dir $@) - $(_v)[ -d $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR) ] ||$(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - [ -d ./incmidir ] || $(MKDIR) ./incmidir; \ - echo garbage > ./incmidir/$${filename_strip}; \ - $(UNIFDEF) $(SINCFRAME_UNIFDEF) \ - $< > ./incmidir/$${filename} || \ - $(DECOMMENT) ./incmidir/$${filename} r > \ - ./incmidir/$${filename_strip}; \ - if [ -s ./incmidir/$${filename_strip} ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./incmidir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; - -INSTALL_KF_MI_GEN_FILES = $(addprefix $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/, $(INSTALL_KF_MI_GEN_LIST)) - -$(INSTALL_KF_MI_GEN_FILES): $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/% : % - @true echo Installing $< in $(midir $@) - $(_v)[ -d $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR) ] ||$(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - [ -d ./kincmidir ] || $(MKDIR) ./kincmidir; \ - echo garbage > ./kincmidir/$${filename_strip}; \ - $(UNIFDEF) $(KINCFRAME_UNIFDEF) \ - $< > ./kincmidir/$${filename} || \ - $(DECOMMENT) ./kincmidir/$${filename} r > \ - ./kincmidir/$${filename_strip}; \ - if [ -s ./kincmidir/$${filename_strip} ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./kincmidir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; - -INSTALL_MI_GEN_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_GEN_LIST)) - -$(INSTALL_MI_GEN_LCL_FILES): $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/% : % - @true echo Installing $< in $(dir $@) - $(_v)[ -d $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR) ] ||$(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - [ -d ./pincmidir ] || $(MKDIR) ./pincmidir; \ - echo garbage > ./pincmidir/$${filename_strip}; \ - $(UNIFDEF) $(SPINCFRAME_UNIFDEF) \ - $< > ./pincmidir/$${filename} || \ - $(DECOMMENT) ./pincmidir/$${filename} r > \ - ./pincmidir/$${filename_strip}; \ - if [ -s ./pincmidir/$${filename_strip} ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./pincmidir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; - -INSTALL_KF_MI_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR)/, $(INSTALL_KF_MI_LCL_GEN_LIST)) - -$(INSTALL_KF_MI_LCL_GEN_FILES): $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR)/% : % - @true echo Installing $< in $(dir $@) - $(_v)[ -d $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR) ] ||$(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - [ -d ./kpincmidir ] || $(MKDIR) ./kpincmidir; \ - echo garbage > ./kpincmidir/$${filename_strip}; \ - $(UNIFDEF) 
$(KPINCFRAME_UNIFDEF) \ - $< > ./kpincmidir/$${filename} || \ - $(DECOMMENT) ./kpincmidir/$${filename} r > \ - ./kpincmidir/$${filename_strip}; \ - if [ -s ./kpincmidir/$${filename_strip} ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./kpincmidir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; - -INSTALL_MD_GEN_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_GEN_LIST)) - -$(INSTALL_MD_GEN_INC_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/% : % - @true echo Installing $< in $(dir $@) - $(_v)[ -d $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR) ] ||$(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - [ -d ./incdir ] || $(MKDIR) ./incdir; \ - echo garbage > ./incdir/$${filename_strip}; \ - $(UNIFDEF) $(SINCFRAME_UNIFDEF) \ - $< > ./incdir/$${filename} || \ - $(DECOMMENT) ./incdir/$${filename} r > \ - ./incdir/$${filename_strip}; \ - if [ -s ./incdir/$${filename_strip} ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./incdir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; +ALWAYS: -INSTALL_KF_MD_GEN_FILES = $(addprefix $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR)/, $(INSTALL_KF_MD_GEN_LIST)) +# $(1) is the list of install paths +# $(2) is "1" if it's a "GEN"-style rule that looks locally, or else $(SOURCE) +# $(3) is the local temp directory for processing +# $(4) is the unifdef flags +# +# $$$$$$$$ is a double-escaped "$$" to represent the current pid +# of the shell process for creating uniquely named temporary files -$(INSTALL_KF_MD_GEN_FILES): $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR)/% : % - @true echo Installing $< in $(dir $@) - $(_v)[ -d $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR) ] ||$(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - [ -d ./kincdir ] || $(MKDIR) ./kincdir; \ - echo garbage > ./kincdir/$${filename_strip}; \ - $(UNIFDEF) $(KINCFRAME_UNIFDEF) \ - $< > ./kincdir/$${filename} || \ - $(DECOMMENT) ./kincdir/$${filename} r > \ - ./kincdir/$${filename_strip}; \ - if [ -s ./kincdir/$${filename_strip} ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./kincdir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; - -INSTALL_MD_LCL_FILES = $(addprefix $(SOURCE), $(INSTALL_MD_LCL_LIST)) -INSTALL_MD_GEN_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_GEN_LIST)) - -$(INSTALL_MD_GEN_LCL_FILES): $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/% : % - @true echo Installing $< in $(dir $@) - $(_v)[ -d $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR) ] ||$(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - [ -d ./pincdir ] || $(MKDIR) ./pincdir; \ - echo garbage > ./pincdir/$${filename_strip}; \ - $(UNIFDEF) $(SPINCFRAME_UNIFDEF) \ - $< > ./pincdir/$${filename} || \ - $(DECOMMENT) ./pincdir/$${filename} r > \ - ./pincdir/$${filename_strip}; \ - if [ -s ./pincdir/$${filename_strip} ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./pincdir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; - -INSTALL_KF_MD_LCL_FILES = $(addprefix $(SOURCE), $(INSTALL_KF_MD_LCL_LIST)) -INSTALL_KF_MD_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR)/, $(INSTALL_KF_MD_LCL_GEN_LIST)) +define INSTALLHDRS_RULE_template 
-$(INSTALL_KF_MD_LCL_GEN_FILES): $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR)/% : % - @true echo Installing $< in $(dir $@) - $(_v)[ -d $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR) ] ||$(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \ - filename=`$(BASENAME) $<`; \ - filename_strip=$(addsuffix .strip,$${filename}); \ - $(RM) $(RMFLAGS) $@; \ - [ -d ./kpincdir ] || $(MKDIR) ./kpincdir; \ - echo garbage > ./kpincdir/$${filename_strip}; \ - $(UNIFDEF) $(KPINCFRAME_UNIFDEF) \ - $< > ./kpincdir/$${filename} || \ - $(DECOMMENT) ./kpincdir/$${filename} r > \ - ./kpincdir/$${filename_strip}; \ - if [ -s ./kpincdir/$${filename_strip} ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./kpincdir/$${filename} $(dir $@);\ - ); \ - else \ - echo Header file $< not exported; \ - fi; - -setup_installhdrs_mi: - -do_installhdrs_mi: $(INSTALL_MI_GEN_FILES) $(INSTALL_MI_GEN_LCL_FILES) $(INSTALL_KF_MI_GEN_FILES) $(INSTALL_KF_MI_LCL_GEN_FILES) - @true echo "[ $(SOURCE) ] make do_installhdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - $(_v)$(MKDIR) ./incmidir ./pincmidir ./kincmidir ./kpincmidir; \ - if [ -n "$(strip $(INSTALL_MI_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR) ]; then \ - (cd $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_MI_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ - fi; \ - for j in $(INSTALL_MI_LIST); \ - do \ - echo garbage > ./incmidir/$$j.strip; \ - $(UNIFDEF) $(SINCFRAME_UNIFDEF) \ - $(SOURCE)/$$j > ./incmidir/$$j || \ - $(DECOMMENT) ./incmidir/$$j r > \ - ./incmidir/$$j.strip; \ - if [ -s ./incmidir/$$j.strip ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./incmidir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \ - ); \ - else \ - echo Header file $$j not exported; \ - fi; \ - done; \ - fi; \ - if [ -n "$(strip $(INSTALL_MI_LCL_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR) ]; then \ - (cd $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_MI_LCL_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ - fi; \ - for j in $(INSTALL_MI_LCL_LIST); \ - do \ - echo garbage > ./pincmidir/$$j.strip; \ - $(UNIFDEF) $(SPINCFRAME_UNIFDEF) \ - $(SOURCE)/$$j > ./pincmidir/$$j || \ - $(DECOMMENT) ./pincmidir/$$j r > \ - ./pincmidir/$$j.strip; \ - if [ -s ./pincmidir/$$j.strip ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./pincmidir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \ - ); \ - else \ - echo Header file $$j not exported; \ - fi; \ - done; \ - fi; \ - if [ -n "$(strip $(INSTALL_KF_MI_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR) ]; then \ - (cd $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_KF_MI_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \ - fi; \ - for j in $(INSTALL_KF_MI_LIST); \ - do \ - echo garbage > ./kincmidir/$$j.strip; \ - $(UNIFDEF) $(KINCFRAME_UNIFDEF) \ - $(SOURCE)/$$j > ./kincmidir/$$j || \ - $(DECOMMENT) ./kincmidir/$$j r > \ - ./kincmidir/$$j.strip; \ - if [ -s ./kincmidir/$$j.strip ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./kincmidir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \ - ); \ - else \ - echo Header file $$j not exported; \ - fi; \ - done; \ - fi; \ - if [ -n "$(strip $(INSTALL_KF_MI_LCL_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR) ]; then \ - (cd $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR);$(RM) $(RMFLAGS) $(INSTALL_KF_MI_LCL_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR); \ - fi; \ - for j in $(INSTALL_KF_MI_LCL_LIST); \ - do \ - echo garbage 
> ./kpincmidir/$$j.strip; \ - $(UNIFDEF) $(KPINCFRAME_UNIFDEF) \ - $(SOURCE)/$$j > ./kpincmidir/$$j || \ - $(DECOMMENT) ./kpincmidir/$$j r > \ - ./kpincmidir/$$j.strip; \ - if [ -s ./kpincmidir/$$j.strip ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./kpincmidir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR); \ - ); \ - else \ - echo Header file $$j not exported; \ - fi; \ - done; \ - fi; \ - $(RM) -rf ./incmidir ./pincmidir ./kincmidir ./kpincmidir; - -setup_installhdrs_md: - -do_installhdrs_md: $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INSTALL_KF_MD_GEN_FILES) $(INSTALL_KF_MD_LCL_GEN_FILES) - @true echo "[ $(SOURCE) ] make do_installhdrs_md $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - $(_v)$(MKDIR) ./incdir ./pincdir ./kincdir ./kpincdir; \ - if [ -n "$(strip $(INSTALL_MD_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR) ]; then \ - (cd $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_MD_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \ - fi; \ - for j in $(INSTALL_MD_LIST); \ - do \ - echo garbage > ./incdir/$$j.strip; \ - $(UNIFDEF) $(SINCFRAME_UNIFDEF) \ - $(SOURCE)/$$j > ./incdir/$$j || \ - $(DECOMMENT) ./incdir/$$j r > \ - ./incdir/$$j.strip; \ - if [ -s ./incdir/$$j.strip ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./incdir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \ - ); \ - else \ - echo Header file $$j not exported; \ - fi; \ - done; \ - fi; \ - if [ -n "$(strip $(INSTALL_MD_LCL_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR) ]; then \ - (cd $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_MD_LCL_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ - fi; \ - for j in $(INSTALL_MD_LCL_LIST); \ - do \ - echo garbage > ./pincdir/$$j.strip; \ - $(UNIFDEF) $(SPINCFRAME_UNIFDEF) \ - $(SOURCE)/$$j > ./pincdir/$$j || \ - $(DECOMMENT) ./pincdir/$$j r > \ - ./pincdir/$$j.strip; \ - if [ -s ./pincdir/$$j.strip ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./pincdir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \ - ); \ - else \ - echo Header file $$j not exported; \ - fi; \ - done; \ - fi; \ - if [ -n "$(strip $(INSTALL_KF_MD_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR) ]; then \ - (cd $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_KF_MD_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR); \ - fi; \ - for j in $(INSTALL_KF_MD_LIST); \ - do \ - echo garbage > ./kincdir/$$j.strip; \ - $(UNIFDEF) $(KINCFRAME_UNIFDEF) \ - $(SOURCE)/$$j > ./kincdir/$$j || \ - $(DECOMMENT) ./kincdir/$$j r > \ - ./kincdir/$$j.strip; \ - if [ -s ./kincdir/$$j.strip ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./kincdir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR); \ - ); \ - else \ - echo Header file $$j not exported; \ - fi; \ - done; \ +.PHONY: $(3)_MKDIR + +$(3)_MKDIR: + $$(_v)$$(MKDIR) ./$(3) + $$(_v)$$(MKDIR) $(dir $(firstword $(1))) + +$(1): $(dir $(firstword $(1)))% : $(if $(2),%,$$(SOURCE)/%) | $(3)_MKDIR + @echo INSTALLHDR $$* + $$(_v)$$(UNIFDEF) $(4) $$< > ./$(3)/$$*.unifdef.$$$$$$$$; \ + if [ $$$$? 
-eq 2 ]; then \ + echo Parse failure for $$<; \ + exit 1; \ fi; \ - if [ -n "$(strip $(INSTALL_KF_MD_LCL_LIST))" ]; then \ - if [ -d $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR) ]; then \ - (cd $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR);$(RM) $(RMFLAGS) $(INSTALL_KF_MD_LCL_LIST) ); \ - else \ - $(MKDIR) $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \ - fi; \ - for j in $(INSTALL_KF_MD_LCL_LIST); \ - do \ - echo garbage > ./kpincdir/$$j.strip; \ - $(UNIFDEF) $(KPINCFRAME_UNIFDEF) \ - $(SOURCE)/$$j > ./kpincdir/$$j || \ - $(DECOMMENT) ./kpincdir/$$j r > \ - ./kpincdir/$$j.strip; \ - if [ -s ./kpincdir/$$j.strip ]; \ - then ( \ - $(INSTALL) $(INSTALL_FLAGS) ./kpincdir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \ - ); \ - else \ - echo Header file $$j not exported; \ - fi; \ - done; \ + $$(DECOMMENT) ./$(3)/$$*.unifdef.$$$$$$$$ r > \ + ./$(3)/$$*.strip.$$$$$$$$ || exit 1; \ + if [ -s ./$(3)/$$*.strip.$$$$$$$$ ]; then \ + $$(INSTALL) $$(INSTALL_FLAGS) ./$(3)/$$*.unifdef.$$$$$$$$ $$@ || exit 1; \ fi; \ - $(RM) -rf ./incdir ./pincdir ./kincdir ./kpincdir; + $$(RM) ./$(3)/$$*.unifdef.$$$$$$$$ ./$(3)/$$*.strip.$$$$$$$$ +endef + +# +# Machine-independent (public) files +# + +INSTALL_MI_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LIST)) +INSTALL_MI_INC_GEN_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_GEN_LIST)) + +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MI_INC_FILES),,incmidir,$(SINCFRAME_UNIFDEF))) +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MI_INC_GEN_FILES),1,incmigendir,$(SINCFRAME_UNIFDEF))) +INSTALL_KF_MI_FILES = $(addprefix $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/, $(INSTALL_KF_MI_LIST)) +INSTALL_KF_MI_GEN_FILES = $(addprefix $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/, $(INSTALL_KF_MI_GEN_LIST)) + +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MI_FILES),,kincmidir,$(KINCFRAME_UNIFDEF))) +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MI_GEN_FILES),1,kincmigendir,$(KINCFRAME_UNIFDEF))) # -# Generic Export rules +# Machine-independent local (private) files # -ifeq ($(INCR_EXPORTHDRS), TRUE) -EXPORT_MI_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/, $(EXPORT_MI_LIST)) -EXPORT_MI_GEN_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/, $(EXPORT_MI_GEN_LIST)) +INSTALL_MI_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_LIST)) +INSTALL_MI_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/, $(INSTALL_MI_LCL_GEN_LIST)) -$(EXPORT_MI_INC_FILES) $(EXPORT_MI_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/% : % - @true echo Exporting $< in $(dir $@) - $(_v)[ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR) ] ||$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR); \ - ${CP} -p $< $(dir $@); \ +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MI_LCL_FILES),,pincmidir,$(SPINCFRAME_UNIFDEF))) +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MI_LCL_GEN_FILES),1,pincmigendir,$(SPINCFRAME_UNIFDEF))) +INSTALL_KF_MI_LCL_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR)/, $(INSTALL_KF_MI_LCL_LIST)) +INSTALL_KF_MI_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR)/, $(INSTALL_KF_MI_LCL_GEN_LIST)) -EXPORT_MD_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/, $(EXPORT_MD_LIST)) -EXPORT_MD_GEN_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/, $(EXPORT_MD_GEN_LIST)) +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MI_LCL_FILES),,kpincmidir,$(KPINCFRAME_UNIFDEF))) +$(eval $(call 
INSTALLHDRS_RULE_template,$(INSTALL_KF_MI_LCL_GEN_FILES),1,kpincmigendir,$(KPINCFRAME_UNIFDEF))) -$(EXPORT_MD_INC_FILES) $(EXPORT_MD_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/% : % - @true echo Exporting $< in $(dir $@) - $(_v)[ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR) ] ||$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR); \ - ${CP} -p $< $(dir $@); \ +# +# Machine-dependent (public) files +# -setup_exporthdrs_mi: +INSTALL_MD_INC_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LIST)) +INSTALL_MD_INC_GEN_FILES = $(addprefix $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_GEN_LIST)) -do_exporthdrs_mi: $(EXPORT_MI_INC_FILES) $(EXPORT_MI_GEN_INC_FILES) - @true echo "[ $(SOURCE) ] make do_exporthdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MD_INC_FILES),,incdir,$(SINCFRAME_UNIFDEF))) +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MD_INC_GEN_FILES),1,incgendir,$(SINCFRAME_UNIFDEF))) -setup_exporthdrs_md: +INSTALL_KF_MD_FILES = $(addprefix $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR)/, $(INSTALL_KF_MD_LIST)) +INSTALL_KF_MD_GEN_FILES = $(addprefix $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR)/, $(INSTALL_KF_MD_GEN_LIST)) -do_exporthdrs_md: $(EXPORT_MD_INC_FILES) $(EXPORT_MD_GEN_INC_FILES) - @true echo "[ $(SOURCE) ] make do_exporthdrs_md $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MD_FILES),,kincdir,$(KINCFRAME_UNIFDEF))) +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MD_GEN_FILES),1,kincgendir,$(KINCFRAME_UNIFDEF))) -else +# +# Machine-dependent local (private) files +# + +INSTALL_MD_LCL_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_LIST)) +INSTALL_MD_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/, $(INSTALL_MD_LCL_GEN_LIST)) + +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MD_LCL_FILES),,pincdir,$(SPINCFRAME_UNIFDEF))) +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_MD_LCL_GEN_FILES),1,pincgendir,$(SPINCFRAME_UNIFDEF))) + +INSTALL_KF_MD_LCL_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR)/, $(INSTALL_KF_MD_LCL_LIST)) +INSTALL_KF_MD_LCL_GEN_FILES = $(addprefix $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR)/, $(INSTALL_KF_MD_LCL_GEN_LIST)) + +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MD_LCL_FILES),,kpincdir,$(KPINCFRAME_UNIFDEF))) +$(eval $(call INSTALLHDRS_RULE_template,$(INSTALL_KF_MD_LCL_GEN_FILES),1,kpincgendir,$(KPINCFRAME_UNIFDEF))) -EXPORT_MI_INC_FILES = $(addprefix $(SOURCE), $(EXPORT_MI_LIST)) +.PHONY: do_installhdrs_mi + +# Double-colon rule so that MakeInc.kernel can add custom behaviors +do_installhdrs_mi:: $(INSTALL_MI_INC_FILES) $(INSTALL_MI_INC_GEN_FILES) $(INSTALL_KF_MI_FILES) $(INSTALL_KF_MI_GEN_FILES) \ + $(INSTALL_MI_LCL_FILES) $(INSTALL_MI_LCL_GEN_FILES) $(INSTALL_KF_MI_LCL_FILES) $(INSTALL_KF_MI_LCL_GEN_FILES) + @: + +.PHONY: do_installhdrs_md + +do_installhdrs_md: $(INSTALL_MD_INC_FILES) $(INSTALL_MD_INC_GEN_FILES) $(INSTALL_KF_MD_FILES) $(INSTALL_KF_MD_GEN_FILES) \ + $(INSTALL_MD_LCL_FILES) $(INSTALL_MD_LCL_GEN_FILES) $(INSTALL_KF_MD_LCL_FILES) $(INSTALL_KF_MD_LCL_GEN_FILES) + @: + +# +# Generic Export rules +# +EXPORT_MI_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/, $(EXPORT_MI_LIST)) EXPORT_MI_GEN_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/, $(EXPORT_MI_GEN_LIST)) -$(EXPORT_MI_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/% : % - @true echo Exporting $< in $(dir $@) - $(_v)[ -d 
$(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR) ] ||$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR); \ - ${CP} -p $< $(dir $@); \ +.PHONY: EXP_MI_INC_DIR +EXP_MI_INC_DIR: + $(_v)$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR) -EXPORT_MD_INC_FILES = $(addprefix $(SOURCE), $(EXPORT_MD_LIST)) +$(EXPORT_MI_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/% : % | EXP_MI_INC_DIR + $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ + +$(EXPORT_MI_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR)/% : $(SOURCE)/% | EXP_MI_INC_DIR + $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ + +EXPORT_MD_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/, $(EXPORT_MD_LIST)) EXPORT_MD_GEN_INC_FILES = $(addprefix $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/, $(EXPORT_MD_GEN_LIST)) -$(EXPORT_MD_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/% : % - @true echo Exporting $< in $(dir $@) - $(_v)[ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR) ] ||$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR); \ - ${CP} -p $< $(dir $@); \ - -setup_exporthdrs_mi: - -do_exporthdrs_mi: $(EXPORT_MI_GEN_INC_FILES) - @true echo "[ $(SOURCE) ] make do_exporthdrs_mi $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - $(_v)if [ -n "$(strip $(EXPORT_MI_LIST))" ]; then \ - if [ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR) ]; then \ - (cd $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR);$(RM) $(RMFLAGS) $(EXPORT_MI_LIST) ); \ - else \ - $(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR); \ - fi; \ - ${CP} -p $(EXPORT_MI_INC_FILES) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MI_DIR); \ - fi - -setup_exporthdrs_md: - -do_exporthdrs_md: $(EXPORT_MD_GEN_INC_FILES) - @true echo "[ $(SOURCE) ] make do_exporthdrs_md $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - $(_v)if [ -n "$(strip $(EXPORT_MD_LIST))" ]; then \ - if [ -d $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR) ]; then \ - (cd $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR);$(RM) $(RMFLAGS) $(EXPORT_MD_LIST) ); \ - else \ - $(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR); \ - fi; \ - ${CP} -p $(EXPORT_MD_INC_FILES) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR); \ - fi +.PHONY: EXP_MD_INC_DIR +EXP_MD_INC_DIR: + $(_v)$(MKDIR) $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR) + +$(EXPORT_MD_GEN_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/% : % | EXP_MD_INC_DIR + $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ + +$(EXPORT_MD_INC_FILES): $(OBJROOT)/$(EXPDIR)/$(EXPORT_MD_DIR)/% : $(SOURCE)/% | EXP_MD_INC_DIR + $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@ + +.PHONY: do_exporthdrs_mi + +do_exporthdrs_mi: $(EXPORT_MI_GEN_INC_FILES) $(EXPORT_MI_INC_FILES) + @: + +.PHONY: do_exporthdrs_md + +do_exporthdrs_md: $(EXPORT_MD_GEN_INC_FILES) $(EXPORT_MD_INC_FILES) + @: -# -endif # # Generic Compilation rules # @@ -513,290 +222,119 @@ endif # Compilation rules to generate .o from .s # -S_RULE_1A=$(_v)${S_KCC} -c ${SFLAGS} -MD -DASSEMBLER ${$@_SFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} -S_RULE_1B=$*.s -S_RULE_2=@echo AS $@ -S_RULE_3= +S_RULE_0=@echo AS $@ +S_RULE_1A=$(_v)${S_KCC} -c ${SFLAGS} -MD -MF $(@:o=d) -MP -DASSEMBLER ${$@_SFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} +S_RULE_1B=$( /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $@.ctf || true; +C_RULE_2= +else ifeq ($(DO_CTFCONVERT),1) +C_RULE_2=$(_v)if [ -z "${$@_SKIP_CTFCONVERT}" ]; then ${CTFCONVERT} -l xnu -v -o $@.ctf $@ > /dev/null || true; fi else -C_RULE_3= -endif -C_RULE_4= - -ifeq ($(ARCH_CONFIG),ARM) -ifeq ($(KERNEL_CONFIG),RELEASE) -C_RULE_3= +C_RULE_2= endif +ifeq ($(DO_CTFMACHO), 1) +C_CTFRULE_1A=${C_RULE_1A} -o $@.non_lto $(CFLAGS_NOLTO_FLAG) +C_CTFRULE_1B=$( /dev/null || true; fi +else +C_CTFRULE_1A=@true 
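+# (Editorial note, not part of the original patch.) The *_RULE_1A/_1B pairs
+# above are pasted back-to-back into generated recipes, roughly:
+#
+#	foo.o: foo.c
+#		${C_RULE_1A}${C_RULE_1B}
+#		${C_CTFRULE_1A}${C_CTFRULE_1B}
+#		${C_CTFRULE_2}
+#
+# so even when CTF processing is disabled the fragments must expand to valid
+# shell: "@true" with an empty _1B yields a harmless no-op command instead
+# of leaving a bare source-file operand as the recipe line.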
+C_CTFRULE_1B= +C_CTFRULE_2=@true endif # # Compilation rules to generate .o from .c for driver files # +C_RULE_0_D=${C_RULE_0} C_RULE_1A_D=${C_RULE_1A} C_RULE_1B_D=${C_RULE_1B} C_RULE_2_D=${C_RULE_2} -C_RULE_3_D=${C_RULE_3} -C_RULE_4_D=${C_RULE_4} +C_CTFRULE_1A_D=${C_CTFRULE_1A} +C_CTFRULE_1B_D=${C_CTFRULE_1B} +C_CTFRULE_2_D=${C_CTFRULE_2} # # Compilation rules to generate .co from .cp or .cpo from .cpp # The config tool slickly changes the last source filename char to 'o' # for the object filename. -P_RULE_1A=$(_v)${KC++} -o $@ -c ${CXXFLAGS} ${filter-out ${$@_CFLAGS_RM}, ${CFLAGS} ${CXXWARNFLAGS}} -MD ${$@_CFLAGS_ADD} ${$@_CXXWARNFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} +P_RULE_0=@echo C++ $@ +P_RULE_1A=$(_v)${KC++} -o $@ -c ${CXXFLAGS} ${filter-out ${$@_CFLAGS_RM}, ${CFLAGS} ${CXXWARNFLAGS}} -MD -MF $(@:o=d) -MP ${$@_CFLAGS_ADD} ${$@_CXXWARNFLAGS_ADD} ${INCFLAGS} ${$@_INCFLAGS} P_RULE_1B=$( $(@:.cpo=.d~) && mv $(@:.cpo=.d~) $(@:.cpo=.d) -P_RULE_3=@echo C++ $@ -ifeq ($(BUILD_MACHO_OBJ),0) -P_RULE_4= -else ifeq ($(BUILD_STABS),1) -P_RULE_4= -else ifeq ($(BUILD_DWARF),1) -P_RULE_4=$(_v)${CTFCONVERT} -l xnu -v -o $@.ctf $@ > /dev/null && $(CTFSCRUB) `cat $(SRCROOT)/config/DtraceIgnored.symbols` $@.ctf || true; -else -P_RULE_4= -endif +P_RULE_2= +P_CTFRULE_1A=@true +P_CTFRULE_1B= +P_CTFRULE_2=@true -ifeq ($(ARCH_CONFIG),ARM) -ifeq ($(KERNEL_CONFIG),RELEASE) -P_RULE_4= -endif -endif -setup_build_all: +.PHONY: do_build_all -do_build_all: $(COMP_FILES) $(COMP_COBJ_FILES) $(COMP_SOBJ_FILES) $(COMPONENT_IMAGE_FILE) +# Do-nothing rule, since not all levels of the recursive hierarchy might implement this +# in their local Makefiles. Those that do will use a "::" rule to augment this. +do_build_all:: + @: -# -# mach_kernel building rules -# -ifeq ($(COMPONENT), .) 
-do_build_all: do_build_mach_kernel - -STATIC_KMODS = $(SRCROOT)/kmods.a - -do_build_mach_kernel: $(TARGET)/kgmacros $(TARGET)/mach_kernel - -$(TARGET)/mach_kernel: $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .filelist, $(component))))) lastkernelconstructor.o - $(_v)${MAKE} version.o - $(_v)${MAKE} build_mach_kernel_exports - @echo LD mach_kernel.sys - $(_v)$(CAT) $(addprefix $(TARGET)/,$(foreach component,$(COMPONENT_LIST), $(addprefix $(component)/$(firstword $($(addsuffix _KERNEL_CONFIG, $(shell printf $(component) | tr a-z A-Z))) $(KERNEL_CONFIG))/, $(addsuffix .filelist, $(component))))) < /dev/null > link.filelist - $(_v)$(LD) $(LDFLAGS_KERNEL) -filelist link.filelist version.o lastkernelconstructor.o `if [ -e $(STATIC_KMODS) ]; then echo $(STATIC_KMODS); fi` \ - -o $(TARGET)/mach_kernel.sys $(LD_KERNEL_LIBS) - $(_v)if [ $(BUILD_DWARF) -eq 1 ]; then \ - echo DSYMUTIL mach_kernel.sys; \ - $(DSYMUTIL) $(DSYMUTIL_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel.sys.dSYM > /dev/null; \ - $(MKDIR) $(TARGET)/mach_kernel.sys.dSYM/$(DSYMRESDIR); \ - $(INSTALL) $(INSTALL_FLAGS) $(SRCROOT)/kgmacros $(TARGET)/mach_kernel.sys.dSYM/$(DSYMRESDIR)/kgmacros; \ - fi; - $(_v)if [ $(MACHINE_CONFIG) != DEFAULT ] ; then \ - kernel_file_name=mach.`printf "%s" "$(KERNEL_CONFIG)" | $(TR) A-Z a-z`.`printf "%s" "$(MACHINE_CONFIG)" | $(TR) A-Z a-z`; \ - echo kernel_file_name $${kernel_file_name}; \ - [ -h ${OBJROOT}/$${kernel_file_name} ] || $(LN) $(TARGET)/mach_kernel ${OBJROOT}/$${kernel_file_name}; \ - fi; - @echo STRIP mach_kernel - $(_v)$(STRIP) $(STRIP_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel - - $(_v)kernel_config=$(KERNEL_CONFIG); \ - onearch=$(ARCH_CONFIG); \ - skip_ctf=FALSE; \ - if [ $${kernel_config} = RELEASE ]; then \ - if [[ $${onearch} = ARM ]]; then \ - skip_ctf=TRUE; \ - echo "Skipping CTF processing"; \ - fi \ - fi; \ - if [ $${skip_ctf} = FALSE ]; then \ - if [ $(BUILD_MACHO_OBJ) -eq 1 -a $(BUILD_DWARF) -eq 1 ]; then \ - echo CTFMERGE mach_kernel; \ - $(FIND) $(OBJPATH)/ -name \*.ctf -size 0 \ - -exec $(RM) -rf {} \; ; \ - $(FIND) $(OBJPATH)/ -name \*.ctf | \ - $(XARGS) $(CTFMERGE) -l xnu -o $(TARGET)/mach_kernel \ - -Z $(TARGET)/mach_kernel.ctfdata || true; \ - echo CTFINSERT mach_kernel; \ - $(CTFINSERT) $(TARGET)/mach_kernel \ - $($(addsuffix $(ARCH_CONFIG),ARCH_FLAGS_)) $(TARGET)/mach_kernel.ctfdata \ - -o $(TARGET)/mach_kernel || true; \ - $(RM) -f $(TARGET)/mach_kernel.ctfdata > /dev/null || true; \ - fi; \ - fi; \ - -version.o: $(OBJPATH)/version.c - ${C_RULE_1A}$< - ${C_RULE_2} - ${C_RULE_4} - -.PHONY: $(OBJPATH)/version.c -$(OBJPATH)/version.c: $(SRCROOT)/config/version.c $(NEWVERS) $(SRCROOT)/config/MasterVersion - $(_v)$(CP) $< $@ - $(_v)$(NEWVERS) $(OBJPATH)/version.c > /dev/null; - -# "/libsa" needed because TARGET ends in "/." -lastkernelconstructor.o: COMP_OBJ_DIR=/libsa -lastkernelconstructor.o: $(SRCROOT)/libsa/lastkernelconstructor.c - $(_v)$(MKDIR) $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG) - ${C_RULE_1A}$< - ${C_RULE_2} - ${C_RULE_3} - ${C_RULE_4} - -$(TARGET)/kgmacros: $(SRCROOT)/kgmacros - $(_v)$(INSTALL) $(INSTALL_FLAGS) $? 
$@ - -.PHONY: build_mach_kernel_exports -build_mach_kernel_exports: - $(_v)${MAKE} \ - MAKEFILES=${SOURCE}/config/Makefile \ - SOURCE=${SOURCE}/config \ - RELATIVE_SOURCE_PATH=${RELATIVE_SOURCE_PATH}/config \ - TARGET=$${TARGET} \ - build_mach_kernel_exports; - -endif # mach_kernel-specific build rules +.PHONY: do_build_install_primary -# -# Kernel Install rules -# -INSTALL_KERNEL_FILE_FILES = $(addprefix $(DSTROOT)$(INSTALL_KERNEL_DIR), $(INSTALL_KERNEL_FILE)) +# Do-nothing rule, since not all levels of the recursive hierarchy might implement this +# in their local Makefiles. Those that do will use a "::" rule to augment this. +do_build_install_primary:: + @: -force_kernel_file_install: +.PHONY: do_build_install_non_primary -$(INSTALL_KERNEL_FILE_FILES): $(TARGET)/mach_kernel force_kernel_file_install - @echo Installing $< in $@; - $(_v)if [ ! -e $(DSTROOT)$(INSTALL_KERNEL_DIR) ]; then \ - $(MKDIR) $(DSTROOT)$(INSTALL_KERNEL_DIR); \ - fi; \ - if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \ - $(RM) $(RMFLAGS) $@; \ - $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \ - else \ - if [ ! -e $@ ]; then \ - printf "" >empty_file_$(notdir $@); \ - lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_file_$(notdir $@))"; \ - $(LIPO) $${lipo_arg} -create -output $@; \ - $(RM) $(RMFLAGS) empty_file_$(notdir $@); \ - fi; \ - $(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@; \ - fi - -INSTALL_KERNEL_FILESYS_FILES = $(addprefix $(SYMROOT)$(INSTALL_KERNEL_DIR), $(INSTALL_KERNEL_FILE)) -ifeq ($(PLATFORM),iPhoneOS) -INSTALL_KERNEL_FILESYS_FILES += $(addprefix $(DSTROOT)$(INSTALL_KERNEL_SYM_DIR), $(INSTALL_KERNEL_FILE)) -endif +# Do-nothing rule, since not all levels of the recursive hierarchy might implement this +# in their local Makefiles. Those that do will use a "::" rule to augment this. +do_build_install_non_primary:: + @: -force_kernel_filesys_install: +INSTALL_MAN_FILES = $(addprefix $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR)/, $(INSTALL_MAN_LIST)) -$(INSTALL_KERNEL_FILESYS_FILES): $(TARGET)/mach_kernel.sys force_kernel_filesys_install - @echo Installing $< in $@; -ifeq ($(PLATFORM),iPhoneOS) - $(_v)if [ ! -e $(DSTROOT)$(INSTALL_KERNEL_SYM_DIR) ]; then \ - $(MKDIR) $(DSTROOT)$(INSTALL_KERNEL_SYM_DIR); \ - fi; -endif - $(_v)if [ ! -e $(SYMROOT)$(INSTALL_KERNEL_DIR) ]; then \ - $(MKDIR) $(SYMROOT)$(INSTALL_KERNEL_DIR); \ - fi; \ - if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \ - $(RM) $(RMFLAGS) $@; \ - $(INSTALL) $(INSTALL_FLAGS) $< $@; \ - if [ $(BUILD_DWARF) -eq 1 ]; then \ - $(RM) -rf $@.dSYM; \ - $(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR); \ - $(INSTALL) $(INSTALL_FLAGS) \ - $<.dSYM/$(DSYMBUILDDIR)/$(notdir $<) \ - $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@); \ - $(INSTALL) $(INSTALL_FLAGS) \ - $<.dSYM/$(DSYMRESDIR)/kgmacros \ - $@.dSYM/$(DSYMRESDIR)/kgmacros; \ - fi; \ - else \ - if [ ! -e $@ ]; then \ - printf "" >empty_filesys_$(notdir $@); \ - lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_filesys_$(notdir $@))"; \ - $(LIPO) $${lipo_arg} -create -output $@; \ - $(RM) $(RMFLAGS) empty_filesys_$(notdir $@); \ - fi; \ - $(LIPO) $@ -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) $< -o $@; \ - \ - if [ $(BUILD_DWARF) -eq 1 ]; then \ - if [ ! 
-e $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@) ]; then \ - printf "" >empty_filesys_$(notdir $@); \ - lipo_arg="$(foreach lipo_arch,$(INSTALL_ARCHS),$(ARCH_FLAGS_$(lipo_arch)) empty_filesys_$(notdir $@))"; \ - $(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR); \ - $(LIPO) $${lipo_arg} -create \ - -output \ - $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@); \ - $(RM) $(RMFLAGS) empty_filesys_$(notdir $@); \ - fi; \ - $(LIPO) $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@) \ - -replace $(subst -arch,,$(ARCH_FLAGS_$(ARCH_CONFIG))) \ - $<.dSYM/$(DSYMBUILDDIR)/$(notdir $<) \ - -o $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@); \ - $(INSTALL) $(INSTALL_FLAGS) \ - $<.dSYM/$(DSYMRESDIR)/kgmacros \ - $@.dSYM/$(DSYMRESDIR)/kgmacros; \ - fi; \ - fi - $(INSTALL) $(INSTALL_FLAGS) $(SOURCE)kgmacros $(SYMROOT)$(INSTALL_FILE_DIR) - -setup_build_install: - @echo "[ $(SOURCE) ] make setup_build_install $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" - -do_build_install: $(INSTALL_KERNEL_FILESYS_FILES) $(INSTALL_KERNEL_FILE_FILES) - @echo "[ $(SOURCE) ] make do_build_install $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)" +.PHONY: INSTALL_MAN_DIR -INSTALL_MAN_FILES = $(addprefix $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR)/, $(INSTALL_MAN_LIST)) +INSTALL_MAN_DIR: + $(_v)$(MKDIR) $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR) -do_installman: $(INSTALL_MAN_FILES) - @echo "[ $(SOURCE) ] make do_installman" - $(_v)if [ -n "$(strip $(INSTALL_MAN_LIST))" ]; then \ - man_dir=$(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR); \ - if [ -d $$man_dir ]; then \ - ( cd $$man_dir; \ - $(RM) $(RMFLAGS) $(INSTALL_MAN_LIST) $(INSTALL_MAN_LINKS)); \ - else \ - $(MKDIR) $$man_dir; \ - fi; \ - echo Installing $(INSTALL_MAN_LIST) in $$man_dir; \ - $(INSTALL) $(INSTALL_FLAGS) $(INSTALL_MAN_LIST) $$man_dir; \ - if [ -n "$(strip $(INSTALL_MAN_LINKS))" ]; then \ - set `echo ${INSTALL_MAN_LINKS}`; \ - while : ; do \ - case $$# in \ - 0) break;; \ - 1) echo "warn: empty INSTALL_MAN_LINKS: $$1"; break;; \ - esac; \ - link_src=$$1; shift; link_dst=$$1; shift; \ - echo "hard linking $${link_src} to $${link_dst}"; \ - ln -f $${man_dir}/$${link_src} $${man_dir}/$${link_dst} ; \ - done; \ - fi; \ - fi - -$(INSTALL_MAN_FILES): $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR)/% : % - @true echo Installing $< in $(dir $@) - $(_v)$(MKDIR) $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR); \ - $(RM) $(RMFLAGS) $@; \ - $(INSTALL) $(INSTALL_FLAGS) $< $(dir $@); - -ifeq ($(INCL_MAKEDEP), TRUE) --include Makedep -endif +$(INSTALL_MAN_FILES): $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR)/% : % | INSTALL_MAN_DIR + @echo MAN $* + $(_v)$(INSTALL) $(INSTALL_FLAGS) $< $@ + +define MAN_LINKS_RULE_template +$$(DSTROOT)/$$(MANDIR)/$$(INSTALL_MAN_DIR)/$(2): $$(DSTROOT)/$$(MANDIR)/$$(INSTALL_MAN_DIR)/$(1) + @echo MANLINK $(2) + $(_v)ln -f $$< $$@ +endef + +function_generate_man_links_rules = $(if $(word 1,$(1)),$(eval $(call MAN_LINKS_RULE_template,$(word 1,$(1)),$(word 2,$(1)))) $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR)/$(word 2,$(1)) $(call function_generate_man_links_rules,$(wordlist 3,$(words $(1)),$(1)))) + +INSTALL_MAN_FILES_LINKS = $(call function_generate_man_links_rules,$(INSTALL_MAN_LINKS)) + +.PHONY: do_installman + +do_installman: $(INSTALL_MAN_FILES) $(INSTALL_MAN_FILES_LINKS) + @: -help: - @cat README +.PHONY: do_build_setup + +# Do-nothing rule, since not all levels of the recursive hierarchy might implement this +# in their local Makefiles. Those that do will use a "::" rule to augment this. 
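+#
+# (Editorial sketch, not part of the original patch.) Double-colon rules let
+# several makefiles attach independent recipes to the same target, e.g.:
+#
+#	do_build_setup::        # generic base rule: runs and does nothing
+#		@:
+#	do_build_setup::        # hypothetical leaf makefile's augmentation
+#		@echo generating local setup artifacts
+#
+# "make do_build_setup" then executes both recipe lists in order.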
+do_build_setup::
+	@:
+
+.PHONY: do_config_all
+
+# Do-nothing rule, since not all levels of the recursive hierarchy might implement this
+# in their local Makefiles. Those that do will use a "::" rule to augment this.
+do_config_all::
+	@:
+
+-include Makedep
 # vim: set ft=make:
diff --git a/makedefs/MakeInc.top b/makedefs/MakeInc.top
new file mode 100644
index 000000000..052453c7d
--- /dev/null
+++ b/makedefs/MakeInc.top
@@ -0,0 +1,470 @@
+# -*- mode: makefile;-*-
+#
+# Copyright (C) 2010-2012 Apple Inc. All rights reserved.
+#
+# MakeInc.top is the top-level makefile for the xnu
+# build system. All the main XBS targets
+# (like "installhdrs") are defined here, as
+# well as globals that can be overridden on
+# the command-line by the user.
+#
+# This makefile's main purpose is to bootstrap
+# the user's intent ("build these 3 kernels")
+# into 3 single-architecture builds that each
+# invoke the recursive make build system.
+# As such, we have no knowledge of how to build
+# a kernel or perform actions other than
+# invoking a sub-make with a different
+# current directory, makefile, and target. One
+# side effect of this is that each
+# single-architecture build is responsible for
+# inserting its build products into the final
+# multi-architecture output files. To avoid
+# races, these aggregating stages for
+# "primary" build configs are done in serial.
+#
+
+export MakeInc_cmd=${VERSDIR}/makedefs/MakeInc.cmd
+
+include $(MakeInc_cmd)
+
+
+#
+# Architecture Configuration options
+#
+
+# Default to current kernel architecture
+ifeq ($(PLATFORM),iPhoneOS)
+ override DEFAULT_ARCH_CONFIG := ARM
+else ifeq ($(PLATFORM),iPhoneSimulator)
+ override DEFAULT_ARCH_CONFIG := X86_64
+else
+ override DEFAULT_ARCH_CONFIG := X86_64
+endif
+
+
+# Accept either explicit ARCH_CONFIGS or XBS-style RC_ARCHS
+ifndef ARCH_CONFIGS
+ifdef RC_ARCHS
+ARCH_CONFIGS := $(shell printf "%s" "$(RC_ARCHS)" | $(TR) a-z A-Z | sed -E 's/ARMV[0-9][A-Z]?/ARM/g' | $(TR) " " "\n" | sort -u | $(TR) "\n" " ")
+else
+ARCH_CONFIGS := DEFAULT
+endif
+endif
+
+#
+# Kernel Configuration options
+#
+
+ifeq ($(RC_ProjectName),xnu_debug)
+override DEFAULT_KERNEL_CONFIG := DEBUG
+else ifeq ($(PLATFORM),iPhoneOS)
+override DEFAULT_KERNEL_CONFIG := DEVELOPMENT
+else
+override DEFAULT_KERNEL_CONFIG := RELEASE
+endif
+
+# If KERNEL_CONFIGS is specified it should override default
+ifndef KERNEL_CONFIGS
+KERNEL_CONFIGS := DEFAULT
+endif
+
+#
+# Machine Configuration options
+#
+
+override DEFAULT_I386_MACHINE_CONFIG := NONE
+override DEFAULT_X86_64_MACHINE_CONFIG := NONE
+
+
+# This is typically never specified (TARGET_CONFIGS is used)
+ifndef MACHINE_CONFIGS
+MACHINE_CONFIGS := DEFAULT
+endif
+
+#
+# Target configuration options. NOTE - target configurations will
+# override ARCH_CONFIGS and KERNEL_CONFIGS and MACHINE_CONFIGS.
+#
+# Target configs come in groups of three parameters. The first is the
+# kernel configuration, the second is the architecture configuration,
+# and the third is the machine configuration. You may pass in as
+# many groups of configurations as you wish. Each item passed in is
+# separated by whitespace.
+#
+# Example:
+# TARGET_CONFIGS="release ppc default debug i386 default release arm MX31ADS"
+# Parameters may be in upper or lower case (they are converted to upper).
+#
+# "default" parameter is a special case. It means use the default value for
+# that parameter. Here are the default values for each configuration:
+#
+# default kernel configuration = DEFAULT_KERNEL_CONFIG
+# default architecture configuration = system architecture where you are running make.
+
+
+ifndef TARGET_CONFIGS
+ifneq ($(PRODUCT_CONFIGS),)
+# generate TARGET_CONFIGS using KERNEL_CONFIGS and PRODUCT_CONFIGS
+TARGET_CONFIGS := $(foreach my_kernel_config,$(KERNEL_CONFIGS),$(foreach my_product_config,$(shell printf "%s" "$(PRODUCT_CONFIGS)" | $(TR) A-Z a-z),$(my_kernel_config) $(subst ;, ,$(call function_lookup_product,$(my_product_config)))))
+else ifneq ($(filter %_embedded,$(MAKECMDGOALS)),)
+# generate TARGET_CONFIGS for all kernel configs and products in the device map
+TARGET_CONFIGS := $(foreach my_kernel_config,$(KERNEL_CONFIGS_EMBEDDED),$(foreach my_arch_config,$(ARCH_CONFIGS_EMBEDDED),$(foreach my_product_config,$(DEVICEMAP_PRODUCTS_$(my_arch_config)),$(my_kernel_config) $(subst ;, ,$(call function_lookup_product,$(my_product_config))))))
+else
+# generate TARGET_CONFIGS using KERNEL_CONFIGS and ARCH_CONFIGS and MACHINE_CONFIGS (which defaults to "DEFAULT")
+TARGET_CONFIGS := $(foreach my_kern_config, $(KERNEL_CONFIGS), $(foreach my_arch_config, $(ARCH_CONFIGS), $(foreach my_machine_config, $(MACHINE_CONFIGS), $(my_kern_config) $(my_arch_config) $(my_machine_config))))
+endif
+endif
+
+ifeq ($(TARGET_CONFIGS),)
+$(error No TARGET_CONFIGS specified)
+endif
+
+TARGET_CONFIGS_UC := $(strip $(shell printf "%s" "$(TARGET_CONFIGS)" | $(TR) a-z A-Z))
+
+#
+# Build Configurations
+#
+# TARGET_CONFIGS is unwieldy for use in Makefiles. Convert them to
+# "build configurations" which are tuples joined by "^". For
+# example, "RELEASE I386 DEFAULT DEVELOPMENT ARM DEFAULT" becomes
+# "RELEASE^I386^NONE DEVELOPMENT^ARM^S5L8920X", which can be looped
+# over trivially. PRIMARY_BUILD_CONFIGS is the first config
+# for each architecture, used primarily for machine-dependent recursion.
+
+BUILD_CONFIGS = $(call function_create_build_configs, $(TARGET_CONFIGS_UC))
+
+PRIMARY_ARCHS = $(strip $(sort $(foreach build_config, $(BUILD_CONFIGS), $(call function_extract_arch_config_from_build_config, $(build_config)))))
+PRIMARY_BUILD_CONFIGS = $(strip $(foreach arch, $(PRIMARY_ARCHS), $(firstword $(foreach build_config, $(BUILD_CONFIGS), $(if $(filter $(arch),$(call function_extract_arch_config_from_build_config, $(build_config))), $(build_config), )))))
+NON_PRIMARY_BUILD_CONFIGS = $(strip $(filter-out $(PRIMARY_BUILD_CONFIGS), $(BUILD_CONFIGS)))
+FIRST_BUILD_CONFIG = $(firstword $(BUILD_CONFIGS))
+
+# $(warning PRIMARY_ARCHS is $(PRIMARY_ARCHS))
+# $(warning TARGET_CONFIGS is $(TARGET_CONFIGS))
+# $(warning BUILD_CONFIGS is $(BUILD_CONFIGS))
+# $(warning PRIMARY_BUILD_CONFIGS is $(PRIMARY_BUILD_CONFIGS))
+# $(warning NON_PRIMARY_BUILD_CONFIGS is $(NON_PRIMARY_BUILD_CONFIGS))
+
+MEMORY_SIZE := $(shell /usr/sbin/sysctl -n hw.memsize)
+
+# Embedded kernels use LTO by default.
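+#
+# (Editorial sketch, not part of the original patch.) The FLOCK_SIZE
+# computation below clamps MEMORY_SIZE / RAM_PER_KERNEL_BUILD to a small
+# range. With LTO enabled (8 GB assumed per kernel build), a 32 GB machine
+# gets:
+#
+#	32 GB / 8 GB = 4  ->  FLOCK_SIZE = 4
+#
+# while a 4 GB machine falls into the "-le (1 * 8 GB)" branch and builds one
+# kernel at a time, and anything over 64 GB is capped at 8.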
+ifeq ($(PLATFORM),iPhoneOS)
+export BUILD_LTO := 1
+endif
+
+LARGE_BUILD_FOOTPRINT := $(BUILD_LTO)
+
+ifeq ($(LARGE_BUILD_FOOTPRINT),1)
+RAM_PER_KERNEL_BUILD := 8589934592
+FLOCK_SIZE := $(shell if [ $(MEMORY_SIZE) -le $$((1 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 1; elif [ $(MEMORY_SIZE) -gt $$((8 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 8; else expr $(MEMORY_SIZE) / $(RAM_PER_KERNEL_BUILD); fi )
+else
+RAM_PER_KERNEL_BUILD := 268435456
+FLOCK_SIZE := $(shell if [ $(MEMORY_SIZE) -le $$((2 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 2; elif [ $(MEMORY_SIZE) -gt $$((8 * $(RAM_PER_KERNEL_BUILD))) ]; then echo 8; else expr $(MEMORY_SIZE) / $(RAM_PER_KERNEL_BUILD); fi )
+endif
+
+# $(warning Building $(FLOCK_SIZE) kernels in parallel)
+
+#
+# TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template
+#
+# $(1) is the name of the makefile target to invoke for each build config
+# after setting up the parallel hierarchy in the TARGET directory
+# $(2) is an optional suffix on the TARGET directory, which might even be
+# "/.."
+# $(3) are any dependencies for the bootstrap target
+# $(4) are any dependencies that are expanded per-build config to another bootstrap target
+# $(5) is how many build configurations to build in parallel
+# $(6) is which build configs to build in parallel
+#
+# Since building many configurations in parallel may overwhelm the system,
+# we try to throttle behavior into more manageable "flocks" of N configurations
+# at once, by creating a dependency on all members of the previous flock.

+define TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template
+
+# Create a list of synthesized targets for each build config
+$(1)_bootstrap_target_list = $$(addprefix $(1)_bootstrap_,$(6))
+
+.PHONY: $$($(1)_bootstrap_target_list)
+
+$$(eval $$(call _function_generate_flock_groupings,$(1),$$(wordlist 1,$(5),$(6)),$$(wordlist $(call increment,$(5)),$$(words $(6)),$(6)),,$(5)))
+
+$$($(1)_bootstrap_target_list): $(1)_bootstrap_% : $(1)_flock_dep_for_% $$(addsuffix _bootstrap_%,$(4)) $(3)
+	$$(_v)$$(MKDIR) $${OBJROOT}/$$(call function_convert_build_config_to_objdir,$$(patsubst $(1)_bootstrap_%,%,$$@))$(2)
+	$$(_v)$${MAKE} \
+		-C $${OBJROOT}/$$(call function_convert_build_config_to_objdir,$$(patsubst $(1)_bootstrap_%,%,$$@))$(2) \
+		-f $${SRCROOT}/Makefile \
+		CURRENT_KERNEL_CONFIG=$$(call function_extract_kernel_config_from_build_config,$$(patsubst $(1)_bootstrap_%,%,$$@)) \
+		CURRENT_ARCH_CONFIG=$$(call function_extract_arch_config_from_build_config,$$(patsubst $(1)_bootstrap_%,%,$$@)) \
+		CURRENT_MACHINE_CONFIG=$$(call function_extract_machine_config_from_build_config,$$(patsubst $(1)_bootstrap_%,%,$$@)) \
+		CURRENT_BUILD_CONFIG=$$(patsubst $(1)_bootstrap_%,%,$$@) \
+		PRIMARY_BUILD_CONFIGS="$(PRIMARY_BUILD_CONFIGS)" \
+		SOURCE=$${SRCROOT}/ \
+		RELATIVE_SOURCE_PATH=. \
+		TARGET=$${OBJROOT}/$$(call function_convert_build_config_to_objdir,$$(patsubst $(1)_bootstrap_%,%,$$@))$(2)/ \
+		OBJPATH=$${OBJROOT}/$$(call function_convert_build_config_to_objdir,$$(patsubst $(1)_bootstrap_%,%,$$@)) \
+		$(1)
+
+.PHONY: $(1)_bootstrap
+
+$(1)_bootstrap: $$($(1)_bootstrap_target_list)
+endef
+
+#
+# TOP_LEVEL_FLOCK_DEPENDENCY_template
+#
+# $(1) is the Makefile target we are building for
+# $(2) are the members of the current flock
+# $(3) is what the flock depends on. None of the build
+# configs in $(2) will start building until all of
+# $(3) are done building

+define TOP_LEVEL_FLOCK_DEPENDENCY_template
+
+.PHONY: $(addprefix $(1)_flock_dep_for_,$(2))
+
+$(addprefix $(1)_flock_dep_for_,$(2)): $(addprefix $(1)_bootstrap_,$(3))
+
+endef
+
+# $(1) is the Makefile target we are building for
+# $(2) is the first flock (5 build configs)
+# $(3) is the rest of the build configs
+# $(4) is the build configs that the first flock depends on
+# $(5) is the flock size
+_function_generate_flock_groupings = $(if $(3), $(call _function_generate_flock_groupings,$(1),$(wordlist 1,$(5),$(3)),$(wordlist $(call increment,$(5)),$(words $(3)),$(3)),$(2),$(5))) $(call TOP_LEVEL_FLOCK_DEPENDENCY_template,$(1),$(2),$(4))
+
+#
+# Setup pass for build system tools
+#
+
+generated_top_level_build_setup = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_setup,/..,,,$(FLOCK_SIZE),$(FIRST_BUILD_CONFIG))
+ifeq ($(VERBOSE),YES)
+$(warning Generate makefile fragment: $(generated_top_level_build_setup))
+endif
+$(eval $(generated_top_level_build_setup))
+
+.PHONY: setup
+
+setup: build_setup_bootstrap
+
+#
+# Install kernel header files
+#
+.PHONY: exporthdrs exporthdrs_mi exporthdrs_md
+
+exporthdrs: exporthdrs_mi exporthdrs_md
+
+#
+# Install machine independent kernel header files
+#
+
+generated_top_level_build_exporthdrs_mi = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_exporthdrs_mi,,setup,,1,$(FIRST_BUILD_CONFIG))
+ifeq ($(VERBOSE),YES)
+$(warning Generate makefile fragment: $(generated_top_level_build_exporthdrs_mi))
+endif
+$(eval $(generated_top_level_build_exporthdrs_mi))
+
+exporthdrs_mi: build_exporthdrs_mi_bootstrap
+
+#
+# Install machine dependent kernel header files
+#
+
+generated_top_level_build_exporthdrs_md = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_exporthdrs_md,,setup,,$(FLOCK_SIZE),$(PRIMARY_BUILD_CONFIGS))
+ifeq ($(VERBOSE),YES)
+$(warning Generate makefile fragment: $(generated_top_level_build_exporthdrs_md))
+endif
+$(eval $(generated_top_level_build_exporthdrs_md))
+
+exporthdrs_md: build_exporthdrs_md_bootstrap
+
+#
+# Install kernel header files
+#
+
+.PHONY: installhdrs installhdrs_mi installhdrs_md
+
+ifeq ($(RC_ProjectName),xnu_debug)
+installhdrs:
+	@:
+else
+
+installhdrs: installhdrs_mi installhdrs_md
+endif
+
+.PHONY: installhdrs_embedded
+
+installhdrs_embedded: installhdrs
+
+#
+# Install machine independent header files
+#
+
+generated_top_level_build_installhdrs_mi = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_installhdrs_mi,,setup exporthdrs_mi,,1,$(FIRST_BUILD_CONFIG))
+ifeq ($(VERBOSE),YES)
+$(warning Generate makefile fragment: $(generated_top_level_build_installhdrs_mi))
+endif
+$(eval $(generated_top_level_build_installhdrs_mi))
+
+installhdrs_mi: exporthdrs_mi build_installhdrs_mi_bootstrap
+
+#
+# Install machine dependent kernel header files
+#
+
+generated_top_level_build_installhdrs_md = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_installhdrs_md,,setup exporthdrs_md,,$(FLOCK_SIZE),$(PRIMARY_BUILD_CONFIGS))
+ifeq ($(VERBOSE),YES)
+$(warning Generate makefile fragment: $(generated_top_level_build_installhdrs_md))
+endif
+$(eval $(generated_top_level_build_installhdrs_md))
+
+installhdrs_md: exporthdrs_md build_installhdrs_md_bootstrap
+
+#
+# Build all architectures for all Configuration/Architecture options
+#
+
+generated_top_level_build_all = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_all,,setup exporthdrs,,$(FLOCK_SIZE),$(BUILD_CONFIGS))
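+#
+# (Editorial sketch, not part of the original patch.) With hypothetical
+# FLOCK_SIZE=2 and BUILD_CONFIGS "A B C D E", _function_generate_flock_groupings
+# above emits dependencies equivalent to:
+#
+#	build_all_flock_dep_for_A build_all_flock_dep_for_B:   # first flock starts freely
+#	build_all_flock_dep_for_C build_all_flock_dep_for_D: build_all_bootstrap_A build_all_bootstrap_B
+#	build_all_flock_dep_for_E: build_all_bootstrap_C build_all_bootstrap_D
+#
+# so even under "make -j", at most two build configurations compile at once.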
+ifeq ($(VERBOSE),YES) +$(warning Generate makefile fragment: $(generated_top_level_build_all)) +endif +$(eval $(generated_top_level_build_all)) + +.PHONY: build + +build: build_all_bootstrap + +# +# Post-process build results +# + +generated_top_level_config_all = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,config_all,,setup,build_all,$(FLOCK_SIZE),$(BUILD_CONFIGS)) +ifeq ($(VERBOSE),YES) +$(warning Generate makefile fragment: $(generated_top_level_config_all)) +endif +$(eval $(generated_top_level_config_all)) + +.PHONY: all + +all: config_all_bootstrap + +.PHONY: all_embedded + +all_embedded: all + +# +# Install kernel and header files +# + +generated_top_level_build_install_primary = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_install_primary,,setup,config_all,1,$(PRIMARY_BUILD_CONFIGS)) +ifeq ($(VERBOSE),YES) +$(warning Generate makefile fragment: $(generated_top_level_build_install_primary)) +endif +$(eval $(generated_top_level_build_install_primary)) + +generated_top_level_build_install_non_primary = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_install_non_primary,,setup,config_all,$(FLOCK_SIZE),$(NON_PRIMARY_BUILD_CONFIGS)) +ifeq ($(VERBOSE),YES) +$(warning Generate makefile fragment: $(generated_top_level_build_install_non_primary)) +endif +$(eval $(generated_top_level_build_install_non_primary)) + + +.PHONY: install + +ifeq ($(RC_ProjectName),xnu_debug) + +install: build_install_primary_bootstrap build_install_non_primary_bootstrap +else ifeq ($(RC_ProjectName),xnu_headers_Sim) +install: installhdrs +else + +install: all installhdrs installman build_install_primary_bootstrap build_install_non_primary_bootstrap +endif + +.PHONY: install_embedded + +install_embedded: install + +# +# Install man pages +# + +generated_top_level_build_installman = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,build_installman,,setup,,1,$(FIRST_BUILD_CONFIG)) +ifeq ($(VERBOSE),YES) +$(warning Generate makefile fragment: $(generated_top_level_build_installman)) +endif +$(eval $(generated_top_level_build_installman)) + +.PHONY: installman + +installman: setup build_installman_bootstrap + +# +# Install source tree +# +.PHONY: installsrc + +installsrc: + @echo INSTALLSRC $(SRCROOT) + $(_v)$(MKDIR) $(SRCROOT) + $(_v)($(TAR) -c --mode go=r,+X --no-ignore-case --exclude .git --exclude .svn --exclude cscope.\* --exclude BUILD --exclude \*~ -f - .) | (cd $(SRCROOT) && $(TAR) --no-same-owner -xf -) + + +# +# Clean up source tree +# +.PHONY: clean + +clean: + @: + +# +# Build source file list for cscope database and tags +# +cscope.files: + @echo "Building file list for cscope and tags" + @find . -name '*.h' -type f | grep -v ^..BUILD > _cscope.files 2> /dev/null + @find . -name '*.defs' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @find . -name '*.c' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @find . -name '*.cpp' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @find . -name '*.s' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @find . 
-name '*.h.template' -type f | grep -v ^..BUILD >> _cscope.files 2> /dev/null + @echo -k -q -c > cscope.files 2> /dev/null + @sort -u < _cscope.files >> cscope.files 2> /dev/null + @rm -f _cscope.files _cscope.files2 2> /dev/null + +# +# Build cscope database +# +cscope: cscope.files + @echo "Building cscope database" + @cscope -bvU 2> /dev/null + +# +# Build tags +# +tags: cscope.files + @echo "Building ctags" + @-sed 1d cscope.files | xargs ctags -dtw 2> /dev/null || \ + echo "Phantom files detected!" 2>&1 > /dev/null + @-[ -f TAGS ] || ${MAKE} -f $(firstword $(MAKEFILE_LIST)) TAGS + +TAGS: cscope.files + @echo "Building etags" + @-cat cscope.files | etags -l auto -S - 2> /dev/null + +help: + @cat README + +print_exports: + $(_v)printenv | sort + + +generated_top_level_print_exports = $(call TOP_LEVEL_EACH_BUILD_CONFIG_BOOTSTRAP_template,print_exports,,,,1,$(FIRST_BUILD_CONFIG)) +ifeq ($(VERBOSE),YES) +$(warning Generate makefile fragment: $(generated_top_level_print_exports)) +endif +$(eval $(generated_top_level_print_exports)) + +print_exports_first_build_config: print_exports_bootstrap diff --git a/osfmk/Makefile b/osfmk/Makefile index a864f850b..3fa6846f2 100644 --- a/osfmk/Makefile +++ b/osfmk/Makefile @@ -23,9 +23,6 @@ INSTINC_SUBDIRS = \ kdp \ pmc \ kperf -INSTINC_SUBDIRS_I386 = \ - mach \ - i386 INSTINC_SUBDIRS_X86_64 = \ mach \ i386 \ @@ -46,26 +43,17 @@ EXPINC_SUBDIRS = \ lockd \ vm \ libsa \ - kdp \ console \ pmc \ kperf -EXPINC_SUBDIRS_I386 = \ - mach \ - i386 EXPINC_SUBDIRS_X86_64 = \ mach \ i386 \ x86_64 -SETUP_SUBDIRS = - COMP_SUBDIRS = \ conf -INST_SUBDIRS = \ - - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/UserNotification/KUNCUserNotifications.c b/osfmk/UserNotification/KUNCUserNotifications.c index 740b8f125..98ad8b928 100644 --- a/osfmk/UserNotification/KUNCUserNotifications.c +++ b/osfmk/UserNotification/KUNCUserNotifications.c @@ -64,6 +64,8 @@ struct UNDReply { #define UNDReply_lock(reply) lck_mtx_lock(&reply->lock) #define UNDReply_unlock(reply) lck_mtx_lock(&reply->lock) +extern lck_grp_t LockCompatGroup; + /* forward declarations */ void UNDReply_deallocate( UNDReplyRef reply); @@ -83,6 +85,7 @@ UNDReply_deallocate( UNDReply_unlock(reply); ipc_port_dealloc_kernel(port); + lck_mtx_destroy(&reply->lock, &LockCompatGroup); kfree(reply, sizeof(struct UNDReply)); return; } @@ -189,7 +192,6 @@ UNDNotificationCreated_rpc ( * KUNC Functions */ -extern lck_grp_t LockCompatGroup; KUNCUserNotificationID KUNCGetNotificationID(void) diff --git a/osfmk/UserNotification/Makefile b/osfmk/UserNotification/Makefile index 98a5377e8..36e129059 100644 --- a/osfmk/UserNotification/Makefile +++ b/osfmk/UserNotification/Makefile @@ -6,18 +6,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_I386 = - -INSTINC_SUBDIRS_ARM = - -EXPINC_SUBDIRS = - -EXPINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS_ARM = - MIG_TYPES = \ UNDTypes.defs @@ -68,8 +56,10 @@ MIG_KSSRC = \ COMP_FILES = ${MIG_KUSRC} ${MIG_KSSRC} +do_build_all:: $(COMP_FILES) + ${COMP_FILES} : ${MIG_TYPES} - + ${MIG_KUSRC} : \ %.c : %.defs @echo MIG $@ @@ -90,6 +80,5 @@ ${MIG_KSSRC}: \ -sheader $*Server.h \ $< - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/chud/chud_thread.c b/osfmk/chud/chud_thread.c index 6cc0eb0e9..1a827aadd 100644 --- a/osfmk/chud/chud_thread.c +++ b/osfmk/chud/chud_thread.c @@ -43,6 +43,14 @@ #include +#if KPC +#include +#endif + +#if KPERF +#include +#endif + // include the correct file to 
find real_ncpus #if defined(__i386__) || defined(__x86_64__) # include @@ -561,6 +569,16 @@ extern void chudxnu_thread_ast(thread_t); void chudxnu_thread_ast(thread_t thread) { +#if KPC + /* check for PMC work */ + kpc_thread_ast_handler(thread); +#endif + +#if KPERF + /* check for kperf work */ + kperf_thread_ast_handler(thread); +#endif + /* atomicness for kdebug events */ void (*handler)(thread_t) = chudxnu_thread_ast_handler; if( handler ) diff --git a/osfmk/chud/chud_xnu.h b/osfmk/chud/chud_xnu.h index 7d2c56f67..657e01fe8 100644 --- a/osfmk/chud/chud_xnu.h +++ b/osfmk/chud/chud_xnu.h @@ -34,6 +34,16 @@ #include #include +/* Unslide the provided pointer if it's a kernel address. */ +static inline uint64_t +chudxnu_vm_unslide( uint64_t ptr, int kaddr ) +{ + if( !kaddr ) + return ptr; + + return VM_KERNEL_UNSLIDE(ptr); +} + #if 0 #pragma mark **** version **** #endif @@ -73,6 +83,7 @@ extern kern_return_t chudxnu_thread_get_state(thread_t thread, thread_flavor_t f extern kern_return_t chudxnu_thread_set_state(thread_t thread, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t count, boolean_t user_only); extern kern_return_t chudxnu_thread_get_callstack64(thread_t thread, uint64_t *callStack, mach_msg_type_number_t *count, boolean_t user_only); +extern kern_return_t chudxnu_thread_get_callstack64_kperf(thread_t thread, uint64_t *callStack, mach_msg_type_number_t *count, boolean_t user_only); extern kern_return_t chudxnu_all_tasks(task_array_t *task_list, mach_msg_type_number_t *count); extern kern_return_t chudxnu_free_task_list(task_array_t *task_list, mach_msg_type_number_t *count); diff --git a/osfmk/chud/i386/chud_osfmk_callback_i386.c b/osfmk/chud/i386/chud_osfmk_callback_i386.c index c92dbb7fd..9865dc99d 100644 --- a/osfmk/chud/i386/chud_osfmk_callback_i386.c +++ b/osfmk/chud/i386/chud_osfmk_callback_i386.c @@ -176,7 +176,7 @@ chudxnu_cpu_timer_callback_enter( chudxnu_private_cpu_timer_callback, NULL); timer_call_enter(&(chud_proc_info->cpu_timer_call), chud_proc_info->t_deadline, - TIMER_CALL_CRITICAL|TIMER_CALL_LOCAL); + TIMER_CALL_SYS_CRITICAL|TIMER_CALL_LOCAL); ml_set_interrupts_enabled(oldlevel); return KERN_SUCCESS; diff --git a/osfmk/chud/i386/chud_thread_i386.c b/osfmk/chud/i386/chud_thread_i386.c index 6b8a4e873..a958aa754 100644 --- a/osfmk/chud/i386/chud_thread_i386.c +++ b/osfmk/chud/i386/chud_thread_i386.c @@ -45,16 +45,6 @@ #include #include - -static uint64_t -chudxnu_vm_unslide( uint64_t ptr, int kaddr ) -{ - if( !kaddr ) - return ptr; - - return VM_KERNEL_UNSLIDE(ptr); -} - #if 0 #pragma mark **** thread state **** #endif @@ -478,14 +468,13 @@ static kern_return_t do_kernel_backtrace( return KERN_SUCCESS; } - - -__private_extern__ -kern_return_t chudxnu_thread_get_callstack64( +static +kern_return_t chudxnu_thread_get_callstack64_internal( thread_t thread, uint64_t *callstack, mach_msg_type_number_t *count, - boolean_t user_only) + boolean_t user_only, + boolean_t kern_only) { kern_return_t kr = KERN_FAILURE; task_t task = thread->task; @@ -686,7 +675,7 @@ kern_return_t chudxnu_thread_get_callstack64( bufferIndex < bufferMaxIndex) { callstack[bufferIndex++] = (uint64_t) esp; } - } else if(u_regs64) { + } else if(u_regs64 && !kern_only) { /* backtrace user land */ uint64_t rsp = 0ULL; @@ -698,7 +687,7 @@ kern_return_t chudxnu_thread_get_callstack64( callstack[bufferIndex++] = rsp; } - } else if(u_regs32) { + } else if(u_regs32 && !kern_only) { uint32_t esp = 0UL; kr = do_backtrace32(task, thread, u_regs32, callstack, &bufferIndex, @@ -714,3 
+703,23 @@ kern_return_t chudxnu_thread_get_callstack64( return kr; } +__private_extern__ +kern_return_t chudxnu_thread_get_callstack64_kperf( + thread_t thread, + uint64_t *callstack, + mach_msg_type_number_t *count, + boolean_t is_user) +{ + return chudxnu_thread_get_callstack64_internal(thread, callstack, count, is_user, !is_user); +} + +__private_extern__ +kern_return_t chudxnu_thread_get_callstack64( + thread_t thread, + uint64_t *callstack, + mach_msg_type_number_t *count, + boolean_t user_only) +{ + return chudxnu_thread_get_callstack64_internal(thread, callstack, count, user_only, 0); +} + diff --git a/osfmk/conf/MASTER b/osfmk/conf/MASTER index ad71e25cc..f3dd6fcc8 100644 --- a/osfmk/conf/MASTER +++ b/osfmk/conf/MASTER @@ -158,6 +158,9 @@ options CONFIG_DTRACE # # # options MACH_COUNTERS # # +# DEVELOPMENT define for development builds +options DEVELOPMENT # dev kernel # + ########################################################## # # This defines configuration options that are normally used only during @@ -245,16 +248,6 @@ options CONFIG_NO_PANIC_STRINGS # options CONFIG_NO_PRINTF_STRINGS # options CONFIG_NO_KPRINTF_STRINGS # -# configurable kernel - general switch to say we are building for an -# embedded device -# -options CONFIG_EMBEDDED # - -# only execute signed code. Hang this off config_embedded since there's -# nothing more appropriate right now -# -options CONFIG_ENFORCE_SIGNED_CODE # - # support dynamic signing of code # options CONFIG_DYNAMIC_CODE_SIGNING # @@ -265,12 +258,6 @@ options CONFIG_VC_PROGRESS_WHITE # # secure_kernel - secure kernel from user programs options SECURE_KERNEL # -# -# code decryption... used on embedded for app protection -# must be set in all the bsd/conf and osfmk/conf MASTER files -# -options CONFIG_CODE_DECRYPTION # - # # Context switched counters # @@ -309,7 +296,27 @@ options CHECK_CS_VALIDATION_BITMAP # # options VM_PRESSURE_EVENTS # +# +# Enable inheritance of importance through specially marked mach ports and for file locks +# For now debug is enabled wherever inheritance is +# +options IMPORTANCE_INHERITANCE # +options IMPORTANCE_DEBUG # + # Enable allocation of contiguous physical memory through vm_map_enter_cpm() options VM_CPM # options CONFIG_SKIP_PRECISE_USER_KERNEL_TIME # + +options CONFIG_TELEMETRY # + +# +# Switch to disable cpu, wakeup and high memory watermark monitors +# +options CONFIG_NOMONITORS # + +# +# In-kernel tests +# +options CONFIG_IN_KERNEL_TESTS # + diff --git a/osfmk/conf/MASTER.i386 b/osfmk/conf/MASTER.i386 deleted file mode 100644 index 2240533b9..000000000 --- a/osfmk/conf/MASTER.i386 +++ /dev/null @@ -1,79 +0,0 @@ -# -# Mach Operating System -# Copyright (c) 1986 Carnegie-Mellon University -# All rights reserved. The CMU software License Agreement -# specifies the terms and conditions for use and redistribution. 
-# -###################################################################### -# -# Standard Apple MacOS X Configurations: -# -------- ---- -------- --------------- -# -# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap vm_pressure_events config_sched_idle_in_place memorystatus ] -# DEBUG= [ RELEASE osf_debug debug mach_assert task_zone_info ] -# PROFILE = [ RELEASE profile ] -# -# EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ] -# EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ] -# DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_dtrace config_counters task_zone_info ] -# -###################################################################### -# -machine "i386" # -cpu "i386" # - -pseudo-device com 2 -pseudo-device vc 1 - -# choices for platform_bus are pci at386 sqt and kkt -makeoptions OSFMK_MACHINE = "i386" # -makeoptions CCONFIGFLAGS = "-g -O -fno-omit-frame-pointer" # -makeoptions CCONFIGFLAGS = "-O3" # -makeoptions RELOC = "00100000" # -makeoptions SYMADDR = "00780000" # - -options GDB # GNU kernel debugger # -options DEBUG # general debugging code # -options SHOW_SPACE # print size of structures # -options EVENTMETER # event meter support # -options FP_EMUL # floating point emulation # -options PC_SUPPORT # virtual PC support # -options PROFILE # kernel profiling # -options UXPR # user-level XPR package # -config mach_kernel swap generic # - -options GPROF # kgmon profiling # - -options EVENT # - -options MACH_BSD -options IOKIT # # -options MACH_PE # # - -options MACH_KDP # KDP # -options CONFIG_SERIAL_KDP # KDP over serial # -options PAE -options X86_64 -options DISPATCH_COUNTS -options PAL_I386 -options CONFIG_YONAH # 32-bit Yonah support # - -# -# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and -# security/conf MASTER files. -# -options CONFIG_MACF # Mandatory Access Control Framework -#options CONFIG_MACF_MACH # MACF applied to Mach services -options CONFIG_AUDIT # Kernel auditing - -# -# code decryption... 
used on i386 for DSMOS -# must be set in all the bsd/conf and osfmk/conf MASTER files -# -options CONFIG_CODE_DECRYPTION - -options CONFIG_MCA # Machine Check Architecture # -options CONFIG_VMX # Virtual Machine Extensions # -options CONFIG_MTRR # Memory Type Range Registers # - -options NO_NESTED_PMAP # diff --git a/osfmk/conf/MASTER.x86_64 b/osfmk/conf/MASTER.x86_64 index 27b9ce5e5..2738aba36 100644 --- a/osfmk/conf/MASTER.x86_64 +++ b/osfmk/conf/MASTER.x86_64 @@ -9,7 +9,7 @@ # Standard Apple MacOS X Configurations: # -------- ---- -------- --------------- # -# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_gzalloc config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap vm_pressure_events config_sched_idle_in_place kperf memorystatus config_kext_basement ] +# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_gzalloc config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap vm_pressure_events kperf kpc memorystatus config_kext_basement config_telemetry importance_inheritance dynamic_codesigning config_nomonitors ] # DEBUG = [ RELEASE osf_debug debug mach_assert task_zone_info ] # # EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ] @@ -51,7 +51,8 @@ options MACH_PE # # options MACH_KDP # KDP # options CONFIG_SERIAL_KDP # KDP over serial # -# options KPERF # # +options KPERF # # +options KPC # # options PAE options X86_64 options DISPATCH_COUNTS diff --git a/osfmk/conf/Makefile b/osfmk/conf/Makefile index 439807979..25a42ef5e 100644 --- a/osfmk/conf/Makefile +++ b/osfmk/conf/Makefile @@ -6,59 +6,34 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = - -COMP_SUBDIRS = - -INST_SUBDIRS = - -ifndef OSFMK_KERNEL_CONFIG -export OSFMK_KERNEL_CONFIG = $(KERNEL_CONFIG) -endif - -ifneq ($(MACHINE_CONFIG), DEFAULT) -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT) -else -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT) -endif - -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC) - -$(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile: $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(ARCH_CONFIG_LC) - $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - $(MKDIR) $${doconf_target}; \ - cd $${doconf_target}; \ +MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) + +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ + $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/Makefile.template \ + $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/files \ + $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) + $(_v)$(MKDIR) 
$(addsuffix /conf, $(TARGET)); \ + cd $(addsuffix /conf, $(TARGET)); \ rm -f $(notdir $?); \ - cp $? $${doconf_target}; \ - if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d $(TARGET)/$(OSFMK_KERNEL_CONFIG) $(OSFMK_KERNEL_CONFIG); \ - ); - -$(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/platforms.h: $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile - $(_v)(cd $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG); \ - ${RM} $@; \ - ${LN} cputypes.h $@; \ - ) - -do_all: $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/Makefile \ - $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG)/platforms.h - $(_v)next_source=$(subst conf/,,$(SOURCE)); \ - next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH)); \ - ${MAKE} -C $(COMPOBJROOT)/$(OSFMK_KERNEL_CONFIG) \ - MAKEFILES=$(TARGET)/$(OSFMK_KERNEL_CONFIG)/Makefile \ - SOURCE=$${next_source} \ - RELATIVE_SOURCE_PATH=$${next_relsource} \ - TARGET=$(TARGET) \ - INCL_MAKEDEP=FALSE \ - KERNEL_CONFIG=$(OSFMK_KERNEL_CONFIG) \ + cp $? .; \ + if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ + $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); + +do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile + $(_v)${MAKE} \ + -C $(TARGET)/$(CURRENT_KERNEL_CONFIG) \ + -f $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile \ + CURRENT_KERNEL_CONFIG=${CURRENT_KERNEL_CONFIG} \ + CURRENT_ARCH_CONFIG=${CURRENT_ARCH_CONFIG} \ + CURRENT_MACHINE_CONFIG=${CURRENT_MACHINE_CONFIG} \ + SOURCE=$(subst conf/,,$(SOURCE)) \ + TARGET=${TARGET} \ + OBJPATH=${OBJPATH} \ build_all; -do_build_all: do_all +do_build_all:: do_all include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/conf/Makefile.i386 b/osfmk/conf/Makefile.i386 deleted file mode 100644 index 852d8ad0a..000000000 --- a/osfmk/conf/Makefile.i386 +++ /dev/null @@ -1,17 +0,0 @@ -###################################################################### -#BEGIN Machine dependent Makefile fragment for i386 -###################################################################### - -CWARNFLAGS = $(CWARNFLAGS_STD) -Wshorten-64-to-32 - -# Files that must go in the __HIB segment: -UNCONFIGURED_HIB_FILES= \ - hibernate_restore.o - -HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS)) - -hibernate_restore.o_CFLAGS_ADD += -fno-stack-protector - -###################################################################### -#END Machine dependent Makefile fragment for i386 -###################################################################### diff --git a/osfmk/conf/Makefile.template b/osfmk/conf/Makefile.template index c39e844cf..e8a23fd21 100644 --- a/osfmk/conf/Makefile.template +++ b/osfmk/conf/Makefile.template @@ -5,16 +5,6 @@ # the terms and conditions for use and redistribution. 
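#
# (Sketch of how this template is consumed, per conf/Makefile above: the
# SETUP/config/doconf step expands it into the generated
# $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile, filling the %CFILES/%SFILES/
# %MACHDEP markers below from the MASTER and files lists of the
# configuration being built.)
#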
# -# -# Export IDENT for sub-makefiles -# -export IDENT - -# -# XXX: INCFLAGS to include libsa prototypes -# -INCFLAGS_MAKEFILE= -I$(SOURCE)libsa - export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule @@ -26,7 +16,66 @@ include $(MakeInc_def) # # XXX: CFLAGS # -CFLAGS+= -include meta_features.h -DMACH_KERNEL_PRIVATE $(CFLAGS_INLINE_CONFIG) +CFLAGS+= -include meta_features.h -DMACH_KERNEL_PRIVATE + +# Objects that don't want -Wcast-align warning (8474835) +OBJS_NO_CAST_ALIGN = \ + model_dep.o \ + chud_thread.o \ + chud_thread_arm.o \ + video_console.o \ + kdp.o \ + kdp_udp.o \ + kdp_machdep.o \ + host.o \ + processor.o \ + sched_prim.o \ + task.o \ + thread.o \ + threadinfo.o \ + gssd_mach.o \ + UNDRequest.o \ + panic_dialog.o \ + bsd_i386.o \ + commpage.o \ + cpu_threads.o \ + cpuid.o \ + locks_i386.o \ + machine_task.o \ + mp_desc.o \ + pcb.o \ + pcb_native.o \ + kdp_x86_common.o \ + memory_object.o \ + vm_apple_protect.o \ + vm_map.o \ + startup64.o \ + affinity.o \ + sched_grrr.o \ + sched_fixedpriority.o \ + stack.o \ + task_policy.o \ + wait_queue.o \ + bsd_kern.o \ + pmc.o \ + default_freezer.o \ + status.o \ + machine_routines.o \ + loose_ends.o \ + sleh.o + + +$(foreach file,$(OBJS_NO_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-Wno-cast-align))) + +# +# Do not provide CTF symbolication for these files +# +chud_glue.o_SKIP_CTFCONVERT = 1 + +# +# XXX: INCFLAGS to include libsa prototypes +# +INCFLAGS_MAKEFILE= -I$(SOURCE)libsa # # Directories for mig generated files @@ -34,19 +83,12 @@ CFLAGS+= -include meta_features.h -DMACH_KERNEL_PRIVATE $(CFLAGS_INLINE_CONFIG) COMP_SUBDIRS = \ default_pager \ device \ - mach_debug \ mach \ UserNotification \ gssd \ kextd \ lockd -COMP_SUBDIRS_I386 = \ - mach - -COMP_SUBDIRS_X86_64 = \ - mach - # # Make sure we don't remove this by accident if interrupted at the wrong # time.
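#
# (Per-object flag mechanism, sketched: add_perfile_cflags -- defined in the
# makedefs, presumably via the same <object>_CFLAGS_ADD hook that
# Makefile.x86_64 uses directly -- appends its flags to the compile line of
# a single object, e.g.
#     $(eval $(call add_perfile_cflags,vm_map.o,-Wno-cast-align))
# so CWARNFLAGS stays strict for every object not in OBJS_NO_CAST_ALIGN.)
#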
@@ -65,13 +107,8 @@ COMP_SUBDIRS_X86_64 = \ %CFILES -%MFILES - %SFILES -%BFILES - -%ORDERED %MACHDEP # @@ -81,50 +118,42 @@ COMP_SUBDIRS_X86_64 = \ # ${OBJS}: ${OBJSDEPS} - -%LOAD - LDOBJS = $(OBJS) $(COMPONENT).filelist: $(LDOBJS) assym.s - $(_v)if [ $(BUILD_MACHO_OBJ) -eq 1 ]; then \ - for hib_file in ${HIB_FILES}; \ + $(_v)for hib_file in ${HIB_FILES}; \ do \ - $(SEG_HACK) __HIB $${hib_file} -o $${hib_file}__; \ + $(SEG_HACK) -n __HIB -o $${hib_file}__ $${hib_file} ; \ mv $${hib_file}__ $${hib_file} ; \ - done; \ - fi + done @echo LDFILELIST $(COMPONENT) $(_v)( for obj in ${LDOBJS}; do \ - echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist do_all: $(COMPONENT).filelist -do_depend: do_all - $(_v)${MD} -u Makedep -f -d `ls *.d`; - -do_build_all: do_depend +do_build_all:: do_all # genassym.o actually is an assembly file, # we name it genassym.o to help with the automatic # dependency generation -GENASSYM_LOCATION = $(ARCH_CONFIG_LC) +GENASSYM_LOCATION = $(CURRENT_ARCH_CONFIG_LC) -ifeq ($(ARCH_CONFIG_LC),x86_64) +ifeq ($(CURRENT_ARCH_CONFIG_LC),x86_64) GENASSYM_LOCATION = i386 endif genassym.o: $(SOURCE_DIR)/$(COMPONENT)/$(GENASSYM_LOCATION)/genassym.c - $(_v)${KCC} $(subst -flto,,${CFLAGS}) -MD ${_HOST_EXTRA_CFLAGS} -S -o ${@} -c ${INCFLAGS} $< + @echo GENASSYM $< + $(_v)${KCC} $(subst -flto,,${CFLAGS}) -MD -S -o ${@} ${INCFLAGS} $< assym.s: genassym.o - $(_v)sed -e '/#DEFINITION#/!d' -e 's/^.*#DEFINITION#//' -e 's/\$$//' -e 'p' -e 's/#//2' -e 's/[^A-Za-z0-9_]*\([A-Za-z0-9_]*\)/ \1_NUM/2' genassym.o > ${@} + $(_v)sed -e '/#DEFINITION#/!d' -e 's/^.*#DEFINITION#//' -e 's/\$$//' -e 'p' -e 's/#//2' -e 's/[^A-Za-z0-9_]*\([A-Za-z0-9_]*\)/ \1_NUM/2' genassym.o > $@ ${SOBJS}: assym.s - %RULES include $(MakeInc_rule) diff --git a/osfmk/conf/Makefile.x86_64 b/osfmk/conf/Makefile.x86_64 index 2a4eb03ff..efbb892f6 100644 --- a/osfmk/conf/Makefile.x86_64 +++ b/osfmk/conf/Makefile.x86_64 @@ -6,13 +6,26 @@ CWARNFLAGS = $(CWARNFLAGS_STD) -Wshorten-64-to-32 # Files that must go in the __HIB segment: UNCONFIGURED_HIB_FILES= \ + WKdmDecompress_new.o \ + WKdmData_new.o \ hibernate_restore.o \ - hibernate_bootstrap.o + hibernate_bootstrap.o \ + bcopy.o \ + bzero.o HIB_FILES=$(filter $(UNCONFIGURED_HIB_FILES),$(OBJS)) -hibernate_restore.o_CFLAGS_ADD += -fno-stack-protector -hibernate_bootstrap.o_CFLAGS_ADD += -fno-stack-protector +# Unconfigured __HIB files must be Mach-O for "setsegname" +WKdmDecompress_new.o_CFLAGS_ADD += -fno-stack-protector $(CFLAGS_NOLTO_FLAG) +WKdmData_new.o_CFLAGS_ADD += -fno-stack-protector $(CFLAGS_NOLTO_FLAG) +hibernate_restore.o_CFLAGS_ADD += -fno-stack-protector $(CFLAGS_NOLTO_FLAG) +hibernate_bootstrap.o_CFLAGS_ADD += -fno-stack-protector $(CFLAGS_NOLTO_FLAG) +bcopy.o_CFLAGS_ADD += -fno-stack-protector $(CFLAGS_NOLTO_FLAG) +bzero.o_CFLAGS_ADD += -fno-stack-protector $(CFLAGS_NOLTO_FLAG) + +# To appear at the beginning of the __HIB segment, emit +# as Mach-O so that the linker can enforce symbol order +boot_pt.o_CFLAGS_ADD += $(CFLAGS_NOLTO_FLAG) ###################################################################### #END Machine dependent Makefile fragment for x86_64 diff --git a/osfmk/conf/files b/osfmk/conf/files index 19b3b0550..2c44e99ff 100644 --- a/osfmk/conf/files +++ b/osfmk/conf/files @@ -36,10 +36,6 @@ OPTIONS/mach_debug optional mach_debug OPTIONS/mach_machine_routines.h optional mach_machine_routines OPTIONS/machine_timer_routines optional machine_timer_routines # 
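#
# NB: in these config(8) file lists, a "standard" entry is always compiled
# into the component, while an "optional <option>" entry is compiled only
# when the named option is selected in the matching MASTER configuration.
#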
-OPTIONS/norma_vm optional norma_vm -OPTIONS/norma_task optional norma_task -OPTIONS/norma_ether optional norma_ether -OPTIONS/norma_scsi optional norma_scsi OPTIONS/mach_cluster_stats optional mach_cluster_stats OPTIONS/mach_counters optional mach_counters OPTIONS/mach_ipc_debug optional mach_ipc_debug @@ -65,6 +61,8 @@ OPTIONS/zone_alias_addr optional zone_alias_addr OPTIONS/vm_cpm optional vm_cpm OPTIONS/task_swapper optional task_swapper OPTIONS/stack_usage optional stack_usage +OPTIONS/importance_inheritance optional importance_inheritance +OPTIONS/importance_debug optional importance_debug OPTIONS/config_dtrace optional config_dtrace OPTIONS/config_counters optional config_counters @@ -73,6 +71,8 @@ OPTIONS/no_kextd optional no_kextd # Default pager and system pager files, to be moved to separate component +osfmk/vm/vm_compressor_backing_store.c standard + osfmk/default_pager/default_pager.c standard osfmk/default_pager/dp_backing_store.c standard osfmk/default_pager/dp_memory_object.c standard @@ -125,6 +125,7 @@ osfmk/ipc/mig_log.c optional mig_debug osfmk/kern/affinity.c standard osfmk/kern/ast.c standard osfmk/kern/audit_sessionport.c optional config_audit +osfmk/kern/btlog.c standard osfmk/kern/clock.c standard osfmk/kern/clock_oldops.c standard osfmk/kern/counters.c standard @@ -202,6 +203,7 @@ osfmk/pmc/pmc.c standard ./mach/upl_server.c standard ./mach/audit_triggers_user.c standard ./mach/task_access_user.c standard +./mach/telemetry_notification_user.c optional config_telemetry # # For now, no external pagers # @@ -216,6 +218,8 @@ osfmk/pmc/pmc.c standard ./mach/security_server.c optional config_macf osfmk/vm/bsd_vm.c optional mach_bsd +osfmk/vm/vm_compressor.c standard +osfmk/vm/vm_compressor_pager.c standard osfmk/vm/default_freezer.c optional config_freeze osfmk/vm/device_vm.c standard osfmk/vm/memory_object.c standard @@ -269,9 +273,13 @@ osfmk/kperf/kperf.c optional kperf osfmk/kperf/action.c optional kperf osfmk/kperf/callstack.c optional kperf osfmk/kperf/pet.c optional kperf -osfmk/kperf/filter.c optional kperf # osfmk/kperf/kperfbsd.c optional kperf # bsd/conf/files osfmk/kperf/threadinfo.c optional kperf osfmk/kperf/timetrigger.c optional kperf +osfmk/kperf/kperf_kpc.c optional kperf +osfmk/kern/kpc_thread.c optional kpc +osfmk/kern/kpc_common.c optional kpc osfmk/console/serial_general.c standard + +osfmk/kern/telemetry.c optional config_telemetry diff --git a/osfmk/conf/files.i386 b/osfmk/conf/files.i386 deleted file mode 100644 index fb2610ce3..000000000 --- a/osfmk/conf/files.i386 +++ /dev/null @@ -1,134 +0,0 @@ -OPTIONS/show_space optional show_space -OPTIONS/gdb optional gdb -OPTIONS/iplmeas optional iplmeas -OPTIONS/fb optional fb - - -#machdep/i386/unix_signal.c standard -#machdep/i386/unix_startup.c standard - -OPTIONS/debug optional debug - - -OPTIONS/gprof optional gprof -OPTIONS/dynamic_num_nodes optional dynamic_num_nodes -OPTIONS/vtoc_compat optional vtoc_compat -OPTIONS/fddi optional fddi - -osfmk/vm/vm_apple_protect.c standard - -#osfmk/i386/hi_res_clock_map.c optional hi_res_clock - -osfmk/i386/pmap.c standard -osfmk/i386/pmap_x86_common.c standard -osfmk/i386/pmap_common.c standard - -osfmk/i386/pal_routines.c optional pal_i386 -osfmk/i386/pal_routines_asm.s optional pal_i386 - -osfmk/i386/bsd_i386.c optional mach_bsd -osfmk/i386/bsd_i386_native.c optional mach_bsd -osfmk/i386/machdep_call.c optional mach_bsd - -osfmk/i386/_setjmp.s standard -osfmk/i386/bcopy.s standard -osfmk/i386/bzero.s standard -osfmk/i386/cpu.c standard -osfmk/i386/cpuid.c 
standard -osfmk/i386/cpu_threads.c standard -osfmk/i386/cpu_topology.c standard -osfmk/i386/etimer.c standard -osfmk/i386/fpu.c standard -osfmk/i386/gdt.c standard -osfmk/i386/i386_lock.s standard -osfmk/i386/i386_init.c standard -osfmk/i386/idle_pt.c standard -osfmk/i386/i386_vm_init.c standard -osfmk/i386/idt.s standard -osfmk/i386/io_map.c standard -osfmk/i386/ktss.c standard -osfmk/i386/ldt.c standard -osfmk/i386/loose_ends.c standard -osfmk/i386/copyio.c standard -osfmk/i386/locks_i386.c standard -osfmk/i386/locore.s standard -osfmk/i386/start.s standard -osfmk/i386/lowmem_vectors.s standard -osfmk/i386/cswitch.s standard -osfmk/i386/machine_routines.c standard -osfmk/i386/machine_routines_asm.s standard -osfmk/i386/machine_check.c optional config_mca -osfmk/i386/machine_task.c standard -osfmk/i386/mcount.s optional profile -osfmk/i386/mp_desc.c standard -#osfmk/i386/ntoh.s standard -osfmk/i386/pcb.c standard -osfmk/i386/pcb_native.c standard -osfmk/i386/phys.c standard -osfmk/i386/rtclock.c standard -osfmk/i386/rtclock_native.c standard -osfmk/i386/trap.c standard -osfmk/i386/trap_native.c standard -osfmk/i386/user_ldt.c standard -osfmk/i386/Diagnostics.c standard -osfmk/i386/pmCPU.c standard -osfmk/i386/tsc.c standard - -osfmk/i386/commpage/commpage.c standard -osfmk/i386/commpage/commpage_asm.s standard -osfmk/i386/commpage/pthreads.s standard -osfmk/i386/commpage/fifo_queues.s standard - -osfmk/i386/AT386/conf.c standard -osfmk/i386/AT386/model_dep.c standard - -osfmk/i386/lapic.c standard -osfmk/i386/lapic_native.c standard -osfmk/i386/mp.c standard -osfmk/i386/mp_native.c standard - -osfmk/i386/acpi.c standard -osfmk/i386/acpi_wakeup.s standard - -osfmk/i386/mtrr.c optional config_mtrr - -osfmk/console/i386/serial_console.c optional com device-driver - -osfmk/console/panic_dialog.c optional vc device-driver -osfmk/console/video_console.c optional vc device-driver -osfmk/console/i386/video_scroll.c optional vc device-driver - -osfmk/kern/etap_map.c optional etap device-driver - -#osfmk/profiling/i386/profile-md.c optional gprof -#osfmk/profiling/i386/profile-asm.s optional gprof -#osfmk/profiling/profile-kgmon.c optional gprof -#osfmk/profiling/profile-mk.c optional gprof - -osfmk/kdp/ml/i386/kdp_machdep.c optional mach_kdp -osfmk/kdp/ml/i386/kdp_vm.c optional mach_kdp -osfmk/kdp/ml/i386/kdp_x86_common.c optional mach_kdp - -osfmk/i386/hibernate_i386.c optional hibernation -osfmk/i386/hibernate_restore.c optional hibernation - -osfmk/chud/i386/chud_osfmk_callback_i386.c standard -osfmk/chud/i386/chud_cpu_i386.c standard -osfmk/chud/i386/chud_thread_i386.c standard - -osfmk/i386/ucode.c standard - -osfmk/i386/vmx/vmx_cpu.c optional config_vmx -osfmk/i386/vmx/vmx_shims.c optional config_vmx - -# DUMMIES TO FORCE GENERATION OF .h FILES -#osfmk/OPTIONS/ln optional ln -#osfmk/OPTIONS/eisa optional eisa -#osfmk/OPTIONS/himem optional himem -#osfmk/OPTIONS/ec optional ec -#osfmk/OPTIONS/hi_res_clock optional hi_res_clock - - -osfmk/i386/startup64.c standard -osfmk/i386/start64.s standard -osfmk/i386/idt64.s standard diff --git a/osfmk/conf/files.x86_64 b/osfmk/conf/files.x86_64 index 50345face..168f91883 100644 --- a/osfmk/conf/files.x86_64 +++ b/osfmk/conf/files.x86_64 @@ -41,13 +41,16 @@ osfmk/x86_64/start.s standard osfmk/x86_64/bcopy.s standard osfmk/x86_64/bzero.s standard +osfmk/x86_64/WKdmDecompress_new.s standard +osfmk/x86_64/WKdmCompress_new.s standard +osfmk/x86_64/WKdmData_new.s standard osfmk/i386/cpu.c standard osfmk/i386/cpuid.c standard osfmk/i386/cpu_threads.c standard 
osfmk/i386/cpu_topology.c standard -osfmk/i386/etimer.c standard +osfmk/i386/i386_timer.c standard osfmk/i386/fpu.c standard -osfmk/i386/i386_lock.s standard +osfmk/i386/i386_lock.s standard osfmk/i386/i386_init.c standard osfmk/i386/i386_vm_init.c standard osfmk/i386/io_map.c standard @@ -79,7 +82,6 @@ osfmk/i386/tsc.c standard osfmk/i386/commpage/commpage.c standard osfmk/i386/commpage/commpage_asm.s standard -osfmk/i386/commpage/pthreads.s standard osfmk/i386/commpage/fifo_queues.s standard osfmk/i386/AT386/conf.c standard @@ -131,7 +133,7 @@ osfmk/i386/vmx/vmx_shims.c optional config_vmx # Kernel performance monitoring osfmk/kperf/x86_64/kperf_mp.c optional kperf +osfmk/x86_64/kpc_x86.c optional kpc osfmk/i386/startup64.c standard osfmk/x86_64/idt64.s standard - diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c index 32446eb35..3f96d8c42 100644 --- a/osfmk/console/video_console.c +++ b/osfmk/console/video_console.c @@ -109,9 +109,7 @@ #include #include "iso_font.c" -#if !CONFIG_EMBEDDED #include "progress_meter_data.c" -#endif #include "sys/msgbuf.h" @@ -186,7 +184,7 @@ MACRO_END #define VCPUTC_LOCK_LOCK() \ MACRO_BEGIN \ - if (!hw_lock_to(&vcputc_lock, hwLockTimeOut*10))\ + if (!hw_lock_to(&vcputc_lock, ~0U))\ { \ panic("VCPUTC_LOCK_LOCK"); \ } \ @@ -2740,9 +2738,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) if ( gc_graphics_boot == FALSE ) break; vc_progress_set( FALSE, 0 ); -#if !CONFIG_EMBEDDED vc_enable_progressmeter( FALSE ); -#endif gc_enable( TRUE ); break; @@ -2753,9 +2749,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op) if ( gc_graphics_boot == FALSE ) break; vc_progress_set( FALSE, 0 ); -#if !CONFIG_EMBEDDED vc_enable_progressmeter( FALSE ); -#endif vc_clut8 = NULL; #ifdef GRATEFULDEBUGGER @@ -2835,7 +2829,6 @@ vcattach(void) } } -#if !CONFIG_EMBEDDED int vc_progress_meter_enable; int vc_progress_meter_value; @@ -2975,5 +2968,4 @@ vc_set_progressmeter(int new_value) splx(s); } -#endif /* !CONFIG_EMBEDDED */ diff --git a/osfmk/console/video_console.h b/osfmk/console/video_console.h index 368e94da2..158c140e6 100644 --- a/osfmk/console/video_console.h +++ b/osfmk/console/video_console.h @@ -103,14 +103,12 @@ int vc_display_lzss_icon(uint32_t dst_x, uint32_t dst_y, uint32_t compressed_size, const uint8_t *clut); -#if !CONFIG_EMBEDDED extern void vc_enable_progressmeter(int new_value); extern void vc_set_progressmeter(int new_value); extern int vc_progress_meter_enable; extern int vc_progress_meter_value; -#endif /* !CONFIG_EMBEDDED */ #ifdef __cplusplus } diff --git a/osfmk/default_pager/Makefile b/osfmk/default_pager/Makefile index b0c68f345..45195d298 100644 --- a/osfmk/default_pager/Makefile +++ b/osfmk/default_pager/Makefile @@ -3,7 +3,6 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) @@ -39,12 +38,8 @@ EXPORT_MI_GEN_LIST = \ EXPORT_MI_DIR = default_pager -.ORDER: ${MIG_HDRS} ${MIGINCLUDES} - ${MIGINCLUDES} : ${MIG_TYPES} -.ORDER: ${MIG_UUHDRS} - ${MIG_UUHDRS} : \ %.h : %.defs @echo MIG $@ @@ -54,8 +49,6 @@ ${MIG_UUHDRS} : \ -header $@ \ $< -.ORDER: ${MIG_USHDRS} - ${MIG_USHDRS} : \ %_server.h : %.defs @echo MIG $@ @@ -107,10 +100,10 @@ MIG_KSSRC = \ # COMP_FILES = ${MIG_KUSRC} ${MIG_KSSRC} +do_build_all:: $(COMP_FILES) + ${COMP_FILES} : ${MIG_TYPES} - -.ORDER: ${MIG_KUSRC} - + ${MIG_KUSRC} : \ %_user.c : %.defs @echo MIG $@ @@ -121,8 +114,6 @@ ${MIG_KUSRC} : \ 
-sheader /dev/null \ $< -.ORDER: ${MIG_KSSRC} - ${MIG_KSSRC}: \ %_server.c : %.defs @echo MIG $@ @@ -132,7 +123,6 @@ ${MIG_KSSRC}: \ -server $*_server.c \ -sheader $*_server.h \ $< - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/default_pager/Makefile.template b/osfmk/default_pager/Makefile.template deleted file mode 100644 index ec89e2f12..000000000 --- a/osfmk/default_pager/Makefile.template +++ /dev/null @@ -1,43 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -MIGKSFLAGS = -DKERNEL_SERVER -MIGKUFLAGS = -DKERNEL_USER -maxonstack 1024 - -DEFAULT_PAGER_FILES = default_pager_object_server.h default_pager_object_server.c - -DEFAULT_PAGER_ALERTS = default_pager_alerts.h default_pager_alerts.c - - -OTHERS = ${DEFAULT_PAGER_FILES} ${DEFAULT_PAGER_ALERTS} - -.ORDER: ${DEFAULT_PAGER_FILES} ${DEFAULT_PAGER_ALERTS} - -${DEFAULT_PAGER_FILES}: default_pager_object.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader default_pager_object_server.h \ - -server default_pager_object_server.c \ - $< - -${DEFAULT_PAGER_ALERTS}: default_pager_alerts.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKUFLAGS} \ - -header default_pager_alerts.h \ - -user default_pager_alerts.c \ - -sheader /dev/null \ - -server /dev/null \ - $< - - - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/osfmk/default_pager/default_pager.c b/osfmk/default_pager/default_pager.c index 21d3667eb..4b5062cc2 100644 --- a/osfmk/default_pager/default_pager.c +++ b/osfmk/default_pager/default_pager.c @@ -70,6 +70,7 @@ #include #include #include +#include char my_name[] = "(default pager): "; @@ -90,8 +91,6 @@ unsigned long long vm_page_mask; int vm_page_shift; #endif -int norma_mk; - boolean_t verbose; /* task_t default_pager_self; */ /* Our task port. */ @@ -286,12 +285,6 @@ start_def_pager( __unused char *bs_device ) -#if NORMA_VM - norma_mk = 1; -#else - norma_mk = 0; -#endif - /* setup read buffers, etc */ default_pager_initialize(); @@ -300,13 +293,15 @@ start_def_pager( __unused char *bs_device ) default_pager(); #endif - /* start the backing store monitor, it runs on a callout thread */ - default_pager_backing_store_monitor_callout = - thread_call_allocate(default_pager_backing_store_monitor, NULL); - if (!default_pager_backing_store_monitor_callout) - panic("can't start backing store monitor thread"); - thread_call_enter(default_pager_backing_store_monitor_callout); - + if (DEFAULT_PAGER_IS_ACTIVE) { + /* start the backing store monitor, it runs on a callout thread */ + default_pager_backing_store_monitor_callout = + thread_call_allocate(default_pager_backing_store_monitor, NULL); + if (!default_pager_backing_store_monitor_callout) + panic("can't start backing store monitor thread"); + thread_call_enter(default_pager_backing_store_monitor_callout); + } + return (0); } diff --git a/osfmk/default_pager/default_pager_internal.h b/osfmk/default_pager/default_pager_internal.h index 738ea34e6..0aa9a4604 100644 --- a/osfmk/default_pager/default_pager_internal.h +++ b/osfmk/default_pager/default_pager_internal.h @@ -110,7 +110,7 @@ /* * Debug. 
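 *
 * (__private_extern__ gives a symbol hidden visibility in Apple's
 * toolchain: usable across the files of one linkage unit, but not
 * exported from it. Plain extern leaves the symbol globally visible,
 * presumably so these can now be referenced from other components.)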
*/ -__private_extern__ char my_name[]; +extern char my_name[]; #define DEFAULT_PAGER_DEBUG 0 @@ -656,7 +656,7 @@ struct vstruct_list_head { int vsl_count; /* saves code */ }; -__private_extern__ struct vstruct_list_head vstruct_list; +extern struct vstruct_list_head vstruct_list; __private_extern__ void vstruct_list_insert(vstruct_t vs); __private_extern__ void vstruct_list_delete(vstruct_t vs); @@ -673,7 +673,7 @@ extern lck_attr_t default_pager_lck_attr; #define VSL_SLEEP(e,i) lck_mtx_sleep(&vstruct_list.vsl_lock, LCK_SLEEP_DEFAULT, (e), (i)) #ifdef MACH_KERNEL -__private_extern__ zone_t vstruct_zone; +extern zone_t vstruct_zone; #endif /* @@ -746,7 +746,7 @@ extern boolean_t bs_add_device(char *, MACH_PORT_FACE); extern vstruct_t ps_vstruct_create(dp_size_t); extern void ps_vstruct_dealloc(vstruct_t); -extern void ps_vstruct_reclaim(vstruct_t, +extern kern_return_t ps_vstruct_reclaim(vstruct_t, boolean_t, boolean_t); extern kern_return_t pvs_cluster_read(vstruct_t, diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c index 2b97a2122..b861ccf82 100644 --- a/osfmk/default_pager/dp_backing_store.c +++ b/osfmk/default_pager/dp_backing_store.c @@ -600,7 +600,7 @@ default_pager_backing_store_create( if(alias_struct != NULL) { alias_struct->vs = (struct vstruct *)bs; alias_struct->name = &default_pager_ops; - port->alias = (uintptr_t) alias_struct; + port->ip_alias = (uintptr_t) alias_struct; } else { ipc_port_dealloc_kernel((MACH_PORT_FACE)(port)); @@ -987,8 +987,8 @@ restart: /* * Disable lookups of this backing store. */ - if((void *)bs->bs_port->alias != NULL) - kfree((void *) bs->bs_port->alias, + if((void *)bs->bs_port->ip_alias != NULL) + kfree((void *) bs->bs_port->ip_alias, sizeof (struct vstruct_alias)); ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port)); bs->bs_port = MACH_PORT_NULL; @@ -1236,7 +1236,7 @@ vs_alloc_async(void) if(alias_struct != NULL) { alias_struct->vs = (struct vstruct *)vsa; alias_struct->name = &default_pager_ops; - reply_port->alias = (uintptr_t) alias_struct; + reply_port->ip_alias = (uintptr_t) alias_struct; vsa->reply_port = reply_port; vs_alloc_async_count++; } @@ -1288,7 +1288,7 @@ vs_alloc_async(void) if(alias_struct != NULL) { alias_struct->vs = reply_port; alias_struct->name = &default_pager_ops; - reply_port->alias = (int) vsa; + reply_port->defpager_importance.alias = (int) vsa; vsa->reply_port = reply_port; vs_alloc_async_count++; } @@ -1312,7 +1312,7 @@ vs_free_async( kern_return_t kr; reply_port = vsa->reply_port; - kfree(reply_port->alias, sizeof (struct vstuct_alias)); + kfree(reply_port->ip_alias, sizeof (struct vstruct_alias)); kfree(vsa, sizeof (struct vs_async)); ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port)); #if 0 @@ -1889,7 +1889,7 @@ ps_vstruct_dealloc( zfree(vstruct_zone, vs); } -void +kern_return_t ps_vstruct_reclaim( vstruct_t vs, boolean_t return_to_vm, @@ -1901,7 +1901,7 @@ ps_vstruct_reclaim( struct vm_object_fault_info fault_info; int clmap_off; unsigned int vsmap_size; - kern_return_t kr; + kern_return_t kr = KERN_SUCCESS; VS_MAP_LOCK(vs); @@ -1950,10 +1950,14 @@ ps_vstruct_reclaim( clmap_off, (dp_size_t) -1, /* read whole cluster */ &fault_info); + VS_MAP_LOCK(vs); /* XXX what if it changed ? */ if (kr != KERN_SUCCESS) { vsmap_all_clear = FALSE; vsimap_all_clear = FALSE; + + kr = KERN_MEMORY_ERROR; + goto out; } } } @@ -1990,9 +1994,13 @@ ps_vstruct_reclaim( clmap_off, (dp_size_t) -1, /* read whole cluster */ &fault_info); + VS_MAP_LOCK(vs); /* XXX what if it changed ?
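+ * The map lock is dropped around the pvs_cluster_read() call above,
+ * which can block on I/O, so another thread may have altered the
+ * vsmap in the meantime; the cached loop state is not re-validated
+ * here, only the return code is checked.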
*/ if (kr != KERN_SUCCESS) { vsmap_all_clear = FALSE; + + kr = KERN_MEMORY_ERROR; + goto out; } else { // VSM_CLR(vsmap[j]); } @@ -2005,6 +2013,8 @@ ps_vstruct_reclaim( } out: VS_MAP_UNLOCK(vs); + + return kr; } int ps_map_extend(vstruct_t, unsigned int); /* forward */ @@ -2488,7 +2498,7 @@ device_write_reply( struct vs_async *vsa; vsa = (struct vs_async *) - ((struct vstruct_alias *)(reply_port->alias))->vs; + ((struct vstruct_alias *)(reply_port->ip_alias))->vs; if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) { device_code = KERN_FAILURE; @@ -2538,7 +2548,7 @@ device_read_reply( { struct vs_async *vsa; vsa = (struct vs_async *) - ((struct vstruct_alias *)(reply_port->alias))->vs; + ((struct vstruct_alias *)(reply_port->defpager_importance.alias))->vs; vsa->vsa_addr = (vm_offset_t)data; vsa->vsa_size = (vm_size_t)dataCnt; vsa->vsa_error = return_code; @@ -2855,6 +2865,9 @@ pvs_object_data_provided( __unused upl_offset_t offset, upl_size_t size) { +#if RECLAIM_SWAP + boolean_t empty; +#endif DP_DEBUG(DEBUG_VS_INTERNAL, ("buffer=0x%x,offset=0x%x,size=0x%x\n", @@ -2866,6 +2879,10 @@ pvs_object_data_provided( /* check upl iosync flag instead of using RECLAIM_SWAP*/ #if RECLAIM_SWAP if (size != upl->size) { + if (size) { + ps_clunmap(vs, offset, size); + upl_commit_range(upl, 0, size, 0, NULL, 0, &empty); + } upl_abort(upl, UPL_ABORT_ERROR); upl_deallocate(upl); } else { @@ -2917,14 +2934,18 @@ pvs_cluster_read( uint32_t io_streaming; int i; boolean_t io_sync = FALSE; + boolean_t reclaim_all = FALSE; pages_in_cl = 1 << vs->vs_clshift; cl_size = pages_in_cl * vm_page_size; cl_mask = cl_size - 1; request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE; + + if (cnt == (dp_size_t) -1) + reclaim_all = TRUE; - if (cnt == (dp_size_t) -1) { + if (reclaim_all == TRUE) { /* * We've been called from ps_vstruct_reclaim() to move all * the object's swapped pages back to VM pages. @@ -2969,7 +2990,7 @@ again: */ return KERN_FAILURE; } - if (cnt == (dp_size_t) -1) { + if (reclaim_all == TRUE) { i--; if (i == 0) { /* no more pages in this cluster */ @@ -3006,7 +3027,7 @@ again: */ return KERN_SUCCESS; } - + if(((vm_object_fault_info_t)fault_info)->io_sync == TRUE ) { io_sync = TRUE; } else { @@ -3087,6 +3108,7 @@ again: while (size > 0 && error == KERN_SUCCESS) { unsigned int abort_size; + unsigned int lsize; int failed_size; int beg_pseg; int beg_indx; @@ -3240,8 +3262,7 @@ again: upl, (upl_offset_t) 0, ps_offset[beg_pseg] + (beg_indx * vm_page_size), xfer_size, &residual, io_flags); - - failed_size = 0; + /* * Adjust counts and send response to VM. Optimize @@ -3261,8 +3282,10 @@ again: * supplied data is deallocated from the pager's * address space. */ - pvs_object_data_provided(vs, upl, vs_offset, xfer_size); + lsize = xfer_size; + failed_size = 0; } else { + lsize = 0; failed_size = xfer_size; if (error == KERN_SUCCESS) { @@ -3286,21 +3309,21 @@ again: * of the range, if any. */ int fill; - unsigned int lsize; - fill = residual & ~vm_page_size; + fill = residual & (vm_page_size - 1); lsize = (xfer_size - residual) + fill; - pvs_object_data_provided(vs, upl, vs_offset, lsize); - - if (lsize < xfer_size) { + if (lsize < xfer_size) failed_size = xfer_size - lsize; + + if (reclaim_all == FALSE) error = KERN_FAILURE; - } } } } - if (error != KERN_SUCCESS) { + pvs_object_data_provided(vs, upl, vs_offset, lsize); + + if (failed_size) { /* * There was an error in some part of the range, tell * the VM. 
Note that error is explicitly checked again @@ -4416,45 +4439,17 @@ ps_write_file( static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data) { -#if CONFIG_EMBEDDED - data->vp = NULL; - data->offset = 0; - data->length = 0; -#else #pragma unused(data) -#endif } static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data) { -#if CONFIG_EMBEDDED - if ((data->vp) != NULL) { - vnode_trim(data->vp, - data->offset, - data->length); - ps_vnode_trim_init(data); - } -#else #pragma unused(data) -#endif } static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length) { -#if CONFIG_EMBEDDED - struct vnode *vp = VSM_PS(*map)->ps_vnode; - dp_offset_t offset = ptoa_32(VSM_CLOFF(*map)) << shift; - - if ((vp != data->vp) || (offset) != (data->offset + data->length)) { - ps_vnode_trim_now(data); - data->vp = vp; - data->offset = offset; - data->length = 0; - } - data->length += (length); -#else #pragma unused(data, map, shift, length) -#endif } kern_return_t diff --git a/osfmk/default_pager/dp_memory_object.c b/osfmk/default_pager/dp_memory_object.c index 83c24fe6f..44dc731ff 100644 --- a/osfmk/default_pager/dp_memory_object.c +++ b/osfmk/default_pager/dp_memory_object.c @@ -438,6 +438,7 @@ dp_memory_object_data_reclaim( boolean_t reclaim_backing_store) { vstruct_t vs; + kern_return_t retval; vs_lookup(mem_obj, vs); for (;;) { @@ -450,13 +451,13 @@ dp_memory_object_data_reclaim( vs->vs_xfer_pending = TRUE; vs_unlock(vs); - ps_vstruct_reclaim(vs, TRUE, reclaim_backing_store); + retval = ps_vstruct_reclaim(vs, TRUE, reclaim_backing_store); vs_lock(vs); vs->vs_xfer_pending = FALSE; vs_unlock(vs); - return KERN_SUCCESS; + return retval; } kern_return_t @@ -967,7 +968,8 @@ default_pager_objects( /* * Our out-of-line port arrays are simply kalloc'ed. */ - psize = round_page(actual * sizeof (*pagers)); + psize = vm_map_round_page(actual * sizeof (*pagers), + vm_map_page_mask(ipc_kernel_map)); ppotential = (unsigned int) (psize / sizeof (*pagers)); pagers = (memory_object_t *)kalloc(psize); if (0 == pagers) @@ -979,7 +981,8 @@ default_pager_objects( * then "copied in" as if it had been sent by a * user process.
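 * (Sequence sketch for the code below: kmem_alloc() wires a buffer in
 * ipc_kernel_map, the reply data is filled in, vm_map_unwire() unwires
 * the pages, and vm_map_copyin() then moves the range into a copy
 * object that the MIG reply carries out-of-line.)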
*/ - osize = round_page(actual * sizeof (*objects)); + osize = vm_map_round_page(actual * sizeof (*objects), + vm_map_page_mask(ipc_kernel_map)); opotential = (unsigned int) (osize / sizeof (*objects)); kr = kmem_alloc(ipc_kernel_map, &oaddr, osize); if (KERN_SUCCESS != kr) { @@ -1065,8 +1068,12 @@ default_pager_objects( pagers[--ppotential] = MEMORY_OBJECT_NULL; } - kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr), - vm_map_round_page(oaddr + osize), FALSE); + kr = vm_map_unwire(ipc_kernel_map, + vm_map_trunc_page(oaddr, + vm_map_page_mask(ipc_kernel_map)), + vm_map_round_page(oaddr + osize, + vm_map_page_mask(ipc_kernel_map)), + FALSE); assert(KERN_SUCCESS == kr); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr, (vm_map_size_t)osize, TRUE, &pcopy); @@ -1148,7 +1155,8 @@ default_pager_object_pages( if (0 != addr) kmem_free(ipc_kernel_map, addr, size); - size = round_page(actual * sizeof (*pages)); + size = vm_map_round_page(actual * sizeof (*pages), + vm_map_page_mask(ipc_kernel_map)); kr = kmem_alloc(ipc_kernel_map, &addr, size); if (KERN_SUCCESS != kr) return KERN_RESOURCE_SHORTAGE; @@ -1163,8 +1171,12 @@ default_pager_object_pages( while (actual < potential) pages[--potential].dpp_offset = 0; - kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size), FALSE); + kr = vm_map_unwire(ipc_kernel_map, + vm_map_trunc_page(addr, + vm_map_page_mask(ipc_kernel_map)), + vm_map_round_page(addr + size, + vm_map_page_mask(ipc_kernel_map)), + FALSE); assert(KERN_SUCCESS == kr); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, (vm_map_size_t)size, TRUE, ©); diff --git a/osfmk/device/Makefile b/osfmk/device/Makefile index 817a680d8..f230f04d5 100644 --- a/osfmk/device/Makefile +++ b/osfmk/device/Makefile @@ -3,7 +3,6 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) @@ -26,8 +25,6 @@ EXPORT_MI_LIST = ${DATAFILES} ${_MIG_HDRS_} ${MIGINCLUDES} EXPORT_MI_DIR = device -.ORDER: ${MIG_HDRS} ${MIGINCLUDES} - # # Build path # @@ -40,7 +37,7 @@ DEVICE_FILES = device_server.h device_server.c COMP_FILES = ${DEVICE_FILES} -.ORDER: ${DEVICE_FILES} +do_build_all:: $(COMP_FILES) ${DEVICE_FILES}: device.defs @echo MIG $@ @@ -51,7 +48,5 @@ ${DEVICE_FILES}: device.defs -server device_server.c \ $< - - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/device/device.defs b/osfmk/device/device.defs index dda80916e..65f7686eb 100644 --- a/osfmk/device/device.defs +++ b/osfmk/device/device.defs @@ -68,6 +68,10 @@ subsystem #endif /* KERNEL_SERVER */ iokit 2800; +#if IOKITSIMD || KERNEL_SERVER +#define IOKIT_ALL_IPC 1 +#endif + #include #include #include @@ -233,7 +237,7 @@ routine io_connect_get_service( out service : io_object_t ); -#if KERNEL_SERVER || __ILP32__ +#if IOKIT_ALL_IPC || __ILP32__ routine io_connect_set_notification_port( connection : io_connect_t; in notification_type : uint32_t; @@ -245,7 +249,7 @@ routine io_connect_map_memory( connection : io_connect_t; in memory_type : uint32_t; in into_task : task_t; -#if KERNEL_SERVER +#if IOKIT_ALL_IPC inout address : uint32_t; inout size : uint32_t; #else @@ -270,7 +274,7 @@ routine io_connect_set_properties( out result : kern_return_t ); -#if KERNEL_SERVER || (__ILP32__ && !MAP_32B_METHODS) +#if IOKIT_ALL_IPC || (__ILP32__ && !MAP_32B_METHODS) routine io_connect_method_scalarI_scalarO( connection : io_connect_t; in selector : 
uint32_t; @@ -416,7 +420,7 @@ routine io_service_match_property_table( out matches : boolean_t ); -#if KERNEL_SERVER || (__ILP32__ && !MAP_32B_ASYNC_METHODS) +#if IOKIT_ALL_IPC || (__ILP32__ && !MAP_32B_ASYNC_METHODS) routine io_async_method_scalarI_scalarO( connection : io_connect_t; in wake_port : mach_port_make_send_t; @@ -456,7 +460,7 @@ skip; skip; #endif -#if KERNEL_SERVER || __ILP32__ +#if IOKIT_ALL_IPC || __ILP32__ routine io_service_add_notification( master_port : mach_port_t; in notification_type : io_name_t; @@ -489,12 +493,12 @@ routine io_connect_get_notification_semaphore( out semaphore : semaphore_t ); -#if KERNEL_SERVER || __ILP32__ +#if IOKIT_ALL_IPC || __ILP32__ routine io_connect_unmap_memory( connection : io_connect_t; in memory_type : uint32_t; in into_task : task_t; -#if KERNEL_SERVER +#if IOKIT_ALL_IPC in address : uint32_t #else in address : vm_address_t @@ -539,7 +543,7 @@ routine io_service_match_property_table_ool( out matches : boolean_t ); -#if KERNEL_SERVER || __ILP32__ +#if IOKIT_ALL_IPC || __ILP32__ routine io_service_add_notification_ool( master_port : mach_port_t; in notification_type : io_name_t; @@ -627,9 +631,9 @@ routine io_connect_async_method( ); -#if KERNEL_SERVER || __LP64__ +#if IOKIT_ALL_IPC || __LP64__ -#if KERNEL_SERVER +#if IOKIT_ALL_IPC #define FUNC_NAME(name) name ## _64 #else #define FUNC_NAME(name) name @@ -676,7 +680,7 @@ routine FUNC_NAME(io_service_add_notification_ool)( skip; skip; -#endif /* KERNEL_SERVER || __LP64__ */ +#endif /* IOKIT_ALL_IPC || __LP64__ */ routine io_registry_entry_get_registry_entry_id( registry_entry : io_object_t; diff --git a/osfmk/device/iokit_rpc.c b/osfmk/device/iokit_rpc.c index 978cb5ead..1412f4115 100644 --- a/osfmk/device/iokit_rpc.c +++ b/osfmk/device/iokit_rpc.c @@ -333,10 +333,12 @@ iokit_alloc_object_port( io_object_t obj, ipc_kobject_type_t type ) EXTERN kern_return_t iokit_destroy_object_port( ipc_port_t port ) { + + iokit_lock_port(port); ipc_kobject_set( port, IKO_NULL, IKOT_NONE); // iokit_remove_reference( obj ); - + iokit_unlock_port(port); ipc_port_dealloc_kernel( port); gIOKitPortCount--; @@ -358,7 +360,7 @@ iokit_make_send_right( task_t task, io_object_t obj, ipc_kobject_type_t type ) { ipc_port_t port; ipc_port_t sendPort; - mach_port_name_t name; + mach_port_name_t name = 0; if( obj == NULL) return MACH_PORT_NULL; @@ -374,8 +376,10 @@ iokit_make_send_right( task_t task, io_object_t obj, ipc_kobject_type_t type ) kern_return_t kr; kr = ipc_object_copyout( task->itk_space, (ipc_object_t) sendPort, MACH_MSG_TYPE_PORT_SEND, TRUE, &name); - if ( kr != KERN_SUCCESS) - name = MACH_PORT_NULL; + if ( kr != KERN_SUCCESS) { + ipc_port_release_send( sendPort ); + name = MACH_PORT_NULL; + } } else if ( sendPort == IP_NULL) name = MACH_PORT_NULL; else if ( sendPort == IP_DEAD) @@ -538,6 +542,8 @@ kern_return_t IOProtectCacheMode(vm_map_t __unused map, mach_vm_address_t __unus vm_prot_t prot; unsigned int flags; pmap_t pmap = map->pmap; + pmap_flush_context pmap_flush_context_storage; + boolean_t delayed_pmap_flush = FALSE; prot = (options & kIOMapReadOnly) ? 
VM_PROT_READ : (VM_PROT_READ|VM_PROT_WRITE); @@ -566,13 +572,21 @@ kern_return_t IOProtectCacheMode(vm_map_t __unused map, mach_vm_address_t __unus break; } + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + // enter each page's physical address in the target map for (off = 0; off < length; off += page_size) { ppnum_t ppnum = pmap_find_phys(pmap, va + off); - if (ppnum) - pmap_enter(pmap, va + off, ppnum, prot, VM_PROT_NONE, flags, TRUE); + if (ppnum) { + pmap_enter_options(pmap, va + off, ppnum, prot, VM_PROT_NONE, flags, TRUE, + PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); + delayed_pmap_flush = TRUE; + } } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); return (KERN_SUCCESS); } diff --git a/osfmk/device/subrs.c b/osfmk/device/subrs.c index 4956a3b40..99f8d5032 100644 --- a/osfmk/device/subrs.c +++ b/osfmk/device/subrs.c @@ -262,7 +262,6 @@ strncasecmp(const char *s1, const char *s2, size_t n) * Deprecation Warning: * strcpy() is being deprecated. Please use strlcpy() instead. */ -#if !CONFIG_EMBEDDED char * strcpy( char *to, @@ -275,7 +274,6 @@ strcpy( return ret; } -#endif /* * Abstract: @@ -286,7 +284,7 @@ strcpy( * to the "to" string. */ -// ARM implementation in ../arm/strncpy.s +// ARM implementation in ../arm/strncpy.c char * strncpy( char *s1, @@ -382,7 +380,7 @@ atoi_term( */ // ARM implementation in ../arm/strnlen.s -size_t +size_t strnlen(const char *s, size_t max) { const char *es = s + max, *p = s; while(*p && p != es) @@ -432,7 +430,6 @@ itoa( * Deprecation Warning: * strcat() is being deprecated. Please use strlcat() instead. */ -#if !CONFIG_EMBEDDED char * strcat( char *dest, @@ -446,7 +443,6 @@ strcat( ; return (old); } -#endif /* * Appends src to string dst of size siz (unlike strncat, siz is the @@ -489,7 +485,7 @@ strlcat(char *dst, const char *src, size_t siz) * Returns strlen(src); if retval >= siz, truncation occurred. */ -// ARM implementation in ../arm/strlcpy.s +// ARM implementation in ../arm/strlcpy.c size_t strlcpy(char *dst, const char *src, size_t siz) { @@ -571,3 +567,25 @@ strprefix(register const char *s1, register const char *s2) } return (1); } + +char * +strnstr(char *s, const char *find, size_t slen) +{ + char c, sc; + size_t len; + + if ((c = *find++) != '\0') { + len = strlen(find); + do { + do { + if ((sc = *s++) == '\0' || slen-- < 1) + return (NULL); + } while (sc != c); + if (len > slen) + return (NULL); + } while (strncmp(s, find, len) != 0); + s--; + } + return (s); +} + diff --git a/osfmk/gssd/Makefile b/osfmk/gssd/Makefile index bda924f4b..4134279df 100644 --- a/osfmk/gssd/Makefile +++ b/osfmk/gssd/Makefile @@ -6,14 +6,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS = - -EXPINC_SUBDIRS_I386 = - MIG_DEFS = gssd_mach.defs DATAFILES = gssd_mach_types.h ${MIG_DEFS} @@ -46,13 +38,13 @@ MIG_KUSRC = gssd_mach.c gssd_mach.h COMP_FILES = ${MIG_KUSRC} -${COMP_FILES} : gssd_mach.defs +do_build_all:: $(COMP_FILES) ${MIG_KUSRC} : gssd_mach.defs @echo MIG $@ $(_v)${MIG} ${MIGFLAGS} ${MIGKUFLAGS} \ - -user $*.c \ - -header $*.h \ + -user gssd_mach.c \ + -header gssd_mach.h \ -server /dev/null \ -sheader /dev/null \ $< diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index 8d01210e9..dd3baf946 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Computer, Inc. 
All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -139,13 +139,7 @@ uint32_t pbtcnt = 0; volatile int panic_double_fault_cpu = -1; -#if defined (__i386__) -#define PRINT_ARGS_FROM_STACK_FRAME 1 -#elif defined (__x86_64__) #define PRINT_ARGS_FROM_STACK_FRAME 0 -#else -#error unsupported architecture -#endif typedef struct _cframe_t { struct _cframe_t *prev; @@ -370,11 +364,6 @@ efi_set_tables_64(EFI_SYSTEM_TABLE_64 * system_table) gPEEFISystemTable = system_table; - if (!cpu_mode_is64bit()) { - kprintf("Skipping 64-bit EFI runtime services for 32-bit legacy mode\n"); - break; - } - if(system_table->RuntimeServices == 0) { kprintf("No runtime table present\n"); break; @@ -447,11 +436,7 @@ efi_set_tables_32(EFI_SYSTEM_TABLE_32 * system_table) // 32-bit virtual address is OK for 32-bit EFI and 32-bit kernel. // For a 64-bit kernel, booter provides a virtual address mod 4G runtime = (EFI_RUNTIME_SERVICES_32 *) -#ifdef __x86_64__ (system_table->RuntimeServices | VM_MIN_KERNEL_ADDRESS); -#else - system_table->RuntimeServices; -#endif DPRINTF("Runtime table addressed at %p\n", runtime); if (runtime->Hdr.Signature != EFI_RUNTIME_SERVICES_SIGNATURE) { kprintf("Bad EFI runtime table signature\n"); @@ -524,11 +509,9 @@ efi_init(void) if (((mptr->Attribute & EFI_MEMORY_RUNTIME) == EFI_MEMORY_RUNTIME) ) { vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages); vm_addr = (vm_offset_t) mptr->VirtualStart; -#ifdef __x86_64__ /* For K64 on EFI32, shadow-map into high KVA */ if (vm_addr < VM_MIN_KERNEL_ADDRESS) vm_addr |= VM_MIN_KERNEL_ADDRESS; -#endif phys_addr = (vm_map_offset_t) mptr->PhysicalStart; DPRINTF(" Type: %x phys: %p EFIv: %p kv: %p size: %p\n", mptr->Type, @@ -592,11 +575,9 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages); vm_addr = (vm_offset_t) mptr->VirtualStart; -#ifdef __x86_64__ /* K64 on EFI32 */ if (vm_addr < VM_MIN_KERNEL_ADDRESS) vm_addr |= VM_MIN_KERNEL_ADDRESS; -#endif phys_addr = (vm_map_offset_t) mptr->PhysicalStart; kprintf("mapping[%u] %qx @ %lx, %llu\n", mptr->Type, phys_addr, (unsigned long)vm_addr, mptr->NumberOfPages); @@ -615,10 +596,8 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o vm_size = (vm_offset_t)i386_ptob((uint32_t)mptr->NumberOfPages); vm_addr = (vm_offset_t) mptr->VirtualStart; -#ifdef __x86_64__ if (vm_addr < VM_MIN_KERNEL_ADDRESS) vm_addr |= VM_MIN_KERNEL_ADDRESS; -#endif phys_addr = (vm_map_offset_t) mptr->PhysicalStart; kprintf("mapping[%u] %qx @ %lx, %llu\n", mptr->Type, phys_addr, (unsigned long)vm_addr, mptr->NumberOfPages); @@ -652,10 +631,8 @@ hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_o void machine_init(void) { -#if __x86_64__ /* Now with VM up, switch to dynamically allocated cpu data */ cpu_data_realloc(); -#endif /* Ensure panic buffer is initialized. */ debug_log_init(); @@ -819,11 +796,7 @@ Debugger( panic_io_port_read(); /* Obtain current frame pointer */ -#if defined (__i386__) - __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); -#elif defined (__x86_64__) __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); -#endif /* Print backtrace - callee is internally synchronized */ panic_i386_backtrace(stackptr, ((panic_double_fault_cpu == cn) ? 
80: 48), NULL, FALSE, NULL); @@ -1053,7 +1026,7 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu /* Spin on print backtrace lock, which serializes output * Continue anyway if a timeout occurs. */ - hw_lock_to(&pbtlock, LockTimeOutTSC*2); + hw_lock_to(&pbtlock, ~0U); pbtcpu = cn; } @@ -1064,7 +1037,6 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu } if ((regdump == TRUE) && (regs != NULL)) { -#if defined(__x86_64__) x86_saved_state64_t *ss64p = saved_state64(regs); kdb_printf( "RAX: 0x%016llx, RBX: 0x%016llx, RCX: 0x%016llx, RDX: 0x%016llx\n" @@ -1079,17 +1051,6 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu ss64p->isf.rflags, ss64p->isf.rip, ss64p->isf.cs, ss64p->isf.ss); PC = ss64p->isf.rip; -#else - x86_saved_state32_t *ss32p = saved_state32(regs); - kdb_printf( - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x, CS: 0x%08x, DS: 0x%08x\n", - ss32p->eax,ss32p->ebx,ss32p->ecx,ss32p->edx, - ss32p->cr2,ss32p->ebp,ss32p->esi,ss32p->edi, - ss32p->efl,ss32p->eip,ss32p->cs, ss32p->ds); - PC = ss32p->eip; -#endif } kdb_printf("Backtrace (CPU %d), " diff --git a/osfmk/i386/Diagnostics.c b/osfmk/i386/Diagnostics.c index 3023aeef9..34acf876f 100644 --- a/osfmk/i386/Diagnostics.c +++ b/osfmk/i386/Diagnostics.c @@ -71,7 +71,6 @@ #include #include #include - #include #include #include @@ -80,7 +79,7 @@ diagWork dgWork; uint64_t lastRuptClear = 0ULL; - +boolean_t diag_pmc_enabled = FALSE; void cpu_powerstats(void *); typedef struct { @@ -98,6 +97,7 @@ typedef struct { } core_energy_stat_t; typedef struct { + uint64_t pkes_version; uint64_t pkg_cres[2][7]; uint64_t pkg_power_unit; uint64_t pkg_energy; @@ -130,6 +130,7 @@ diagCall64(x86_saved_state_t * state) assert(is_saved_state64(state)); regs = saved_state64(state); + diagflag = ((dgWork.dgFlags & enaDiagSCs) != 0); selector = regs->rdi; @@ -154,7 +155,6 @@ diagCall64(x86_saved_state_t * state) (void) copyout((char *) &real_ncpus, data, sizeof(real_ncpus)); /* Copy out number of * processors */ - currNap = mach_absolute_time(); /* Get the time now */ durNap = currNap - lastRuptClear; /* Get the last interval * duration */ @@ -176,6 +176,7 @@ diagCall64(x86_saved_state_t * state) } rval = 1; break; + case dgPowerStat: { uint32_t c2l = 0, c2h = 0, c3l = 0, c3h = 0, c6l = 0, c6h = 0, c7l = 0, c7h = 0; @@ -187,6 +188,7 @@ diagCall64(x86_saved_state_t * state) bzero(&pkes, sizeof(pkes)); bzero(&cest, sizeof(cest)); + pkes.pkes_version = 1ULL; rdmsr_carefully(MSR_IA32_PKG_C2_RESIDENCY, &c2l, &c2h); rdmsr_carefully(MSR_IA32_PKG_C3_RESIDENCY, &c3l, &c3h); rdmsr_carefully(MSR_IA32_PKG_C6_RESIDENCY, &c6l, &c6h); @@ -283,21 +285,27 @@ diagCall64(x86_saved_state_t * state) case dgEnaPMC: { boolean_t enable = TRUE; - mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_pmc_control, &enable); + uint32_t cpuinfo[4]; + /* Require architectural PMC v2 or higher, corresponding to + * Merom+, or equivalent virtualised facility. 
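Debugger() above now captures the frame pointer with the single movq %%rbp form, and panic_i386_backtrace() walks the resulting chain of frames: each saved %rbp points at the caller's frame, with the return address stored immediately above the link. A userspace sketch of the same walk, assuming frame pointers are kept (compile with -O0 -fno-omit-frame-pointer); the struct mirrors the shape of the _cframe_t used above:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the kernel's _cframe_t: the saved %rbp links frames, and
     * the return address sits immediately above the link. */
    typedef struct cframe {
        struct cframe *prev;
        uintptr_t      caller;
    } cframe_t;

    static void __attribute__((noinline))
    backtrace_here(void)
    {
        cframe_t *frame = (cframe_t *)__builtin_frame_address(0);
        int depth;

        /* Walk only the frames this demo is known to have created. */
        for (depth = 0; frame != NULL && depth < 4; depth++) {
            printf("frame %d: return address %p\n",
                depth, (void *)frame->caller);
            frame = frame->prev;
        }
    }

    static void __attribute__((noinline)) level2(void) { backtrace_here(); }
    static void __attribute__((noinline)) level1(void) { level2(); }

    int
    main(void)
    {
        level1();
        return 0;
    }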
+ */ + do_cpuid(0xA, &cpuinfo[0]); + if ((cpuinfo[0] & 0xFF) >= 2) { + mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_pmc_control, &enable); + diag_pmc_enabled = TRUE; + } rval = 1; } break; - #if DEBUG case dgGzallocTest: { (void) ml_set_interrupts_enabled(TRUE); - if (diagflag == 0) - break; - - unsigned *ptr = (unsigned *)kalloc(1024); - kfree(ptr, 1024); - *ptr = 0x42; + if (diagflag) { + unsigned *ptr = (unsigned *)kalloc(1024); + kfree(ptr, 1024); + *ptr = 0x42; + } } break; #endif @@ -306,21 +314,18 @@ diagCall64(x86_saved_state_t * state) case dgPermCheck: { (void) ml_set_interrupts_enabled(TRUE); - if (diagflag == 0) - break; - - rval = pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL); + if (diagflag) + rval = pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL); } break; #endif /* PERMIT_PERMCHECK */ - default: /* Handle invalid ones */ rval = 0; /* Return an exception */ } regs->rax = rval; - return rval; /* Normal non-ast check return */ + return rval; } void cpu_powerstats(__unused void *arg) { @@ -346,13 +351,15 @@ void cpu_powerstats(__unused void *arg) { rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch); cdp->cpu_c7res = ((uint64_t)ch << 32) | cl; - - uint64_t insns = read_pmc(FIXED_PMC0); - uint64_t ucc = read_pmc(FIXED_PMC1); - uint64_t urc = read_pmc(FIXED_PMC2); - cdp->cpu_cur_insns = insns; - cdp->cpu_cur_ucc = ucc; - cdp->cpu_cur_urc = urc; + + if (diag_pmc_enabled) { + uint64_t insns = read_pmc(FIXED_PMC0); + uint64_t ucc = read_pmc(FIXED_PMC1); + uint64_t urc = read_pmc(FIXED_PMC2); + cdp->cpu_cur_insns = insns; + cdp->cpu_cur_ucc = ucc; + cdp->cpu_cur_urc = urc; + } } void cpu_pmc_control(void *enablep) { diff --git a/osfmk/i386/Makefile b/osfmk/i386/Makefile index 270006bdb..f773a15b3 100644 --- a/osfmk/i386/Makefile +++ b/osfmk/i386/Makefile @@ -3,7 +3,6 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) diff --git a/osfmk/i386/_setjmp.s b/osfmk/i386/_setjmp.s deleted file mode 100644 index faa486515..000000000 --- a/osfmk/i386/_setjmp.s +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
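The dgEnaPMC hunk above refuses to program the fixed counters unless CPUID leaf 0xA reports architectural performance monitoring version 2 or higher in EAX bits 7:0, and cpu_powerstats() now reads FIXED_PMC0-2 only after that check has set diag_pmc_enabled. The identical probe can be run from userspace with the compiler-provided <cpuid.h> helper:

    #include <cpuid.h>
    #include <stdio.h>

    int
    main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0xA, &eax, &ebx, &ecx, &edx)) {
            printf("CPUID leaf 0xA not supported\n");
            return 1;
        }
        unsigned int version = eax & 0xFF;   /* architectural PMU version */
        unsigned int nfixed  = edx & 0x1F;   /* fixed counters, valid v2+ */

        printf("architectural PMU version %u, %u fixed counters\n",
            version, nfixed);
        if (version >= 2)
            printf("fixed-function counters usable, as dgEnaPMC requires\n");
        return 0;
    }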
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * C library -- _setjmp, _longjmp - * - * _longjmp(a,v) - * will generate a "return(v)" from - * the last call to - * _setjmp(a) - * by restoring registers from the stack, - * The previous signal state is NOT restored. - * - */ - -#include - -ENTRY(_setjmp) - movl 4(%esp),%ecx # fetch buffer - movl %ebx,0(%ecx) - movl %esi,4(%ecx) - movl %edi,8(%ecx) - movl %ebp,12(%ecx) # save frame pointer of caller - popl %edx - movl %esp,16(%ecx) # save stack pointer of caller - movl %edx,20(%ecx) # save pc of caller - xorl %eax,%eax - jmp *%edx - -ENTRY(_longjmp) - movl 8(%esp),%eax # return(v) - movl 4(%esp),%ecx # fetch buffer - movl 0(%ecx),%ebx - movl 4(%ecx),%esi - movl 8(%ecx),%edi - movl 12(%ecx),%ebp - movl 16(%ecx),%esp - orl %eax,%eax - jnz 0f - incl %eax -0: jmp *20(%ecx) # done, return.... diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c index 69a45836a..bccc305cf 100644 --- a/osfmk/i386/acpi.c +++ b/osfmk/i386/acpi.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -53,7 +53,6 @@ #include #include -#include #include #include #include @@ -110,9 +109,6 @@ acpi_hibernate(void *refcon) if (current_cpu_datap()->cpu_hibernate) { -#if defined(__i386__) - cpu_IA32e_enable(current_cpu_datap()); -#endif mode = hibernate_write_image(); if( mode == kIOHibernatePostWriteHalt ) @@ -136,12 +132,6 @@ acpi_hibernate(void *refcon) cpu_datap(0)->cpu_hibernate = 0; } -#if defined(__i386__) - /* - * If we're in 64-bit mode, drop back into legacy mode during sleep. 
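The i386 _setjmp.s removed above implements the standard contract, including the subtle tail sequence orl %eax,%eax; jnz 0f; incl %eax: a longjmp requesting v == 0 must deliver 1, so the setjmp site can always distinguish the direct return from the jump back. The portable C equivalent of that guarantee:

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf env;

    static void
    jump_back(void)
    {
        longjmp(env, 0);    /* request "return 0"... */
    }

    int
    main(void)
    {
        switch (setjmp(env)) {
        case 0:
            printf("direct return from setjmp\n");
            jump_back();    /* does not return */
            break;
        case 1:
            /* ...but longjmp(env, 0) delivers 1, exactly what the
             * deleted asm's "jnz 0f; incl %eax" guarantees. */
            printf("longjmp delivered 1\n");
            break;
        }
        return 0;
    }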
- */ - cpu_IA32e_disable(current_cpu_datap()); -#endif } kdebug_enable = 0; @@ -157,6 +147,7 @@ acpi_hibernate(void *refcon) #endif /* HIBERNATION */ extern void slave_pstart(void); +extern void hibernate_rebuild_vm_structs(void); extern unsigned int wake_nkdbufs; @@ -170,9 +161,9 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) unsigned int cpu; kern_return_t rc; unsigned int my_cpu; - uint64_t now; - uint64_t my_tsc; - uint64_t my_abs; + uint64_t start; + uint64_t elapsed = 0; + uint64_t elapsed_trace_start = 0; kprintf("acpi_sleep_kernel hib=%d, cpu=%d\n", current_cpu_datap()->cpu_hibernate, cpu_number()); @@ -206,12 +197,6 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) vmx_suspend(); #endif -#if defined(__i386__) - /* - * If we're in 64-bit mode, drop back into legacy mode during sleep. - */ - cpu_IA32e_disable(current_cpu_datap()); -#endif /* * Enable FPU/SIMD unit for potential hibernate acceleration */ @@ -230,18 +215,16 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) * Will not return until platform is woken up, * or if sleep failed. */ -#ifdef __x86_64__ uint64_t old_cr3 = x86_64_pre_sleep(); -#endif #if HIBERNATION acpi_sleep_cpu(acpi_hibernate, &data); #else acpi_sleep_cpu(func, refcon); #endif -#ifdef __x86_64__ + start = mach_absolute_time(); + x86_64_post_sleep(old_cr3); -#endif #endif /* CONFIG_SLEEP */ @@ -255,11 +238,6 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) #if HIBERNATION if (current_cpu_datap()->cpu_hibernate) { -#if defined(__i386__) - int i; - for (i = 0; i < PMAP_NWINDOWS; i++) - *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0; -#endif did_hibernate = TRUE; } else @@ -302,12 +280,14 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) */ pmMarkAllCPUsOff(); - ml_get_timebase(&now); /* re-enable and re-init local apic (prior to starting timers) */ if (lapic_probe()) lapic_configure(); + hibernate_rebuild_vm_structs(); + + elapsed += mach_absolute_time() - start; acpi_wake_abstime = mach_absolute_time(); /* let the realtime clock reset */ @@ -316,9 +296,13 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) kdebug_enable = save_kdebug_enable; if (kdebug_enable == 0) { - if (wake_nkdbufs) + if (wake_nkdbufs) { + start = mach_absolute_time(); start_kern_tracing(wake_nkdbufs, TRUE); + elapsed_trace_start += mach_absolute_time() - start; + } } + start = mach_absolute_time(); /* Reconfigure FP/SIMD unit */ init_fpu(); @@ -327,12 +311,9 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) IOCPURunPlatformActiveActions(); if (did_hibernate) { + elapsed += mach_absolute_time() - start; - my_tsc = (now >> 32) | (now << 32); - my_abs = tmrCvt(my_tsc, tscFCvtt2n); - - KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 2) | DBG_FUNC_START, - (uint32_t)(my_abs >> 32), (uint32_t)my_abs, 0, 0, 0); + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 2) | DBG_FUNC_START, elapsed, elapsed_trace_start, 0, 0, 0); hibernate_machine_init(); KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 2) | DBG_FUNC_END, 0, 0, 0, 0, 0); @@ -351,17 +332,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) /* Restart timer interrupts */ rtc_timer_start(); - - #if HIBERNATION -#ifdef __i386__ - /* The image is written out using the copy engine, which disables - * preemption. 
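acpi_sleep_kernel() above now brackets each wake phase with mach_absolute_time() and accumulates the deltas into elapsed and elapsed_trace_start, which are emitted on the DBG_HIBERNATE tracepoint in place of the old TSC byte-swap arithmetic. The same accumulate-across-phases pattern from userspace on OS X, with usleep standing in for the measured work and mach_timebase_info converting abstime units to nanoseconds:

    #include <mach/mach_time.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
        mach_timebase_info_data_t tb;
        uint64_t start, elapsed = 0;

        mach_timebase_info(&tb);

        start = mach_absolute_time();
        usleep(1000);                       /* phase 1 (stand-in work) */
        elapsed += mach_absolute_time() - start;

        /* ... unmeasured work may run here without polluting the total ... */

        start = mach_absolute_time();
        usleep(2000);                       /* phase 2 */
        elapsed += mach_absolute_time() - start;

        printf("measured phases: %llu ns\n",
            (unsigned long long)(elapsed * tb.numer / tb.denom));
        return 0;
    }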
Since the copy engine writes out the page which contains - * the preemption variable when it is disabled, we need to explicitly - * enable it here */ - if (did_hibernate) - enable_preemption(); -#endif kprintf("ret from acpi_sleep_cpu hib=%d\n", did_hibernate); #endif diff --git a/osfmk/i386/acpi_wakeup.s b/osfmk/i386/acpi_wakeup.s deleted file mode 100644 index 30d40507e..000000000 --- a/osfmk/i386/acpi_wakeup.s +++ /dev/null @@ -1,315 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include -#include -#include - - .file "acpi_wakeup.s" - - .text - .align 12 /* Page align for single bcopy_phys() */ - -#define PA(addr) (addr) - -#if CONFIG_SLEEP -ENTRY(acpi_wake_prot) - - /* protected mode, paging disabled */ - - /* setup the protected mode segment registers */ - mov $0x10, %eax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - movw %ax, %fs - movw %ax, %gs - - /* jump back to the sleep function in the kernel */ - movl PA(saved_eip), %eax - jmp *%eax - -/* - * acpi_sleep_cpu(acpi_sleep_callback func, void * refcon) - * - * Save CPU state before platform sleep. Restore CPU state - * following wake up. - */ - -ENTRY(acpi_sleep_cpu) - pushl %ebp - movl %esp, %ebp - - /* save flags */ - pushfl - - /* save general purpose registers */ - pushal - movl %esp, saved_esp - - /* make sure tlb is flushed */ - movl %cr3,%eax - movl %eax,%cr3 - - /* save control registers */ - movl %cr0, %eax - movl %eax, saved_cr0 - movl %cr2, %eax - movl %eax, saved_cr2 - movl %cr3, %eax - movl %eax, saved_cr3 - movl %cr4, %eax - movl %eax, saved_cr4 - - /* save segment registers */ - movw %es, saved_es - movw %fs, saved_fs - movw %gs, saved_gs - movw %ss, saved_ss - - /* save descriptor table registers */ - sgdt saved_gdt - sldt saved_ldt - sidt saved_idt - str saved_tr - - /* - * When system wakes up, the real mode wake handler will revert to - * protected mode, then jump to the address stored at saved_eip. - */ - movl $(PA(wake_prot)), saved_eip - - /* - * Call ACPI function provided by the caller to sleep the platform. - * This call will not return on success. 
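The deleted acpi_sleep_cpu() begins by saving state the compiler will not preserve across a sleep transition: flags with pushfl, general-purpose registers with pushal, then control and segment registers. The flags half translates directly to 64-bit userspace, where a pushfq/popq pair reads RFLAGS without otherwise disturbing it:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t
    read_rflags(void)
    {
        uint64_t flags;

        /* pushfq pushes RFLAGS; pop it straight into a C variable. */
        __asm__ volatile("pushfq\n\tpopq %0" : "=r"(flags) :: "memory");
        return flags;
    }

    int
    main(void)
    {
        uint64_t flags = read_rflags();

        printf("RFLAGS = 0x%llx (IF=%llu, CF=%llu)\n",
            (unsigned long long)flags,
            (unsigned long long)((flags >> 9) & 1),  /* interrupt enable */
            (unsigned long long)(flags & 1));        /* carry */
        return 0;
    }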
- */ - pushl B_ARG1 - movl B_ARG0, %edi - call *%edi - popl %edi - - /* sleep failed, no cpu context lost */ - jmp wake_restore - -wake_prot: - /* protected mode, paging disabled */ - POSTCODE(ACPI_WAKE_PROT_ENTRY) - - movl PA(saved_cr3), %ebx - movl PA(saved_cr4), %ecx - /* - * restore cr3, PAE and NXE states in an orderly fashion - */ - movl %ebx, %cr3 - movl %ecx, %cr4 - - movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ - rdmsr /* MSR value return in edx: eax */ - orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ - wrmsr /* Update Extended Feature Enable reg */ - - /* restore kernel GDT */ - lgdt PA(saved_gdt) - - movl PA(saved_cr2), %eax - movl %eax, %cr2 - - /* restore CR0, paging enabled */ - movl PA(saved_cr0), %eax - movl %eax, %cr0 - - /* switch to kernel code segment */ - ljmpl $(KERNEL32_CS), $wake_paged - -wake_paged: - - /* protected mode, paging enabled */ - POSTCODE(ACPI_WAKE_PAGED_ENTRY) - - /* switch to kernel data segment */ - movw $(KERNEL_DS), %ax - movw %ax, %ds - - /* restore local and interrupt descriptor tables */ - lldt saved_ldt - lidt saved_idt - - /* restore segment registers */ - movw saved_es, %es - movw saved_fs, %fs - movw saved_gs, %gs - movw saved_ss, %ss - - /* - * Restore task register. Before doing this, clear the busy flag - * in the TSS descriptor set by the CPU. - */ - movl $saved_gdt, %eax - movl 2(%eax), %edx /* GDT base, skip limit word */ - movl $(KERNEL_TSS), %eax /* TSS segment selector */ - movb $(K_TSS), 5(%edx, %eax) /* clear busy flag */ - ltr saved_tr /* restore TR */ - -wake_restore: - - /* restore general purpose registers */ - movl saved_esp, %esp - popal - - /* restore flags */ - popfl - - leave - ret - - - .section __HIB, __text - .align 2 - - .globl EXT(acpi_wake_prot_entry) -ENTRY(acpi_wake_prot_entry) - mov %cr0, %eax - and $(~CR0_PG), %eax - mov %eax, %cr0 - mov $EXT(IdlePDPT), %eax - mov EXT(IdlePTD), %ecx - or $(INTEL_PTE_VALID), %ecx - mov $0x0, %edx - mov %ecx, (0*8+0)(%eax) - mov %edx, (0*8+4)(%eax) - add $(PAGE_SIZE), %ecx - mov %ecx, (1*8+0)(%eax) - mov %edx, (1*8+4)(%eax) - add $(PAGE_SIZE), %ecx - mov %ecx, (2*8+0)(%eax) - mov %edx, (2*8+4)(%eax) - add $(PAGE_SIZE), %ecx - mov %ecx, (3*8+0)(%eax) - mov %edx, (3*8+4)(%eax) - mov %eax, %cr3 - mov %cr0, %eax - or $(CR0_PG), %eax - mov %eax, %cr0 - - /* protected mode, paging enabled */ - - POSTCODE(ACPI_WAKE_PAGED_ENTRY) - - /* restore kernel GDT */ - lgdt saved_gdt - - POSTCODE(0x40) - - /* restore control registers */ - - movl saved_cr0, %eax - movl %eax, %cr0 - - movl saved_cr2, %eax - movl %eax, %cr2 - - POSTCODE(0x3E) - - /* restore real PDE base */ - movl saved_cr3, %eax - movl saved_cr4, %edx - movl %eax, %cr3 - movl %edx, %cr4 - movl %eax, %cr3 - - /* switch to kernel data segment */ - movw $(KERNEL_DS), %ax - movw %ax, %ds - - POSTCODE(0x3C) - /* restore local and interrupt descriptor tables */ - lldt saved_ldt - lidt saved_idt - - POSTCODE(0x3B) - /* restore segment registers */ - movw saved_es, %es - movw saved_fs, %fs - movw saved_gs, %gs - movw saved_ss, %ss - - POSTCODE(0x3A) - /* - * Restore task register. Before doing this, clear the busy flag - * in the TSS descriptor set by the CPU. 
- */ - movl $saved_gdt, %eax - movl 2(%eax), %edx /* GDT base, skip limit word */ - movl $(KERNEL_TSS), %eax /* TSS segment selector */ - movb $(K_TSS), 5(%edx, %eax) /* clear busy flag */ - ltr saved_tr /* restore TR */ - - /* restore general purpose registers */ - movl saved_esp, %esp - popal - - /* restore flags */ - popfl - - /* make sure interrupts are disabled */ - cli - - movl $2, %eax - - leave - - ret -#endif /* CONFIG_SLEEP */ - -.data -.section __SLEEP, __data -.align 2 - -/* - * CPU registers saved across sleep/wake. - */ - -saved_esp: .long 0 -saved_es: .word 0 -saved_fs: .word 0 -saved_gs: .word 0 -saved_ss: .word 0 -saved_cr0: .long 0 -saved_cr2: .long 0 -saved_cr3: .long 0 -saved_cr4: .long 0 -saved_gdt: .word 0 - .long 0 -saved_idt: .word 0 - .long 0 -saved_ldt: .word 0 -saved_tr: .word 0 -saved_eip: .long 0 - diff --git a/osfmk/i386/asm.h b/osfmk/i386/asm.h index a3a0d524b..b04ac6a7d 100644 --- a/osfmk/i386/asm.h +++ b/osfmk/i386/asm.h @@ -376,6 +376,7 @@ #define CCALL2(fn, arg1, arg2) \ mov arg1, %rdi ;\ + mov arg2, %rsi ;\ CCALL(fn) #define CCALL3(fn, arg1, arg2, arg3) \ diff --git a/osfmk/i386/asm64.h b/osfmk/i386/asm64.h index 0f9213e7d..b576f792d 100644 --- a/osfmk/i386/asm64.h +++ b/osfmk/i386/asm64.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -28,40 +28,6 @@ /* Helper macros for 64-bit mode switching */ -#if __i386__ -/* - * Long jump to 64-bit space from 32-bit compatibility mode. - */ -#define ENTER_64BIT_MODE() \ - .code32 ;\ - .byte 0xea /* far jump longmode */ ;\ - .long 1f ;\ - .word KERNEL64_CS ;\ - .code64 ;\ -1: - -/* - * Here in long mode but still running below 4G. - * "Near" jump into uber-space. - */ -#define ENTER_UBERSPACE() \ - mov 2f,%rax ;\ - jmp *%rax ;\ -2: .long 3f ;\ - .long KERNEL_UBER_BASE_HI32 ;\ -3: - -/* - * Long jump to 32-bit compatibility mode from 64-bit space. - */ -#define ENTER_COMPAT_MODE() \ - ljmp *(%rip) ;\ - .long 4f ;\ - .word KERNEL32_CS ;\ - .code32 ;\ -4: - -#else /* * Long jump to 64-bit space from 32-bit compatibility mode. @@ -89,4 +55,3 @@ lret ;\ 4: .code32 -#endif diff --git a/osfmk/i386/bcopy.s b/osfmk/i386/bcopy.s deleted file mode 100644 index 73642a449..000000000 --- a/osfmk/i386/bcopy.s +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
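The one-line asm.h fix above is easy to read past: CCALL2 loaded arg1 into %rdi but never loaded arg2 into %rsi, so under the SysV AMD64 convention (integer arguments in %rdi, %rsi, %rdx, %rcx, %r8, %r9) any two-argument callee received whatever stale value was left in %rsi. A C-macro analogue of the defect, with names invented for the demo, just to make the failure mode concrete:

    #include <stdio.h>

    /* Stand-in for whatever stale value was left in %rsi. */
    static long stale_rsi = 0x5a5a;

    /* Pre-fix behaviour: arg2 is never forwarded. */
    #define CCALL2_BUGGY(fn, a1, a2)  fn((a1), stale_rsi)
    /* Post-fix behaviour: both arguments are forwarded. */
    #define CCALL2_FIXED(fn, a1, a2)  fn((a1), (a2))

    static long
    subtract(long a, long b)
    {
        return a - b;
    }

    int
    main(void)
    {
        printf("buggy: %ld\n", CCALL2_BUGGY(subtract, 10, 4)); /* garbage */
        printf("fixed: %ld\n", CCALL2_FIXED(subtract, 10, 4)); /* 6 */
        return 0;
    }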
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -#include - -/* void *memcpy((void *) to, (const void *) from, (size_t) bcount) */ - -ENTRY(memcpy) - pushl %edi - pushl %esi - movl 8+ 4(%esp),%edi /* to */ - movl %edi,%eax /* returns its first argument */ - movl 8+ 8(%esp),%esi /* from */ -memcpy_common: - movl 8+ 12(%esp),%edx /* number of bytes */ - cld -/* move longs*/ - movl %edx,%ecx - shrl $2,%ecx - rep - movsl -/* move bytes*/ - movl %edx,%ecx - andl $3,%ecx - rep - movsb - popl %esi - popl %edi - ret - -/* void bcopy((const char *) from, (char *) to, (unsigned int) count) */ - -ENTRY(bcopy_no_overwrite) - pushl %edi - pushl %esi - movl 8+ 8(%esp),%edi /* to */ - movl 8+ 4(%esp),%esi /* from */ - jmp memcpy_common - -/* bcopy16(from, to, bcount) using word moves */ - -ENTRY(bcopy16) - pushl %edi - pushl %esi - movl 8+12(%esp),%edx /* 8 for the two pushes above */ - movl 8+ 8(%esp),%edi - movl 8+ 4(%esp),%esi -/* move words */ -0: cld - movl %edx,%ecx - shrl $1,%ecx - rep - movsw -/* move bytes */ - movl %edx,%ecx - andl $1,%ecx - rep - movsb - popl %esi - popl %edi - ret - - - /* - * Based on NetBSD's bcopy.S from their libc. - * bcopy(src, dst, cnt) - * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 - */ -ENTRY(bcopy) - pushl %esi - pushl %edi - movl 12(%esp),%esi - movl 16(%esp),%edi - movl 20(%esp),%ecx - - movl %edi,%edx - subl %esi,%edx - cmpl %ecx,%edx /* overlapping && src < dst? */ - movl %ecx,%edx - jb 1f - - shrl $2,%ecx /* copy by 32-bit words */ - cld /* nope, copy forwards */ - rep - movsl - movl %edx,%ecx - andl $3,%ecx /* any bytes left? */ - rep - movsb - popl %edi - popl %esi - ret - - -1: - addl %ecx,%edi /* copy backwards */ - addl %ecx,%esi - decl %edi - decl %esi - andl $3,%ecx /* any fractional bytes? 
*/ - std - rep - movsb - movl %edx,%ecx /* copy remainder by 32-bit words */ - shrl $2,%ecx - subl $3,%esi - subl $3,%edi - rep - movsl - popl %edi - popl %esi - cld - ret diff --git a/osfmk/i386/bsd_i386.c b/osfmk/i386/bsd_i386.c index 7ae3c29f1..147951308 100644 --- a/osfmk/i386/bsd_i386.c +++ b/osfmk/i386/bsd_i386.c @@ -71,6 +71,7 @@ #ifdef MACH_BSD extern void mach_kauth_cred_uthread_update(void); +extern void throttle_lowpri_io(int); #endif void * find_user_regs(thread_t); @@ -81,8 +82,6 @@ unsigned int get_msr_nbits(void); unsigned int get_msr_rbits(void); -extern void throttle_lowpri_io(int); - /* * thread_userstack: * @@ -318,7 +317,7 @@ machdep_syscall(x86_saved_state_t *state) DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs->eax); - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); thread_exception_return(); /* NOTREACHED */ @@ -363,7 +362,7 @@ machdep_syscall64(x86_saved_state_t *state) DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%llu\n", regs->rax); - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); thread_exception_return(); /* NOTREACHED */ @@ -387,50 +386,15 @@ struct mach_call_args { }; static kern_return_t -mach_call_arg_munger32(uint32_t sp, int nargs, int call_number, struct mach_call_args *args); +mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap_t *trapp); static kern_return_t -mach_call_arg_munger32(uint32_t sp, int nargs, int call_number, struct mach_call_args *args) +mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap_t *trapp) { - unsigned int args32[9]; - - if (copyin((user_addr_t)(sp + sizeof(int)), (char *)args32, nargs * sizeof (int))) + if (copyin((user_addr_t)(sp + sizeof(int)), (char *)args, trapp->mach_trap_u32_words * sizeof (int))) return KERN_INVALID_ARGUMENT; - - switch (nargs) { - case 9: args->arg9 = args32[8]; - case 8: args->arg8 = args32[7]; - case 7: args->arg7 = args32[6]; - case 6: args->arg6 = args32[5]; - case 5: args->arg5 = args32[4]; - case 4: args->arg4 = args32[3]; - case 3: args->arg3 = args32[2]; - case 2: args->arg2 = args32[1]; - case 1: args->arg1 = args32[0]; - } - if (call_number == 10) { - /* munge the mach_vm_size_t for mach_vm_allocate() */ - args->arg3 = (((uint64_t)(args32[2])) | ((((uint64_t)(args32[3]))<<32))); - args->arg4 = args32[4]; - } else if (call_number == 12) { - /* munge the mach_vm_address_t and mach_vm_size_t for mach_vm_deallocate() */ - args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32))); - args->arg3 = (((uint64_t)(args32[3])) | ((((uint64_t)(args32[4]))<<32))); - } else if (call_number == 14) { - /* munge the mach_vm_address_t and mach_vm_size_t for mach_vm_protect() */ - args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32))); - args->arg3 = (((uint64_t)(args32[3])) | ((((uint64_t)(args32[4]))<<32))); - args->arg4 = args32[5]; - args->arg5 = args32[6]; - } else if (call_number == 90) { - /* munge_l for mach_wait_until_trap() */ - args->arg1 = (((uint64_t)(args32[0])) | ((((uint64_t)(args32[1]))<<32))); - } else if (call_number == 93) { - /* munge_wl for mk_timer_arm_trap() */ - args->arg2 = (((uint64_t)(args32[1])) | ((((uint64_t)(args32[2]))<<32))); - } - + trapp->mach_trap_arg_munge32(NULL, args); return KERN_SUCCESS; } @@ -476,7 +440,7 @@ mach_call_munger(x86_saved_state_t *state) argc = mach_trap_table[call_number].mach_trap_arg_count; if (argc) { - retval = mach_call_arg_munger32(regs->uesp, argc, call_number, &args); + retval = mach_call_arg_munger32(regs->uesp, &args, 
&mach_trap_table[call_number]); if (retval != KERN_SUCCESS) { regs->eax = retval; @@ -506,7 +470,7 @@ mach_call_munger(x86_saved_state_t *state) regs->eax = retval; - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); thread_exception_return(); /* NOTREACHED */ @@ -573,7 +537,7 @@ mach_call_munger64(x86_saved_state_t *state) MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, regs->rax, 0, 0, 0, 0); - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); thread_exception_return(); /* NOTREACHED */ diff --git a/osfmk/i386/bsd_i386_native.c b/osfmk/i386/bsd_i386_native.c index 13a7cb0aa..863344aa9 100644 --- a/osfmk/i386/bsd_i386_native.c +++ b/osfmk/i386/bsd_i386_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Apple Inc. All rights reserved. + * Copyright (c) 2010-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -86,12 +86,9 @@ machine_thread_dup( /* * Copy over the x86_saved_state registers */ - if (cpu_mode_is64bit()) { - if (thread_is_64bit(parent)) - bcopy(USER_REGS64(parent), USER_REGS64(child), sizeof(x86_saved_state64_t)); - else - bcopy(USER_REGS32(parent), USER_REGS32(child), sizeof(x86_saved_state_compat32_t)); - } else + if (thread_is_64bit(parent)) + bcopy(USER_REGS64(parent), USER_REGS64(child), sizeof(x86_saved_state64_t)); + else bcopy(USER_REGS32(parent), USER_REGS32(child), sizeof(x86_saved_state32_t)); /* @@ -206,11 +203,9 @@ thread_fast_set_cthread_self64(uint64_t self) pcb->cthread_self = self; mp_disable_preemption(); cdp = current_cpu_datap(); -#if defined(__x86_64__) if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) wrmsr64(MSR_IA32_KERNEL_GS_BASE, self); -#endif cdp->cpu_uber.cu_user_gs_base = self; mp_enable_preemption(); return (USER_CTHREAD); /* N.B.: not a kern_return_t! */ diff --git a/osfmk/i386/commpage/commpage.c b/osfmk/i386/commpage/commpage.c index 30ea3e10f..35ee69cdf 100644 --- a/osfmk/i386/commpage/commpage.c +++ b/osfmk/i386/commpage/commpage.c @@ -134,7 +134,7 @@ commpage_allocate( * * JMM - What we really need is a way to create it like this in the first place. */ - if (!(kr = vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr), &entry) || entry->is_sub_map)) + if (!(kr = vm_map_lookup_entry( kernel_map, vm_map_trunc_page(kernel_addr, VM_MAP_PAGE_MASK(kernel_map)), &entry) || entry->is_sub_map)) panic("cannot find commpage entry %d", kr); entry->object.vm_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; diff --git a/osfmk/i386/commpage/commpage.h b/osfmk/i386/commpage/commpage.h index 030d294af..a39d47b46 100644 --- a/osfmk/i386/commpage/commpage.h +++ b/osfmk/i386/commpage/commpage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2009 Apple Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. 
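mach_call_arg_munger32() above drops its hard-coded switch over call numbers 10, 12, 14, 90 and 93 in favour of a munge routine stored in each trap-table entry, so every trap describes its own 32-to-64-bit argument widening. A compact sketch of that table-driven shape; demo_trap_t, munge_ww and munge_l are stand-ins echoing the kernel's munge_* naming, not the real mach_trap_t machinery:

    #include <stdint.h>
    #include <stdio.h>

    /* One 32-bit user word per slot, as copied in from the user stack. */
    typedef struct { uint64_t arg[4]; } demo_args_t;

    typedef void (*munge_fn)(demo_args_t *);

    /* Two 32-bit words widen to two 64-bit args: nothing to do. */
    static void munge_ww(demo_args_t *a) { (void)a; }

    /* Two 32-bit words combine into one 64-bit arg, as a 64-bit
     * deadline passed from a 32-bit process would need. */
    static void
    munge_l(demo_args_t *a)
    {
        a->arg[0] = (uint32_t)a->arg[0] |
            ((uint64_t)(uint32_t)a->arg[1] << 32);
    }

    typedef struct {
        int      u32_words;   /* cf. mach_trap_u32_words above */
        munge_fn munge32;     /* cf. mach_trap_arg_munge32 above */
    } demo_trap_t;

    static const demo_trap_t trap_table[] = {
        [0] = { 2, munge_ww },
        [1] = { 2, munge_l  },
    };

    int
    main(void)
    {
        demo_args_t args = { .arg = { 0x89abcdef, 0x01234567 } };

        trap_table[1].munge32(&args);   /* dispatch, no switch needed */
        printf("64-bit arg: 0x%016llx\n",
            (unsigned long long)args.arg[0]);
        return 0;
    }

The payoff is the same as in the hunk above: adding a trap no longer means editing a central switch, and the copyin length comes from the table too.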
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -59,22 +59,10 @@ #define COMMPAGE_DESCRIPTOR_NAME(label) _commpage_ ## label -#if defined (__i386__) - -#define COMMPAGE_DESCRIPTOR_FIELD_POINTER .long -#define COMMPAGE_DESCRIPTOR_REFERENCE(label) \ - .long COMMPAGE_DESCRIPTOR_NAME(label) - -#elif defined (__x86_64__) - #define COMMPAGE_DESCRIPTOR_FIELD_POINTER .quad #define COMMPAGE_DESCRIPTOR_REFERENCE(label) \ .quad COMMPAGE_DESCRIPTOR_NAME(label) -#else -#error unsupported architecture -#endif - #define COMMPAGE_FUNCTION_START(label,codetype,alignment) \ .text ;\ .code ## codetype ;\ diff --git a/osfmk/i386/commpage/commpage_asm.s b/osfmk/i386/commpage/commpage_asm.s index af6227f72..89278b279 100644 --- a/osfmk/i386/commpage/commpage_asm.s +++ b/osfmk/i386/commpage/commpage_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2009 Apple Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -38,7 +38,6 @@ .globl _commpage_sched_gen_inc _commpage_sched_gen_inc: -#if defined (__x86_64__) FRAME /* Increment 32-bit commpage field if present */ @@ -59,30 +58,6 @@ _commpage_sched_gen_inc: 1: EMARF ret -#elif defined (__i386__) - FRAME - - /* Increment 32-bit commpage field if present */ - mov _commPagePtr32,%edx - testl %edx,%edx - je 1f - sub $(ASM_COMM_PAGE32_BASE_ADDRESS),%edx - lock - incl ASM_COMM_PAGE_SCHED_GEN(%edx) - - /* Increment 64-bit commpage field if present */ - mov _commPagePtr64,%edx - testl %edx,%edx - je 1f - sub $(ASM_COMM_PAGE32_START_ADDRESS),%edx - lock - incl ASM_COMM_PAGE_SCHED_GEN(%edx) -1: - EMARF - ret -#else -#error unsupported architecture -#endif /* pointers to the 32-bit commpage routine descriptors */ /* WARNING: these must be sorted by commpage address! */ @@ -94,14 +69,7 @@ _commpage_32_routines: COMMPAGE_DESCRIPTOR_REFERENCE(backoff) COMMPAGE_DESCRIPTOR_REFERENCE(pfz_enqueue) COMMPAGE_DESCRIPTOR_REFERENCE(pfz_dequeue) - COMMPAGE_DESCRIPTOR_REFERENCE(pfz_mutex_lock) -#if defined (__i386__) - .long 0 -#elif defined (__x86_64__) .quad 0 -#else -#error unsupported architecture -#endif /* pointers to the 64-bit commpage routine descriptors */ @@ -114,12 +82,5 @@ _commpage_64_routines: COMMPAGE_DESCRIPTOR_REFERENCE(backoff_64) COMMPAGE_DESCRIPTOR_REFERENCE(pfz_enqueue_64) COMMPAGE_DESCRIPTOR_REFERENCE(pfz_dequeue_64) - COMMPAGE_DESCRIPTOR_REFERENCE(pfz_mutex_lock_64) -#if defined (__i386__) - .long 0 -#elif defined (__x86_64__) .quad 0 -#else -#error unsupported architecture -#endif diff --git a/osfmk/i386/commpage/pthreads.s b/osfmk/i386/commpage/pthreads.s deleted file mode 100644 index c62094de1..000000000 --- a/osfmk/i386/commpage/pthreads.s +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2003-2009 Apple, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
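commpage_sched_gen_inc above bumps the scheduler-generation field with lock incl, which is the instruction a relaxed C11 fetch-add compiles to on x86. The same generation-counter bump in portable form:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Stand-in for the commpage's sched-generation field. */
    static _Atomic unsigned int sched_gen;

    static void
    sched_gen_inc(void)
    {
        /* Compiles to a "lock" RMW increment on x86-64. */
        atomic_fetch_add_explicit(&sched_gen, 1, memory_order_relaxed);
    }

    int
    main(void)
    {
        sched_gen_inc();
        sched_gen_inc();
        printf("generation = %u\n",
            atomic_load_explicit(&sched_gen, memory_order_relaxed));
        return 0;
    }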
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include -#include - -/* Temporary definitions. Replace by #including the correct file when available. */ - -#define PTHRW_EBIT 0x01 -#define PTHRW_LBIT 0x02 -#define PTHRW_YBIT 0x04 -#define PTHRW_WBIT 0x08 -#define PTHRW_UBIT 0x10 -#define PTHRW_RETRYBIT 0x20 -#define PTHRW_TRYLKBIT 0x40 - -#define PTHRW_INC 0x100 -#define PTHRW_BIT_MASK 0x000000ff; - -#define PTHRW_COUNT_SHIFT 8 -#define PTHRW_COUNT_MASK 0xffffff00 -#define PTHRW_MAX_READERS 0xffffff00 - -#define KSYN_MLWAIT 301 /* mutex lock wait syscall */ - -#define PTHRW_STATUS_ACQUIRED 0 -#define PTHRW_STATUS_SYSCALL 1 -#define PTHRW_STATUS_ERROR 2 - -#define PTHRW_LVAL 0 -#define PTHRW_UVAL 4 - - - -/* PREEMPTION FREE ZONE (PFZ) - * - * A portion of the commpage is speacial-cased by the kernel to be "preemption free", - * ie as if we had disabled interrupts in user mode. This facilitates writing - * "nearly-lockless" code, for example code that must be serialized by a spinlock but - * which we do not want to preempt while the spinlock is held. - * - * The PFZ is implemented by collecting all the "preemption-free" code into a single - * contiguous region of the commpage. Register %ebx is used as a flag register; - * before entering the PFZ, %ebx is cleared. If some event occurs that would normally - * result in a premption while in the PFZ, the kernel sets %ebx nonzero instead of - * preempting. Then, when the routine leaves the PFZ we check %ebx and - * if nonzero execute a special "pfz_exit" syscall to take the delayed preemption. - * - * PFZ code must bound the amount of time spent in the PFZ, in order to control - * latency. Backward branches are dangerous and must not be used in a way that - * could inadvertently create a long-running loop. - * - * Because we need to avoid being preempted between changing the mutex stateword - * and entering the kernel to relinquish, some low-level pthread mutex manipulations - * are located in the PFZ. - */ - -/* Work around 10062261 with a dummy non-local symbol */ -pthreads_dummy_symbol: - -/* Internal routine to handle pthread mutex lock operation. This is in the PFZ. - * %edi == ptr to LVAL/UVAL pair - * %esi == ptr to argument list on stack - * %ebx == preempion pending flag (kernel sets nonzero if we should preempt) - */ -COMMPAGE_FUNCTION_START(pfz_mutex_lock, 32, 4) - pushl %ebp // set up frame for backtrace - movl %esp,%ebp -1: - movl 16(%esi),%ecx // get mask (ie, PTHRW_EBIT etc) -2: - movl PTHRW_LVAL(%edi),%eax // get mutex LVAL - testl %eax,%ecx // is mutex available? - jnz 5f // no - - /* lock is available (if we act fast) */ - lea PTHRW_INC(%eax),%edx // copy original lval and bump sequence count - orl $PTHRW_EBIT, %edx // set EBIT - lock - cmpxchgl %edx,PTHRW_LVAL(%edi) // try to acquire lock for real - jz 4f // got it -3: - testl %ebx,%ebx // kernel trying to preempt us? 
- jz 2b // no, so loop and try again - COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_PFZ_MUTEX_LOCK,pfz_mutex_lock) - jmp 1b // loop to try again - - /* we acquired the mutex */ -4: - movl 20(%esi),%eax // get ptr to TID field of mutex - movl 8(%esi),%ecx // get 64-bit mtid - movl 12(%esi),%edx - movl %ecx,0(%eax) // store my TID in mutex structure - movl %edx,4(%eax) - movl $PTHRW_STATUS_ACQUIRED,%eax - popl %ebp - ret - - /* cannot acquire mutex, so update seq count, set "W", and block in kernel */ - /* this is where we cannot tolerate preemption or being killed */ -5: - lea PTHRW_INC(%eax),%edx // copy original lval and bump sequence count - orl $PTHRW_WBIT, %edx // set WBIT - lock - cmpxchgl %edx,PTHRW_LVAL(%edi) // try to update lock status atomically - jnz 3b // failed - movl 20(%esi),%eax // get ptr to TID field of mutex - pushl 4(%esi) // arg 5: flags from arg list - pushl 4(%eax) // arg 4: tid field from mutex - pushl 0(%eax) - pushl PTHRW_UVAL(%edi) // arg 3: uval field from mutex - pushl %edx // arg 2: new value of mutex lval field - pushl %edi // arg 1: ptr to LVAL/UVAL pair in mutex - call 6f // make ksyn_mlwait call - jc 6f // immediately reissue syscall if error - movl 24(%esi),%edx // get ptr to syscall_return arg - movl %eax,(%edx) // save syscall return value - movl $PTHRW_STATUS_SYSCALL,%eax // we had to make syscall - addl $28,%esp // pop off syscall args and return address - popl %ebp // pop off frame ptr - ret - - /* subroutine to make a ksyn_mlwait syscall */ -6: - movl (%esp),%edx // get return address but leave on stack - movl %esp,%ecx // save stack ptr here - movl $KSYN_MLWAIT,%eax // get syscall code - orl $0x00180000,%eax // copy 24 bytes of arguments in trampoline - xorl %ebx,%ebx // clear preemption flag - sysenter -COMMPAGE_DESCRIPTOR(pfz_mutex_lock,_COMM_PAGE_PFZ_MUTEX_LOCK) - - - -/************************* x86_64 versions follow **************************/ - - - -/* Internal routine to handle pthread mutex lock operation. This is in the PFZ. - * %rdi = lvalp - * %esi = flags - * %rdx = mtid - * %ecx = mask - * %r8 = tidp - * %r9 = &syscall_return - * %ebx = preempion pending flag (kernel sets nonzero if we should preempt) - */ -COMMPAGE_FUNCTION_START(pfz_mutex_lock_64, 64, 4) - pushq %rbp // set up frame for backtrace - movq %rsp,%rbp -1: - movl PTHRW_LVAL(%rdi),%eax // get old lval from mutex -2: - testl %eax,%ecx // can we acquire the lock? - jnz 5f // no - - /* lock is available (if we act fast) */ - lea PTHRW_INC(%rax),%r11 // copy original lval and bump sequence count - orl $PTHRW_EBIT, %r11d // set EBIT - lock - cmpxchgl %r11d,PTHRW_LVAL(%rdi) // try to acquire lock - jz 4f // got it -3: - testl %ebx,%ebx // kernel trying to preempt us? 
- jz 2b // no, so loop and try again - COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_PFZ_MUTEX_LOCK,pfz_mutex_lock_64) - jmp 1b // loop to try again - - /* we acquired the mutex */ -4: - movq %rdx,(%r8) // store mtid in mutex structure - movl $PTHRW_STATUS_ACQUIRED,%eax - popq %rbp - ret - - /* cannot acquire mutex, so update seq count and block in kernel */ - /* this is where we cannot tolerate preemption or being killed */ -5: - lea PTHRW_INC(%rax),%r11 // copy original lval and bump sequence count - orl $PTHRW_WBIT, %r11d // set WBIT - lock - cmpxchgl %r11d,PTHRW_LVAL(%rdi) // try to update lock status atomically - jnz 3b // failed - movq (%r8),%r10 // arg 4: tid field from mutex [NB: passed in R10] - movl %esi,%r8d // arg 5: flags from arg list - movl PTHRW_UVAL(%rdi),%edx // arg 3: uval field from mutex - movl %r11d,%esi // arg 2: new value of mutex lval field - // arg 1: LVAL/UVAL ptr already in %rdi -6: - movl $(SYSCALL_CONSTRUCT_UNIX(KSYN_MLWAIT)),%eax - pushq %rdx // some syscalls destroy %rdx so save it - xorl %ebx,%ebx // clear preemption flag - syscall - popq %rdx // restore in case we need to re-execute syscall - jc 6b // immediately re-execute syscall if error - movl %eax,(%r9) // store kernel return value - movl $PTHRW_STATUS_SYSCALL,%eax // we made syscall - popq %rbp - ret -COMMPAGE_DESCRIPTOR(pfz_mutex_lock_64,_COMM_PAGE_PFZ_MUTEX_LOCK) - diff --git a/osfmk/i386/copyio.c b/osfmk/i386/copyio.c deleted file mode 100644 index 82516b196..000000000 --- a/osfmk/i386/copyio.c +++ /dev/null @@ -1,621 +0,0 @@ -/* - * Copyright (c) 2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
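The deleted pfz_mutex_lock routines above pack the lock word (LVAL) as state bits in the low byte (EBIT for owned, WBIT for waiters) with a sequence count above PTHRW_COUNT_SHIFT; acquisition is a single compare-and-swap that sets EBIT and bumps the count, and contention falls through to a ksyn_mlwait kernel call with WBIT set. A single-threaded sketch of just the fast path, using the bit values defined above (nothing here reproduces the PFZ's %ebx preemption handshake, which only the kernel can provide):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PTHRW_EBIT  0x01u       /* lock owned */
    #define PTHRW_WBIT  0x08u       /* waiters present */
    #define PTHRW_INC   0x100u      /* sequence-count increment */

    static _Atomic uint32_t lval;

    /* Try the lock-free acquisition path once. */
    static bool
    pfz_try_lock(void)
    {
        uint32_t old = atomic_load_explicit(&lval, memory_order_relaxed);

        if (old & PTHRW_EBIT)
            return false;   /* held: the real code sets WBIT and blocks */

        /* Bump the sequence count and set EBIT in one CAS. */
        uint32_t nval = (old + PTHRW_INC) | PTHRW_EBIT;
        return atomic_compare_exchange_strong_explicit(&lval, &old, nval,
            memory_order_acquire, memory_order_relaxed);
    }

    int
    main(void)
    {
        printf("first try:  %s\n", pfz_try_lock() ? "acquired" : "would block");
        printf("second try: %s\n", pfz_try_lock() ? "acquired" : "would block");
        printf("lval = 0x%x (EBIT set, sequence bumped once)\n",
            (unsigned)atomic_load_explicit(&lval, memory_order_relaxed));
        return 0;
    }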
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * the copy engine has the following characteristics - * - copyio handles copies to/from user or kernel space - * - copypv deals with physical or virtual addresses - * - * implementation details as follows - * - a cache of up to NCOPY_WINDOWS is maintained per thread for - * access of user virutal space - * - the window size is determined by the amount of virtual space - * that can be mapped by a single page table - * - the mapping is done by copying the page table pointer from - * the user's directory entry corresponding to the window's - * address in user space to the directory entry corresponding - * to the window slot in the kernel's address space - * - the set of mappings is preserved across context switches, - * so the copy can run with pre-emption enabled - * - there is a gdt entry set up to anchor the kernel window on - * each processor - * - the copies are done using the selector corresponding to the - * gdt entry - * - the addresses corresponding to the user virtual address are - * relative to the beginning of the window being used to map - * that region... thus the thread can be pre-empted and switched - * to a different processor while in the midst of a copy - * - the window caches must be invalidated if the pmap changes out - * from under the thread... this can happen during vfork/exec... - * inval_copy_windows is the invalidation routine to be used - * - the copyio engine has 4 different states associated with it - * that allows for lazy tlb flushes and the ability to avoid - * a flush all together if we've just come from user space - * the 4 states are as follows... - * - * WINDOWS_OPENED - set by copyio to indicate to the context - * switch code that it is necessary to do a tlbflush after - * switching the windows since we're in the middle of a copy - * - * WINDOWS_CLOSED - set by copyio to indicate that it's done - * using the windows, so that the context switch code need - * not do the tlbflush... instead it will set the state to... - * - * WINDOWS_DIRTY - set by the context switch code to indicate - * to the copy engine that it is responsible for doing a - * tlbflush before using the windows again... it's also - * set by the inval_copy_windows routine to indicate the - * same responsibility. - * - * WINDOWS_CLEAN - set by the return to user path to indicate - * that a tlbflush has happened and that there is no need - * for copyio to do another when it is entered next... 
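The four states described above exist so TLB flushes can be deferred: a context switch flushes only when a copy is actually in flight, and otherwise just marks the windows dirty so the next copyio() pays for the flush itself. A skeleton of those transitions, with the flush reduced to a counter so the deferral is visible (the return-to-user path that sets WINDOWS_CLEAN is omitted for brevity):

    #include <stdio.h>

    typedef enum {
        WINDOWS_OPENED,   /* copyio mid-copy: context switch must flush */
        WINDOWS_CLOSED,   /* copyio done: switch may defer the flush */
        WINDOWS_DIRTY,    /* a flush is owed before the windows are reused */
        WINDOWS_CLEAN     /* return-to-user already flushed */
    } copyio_state_t;

    static copyio_state_t state = WINDOWS_CLEAN;
    static int flushes;

    static void flush_tlb_demo(void) { flushes++; }

    static void
    context_switch(void)
    {
        if (state == WINDOWS_OPENED)
            flush_tlb_demo();        /* cannot defer: copy in progress */
        else
            state = WINDOWS_DIRTY;   /* defer to the next copyio */
    }

    static void
    copyio_demo(void)
    {
        if (state == WINDOWS_DIRTY)
            flush_tlb_demo();        /* pay the deferred flush now */
        state = WINDOWS_OPENED;
        /* ... windowed copy runs here ... */
        state = WINDOWS_CLOSED;
    }

    int
    main(void)
    {
        copyio_demo();      /* CLEAN: no flush needed */
        context_switch();   /* CLOSED -> DIRTY, flush deferred */
        copyio_demo();      /* DIRTY: the one deferred flush is paid here */
        printf("flushes performed: %d (only when required)\n", flushes);
        return 0;
    }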
- * - * - a window for mapping single physical pages is provided for copypv - * - this window is maintained across context switches and has the - * same characteristics as the user space windows w/r to pre-emption - */ - -extern int copyout_user(const char *, vm_offset_t, vm_size_t); -extern int copyout_kern(const char *, vm_offset_t, vm_size_t); -extern int copyin_user(const vm_offset_t, char *, vm_size_t); -extern int copyin_kern(const vm_offset_t, char *, vm_size_t); -extern int copyoutphys_user(const char *, vm_offset_t, vm_size_t); -extern int copyoutphys_kern(const char *, vm_offset_t, vm_size_t); -extern int copyinphys_user(const vm_offset_t, char *, vm_size_t); -extern int copyinphys_kern(const vm_offset_t, char *, vm_size_t); -extern int copyinstr_user(const vm_offset_t, char *, vm_size_t, vm_size_t *); -extern int copyinstr_kern(const vm_offset_t, char *, vm_size_t, vm_size_t *); - -static int copyio(int, user_addr_t, char *, vm_size_t, vm_size_t *, int); -static int copyio_phys(addr64_t, addr64_t, vm_size_t, int); - - -#define COPYIN 0 -#define COPYOUT 1 -#define COPYINSTR 2 -#define COPYINPHYS 3 -#define COPYOUTPHYS 4 - -void inval_copy_windows(thread_t thread) -{ - int i; - - for (i = 0; i < NCOPY_WINDOWS; i++) { - thread->machine.copy_window[i].user_base = -1; - } - thread->machine.nxt_window = 0; - thread->machine.copyio_state = WINDOWS_DIRTY; - - KERNEL_DEBUG(0xeff70058 | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (int)thread->map, 0, 0, 0); -} - - -static int -copyio(int copy_type, user_addr_t user_addr, char *kernel_addr, - vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map) -{ - thread_t thread; - pmap_t pmap; - pt_entry_t *updp; - pt_entry_t *kpdp; - user_addr_t user_base; - vm_offset_t user_offset; - vm_offset_t kern_vaddr; - vm_size_t cnt; - vm_size_t bytes_copied; - int error = 0; - int window_index; - int copyio_state; - boolean_t istate; -#if KDEBUG - int debug_type = 0xeff70010; - debug_type += (copy_type << 2); -#endif - - thread = current_thread(); - - KERNEL_DEBUG(debug_type | DBG_FUNC_START, (int)(user_addr >> 32), (int)user_addr, - (int)nbytes, thread->machine.copyio_state, 0); - - if (nbytes == 0) { - KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr, - (unsigned)kernel_addr, (unsigned)nbytes, 0, 0); - return (0); - } - pmap = thread->map->pmap; - - if (pmap == kernel_pmap || use_kernel_map) { - - kern_vaddr = (vm_offset_t)user_addr; - - switch (copy_type) { - - case COPYIN: - error = copyin_kern(kern_vaddr, kernel_addr, nbytes); - break; - - case COPYOUT: - error = copyout_kern(kernel_addr, kern_vaddr, nbytes); - break; - - case COPYINSTR: - error = copyinstr_kern(kern_vaddr, kernel_addr, nbytes, lencopied); - break; - - case COPYINPHYS: - error = copyinphys_kern(kern_vaddr, kernel_addr, nbytes); - break; - - case COPYOUTPHYS: - error = copyoutphys_kern(kernel_addr, kern_vaddr, nbytes); - break; - } - KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)kern_vaddr, - (unsigned)kernel_addr, (unsigned)nbytes, - error | 0x80000000, 0); - return (error); - } - -#if CONFIG_DTRACE - thread->machine.specFlags |= CopyIOActive; -#endif /* CONFIG_DTRACE */ - - if ((nbytes && (user_addr + nbytes <= user_addr)) || - (user_addr < vm_map_min(thread->map)) || - (user_addr + nbytes > vm_map_max(thread->map))) { - error = EFAULT; - goto done; - } - - user_base = user_addr & ~((user_addr_t)(NBPDE - 1)); - user_offset = (vm_offset_t)(user_addr & (NBPDE - 1)); - - KERNEL_DEBUG(debug_type | DBG_FUNC_NONE, (int)(user_base >> 32), (int)user_base, - 
(int)user_offset, 0, 0); - - cnt = NBPDE - user_offset; - - if (cnt > nbytes) - cnt = nbytes; - - istate = ml_set_interrupts_enabled(FALSE); - - copyio_state = thread->machine.copyio_state; - thread->machine.copyio_state = WINDOWS_OPENED; - - (void) ml_set_interrupts_enabled(istate); - - - for (;;) { - - for (window_index = 0; window_index < NCOPY_WINDOWS; window_index++) { - if (thread->machine.copy_window[window_index].user_base == user_base) - break; - } - if (window_index >= NCOPY_WINDOWS) { - - window_index = thread->machine.nxt_window; - thread->machine.nxt_window++; - - if (thread->machine.nxt_window >= NCOPY_WINDOWS) - thread->machine.nxt_window = 0; - - /* - * it's necessary to disable pre-emption - * since I have to compute the kernel descriptor pointer - * for the new window - */ - istate = ml_set_interrupts_enabled(FALSE); - - thread->machine.copy_window[window_index].user_base = user_base; - - updp = pmap_pde(pmap, user_base); - - kpdp = current_cpu_datap()->cpu_copywindow_pdp; - kpdp += window_index; - - pmap_store_pte(kpdp, updp ? *updp : 0); - - (void) ml_set_interrupts_enabled(istate); - - copyio_state = WINDOWS_DIRTY; - - KERNEL_DEBUG(0xeff70040 | DBG_FUNC_NONE, window_index, - (unsigned)user_base, (unsigned)updp, - (unsigned)kpdp, 0); - - } -#if JOE_DEBUG - else { - istate = ml_set_interrupts_enabled(FALSE); - - updp = pmap_pde(pmap, user_base); - - kpdp = current_cpu_datap()->cpu_copywindow_pdp; - - kpdp += window_index; - - if ((*kpdp & PG_FRAME) != (*updp & PG_FRAME)) { - panic("copyio: user pdp mismatch - kpdp = 0x%qx, updp = 0x%qx\n", *kpdp, *updp); - } - (void) ml_set_interrupts_enabled(istate); - } -#endif - if (copyio_state == WINDOWS_DIRTY) { - flush_tlb(); - - copyio_state = WINDOWS_CLEAN; - - KERNEL_DEBUG(0xeff70054 | DBG_FUNC_NONE, window_index, 0, 0, 0, 0); - } - user_offset += (window_index * NBPDE); - - KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_offset, - (unsigned)kernel_addr, cnt, 0, 0); - - switch (copy_type) { - - case COPYIN: - error = copyin_user(user_offset, kernel_addr, cnt); - break; - - case COPYOUT: - error = copyout_user(kernel_addr, user_offset, cnt); - break; - - case COPYINPHYS: - error = copyinphys_user(user_offset, kernel_addr, cnt); - break; - - case COPYOUTPHYS: - error = copyoutphys_user(kernel_addr, user_offset, cnt); - break; - - case COPYINSTR: - error = copyinstr_user(user_offset, kernel_addr, cnt, &bytes_copied); - - /* - * lencopied should be updated on success - * or ENAMETOOLONG... but not EFAULT - */ - if (error != EFAULT) - *lencopied += bytes_copied; - - /* - * if we still have room, then the ENAMETOOLONG - * is just an artifact of the buffer straddling - * a window boundary and we should continue - */ - if (error == ENAMETOOLONG && nbytes > cnt) - error = 0; - - if (error) { -#if KDEBUG - nbytes = *lencopied; -#endif - break; - } - if (*(kernel_addr + bytes_copied - 1) == 0) { - /* - * we found a NULL terminator... 
we're done - */ -#if KDEBUG - nbytes = *lencopied; -#endif - goto done; - } - if (cnt == nbytes) { - /* - * no more room in the buffer and we haven't - * yet come across a NULL terminator - */ -#if KDEBUG - nbytes = *lencopied; -#endif - error = ENAMETOOLONG; - break; - } - assert(cnt == bytes_copied); - - break; - } - if (error) - break; - if ((nbytes -= cnt) == 0) - break; - - kernel_addr += cnt; - user_base += NBPDE; - user_offset = 0; - - if (nbytes > NBPDE) - cnt = NBPDE; - else - cnt = nbytes; - } -done: - thread->machine.copyio_state = WINDOWS_CLOSED; - - KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr, - (unsigned)kernel_addr, (unsigned)nbytes, error, 0); - -#if CONFIG_DTRACE - thread->machine.specFlags &= ~CopyIOActive; -#endif /* CONFIG_DTRACE */ - - return (error); -} - -static int -copyio_phys(addr64_t source, addr64_t sink, vm_size_t csize, int which) -{ - pmap_paddr_t paddr; - user_addr_t vaddr; - char *window_offset; - pt_entry_t pentry; - int ctype; - int retval; - boolean_t istate; - - - if (which & cppvPsnk) { - paddr = (pmap_paddr_t)sink; - vaddr = (user_addr_t)source; - ctype = COPYINPHYS; - pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_RW); - } else { - paddr = (pmap_paddr_t)source; - vaddr = (user_addr_t)sink; - ctype = COPYOUTPHYS; - pentry = (pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME)); - } - /* Fold in cache attributes for this physical page */ - pentry |= pmap_get_cache_attributes(i386_btop(paddr)); - window_offset = (char *)(uintptr_t)((uint32_t)paddr & (PAGE_SIZE - 1)); - - assert(!((current_thread()->machine.specFlags & CopyIOActive) && ((which & cppvKmap) == 0))); - - if (current_thread()->machine.physwindow_busy) { - pt_entry_t old_pentry; - - KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, -1, 0); - /* - * we had better be targeting wired memory at this point - * we will not be able to handle a fault with interrupts - * disabled... we disable them because we can't tolerate - * being preempted during this nested use of the window - */ - istate = ml_set_interrupts_enabled(FALSE); - - old_pentry = *(current_cpu_datap()->cpu_physwindow_ptep); - pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry); - - invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base); - - retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap); - - pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), old_pentry); - - invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base); - - (void) ml_set_interrupts_enabled(istate); - } else { - /* - * mark the window as in use... if an interrupt hits while we're - * busy, or we trigger another coyppv from the fault path into - * the driver on a user address space page fault due to a copyin/out - * then we need to save and restore the current window state instead - * of caching the window preserving it across context switches - */ - current_thread()->machine.physwindow_busy = 1; - - if (current_thread()->machine.physwindow_pte != pentry) { - KERNEL_DEBUG(0xeff70048 | DBG_FUNC_NONE, paddr, csize, 0, 0, 0); - - current_thread()->machine.physwindow_pte = pentry; - - /* - * preemption at this point would be bad since we - * could end up on the other processor after we grabbed the - * pointer to the current cpu data area, but before we finished - * using it to stuff the page table entry since we would - * be modifying a window that no longer belonged to us - * the invlpg can be done unprotected since it only flushes - * this page address from the tlb... 
if it flushes the wrong - * one, no harm is done, and the context switch that moved us - * to the other processor will have already taken care of - * flushing the tlb after it reloaded the page table from machine.physwindow_pte - */ - istate = ml_set_interrupts_enabled(FALSE); - - pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), pentry); - (void) ml_set_interrupts_enabled(istate); - - invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base); - } -#if JOE_DEBUG - else { - if (pentry != - (*(current_cpu_datap()->cpu_physwindow_ptep) & (INTEL_PTE_VALID | PG_FRAME | INTEL_PTE_RW))) - panic("copyio_phys: pentry != *physwindow_ptep"); - } -#endif - retval = copyio(ctype, vaddr, window_offset, csize, NULL, which & cppvKmap); - - current_thread()->machine.physwindow_busy = 0; - } - return (retval); -} - -int -copyinmsg(const user_addr_t user_addr, char *kernel_addr, mach_msg_size_t nbytes) -{ - return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0)); -} - -int -copyin(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes) -{ - return (copyio(COPYIN, user_addr, kernel_addr, nbytes, NULL, 0)); -} - -int -copyinstr(const user_addr_t user_addr, char *kernel_addr, vm_size_t nbytes, vm_size_t *lencopied) -{ - *lencopied = 0; - - return (copyio(COPYINSTR, user_addr, kernel_addr, nbytes, lencopied, 0)); -} - -int -copyoutmsg(const char *kernel_addr, user_addr_t user_addr, mach_msg_size_t nbytes) -{ - return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0)); -} - -int -copyout(const void *kernel_addr, user_addr_t user_addr, vm_size_t nbytes) -{ - return (copyio(COPYOUT, user_addr, (char *)(uintptr_t)kernel_addr, nbytes, NULL, 0)); -} - - -kern_return_t -copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which) -{ - unsigned int lop, csize; - int bothphys = 0; - - KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64, - (unsigned)snk64, size, which, 0); - - if ((which & (cppvPsrc | cppvPsnk)) == 0 ) /* Make sure that only one is virtual */ - panic("copypv: no more than 1 parameter may be virtual\n"); /* Not allowed */ - - if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk)) - bothphys = 1; /* both are physical */ - - while (size) { - - if (bothphys) { - lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1))); /* Assume sink smallest */ - - if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)))) - lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))); /* No, source is smaller */ - } else { - /* - * only need to compute the resid for the physical page - * address... we don't care about where we start/finish in - * the virtual since we just call the normal copyin/copyout - */ - if (which & cppvPsrc) - lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))); - else - lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1))); - } - csize = size; /* Assume we can copy it all */ - if (lop < size) - csize = lop; /* Nope, we can't do it all */ -#if 0 - /* - * flush_dcache64 is currently a nop on the i386... - * it's used when copying to non-system memory such - * as video capture cards... on PPC there was a need - * to flush due to how we mapped this memory... not - * sure if it's needed on i386.
- */ - if (which & cppvFsrc) - flush_dcache64(src64, csize, 1); /* If requested, flush source before move */ - if (which & cppvFsnk) - flush_dcache64(snk64, csize, 1); /* If requested, flush sink before move */ -#endif - if (bothphys) { - bcopy_phys(src64, snk64, csize); /* Do a physical copy, virtually */ - } - else { - if (copyio_phys(src64, snk64, csize, which)) { - return (KERN_FAILURE); - } - } -#if 0 - if (which & cppvFsrc) - flush_dcache64(src64, csize, 1); /* If requested, flush source after move */ - if (which & cppvFsnk) - flush_dcache64(snk64, csize, 1); /* If requested, flush sink after move */ -#endif - size -= csize; /* Calculate what is left */ - snk64 += csize; /* Bump sink to next physical address */ - src64 += csize; /* Bump source to next physical address */ - } - KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64, - (unsigned)snk64, size, which, 0); - - return KERN_SUCCESS; -} -void -copy_window_fault(thread_t thread, vm_map_t map, int window) -{ - pt_entry_t *updp; - pt_entry_t *kpdp; - - /* - * in case there was no page table assigned - * for the user base address and the pmap - * got 'expanded' due to this fault, we'll - * copy in the descriptor - * - * we're either setting the page table descriptor - * to the same value or it was 0... no need - * for a TLB flush in either case - */ - - updp = pmap_pde(map->pmap, thread->machine.copy_window[window].user_base); - assert(updp); - if (0 == updp) panic("trap: updp 0"); /* XXX DEBUG */ - kpdp = current_cpu_datap()->cpu_copywindow_pdp; - kpdp += window; - -#if JOE_DEBUG - if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME)) - panic("kernel_fault: user pdp doesn't match - updp = 0x%qx, kpdp = 0x%qx\n", *updp, *kpdp); -#endif - pmap_store_pte(kpdp, *updp); -} diff --git a/osfmk/i386/cpu.c b/osfmk/i386/cpu.c index 23b38e3db..fc9fcc43e 100644 --- a/osfmk/i386/cpu.c +++ b/osfmk/i386/cpu.c @@ -46,7 +46,6 @@ #include #endif #include -#include #include struct processor processor_master; @@ -101,7 +100,7 @@ cpu_init(void) { cpu_data_t *cdp = current_cpu_datap(); - timer_call_initialize_queue(&cdp->rtclock_timer.queue); + timer_call_queue_init(&cdp->rtclock_timer.queue); cdp->rtclock_timer.deadline = EndOfAllTime; cdp->cpu_type = cpuid_cputype(); @@ -146,19 +145,32 @@ cpu_exit_wait( int cpu) { cpu_data_t *cdp = cpu_datap(cpu); + boolean_t intrs_enabled; + uint64_t tsc_timeout; /* * Wait until the CPU indicates that it has stopped. + * Disable interrupts while the topo lock is held -- arguably + * this should always be done but in this instance it can lead to + * a timeout if a long-running interrupt were to occur here.
*/ + intrs_enabled = ml_set_interrupts_enabled(FALSE); simple_lock(&x86_topo_lock); + /* Set a generous timeout of several seconds (in TSC ticks) */ + tsc_timeout = rdtsc64() + (10ULL * 1000 * 1000 * 1000); while ((cdp->lcpu.state != LCPU_HALT) && (cdp->lcpu.state != LCPU_OFF) && !cdp->lcpu.stopped) { simple_unlock(&x86_topo_lock); + ml_set_interrupts_enabled(intrs_enabled); cpu_pause(); + if (rdtsc64() > tsc_timeout) + panic("cpu_exit_wait(%d) timeout", cpu); + ml_set_interrupts_enabled(FALSE); simple_lock(&x86_topo_lock); } simple_unlock(&x86_topo_lock); + ml_set_interrupts_enabled(intrs_enabled); } void diff --git a/osfmk/i386/cpu_capabilities.h b/osfmk/i386/cpu_capabilities.h index b8d5027af..5205eb2e8 100644 --- a/osfmk/i386/cpu_capabilities.h +++ b/osfmk/i386/cpu_capabilities.h @@ -229,7 +229,6 @@ int _NumCPUs( void ) #define _COMM_TEXT_PFZ_START_OFFSET (0xc00) /* offset for Preemption Free Zone */ #define _COMM_TEXT_PFZ_ENQUEUE_OFFSET (0xc00) /* internal FIFO enqueue */ #define _COMM_TEXT_PFZ_DEQUEUE_OFFSET (0xc80) /* internal FIFO dequeue */ -#define _COMM_TEXT_PFZ_MUTEX_LOCK_OFFSET (0xd00) /* internal pthread_mutex_lock() */ #define _COMM_TEXT_UNUSED_OFFSET (0xd80) /* end of routines in text page */ #define _COMM_TEXT_PFZ_END_OFFSET (0xfff) /* offset for end of PFZ */ @@ -241,7 +240,6 @@ int _NumCPUs( void ) #define _COMM_PAGE_PFZ_ENQUEUE (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_ENQUEUE_OFFSET) #define _COMM_PAGE_PFZ_DEQUEUE (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_DEQUEUE_OFFSET) -#define _COMM_PAGE_PFZ_MUTEX_LOCK (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_MUTEX_LOCK_OFFSET) #define _COMM_PAGE_UNUSED6 (_COMM_PAGE_TEXT_START+_COMM_TEXT_UNUSED_OFFSET) #define _COMM_PAGE_PFZ_END (_COMM_PAGE_TEXT_START+_COMM_TEXT_PFZ_END_OFFSET) @@ -266,7 +264,6 @@ symbol_name: nop CREATE_COMM_PAGE_SYMBOL(___backoff, _COMM_PAGE_BACKOFF) CREATE_COMM_PAGE_SYMBOL(___pfz_enqueue, _COMM_PAGE_PFZ_ENQUEUE) CREATE_COMM_PAGE_SYMBOL(___pfz_dequeue, _COMM_PAGE_PFZ_DEQUEUE) - CREATE_COMM_PAGE_SYMBOL(___pfz_mutex_lock, _COMM_PAGE_PFZ_MUTEX_LOCK) CREATE_COMM_PAGE_SYMBOL(___end_comm_page, _COMM_PAGE_END) .data /* Required to make a well behaved symbol file */ diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index 620ba6a29..e0bb1a7e4 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -71,35 +71,14 @@ typedef struct rtclock_timer { } rtclock_timer_t; -#if defined(__i386__) - typedef struct { - struct i386_tss *cdi_ktss; - struct __attribute__((packed)) { - uint16_t size; - struct fake_descriptor *ptr; - } cdi_gdt, cdi_idt; - struct fake_descriptor *cdi_ldt; - vm_offset_t cdi_sstk; -} cpu_desc_index_t; - -typedef enum { - TASK_MAP_32BIT, /* 32-bit, compatibility mode */ - TASK_MAP_64BIT, /* 64-bit, separate address space */ - TASK_MAP_64BIT_SHARED /* 64-bit, kernel-shared addr space */ -} task_map_t; - -#elif defined(__x86_64__) - - -typedef struct { - struct x86_64_tss *cdi_ktss; + struct x86_64_tss *cdi_ktss; struct __attribute__((packed)) { uint16_t size; void *ptr; } cdi_gdt, cdi_idt; struct fake_descriptor *cdi_ldt; - vm_offset_t cdi_sstk; + vm_offset_t cdi_sstk; } cpu_desc_index_t; typedef enum { @@ -107,9 +86,6 @@ typedef enum { TASK_MAP_64BIT, /* 64-bit user thread, shared space */ } task_map_t; -#else -#error Unsupported architecture -#endif /* * This structure is used on entry into the (uber-)kernel on syscall from @@ -148,7 +124,8 @@ typedef struct cpu_data #define cpu_pd cpu_pal_data /* convenience alias */ struct cpu_data *cpu_this; /* pointer to myself */ thread_t cpu_active_thread; - int cpu_preemption_level; + thread_t cpu_nthread; + volatile int cpu_preemption_level; int cpu_number; /* Logical CPU */ void *cpu_int_state; /* interrupt state */ vm_offset_t cpu_active_stack; /* kernel stack base */ @@ -157,15 +134,14 @@ typedef struct cpu_data int cpu_interrupt_level; int cpu_phys_number; /* Physical CPU */ cpu_id_t cpu_id; /* Platform Expert */ - int cpu_signals; /* IPI events */ - int cpu_prior_signals; /* Last set of events, + volatile int cpu_signals; /* IPI events */ + volatile int cpu_prior_signals; /* Last set of events, * debugging */ ast_t cpu_pending_ast; volatile int cpu_running; boolean_t cpu_fixed_pmcs_enabled; rtclock_timer_t rtclock_timer; - boolean_t cpu_is64bit; volatile addr64_t cpu_active_cr3 __attribute((aligned(64))); union { volatile uint32_t cpu_tlb_invalid; @@ -203,21 +179,18 @@ typedef struct cpu_data uint64_t cpu_hwIntcexits[HWINTCNT_SIZE]; uint64_t cpu_dr7; /* debug control register */ uint64_t cpu_int_event_time; /* intr entry/exit time */ - uint64_t cpu_uber_arg_store; /* Double mapped address - * of current thread's - * uu_arg array. - */ - uint64_t cpu_uber_arg_store_valid; /* Double mapped - * address of pcb - * arg store - * validity flag. 
- */ pal_rtc_nanotime_t *cpu_nanotime; /* Nanotime info */ #if CONFIG_COUNTERS thread_t csw_old_thread; thread_t csw_new_thread; #endif /* CONFIG COUNTERS */ -#if defined(__x86_64__) +#if KPC + /* double-buffered performance counter data */ + uint64_t *cpu_kpc_buf[2]; + /* PMC shadow and reload value buffers */ + uint64_t *cpu_kpc_shadow; + uint64_t *cpu_kpc_reload; +#endif uint32_t cpu_pmap_pcid_enabled; pcid_t cpu_active_pcid; pcid_t cpu_last_pcid; @@ -231,7 +204,6 @@ typedef struct cpu_data uint64_t cpu_pmap_pcid_flushes; uint64_t cpu_pmap_pcid_preserves; #endif -#endif /* x86_64 */ uint64_t cpu_aperf; uint64_t cpu_mperf; uint64_t cpu_c3res; @@ -248,9 +220,9 @@ typedef struct cpu_data uint64_t cpu_cur_urc; uint64_t cpu_max_observed_int_latency; int cpu_max_observed_int_latency_vector; + volatile boolean_t cpu_NMI_acknowledged; uint64_t debugger_entry_time; uint64_t debugger_ipi_time; - volatile boolean_t cpu_NMI_acknowledged; /* A separate nested interrupt stack flag, to account * for non-nested interrupts arriving while on the interrupt stack * Currently only occurs when AICPM enables interrupts on the @@ -275,9 +247,43 @@ typedef struct cpu_data } cpu_data_t; extern cpu_data_t *cpu_data_ptr[]; -extern cpu_data_t cpu_data_master; /* Macro to generate inline bodies to retrieve per-cpu data fields. */ +#if defined(__clang__) +#define GS_RELATIVE volatile __attribute__((address_space(256))) +#ifndef offsetof +#define offsetof(TYPE,MEMBER) __builtin_offsetof(TYPE,MEMBER) +#endif + +#define CPU_DATA_GET(member,type) \ + cpu_data_t GS_RELATIVE *cpu_data = \ + (cpu_data_t GS_RELATIVE *)0UL; \ + type ret; \ + ret = cpu_data->member; \ + return ret; + +#define CPU_DATA_GET_INDEX(member,index,type) \ + cpu_data_t GS_RELATIVE *cpu_data = \ + (cpu_data_t GS_RELATIVE *)0UL; \ + type ret; \ + ret = cpu_data->member[index]; \ + return ret; + +#define CPU_DATA_SET(member,value) \ + cpu_data_t GS_RELATIVE *cpu_data = \ + (cpu_data_t GS_RELATIVE *)0UL; \ + cpu_data->member = value; + +#define CPU_DATA_XCHG(member,value,type) \ + cpu_data_t GS_RELATIVE *cpu_data = \ + (cpu_data_t GS_RELATIVE *)0UL; \ + type ret; \ + ret = cpu_data->member; \ + cpu_data->member = value; \ + return ret; + +#else /* !defined(__clang__) */ + #ifndef offsetof #define offsetof(TYPE,MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif /* offsetof */ @@ -299,6 +305,7 @@ extern cpu_data_t cpu_data_master; __asm__ volatile ("mov %0,%%gs:%P1" \ : \ : "r" (value), "i" (offsetof(cpu_data_t,member))); + #define CPU_DATA_XCHG(member,value,type) \ type ret; \ __asm__ volatile ("xchg %0,%%gs:%P1" \ @@ -306,6 +313,8 @@ extern cpu_data_t cpu_data_master; : "i" (offsetof(cpu_data_t,member)), "0" (value)); \ return ret; +#endif /* !defined(__clang__) */ + /* * Everyone within the osfmk part of the kernel can use the fast * inline versions of these routines. 
Everyone outside, must call @@ -319,16 +328,7 @@ get_active_thread(void) #define current_thread_fast() get_active_thread() #define current_thread() current_thread_fast() -static inline boolean_t -get_is64bit(void) -{ - CPU_DATA_GET(cpu_is64bit, boolean_t) -} -#if CONFIG_YONAH -#define cpu_mode_is64bit() get_is64bit() -#else #define cpu_mode_is64bit() TRUE -#endif static inline int get_preemption_level(void) @@ -355,9 +355,14 @@ get_cpu_phys_number(void) static inline void disable_preemption(void) { +#if defined(__clang__) + cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL; + cpu_data->cpu_preemption_level++; +#else __asm__ volatile ("incl %%gs:%P0" : : "i" (offsetof(cpu_data_t, cpu_preemption_level))); +#endif } static inline void @@ -365,6 +370,11 @@ enable_preemption(void) { assert(get_preemption_level() > 0); +#if defined(__clang__) + cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL; + if (0 == --cpu_data->cpu_preemption_level) + kernel_preempt_check(); +#else __asm__ volatile ("decl %%gs:%P0 \n\t" "jne 1f \n\t" "call _kernel_preempt_check \n\t" @@ -372,6 +382,7 @@ enable_preemption(void) : /* no outputs */ : "i" (offsetof(cpu_data_t, cpu_preemption_level)) : "eax", "ecx", "edx", "cc", "memory"); +#endif } static inline void @@ -379,10 +390,15 @@ enable_preemption_no_check(void) { assert(get_preemption_level() > 0); +#if defined(__clang__) + cpu_data_t GS_RELATIVE *cpu_data = (cpu_data_t GS_RELATIVE *)0UL; + cpu_data->cpu_preemption_level--; +#else __asm__ volatile ("decl %%gs:%P0" : /* no outputs */ : "i" (offsetof(cpu_data_t, cpu_preemption_level)) : "cc", "memory"); +#endif } static inline void diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c index c36eb89b6..b1c0b6104 100644 --- a/osfmk/i386/cpu_threads.c +++ b/osfmk/i386/cpu_threads.c @@ -27,7 +27,7 @@ */ #include #include -#include +#include #include #include #include @@ -348,9 +348,6 @@ x86_lcpu_init(int cpu) lcpu->state = LCPU_OFF; for (i = 0; i < MAX_CACHE_DEPTH; i += 1) lcpu->caches[i] = NULL; - - lcpu->master = (lcpu->cpu_num == (unsigned int) master_cpu); - lcpu->primary = (lcpu->pnum % topoParms.nPThreadsPerPackage) == 0; } static x86_core_t * @@ -468,30 +465,32 @@ x86_core_find(int cpu) } void -x86_set_lcpu_numbers(x86_lcpu_t *lcpu) +x86_set_logical_topology(x86_lcpu_t *lcpu, int pnum, int lnum) { - lcpu->lnum = lcpu->cpu_num % topoParms.nLThreadsPerCore; -} + x86_core_t *core = lcpu->core; + x86_die_t *die = lcpu->die; + x86_pkg_t *pkg = lcpu->package; + + assert(core != NULL); + assert(die != NULL); + assert(pkg != NULL); -void -x86_set_core_numbers(x86_core_t *core, x86_lcpu_t *lcpu) -{ - core->pcore_num = lcpu->cpu_num / topoParms.nLThreadsPerCore; + lcpu->cpu_num = lnum; + lcpu->pnum = pnum; + lcpu->master = (lnum == master_cpu); + lcpu->primary = (lnum % topoParms.nLThreadsPerPackage) == 0; + + lcpu->lnum = lnum % topoParms.nLThreadsPerCore; + + core->pcore_num = lnum / topoParms.nLThreadsPerCore; core->lcore_num = core->pcore_num % topoParms.nLCoresPerDie; -} -void -x86_set_die_numbers(x86_die_t *die, x86_lcpu_t *lcpu) -{ - die->pdie_num = lcpu->cpu_num / (topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie); + die->pdie_num = lnum / (topoParms.nLThreadsPerCore*topoParms.nLCoresPerDie); die->ldie_num = die->pdie_num % topoParms.nLDiesPerPackage; -} -void -x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu) -{ - pkg->ppkg_num = lcpu->cpu_num / topoParms.nLThreadsPerPackage; + pkg->ppkg_num = lnum / topoParms.nLThreadsPerPackage; pkg->lpkg_num = pkg->ppkg_num; + } 
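The consolidated x86_set_logical_topology() above derives every per-thread, per-core, per-die, and per-package ID from the single logical CPU number by division and modulus against the topology parameters. A minimal standalone sketch of that arithmetic, under stated assumptions: the topo_parms struct, its field names, decompose(), and the parameter values below are illustrative stand-ins, not the kernel's actual x86_topology_parameters_t.

/*
 * Sketch of the numbering scheme used by x86_set_logical_topology():
 * all IDs derive from the logical CPU number (lnum).
 */
#include <stdio.h>

struct topo_parms {
	int nLThreadsPerCore;	 /* logical threads per core */
	int nLCoresPerDie;	 /* logical cores per die */
	int nLDiesPerPackage;	 /* logical dies per package */
	int nLThreadsPerPackage; /* product of the three above */
};

static void
decompose(int lnum, const struct topo_parms *p)
{
	int lthread = lnum % p->nLThreadsPerCore;
	int pcore = lnum / p->nLThreadsPerCore;
	int lcore = pcore % p->nLCoresPerDie;
	int pdie = lnum / (p->nLThreadsPerCore * p->nLCoresPerDie);
	int ldie = pdie % p->nLDiesPerPackage;
	int lpkg = lnum / p->nLThreadsPerPackage;

	printf("lnum %2d -> thread %d, core %d(%d), die %d(%d), pkg %d\n",
	    lnum, lthread, pcore, lcore, pdie, ldie, lpkg);
}

int
main(void)
{
	/* hypothetical 2 threads/core x 4 cores/die x 1 die/package */
	struct topo_parms p = { 2, 4, 1, 8 };

	for (int lnum = 0; lnum < 16; lnum++)
		decompose(lnum, &p);
	return 0;
}

With these made-up parameters, logical CPUs 0-7 land in package 0 and 8-15 in package 1, which is the same pcore_num/lcore_num/pdie_num/ppkg_num arithmetic the new function performs in one place instead of across the four x86_set_*_numbers() helpers it replaces.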
static x86_die_t * @@ -964,7 +963,7 @@ cpu_thread_init(void) simple_unlock(&x86_topo_lock); pmCPUMarkRunning(cpup); - etimer_resync_deadlines(); + timer_resync_deadlines(); } /* @@ -1001,7 +1000,7 @@ cpu_thread_halt(void) * after the complete topology is built and no other changes are being made. */ void -validate_topology(void) +x86_validate_topology(void) { x86_pkg_t *pkg; x86_die_t *die; diff --git a/osfmk/i386/cpu_threads.h b/osfmk/i386/cpu_threads.h index a576ef70d..31fe81779 100644 --- a/osfmk/i386/cpu_threads.h +++ b/osfmk/i386/cpu_threads.h @@ -68,10 +68,8 @@ extern void *cpu_thread_alloc(int); extern void cpu_thread_init(void); extern void cpu_thread_halt(void); -extern void x86_set_lcpu_numbers(x86_lcpu_t *lcpu); -extern void x86_set_core_numbers(x86_core_t *core, x86_lcpu_t *lcpu); -extern void x86_set_die_numbers(x86_die_t *die, x86_lcpu_t *lcpu); -extern void x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu); +extern void x86_set_logical_topology(x86_lcpu_t *lcpu, int pnum, int lnum); +extern void x86_validate_topology(void); extern x86_topology_parameters_t topoParms; @@ -82,6 +80,4 @@ extern boolean_t topo_dbg; kprintf(x); \ } while (0) \ -extern void validate_topology(void); - #endif /* _I386_CPU_THREADS_H_ */ diff --git a/osfmk/i386/cpu_topology.c b/osfmk/i386/cpu_topology.c index 6be77e6ff..76a9e8edf 100644 --- a/osfmk/i386/cpu_topology.c +++ b/osfmk/i386/cpu_topology.c @@ -106,17 +106,10 @@ cpu_topology_sort(int ncpus) } /* - * Fix up logical numbers and reset the map kept by the lapic code. + * Finalize logical numbers and map kept by the lapic code. */ - for (i = 1; i < ncpus; i++) { + for (i = 0; i < ncpus; i++) { cpu_data_t *cpup = cpu_datap(i); - x86_core_t *core = cpup->lcpu.core; - x86_die_t *die = cpup->lcpu.die; - x86_pkg_t *pkg = cpup->lcpu.package; - - assert(core != NULL); - assert(die != NULL); - assert(pkg != NULL); if (cpup->cpu_number != i) { kprintf("cpu_datap(%d):%p local apic id 0x%x " @@ -125,16 +118,11 @@ cpu_topology_sort(int ncpus) cpup->cpu_number); } cpup->cpu_number = i; - cpup->lcpu.cpu_num = i; - cpup->lcpu.pnum = cpup->cpu_phys_number; lapic_cpu_map(cpup->cpu_phys_number, i); - x86_set_lcpu_numbers(&cpup->lcpu); - x86_set_core_numbers(core, &cpup->lcpu); - x86_set_die_numbers(die, &cpup->lcpu); - x86_set_pkg_numbers(pkg, &cpup->lcpu); + x86_set_logical_topology(&cpup->lcpu, cpup->cpu_phys_number, i); } - validate_topology(); + x86_validate_topology(); ml_set_interrupts_enabled(istate); TOPO_DBG("cpu_topology_start() LLC is L%d\n", topoParms.LLCDepth + 1); diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index 3ca38be8b..090da7d76 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -229,36 +229,15 @@ cpuid_leaf2_find(uint8_t value) * CPU identification routines. 
*/ -static i386_cpu_info_t *cpuid_cpu_infop = NULL; static i386_cpu_info_t cpuid_cpu_info; +static i386_cpu_info_t *cpuid_cpu_infop = NULL; -#if defined(__x86_64__) static void cpuid_fn(uint32_t selector, uint32_t *result) { do_cpuid(selector, result); DBG("cpuid_fn(0x%08x) eax:0x%08x ebx:0x%08x ecx:0x%08x edx:0x%08x\n", selector, result[0], result[1], result[2], result[3]); } -#else -static void cpuid_fn(uint32_t selector, uint32_t *result) -{ - if (get_is64bit()) { - asm("call _cpuid64" - : "=a" (result[0]), - "=b" (result[1]), - "=c" (result[2]), - "=d" (result[3]) - : "a"(selector), - "b" (0), - "c" (0), - "d" (0)); - } else { - do_cpuid(selector, result); - } - DBG("cpuid_fn(0x%08x) eax:0x%08x ebx:0x%08x ecx:0x%08x edx:0x%08x\n", - selector, result[0], result[1], result[2], result[3]); -} -#endif static const char *cache_type_str[LCACHE_MAX] = { "Lnone", "L1I", "L1D", "L2U", "L3U" @@ -752,11 +731,6 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p) switch (info_p->cpuid_family) { case 6: switch (info_p->cpuid_model) { -#if CONFIG_YONAH - case 14: - cpufamily = CPUFAMILY_INTEL_YONAH; - break; -#endif case 15: cpufamily = CPUFAMILY_INTEL_MEROM; break; @@ -803,10 +777,6 @@ cpuid_set_info(void) { i386_cpu_info_t *info_p = &cpuid_cpu_info; - PE_parse_boot_argn("-cpuid", &cpuid_dbg, sizeof(cpuid_dbg)); - - bzero((void *)info_p, sizeof(cpuid_cpu_info)); - cpuid_set_generic_info(info_p); /* verify we are running on a supported CPU */ @@ -819,7 +789,7 @@ cpuid_set_info(void) info_p->cpuid_cpu_type = CPU_TYPE_X86; info_p->cpuid_cpu_subtype = CPU_SUBTYPE_X86_ARCH1; /* Must be invoked after set_generic_info */ - cpuid_set_cache_info(&cpuid_cpu_info); + cpuid_set_cache_info(info_p); /* * Find the number of enabled cores and threads @@ -850,7 +820,7 @@ cpuid_set_info(void) DBG(" core_count : %d\n", info_p->core_count); DBG(" thread_count : %d\n", info_p->thread_count); - cpuid_cpu_info.cpuid_model_string = ""; /* deprecated */ + info_p->cpuid_model_string = ""; /* deprecated */ } static struct table { @@ -930,14 +900,14 @@ extfeature_map[] = { }, leaf7_feature_map[] = { + {CPUID_LEAF7_FEATURE_SMEP, "SMEP"}, + {CPUID_LEAF7_FEATURE_ENFSTRG, "ENFSTRG"}, {CPUID_LEAF7_FEATURE_RDWRFSGS, "RDWRFSGS"}, {CPUID_LEAF7_FEATURE_TSCOFF, "TSC_THREAD_OFFSET"}, {CPUID_LEAF7_FEATURE_BMI1, "BMI1"}, {CPUID_LEAF7_FEATURE_HLE, "HLE"}, - {CPUID_LEAF7_FEATURE_SMEP, "SMEP"}, {CPUID_LEAF7_FEATURE_AVX2, "AVX2"}, {CPUID_LEAF7_FEATURE_BMI2, "BMI2"}, - {CPUID_LEAF7_FEATURE_ENFSTRG, "ENFSTRG"}, {CPUID_LEAF7_FEATURE_INVPCID, "INVPCID"}, {CPUID_LEAF7_FEATURE_RTM, "RTM"}, {0, 0} @@ -970,6 +940,7 @@ cpuid_info(void) { /* Set up the cpuid_info structure lazily */ if (cpuid_cpu_infop == NULL) { + PE_parse_boot_argn("-cpuid", &cpuid_dbg, sizeof(cpuid_dbg)); cpuid_set_info(); cpuid_cpu_infop = &cpuid_cpu_info; } @@ -1010,10 +981,10 @@ cpuid_feature_display( #define s_if_plural(n) ((n > 1) ?
"s" : "") kprintf(" HTT: %d core%s per package;" " %d logical cpu%s per package\n", - cpuid_cpu_info.cpuid_cores_per_package, - s_if_plural(cpuid_cpu_info.cpuid_cores_per_package), - cpuid_cpu_info.cpuid_logical_per_package, - s_if_plural(cpuid_cpu_info.cpuid_logical_per_package)); + cpuid_cpu_infop->cpuid_cores_per_package, + s_if_plural(cpuid_cpu_infop->cpuid_cores_per_package), + cpuid_cpu_infop->cpuid_logical_per_package, + s_if_plural(cpuid_cpu_infop->cpuid_logical_per_package)); } } @@ -1032,8 +1003,8 @@ void cpuid_cpu_display( const char *header) { - if (cpuid_cpu_info.cpuid_brand_string[0] != '\0') { - kprintf("%s: %s\n", header, cpuid_cpu_info.cpuid_brand_string); + if (cpuid_cpu_infop->cpuid_brand_string[0] != '\0') { + kprintf("%s: %s\n", header, cpuid_cpu_infop->cpuid_brand_string); } } @@ -1074,15 +1045,15 @@ cpuid_features(void) printf("limiting fpu features to: %s\n", fpu_arg); if (!strncmp("387", fpu_arg, sizeof("387")) || !strncmp("mmx", fpu_arg, sizeof("mmx"))) { printf("no sse or sse2\n"); - cpuid_cpu_info.cpuid_features &= ~(CPUID_FEATURE_SSE | CPUID_FEATURE_SSE2 | CPUID_FEATURE_FXSR); + cpuid_cpu_infop->cpuid_features &= ~(CPUID_FEATURE_SSE | CPUID_FEATURE_SSE2 | CPUID_FEATURE_FXSR); } else if (!strncmp("sse", fpu_arg, sizeof("sse"))) { printf("no sse2\n"); - cpuid_cpu_info.cpuid_features &= ~(CPUID_FEATURE_SSE2); + cpuid_cpu_infop->cpuid_features &= ~(CPUID_FEATURE_SSE2); } } checked = 1; } - return cpuid_cpu_info.cpuid_features; + return cpuid_cpu_infop->cpuid_features; } uint64_t @@ -1163,3 +1134,4 @@ cpuid_vmm_family(void) { return cpuid_vmm_info()->cpuid_vmm_family; } + diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index 7597bc653..38b5ac7c0 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -127,13 +127,13 @@ * Bits returned in %ebx to a CPUID request with {%eax,%ecx} of {0x7,0x0}: */ #define CPUID_LEAF7_FEATURE_RDWRFSGS _Bit(0) /* FS/GS base read/write */ +#define CPUID_LEAF7_FEATURE_SMEP _Bit(7) /* Supervisor Mode Execute Protect */ +#define CPUID_LEAF7_FEATURE_ENFSTRG _Bit(9) /* ENhanced Fast STRinG copy */ #define CPUID_LEAF7_FEATURE_TSCOFF _Bit(1) /* TSC thread offset */ #define CPUID_LEAF7_FEATURE_BMI1 _Bit(3) /* Bit Manipulation Instrs, set 1 */ #define CPUID_LEAF7_FEATURE_HLE _Bit(4) /* Hardware Lock Elision */ #define CPUID_LEAF7_FEATURE_AVX2 _Bit(5) /* AVX2 Instructions */ -#define CPUID_LEAF7_FEATURE_SMEP _Bit(7) /* Supervisor Mode Execute Protect */ #define CPUID_LEAF7_FEATURE_BMI2 _Bit(8) /* Bit Manipulation Instrs, set 2 */ -#define CPUID_LEAF7_FEATURE_ENFSTRG _Bit(9) /* ENhanced Fast STRinG copy */ #define CPUID_LEAF7_FEATURE_INVPCID _Bit(10) /* INVPCID instruction, TBD */ #define CPUID_LEAF7_FEATURE_RTM _Bit(11) /* TBD */ @@ -161,24 +161,25 @@ #define CPUID_MWAIT_EXTENSION _Bit(0) /* enumeration of MWAIT extensions */ #define CPUID_MWAIT_BREAK _Bit(1) /* interrupts are break events */ -#define CPUID_MODEL_YONAH 0x0E -#define CPUID_MODEL_MEROM 0x0F -#define CPUID_MODEL_PENRYN 0x17 -#define CPUID_MODEL_NEHALEM 0x1A -#define CPUID_MODEL_FIELDS 0x1E /* Lynnfield, Clarksfield, Jasper */ -#define CPUID_MODEL_DALES 0x1F /* Havendale, Auburndale */ -#define CPUID_MODEL_NEHALEM_EX 0x2E -#define CPUID_MODEL_DALES_32NM 0x25 /* Clarkdale, Arrandale */ -#define CPUID_MODEL_WESTMERE 0x2C /* Gulftown, Westmere-EP, Westmere-WS */ -#define CPUID_MODEL_WESTMERE_EX 0x2F -#define CPUID_MODEL_SANDYBRIDGE 0x2A -#define CPUID_MODEL_JAKETOWN 0x2D -#define CPUID_MODEL_IVYBRIDGE 0x3A -#define CPUID_MODEL_HASWELL 0x3C -#define CPUID_MODEL_HASWELL_SVR 0x3F
-#define CPUID_MODEL_HASWELL_ULT 0x45 -#define CPUID_MODEL_CRYSTALWELL 0x46 - +#define CPUID_MODEL_YONAH 0x0E +#define CPUID_MODEL_MEROM 0x0F +#define CPUID_MODEL_PENRYN 0x17 +#define CPUID_MODEL_NEHALEM 0x1A +#define CPUID_MODEL_FIELDS 0x1E /* Lynnfield, Clarksfield */ +#define CPUID_MODEL_DALES 0x1F /* Havendale, Auburndale */ +#define CPUID_MODEL_NEHALEM_EX 0x2E +#define CPUID_MODEL_DALES_32NM 0x25 /* Clarkdale, Arrandale */ +#define CPUID_MODEL_WESTMERE 0x2C /* Gulftown, Westmere-EP/-WS */ +#define CPUID_MODEL_WESTMERE_EX 0x2F +#define CPUID_MODEL_SANDYBRIDGE 0x2A +#define CPUID_MODEL_JAKETOWN 0x2D +#define CPUID_MODEL_IVYBRIDGE 0x3A +#ifdef PRIVATE +#define CPUID_MODEL_CRYSTALWELL 0x46 +#endif +#define CPUID_MODEL_HASWELL 0x3C +#define CPUID_MODEL_HASWELL_SVR 0x3F +#define CPUID_MODEL_HASWELL_ULT 0x45 #define CPUID_VMM_FAMILY_UNKNOWN 0x0 #define CPUID_VMM_FAMILY_VMWARE 0x1 @@ -389,9 +390,7 @@ extern uint64_t cpuid_leaf7_features(void); extern uint32_t cpuid_family(void); extern uint32_t cpuid_cpufamily(void); -extern void cpuid_get_info(i386_cpu_info_t *info_p); extern i386_cpu_info_t *cpuid_info(void); - extern void cpuid_set_info(void); #ifdef MACH_KERNEL_PRIVATE diff --git a/osfmk/i386/cswitch.s b/osfmk/i386/cswitch.s deleted file mode 100644 index 6651e5404..000000000 --- a/osfmk/i386/cswitch.s +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -#include - -#include -#include -#include -#include - - -#define CX(addr, reg) addr(,reg,4) - - .text -/* - * Context switch routines for i386. - */ - -Entry(Load_context) - movl S_ARG0,%ecx /* get thread */ - movl TH_KERNEL_STACK(%ecx),%ecx /* get kernel stack */ - lea -IKS_SIZE(%ecx),%edx - add EXT(kernel_stack_size),%edx /* point to stack top */ - movl %ecx,%gs:CPU_ACTIVE_STACK /* store stack address */ - movl %edx,%gs:CPU_KERNEL_STACK /* store stack top */ - - movl %edx,%esp - movl %edx,%ebp - - subl $12, %esp /* align stack */ - xorl %eax,%eax /* return zero (no old thread) */ - pushl %eax - call EXT(thread_continue) - -/* - * This has to save registers only - * when there is no explicit continuation. - */ - -Entry(Switch_context) - popl %eax /* pop return PC */ - - /* Test for a continuation and skip all state saving if so... */ - cmpl $0,4(%esp) - jne 5f - movl %gs:CPU_KERNEL_STACK,%ecx /* get old kernel stack top */ - movl %ebx,KSS_EBX(%ecx) /* save registers */ - movl %ebp,KSS_EBP(%ecx) - movl %edi,KSS_EDI(%ecx) - movl %esi,KSS_ESI(%ecx) - movl %eax,KSS_EIP(%ecx) /* save return PC */ - movl %esp,KSS_ESP(%ecx) /* save SP */ -5: - movl 0(%esp),%eax /* return old thread */ - movl 8(%esp),%ecx /* get new thread */ - movl %ecx,%gs:CPU_ACTIVE_THREAD /* new thread is active */ - movl TH_KERNEL_STACK(%ecx),%ebx /* get its kernel stack */ - lea -IKS_SIZE(%ebx),%ecx - add EXT(kernel_stack_size),%ecx - /* point to stack top */ - - movl %ebx,%gs:CPU_ACTIVE_STACK /* set current stack */ - movl %ecx,%gs:CPU_KERNEL_STACK /* set stack top */ - - - movl KSS_ESP(%ecx),%esp /* switch stacks */ - movl KSS_ESI(%ecx),%esi /* restore registers */ - movl KSS_EDI(%ecx),%edi - movl KSS_EBP(%ecx),%ebp - movl KSS_EBX(%ecx),%ebx - jmp *KSS_EIP(%ecx) /* return old thread */ - -Entry(Thread_continue) - subl $12, %esp /* align stack */ - pushl %eax /* push the thread argument */ - xorl %ebp,%ebp /* zero frame pointer */ - call *%ebx /* call real continuation */ - -/* - * thread_t Shutdown_context(thread_t thread, - * void (*routine)(processor_t), - * processor_t processor) - * - * saves the kernel context of the thread, - * switches to the interrupt stack, - * continues the thread (with thread_continue), - * then runs routine on the interrupt stack. 
- * - */ -Entry(Shutdown_context) - movl %gs:CPU_KERNEL_STACK,%ecx /* get old kernel stack */ - movl %ebx,KSS_EBX(%ecx) /* save registers */ - movl %ebp,KSS_EBP(%ecx) - movl %edi,KSS_EDI(%ecx) - movl %esi,KSS_ESI(%ecx) - popl KSS_EIP(%ecx) /* save return PC */ - movl %esp,KSS_ESP(%ecx) /* save SP */ - - movl %gs:CPU_ACTIVE_STACK,%ecx /* get old stack */ - movl 0(%esp),%eax /* get old thread */ - movl %ecx,TH_KERNEL_STACK(%eax) /* save old stack */ - movl 4(%esp),%ebx /* get routine to run next */ - movl 8(%esp),%esi /* get its argument */ - - movl %gs:CPU_INT_STACK_TOP,%esp /* switch to interrupt stack */ - - subl $12, %esp /* align stack */ - pushl %esi /* push argument */ - call *%ebx /* call routine to run */ - hlt /* (should never return) */ diff --git a/osfmk/i386/endian.h b/osfmk/i386/endian.h index 62dedd9e9..248519588 100644 --- a/osfmk/i386/endian.h +++ b/osfmk/i386/endian.h @@ -55,9 +55,7 @@ static __inline__ unsigned short ntohs(unsigned short w_int) { - register unsigned short w = w_int; - __asm__ volatile("xchgb %h1,%b1" : "=q" (w) : "0" (w)); - return (w); /* zero-extend for compat */ + return ((w_int << 8) | (w_int >> 8)); } #endif @@ -72,9 +70,13 @@ static __inline__ unsigned long ntohl(register unsigned long value) { +#if defined(__clang__) + return (unsigned long)__builtin_bswap32((unsigned int)value); +#else register unsigned long l = value; __asm__ volatile("bswap %0" : "=r" (l) : "0" (l)); return l; +#endif } #endif diff --git a/osfmk/i386/etimer.c b/osfmk/i386/etimer.c deleted file mode 100644 index 3e03db1e9..000000000 --- a/osfmk/i386/etimer.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * @APPLE_FREE_COPYRIGHT@ - */ -/* - * File: etimer.c - * Purpose: Routines for handling the machine independent - * event timer. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -/* - * Event timer interrupt. - * - * XXX a drawback of this implementation is that events serviced earlier must not set deadlines - * that occur before the entire chain completes. 
- * - * XXX a better implementation would use a set of generic callouts and iterate over them - */ -void -etimer_intr(int user_mode, - uint64_t rip) -{ - uint64_t abstime; - rtclock_timer_t *mytimer; - cpu_data_t *pp; - int32_t latency; - uint64_t pmdeadline; - - pp = current_cpu_datap(); - - SCHED_STATS_TIMER_POP(current_processor()); - - abstime = mach_absolute_time(); /* Get the time now */ - - /* has a pending clock timer expired? */ - mytimer = &pp->rtclock_timer; /* Point to the event timer */ - if (mytimer->deadline <= abstime) { - /* - * Log interrupt service latency (-ve value expected by tool) - * a non-PM event is expected next. - * The requested deadline may be earlier than when it was set - * - use MAX to avoid reporting bogus latencies. - */ - latency = (int32_t) (abstime - MAX(mytimer->deadline, - mytimer->when_set)); - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - DECR_TRAP_LATENCY | DBG_FUNC_NONE, - -latency, - ((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)), - user_mode, 0, 0); - - mytimer->has_expired = TRUE; /* Remember that we popped */ - mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime); - mytimer->has_expired = FALSE; - - /* Get the time again since we ran a bit */ - abstime = mach_absolute_time(); - mytimer->when_set = abstime; - } - - /* is it time for power management state change? */ - if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) { - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - DECR_PM_DEADLINE | DBG_FUNC_START, - 0, 0, 0, 0, 0); - pmCPUDeadline(pp); - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - DECR_PM_DEADLINE | DBG_FUNC_END, - 0, 0, 0, 0, 0); - } - - /* schedule our next deadline */ - etimer_resync_deadlines(); -} - -/* - * Set the clock deadline. - */ -void etimer_set_deadline(uint64_t deadline) -{ - rtclock_timer_t *mytimer; - spl_t s; - cpu_data_t *pp; - - s = splclock(); /* no interruptions */ - pp = current_cpu_datap(); - - mytimer = &pp->rtclock_timer; /* Point to the timer itself */ - mytimer->deadline = deadline; /* Set new expiration time */ - mytimer->when_set = mach_absolute_time(); - - etimer_resync_deadlines(); - - splx(s); -} - -/* - * Re-evaluate the outstanding deadlines and select the most proximate. - * - * Should be called at splclock. - */ -void -etimer_resync_deadlines(void) -{ - uint64_t deadline = EndOfAllTime; - uint64_t pmdeadline; - rtclock_timer_t *mytimer; - spl_t s = splclock(); - cpu_data_t *pp; - uint32_t decr; - - pp = current_cpu_datap(); - if (!pp->cpu_running) - /* There's really nothing to do if this procesor is down */ - return; - - /* - * If we have a clock timer set, pick that. - */ - mytimer = &pp->rtclock_timer; - if (!mytimer->has_expired && - 0 < mytimer->deadline && mytimer->deadline < EndOfAllTime) - deadline = mytimer->deadline; - - /* - * If we have a power management deadline, see if that's earlier. - */ - pmdeadline = pmCPUGetDeadline(pp); - if (0 < pmdeadline && pmdeadline < deadline) - deadline = pmdeadline; - - /* - * Go and set the "pop" event. 
- */ - decr = (uint32_t) setPop(deadline); - - /* Record non-PM deadline for latency tool */ - if (deadline != pmdeadline) { - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - DECR_SET_DEADLINE | DBG_FUNC_NONE, - decr, 2, - deadline, (uint32_t)(deadline >> 32), 0); - } - splx(s); -} - -void etimer_timer_expire(void *arg); - -void -etimer_timer_expire( -__unused void *arg) -{ - rtclock_timer_t *mytimer; - uint64_t abstime; - cpu_data_t *pp; - - pp = current_cpu_datap(); - - mytimer = &pp->rtclock_timer; - abstime = mach_absolute_time(); - - mytimer->has_expired = TRUE; - mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime); - mytimer->has_expired = FALSE; - mytimer->when_set = mach_absolute_time(); - - etimer_resync_deadlines(); -} - -uint64_t -timer_call_slop( - uint64_t deadline) -{ - uint64_t now = mach_absolute_time(); - if (deadline > now) { - return MIN((deadline - now) >> 3, NSEC_PER_MSEC); /* Min of 12.5% and 1ms */ - } - - return 0; -} - -mpqueue_head_t * -timer_queue_assign( - uint64_t deadline) -{ - cpu_data_t *cdp = current_cpu_datap(); - mpqueue_head_t *queue; - - if (cdp->cpu_running) { - queue = &cdp->rtclock_timer.queue; - - if (deadline < cdp->rtclock_timer.deadline) - etimer_set_deadline(deadline); - } - else - queue = &cpu_datap(master_cpu)->rtclock_timer.queue; - - return (queue); -} - -void -timer_queue_cancel( - mpqueue_head_t *queue, - uint64_t deadline, - uint64_t new_deadline) -{ - if (queue == &current_cpu_datap()->rtclock_timer.queue) { - if (deadline < new_deadline) - etimer_set_deadline(new_deadline); - } -} - -/* - * etimer_queue_migrate() is called from the Power-Management kext - * when a logical processor goes idle (in a deep C-state) with a distant - * deadline so that its timer queue can be moved to another processor. - * This target processor should be the least idle (most busy) -- - * currently this is the primary processor for the calling thread's package. - * Locking restrictions demand that the target cpu must be the boot cpu. - */ -uint32_t -etimer_queue_migrate(int target_cpu) -{ - cpu_data_t *target_cdp = cpu_datap(target_cpu); - cpu_data_t *cdp = current_cpu_datap(); - int ntimers_moved; - - assert(!ml_get_interrupts_enabled()); - assert(target_cpu != cdp->cpu_number); - assert(target_cpu == master_cpu); - - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - DECR_TIMER_MIGRATE | DBG_FUNC_START, - target_cpu, - cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >>32), - 0, 0); - - /* - * Move timer requests from the local queue to the target processor's. - * The return value is the number of requests moved. If this is 0, - * it indicates that the first (i.e. earliest) timer is earlier than - * the earliest for the target processor. Since this would force a - * resync, the move of this and all later requests is aborted. - */ - ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue, - &target_cdp->rtclock_timer.queue); - - /* - * Assuming we moved stuff, clear local deadline. - */ - if (ntimers_moved > 0) { - cdp->rtclock_timer.deadline = EndOfAllTime; - setPop(EndOfAllTime); - } - - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - DECR_TIMER_MIGRATE | DBG_FUNC_END, - target_cpu, ntimers_moved, 0, 0, 0); - - return ntimers_moved; -} diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 879851b8a..d7b7056b2 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -334,54 +334,15 @@ static void fpu_load_registers(void *fstate) { } #endif /* DEBUG */ -#if defined(__i386__) - if (layout == FXSAVE32) { - /* Restore the compatibility/legacy mode XMM+x87 state */ - fxrstor(ifps); - } - else if (layout == FXSAVE64) { - fxrstor64(ifps); - } - else if (layout == XSAVE32) { - xrstor(ifps); - } - else if (layout == XSAVE64) { - xrstor64(ifps); - } -#elif defined(__x86_64__) if ((layout == XSAVE64) || (layout == XSAVE32)) xrstor(ifps); else fxrstor(ifps); -#endif } static void fpu_store_registers(void *fstate, boolean_t is64) { struct x86_fx_thread_state *ifps = fstate; assert(ALIGNED(ifps, 64)); -#if defined(__i386__) - if (!is64) { - if (fpu_YMM_present) { - xsave(ifps); - ifps->fp_save_layout = XSAVE32; - } - else { - /* save the compatibility/legacy mode XMM+x87 state */ - fxsave(ifps); - ifps->fp_save_layout = FXSAVE32; - } - } - else { - if (fpu_YMM_present) { - xsave64(ifps); - ifps->fp_save_layout = XSAVE64; - } - else { - fxsave64(ifps); - ifps->fp_save_layout = FXSAVE64; - } - } -#elif defined(__x86_64__) if (fpu_YMM_present) { xsave(ifps); ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32; @@ -390,7 +351,6 @@ static void fpu_store_registers(void *fstate, boolean_t is64) { fxsave(ifps); ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32; } -#endif } /* @@ -957,7 +917,12 @@ fp_load( struct x86_fx_thread_state *ifps = pcb->ifps; assert(ifps); - assert(ifps->fp_valid == FALSE || ifps->fp_valid == TRUE); +#if DEBUG + if (ifps->fp_valid != FALSE && ifps->fp_valid != TRUE) { + panic("fp_load() invalid fp_valid: %u, fp_save_layout: %u\n", + ifps->fp_valid, ifps->fp_save_layout); + } +#endif if (ifps->fp_valid == FALSE) { fpinit(); diff --git a/osfmk/i386/fpu.h b/osfmk/i386/fpu.h index 5b0658f60..12a5082b6 100644 --- a/osfmk/i386/fpu.h +++ b/osfmk/i386/fpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -99,10 +99,6 @@ extern void fpexterrflt(void); extern void fpSSEexterrflt(void); extern void fpflush(thread_t); extern void fp_setvalid(boolean_t); -#ifdef __i386__ -extern void fxsave64(struct x86_fx_thread_state *); -extern void fxrstor64(struct x86_fx_thread_state *); -#endif extern void clear_fpu(void); extern void fpu_save_context(thread_t thread); diff --git a/osfmk/i386/gdt.c b/osfmk/i386/gdt.c index ae40e4f01..04937648b 100644 --- a/osfmk/i386/gdt.c +++ b/osfmk/i386/gdt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -63,12 +63,8 @@ #include struct real_descriptor master_gdt[GDTSZ] -#if __x86_64__ __attribute__((section("__HIB,__desc"))) -#else - __attribute__((section("__INITGDT,__DATA"))) -#endif - __attribute__((aligned(CPU_CACHE_SIZE))) = { + __attribute__((aligned(PAGE_SIZE))) = { [SEL_TO_INDEX(KERNEL32_CS)] = MAKE_REAL_DESCRIPTOR( /* kernel 32-bit code */ 0, 0xfffff, @@ -93,7 +89,6 @@ struct real_descriptor master_gdt[GDTSZ] SZ_32|SZ_G, ACC_P|ACC_PL_K|ACC_DATA_W ), -#ifdef __x86_64__ [SEL_TO_INDEX(USER_CS)] = MAKE_REAL_DESCRIPTOR( /* 32-bit user code segment */ 0, 0xfffff, @@ -112,5 +107,4 @@ struct real_descriptor master_gdt[GDTSZ] SZ_64|SZ_G, ACC_P|ACC_PL_U|ACC_CODE_R ), -#endif }; diff --git a/osfmk/i386/genassym.c b/osfmk/i386/genassym.c index 8595b8bb0..e575adf97 100644 --- a/osfmk/i386/genassym.c +++ b/osfmk/i386/genassym.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -145,16 +145,6 @@ main( DECLARE("MUTEX_OWNER", offsetof(lck_mtx_t *, lck_mtx_owner)); DECLARE("MUTEX_PTR", offsetof(lck_mtx_t *, lck_mtx_ptr)); DECLARE("MUTEX_STATE", offsetof(lck_mtx_t *, lck_mtx_state)); -#ifdef __i386__ - DECLARE("MUTEX_TYPE", offsetof(lck_mtx_ext_t *, lck_mtx_deb.type)); - DECLARE("MUTEX_PC", offsetof(lck_mtx_ext_t *, lck_mtx_deb.pc)); - DECLARE("MUTEX_ATTR", offsetof(lck_mtx_ext_t *, lck_mtx_attr)); - DECLARE("MUTEX_ATTR_DEBUG", LCK_MTX_ATTR_DEBUG); - DECLARE("MUTEX_ATTR_DEBUGb", LCK_MTX_ATTR_DEBUGb); - DECLARE("MUTEX_ATTR_STAT", LCK_MTX_ATTR_STAT); - DECLARE("MUTEX_ATTR_STATb", LCK_MTX_ATTR_STATb); - DECLARE("MUTEX_TAG", MUTEX_TAG); -#endif DECLARE("MUTEX_IND", LCK_MTX_TAG_INDIRECT); DECLARE("MUTEX_PTR", offsetof(lck_mtx_t *, lck_mtx_ptr)); DECLARE("MUTEX_ASSERT_OWNED", LCK_MTX_ASSERT_OWNED); @@ -187,6 +177,7 @@ main( DECLARE("TH_KERNEL_STACK", offsetof(thread_t, kernel_stack)); DECLARE("TH_MUTEX_COUNT", offsetof(thread_t, mutex_count)); DECLARE("TH_WAS_PROMOTED_ON_WAKEUP", offsetof(thread_t, was_promoted_on_wakeup)); + DECLARE("TH_IOTIER_OVERRIDE", offsetof(thread_t, iotier_override)); DECLARE("TH_SYSCALLS_MACH", offsetof(thread_t, syscalls_mach)); DECLARE("TH_SYSCALLS_UNIX", offsetof(thread_t, syscalls_unix)); @@ -205,6 +196,7 @@ main( DECLARE("TH_COPYIO_STATE", offsetof(thread_t, machine.copyio_state)); DECLARE("WINDOWS_CLEAN", WINDOWS_CLEAN); #endif + DECLARE("TH_RWLOCK_COUNT", offsetof(thread_t, rwlock_count)); DECLARE("MAP_PMAP", offsetof(vm_map_t, pmap)); @@ -214,14 +206,6 @@ main( /* * KSS_* are offsets from the top of the kernel stack (cpu_kernel_stack) */ -#if defined(__i386__) - DECLARE("KSS_EBX", offsetof(struct x86_kernel_state *, k_ebx)); - DECLARE("KSS_ESP", offsetof(struct x86_kernel_state *, k_esp)); - DECLARE("KSS_EBP", offsetof(struct x86_kernel_state *, k_ebp)); - DECLARE("KSS_EDI", offsetof(struct x86_kernel_state *, k_edi)); - DECLARE("KSS_ESI", offsetof(struct x86_kernel_state *, k_esi)); - DECLARE("KSS_EIP", offsetof(struct x86_kernel_state *, k_eip)); -#elif defined(__x86_64__) DECLARE("KSS_RBX", offsetof(struct x86_kernel_state *, k_rbx)); DECLARE("KSS_RSP", offsetof(struct x86_kernel_state *, k_rsp)); DECLARE("KSS_RBP", offsetof(struct x86_kernel_state *, k_rbp)); @@ -230,9 +214,6 @@ main( DECLARE("KSS_R14", offsetof(struct x86_kernel_state *, k_r14)); DECLARE("KSS_R15", offsetof(struct x86_kernel_state *, k_r15)); DECLARE("KSS_RIP", offsetof(struct x86_kernel_state *, k_rip)); -#else 
-#error Unsupported architecture -#endif DECLARE("DS_DR0", offsetof(struct x86_debug_state32 *, dr0)); DECLARE("DS_DR1", offsetof(struct x86_debug_state32 *, dr1)); @@ -324,33 +305,16 @@ main( DECLARE("ISF64_SS", ISF64_(ss)); DECLARE("ISF64_SIZE", sizeof(x86_64_intr_stack_frame_t)); - DECLARE("ISC32_OFFSET", offsetof(x86_saved_state_compat32_t *, isf64)); -#define ISC32_(x) offsetof(x86_saved_state_compat32_t *, isf64.x) - DECLARE("ISC32_TRAPNO", ISC32_(trapno)); - DECLARE("ISC32_TRAPFN", ISC32_(trapfn)); - DECLARE("ISC32_ERR", ISC32_(err)); - DECLARE("ISC32_RIP", ISC32_(rip)); - DECLARE("ISC32_CS", ISC32_(cs)); - DECLARE("ISC32_RFLAGS", ISC32_(rflags)); - DECLARE("ISC32_RSP", ISC32_(rsp)); - DECLARE("ISC32_SS", ISC32_(ss)); - DECLARE("NBPG", I386_PGBYTES); DECLARE("PAGE_SIZE", I386_PGBYTES); DECLARE("PAGE_MASK", I386_PGBYTES-1); DECLARE("PAGE_SHIFT", 12); DECLARE("NKPT", NKPT); -#ifdef __i386__ - DECLARE("KPTDI", KPTDI); -#endif DECLARE("VM_MIN_ADDRESS", VM_MIN_ADDRESS); DECLARE("VM_MAX_ADDRESS", VM_MAX_ADDRESS); DECLARE("KERNELBASE", VM_MIN_KERNEL_ADDRESS); DECLARE("LINEAR_KERNELBASE", LINEAR_KERNEL_ADDRESS); DECLARE("KERNEL_STACK_SIZE", KERNEL_STACK_SIZE); -#ifdef __i386__ - DECLARE("KERNEL_UBER_BASE_HI32", KERNEL_UBER_BASE_HI32); -#endif DECLARE("ASM_COMM_PAGE32_BASE_ADDRESS", _COMM_PAGE32_BASE_ADDRESS); DECLARE("ASM_COMM_PAGE32_START_ADDRESS", _COMM_PAGE32_START_ADDRESS); @@ -366,9 +330,7 @@ main( DECLARE("INTEL_PTE_USER", INTEL_PTE_USER); DECLARE("INTEL_PTE_INVALID", INTEL_PTE_INVALID); DECLARE("NPGPTD", NPGPTD); -#if defined(__x86_64__) DECLARE("KERNEL_PML4_INDEX",KERNEL_PML4_INDEX); -#endif DECLARE("IDTSZ", IDTSZ); DECLARE("GDTSZ", GDTSZ); DECLARE("LDTSZ", LDTSZ); @@ -381,19 +343,10 @@ main( DECLARE("USER64_CS", USER64_CS); DECLARE("KERNEL_TSS", KERNEL_TSS); DECLARE("KERNEL_LDT", KERNEL_LDT); -#ifdef __i386__ - DECLARE("DF_TSS", DF_TSS); - DECLARE("MC_TSS", MC_TSS); - DECLARE("CPU_DATA_GS", CPU_DATA_GS); -#endif /* __i386__ */ DECLARE("SYSENTER_CS", SYSENTER_CS); DECLARE("SYSENTER_TF_CS",SYSENTER_TF_CS); DECLARE("SYSENTER_DS", SYSENTER_DS); DECLARE("SYSCALL_CS", SYSCALL_CS); -#ifdef __i386__ - DECLARE("USER_WINDOW_SEL", USER_WINDOW_SEL); - DECLARE("PHYS_WINDOW_SEL", PHYS_WINDOW_SEL); -#endif DECLARE("CPU_THIS", offsetof(cpu_data_t *, cpu_this)); @@ -436,40 +389,25 @@ main( DECLARE("CPU_INT_EVENT_TIME", offsetof(cpu_data_t *, cpu_int_event_time)); -#ifdef __i386__ - DECLARE("CPU_HI_ISS", - offsetof(cpu_data_t *, cpu_hi_iss)); -#endif DECLARE("CPU_TASK_CR3", offsetof(cpu_data_t *, cpu_task_cr3)); DECLARE("CPU_ACTIVE_CR3", offsetof(cpu_data_t *, cpu_active_cr3)); DECLARE("CPU_KERNEL_CR3", offsetof(cpu_data_t *, cpu_kernel_cr3)); -#ifdef __x86_64__ - DECLARE("CPU_TLB_INVALID", + DECLARE("CPU_TLB_INVALID", offsetof(cpu_data_t *, cpu_tlb_invalid)); -#endif - DECLARE("CPU_IS64BIT", - offsetof(cpu_data_t *, cpu_is64bit)); DECLARE("CPU_TASK_MAP", offsetof(cpu_data_t *, cpu_task_map)); DECLARE("TASK_MAP_32BIT", TASK_MAP_32BIT); DECLARE("TASK_MAP_64BIT", TASK_MAP_64BIT); -#ifdef __i386__ - DECLARE("TASK_MAP_64BIT_SHARED", TASK_MAP_64BIT_SHARED); -#endif DECLARE("CPU_UBER_USER_GS_BASE", offsetof(cpu_data_t *, cpu_uber.cu_user_gs_base)); DECLARE("CPU_UBER_ISF", offsetof(cpu_data_t *, cpu_uber.cu_isf)); DECLARE("CPU_UBER_TMP", offsetof(cpu_data_t *, cpu_uber.cu_tmp)); - DECLARE("CPU_UBER_ARG_STORE", - offsetof(cpu_data_t *, cpu_uber_arg_store)); - DECLARE("CPU_UBER_ARG_STORE_VALID", - offsetof(cpu_data_t *, cpu_uber_arg_store_valid)); DECLARE("CPU_NANOTIME", offsetof(cpu_data_t *, 
cpu_nanotime)); @@ -478,7 +416,6 @@ main( offsetof(cpu_data_t *, cpu_dr7)); DECLARE("hwIntCnt", offsetof(cpu_data_t *,cpu_hwIntCnt)); -#if defined(__x86_64__) DECLARE("CPU_ACTIVE_PCID", offsetof(cpu_data_t *, cpu_active_pcid)); DECLARE("CPU_PCID_COHERENTP", @@ -502,7 +439,6 @@ main( offsetof(cpu_data_t *, cpu_tlb_invalid_local)); DECLARE("CPU_TLB_INVALID_GLOBAL", offsetof(cpu_data_t *, cpu_tlb_invalid_global)); -#endif /* x86_64 */ DECLARE("enaExpTrace", enaExpTrace); DECLARE("enaUsrFCall", enaUsrFCall); DECLARE("enaUsrPhyMp", enaUsrPhyMp); @@ -521,12 +457,6 @@ main( DECLARE("PDESHIFT", PDESHIFT); DECLARE("PDESIZE", PDESIZE); DECLARE("PTESIZE", PTESIZE); -#ifdef __i386__ - DECLARE("PTDPTDI", PTDPTDI); - DECLARE("APTDPTDI", APTDPTDI); - DECLARE("HIGH_MEM_BASE", HIGH_MEM_BASE); - DECLARE("HIGH_IDT_BASE", pmap_index_to_virt(HIGH_FIXED_IDT)); -#endif DECLARE("KERNELBASEPDE", (LINEAR_KERNEL_ADDRESS >> PDESHIFT) * diff --git a/osfmk/i386/hibernate_i386.c b/osfmk/i386/hibernate_i386.c index 83c33dba5..a16994234 100644 --- a/osfmk/i386/hibernate_i386.c +++ b/osfmk/i386/hibernate_i386.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,6 +64,7 @@ hibernate_page_list_allocate(boolean_t log) uint32_t mcount, msize, i; hibernate_bitmap_t dram_ranges[MAX_BANKS]; boot_args * args = (boot_args *) PE_state.bootArgs; + uint32_t non_os_pagecount; mptr = (EfiMemoryRange *)ml_static_ptovirt(args->MemoryMap); if (args->MemoryMapDescriptorSize == 0) @@ -72,6 +73,7 @@ hibernate_page_list_allocate(boolean_t log) mcount = args->MemoryMapSize / msize; num_banks = 0; + non_os_pagecount = 0; for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); @@ -87,13 +89,16 @@ hibernate_page_list_allocate(boolean_t log) switch (mptr->Type) { // any kind of dram + case kEfiACPIMemoryNVS: + case kEfiPalCode: + non_os_pagecount += num; + + // OS used dram case kEfiLoaderCode: case kEfiLoaderData: case kEfiBootServicesCode: case kEfiBootServicesData: case kEfiConventionalMemory: - case kEfiACPIMemoryNVS: - case kEfiPalCode: for (bank = 0; bank < num_banks; bank++) { @@ -171,6 +176,7 @@ hibernate_page_list_allocate(boolean_t log) bank, bitmap->first_page, bitmap->last_page); bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; } + if (log) printf("efi pagecount %d\n", non_os_pagecount); return (list); } @@ -193,13 +199,6 @@ hibernate_page_list_set_volatile( hibernate_page_list_t * page_list, { boot_args * args = (boot_args *) PE_state.bootArgs; -#if !defined(x86_64) - hibernate_set_page_state(page_list, page_list_wired, - I386_HIB_PAGETABLE, I386_HIB_PAGETABLE_COUNT, - kIOHibernatePageStateFree); - *pagesOut -= I386_HIB_PAGETABLE_COUNT; -#endif - if (args->efiRuntimeServicesPageStart) { hibernate_set_page_state(page_list, page_list_wired, diff --git a/osfmk/i386/hibernate_restore.c b/osfmk/i386/hibernate_restore.c index ba8704298..278ff4e78 100644 --- a/osfmk/i386/hibernate_restore.c +++ b/osfmk/i386/hibernate_restore.c @@ -43,7 +43,6 @@ hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t p (void)procFlags; uint64_t * d; uint64_t * s; - uint32_t idx; if (src == 0) return (uintptr_t)dst; @@ -51,8 +50,7 @@ hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t p d = (uint64_t *)pal_hib_map(DEST_COPY_AREA, dst); s = (uint64_t *) 
(uintptr_t)src; - for (idx = 0; idx < (len / (uint32_t)sizeof(uint64_t)); idx++) - d[idx] = s[idx]; + memcpy(d, s, len); return (uintptr_t)d; } @@ -74,11 +72,17 @@ pal_hib_map(uintptr_t virt, uint64_t phys) case IMAGE_AREA: case IMAGE2_AREA: break; + default: asm("cli;hlt;"); break; } - + if (phys < IMAGE2_AREA) + { + // first 4Gb is all mapped, + // and do not expect source areas to cross 4Gb + return (phys); + } index = (virt >> I386_LPGSHIFT); virt += (uintptr_t)(phys & I386_LPGMASK); phys = ((phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE); diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index 910194f59..4213489eb 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2009 Apple Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -99,13 +99,9 @@ #include #include #include /* LcksOpts */ -#ifdef __i386__ -#include -#endif #if DEBUG #include #endif - #if DEBUG #define DBG(x...) kprintf(x) #else @@ -125,12 +121,8 @@ uint64_t physmap_base, physmap_max; pd_entry_t *KPTphys; pd_entry_t *IdlePTD; -#ifdef __i386__ -pd_entry_t *IdlePDPT64; -#else pdpt_entry_t *IdlePDPT; pml4_entry_t *IdlePML4; -#endif char *physfree; @@ -144,9 +136,7 @@ ALLOCPAGES(int npages) uintptr_t tmp = (uintptr_t)physfree; bzero(physfree, npages * PAGE_SIZE); physfree += npages * PAGE_SIZE; -#ifdef __x86_64__ tmp += VM_MIN_KERNEL_ADDRESS & ~LOW_4GB_MASK; -#endif return (void *)tmp; } @@ -163,7 +153,6 @@ fillkpt(pt_entry_t *base, int prot, uintptr_t src, int index, int count) extern pmap_paddr_t first_avail; -#ifdef __x86_64__ int break_kprintf = 0; uint64_t @@ -182,14 +171,9 @@ x86_64_post_sleep(uint64_t new_cr3) set_cr3_raw((uint32_t) new_cr3); } -#endif -#ifdef __i386__ -#define ID_MAP_VTOP(x) x -#endif -#ifdef __x86_64__ // Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address // NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account // the PCI hole (which is less 4GB but not more). @@ -313,41 +297,6 @@ Idle_PTs_init(void) } -#else /* __x86_64__ */ - -static void -Idle_PTs_init(void) -{ - /* Allocate the "idle" kernel page tables: */ - KPTphys = ALLOCPAGES(NKPT); /* level 1 */ - IdlePTD = ALLOCPAGES(NPGPTD); /* level 2 */ - - IdlePDPT64 = ALLOCPAGES(1); - - // Recursive mapping of PTEs - fillkpt(IdlePTD, INTEL_PTE_WRITE, (uintptr_t)IdlePTD, PTDPTDI, NPGPTD); - // commpage - fillkpt(IdlePTD, INTEL_PTE_WRITE|INTEL_PTE_USER, (uintptr_t)ALLOCPAGES(1), _COMM_PAGE32_BASE_ADDRESS >> PDESHIFT,1); - - // Fill the lowest level with everything up to physfree - fillkpt(KPTphys, - INTEL_PTE_WRITE, 0, 0, (int)(((uintptr_t)physfree) >> PAGE_SHIFT)); - - // Rewrite the 2nd-lowest level to point to pages of KPTphys. - // This was previously filled statically by idle_pt.c, and thus - // must be done after the KPTphys fill since IdlePTD is in use - fillkpt(IdlePTD, - INTEL_PTE_WRITE, (uintptr_t)ID_MAP_VTOP(KPTphys), 0, NKPT); - - // IdlePDPT entries - fillkpt(IdlePDPT, 0, (uintptr_t)IdlePTD, 0, NPGPTD); - - postcode(VSTART_SET_CR3); - - // Flush the TLB now we're done rewriting the page tables.. 
- set_cr3_raw(get_cr3_raw()); -} -#endif /* * vstart() is called in the natural mode (64bit for K64, 32 for K32) @@ -417,25 +366,17 @@ vstart(vm_offset_t boot_args_start) PE_init_platform(FALSE, kernelBootArgs); postcode(PE_INIT_PLATFORM_D); } else { -#ifdef __x86_64__ /* Switch to kernel's page tables (from the Boot PTs) */ set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4)); -#endif /* Find our logical cpu number */ cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK]; DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE)); } postcode(VSTART_CPU_DESC_INIT); -#ifdef __x86_64__ if(is_boot_cpu) cpu_desc_init64(cpu_datap(cpu)); cpu_desc_load64(cpu_datap(cpu)); -#else - if(is_boot_cpu) - cpu_desc_init(cpu_datap(cpu)); - cpu_desc_load(cpu_datap(cpu)); -#endif postcode(VSTART_CPU_MODE_INIT); if (is_boot_cpu) cpu_mode_init(current_cpu_datap()); /* cpu_mode_init() will be @@ -443,22 +384,9 @@ vstart(vm_offset_t boot_args_start) * via i386_init_slave() */ postcode(VSTART_EXIT); -#ifdef __i386__ - if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) { - wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE); - DBG("vstart() NX/XD enabled, i386\n"); - } - - if (is_boot_cpu) - i386_init(); - else - i386_init_slave(); - /*NOTREACHED*/ -#else x86_init_wrapper(is_boot_cpu ? (uintptr_t) i386_init : (uintptr_t) i386_init_slave, cpu_datap(cpu)->cpu_int_stack_top); -#endif } /* @@ -483,9 +411,6 @@ i386_init(void) mca_cpu_init(); #endif - - kernel_early_bootstrap(); - master_cpu = 0; cpu_init(); @@ -497,6 +422,8 @@ i386_init(void) /* setup debugging output if one has been chosen */ PE_init_kprintf(FALSE); + kernel_early_bootstrap(); + if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags))) dgWork.dgFlags = 0; @@ -544,24 +471,6 @@ i386_init(void) &urgency_notification_assert_abstime_threshold, sizeof(urgency_notification_assert_abstime_threshold)); -#if CONFIG_YONAH - /* - * At this point we check whether we are a 64-bit processor - * and that we're not restricted to legacy mode, 32-bit operation. - */ - if (cpuid_extfeatures() & CPUID_EXTFEATURE_EM64T) { - boolean_t legacy_mode; - kprintf("EM64T supported"); - if (PE_parse_boot_argn("-legacy", &legacy_mode, sizeof (legacy_mode))) { - kprintf(" but legacy mode forced\n"); - IA32e = FALSE; - } else { - kprintf(" and will be enabled\n"); - } - } else - IA32e = FALSE; -#endif - if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD)) nx_enabled = 0; @@ -634,8 +543,7 @@ do_init_slave(boolean_t fast_restart) cpu_thread_init(); /* not strictly necessary */ - cpu_init(); /* Sets cpu_running which starter cpu waits for */ - + cpu_init(); /* Sets cpu_running which starter cpu waits for */ slave_main(init_param); panic("do_init_slave() returned from slave_main()"); diff --git a/osfmk/i386/i386_lock.s b/osfmk/i386/i386_lock.s index 6b34073c6..70d117605 100644 --- a/osfmk/i386/i386_lock.s +++ b/osfmk/i386/i386_lock.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -118,19 +118,11 @@ /* For x86_64, the varargs ABI requires that %al indicate * how many SSE register contain arguments. 
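The CALL_PANIC() macro defined just below zeroes %al before calling panic() because, under the x86-64 SysV convention this comment describes, a variadic callee receives in %al the number of vector registers carrying arguments. Compiled C sets it automatically; hand-written assembly must do so itself. A small illustration, where panic_like() is a hypothetical stand-in for the kernel's panic():

    #include <stdarg.h>
    #include <stdio.h>

    /* Hypothetical variadic sink standing in for panic(). */
    static void panic_like(const char *fmt, ...)
    {
        va_list ap;
        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);   /* consume the variadic arguments */
        va_end(ap);
    }

    int main(void)
    {
        /* No floating-point arguments are passed in registers here, so the
         * compiler emits the equivalent of the macro's "xorb %al,%al". */
        panic_like("lock %p: timeout\n", (void *)0);
        return 0;
    }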
In our case, 0 */ -#if __i386__ -#define ALIGN_STACK() subl $8, %esp; andl $0xFFFFFFF0, %esp ; -#define LOAD_STRING_ARG0(label) movl $##label, (%esp) ; -#define LOAD_ARG1(x) mov x, 4(%esp) ; -#define LOAD_PTR_ARG1(x) mov x, 4(%esp) ; -#define CALL_PANIC() call EXT(panic) ; -#else #define ALIGN_STACK() and $0xFFFFFFFFFFFFFFF0, %rsp ; #define LOAD_STRING_ARG0(label) leaq label(%rip), %rdi ; #define LOAD_ARG1(x) mov x, %esi ; #define LOAD_PTR_ARG1(x) mov x, %rsi ; #define CALL_PANIC() xorb %al,%al ; call EXT(panic) ; -#endif #define CHECK_UNLOCK(current, owner) \ cmp current, owner ; \ @@ -262,39 +254,6 @@ * a "nop" */ -#if defined(__i386__) - -#define LOCKSTAT_LABEL(lab) \ - .data ;\ - .globl lab ;\ - lab: ;\ - .long 9f ;\ - .text ;\ - 9: - -#define LOCKSTAT_RECORD(id, lck) \ - push %ebp ; \ - mov %esp,%ebp ; \ - sub $0x38,%esp /* size of dtrace_probe args */ ; \ - movl _lockstat_probemap + (id * 4),%eax ; \ - test %eax,%eax ; \ - je 9f ; \ - movl $0,36(%esp) ; \ - movl $0,40(%esp) ; \ - movl $0,28(%esp) ; \ - movl $0,32(%esp) ; \ - movl $0,20(%esp) ; \ - movl $0,24(%esp) ; \ - movl $0,12(%esp) ; \ - movl $0,16(%esp) ; \ - movl lck,4(%esp) /* copy lock pointer to arg 1 */ ; \ - movl $0,8(%esp) ; \ - movl %eax,(%esp) ; \ - call *_lockstat_probe ; \ -9: leave - /* ret - left to subsequent code, e.g. return values */ - -#elif defined(__x86_64__) #define LOCKSTAT_LABEL(lab) \ .data ;\ .globl lab ;\ @@ -318,9 +277,7 @@ call *_lockstat_probe(%rip) ; \ 9: leave /* ret - left to subsequent code, e.g. return values */ -#else -#error Unsupported architecture -#endif + #endif /* CONFIG_DTRACE */ /* @@ -329,23 +286,12 @@ * word is loaded/stored to the pointer */ -#if defined(__i386__) -#define HW_LOCK_REGISTER %edx -#define LOAD_HW_LOCK_REGISTER mov L_ARG0, HW_LOCK_REGISTER -#define HW_LOCK_THREAD_REGISTER %ecx -#define LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER -#define HW_LOCK_MOV_WORD movl -#define HW_LOCK_EXAM_REGISTER %eax -#elif defined(__x86_64__) #define HW_LOCK_REGISTER %rdi #define LOAD_HW_LOCK_REGISTER #define HW_LOCK_THREAD_REGISTER %rcx #define LOAD_HW_LOCK_THREAD_REGISTER mov %gs:CPU_ACTIVE_THREAD, HW_LOCK_THREAD_REGISTER #define HW_LOCK_MOV_WORD movq #define HW_LOCK_EXAM_REGISTER %rax -#else -#error Unsupported architecture -#endif /* * void hw_lock_init(hw_lock_t) @@ -445,65 +391,29 @@ LEAF_ENTRY(hw_lock_to) * and then spin re-checking the lock but pausing * every so many (INNER_LOOP_COUNT) spins to check for timeout. */ -#if __i386__ - movl L_ARG1,%ecx /* fetch timeout */ - push %edi - push %ebx - mov %edx,%edi - - lfence - rdtsc /* read cyclecount into %edx:%eax */ - addl %ecx,%eax /* fetch and timeout */ - adcl $0,%edx /* add carry */ - mov %edx,%ecx - mov %eax,%ebx /* %ecx:%ebx is the timeout expiry */ - mov %edi, %edx /* load lock back into %edx */ -#else push %r9 lfence rdtsc /* read cyclecount into %edx:%eax */ shlq $32, %rdx orq %rdx, %rax /* load 64-bit quantity into %rax */ addq %rax, %rsi /* %rsi is the timeout expiry */ -#endif 4: /* * The inner-loop spin to look for the lock being freed. 
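Condensed into C, the timeout-acquire pattern that hw_lock_to() implements around this point (TSC deadline computed above, inner pause loop below) looks like the following sketch. INNER_LOOP_COUNT's value here is illustrative, hw_lock_to_sketch() is an invented name, and the kernel stores the owning thread pointer rather than 1:

    #include <stdbool.h>
    #include <stdint.h>
    #include <x86intrin.h>                 /* __rdtsc(), _mm_pause() */

    #define INNER_LOOP_COUNT 1000          /* illustrative spin batch size */

    static bool hw_lock_to_sketch(volatile unsigned long *lock,
                                  uint64_t timeout_cycles)
    {
        uint64_t deadline = __rdtsc() + timeout_cycles;
        for (;;) {
            for (int i = 0; i < INNER_LOOP_COUNT; i++) {
                _mm_pause();               /* pause hint for hyper-threading */
                if (*lock == 0 &&
                    __sync_bool_compare_and_swap(lock, 0UL, 1UL))
                    return true;           /* acquired */
            }
            if (__rdtsc() >= deadline)     /* batch done: check the clock */
                return false;              /* timed out */
        }
    }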
*/ -#if __i386__ - mov $(INNER_LOOP_COUNT),%edi -#else mov $(INNER_LOOP_COUNT),%r9 -#endif 5: PAUSE /* pause for hyper-threading */ mov (HW_LOCK_REGISTER),HW_LOCK_EXAM_REGISTER /* spin checking lock value in cache */ test HW_LOCK_EXAM_REGISTER,HW_LOCK_EXAM_REGISTER je 6f /* zero => unlocked, try to grab it */ -#if __i386__ - decl %edi /* decrement inner loop count */ -#else decq %r9 /* decrement inner loop count */ -#endif jnz 5b /* time to check for timeout? */ /* * Here after spinning INNER_LOOP_COUNT times, check for timeout */ -#if __i386__ - mov %edx,%edi /* Save %edx */ - lfence - rdtsc /* cyclecount into %edx:%eax */ - xchg %edx,%edi /* cyclecount into %edi:%eax */ - cmpl %ecx,%edi /* compare high-order 32-bits */ - jb 4b /* continue spinning if less, or */ - cmpl %ebx,%eax /* compare low-order 32-bits */ - jb 4b /* continue if less, else bail */ - xor %eax,%eax /* with 0 return value */ - pop %ebx - pop %edi -#else lfence rdtsc /* cyclecount into %edx:%eax */ shlq $32, %rdx @@ -512,7 +422,6 @@ LEAF_ENTRY(hw_lock_to) jb 4b /* continue spinning if less, or */ xor %rax,%rax /* with 0 return value */ pop %r9 -#endif LEAF_RET 6: @@ -524,12 +433,7 @@ LEAF_ENTRY(hw_lock_to) lock; cmpxchg HW_LOCK_THREAD_REGISTER,(HW_LOCK_REGISTER) /* try to acquire the HW lock */ jne 4b /* no - spin again */ movl $1,%eax /* yes */ -#if __i386__ - pop %ebx - pop %edi -#else pop %r9 -#endif LEAF_RET /* @@ -619,19 +523,10 @@ LEAF_ENTRY(hw_lock_held) * register and examined */ -#if defined(__i386__) -#define LCK_RW_REGISTER %edx -#define LOAD_LCK_RW_REGISTER mov S_ARG0, LCK_RW_REGISTER -#define LCK_RW_FLAGS_REGISTER %eax -#define LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER -#elif defined(__x86_64__) #define LCK_RW_REGISTER %rdi #define LOAD_LCK_RW_REGISTER #define LCK_RW_FLAGS_REGISTER %eax #define LOAD_LCK_RW_FLAGS_REGISTER mov (LCK_RW_REGISTER), LCK_RW_FLAGS_REGISTER -#else -#error Unsupported architecture -#endif #define RW_LOCK_SHARED_MASK (LCK_RW_INTERLOCK | LCK_RW_WANT_UPGRADE | LCK_RW_WANT_WRITE) /* @@ -639,6 +534,8 @@ LEAF_ENTRY(hw_lock_held) * */ Entry(lck_rw_lock_shared) + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ + incl TH_RWLOCK_COUNT(%rcx) /* Increment count before atomic CAS */ LOAD_LCK_RW_REGISTER 1: LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield and interlock */ @@ -693,6 +590,10 @@ Entry(lck_rw_try_lock_shared) cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ jne 2f + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ + incl TH_RWLOCK_COUNT(%rcx) /* Increment count on success. 
*/ + /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */ + #if CONFIG_DTRACE movl $1, %eax /* @@ -702,8 +603,8 @@ Entry(lck_rw_try_lock_shared) */ LOCKSTAT_LABEL(_lck_rw_try_lock_shared_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ + LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, LCK_RW_REGISTER) #endif movl $1, %eax /* return TRUE */ ret @@ -758,6 +659,8 @@ Entry(lck_rw_grab_shared) * */ Entry(lck_rw_lock_exclusive) + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ + incl TH_RWLOCK_COUNT(%rcx) /* Increment count before atomic CAS */ LOAD_LCK_RW_REGISTER 1: LOAD_LCK_RW_FLAGS_REGISTER /* Load state bitfield, interlock and shared count */ @@ -778,8 +681,8 @@ Entry(lck_rw_lock_exclusive) */ LOCKSTAT_LABEL(_lck_rw_lock_exclusive_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ + LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER) #endif ret 2: @@ -813,6 +716,10 @@ Entry(lck_rw_try_lock_exclusive) cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ jne 2f + mov %gs:CPU_ACTIVE_THREAD, %rcx /* Load thread pointer */ + incl TH_RWLOCK_COUNT(%rcx) /* Increment count on success. */ + /* There is a 3 instr window where preemption may not notice rwlock_count after cmpxchg */ + #if CONFIG_DTRACE movl $1, %eax /* @@ -822,8 +729,8 @@ Entry(lck_rw_try_lock_exclusive) */ LOCKSTAT_LABEL(_lck_rw_try_lock_exclusive_lockstat_patch_point) ret - /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ - LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER) + /* Fall thru when patched, counting on lock pointer in LCK_RW_REGISTER */ + LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, LCK_RW_REGISTER) #endif movl $1, %eax /* return TRUE */ ret @@ -900,16 +807,9 @@ Entry(lck_rw_lock_shared_to_exclusive) cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ jne 7f -#if __i386__ - pushl %eax /* go check to see if we need to */ - push %edx /* wakeup anyone */ - call EXT(lck_rw_lock_shared_to_exclusive_failure) - addl $8, %esp -#else mov %eax, %esi /* put old flags as second arg */ /* lock is alread in %rdi */ call EXT(lck_rw_lock_shared_to_exclusive_failure) -#endif ret /* and pass the failure return along */ 7: PAUSE @@ -972,16 +872,9 @@ Entry(lck_rw_done) cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ jne 7f -#if __i386__ - pushl %eax - push %edx - call EXT(lck_rw_done_gen) - addl $8, %esp -#else mov %eax,%esi /* old flags in %rsi */ /* lock is in %rdi already */ call EXT(lck_rw_done_gen) -#endif ret 7: PAUSE @@ -1032,15 +925,8 @@ Entry(lck_rw_lock_exclusive_to_shared) cmpxchgl %ecx, (LCK_RW_REGISTER) /* Attempt atomic exchange */ jne 6f -#if __i386__ - pushl %eax - push %edx - call EXT(lck_rw_lock_exclusive_to_shared_gen) - addl $8, %esp -#else mov %eax,%esi call EXT(lck_rw_lock_exclusive_to_shared_gen) -#endif ret 6: PAUSE @@ -1128,106 +1014,6 @@ Entry(lck_rw_held_read_or_upgrade) #define M_PTR MUTEX_PTR #define M_STATE MUTEX_STATE -#if defined(__i386__) - -#define LMTX_ARG0 B_ARG0 -#define LMTX_ARG1 B_ARG1 -#define LMTX_REG %edx -#define LMTX_A_REG %eax -#define LMTX_A_REG32 %eax -#define LMTX_C_REG 
%ecx -#define LMTX_C_REG32 %ecx -#define LMTX_RET_REG %eax -#define LMTX_RET_REG32 %eax -#define LMTX_LGROUP_REG %esi -#define LMTX_SSTATE_REG %edi -#define LOAD_LMTX_REG(arg) mov arg, LMTX_REG -#define LMTX_CHK_EXTENDED cmp LMTX_REG, LMTX_ARG0 -#define LMTX_ASSERT_OWNED cmpl $(MUTEX_ASSERT_OWNED), LMTX_ARG1 - -#define LMTX_ENTER_EXTENDED \ - mov M_PTR(LMTX_REG), LMTX_REG ; \ - push LMTX_LGROUP_REG ; \ - push LMTX_SSTATE_REG ; \ - xor LMTX_SSTATE_REG, LMTX_SSTATE_REG ; \ - mov MUTEX_GRP(LMTX_REG), LMTX_LGROUP_REG ; \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - addl $1, GRP_MTX_STAT_UTIL(LMTX_LGROUP_REG) ; \ - jnc 11f ; \ - incl GRP_MTX_STAT_UTIL+4(LMTX_LGROUP_REG) ; \ -11: - -#define LMTX_EXIT_EXTENDED \ - pop LMTX_SSTATE_REG ; \ - pop LMTX_LGROUP_REG - - -#define LMTX_CHK_EXTENDED_EXIT \ - cmp LMTX_REG, LMTX_ARG0 ; \ - je 12f ; \ - pop LMTX_SSTATE_REG ; \ - pop LMTX_LGROUP_REG ; \ -12: - - -#if LOG_FIRST_MISS_ALONE -#define LMTX_UPDATE_MISS \ - test $1, LMTX_SSTATE_REG ; \ - jnz 11f ; \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) ; \ - or $1, LMTX_SSTATE_REG ; \ -11: -#else -#define LMTX_UPDATE_MISS \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_MISS(LMTX_LGROUP_REG) -#endif - - -#if LOG_FIRST_MISS_ALONE -#define LMTX_UPDATE_WAIT \ - test $2, LMTX_SSTATE_REG ; \ - jnz 11f ; \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) ; \ - or $2, LMTX_SSTATE_REG ; \ -11: -#else -#define LMTX_UPDATE_WAIT \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_WAIT(LMTX_LGROUP_REG) -#endif - - -/* - * Record the "direct wait" statistic, which indicates if a - * miss proceeded to block directly without spinning--occurs - * if the owner of the mutex isn't running on another processor - * at the time of the check. 
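Returning to the lck_rw changes above: the new TH_RWLOCK_COUNT bookkeeping bumps a per-thread count of held read-write locks, before the CAS on the blocking paths and just after a successful CAS on the try paths (with a noted few-instruction window where preemption may not yet see it). A rough C rendering; the struct layouts, WANT_EXCL_BITS and READER_ONE are simplified stand-ins for the kernel's packed state word:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct thread_sk { uint32_t rwlock_count; };     /* cf. TH_RWLOCK_COUNT */
    struct lck_rw_sk { _Atomic uint32_t state; };

    #define WANT_EXCL_BITS 0x1u   /* interlock / want-write / want-upgrade */
    #define READER_ONE     0x10u  /* one reader in the shared count field  */

    static bool rw_try_lock_shared_sk(struct thread_sk *th, struct lck_rw_sk *l)
    {
        uint32_t old = atomic_load(&l->state);
        if (old & WANT_EXCL_BITS)
            return false;                       /* writer activity: give up */
        if (!atomic_compare_exchange_strong(&l->state, &old, old + READER_ONE))
            return false;                       /* lost the race: give up   */
        th->rwlock_count++;                     /* count only after success */
        return true;
    }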
- */ -#define LMTX_UPDATE_DIRECT_WAIT \ - LOCK_IF_ATOMIC_STAT_UPDATES ; \ - incl GRP_MTX_STAT_DIRECT_WAIT(LMTX_LGROUP_REG) - - -#define LMTX_CALLEXT1(func_name) \ - push LMTX_REG ; \ - push LMTX_REG ; \ - call EXT(func_name) ; \ - add $4, %esp ; \ - pop LMTX_REG - -#define LMTX_CALLEXT2(func_name, reg) \ - push LMTX_REG ; \ - push reg ; \ - push LMTX_REG ; \ - call EXT(func_name) ; \ - add $8, %esp ; \ - pop LMTX_REG - -#elif defined(__x86_64__) - #define LMTX_ARG0 %rdi #define LMTX_ARG1 %rsi #define LMTX_REG_ORIG %rdi @@ -1332,10 +1118,6 @@ Entry(lck_rw_held_read_or_upgrade) pop LMTX_LGROUP_REG ; \ 12: -#else -#error Unsupported architecture -#endif - #define M_WAITERS_MSK 0x0000ffff #define M_PRIORITY_MSK 0x00ff0000 @@ -1411,6 +1193,8 @@ mutex_interlock_destroyed_str: * lck_mtx_unlock() * lck_mtx_lock_spin() * lck_mtx_lock_spin_always() + * lck_mtx_try_lock_spin() + * lck_mtx_try_lock_spin_always() * lck_mtx_convert_spin() */ NONLEAF_ENTRY(lck_mtx_lock_spin_always) @@ -1642,10 +1426,14 @@ Llml_acquired: jmp 2b - +NONLEAF_ENTRY(lck_mtx_try_lock_spin_always) + LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ + jmp Llmts_avoid_check + NONLEAF_ENTRY(lck_mtx_try_lock_spin) LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ +Llmts_avoid_check: mov M_STATE(LMTX_REG), LMTX_C_REG32 test $(M_ILOCKED_MSK | M_MLOCKED_MSK), LMTX_C_REG32 /* is the interlock or mutex held */ jnz Llmts_slow @@ -1816,18 +1604,7 @@ NONLEAF_ENTRY(lck_mtx_convert_spin) -#if defined(__i386__) -NONLEAF_ENTRY(lck_mtx_unlock) - LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ - mov M_OWNER(LMTX_REG), LMTX_A_REG - test LMTX_A_REG, LMTX_A_REG - jnz Llmu_entry - leave - ret -NONLEAF_ENTRY(lck_mtx_unlock_darwin10) -#else NONLEAF_ENTRY(lck_mtx_unlock) -#endif LOAD_LMTX_REG(B_ARG0) /* fetch lock pointer */ Llmu_entry: mov M_STATE(LMTX_REG), LMTX_C_REG32 @@ -2018,11 +1795,7 @@ LEAF_ENTRY(_enable_preemption) #if MACH_ASSERT cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f -#if __i386__ - pushl %gs:CPU_PREEMPTION_LEVEL -#else movl %gs:CPU_PREEMPTION_LEVEL,%esi -#endif ALIGN_STACK() LOAD_STRING_ARG0(_enable_preemption_less_than_zero) CALL_PANIC() @@ -2068,11 +1841,7 @@ LEAF_ENTRY(_mp_enable_preemption) #if MACH_ASSERT cmpl $0,%gs:CPU_PREEMPTION_LEVEL jg 1f -#if __i386__ - pushl %gs:CPU_PREEMPTION_LEVEL -#else movl %gs:CPU_PREEMPTION_LEVEL,%esi -#endif ALIGN_PANIC() LOAD_STRING_ARG0(_mp_enable_preemption_less_than_zero) CALL_PANIC() @@ -2106,120 +1875,6 @@ _mp_enable_preemption_no_check_less_than_zero: #endif /* MACH_RT */ LEAF_RET -#if __i386__ - -LEAF_ENTRY(i_bit_set) - movl L_ARG0,%edx - movl L_ARG1,%eax - lock - bts %edx,(%eax) - LEAF_RET - -LEAF_ENTRY(i_bit_clear) - movl L_ARG0,%edx - movl L_ARG1,%eax - lock - btr %edx,(%eax) - LEAF_RET - - -LEAF_ENTRY(bit_lock) - movl L_ARG0,%ecx - movl L_ARG1,%eax -1: - lock - bts %ecx,(%eax) - jb 1b - LEAF_RET - - -LEAF_ENTRY(bit_lock_try) - movl L_ARG0,%ecx - movl L_ARG1,%eax - lock - bts %ecx,(%eax) - jb bit_lock_failed - LEAF_RET /* %eax better not be null ! 
*/ -bit_lock_failed: - xorl %eax,%eax - LEAF_RET - -LEAF_ENTRY(bit_unlock) - movl L_ARG0,%ecx - movl L_ARG1,%eax - lock - btr %ecx,(%eax) - LEAF_RET - -/* - * Atomic primitives, prototyped in kern/simple_lock.h - */ -LEAF_ENTRY(hw_atomic_add) - movl L_ARG0, %ecx /* Load address of operand */ - movl L_ARG1, %eax /* Load addend */ - movl %eax, %edx - lock - xaddl %eax, (%ecx) /* Atomic exchange and add */ - addl %edx, %eax /* Calculate result */ - LEAF_RET - -LEAF_ENTRY(hw_atomic_sub) - movl L_ARG0, %ecx /* Load address of operand */ - movl L_ARG1, %eax /* Load subtrahend */ - negl %eax - movl %eax, %edx - lock - xaddl %eax, (%ecx) /* Atomic exchange and add */ - addl %edx, %eax /* Calculate result */ - LEAF_RET - -LEAF_ENTRY(hw_atomic_or) - movl L_ARG0, %ecx /* Load address of operand */ - movl (%ecx), %eax -1: - movl L_ARG1, %edx /* Load mask */ - orl %eax, %edx - lock - cmpxchgl %edx, (%ecx) /* Atomic CAS */ - jne 1b - movl %edx, %eax /* Result */ - LEAF_RET -/* - * A variant of hw_atomic_or which doesn't return a value. - * The implementation is thus comparatively more efficient. - */ - -LEAF_ENTRY(hw_atomic_or_noret) - movl L_ARG0, %ecx /* Load address of operand */ - movl L_ARG1, %edx /* Load mask */ - lock - orl %edx, (%ecx) /* Atomic OR */ - LEAF_RET - -LEAF_ENTRY(hw_atomic_and) - movl L_ARG0, %ecx /* Load address of operand */ - movl (%ecx), %eax -1: - movl L_ARG1, %edx /* Load mask */ - andl %eax, %edx - lock - cmpxchgl %edx, (%ecx) /* Atomic CAS */ - jne 1b - movl %edx, %eax /* Result */ - LEAF_RET -/* - * A variant of hw_atomic_and which doesn't return a value. - * The implementation is thus comparatively more efficient. - */ - -LEAF_ENTRY(hw_atomic_and_noret) - movl L_ARG0, %ecx /* Load address of operand */ - movl L_ARG1, %edx /* Load mask */ - lock - andl %edx, (%ecx) /* Atomic AND */ - LEAF_RET - -#else /* !__i386__ */ LEAF_ENTRY(i_bit_set) lock @@ -2346,4 +2001,3 @@ LEAF_ENTRY(hw_atomic_and_noret) lock andl %esi, (%rdi) /* Atomic OR */ LEAF_RET -#endif /* !__i386 __ */ diff --git a/osfmk/i386/i386_lowmem.h b/osfmk/i386/i386_lowmem.h index 97fa06012..810e1b533 100644 --- a/osfmk/i386/i386_lowmem.h +++ b/osfmk/i386/i386_lowmem.h @@ -37,20 +37,12 @@ */ #define I386_KERNEL_IMAGE_BASE_PAGE 0x100 -#if defined(__i386__) -#define I386_LOWMEM_RESERVED 0x18 - -#define I386_HIB_PAGETABLE 0x13 -#define I386_HIB_PAGETABLE_COUNT 5 - -#elif defined(__x86_64__) /* For K64, only 3 pages are reserved * - physical page zero, a gap page, and then real-mode-bootstrap/lowGlo. * Note that the kernel virtual address 0xffffff8000002000 is re-mapped * to the low globals and that physical page, 0x2000, is used by the bootstrap. */ #define I386_LOWMEM_RESERVED 3 -#endif #endif /* __APPLE_API_PRIVATE */ diff --git a/osfmk/i386/i386_timer.c b/osfmk/i386/i386_timer.c new file mode 100644 index 000000000..55c460461 --- /dev/null +++ b/osfmk/i386/i386_timer.c @@ -0,0 +1,520 @@ +/* + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
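Before the body of the new i386_timer.c begins, one note on i386_lock.s above: the hw_atomic_* entry points that survive at its 64-bit end are ordinary fetch-and-modify primitives, and their contract corresponds to the compiler atomics below. This is a sketch of the semantics, not the kernel's implementation; the _sk names are invented:

    #include <stdint.h>

    /* hw_atomic_add/sub return the *new* value (lock xadd plus an add);
     * hw_atomic_or/and do the same via a CAS loop; the _noret variants
     * just issue the locked read-modify-write. */
    static uint32_t hw_atomic_add_sk(volatile uint32_t *p, uint32_t delta)
    {
        return __atomic_add_fetch(p, delta, __ATOMIC_SEQ_CST);
    }

    static uint32_t hw_atomic_or_sk(volatile uint32_t *p, uint32_t mask)
    {
        return __atomic_or_fetch(p, mask, __ATOMIC_SEQ_CST);
    }

    static void hw_atomic_and_noret_sk(volatile uint32_t *p, uint32_t mask)
    {
        (void)__atomic_and_fetch(p, mask, __ATOMIC_SEQ_CST);
    }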
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * @APPLE_FREE_COPYRIGHT@ + */ +/* + * File: timer.c + * Purpose: Routines for handling the machine independent timer. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +uint32_t spurious_timers; + +/* + * Event timer interrupt. + * + * XXX a drawback of this implementation is that events serviced earlier must not set deadlines + * that occur before the entire chain completes. + * + * XXX a better implementation would use a set of generic callouts and iterate over them + */ +void +timer_intr(int user_mode, + uint64_t rip) +{ + uint64_t abstime; + rtclock_timer_t *mytimer; + cpu_data_t *pp; + int64_t latency; + uint64_t pmdeadline; + boolean_t timer_processed = FALSE; + + pp = current_cpu_datap(); + + SCHED_STATS_TIMER_POP(current_processor()); + + abstime = mach_absolute_time(); /* Get the time now */ + + /* has a pending clock timer expired? */ + mytimer = &pp->rtclock_timer; /* Point to the event timer */ + + if ((timer_processed = ((mytimer->deadline <= abstime) || + (abstime >= (mytimer->queue.earliest_soft_deadline))))) { + /* + * Log interrupt service latency (-ve value expected by tool) + * a non-PM event is expected next. + * The requested deadline may be earlier than when it was set + * - use MAX to avoid reporting bogus latencies. + */ + latency = (int64_t) (abstime - MAX(mytimer->deadline, + mytimer->when_set)); + /* Log zero timer latencies when opportunistically processing + * coalesced timers. + */ + if (latency < 0) { + TCOAL_DEBUG(0xEEEE0000, abstime, mytimer->queue.earliest_soft_deadline, abstime - mytimer->queue.earliest_soft_deadline, 0, 0); + latency = 0; + } + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + DECR_TRAP_LATENCY | DBG_FUNC_NONE, + -latency, + ((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)), + user_mode, 0, 0); + + mytimer->has_expired = TRUE; /* Remember that we popped */ + mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime); + mytimer->has_expired = FALSE; + + /* Get the time again since we ran a bit */ + abstime = mach_absolute_time(); + mytimer->when_set = abstime; + } + + /* is it time for power management state change? 
*/ + if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) { + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + DECR_PM_DEADLINE | DBG_FUNC_START, + 0, 0, 0, 0, 0); + pmCPUDeadline(pp); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + DECR_PM_DEADLINE | DBG_FUNC_END, + 0, 0, 0, 0, 0); + timer_processed = TRUE; + } + + /* schedule our next deadline */ + x86_lcpu()->rtcDeadline = EndOfAllTime; + timer_resync_deadlines(); + + if (__improbable(timer_processed == FALSE)) + spurious_timers++; +} + +/* + * Set the clock deadline. + */ +void timer_set_deadline(uint64_t deadline) +{ + rtclock_timer_t *mytimer; + spl_t s; + cpu_data_t *pp; + + s = splclock(); /* no interruptions */ + pp = current_cpu_datap(); + + mytimer = &pp->rtclock_timer; /* Point to the timer itself */ + mytimer->deadline = deadline; /* Set new expiration time */ + mytimer->when_set = mach_absolute_time(); + + timer_resync_deadlines(); + + splx(s); +} + +/* + * Re-evaluate the outstanding deadlines and select the most proximate. + * + * Should be called at splclock. + */ +void +timer_resync_deadlines(void) +{ + uint64_t deadline = EndOfAllTime; + uint64_t pmdeadline; + rtclock_timer_t *mytimer; + spl_t s = splclock(); + cpu_data_t *pp; + uint32_t decr; + + pp = current_cpu_datap(); + if (!pp->cpu_running) + /* There's really nothing to do if this processor is down */ + return; + + /* + * If we have a clock timer set, pick that. + */ + mytimer = &pp->rtclock_timer; + if (!mytimer->has_expired && + 0 < mytimer->deadline && mytimer->deadline < EndOfAllTime) + deadline = mytimer->deadline; + + /* + * If we have a power management deadline, see if that's earlier. + */ + pmdeadline = pmCPUGetDeadline(pp); + if (0 < pmdeadline && pmdeadline < deadline) + deadline = pmdeadline; + + /* + * Go and set the "pop" event. 
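The deadline selection being finished here reduces to: prefer the rtclock timer's deadline if one is armed, let an earlier power-management deadline win, then program the hardware pop. A compact sketch, with END_OF_ALL_TIME standing in for the kernel's EndOfAllTime and pick_deadline_sk() an invented name:

    #include <stdint.h>

    #define END_OF_ALL_TIME UINT64_MAX    /* stand-in for EndOfAllTime */

    static uint64_t pick_deadline_sk(uint64_t timer_deadline,
                                     int timer_has_expired,
                                     uint64_t pm_deadline)
    {
        uint64_t deadline = END_OF_ALL_TIME;
        if (!timer_has_expired && 0 < timer_deadline &&
            timer_deadline < END_OF_ALL_TIME)
            deadline = timer_deadline;    /* armed clock timer */
        if (0 < pm_deadline && pm_deadline < deadline)
            deadline = pm_deadline;       /* earlier PM deadline wins */
        return deadline;                  /* caller hands this to setPop() */
    }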
+ */ + decr = (uint32_t) setPop(deadline); + + /* Record non-PM deadline for latency tool */ + if (decr != 0 && deadline != pmdeadline) { + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + DECR_SET_DEADLINE | DBG_FUNC_NONE, + decr, 2, + deadline, + mytimer->queue.count, 0); + } + splx(s); +} + +void +timer_queue_expire_local( +__unused void *arg) +{ + rtclock_timer_t *mytimer; + uint64_t abstime; + cpu_data_t *pp; + + pp = current_cpu_datap(); + + mytimer = &pp->rtclock_timer; + abstime = mach_absolute_time(); + + mytimer->has_expired = TRUE; + mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime); + mytimer->has_expired = FALSE; + mytimer->when_set = mach_absolute_time(); + + timer_resync_deadlines(); +} + +void +timer_queue_expire_rescan( +__unused void *arg) +{ + rtclock_timer_t *mytimer; + uint64_t abstime; + cpu_data_t *pp; + + assert(ml_get_interrupts_enabled() == FALSE); + pp = current_cpu_datap(); + + mytimer = &pp->rtclock_timer; + abstime = mach_absolute_time(); + + mytimer->has_expired = TRUE; + mytimer->deadline = timer_queue_expire_with_options(&mytimer->queue, abstime, TRUE); + mytimer->has_expired = FALSE; + mytimer->when_set = mach_absolute_time(); + + timer_resync_deadlines(); +} + +/* N.B.: Max leeway values assume 1GHz timebase */ +timer_coalescing_priority_params_t tcoal_prio_params = +{ + /* Deadline scale values for each thread attribute */ + 0, -5, 3, 3, 3, + /* Maximum leeway in abstime for each thread attribute */ + 0ULL, 100*NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC, NSEC_PER_MSEC, + /* Deadline scale values for each latency QoS tier */ + {3, 2, 1, -2, -15, -15}, + /* Maximum leeway in abstime for each latency QoS Tier*/ + {1*NSEC_PER_MSEC, 5*NSEC_PER_MSEC, 20*NSEC_PER_MSEC, 75*NSEC_PER_MSEC, + 10*NSEC_PER_SEC, 10*NSEC_PER_SEC}, + /* Signifies that the tier requires rate-limiting */ + {FALSE, FALSE, FALSE, FALSE, TRUE, TRUE} +}; +#define TIMER_RESORT_THRESHOLD_ABSTIME (50 * NSEC_PER_MSEC) + +#if TCOAL_PRIO_STATS +int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl; +#define TCOAL_PRIO_STAT(x) (x++) +#else +#define TCOAL_PRIO_STAT(x) +#endif + +/* Select timer coalescing window based on per-task quality-of-service hints */ +static boolean_t tcoal_qos_adjust(thread_t t, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited) { + uint32_t latency_qos; + boolean_t adjusted = FALSE; + task_t ctask = t->task; + + if (ctask) { + latency_qos = proc_get_effective_task_policy(ctask, TASK_POLICY_LATENCY_QOS); + + assert(latency_qos <= NUM_LATENCY_QOS_TIERS); + + if (latency_qos) { + *tshift = tcoal_prio_params.latency_qos_scale[latency_qos - 1]; + *tmax = tcoal_prio_params.latency_qos_ns_max[latency_qos - 1]; + *pratelimited = tcoal_prio_params.latency_tier_rate_limited[latency_qos - 1]; + adjusted = TRUE; + } + } + return adjusted; +} + +/* Adjust timer deadlines based on priority of the thread and the + * urgency value provided at timeout establishment. With this mechanism, + * timers are no longer necessarily sorted in order of soft deadline + * on a given timer queue, i.e. they may be differentially skewed. + * In the current scheme, this could lead to fewer pending timers + * processed than is technically possible when the HW deadline arrives. 
+ */ +static void +timer_compute_leeway(thread_t cthread, int32_t urgency, int32_t *tshift, uint64_t *tmax, boolean_t *pratelimited) { + int16_t tpri = cthread->sched_pri; + + if ((urgency & TIMER_CALL_USER_MASK) != 0) { + if (tpri >= BASEPRI_RTQUEUES || + urgency == TIMER_CALL_USER_CRITICAL) { + *tshift = tcoal_prio_params.timer_coalesce_rt_shift; + *tmax = tcoal_prio_params.timer_coalesce_rt_ns_max; + TCOAL_PRIO_STAT(rt_tcl); + } else if ((urgency == TIMER_CALL_USER_BACKGROUND) || + proc_get_effective_thread_policy(cthread, TASK_POLICY_DARWIN_BG)) { + /* Determine if timer should be subjected to a lower QoS */ + if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) { + if (*tmax > tcoal_prio_params.timer_coalesce_bg_ns_max) { + return; + } else { + *pratelimited = FALSE; + } + } + *tshift = tcoal_prio_params.timer_coalesce_bg_shift; + *tmax = tcoal_prio_params.timer_coalesce_bg_ns_max; + TCOAL_PRIO_STAT(bg_tcl); + } else if (tpri >= MINPRI_KERNEL) { + *tshift = tcoal_prio_params.timer_coalesce_kt_shift; + *tmax = tcoal_prio_params.timer_coalesce_kt_ns_max; + TCOAL_PRIO_STAT(kt_tcl); + } else if (cthread->sched_mode == TH_MODE_FIXED) { + *tshift = tcoal_prio_params.timer_coalesce_fp_shift; + *tmax = tcoal_prio_params.timer_coalesce_fp_ns_max; + TCOAL_PRIO_STAT(fp_tcl); + } else if (tcoal_qos_adjust(cthread, tshift, tmax, pratelimited)) { + TCOAL_PRIO_STAT(qos_tcl); + } else if (cthread->sched_mode == TH_MODE_TIMESHARE) { + *tshift = tcoal_prio_params.timer_coalesce_ts_shift; + *tmax = tcoal_prio_params.timer_coalesce_ts_ns_max; + TCOAL_PRIO_STAT(ts_tcl); + } else { + TCOAL_PRIO_STAT(nc_tcl); + } + } else if (urgency == TIMER_CALL_SYS_BACKGROUND) { + *tshift = tcoal_prio_params.timer_coalesce_bg_shift; + *tmax = tcoal_prio_params.timer_coalesce_bg_ns_max; + TCOAL_PRIO_STAT(bg_tcl); + } else { + *tshift = tcoal_prio_params.timer_coalesce_kt_shift; + *tmax = tcoal_prio_params.timer_coalesce_kt_ns_max; + TCOAL_PRIO_STAT(kt_tcl); + } +} + +int timer_user_idle_level; + +uint64_t +timer_call_slop(uint64_t deadline, uint64_t now, uint32_t flags, thread_t cthread, boolean_t *pratelimited) +{ + int32_t tcs_shift = 0; + uint64_t tcs_ns_max = 0; + uint64_t adjval; + uint32_t urgency = (flags & TIMER_CALL_URGENCY_MASK); + + if (mach_timer_coalescing_enabled && + (deadline > now) && (urgency != TIMER_CALL_SYS_CRITICAL)) { + timer_compute_leeway(cthread, urgency, &tcs_shift, &tcs_ns_max, pratelimited); + + if (tcs_shift >= 0) + adjval = MIN((deadline - now) >> tcs_shift, tcs_ns_max); + else + adjval = MIN((deadline - now) << (-tcs_shift), tcs_ns_max); + /* Apply adjustments derived from "user idle level" heuristic */ + adjval += (adjval * timer_user_idle_level) >> 7; + return adjval; + } else { + return 0; + } +} + +boolean_t +timer_resort_threshold(uint64_t skew) { + if (skew >= TIMER_RESORT_THRESHOLD_ABSTIME) + return TRUE; + else + return FALSE; +} + +int +ml_timer_get_user_idle_level(void) { + return timer_user_idle_level; +} + +kern_return_t ml_timer_set_user_idle_level(int ilevel) { + boolean_t do_reeval = FALSE; + + if ((ilevel < 0) || (ilevel > 128)) + return KERN_INVALID_ARGUMENT; + + if (ilevel < timer_user_idle_level) { + do_reeval = TRUE; + } + + timer_user_idle_level = ilevel; + + if (do_reeval) + ml_timer_evaluate(); + + return KERN_SUCCESS; +} + +/* + * Return the local timer queue for a running processor + * else return the boot processor's timer queue. 
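timer_call_slop() above turns time-to-deadline into a permitted coalescing window: shift by the tier's scale (a negative scale widens instead of narrowing), clamp to the tier's maximum, then boost by up to 100% according to the user idle level (0..128). A worked sketch follows; the shift, maximum and idle level are passed in with made-up example values, where the kernel looks them up in tcoal_prio_params:

    #include <stdint.h>

    #define NSEC_PER_MSEC 1000000ULL
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Mirrors the arithmetic in timer_call_slop(). */
    static uint64_t slop_sk(uint64_t deadline, uint64_t now,
                            int32_t shift, uint64_t ns_max, uint32_t idle_level)
    {
        uint64_t adjval;
        if (shift >= 0)
            adjval = MIN((deadline - now) >> shift, ns_max);
        else
            adjval = MIN((deadline - now) << (-shift), ns_max);
        adjval += (adjval * idle_level) >> 7;   /* idle 128 doubles the window */
        return adjval;
    }

    /* Example: 80 ms to deadline, shift 3, max 100 ms, idle level 64:
     * 80 ms >> 3 = 10 ms, plus 10 ms * 64/128 = 5 ms, so 15 ms of leeway. */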
+ */ +mpqueue_head_t * +timer_queue_assign( + uint64_t deadline) +{ + cpu_data_t *cdp = current_cpu_datap(); + mpqueue_head_t *queue; + + if (cdp->cpu_running) { + queue = &cdp->rtclock_timer.queue; + + if (deadline < cdp->rtclock_timer.deadline) + timer_set_deadline(deadline); + } + else + queue = &cpu_datap(master_cpu)->rtclock_timer.queue; + + return (queue); +} + +void +timer_queue_cancel( + mpqueue_head_t *queue, + uint64_t deadline, + uint64_t new_deadline) +{ + if (queue == &current_cpu_datap()->rtclock_timer.queue) { + if (deadline < new_deadline) + timer_set_deadline(new_deadline); + } +} + +/* + * timer_queue_migrate_cpu() is called from the Power-Management kext + * when a logical processor goes idle (in a deep C-state) with a distant + * deadline so that its timer queue can be moved to another processor. + * This target processor should be the least idle (most busy) -- + * currently this is the primary processor for the calling thread's package. + * Locking restrictions demand that the target cpu must be the boot cpu. + */ +uint32_t +timer_queue_migrate_cpu(int target_cpu) +{ + cpu_data_t *target_cdp = cpu_datap(target_cpu); + cpu_data_t *cdp = current_cpu_datap(); + int ntimers_moved; + + assert(!ml_get_interrupts_enabled()); + assert(target_cpu != cdp->cpu_number); + assert(target_cpu == master_cpu); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + DECR_TIMER_MIGRATE | DBG_FUNC_START, + target_cpu, + cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >>32), + 0, 0); + + /* + * Move timer requests from the local queue to the target processor's. + * The return value is the number of requests moved. If this is 0, + * it indicates that the first (i.e. earliest) timer is earlier than + * the earliest for the target processor. Since this would force a + * resync, the move of this and all later requests is aborted. + */ + ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue, + &target_cdp->rtclock_timer.queue); + + /* + * Assuming we moved stuff, clear local deadline. + */ + if (ntimers_moved > 0) { + cdp->rtclock_timer.deadline = EndOfAllTime; + setPop(EndOfAllTime); + } + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + DECR_TIMER_MIGRATE | DBG_FUNC_END, + target_cpu, ntimers_moved, 0, 0, 0); + + return ntimers_moved; +} + +mpqueue_head_t * +timer_queue_cpu(int cpu) +{ + return &cpu_datap(cpu)->rtclock_timer.queue; +} + +void +timer_call_cpu(int cpu, void (*fn)(void *), void *arg) +{ + mp_cpus_call(cpu_to_cpumask(cpu), SYNC, fn, arg); +} + +void +timer_call_nosync_cpu(int cpu, void (*fn)(void *), void *arg) +{ + /* XXX Needs error checking and retry */ + mp_cpus_call(cpu_to_cpumask(cpu), NOSYNC, fn, arg); +} + diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index 9a9735c5c..d5c3db7f0 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2008 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -78,11 +78,7 @@ #include #include #include -#ifdef __x86_64__ #include -#else -#include -#endif #include #include @@ -105,6 +101,8 @@ vm_offset_t vm_kernel_top; vm_offset_t vm_kernel_stext; vm_offset_t vm_kernel_etext; vm_offset_t vm_kernel_slide; +vm_offset_t vm_kext_base = VM_MIN_KERNEL_AND_KEXT_ADDRESS; +vm_offset_t vm_kext_top = VM_MIN_KERNEL_ADDRESS; #define MAXLORESERVE (32 * 1024 * 1024) @@ -156,11 +154,8 @@ uint64_t firmware_MMIO_bytes; /* * Linker magic to establish the highest address in the kernel.
- * This is replicated from libsa which marks last_kernel_symbol - * but that's not visible from here in osfmk. */ -__asm__(".zerofill __LAST, __last, _kernel_top, 0"); -extern void *kernel_top; +extern void *last_kernel_symbol; #if DEBUG #define PRINT_PMAP_MEMORY_TABLE @@ -183,7 +178,6 @@ i386_vm_init(uint64_t maxmem, unsigned int msize; ppnum_t fap; unsigned int i; - unsigned int safeboot; ppnum_t maxpg = 0; uint32_t pmap_type; uint32_t maxloreserve; @@ -192,7 +186,6 @@ i386_vm_init(uint64_t maxmem, boolean_t mbuf_override = FALSE; boolean_t coalescing_permitted; vm_kernel_base_page = i386_btop(args->kaddr); -#ifdef __x86_64__ vm_offset_t base_address; vm_offset_t static_base_address; @@ -238,8 +231,6 @@ i386_vm_init(uint64_t maxmem, } } -#endif // __x86_64__ - /* * Now retrieve addresses for end, edata, and etext * from MACH-O headers. @@ -270,9 +261,7 @@ i386_vm_init(uint64_t maxmem, eHIB = segHIBB + segSizeHIB; /* Zero-padded from ehib to stext if text is 2M-aligned */ stext = segTEXTB; -#ifdef __x86_64__ lowGlo.lgStext = stext; -#endif etext = (vm_offset_t) round_page_64(lastsectTEXT->addr + lastsectTEXT->size); /* Zero-padded from etext to sdata if text is 2M-aligned */ sdata = segDATAB; @@ -303,10 +292,10 @@ i386_vm_init(uint64_t maxmem, DBG("edata = %p\n", (void *) edata); DBG("sconstdata = %p\n", (void *) sconstdata); DBG("econstdata = %p\n", (void *) econstdata); - DBG("kernel_top = %p\n", (void *) &kernel_top); + DBG("kernel_top = %p\n", (void *) &last_kernel_symbol); vm_kernel_base = sHIB; - vm_kernel_top = (vm_offset_t) &kernel_top; + vm_kernel_top = (vm_offset_t) &last_kernel_symbol; vm_kernel_stext = stext; vm_kernel_etext = etext; @@ -316,9 +305,6 @@ i386_vm_init(uint64_t maxmem, * Compute the memory size. */ - if ((1 == vm_himemory_mode) || PE_parse_boot_argn("-x", &safeboot, sizeof (safeboot))) { - maxpg = 1 << (32 - I386_PGSHIFT); - } avail_remaining = 0; avail_end = 0; pmptr = pmap_memory_regions; @@ -346,6 +332,32 @@ i386_vm_init(uint64_t maxmem, base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1); + if (base == 0) { + /* + * Avoid having to deal with the edge case of the + * very first possible physical page and the roll-over + * to -1; just ignore that page. + */ + kprintf("WARNING: ignoring first page in [0x%llx:0x%llx]\n", (uint64_t) base, (uint64_t) top); + base++; + } + if (top + 1 == 0) { + /* + * Avoid having to deal with the edge case of the + * very last possible physical page and the roll-over + * to 0; just ignore that page. + */ + kprintf("WARNING: ignoring last page in [0x%llx:0x%llx]\n", (uint64_t) base, (uint64_t) top); + top--; + } + if (top < base) { + /* + * That was the only page in that region, so + * ignore the whole region. 
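The two guards just added exist because ppnum_t arithmetic wraps: a region ending at the last representable page makes top + 1 roll over to 0, and page 0 makes base - 1 roll over to the maximum, so trimming one page off either extreme keeps later one-past-the-end arithmetic sound. A tiny demonstration (assuming, as the kernel does here, a 32-bit ppnum_t):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t ppnum_t;    /* physical page number */

    int main(void)
    {
        ppnum_t top  = UINT32_MAX;   /* last possible physical page  */
        ppnum_t base = 0;            /* first possible physical page */
        printf("top + 1  = %" PRIu32 "\n", (ppnum_t)(top + 1));   /* 0: wrapped */
        printf("base - 1 = %" PRIu32 "\n", (ppnum_t)(base - 1));  /* 4294967295 */
        return 0;
    }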
+ */ + continue; + } + #if MR_RSV_TEST static uint32_t nmr = 0; if ((base > 0x20000) && (nmr++ < 4)) @@ -472,14 +484,16 @@ i386_vm_init(uint64_t maxmem, if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) && (top < vm_kernel_base_page)) { - pmptr->alloc = pmptr->base; + pmptr->alloc_up = pmptr->base; + pmptr->alloc_down = pmptr->end; pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; } else { /* * mark as already mapped */ - pmptr->alloc = top; + pmptr->alloc_up = top + 1; + pmptr->alloc_down = top; } pmptr->type = pmap_type; pmptr->attribute = mptr->Attribute; @@ -491,7 +505,9 @@ i386_vm_init(uint64_t maxmem, * mark already allocated */ pmptr->base = base; - pmptr->alloc = pmptr->end = (fap - 1); + pmptr->end = (fap - 1); + pmptr->alloc_up = pmptr->end + 1; + pmptr->alloc_down = pmptr->end; pmptr->type = pmap_type; pmptr->attribute = mptr->Attribute; /* @@ -501,10 +517,10 @@ i386_vm_init(uint64_t maxmem, pmptr++; pmap_memory_region_count++; - pmptr->alloc = pmptr->base = fap; + pmptr->alloc_up = pmptr->base = fap; pmptr->type = pmap_type; pmptr->attribute = mptr->Attribute; - pmptr->end = top; + pmptr->alloc_down = pmptr->end = top; if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; @@ -512,10 +528,10 @@ i386_vm_init(uint64_t maxmem, /* * entire range useable */ - pmptr->alloc = pmptr->base = base; + pmptr->alloc_up = pmptr->base = base; pmptr->type = pmap_type; pmptr->attribute = mptr->Attribute; - pmptr->end = top; + pmptr->alloc_down = pmptr->end = top; if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; } @@ -531,12 +547,12 @@ i386_vm_init(uint64_t maxmem, if (prev_pmptr && (pmptr->type == prev_pmptr->type) && (coalescing_permitted) && - (pmptr->base == pmptr->alloc) && + (pmptr->base == pmptr->alloc_up) && + (prev_pmptr->end == prev_pmptr->alloc_down) && (pmptr->base == (prev_pmptr->end + 1))) { - if (prev_pmptr->end == prev_pmptr->alloc) - prev_pmptr->alloc = pmptr->base; prev_pmptr->end = pmptr->end; + prev_pmptr->alloc_down = pmptr->alloc_down; } else { pmap_memory_region_count++; prev_pmptr = pmptr; @@ -552,10 +568,11 @@ i386_vm_init(uint64_t maxmem, addr64_t region_start, region_end; addr64_t efi_start, efi_end; for (j=0;j<pmap_memory_region_count;j++, p++) { - kprintf("pmap region %d type %d base 0x%llx alloc 0x%llx top 0x%llx\n", + kprintf("pmap region %d type %d base 0x%llx alloc_up 0x%llx alloc_down 0x%llx top 0x%llx\n", j, p->type, (addr64_t) p->base << I386_PGSHIFT, - (addr64_t) p->alloc << I386_PGSHIFT, + (addr64_t) p->alloc_up << I386_PGSHIFT, + (addr64_t) p->alloc_down << I386_PGSHIFT, (addr64_t) p->end << I386_PGSHIFT); region_start = (addr64_t) p->base << I386_PGSHIFT; region_end = ((addr64_t) p->end << I386_PGSHIFT) - 1; @@ -626,8 +643,10 @@ i386_vm_init(uint64_t maxmem, highest_pn = cur_end; pages_to_use--; } - if (pages_to_use == 0) + if (pages_to_use == 0) { pmap_memory_regions[cur_region].end = cur_end; + pmap_memory_regions[cur_region].alloc_down = cur_end; + } cur_region++; } @@ -669,7 +688,9 @@ i386_vm_init(uint64_t maxmem, else maxloreserve = MAXLORESERVE / PAGE_SIZE; +#if SOCKETS mbuf_reserve = bsd_mbuf_cluster_reserve(&mbuf_override) / PAGE_SIZE; +#endif } else maxloreserve = (maxloreserve * (1024 * 1024)) / PAGE_SIZE; @@ -717,8 +738,8 @@ pmap_next_page_reserved(ppnum_t *pn) { for (n = 0; n < pmap_last_reserved_range_index; n++) { uint32_t reserved_index = pmap_reserved_range_indices[n]; region = &pmap_memory_regions[reserved_index]; - if (region->alloc < region->end) { - *pn =
region->alloc_up++; avail_remaining--; if (*pn > max_ppnum) @@ -729,7 +750,7 @@ pmap_next_page_reserved(ppnum_t *pn) { pmap_reserved_pages_allocated++; #if DEBUG - if (region->alloc == region->end) { + if (region->alloc_up > region->alloc_down) { kprintf("Exhausted reserved range index: %u, base: 0x%x end: 0x%x, type: 0x%x, attribute: 0x%llx\n", reserved_index, region->base, region->end, region->type, region->attribute); } #endif @@ -755,8 +776,8 @@ pmap_next_page_hi( for (n = pmap_memory_region_count - 1; n >= 0; n--) { region = &pmap_memory_regions[n]; - if (region->alloc != region->end) { - *pn = region->alloc++; + if (region->alloc_down >= region->alloc_up) { + *pn = region->alloc_down--; avail_remaining--; if (*pn > max_ppnum) @@ -784,12 +805,12 @@ pmap_next_page( ppnum_t *pn) { if (avail_remaining) while (pmap_memory_region_current < pmap_memory_region_count) { - if (pmap_memory_regions[pmap_memory_region_current].alloc == - pmap_memory_regions[pmap_memory_region_current].end) { + if (pmap_memory_regions[pmap_memory_region_current].alloc_up > + pmap_memory_regions[pmap_memory_region_current].alloc_down) { pmap_memory_region_current++; continue; } - *pn = pmap_memory_regions[pmap_memory_region_current].alloc++; + *pn = pmap_memory_regions[pmap_memory_region_current].alloc_up++; avail_remaining--; if (*pn > max_ppnum) diff --git a/osfmk/i386/idle_pt.c b/osfmk/i386/idle_pt.c deleted file mode 100644 index 4110c212e..000000000 --- a/osfmk/i386/idle_pt.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2008 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
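Setting the deleted idle_pt.c boilerplate aside for a moment: the alloc_up/alloc_down hunks above turn each pmap region into a two-ended allocator, with pmap_next_page() consuming from the bottom, pmap_next_page_hi() from the top, and exhaustion detected exactly when the cursors cross. A minimal sketch of that discipline; region_sk and the function names are illustrative:

    #include <stdbool.h>
    #include <stdint.h>

    typedef uint32_t ppnum_t;

    typedef struct {
        ppnum_t alloc_up;     /* next page to hand out from the bottom */
        ppnum_t alloc_down;   /* next page to hand out from the top    */
    } region_sk;

    static bool next_page_low(region_sk *r, ppnum_t *pn)
    {
        if (r->alloc_up > r->alloc_down)
            return false;             /* cursors crossed: exhausted */
        *pn = r->alloc_up++;
        return true;
    }

    static bool next_page_high(region_sk *r, ppnum_t *pn)
    {
        if (r->alloc_down < r->alloc_up)
            return false;
        *pn = r->alloc_down--;
        return true;
    }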
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#include - -#define PML4_PROT (INTEL_PTE_VALID | INTEL_PTE_WRITE) -pml4_entry_t IdlePML4[PTE_PER_PAGE] - __attribute__((section("__INITPT, __data"))) = { -}; - -#define PDPT_PROT (INTEL_PTE_VALID) -pdpt_entry_t IdlePDPT[PTE_PER_PAGE] - __attribute__((section("__INITPT, __data"))) = { - [0] = ((uint64_t)(INITPT_SEG_BASE + 2*PAGE_SIZE) | PDPT_PROT), - [1] = ((uint64_t)(INITPT_SEG_BASE + 3*PAGE_SIZE) | PDPT_PROT), - [2] = ((uint64_t)(INITPT_SEG_BASE + 4*PAGE_SIZE) | PDPT_PROT), - [3] = ((uint64_t)(INITPT_SEG_BASE + 5*PAGE_SIZE) | PDPT_PROT), -}; - -#if NPGPTD != 4 -#error Please update idle_pt.c to reflect the new value of NPGPTD -#endif - -#if MACHINE_BOOTSTRAPPTD - -#define ID_MAP_2MEG(x) [(x)] = ((((uint64_t)(x)) << 21) | (INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE)), - -#define L0(x,n) x(n) -#define L1(x,n) L0(x,n-1) L0(x,n) -#define L2(x,n) L1(x,n-2) L1(x,n) -#define L3(x,n) L2(x,n-4) L2(x,n) -#define L4(x,n) L3(x,n-8) L3(x,n) -#define L5(x,n) L4(x,n-16) L4(x,n) -#define L6(x,n) L5(x,n-32) L5(x,n) -#define L7(x,n) L6(x,n-64) L6(x,n) -#define L8(x,n) L7(x,n-128) L7(x,n) -#define L9(x,n) L8(x,n-256) L8(x,n) -#define L10(x,n) L9(x,n-512) L9(x,n) -#define L11(x,n) L10(x,n-1024) L10(x,n) - -#define FOR_0_TO_2047(x) L11(x,2047) - -pd_entry_t BootPTD[2048] - __attribute__((section("__INITPT, __data"))) = { - FOR_0_TO_2047(ID_MAP_2MEG) -}; -#endif /* MACHINE_BOOTSTRAPPTD */ diff --git a/osfmk/i386/idt.s b/osfmk/i386/idt.s deleted file mode 100644 index d56556364..000000000 --- a/osfmk/i386/idt.s +++ /dev/null @@ -1,1221 +0,0 @@ -/* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ -#include -#include -#include -#include -#include -#define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */ -#include -#include -#include -#include - -/* - * Low-memory handlers. - */ -#define LO_ALLINTRS EXT(lo_allintrs32) -#define LO_ALLTRAPS EXT(lo_alltraps32) -#define LO_SYSENTER EXT(lo_sysenter32) -#define LO_UNIX_SCALL EXT(lo_unix_scall32) -#define LO_MACH_SCALL EXT(lo_mach_scall32) -#define LO_MDEP_SCALL EXT(lo_mdep_scall32) - -#define HI_DATA(lo_addr) ( (EXT(lo_addr) - EXT(hi_remap_data)) + HIGH_IDT_BASE ) -#define HI_TEXT(lo_text) ( (EXT(lo_text) - EXT(hi_remap_text)) + HIGH_MEM_BASE ) - -/* - * Interrupt descriptor table and code vectors for it. - */ -#define IDT_BASE_ENTRY(vec,seg,type) \ - .data ;\ - .long EXT(vec) - EXT(hi_remap_text) + HIGH_MEM_BASE ; \ - .word seg ;\ - .byte 0 ;\ - .byte type ;\ - .text - -#define IDT_BASE_ENTRY_INT(vec,seg,type) \ - .data ;\ - .long vec - EXT(hi_remap_text) + HIGH_MEM_BASE ; \ - .word seg ;\ - .byte 0 ;\ - .byte type ;\ - .text - -#define IDT_BASE_ENTRY_TG(vec,seg,type) \ - .data ;\ - .long 0 ; \ - .word seg ;\ - .byte 0 ;\ - .byte type ;\ - .text - -#define IDT_ENTRY(vec,type) IDT_BASE_ENTRY(vec,KERNEL32_CS,type) -#define IDT_ENTRY_INT(vec,type) IDT_BASE_ENTRY_INT(vec,KERNEL32_CS,type) - -/* - * No error code. Clear error code and push trap number. - */ -#define EXCEPTION(n,name) \ - IDT_ENTRY(name,K_INTR_GATE);\ -Entry(name) ;\ - pushl $0 ;\ - pushl $(n) ;\ - pusha ;\ - movl $(LO_ALLTRAPS),%ebx ;\ - jmp enter_lohandler - - -/* - * Interrupt from user. Clear error code and push trap number. - */ -#define EXCEP_USR(n,name) \ - IDT_ENTRY(name,U_INTR_GATE);\ -Entry(name) ;\ - pushl $0 ;\ - pushl $(n) ;\ - pusha ;\ - movl $(LO_ALLTRAPS),%ebx ;\ - jmp enter_lohandler - - -/* - * Special interrupt code. - */ -#define EXCEP_SPC(n,name) \ - IDT_ENTRY(name,K_INTR_GATE) - -/* - * Special interrupt code from user. - */ -#define EXCEP_SPC_USR(n,name) \ - IDT_ENTRY(name,U_INTR_GATE) - - -/* - * Extra-special interrupt code. Note that no offset may be - * specified in a task gate descriptor, so name is ignored. - */ - -/* Double-fault fatal handler */ -#define DF_FATAL_TASK(n,name) \ - IDT_BASE_ENTRY_TG(0,DF_TSS,K_TASK_GATE) - -/* machine-check handler */ -#define MC_FATAL_TASK(n,name) \ - IDT_BASE_ENTRY_TG(0,MC_TSS,K_TASK_GATE) - -/* - * Error code has been pushed. Push trap number. - */ -#define EXCEP_ERR(n,name) \ - IDT_ENTRY(name,K_INTR_GATE) ;\ -Entry(name) ;\ - pushl $(n) ;\ - pusha ;\ - movl $(LO_ALLTRAPS),%ebx ;\ - jmp enter_lohandler - - -/* - * Interrupt. 
- */ -#define INTERRUPT(n) \ - IDT_ENTRY_INT(L_ ## n,K_INTR_GATE) ;\ - .align FALIGN ;\ -L_ ## n: ;\ - pushl $0 ;\ - pushl $(n) ;\ - pusha ;\ - movl $(LO_ALLINTRS),%ebx ;\ - jmp enter_lohandler - - - .data - .align 12 -Entry(master_idt) -Entry(hi_remap_data) - .text - .align 12 -Entry(hi_remap_text) - -EXCEPTION(0x00,t_zero_div) -EXCEP_SPC(0x01,hi_debug) -INTERRUPT(0x02) /* NMI */ -EXCEP_USR(0x03,t_int3) -EXCEP_USR(0x04,t_into) -EXCEP_USR(0x05,t_bounds) -EXCEPTION(0x06,t_invop) -EXCEPTION(0x07,t_nofpu) -DF_FATAL_TASK(0x08,df_task_start) -EXCEPTION(0x09,a_fpu_over) -EXCEPTION(0x0a,a_inv_tss) -EXCEP_SPC(0x0b,hi_segnp) -EXCEP_ERR(0x0c,t_stack_fault) -EXCEP_SPC(0x0d,hi_gen_prot) -EXCEP_SPC(0x0e,hi_page_fault) -EXCEPTION(0x0f,t_trap_0f) -EXCEPTION(0x10,t_fpu_err) -EXCEPTION(0x11,t_trap_11) -MC_FATAL_TASK(0x12,mc_task_start) -EXCEPTION(0x13,t_sse_err) -EXCEPTION(0x14,t_trap_14) -EXCEPTION(0x15,t_trap_15) -EXCEPTION(0x16,t_trap_16) -EXCEPTION(0x17,t_trap_17) -EXCEPTION(0x18,t_trap_18) -EXCEPTION(0x19,t_trap_19) -EXCEPTION(0x1a,t_trap_1a) -EXCEPTION(0x1b,t_trap_1b) -EXCEPTION(0x1c,t_trap_1c) -EXCEPTION(0x1d,t_trap_1d) -EXCEPTION(0x1e,t_trap_1e) -EXCEPTION(0x1f,t_trap_1f) - -INTERRUPT(0x20) -INTERRUPT(0x21) -INTERRUPT(0x22) -INTERRUPT(0x23) -INTERRUPT(0x24) -INTERRUPT(0x25) -INTERRUPT(0x26) -INTERRUPT(0x27) -INTERRUPT(0x28) -INTERRUPT(0x29) -INTERRUPT(0x2a) -INTERRUPT(0x2b) -INTERRUPT(0x2c) -INTERRUPT(0x2d) -INTERRUPT(0x2e) -INTERRUPT(0x2f) - -INTERRUPT(0x30) -INTERRUPT(0x31) -INTERRUPT(0x32) -INTERRUPT(0x33) -INTERRUPT(0x34) -INTERRUPT(0x35) -INTERRUPT(0x36) -INTERRUPT(0x37) -INTERRUPT(0x38) -INTERRUPT(0x39) -INTERRUPT(0x3a) -INTERRUPT(0x3b) -INTERRUPT(0x3c) -INTERRUPT(0x3d) -INTERRUPT(0x3e) -INTERRUPT(0x3f) - -INTERRUPT(0x40) -INTERRUPT(0x41) -INTERRUPT(0x42) -INTERRUPT(0x43) -INTERRUPT(0x44) -INTERRUPT(0x45) -INTERRUPT(0x46) -INTERRUPT(0x47) -INTERRUPT(0x48) -INTERRUPT(0x49) -INTERRUPT(0x4a) -INTERRUPT(0x4b) -INTERRUPT(0x4c) -INTERRUPT(0x4d) -INTERRUPT(0x4e) -INTERRUPT(0x4f) - -INTERRUPT(0x50) -INTERRUPT(0x51) -INTERRUPT(0x52) -INTERRUPT(0x53) -INTERRUPT(0x54) -INTERRUPT(0x55) -INTERRUPT(0x56) -INTERRUPT(0x57) -INTERRUPT(0x58) -INTERRUPT(0x59) -INTERRUPT(0x5a) -INTERRUPT(0x5b) -INTERRUPT(0x5c) -INTERRUPT(0x5d) -INTERRUPT(0x5e) -INTERRUPT(0x5f) - -INTERRUPT(0x60) -INTERRUPT(0x61) -INTERRUPT(0x62) -INTERRUPT(0x63) -INTERRUPT(0x64) -INTERRUPT(0x65) -INTERRUPT(0x66) -INTERRUPT(0x67) -INTERRUPT(0x68) -INTERRUPT(0x69) -INTERRUPT(0x6a) -INTERRUPT(0x6b) -INTERRUPT(0x6c) -INTERRUPT(0x6d) -INTERRUPT(0x6e) -INTERRUPT(0x6f) - -INTERRUPT(0x70) -INTERRUPT(0x71) -INTERRUPT(0x72) -INTERRUPT(0x73) -INTERRUPT(0x74) -INTERRUPT(0x75) -INTERRUPT(0x76) -INTERRUPT(0x77) -INTERRUPT(0x78) -INTERRUPT(0x79) -INTERRUPT(0x7a) -INTERRUPT(0x7b) -INTERRUPT(0x7c) -INTERRUPT(0x7d) -INTERRUPT(0x7e) -EXCEP_USR(0x7f, t_dtrace_ret) - -EXCEP_SPC_USR(0x80,hi_unix_scall) -EXCEP_SPC_USR(0x81,hi_mach_scall) -EXCEP_SPC_USR(0x82,hi_mdep_scall) -INTERRUPT(0x83) -INTERRUPT(0x84) -INTERRUPT(0x85) -INTERRUPT(0x86) -INTERRUPT(0x87) -INTERRUPT(0x88) -INTERRUPT(0x89) -INTERRUPT(0x8a) -INTERRUPT(0x8b) -INTERRUPT(0x8c) -INTERRUPT(0x8d) -INTERRUPT(0x8e) -INTERRUPT(0x8f) - -INTERRUPT(0x90) -INTERRUPT(0x91) -INTERRUPT(0x92) -INTERRUPT(0x93) -INTERRUPT(0x94) -INTERRUPT(0x95) -INTERRUPT(0x96) -INTERRUPT(0x97) -INTERRUPT(0x98) -INTERRUPT(0x99) -INTERRUPT(0x9a) -INTERRUPT(0x9b) -INTERRUPT(0x9c) -INTERRUPT(0x9d) -INTERRUPT(0x9e) -INTERRUPT(0x9f) - -INTERRUPT(0xa0) -INTERRUPT(0xa1) -INTERRUPT(0xa2) -INTERRUPT(0xa3) -INTERRUPT(0xa4) -INTERRUPT(0xa5) 
-INTERRUPT(0xa6) -INTERRUPT(0xa7) -INTERRUPT(0xa8) -INTERRUPT(0xa9) -INTERRUPT(0xaa) -INTERRUPT(0xab) -INTERRUPT(0xac) -INTERRUPT(0xad) -INTERRUPT(0xae) -INTERRUPT(0xaf) - -INTERRUPT(0xb0) -INTERRUPT(0xb1) -INTERRUPT(0xb2) -INTERRUPT(0xb3) -INTERRUPT(0xb4) -INTERRUPT(0xb5) -INTERRUPT(0xb6) -INTERRUPT(0xb7) -INTERRUPT(0xb8) -INTERRUPT(0xb9) -INTERRUPT(0xba) -INTERRUPT(0xbb) -INTERRUPT(0xbc) -INTERRUPT(0xbd) -INTERRUPT(0xbe) -INTERRUPT(0xbf) - -INTERRUPT(0xc0) -INTERRUPT(0xc1) -INTERRUPT(0xc2) -INTERRUPT(0xc3) -INTERRUPT(0xc4) -INTERRUPT(0xc5) -INTERRUPT(0xc6) -INTERRUPT(0xc7) -INTERRUPT(0xc8) -INTERRUPT(0xc9) -INTERRUPT(0xca) -INTERRUPT(0xcb) -INTERRUPT(0xcc) -INTERRUPT(0xcd) -INTERRUPT(0xce) -INTERRUPT(0xcf) - -INTERRUPT(0xd0) -INTERRUPT(0xd1) -INTERRUPT(0xd2) -INTERRUPT(0xd3) -INTERRUPT(0xd4) -INTERRUPT(0xd5) -INTERRUPT(0xd6) -INTERRUPT(0xd7) -INTERRUPT(0xd8) -INTERRUPT(0xd9) -INTERRUPT(0xda) -INTERRUPT(0xdb) -INTERRUPT(0xdc) -INTERRUPT(0xdd) -INTERRUPT(0xde) -INTERRUPT(0xdf) - -INTERRUPT(0xe0) -INTERRUPT(0xe1) -INTERRUPT(0xe2) -INTERRUPT(0xe3) -INTERRUPT(0xe4) -INTERRUPT(0xe5) -INTERRUPT(0xe6) -INTERRUPT(0xe7) -INTERRUPT(0xe8) -INTERRUPT(0xe9) -INTERRUPT(0xea) -INTERRUPT(0xeb) -INTERRUPT(0xec) -INTERRUPT(0xed) -INTERRUPT(0xee) -INTERRUPT(0xef) - -INTERRUPT(0xf0) -INTERRUPT(0xf1) -INTERRUPT(0xf2) -INTERRUPT(0xf3) -INTERRUPT(0xf4) -INTERRUPT(0xf5) -INTERRUPT(0xf6) -INTERRUPT(0xf7) -INTERRUPT(0xf8) -INTERRUPT(0xf9) -INTERRUPT(0xfa) -INTERRUPT(0xfb) -INTERRUPT(0xfc) -INTERRUPT(0xfd) -INTERRUPT(0xfe) -EXCEPTION(0xff,t_preempt) - - - .data -Entry(lo_kernel_cr3) - .long 0 - .long 0 - - .text - - -/* - * Trap/interrupt entry points. - * - * All traps must create the following save area on the PCB "stack": - * - * gs - * fs - * es - * ds - * edi - * esi - * ebp - * cr2 if page fault - otherwise unused - * ebx - * edx - * ecx - * eax - * trap number - * error code - * eip - * cs - * eflags - * user esp - if from user - * user ss - if from user - */ - -ret_to_kernel: - jmp *1f -1: .long HI_TEXT(hi_ret_to_kernel) - -ret_to_user: - jmp *1f -1: .long HI_TEXT(hi_ret_to_user) - -Entry(hi_ret_to_user) - movl %esp,%ebx - movl %gs:CPU_ACTIVE_THREAD,%ecx - subl TH_PCB_ISS(%ecx),%ebx - movl $(WINDOWS_CLEAN),TH_COPYIO_STATE(%ecx) - - movl TH_PCB_IDS(%ecx),%eax /* get debug state struct */ - cmpl $0,%eax /* is there a debug state */ - je 1f /* branch if not */ - movl DS_DR0(%eax), %ecx /* Load the 32 bit debug registers */ - movl %ecx, %db0 - movl DS_DR1(%eax), %ecx - movl %ecx, %db1 - movl DS_DR2(%eax), %ecx - movl %ecx, %db2 - movl DS_DR3(%eax), %ecx - movl %ecx, %db3 - movl DS_DR7(%eax), %eax -1: - addl %gs:CPU_HI_ISS,%ebx /* rebase PCB save area to high addr */ - movl %gs:CPU_TASK_CR3,%ecx - movl %ecx,%gs:CPU_ACTIVE_CR3 - movl %ebx,%esp /* switch to hi based PCB stack */ - movl %ecx,%cr3 /* switch to user's address space */ - - cmpl $0,%eax /* is dr7 set to something? 
*/ - je 2f /* branch if not */ - movl %eax,%db7 /* Set dr7 */ -2: - -Entry(hi_ret_to_kernel) - - popl %eax /* ignore flavor of saved state */ -EXT(ret_popl_gs): - popl %gs /* restore segment registers */ -EXT(ret_popl_fs): - popl %fs -EXT(ret_popl_es): - popl %es -EXT(ret_popl_ds): - popl %ds - - popa /* restore general registers */ - addl $8,%esp /* discard trap number and error code */ - - cmpl $(SYSENTER_CS),4(%esp) /* test for fast entry/exit */ - je fast_exit -EXT(ret_iret): - iret /* return from interrupt */ -fast_exit: - popl %edx /* user return eip */ - popl %ecx /* pop and toss cs */ - andl $(~EFL_IF),(%esp) /* clear intrs enabled, see sti below */ - popf /* flags - carry denotes failure */ - popl %ecx /* user return esp */ - sti /* interrupts enabled after sysexit */ - sysexit - - -Entry(hi_unix_scall) - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ - pusha /* save the general registers */ - movl $(LO_UNIX_SCALL),%ebx - jmp enter_lohandler - - -Entry(hi_mach_scall) - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ - pusha /* save the general registers */ - movl $(LO_MACH_SCALL),%ebx - jmp enter_lohandler - - -Entry(hi_mdep_scall) - pushl %eax /* save system call number */ - pushl $0 /* clear trap number slot */ - pusha /* save the general registers */ - movl $(LO_MDEP_SCALL),%ebx - jmp enter_lohandler - - -/* - * sysenter entry point - * Requires user code to set up: - * edx: user instruction pointer (return address) - * ecx: user stack pointer - * on which is pushed stub ret addr and saved ebx - * Return to user-space is made using sysexit. - * Note: sysenter/sysexit cannot be used for calls returning a value in edx, - * or requiring ecx to be preserved. - */ -Entry(hi_sysenter) - movl (%esp), %esp /* switch from intr stack to pcb */ - /* - * Push values on to the PCB stack - * to cons up the saved state. - */ - pushl $(USER_DS) /* ss */ - pushl %ecx /* uesp */ - pushf /* flags */ - /* - * Clear, among others, the Nested Task (NT) flags bit; - * This is cleared by INT, but not by SYSENTER. - */ - pushl $0 - popfl - pushl $(SYSENTER_CS) /* cs */ -hi_sysenter_2: - pushl %edx /* eip */ - pushl %eax /* err/eax - syscall code */ - pushl $0 /* clear trap number slot */ - pusha /* save the general registers */ - orl $(EFL_IF),R32_EFLAGS-R32_EDI(%esp) /* (edi was last reg pushed) */ - movl $(LO_SYSENTER),%ebx -enter_lohandler: - pushl %ds - pushl %es - pushl %fs - pushl %gs - pushl $(SS_32) /* 32-bit state flavor */ -enter_lohandler1: - mov %ss,%eax - mov %eax,%ds - mov %eax,%fs - mov %eax,%es /* switch to kernel data seg */ - mov $(CPU_DATA_GS),%eax - mov %eax,%gs - cld /* clear direction flag */ - /* - * Switch to kernel's address space if necessary - */ - movl HI_DATA(lo_kernel_cr3),%ecx - movl %cr3,%eax - cmpl %eax,%ecx - je 1f - movl %ecx,%cr3 - movl %ecx,%gs:CPU_ACTIVE_CR3 -1: - testb $3,R32_CS(%esp) - jz 2f - movl %esp,%edx /* came from user mode */ - xor %ebp, %ebp - subl %gs:CPU_HI_ISS,%edx - movl %gs:CPU_ACTIVE_THREAD,%ecx - addl TH_PCB_ISS(%ecx),%edx /* rebase the high stack to a low address */ - movl %edx,%esp - cmpl $0, TH_PCB_IDS(%ecx) /* Is there a debug register state? */ - je 2f - movl $0, %ecx /* If so, reset DR7 (the control) */ - movl %ecx, %dr7 -2: - movl R32_TRAPNO(%esp),%ecx // Get the interrupt vector - addl $1,%gs:hwIntCnt(,%ecx,4) // Bump the count - jmp *%ebx - - -/* - * Page fault traps save cr2. 
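
The save-area layout listed in the trap-entry comments above maps naturally onto a C structure. A minimal sketch, assuming 32-bit fields throughout; field names are illustrative (cf. x86_saved_state32 in mach/i386/thread_status.h for xnu's real definition):

	#include <stdint.h>

	typedef struct {
		uint32_t gs, fs, es, ds;      /* segment registers, pushed by software */
		uint32_t edi, esi, ebp;
		uint32_t cr2;                 /* valid only for page faults */
		uint32_t ebx, edx, ecx, eax;
		uint32_t trapno, err;         /* trap number and error code */
		uint32_t eip, cs, eflags;     /* hardware exception frame */
		uint32_t uesp, ss;            /* present only when trapping from user mode */
	} saved_state32_sketch;
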
- */ -Entry(hi_page_fault) - pushl $(T_PAGE_FAULT) /* mark a page fault trap */ - pusha /* save the general registers */ - movl %cr2,%eax /* get the faulting address */ - movl %eax,R32_CR2-R32_EDI(%esp) /* save in the cr2 save slot */ - - movl $(LO_ALLTRAPS),%ebx - jmp enter_lohandler - - - -/* - * Debug trap. Check for single-stepping across system call into - * kernel. If this is the case, taking the debug trap has turned - * off single-stepping - save the flags register with the trace - * bit set. - */ -Entry(hi_debug) - testb $3,4(%esp) - jnz hi_debug_trap - /* trap came from kernel mode */ - cmpl $(HI_TEXT(hi_mach_scall)),(%esp) - jne 6f - addl $12,%esp /* remove eip/cs/eflags from debug_trap */ - jmp EXT(hi_mach_scall) /* continue system call entry */ -6: - cmpl $(HI_TEXT(hi_mdep_scall)),(%esp) - jne 5f - addl $12,%esp /* remove eip/cs/eflags from debug_trap */ - jmp EXT(hi_mdep_scall) /* continue system call entry */ -5: - cmpl $(HI_TEXT(hi_unix_scall)),(%esp) - jne 4f - addl $12,%esp /* remove eip/cs/eflags from debug_trap */ - jmp EXT(hi_unix_scall) /* continue system call entry */ -4: - cmpl $(HI_TEXT(hi_sysenter)),(%esp) - jne hi_debug_trap - /* - * eip/cs/flags have been pushed on intr stack - * We have to switch to pcb stack and copy eflags. - * Note: setting the cs selector to SYSENTER_TF_CS - * will cause the return to user path to take the iret path so - * that eflags (containing the trap bit) is set atomically. - * In unix_syscall this is tested so that we'll rewind the pc - * to account for entry via sysenter or int. - */ - addl $8,%esp /* remove eip/cs */ - pushl %ecx /* save %ecx */ - movl 8(%esp),%ecx /* top of intr stack -> pcb stack */ - xchgl %ecx,%esp /* switch to pcb stack */ - pushl $(USER_DS) /* ss */ - pushl %ss:(%ecx) /* %ecx into uesp slot */ - pushl %ss:4(%ecx) /* eflags */ - movl %ss:(%ecx),%ecx /* restore %ecx */ - pushl $(SYSENTER_TF_CS) /* cs - not SYSENTER_CS for iret path */ - jmp hi_sysenter_2 /* continue sysenter entry */ -hi_debug_trap: - pushl $0 - pushl $(T_DEBUG) /* handle as user trap */ - pusha /* save the general registers */ - movl $(LO_ALLTRAPS),%ebx - jmp enter_lohandler - - - -/* - * General protection or segment-not-present fault. - * Check for a GP/NP fault in the kernel_return - * sequence; if there, report it as a GP/NP fault on the user's instruction. - * - * esp-> 0: trap code (NP or GP) - * 4: segment number in error - * 8 eip - * 12 cs - * 16 eflags - * 20 old registers (trap is from kernel) - */ -Entry(hi_gen_prot) - pushl $(T_GENERAL_PROTECTION) /* indicate fault type */ - jmp trap_check_kernel_exit /* check for kernel exit sequence */ - -Entry(hi_segnp) - pushl $(T_SEGMENT_NOT_PRESENT) - /* indicate fault type */ -trap_check_kernel_exit: - testb $3,12(%esp) - jnz hi_take_trap - /* trap was from kernel mode, so */ - /* check for the kernel exit sequence */ - cmpl $(HI_TEXT(ret_iret)),8(%esp) /* on IRET? */ - je fault_iret - cmpl $(HI_TEXT(ret_popl_ds)),8(%esp) /* popping DS? */ - je fault_popl_ds - cmpl $(HI_TEXT(ret_popl_es)),8(%esp) /* popping ES? */ - je fault_popl_es - cmpl $(HI_TEXT(ret_popl_fs)),8(%esp) /* popping FS? */ - je fault_popl_fs - cmpl $(HI_TEXT(ret_popl_gs)),8(%esp) /* popping GS? */ - je fault_popl_gs -hi_take_trap: - pusha /* save the general registers */ - movl $(LO_ALLTRAPS),%ebx - jmp enter_lohandler - - -/* - * GP/NP fault on IRET: CS or SS is in error. - * All registers contain the user's values. 
- * - * on SP is - * 0 trap number - * 4 errcode - * 8 eip - * 12 cs --> trapno - * 16 efl --> errcode - * 20 user eip - * 24 user cs - * 28 user eflags - * 32 user esp - * 36 user ss - */ -fault_iret: - movl %eax,8(%esp) /* save eax (we don`t need saved eip) */ - popl %eax /* get trap number */ - movl %eax,12-4(%esp) /* put in user trap number */ - popl %eax /* get error code */ - movl %eax,16-8(%esp) /* put in user errcode */ - popl %eax /* restore eax */ - /* now treat as fault from user */ - pusha /* save the general registers */ - movl $(LO_ALLTRAPS),%ebx - jmp enter_lohandler - -/* - * Fault restoring a segment register. The user's registers are still - * saved on the stack. The offending segment register has not been - * popped. - */ -fault_popl_ds: - popl %eax /* get trap number */ - popl %edx /* get error code */ - addl $12,%esp /* pop stack to user regs */ - jmp push_es /* (DS on top of stack) */ -fault_popl_es: - popl %eax /* get trap number */ - popl %edx /* get error code */ - addl $12,%esp /* pop stack to user regs */ - jmp push_fs /* (ES on top of stack) */ -fault_popl_fs: - popl %eax /* get trap number */ - popl %edx /* get error code */ - addl $12,%esp /* pop stack to user regs */ - jmp push_gs /* (FS on top of stack) */ -fault_popl_gs: - popl %eax /* get trap number */ - popl %edx /* get error code */ - addl $12,%esp /* pop stack to user regs */ - jmp push_none /* (GS on top of stack) */ - -push_es: - pushl %es /* restore es, */ -push_fs: - pushl %fs /* restore fs, */ -push_gs: - pushl %gs /* restore gs. */ -push_none: - pushl $(SS_32) /* 32-bit state flavor */ - movl %eax,R32_TRAPNO(%esp) /* set trap number */ - movl %edx,R32_ERR(%esp) /* set error code */ - /* now treat as fault from user */ - /* except that segment registers are */ - /* already pushed */ - movl $(LO_ALLTRAPS),%ebx - jmp enter_lohandler1 - - - .text - - -Entry(hi_remap_etext) - - -/* - * All 32 bit task 'exceptions' enter lo_alltraps: - * esp -> x86_saved_state_t - * - * The rest of the state is set up as: - * cr3 -> kernel directory - * esp -> low based stack - * gs -> CPU_DATA_GS - * cs -> KERNEL32_CS - * ss/ds/es -> KERNEL_DS - * - * interrupts disabled - * direction flag cleared - */ -Entry(lo_alltraps32) - movl R32_CS(%esp),%eax /* assume 32-bit state */ - cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */ - jne 1f - movl R64_CS(%esp),%eax /* 64-bit user mode */ -1: - testb $3,%al - jz trap_from_kernel - /* user mode trap */ - TIME_TRAP_UENTRY - - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl TH_TASK(%ecx),%ebx - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - movl %gs:CPU_KERNEL_STACK,%ebx - xchgl %ebx,%esp /* switch to kernel stack */ - - CCALL1(user_trap, %ebx) /* call user trap routine */ - /* user_trap() unmasks interrupts */ - cli /* hold off intrs - critical section */ - xorl %ecx,%ecx /* don't check if we're in the PFZ */ - -/* - * Return from trap or system call, checking for ASTs. - * On lowbase PCB stack with intrs disabled - */ -Entry(return_from_trap32) - movl %gs:CPU_ACTIVE_THREAD, %esp - movl TH_PCB_ISS(%esp), %esp /* switch back to PCB stack */ - movl %gs:CPU_PENDING_AST, %eax - testl %eax, %eax - je EXT(return_to_user) /* branch if no AST */ -LEXT(return_from_trap_with_ast) - movl %gs:CPU_KERNEL_STACK, %ebx - xchgl %ebx, %esp /* switch to kernel stack */ - - testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */ - je 2f /* no, go handle the AST */ - cmpl $(SS_64), SS_FLAVOR(%ebx) /* are we a 64-bit task? */ - je 1f - /* no... 
32-bit user mode */ - movl R32_EIP(%ebx), %eax - pushl %ebx /* save PCB stack */ - xorl %ebp, %ebp /* clear frame pointer */ - CCALL1(commpage_is_in_pfz32, %eax) - popl %ebx /* retrieve pointer to PCB stack */ - testl %eax, %eax - je 2f /* not in the PFZ... go service AST */ - movl %eax, R32_EBX(%ebx) /* let the PFZ know we've pended an AST */ - xchgl %ebx, %esp /* switch back to PCB stack */ - jmp EXT(return_to_user) -1: /* 64-bit user mode */ - movl R64_RIP(%ebx), %ecx - movl R64_RIP+4(%ebx), %eax - pushl %ebx /* save PCB stack */ - xorl %ebp, %ebp /* clear frame pointer */ - CCALL2(commpage_is_in_pfz64, %ecx, %eax) - popl %ebx /* retrieve pointer to PCB stack */ - testl %eax, %eax - je 2f /* not in the PFZ... go service AST */ - movl %eax, R64_RBX(%ebx) /* let the PFZ know we've pended an AST */ - xchgl %ebx, %esp /* switch back to PCB stack */ - jmp EXT(return_to_user) -2: - sti /* interrupts always enabled on return to user mode */ - xorl %ebp, %ebp /* Clear framepointer */ - CCALL1(i386_astintr, $0) /* take the AST */ - cli - xorl %ecx, %ecx /* don't check if we're in the PFZ */ - jmp EXT(return_from_trap32) /* and check again (rare) */ - - -/* - * Trap from kernel mode. No need to switch stacks. - * Interrupts must be off here - we will set them to state at time of trap - * as soon as it's safe for us to do so and not recurse doing preemption - */ -trap_from_kernel: - movl %esp, %eax /* saved state addr */ - pushl R32_EIP(%esp) /* Simulate a CALL from fault point */ - pushl %ebp /* Extend framepointer chain */ - movl %esp, %ebp - CCALL1WITHSP(kernel_trap, %eax) /* Call kernel trap handler */ - popl %ebp - addl $4, %esp - cli - - movl %gs:CPU_PENDING_AST,%eax /* get pending asts */ - testl $ AST_URGENT,%eax /* any urgent preemption? */ - je ret_to_kernel /* no, nothing to do */ - cmpl $ T_PREEMPT,R32_TRAPNO(%esp) - je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */ - testl $ EFL_IF,R32_EFLAGS(%esp) /* interrupts disabled? */ - je ret_to_kernel - cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ - jne ret_to_kernel - movl %gs:CPU_KERNEL_STACK,%eax - movl %esp,%ecx - xorl %eax,%ecx - and EXT(kernel_stack_mask),%ecx - testl %ecx,%ecx /* are we on the kernel stack? 
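
The xor/and test above decides whether %esp already lies on the current kernel stack. A minimal C restatement, assuming the kernel stack is aligned to its power-of-two size so the mask selects the bits that identify the stack (names are illustrative):

	#include <stdbool.h>
	#include <stdint.h>

	static bool on_same_kernel_stack(uintptr_t esp, uintptr_t kstack,
	                                 uintptr_t kernel_stack_mask)
	{
		/* two addresses are on the same stack iff they agree in all
		 * bits selected by kernel_stack_mask */
		return ((esp ^ kstack) & kernel_stack_mask) == 0;
	}
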
*/ - jne ret_to_kernel /* no, skip it */ - - CCALL1(i386_astintr, $1) /* take the AST */ - - jmp ret_to_kernel - - -/* - * All interrupts on all tasks enter here with: - * esp-> -> x86_saved_state_t - * - * cr3 -> kernel directory - * esp -> low based stack - * gs -> CPU_DATA_GS - * cs -> KERNEL32_CS - * ss/ds/es -> KERNEL_DS - * - * interrupts disabled - * direction flag cleared - */ -Entry(lo_allintrs32) - /* - * test whether already on interrupt stack - */ - movl %gs:CPU_INT_STACK_TOP,%ecx - cmpl %esp,%ecx - jb 1f - leal -INTSTACK_SIZE(%ecx),%edx - cmpl %esp,%edx - jb int_from_intstack -1: - xchgl %ecx,%esp /* switch to interrupt stack */ - - movl %cr0,%eax /* get cr0 */ - orl $(CR0_TS),%eax /* or in TS bit */ - movl %eax,%cr0 /* set cr0 */ - - subl $8, %esp /* for 16-byte stack alignment */ - pushl %ecx /* save pointer to old stack */ - movl %ecx,%gs:CPU_INT_STATE /* save intr state */ - - TIME_INT_ENTRY /* do timing */ - - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl TH_TASK(%ecx),%ebx - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - incl %gs:CPU_PREEMPTION_LEVEL - incl %gs:CPU_INTERRUPT_LEVEL - - movl %gs:CPU_INT_STATE, %eax - CCALL1(interrupt, %eax) /* call generic interrupt routine */ - - cli /* just in case we returned with intrs enabled */ - xorl %eax,%eax - movl %eax,%gs:CPU_INT_STATE /* clear intr state pointer */ - - decl %gs:CPU_INTERRUPT_LEVEL - decl %gs:CPU_PREEMPTION_LEVEL - - TIME_INT_EXIT /* do timing */ - - movl %gs:CPU_ACTIVE_THREAD,%eax - movl TH_PCB_FPS(%eax),%eax /* get pcb's ifps */ - testl %eax, %eax /* Is there a context */ - je 1f /* Branch if not */ - cmpl $0, FP_VALID(%eax) /* Check fp_valid */ - jne 1f /* Branch if valid */ - clts /* Clear TS */ - jmp 2f -1: - movl %cr0,%eax /* get cr0 */ - orl $(CR0_TS),%eax /* or in TS bit */ - movl %eax,%cr0 /* set cr0 */ -2: - popl %esp /* switch back to old stack */ - - /* Load interrupted code segment into %eax */ - movl R32_CS(%esp),%eax /* assume 32-bit state */ - cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */ - jne 3f - movl R64_CS(%esp),%eax /* 64-bit user mode */ -3: - testb $3,%al /* user mode, */ - jnz ast_from_interrupt_user /* go handle potential ASTs */ - /* - * we only want to handle preemption requests if - * the interrupt fell in the kernel context - * and preemption isn't disabled - */ - movl %gs:CPU_PENDING_AST,%eax - testl $ AST_URGENT,%eax /* any urgent requests? */ - je ret_to_kernel /* no, nothing to do */ - - cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ - jne ret_to_kernel /* yes, skip it */ - - movl %gs:CPU_KERNEL_STACK,%eax - movl %esp,%ecx - xorl %eax,%ecx - and EXT(kernel_stack_mask),%ecx - testl %ecx,%ecx /* are we on the kernel stack? */ - jne ret_to_kernel /* no, skip it */ - - /* - * Take an AST from kernel space. We don't need (and don't want) - * to do as much as the case where the interrupt came from user - * space. - */ - CCALL1(i386_astintr, $1) - - jmp ret_to_kernel - - -/* - * nested int - simple path, can't preempt etc on way out - */ -int_from_intstack: - incl %gs:CPU_PREEMPTION_LEVEL - incl %gs:CPU_INTERRUPT_LEVEL - - movl %esp, %edx /* x86_saved_state */ - CCALL1(interrupt, %edx) - - decl %gs:CPU_INTERRUPT_LEVEL - decl %gs:CPU_PREEMPTION_LEVEL - - jmp ret_to_kernel - -/* - * Take an AST from an interrupted user - */ -ast_from_interrupt_user: - movl %gs:CPU_PENDING_AST,%eax - testl %eax,%eax /* pending ASTs? 
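
The lo_sysenter32 entry that follows dispatches on the sign of the syscall number in %eax: negative values are Mach traps, positive values are BSD/Unix syscalls. In C terms (handler names are hypothetical stand-ins for lo_mach_scall32 / lo_unix_scall32):

	#include <stdint.h>

	extern void mach_trap_entry(int32_t num);    /* hypothetical */
	extern void unix_syscall_entry(int32_t num); /* hypothetical */

	static void sysenter_dispatch(int32_t eax)
	{
		if (eax < 0)
			mach_trap_entry(eax);    /* < 0 => mach */
		else
			unix_syscall_entry(eax); /* > 0 => unix */
	}
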
*/ - je ret_to_user /* no, nothing to do */ - - TIME_TRAP_UENTRY - - movl $1, %ecx /* check if we're in the PFZ */ - jmp EXT(return_from_trap_with_ast) /* return */ - - -/* - * 32bit Tasks - * System call entries via INTR_GATE or sysenter: - * - * esp -> x86_saved_state32_t - * cr3 -> kernel directory - * esp -> low based stack - * gs -> CPU_DATA_GS - * cs -> KERNEL32_CS - * ss/ds/es -> KERNEL_DS - * - * interrupts disabled - * direction flag cleared - */ - -Entry(lo_sysenter32) - /* - * We can be here either for a mach syscall or a unix syscall, - * as indicated by the sign of the code: - */ - movl R32_EAX(%esp),%eax - testl %eax,%eax - js EXT(lo_mach_scall32) /* < 0 => mach */ - /* > 0 => unix */ - -Entry(lo_unix_scall32) - TIME_TRAP_UENTRY - - movl %gs:CPU_KERNEL_STACK,%edi - xchgl %edi,%esp /* switch to kernel stack */ - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ - movl TH_TASK(%ecx),%ebx /* point to current task */ - incl TH_SYSCALLS_UNIX(%ecx) /* increment call count */ - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - sti - - CCALL1(unix_syscall, %edi) - /* - * always returns through thread_exception_return - */ - - -Entry(lo_mach_scall32) - TIME_TRAP_UENTRY - - movl %gs:CPU_KERNEL_STACK,%edi - xchgl %edi,%esp /* switch to kernel stack */ - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ - movl TH_TASK(%ecx),%ebx /* point to current task */ - incl TH_SYSCALLS_MACH(%ecx) /* increment call count */ - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - sti - - CCALL1(mach_call_munger, %edi) - /* - * always returns through thread_exception_return - */ - - -Entry(lo_mdep_scall32) - TIME_TRAP_UENTRY - - movl %gs:CPU_KERNEL_STACK,%edi - xchgl %edi,%esp /* switch to kernel stack */ - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ - movl TH_TASK(%ecx),%ebx /* point to current task */ - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - sti - - CCALL1(machdep_syscall, %edi) - /* - * always returns through thread_exception_return - */ - - -LEXT(return_to_user) - TIME_TRAP_UEXIT - jmp ret_to_user - - -/* - * Double-fault exception handler task. The last gasp... - */ -Entry(df_task_start) - CCALL1(panic_double_fault32, $(T_DOUBLE_FAULT)) - hlt - - -/* - * machine-check handler task. The last gasp... - */ -Entry(mc_task_start) - CCALL1(panic_machine_check32, $(T_MACHINE_CHECK)) - hlt diff --git a/osfmk/i386/idt64.s b/osfmk/i386/idt64.s deleted file mode 100644 index fd488ebd9..000000000 --- a/osfmk/i386/idt64.s +++ /dev/null @@ -1,1701 +0,0 @@ -/* - * Copyright (c) 2010 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#include -#include -#include -#include -#include -#include -#define _ARCH_I386_ASM_HELP_H_ /* Prevent inclusion of user header */ -#include -#include -#include -#include - - -/* - * Low-memory compatibility-mode handlers. - */ -#define LO_ALLINTRS EXT(lo_allintrs) -#define LO_ALLTRAPS EXT(lo_alltraps) -#define LO_SYSCALL EXT(lo_syscall) -#define LO_UNIX_SCALL EXT(lo_unix_scall) -#define LO_MACH_SCALL EXT(lo_mach_scall) -#define LO_MDEP_SCALL EXT(lo_mdep_scall) -#define LO_DOUBLE_FAULT EXT(lo_df64) -#define LO_MACHINE_CHECK EXT(lo_mc64) - -/* - * Interrupt descriptor table and code vectors for it. - * - * The IDT64_BASE_ENTRY macro lays down a fake descriptor that must be - * reformatted ("fixed") before use. - * All vectors are rebased in uber-space. - * Special vectors (e.g. double-fault) use a non-0 IST. - */ -#define IDT64_BASE_ENTRY(vec,seg,ist,type) \ - .data ;\ - .long vec ;\ - .long KERNEL_UBER_BASE_HI32 ;\ - .word seg ;\ - .byte ist*16 ;\ - .byte type ;\ - .long 0 ;\ - .text - -#define IDT64_ENTRY(vec,ist,type) \ - IDT64_BASE_ENTRY(EXT(vec),KERNEL64_CS,ist,type) -#define IDT64_ENTRY_LOCAL(vec,ist,type) \ - IDT64_BASE_ENTRY(vec,KERNEL64_CS,ist,type) - -/* - * Push trap number and address of compatibility mode handler, - * then branch to common trampoline. Error already pushed. - */ -#define EXCEP64_ERR(n,name) \ - IDT64_ENTRY(name,0,K_INTR_GATE) ;\ -Entry(name) ;\ - push $(LO_ALLTRAPS) ;\ - push $(n) ;\ - jmp L_enter_lohandler - - -/* - * Push error(0), trap number and address of compatibility mode handler, - * then branch to common trampoline. - */ -#define EXCEPTION64(n,name) \ - IDT64_ENTRY(name,0,K_INTR_GATE) ;\ -Entry(name) ;\ - push $0 ;\ - push $(LO_ALLTRAPS) ;\ - push $(n) ;\ - jmp L_enter_lohandler - - -/* - * Interrupt from user. - * Push error (0), trap number and address of compatibility mode handler, - * then branch to common trampoline. - */ -#define EXCEP64_USR(n,name) \ - IDT64_ENTRY(name,0,U_INTR_GATE) ;\ -Entry(name) ;\ - push $0 ;\ - push $(LO_ALLTRAPS) ;\ - push $(n) ;\ - jmp L_enter_lohandler - - -/* - * Special interrupt code from user. - */ -#define EXCEP64_SPC_USR(n,name) \ - IDT64_ENTRY(name,0,U_INTR_GATE) - - -/* - * Special interrupt code. - * In 64-bit mode we may use an IST slot instead of task gates. - */ -#define EXCEP64_IST(n,name,ist) \ - IDT64_ENTRY(name,ist,K_INTR_GATE) -#define EXCEP64_SPC(n,name) \ - IDT64_ENTRY(name,0,K_INTR_GATE) - - -/* - * Interrupt. - * Push zero err, interrupt vector and address of compatibility mode handler, - * then branch to common trampoline. 
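
Byte for byte, the fake descriptor that IDT64_BASE_ENTRY emits looks like the overlay below (a sketch; the field names are mine). Boot code later "fixes" it by re-packing the two handler words into the split offset fields of a genuine 16-byte x86-64 interrupt gate:

	#include <stdint.h>

	typedef struct __attribute__((packed)) {
		uint32_t handler_low;    /* .long vec */
		uint32_t handler_high;   /* .long KERNEL_UBER_BASE_HI32 */
		uint16_t selector;       /* .word seg */
		uint8_t  ist;            /* .byte ist*16 */
		uint8_t  type;           /* .byte type, e.g. K_INTR_GATE */
		uint32_t reserved;       /* .long 0 */
	} idt64_fake_entry_t;        /* 16 bytes, the size of a real 64-bit gate */
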
- */ -#define INTERRUPT64(n) \ - IDT64_ENTRY_LOCAL(L_ ## n,0,K_INTR_GATE) ;\ - .align FALIGN ;\ -L_ ## n: ;\ - push $0 ;\ - push $(LO_ALLINTRS) ;\ - push $(n) ;\ - jmp L_enter_lohandler - - - .data - .align 12 -Entry(master_idt64) -Entry(hi64_data_base) - .text - .code64 -Entry(hi64_text_base) - -EXCEPTION64(0x00,t64_zero_div) -EXCEP64_SPC(0x01,hi64_debug) -INTERRUPT64(0x02) /* NMI */ -EXCEP64_USR(0x03,t64_int3) -EXCEP64_USR(0x04,t64_into) -EXCEP64_USR(0x05,t64_bounds) -EXCEPTION64(0x06,t64_invop) -EXCEPTION64(0x07,t64_nofpu) -EXCEP64_IST(0x08,hi64_double_fault,1) -EXCEPTION64(0x09,a64_fpu_over) -EXCEPTION64(0x0a,a64_inv_tss) -EXCEP64_SPC(0x0b,hi64_segnp) -EXCEP64_SPC(0x0c,hi64_stack_fault) -EXCEP64_SPC(0x0d,hi64_gen_prot) -EXCEP64_SPC(0x0e, hi64_page_fault) -EXCEPTION64(0x0f,t64_trap_0f) -EXCEPTION64(0x10,t64_fpu_err) -EXCEPTION64(0x11,t64_trap_11) -EXCEP64_IST(0x12,mc64,1) -EXCEPTION64(0x13,t64_sse_err) -EXCEPTION64(0x14,t64_trap_14) -EXCEPTION64(0x15,t64_trap_15) -EXCEPTION64(0x16,t64_trap_16) -EXCEPTION64(0x17,t64_trap_17) -EXCEPTION64(0x18,t64_trap_18) -EXCEPTION64(0x19,t64_trap_19) -EXCEPTION64(0x1a,t64_trap_1a) -EXCEPTION64(0x1b,t64_trap_1b) -EXCEPTION64(0x1c,t64_trap_1c) -EXCEPTION64(0x1d,t64_trap_1d) -EXCEPTION64(0x1e,t64_trap_1e) -EXCEPTION64(0x1f,t64_trap_1f) - -INTERRUPT64(0x20) -INTERRUPT64(0x21) -INTERRUPT64(0x22) -INTERRUPT64(0x23) -INTERRUPT64(0x24) -INTERRUPT64(0x25) -INTERRUPT64(0x26) -INTERRUPT64(0x27) -INTERRUPT64(0x28) -INTERRUPT64(0x29) -INTERRUPT64(0x2a) -INTERRUPT64(0x2b) -INTERRUPT64(0x2c) -INTERRUPT64(0x2d) -INTERRUPT64(0x2e) -INTERRUPT64(0x2f) - -INTERRUPT64(0x30) -INTERRUPT64(0x31) -INTERRUPT64(0x32) -INTERRUPT64(0x33) -INTERRUPT64(0x34) -INTERRUPT64(0x35) -INTERRUPT64(0x36) -INTERRUPT64(0x37) -INTERRUPT64(0x38) -INTERRUPT64(0x39) -INTERRUPT64(0x3a) -INTERRUPT64(0x3b) -INTERRUPT64(0x3c) -INTERRUPT64(0x3d) -INTERRUPT64(0x3e) -INTERRUPT64(0x3f) - -INTERRUPT64(0x40) -INTERRUPT64(0x41) -INTERRUPT64(0x42) -INTERRUPT64(0x43) -INTERRUPT64(0x44) -INTERRUPT64(0x45) -INTERRUPT64(0x46) -INTERRUPT64(0x47) -INTERRUPT64(0x48) -INTERRUPT64(0x49) -INTERRUPT64(0x4a) -INTERRUPT64(0x4b) -INTERRUPT64(0x4c) -INTERRUPT64(0x4d) -INTERRUPT64(0x4e) -INTERRUPT64(0x4f) - -INTERRUPT64(0x50) -INTERRUPT64(0x51) -INTERRUPT64(0x52) -INTERRUPT64(0x53) -INTERRUPT64(0x54) -INTERRUPT64(0x55) -INTERRUPT64(0x56) -INTERRUPT64(0x57) -INTERRUPT64(0x58) -INTERRUPT64(0x59) -INTERRUPT64(0x5a) -INTERRUPT64(0x5b) -INTERRUPT64(0x5c) -INTERRUPT64(0x5d) -INTERRUPT64(0x5e) -INTERRUPT64(0x5f) - -INTERRUPT64(0x60) -INTERRUPT64(0x61) -INTERRUPT64(0x62) -INTERRUPT64(0x63) -INTERRUPT64(0x64) -INTERRUPT64(0x65) -INTERRUPT64(0x66) -INTERRUPT64(0x67) -INTERRUPT64(0x68) -INTERRUPT64(0x69) -INTERRUPT64(0x6a) -INTERRUPT64(0x6b) -INTERRUPT64(0x6c) -INTERRUPT64(0x6d) -INTERRUPT64(0x6e) -INTERRUPT64(0x6f) - -INTERRUPT64(0x70) -INTERRUPT64(0x71) -INTERRUPT64(0x72) -INTERRUPT64(0x73) -INTERRUPT64(0x74) -INTERRUPT64(0x75) -INTERRUPT64(0x76) -INTERRUPT64(0x77) -INTERRUPT64(0x78) -INTERRUPT64(0x79) -INTERRUPT64(0x7a) -INTERRUPT64(0x7b) -INTERRUPT64(0x7c) -INTERRUPT64(0x7d) -INTERRUPT64(0x7e) -EXCEP64_USR(0x7f, t64_dtrace_ret) - -EXCEP64_SPC_USR(0x80,hi64_unix_scall) -EXCEP64_SPC_USR(0x81,hi64_mach_scall) -EXCEP64_SPC_USR(0x82,hi64_mdep_scall) -INTERRUPT64(0x83) -INTERRUPT64(0x84) -INTERRUPT64(0x85) -INTERRUPT64(0x86) -INTERRUPT64(0x87) -INTERRUPT64(0x88) -INTERRUPT64(0x89) -INTERRUPT64(0x8a) -INTERRUPT64(0x8b) -INTERRUPT64(0x8c) -INTERRUPT64(0x8d) -INTERRUPT64(0x8e) -INTERRUPT64(0x8f) - -INTERRUPT64(0x90) -INTERRUPT64(0x91) 
-INTERRUPT64(0x92) -INTERRUPT64(0x93) -INTERRUPT64(0x94) -INTERRUPT64(0x95) -INTERRUPT64(0x96) -INTERRUPT64(0x97) -INTERRUPT64(0x98) -INTERRUPT64(0x99) -INTERRUPT64(0x9a) -INTERRUPT64(0x9b) -INTERRUPT64(0x9c) -INTERRUPT64(0x9d) -INTERRUPT64(0x9e) -INTERRUPT64(0x9f) - -INTERRUPT64(0xa0) -INTERRUPT64(0xa1) -INTERRUPT64(0xa2) -INTERRUPT64(0xa3) -INTERRUPT64(0xa4) -INTERRUPT64(0xa5) -INTERRUPT64(0xa6) -INTERRUPT64(0xa7) -INTERRUPT64(0xa8) -INTERRUPT64(0xa9) -INTERRUPT64(0xaa) -INTERRUPT64(0xab) -INTERRUPT64(0xac) -INTERRUPT64(0xad) -INTERRUPT64(0xae) -INTERRUPT64(0xaf) - -INTERRUPT64(0xb0) -INTERRUPT64(0xb1) -INTERRUPT64(0xb2) -INTERRUPT64(0xb3) -INTERRUPT64(0xb4) -INTERRUPT64(0xb5) -INTERRUPT64(0xb6) -INTERRUPT64(0xb7) -INTERRUPT64(0xb8) -INTERRUPT64(0xb9) -INTERRUPT64(0xba) -INTERRUPT64(0xbb) -INTERRUPT64(0xbc) -INTERRUPT64(0xbd) -INTERRUPT64(0xbe) -INTERRUPT64(0xbf) - -INTERRUPT64(0xc0) -INTERRUPT64(0xc1) -INTERRUPT64(0xc2) -INTERRUPT64(0xc3) -INTERRUPT64(0xc4) -INTERRUPT64(0xc5) -INTERRUPT64(0xc6) -INTERRUPT64(0xc7) -INTERRUPT64(0xc8) -INTERRUPT64(0xc9) -INTERRUPT64(0xca) -INTERRUPT64(0xcb) -INTERRUPT64(0xcc) -INTERRUPT64(0xcd) -INTERRUPT64(0xce) -INTERRUPT64(0xcf) - -INTERRUPT64(0xd0) -INTERRUPT64(0xd1) -INTERRUPT64(0xd2) -INTERRUPT64(0xd3) -INTERRUPT64(0xd4) -INTERRUPT64(0xd5) -INTERRUPT64(0xd6) -INTERRUPT64(0xd7) -INTERRUPT64(0xd8) -INTERRUPT64(0xd9) -INTERRUPT64(0xda) -INTERRUPT64(0xdb) -INTERRUPT64(0xdc) -INTERRUPT64(0xdd) -INTERRUPT64(0xde) -INTERRUPT64(0xdf) - -INTERRUPT64(0xe0) -INTERRUPT64(0xe1) -INTERRUPT64(0xe2) -INTERRUPT64(0xe3) -INTERRUPT64(0xe4) -INTERRUPT64(0xe5) -INTERRUPT64(0xe6) -INTERRUPT64(0xe7) -INTERRUPT64(0xe8) -INTERRUPT64(0xe9) -INTERRUPT64(0xea) -INTERRUPT64(0xeb) -INTERRUPT64(0xec) -INTERRUPT64(0xed) -INTERRUPT64(0xee) -INTERRUPT64(0xef) - -INTERRUPT64(0xf0) -INTERRUPT64(0xf1) -INTERRUPT64(0xf2) -INTERRUPT64(0xf3) -INTERRUPT64(0xf4) -INTERRUPT64(0xf5) -INTERRUPT64(0xf6) -INTERRUPT64(0xf7) -INTERRUPT64(0xf8) -INTERRUPT64(0xf9) -INTERRUPT64(0xfa) -INTERRUPT64(0xfb) -INTERRUPT64(0xfc) -INTERRUPT64(0xfd) -INTERRUPT64(0xfe) -EXCEPTION64(0xff,t64_preempt) - - - .text -/* - * - * Trap/interrupt entry points. - * - * All traps must create the following 32-bit save area on the PCB "stack" - * - this is identical to the legacy mode 32-bit case: - * - * gs - * fs - * es - * ds - * edi - * esi - * ebp - * cr2 (defined only for page fault) - * ebx - * edx - * ecx - * eax - * trap number - * error code - * eip - * cs - * eflags - * user esp - if from user - * user ss - if from user - * - * Above this is the trap number and compatibility mode handler address - * (packed into an 8-byte stack entry) and the 64-bit interrupt stack frame: - * - * (trapno, trapfn) - * err - * rip - * cs - * rflags - * rsp - * ss - * - */ - - .code32 - -/* - * Control is passed here to return to the compatibility mode user. - * At this stage we're in kernel space in compatibility mode - * but we need to switch into 64-bit mode in the 4G-based trampoline - * space before performing the iret. - */ -ret_to_user: - movl %gs:CPU_ACTIVE_THREAD,%ecx - - movl TH_PCB_IDS(%ecx),%eax /* Obtain this thread's debug state */ - cmpl $0,%eax /* Is there a debug register context? */ - je 2f /* branch if not */ - cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? 
*/ - jne 1f - movl DS_DR0(%eax), %ecx /* If so, load the 32 bit DRs */ - movl %ecx, %db0 - movl DS_DR1(%eax), %ecx - movl %ecx, %db1 - movl DS_DR2(%eax), %ecx - movl %ecx, %db2 - movl DS_DR3(%eax), %ecx - movl %ecx, %db3 - movl DS_DR7(%eax), %ecx - movl %ecx, %gs:CPU_DR7 - movl $0, %gs:CPU_DR7 + 4 - jmp 2f -1: - ENTER_64BIT_MODE() /* Enter long mode */ - mov DS64_DR0(%eax), %rcx /* Load the full width DRs */ - mov %rcx, %dr0 - mov DS64_DR1(%eax), %rcx - mov %rcx, %dr1 - mov DS64_DR2(%eax), %rcx - mov %rcx, %dr2 - mov DS64_DR3(%eax), %rcx - mov %rcx, %dr3 - mov DS64_DR7(%eax), %rcx - mov %rcx, %gs:CPU_DR7 - jmp 3f /* Enter uberspace */ -2: - ENTER_64BIT_MODE() -3: - ENTER_UBERSPACE() - - /* - * Now switch %cr3, if necessary. - */ - swapgs /* switch back to uber-kernel gs base */ - mov %gs:CPU_TASK_CR3,%rcx - mov %rcx,%gs:CPU_ACTIVE_CR3 - mov %cr3, %rax - cmp %rcx, %rax - je 1f - /* flag the copyio engine state as WINDOWS_CLEAN */ - mov %gs:CPU_ACTIVE_THREAD,%eax - movl $(WINDOWS_CLEAN),TH_COPYIO_STATE(%eax) - mov %rcx,%cr3 /* switch to user's address space */ -1: - - mov %gs:CPU_DR7, %rax /* Is there a debug control register? */ - cmp $0, %rax - je 1f - mov %rax, %dr7 /* Set DR7 */ - movq $0, %gs:CPU_DR7 -1: - - /* - * Adjust stack to use uber-space. - */ - mov $(KERNEL_UBER_BASE_HI32), %rax - shl $32, %rsp - shrd $32, %rax, %rsp /* relocate into uber-space */ - - cmpl $(SS_32), SS_FLAVOR(%rsp) /* 32-bit state? */ - jne L_64bit_return - jmp L_32bit_return - -ret_to_kernel: - ENTER_64BIT_MODE() - ENTER_UBERSPACE() - - swapgs /* switch back to uber-kernel gs base */ - - /* - * Adjust stack to use uber-space. - */ - mov $(KERNEL_UBER_BASE_HI32), %rax - shl $32, %rsp - shrd $32, %rax, %rsp /* relocate into uber-space */ - - /* Check for return to 64-bit kernel space (EFI today) */ - cmpl $(SS_32), SS_FLAVOR(%rsp) /* 32-bit state? */ - jne L_64bit_return - /* fall through for 32-bit return */ - -L_32bit_return: - /* - * Restore registers into the machine state for iret. - */ - movl R32_EIP(%rsp), %eax - movl %eax, ISC32_RIP(%rsp) - movl R32_EFLAGS(%rsp), %eax - movl %eax, ISC32_RFLAGS(%rsp) - movl R32_CS(%rsp), %eax - movl %eax, ISC32_CS(%rsp) - movl R32_UESP(%rsp), %eax - movl %eax, ISC32_RSP(%rsp) - movl R32_SS(%rsp), %eax - movl %eax, ISC32_SS(%rsp) - - /* - * Restore general 32-bit registers - */ - movl R32_EAX(%rsp), %eax - movl R32_EBX(%rsp), %ebx - movl R32_ECX(%rsp), %ecx - movl R32_EDX(%rsp), %edx - movl R32_EBP(%rsp), %ebp - movl R32_ESI(%rsp), %esi - movl R32_EDI(%rsp), %edi - - /* - * Restore segment registers. We may take an exception here but - * we've got enough space left in the save frame area to absorb - * a hardware frame plus the trapfn and trapno - */ - swapgs -EXT(ret32_set_ds): - movw R32_DS(%rsp), %ds -EXT(ret32_set_es): - movw R32_ES(%rsp), %es -EXT(ret32_set_fs): - movw R32_FS(%rsp), %fs -EXT(ret32_set_gs): - movw R32_GS(%rsp), %gs - - add $(ISC32_OFFSET)+8+8+8, %rsp /* pop compat frame + - trapno, trapfn and error */ - cmpl $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp) - /* test for fast entry/exit */ - je L_fast_exit -EXT(ret32_iret): - iretq /* return from interrupt */ - -L_fast_exit: - pop %rdx /* user return eip */ - pop %rcx /* pop and toss cs */ - andl $(~EFL_IF), (%rsp) /* clear interrupt enable, see sti below */ - popf /* flags - carry denotes failure */ - pop %rcx /* user return esp */ - .code32 - sti /* interrupts enabled after sysexit */ - .byte 0x0f,0x35 /* 32-bit sysexit */ - .code64 - -L_64bit_return: - /* - * Set the GS Base MSR with the user's gs base. 
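
The shl/shrd pair above is a two-instruction way of splicing the uber-space upper half onto the low 32 bits of the stack pointer. Equivalently, in C (a sketch):

	#include <stdint.h>

	static uint64_t relocate_to_uberspace(uint64_t rsp, uint32_t kernel_uber_base_hi32)
	{
		/* shl $32,%rsp ; shrd $32,%rax,%rsp  with %rax = KERNEL_UBER_BASE_HI32:
		 * keep the low 32 bits of rsp, replace the high 32 bits */
		return ((uint64_t)kernel_uber_base_hi32 << 32) | (uint32_t)rsp;
	}
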
- */ - movl %gs:CPU_UBER_USER_GS_BASE, %eax - movl %gs:CPU_UBER_USER_GS_BASE+4, %edx - movl $(MSR_IA32_GS_BASE), %ecx - swapgs - testb $3, R64_CS(%rsp) /* returning to user-space? */ - jz 1f - wrmsr /* set 64-bit base */ -1: - - /* - * Restore general 64-bit registers - */ - mov R64_R15(%rsp), %r15 - mov R64_R14(%rsp), %r14 - mov R64_R13(%rsp), %r13 - mov R64_R12(%rsp), %r12 - mov R64_R11(%rsp), %r11 - mov R64_R10(%rsp), %r10 - mov R64_R9(%rsp), %r9 - mov R64_R8(%rsp), %r8 - mov R64_RSI(%rsp), %rsi - mov R64_RDI(%rsp), %rdi - mov R64_RBP(%rsp), %rbp - mov R64_RDX(%rsp), %rdx - mov R64_RBX(%rsp), %rbx - mov R64_RCX(%rsp), %rcx - mov R64_RAX(%rsp), %rax - - add $(ISS64_OFFSET)+8+8+8, %rsp /* pop saved state frame + - trapno, trapfn and error */ - cmpl $(SYSCALL_CS),ISF64_CS-8-8-8(%rsp) - /* test for fast entry/exit */ - je L_sysret -EXT(ret64_iret): - iretq /* return from interrupt */ - -L_sysret: - /* - * Here to load rcx/r11/rsp and perform the sysret back to user-space. - * rcx user rip - * r11 user rflags - * rsp user stack pointer - */ - mov ISF64_RIP-8-8-8(%rsp), %rcx - mov ISF64_RFLAGS-8-8-8(%rsp), %r11 - mov ISF64_RSP-8-8-8(%rsp), %rsp - sysretq /* return from system call */ - -/* - * Common path to enter locore handlers. - */ -L_enter_lohandler: - swapgs /* switch to kernel gs (cpu_data) */ -L_enter_lohandler_continue: - cmpl $(USER64_CS), ISF64_CS(%rsp) - je L_64bit_enter /* this is a 64-bit user task */ - cmpl $(KERNEL64_CS), ISF64_CS(%rsp) - je L_64bit_enter /* we're in 64-bit (EFI) code */ - jmp L_32bit_enter - -/* - * System call handlers. - * These are entered via a syscall interrupt. The system call number in %rax - * is saved to the error code slot in the stack frame. We then branch to the - * common state saving code. - */ - -Entry(hi64_unix_scall) - swapgs /* switch to kernel gs (cpu_data) */ -L_unix_scall_continue: - push %rax /* save system call number */ - push $(LO_UNIX_SCALL) - push $(UNIX_INT) - jmp L_32bit_enter_check - - -Entry(hi64_mach_scall) - swapgs /* switch to kernel gs (cpu_data) */ -L_mach_scall_continue: - push %rax /* save system call number */ - push $(LO_MACH_SCALL) - push $(MACH_INT) - jmp L_32bit_enter_check - - -Entry(hi64_mdep_scall) - swapgs /* switch to kernel gs (cpu_data) */ -L_mdep_scall_continue: - push %rax /* save system call number */ - push $(LO_MDEP_SCALL) - push $(MACHDEP_INT) - jmp L_32bit_enter_check - - -Entry(hi64_syscall) - swapgs /* Kapow! get per-cpu data area */ -L_syscall_continue: - mov %rsp, %gs:CPU_UBER_TMP /* save user stack */ - mov %gs:CPU_UBER_ISF, %rsp /* switch stack to pcb */ - - /* - * Save values in the ISF frame in the PCB - * to cons up the saved machine state. - */ - movl $(USER_DS), ISF64_SS(%rsp) - movl $(SYSCALL_CS), ISF64_CS(%rsp) /* cs - a pseudo-segment */ - mov %r11, ISF64_RFLAGS(%rsp) /* rflags */ - mov %rcx, ISF64_RIP(%rsp) /* rip */ - mov %gs:CPU_UBER_TMP, %rcx - mov %rcx, ISF64_RSP(%rsp) /* user stack */ - mov %rax, ISF64_ERR(%rsp) /* err/rax - syscall code */ - movl $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */ - movl $(LO_SYSCALL), ISF64_TRAPFN(%rsp) - jmp L_64bit_enter /* this can only be a 64-bit task */ - - -L_32bit_enter_check: - /* - * Check we're not a confused 64-bit user. 
- */ - cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP - jne L_64bit_entry_reject - jmp L_32bit_enter -/* - * sysenter entry point - * Requires user code to set up: - * edx: user instruction pointer (return address) - * ecx: user stack pointer - * on which is pushed stub ret addr and saved ebx - * Return to user-space is made using sysexit. - * Note: sysenter/sysexit cannot be used for calls returning a value in edx, - * or requiring ecx to be preserved. - */ -Entry(hi64_sysenter) - mov (%rsp), %rsp /* switch from temporary stack to pcb */ - /* - * Push values on to the PCB stack - * to cons up the saved machine state. - */ - push $(USER_DS) /* ss */ - push %rcx /* uesp */ - pushf /* flags */ - /* - * Clear, among others, the Nested Task (NT) flags bit; - * this is zeroed by INT, but not by SYSENTER. - */ - push $0 - popf - push $(SYSENTER_CS) /* cs */ - swapgs /* switch to kernel gs (cpu_data) */ -L_sysenter_continue: - push %rdx /* eip */ - push %rax /* err/eax - syscall code */ - push $0 - push $(T_SYSENTER) - orl $(EFL_IF), ISF64_RFLAGS(%rsp) - movl $(LO_MACH_SCALL), ISF64_TRAPFN(%rsp) - testl %eax, %eax - js L_32bit_enter_check - movl $(LO_UNIX_SCALL), ISF64_TRAPFN(%rsp) - cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP - jne L_64bit_entry_reject -/* If the caller (typically LibSystem) has recorded the cumulative size of - * the arguments in EAX, copy them over from the user stack directly. - * We recover from exceptions inline--if the copy loop doesn't complete - * due to an exception, we fall back to copyin from compatibility mode. - * We can potentially extend this mechanism to mach traps as well (DRK). - */ -L_sysenter_copy_args: - testl $(I386_SYSCALL_ARG_BYTES_MASK), %eax - jz L_32bit_enter - xor %r9, %r9 - mov %gs:CPU_UBER_ARG_STORE, %r8 - movl %eax, %r9d - mov %gs:CPU_UBER_ARG_STORE_VALID, %r12 - xor %r10, %r10 - shrl $(I386_SYSCALL_ARG_DWORDS_SHIFT), %r9d - andl $(I386_SYSCALL_ARG_DWORDS_MASK), %r9d - movl $0, (%r12) -EXT(hi64_sysenter_user_arg_copy): -0: - movl 4(%rcx, %r10, 4), %r11d - movl %r11d, (%r8, %r10, 4) - incl %r10d - decl %r9d - jnz 0b - movl $1, (%r12) - /* Fall through to 32-bit handler */ - -L_32bit_enter: - cld - /* - * Make space for the compatibility save area. - */ - sub $(ISC32_OFFSET), %rsp - movl $(SS_32), SS_FLAVOR(%rsp) - - /* - * Save segment regs - */ - mov %ds, R32_DS(%rsp) - mov %es, R32_ES(%rsp) - mov %fs, R32_FS(%rsp) - mov %gs, R32_GS(%rsp) - - /* - * Save general 32-bit registers - */ - mov %eax, R32_EAX(%rsp) - mov %ebx, R32_EBX(%rsp) - mov %ecx, R32_ECX(%rsp) - mov %edx, R32_EDX(%rsp) - mov %ebp, R32_EBP(%rsp) - mov %esi, R32_ESI(%rsp) - mov %edi, R32_EDI(%rsp) - - /* Unconditionally save cr2; only meaningful on page faults */ - mov %cr2, %rax - mov %eax, R32_CR2(%rsp) - - /* - * Copy registers already saved in the machine state - * (in the interrupt stack frame) into the compat save area. 
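
A C restatement of the L_sysenter_copy_args prefetch above: if the caller encoded the argument size in %eax, the dwords are copied from the user stack (just above the stub return address at (%rcx)) into a per-cpu store, bracketed by a valid flag so a copy that faults partway can be detected and redone via copyin. Names and the n == 0 shortcut are illustrative:

	#include <stdint.h>

	static void prefetch_sysenter_args(uint32_t eax, const uint32_t *user_esp,
	                                   uint32_t *arg_store,
	                                   volatile uint32_t *arg_store_valid,
	                                   uint32_t dwords_shift, uint32_t dwords_mask)
	{
		uint32_t n = (eax >> dwords_shift) & dwords_mask;
		if (n == 0)
			return;                       /* nothing encoded: slow path later */
		*arg_store_valid = 0;             /* invalidate before touching user memory */
		for (uint32_t i = 0; i < n; i++)
			arg_store[i] = user_esp[1 + i]; /* skip the stub return address */
		*arg_store_valid = 1;             /* copy completed without faulting */
	}
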
- */ - mov ISC32_RIP(%rsp), %eax - mov %eax, R32_EIP(%rsp) - mov ISC32_RFLAGS(%rsp), %eax - mov %eax, R32_EFLAGS(%rsp) - mov ISC32_CS(%rsp), %eax - mov %eax, R32_CS(%rsp) - testb $3, %al - jz 1f - xor %ebp, %ebp -1: - mov ISC32_RSP(%rsp), %eax - mov %eax, R32_UESP(%rsp) - mov ISC32_SS(%rsp), %eax - mov %eax, R32_SS(%rsp) -L_32bit_enter_after_fault: - mov ISC32_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */ - mov %ebx, R32_TRAPNO(%rsp) - mov ISC32_ERR(%rsp), %eax - mov %eax, R32_ERR(%rsp) - mov ISC32_TRAPFN(%rsp), %edx - -/* - * Common point to enter lo_handler in compatibility mode: - * %ebx trapno - * %edx locore handler address - */ -L_enter_lohandler2: - /* - * Switch address space to kernel - * if not shared space and not already mapped. - * Note: cpu_task_map is valid only if cpu_task_cr3 is loaded in cr3. - */ - mov %cr3, %rax - mov %gs:CPU_TASK_CR3, %rcx - cmp %rax, %rcx /* is the task's cr3 loaded? */ - jne 1f - cmpl $(TASK_MAP_64BIT_SHARED), %gs:CPU_TASK_MAP - je 2f -1: - mov %gs:CPU_KERNEL_CR3, %rcx - cmp %rax, %rcx - je 2f - mov %rcx, %cr3 - mov %rcx, %gs:CPU_ACTIVE_CR3 -2: - movl %gs:CPU_ACTIVE_THREAD,%ecx /* Get the active thread */ - cmpl $0, TH_PCB_IDS(%ecx) /* Is there a debug register state? */ - jz 21f - xor %ecx, %ecx /* If so, reset DR7 (the control) */ - mov %rcx, %dr7 -21: - /* - * Switch to compatibility mode. - * Then establish kernel segments. - */ - swapgs /* Done with uber-kernel gs */ - ENTER_COMPAT_MODE() - - /* - * Now in compatibility mode and running in compatibility space - * prepare to enter the locore handler. - * %ebx trapno - * %edx lo_handler pointer - * Note: the stack pointer (now 32-bit) is now directly addressing the - * kernel below 4G and therefore is automagically re-based. - */ - mov $(KERNEL_DS), %eax - mov %eax, %ss - mov %eax, %ds - mov %eax, %es - mov %eax, %fs - mov $(CPU_DATA_GS), %eax - mov %eax, %gs - - incl %gs:hwIntCnt(,%ebx,4) /* Bump the trap/intr count */ - - /* Dispatch the designated lo handler */ - jmp *%edx - - .code64 -L_64bit_entry_reject: - /* - * Here for a 64-bit user attempting an invalid kernel entry. - */ - movl $(LO_ALLTRAPS), ISF64_TRAPFN(%rsp) - movl $(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp) - /* Fall through... */ - -L_64bit_enter: - /* - * Here for a 64-bit user task, or special 64-bit kernel code. - * Make space for the save area. - */ - sub $(ISS64_OFFSET), %rsp - movl $(SS_64), SS_FLAVOR(%rsp) - - cld - /* - * Save segment regs - */ - mov %fs, R64_FS(%rsp) - mov %gs, R64_GS(%rsp) - - /* Save general-purpose registers */ - mov %rax, R64_RAX(%rsp) - mov %rcx, R64_RCX(%rsp) - mov %rbx, R64_RBX(%rsp) - mov %rbp, R64_RBP(%rsp) - mov %r11, R64_R11(%rsp) - mov %r12, R64_R12(%rsp) - mov %r13, R64_R13(%rsp) - mov %r14, R64_R14(%rsp) - mov %r15, R64_R15(%rsp) - - /* cr2 is significant only for page-faults */ - mov %cr2, %rax - mov %rax, R64_CR2(%rsp) - - /* Other registers (which may contain syscall args) */ - mov %rdi, R64_RDI(%rsp) /* arg0 .. */ - mov %rsi, R64_RSI(%rsp) - mov %rdx, R64_RDX(%rsp) - mov %r10, R64_R10(%rsp) - mov %r8, R64_R8(%rsp) - mov %r9, R64_R9(%rsp) /* .. arg5 */ - -L_64bit_enter_after_fault: - /* - * At this point we're almost ready to join the common lo-entry code. 
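
The address-space decision in L_enter_lohandler2 above, restated in C: stay on the user's cr3 only if it is both currently loaded and the task shares the kernel's 64-bit address space; otherwise switch to the kernel cr3 unless it is already active. A sketch with illustrative names:

	#include <stdint.h>
	#include <stdbool.h>

	static void maybe_switch_to_kernel_cr3(uint64_t cr3, uint64_t task_cr3,
	                                       uint64_t kernel_cr3,
	                                       bool task_map_64bit_shared,
	                                       void (*set_cr3)(uint64_t),
	                                       uint64_t *active_cr3)
	{
		if (cr3 == task_cr3 && task_map_64bit_shared)
			return;                   /* shared space already mapped: no switch */
		if (cr3 != kernel_cr3) {
			set_cr3(kernel_cr3);      /* mov %rcx, %cr3 */
			*active_cr3 = kernel_cr3; /* mov %rcx, %gs:CPU_ACTIVE_CR3 */
		}
	}
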
- */ - mov R64_TRAPNO(%rsp), %ebx - mov R64_TRAPFN(%rsp), %edx - - testb $3, ISF64_CS+ISS64_OFFSET(%rsp) - jz 1f - xor %rbp, %rbp -1: - jmp L_enter_lohandler2 - -Entry(hi64_page_fault) - push $(LO_ALLTRAPS) - push $(T_PAGE_FAULT) - cmpl $(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp) - jne L_enter_lohandler - cmpl $(EXT(hi64_sysenter_user_arg_copy)), ISF64_RIP(%rsp) - jne hi64_kernel_trap - mov ISF64_RSP(%rsp), %rsp - jmp L_32bit_enter - -/* - * Debug trap. Check for single-stepping across system call into - * kernel. If this is the case, taking the debug trap has turned - * off single-stepping - save the flags register with the trace - * bit set. - */ -Entry(hi64_debug) - swapgs /* set %gs for cpu data */ - push $0 /* error code */ - push $(LO_ALLTRAPS) - push $(T_DEBUG) - - testb $3, ISF64_CS(%rsp) - jnz L_enter_lohandler_continue - - /* - * trap came from kernel mode - */ - cmpl $(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp) - jne L_enter_lohandler_continue /* trap not in uber-space */ - - cmpl $(EXT(hi64_mach_scall)), ISF64_RIP(%rsp) - jne 6f - add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ - jmp L_mach_scall_continue /* continue system call entry */ -6: - cmpl $(EXT(hi64_mdep_scall)), ISF64_RIP(%rsp) - jne 5f - add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ - jmp L_mdep_scall_continue /* continue system call entry */ -5: - cmpl $(EXT(hi64_unix_scall)), ISF64_RIP(%rsp) - jne 4f - add $(ISF64_SIZE),%rsp /* remove entire intr stack frame */ - jmp L_unix_scall_continue /* continue system call entry */ -4: - cmpl $(EXT(hi64_sysenter)), ISF64_RIP(%rsp) - jne L_enter_lohandler_continue - /* - * Interrupt stack frame has been pushed on the temporary stack. - * We have to switch to pcb stack and copy eflags. - */ - add $40,%rsp /* remove trapno/trapfn/err/rip/cs */ - push %rcx /* save %rcx - user stack pointer */ - mov 32(%rsp),%rcx /* top of intr stack -> pcb stack */ - xchg %rcx,%rsp /* switch to pcb stack */ - push $(USER_DS) /* ss */ - push (%rcx) /* saved %rcx into rsp slot */ - push 8(%rcx) /* rflags */ - mov (%rcx),%rcx /* restore %rcx */ - push $(SYSENTER_TF_CS) /* cs - not SYSENTER_CS for iret path */ - jmp L_sysenter_continue /* continue sysenter entry */ - - -Entry(hi64_double_fault) - swapgs /* set %gs for cpu data */ - push $(LO_DOUBLE_FAULT) - push $(T_DOUBLE_FAULT) - - cmpl $(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp) - jne L_enter_lohandler_continue /* trap not in uber-space */ - - cmpl $(EXT(hi64_syscall)), ISF64_RIP(%rsp) - jne L_enter_lohandler_continue - - mov ISF64_RSP(%rsp), %rsp - jmp L_syscall_continue - - -/* - * General protection or segment-not-present fault. - * Check for a GP/NP fault in the kernel_return - * sequence; if there, report it as a GP/NP fault on the user's instruction. 
- * - * rsp-> 0 ISF64_TRAPNO: trap code (NP or GP) - * 8 ISF64_TRAPFN: trap function - * 16 ISF64_ERR: segment number in error (error code) - * 24 ISF64_RIP: rip - * 32 ISF64_CS: cs - * 40 ISF64_RFLAGS: rflags - * 48 ISF64_RSP: rsp - * 56 ISF64_SS: ss - * 64 old registers (trap is from kernel) - */ -Entry(hi64_gen_prot) - push $(LO_ALLTRAPS) - push $(T_GENERAL_PROTECTION) - jmp trap_check_kernel_exit /* check for kernel exit sequence */ - -Entry(hi64_stack_fault) - push $(LO_ALLTRAPS) - push $(T_STACK_FAULT) - jmp trap_check_kernel_exit /* check for kernel exit sequence */ - -Entry(hi64_segnp) - push $(LO_ALLTRAPS) - push $(T_SEGMENT_NOT_PRESENT) - /* indicate fault type */ -trap_check_kernel_exit: - testb $3,ISF64_CS(%rsp) - jnz L_enter_lohandler - /* trap was from kernel mode, so */ - /* check for the kernel exit sequence */ - cmpl $(KERNEL_UBER_BASE_HI32), ISF64_RIP+4(%rsp) - jne L_enter_lohandler_continue /* trap not in uber-space */ - - cmpl $(EXT(ret32_iret)), ISF64_RIP(%rsp) - je L_fault_iret32 - cmpl $(EXT(ret32_set_ds)), ISF64_RIP(%rsp) - je L_32bit_fault_set_seg - cmpl $(EXT(ret32_set_es)), ISF64_RIP(%rsp) - je L_32bit_fault_set_seg - cmpl $(EXT(ret32_set_fs)), ISF64_RIP(%rsp) - je L_32bit_fault_set_seg - cmpl $(EXT(ret32_set_gs)), ISF64_RIP(%rsp) - je L_32bit_fault_set_seg - - cmpl $(EXT(ret64_iret)), ISF64_RIP(%rsp) - je L_fault_iret64 - - cmpl $(EXT(hi64_sysenter_user_arg_copy)), ISF64_RIP(%rsp) - cmove ISF64_RSP(%rsp), %rsp - je L_32bit_enter - -hi64_kernel_trap: - /* - * Here after taking an unexpected trap from kernel mode - perhaps - * while running in the trampolines hereabouts. - * Make sure we're not on the PCB stack, if so move to the kernel stack. - * This is likely a fatal condition. - * But first, try to be sure we have the kernel gs base active... - */ - cmpq $0, %gs:CPU_THIS /* test gs_base */ - js 1f /* -ve kernel addr, no swap */ - swapgs /* +ve user addr, swap */ -1: - movq %rax, %gs:CPU_UBER_TMP /* save %rax */ - movq %gs:CPU_UBER_ISF, %rax /* PCB stack addr */ - subq %rsp, %rax - cmpq $(PAGE_SIZE), %rax /* current stack in PCB? */ - movq %gs:CPU_UBER_TMP, %rax /* restore %rax */ - ja L_enter_lohandler_continue /* stack not in PCB */ - - /* - * Here if %rsp is in the PCB - * Copy the interrupt stack frame from PCB stack to kernel stack - */ - movq %gs:CPU_KERNEL_STACK, %rax /* note: %rax restored below */ - xchgq %rax, %rsp - pushq ISF64_SS(%rax) - pushq ISF64_RSP(%rax) - pushq ISF64_RFLAGS(%rax) - pushq ISF64_CS(%rax) - pushq ISF64_RIP(%rax) - pushq ISF64_ERR(%rax) - pushq ISF64_TRAPFN(%rax) - pushq ISF64_TRAPNO(%rax) - movq %gs:CPU_UBER_TMP, %rax /* restore %rax */ - jmp L_enter_lohandler_continue - - -/* - * GP/NP fault on IRET: CS or SS is in error. - * All registers contain the user's values. 
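
hi64_kernel_trap's stack triage above, in C: if the interrupt frame landed within a page below the top of the per-thread PCB miniature stack (CPU_UBER_ISF), the frame is copied to the real kernel stack before continuing. A sketch; the unsigned subtraction also rejects rsp above the ISF top, since it wraps to a huge value:

	#include <stdint.h>
	#include <stdbool.h>

	#define PAGE_SIZE_SKETCH 4096u   /* stand-in for PAGE_SIZE */

	static bool frame_on_pcb_stack(uint64_t rsp, uint64_t uber_isf_top)
	{
		return (uber_isf_top - rsp) <= PAGE_SIZE_SKETCH;
	}
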
- * - * on SP is - * 0 ISF64_TRAPNO: trap code (NP or GP) - * 8 ISF64_TRAPFN: trap function - * 16 ISF64_ERR: segment number in error (error code) - * 24 ISF64_RIP: rip - * 32 ISF64_CS: cs - * 40 ISF64_RFLAGS: rflags - * 48 ISF64_RSP: rsp - * 56 ISF64_SS: ss --> new trapno/trapfn - * 64 pad --> new errcode - * 72 user rip - * 80 user cs - * 88 user rflags - * 96 user rsp - * 104 user ss (16-byte aligned) - */ -L_fault_iret32: - mov %rax, ISF64_RIP(%rsp) /* save rax (we don`t need saved rip) */ - mov ISF64_TRAPNO(%rsp), %rax - mov %rax, ISF64_SS(%rsp) /* put in user trap number */ - mov ISF64_ERR(%rsp), %rax - mov %rax, 8+ISF64_SS(%rsp) /* put in user errcode */ - mov ISF64_RIP(%rsp), %rax /* restore rax */ - add $(ISF64_SS), %rsp /* reset to original frame */ - /* now treat as fault from user */ - swapgs - jmp L_32bit_enter - -L_fault_iret64: - mov %rax, ISF64_RIP(%rsp) /* save rax (we don`t need saved rip) */ - mov ISF64_TRAPNO(%rsp), %rax - mov %rax, ISF64_SS(%rsp) /* put in user trap number */ - mov ISF64_ERR(%rsp), %rax - mov %rax, 8+ISF64_SS(%rsp) /* put in user errcode */ - mov ISF64_RIP(%rsp), %rax /* restore rax */ - add $(ISF64_SS), %rsp /* reset to original frame */ - /* now treat as fault from user */ - swapgs - jmp L_64bit_enter - -/* - * Fault restoring a segment register. All of the saved state is still - * on the stack untouched since we didn't move the stack pointer. - */ -L_32bit_fault_set_seg: - mov ISF64_TRAPNO(%rsp), %rax - mov ISF64_ERR(%rsp), %rdx - mov ISF64_RSP(%rsp), %rsp /* reload stack prior to fault */ - mov %rax,ISC32_TRAPNO(%rsp) - mov %rdx,ISC32_ERR(%rsp) - /* now treat as fault from user */ - /* except that all the state is */ - /* already saved - we just have to */ - /* move the trapno and error into */ - /* the compatibility frame */ - swapgs - jmp L_32bit_enter_after_fault - - -/* - * Fatal exception handlers: - */ -Entry(db_task_dbl_fault64) - push $(LO_DOUBLE_FAULT) - push $(T_DOUBLE_FAULT) - jmp L_enter_lohandler - -Entry(db_task_stk_fault64) - push $(LO_DOUBLE_FAULT) - push $(T_STACK_FAULT) - jmp L_enter_lohandler - -Entry(mc64) - push $(0) /* Error */ - push $(LO_MACHINE_CHECK) - push $(T_MACHINE_CHECK) - jmp L_enter_lohandler - - - .code32 - -/* - * All task 'exceptions' enter lo_alltraps: - * esp -> x86_saved_state_t - * - * The rest of the state is set up as: - * cr3 -> kernel directory - * esp -> low based stack - * gs -> CPU_DATA_GS - * cs -> KERNEL32_CS - * ss/ds/es -> KERNEL_DS - * - * interrupts disabled - * direction flag cleared - */ -Entry(lo_alltraps) - movl R32_CS(%esp),%eax /* assume 32-bit state */ - cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */ - jne 1f - movl R64_CS(%esp),%eax /* 64-bit user mode */ -1: - testb $3,%al - jz trap_from_kernel - /* user mode trap */ - TIME_TRAP_UENTRY - - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl TH_TASK(%ecx),%ebx - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - movl %gs:CPU_KERNEL_STACK,%ebx - xchgl %ebx,%esp /* switch to kernel stack */ - - CCALL1(user_trap, %ebx) /* call user trap routine */ - /* user_trap() unmasks interrupts */ - cli /* hold off intrs - critical section */ - xorl %ecx,%ecx /* don't check if we're in the PFZ */ - -/* - * Return from trap or system call, checking for ASTs. 
- * On lowbase PCB stack with intrs disabled - */ -Entry(return_from_trap) - movl %gs:CPU_ACTIVE_THREAD, %esp - movl TH_PCB_ISS(%esp),%esp /* switch back to PCB stack */ - movl %gs:CPU_PENDING_AST, %eax - testl %eax, %eax - je return_to_user /* branch if no AST */ -LEXT(return_from_trap_with_ast) - movl %gs:CPU_KERNEL_STACK, %ebx - xchgl %ebx, %esp /* switch to kernel stack */ - - testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */ - je 2f /* no, go handle the AST */ - cmpl $(SS_64), SS_FLAVOR(%ebx) /* are we a 64-bit task? */ - je 1f - /* no... 32-bit user mode */ - movl R32_EIP(%ebx), %eax - pushl %ebx /* save PCB stack */ - xorl %ebp, %ebp /* clear frame pointer */ - CCALL1(commpage_is_in_pfz32, %eax) - popl %ebx /* retrieve pointer to PCB stack */ - testl %eax, %eax - je 2f /* not in the PFZ... go service AST */ - movl %eax, R32_EBX(%ebx) /* let the PFZ know we've pended an AST */ - xchgl %ebx, %esp /* switch back to PCB stack */ - jmp return_to_user -1: /* 64-bit user mode */ - movl R64_RIP(%ebx), %ecx - movl R64_RIP+4(%ebx), %eax - pushl %ebx /* save PCB stack */ - xorl %ebp, %ebp /* clear frame pointer */ - CCALL2(commpage_is_in_pfz64, %ecx, %eax) - popl %ebx /* retrieve pointer to PCB stack */ - testl %eax, %eax - je 2f /* not in the PFZ... go service AST */ - movl %eax, R64_RBX(%ebx) /* let the PFZ know we've pended an AST */ - xchgl %ebx, %esp /* switch back to PCB stack */ - jmp return_to_user -2: - sti /* interrupts always enabled on return to user mode */ - pushl %ebx /* save PCB stack */ - xorl %ebp, %ebp /* Clear framepointer */ - CCALL1(i386_astintr, $0) /* take the AST */ - cli - - popl %esp /* switch back to PCB stack (w/exc link) */ - - xorl %ecx, %ecx /* don't check if we're in the PFZ */ - jmp EXT(return_from_trap) /* and check again (rare) */ - - - -/* - * Trap from kernel mode. No need to switch stacks. - * Interrupts must be off here - we will set them to state at time of trap - * as soon as it's safe for us to do so and not recurse doing preemption - */ -trap_from_kernel: - movl %esp, %eax /* saved state addr */ - pushl R32_EIP(%esp) /* Simulate a CALL from fault point */ - pushl %ebp /* Extend framepointer chain */ - movl %esp, %ebp - CCALL1WITHSP(kernel_trap, %eax) /* Call kernel trap handler */ - popl %ebp - addl $4, %esp - cli - - movl %gs:CPU_PENDING_AST,%eax /* get pending asts */ - testl $ AST_URGENT,%eax /* any urgent preemption? */ - je ret_to_kernel /* no, nothing to do */ - cmpl $ T_PREEMPT,R32_TRAPNO(%esp) - je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */ - testl $ EFL_IF,R32_EFLAGS(%esp) /* interrupts disabled? */ - je ret_to_kernel - cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ - jne ret_to_kernel - movl %gs:CPU_KERNEL_STACK,%eax - movl %esp,%ecx - xorl %eax,%ecx - and EXT(kernel_stack_mask),%ecx - testl %ecx,%ecx /* are we on the kernel stack? 
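
The PFZ check above, restated: before servicing an AST on the way back to user space, the code asks whether the user was executing inside a commpage "preemption-free zone". If so, it parks a nonzero flag in the saved EBX/RBX (which the PFZ code polls) and returns to user mode, deferring the AST. A sketch; commpage_is_in_pfz32 is the routine called above, with its signature approximated:

	#include <stdint.h>
	#include <stdbool.h>

	extern int commpage_is_in_pfz32(uint32_t eip); /* signature approximated */

	static bool defer_ast_for_pfz(uint32_t user_eip, uint32_t *saved_ebx)
	{
		if (commpage_is_in_pfz32(user_eip)) {
			*saved_ebx = 1;   /* tell the PFZ an AST is pending */
			return true;      /* return to user without taking the AST */
		}
		return false;         /* not in the PFZ: service the AST now */
	}
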
*/ - jne ret_to_kernel /* no, skip it */ - - CCALL1(i386_astintr, $1) /* take the AST */ - - -/* - * All interrupts on all tasks enter here with: - * esp-> -> x86_saved_state_t - * - * cr3 -> kernel directory - * esp -> low based stack - * gs -> CPU_DATA_GS - * cs -> KERNEL32_CS - * ss/ds/es -> KERNEL_DS - * - * interrupts disabled - * direction flag cleared - */ -Entry(lo_allintrs) - /* - * test whether already on interrupt stack - */ - movl %gs:CPU_INT_STACK_TOP,%ecx - cmpl %esp,%ecx - jb 1f - leal -INTSTACK_SIZE(%ecx),%edx - cmpl %esp,%edx - jb int_from_intstack -1: - xchgl %ecx,%esp /* switch to interrupt stack */ - - movl %cr0,%eax /* get cr0 */ - orl $(CR0_TS),%eax /* or in TS bit */ - movl %eax,%cr0 /* set cr0 */ - - subl $8, %esp /* for 16-byte stack alignment */ - pushl %ecx /* save pointer to old stack */ - movl %ecx,%gs:CPU_INT_STATE /* save intr state */ - - TIME_INT_ENTRY /* do timing */ - - movl %gs:CPU_ACTIVE_THREAD,%ecx - movl TH_TASK(%ecx),%ebx - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - incl %gs:CPU_PREEMPTION_LEVEL - incl %gs:CPU_INTERRUPT_LEVEL - - movl %gs:CPU_INT_STATE, %eax - CCALL1(interrupt, %eax) /* call generic interrupt routine */ - - cli /* just in case we returned with intrs enabled */ - xorl %eax,%eax - movl %eax,%gs:CPU_INT_STATE /* clear intr state pointer */ - - decl %gs:CPU_INTERRUPT_LEVEL - decl %gs:CPU_PREEMPTION_LEVEL - - TIME_INT_EXIT /* do timing */ - - movl %gs:CPU_ACTIVE_THREAD,%eax - movl TH_PCB_FPS(%eax),%eax /* get pcb's ifps */ - testl %eax, %eax /* Is there a context */ - je 1f /* Branch if not */ - cmpl $0, FP_VALID(%eax) /* Check fp_valid */ - jne 1f /* Branch if valid */ - clts /* Clear TS */ - jmp 2f -1: - movl %cr0,%eax /* get cr0 */ - orl $(CR0_TS),%eax /* or in TS bit */ - movl %eax,%cr0 /* set cr0 */ -2: - popl %esp /* switch back to old stack */ - - /* Load interrupted code segment into %eax */ - movl R32_CS(%esp),%eax /* assume 32-bit state */ - cmpl $(SS_64),SS_FLAVOR(%esp)/* 64-bit? */ - jne 3f - movl R64_CS(%esp),%eax /* 64-bit user mode */ -3: - testb $3,%al /* user mode, */ - jnz ast_from_interrupt_user /* go handle potential ASTs */ - /* - * we only want to handle preemption requests if - * the interrupt fell in the kernel context - * and preemption isn't disabled - */ - movl %gs:CPU_PENDING_AST,%eax - testl $ AST_URGENT,%eax /* any urgent requests? */ - je ret_to_kernel /* no, nothing to do */ - - cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ - jne ret_to_kernel /* yes, skip it */ - - movl %gs:CPU_KERNEL_STACK,%eax - movl %esp,%ecx - xorl %eax,%ecx - and EXT(kernel_stack_mask),%ecx - testl %ecx,%ecx /* are we on the kernel stack? */ - jne ret_to_kernel /* no, skip it */ - - /* - * Take an AST from kernel space. We don't need (and don't want) - * to do as much as the case where the interrupt came from user - * space. - */ - CCALL1(i386_astintr, $1) - - jmp ret_to_kernel - - -/* - * nested int - simple path, can't preempt etc on way out - */ -int_from_intstack: - incl %gs:CPU_PREEMPTION_LEVEL - incl %gs:CPU_INTERRUPT_LEVEL - incl %gs:CPU_NESTED_ISTACK - - movl %esp, %edx /* x86_saved_state */ - CCALL1(interrupt, %edx) - - decl %gs:CPU_INTERRUPT_LEVEL - decl %gs:CPU_PREEMPTION_LEVEL - decl %gs:CPU_NESTED_ISTACK - - jmp ret_to_kernel - -/* - * Take an AST from an interrupted user - */ -ast_from_interrupt_user: - movl %gs:CPU_PENDING_AST,%eax - testl %eax,%eax /* pending ASTs? 
*/ - je ret_to_user /* no, nothing to do */ - - TIME_TRAP_UENTRY - - movl $1, %ecx /* check if we're in the PFZ */ - jmp EXT(return_from_trap_with_ast) /* return */ - - -/* - * 32bit Tasks - * System call entries via INTR_GATE or sysenter: - * - * esp -> x86_saved_state32_t - * cr3 -> kernel directory - * esp -> low based stack - * gs -> CPU_DATA_GS - * cs -> KERNEL32_CS - * ss/ds/es -> KERNEL_DS - * - * interrupts disabled - * direction flag cleared - */ - -Entry(lo_unix_scall) - TIME_TRAP_UENTRY - - movl %gs:CPU_KERNEL_STACK,%edi - xchgl %edi,%esp /* switch to kernel stack */ - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ - movl TH_TASK(%ecx),%ebx /* point to current task */ - incl TH_SYSCALLS_UNIX(%ecx) /* increment call count */ - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - sti - - CCALL1(unix_syscall, %edi) - /* - * always returns through thread_exception_return - */ - - -Entry(lo_mach_scall) - TIME_TRAP_UENTRY - - movl %gs:CPU_KERNEL_STACK,%edi - xchgl %edi,%esp /* switch to kernel stack */ - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ - movl TH_TASK(%ecx),%ebx /* point to current task */ - incl TH_SYSCALLS_MACH(%ecx) /* increment call count */ - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - sti - - CCALL1(mach_call_munger, %edi) - /* - * always returns through thread_exception_return - */ - - -Entry(lo_mdep_scall) - TIME_TRAP_UENTRY - - movl %gs:CPU_KERNEL_STACK,%edi - xchgl %edi,%esp /* switch to kernel stack */ - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ - movl TH_TASK(%ecx),%ebx /* point to current task */ - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - sti - - CCALL1(machdep_syscall, %edi) - /* - * always returns through thread_exception_return - */ - -return_to_user: - TIME_TRAP_UEXIT - jmp ret_to_user - - -/* - * 64bit Tasks - * System call entries via syscall only: - * - * esp -> x86_saved_state64_t - * cr3 -> kernel directory - * esp -> low based stack - * gs -> CPU_DATA_GS - * cs -> KERNEL32_CS - * ss/ds/es -> KERNEL_DS - * - * interrupts disabled - * direction flag cleared - */ - -Entry(lo_syscall) - TIME_TRAP_UENTRY - - movl %gs:CPU_KERNEL_STACK,%edi - xchgl %edi,%esp /* switch to kernel stack */ - - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ - movl TH_TASK(%ecx),%ebx /* point to current task */ - - /* Check for active vtimers in the current task */ - TASK_VTIMER_CHECK(%ebx, %ecx) - - /* - * We can be here either for a mach, unix machdep or diag syscall, - * as indicated by the syscall class: - */ - movl R64_RAX(%edi), %eax /* syscall number/class */ - movl %eax, %edx - andl $(SYSCALL_CLASS_MASK), %edx /* syscall class */ - cmpl $(SYSCALL_CLASS_MACH< #include -#ifdef __i386__ -struct i386_tss master_ktss - __attribute__ ((section ("__DESC, master_ktss"))) - __attribute__ ((aligned (4096))) = { - 0, /* back link */ - 0, /* esp0 */ - KERNEL_DS, /* ss0 */ - 0, /* esp1 */ - 0, /* ss1 */ - 0, /* esp2 */ - 0, /* ss2 */ - 0, /* cr3 */ - 0, /* eip */ - 0, /* eflags */ - 0, /* eax */ - 0, /* ecx */ - 0, /* edx */ - 0, /* ebx */ - 0, /* esp */ - 0, /* ebp */ - 0, /* esi */ - 0, /* edi */ - 0, /* es */ - 0, /* cs */ - 0, /* ss */ - 0, /* ds */ - 0, /* fs */ - 0, /* gs */ - KERNEL_LDT, /* ldt */ - 0, /* trace_trap */ - 0x0FFF /* IO bitmap offset - - beyond end of TSS segment, - so no bitmap */ -}; -#endif /* * The transient stack for sysenter. 
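An aside on the 0x0FFF value used for the I/O bitmap offset in the 32-bit TSS initializers above and in the 64-bit master_ktss64 that follows: the offset is measured from the TSS base, so pointing it past the TSS segment limit leaves no reachable bitmap, and every user-mode IN/OUT instruction takes a #GP fault. A minimal standalone model of the check the CPU performs; the names and layout here are illustrative, not from XNU:

#include <stdbool.h>
#include <stdint.h>

/* Model of the processor's I/O-permission test against a TSS whose
 * limit is smaller than the bitmap offset (e.g. offset 0x0FFF with a
 * TSS of roughly 0x68 bytes): every lookup lands past the limit, so
 * every port access from user mode faults -- the desired default. */
static bool io_port_allowed(const uint8_t *tss_base, uint32_t tss_limit,
                            uint16_t io_bitmap_offset, uint16_t port)
{
    uint32_t byte_off = io_bitmap_offset + port / 8u;

    if (byte_off > tss_limit)
        return false;                       /* no bitmap byte: #GP */
    /* a cleared bit grants access to the port */
    return (tss_base[byte_off] & (1u << (port % 8u))) == 0;
}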
@@ -111,86 +76,7 @@ struct sysenter_stack master_sstk __attribute__ ((section ("__DESC, master_sstk"))) __attribute__ ((aligned (16))) = { {0}, 0 }; -#ifdef X86_64 struct x86_64_tss master_ktss64 __attribute__ ((aligned (4096))) = { .io_bit_map_offset = 0x0FFF, }; -#endif /* X86_64 */ - -#ifdef __i386__ -/* - * Task structure for double-fault handler: - */ -struct i386_tss master_dftss - __attribute__ ((section ("__DESC, master_dftss"))) - __attribute__ ((aligned (4096))) = { - 0, /* back link */ - (int) &df_task_stack_end - 4, /* esp0 */ - KERNEL_DS, /* ss0 */ - 0, /* esp1 */ - 0, /* ss1 */ - 0, /* esp2 */ - 0, /* ss2 */ - (int) IdlePDPT, /* cr3 */ - (int) &df_task_start, /* eip */ - 0, /* eflags */ - 0, /* eax */ - 0, /* ecx */ - 0, /* edx */ - 0, /* ebx */ - (int) &df_task_stack_end - 4, /* esp */ - 0, /* ebp */ - 0, /* esi */ - 0, /* edi */ - KERNEL_DS, /* es */ - KERNEL32_CS, /* cs */ - KERNEL_DS, /* ss */ - KERNEL_DS, /* ds */ - KERNEL_DS, /* fs */ - CPU_DATA_GS, /* gs */ - KERNEL_LDT, /* ldt */ - 0, /* trace_trap */ - 0x0FFF /* IO bitmap offset - - beyond end of TSS segment, - so no bitmap */ -}; - - -/* - * Task structure for machine_check handler: - */ -struct i386_tss master_mctss - __attribute__ ((section ("__DESC, master_mctss"))) - __attribute__ ((aligned (4096))) = { - 0, /* back link */ - (int) &mc_task_stack_end - 4, /* esp0 */ - KERNEL_DS, /* ss0 */ - 0, /* esp1 */ - 0, /* ss1 */ - 0, /* esp2 */ - 0, /* ss2 */ - (int) IdlePDPT, /* cr3 */ - (int) &mc_task_start, /* eip */ - 0, /* eflags */ - 0, /* eax */ - 0, /* ecx */ - 0, /* edx */ - 0, /* ebx */ - (int) &mc_task_stack_end - 4, /* esp */ - 0, /* ebp */ - 0, /* esi */ - 0, /* edi */ - KERNEL_DS, /* es */ - KERNEL32_CS, /* cs */ - KERNEL_DS, /* ss */ - KERNEL_DS, /* ds */ - KERNEL_DS, /* fs */ - CPU_DATA_GS, /* gs */ - KERNEL_LDT, /* ldt */ - 0, /* trace_trap */ - 0x0FFF /* IO bitmap offset - - beyond end of TSS segment, - so no bitmap */ -}; -#endif /* __i386__ */ diff --git a/osfmk/i386/lapic.h b/osfmk/i386/lapic.h index 219332ae4..e83240931 100644 --- a/osfmk/i386/lapic.h +++ b/osfmk/i386/lapic.h @@ -301,6 +301,12 @@ static inline void lapic_set_timer_func(i386_intr_func_t func) { lapic_set_intr_func(LAPIC_VECTOR(TIMER), func); } +/* We don't support dynamic adjustment of the LAPIC timer base vector here + * it's effectively incompletely supported elsewhere as well. + */ +static inline void lapic_timer_swi(void) { + __asm__ __volatile__("int %0" :: "i"(LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT):"memory"); +} static inline void lapic_set_thermal_func(i386_intr_func_t func) { diff --git a/osfmk/i386/lapic_native.c b/osfmk/i386/lapic_native.c index 347b9e969..eda81384b 100644 --- a/osfmk/i386/lapic_native.c +++ b/osfmk/i386/lapic_native.c @@ -284,6 +284,7 @@ lapic_init(void) /* Set up the lapic_id <-> cpu_number map and add this boot processor */ lapic_cpu_map_init(); lapic_cpu_map((LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0); + current_cpu_datap()->cpu_phys_number = cpu_to_lapic[0]; kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]); } @@ -443,6 +444,10 @@ lapic_probe(void) * Re-initialize cpu features info and re-check. */ cpuid_set_info(); + /* We expect this codepath will never be traversed + * due to EFI enabling the APIC. Reducing the APIC + * interrupt base dynamically is not supported. 
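The fixed interrupt base called out in these comments is also what makes the new lapic_timer_swi() above legal: the "i" constraint requires the vector to be a compile-time immediate. A sketch of the same self-injection pattern, with a made-up vector number standing in for LAPIC_DEFAULT_INTERRUPT_BASE + LAPIC_TIMER_INTERRUPT:

/* Illustrative only: 0xDD is a hypothetical vector. "int $n" pushes the
 * same trap frame the hardware timer interrupt would, so the ordinary
 * LAPIC timer handler runs immediately instead of waiting for the next
 * tick to fire. */
#define SKETCH_TIMER_VECTOR 0xDD

static inline void timer_swi_sketch(void)
{
    __asm__ __volatile__("int %0" : : "i"(SKETCH_TIMER_VECTOR) : "memory");
}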
+ */ if (cpuid_features() & CPUID_FEATURE_APIC) { printf("Local APIC discovered and enabled\n"); lapic_os_enabled = TRUE; diff --git a/osfmk/i386/ldt.c b/osfmk/i386/ldt.c index ff1facd34..4027c05f2 100644 --- a/osfmk/i386/ldt.c +++ b/osfmk/i386/ldt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -64,38 +64,6 @@ struct real_descriptor master_ldt[LDTSZ] __attribute__ ((aligned (4096))) = { -#ifdef __i386__ - [SEL_TO_INDEX(SYSENTER_CS)] = MAKE_REAL_DESCRIPTOR( /* kernel code (sysenter) */ - 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_K|ACC_CODE_R - ), - [SEL_TO_INDEX(SYSENTER_DS)] = MAKE_REAL_DESCRIPTOR( /* kernel data (sysenter) */ - 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_K|ACC_DATA_W - ), - [SEL_TO_INDEX(USER_CS)] = MAKE_REAL_DESCRIPTOR( /* user code segment */ - 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_U|ACC_CODE_R - ), - [SEL_TO_INDEX(USER_DS)] = MAKE_REAL_DESCRIPTOR( /* user data segment */ - 0, - 0xfffff, - SZ_32|SZ_G, - ACC_P|ACC_PL_U|ACC_DATA_W - ), - [SEL_TO_INDEX(USER64_CS)] = MAKE_REAL_DESCRIPTOR( /* user 64-bit code segment */ - 0, - 0xfffff, - SZ_64|SZ_G, - ACC_P|ACC_PL_U|ACC_CODE_R - ), -#endif [SEL_TO_INDEX(USER_CTHREAD)] = MAKE_REAL_DESCRIPTOR( /* user cthread segment */ 0, 0xfffff, diff --git a/osfmk/i386/locks.h b/osfmk/i386/locks.h index 065dfecea..368b34141 100644 --- a/osfmk/i386/locks.h +++ b/osfmk/i386/locks.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2004-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -40,6 +40,7 @@ extern unsigned int LcksOpts; #define enaLkDeb 0x00000001 /* Request debug in default attribute */ #define enaLkStat 0x00000002 /* Request statistic in default attribute */ +#define disLkRWPrio 0x00000004 /* Disable RW lock priority promotion */ #endif /* MACH_KERNEL_PRIVATE */ @@ -85,17 +86,13 @@ typedef struct _lck_mtx_ { }; uint32_t lck_mtxd_state; }; -#if defined(__x86_64__) /* Pad field used as a canary, initialized to ~0 */ uint32_t lck_mtxd_pad32; -#endif } lck_mtxd; struct { struct _lck_mtx_ext_ *lck_mtxi_ptr; uint32_t lck_mtxi_tag; -#if defined(__x86_64__) uint32_t lck_mtxi_pad32; -#endif } lck_mtxi; } lck_mtx_sw; } lck_mtx_t; @@ -129,9 +126,7 @@ extern void hw_lock_byte_unlock(volatile uint8_t *lock_byte); typedef struct { unsigned int type; -#ifdef __x86_64__ unsigned int pad4; -#endif vm_offset_t pc; vm_offset_t thread; } lck_mtx_deb_t; @@ -146,14 +141,10 @@ typedef struct _lck_mtx_ext_ { lck_mtx_t lck_mtx; struct _lck_grp_ *lck_mtx_grp; unsigned int lck_mtx_attr; -#ifdef __x86_64__ unsigned int lck_mtx_pad1; -#endif lck_mtx_deb_t lck_mtx_deb; uint64_t lck_mtx_stat; -#ifdef __x86_64__ unsigned int lck_mtx_pad2[2]; -#endif } lck_mtx_ext_t; #define LCK_MTX_ATTR_DEBUG 0x1 @@ -205,9 +196,7 @@ typedef struct _lck_rw_t_internal_ { * are in */ uint32_t lck_rw_pad8; -#ifdef __x86_64__ uint32_t lck_rw_pad12; -#endif } lck_rw_t; #pragma pack() @@ -229,9 +218,7 @@ typedef struct _lck_rw_t_internal_ { #pragma pack(1) typedef struct { uint32_t opaque[3]; -#ifdef __x86_64__ uint32_t opaque4; -#endif } lck_rw_t; #pragma pack() #else diff --git a/osfmk/i386/locks_i386.c b/osfmk/i386/locks_i386.c index a274e0e40..69b83f1c3 100644 --- a/osfmk/i386/locks_i386.c +++ b/osfmk/i386/locks_i386.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -199,6 +199,8 @@ lck_rw_type_t lck_rw_done_gen( lck_rw_t *lck, int prior_lock_state); +void lck_rw_clear_promotions_x86(thread_t thread); + /* * Routine: lck_spin_alloc_init */ @@ -870,6 +872,9 @@ lck_rw_destroy( { if (lck->lck_rw_tag == LCK_RW_TAG_DESTROYED) return; +#if MACH_LDEBUG + lck_rw_assert(lck, LCK_RW_ASSERT_NOTHELD); +#endif lck->lck_rw_tag = LCK_RW_TAG_DESTROYED; lck_grp_lckcnt_decr(grp, LCK_TYPE_RW); lck_grp_deallocate(grp); @@ -1179,6 +1184,20 @@ lck_rw_done_gen( { lck_rw_t *fake_lck; lck_rw_type_t lock_type; + thread_t thread = current_thread(); + uint32_t rwlock_count; + + /* Check if dropping the lock means that we need to unpromote */ + rwlock_count = thread->rwlock_count--; +#if MACH_LDEBUG + if (rwlock_count == 0) { + panic("rw lock count underflow for thread %p", thread); + } +#endif + if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + /* sched_flags checked without lock, but will be rechecked while clearing */ + lck_rw_clear_promotion(thread); + } /* * prior_lock state is a snapshot of the 1st word of the @@ -1395,6 +1414,20 @@ lck_rw_lock_shared_to_exclusive_failure( int prior_lock_state) { lck_rw_t *fake_lck; + thread_t thread = current_thread(); + uint32_t rwlock_count; + + /* Check if dropping the lock means that we need to unpromote */ + rwlock_count = thread->rwlock_count--; +#if MACH_LDEBUG + if (rwlock_count == 0) { + panic("rw lock count underflow for thread %p", thread); + } +#endif + if ((rwlock_count == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + /* sched_flags checked without lock, but will be rechecked while clearing */ + lck_rw_clear_promotion(thread); + } /* * prior_lock state is a snapshot of the 1st word of the @@ -1616,13 +1649,35 @@ lck_rw_assert( return; } break; + case LCK_RW_ASSERT_NOTHELD: + if (!(lck->lck_rw_want_write || + lck->lck_rw_want_upgrade || + lck->lck_rw_shared_count != 0)) { + return; + } + break; default: break; } - panic("rw lock (%p) not held (mode=%u), first word %08x\n", lck, type, *(uint32_t *)lck); + panic("rw lock (%p)%s held (mode=%u), first word %08x\n", lck, (type == LCK_RW_ASSERT_NOTHELD ? 
"" : " not"), type, *(uint32_t *)lck); +} + +/* On return to userspace, this routine is called if the rwlock_count is somehow imbalanced */ +void +lck_rw_clear_promotions_x86(thread_t thread) +{ +#if MACH_LDEBUG + /* It's fatal to leave a RW lock locked and return to userspace */ + panic("%u rw lock(s) held on return to userspace for thread %p", thread->rwlock_count, thread); +#else + /* Paper over the issue */ + thread->rwlock_count = 0; + lck_rw_clear_promotion(thread); +#endif } + #ifdef MUTEX_ZONE extern zone_t lck_mtx_zone; #endif @@ -1683,9 +1738,7 @@ lck_mtx_ext_init( lck->lck_mtx_attr |= LCK_MTX_ATTR_STAT; lck->lck_mtx.lck_mtx_is_ext = 1; -#if defined(__x86_64__) lck->lck_mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF; -#endif } /* @@ -1715,9 +1768,7 @@ lck_mtx_init( lck->lck_mtx_owner = 0; lck->lck_mtx_state = 0; } -#if defined(__x86_64__) lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF; -#endif lck_grp_reference(grp); lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); } @@ -1747,9 +1798,7 @@ lck_mtx_init_ext( lck->lck_mtx_owner = 0; lck->lck_mtx_state = 0; } -#if defined(__x86_64__) lck->lck_mtx_sw.lck_mtxd.lck_mtxd_pad32 = 0xFFFFFFFF; -#endif lck_grp_reference(grp); lck_grp_lckcnt_incr(grp, LCK_TYPE_MTX); @@ -1767,6 +1816,9 @@ lck_mtx_destroy( if (lck->lck_mtx_tag == LCK_MTX_TAG_DESTROYED) return; +#if MACH_LDEBUG + lck_mtx_assert(lck, LCK_MTX_ASSERT_NOTOWNED); +#endif lck_is_indirect = (lck->lck_mtx_tag == LCK_MTX_TAG_INDIRECT); lck_mtx_lock_mark_destroyed(lck); @@ -1815,7 +1867,6 @@ lck_mtx_unlock_wakeup_x86 ( mutex, fake_lck.lck_mtx_promoted, fake_lck.lck_mtx_waiters, fake_lck.lck_mtx_pri, 0); if (__probable(fake_lck.lck_mtx_waiters)) { - if (fake_lck.lck_mtx_waiters > 1) thread_wakeup_one_with_pri((event_t)(((unsigned int*)mutex)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)), fake_lck.lck_mtx_pri); else @@ -1898,9 +1949,11 @@ lck_mtx_lock_acquire_x86( s = splsched(); thread_lock(thread); - if (thread->sched_pri < priority) + if (thread->sched_pri < priority) { + /* Do not promote into the realtime priority band */ + assert(priority <= MAXPRI_KERNEL); set_sched_pri(thread, priority); - + } if (mutex->lck_mtx_promoted == 0) { mutex->lck_mtx_promoted = 1; @@ -2037,13 +2090,19 @@ lck_mtx_lock_wait_x86 ( if (priority < BASEPRI_DEFAULT) priority = BASEPRI_DEFAULT; + /* Do not promote into the realtime priority band */ + priority = MIN(priority, MAXPRI_KERNEL); + if (mutex->lck_mtx_waiters == 0 || priority > mutex->lck_mtx_pri) mutex->lck_mtx_pri = priority; mutex->lck_mtx_waiters++; if ( (holder = (thread_t)mutex->lck_mtx_owner) && holder->sched_pri < mutex->lck_mtx_pri ) { - + /* Assert that we're not altering the priority of a + * MAXPRI_KERNEL or RT prio band thread + */ + assert(holder->sched_pri < MAXPRI_KERNEL); s = splsched(); thread_lock(holder); diff --git a/osfmk/i386/locore.s b/osfmk/i386/locore.s deleted file mode 100644 index 911439764..000000000 --- a/osfmk/i386/locore.s +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * PTmap is recursive pagemap at top of virtual address space. - * Within PTmap, the page directory can be found (third indirection). -*/ - .globl _PTmap,_PTD,_PTDpde - .set _PTmap,(PTDPTDI << PDESHIFT) - .set _PTD,_PTmap + (PTDPTDI * NBPG) - .set _PTDpde,_PTD + (PTDPTDI * PDESIZE) - -#if __MACHO__ -/* Under Mach-O, etext is a variable which contains - * the last text address - */ -#define ETEXT_ADDR (EXT(etext)) -#else -/* Under ELF and other non-Mach-O formats, the address of - * etext represents the last text address - */ -#define ETEXT_ADDR $ EXT(etext) -#endif - - - .text -locore_start: - -/* - * Fault recovery. 
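The RECOVERY_SECTION/RECOVER machinery defined next builds a table pairing each instruction that is allowed to fault with a resume address; the trap handler consults it before deciding a kernel fault is fatal. A C model of that lookup, under the assumption (not shown in this file) that the table is scanned linearly on a fault:

#include <stddef.h>
#include <stdint.h>

struct recover_entry {
    uintptr_t fault_pc;    /* instruction permitted to fault */
    uintptr_t recover_pc;  /* error-path address to resume at */
};

/* Hypothetical trap-handler step: if the saved EIP matches a table
 * entry, rewrite it to the recovery address so the faulting routine
 * returns an error (e.g. EFAULT) instead of panicking the kernel. */
static uintptr_t recover_lookup(const struct recover_entry *table,
                                size_t nentries, uintptr_t fault_pc)
{
    for (size_t i = 0; i < nentries; i++) {
        if (table[i].fault_pc == fault_pc)
            return table[i].recover_pc;
    }
    return 0;  /* not a recoverable fault */
}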
- */ - -#ifdef __MACHO__ -#define RECOVERY_SECTION .section __VECTORS, __recover -#else -#define RECOVERY_SECTION .text -#define RECOVERY_SECTION .text -#endif - -#define RECOVER_TABLE_START \ - .align 2 ; \ - .globl EXT(recover_table) ;\ -LEXT(recover_table) ;\ - .text - -#define RECOVER(addr) \ - .align 2; \ - .long 9f ;\ - .long addr ;\ - .text ;\ -9: - -#define RECOVER_TABLE_END \ - .align 2 ;\ - .globl EXT(recover_table_end) ;\ -LEXT(recover_table_end) ;\ - .long 0 /* workaround see comment below */ ;\ - .text ; - -/* TODO FIXME - * the .long 0 is to work around a linker bug (insert radar# here) - * basically recover_table_end has zero size and bumps up right against saved_esp in acpi_wakeup.s - * recover_table_end is in __RECOVER,__vectors and saved_esp is in __SLEEP,__data, but they're right next to each - * other and so the linker combines them and incorrectly relocates everything referencing recover_table_end to point - * into the SLEEP section - */ - -/* - * Allocate recovery and table. - */ - RECOVERY_SECTION - RECOVER_TABLE_START - - -/* - * Called as a function, makes the current thread - * return from the kernel as if from an exception. - * We will consult with DTrace if this is a - * newly created thread and we need to fire a probe. - */ - - .globl EXT(thread_exception_return) - .globl EXT(thread_bootstrap_return) -LEXT(thread_bootstrap_return) -#if CONFIG_DTRACE - call EXT(dtrace_thread_bootstrap) -#endif - -LEXT(thread_exception_return) - cli - xorl %ecx,%ecx /* don't check if in the PFZ */ - cmpl $0, %gs:CPU_IS64BIT - je EXT(return_from_trap32) - jmp EXT(return_from_trap) - - -/* - * Utility routines. - */ - -/* - * Copy from user/kernel address space. - * arg0: window offset or kernel address - * arg1: kernel address - * arg2: byte count - */ -Entry(copyinphys_user) - movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */ - mov %cx,%ds - -Entry(copyinphys_kern) - movl $(PHYS_WINDOW_SEL),%ecx /* physical access through kernel window */ - mov %cx,%es - jmp copyin_common - -Entry(copyin_user) - movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */ - mov %cx,%ds - -Entry(copyin_kern) - -copyin_common: - pushl %esi - pushl %edi /* save registers */ - - movl 8+S_ARG0,%esi /* get source - window offset or kernel address */ - movl 8+S_ARG1,%edi /* get destination - kernel address */ - movl 8+S_ARG2,%edx /* get count */ - - cld /* count up */ - movl %edx,%ecx /* move by longwords first */ - shrl $2,%ecx - RECOVERY_SECTION - RECOVER(copyin_fail) - rep - movsl /* move longwords */ - movl %edx,%ecx /* now move remaining bytes */ - andl $3,%ecx - RECOVERY_SECTION - RECOVER(copyin_fail) - rep - movsb - xorl %eax,%eax /* return 0 for success */ -copyin_ret: - mov %ss,%cx /* restore kernel data and extended segments */ - mov %cx,%ds - mov %cx,%es - - popl %edi /* restore registers */ - popl %esi - ret /* and return */ - -copyin_fail: - movl $(EFAULT),%eax /* return error for failure */ - jmp copyin_ret /* pop frame and return */ - - - -/* - * Copy string from user/kern address space. 
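Before the argument list and assembly, the contract copyinstr implements, restated as a plain-C sketch; the EFAULT path depends on the recover table above and is omitted here:

#include <errno.h>
#include <stddef.h>

/* Sketch only: copy at most maxlen bytes including the NUL, report the
 * number of bytes transferred through *done, and return 0 on success or
 * ENAMETOOLONG when the buffer fills before a NUL is found. A NULL
 * destination counts characters without storing, as the assembly does. */
static int copyinstr_sketch(const char *src, char *dst,
                            size_t maxlen, size_t *done)
{
    size_t n = 0;

    while (n < maxlen) {
        char c = src[n];          /* the faultable load in the real code */
        if (dst != NULL)
            dst[n] = c;
        n++;
        if (c == '\0') {
            if (done != NULL)
                *done = n;
            return 0;
        }
    }
    if (done != NULL)
        *done = n;
    return ENAMETOOLONG;
}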
- * arg0: window offset or kernel address - * arg1: kernel address - * arg2: max byte count - * arg3: actual byte count (OUT) - */ -Entry(copyinstr_kern) - mov %ds,%cx - jmp copyinstr_common - -Entry(copyinstr_user) - movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */ - -copyinstr_common: - mov %cx,%fs - - pushl %esi - pushl %edi /* save registers */ - - movl 8+S_ARG0,%esi /* get source - window offset or kernel address */ - movl 8+S_ARG1,%edi /* get destination - kernel address */ - movl 8+S_ARG2,%edx /* get count */ - - xorl %eax,%eax /* set to 0 here so that the high 24 bits */ - /* are 0 for the cmpl against 0 */ -2: - RECOVERY_SECTION - RECOVER(copystr_fail) /* copy bytes... */ - movb %fs:(%esi),%al - incl %esi - testl %edi,%edi /* if kernel address is ... */ - jz 3f /* not NULL */ - movb %al,(%edi) /* copy the byte */ - incl %edi -3: - testl %eax,%eax /* did we just stuff the 0-byte? */ - jz 4f /* yes, return 0 status already in %eax */ - decl %edx /* decrement #bytes left in buffer */ - jnz 2b /* buffer not full so copy in another byte */ - movl $(ENAMETOOLONG),%eax /* buffer full but no 0-byte: ENAMETOOLONG */ -4: - movl 8+S_ARG3,%edi /* get OUT len ptr */ - cmpl $0,%edi - jz copystr_ret /* if null, just return */ - subl 8+S_ARG0,%esi - movl %esi,(%edi) /* else set OUT arg to xfer len */ -copystr_ret: - popl %edi /* restore registers */ - popl %esi - ret /* and return */ - -copystr_fail: - movl $(EFAULT),%eax /* return error for failure */ - jmp copystr_ret /* pop frame and return */ - - -/* - * Copy to user/kern address space. - * arg0: kernel address - * arg1: window offset or kernel address - * arg2: byte count - */ -ENTRY(copyoutphys_user) - movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */ - mov %cx,%es - -ENTRY(copyoutphys_kern) - movl $(PHYS_WINDOW_SEL),%ecx /* physical access through kernel window */ - mov %cx,%ds - jmp copyout_common - -ENTRY(copyout_user) - movl $(USER_WINDOW_SEL),%ecx /* user data segment access through kernel window */ - mov %cx,%es - -ENTRY(copyout_kern) - -copyout_common: - pushl %esi - pushl %edi /* save registers */ - - movl 8+S_ARG0,%esi /* get source - kernel address */ - movl 8+S_ARG1,%edi /* get destination - window offset or kernel address */ - movl 8+S_ARG2,%edx /* get count */ - - cld /* count up */ - movl %edx,%ecx /* move by longwords first */ - shrl $2,%ecx - RECOVERY_SECTION - RECOVER(copyout_fail) - rep - movsl - movl %edx,%ecx /* now move remaining bytes */ - andl $3,%ecx - RECOVERY_SECTION - RECOVER(copyout_fail) - rep - movsb /* move */ - xorl %eax,%eax /* return 0 for success */ -copyout_ret: - mov %ss,%cx /* restore kernel segment */ - mov %cx,%es - mov %cx,%ds - - popl %edi /* restore registers */ - popl %esi - ret /* and return */ - -copyout_fail: - movl $(EFAULT),%eax /* return error for failure */ - jmp copyout_ret /* pop frame and return */ - - -/* - * io register must not be used on slaves (no AT bus) - */ -#define ILL_ON_SLAVE - - -#if MACH_ASSERT - -#define ARG0 B_ARG0 -#define ARG1 B_ARG1 -#define ARG2 B_ARG2 -#define PUSH_FRAME FRAME -#define POP_FRAME EMARF - -#else /* MACH_ASSERT */ - -#define ARG0 S_ARG0 -#define ARG1 S_ARG1 -#define ARG2 S_ARG2 -#define PUSH_FRAME -#define POP_FRAME - -#endif /* MACH_ASSERT */ - - -/* - * int rdmsr_carefully(uint32_t msr, uint32_t *lo, uint32_t *hi) - */ -ENTRY(rdmsr_carefully) - movl S_ARG0, %ecx - RECOVERY_SECTION - RECOVER(rdmsr_fail) - rdmsr - movl S_ARG1, %ecx - movl %eax, (%ecx) - movl S_ARG2, %ecx - movl %edx, (%ecx) 
- movl $0, %eax - ret - -rdmsr_fail: - movl $1, %eax - ret - -/* - * Done with recovery table. - */ - RECOVERY_SECTION - RECOVER_TABLE_END - -/* - * ffs(mask) - */ -ENTRY(ffs) - bsfl S_ARG0, %eax - jz 0f - incl %eax - ret -0: xorl %eax, %eax - ret - -/* - * cpu_shutdown() - * Force reboot - */ - -null_idtr: - .word 0 - .long 0 - -Entry(cpu_shutdown) - lidt null_idtr /* disable the interrupt handler */ - xor %ecx,%ecx /* generate a divide by zero */ - div %ecx,%eax /* reboot now */ - ret /* this will "never" be executed */ - - -/* - * setbit(int bitno, int *s) - set bit in bit string - */ -ENTRY(setbit) - movl S_ARG0, %ecx /* bit number */ - movl S_ARG1, %eax /* address */ - btsl %ecx, (%eax) /* set bit */ - ret - -/* - * clrbit(int bitno, int *s) - clear bit in bit string - */ -ENTRY(clrbit) - movl S_ARG0, %ecx /* bit number */ - movl S_ARG1, %eax /* address */ - btrl %ecx, (%eax) /* clear bit */ - ret - -/* - * ffsbit(int *s) - find first set bit in bit string - */ -ENTRY(ffsbit) - movl S_ARG0, %ecx /* address */ - movl $0, %edx /* base offset */ -0: - bsfl (%ecx), %eax /* check argument bits */ - jnz 1f /* found bit, return */ - addl $4, %ecx /* increment address */ - addl $32, %edx /* increment offset */ - jmp 0b /* try again */ -1: - addl %edx, %eax /* return offset */ - ret - -/* - * testbit(int nr, volatile void *array) - * - * Test to see if the bit is set within the bit string - */ - -ENTRY(testbit) - movl S_ARG0,%eax /* Get the bit to test */ - movl S_ARG1,%ecx /* get the array string */ - btl %eax,(%ecx) - sbbl %eax,%eax - ret - diff --git a/osfmk/i386/loose_ends.c b/osfmk/i386/loose_ends.c deleted file mode 100644 index fe0a6aabd..000000000 --- a/osfmk/i386/loose_ends.c +++ /dev/null @@ -1,794 +0,0 @@ -/* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989 Carnegie Mellon University - * All Rights Reserved. 
- * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#if 0 - -#undef KERNEL_DEBUG -#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT -#define KDEBUG 1 - -#endif - -/* XXX - should be gone from here */ -extern void invalidate_icache64(addr64_t addr, unsigned cnt, int phys); -extern void flush_dcache64(addr64_t addr, unsigned count, int phys); -extern boolean_t phys_page_exists(ppnum_t); -extern void bcopy_no_overwrite(const char *from, char *to,vm_size_t bytes); -extern void pmap_set_reference(ppnum_t pn); -extern void mapping_set_mod(ppnum_t pa); -extern void mapping_set_ref(ppnum_t pn); - -extern void ovbcopy(const char *from, - char *to, - vm_size_t nbytes); -void machine_callstack(natural_t *buf, vm_size_t callstack_max); - - -#define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL) -#define low32(x) ((unsigned int)((x) & 0x00000000FFFFFFFFLL)) - -#define JOE_DEBUG 0 - -void -bzero_phys_nc( - addr64_t src64, - uint32_t bytes) -{ - bzero_phys(src64,bytes); -} - -void -bzero_phys( - addr64_t src64, - uint32_t bytes) -{ - mapwindow_t *map; - - mp_disable_preemption(); - - map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)src64 & PG_FRAME) | INTEL_PTE_REF | INTEL_PTE_MOD)); - - bzero((void *)((uintptr_t)map->prv_CADDR | ((uint32_t)src64 & INTEL_OFFMASK)), bytes); - - pmap_put_mapwindow(map); - - mp_enable_preemption(); -} - - -/* - * bcopy_phys - like bcopy but copies from/to physical addresses. 
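The physical-memory routines in this deleted file all share one pattern: disable preemption, map each physical page through a per-CPU mapping window, do the access through the window's virtual address, then release the window. A sketch of the pattern; phys_window_map/unmap and the preemption calls are hypothetical stand-ins for pmap_get_mapwindow()/pmap_put_mapwindow() and mp_disable_preemption()/mp_enable_preemption():

#include <assert.h>
#include <stddef.h>
#include <string.h>

#define PAGE_SIZE_SKETCH 4096ULL
typedef unsigned long long addr64_sketch_t;

/* Hypothetical stand-ins; each window maps exactly one physical page. */
void *phys_window_map(addr64_sketch_t pa, int writable);
void  phys_window_unmap(void *va);
void  preemption_disable(void);
void  preemption_enable(void);

static void bcopy_phys_sketch(addr64_sketch_t src, addr64_sketch_t dst,
                              size_t bytes)
{
    /* the real code panics if a copy would cross a page boundary */
    assert(((src & (PAGE_SIZE_SKETCH - 1)) + bytes) <= PAGE_SIZE_SKETCH);
    assert(((dst & (PAGE_SIZE_SKETCH - 1)) + bytes) <= PAGE_SIZE_SKETCH);

    preemption_disable();             /* windows are per-CPU resources */
    void *s = phys_window_map(src, 0);
    void *d = phys_window_map(dst, 1);
    memcpy(d, s, bytes);
    phys_window_unmap(d);
    phys_window_unmap(s);
    preemption_enable();
}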
- */ - -void -bcopy_phys( - addr64_t src64, - addr64_t dst64, - vm_size_t bytes) -{ - mapwindow_t *src_map, *dst_map; - - /* ensure we stay within a page */ - if ( ((((uint32_t)src64 & (NBPG-1)) + bytes) > NBPG) || ((((uint32_t)dst64 & (NBPG-1)) + bytes) > NBPG) ) { - panic("bcopy_phys alignment"); - } - mp_disable_preemption(); - - src_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | ((pmap_paddr_t)src64 & PG_FRAME) | INTEL_PTE_REF)); - dst_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)dst64 & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD)); - - bcopy((void *) ((uintptr_t)src_map->prv_CADDR | ((uint32_t)src64 & INTEL_OFFMASK)), - (void *) ((uintptr_t)dst_map->prv_CADDR | ((uint32_t)dst64 & INTEL_OFFMASK)), bytes); - - pmap_put_mapwindow(src_map); - pmap_put_mapwindow(dst_map); - - mp_enable_preemption(); -} - -/* - * allow a function to get a quick virtual mapping of a physical page - */ - -int -apply_func_phys( - addr64_t dst64, - vm_size_t bytes, - int (*func)(void * buffer, vm_size_t bytes, void * arg), - void * arg) -{ - mapwindow_t *dst_map; - int rc = -1; - - /* ensure we stay within a page */ - if ( ((((uint32_t)dst64 & (NBPG-1)) + bytes) > NBPG) ) { - panic("apply_func_phys alignment"); - } - mp_disable_preemption(); - - dst_map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | ((pmap_paddr_t)dst64 & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD)); - - rc = func((void *)((uintptr_t)dst_map->prv_CADDR | ((uint32_t)dst64 & INTEL_OFFMASK)), bytes, arg); - - pmap_put_mapwindow(dst_map); - - mp_enable_preemption(); - - return rc; -} - -/* - * ovbcopy - like bcopy, but recognizes overlapping ranges and handles - * them correctly. - */ - -void -ovbcopy( - const char *from, - char *to, - vm_size_t bytes) /* num bytes to copy */ -{ - /* Assume that bcopy copies left-to-right (low addr first). */ - if (from + bytes <= to || to + bytes <= from || to == from) - bcopy_no_overwrite(from, to, bytes); /* non-overlapping or no-op*/ - else if (from > to) - bcopy_no_overwrite(from, to, bytes); /* overlapping but OK */ - else { - /* to > from: overlapping, and must copy right-to-left. */ - from += bytes - 1; - to += bytes - 1; - while (bytes-- > 0) - *to-- = *from--; - } -} - - -/* - * Read data from a physical address. 
- */ - - -static unsigned int -ml_phys_read_data(pmap_paddr_t paddr, int size ) -{ - mapwindow_t *map; - unsigned int result; - - mp_disable_preemption(); - - map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF)); - - switch (size) { - unsigned char s1; - unsigned short s2; - case 1: - s1 = *(unsigned char *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)); - result = s1; - break; - case 2: - s2 = *(unsigned short *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)); - result = s2; - break; - case 4: - default: - result = *(unsigned int *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)); - break; - } - pmap_put_mapwindow(map); - - mp_enable_preemption(); - - return result; -} - -static unsigned long long -ml_phys_read_long_long(pmap_paddr_t paddr ) -{ - mapwindow_t *map; - unsigned long long result; - - mp_disable_preemption(); - - map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | (paddr & PG_FRAME) | INTEL_PTE_REF)); - - result = *(unsigned long long *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)); - - pmap_put_mapwindow(map); - - mp_enable_preemption(); - - return result; -} - -unsigned int ml_phys_read( vm_offset_t paddr) -{ - return ml_phys_read_data((pmap_paddr_t)paddr, 4); -} - -unsigned int ml_phys_read_word(vm_offset_t paddr) { - - return ml_phys_read_data((pmap_paddr_t)paddr, 4); -} - -unsigned int ml_phys_read_64(addr64_t paddr64) -{ - return ml_phys_read_data((pmap_paddr_t)paddr64, 4); -} - -unsigned int ml_phys_read_word_64(addr64_t paddr64) -{ - return ml_phys_read_data((pmap_paddr_t)paddr64, 4); -} - -unsigned int ml_phys_read_half(vm_offset_t paddr) -{ - return ml_phys_read_data((pmap_paddr_t)paddr, 2); -} - -unsigned int ml_phys_read_half_64(addr64_t paddr64) -{ - return ml_phys_read_data((pmap_paddr_t)paddr64, 2); -} - -unsigned int ml_phys_read_byte(vm_offset_t paddr) -{ - return ml_phys_read_data((pmap_paddr_t)paddr, 1); -} - -unsigned int ml_phys_read_byte_64(addr64_t paddr64) -{ - return ml_phys_read_data((pmap_paddr_t)paddr64, 1); -} - -unsigned long long ml_phys_read_double(vm_offset_t paddr) -{ - return ml_phys_read_long_long((pmap_paddr_t)paddr); -} - -unsigned long long ml_phys_read_double_64(addr64_t paddr64) -{ - return ml_phys_read_long_long((pmap_paddr_t)paddr64); -} - - - -/* - * Write data to a physical address. 
- */ - -static void -ml_phys_write_data(pmap_paddr_t paddr, unsigned long data, int size) -{ - mapwindow_t *map; - - mp_disable_preemption(); - - map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD)); - - switch (size) { - case 1: - *(unsigned char *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = (unsigned char)data; - break; - case 2: - *(unsigned short *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = (unsigned short)data; - break; - case 4: - default: - *(unsigned int *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = (uint32_t)data; - break; - } - pmap_put_mapwindow(map); - - mp_enable_preemption(); -} - -static void -ml_phys_write_long_long(pmap_paddr_t paddr, unsigned long long data) -{ - mapwindow_t *map; - - mp_disable_preemption(); - - map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (paddr & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD)); - - *(unsigned long long *)((uintptr_t)map->prv_CADDR | ((uint32_t)paddr & INTEL_OFFMASK)) = data; - - pmap_put_mapwindow(map); - - mp_enable_preemption(); -} - - - -void ml_phys_write_byte(vm_offset_t paddr, unsigned int data) -{ - ml_phys_write_data((pmap_paddr_t)paddr, data, 1); -} - -void ml_phys_write_byte_64(addr64_t paddr64, unsigned int data) -{ - ml_phys_write_data((pmap_paddr_t)paddr64, data, 1); -} - -void ml_phys_write_half(vm_offset_t paddr, unsigned int data) -{ - ml_phys_write_data((pmap_paddr_t)paddr, data, 2); -} - -void ml_phys_write_half_64(addr64_t paddr64, unsigned int data) -{ - ml_phys_write_data((pmap_paddr_t)paddr64, data, 2); -} - -void ml_phys_write(vm_offset_t paddr, unsigned int data) -{ - ml_phys_write_data((pmap_paddr_t)paddr, data, 4); -} - -void ml_phys_write_64(addr64_t paddr64, unsigned int data) -{ - ml_phys_write_data((pmap_paddr_t)paddr64, data, 4); -} - -void ml_phys_write_word(vm_offset_t paddr, unsigned int data) -{ - ml_phys_write_data((pmap_paddr_t)paddr, data, 4); -} - -void ml_phys_write_word_64(addr64_t paddr64, unsigned int data) -{ - ml_phys_write_data((pmap_paddr_t)paddr64, data, 4); -} - -void ml_phys_write_double(vm_offset_t paddr, unsigned long long data) -{ - ml_phys_write_long_long((pmap_paddr_t)paddr, data); -} - -void ml_phys_write_double_64(addr64_t paddr64, unsigned long long data) -{ - ml_phys_write_long_long((pmap_paddr_t)paddr64, data); -} - - -/* PCI config cycle probing - * - * - * Read the memory location at physical address paddr. - * This is a part of a device probe, so there is a good chance we will - * have a machine check here. So we have to be able to handle that. - * We assume that machine checks are enabled both in MSR and HIDs - */ - -boolean_t -ml_probe_read(vm_offset_t paddr, unsigned int *val) -{ - if ((PAGE_SIZE - (paddr & PAGE_MASK)) < 4) - return FALSE; - - *val = ml_phys_read(paddr); - - return TRUE; -} - -/* - * Read the memory location at physical address paddr. - * This is a part of a device probe, so there is a good chance we will - * have a machine check here. So we have to be able to handle that. 
- * We assume that machine checks are enabled both in MSR and HIDs - */ -boolean_t -ml_probe_read_64(addr64_t paddr64, unsigned int *val) -{ - if ((PAGE_SIZE - (paddr64 & PAGE_MASK)) < 4) - return FALSE; - - *val = ml_phys_read_64((pmap_paddr_t)paddr64); - return TRUE; -} - - -int bcmp( - const void *pa, - const void *pb, - size_t len) -{ - const char *a = (const char *)pa; - const char *b = (const char *)pb; - - if (len == 0) - return 0; - - do - if (*a++ != *b++) - break; - while (--len); - - return (int)len; -} - -int -memcmp(const void *s1, const void *s2, size_t n) -{ - if (n != 0) { - const unsigned char *p1 = s1, *p2 = s2; - - do { - if (*p1++ != *p2++) - return (*--p1 - *--p2); - } while (--n != 0); - } - return (0); -} - -/* - * Abstract: - * strlen returns the number of characters in "string" preceeding - * the terminating null character. - */ - -size_t -strlen( - register const char *string) -{ - register const char *ret = string; - - while (*string++ != '\0') - continue; - return string - 1 - ret; -} - -uint32_t -hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest) -{ - return OSCompareAndSwap((UInt32)oldval, - (UInt32)newval, - (volatile UInt32 *)dest); -} - -#if MACH_ASSERT - -/* - * Machine-dependent routine to fill in an array with up to callstack_max - * levels of return pc information. - */ -void machine_callstack( - __unused natural_t *buf, - __unused vm_size_t callstack_max) -{ -} - -#endif /* MACH_ASSERT */ - -void fillPage(ppnum_t pa, unsigned int fill) -{ - mapwindow_t *map; - pmap_paddr_t src; - int i; - int cnt = PAGE_SIZE/sizeof(unsigned int); - unsigned int *addr; - - mp_disable_preemption(); - - src = i386_ptob(pa); - map = pmap_get_mapwindow((pt_entry_t)(INTEL_PTE_VALID | INTEL_PTE_RW | (src & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD)); - - for (i = 0, addr = (unsigned int *)map->prv_CADDR; i < cnt ; i++ ) - *addr++ = fill; - - pmap_put_mapwindow(map); - - mp_enable_preemption(); -} - -static inline void __sfence(void) -{ - __asm__ volatile("sfence"); -} -static inline void __mfence(void) -{ - __asm__ volatile("mfence"); -} -static inline void __wbinvd(void) -{ - __asm__ volatile("wbinvd"); -} -static inline void __clflush(void *ptr) -{ - __asm__ volatile("clflush (%0)" : : "r" (ptr)); -} - -void dcache_incoherent_io_store64(addr64_t pa, unsigned int count) -{ - mapwindow_t *map; - uint32_t linesize = cpuid_info()->cache_linesize; - addr64_t addr; - uint32_t offset, chunk; - boolean_t istate; - - __mfence(); - - istate = ml_set_interrupts_enabled(FALSE); - - offset = (uint32_t)(pa & (linesize - 1)); - addr = pa - offset; - - map = pmap_get_mapwindow((pt_entry_t)(i386_ptob(atop_64(addr)) | INTEL_PTE_VALID)); - - count += offset; - offset = (uint32_t)(addr & ((addr64_t) (page_size - 1))); - chunk = (uint32_t)page_size - offset; - - do - { - if (chunk > count) - chunk = count; - - for (; offset < chunk; offset += linesize) - __clflush((void *)(((uintptr_t)map->prv_CADDR) + offset)); - - count -= chunk; - addr += chunk; - chunk = (uint32_t) page_size; - offset = 0; - - if (count) { - pmap_store_pte(map->prv_CMAP, (pt_entry_t)(i386_ptob(atop_64(addr)) | INTEL_PTE_VALID)); - invlpg((uintptr_t)map->prv_CADDR); - } - } - while (count); - - pmap_put_mapwindow(map); - - (void) ml_set_interrupts_enabled(istate); - - __mfence(); -} - -void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count) -{ - return(dcache_incoherent_io_store64(pa,count)); -} - - -void -flush_dcache64(addr64_t addr, unsigned count, int phys) -{ - if (phys) { - 
dcache_incoherent_io_flush64(addr, count); - } - else { - uint64_t linesize = cpuid_info()->cache_linesize; - addr64_t bound = (addr + count + linesize - 1) & ~(linesize - 1); - __mfence(); - while (addr < bound) { - __clflush((void *) (uintptr_t) addr); - addr += linesize; - } - __mfence(); - } -} - -void -invalidate_icache64(__unused addr64_t addr, - __unused unsigned count, - __unused int phys) -{ -} - - -addr64_t vm_last_addr; - -void -mapping_set_mod(ppnum_t pn) -{ - pmap_set_modify(pn); -} - -void -mapping_set_ref(ppnum_t pn) -{ - pmap_set_reference(pn); -} - -void -cache_flush_page_phys(ppnum_t pa) -{ - mapwindow_t *map; - boolean_t istate; - int i; - unsigned char *cacheline_addr; - int cacheline_size = cpuid_info()->cache_linesize; - int cachelines_in_page = PAGE_SIZE/cacheline_size; - - __mfence(); - - istate = ml_set_interrupts_enabled(FALSE); - - map = pmap_get_mapwindow((pt_entry_t)(i386_ptob(pa) | INTEL_PTE_VALID)); - - for (i = 0, cacheline_addr = (unsigned char *)map->prv_CADDR; - i < cachelines_in_page; - i++, cacheline_addr += cacheline_size) { - __clflush((void *) cacheline_addr); - } - pmap_put_mapwindow(map); - - (void) ml_set_interrupts_enabled(istate); - - __mfence(); -} - - -#if !MACH_KDP -void -kdp_register_callout(void) -{ -} -#endif - -/* - * Return a uniformly distributed 64-bit random number. - * - * This interface should have minimal dependencies on kernel - * services, and thus be available very early in the life - * of the kernel. But as a result, it may not be very random - * on all platforms. - */ -uint64_t -early_random(void) -{ - return (ml_early_random()); -} - -#if !CONFIG_VMX -int host_vmxon(boolean_t exclusive __unused) -{ - return VMX_UNSUPPORTED; -} - -void host_vmxoff(void) -{ - return; -} -#endif - -#ifdef __LP64__ - -#define INT_SIZE (BYTE_SIZE * sizeof (int)) - -/* - * Set indicated bit in bit string. - */ -void -setbit(int bitno, int *s) -{ - s[bitno / INT_SIZE] |= 1 << (bitno % INT_SIZE); -} - -/* - * Clear indicated bit in bit string. - */ -void -clrbit(int bitno, int *s) -{ - s[bitno / INT_SIZE] &= ~(1 << (bitno % INT_SIZE)); -} - -/* - * Test if indicated bit is set in bit string. - */ -int -testbit(int bitno, int *s) -{ - return s[bitno / INT_SIZE] & (1 << (bitno % INT_SIZE)); -} - -/* - * Find first bit set in bit string. - */ -int -ffsbit(int *s) -{ - int offset; - - for (offset = 0; !*s; offset += (int)INT_SIZE, ++s); - return offset + __builtin_ctz(*s); -} - -int -ffs(unsigned int mask) -{ - if (mask == 0) - return 0; - - /* - * NOTE: cannot use __builtin_ffs because it generates a call to - * 'ffs' - */ - return 1 + __builtin_ctz(mask); -} -#endif diff --git a/osfmk/i386/lowglobals.h b/osfmk/i386/lowglobals.h deleted file mode 100644 index 33de7450a..000000000 --- a/osfmk/i386/lowglobals.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * Header files for the Low Memory Globals (lg) - */ -#ifndef _LOW_MEMORY_GLOBALS_H_ -#define _LOW_MEMORY_GLOBALS_H_ - -#if defined(__x86_64__) -#include -#elif !defined(__i386__) -#error Wrong architecture - this file is meant for i386 -#endif - -#include -#include -#include -#include - -/* - * Don't change these structures unless you change the corresponding assembly code - * which is in lowmem_vectors.s - */ - -/* - * This is where we put constants, pointers, and data areas that must be accessed - * quickly through assembler. They are designed to be accessed directly with - * absolute addresses, not via a base register. This is a global area, and not - * per processor. - */ - -#pragma pack(4) /* Make sure the structure stays as we defined it */ -typedef struct lowglo { - - unsigned char lgVerCode[8]; /* 0x2000 System verification code */ - unsigned long long lgZero; /* 0x2008 Double constant 0 */ - uint32_t lgRsv010; /* 0x2010 Reserved */ - uint32_t lgCHUDXNUfnStart; /* 0x2014 CHUD XNU function glue - * table */ - uint32_t lgRsv018; /* 0x2018 Reserved */ - uint32_t lgVersion; /* 0x201C Pointer to kernel version string */ - uint32_t lgRsv020[280]; /* 0X2020 Reserved */ - uint32_t lgKmodptr; /* 0x2480 Pointer to kmod, debugging aid */ - uint32_t lgTransOff; /* 0x2484 Pointer to kdp_trans_off, debugging aid */ - uint32_t lgReadIO; /* 0x2488 Pointer to kdp_read_io, debugging aid */ - uint32_t lgDevSlot1; /* 0x248C For developer use */ - uint32_t lgDevSlot2; /* 0x2490 For developer use */ - uint32_t lgOSVersion; /* 0x2494 Pointer to OS version string */ - uint32_t lgRebootFlag; /* 0x2498 Pointer to debugger reboot trigger */ - uint32_t lgManualPktAddr; /* 0x249C Pointer to manual packet structure */ - uint32_t lgRsv49C[728]; /* 0x24A0 Reserved - push to 1 page */ -} lowglo; -#pragma pack() -extern lowglo lowGlo; -#endif /* _LOW_MEMORY_GLOBALS_H_ */ diff --git a/osfmk/i386/lowmem_vectors.s b/osfmk/i386/lowmem_vectors.s deleted file mode 100644 index c1b8a2e18..000000000 --- a/osfmk/i386/lowmem_vectors.s +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. 
- * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ - -#include -#include - -#include -#include -#include -#include - - -/* on x86 the low mem vectors live here and get mapped to 0x2000 at - * system startup time - */ - - .text - .align 12 - - .globl EXT(lowGlo) -EXT(lowGlo): - - .ascii "Catfish " /* 0x2000 System verification code */ - .long 0 /* 0x2008 Double constant 0 */ - .long 0 - .long 0 /* 0x2010 Reserved */ - .long 0 /* 0x2014 Zero */ - .long 0 /* 0x2018 Reserved */ - .long EXT(version) /* 0x201C Pointer to kernel version string */ - .fill 280, 4, 0 /* 0x2020 Reserved */ - .long EXT(kmod) /* 0x2480 Pointer to kmod, debugging aid */ -#if MACH_KDP - .long EXT(kdp_trans_off) /* 0x2484 Pointer to kdp_trans_off, debugging aid */ - .long EXT(kdp_read_io) /* 0x2488 Pointer to kdp_read_io, debugging aid */ -#else - .long 0 /* 0x2484 Reserved */ - .long 0 /* 0x2488 Reserved */ -#endif - .long 0 /* 0x248C Reserved for developer use */ - .long 0 /* 0x2490 Reserved for developer use */ - .long EXT(osversion) /* 0x2494 Pointer to osversion string */ -#if MACH_KDP - .long EXT(flag_kdp_trigger_reboot) /* 0x2498 Pointer to debugger reboot trigger */ - .long EXT(manual_pkt) /* 0x249C Poiner to debugger manual packet address */ -#else - .long 0 /* 0x2498 Reserved */ - .long 0 /* 0x249C Reserved */ -#endif - .fill 728, 4, 0 diff --git a/osfmk/i386/machine_check.c b/osfmk/i386/machine_check.c index c70168b3c..63aa09824 100644 --- a/osfmk/i386/machine_check.c +++ b/osfmk/i386/machine_check.c @@ -93,15 +93,15 @@ mca_get_availability(void) uint32_t model = cpuid_info()->cpuid_model; uint32_t stepping = cpuid_info()->cpuid_stepping; - mca_MCE_present = (features & CPUID_FEATURE_MCE) != 0; - mca_MCA_present = (features & CPUID_FEATURE_MCA) != 0; - mca_family = family; - if ((model == CPUID_MODEL_HASWELL && stepping < 3) || (model == CPUID_MODEL_HASWELL_ULT && 
stepping < 1) || (model == CPUID_MODEL_CRYSTALWELL && stepping < 1)) panic("Haswell pre-C0 steppings are not supported"); + mca_MCE_present = (features & CPUID_FEATURE_MCE) != 0; + mca_MCA_present = (features & CPUID_FEATURE_MCA) != 0; + mca_family = family; + /* * If MCA, the number of banks etc is reported by the IA32_MCG_CAP MSR. */ @@ -270,27 +270,6 @@ static void mca_dump_64bit_state(void) kdb_printf(" IA32_MCG_R15: 0x%016qx\n", rdmsr64(IA32_MCG_R15)); } -static uint32_t rdmsr32(uint32_t msr) -{ - return (uint32_t) rdmsr64(msr); -} - -static void mca_dump_32bit_state(void) -{ - kdb_printf("Extended Machine Check State:\n"); - kdb_printf(" IA32_MCG_EAX: 0x%08x\n", rdmsr32(IA32_MCG_EAX)); - kdb_printf(" IA32_MCG_EBX: 0x%08x\n", rdmsr32(IA32_MCG_EBX)); - kdb_printf(" IA32_MCG_ECX: 0x%08x\n", rdmsr32(IA32_MCG_ECX)); - kdb_printf(" IA32_MCG_EDX: 0x%08x\n", rdmsr32(IA32_MCG_EDX)); - kdb_printf(" IA32_MCG_ESI: 0x%08x\n", rdmsr32(IA32_MCG_ESI)); - kdb_printf(" IA32_MCG_EDI: 0x%08x\n", rdmsr32(IA32_MCG_EDI)); - kdb_printf(" IA32_MCG_EBP: 0x%08x\n", rdmsr32(IA32_MCG_EBP)); - kdb_printf(" IA32_MCG_ESP: 0x%08x\n", rdmsr32(IA32_MCG_ESP)); - kdb_printf(" IA32_MCG_EFLAGS: 0x%08x\n", rdmsr32(IA32_MCG_EFLAGS)); - kdb_printf(" IA32_MCG_EIP: 0x%08x\n", rdmsr32(IA32_MCG_EIP)); - kdb_printf(" IA32_MCG_MISC: 0x%08x\n", rdmsr32(IA32_MCG_MISC)); -} - static void mca_report_cpu_info(void) { @@ -561,10 +540,7 @@ mca_dump(void) * Dump any extended machine state: */ if (mca_extended_MSRs_present) { - if (cpu_mode_is64bit()) - mca_dump_64bit_state(); - else - mca_dump_32bit_state(); + mca_dump_64bit_state(); } /* Update state to release any other threads. */ diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index 721806047..06c57561c 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -49,6 +50,10 @@ #include #include #include +#include +#if KPC +#include +#endif #if DEBUG #define DBG(x...) kprintf("DBG: " x) @@ -70,6 +75,12 @@ extern uint64_t panic_restart_timeout; boolean_t virtualized = FALSE; +decl_simple_lock_data(static, ml_timer_evaluation_slock); +uint32_t ml_timer_eager_evaluations; +uint64_t ml_timer_eager_evaluation_max; +static boolean_t ml_timer_evaluation_in_progress = FALSE; + + #define MAX_CPUS_SET 0x1 #define MAX_CPUS_WAIT 0x2 @@ -200,6 +211,38 @@ vm_size_t ml_nofault_copy( return nbytes; } +/* + * Routine: ml_validate_nofault + * Function: Validate that this address range has valid translations + * in the kernel pmap. If translations are present, they are + * assumed to be wired; i.e. no attempt is made to guarantee + * that the translations persist after the check. + * Returns: TRUE if the range is mapped and will not cause a fault, + * FALSE otherwise. 
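A hypothetical call-site sketch for the routine documented here: a debugger-style path that cannot tolerate recoverable faults pre-checks the range first, bearing in mind the caveat above that the translations are only assumed, not guaranteed, to persist:

/* ml_validate_nofault and bcopy are real XNU interfaces; the wrapper
 * function itself is invented for illustration. */
kern_return_t copy_checked_sketch(vm_offset_t src, void *dst, vm_size_t len)
{
    if (!ml_validate_nofault(src, len))
        return KERN_INVALID_ADDRESS;    /* unmapped somewhere in the range */

    bcopy((const void *)src, dst, len); /* expected not to fault now */
    return KERN_SUCCESS;
}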
+ */ + +boolean_t ml_validate_nofault( + vm_offset_t virtsrc, vm_size_t size) +{ + addr64_t cur_phys_src; + uint32_t count; + + while (size > 0) { + if (!(cur_phys_src = kvtophys(virtsrc))) + return FALSE; + if (!pmap_valid_page(i386_btop(cur_phys_src))) + return FALSE; + count = (uint32_t)(PAGE_SIZE - (cur_phys_src & PAGE_MASK)); + if (count > size) + count = (uint32_t)size; + + virtsrc += count; + size -= count; + } + + return TRUE; +} + /* Interrupt handling */ /* Initialize Interrupts */ @@ -226,6 +269,8 @@ boolean_t ml_set_interrupts_enabled(boolean_t enable) __asm__ volatile("pushf; pop %0" : "=r" (flags)); + assert(get_interrupt_level() ? (enable == FALSE) : TRUE); + istate = ((flags & EFL_IF) != 0); if (enable) { @@ -340,6 +385,23 @@ register_cpu( if (this_cpu_datap->cpu_chud == NULL) goto failed; +#if KPC + this_cpu_datap->cpu_kpc_buf[0] = kpc_counterbuf_alloc(); + if(this_cpu_datap->cpu_kpc_buf[0] == NULL ) + goto failed; + this_cpu_datap->cpu_kpc_buf[1] = kpc_counterbuf_alloc(); + if(this_cpu_datap->cpu_kpc_buf[1] == NULL ) + goto failed; + + this_cpu_datap->cpu_kpc_shadow = kpc_counterbuf_alloc(); + if(this_cpu_datap->cpu_kpc_shadow == NULL ) + goto failed; + + this_cpu_datap->cpu_kpc_reload = kpc_counterbuf_alloc(); + if(this_cpu_datap->cpu_kpc_reload == NULL ) + goto failed; +#endif + if (!boot_cpu) { cpu_thread_alloc(this_cpu_datap->cpu_number); if (this_cpu_datap->lcpu.core == NULL) @@ -372,6 +434,13 @@ failed: #endif chudxnu_cpu_free(this_cpu_datap->cpu_chud); console_cpu_free(this_cpu_datap->cpu_console_buf); +#if KPC + kpc_counterbuf_free(this_cpu_datap->cpu_kpc_buf[0]); + kpc_counterbuf_free(this_cpu_datap->cpu_kpc_buf[1]); + kpc_counterbuf_free(this_cpu_datap->cpu_kpc_shadow); + kpc_counterbuf_free(this_cpu_datap->cpu_kpc_reload); +#endif + return KERN_FAILURE; } @@ -560,12 +629,14 @@ ml_init_lock_timeout(void) nanoseconds_to_absolutetime(prt * NSEC_PER_SEC, &panic_restart_timeout); virtualized = ((cpuid_features() & CPUID_FEATURE_VMM) != 0); interrupt_latency_tracker_setup(); + simple_lock_init(&ml_timer_evaluation_slock, 0); } /* * Threshold above which we should attempt to block * instead of spinning for clock_delay_until(). */ + void ml_init_delay_spin_threshold(int threshold_us) { @@ -647,17 +718,7 @@ void ml_cpu_set_ldt(int selector) current_cpu_datap()->cpu_ldt == KERNEL_LDT) return; -#if defined(__i386__) - /* - * If 64bit this requires a mode switch (and back). 
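The KPC buffer allocations added to register_cpu() above use XNU's centralized-cleanup idiom: each allocation branches to a single failed: label, whose cleanup frees whatever was obtained so far. That only works if the free routines tolerate NULL, which the unconditional kpc_counterbuf_free() calls suggest they do. A standalone model of the idiom with malloc/free:

#include <stdlib.h>

struct bufs_sketch { void *a, *b, *c, *d; };

/* Returns 0 on success, -1 with nothing leaked on failure. free(NULL)
 * is a no-op, which is what lets the single cleanup label stay simple. */
static int bufs_alloc_sketch(struct bufs_sketch *p, size_t sz)
{
    *p = (struct bufs_sketch){0};       /* so cleanup sees NULLs */

    if ((p->a = malloc(sz)) == NULL) goto failed;
    if ((p->b = malloc(sz)) == NULL) goto failed;
    if ((p->c = malloc(sz)) == NULL) goto failed;
    if ((p->d = malloc(sz)) == NULL) goto failed;
    return 0;

failed:
    free(p->a);
    free(p->b);
    free(p->c);
    free(p->d);
    return -1;
}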
- */ - if (cpu_mode_is64bit()) - ml_64bit_lldt(selector); - else - lldt(selector); -#else lldt(selector); -#endif current_cpu_datap()->cpu_ldt = selector; } @@ -710,3 +771,27 @@ kernel_preempt_check(void) boolean_t machine_timeout_suspended(void) { return (virtualized || pmap_tlb_flush_timeout || spinlock_timed_out || panic_active() || mp_recent_debugger_activity()); } + +/* Eagerly evaluate all pending timer and thread callouts + */ +void ml_timer_evaluate(void) { + KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_START, 0, 0, 0, 0, 0); + + uint64_t te_end, te_start = mach_absolute_time(); + simple_lock(&ml_timer_evaluation_slock); + ml_timer_evaluation_in_progress = TRUE; + thread_call_delayed_timer_rescan_all(); + mp_cpus_call(CPUMASK_ALL, ASYNC, timer_queue_expire_rescan, NULL); + ml_timer_evaluation_in_progress = FALSE; + ml_timer_eager_evaluations++; + te_end = mach_absolute_time(); + ml_timer_eager_evaluation_max = MAX(ml_timer_eager_evaluation_max, (te_end - te_start)); + simple_unlock(&ml_timer_evaluation_slock); + + KERNEL_DEBUG_CONSTANT(DECR_TIMER_RESCAN|DBG_FUNC_END, 0, 0, 0, 0, 0); +} + +boolean_t +ml_timer_forced_evaluation(void) { + return ml_timer_evaluation_in_progress; +} diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h index 270ddcfb5..f0c1baa71 100644 --- a/osfmk/i386/machine_routines.h +++ b/osfmk/i386/machine_routines.h @@ -103,6 +103,9 @@ vm_offset_t ml_vtophys( vm_size_t ml_nofault_copy( vm_offset_t virtsrc, vm_offset_t virtdst, vm_size_t size); +boolean_t ml_validate_nofault( + vm_offset_t virtsrc, vm_size_t size); + /* Machine topology info */ uint64_t ml_cpu_cache_size(unsigned int level); uint64_t ml_cpu_cache_sharing(unsigned int level); @@ -117,6 +120,33 @@ extern void ml_cpu_down(void); void bzero_phys_nc( addr64_t phys_address, uint32_t length); +#define NUM_LATENCY_QOS_TIERS (6) +typedef struct { + int32_t timer_coalesce_rt_shift; + int32_t timer_coalesce_bg_shift; + int32_t timer_coalesce_kt_shift; + int32_t timer_coalesce_fp_shift; + int32_t timer_coalesce_ts_shift; + + uint64_t timer_coalesce_rt_ns_max; + uint64_t timer_coalesce_bg_ns_max; + uint64_t timer_coalesce_kt_ns_max; + uint64_t timer_coalesce_fp_ns_max; + uint64_t timer_coalesce_ts_ns_max; + + uint32_t latency_qos_scale[NUM_LATENCY_QOS_TIERS]; + uint64_t latency_qos_ns_max[NUM_LATENCY_QOS_TIERS]; + boolean_t latency_tier_rate_limited[NUM_LATENCY_QOS_TIERS]; +} timer_coalescing_priority_params_t; +extern timer_coalescing_priority_params_t tcoal_prio_params; +extern uint32_t interrupt_timer_coalescing_enabled; +extern uint32_t idle_entry_timer_processing_hdeadline_threshold; + +#if TCOAL_INSTRUMENT +#define TCOAL_DEBUG KERNEL_DEBUG_CONSTANT +#else +#define TCOAL_DEBUG(x, a, b, c, d, e) do { } while(0) +#endif /* TCOAL_INSTRUMENT */ #if defined(PEXPERT_KERNEL_PRIVATE) || defined(MACH_KERNEL_PRIVATE) /* IO memory map services */ @@ -314,5 +344,12 @@ void interrupt_reset_latency_stats(void); void interrupt_populate_latency_stats(char *, unsigned); void ml_get_power_state(boolean_t *, boolean_t *); +void timer_queue_expire_local(void*); +void timer_queue_expire_rescan(void*); +void ml_timer_evaluate(void); +boolean_t ml_timer_forced_evaluation(void); +int ml_timer_get_user_idle_level(void); +kern_return_t ml_timer_set_user_idle_level(int); + #endif /* XNU_KERNEL_PRIVATE */ #endif /* _I386_MACHINE_ROUTINES_H_ */ diff --git a/osfmk/i386/machine_routines_asm.s b/osfmk/i386/machine_routines_asm.s deleted file mode 100644 index c4176cdf5..000000000 --- 
a/osfmk/i386/machine_routines_asm.s +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include -#include -#include -#include -#include - -/* -** ml_get_timebase() -** -** Entry - %esp contains pointer to 64 bit structure. -** -** Exit - 64 bit structure filled in. -** -*/ -ENTRY(ml_get_timebase) - - movl S_ARG0, %ecx - - lfence - rdtsc - lfence - - movl %edx, 0(%ecx) - movl %eax, 4(%ecx) - - ret - -/* - * Convert between various timer units - * - * uint64_t tmrCvt(uint64_t time, uint64_t *conversion) - * - * This code converts 64-bit time units to other units. - * For example, the TSC is converted to HPET units. - * - * Time is a 64-bit integer that is some number of ticks. - * Conversion is 64-bit fixed point number which is composed - * of a 32 bit integer and a 32 bit fraction. - * - * The time ticks are multiplied by the conversion factor. The - * calculations are done as a 128-bit value but both the high - * and low words are dropped. The high word is overflow and the - * low word is the fraction part of the result. - * - * We return a 64-bit value. - * - * Note that we can use this function to multiply 2 conversion factors. - * We do this in order to calculate the multiplier used to convert - * directly between any two units. 
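The multiply described above is simply a 64x64 -> 128-bit product that keeps bits 32..95. A minimal C sketch of the same conversion, assuming a compiler with __uint128_t support (an illustrative helper, not part of this file):

	#include <stdint.h>

	/*
	 * Multiply a 64-bit tick count by a 32.32 fixed-point conversion
	 * factor and keep the middle 64 bits of the 128-bit product --
	 * exactly what the 32-bit partial products below accumulate.
	 */
	static uint64_t
	tmrCvt_sketch(uint64_t time, uint64_t conversion)
	{
		return (uint64_t)(((__uint128_t)time * conversion) >> 32);
	}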
- * - */ - - .globl EXT(tmrCvt) - .align FALIGN - -LEXT(tmrCvt) - - pushl %ebp // Save a volatile - movl %esp,%ebp // Get the parameters - 8 - pushl %ebx // Save a volatile - pushl %esi // Save a volatile - pushl %edi // Save a volatile - -// %ebp + 8 - low-order ts -// %ebp + 12 - high-order ts -// %ebp + 16 - low-order cvt -// %ebp + 20 - high-order cvt - - movl 8(%ebp),%eax // Get low-order ts - mull 16(%ebp) // Multiply by low-order conversion - movl %edx,%edi // Need to save only the high order part - - movl 12(%ebp),%eax // Get the high-order ts - mull 16(%ebp) // Multiply by low-order conversion - addl %eax,%edi // Add in the overflow from the low x low calculation - adcl $0,%edx // Add in any overflow to high high part - movl %edx,%esi // Save high high part - -// We now have the upper 64 bits of the 96 bit multiply of ts and the low half of cvt -// in %esi:%edi - - movl 8(%ebp),%eax // Get low-order ts - mull 20(%ebp) // Multiply by high-order conversion - movl %eax,%ebx // Need to save the low order part - movl %edx,%ecx // Need to save the high order part - - movl 12(%ebp),%eax // Get the high-order ts - mull 20(%ebp) // Multiply by high-order conversion - -// Now have %ecx:%ebx as low part of high low and %edx:%eax as high part of high high -// We don't care about the highest word since it is overflow - - addl %edi,%ebx // Add the low words - adcl %ecx,%esi // Add in the high plus carry from low - addl %eax,%esi // Add in the rest of the high - - movl %ebx,%eax // Pass back low word - movl %esi,%edx // and the high word - - popl %edi // Restore a volatile - popl %esi // Restore a volatile - popl %ebx // Restore a volatile - popl %ebp // Restore a volatile - - ret // Leave... - - -/* void _rtc_nanotime_adjust( - uint64_t tsc_base_delta, - rtc_nanotime_t *dst); -*/ - .globl EXT(_rtc_nanotime_adjust) - .align FALIGN - -LEXT(_rtc_nanotime_adjust) - mov 12(%esp),%edx /* ptr to rtc_nanotime_info */ - - movl RNT_GENERATION(%edx),%ecx /* get current generation */ - movl $0,RNT_GENERATION(%edx) /* flag data as being updated */ - - movl 4(%esp),%eax /* get lower 32-bits of delta */ - addl %eax,RNT_TSC_BASE(%edx) - adcl $0,RNT_TSC_BASE+4(%edx) /* propagate carry */ - - incl %ecx /* next generation */ - jnz 1f - incl %ecx /* skip 0, which is a flag */ -1: movl %ecx,RNT_GENERATION(%edx) /* update generation and make usable */ - - ret - - -/* uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow ); - * - * This is the same as the commpage nanotime routine, except that it uses the - * kernel internal "rtc_nanotime_info" data instead of the commpage data. The two copies - * of data (one in the kernel and one in user space) are kept in sync by rtc_clock_napped(). - * - * Warning! There is another copy of this code in osfmk/i386/locore.s. The - * two versions must be kept in sync with each other! - * - * There are actually two versions of the algorithm, one each for "slow" and "fast" - * processors. The more common "fast" algorithm is: - * - * nanoseconds = (((rdtsc - rnt_tsc_base) * rnt_tsc_scale) / 2**32) + rnt_ns_base; - * - * Of course, the divide by 2**32 is a nop. rnt_tsc_scale is a constant computed during initialization: - * - * rnt_tsc_scale = (10**9 * 2**32) / tscFreq; - * - * The "slow" algorithm uses long division: - * - * nanoseconds = (((rdtsc - rnt_tsc_base) * 10**9) / tscFreq) + rnt_ns_base; - * - * Since this routine is not synchronized and can be called in any context, - * we use a generation count to guard against seeing partially updated data.
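The reader side of this generation-count protocol can be sketched in C (type, field, and helper names here are illustrative, not the kernel's):

	#include <stdint.h>

	typedef struct {
		volatile uint32_t generation;	/* 0 while an update is in flight */
		uint64_t tsc_base;
		uint64_t ns_base;
		uint32_t scale;			/* 32.32 fixed-point ns per tick */
	} nanotime_sketch_t;

	extern uint64_t rdtsc64(void);		/* assumed TSC read helper */

	static uint64_t
	nanotime_read_sketch(const nanotime_sketch_t *p)
	{
		uint32_t gen;
		uint64_t ns;

		do {
			while ((gen = p->generation) == 0)
				;		/* writer active: wait for stable data */
			ns = (uint64_t)(((__uint128_t)(rdtsc64() - p->tsc_base) *
			    p->scale) >> 32) + p->ns_base;
		} while (gen != p->generation);	/* changed underneath us: retry */

		return ns;
	}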
In addition, - * the _rtc_nanotime_store() routine -- just above -- zeroes the generation before - * updating the data, and stores the nonzero generation only after all other data has been - * stored. Because IA32 guarantees that stores by one processor must be seen in order - * by another, we can avoid using a lock. We spin while the generation is zero. - * - * In accordance with the ABI, we return the 64-bit nanotime in %edx:%eax. - */ - - .globl EXT(_rtc_nanotime_read) - .align FALIGN -LEXT(_rtc_nanotime_read) - pushl %ebp - movl %esp,%ebp - pushl %esi - pushl %edi - pushl %ebx - movl 8(%ebp),%edi /* get ptr to rtc_nanotime_info */ - movl 12(%ebp),%eax /* get "slow" flag */ - testl %eax,%eax - jnz Lslow - - /* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */ - PAL_RTC_NANOTIME_READ_FAST() - - popl %ebx - popl %edi - popl %esi - popl %ebp - ret - - /* Processor whose TSC frequency is slower than or equal to SLOW_TSC_THRESHOLD */ -Lslow: - movl RNT_GENERATION(%edi),%esi /* get generation (0 if being changed) */ - testl %esi,%esi /* if being changed, loop until stable */ - jz Lslow - pushl %esi /* save generation */ - pushl RNT_SHIFT(%edi) /* save low 32 bits of tscFreq */ - - lfence - rdtsc /* get TSC in %edx:%eax */ - lfence - subl RNT_TSC_BASE(%edi),%eax - sbbl RNT_TSC_BASE+4(%edi),%edx - - /* - * Do the math to convert tsc ticks to nanoseconds. We first - * do long multiply of 1 billion times the tsc. Then we do - * long division by the tsc frequency - */ - mov $1000000000, %ecx /* number of nanoseconds in a second */ - mov %edx, %ebx - mul %ecx - mov %edx, %edi - mov %eax, %esi - mov %ebx, %eax - mul %ecx - add %edi, %eax - adc $0, %edx /* result in edx:eax:esi */ - mov %eax, %edi - popl %ecx /* get low 32 tscFreq */ - xor %eax, %eax - xchg %edx, %eax - div %ecx - xor %eax, %eax - mov %edi, %eax - div %ecx - mov %eax, %ebx - mov %esi, %eax - div %ecx - mov %ebx, %edx /* result in edx:eax */ - - movl 8(%ebp),%edi /* recover ptr to rtc_nanotime_info */ - popl %esi /* recover generation */ - - addl RNT_NS_BASE(%edi),%eax - adcl RNT_NS_BASE+4(%edi),%edx - - cmpl RNT_GENERATION(%edi),%esi /* have the parameters changed? */ - jne Lslow /* yes, loop until stable */ - - pop %ebx - pop %edi - pop %esi - pop %ebp - ret /* result in edx:eax */ - - - -/* - * Timing routines. - */ -Entry(timer_update) - movl 4(%esp),%ecx - movl 8(%esp),%eax - movl 12(%esp),%edx - movl %eax,TIMER_HIGHCHK(%ecx) - movl %edx,TIMER_LOW(%ecx) - movl %eax,TIMER_HIGH(%ecx) - ret - -Entry(timer_grab) - movl 4(%esp),%ecx -0: movl TIMER_HIGH(%ecx),%edx - movl TIMER_LOW(%ecx),%eax - cmpl TIMER_HIGHCHK(%ecx),%edx - jne 0b - ret - - -Entry(call_continuation) - movl S_ARG0,%eax /* get continuation */ - movl S_ARG1,%edx /* continuation param */ - movl S_ARG2,%ecx /* wait result */ - movl %gs:CPU_KERNEL_STACK,%esp /* pop the stack */ - xorl %ebp,%ebp /* zero frame pointer */ - subl $8,%esp /* align the stack */ - pushl %ecx - pushl %edx - call *%eax /* call continuation */ - addl $16,%esp - movl %gs:CPU_ACTIVE_THREAD,%eax - pushl %eax - call EXT(thread_terminate) - - -Entry(ml_early_random) - xor %eax, %eax - ret diff --git a/osfmk/i386/mcount.s b/osfmk/i386/mcount.s deleted file mode 100644 index 0246ba152..000000000 --- a/osfmk/i386/mcount.s +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#define __NO_UNDERSCORES__ -#include -#include - -Entry(mcount) - pushl %ebp // setup mcount's frame - movl %esp,%ebp - pushl %eax // save %eax - pushf // save interrupt state - cli // disable interrupts - - // - // Check that this cpu is ready. - // This delays the start of mcounting until a cpu is really prepared. - // - mov %gs, %ax - test %ax, %ax - jz 1f - - movl %gs:CPU_RUNNING,%eax - testl %eax,%eax - jz 1f - - // - // Test for recursion as indicated by a per-cpu flag. - // Skip if nested, otherwise set the flag and call the C mcount(). - // - movl %gs:CPU_MCOUNT_OFF,%eax - testl %eax,%eax // test for recursion - jnz 1f - - incl %gs:CPU_MCOUNT_OFF // set recursion flag - - movl (%ebp),%eax // frame pointer of mcount's caller - movl 4(%eax),%eax // mcount's caller's return address - pushl 4(%ebp) // push selfpc parameter for mcount() - pushl %eax // push frompc parameter for mcount() - call _mcount // call the C mcount - addl $8,%esp // pop args - - decl %gs:CPU_MCOUNT_OFF // turn off recursion flag -1: - popf // restore interrupt state - popl %eax - movl %ebp,%esp // tear down mcount's frame - popl %ebp - ret diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h index 4a2ed207d..8bba95cda 100644 --- a/osfmk/i386/misc_protos.h +++ b/osfmk/i386/misc_protos.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,12 +46,6 @@ extern void i386_vm_init( uint64_t, boolean_t, struct boot_args *); -#ifdef __i386__ -extern void cpu_IA32e_enable(struct cpu_data *); -extern void cpu_IA32e_disable(struct cpu_data *); -extern void ml_load_desc64(void); -extern void ml_64bit_lldt(int); -#endif #if NCOPY_WINDOWS > 0 extern void cpu_userwindow_init(int); @@ -70,9 +64,6 @@ extern void panic_io_port_read(void); extern void remote_kdb(void); extern void clear_kdb_intr(void); extern void cpu_init(void); -#ifdef __i386__ -extern void cpu_shutdown(void); -#endif extern void fix_desc( void * desc, int num_desc); @@ -92,17 +83,6 @@ extern void blkclr( const char *from, int nbytes); -#ifdef __i386__ -extern unsigned int div_scale( - unsigned int dividend, - unsigned int divisor, - unsigned int *scale); - -extern unsigned int mul_scale( - unsigned int multiplicand, - unsigned int multiplier, - unsigned int *scale); -#endif /* Move arbitrarily-aligned data from one physical address to another */ extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t nbytes); @@ -110,7 +90,7 @@ extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t nbytes); /* allow a function to get a quick virtual mapping of a physical page */ extern int apply_func_phys(addr64_t src64, vm_size_t bytes, int (*func)(void * buffer, vm_size_t bytes, void * arg), void * arg); -extern void ml_copy_phys(addr64_t, addr64_t, vm_size_t); +extern int ml_copy_phys(addr64_t, addr64_t, vm_size_t); /* Flush all cachelines for a page. */ extern void cache_flush_page_phys(ppnum_t pa); diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index 286b822aa..dc50b4848 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -46,7 +47,7 @@ #include #include #include -#include +#include #include #include @@ -79,6 +80,8 @@ #include +#include + #if MP_DEBUG #define PAUSE delay(1000000) #define DBG(x...) kprintf(x) @@ -102,11 +105,15 @@ void slave_boot_init(void); void i386_cpu_IPI(int cpu); +#if MACH_KDP static void mp_kdp_wait(boolean_t flush, boolean_t isNMI); +#endif /* MACH_KDP */ static void mp_rendezvous_action(void); static void mp_broadcast_action(void); +#if MACH_KDP static boolean_t cpu_signal_pending(int cpu, mp_event_t event); +#endif /* MACH_KDP */ static int NMIInterruptHandler(x86_saved_state_t *regs); boolean_t smp_initialized = FALSE; @@ -154,7 +161,10 @@ static volatile long mp_bc_count; decl_lck_mtx_data(static, mp_bc_lock); lck_mtx_ext_t mp_bc_lock_ext; static volatile int debugger_cpu = -1; -volatile long NMIPI_acks = 0; +volatile long NMIPI_acks = 0; +volatile long NMI_count = 0; + +extern void NMI_cpus(void); static void mp_cpus_call_init(void); static void mp_cpus_call_cpu_init(void); @@ -427,10 +437,7 @@ intel_startCPU( * Initialize (or re-initialize) the descriptor tables for this cpu. * Propagate processor mode to slave. */ - if (cpu_mode_is64bit()) - cpu_desc_init64(cpu_datap(slot_num)); - else - cpu_desc_init(cpu_datap(slot_num)); + cpu_desc_init64(cpu_datap(slot_num)); /* Serialize use of the slave boot stack, etc. 
*/ lck_mtx_lock(&mp_cpu_boot_lock); @@ -483,6 +490,9 @@ MP_EVENT_NAME_DECL(); int cpu_signal_handler(x86_saved_state_t *regs) { +#if !MACH_KDP +#pragma unused (regs) +#endif /* !MACH_KDP */ int my_cpu; volatile int *my_word; @@ -499,7 +509,7 @@ cpu_signal_handler(x86_saved_state_t *regs) do { #if MACH_KDP - if (i_bit(MP_KDP, my_word)) { + if (i_bit(MP_KDP, my_word) && regs != NULL) { DBGLOG(cpu_handle,my_cpu,MP_KDP); i_bit_clear(MP_KDP, my_word); /* Ensure that the i386_kernel_state at the base of the @@ -562,12 +572,9 @@ NMIInterruptHandler(x86_saved_state_t *regs) } atomic_incl(&NMIPI_acks, 1); + atomic_incl(&NMI_count, 1); sync_iss_to_iks_unconditionally(regs); -#if defined (__i386__) - __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr)); -#elif defined (__x86_64__) __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); -#endif if (cpu_number() == debugger_cpu) goto NMExit; @@ -625,6 +632,35 @@ cpu_NMI_interrupt(int cpu) } } +void +NMI_cpus(void) +{ + unsigned int cpu; + boolean_t intrs_enabled; + uint64_t tsc_timeout; + + intrs_enabled = ml_set_interrupts_enabled(FALSE); + + for (cpu = 0; cpu < real_ncpus; cpu++) { + if (!cpu_datap(cpu)->cpu_running) + continue; + cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE; + cpu_NMI_interrupt(cpu); + tsc_timeout = !machine_timeout_suspended() ? + rdtsc64() + (1000 * 1000 * 1000 * 10ULL) : + ~0ULL; + while (!cpu_datap(cpu)->cpu_NMI_acknowledged) { + handle_pending_TLB_flushes(); + cpu_pause(); + if (rdtsc64() > tsc_timeout) + panic("NMI_cpus() timeout cpu %d", cpu); + } + cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE; + } + + ml_set_interrupts_enabled(intrs_enabled); +} + static void (* volatile mp_PM_func)(void) = NULL; static void @@ -674,7 +710,9 @@ i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode) i386_cpu_IPI(cpu); if (mode == SYNC) { again: - tsc_timeout = rdtsc64() + (1000*1000*1000); + tsc_timeout = !machine_timeout_suspended() ? + rdtsc64() + (1000*1000*1000) : + ~0ULL; while (i_bit(event, signals) && rdtsc64() < tsc_timeout) { cpu_pause(); } @@ -726,6 +764,30 @@ i386_active_cpus(void) return(ncpus); } +/* + * Helper function called when busy-waiting: panic if too long + * a TSC-based time has elapsed since the start of the spin. + */ +static void +mp_spin_timeout_check(uint64_t tsc_start, const char *msg) +{ + uint64_t tsc_timeout; + + cpu_pause(); + if (machine_timeout_suspended()) + return; + + /* + * The timeout is 4 * the spinlock timeout period + * unless we have serial console printing (kprintf) enabled + * in which case we allow an even greater margin. + */ + tsc_timeout = disable_serial_output ? 
(uint64_t) LockTimeOutTSC << 2 + : (uint64_t) LockTimeOutTSC << 4; + if (rdtsc64() > tsc_start + tsc_timeout) + panic("%s: spin timeout", msg); +} + /* * All-CPU rendezvous: * - CPUs are signalled, @@ -743,7 +805,8 @@ i386_active_cpus(void) static void mp_rendezvous_action(void) { - boolean_t intrs_enabled; + boolean_t intrs_enabled; + uint64_t tsc_spin_start; /* setup function */ if (mp_rv_setup_func != NULL) @@ -753,11 +816,13 @@ mp_rendezvous_action(void) /* spin on entry rendezvous */ atomic_incl(&mp_rv_entry, 1); + tsc_spin_start = rdtsc64(); while (mp_rv_entry < mp_rv_ncpus) { /* poll for pesky tlb flushes if interrupts disabled */ if (!intrs_enabled) handle_pending_TLB_flushes(); - cpu_pause(); + mp_spin_timeout_check(tsc_spin_start, + "mp_rendezvous_action() entry"); } /* action function */ @@ -766,10 +831,12 @@ mp_rendezvous_action(void) /* spin on exit rendezvous */ atomic_incl(&mp_rv_exit, 1); + tsc_spin_start = rdtsc64(); while (mp_rv_exit < mp_rv_ncpus) { if (!intrs_enabled) handle_pending_TLB_flushes(); - cpu_pause(); + mp_spin_timeout_check(tsc_spin_start, + "mp_rendezvous_action() exit"); } /* teardown function */ @@ -786,6 +853,7 @@ mp_rendezvous(void (*setup_func)(void *), void (*teardown_func)(void *), void *arg) { + uint64_t tsc_spin_start; if (!smp_initialized) { if (setup_func != NULL) @@ -827,8 +895,9 @@ mp_rendezvous(void (*setup_func)(void *), * This is necessary to ensure that all processors have proceeded * from the exit barrier before we release the rendezvous structure. */ + tsc_spin_start = rdtsc64(); while (mp_rv_complete < mp_rv_ncpus) { - cpu_pause(); + mp_spin_timeout_check(tsc_spin_start, "mp_rendezvous()"); } /* Tidy up */ @@ -1073,9 +1142,11 @@ mp_cpus_call_wait(boolean_t intrs_enabled, volatile long *mp_cpus_calls) { mp_call_queue_t *cqp; + uint64_t tsc_spin_start; cqp = &mp_cpus_call_head[cpu_number()]; + tsc_spin_start = rdtsc64(); while (*mp_cpus_calls < mp_cpus_signals) { if (!intrs_enabled) { /* Sniffing w/o locking */ @@ -1083,7 +1154,7 @@ mp_cpus_call_wait(boolean_t intrs_enabled, mp_cpus_call_action(); handle_pending_TLB_flushes(); } - cpu_pause(); + mp_spin_timeout_check(tsc_spin_start, "mp_cpus_call_wait()"); } } @@ -1104,6 +1175,7 @@ mp_cpus_call1( cpumask_t cpus_notcalled = 0; long mp_cpus_signals = 0; volatile long mp_cpus_calls = 0; + uint64_t tsc_spin_start; KERNEL_DEBUG_CONSTANT( TRACE_MP_CPUS_CALL | DBG_FUNC_START, @@ -1127,6 +1199,8 @@ mp_cpus_call1( * and then re-check it after taking the call lock. A cpu being taken * offline runs the action function after clearing the cpu_running. */ + mp_disable_preemption(); /* interrupts may be enabled */ + tsc_spin_start = rdtsc64(); for (cpu = 0; cpu < (cpu_t) real_ncpus; cpu++) { if (((cpu_to_cpumask(cpu) & cpus) == 0) || !cpu_datap(cpu)->cpu_running) @@ -1183,7 +1257,9 @@ mp_cpus_call1( mp_cpus_call_action(); handle_pending_TLB_flushes(); } - cpu_pause(); + mp_spin_timeout_check( + tsc_spin_start, + "mp_cpus_call1()"); goto queue_call; } callp->countp = &mp_cpus_calls; @@ -1214,6 +1290,9 @@ mp_cpus_call1( } } + /* Safe to allow pre-emption now */ + mp_enable_preemption(); + /* For ASYNC, now wait for all signaled cpus to complete their calls */ if (mode == ASYNC) { mp_cpus_call_wait(intrs_enabled, mp_cpus_signals, &mp_cpus_calls); @@ -1313,8 +1392,6 @@ i386_activate_cpu(void) flush_tlb_raw(); } -extern void etimer_timer_expire(void *arg); - void i386_deactivate_cpu(void) { @@ -1335,7 +1412,7 @@ i386_deactivate_cpu(void) * and poke it in case there's a sooner deadline for it to schedule. 
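The entry/exit counting in mp_rendezvous_action() above amounts to a two-phase barrier; a minimal sketch of the pattern, omitting the TLB-flush polling and the timeout checks (helper names assumed from this file):

	#include <stdint.h>

	extern void atomic_incl(volatile long *addr, long delta);
	extern void cpu_pause(void);

	static volatile long rv_entry, rv_exit;

	/*
	 * Two-phase barrier: no CPU runs the action until all have
	 * arrived, and none proceeds until all have finished acting.
	 */
	static void
	rendezvous_sketch(void (*action)(void *), void *arg, long ncpus)
	{
		atomic_incl(&rv_entry, 1);
		while (rv_entry < ncpus)
			cpu_pause();		/* entry barrier */

		if (action != NULL)
			action(arg);

		atomic_incl(&rv_exit, 1);
		while (rv_exit < ncpus)
			cpu_pause();		/* exit barrier */
	}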
*/ timer_queue_shutdown(&cdp->rtclock_timer.queue); - mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, etimer_timer_expire, NULL); + mp_cpus_call(cpu_to_cpumask(master_cpu), ASYNC, timer_queue_expire_local, NULL); /* * Open an interrupt window @@ -1378,6 +1455,11 @@ mp_kdp_enter(void) DBG("mp_kdp_enter()\n"); +#if DEBUG + if (!smp_initialized) + simple_lock_init(&mp_kdp_lock, 0); +#endif + /* * Here to enter the debugger. * In case of races, only one cpu is allowed to enter kdp after @@ -1559,7 +1641,7 @@ mp_kdp_exit(void) debugger_exit_time = mach_absolute_time(); mp_kdp_trap = FALSE; - __asm__ volatile("mfence"); + mfence(); /* Wait other processors to stop spinning. XXX needs timeout */ DBG("mp_kdp_exit() waiting for processors to resume\n"); @@ -1699,7 +1781,7 @@ _cpu_warm_setup( { cpu_warm_data_t cwdp = (cpu_warm_data_t)arg; - timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_CRITICAL | TIMER_CALL_LOCAL); + timer_call_enter(cwdp->cwd_call, cwdp->cwd_deadline, TIMER_CALL_SYS_CRITICAL | TIMER_CALL_LOCAL); cwdp->cwd_result = 0; return; diff --git a/osfmk/i386/mp.h b/osfmk/i386/mp.h index faa84df7d..1e5a13dc9 100644 --- a/osfmk/i386/mp.h +++ b/osfmk/i386/mp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -265,27 +265,7 @@ i_bit_impl(long word, long bit) { #if MACH_RT -#if defined(__i386__) - -#define _DISABLE_PREEMPTION \ - incl %gs:CPU_PREEMPTION_LEVEL - -#define _ENABLE_PREEMPTION \ - decl %gs:CPU_PREEMPTION_LEVEL ; \ - jne 9f ; \ - pushl %eax ; \ - pushl %ecx ; \ - pushl %edx ; \ - call EXT(kernel_preempt_check) ; \ - popl %edx ; \ - popl %ecx ; \ - popl %eax ; \ -9: - -#define _ENABLE_PREEMPTION_NO_CHECK \ - decl %gs:CPU_PREEMPTION_LEVEL - -#elif defined(__x86_64__) +#if defined(__x86_64__) #define _DISABLE_PREEMPTION \ incl %gs:CPU_PREEMPTION_LEVEL @@ -304,63 +284,12 @@ i_bit_impl(long word, long bit) { #endif /* x86_64 just calls through to the other macro directly */ -#if MACH_ASSERT && defined(__i386__) -#define DISABLE_PREEMPTION \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call EXT(_disable_preemption); \ - popl %edx; \ - popl %ecx; \ - popl %eax -#define ENABLE_PREEMPTION \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call EXT(_enable_preemption); \ - popl %edx; \ - popl %ecx; \ - popl %eax -#define ENABLE_PREEMPTION_NO_CHECK \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call EXT(_enable_preemption_no_check); \ - popl %edx; \ - popl %ecx; \ - popl %eax -#define MP_DISABLE_PREEMPTION \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call EXT(_mp_disable_preemption); \ - popl %edx; \ - popl %ecx; \ - popl %eax -#define MP_ENABLE_PREEMPTION \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call EXT(_mp_enable_preemption); \ - popl %edx; \ - popl %ecx; \ - popl %eax -#define MP_ENABLE_PREEMPTION_NO_CHECK \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call EXT(_mp_enable_preemption_no_check); \ - popl %edx; \ - popl %ecx; \ - popl %eax -#else /* MACH_ASSERT */ #define DISABLE_PREEMPTION _DISABLE_PREEMPTION #define ENABLE_PREEMPTION _ENABLE_PREEMPTION #define ENABLE_PREEMPTION_NO_CHECK _ENABLE_PREEMPTION_NO_CHECK #define MP_DISABLE_PREEMPTION _DISABLE_PREEMPTION #define MP_ENABLE_PREEMPTION _ENABLE_PREEMPTION #define MP_ENABLE_PREEMPTION_NO_CHECK _ENABLE_PREEMPTION_NO_CHECK -#endif /* MACH_ASSERT */ #else /* MACH_RT */ #define DISABLE_PREEMPTION diff --git a/osfmk/i386/mp_desc.c b/osfmk/i386/mp_desc.c 
index fd4003f20..b1bc88bf6 100644 --- a/osfmk/i386/mp_desc.c +++ b/osfmk/i386/mp_desc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -62,7 +62,6 @@ #include #include #include -#include #include #include #include @@ -82,7 +81,6 @@ #include -#ifdef __x86_64__ #define K_INTR_GATE (ACC_P|ACC_PL_K|ACC_INTR_GATE) #define U_INTR_GATE (ACC_P|ACC_PL_U|ACC_INTR_GATE) @@ -90,7 +88,8 @@ #define TRAP(n, name) extern void *name ; #define TRAP_ERR(n, name) extern void *name ; #define TRAP_SPC(n, name) extern void *name ; -#define TRAP_IST(n, name) extern void *name ; +#define TRAP_IST1(n, name) extern void *name ; +#define TRAP_IST2(n, name) extern void *name ; #define INTERRUPT(n) extern void *_intr_ ## n ; #define USER_TRAP(n, name) extern void *name ; #define USER_TRAP_SPC(n, name) extern void *name ; @@ -102,7 +101,8 @@ #undef TRAP #undef TRAP_ERR #undef TRAP_SPC -#undef TRAP_IST +#undef TRAP_IST1 +#undef TRAP_IST2 #undef INTERRUPT #undef USER_TRAP #undef USER_TRAP_SPC @@ -119,7 +119,7 @@ #define TRAP_ERR TRAP #define TRAP_SPC TRAP -#define TRAP_IST(n, name) \ +#define TRAP_IST1(n, name) \ [n] = { \ (uintptr_t)&name, \ KERNEL64_CS, \ @@ -128,6 +128,15 @@ 0 \ }, +#define TRAP_IST2(n, name) \ + [n] = { \ + (uintptr_t)&name, \ + KERNEL64_CS, \ + 2, \ + K_INTR_GATE, \ + 0 \ + }, + #define INTERRUPT(n) \ [n] = { \ (uintptr_t)&_intr_ ## n,\ @@ -154,13 +163,6 @@ struct fake_descriptor64 master_idt64[IDTSZ] __attribute__ ((aligned(PAGE_SIZE))) = { #include "../x86_64/idt_table.h" }; -#endif - -/* - * The i386 needs an interrupt stack to keep the PCB stack from being - * overrun by interrupts. All interrupt stacks MUST lie at lower addresses - * than any thread`s kernel stack. - */ /* * First cpu`s interrupt stack. @@ -173,15 +175,10 @@ extern uint32_t low_eintstack[]; /* top */ * The master cpu (cpu 0) has its data area statically allocated; * others are allocated dynamically and this array is updated at runtime. */ -cpu_data_t cpu_data_master = { +static cpu_data_t cpu_data_master = { .cpu_this = &cpu_data_master, .cpu_nanotime = &pal_rtc_nanotime_info, .cpu_int_stack_top = (vm_offset_t) low_eintstack, -#ifdef __i386__ - .cpu_is64bit = FALSE, -#else - .cpu_is64bit = TRUE -#endif }; cpu_data_t *cpu_data_ptr[MAX_CPUS] = { [0] = &cpu_data_master }; @@ -189,28 +186,9 @@ decl_simple_lock_data(,ncpus_lock); /* protects real_ncpus */ unsigned int real_ncpus = 1; unsigned int max_ncpus = MAX_CPUS; -#ifdef __i386__ -extern void *hi_remap_text; -#define HI_TEXT(lo_text) \ - (((uint32_t)&lo_text - (uint32_t)&hi_remap_text) + HIGH_MEM_BASE) - -extern void hi_sysenter(void); - -typedef struct { - uint16_t length; - uint32_t offset[2]; -} __attribute__((__packed__)) table_descriptor64_t; - -extern table_descriptor64_t gdtptr64; -extern table_descriptor64_t idtptr64; -#endif extern void hi64_sysenter(void); extern void hi64_syscall(void); -#if defined(__x86_64__) && !defined(UBER64) -#define UBER64(x) ((uintptr_t)x) -#endif - /* * Multiprocessor i386/i486 systems use a separate copy of the * GDT, IDT, LDT, and kernel TSS per processor. The first three @@ -411,128 +389,6 @@ fix_desc64(void *descp, int count) } } -#ifdef __i386__ -void -cpu_desc_init(cpu_data_t *cdp) -{ - cpu_desc_index_t *cdi = &cdp->cpu_desc_index; - - if (cdp == &cpu_data_master) { - /* - * Fix up the entries in the GDT to point to - * this LDT and this TSS. 
- */ - struct fake_descriptor temp_fake_desc; - temp_fake_desc = ldt_desc_pattern; - temp_fake_desc.offset = (vm_offset_t) &master_ldt; - fix_desc(&temp_fake_desc, 1); - *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = - temp_fake_desc; - *(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = - temp_fake_desc; - - temp_fake_desc = tss_desc_pattern; - temp_fake_desc.offset = (vm_offset_t) &master_ktss; - fix_desc(&temp_fake_desc, 1); - *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = - temp_fake_desc; - - temp_fake_desc = cpudata_desc_pattern; - temp_fake_desc.offset = (vm_offset_t) &cpu_data_master; - fix_desc(&temp_fake_desc, 1); - *(struct fake_descriptor *) &master_gdt[sel_idx(CPU_DATA_GS)] = - temp_fake_desc; - - fix_desc((void *)&master_idt, IDTSZ); - - cdi->cdi_idt.ptr = master_idt; - cdi->cdi_gdt.ptr = (void *)master_gdt; - - - /* - * Master CPU uses the tables built at boot time. - * Just set the index pointers to the high shared-mapping space. - * Note that the sysenter stack uses empty space above the ktss - * in the HIGH_FIXED_KTSS page. In this case we don't map - * the real master_sstk in low memory. - */ - cdi->cdi_ktss = (struct i386_tss *) - pmap_index_to_virt(HIGH_FIXED_KTSS) ; - cdi->cdi_sstk = (vm_offset_t) (cdi->cdi_ktss + 1) + - (vm_offset_t) &master_sstk.top - - (vm_offset_t) &master_sstk; - } else { - cpu_desc_table_t *cdt = (cpu_desc_table_t *) cdp->cpu_desc_tablep; - - vm_offset_t cpu_hi_desc; - - cpu_hi_desc = pmap_cpu_high_shared_remap( - cdp->cpu_number, - HIGH_CPU_DESC, - (vm_offset_t) cdt, 1); - - /* - * Per-cpu GDT, IDT, LDT, KTSS descriptors are allocated in one - * block (cpu_desc_table) and double-mapped into high shared space - * in one page window. - * Also, a transient stack for the fast sysenter path, the top of - * which is set at context switch time to point to the PCB using - * the high address. - */ - cdi->cdi_gdt.ptr = (struct fake_descriptor *) (cpu_hi_desc + - offsetof(cpu_desc_table_t, gdt[0])); - cdi->cdi_idt.ptr = (struct fake_descriptor *) (cpu_hi_desc + - offsetof(cpu_desc_table_t, idt[0])); - cdi->cdi_ktss = (struct i386_tss *) (cpu_hi_desc + - offsetof(cpu_desc_table_t, ktss)); - cdi->cdi_sstk = cpu_hi_desc + offsetof(cpu_desc_table_t, sstk.top); - - /* - * LDT descriptors are mapped into a separate area. - */ - cdi->cdi_ldt = (struct fake_descriptor *) - pmap_cpu_high_shared_remap( - cdp->cpu_number, - HIGH_CPU_LDT_BEGIN, - (vm_offset_t) cdp->cpu_ldtp, - HIGH_CPU_LDT_END - HIGH_CPU_LDT_BEGIN + 1); - - /* - * Copy the tables - */ - bcopy((char *)master_idt, (char *)cdt->idt, sizeof(master_idt)); - bcopy((char *)master_gdt, (char *)cdt->gdt, sizeof(master_gdt)); - bcopy((char *)master_ldt, (char *)cdp->cpu_ldtp, sizeof(master_ldt)); - bzero((char *)&cdt->ktss, sizeof(struct i386_tss)); - - /* - * Fix up the entries in the GDT to point to - * this LDT and this TSS.
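(All of the pmap_cpu_high_shared_remap() machinery above exists so the 32-bit kernel could keep per-cpu descriptor tables visible at fixed high shared addresses; the surviving 64-bit path largely uses its tables in place, which is presumably why the whole branch can be deleted rather than rewritten.)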
- */ - struct fake_descriptor temp_ldt = ldt_desc_pattern; - temp_ldt.offset = (vm_offset_t)cdi->cdi_ldt; - fix_desc(&temp_ldt, 1); - - cdt->gdt[sel_idx(KERNEL_LDT)] = temp_ldt; - cdt->gdt[sel_idx(USER_LDT)] = temp_ldt; - - cdt->gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; - cdt->gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) cdi->cdi_ktss; - fix_desc(&cdt->gdt[sel_idx(KERNEL_TSS)], 1); - - cdt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; - cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; - fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1); - - cdt->ktss.ss0 = KERNEL_DS; - cdt->ktss.io_bit_map_offset = 0x0FFF; /* no IO bitmap */ - - cpu_userwindow_init(cdp->cpu_number); - cpu_physwindow_init(cdp->cpu_number); - - } -} -#endif /* __i386__ */ void cpu_desc_init64(cpu_data_t *cdp) @@ -546,22 +402,17 @@ cpu_desc_init64(cpu_data_t *cdp) */ cdi->cdi_ktss = (void *)&master_ktss64; cdi->cdi_sstk = (vm_offset_t) &master_sstk.top; -#if __x86_64__ cdi->cdi_gdt.ptr = (void *)MASTER_GDT_ALIAS; cdi->cdi_idt.ptr = (void *)MASTER_IDT_ALIAS; -#else - cdi->cdi_gdt.ptr = (void *)master_gdt; - cdi->cdi_idt.ptr = (void *)master_idt64; -#endif cdi->cdi_ldt = (struct fake_descriptor *) master_ldt; /* Replace the expanded LDTs and TSS slots in the GDT */ - kernel_ldt_desc64.offset64 = UBER64(&master_ldt); + kernel_ldt_desc64.offset64 = (uintptr_t) &master_ldt; *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_LDT)] = kernel_ldt_desc64; *(struct fake_descriptor64 *) &master_gdt[sel_idx(USER_LDT)] = kernel_ldt_desc64; - kernel_tss_desc64.offset64 = UBER64(&master_ktss64); + kernel_tss_desc64.offset64 = (uintptr_t) &master_ktss64; *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_TSS)] = kernel_tss_desc64; @@ -572,13 +423,12 @@ cpu_desc_init64(cpu_data_t *cdp) fix_desc64((void *) &master_gdt[sel_idx(KERNEL_TSS)], 1); /* - * Set the double-fault stack as IST1 in the 64-bit TSS + * Set the NMI/fault stacks as IST2/IST1 in the 64-bit TSS + * Note: this will be dynamically re-allocated in VM later. */ -#if __x86_64__ - master_ktss64.ist1 = (uintptr_t) low_eintstack; -#else - master_ktss64.ist1 = UBER64((uintptr_t) df_task_stack_end); -#endif + master_ktss64.ist2 = (uintptr_t) low_eintstack; + master_ktss64.ist1 = (uintptr_t) low_eintstack + - sizeof(x86_64_intr_stack_frame_t); } else { cpu_desc_table64_t *cdt = (cpu_desc_table64_t *) cdp->cpu_desc_tablep; @@ -587,11 +437,7 @@ cpu_desc_init64(cpu_data_t *cdp) * heap (cpu_desc_table). * LDT descriptors are mapped into a separate area. */ -#if __x86_64__ cdi->cdi_idt.ptr = (void *)MASTER_IDT_ALIAS; -#else - cdi->cdi_idt.ptr = (void *)cdt->idt; -#endif cdi->cdi_gdt.ptr = (struct fake_descriptor *)cdt->gdt; cdi->cdi_ktss = (void *)&cdt->ktss; cdi->cdi_sstk = (vm_offset_t)&cdt->sstk.top; @@ -600,9 +446,6 @@ cpu_desc_init64(cpu_data_t *cdp) /* * Copy the tables */ -#if !__x86_64__ - bcopy((char *)master_idt64, (char *)cdt->idt, sizeof(master_idt64)); -#endif bcopy((char *)master_gdt, (char *)cdt->gdt, sizeof(master_gdt)); bcopy((char *)master_ldt, (char *)cdp->cpu_ldtp, sizeof(master_ldt)); bcopy((char *)&master_ktss64, (char *)&cdt->ktss, sizeof(struct x86_64_tss)); @@ -611,33 +454,26 @@ cpu_desc_init64(cpu_data_t *cdp) * Fix up the entries in the GDT to point to * this LDT and this TSS. 
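The "expanded" slots being rewritten here hold 16-byte 64-bit system descriptors whose base address is scattered across four fields; a sketch of that layout (field names illustrative; the Intel SDM is the authoritative reference):

	#include <stdint.h>

	/* 64-bit LDT/TSS system descriptor: base split 16/8/8/32 bits. */
	typedef struct {
		uint16_t limit_lo;	/* limit[15:0] */
		uint16_t base_lo;	/* base[15:0] */
		uint8_t  base_mid;	/* base[23:16] */
		uint8_t  access;	/* type, DPL, present */
		uint8_t  gran;		/* limit[19:16] and flags */
		uint8_t  base_hi;	/* base[31:24] */
		uint32_t base_top;	/* base[63:32] */
		uint32_t reserved;
	} sys_desc64_sketch_t;

This is why each descriptor below is built as a pattern plus a linear offset64 and then run through fix_desc64() to scatter the address into the fields.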
*/ - kernel_ldt_desc64.offset64 = UBER64(cdi->cdi_ldt); + kernel_ldt_desc64.offset64 = (uintptr_t) cdi->cdi_ldt; *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_LDT)] = kernel_ldt_desc64; fix_desc64(&cdt->gdt[sel_idx(KERNEL_LDT)], 1); - kernel_ldt_desc64.offset64 = UBER64(cdi->cdi_ldt); + kernel_ldt_desc64.offset64 = (uintptr_t) cdi->cdi_ldt; *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(USER_LDT)] = kernel_ldt_desc64; fix_desc64(&cdt->gdt[sel_idx(USER_LDT)], 1); - kernel_tss_desc64.offset64 = UBER64(cdi->cdi_ktss); + kernel_tss_desc64.offset64 = (uintptr_t) cdi->cdi_ktss; *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_TSS)] = kernel_tss_desc64; fix_desc64(&cdt->gdt[sel_idx(KERNEL_TSS)], 1); - /* Set (zeroed) double-fault stack as IST1 */ - bzero((void *) cdt->dfstk, sizeof(cdt->dfstk)); - cdt->ktss.ist1 = UBER64((unsigned long)cdt->dfstk + sizeof(cdt->dfstk)); -#ifdef __i386__ - cdt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern; - cdt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp; - fix_desc(&cdt->gdt[sel_idx(CPU_DATA_GS)], 1); - - /* Allocate copyio windows */ - cpu_userwindow_init(cdp->cpu_number); - cpu_physwindow_init(cdp->cpu_number); -#endif + /* Set (zeroed) fault stack as IST1, NMI intr stack IST2 */ + bzero((void *) cdt->fstk, sizeof(cdt->fstk)); + cdt->ktss.ist2 = (unsigned long)cdt->fstk + sizeof(cdt->fstk); + cdt->ktss.ist1 = cdt->ktss.ist2 + - sizeof(x86_64_intr_stack_frame_t); } /* Require that the top of the sysenter stack is 16-byte aligned */ @@ -645,53 +481,12 @@ cpu_desc_init64(cpu_data_t *cdp) panic("cpu_desc_init64() sysenter stack not 16-byte aligned"); } -#ifdef __i386__ -void -cpu_desc_load(cpu_data_t *cdp) -{ - cpu_desc_index_t *cdi = &cdp->cpu_desc_index; - - cdi->cdi_idt.size = 0x1000 + cdp->cpu_number; - cdi->cdi_gdt.size = sizeof(struct real_descriptor)*GDTSZ - 1; - - lgdt((uintptr_t *) &cdi->cdi_gdt); - lidt((uintptr_t *) &cdi->cdi_idt); - lldt(KERNEL_LDT); - - set_tr(KERNEL_TSS); - - __asm__ volatile("mov %0, %%gs" : : "rm" ((unsigned short)(CPU_DATA_GS))); -} -#endif /* __i386__ */ void cpu_desc_load64(cpu_data_t *cdp) { cpu_desc_index_t *cdi = &cdp->cpu_desc_index; -#ifdef __i386__ - /* - * Load up the new descriptors etc - * ml_load_desc64() expects these global pseudo-descriptors: - * gdtptr64 -> per-cpu gdt - * idtptr64 -> per-cpu idt - * These are 10-byte descriptors with 64-bit addresses into - * uber-space. - * - * Refer to commpage/cpu_number.s for the IDT limit trick. - */ - gdtptr64.length = GDTSZ * sizeof(struct real_descriptor) - 1; - gdtptr64.offset[0] = (uint32_t) cdi->cdi_gdt.ptr; - gdtptr64.offset[1] = KERNEL_UBER_BASE_HI32; - idtptr64.length = 0x1000 + cdp->cpu_number; - idtptr64.offset[0] = (uint32_t) cdi->cdi_idt.ptr; - idtptr64.offset[1] = KERNEL_UBER_BASE_HI32; - - /* Make sure busy bit is cleared in the TSS */ - gdt_desc_p(KERNEL_TSS)->access &= ~ACC_TSS_BUSY; - - ml_load_desc64(); -#else /* Load the GDT, LDT, IDT and TSS */ cdi->cdi_gdt.size = sizeof(struct real_descriptor)*GDTSZ - 1; cdi->cdi_idt.size = 0x1000 + cdp->cpu_number; @@ -707,21 +502,8 @@ cpu_desc_load64(cpu_data_t *cdp) #if GPROF // Hack to enable mcount to work on K64 __asm__ volatile("mov %0, %%gs" : : "rm" ((unsigned short)(KERNEL_DS))); #endif -#endif } -#ifdef __i386__ -/* - * Set MSRs for sysenter/sysexit for 32-bit. 
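fast_syscall_init64(), kept below, packs the selectors that syscall/sysret use into IA32_STAR; a sketch of that packing (selector values here are placeholders, not the kernel's):

	#include <stdint.h>

	#define KERNEL64_CS_SKETCH	0x08	/* placeholder selectors */
	#define USER_CS_SKETCH		0x1b

	/*
	 * syscall loads the kernel CS from STAR[47:32]; sysret derives
	 * the user CS from STAR[63:48], adding 16 when returning to
	 * 64-bit code -- which is why the 32-bit user selector goes in
	 * the high field.
	 */
	static uint64_t
	star_sketch(void)
	{
		return ((uint64_t)USER_CS_SKETCH << 48) |
		       ((uint64_t)KERNEL64_CS_SKETCH << 32);
	}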
- */ -static void -fast_syscall_init(__unused cpu_data_t *cdp) -{ - wrmsr(MSR_IA32_SYSENTER_CS, SYSENTER_CS, 0); - wrmsr(MSR_IA32_SYSENTER_EIP, HI_TEXT(hi_sysenter), 0); - wrmsr(MSR_IA32_SYSENTER_ESP, current_sstk(), 0); -} -#endif /* * Set MSRs for sysenter/sysexit and syscall/sysret for 64-bit. @@ -730,8 +512,8 @@ static void fast_syscall_init64(__unused cpu_data_t *cdp) { wrmsr64(MSR_IA32_SYSENTER_CS, SYSENTER_CS); - wrmsr64(MSR_IA32_SYSENTER_EIP, UBER64((uintptr_t) hi64_sysenter)); - wrmsr64(MSR_IA32_SYSENTER_ESP, UBER64(current_sstk())); + wrmsr64(MSR_IA32_SYSENTER_EIP, (uintptr_t) hi64_sysenter); + wrmsr64(MSR_IA32_SYSENTER_ESP, current_sstk()); /* Enable syscall/sysret */ wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_SCE); @@ -740,7 +522,7 @@ fast_syscall_init64(__unused cpu_data_t *cdp) * Note USER_CS because sysret uses this + 16 when returning to * 64-bit code. */ - wrmsr64(MSR_IA32_LSTAR, UBER64((uintptr_t) hi64_syscall)); + wrmsr64(MSR_IA32_LSTAR, (uintptr_t) hi64_syscall); wrmsr64(MSR_IA32_STAR, (((uint64_t)USER_CS) << 48) | (((uint64_t)KERNEL64_CS) << 32)); /* @@ -752,18 +534,6 @@ fast_syscall_init64(__unused cpu_data_t *cdp) */ wrmsr64(MSR_IA32_FMASK, EFL_DF|EFL_IF|EFL_TF|EFL_NT); -#ifdef __i386__ - /* - * Set the Kernel GS base MSR to point to per-cpu data in uber-space. - * The uber-space handler (hi64_syscall) uses the swapgs instruction. - */ - wrmsr64(MSR_IA32_KERNEL_GS_BASE, UBER64(cdp)); - -#if ONLY_SAFE_FOR_LINDA_SERIAL - kprintf("fast_syscall_init64() KERNEL_GS_BASE=0x%016llx\n", - rdmsr64(MSR_IA32_KERNEL_GS_BASE)); -#endif -#endif } @@ -797,9 +567,6 @@ cpu_data_alloc(boolean_t is_boot_cpu) bzero((void*) cdp, sizeof(cpu_data_t)); cdp->cpu_this = cdp; - /* Propagate mode */ - cdp->cpu_is64bit = cpu_mode_is64bit(); - /* * Allocate interrupt stack: */ @@ -815,13 +582,10 @@ cpu_data_alloc(boolean_t is_boot_cpu) /* * Allocate descriptor table: - * Size depends on cpu mode. */ - ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp->cpu_desc_tablep, - cdp->cpu_is64bit ? 
sizeof(cpu_desc_table64_t) - : sizeof(cpu_desc_table_t)); + sizeof(cpu_desc_table64_t)); if (ret != KERN_SUCCESS) { printf("cpu_data_alloc() desc_table failed, ret=%d\n", ret); goto abort; @@ -864,7 +628,7 @@ abort: if (cdp) { if (cdp->cpu_desc_tablep) kfree((void *) cdp->cpu_desc_tablep, - sizeof(*cdp->cpu_desc_tablep)); + sizeof(cpu_desc_table64_t)); if (cdp->cpu_int_stack_top) kfree((void *) (cdp->cpu_int_stack_top - INTSTACK_SIZE), INTSTACK_SIZE); @@ -1005,13 +769,6 @@ cpu_userwindow_init(int cpu) */ cdp->cpu_copywindow_pdp = pmap_pde(kernel_pmap, user_window); -#ifdef __i386__ - cpu_desc_index_t *cdi = &cdp->cpu_desc_index; - cdi->cdi_gdt.ptr[sel_idx(USER_WINDOW_SEL)] = userwindow_desc_pattern; - cdi->cdi_gdt.ptr[sel_idx(USER_WINDOW_SEL)].offset = user_window; - - fix_desc(&cdi->cdi_gdt.ptr[sel_idx(USER_WINDOW_SEL)], 1); -#endif /* __i386__ */ } void @@ -1037,13 +794,6 @@ cpu_physwindow_init(int cpu) cdp->cpu_physwindow_base = phys_window; cdp->cpu_physwindow_ptep = vtopte(phys_window); } -#ifdef __i386__ - cpu_desc_index_t *cdi = &cdp->cpu_desc_index; - cdi->cdi_gdt.ptr[sel_idx(PHYS_WINDOW_SEL)] = physwindow_desc_pattern; - cdi->cdi_gdt.ptr[sel_idx(PHYS_WINDOW_SEL)].offset = phys_window; - - fix_desc(&cdi->cdi_gdt.ptr[sel_idx(PHYS_WINDOW_SEL)], 1); -#endif /* __i386__ */ } #endif /* NCOPY_WINDOWS > 0 */ @@ -1053,20 +803,9 @@ cpu_physwindow_init(int cpu) void cpu_mode_init(cpu_data_t *cdp) { -#ifdef __i386__ - if (cdp->cpu_is64bit) { - cpu_IA32e_enable(cdp); - cpu_desc_load64(cdp); - fast_syscall_init64(cdp); - } else { - fast_syscall_init(cdp); - } -#else fast_syscall_init64(cdp); -#endif } -#if __x86_64__ /* * Allocate a new interrupt stack for the boot processor from the * heap rather than continue to use the statically allocated space. */ @@ -1076,16 +815,17 @@ void cpu_data_realloc(void) { int ret; - vm_offset_t stack; + vm_offset_t istk; + vm_offset_t fstk; cpu_data_t *cdp; boolean_t istate; - ret = kmem_alloc(kernel_map, &stack, INTSTACK_SIZE); + ret = kmem_alloc(kernel_map, &istk, INTSTACK_SIZE); if (ret != KERN_SUCCESS) { panic("cpu_data_realloc() stack alloc, ret=%d\n", ret); } - bzero((void*) stack, INTSTACK_SIZE); - stack += INTSTACK_SIZE; + bzero((void*) istk, INTSTACK_SIZE); + istk += INTSTACK_SIZE; ret = kmem_alloc(kernel_map, (vm_offset_t *) &cdp, sizeof(cpu_data_t)); if (ret != KERN_SUCCESS) { @@ -1093,21 +833,33 @@ cpu_data_realloc(void) } /* Copy old contents into new area and make fix-ups */ - bcopy((void *) &cpu_data_master, (void*) cdp, sizeof(cpu_data_t)); + assert(cpu_number() == 0); + bcopy((void *) cpu_data_ptr[0], (void*) cdp, sizeof(cpu_data_t)); cdp->cpu_this = cdp; - cdp->cpu_int_stack_top = stack; - timer_call_initialize_queue(&cdp->rtclock_timer.queue); + cdp->cpu_int_stack_top = istk; + timer_call_queue_init(&cdp->rtclock_timer.queue); - kprintf("Reallocated master cpu data: %p, interrupt stack top: %p\n", - (void *) cdp, (void *) stack); + /* Allocate the separate fault stack */ + ret = kmem_alloc(kernel_map, &fstk, PAGE_SIZE); + if (ret != KERN_SUCCESS) { + panic("cpu_data_realloc() fault stack alloc, ret=%d\n", ret); + } + bzero((void*) fstk, PAGE_SIZE); + fstk += PAGE_SIZE; /* * With interrupts disabled commit the new areas.
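 * Everything that can reach the old static area -- cpu_data_ptr[0],
 * the IST fields in the TSS, and the GS-based per-cpu MSRs -- is
 * republished below before interrupts come back on, so no code path
 * ever observes a half-switched cpu_data.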
*/ istate = ml_set_interrupts_enabled(FALSE); cpu_data_ptr[0] = cdp; + master_ktss64.ist2 = (uintptr_t) fstk; + master_ktss64.ist1 = (uintptr_t) fstk + - sizeof(x86_64_intr_stack_frame_t); wrmsr64(MSR_IA32_GS_BASE, (uintptr_t) cdp); wrmsr64(MSR_IA32_KERNEL_GS_BASE, (uintptr_t) cdp); (void) ml_set_interrupts_enabled(istate); + + kprintf("Reallocated master cpu data: %p," + " interrupt stack: %p, fault stack: %p\n", + (void *) cdp, (void *) istk, (void *) fstk); } -#endif /* __x86_64__ */ diff --git a/osfmk/i386/mp_desc.h b/osfmk/i386/mp_desc.h index 3b8ef7ea1..1fc15f5f8 100644 --- a/osfmk/i386/mp_desc.h +++ b/osfmk/i386/mp_desc.h @@ -80,21 +80,11 @@ __BEGIN_DECLS * The descriptor tables are together in a structure * allocated one per processor (except for the boot processor). */ -typedef struct cpu_desc_table { - struct fake_descriptor idt[IDTSZ] __attribute__ ((aligned (16))); - struct fake_descriptor gdt[GDTSZ] __attribute__ ((aligned (16))); - struct i386_tss ktss __attribute__ ((aligned (16))); - struct sysenter_stack sstk; -} cpu_desc_table_t; - typedef struct cpu_desc_table64 { -#if !__x86_64__ - struct fake_descriptor64 idt[IDTSZ] __attribute__ ((aligned (16))); -#endif struct fake_descriptor gdt[GDTSZ] __attribute__ ((aligned (16))); struct x86_64_tss ktss __attribute__ ((aligned (16))); struct sysenter_stack sstk __attribute__ ((aligned (16))); - uint8_t dfstk[PAGE_SIZE] __attribute__ ((aligned (16))); + uint8_t fstk[PAGE_SIZE] __attribute__ ((aligned (16))); } cpu_desc_table64_t; #define current_gdt() (current_cpu_datap()->cpu_desc_index.cdi_gdt.ptr) diff --git a/osfmk/i386/pal_routines.h b/osfmk/i386/pal_routines.h index 4945a66c4..40ebdf0bc 100644 --- a/osfmk/i386/pal_routines.h +++ b/osfmk/i386/pal_routines.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Apple Inc. All rights reserved. + * Copyright (c) 2009-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -80,11 +80,7 @@ boolean_t pal_machine_sleep(uint8_t type_a, #ifdef XNU_KERNEL_PRIVATE /* Define any PAL-specific types for x86 */ -#ifdef __i386__ -typedef uint32_t pal_cr_t; -#else typedef uint64_t pal_cr_t; -#endif struct pal_cpu_data; /* Defined per-platform */ struct pal_pcb; /* Defined per-platform */ @@ -157,16 +153,16 @@ void pal_efi_hibernate_prepare(void); extern boolean_t virtualized; #define PAL_VIRTUALIZED_PROPERTY_VALUE 4 - + /* Allow for tricky IOKit property matching */ #define PAL_AICPM_PROPERTY_NAME "intel_cpupm_matching" static inline void pal_get_resource_property(const char **property_name, int *property_value) { - *property_name = PAL_AICPM_PROPERTY_NAME; - *property_value = PAL_AICPM_PROPERTY_VALUE; - if (virtualized) - *property_value = PAL_VIRTUALIZED_PROPERTY_VALUE; + *property_name = PAL_AICPM_PROPERTY_NAME; + *property_value = PAL_AICPM_PROPERTY_VALUE; + if (virtualized) + *property_value = PAL_VIRTUALIZED_PROPERTY_VALUE; } /* assembly function to update TSC / timebase info */ diff --git a/osfmk/i386/pal_routines_asm.s b/osfmk/i386/pal_routines_asm.s deleted file mode 100644 index 0c4089af9..000000000 --- a/osfmk/i386/pal_routines_asm.s +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright (c) 2009 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include - -#include - -/* - * Copy "count" bytes from "src" to %esp, using - * "tmpindex" for a scratch counter and %eax - */ -#define COPY_STACK(src, count, tmpindex) \ - mov $0, tmpindex /* initial scratch counter */ ; \ -1: \ - mov 0(src,tmpindex,1), %eax /* copy one 32-bit word from source... */ ; \ - mov %eax, 0(%esp,tmpindex,1) /* ... to stack */ ; \ - add $4, tmpindex /* increment counter */ ; \ - cmp count, tmpindex /* exit if stack has been copied */ ; \ - jne 1b - -/* - void - pal_efi_call_in_64bit_mode_asm(uint64_t func, - struct pal_efi_registers *efi_reg, - void *stack_contents, - size_t stack_contents_size) - - * Switch from compatibility mode to long mode, and - * then execute the function pointer with the specified - * register and stack contents (based at %rsp). Afterwards, - * collect the return value, restore the original state, - * and return.
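A hypothetical C-side caller of this thunk (the struct layout mirrors the 8-byte register slots the assembly loads at offsets 0..32; illustrative only):

	#include <stddef.h>
	#include <stdint.h>

	struct pal_efi_registers_sketch {
		uint64_t rcx, rdx, r8, r9, rax;
	};

	extern void pal_efi_call_in_64bit_mode_asm(uint64_t func,
	    struct pal_efi_registers_sketch *efi_reg,
	    void *stack_contents, size_t stack_contents_size);

	/*
	 * EFI runtime services follow the Microsoft x64 convention, so
	 * the first two arguments travel in rcx/rdx; the thunk stores
	 * RAX back into the register block for the caller to collect.
	 */
	static uint64_t
	efi_call_sketch(uint64_t func, uint64_t arg0, uint64_t arg1)
	{
		struct pal_efi_registers_sketch regs = {
			.rcx = arg0, .rdx = arg1, .r8 = 0, .r9 = 0, .rax = 0
		};
		uint64_t spill[2] = { 0, 0 };	/* 16-byte-aligned spill area */

		pal_efi_call_in_64bit_mode_asm(func, &regs, spill, sizeof(spill));
		return regs.rax;
	}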
-*/ -ENTRY(_pal_efi_call_in_64bit_mode_asm) - FRAME - - /* save non-volatile registers */ - push %ebx - push %esi - push %edi - - sub $12, %esp /* align to 16-byte boundary */ - mov 16(%ebp), %esi /* load efi_reg into %esi */ - mov 20(%ebp), %edx /* load stack_contents into %edx */ - mov 24(%ebp), %ecx /* load s_c_s into %ecx */ - sub %ecx, %esp /* make room for stack contents */ - - COPY_STACK(%edx, %ecx, %edi) - - ENTER_64BIT_MODE() - - /* load efi_reg into real registers */ - mov 0(%rsi), %rcx - mov 8(%rsi), %rdx - mov 16(%rsi), %r8 - mov 24(%rsi), %r9 - mov 32(%rsi), %rax - - mov 8(%rbp), %rdi /* load func pointer */ - call *%rdi /* call EFI runtime */ - - mov 16(%rbp), %esi /* load efi_reg into %esi */ - mov %rax, 32(%rsi) /* save RAX back */ - - ENTER_COMPAT_MODE() - - add 24(%ebp), %esp /* discard stack contents */ - add $12, %esp /* restore stack pointer */ - - pop %edi - pop %esi - pop %ebx - - EMARF - ret - -/* - void - pal_efi_call_in_32bit_mode_asm(uint32_t func, - struct pal_efi_registers *efi_reg, - void *stack_contents, - size_t stack_contents_size) -*/ -ENTRY(_pal_efi_call_in_32bit_mode_asm) - FRAME - - /* save non-volatile registers */ - push %ebx - push %esi - push %edi - - sub $12, %esp /* align to 16-byte boundary */ - mov 12(%ebp), %esi /* load efi_reg into %esi */ - mov 16(%ebp), %edx /* load stack_contents into %edx */ - mov 20(%ebp), %ecx /* load s_c_s into %ecx */ - sub %ecx, %esp /* make room for stack contents */ - - COPY_STACK(%edx, %ecx, %edi) - - /* load efi_reg into real registers */ - mov 0(%esi), %ecx - mov 8(%esi), %edx - mov 32(%esi), %eax - - mov 8(%ebp), %edi /* load func pointer */ - call *%edi /* call EFI runtime */ - - mov 12(%ebp), %esi /* load efi_reg into %esi */ - mov %eax, 32(%esi) /* save RAX back */ - movl $0, 36(%esi) /* zero out high bits of RAX */ - - add 20(%ebp), %esp /* discard stack contents */ - add $12, %esp /* restore stack pointer */ - - pop %edi - pop %esi - pop %ebx - - EMARF - ret - - -/* void _rtc_nanotime_store(uint64_t tsc, - uint64_t nsec, - uint32_t scale, - uint32_t shift, - rtc_nanotime_t *dst) ; -*/ - -ENTRY(_pal_rtc_nanotime_store) - push %ebp - movl %esp,%ebp - push %esi - - mov 32(%ebp),%edx /* get ptr to rtc_nanotime_info */ - - movl RNT_GENERATION(%edx),%esi /* get current generation */ - movl $0,RNT_GENERATION(%edx) /* flag data as being updated */ - - mov 8(%ebp),%eax - mov %eax,RNT_TSC_BASE(%edx) - mov 12(%ebp),%eax - mov %eax,RNT_TSC_BASE+4(%edx) - - mov 24(%ebp),%eax - mov %eax,RNT_SCALE(%edx) - - mov 28(%ebp),%eax - mov %eax,RNT_SHIFT(%edx) - - mov 16(%ebp),%eax - mov %eax,RNT_NS_BASE(%edx) - mov 20(%ebp),%eax - mov %eax,RNT_NS_BASE+4(%edx) - - incl %esi /* next generation */ - jnz 1f - incl %esi /* skip 0, which is a flag */ -1: movl %esi,RNT_GENERATION(%edx) /* update generation and make usable */ - - pop %esi - pop %ebp - - ret - - diff --git a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c index e5274a5eb..cc5b22787 100644 --- a/osfmk/i386/pcb.c +++ b/osfmk/i386/pcb.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -91,9 +91,6 @@ #include #include #include -#if defined(__i386__) -#include -#endif #include #include /* LAPIC_PMC_SWI_VECTOR */ @@ -101,6 +98,14 @@ #include #endif /* CONFIG_COUNTERS */ +#if KPC +#include +#endif + +#if KPERF +#include +#endif + /* * Maps state flavor to number of words in the state: */ @@ -179,6 +184,30 @@ void ml_get_csw_threads(thread_t *old, thread_t *new) { #endif /* CONFIG_COUNTERS */ +#if KPC +static inline void +ml_kpc_cswitch(thread_t old, thread_t new) +{ + if(!kpc_threads_counting) + return; + + /* call the kpc function */ + kpc_switch_context( old, new ); +} +#endif + +#if KPERF +static inline void +ml_kperf_cswitch(thread_t old, thread_t new) +{ + if(!kperf_cswitch_hook) + return; + + /* call the kperf function */ + kperf_switch_context( old, new ); +} +#endif + /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns @@ -201,19 +230,6 @@ dr7_is_valid(uint32_t *dr7) if ((*dr7 & mask1) == mask2) return (FALSE); - /* - * len0-3 pattern "10B" is ok for len on Merom and newer processors - * (it signifies an 8-byte wide region). We use the 64bit capability - * of the processor in lieu of the more laborious model/family checks - * as all 64-bit capable processors so far support this. - * Reject an attempt to use this on 64-bit incapable processors. - */ - if (current_cpu_datap()->cpu_is64bit == FALSE) - for (i = 0, mask1 = 0x3<<18, mask2 = 0x2<<18; i < 4; - i++, mask1 <<= 4, mask2 <<= 4) - if ((*dr7 & mask1) == mask2) - return (FALSE); - /* * if we are doing an instruction execution break (indicated * by r/w[x] being "00B"), then the len[x] must also be set @@ -259,8 +275,7 @@ set_live_debug_state32(cpu_data_t *cdp, x86_debug_state32_t *ds) __asm__ volatile ("movl %0,%%db1" : :"r" (ds->dr1)); __asm__ volatile ("movl %0,%%db2" : :"r" (ds->dr2)); __asm__ volatile ("movl %0,%%db3" : :"r" (ds->dr3)); - if (cpu_mode_is64bit()) - cdp->cpu_dr7 = ds->dr7; + cdp->cpu_dr7 = ds->dr7; } extern void set_64bit_debug_regs(x86_debug_state64_t *ds); @@ -282,27 +297,6 @@ debug_state_is_valid32(x86_debug_state32_t *ds) if (!dr7_is_valid(&ds->dr7)) return FALSE; -#if defined(__i386__) - /* - * Only allow local breakpoints and make sure they are not - * in the trampoline code. - */ - if (ds->dr7 & 0x1) - if (ds->dr0 >= (unsigned long)HIGH_MEM_BASE) - return FALSE; - - if (ds->dr7 & (0x1<<2)) - if (ds->dr1 >= (unsigned long)HIGH_MEM_BASE) - return FALSE; - - if (ds->dr7 & (0x1<<4)) - if (ds->dr2 >= (unsigned long)HIGH_MEM_BASE) - return FALSE; - - if (ds->dr7 & (0x1<<6)) - if (ds->dr3 >= (unsigned long)HIGH_MEM_BASE) - return FALSE; -#endif return TRUE; } @@ -476,6 +470,12 @@ machine_switch_context( #endif #if CONFIG_COUNTERS machine_pmc_cswitch(old, new); +#endif +#if KPC + ml_kpc_cswitch(old, new); +#endif +#if KPERF + ml_kperf_cswitch(old, new); #endif /* * Save FP registers if in use. */ @@ -626,11 +626,6 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts) * Scrub segment selector values: */ ts->cs = USER_CS; -#ifdef __i386__ - if (ts->ss == 0) ts->ss = USER_DS; - if (ts->ds == 0) ts->ds = USER_DS; - if (ts->es == 0) ts->es = USER_DS; -#else /* __x86_64__ */ /* * On a 64 bit kernel, we always override the data segments, * as the actual selector numbers have changed.
This also @@ -640,7 +635,6 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts) ts->ss = USER_DS; ts->ds = USER_DS; ts->es = USER_DS; -#endif /* Check segment selectors are safe */ if (!valid_user_segment_selectors(ts->cs, @@ -1707,10 +1701,6 @@ machine_thread_switch_addrmode(thread_t thread) /* If we're switching ourselves, reset the pcb addresses etc. */ if (thread == current_thread()) { boolean_t istate = ml_set_interrupts_enabled(FALSE); -#if defined(__i386__) - if (current_cpu_datap()->cpu_active_cr3 != kernel_pmap->pm_cr3) - pmap_load_kernel_cr3(); -#endif /* defined(__i386) */ act_machine_switch_pcb(NULL, thread); ml_set_interrupts_enabled(istate); } @@ -1736,121 +1726,20 @@ machine_set_current_thread(thread_t thread) void machine_thread_init(void) { - if (cpu_mode_is64bit()) { - assert(sizeof(x86_sframe_compat32_t) % 16 == 0); - iss_zone = zinit(sizeof(x86_sframe64_t), - thread_max * sizeof(x86_sframe64_t), - THREAD_CHUNK * sizeof(x86_sframe64_t), - "x86_64 saved state"); - - ids_zone = zinit(sizeof(x86_debug_state64_t), - thread_max * sizeof(x86_debug_state64_t), - THREAD_CHUNK * sizeof(x86_debug_state64_t), - "x86_64 debug state"); + iss_zone = zinit(sizeof(x86_saved_state_t), + thread_max * sizeof(x86_saved_state_t), + THREAD_CHUNK * sizeof(x86_saved_state_t), + "x86_64 saved state"); - } else { - iss_zone = zinit(sizeof(x86_sframe32_t), - thread_max * sizeof(x86_sframe32_t), - THREAD_CHUNK * sizeof(x86_sframe32_t), - "x86 saved state"); - ids_zone = zinit(sizeof(x86_debug_state32_t), - thread_max * (sizeof(x86_debug_state32_t)), - THREAD_CHUNK * (sizeof(x86_debug_state32_t)), - "x86 debug state"); - } - fpu_module_init(); -} + ids_zone = zinit(sizeof(x86_debug_state64_t), + thread_max * sizeof(x86_debug_state64_t), + THREAD_CHUNK * sizeof(x86_debug_state64_t), + "x86_64 debug state"); - -#if defined(__i386__) -/* - * Some routines for debugging activation code - */ -static void dump_handlers(thread_t); -void dump_regs(thread_t); -int dump_act(thread_t thr_act); - -static void -dump_handlers(thread_t thr_act) -{ - ReturnHandler *rhp = thr_act->handlers; - int counter = 0; - - printf("\t"); - while (rhp) { - if (rhp == &thr_act->special_handler){ - if (rhp->next) - printf("[NON-Zero next ptr(%p)]", rhp->next); - printf("special_handler()->"); - break; - } - printf("hdlr_%d(%p)->", counter, rhp->handler); - rhp = rhp->next; - if (++counter > 32) { - printf("Aborting: HUGE handler chain\n"); - break; - } - } - printf("HLDR_NULL\n"); -} - -void -dump_regs(thread_t thr_act) -{ - if (thread_is_64bit(thr_act)) { - x86_saved_state64_t *ssp; - - ssp = USER_REGS64(thr_act); - - panic("dump_regs: 64bit tasks not yet supported"); - - } else { - x86_saved_state32_t *ssp; - - ssp = USER_REGS32(thr_act); - - /* - * Print out user register state - */ - printf("\tRegs:\tedi=%x esi=%x ebp=%x ebx=%x edx=%x\n", - ssp->edi, ssp->esi, ssp->ebp, ssp->ebx, ssp->edx); - - printf("\t\tecx=%x eax=%x eip=%x efl=%x uesp=%x\n", - ssp->ecx, ssp->eax, ssp->eip, ssp->efl, ssp->uesp); - - printf("\t\tcs=%x ss=%x\n", ssp->cs, ssp->ss); - } + fpu_module_init(); } -int -dump_act(thread_t thr_act) -{ - if (!thr_act) - return(0); - - printf("thread(%p)(%d): task=%p(%d)\n", - thr_act, thr_act->ref_count, - thr_act->task, - thr_act->task ? 
thr_act->task->ref_count : 0); - - printf("\tsusp=%d user_stop=%d active=%x ast=%x\n", - thr_act->suspend_count, thr_act->user_stop_count, - thr_act->active, thr_act->ast); - printf("\tpcb=%p\n", &thr_act->machine); - if (thr_act->kernel_stack) { - vm_offset_t stack = thr_act->kernel_stack; - - printf("\tk_stk %lx eip %x ebx %x esp %x iss %p\n", - (long)stack, STACK_IKS(stack)->k_eip, STACK_IKS(stack)->k_ebx, - STACK_IKS(stack)->k_esp, thr_act->machine.iss); - } - - dump_handlers(thr_act); - dump_regs(thr_act); - return((int)thr_act); -} -#endif user_addr_t get_useraddr(void) @@ -1940,6 +1829,12 @@ machine_stack_handoff(thread_t old, #if CONFIG_COUNTERS machine_pmc_cswitch(old, new); #endif +#if KPC + ml_kpc_cswitch(old, new); +#endif +#if KPERF + ml_kperf_cswitch(old, new); +#endif stack = old->kernel_stack; if (stack == old->reserved_stack) { @@ -2103,16 +1998,6 @@ void act_thread_cfree(__unused void *ctx) { /* XXX - Unused */ } -void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid); -void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid) { - thread->machine.arg_store_valid = valid; -} - -boolean_t x86_sysenter_arg_store_isvalid(thread_t thread); - -boolean_t x86_sysenter_arg_store_isvalid(thread_t thread) { - return (thread->machine.arg_store_valid); -} /* * Duplicate one x86_debug_state32_t to another. "all" parameter @@ -2163,11 +2048,3 @@ copy_debug_state64( target->dr6 = src->dr6; target->dr7 = src->dr7; } - -boolean_t is_useraddr64_canonical(uint64_t addr64); - -boolean_t -is_useraddr64_canonical(uint64_t addr64) -{ - return IS_USERADDR64_CANONICAL(addr64); -} diff --git a/osfmk/i386/pcb_native.c b/osfmk/i386/pcb_native.c index 8ce815029..1c4e9ebfe 100644 --- a/osfmk/i386/pcb_native.c +++ b/osfmk/i386/pcb_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -94,9 +94,6 @@ #include #include #include -#if defined(__i386__) -#include -#endif #include #include @@ -106,8 +103,6 @@ extern char assert_is_16byte_multiple_sizeof_ ## _type_ \ /* Compile-time checks for vital save area sizing: */ ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_64_intr_stack_frame_t); -ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_sframe64_t); -ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_compat32_t); ASSERT_IS_16BYTE_MULTIPLE_SIZEOF(x86_saved_state_t); #define DIRECTION_FLAG_DEBUG (DEBUG | DEVELOPMENT) @@ -132,7 +127,6 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) } #endif -#if defined(__x86_64__) /* * Clear segment state * unconditionally for DS/ES/FS but more carefully for GS whose @@ -150,40 +144,29 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) cdp->cpu_uber.cu_user_gs_base = 0; } - if (is_saved_state64(pcb->iss)) { - /* - * The test above is performed against the thread save state - * flavor and not task's 64-bit feature flag because of the - * thread/task 64-bit state divergence that can arise in - * task_set_64bit() x86: the task state is changed before - * the individual thread(s). - */ - x86_saved_state64_tagged_t *iss64; - vm_offset_t isf; + vm_offset_t isf; - assert(is_saved_state64(pcb->iss)); - - iss64 = (x86_saved_state64_tagged_t *) pcb->iss; - - /* - * Set pointer to PCB's interrupt stack frame in cpu data. - * Used by syscall and double-fault trap handlers. 
- */ - isf = (vm_offset_t) &iss64->state.isf; - cdp->cpu_uber.cu_isf = isf; - pcb_stack_top = (vm_offset_t) (iss64 + 1); - /* require 16-byte alignment */ - assert((pcb_stack_top & 0xF) == 0); + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by syscall and double-fault trap handlers. + */ + isf = (vm_offset_t) &pcb->iss->ss_64.isf; + cdp->cpu_uber.cu_isf = isf; + pcb_stack_top = (vm_offset_t) (pcb->iss + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); - /* Interrupt stack is pcb */ - current_ktss64()->rsp0 = pcb_stack_top; + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = pcb_stack_top; - /* - * Top of temporary sysenter stack points to pcb stack. - * Although this is not normally used by 64-bit users, - * it needs to be set in case a sysenter is attempted. - */ - *current_sstk64() = pcb_stack_top; + /* + * Top of temporary sysenter stack points to pcb stack. + * Although this is not normally used by 64-bit users, + * it needs to be set in case a sysenter is attempted. + */ + *current_sstk64() = pcb_stack_top; + + if (is_saved_state64(pcb->iss)) { cdp->cpu_task_map = new->map->pmap->pm_task_map; @@ -209,37 +192,10 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); } } - } else { - x86_saved_state_compat32_t *iss32compat; - vm_offset_t isf; - - assert(is_saved_state32(pcb->iss)); - iss32compat = (x86_saved_state_compat32_t *) pcb->iss; - - pcb_stack_top = (uintptr_t) (iss32compat + 1); - /* require 16-byte alignment */ - assert((pcb_stack_top & 0xF) == 0); - - /* - * Set pointer to PCB's interrupt stack frame in cpu data. - * Used by debug trap handler. - */ - isf = (vm_offset_t) &iss32compat->isf64; - cdp->cpu_uber.cu_isf = isf; - /* Top of temporary sysenter stack points to pcb stack */ - *current_sstk64() = pcb_stack_top; - - /* Interrupt stack is pcb */ - current_ktss64()->rsp0 = pcb_stack_top; + } else { cdp->cpu_task_map = TASK_MAP_32BIT; - /* Precalculate pointers to syscall argument store, for use - * in the trampolines. - */ - cdp->cpu_uber_arg_store = (vm_offset_t)get_bsduthreadarg(new); - cdp->cpu_uber_arg_store_valid = (vm_offset_t)&pcb->arg_store_valid; - pcb->arg_store_valid = 0; /* * Disable USER64_CS @@ -276,162 +232,14 @@ act_machine_switch_pcb(__unused thread_t old, thread_t new) } } -#else /* !__x86_64__ */ - - vm_offset_t hi_pcb_stack_top; - vm_offset_t hi_iss; - - if (!cpu_mode_is64bit()) { - x86_saved_state32_tagged_t *hi_iss32; - /* - * Save a pointer to the top of the "kernel" stack - - * actually the place in the PCB where a trap into - * kernel mode will push the registers. 
- */ - hi_iss = (vm_offset_t)((unsigned long) - pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0) | - ((unsigned long)pcb->iss & PAGE_MASK)); - - cdp->cpu_hi_iss = (void *)hi_iss; - - pmap_high_map(pcb->iss_pte0, HIGH_CPU_ISS0); - pmap_high_map(pcb->iss_pte1, HIGH_CPU_ISS1); - - hi_iss32 = (x86_saved_state32_tagged_t *) hi_iss; - assert(hi_iss32->tag == x86_SAVED_STATE32); - - hi_pcb_stack_top = (int) (hi_iss32 + 1); - - /* - * For fast syscall, top of interrupt stack points to pcb stack - */ - *(vm_offset_t *) current_sstk() = hi_pcb_stack_top; - - current_ktss()->esp0 = hi_pcb_stack_top; - - } else if (is_saved_state64(pcb->iss)) { - /* - * The test above is performed against the thread save state - * flavor and not task's 64-bit feature flag because of the - * thread/task 64-bit state divergence that can arise in - * task_set_64bit() x86: the task state is changed before - * the individual thread(s). - */ - x86_saved_state64_tagged_t *iss64; - vm_offset_t isf; - - assert(is_saved_state64(pcb->iss)); - - iss64 = (x86_saved_state64_tagged_t *) pcb->iss; - - /* - * Set pointer to PCB's interrupt stack frame in cpu data. - * Used by syscall and double-fault trap handlers. - */ - isf = (vm_offset_t) &iss64->state.isf; - cdp->cpu_uber.cu_isf = UBER64(isf); - pcb_stack_top = (vm_offset_t) (iss64 + 1); - /* require 16-byte alignment */ - assert((pcb_stack_top & 0xF) == 0); - /* Interrupt stack is pcb */ - current_ktss64()->rsp0 = UBER64(pcb_stack_top); - - /* - * Top of temporary sysenter stack points to pcb stack. - * Although this is not normally used by 64-bit users, - * it needs to be set in case a sysenter is attempted. - */ - *current_sstk64() = UBER64(pcb_stack_top); - - cdp->cpu_task_map = new->map->pmap->pm_task_map; - - /* - * Enable the 64-bit user code segment, USER64_CS. - * Disable the 32-bit user code segment, USER_CS. - */ - ldt_desc_p(USER64_CS)->access |= ACC_PL_U; - ldt_desc_p(USER_CS)->access &= ~ACC_PL_U; - - } else { - x86_saved_state_compat32_t *iss32compat; - vm_offset_t isf; - - assert(is_saved_state32(pcb->iss)); - iss32compat = (x86_saved_state_compat32_t *) pcb->iss; - - pcb_stack_top = (int) (iss32compat + 1); - /* require 16-byte alignment */ - assert((pcb_stack_top & 0xF) == 0); - - /* - * Set pointer to PCB's interrupt stack frame in cpu data. - * Used by debug trap handler. - */ - isf = (vm_offset_t) &iss32compat->isf64; - cdp->cpu_uber.cu_isf = UBER64(isf); - - /* Top of temporary sysenter stack points to pcb stack */ - *current_sstk64() = UBER64(pcb_stack_top); - - /* Interrupt stack is pcb */ - current_ktss64()->rsp0 = UBER64(pcb_stack_top); - - cdp->cpu_task_map = TASK_MAP_32BIT; - /* Precalculate pointers to syscall argument store, for use - * in the trampolines. - */ - cdp->cpu_uber_arg_store = UBER64((vm_offset_t)get_bsduthreadarg(new)); - cdp->cpu_uber_arg_store_valid = UBER64((vm_offset_t)&pcb->arg_store_valid); - pcb->arg_store_valid = 0; - - /* - * Disable USER64_CS - * Enable USER_CS - */ - ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U; - ldt_desc_p(USER_CS)->access |= ACC_PL_U; - } - - /* - * Set the thread`s cthread (a.k.a pthread) - * For 32-bit user this involves setting the USER_CTHREAD - * descriptor in the LDT to point to the cthread data. - * The involves copying in the pre-initialized descriptor. 
- */ - ldtp = (struct real_descriptor *)current_ldt(); - ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; - if (pcb->uldt_selector != 0) - ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; - - /* - * For 64-bit, we additionally set the 64-bit User GS base - * address. On return to 64-bit user, the GS.Base MSR will be written. - */ - cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; - - /* - * Set the thread`s LDT or LDT entry. - */ - if (new->task == TASK_NULL || new->task->i386_ldt == 0) { - /* - * Use system LDT. - */ - ml_cpu_set_ldt(KERNEL_LDT); - } else { - /* - * Task has its own LDT. - */ - user_ldt_set(new); - } -#endif - /* * Bump the scheduler generation count in the commpage. * This can be read by user code to detect its preemption. */ commpage_sched_gen_inc(); } -void + +kern_return_t thread_set_wq_state32(thread_t thread, thread_state_t tstate) { x86_thread_state32_t *state; @@ -470,10 +278,12 @@ thread_set_wq_state32(thread_t thread, thread_state_t tstate) thread_unlock(thread); splx(s); } + + return KERN_SUCCESS; } -void +kern_return_t thread_set_wq_state64(thread_t thread, thread_state_t tstate) { x86_thread_state64_t *state; @@ -481,11 +291,17 @@ thread_set_wq_state64(thread_t thread, thread_state_t tstate) thread_t curth = current_thread(); spl_t s=0; - pal_register_cache_state(thread, DIRTY); - saved_state = USER_REGS64(thread); state = (x86_thread_state64_t *)tstate; + /* Disallow setting non-canonical PC or stack */ + if (!IS_USERADDR64_CANONICAL(state->rsp) || + !IS_USERADDR64_CANONICAL(state->rip)) { + return KERN_FAILURE; + } + + pal_register_cache_state(thread, DIRTY); + if (curth != thread) { s = splsched(); thread_lock(thread); @@ -508,6 +324,8 @@ thread_set_wq_state64(thread_t thread, thread_state_t tstate) thread_unlock(thread); splx(s); } + + return KERN_SUCCESS; } /* @@ -519,7 +337,6 @@ machine_thread_create( task_t task) { pcb_t pcb = THREAD_TO_PCB(thread); - x86_saved_state_t *iss; #if NCOPY_WINDOWS > 0 inval_copy_windows(thread); @@ -531,90 +348,45 @@ machine_thread_create( /* * Allocate save frame only if required. */ - if (pcb->sf == NULL) { + if (pcb->iss == NULL) { assert((get_preemption_level() == 0)); - pcb->sf = zalloc(iss_zone); - if (pcb->sf == NULL) + pcb->iss = (x86_saved_state_t *) zalloc(iss_zone); + if (pcb->iss == NULL) panic("iss_zone"); } - if (task_has_64BitAddr(task)) { - x86_sframe64_t *sf64; + /* + * Assure that the synthesized 32-bit state including + * the 64-bit interrupt state can be accommodated in the + * 64-bit state we allocate for both 32-bit and 64-bit threads. + */ + assert(sizeof(pcb->iss->ss_32) + sizeof(pcb->iss->ss_64.isf) <= + sizeof(pcb->iss->ss_64)); - sf64 = (x86_sframe64_t *) pcb->sf; + bzero((char *)pcb->iss, sizeof(x86_saved_state_t)); - bzero((char *)sf64, sizeof(x86_sframe64_t)); + if (task_has_64BitAddr(task)) { + pcb->iss->flavor = x86_SAVED_STATE64; - iss = (x86_saved_state_t *) &sf64->ssf; - iss->flavor = x86_SAVED_STATE64; - /* - * Guarantee that the bootstrapped thread will be in user - * mode. 
- */ - iss->ss_64.isf.rflags = EFL_USER_SET; - iss->ss_64.isf.cs = USER64_CS; - iss->ss_64.isf.ss = USER_DS; - iss->ss_64.fs = USER_DS; - iss->ss_64.gs = USER_DS; + pcb->iss->ss_64.isf.cs = USER64_CS; + pcb->iss->ss_64.isf.ss = USER_DS; + pcb->iss->ss_64.fs = USER_DS; + pcb->iss->ss_64.gs = USER_DS; + pcb->iss->ss_64.isf.rflags = EFL_USER_SET; } else { - if (cpu_mode_is64bit()) { - x86_sframe_compat32_t *sfc32; - - sfc32 = (x86_sframe_compat32_t *)pcb->sf; - - bzero((char *)sfc32, sizeof(x86_sframe_compat32_t)); - - iss = (x86_saved_state_t *) &sfc32->ssf.iss32; - iss->flavor = x86_SAVED_STATE32; -#if defined(__i386__) -#if DEBUG - { - sfc32->pad_for_16byte_alignment[0] = 0x64326432; - sfc32->pad_for_16byte_alignment[1] = 0x64326432; - } -#endif /* DEBUG */ - } else { - x86_sframe32_t *sf32; - struct real_descriptor *ldtp; - pmap_paddr_t paddr; - - sf32 = (x86_sframe32_t *) pcb->sf; - - bzero((char *)sf32, sizeof(x86_sframe32_t)); - - iss = (x86_saved_state_t *) &sf32->ssf; - iss->flavor = x86_SAVED_STATE32; - - pcb->iss_pte0 = pte_kernel_rw(kvtophys((vm_offset_t)iss)); - if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)iss + PAGE_SIZE)))) - pcb->iss_pte1 = INTEL_PTE_INVALID; - else - pcb->iss_pte1 = pte_kernel_rw(paddr); - - ldtp = (struct real_descriptor *) - pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); - pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; - pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; -#endif /* __i386__ */ - } - /* - * Guarantee that the bootstrapped thread will be in user - * mode. - */ - iss->ss_32.cs = USER_CS; - iss->ss_32.ss = USER_DS; - iss->ss_32.ds = USER_DS; - iss->ss_32.es = USER_DS; - iss->ss_32.fs = USER_DS; - iss->ss_32.gs = USER_DS; - iss->ss_32.efl = EFL_USER_SET; - + pcb->iss->flavor = x86_SAVED_STATE32; + + pcb->iss->ss_32.cs = USER_CS; + pcb->iss->ss_32.ss = USER_DS; + pcb->iss->ss_32.ds = USER_DS; + pcb->iss->ss_32.es = USER_DS; + pcb->iss->ss_32.fs = USER_DS; + pcb->iss->ss_32.gs = USER_DS; + pcb->iss->ss_32.efl = EFL_USER_SET; } - pcb->iss = iss; simple_lock_init(&pcb->lock, 0); - pcb->arg_store_valid = 0; pcb->cthread_self = 0; pcb->uldt_selector = 0; @@ -641,9 +413,9 @@ machine_thread_destroy( if (pcb->ifps != 0) fpu_free(pcb->ifps); - if (pcb->sf != 0) { - zfree(iss_zone, pcb->sf); - pcb->sf = 0; + if (pcb->iss != 0) { + zfree(iss_zone, pcb->iss); + pcb->iss = 0; } if (pcb->ids) { zfree(ids_zone, pcb->ids); diff --git a/osfmk/i386/phys.c b/osfmk/i386/phys.c index 4db5983c1..06ad22f10 100644 --- a/osfmk/i386/phys.c +++ b/osfmk/i386/phys.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
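In the phys.c hunks just below, ml_copy_phys now returns an error and dispatches 2-, 4- and 8-byte copies to the _bcopy2/_bcopy4/_bcopy8 assembly helpers so that debugger accesses to device registers are issued at exactly the requested width. A sketch of that width guarantee, with illustrative names; fixed-size volatile accesses keep the compiler from splitting or widening the transfer:

#include <stdint.h>

/* Illustrative stand-in for _bcopy4: exactly one 32-bit load and one store. */
static inline int
copy4_sketch(const void *src, void *dst)
{
	*(volatile uint32_t *)dst = *(const volatile uint32_t *)src;
	return 0;	/* the real helpers report a recovered fault via nonzero */
}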
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -156,26 +156,11 @@ pmap_copy_part_lpage( __unused vm_offset_t dst_offset, __unused vm_size_t len) { -#ifdef __i386__ - mapwindow_t *map; -#endif assert(pdst != vm_page_fictitious_addr); assert(pdst != vm_page_guard_addr); assert((dst_offset + len) <= PAGE_SIZE); -#ifdef __i386__ - mp_disable_preemption(); - - map = pmap_get_mapwindow(INTEL_PTE_VALID | INTEL_PTE_RW | (i386_ptob(pdst) & PG_FRAME) | - INTEL_PTE_REF | INTEL_PTE_MOD); - - memcpy((void *) (map->prv_CADDR + (dst_offset & INTEL_OFFMASK)), (void *) src, len); - - pmap_put_mapwindow(map); - - mp_enable_preemption(); -#endif } /* @@ -189,26 +174,11 @@ pmap_copy_part_rpage( __unused vm_offset_t dst, __unused vm_size_t len) { -#ifdef __i386__ - mapwindow_t *map; -#endif assert(psrc != vm_page_fictitious_addr); assert(psrc != vm_page_guard_addr); assert((src_offset + len) <= PAGE_SIZE); -#ifdef __i386__ - mp_disable_preemption(); - - map = pmap_get_mapwindow(INTEL_PTE_VALID | INTEL_PTE_RW | (i386_ptob(psrc) & PG_FRAME) | - INTEL_PTE_REF); - - memcpy((void *) dst, (void *) (map->prv_CADDR + (src_offset & INTEL_OFFMASK)), len); - - pmap_put_mapwindow(map); - - mp_enable_preemption(); -#endif } /* @@ -231,9 +201,14 @@ kvtophys( extern pt_entry_t *debugger_ptep; extern vm_map_offset_t debugger_window_kva; +extern int _bcopy(const void *, void *, vm_size_t); +extern int _bcopy2(const void *, void *); +extern int _bcopy4(const void *, void *); +extern int _bcopy8(const void *, void *); -__private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t bytes) { +__private_extern__ int ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t bytes) { void *src, *dst; + int err = 0; mp_disable_preemption(); #if NCOPY_WINDOWS > 0 @@ -284,27 +259,33 @@ __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t b panic("ml_copy_phys spans pages, src: 0x%llx, dst: 0x%llx", src64, dst64); } + /* + * For device register access from the debugger, + * 2-byte/16-bit, 4-byte/32-bit and 8-byte/64-bit copies are handled + * by assembly routines ensuring the required access widths. + * 1-byte and other copies are handled by the regular _bcopy. + */ switch (bytes) { - case 1: - *((uint8_t *) dst) = *((volatile uint8_t *) src); - break; case 2: - *((uint16_t *) dst) = *((volatile uint16_t *) src); + err = _bcopy2(src, dst); break; case 4: - *((uint32_t *) dst) = *((volatile uint32_t *) src); + err = _bcopy4(src, dst); break; - /* Should perform two 32-bit reads */ case 8: - *((uint64_t *) dst) = *((volatile uint64_t *) src); + err = _bcopy8(src, dst); break; + case 1: default: - bcopy(src, dst, bytes); + err = _bcopy(src, dst, bytes); break; } + #if NCOPY_WINDOWS > 0 pmap_put_mapwindow(src_map); pmap_put_mapwindow(dst_map); #endif mp_enable_preemption(); + + return err; } diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index d9c7ff9fb..a4b8c62e6 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include @@ -50,6 +50,7 @@ #include #include #include +#include extern int disableConsoleOutput; @@ -88,6 +89,9 @@ static inline void machine_classify_interval(uint64_t interval, uint64_t *bins, } } +uint64_t idle_pending_timers_processed; +uint32_t idle_entry_timer_processing_hdeadline_threshold = 5000000; + /* * Called when the CPU is idle. It calls into the power management kext * to determine the best way to idle the CPU. 
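One change in the machine_idle() hunk below deserves a note: at idle entry, if some timer's soft deadline has already passed and the hard deadline is near (within idle_entry_timer_processing_hdeadline_threshold time units), the CPU fires the pending timers rather than paying for an idle entry and an immediate wakeup. The decision reduces to a range check; a sketch with illustrative names:

#include <stdint.h>
#include <stdbool.h>

static bool
should_fire_timers_before_idle(uint64_t now, uint64_t soft_deadline,
    uint64_t hard_deadline, uint64_t threshold)
{
	/* soft deadline expired, hard deadline imminent: skip the idle trip */
	return (now >= soft_deadline) &&
	    (now < hard_deadline) &&
	    ((hard_deadline - now) < threshold);
}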
@@ -96,21 +100,46 @@ void machine_idle(void) { cpu_data_t *my_cpu = current_cpu_datap(); + __unused uint32_t cnum = my_cpu->cpu_number; uint64_t ctime, rtime, itime; - - if (my_cpu == NULL) - goto out; +#if CST_DEMOTION_DEBUG + processor_t cproc = my_cpu->cpu_processor; + uint64_t cwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total); +#endif /* CST_DEMOTION_DEBUG */ + uint64_t esdeadline, ehdeadline; + boolean_t do_process_pending_timers = FALSE; ctime = mach_absolute_time(); - + esdeadline = my_cpu->rtclock_timer.queue.earliest_soft_deadline; + ehdeadline = my_cpu->rtclock_timer.deadline; +/* Determine if pending timers exist */ + if ((ctime >= esdeadline) && (ctime < ehdeadline) && + ((ehdeadline - ctime) < idle_entry_timer_processing_hdeadline_threshold)) { + idle_pending_timers_processed++; + do_process_pending_timers = TRUE; + goto machine_idle_exit; + } else { + TCOAL_DEBUG(0xCCCC0000, ctime, my_cpu->rtclock_timer.queue.earliest_soft_deadline, my_cpu->rtclock_timer.deadline, idle_pending_timers_processed, 0); + } + my_cpu->lcpu.state = LCPU_IDLE; DBGLOG(cpu_handle, cpu_number(), MP_IDLE); - MARK_CPU_IDLE(cpu_number()); + MARK_CPU_IDLE(cnum); rtime = ctime - my_cpu->cpu_ixtime; my_cpu->cpu_rtime_total += rtime; machine_classify_interval(rtime, &my_cpu->cpu_rtimes[0], &cpu_rtime_bins[0], CPU_RTIME_BINS); +#if CST_DEMOTION_DEBUG + uint32_t cl = 0, ch = 0; + uint64_t c3res, c6res, c7res; + rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch); + c3res = ((uint64_t)ch << 32) | cl; + rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch); + c6res = ((uint64_t)ch << 32) | cl; + rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch); + c7res = ((uint64_t)ch << 32) | cl; +#endif if (pmInitDone) { /* @@ -121,7 +150,6 @@ machine_idle(void) */ if (earlyMaxBusDelay != DELAY_UNSET) ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF)); - if (earlyMaxIntDelay != DELAY_UNSET) ml_set_maxintdelay(earlyMaxIntDelay); } @@ -139,7 +167,6 @@ machine_idle(void) * stopping during a GV3 transition). */ pal_hlt(); - /* Once woken, re-disable interrupts. */ pal_cli(); } @@ -147,25 +174,53 @@ machine_idle(void) /* * Mark the CPU as running again. */ - MARK_CPU_ACTIVE(cpu_number()); - DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE); - + MARK_CPU_ACTIVE(cnum); + DBGLOG(cpu_handle, cnum, MP_UNIDLE); + my_cpu->lcpu.state = LCPU_RUN; uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time(); - my_cpu->cpu_idle_exits++; - itime = ixtime - ctime; + my_cpu->cpu_idle_exits++; + my_cpu->cpu_itime_total += itime; + machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS); +#if CST_DEMOTION_DEBUG + cl = ch = 0; + rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch); + c3res = (((uint64_t)ch << 32) | cl) - c3res; + rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch); + c6res = (((uint64_t)ch << 32) | cl) - c6res; + rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch); + c7res = (((uint64_t)ch << 32) | cl) - c7res; + + uint64_t ndelta = itime - tmrCvt(c3res + c6res + c7res, tscFCvtt2n); + KERNEL_DEBUG_CONSTANT(0xcead0000, ndelta, itime, c7res, c6res, c3res); + if ((itime > 1000000) && (ndelta > 250000)) + KERNEL_DEBUG_CONSTANT(0xceae0000, ndelta, itime, c7res, c6res, c3res); +#endif - my_cpu->lcpu.state = LCPU_RUN; - - machine_classify_interval(itime, &my_cpu->cpu_itimes[0], &cpu_itime_bins[0], CPU_ITIME_BINS); - my_cpu->cpu_itime_total += itime; - - + machine_idle_exit: /* * Re-enable interrupts. 
*/ -out: + pal_sti(); + + if (do_process_pending_timers) { + TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, idle_pending_timers_processed, 0); + + /* Adjust to reflect that this isn't truly a package idle exit */ + __sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1); + lapic_timer_swi(); /* Trigger software timer interrupt */ + __sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1); + + TCOAL_DEBUG(0xBBBB0000 | DBG_FUNC_END, ctime, esdeadline, idle_pending_timers_processed, 0, 0); + } +#if CST_DEMOTION_DEBUG + uint64_t nwakeups = PROCESSOR_DATA(cproc, wakeups_issued_total); + + if ((nwakeups == cwakeups) && (topoParms.nLThreadsPerPackage == my_cpu->lcpu.package->num_idle)) { + KERNEL_DEBUG_CONSTANT(0xceaa0000, cwakeups, 0, 0, 0, 0); + } +#endif } /* @@ -246,7 +301,6 @@ pmInitComplete(void) (*pmDispatch->pmCPUStateInit)(); earlyTopology = FALSE; } - pmInitDone = 1; } @@ -339,6 +393,7 @@ pmCPUGetDeadline(cpu_data_t *cpu) * Called to determine if the supplied deadline or the power management * deadline is sooner. Returns which ever one is first. */ + uint64_t pmCPUSetDeadline(cpu_data_t *cpu, uint64_t deadline) { @@ -679,8 +734,22 @@ machine_choose_processor(processor_set_t pset, static int pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline) { + int urgency; + uint64_t arg1, arg2; + + urgency = thread_get_urgency(current_processor()->next_thread, &arg1, &arg2); + + if (urgency == THREAD_URGENCY_REAL_TIME) { + if (rt_period != NULL) + *rt_period = arg1; + + if (rt_deadline != NULL) + *rt_deadline = arg2; + } + + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), urgency, arg1, arg2, 0, 0); - return(thread_get_urgency(rt_period, rt_deadline)); + return(urgency); } #if DEBUG @@ -693,7 +762,8 @@ uint64_t urgency_notification_assert_abstime_threshold, urgency_notification_max void thread_tell_urgency(int urgency, uint64_t rt_period, - uint64_t rt_deadline) + uint64_t rt_deadline, + thread_t nthread) { uint64_t urgency_notification_time_start, delta; boolean_t urgency_assert = (urgency_notification_assert_abstime_threshold != 0); @@ -706,11 +776,12 @@ thread_tell_urgency(int urgency, || pmDispatch->pmThreadTellUrgency == NULL) return; - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, 0, 0); if (__improbable((urgency_assert == TRUE))) urgency_notification_time_start = mach_absolute_time(); + current_cpu_datap()->cpu_nthread = nthread; pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline); if (__improbable((urgency_assert == TRUE))) { @@ -728,7 +799,7 @@ thread_tell_urgency(int urgency, } } - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, (rt_deadline >> 32), rt_deadline, 0); + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0); } void @@ -775,12 +846,12 @@ pmReSyncDeadlines(int cpu) static boolean_t registered = FALSE; if (!registered) { - PM_interrupt_register(&etimer_resync_deadlines); + PM_interrupt_register(&timer_resync_deadlines); registered = TRUE; } if ((uint32_t)cpu == current_cpu_datap()->lcpu.cpu_num) - etimer_resync_deadlines(); + timer_resync_deadlines(); else cpu_PM_interrupt(cpu); } @@ -812,7 +883,7 @@ pmTimerQueueMigrate(int target_cpu) { /* Call the etimer code to do this. 
*/ return (target_cpu != cpu_number()) - ? etimer_queue_migrate(target_cpu) + ? timer_queue_migrate_cpu(target_cpu) : 0; } @@ -825,57 +896,57 @@ pmTimerQueueMigrate(int target_cpu) */ void pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, - pmCallBacks_t *callbacks) -{ - if (callbacks != NULL && version == PM_DISPATCH_VERSION) { - callbacks->setRTCPop = setPop; - callbacks->resyncDeadlines = pmReSyncDeadlines; - callbacks->initComplete = pmInitComplete; - callbacks->GetLCPU = pmGetLogicalCPU; - callbacks->GetCore = pmGetCore; - callbacks->GetDie = pmGetDie; - callbacks->GetPackage = pmGetPackage; - callbacks->GetMyLCPU = pmGetMyLogicalCPU; - callbacks->GetMyCore = pmGetMyCore; - callbacks->GetMyDie = pmGetMyDie; - callbacks->GetMyPackage = pmGetMyPackage; - callbacks->GetPkgRoot = pmGetPkgRoot; - callbacks->LockCPUTopology = pmLockCPUTopology; - callbacks->GetHibernate = pmCPUGetHibernate; - callbacks->LCPUtoProcessor = pmLCPUtoProcessor; - callbacks->ThreadBind = thread_bind; - callbacks->GetSavedRunCount = pmGetSavedRunCount; - callbacks->GetNanotimeInfo = pmGetNanotimeInfo; - callbacks->ThreadGetUrgency = pmThreadGetUrgency; - callbacks->RTCClockAdjust = rtc_clock_adjust; - callbacks->timerQueueMigrate = pmTimerQueueMigrate; - callbacks->topoParms = &topoParms; - callbacks->pmSendIPI = pmSendIPI; - callbacks->InterruptPending = lapic_is_interrupt_pending; - callbacks->IsInterrupting = lapic_is_interrupting; - callbacks->InterruptStats = lapic_interrupt_counts; - callbacks->DisableApicTimer = lapic_disable_timer; - } else { - panic("Version mis-match between Kernel and CPU PM"); - } + pmCallBacks_t *callbacks) +{ + if (callbacks != NULL && version == PM_DISPATCH_VERSION) { + callbacks->setRTCPop = setPop; + callbacks->resyncDeadlines = pmReSyncDeadlines; + callbacks->initComplete = pmInitComplete; + callbacks->GetLCPU = pmGetLogicalCPU; + callbacks->GetCore = pmGetCore; + callbacks->GetDie = pmGetDie; + callbacks->GetPackage = pmGetPackage; + callbacks->GetMyLCPU = pmGetMyLogicalCPU; + callbacks->GetMyCore = pmGetMyCore; + callbacks->GetMyDie = pmGetMyDie; + callbacks->GetMyPackage = pmGetMyPackage; + callbacks->GetPkgRoot = pmGetPkgRoot; + callbacks->LockCPUTopology = pmLockCPUTopology; + callbacks->GetHibernate = pmCPUGetHibernate; + callbacks->LCPUtoProcessor = pmLCPUtoProcessor; + callbacks->ThreadBind = thread_bind; + callbacks->GetSavedRunCount = pmGetSavedRunCount; + callbacks->GetNanotimeInfo = pmGetNanotimeInfo; + callbacks->ThreadGetUrgency = pmThreadGetUrgency; + callbacks->RTCClockAdjust = rtc_clock_adjust; + callbacks->timerQueueMigrate = pmTimerQueueMigrate; + callbacks->topoParms = &topoParms; + callbacks->pmSendIPI = pmSendIPI; + callbacks->InterruptPending = lapic_is_interrupt_pending; + callbacks->IsInterrupting = lapic_is_interrupting; + callbacks->InterruptStats = lapic_interrupt_counts; + callbacks->DisableApicTimer = lapic_disable_timer; + } else { + panic("Version mis-match between Kernel and CPU PM"); + } - if (cpuFuncs != NULL) { - if (pmDispatch) { - panic("Attempt to re-register power management interface--AICPM present in xcpm mode? %p->%p", pmDispatch, cpuFuncs); - } + if (cpuFuncs != NULL) { + if (pmDispatch) { + panic("Attempt to re-register power management interface--AICPM present in xcpm mode? 
%p->%p", pmDispatch, cpuFuncs); + } - pmDispatch = cpuFuncs; + pmDispatch = cpuFuncs; - if (earlyTopology - && pmDispatch->pmCPUStateInit != NULL) { - (*pmDispatch->pmCPUStateInit)(); - earlyTopology = FALSE; - } + if (earlyTopology + && pmDispatch->pmCPUStateInit != NULL) { + (*pmDispatch->pmCPUStateInit)(); + earlyTopology = FALSE; + } - if (pmDispatch->pmIPIHandler != NULL) { - lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler); + if (pmDispatch->pmIPIHandler != NULL) { + lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler); + } } - } } /* @@ -889,46 +960,6 @@ pmUnRegister(pmDispatch_t *cpuFuncs) } } -/****************************************************************************** - * - * All of the following are deprecated interfaces and no longer used. - * - ******************************************************************************/ -kern_return_t -pmsControl(__unused uint32_t request, __unused user_addr_t reqaddr, - __unused uint32_t reqsize) -{ - return(KERN_SUCCESS); -} - -void -pmsInit(void) -{ -} - -void -pmsStart(void) -{ -} - -void -pmsPark(void) -{ -} - -void -pmsRun(__unused uint32_t nstep) -{ -} - -kern_return_t -pmsBuild(__unused pmsDef *pd, __unused uint32_t pdsize, - __unused pmsSetFunc_t *functab, - __unused uint32_t platformData, __unused pmsQueryFunc_t queryFunc) -{ - return(KERN_SUCCESS); -} - void machine_track_platform_idle(boolean_t entry) { cpu_data_t *my_cpu = current_cpu_datap(); diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c deleted file mode 100644 index 2e0594487..000000000 --- a/osfmk/i386/pmap.c +++ /dev/null @@ -1,2817 +0,0 @@ -/* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -/* - * File: pmap.c - * Author: Avadis Tevanian, Jr., Michael Wayne Young - * (These guys wrote the Vax version) - * - * Physical Map management code for Intel i386, i486, and i860. - * - * Manages physical address maps. - * - * In addition to hardware address maps, this - * module is called upon to provide software-use-only - * maps which may or may not be stored in the same - * form as hardware maps. These pseudo-maps are - * used to store intermediate results from copy - * operations to and from address spaces. - * - * Since the information managed by this module is - * also stored by the logical address mapping module, - * this module may throw away valid virtual-to-physical - * mappings at almost any time. However, invalidations - * of virtual-to-physical mappings must be done as - * requested. - * - * In order to cope with hardware architectures which - * make virtual-to-physical map invalidates expensive, - * this module may delay invalidate or reduced protection - * operations until such time as they are actually - * necessary. This module is given full information as - * to which processors are currently using which maps, - * and to when physical maps must be made correct. - */ - -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include /* prototyping */ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - - -/* #define DEBUGINTERRUPTS 1 uncomment to ensure pmap callers have interrupts enabled */ -#ifdef DEBUGINTERRUPTS -#define pmap_intr_assert() {if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);} -#else -#define pmap_intr_assert() -#endif - -#ifdef IWANTTODEBUG -#undef DEBUG -#define DEBUG 1 -#define POSTCODE_DELAY 1 -#include -#endif /* IWANTTODEBUG */ - -#ifdef PMAP_DEBUG -void dump_pmap(pmap_t); -void dump_4GB_pdpt(pmap_t p); -void dump_4GB_pdpt_thread(thread_t tp); -#endif - -int nx_enabled = 1; /* enable no-execute protection */ -#ifdef CONFIG_EMBEDDED -int allow_data_exec = 0; /* no exec from data, embedded is hardcore like that */ -#else -int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64-bit apps may not */ -#endif -int allow_stack_exec = 0; /* No apps may execute from the stack by default */ - -#if CONFIG_YONAH -boolean_t cpu_64bit = FALSE; -#else -const boolean_t cpu_64bit = TRUE; -#endif -boolean_t pmap_trace = FALSE; - -uint64_t max_preemption_latency_tsc = 0; - -pv_hashed_entry_t *pv_hash_table; /* hash lists */ - -uint32_t npvhash = 0; - -/* - * pv_list entries are kept on a list that can only be accessed - * with the pmap system locked (at SPLVM, not in the cpus_active set). 
- * The list is refilled from the pv_hashed_list_zone if it becomes empty. - */ -pv_rooted_entry_t pv_free_list = PV_ROOTED_ENTRY_NULL; /* free list at SPLVM */ -pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL; -pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL; -decl_simple_lock_data(,pv_hashed_free_list_lock) -decl_simple_lock_data(,pv_hashed_kern_free_list_lock) -decl_simple_lock_data(,pv_hash_table_lock) - -zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */ - -static zone_t pdpt_zone; - -/* - * First and last physical addresses that we maintain any information - * for. Initialized to zero so that pmap operations done before - * pmap_init won't touch any non-existent structures. - */ -boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */ - -static struct vm_object kptobj_object_store; -static vm_object_t kptobj; - -/* - * Index into pv_head table, its lock bits, and the modify/reference and managed bits - */ - -/* - * Array of physical page attribites for managed pages. - * One byte per physical page. - */ -char *pmap_phys_attributes; -unsigned int last_managed_page = 0; - -uint64_t pde_mapped_size; - -const boolean_t pmap_disable_kheap_nx = TRUE; -const boolean_t pmap_disable_kstack_nx = TRUE; - - -#if USLOCK_DEBUG -extern int max_lock_loops; -#define LOOP_VAR \ - unsigned int loop_count; \ - loop_count = disable_serial_output ? max_lock_loops \ - : max_lock_loops*100 -#define LOOP_CHECK(msg, pmap) \ - if (--loop_count == 0) { \ - mp_disable_preemption(); \ - kprintf("%s: cpu %d pmap %x\n", \ - msg, cpu_number(), pmap); \ - Debugger("deadlock detection"); \ - mp_enable_preemption(); \ - loop_count = max_lock_loops; \ - } -#else /* USLOCK_DEBUG */ -#define LOOP_VAR -#define LOOP_CHECK(msg, pmap) -#endif /* USLOCK_DEBUG */ - -unsigned pmap_memory_region_count; -unsigned pmap_memory_region_current; - -pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; - -/* - * Other useful macros. - */ -#define current_pmap() (vm_map_pmap(current_thread()->map)) - -struct pmap kernel_pmap_store; -pmap_t kernel_pmap; - -pd_entry_t high_shared_pde; -pd_entry_t commpage64_pde; - -struct zone *pmap_zone; /* zone of pmap structures */ - -int pmap_debug = 0; /* flag for debugging prints */ - -unsigned int inuse_ptepages_count = 0; -long long alloc_ptepages_count __attribute__((aligned(8))) = 0LL; /* aligned for atomic access */ -unsigned int bootstrap_wired_pages = 0; -int pt_fake_zone_index = -1; - -extern long NMIPI_acks; - -addr64_t kernel64_cr3; -boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */ - -boolean_t kernel_text_ps_4K = TRUE; -boolean_t wpkernel = TRUE; - -extern char end; -static int nkpt; - -pt_entry_t *DMAP1, *DMAP2; -caddr_t DADDR1; -caddr_t DADDR2; - -/* - * for legacy, returns the address of the pde entry. - * for 64 bit, causes the pdpt page containing the pde entry to be mapped, - * then returns the mapped address of the pde entry in that page - */ -pd_entry_t * -pmap_pde(pmap_t m, vm_map_offset_t v) -{ - pd_entry_t *pde; - if (!cpu_64bit || (m == kernel_pmap)) { - pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT])); - } else { - assert(m); - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - pde = pmap64_pde(m, v); - } - return pde; -} - -/* - * the single pml4 page per pmap is allocated at pmap create time and exists - * for the duration of the pmap. we allocate this page in kernel vm (to save us one - * level of page table dynamic mapping. 
- * this returns the address of the requested pml4 entry in the top level page. - */ -static inline -pml4_entry_t * -pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr) -{ - return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1)))); -} - -/* - * maps in the pml4 page, if any, containing the pdpt entry requested - * and returns the address of the pdpt entry in that mapped page - */ -pdpt_entry_t * -pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr) -{ - pml4_entry_t newpf; - pml4_entry_t *pml4; - int i; - - assert(pmap); - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) { - return(0); - } - - pml4 = pmap64_pml4(pmap, vaddr); - - if (pml4 && ((*pml4 & INTEL_PTE_VALID))) { - - newpf = *pml4 & PG_FRAME; - - - for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) { - if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { - return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + - ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1)))); - } - } - - current_cpu_datap()->cpu_pmap->pdpt_window_index++; - if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1)) - current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW; - pmap_store_pte( - (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP), - newpf | INTEL_PTE_RW | INTEL_PTE_VALID); - invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR)); - return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) + - ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1)))); - } - - return (NULL); -} - -/* - * maps in the pdpt page, if any, containing the pde entry requested - * and returns the address of the pde entry in that mapped page - */ -pd_entry_t * -pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr) -{ - pdpt_entry_t newpf; - pdpt_entry_t *pdpt; - int i; - - assert(pmap); - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) { - return(0); - } - - /* if (vaddr & (1ULL << 63)) panic("neg addr");*/ - pdpt = pmap64_pdpt(pmap, vaddr); - - if (pdpt && ((*pdpt & INTEL_PTE_VALID))) { - - newpf = *pdpt & PG_FRAME; - - for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) { - if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { - return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + - ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1)))); - } - } - - current_cpu_datap()->cpu_pmap->pde_window_index++; - if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1)) - current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW; - pmap_store_pte( - (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP), - newpf | INTEL_PTE_RW | INTEL_PTE_VALID); - invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR)); - return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) + - ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1)))); - } - - return (NULL); -} - -/* - * Because 
the page tables (top 3 levels) are mapped into per cpu windows, - * callers must either disable interrupts or disable preemption before calling - * one of the pte mapping routines (e.g. pmap_pte()) as the returned vaddr - * is in one of those mapped windows and that cannot be allowed to change until - * the caller is done using the returned pte pointer. When done, the caller - * restores interrupts or preemption to its previous state after which point the - * vaddr for the returned pte can no longer be used - */ - - -/* - * return address of mapped pte for vaddr va in pmap pmap. - * must be called with pre-emption or interrupts disabled - * if targeted pmap is not the kernel pmap - * since we may be passing back a virtual address that is - * associated with this cpu... pre-emption or interrupts - * must remain disabled until the caller is done using - * the pointer that was passed back . - * - * maps the pde page, if any, containing the pte in and returns - * the address of the pte in that mapped page - */ -pt_entry_t * -pmap_pte(pmap_t pmap, vm_map_offset_t vaddr) -{ - pd_entry_t *pde; - pd_entry_t newpf; - int i; - - assert(pmap); - pde = pmap_pde(pmap,vaddr); - - if (pde && ((*pde & INTEL_PTE_VALID))) { - if (*pde & INTEL_PTE_PS) - return pde; - if (pmap == kernel_pmap) - return (vtopte(vaddr)); /* compat kernel still has pte's mapped */ -#if TESTING - if (ml_get_interrupts_enabled() && get_preemption_level() == 0) - panic("pmap_pte: unsafe call"); -#endif - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - - newpf = *pde & PG_FRAME; - - for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) { - if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { - return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + - ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1))); - } - } - - current_cpu_datap()->cpu_pmap->pte_window_index++; - if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1)) - current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW; - pmap_store_pte( - (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP), - newpf | INTEL_PTE_RW | INTEL_PTE_VALID); - invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR)); - return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) + - ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1))); - } - - return(NULL); -} - - -/* - * Map memory at initialization. The physical addresses being - * mapped are not managed and are never unmapped. - * - * For now, VM is already on, we only need to map the - * specified memory. 
- */ -vm_offset_t -pmap_map( - vm_offset_t virt, - vm_map_offset_t start_addr, - vm_map_offset_t end_addr, - vm_prot_t prot, - unsigned int flags) -{ - int ps; - - ps = PAGE_SIZE; - while (start_addr < end_addr) { - pmap_enter(kernel_pmap, (vm_map_offset_t)virt, - (ppnum_t) i386_btop(start_addr), prot, VM_PROT_NONE, flags, FALSE); - virt += ps; - start_addr += ps; - } - return(virt); -} - -extern pmap_paddr_t first_avail; -extern vm_offset_t virtual_avail, virtual_end; -extern pmap_paddr_t avail_start, avail_end; -extern vm_offset_t sHIB; -extern vm_offset_t eHIB; -extern vm_offset_t stext; -extern vm_offset_t etext; -extern vm_offset_t sdata; - -extern void *KPTphys; - -void -pmap_cpu_init(void) -{ - /* - * Here early in the life of a processor (from cpu_mode_init()). - */ - - /* - * Initialize the per-cpu, TLB-related fields. - */ - current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3; - current_cpu_datap()->cpu_tlb_invalid = FALSE; -} - -vm_offset_t -pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz) -{ - vm_offset_t ve = pmap_index_to_virt(e); - pt_entry_t *ptep; - pmap_paddr_t pa; - int i; - spl_t s; - - assert(0 == (va & PAGE_MASK)); /* expecting page aligned */ - s = splhigh(); - ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve); - - for (i=0; i< sz; i++) { - pa = (pmap_paddr_t) kvtophys(va); - pmap_store_pte(ptep, (pa & PG_FRAME) - | INTEL_PTE_VALID - | INTEL_PTE_GLOBAL - | INTEL_PTE_RW - | INTEL_PTE_REF - | INTEL_PTE_MOD); - va+= PAGE_SIZE; - ptep++; - } - splx(s); - return ve; -} - -vm_offset_t -pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz) -{ - enum high_fixed_addresses a = e + HIGH_CPU_END * cpu; - return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz); -} - -void pmap_init_high_shared(void); - -extern vm_offset_t gdtptr, idtptr; - -extern uint32_t low_intstack; - -extern struct fake_descriptor ldt_desc_pattern; -extern struct fake_descriptor tss_desc_pattern; - -extern char hi_remap_text, hi_remap_etext; -extern char t_zero_div; - -pt_entry_t *pte_unique_base; - -void -pmap_init_high_shared(void) -{ - - vm_offset_t haddr; - spl_t s; - - cpu_desc_index_t * cdi = &cpu_data_master.cpu_desc_index; - - kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n", - HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN)); - s = splhigh(); - pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN)); - splx(s); - - if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) > - HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1) - panic("tramps too large"); - haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS, - (vm_offset_t) &hi_remap_text, 3); - kprintf("tramp: 0x%x, ",haddr); - /* map gdt up high and update ptr for reload */ - haddr = pmap_high_shared_remap(HIGH_FIXED_GDT, - (vm_offset_t) master_gdt, 1); - cdi->cdi_gdt.ptr = (void *)haddr; - kprintf("GDT: 0x%x, ",haddr); - /* map ldt up high */ - haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN, - (vm_offset_t) master_ldt, - HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1); - cdi->cdi_ldt = (struct fake_descriptor *)haddr; - kprintf("LDT: 0x%x, ",haddr); - /* put new ldt addr into gdt */ - struct fake_descriptor temp_fake_desc; - temp_fake_desc = ldt_desc_pattern; - temp_fake_desc.offset = (vm_offset_t) haddr; - fix_desc(&temp_fake_desc, 1); - - *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = temp_fake_desc; - *(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = temp_fake_desc; - - /* map idt up high */ - 
haddr = pmap_high_shared_remap(HIGH_FIXED_IDT, - (vm_offset_t) master_idt, 1); - cdi->cdi_idt.ptr = (void *)haddr; - kprintf("IDT: 0x%x, ", haddr); - /* remap ktss up high and put new high addr into gdt */ - haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS, - (vm_offset_t) &master_ktss, 1); - - temp_fake_desc = tss_desc_pattern; - temp_fake_desc.offset = (vm_offset_t) haddr; - fix_desc(&temp_fake_desc, 1); - *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = temp_fake_desc; - kprintf("KTSS: 0x%x, ",haddr); - - /* remap dftss up high and put new high addr into gdt */ - haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, - (vm_offset_t) &master_dftss, 1); - temp_fake_desc = tss_desc_pattern; - temp_fake_desc.offset = (vm_offset_t) haddr; - fix_desc(&temp_fake_desc, 1); - *(struct fake_descriptor *) &master_gdt[sel_idx(DF_TSS)] = temp_fake_desc; - kprintf("DFTSS: 0x%x\n",haddr); - - /* remap mctss up high and put new high addr into gdt */ - haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, - (vm_offset_t) &master_mctss, 1); - temp_fake_desc = tss_desc_pattern; - temp_fake_desc.offset = (vm_offset_t) haddr; - fix_desc(&temp_fake_desc, 1); - *(struct fake_descriptor *) &master_gdt[sel_idx(MC_TSS)] = temp_fake_desc; - kprintf("MCTSS: 0x%x\n",haddr); - - cpu_desc_load(&cpu_data_master); -} - - -/* - * Bootstrap the system enough to run with virtual memory. - * Map the kernel's code and data, and allocate the system page table. - * Called with mapping OFF. Page_size must already be set. - */ - -void -pmap_bootstrap( - __unused vm_offset_t load_start, - boolean_t IA32e) -{ - vm_offset_t va; - unsigned i; - pdpt_entry_t *pdpt; - spl_t s; - - vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address - * known to VM */ - /* - * The kernel's pmap is statically allocated so we don't - * have to use pmap_create, which is unlikely to work - * correctly at this part of the boot sequence. 
- */ - - - kernel_pmap = &kernel_pmap_store; - kernel_pmap->ref_count = 1; - kernel_pmap->nx_enabled = FALSE; - kernel_pmap->pm_task_map = TASK_MAP_32BIT; - kernel_pmap->pm_obj = (vm_object_t) NULL; - kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE); - kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD); - pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE ); - kernel_pmap->pm_pdpt = pdpt; - kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT); - - - va = (vm_offset_t)kernel_pmap->dirbase; - /* setup self referential mapping(s) */ - for (i = 0; i< NPGPTD; i++, pdpt++) { - pmap_paddr_t pa; - pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i))); - pmap_store_pte( - (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i), - (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | - INTEL_PTE_MOD | INTEL_PTE_WIRED) ; - pmap_store_pte(pdpt, pa | INTEL_PTE_VALID); - } - -#if CONFIG_YONAH - /* 32-bit and legacy support depends on IA32e mode being disabled */ - cpu_64bit = IA32e; -#endif - - lo_kernel_cr3 = kernel_pmap->pm_cr3; - current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3; - - /* save the value we stuff into created pmaps to share the gdts etc */ - high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE); - /* make sure G bit is on for high shared pde entry */ - high_shared_pde |= INTEL_PTE_GLOBAL; - s = splhigh(); - pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde); - splx(s); - - nkpt = NKPT; - OSAddAtomic(NKPT, &inuse_ptepages_count); - OSAddAtomic64(NKPT, &alloc_ptepages_count); - bootstrap_wired_pages = NKPT; - - virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail; - virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS); - - /* - * Reserve some special page table entries/VA space for temporary - * mapping of pages. - */ - va = virtual_avail; - pt_entry_t *pte; - pte = vtopte(va); -#define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n) - - for (i=0; icpu_pmap->mapwindow[i].prv_CMAP), - (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR), - 1); - *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0; - } - - /* DMAP user for debugger */ - SYSMAP(caddr_t, DMAP1, DADDR1, 1); - SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */ - - virtual_avail = va; - - if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) { - if (0 != ((npvhash+1) & npvhash)) { - kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n",npvhash,NPVHASH); - npvhash = NPVHASH; - } - } else { - npvhash = NPVHASH; - } - printf("npvhash=%d\n",npvhash); - - simple_lock_init(&kernel_pmap->lock, 0); - simple_lock_init(&pv_hashed_free_list_lock, 0); - simple_lock_init(&pv_hashed_kern_free_list_lock, 0); - simple_lock_init(&pv_hash_table_lock,0); - - pmap_init_high_shared(); - - pde_mapped_size = PDE_MAPPED_SIZE; - - if (cpu_64bit) { - pdpt_entry_t *ppdpt = IdlePDPT; - pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64; - pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4; - int istate = ml_set_interrupts_enabled(FALSE); - - /* - * Clone a new 64-bit 3rd-level page table directory, IdlePML4, - * with page bits set for the correct IA-32e operation and so that - * the legacy-mode IdlePDPT is retained for slave processor start-up. - * This is necessary due to the incompatible use of page bits between - * 64-bit and legacy modes. 
- */ - kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */ - kernel_pmap->pm_pml4 = IdlePML4; - kernel_pmap->pm_pdpt = (pd_entry_t *) - ((unsigned int)IdlePDPT64 | KERNBASE ); -#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF - pmap_store_pte(kernel_pmap->pm_pml4, - (uint32_t)IdlePDPT64 | PAGE_BITS); - pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS); - pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS); - pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS); - pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS); - - /* - * The kernel is also mapped in the uber-space starting at - * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level. - */ - pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0)); - - kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3; - - /* Re-initialize descriptors and prepare to switch modes */ - cpu_desc_init64(&cpu_data_master); - current_cpu_datap()->cpu_is64bit = TRUE; - current_cpu_datap()->cpu_active_cr3 = kernel64_cr3; - - pde_mapped_size = 512*4096; - - ml_set_interrupts_enabled(istate); - } - - /* Sets 64-bit mode if required. */ - cpu_mode_init(&cpu_data_master); - /* Update in-kernel CPUID information if we're now in 64-bit mode */ - if (IA32e) - cpuid_set_info(); - - kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4; - - kprintf("Kernel virtual space from 0x%x to 0x%x.\n", - VADDR(KPTDI,0), virtual_end); - printf("PAE enabled\n"); - if (cpu_64bit){ - printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); } - - kprintf("Available physical space from 0x%llx to 0x%llx\n", - avail_start, avail_end); - - /* - * By default for 64-bit users loaded at 4GB, share kernel mapping. - * But this may be overridden by the -no_shared_cr3 boot-arg. - */ - if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) { - kprintf("Shared kernel address space disabled\n"); - } - -#ifdef PMAP_TRACES - if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) { - kprintf("Kernel traces for pmap operations enabled\n"); - } -#endif /* PMAP_TRACES */ -} - -void -pmap_virtual_space( - vm_offset_t *startp, - vm_offset_t *endp) -{ - *startp = virtual_avail; - *endp = virtual_end; -} - -/* - * Initialize the pmap module. - * Called by vm_init, to initialize any structures that the pmap - * system needs to map virtual memory. - */ -void -pmap_init(void) -{ - long npages; - vm_map_offset_t vaddr; - vm_offset_t addr; - vm_size_t s, vsize; - ppnum_t ppn; - - /* - * Allocate memory for the pv_head_table and its lock bits, - * the modify bit array, and the pte_page table. - */ - - /* - * zero bias all these arrays now instead of off avail_start - * so we cover all memory - */ - - npages = (long)i386_btop(avail_end); - s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages - + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1)) - + pv_lock_table_size(npages) - + pv_hash_lock_table_size((npvhash+1)) - + npages); - - s = round_page(s); - if (kernel_memory_allocate(kernel_map, &addr, s, 0, - KMA_KOBJECT | KMA_PERMANENT) - != KERN_SUCCESS) - panic("pmap_init"); - - memset((char *)addr, 0, s); - - vaddr = addr; - vsize = s; - -#if PV_DEBUG - if (0 == npvhash) panic("npvhash not initialized"); -#endif - - /* - * Allocate the structures first to preserve word-alignment.
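- * The single allocation made above is carved up below in the same order it - * was sized, as a quick orientation: pv_head_table (npages rooted entries), - * pv_hash_table (npvhash+1 bucket pointers), pv_lock_table - * (pv_lock_table_size(npages) bytes), pv_hash_lock_table - * (pv_hash_lock_table_size(npvhash+1) bytes), and pmap_phys_attributes - * (npages bytes, one attribute byte per physical page).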
- */ - pv_head_table = (pv_rooted_entry_t) addr; - addr = (vm_offset_t) (pv_head_table + npages); - - pv_hash_table = (pv_hashed_entry_t *)addr; - addr = (vm_offset_t) (pv_hash_table + (npvhash + 1)); - - pv_lock_table = (char *) addr; - addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages)); - - pv_hash_lock_table = (char *) addr; - addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1))); - - pmap_phys_attributes = (char *) addr; - { - unsigned int i; - unsigned int pn; - ppnum_t last_pn; - pmap_memory_region_t *pmptr = pmap_memory_regions; - - last_pn = (ppnum_t)i386_btop(avail_end); - - for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { - if (pmptr->type == kEfiConventionalMemory) { - - for (pn = pmptr->base; pn <= pmptr->end; pn++) { - if (pn < last_pn) { - pmap_phys_attributes[pn] |= PHYS_MANAGED; - - if (pn > last_managed_page) - last_managed_page = pn; - - if (pn >= lowest_hi && pn <= highest_hi) - pmap_phys_attributes[pn] |= PHYS_NOENCRYPT; - } - } - } - } - } - while (vsize) { - ppn = pmap_find_phys(kernel_pmap, vaddr); - - pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT; - - vaddr += PAGE_SIZE; - vsize -= PAGE_SIZE; - } - /* - * Create the zone of physical maps, - * and of the physical-to-virtual entries. - */ - s = (vm_size_t) sizeof(struct pmap); - pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */ - zone_change(pmap_zone, Z_NOENCRYPT, TRUE); - - s = (vm_size_t) sizeof(struct pv_hashed_entry); - pv_hashed_list_zone = zinit(s, 10000*s /* Expandable zone */, - 4096 * 4 /* LCM i386 */, "pv_list"); - zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE); - - s = 63; - pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */ - zone_change(pdpt_zone, Z_NOENCRYPT, TRUE); - - kptobj = &kptobj_object_store; - _vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG), kptobj); - kernel_pmap->pm_obj = kptobj; - - /* create pv entries for kernel pages mapped by low level - startup code. these have to exist so we can pmap_remove() - e.g. kext pages from the middle of our addr space */ - - vaddr = (vm_map_offset_t)0; - for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) { - pv_rooted_entry_t pv_e; - - pv_e = pai_to_pvh(ppn); - pv_e->va = vaddr; - vaddr += PAGE_SIZE; - pv_e->pmap = kernel_pmap; - queue_init(&pv_e->qlink); - } - - pmap_initialized = TRUE; - - max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t); - -} - -#ifdef PMAP_DEBUG -#define DBG(x...) kprintf("DBG: " x) -#else -#define DBG(x...) -#endif - -/* - * Called once VM is fully initialized so that we can release unused - * sections of low memory to the general pool. - * Also complete the set-up of identity-mapped sections of the kernel: - * 1) write-protect kernel text - * 2) map kernel text using large pages if possible - * 3) read and write-protect page zero (for K32) - * 4) map the global page at the appropriate virtual address. - * - * Use of large pages - * ------------------ - * To effectively map and write-protect all kernel text pages, the text - * must be 2M-aligned at the base, and the data section above must also be - * 2M-aligned. That is, there's padding below and above. This is achieved - * through linker directives. Large pages are used only if this alignment - * exists (and is not overridden by the -kernel_text_ps_4K boot-arg).
The - * memory layout is: - * - * : : - * | __DATA | - * sdata: ================== 2Meg - * | | - * | zero-padding | - * | | - * etext: ------------------ - * | | - * : : - * | | - * | __TEXT | - * | | - * : : - * | | - * stext: ================== 2Meg - * | | - * | zero-padding | - * | | - * eHIB: ------------------ - * | __HIB | - * : : - * - * Prior to changing the mapping from 4K to 2M, the zero-padding pages - * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the - * 4K pages covering [stext,etext] are coalesced as 2M large pages. - * The now unused level-1 PTE pages are also freed. - */ -extern uint32_t pmap_reserved_ranges; -void -pmap_lowmem_finalize(void) -{ - spl_t spl; - int i; - - /* Check the kernel is linked at the expected base address */ - if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) != - I386_KERNEL_IMAGE_BASE_PAGE) - panic("pmap_lowmem_finalize() unexpected kernel base address"); - - /* - * Update wired memory statistics for early boot pages - */ - PMAP_ZINFO_PALLOC(kernel_pmap, bootstrap_wired_pages * PAGE_SIZE); - - /* - * Free all pages in pmap regions below the base: - * rdar://6332712 - * We can't free all the pages to VM that EFI reports available. - * Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake. - * There's also a size miscalculation here: pend is one page less - * than it should be but this is not fixed to be backwards - * compatible. - * Due to this current EFI limitation, we take only the first - * entry in the memory region table. However, the loop is retained - * (with the intended termination criteria commented out) in the - * hope that some day we can free all low-memory ranges. - */ - for (i = 0; -// pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE; - i < 1 && (pmap_reserved_ranges == 0); - i++) { - vm_offset_t pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base); - vm_offset_t pend = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end); -// vm_offset_t pend = i386_ptob(pmap_memory_regions[i].end+1); - - DBG("ml_static_mfree(%p,%p) for pmap region %d\n", - (void *) ml_static_ptovirt(pbase), - (void *) (pend - pbase), i); - ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase); - } - - /* - * If text and data are both 2MB-aligned, - * we can map text with large-pages, - * unless the -kernel_text_ps_4K boot-arg overrides. - */ - if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) { - kprintf("Kernel text is 2MB aligned"); - kernel_text_ps_4K = FALSE; - if (PE_parse_boot_argn("-kernel_text_ps_4K", - &kernel_text_ps_4K, - sizeof (kernel_text_ps_4K))) - kprintf(" but will be mapped with 4K pages\n"); - else - kprintf(" and will be mapped with 2M pages\n"); - } - - (void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel)); - if (wpkernel) - kprintf("Kernel text %p-%p to be write-protected\n", - (void *) stext, (void *) etext); - - spl = splhigh(); - - /* - * Scan over text if mappings are to be changed: - * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0 - * - Change to large-pages if possible and not overridden. - */ - if (kernel_text_ps_4K && wpkernel) { - vm_offset_t myva; - for (myva = stext; myva < etext; myva += PAGE_SIZE) { - pt_entry_t *ptep; - - ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); - if (ptep) - pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW); - } - } - - if (!kernel_text_ps_4K) { - vm_offset_t myva; - - /* - * Release zero-filled page padding used for 2M-alignment.
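- * - * (Worked example with illustrative addresses: if stext = 0x2000000 and - * sdata = 0x2800000, both are multiples of I386_LPGBYTES (0x200000), so the - * I386_LPGMASK tests above pass and the 0x800000 bytes of text collapse from - * 2048 4K PTEs into four 2M PDEs in the coalescing loop below.)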
- */ - DBG("ml_static_mfree(%p,%p) for padding below text\n", - (void *) eHIB, (void *) (stext - eHIB)); - ml_static_mfree(eHIB, stext - eHIB); - DBG("ml_static_mfree(%p,%p) for padding above text\n", - (void *) etext, (void *) (sdata - etext)); - ml_static_mfree(etext, sdata - etext); - - /* - * Coalesce text pages into large pages. - */ - for (myva = stext; myva < sdata; myva += I386_LPGBYTES) { - pt_entry_t *ptep; - vm_offset_t pte_phys; - pt_entry_t *pdep; - pt_entry_t pde; - - pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva); - ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); - DBG("myva: %p pdep: %p ptep: %p\n", - (void *) myva, (void *) pdep, (void *) ptep); - if ((*ptep & INTEL_PTE_VALID) == 0) - continue; - pte_phys = (vm_offset_t)(*ptep & PG_FRAME); - pde = *pdep & PTMASK; /* page attributes from pde */ - pde |= INTEL_PTE_PS; /* make it a 2M entry */ - pde |= pte_phys; /* take page frame from pte */ - - if (wpkernel) - pde &= ~INTEL_PTE_RW; - DBG("pmap_store_pte(%p,0x%llx)\n", - (void *)pdep, pde); - pmap_store_pte(pdep, pde); - - /* - * Free the now-unused level-1 pte. - * Note: ptep is a virtual address to the pte in the - * recursive map. We can't use this address to free - * the page. Instead we need to compute its address - * in the Idle PTEs in "low memory". - */ - vm_offset_t vm_ptep = (vm_offset_t) KPTphys - + (pte_phys >> PTPGSHIFT); - DBG("ml_static_mfree(%p,0x%x) for pte\n", - (void *) vm_ptep, PAGE_SIZE); - ml_static_mfree(vm_ptep, PAGE_SIZE); - } - - /* Change variable read by sysctl machdep.pmap */ - pmap_kernel_text_ps = I386_LPGBYTES; - } - - /* no matter what, kernel page zero is not accessible */ - pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID); - - /* map lowmem global page into fixed addr */ - pt_entry_t *pte = NULL; - if (0 == (pte = pmap_pte(kernel_pmap, - VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000))) - panic("lowmem pte"); - /* make sure it is defined on page boundary */ - assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK)); - pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo) - | INTEL_PTE_REF - | INTEL_PTE_MOD - | INTEL_PTE_WIRED - | INTEL_PTE_VALID - | INTEL_PTE_RW); - splx(spl); - flush_tlb(); -} - -#define managed_page(x) ( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) ) - -/* - * this function is only used for debugging from the vm layer - */ -boolean_t -pmap_verify_free( - ppnum_t pn) -{ - pv_rooted_entry_t pv_h; - int pai; - boolean_t result; - - assert(pn != vm_page_fictitious_addr); - - if (!pmap_initialized) - return(TRUE); - - if (pn == vm_page_guard_addr) - return TRUE; - - pai = ppn_to_pai(pn); - if (!managed_page(pai)) - return(FALSE); - pv_h = pai_to_pvh(pn); - result = (pv_h->pmap == PMAP_NULL); - return(result); -} - -boolean_t -pmap_is_empty( - pmap_t pmap, - vm_map_offset_t va_start, - vm_map_offset_t va_end) -{ - vm_map_offset_t offset; - ppnum_t phys_page; - - if (pmap == PMAP_NULL) { - return TRUE; - } - - /* - * Check the resident page count - * - if it's zero, the pmap is completely empty. - * This short-circuit test prevents a virtual address scan which is - * painfully slow for 64-bit spaces. - * This assumes the count is correct - * ... the debug kernel ought to be checking, perhaps by page table walk.
- */ - if (pmap->stats.resident_count == 0) - return TRUE; - - for (offset = va_start; - offset < va_end; - offset += PAGE_SIZE_64) { - phys_page = pmap_find_phys(pmap, offset); - if (phys_page) { - if (pmap != kernel_pmap && - pmap->pm_task_map == TASK_MAP_32BIT && - offset >= HIGH_MEM_BASE) { - /* - * The "high_shared_pde" is used to share - * the entire top-most 2MB of address space - * between the kernel and all 32-bit tasks. - * So none of this can be removed from 32-bit - * tasks. - * Let's pretend there's nothing up - * there... - */ - return TRUE; - } - kprintf("pmap_is_empty(%p,0x%llx,0x%llx): " - "page %d at 0x%llx\n", - pmap, va_start, va_end, phys_page, offset); - return FALSE; - } - } - - return TRUE; -} - - -/* - * Create and return a physical map. - * - * If the size specified for the map - * is zero, the map is an actual physical - * map, and may be referenced by the - * hardware. - * - * If the size specified is non-zero, - * the map will be used in software only, and - * is bounded by that size. - */ -pmap_t -pmap_create( - ledger_t ledger, - vm_map_size_t sz, - boolean_t is_64bit) -{ - pmap_t p; - unsigned i; - vm_offset_t va; - vm_size_t size; - pdpt_entry_t *pdpt; - pml4_entry_t *pml4p; - pd_entry_t *pdp; - int template; - spl_t s; - - PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, - (int) (sz>>32), (int) sz, (int) is_64bit, 0, 0); - - size = (vm_size_t) sz; - - /* - * A software use-only map doesn't even need a map. - */ - - if (size != 0) { - return(PMAP_NULL); - } - - p = (pmap_t) zalloc(pmap_zone); - if (PMAP_NULL == p) - panic("pmap_create zalloc"); - - /* init counts now since we'll be bumping some */ - simple_lock_init(&p->lock, 0); - p->stats.resident_count = 0; - p->stats.resident_max = 0; - p->stats.wired_count = 0; - ledger_reference(ledger); - p->ledger = ledger; - p->ref_count = 1; - p->nx_enabled = 1; - p->pm_shared = FALSE; - - assert(!is_64bit || cpu_64bit); - p->pm_task_map = is_64bit ? 
TASK_MAP_64BIT : TASK_MAP_32BIT; - - if (!cpu_64bit) { - /* legacy 32 bit setup */ - /* in the legacy case the pdpt layer is hardwired to 4 entries and each - * entry covers 1GB of addr space */ - if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) - panic("pmap_create kmem_alloc_kobject"); - p->pm_hold = (vm_offset_t)zalloc(pdpt_zone); - if ((vm_offset_t)NULL == p->pm_hold) { - panic("pdpt zalloc"); - } - pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31); - p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt); - if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG)))) - panic("pmap_create vm_object_allocate"); - - memset((char *)p->dirbase, 0, NBPTD); - - va = (vm_offset_t)p->dirbase; - p->pdirbase = kvtophys(va); - - PMAP_ZINFO_SALLOC(p,NBPTD); - - template = INTEL_PTE_VALID; - for (i = 0; i< NPGPTD; i++, pdpt++ ) { - pmap_paddr_t pa; - pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i))); - pmap_store_pte(pdpt, pa | template); - } - - /* map the high shared pde */ - s = splhigh(); - pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde); - splx(s); - - } else { - /* 64 bit setup */ - - /* alloc the pml4 page in kernel vm */ - if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE)) - panic("pmap_create kmem_alloc_kobject pml4"); - - memset((char *)p->pm_hold, 0, PAGE_SIZE); - p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold); - - OSAddAtomic(1, &inuse_ptepages_count); - OSAddAtomic64(1, &alloc_ptepages_count); - PMAP_ZINFO_SALLOC(p, PAGE_SIZE); - - /* allocate the vm_objs to hold the pdpt, pde and pte pages */ - - if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS)))) - panic("pmap_create pdpt obj"); - - if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS)))) - panic("pmap_create pdpt obj"); - - if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS)))) - panic("pmap_create pte obj"); - - /* uber space points to uber mapped kernel */ - s = splhigh(); - pml4p = pmap64_pml4(p, 0ULL); - pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4); - - - if (!is_64bit) { - while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) { - splx(s); - pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pde entry */ - s = splhigh(); - } - pmap_store_pte(pdp, high_shared_pde); - } - splx(s); - } - - PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, - (int) p, is_64bit, 0, 0, 0); - - return(p); -} - -/* - * The following routines implement the shared address optimization for 64-bit - * users with a 4GB page zero. - * - * pmap_set_4GB_pagezero() - * is called in the exec and fork paths to mirror the kernel's - * mapping in the bottom 4G of the user's pmap. The task mapping changes - * from TASK_MAP_64BIT to TASK_MAP_64BIT_SHARED. This routine returns - * without doing anything if the -no_shared_cr3 boot-arg is set. - * - * pmap_clear_4GB_pagezero() - * is called in the exec/exit paths to undo this mirror. The task mapping - * reverts to TASK_MAP_64BIT. In addition, we switch to the kernel's - * CR3 by calling pmap_load_kernel_cr3(). - * - * pmap_load_kernel_cr3() - * loads cr3 with the kernel's page table. In addition to being called - * by pmap_clear_4GB_pagezero(), it is used both prior to teardown and - * when we go idle in the context of a shared map.
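- * - * (Summary sketch of the resulting task-map transitions: - * pmap_set_4GB_pagezero(): TASK_MAP_64BIT -> TASK_MAP_64BIT_SHARED, - * a no-op when the -no_shared_cr3 boot-arg is set; - * pmap_clear_4GB_pagezero(): TASK_MAP_64BIT_SHARED -> TASK_MAP_64BIT. - * TASK_MAP_32BIT tasks take neither path; this is asserted on entry.)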
- * - * Further notes on per-cpu data used: - * - * cpu_kernel_cr3 is the cr3 for the kernel's pmap. - * This is loaded in a trampoline on entering the kernel - * from a 32-bit user (or non-shared-cr3 64-bit user). - * cpu_task_cr3 is the cr3 for the current thread. - * This is loaded in a trampoline as we exit the kernel. - * cpu_active_cr3 reflects the cr3 currently loaded. - * However, the low order bit is set when the - * processor is idle or interrupts are disabled - * while the system pmap lock is held. It is used by - * tlb shoot-down. - * cpu_task_map indicates whether the task cr3 belongs to - * a 32-bit, a 64-bit or a 64-bit shared map. - * The latter allows the avoidance of the cr3 load - * on kernel entry and exit. - * cpu_tlb_invalid is set TRUE when a tlb flush is requested. - * If the cr3 is "inactive" (the cpu is idle or the - * system-wide pmap lock is held) this is not serviced by - * an IPI but at the time when the cr3 becomes "active". - */ - -void -pmap_set_4GB_pagezero(pmap_t p) -{ - pdpt_entry_t *user_pdptp; - pdpt_entry_t *kern_pdptp; - - assert(p->pm_task_map != TASK_MAP_32BIT); - - /* Kernel-shared cr3 may be disabled by boot arg. */ - if (no_shared_cr3) - return; - - /* - * Set the bottom 4 3rd-level pte's to be the kernel's. - */ - PMAP_LOCK(p); - while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) { - PMAP_UNLOCK(p); - pmap_expand_pml4(p, 0x0, PMAP_EXPAND_OPTIONS_NONE); - PMAP_LOCK(p); - } - kern_pdptp = kernel_pmap->pm_pdpt; - pmap_store_pte(user_pdptp+0, *(kern_pdptp+0)); - pmap_store_pte(user_pdptp+1, *(kern_pdptp+1)); - pmap_store_pte(user_pdptp+2, *(kern_pdptp+2)); - pmap_store_pte(user_pdptp+3, *(kern_pdptp+3)); - p->pm_task_map = TASK_MAP_64BIT_SHARED; - PMAP_UNLOCK(p); -} - -void -pmap_clear_4GB_pagezero(pmap_t p) -{ - pdpt_entry_t *user_pdptp; - boolean_t istate; - - if (p->pm_task_map != TASK_MAP_64BIT_SHARED) - return; - - PMAP_LOCK(p); - - p->pm_task_map = TASK_MAP_64BIT; - - istate = ml_set_interrupts_enabled(FALSE); - - if (current_cpu_datap()->cpu_task_map == TASK_MAP_64BIT_SHARED) - current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT; - - pmap_load_kernel_cr3(); - - user_pdptp = pmap64_pdpt(p, 0x0); - pmap_store_pte(user_pdptp+0, 0); - pmap_store_pte(user_pdptp+1, 0); - pmap_store_pte(user_pdptp+2, 0); - pmap_store_pte(user_pdptp+3, 0); - - ml_set_interrupts_enabled(istate); - - PMAP_UNLOCK(p); -} - -void -pmap_load_kernel_cr3(void) -{ - uint64_t kernel_cr3; - - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - - /* - * Reload cr3 with the true kernel cr3. - */ - kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3; - set64_cr3(kernel_cr3); - current_cpu_datap()->cpu_active_cr3 = kernel_cr3; - current_cpu_datap()->cpu_tlb_invalid = FALSE; - __asm__ volatile("mfence"); -} - -/* - * Retire the given physical map from service. - * Should only be called if the map contains - * no valid mappings. - */ - -void -pmap_destroy( - register pmap_t p) -{ - register int c; - - if (p == PMAP_NULL) - return; - - PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, - (int) p, 0, 0, 0, 0); - - PMAP_LOCK(p); - - c = --p->ref_count; - - if (c == 0) { - /* - * If some cpu is not using the physical pmap pointer that it - * is supposed to be (see set_dirbase), we might be using the - * pmap that is being destroyed!
Make sure we are - * physically on the right pmap: - */ - PMAP_UPDATE_TLBS(p, - 0x0ULL, - 0xFFFFFFFFFFFFF000ULL); - } - - PMAP_UNLOCK(p); - - if (c != 0) { - PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END, - (int) p, 1, 0, 0, 0); - return; /* still in use */ - } - - /* - * Free the memory maps, then the - * pmap structure. - */ - if (!cpu_64bit) { - OSAddAtomic(-p->pm_obj->resident_page_count, &inuse_ptepages_count); - PMAP_ZINFO_PFREE(p, p->pm_obj->resident_page_count * PAGE_SIZE); - - kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD); - PMAP_ZINFO_SFREE(p, NBPTD); - - zfree(pdpt_zone, (void *)p->pm_hold); - - vm_object_deallocate(p->pm_obj); - } else { - /* 64 bit */ - int inuse_ptepages = 0; - - /* free 64 bit mode structs */ - kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE); - PMAP_ZINFO_SFREE(p, PAGE_SIZE); - - inuse_ptepages += p->pm_obj_pml4->resident_page_count; - vm_object_deallocate(p->pm_obj_pml4); - - inuse_ptepages += p->pm_obj_pdpt->resident_page_count; - vm_object_deallocate(p->pm_obj_pdpt); - - inuse_ptepages += p->pm_obj->resident_page_count; - vm_object_deallocate(p->pm_obj); - - OSAddAtomic(-(inuse_ptepages+1), &inuse_ptepages_count); - PMAP_ZINFO_PFREE(p, inuse_ptepages * PAGE_SIZE); - } - ledger_dereference(p->ledger); - - zfree(pmap_zone, p); - - PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END, - 0, 0, 0, 0, 0); - -} - -/* - * Add a reference to the specified pmap. - */ - -void -pmap_reference( - register pmap_t p) -{ - - if (p != PMAP_NULL) { - PMAP_LOCK(p); - p->ref_count++; - PMAP_UNLOCK(p); - } -} - -/* - * Remove phys addr if mapped in specified map - * - */ -void -pmap_remove_some_phys( - __unused pmap_t map, - __unused ppnum_t pn) -{ - -/* Implement to support working set code */ - -} - -/* - * Set the physical protection on the - * specified range of this map as requested. - * Will not increase permissions.
- */ -void -pmap_protect( - pmap_t map, - vm_map_offset_t sva, - vm_map_offset_t eva, - vm_prot_t prot) -{ - register pt_entry_t *pde; - register pt_entry_t *spte, *epte; - vm_map_offset_t lva; - vm_map_offset_t orig_sva; - boolean_t set_NX; - int num_found = 0; - - pmap_intr_assert(); - - if (map == PMAP_NULL) - return; - - if (prot == VM_PROT_NONE) { - pmap_remove(map, sva, eva); - return; - } - - PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START, - (int) map, - (int) (sva>>32), (int) sva, - (int) (eva>>32), (int) eva); - - if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled ) - set_NX = FALSE; - else - set_NX = TRUE; - - PMAP_LOCK(map); - - orig_sva = sva; - while (sva < eva) { - lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1); - if (lva > eva) - lva = eva; - pde = pmap_pde(map, sva); - if (pde && (*pde & INTEL_PTE_VALID)) { - spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1))); - spte = &spte[ptenum(sva)]; - epte = &spte[intel_btop(lva-sva)]; - - while (spte < epte) { - - if (*spte & INTEL_PTE_VALID) { - if (prot & VM_PROT_WRITE) - pmap_update_pte(spte, 0, INTEL_PTE_WRITE); - else - pmap_update_pte(spte, INTEL_PTE_WRITE, 0); - - if (set_NX == TRUE) - pmap_update_pte(spte,0, INTEL_PTE_NX); - else - pmap_update_pte(spte, INTEL_PTE_NX, 0); - - num_found++; - } - spte++; - } - } - sva = lva; - } - if (num_found) - { - PMAP_UPDATE_TLBS(map, orig_sva, eva); - } - - PMAP_UNLOCK(map); - - PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END, - 0, 0, 0, 0, 0); - -} - -/* Map a (possibly) autogenned block */ -void -pmap_map_block( - pmap_t pmap, - addr64_t va, - ppnum_t pa, - uint32_t size, - vm_prot_t prot, - int attr, - __unused unsigned int flags) -{ - uint32_t page; - - for (page = 0; page < size; page++) { - pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE); - va += PAGE_SIZE; - pa++; - } -} - -/* - * Routine: pmap_extract - * Function: - * Extract the physical page address associated - * with the given map/virtual_address pair. - * Change to shim for backwards compatibility but will not - * work for 64 bit systems. Some old drivers that we cannot - * change need this. - */ - -vm_offset_t -pmap_extract( - register pmap_t pmap, - vm_map_offset_t vaddr) -{ - ppnum_t ppn; - vm_offset_t paddr; - - paddr = (vm_offset_t)0; - ppn = pmap_find_phys(pmap, vaddr); - - if (ppn) { - paddr = ((vm_offset_t)i386_ptob(ppn)) | ((vm_offset_t)vaddr & INTEL_OFFMASK); - } - return (paddr); -} - -kern_return_t -pmap_expand_pml4( - pmap_t map, - vm_map_offset_t vaddr, - __unused unsigned int options) -{ - register vm_page_t m; - register pmap_paddr_t pa; - uint64_t i; - spl_t spl; - ppnum_t pn; - pml4_entry_t *pml4p; - - if (kernel_pmap == map) panic("expand kernel pml4"); - - spl = splhigh(); - pml4p = pmap64_pml4(map, vaddr); - splx(spl); - if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p"); - - /* - * Allocate a VM page for the pml4 page - */ - while ((m = vm_page_grab()) == VM_PAGE_NULL) - VM_PAGE_WAIT(); - - /* - * put the page into the pmap's obj list so it - * can be found later. - */ - pn = m->phys_page; - pa = i386_ptob(pn); - i = pml4idx(map, vaddr); - - /* - * Zero the page. 
- */ - pmap_zero_page(pn); - - vm_page_lockspin_queues(); - vm_page_wire(m); - vm_page_unlock_queues(); - - OSAddAtomic(1, &inuse_ptepages_count); - OSAddAtomic64(1, &alloc_ptepages_count); - PMAP_ZINFO_PALLOC(map, PAGE_SIZE); - - /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */ - vm_object_lock(map->pm_obj_pml4); - - PMAP_LOCK(map); - /* - * See if someone else expanded us first - */ - if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) { - PMAP_UNLOCK(map); - vm_object_unlock(map->pm_obj_pml4); - - VM_PAGE_FREE(m); - - OSAddAtomic(-1, &inuse_ptepages_count); - PMAP_ZINFO_PFREE(map, PAGE_SIZE); - return KERN_SUCCESS; - } - pmap_set_noencrypt(pn); - -#if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) { - panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n", - map, map->pm_obj_pml4, vaddr, i); - } -#endif - vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i); - vm_object_unlock(map->pm_obj_pml4); - - /* - * Set the page directory entry for this page table. - */ - pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */ - - pmap_store_pte(pml4p, pa_to_pte(pa) - | INTEL_PTE_VALID - | INTEL_PTE_USER - | INTEL_PTE_WRITE); - - PMAP_UNLOCK(map); - - return KERN_SUCCESS; -} - -kern_return_t -pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, __unused unsigned int options) -{ - register vm_page_t m; - register pmap_paddr_t pa; - uint64_t i; - spl_t spl; - ppnum_t pn; - pdpt_entry_t *pdptp; - - if (kernel_pmap == map) panic("expand kernel pdpt"); - - spl = splhigh(); - while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) { - splx(spl); - pmap_expand_pml4(map, vaddr, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pdpt entry */ - spl = splhigh(); - } - splx(spl); - - /* - * Allocate a VM page for the pdpt page - */ - while ((m = vm_page_grab()) == VM_PAGE_NULL) - VM_PAGE_WAIT(); - - /* - * put the page into the pmap's obj list so it - * can be found later. - */ - pn = m->phys_page; - pa = i386_ptob(pn); - i = pdptidx(map, vaddr); - - /* - * Zero the page. - */ - pmap_zero_page(pn); - - vm_page_lockspin_queues(); - vm_page_wire(m); - vm_page_unlock_queues(); - - OSAddAtomic(1, &inuse_ptepages_count); - OSAddAtomic64(1, &alloc_ptepages_count); - PMAP_ZINFO_PALLOC(map, PAGE_SIZE); - - /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */ - vm_object_lock(map->pm_obj_pdpt); - - PMAP_LOCK(map); - /* - * See if someone else expanded us first - */ - if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) { - PMAP_UNLOCK(map); - vm_object_unlock(map->pm_obj_pdpt); - - VM_PAGE_FREE(m); - - OSAddAtomic(-1, &inuse_ptepages_count); - PMAP_ZINFO_PFREE(map, PAGE_SIZE); - return KERN_SUCCESS; - } - pmap_set_noencrypt(pn); - -#if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) { - panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n", - map, map->pm_obj_pdpt, vaddr, i); - } -#endif - vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i); - vm_object_unlock(map->pm_obj_pdpt); - - /* - * Set the page directory entry for this page table. - */ - pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */ - - pmap_store_pte(pdptp, pa_to_pte(pa) - | INTEL_PTE_VALID - | INTEL_PTE_USER - | INTEL_PTE_WRITE); - - PMAP_UNLOCK(map); - - return KERN_SUCCESS; -} - - - -/* - * Routine: pmap_expand - * - * Expands a pmap to be able to map the specified virtual address.
- * - * Allocates new virtual memory for the P0 or P1 portion of the - * pmap, then re-maps the physical pages that were in the old - * pmap to be in the new pmap. - * - * Must be called with the pmap system and the pmap unlocked, - * since these must be unlocked to use vm_allocate or vm_deallocate. - * Thus it must be called in a loop that checks whether the map - * has been expanded enough. - * (We won't loop forever, since page tables aren't shrunk.) - */ -kern_return_t -pmap_expand( - pmap_t map, - vm_map_offset_t vaddr, - __unused unsigned int options) -{ - pt_entry_t *pdp; - register vm_page_t m; - register pmap_paddr_t pa; - uint64_t i; - spl_t spl; - ppnum_t pn; - - /* - * if not the kernel map (while we are still compat kernel mode) - * and we are 64 bit, propagate expand upwards - */ - - if (cpu_64bit && (map != kernel_pmap)) { - spl = splhigh(); - while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) { - splx(spl); - pmap_expand_pdpt(map, vaddr, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pde entry */ - spl = splhigh(); - } - splx(spl); - } - - /* - * Allocate a VM page for the pde entries. - */ - while ((m = vm_page_grab()) == VM_PAGE_NULL) - VM_PAGE_WAIT(); - - /* - * put the page into the pmap's obj list so it - * can be found later. - */ - pn = m->phys_page; - pa = i386_ptob(pn); - i = pdeidx(map, vaddr); - - /* - * Zero the page. - */ - pmap_zero_page(pn); - - vm_page_lockspin_queues(); - vm_page_wire(m); - vm_page_unlock_queues(); - - OSAddAtomic(1, &inuse_ptepages_count); - OSAddAtomic64(1, &alloc_ptepages_count); - PMAP_ZINFO_PALLOC(map, PAGE_SIZE); - - /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */ - vm_object_lock(map->pm_obj); - - PMAP_LOCK(map); - /* - * See if someone else expanded us first - */ - - if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) { - PMAP_UNLOCK(map); - vm_object_unlock(map->pm_obj); - - VM_PAGE_FREE(m); - - OSAddAtomic(-1, &inuse_ptepages_count); - PMAP_ZINFO_PFREE(map, PAGE_SIZE); - return KERN_SUCCESS; - } - pmap_set_noencrypt(pn); - -#if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) { - panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n", - map, map->pm_obj, vaddr, i); - } -#endif - vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i); - vm_object_unlock(map->pm_obj); - - /* - * refetch while locked - */ - - pdp = pmap_pde(map, vaddr); - - /* - * Set the page directory entry for this page table. - */ - pmap_store_pte(pdp, pa_to_pte(pa) - | INTEL_PTE_VALID - | INTEL_PTE_USER - | INTEL_PTE_WRITE); - - PMAP_UNLOCK(map); - - return KERN_SUCCESS; -} - - -/* - * pmap_sync_page_data_phys(ppnum_t pa) - * - * Invalidates all of the instruction cache on a physical page and - * pushes any dirty data from the data cache for the same physical page - * Not required in i386. - */ -void -pmap_sync_page_data_phys(__unused ppnum_t pa) -{ - return; -} - -/* - * pmap_sync_page_attributes_phys(ppnum_t pa) - * - * Write back and invalidate all cachelines on a physical page. - */ -void -pmap_sync_page_attributes_phys(ppnum_t pa) -{ - cache_flush_page_phys(pa); -} - - - -#ifdef CURRENTLY_UNUSED_AND_UNTESTED - -int collect_ref; -int collect_unref; - -/* - * Routine: pmap_collect - * Function: - * Garbage collects the physical map system for - * pages which are no longer used. - * Success need not be guaranteed -- that is, there - * may well be pages which are not referenced, but - * others may be collected. - * Usage: - * Called by the pageout daemon when pages are scarce.
- */ -void -pmap_collect( - pmap_t p) -{ - register pt_entry_t *pdp, *ptp; - pt_entry_t *eptp; - int wired; - - if (p == PMAP_NULL) - return; - - if (p == kernel_pmap) - return; - - /* - * Garbage collect map. - */ - PMAP_LOCK(p); - - for (pdp = (pt_entry_t *)p->dirbase; - pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]; - pdp++) - { - if (*pdp & INTEL_PTE_VALID) { - if(*pdp & INTEL_PTE_REF) { - pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF); - collect_ref++; - } else { - collect_unref++; - ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase)); - eptp = ptp + NPTEPG; - - /* - * If the pte page has any wired mappings, we cannot - * free it. - */ - wired = 0; - { - register pt_entry_t *ptep; - for (ptep = ptp; ptep < eptp; ptep++) { - if (iswired(*ptep)) { - wired = 1; - break; - } - } - } - if (!wired) { - /* - * Remove the virtual addresses mapped by this pte page. - */ - pmap_remove_range(p, - pdetova(pdp - (pt_entry_t *)p->dirbase), - ptp, - eptp); - - /* - * Invalidate the page directory pointer. - */ - pmap_store_pte(pdp, 0x0); - - PMAP_UNLOCK(p); - - /* - * And free the pte page itself. - */ - { - register vm_page_t m; - - vm_object_lock(p->pm_obj); - - m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0])); - if (m == VM_PAGE_NULL) - panic("pmap_collect: pte page not in object"); - - vm_object_unlock(p->pm_obj); - - VM_PAGE_FREE(m); - - OSAddAtomic(-1, &inuse_ptepages_count); - PMAP_ZINFO_PFREE(p, PAGE_SIZE); - } - - PMAP_LOCK(p); - } - } - } - } - - PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL); - PMAP_UNLOCK(p); - return; - -} -#endif - - -void -pmap_copy_page(ppnum_t src, ppnum_t dst) -{ - bcopy_phys((addr64_t)i386_ptob(src), - (addr64_t)i386_ptob(dst), - PAGE_SIZE); -} - - -/* - * Routine: pmap_pageable - * Function: - * Make the specified pages (by pmap, offset) - * pageable (or not) as requested. - * - * A page which is not pageable may not take - * a fault; therefore, its page table entry - * must remain valid for the duration. - * - * This routine is merely advisory; pmap_enter - * will specify that these pages are to be wired - * down (or not) as appropriate.
- */ -void -pmap_pageable( - __unused pmap_t pmap, - __unused vm_map_offset_t start_addr, - __unused vm_map_offset_t end_addr, - __unused boolean_t pageable) -{ -#ifdef lint - pmap++; start_addr++; end_addr++; pageable++; -#endif /* lint */ -} - -void -invalidate_icache(__unused vm_offset_t addr, - __unused unsigned cnt, - __unused int phys) -{ - return; -} -void -flush_dcache(__unused vm_offset_t addr, - __unused unsigned count, - __unused int phys) -{ - return; -} - -#if CONFIG_DTRACE -/* - * Constrain DTrace copyin/copyout actions - */ -extern kern_return_t dtrace_copyio_preflight(addr64_t); -extern kern_return_t dtrace_copyio_postflight(addr64_t); - -kern_return_t dtrace_copyio_preflight(__unused addr64_t va) -{ - thread_t thread = current_thread(); - - if (current_map() == kernel_map) - return KERN_FAILURE; - else if (thread->machine.specFlags & CopyIOActive) - return KERN_FAILURE; - else - return KERN_SUCCESS; -} - -kern_return_t dtrace_copyio_postflight(__unused addr64_t va) -{ - return KERN_SUCCESS; -} -#endif /* CONFIG_DTRACE */ - -#include <mach_vm_debug.h> -#if MACH_VM_DEBUG -#include - -int -pmap_list_resident_pages( - __unused pmap_t pmap, - __unused vm_offset_t *listp, - __unused int space) -{ - return 0; -} -#endif /* MACH_VM_DEBUG */ - - - -/* temporary workaround */ -boolean_t -coredumpok(__unused vm_map_t map, __unused vm_offset_t va) -{ -#if 0 - pt_entry_t *ptep; - - ptep = pmap_pte(map->pmap, va); - if (0 == ptep) - return FALSE; - return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)); -#else - return TRUE; -#endif -} - - -boolean_t -phys_page_exists( - ppnum_t pn) -{ - assert(pn != vm_page_fictitious_addr); - - if (!pmap_initialized) - return (TRUE); - - if (pn == vm_page_guard_addr) - return FALSE; - - if (!managed_page(ppn_to_pai(pn))) - return (FALSE); - - return TRUE; -} - -void -pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt) -{ - int i; - pt_entry_t *opte, *npte; - pt_entry_t pte; - spl_t s; - - for (i = 0; i < cnt; i++) { - s = splhigh(); - opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage); - if (0 == opte) - panic("kernel_commpage"); - pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL; - pte &= ~INTEL_PTE_WRITE; // ensure read only - npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage); - if (0 == npte) - panic("user_commpage"); - pmap_store_pte(npte, pte); - splx(s); - kernel_commpage += INTEL_PGBYTES; - user_commpage += INTEL_PGBYTES; - } -} - - -#define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE) -pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT]; - -void -pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt) -{ - int i; - pt_entry_t *kptep; - - PMAP_LOCK(kernel_pmap); - - for (i = 0; i < cnt; i++) { - kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE)); - if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID))) - panic("pmap_commpage64_init pte"); - pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER); - } - PMAP_UNLOCK(kernel_pmap); -} - - -static cpu_pmap_t cpu_pmap_master; - -struct cpu_pmap * -pmap_cpu_alloc(boolean_t is_boot_cpu) -{ - int ret; - int i; - cpu_pmap_t *cp; - vm_offset_t address; - vm_map_address_t mapaddr; - vm_map_entry_t entry; - pt_entry_t *pte; - - if (is_boot_cpu) { - cp = &cpu_pmap_master; - } else { - /* - * The per-cpu pmap data structure itself.
- */ - ret = kmem_alloc(kernel_map, - (vm_offset_t *) &cp, sizeof(cpu_pmap_t)); - if (ret != KERN_SUCCESS) { - printf("pmap_cpu_alloc() failed ret=%d\n", ret); - return NULL; - } - bzero((void *)cp, sizeof(cpu_pmap_t)); - - /* - * The temporary windows used for copy/zero - see loose_ends.c - */ - ret = vm_map_find_space(kernel_map, - &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry); - if (ret != KERN_SUCCESS) { - printf("pmap_cpu_alloc() " - "vm_map_find_space ret=%d\n", ret); - pmap_cpu_free(cp); - return NULL; - } - address = (vm_offset_t)mapaddr; - - for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) { - spl_t s; - s = splhigh(); - while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0) - pmap_expand(kernel_pmap, (vm_map_offset_t)address, PMAP_EXPAND_OPTIONS_NONE); - * (int *) pte = 0; - cp->mapwindow[i].prv_CADDR = (caddr_t) address; - cp->mapwindow[i].prv_CMAP = pte; - splx(s); - } - vm_map_unlock(kernel_map); - } - - cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW; - cp->pde_window_index = PMAP_PDE_FIRST_WINDOW; - cp->pte_window_index = PMAP_PTE_FIRST_WINDOW; - - return cp; -} - -void -pmap_cpu_free(struct cpu_pmap *cp) -{ - if (cp != NULL && cp != &cpu_pmap_master) { - kfree((void *) cp, sizeof(cpu_pmap_t)); - } -} - -mapwindow_t * -pmap_get_mapwindow(pt_entry_t pentry) -{ - mapwindow_t *mp; - int i; - - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - /* fold in cache attributes for this physical page */ - pentry |= pmap_get_cache_attributes(i386_btop(pte_to_pa(pentry))); - /* - * Note: 0th map reserved for pmap_pte() - */ - for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) { - mp = &current_cpu_datap()->cpu_pmap->mapwindow[i]; - - if (*mp->prv_CMAP == 0) { - pmap_store_pte(mp->prv_CMAP, pentry); - - invlpg((uintptr_t)mp->prv_CADDR); - - return (mp); - } - } - panic("pmap_get_mapwindow: no windows available"); - - return NULL; -} - - -void -pmap_put_mapwindow(mapwindow_t *mp) -{ - pmap_store_pte(mp->prv_CMAP, 0); -} - -void -pmap_switch(pmap_t tpmap) -{ - spl_t s; - - s = splhigh(); /* Make sure interruptions are disabled */ - - set_dirbase(tpmap, current_thread()); - - splx(s); -} - - -/* - * disable no-execute capability on - * the specified pmap - */ -void pmap_disable_NX(pmap_t pmap) { - - pmap->nx_enabled = 0; -} - -void -pt_fake_zone_init(int zone_index) -{ - pt_fake_zone_index = zone_index; -} - -void -pt_fake_zone_info(int *count, - vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size, - uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct) -{ - *count = inuse_ptepages_count; - *cur_size = PAGE_SIZE * inuse_ptepages_count; - *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count); - *elem_size = PAGE_SIZE; - *alloc_size = PAGE_SIZE; - *sum_size = alloc_ptepages_count * PAGE_SIZE; - - *collectable = 1; - *exhaustable = 0; - *caller_acct = 1; -} - -vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e) -{ - enum high_fixed_addresses a; - a = e + HIGH_CPU_END * cpu; - return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a); -} - -vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e) -{ - return pmap_cpu_high_map_vaddr(cpu_number(), e); -} - -vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e) -{ - enum high_fixed_addresses a; - vm_offset_t vaddr; - - a = e + HIGH_CPU_END * cpu_number(); - vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a); -
pmap_store_pte(pte_unique_base + a, pte); - - /* TLB flush for this page for this cpu */ - invlpg((uintptr_t)vaddr); - - return vaddr; -} - -static inline void -pmap_cpuset_NMIPI(cpu_set cpu_mask) { - unsigned int cpu, cpu_bit; - uint64_t deadline; - - for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { - if (cpu_mask & cpu_bit) - cpu_NMI_interrupt(cpu); - } - deadline = mach_absolute_time() + (((uint64_t)LockTimeOut) * 3); - while (mach_absolute_time() < deadline) - cpu_pause(); -} - -/* - * Called with pmap locked, we: - * - scan through per-cpu data to see which other cpus need to flush - * - send an IPI to each non-idle cpu to be flushed - * - wait for all to signal back that they are inactive or we see that - * they are in an interrupt handler or at a safe point - * - flush the local tlb if active for this pmap - * - return ... the caller will unlock the pmap - */ -void -pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv) -{ - unsigned int cpu; - unsigned int cpu_bit; - cpu_set cpus_to_signal; - unsigned int my_cpu = cpu_number(); - pmap_paddr_t pmap_cr3 = pmap->pm_cr3; - boolean_t flush_self = FALSE; - uint64_t deadline; - - assert((processor_avail_count < 2) || - (ml_get_interrupts_enabled() && get_preemption_level() != 0)); - - /* - * Scan other cpus for matching active or task CR3. - * For idle cpus (with no active map) we mark them invalid but - * don't signal -- they'll check as they go busy. - * Note: for the kernel pmap we look for 64-bit shared address maps. - */ - cpus_to_signal = 0; - for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { - if (!cpu_datap(cpu)->cpu_running) - continue; - if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) || - (CPU_GET_ACTIVE_CR3(cpu) == pmap_cr3) || - (pmap->pm_shared) || - ((pmap == kernel_pmap) && - (!CPU_CR3_IS_ACTIVE(cpu) || - cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) { - if (cpu == my_cpu) { - flush_self = TRUE; - continue; - } - cpu_datap(cpu)->cpu_tlb_invalid = TRUE; - __asm__ volatile("mfence"); - - if (CPU_CR3_IS_ACTIVE(cpu)) { - cpus_to_signal |= cpu_bit; - i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC); - } - } - } - - PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START, - (uintptr_t) pmap, cpus_to_signal, flush_self, startv, 0); - - if (cpus_to_signal) { - cpu_set cpus_to_respond = cpus_to_signal; - - deadline = mach_absolute_time() + LockTimeOut; - /* - * Wait for those other cpus to acknowledge - */ - while (cpus_to_respond != 0) { - long orig_acks = 0; - - for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { - if ((cpus_to_respond & cpu_bit) != 0) { - if (!cpu_datap(cpu)->cpu_running || - cpu_datap(cpu)->cpu_tlb_invalid == FALSE || - !CPU_CR3_IS_ACTIVE(cpu)) { - cpus_to_respond &= ~cpu_bit; - } - cpu_pause(); - } - if (cpus_to_respond == 0) - break; - } - - if (cpus_to_respond && (mach_absolute_time() > deadline)) { - if (machine_timeout_suspended()) - continue; - pmap_tlb_flush_timeout = TRUE; - orig_acks = NMIPI_acks; - pmap_cpuset_NMIPI(cpus_to_respond); - - panic("TLB invalidation IPI timeout: " - "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx", - cpus_to_respond, orig_acks, NMIPI_acks); - } - } - } - /* - * Flush local tlb if required. - * We need this flush even if the pmap being changed - * is the user map... in case we do a copyin/out - * before returning to user mode.
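- * (A copyin/copyout dereferences user virtual addresses while still running - * in kernel mode, so a stale user mapping in this cpu's TLB could be observed - * even without a return to user space.)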
- */ - if (flush_self) - flush_tlb(); - - if ((pmap == kernel_pmap) && (flush_self != TRUE)) { - panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map); - } - - PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END, - (uintptr_t) pmap, cpus_to_signal, startv, endv, 0); -} - -void -process_pmap_updates(void) -{ - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - - flush_tlb(); - - current_cpu_datap()->cpu_tlb_invalid = FALSE; - __asm__ volatile("mfence"); -} - -void -pmap_update_interrupt(void) -{ - PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START, - 0, 0, 0, 0, 0); - - process_pmap_updates(); - - PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END, - 0, 0, 0, 0, 0); -} -#ifdef PMAP_DEBUG -void -pmap_dump(pmap_t p) -{ - int i; - - kprintf("pmap 0x%x\n",p); - - kprintf(" pm_cr3 0x%llx\n",p->pm_cr3); - kprintf(" pm_pml4 0x%x\n",p->pm_pml4); - kprintf(" pm_pdpt 0x%x\n",p->pm_pdpt); - - kprintf(" pml4[0] 0x%llx\n",*p->pm_pml4); - for (i=0;i<8;i++) - kprintf(" pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]); -} - -void pmap_dump_wrap(void) -{ - pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap); -} - -void -dump_4GB_pdpt(pmap_t p) -{ - int spl; - pdpt_entry_t *user_pdptp; - pdpt_entry_t *kern_pdptp; - pdpt_entry_t *pml4p; - - spl = splhigh(); - while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) { - splx(spl); - pmap_expand_pml4(p, 0x0, PMAP_EXPAND_OPTIONS_NONE); - spl = splhigh(); - } - kern_pdptp = kernel_pmap->pm_pdpt; - if (kern_pdptp == NULL) - panic("kern_pdptp == NULL"); - kprintf("dump_4GB_pdpt(%p)\n" - "kern_pdptp=%p (phys=0x%016llx)\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "user_pdptp=%p (phys=0x%016llx)\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n", - p, kern_pdptp, kvtophys(kern_pdptp), - kern_pdptp+0, *(kern_pdptp+0), - kern_pdptp+1, *(kern_pdptp+1), - kern_pdptp+2, *(kern_pdptp+2), - kern_pdptp+3, *(kern_pdptp+3), - kern_pdptp+4, *(kern_pdptp+4), - user_pdptp, kvtophys(user_pdptp), - user_pdptp+0, *(user_pdptp+0), - user_pdptp+1, *(user_pdptp+1), - user_pdptp+2, *(user_pdptp+2), - user_pdptp+3, *(user_pdptp+3), - user_pdptp+4, *(user_pdptp+4)); - kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n", - p->pm_cr3, p->pm_hold, p->pm_pml4); - pml4p = (pdpt_entry_t *)p->pm_hold; - if (pml4p == NULL) - panic("user pml4p == NULL"); - kprintf("\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n", - pml4p+0, *(pml4p), - pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX)); - kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n", - kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4); - pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold; - if (pml4p == NULL) - panic("kern pml4p == NULL"); - kprintf("\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n", - pml4p+0, *(pml4p), - pml4p+511, *(pml4p+511)); - splx(spl); -} - -void dump_4GB_pdpt_thread(thread_t tp) -{ - dump_4GB_pdpt(tp->map->pmap); -} - - -#endif diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h index 5cc91b6e2..5e786acbe 100644 --- a/osfmk/i386/pmap.h +++ b/osfmk/i386/pmap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -114,9 +114,6 @@ #define PTESHIFT 12ULL -#ifdef __i386__ -#define INITPT_SEG_BASE 0x100000 -#endif #ifdef __x86_64__ #define LOW_4GB_MASK ((vm_offset_t)0x00000000FFFFFFFFUL) @@ -205,32 +202,11 @@ typedef uint64_t pmap_paddr_t; static inline void pmap_store_pte(pt_entry_t *entryp, pt_entry_t value) { -#ifdef __i386__ - /* - * Load the new value into %ecx:%ebx - * Load the old value into %edx:%eax - * Compare-exchange-8bytes at address entryp (loaded in %edi) - * If the compare succeeds, the new value will have been stored. - * Otherwise, the old value changed and reloaded, so try again. - */ - __asm__ volatile( - " movl (%0), %%eax \n\t" - " movl 4(%0), %%edx \n\t" - "1: \n\t" - " cmpxchg8b (%0) \n\t" - " jnz 1b" - : - : "D" (entryp), - "b" ((uint32_t)value), - "c" ((uint32_t)(value >> 32)) - : "eax", "edx", "memory"); -#else /* * In the 32-bit kernel a compare-and-exchange loop was * required to provide atomicity. For K64, life is easier: */ *entryp = value; -#endif } /* in 64 bit spaces, the number of each type of page in the page tables */ @@ -239,22 +215,11 @@ pmap_store_pte(pt_entry_t *entryp, pt_entry_t value) #define NPDEPGS (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t)))) #define NPTEPGS (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t)))) -#ifdef __i386__ -/* - * The 64-bit kernel is remapped in uber-space which is at the base - * the highest 4th-level directory (KERNEL_UBER_PML4_INDEX). That is, - * 512GB from the top of virtual space (or zero). - */ -#define KERNEL_UBER_PML4_INDEX 511 -#define KERNEL_UBER_BASE (0ULL - NBPML4) -#define KERNEL_UBER_BASE_HI32 ((uint32_t)(KERNEL_UBER_BASE >> 32)) -#else #define KERNEL_PML4_INDEX 511 #define KERNEL_KEXTS_INDEX 510 /* Home of KEXTs - the basement */ #define KERNEL_PHYSMAP_PML4_INDEX 509 /* virtual to physical map */ #define KERNEL_BASE (0ULL - NBPML4) #define KERNEL_BASEMENT (KERNEL_BASE - NBPML4) -#endif #define VM_WIMG_COPYBACK VM_MEM_COHERENT #define VM_WIMG_COPYBACKLW VM_WIMG_COPYBACK @@ -269,10 +234,6 @@ pmap_store_pte(pt_entry_t *entryp, pt_entry_t value) /* * Pte related macros */ -#ifdef __i386__ -#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<dirbase[(vm_offset_t)(v) >> PDESHIFT]))*/ - -#define HIGH_MEM_BASE ((uint32_t)( -NBPDE) ) /* shared gdt etc seg addr */ /* XXX64 ?? */ -#define pmap_index_to_virt(x) (HIGH_MEM_BASE | ((unsigned)(x) << PAGE_SHIFT)) -#endif /* * Convert address offset to page descriptor index @@ -395,6 +308,8 @@ enum high_fixed_addresses { /* This is conservative, but suffices */ #define INTEL_PTE_RSVD ((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54)) +#define INTEL_PTE_COMPRESSED INTEL_PTE_REF /* marker, for invalid PTE only */ + #define pa_to_pte(a) ((a) & INTEL_PTE_PFN) /* XXX */ #define pte_to_pa(p) ((p) & INTEL_PTE_PFN) /* XXX */ #define pte_increment_pa(p) ((p) += INTEL_OFFMASK+1) @@ -420,18 +335,9 @@ enum high_fixed_addresses { * and directories. 
*/ -#ifdef __i386__ -extern pt_entry_t PTmap[], APTmap[], Upte; -extern pd_entry_t PTD[], APTD[], PTDpde[], APTDpde[], Upde; -extern pmap_paddr_t lo_kernel_cr3; -extern pdpt_entry_t *IdlePDPT64; -extern pdpt_entry_t IdlePDPT[]; -extern pml4_entry_t IdlePML4[]; -#else extern pt_entry_t *PTmap; extern pdpt_entry_t *IdlePDPT; extern pml4_entry_t *IdlePML4; -#endif extern boolean_t no_shared_cr3; extern addr64_t kernel64_cr3; extern pd_entry_t *IdlePTD; /* physical addr of "Idle" state PTD */ @@ -441,17 +347,6 @@ extern uint64_t pmap_pv_hashlist_cnts; extern uint32_t pmap_pv_hashlist_max; extern uint32_t pmap_kernel_text_ps; -#ifdef __i386__ -/* - * ** i386 ** - * virtual address to page table entry and - * to physical address. Likewise for alternate address space. - * Note: these work recursively, thus vtopte of a pte will give - * the corresponding pde that in turn maps it. - */ - -#define vtopte(va) (PTmap + i386_btop((vm_offset_t)va)) -#endif #ifdef __x86_64__ @@ -506,10 +401,6 @@ struct pmap { pmap_paddr_t pm_cr3; /* physical addr */ boolean_t pm_shared; pd_entry_t *dirbase; /* page directory pointer */ -#ifdef __i386__ - pmap_paddr_t pdirbase; /* phys. address of dirbase */ - vm_offset_t pm_hold; /* true pdpt zalloc addr */ -#endif vm_object_t pm_obj; /* object to hold pde's */ task_map_t pm_task_map; pdpt_entry_t *pm_pdpt; /* KVA of 3rd level page */ @@ -556,9 +447,10 @@ extern void pmap_put_mapwindow(mapwindow_t *map); #endif typedef struct pmap_memory_regions { - ppnum_t base; - ppnum_t end; - ppnum_t alloc; + ppnum_t base; /* first page of this region */ + ppnum_t alloc_up; /* pages below this one have been "stolen" */ + ppnum_t alloc_down; /* pages above this one have been "stolen" */ + ppnum_t end; /* last page of this region */ uint32_t type; uint64_t attribute; } pmap_memory_region_t; @@ -576,7 +468,6 @@ set_dirbase(pmap_t tpmap, __unused thread_t thread) { int ccpu = cpu_number(); cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3; cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map; -#ifndef __i386__ /* * Switch cr3 if necessary * - unless running with no_shared_cr3 debugging mode @@ -594,7 +485,6 @@ set_dirbase(pmap_t tpmap, __unused thread_t thread) { if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3); } -#endif } /* @@ -658,17 +548,6 @@ extern int pmap_list_resident_pages( vm_offset_t *listp, int space); extern void x86_filter_TLB_coherency_interrupts(boolean_t); -#ifdef __i386__ -extern void pmap_commpage32_init( - vm_offset_t kernel, - vm_offset_t user, - int count); -extern void pmap_commpage64_init( - vm_offset_t kernel, - vm_map_offset_t user, - int count); - -#endif /* * Get cache attributes (as pagetable bits) for the specified phys page */ @@ -695,16 +574,6 @@ extern ppnum_t pmap_find_phys(pmap_t map, addr64_t va); extern void pmap_cpu_init(void); extern void pmap_disable_NX(pmap_t pmap); -#ifdef __i386__ -extern void pmap_set_4GB_pagezero(pmap_t pmap); -extern void pmap_clear_4GB_pagezero(pmap_t pmap); -extern void pmap_load_kernel_cr3(void); -extern vm_offset_t pmap_cpu_high_map_vaddr(int, enum high_cpu_types); -extern vm_offset_t pmap_high_map_vaddr(enum high_cpu_types); -extern vm_offset_t pmap_high_map(pt_entry_t, enum high_cpu_types); -extern vm_offset_t pmap_cpu_high_shared_remap(int, enum high_cpu_types, vm_offset_t, int); -extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int); -#endif extern void pt_fake_zone_init(int); extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, 
vm_size_t *, vm_size_t *, @@ -726,57 +595,13 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr set_dirbase(tpmap, thread); \ } -#ifdef __i386__ -#define PMAP_DEACTIVATE_MAP(map, thread) \ - if (vm_map_pmap(map)->pm_task_map == TASK_MAP_64BIT_SHARED) \ - pmap_load_kernel_cr3(); -#elif defined(__x86_64__) +#if defined(__x86_64__) #define PMAP_DEACTIVATE_MAP(map, thread) \ pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, cpu_number()) == (get_cr3_raw() & 0xFFF)) : TRUE); #else #define PMAP_DEACTIVATE_MAP(map, thread) #endif -#if defined(__i386__) - -#define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \ - spl_t spl; \ - pt_entry_t *kpdp; \ - pt_entry_t *updp; \ - int i; \ - int need_flush; \ - \ - need_flush = 0; \ - spl = splhigh(); \ - if ((old_th->map != new_th->map) || (new_th->task != old_th->task)) { \ - PMAP_DEACTIVATE_MAP(old_th->map, old_th); \ - PMAP_ACTIVATE_MAP(new_th->map, new_th); \ - } \ - kpdp = current_cpu_datap()->cpu_copywindow_pdp; \ - for (i = 0; i < NCOPY_WINDOWS; i++) { \ - if (new_th->machine.copy_window[i].user_base != (user_addr_t)-1) { \ - updp = pmap_pde(new_th->map->pmap, \ - new_th->machine.copy_window[i].user_base);\ - pmap_store_pte(kpdp, updp ? *updp : 0); \ - } \ - kpdp++; \ - } \ - splx(spl); \ - if (new_th->machine.copyio_state == WINDOWS_OPENED) \ - need_flush = 1; \ - else \ - new_th->machine.copyio_state = WINDOWS_DIRTY; \ - if (new_th->machine.physwindow_pte) { \ - pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep), \ - new_th->machine.physwindow_pte); \ - if (need_flush == 0) \ - invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);\ - } \ - if (need_flush) \ - flush_tlb(); \ -} - -#else /* __x86_64__ */ #define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \ \ pmap_assert(ml_get_interrupts_enabled() == FALSE); \ @@ -785,7 +610,6 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr PMAP_ACTIVATE_MAP(new_th->map, new_th); \ } \ } -#endif /* __i386__ */ #if NCOPY_WINDOWS > 0 #define PMAP_SWITCH_USER(th, new_map, my_cpu) { \ @@ -839,30 +663,11 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr * but will queue the update request for when the cpu * becomes active. */ -#if defined(__x86_64__) #define MARK_CPU_IDLE(my_cpu) { \ assert(ml_get_interrupts_enabled() == FALSE); \ CPU_CR3_MARK_INACTIVE(); \ - __asm__ volatile("mfence"); \ -} -#else /* __i386__ native */ -#define MARK_CPU_IDLE(my_cpu) { \ - assert(ml_get_interrupts_enabled() == FALSE); \ - /* \ - * Mark this cpu idle, and remove it from the active set, \ - * since it is not actively using any pmap. Signal_cpus \ - * will notice that it is idle, and avoid signaling it, \ - * but will queue the update request for when the cpu \ - * becomes active. \ - */ \ - if (!cpu_mode_is64bit() || no_shared_cr3) \ - process_pmap_updates(); \ - else \ - pmap_load_kernel_cr3(); \ - CPU_CR3_MARK_INACTIVE(); \ - __asm__ volatile("mfence"); \ + mfence(); \ } -#endif /* __i386__ */ #define MARK_CPU_ACTIVE(my_cpu) { \ assert(ml_get_interrupts_enabled() == FALSE); \ @@ -877,7 +682,7 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr * interrupt if this happens. 
\ */ \ CPU_CR3_MARK_ACTIVE(); \ - __asm__ volatile("mfence"); \ + mfence(); \ \ if (current_cpu_datap()->cpu_tlb_invalid) \ process_pmap_updates(); \ diff --git a/osfmk/i386/pmap_common.c b/osfmk/i386/pmap_common.c index abe1e24a3..4bf41195e 100644 --- a/osfmk/i386/pmap_common.c +++ b/osfmk/i386/pmap_common.c @@ -394,7 +394,7 @@ pmap_set_modify(ppnum_t pn) void pmap_clear_modify(ppnum_t pn) { - phys_attribute_clear(pn, PHYS_MODIFIED); + phys_attribute_clear(pn, PHYS_MODIFIED, 0, NULL); } /* @@ -422,7 +422,7 @@ pmap_is_modified(ppnum_t pn) void pmap_clear_reference(ppnum_t pn) { - phys_attribute_clear(pn, PHYS_REFERENCED); + phys_attribute_clear(pn, PHYS_REFERENCED, 0, NULL); } void @@ -468,6 +468,18 @@ pmap_get_refmod(ppnum_t pn) return (retval); } + +void +pmap_clear_refmod_options(ppnum_t pn, unsigned int mask, unsigned int options, void *arg) +{ + unsigned int x86Mask; + + x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0) + | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0)); + + phys_attribute_clear(pn, x86Mask, options, arg); +} + /* * pmap_clear_refmod(phys, mask) * clears the referenced and modified bits as specified by the mask @@ -480,12 +492,19 @@ pmap_clear_refmod(ppnum_t pn, unsigned int mask) x86Mask = ( ((mask & VM_MEM_MODIFIED)? PHYS_MODIFIED : 0) | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0)); - phys_attribute_clear(pn, x86Mask); + + phys_attribute_clear(pn, x86Mask, 0, NULL); +} + +unsigned int +pmap_disconnect(ppnum_t pa) +{ + return (pmap_disconnect_options(pa, 0, NULL)); } /* * Routine: - * pmap_disconnect + * pmap_disconnect_options * * Function: * Disconnect all mappings for this page and return reference and change status @@ -493,14 +512,14 @@ pmap_clear_refmod(ppnum_t pn, unsigned int mask) * */ unsigned int -pmap_disconnect(ppnum_t pa) +pmap_disconnect_options(ppnum_t pa, unsigned int options, void *arg) { unsigned refmod, vmrefmod = 0; - pmap_page_protect(pa, 0); /* disconnect the page */ + pmap_page_protect_options(pa, 0, options, arg); /* disconnect the page */ pmap_assert(pa != vm_page_fictitious_addr); - if ((pa == vm_page_guard_addr) || !IS_MANAGED_PAGE(pa)) + if ((pa == vm_page_guard_addr) || !IS_MANAGED_PAGE(pa) || (options & PMAP_OPTIONS_NOREFMOD)) return 0; refmod = pmap_phys_attributes[pa] & (PHYS_MODIFIED | PHYS_REFERENCED); diff --git a/osfmk/i386/pmap_internal.h b/osfmk/i386/pmap_internal.h index 22bf95cec..6ca95d7b1 100644 --- a/osfmk/i386/pmap_internal.h +++ b/osfmk/i386/pmap_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -47,8 +47,15 @@ simple_unlock(&(pmap)->lock); \ } -#define PMAP_UPDATE_TLBS(pmap, s, e) \ - pmap_flush_tlbs(pmap, s, e) +#define PMAP_UPDATE_TLBS(pmap, s, e) \ + pmap_flush_tlbs(pmap, s, e, 0, NULL) + + +#define PMAP_DELAY_TLB_FLUSH 0x01 + +#define PMAP_UPDATE_TLBS_DELAYED(pmap, s, e, c) \ + pmap_flush_tlbs(pmap, s, e, PMAP_DELAY_TLB_FLUSH, c) + #define iswired(pte) ((pte) & INTEL_PTE_WIRED) @@ -85,16 +92,13 @@ void pmap_set_reference( boolean_t phys_page_exists( ppnum_t pn); -void pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t); +void +pmap_flush_tlbs(pmap_t, vm_map_offset_t, vm_map_offset_t, int, pmap_flush_context *); void pmap_update_cache_attributes_locked(ppnum_t, unsigned); -#if CONFIG_YONAH -extern boolean_t cpu_64bit; -#else extern const boolean_t cpu_64bit; -#endif /* * Private data structures. 
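A pattern worth noting across the pmap_common.c and pmap_internal.h hunks above: every legacy entry point (pmap_disconnect, pmap_clear_refmod, PMAP_UPDATE_TLBS) survives unchanged as a thin wrapper that forwards to a new *_options variant taking an options word and an opaque arg, and PMAP_OPTIONS_NOFLUSH plus a pmap_flush_context (via PMAP_UPDATE_TLBS_DELAYED) lets a caller accumulate many PTE updates behind one TLB shootdown. The user-space sketch below shows only the shape of that batching; flush_ctx_t, page_protect_options() and OPT_NOFLUSH are invented stand-ins, not the kernel's types, and the real context is presumably drained by a separate flush call not shown in these hunks.

	#include <stdint.h>
	#include <stdio.h>
	#include <stddef.h>

	#define OPT_NOFLUSH 0x01	/* stand-in for PMAP_OPTIONS_NOFLUSH */

	/* Stand-in for pmap_flush_context: accumulates CPUs needing an IPI. */
	typedef struct {
		uint64_t cpumask;
	} flush_ctx_t;

	static void
	tlb_shootdown(uint64_t cpumask)
	{
		printf("shootdown IPI -> cpumask 0x%llx\n",
		    (unsigned long long)cpumask);
	}

	/* New-style operation: options word plus opaque arg, like
	 * pmap_page_protect_options(pn, prot, options, arg). */
	static void
	page_protect_options(int pn, unsigned int options, void *arg)
	{
		uint64_t affected = 1ULL << (pn % 4);	/* pretend CPU set */

		if (options & OPT_NOFLUSH)
			((flush_ctx_t *)arg)->cpumask |= affected; /* defer */
		else
			tlb_shootdown(affected);	/* flush immediately */
	}

	/* Legacy wrapper keeps the old signature, like pmap_page_protect(). */
	static void
	page_protect(int pn)
	{
		page_protect_options(pn, 0, NULL);
	}

	int
	main(void)
	{
		flush_ctx_t ctx = { 0 };
		int pn;

		for (pn = 0; pn < 16; pn++)	/* batch many pages... */
			page_protect_options(pn, OPT_NOFLUSH, &ctx);
		tlb_shootdown(ctx.cpumask);	/* ...then flush once */

		page_protect(99);	/* old API still flushes per call */
		return 0;
	}

The main() loop is the point: sixteen deferred operations collapse into a single shootdown, while the legacy wrapper keeps the old flush-per-call behavior.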
@@ -367,6 +371,10 @@ static inline void pmap_pv_throttle(__unused pmap_t p) { #define IS_MANAGED_PAGE(x) \ ((unsigned int)(x) <= last_managed_page && \ (pmap_phys_attributes[x] & PHYS_MANAGED)) +#define IS_INTERNAL_PAGE(x) \ + (IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_INTERNAL)) +#define IS_REUSABLE_PAGE(x) \ + (IS_MANAGED_PAGE(x) && (pmap_phys_attributes[x] & PHYS_REUSABLE)) /* * Physical page attributes. Copy bits from PTE definition. @@ -378,6 +386,8 @@ static inline void pmap_pv_throttle(__unused pmap_t p) { #define PHYS_NCACHE INTEL_PTE_NCACHE #define PHYS_PTA INTEL_PTE_PTA #define PHYS_CACHEABILITY_MASK (INTEL_PTE_PTA | INTEL_PTE_NCACHE) +#define PHYS_INTERNAL INTEL_PTE_WTHRU /* page from internal object */ +#define PHYS_REUSABLE INTEL_PTE_WRITE /* page is "reusable" */ extern const boolean_t pmap_disable_kheap_nx; extern const boolean_t pmap_disable_kstack_nx; @@ -750,7 +760,7 @@ pmap_pv_remove_retry: if (pac == PMAP_ACTION_IGNORE) goto pmap_pv_remove_exit; else if (pac == PMAP_ACTION_ASSERT) - panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list!", pmap, vaddr, ppn, *pte, ppnp, pte); + panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p, %p): null pv_list!", pmap, vaddr, ppn, *pte, ppnp, pte); else if (pac == PMAP_ACTION_RETRY_RELOCK) { LOCK_PVH(ppn_to_pai(*ppnp)); pmap_phys_attributes[ppn_to_pai(*ppnp)] |= (PHYS_MODIFIED | PHYS_REFERENCED); @@ -780,7 +790,7 @@ pmap_pv_remove_retry: remque(&pvh_e->qlink); pprevh = pvhash(pvhash_idx); if (PV_HASHED_ENTRY_NULL == *pprevh) { - panic("pmap_pv_remove(%p,0x%llx,0x%x): " + panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x): " "empty hash, removing rooted", pmap, vaddr, ppn); } @@ -803,7 +813,7 @@ pmap_pv_remove_retry: LOCK_PV_HASH(pvhash_idx); pprevh = pvhash(pvhash_idx); if (PV_HASHED_ENTRY_NULL == *pprevh) { - panic("pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash", + panic("Possible memory corruption: pmap_pv_remove(%p,0x%llx,0x%x, 0x%llx, %p): empty hash", pmap, vaddr, ppn, *pte, pte); } pvh_e = *pprevh; @@ -823,7 +833,7 @@ pmap_pv_remove_retry: pmap_pagetable_corruption_action_t pac = pmap_classify_pagetable_corruption(pmap, vaddr, ppnp, pte, ROOT_PRESENT); if (pac == PMAP_ACTION_ASSERT) - panic("pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, pv_h->va); + panic("Possible memory corruption: pmap_pv_remove(%p, 0x%llx, 0x%x, 0x%llx, %p, %p): pv not on hash, head: %p, 0x%llx", pmap, vaddr, ppn, *pte, ppnp, pte, pv_h->pmap, pv_h->va); else { UNLOCK_PV_HASH(pvhash_idx); if (pac == PMAP_ACTION_RETRY_RELOCK) { @@ -906,7 +916,9 @@ int phys_attribute_test( int bits); void phys_attribute_clear( ppnum_t phys, - int bits); + int bits, + unsigned int options, + void *arg); //#define PCID_DEBUG 1 #if PCID_DEBUG @@ -929,26 +941,6 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new) { boolean_t ret; -#ifdef __i386__ - /* - * Load the old value into %edx:%eax - * Load the new value into %ecx:%ebx - * Compare-exchange-8bytes at address entryp (loaded in %edi) - * If the compare succeeds, the new value is stored, return TRUE. - * Otherwise, no swap is made, return FALSE. 
- */ - asm volatile( - " lock; cmpxchg8b (%1) \n\t" - " setz %%al \n\t" - " movzbl %%al,%0" - : "=a" (ret) - : "D" (entryp), - "a" ((uint32_t)old), - "d" ((uint32_t)(old >> 32)), - "b" ((uint32_t)new), - "c" ((uint32_t)(new >> 32)) - : "memory"); -#else /* * Load the old value into %rax * Load the new value into another register @@ -965,7 +957,6 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new) "r" (new), "r" (entryp) : "memory"); -#endif return ret; } diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c index f400bc280..65291caf3 100644 --- a/osfmk/i386/pmap_x86_common.c +++ b/osfmk/i386/pmap_x86_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -36,6 +36,20 @@ void pmap_remove_range( pt_entry_t *spte, pt_entry_t *epte); +void pmap_remove_range_options( + pmap_t pmap, + vm_map_offset_t va, + pt_entry_t *spte, + pt_entry_t *epte, + int options); + +void pmap_reusable_range( + pmap_t pmap, + vm_map_offset_t va, + pt_entry_t *spte, + pt_entry_t *epte, + boolean_t reusable); + uint32_t pmap_update_clear_pte_count; /* @@ -379,7 +393,7 @@ void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) { CPU_CR3_MARK_INACTIVE(); } else { CPU_CR3_MARK_ACTIVE(); - __asm__ volatile("mfence"); + mfence(); if (current_cpu_datap()->cpu_tlb_invalid) process_pmap_updates(); } @@ -409,9 +423,10 @@ pmap_enter( unsigned int flags, boolean_t wired) { - (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE); + (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL); } + kern_return_t pmap_enter_options( register pmap_t pmap, @@ -421,7 +436,8 @@ pmap_enter_options( __unused vm_prot_t fault_type, unsigned int flags, boolean_t wired, - unsigned int options) + unsigned int options, + void *arg) { pt_entry_t *pte; pv_rooted_entry_t pv_h; @@ -531,6 +547,13 @@ Retry: pai = pa_index(old_pa); old_pa_locked = FALSE; + if (old_pa == 0 && + (*pte & INTEL_PTE_COMPRESSED)) { + /* one less "compressed" */ + OSAddAtomic64(-1, &pmap->stats.compressed); + /* marker will be cleared below */ + } + /* * if we have a previous managed page, lock the pv entry now. 
after * we lock it, check to see if someone beat us to the lock and if so @@ -569,8 +592,9 @@ Retry: } if (pmap != kernel_pmap) template |= INTEL_PTE_USER; - if (prot & VM_PROT_WRITE) + if (prot & VM_PROT_WRITE) { template |= INTEL_PTE_WRITE; + } if (set_NX) template |= INTEL_PTE_NX; @@ -593,6 +617,11 @@ Retry: /* Determine delta, PV locked */ need_tlbflush = ((old_attributes ^ template) != INTEL_PTE_WIRED); + + if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) { + if ((old_attributes ^ template) == INTEL_PTE_WRITE) + need_tlbflush = FALSE; + } /* store modified PTE and preserve RC bits */ pt_entry_t npte, opte;; @@ -641,8 +670,21 @@ Retry: if (IS_MANAGED_PAGE(pai)) { pmap_assert(old_pa_locked == TRUE); pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE); + pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE); assert(pmap->stats.resident_count >= 1); OSAddAtomic(-1, &pmap->stats.resident_count); + if (pmap != kernel_pmap) { + if (IS_REUSABLE_PAGE(pai)) { + assert(pmap->stats.reusable > 0); + OSAddAtomic(-1, &pmap->stats.reusable); + } else if (IS_INTERNAL_PAGE(pai)) { + assert(pmap->stats.internal > 0); + OSAddAtomic(-1, &pmap->stats.internal); + } else { + assert(pmap->stats.external > 0); + OSAddAtomic(-1, &pmap->stats.external); + } + } if (iswired(*pte)) { assert(pmap->stats.wired_count >= 1); OSAddAtomic(-1, &pmap->stats.wired_count); @@ -666,6 +708,12 @@ Retry: * Do removal part of accounting. */ + if (pmap != kernel_pmap) { +#if 00 + assert(pmap->stats.device > 0); + OSAddAtomic(-1, &pmap->stats.device); +#endif + } if (iswired(*pte)) { assert(pmap->stats.wired_count >= 1); OSAddAtomic(-1, &pmap->stats.wired_count); @@ -700,6 +748,17 @@ Retry: pv_h->va = vaddr; pv_h->pmap = pmap; queue_init(&pv_h->qlink); + + if (options & PMAP_OPTIONS_INTERNAL) { + pmap_phys_attributes[pai] |= PHYS_INTERNAL; + } else { + pmap_phys_attributes[pai] &= ~PHYS_INTERNAL; + } + if (options & PMAP_OPTIONS_REUSABLE) { + pmap_phys_attributes[pai] |= PHYS_REUSABLE; + } else { + pmap_phys_attributes[pai] &= ~PHYS_REUSABLE; + } } else { /* * Add new pv_hashed_entry after header. @@ -750,16 +809,36 @@ Retry: * for 'managed memory' */ pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE); + pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE); OSAddAtomic(+1, &pmap->stats.resident_count); if (pmap->stats.resident_count > pmap->stats.resident_max) { pmap->stats.resident_max = pmap->stats.resident_count; } + if (pmap != kernel_pmap) { + if (IS_REUSABLE_PAGE(pai)) { + OSAddAtomic(+1, &pmap->stats.reusable); + PMAP_STATS_PEAK(pmap->stats.reusable); + } else if (IS_INTERNAL_PAGE(pai)) { + OSAddAtomic(+1, &pmap->stats.internal); + PMAP_STATS_PEAK(pmap->stats.internal); + } else { + OSAddAtomic(+1, &pmap->stats.external); + PMAP_STATS_PEAK(pmap->stats.external); + } + } } else if (last_managed_page == 0) { /* Account for early mappings created before "managed pages" * are determined. Consider consulting the available DRAM map. */ pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE); + pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE); OSAddAtomic(+1, &pmap->stats.resident_count); + if (pmap != kernel_pmap) { +#if 00 + OSAddAtomic(+1, &pmap->stats.device); + PMAP_STATS_PEAK(pmap->stats.device); +#endif + } } /* * Step 3) Enter the mapping. 
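The enter/remove accounting added above sorts each managed mapping into one of three per-task buckets, reusable, internal, or external, based on the new PHYS_REUSABLE/PHYS_INTERNAL attribute bits, with reusable taking precedence over internal and everything else (file-backed pages) counted as external. A compilable sketch of that classification, using invented stand-ins (ATTR_*, stats_t) rather than the kernel's attribute encoding:

	#include <stdint.h>
	#include <stdio.h>

	/* Stand-ins for the new attribute bits; the kernel overloads spare
	 * PTE bit values as PHYS_INTERNAL and PHYS_REUSABLE. */
	#define ATTR_INTERNAL	0x1
	#define ATTR_REUSABLE	0x2

	typedef struct {
		int internal, external, reusable;
	} stats_t;

	/* Same precedence as the patch: reusable wins over internal,
	 * everything else counts as external. */
	static int *
	slot_for(stats_t *st, uint8_t attr)
	{
		if (attr & ATTR_REUSABLE)
			return &st->reusable;
		if (attr & ATTR_INTERNAL)
			return &st->internal;
		return &st->external;
	}

	int
	main(void)
	{
		stats_t st = { 0, 0, 0 };

		(*slot_for(&st, ATTR_INTERNAL))++;	/* anonymous page mapped */
		(*slot_for(&st, ATTR_INTERNAL | ATTR_REUSABLE))++; /* reusable */
		(*slot_for(&st, 0))++;			/* file-backed page */
		(*slot_for(&st, ATTR_INTERNAL))--;	/* anonymous page removed */

		printf("internal=%d external=%d reusable=%d\n",
		    st.internal, st.external, st.reusable);
		return 0;
	}

Because the same precedence is applied on both entry and removal, the buckets stay balanced unless a page's attributes change while it is mapped; that is the drift pmap_reusable_range() later corrects with its signed deltas.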
@@ -804,9 +883,12 @@ Retry: UNLOCK_PVH(pai); } Done: - if (need_tlbflush == TRUE) - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - + if (need_tlbflush == TRUE) { + if (options & PMAP_OPTIONS_NOFLUSH) + PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg); + else + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); + } if (pvh_e != PV_HASHED_ENTRY_NULL) { PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1); } @@ -850,6 +932,17 @@ pmap_remove_range( vm_map_offset_t start_vaddr, pt_entry_t *spte, pt_entry_t *epte) +{ + pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0); +} + +void +pmap_remove_range_options( + pmap_t pmap, + vm_map_offset_t start_vaddr, + pt_entry_t *spte, + pt_entry_t *epte, + int options) { pt_entry_t *cpte; pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; @@ -857,6 +950,8 @@ pmap_remove_range( pv_hashed_entry_t pvh_e; int pvh_cnt = 0; int num_removed, num_unwired, num_found, num_invalid; + int num_device, num_external, num_internal, num_reusable; + uint64_t num_compressed; ppnum_t pai; pmap_paddr_t pa; vm_map_offset_t vaddr; @@ -865,19 +960,11 @@ pmap_remove_range( num_unwired = 0; num_found = 0; num_invalid = 0; -#if defined(__i386__) - if (pmap != kernel_pmap && - pmap->pm_task_map == TASK_MAP_32BIT && - start_vaddr >= HIGH_MEM_BASE) { - /* - * The range is in the "high_shared_pde" which is shared - * between the kernel and all 32-bit tasks. It holds - * the 32-bit commpage but also the trampolines, GDT, etc... - * so we can't let user tasks remove anything from it. - */ - return; - } -#endif + num_device = 0; + num_external = 0; + num_internal = 0; + num_reusable = 0; + num_compressed = 0; /* invalidate the PTEs first to "freeze" them */ for (cpte = spte, vaddr = start_vaddr; cpte < epte; @@ -885,8 +972,18 @@ pmap_remove_range( pt_entry_t p = *cpte; pa = pte_to_pa(p); - if (pa == 0) + if (pa == 0) { + if (pmap != kernel_pmap && + (options & PMAP_OPTIONS_REMOVE) && + (p & INTEL_PTE_COMPRESSED)) { + /* one less "compressed" */ + num_compressed++; + /* clear marker */ + /* XXX probably does not need to be atomic! */ + pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0); + } continue; + } num_found++; if (iswired(p)) @@ -900,6 +997,7 @@ pmap_remove_range( * Just remove the mappings. 
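			 *	(Unmanaged "device" pages carry no pv entry and
			 *	no refmod state, so this path only bumps the new
			 *	num_device count; the matching stats.device
			 *	updates remain disabled under "#if 00" later in
			 *	this function.)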
*/ pmap_store_pte(cpte, 0); + num_device++; continue; } @@ -937,6 +1035,13 @@ pmap_remove_range( continue; } num_removed++; + if (IS_REUSABLE_PAGE(pai)) { + num_reusable++; + } else if (IS_INTERNAL_PAGE(pai)) { + num_internal++; + } else { + num_external++; + } /* * Get the modify and reference bits, then @@ -979,9 +1084,30 @@ update_counts: panic("pmap_remove_range: resident_count"); #endif pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed)); + pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed)); assert(pmap->stats.resident_count >= num_removed); OSAddAtomic(-num_removed, &pmap->stats.resident_count); + if (pmap != kernel_pmap) { +#if 00 + assert(pmap->stats.device >= num_device); + if (num_device) + OSAddAtomic(-num_device, &pmap->stats.device); +#endif /* 00 */ + assert(pmap->stats.external >= num_external); + if (num_external) + OSAddAtomic(-num_external, &pmap->stats.external); + assert(pmap->stats.internal >= num_internal); + if (num_internal) + OSAddAtomic(-num_internal, &pmap->stats.internal); + assert(pmap->stats.reusable >= num_reusable); + if (num_reusable) + OSAddAtomic(-num_reusable, &pmap->stats.reusable); + assert(pmap->stats.compressed >= num_compressed); + if (num_compressed) + OSAddAtomic64(-num_compressed, &pmap->stats.compressed); + } + #if TESTING if (pmap->stats.wired_count < num_unwired) panic("pmap_remove_range: wired_count"); @@ -1006,6 +1132,16 @@ pmap_remove( pmap_t map, addr64_t s64, addr64_t e64) +{ + pmap_remove_options(map, s64, e64, 0); +} + +void +pmap_remove_options( + pmap_t map, + addr64_t s64, + addr64_t e64, + int options) { pt_entry_t *pde; pt_entry_t *spte, *epte; @@ -1080,7 +1216,8 @@ pmap_remove( spte = &spte[ptenum(s64)]; epte = &spte[intel_btop(l64 - s64)]; } - pmap_remove_range(map, s64, spte, epte); + pmap_remove_range_options(map, s64, spte, epte, + options); } s64 = l64; @@ -1098,17 +1235,27 @@ pmap_remove( } +void +pmap_page_protect( + ppnum_t pn, + vm_prot_t prot) +{ + pmap_page_protect_options(pn, prot, 0, NULL); +} + /* - * Routine: pmap_page_protect + * Routine: pmap_page_protect_options * * Function: * Lower the permission for all mappings to a given * page. */ void -pmap_page_protect( +pmap_page_protect_options( ppnum_t pn, - vm_prot_t prot) + vm_prot_t prot, + unsigned int options, + void *arg) { pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; @@ -1121,6 +1268,7 @@ pmap_page_protect( int pai; pmap_t pmap; boolean_t remove; + pt_entry_t new_pte_value; pmap_intr_assert(); assert(pn != vm_page_fictitious_addr); @@ -1188,10 +1336,6 @@ pmap_page_protect( * Remove the mapping if new protection is NONE */ if (remove) { - /* - * Remove the mapping, collecting dirty bits. 
- */ - pmap_update_pte(pte, INTEL_PTE_VALID, 0); /* Remove per-pmap wired count */ if (iswired(*pte)) { @@ -1199,11 +1343,37 @@ pmap_page_protect( pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE); } - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE); - pmap_phys_attributes[pai] |= - *pte & (PHYS_MODIFIED|PHYS_REFERENCED); - pmap_store_pte(pte, 0); + if (pmap != kernel_pmap && + (options & PMAP_OPTIONS_COMPRESSOR) && + IS_INTERNAL_PAGE(pai)) { + /* adjust "reclaimed" stats */ + OSAddAtomic64(+1, &pmap->stats.compressed); + PMAP_STATS_PEAK(pmap->stats.compressed); + pmap->stats.compressed_lifetime++; + /* mark this PTE as having been "reclaimed" */ + new_pte_value = INTEL_PTE_COMPRESSED; + } else { + new_pte_value = 0; + } + + if (options & PMAP_OPTIONS_NOREFMOD) { + pmap_store_pte(pte, new_pte_value); + if (options & PMAP_OPTIONS_NOFLUSH) + PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg); + else + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); + } else { + /* + * Remove the mapping, collecting dirty bits. + */ + pmap_update_pte(pte, INTEL_PTE_VALID, 0); + + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE); + pmap_phys_attributes[pai] |= + *pte & (PHYS_MODIFIED|PHYS_REFERENCED); + pmap_store_pte(pte, new_pte_value); + } #if TESTING if (pmap->stats.resident_count < 1) panic("pmap_page_protect: resident_count"); @@ -1211,6 +1381,29 @@ pmap_page_protect( pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE); assert(pmap->stats.resident_count >= 1); OSAddAtomic(-1, &pmap->stats.resident_count); + if (options & PMAP_OPTIONS_COMPRESSOR) { + /* + * This removal is only being done so we can send this page to + * the compressor; therefore it mustn't affect total task footprint. + */ + pmap_ledger_credit(pmap, task_ledgers.phys_compressed, PAGE_SIZE); + } else { + pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE); + } + + if (pmap != kernel_pmap) { + if (IS_REUSABLE_PAGE(pai)) { + assert(pmap->stats.reusable > 0); + OSAddAtomic(-1, &pmap->stats.reusable); + } else if (IS_INTERNAL_PAGE(pai)) { + assert(pmap->stats.internal > 0); + OSAddAtomic(-1, &pmap->stats.internal); + } else { + assert(pmap->stats.external > 0); + OSAddAtomic(-1, &pmap->stats.external); + } + } + /* * Deal with the pv_rooted_entry. */ @@ -1239,7 +1432,11 @@ pmap_page_protect( pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); pmap_update_pte(pte, INTEL_PTE_WRITE, 0); - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE); + + if (options & PMAP_OPTIONS_NOFLUSH) + PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg); + else + PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE); } pvh_e = nexth; } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h); @@ -1273,13 +1470,16 @@ done: 0, 0, 0, 0, 0); } + /* * Clear specified attribute bits. */ void phys_attribute_clear( ppnum_t pn, - int bits) + int bits, + unsigned int options, + void *arg) { pv_rooted_entry_t pv_h; pv_hashed_entry_t pv_e; @@ -1339,7 +1539,11 @@ phys_attribute_clear( * shadow of the 'D' bit (in particular) is * synchronized with the updated PTE. 
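			 *
			 * With PMAP_OPTIONS_NOFLUSH the invalidation is
			 * instead queued on the caller's pmap_flush_context;
			 * note that when no context is supplied (arg == NULL)
			 * the flush is skipped outright, so the caller must
			 * issue the deferred shootdown itself before relying
			 * on the cleared bits.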
*/ - PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); + if (options & PMAP_OPTIONS_NOFLUSH) { + if (arg) + PMAP_UPDATE_TLBS_DELAYED(pmap, va, va + PAGE_SIZE, (pmap_flush_context *)arg); + } else + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink); @@ -1532,3 +1736,227 @@ pmap_map_bd( PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr); return(virt); } + +void +pmap_reusable( + pmap_t pmap, + addr64_t s64, + addr64_t e64, + boolean_t reusable) +{ + pt_entry_t *pde; + pt_entry_t *spte, *epte; + addr64_t l64; + uint64_t deadline; + + pmap_intr_assert(); + + if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) + return; + + PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_START, + pmap, + (uint32_t) (s64 >> 32), s64, + (uint32_t) (e64 >> 32), e64); + + PMAP_LOCK(pmap); + + deadline = rdtsc64() + max_preemption_latency_tsc; + + while (s64 < e64) { + l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1); + if (l64 > e64) + l64 = e64; + pde = pmap_pde(pmap, s64); + + if (pde && (*pde & INTEL_PTE_VALID)) { + if (*pde & INTEL_PTE_PS) { + /* superpage: not supported */ + } else { + spte = pmap_pte(pmap, + (s64 & ~(pde_mapped_size - 1))); + spte = &spte[ptenum(s64)]; + epte = &spte[intel_btop(l64 - s64)]; + pmap_reusable_range(pmap, s64, spte, epte, + reusable); + } + } + s64 = l64; + + if (s64 < e64 && rdtsc64() >= deadline) { + PMAP_UNLOCK(pmap); + PMAP_LOCK(pmap); + deadline = rdtsc64() + max_preemption_latency_tsc; + } + } + + PMAP_UNLOCK(pmap); + + PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_END, + pmap, reusable, 0, 0, 0); +} + +void +pmap_reusable_range( + pmap_t pmap, + vm_map_offset_t start_vaddr, + pt_entry_t *spte, + pt_entry_t *epte, + boolean_t reusable) +{ + pt_entry_t *cpte; + int num_external, num_internal, num_reusable; + ppnum_t pai; + pmap_paddr_t pa; + vm_map_offset_t vaddr; + + num_external = 0; + num_internal = 0; + num_reusable = 0; + + for (cpte = spte, vaddr = start_vaddr; + cpte < epte; + cpte++, vaddr += PAGE_SIZE_64) { + + pa = pte_to_pa(*cpte); + if (pa == 0) + continue; + + pai = pa_index(pa); + + LOCK_PVH(pai); + + pa = pte_to_pa(*cpte); + if (pa == 0) { + UNLOCK_PVH(pai); + continue; + } + if (reusable) { + /* we want to set "reusable" */ + if (IS_REUSABLE_PAGE(pai)) { + /* already reusable: no change */ + } else { + pmap_phys_attributes[pai] |= PHYS_REUSABLE; + /* one more "reusable" */ + num_reusable++; + if (IS_INTERNAL_PAGE(pai)) { + /* one less "internal" */ + num_internal--; + } else { + /* one less "external" */ + num_external--; + } + } + } else { + /* we want to clear "reusable" */ + if (IS_REUSABLE_PAGE(pai)) { + pmap_phys_attributes[pai] &= ~PHYS_REUSABLE; + /* one less "reusable" */ + num_reusable--; + if (IS_INTERNAL_PAGE(pai)) { + /* one more "internal" */ + num_internal++; + } else { + /* one more "external" */ + num_external++; + } + } else { + /* already not reusable: no change */ + } + } + + UNLOCK_PVH(pai); + + } /* for loop */ + + /* + * Update the counts + */ + if (pmap != kernel_pmap) { + if (num_external) { + OSAddAtomic(num_external, &pmap->stats.external); + PMAP_STATS_PEAK(pmap->stats.external); + } + assert(pmap->stats.external >= 0); + if (num_internal) { + OSAddAtomic(num_internal, &pmap->stats.internal); + PMAP_STATS_PEAK(pmap->stats.internal); + } + assert(pmap->stats.internal >= 0); + if (num_reusable) { + OSAddAtomic(num_reusable, &pmap->stats.reusable); + PMAP_STATS_PEAK(pmap->stats.reusable); + } + assert(pmap->stats.reusable >= 0); + } + + return; +} + +unsigned int 
+pmap_query_resident( + pmap_t pmap, + addr64_t s64, + addr64_t e64) +{ + pt_entry_t *pde; + pt_entry_t *spte, *epte; + addr64_t l64; + uint64_t deadline; + unsigned int result; + + pmap_intr_assert(); + + if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) + return 0; + + PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START, + pmap, + (uint32_t) (s64 >> 32), s64, + (uint32_t) (e64 >> 32), e64); + + result = 0; + + PMAP_LOCK(pmap); + + deadline = rdtsc64() + max_preemption_latency_tsc; + + while (s64 < e64) { + l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1); + if (l64 > e64) + l64 = e64; + pde = pmap_pde(pmap, s64); + + if (pde && (*pde & INTEL_PTE_VALID)) { + if (*pde & INTEL_PTE_PS) { + /* superpage: not supported */ + } else { + spte = pmap_pte(pmap, + (s64 & ~(pde_mapped_size - 1))); + spte = &spte[ptenum(s64)]; + epte = &spte[intel_btop(l64 - s64)]; + + for (; spte < epte; spte++) { + if (pte_to_pa(*spte) != 0) { + result++; + } + } + + } + } + s64 = l64; + + if (s64 < e64 && rdtsc64() >= deadline) { + PMAP_UNLOCK(pmap); + PMAP_LOCK(pmap); + deadline = rdtsc64() + max_preemption_latency_tsc; + } + } + + PMAP_UNLOCK(pmap); + + PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END, + pmap, 0, 0, 0, 0); + + return result; +} diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index 019d0aebe..1ba613a21 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -279,19 +279,6 @@ static inline void set_cr3_raw(uintptr_t value) __asm__ volatile("mov %0, %%cr3" : : "r" (value)); } -#if defined(__i386__) -static inline uintptr_t get_cr3(void) -{ - register uintptr_t cr3; - __asm__ volatile("mov %%cr3, %0" : "=r" (cr3)); - return(cr3); -} - -static inline void set_cr3(uintptr_t value) -{ - __asm__ volatile("mov %0, %%cr3" : : "r" (value)); -} -#else static inline uintptr_t get_cr3_base(void) { register uintptr_t cr3; @@ -304,7 +291,6 @@ static inline void set_cr3_composed(uintptr_t base, uint16_t pcid, uint32_t pres __asm__ volatile("mov %0, %%cr3" : : "r" (base | pcid | ( ( (uint64_t)preserve) << 63) ) ); } -#endif static inline uintptr_t get_cr4(void) { uintptr_t cr4; @@ -370,33 +356,10 @@ static inline void swapgs(void) #ifdef MACH_KERNEL_PRIVATE -#ifdef __i386__ - -#include - -extern void cpuid64(uint32_t); -extern void flush_tlb64(void); -extern uint64_t get64_cr3(void); -extern void set64_cr3(uint64_t); -static inline void flush_tlb(void) -{ - if (cpu_mode_is64bit()) { - flush_tlb64(); - } else { - set_cr3(get_cr3()); - } -} -static inline void flush_tlb_raw(void) -{ - flush_tlb(); -} - -#elif defined(__x86_64__) static inline void flush_tlb_raw(void) { set_cr3_raw(get_cr3_raw()); } -#endif extern int rdmsr64_carefully(uint32_t msr, uint64_t *val); extern int wrmsr64_carefully(uint32_t msr, uint64_t val); #endif /* MACH_KERNEL_PRIVATE */ @@ -431,39 +394,18 @@ static inline void invlpg(uintptr_t addr) #define rdpmc(counter,lo,hi) \ __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter)) -#ifdef __i386__ - -static inline uint64_t rdmsr64(uint32_t msr) -{ - uint64_t ret; - __asm__ volatile("rdmsr" : "=A" (ret) : "c" (msr)); - return ret; -} - -static inline void wrmsr64(uint32_t msr, uint64_t val) -{ - __asm__ volatile("wrmsr" : : "c" (msr), "A" (val)); -} +#ifdef XNU_KERNEL_PRIVATE +extern void do_mfence(void); +#define mfence() do_mfence() +#endif -static 
inline uint64_t rdtsc64(void) +static inline uint64_t rdpmc64(uint32_t pmc) { - uint64_t ret; - __asm__ volatile("lfence; rdtsc; lfence" : "=A" (ret)); - return ret; -} - -static inline uint64_t rdtscp64(uint32_t *aux) -{ - uint64_t ret; - __asm__ volatile("rdtscp; mov %%ecx, %1" - : "=A" (ret), "=m" (*aux) - : - : "ecx"); - return ret; + uint32_t lo=0, hi=0; + rdpmc(pmc, lo, hi); + return (((uint64_t)hi) << 32) | ((uint64_t)lo); } -#elif defined(__x86_64__) - static inline uint64_t rdmsr64(uint32_t msr) { uint32_t lo=0, hi=0; @@ -493,9 +435,6 @@ static inline uint64_t rdtscp64(uint32_t *aux) return ((hi) << 32) | (lo); } -#else -#error Unsupported architecture -#endif /* * rdmsr_carefully() returns 0 when the MSR has been read successfully, @@ -559,6 +498,7 @@ __END_DECLS #define MSR_IA32_MISC_ENABLE 0x1a0 + #define MSR_IA32_PACKAGE_THERM_STATUS 0x1b1 #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x1b2 @@ -586,6 +526,13 @@ __END_DECLS #define MSR_IA32_MTRR_FIX4K_F0000 0x26e #define MSR_IA32_MTRR_FIX4K_F8000 0x26f +#define MSR_IA32_PERF_FIXED_CTR0 0x309 + +#define MSR_IA32_PERF_FIXED_CTR_CTRL 0x38D +#define MSR_IA32_PERF_GLOBAL_STATUS 0x38E +#define MSR_IA32_PERF_GLOBAL_CTRL 0x38F +#define MSR_IA32_PERF_GLOBAL_OVF_CTRL 0x390 + #define MSR_IA32_PKG_C3_RESIDENCY 0x3F8 #define MSR_IA32_PKG_C6_RESIDENCY 0x3F9 #define MSR_IA32_PKG_C7_RESIDENCY 0x3FA @@ -616,7 +563,6 @@ __END_DECLS #define MSR_IA32_PKG_POWER_SKU_UNIT 0x606 #define MSR_IA32_PKG_C2_RESIDENCY 0x60D #define MSR_IA32_PKG_ENERGY_STATUS 0x611 - #define MSR_IA32_DDR_ENERGY_STATUS 0x619 #define MSR_IA32_LLC_FLUSHED_RESIDENCY_TIMER 0x61D #define MSR_IA32_RING_PERF_STATUS 0x621 diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index 28354563c..9935839d9 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include #include /* for kernel_map */ @@ -70,7 +70,6 @@ #include #include #include - #define UI_CPUFREQ_ROUNDING_FACTOR 10000000 int rtclock_config(void); @@ -88,46 +87,25 @@ rtc_timer_start(void) /* * Force a complete re-evaluation of timer deadlines. 
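	 * Parking the cached per-cpu rtcDeadline at EndOfAllTime first
	 * ensures the resync below cannot be short-circuited by a stale
	 * cached deadline and always reprograms the hardware timer.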
*/ - etimer_resync_deadlines(); + x86_lcpu()->rtcDeadline = EndOfAllTime; + timer_resync_deadlines(); } static inline uint32_t _absolutetime_to_microtime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *microsecs) { uint32_t remain; -#if defined(__i386__) - asm volatile( - "divl %3" - : "=a" (*secs), "=d" (remain) - : "A" (abstime), "r" (NSEC_PER_SEC)); - asm volatile( - "divl %3" - : "=a" (*microsecs) - : "0" (remain), "d" (0), "r" (NSEC_PER_USEC)); -#elif defined(__x86_64__) *secs = abstime / (uint64_t)NSEC_PER_SEC; remain = (uint32_t)(abstime % (uint64_t)NSEC_PER_SEC); *microsecs = remain / NSEC_PER_USEC; -#else -#error Unsupported architecture -#endif return remain; } static inline void _absolutetime_to_nanotime(uint64_t abstime, clock_sec_t *secs, clock_usec_t *nanosecs) { -#if defined(__i386__) - asm volatile( - "divl %3" - : "=a" (*secs), "=d" (*nanosecs) - : "A" (abstime), "r" (NSEC_PER_SEC)); -#elif defined(__x86_64__) *secs = abstime / (uint64_t)NSEC_PER_SEC; *nanosecs = (clock_usec_t)(abstime % (uint64_t)NSEC_PER_SEC); -#else -#error Unsupported architecture -#endif } /* @@ -245,7 +223,6 @@ rtc_clock_napped(uint64_t base, uint64_t tsc_base) if (oldnsecs < newnsecs) { _pal_rtc_nanotime_store(tsc_base, base, rntp->scale, rntp->shift, rntp); rtc_nanotime_set_commpage(rntp); - trace_set_timebases(tsc_base, base); } } @@ -475,7 +452,7 @@ rtclock_intr( } /* call the generic etimer */ - etimer_intr(user_mode, rip); + timer_intr(user_mode, rip); } @@ -567,11 +544,11 @@ nanoseconds_to_absolutetime( void machine_delay_until( - uint64_t interval, - uint64_t deadline) + uint64_t interval, + uint64_t deadline) { - (void)interval; - while (mach_absolute_time() < deadline) { - cpu_pause(); - } + (void)interval; + while (mach_absolute_time() < deadline) { + cpu_pause(); + } } diff --git a/osfmk/i386/rtclock_asm.h b/osfmk/i386/rtclock_asm.h index fedf7a4f4..5ad7dde8a 100644 --- a/osfmk/i386/rtclock_asm.h +++ b/osfmk/i386/rtclock_asm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2010 Apple Inc. All rights reserved. + * Copyright (c) 2004-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,134 +42,6 @@ #include -#if defined(__i386__) - -/* - * Nanotime returned in %edx:%eax. - * Computed from tsc based on the scale factor - * and an implicit 32 bit shift. - * - * Uses %eax, %ebx, %ecx, %edx, %esi, %edi. - */ -#define NANOTIME \ - mov %gs:CPU_NANOTIME,%edi ; \ - PAL_RTC_NANOTIME_READ_FAST() - - -/* - * Add 64-bit delta in register dreg : areg to timer pointed to by register treg. - */ -#define TIMER_UPDATE(treg,dreg,areg,offset) \ - addl (TIMER_LOW+(offset))(treg),areg /* add low bits */ ; \ - adcl dreg,(TIMER_HIGH+(offset))(treg) /* carry high bits */; \ - movl areg,(TIMER_LOW+(offset))(treg) /* updated low bit */; \ - movl (TIMER_HIGH+(offset))(treg),dreg /* copy high bits */ ; \ - movl dreg,(TIMER_HIGHCHK+(offset))(treg) /* to high check */ - -/* - * Add time delta to old timer and start new. 
- */ -#define TIMER_EVENT(old,new) \ - NANOTIME /* edx:eax nanosecs */ ; \ - movl %eax,%esi /* save timestamp */ ; \ - movl %edx,%edi /* save timestamp */ ; \ - movl %gs:CPU_ACTIVE_THREAD,%ecx /* get current thread */ ; \ - subl (old##_TIMER)+TIMER_TSTAMP(%ecx),%eax /* elapsed */ ; \ - sbbl (old##_TIMER)+TIMER_TSTAMP+4(%ecx),%edx /* time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax,old##_TIMER) /* update timer */ ; \ - movl %esi,(new##_TIMER)+TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ - movl %edi,(new##_TIMER)+TIMER_TSTAMP+4(%ecx) /* set timestamp */ ; \ - leal (new##_TIMER)(%ecx), %ecx /* compute new timer pointer */ ; \ - movl %gs:CPU_PROCESSOR,%ebx /* get current processor */ ; \ - movl %ecx,THREAD_TIMER(%ebx) /* set current timer */ ; \ - movl %esi,%eax /* restore timestamp */ ; \ - movl %edi,%edx /* restore timestamp */ ; \ - subl (old##_STATE)+TIMER_TSTAMP(%ebx),%eax /* elapsed */ ; \ - sbbl (old##_STATE)+TIMER_TSTAMP+4(%ebx),%edx /* time */ ; \ - TIMER_UPDATE(%ebx,%edx,%eax,old##_STATE)/* update timer */ ; \ - leal (new##_STATE)(%ebx),%ecx /* new state pointer */ ; \ - movl %ecx,CURRENT_STATE(%ebx) /* set current state */ ; \ - movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ - movl %edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ - -/* - * Update time on user trap entry. - * Uses %eax,%ebx,%ecx,%edx,%esi,%edi. - */ -#define TIME_TRAP_UENTRY TIMER_EVENT(USER,SYSTEM) - -/* - * update time on user trap exit. - * Uses %eax,%ebx,%ecx,%edx,%esi,%edi. - */ -#define TIME_TRAP_UEXIT TIMER_EVENT(SYSTEM,USER) - -/* - * update time on interrupt entry. - * Uses %eax,%ebx,%ecx,%edx,%esi,%edi. - * Saves processor state info on stack. - */ -#define TIME_INT_ENTRY \ - NANOTIME /* edx:eax nanosecs */ ; \ - movl %eax,%gs:CPU_INT_EVENT_TIME /* save in cpu data */ ; \ - movl %edx,%gs:CPU_INT_EVENT_TIME+4 /* save in cpu data */ ; \ - movl %eax,%esi /* save timestamp */ ; \ - movl %edx,%edi /* save timestamp */ ; \ - movl %gs:CPU_PROCESSOR,%ebx /* get current processor */ ; \ - movl THREAD_TIMER(%ebx),%ecx /* get current timer */ ; \ - subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ - sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax,0) /* update timer */ ; \ - movl KERNEL_TIMER(%ebx),%ecx /* point to kernel timer */ ; \ - movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ - movl %edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ ; \ - movl %esi,%eax /* restore timestamp */ ; \ - movl %edi,%edx /* restore timestamp */ ; \ - movl CURRENT_STATE(%ebx),%ecx /* get current state */ ; \ - pushl %ecx /* save state */ ; \ - subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ - sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax,0) /* update timer */ ; \ - leal IDLE_STATE(%ebx),%eax /* get idle state */ ; \ - cmpl %eax,%ecx /* compare current state */ ; \ - je 0f /* skip if equal */ ; \ - leal SYSTEM_STATE(%ebx),%ecx /* get system state */ ; \ - movl %ecx,CURRENT_STATE(%ebx) /* set current state */ ; \ -0: movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ - movl %edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ - -/* - * update time on interrupt exit. - * Uses %eax,%ebx,%ecx,%edx,%esi,%edi. - * Restores processor state info from stack. 
- */ -#define TIME_INT_EXIT \ - NANOTIME /* edx:eax nanosecs */ ; \ - movl %eax,%gs:CPU_INT_EVENT_TIME /* save in cpu data */ ; \ - movl %edx,%gs:CPU_INT_EVENT_TIME+4 /* save in cpu data */ ; \ - movl %eax,%esi /* save timestamp */ ; \ - movl %edx,%edi /* save timestamp */ ; \ - movl %gs:CPU_PROCESSOR,%ebx /* get current processor */ ; \ - movl KERNEL_TIMER(%ebx),%ecx /* point to kernel timer */ ; \ - subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ - sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax,0) /* update timer */ ; \ - movl THREAD_TIMER(%ebx),%ecx /* interrupted timer */ ; \ - movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ - movl %edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ ; \ - movl %esi,%eax /* restore timestamp */ ; \ - movl %edi,%edx /* restore timestamp */ ; \ - movl CURRENT_STATE(%ebx),%ecx /* get current state */ ; \ - subl TIMER_TSTAMP(%ecx),%eax /* compute elapsed time */ ; \ - sbbl TIMER_TSTAMP+4(%ecx),%edx /* compute elapsed time */ ; \ - TIMER_UPDATE(%ecx,%edx,%eax,0) /* update timer */ ; \ - popl %ecx /* restore state */ ; \ - movl %ecx,CURRENT_STATE(%ebx) /* set current state */ ; \ - movl %esi,TIMER_TSTAMP(%ecx) /* set timestamp */ ; \ - movl %edi,TIMER_TSTAMP+4(%ecx) /* set timestamp */ - -#elif defined(__x86_64__) - /* * Nanotime returned in %rax. * Computed from tsc based on the scale factor and an implicit 32 bit shift. @@ -272,7 +144,6 @@ movq %rcx,CURRENT_STATE(%rdx) /* set current state */ ; \ movq %rsi,TIMER_TSTAMP(%rcx) /* set timestamp */ -#endif /* * Check for vtimers for task. diff --git a/osfmk/i386/rtclock_asm_native.h b/osfmk/i386/rtclock_asm_native.h index 528cbfe75..ab16668e3 100644 --- a/osfmk/i386/rtclock_asm_native.h +++ b/osfmk/i386/rtclock_asm_native.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Apple Inc. All rights reserved. + * Copyright (c) 2010-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -39,37 +39,6 @@ #ifndef _PAL_RTCLOCK_ASM_NATIVE_H_ #define _PAL_RTCLOCK_ASM_NATIVE_H_ - -#if defined(__i386__) -/* - * Assembly snippet included in exception handlers and rtc_nanotime_read() - * %edi points to nanotime info struct - * %edx:%eax returns nanotime - */ -#define PAL_RTC_NANOTIME_READ_FAST() \ -0: movl RNT_GENERATION(%edi),%esi /* being updated? 
*/ ; \ - testl %esi,%esi ; \ - jz 0b /* wait until done */ ; \ - lfence ; \ - rdtsc ; \ - lfence ; \ - subl RNT_TSC_BASE(%edi),%eax ; \ - sbbl RNT_TSC_BASE+4(%edi),%edx /* tsc - tsc_base */ ; \ - movl RNT_SCALE(%edi),%ecx /* * scale factor */ ; \ - movl %edx,%ebx ; \ - mull %ecx ; \ - movl %ebx,%eax ; \ - movl %edx,%ebx ; \ - mull %ecx ; \ - addl %ebx,%eax ; \ - adcl $0,%edx ; \ - addl RNT_NS_BASE(%edi),%eax /* + ns_base */ ; \ - adcl RNT_NS_BASE+4(%edi),%edx ; \ - cmpl RNT_GENERATION(%edi),%esi /* check for update */ ; \ - jne 0b /* do it all again */ - -#elif defined(__x86_64__) - /* * Assembly snippet included in exception handlers and rtc_nanotime_read() * @@ -113,7 +82,4 @@ cmpl RNT_GENERATION(%rdi),%esi /* repeat if changed */ ; \ jne 0b -#endif /* !defined(x86_64) */ - - #endif /* _PAL_RTCLOCK_ASM_NATIVE_H_ */ diff --git a/osfmk/i386/rtclock_native.c b/osfmk/i386/rtclock_native.c index cf24293be..436877ae3 100644 --- a/osfmk/i386/rtclock_native.c +++ b/osfmk/i386/rtclock_native.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include static uint64_t rtc_decrementer_min; static uint64_t rtc_decrementer_max; diff --git a/osfmk/i386/seg.h b/osfmk/i386/seg.h index 94af4521f..5d555bef9 100644 --- a/osfmk/i386/seg.h +++ b/osfmk/i386/seg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -176,11 +176,6 @@ struct fake_descriptor64 { uint32_t access:8; /* access */ uint32_t reserved:32; /* reserved/zero */ }; -#ifdef __i386__ -#define FAKE_UBER64(addr32) { (uint32_t) (addr32), KERNEL_UBER_BASE_HI32 } -#define FAKE_COMPAT(addr32) { (uint32_t) (addr32), 0x0 } -#define UBER64(addr32) ((addr64_t) (uintptr_t)addr32 + KERNEL_UBER_BASE) -#endif /* * Boot-time data for master (or only) CPU @@ -262,46 +257,6 @@ __END_DECLS #define NULL_SEG 0 -#ifdef __i386__ -/* - * User descriptors for MACH - 32-bit flat address space - */ -#define SYSENTER_CS 0x07 /* sysenter kernel code segment */ -#define SYSENTER_DS 0x0f /* sysenter kernel data segment */ -#define USER_CS 0x17 /* user code segment - Must be SYSENTER_CS+16 for sysexit */ -/* Special case: sysenter with EFL_TF (trace bit) set - use iret not sysexit */ -#define SYSENTER_TF_CS (USER_CS|0x10000) -#define USER_DS 0x1f /* user data segment - Must be SYSENTER_CS+24 for sysexit */ -#define USER64_CS 0x27 /* 64-bit user code segment - Must be USER_CS+16 for sysret */ -#define USER64_DS USER_DS /* 64-bit user data segment == 32-bit */ -#define SYSCALL_CS 0x2f /* 64-bit syscall pseudo-segment */ -#define USER_CTHREAD 0x37 /* user cthread area */ -#define USER_SETTABLE 0x3f /* start of user settable ldt entries */ - -/* - * Kernel descriptors for MACH - 32-bit flat address space. - */ -#define KERNEL32_CS 0x08 /* kernel code */ -#define KERNEL_DS 0x10 /* kernel data */ -#define KERNEL_LDT 0x18 /* master LDT */ -#define KERNEL_LDT_2 0x20 /* master LDT expanded for 64-bit */ -#define KERNEL_TSS 0x28 /* master TSS */ -#define KERNEL_TSS_2 0x30 /* master TSS expanded for 64-bit */ - -#define MC_TSS 0x38 /* machine-check handler TSS */ - -#define CPU_DATA_GS 0x48 /* per-cpu data */ - -#define DF_TSS 0x50 /* double-fault handler TSS */ - -#define USER_LDT 0x58 -#define USER_TSS 0x60 -#define FPE_CS 0x68 - -#else // __x86_64__ /* * Kernel descriptors for MACH - 64-bit flat address space. 
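The x86_64 PAL_RTC_NANOTIME_READ_FAST loop retained in the rtclock_asm_native.h hunk above (and its i386 twin deleted here) is a seqlock-style protocol: the updater zeroes the generation word while rewriting the tsc_base/scale/ns_base triple, and readers retry until the generation they sampled before computing is still in place afterwards. A user-space C rendering of the same protocol; nanotime_info_t, rdtsc_stub() and the field names are stand-ins for the kernel's nanotime info structure (the RNT_* offsets used by the assembly):

	#include <stdint.h>
	#include <stdatomic.h>
	#include <stdio.h>

	/* Stand-in for the kernel's nanotime info record. */
	typedef struct {
		_Atomic uint32_t generation;	/* 0 => update in progress */
		uint64_t	tsc_base;	/* TSC value at last update */
		uint64_t	ns_base;	/* nanoseconds at last update */
		uint32_t	scale;		/* ns per tick, 32.32 fixed point */
	} nanotime_info_t;

	static uint64_t
	rdtsc_stub(void)
	{
		return 123456789;	/* placeholder for the rdtsc instruction */
	}

	static uint64_t
	nanotime_read(nanotime_info_t *rnt)
	{
		uint32_t gen;
		uint64_t ns;

		do {
			/* spin while an update is in flight */
			while ((gen = atomic_load(&rnt->generation)) == 0)
				;
			uint64_t tsc = rdtsc_stub() - rnt->tsc_base;
			/* (tsc * scale) with the implicit 32-bit shift of the
			 * assembly version; __uint128_t is a gcc/clang
			 * extension on 64-bit targets */
			ns = (uint64_t)(((__uint128_t)tsc * rnt->scale) >> 32);
			ns += rnt->ns_base;
			/* retry if the updater ran while we were computing */
		} while (atomic_load(&rnt->generation) != gen);

		return ns;
	}

	int
	main(void)
	{
		/* UINT32_MAX is ~1.0 in 32.32 fixed point */
		nanotime_info_t rnt = { 1, 0, 0, UINT32_MAX };
		printf("%llu ns\n", (unsigned long long)nanotime_read(&rnt));
		return 0;
	}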
@@ -322,19 +277,9 @@ __END_DECLS /* 12: other 8 bytes of USER_LDT */ #define KERNEL_DS 0x68 /* 13: 32-bit kernel data */ -#endif -#ifdef __i386__ -#if !defined(USER_WINDOW_SEL) -#define USER_WINDOW_SEL 0x70 /* 14: window for copyin/copyout */ -#define PHYS_WINDOW_SEL 0x78 /* 15: window for copyin/copyout */ -#endif -#define KERNEL64_CS 0x80 /* 16: kernel 64-bit code */ -#define KERNEL64_SS 0x88 /* 17: kernel 64-bit (syscall) stack */ -#else // __x86_64__ #define SYSENTER_TF_CS (USER_CS|0x10000) #define SYSENTER_DS KERNEL64_SS /* sysenter kernel data segment */ -#endif #ifdef __x86_64__ /* diff --git a/osfmk/i386/start.s b/osfmk/i386/start.s deleted file mode 100644 index c8a904038..000000000 --- a/osfmk/i386/start.s +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -#include - -#include -#include -#include -#include - -#define CX(addr,reg) addr(,reg,4) - -#include -#include - -/* - * Interrupt and bootup stack for initial processor. - */ - -/* in the __HIB section since the hibernate restore code uses this stack. 
*/ - .section __HIB, __data - .align 12 - - .globl EXT(low_intstack) -EXT(low_intstack): - .globl EXT(gIOHibernateRestoreStack) -EXT(gIOHibernateRestoreStack): - - .space INTSTACK_SIZE - - .globl EXT(low_eintstack) -EXT(low_eintstack:) - .globl EXT(gIOHibernateRestoreStackEnd) -EXT(gIOHibernateRestoreStackEnd): - -/* - * Pointers to GDT and IDT. These contain linear addresses. - */ - .align ALIGN - .globl EXT(gdtptr) - /* align below properly */ - .word 0 -LEXT(gdtptr) - .word Times(8,GDTSZ)-1 - .long EXT(master_gdt) - - /* back to the regular __DATA section. */ - - .section __DATA, __data - -/* - * Stack for last-gasp double-fault handler. - */ - .align 12 - .globl EXT(df_task_stack) -EXT(df_task_stack): - .space INTSTACK_SIZE - .globl EXT(df_task_stack_end) -EXT(df_task_stack_end): - - -/* - * Stack for machine-check handler. - */ - .align 12 - .globl EXT(mc_task_stack) -EXT(mc_task_stack): - .space INTSTACK_SIZE - .globl EXT(mc_task_stack_end) -EXT(mc_task_stack_end): - -/* - * BSP CPU start here. - * eax points to kernbootstruct - * - * Environment: - * protected mode, no paging, flat 32-bit address space. - * (Code/data/stack segments have base == 0, limit == 4G) - */ - .text - .align ALIGN - .globl EXT(_start) -LEXT(_start) - mov %ds, %bx - mov %bx, %es - mov %eax, %ebp /* Move kernbootstruct to ebp */ - mov %eax, %ebx /* get pointer to kernbootstruct */ - - mov $EXT(low_eintstack),%esp /* switch to the bootup stack */ - - POSTCODE(PSTART_ENTRY) - - lgdt EXT(gdtptr) /* load GDT */ - - mov $(KERNEL_DS),%ax /* set kernel data segment */ - mov %ax, %ds - mov %ax, %es - mov %ax, %ss - xor %ax, %ax /* fs must be zeroed; */ - mov %ax, %fs /* some bootstrappers don`t do this */ - mov %ax, %gs - cld - - /* "The Aussie Maneuver" ("Myria" variant) */ - pushl $(0xcb<<24)|KERNEL32_CS /* reload CS */ - call .-1 - -paging: - andl $0xfffffff0, %esp /* align stack */ - subl $0xc, %esp - pushl %ebp /* push boot args addr */ - xorl %ebp, %ebp /* zero frame pointer */ - - POSTCODE(PSTART_BEFORE_PAGING) - -/* - * Turn on paging. - */ - movl $EXT(IdlePDPT), %eax /* CR3 */ - movl %eax, %cr3 - movl %cr4, %eax /* PAE */ - orl $(CR4_PAE), %eax - movl %eax, %cr4 - movl %cr0,%eax /* paging */ - orl $(CR0_PG|CR0_WP),%eax - movl %eax,%cr0 - - POSTCODE(PSTART_VSTART) - - call EXT(vstart) /* run C code */ - /*NOTREACHED*/ - hlt - -/* - * AP (slave) CPUs enter here. - * - * Environment: - * protected mode, no paging, flat 32-bit address space. 
- * (Code/data/stack segments have base == 0, limit == 4G) - */ - .align ALIGN - .globl EXT(slave_pstart) -LEXT(slave_pstart) - cli /* disable interrupts, so we don`t */ - /* need IDT for a while */ - xor %ebp, %ebp // zero boot cpu - mov $EXT(mp_slave_stack)+PAGE_SIZE, %esp; - jmp paging - - -/* Code to get from real mode to protected mode */ - -#define operand_size_prefix .byte 0x66 -#define address_size_prefix .byte 0x67 -#define cs_base_prefix .byte 0x2e - -#undef LJMP -#define LJMP(segment,address) \ - operand_size_prefix ;\ - .byte 0xea ;\ - .long address-EXT(real_mode_bootstrap_base) ;\ - .word segment - -#define LGDT(address) \ - cs_base_prefix ;\ - address_size_prefix ;\ - operand_size_prefix ;\ - .word 0x010f ;\ - .byte 0x15 ;\ - .long address-EXT(real_mode_bootstrap_base) - -.section __HIB,__text -.align 12 /* Page align for single bcopy_phys() */ -.code32 -Entry(real_mode_bootstrap_base) - cli - - LGDT(EXT(protected_mode_gdtr)) - - /* set the PE bit of CR0 */ - mov %cr0, %eax - inc %eax - mov %eax, %cr0 - - /* reload CS register */ - LJMP(KERNEL32_CS, 1f + REAL_MODE_BOOTSTRAP_OFFSET) -1: - - /* we are in protected mode now */ - /* set up the segment registers */ - mov $KERNEL_DS, %eax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - mov $0, %ax - movw %ax, %fs - movw %ax, %gs - - POSTCODE(SLAVE_STARTPROG_ENTRY); - - mov PROT_MODE_START+REAL_MODE_BOOTSTRAP_OFFSET, %ecx - jmp *%ecx - -Entry(protected_mode_gdtr) - .short 160 /* limit (8*6 segs) */ - .long EXT(master_gdt) - -Entry(real_mode_bootstrap_end) - -.section __HIB,__text - .align ALIGN - .globl EXT(hibernate_machine_entrypoint) -LEXT(hibernate_machine_entrypoint) - mov %eax, %edi // save header pointer - /* restore gdt */ - lgdt EXT(protected_mode_gdtr) - - /* setup the protected mode segment registers */ - mov $KERNEL_DS, %eax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - mov $0,%ax /* fs must be zeroed; */ - mov %ax,%fs - mov %ax,%gs - - /* set up the page tables to use BootstrapPTD - * as done in idle_pt.c, but this must be done programatically */ - mov $EXT(IdlePDPT), %eax - mov $EXT(BootPTD) + (INTEL_PTE_VALID), %ecx - mov $0x0, %edx - mov %ecx, (0*8+0)(%eax) - mov %edx, (0*8+4)(%eax) - add $(PAGE_SIZE), %ecx - mov %ecx, (1*8+0)(%eax) - mov %edx, (1*8+4)(%eax) - add $(PAGE_SIZE), %ecx - mov %ecx, (2*8+0)(%eax) - mov %edx, (2*8+4)(%eax) - add $(PAGE_SIZE), %ecx - mov %ecx, (3*8+0)(%eax) - mov %edx, (3*8+4)(%eax) - mov %eax, %cr3 - - - movl %cr4,%eax - orl $(CR4_PAE),%eax - movl %eax,%cr4 /* enable page size extensions */ - - movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ - rdmsr /* MSR value return in edx: eax */ - orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ - wrmsr /* Update Extended Feature Enable reg */ - - movl %cr0, %eax - orl $(CR0_PG|CR0_WP), %eax - movl %eax, %cr0 /* ready paging */ - - mov $EXT(gIOHibernateRestoreStackEnd), %esp /* setup stack */ - xorl %ebp, %ebp /* zero frame pointer */ - - ljmpl $(KERNEL32_CS), $Ltemp -Ltemp: - xorl %eax, %eax /* Video memory - N/A */ - pushl %eax - pushl %eax - pushl %eax - mov %edi, %eax /* Pointer to hibernate header */ - pushl %eax - call EXT(hibernate_kernel_entrypoint) - /* NOTREACHED */ - hlt diff --git a/osfmk/i386/start64.s b/osfmk/i386/start64.s deleted file mode 100644 index 95a9dd664..000000000 --- a/osfmk/i386/start64.s +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include -#include -#include -#include -#include - - .data - .align 3 - .globl EXT(gdtptr64) - /* align below right */ - .word 0 -LEXT(gdtptr64) - .word Times(8,GDTSZ)-1 - /* XXX really want .quad here */ - .long EXT(master_gdt) - .long KERNEL_UBER_BASE_HI32 /* must be in uber-space */ - - .align 3 - .globl EXT(idtptr64) - /* align below right */ - .word 0 -LEXT(idtptr64) - .word Times(16,IDTSZ)-1 - /* XXX really want .quad here */ - .long EXT(master_idt64) - .long KERNEL_UBER_BASE_HI32 /* must be in uber-space */ - - .text - -Entry(ml_load_desc64) - - ENTER_64BIT_MODE() - - POSTCODE(ML_LOAD_DESC64_ENTRY) - - lgdt EXT(gdtptr64) /* load GDT */ - - POSTCODE(ML_LOAD_DESC64_GDT) - - lidt EXT(idtptr64) /* load IDT */ - - POSTCODE(ML_LOAD_DESC64_IDT) - - movw $(KERNEL_LDT),%ax /* get LDT segment */ - lldt %ax /* load LDT */ - - POSTCODE(ML_LOAD_DESC64_LDT) - - movw $(KERNEL_TSS),%ax - ltr %ax /* set up KTSS */ - - POSTCODE(ML_LOAD_DESC64_EXIT) - - ENTER_COMPAT_MODE() - - ret - - -Entry(ml_64bit_lldt) - /* (int32_t selector) */ - - FRAME - - ENTER_64BIT_MODE() - - movl B_ARG0, %eax - lldt %ax - - ENTER_COMPAT_MODE() - - EMARF - ret - -Entry(set_64bit_debug_regs) - /* x86_debug_state64_t *ds */ - - FRAME - - ENTER_64BIT_MODE() - - mov B_ARG0, %edx - mov DS64_DR0(%edx), %rax - mov %rax, %dr0 - mov DS64_DR1(%edx), %rax - mov %rax, %dr1 - mov DS64_DR2(%edx), %rax - mov %rax, %dr2 - mov DS64_DR3(%edx), %rax - mov %rax, %dr3 - - ENTER_COMPAT_MODE() - - EMARF - ret - -Entry(flush_tlb64) - - FRAME - - ENTER_64BIT_MODE() - - mov %cr3, %rax - mov %rax, %cr3 - - ENTER_COMPAT_MODE() - - EMARF - ret - -Entry(set64_cr3) - - FRAME - - movl B_ARG0, %eax - movl B_ARG1, %edx - - ENTER_64BIT_MODE() - - /* %rax = %edx:%eax */ - shl $32, %rax - shrd $32, %rdx, %rax - - mov %rax, %cr3 - - ENTER_COMPAT_MODE() - - EMARF - ret - -Entry(get64_cr3) - - FRAME - - ENTER_64BIT_MODE() - - mov %cr3, %rax - mov %rax, %rdx - shr $32, %rdx // %edx:%eax = %cr3 - - ENTER_COMPAT_MODE() - - EMARF - ret - -Entry(cpuid64) - ENTER_64BIT_MODE() - cpuid - ENTER_COMPAT_MODE() - ret - - -/* FXSAVE and FXRSTOR operate in a mode dependent fashion, hence these variants. - * Must be called with interrupts disabled. 
- */ - -Entry(fxsave64) - movl S_ARG0,%eax - ENTER_64BIT_MODE() - fxsave (%eax) - ENTER_COMPAT_MODE() - ret - -Entry(fxrstor64) - movl S_ARG0,%eax - ENTER_64BIT_MODE() - fxrstor (%rax) - ENTER_COMPAT_MODE() - ret - -Entry(xsave64o) - ENTER_64BIT_MODE() - .short 0xAE0F - /* MOD 0x4, ECX, 0x1 */ - .byte 0x21 - ENTER_COMPAT_MODE() - ret - -Entry(xrstor64o) - ENTER_64BIT_MODE() - .short 0xAE0F - /* MOD 0x5, ECX 0x1 */ - .byte 0x29 - ENTER_COMPAT_MODE() - ret - -#if CONFIG_VMX - -/* - * __vmxon -- Enter VMX Operation - * int __vmxon(addr64_t v); - */ -Entry(__vmxon) - FRAME - - ENTER_64BIT_MODE() - mov $(VMX_FAIL_INVALID), %ecx - mov $(VMX_FAIL_VALID), %edx - mov $(VMX_SUCCEED), %eax - vmxon 8(%rbp) /* physical addr passed on stack */ - cmovcl %ecx, %eax /* CF = 1, ZF = 0 */ - cmovzl %edx, %eax /* CF = 0, ZF = 1 */ - ENTER_COMPAT_MODE() - - EMARF - ret - -/* - * __vmxoff -- Leave VMX Operation - * int __vmxoff(void); - */ -Entry(__vmxoff) - FRAME - - ENTER_64BIT_MODE() - mov $(VMX_FAIL_INVALID), %ecx - mov $(VMX_FAIL_VALID), %edx - mov $(VMX_SUCCEED), %eax - vmxoff - cmovcl %ecx, %eax /* CF = 1, ZF = 0 */ - cmovzl %edx, %eax /* CF = 0, ZF = 1 */ - ENTER_COMPAT_MODE() - - EMARF - ret - -#endif /* CONFIG_VMX */ diff --git a/osfmk/i386/startup64.c b/osfmk/i386/startup64.c index b4f69f741..e0c687a2b 100644 --- a/osfmk/i386/startup64.c +++ b/osfmk/i386/startup64.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -65,135 +65,13 @@ #include -#ifdef __i386__ -void -cpu_IA32e_enable(cpu_data_t *cdp) -{ - assert(!ml_get_interrupts_enabled()); - - if (!cdp->cpu_is64bit || - (rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) != 0) - return; - - postcode(CPU_IA32_ENABLE_ENTRY); - - /* - * The following steps are performed by inlines so that - * we can be assured we don't use the stack or any other - * non-identity mapped data while paging is turned off... - */ - /* Turn paging off */ - asm volatile( - "mov %%cr0, %%eax \n\t" - "andl %0, %%eax \n\t" - "mov %%eax, %%cr0 \n\t" - : - : "i" (~CR0_PG) - : "eax" ); - - /* Pop new top level phys pg addr into CR3 */ - asm volatile( - "mov %%eax, %%cr3 \n\t" - : - : "a" ((uint32_t) kernel64_cr3)); - - /* Turn on the 64-bit mode bit */ - asm volatile( - "rdmsr \n\t" - "orl %1, %%eax \n\t" - "wrmsr \n\t" - : - : "c" (MSR_IA32_EFER), "i" (MSR_IA32_EFER_LME) - : "eax", "edx"); - - /* Turn paging on again */ - asm volatile( - "mov %%cr0, %%eax \n\t" - "orl %0, %%eax \n\t" - "mov %%eax, %%cr0 \n\t" - : - : "i" (CR0_PG) - : "eax" ); - -#if ONLY_SAFE_FOR_LINDA_SERIAL - kprintf("cpu_IA32e_enable(%p)\n", cdp); -#endif - - if ((rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) == 0) - panic("cpu_IA32e_enable() MSR_IA32_EFER_LMA not asserted"); - - cdp->cpu_kernel_cr3 = kernel64_cr3; - - postcode(CPU_IA32_ENABLE_EXIT); -} - -void -cpu_IA32e_disable(cpu_data_t *cdp) -{ - assert(!ml_get_interrupts_enabled()); - - postcode(CPU_IA32_DISABLE_ENTRY); - - if (!cdp->cpu_is64bit || - (rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) == 0) - return; - - /* - * The following steps are performed by inlines so that - * we can be assured we don't use the stack or any other - * non-identity mapped data while paging is turned off... 
- */ - /* Turn paging off */ - asm volatile( - "mov %%cr0, %%eax \n\t" - "andl %0, %%eax \n\t" - "mov %%eax, %%cr0 \n\t" - : - : "i" (~CR0_PG) - : "eax" ); - - /* Pop legacy top level phys pg addr into CR3 */ - asm volatile( - "mov %%eax, %%cr3 \n\t" - : - : "a" ((uint32_t) lo_kernel_cr3)); - - /* Turn off the 64-bit mode bit */ - asm volatile( - "rdmsr \n\t" - "andl %1, %%eax \n\t" - "wrmsr \n\t" - : - : "c" (MSR_IA32_EFER), "i" (~MSR_IA32_EFER_LME) - : "eax", "edx"); - - /* Turn paging on again */ - asm volatile( - "mov %%cr0, %%eax \n\t" - "orl %0, %%eax \n\t" - "mov %%eax, %%cr0 \n\t" - : - : "i" (CR0_PG) - : "eax" ); - - kprintf("cpu_IA32e_disable(%p)\n", cdp); - - if ((rdmsr64(MSR_IA32_EFER) & MSR_IA32_EFER_LMA) != 0) - panic("cpu_IA32e_disable() MSR_IA32_EFER_LMA not cleared"); - - cdp->cpu_kernel_cr3 = 0ULL; - - postcode(CPU_IA32_DISABLE_EXIT); -} -#endif - #if DEBUG extern void dump_regs64(void); extern void dump_gdt(void *); extern void dump_ldt(void *); extern void dump_idt(void *); extern void dump_tss(void *); -extern void dump_frame32(x86_saved_state_compat32_t *scp); +extern void dump_frame32(x86_saved_state32_t *sp); extern void dump_frame64(x86_saved_state64_t *sp); extern void dump_frame(x86_saved_state_t *sp); @@ -201,7 +79,7 @@ void dump_frame(x86_saved_state_t *sp) { if (is_saved_state32(sp)) - dump_frame32((x86_saved_state_compat32_t *) sp); + dump_frame32(&sp->ss_32); else if (is_saved_state64(sp)) dump_frame64(&sp->ss_64); else @@ -209,44 +87,36 @@ dump_frame(x86_saved_state_t *sp) } void -dump_frame32(x86_saved_state_compat32_t *scp) +dump_frame32(x86_saved_state32_t *sp) { unsigned int i; - uint32_t *ip = (uint32_t *) scp; + uint32_t *ip = (uint32_t *) sp; - kprintf("dump_frame32(%p):\n", scp); + kprintf("dump_frame32(%p):\n", sp); for (i = 0; - i < sizeof(x86_saved_state_compat32_t)/sizeof(uint32_t); + i < sizeof(x86_saved_state32_t)/sizeof(uint32_t); i++, ip++) kprintf("%p: 0x%08x\n", ip, *ip); - kprintf("scp->isf64.err: 0x%016llx\n", scp->isf64.err); - kprintf("scp->isf64.rip: 0x%016llx\n", scp->isf64.rip); - kprintf("scp->isf64.cs: 0x%016llx\n", scp->isf64.cs); - kprintf("scp->isf64.rflags: 0x%016llx\n", scp->isf64.rflags); - kprintf("scp->isf64.rsp: 0x%016llx\n", scp->isf64.rsp); - kprintf("scp->isf64.ss: 0x%016llx\n", scp->isf64.ss); - - kprintf("scp->iss32.tag: 0x%08x\n", scp->iss32.tag); - kprintf("scp->iss32.state.gs: 0x%08x\n", scp->iss32.state.gs); - kprintf("scp->iss32.state.fs: 0x%08x\n", scp->iss32.state.fs); - kprintf("scp->iss32.state.es: 0x%08x\n", scp->iss32.state.es); - kprintf("scp->iss32.state.ds: 0x%08x\n", scp->iss32.state.ds); - kprintf("scp->iss32.state.edi: 0x%08x\n", scp->iss32.state.edi); - kprintf("scp->iss32.state.esi: 0x%08x\n", scp->iss32.state.esi); - kprintf("scp->iss32.state.ebp: 0x%08x\n", scp->iss32.state.ebp); - kprintf("scp->iss32.state.cr2: 0x%08x\n", scp->iss32.state.cr2); - kprintf("scp->iss32.state.ebx: 0x%08x\n", scp->iss32.state.ebx); - kprintf("scp->iss32.state.edx: 0x%08x\n", scp->iss32.state.edx); - kprintf("scp->iss32.state.ecx: 0x%08x\n", scp->iss32.state.ecx); - kprintf("scp->iss32.state.eax: 0x%08x\n", scp->iss32.state.eax); - kprintf("scp->iss32.state.trapno: 0x%08x\n", scp->iss32.state.eax); - kprintf("scp->iss32.state.eip: 0x%08x\n", scp->iss32.state.eip); - kprintf("scp->iss32.state.cs: 0x%08x\n", scp->iss32.state.cs); - kprintf("scp->iss32.state.efl: 0x%08x\n", scp->iss32.state.efl); - kprintf("scp->iss32.state.uesp: 0x%08x\n", scp->iss32.state.uesp); - kprintf("scp->iss32.state.ss: 0x%08x\n", 
scp->iss32.state.ss);
+	kprintf("sp->gs: 0x%08x\n", sp->gs);
+	kprintf("sp->fs: 0x%08x\n", sp->fs);
+	kprintf("sp->es: 0x%08x\n", sp->es);
+	kprintf("sp->ds: 0x%08x\n", sp->ds);
+	kprintf("sp->edi: 0x%08x\n", sp->edi);
+	kprintf("sp->esi: 0x%08x\n", sp->esi);
+	kprintf("sp->ebp: 0x%08x\n", sp->ebp);
+	kprintf("sp->cr2: 0x%08x\n", sp->cr2);
+	kprintf("sp->ebx: 0x%08x\n", sp->ebx);
+	kprintf("sp->edx: 0x%08x\n", sp->edx);
+	kprintf("sp->ecx: 0x%08x\n", sp->ecx);
+	kprintf("sp->eax: 0x%08x\n", sp->eax);
+	kprintf("sp->trapno: 0x%08x\n", sp->trapno);
+	kprintf("sp->eip: 0x%08x\n", sp->eip);
+	kprintf("sp->cs: 0x%08x\n", sp->cs);
+	kprintf("sp->efl: 0x%08x\n", sp->efl);
+	kprintf("sp->uesp: 0x%08x\n", sp->uesp);
+	kprintf("sp->ss: 0x%08x\n", sp->ss);
 
 	postcode(0x99);
 }
@@ -351,7 +221,6 @@ dump_tss(void *tssp)
 	}
 }
 
-#if defined(__x86_64__)
 void dump_regs64(void)
 {
@@ -394,5 +263,4 @@ void dump_regs64(void)
 	KPRINT_REG(r13);
 	KPRINT_REG(r14);
 }
-#endif /* __x86_64__ */
 #endif /* DEBUG */
diff --git a/osfmk/i386/thread.h b/osfmk/i386/thread.h
index bbccc7832..91d684b8d 100644
--- a/osfmk/i386/thread.h
+++ b/osfmk/i386/thread.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
@@ -89,21 +89,6 @@
 * as saved in a context-switch. It lives at the base of the stack.
 */
 
-#ifdef __i386__
-struct x86_kernel_state {
-	uint32_t	k_ebx;	/* kernel context */
-	uint32_t	k_esp;
-	uint32_t	k_ebp;
-	uint32_t	k_edi;
-	uint32_t	k_esi;
-	uint32_t	k_eip;
-	/*
-	 * Kernel stacks are 16-byte aligned with x86_kernel_state at the top,
-	 * so we need a couple of dummy 32-bit words here.
-	 */
-	uint32_t	dummy[2];
-};
-#else
 struct x86_kernel_state {
 	uint64_t	k_rbx;	/* kernel context */
 	uint64_t	k_rsp;
@@ -114,12 +99,11 @@ struct x86_kernel_state {
 	uint64_t	k_r15;
 	uint64_t	k_rip;
 };
-#endif
 
 /*
 * Maps state flavor to number of words in the state:
 */
-__private_extern__ unsigned int	_MachineStateCount[];
+extern unsigned int	_MachineStateCount[];
 
 /*
 * The machine-dependent thread state - registers and all platform-dependent
@@ -128,14 +112,13 @@ __private_extern__ unsigned int _MachineStateCount[];
 * as the PCB.
 */
 struct machine_thread {
-	void			*sf;
 	x86_saved_state_t	*iss;
 	void			*ifps;
 	void			*ids;
 	decl_simple_lock_data(,lock);	/* protects ifps and ids */
 	uint64_t		iss_pte0;
 	uint64_t		iss_pte1;
-	uint32_t		arg_store_valid;
+
 #ifdef	MACH_BSD
 	uint64_t		cthread_self;	/* for use of cthread package */
 	struct real_descriptor	cthread_desc;
@@ -144,7 +127,6 @@ struct machine_thread {
 #endif
 
 	struct pal_pcb		pal_pcb;
-	uint32_t		specFlags;
 #define	OnProc		0x1
 #define	CopyIOActive	0x2	/* Checked to ensure DTrace actions do not re-enter copyio(). */
diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c
index c6f921c6f..e15f40b05 100644
--- a/osfmk/i386/trap.c
+++ b/osfmk/i386/trap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -89,7 +89,9 @@ #include #include #include - +#if CONFIG_TELEMETRY +#include +#endif #include #include @@ -113,14 +115,8 @@ extern void kprint_state(x86_saved_state64_t *saved_state); * Forward declarations */ static void user_page_fault_continue(kern_return_t kret); -#ifdef __i386__ -static void panic_trap(x86_saved_state32_t *saved_state); -static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip); -extern void panic_64(x86_saved_state_t *, int, const char *, boolean_t); -#else static void panic_trap(x86_saved_state64_t *saved_state); static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip); -#endif volatile perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */ @@ -193,7 +189,7 @@ thread_syscall_return( ret); #endif } - throttle_lowpri_io(TRUE); + throttle_lowpri_io(1); thread_exception_return(); /*NOTREACHED*/ @@ -335,6 +331,9 @@ void interrupt_populate_latency_stats(char *buf, unsigned bufsize) { snprintf(buf, bufsize, "0x%x 0x%x 0x%llx", tcpu, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency_vector, cpu_data_ptr[tcpu]->cpu_max_observed_int_latency); } +uint32_t interrupt_timer_coalescing_enabled = 1; +uint64_t interrupt_coalesced_timers; + /* * Handle interrupts: * - local APIC interrupts (IPIs, timers, etc) are handled by the kernel, @@ -349,6 +348,7 @@ interrupt(x86_saved_state_t *state) boolean_t user_mode = FALSE; int ipl; int cnum = cpu_number(); + cpu_data_t *cdp = cpu_data_ptr[cnum]; int itype = 0; if (is_saved_state64(state) == TRUE) { @@ -391,6 +391,17 @@ interrupt(x86_saved_state_t *state) SCHED_STATS_INTERRUPT(current_processor()); +#if CONFIG_TELEMETRY + if (telemetry_needs_record + && (current_task() != kernel_task) +#if CONFIG_SCHED_IDLE_IN_PLACE + && ((current_thread()->state & TH_IDLE) == 0) /* idle-in-place should be treated like the idle thread */ +#endif + ) { + telemetry_mark_curthread(user_mode); + } +#endif + ipl = get_preemption_level(); /* @@ -405,21 +416,40 @@ interrupt(x86_saved_state_t *state) } - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, - MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, - interrupt_num, 0, 0, 0, 0); - - if (cpu_data_ptr[cnum]->cpu_nested_istack) { - cpu_data_ptr[cnum]->cpu_nested_istack_events++; + if (__improbable(cdp->cpu_nested_istack)) { + cdp->cpu_nested_istack_events++; } else { - uint64_t int_latency = mach_absolute_time() - cpu_data_ptr[cnum]->cpu_int_event_time; - if (ilat_assert && (int_latency > interrupt_latency_cap) && !machine_timeout_suspended()) { - panic("Interrupt vector 0x%x exceeded interrupt latency threshold, 0x%llx absolute time delta, prior signals: 0x%x, current signals: 0x%x", interrupt_num, int_latency, cpu_data_ptr[cnum]->cpu_prior_signals, cpu_data_ptr[cnum]->cpu_signals); + uint64_t ctime = mach_absolute_time(); + uint64_t int_latency = ctime - cdp->cpu_int_event_time; + uint64_t esdeadline, ehdeadline; + /* Attempt to process deferred timers in the context of + * this interrupt, unless interrupt time has already exceeded + * TCOAL_ILAT_THRESHOLD. 
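+	 * A sketch of the decision coded below (field names are those of
+	 * cpu_data_t as used here, times in mach_absolute_time() units):
+	 *
+	 *	if (int_latency < TCOAL_ILAT_THRESHOLD &&
+	 *	    earliest_soft_deadline <= now && now < hard_deadline)
+	 *		rtclock_intr(state);	// expire the timer early and
+	 *					// count a coalesced expiration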
+ */ +#define TCOAL_ILAT_THRESHOLD (30000ULL) + + if ((int_latency < TCOAL_ILAT_THRESHOLD) && + interrupt_timer_coalescing_enabled) { + esdeadline = cdp->rtclock_timer.queue.earliest_soft_deadline; + ehdeadline = cdp->rtclock_timer.deadline; + if ((ctime >= esdeadline) && (ctime < ehdeadline)) { + interrupt_coalesced_timers++; + TCOAL_DEBUG(0x88880000 | DBG_FUNC_START, ctime, esdeadline, ehdeadline, interrupt_coalesced_timers, 0); + rtclock_intr(state); + TCOAL_DEBUG(0x88880000 | DBG_FUNC_END, ctime, esdeadline, interrupt_coalesced_timers, 0, 0); + } else { + TCOAL_DEBUG(0x77770000, ctime, cdp->rtclock_timer.queue.earliest_soft_deadline, cdp->rtclock_timer.deadline, interrupt_coalesced_timers, 0); + } } - if (int_latency > cpu_data_ptr[cnum]->cpu_max_observed_int_latency) { - cpu_data_ptr[cnum]->cpu_max_observed_int_latency = int_latency; - cpu_data_ptr[cnum]->cpu_max_observed_int_latency_vector = interrupt_num; + + if (__improbable(ilat_assert && (int_latency > interrupt_latency_cap) && !machine_timeout_suspended())) { + panic("Interrupt vector 0x%x exceeded interrupt latency threshold, 0x%llx absolute time delta, prior signals: 0x%x, current signals: 0x%x", interrupt_num, int_latency, cdp->cpu_prior_signals, cdp->cpu_signals); + } + + if (__improbable(int_latency > cdp->cpu_max_observed_int_latency)) { + cdp->cpu_max_observed_int_latency = int_latency; + cdp->cpu_max_observed_int_latency_vector = interrupt_num; } } @@ -427,17 +457,22 @@ interrupt(x86_saved_state_t *state) * Having serviced the interrupt first, look at the interrupted stack depth. */ if (!user_mode) { - uint64_t depth = cpu_data_ptr[cnum]->cpu_kernel_stack + uint64_t depth = cdp->cpu_kernel_stack + sizeof(struct x86_kernel_state) + sizeof(struct i386_exception_link *) - rsp; - if (depth > kernel_stack_depth_max) { + if (__improbable(depth > kernel_stack_depth_max)) { kernel_stack_depth_max = (vm_offset_t)depth; KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DEPTH), (long) depth, (long) VM_KERNEL_UNSLIDE(rip), 0, 0, 0); } } + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_EXCP_INTR, 0) | DBG_FUNC_END, + interrupt_num, 0, 0, 0, 0); + } static inline void @@ -463,11 +498,7 @@ kernel_trap( x86_saved_state_t *state, uintptr_t *lo_spp) { -#ifdef __i386__ - x86_saved_state32_t *saved_state; -#else x86_saved_state64_t *saved_state; -#endif int code; user_addr_t vaddr; int type; @@ -486,22 +517,6 @@ kernel_trap( thread = current_thread(); -#ifdef __i386__ - if (__improbable(is_saved_state64(state))) { - panic_64(state, 0, "Kernel trap with 64-bit state", FALSE); - } - - saved_state = saved_state32(state); - - /* Record cpu where state was captured (trampolines don't set this) */ - saved_state->cpu = cpu_number(); - - vaddr = (user_addr_t)saved_state->cr2; - type = saved_state->trapno; - code = saved_state->err & 0xffff; - intr = (saved_state->efl & EFL_IF) != 0; /* state of ints at trap */ - kern_ip = (vm_offset_t)saved_state->eip; -#else if (__improbable(is_saved_state32(state))) panic("kernel_trap(%p) with 32-bit state", state); saved_state = saved_state64(state); @@ -514,7 +529,6 @@ kernel_trap( code = (int)(saved_state->isf.err & 0xffff); intr = (saved_state->isf.rflags & EFL_IF) != 0; /* state of ints at trap */ kern_ip = (vm_offset_t)saved_state->isf.rip; -#endif myast = ast_pending(); @@ -659,11 +673,7 @@ kernel_trap( fpSSEexterrflt(); return; case T_DEBUG: -#ifdef __i386__ - if ((saved_state->efl & EFL_TF) == 0 && NO_WATCHPOINTS) -#else if ((saved_state->isf.rflags & EFL_TF) == 0 && 
NO_WATCHPOINTS) -#endif { /* We've somehow encountered a debug * register match that does not belong @@ -703,7 +713,8 @@ kernel_trap( #endif result = vm_fault(map, - vm_map_trunc_page(vaddr), + vm_map_trunc_page(vaddr, + PAGE_MASK), prot, FALSE, THREAD_UNINT, NULL, 0); @@ -783,62 +794,13 @@ debugger_entry: } -#ifdef __i386__ -static void -set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip) -{ - saved_state->eip = ip; -} -#else static void set_recovery_ip(x86_saved_state64_t *saved_state, vm_offset_t ip) { saved_state->isf.rip = ip; } -#endif -#ifdef __i386__ -static void -panic_trap(x86_saved_state32_t *regs) -{ - const char *trapname = "Unknown"; - pal_cr_t cr0, cr2, cr3, cr4; - - pal_get_control_registers( &cr0, &cr2, &cr3, &cr4 ); - - /* - * Issue an I/O port read if one has been requested - this is an - * event logic analyzers can use as a trigger point. - */ - panic_io_port_read(); - - kprintf("panic trap number 0x%x, eip 0x%x\n", regs->trapno, regs->eip); - kprintf("cr0 0x%08x cr2 0x%08x cr3 0x%08x cr4 0x%08x\n", - cr0, cr2, cr3, cr4); - - if (regs->trapno < TRAP_TYPES) - trapname = trap_type[regs->trapno]; -#undef panic - panic("Kernel trap at 0x%08x, type %d=%s, registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x, CS: 0x%08x, DS: 0x%08x\n" - "Error code: 0x%08x%s\n", - regs->eip, regs->trapno, trapname, cr0, cr2, cr3, cr4, - regs->eax,regs->ebx,regs->ecx,regs->edx, - regs->cr2,regs->ebp,regs->esi,regs->edi, - regs->efl,regs->eip,regs->cs & 0xFFFF, regs->ds & 0xFFFF, regs->err, - virtualized ? " VMM" : ""); - /* - * This next statement is not executed, - * but it's needed to stop the compiler using tail call optimization - * for the panic call - which confuses the subsequent backtrace. - */ - cr0 = 0; -} -#else static void @@ -900,7 +862,6 @@ panic_trap(x86_saved_state64_t *regs) */ cr0 = 0; } -#endif #if CONFIG_DTRACE extern kern_return_t dtrace_user_probe(x86_saved_state_t *); @@ -1110,7 +1071,7 @@ user_trap( case T_PAGE_FAULT: { - prot = VM_PROT_READ; + prot = VM_PROT_READ; if (err & T_PF_WRITE) prot |= VM_PROT_WRITE; @@ -1118,13 +1079,15 @@ user_trap( if (__improbable(err & T_PF_EXECUTE)) prot |= VM_PROT_EXECUTE; #endif - kret = vm_fault(thread->map, vm_map_trunc_page(vaddr), - prot, FALSE, - THREAD_ABORTSAFE, NULL, 0); + kret = vm_fault(thread->map, + vm_map_trunc_page(vaddr, + PAGE_MASK), + prot, FALSE, + THREAD_ABORTSAFE, NULL, 0); if (__probable((kret == KERN_SUCCESS) || (kret == KERN_ABORTED))) { thread_exception_return(); - /* NOTREACHED */ + /*NOTREACHED*/ } user_page_fault_continue(kret); @@ -1232,27 +1195,11 @@ sync_iss_to_iks(x86_saved_state_t *saved_state) pal_get_kern_regs( saved_state ); if ((kstack = current_thread()->kernel_stack) != 0) { -#ifdef __i386__ - x86_saved_state32_t *regs = saved_state32(saved_state); -#else x86_saved_state64_t *regs = saved_state64(saved_state); -#endif iks = STACK_IKS(kstack); /* Did we take the trap/interrupt in kernel mode? 
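	 * (Equality of the saved-state pointer with the thread's user-mode
	 * save area, USER_REGS64(), means the trap came from user space; the
	 * kernel-context snapshot must then be captured live from the current
	 * register file rather than from an interrupt frame.)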
*/ -#ifdef __i386__ - if (regs == USER_REGS32(current_thread())) - record_active_regs = TRUE; - else { - iks->k_ebx = regs->ebx; - iks->k_esp = (int)regs; - iks->k_ebp = regs->ebp; - iks->k_edi = regs->edi; - iks->k_esi = regs->esi; - iks->k_eip = regs->eip; - } -#else if (regs == USER_REGS64(current_thread())) record_active_regs = TRUE; else { @@ -1265,20 +1212,9 @@ sync_iss_to_iks(x86_saved_state_t *saved_state) iks->k_r15 = regs->r15; iks->k_rip = regs->isf.rip; } -#endif } if (record_active_regs == TRUE) { -#ifdef __i386__ - /* Show the trap handler path */ - __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx)); - __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp)); - __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp)); - __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi)); - __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi)); - /* "Current" instruction pointer */ - __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip)); -#else /* Show the trap handler path */ __asm__ volatile("movq %%rbx, %0" : "=m" (iks->k_rbx)); __asm__ volatile("movq %%rsp, %0" : "=m" (iks->k_rsp)); @@ -1292,7 +1228,6 @@ sync_iss_to_iks(x86_saved_state_t *saved_state) : "=m" (iks->k_rip) : : "rax"); -#endif } } @@ -1309,16 +1244,6 @@ sync_iss_to_iks_unconditionally(__unused x86_saved_state_t *saved_state) { if ((kstack = current_thread()->kernel_stack) != 0) { iks = STACK_IKS(kstack); -#ifdef __i386__ - /* Display the trap handler path */ - __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx)); - __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp)); - __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp)); - __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi)); - __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi)); - /* "Current" instruction pointer */ - __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip)); -#else /* Display the trap handler path */ __asm__ volatile("movq %%rbx, %0" : "=m" (iks->k_rbx)); __asm__ volatile("movq %%rsp, %0" : "=m" (iks->k_rsp)); @@ -1329,6 +1254,5 @@ sync_iss_to_iks_unconditionally(__unused x86_saved_state_t *saved_state) { __asm__ volatile("movq %%r15, %0" : "=m" (iks->k_r15)); /* "Current" instruction pointer */ __asm__ volatile("leaq 1f(%%rip), %%rax; mov %%rax, %0\n1:" : "=m" (iks->k_rip)::"rax"); -#endif } } diff --git a/osfmk/i386/trap.h b/osfmk/i386/trap.h index f24141112..d5c14fe4f 100644 --- a/osfmk/i386/trap.h +++ b/osfmk/i386/trap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -134,10 +134,6 @@ extern void user_trap(x86_saved_state_t *regs); extern void interrupt(x86_saved_state_t *regs); -#ifdef __i386__ -extern void panic_double_fault32(int code); -extern void panic_machine_check32(int code); -#endif extern void panic_double_fault64(x86_saved_state_t *regs); extern void panic_machine_check64(x86_saved_state_t *regs); @@ -160,11 +156,7 @@ extern void panic_i386_backtrace(void *, int, const char *, boolean_t, x86_save #if MACH_KDP extern boolean_t kdp_i386_trap( unsigned int, -#ifdef __i386__ - x86_saved_state32_t *, -#else x86_saved_state64_t *, -#endif kern_return_t, vm_offset_t); #endif /* MACH_KDP */ diff --git a/osfmk/i386/trap_native.c b/osfmk/i386/trap_native.c index 8c5ea3350..23bda004e 100644 --- a/osfmk/i386/trap_native.c +++ b/osfmk/i386/trap_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009 Apple Inc. All rights reserved. + * Copyright (c) 2009-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -119,79 +119,6 @@ panic_idt64(x86_saved_state_t *rsp) } #endif -#ifdef __i386__ -static void -panic_32(__unused int code, __unused int pc, __unused const char *msg, boolean_t do_mca_dump, boolean_t do_bt) -{ - struct i386_tss *my_ktss = current_ktss(); - - /* Set postcode (DEBUG only) */ - postcode(pc); - - /* - * Issue an I/O port read if one has been requested - this is an - * event logic analyzers can use as a trigger point. - */ - panic_io_port_read(); - - /* - * Break kprintf lock in case of recursion, - * and record originally faulted instruction address. - */ - kprintf_break_lock(); - - if (do_mca_dump) { -#if CONFIG_MCA - /* - * Dump the contents of the machine check MSRs (if any). - */ - mca_dump(); -#endif - } - -#if MACH_KDP - /* - * Print backtrace leading to first fault: - */ - if (do_bt) - panic_i386_backtrace((void *) my_ktss->ebp, 10, NULL, FALSE, NULL); -#endif - - panic("%s at 0x%08x, code:0x%x, " - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x%s\n", - msg, - my_ktss->eip, code, - (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(), - my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx, - my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi, - my_ktss->eflags, my_ktss->eip, virtualized ? " VMM" : ""); -} - -/* - * Called from locore on a special reserved stack after a double-fault - * is taken in kernel space. - * Kernel stack overflow is one route here. - */ -void -panic_double_fault32(int code) -{ - (void)OSCompareAndSwap((UInt32) -1, (UInt32) cpu_number(), (volatile UInt32 *)&panic_double_fault_cpu); - panic_32(code, PANIC_DOUBLE_FAULT, "Double fault", FALSE, TRUE); -} - -/* - * Called from locore on a special reserved stack after a machine-check - */ -void -panic_machine_check32(int code) -{ - panic_32(code, PANIC_MACHINE_CHECK, "Machine-check", TRUE, FALSE); -} -#endif /* __i386__ */ void panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boolean_t do_mca_dump) @@ -221,45 +148,6 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole #endif } -#ifdef __i386__ - /* - * Dump the interrupt stack frame at last kernel entry. - */ - if (is_saved_state64(sp)) { - x86_saved_state64_t *ss64p = saved_state64(sp); - panic("%s trapno:0x%x, err:0x%qx, " - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n" - "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n" - "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n" - "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n" - "RFL: 0x%016qx, RIP: 0x%016qx, CR2: 0x%016qx%s\n", - msg, - ss64p->isf.trapno, ss64p->isf.err, - (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(), - ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx, - ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi, - ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11, - ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15, - ss64p->isf.rflags, ss64p->isf.rip, ss64p->cr2, - virtualized ? 
" VMM" : ""); - } else { - x86_saved_state32_t *ss32p = saved_state32(sp); - panic("%s at 0x%08x, trapno:0x%x, err:0x%x," - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x%s\n", - msg, - ss32p->eip, ss32p->trapno, ss32p->err, - (uint32_t)get_cr0(), (uint32_t)get_cr2(), (uint32_t)get_cr3(), (uint32_t)get_cr4(), - ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx, - ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi, - ss32p->efl, ss32p->eip, virtualized ? " VMM" : ""); - } -#else x86_saved_state64_t *regs = saved_state64(sp); panic("%s at 0x%016llx, registers:\n" "CR0: 0x%016lx, CR2: 0x%016lx, CR3: 0x%016lx, CR4: 0x%016lx\n" @@ -278,7 +166,6 @@ panic_64(x86_saved_state_t *sp, __unused int pc, __unused const char *msg, boole regs->r12, regs->r13, regs->r14, regs->r15, regs->isf.rflags, regs->isf.rip, regs->isf.cs & 0xFFFF, regs->isf.ss & 0xFFFF, regs->isf.err, virtualized ? " VMM" : ""); -#endif } void diff --git a/osfmk/i386/ucode.c b/osfmk/i386/ucode.c index e416cc5ff..7f057ed03 100644 --- a/osfmk/i386/ucode.c +++ b/osfmk/i386/ucode.c @@ -12,6 +12,7 @@ #include #include // mp_broadcast #include // cpu_number +#include // boot-args #define IA32_BIOS_UPDT_TRIG (0x79) /* microcode update trigger MSR */ @@ -150,10 +151,6 @@ cpu_update(__unused void *arg) /* execute the update */ update_microcode(); - /* if CPU #0, update global CPU information */ - if (!cpu_number()) - cpuid_set_info(); - /* release the lock */ lck_spin_unlock(ucode_slock); } @@ -167,6 +164,10 @@ xcpu_update(void) /* Get all CPUs to perform the update */ mp_broadcast(cpu_update, NULL); + + /* Update the cpuid info */ + cpuid_set_info(); + } /* @@ -177,6 +178,12 @@ int ucode_interface(uint64_t addr) { int error; + char arg[16]; + + if (PE_parse_boot_argn("-x", arg, sizeof (arg))) { + printf("ucode: no updates in safe mode\n"); + return EPERM; + } #if !DEBUG /* diff --git a/osfmk/i386/vmx/vmx_cpu.c b/osfmk/i386/vmx/vmx_cpu.c index 76b4c0ee3..27c1b3fc9 100644 --- a/osfmk/i386/vmx/vmx_cpu.c +++ b/osfmk/i386/vmx/vmx_cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -211,11 +211,6 @@ vmx_on(void *arg __unused) assert(vmx_is_cr0_valid(&cpu->specs)); assert(vmx_is_cr4_valid(&cpu->specs)); -#if defined(__i386__) - if (!cpu_mode_is64bit()) - result = VMX_FAIL_INVALID; /* Not supported in legacy mode */ - else -#endif result = __vmxon(vmxon_region_paddr); if (result != VMX_SUCCEED) { @@ -233,16 +228,13 @@ vmx_off(void *arg __unused) int result; /* Tell the CPU to release the VMXON region */ -#if defined(__i386__) - if (!cpu_mode_is64bit()) - result = VMX_FAIL_INVALID; /* Not supported in legacy mode */ - else -#endif result = __vmxoff(); if (result != VMX_SUCCEED) { panic("vmx_off: unexpected return %d from __vmxoff()", result); } + + set_cr4(get_cr4() & ~CR4_VMXE); } /* ----------------------------------------------------------------------------- diff --git a/osfmk/ipc/ipc_entry.c b/osfmk/ipc/ipc_entry.c index e14c8d5e9..80eb3d662 100644 --- a/osfmk/ipc/ipc_entry.c +++ b/osfmk/ipc/ipc_entry.c @@ -329,7 +329,7 @@ ipc_entry_alloc_name( * we must restart. 
*/ kern_return_t kr; - kr = ipc_entry_grow_table(space, index); + kr = ipc_entry_grow_table(space, index + 1); assert(kr != KERN_NO_SPACE); if (kr != KERN_SUCCESS) { /* space is unlocked */ diff --git a/osfmk/ipc/ipc_hash.c b/osfmk/ipc/ipc_hash.c index 87f4fc6c3..491111b51 100644 --- a/osfmk/ipc/ipc_hash.c +++ b/osfmk/ipc/ipc_hash.c @@ -207,7 +207,9 @@ ipc_hash_table_lookup( { mach_port_index_t hindex, index; - assert(obj != IO_NULL); + if (obj == IO_NULL) { + return FALSE; + } hindex = IH_TABLE_HASH(obj, size); diff --git a/osfmk/ipc/ipc_init.c b/osfmk/ipc/ipc_init.c index 11dd9dafc..879748a89 100644 --- a/osfmk/ipc/ipc_init.c +++ b/osfmk/ipc/ipc_init.c @@ -219,7 +219,6 @@ ipc_bootstrap(void) ipc_table_init(); semaphore_init(); - lock_set_init(); mk_timer_init(); host_notify_init(); } diff --git a/osfmk/ipc/ipc_init.h b/osfmk/ipc/ipc_init.h index 36fd8976b..30e916a28 100644 --- a/osfmk/ipc/ipc_init.h +++ b/osfmk/ipc/ipc_init.h @@ -121,9 +121,9 @@ extern int ipc_pset_max; */ /* IPC initialization needed before creation of kernel task */ -extern void ipc_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +extern void ipc_bootstrap(void); /* Remaining IPC initialization */ -extern void ipc_init(void) __attribute__((section("__TEXT, initcode"))); +extern void ipc_init(void); #endif /* _IPC_IPC_INIT_H_ */ diff --git a/osfmk/ipc/ipc_kmsg.c b/osfmk/ipc/ipc_kmsg.c index f45d1deab..70ae8f7ef 100644 --- a/osfmk/ipc/ipc_kmsg.c +++ b/osfmk/ipc/ipc_kmsg.c @@ -70,7 +70,6 @@ * Operations on kernel messages. */ -#include #include #include @@ -126,6 +125,9 @@ #define DEBUG_MSGS_K64 1 #endif +#include +#include + #pragma pack(4) typedef struct @@ -1351,6 +1353,8 @@ ipc_kmsg_get_from_kernel( * MACH_SEND_INTERRUPTED Caller still has message. * MACH_SEND_INVALID_DEST Caller still has message. */ + + mach_msg_return_t ipc_kmsg_send( ipc_kmsg_t kmsg, @@ -1361,9 +1365,22 @@ ipc_kmsg_send( mach_msg_return_t error = MACH_MSG_SUCCESS; spl_t s; +#if IMPORTANCE_INHERITANCE + boolean_t did_importance = FALSE; +#if IMPORTANCE_DEBUG + mach_msg_id_t imp_msgh_id = -1; + int sender_pid = -1; +#endif /* IMPORTANCE_DEBUG */ +#endif /* IMPORTANCE_INHERITANCE */ + + /* don't allow the creation of a circular loop */ + if (kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_CIRCULAR) { + ipc_kmsg_destroy(kmsg); + return MACH_MSG_SUCCESS; + } + port = (ipc_port_t) kmsg->ikm_header->msgh_remote_port; assert(IP_VALID(port)); - ip_lock(port); if (port->ip_receiver == ipc_space_kernel) { @@ -1393,6 +1410,10 @@ ipc_kmsg_send( /* fall thru with reply - same options */ } +#if IMPORTANCE_INHERITANCE + retry: +#endif /* IMPORTANCE_INHERITANCE */ + /* * Can't deliver to a dead port. * However, we can pretend it got sent @@ -1412,14 +1433,39 @@ ipc_kmsg_send( return MACH_MSG_SUCCESS; } - if (kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_CIRCULAR) { - ip_unlock(port); - - /* don't allow the creation of a circular loop */ +#if IMPORTANCE_INHERITANCE + /* + * Need to see if this message needs importance donation and/or + * propagation. That routine can drop the port lock. If it does + * we'll have to revalidate the destination. 
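+	 * A sketch of the donation predicate as coded below:
+	 *
+	 *	donate if  not already done for this kmsg
+	 *	       and the port accepts donation (ip_impdonation)
+	 *	       and MACH_SEND_NOIMPORTANCE was not passed
+	 *	       and (MACH_SEND_IMPORTANCE was passed, or the
+	 *	            sending task is an importance donor)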
+ */ + if ((did_importance == FALSE) && + (port->ip_impdonation != 0) && + ((option & MACH_SEND_NOIMPORTANCE) == 0) && + (((option & MACH_SEND_IMPORTANCE) != 0) || + (task_is_importance_donor(current_task())))) { + + did_importance = TRUE; + kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS_RAISEIMP; + +#if IMPORTANCE_DEBUG + if (kdebug_enable) { + mach_msg_max_trailer_t *dbgtrailer = (mach_msg_max_trailer_t *) + ((vm_offset_t)kmsg->ikm_header + round_msg(kmsg->ikm_header->msgh_size)); + sender_pid = dbgtrailer->msgh_audit.val[5]; + imp_msgh_id = kmsg->ikm_header->msgh_id; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_SEND)) | DBG_FUNC_START, + audit_token_pid_from_task(current_task()), sender_pid, imp_msgh_id, 0, 0); + } +#endif /* IMPORTANCE_DEBUG */ - ipc_kmsg_destroy(kmsg); - return MACH_MSG_SUCCESS; + if (ipc_port_importance_delta(port, 1) == TRUE) { + ip_lock(port); + goto retry; + } } +#endif /* IMPORTANCE_INHERITANCE */ /* * We have a valid message and a valid reference on the port. @@ -1429,9 +1475,59 @@ ipc_kmsg_send( s = splsched(); imq_lock(&port->ip_messages); ip_unlock(port); + error = ipc_mqueue_send(&port->ip_messages, kmsg, option, send_timeout, s); +#if IMPORTANCE_INHERITANCE + if (did_importance == TRUE) { + __unused int importance_cleared = 0; + switch (error) { + case MACH_SEND_TIMED_OUT: + case MACH_SEND_NO_BUFFER: + case MACH_SEND_INTERRUPTED: + /* + * We still have the kmsg and its + * reference on the port. But we + * have to back out the importance + * boost. + * + * The port could have changed hands, + * be inflight to another destination, + * etc... But in those cases our + * back-out will find the new owner + * (and all the operations that + * transferred the right should have + * applied their own boost adjustments + * to the old owner(s)). + */ + importance_cleared = 1; + ip_lock(port); + if (ipc_port_importance_delta(port, -1) == FALSE) + ip_unlock(port); + break; + + case MACH_SEND_INVALID_DEST: + /* + * In the case that the receive right has + * gone away, the assertion count for the + * message we were trying to enqueue was + * already subtracted from the destination + * task (as part of port destruction). + */ + break; + + case MACH_MSG_SUCCESS: + default: + break; + } +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_SEND)) | DBG_FUNC_END, + audit_token_pid_from_task(current_task()), sender_pid, imp_msgh_id, importance_cleared, 0); +#endif /* IMPORTANCE_DEBUG */ + } +#endif /* IMPORTANCE_INHERITANCE */ + /* * If the port has been destroyed while we wait, treat the message * as a successful delivery (like we do for an inactive port). 
@@ -1574,7 +1670,7 @@ mach_msg_return_t ipc_kmsg_copyin_header( mach_msg_header_t *msg, ipc_space_t space, - boolean_t notify) + mach_msg_option_t *optionp) { mach_msg_bits_t mbits = msg->msgh_bits & MACH_MSGH_BITS_USER; mach_port_name_t dest_name = CAST_MACH_PORT_TO_NAME(msg->msgh_remote_port); @@ -1588,6 +1684,11 @@ ipc_kmsg_copyin_header( ipc_port_t dest_soright, reply_soright; ipc_port_t release_port = IP_NULL; +#if IMPORTANCE_INHERITANCE + int assertcnt = 0; + boolean_t needboost = FALSE; +#endif /* IMPORTANCE_INHERITANCE */ + queue_head_t links_data; queue_t links = &links_data; wait_queue_link_t wql; @@ -1679,11 +1780,23 @@ ipc_kmsg_copyin_header( (dest_type == MACH_MSG_TYPE_MAKE_SEND_ONCE) || (reply_type == MACH_MSG_TYPE_MAKE_SEND) || (reply_type == MACH_MSG_TYPE_MAKE_SEND_ONCE)) { + +#if IMPORTANCE_INHERITANCE kr = ipc_right_copyin(space, name, dest_entry, dest_type, FALSE, &dest_port, &dest_soright, &release_port, + &assertcnt, links); + assert(assertcnt == 0); +#else + kr = ipc_right_copyin(space, name, dest_entry, + dest_type, FALSE, + &dest_port, &dest_soright, + &release_port, + links); +#endif /* IMPORTANCE_INHERITANCE */ + if (kr != KERN_SUCCESS) goto invalid_dest; @@ -1700,11 +1813,21 @@ ipc_kmsg_copyin_header( assert(IO_VALID(dest_port)); assert(dest_soright == IP_NULL); +#if IMPORTANCE_INHERITANCE + kr = ipc_right_copyin(space, name, reply_entry, + reply_type, TRUE, + &reply_port, &reply_soright, + &release_port, + &assertcnt, + links); + assert(assertcnt == 0); +#else kr = ipc_right_copyin(space, name, reply_entry, reply_type, TRUE, &reply_port, &reply_soright, &release_port, links); +#endif /* IMPORTANCE_INHERITANCE */ assert(kr == KERN_SUCCESS); assert(reply_port == dest_port); @@ -1717,11 +1840,22 @@ ipc_kmsg_copyin_header( * and dup the send right we get out. */ +#if IMPORTANCE_INHERITANCE kr = ipc_right_copyin(space, name, dest_entry, dest_type, FALSE, &dest_port, &dest_soright, &release_port, + &assertcnt, links); + assert(assertcnt == 0); +#else + kr = ipc_right_copyin(space, name, dest_entry, + dest_type, FALSE, + &dest_port, &dest_soright, + &release_port, + links); +#endif /* IMPORTANCE_INHERITANCE */ + if (kr != KERN_SUCCESS) goto invalid_dest; @@ -1772,11 +1906,22 @@ ipc_kmsg_copyin_header( * and dup the send right we get out. 
*/ +#if IMPORTANCE_INHERITANCE kr = ipc_right_copyin(space, name, dest_entry, MACH_MSG_TYPE_MOVE_SEND, FALSE, &dest_port, &soright, &release_port, + &assertcnt, links); + assert(assertcnt == 0); +#else + kr = ipc_right_copyin(space, name, dest_entry, + MACH_MSG_TYPE_MOVE_SEND, FALSE, + &dest_port, &soright, + &release_port, + links); +#endif /* IMPORTANCE_INHERITANCE */ + if (kr != KERN_SUCCESS) goto invalid_dest; @@ -1814,11 +1959,22 @@ ipc_kmsg_copyin_header( if (dest_entry == IE_NULL) goto invalid_dest; +#if IMPORTANCE_INHERITANCE kr = ipc_right_copyin(space, dest_name, dest_entry, dest_type, FALSE, &dest_port, &dest_soright, &release_port, + &assertcnt, links); + assert(assertcnt == 0); +#else + kr = ipc_right_copyin(space, dest_name, dest_entry, + dest_type, FALSE, + &dest_port, &dest_soright, + &release_port, + links); +#endif /* IMPORTANCE_INHERITANCE */ + if (kr != KERN_SUCCESS) goto invalid_dest; @@ -1881,21 +2037,43 @@ ipc_kmsg_copyin_header( reply_type)) goto invalid_reply; +#if IMPORTANCE_INHERITANCE + kr = ipc_right_copyin(space, dest_name, dest_entry, + dest_type, FALSE, + &dest_port, &dest_soright, + &release_port, + &assertcnt, + links); + assert(assertcnt == 0); +#else kr = ipc_right_copyin(space, dest_name, dest_entry, dest_type, FALSE, &dest_port, &dest_soright, &release_port, links); +#endif /* IMPORTANCE_INHERITANCE */ + if (kr != KERN_SUCCESS) goto invalid_dest; assert(IO_VALID(dest_port)); +#if IMPORTANCE_INHERITANCE kr = ipc_right_copyin(space, reply_name, reply_entry, reply_type, TRUE, &reply_port, &reply_soright, &release_port, + &assertcnt, links); + assert(assertcnt == 0); +#else + kr = ipc_right_copyin(space, reply_name, reply_entry, + reply_type, TRUE, + &reply_port, &reply_soright, + &release_port, + links); +#endif /* IMPORTANCE_INHERITANCE */ + assert(kr == KERN_SUCCESS); /* the entries might need to be deallocated */ @@ -1917,23 +2095,55 @@ ipc_kmsg_copyin_header( /* * JMM - Without rdar://problem/6275821, this is the last place we can * re-arm the send-possible notifications. It may trigger unexpectedly - * early (send may NOT have failed), but better than missing. + * early (send may NOT have failed), but better than missing. We assure + * we won't miss by forcing MACH_SEND_ALWAYS if we got past arming. */ - if (notify && dest_type != MACH_MSG_TYPE_PORT_SEND_ONCE && + if (((*optionp & MACH_SEND_NOTIFY) != 0) && + dest_type != MACH_MSG_TYPE_PORT_SEND_ONCE && dest_entry != IE_NULL && dest_entry->ie_request != IE_REQ_NONE) { ipc_port_t dport = (ipc_port_t)dest_port; assert(dport != IP_NULL); ip_lock(dport); - if (ip_active(dport) && - dport->ip_receiver != ipc_space_kernel && ip_full(dport)) { - ipc_port_request_sparm(dport, dest_name, dest_entry->ie_request); + if (ip_active(dport) && dport->ip_receiver != ipc_space_kernel) { + if (ip_full(dport)) { +#if IMPORTANCE_INHERITANCE + needboost = ipc_port_request_sparm(dport, dest_name, + dest_entry->ie_request, + (*optionp & MACH_SEND_NOIMPORTANCE)); + if (needboost == FALSE) + ip_unlock(dport); +#else + + ipc_port_request_sparm(dport, dest_name, dest_entry->ie_request); + ip_unlock(dport); +#endif /* IMPORTANCE_INHERITANCE */ + } else { + *optionp |= MACH_SEND_ALWAYS; + ip_unlock(dport); + } + } else { + ip_unlock(dport); } - ip_unlock(dport); } is_write_unlock(space); +#if IMPORTANCE_INHERITANCE + /* + * If our request is the first boosting send-possible + * notification this cycle, push the boost down the + * destination port. 
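+	 * (needboost is only TRUE when ipc_port_request_sparm() armed an
+	 * importance-donating request and deliberately returned with dport
+	 * still locked; ipc_port_importance_delta() returns TRUE when it
+	 * had to drop that lock itself, hence the single-branch unlock.)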
+	 */
+	if (needboost == TRUE) {
+		ipc_port_t dport = (ipc_port_t)dest_port;
+
+		/* dport still locked from above */
+		if (ipc_port_importance_delta(dport, 1) == FALSE)
+			ip_unlock(dport);
+	}
+#endif /* IMPORTANCE_INHERITANCE */
+
 	if (dest_soright != IP_NULL)
 		ipc_notify_port_deleted(dest_soright, dest_name);
 
@@ -1953,6 +2163,7 @@ ipc_kmsg_copyin_header(
 
 	if (release_port != IP_NULL)
 		ip_release(release_port);
+
 	return MACH_MSG_SUCCESS;
 
 invalid_reply:
@@ -2409,6 +2620,13 @@ ipc_kmsg_copyin_body(
 			 * Out-of-line memory descriptor, accumulate kernel
 			 * memory requirements
 			 */
+			if (space_needed + round_page(size) <= space_needed) {
+				/* Overflow detected */
+				ipc_kmsg_clean_partial(kmsg, 0, NULL, 0, 0);
+				mr = MACH_MSG_VM_KERNEL;
+				goto out;
+			}
+
 			space_needed += round_page(size);
 
 			if (space_needed > ipc_kmsg_max_vm_space) {
@@ -2526,11 +2744,14 @@ ipc_kmsg_copyin(
 	ipc_kmsg_t	kmsg,
 	ipc_space_t	space,
 	vm_map_t	map,
-	boolean_t	notify)
+	mach_msg_option_t	*optionp)
 {
 	mach_msg_return_t	mr;
-
-	mr = ipc_kmsg_copyin_header(kmsg->ikm_header, space, notify);
+
+	kmsg->ikm_header->msgh_bits &= MACH_MSGH_BITS_USER;
+
+	mr = ipc_kmsg_copyin_header(kmsg->ikm_header, space, optionp);
+
 	if (mr != MACH_MSG_SUCCESS)
 		return mr;
 
@@ -2556,6 +2777,7 @@ ipc_kmsg_copyin(
 			kprintf("%.4x\n",((uint32_t *)(kmsg->ikm_header + 1))[i]);
 		}
 	}
+
 	return mr;
 }
 
@@ -2568,9 +2790,6 @@ ipc_kmsg_copyin(
 *	Because the message comes from the kernel,
 *	the implementation assumes there are no errors
 *	or peculiarities in the message.
- *
- *	Returns TRUE if queueing the message
- *	would result in a circularity.
 *	Conditions:
 *		Nothing locked.
 */
@@ -3001,11 +3220,13 @@ ipc_kmsg_copyout_header(
 
 		kr = ipc_right_copyout(space, reply_name, entry,
 				       reply_type, TRUE, (ipc_object_t) reply);
+		/* reply port is unlocked */
 		assert(kr == KERN_SUCCESS);
 
 		ip_lock(dest);
 		is_write_unlock(space);
+
 	} else {
 		/*
		 * No reply port! This is an easy case.
@@ -3078,6 +3299,7 @@ ipc_kmsg_copyout_header( ipc_object_copyout_dest(space, (ipc_object_t) dest, dest_type, &dest_name); /* dest is unlocked */ + } else { ipc_port_timestamp_t timestamp; @@ -3352,7 +3574,7 @@ ipc_kmsg_copyout_ool_ports_descriptor(mach_msg_ool_ports_descriptor_t *dsc, ipc_kmsg_t kmsg, mach_msg_return_t *mr) { - mach_vm_offset_t rcv_addr; + mach_vm_offset_t rcv_addr = 0; mach_msg_type_name_t disp; mach_msg_type_number_t count, i; vm_size_t ports_length, names_length; @@ -3708,7 +3930,7 @@ ipc_kmsg_copyout_pseudo( mr = (ipc_kmsg_copyout_object(space, dest, dest_type, &dest_name) | ipc_kmsg_copyout_object(space, reply, reply_type, &reply_name)); - kmsg->ikm_header->msgh_bits = mbits &~ MACH_MSGH_BITS_CIRCULAR; + kmsg->ikm_header->msgh_bits = mbits & MACH_MSGH_BITS_USER; kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(dest_name); kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(reply_name); @@ -4180,5 +4402,6 @@ ipc_kmsg_add_trailer(ipc_kmsg_t kmsg, ipc_space_t space, done: + return trailer->msgh_trailer_size; } diff --git a/osfmk/ipc/ipc_kmsg.h b/osfmk/ipc/ipc_kmsg.h index cb3579737..5e742e007 100644 --- a/osfmk/ipc/ipc_kmsg.h +++ b/osfmk/ipc/ipc_kmsg.h @@ -248,14 +248,6 @@ MACRO_BEGIN \ } \ MACRO_END -/* - * extern void - * ipc_kmsg_send_always(ipc_kmsg_t); - */ -#define ipc_kmsg_send_always(kmsg) \ - ipc_kmsg_send((kmsg), MACH_SEND_ALWAYS, MACH_MSG_TIMEOUT_NONE) - - /* Allocate a kernel message */ extern ipc_kmsg_t ipc_kmsg_alloc( mach_msg_size_t size); @@ -323,14 +315,14 @@ extern void ipc_kmsg_put_to_kernel( extern mach_msg_return_t ipc_kmsg_copyin_header( mach_msg_header_t *msg, ipc_space_t space, - boolean_t notify); + mach_msg_option_t *optionp); /* Copyin port rights and out-of-line memory from a user message */ extern mach_msg_return_t ipc_kmsg_copyin( ipc_kmsg_t kmsg, ipc_space_t space, vm_map_t map, - boolean_t notify); + mach_msg_option_t *optionp); /* Copyin port rights and out-of-line memory from a kernel message */ extern mach_msg_return_t ipc_kmsg_copyin_from_kernel( diff --git a/osfmk/ipc/ipc_mqueue.c b/osfmk/ipc/ipc_mqueue.c index 569c6fb0b..4bcb66adc 100644 --- a/osfmk/ipc/ipc_mqueue.c +++ b/osfmk/ipc/ipc_mqueue.c @@ -352,7 +352,7 @@ ipc_mqueue_changed( * the message and must do something with it. If successful, * the message is queued, given to a receiver, or destroyed. * Conditions: - * Nothing locked. + * mqueue is locked. * Returns: * MACH_MSG_SUCCESS The message was accepted. * MACH_SEND_TIMED_OUT Caller still has message. @@ -409,7 +409,9 @@ ipc_mqueue_send( wresult = wait_queue_assert_wait64_locked( &mqueue->imq_wait_queue, IPC_MQUEUE_FULL, - THREAD_ABORTSAFE, deadline, + THREAD_ABORTSAFE, + TIMEOUT_URGENCY_USER_NORMAL, + deadline, 0, cur_thread); thread_unlock(cur_thread); imq_unlock(mqueue); @@ -445,6 +447,7 @@ ipc_mqueue_send( return MACH_MSG_SUCCESS; } + /* * Routine: ipc_mqueue_release_msgcount * Purpose: @@ -572,6 +575,7 @@ ipc_mqueue_post( * and handle its error without getting the message. We * need to go back and pick another one. 
+			receiver->ith_receiver_name = mqueue->imq_receiver_name;
 			receiver->ith_kmsg = IKM_NULL;
 			receiver->ith_seqno = 0;
 			thread_unlock(receiver);
@@ -869,7 +873,9 @@ ipc_mqueue_receive_on_thread(
 	wresult = wait_queue_assert_wait64_locked(&mqueue->imq_wait_queue,
 						  IPC_MQUEUE_RECEIVE,
-						  interruptible, deadline,
+						  interruptible,
+						  TIMEOUT_URGENCY_USER_NORMAL,
+						  deadline, 0,
 						  thread);
 	/* preposts should be detected above, not here */
 	if (wresult == THREAD_AWAKENED)
@@ -946,50 +952,167 @@ ipc_mqueue_select_on_thread(
 /*
 * Routine:	ipc_mqueue_peek
 * Purpose:
- *	Peek at a message queue to see if it has any messages
- *	(in it or contained message queues for a set).
+ *	Peek at a (non-set) message queue to see if it has a message
+ *	matching the sequence number provided (if zero, then the
+ *	first message in the queue) and return vital info about the
+ *	message.
+ *
+ * Conditions:
+ *	Locks may be held by callers, so this routine cannot block.
+ *	Caller holds reference on the message queue.
+ */
+unsigned
+ipc_mqueue_peek(ipc_mqueue_t mq,
+		mach_port_seqno_t *seqnop,
+		mach_msg_size_t *msg_sizep,
+		mach_msg_id_t *msg_idp,
+		mach_msg_max_trailer_t *msg_trailerp)
+{
+	ipc_kmsg_queue_t kmsgq;
+	ipc_kmsg_t kmsg;
+	mach_port_seqno_t seqno, msgoff;
+	int res = 0;
+	spl_t s;
+
+	assert(!imq_is_set(mq));
+
+	s = splsched();
+	imq_lock(mq);
+
+	seqno = (seqnop != NULL) ? *seqnop : 0;
+
+	if (seqno == 0) {
+		seqno = mq->imq_seqno;
+		msgoff = 0;
+	} else if (seqno >= mq->imq_seqno &&
+		   seqno < mq->imq_seqno + mq->imq_msgcount) {
+		msgoff = seqno - mq->imq_seqno;
+	} else
+		goto out;
+
+	/* look for the message that would match that seqno */
+	kmsgq = &mq->imq_messages;
+	kmsg = ipc_kmsg_queue_first(kmsgq);
+	while (msgoff-- && kmsg != IKM_NULL) {
+		kmsg = ipc_kmsg_queue_next(kmsgq, kmsg);
+	}
+	if (kmsg == IKM_NULL)
+		goto out;
+
+	/* found one - return the requested info */
+	if (seqnop != NULL)
+		*seqnop = seqno;
+	if (msg_sizep != NULL)
+		*msg_sizep = kmsg->ikm_header->msgh_size;
+	if (msg_idp != NULL)
+		*msg_idp = kmsg->ikm_header->msgh_id;
+	if (msg_trailerp != NULL)
+		memcpy(msg_trailerp,
+		       (mach_msg_max_trailer_t *)((vm_offset_t)kmsg->ikm_header +
						  round_msg(kmsg->ikm_header->msgh_size)),
+		       sizeof(mach_msg_max_trailer_t));
+	res = 1;
+
+ out:
+	imq_unlock(mq);
+	splx(s);
+	return res;
+}
+
+/*
+ * Routine:	ipc_mqueue_set_peek
+ * Purpose:
+ *	Peek at a message queue set to see if it has any ports
+ *	with messages.
 *
 * Conditions:
 *	Locks may be held by callers, so this routine cannot block.
 *	Caller holds reference on the message queue.
 */
 unsigned
-ipc_mqueue_peek(ipc_mqueue_t mq)
+ipc_mqueue_set_peek(ipc_mqueue_t mq)
 {
 	wait_queue_link_t wql;
 	queue_t q;
 	spl_t s;
+	int res;
 
-	if (!imq_is_set(mq))
-		return (ipc_kmsg_queue_first(&mq->imq_messages) != IKM_NULL);
+	assert(imq_is_set(mq));
 
-	/*
-	 * Don't block trying to get the lock.
-	 */
 	s = splsched();
 	imq_lock(mq);
 
 	/*
 	 * peek at the contained port message queues, return as soon as
 	 * we spot a message on one of the message queues linked on the
-	 * prepost list.
+	 * prepost list. No need to lock each message queue, as only the
+	 * head of each queue is checked. If a message wasn't there before
+	 * we entered here, no need to find it (if we do, great).
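+	 * (In other words the peek is a racy hint by design: it reports 1
+	 * only if some member queue had a message at the instant it was
+	 * examined, and a false negative merely means the caller polls
+	 * again later -- which is why no per-queue lock is needed.)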
 	 */
+	res = 0;
 	q = &mq->imq_preposts;
 	queue_iterate(q, wql, wait_queue_link_t, wql_preposts) {
 		ipc_mqueue_t port_mq = (ipc_mqueue_t)wql->wql_queue;
 		ipc_kmsg_queue_t kmsgs = &port_mq->imq_messages;
 
 		if (ipc_kmsg_queue_first(kmsgs) != IKM_NULL) {
-			imq_unlock(mq);
-			splx(s);
-			return 1;
+			res = 1;
+			break;
 		}
 	}
 	imq_unlock(mq);
 	splx(s);
-	return 0;
+	return res;
+}
+
+/*
+ * Routine:	ipc_mqueue_set_gather_member_names
+ * Purpose:
+ *	Iterate a message queue set to identify the member port
+ *	names. The actual number of names returned is limited to
+ *	maxnames entries, but we keep counting the actual number of
+ *	members to let the caller decide to retry if necessary.
+ *
+ * Conditions:
+ *	Locks may be held by callers, so this routine cannot block.
+ *	Caller holds reference on the message queue.
+ */
+void
+ipc_mqueue_set_gather_member_names(
+	ipc_mqueue_t mq,
+	ipc_entry_num_t maxnames,
+	mach_port_name_t *names,
+	ipc_entry_num_t *actualp)
+{
+	wait_queue_link_t wql;
+	queue_t q;
+	spl_t s;
+	ipc_entry_num_t actual = 0;
+
+	assert(imq_is_set(mq));
+
+	s = splsched();
+	imq_lock(mq);
+
+	/*
+	 * Iterate over the member ports through the mqueue set links
+	 * capturing as many names as we can.
+	 */
+	q = &mq->imq_setlinks;
+	queue_iterate(q, wql, wait_queue_link_t, wql_setlinks) {
+		ipc_mqueue_t port_mq = (ipc_mqueue_t)wql->wql_queue;
+
+		if (actual < maxnames)
+			names[actual] = port_mq->imq_receiver_name;
+		actual++;
+	}
+	imq_unlock(mq);
+	splx(s);
+
+	*actualp = actual;
 }
+
 /*
 * Routine:	ipc_mqueue_destroy
 * Purpose:
@@ -1009,7 +1132,6 @@ ipc_mqueue_destroy(
 	boolean_t reap = FALSE;
 	spl_t s;
-
 	s = splsched();
 	imq_lock(mqueue);
 
 	/*
diff --git a/osfmk/ipc/ipc_mqueue.h b/osfmk/ipc/ipc_mqueue.h
index f452f7079..26aa7fe0d 100644
--- a/osfmk/ipc/ipc_mqueue.h
+++ b/osfmk/ipc/ipc_mqueue.h
@@ -92,6 +92,7 @@ typedef struct ipc_mqueue {
 			mach_port_seqno_t	seqno;
 			mach_port_name_t	receiver_name;
 			boolean_t		fullwaiters;
+			natural_t		pset_count;
 		} port;
 		struct {
 			struct wait_queue_set	set_queue;
@@ -109,6 +110,7 @@ typedef struct ipc_mqueue {
 #define	imq_seqno		data.port.seqno
 #define imq_receiver_name	data.port.receiver_name
 #define	imq_fullwaiters		data.port.fullwaiters
+#define	imq_pset_count		data.port.pset_count
 #define	imq_set_queue		data.pset.set_queue
 #define	imq_setlinks		data.pset.set_queue.wqs_setlinks
@@ -182,6 +184,13 @@ extern mach_msg_return_t ipc_mqueue_send(
 	mach_msg_timeout_t	timeout_val,
 	spl_t			s);
 
+/* Check whether the send queue of a port is full */
+extern mach_msg_return_t ipc_mqueue_preflight_send(
+	ipc_mqueue_t		mqueue,
+	ipc_kmsg_t		kmsg,
+	mach_msg_option_t	option,
+	mach_msg_timeout_t	timeout_val);
+
 /* Deliver message to message queue or waiting receiver */
 extern void ipc_mqueue_post(
 	ipc_mqueue_t		mqueue,
@@ -218,8 +227,23 @@ extern void ipc_mqueue_select_on_thread(
 
 /* Peek into a message queue to see if there are messages */
 extern unsigned ipc_mqueue_peek(
+	ipc_mqueue_t		mqueue,
+	mach_port_seqno_t	*msg_seqnop,
+	mach_msg_size_t		*msg_sizep,
+	mach_msg_id_t		*msg_idp,
+	mach_msg_max_trailer_t	*msg_trailerp);
+
+/* Peek into a message queue set to see if there are queues with messages */
+extern unsigned ipc_mqueue_set_peek(
 	ipc_mqueue_t		mqueue);
 
+/* Gather the names of member ports for a given set */
+extern void ipc_mqueue_set_gather_member_names(
+	ipc_mqueue_t		mqueue,
+	ipc_entry_num_t		maxnames,
+	mach_port_name_t	*names,
+	ipc_entry_num_t		*actualp);
+
 /* Clear a message count reservation */
 extern void ipc_mqueue_release_msgcount(
 	ipc_mqueue_t		mqueue);
diff --git a/osfmk/ipc/ipc_object.c
b/osfmk/ipc/ipc_object.c index ae246fb85..054b7aec6 100644 --- a/osfmk/ipc/ipc_object.c +++ b/osfmk/ipc/ipc_object.c @@ -508,6 +508,10 @@ ipc_object_copyin( queue_t links = &links_data; wait_queue_link_t wql; +#if IMPORTANCE_INHERITANCE + int assertcnt = 0; +#endif + queue_init(links); /* @@ -526,6 +530,9 @@ ipc_object_copyin( msgt_name, TRUE, objectp, &soright, &release_port, +#if IMPORTANCE_INHERITANCE + &assertcnt, +#endif /* IMPORTANCE_INHERITANCE */ links); if (IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE) ipc_entry_dealloc(space, name, entry); @@ -536,6 +543,12 @@ ipc_object_copyin( wait_queue_link_free(wql); } +#if IMPORTANCE_INHERITANCE + if (assertcnt > 0 && current_task()->imp_receiver != 0) { + task_importance_drop_internal_assertion(current_task(), assertcnt); + } +#endif /* IMPORTANCE_INHERITANCE */ + if (release_port != IP_NULL) ip_release(release_port); @@ -616,13 +629,14 @@ ipc_object_copyin_from_kernel( ipc_port_t port = (ipc_port_t) object; ip_lock(port); - assert(ip_active(port)); - assert(port->ip_receiver_name != MACH_PORT_NULL); - assert(port->ip_receiver == ipc_space_kernel); + if (ip_active(port)) { + assert(port->ip_receiver_name != MACH_PORT_NULL); + assert(port->ip_receiver == ipc_space_kernel); + port->ip_mscount++; + } - ip_reference(port); - port->ip_mscount++; port->ip_srights++; + ip_reference(port); ip_unlock(port); break; } @@ -637,11 +651,11 @@ ipc_object_copyin_from_kernel( ipc_port_t port = (ipc_port_t) object; ip_lock(port); - assert(ip_active(port)); - assert(port->ip_receiver_name != MACH_PORT_NULL); - - ip_reference(port); + if (ip_active(port)) { + assert(port->ip_receiver_name != MACH_PORT_NULL); + } port->ip_sorights++; + ip_reference(port); ip_unlock(port); break; } @@ -809,6 +823,7 @@ ipc_object_copyout( kr = ipc_right_copyout(space, name, entry, msgt_name, overflow, object); + /* object is unlocked */ is_write_unlock(space); @@ -849,6 +864,11 @@ ipc_object_copyout_name( ipc_entry_t entry; kern_return_t kr; +#if IMPORTANCE_INHERITANCE + int assertcnt = 0; + task_t task = TASK_NULL; +#endif /* IMPORTANCE_INHERITANCE */ + assert(IO_VALID(object)); assert(io_otype(object) == IOT_PORT); @@ -893,10 +913,47 @@ ipc_object_copyout_name( /* space is write-locked and active, object is locked and active */ +#if IMPORTANCE_INHERITANCE + /* + * We are slamming a receive right into the space, without + * first having been enqueued on a port destined there. So, + * we have to arrange to boost the task appropriately if this + * port has assertions (and the task wants them). 
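+	 * The shape of the handoff coded below: capture the assertion
+	 * count and a task reference while the port is still locked, then
+	 * apply task_importance_hold_internal_assertion(task, assertcnt)
+	 * only after the space and port locks have been dropped.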
+ */ + if (msgt_name == MACH_MSG_TYPE_PORT_RECEIVE) { + ipc_port_t port = (ipc_port_t)object; + + if ((space->is_task != TASK_NULL) && + (space->is_task->imp_receiver != 0)) { + assertcnt = port->ip_impcount; + task = space->is_task; + task_reference(task); + } + + /* take port out of limbo */ + assert(port->ip_tempowner != 0); + port->ip_tempowner = 0; + } + +#endif /* IMPORTANCE_INHERITANCE */ + kr = ipc_right_copyout(space, name, entry, msgt_name, overflow, object); + /* object is unlocked */ is_write_unlock(space); + +#if IMPORTANCE_INHERITANCE + /* + * Add the assertions to the task that we captured before + */ + if (task != TASK_NULL) { + if (assertcnt > 0) + task_importance_hold_internal_assertion(task, assertcnt); + task_deallocate(task); + } +#endif /* IMPORTANCE_INHERITANCE */ + return kr; } diff --git a/osfmk/ipc/ipc_object.h b/osfmk/ipc/ipc_object.h index 05822d0fa..e040b4be3 100644 --- a/osfmk/ipc/ipc_object.h +++ b/osfmk/ipc/ipc_object.h @@ -136,7 +136,7 @@ struct ipc_object_header { #define IO_BITS_OTYPE 0x7fff0000 /* determines a zone */ #define IO_BITS_ACTIVE 0x80000000 /* is object alive? */ -#define io_active(io) ((io)->io_bits & IO_BITS_ACTIVE) +#define io_active(io) (((io)->io_bits & IO_BITS_ACTIVE) != 0) #define io_otype(io) (((io)->io_bits & IO_BITS_OTYPE) >> 16) #define io_kotype(io) ((io)->io_bits & IO_BITS_KOTYPE) diff --git a/osfmk/ipc/ipc_port.c b/osfmk/ipc/ipc_port.c index 9ba62e5a9..f8f96739a 100644 --- a/osfmk/ipc/ipc_port.c +++ b/osfmk/ipc/ipc_port.c @@ -69,7 +69,6 @@ * Functions to manipulate IPC ports. */ -#include #include #include @@ -102,11 +101,11 @@ int ipc_portbt; #if MACH_ASSERT void ipc_port_init_debug( ipc_port_t port, - natural_t *callstack, + uintptr_t *callstack, unsigned int callstack_max); void ipc_port_callstack_init_debug( - natural_t *callstack, + uintptr_t *callstack, unsigned int callstack_max); #endif /* MACH_ASSERT */ @@ -148,6 +147,17 @@ ipc_port_timestamp(void) * KERN_NO_SPACE No index allocated. */ +#if IMPORTANCE_INHERITANCE +kern_return_t +ipc_port_request_alloc( + ipc_port_t port, + mach_port_name_t name, + ipc_port_t soright, + boolean_t send_possible, + boolean_t immediate, + ipc_port_request_index_t *indexp, + boolean_t *importantp) +#else kern_return_t ipc_port_request_alloc( ipc_port_t port, @@ -156,11 +166,16 @@ ipc_port_request_alloc( boolean_t send_possible, boolean_t immediate, ipc_port_request_index_t *indexp) +#endif /* IMPORTANCE_INHERITANCE */ { ipc_port_request_t ipr, table; ipc_port_request_index_t index; uintptr_t mask = 0; +#if IMPORTANCE_INHERITANCE + *importantp = FALSE; +#endif /* IMPORTANCE_INHERITANCE */ + assert(ip_active(port)); assert(name != MACH_PORT_NULL); assert(soright != IP_NULL); @@ -184,7 +199,17 @@ ipc_port_request_alloc( mask |= IPR_SOR_SPREQ_MASK; if (immediate) { mask |= IPR_SOR_SPARM_MASK; - port->ip_sprequests = TRUE; + if (port->ip_sprequests == 0) { + port->ip_sprequests = 1; +#if IMPORTANCE_INHERITANCE + if (port->ip_impdonation != 0 && + port->ip_spimportant == 0 && + (task_is_importance_donor(current_task()))) { + port->ip_spimportant = 1; + *importantp = TRUE; + } +#endif /* IMPORTANCE_INHERTANCE */ + } } } ipr->ipr_soright = IPR_SOR_MAKE(soright, mask); @@ -318,13 +343,25 @@ ipc_port_request_grow( * Arm delayed send-possible request. * Conditions: * The port must be locked and active. + * + * Returns TRUE if the request was armed + * (or armed with importance in that version). 
*/ -void +#if IMPORTANCE_INHERITANCE +boolean_t +ipc_port_request_sparm( + ipc_port_t port, + __assert_only mach_port_name_t name, + ipc_port_request_index_t index, + mach_msg_option_t option) +#else +boolean_t ipc_port_request_sparm( ipc_port_t port, __assert_only mach_port_name_t name, ipc_port_request_index_t index) +#endif /* IMPORTANCE_INHERITANCE */ { if (index != IE_REQ_NONE) { ipc_port_request_t ipr, table; @@ -339,9 +376,22 @@ ipc_port_request_sparm( if (IPR_SOR_SPREQ(ipr->ipr_soright)) { ipr->ipr_soright = IPR_SOR_MAKE(ipr->ipr_soright, IPR_SOR_SPARM_MASK); - port->ip_sprequests = TRUE; - } + port->ip_sprequests = 1; +#if IMPORTANCE_INHERITANCE + if (((option & MACH_SEND_NOIMPORTANCE) == 0) && + (port->ip_impdonation != 0) && + (port->ip_spimportant == 0) && + (((option & MACH_SEND_IMPORTANCE) != 0) || + (task_is_importance_donor(current_task())))) { + port->ip_spimportant = 1; + return TRUE; + } +#else + return TRUE; +#endif /* IMPORTANCE_INHERITANCE */ + } } + return FALSE; } /* @@ -521,6 +571,7 @@ ipc_port_clear_receiver( ipc_mqueue_changed(&port->ip_messages); ipc_port_set_mscount(port, 0); port->ip_messages.imq_seqno = 0; + port->ip_context = port->ip_guarded = port->ip_strict_guard = 0; imq_unlock(&port->ip_messages); splx(s); } @@ -555,6 +606,18 @@ ipc_port_init( port->ip_premsg = IKM_NULL; port->ip_context = 0; + port->ip_sprequests = 0; + port->ip_spimportant = 0; + port->ip_impdonation = 0; + port->ip_tempowner = 0; + port->ip_taskptr = 0; + + port->ip_guarded = 0; + port->ip_strict_guard = 0; + port->ip_impcount = 0; + + port->ip_reserved = 0; + ipc_mqueue_init(&port->ip_messages, FALSE /* set */); } @@ -583,7 +646,7 @@ ipc_port_alloc( kern_return_t kr; #if MACH_ASSERT - natural_t buf[IP_CALLSTACK_MAX]; + uintptr_t buf[IP_CALLSTACK_MAX]; ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX); #endif /* MACH_ASSERT */ @@ -641,7 +704,7 @@ ipc_port_alloc_name( kern_return_t kr; #if MACH_ASSERT - natural_t buf[IP_CALLSTACK_MAX]; + uintptr_t buf[IP_CALLSTACK_MAX]; ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX); #endif /* MACH_ASSERT */ @@ -685,22 +748,34 @@ ipc_port_spnotify( { ipc_port_request_index_t index = 0; ipc_table_elems_t size = 0; +#if IMPORTANCE_INHERITANCE + boolean_t dropassert = FALSE; +#endif /* IMPORTANCE_INHERITANCE */ /* * If the port has no send-possible request * armed, don't bother to lock the port. 
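	 * (The unlocked read of ip_sprequests below is a hint only: a stale
	 * zero just postpones the notification until a later send, and the
	 * flag is re-tested under the port lock before anything is sent.)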
*/ - if (!port->ip_sprequests) + if (port->ip_sprequests == 0) return; ip_lock(port); - if (!port->ip_sprequests) { + +#if IMPORTANCE_INHERITANCE + if (port->ip_spimportant != 0) { + port->ip_spimportant = 0; + port->ip_impcount--; + dropassert = TRUE; + } +#endif /* IMPORTANCE_INHERITANCE */ + + if (port->ip_sprequests == 0) { ip_unlock(port); - return; + goto out; } - port->ip_sprequests = FALSE; + port->ip_sprequests = 0; - revalidate: +revalidate: if (ip_active(port)) { ipc_port_request_t requests; @@ -735,6 +810,14 @@ ipc_port_spnotify( } } ip_unlock(port); +out: +#if IMPORTANCE_INHERITANCE + if ((dropassert == TRUE) && (current_task()->imp_receiver != 0)) { + /* drop internal assertion and no task lock held */ + task_importance_drop_internal_assertion(current_task(), 1); + } +#endif /* IMPORTANCE_INHERITANCE */ + return; } /* @@ -793,15 +876,45 @@ ipc_port_destroy( ipc_mqueue_t mqueue; ipc_kmsg_t kmsg; +#if IMPORTANCE_INHERITANCE + task_t release_imp_task = TASK_NULL; + thread_t self = current_thread(); + boolean_t top = (self->ith_assertions == 0); + natural_t assertcnt = 0; +#endif /* IMPORTANCE_INHERITANCE */ + assert(ip_active(port)); /* port->ip_receiver_name is garbage */ /* port->ip_receiver/port->ip_destination is garbage */ assert(port->ip_pset_count == 0); assert(port->ip_mscount == 0); - /* first check for a backup port */ - + /* check for a backup port */ pdrequest = port->ip_pdrequest; + +#if IMPORTANCE_INHERITANCE + /* determine how many assertions to drop and from whom */ + if (port->ip_tempowner != 0) { + assert(top); + if (port->ip_taskptr != 0) { + release_imp_task = port->ip_imp_task; + port->ip_imp_task = TASK_NULL; + port->ip_taskptr = 0; + assertcnt = port->ip_impcount; + } + /* Otherwise, nothing to drop */ + } else { + assert(port->ip_taskptr == 0); + assertcnt = port->ip_impcount; + if (pdrequest != IP_NULL) + /* mark in limbo for the journey */ + port->ip_tempowner = 1; + } + + if (top) + self->ith_assertions = assertcnt; +#endif /* IMPORTANCE_INHERITANCE */ + if (pdrequest != IP_NULL) { /* we assume the ref for pdrequest */ port->ip_pdrequest = IP_NULL; @@ -813,7 +926,8 @@ ipc_port_destroy( /* consumes our refs for port and pdrequest */ ipc_notify_port_destroyed(pdrequest, port); - return; + + goto drop_assertions; } /* once port is dead, we don't need to keep it locked */ @@ -859,6 +973,31 @@ ipc_port_destroy( ipc_kobject_destroy(port); ip_release(port); /* consume caller's ref */ + + drop_assertions: +#if IMPORTANCE_INHERITANCE + if (release_imp_task != TASK_NULL) { + if (assertcnt > 0) { + assert(top); + self->ith_assertions = 0; + assert(release_imp_task->imp_receiver != 0); + task_importance_drop_internal_assertion(release_imp_task, assertcnt); + } + task_deallocate(release_imp_task); + + } else if (assertcnt > 0) { + if (top) { + self->ith_assertions = 0; + release_imp_task = current_task(); + if (release_imp_task->imp_receiver != 0) { + task_importance_drop_internal_assertion(release_imp_task, assertcnt); + } + } else { + /* the port chain we are enqueued on should cover our assertions */ + assert(assertcnt <= self->ith_assertions); + } + } +#endif /* IMPORTANCE_INHERITANCE */ } /* @@ -873,6 +1012,12 @@ ipc_port_destroy( * That is, we want to set port->ip_destination == dest, * but guaranteeing that this doesn't create a circle * port->ip_destination->ip_destination->...
== port + * + * Additionally, if port was successfully changed to "in transit", + * propagate boost assertions from the "in limbo" port to all + * the ports in the chain, and, if the destination task accepts + * boosts, to the destination task. + * * Conditions: * No ports locked. References held for "port" and "dest". */ @@ -884,6 +1029,12 @@ ipc_port_check_circularity( { ipc_port_t base; +#if IMPORTANCE_INHERITANCE + task_t task = TASK_NULL; + task_t release_task = TASK_NULL; + int assertcnt = 0; +#endif /* IMPORTANCE_INHERITANCE */ + assert(port != IP_NULL); assert(dest != IP_NULL); @@ -976,32 +1127,227 @@ ipc_port_check_circularity( ip_reference(dest); port->ip_destination = dest; +#if IMPORTANCE_INHERITANCE + /* must have been in limbo or still bound to a task */ + assert(port->ip_tempowner != 0); + + if (port->ip_taskptr != 0) { + /* + * We delayed dropping assertions from a specific task. + * Cache that info now (we'll drop assertions and the + * task reference below). + */ + release_task = port->ip_imp_task; + port->ip_imp_task = TASK_NULL; + port->ip_taskptr = 0; + } + assertcnt = port->ip_impcount; + + /* take the port out of limbo w.r.t. assertions */ + port->ip_tempowner = 0; + +#endif /* IMPORTANCE_INHERITANCE */ + /* now unlock chain */ - while (port != base) { - ipc_port_t next; + ip_unlock(port); + + for (;;) { + +#if IMPORTANCE_INHERITANCE + /* every port along the chain tracks the assertions behind it */ + dest->ip_impcount += assertcnt; +#endif /* IMPORTANCE_INHERITANCE */ + + if (dest == base) + break; /* port is in transit */ - assert(ip_active(port)); - assert(port->ip_receiver_name == MACH_PORT_NULL); - assert(port->ip_destination != IP_NULL); + assert(ip_active(dest)); + assert(dest->ip_receiver_name == MACH_PORT_NULL); + assert(dest->ip_destination != IP_NULL); - next = port->ip_destination; - ip_unlock(port); - port = next; +#if IMPORTANCE_INHERITANCE + assert(dest->ip_tempowner == 0); +#endif /* IMPORTANCE_INHERITANCE */ + + port = dest->ip_destination; + ip_unlock(dest); + dest = port; } /* base is not in transit */ - assert(!ip_active(base) || (base->ip_receiver_name != MACH_PORT_NULL) || (base->ip_destination == IP_NULL)); + +#if IMPORTANCE_INHERITANCE + /* + * Find the task to boost (if any). + * We will boost "through" ports that don't know + * about inheritance to deliver receive rights that + * do. + */ + if (ip_active(base) && (assertcnt > 0)) { + if (base->ip_tempowner != 0) { + if (base->ip_taskptr != 0) + /* specified tempowner task */ + task = base->ip_imp_task; + /* otherwise don't boost current task */ + + } else if (base->ip_receiver_name != MACH_PORT_NULL) { + ipc_space_t space = base->ip_receiver; + + /* only spaces with boost-accepting tasks */ + if (space->is_task != TASK_NULL && + space->is_task->imp_receiver != 0) + task = space->is_task; + } + + /* take reference before unlocking base */ + if (task != TASK_NULL) { + assert(task->imp_receiver != 0); + task_reference(task); + } + } +#endif /* IMPORTANCE_INHERITANCE */ + ip_unlock(base); +#if IMPORTANCE_INHERITANCE + /* + * Transfer assertions now that the ports are unlocked. + * Avoid extra overhead if transferring to/from the same task. + */ + boolean_t transfer_assertions = (task != release_task) ?
TRUE : FALSE; + + if (task != TASK_NULL) { + if (transfer_assertions) + task_importance_hold_internal_assertion(task, assertcnt); + task_deallocate(task); + task = TASK_NULL; + } + + if (release_task != TASK_NULL) { + if (transfer_assertions) + task_importance_drop_internal_assertion(release_task, assertcnt); + task_deallocate(release_task); + release_task = TASK_NULL; + } +#endif /* IMPORTANCE_INHERITANCE */ + return FALSE; } +/* + * Routine: ipc_port_importance_delta + * Purpose: + * Adjust the importance count through the given port. + * If the port is in transit, apply the delta throughout + * the chain. Determine if there is a task at the + * base of the chain that wants/needs to be adjusted, + * and if so, apply the delta. + * Conditions: + * The port is referenced and locked on entry. + * Nothing else is locked. + * The lock may be dropped on exit. + * Returns TRUE if the lock was dropped. + */ +#if IMPORTANCE_INHERITANCE + +boolean_t +ipc_port_importance_delta( + ipc_port_t port, + mach_port_delta_t delta) +{ + ipc_port_t next, base; + task_t task = TASK_NULL; + boolean_t dropped = FALSE; + + if (delta == 0) + return FALSE; + + base = port; + + /* if port is in transit, have to search for end of chain */ + if (ip_active(port) && + port->ip_destination != IP_NULL && + port->ip_receiver_name == MACH_PORT_NULL) { + + dropped = TRUE; + + ip_unlock(port); + ipc_port_multiple_lock(); /* massive serialization */ + ip_lock(base); + + while(ip_active(base) && + base->ip_destination != IP_NULL && + base->ip_receiver_name == MACH_PORT_NULL) { + + base = base->ip_destination; + ip_lock(base); + } + ipc_port_multiple_unlock(); + } + + /* unlock down to the base, adding a boost at each level */ + for (;;) { + port->ip_impcount += delta; + + if (port == base) + break; + + /* port is in transit */ + assert(port->ip_tempowner == 0); + next = port->ip_destination; + ip_unlock(port); + port = next; + } + + /* find the task (if any) to boost according to the base */ + if (ip_active(base)) { + if (base->ip_tempowner != 0) { + if (base->ip_taskptr != 0) + task = base->ip_imp_task; + /* otherwise don't boost */ + + } else if (base->ip_receiver_name != MACH_PORT_NULL) { + ipc_space_t space = base->ip_receiver; + + /* only spaces with boost-accepting tasks */ + if (space->is_task != TASK_NULL && + space->is_task->imp_receiver != 0) + task = space->is_task; + } + } + + /* + * Only the base is locked. If we have to hold or drop task + * importance assertions, we'll have to drop that lock as well.
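+ * + * Editorial sketch, not part of this change: the TRUE-means-lock-dropped contract shows up at call sites as + * + * if (ipc_port_importance_delta(port, 1) == FALSE) + * ip_unlock(port); + * + * i.e. the caller unlocks only when the routine did not already do so; compare ipc_right_request_alloc() later in this patch.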
+ */ + if (task != TASK_NULL) { + /* take a reference before unlocking base */ + assert(task->imp_receiver != 0); + task_reference(task); + + ip_unlock(base); + dropped = TRUE; + + if (delta > 0) + task_importance_hold_internal_assertion(task, delta); + else + task_importance_drop_internal_assertion(task, -delta); + + task_deallocate(task); + } else if (dropped == TRUE) { + ip_unlock(base); + } + + return dropped; +} +#endif /* IMPORTANCE_INHERITANCE */ + /* * Routine: ipc_port_lookup_notify * Purpose: @@ -1061,7 +1407,6 @@ ipc_port_make_send_locked( port->ip_mscount++; port->ip_srights++; ip_reference(port); - ip_unlock(port); return port; } @@ -1329,7 +1674,7 @@ ipc_port_alloc_special( return IP_NULL; #if MACH_ASSERT - natural_t buf[IP_CALLSTACK_MAX]; + uintptr_t buf[IP_CALLSTACK_MAX]; ipc_port_callstack_init_debug(&buf[0], IP_CALLSTACK_MAX); #endif /* MACH_ASSERT */ @@ -1476,7 +1821,7 @@ extern int proc_pid(struct proc*); void ipc_port_init_debug( ipc_port_t port, - natural_t *callstack, + uintptr_t *callstack, unsigned int callstack_max) { unsigned int i; @@ -1518,7 +1863,7 @@ ipc_port_init_debug( */ void ipc_port_callstack_init_debug( - natural_t *callstack, + uintptr_t *callstack, unsigned int callstack_max) { unsigned int i; diff --git a/osfmk/ipc/ipc_port.h b/osfmk/ipc/ipc_port.h index 100195f7b..169189f63 100644 --- a/osfmk/ipc/ipc_port.h +++ b/osfmk/ipc/ipc_port.h @@ -74,7 +74,6 @@ #if MACH_KERNEL_PRIVATE -#include #include #include #include @@ -126,30 +125,33 @@ struct ipc_port { ipc_port_timestamp_t timestamp; } data; - ipc_kobject_t ip_kobject; + union { + ipc_kobject_t kobject; + task_t imp_task; + uintptr_t alias; + } kdata; + + struct ipc_port *ip_nsrequest; + struct ipc_port *ip_pdrequest; + struct ipc_port_request *ip_requests; + struct ipc_kmsg *ip_premsg; mach_port_mscount_t ip_mscount; mach_port_rights_t ip_srights; mach_port_rights_t ip_sorights; - struct ipc_port *ip_nsrequest; - struct ipc_port *ip_pdrequest; - struct ipc_port_request *ip_requests; - boolean_t ip_sprequests; + natural_t ip_sprequests:1, /* send-possible requests outstanding */ + ip_spimportant:1, /* ... at least one is importance donating */ + ip_impdonation:1, /* port supports importance donation */ + ip_tempowner:1, /* don't give donations to current receiver */ + ip_taskptr:1, /* ... instead give them to a specified task */ + ip_guarded:1, /* port guarded (use context value as guard) */ + ip_strict_guard:1, /* strict guarding; prevents direct user manipulation of the context value */ + ip_reserved:1, + ip_impcount:24; /* number of importance donations in nested queue */ - unsigned int ip_pset_count; - struct ipc_kmsg *ip_premsg; mach_vm_address_t ip_context; -#if NORMA_VM - /* - * These fields are needed for the use of XMM. - * Few ports need this information; it should - * be kept in XMM instead (TBD). XXX - */ - long ip_norma_xmm_object_refs; - struct ipc_port *ip_norma_xmm_object; -#endif #if MACH_ASSERT #define IP_NSPARES 4 @@ -157,10 +159,9 @@ struct ipc_port { queue_chain_t ip_port_links; /* all allocated ports */ thread_t ip_thread; /* who made me?
thread context */ unsigned long ip_timetrack; /* give an idea of "when" created */ - natural_t ip_callstack[IP_CALLSTACK_MAX]; /* stack trace */ + uintptr_t ip_callstack[IP_CALLSTACK_MAX]; /* stack trace */ unsigned long ip_spares[IP_NSPARES]; /* for debugging */ #endif /* MACH_ASSERT */ - uintptr_t alias; #if CONFIG_MACF_MACH struct label ip_label; @@ -171,11 +172,16 @@ struct ipc_port { #define ip_references ip_object.io_references #define ip_bits ip_object.io_bits +#define ip_receiver_name ip_messages.imq_receiver_name +#define ip_pset_count ip_messages.imq_pset_count + #define ip_receiver data.receiver #define ip_destination data.destination #define ip_timestamp data.timestamp -#define ip_receiver_name ip_messages.imq_receiver_name +#define ip_kobject kdata.kobject +#define ip_imp_task kdata.imp_task +#define ip_alias kdata.alias #define IP_NULL IPC_PORT_NULL #define IP_DEAD IPC_PORT_DEAD @@ -314,6 +320,17 @@ extern ipc_port_timestamp_t ipc_port_timestamp(void); (ipc_object_t *) (portp)) /* Allocate a notification request slot */ +#if IMPORTANCE_INHERITANCE +extern kern_return_t +ipc_port_request_alloc( + ipc_port_t port, + mach_port_name_t name, + ipc_port_t soright, + boolean_t send_possible, + boolean_t immediate, + ipc_port_request_index_t *indexp, + boolean_t *importantp); +#else extern kern_return_t ipc_port_request_alloc( ipc_port_t port, @@ -322,6 +339,7 @@ ipc_port_request_alloc( boolean_t send_possible, boolean_t immediate, ipc_port_request_index_t *indexp); +#endif /* IMPORTANCE_INHERITANCE */ /* Grow one of a port's tables of notification requests */ extern kern_return_t ipc_port_request_grow( @@ -341,10 +359,18 @@ extern ipc_port_t ipc_port_request_cancel( ipc_port_request_index_t index); /* Arm any delayed send-possible notification */ -extern void ipc_port_request_sparm( +#if IMPORTANCE_INHERITANCE +extern boolean_t ipc_port_request_sparm( + ipc_port_t port, + mach_port_name_t name, + ipc_port_request_index_t index, + mach_msg_option_t option); +#else +extern boolean_t ipc_port_request_sparm( + ipc_port_t port, mach_port_name_t name, ipc_port_request_index_t index); +#endif /* IMPORTANCE_INHERITANCE */ /* Macros for manipulating a port's dead name notification requests */ #define ipc_port_request_rename(port, index, oname, nname) \ @@ -425,6 +451,14 @@ ipc_port_check_circularity( ipc_port_t port, ipc_port_t dest); +#if IMPORTANCE_INHERITANCE +/* Apply an importance delta to a port */ +extern boolean_t +ipc_port_importance_delta( + ipc_port_t port, + mach_port_delta_t delta); +#endif /* IMPORTANCE_INHERITANCE */ + /* Make a send-once notify port from a receive right */ extern ipc_port_t ipc_port_lookup_notify( ipc_space_t space, diff --git a/osfmk/ipc/ipc_pset.c b/osfmk/ipc/ipc_pset.c index b5d509472..3a47ddae9 100644 --- a/osfmk/ipc/ipc_pset.c +++ b/osfmk/ipc/ipc_pset.c @@ -430,7 +430,7 @@ filt_machport( * provided, just force a MACH_RCV_TOO_LARGE to detect the * name of the port and sizeof the waiting message.
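* * (Editorial usage sketch, not part of this change; buffer names are illustrative.) With the MACH_RCV_LARGE_IDENTITY handling added below, a user-space registration that wants the member port's name reported for too-large messages could look like: * * struct kevent64_s kev; * EV_SET64(&kev, pset_name, EVFILT_MACHPORT, EV_ADD, * MACH_RCV_MSG | MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY, * 0, 0, (uint64_t)rcv_buf, rcv_buf_size); * * ext[0]/ext[1] carry the receive buffer address and size, matching the kn_ext[] usage in filt_machport().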
*/ - option = kn->kn_sfflags & (MACH_RCV_MSG|MACH_RCV_LARGE|MACH_RCV_TRAILER_MASK); + option = kn->kn_sfflags & (MACH_RCV_MSG|MACH_RCV_LARGE|MACH_RCV_LARGE_IDENTITY|MACH_RCV_TRAILER_MASK); if (option & MACH_RCV_MSG) { self->ith_msg_addr = (mach_vm_address_t) kn->kn_ext[0]; size = (mach_msg_size_t)kn->kn_ext[1]; @@ -454,7 +454,7 @@ self->ith_receiver_name = MACH_PORT_NULL; self->ith_continuation = NULL; option |= MACH_RCV_TIMEOUT; // never wait - assert((self->ith_state = MACH_RCV_IN_PROGRESS) == MACH_RCV_IN_PROGRESS); + self->ith_state = MACH_RCV_IN_PROGRESS; wresult = ipc_mqueue_receive_on_thread( &pset->ips_messages, @@ -493,10 +493,20 @@ filt_machport( * the results in the fflags field. */ assert(option & MACH_RCV_MSG); - kn->kn_data = MACH_PORT_NULL; kn->kn_ext[1] = self->ith_msize; + kn->kn_data = MACH_PORT_NULL; kn->kn_fflags = mach_msg_receive_results(); /* kmsg and pset reference consumed */ + + /* + * if the user asked for the identity of ports containing + * a too-large message, return it in the data field (as we + * do for messages we didn't try to receive). + */ + if ((kn->kn_fflags == MACH_RCV_TOO_LARGE) && + (option & MACH_RCV_LARGE_IDENTITY)) + kn->kn_data = self->ith_receiver_name; + return 1; } @@ -507,6 +517,8 @@ filt_machporttouch(struct knote *kn, struct kevent64_s *kev, long type) case EVENT_REGISTER: kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; + kn->kn_ext[0] = kev->ext[0]; + kn->kn_ext[1] = kev->ext[1]; break; case EVENT_PROCESS: *kev = kn->kn_kevent; @@ -544,5 +556,5 @@ filt_machportpeek(struct knote *kn) ipc_pset_t pset = kn->kn_ptr.p_pset; ipc_mqueue_t set_mq = &pset->ips_messages; - return (ipc_mqueue_peek(set_mq)); + return (ipc_mqueue_set_peek(set_mq)); } diff --git a/osfmk/ipc/ipc_right.c b/osfmk/ipc/ipc_right.c index 46d7f1ec7..f4e102b47 100644 --- a/osfmk/ipc/ipc_right.c +++ b/osfmk/ipc/ipc_right.c @@ -88,6 +88,13 @@ #include #include +/* Allow IPC to generate mach port guard exceptions */ +extern kern_return_t +mach_port_guard_exception( + mach_port_name_t name, + uint64_t inguard, + uint64_t portguard, + unsigned reason); /* * Routine: ipc_right_lookup_write * Purpose: @@ -272,6 +279,10 @@ ipc_right_request_alloc( ipc_entry_t entry; kern_return_t kr; +#if IMPORTANCE_INHERITANCE + boolean_t needboost = FALSE; +#endif /* IMPORTANCE_INHERITANCE */ + for (;;) { ipc_port_t port = IP_NULL; @@ -336,9 +347,15 @@ if (prev_request != IE_REQ_NONE) previous = ipc_port_request_cancel(port, name, prev_request); +#if IMPORTANCE_INHERITANCE + kr = ipc_port_request_alloc(port, name, notify, + send_possible, immediate, + &new_request, &needboost); +#else kr = ipc_port_request_alloc(port, name, notify, send_possible, immediate, &new_request); +#endif /* IMPORTANCE_INHERITANCE */ if (kr != KERN_SUCCESS) { assert(previous == IP_NULL); is_write_unlock(space); @@ -352,11 +369,20 @@ continue; } + assert(new_request != IE_REQ_NONE); - ip_unlock(port); entry->ie_request = new_request; ipc_entry_modified(space, name, entry); is_write_unlock(space); + +#if IMPORTANCE_INHERITANCE + if (needboost == TRUE) { + if (ipc_port_importance_delta(port, 1) == FALSE) + ip_unlock(port); + } else +#endif /* IMPORTANCE_INHERITANCE */ + ip_unlock(port); + break; } /* entry may have changed to dead-name by ipc_right_check() */ @@ -487,7 +513,6 @@ ipc_right_check( ip_lock(port); if (ip_active(port)) return FALSE; - ip_unlock(port); /* this was either a pure send right or a send-once right */ @@ -495,25 +520,34 @@
ipc_right_check( assert((bits & MACH_PORT_TYPE_RECEIVE) == 0); assert(IE_BITS_UREFS(bits) > 0); - if (bits & MACH_PORT_TYPE_SEND) { + if (bits & MACH_PORT_TYPE_SEND) { assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND); + assert(IE_BITS_UREFS(bits) > 0); + assert(port->ip_srights > 0); + port->ip_srights--; } else { assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND_ONCE); assert(IE_BITS_UREFS(bits) == 1); + assert(port->ip_sorights > 0); + port->ip_sorights--; } + ip_unlock(port); + /* + * delete SEND rights from ipc hash. + */ - /* convert entry to dead name */ - - if ((bits & MACH_PORT_TYPE_SEND) && !(bits & MACH_PORT_TYPE_RECEIVE)) + if ((bits & MACH_PORT_TYPE_SEND) != 0) { ipc_hash_delete(space, (ipc_object_t)port, name, entry); + } + /* convert entry to dead name */ bits = (bits &~ IE_BITS_TYPE_MASK) | MACH_PORT_TYPE_DEAD_NAME; /* * If there was a notification request outstanding on this * name, and the port went dead, that notification - * must already be on its way up from the port layer. + * must already be on its way up from the port layer. * * Add the reference that the notification carries. It * is done here, and not in the notification delivery, @@ -679,7 +713,9 @@ kern_return_t ipc_right_destroy( ipc_space_t space, mach_port_name_t name, - ipc_entry_t entry) + ipc_entry_t entry, + boolean_t check_guard, + uint64_t guard) { ipc_entry_bits_t bits; mach_port_type_t type; @@ -744,6 +780,20 @@ ipc_right_destroy( break; } + /* For receive rights, check for guarding */ + if ((type & MACH_PORT_TYPE_RECEIVE) && + (check_guard) && (port->ip_guarded) && + (guard != port->ip_context)) { + /* Guard Violation */ + uint64_t portguard = port->ip_context; + ip_unlock(port); + is_write_unlock(space); + /* Raise mach port guard exception */ + mach_port_guard_exception(name, 0, portguard, kGUARD_EXC_DESTROY); + return KERN_INVALID_RIGHT; + } + + request = ipc_right_request_cancel_macro(space, port, name, entry); entry->ie_object = IO_NULL; @@ -770,8 +820,10 @@ ipc_right_destroy( assert(port->ip_receiver == space); queue_init(links); + ipc_port_clear_receiver(port, links); ipc_port_destroy(port); /* consumes our ref, unlocks */ + while(!queue_empty(links)) { wql = (wait_queue_link_t) dequeue(links); wait_queue_link_free(wql); @@ -794,6 +846,8 @@ ipc_right_destroy( if (request != IP_NULL) ipc_notify_port_deleted(request, name); + + break; } @@ -1017,7 +1071,7 @@ ipc_right_delta( { ipc_port_t port = IP_NULL; ipc_entry_bits_t bits; - + bits = entry->ie_bits; @@ -1093,7 +1147,17 @@ ipc_right_delta( assert(ip_active(port)); assert(port->ip_receiver_name == name); assert(port->ip_receiver == space); - + + /* Mach Port Guard Checking */ + if(port->ip_guarded) { + uint64_t portguard = port->ip_context; + ip_unlock(port); + is_write_unlock(space); + /* Raise mach port guard exception */ + mach_port_guard_exception(name, 0, portguard, kGUARD_EXC_MOD_REFS); + goto guard_failure; + } + if (bits & MACH_PORT_TYPE_SEND) { assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND_RECEIVE); @@ -1149,6 +1213,7 @@ ipc_right_delta( queue_init(links); ipc_port_clear_receiver(port, links); ipc_port_destroy(port); /* consumes ref, unlocks */ + while(!queue_empty(links)) { wql = (wait_queue_link_t) dequeue(links); wait_queue_link_free(wql); @@ -1205,6 +1270,7 @@ ipc_right_delta( } case MACH_PORT_RIGHT_DEAD_NAME: { + ipc_port_t relport = IP_NULL; mach_port_urefs_t urefs; if (bits & MACH_PORT_TYPE_SEND_RIGHTS) { @@ -1219,6 +1285,8 @@ ipc_right_delta( goto invalid_right; } bits = entry->ie_bits; + relport = port; + port = IP_NULL; } 
else if ((bits & MACH_PORT_TYPE_DEAD_NAME) == 0) goto invalid_right; @@ -1241,6 +1309,9 @@ ipc_right_delta( } is_write_unlock(space); + if (relport != IP_NULL) + ip_release(relport); + break; } @@ -1349,8 +1420,191 @@ ipc_right_delta( urefs_overflow: is_write_unlock(space); return KERN_UREFS_OVERFLOW; + + guard_failure: + return KERN_INVALID_RIGHT; } +/* + * Routine: ipc_right_destruct + * Purpose: + * Deallocates the receive right and modifies the + * user-reference count for the send rights as requested. + * Conditions: + * The space is write-locked, and is unlocked upon return. + * The space must be active. + * Returns: + * KERN_SUCCESS Count was modified. + * KERN_INVALID_RIGHT Entry has wrong type. + * KERN_INVALID_VALUE Bad delta for the right. + */ + +kern_return_t +ipc_right_destruct( + ipc_space_t space, + mach_port_name_t name, + ipc_entry_t entry, + mach_port_delta_t srdelta, + uint64_t guard) +{ + ipc_port_t port = IP_NULL; + ipc_entry_bits_t bits; + + queue_head_t links_data; + queue_t links = &links_data; + wait_queue_link_t wql; + + mach_port_urefs_t urefs; + ipc_port_t request = IP_NULL; + ipc_port_t nsrequest = IP_NULL; + mach_port_mscount_t mscount = 0; + + bits = entry->ie_bits; + + assert(is_active(space)); + + if (((bits & MACH_PORT_TYPE_RECEIVE) == 0) || + (srdelta && ((bits & MACH_PORT_TYPE_SEND) == 0))) { + is_write_unlock(space); + return KERN_INVALID_RIGHT; + } + + if (srdelta > 0) + goto invalid_value; + + port = (ipc_port_t) entry->ie_object; + assert(port != IP_NULL); + + ip_lock(port); + assert(ip_active(port)); + assert(port->ip_receiver_name == name); + assert(port->ip_receiver == space); + + /* Mach Port Guard Checking */ + if(port->ip_guarded && (guard != port->ip_context)) { + uint64_t portguard = port->ip_context; + ip_unlock(port); + is_write_unlock(space); + mach_port_guard_exception(name, 0, portguard, kGUARD_EXC_DESTROY); + return KERN_INVALID_ARGUMENT; + } + + /* + * First reduce the send rights as requested and + * adjust the entry->ie_bits accordingly. The + * ipc_entry_modified() call is made once the receive + * right is destroyed too. + */ + + if (srdelta) { + + assert(port->ip_srights > 0); + + urefs = IE_BITS_UREFS(bits); + /* + * Since we made sure that srdelta is negative, + * the check for urefs overflow is not required. + */ + if (MACH_PORT_UREFS_UNDERFLOW(urefs, srdelta)) { + ip_unlock(port); + goto invalid_value; + } + if ((urefs + srdelta) == 0) { + if (--port->ip_srights == 0) { + nsrequest = port->ip_nsrequest; + if (nsrequest != IP_NULL) { + port->ip_nsrequest = IP_NULL; + mscount = port->ip_mscount; + } + } + assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_SEND_RECEIVE); + entry->ie_bits = bits &~ (IE_BITS_UREFS_MASK| + MACH_PORT_TYPE_SEND); + } else { + entry->ie_bits = bits + srdelta; + } + } + + /* + * Now destroy the receive right. Update space and + * entry accordingly. + */ + + bits = entry->ie_bits; + if (bits & MACH_PORT_TYPE_SEND) { + assert(IE_BITS_UREFS(bits) > 0); + assert(IE_BITS_UREFS(bits) < MACH_PORT_UREFS_MAX); + + if (port->ip_pdrequest != NULL) { + /* + * Since another task has requested a + * destroy notification for this port, it + * isn't actually being destroyed - the receive + * right is just being moved to another task. + * Since we still have one or more send rights, + * we need to record the loss of the receive + * right and enter the remaining send right + * into the hash table. 
+ */ + ipc_entry_modified(space, name, entry); + entry->ie_bits &= ~MACH_PORT_TYPE_RECEIVE; + ipc_hash_insert(space, (ipc_object_t) port, + name, entry); + ip_reference(port); + } else { + /* + * The remaining send right turns into a + * dead name. Notice we don't decrement + * ip_srights, generate a no-senders notif, + * or use ipc_right_dncancel, because the + * port is destroyed "first". + */ + bits &= ~IE_BITS_TYPE_MASK; + bits |= MACH_PORT_TYPE_DEAD_NAME; + if (entry->ie_request) { + entry->ie_request = IE_REQ_NONE; + bits++; + } + entry->ie_bits = bits; + entry->ie_object = IO_NULL; + ipc_entry_modified(space, name, entry); + } + } else { + assert(IE_BITS_TYPE(bits) == MACH_PORT_TYPE_RECEIVE); + assert(IE_BITS_UREFS(bits) == 0); + request = ipc_right_request_cancel_macro(space, port, + name, entry); + entry->ie_object = IO_NULL; + ipc_entry_dealloc(space, name, entry); + } + + /* Unlock space */ + is_write_unlock(space); + + if (nsrequest != IP_NULL) + ipc_notify_no_senders(nsrequest, mscount); + + queue_init(links); + ipc_port_clear_receiver(port, links); + ipc_port_destroy(port); /* consumes ref, unlocks */ + + while(!queue_empty(links)) { + wql = (wait_queue_link_t) dequeue(links); + wait_queue_link_free(wql); + } + + if (request != IP_NULL) + ipc_notify_port_deleted(request, name); + + return KERN_SUCCESS; + + invalid_value: + is_write_unlock(space); + return KERN_INVALID_VALUE; + +} + + /* * Routine: ipc_right_info * Purpose: @@ -1588,6 +1842,9 @@ ipc_right_copyin( ipc_object_t *objectp, ipc_port_t *sorightp, ipc_port_t *releasep, +#if IMPORTANCE_INHERITANCE + int *assertcntp, +#endif /* IMPORTANCE_INHERITANCE */ queue_t links) { ipc_entry_bits_t bits; @@ -1599,6 +1856,10 @@ ipc_right_copyin( *releasep = IP_NULL; +#if IMPORTANCE_INHERITANCE + *assertcntp = 0; +#endif + bits = entry->ie_bits; assert(is_active(space)); @@ -1717,6 +1978,26 @@ ipc_right_copyin( ipc_port_clear_receiver(port, links); port->ip_receiver_name = MACH_PORT_NULL; port->ip_destination = IP_NULL; + +#if IMPORTANCE_INHERITANCE + /* + * Account for boosts the current task is going to lose when + * copying this right in. Tempowner ports have either not + * been accounting to any task (and therefore are already in + * "limbo" state w.r.t. assertions) or to some other specific + * task. As we have no way to drop the latter task's assertions + * here, we'll deduct those when we enqueue it on its + * destination port (see ipc_port_check_circularity()). + */ + if (port->ip_tempowner == 0) { + assert(port->ip_taskptr == 0); + + /* ports in limbo have to be tempowner */ + port->ip_tempowner = 1; + *assertcntp = port->ip_impcount; + } +#endif /* IMPORTANCE_INHERITANCE */ + ip_unlock(port); *objectp = (ipc_object_t) port; @@ -2280,6 +2561,10 @@ ipc_right_copyout( case MACH_MSG_TYPE_PORT_RECEIVE: { ipc_port_t dest; +#if IMPORTANCE_INHERITANCE + natural_t assertcnt = port->ip_impcount; +#endif /* IMPORTANCE_INHERITANCE */ + assert(port->ip_mscount == 0); assert(port->ip_receiver_name == MACH_PORT_NULL); dest = port->ip_destination; @@ -2325,15 +2610,27 @@ ipc_right_copyout( entry->ie_bits = bits | MACH_PORT_TYPE_RECEIVE; ipc_entry_modified(space, name, entry); - if (dest != IP_NULL) + if (dest != IP_NULL) { +#if IMPORTANCE_INHERITANCE + /* + * Deduct the assertion counts we contributed to + * the old destination port. They've already + * been reflected into the task as a result of + * getting enqueued.
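+ * + * Editorial summary, not part of this change: the counts move in three steps. (1) copyin: the port goes to limbo (ip_tempowner = 1) and *assertcntp reports ip_impcount; (2) enqueue: ipc_port_check_circularity() adds that count to every port down the destination chain; (3) copyout, below: the same count is subtracted from the old destination, since the right has left that chain. E.g. a port carrying 3 donations keeps ip_impcount == 3 through limbo, adds 3 to each intermediate port when enqueued, and subtracts 3 from the old destination here.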
+ */ + ip_lock(dest); + assert(dest->ip_impcount >= assertcnt); + dest->ip_impcount -= assertcnt; + ip_unlock(dest); +#endif /* IMPORTANCE_INHERITANCE */ ip_release(dest); + } break; } default: panic("ipc_right_copyout: strange rights"); } - return KERN_SUCCESS; } diff --git a/osfmk/ipc/ipc_right.h b/osfmk/ipc/ipc_right.h index dfbfd232e..b844f26fa 100644 --- a/osfmk/ipc/ipc_right.h +++ b/osfmk/ipc/ipc_right.h @@ -138,7 +138,9 @@ extern void ipc_right_terminate( extern kern_return_t ipc_right_destroy( ipc_space_t space, mach_port_name_t name, - ipc_entry_t entry); + ipc_entry_t entry, + boolean_t check_guard, + uint64_t guard); /* Release a send/send-once/dead-name user reference */ extern kern_return_t ipc_right_dealloc( @@ -154,6 +156,14 @@ extern kern_return_t ipc_right_delta( mach_port_right_t right, mach_port_delta_t delta); +/* Destroy a receive right; Modify ref count for send rights */ +extern kern_return_t ipc_right_destruct( + ipc_space_t space, + mach_port_name_t name, + ipc_entry_t entry, + mach_port_delta_t srdelta, + uint64_t guard); + /* Retrieve information about a right */ extern kern_return_t ipc_right_info( ipc_space_t space, @@ -179,6 +189,9 @@ extern kern_return_t ipc_right_copyin( ipc_object_t *objectp, ipc_port_t *sorightp, ipc_port_t *releasep, +#if IMPORTANCE_INHERITANCE + int *assertcntp, +#endif queue_t links); /* Undo the effects of an ipc_right_copyin */ diff --git a/osfmk/ipc/ipc_space.c b/osfmk/ipc/ipc_space.c index 803ab7321..b7152e720 100644 --- a/osfmk/ipc/ipc_space.c +++ b/osfmk/ipc/ipc_space.c @@ -200,7 +200,14 @@ ipc_space_create_special( return KERN_RESOURCE_SHORTAGE; is_lock_init(space); - space->is_bits = IS_INACTIVE | 1; /* 1 ref, not active, not growing */ + + space->is_bits = IS_INACTIVE | 1; /* 1 ref, not active, not growing */ + space->is_table = IE_NULL; + space->is_task = TASK_NULL; + space->is_table_next = 0; + space->is_low_mod = 0; + space->is_high_mod = 0; + *spacep = space; return KERN_SUCCESS; } @@ -250,7 +257,7 @@ ipc_space_clean( if (type != MACH_PORT_TYPE_NONE) { mach_port_name_t name = MACH_PORT_MAKE(index, IE_BITS_GEN(entry->ie_bits)); - ipc_right_destroy(space, name, entry); /* unlocks space */ + ipc_right_destroy(space, name, entry, FALSE, 0); /* unlocks space */ goto retry; } } diff --git a/osfmk/ipc/ipc_space.h b/osfmk/ipc/ipc_space.h index 2f9edeb47..b98eba367 100644 --- a/osfmk/ipc/ipc_space.h +++ b/osfmk/ipc/ipc_space.h @@ -199,6 +199,7 @@ is_release(ipc_space_t is) { /* If we just removed the last reference count */ if ( 1 == (OSDecrementAtomic(&(is->is_bits)) & IS_REFS_MAX)) { + assert(!is_active(is)); is_lock_destroy(is); is_free(is); } diff --git a/osfmk/ipc/ipc_table.h b/osfmk/ipc/ipc_table.h index a310197e0..66a2d3d5a 100644 --- a/osfmk/ipc/ipc_table.h +++ b/osfmk/ipc/ipc_table.h @@ -109,7 +109,7 @@ extern ipc_table_size_t ipc_table_entries; extern ipc_table_size_t ipc_table_requests; /* Initialize IPC capabilities table storage */ -extern void ipc_table_init(void) __attribute__((section("__TEXT, initcode"))); +extern void ipc_table_init(void); /* * Note that ipc_table_alloc and ipc_table_free diff --git a/osfmk/ipc/mach_debug.c b/osfmk/ipc/mach_debug.c index 615d61f99..c97e27bcf 100644 --- a/osfmk/ipc/mach_debug.c +++ b/osfmk/ipc/mach_debug.c @@ -200,8 +200,10 @@ mach_port_space_info( return KERN_INVALID_TASK; } - table_size_needed = round_page(space->is_table_size - * sizeof(ipc_info_name_t)); + table_size_needed = + vm_map_round_page((space->is_table_size + * sizeof(ipc_info_name_t)), + 
VM_MAP_PAGE_MASK(ipc_kernel_map)); if (table_size_needed == table_size) break; @@ -237,6 +239,7 @@ mach_port_space_info( bits = entry->ie_bits; iin->iin_name = MACH_PORT_MAKE(index, IE_BITS_GEN(bits)); + iin->iin_collision = 0; iin->iin_type = IE_BITS_TYPE(bits); if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) != MACH_PORT_TYPE_NONE && entry->ie_request != IE_REQ_NONE) { @@ -262,8 +265,13 @@ mach_port_space_info( bzero((char *)&table_info[infop->iis_table_size], table_size - infop->iis_table_size * sizeof(ipc_info_name_t)); - kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(table_addr), - vm_map_round_page(table_addr + table_size), FALSE); + kr = vm_map_unwire( + ipc_kernel_map, + vm_map_trunc_page(table_addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(table_addr + table_size, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + FALSE); assert(kr == KERN_SUCCESS); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)table_addr, (vm_map_size_t)table_size, TRUE, ©); diff --git a/osfmk/ipc/mach_kernelrpc.c b/osfmk/ipc/mach_kernelrpc.c index 75244966f..9971c84b5 100644 --- a/osfmk/ipc/mach_kernelrpc.c +++ b/osfmk/ipc/mach_kernelrpc.c @@ -93,6 +93,31 @@ done: return (rv); } +int +_kernelrpc_mach_vm_map_trap(struct _kernelrpc_mach_vm_map_trap_args *args) +{ + mach_vm_offset_t addr; + task_t task = port_name_to_task(args->target); + int rv = MACH_SEND_INVALID_DEST; + + if (task != current_task()) + goto done; + + if (copyin(args->addr, (char *)&addr, sizeof (addr))) + goto done; + + rv = mach_vm_map(task->map, &addr, args->size, args->mask, args->flags, + IPC_PORT_NULL, 0, FALSE, args->cur_protection, VM_PROT_ALL, + VM_INHERIT_DEFAULT); + if (rv == KERN_SUCCESS) + rv = copyout(&addr, args->addr, sizeof (addr)); + +done: + if (task) + task_deallocate(task); + return (rv); +} + int _kernelrpc_mach_port_allocate_trap(struct _kernelrpc_mach_port_allocate_args *args) { @@ -242,3 +267,81 @@ done: task_deallocate(task); return (rv); } + +int +_kernelrpc_mach_port_construct_trap(struct _kernelrpc_mach_port_construct_args *args) +{ + task_t task = port_name_to_task(args->target); + mach_port_name_t name; + int rv = MACH_SEND_INVALID_DEST; + mach_port_options_t options; + + if (copyin(args->options, (char *)&options, sizeof (options))) { + rv = MACH_SEND_INVALID_DATA; + goto done; + } + + if (task != current_task()) + goto done; + + rv = mach_port_construct(task->itk_space, &options, args->context, &name); + if (rv == KERN_SUCCESS) + rv = copyout(&name, args->name, sizeof (name)); + +done: + if (task) + task_deallocate(task); + return (rv); +} + +int +_kernelrpc_mach_port_destruct_trap(struct _kernelrpc_mach_port_destruct_args *args) +{ + task_t task = port_name_to_task(args->target); + int rv = MACH_SEND_INVALID_DEST; + + if (task != current_task()) + goto done; + + rv = mach_port_destruct(task->itk_space, args->name, args->srdelta, args->guard); + +done: + if (task) + task_deallocate(task); + return (rv); +} + +int +_kernelrpc_mach_port_guard_trap(struct _kernelrpc_mach_port_guard_args *args) +{ + task_t task = port_name_to_task(args->target); + int rv = MACH_SEND_INVALID_DEST; + + if (task != current_task()) + goto done; + + rv = mach_port_guard(task->itk_space, args->name, args->guard, args->strict); + +done: + if (task) + task_deallocate(task); + return (rv); +} + +int +_kernelrpc_mach_port_unguard_trap(struct _kernelrpc_mach_port_unguard_args *args) +{ + task_t task = port_name_to_task(args->target); + int rv = MACH_SEND_INVALID_DEST; + + if (task != current_task()) + goto done; + + rv = 
mach_port_unguard(task->itk_space, args->name, args->guard); + +done: + if (task) + task_deallocate(task); + return (rv); +} + diff --git a/osfmk/ipc/mach_msg.c b/osfmk/ipc/mach_msg.c index 2b4c67eb2..af1ce5928 100644 --- a/osfmk/ipc/mach_msg.c +++ b/osfmk/ipc/mach_msg.c @@ -110,7 +110,6 @@ #include - #ifndef offsetof #define offsetof(type, member) ((size_t)(&((type *)0)->member)) #endif /* offsetof */ @@ -154,9 +153,14 @@ mach_msg_format_0_trailer_t trailer_template = { }; /* - * Routine: mach_msg_send + * Routine: mach_msg_send [Kernel Internal] * Purpose: - * Send a message. + * Routine for kernel-task threads to send a message. + * + * Unlike mach_msg_send_from_kernel(), this routine + * looks port names up in the kernel's port namespace + * and copies in from kernel virtual memory (instead + * of taking a vm_map_copy_t pointer for OOL descriptors). * Conditions: * Nothing locked. * Returns: @@ -217,14 +221,16 @@ mach_msg_send( trailer->msgh_audit = current_thread()->task->audit_token; trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0; trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE; - - mr = ipc_kmsg_copyin(kmsg, space, map, option & MACH_SEND_NOTIFY); + + mr = ipc_kmsg_copyin(kmsg, space, map, &option); + if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_free(kmsg); return mr; } - mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, send_timeout); + mr = ipc_kmsg_send(kmsg, option, send_timeout); + if (mr != MACH_MSG_SUCCESS) { mr |= ipc_kmsg_copyout_pseudo(kmsg, space, map, MACH_MSG_BODY_NULL); (void) memcpy((void *) msg, (const void *) kmsg->ikm_header, @@ -235,6 +241,20 @@ mach_msg_send( return mr; } +/* + * message header as seen at user-space + * (for MACH_RCV_LARGE/IDENTITY updating) + */ +typedef struct +{ + mach_msg_bits_t msgh_bits; + mach_msg_size_t msgh_size; + mach_port_name_t msgh_remote_port; + mach_port_name_t msgh_local_port; + mach_msg_size_t msgh_reserved; + mach_msg_id_t msgh_id; +} mach_msg_user_header_t; + /* * Routine: mach_msg_receive_results * Purpose: @@ -283,8 +303,14 @@ mach_msg_receive_results(void) * msize save area instead of the message (which was left on * the queue). */ + if (option & MACH_RCV_LARGE_IDENTITY) { + if (copyout((char *) &self->ith_receiver_name, + msg_addr + offsetof(mach_msg_user_header_t, msgh_local_port), + sizeof(mach_port_name_t))) + mr = MACH_RCV_INVALID_DATA; + } if (copyout((char *) &self->ith_msize, - msg_addr + offsetof(mach_msg_header_t, msgh_size), + msg_addr + offsetof(mach_msg_user_header_t, msgh_size), sizeof(mach_msg_size_t))) mr = MACH_RCV_INVALID_DATA; goto out; @@ -297,6 +323,45 @@ mach_msg_receive_results(void) goto out; } +#if IMPORTANCE_INHERITANCE + if ((kmsg->ikm_header->msgh_bits & MACH_MSGH_BITS_RAISEIMP) != 0) { + __unused int impresult; + int sender_pid = -1; +#if IMPORTANCE_DEBUG + sender_pid = ((mach_msg_max_trailer_t *) + ((vm_offset_t)kmsg->ikm_header + round_msg(kmsg->ikm_header->msgh_size)))->msgh_audit.val[5]; +#endif /* IMPORTANCE_DEBUG */ + ipc_port_t port = kmsg->ikm_header->msgh_remote_port; + task_t task_self = current_task(); + + ip_lock(port); + assert(port->ip_impcount > 0); + port->ip_impcount--; + ip_unlock(port); + + if (task_self->imp_receiver == 0) { + /* + * The task was never ready to receive an importance boost, so remove the msgh bit. + * This can happen when a receive right (which has donor messages) is copied + * out to a non-imp_receiver task (we don't clear the bits on the messages, + * but we didn't transfer any boost counts either).
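+ * + * Editorial sketch, not part of this change (and assuming MACH_MSGH_BITS_RAISEIMP is visible to user code): a receiver can tell that a given delivery carried a boost by testing the header bit after mach_msg() returns: + * + * if (msg.msgh_bits & MACH_MSGH_BITS_RAISEIMP) { + * -- this message donated importance; the task now + * -- holds the externalized assertion + * }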
+ */ + kmsg->ikm_header->msgh_bits &= ~MACH_MSGH_BITS_RAISEIMP; + impresult = 0; + } else { + /* user will accept responsibility for the importance boost */ + task_importance_externalize_assertion(task_self, 1, sender_pid); + impresult = 1; + } + +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_MSG, IMP_MSG_DELV)) | DBG_FUNC_NONE, + sender_pid, audit_token_pid_from_task(task_self), + kmsg->ikm_header->msgh_id, impresult, 0); +#endif /* IMPORTANCE_DEBUG */ + } +#endif /* IMPORTANCE_INHERITANCE */ + trailer_size = ipc_kmsg_add_trailer(kmsg, space, option, self, seqno, FALSE, kmsg->ikm_header->msgh_remote_port->ip_context); /* @@ -337,6 +402,23 @@ mach_msg_receive_results(void) return mr; } +/* + * Routine: mach_msg_receive [Kernel Internal] + * Purpose: + * Routine for kernel-task threads to actively receive a message. + * + * Unlike being dispatched to by ipc_kobject_server() or the + * reply part of mach_msg_rpc_from_kernel(), this routine + * looks up the receive port name in the kernel's port + * namespace and copies out received port rights to that namespace + * as well. Out-of-line memory is copied out of the kernel's + * address space (rather than just providing the vm_map_copy_t). + * Conditions: + * Nothing locked. + * Returns: + * MACH_MSG_SUCCESS Received a message. + * See <mach/message.h> for a list of MACH_RCV_XXX errors. + */ mach_msg_return_t mach_msg_receive( mach_msg_header_t *msg, @@ -408,7 +490,10 @@ mach_msg_overwrite_trap( mach_msg_return_t mr = MACH_MSG_SUCCESS; vm_map_t map = current_map(); - + + /* Only accept options allowed by the user */ + option &= MACH_MSG_OPTION_USER; + if (option & MACH_SEND_MSG) { ipc_space_t space = current_space(); ipc_kmsg_t kmsg; @@ -418,13 +503,14 @@ mach_msg_overwrite_trap( if (mr != MACH_MSG_SUCCESS) return mr; - mr = ipc_kmsg_copyin(kmsg, space, map, option & MACH_SEND_NOTIFY); + mr = ipc_kmsg_copyin(kmsg, space, map, &option); + if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_free(kmsg); return mr; } - mr = ipc_kmsg_send(kmsg, option & MACH_SEND_TIMEOUT, msg_timeout); + mr = ipc_kmsg_send(kmsg, option, msg_timeout); if (mr != MACH_MSG_SUCCESS) { mr |= ipc_kmsg_copyout_pseudo(kmsg, space, map, MACH_MSG_BODY_NULL); diff --git a/osfmk/ipc/mach_port.c b/osfmk/ipc/mach_port.c index a2d4f0d7a..39f1a6696 100644 --- a/osfmk/ipc/mach_port.c +++ b/osfmk/ipc/mach_port.c @@ -114,12 +114,23 @@ void mach_port_names_helper( void mach_port_gst_helper( ipc_pset_t pset, - ipc_port_t port, ipc_entry_num_t maxnames, mach_port_name_t *names, ipc_entry_num_t *actualp); +kern_return_t +mach_port_guard_exception( + mach_port_name_t name, + uint64_t inguard, + uint64_t portguard, + unsigned reason); + +/* Needs port locked */ +void mach_port_get_status_helper( + ipc_port_t port, + mach_port_status_t *status); + /* Zeroed template of qos flags */ static mach_port_qos_t qos_template; @@ -260,7 +271,9 @@ mach_port_names( /* upper bound on number of names in the space */ bound = space->is_table_size; - size_needed = round_page(bound * sizeof(mach_port_name_t)); + size_needed = vm_map_round_page( + (bound * sizeof(mach_port_name_t)), + VM_MAP_PAGE_MASK(ipc_kernel_map)); if (size_needed <= size) break; @@ -285,18 +298,28 @@ mach_port_names( /* can't fault while we hold locks */ - kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr1), - vm_map_round_page(addr1 + size), - VM_PROT_READ|VM_PROT_WRITE, FALSE); + kr = vm_map_wire( + ipc_kernel_map, + vm_map_trunc_page(addr1, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr1 + size, +
VM_MAP_PAGE_MASK(ipc_kernel_map)), + VM_PROT_READ|VM_PROT_WRITE, + FALSE); if (kr != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr1, size); kmem_free(ipc_kernel_map, addr2, size); return KERN_RESOURCE_SHORTAGE; } - kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr2), - vm_map_round_page(addr2 + size), - VM_PROT_READ|VM_PROT_WRITE, FALSE); + kr = vm_map_wire( + ipc_kernel_map, + vm_map_trunc_page(addr2, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr2 + size, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + VM_PROT_READ|VM_PROT_WRITE, + FALSE); if (kr != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr1, size); kmem_free(ipc_kernel_map, addr2, size); @@ -343,19 +366,31 @@ mach_port_names( vm_size_t vm_size_used; size_used = actual * sizeof(mach_port_name_t); - vm_size_used = round_page(size_used); + vm_size_used = + vm_map_round_page(size_used, + VM_MAP_PAGE_MASK(ipc_kernel_map)); /* * Make used memory pageable and get it into * copied-in form. Free any unused memory. */ - kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr1), - vm_map_round_page(addr1 + vm_size_used), FALSE); + kr = vm_map_unwire( + ipc_kernel_map, + vm_map_trunc_page(addr1, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr1 + vm_size_used, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + FALSE); assert(kr == KERN_SUCCESS); - kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr2), - vm_map_round_page(addr2 + vm_size_used), FALSE); + kr = vm_map_unwire( + ipc_kernel_map, + vm_map_trunc_page(addr2, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr2 + vm_size_used, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + FALSE); assert(kr == KERN_SUCCESS); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr1, @@ -729,7 +764,7 @@ mach_port_destroy( return kr; /* space is write-locked and active */ - kr = ipc_right_destroy(space, name, entry); /* unlocks space */ + kr = ipc_right_destroy(space, name, entry, TRUE, 0); /* unlocks space */ return kr; } @@ -908,6 +943,95 @@ mach_port_mod_refs( } +/* + * Routine: mach_port_peek [kernel call] + * Purpose: + * Peek at the message queue for the specified receive + * right and return info about a message in the queue. + * + * On input, seqnop points to a sequence number value + * to match the message being peeked. If zero is specified + * as the seqno, the first message in the queue will be + * peeked. + * + * Only the following trailer types are currently supported: + * MACH_RCV_TRAILER_TYPE(MACH_MSG_TRAILER_FORMAT_0) + * + * or'ed with one of these element types: + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_NULL) + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_SEQNO) + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_SENDER) + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AUDIT) + * + * On input, the value pointed to by trailer_sizep must be + * large enough to hold the requested trailer size. + * + * The message sequence number, id, size, requested trailer info + * and requested trailer size are returned in their respective + * output parameters upon success. + * + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Matching message found, out parameters set. + * KERN_INVALID_TASK The space is null or dead. + * KERN_INVALID_NAME The name doesn't denote a right. + * KERN_INVALID_RIGHT Name doesn't denote receive rights. + * KERN_INVALID_VALUE The input parameter values are out of bounds. + * KERN_FAILURE The requested message was not found. 
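+ * + * Editorial usage sketch, not part of this change (local names are illustrative): + * + * mach_port_seqno_t seqno = 0; -- peek the first queued message + * mach_msg_size_t msize; + * mach_msg_id_t msgid; + * mach_msg_audit_trailer_t tr; + * mach_msg_type_number_t trsz = sizeof(tr); + * + * kr = mach_port_peek(space, name, + * MACH_RCV_TRAILER_TYPE(MACH_MSG_TRAILER_FORMAT_0) | + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AUDIT), + * &seqno, &msize, &msgid, + * (mach_msg_trailer_info_t)&tr, &trsz);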
+ */ + +kern_return_t +mach_port_peek( + ipc_space_t space, + mach_port_name_t name, + mach_msg_trailer_type_t trailer_type, + mach_port_seqno_t *seqnop, + mach_msg_size_t *msg_sizep, + mach_msg_id_t *msg_idp, + mach_msg_trailer_info_t trailer_infop, + mach_msg_type_number_t *trailer_sizep) +{ + ipc_port_t port; + kern_return_t kr; + boolean_t found; + mach_msg_max_trailer_t max_trailer; + + if (space == IS_NULL) + return KERN_INVALID_TASK; + + if (!MACH_PORT_VALID(name)) + return KERN_INVALID_RIGHT; + + /* + * We don't allow anything greater than the audit trailer - to avoid + * leaking the context pointer and to avoid variable-sized context issues. + */ + if (GET_RCV_ELEMENTS(trailer_type) > MACH_RCV_TRAILER_AUDIT || + REQUESTED_TRAILER_SIZE(TRUE, trailer_type) > *trailer_sizep) + return KERN_INVALID_VALUE; + + *trailer_sizep = REQUESTED_TRAILER_SIZE(TRUE, trailer_type); + + kr = ipc_port_translate_receive(space, name, &port); + if (kr != KERN_SUCCESS) + return kr; + + /* Port locked and active */ + + found = ipc_mqueue_peek(&port->ip_messages, seqnop, + msg_sizep, msg_idp, &max_trailer); + ip_unlock(port); + + if (found != TRUE) + return KERN_FAILURE; + + max_trailer.msgh_seqno = *seqnop; + memcpy(trailer_infop, &max_trailer, *trailer_sizep); + + return KERN_SUCCESS; +} + /* * Routine: mach_port_set_mscount [kernel call] * Purpose: @@ -1021,8 +1145,13 @@ mach_port_get_context( if (kr != KERN_SUCCESS) return kr; - /* port is locked and active */ - *context = port->ip_context; + /* Port locked and active */ + + /* For strictly guarded ports, return empty context (which acts as guard) */ + if (port->ip_strict_guard) + *context = 0; + else + *context = port->ip_context; ip_unlock(port); return KERN_SUCCESS; @@ -1063,6 +1192,14 @@ mach_port_set_context( return kr; /* port is locked and active */ + if(port->ip_strict_guard) { + uint64_t portguard = port->ip_context; + ip_unlock(port); + /* For strictly guarded ports, disallow overwriting context; Raise Exception */ + mach_port_guard_exception(name, context, portguard, kGUARD_EXC_SET_CONTEXT); + return KERN_INVALID_ARGUMENT; + } + port->ip_context = context; ip_unlock(port); @@ -1070,44 +1207,6 @@ mach_port_set_context( } -/* - * Routine: mach_port_gst_helper - * Conditions: - * portspace is locked for both the recieve right and pset - * under observation. - * Purpose: - * A helper function for mach_port_get_set_status. - */ - -void -mach_port_gst_helper( - ipc_pset_t pset, - ipc_port_t port, - ipc_entry_num_t maxnames, - mach_port_name_t *names, - ipc_entry_num_t *actualp) -{ - mach_port_name_t name; - - assert(port != IP_NULL); - /* - * The space lock is held by the calling function, - * hence it is OK to read name without the port lock. 
- */ - assert(ip_active(port)); - name = port->ip_receiver_name; - assert(name != MACH_PORT_NULL); - - if (ipc_pset_member(pset, port)) { - ipc_entry_num_t actual = *actualp; - - if (actual < maxnames) - names[actual] = name; - - *actualp = actual+1; - } -} - /* * Routine: mach_port_get_set_status [kernel call] * Purpose: @@ -1145,13 +1244,11 @@ mach_port_get_set_status( if (!MACH_PORT_VALID(name)) return KERN_INVALID_RIGHT; - size = PAGE_SIZE; /* initial guess */ + size = VM_MAP_PAGE_SIZE(ipc_kernel_map); /* initial guess */ for (;;) { - ipc_entry_t entry, table; - ipc_entry_num_t tsize; - mach_port_index_t index; mach_port_name_t *names; + ipc_object_t psobj; ipc_pset_t pset; kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); @@ -1164,50 +1261,34 @@ mach_port_get_set_status( VM_PROT_READ|VM_PROT_WRITE, FALSE); assert(kr == KERN_SUCCESS); - kr = ipc_right_lookup_read(space, name, &entry); + kr = ipc_object_translate(space, name, MACH_PORT_RIGHT_PORT_SET, &psobj); if (kr != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr, size); return kr; } - /* space is read-locked and active */ - if (IE_BITS_TYPE(entry->ie_bits) != MACH_PORT_TYPE_PORT_SET) { - is_read_unlock(space); - kmem_free(ipc_kernel_map, addr, size); - return KERN_INVALID_RIGHT; - } - - pset = (ipc_pset_t) entry->ie_object; - assert(pset != IPS_NULL); - /* the port set must be active */ + /* just use a portset reference from here on out */ + pset = (ipc_pset_t) psobj; + ips_reference(pset); + ips_unlock(pset); names = (mach_port_name_t *) addr; maxnames = (ipc_entry_num_t)(size / sizeof(mach_port_name_t)); - actual = 0; - table = space->is_table; - tsize = space->is_table_size; + ipc_mqueue_set_gather_member_names(&pset->ips_messages, maxnames, names, &actual); - for (index = 0; index < tsize; index++) { - ipc_entry_t ientry = &table[index]; - ipc_port_t port = (ipc_port_t) ientry->ie_object; - - if (ientry->ie_bits & MACH_PORT_TYPE_RECEIVE && - port->ip_pset_count > 0) { - mach_port_gst_helper(pset, port, - maxnames, names, &actual); - } - } - - is_read_unlock(space); + /* release the portset reference */ + ips_release(pset); if (actual <= maxnames) break; /* didn't have enough memory; allocate more */ - kmem_free(ipc_kernel_map, addr, size); - size = round_page(actual * sizeof(mach_port_name_t)) + PAGE_SIZE; + size = vm_map_round_page( + (actual * sizeof(mach_port_name_t)), + VM_MAP_PAGE_MASK(ipc_kernel_map)) + + VM_MAP_PAGE_SIZE(ipc_kernel_map); } if (actual == 0) { @@ -1219,15 +1300,22 @@ mach_port_get_set_status( vm_size_t vm_size_used; size_used = actual * sizeof(mach_port_name_t); - vm_size_used = round_page(size_used); + vm_size_used = vm_map_round_page( + size_used, + VM_MAP_PAGE_MASK(ipc_kernel_map)); /* * Make used memory pageable and get it into * copied-in form. Free any unused memory. */ - kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + vm_size_used), FALSE); + kr = vm_map_unwire( + ipc_kernel_map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr + vm_size_used, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + FALSE); assert(kr == KERN_SUCCESS); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, @@ -1598,6 +1686,46 @@ mach_port_extract_right( return kr; } +/* + * Routine: mach_port_get_status_helper [helper] + * Purpose: + * Populates a mach_port_status_t structure with + * port information. + * Conditions: + * Port needs to be locked + * Returns: + * None. 
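+ * + * Editorial usage sketch, not part of this change: this helper backs both the existing MACH_PORT_RECEIVE_STATUS flavor and the new MACH_PORT_INFO_EXT flavor of mach_port_get_attributes(), the latter additionally reporting the boost count: + * + * mach_port_info_ext_t info; + * mach_msg_type_number_t cnt = MACH_PORT_INFO_EXT_COUNT; + * kr = mach_port_get_attributes(space, name, MACH_PORT_INFO_EXT, + * (mach_port_info_t)&info, &cnt); + * -- info.mpie_status.mps_flags / info.mpie_boost_cnt now filled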
+ */ +void mach_port_get_status_helper( + ipc_port_t port, + mach_port_status_t *statusp) +{ + spl_t s; + statusp->mps_pset = port->ip_pset_count; + + s = splsched(); + imq_lock(&port->ip_messages); + statusp->mps_seqno = port->ip_messages.imq_seqno; + statusp->mps_qlimit = port->ip_messages.imq_qlimit; + statusp->mps_msgcount = port->ip_messages.imq_msgcount; + imq_unlock(&port->ip_messages); + splx(s); + + statusp->mps_mscount = port->ip_mscount; + statusp->mps_sorights = port->ip_sorights; + statusp->mps_srights = port->ip_srights > 0; + statusp->mps_pdrequest = port->ip_pdrequest != IP_NULL; + statusp->mps_nsrequest = port->ip_nsrequest != IP_NULL; + statusp->mps_flags = 0; + statusp->mps_flags |= ((port->ip_impdonation) ? MACH_PORT_STATUS_FLAG_IMP_DONATION:0); + statusp->mps_flags |= ((port->ip_tempowner) ? MACH_PORT_STATUS_FLAG_TEMPOWNER:0); + statusp->mps_flags |= ((port->ip_taskptr) ? MACH_PORT_STATUS_FLAG_TASKPTR:0); + statusp->mps_flags |= ((port->ip_guarded) ? MACH_PORT_STATUS_FLAG_GUARDED:0); + statusp->mps_flags |= ((port->ip_strict_guard) ? MACH_PORT_STATUS_FLAG_STRICT_GUARD:0); + return; +} + + kern_return_t mach_port_get_attributes( @@ -1637,41 +1765,23 @@ mach_port_get_attributes( } case MACH_PORT_RECEIVE_STATUS: { - mach_port_status_t *statusp = (mach_port_status_t *)info; - spl_t s; - - if (*count < MACH_PORT_RECEIVE_STATUS_COUNT) - return KERN_FAILURE; - + mach_port_status_t *statusp = (mach_port_status_t *)info; + + if (*count < MACH_PORT_RECEIVE_STATUS_COUNT) + return KERN_FAILURE; + if (!MACH_PORT_VALID(name)) return KERN_INVALID_RIGHT; - kr = ipc_port_translate_receive(space, name, &port); - if (kr != KERN_SUCCESS) - return kr; - /* port is locked and active */ - - statusp->mps_pset = port->ip_pset_count; - - s = splsched(); - imq_lock(&port->ip_messages); - statusp->mps_seqno = port->ip_messages.imq_seqno; - statusp->mps_qlimit = port->ip_messages.imq_qlimit; - statusp->mps_msgcount = port->ip_messages.imq_msgcount; - imq_unlock(&port->ip_messages); - splx(s); - - statusp->mps_mscount = port->ip_mscount; - statusp->mps_sorights = port->ip_sorights; - statusp->mps_srights = port->ip_srights > 0; - statusp->mps_pdrequest = port->ip_pdrequest != IP_NULL; - statusp->mps_nsrequest = port->ip_nsrequest != IP_NULL; - statusp->mps_flags = 0; - - *count = MACH_PORT_RECEIVE_STATUS_COUNT; - ip_unlock(port); - break; - } + kr = ipc_port_translate_receive(space, name, &port); + if (kr != KERN_SUCCESS) + return kr; + /* port is locked and active */ + mach_port_get_status_helper(port, statusp); + *count = MACH_PORT_RECEIVE_STATUS_COUNT; + ip_unlock(port); + break; + } case MACH_PORT_DNREQUESTS_SIZE: { ipc_port_request_t table; @@ -1699,6 +1809,25 @@ mach_port_get_attributes( break; } + case MACH_PORT_INFO_EXT: { + mach_port_info_ext_t *mp_info = (mach_port_info_ext_t *)info; + if (*count < MACH_PORT_INFO_EXT_COUNT) + return KERN_FAILURE; + + if (!MACH_PORT_VALID(name)) + return KERN_INVALID_RIGHT; + + kr = ipc_port_translate_receive(space, name, &port); + if (kr != KERN_SUCCESS) + return kr; + /* port is locked and active */ + mach_port_get_status_helper(port, &mp_info->mpie_status); + mp_info->mpie_boost_cnt = port->ip_impcount; + *count = MACH_PORT_INFO_EXT_COUNT; + ip_unlock(port); + break; + } + default: return KERN_INVALID_ARGUMENT; /*NOTREACHED*/ @@ -1761,6 +1890,68 @@ mach_port_set_attributes( return kr; break; } + case MACH_PORT_TEMPOWNER: + if (!MACH_PORT_VALID(name)) + return KERN_INVALID_RIGHT; + + task_t release_imp_task = TASK_NULL; + natural_t assertcnt = 0; + + kr 
= ipc_port_translate_receive(space, name, &port); + if (kr != KERN_SUCCESS) + return kr; + + /* port is locked and active */ + + if (port->ip_tempowner != 0) { + if (port->ip_taskptr != 0) { + release_imp_task = port->ip_imp_task; + port->ip_taskptr = 0; + port->ip_imp_task = TASK_NULL; + assertcnt = port->ip_impcount; + } + } else { + assertcnt = port->ip_impcount; + } + + port->ip_impdonation = 1; + port->ip_tempowner = 1; + ip_unlock(port); + +#if IMPORTANCE_INHERITANCE + /* drop assertions from previous destination task */ + if (release_imp_task != TASK_NULL) { + assert(release_imp_task->imp_receiver != 0); + if (assertcnt > 0) + task_importance_drop_internal_assertion(release_imp_task, assertcnt); + task_deallocate(release_imp_task); + } else if (assertcnt > 0) { + release_imp_task = current_task(); + if (release_imp_task->imp_receiver != 0) + task_importance_drop_internal_assertion(release_imp_task, assertcnt); + } +#else + if (release_imp_task != TASK_NULL) + task_deallocate(release_imp_task); +#endif /* IMPORTANCE_INHERITANCE */ + + break; +#if IMPORTANCE_INHERITANCE + case MACH_PORT_IMPORTANCE_RECEIVER: + if (!MACH_PORT_VALID(name)) + return KERN_INVALID_RIGHT; + + kr = ipc_port_translate_receive(space, name, &port); + if (kr != KERN_SUCCESS) + return kr; + /* port is locked and active */ + + port->ip_impdonation = 1; + ip_unlock(port); + + break; +#endif /* IMPORTANCE_INHERITANCE */ + default: return KERN_INVALID_ARGUMENT; /*NOTREACHED*/ @@ -1907,6 +2098,367 @@ task_set_port_space( return kr; } +/* + * Routine: mach_port_guard_locked [helper routine] + * Purpose: + * Sets a new guard for a locked port. + * Conditions: + * Port Locked. + * Returns: + * KERN_SUCCESS Port Guarded. + * KERN_INVALID_ARGUMENT Port already contains a context/guard. + */ +static kern_return_t +mach_port_guard_locked( + ipc_port_t port, + uint64_t guard, + boolean_t strict) +{ + if (port->ip_context) + return KERN_INVALID_ARGUMENT; + + port->ip_context = guard; + port->ip_guarded = 1; + port->ip_strict_guard = (strict)?1:0; + return KERN_SUCCESS; +} + +/* + * Routine: mach_port_unguard_locked [helper routine] + * Purpose: + * Removes the guard from a locked port. + * Conditions: + * Port Locked. + * Returns: + * KERN_SUCCESS Port Unguarded. + * KERN_INVALID_ARGUMENT Port is either unguarded already or guard mismatch. + * This also raises an EXC_GUARD exception. + */ +static kern_return_t +mach_port_unguard_locked( + ipc_port_t port, + mach_port_name_t name, + uint64_t guard) +{ + /* Port locked and active */ + if (!port->ip_guarded) { + /* Port already unguarded; Raise exception */ + mach_port_guard_exception(name, guard, 0, kGUARD_EXC_UNGUARDED); + return KERN_INVALID_ARGUMENT; + } + + if (port->ip_context != guard) { + /* Incorrect guard; Raise exception */ + mach_port_guard_exception(name, guard, port->ip_context, kGUARD_EXC_INCORRECT_GUARD); + return KERN_INVALID_ARGUMENT; + } + + port->ip_context = 0; + port->ip_guarded = port->ip_strict_guard = 0; + return KERN_SUCCESS; +} + + +/* + * Routine: mach_port_guard_exception [helper routine] + * Purpose: + * Marks the thread with AST_GUARD for a mach port guard violation. + * Also saves exception info in thread structure. + * Conditions: + * None. + * Returns: + * KERN_FAILURE Thread marked with AST_GUARD.
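+ * + * Editorial sketch, not part of this change, of the lifecycle the two helpers above implement, from the user's side (space argument illustrative): + * + * mach_port_guard(space, name, guard, TRUE); -- strict: context reads back as 0 + * ... + * mach_port_unguard(space, name, guard); -- wrong value => kGUARD_EXC_INCORRECT_GUARD + * -- raised via mach_port_guard_exception()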
+ */
+kern_return_t
+mach_port_guard_exception(
+	mach_port_name_t	name,
+	uint64_t		inguard,
+	uint64_t		portguard,
+	unsigned		reason)
+{
+	thread_t t = current_thread();
+	uint64_t code, subcode;
+
+	/* Log exception info to syslog */
+	printf( "Mach Port Guard Exception - "
+		"Thread: 0x%x, "
+		"Port Name: 0x%x, "
+		"Expected Guard: 0x%x, "
+		"Received Guard: 0x%x\n",
+		(unsigned)t,
+		(unsigned)name,
+		(unsigned)portguard,
+		(unsigned)inguard);
+
+	/*
+	 * EXC_GUARD namespace for mach ports
+	 *
+	 *
+	 * Mach port guards encode the exception code and subcode as follows:
+	 *
+	 * code:
+	 * +----------------------------------------------------------------+
+	 * |[63:61] GUARD_TYPE_MACH_PORT | [60:32] flavor | [31:0] port name|
+	 * +----------------------------------------------------------------+
+	 *
+	 * subcode:
+	 * +----------------------------------------------------------------+
+	 * |                       [63:0] guard value                       |
+	 * +----------------------------------------------------------------+
+	 */
+
+	code = (((uint64_t)GUARD_TYPE_MACH_PORT) << 61) |
+		(((uint64_t)reason) << 32) |
+		((uint64_t)name);
+	subcode = (uint64_t)(portguard);
+
+	t->guard_exc_info.code = code;
+	t->guard_exc_info.subcode = subcode;
+
+	/* Mark thread with AST_GUARD */
+	thread_guard_violation(t, GUARD_TYPE_MACH_PORT);
+	return KERN_FAILURE;
+}
+
+
+/*
+ *	Routine:	mach_port_guard_ast
+ *	Purpose:
+ *		Raises an exception for mach port guard violation.
+ *	Conditions:
+ *		None.
+ *	Returns:
+ *		None.
+ */
+
+void
+mach_port_guard_ast(thread_t t)
+{
+	mach_exception_data_type_t	code[EXCEPTION_CODE_MAX];
+
+	code[0] = t->guard_exc_info.code;
+	code[1] = t->guard_exc_info.subcode;
+
+	/* Raise an EXC_GUARD exception */
+	exception_triage(EXC_GUARD, code, EXCEPTION_CODE_MAX);
+
+	/* Terminate task which caused the exception */
+	(void) task_terminate_internal(current_task());
+	return;
+}
+
+/*
+ *	Routine:	mach_port_construct [kernel call]
+ *	Purpose:
+ *		Constructs a mach port with the provided set of options.
+ *	Conditions:
+ *		None.
+ *	Returns:
+ *		KERN_SUCCESS		The right is allocated.
+ *		KERN_INVALID_TASK	The space is null.
+ *		KERN_INVALID_TASK	The space is dead.
+ *		KERN_RESOURCE_SHORTAGE	Couldn't allocate memory.
+ *		KERN_NO_SPACE		No room in space for another right.
+ *		KERN_FAILURE		Illegal option values requested.
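+ *
+ *	Editor's note (illustrative user-space sketch, not part of this
+ *	change; the MPO_* flags and the mpl limits field are the ones
+ *	documented for this interface, the guard value is caller-chosen):
+ *
+ *		mach_port_options_t opts;
+ *		mach_port_name_t pname;
+ *		uint64_t g = 0xfeedfaceULL;
+ *		kern_return_t kr;
+ *
+ *		bzero(&opts, sizeof(opts));
+ *		opts.flags = MPO_CONTEXT_AS_GUARD | MPO_STRICT | MPO_QLIMIT;
+ *		opts.mpl.mpl_qlimit = MACH_PORT_QLIMIT_DEFAULT;
+ *		kr = mach_port_construct(mach_task_self(), &opts, g, &pname);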
+ */
+
+kern_return_t
+mach_port_construct(
+	ipc_space_t		space,
+	mach_port_options_t	*options,
+	uint64_t		context,
+	mach_port_name_t	*name)
+{
+	kern_return_t		kr;
+	ipc_port_t		port;
+
+	if (space == IS_NULL)
+		return (KERN_INVALID_TASK);
+
+	/* Allocate a new port in the IPC space */
+	kr = ipc_port_alloc(space, name, &port);
+	if (kr != KERN_SUCCESS)
+		return kr;
+
+	/* Port locked and active */
+	if (options->flags & MPO_CONTEXT_AS_GUARD) {
+		kr = mach_port_guard_locked(port, (uint64_t) context, (options->flags & MPO_STRICT));
+		/* A newly allocated and locked port should always be guarded successfully */
+		assert(kr == KERN_SUCCESS);
+	} else {
+		port->ip_context = context;
+	}
+
+	/* Unlock port */
+	ip_unlock(port);
+
+	/* Set port attributes as requested */
+
+	if (options->flags & MPO_QLIMIT) {
+		kr = mach_port_set_attributes(space, *name, MACH_PORT_LIMITS_INFO,
+					(mach_port_info_t)&options->mpl, sizeof(options->mpl)/sizeof(int));
+		if (kr != KERN_SUCCESS)
+			goto cleanup;
+	}
+
+	if (options->flags & MPO_TEMPOWNER) {
+		kr = mach_port_set_attributes(space, *name, MACH_PORT_TEMPOWNER, NULL, 0);
+		if (kr != KERN_SUCCESS)
+			goto cleanup;
+	}
+
+	if (options->flags & MPO_IMPORTANCE_RECEIVER) {
+		kr = mach_port_set_attributes(space, *name, MACH_PORT_IMPORTANCE_RECEIVER, NULL, 0);
+		if (kr != KERN_SUCCESS)
+			goto cleanup;
+	}
+
+	if (options->flags & MPO_INSERT_SEND_RIGHT) {
+		kr = ipc_object_copyin(space, *name, MACH_MSG_TYPE_MAKE_SEND, (ipc_object_t *)&port);
+		if (kr != KERN_SUCCESS)
+			goto cleanup;
+
+		kr = mach_port_insert_right(space, *name, port, MACH_MSG_TYPE_PORT_SEND);
+		if (kr != KERN_SUCCESS)
+			goto cleanup;
+	}
+
+	return KERN_SUCCESS;
+
+cleanup:
+	/* Attempt to destroy port. If it's already destroyed by some other thread, we're done */
+	(void) mach_port_destruct(space, *name, 0, context);
+	return kr;
+}
+
+/*
+ *	Routine:	mach_port_destruct [kernel call]
+ *	Purpose:
+ *		Destroys a mach port with the appropriate guard.
+ *	Conditions:
+ *		None.
+ *	Returns:
+ *		KERN_SUCCESS		The name is destroyed.
+ *		KERN_INVALID_TASK	The space is null.
+ *		KERN_INVALID_TASK	The space is dead.
+ *		KERN_INVALID_NAME	The name doesn't denote a right.
+ *		KERN_INVALID_RIGHT	The right isn't correct.
+ *		KERN_INVALID_VALUE	The delta for send right is incorrect.
+ *		KERN_INVALID_ARGUMENT	Port is either already unguarded, or the
+ *					guard value does not match. This also
+ *					raises an EXC_GUARD exception.
+ */
+
+kern_return_t
+mach_port_destruct(
+	ipc_space_t		space,
+	mach_port_name_t	name,
+	mach_port_delta_t	srdelta,
+	uint64_t		guard)
+{
+	kern_return_t		kr;
+	ipc_entry_t		entry;
+
+	if (space == IS_NULL)
+		return KERN_INVALID_TASK;
+
+	if (!MACH_PORT_VALID(name))
+		return KERN_INVALID_NAME;
+
+	/* Remove reference for receive right */
+	kr = ipc_right_lookup_write(space, name, &entry);
+	if (kr != KERN_SUCCESS)
+		return kr;
+	/* space is write-locked and active */
+	kr = ipc_right_destruct(space, name, entry, srdelta, guard);	/* unlocks */
+
+	return kr;
+}
+
+/*
+ *	Routine:	mach_port_guard [kernel call]
+ *	Purpose:
+ *		Guard a mach port with specified guard value.
+ *		The context field of the port is used as the guard.
+ *	Conditions:
+ *		None.
+ *	Returns:
+ *		KERN_SUCCESS		The right is guarded.
+ *		KERN_INVALID_TASK	The space is null.
+ *		KERN_INVALID_TASK	The space is dead.
+ *		KERN_INVALID_NAME	The name doesn't denote a right.
+ *		KERN_INVALID_RIGHT	The right isn't correct.
+ *		KERN_INVALID_ARGUMENT	Port already contains a context/guard.
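+ *
+ *	Editor's note (illustrative sketch, not part of this change):
+ *	guard and unguard are paired on the same caller-chosen value,
+ *	which lives in the port's context field while the guard is set:
+ *
+ *		kr = mach_port_guard(mach_task_self(), pname, g, FALSE);
+ *		... use the receive right ...
+ *		kr = mach_port_unguard(mach_task_self(), pname, g);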
+ */
+kern_return_t
+mach_port_guard(
+	ipc_space_t		space,
+	mach_port_name_t	name,
+	uint64_t		guard,
+	boolean_t		strict)
+{
+	kern_return_t		kr;
+	ipc_port_t		port;
+
+	if (space == IS_NULL)
+		return KERN_INVALID_TASK;
+
+	if (!MACH_PORT_VALID(name))
+		return KERN_INVALID_NAME;
+
+	/* Guard can be applied only to receive rights */
+	kr = ipc_port_translate_receive(space, name, &port);
+	if (kr != KERN_SUCCESS)
+		return kr;
+
+	/* Port locked and active */
+	kr = mach_port_guard_locked(port, guard, strict);
+	ip_unlock(port);
+
+	return kr;
+
+}
+
+/*
+ *	Routine:	mach_port_unguard [kernel call]
+ *	Purpose:
+ *		Unguard a mach port with specified guard value.
+ *	Conditions:
+ *		None.
+ *	Returns:
+ *		KERN_SUCCESS		The guard is removed.
+ *		KERN_INVALID_TASK	The space is null.
+ *		KERN_INVALID_TASK	The space is dead.
+ *		KERN_INVALID_NAME	The name doesn't denote a right.
+ *		KERN_INVALID_RIGHT	The right isn't correct.
+ *		KERN_INVALID_ARGUMENT	Port is either already unguarded, or the
+ *					guard value does not match. This also
+ *					raises an EXC_GUARD exception.
+ */
+kern_return_t
+mach_port_unguard(
+	ipc_space_t		space,
+	mach_port_name_t	name,
+	uint64_t		guard)
+{
+
+	kern_return_t		kr;
+	ipc_port_t		port;
+
+	if (space == IS_NULL)
+		return KERN_INVALID_TASK;
+
+	if (!MACH_PORT_VALID(name))
+		return KERN_INVALID_NAME;
+
+	kr = ipc_port_translate_receive(space, name, &port);
+	if (kr != KERN_SUCCESS)
+		return kr;
+
+	/* Port locked and active */
+	kr = mach_port_unguard_locked(port, name, guard);
+	ip_unlock(port);
+	return kr;
+}
+
 /*
  * Get a (new) label handle representing the given port's port label.
  */
diff --git a/osfmk/kdp/Makefile b/osfmk/kdp/Makefile
index 03e7bb526..b36e5dce9 100644
--- a/osfmk/kdp/Makefile
+++ b/osfmk/kdp/Makefile
@@ -3,14 +3,9 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
 export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
 export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 
-
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
-INSTINC_SUBDIRS =
-
-EXPINC_SUBDIRS =
-
 DATAFILES = \
 	kdp_callout.h \
 	kdp_en_debugger.h
diff --git a/osfmk/kdp/kdp.c b/osfmk/kdp/kdp.c
index ec8e73b4e..02f2d363c 100644
--- a/osfmk/kdp/kdp.c
+++ b/osfmk/kdp/kdp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -48,6 +48,8 @@ #include #include #include +#include +#include extern int count_busy_buffers(void); /* must track with declaration in bsd/sys/buf_internal.h */ @@ -131,6 +133,13 @@ extern int machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); extern int proc_pid(void *p); +extern uint64_t +proc_uniqueid(void *p); +extern uint64_t +proc_was_throttled(void *p); +extern uint64_t +proc_did_throttle(void *p); + extern void proc_name_kdp(task_t task, char *buf, int size); @@ -140,6 +149,9 @@ kdp_snapshot_postflight(void); static int pid_from_task(task_t task); +static uint64_t +proc_uniqueid_from_task(task_t task); + kdp_error_t kdp_set_breakpoint_internal( mach_vm_address_t address @@ -251,10 +263,10 @@ kdp_connect( rp->error = KDPERR_ALREADY_CONNECTED; } else { - kdp.reply_port = rport; - kdp.exception_port = eport; - kdp.is_conn = TRUE; - kdp.conn_seq = seq; + kdp.reply_port = rport; + kdp.exception_port = eport; + kdp.is_conn = TRUE; + kdp.conn_seq = seq; kdp.session_key = key; rp->error = KDPERR_NO_ERROR; @@ -299,7 +311,7 @@ kdp_disconnect( kdp.session_key = 0; if ((panicstr != NULL) && (return_on_panic == 0)) - reattach_wait = 1; + reattach_wait = 1; if (noresume_on_disconnect == 1) { reattach_wait = 1; @@ -324,13 +336,13 @@ kdp_reattach( unsigned short *reply_port ) { - kdp_reattach_req_t *rq = &pkt->reattach_req; + kdp_reattach_req_t *rq = &pkt->reattach_req; - kdp.is_conn = TRUE; - kdp_disconnect(pkt, len, reply_port); - *reply_port = rq->req_reply_port; - reattach_wait = 1; - return (TRUE); + kdp.is_conn = TRUE; + kdp_disconnect(pkt, len, reply_port); + *reply_port = rq->req_reply_port; + reattach_wait = 1; + return (TRUE); } static boolean_t @@ -347,7 +359,7 @@ kdp_hostinfo( if (plen < sizeof (*rq)) return (FALSE); - dprintf(("kdp_hostinfo\n")); + dprintf(("kdp_hostinfo\n")); rp->hdr.is_reply = 1; rp->hdr.len = sizeof (*rp); @@ -362,9 +374,9 @@ kdp_hostinfo( static boolean_t kdp_kernelversion( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_kernelversion_req_t *rq = &pkt->kernelversion_req; @@ -379,9 +391,9 @@ kdp_kernelversion( rp->hdr.len = sizeof (*rp); dprintf(("kdp_kernelversion\n")); - slen = strlcpy(rp->version, kdp_kernelversion_string, MAX_KDP_DATA_SIZE); + slen = strlcpy(rp->version, kdp_kernelversion_string, MAX_KDP_DATA_SIZE); - rp->hdr.len += slen + 1; /* strlcpy returns the amount copied with NUL */ + rp->hdr.len += slen + 1; /* strlcpy returns the amount copied with NUL */ *reply_port = kdp.reply_port; *len = rp->hdr.len; @@ -453,7 +465,7 @@ kdp_writemem( kdp_writemem_req_t *rq = &pkt->writemem_req; size_t plen = *len; kdp_writemem_reply_t *rp = &pkt->writemem_reply; - mach_vm_size_t cnt; + mach_vm_size_t cnt; if (plen < sizeof (*rq)) return (FALSE); @@ -462,9 +474,9 @@ kdp_writemem( rp->error = KDPERR_BAD_NBYTES; else { dprintf(("kdp_writemem addr %x size %d\n", rq->address, rq->nbytes)); - cnt = kdp_machine_vm_write((caddr_t)rq->data, (mach_vm_address_t)rq->address, rq->nbytes); - rp->error = KDPERR_NO_ERROR; + rp->error = KDPERR_ACCESS(rq->nbytes, cnt); + dprintf((" cnt %lld error %d\n", cnt, rp->error)); } rp->hdr.is_reply = 1; @@ -478,9 +490,9 @@ kdp_writemem( static boolean_t kdp_writemem64( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_writemem64_req_t *rq = &pkt->writemem64_req; @@ -492,12 +504,12 @@ 
kdp_writemem64( return (FALSE); if (rq->nbytes > MAX_KDP_DATA_SIZE) - rp->error = KDPERR_BAD_NBYTES; + rp->error = KDPERR_BAD_NBYTES; else { - dprintf(("kdp_writemem64 addr %llx size %d\n", rq->address, rq->nbytes)); - - cnt = kdp_machine_vm_write((caddr_t)rq->data, (mach_vm_address_t)rq->address, (mach_vm_size_t)rq->nbytes); - rp->error = KDPERR_NO_ERROR; + dprintf(("kdp_writemem64 addr %llx size %d\n", rq->address, rq->nbytes)); + cnt = kdp_machine_vm_write((caddr_t)rq->data, (mach_vm_address_t)rq->address, (mach_vm_size_t)rq->nbytes); + rp->error = KDPERR_ACCESS(rq->nbytes, cnt); + dprintf((" cnt %lld error %d\n", cnt, rp->error)); } rp->hdr.is_reply = 1; @@ -511,24 +523,28 @@ kdp_writemem64( static boolean_t kdp_writephysmem64( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_writephysmem64_req_t *rq = &pkt->writephysmem64_req; size_t plen = *len; kdp_writephysmem64_reply_t *rp = &pkt->writephysmem64_reply; + mach_vm_size_t cnt; + unsigned int size; if (plen < sizeof (*rq)) - return (FALSE); + return (FALSE); - if (rq->nbytes > MAX_KDP_DATA_SIZE) - rp->error = KDPERR_BAD_NBYTES; + size = rq->nbytes; + if (size > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; else { - dprintf(("kdp_writephysmem64 addr %llx size %d\n", rq->address, rq->nbytes)); - kdp_machine_phys_write(rq, rq->data, rq->lcpu); - rp->error = KDPERR_NO_ERROR; + dprintf(("kdp_writephysmem64 addr %llx size %d\n", rq->address, size)); + cnt = kdp_machine_phys_write(rq, rq->data, rq->lcpu); + rp->error = KDPERR_ACCESS(size, cnt); + dprintf((" cnt %lld error %d\n", cnt, rp->error)); } rp->hdr.is_reply = 1; @@ -550,10 +566,8 @@ kdp_readmem( kdp_readmem_req_t *rq = &pkt->readmem_req; size_t plen = *len; kdp_readmem_reply_t *rp = &pkt->readmem_reply; - mach_vm_size_t cnt; -#if __i386__ - void *pversion = &kdp_kernelversion_string; -#endif + mach_vm_size_t cnt; + unsigned int size; if (plen < sizeof (*rq)) return (FALSE); @@ -561,29 +575,14 @@ kdp_readmem( rp->hdr.is_reply = 1; rp->hdr.len = sizeof (*rp); - if (rq->nbytes > MAX_KDP_DATA_SIZE) + size = rq->nbytes; + if (size > MAX_KDP_DATA_SIZE) rp->error = KDPERR_BAD_NBYTES; else { - unsigned int n = rq->nbytes; - - dprintf(("kdp_readmem addr %x size %d\n", rq->address, n)); -#if __i386__ - /* XXX This is a hack to facilitate the "showversion" macro - * on i386, which is used to obtain the kernel version without - * symbols - a pointer to the version string should eventually - * be pinned at a fixed address when an equivalent of the - * VECTORS segment (loaded at a fixed load address, and contains - * a table) is implemented on these architectures, as with PPC. - * N.B.: x86 now has a low global page, and the version indirection - * is pinned at 0x201C. We retain the 0x501C address override - * for compatibility. Future architectures should instead use - * the KDP_KERNELVERSION request. 
- */ - if (rq->address == 0x501C) - rq->address = (uintptr_t)&pversion; -#endif - cnt = kdp_machine_vm_read((mach_vm_address_t)rq->address, (caddr_t)rp->data, n); - rp->error = KDPERR_NO_ERROR; + dprintf(("kdp_readmem addr %x size %d\n", rq->address, size)); + cnt = kdp_machine_vm_read((mach_vm_address_t)rq->address, (caddr_t)rp->data, rq->nbytes); + rp->error = KDPERR_ACCESS(size, cnt); + dprintf((" cnt %lld error %d\n", cnt, rp->error)); rp->hdr.len += cnt; } @@ -596,15 +595,16 @@ kdp_readmem( static boolean_t kdp_readmem64( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_readmem64_req_t *rq = &pkt->readmem64_req; size_t plen = *len; kdp_readmem64_reply_t *rp = &pkt->readmem64_reply; - mach_vm_size_t cnt; + mach_vm_size_t cnt; + unsigned int size; if (plen < sizeof (*rq)) return (FALSE); @@ -612,16 +612,16 @@ kdp_readmem64( rp->hdr.is_reply = 1; rp->hdr.len = sizeof (*rp); - if (rq->nbytes > MAX_KDP_DATA_SIZE) - rp->error = KDPERR_BAD_NBYTES; + size = rq->nbytes; + if (size > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; else { - - dprintf(("kdp_readmem64 addr %llx size %d\n", rq->address, rq->nbytes)); - - cnt = kdp_machine_vm_read((mach_vm_address_t)rq->address, (caddr_t)rp->data, rq->nbytes); - rp->error = KDPERR_NO_ERROR; + dprintf(("kdp_readmem64 addr %llx size %d\n", rq->address, size)); + cnt = kdp_machine_vm_read((mach_vm_address_t)rq->address, (caddr_t)rp->data, rq->nbytes); + rp->error = KDPERR_ACCESS(size, cnt); + dprintf((" cnt %lld error %d\n", cnt, rp->error)); - rp->hdr.len += cnt; + rp->hdr.len += cnt; } *reply_port = kdp.reply_port; @@ -632,32 +632,33 @@ kdp_readmem64( static boolean_t kdp_readphysmem64( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_readphysmem64_req_t *rq = &pkt->readphysmem64_req; size_t plen = *len; kdp_readphysmem64_reply_t *rp = &pkt->readphysmem64_reply; - int cnt; + mach_vm_size_t cnt; + unsigned int size; if (plen < sizeof (*rq)) - return (FALSE); + return (FALSE); rp->hdr.is_reply = 1; rp->hdr.len = sizeof (*rp); - if (rq->nbytes > MAX_KDP_DATA_SIZE) - rp->error = KDPERR_BAD_NBYTES; + size = rq->nbytes; + if (size > MAX_KDP_DATA_SIZE) + rp->error = KDPERR_BAD_NBYTES; else { - - dprintf(("kdp_readphysmem64 addr %llx size %d\n", rq->address, rq->nbytes)); - - cnt = (int)kdp_machine_phys_read(rq, rp->data, rq->lcpu); - rp->error = KDPERR_NO_ERROR; + dprintf(("kdp_readphysmem64 addr %llx size %d\n", rq->address, size)); + cnt = kdp_machine_phys_read(rq, rp->data, rq->lcpu); + rp->error = KDPERR_ACCESS(size, cnt); + dprintf((" cnt %lld error %d\n", cnt, rp->error)); - rp->hdr.len += cnt; + rp->hdr.len += cnt; } *reply_port = kdp.reply_port; @@ -817,9 +818,9 @@ kdp_readregs( boolean_t kdp_breakpoint_set( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_breakpoint_req_t *rq = &pkt->breakpoint_req; @@ -846,9 +847,9 @@ kdp_breakpoint_set( boolean_t kdp_breakpoint64_set( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_breakpoint64_req_t *rq = &pkt->breakpoint64_req; @@ -875,9 +876,9 @@ kdp_breakpoint64_set( boolean_t kdp_breakpoint_remove( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_breakpoint_req_t *rq = &pkt->breakpoint_req; @@ -903,9 +904,9 @@ 
kdp_breakpoint_remove( boolean_t kdp_breakpoint64_remove( - kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port ) { kdp_breakpoint64_req_t *rq = &pkt->breakpoint64_req; @@ -933,8 +934,8 @@ kdp_breakpoint64_remove( kdp_error_t kdp_set_breakpoint_internal( - mach_vm_address_t address - ) + mach_vm_address_t address +) { uint8_t breakinstr[MAX_BREAKINSN_BYTES], oldinstr[MAX_BREAKINSN_BYTES]; @@ -975,8 +976,8 @@ kdp_set_breakpoint_internal( kdp_error_t kdp_remove_breakpoint_internal( - mach_vm_address_t address - ) + mach_vm_address_t address +) { mach_vm_size_t cnt; int i; @@ -984,7 +985,7 @@ kdp_remove_breakpoint_internal( for(i=0;(i < MAX_BREAKPOINTS) && (breakpoint_list[i].address != address); i++); if (i == MAX_BREAKPOINTS) - { + { return KDPERR_BREAKPOINT_NOT_FOUND; } @@ -1001,7 +1002,7 @@ kdp_remove_all_breakpoints(void) boolean_t breakpoint_found = FALSE; if (breakpoints_initialized) - { + { for(i=0;i < MAX_BREAKPOINTS; i++) { if (breakpoint_list[i].address) @@ -1014,15 +1015,15 @@ kdp_remove_all_breakpoints(void) if (breakpoint_found) printf("kdp_remove_all_breakpoints: found extant breakpoints, removing them.\n"); - } + } return breakpoint_found; } boolean_t kdp_reboot( - __unused kdp_pkt_t *pkt, - __unused int *len, - __unused unsigned short *reply_port + __unused kdp_pkt_t *pkt, + __unused int *len, + __unused unsigned short *reply_port ) { dprintf(("kdp_reboot\n")); @@ -1044,6 +1045,39 @@ static int pid_from_task(task_t task) return pid; } +static uint64_t +proc_uniqueid_from_task(task_t task) +{ + uint64_t uniqueid = ~(0ULL); + + if (task->bsd_info) + uniqueid = proc_uniqueid(task->bsd_info); + + return uniqueid; +} + +static uint64_t +proc_was_throttled_from_task(task_t task) +{ + uint64_t was_throttled = 0; + + if (task->bsd_info) + was_throttled = proc_was_throttled(task->bsd_info); + + return was_throttled; +} + +static uint64_t +proc_did_throttle_from_task(task_t task) +{ + uint64_t did_throttle = 0; + + if (task->bsd_info) + did_throttle = proc_did_throttle(task->bsd_info); + + return did_throttle; +} + boolean_t kdp_copyin(pmap_t p, uint64_t uaddr, void *dest, size_t size) { size_t rem = size; @@ -1078,6 +1112,26 @@ kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap) unsigned int pages_wanted; kern_return_t kErr; + processor_t processor; + vm_statistics64_t stat; + vm_statistics64_data_t host_vm_stat; + + processor = processor_list; + stat = &PROCESSOR_DATA(processor, vm_stat); + host_vm_stat = *stat; + + if (processor_count > 1) { + simple_lock(&processor_list_lock); + + while ((processor = processor->processor_list) != NULL) { + stat = &PROCESSOR_DATA(processor, vm_stat); + host_vm_stat.compressions += stat->compressions; + host_vm_stat.decompressions += stat->decompressions; + } + + simple_unlock(&processor_list_lock); + } + memio_snap->snapshot_magic = STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC; memio_snap->free_pages = vm_page_free_count; memio_snap->active_pages = vm_page_active_count; @@ -1087,6 +1141,10 @@ kdp_mem_and_io_snapshot(struct mem_and_io_snapshot *memio_snap) memio_snap->speculative_pages = vm_page_speculative_count; memio_snap->throttled_pages = vm_page_throttled_count; memio_snap->busy_buffer_count = count_busy_buffers(); + memio_snap->filebacked_pages = vm_page_external_count; + memio_snap->compressions = (uint32_t)host_vm_stat.compressions; + memio_snap->decompressions = (uint32_t)host_vm_stat.decompressions; + memio_snap->compressor_size = VM_PAGE_COMPRESSOR_COUNT; kErr = 
mach_vm_pressure_monitor(FALSE, VM_PRESSURE_TIME_WINDOW, &pages_reclaimed, &pages_wanted); if ( ! kErr ) { memio_snap->pages_wanted = (uint32_t)pages_wanted; @@ -1134,16 +1192,14 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl thread_t thread = THREAD_NULL; thread_snapshot_t tsnap = NULL; unsigned framesize = 2 * sizeof(vm_offset_t); - struct task ctask; - struct thread cthread; - struct _vm_map cmap; - struct pmap cpmap; queue_head_t *task_list = &tasks; boolean_t is_active_list = TRUE; boolean_t dispatch_p = ((trace_flags & STACKSHOT_GET_DQ) != 0); boolean_t save_loadinfo_p = ((trace_flags & STACKSHOT_SAVE_LOADINFO) != 0); + boolean_t save_kextloadinfo_p = ((trace_flags & STACKSHOT_SAVE_KEXT_LOADINFO) != 0); + boolean_t save_userframes_p = ((trace_flags & STACKSHOT_SAVE_KERNEL_FRAMES_ONLY) == 0); if(trace_flags & STACKSHOT_GET_GLOBAL_MEM_STATS) { if(tracepos + sizeof(struct mem_and_io_snapshot) > tracebound) { @@ -1156,10 +1212,11 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl walk_list: queue_iterate(task_list, task, task_t, tasks) { - if ((task == NULL) || (ml_nofault_copy((vm_offset_t) task, (vm_offset_t) &ctask, sizeof(struct task)) != sizeof(struct task))) + if ((task == NULL) || !ml_validate_nofault((vm_offset_t) task, sizeof(struct task))) goto error_exit; int task_pid = pid_from_task(task); + uint64_t task_uniqueid = proc_uniqueid_from_task(task); boolean_t task64 = task_has_64BitAddr(task); if (!task->active) { @@ -1178,9 +1235,10 @@ walk_list: uint32_t uuid_info_count = 0; mach_vm_address_t uuid_info_addr = 0; boolean_t have_map = (task->map != NULL) && - (ml_nofault_copy((vm_offset_t)(task->map), (vm_offset_t)&cmap, sizeof(struct _vm_map)) == sizeof(struct _vm_map)); - boolean_t have_pmap = have_map && (cmap.pmap != NULL) && - (ml_nofault_copy((vm_offset_t)(cmap.pmap), (vm_offset_t)&cpmap, sizeof(struct pmap)) == sizeof(struct pmap)); + (ml_validate_nofault((vm_offset_t)(task->map), sizeof(struct _vm_map))); + boolean_t have_pmap = have_map && (task->map->pmap != NULL) && + (ml_validate_nofault((vm_offset_t)(task->map->pmap), sizeof(struct pmap))); + uint64_t shared_cache_base_address = 0; if (have_pmap && task->active && save_loadinfo_p && task_pid > 0) { // Read the dyld_all_image_infos struct from the task memory to get UUID array count and location @@ -1206,6 +1264,12 @@ walk_list: } } + if (have_pmap && save_kextloadinfo_p && task_pid == 0) { + if (ml_validate_nofault((vm_offset_t)(gLoadedKextSummaries), sizeof(OSKextLoadedKextSummaryHeader))) { + uuid_info_count = gLoadedKextSummaries->numSummaries + 1; /* include main kernel UUID */ + } + } + if (tracepos + sizeof(struct task_snapshot) > tracebound) { error = -1; goto error_exit; @@ -1214,6 +1278,7 @@ walk_list: task_snap = (task_snapshot_t) tracepos; task_snap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC; task_snap->pid = task_pid; + task_snap->uniqueid = task_uniqueid; task_snap->nloadinfos = uuid_info_count; /* Add the BSD process identifiers */ if (task_pid != -1) @@ -1223,19 +1288,57 @@ walk_list: task_snap->ss_flags = 0; if (task64) task_snap->ss_flags |= kUser64_p; + if (task64 && task_pid == 0) + task_snap->ss_flags |= kKernel64_p; if (!task->active) task_snap->ss_flags |= kTerminatedSnapshot; if(task->pidsuspended) task_snap->ss_flags |= kPidSuspended; if(task->frozen) task_snap->ss_flags |= kFrozen; + if (task->effective_policy.t_sup_active == 1) + task_snap->ss_flags |= kTaskIsSuppressed; + + task_snap->latency_qos = 
(task->effective_policy.t_latency_qos == LATENCY_QOS_TIER_UNSPECIFIED) ? + LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | task->effective_policy.t_latency_qos); + task_snap->suspend_count = task->suspend_count; task_snap->task_size = have_pmap ? pmap_resident_count(task->map->pmap) : 0; task_snap->faults = task->faults; task_snap->pageins = task->pageins; task_snap->cow_faults = task->cow_faults; - + task_snap->user_time_in_terminated_threads = task->total_user_time; task_snap->system_time_in_terminated_threads = task->total_system_time; + /* + * The throttling counters are maintained as 64-bit counters in the proc + * structure. However, we reserve 32-bits (each) for them in the task_snapshot + * struct to save space and since we do not expect them to overflow 32-bits. If we + * find these values overflowing in the future, the fix would be to simply + * upgrade these counters to 64-bit in the task_snapshot struct + */ + task_snap->was_throttled = (uint32_t) proc_was_throttled_from_task(task); + task_snap->did_throttle = (uint32_t) proc_did_throttle_from_task(task); + + if (task->shared_region && ml_validate_nofault((vm_offset_t)task->shared_region, + sizeof(struct vm_shared_region))) { + struct vm_shared_region *sr = task->shared_region; + + shared_cache_base_address = sr->sr_base_address + sr->sr_first_mapping; + } + if (!shared_cache_base_address + || !kdp_copyin(task->map->pmap, shared_cache_base_address, task_snap->shared_cache_identifier, sizeof(task_snap->shared_cache_identifier))) { + memset(task_snap->shared_cache_identifier, 0x0, sizeof(task_snap->shared_cache_identifier)); + } + if (task->shared_region) { + /* + * No refcounting here, but we are in debugger + * context, so that should be safe. + */ + task_snap->shared_cache_slide = task->shared_region->sr_slide_info.slide; + } else { + task_snap->shared_cache_slide = 0; + } + tracepos += sizeof(struct task_snapshot); if (task_pid > 0 && uuid_info_count > 0) { @@ -1253,22 +1356,66 @@ walk_list: task_snap->nloadinfos = 0; else tracepos += uuid_info_array_size; + } else if (task_pid == 0 && uuid_info_count > 0) { + uint32_t uuid_info_size = (uint32_t)sizeof(kernel_uuid_info); + uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size; + kernel_uuid_info *output_uuids; + + if (tracepos + uuid_info_array_size > tracebound) { + error = -1; + goto error_exit; + } + + output_uuids = (kernel_uuid_info *)tracepos; + + do { + + if (!kernel_uuid || !ml_validate_nofault((vm_offset_t)kernel_uuid, sizeof(uuid_t))) { + /* Kernel UUID not found or inaccessible */ + task_snap->nloadinfos = 0; + break; + } + + output_uuids[0].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(vm_kernel_stext); + memcpy(&output_uuids[0].imageUUID, kernel_uuid, sizeof(uuid_t)); + + if (ml_validate_nofault((vm_offset_t)(&gLoadedKextSummaries->summaries[0]), + gLoadedKextSummaries->entry_size * gLoadedKextSummaries->numSummaries)) { + uint32_t kexti; + + for (kexti=0 ; kexti < gLoadedKextSummaries->numSummaries; kexti++) { + output_uuids[1+kexti].imageLoadAddress = (uintptr_t)VM_KERNEL_UNSLIDE(gLoadedKextSummaries->summaries[kexti].address); + memcpy(&output_uuids[1+kexti].imageUUID, &gLoadedKextSummaries->summaries[kexti].uuid, sizeof(uuid_t)); + } + + tracepos += uuid_info_array_size; + } else { + /* kext summary invalid, but kernel UUID was copied */ + task_snap->nloadinfos = 1; + tracepos += uuid_info_size; + break; + } + } while(0); } queue_iterate(&task->threads, thread, thread_t, task_threads){ uint64_t tval; - if ((thread == NULL) || 
(ml_nofault_copy((vm_offset_t) thread, (vm_offset_t) &cthread, sizeof(struct thread)) != sizeof(struct thread))) + if ((thread == NULL) || !ml_validate_nofault((vm_offset_t) thread, sizeof(struct thread))) goto error_exit; if (((tracepos + 4 * sizeof(struct thread_snapshot)) > tracebound)) { error = -1; goto error_exit; } + if (!save_userframes_p && thread->kernel_stack == 0) + continue; + /* Populate the thread snapshot header */ tsnap = (thread_snapshot_t) tracepos; tsnap->thread_id = thread_tid(thread); tsnap->state = thread->state; + tsnap->priority = thread->priority; tsnap->sched_pri = thread->sched_pri; tsnap->sched_flags = thread->sched_flags; tsnap->wait_event = VM_KERNEL_UNSLIDE(thread->wait_event); @@ -1286,6 +1433,10 @@ walk_list: tracepos += sizeof(struct thread_snapshot); tsnap->ss_flags = 0; + if (thread->effective_policy.darwinbg) { + tsnap->ss_flags |= kThreadDarwinBG; + } + if (dispatch_p && (task != kernel_task) && (task->active) && have_pmap) { uint64_t dqkeyaddr = thread_dispatchqaddr(thread); if (dqkeyaddr != 0) { @@ -1319,7 +1470,7 @@ walk_list: tracepos += tracebytes; tracebytes = 0; /* Trace user stack, if any */ - if (task->active && thread->task->map != kernel_map) { + if (save_userframes_p && task->active && thread->task->map != kernel_map) { /* 64-bit task? */ if (task_has_64BitAddr(thread->task)) { tracebytes = machine_trace_thread64(thread, tracepos, tracebound, MAX_FRAMES, TRUE); @@ -1354,7 +1505,8 @@ error_exit: } static boolean_t -kdp_readioport(kdp_pkt_t *pkt, +kdp_readioport( + kdp_pkt_t *pkt, int *len, unsigned short *reply_port ) @@ -1422,10 +1574,11 @@ kdp_writeioport( } static boolean_t -kdp_readmsr64(kdp_pkt_t *pkt, - int *len, - unsigned short *reply_port - ) +kdp_readmsr64( + kdp_pkt_t *pkt, + int *len, + unsigned short *reply_port +) { kdp_readmsr64_req_t *rq = &pkt->readmsr64_req; kdp_readmsr64_reply_t *rp = &pkt->readmsr64_reply; diff --git a/osfmk/kdp/kdp_dyld.h b/osfmk/kdp/kdp_dyld.h index 18a27c4fb..fc7e8f3a0 100644 --- a/osfmk/kdp/kdp_dyld.h +++ b/osfmk/kdp/kdp_dyld.h @@ -43,6 +43,13 @@ struct user64_dyld_uuid_info { uuid_t imageUUID; /* UUID of image */ }; +/* Re-use dyld format for kext load addresses */ +#if __LP64__ +typedef struct user64_dyld_uuid_info kernel_uuid_info; +#else +typedef struct user32_dyld_uuid_info kernel_uuid_info; +#endif + struct user32_dyld_image_info { user32_addr_t imageLoadAddress; /* base address image is mapped int */ user32_addr_t imageFilePath; /* path dyld used to load the image */ diff --git a/osfmk/kdp/kdp_protocol.h b/osfmk/kdp/kdp_protocol.h index fb9d29482..eb86bae10 100644 --- a/osfmk/kdp/kdp_protocol.h +++ b/osfmk/kdp/kdp_protocol.h @@ -179,12 +179,21 @@ typedef enum { KDPERR_ALREADY_CONNECTED, KDPERR_BAD_NBYTES, KDPERR_BADFLAVOR, /* bad flavor in w/r regs */ + KDPERR_BAD_ACCESS, /* memory reference failure */ KDPERR_MAX_BREAKPOINTS = 100, KDPERR_BREAKPOINT_NOT_FOUND = 101, KDPERR_BREAKPOINT_ALREADY_SET = 102 } kdp_error_t; +#if defined(__x86_64__) +#define KDPERR_ACCESS(_req,_ret) \ + (((_req) == (uint32_t)(_ret)) ? KDPERR_NO_ERROR : KDPERR_BAD_ACCESS) +#else +#define KDPERR_ACCESS(req,cnt) (KDPERR_NO_ERROR) +#endif /* x86_64 */ + + /* * KDP requests and reply packet formats */ diff --git a/osfmk/kdp/kdp_udp.c b/osfmk/kdp/kdp_udp.c index 4415e10f6..e51c48286 100644 --- a/osfmk/kdp/kdp_udp.c +++ b/osfmk/kdp/kdp_udp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -71,6 +71,10 @@ #include #include +extern int inet_aton(const char *, struct kdp_in_addr *); /* in libkern */ +extern char *inet_ntoa_r(struct kdp_in_addr ina, char *buf, + size_t buflen); /* in libkern */ + #define DO_ALIGN 1 /* align all packet data accesses */ #define KDP_SERIAL_IPADDR 0xABADBABE /* IP address used for serial KDP */ #define LINK_UP_STATUS (IFM_AVALID | IFM_ACTIVE) @@ -93,9 +97,124 @@ static u_short ip_id; /* ip packet ctr, for ids */ * Per RFC 768, August, 1980. */ #define UDP_TTL 60 /* deflt time to live for UDP packets */ -int udp_ttl = UDP_TTL; +static int udp_ttl = UDP_TTL; static unsigned char exception_seq; +struct kdp_ipovly { + uint32_t ih_next, ih_prev; /* for protocol sequence q's */ + u_char ih_x1; /* (unused) */ + u_char ih_pr; /* protocol */ + short ih_len; /* protocol length */ + struct kdp_in_addr ih_src; /* source internet address */ + struct kdp_in_addr ih_dst; /* destination internet address */ +}; + +struct kdp_udphdr { + u_short uh_sport; /* source port */ + u_short uh_dport; /* destination port */ + short uh_ulen; /* udp length */ + u_short uh_sum; /* udp checksum */ +}; + +struct kdp_udpiphdr { + struct kdp_ipovly ui_i; /* overlaid ip structure */ + struct kdp_udphdr ui_u; /* udp header */ +}; +#define ui_next ui_i.ih_next +#define ui_prev ui_i.ih_prev +#define ui_x1 ui_i.ih_x1 +#define ui_pr ui_i.ih_pr +#define ui_len ui_i.ih_len +#define ui_src ui_i.ih_src +#define ui_dst ui_i.ih_dst +#define ui_sport ui_u.uh_sport +#define ui_dport ui_u.uh_dport +#define ui_ulen ui_u.uh_ulen +#define ui_sum ui_u.uh_sum + +struct kdp_ip { + union { + uint32_t ip_w; + struct { + unsigned int +#ifdef __LITTLE_ENDIAN__ + ip_xhl:4, /* header length */ + ip_xv:4, /* version */ + ip_xtos:8, /* type of service */ + ip_xlen:16; /* total length */ +#endif +#ifdef __BIG_ENDIAN__ + ip_xv:4, /* version */ + ip_xhl:4, /* header length */ + ip_xtos:8, /* type of service */ + ip_xlen:16; /* total length */ +#endif + } ip_x; + } ip_vhltl; + u_short ip_id; /* identification */ + short ip_off; /* fragment offset field */ +#define IP_DF 0x4000 /* dont fragment flag */ +#define IP_MF 0x2000 /* more fragments flag */ +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + u_char ip_ttl; /* time to live */ + u_char ip_p; /* protocol */ + u_short ip_sum; /* checksum */ + struct kdp_in_addr ip_src,ip_dst; /* source and dest address */ +}; +#define ip_v ip_vhltl.ip_x.ip_xv +#define ip_hl ip_vhltl.ip_x.ip_xhl +#define ip_tos ip_vhltl.ip_x.ip_xtos +#define ip_len ip_vhltl.ip_x.ip_xlen + +#define IPPROTO_UDP 17 +#define IPVERSION 4 + +#define ETHERTYPE_IP 0x0800 /* IP protocol */ + +/* + * Ethernet Address Resolution Protocol. + * + * See RFC 826 for protocol description. Structure below is adapted + * to resolving internet addresses. Field names used correspond to + * RFC 826. + */ + +#define ETHERTYPE_ARP 0x0806 /* Addr. 
resolution protocol */ + +struct kdp_arphdr { + u_short ar_hrd; /* format of hardware address */ +#define ARPHRD_ETHER 1 /* ethernet hardware format */ +#define ARPHRD_FRELAY 15 /* frame relay hardware format */ + u_short ar_pro; /* format of protocol address */ + u_char ar_hln; /* length of hardware address */ + u_char ar_pln; /* length of protocol address */ + u_short ar_op; /* one of: */ +#define ARPOP_REQUEST 1 /* request to resolve address */ +#define ARPOP_REPLY 2 /* response to previous request */ +#define ARPOP_REVREQUEST 3 /* request protocol address given hardware */ +#define ARPOP_REVREPLY 4 /* response giving protocol address */ +#define ARPOP_INVREQUEST 8 /* request to identify peer */ +#define ARPOP_INVREPLY 9 /* response identifying peer */ +}; + +struct kdp_ether_arp { + struct kdp_arphdr ea_hdr; /* fixed-size header */ + u_char arp_sha[ETHER_ADDR_LEN]; /* sender hardware address */ + u_char arp_spa[4]; /* sender protocol address */ + u_char arp_tha[ETHER_ADDR_LEN]; /* target hardware address */ + u_char arp_tpa[4]; /* target protocol address */ +}; +#define arp_hrd ea_hdr.ar_hrd +#define arp_pro ea_hdr.ar_pro +#define arp_hln ea_hdr.ar_hln +#define arp_pln ea_hdr.ar_pln +#define arp_op ea_hdr.ar_op + +#define ETHERMTU 1500 +#define ETHERHDRSIZE 14 +#define ETHERCRC 4 +#define KDP_MAXPACKET (ETHERHDRSIZE + ETHERMTU + ETHERCRC) + static struct { unsigned char data[KDP_MAXPACKET]; unsigned int off, len; @@ -106,12 +225,12 @@ struct kdp_manual_pkt manual_pkt; struct { struct { - struct in_addr in; - struct ether_addr ea; + struct kdp_in_addr in; + struct kdp_ether_addr ea; } loc; struct { - struct in_addr in; - struct ether_addr ea; + struct kdp_in_addr in; + struct kdp_ether_addr ea; } rmt; } adr; @@ -141,7 +260,7 @@ static void kdp_serial_send(void *rpkt, unsigned int rpkt_len); #endif static uint32_t kdp_current_ip_address = 0; -static struct ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}}; +static struct kdp_ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}}; static void *kdp_current_ifp; static void kdp_handler( void *); @@ -158,12 +277,12 @@ static boolean_t router_specified = FALSE; static boolean_t corename_specified = FALSE; static unsigned int panicd_port = CORE_REMOTE_PORT; -static struct ether_addr etherbroadcastaddr = {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; +static struct kdp_ether_addr etherbroadcastaddr = {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; -static struct ether_addr router_mac = {{0, 0, 0 , 0, 0, 0}}; -static struct ether_addr destination_mac = {{0, 0, 0 , 0, 0, 0}}; -static struct ether_addr temp_mac = {{0, 0, 0 , 0, 0, 0}}; -static struct ether_addr current_resolved_MAC = {{0, 0, 0 , 0, 0, 0}}; +static struct kdp_ether_addr router_mac = {{0, 0, 0 , 0, 0, 0}}; +static struct kdp_ether_addr destination_mac = {{0, 0, 0 , 0, 0, 0}}; +static struct kdp_ether_addr temp_mac = {{0, 0, 0 , 0, 0, 0}}; +static struct kdp_ether_addr current_resolved_MAC = {{0, 0, 0 , 0, 0, 0}}; static boolean_t flag_panic_dump_in_progress = FALSE; static boolean_t flag_router_mac_initialized = FALSE; @@ -177,7 +296,7 @@ static unsigned int last_panic_port = CORE_REMOTE_PORT; #define KDP_THROTTLE_VALUE (10ULL * NSEC_PER_SEC) uint32_t kdp_crashdump_pkt_size = 512; -#define KDP_LARGE_CRASHDUMP_PKT_SIZE (1440 - 6 - sizeof(struct udpiphdr)) +#define KDP_LARGE_CRASHDUMP_PKT_SIZE (1440 - 6 - sizeof(struct kdp_udpiphdr)) static char panicd_ip_str[20]; static char router_ip_str[20]; static char corename_str[50]; @@ -194,18 +313,15 @@ extern void kdp_call(void); extern 
boolean_t kdp_call_kdb(void); extern int kern_dump(void); -extern int inet_aton(const char *cp, struct in_addr *pin); -extern int inet_ntoa2(struct in_addr * pin, char * cp, const int len); - void * kdp_get_interface(void); void kdp_set_gateway_mac(void *gatewaymac); -void kdp_set_ip_and_mac_addresses(struct in_addr *ipaddr, struct ether_addr *); -void kdp_set_interface(void *interface, const struct ether_addr *macaddr); +void kdp_set_ip_and_mac_addresses(struct kdp_in_addr *ipaddr, struct kdp_ether_addr *); +void kdp_set_interface(void *interface, const struct kdp_ether_addr *macaddr); void kdp_disable_arp(void); -static void kdp_arp_reply(struct ether_arp *); -static void kdp_process_arp_reply(struct ether_arp *); -static boolean_t kdp_arp_resolve(uint32_t, struct ether_addr *); +static void kdp_arp_reply(struct kdp_ether_arp *); +static void kdp_process_arp_reply(struct kdp_ether_arp *); +static boolean_t kdp_arp_resolve(uint32_t, struct kdp_ether_addr *); static volatile unsigned kdp_reentry_deadline; @@ -437,7 +553,7 @@ enaddr_copy( void *dst ) { - bcopy((char *)src, (char *)dst, sizeof (struct ether_addr)); + bcopy((char *)src, (char *)dst, sizeof (struct kdp_ether_addr)); } static unsigned short @@ -468,26 +584,26 @@ kdp_reply( const boolean_t sideband ) { - struct udpiphdr aligned_ui, *ui = &aligned_ui; - struct ip aligned_ip, *ip = &aligned_ip; - struct in_addr tmp_ipaddr; - struct ether_addr tmp_enaddr; - struct ether_header *eh = NULL; + struct kdp_udpiphdr aligned_ui, *ui = &aligned_ui; + struct kdp_ip aligned_ip, *ip = &aligned_ip; + struct kdp_in_addr tmp_ipaddr; + struct kdp_ether_addr tmp_enaddr; + struct kdp_ether_header *eh = NULL; if (!pkt.input) kdp_panic("kdp_reply"); - pkt.off -= (unsigned int)sizeof (struct udpiphdr); + pkt.off -= (unsigned int)sizeof (struct kdp_udpiphdr); #if DO_ALIGN bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); #else - ui = (struct udpiphdr *)&pkt.data[pkt.off]; + ui = (struct kdp_udpiphdr *)&pkt.data[pkt.off]; #endif ui->ui_next = ui->ui_prev = 0; ui->ui_x1 = 0; ui->ui_pr = IPPROTO_UDP; - ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); + ui->ui_len = htons((u_short)pkt.len + sizeof (struct kdp_udphdr)); tmp_ipaddr = ui->ui_src; ui->ui_src = ui->ui_dst; ui->ui_dst = tmp_ipaddr; @@ -499,12 +615,12 @@ kdp_reply( bcopy((char *)ui, (char *)&pkt.data[pkt.off], sizeof(*ui)); bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); #else - ip = (struct ip *)&pkt.data[pkt.off]; + ip = (struct kdp_ip *)&pkt.data[pkt.off]; #endif - ip->ip_len = htons(sizeof (struct udpiphdr) + pkt.len); + ip->ip_len = htons(sizeof (struct kdp_udpiphdr) + pkt.len); ip->ip_v = IPVERSION; ip->ip_id = htons(ip_id++); - ip->ip_hl = sizeof (struct ip) >> 2; + ip->ip_hl = sizeof (struct kdp_ip) >> 2; ip->ip_ttl = udp_ttl; ip->ip_sum = 0; ip->ip_sum = htons(~ip_sum((unsigned char *)ip, ip->ip_hl)); @@ -512,17 +628,17 @@ kdp_reply( bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); #endif - pkt.len += (unsigned int)sizeof (struct udpiphdr); + pkt.len += (unsigned int)sizeof (struct kdp_udpiphdr); - pkt.off -= (unsigned int)sizeof (struct ether_header); + pkt.off -= (unsigned int)sizeof (struct kdp_ether_header); - eh = (struct ether_header *)&pkt.data[pkt.off]; + eh = (struct kdp_ether_header *)&pkt.data[pkt.off]; enaddr_copy(eh->ether_shost, &tmp_enaddr); enaddr_copy(eh->ether_dhost, eh->ether_shost); enaddr_copy(&tmp_enaddr, eh->ether_dhost); eh->ether_type = htons(ETHERTYPE_IP); - pkt.len += (unsigned int)sizeof (struct ether_header); + 
pkt.len += (unsigned int)sizeof (struct kdp_ether_header); // save reply for possible retransmission assert(pkt.len <= KDP_MAXPACKET); @@ -541,24 +657,24 @@ kdp_send( unsigned short remote_port ) { - struct udpiphdr aligned_ui, *ui = &aligned_ui; - struct ip aligned_ip, *ip = &aligned_ip; - struct ether_header *eh; + struct kdp_udpiphdr aligned_ui, *ui = &aligned_ui; + struct kdp_ip aligned_ip, *ip = &aligned_ip; + struct kdp_ether_header *eh; if (pkt.input) kdp_panic("kdp_send"); - pkt.off -= (unsigned int)sizeof (struct udpiphdr); + pkt.off -= (unsigned int)sizeof (struct kdp_udpiphdr); #if DO_ALIGN bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); #else - ui = (struct udpiphdr *)&pkt.data[pkt.off]; + ui = (struct kdp_udpiphdr *)&pkt.data[pkt.off]; #endif ui->ui_next = ui->ui_prev = 0; ui->ui_x1 = 0; ui->ui_pr = IPPROTO_UDP; - ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); + ui->ui_len = htons((u_short)pkt.len + sizeof (struct kdp_udphdr)); ui->ui_src = adr.loc.in; ui->ui_dst = adr.rmt.in; ui->ui_sport = htons(KDP_REMOTE_PORT); @@ -569,12 +685,12 @@ kdp_send( bcopy((char *)ui, (char *)&pkt.data[pkt.off], sizeof(*ui)); bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); #else - ip = (struct ip *)&pkt.data[pkt.off]; + ip = (struct kdp_ip *)&pkt.data[pkt.off]; #endif - ip->ip_len = htons(sizeof (struct udpiphdr) + pkt.len); + ip->ip_len = htons(sizeof (struct kdp_udpiphdr) + pkt.len); ip->ip_v = IPVERSION; ip->ip_id = htons(ip_id++); - ip->ip_hl = sizeof (struct ip) >> 2; + ip->ip_hl = sizeof (struct kdp_ip) >> 2; ip->ip_ttl = udp_ttl; ip->ip_sum = 0; ip->ip_sum = htons(~ip_sum((unsigned char *)ip, ip->ip_hl)); @@ -582,16 +698,16 @@ kdp_send( bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); #endif - pkt.len += (unsigned int)sizeof (struct udpiphdr); + pkt.len += (unsigned int)sizeof (struct kdp_udpiphdr); - pkt.off -= (unsigned int)sizeof (struct ether_header); + pkt.off -= (unsigned int)sizeof (struct kdp_ether_header); - eh = (struct ether_header *)&pkt.data[pkt.off]; + eh = (struct kdp_ether_header *)&pkt.data[pkt.off]; enaddr_copy(&adr.loc.ea, eh->ether_shost); enaddr_copy(&adr.rmt.ea, eh->ether_dhost); eh->ether_type = htons(ETHERTYPE_IP); - pkt.len += (unsigned int)sizeof (struct ether_header); + pkt.len += (unsigned int)sizeof (struct kdp_ether_header); kdp_send_data(&pkt.data[pkt.off], pkt.len); } @@ -612,10 +728,10 @@ inline static void debugger_if_necessary(void) when the interface gets initialized for real. 
*/ void -kdp_set_interface(void *ifp, const struct ether_addr *macaddr) +kdp_set_interface(void *ifp, const struct kdp_ether_addr *macaddr) { char kdpstr[80]; - struct in_addr addr = { 0 }; + struct kdp_in_addr addr = { 0 }; unsigned int len; kdp_current_ifp = ifp; @@ -662,8 +778,8 @@ kdp_get_interface(void) void kdp_set_ip_and_mac_addresses( - struct in_addr *ipaddr, - struct ether_addr *macaddr) + struct kdp_in_addr *ipaddr, + struct kdp_ether_addr *macaddr) { static uint64_t last_time = (uint64_t) -1; static uint64_t throttle_val = 0; @@ -683,7 +799,7 @@ kdp_set_ip_and_mac_addresses( if (save_ip_in_nvram == FALSE) goto done; - if (inet_ntoa2(ipaddr, addr, sizeof(addr)) == FALSE) + if (inet_ntoa_r(*ipaddr, addr, sizeof(addr)) == NULL) goto done; /* throttle writes if needed */ @@ -705,11 +821,11 @@ done: void kdp_set_gateway_mac(void *gatewaymac) { - router_mac = *(struct ether_addr *)gatewaymac; + router_mac = *(struct kdp_ether_addr *)gatewaymac; flag_router_mac_initialized = TRUE; } -struct ether_addr +struct kdp_ether_addr kdp_get_mac_addr(void) { return kdp_current_mac_address; @@ -730,10 +846,10 @@ kdp_disable_arp(void) static void kdp_arp_dispatch(void) { - struct ether_arp aligned_ea, *ea = &aligned_ea; + struct kdp_ether_arp aligned_ea, *ea = &aligned_ea; unsigned arp_header_offset; - arp_header_offset = (unsigned)sizeof(struct ether_header) + pkt.off; + arp_header_offset = (unsigned)sizeof(struct kdp_ether_header) + pkt.off; memcpy((void *)ea, (void *)&pkt.data[arp_header_offset], sizeof(*ea)); switch(ntohs(ea->arp_op)) { @@ -749,18 +865,18 @@ kdp_arp_dispatch(void) } static void -kdp_process_arp_reply(struct ether_arp *ea) +kdp_process_arp_reply(struct kdp_ether_arp *ea) { /* Are we interested in ARP replies? */ if (flag_arp_resolved == TRUE) return; /* Did we receive a reply from the right source? 
*/ - if (((struct in_addr *)(ea->arp_spa))->s_addr != target_ip) + if (((struct kdp_in_addr *)(ea->arp_spa))->s_addr != target_ip) return; flag_arp_resolved = TRUE; - current_resolved_MAC = *(struct ether_addr *) (ea->arp_sha); + current_resolved_MAC = *(struct kdp_ether_addr *) (ea->arp_sha); return; } @@ -770,15 +886,15 @@ kdp_process_arp_reply(struct ether_arp *ea) */ static void -kdp_arp_reply(struct ether_arp *ea) +kdp_arp_reply(struct kdp_ether_arp *ea) { - struct ether_header *eh; + struct kdp_ether_header *eh; - struct in_addr isaddr, itaddr, myaddr; - struct ether_addr my_enaddr; + struct kdp_in_addr isaddr, itaddr, myaddr; + struct kdp_ether_addr my_enaddr; - eh = (struct ether_header *)&pkt.data[pkt.off]; - pkt.off += (unsigned int)sizeof(struct ether_header); + eh = (struct kdp_ether_header *)&pkt.data[pkt.off]; + pkt.off += (unsigned int)sizeof(struct kdp_ether_header); if(ntohs(ea->arp_op) != ARPOP_REQUEST) return; @@ -812,7 +928,7 @@ kdp_arp_reply(struct ether_arp *ea) (void)memcpy(eh->ether_shost, &my_enaddr, sizeof(eh->ether_shost)); eh->ether_type = htons(ETHERTYPE_ARP); (void)memcpy(&pkt.data[pkt.off], ea, sizeof(*ea)); - pkt.off -= (unsigned int)sizeof (struct ether_header); + pkt.off -= (unsigned int)sizeof (struct kdp_ether_header); /* pkt.len is still the length we want, ether_header+ether_arp */ kdp_send_data(&pkt.data[pkt.off], pkt.len); } @@ -821,9 +937,9 @@ kdp_arp_reply(struct ether_arp *ea) static void kdp_poll(void) { - struct ether_header *eh = NULL; - struct udpiphdr aligned_ui, *ui = &aligned_ui; - struct ip aligned_ip, *ip = &aligned_ip; + struct kdp_ether_header *eh = NULL; + struct kdp_udpiphdr aligned_ui, *ui = &aligned_ui; + struct kdp_ip aligned_ip, *ip = &aligned_ip; static int msg_printed; if (pkt.input) @@ -843,9 +959,9 @@ kdp_poll(void) if (pkt.len == 0) return; - if (pkt.len >= sizeof(struct ether_header)) + if (pkt.len >= sizeof(struct kdp_ether_header)) { - eh = (struct ether_header *)&pkt.data[pkt.off]; + eh = (struct kdp_ether_header *)&pkt.data[pkt.off]; if (kdp_flag & KDP_ARP) { @@ -857,10 +973,10 @@ kdp_poll(void) } } - if (pkt.len < (sizeof (struct ether_header) + sizeof (struct udpiphdr))) + if (pkt.len < (sizeof (struct kdp_ether_header) + sizeof (struct kdp_udpiphdr))) return; - pkt.off += (unsigned int)sizeof (struct ether_header); + pkt.off += (unsigned int)sizeof (struct kdp_ether_header); if (ntohs(eh->ether_type) != ETHERTYPE_IP) { return; } @@ -869,16 +985,16 @@ kdp_poll(void) bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); #else - ui = (struct udpiphdr *)&pkt.data[pkt.off]; - ip = (struct ip *)&pkt.data[pkt.off]; + ui = (struct kdp_udpiphdr *)&pkt.data[pkt.off]; + ip = (struct kdp_ip *)&pkt.data[pkt.off]; #endif - pkt.off += (unsigned int)sizeof (struct udpiphdr); + pkt.off += (unsigned int)sizeof (struct kdp_udpiphdr); if (ui->ui_pr != IPPROTO_UDP) { return; } - if (ip->ip_hl > (sizeof (struct ip) >> 2)) { + if (ip->ip_hl > (sizeof (struct kdp_ip) >> 2)) { return; } @@ -914,7 +1030,7 @@ kdp_poll(void) /* * Calculate kdp packet length. 
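 * (Editorial note, not in the original source: the UDP length field
 * counts the 8-byte UDP header plus the payload, so the packet length
 * computed below is ntohs(ui_ulen) - sizeof(struct kdp_udphdr); a
 * 40-byte UDP datagram, for example, yields a 32-byte KDP payload.)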
*/ - pkt.len = ntohs((u_short)ui->ui_ulen) - (unsigned int)sizeof (struct udphdr); + pkt.len = ntohs((u_short)ui->ui_ulen) - (unsigned int)sizeof (struct kdp_udphdr); pkt.input = TRUE; } @@ -925,8 +1041,8 @@ kdp_poll(void) static void transmit_ARP_request(uint32_t ip_addr) { - struct ether_header *eh = (struct ether_header *) &pkt.data[0]; - struct ether_arp *ea = (struct ether_arp *) &pkt.data[sizeof(struct ether_header)]; + struct kdp_ether_header *eh = (struct kdp_ether_header *) &pkt.data[0]; + struct kdp_ether_arp *ea = (struct kdp_ether_arp *) &pkt.data[sizeof(struct kdp_ether_header)]; KDP_DEBUG("Transmitting ARP request\n"); /* Populate the ether_header */ @@ -950,13 +1066,13 @@ transmit_ARP_request(uint32_t ip_addr) memcpy(ea->arp_spa, (void *) &kdp_current_ip_address, sizeof(kdp_current_ip_address)); pkt.off = 0; - pkt.len = sizeof(struct ether_header) + sizeof(struct ether_arp); + pkt.len = sizeof(struct kdp_ether_header) + sizeof(struct kdp_ether_arp); /* Transmit */ kdp_send_data(&pkt.data[pkt.off], pkt.len); } static boolean_t -kdp_arp_resolve(uint32_t arp_target_ip, struct ether_addr *resolved_MAC) +kdp_arp_resolve(uint32_t arp_target_ip, struct kdp_ether_addr *resolved_MAC) { int poll_count = 256; /* ~770 ms modulo broadcast/delayed traffic? */ char tretries = 0; @@ -1094,7 +1210,7 @@ static void kdp_connection_wait(void) { unsigned short reply_port; - struct ether_addr kdp_mac_addr = kdp_get_mac_addr(); + struct kdp_ether_addr kdp_mac_addr = kdp_get_mac_addr(); unsigned int ip_addr = ntohl(kdp_get_ip_address()); /* @@ -1216,7 +1332,7 @@ kdp_send_exception( unsigned int poll_timeout; do { - pkt.off = sizeof (struct ether_header) + sizeof (struct udpiphdr); + pkt.off = sizeof (struct kdp_ether_header) + sizeof (struct kdp_udpiphdr); kdp_exception((unsigned char *)&pkt.data[pkt.off], (int *)&pkt.len, (unsigned short *)&remote_port, @@ -1404,28 +1520,28 @@ struct corehdr * create_panic_header(unsigned int request, const char *corename, unsigned length, unsigned int block) { - struct udpiphdr aligned_ui, *ui = &aligned_ui; - struct ip aligned_ip, *ip = &aligned_ip; - struct ether_header *eh; + struct kdp_udpiphdr aligned_ui, *ui = &aligned_ui; + struct kdp_ip aligned_ip, *ip = &aligned_ip; + struct kdp_ether_header *eh; struct corehdr *coreh; const char *mode = "octet"; char modelen = strlen(mode) + 1; size_t fmask_size = sizeof(KDP_FEATURE_MASK_STRING) + sizeof(kdp_crashdump_feature_mask); - pkt.off = sizeof (struct ether_header); + pkt.off = sizeof (struct kdp_ether_header); pkt.len = (unsigned int)(length + ((request == KDP_WRQ) ? modelen + fmask_size : 0) + (corename ? 
(strlen(corename) + 1 ): 0) + sizeof(struct corehdr)); #if DO_ALIGN bcopy((char *)&pkt.data[pkt.off], (char *)ui, sizeof(*ui)); #else - ui = (struct udpiphdr *)&pkt.data[pkt.off]; + ui = (struct kdp_udpiphdr *)&pkt.data[pkt.off]; #endif ui->ui_next = ui->ui_prev = 0; ui->ui_x1 = 0; ui->ui_pr = IPPROTO_UDP; - ui->ui_len = htons((u_short)pkt.len + sizeof (struct udphdr)); + ui->ui_len = htons((u_short)pkt.len + sizeof (struct kdp_udphdr)); ui->ui_src.s_addr = (uint32_t)kdp_current_ip_address; /* Already in network byte order via inet_aton() */ ui->ui_dst.s_addr = panic_server_ip; @@ -1437,12 +1553,12 @@ create_panic_header(unsigned int request, const char *corename, bcopy((char *)ui, (char *)&pkt.data[pkt.off], sizeof(*ui)); bcopy((char *)&pkt.data[pkt.off], (char *)ip, sizeof(*ip)); #else - ip = (struct ip *)&pkt.data[pkt.off]; + ip = (struct kdp_ip *)&pkt.data[pkt.off]; #endif - ip->ip_len = htons(sizeof (struct udpiphdr) + pkt.len); + ip->ip_len = htons(sizeof (struct kdp_udpiphdr) + pkt.len); ip->ip_v = IPVERSION; ip->ip_id = htons(ip_id++); - ip->ip_hl = sizeof (struct ip) >> 2; + ip->ip_hl = sizeof (struct kdp_ip) >> 2; ip->ip_ttl = udp_ttl; ip->ip_sum = 0; ip->ip_sum = htons(~ip_sum((unsigned char *)ip, ip->ip_hl)); @@ -1450,9 +1566,9 @@ create_panic_header(unsigned int request, const char *corename, bcopy((char *)ip, (char *)&pkt.data[pkt.off], sizeof(*ip)); #endif - pkt.len += (unsigned int)sizeof (struct udpiphdr); + pkt.len += (unsigned int)sizeof (struct kdp_udpiphdr); - pkt.off += (unsigned int)sizeof (struct udpiphdr); + pkt.off += (unsigned int)sizeof (struct kdp_udpiphdr); coreh = (struct corehdr *) &pkt.data[pkt.off]; coreh->th_opcode = htons((u_short)request); @@ -1479,15 +1595,15 @@ create_panic_header(unsigned int request, const char *corename, coreh->th_block = htonl((unsigned int) block); } - pkt.off -= (unsigned int)sizeof (struct udpiphdr); - pkt.off -= (unsigned int)sizeof (struct ether_header); + pkt.off -= (unsigned int)sizeof (struct kdp_udpiphdr); + pkt.off -= (unsigned int)sizeof (struct kdp_ether_header); - eh = (struct ether_header *)&pkt.data[pkt.off]; + eh = (struct kdp_ether_header *)&pkt.data[pkt.off]; enaddr_copy(&kdp_current_mac_address, eh->ether_shost); enaddr_copy(&destination_mac, eh->ether_dhost); eh->ether_type = htons(ETHERTYPE_IP); - pkt.len += (unsigned int)sizeof (struct ether_header); + pkt.len += (unsigned int)sizeof (struct kdp_ether_header); return coreh; } @@ -1690,27 +1806,6 @@ isdigit (char c) { return ((c > 47) && (c < 58)); } -/* From user mode Libc - this ought to be in a library */ -static char * -strnstr(char *s, const char *find, size_t slen) -{ - char c, sc; - size_t len; - - if ((c = *find++) != '\0') { - len = strlen(find); - do { - do { - if ((sc = *s++) == '\0' || slen-- < 1) - return (NULL); - } while (sc != c); - if (len > slen) - return (NULL); - } while (strncmp(s, find, len) != 0); - s--; - } - return (s); -} /* Horrid hack to extract xnu version if possible - a much cleaner approach * would be to have the integrator run a script which would copy the @@ -1918,7 +2013,7 @@ kdp_panic_dump(void) (unsigned int) (abstime & 0xffffffff)); } - if (0 == inet_aton(panicd_ip_str, (struct in_addr *) &panic_server_ip)) { + if (0 == inet_aton(panicd_ip_str, (struct kdp_in_addr *) &panic_server_ip)) { kdb_printf("inet_aton() failed interpreting %s as a panic server IP\n", panicd_ip_str); } else @@ -1933,7 +2028,7 @@ kdp_panic_dump(void) else { if (!flag_panic_dump_in_progress) goto panic_dump_exit; if (router_specified) { - if (0 == 
inet_aton(router_ip_str, (struct in_addr *) &parsed_router_ip)) + if (0 == inet_aton(router_ip_str, (struct kdp_in_addr *) &parsed_router_ip)) kdb_printf("inet_aton() failed interpreting %s as an IP\n", router_ip_str); else { router_ip = parsed_router_ip; @@ -2098,19 +2193,19 @@ kdp_init(void) strlcpy(kdp_kernelversion_string, version, sizeof(kdp_kernelversion_string)); /* Relies on platform layer calling panic_init() before kdp_init() */ - if (kernel_uuid[0] != '\0') { + if (kernel_uuid_string[0] != '\0') { /* * Update kdp_kernelversion_string with our UUID * generated at link time. */ strlcat(kdp_kernelversion_string, "; UUID=", sizeof(kdp_kernelversion_string)); - strlcat(kdp_kernelversion_string, kernel_uuid, sizeof(kdp_kernelversion_string)); + strlcat(kdp_kernelversion_string, kernel_uuid_string, sizeof(kdp_kernelversion_string)); } -#if defined(__x86_64__) || defined(__arm__) debug_log_init(); +#if defined(__x86_64__) || defined(__arm__) if (vm_kernel_slide) { char KASLR_stext[19]; strlcat(kdp_kernelversion_string, "; stext=", sizeof(kdp_kernelversion_string)); @@ -2130,19 +2225,13 @@ kdp_init(void) #if CONFIG_SERIAL_KDP char kdpname[80]; - struct in_addr ipaddr; - struct ether_addr macaddr; + struct kdp_in_addr ipaddr; + struct kdp_ether_addr macaddr; -#if CONFIG_EMBEDDED - //serial will be the debugger, unless match name is explicitly provided, and it's not "serial" - if(PE_parse_boot_argn("kdp_match_name", kdpname, sizeof(kdpname)) && strncmp(kdpname, "serial", sizeof(kdpname)) != 0) - return; -#else // serial must be explicitly requested if(!PE_parse_boot_argn("kdp_match_name", kdpname, sizeof(kdpname)) || strncmp(kdpname, "serial", sizeof(kdpname)) != 0) return; -#endif kprintf("Initializing serial KDP\n"); diff --git a/osfmk/kdp/kdp_udp.h b/osfmk/kdp/kdp_udp.h index c057dc4b1..49bb1ab23 100644 --- a/osfmk/kdp/kdp_udp.h +++ b/osfmk/kdp/kdp_udp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -34,144 +34,30 @@ #include /* OSSwap functions */ #include -#define ETHERMTU 1500 -#define ETHERHDRSIZE 14 -#define ETHERCRC 4 -#define KDP_MAXPACKET (ETHERHDRSIZE + ETHERMTU + ETHERCRC) - -struct in_addr { +struct kdp_in_addr { uint32_t s_addr; }; -struct ether_addr { - u_char ether_addr_octet[6]; -}; - -typedef struct ether_addr enet_addr_t; - -extern struct ether_addr kdp_get_mac_addr(void); -unsigned int kdp_get_ip_address(void); - -struct ipovly { - uint32_t ih_next, ih_prev; /* for protocol sequence q's */ - u_char ih_x1; /* (unused) */ - u_char ih_pr; /* protocol */ - short ih_len; /* protocol length */ - struct in_addr ih_src; /* source internet address */ - struct in_addr ih_dst; /* destination internet address */ -}; - -struct udphdr { - u_short uh_sport; /* source port */ - u_short uh_dport; /* destination port */ - short uh_ulen; /* udp length */ - u_short uh_sum; /* udp checksum */ -}; +#define ETHER_ADDR_LEN 6 -struct udpiphdr { - struct ipovly ui_i; /* overlaid ip structure */ - struct udphdr ui_u; /* udp header */ +struct kdp_ether_addr { + u_char ether_addr_octet[ETHER_ADDR_LEN]; }; -#define ui_next ui_i.ih_next -#define ui_prev ui_i.ih_prev -#define ui_x1 ui_i.ih_x1 -#define ui_pr ui_i.ih_pr -#define ui_len ui_i.ih_len -#define ui_src ui_i.ih_src -#define ui_dst ui_i.ih_dst -#define ui_sport ui_u.uh_sport -#define ui_dport ui_u.uh_dport -#define ui_ulen ui_u.uh_ulen -#define ui_sum ui_u.uh_sum -struct ip { - union { - uint32_t ip_w; - struct { - unsigned int -#ifdef __LITTLE_ENDIAN__ - ip_xhl:4, /* header length */ - ip_xv:4, /* version */ - ip_xtos:8, /* type of service */ - ip_xlen:16; /* total length */ -#endif -#ifdef __BIG_ENDIAN__ - ip_xv:4, /* version */ - ip_xhl:4, /* header length */ - ip_xtos:8, /* type of service */ - ip_xlen:16; /* total length */ -#endif - } ip_x; - } ip_vhltl; - u_short ip_id; /* identification */ - short ip_off; /* fragment offset field */ -#define IP_DF 0x4000 /* dont fragment flag */ -#define IP_MF 0x2000 /* more fragments flag */ -#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ - u_char ip_ttl; /* time to live */ - u_char ip_p; /* protocol */ - u_short ip_sum; /* checksum */ - struct in_addr ip_src,ip_dst; /* source and dest address */ -}; -#define ip_v ip_vhltl.ip_x.ip_xv -#define ip_hl ip_vhltl.ip_x.ip_xhl -#define ip_tos ip_vhltl.ip_x.ip_xtos -#define ip_len ip_vhltl.ip_x.ip_xlen +typedef struct kdp_ether_addr enet_addr_t; -#define IPPROTO_UDP 17 -#define IPVERSION 4 +extern struct kdp_ether_addr kdp_get_mac_addr(void); +unsigned int kdp_get_ip_address(void); -struct ether_header { - u_char ether_dhost[6]; - u_char ether_shost[6]; +struct kdp_ether_header { + u_char ether_dhost[ETHER_ADDR_LEN]; + u_char ether_shost[ETHER_ADDR_LEN]; u_short ether_type; }; -typedef struct ether_header ether_header_t; - -#define ETHERTYPE_IP 0x0800 /* IP protocol */ +typedef struct kdp_ether_header ether_header_t; #define ntohs(x) OSSwapBigToHostInt16(x) #define ntohl(x) OSSwapBigToHostInt32(x) #define htons(x) OSSwapHostToBigInt16(x) #define htonl(x) OSSwapHostToBigInt32(x) -/* - * Ethernet Address Resolution Protocol. - * - * See RFC 826 for protocol description. Structure below is adapted - * to resolving internet addresses. Field names used correspond to - * RFC 826. - */ - -#define ETHERTYPE_ARP 0x0806 /* Addr. 
resolution protocol */ - -struct arphdr { - u_short ar_hrd; /* format of hardware address */ -#define ARPHRD_ETHER 1 /* ethernet hardware format */ -#define ARPHRD_FRELAY 15 /* frame relay hardware format */ - u_short ar_pro; /* format of protocol address */ - u_char ar_hln; /* length of hardware address */ - u_char ar_pln; /* length of protocol address */ - u_short ar_op; /* one of: */ -#define ARPOP_REQUEST 1 /* request to resolve address */ -#define ARPOP_REPLY 2 /* response to previous request */ -#define ARPOP_REVREQUEST 3 /* request protocol address given hardware */ -#define ARPOP_REVREPLY 4 /* response giving protocol address */ -#define ARPOP_INVREQUEST 8 /* request to identify peer */ -#define ARPOP_INVREPLY 9 /* response identifying peer */ -}; - -#define ETHER_ADDR_LEN 6 - -struct ether_arp { - struct arphdr ea_hdr; /* fixed-size header */ - u_char arp_sha[ETHER_ADDR_LEN]; /* sender hardware address */ - u_char arp_spa[4]; /* sender protocol address */ - u_char arp_tha[ETHER_ADDR_LEN]; /* target hardware address */ - u_char arp_tpa[4]; /* target protocol address */ -}; -#define arp_hrd ea_hdr.ar_hrd -#define arp_pro ea_hdr.ar_pro -#define arp_hln ea_hdr.ar_hln -#define arp_pln ea_hdr.ar_pln -#define arp_op ea_hdr.ar_op diff --git a/osfmk/kdp/ml/i386/kdp_machdep.c b/osfmk/kdp/ml/i386/kdp_machdep.c deleted file mode 100644 index ca38128a4..000000000 --- a/osfmk/kdp/ml/i386/kdp_machdep.c +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
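The kdp_ prefix adopted throughout the kdp_udp.h hunk above is namespace isolation: the debugger used to declare its own struct in_addr, struct udphdr, struct ip and struct ether_header, which collided with the real networking headers in any translation unit that needed both. A minimal sketch of the surviving types in use (bcopy stands in for the enaddr_copy() helper used by kdp_udp.c, and 0x0800 is written out numerically because the ETHERTYPE_IP macro no longer lives in this header):

static void
kdp_stamp_frame_sketch(struct kdp_ether_header *eh,
    const struct kdp_ether_addr *src,
    const struct kdp_ether_addr *dst)
{
	bcopy(src->ether_addr_octet, eh->ether_shost, ETHER_ADDR_LEN);
	bcopy(dst->ether_addr_octet, eh->ether_dhost, ETHER_ADDR_LEN);
	eh->ether_type = htons(0x0800);		/* ETHERTYPE_IP */
}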
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for PE_halt_restart */ -#include /* for halt_all_cpus */ -#include - -#include -#include -#include -#include -#include - -#define KDP_TEST_HARNESS 0 -#if KDP_TEST_HARNESS -#define dprintf(x) printf x -#else -#define dprintf(x) -#endif - -extern cpu_type_t cpuid_cputype(void); -extern cpu_subtype_t cpuid_cpusubtype(void); - -void print_saved_state(void *); -void kdp_call(void); -int kdp_getc(void); -boolean_t kdp_call_kdb(void); -void kdp_getstate(i386_thread_state_t *); -void kdp_setstate(i386_thread_state_t *); -void kdp_print_phys(int); - -int -machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); - -int -machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p); - -unsigned -machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len); - -static void kdp_callouts(kdp_event_t event); - -void -kdp_exception( - unsigned char *pkt, - int *len, - unsigned short *remote_port, - unsigned int exception, - unsigned int code, - unsigned int subcode -) -{ - kdp_exception_t *rq = (kdp_exception_t *)pkt; - - rq->hdr.request = KDP_EXCEPTION; - rq->hdr.is_reply = 0; - rq->hdr.seq = kdp.exception_seq; - rq->hdr.key = 0; - rq->hdr.len = sizeof (*rq); - - rq->n_exc_info = 1; - rq->exc_info[0].cpu = 0; - rq->exc_info[0].exception = exception; - rq->exc_info[0].code = code; - rq->exc_info[0].subcode = subcode; - - rq->hdr.len += rq->n_exc_info * sizeof (kdp_exc_info_t); - - bcopy((char *)rq, (char *)pkt, rq->hdr.len); - - kdp.exception_ack_needed = TRUE; - - *remote_port = kdp.exception_port; - *len = rq->hdr.len; -} - -boolean_t -kdp_exception_ack( - unsigned char *pkt, - int len -) -{ - kdp_exception_ack_t *rq = (kdp_exception_ack_t *)pkt; - - if (((unsigned int) len) < sizeof (*rq)) - return(FALSE); - - if (!rq->hdr.is_reply || rq->hdr.request != KDP_EXCEPTION) - return(FALSE); - - dprintf(("kdp_exception_ack seq %x %x\n", rq->hdr.seq, kdp.exception_seq)); - - if (rq->hdr.seq == kdp.exception_seq) { - kdp.exception_ack_needed = FALSE; - kdp.exception_seq++; - } - return(TRUE); -} - -void -kdp_getstate( - x86_thread_state32_t *state -) -{ - static x86_thread_state32_t null_state; - x86_saved_state32_t *saved_state; - - saved_state = (x86_saved_state32_t *)kdp.saved_state; - - *state = null_state; - state->eax = saved_state->eax; - state->ebx = saved_state->ebx; - state->ecx = saved_state->ecx; - state->edx = saved_state->edx; - state->edi = saved_state->edi; - state->esi = saved_state->esi; - state->ebp = saved_state->ebp; - - if ((saved_state->cs & SEL_PL) == SEL_PL_K) { /* Kernel state? 
*/ - if (cpu_mode_is64bit()) - state->esp = (uint32_t) saved_state->uesp; - else - state->esp = ((uint32_t)saved_state) + offsetof(x86_saved_state_t, ss_32) + sizeof(x86_saved_state32_t); - state->ss = KERNEL_DS; - } else { - state->esp = saved_state->uesp; - state->ss = saved_state->ss; - } - - state->eflags = saved_state->efl; - state->eip = saved_state->eip; - state->cs = saved_state->cs; - state->ds = saved_state->ds; - state->es = saved_state->es; - state->fs = saved_state->fs; - state->gs = saved_state->gs; -} - - -void -kdp_setstate( - x86_thread_state32_t *state -) -{ - x86_saved_state32_t *saved_state; - - saved_state = (x86_saved_state32_t *)kdp.saved_state; - - saved_state->eax = state->eax; - saved_state->ebx = state->ebx; - saved_state->ecx = state->ecx; - saved_state->edx = state->edx; - saved_state->edi = state->edi; - saved_state->esi = state->esi; - saved_state->ebp = state->ebp; - saved_state->efl = state->eflags; -#if 0 - saved_state->frame.eflags &= ~( EFL_VM | EFL_NT | EFL_IOPL | EFL_CLR ); - saved_state->frame.eflags |= ( EFL_IF | EFL_SET ); -#endif - saved_state->eip = state->eip; -} - - -kdp_error_t -kdp_machine_read_regs( - __unused unsigned int cpu, - __unused unsigned int flavor, - char *data, - __unused int *size -) -{ - static x86_float_state32_t null_fpstate; - - switch (flavor) { - - case x86_THREAD_STATE32: - dprintf(("kdp_readregs THREAD_STATE\n")); - kdp_getstate((x86_thread_state32_t *)data); - *size = sizeof (x86_thread_state32_t); - return KDPERR_NO_ERROR; - - case x86_FLOAT_STATE32: - dprintf(("kdp_readregs THREAD_FPSTATE\n")); - *(x86_float_state32_t *)data = null_fpstate; - *size = sizeof (x86_float_state32_t); - return KDPERR_NO_ERROR; - - default: - dprintf(("kdp_readregs bad flavor %d\n", flavor)); - *size = 0; - return KDPERR_BADFLAVOR; - } -} - -kdp_error_t -kdp_machine_write_regs( - __unused unsigned int cpu, - unsigned int flavor, - char *data, - __unused int *size -) -{ - switch (flavor) { - - case x86_THREAD_STATE32: - dprintf(("kdp_writeregs THREAD_STATE\n")); - kdp_setstate((x86_thread_state32_t *)data); - return KDPERR_NO_ERROR; - - case x86_FLOAT_STATE32: - dprintf(("kdp_writeregs THREAD_FPSTATE\n")); - return KDPERR_NO_ERROR; - - default: - dprintf(("kdp_writeregs bad flavor %d\n")); - return KDPERR_BADFLAVOR; - } -} - - - -void -kdp_machine_hostinfo( - kdp_hostinfo_t *hostinfo -) -{ - int i; - - hostinfo->cpus_mask = 0; - - for (i = 0; i < machine_info.max_cpus; i++) { - if (cpu_data_ptr[i] == NULL) - continue; - - hostinfo->cpus_mask |= (1 << i); - } - - hostinfo->cpu_type = cpuid_cputype(); - hostinfo->cpu_subtype = cpuid_cpusubtype(); -} - -void -kdp_panic( - const char *msg -) -{ - kprintf("kdp panic: %s\n", msg); - __asm__ volatile("hlt"); -} - - -void -kdp_machine_reboot(void) -{ - printf("Attempting system restart..."); - kprintf("Attempting system restart..."); - /* Call the platform specific restart*/ - if (PE_halt_restart) - (*PE_halt_restart)(kPERestartCPU); - /* If we do reach this, give up */ - halt_all_cpus(TRUE); -} - -int -kdp_intr_disbl(void) -{ - return splhigh(); -} - -void -kdp_intr_enbl(int s) -{ - splx(s); -} - -int -kdp_getc(void) -{ - return cnmaygetc(); -} - -void -kdp_us_spin(int usec) -{ - delay(usec/100); -} - -void print_saved_state(void *state) -{ - x86_saved_state32_t *saved_state; - - saved_state = state; - - kprintf("pc = 0x%x\n", saved_state->eip); - kprintf("cr2= 0x%x\n", saved_state->cr2); - kprintf("rp = TODO FIXME\n"); - kprintf("sp = %p\n", saved_state); - -} - -void -kdp_sync_cache(void) -{ - 
return; /* No op here. */ -} - -void -kdp_call(void) -{ - __asm__ volatile ("int $3"); /* Let the processor do the work */ -} - - -typedef struct _cframe_t { - struct _cframe_t *prev; - unsigned caller; - unsigned args[0]; -} cframe_t; - -#include -extern pt_entry_t *DMAP2; -extern caddr_t DADDR2; - -void -kdp_print_phys(int src) -{ - unsigned int *iptr; - int i; - - *(int *) DMAP2 = 0x63 | (src & 0xfffff000); - invlpg((u_int) DADDR2); - iptr = (unsigned int *) DADDR2; - for (i = 0; i < 100; i++) { - kprintf("0x%x ", *iptr++); - if ((i % 8) == 0) - kprintf("\n"); - } - kprintf("\n"); - *(int *) DMAP2 = 0; - -} - -boolean_t -kdp_i386_trap( - unsigned int trapno, - x86_saved_state32_t *saved_state, - kern_return_t result, - vm_offset_t va -) -{ - unsigned int exception, subcode = 0, code; - - if (trapno != T_INT3 && trapno != T_DEBUG) { - kprintf("Debugger: Unexpected kernel trap number: " - "0x%x, EIP: 0x%x, CR2: 0x%x\n", - trapno, saved_state->eip, saved_state->cr2); - if (!kdp.is_conn) - return FALSE; - } - - mp_kdp_enter(); - kdp_callouts(KDP_EVENT_ENTER); - - if (saved_state->efl & EFL_TF) { - enable_preemption_no_check(); - } - - switch (trapno) { - - case T_DIVIDE_ERROR: - exception = EXC_ARITHMETIC; - code = EXC_I386_DIVERR; - break; - - case T_OVERFLOW: - exception = EXC_SOFTWARE; - code = EXC_I386_INTOFLT; - break; - - case T_OUT_OF_BOUNDS: - exception = EXC_ARITHMETIC; - code = EXC_I386_BOUNDFLT; - break; - - case T_INVALID_OPCODE: - exception = EXC_BAD_INSTRUCTION; - code = EXC_I386_INVOPFLT; - break; - - case T_SEGMENT_NOT_PRESENT: - exception = EXC_BAD_INSTRUCTION; - code = EXC_I386_SEGNPFLT; - subcode = saved_state->err; - break; - - case T_STACK_FAULT: - exception = EXC_BAD_INSTRUCTION; - code = EXC_I386_STKFLT; - subcode = saved_state->err; - break; - - case T_GENERAL_PROTECTION: - exception = EXC_BAD_INSTRUCTION; - code = EXC_I386_GPFLT; - subcode = saved_state->err; - break; - - case T_PAGE_FAULT: - exception = EXC_BAD_ACCESS; - code = result; - subcode = va; - break; - - case T_WATCHPOINT: - exception = EXC_SOFTWARE; - code = EXC_I386_ALIGNFLT; - break; - - case T_DEBUG: - case T_INT3: - exception = EXC_BREAKPOINT; - code = EXC_I386_BPTFLT; - break; - - default: - exception = EXC_BAD_INSTRUCTION; - code = trapno; - break; - } - - kdp_raise_exception(exception, code, subcode, saved_state); - /* If the instruction single step bit is set, disable kernel preemption - */ - if (saved_state->efl & EFL_TF) { - disable_preemption(); - } - - kdp_callouts(KDP_EVENT_EXIT); - mp_kdp_exit(); - - return TRUE; -} - -boolean_t -kdp_call_kdb( - void) -{ - return(FALSE); -} - -void -kdp_machine_get_breakinsn( - uint8_t *bytes, - uint32_t *size -) -{ - bytes[0] = 0xcc; - *size = 1; -} - -extern pmap_t kdp_pmap; - -#define RETURN_OFFSET 4 -int -machine_trace_thread(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p) -{ - uint32_t *tracebuf = (uint32_t *)tracepos; - uint32_t fence = 0; - uint32_t stackptr = 0; - uint32_t stacklimit = 0xfc000000; - int framecount = 0; - uint32_t init_eip = 0; - uint32_t prevsp = 0; - uint32_t framesize = 2 * sizeof(vm_offset_t); - - if (user_p) { - x86_saved_state32_t *iss32; - - iss32 = USER_REGS32(thread); - - init_eip = iss32->eip; - stackptr = iss32->ebp; - - /* This bound isn't useful, but it doesn't hinder us*/ - stacklimit = 0xffffffff; - kdp_pmap = thread->task->map->pmap; - } - else { - /*Examine the i386_saved_state at the base of the kernel stack*/ - stackptr = STACK_IKS(thread->kernel_stack)->k_ebp; - init_eip = 
STACK_IKS(thread->kernel_stack)->k_eip; - } - - *tracebuf++ = init_eip; - - for (framecount = 0; framecount < nframes; framecount++) { - - if ((uint32_t)(tracebound - ((char *)tracebuf)) < (4 * framesize)) { - tracebuf--; - break; - } - - *tracebuf++ = stackptr; -/* Invalid frame, or hit fence */ - if (!stackptr || (stackptr == fence)) { - break; - } - - /* Unaligned frame */ - if (stackptr & 0x0000003) { - break; - } - - if (stackptr > stacklimit) { - break; - } - - if (stackptr <= prevsp) { - break; - } - - if (kdp_machine_vm_read((mach_vm_address_t)(stackptr + RETURN_OFFSET), (caddr_t) tracebuf, sizeof(caddr_t)) != sizeof(caddr_t)) { - break; - } - tracebuf++; - - prevsp = stackptr; - if (kdp_machine_vm_read((mach_vm_address_t)stackptr, (caddr_t) &stackptr, sizeof(caddr_t)) != sizeof(caddr_t)) { - *tracebuf++ = 0; - break; - } - } - - kdp_pmap = 0; - - return (uint32_t) (((char *) tracebuf) - tracepos); -} - -#define RETURN_OFFSET64 8 -/* Routine to encapsulate the 64-bit address read hack*/ -unsigned -machine_read64(addr64_t srcaddr, caddr_t dstaddr, uint32_t len) -{ - return (unsigned)kdp_machine_vm_read(srcaddr, dstaddr, len); -} - -int -machine_trace_thread64(thread_t thread, char *tracepos, char *tracebound, int nframes, boolean_t user_p) -{ - uint64_t *tracebuf = (uint64_t *)tracepos; - uint32_t fence = 0; - addr64_t stackptr = 0; - uint64_t stacklimit = 0xfc000000; - int framecount = 0; - addr64_t init_rip = 0; - addr64_t prevsp = 0; - unsigned framesize = 2 * sizeof(addr64_t); - - if (user_p) { - x86_saved_state64_t *iss64; - iss64 = USER_REGS64(thread); - init_rip = iss64->isf.rip; - stackptr = iss64->rbp; - stacklimit = 0xffffffffffffffffULL; - kdp_pmap = thread->task->map->pmap; - } - - *tracebuf++ = init_rip; - - for (framecount = 0; framecount < nframes; framecount++) { - - if ((uint32_t)(tracebound - ((char *)tracebuf)) < (4 * framesize)) { - tracebuf--; - break; - } - - *tracebuf++ = stackptr; - - if (!stackptr || (stackptr == fence)){ - break; - } - - if (stackptr & 0x0000003) { - break; - } - if (stackptr > stacklimit) { - break; - } - - if (stackptr <= prevsp) { - break; - } - - if (machine_read64(stackptr + RETURN_OFFSET64, (caddr_t) tracebuf, sizeof(addr64_t)) != sizeof(addr64_t)) { - break; - } - tracebuf++; - - prevsp = stackptr; - if (machine_read64(stackptr, (caddr_t) &stackptr, sizeof(addr64_t)) != sizeof(addr64_t)) { - *tracebuf++ = 0; - break; - } - } - - kdp_pmap = NULL; - - return (uint32_t) (((char *) tracebuf) - tracepos); -} - -static struct kdp_callout { - struct kdp_callout *callout_next; - kdp_callout_fn_t callout_fn; - void *callout_arg; -} *kdp_callout_list = NULL; - - -/* - * Called from kernel context to register a kdp event callout. - */ -void -kdp_register_callout( - kdp_callout_fn_t fn, - void *arg) -{ - struct kdp_callout *kcp; - struct kdp_callout *list_head; - - kcp = kalloc(sizeof(*kcp)); - if (kcp == NULL) - panic("kdp_register_callout() kalloc failed"); - - kcp->callout_fn = fn; - kcp->callout_arg = arg; - - /* Lock-less list insertion using compare and exchange. */ - do { - list_head = kdp_callout_list; - kcp->callout_next = list_head; - } while (!OSCompareAndSwapPtr(list_head, kcp, (void * volatile *)&kdp_callout_list)); -} - -/* - * Called at exception/panic time when extering or exiting kdp. - * We are single-threaded at this time and so we don't use locks. 
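The registration path just above is a push-only, lock-free list: concurrent registrations are ordered by the compare-and-swap loop, traversal happens single-threaded at debugger entry, and nodes are never removed, so there is no ABA or reclamation hazard. The pattern in isolation (a sketch; struct node is a stand-in for struct kdp_callout):

struct node {
	struct node *next;
};

static void
push_head_sketch(struct node * volatile *head, struct node *n)
{
	struct node *old;

	do {
		old = *head;		/* snapshot the current head */
		n->next = old;		/* link the new node in front */
	} while (!OSCompareAndSwapPtr(old, n, (void * volatile *)head));
}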
- */ -static void -kdp_callouts(kdp_event_t event) -{ - struct kdp_callout *kcp = kdp_callout_list; - - while (kcp) { - kcp->callout_fn(kcp->callout_arg, event); - kcp = kcp->callout_next; - } -} - -void -kdp_ml_enter_debugger(void) -{ - __asm__ __volatile__("int3"); -} diff --git a/osfmk/kdp/ml/i386/kdp_vm.c b/osfmk/kdp/ml/i386/kdp_vm.c deleted file mode 100644 index 58f614d06..000000000 --- a/osfmk/kdp/ml/i386/kdp_vm.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -int kdp_dump_trap(int type, x86_saved_state32_t *regs); - -static const x86_state_hdr_t thread_flavor_array [] = { - {x86_THREAD_STATE32, x86_THREAD_STATE32_COUNT} -}; - -size_t -kern_collectth_state_size(void) -{ - unsigned int i; - size_t tstate_size = 0; - - for (i = 0; i < sizeof(thread_flavor_array)/sizeof(thread_flavor_array[0]); i++) - tstate_size += sizeof(x86_state_hdr_t) + - (thread_flavor_array[i].count * sizeof(int)); - - return tstate_size; -} - -void -kern_collectth_state(thread_t thread, void *buffer, size_t size) -{ - size_t hoffset; - unsigned int i; - struct thread_command *tc; - - /* - * Fill in thread command structure. - */ - hoffset = 0; - - if (hoffset + sizeof(struct thread_command) > size) - return; - - tc = (struct thread_command *) ((uintptr_t)buffer + hoffset); - tc->cmd = LC_THREAD; - tc->cmdsize = sizeof(struct thread_command) + kern_collectth_state_size(); - hoffset += sizeof(struct thread_command); - /* - * Follow with a struct thread_state_flavor and - * the appropriate thread state struct for each - * thread state flavor. - */ - for (i = 0; i < sizeof(thread_flavor_array)/sizeof(thread_flavor_array[0]); i++) { - - if (hoffset + sizeof(x86_state_hdr_t) > size) - return; - - *(x86_state_hdr_t *)((uintptr_t)buffer + hoffset) = - thread_flavor_array[i]; - hoffset += sizeof(x86_state_hdr_t); - - - if (hoffset + thread_flavor_array[i].count*sizeof(int) > size) - return; - - /* Locate and obtain the non-volatile register context - * for this kernel thread. 
This should ideally be - * encapsulated in machine_thread_get_kern_state() - * but that routine appears to have been co-opted - * by CHUD to obtain pre-interrupt state. - */ - if (thread_flavor_array[i].flavor == x86_THREAD_STATE32) { - x86_thread_state32_t *tstate = (x86_thread_state32_t *) ((uintptr_t)buffer + hoffset); - vm_offset_t kstack; - - bzero(tstate, x86_THREAD_STATE32_COUNT * sizeof(int)); - if ((kstack = thread->kernel_stack) != 0){ - struct x86_kernel_state *iks = STACK_IKS(kstack); - tstate->ebx = iks->k_ebx; - tstate->esp = iks->k_esp; - tstate->ebp = iks->k_ebp; - tstate->edi = iks->k_edi; - tstate->esi = iks->k_esi; - tstate->eip = iks->k_eip; - } - } else { - void *tstate = (void *)((uintptr_t)buffer + hoffset); - - bzero(tstate, thread_flavor_array[i].count*sizeof(int)); - } - - hoffset += thread_flavor_array[i].count*sizeof(int); - } -} - -/* Intended to be called from the kernel trap handler if an unrecoverable fault - * occurs during a crashdump (which shouldn't happen since we validate mappings - * and so on). This should be reworked to attempt some form of recovery. - */ -int -kdp_dump_trap( - int type, - __unused x86_saved_state32_t *saved_state) -{ - printf ("An unexpected trap (type %d) occurred during the system dump, terminating.\n", type); - kdp_send_crashdump_pkt (KDP_EOF, NULL, 0, ((void *) 0)); - abort_panic_transfer(); - kdp_flag &= ~KDP_PANIC_DUMP_ENABLED; - kdp_flag &= ~PANIC_CORE_ON_NMI; - kdp_flag &= ~PANIC_LOG_DUMP; - - kdp_reset(); - - kdp_raise_exception(EXC_BAD_ACCESS, 0, 0, kdp.saved_state); - return( 0 ); -} diff --git a/osfmk/kdp/ml/i386/kdp_x86_common.c b/osfmk/kdp/ml/i386/kdp_x86_common.c index b1c4669ba..ff05a8e5e 100644 --- a/osfmk/kdp/ml/i386/kdp_x86_common.c +++ b/osfmk/kdp/ml/i386/kdp_x86_common.c @@ -26,6 +26,8 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#include + #include #include #include @@ -55,6 +57,12 @@ // #define KDP_VM_READ_DEBUG 1 // #define KDP_VM_WRITE_DEBUG 1 +/* + * A (potentially valid) physical address is not a kernel address + * i.e. it's a user address. + */ +#define IS_PHYS_ADDR(addr) IS_USERADDR64_CANONICAL(addr) + boolean_t kdp_read_io; boolean_t kdp_trans_off; @@ -99,7 +107,7 @@ kdp_machine_vm_read( mach_vm_address_t src, caddr_t dst, mach_vm_size_t len) printf("kdp_vm_read: src %llx dst %p len %llx\n", src, (void *)dst, len); #endif - if (kdp_trans_off) { + if (kdp_trans_off && IS_PHYS_ADDR(src)) { kdp_readphysmem64_req_t rq; mach_vm_size_t ret; @@ -141,8 +149,10 @@ kdp_machine_vm_read( mach_vm_address_t src, caddr_t dst, mach_vm_size_t len) cnt = resid; /* Do a physical copy */ - ml_copy_phys(cur_phys_src, cur_phys_dst, (vm_size_t)cnt); - + if (EFAULT == ml_copy_phys(cur_phys_src, + cur_phys_dst, + (vm_size_t)cnt)) + goto exit; cur_virt_src += cnt; cur_virt_dst += cnt; resid -= cnt; @@ -193,7 +203,10 @@ kdp_machine_phys_read(kdp_readphysmem64_req_t *rq, caddr_t dst, /* Do a physical copy; use ml_copy_phys() in the event this is * a short read with potential side effects.
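All four copy paths in kdp_x86_common.c now share this shape: copy one bounded chunk at a time and abort the transfer on EFAULT instead of letting a nested fault take down the crashdump. The skeleton (a sketch; next_chunk() is a hypothetical stand-in for the per-loop cnt computations, which cap each copy at a page boundary):

while (resid != 0) {
	cnt = next_chunk(cur_phys_src, cur_phys_dst, resid); /* hypothetical */
	if (EFAULT == ml_copy_phys(cur_phys_src, cur_phys_dst,
	    (vm_size_t)cnt))
		goto exit;	/* report only what was copied so far */
	cur_phys_src += cnt;
	cur_phys_dst += cnt;
	resid -= cnt;
}
exit:
	return (len - resid);	/* short count signals the failure */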
*/ - ml_copy_phys(cur_phys_src, cur_phys_dst, (vm_size_t)cnt); + if (EFAULT == ml_copy_phys(cur_phys_src, + cur_phys_dst, + (vm_size_t)cnt)) + goto exit; cur_phys_src += cnt; cur_virt_dst += cnt; resid -= cnt; @@ -239,7 +252,8 @@ kdp_machine_vm_write( caddr_t src, mach_vm_address_t dst, mach_vm_size_t len) if (cnt > resid) cnt = resid; - ml_copy_phys(cur_phys_src, cur_phys_dst, cnt); /* Copy stuff over */ + if (EFAULT == ml_copy_phys(cur_phys_src, cur_phys_dst, cnt)) + goto exit; /* Copy stuff over */ cur_virt_src +=cnt; cur_virt_dst +=cnt; @@ -291,7 +305,8 @@ kdp_machine_phys_write(kdp_writephysmem64_req_t *rq, caddr_t src, if (cnt > resid) cnt = resid; - ml_copy_phys(cur_phys_src, cur_phys_dst, cnt); /* Copy stuff over */ + if (EFAULT == ml_copy_phys(cur_phys_src, cur_phys_dst, cnt)) + goto exit; /* Copy stuff over */ cur_virt_src +=cnt; cur_phys_dst +=cnt; diff --git a/osfmk/kern/Makefile b/osfmk/kern/Makefile index 846b95682..fa484a3c9 100644 --- a/osfmk/kern/Makefile +++ b/osfmk/kern/Makefile @@ -3,27 +3,29 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) DATAFILES = +PRIVATE_DATAFILES = exc_resource.h + EXPORT_ONLY_FILES = \ affinity.h \ assert.h \ audit_sessionport.h \ + call_entry.h \ clock.h \ cpu_number.h \ cpu_data.h \ debug.h \ - etimer.h \ extmod_statistics.h \ ipc_mig.h \ ipc_misc.h \ kalloc.h \ kext_alloc.h \ kern_types.h \ + kpc.h \ ledger.h \ lock.h \ locks.h \ @@ -38,12 +40,14 @@ EXPORT_ONLY_FILES = \ simple_lock.h \ startup.h \ task.h \ + telemetry.h \ thread.h \ thread_call.h \ + timer_call.h \ wait_queue.h \ zalloc.h -INSTALL_MI_LIST = ${DATAFILES} +INSTALL_MI_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} INSTALL_MI_DIR = kern diff --git a/osfmk/kern/affinity.c b/osfmk/kern/affinity.c index 002482dea..3af3c4b10 100644 --- a/osfmk/kern/affinity.c +++ b/osfmk/kern/affinity.c @@ -386,6 +386,7 @@ affinity_space_free(affinity_space_t aspc) { assert(queue_empty(&aspc->aspc_affinities)); + lck_mtx_destroy(&aspc->aspc_lock, &task_lck_grp); DBG("affinity_space_free(%p)\n", aspc); kfree(aspc, sizeof(struct affinity_space)); } diff --git a/osfmk/kern/ast.c b/osfmk/kern/ast.c index 9f6757a6b..ee197f7f6 100644 --- a/osfmk/kern/ast.c +++ b/osfmk/kern/ast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,6 +77,9 @@ #include #include #include +#if CONFIG_TELEMETRY +#include +#endif #include #include #include @@ -174,7 +177,12 @@ ast_taken( */ if (reasons & AST_APC) act_execute_returnhandlers(); - + + if (reasons & AST_GUARD) { + thread_ast_clear(thread, AST_GUARD); + guard_ast(thread); + } + if (reasons & AST_LEDGER) { thread_ast_clear(thread, AST_LEDGER); ledger_ast(thread); @@ -183,12 +191,22 @@ ast_taken( /* * Kernel Profiling Hook */ - if (reasons & AST_KPERF) - { + if (reasons & AST_KPERF) { thread_ast_clear(thread, AST_KPERF); chudxnu_thread_ast(thread); } +#if CONFIG_TELEMETRY + if (reasons & AST_TELEMETRY_ALL) { + boolean_t interrupted_userspace; + + assert((reasons & AST_TELEMETRY_ALL) != AST_TELEMETRY_ALL); /* only one is valid at a time */ + interrupted_userspace = (reasons & AST_TELEMETRY_USER) ? 
TRUE : FALSE; + thread_ast_clear(thread, AST_TELEMETRY_ALL); + telemetry_ast(thread, interrupted_userspace); + } +#endif + ml_set_interrupts_enabled(FALSE); /* diff --git a/osfmk/kern/ast.h b/osfmk/kern/ast.h index 713c6158c..272396cbf 100644 --- a/osfmk/kern/ast.h +++ b/osfmk/kern/ast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -99,6 +99,11 @@ typedef uint32_t ast_t; #define AST_BSD 0x80 #define AST_KPERF 0x100 /* kernel profiling */ #define AST_MACF 0x200 /* MACF user ret pending */ +#define AST_CHUD 0x400 +#define AST_CHUD_URGENT 0x800 +#define AST_GUARD 0x1000 +#define AST_TELEMETRY_USER 0x2000 +#define AST_TELEMETRY_KERNEL 0x4000 #define AST_NONE 0x00 #define AST_ALL (~AST_NONE) @@ -106,6 +111,9 @@ typedef uint32_t ast_t; #define AST_SCHEDULING (AST_PREEMPTION | AST_YIELD | AST_HANDOFF) #define AST_PREEMPTION (AST_PREEMPT | AST_QUANTUM | AST_URGENT) +#define AST_CHUD_ALL (AST_CHUD_URGENT|AST_CHUD) +#define AST_TELEMETRY_ALL (AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL) + #ifdef MACHINE_AST /* * machine/ast.h is responsible for defining aston and astoff. @@ -117,11 +125,6 @@ typedef uint32_t ast_t; #endif /* MACHINE_AST */ -#define AST_CHUD_URGENT 0x800 -#define AST_CHUD 0x400 - -#define AST_CHUD_ALL (AST_CHUD_URGENT|AST_CHUD) - /* Initialize module */ extern void ast_init(void); @@ -144,7 +147,7 @@ extern ast_t *ast_pending(void); #define MACHINE_AST_PER_THREAD 0 #endif -#define AST_PER_THREAD (AST_APC | AST_BSD | AST_MACF | MACHINE_AST_PER_THREAD | AST_LEDGER) +#define AST_PER_THREAD (AST_APC | AST_BSD | AST_MACF | MACHINE_AST_PER_THREAD | AST_LEDGER | AST_GUARD | AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL) /* * ast_pending(), ast_on(), ast_off(), ast_context(), and ast_propagate() * assume splsched. 
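AST_TELEMETRY_ALL is a two-bit mask rather than a single flag, which is what the assert in ast_taken() above relies on: masking the pending reasons and comparing the result against the whole mask detects the illegal case of both telemetry bits being set at once. Worked out (a sketch):

ast_t reasons;

reasons = AST_TELEMETRY_USER;			/* 0x2000 */
/* (reasons & AST_TELEMETRY_ALL) == 0x2000, != 0x6000: assert holds */
assert((reasons & AST_TELEMETRY_ALL) != AST_TELEMETRY_ALL);

reasons = AST_TELEMETRY_USER | AST_TELEMETRY_KERNEL;	/* 0x6000 */
/* (reasons & AST_TELEMETRY_ALL) == 0x6000: the assert would fire */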
@@ -152,18 +155,18 @@ extern ast_t *ast_pending(void); #define ast_on_fast(reasons) \ MACRO_BEGIN \ - ast_t *myast = ast_pending(); \ + ast_t *_ast_myast = ast_pending(); \ \ - if ((*myast |= (reasons)) != AST_NONE) \ - { aston(myast); } \ + if ((*_ast_myast |= (reasons)) != AST_NONE) \ + { aston(_ast_myast); } \ MACRO_END #define ast_off_fast(reasons) \ MACRO_BEGIN \ - ast_t *myast = ast_pending(); \ + ast_t *_ast_myast = ast_pending(); \ \ - if ((*myast &= ~(reasons)) == AST_NONE) \ - { astoff(myast); } \ + if ((*_ast_myast &= ~(reasons)) == AST_NONE) \ + { astoff(_ast_myast); } \ MACRO_END #define ast_propagate(reasons) ast_on(reasons) diff --git a/osfmk/kern/bsd_kern.c b/osfmk/kern/bsd_kern.c index ea5f9e139..8d4f388ed 100644 --- a/osfmk/kern/bsd_kern.c +++ b/osfmk/kern/bsd_kern.c @@ -27,8 +27,10 @@ */ #include #include +#include #include +#include #include #include #include @@ -41,6 +43,7 @@ #include #include #include /* last */ +#include #undef thread_should_halt @@ -58,6 +61,7 @@ void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *); kern_return_t get_signalact(task_t , thread_t *, int); int get_vmsubmap_entries(vm_map_t, vm_object_offset_t, vm_object_offset_t); void syscall_exit_funnelcheck(void); +int fill_task_rusage_v2(task_t task, struct rusage_info_v2 *ri); /* @@ -109,7 +113,7 @@ int get_thread_lock_count(thread_t th) */ thread_t get_firstthread(task_t task) { - thread_t thread = (thread_t)queue_first(&task->threads); + thread_t thread = (thread_t)(void *)queue_first(&task->threads); if (queue_end(&task->threads, (queue_entry_t)thread)) thread = THREAD_NULL; @@ -137,7 +141,7 @@ get_signalact( return (KERN_FAILURE); } - for (inc = (thread_t)queue_first(&task->threads); + for (inc = (thread_t)(void *)queue_first(&task->threads); !queue_end(&task->threads, (queue_entry_t)inc); ) { thread_mtx_lock(inc); if (inc->active && @@ -147,7 +151,7 @@ get_signalact( } thread_mtx_unlock(inc); - inc = (thread_t)queue_next(&inc->task_threads); + inc = (thread_t)(void *)queue_next(&inc->task_threads); } if (result_out) @@ -185,7 +189,7 @@ check_actforsig( return (KERN_FAILURE); } - for (inc = (thread_t)queue_first(&task->threads); + for (inc = (thread_t)(void *)queue_first(&task->threads); !queue_end(&task->threads, (queue_entry_t)inc); ) { if (inc == thread) { thread_mtx_lock(inc); @@ -200,7 +204,7 @@ check_actforsig( break; } - inc = (thread_t)queue_next(&inc->task_threads); + inc = (thread_t)(void *)queue_next(&inc->task_threads); } if (result == KERN_SUCCESS) { @@ -263,8 +267,8 @@ int get_task_numactivethreads(task_t task) int num_active_thr=0; task_lock(task); - for (inc = (thread_t)queue_first(&task->threads); - !queue_end(&task->threads, (queue_entry_t)inc); inc = (thread_t)queue_next(&inc->task_threads)) + for (inc = (thread_t)(void *)queue_first(&task->threads); + !queue_end(&task->threads, (queue_entry_t)inc); inc = (thread_t)(void *)queue_next(&inc->task_threads)) { if(inc->active) num_active_thr++; @@ -281,11 +285,11 @@ int get_task_numacts(task_t t) /* does this machine need 64bit register set for signal handler */ int is_64signalregset(void) { - task_t t = current_task(); - if(t->taskFeatures[0] & tf64BitData) + if (task_has_64BitData(current_task())) { return(1); - else - return(0); + } + + return(0); } /* @@ -336,6 +340,38 @@ uint64_t get_task_resident_size(task_t task) return((uint64_t)pmap_resident_count(map->pmap) * PAGE_SIZE_64); } +/* + * + */ +uint64_t get_task_phys_footprint(task_t task) +{ + kern_return_t ret; + ledger_amount_t credit, debit; + + ret 
= ledger_get_entries(task->ledger, task_ledgers.phys_footprint, &credit, &debit); + if (KERN_SUCCESS == ret) { + return (credit - debit); + } + + return 0; +} + +/* + * + */ +uint64_t get_task_phys_footprint_max(task_t task) +{ + kern_return_t ret; + ledger_amount_t max; + + ret = ledger_get_maximum(task->ledger, task_ledgers.phys_footprint, &max); + if (KERN_SUCCESS == ret) { + return max; + } + + return 0; +} + /* * */ @@ -533,10 +569,10 @@ task_act_iterate_wth_args( task_lock(task); - for (inc = (thread_t)queue_first(&task->threads); + for (inc = (thread_t)(void *)queue_first(&task->threads); !queue_end(&task->threads, (queue_entry_t)inc); ) { (void) (*func_callback)(inc, func_arg); - inc = (thread_t)queue_next(&inc->task_threads); + inc = (thread_t)(void *)queue_next(&inc->task_threads); } task_unlock(task); @@ -565,7 +601,7 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo) uint32_t cswitch = 0, numrunning = 0; uint32_t syscalls_unix = 0; uint32_t syscalls_mach = 0; - + map = (task == kernel_task)? kernel_map: task->map; ptinfo->pti_virtual_size = map->size; @@ -586,6 +622,9 @@ fill_taskprocinfo(task_t task, struct proc_taskinfo_internal * ptinfo) uint64_t tval; spl_t x; + if (thread->options & TH_OPT_IDLE_THREAD) + continue; + x = splsched(); thread_lock(thread); @@ -646,7 +685,7 @@ fill_taskthreadinfo(task_t task, uint64_t thaddr, int thuniqueid, struct proc_th task_lock(task); - for (thact = (thread_t)queue_first(&task->threads); + for (thact = (thread_t)(void *)queue_first(&task->threads); !queue_end(&task->threads, (queue_entry_t)thact); ) { addr = (thuniqueid==0)?thact->machine.cthread_self: thact->thread_id; if (addr == thaddr) @@ -657,8 +696,8 @@ fill_taskthreadinfo(task_t task, uint64_t thaddr, int thuniqueid, struct proc_th err = 1; goto out; } - ptinfo->pth_user_time = ((basic_info.user_time.seconds * NSEC_PER_SEC) + (basic_info.user_time.microseconds * NSEC_PER_USEC)); - ptinfo->pth_system_time = ((basic_info.system_time.seconds * NSEC_PER_SEC) + (basic_info.system_time.microseconds * NSEC_PER_USEC)); + ptinfo->pth_user_time = ((basic_info.user_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.user_time.microseconds * (integer_t)NSEC_PER_USEC)); + ptinfo->pth_system_time = ((basic_info.system_time.seconds * (integer_t)NSEC_PER_SEC) + (basic_info.system_time.microseconds * (integer_t)NSEC_PER_USEC)); ptinfo->pth_cpu_usage = basic_info.cpu_usage; ptinfo->pth_policy = basic_info.policy; @@ -675,7 +714,7 @@ fill_taskthreadinfo(task_t task, uint64_t thaddr, int thuniqueid, struct proc_th err = 0; goto out; } - thact = (thread_t)queue_next(&thact->task_threads); + thact = (thread_t)(void *)queue_next(&thact->task_threads); } err = 1; @@ -696,14 +735,14 @@ fill_taskthreadlist(task_t task, void * buffer, int thcount) task_lock(task); - for (thact = (thread_t)queue_first(&task->threads); + for (thact = (thread_t)(void *)queue_first(&task->threads); !queue_end(&task->threads, (queue_entry_t)thact); ) { thaddr = thact->machine.cthread_self; *uptr++ = thaddr; numthr++; if (numthr >= thcount) goto out; - thact = (thread_t)queue_next(&thact->task_threads); + thact = (thread_t)(void *)queue_next(&thact->task_threads); } out: @@ -728,3 +767,35 @@ syscall_exit_funnelcheck(void) if (thread->funnel_lock) panic("syscall exit with funnel held\n"); } + + +/* + * Gather the various pieces of info about the designated task, + * and collect it all into a single rusage_info. 
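A ledger entry is kept as a credit/debit pair and the balance is their difference; that is all get_task_phys_footprint() above does, and fill_task_rusage_v2() below leans on ledger_get_balance(), assumed here to be a one-call equivalent of the same subtraction. Reduced to its core (sketch):

ledger_amount_t credit, debit, footprint = 0;

if (ledger_get_entries(task->ledger, task_ledgers.phys_footprint,
        &credit, &debit) == KERN_SUCCESS)
	footprint = credit - debit;	/* bytes currently attributed */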
+ */ +int +fill_task_rusage_v2(task_t task, struct rusage_info_v2 *ri) +{ + struct task_power_info powerinfo; + + assert(task != TASK_NULL); + task_lock(task); + + task_power_info_locked(task, &powerinfo); + ri->ri_pkg_idle_wkups = powerinfo.task_platform_idle_wakeups; + ri->ri_interrupt_wkups = powerinfo.task_interrupt_wakeups; + ri->ri_user_time = powerinfo.total_user; + ri->ri_system_time = powerinfo.total_system; + + ledger_get_balance(task->ledger, task_ledgers.phys_footprint, + (ledger_amount_t *)&ri->ri_phys_footprint); + ledger_get_balance(task->ledger, task_ledgers.phys_mem, + (ledger_amount_t *)&ri->ri_resident_size); + ledger_get_balance(task->ledger, task_ledgers.wired_mem, + (ledger_amount_t *)&ri->ri_wired_size); + + ri->ri_pageins = task->pageins; + + task_unlock(task); + return (0); +} diff --git a/osfmk/kern/btlog.c b/osfmk/kern/btlog.c new file mode 100644 index 000000000..50fc5991e --- /dev/null +++ b/osfmk/kern/btlog.c @@ -0,0 +1,327 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include + +/* + * Since all records are located contiguously in memory, + * we use indices to them as the primary lookup mechanism, + * and to maintain the linked list of active records + * in chronological order. 
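Why 24-bit indices instead of pointers: a link stays valid wherever the single allocation lands, and it packs into one word alongside the 8-bit operation code. With the record layout defined just below, the cost per record on LP64 works out as follows (a sketch of the arithmetic, using the BTLOG_MAX_DEPTH of 15 as the backtrace depth):

/*
 * sizeof(btlog_record_t), LP64:
 *   next:24 + operation:8   ->  4 bytes
 *   uint32_t _pad           ->  4 bytes
 *   void *element           ->  8 bytes
 *                               16 bytes fixed, plus the backtrace:
 * btrecord_size = 16 + 15 * sizeof(void *) = 16 + 120 = 136 bytes
 */
btrecord_size = sizeof(btlog_record_t) + sizeof(void *) * record_btdepth;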
+ */ +typedef uint32_t btlog_recordindex_t; /* only 24 bits used */ +#define BTLOG_RECORDINDEX_NONE (0xFFFFFF) +#define BTLOG_MAX_RECORDS (0xFFFFFF /* 16777215 */) + +typedef struct btlog_record { + btlog_recordindex_t next:24; + uint8_t operation; +#if __LP64__ + uint32_t _pad; +#endif + void *element; + void *bt[]; /* variable sized, based on btlog_t params */ +} btlog_record_t; + +struct btlog { + vm_address_t btlog_buffer; /* all memory for this btlog_t */ + vm_size_t btlog_buffersize; + + btlog_lock_t lock_callback; /* caller-provided locking */ + btlog_unlock_t unlock_callback; + void *callback_context; + + uintptr_t btrecords; /* use btlog_recordindex_t to lookup */ + size_t btrecord_count; + size_t btrecord_btdepth; /* BT entries per record */ + size_t btrecord_size; + + btlog_recordindex_t head; /* active record list */ + btlog_recordindex_t tail; + size_t activecount; + + btlog_recordindex_t freelist; +}; + +extern boolean_t kmem_alloc_ready; + +#define lookup_btrecord(btlog, index) \ + ((btlog_record_t *)(btlog->btrecords + index * btlog->btrecord_size)) + +btlog_t * +btlog_create(size_t numrecords, + size_t record_btdepth, + btlog_lock_t lock_callback, + btlog_unlock_t unlock_callback, + void *callback_context) +{ + btlog_t *btlog; + vm_size_t buffersize_needed; + vm_address_t buffer = 0; + size_t i; + kern_return_t ret; + size_t btrecord_size; + + if (!kmem_alloc_ready) + return NULL; + + if (numrecords > BTLOG_MAX_RECORDS) + return NULL; + + if (numrecords == 0) + return NULL; + + if (record_btdepth > BTLOG_MAX_DEPTH) + return NULL; + + if ((lock_callback && !unlock_callback) || + (!lock_callback && unlock_callback)) + return NULL; + + /* btlog_record_t is variable-sized, calculate needs now */ + btrecord_size = sizeof(btlog_record_t) + + sizeof(void *) * record_btdepth; + + buffersize_needed = sizeof(btlog_t) + numrecords * btrecord_size; + buffersize_needed = round_page(buffersize_needed); + + /* since rounding to a page size might hold more, recalculate */ + numrecords = MIN(BTLOG_MAX_RECORDS, + (buffersize_needed - sizeof(btlog_t))/btrecord_size); + + ret = kmem_alloc(kernel_map, &buffer, buffersize_needed); + if (ret != KERN_SUCCESS) + return NULL; + + btlog = (btlog_t *)buffer; + btlog->btlog_buffer = buffer; + btlog->btlog_buffersize = buffersize_needed; + + btlog->lock_callback = lock_callback; + btlog->unlock_callback = unlock_callback; + btlog->callback_context = callback_context; + + btlog->btrecords = (uintptr_t)(buffer + sizeof(btlog_t)); + btlog->btrecord_count = numrecords; + btlog->btrecord_btdepth = record_btdepth; + btlog->btrecord_size = btrecord_size; + + btlog->head = BTLOG_RECORDINDEX_NONE; + btlog->tail = BTLOG_RECORDINDEX_NONE; + btlog->activecount = 0; + + /* populate freelist with all records in order */ + btlog->freelist = 0; + for (i=0; i < (numrecords - 1); i++) { + btlog_record_t *rec = lookup_btrecord(btlog, i); + rec->next = (btlog_recordindex_t)(i + 1); + } + lookup_btrecord(btlog, i)->next = BTLOG_RECORDINDEX_NONE; /* terminate */ + + return btlog; +} + +/* Assumes btlog is already locked */ +static btlog_recordindex_t +btlog_get_record_from_freelist(btlog_t *btlog) +{ + btlog_recordindex_t recindex = btlog->freelist; + + if (recindex == BTLOG_RECORDINDEX_NONE) { + /* nothing on freelist */ + return BTLOG_RECORDINDEX_NONE; + } else { + /* remove the head of the freelist */ + btlog_record_t *record = lookup_btrecord(btlog, recindex); + btlog->freelist = record->next; + return recindex; + } +} + +/* Assumes btlog is already locked */ +static 
btlog_recordindex_t +btlog_evict_record_from_activelist(btlog_t *btlog) +{ + btlog_recordindex_t recindex = btlog->head; + + if (recindex == BTLOG_RECORDINDEX_NONE) { + /* nothing on active list */ + return BTLOG_RECORDINDEX_NONE; + } else { + /* remove the head of the active list */ + btlog_record_t *record = lookup_btrecord(btlog, recindex); + btlog->head = record->next; + btlog->activecount--; + if (btlog->head == BTLOG_RECORDINDEX_NONE) { + /* active list is now empty, update tail */ + btlog->tail = BTLOG_RECORDINDEX_NONE; + } + return recindex; + } +} + +/* Assumes btlog is already locked */ +static void +btlog_append_record_to_activelist(btlog_t *btlog, btlog_recordindex_t recindex) +{ + if (btlog->head == BTLOG_RECORDINDEX_NONE) { + /* empty active list, update both head and tail */ + btlog->head = btlog->tail = recindex; + } else { + btlog_record_t *record = lookup_btrecord(btlog, btlog->tail); + record->next = recindex; + btlog->tail = recindex; + } + btlog->activecount++; +} + +void +btlog_add_entry(btlog_t *btlog, + void *element, + uint8_t operation, + void *bt[], + size_t btcount) +{ + btlog_recordindex_t recindex; + btlog_record_t *record; + size_t i; + + if (btlog->lock_callback) + btlog->lock_callback(btlog->callback_context); + + /* If there's a free record, use it */ + recindex = btlog_get_record_from_freelist(btlog); + if (recindex == BTLOG_RECORDINDEX_NONE) { + /* Use the first active record (FIFO age-out) */ + recindex = btlog_evict_record_from_activelist(btlog); + assert(recindex != BTLOG_RECORDINDEX_NONE); + } + + record = lookup_btrecord(btlog, recindex); + + /* we always add to the tail, so there is no next pointer */ + record->next = BTLOG_RECORDINDEX_NONE; + record->operation = operation; + record->element = element; + for (i=0; i < MIN(btcount, btlog->btrecord_btdepth); i++) { + record->bt[i] = bt[i]; + } + for (; i < btlog->btrecord_btdepth; i++) { + record->bt[i] = NULL; + } + + btlog_append_record_to_activelist(btlog, recindex); + + if (btlog->unlock_callback) + btlog->unlock_callback(btlog->callback_context); +} + +void +btlog_remove_entries_for_element(btlog_t *btlog, + void *element) +{ + btlog_recordindex_t recindex; + btlog_record_t *record; + + if (btlog->lock_callback) + btlog->lock_callback(btlog->callback_context); + + /* + * Since the btlog_t anchors the active + * list with a pointer to the head of + * the list, first loop making sure + * the head is correct (and doesn't + * match the element being removed). 
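Taken together, the two helpers mean btlog_add_entry() can never fail to record: when the freelist is empty it recycles the oldest active record, so the log always holds the most recent btrecord_count events. The policy in miniature (sketch):

recindex = btlog_get_record_from_freelist(btlog);
if (recindex == BTLOG_RECORDINDEX_NONE)		/* log is full */
	recindex = btlog_evict_record_from_activelist(btlog);	/* FIFO */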
+ */ + recindex = btlog->head; + record = lookup_btrecord(btlog, recindex); + while (recindex != BTLOG_RECORDINDEX_NONE) { + if (record->element == element) { + /* remove head of active list */ + btlog->head = record->next; + btlog->activecount--; + + /* add to freelist */ + record->next = btlog->freelist; + btlog->freelist = recindex; + + /* check the new head */ + recindex = btlog->head; + record = lookup_btrecord(btlog, recindex); + } else { + /* head didn't match, so we can move on */ + break; + } + } + + if (recindex == BTLOG_RECORDINDEX_NONE) { + /* we iterated over the entire active list removing the element */ + btlog->tail = BTLOG_RECORDINDEX_NONE; + } else { + /* the head of the active list is stable, now remove other entries */ + btlog_recordindex_t precindex = recindex; + btlog_record_t *precord = record; + + recindex = precord->next; + record = lookup_btrecord(btlog, recindex); + while (recindex != BTLOG_RECORDINDEX_NONE) { + if (record->element == element) { + /* remove in place */ + precord->next = record->next; + btlog->activecount--; + + /* add to freelist */ + record->next = btlog->freelist; + btlog->freelist = recindex; + + /* check the next record */ + recindex = precord->next; + record = lookup_btrecord(btlog, recindex); + } else { + /* check the next record */ + precindex = recindex; + precord = record; + + recindex = record->next; + record = lookup_btrecord(btlog, recindex); + } + } + + /* We got to the end of the active list. Update the tail */ + btlog->tail = precindex; + } + + if (btlog->unlock_callback) + btlog->unlock_callback(btlog->callback_context); + +} diff --git a/osfmk/kern/btlog.h b/osfmk/kern/btlog.h new file mode 100644 index 000000000..7bbc12a5d --- /dev/null +++ b/osfmk/kern/btlog.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _KERN_BTLOG_H_ +#define _KERN_BTLOG_H_ + +#include +#include +#include + +#ifdef XNU_KERNEL_PRIVATE + +/* + * The btlog subsystem allows for fast unobtrusive backtraces + * to be recorded and maintained in chronological order. + * + * Each backtrace is associated with an element/object, + * and an operation. For example, memory allocations and + * frees can be tracked with this infrastructure. So + * can refcounts. 
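A hypothetical caller, tracking allocation and free events for an object (a sketch: the operation codes, the backtrace helper and the element pointer are invented for illustration, and passing NULL lock callbacks means the caller must serialize access itself):

#define OP_ALLOC 1	/* operation codes are caller-defined */
#define OP_FREE  2

void   *bt[BTLOG_MAX_DEPTH];
size_t  depth = collect_backtrace_sketch(bt, BTLOG_MAX_DEPTH); /* hypothetical */

btlog_t *log = btlog_create(4096, BTLOG_MAX_DEPTH, NULL, NULL, NULL);

btlog_add_entry(log, element, OP_ALLOC, bt, depth);
/* ... later, once 'element' is freed and no longer of interest: */
btlog_remove_entries_for_element(log, element);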
The "operation" namespace is maintained + * by the caller. + * + * When the event buffer fills, records are reused in FIFO + * order. + * + * When a btlog_t is created, callbacks can be provided + * to ensure proper locking of the datastructures. If these + * are not provided, the caller is responsible for + * preventing simultaneous modification. + */ + +/* + * BTLOG_MAX_DEPTH configures how deep of a stack trace is stored. 15 + * levels is usually enough to get past all the layers of code in + * kalloc and IOKit and see who the actual caller is up above these + * lower levels, when used by the zone allocator logging code. + */ + +#define BTLOG_MAX_DEPTH 15 + +struct btlog; +typedef struct btlog btlog_t; + +typedef void (*btlog_lock_t)(void *context); +typedef void (*btlog_unlock_t)(void *context); + +extern btlog_t *btlog_create(size_t numrecords, + size_t record_btdepth, + btlog_lock_t lock_callback, + btlog_unlock_t unlock_callback, + void *callback_context); + +extern void btlog_add_entry(btlog_t *btlog, + void *element, + uint8_t operation, + void *bt[], + size_t btcount); + +extern void btlog_remove_entries_for_element(btlog_t *btlog, + void *element); + +#endif /* XNU_KERNEL_PRIVATE */ + +#endif /* _KERN_BTLOG_H_ */ diff --git a/osfmk/kern/call_entry.h b/osfmk/kern/call_entry.h index 36f47a31b..96fe6e856 100644 --- a/osfmk/kern/call_entry.h +++ b/osfmk/kern/call_entry.h @@ -32,9 +32,11 @@ #ifndef _KERN_CALL_ENTRY_H_ #define _KERN_CALL_ENTRY_H_ -#ifdef MACH_KERNEL_PRIVATE +#ifdef XNU_KERNEL_PRIVATE #include +#define TIMER_TRACE 1 + typedef void *call_entry_param_t; typedef void (*call_entry_func_t)( call_entry_param_t param0, @@ -47,16 +49,21 @@ typedef struct call_entry { call_entry_param_t param0; call_entry_param_t param1; uint64_t deadline; +#if TIMER_TRACE + uint64_t entry_time; +#endif } call_entry_data_t; typedef struct call_entry *call_entry_t; +#ifdef MACH_KERNEL_PRIVATE #define call_entry_setup(entry, pfun, p0) \ MACRO_BEGIN \ (entry)->func = (call_entry_func_t)(pfun); \ (entry)->param0 = (call_entry_param_t)(p0); \ (entry)->queue = NULL; \ + (entry)->deadline = 0; \ MACRO_END #define qe(x) ((queue_entry_t)(x)) @@ -124,8 +131,7 @@ call_entry_enqueue_deadline( } insque(qe(entry), qe(current)); } - else - if (deadline < entry->deadline) { + else if (deadline < entry->deadline) { current = CE(queue_prev(qe(entry))); (void)remque(qe(entry)); @@ -147,4 +153,6 @@ call_entry_enqueue_deadline( } #endif /* MACH_KERNEL_PRIVATE */ +#endif /* XNU_KERNEL_PRIVATE */ + #endif /* _KERN_CALL_ENTRY_H_ */ diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c index 771a0dbef..740f16340 100644 --- a/osfmk/kern/clock.c +++ b/osfmk/kern/clock.c @@ -231,6 +231,23 @@ void clock_get_calendar_microtime( clock_sec_t *secs, clock_usec_t *microsecs) +{ + clock_get_calendar_absolute_and_microtime(secs, microsecs, NULL); +} + +/* + * clock_get_calendar_absolute_and_microtime: + * + * Returns the current calendar value, + * microseconds as the fraction. Also + * returns mach_absolute_time if abstime + * is not NULL. 
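The motivation for the combined call: a caller that reads the calendar time and then samples mach_absolute_time() separately gets two values that can straddle a calendar adjustment, because only the first read happens under the clock lock. Sampling the absolute time inside the same locked section yields a coherent triple (sketch, using the declaration added to clock.h later in this patch):

clock_sec_t  secs;
clock_usec_t microsecs;
uint64_t     abstime;

/* calendar seconds, the microsecond fraction, and the matching
 * mach_absolute_time(), all captured under one clock_lock() */
clock_get_calendar_absolute_and_microtime(&secs, &microsecs, &abstime);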
+ */ +void +clock_get_calendar_absolute_and_microtime( + clock_sec_t *secs, + clock_usec_t *microsecs, + uint64_t *abstime) { uint64_t now; spl_t s; @@ -239,6 +256,8 @@ clock_get_calendar_microtime( clock_lock(); now = mach_absolute_time(); + if (abstime) + *abstime = now; if (clock_calend.adjdelta < 0) { uint32_t t32; @@ -547,7 +566,7 @@ clock_adjtime( interval = calend_set_adjustment(secs, microsecs); if (interval != 0) { calend_adjdeadline = mach_absolute_time() + interval; - if (!timer_call_enter(&calend_adjcall, calend_adjdeadline, TIMER_CALL_CRITICAL)) + if (!timer_call_enter(&calend_adjcall, calend_adjdeadline, TIMER_CALL_SYS_CRITICAL)) calend_adjactive++; } else @@ -570,7 +589,7 @@ calend_set_adjustment( /* * Compute the total adjustment time in nanoseconds. */ - total = (int64_t)*secs * NSEC_PER_SEC + *microsecs * NSEC_PER_USEC; + total = ((int64_t)*secs * (int64_t)NSEC_PER_SEC) + (*microsecs * (int64_t)NSEC_PER_USEC); /* * Disable commpage gettimeofday(). @@ -601,7 +620,7 @@ calend_set_adjustment( * Positive adjustment. If greater than the preset 'big' * threshold, slew at a faster rate, capping if necessary. */ - if (total > calend_adjbig) + if (total > (int64_t) calend_adjbig) delta *= 10; if (delta > total) delta = (int32_t)total; @@ -618,7 +637,7 @@ calend_set_adjustment( * greater than the preset 'big' threshold, slew at a faster * rate, capping if necessary. */ - if (total < -calend_adjbig) + if (total < (int64_t) -calend_adjbig) delta *= 10; delta = -delta; if (delta < total) @@ -665,8 +684,8 @@ calend_set_adjustment( * remaining uncorrected time from it. */ if (ototal != 0) { - *secs = (long)(ototal / NSEC_PER_SEC); - *microsecs = (int)((ototal % NSEC_PER_SEC) / NSEC_PER_USEC); + *secs = (long)(ototal / (long)NSEC_PER_SEC); + *microsecs = (int)((ototal % (int)NSEC_PER_SEC) / (int)NSEC_PER_USEC); } else *secs = *microsecs = 0; @@ -692,7 +711,7 @@ calend_adjust_call(void) if (interval != 0) { clock_deadline_for_periodic_event(interval, mach_absolute_time(), &calend_adjdeadline); - if (!timer_call_enter(&calend_adjcall, calend_adjdeadline, TIMER_CALL_CRITICAL)) + if (!timer_call_enter(&calend_adjcall, calend_adjdeadline, TIMER_CALL_SYS_CRITICAL)) calend_adjactive++; } } @@ -792,7 +811,8 @@ mach_wait_until_trap( uint64_t deadline = args->deadline; wait_result_t wresult; - wresult = assert_wait_deadline((event_t)mach_wait_until_trap, THREAD_ABORTSAFE, deadline); + wresult = assert_wait_deadline_with_leeway((event_t)mach_wait_until_trap, THREAD_ABORTSAFE, + TIMEOUT_URGENCY_USER_NORMAL, deadline, 0); if (wresult == THREAD_WAITING) wresult = thread_block(mach_wait_until_continue); diff --git a/osfmk/kern/clock.h b/osfmk/kern/clock.h index fd31a1b9b..b2e4b77bf 100644 --- a/osfmk/kern/clock.h +++ b/osfmk/kern/clock.h @@ -182,6 +182,11 @@ extern void clock_get_calendar_microtime( clock_sec_t *secs, clock_usec_t *microsecs); +extern void clock_get_calendar_absolute_and_microtime( + clock_sec_t *secs, + clock_usec_t *microsecs, + uint64_t *abstime); + extern void clock_get_calendar_nanotime( clock_sec_t *secs, clock_nsec_t *nanosecs); @@ -259,9 +264,9 @@ extern void nanoseconds_to_absolutetime( #include /* Use mach_absolute_time() */ -extern mach_timespec_t clock_get_system_value(void) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_2_0, __IPHONE_NA); +extern mach_timespec_t clock_get_system_value(void) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_2_0, __IPHONE_6_0); -extern mach_timespec_t clock_get_calendar_value(void) 
__OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_2_0, __IPHONE_NA); +extern mach_timespec_t clock_get_calendar_value(void) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_0, __MAC_10_8, __IPHONE_2_0, __IPHONE_6_0); #else /* __LP64__ */ @@ -277,42 +282,6 @@ extern void delay_for_interval( uint32_t interval, uint32_t scale_factor); -#ifndef MACH_KERNEL_PRIVATE - -#ifndef __LP64__ - -#ifndef ABSOLUTETIME_SCALAR_TYPE - -#define clock_get_uptime(a) \ - clock_get_uptime(__OSAbsoluteTimePtr(a)) - -#define clock_interval_to_deadline(a, b, c) \ - clock_interval_to_deadline((a), (b), __OSAbsoluteTimePtr(c)) - -#define clock_interval_to_absolutetime_interval(a, b, c) \ - clock_interval_to_absolutetime_interval((a), (b), __OSAbsoluteTimePtr(c)) - -#define clock_absolutetime_interval_to_deadline(a, b) \ - clock_absolutetime_interval_to_deadline(__OSAbsoluteTime(a), __OSAbsoluteTimePtr(b)) - -#define clock_deadline_for_periodic_event(a, b, c) \ - clock_deadline_for_periodic_event(__OSAbsoluteTime(a), __OSAbsoluteTime(b), __OSAbsoluteTimePtr(c)) - -#define clock_delay_until(a) \ - clock_delay_until(__OSAbsoluteTime(a)) - -#define absolutetime_to_nanoseconds(a, b) \ - absolutetime_to_nanoseconds(__OSAbsoluteTime(a), (b)) - -#define nanoseconds_to_absolutetime(a, b) \ - nanoseconds_to_absolutetime((a), __OSAbsoluteTimePtr(b)) - -#endif /* ABSOLUTETIME_SCALAR_TYPE */ - -#endif /* __LP64__ */ - -#endif /* MACH_KERNEL_PRIVATE */ - #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/osfmk/kern/clock_oldops.c b/osfmk/kern/clock_oldops.c index 9a3e6c93f..13a9fa1c6 100644 --- a/osfmk/kern/clock_oldops.c +++ b/osfmk/kern/clock_oldops.c @@ -766,7 +766,7 @@ set_alarm( uint64_t abstime; nanotime_to_absolutetime(alarm_time->tv_sec, alarm_time->tv_nsec, &abstime); - timer_call_enter(&alarm_expire_timer, abstime, 0); + timer_call_enter_with_leeway(&alarm_expire_timer, NULL, abstime, 0, TIMER_CALL_USER_NORMAL, FALSE); } /* diff --git a/osfmk/kern/debug.c b/osfmk/kern/debug.c index b89774897..7ee11d7a9 100644 --- a/osfmk/kern/debug.c +++ b/osfmk/kern/debug.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -108,18 +109,15 @@ unsigned int panic_is_inited = 0; unsigned int return_on_panic = 0; unsigned long panic_caller; -#if CONFIG_EMBEDDED -#define DEBUG_BUF_SIZE (PAGE_SIZE) -#else #define DEBUG_BUF_SIZE (3 * PAGE_SIZE) -#endif char debug_buf[DEBUG_BUF_SIZE]; char *debug_buf_ptr = debug_buf; unsigned int debug_buf_size = sizeof(debug_buf); static char model_name[64]; -/* uuid_string_t */ char kernel_uuid[37]; +unsigned char *kernel_uuid; +/* uuid_string_t */ char kernel_uuid_string[37]; static spl_t panic_prologue(const char *str); static void panic_epilogue(spl_t s); @@ -156,7 +154,15 @@ Assert( } saved_return_on_panic = return_on_panic; - return_on_panic = 1; + + /* + * If we don't have a debugger configured, returning from an + * assert is a bad, bad idea; there is no guarantee that we + * didn't simply assert before we were able to restart the + * platform. 
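The timer changes above all follow one rule: a timer_call now declares an urgency class in place of the old binary TIMER_CALL_CRITICAL, presumably so the subsystem can decide what may be coalesced or rate-limited. The two flavors, as they appear in this patch (sketch; argument values are taken verbatim from the hunks above):

/* kernel-internal and must fire on time: */
timer_call_enter(&calend_adjcall, calend_adjdeadline,
    TIMER_CALL_SYS_CRITICAL);

/* user-visible alarm, eligible for class-based treatment: */
timer_call_enter_with_leeway(&alarm_expire_timer, NULL,
    abstime, 0 /* leeway */, TIMER_CALL_USER_NORMAL,
    FALSE /* ratelimited */);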
+ */ + if (current_debugger != NO_CUR_DB) + return_on_panic = 1; panic_plain("%s:%d Assertion failed: %s", file, line, expression); @@ -181,7 +187,6 @@ MACRO_BEGIN \ simple_unlock(&panic_lock); \ MACRO_END - void panic_init(void) { @@ -190,7 +195,8 @@ panic_init(void) uuid = getuuidfromheader(&_mh_execute_header, &uuidlen); if ((uuid != NULL) && (uuidlen == sizeof(uuid_t))) { - uuid_unparse_upper(*(uuid_t *)uuid, kernel_uuid); + kernel_uuid = uuid; + uuid_unparse_upper(*(uuid_t *)uuid, kernel_uuid_string); } simple_lock_init(&panic_lock, 0); @@ -234,8 +240,10 @@ panic_prologue(const char *str) spl_t s; if (kdebug_enable) { - ml_set_interrupts_enabled(TRUE); - kdbg_dump_trace_to_file("/var/tmp/panic.trace"); + if (get_preemption_level() == 0 && !ml_at_interrupt_context()) { + ml_set_interrupts_enabled(TRUE); + kdbg_dump_trace_to_file("/var/tmp/panic.trace"); + } } s = splhigh(); @@ -316,6 +324,7 @@ panic(const char *str, ...) va_list listp; spl_t s; + /* panic_caller is initialized to 0. If set, don't change it */ if ( ! panic_caller ) panic_caller = (unsigned long)(char *)__builtin_return_address(0); @@ -343,6 +352,7 @@ panic_context(unsigned int reason, void *ctx, const char *str, ...) va_list listp; spl_t s; + /* panic_caller is initialized to 0. If set, don't change it */ if ( ! panic_caller ) panic_caller = (unsigned long)(char *)__builtin_return_address(0); @@ -474,22 +484,20 @@ static void panic_display_model_name(void) { } static void panic_display_kernel_uuid(void) { - char tmp_kernel_uuid[sizeof(kernel_uuid)]; + char tmp_kernel_uuid[sizeof(kernel_uuid_string)]; - if (ml_nofault_copy((vm_offset_t) &kernel_uuid, (vm_offset_t) &tmp_kernel_uuid, sizeof(kernel_uuid)) != sizeof(kernel_uuid)) + if (ml_nofault_copy((vm_offset_t) &kernel_uuid_string, (vm_offset_t) &tmp_kernel_uuid, sizeof(kernel_uuid_string)) != sizeof(kernel_uuid_string)) return; if (tmp_kernel_uuid[0] != '\0') kdb_printf("Kernel UUID: %s\n", tmp_kernel_uuid); } -static void panic_display_kernel_aslr(void) { -#if defined(__x86_64__) +void panic_display_kernel_aslr(void) { if (vm_kernel_slide) { - kdb_printf("Kernel slide: 0x%016lx\n", vm_kernel_slide); + kdb_printf("Kernel slide: 0x%016lx\n", (unsigned long) vm_kernel_slide); kdb_printf("Kernel text base: %p\n", (void *) vm_kernel_stext); } -#endif } static void panic_display_uptime(void) { @@ -605,11 +613,11 @@ __private_extern__ void panic_display_ztrace(void) #endif /* CONFIG_ZLEAKS */ #if !MACH_KDP -static struct ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}}; +static struct kdp_ether_addr kdp_current_mac_address = {{0, 0, 0, 0, 0, 0}}; /* XXX ugly forward declares to stop warnings */ void *kdp_get_interface(void); -void kdp_set_ip_and_mac_addresses(struct in_addr *, struct ether_addr *); +void kdp_set_ip_and_mac_addresses(struct kdp_in_addr *, struct kdp_ether_addr *); void kdp_set_gateway_mac(void *); void kdp_set_interface(void *); void kdp_register_send_receive(void *, void *); @@ -628,7 +636,7 @@ unsigned int kdp_get_ip_address(void ) { return 0; } -struct ether_addr +struct kdp_ether_addr kdp_get_mac_addr(void) { return kdp_current_mac_address; @@ -636,8 +644,8 @@ kdp_get_mac_addr(void) void kdp_set_ip_and_mac_addresses( - __unused struct in_addr *ipaddr, - __unused struct ether_addr *macaddr) + __unused struct kdp_in_addr *ipaddr, + __unused struct kdp_ether_addr *macaddr) {} void @@ -674,3 +682,10 @@ kdp_stack_snapshot_bytes_traced(void) } #endif + +#if !CONFIG_TELEMETRY +int telemetry_gather(user_addr_t buffer __unused, uint32_t *length __unused, 
boolean_t mark __unused) +{ + return KERN_NOT_SUPPORTED; +} +#endif diff --git a/osfmk/kern/debug.h b/osfmk/kern/debug.h index c773ca623..a61206294 100644 --- a/osfmk/kern/debug.h +++ b/osfmk/kern/debug.h @@ -31,6 +31,11 @@ #include #include +#include + +#ifndef XNU_KERNEL_PRIVATE +#include +#endif #ifdef __APPLE_API_PRIVATE #ifdef __APPLE_API_UNSTABLE @@ -45,6 +50,7 @@ struct thread_snapshot { uint64_t user_time; uint64_t system_time; int32_t state; + int32_t priority; // static priority int32_t sched_pri; // scheduled (current) priority int32_t sched_flags; // scheduler flags char ss_flags; @@ -53,23 +59,37 @@ struct thread_snapshot { struct task_snapshot { uint32_t snapshot_magic; int32_t pid; - uint32_t nloadinfos; + uint64_t uniqueid; uint64_t user_time_in_terminated_threads; uint64_t system_time_in_terminated_threads; + uint8_t shared_cache_identifier[16]; + uint64_t shared_cache_slide; + uint32_t nloadinfos; int suspend_count; int task_size; // pages int faults; // number of page faults int pageins; // number of actual pageins int cow_faults; // number of copy-on-write faults - char ss_flags; + uint32_t ss_flags; /* We restrict ourselves to a statically defined * (current as of 2009) length for the * p_comm string, due to scoping issues (osfmk/bsd and user/kernel * binary compatibility). */ char p_comm[17]; + uint32_t was_throttled; + uint32_t did_throttle; + uint32_t latency_qos; } __attribute__ ((packed)); +struct micro_snapshot { + uint32_t snapshot_magic; + uint32_t ms_cpu; /* cpu number this snapshot was recorded on */ + uint64_t ms_time; /* time at sample (seconds) */ + uint64_t ms_time_microsecs; + uint8_t ms_flags; + uint16_t ms_opaque_flags; /* managed by external entity, e.g. fdrmicrod */ +} __attribute__ ((packed)); struct mem_and_io_snapshot { uint32_t snapshot_magic; @@ -80,33 +100,107 @@ struct mem_and_io_snapshot { uint32_t wired_pages; uint32_t speculative_pages; uint32_t throttled_pages; + uint32_t filebacked_pages; + uint32_t compressions; + uint32_t decompressions; + uint32_t compressor_size; int busy_buffer_count; uint32_t pages_wanted; uint32_t pages_reclaimed; uint8_t pages_wanted_reclaimed_valid; // did mach_vm_pressure_monitor succeed? } __attribute__((packed)); +struct stack_snapshot_frame32 { + uint32_t lr; + uint32_t sp; +}; -enum { - kUser64_p = 0x1, - kKernel64_p = 0x2, - kHasDispatchSerial = 0x4, - kTerminatedSnapshot = 0x8, - kPidSuspended = 0x10, // true for suspended task - kFrozen = 0x20 // true for hibernated task (along with pidsuspended) +struct stack_snapshot_frame64 { + uint64_t lr; + uint64_t sp; +}; + +struct _dyld_cache_header +{ + char magic[16]; // e.g. 
"dyld_v0 i386" + uint32_t mappingOffset; // file offset to first dyld_cache_mapping_info + uint32_t mappingCount; // number of dyld_cache_mapping_info entries + uint32_t imagesOffset; // file offset to first dyld_cache_image_info + uint32_t imagesCount; // number of dyld_cache_image_info entries + uint64_t dyldBaseAddress; // base address of dyld when cache was built + uint64_t codeSignatureOffset; // file offset of code signature blob + uint64_t codeSignatureSize; // size of code signature blob (zero means to end of file) + uint64_t slideInfoOffset; // file offset of kernel slid info + uint64_t slideInfoSize; // size of kernel slid info + uint64_t localSymbolsOffset; // file offset of where local symbols are stored + uint64_t localSymbolsSize; // size of local symbols information + uint8_t uuid[16]; // unique value for each shared cache file +}; + +struct dyld_uuid_info_32 { + uint32_t imageLoadAddress; /* base address image is mapped at */ + uuid_t imageUUID; +}; + +struct dyld_uuid_info_64 { + uint64_t imageLoadAddress; /* base address image is mapped at */ + uuid_t imageUUID; +}; + +enum micro_snapshot_flags { + kInterruptRecord = 0x1, + kTimerArmingRecord = 0x2, + kUserMode = 0x4, /* interrupted usermode, or armed by usermode */ +}; + +/* + * Flags used in the following assortment of snapshots. + */ +enum generic_snapshot_flags { + kUser64_p = 0x1, + kKernel64_p = 0x2 +}; + + enum task_snapshot_flags { + kTaskRsrcFlagged = 0x4, // In the EXC_RESOURCE danger zone? + kTerminatedSnapshot = 0x8, + kPidSuspended = 0x10, // true for suspended task + kFrozen = 0x20, // true for hibernated task (along with pidsuspended) + kTaskDarwinBG = 0x40, + kTaskExtDarwinBG = 0x80, + kTaskVisVisible = 0x100, + kTaskVisNonvisible = 0x200, + kTaskIsForeground = 0x400, + kTaskIsBoosted = 0x800, + kTaskIsSuppressed = 0x1000, + kTaskIsTimerThrottled = 0x2000 /* deprecated */ + }; + +enum thread_snapshot_flags { + kHasDispatchSerial = 0x4, + kStacksPCOnly = 0x8, /* Stack traces have no frame pointers. */ + kThreadDarwinBG = 0x10 /* Thread is darwinbg */ }; #define VM_PRESSURE_TIME_WINDOW 5 /* seconds */ enum { - STACKSHOT_GET_DQ = 0x1, - STACKSHOT_SAVE_LOADINFO = 0x2, - STACKSHOT_GET_GLOBAL_MEM_STATS = 0x4 + STACKSHOT_GET_DQ = 0x01, + STACKSHOT_SAVE_LOADINFO = 0x02, + STACKSHOT_GET_GLOBAL_MEM_STATS = 0x04, + STACKSHOT_SAVE_KEXT_LOADINFO = 0x08, + STACKSHOT_GET_MICROSTACKSHOT = 0x10, + STACKSHOT_GLOBAL_MICROSTACKSHOT_ENABLE = 0x20, + STACKSHOT_GLOBAL_MICROSTACKSHOT_DISABLE = 0x40, + STACKSHOT_SET_MICROSTACKSHOT_MARK = 0x80, + STACKSHOT_SAVE_KERNEL_FRAMES_ONLY = 0x100, + STACKSHOT_GET_BOOT_PROFILE = 0x200, }; #define STACKSHOT_THREAD_SNAPSHOT_MAGIC 0xfeedface #define STACKSHOT_TASK_SNAPSHOT_MAGIC 0xdecafbad #define STACKSHOT_MEM_AND_IO_SNAPSHOT_MAGIC 0xbfcabcde +#define STACKSHOT_MICRO_SNAPSHOT_MAGIC 0x31c54011 #endif /* __APPLE_API_UNSTABLE */ #endif /* __APPLE_API_PRIVATE */ @@ -116,7 +210,8 @@ enum { extern unsigned int systemLogDiags; extern char debug_buf[]; extern unsigned int debug_boot_arg; -extern char kernel_uuid[]; +extern unsigned char *kernel_uuid; +extern char kernel_uuid_string[]; #ifdef MACH_KERNEL_PRIVATE @@ -166,6 +261,7 @@ void unpackA(char *inbuf, uint32_t length); void panic_display_system_configuration(void); void panic_display_zprint(void); +void panic_display_kernel_aslr(void); #if CONFIG_ZLEAKS void panic_display_ztrace(void); #endif /* CONFIG_ZLEAKS */ @@ -190,6 +286,7 @@ void panic_display_ztrace(void); * post-panic crashdump/paniclog * dump. 
*/ +#define DB_NMI_BTN_ENA 0x8000 /* Enable button to directly trigger NMI */ #if DEBUG /* diff --git a/osfmk/kern/exc_resource.h b/osfmk/kern/exc_resource.h new file mode 100644 index 000000000..a48f12608 --- /dev/null +++ b/osfmk/kern/exc_resource.h @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2011-2012 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * Mach Operating System + * Copyright (c) 1989 Carnegie-Mellon University + * Copyright (c) 1988 Carnegie-Mellon University + * Copyright (c) 1987 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * EXC_RESOURCE related macros, namespace etc. + */ + +#ifndef _EXC_RESOURCE_H_ +#define _EXC_RESOURCE_H_ + +/* + * Generic exception code format: + * + * code: + * +----------------------------------------------------------+ + * |[63:61] type | [60:58] flavor | [57:0] type-specific data | + * +----------------------------------------------------------+ + */ + + +/* EXC_RESOURCE type and flavor decoding routines */ +#define EXC_RESOURCE_DECODE_RESOURCE_TYPE(code) \ + (((code) >> 61) & 0x7ULL) +#define EXC_RESOURCE_DECODE_FLAVOR(code) \ + (((code) >> 58) & 0x7ULL) + +/* EXC_RESOURCE Types */ +#define RESOURCE_TYPE_CPU 1 +#define RESOURCE_TYPE_WAKEUPS 2 +#define RESOURCE_TYPE_MEMORY 3 + +/* RESOURCE_TYPE_CPU flavors */ +#define FLAVOR_CPU_MONITOR 1 + +/* + * RESOURCE_TYPE_CPU exception code & subcode. + * + * This is sent by the kernel when the CPU usage monitor + * is tripped. 
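The [63:61]/[60:58] split documented above means a catcher can dispatch on resource type without knowing any flavor-specific layout. A sketch, assuming code is the first exception code word of an EXC_RESOURCE message and using only the macros and constants defined in this header:

    #include <stdint.h>
    #include <stdio.h>

    static void
    dispatch_exc_resource(uint64_t code)
    {
        switch (EXC_RESOURCE_DECODE_RESOURCE_TYPE(code)) {
        case RESOURCE_TYPE_CPU:
            printf("CPU monitor tripped, flavor %llu\n",
                (unsigned long long)EXC_RESOURCE_DECODE_FLAVOR(code));
            break;
        case RESOURCE_TYPE_WAKEUPS:
            printf("idle-wakeups monitor tripped, flavor %llu\n",
                (unsigned long long)EXC_RESOURCE_DECODE_FLAVOR(code));
            break;
        case RESOURCE_TYPE_MEMORY:
            printf("memory high watermark crossed, flavor %llu\n",
                (unsigned long long)EXC_RESOURCE_DECODE_FLAVOR(code));
            break;
        }
    }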
[See proc_set_cpumon_params()] + * + * code: + * +-----------------------------------------------+ + * |[63:61] RESOURCE |[60:58] FLAVOR_CPU_ |[57:32] | + * |_TYPE_CPU |MONITOR |Unused | + * +-----------------------------------------------+ + * |[31:7] Interval (sec) | [6:0] CPU limit (%)| + * +-----------------------------------------------+ + * + * subcode: + * +-----------------------------------------------+ + * | | [6:0] % of CPU | + * | | actually consumed | + * +-----------------------------------------------+ + * + */ + +/* RESOURCE_TYPE_CPU decoding macros */ +#define EXC_RESOURCE_CPUMONITOR_DECODE_INTERVAL(code) \ + (((code) >> 7) & 0x1FFFFFFULL) +#define EXC_RESOURCE_CPUMONITOR_DECODE_PERCENTAGE(code) \ + ((code) & 0x7FULL) +#define EXC_RESOURCE_CPUMONITOR_DECODE_PERCENTAGE_OBSERVED(subcode) \ + ((subcode) & 0x7FULL) + + +/* RESOURCE_TYPE_WAKEUPS flavors */ +#define FLAVOR_WAKEUPS_MONITOR 1 + +/* + * RESOURCE_TYPE_WAKEUPS exception code & subcode. + * + * This is sent by the kernel when the platform idle + * wakeups monitor is tripped. + * [See proc_set_wakeupsmon_params()] + * + * code: + * +-----------------------------------------------+ + * |[63:61] RESOURCE |[60:58] FLAVOR_ |[57:32] | + * |_TYPE_WAKEUPS |WAKEUPS_MONITOR |Unused | + * +-----------------------------------------------+ + * | [31:20] Observation | [19:0] # of wakeups | + * | interval (sec) | permitted (per sec) | + * +-----------------------------------------------+ + * + * subcode: + * +-----------------------------------------------+ + * | | [19:0] # of wakeups | + * | | observed (per sec) | + * +-----------------------------------------------+ + * + */ + +#define EXC_RESOURCE_CPUMONITOR_DECODE_WAKEUPS_PERMITTED(code) \ + ((code) & 0xFFFFFULL) +#define EXC_RESOURCE_CPUMONITOR_DECODE_OBSERVATION_INTERVAL(code) \ + (((code) >> 20) & 0xFFFULL) +#define EXC_RESOURCE_CPUMONITOR_DECODE_WAKEUPS_OBSERVED(subcode) \ + ((subcode) & 0xFFFFFULL) + +/* RESOURCE_TYPE_MEMORY flavors */ +#define FLAVOR_HIGH_WATERMARK 1 + +/* + * RESOURCE_TYPE_MEMORY / FLAVOR_HIGH_WATERMARK + * exception code & subcode. + * + * This is sent by the kernel when a task crosses its high + * watermark memory limit.
+ * + * code: + * +------------------------------------------------+ + * |[63:61] RESOURCE |[60:58] FLAVOR_HIGH_ |[57:32] | + * |_TYPE_MEMORY |WATERMARK |Unused | + * +------------------------------------------------+ + * | | [12:0] HWM limit (MB)| + * +------------------------------------------------+ + * + * subcode: + * +------------------------------------------------+ + * | unused | + * +------------------------------------------------+ + * + */ + +#define EXC_RESOURCE_HWM_DECODE_LIMIT(code) \ + ((code) & 0x1FFFULL) + + +#ifdef KERNEL + +/* EXC_RESOURCE type and flavor encoding macros */ +#define EXC_RESOURCE_ENCODE_TYPE(code, type) \ + ((code) |= (((uint64_t)(type) & 0x7ULL) << 61)) +#define EXC_RESOURCE_ENCODE_FLAVOR(code, flavor) \ + ((code) |= (((uint64_t)(flavor) & 0x7ULL) << 58)) + +/* RESOURCE_TYPE_CPU::FLAVOR_CPU_MONITOR specific encoding macros */ +#define EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code, interval) \ + ((code) |= (((uint64_t)(interval) & 0x1FFFFFFULL) << 7)) +#define EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code, percentage) \ + ((code) |= (((uint64_t)(percentage) & 0x7FULL))) + +/* RESOURCE_TYPE_WAKEUPS::FLAVOR_WAKEUPS_MONITOR specific encoding macros */ +#define EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code, num) \ + ((code) |= ((uint64_t)(num) & 0xFFFFFULL)) +#define EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code, num) \ + ((code) |= (((uint64_t)(num) & 0xFFFULL) << 20)) +#define EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(subcode, num) \ + ((subcode) |= ((uint64_t)(num) & 0xFFFFFULL)) + +/* RESOURCE_TYPE_MEMORY::FLAVOR_HIGH_WATERMARK specific encoding macros */ +#define EXC_RESOURCE_HWM_ENCODE_LIMIT(code, num) \ + ((code) |= ((uint64_t)(num) & 0x1FFFULL)) + +#endif /* KERNEL */ + + +#endif /* _EXC_RESOURCE_H_ */ diff --git a/osfmk/kern/exception.c b/osfmk/kern/exception.c index 8df6e268b..d8128145d 100644 --- a/osfmk/kern/exception.c +++ b/osfmk/kern/exception.c @@ -144,6 +144,19 @@ exception_deliver( if (!thread->active) return KERN_SUCCESS; + /* + * If there are no exception actions defined for this entity, + * we can't deliver here. + */ + if (excp == NULL) + return KERN_FAILURE; + + assert(exception < EXC_TYPES_COUNT); + if (exception >= EXC_TYPES_COUNT) + return KERN_FAILURE; + + excp = &excp[exception]; + /* * Snapshot the exception action data under lock for consistency. * Hold a reference to the port over the exception_raise_* calls @@ -305,9 +318,8 @@ exception_triage( thread_t thread; task_t task; host_priv_t host_priv; - struct exception_action *excp; - lck_mtx_t *mutex; - kern_return_t kr; + lck_mtx_t *mutex; + kern_return_t kr; assert(exception != EXC_RPC_ALERT); @@ -317,8 +329,7 @@ exception_triage( * Try to raise the exception at the activation level. 
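To make the layouts above concrete, here is a round trip through the CPU-monitor macros with made-up numbers (a standalone sketch; in-kernel code would use kern/assert.h, the encode macros are only visible where KERNEL is defined, and reusing ENCODE_PERCENTAGE on the subcode is my assumption based on the shared [6:0] layout):

    #include <assert.h>
    #include <stdint.h>

    static void
    exc_resource_cpu_roundtrip(void)
    {
        uint64_t code = 0, subcode = 0;

        /* Encode: 43% CPU limit over a 150 s interval; 57% actually used. */
        EXC_RESOURCE_ENCODE_TYPE(code, RESOURCE_TYPE_CPU);
        EXC_RESOURCE_ENCODE_FLAVOR(code, FLAVOR_CPU_MONITOR);
        EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code, 150);
        EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code, 43);
        EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(subcode, 57); /* subcode shares the [6:0] layout */

        /* Decoding recovers every field. */
        assert(EXC_RESOURCE_DECODE_RESOURCE_TYPE(code) == RESOURCE_TYPE_CPU);
        assert(EXC_RESOURCE_DECODE_FLAVOR(code) == FLAVOR_CPU_MONITOR);
        assert(EXC_RESOURCE_CPUMONITOR_DECODE_INTERVAL(code) == 150);
        assert(EXC_RESOURCE_CPUMONITOR_DECODE_PERCENTAGE(code) == 43);
        assert(EXC_RESOURCE_CPUMONITOR_DECODE_PERCENTAGE_OBSERVED(subcode) == 57);
    }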
*/ mutex = &thread->mutex; - excp = &thread->exc_actions[exception]; - kr = exception_deliver(thread, exception, code, codeCnt, excp, mutex); + kr = exception_deliver(thread, exception, code, codeCnt, thread->exc_actions, mutex); if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) goto out; @@ -327,8 +338,7 @@ exception_triage( */ task = current_task(); mutex = &task->lock; - excp = &task->exc_actions[exception]; - kr = exception_deliver(thread, exception, code, codeCnt, excp, mutex); + kr = exception_deliver(thread, exception, code, codeCnt, task->exc_actions, mutex); if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) goto out; @@ -337,8 +347,7 @@ exception_triage( */ host_priv = host_priv_self(); mutex = &host_priv->lock; - excp = &host_priv->exc_actions[exception]; - kr = exception_deliver(thread, exception, code, codeCnt, excp, mutex); + kr = exception_deliver(thread, exception, code, codeCnt, host_priv->exc_actions, mutex); if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) goto out; @@ -349,7 +358,8 @@ exception_triage( (void) task_terminate(task); out: - if ((exception != EXC_CRASH) && (exception != EXC_RESOURCE)) + if ((exception != EXC_CRASH) && (exception != EXC_RESOURCE) && + (exception != EXC_GUARD)) thread_exception_return(); return; } @@ -361,7 +371,6 @@ bsd_exception( mach_msg_type_number_t codeCnt) { task_t task; - struct exception_action *excp; lck_mtx_t *mutex; thread_t self = current_thread(); kern_return_t kr; @@ -371,9 +380,8 @@ bsd_exception( */ task = current_task(); mutex = &task->lock; - excp = &task->exc_actions[exception]; - kr = exception_deliver(self, exception, code, codeCnt, excp, mutex); + kr = exception_deliver(self, exception, code, codeCnt, task->exc_actions, mutex); if (kr == KERN_SUCCESS || kr == MACH_RCV_PORT_DIED) return(KERN_SUCCESS); @@ -408,7 +416,6 @@ kern_return_t task_exception_notify(exception_type_t exception, kern_return_t sys_perf_notify(thread_t thread, int pid) { host_priv_t hostp; - struct exception_action *excp; ipc_port_t xport; wait_interrupt_t wsave; kern_return_t ret; @@ -419,8 +426,7 @@ kern_return_t sys_perf_notify(thread_t thread, int pid) code[1] = pid; /* Pass out the pid */ struct task *task = thread->task; - excp = &hostp->exc_actions[EXC_RPC_ALERT]; - xport = excp->port; + xport = hostp->exc_actions[EXC_RPC_ALERT].port; /* Make sure we're not catching our own exception */ if (!IP_VALID(xport) || @@ -436,7 +442,7 @@ kern_return_t sys_perf_notify(thread_t thread, int pid) EXC_RPC_ALERT, code, 2, - excp, + hostp->exc_actions, &hostp->lock); (void)thread_interrupt_level(wsave); diff --git a/osfmk/kern/hibernate.c b/osfmk/kern/hibernate.c index 431c4795c..7b3d38563 100644 --- a/osfmk/kern/hibernate.c +++ b/osfmk/kern/hibernate.c @@ -41,47 +41,84 @@ #include #include #include +#include /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +boolean_t need_to_unlock_decompressor = FALSE; + kern_return_t -hibernate_setup(IOHibernateImageHeader * header, - uint32_t free_page_ratio, - uint32_t free_page_time, - boolean_t vmflush, - hibernate_page_list_t ** page_list_ret, - hibernate_page_list_t ** page_list_wired_ret, - hibernate_page_list_t ** page_list_pal_ret) +hibernate_alloc_page_lists( + hibernate_page_list_t ** page_list_ret, + hibernate_page_list_t ** page_list_wired_ret, + hibernate_page_list_t ** page_list_pal_ret) { + kern_return_t retval = KERN_SUCCESS; + hibernate_page_list_t * page_list = NULL; hibernate_page_list_t * page_list_wired = NULL; hibernate_page_list_t * page_list_pal = NULL; 
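exception_triage() above walks thread, then task, then host exception actions, so a process that wants to observe its own EXC_RESOURCE notifications can install a task-level port, which the task stage of that escalation will find. A user-space sketch using the standard Mach calls (error handling elided):

    #include <mach/mach.h>

    static mach_port_t
    install_resource_exception_port(void)
    {
        mach_port_t port = MACH_PORT_NULL;

        mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port);
        mach_port_insert_right(mach_task_self(), port, port,
            MACH_MSG_TYPE_MAKE_SEND);

        /* exception_triage() consults this after any thread-level ports. */
        task_set_exception_ports(mach_task_self(), EXC_MASK_RESOURCE,
            port, EXCEPTION_DEFAULT, THREAD_STATE_NONE);
        return port;
    }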
- uint32_t gobble_count; - - *page_list_ret = NULL; - *page_list_wired_ret = NULL; - *page_list_pal_ret = NULL; - - if (vmflush && dp_isssd) - hibernate_flush_memory(); page_list = hibernate_page_list_allocate(TRUE); - if (!page_list) - return (KERN_RESOURCE_SHORTAGE); + if (!page_list) { + + retval = KERN_RESOURCE_SHORTAGE; + goto done; + } page_list_wired = hibernate_page_list_allocate(FALSE); if (!page_list_wired) { - kfree(page_list, page_list->list_size); - return (KERN_RESOURCE_SHORTAGE); + kfree(page_list, page_list->list_size); + + retval = KERN_RESOURCE_SHORTAGE; + goto done; } page_list_pal = hibernate_page_list_allocate(FALSE); if (!page_list_pal) { - kfree(page_list, page_list->list_size); - kfree(page_list_wired, page_list_wired->list_size); - return (KERN_RESOURCE_SHORTAGE); + kfree(page_list, page_list->list_size); + kfree(page_list_wired, page_list_wired->list_size); + + retval = KERN_RESOURCE_SHORTAGE; + goto done; + } + *page_list_ret = page_list; + *page_list_wired_ret = page_list_wired; + *page_list_pal_ret = page_list_pal; + +done: + return (retval); + +} + +extern int sync_internal(void); + +kern_return_t +hibernate_setup(IOHibernateImageHeader * header, + uint32_t free_page_ratio, + uint32_t free_page_time, + boolean_t vmflush, + hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired __unused, + hibernate_page_list_t * page_list_pal __unused) +{ + uint32_t gobble_count; + kern_return_t retval = KERN_SUCCESS; + + hibernate_create_paddr_map(); + + if (vmflush && (COMPRESSED_PAGER_IS_ACTIVE || dp_isssd)) { + + sync_internal(); + + if (COMPRESSED_PAGER_IS_ACTIVE) { + vm_decompressor_lock(); + need_to_unlock_decompressor = TRUE; + } + hibernate_flush_memory(); } + // pages we could force out to reduce hibernate image size gobble_count = (uint32_t)((((uint64_t) page_list->page_count) * ((uint64_t) free_page_ratio)) / 100); @@ -90,18 +127,18 @@ hibernate_setup(IOHibernateImageHeader * header, hibernate_processor_setup(header); if (gobble_count) - hibernate_gobble_pages(gobble_count, free_page_time); + hibernate_gobble_pages(gobble_count, free_page_time); HIBLOG("hibernate_alloc_pages act %d, inact %d, anon %d, throt %d, spec %d, wire %d, wireinit %d\n", vm_page_active_count, vm_page_inactive_count, vm_page_anonymous_count, vm_page_throttled_count, vm_page_speculative_count, vm_page_wire_count, vm_page_wire_count_initial); - *page_list_ret = page_list; - *page_list_wired_ret = page_list_wired; - *page_list_pal_ret = page_list_pal; - - return (KERN_SUCCESS); + if (retval != KERN_SUCCESS && need_to_unlock_decompressor == TRUE) { + need_to_unlock_decompressor = FALSE; + vm_decompressor_unlock(); + } + return (retval); } kern_return_t @@ -118,6 +155,13 @@ hibernate_teardown(hibernate_page_list_t * page_list, if (page_list_pal) kfree(page_list_pal, page_list_pal->list_size); + if (COMPRESSED_PAGER_IS_ACTIVE) { + if (need_to_unlock_decompressor == TRUE) { + need_to_unlock_decompressor = FALSE; + vm_decompressor_unlock(); + } + vm_compressor_do_warmup(); + } return (KERN_SUCCESS); } diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c index 75b1e5f83..e61673978 100644 --- a/osfmk/kern/host.c +++ b/osfmk/kern/host.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -87,6 +88,8 @@ #include #include +#include +#include host_data_t realhost; @@ -258,6 +261,17 @@ host_info( return (KERN_SUCCESS); } + case HOST_VM_PURGABLE: + { + if (*count < HOST_VM_PURGABLE_COUNT) + return (KERN_FAILURE); + + vm_purgeable_stats((vm_purgeable_info_t) info, 
NULL); + + *count = HOST_VM_PURGABLE_COUNT; + return (KERN_SUCCESS); + } + default: return (KERN_INVALID_ARGUMENT); } @@ -344,11 +358,7 @@ host_statistics( } } stat32->inactive_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_inactive_count); -#if CONFIG_EMBEDDED - stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count); -#else stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count + vm_page_throttled_count + vm_lopage_free_count); -#endif stat32->zero_fill_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.zero_fill_count); stat32->reactivations = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.reactivations); stat32->pageins = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.pageins); @@ -391,11 +401,14 @@ host_statistics( if (*count < HOST_CPU_LOAD_INFO_COUNT) return (KERN_FAILURE); -#define GET_TICKS_VALUE(processor, state, timer) \ +#define GET_TICKS_VALUE(state, ticks) \ MACRO_BEGIN \ cpu_load_info->cpu_ticks[(state)] += \ - (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, timer)) \ - / hz_tick_interval); \ + (uint32_t)(ticks / hz_tick_interval); \ +MACRO_END +#define GET_TICKS_VALUE_FROM_TIMER(processor, state, timer) \ +MACRO_BEGIN \ + GET_TICKS_VALUE(state, timer_grab(&PROCESSOR_DATA(processor, timer))); \ MACRO_END cpu_load_info = (host_cpu_load_info_t)info; @@ -407,28 +420,39 @@ MACRO_END simple_lock(&processor_list_lock); for (processor = processor_list; processor != NULL; processor = processor->processor_list) { - timer_data_t idle_temp; - timer_t idle_state; + timer_t idle_state; + uint64_t idle_time_snapshot1, idle_time_snapshot2; + uint64_t idle_time_tstamp1, idle_time_tstamp2; + + /* See discussion in processor_info(PROCESSOR_CPU_LOAD_INFO) */ - GET_TICKS_VALUE(processor, CPU_STATE_USER, user_state); + GET_TICKS_VALUE_FROM_TIMER(processor, CPU_STATE_USER, user_state); if (precise_user_kernel_time) { - GET_TICKS_VALUE(processor, CPU_STATE_SYSTEM, system_state); + GET_TICKS_VALUE_FROM_TIMER(processor, CPU_STATE_SYSTEM, system_state); } else { /* system_state may represent either sys or user */ - GET_TICKS_VALUE(processor, CPU_STATE_USER, system_state); + GET_TICKS_VALUE_FROM_TIMER(processor, CPU_STATE_USER, system_state); } idle_state = &PROCESSOR_DATA(processor, idle_state); - idle_temp = *idle_state; - - if (PROCESSOR_DATA(processor, current_state) != idle_state || - timer_grab(&idle_temp) != timer_grab(idle_state)) - GET_TICKS_VALUE(processor, CPU_STATE_IDLE, idle_state); - else { - timer_advance(&idle_temp, mach_absolute_time() - idle_temp.tstamp); - - cpu_load_info->cpu_ticks[CPU_STATE_IDLE] += - (uint32_t)(timer_grab(&idle_temp) / hz_tick_interval); + idle_time_snapshot1 = timer_grab(idle_state); + idle_time_tstamp1 = idle_state->tstamp; + + if (PROCESSOR_DATA(processor, current_state) != idle_state) { + /* Processor is non-idle, so idle timer should be accurate */ + GET_TICKS_VALUE_FROM_TIMER(processor, CPU_STATE_IDLE, idle_state); + } else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) || + (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))){ + /* Idle timer is being updated concurrently, second stamp is good enough */ + GET_TICKS_VALUE(CPU_STATE_IDLE, idle_time_snapshot2); + } else { + /* + * Idle timer may be very stale. 
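The three-way branch being introduced here is a lock-free snapshot-and-validate read of a timer that its owner may still be updating: read value and timestamp, use them directly if the owner is not idle, re-read if a second look shows movement, and otherwise extrapolate the stable-but-stale value. Distilled into a standalone sketch (grab, stamp, and owner_is_idle stand in for timer_grab(), the tstamp field, and the current_state test above):

    #include <stdint.h>

    static uint64_t
    read_possibly_live_timer(uint64_t (*grab)(void), uint64_t (*stamp)(void),
        int owner_is_idle, uint64_t now)
    {
        uint64_t v1 = grab(), t1 = stamp();

        if (!owner_is_idle)
            return v1;              /* quiescent: the timer is accurate */
        if (v1 != grab() || t1 != stamp())
            return grab();          /* concurrent update: the re-read is fresh enough */
        return v1 + (now - t1);     /* stable but stale: extrapolate */
    }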
Fortunately we have established + * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging + */ + idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1; + + GET_TICKS_VALUE(CPU_STATE_IDLE, idle_time_snapshot1); } } simple_unlock(&processor_list_lock); @@ -450,19 +474,21 @@ MACRO_END tinfo->task_platform_idle_wakeups = dead_task_statistics.task_platform_idle_wakeups; tinfo->task_timer_wakeups_bin_1 = dead_task_statistics.task_timer_wakeups_bin_1; + tinfo->task_timer_wakeups_bin_2 = dead_task_statistics.task_timer_wakeups_bin_2; tinfo->total_user = dead_task_statistics.total_user_time; tinfo->total_system = dead_task_statistics.total_system_time; return (KERN_SUCCESS); - } + } default: return (KERN_INVALID_ARGUMENT); } } +extern uint32_t c_segment_pages_compressed; kern_return_t host_statistics64( @@ -483,8 +509,11 @@ host_statistics64( register processor_t processor; register vm_statistics64_t stat; vm_statistics64_data_t host_vm_stat; + mach_msg_type_number_t original_count; + unsigned int local_q_internal_count; + unsigned int local_q_external_count; - if (*count < HOST_VM_INFO64_COUNT) + if (*count < HOST_VM_INFO64_REV0_COUNT) return (KERN_FAILURE); processor = processor_list; @@ -505,6 +534,10 @@ host_statistics64( host_vm_stat.cow_faults += stat->cow_faults; host_vm_stat.lookups += stat->lookups; host_vm_stat.hits += stat->hits; + host_vm_stat.compressions += stat->compressions; + host_vm_stat.decompressions += stat->decompressions; + host_vm_stat.swapins += stat->swapins; + host_vm_stat.swapouts += stat->swapouts; } simple_unlock(&processor_list_lock); @@ -515,6 +548,8 @@ host_statistics64( stat->free_count = vm_page_free_count + vm_page_speculative_count; stat->active_count = vm_page_active_count; + local_q_internal_count = 0; + local_q_external_count = 0; if (vm_page_local_q) { for (i = 0; i < vm_page_local_q_count; i++) { struct vpl *lq; @@ -522,14 +557,14 @@ host_statistics64( lq = &vm_page_local_q[i].vpl_un.vpl; stat->active_count += lq->vpl_count; + local_q_internal_count += + lq->vpl_internal_count; + local_q_external_count += + lq->vpl_external_count; } } stat->inactive_count = vm_page_inactive_count; -#if CONFIG_EMBEDDED - stat->wire_count = vm_page_wire_count; -#else stat->wire_count = vm_page_wire_count + vm_page_throttled_count + vm_lopage_free_count; -#endif stat->zero_fill_count = host_vm_stat.zero_fill_count; stat->reactivations = host_vm_stat.reactivations; stat->pageins = host_vm_stat.pageins; @@ -539,14 +574,41 @@ host_statistics64( stat->lookups = host_vm_stat.lookups; stat->hits = host_vm_stat.hits; - /* rev1 added "purgable" info */ stat->purgeable_count = vm_page_purgeable_count; stat->purges = vm_page_purged_count; - /* rev2 added "speculative" info */ stat->speculative_count = vm_page_speculative_count; - *count = HOST_VM_INFO64_COUNT; + /* + * Fill in extra info added in later revisions of the + * vm_statistics data structure. Fill in only what can fit + * in the data structure the caller gave us ! 
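That comment is the contract: the caller's *count says how large its buffer is, and the kernel fills in rev0 fields, then rev1 fields, only as far as the buffer allows, reporting back what it wrote. From user space the standard call looks like this (a sketch; the fields printed are arbitrary choices):

    #include <mach/mach.h>
    #include <mach/mach_host.h>
    #include <stdio.h>

    static void
    show_vm_stats(void)
    {
        vm_statistics64_data_t vmstat;
        mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;

        if (host_statistics64(mach_host_self(), HOST_VM_INFO64,
                (host_info64_t)&vmstat, &count) == KERN_SUCCESS) {
            /* On return, count tells us which revision the kernel filled in. */
            printf("free pages %u, compressions %llu (count %u)\n",
                vmstat.free_count,
                (unsigned long long)vmstat.compressions, count);
        }
    }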
+ */ + original_count = *count; + *count = HOST_VM_INFO64_REV0_COUNT; /* rev0 already filled in */ + if (original_count >= HOST_VM_INFO64_REV1_COUNT) { + /* rev1 added "throttled count" */ + stat->throttled_count = vm_page_throttled_count; + /* rev1 added "compression" info */ + stat->compressor_page_count = VM_PAGE_COMPRESSOR_COUNT; + stat->compressions = host_vm_stat.compressions; + stat->decompressions = host_vm_stat.decompressions; + stat->swapins = host_vm_stat.swapins; + stat->swapouts = host_vm_stat.swapouts; + /* rev1 added: + * "external page count" + * "anonymous page count" + * "total # of pages (uncompressed) held in the compressor" + */ + stat->external_page_count = + (vm_page_pageable_external_count + + local_q_external_count); + stat->internal_page_count = + (vm_page_pageable_internal_count + + local_q_internal_count); + stat->total_uncompressed_pages_in_compressor = c_segment_pages_compressed; + *count = HOST_VM_INFO64_REV1_COUNT; + } return(KERN_SUCCESS); } @@ -662,7 +724,8 @@ host_page_size( if (host == HOST_NULL) return(KERN_INVALID_ARGUMENT); - *out_page_size = PAGE_SIZE; + vm_map_t map = get_task_map(current_task()); + *out_page_size = vm_map_page_size(map); return(KERN_SUCCESS); } @@ -779,7 +842,8 @@ host_processor_info( assert(pcount != 0); needed = pcount * icount * sizeof(natural_t); - size = round_page(needed); + size = vm_map_round_page(needed, + VM_MAP_PAGE_MASK(ipc_kernel_map)); result = kmem_alloc(ipc_kernel_map, &addr, size); if (result != KERN_SUCCESS) return (KERN_RESOURCE_SHORTAGE); @@ -813,8 +877,13 @@ host_processor_info( if (size != needed) bzero((char *) addr + needed, size - needed); - result = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size), FALSE); + result = vm_map_unwire( + ipc_kernel_map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr + size, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + FALSE); assert(result == KERN_SUCCESS); result = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, (vm_map_size_t)size, TRUE, ©); diff --git a/osfmk/kern/host_notify.h b/osfmk/kern/host_notify.h index 180805c85..85bfa608d 100644 --- a/osfmk/kern/host_notify.h +++ b/osfmk/kern/host_notify.h @@ -45,7 +45,7 @@ void host_notify_port_destroy( void host_notify_calendar_change(void); -void host_notify_init(void) __attribute__((section("__TEXT, initcode"))); +void host_notify_init(void); #endif /* MACH_KERNEL_PRIVATE */ diff --git a/osfmk/kern/host_statistics.h b/osfmk/kern/host_statistics.h index efe59d74e..c67af697e 100644 --- a/osfmk/kern/host_statistics.h +++ b/osfmk/kern/host_statistics.h @@ -48,4 +48,9 @@ MACRO_BEGIN \ OSAddAtomic64(1, (SInt64 *) (&(PROCESSOR_DATA(current_processor(), vm_stat).event))); \ MACRO_END +#define VM_STAT_INCR_BY(event, amount) \ +MACRO_BEGIN \ + OSAddAtomic64((amount), (SInt64 *) (&(PROCESSOR_DATA(current_processor(), vm_stat).event))); \ +MACRO_END + #endif /* _KERN_HOST_STATISTICS_H_ */ diff --git a/osfmk/kern/ipc_host.c b/osfmk/kern/ipc_host.c index 65f1035fe..a0a7ccef5 100644 --- a/osfmk/kern/ipc_host.c +++ b/osfmk/kern/ipc_host.c @@ -705,15 +705,15 @@ host_swap_exception_ports( return KERN_INVALID_ARGUMENT; } } + /* Cannot easily check "new_flavor", but that just means that * the flavor in the generated exception message might be garbage: * GIGO */ host_lock(host_priv); - count = 0; - - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { + assert(EXC_TYPES_COUNT > FIRST_EXCEPTION); + for (count=0, i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT && count < 
*CountCnt; i++) { if (exception_mask & (1 << i)) { for (j = 0; j < count; j++) { /* @@ -741,9 +741,6 @@ host_swap_exception_ports( ipc_port_copy_send(new_port); host_priv->exc_actions[i].behavior = new_behavior; host_priv->exc_actions[i].flavor = new_flavor; - if (count > *CountCnt) { - break; - } } else old_port[i] = IP_NULL; }/* for */ @@ -752,9 +749,11 @@ host_swap_exception_ports( /* * Consume send rights without any lock held. */ - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) + while (--i >= FIRST_EXCEPTION) { if (IP_VALID(old_port[i])) ipc_port_release_send(old_port[i]); + } + if (IP_VALID(new_port)) /* consume send right */ ipc_port_release_send(new_port); *CountCnt = count; diff --git a/osfmk/kern/ipc_host.h b/osfmk/kern/ipc_host.h index 863be913f..e34d69806 100644 --- a/osfmk/kern/ipc_host.h +++ b/osfmk/kern/ipc_host.h @@ -63,7 +63,7 @@ #include /* Initialize IPC host services */ -extern void ipc_host_init(void) __attribute__((section("__TEXT, initcode"))); +extern void ipc_host_init(void); /* Initialize ipc access to processor by allocating a port */ extern void ipc_processor_init( diff --git a/osfmk/kern/ipc_kobject.c b/osfmk/kern/ipc_kobject.c index 89b8f9e68..bb224e8f0 100644 --- a/osfmk/kern/ipc_kobject.c +++ b/osfmk/kern/ipc_kobject.c @@ -73,7 +73,6 @@ #include #include #include -#include #include #include @@ -139,6 +138,9 @@ #include +extern char *proc_name_address(void *p); +extern int proc_pid(void *p); + /* * Routine: ipc_kobject_notify * Purpose: @@ -592,6 +594,11 @@ ipc_kobject_notify( return iokit_notify(request_header); } #endif + case IKOT_TASK_RESUME: + { + return task_suspension_notify(request_header); + } + default: return FALSE; } diff --git a/osfmk/kern/ipc_kobject.h b/osfmk/kern/ipc_kobject.h index 9cf8ffb58..d5bfe7895 100644 --- a/osfmk/kern/ipc_kobject.h +++ b/osfmk/kern/ipc_kobject.h @@ -123,11 +123,12 @@ typedef natural_t ipc_kobject_type_t; #define IKOT_AU_SESSIONPORT 33 #define IKOT_FILEPORT 34 #define IKOT_LABELH 35 +#define IKOT_TASK_RESUME 36 /* * Add new entries here and adjust IKOT_UNKNOWN. * Please keep ipc/ipc_object.c:ikot_print_array up to date. 
*/ -#define IKOT_UNKNOWN 36 /* magic catchall */ +#define IKOT_UNKNOWN 37 /* magic catchall */ #define IKOT_MAX_TYPE (IKOT_UNKNOWN+1) /* # of IKOT_ types */ diff --git a/osfmk/kern/ipc_mig.c b/osfmk/kern/ipc_mig.c index 512073675..632275883 100644 --- a/osfmk/kern/ipc_mig.c +++ b/osfmk/kern/ipc_mig.c @@ -126,7 +126,9 @@ mach_msg_send_from_kernel( return mr; } - mr = ipc_kmsg_send_always(kmsg); + mr = ipc_kmsg_send(kmsg, + MACH_SEND_KERNEL_DEFAULT, + MACH_MSG_TIMEOUT_NONE); if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_destroy(kmsg); } @@ -154,7 +156,9 @@ mach_msg_send_from_kernel_proper( return mr; } - mr = ipc_kmsg_send_always(kmsg); + mr = ipc_kmsg_send(kmsg, + MACH_SEND_KERNEL_DEFAULT, + MACH_MSG_TIMEOUT_NONE); if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_destroy(kmsg); } @@ -162,10 +166,50 @@ mach_msg_send_from_kernel_proper( return mr; } +mach_msg_return_t +mach_msg_send_from_kernel_with_options( + mach_msg_header_t *msg, + mach_msg_size_t send_size, + mach_msg_option_t option, + mach_msg_timeout_t timeout_val) +{ + ipc_kmsg_t kmsg; + mach_msg_return_t mr; + + mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); + if (mr != MACH_MSG_SUCCESS) + return mr; + + mr = ipc_kmsg_copyin_from_kernel(kmsg); + if (mr != MACH_MSG_SUCCESS) { + ipc_kmsg_free(kmsg); + return mr; + } + +#if 11938665 + /* + * Until we are sure of its effects, we are disabling + * importance donation from the kernel-side of user + * threads in importance-donating tasks - unless the + * option to force importance donation is passed in. + */ + if ((option & MACH_SEND_IMPORTANCE) == 0) + option |= MACH_SEND_NOIMPORTANCE; +#endif + mr = ipc_kmsg_send(kmsg, option, timeout_val); + + if (mr != MACH_MSG_SUCCESS) { + ipc_kmsg_destroy(kmsg); + } + + return mr; +} + + #if IKM_SUPPORT_LEGACY mach_msg_return_t -mach_msg_send_from_kernel_with_options( +mach_msg_send_from_kernel_with_options_legacy( mach_msg_header_t *msg, mach_msg_size_t send_size, mach_msg_option_t option, @@ -183,8 +227,17 @@ mach_msg_send_from_kernel_with_options( ipc_kmsg_free(kmsg); return mr; } - + +#if 11938665 + /* + * Until we are sure of its effects, we are disabling + * importance donation from the kernel-side of user + * threads in importance-donating tasks. + */ + option |= MACH_SEND_NOIMPORTANCE; +#endif mr = ipc_kmsg_send(kmsg, option, timeout_val); + if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_destroy(kmsg); } @@ -277,8 +330,6 @@ mach_msg_rpc_from_kernel_body( kmsg->ikm_header->msgh_bits |= MACH_MSGH_BITS(0, MACH_MSG_TYPE_MAKE_SEND_ONCE); - ip_reference(reply); - #if IKM_SUPPORT_LEGACY if(legacy) mr = ipc_kmsg_copyin_from_kernel_legacy(kmsg); @@ -291,7 +342,9 @@ mach_msg_rpc_from_kernel_body( ipc_kmsg_free(kmsg); return mr; } - mr = ipc_kmsg_send_always(kmsg); + mr = ipc_kmsg_send(kmsg, + MACH_SEND_KERNEL_DEFAULT, + MACH_MSG_TIMEOUT_NONE); if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_destroy(kmsg); return mr; @@ -300,24 +353,19 @@ mach_msg_rpc_from_kernel_body( for (;;) { ipc_mqueue_t mqueue; - ip_lock(reply); - if ( !ip_active(reply)) { - ip_unlock(reply); - ip_release(reply); - return MACH_RCV_PORT_DIED; - } + assert(reply->ip_pset_count == 0); + assert(ip_active(reply)); + + /* JMM - why this check? 
*/ if (!self->active) { - ip_unlock(reply); - ip_release(reply); + ipc_port_dealloc_reply(reply); + self->ith_rpc_reply = IP_NULL; return MACH_RCV_INTERRUPTED; } - assert(reply->ip_pset_count == 0); - mqueue = &reply->ip_messages; - ip_unlock(reply); - self->ith_continuation = (void (*)(mach_msg_return_t))0; + mqueue = &reply->ip_messages; ipc_mqueue_receive(mqueue, MACH_MSG_OPTION_NONE, MACH_MSG_SIZE_MAX, @@ -335,12 +383,14 @@ mach_msg_rpc_from_kernel_body( assert(mr == MACH_RCV_INTERRUPTED); + assert(reply == self->ith_rpc_reply); + if (self->handlers) { - ip_release(reply); + ipc_port_dealloc_reply(reply); + self->ith_rpc_reply = IP_NULL; return(mr); } } - ip_release(reply); /* * Check to see how much of the message/trailer can be received. @@ -452,17 +502,18 @@ mach_msg_overwrite( max_trailer->msgh_audit = current_thread()->task->audit_token; max_trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0; max_trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE; - - mr = ipc_kmsg_copyin(kmsg, space, map, FALSE); + + mr = ipc_kmsg_copyin(kmsg, space, map, &option); + if (mr != MACH_MSG_SUCCESS) { ipc_kmsg_free(kmsg); return mr; } - do - mr = ipc_kmsg_send(kmsg, MACH_MSG_OPTION_NONE, - MACH_MSG_TIMEOUT_NONE); - while (mr == MACH_SEND_INTERRUPTED); + do { + mr = ipc_kmsg_send(kmsg, MACH_MSG_OPTION_NONE, MACH_MSG_TIMEOUT_NONE); + } while (mr == MACH_SEND_INTERRUPTED); + assert(mr == MACH_MSG_SUCCESS); } diff --git a/osfmk/kern/ipc_mig.h b/osfmk/kern/ipc_mig.h index ff7326b4b..92fe442c5 100644 --- a/osfmk/kern/ipc_mig.h +++ b/osfmk/kern/ipc_mig.h @@ -155,6 +155,14 @@ mach_msg_rpc_from_kernel_proper( #define mach_msg_rpc_from_kernel mach_msg_rpc_from_kernel_proper +#ifdef XNU_KERNEL_PRIVATE +extern mach_msg_return_t mach_msg_send_from_kernel_with_options_legacy( + mach_msg_header_t *msg, + mach_msg_size_t send_size, + mach_msg_option_t option, + mach_msg_timeout_t timeout_val); +#endif /* XNU_KERNEL_PRIVATE */ + extern mach_msg_return_t mach_msg_send_from_kernel_with_options( mach_msg_header_t *msg, mach_msg_size_t send_size, @@ -168,7 +176,7 @@ __END_DECLS extern void mach_msg_receive_continue(void); /* Initialize kernel server dispatch table */ -extern void mig_init(void) __attribute__((section("__TEXT, initcode"))); +extern void mig_init(void); /* * Kernel implementation of the MIG object base class diff --git a/osfmk/kern/ipc_sync.c b/osfmk/kern/ipc_sync.c index ab24fe06f..3e960b1d9 100644 --- a/osfmk/kern/ipc_sync.c +++ b/osfmk/kern/ipc_sync.c @@ -114,33 +114,14 @@ convert_semaphore_to_port (semaphore_t semaphore) } lock_set_t -convert_port_to_lock_set (ipc_port_t port) +convert_port_to_lock_set (__unused ipc_port_t port) { - lock_set_t lock_set = LOCK_SET_NULL; - - if (IP_VALID (port)) { - ip_lock(port); - if (ip_active(port) && (ip_kotype(port) == IKOT_LOCK_SET)) { - lock_set = (lock_set_t) port->ip_kobject; - lock_set_reference(lock_set); - } - ip_unlock(port); - } - - return (lock_set); + return (LOCK_SET_NULL); } ipc_port_t -convert_lock_set_to_port (lock_set_t lock_set) +convert_lock_set_to_port (__unused lock_set_t lock_set) { - ipc_port_t port; - - if (lock_set == LOCK_SET_NULL) - return IP_NULL; - - /* caller is donating a reference */ - port = ipc_port_make_send(lock_set->port); - lock_set_dereference(lock_set); - return (port); + return (IP_NULL); } diff --git a/osfmk/kern/ipc_tt.c b/osfmk/kern/ipc_tt.c index 61013e399..23a582401 100644 --- a/osfmk/kern/ipc_tt.c +++ b/osfmk/kern/ipc_tt.c @@ -142,6 +142,7 @@ ipc_task_init( itk_lock_init(task); task->itk_self = kport; 
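For the new mach_msg_send_from_kernel_with_options() above, a typical client is a kext firing a notification with a bounded send; note that the #if 11938665 block makes such sends non-importance-donating unless MACH_SEND_IMPORTANCE is passed explicitly. A sketch (the message layout and msgh_id are illustrative):

    #include <mach/mach_types.h>
    #include <mach/message.h>

    static mach_msg_return_t
    send_simple_notification(mach_port_t dest)
    {
        struct {
            mach_msg_header_t header;
            uint32_t          payload;      /* illustrative body */
        } msg = {
            .header = {
                .msgh_bits        = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0),
                .msgh_size        = sizeof(msg),
                .msgh_remote_port = dest,
                .msgh_local_port  = MACH_PORT_NULL,
                .msgh_id          = 0x1234, /* illustrative */
            },
            .payload = 42,
        };

        /* Bounded send; MACH_SEND_NOIMPORTANCE is implied unless
         * MACH_SEND_IMPORTANCE is requested. */
        return mach_msg_send_from_kernel_with_options(&msg.header,
            sizeof(msg), MACH_SEND_TIMEOUT, 10 /* ms */);
    }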
task->itk_nself = nport; + task->itk_resume = IP_NULL; /* Lazily allocated on-demand */ task->itk_sself = ipc_port_make_send(kport); task->itk_space = space; @@ -251,6 +252,7 @@ ipc_task_disable( { ipc_port_t kport; ipc_port_t nport; + ipc_port_t rport; itk_lock(task); kport = task->itk_self; @@ -259,6 +261,23 @@ ipc_task_disable( nport = task->itk_nself; if (nport != IP_NULL) ipc_kobject_set(nport, IKO_NULL, IKOT_NONE); + + rport = task->itk_resume; + if (rport != IP_NULL) { + /* + * From this point onwards this task is no longer accepting + * resumptions. + * + * There are still outstanding suspensions on this task, + * even as it is being torn down. Disconnect the task + * from the rport, thereby "orphaning" the rport. The rport + * itself will go away only when the last suspension holder + * destroys his SO right to it -- when he either + * exits, or tries to actually use that last SO right to + * resume this (now non-existent) task. + */ + ipc_kobject_set(rport, IKO_NULL, IKOT_NONE); + } itk_unlock(task); } @@ -277,6 +296,7 @@ ipc_task_terminate( { ipc_port_t kport; ipc_port_t nport; + ipc_port_t rport; int i; itk_lock(task); @@ -293,6 +313,9 @@ ipc_task_terminate( assert(nport != IP_NULL); task->itk_nself = IP_NULL; + rport = task->itk_resume; + task->itk_resume = IP_NULL; + itk_unlock(task); /* release the naked send rights */ @@ -328,6 +351,8 @@ ipc_task_terminate( /* destroy the kernel ports */ ipc_port_dealloc_kernel(kport); ipc_port_dealloc_kernel(nport); + if (rport != IP_NULL) + ipc_port_dealloc_kernel(rport); itk_lock_destroy(task); } @@ -413,7 +438,6 @@ ipc_thread_init( thread_t thread) { ipc_port_t kport; - int i; kport = ipc_port_alloc_kernel(); if (kport == IP_NULL) @@ -421,17 +445,40 @@ ipc_thread_init( thread->ith_self = kport; thread->ith_sself = ipc_port_make_send(kport); - - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) - thread->exc_actions[i].port = IP_NULL; + thread->exc_actions = NULL; ipc_kobject_set(kport, (ipc_kobject_t)thread, IKOT_THREAD); +#if IMPORTANCE_INHERITANCE + thread->ith_assertions = 0; +#endif + ipc_kmsg_queue_init(&thread->ith_messages); thread->ith_rpc_reply = IP_NULL; } +void +ipc_thread_init_exc_actions( + thread_t thread) +{ + assert(thread->exc_actions == NULL); + + thread->exc_actions = kalloc(sizeof(struct exception_action) * EXC_TYPES_COUNT); + bzero(thread->exc_actions, sizeof(struct exception_action) * EXC_TYPES_COUNT); +} + +void +ipc_thread_destroy_exc_actions( + thread_t thread) +{ + if (thread->exc_actions != NULL) { + kfree(thread->exc_actions, + sizeof(struct exception_action) * EXC_TYPES_COUNT); + thread->exc_actions = NULL; + } +} + void ipc_thread_disable( thread_t thread) @@ -464,14 +511,21 @@ ipc_thread_terminate( thread->ith_sself = thread->ith_self = IP_NULL; - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { - if (IP_VALID(thread->exc_actions[i].port)) - ipc_port_release_send(thread->exc_actions[i].port); - } + if (thread->exc_actions != NULL) { + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { + if (IP_VALID(thread->exc_actions[i].port)) + ipc_port_release_send(thread->exc_actions[i].port); + } + ipc_thread_destroy_exc_actions(thread); + } ipc_port_dealloc_kernel(kport); } +#if IMPORTANCE_INHERITANCE + assert(thread->ith_assertions == 0); +#endif + assert(ipc_kmsg_queue_empty(&thread->ith_messages)); if (thread->ith_rpc_reply != IP_NULL) @@ -499,7 +553,8 @@ ipc_thread_reset( ipc_port_t old_kport, new_kport; ipc_port_t old_sself; ipc_port_t old_exc_actions[EXC_TYPES_COUNT]; - int i; + boolean_t 
has_old_exc_actions = FALSE; + int i; new_kport = ipc_port_alloc_kernel(); if (new_kport == IP_NULL) @@ -522,14 +577,21 @@ ipc_thread_reset( ipc_kobject_set(old_kport, IKO_NULL, IKOT_NONE); ipc_kobject_set(new_kport, (ipc_kobject_t) thread, IKOT_THREAD); - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { - if (!thread->exc_actions[i].privileged) { - old_exc_actions[i] = thread->exc_actions[i].port; - thread->exc_actions[i].port = IP_NULL; - } else { - old_exc_actions[i] = IP_NULL; + /* + * Only ports that were set by root-owned processes + * (privileged ports) should survive + */ + if (thread->exc_actions != NULL) { + has_old_exc_actions = TRUE; + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { + if (thread->exc_actions[i].privileged) { + old_exc_actions[i] = IP_NULL; + } else { + old_exc_actions[i] = thread->exc_actions[i].port; + thread->exc_actions[i].port = IP_NULL; + } } - }/* for */ + } thread_mtx_unlock(thread); @@ -538,11 +600,11 @@ ipc_thread_reset( if (IP_VALID(old_sself)) ipc_port_release_send(old_sself); - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { - if (IP_VALID(old_exc_actions[i])) { + if (has_old_exc_actions) { + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) { ipc_port_release_send(old_exc_actions[i]); } - }/* for */ + } /* destroy the kernel port */ ipc_port_dealloc_kernel(old_kport); @@ -861,7 +923,7 @@ task_get_special_port( case TASK_ACCESS_PORT: port = ipc_port_copy_send(task->itk_task_access); break; - + default: itk_unlock(task); return KERN_INVALID_ARGUMENT; @@ -971,6 +1033,7 @@ task_set_special_port( * KERN_SUCCESS Stashed the port rights. * KERN_INVALID_ARGUMENT The task is null. * KERN_INVALID_ARGUMENT The task is dead. + * KERN_INVALID_ARGUMENT The memory param is null. * KERN_INVALID_ARGUMENT Too many port rights supplied. */ @@ -984,7 +1047,8 @@ mach_ports_register( unsigned int i; if ((task == TASK_NULL) || - (portsCnt > TASK_PORT_REGISTER_MAX)) + (portsCnt > TASK_PORT_REGISTER_MAX) || + (portsCnt && memory == NULL)) return KERN_INVALID_ARGUMENT; /* @@ -1198,6 +1262,38 @@ convert_port_to_task_name( return (task); } +/* + * Routine: convert_port_to_task_suspension_token + * Purpose: + * Convert from a port to a task suspension token. + * Doesn't consume the port ref; produces a suspension token ref, + * which may be null. + * Conditions: + * Nothing locked. + */ +task_suspension_token_t +convert_port_to_task_suspension_token( + ipc_port_t port) +{ + task_suspension_token_t task = TASK_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + + if ( ip_active(port) && + ip_kotype(port) == IKOT_TASK_RESUME) { + task = (task_suspension_token_t)port->ip_kobject; + assert(task != TASK_NULL); + + task_reference_internal(task); + } + + ip_unlock(port); + } + + return (task); +} + /* * Routine: convert_port_to_space * Purpose: @@ -1377,6 +1473,50 @@ convert_task_to_port( return port; } +/* + * Routine: convert_task_suspend_token_to_port + * Purpose: + * Convert from a task suspension token to a port. + * Consumes a task suspension token ref; produces a naked send-once right + * which may be invalid. + * Conditions: + * Nothing locked. 
+ */ +ipc_port_t +convert_task_suspension_token_to_port( + task_suspension_token_t task) +{ + ipc_port_t port; + + task_lock(task); + if (task->active) { + if (task->itk_resume == IP_NULL) { + task->itk_resume = ipc_port_alloc_kernel(); + if (!IP_VALID(task->itk_resume)) { + panic("failed to create resume port"); + } + + ipc_kobject_set(task->itk_resume, (ipc_kobject_t) task, IKOT_TASK_RESUME); + } + + /* + * Create a send-once right for each instance of a direct user-called + * task_suspend2 call. Each time one of these send-once rights is abandoned, + * the notification handler will resume the target task. + */ + port = ipc_port_make_sonce(task->itk_resume); + assert(IP_VALID(port)); + } else { + port = IP_NULL; + } + + task_unlock(task); + task_suspension_token_deallocate(task); + + return port; +} + + /* * Routine: convert_task_name_to_port * Purpose: @@ -1515,6 +1655,9 @@ thread_set_exception_ports( return (KERN_FAILURE); } + if (thread->exc_actions == NULL) { + ipc_thread_init_exc_actions(thread); + } for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { if (exception_mask & (1 << i)) { old_port[i] = thread->exc_actions[i].port; @@ -1674,9 +1817,12 @@ thread_swap_exception_ports( return (KERN_FAILURE); } - count = 0; + if (thread->exc_actions == NULL) { + ipc_thread_init_exc_actions(thread); + } - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { + assert(EXC_TYPES_COUNT > FIRST_EXCEPTION); + for (count = 0, i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT && count < *CountCnt; ++i) { if (exception_mask & (1 << i)) { for (j = 0; j < count; ++j) { /* @@ -1705,8 +1851,6 @@ thread_swap_exception_ports( thread->exc_actions[i].behavior = new_behavior; thread->exc_actions[i].flavor = new_flavor; thread->exc_actions[i].privileged = privileged; - if (count > *CountCnt) - break; } else old_port[i] = IP_NULL; @@ -1714,9 +1858,10 @@ thread_swap_exception_ports( thread_mtx_unlock(thread); - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) + while (--i >= FIRST_EXCEPTION) { if (IP_VALID(old_port[i])) ipc_port_release_send(old_port[i]); + } if (IP_VALID(new_port)) /* consume send right */ ipc_port_release_send(new_port); @@ -1770,9 +1915,8 @@ task_swap_exception_ports( return (KERN_FAILURE); } - count = 0; - - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { + assert(EXC_TYPES_COUNT > FIRST_EXCEPTION); + for (count = 0, i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT && count < *CountCnt; ++i) { if (exception_mask & (1 << i)) { for (j = 0; j < count; j++) { /* @@ -1796,12 +1940,11 @@ task_swap_exception_ports( } old_port[i] = task->exc_actions[i].port; + task->exc_actions[i].port = ipc_port_copy_send(new_port); task->exc_actions[i].behavior = new_behavior; task->exc_actions[i].flavor = new_flavor; task->exc_actions[i].privileged = privileged; - if (count > *CountCnt) - break; } else old_port[i] = IP_NULL; @@ -1809,9 +1952,10 @@ task_swap_exception_ports( itk_unlock(task); - for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; i++) + while (--i >= FIRST_EXCEPTION) { if (IP_VALID(old_port[i])) ipc_port_release_send(old_port[i]); + } if (IP_VALID(new_port)) /* consume send right */ ipc_port_release_send(new_port); @@ -1868,6 +2012,10 @@ thread_get_exception_ports( count = 0; + if (thread->exc_actions == NULL) { + goto done; + } + for (i = FIRST_EXCEPTION; i < EXC_TYPES_COUNT; ++i) { if (exception_mask & (1 << i)) { for (j = 0; j < count; ++j) { @@ -1895,6 +2043,7 @@ thread_get_exception_ports( } } +done: thread_mtx_unlock(thread); *CountCnt = count; diff --git a/osfmk/kern/ipc_tt.h b/osfmk/kern/ipc_tt.h index 
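The send-once right minted by convert_task_suspension_token_to_port() above is what backs the user-visible task_suspend2()/task_resume2() pair named in its comment: each suspend hands back a token, and resuming, or merely destroying the token's right, balances the suspension. A user-space sketch (assuming those MIG routines; error handling abbreviated):

    #include <mach/mach.h>

    static kern_return_t
    suspend_briefly(task_t target)
    {
        task_suspension_token_t token;
        kern_return_t kr;

        kr = task_suspend2(target, &token);
        if (kr != KERN_SUCCESS)
            return kr;

        /* ... inspect the suspended task; if we crash here, the kernel's
         * no-senders notification on the token resumes it for us ... */

        return task_resume2(token);     /* consumes the token */
    }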
7c1384c41..cbf75eb27 100644 --- a/osfmk/kern/ipc_tt.h +++ b/osfmk/kern/ipc_tt.h @@ -97,6 +97,12 @@ extern void ipc_task_terminate( extern void ipc_thread_init( thread_t thread); +extern void ipc_thread_init_exc_actions( + thread_t thread); + +extern void ipc_thread_destroy_exc_actions( + thread_t thread); + /* Disable IPC access to a thread */ extern void ipc_thread_disable( thread_t thread); diff --git a/osfmk/kern/kalloc.h b/osfmk/kern/kalloc.h index 77b8cd3be..41fc204a3 100644 --- a/osfmk/kern/kalloc.h +++ b/osfmk/kern/kalloc.h @@ -77,7 +77,7 @@ __END_DECLS #include -extern void kalloc_init(void) __attribute__((section("__TEXT, initcode"))); +extern void kalloc_init(void); extern void kalloc_fake_zone_init( int ); diff --git a/osfmk/kern/kern_types.h b/osfmk/kern/kern_types.h index d8e98aa6f..b7fc7ec5c 100644 --- a/osfmk/kern/kern_types.h +++ b/osfmk/kern/kern_types.h @@ -85,12 +85,93 @@ typedef void (*thread_continue_t)(void *, wait_result_t); /* * Interruptible flag for waits. + * + * THREAD_UNINT: Uninterruptible wait + * Wait will only end when someone explicitly wakes up the thread, or if the + * wait timeout expires. + * + * Use this state if the system as a whole cannot recover from a thread being + * interrupted out of the wait. + * + * THREAD_INTERRUPTIBLE: + * Wait will end if someone explicitly wakes up the thread, the wait timeout + * expires, or the current thread is being terminated. + * + * This value can be used when your operation may not be cleanly restartable + * for the current process or thread (i.e. the loss of state would be only visible + * to the current client). Since the thread is exiting anyways, you're willing + * to cut the operation short. The system as a whole must be able to cleanly + * deal with the interruption (i.e. remain in a consistent and recoverable state). + * + * THREAD_ABORTSAFE: + * Wait will end if someone explicitly wakes up the thread, the wait timeout + * expires, the current thread is being terminated, if any signal arrives for + * the task, or thread_abort_safely() is called on the thread. + * + * Using this value means that you are willing to be interrupted in the face + * of any user signal, and safely rewind the thread back to the user/kernel + * boundary. Many syscalls will try to restart the operation they were performing + * after the signal has been handled. + * + * You must provide this value for any unbounded wait - otherwise you will + * pend user signals forever. + * + * Thread interrupt mask: + * + * The current maximum interruptible state for the thread, as set by + * thread_interrupt_level(), will limit the conditions that will cause a wake. + * This is useful for code that can't be interrupted to set before calling code + * that doesn't know that. + * + * Thread termination vs safe abort: + * + * Termination abort: thread_abort(), thread_terminate() + * + * A termination abort is sticky. Once a thread is marked for termination, every + * THREAD_INTERRUPTIBLE wait will return immediately with THREAD_INTERRUPTED + * until the thread successfully exits. + * + * Safe abort: thread_abort_safely() + * + * A safe abort is not sticky. The current wait, (or the next wait if the thread + * is not currently waiting) will be interrupted, but then the abort condition is cleared. + * The next wait will sleep as normal. Safe aborts only have a single effect. + * + * The path back to the user/kernel boundary must not make any further unbounded + * wait calls. 
The waiter should detect the THREAD_INTERRUPTED return code + * from an ABORTSAFE wait and return an error code that causes its caller + * to understand that the current operation has been interrupted, and its + * caller should return a similar error code, and so on until the + * user/kernel boundary is reached. For Mach, the error code is usually KERN_ABORTED, + * for BSD it is EINTR. + * + * Debuggers rely on the safe abort mechanism - a signaled thread must return to + * the AST at the user/kernel boundary for the debugger to finish attaching. + * + * No wait/block will ever disappear a thread out from under the waiter. The block + * call will always either return or call the passed in continuation. */ typedef int wait_interrupt_t; #define THREAD_UNINT 0 /* not interruptible */ #define THREAD_INTERRUPTIBLE 1 /* may not be restartable */ #define THREAD_ABORTSAFE 2 /* abortable safely */ +typedef int wait_timeout_urgency_t; +#define TIMEOUT_URGENCY_SYS_NORMAL 0x00 /* use default leeway thresholds for system */ +#define TIMEOUT_URGENCY_SYS_CRITICAL 0x01 /* use critical leeway thresholds for system */ +#define TIMEOUT_URGENCY_SYS_BACKGROUND 0x02 /* use background leeway thresholds for system */ + +#define TIMEOUT_URGENCY_USER_MASK 0x10 /* mask to identify user timeout urgency classes */ +#define TIMEOUT_URGENCY_USER_NORMAL 0x10 /* use default leeway thresholds for user */ +#define TIMEOUT_URGENCY_USER_CRITICAL 0x11 /* use critical leeway thresholds for user */ +#define TIMEOUT_URGENCY_USER_BACKGROUND 0x12 /* use background leeway thresholds for user */ + +#define TIMEOUT_URGENCY_MASK 0x13 /* mask to identify timeout urgency */ + +#define TIMEOUT_URGENCY_LEEWAY 0x20 /* don't ignore provided leeway value */ + +#define TIMEOUT_URGENCY_FIRST_AVAIL 0x40 /* first available bit outside of urgency mask/leeway */ + #ifdef KERNEL_PRIVATE #ifdef MACH_KERNEL_PRIVATE diff --git a/osfmk/kern/kext_alloc.c b/osfmk/kern/kext_alloc.c index c44446335..336e58d07 100644 --- a/osfmk/kern/kext_alloc.c +++ b/osfmk/kern/kext_alloc.c @@ -75,9 +75,11 @@ kext_alloc_init(void) */ text = getsegbyname(SEG_TEXT); - text_start = vm_map_trunc_page(text->vmaddr); + text_start = vm_map_trunc_page(text->vmaddr, + VM_MAP_PAGE_MASK(kernel_map)); text_start &= ~((512ULL * 1024 * 1024 * 1024) - 1); - text_end = vm_map_round_page(text->vmaddr + text->vmsize); + text_end = vm_map_round_page(text->vmaddr + text->vmsize, + VM_MAP_PAGE_MASK(kernel_map)); text_size = text_end - text_start; kext_alloc_base = KEXT_ALLOC_BASE(text_end); @@ -91,7 +93,8 @@ kext_alloc_init(void) * kexts */ kext_post_boot_base = - vm_map_round_page(kext_alloc_base + prelinkTextSegment->vmsize); + vm_map_round_page(kext_alloc_base + prelinkTextSegment->vmsize, + VM_MAP_PAGE_MASK(kernel_map)); } else { kext_post_boot_base = kext_alloc_base; diff --git a/osfmk/kern/kmod.c b/osfmk/kern/kmod.c index d0563ce09..ac38bac82 100644 --- a/osfmk/kern/kmod.c +++ b/osfmk/kern/kmod.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
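The wait-state rules documented above map onto a common in-kernel pattern. Below is a minimal sketch, not part of the patch (the event and function names are invented), of an unbounded wait done the way the comment prescribes, with THREAD_INTERRUPTED translated into KERN_ABORTED so the error can unwind to the user/kernel boundary:

static kern_return_t
example_wait_for_event(event_t event)
{
	wait_result_t wr;

	/* Unbounded wait: must be THREAD_ABORTSAFE, per the rules above. */
	wr = assert_wait(event, THREAD_ABORTSAFE);
	if (wr == THREAD_WAITING)
		wr = thread_block(THREAD_CONTINUE_NULL);

	switch (wr) {
	case THREAD_AWAKENED:
		return (KERN_SUCCESS);
	case THREAD_INTERRUPTED:
		/* signal delivery or thread_abort_safely(): rewind to the boundary */
		return (KERN_ABORTED);
	default:
		return (KERN_FAILURE);
	}
}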
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -85,15 +85,7 @@ extern void proc_selfname(char * buf, int size); __FUNCTION__, procname); \ } while (0) -#if __i386__ -// in libkern/OSKextLib.cpp -extern kern_return_t kext_get_kmod_info( - kmod_info_array_t * kmod_list, - mach_msg_type_number_t * kmodCount); -#define KMOD_MIG_UNUSED -#else #define KMOD_MIG_UNUSED __unused -#endif /* __i386__ */ /********************************************************************* @@ -148,14 +140,6 @@ kmod_get_info( kmod_info_array_t * kmod_list KMOD_MIG_UNUSED, mach_msg_type_number_t * kmodCount KMOD_MIG_UNUSED) { -#if __i386__ - if (current_task() != kernel_task && task_has_64BitAddr(current_task())) { - NOT_SUPPORTED_USER64(); - return KERN_NOT_SUPPORTED; - } - return kext_get_kmod_info(kmod_list, kmodCount); -#else NOT_SUPPORTED_KERNEL(); return KERN_NOT_SUPPORTED; -#endif /* __i386__ */ } diff --git a/osfmk/kern/kpc.h b/osfmk/kern/kpc.h new file mode 100644 index 000000000..4f4df0921 --- /dev/null +++ b/osfmk/kern/kpc.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef __KERN_KPC_H__ +#define __KERN_KPC_H__ + +/** kernel interfaces to KPC PMC infrastructure **/ + +#include + +/* cross-platform class constants */ +#define KPC_CLASS_FIXED (0) +#define KPC_CLASS_CONFIGURABLE (1) +#define KPC_CLASS_POWER (2) + +#define KPC_CLASS_FIXED_MASK (1 << KPC_CLASS_FIXED) +#define KPC_CLASS_CONFIGURABLE_MASK (1 << KPC_CLASS_CONFIGURABLE) +#define KPC_CLASS_POWER_MASK (1 << KPC_CLASS_POWER) +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +uint32_t kpc_actionid[KPC_MAX_COUNTERS]; + +/* locks */ +static lck_grp_attr_t *kpc_config_lckgrp_attr = NULL; +static lck_grp_t *kpc_config_lckgrp = NULL; +static lck_mtx_t kpc_config_lock; + +void kpc_arch_init(void); +void +kpc_arch_init(void) +{ + kpc_config_lckgrp_attr = lck_grp_attr_alloc_init(); + kpc_config_lckgrp = lck_grp_alloc_init("kpc", kpc_config_lckgrp_attr); + lck_mtx_init(&kpc_config_lock, kpc_config_lckgrp, LCK_ATTR_NULL); +} + +uint32_t +kpc_get_running(void) +{ + uint32_t cur_state = 0; + + if( kpc_is_running_fixed() ) + cur_state |= KPC_CLASS_FIXED_MASK; + + if( kpc_is_running_configurable() ) + cur_state |= KPC_CLASS_CONFIGURABLE_MASK; + + return cur_state; +} + +/* generic counter reading function */ +int +kpc_get_cpu_counters( boolean_t all_cpus, uint32_t classes, + int *curcpu, uint64_t *buf ) +{ + int r, enabled, offset = 0; + + (void) all_cpus; + + /* grab counters and CPU number as close as possible */ + enabled = ml_set_interrupts_enabled(FALSE); + + /* and the CPU ID */ + if( curcpu ) + *curcpu = current_processor()->cpu_id; + + if( classes & KPC_CLASS_FIXED_MASK ) + { + kpc_get_fixed_counters( &buf[offset] ); + + offset += kpc_get_counter_count(KPC_CLASS_FIXED_MASK); + } + + if( classes & KPC_CLASS_CONFIGURABLE_MASK ) + { + r = kpc_get_configurable_counters( &buf[offset] ); + + offset += kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK); + } + + ml_set_interrupts_enabled(enabled); + + return offset; +} + +int +kpc_get_shadow_counters( boolean_t all_cpus, uint32_t classes, + int *curcpu, uint64_t *buf ) +{ + int enabled, count, offset = 0; + + (void)all_cpus; + + enabled = ml_set_interrupts_enabled(FALSE); + + if( curcpu ) + *curcpu = current_processor()->cpu_id; + + if( classes & KPC_CLASS_FIXED_MASK ) + { + count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK); + + memcpy( &buf[offset], &FIXED_SHADOW(0), count*sizeof(uint64_t) ); + + offset += count; + } + + if( classes & KPC_CLASS_CONFIGURABLE_MASK ) + { + count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK); + + memcpy( &buf[offset], &CONFIGURABLE_SHADOW(0), count*sizeof(uint64_t) ); + + offset += count; + } + + ml_set_interrupts_enabled(enabled); + + return offset; +} + +uint32_t +kpc_get_counter_count(uint32_t classes) +{ + int count = 0; + + if( classes & KPC_CLASS_FIXED_MASK ) + count += kpc_fixed_count(); + + if( classes & KPC_CLASS_CONFIGURABLE_MASK ) + count += kpc_configurable_count() ; + + return count; +} + +uint32_t +kpc_get_config_count(uint32_t classes) +{ + int count = 0; + + if( classes & KPC_CLASS_FIXED_MASK ) + count += kpc_fixed_config_count(); + + if( classes & KPC_CLASS_CONFIGURABLE_MASK ) + count += kpc_configurable_config_count(); + + return count; +} + +int +kpc_get_config(uint32_t classes, kpc_config_t *current_config) +{ + int count = 0; + + if( classes & KPC_CLASS_FIXED_MASK ) + { + kpc_get_fixed_config(&current_config[count]); + count += kpc_get_config_count(KPC_CLASS_FIXED_MASK); + } + + if( classes & KPC_CLASS_CONFIGURABLE_MASK ) + { + kpc_get_configurable_config(&current_config[count]); + count += 
kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK); + } + + return 0; +} + +int +kpc_set_config(uint32_t classes, kpc_config_t *configv) +{ + struct kpc_config_remote mp_config; + + lck_mtx_lock(&kpc_config_lock); + + mp_config.classes = classes; + mp_config.configv = configv; + + kpc_set_config_arch( &mp_config ); + + lck_mtx_unlock(&kpc_config_lock); + + return 0; +} + +/* allocate a buffer big enough for all the counters */ +uint64_t * +kpc_counterbuf_alloc(void) +{ + uint64_t *buf; + + buf = kalloc(KPC_MAX_COUNTERS * sizeof(uint64_t)); + if(buf) + bzero( buf, KPC_MAX_COUNTERS * sizeof(uint64_t) ); + + return buf; +} + +void +kpc_counterbuf_free(uint64_t *buf) +{ + if( buf ) + kfree(buf, KPC_MAX_COUNTERS * sizeof(uint64_t)); +} + +void kpc_sample_kperf(uint32_t actionid) +{ + struct kperf_sample sbuf; + struct kperf_context ctx; + task_t task = NULL; + int r; + + BUF_DATA1(PERF_KPC_HNDLR | DBG_FUNC_START, 0); + + ctx.cur_pid = 0; + ctx.cur_thread = current_thread(); + + task = chudxnu_task_for_thread(ctx.cur_thread); + if (task) + ctx.cur_pid = chudxnu_pid_for_task(task); + + ctx.trigger_type = TRIGGER_TYPE_PMI; + ctx.trigger_id = 0; + + r = kperf_sample(&sbuf, &ctx, actionid, SAMPLE_FLAG_PEND_USER); + + BUF_INFO1(PERF_KPC_HNDLR | DBG_FUNC_END, r); +} + + +int kpc_set_period(uint32_t classes, uint64_t *val) +{ + struct kpc_config_remote mp_config; + + lck_mtx_lock(&kpc_config_lock); + +#ifndef FIXED_COUNTER_SHADOW + if (classes & KPC_CLASS_FIXED_MASK) { + lck_mtx_unlock(&kpc_config_lock); + return -1; + } +#endif + + kprintf("setting period %u\n", classes); + + mp_config.classes = classes; + mp_config.configv = val; + + kpc_set_period_arch( &mp_config ); + + lck_mtx_unlock(&kpc_config_lock); + + return 0; +} + + +int kpc_get_period(uint32_t classes, uint64_t *val) +{ + uint32_t i, count, offset = 0; + + lck_mtx_lock(&kpc_config_lock); + + if (classes & KPC_CLASS_FIXED_MASK) { + count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK); + + /* convert reload values to periods */ + for (i = 0; i < count; i++) + val[i] = kpc_fixed_max() - FIXED_RELOAD(i); + + offset += count; + } + + if (classes & KPC_CLASS_CONFIGURABLE_MASK) { + count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK); + + /* convert reload values to periods */ + for (i = 0; i < count; i++) + val[i + offset] = kpc_configurable_max() - CONFIGURABLE_RELOAD(i); + } + + lck_mtx_unlock(&kpc_config_lock); + + return 0; +} + +int kpc_set_actionid(uint32_t classes, uint32_t *val) +{ + uint32_t count, offset = 0; + + /* NOTE: what happens if a pmi occurs while actionids are being + * set is undefined. 
*/ + lck_mtx_lock(&kpc_config_lock); + + if (classes & KPC_CLASS_FIXED_MASK) { + count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK); + + memcpy(&FIXED_ACTIONID(0), val, count*sizeof(uint32_t)); + + offset += count; + } + + if (classes & KPC_CLASS_CONFIGURABLE_MASK) { + count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK); + + memcpy(&CONFIGURABLE_ACTIONID(0), &val[offset], count*sizeof(uint32_t)); + } + + lck_mtx_unlock(&kpc_config_lock); + + return 0; +} + +int kpc_get_actionid(uint32_t classes, uint32_t *val) +{ + uint32_t count, offset = 0; + + lck_mtx_lock(&kpc_config_lock); + + if (classes & KPC_CLASS_FIXED_MASK) { + count = kpc_get_counter_count(KPC_CLASS_FIXED_MASK); + + memcpy(val, &FIXED_ACTIONID(0), count*sizeof(uint32_t)); + + offset += count; + } + + if (classes & KPC_CLASS_CONFIGURABLE_MASK) { + count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK); + + memcpy(&val[offset], &CONFIGURABLE_ACTIONID(0), count*sizeof(uint32_t)); + } + + lck_mtx_unlock(&kpc_config_lock); + + return 0; + +} + diff --git a/osfmk/kern/kpc_thread.c b/osfmk/kern/kpc_thread.c new file mode 100644 index 000000000..692aa02fe --- /dev/null +++ b/osfmk/kern/kpc_thread.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
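A consumer-side sketch of the kpc calls above (the function is invented; the calls and constants are from the patch): counter values are laid out fixed-class first, then configurable, mirroring the offset accounting in kpc_get_cpu_counters(), and kpc_get_period() reports max - reload because a counter preloaded with (max - period) overflows, the usual PMI scheme, after exactly period events.

static void
example_read_all_counters(void)
{
	uint64_t *buf;
	int cpu, used;

	/* zeroed buffer with room for KPC_MAX_COUNTERS values */
	buf = kpc_counterbuf_alloc();
	if (buf == NULL)
		return;

	used = kpc_get_cpu_counters(FALSE,
	    KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK,
	    &cpu, buf);

	/*
	 * buf[0 .. fixed_count-1] now hold the fixed counters read on
	 * CPU `cpu`; the configurable counters follow immediately.
	 */
	(void)used;

	kpc_counterbuf_free(buf);
}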
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include + +#include + + +/* global for whether to read PMCs on context switch */ +int kpc_threads_counting; + +/* current config and number of counters in that config */ +static uint32_t kpc_thread_classes = 0; +static uint32_t kpc_thread_classes_count = 0; + +static lck_grp_attr_t *kpc_thread_lckgrp_attr = NULL; +static lck_grp_t *kpc_thread_lckgrp = NULL; +static lck_mtx_t kpc_thread_lock; + +void kpc_thread_init(void); + +void +kpc_thread_init(void) +{ + kpc_thread_lckgrp_attr = lck_grp_attr_alloc_init(); + kpc_thread_lckgrp = lck_grp_alloc_init("kpc", kpc_thread_lckgrp_attr); + lck_mtx_init(&kpc_thread_lock, kpc_thread_lckgrp, LCK_ATTR_NULL); +} + +uint32_t +kpc_get_thread_counting(void) +{ + uint32_t kpc_thread_classes_tmp; + int kpc_threads_counting_tmp; + + /* Make sure we get a consistent snapshot of these values */ + lck_mtx_lock(&kpc_thread_lock); + + kpc_thread_classes_tmp = kpc_thread_classes; + kpc_threads_counting_tmp = kpc_threads_counting; + + lck_mtx_unlock(&kpc_thread_lock); + + if( kpc_threads_counting_tmp ) + return kpc_thread_classes_tmp; + else + return 0; +} + +int +kpc_set_thread_counting(uint32_t classes) +{ + uint32_t count; + + lck_mtx_lock(&kpc_thread_lock); + + count = kpc_get_counter_count(classes); + + if( (classes == 0) + || (count == 0) ) + { + /* shut down */ + kpc_threads_counting = FALSE; + } + else + { + /* stash the config */ + kpc_thread_classes = classes; + + /* work out the size */ + kpc_thread_classes_count = count; + assert(kpc_thread_classes_count <= KPC_MAX_COUNTERS); + + /* enable switch */ + kpc_threads_counting = TRUE; + + /* and schedule an AST for this thread... */ + if( !current_thread()->kpc_buf ) + { + current_thread()->t_chud |= T_KPC_ALLOC; + act_set_kperf(current_thread()); + } + } + + lck_mtx_unlock(&kpc_thread_lock); + + return 0; +} + +/* snapshot current PMCs and update counters in the current thread */ +static void +kpc_update_thread_counters( thread_t thread ) +{ + uint32_t i; + uint64_t *tmp = NULL; + cpu_data_t *cpu = NULL; + +/* TODO: Fix this...*/ +#if defined (__x86_64__) + cpu = current_cpu_datap(); +#else +#error architecture not yet supported +#endif + + /* 1. stash current PMCs into latest CPU block */ + kpc_get_cpu_counters( FALSE, kpc_thread_classes, + NULL, cpu->cpu_kpc_buf[1] ); + + /* 2. apply delta to old thread */ + if( thread->kpc_buf ) + for( i = 0; i < kpc_thread_classes_count; i++ ) + thread->kpc_buf[i] += cpu->cpu_kpc_buf[1][i] - cpu->cpu_kpc_buf[0][i]; + + + /* schedule any necessary allocations */ + if( !current_thread()->kpc_buf ) + { + current_thread()->t_chud |= T_KPC_ALLOC; + act_set_kperf(current_thread()); + } + + /* 3. 
switch the PMC block pointers */ + tmp = cpu->cpu_kpc_buf[1]; + cpu->cpu_kpc_buf[1] = cpu->cpu_kpc_buf[0]; + cpu->cpu_kpc_buf[0] = tmp; +} + +void +kpc_switch_context( thread_t old, thread_t new __unused ) +{ + kpc_update_thread_counters( old ); +} + +/* get counter values for a thread */ +int +kpc_get_curthread_counters(uint32_t *inoutcount, uint64_t *buf) +{ + thread_t thread = current_thread(); + boolean_t enabled; + + /* buffer too small :( */ + if( *inoutcount < kpc_thread_classes_count ) + return EINVAL; + + /* copy data and actual size */ + if( !thread->kpc_buf ) + return EINVAL; + + enabled = ml_set_interrupts_enabled(FALSE); + + /* snap latest version of counters for this thread */ + kpc_update_thread_counters( current_thread() ); + + /* copy out */ + memcpy( buf, thread->kpc_buf, + kpc_thread_classes_count * sizeof(*buf) ); + *inoutcount = kpc_thread_classes_count; + + ml_set_interrupts_enabled(enabled); + + return 0; +} + + +void +kpc_thread_create(thread_t thread) +{ + /* nothing to do if we're not counting */ + if(!kpc_threads_counting) + return; + + /* give the new thread a counterbuf */ + thread->kpc_buf = kpc_counterbuf_alloc(); +} + +void +kpc_thread_destroy(thread_t thread) +{ + uint64_t *buf = NULL; + + /* usual case: no kpc buf, just return */ + if( !thread->kpc_buf ) + return; + + /* otherwise, don't leak */ + buf = thread->kpc_buf; + thread->kpc_buf = NULL; + kpc_counterbuf_free(buf); +} + +/* ast callback on a thread */ +void +kpc_thread_ast_handler( thread_t thread ) +{ + /* see if we want an alloc */ + if( thread->t_chud & T_KPC_ALLOC ) + thread->kpc_buf = kpc_counterbuf_alloc(); +} diff --git a/osfmk/kern/ledger.c b/osfmk/kern/ledger.c index cf1a7aa02..13146ce6f 100644 --- a/osfmk/kern/ledger.c +++ b/osfmk/kern/ledger.c @@ -46,17 +46,19 @@ * Ledger entry flags. Bits in second nibble (masked by 0xF0) are used for * ledger actions (LEDGER_ACTION_BLOCK, etc). */ -#define ENTRY_ACTIVE 0x0001 /* entry is active if set */ -#define WAKE_NEEDED 0x0100 /* one or more threads are asleep */ -#define WAKE_INPROGRESS 0x0200 /* the wait queue is being processed */ -#define REFILL_SCHEDULED 0x0400 /* a refill timer has been set */ -#define REFILL_INPROGRESS 0x0800 /* the ledger is being refilled */ -#define CALLED_BACK 0x1000 /* callback has already been called */ +#define LF_ENTRY_ACTIVE 0x0001 /* entry is active if set */ +#define LF_WAKE_NEEDED 0x0100 /* one or more threads are asleep */ +#define LF_WAKE_INPROGRESS 0x0200 /* the wait queue is being processed */ +#define LF_REFILL_SCHEDULED 0x0400 /* a refill timer has been set */ +#define LF_REFILL_INPROGRESS 0x0800 /* the ledger is being refilled */ +#define LF_CALLED_BACK 0x1000 /* callback was called for balance in deficit */ +#define LF_WARNED 0x2000 /* callback was called for balance warning */ +#define LF_TRACKING_MAX 0x4000 /* track max balance over user-specfied time */ /* Determine whether a ledger entry exists and has been initialized and active */ #define ENTRY_VALID(l, e) \ (((l) != NULL) && ((e) >= 0) && ((e) < (l)->l_size) && \ - (((l)->l_entries[e].le_flags & ENTRY_ACTIVE) == ENTRY_ACTIVE)) + (((l)->l_entries[e].le_flags & LF_ENTRY_ACTIVE) == LF_ENTRY_ACTIVE)) #ifdef LEDGER_DEBUG int ledger_debug = 0; @@ -121,21 +123,34 @@ struct ledger_template { splx(s); \ } +/* + * Use 2 "tocks" to track the rolling maximum balance of a ledger entry. + */ +#define NTOCKS 2 /* * The explicit alignment is to ensure that atomic operations don't panic * on ARM. 
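The pointer swap in kpc_update_thread_counters() above implements a per-CPU double buffer; this standalone sketch (names invented) shows the same accounting in isolation: one buffer holds the snapshot taken at the previous context switch, the other receives the fresh snapshot, the difference is charged to the outgoing thread, and the buffers then trade roles.

static void
example_charge_switch_delta(uint64_t *thread_buf,
    uint64_t **baseline, uint64_t **fresh, uint32_t nctrs)
{
	uint64_t *tmp;
	uint32_t i;

	/* *fresh has just been filled with the current PMC values */
	for (i = 0; i < nctrs; i++)
		thread_buf[i] += (*fresh)[i] - (*baseline)[i];

	/* the fresh snapshot becomes the baseline for the next interval */
	tmp = *fresh;
	*fresh = *baseline;
	*baseline = tmp;
}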
*/ struct ledger_entry { - volatile uint32_t le_flags; - ledger_amount_t le_limit; - volatile ledger_amount_t le_credit __attribute__((aligned(8))); - volatile ledger_amount_t le_debit __attribute__((aligned(8))); - /* - * XXX - the following two fields can go away if we move all of - * the refill logic into process policy - */ - uint64_t le_refill_period; - uint64_t le_last_refill; + volatile uint32_t le_flags; + ledger_amount_t le_limit; + ledger_amount_t le_warn_level; + volatile ledger_amount_t le_credit __attribute__((aligned(8))); + volatile ledger_amount_t le_debit __attribute__((aligned(8))); + union { + struct { + /* + * XXX - the following two fields can go away if we move all of + * the refill logic into process policy + */ + uint64_t le_refill_period; + uint64_t le_last_refill; + } le_refill; + struct _le_peak { + uint32_t le_max; /* Lower 32-bits of observed max balance */ + uint32_t le_time; /* time when this peak was observed */ + } le_peaks[NTOCKS]; + } _le; } __attribute__((aligned(8))); struct ledger { @@ -273,7 +288,7 @@ ledger_entry_add(ledger_template_t template, const char *key, strlcpy(et->et_key, key, LEDGER_NAME_MAX); strlcpy(et->et_group, group, LEDGER_NAME_MAX); strlcpy(et->et_units, units, LEDGER_NAME_MAX); - et->et_flags = ENTRY_ACTIVE; + et->et_flags = LF_ENTRY_ACTIVE; et->et_callback = NULL; idx = template->lt_cnt++; @@ -292,8 +307,8 @@ ledger_entry_setactive(ledger_t ledger, int entry) return (KERN_INVALID_ARGUMENT); le = &ledger->l_entries[entry]; - if ((le->le_flags & ENTRY_ACTIVE) == 0) { - flag_set(&le->le_flags, ENTRY_ACTIVE); + if ((le->le_flags & LF_ENTRY_ACTIVE) == 0) { + flag_set(&le->le_flags, LF_ENTRY_ACTIVE); } return (KERN_SUCCESS); } @@ -361,17 +376,19 @@ ledger_instantiate(ledger_template_t template, int entry_type) le->le_flags = et->et_flags; /* make entry inactive by removing active bit */ if (entry_type == LEDGER_CREATE_INACTIVE_ENTRIES) - flag_clear(&le->le_flags, ENTRY_ACTIVE); + flag_clear(&le->le_flags, LF_ENTRY_ACTIVE); /* * If template has a callback, this entry is opted-in, * by default. */ if (et->et_callback != NULL) flag_set(&le->le_flags, LEDGER_ACTION_CALLBACK); - le->le_credit = 0; - le->le_debit = 0; - le->le_limit = LEDGER_LIMIT_INFINITY; - le->le_refill_period = 0; + le->le_credit = 0; + le->le_debit = 0; + le->le_limit = LEDGER_LIMIT_INFINITY; + le->le_warn_level = LEDGER_LIMIT_INFINITY; + le->_le.le_refill.le_refill_period = 0; + le->_le.le_refill.le_last_refill = 0; } template_unlock(template); @@ -436,6 +453,26 @@ ledger_dereference(ledger_t ledger) return (KERN_SUCCESS); } +/* + * Determine whether an entry has exceeded its warning level. + */ +static inline int +warn_level_exceeded(struct ledger_entry *le) +{ + ledger_amount_t balance; + + assert((le->le_credit >= 0) && (le->le_debit >= 0)); + + /* + * XXX - Currently, we only support warnings for ledgers which + * use positive limits. + */ + balance = le->le_credit - le->le_debit; + if ((le->le_warn_level != LEDGER_LIMIT_INFINITY) && (balance > le->le_warn_level)) + return (1); + return (0); +} + /* * Determine whether an entry has exceeded its limit. 
*/ @@ -444,6 +481,8 @@ limit_exceeded(struct ledger_entry *le) { ledger_amount_t balance; + assert((le->le_credit >= 0) && (le->le_debit >= 0)); + balance = le->le_credit - le->le_debit; if ((le->le_limit <= 0) && (balance < le->le_limit)) return (1); @@ -475,10 +514,10 @@ ledger_limit_entry_wakeup(struct ledger_entry *le) uint32_t flags; if (!limit_exceeded(le)) { - flags = flag_clear(&le->le_flags, CALLED_BACK); + flags = flag_clear(&le->le_flags, LF_CALLED_BACK); - while (le->le_flags & WAKE_NEEDED) { - flag_clear(&le->le_flags, WAKE_NEEDED); + while (le->le_flags & LF_WAKE_NEEDED) { + flag_clear(&le->le_flags, LF_WAKE_NEEDED); thread_wakeup((event_t)le); } } @@ -493,20 +532,26 @@ ledger_refill(uint64_t now, ledger_t ledger, int entry) uint64_t elapsed, period, periods; struct ledger_entry *le; ledger_amount_t balance, due; - int cnt; le = &ledger->l_entries[entry]; + assert(le->le_limit != LEDGER_LIMIT_INFINITY); + /* * If another thread is handling the refill already, we're not - * needed. Just sit here for a few cycles while the other thread - * finishes updating the balance. If it takes too long, just return - * and we'll block again. + * needed. */ - if (flag_set(&le->le_flags, REFILL_INPROGRESS) & REFILL_INPROGRESS) { - cnt = 0; - while (cnt++ < 100 && (le->le_flags & REFILL_INPROGRESS)) - ; + if (flag_set(&le->le_flags, LF_REFILL_INPROGRESS) & LF_REFILL_INPROGRESS) { + return; + } + + /* + * If the timestamp we're about to use to refill is older than the + * last refill, then someone else has already refilled this ledger + * and there's nothing for us to do here. + */ + if (now <= le->_le.le_refill.le_last_refill) { + flag_clear(&le->le_flags, LF_REFILL_INPROGRESS); return; } @@ -514,10 +559,10 @@ ledger_refill(uint64_t now, ledger_t ledger, int entry) * See how many refill periods have passed since we last * did a refill. */ - period = le->le_refill_period; - elapsed = now - le->le_last_refill; + period = le->_le.le_refill.le_refill_period; + elapsed = now - le->_le.le_refill.le_last_refill; if ((period == 0) || (elapsed < period)) { - flag_clear(&le->le_flags, REFILL_INPROGRESS); + flag_clear(&le->le_flags, LF_REFILL_INPROGRESS); return; } @@ -536,43 +581,106 @@ ledger_refill(uint64_t now, ledger_t ledger, int entry) * how long. */ if (elapsed > 0) - periods = (now - le->le_last_refill) / period; + periods = (now - le->_le.le_refill.le_last_refill) / period; balance = le->le_credit - le->le_debit; due = periods * le->le_limit; if (balance - due < 0) due = balance; + + assert(due >= 0); + OSAddAtomic64(due, &le->le_debit); + assert(le->le_debit >= 0); + /* * If we've completely refilled the pool, set the refill time to now. * Otherwise set it to the time at which it last should have been * fully refilled. */ if (balance == due) - le->le_last_refill = now; + le->_le.le_refill.le_last_refill = now; else - le->le_last_refill += (le->le_refill_period * periods); + le->_le.le_refill.le_last_refill += (le->_le.le_refill.le_refill_period * periods); - flag_clear(&le->le_flags, REFILL_INPROGRESS); + flag_clear(&le->le_flags, LF_REFILL_INPROGRESS); lprintf(("Refill %lld %lld->%lld\n", periods, balance, balance - due)); if (!limit_exceeded(le)) ledger_limit_entry_wakeup(le); } +/* + * In tenths of a second, the length of one lookback period (a "tock") for + * ledger rolling maximum calculations. The effective lookback window will be this times + * NTOCKS. + * + * Use a tock length of 2.5 seconds to get a total lookback period of 5 seconds. 
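The refill math in ledger_refill() above boils down to a few lines; this sketch (function invented) isolates them. For example, with a 10 ms limit per 100 ms refill period and 250 ms elapsed, periods = 2 (integer division), due = 20 ms, and due is clamped so the balance never goes negative.

static ledger_amount_t
example_refill_due(uint64_t now, uint64_t last_refill, uint64_t period,
    ledger_amount_t limit, ledger_amount_t balance)
{
	/* whole refill periods elapsed since the last refill */
	uint64_t periods = (now - last_refill) / period;
	ledger_amount_t due = periods * limit;

	/* never hand back more than is actually outstanding */
	if (due > balance)
		due = balance;

	/* caller adds `due` to le_debit, shrinking credit - debit */
	return (due);
}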
+ * + * XXX Could make this caller-definable, at the point that rolling max tracking + * is enabled for the entry. + */ +#define TOCKLEN 25 + +/* + * How many sched_tick's are there in one tock (one of our lookback periods)? + * + * X sched_ticks 2.5 sec N sched_ticks + * --------------- = ---------- * ------------- + * tock tock sec + * + * where N sched_ticks/sec is calculated via 1 << SCHED_TICK_SHIFT (see sched_prim.h) + * + * This should give us 20 sched_tick's in one 2.5 second-long tock. + */ +#define SCHED_TICKS_PER_TOCK ((TOCKLEN * (1 << SCHED_TICK_SHIFT)) / 10) + +/* + * Rolling max timestamps use their own unit (let's call this a "tock"). One tock is the + * length of one lookback period that we use for our rolling max calculation. + * + * Calculate the current time in tocks from sched_tick (which runs at a some + * fixed rate). + */ +#define CURRENT_TOCKSTAMP() (sched_tick / SCHED_TICKS_PER_TOCK) + +/* + * Does the given tockstamp fall in either the current or the previous tocks? + */ +#define TOCKSTAMP_IS_STALE(now, tock) ((((now) - (tock)) < NTOCKS) ? FALSE : TRUE) + static void ledger_check_new_balance(ledger_t ledger, int entry) { struct ledger_entry *le; - uint64_t now; le = &ledger->l_entries[entry]; + if (le->le_flags & LF_TRACKING_MAX) { + ledger_amount_t balance = le->le_credit - le->le_debit; + uint32_t now = CURRENT_TOCKSTAMP(); + struct _le_peak *p = &le->_le.le_peaks[now % NTOCKS]; + + if (!TOCKSTAMP_IS_STALE(now, p->le_time) || (balance > p->le_max)) { + /* + * The current balance is greater than the previously + * observed peak for the current time block, *or* we + * haven't yet recorded a peak for the current time block -- + * so this is our new peak. + * + * (We only track the lower 32-bits of a balance for rolling + * max purposes.) + */ + p->le_max = (uint32_t)balance; + p->le_time = now; + } + } + /* Check to see whether we're due a refill */ - if (le->le_refill_period) { - now = mach_absolute_time(); - if ((now - le->le_last_refill) > le->le_refill_period) + if (le->le_flags & LF_REFILL_SCHEDULED) { + uint64_t now = mach_absolute_time(); + if ((now - le->_le.le_refill.le_last_refill) > le->_le.le_refill.le_refill_period) ledger_refill(now, ledger, entry); } @@ -588,18 +696,55 @@ ledger_check_new_balance(ledger_t ledger, int entry) * again until it gets rearmed. */ if ((le->le_flags & LEDGER_ACTION_BLOCK) || - (!(le->le_flags & CALLED_BACK) && + (!(le->le_flags & LF_CALLED_BACK) && entry_get_callback(ledger, entry))) { set_astledger(current_thread()); } } else { /* - * The balance on the account is below the limit. If - * there are any threads blocked on this entry, now would + * The balance on the account is below the limit. + * + * If there are any threads blocked on this entry, now would * be a good time to wake them up. */ - if (le->le_flags & WAKE_NEEDED) + if (le->le_flags & LF_WAKE_NEEDED) ledger_limit_entry_wakeup(le); + + if (le->le_flags & LEDGER_ACTION_CALLBACK) { + /* + * Client has requested that a callback be invoked whenever + * the ledger's balance crosses into or out of the warning + * level. + */ + if (warn_level_exceeded(le)) { + /* + * This ledger's balance is above the warning level. + */ + if ((le->le_flags & LF_WARNED) == 0) { + /* + * If we are above the warning level and + * have not yet invoked the callback, + * set the AST so it can be done before returning + * to userland. + */ + set_astledger(current_thread()); + } + } else { + /* + * This ledger's balance is below the warning level. 
+ */ + if (le->le_flags & LF_WARNED) { + /* + * If we are below the warning level and + * the LF_WARNED flag is still set, we need + * to invoke the callback to let the client + * know the ledger balance is now back below + * the warning level. + */ + set_astledger(current_thread()); + } + } + } } } @@ -628,32 +773,156 @@ ledger_credit(ledger_t ledger, int entry, ledger_amount_t amount) return (KERN_SUCCESS); } +/* + * Zero the balance of a ledger by adding to its credit or debit, whichever is smaller. + * Note that some clients of ledgers (notably, task wakeup statistics) require that + * le_credit only ever increase as a function of ledger_credit(). + */ +kern_return_t +ledger_zero_balance(ledger_t ledger, int entry) +{ + struct ledger_entry *le; + + if (!ENTRY_VALID(ledger, entry)) + return (KERN_INVALID_VALUE); + + le = &ledger->l_entries[entry]; + +top: + if (le->le_credit > le->le_debit) { + if (!OSCompareAndSwap64(le->le_debit, le->le_credit, &le->le_debit)) + goto top; + lprintf(("%p zeroed %lld->%lld\n", current_thread(), le->le_debit, le->le_credit)); + } else if (le->le_credit < le->le_debit) { + if (!OSCompareAndSwap64(le->le_credit, le->le_debit, &le->le_credit)) + goto top; + lprintf(("%p zeroed %lld->%lld\n", current_thread(), le->le_credit, le->le_debit)); + } + + return (KERN_SUCCESS); +} + +kern_return_t +ledger_get_limit(ledger_t ledger, int entry, ledger_amount_t *limit) +{ + struct ledger_entry *le; + + if (!ENTRY_VALID(ledger, entry)) + return (KERN_INVALID_VALUE); + + le = &ledger->l_entries[entry]; + *limit = le->le_limit; + + lprintf(("ledger_get_limit: %lld\n", *limit)); + + return (KERN_SUCCESS); +} /* * Adjust the limit of a limited resource. This does not affect the * current balance, so the change doesn't affect the thread until the * next refill. + * + * warn_level: If non-zero, causes the callback to be invoked when + * the balance exceeds this level. Specified as a percentage [of the limit]. */ kern_return_t -ledger_set_limit(ledger_t ledger, int entry, ledger_amount_t limit) +ledger_set_limit(ledger_t ledger, int entry, ledger_amount_t limit, + uint8_t warn_level_percentage) { struct ledger_entry *le; if (!ENTRY_VALID(ledger, entry)) return (KERN_INVALID_VALUE); - lprintf(("ledger_set_limit: %x\n", (uint32_t)limit)); + lprintf(("ledger_set_limit: %lld\n", limit)); le = &ledger->l_entries[entry]; + + if (limit == LEDGER_LIMIT_INFINITY) { + /* + * Caller wishes to disable the limit. This will implicitly + * disable automatic refill, as refills implicitly depend + * on the limit. 
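ledger_zero_balance() above zeroes the balance lock-free by raising the smaller side to meet the larger: with credit = 100 and debit = 40 it tries to CAS le_debit from 40 to 100; if another thread moved le_debit first, the CAS fails and the comparison restarts. A compressed sketch of that retry shape (illustrative only, one direction shown; the patch's code handles both sides):

static void
example_zero_by_debit(volatile ledger_amount_t *credit,
    volatile ledger_amount_t *debit)
{
	/* raise debit to meet credit; retry if debit moved underneath us */
	while (*credit > *debit) {
		if (OSCompareAndSwap64(*debit, *credit, debit))
			break;
	}
}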
+ */ + ledger_disable_refill(ledger, entry); + } + le->le_limit = limit; - le->le_last_refill = 0; - flag_clear(&le->le_flags, CALLED_BACK); + le->_le.le_refill.le_last_refill = 0; + flag_clear(&le->le_flags, LF_CALLED_BACK); + flag_clear(&le->le_flags, LF_WARNED); ledger_limit_entry_wakeup(le); + if (warn_level_percentage != 0) { + assert(warn_level_percentage <= 100); + assert(limit > 0); /* no negative limit support for warnings */ + assert(limit != LEDGER_LIMIT_INFINITY); /* warn % without limit makes no sense */ + le->le_warn_level = (le->le_limit * warn_level_percentage) / 100; + } else { + le->le_warn_level = LEDGER_LIMIT_INFINITY; + } + + return (KERN_SUCCESS); +} + +kern_return_t +ledger_get_maximum(ledger_t ledger, int entry, + ledger_amount_t *max_observed_balance) +{ + struct ledger_entry *le; + uint32_t now = CURRENT_TOCKSTAMP(); + int i; + + le = &ledger->l_entries[entry]; + + if (!ENTRY_VALID(ledger, entry) || !(le->le_flags & LF_TRACKING_MAX)) { + return (KERN_INVALID_VALUE); + } + + /* + * Start with the current balance; if neither of the recorded peaks are + * within recent history, we use this. + */ + *max_observed_balance = le->le_credit - le->le_debit; + + for (i = 0; i < NTOCKS; i++) { + if (!TOCKSTAMP_IS_STALE(now, le->_le.le_peaks[i].le_time) && + (le->_le.le_peaks[i].le_max > *max_observed_balance)) { + /* + * The peak for this time block isn't stale, and it + * is greater than the current balance -- so use it. + */ + *max_observed_balance = le->_le.le_peaks[i].le_max; + } + } + + lprintf(("ledger_get_maximum: %lld\n", *max_observed_balance)); + + return (KERN_SUCCESS); +} + +/* + * Enable tracking of periodic maximums for this ledger entry. + */ +kern_return_t +ledger_track_maximum(ledger_template_t template, int entry, + __unused int period_in_secs) +{ + template_lock(template); + + if ((entry < 0) || (entry >= template->lt_cnt)) { + template_unlock(template); + return (KERN_INVALID_VALUE); + } + + template->lt_entries[entry].et_flags |= LF_TRACKING_MAX; + template_unlock(template); + return (KERN_SUCCESS); } /* - * Add a callback to be executed when the resource goes into deficit + * Add a callback to be executed when the resource goes into deficit. */ kern_return_t ledger_set_callback(ledger_template_t template, int entry, @@ -698,21 +967,52 @@ ledger_disable_callback(ledger_t ledger, int entry) if (!ENTRY_VALID(ledger, entry)) return (KERN_INVALID_VALUE); + /* + * le_warn_level is used to indicate *if* this ledger has a warning configured, + * in addition to what that warning level is set to. + * This means a side-effect of ledger_disable_callback() is that the + * warning level is forgotten. + */ + ledger->l_entries[entry].le_warn_level = LEDGER_LIMIT_INFINITY; flag_clear(&ledger->l_entries[entry].le_flags, LEDGER_ACTION_CALLBACK); return (KERN_SUCCESS); } /* - * Clear the called_back flag, indicating that we want to be notified - * again when the limit is next exceeded. + * Enable callback notification for a specific ledger entry. + * + * This is only needed if ledger_disable_callback() has previously + * been invoked against an entry; there must already be a callback + * configured. 
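ledger_get_maximum() above combines the tock window with the recorded peaks. To make the arithmetic concrete (assuming SCHED_TICK_SHIFT is 3, i.e. 8 sched_ticks per second, which the "20 sched_tick's" comment above implies): SCHED_TICKS_PER_TOCK = (25 * 8) / 10 = 20, one tock is 2.5 s, and with NTOCKS = 2 a recorded peak stays visible for between 2.5 s and 5 s. The reader side reduces to this sketch (function invented):

static ledger_amount_t
example_rolling_max(struct _le_peak peaks[NTOCKS], uint32_t now_tock,
    ledger_amount_t current_balance)
{
	ledger_amount_t max = current_balance;
	int i;

	for (i = 0; i < NTOCKS; i++) {
		/* only peaks from the current or previous tock count */
		if (!TOCKSTAMP_IS_STALE(now_tock, peaks[i].le_time) &&
		    (ledger_amount_t)peaks[i].le_max > max)
			max = peaks[i].le_max;
	}
	return (max);
}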
*/ kern_return_t -ledger_reset_callback(ledger_t ledger, int entry) +ledger_enable_callback(ledger_t ledger, int entry) { if (!ENTRY_VALID(ledger, entry)) return (KERN_INVALID_VALUE); - flag_clear(&ledger->l_entries[entry].le_flags, CALLED_BACK); + assert(entry_get_callback(ledger, entry) != NULL); + + flag_set(&ledger->l_entries[entry].le_flags, LEDGER_ACTION_CALLBACK); + return (KERN_SUCCESS); +} + +/* + * Query the automatic refill period for this ledger entry. + * + * A period of 0 means this entry has none configured. + */ +kern_return_t +ledger_get_period(ledger_t ledger, int entry, uint64_t *period) +{ + struct ledger_entry *le; + + if (!ENTRY_VALID(ledger, entry)) + return (KERN_INVALID_VALUE); + + le = &ledger->l_entries[entry]; + *period = abstime_to_nsecs(le->_le.le_refill.le_refill_period); + lprintf(("ledger_get_period: %llx\n", *period)); return (KERN_SUCCESS); } @@ -729,15 +1029,69 @@ ledger_set_period(ledger_t ledger, int entry, uint64_t period) return (KERN_INVALID_VALUE); le = &ledger->l_entries[entry]; - le->le_refill_period = nsecs_to_abstime(period); + + /* + * A refill period refills the ledger in multiples of the limit, + * so if you haven't set one yet, you need a lesson on ledgers. + */ + assert(le->le_limit != LEDGER_LIMIT_INFINITY); + + if (le->le_flags & LF_TRACKING_MAX) { + /* + * Refill is incompatible with rolling max tracking. + */ + return (KERN_INVALID_VALUE); + } + + le->_le.le_refill.le_refill_period = nsecs_to_abstime(period); + + /* + * Set the 'starting time' for the next refill to now. Since + * we're resetting the balance to zero here, we consider this + * moment the starting time for accumulating a balance that + * counts towards the limit. + */ + le->_le.le_refill.le_last_refill = mach_absolute_time(); + ledger_zero_balance(ledger, entry); + + flag_set(&le->le_flags, LF_REFILL_SCHEDULED); return (KERN_SUCCESS); } +/* + * Disable automatic refill. + */ +kern_return_t +ledger_disable_refill(ledger_t ledger, int entry) +{ + struct ledger_entry *le; + + if (!ENTRY_VALID(ledger, entry)) + return (KERN_INVALID_VALUE); + + le = &ledger->l_entries[entry]; + + flag_clear(&le->le_flags, LF_REFILL_SCHEDULED); + + return (KERN_SUCCESS); +} + +kern_return_t +ledger_get_actions(ledger_t ledger, int entry, int *actions) +{ + if (!ENTRY_VALID(ledger, entry)) + return (KERN_INVALID_VALUE); + + *actions = ledger->l_entries[entry].le_flags & LEDGER_ACTION_MASK; + lprintf(("ledger_get_actions: %#x\n", *actions)); + return (KERN_SUCCESS); +} + kern_return_t ledger_set_action(ledger_t ledger, int entry, int action) { - lprintf(("ledger_set_action: %d\n", action)); + lprintf(("ledger_set_action: %#x\n", action)); if (!ENTRY_VALID(ledger, entry)) return (KERN_INVALID_VALUE); @@ -794,10 +1148,14 @@ ledger_debit(ledger_t ledger, int entry, ledger_amount_t amount) void ledger_ast(thread_t thread) { - struct ledger *l = thread->t_ledger; - struct ledger *thl = thread->t_threadledger; - uint32_t block; - uint64_t now; + struct ledger *l = thread->t_ledger; + struct ledger *thl; + uint32_t block; + uint64_t now; + uint8_t task_flags; + uint8_t task_percentage; + uint64_t task_interval; + kern_return_t ret; task_t task = thread->task; @@ -807,26 +1165,46 @@ ledger_ast(thread_t thread) ASSERT(thread == current_thread()); top: + /* + * Take a self-consistent snapshot of the CPU usage monitor parameters. The task + * can change them at any point (with the task locked). 
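Putting the configuration calls above together, a hypothetical entry (the template name, key, and values are invented; ledger_template_create() and LEDGER_CREATE_ACTIVE_ENTRIES are assumed from the existing ledger API, and example_ledger_callback is sketched after the next hunk): a 50 ms CPU-time limit refilled every second, with the warning callback armed at 90% of the limit. Note the ordering: ledger_set_period() asserts a finite limit, so the limit is set first.

static void
example_configure_cpu_entry(void)
{
	ledger_template_t t = ledger_template_create("example");
	int idx = ledger_entry_add(t, "cpu_time", "sched", "ns");
	ledger_t l;

	/* one callback serves warnings and the deficit case alike */
	ledger_set_callback(t, idx, example_ledger_callback, NULL, NULL);

	l = ledger_instantiate(t, LEDGER_CREATE_ACTIVE_ENTRIES);

	/* 50 ms limit (this entry's units are ns), warn at 90% of it */
	ledger_set_limit(l, idx, 50 * NSEC_PER_MSEC, 90);

	/* zero the balance and refill a full limit's worth every 1 s */
	ledger_set_period(l, idx, NSEC_PER_SEC);
}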
+ */ + task_lock(task); + task_flags = task->rusage_cpu_flags; + task_percentage = task->rusage_cpu_perthr_percentage; + task_interval = task->rusage_cpu_perthr_interval; + task_unlock(task); + /* * Make sure this thread is up to date with regards to any task-wide per-thread - * CPU limit. + * CPU limit, but only if it doesn't have a thread-private blocking CPU limit. */ - if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) && - ((thread->options & TH_OPT_PROC_CPULIMIT) == 0) ) { + if (((task_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) && + ((thread->options & TH_OPT_PRVT_CPULIMIT) == 0)) { + uint8_t percentage; + uint64_t interval; + int action; + + thread_get_cpulimit(&action, &percentage, &interval); + /* - * Task has a per-thread CPU limit on it, and this thread - * needs it applied. + * If the thread's CPU limits no longer match the task's, or the + * task has a limit but the thread doesn't, update the limit. */ - thread_set_cpulimit(THREAD_CPULIMIT_EXCEPTION, task->rusage_cpu_perthr_percentage, - task->rusage_cpu_perthr_interval); - assert((thread->options & TH_OPT_PROC_CPULIMIT) != 0); - } else if (((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) && - (thread->options & TH_OPT_PROC_CPULIMIT)) { + if (((thread->options & TH_OPT_PROC_CPULIMIT) == 0) || + (interval != task_interval) || (percentage != task_percentage)) { + thread_set_cpulimit(THREAD_CPULIMIT_EXCEPTION, task_percentage, task_interval); + assert((thread->options & TH_OPT_PROC_CPULIMIT) != 0); + } + } else if (((task_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) && + (thread->options & TH_OPT_PROC_CPULIMIT)) { + assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0); + /* * Task no longer has a per-thread CPU limit; remove this thread's * corresponding CPU limit. */ - thread_set_cpulimit(THREAD_CPULIMIT_EXCEPTION, 0, 0); + thread_set_cpulimit(THREAD_CPULIMIT_DISABLE, 0, 0); assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0); } @@ -844,6 +1222,11 @@ top: block = 0; now = mach_absolute_time(); + /* + * Note that thread->t_threadledger may have been changed by the + * thread_set_cpulimit() call above - so don't examine it until afterwards. + */ + thl = thread->t_threadledger; if (LEDGER_VALID(thl)) { block |= ledger_check_needblock(thl, now); } @@ -878,12 +1261,39 @@ ledger_check_needblock(ledger_t l, uint64_t now) for (i = 0; i < l->l_size; i++) { le = &l->l_entries[i]; - if (limit_exceeded(le) == FALSE) + + lc = entry_get_callback(l, i); + + if (limit_exceeded(le) == FALSE) { + if (le->le_flags & LEDGER_ACTION_CALLBACK) { + /* + * If needed, invoke the callback as a warning. + * This needs to happen both when the balance rises above + * the warning level, and also when it dips back below it. + */ + assert(lc != NULL); + /* + * See comments for matching logic in ledger_check_new_balance(). + */ + if (warn_level_exceeded(le)) { + flags = flag_set(&le->le_flags, LF_WARNED); + if ((flags & LF_WARNED) == 0) { + lc->lc_func(LEDGER_WARNING_ROSE_ABOVE, lc->lc_param0, lc->lc_param1); + } + } else { + flags = flag_clear(&le->le_flags, LF_WARNED); + if (flags & LF_WARNED) { + lc->lc_func(LEDGER_WARNING_DIPPED_BELOW, lc->lc_param0, lc->lc_param1); + } + } + } + continue; + } - /* Check for refill eligibility */ - if (le->le_refill_period) { - if ((le->le_last_refill + le->le_refill_period) > now) { + /* We're over the limit, so refill if we are eligible and past due. 
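A client callback matching the dispatch above branches on its first argument: LEDGER_WARNING_ROSE_ABOVE or LEDGER_WARNING_DIPPED_BELOW (defined in the ledger.h hunk below) for warning-level crossings, and FALSE (0) for a genuine deficit, the lc_func(FALSE, ...) call in the limit-exceeded path just below. A hedged sketch:

static void
example_ledger_callback(int warning, const void *param0, const void *param1)
{
	(void)param0;
	(void)param1;

	if (warning == LEDGER_WARNING_ROSE_ABOVE) {
		/* balance just climbed above the warning level */
	} else if (warning == LEDGER_WARNING_DIPPED_BELOW) {
		/* balance fell back below the warning level */
	} else {
		/* warning == FALSE: the limit itself was exceeded */
	}
}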
*/ + if (le->le_flags & LF_REFILL_SCHEDULED) { + if ((le->_le.le_refill.le_last_refill + le->_le.le_refill.le_refill_period) > now) { ledger_refill(now, l, i); if (limit_exceeded(le) == FALSE) continue; @@ -894,13 +1304,17 @@ ledger_check_needblock(ledger_t l, uint64_t now) block = 1; if ((le->le_flags & LEDGER_ACTION_CALLBACK) == 0) continue; - lc = entry_get_callback(l, i); + + /* + * If the LEDGER_ACTION_CALLBACK flag is on, we expect there to + * be a registered callback. + */ assert(lc != NULL); - flags = flag_set(&le->le_flags, CALLED_BACK); + flags = flag_set(&le->le_flags, LF_CALLED_BACK); /* Callback has already been called */ - if (flags & CALLED_BACK) + if (flags & LF_CALLED_BACK) continue; - lc->lc_func(lc->lc_param0, lc->lc_param1); + lc->lc_func(FALSE, lc->lc_param0, lc->lc_param1); } return(block); } @@ -922,12 +1336,12 @@ ledger_perform_blocking(ledger_t l) /* Prepare to sleep until the resource is refilled */ ret = assert_wait_deadline(le, TRUE, - le->le_last_refill + le->le_refill_period); + le->_le.le_refill.le_last_refill + le->_le.le_refill.le_refill_period); if (ret != THREAD_WAITING) return(KERN_SUCCESS); /* Mark that somebody is waiting on this entry */ - flag_set(&le->le_flags, WAKE_NEEDED); + flag_set(&le->le_flags, LF_WAKE_NEEDED); ret = thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_LEDGER); @@ -963,6 +1377,23 @@ ledger_get_entries(ledger_t ledger, int entry, ledger_amount_t *credit, return (KERN_SUCCESS); } +kern_return_t +ledger_get_balance(ledger_t ledger, int entry, ledger_amount_t *balance) +{ + struct ledger_entry *le; + + if (!ENTRY_VALID(ledger, entry)) + return (KERN_INVALID_ARGUMENT); + + le = &ledger->l_entries[entry]; + + assert((le->le_credit >= 0) && (le->le_debit >= 0)); + + *balance = le->le_credit - le->le_debit; + + return (KERN_SUCCESS); +} + int ledger_template_info(void **buf, int *len) { @@ -1002,8 +1433,27 @@ ledger_template_info(void **buf, int *len) return (0); } +static void +ledger_fill_entry_info(struct ledger_entry *le, + struct ledger_entry_info *lei, + uint64_t now) +{ + assert(le != NULL); + assert(lei != NULL); + + memset(lei, 0, sizeof (*lei)); + + lei->lei_limit = le->le_limit; + lei->lei_credit = le->le_credit; + lei->lei_debit = le->le_debit; + lei->lei_balance = lei->lei_credit - lei->lei_debit; + lei->lei_refill_period = (le->le_flags & LF_REFILL_SCHEDULED) ? 
+ abstime_to_nsecs(le->_le.le_refill.le_refill_period) : 0; + lei->lei_last_refill = abstime_to_nsecs(now - le->_le.le_refill.le_last_refill); +} + int -ledger_entry_info(task_t task, void **buf, int *len) +ledger_get_task_entry_info_multiple(task_t task, void **buf, int *len) { struct ledger_entry_info *lei; struct ledger_entry *le; @@ -1024,15 +1474,7 @@ ledger_entry_info(task_t task, void **buf, int *len) le = l->l_entries; for (i = 0; i < *len; i++) { - memset(lei, 0, sizeof (*lei)); - lei->lei_limit = le->le_limit; - lei->lei_credit = le->le_credit; - lei->lei_debit = le->le_debit; - lei->lei_balance = lei->lei_credit - lei->lei_debit; - lei->lei_refill_period = - abstime_to_nsecs(le->le_refill_period); - lei->lei_last_refill = - abstime_to_nsecs(now - le->le_last_refill); + ledger_fill_entry_info(le, lei, now); le++; lei++; } @@ -1040,6 +1482,22 @@ ledger_entry_info(task_t task, void **buf, int *len) return (0); } +void +ledger_get_entry_info(ledger_t ledger, + int entry, + struct ledger_entry_info *lei) +{ + uint64_t now = mach_absolute_time(); + + assert(ledger != NULL); + assert(lei != NULL); + assert(entry < ledger->l_size); + + struct ledger_entry *le = &ledger->l_entries[entry]; + + ledger_fill_entry_info(le, lei, now); +} + int ledger_info(task_t task, struct ledger_info *info) { diff --git a/osfmk/kern/ledger.h b/osfmk/kern/ledger.h index 982781686..17dcf329a 100644 --- a/osfmk/kern/ledger.h +++ b/osfmk/kern/ledger.h @@ -75,11 +75,16 @@ typedef struct ledger_template *ledger_template_t; /* Action to take when a ledger goes into deficit */ #define LEDGER_ACTION_IGNORE 0x0000 #define LEDGER_ACTION_BLOCK 0x0010 -#define LEDGER_ACTION_EXCEPTION 0x0020 -#define LEDGER_ACTION_CALLBACK 0x0040 +#define LEDGER_ACTION_CALLBACK 0x0020 #define LEDGER_ACTION_MASK 0x00f0 -typedef void (*ledger_callback_t)(const void * param0, const void *param1); +/* + * Types of warnings that trigger a callback. 
+ */ +#define LEDGER_WARNING_ROSE_ABOVE 1 +#define LEDGER_WARNING_DIPPED_BELOW 2 + +typedef void (*ledger_callback_t)(int warning, const void * param0, const void *param1); extern void ledger_init(void); @@ -89,6 +94,8 @@ extern int ledger_entry_add(ledger_template_t template, const char *key, const char *group, const char *units); extern kern_return_t ledger_set_callback(ledger_template_t template, int entry, ledger_callback_t callback, const void *param0, const void *param1); +extern kern_return_t ledger_track_maximum(ledger_template_t template, int entry, + int period_in_secs); extern int ledger_key_lookup(ledger_template_t template, const char *key); /* value of entry type */ @@ -96,19 +103,30 @@ extern int ledger_key_lookup(ledger_template_t template, const char *key); #define LEDGER_CREATE_INACTIVE_ENTRIES 1 extern ledger_t ledger_instantiate(ledger_template_t template, int entry_type); extern kern_return_t ledger_disable_callback(ledger_t ledger, int entry); -extern kern_return_t ledger_reset_callback(ledger_t ledger, int entry); +extern kern_return_t ledger_enable_callback(ledger_t ledger, int entry); +extern kern_return_t ledger_get_limit(ledger_t ledger, int entry, + ledger_amount_t *limit); extern kern_return_t ledger_set_limit(ledger_t ledger, int entry, - ledger_amount_t limit); + ledger_amount_t limit, uint8_t warn_level_percentage); +extern kern_return_t ledger_get_maximum(ledger_t ledger, int entry, + ledger_amount_t *max_observed_balance); +extern kern_return_t ledger_get_actions(ledger_t ledger, int entry, int *actions); extern kern_return_t ledger_set_action(ledger_t ledger, int entry, int action); +extern kern_return_t ledger_get_period(ledger_t ledger, int entry, + uint64_t *period); extern kern_return_t ledger_set_period(ledger_t ledger, int entry, uint64_t period); +extern kern_return_t ledger_disable_refill(ledger_t l, int entry); extern kern_return_t ledger_entry_setactive(ledger_t ledger, int entry); extern kern_return_t ledger_credit(ledger_t ledger, int entry, ledger_amount_t amount); extern kern_return_t ledger_debit(ledger_t ledger, int entry, ledger_amount_t amount); +extern kern_return_t ledger_zero_balance(ledger_t ledger, int entry); extern kern_return_t ledger_get_entries(ledger_t ledger, int entry, ledger_amount_t *credit, ledger_amount_t *debit); +extern kern_return_t ledger_get_balance(ledger_t ledger, int entry, + ledger_amount_t *balance); extern void ledger_ast(thread_t thread); extern void set_astledger(thread_t thread); @@ -126,7 +144,14 @@ extern kern_return_t ledger_dereference(ledger_t ledger); extern int ledger_limit(task_t task, struct ledger_limit_args *args); #endif extern int ledger_info(task_t task, struct ledger_info *info); -extern int ledger_entry_info(task_t task, void **buf, int *len); + +extern int +ledger_get_task_entry_info_multiple(task_t task, void **buf, int *len); + +extern void +ledger_get_entry_info(ledger_t ledger, int entry, + struct ledger_entry_info *lei); + extern int ledger_template_info(void **buf, int *len); #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/kern/locks.c b/osfmk/kern/locks.c index 7ee5a2f4e..91e60ec74 100644 --- a/osfmk/kern/locks.c +++ b/osfmk/kern/locks.c @@ -609,7 +609,10 @@ lck_mtx_lock_wait ( KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE, holder->sched_pri, priority, holder, lck, 0); - + /* This can potentially elevate the holder into the realtime + * priority band; the implementation in locks_i386.c enforces a + * MAXPRI_KERNEL ceiling. 
+ */ set_sched_pri(holder, priority); } thread_unlock(holder); @@ -699,6 +702,15 @@ lck_mtx_lock_acquire( else mutex->lck_mtx_pri = 0; +#if CONFIG_DTRACE + if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) { + if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) { + LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0); + } else { + LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0); + } + } +#endif return (mutex->lck_mtx_waiters); } @@ -921,6 +933,92 @@ lck_rw_sleep_deadline( return res; } +/* + * Reader-writer lock promotion + * + * We support a limited form of reader-writer + * lock promotion whose effects are: + * + * * Qualifying threads have decay disabled + * * Scheduler priority is reset to a floor of + * of their statically assigned priority + * or BASEPRI_BACKGROUND + * + * The rationale is that lck_rw_ts do not have + * a single owner, so we cannot apply a directed + * priority boost from all waiting threads + * to all holding threads without maintaining + * lists of all shared owners and all waiting + * threads for every lock. + * + * Instead (and to preserve the uncontended fast- + * path), acquiring (or attempting to acquire) + * a RW lock in shared or exclusive lock increments + * a per-thread counter. Only if that thread stops + * making forward progress (for instance blocking + * on a mutex, or being preempted) do we consult + * the counter and apply the priority floor. + * When the thread becomes runnable again (or in + * the case of preemption it never stopped being + * runnable), it has the priority boost and should + * be in a good position to run on the CPU and + * release all RW locks (at which point the priority + * boost is cleared). + * + * Care must be taken to ensure that priority + * boosts are not retained indefinitely, since unlike + * mutex priority boosts (where the boost is tied + * to the mutex lifecycle), the boost is tied + * to the thread and independent of any particular + * lck_rw_t. Assertions are in place on return + * to userspace so that the boost is not held + * indefinitely. + * + * The routines that increment/decrement the + * per-thread counter should err on the side of + * incrementing any time a preemption is possible + * and the lock would be visible to the rest of the + * system as held (so it should be incremented before + * interlocks are dropped/preemption is enabled, or + * before a CAS is executed to acquire the lock). 
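The counting protocol described above is easiest to see at the release edge. This sketch (invented helper; the real increment/decrement sites live in the lock paths in locks_i386.c, and the real code re-checks the flag under the thread lock, as lck_rw_clear_promotion(), defined just below, does) shows the unlock side consulting the per-thread counter and clearing the floor:

static void
example_rw_unlock_epilogue(thread_t thread)
{
	/*
	 * The decrement follows every shared or exclusive release;
	 * only when the last RW lock is dropped can the boost go.
	 */
	if (--thread->rwlock_count == 0 &&
	    (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		lck_rw_clear_promotion(thread);
	}
}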
+ * + */ + +/* + * lck_rw_clear_promotion: Undo priority promotions when the last RW + * lock is released by a thread (if a promotion was active) + */ +void lck_rw_clear_promotion(thread_t thread) +{ + assert(thread->rwlock_count == 0); + + /* Cancel any promotions if the thread had actually blocked while holding a RW lock */ + spl_t s = splsched(); + + thread_lock(thread); + + if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) { + thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED; + + if (thread->sched_flags & TH_SFLAG_PROMOTED) { + /* Thread still has a mutex promotion */ + } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE, + thread->sched_pri, DEPRESSPRI, 0, 0, 0); + + set_sched_pri(thread, DEPRESSPRI); + } else { + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE, + thread->sched_pri, thread->priority, 0, 0, 0); + + SCHED(compute_priority)(thread, FALSE); + } + } + + thread_unlock(thread); + splx(s); +} + kern_return_t host_lockgroup_info( host_t host, diff --git a/osfmk/kern/locks.h b/osfmk/kern/locks.h index 659336194..dbf40e76b 100644 --- a/osfmk/kern/locks.h +++ b/osfmk/kern/locks.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -40,7 +40,7 @@ #include extern void lck_mod_init( - void) __attribute__((section("__TEXT, initcode"))); + void); typedef unsigned int lck_type_t; @@ -286,12 +286,8 @@ extern void lck_mtx_init( extern void lck_mtx_lock( lck_mtx_t *lck); -#if defined(__i386__) -extern void lck_mtx_unlock(lck_mtx_t *lck) __DARWIN10_ALIAS(lck_mtx_unlock); -#else extern void lck_mtx_unlock( lck_mtx_t *lck); -#endif /* __i386__ */ extern void lck_mtx_destroy( lck_mtx_t *lck, @@ -328,6 +324,9 @@ extern void lck_mtx_yield ( extern boolean_t lck_mtx_try_lock_spin( lck_mtx_t *lck); +extern boolean_t lck_mtx_try_lock_spin_always( + lck_mtx_t *lck); + extern void lck_mtx_lock_spin_always( lck_mtx_t *lck); @@ -342,6 +341,7 @@ extern void lck_mtx_convert_spin( #else #define lck_mtx_try_lock_spin(l) lck_mtx_try_lock(l) #define lck_mtx_lock_spin(l) lck_mtx_lock(l) +#define lck_mtx_try_lock_spin_always(l) lck_spin_try_lock(l) #define lck_mtx_lock_spin_always(l) lck_spin_lock(l) #define lck_mtx_unlock_always(l) lck_spin_unlock(l) #define lck_mtx_convert_spin(l) do {} while (0) @@ -387,7 +387,8 @@ typedef unsigned int lck_rw_type_t; #ifdef XNU_KERNEL_PRIVATE #define LCK_RW_ASSERT_SHARED 0x01 #define LCK_RW_ASSERT_EXCLUSIVE 0x02 -#define LCK_RW_ASSERT_HELD (LCK_RW_ASSERT_SHARED | LCK_RW_ASSERT_EXCLUSIVE) +#define LCK_RW_ASSERT_HELD 0x03 +#define LCK_RW_ASSERT_NOTHELD 0x04 #endif __BEGIN_DECLS @@ -430,6 +431,9 @@ extern void lck_rw_unlock_exclusive( extern void lck_rw_assert( lck_rw_t *lck, unsigned int type); + +extern void lck_rw_clear_promotion( + thread_t thread); #endif #ifdef KERNEL_PRIVATE diff --git a/osfmk/kern/machine.h b/osfmk/kern/machine.h index 6d46f2c6e..fcdd78f20 100644 --- a/osfmk/kern/machine.h +++ b/osfmk/kern/machine.h @@ -109,7 +109,7 @@ extern char *machine_boot_info( * levels of return pc information. 
*/ extern void machine_callstack( - natural_t *buf, + uintptr_t *buf, vm_size_t callstack_max); extern void consider_machine_collect(void); diff --git a/osfmk/kern/mk_sp.c b/osfmk/kern/mk_sp.c index 83ec87bb1..1d1a6a5e4 100644 --- a/osfmk/kern/mk_sp.c +++ b/osfmk/kern/mk_sp.c @@ -94,15 +94,23 @@ thread_policy_common( if (policy == POLICY_TIMESHARE && !oldmode) { thread->sched_mode = TH_MODE_TIMESHARE; - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { sched_share_incr(); + + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_incr(); + } } else if (policy != POLICY_TIMESHARE && oldmode) { thread->sched_mode = TH_MODE_FIXED; - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_decr(); + sched_share_decr(); + } } } else { @@ -131,21 +139,8 @@ thread_policy_common( if (priority < MINPRI) priority = MINPRI; -#if CONFIG_EMBEDDED - if ((thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) && - (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL)) { - thread->saved_importance = priority - thread->task_priority; - priority = MAXPRI_THROTTLE; - } else { - thread->importance = priority - thread->task_priority; - } - /* No one can have a base priority less than MAXPRI_THROTTLE */ - if (priority < MAXPRI_THROTTLE) - priority = MAXPRI_THROTTLE; -#else /* CONFIG_EMBEDDED */ thread->importance = priority - thread->task_priority; -#endif /* CONFIG_EMBEDDED */ set_priority(thread, priority); } diff --git a/osfmk/kern/mk_timer.c b/osfmk/kern/mk_timer.c index f0dd81cc9..fdb6174dd 100644 --- a/osfmk/kern/mk_timer.c +++ b/osfmk/kern/mk_timer.c @@ -244,12 +244,12 @@ kern_return_t mk_timer_arm_trap( struct mk_timer_arm_trap_args *args) { - mach_port_name_t name = args->name; + mach_port_name_t name = args->name; uint64_t expire_time = args->expire_time; mk_timer_t timer; ipc_space_t myspace = current_space(); ipc_port_t port; - kern_return_t result; + kern_return_t result; result = ipc_port_translate_receive(myspace, name, &port); if (result != KERN_SUCCESS) @@ -258,6 +258,7 @@ mk_timer_arm_trap( if (ip_kotype(port) == IKOT_TIMER) { timer = (mk_timer_t)port->ip_kobject; assert(timer != NULL); + simple_lock(&timer->lock); assert(timer->port == port); ip_unlock(port); @@ -265,8 +266,15 @@ mk_timer_arm_trap( if (!timer->is_dead) { timer->is_armed = TRUE; - if (!thread_call_enter_delayed(&timer->call_entry, expire_time)) - timer->active++; + if (expire_time > mach_absolute_time()) { + if (!thread_call_enter_delayed_with_leeway(&timer->call_entry, NULL, + expire_time, 0, THREAD_CALL_DELAY_USER_NORMAL)) + timer->active++; + } + else { + if (!thread_call_enter1(&timer->call_entry, NULL)) + timer->active++; + } } simple_unlock(&timer->lock); diff --git a/osfmk/kern/mk_timer.h b/osfmk/kern/mk_timer.h index adcad2133..56904a9b3 100644 --- a/osfmk/kern/mk_timer.h +++ b/osfmk/kern/mk_timer.h @@ -56,7 +56,7 @@ typedef struct mk_timer *mk_timer_t, mk_timer_data_t; void mk_timer_port_destroy( ipc_port_t port); -void mk_timer_init(void) __attribute__((section("__TEXT, initcode"))); +void mk_timer_init(void); #endif /* MACH_KERNEL_PRIVATE */ diff --git a/osfmk/kern/page_decrypt.h b/osfmk/kern/page_decrypt.h index ad81beef3..ebad3cddf 100644 --- a/osfmk/kern/page_decrypt.h +++ b/osfmk/kern/page_decrypt.h @@ -31,6 +31,8 @@ #ifndef _KERN_PAGE_DECRYPT_H #define _KERN_PAGE_DECRYPT_H +#include + /* * Interface for 
DSMOS */ @@ -54,6 +56,13 @@ struct pager_crypt_info { }; typedef struct pager_crypt_info pager_crypt_info_t; +struct crypt_file_data { + char *filename; + cpu_type_t cputype; + cpu_subtype_t cpusubtype; +}; +typedef struct crypt_file_data crypt_file_data_t; + typedef int (*text_crypter_create_hook_t)(struct pager_crypt_info *crypt_info, const char *id, void *crypt_data); extern void text_crypter_create_hook_set(text_crypter_create_hook_t hook); diff --git a/osfmk/kern/printf.c b/osfmk/kern/printf.c index 88813d844..8772a0251 100644 --- a/osfmk/kern/printf.c +++ b/osfmk/kern/printf.c @@ -844,7 +844,6 @@ kdb_printf_unbuffered(const char *fmt, ...) return 0; } -#if !CONFIG_EMBEDDED static void copybyte(int c, void *arg) @@ -877,4 +876,3 @@ sprintf(char *buf, const char *fmt, ...) *copybyte_str = '\0'; return (int)strlen(buf); } -#endif /* !CONFIG_EMBEDDED */ diff --git a/osfmk/kern/priority.c b/osfmk/kern/priority.c index a7fa6ea44..d3de06bba 100644 --- a/osfmk/kern/priority.c +++ b/osfmk/kern/priority.c @@ -92,6 +92,7 @@ thread_quantum_expire( processor_t processor = p0; thread_t thread = p1; ast_t preempt; + uint64_t ctime; SCHED_STATS_QUANTUM_TIMER_EXPIRATION(processor); @@ -117,7 +118,7 @@ thread_quantum_expire( * Check for fail-safe trip. */ if ((thread->sched_mode == TH_MODE_REALTIME || thread->sched_mode == TH_MODE_FIXED) && - !(thread->sched_flags & TH_SFLAG_PROMOTED) && + !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) && !(thread->options & TH_OPT_SYSTEM_CRITICAL)) { uint64_t new_computation; @@ -184,9 +185,10 @@ thread_quantum_expire( PROCESSOR_DATA(processor, thread_timer)); } - processor->quantum_end = mach_absolute_time() + thread->current_quantum; + ctime = mach_absolute_time(); + processor->quantum_end = ctime + thread->current_quantum; timer_call_enter1(&processor->quantum_timer, thread, - processor->quantum_end, TIMER_CALL_CRITICAL); + processor->quantum_end, TIMER_CALL_SYS_CRITICAL); /* * Context switch check. @@ -205,6 +207,10 @@ thread_quantum_expire( } thread_unlock(thread); + +#if defined(CONFIG_SCHED_TRADITIONAL) + sched_traditional_consider_maintenance(ctime); +#endif /* CONFIG_SCHED_TRADITIONAL */ } #if defined(CONFIG_SCHED_TRADITIONAL) @@ -241,7 +247,7 @@ lightweight_update_priority(thread_t thread) * the thread has not been promoted * and is not depressed. */ - if ( !(thread->sched_flags & TH_SFLAG_PROMOTED) && + if ( !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) && !(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) ) compute_my_priority(thread); } @@ -272,24 +278,6 @@ static struct shift_data sched_decay_shifts[SCHED_DECAY_TICKS] = { * * Calculate the timesharing priority based upon usage and load. 
*/ -#ifdef CONFIG_EMBEDDED - -#define do_priority_computation(thread, pri) \ - MACRO_BEGIN \ - (pri) = (thread)->priority /* start with base priority */ \ - - ((thread)->sched_usage >> (thread)->pri_shift); \ - if ((pri) < MAXPRI_THROTTLE) { \ - if ((thread)->task->max_priority > MAXPRI_THROTTLE) \ - (pri) = MAXPRI_THROTTLE; \ - else \ - if ((pri) < MINPRI_USER) \ - (pri) = MINPRI_USER; \ - } else \ - if ((pri) > MAXPRI_KERNEL) \ - (pri) = MAXPRI_KERNEL; \ - MACRO_END - -#else #define do_priority_computation(thread, pri) \ MACRO_BEGIN \ @@ -302,7 +290,6 @@ static struct shift_data sched_decay_shifts[SCHED_DECAY_TICKS] = { (pri) = MAXPRI_KERNEL; \ MACRO_END -#endif /* defined(CONFIG_SCHED_TRADITIONAL) */ #endif @@ -341,7 +328,7 @@ compute_priority( { register int priority; - if ( !(thread->sched_flags & TH_SFLAG_PROMOTED) && + if ( !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) && (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) || override_depress ) ) { if (thread->sched_mode == TH_MODE_TIMESHARE) @@ -409,7 +396,16 @@ update_priority( ticks = sched_tick - thread->sched_stamp; assert(ticks != 0); thread->sched_stamp += ticks; - thread->pri_shift = sched_pri_shift; + if (sched_use_combined_fgbg_decay) + thread->pri_shift = sched_combined_fgbg_pri_shift; + else if (thread->max_priority <= MAXPRI_THROTTLE) + thread->pri_shift = sched_background_pri_shift; + else + thread->pri_shift = sched_pri_shift; + + /* If requested, accelerate aging of sched_usage */ + if (sched_decay_usage_age_factor > 1) + ticks *= sched_decay_usage_age_factor; /* * Gather cpu usage data. @@ -475,55 +471,12 @@ update_priority( thread->sched_flags &= ~TH_SFLAG_FAILSAFE; } -#if CONFIG_EMBEDDED - /* Check for pending throttle transitions, and safely switch queues */ - if (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_MASK) { - boolean_t removed = thread_run_queue_remove(thread); - - if (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_DEMOTION) { - if (thread->sched_mode == TH_MODE_REALTIME) { - thread->saved_mode = thread->sched_mode; - thread->sched_mode = TH_MODE_TIMESHARE; - - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) - sched_share_incr(); - } else { - /* - * It's possible that this is a realtime thread that has - * already tripped the failsafe, in which case saved_mode - * is already set correctly. - */ - if (!(thread->sched_flags & TH_SFLAG_FAILSAFE)) { - thread->saved_mode = thread->sched_mode; - } - thread->sched_flags &= ~TH_SFLAG_FAILSAFE; - } - thread->sched_flags |= TH_SFLAG_THROTTLED; - - } else { - if ((thread->sched_mode == TH_MODE_TIMESHARE) - && (thread->saved_mode == TH_MODE_REALTIME)) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) - sched_share_decr(); - } - - thread->sched_mode = thread->saved_mode; - thread->saved_mode = TH_MODE_NONE; - thread->sched_flags &= ~TH_SFLAG_THROTTLED; - } - - thread->sched_flags &= ~(TH_SFLAG_PENDING_THROTTLE_MASK); - - if (removed) - thread_setrun(thread, SCHED_TAILQ); - } -#endif /* * Recompute scheduled priority if appropriate. 
*/ if ( (thread->sched_mode == TH_MODE_TIMESHARE) && - !(thread->sched_flags & TH_SFLAG_PROMOTED) && + !(thread->sched_flags & TH_SFLAG_PROMOTED_MASK) && !(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) ) { register int new_pri; @@ -531,7 +484,24 @@ update_priority( if (new_pri != thread->sched_pri) { boolean_t removed = thread_run_queue_remove(thread); +#if 0 + if (sched_use_combined_fgbg_decay && ((thread)->task->max_priority > MAXPRI_THROTTLE) && (new_pri == MAXPRI_THROTTLE)) { + /* with the alternate (new) algorithm, would we have decayed this far? */ + int alt_pri = thread->priority - (thread->sched_usage >> sched_pri_shift); + if ((alt_pri > new_pri) && (sched_background_count > 0)) { + printf("thread %p would have decayed to only %d instead of %d\n", thread, alt_pri, new_pri); + } + } +#endif + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_DECAY_PRIORITY)|DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), + thread->priority, + thread->sched_pri, + new_pri, + 0); thread->sched_pri = new_pri; + if (removed) thread_setrun(thread, SCHED_TAILQ); } diff --git a/osfmk/kern/processor.c b/osfmk/kern/processor.c index ead95b882..709019b9f 100644 --- a/osfmk/kern/processor.c +++ b/osfmk/kern/processor.c @@ -91,6 +91,7 @@ decl_simple_lock_data(static,pset_node_lock) queue_head_t tasks; queue_head_t terminated_tasks; /* To be used ONLY for stackshot. */ int tasks_count; +int terminated_tasks_count; queue_head_t threads; int threads_count; decl_lck_mtx_data(,tasks_threads_lock) @@ -259,6 +260,7 @@ pset_init( pset_count_init_hint(pset, PROCESSOR_NULL); pset->cpu_set_low = pset->cpu_set_hi = 0; pset->cpu_set_count = 0; + pset->pending_AST_cpu_mask = 0; pset_lock_init(pset); pset->pset_self = IP_NULL; pset->pset_name_self = IP_NULL; @@ -337,8 +339,20 @@ processor_info( case PROCESSOR_CPU_LOAD_INFO: { processor_cpu_load_info_t cpu_load_info; - timer_data_t idle_temp; timer_t idle_state; + uint64_t idle_time_snapshot1, idle_time_snapshot2; + uint64_t idle_time_tstamp1, idle_time_tstamp2; + + /* + * We capture the accumulated idle time twice over + * the course of this function, as well as the timestamps + * when each were last updated. Since these are + * all done using non-atomic racy mechanisms, the + * most we can infer is whether values are stable. + * timer_grab() is the only function that can be + * used reliably on another processor's per-processor + * data. + */ if (*count < PROCESSOR_CPU_LOAD_INFO_COUNT) return (KERN_FAILURE); @@ -358,17 +372,35 @@ processor_info( } idle_state = &PROCESSOR_DATA(processor, idle_state); - idle_temp = *idle_state; - - if (PROCESSOR_DATA(processor, current_state) != idle_state || - timer_grab(&idle_temp) != timer_grab(idle_state)) { + idle_time_snapshot1 = timer_grab(idle_state); + idle_time_tstamp1 = idle_state->tstamp; + + /* + * Idle processors are not continually updating their + * per-processor idle timer, so it may be extremely + * out of date, resulting in an over-representation + * of non-idle time between two measurement + * intervals by e.g. top(1). If we are non-idle, or + * have evidence that the timer is being updated + * concurrently, we consider its value up-to-date. 
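+ * For example, a processor that has sat idle for several seconds will typically report identical snapshot/tstamp pairs across both reads; in that case the stale accumulated value is extrapolated below by adding (mach_absolute_time() - tstamp) before conversion to ticks.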
+ */ + if (PROCESSOR_DATA(processor, current_state) != idle_state) { + cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = + (uint32_t)(idle_time_snapshot1 / hz_tick_interval); + } else if ((idle_time_snapshot1 != (idle_time_snapshot2 = timer_grab(idle_state))) || + (idle_time_tstamp1 != (idle_time_tstamp2 = idle_state->tstamp))){ + /* Idle timer is being updated concurrently, second stamp is good enough */ cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = - (uint32_t)(timer_grab(&PROCESSOR_DATA(processor, idle_state)) / hz_tick_interval); + (uint32_t)(idle_time_snapshot2 / hz_tick_interval); } else { - timer_advance(&idle_temp, mach_absolute_time() - idle_temp.tstamp); + /* + * Idle timer may be very stale. Fortunately we have established + * that idle_time_snapshot1 and idle_time_tstamp1 are unchanging + */ + idle_time_snapshot1 += mach_absolute_time() - idle_time_tstamp1; cpu_load_info->cpu_ticks[CPU_STATE_IDLE] = - (uint32_t)(timer_grab(&idle_temp) / hz_tick_interval); + (uint32_t)(idle_time_snapshot1 / hz_tick_interval); } cpu_load_info->cpu_ticks[CPU_STATE_NICE] = 0; @@ -955,15 +987,6 @@ processor_set_threads( { return KERN_FAILURE; } -#elif defined(CONFIG_EMBEDDED) -kern_return_t -processor_set_threads( - __unused processor_set_t pset, - __unused thread_array_t *thread_list, - __unused mach_msg_type_number_t *count) -{ - return KERN_NOT_SUPPORTED; -} #else kern_return_t processor_set_threads( diff --git a/osfmk/kern/processor.h b/osfmk/kern/processor.h index 0407b8541..a634c2594 100644 --- a/osfmk/kern/processor.h +++ b/osfmk/kern/processor.h @@ -99,7 +99,10 @@ struct processor_set { int pset_runq_bound_count; /* # of threads in runq bound to any processor in pset */ #endif - + + /* CPUs that have been sent an unacknowledged remote AST for scheduling purposes */ + uint32_t pending_AST_cpu_mask; + struct ipc_port * pset_self; /* port for operations */ struct ipc_port * pset_name_self; /* port for information */ @@ -121,7 +124,7 @@ struct pset_node { extern struct pset_node pset_node0; extern queue_head_t tasks, terminated_tasks, threads; /* Terminated tasks are ONLY for stackshot */ -extern int tasks_count, threads_count; +extern int tasks_count, terminated_tasks_count, threads_count; decl_lck_mtx_data(extern,tasks_threads_lock) struct processor_meta { @@ -237,12 +240,12 @@ MACRO_BEGIN \ MACRO_END -extern void processor_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +extern void processor_bootstrap(void); extern void processor_init( processor_t processor, int cpu_id, - processor_set_t processor_set) __attribute__((section("__TEXT, initcode"))); + processor_set_t processor_set); extern void processor_meta_init( processor_t processor, @@ -264,7 +267,7 @@ extern processor_set_t pset_create( extern void pset_init( processor_set_t pset, - pset_node_t node) __attribute__((section("__TEXT, initcode"))); + pset_node_t node); extern kern_return_t processor_info_count( processor_flavor_t flavor, diff --git a/osfmk/kern/processor_data.h b/osfmk/kern/processor_data.h index 5a83c61c8..da90d7b40 100644 --- a/osfmk/kern/processor_data.h +++ b/osfmk/kern/processor_data.h @@ -81,13 +81,14 @@ struct processor_data { ipc_kmsg_t entries[IKM_STASH]; unsigned int avail; } ikm_cache; - - unsigned long page_grab_count; int start_color; + unsigned long page_grab_count; void *free_pages; - struct processor_sched_statistics sched_stats; - uint64_t timer_call_ttd; /* current timer call time-to-deadline */ + uint64_t timer_call_ttd; /* current timer call time-to-deadline */ + uint64_t wakeups_issued_total; /* 
Count of thread wakeups issued + * by this processor + */ }; typedef struct processor_data processor_data_t; diff --git a/osfmk/kern/queue.h b/osfmk/kern/queue.h index 2202f812d..db7377840 100644 --- a/osfmk/kern/queue.h +++ b/osfmk/kern/queue.h @@ -350,7 +350,8 @@ MACRO_BEGIN \ (head)->next = (queue_entry_t) (elt); \ } \ else { \ - ((type)__prev)->field.next = (queue_entry_t)(elt);\ + ((type)(void *)__prev)->field.next = \ + (queue_entry_t)(elt); \ } \ (elt)->field.prev = __prev; \ (elt)->field.next = head; \ @@ -377,7 +378,8 @@ MACRO_BEGIN \ (head)->prev = (queue_entry_t) (elt); \ } \ else { \ - ((type)__next)->field.prev = (queue_entry_t)(elt);\ + ((type)(void *)__next)->field.prev = \ + (queue_entry_t)(elt); \ } \ (elt)->field.next = __next; \ (elt)->field.prev = head; \ @@ -407,7 +409,8 @@ MACRO_BEGIN \ (head)->next = (queue_entry_t)(elt); \ } else { /* last element */ \ __prev = (elt)->field.prev = (head)->prev; \ - ((type)__prev)->field.next = (queue_entry_t)(elt);\ + ((type)(void *)__prev)->field.next = \ + (queue_entry_t)(elt); \ } \ (head)->prev = (queue_entry_t)(elt); \ } else { \ @@ -418,7 +421,8 @@ MACRO_BEGIN \ (head)->next = (queue_entry_t)(elt); \ } else { /* middle element */ \ __prev = (elt)->field.prev = (cur)->field.prev; \ - ((type)__prev)->field.next = (queue_entry_t)(elt);\ + ((type)(void *)__prev)->field.next = \ + (queue_entry_t)(elt); \ } \ (cur)->field.prev = (queue_entry_t)(elt); \ } \ @@ -447,7 +451,8 @@ MACRO_BEGIN \ (head)->prev = (queue_entry_t)(elt); \ } else { /* first element */ \ __next = (elt)->field.next = (head)->next; \ - ((type)__next)->field.prev = (queue_entry_t)(elt);\ + ((type)(void *)__next)->field.prev = \ + (queue_entry_t)(elt); \ } \ (head)->next = (queue_entry_t)(elt); \ } else { \ @@ -458,7 +463,8 @@ MACRO_BEGIN \ (head)->prev = (queue_entry_t)(elt); \ } else { /* middle element */ \ __next = (elt)->field.next = (cur)->field.next; \ - ((type)__next)->field.prev = (queue_entry_t)(elt);\ + ((type)(void *)__next)->field.prev = \ + (queue_entry_t)(elt); \ } \ (cur)->field.next = (queue_entry_t)(elt); \ } \ @@ -471,7 +477,7 @@ MACRO_END * given element (thing) in the given queue (head) */ #define queue_field(head, thing, type, field) \ - (((head) == (thing)) ? (head) : &((type)(thing))->field) + (((head) == (thing)) ? 
(head) : &((type)(void *)(thing))->field) /* * Macro: queue_remove @@ -491,12 +497,12 @@ MACRO_BEGIN \ if ((head) == __next) \ (head)->prev = __prev; \ else \ - ((type)__next)->field.prev = __prev; \ + ((type)(void *)__next)->field.prev = __prev; \ \ if ((head) == __prev) \ (head)->next = __next; \ else \ - ((type)__prev)->field.next = __next; \ + ((type)(void *)__prev)->field.next = __next; \ \ (elt)->field.next = NULL; \ (elt)->field.prev = NULL; \ @@ -515,13 +521,13 @@ MACRO_END MACRO_BEGIN \ register queue_entry_t __next; \ \ - (entry) = (type) ((head)->next); \ + (entry) = (type)(void *) ((head)->next); \ __next = (entry)->field.next; \ \ if ((head) == __next) \ (head)->prev = (head); \ else \ - ((type)(__next))->field.prev = (head); \ + ((type)(void *)(__next))->field.prev = (head); \ (head)->next = __next; \ \ (entry)->field.next = NULL; \ @@ -541,13 +547,13 @@ MACRO_END MACRO_BEGIN \ register queue_entry_t __prev; \ \ - (entry) = (type) ((head)->prev); \ + (entry) = (type)(void *) ((head)->prev); \ __prev = (entry)->field.prev; \ \ if ((head) == __prev) \ (head)->next = (head); \ else \ - ((type)(__prev))->field.next = (head); \ + ((type)(void *)(__prev))->field.next = (head); \ (head)->prev = __prev; \ \ (entry)->field.next = NULL; \ @@ -559,8 +565,8 @@ MACRO_END */ #define queue_assign(to, from, type, field) \ MACRO_BEGIN \ - ((type)((from)->prev))->field.next = (to); \ - ((type)((from)->next))->field.prev = (to); \ + ((type)(void *)((from)->prev))->field.next = (to); \ + ((type)(void *)((from)->next))->field.prev = (to); \ *to = *from; \ MACRO_END @@ -579,8 +585,10 @@ MACRO_END MACRO_BEGIN \ if (!queue_empty(old)) { \ *(new) = *(old); \ - ((type)((new)->next))->field.prev = (new); \ - ((type)((new)->prev))->field.next = (new); \ + ((type)(void *)((new)->next))->field.prev = \ + (new); \ + ((type)(void *)((new)->prev))->field.next = \ + (new); \ } else { \ queue_init(new); \ } \ @@ -600,9 +608,9 @@ MACRO_END * is the chain field in (*) */ #define queue_iterate(head, elt, type, field) \ - for ((elt) = (type) queue_first(head); \ + for ((elt) = (type)(void *) queue_first(head); \ !queue_end((head), (queue_entry_t)(elt)); \ - (elt) = (type) queue_next(&(elt)->field)) + (elt) = (type)(void *) queue_next(&(elt)->field)) #ifdef MACH_KERNEL_PRIVATE @@ -614,6 +622,8 @@ MACRO_END */ struct mpqueue_head { struct queue_entry head; /* header for queue */ + uint64_t earliest_soft_deadline; + uint64_t count; #if defined(__i386__) || defined(__x86_64__) lck_mtx_t lock_data; lck_mtx_ext_t lock_data_ext; @@ -636,6 +646,8 @@ MACRO_BEGIN \ &(q)->lock_data_ext, \ lck_grp, \ lck_attr); \ + (q)->earliest_soft_deadline = UINT64_MAX; \ + (q)->count = 0; \ MACRO_END #else diff --git a/osfmk/kern/sched.h b/osfmk/kern/sched.h index 8f87afad2..319da1fe0 100644 --- a/osfmk/kern/sched.h +++ b/osfmk/kern/sched.h @@ -148,6 +148,7 @@ #define MINPRI_KERNEL (MAXPRI_KERNEL - (NRQS / 8) + 1) /* 80 */ #define MAXPRI_RESERVED (MINPRI_KERNEL - 1) /* 79 */ +#define BASEPRI_GRAPHICS (MAXPRI_RESERVED - 3) /* 76 */ #define MINPRI_RESERVED (MAXPRI_RESERVED - (NRQS / 8) + 1) /* 64 */ #define MAXPRI_USER (MINPRI_RESERVED - 1) /* 63 */ @@ -155,14 +156,11 @@ #define BASEPRI_FOREGROUND (BASEPRI_DEFAULT + 16) /* 47 */ #define BASEPRI_BACKGROUND (BASEPRI_DEFAULT + 15) /* 46 */ #define BASEPRI_DEFAULT (MAXPRI_USER - (NRQS / 4)) /* 31 */ +#define MAXPRI_SUPPRESSED (BASEPRI_DEFAULT - 3) /* 28 */ #define MAXPRI_THROTTLE (MINPRI + 4) /* 4 */ #define MINPRI_USER MINPRI /* 0 */ -#ifdef CONFIG_EMBEDDED -#define DEPRESSPRI MAXPRI_THROTTLE 
-#else #define DEPRESSPRI MINPRI /* depress priority */ -#endif /* Type used for thread->sched_mode and saved_mode */ typedef enum { @@ -292,9 +290,12 @@ extern int default_bg_preemption_rate; #if defined(CONFIG_SCHED_TRADITIONAL) /* - * Age usage (1 << SCHED_TICK_SHIFT) times per second. + * Age usage at approximately (1 << SCHED_TICK_SHIFT) times per second. + * Aging may be deferred during periods where all processors are idle + * and cumulatively applied during periods of activity. */ #define SCHED_TICK_SHIFT 3 +#define SCHED_TICK_MAX_DELTA (8) extern unsigned sched_tick; extern uint32_t sched_tick_interval; @@ -304,7 +305,7 @@ extern uint32_t sched_tick_interval; extern uint64_t sched_one_second_interval; /* Periodic computation of various averages */ -extern void compute_averages(void); +extern void compute_averages(uint64_t); extern void compute_averunnable( void *nrun); @@ -330,14 +331,19 @@ extern void compute_pmap_gc_throttle( */ #if defined(CONFIG_SCHED_TRADITIONAL) extern uint32_t sched_pri_shift; +extern uint32_t sched_background_pri_shift; +extern uint32_t sched_combined_fgbg_pri_shift; extern uint32_t sched_fixed_shift; extern int8_t sched_load_shifts[NRQS]; +extern uint32_t sched_decay_usage_age_factor; +extern uint32_t sched_use_combined_fgbg_decay; +void sched_traditional_consider_maintenance(uint64_t); #endif extern int32_t sched_poll_yield_shift; extern uint64_t sched_safe_duration; -extern uint32_t sched_run_count, sched_share_count; +extern uint32_t sched_run_count, sched_share_count, sched_background_count; extern uint32_t sched_load_average, sched_mach_factor; extern uint32_t avenrun[3], mach_factor[3]; @@ -365,6 +371,16 @@ MACRO_BEGIN \ (void)hw_atomic_sub(&sched_share_count, 1); \ MACRO_END +#define sched_background_incr() \ +MACRO_BEGIN \ + (void)hw_atomic_add(&sched_background_count, 1); \ +MACRO_END + +#define sched_background_decr() \ +MACRO_BEGIN \ + (void)hw_atomic_sub(&sched_background_count, 1); \ +MACRO_END + /* * thread_timer_delta macro takes care of both thread timers. */ diff --git a/osfmk/kern/sched_average.c b/osfmk/kern/sched_average.c index d2a2ce6cb..b3edb3d9c 100644 --- a/osfmk/kern/sched_average.c +++ b/osfmk/kern/sched_average.c @@ -68,6 +68,9 @@ #include #include #include +#if CONFIG_TELEMETRY +#include <kern/telemetry.h> +#endif uint32_t avenrun[3] = {0, 0, 0}; uint32_t mach_factor[3] = {0, 0, 0}; @@ -97,9 +100,9 @@ typedef void (*sched_avg_comp_t)( static struct sched_average { sched_avg_comp_t comp; - void *param; - int period; /* in seconds */ - uint64_t deadline; + void *param; + int period; /* in seconds */ + uint64_t deadline; } sched_average[] = { { compute_averunnable, &sched_nrun, 5, 0 }, { compute_stack_target, NULL, 5, 1 }, @@ -107,19 +110,27 @@ static struct sched_average { { compute_zone_gc_throttle, NULL, 60, 0 }, { compute_pageout_gc_throttle, NULL, 1, 0 }, { compute_pmap_gc_throttle, NULL, 60, 0 }, +#if CONFIG_TELEMETRY + { compute_telemetry, NULL, 1, 0 }, +#endif { NULL, NULL, 0, 0 } }; typedef struct sched_average *sched_average_t; +/* The "stdelta" parameter represents the number of scheduler maintenance + * "ticks" that have elapsed since the last invocation, subject to + * integer division imprecision.
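+ * For example, if all processors sat idle across roughly five maintenance periods, the caller passes stdelta == 5 and the averaging loops below run five iterations back-to-back to catch the moving averages up.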
+ */ + void -compute_averages(void) +compute_averages(uint64_t stdelta) { - int ncpus, nthreads, nshared; - uint32_t factor_now, average_now, load_now = 0; + int ncpus, nthreads, nshared, nbackground, nshared_non_bg; + uint32_t factor_now, average_now, load_now = 0, background_load_now = 0, combined_fgbg_load_now = 0; sched_average_t avg; - uint64_t abstime; - + uint64_t abstime, index; + /* * Retrieve counts, ignoring * the current thread. @@ -127,6 +138,7 @@ compute_averages(void) ncpus = processor_avail_count; nthreads = sched_run_count - 1; nshared = sched_share_count; + nbackground = sched_background_count; /* * Load average and mach factor calculations for @@ -139,26 +151,60 @@ compute_averages(void) else factor_now = (ncpus - nthreads) * LOAD_SCALE; - sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5; - sched_load_average = ((sched_load_average << 2) + average_now) / 5; - + /* For those statistics that formerly relied on being recomputed + * on timer ticks, advance by the approximate number of corresponding + * elapsed intervals, thus compensating for potential idle intervals. + */ + for (index = 0; index < stdelta; index++) { + sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5; + sched_load_average = ((sched_load_average << 2) + average_now) / 5; + } /* - * Compute the timeshare priority - * conversion factor based on loading. + * Compute the timeshare priority conversion factor based on loading. + * Because our counters may be incremented and accessed + * concurrently with respect to each other, we may have + * windows where the invariant nthreads >= nshared >= nbackground + * is broken, so truncate values in these cases. */ + if (nshared > nthreads) nshared = nthreads; - if (nshared > ncpus) { + if (nbackground > nshared) + nbackground = nshared; + + nshared_non_bg = nshared - nbackground; + + if (nshared_non_bg > ncpus) { if (ncpus > 1) - load_now = nshared / ncpus; + load_now = nshared_non_bg / ncpus; else - load_now = nshared; + load_now = nshared_non_bg; if (load_now > NRQS - 1) load_now = NRQS - 1; } + if (nbackground > ncpus) { + if (ncpus > 1) + background_load_now = nbackground / ncpus; + else + background_load_now = nbackground; + + if (background_load_now > NRQS - 1) + background_load_now = NRQS - 1; + } + + if (nshared > ncpus) { + if (ncpus > 1) + combined_fgbg_load_now = nshared / ncpus; + else + combined_fgbg_load_now = nshared; + + if (combined_fgbg_load_now > NRQS - 1) + combined_fgbg_load_now = NRQS - 1; + } + /* * Sample total running threads. */ @@ -178,11 +224,14 @@ compute_averages(void) * are discarded. */ sched_pri_shift = sched_fixed_shift - sched_load_shifts[load_now]; + sched_background_pri_shift = sched_fixed_shift - sched_load_shifts[background_load_now]; + sched_combined_fgbg_pri_shift = sched_fixed_shift - sched_load_shifts[combined_fgbg_load_now]; /* * Compute old-style Mach load averages. 
*/ - { + + for (index = 0; index < stdelta; index++) { register int i; for (i = 0; i < 3; i++) { @@ -201,8 +250,16 @@ compute_averages(void) abstime = mach_absolute_time(); for (avg = sched_average; avg->comp != NULL; ++avg) { if (abstime >= avg->deadline) { - (*avg->comp)(avg->param); - avg->deadline = abstime + avg->period * sched_one_second_interval; + uint64_t period_abs = (avg->period * sched_one_second_interval); + uint64_t ninvokes = 1; + + ninvokes += (abstime - avg->deadline) / period_abs; + ninvokes = MIN(ninvokes, SCHED_TICK_MAX_DELTA); + + for (index = 0; index < ninvokes; index++) { + (*avg->comp)(avg->param); + } + avg->deadline = abstime + period_abs; } } } diff --git a/osfmk/kern/sched_fixedpriority.c b/osfmk/kern/sched_fixedpriority.c index ccde4a094..315c46d73 100644 --- a/osfmk/kern/sched_fixedpriority.c +++ b/osfmk/kern/sched_fixedpriority.c @@ -361,7 +361,7 @@ sched_fixedpriority_maintenance_continuation(void) /* * Compute various averages. */ - compute_averages(); + compute_averages(1); if (sched_fixedpriority_tick_deadline == 0) sched_fixedpriority_tick_deadline = abstime; @@ -452,13 +452,13 @@ sched_fixedpriority_processor_queue_shutdown( } } - while ((thread = (thread_t)dequeue_head(&bqueue)) != THREAD_NULL) { + while ((thread = (thread_t)(void *)dequeue_head(&bqueue)) != THREAD_NULL) { sched_fixedpriority_processor_enqueue(processor, thread, SCHED_TAILQ); } pset_unlock(pset); - while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) { + while ((thread = (thread_t)(void *)dequeue_head(&tqueue)) != THREAD_NULL) { thread_lock(thread); thread_setrun(thread, SCHED_TAILQ); @@ -653,60 +653,6 @@ sched_fixedpriority_update_priority(thread_t thread) } -#if CONFIG_EMBEDDED - /* Check for pending throttle transitions, and safely switch queues */ - if ((thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_MASK) && (thread->bound_processor == PROCESSOR_NULL)) { - boolean_t removed = thread_run_queue_remove(thread); - - if (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_DEMOTION) { - if (thread->sched_mode == TH_MODE_REALTIME) { - thread->saved_mode = thread->sched_mode; - thread->sched_mode = TH_MODE_TIMESHARE; - - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) - sched_share_incr(); - } else { - /* - * It's possible that this is a realtime thread that has - * already tripped the failsafe, in which case it should not - * degrade further. - */ - if (!(thread->sched_flags & TH_SFLAG_FAILSAFE)) { - - thread->saved_mode = thread->sched_mode; - - if (thread->sched_mode == TH_MODE_TIMESHARE) { - thread->sched_mode = TH_MODE_FAIRSHARE; - } - } - } - thread->sched_flags |= TH_SFLAG_THROTTLED; - - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_ENTER) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), 0xFFFFFFFF, 0, 0, 0); - - } else { - if ((thread->sched_mode == TH_MODE_TIMESHARE) - && (thread->saved_mode == TH_MODE_REALTIME)) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) - sched_share_decr(); - } - - thread->sched_mode = thread->saved_mode; - thread->saved_mode = TH_MODE_NONE; - thread->sched_flags &= ~TH_SFLAG_THROTTLED; - - KERNEL_DEBUG_CONSTANT1( - MACHDBG_CODE(DBG_MACH_SCHED,MACH_FAIRSHARE_EXIT) | DBG_FUNC_NONE, 0, 0, 0, 0, thread_tid(thread)); - - } - - thread->sched_flags &= ~(TH_SFLAG_PENDING_THROTTLE_MASK); - - if (removed) - thread_setrun(thread, SCHED_TAILQ); - } -#endif /* * Check for fail-safe release. 
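The deadline loop over sched_average[] above replaces one-shot invocation with bounded catch-up: when the maintenance pass has been deferred past several deadlines, each entry's computation runs once per missed period, clamped by SCHED_TICK_MAX_DELTA. A minimal standalone sketch of that pattern, with illustrative names (periodic_catchup and MAX_CATCHUP are not part of this patch):

#include <stdint.h>

#define MAX_CATCHUP 8	/* plays the role of SCHED_TICK_MAX_DELTA */

/*
 * Run a periodic callback once per period elapsed since its deadline,
 * clamped so that a long idle stretch cannot trigger an unbounded
 * burst of deferred work, then re-arm the deadline relative to "now".
 */
static void
periodic_catchup(uint64_t now, uint64_t *deadline, uint64_t period,
    void (*comp)(void *), void *param)
{
	uint64_t ninvokes, i;

	if (now < *deadline)
		return;

	ninvokes = 1 + (now - *deadline) / period;
	if (ninvokes > MAX_CATCHUP)
		ninvokes = MAX_CATCHUP;

	for (i = 0; i < ninvokes; i++)
		(*comp)(param);

	*deadline = now + period;
}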
diff --git a/osfmk/kern/sched_grrr.c b/osfmk/kern/sched_grrr.c index 0c4d1a3d0..13067b857 100644 --- a/osfmk/kern/sched_grrr.c +++ b/osfmk/kern/sched_grrr.c @@ -291,7 +291,7 @@ sched_grrr_maintenance_continuation(void) /* * Compute various averages. */ - compute_averages(); + compute_averages(1); if (sched_grrr_tick_deadline == 0) sched_grrr_tick_deadline = abstime; @@ -373,13 +373,13 @@ sched_grrr_processor_queue_shutdown( } } - while ((thread = (thread_t)dequeue_head(&bqueue)) != THREAD_NULL) { + while ((thread = (thread_t)(void *)dequeue_head(&bqueue)) != THREAD_NULL) { sched_grrr_processor_enqueue(processor, thread, SCHED_TAILQ); } pset_unlock(pset); - while ((thread = (thread_t)dequeue_head(&tqueue)) != THREAD_NULL) { + while ((thread = (thread_t)(void *)dequeue_head(&tqueue)) != THREAD_NULL) { thread_lock(thread); thread_setrun(thread, SCHED_TAILQ); @@ -591,13 +591,13 @@ grrr_intragroup_schedule(grrr_group_t group) thread = group->current_client; if (thread == THREAD_NULL) { - thread = (thread_t)queue_first(&group->clients); + thread = (thread_t)(void *)queue_first(&group->clients); } if (1 /* deficit */) { - group->current_client = (thread_t)queue_next((queue_entry_t)thread); + group->current_client = (thread_t)(void *)queue_next((queue_entry_t)thread); if (queue_end(&group->clients, (queue_entry_t)group->current_client)) { - group->current_client = (thread_t)queue_first(&group->clients); + group->current_client = (thread_t)(void *)queue_first(&group->clients); } thread = group->current_client; diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index eebc19bf5..11bad560a 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -96,6 +96,7 @@ #include #include #include +#include #include #include @@ -161,18 +162,26 @@ unsigned sched_tick; uint32_t sched_tick_interval; uint32_t sched_pri_shift = INT8_MAX; +uint32_t sched_background_pri_shift = INT8_MAX; +uint32_t sched_combined_fgbg_pri_shift = INT8_MAX; uint32_t sched_fixed_shift; +uint32_t sched_use_combined_fgbg_decay = 0; + +uint32_t sched_decay_usage_age_factor = 1; /* accelerate 5/8^n usage aging */ static boolean_t sched_traditional_use_pset_runqueue = FALSE; /* Defaults for timer deadline profiling */ #define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT 2000000 /* Timers with deadlines <= - * 2ms */ + * 2ms */ #define TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT 5000000 /* Timers with deadlines - <= 5ms */ + <= 5ms */ + uint64_t timer_deadline_tracking_bin_1; uint64_t timer_deadline_tracking_bin_2; +thread_t sched_maintenance_thread; + __attribute__((always_inline)) static inline run_queue_t runq_for_processor(processor_t processor) { @@ -214,15 +223,15 @@ static inline void runq_consider_decr_bound_count(processor_t processor, thread_ uint64_t sched_one_second_interval; -uint32_t sched_run_count, sched_share_count; +uint32_t sched_run_count, sched_share_count, sched_background_count; uint32_t sched_load_average, sched_mach_factor; /* Forwards */ #if defined(CONFIG_SCHED_TRADITIONAL) -static void load_shift_init(void) __attribute__((section("__TEXT, initcode"))); -static void preempt_pri_init(void) __attribute__((section("__TEXT, initcode"))); +static void load_shift_init(void); +static void preempt_pri_init(void); #endif /* CONFIG_SCHED_TRADITIONAL */ @@ -240,6 +249,10 @@ thread_t processor_idle( thread_t thread, processor_t processor); +ast_t +csw_check_locked( processor_t processor, + processor_set_t pset); + #if defined(CONFIG_SCHED_TRADITIONAL) static thread_t steal_thread( @@ -312,7 +325,7 @@ sched_traditional_with_pset_runqueue_init(void); #endif static void -sched_realtime_init(void) __attribute__((section("__TEXT, initcode"))); +sched_realtime_init(void); static void sched_realtime_timebase_init(void); @@ -322,7 +335,7 @@ sched_timer_deadline_tracking_init(void); #if defined(CONFIG_SCHED_TRADITIONAL) static void -sched_traditional_tick_continue(void); +sched_traditional_maintenance_continue(void); static uint32_t sched_traditional_initial_quantum_size(thread_t thread); @@ -393,7 +406,7 @@ const struct sched_dispatch_table sched_traditional_dispatch = { sched_traditional_timebase_init, sched_traditional_processor_init, sched_traditional_pset_init, - sched_traditional_tick_continue, + sched_traditional_maintenance_continue, sched_traditional_choose_thread, steal_thread, compute_priority, @@ -429,7 +442,7 @@ const struct sched_dispatch_table sched_traditional_with_pset_runqueue_dispatch sched_traditional_timebase_init, sched_traditional_processor_init, sched_traditional_pset_init, - sched_traditional_tick_continue, + sched_traditional_maintenance_continue, sched_traditional_choose_thread, steal_thread, compute_priority, @@ -484,7 +497,10 @@ const struct sched_dispatch_table *sched_current_dispatch = NULL; #define SCHED_STRING_MAX_LENGTH (48) char sched_string[SCHED_STRING_MAX_LENGTH]; -static enum sched_enum _sched_enum = sched_enum_unknown; +static enum sched_enum _sched_enum __attribute__((used)) = sched_enum_unknown; + +/* Global flag which indicates whether Background Stepper Context is enabled */ +static int cpu_throttle_enabled = 1; void sched_init(void) @@ -546,10 +562,10 @@ sched_init(void) } } else { #if 
defined(CONFIG_SCHED_TRADITIONAL) - sched_current_dispatch = &sched_traditional_dispatch; - _sched_enum = sched_enum_traditional; - strlcpy(sched_string, kSchedTraditionalString, sizeof(sched_string)); - kprintf("Scheduler: Default of %s\n", kSchedTraditionalString); + sched_current_dispatch = &sched_traditional_with_pset_runqueue_dispatch; + _sched_enum = sched_enum_traditional_with_pset_runqueue; + strlcpy(sched_string, kSchedTraditionalWithPsetRunqueueString, sizeof(sched_string)); + kprintf("Scheduler: Default of %s\n", kSchedTraditionalWithPsetRunqueueString); #elif defined(CONFIG_SCHED_PROTO) sched_current_dispatch = &sched_proto_dispatch; _sched_enum = sched_enum_proto; @@ -575,7 +591,7 @@ sched_init(void) sched_realtime_init(); ast_init(); sched_timer_deadline_tracking_init(); - + SCHED(pset_init)(&pset0); SCHED(processor_init)(master_processor); } @@ -743,10 +759,45 @@ load_shift_init(void) int8_t k, *p = sched_load_shifts; uint32_t i, j; + uint32_t sched_decay_penalty = 1; + + if (PE_parse_boot_argn("sched_decay_penalty", &sched_decay_penalty, sizeof (sched_decay_penalty))) { + kprintf("Overriding scheduler decay penalty %u\n", sched_decay_penalty); + } + + if (PE_parse_boot_argn("sched_decay_usage_age_factor", &sched_decay_usage_age_factor, sizeof (sched_decay_usage_age_factor))) { + kprintf("Overriding scheduler decay usage age factor %u\n", sched_decay_usage_age_factor); + } + + if (PE_parse_boot_argn("sched_use_combined_fgbg_decay", &sched_use_combined_fgbg_decay, sizeof (sched_use_combined_fgbg_decay))) { + kprintf("Overriding schedule fg/bg decay calculation: %u\n", sched_use_combined_fgbg_decay); + } + + if (sched_decay_penalty == 0) { + /* + * There is no penalty for timeshare threads for using too much + * CPU, so set all load shifts to INT8_MIN. Even under high load, + * sched_pri_shift will be >INT8_MAX, and there will be no + * penalty applied to threads (nor will sched_usage be updated per + * thread). + */ + for (i = 0; i < NRQS; i++) { + sched_load_shifts[i] = INT8_MIN; + } + + return; + } + *p++ = INT8_MIN; *p++ = 0; - for (i = j = 2, k = 1; i < NRQS; ++k) { - for (j <<= 1; i < j; ++i) + /* + * For a given system load "i", the per-thread priority + * penalty per quantum of CPU usage is ~2^k priority + * levels. "sched_decay_penalty" can cause more + * array entries to be filled with smaller "k" values + */ + for (i = 2, j = 1 << sched_decay_penalty, k = 1; i < NRQS; ++k) { + for (j <<= 1; (i < j) && (i < NRQS); ++i) *p++ = k; } } @@ -756,7 +807,7 @@ preempt_pri_init(void) { int i, *p = sched_preempt_pri; - for (i = BASEPRI_FOREGROUND + 1; i < MINPRI_KERNEL; ++i) + for (i = BASEPRI_FOREGROUND; i < MINPRI_KERNEL; ++i) setbit(i, p); for (i = BASEPRI_PREEMPT; i <= MAXPRI; ++i) @@ -788,73 +839,6 @@ thread_timer_expire( splx(s); } -#ifndef __LP64__ - -/* - * thread_set_timer: - * - * Set a timer for the current thread, if the thread - * is ready to wait. Must be called between assert_wait() - * and thread_block(). - */ -void -thread_set_timer( - uint32_t interval, - uint32_t scale_factor) -{ - thread_t thread = current_thread(); - uint64_t deadline; - spl_t s; - - s = splsched(); - thread_lock(thread); - if ((thread->state & TH_WAIT) != 0) { - clock_interval_to_deadline(interval, scale_factor, &deadline); - if (!timer_call_enter(&thread->wait_timer, deadline, thread->sched_pri >= BASEPRI_RTQUEUES ? 
TIMER_CALL_CRITICAL : 0)) - thread->wait_timer_active++; - thread->wait_timer_is_set = TRUE; - } - thread_unlock(thread); - splx(s); -} - -void -thread_set_timer_deadline( - uint64_t deadline) -{ - thread_t thread = current_thread(); - spl_t s; - - s = splsched(); - thread_lock(thread); - if ((thread->state & TH_WAIT) != 0) { - if (!timer_call_enter(&thread->wait_timer, deadline, thread->sched_pri >= BASEPRI_RTQUEUES ? TIMER_CALL_CRITICAL : 0)) - thread->wait_timer_active++; - thread->wait_timer_is_set = TRUE; - } - thread_unlock(thread); - splx(s); -} - -void -thread_cancel_timer(void) -{ - thread_t thread = current_thread(); - spl_t s; - - s = splsched(); - thread_lock(thread); - if (thread->wait_timer_is_set) { - if (timer_call_cancel(&thread->wait_timer)) - thread->wait_timer_active--; - thread->wait_timer_is_set = FALSE; - } - thread_unlock(thread); - splx(s); -} - -#endif /* __LP64__ */ - /* * thread_unblock: * @@ -901,8 +885,12 @@ thread_unblock( * Update run counts. */ sched_run_incr(); - if (thread->sched_mode == TH_MODE_TIMESHARE) + if (thread->sched_mode == TH_MODE_TIMESHARE) { sched_share_incr(); + + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_incr(); + } } else { /* @@ -926,8 +914,7 @@ thread_unblock( * Calculate deadline for real-time threads. */ if (thread->sched_mode == TH_MODE_REALTIME) { - thread->realtime.deadline = mach_absolute_time(); - thread->realtime.deadline += thread->realtime.constraint; + thread->realtime.deadline = thread->realtime.constraint + mach_absolute_time(); } /* @@ -945,9 +932,13 @@ thread_unblock( */ boolean_t aticontext, pidle; ml_get_power_state(&aticontext, &pidle); - if (__improbable(aticontext)) { + + if (__improbable(aticontext && !(thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT))) { ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1); + DTRACE_SCHED2(iwakeup, struct thread *, thread, struct proc *, thread->task->bsd_info); + uint64_t ttd = PROCESSOR_DATA(current_processor(), timer_call_ttd); + if (ttd) { if (ttd <= timer_deadline_tracking_bin_1) thread->thread_timer_wakeups_bin_1++; @@ -955,9 +946,11 @@ thread_unblock( if (ttd <= timer_deadline_tracking_bin_2) thread->thread_timer_wakeups_bin_2++; } + if (pidle) { ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1); } + } else if (thread_get_tag_internal(cthread) & THREAD_TAG_CALLOUT) { if (cthread->callout_woken_from_icontext) { ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1); @@ -966,12 +959,15 @@ thread_unblock( ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1); thread->thread_callout_platform_idle_wakeups++; } + + cthread->callout_woke_thread = TRUE; } } if (thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT) { - thread->callout_woken_from_icontext = aticontext; - thread->callout_woken_from_platform_idle = pidle; + thread->callout_woken_from_icontext = aticontext; + thread->callout_woken_from_platform_idle = pidle; + thread->callout_woke_thread = FALSE; } /* Event should only be triggered if thread is not already running */ @@ -1165,7 +1161,57 @@ assert_wait_timeout( VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event), - interruptible, deadline, thread); + interruptible, + TIMEOUT_URGENCY_SYS_NORMAL, + deadline, 0, + thread); + + thread_unlock(thread); + wait_queue_unlock(wqueue); + splx(s); + + return (wresult); +} + +wait_result_t +assert_wait_timeout_with_leeway( + event_t event, + wait_interrupt_t 
interruptible, + wait_timeout_urgency_t urgency, + uint32_t interval, + uint32_t leeway, + uint32_t scale_factor) +{ + thread_t thread = current_thread(); + wait_result_t wresult; + wait_queue_t wqueue; + uint64_t deadline; + uint64_t abstime; + uint64_t slop; + uint64_t now; + spl_t s; + + now = mach_absolute_time(); + clock_interval_to_absolutetime_interval(interval, scale_factor, &abstime); + deadline = now + abstime; + + clock_interval_to_absolutetime_interval(leeway, scale_factor, &slop); + + assert(event != NO_EVENT); + wqueue = &wait_queues[wait_hash(event)]; + + s = splsched(); + wait_queue_lock(wqueue); + thread_lock(thread); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, + VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); + + wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t, event), + interruptible, + urgency, deadline, slop, + thread); thread_unlock(thread); wait_queue_unlock(wqueue); @@ -1197,7 +1243,45 @@ assert_wait_deadline( VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event), - interruptible, deadline, thread); + interruptible, + TIMEOUT_URGENCY_SYS_NORMAL, deadline, 0, + thread); + + thread_unlock(thread); + wait_queue_unlock(wqueue); + splx(s); + + return (wresult); +} + +wait_result_t +assert_wait_deadline_with_leeway( + event_t event, + wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, + uint64_t deadline, + uint64_t leeway) +{ + thread_t thread = current_thread(); + wait_result_t wresult; + wait_queue_t wqueue; + spl_t s; + + assert(event != NO_EVENT); + wqueue = &wait_queues[wait_hash(event)]; + + s = splsched(); + wait_queue_lock(wqueue); + thread_lock(thread); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_SCHED, MACH_WAIT)|DBG_FUNC_NONE, + VM_KERNEL_UNSLIDE(event), interruptible, deadline, 0, 0); + + wresult = wait_queue_assert_wait64_locked(wqueue, CAST_DOWN(event64_t,event), + interruptible, + urgency, deadline, leeway, + thread); thread_unlock(thread); wait_queue_unlock(wqueue); @@ -1285,11 +1369,46 @@ thread_sleep_lock_write( return res; } +/* + * thread_isoncpu: + * + * Return TRUE if a thread is running on a processor such that an AST + * is needed to pull it out of userspace execution, or if executing in + * the kernel, bring to a context switch boundary that would cause + * thread state to be serialized in the thread PCB. + * + * Thread locked, returns the same way. While locked, fields + * like "state" and "runq" cannot change. + */ +static inline boolean_t +thread_isoncpu(thread_t thread) +{ + /* Not running or runnable */ + if (!(thread->state & TH_RUN)) + return (FALSE); + + /* Waiting on a runqueue, not currently running */ + if (thread->runq != PROCESSOR_NULL) + return (FALSE); + + /* + * Thread must be running on a processor, or + * about to run, or just did run. In all these + * cases, an AST to the processor is needed + * to guarantee that the thread is kicked out + * of userspace and the processor has + * context switched (and saved register state). + */ + return (TRUE); +} + /* * thread_stop: * * Force a preemption point for a thread and wait - * for it to stop running. Arbitrates access among + * for it to stop running on a CPU. If a stronger + * guarantee is requested, wait until no longer + * runnable. Arbitrates access among * multiple stop requests. 
(released by unstop) * * The thread must enter a wait state and stop via a @@ -1299,10 +1418,12 @@ thread_sleep_lock_write( */ boolean_t thread_stop( - thread_t thread) + thread_t thread, + boolean_t until_not_runnable) { wait_result_t wresult; spl_t s = splsched(); + boolean_t oncpu; wake_lock(thread); thread_lock(thread); @@ -1328,11 +1449,15 @@ thread_stop( thread->state |= TH_SUSP; - while (thread->state & TH_RUN) { - processor_t processor = thread->last_processor; - - if (processor != PROCESSOR_NULL && processor->active_thread == thread) + while ((oncpu = thread_isoncpu(thread)) || + (until_not_runnable && (thread->state & TH_RUN))) { + processor_t processor; + + if (oncpu) { + assert(thread->state & TH_RUN); + processor = thread->chosen_processor; cause_ast_check(processor); + } thread->wake_active = TRUE; thread_unlock(thread); @@ -1357,6 +1482,13 @@ thread_stop( thread_unlock(thread); wake_unlock(thread); splx(s); + + /* + * We return with the thread unlocked. To prevent it from + * transitioning to a runnable state (or from TH_RUN to + * being on the CPU), the caller must ensure the thread + * is stopped via an external means (such as an AST) + */ return (TRUE); } @@ -1405,16 +1537,6 @@ thread_unstop( splx(s); } -/* - * Thread locked, returns the same way - */ -static inline boolean_t -thread_isoncpu(thread_t thread) -{ - processor_t processor = thread->last_processor; - - return ((processor != PROCESSOR_NULL) && (processor->active_thread == thread)); -} /* * thread_wait: * @@ -1441,12 +1563,12 @@ thread_wait( * where the original, pure "TH_RUN" check would have let us * finish. */ - while ((oncpu = thread_isoncpu(thread)) || + while ((oncpu = thread_isoncpu(thread)) || (until_not_runnable && (thread->state & TH_RUN))) { if (oncpu) { assert(thread->state & TH_RUN); - processor = thread->last_processor; + processor = thread->chosen_processor; cause_ast_check(processor); } @@ -1650,6 +1772,20 @@ thread_select( assert(pset->low_count); assert(pset->low_pri); + if (processor->processor_meta != PROCESSOR_META_NULL && processor->processor_meta->primary != processor) { + /* + * Should this secondary SMT processor attempt to find work? For pset runqueue systems, + * we should look for work only under the same conditions that choose_processor() + * would have assigned work, which is when all primary processors have been assigned work. + * + * An exception is that bound threads are dispatched to a processor without going through + * choose_processor(), so in those cases we should continue trying to dequeue work. + */ + if (!processor->runq_bound_count && !queue_empty(&pset->idle_queue) && !rt_runq.count) { + goto idle; + } + } + inactive_state = processor->state != PROCESSOR_SHUTDOWN && machine_processor_is_inactive(processor); simple_lock(&rt_lock); @@ -1660,26 +1796,28 @@ thread_select( * bound to a different processor, nor be in the wrong * processor set. 
*/ - if ( ((thread->state & ~TH_SUSP) == TH_RUN) && - (thread->sched_pri >= BASEPRI_RTQUEUES || - processor->processor_meta == PROCESSOR_META_NULL || - processor->processor_meta->primary == processor) && - (thread->bound_processor == PROCESSOR_NULL || - thread->bound_processor == processor) && - (thread->affinity_set == AFFINITY_SET_NULL || - thread->affinity_set->aset_pset == pset) ) { - if ( thread->sched_pri >= BASEPRI_RTQUEUES && - first_timeslice(processor) ) { + if ( ((thread->state & ~TH_SUSP) == TH_RUN) && + (thread->sched_pri >= BASEPRI_RTQUEUES || + processor->processor_meta == PROCESSOR_META_NULL || + processor->processor_meta->primary == processor) && + (thread->bound_processor == PROCESSOR_NULL || + thread->bound_processor == processor) && + (thread->affinity_set == AFFINITY_SET_NULL || + thread->affinity_set->aset_pset == pset)) { + if (thread->sched_pri >= BASEPRI_RTQUEUES && + first_timeslice(processor)) { if (rt_runq.count > 0) { register queue_t q; q = &rt_runq.queue; if (((thread_t)q->next)->realtime.deadline < - processor->deadline) { + processor->deadline) { + if ((((thread_t)q->next)->bound_processor == PROCESSOR_NULL) || (((thread_t)q->next)->bound_processor == processor)) { thread = (thread_t)dequeue_head(q); thread->runq = PROCESSOR_NULL; SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); rt_runq.count--; + } } } @@ -1731,16 +1869,20 @@ thread_select( if (rt_runq.count > 0) { thread = (thread_t)dequeue_head(&rt_runq.queue); - thread->runq = PROCESSOR_NULL; - SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); - rt_runq.count--; + if (__probable((thread->bound_processor == NULL || (thread->bound_processor == processor)))) { + thread->runq = PROCESSOR_NULL; + SCHED_STATS_RUNQ_CHANGE(&rt_runq.runq_stats, rt_runq.count); + rt_runq.count--; - simple_unlock(&rt_lock); + simple_unlock(&rt_lock); - processor->deadline = thread->realtime.deadline; - pset_unlock(pset); + processor->deadline = thread->realtime.deadline; + pset_unlock(pset); - return (thread); + return (thread); + } else { + enqueue_head(&rt_runq.queue, (queue_entry_t)thread); + } } simple_unlock(&rt_lock); @@ -1794,6 +1936,7 @@ thread_select( pset_lock(pset); + idle: /* * Nothing is runnable, so set this processor idle if it * was running. @@ -1860,9 +2003,15 @@ thread_select_idle( processor_t processor) { thread_t new_thread; + uint64_t arg1, arg2; + int urgency; + + if (thread->sched_mode == TH_MODE_TIMESHARE) { + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_decr(); - if (thread->sched_mode == TH_MODE_TIMESHARE) sched_share_decr(); + } sched_run_decr(); thread->state |= TH_IDLE; @@ -1890,7 +2039,7 @@ thread_select_idle( (*thread->sched_call)(SCHED_CALL_BLOCK, thread); - thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0); + thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL); /* * Enable interrupts and perform idling activities. No @@ -1905,23 +2054,6 @@ thread_select_idle( thread_lock(thread); - /* - * If we idled in place, simulate a context switch back - * to the original priority of the thread so that the - * platform layer cannot distinguish this from a true - * switch to the idle thread. - */ - if (thread->sched_mode == TH_MODE_REALTIME) - thread_tell_urgency(THREAD_URGENCY_REAL_TIME, thread->realtime.period, thread->realtime.deadline); - /* Identify non-promoted threads which have requested a - * "background" priority. 
- */ - else if ((thread->sched_pri <= MAXPRI_THROTTLE) && - (thread->priority <= MAXPRI_THROTTLE)) - thread_tell_urgency(THREAD_URGENCY_BACKGROUND, thread->sched_pri, thread->priority); - else - thread_tell_urgency(THREAD_URGENCY_NORMAL, thread->sched_pri, thread->priority); - /* * If awakened, switch to thread timer and start a new quantum. * Otherwise skip; we will context switch to another thread or return here. @@ -1935,7 +2067,7 @@ thread_select_idle( thread->last_quantum_refill_time = processor->last_dispatch; processor->quantum_end = processor->last_dispatch + thread->current_quantum; - timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_CRITICAL); + timer_call_enter1(&processor->quantum_timer, thread, processor->quantum_end, TIMER_CALL_SYS_CRITICAL); processor->timeslice = 1; thread->computation_epoch = processor->last_dispatch; @@ -1943,10 +2075,25 @@ thread_select_idle( thread->state &= ~TH_IDLE; + /* + * If we idled in place, simulate a context switch back + * to the original priority of the thread so that the + * platform layer cannot distinguish this from a true + * switch to the idle thread. + */ + + urgency = thread_get_urgency(thread, &arg1, &arg2); + + thread_tell_urgency(urgency, arg1, arg2, new_thread); + sched_run_incr(); - if (thread->sched_mode == TH_MODE_TIMESHARE) + if (thread->sched_mode == TH_MODE_TIMESHARE) { sched_share_incr(); + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_incr(); + } + return (new_thread); } #endif /* CONFIG_SCHED_IDLE_IN_PLACE */ @@ -2059,17 +2206,25 @@ MACRO_BEGIN \ } \ MACRO_END +/* + * thread_invoke + * + * "self" is what is currently running on the processor, + * "thread" is the new thread to context switch to + * (which may be the same thread in some cases) + */ static boolean_t thread_invoke( - register thread_t self, - register thread_t thread, + thread_t self, + thread_t thread, ast_t reason) { thread_continue_t continuation = self->continuation; - void *parameter = self->parameter; - processor_t processor; + void *parameter = self->parameter; + processor_t processor; + uint64_t ctime = mach_absolute_time(); - if (get_preemption_level() != 0) { + if (__improbable(get_preemption_level() != 0)) { int pl = get_preemption_level(); panic("thread_invoke: preemption_level %d, possible cause: %s", pl, (pl < 0 ? "unlocking an unlocked mutex or spinlock" : @@ -2078,6 +2233,10 @@ thread_invoke( assert(self == current_thread()); +#if defined(CONFIG_SCHED_TRADITIONAL) + sched_traditional_consider_maintenance(ctime); +#endif /* CONFIG_SCHED_TRADITIONAL */ + /* * Mark thread interruptible. 
*/ @@ -2130,9 +2289,9 @@ thread_invoke( self->reason = reason; - processor->last_dispatch = mach_absolute_time(); - self->last_run_time = processor->last_dispatch; - thread_timer_event(processor->last_dispatch, &thread->system_timer); + processor->last_dispatch = ctime; + self->last_run_time = ctime; + thread_timer_event(ctime, &thread->system_timer); PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; /* @@ -2141,7 +2300,7 @@ thread_invoke( */ if (!thread->precise_user_kernel_time) { timer_switch(PROCESSOR_DATA(processor, current_state), - processor->last_dispatch, + ctime, PROCESSOR_DATA(processor, current_state)); } @@ -2149,7 +2308,7 @@ thread_invoke( MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_HANDOFF)|DBG_FUNC_NONE, self->reason, (uintptr_t)thread_tid(thread), self->sched_pri, thread->sched_pri, 0); - if ((thread->chosen_processor != processor) && (thread->chosen_processor != NULL)) { + if ((thread->chosen_processor != processor) && (thread->chosen_processor != PROCESSOR_NULL)) { KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_MOVED)|DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), (uintptr_t)thread->chosen_processor->cpu_id, 0, 0, 0); } @@ -2243,9 +2402,9 @@ need_stack: assert(self->runq == PROCESSOR_NULL); self->reason = reason; - processor->last_dispatch = mach_absolute_time(); - self->last_run_time = processor->last_dispatch; - thread_timer_event(processor->last_dispatch, &thread->system_timer); + processor->last_dispatch = ctime; + self->last_run_time = ctime; + thread_timer_event(ctime, &thread->system_timer); PROCESSOR_DATA(processor, kernel_timer) = &thread->system_timer; /* @@ -2254,7 +2413,7 @@ need_stack: */ if (!thread->precise_user_kernel_time) { timer_switch(PROCESSOR_DATA(processor, current_state), - processor->last_dispatch, + ctime, PROCESSOR_DATA(processor, current_state)); } @@ -2310,6 +2469,9 @@ need_stack: * special actions. Update quantum for other thread and begin * the quantum for ourselves. * + * "self" is our new current thread that we have context switched + * to, "thread" is the old thread that we have switched away from. + * * Called at splsched. */ void @@ -2337,15 +2499,16 @@ thread_dispatch( consumed = thread->current_quantum - remainder; - if ((thread->reason & AST_LEDGER) == 0) + if ((thread->reason & AST_LEDGER) == 0) { /* - * Bill CPU time to both the individual thread - * and the task. + * Bill CPU time to both the task and + * the individual thread. */ ledger_credit(thread->t_ledger, task_ledgers.cpu_time, consumed); ledger_credit(thread->t_threadledger, thread_ledgers.cpu_time, consumed); + } wake_lock(thread); thread_lock(thread); @@ -2394,6 +2557,28 @@ thread_dispatch( thread->computation_metered += (processor->last_dispatch - thread->computation_epoch); + if ((thread->rwlock_count != 0) && !(LcksOpts & disLkRWPrio)) { + integer_t priority; + + priority = thread->sched_pri; + + if (priority < thread->priority) + priority = thread->priority; + if (priority < BASEPRI_BACKGROUND) + priority = BASEPRI_BACKGROUND; + + if ((thread->sched_pri < priority) || !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) { + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), thread->sched_pri, thread->priority, priority, 0); + + thread->sched_flags |= TH_SFLAG_RW_PROMOTED; + + if (thread->sched_pri < priority) + set_sched_pri(thread, priority); + } + } + if (!(thread->state & TH_WAIT)) { /* * Still running. 
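The thread_dispatch() hunk above bills consumed CPU time, the thread's quantum minus the unexpired remainder, to both the task ledger and the per-thread ledger, skipping the credit when the context switch was itself caused by ledger enforcement (AST_LEDGER). A rough sketch of that double-entry accounting under a stand-in ledger type (ledger_sketch and bill_quantum are illustrative, not kernel API):

#include <stdbool.h>
#include <stdint.h>

/* Illustrative stand-in for the kernel's opaque ledger object. */
struct ledger_sketch {
	uint64_t cpu_time;
};

static void
ledger_credit_sketch(struct ledger_sketch *l, uint64_t amount)
{
	l->cpu_time += amount;
}

/*
 * On switching away from a thread, credit the CPU time it actually
 * used (full quantum minus unexpired remainder) to both the owning
 * task and the thread itself, unless the switch was triggered by
 * ledger enforcement.
 */
static void
bill_quantum(struct ledger_sketch *task_ledger,
    struct ledger_sketch *thread_ledger,
    uint64_t current_quantum, uint64_t remainder, bool ledger_ast)
{
	uint64_t consumed = current_quantum - remainder;

	if (!ledger_ast) {
		ledger_credit_sketch(task_ledger, consumed);
		ledger_credit_sketch(thread_ledger, consumed);
	}
}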
@@ -2435,9 +2620,14 @@ thread_dispatch( } thread->state &= ~TH_RUN; + thread->chosen_processor = PROCESSOR_NULL; + + if (thread->sched_mode == TH_MODE_TIMESHARE) { + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_decr(); - if (thread->sched_mode == TH_MODE_TIMESHARE) sched_share_decr(); + } sched_run_decr(); (*thread->sched_call)(SCHED_CALL_BLOCK, thread); @@ -2460,17 +2650,13 @@ thread_dispatch( } if (!(self->state & TH_IDLE)) { + uint64_t arg1, arg2; + int urgency; - if (self->sched_mode == TH_MODE_REALTIME) - thread_tell_urgency(THREAD_URGENCY_REAL_TIME, self->realtime.period, self->realtime.deadline); - /* Identify non-promoted threads which have requested a - * "background" priority. - */ - else if ((self->sched_pri <= MAXPRI_THROTTLE) && - (self->priority <= MAXPRI_THROTTLE)) - thread_tell_urgency(THREAD_URGENCY_BACKGROUND, self->sched_pri, self->priority); - else - thread_tell_urgency(THREAD_URGENCY_NORMAL, self->sched_pri, self->priority); + urgency = thread_get_urgency(self, &arg1, &arg2); + + thread_tell_urgency(urgency, arg1, arg2, self); + /* * Get a new quantum if none remaining. */ @@ -2483,7 +2669,7 @@ thread_dispatch( * Set up quantum timer and timeslice. */ processor->quantum_end = (processor->last_dispatch + self->current_quantum); - timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_CRITICAL); + timer_call_enter1(&processor->quantum_timer, self, processor->quantum_end, TIMER_CALL_SYS_CRITICAL); processor->timeslice = 1; @@ -2493,7 +2679,7 @@ thread_dispatch( timer_call_cancel(&processor->quantum_timer); processor->timeslice = 0; - thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0); + thread_tell_urgency(THREAD_URGENCY_NONE, 0, 0, NULL); } } @@ -2989,6 +3175,7 @@ realtime_setrun( thread_t thread) { processor_set_t pset = processor->processor_set; + ast_t preempt; thread->chosen_processor = processor; @@ -3001,23 +3188,69 @@ realtime_setrun( enqueue_tail(&pset->active_queue, (queue_entry_t)processor); processor->next_thread = thread; + processor->current_pri = thread->sched_pri; + processor->current_thmode = thread->sched_mode; processor->deadline = thread->realtime.deadline; processor->state = PROCESSOR_DISPATCHING; - pset_unlock(pset); - if (processor != current_processor()) - machine_signal_idle(processor); + if (processor != current_processor()) { + if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { + /* cleared on exit from main processor_idle() loop */ + pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); + machine_signal_idle(processor); + } + } + + pset_unlock(pset); return; } - if (realtime_queue_insert(thread)) { - int prstate = processor->state; - if (processor == current_processor()) - ast_on(AST_PREEMPT | AST_URGENT); - else if ((prstate == PROCESSOR_IDLE) || (prstate == PROCESSOR_DISPATCHING)) - machine_signal_idle(processor); - else - cause_ast_check(processor); + if (processor->current_pri < BASEPRI_RTQUEUES) + preempt = (AST_PREEMPT | AST_URGENT); + else if (thread->realtime.deadline < processor->deadline) + preempt = (AST_PREEMPT | AST_URGENT); + else + preempt = AST_NONE; + + realtime_queue_insert(thread); + + if (preempt != AST_NONE) { + if (processor->state == PROCESSOR_IDLE) { + remqueue((queue_entry_t)processor); + enqueue_tail(&pset->active_queue, (queue_entry_t)processor); + processor->next_thread = THREAD_NULL; + processor->current_pri = thread->sched_pri; + processor->current_thmode = thread->sched_mode; + processor->deadline = thread->realtime.deadline; + processor->state = 
PROCESSOR_DISPATCHING; + if (processor == current_processor()) { + ast_on(preempt); + } else { + if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { + /* cleared on exit from main processor_idle() loop */ + pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); + machine_signal_idle(processor); + } + } + } else if (processor->state == PROCESSOR_DISPATCHING) { + if ((processor->next_thread == THREAD_NULL) && ((processor->current_pri < thread->sched_pri) || (processor->deadline > thread->realtime.deadline))) { + processor->current_pri = thread->sched_pri; + processor->current_thmode = thread->sched_mode; + processor->deadline = thread->realtime.deadline; + } + } else { + if (processor == current_processor()) { + ast_on(preempt); + } else { + if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { + /* cleared after IPI causes csw_check() to be called */ + pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); + cause_ast_check(processor); + } + } + } + } else { + /* Selected processor was too busy, just keep thread enqueued and let other processors drain it naturally. */ } pset_unlock(pset); @@ -3078,6 +3311,7 @@ processor_setrun( { processor_set_t pset = processor->processor_set; ast_t preempt; + enum { eExitIdle, eInterruptRunning, eDoNothing } ipi_action = eDoNothing; thread->chosen_processor = processor; @@ -3091,12 +3325,18 @@ processor_setrun( enqueue_tail(&pset->active_queue, (queue_entry_t)processor); processor->next_thread = thread; + processor->current_pri = thread->sched_pri; + processor->current_thmode = thread->sched_mode; processor->deadline = UINT64_MAX; processor->state = PROCESSOR_DISPATCHING; - pset_unlock(pset); - if (processor != current_processor()) + if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { + /* cleared on exit from main processor_idle() loop */ + pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); machine_signal_idle(processor); + } + + pset_unlock(pset); return; } @@ -3107,41 +3347,89 @@ processor_setrun( preempt = (AST_PREEMPT | AST_URGENT); else if(processor->active_thread && thread_eager_preemption(processor->active_thread)) preempt = (AST_PREEMPT | AST_URGENT); - else - if ((thread->sched_mode == TH_MODE_TIMESHARE) && thread->sched_pri < thread->priority) - preempt = AST_NONE; - else + else if ((thread->sched_mode == TH_MODE_TIMESHARE) && (thread->sched_pri < thread->priority)) { + if(SCHED(priority_is_urgent)(thread->priority) && thread->sched_pri > processor->current_pri) { + preempt = (options & SCHED_PREEMPT)? AST_PREEMPT: AST_NONE; + } else { + preempt = AST_NONE; + } + } else preempt = (options & SCHED_PREEMPT)? 
AST_PREEMPT: AST_NONE; - if (!SCHED(processor_enqueue)(processor, thread, options)) - preempt = AST_NONE; + SCHED(processor_enqueue)(processor, thread, options); if (preempt != AST_NONE) { - if (processor == current_processor()) { - if (csw_check(processor) != AST_NONE) - ast_on(preempt); - } - else - if ( processor->state == PROCESSOR_IDLE || processor->state == PROCESSOR_DISPATCHING) { - machine_signal_idle(processor); - } - else - if ( (processor->state == PROCESSOR_RUNNING || + if (processor->state == PROCESSOR_IDLE) { + remqueue((queue_entry_t)processor); + enqueue_tail(&pset->active_queue, (queue_entry_t)processor); + processor->next_thread = THREAD_NULL; + processor->current_pri = thread->sched_pri; + processor->current_thmode = thread->sched_mode; + processor->deadline = UINT64_MAX; + processor->state = PROCESSOR_DISPATCHING; + + ipi_action = eExitIdle; + } else if ( processor->state == PROCESSOR_DISPATCHING) { + if ((processor->next_thread == THREAD_NULL) && (processor->current_pri < thread->sched_pri)) { + processor->current_pri = thread->sched_pri; + processor->current_thmode = thread->sched_mode; + processor->deadline = UINT64_MAX; + } + } else if ( (processor->state == PROCESSOR_RUNNING || processor->state == PROCESSOR_SHUTDOWN) && (thread->sched_pri >= processor->current_pri || processor->current_thmode == TH_MODE_FAIRSHARE)) { - cause_ast_check(processor); + ipi_action = eInterruptRunning; } - } - else - if ( processor->state == PROCESSOR_SHUTDOWN && + } else { + /* + * New thread is not important enough to preempt what is running, but + * special processor states may need special handling + */ + if (processor->state == PROCESSOR_SHUTDOWN && thread->sched_pri >= processor->current_pri ) { - cause_ast_check(processor); + ipi_action = eInterruptRunning; + } else if ( processor->state == PROCESSOR_IDLE && + processor != current_processor() ) { + remqueue((queue_entry_t)processor); + enqueue_tail(&pset->active_queue, (queue_entry_t)processor); + processor->next_thread = THREAD_NULL; + processor->current_pri = thread->sched_pri; + processor->current_thmode = thread->sched_mode; + processor->deadline = UINT64_MAX; + processor->state = PROCESSOR_DISPATCHING; + + ipi_action = eExitIdle; + } } - else - if ( processor->state == PROCESSOR_IDLE && - processor != current_processor() ) { - machine_signal_idle(processor); + + switch (ipi_action) { + case eDoNothing: + break; + case eExitIdle: + if (processor == current_processor()) { + if (csw_check_locked(processor, pset) != AST_NONE) + ast_on(preempt); + } else { + if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { + /* cleared on exit from main processor_idle() loop */ + pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); + machine_signal_idle(processor); + } + } + break; + case eInterruptRunning: + if (processor == current_processor()) { + if (csw_check_locked(processor, pset) != AST_NONE) + ast_on(preempt); + } else { + if (!(pset->pending_AST_cpu_mask & (1U << processor->cpu_id))) { + /* cleared after IPI causes csw_check() to be called */ + pset->pending_AST_cpu_mask |= (1U << processor->cpu_id); + cause_ast_check(processor); + } + } + break; } pset_unlock(pset); @@ -3548,6 +3836,9 @@ thread_setrun( pset_lock(pset); processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0); } else if (thread->last_processor != 
PROCESSOR_NULL) { @@ -3559,11 +3850,8 @@ thread_setrun( pset_lock(pset); processor = SCHED(choose_processor)(pset, processor, thread); - if ((thread->last_processor != processor) && (thread->last_processor != PROCESSOR_NULL)) { - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LPA_BROKEN)|DBG_FUNC_NONE, - (uintptr_t)thread_tid(thread), (uintptr_t)thread->last_processor->cpu_id, (uintptr_t)processor->cpu_id, thread->last_processor->state, 0); - } - + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), thread->last_processor->cpu_id, processor->cpu_id, processor->state, 0); } else { /* @@ -3583,6 +3871,9 @@ thread_setrun( processor = SCHED(choose_processor)(pset, PROCESSOR_NULL, thread); task->pset_hint = processor->processor_set; + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), (uintptr_t)-1, processor->cpu_id, processor->state, 0); } } else { @@ -3594,6 +3885,9 @@ thread_setrun( processor = thread->bound_processor; pset = processor->processor_set; pset_lock(pset); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHOOSE_PROCESSOR)|DBG_FUNC_NONE, + (uintptr_t)thread_tid(thread), (uintptr_t)-2, processor->cpu_id, processor->state, 0); } /* @@ -3696,6 +3990,30 @@ processor_queue_shutdown( ast_t csw_check( processor_t processor) +{ + processor_set_t pset = processor->processor_set; + ast_t result; + + pset_lock(pset); + + /* If we were sent a remote AST and interrupted a running processor, acknowledge it here with pset lock held */ + pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id); + + result = csw_check_locked(processor, pset); + + pset_unlock(pset); + + return result; +} + +/* + * Check for preemption at splsched with + * pset locked + */ +ast_t +csw_check_locked( + processor_t processor, + processor_set_t pset __unused) { ast_t result = AST_NONE; thread_t thread = processor->active_thread; @@ -3705,8 +4023,12 @@ csw_check( return (AST_PREEMPT | AST_URGENT); } else { - if (rt_runq.count > 0 && BASEPRI_RTQUEUES >= processor->current_pri) - return (AST_PREEMPT | AST_URGENT); + if (rt_runq.count > 0) { + if (BASEPRI_RTQUEUES > processor->current_pri) + return (AST_PREEMPT | AST_URGENT); + else + return (AST_PREEMPT); + } } result = SCHED(processor_csw_check)(processor); @@ -4012,41 +4334,43 @@ static thread_t steal_thread_disabled( #endif /* CONFIG_SCHED_TRADITIONAL */ -int -thread_get_urgency(uint64_t *rt_period, uint64_t *rt_deadline) +void +sys_override_cpu_throttle(int flag) { - processor_t processor; - thread_t thread; - - processor = current_processor(); - - thread = processor->next_thread; - - if (thread != NULL) { - if (thread->sched_mode == TH_MODE_REALTIME) { - - if (rt_period != NULL) - *rt_period = thread->realtime.period; - if (rt_deadline != NULL) - *rt_deadline = thread->realtime.deadline; - - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), THREAD_URGENCY_REAL_TIME, thread->realtime.period, - (thread->realtime.deadline >> 32), thread->realtime.deadline, 0); + if (flag == CPU_THROTTLE_ENABLE) + cpu_throttle_enabled = 1; + if (flag == CPU_THROTTLE_DISABLE) + cpu_throttle_enabled = 0; +} - return (THREAD_URGENCY_REAL_TIME); - } else if ((thread->sched_pri <= MAXPRI_THROTTLE) && - (thread->priority <= MAXPRI_THROTTLE)) { - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), THREAD_URGENCY_BACKGROUND, thread->sched_pri, thread->priority, 0, 0); - return 
(THREAD_URGENCY_BACKGROUND); - } - else - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), THREAD_URGENCY_NORMAL, 0, 0, 0, 0); +int +thread_get_urgency(thread_t thread, uint64_t *arg1, uint64_t *arg2) +{ + if (thread == NULL || (thread->state & TH_IDLE)) { + *arg1 = 0; + *arg2 = 0; + + return (THREAD_URGENCY_NONE); + } else if (thread->sched_mode == TH_MODE_REALTIME) { + *arg1 = thread->realtime.period; + *arg2 = thread->realtime.deadline; + + return (THREAD_URGENCY_REAL_TIME); + } else if (cpu_throttle_enabled && + ((thread->sched_pri <= MAXPRI_THROTTLE) && (thread->priority <= MAXPRI_THROTTLE))) { + /* + * Background urgency applied when thread priority is MAXPRI_THROTTLE or lower and thread is not promoted + */ + *arg1 = thread->sched_pri; + *arg2 = thread->priority; + return (THREAD_URGENCY_BACKGROUND); + } else { + *arg1 = thread->sched_pri; + *arg2 = thread->priority; + return (THREAD_URGENCY_NORMAL); } - else - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), THREAD_URGENCY_NONE, 0, 0, 0, 0); - return (THREAD_URGENCY_NONE); } @@ -4084,8 +4408,24 @@ processor_idle( mach_absolute_time(), &PROCESSOR_DATA(processor, idle_state)); PROCESSOR_DATA(processor, current_state) = &PROCESSOR_DATA(processor, idle_state); - while (processor->next_thread == THREAD_NULL && SCHED(processor_queue_empty)(processor) && rt_runq.count == 0 && SCHED(fairshare_runq_count)() == 0 && - (thread == THREAD_NULL || ((thread->state & (TH_WAIT|TH_SUSP)) == TH_WAIT && !thread->wake_active))) { + while (1) { + + if (processor->state != PROCESSOR_IDLE) /* unsafe, but worst case we loop around once */ + break; + if (pset->pending_AST_cpu_mask & (1U << processor->cpu_id)) + break; + if (!SCHED(processor_queue_empty)(processor)) + break; + if (rt_runq.count) + break; +#if CONFIG_SCHED_IDLE_IN_PLACE + if (thread != THREAD_NULL) { + /* Did idle-in-place thread wake up */ + if ((thread->state & (TH_WAIT|TH_SUSP)) != TH_WAIT || thread->wake_active) + break; + } +#endif + IDLE_KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0); @@ -4110,6 +4450,9 @@ processor_idle( pset_lock(pset); + /* If we were sent a remote AST and came out of idle, acknowledge it here with pset lock held */ + pset->pending_AST_cpu_mask &= ~(1U << processor->cpu_id); + state = processor->state; if (state == PROCESSOR_DISPATCHING) { /* @@ -4119,8 +4462,10 @@ processor_idle( processor->next_thread = THREAD_NULL; processor->state = PROCESSOR_RUNNING; - if (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE) || - (rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri) ) { + if ((new_thread != THREAD_NULL) && (SCHED(processor_queue_has_priority)(processor, new_thread->sched_pri, FALSE) || + (rt_runq.count > 0 && BASEPRI_RTQUEUES >= new_thread->sched_pri)) ) { + processor->current_pri = IDLEPRI; + processor->current_thmode = TH_MODE_FIXED; processor->deadline = UINT64_MAX; pset_unlock(pset); @@ -4150,6 +4495,9 @@ processor_idle( remqueue((queue_entry_t)processor); processor->state = PROCESSOR_RUNNING; + processor->current_pri = IDLEPRI; + processor->current_thmode = TH_MODE_FIXED; + processor->deadline = UINT64_MAX; enqueue_tail(&pset->active_queue, (queue_entry_t)processor); } else @@ -4165,6 +4513,8 @@ processor_idle( */ if ((new_thread = processor->next_thread) != THREAD_NULL) { processor->next_thread = THREAD_NULL; + processor->current_pri = IDLEPRI; + 
processor->current_thmode = TH_MODE_FIXED; processor->deadline = UINT64_MAX; pset_unlock(pset); @@ -4229,6 +4579,7 @@ idle_thread_create( processor->idle_thread = thread; thread->sched_pri = thread->priority = IDLEPRI; thread->state = (TH_RUN | TH_IDLE); + thread->options |= TH_OPT_IDLE_THREAD; thread_unlock(thread); splx(s); @@ -4251,8 +4602,7 @@ sched_startup(void) thread_t thread; result = kernel_thread_start_priority((thread_continue_t)sched_init_thread, - (void *)SCHED(maintenance_continuation), - MAXPRI_KERNEL, &thread); + (void *)SCHED(maintenance_continuation), MAXPRI_KERNEL, &thread); if (result != KERN_SUCCESS) panic("sched_startup"); @@ -4271,8 +4621,10 @@ sched_startup(void) #if defined(CONFIG_SCHED_TRADITIONAL) -static uint64_t sched_tick_deadline = 0; - +static volatile uint64_t sched_maintenance_deadline; +static uint64_t sched_tick_last_abstime; +static uint64_t sched_tick_delta; +uint64_t sched_tick_max_delta; /* * sched_init_thread: * @@ -4280,16 +4632,45 @@ static uint64_t sched_tick_deadline = 0; * times per second. */ static void -sched_traditional_tick_continue(void) +sched_traditional_maintenance_continue(void) { - uint64_t abstime = mach_absolute_time(); + uint64_t sched_tick_ctime; + sched_tick_ctime = mach_absolute_time(); - sched_tick++; + if (__improbable(sched_tick_last_abstime == 0)) { + sched_tick_last_abstime = sched_tick_ctime; + sched_tick_delta = 1; + } else { + sched_tick_delta = ((sched_tick_ctime) - sched_tick_last_abstime) / sched_tick_interval; + /* Ensure a delta of 1, since the interval could be slightly + * smaller than the sched_tick_interval due to dispatch + * latencies. + */ + sched_tick_delta = MAX(sched_tick_delta, 1); + + /* In the event interrupt latencies or platform + * idle events that advanced the timebase resulted + * in periods where no threads were dispatched, + * cap the maximum "tick delta" at SCHED_TICK_MAX_DELTA + * iterations. + */ + sched_tick_delta = MIN(sched_tick_delta, SCHED_TICK_MAX_DELTA); + + sched_tick_last_abstime = sched_tick_ctime; + sched_tick_max_delta = MAX(sched_tick_delta, sched_tick_max_delta); + } + + /* Add a number of pseudo-ticks corresponding to the elapsed interval + * This could be greater than 1 if substantial intervals where + * all processors are idle occur, which rarely occurs in practice. + */ + + sched_tick += sched_tick_delta; /* * Compute various averages. */ - compute_averages(); + compute_averages(sched_tick_delta); /* * Scan the run queues for threads which @@ -4297,17 +4678,41 @@ sched_traditional_tick_continue(void) */ thread_update_scan(); - if (sched_tick_deadline == 0) - sched_tick_deadline = abstime; - - clock_deadline_for_periodic_event(sched_tick_interval, abstime, - &sched_tick_deadline); - - assert_wait_deadline((event_t)sched_traditional_tick_continue, THREAD_UNINT, sched_tick_deadline); - thread_block((thread_continue_t)sched_traditional_tick_continue); + assert_wait((event_t)sched_traditional_maintenance_continue, THREAD_UNINT); + thread_block((thread_continue_t)sched_traditional_maintenance_continue); /*NOTREACHED*/ } +static uint64_t sched_maintenance_wakeups; + +/* + * Determine if the set of routines formerly driven by a maintenance timer + * must be invoked, based on a deadline comparison. Signals the scheduler + * maintenance thread on deadline expiration. Must be invoked at an interval + * lower than the "sched_tick_interval", currently accomplished by + * invocation via the quantum expiration timer and at context switch time. 
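A pattern recurs throughout the hunks above: before signalling a remote processor, the sender test-and-sets that processor's bit in pset->pending_AST_cpu_mask under the pset lock, and the target clears its own bit (again with the pset lock held) on exit from processor_idle() or inside csw_check(). This collapses redundant wakeup IPIs to a CPU that has already been signalled but has not yet woken. A simplified sketch, assuming a 32-bit mask and a caller-held pset lock as in the patch:

#include <stdint.h>
#include <stdbool.h>

/* Sketch of the pending-AST IPI dedup; pset_sketch is an illustrative
 * stand-in for the kernel's processor_set structure. */
struct pset_sketch {
	uint32_t pending_AST_cpu_mask;  /* one bit per CPU in the set */
};

/* Sender side; caller holds the pset lock, as in realtime_setrun() and
 * processor_setrun(). Returns true if the caller should actually send
 * machine_signal_idle() or cause_ast_check(). */
static bool
signal_once(struct pset_sketch *pset, int cpu_id)
{
	uint32_t bit = 1U << cpu_id;
	if (pset->pending_AST_cpu_mask & bit)
		return false;                   /* already signalled; skip the IPI */
	pset->pending_AST_cpu_mask |= bit;
	return true;
}

/* Target side: acknowledge on exit from idle or in csw_check(), with the
 * pset lock held, so a new signal can be sent later. */
static void
ack_pending_ast(struct pset_sketch *pset, int cpu_id)
{
	pset->pending_AST_cpu_mask &= ~(1U << cpu_id);
}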
+ * Performance matters: this routine reuses a timestamp approximating the + * current absolute time received from the caller, and should perform + * no more than a comparison against the deadline in the common case. + */ +void +sched_traditional_consider_maintenance(uint64_t ctime) { + uint64_t ndeadline, deadline = sched_maintenance_deadline; + + if (__improbable(ctime >= deadline)) { + if (__improbable(current_thread() == sched_maintenance_thread)) + return; + OSMemoryBarrier(); + + ndeadline = ctime + sched_tick_interval; + + if (__probable(__sync_bool_compare_and_swap(&sched_maintenance_deadline, deadline, ndeadline))) { + thread_wakeup((event_t)sched_traditional_maintenance_continue); + sched_maintenance_wakeups++; + } + } +} + #endif /* CONFIG_SCHED_TRADITIONAL */ void @@ -4315,6 +4720,7 @@ sched_init_thread(void (*continuation)(void)) { thread_block(THREAD_CONTINUE_NULL); + sched_maintenance_thread = current_thread(); continuation(); /*NOTREACHED*/ diff --git a/osfmk/kern/sched_prim.h b/osfmk/kern/sched_prim.h index c22ba7efd..8c7bc56b0 100644 --- a/osfmk/kern/sched_prim.h +++ b/osfmk/kern/sched_prim.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -77,7 +77,7 @@ #ifdef MACH_KERNEL_PRIVATE /* Initialization */ -extern void sched_init(void) __attribute__((section("__TEXT, initcode"))); +extern void sched_init(void); extern void sched_startup(void); @@ -85,7 +85,8 @@ extern void sched_timebase_init(void); /* Force a preemption point for a thread and wait for it to stop running */ extern boolean_t thread_stop( - thread_t thread); + thread_t thread, + boolean_t until_not_runnable); /* Release a previous stop request */ extern void thread_unstop( @@ -315,8 +316,9 @@ do { \ #define THREAD_URGENCY_NORMAL 2 /* indicates that the thread is marked as a "normal" thread */ #define THREAD_URGENCY_REAL_TIME 3 /* indicates that the thread is marked as a "real-time" or urgent thread */ #define THREAD_URGENCY_MAX 4 /* Marker */ -/* Returns the "urgency" of the currently running thread (provided by scheduler) */ +/* Returns the "urgency" of a thread (provided by scheduler) */ extern int thread_get_urgency( + thread_t thread, uint64_t *rt_period, uint64_t *rt_deadline); @@ -324,7 +326,8 @@ extern int thread_get_urgency( extern void thread_tell_urgency( int urgency, uint64_t rt_period, - uint64_t rt_deadline); + uint64_t rt_deadline, + thread_t nthread); /* Tells if there are "active" RT threads in the system (provided by CPU PM) */ extern void active_rt_threads( @@ -338,6 +341,11 @@ __BEGIN_DECLS extern boolean_t assert_wait_possible(void); +/* Toggles a global override to turn off CPU Throttling */ +#define CPU_THROTTLE_DISABLE 0 +#define CPU_THROTTLE_ENABLE 1 +extern void sys_override_cpu_throttle(int flag); + /* ****************** Only exported until BSD stops using ******************** */ @@ -375,11 +383,28 @@ extern wait_result_t assert_wait_timeout( uint32_t interval, uint32_t scale_factor); +/* Assert that the thread intends to wait with an urgency, timeout and leeway */ +extern wait_result_t assert_wait_timeout_with_leeway( + event_t event, + wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, + uint32_t interval, + uint32_t leeway, + uint32_t scale_factor); + extern wait_result_t assert_wait_deadline( event_t event, wait_interrupt_t interruptible, uint64_t deadline); +/* Assert that the thread intends to wait with an urgency, deadline, 
and leeway */ +extern wait_result_t assert_wait_deadline_with_leeway( + event_t event, + wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, + uint64_t deadline, + uint64_t leeway); + /* Wake up thread (or threads) waiting on a particular event */ extern kern_return_t thread_wakeup_prim( event_t event, @@ -407,38 +432,6 @@ extern kern_return_t thread_wakeup_prim_internal( extern boolean_t preemption_enabled(void); -#ifdef KERNEL_PRIVATE - -#ifndef __LP64__ - -/* - * Obsolete interfaces. - */ - -extern void thread_set_timer( - uint32_t interval, - uint32_t scale_factor); - -extern void thread_set_timer_deadline( - uint64_t deadline); - -extern void thread_cancel_timer(void); - -#ifndef MACH_KERNEL_PRIVATE - -#ifndef ABSOLUTETIME_SCALAR_TYPE - -#define thread_set_timer_deadline(a) \ - thread_set_timer_deadline(__OSAbsoluteTime(a)) - -#endif /* ABSOLUTETIME_SCALAR_TYPE */ - -#endif /* MACH_KERNEL_PRIVATE */ - -#endif /* __LP64__ */ - -#endif /* KERNEL_PRIVATE */ - #ifdef MACH_KERNEL_PRIVATE /* diff --git a/osfmk/kern/sched_proto.c b/osfmk/kern/sched_proto.c index e31cb0590..4eb740797 100644 --- a/osfmk/kern/sched_proto.c +++ b/osfmk/kern/sched_proto.c @@ -276,7 +276,7 @@ sched_proto_maintenance_continuation(void) /* * Compute various averages. */ - compute_averages(); + compute_averages(1); if (sched_proto_tick_deadline == 0) sched_proto_tick_deadline = abstime; diff --git a/osfmk/kern/stack.c b/osfmk/kern/stack.c index 9906b8b3a..805830d54 100644 --- a/osfmk/kern/stack.c +++ b/osfmk/kern/stack.c @@ -386,7 +386,9 @@ stack_collect(void) * back in stack_alloc(). */ - stack = (vm_offset_t)vm_map_trunc_page(stack); + stack = (vm_offset_t)vm_map_trunc_page( + stack, + VM_MAP_PAGE_MASK(kernel_map)); stack -= PAGE_SIZE; if (vm_map_remove( kernel_map, @@ -551,9 +553,9 @@ processor_set_stack_usage( /* OK, have memory and list is locked */ thread_list = (thread_t *) addr; - for (i = 0, thread = (thread_t) queue_first(&threads); + for (i = 0, thread = (thread_t)(void *) queue_first(&threads); !queue_end(&threads, (queue_entry_t) thread); - thread = (thread_t) queue_next(&thread->threads)) { + thread = (thread_t)(void *) queue_next(&thread->threads)) { thread_reference_internal(thread); thread_list[i++] = thread; } diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index e17631376..30d2bbff2 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -89,6 +89,9 @@ #include #include #include +#if CONFIG_TELEMETRY +#include +#endif #include #include #include @@ -104,6 +107,7 @@ #include #include #include +#include #include #if MACH_KDP @@ -118,6 +122,15 @@ #include #endif +#if KPC +#include +#endif + +#if KPERF +#include +#endif + + #include static void kernel_bootstrap_thread(void); @@ -134,6 +147,7 @@ extern void OSKextRemoveKextBootstrap(void); void scale_setup(void); extern void bsd_scale_setup(int); extern unsigned int semaphore_max; +extern void stackshot_lock_init(void); /* * Running in virtual memory, on the interrupt stack. 
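The stack_collect() hunk above now passes an explicit page mask to vm_map_trunc_page(), presumably so truncation follows the page geometry of the map being operated on rather than a single global PAGE_MASK. Mask-based page rounding is plain bit arithmetic; a sketch with illustrative helper names:

#include <stdint.h>

/* Sketch of mask-parameterized page rounding, as used by
 * vm_map_trunc_page(stack, VM_MAP_PAGE_MASK(kernel_map)) above.
 * A page mask is (page_size - 1), e.g. 0xFFF for 4 KiB pages. */
typedef uintptr_t vm_offset_sketch_t;

static vm_offset_sketch_t
trunc_page_with_mask(vm_offset_sketch_t addr, vm_offset_sketch_t mask)
{
	return addr & ~mask;             /* round down to a page boundary */
}

static vm_offset_sketch_t
round_page_with_mask(vm_offset_sketch_t addr, vm_offset_sketch_t mask)
{
	return (addr + mask) & ~mask;    /* round up to the next boundary */
}
/* e.g. trunc_page_with_mask(0x12345, 0xFFF) == 0x12000 */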
@@ -159,7 +173,7 @@ kernel_early_bootstrap(void) /* * Initialize the timer callout world */ - timer_call_initialize(); + timer_call_init(); } @@ -190,6 +204,9 @@ kernel_bootstrap(void) kernel_bootstrap_kprintf("calling vm_mem_bootstrap\n"); vm_mem_bootstrap(); + kernel_bootstrap_kprintf("calling cs_init\n"); + cs_init(); + kernel_bootstrap_kprintf("calling vm_mem_init\n"); vm_mem_init(); @@ -198,6 +215,14 @@ kernel_bootstrap(void) machine_info.major_version = version_major; machine_info.minor_version = version_minor; +#if CONFIG_TELEMETRY + kernel_bootstrap_kprintf("calling telemetry_init\n"); + telemetry_init(); +#endif + + kernel_bootstrap_kprintf("calling stackshot_lock_init\n"); + stackshot_lock_init(); + kernel_bootstrap_kprintf("calling sched_init\n"); sched_init(); @@ -259,6 +284,7 @@ kernel_bootstrap(void) int kth_started = 0; vm_offset_t vm_kernel_addrperm; +vm_offset_t buf_kernel_addrperm; /* * Now running in a thread. Kick off other services, @@ -290,7 +316,10 @@ kernel_bootstrap_thread(void) */ kernel_bootstrap_thread_kprintf("calling thread_daemon_init\n"); thread_daemon_init(); - + + /* Create kernel map entry reserve */ + vm_kernel_reserved_entry_init(); + /* * Thread callout service. */ @@ -331,23 +360,41 @@ kernel_bootstrap_thread(void) cpu_physwindow_init(0); #endif - vm_kernel_reserved_entry_init(); + #if MACH_KDP kernel_bootstrap_kprintf("calling kdp_init\n"); kdp_init(); #endif +#if ALTERNATE_DEBUGGER + alternate_debugger_init(); +#endif + #if CONFIG_COUNTERS pmc_bootstrap(); #endif +#if KPC + kpc_init(); +#endif + +#if KPERF + kperf_bootstrap(); +#endif + +#if CONFIG_TELEMETRY + kernel_bootstrap_kprintf("calling bootprofile_init\n"); + bootprofile_init(); +#endif + #if (defined(__i386__) || defined(__x86_64__)) if (turn_on_log_leaks && !new_nkdbufs) new_nkdbufs = 200000; start_kern_tracing(new_nkdbufs, FALSE); if (turn_on_log_leaks) log_leaks = 1; + #endif #ifdef IOKIT @@ -382,6 +429,7 @@ kernel_bootstrap_thread(void) vm_commpage_init(); vm_commpage_text_init(); + #if CONFIG_MACF mac_policy_initmach(); #endif @@ -394,6 +442,7 @@ kernel_bootstrap_thread(void) * word-aligned address to zero via addition. */ vm_kernel_addrperm = (vm_offset_t)early_random() | 1; + buf_kernel_addrperm = (vm_offset_t)early_random() | 1; /* * Start the user bootstrap. @@ -411,7 +460,7 @@ kernel_bootstrap_thread(void) serial_keyboard_init(); /* Start serial keyboard if wanted */ vm_page_init_local_q(); - + thread_bind(PROCESSOR_NULL); /* @@ -503,7 +552,7 @@ load_context( * should never occur since the thread is expected * to have reserved stack. 
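kernel_bootstrap_thread() above seeds vm_kernel_addrperm, and now also buf_kernel_addrperm, with early_random() | 1: forcing the low bit keeps the perm odd, so adding it to a word-aligned kernel address can never yield zero. A sketch of how such a perm is typically applied when exporting a kernel pointer; this is modeled on xnu's VM_KERNEL_ADDRPERM-style macros, whose exact semantics should be treated as an assumption here:

#include <stdint.h>

typedef uintptr_t vm_offset_sketch_t;

/* Seeded once at boot with early_random() | 1, as in the hunk above. */
static vm_offset_sketch_t vm_kernel_addrperm_sketch;

static vm_offset_sketch_t
kernel_addrperm(vm_offset_sketch_t v)
{
	/* NULL stays NULL so userspace can still test for "no object";
	 * every other address is slid by the boot-time random constant,
	 * hiding real kernel pointers from user-visible interfaces. */
	return (v == 0) ? 0 : v + vm_kernel_addrperm_sketch;
}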
*/ - load_context_kprintf("thread %p, stack %x, stackptr %x\n", thread, + load_context_kprintf("thread %p, stack %lx, stackptr %lx\n", thread, thread->kernel_stack, thread->machine.kstackptr); if (!thread->kernel_stack) { load_context_kprintf("calling stack_alloc_try\n"); diff --git a/osfmk/kern/startup.h b/osfmk/kern/startup.h index 7c239784f..6e1864df6 100644 --- a/osfmk/kern/startup.h +++ b/osfmk/kern/startup.h @@ -42,8 +42,8 @@ __BEGIN_DECLS */ /* Initialize kernel */ -extern void kernel_early_bootstrap(void) __attribute__((section("__TEXT, initcode"))); -extern void kernel_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +extern void kernel_early_bootstrap(void); +extern void kernel_bootstrap(void); /* Initialize machine dependent stuff */ extern void machine_init(void); @@ -65,6 +65,9 @@ extern void device_service_create(void); /* BSD subsystem initialization */ extern void bsd_init(void); +/* codesigning subsystem initialization */ +extern void cs_init(void); + #endif /* MACH_BSD */ __END_DECLS diff --git a/osfmk/kern/sync_lock.c b/osfmk/kern/sync_lock.c index 5a06b28f8..3479d0c7b 100644 --- a/osfmk/kern/sync_lock.c +++ b/osfmk/kern/sync_lock.c @@ -53,823 +53,88 @@ #include #include -/* - * Ulock ownership MACROS - * - * Assumes: ulock internal lock is held - */ - -#define ulock_ownership_set(ul, th) \ - MACRO_BEGIN \ - thread_mtx_lock(th); \ - enqueue (&th->held_ulocks, (queue_entry_t) (ul)); \ - thread_mtx_unlock(th); \ - (ul)->holder = th; \ - MACRO_END - -#define ulock_ownership_clear(ul) \ - MACRO_BEGIN \ - thread_t th; \ - th = (ul)->holder; \ - if ((th)->active) { \ - thread_mtx_lock(th); \ - remqueue((queue_entry_t) (ul)); \ - thread_mtx_unlock(th); \ - } else { \ - remqueue((queue_entry_t) (ul)); \ - } \ - (ul)->holder = THREAD_NULL; \ - MACRO_END - -/* - * Lock set ownership MACROS - */ - -#define lock_set_ownership_set(ls, t) \ - MACRO_BEGIN \ - task_lock((t)); \ - enqueue_head(&(t)->lock_set_list, (queue_entry_t) (ls));\ - (t)->lock_sets_owned++; \ - task_unlock((t)); \ - (ls)->owner = (t); \ - MACRO_END - -#define lock_set_ownership_clear(ls, t) \ - MACRO_BEGIN \ - task_lock((t)); \ - remqueue((queue_entry_t) (ls)); \ - (t)->lock_sets_owned--; \ - task_unlock((t)); \ - MACRO_END - -unsigned int lock_set_event; -#define LOCK_SET_EVENT CAST_EVENT64_T(&lock_set_event) - -unsigned int lock_set_handoff; -#define LOCK_SET_HANDOFF CAST_EVENT64_T(&lock_set_handoff) - - -lck_attr_t lock_set_attr; -lck_grp_t lock_set_grp; -static lck_grp_attr_t lock_set_grp_attr; - /* - * ROUTINE: lock_set_init [private] - * - * Initialize the lock_set subsystem. - */ -void -lock_set_init(void) -{ - lck_grp_attr_setdefault(&lock_set_grp_attr); - lck_grp_init(&lock_set_grp, "lock_set", &lock_set_grp_attr); - lck_attr_setdefault(&lock_set_attr); -} - - -/* - * ROUTINE: lock_set_create [exported] - * - * Creates a lock set. - * The port representing the lock set is returned as a parameter. 
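The removed lock_set_create() below sized its allocation as a header plus n_ulocks - 1 trailing ulock entries (struct lock_set ends in a one-element placeholder array), and bounded n_ulocks by dividing the available range by the element size before multiplying, so the size computation could not overflow. A sketch of that check with a stand-in limit:

#include <stddef.h>
#include <stdbool.h>

/* Sketch of the overflow-safe sizing from the removed lock_set_create().
 * VM_MAX_ADDRESS_SKETCH stands in for the kernel's VM_MAX_ADDRESS. */
#define VM_MAX_ADDRESS_SKETCH ((size_t)1 << 47)    /* illustrative */

struct ulock_sketch    { int state; };
struct lock_set_sketch { int n_ulocks; struct ulock_sketch ulock_list[1]; };

static bool
lock_set_size(int n_ulocks, size_t *out_size)
{
	if (n_ulocks <= 0)
		return false;
	/* Divide first: n_ulocks * sizeof(struct ulock) could wrap, so
	 * compare n_ulocks against the quotient instead. */
	if ((VM_MAX_ADDRESS_SKETCH - sizeof(struct lock_set_sketch)) /
	        sizeof(struct ulock_sketch) < (size_t)n_ulocks)
		return false;
	*out_size = sizeof(struct lock_set_sketch) +
	    sizeof(struct ulock_sketch) * ((size_t)n_ulocks - 1);
	return true;
}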
+ * OBSOLETE: lock set routines are obsolete */ kern_return_t lock_set_create ( - task_t task, - lock_set_t *new_lock_set, - int n_ulocks, - int policy) -{ - lock_set_t lock_set = LOCK_SET_NULL; - ulock_t ulock; - vm_size_t size; - int x; - - *new_lock_set = LOCK_SET_NULL; - - if (task == TASK_NULL || n_ulocks <= 0 || policy > SYNC_POLICY_MAX) - return KERN_INVALID_ARGUMENT; - - if ((VM_MAX_ADDRESS - sizeof(struct lock_set))/sizeof(struct ulock) < (unsigned)n_ulocks) - return KERN_RESOURCE_SHORTAGE; - - size = sizeof(struct lock_set) + (sizeof(struct ulock) * (n_ulocks-1)); - lock_set = (lock_set_t) kalloc (size); - - if (lock_set == LOCK_SET_NULL) - return KERN_RESOURCE_SHORTAGE; - - - lock_set_lock_init(lock_set); - lock_set->n_ulocks = n_ulocks; - lock_set->ref_count = (task == kernel_task) ? 1 : 2; /* one for kernel, one for port */ - - /* - * Create and initialize the lock set port - */ - lock_set->port = ipc_port_alloc_kernel(); - if (lock_set->port == IP_NULL) { - kfree(lock_set, size); - return KERN_RESOURCE_SHORTAGE; - } - - ipc_kobject_set (lock_set->port, - (ipc_kobject_t) lock_set, - IKOT_LOCK_SET); - - /* - * Initialize each ulock in the lock set - */ - - for (x=0; x < n_ulocks; x++) { - ulock = (ulock_t) &lock_set->ulock_list[x]; - ulock_lock_init(ulock); - ulock->lock_set = lock_set; - ulock->holder = THREAD_NULL; - ulock->blocked = FALSE; - ulock->unstable = FALSE; - ulock->ho_wait = FALSE; - ulock->accept_wait = FALSE; - wait_queue_init(&ulock->wait_queue, policy); - } - - lock_set_ownership_set(lock_set, task); - - lock_set->active = TRUE; - *new_lock_set = lock_set; - - return KERN_SUCCESS; -} - -/* - * ROUTINE: lock_set_destroy [exported] - * - * Destroys a lock set. This call will only succeed if the - * specified task is the SAME task name specified at the lock set's - * creation. - * - * NOTES: - * - All threads currently blocked on the lock set's ulocks are awoken. - * - These threads will return with the KERN_LOCK_SET_DESTROYED error. - */ -kern_return_t -lock_set_destroy (task_t task, lock_set_t lock_set) -{ - ulock_t ulock; - int i; - - if (task == TASK_NULL || lock_set == LOCK_SET_NULL) - return KERN_INVALID_ARGUMENT; - - if (lock_set->owner != task) - return KERN_INVALID_RIGHT; - - lock_set_lock(lock_set); - if (!lock_set->active) { - lock_set_unlock(lock_set); - return KERN_LOCK_SET_DESTROYED; - } - - /* - * Deactivate lock set - */ - lock_set->active = FALSE; - - /* - * If a ulock is currently held in the target lock set: - * - * 1) Wakeup all threads blocked on the ulock (if any). Threads - * may be blocked waiting normally, or waiting for a handoff. - * Blocked threads will return with KERN_LOCK_SET_DESTROYED. - * - * 2) ulock ownership is cleared. - * The thread currently holding the ulock is revoked of its - * ownership. 
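Every routine in this file is reduced to the stub form introduced above: all parameters marked __unused and an unconditional KERN_FAILURE. The MIG interfaces still exist on the user side, so callers only discover the removal at run time. A hypothetical user-space probe, assuming the classic lock_set MIG routines remain exported via mach/mach.h:

#include <mach/mach.h>

/* Returns 1 if the kernel still implements Mach lock sets. On kernels with
 * this change, lock_set_create() reaches the stub above and the probe
 * returns 0. Hypothetical usage, not part of the patch. */
int
lock_sets_still_supported(void)
{
	lock_set_t ls = MACH_PORT_NULL;
	kern_return_t kr = lock_set_create(mach_task_self(), &ls, 1,
	                                   SYNC_POLICY_FIFO);
	if (kr == KERN_SUCCESS) {
		(void)lock_set_destroy(mach_task_self(), ls);
		return 1;
	}
	return 0;   /* KERN_FAILURE from the stubbed kernel routine */
}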
- */ - for (i = 0; i < lock_set->n_ulocks; i++) { - ulock = &lock_set->ulock_list[i]; - - ulock_lock(ulock); - - if (ulock->accept_wait) { - ulock->accept_wait = FALSE; - wait_queue_wakeup64_one(&ulock->wait_queue, - LOCK_SET_HANDOFF, - THREAD_RESTART); - } - - if (ulock->holder) { - if (ulock->blocked) { - ulock->blocked = FALSE; - wait_queue_wakeup64_all(&ulock->wait_queue, - LOCK_SET_EVENT, - THREAD_RESTART); - } - if (ulock->ho_wait) { - ulock->ho_wait = FALSE; - wait_queue_wakeup64_one(&ulock->wait_queue, - LOCK_SET_HANDOFF, - THREAD_RESTART); - } - ulock_ownership_clear(ulock); - } - - ulock_unlock(ulock); - } - - lock_set_unlock(lock_set); - lock_set_ownership_clear(lock_set, task); - - /* - * Drop the lock set reference given to the containing task, - * which inturn destroys the lock set structure if the reference - * count goes to zero. - */ - lock_set_dereference(lock_set); - - return KERN_SUCCESS; -} - -kern_return_t -lock_acquire (lock_set_t lock_set, int lock_id) + __unused task_t task, + __unused lock_set_t *new_lock_set, + __unused int n_ulocks, + __unused int policy) { - ulock_t ulock; - - if (lock_set == LOCK_SET_NULL) - return KERN_INVALID_ARGUMENT; - - if (lock_id < 0 || lock_id >= lock_set->n_ulocks) - return KERN_INVALID_ARGUMENT; - - retry: - lock_set_lock(lock_set); - if (!lock_set->active) { - lock_set_unlock(lock_set); - return KERN_LOCK_SET_DESTROYED; - } - - ulock = (ulock_t) &lock_set->ulock_list[lock_id]; - ulock_lock(ulock); - lock_set_unlock(lock_set); - - /* - * Block the current thread if the lock is already held. - */ - - if (ulock->holder != THREAD_NULL) { - int wait_result; - - if (ulock->holder == current_thread()) { - ulock_unlock(ulock); - return KERN_LOCK_OWNED_SELF; - } - - ulock->blocked = TRUE; - wait_result = wait_queue_assert_wait64(&ulock->wait_queue, - LOCK_SET_EVENT, - THREAD_ABORTSAFE, 0); - ulock_unlock(ulock); - - /* - * Block - Wait for lock to become available. - */ - if (wait_result == THREAD_WAITING) - wait_result = thread_block(THREAD_CONTINUE_NULL); - - /* - * Check the result status: - * - * Check to see why thread was woken up. In all cases, we - * already have been removed from the queue. - */ - switch (wait_result) { - case THREAD_AWAKENED: - /* lock transitioned from old locker to us */ - /* he already made us owner */ - return (ulock->unstable) ? KERN_LOCK_UNSTABLE : - KERN_SUCCESS; - - case THREAD_INTERRUPTED: - return KERN_ABORTED; - - case THREAD_RESTART: - goto retry; /* probably a dead lock_set */ - - default: - panic("lock_acquire\n"); - } - } - - /* - * Assign lock ownership - */ - ulock_ownership_set(ulock, current_thread()); - ulock_unlock(ulock); - - return (ulock->unstable) ? 
KERN_LOCK_UNSTABLE : KERN_SUCCESS; + return KERN_FAILURE; } kern_return_t -lock_release (lock_set_t lock_set, int lock_id) +lock_set_destroy ( + __unused task_t task, + __unused lock_set_t lock_set) { - ulock_t ulock; - - if (lock_set == LOCK_SET_NULL) - return KERN_INVALID_ARGUMENT; - - if (lock_id < 0 || lock_id >= lock_set->n_ulocks) - return KERN_INVALID_ARGUMENT; - - ulock = (ulock_t) &lock_set->ulock_list[lock_id]; - - return (ulock_release_internal(ulock, current_thread())); + return KERN_FAILURE; } kern_return_t -lock_try (lock_set_t lock_set, int lock_id) +lock_acquire ( + __unused lock_set_t lock_set, + __unused int lock_id) { - ulock_t ulock; - - - if (lock_set == LOCK_SET_NULL) - return KERN_INVALID_ARGUMENT; - - if (lock_id < 0 || lock_id >= lock_set->n_ulocks) - return KERN_INVALID_ARGUMENT; - - - lock_set_lock(lock_set); - if (!lock_set->active) { - lock_set_unlock(lock_set); - return KERN_LOCK_SET_DESTROYED; - } - - ulock = (ulock_t) &lock_set->ulock_list[lock_id]; - ulock_lock(ulock); - lock_set_unlock(lock_set); - - /* - * If the lock is already owned, we return without blocking. - * - * An ownership status is returned to inform the caller as to - * whether it already holds the lock or another thread does. - */ - - if (ulock->holder != THREAD_NULL) { - lock_set_unlock(lock_set); - - if (ulock->holder == current_thread()) { - ulock_unlock(ulock); - return KERN_LOCK_OWNED_SELF; - } - - ulock_unlock(ulock); - return KERN_LOCK_OWNED; - } - - /* - * Add the ulock to the lock set's held_ulocks list. - */ - - ulock_ownership_set(ulock, current_thread()); - ulock_unlock(ulock); - - return (ulock->unstable) ? KERN_LOCK_UNSTABLE : KERN_SUCCESS; + return KERN_FAILURE; } kern_return_t -lock_make_stable (lock_set_t lock_set, int lock_id) +lock_release ( + __unused lock_set_t lock_set, + __unused int lock_id) { - ulock_t ulock; - - - if (lock_set == LOCK_SET_NULL) - return KERN_INVALID_ARGUMENT; - - if (lock_id < 0 || lock_id >= lock_set->n_ulocks) - return KERN_INVALID_ARGUMENT; - - - lock_set_lock(lock_set); - if (!lock_set->active) { - lock_set_unlock(lock_set); - return KERN_LOCK_SET_DESTROYED; - } - - ulock = (ulock_t) &lock_set->ulock_list[lock_id]; - ulock_lock(ulock); - lock_set_unlock(lock_set); - - if (ulock->holder != current_thread()) { - ulock_unlock(ulock); - return KERN_INVALID_RIGHT; - } - - ulock->unstable = FALSE; - ulock_unlock(ulock); - - return KERN_SUCCESS; + return KERN_FAILURE; } -/* - * ROUTINE: lock_make_unstable [internal] - * - * Marks the lock as unstable. - * - * NOTES: - * - All future acquisitions of the lock will return with a - * KERN_LOCK_UNSTABLE status, until the lock is made stable again. - */ kern_return_t -lock_make_unstable (ulock_t ulock, thread_t thread) +lock_try ( + __unused lock_set_t lock_set, + __unused int lock_id) { - lock_set_t lock_set; - - lock_set = ulock->lock_set; - lock_set_lock(lock_set); - if (!lock_set->active) { - lock_set_unlock(lock_set); - return KERN_LOCK_SET_DESTROYED; - } - - ulock_lock(ulock); - lock_set_unlock(lock_set); - - if (ulock->holder != thread) { - ulock_unlock(ulock); - return KERN_INVALID_RIGHT; - } - - ulock->unstable = TRUE; - ulock_unlock(ulock); - - return KERN_SUCCESS; + return KERN_FAILURE; } -/* - * ROUTINE: ulock_release_internal [internal] - * - * Releases the ulock. - * If any threads are blocked waiting for the ulock, one is woken-up. 
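The removed ulock_release_internal() whose body follows wakes exactly one waiter and transfers ownership to it while it is still blocked, rather than clearing the holder and letting all waiters race for the lock. A generic sketch of that wake-one-with-handoff pattern, with illustrative types standing in for the kernel's wait-queue machinery:

#include <stddef.h>

struct waiter;                       /* opaque blocked thread */

struct ulock_model {
	struct waiter *holder;
	int            blocked;          /* hint: someone may be waiting */
};

/* dequeue_one_waiter() stands in for
 * wait_queue_wakeup64_identity_locked(); wake() for making it runnable. */
extern struct waiter *dequeue_one_waiter(struct ulock_model *ul);
extern void wake(struct waiter *w);

static void
release_with_handoff(struct ulock_model *ul)
{
	if (ul->blocked) {
		struct waiter *next = dequeue_one_waiter(ul);
		if (next != NULL) {
			ul->holder = next;   /* transfer before the waiter runs */
			wake(next);
			return;
		}
		ul->blocked = 0;         /* the hint was stale */
	}
	ul->holder = NULL;           /* no waiters: plain release */
}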
- * - */ kern_return_t -ulock_release_internal (ulock_t ulock, thread_t thread) +lock_make_stable ( + __unused lock_set_t lock_set, + __unused int lock_id) { - lock_set_t lock_set; - - if ((lock_set = ulock->lock_set) == LOCK_SET_NULL) - return KERN_INVALID_ARGUMENT; - - lock_set_lock(lock_set); - if (!lock_set->active) { - lock_set_unlock(lock_set); - return KERN_LOCK_SET_DESTROYED; - } - ulock_lock(ulock); - lock_set_unlock(lock_set); - - if (ulock->holder != thread) { - ulock_unlock(ulock); - return KERN_INVALID_RIGHT; - } - - /* - * If we have a hint that threads might be waiting, - * try to transfer the lock ownership to a waiting thread - * and wake it up. - */ - if (ulock->blocked) { - wait_queue_t wq = &ulock->wait_queue; - thread_t wqthread; - spl_t s; - - s = splsched(); - wait_queue_lock(wq); - wqthread = wait_queue_wakeup64_identity_locked(wq, - LOCK_SET_EVENT, - THREAD_AWAKENED, - TRUE); - /* wait_queue now unlocked, thread locked */ - - if (wqthread != THREAD_NULL) { - thread_unlock(wqthread); - splx(s); - - /* - * Transfer ulock ownership - * from the current thread to the acquisition thread. - */ - ulock_ownership_clear(ulock); - ulock_ownership_set(ulock, wqthread); - ulock_unlock(ulock); - - return KERN_SUCCESS; - } else { - ulock->blocked = FALSE; - splx(s); - } - } - - /* - * Disown ulock - */ - ulock_ownership_clear(ulock); - ulock_unlock(ulock); - - return KERN_SUCCESS; + return KERN_FAILURE; } kern_return_t -lock_handoff (lock_set_t lock_set, int lock_id) +lock_handoff ( + __unused lock_set_t lock_set, + __unused int lock_id) { - ulock_t ulock; - int wait_result; - - - if (lock_set == LOCK_SET_NULL) - return KERN_INVALID_ARGUMENT; - - if (lock_id < 0 || lock_id >= lock_set->n_ulocks) - return KERN_INVALID_ARGUMENT; - - retry: - lock_set_lock(lock_set); - - if (!lock_set->active) { - lock_set_unlock(lock_set); - return KERN_LOCK_SET_DESTROYED; - } - - ulock = (ulock_t) &lock_set->ulock_list[lock_id]; - ulock_lock(ulock); - lock_set_unlock(lock_set); - - if (ulock->holder != current_thread()) { - ulock_unlock(ulock); - return KERN_INVALID_RIGHT; - } - - /* - * If the accepting thread (the receiver) is already waiting - * to accept the lock from the handoff thread (the sender), - * then perform the hand-off now. - */ - - if (ulock->accept_wait) { - wait_queue_t wq = &ulock->wait_queue; - thread_t thread; - spl_t s; - - /* - * See who the lucky devil is, if he is still there waiting. - */ - s = splsched(); - wait_queue_lock(wq); - thread = wait_queue_wakeup64_identity_locked( - wq, - LOCK_SET_HANDOFF, - THREAD_AWAKENED, - TRUE); - /* wait queue unlocked, thread locked */ - - /* - * Transfer lock ownership - */ - if (thread != THREAD_NULL) { - /* - * The thread we are transferring to will try - * to take the lock on the ulock, and therefore - * will wait for us complete the handoff even - * through we set the thread running. - */ - thread_unlock(thread); - splx(s); - - ulock_ownership_clear(ulock); - ulock_ownership_set(ulock, thread); - ulock->accept_wait = FALSE; - ulock_unlock(ulock); - return KERN_SUCCESS; - } else { - - /* - * OOPS. The accepting thread must have been aborted. - * and is racing back to clear the flag that says is - * waiting for an accept. He will clear it when we - * release the lock, so just fall thru and wait for - * the next accept thread (that's the way it is - * specified). - */ - splx(s); - } - } - - /* - * Indicate that there is a hand-off thread waiting, and then wait - * for an accepting thread. 
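The removed lock_handoff() / lock_handoff_accept() pair below implements a two-sided rendezvous: whichever side arrives first sets its flag (ho_wait for the sender, accept_wait for the receiver) and blocks; the second side sees the flag, transfers ownership directly, clears the flag, and wakes the first. An illustrative skeleton of the sender's half, not the original code:

struct handoff_model {
	int   ho_wait;       /* sender parked, waiting for an acceptor */
	int   accept_wait;   /* acceptor parked, waiting for a sender  */
	void *holder;        /* current owner token */
};

/* park() stands in for assert_wait + thread_block; unpark_peer() for the
 * targeted wait-queue wakeup of the flagged side. */
extern void park(struct handoff_model *m);
extern void unpark_peer(struct handoff_model *m);

static void
handoff_send(struct handoff_model *m, void *receiver)
{
	if (m->accept_wait) {            /* the receiver got here first */
		m->holder = receiver;        /* transfer while it is parked */
		m->accept_wait = 0;
		unpark_peer(m);
		return;
	}
	m->ho_wait = 1;                  /* we are first: park until accepted */
	park(m);
}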
- */ - ulock->ho_wait = TRUE; - wait_result = wait_queue_assert_wait64(&ulock->wait_queue, - LOCK_SET_HANDOFF, - THREAD_ABORTSAFE, 0); - ulock_unlock(ulock); - - if (wait_result == THREAD_WAITING) - wait_result = thread_block(THREAD_CONTINUE_NULL); - - /* - * If the thread was woken-up via some action other than - * lock_handoff_accept or lock_set_destroy (i.e. thread_terminate), - * then we need to clear the ulock's handoff state. - */ - switch (wait_result) { - - - case THREAD_AWAKENED: - /* - * we take the ulock lock to syncronize with the - * thread that is accepting ownership. - */ - ulock_lock(ulock); - assert(ulock->holder != current_thread()); - ulock_unlock(ulock); - return KERN_SUCCESS; - - case THREAD_INTERRUPTED: - ulock_lock(ulock); - assert(ulock->holder == current_thread()); - ulock->ho_wait = FALSE; - ulock_unlock(ulock); - return KERN_ABORTED; - - case THREAD_RESTART: - goto retry; - } - - panic("lock_handoff"); return KERN_FAILURE; } kern_return_t -lock_handoff_accept (lock_set_t lock_set, int lock_id) +lock_handoff_accept ( + __unused lock_set_t lock_set, + __unused int lock_id) { - ulock_t ulock; - int wait_result; - - - if (lock_set == LOCK_SET_NULL) - return KERN_INVALID_ARGUMENT; - - if (lock_id < 0 || lock_id >= lock_set->n_ulocks) - return KERN_INVALID_ARGUMENT; - - retry: - lock_set_lock(lock_set); - if (!lock_set->active) { - lock_set_unlock(lock_set); - return KERN_LOCK_SET_DESTROYED; - } - - ulock = (ulock_t) &lock_set->ulock_list[lock_id]; - ulock_lock(ulock); - lock_set_unlock(lock_set); - - /* - * If there is another accepting thread that beat us, just - * return with an error. - */ - if (ulock->accept_wait) { - ulock_unlock(ulock); - return KERN_ALREADY_WAITING; - } - - if (ulock->holder == current_thread()) { - ulock_unlock(ulock); - return KERN_LOCK_OWNED_SELF; - } - - /* - * If the handoff thread (the sender) is already waiting to - * hand-off the lock to the accepting thread (the receiver), - * then perform the hand-off now. - */ - if (ulock->ho_wait) { - wait_queue_t wq = &ulock->wait_queue; - - /* - * See who the lucky devil is, if he is still there waiting. - */ - assert(ulock->holder != THREAD_NULL); - - if (wait_queue_wakeup64_thread(wq, - LOCK_SET_HANDOFF, - ulock->holder, - THREAD_AWAKENED) == KERN_SUCCESS) { - /* - * Holder thread was still waiting to give it - * away. Take over ownership. - */ - ulock_ownership_clear(ulock); - ulock_ownership_set(ulock, current_thread()); - ulock->ho_wait = FALSE; - ulock_unlock(ulock); - return (ulock->unstable) ? KERN_LOCK_UNSTABLE : - KERN_SUCCESS; - } - - /* - * OOPS. The owner was aborted out of the handoff. - * He will clear his own flag when he gets back. - * in the meantime, we will wait as if we didn't - * even see his flag (by falling thru). - */ - } - - ulock->accept_wait = TRUE; - wait_result = wait_queue_assert_wait64(&ulock->wait_queue, - LOCK_SET_HANDOFF, - THREAD_ABORTSAFE, 0); - ulock_unlock(ulock); - - if (wait_result == THREAD_WAITING) - wait_result = thread_block(THREAD_CONTINUE_NULL); - - /* - * If the thread was woken-up via some action other than - * lock_handoff_accept or lock_set_destroy (i.e. thread_terminate), - * then we need to clear the ulock's handoff state. - */ - switch (wait_result) { - - case THREAD_AWAKENED: - /* - * Take the lock to synchronize with the thread handing - * off the lock to us. We don't want to continue until - * they complete the handoff. 
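Both removed handoff paths end in the same wake-reason dispatch: block, then switch on why the thread woke, retrying from the top on THREAD_RESTART (e.g. the lock set was destroyed mid-wait). A compact sketch of that convention; the enum mirrors xnu's wait_result_t values as recalled, so treat the exact numbers as an assumption:

/* block_on_event() stands in for wait_queue_assert_wait64() followed by
 * thread_block(). */
enum wait_result_sketch {
	THREAD_AWAKENED_SK    = 0,  /* normal wakeup: handoff completed */
	THREAD_INTERRUPTED_SK = 2,  /* aborted: undo our flag, fail out */
	THREAD_RESTART_SK     = 3   /* object destroyed or raced: retry */
};

extern enum wait_result_sketch block_on_event(void);

static int
handoff_wait(void)
{
retry:
	switch (block_on_event()) {
	case THREAD_AWAKENED_SK:
		return 0;
	case THREAD_INTERRUPTED_SK:
		return -1;               /* caller maps this to KERN_ABORTED */
	case THREAD_RESTART_SK:
		goto retry;              /* state changed underneath us */
	}
	return -1;                   /* unexpected: the original code panics */
}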
- */ - ulock_lock(ulock); - assert(ulock->accept_wait == FALSE); - assert(ulock->holder == current_thread()); - ulock_unlock(ulock); - return KERN_SUCCESS; - - case THREAD_INTERRUPTED: - ulock_lock(ulock); - ulock->accept_wait = FALSE; - ulock_unlock(ulock); - return KERN_ABORTED; - - case THREAD_RESTART: - goto retry; - } - - panic("lock_handoff_accept"); return KERN_FAILURE; } -/* - * Routine: lock_set_reference - * - * Take out a reference on a lock set. This keeps the data structure - * in existence (but the lock set may be deactivated). - */ void -lock_set_reference(lock_set_t lock_set) +lock_set_reference( + __unused lock_set_t lock_set) { - OSIncrementAtomic(&((lock_set)->ref_count)); + return; } -/* - * Routine: lock_set_dereference - * - * Release a reference on a lock set. If this is the last reference, - * the lock set data structure is deallocated. - */ void -lock_set_dereference(lock_set_t lock_set) +lock_set_dereference( + __unused lock_set_t lock_set) { - int size; - - if (1 == OSDecrementAtomic(&((lock_set)->ref_count))) { - ipc_port_dealloc_kernel(lock_set->port); - size = (int)(sizeof(struct lock_set) + - (sizeof(struct ulock) * (lock_set->n_ulocks - 1))); - kfree(lock_set, size); - } + return; } -void -ulock_release_all( - thread_t thread) -{ - ulock_t ulock; - - while (!queue_empty(&thread->held_ulocks)) { - ulock = (ulock_t)queue_first(&thread->held_ulocks); - lock_make_unstable(ulock, thread); - ulock_release_internal(ulock, thread); - } -} diff --git a/osfmk/kern/sync_lock.h b/osfmk/kern/sync_lock.h index 589fdfcb8..b4e81af38 100644 --- a/osfmk/kern/sync_lock.h +++ b/osfmk/kern/sync_lock.h @@ -48,73 +48,6 @@ #include #include -typedef struct ulock { - queue_chain_t thread_link; /* ulocks owned by thread MUST BE FIRST */ - queue_chain_t held_link; /* ulocks held in the lock set */ - queue_chain_t handoff_link; /* ulocks w/ active handoffs */ - struct lock_set *lock_set; /* the retaining lock set */ - thread_t holder; /* thread that holds the lock */ - - struct wait_queue wait_queue; /* queue of blocked threads */ - - decl_lck_mtx_data(,lock) /* ulock lock */ - - unsigned int /* flags */ - /* boolean_t */ blocked:1, /* did threads block waiting? */ - /* boolean_t */ unstable:1, /* unstable? (holder died) */ - /* boolean_t */ ho_wait:1, /* handoff thread waiting? */ - /* boolean_t */ accept_wait:1, /* accepting thread waiting? 
*/ - :0; /* force to long boundary */ -} Ulock; - -typedef struct ulock *ulock_t; - -typedef struct lock_set { - queue_chain_t task_link; /* lock sets owned by a task MUST BE FIRST */ - task_t owner; /* task that owns the lock set */ - ipc_port_t port; /* lock set port */ - decl_lck_mtx_data(,lock) /* lock set lock */ - uint32_t ref_count; /* reference count */ - - boolean_t active; /* active status */ - int n_ulocks; /* number of ulocks in the lock set */ - - struct ulock ulock_list[1]; /* ulock group list place holder */ -} Lock_Set; - -#define ULOCK_NULL ((ulock_t) 0) - -#define ULOCK_FREE 0 -#define ULOCK_HELD 1 - -extern lck_grp_t lock_set_grp; -extern lck_attr_t lock_set_attr; - -/* - * Data structure internal lock macros - */ - -#define lock_set_lock_init(ls) lck_mtx_init(&(ls)->lock, &lock_set_grp, &lock_set_attr) -#define lock_set_lock(ls) lck_mtx_lock(&(ls)->lock) -#define lock_set_unlock(ls) lck_mtx_unlock(&(ls)->lock) - -#define ulock_lock_init(ul) lck_mtx_init(&(ul)->lock, &lock_set_grp, &lock_set_attr) -#define ulock_lock(ul) lck_mtx_lock(&(ul)->lock) -#define ulock_unlock(ul) lck_mtx_unlock(&(ul)->lock) - -extern void lock_set_init(void) __attribute__((section("__TEXT, initcode"))); - -extern kern_return_t ulock_release_internal( - ulock_t ulock, - thread_t thread); - -extern kern_return_t lock_make_unstable( - ulock_t ulock, - thread_t thread); - -extern void ulock_release_all( - thread_t thread); - extern void lock_set_reference (lock_set_t lock_set); extern void lock_set_dereference (lock_set_t lock_set); diff --git a/osfmk/kern/sync_sema.c b/osfmk/kern/sync_sema.c index 687387b3c..812aa800a 100644 --- a/osfmk/kern/sync_sema.c +++ b/osfmk/kern/sync_sema.c @@ -637,7 +637,9 @@ semaphore_wait_internal( (void)wait_queue_assert_wait64_locked( &wait_semaphore->wait_queue, SEMAPHORE_EVENT, - THREAD_ABORTSAFE, deadline, + THREAD_ABORTSAFE, + TIMEOUT_URGENCY_USER_NORMAL, + deadline, 0, self); thread_unlock(self); } diff --git a/osfmk/kern/sync_sema.h b/osfmk/kern/sync_sema.h index 1da09b0f3..bcd013b2f 100644 --- a/osfmk/kern/sync_sema.h +++ b/osfmk/kern/sync_sema.h @@ -61,7 +61,7 @@ typedef struct semaphore { #define semaphore_lock(semaphore) wait_queue_lock(&(semaphore)->wait_queue) #define semaphore_unlock(semaphore) wait_queue_unlock(&(semaphore)->wait_queue) -extern void semaphore_init(void) __attribute__((section("__TEXT, initcode"))); +extern void semaphore_init(void); extern void semaphore_reference (semaphore_t semaphore); extern void semaphore_dereference (semaphore_t semaphore); diff --git a/osfmk/kern/syscall_subr.c b/osfmk/kern/syscall_subr.c index 89fc63b1b..da2fe9e74 100644 --- a/osfmk/kern/syscall_subr.c +++ b/osfmk/kern/syscall_subr.c @@ -77,6 +77,7 @@ #ifdef MACH_BSD extern void workqueue_thread_yielded(void); +extern sched_call_t workqueue_get_sched_callback(void); #endif /* MACH_BSD */ @@ -199,15 +200,48 @@ __unused struct swtch_pri_args *args) return (result); } +static int +thread_switch_disable_workqueue_sched_callback(void) +{ + sched_call_t callback = workqueue_get_sched_callback(); + thread_t self = current_thread(); + if (!callback || self->sched_call != callback) { + return FALSE; + } + spl_t s = splsched(); + thread_lock(self); + thread_sched_call(self, NULL); + thread_unlock(self); + splx(s); + return TRUE; +} + +static void +thread_switch_enable_workqueue_sched_callback(void) +{ + sched_call_t callback = workqueue_get_sched_callback(); + thread_t self = current_thread(); + spl_t s = splsched(); + thread_lock(self); + thread_sched_call(self, callback); 
+ thread_unlock(self); + splx(s); +} + static void thread_switch_continue(void) { register thread_t self = current_thread(); int option = self->saved.swtch.option; + boolean_t reenable_workq_callback = self->saved.swtch.reenable_workq_callback; + - if (option == SWITCH_OPTION_DEPRESS) + if (option == SWITCH_OPTION_DEPRESS || option == SWITCH_OPTION_OSLOCK_DEPRESS) thread_depress_abort_internal(self); + if (reenable_workq_callback) + thread_switch_enable_workqueue_sched_callback(); + thread_syscall_return(KERN_SUCCESS); /*NOTREACHED*/ } @@ -225,23 +259,47 @@ thread_switch( mach_port_name_t thread_name = args->thread_name; int option = args->option; mach_msg_timeout_t option_time = args->option_time; + uint32_t scale_factor = NSEC_PER_MSEC; + boolean_t reenable_workq_callback = FALSE; + boolean_t depress_option = FALSE; + boolean_t wait_option = FALSE; /* - * Process option. + * Validate and process option. */ switch (option) { case SWITCH_OPTION_NONE: - case SWITCH_OPTION_DEPRESS: + workqueue_thread_yielded(); + break; case SWITCH_OPTION_WAIT: - break; - + wait_option = TRUE; + workqueue_thread_yielded(); + break; + case SWITCH_OPTION_DEPRESS: + depress_option = TRUE; + workqueue_thread_yielded(); + break; + case SWITCH_OPTION_DISPATCH_CONTENTION: + scale_factor = NSEC_PER_USEC; + wait_option = TRUE; + if (thread_switch_disable_workqueue_sched_callback()) + reenable_workq_callback = TRUE; + break; + case SWITCH_OPTION_OSLOCK_DEPRESS: + depress_option = TRUE; + if (thread_switch_disable_workqueue_sched_callback()) + reenable_workq_callback = TRUE; + break; + case SWITCH_OPTION_OSLOCK_WAIT: + wait_option = TRUE; + if (thread_switch_disable_workqueue_sched_callback()) + reenable_workq_callback = TRUE; + break; default: return (KERN_INVALID_ARGUMENT); } - workqueue_thread_yielded(); - /* * Translate the port name if supplied. */ @@ -267,6 +325,32 @@ thread_switch( else thread = THREAD_NULL; + + if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) { + if (thread != THREAD_NULL) { + + if (thread->task != self->task) { + /* + * OSLock boosting only applies to other threads + * in your same task (even if you have a port for + * a thread in another task) + */ + + (void)thread_deallocate_internal(thread); + thread = THREAD_NULL; + } else { + /* + * Attempt to kick the lock owner up to our same IO throttling tier. + * If the thread is currently blocked in throttle_lowpri_io(), + * it will immediately break out. + */ + int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO); + + set_thread_iotier_override(thread, new_policy); + } + } + } + /* * Try to handoff if supplied. 
*/ @@ -298,14 +382,15 @@ thread_switch( (void)thread_deallocate_internal(thread); - if (option == SWITCH_OPTION_WAIT) + if (wait_option) assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, - option_time, 1000*NSEC_PER_USEC); + option_time, scale_factor); else - if (option == SWITCH_OPTION_DEPRESS) + if (depress_option) thread_depress_ms(option_time); self->saved.swtch.option = option; + self->saved.swtch.reenable_workq_callback = reenable_workq_callback; thread_run(self, (thread_continue_t)thread_switch_continue, NULL, thread); /* NOTREACHED */ @@ -317,19 +402,23 @@ thread_switch( thread_deallocate(thread); } - if (option == SWITCH_OPTION_WAIT) - assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, 1000*NSEC_PER_USEC); + if (wait_option) + assert_wait_timeout((event_t)assert_wait_timeout, THREAD_ABORTSAFE, option_time, scale_factor); else - if (option == SWITCH_OPTION_DEPRESS) + if (depress_option) thread_depress_ms(option_time); self->saved.swtch.option = option; + self->saved.swtch.reenable_workq_callback = reenable_workq_callback; thread_block_reason((thread_continue_t)thread_switch_continue, NULL, AST_YIELD); - if (option == SWITCH_OPTION_DEPRESS) + if (depress_option) thread_depress_abort_internal(self); + if (reenable_workq_callback) + thread_switch_enable_workqueue_sched_callback(); + return (KERN_SUCCESS); } @@ -356,7 +445,7 @@ thread_depress_abstime( if (interval != 0) { clock_absolutetime_interval_to_deadline(interval, &deadline); - if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_CRITICAL)) + if (!timer_call_enter(&self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL)) self->depress_timer_active++; } } @@ -371,7 +460,7 @@ thread_depress_ms( uint64_t abstime; clock_interval_to_absolutetime_interval( - interval, 1000*NSEC_PER_USEC, &abstime); + interval, NSEC_PER_MSEC, &abstime); thread_depress_abstime(abstime); } @@ -453,7 +542,7 @@ thread_poll_yield( self->sched_flags |= TH_SFLAG_POLLDEPRESS; abstime += (total_computation >> sched_poll_yield_shift); - if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_CRITICAL)) + if (!timer_call_enter(&self->depress_timer, abstime, TIMER_CALL_USER_CRITICAL)) self->depress_timer_active++; thread_unlock(self); diff --git a/osfmk/kern/syscall_sw.c b/osfmk/kern/syscall_sw.c index 9abedea98..012f4b2d1 100644 --- a/osfmk/kern/syscall_sw.c +++ b/osfmk/kern/syscall_sw.c @@ -67,11 +67,18 @@ /* * To add a new entry: - * Add an "MACH_TRAP(routine, arg count)" to the table below. + * Add an "MACH_TRAP(routine, arg_count, num_32_bit_words, munge_routine)" to the table below. + * where, + * - routine: The trap handling routine in the kernel + * - arg_count: The number of arguments for the mach trap (independant of arch/arg size). + * This value also defines the number of 64-bit words copied in for a U64 process. + * - num_32_bit_words: The number of 32-bit words to be copied in for a U32 process. + * - munge_routine: The argument munging routine to align input args correctly. * - * Add trap definition to mach/syscall_sw.h and + * Also, add trap definition to mach/syscall_sw.h and * recompile user library. * + * * WARNING: If you add a trap which requires more than 7 * parameters, mach/{machine}/syscall_sw.h and {machine}/trap.c * and/or {machine}/locore.s may need to be modified for it @@ -79,12 +86,6 @@ * * WARNING: Don't use numbers 0 through -9. They (along with * the positive numbers) are reserved for Unix. 
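With the four-field MACH_TRAP() format described above, each entry records the logical argument count separately from the number of 32-bit words a 32-bit caller pushes, plus the munge routine that widens those words into 64-bit argument slots. Entry 10 below makes a worked example: _kernelrpc_mach_vm_allocate_trap takes 4 arguments, but two of them are 64-bit, so a U32 caller supplies 5 words and munge_wwlw rebuilds them. The letter decoding assumed here (w = one 32-bit word, l = one 64-bit value built from two words) follows the munge naming convention in bsd/dev/munge.c. A sketch of what a wwlw-style munge does:

#include <stdint.h>

/* Sketch of a munge_wwlw-alike for a little-endian U32 caller: expand
 * 5 packed 32-bit words into 4 64-bit argument slots. For clarity this
 * uses separate in/out buffers; the real routines expand in place,
 * working back to front so no word is overwritten before it is read. */
static void
munge_wwlw_sketch(const uint32_t in[5], uint64_t out[4])
{
	out[0] = in[0];                              /* w: target port  */
	out[1] = in[1];                              /* w: user pointer */
	out[2] = ((uint64_t)in[3] << 32) | in[2];    /* l: 64-bit size  */
	out[3] = in[4];                              /* w: flags        */
}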
- * - * WARNING: The 'arg_count' parameter in the list below is poorly named. - * It doesn't refer to the number of arguments the trap takes - - * it actually refers to the number of 32-bit words that need - * to be copied in from userspace. The munging of words to trap - * arguments is done in mach_call_munger(). */ int kern_invalid_debug = 0; @@ -99,142 +100,138 @@ int kern_invalid_debug = 0; #include const mach_trap_t mach_trap_table[MACH_TRAP_TABLE_COUNT] = { -/* 0 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 1 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 2 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 3 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 4 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 5 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 6 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 7 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 8 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 9 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 10 */ MACH_TRAP(_kernelrpc_mach_vm_allocate_trap, 5, munge_wwlw, munge_dddd), -/* 11 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 12 */ MACH_TRAP(_kernelrpc_mach_vm_deallocate_trap, 5, munge_wll, munge_ddd), -/* 13 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 14 */ MACH_TRAP(_kernelrpc_mach_vm_protect_trap, 7, munge_wllww, munge_ddddd), -/* 15 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 16 */ MACH_TRAP(_kernelrpc_mach_port_allocate_trap, 3, munge_www, munge_ddd), -/* 17 */ MACH_TRAP(_kernelrpc_mach_port_destroy_trap, 2, munge_ww, munge_dd), -/* 18 */ MACH_TRAP(_kernelrpc_mach_port_deallocate_trap, 2, munge_ww, munge_dd), -/* 19 */ MACH_TRAP(_kernelrpc_mach_port_mod_refs_trap, 4, munge_wwww, munge_dddd), -/* 20 */ MACH_TRAP(_kernelrpc_mach_port_move_member_trap, 3, munge_www, munge_ddd), -/* 21 */ MACH_TRAP(_kernelrpc_mach_port_insert_right_trap, 4, munge_wwww, munge_dddd), -/* 22 */ MACH_TRAP(_kernelrpc_mach_port_insert_member_trap, 3, munge_www, munge_ddd), -/* 23 */ MACH_TRAP(_kernelrpc_mach_port_extract_member_trap, 3, munge_www, munge_ddd), -/* 24 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 25 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 26 */ MACH_TRAP(mach_reply_port, 0, NULL, NULL), -/* 27 */ MACH_TRAP(thread_self_trap, 0, NULL, NULL), -/* 28 */ MACH_TRAP(task_self_trap, 0, NULL, NULL), -/* 29 */ MACH_TRAP(host_self_trap, 0, NULL, NULL), -/* 30 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 31 */ MACH_TRAP(mach_msg_trap, 7, munge_wwwwwww, munge_ddddddd), -/* 32 */ MACH_TRAP(mach_msg_overwrite_trap, 8, munge_wwwwwwww, munge_dddddddd), -/* 33 */ MACH_TRAP(semaphore_signal_trap, 1, munge_w, munge_d), -/* 34 */ MACH_TRAP(semaphore_signal_all_trap, 1, munge_w, munge_d), -/* 35 */ MACH_TRAP(semaphore_signal_thread_trap, 2, munge_ww, munge_dd), -/* 36 */ MACH_TRAP(semaphore_wait_trap, 1, munge_w, munge_d), -/* 37 */ MACH_TRAP(semaphore_wait_signal_trap, 2, munge_ww, munge_dd), -/* 38 */ MACH_TRAP(semaphore_timedwait_trap, 3, munge_www, munge_ddd), -/* 39 */ MACH_TRAP(semaphore_timedwait_signal_trap, 4, munge_wwww, munge_dddd), -/* 40 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 41 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 42 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -#if !defined(CONFIG_EMBEDDED) -/* 43 */ MACH_TRAP(map_fd, 5, munge_wwwww, munge_ddddd), -#else -/* 43 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -#endif /* !defined(CONFIG_EMBEDDED) */ -/* 44 */ MACH_TRAP(task_name_for_pid, 3, munge_www, munge_ddd), -/* 45 */ MACH_TRAP(task_for_pid, 3, munge_www, munge_ddd), -/* 46 */ MACH_TRAP(pid_for_task, 2, munge_ww,munge_dd), 
-/* 47 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 48 */ MACH_TRAP(macx_swapon, 5, munge_lwww, munge_dddd), -/* 49 */ MACH_TRAP(macx_swapoff, 3, munge_lw, munge_dd), -/* 50 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 51 */ MACH_TRAP(macx_triggers, 4, munge_wwww, munge_dddd), -/* 52 */ MACH_TRAP(macx_backing_store_suspend, 1, munge_w, munge_d), -/* 53 */ MACH_TRAP(macx_backing_store_recovery, 1, munge_w, munge_d), -/* 54 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 55 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 56 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 57 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 58 */ MACH_TRAP(pfz_exit, 0, NULL, NULL), -/* 59 */ MACH_TRAP(swtch_pri, 0, NULL, NULL), -/* 60 */ MACH_TRAP(swtch, 0, NULL, NULL), -/* 61 */ MACH_TRAP(thread_switch, 3, munge_www, munge_ddd), -/* 62 */ MACH_TRAP(clock_sleep_trap, 5, munge_wwwww, munge_ddddd), -/* 63 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 0 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 1 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 2 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 3 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 4 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 5 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 6 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 7 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 8 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 9 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 10 */ MACH_TRAP(_kernelrpc_mach_vm_allocate_trap, 4, 5, munge_wwlw), +/* 11 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 12 */ MACH_TRAP(_kernelrpc_mach_vm_deallocate_trap, 3, 5, munge_wll), +/* 13 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 14 */ MACH_TRAP(_kernelrpc_mach_vm_protect_trap, 5, 7, munge_wllww), +/* 15 */ MACH_TRAP(_kernelrpc_mach_vm_map_trap, 6, 8, munge_wwllww), +/* 16 */ MACH_TRAP(_kernelrpc_mach_port_allocate_trap, 3, 3, munge_www), +/* 17 */ MACH_TRAP(_kernelrpc_mach_port_destroy_trap, 2, 2, munge_ww), +/* 18 */ MACH_TRAP(_kernelrpc_mach_port_deallocate_trap, 2, 2, munge_ww), +/* 19 */ MACH_TRAP(_kernelrpc_mach_port_mod_refs_trap, 4, 4, munge_wwww), +/* 20 */ MACH_TRAP(_kernelrpc_mach_port_move_member_trap, 3, 3, munge_www), +/* 21 */ MACH_TRAP(_kernelrpc_mach_port_insert_right_trap, 4, 4, munge_wwww), +/* 22 */ MACH_TRAP(_kernelrpc_mach_port_insert_member_trap, 3, 3, munge_www), +/* 23 */ MACH_TRAP(_kernelrpc_mach_port_extract_member_trap, 3, 3, munge_www), +/* 24 */ MACH_TRAP(_kernelrpc_mach_port_construct_trap, 4, 5, munge_wwlw), +/* 25 */ MACH_TRAP(_kernelrpc_mach_port_destruct_trap, 4, 5, munge_wwwl), +/* 26 */ MACH_TRAP(mach_reply_port, 0, 0, NULL), +/* 27 */ MACH_TRAP(thread_self_trap, 0, 0, NULL), +/* 28 */ MACH_TRAP(task_self_trap, 0, 0, NULL), +/* 29 */ MACH_TRAP(host_self_trap, 0, 0, NULL), +/* 30 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 31 */ MACH_TRAP(mach_msg_trap, 7, 7, munge_wwwwwww), +/* 32 */ MACH_TRAP(mach_msg_overwrite_trap, 8, 8, munge_wwwwwwww), +/* 33 */ MACH_TRAP(semaphore_signal_trap, 1, 1, munge_w), +/* 34 */ MACH_TRAP(semaphore_signal_all_trap, 1, 1, munge_w), +/* 35 */ MACH_TRAP(semaphore_signal_thread_trap, 2, 2, munge_ww), +/* 36 */ MACH_TRAP(semaphore_wait_trap, 1, 1, munge_w), +/* 37 */ MACH_TRAP(semaphore_wait_signal_trap, 2, 2, munge_ww), +/* 38 */ MACH_TRAP(semaphore_timedwait_trap, 3, 3, munge_www), +/* 39 */ MACH_TRAP(semaphore_timedwait_signal_trap, 4, 4, munge_wwww), +/* 40 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 41 */ MACH_TRAP(_kernelrpc_mach_port_guard_trap, 4, 5, munge_wwlw), +/* 42 */ MACH_TRAP(_kernelrpc_mach_port_unguard_trap, 3, 4, munge_wwl), 
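In the new four-column MACH_TRAP form above, arg_count is the number of trap arguments proper, while u32_arg_words is the number of 32-bit words a 32-bit caller copies in; the munge32 routine widens that word stream into 64-bit argument slots (the job the removed comment attributed to mach_call_munger()). A minimal sketch of a "wll" munger, matching entry 12 above (three arguments rebuilt from five words: w = 1 word, l = 2 words, so 1 + 2 + 2 = 5); this is illustrative only, not the kernel's implementation:

/*
 * Sketch only: conceptually what a "wll" munger does. A 'w' is one
 * 32-bit word, zero-extended here; an 'l' is a 64-bit value passed
 * as two words, reassembled low word first. The real routines are
 * defined elsewhere and differ in detail (e.g. they munge in place).
 */
static void
munge_wll_sketch(const uint32_t *in, uint64_t *out)
{
	out[0] = in[0];                           /* w: one word             */
	out[1] = ((uint64_t)in[2] << 32) | in[1]; /* l: two words, low first */
	out[2] = ((uint64_t)in[4] << 32) | in[3]; /* l: two words, low first */
}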
+/* 43 */ MACH_TRAP(map_fd, 5, 5, munge_wwwww), +/* 44 */ MACH_TRAP(task_name_for_pid, 3, 3, munge_www), +/* 45 */ MACH_TRAP(task_for_pid, 3, 3, munge_www), +/* 46 */ MACH_TRAP(pid_for_task, 2, 2, munge_ww), +/* 47 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 48 */ MACH_TRAP(macx_swapon, 4, 5, munge_lwww), +/* 49 */ MACH_TRAP(macx_swapoff, 2, 3, munge_lw), +/* 50 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 51 */ MACH_TRAP(macx_triggers, 4, 4, munge_wwww), +/* 52 */ MACH_TRAP(macx_backing_store_suspend, 1, 1, munge_w), +/* 53 */ MACH_TRAP(macx_backing_store_recovery, 1, 1, munge_w), +/* 54 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 55 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 56 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 57 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 58 */ MACH_TRAP(pfz_exit, 0, 0, NULL), +/* 59 */ MACH_TRAP(swtch_pri, 0, 0, NULL), +/* 60 */ MACH_TRAP(swtch, 0, 0, NULL), +/* 61 */ MACH_TRAP(thread_switch, 3, 3, munge_www), +/* 62 */ MACH_TRAP(clock_sleep_trap, 5, 5, munge_wwwww), +/* 63 */ MACH_TRAP(kern_invalid, 0, 0, NULL), /* traps 64 - 95 reserved (debo) */ -/* 64 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 65 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 66 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 67 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 68 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 69 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 70 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 71 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 72 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 73 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 74 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 75 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 76 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 77 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 78 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 79 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 80 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 81 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 82 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 83 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 84 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 85 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 86 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 87 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 88 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 89 */ MACH_TRAP(mach_timebase_info_trap, 1, munge_w, munge_d), -/* 90 */ MACH_TRAP(mach_wait_until_trap, 2, munge_l, munge_d), -/* 91 */ MACH_TRAP(mk_timer_create_trap, 0, NULL, NULL), -/* 92 */ MACH_TRAP(mk_timer_destroy_trap, 1, munge_w, munge_d), -/* 93 */ MACH_TRAP(mk_timer_arm_trap, 3, munge_wl, munge_dd), -/* 94 */ MACH_TRAP(mk_timer_cancel_trap, 2, munge_ww, munge_dd), -/* 95 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 64 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 65 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 66 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 67 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 68 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 69 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 70 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 71 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 72 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 73 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 74 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 75 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 76 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 77 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 78 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 79 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 80 */ 
MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 81 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 82 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 83 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 84 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 85 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 86 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 87 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 88 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 89 */ MACH_TRAP(mach_timebase_info_trap, 1, 1, munge_w), +/* 90 */ MACH_TRAP(mach_wait_until_trap, 1, 2, munge_l), +/* 91 */ MACH_TRAP(mk_timer_create_trap, 0, 0, NULL), +/* 92 */ MACH_TRAP(mk_timer_destroy_trap, 1, 1, munge_w), +/* 93 */ MACH_TRAP(mk_timer_arm_trap, 2, 3, munge_wl), +/* 94 */ MACH_TRAP(mk_timer_cancel_trap, 2, 2, munge_ww), +/* 95 */ MACH_TRAP(kern_invalid, 0, 0, NULL), /* traps 64 - 95 reserved (debo) */ -/* 96 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 97 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 98 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 99 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 96 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 97 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 98 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 99 */ MACH_TRAP(kern_invalid, 0, 0, NULL), /* traps 100-107 reserved for iokit (esb) */ -/* 100 */ MACH_TRAP(iokit_user_client_trap, 8, munge_wwwwwwww, munge_dddddddd), -/* 101 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 102 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 103 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 104 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 105 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 106 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 107 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 100 */ MACH_TRAP(iokit_user_client_trap, 8, 8, munge_wwwwwwww), +/* 101 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 102 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 103 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 104 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 105 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 106 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 107 */ MACH_TRAP(kern_invalid, 0, 0, NULL), /* traps 108-127 unused */ -/* 108 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 109 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 110 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 111 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 112 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 113 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 114 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 115 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 116 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 117 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 118 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 119 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 120 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 121 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 122 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 123 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 124 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 125 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 126 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), -/* 127 */ MACH_TRAP(kern_invalid, 0, NULL, NULL), +/* 108 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 109 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 110 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 111 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 112 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 113 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 114 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 115 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 
116 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 117 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 118 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 119 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 120 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 121 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 122 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 123 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 124 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 125 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 126 */ MACH_TRAP(kern_invalid, 0, 0, NULL), +/* 127 */ MACH_TRAP(kern_invalid, 0, 0, NULL), }; const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = { @@ -253,7 +250,7 @@ const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = { /* 12 */ "_kernelrpc_mach_vm_deallocate_trap", /* 13 */ "kern_invalid", /* 14 */ "_kernelrpc_mach_vm_protect_trap", -/* 15 */ "kern_invalid", +/* 15 */ "_kernelrpc_mach_vm_map_trap", /* 16 */ "_kernelrpc_mach_port_allocate_trap", /* 17 */ "_kernelrpc_mach_port_destroy_trap", /* 18 */ "_kernelrpc_mach_port_deallocate_trap", @@ -262,8 +259,8 @@ const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = { /* 21 */ "_kernelrpc_mach_port_insert_right_trap", /* 22 */ "_kernelrpc_mach_port_insert_member_trap", /* 23 */ "_kernelrpc_mach_port_extract_member_trap", -/* 24 */ "kern_invalid", -/* 25 */ "kern_invalid", +/* 24 */ "_kernelrpc_mach_port_construct_trap", +/* 25 */ "_kernelrpc_mach_port_destruct_trap", /* 26 */ "mach_reply_port", /* 27 */ "thread_self_trap", /* 28 */ "task_self_trap", @@ -279,8 +276,8 @@ const char * mach_syscall_name_table[MACH_TRAP_TABLE_COUNT] = { /* 38 */ "semaphore_timedwait_trap", /* 39 */ "semaphore_timedwait_signal_trap", /* 40 */ "kern_invalid", -/* 41 */ "kern_invalid", -/* 42 */ "kern_invalid", +/* 41 */ "_kernelrpc_mach_port_guard_trap", +/* 42 */ "_kernelrpc_mach_port_unguard_trap", /* 43 */ "map_fd", /* 44 */ "task_name_for_pid", /* 45 */ "task_for_pid", diff --git a/osfmk/kern/syscall_sw.h b/osfmk/kern/syscall_sw.h index 879b9cd5c..70d69bfde 100644 --- a/osfmk/kern/syscall_sw.h +++ b/osfmk/kern/syscall_sw.h @@ -69,8 +69,12 @@ typedef void mach_munge_t(const void *, void *); typedef struct { - int mach_trap_arg_count; + int mach_trap_arg_count; /* Number of trap arguments (Arch independant) */ kern_return_t (*mach_trap_function)(void *); +#if defined(__x86_64__) + mach_munge_t *mach_trap_arg_munge32; /* system call argument munger routine for 32-bit */ +#endif + int mach_trap_u32_words; /* number of 32-bit words to copyin for U32 */ #if MACH_ASSERT const char* mach_trap_name; #endif /* MACH_ASSERT */ @@ -82,14 +86,33 @@ typedef struct { extern const mach_trap_t mach_trap_table[]; extern int mach_trap_count; -#if defined(__i386__) || defined(__x86_64__) +#if defined(__x86_64__) + #if !MACH_ASSERT -#define MACH_TRAP(name, arg_count, munge32, munge64) \ - { (arg_count), (kern_return_t (*)(void *)) (name) } -#else -#define MACH_TRAP(name, arg_count, munge32, munge64) \ - { (arg_count), (kern_return_t (*)(void *)) (name), #name } +#define MACH_TRAP(name, arg_count, u32_arg_words, munge32) \ + { (arg_count), (kern_return_t (*)(void *)) (name), munge32, (u32_arg_words) } +#else /* !MACH_ASSERT */ +#define MACH_TRAP(name, arg_count, u32_arg_words, munge32) \ + { (arg_count), (kern_return_t (*)(void *)) (name), munge32, (u32_arg_words), #name } #endif /* !MACH_ASSERT */ + + + + +#elif defined(__i386__) || defined(__arm__) + +#if !MACH_ASSERT +#define MACH_TRAP(name, arg_count, u32_arg_words, munge32) \ + { (arg_count), (kern_return_t (*)(void *)) (name), 
(u32_arg_words) } +#else /* !MACH_ASSERT */ +#define MACH_TRAP(name, arg_count, u32_arg_words, munge32) \ + { (arg_count), (kern_return_t (*)(void *)) (name), (u32_arg_words), #name } +#endif /* !MACH_ASSERT */ + + + + + #else /* !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) */ #error Unsupported architecture #endif /* !defined(__i386__) && !defined(__x86_64__) && !defined(__arm__) */ diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index 324195b3d..d39ed2047 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -101,6 +101,7 @@ #include #include #include +#include #include #include @@ -118,13 +119,19 @@ #include #include #include +#include +#if CONFIG_TELEMETRY +#include +#endif #include #include #include /* for kernel_map, ipc_kernel_map */ #include #include +#include +#include /* * Exported interfaces */ @@ -145,31 +152,71 @@ #include #endif /* CONFIG_COUNTERS */ +#include +#include + task_t kernel_task; zone_t task_zone; lck_attr_t task_lck_attr; lck_grp_t task_lck_grp; lck_grp_attr_t task_lck_grp_attr; -#if CONFIG_EMBEDDED -lck_mtx_t task_watch_mtx; -#endif /* CONFIG_EMBEDDED */ zinfo_usage_store_t tasks_tkm_private; zinfo_usage_store_t tasks_tkm_shared; /* A container to accumulate statistics for expired tasks */ -expired_task_statistics_t dead_task_statistics; -lck_spin_t dead_task_statistics_lock; +expired_task_statistics_t dead_task_statistics; +lck_spin_t dead_task_statistics_lock; static ledger_template_t task_ledger_template = NULL; -struct _task_ledger_indices task_ledgers = {-1, -1, -1, -1, -1, -1, -1}; +struct _task_ledger_indices task_ledgers __attribute__((used)) = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; void init_task_ledgers(void); +void task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1); +void task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1); +void __attribute__((noinline)) THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void); +void __attribute__((noinline)) THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb); +int coredump(void *core_proc, int reserve_mb, int ignore_ulimit); + +kern_return_t task_suspend_internal(task_t); +kern_return_t task_resume_internal(task_t); + +void proc_init_cpumon_params(void); + +// Warn tasks when they hit 80% of their memory limit. +#define PHYS_FOOTPRINT_WARNING_LEVEL 80 + +#define TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT 150 /* wakeups per second */ +#define TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL 300 /* in seconds. */ + +/* + * Level (in terms of percentage of the limit) at which the wakeups monitor triggers telemetry. + * + * (ie when the task's wakeups rate exceeds 70% of the limit, start taking user + * stacktraces, aka micro-stackshots) + */ +#define TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER 70 + +int task_wakeups_monitor_interval; /* In seconds. Time period over which wakeups rate is observed */ +int task_wakeups_monitor_rate; /* In hz. Maximum allowable wakeups per task before EXC_RESOURCE is sent */ + +int task_wakeups_monitor_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */ +int disable_exc_resource; /* Global override to supress EXC_RESOURCE for resource monitor violations. 
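With the defaults above, the wakeups ledger ends up configured as in the enable path of task_wakeups_monitor_ctl() later in this patch. A short worked example, using only values from the #defines above (the locals are hypothetical):

/*
 * Worked example with the default knobs; the arithmetic mirrors
 * task_wakeups_monitor_ctl() below.
 */
int32_t  rate_hz  = 150;                /* TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT   */
int      interval = 300;                /* ..._DEFAULT_INTERVAL, in seconds     */
int64_t  limit    = rate_hz * interval; /* 45000 wakeups per refill period      */
uint64_t period   = (uint64_t)interval * NSEC_PER_SEC; /* 300 s period, in ns   */
/* Telemetry (micro-stackshots) begins at 70% of the limit: 31500 wakeups. */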
*/ +int max_task_footprint = 0; /* Per-task limit on physical memory consumption */ int task_max = CONFIG_TASK_MAX; /* Max number of tasks */ -/* externs for BSD kernel */ -extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long); +int hwm_user_cores = 0; /* high watermark violations generate user core files */ + +#ifdef MACH_BSD +extern void proc_getexecutableuuid(void *, unsigned char *, unsigned long); +extern int proc_pid(struct proc *p); +extern int proc_selfpid(void); +extern char *proc_name_address(struct proc *p); +#if CONFIG_JETSAM +extern void memorystatus_on_ledger_footprint_exceeded(int warning, const int max_footprint_mb); +#endif +#endif /* Forwards */ @@ -206,33 +253,17 @@ task_set_64bit( { #if defined(__i386__) || defined(__x86_64__) thread_t thread; -#endif /* __i386__ */ - int vm_flags = 0; +#endif /* defined(__i386__) || defined(__x86_64__) */ + + task_lock(task); if (is64bit) { if (task_has_64BitAddr(task)) - return; - + goto out; task_set_64BitAddr(task); } else { if ( !task_has_64BitAddr(task)) - return; - - /* - * Deallocate all memory previously allocated - * above the 32-bit address space, since it won't - * be accessible anymore. - */ - /* remove regular VM map entries & pmap mappings */ - (void) vm_map_remove(task->map, - (vm_map_offset_t) VM_MAX_ADDRESS, - MACH_VM_MAX_ADDRESS, - 0); - /* remove the higher VM mappings */ - (void) vm_map_remove(task->map, - MACH_VM_MAX_ADDRESS, - 0xFFFFFFFFFFFFF000ULL, - vm_flags); + goto out; task_clear_64BitAddr(task); } /* FIXME: On x86, the thread save state flavor can diverge from the @@ -241,15 +272,17 @@ task_set_64bit( * certain routines may observe the thread as being in an inconsistent * state with respect to its task's 64-bitness. */ + #if defined(__i386__) || defined(__x86_64__) - task_lock(task); queue_iterate(&task->threads, thread, thread_t, task_threads) { thread_mtx_lock(thread); machine_thread_switch_addrmode(thread); thread_mtx_unlock(thread); } +#endif /* defined(__i386__) || defined(__x86_64__) */ + +out: task_unlock(task); -#endif /* __i386__ */ } @@ -262,6 +295,55 @@ task_set_dyld_info(task_t task, mach_vm_address_t addr, mach_vm_size_t size) task_unlock(task); } +#if TASK_REFERENCE_LEAK_DEBUG +#include + +decl_simple_lock_data(static,task_ref_lock); +static btlog_t *task_ref_btlog; +#define TASK_REF_OP_INCR 0x1 +#define TASK_REF_OP_DECR 0x2 + +#define TASK_REF_BTDEPTH 7 + +static void +task_ref_lock_lock(void *context) +{ + simple_lock((simple_lock_t)context); +} +static void +task_ref_lock_unlock(void *context) +{ + simple_unlock((simple_lock_t)context); +} + +void +task_reference_internal(task_t task) +{ + void * bt[TASK_REF_BTDEPTH]; + int numsaved = 0; + + numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH); + + (void)hw_atomic_add(&(task)->ref_count, 1); + btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_INCR, + bt, numsaved); +} + +uint32_t +task_deallocate_internal(task_t task) +{ + void * bt[TASK_REF_BTDEPTH]; + int numsaved = 0; + + numsaved = OSBacktrace(bt, TASK_REF_BTDEPTH); + + btlog_add_entry(task_ref_btlog, task, TASK_REF_OP_DECR, + bt, numsaved); + return hw_atomic_sub(&(task)->ref_count, 1); +} + +#endif /* TASK_REFERENCE_LEAK_DEBUG */ + void task_init(void) { @@ -270,9 +352,6 @@ task_init(void) lck_grp_init(&task_lck_grp, "task", &task_lck_grp_attr); lck_attr_setdefault(&task_lck_attr); lck_mtx_init(&tasks_threads_lock, &task_lck_grp, &task_lck_attr); -#if CONFIG_EMBEDDED - lck_mtx_init(&task_watch_mtx, &task_lck_grp, &task_lck_attr); -#endif /* CONFIG_EMBEDDED */ task_zone = 
zinit( sizeof(struct task), @@ -282,8 +361,77 @@ task_init(void) zone_change(task_zone, Z_NOENCRYPT, TRUE); + /* + * Configure per-task memory limit. The boot arg takes precedence over the + * device tree. + */ + if (!PE_parse_boot_argn("max_task_pmem", &max_task_footprint, + sizeof (max_task_footprint))) { + max_task_footprint = 0; + } + + if (max_task_footprint == 0) { + /* + * No limit was found in boot-args, so go look in the device tree. + */ + if (!PE_get_default("kern.max_task_pmem", &max_task_footprint, + sizeof(max_task_footprint))) { + max_task_footprint = 0; + } + } + + if (max_task_footprint != 0) { +#if CONFIG_JETSAM + if (max_task_footprint < 50) { + printf("Warning: max_task_pmem %d below minimum.\n", + max_task_footprint); + max_task_footprint = 50; + } + printf("Limiting task physical memory footprint to %d MB\n", + max_task_footprint); + max_task_footprint *= 1024 * 1024; // Convert MB to bytes +#else + printf("Warning: max_task_footprint specified, but jetsam not configured; ignoring.\n"); +#endif + } + + if (!PE_parse_boot_argn("hwm_user_cores", &hwm_user_cores, + sizeof (hwm_user_cores))) { + hwm_user_cores = 0; + } + + proc_init_cpumon_params(); + + if (!PE_parse_boot_argn("task_wakeups_monitor_rate", &task_wakeups_monitor_rate, sizeof (task_wakeups_monitor_rate))) { + task_wakeups_monitor_rate = TASK_WAKEUPS_MONITOR_DEFAULT_LIMIT; + } + + if (!PE_parse_boot_argn("task_wakeups_monitor_interval", &task_wakeups_monitor_interval, sizeof (task_wakeups_monitor_interval))) { + task_wakeups_monitor_interval = TASK_WAKEUPS_MONITOR_DEFAULT_INTERVAL; + } + + if (!PE_parse_boot_argn("task_wakeups_monitor_ustackshots_trigger_pct", &task_wakeups_monitor_ustackshots_trigger_pct, + sizeof (task_wakeups_monitor_ustackshots_trigger_pct))) { + task_wakeups_monitor_ustackshots_trigger_pct = TASK_WAKEUPS_MONITOR_DEFAULT_USTACKSHOTS_TRIGGER; + } + + if (!PE_parse_boot_argn("disable_exc_resource", &disable_exc_resource, + sizeof (disable_exc_resource))) { + disable_exc_resource = 0; + } + init_task_ledgers(); +#if TASK_REFERENCE_LEAK_DEBUG + simple_lock_init(&task_ref_lock, 0); + task_ref_btlog = btlog_create(100000, + TASK_REF_BTDEPTH, + task_ref_lock_lock, + task_ref_lock_unlock, + &task_ref_lock); + assert(task_ref_btlog); +#endif + /* * Create the kernel task as the first task. */ @@ -355,6 +503,25 @@ host_security_create_task_token( return(KERN_FAILURE); } +/* + * Task ledgers + * ------------ + * + * phys_footprint + * Physical footprint: This is the sum of: + * + phys_mem [task's resident memory] + * + phys_compressed + * + iokit_mem + * + * iokit_mem + * IOKit mappings: The total size of all IOKit mappings in this task [regardless of clean/dirty state]. + * + * phys_compressed + * Physical compressed: Amount of this task's resident memory which is held by the compressor. + * Such memory is no longer actually resident for the task [i.e., resident in its pmap], + * and could be either decompressed back into memory, or paged out to storage, depending + * on our implementation. 
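Given the rollup described above, the current footprint can be read back with the same credit/debit accessor this patch uses for the wakeup entries. A minimal sketch, assuming (credit - debit) is an entry's current balance, which the patch itself does not state:

/*
 * Minimal sketch: read the rolled-up footprint off the task ledger.
 * The (credit - debit) interpretation is an assumption.
 */
static ledger_amount_t
task_phys_footprint_sketch(task_t task)
{
	ledger_amount_t credit = 0, debit = 0;

	ledger_get_entries(task->ledger, task_ledgers.phys_footprint,
	    &credit, &debit);
	return (credit - debit);
}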
+ */ void init_task_ledgers(void) { @@ -375,18 +542,34 @@ init_task_ledgers(void) "bytes"); task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem", "bytes"); + task_ledgers.iokit_mem = ledger_entry_add(t, "iokit_mem", "mappings", + "bytes"); + task_ledgers.phys_footprint = ledger_entry_add(t, "phys_footprint", "physmem", + "bytes"); + task_ledgers.phys_compressed = ledger_entry_add(t, "phys_compressed", "physmem", + "bytes"); task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power", - "count"); + "count"); task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power", - "count"); + "count"); if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) || (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) || - (task_ledgers.wired_mem < 0) || (task_ledgers.platform_idle_wakeups < 0) || - (task_ledgers.interrupt_wakeups < 0)) { + (task_ledgers.wired_mem < 0) || (task_ledgers.iokit_mem < 0) || + (task_ledgers.phys_footprint < 0) || (task_ledgers.phys_compressed < 0) || + (task_ledgers.platform_idle_wakeups < 0) || (task_ledgers.interrupt_wakeups < 0)) { panic("couldn't create entries for task ledger template"); } + ledger_track_maximum(t, task_ledgers.phys_footprint, 60); + +#if CONFIG_JETSAM + ledger_set_callback(t, task_ledgers.phys_footprint, task_footprint_exceeded, NULL, NULL); +#endif + + ledger_set_callback(t, task_ledgers.interrupt_wakeups, + task_wakeups_rate_exceeded, NULL, NULL); + task_ledger_template = t; } @@ -416,6 +599,7 @@ task_create_internal( zfree(task_zone, new_task); return(KERN_RESOURCE_SHORTAGE); } + new_task->ledger = ledger; /* if inherit_memory is true, parent_task MUST not be NULL */ @@ -436,7 +620,7 @@ task_create_internal( new_task->thread_count = 0; new_task->active_thread_count = 0; new_task->user_stop_count = 0; - new_task->role = TASK_UNSPECIFIED; + new_task->legacy_stop_count = 0; new_task->active = TRUE; new_task->halting = FALSE; new_task->user_data = NULL; @@ -449,8 +633,8 @@ task_create_internal( new_task->priv_flags = 0; new_task->syscalls_unix=0; new_task->c_switch = new_task->p_switch = new_task->ps_switch = 0; - new_task->taskFeatures[0] = 0; /* Init task features */ - new_task->taskFeatures[1] = 0; /* Init task features */ + new_task->t_flags = 0; + new_task->importance = 0; zinfo_task_init(new_task); @@ -458,16 +642,26 @@ task_create_internal( new_task->bsd_info = NULL; #endif /* MACH_BSD */ +#if CONFIG_JETSAM + if (max_task_footprint != 0) { + ledger_set_limit(ledger, task_ledgers.phys_footprint, max_task_footprint, PHYS_FOOTPRINT_WARNING_LEVEL); + } +#endif + + if (task_wakeups_monitor_rate != 0) { + uint32_t flags = WAKEMON_ENABLE | WAKEMON_SET_DEFAULTS; + int32_t rate; // Ignored because of WAKEMON_SET_DEFAULTS + task_wakeups_monitor_ctl(new_task, &flags, &rate); + } + #if defined(__i386__) || defined(__x86_64__) new_task->i386_ldt = 0; - new_task->task_debug = NULL; #endif + new_task->task_debug = NULL; queue_init(&new_task->semaphore_list); - queue_init(&new_task->lock_set_list); new_task->semaphores_owned = 0; - new_task->lock_sets_owned = 0; #if CONFIG_MACF_MACH new_task->label = labelh_new(1); @@ -491,20 +685,37 @@ task_create_internal( new_task->pidsuspended = FALSE; new_task->frozen = FALSE; + new_task->changing_freeze_state = FALSE; new_task->rusage_cpu_flags = 0; new_task->rusage_cpu_percentage = 0; new_task->rusage_cpu_interval = 0; new_task->rusage_cpu_deadline = 0; new_task->rusage_cpu_callt = NULL; - new_task->proc_terminate = 0; -#if CONFIG_EMBEDDED - 
queue_init(&new_task->task_watchers); - new_task->appstate = TASK_APPSTATE_ACTIVE; - new_task->num_taskwatchers = 0; - new_task->watchapplying = 0; -#endif /* CONFIG_EMBEDDED */ +#if MACH_ASSERT + new_task->suspends_outstanding = 0; +#endif + + new_task->low_mem_notified_warn = 0; + new_task->low_mem_notified_critical = 0; + new_task->purged_memory_warn = 0; + new_task->purged_memory_critical = 0; + new_task->mem_notify_reserved = 0; +#if IMPORTANCE_INHERITANCE + new_task->imp_receiver = 0; + new_task->imp_donor = 0; + new_task->imp_reserved = 0; + new_task->task_imp_assertcnt = 0; + new_task->task_imp_externcnt = 0; +#endif /* IMPORTANCE_INHERITANCE */ + +#if defined(__x86_64__) new_task->uexc_range_start = new_task->uexc_range_size = new_task->uexc_handler = 0; +#endif + + new_task->requested_policy = default_task_requested_policy; + new_task->effective_policy = default_task_effective_policy; + new_task->pended_policy = default_task_pended_policy; if (parent_task != TASK_NULL) { new_task->sec_token = parent_task->sec_token; @@ -527,18 +738,26 @@ task_create_internal( task_affinity_create(parent_task, new_task); new_task->pset_hint = parent_task->pset_hint = task_choose_pset(parent_task); - new_task->policystate = parent_task->policystate; - /* inherit the self action state */ - new_task->appliedstate = parent_task->appliedstate; - new_task->ext_policystate = parent_task->ext_policystate; -#if NOTYET - /* till the child lifecycle is cleared do not inherit external action */ - new_task->ext_appliedstate = parent_task->ext_appliedstate; -#else - new_task->ext_appliedstate = default_task_null_policy; -#endif - } - else { + +#if IMPORTANCE_INHERITANCE + new_task->imp_donor = parent_task->imp_donor; + /* Embedded doesn't want this to inherit */ + new_task->imp_receiver = parent_task->imp_receiver; +#endif /* IMPORTANCE_INHERITANCE */ + + new_task->requested_policy.t_apptype = parent_task->requested_policy.t_apptype; + + new_task->requested_policy.int_darwinbg = parent_task->requested_policy.int_darwinbg; + new_task->requested_policy.ext_darwinbg = parent_task->requested_policy.ext_darwinbg; + new_task->requested_policy.int_iotier = parent_task->requested_policy.int_iotier; + new_task->requested_policy.ext_iotier = parent_task->requested_policy.ext_iotier; + new_task->requested_policy.int_iopassive = parent_task->requested_policy.int_iopassive; + new_task->requested_policy.ext_iopassive = parent_task->requested_policy.ext_iopassive; + new_task->requested_policy.bg_iotier = parent_task->requested_policy.bg_iotier; + new_task->requested_policy.terminated = parent_task->requested_policy.terminated; + + task_policy_create(new_task, parent_task->requested_policy.t_boosted); + } else { new_task->sec_token = KERNEL_SECURITY_TOKEN; new_task->audit_token = KERNEL_AUDIT_TOKEN; #ifdef __LP64__ @@ -549,24 +768,21 @@ task_create_internal( new_task->all_image_info_size = (mach_vm_size_t)0; new_task->pset_hint = PROCESSOR_SET_NULL; - new_task->policystate = default_task_proc_policy; - new_task->ext_policystate = default_task_proc_policy; - new_task->appliedstate = default_task_null_policy; - new_task->ext_appliedstate = default_task_null_policy; } if (kernel_task == TASK_NULL) { new_task->priority = BASEPRI_KERNEL; new_task->max_priority = MAXPRI_KERNEL; - } - else { + } else if (proc_get_effective_task_policy(new_task, TASK_POLICY_LOWPRI_CPU)) { + new_task->priority = MAXPRI_THROTTLE; + new_task->max_priority = MAXPRI_THROTTLE; + } else { new_task->priority = BASEPRI_DEFAULT; new_task->max_priority = 
MAXPRI_USER; } bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics)); new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0; - lck_mtx_lock(&tasks_threads_lock); queue_enter(&tasks, new_task, task_t, tasks); tasks_count++; @@ -575,6 +791,8 @@ task_create_internal( if (vm_backing_store_low && parent_task != NULL) new_task->priv_flags |= (parent_task->priv_flags&VM_BACKING_STORE_PRIV); + new_task->task_volatile_objects = 0; + ipc_task_enable(new_task); *child_task = new_task; @@ -600,6 +818,7 @@ task_deallocate( lck_mtx_lock(&tasks_threads_lock); queue_remove(&terminated_tasks, task, task_t, tasks); + terminated_tasks_count--; lck_mtx_unlock(&tasks_threads_lock); /* @@ -652,6 +871,21 @@ task_deallocate( } ledger_dereference(task->ledger); zinfo_task_free(task); + +#if TASK_REFERENCE_LEAK_DEBUG + btlog_remove_entries_for_element(task_ref_btlog, task); +#endif + + if (task->task_volatile_objects) { + /* + * This task still "owns" some volatile VM objects. + * Disown them now to avoid leaving them pointing back at + * an invalid task. + */ + vm_purgeable_disown(task); + assert(task->task_volatile_objects == 0); + } + zfree(task_zone, task); } @@ -667,6 +901,17 @@ task_name_deallocate( return(task_deallocate((task_t)task_name)); } +/* + * task_suspension_token_deallocate: + * + * Drop a reference on a task suspension token. + */ +void +task_suspension_token_deallocate( + task_suspension_token_t token) +{ + return(task_deallocate((task_t)token)); +} /* * task_terminate: @@ -731,6 +976,14 @@ task_terminate_internal( return (KERN_FAILURE); } +#if MACH_ASSERT + if (task->suspends_outstanding != 0) { + printf("WARNING: %s (%d) exiting with %d outstanding suspensions\n", + proc_name_address(task->bsd_info), proc_pid(task->bsd_info), + task->suspends_outstanding); + } +#endif + if (self_task != task) task_unlock(self_task); @@ -752,6 +1005,13 @@ task_terminate_internal( task->active = FALSE; ipc_task_disable(task); +#if CONFIG_TELEMETRY + /* + * Notify telemetry that this task is going away. + */ + telemetry_task_ctl_locked(task, TF_TELEMETRY, 0); +#endif + /* * Terminate each thread in the task. */ @@ -761,12 +1021,6 @@ task_terminate_internal( task_unlock(task); -#if CONFIG_EMBEDDED - /* - * remove all task watchers - */ - task_removewatchers(task); -#endif /* CONFIG_EMBEDDED */ /* * Destroy all synchronizers owned by the task. @@ -801,6 +1055,7 @@ task_terminate_internal( queue_remove(&tasks, task, task_t, tasks); queue_enter(&terminated_tasks, task, task_t, tasks); tasks_count--; + terminated_tasks_count++; lck_mtx_unlock(&tasks_threads_lock); /* @@ -1214,14 +1469,32 @@ task_threads( return (KERN_SUCCESS); } +#define TASK_HOLD_NORMAL 0 +#define TASK_HOLD_PIDSUSPEND 1 +#define TASK_HOLD_LEGACY 2 +#define TASK_HOLD_LEGACY_ALL 3 + static kern_return_t place_task_hold ( - register task_t task) + register task_t task, + int mode) { if (!task->active) { return (KERN_FAILURE); } + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_SUSPEND) | DBG_FUNC_NONE, + proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id, + task->user_stop_count, task->user_stop_count + 1, 0); + +#if MACH_ASSERT + current_task()->suspends_outstanding++; +#endif + + if (mode == TASK_HOLD_LEGACY) + task->legacy_stop_count++; + if (task->user_stop_count++ > 0) { /* * If the stop count was positive, the task is @@ -1237,7 +1510,7 @@ place_task_hold ( * to stop executing user code. 
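The four hold modes differ mainly in how they move the two counters. An illustrative trace of the legacy case, following the accounting in place_task_hold()/release_task_hold() as changed in this patch (hypothetical caller):

/*
 * task_suspend(t);   user_stop_count 0 -> 1, legacy_stop_count 0 -> 1
 * task_suspend(t);   user_stop_count 1 -> 2, legacy_stop_count 1 -> 2
 * task_resume(t);    TASK_HOLD_LEGACY drops both counts to 1
 *
 * If the suspender now exits, its remaining send right on t's resume
 * port dies; the resulting no-senders notification releases whatever
 * is left via TASK_HOLD_LEGACY_ALL, zeroing both counters and letting
 * the task run again.
 */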
*/ task_hold_locked(task); - task_wait_locked(task, TRUE); + task_wait_locked(task, FALSE); return (KERN_SUCCESS); } @@ -1245,7 +1518,7 @@ place_task_hold ( static kern_return_t release_task_hold ( register task_t task, - boolean_t pidresume) + int mode) { register boolean_t release = FALSE; @@ -1253,16 +1526,42 @@ release_task_hold ( return (KERN_FAILURE); } - if (pidresume) { + if (mode == TASK_HOLD_PIDSUSPEND) { if (task->pidsuspended == FALSE) { - return (KERN_FAILURE); + return (KERN_FAILURE); } task->pidsuspended = FALSE; } - if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) { - if (--task->user_stop_count == 0) { - release = TRUE; + if (task->user_stop_count > (task->pidsuspended ? 1 : 0)) { + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_IPC,MACH_TASK_RESUME) | DBG_FUNC_NONE, + proc_pid(task->bsd_info), ((thread_t)queue_first(&task->threads))->thread_id, + task->user_stop_count, mode, task->legacy_stop_count); + +#if MACH_ASSERT + /* + * This is obviously not robust; if we suspend one task and then resume a different one, + * we'll fly under the radar. This is only meant to catch the common case of a crashed + * or buggy suspender. + */ + current_task()->suspends_outstanding--; +#endif + + if (mode == TASK_HOLD_LEGACY_ALL) { + if (task->legacy_stop_count >= task->user_stop_count) { + task->user_stop_count = 0; + release = TRUE; + } else { + task->user_stop_count -= task->legacy_stop_count; + } + task->legacy_stop_count = 0; + } else { + if (mode == TASK_HOLD_LEGACY && task->legacy_stop_count > 0) + task->legacy_stop_count--; + if (--task->user_stop_count == 0) + release = TRUE; } } else { @@ -1278,10 +1577,18 @@ release_task_hold ( return (KERN_SUCCESS); } + /* * task_suspend: * - * Implement a user-level suspension on a task. + * Implement an (old-fashioned) user-level suspension on a task. + * + * Because the user isn't expecting to have to manage a suspension + * token, we'll track it for him in the kernel in the form of a naked + * send right to the task's resume port. All such send rights + * account for a single suspension against the task (unlike task_suspend2() + * where each caller gets a unique suspension count represented by a + * unique send-once right). * * Conditions: * The caller holds a reference to the task @@ -1290,23 +1597,72 @@ kern_return_t task_suspend( register task_t task) { - kern_return_t kr; - + kern_return_t kr; + mach_port_t port, send, old_notify; + mach_port_name_t name; + if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); task_lock(task); - kr = place_task_hold(task); + /* + * Claim a send right on the task resume port, and request a no-senders + * notification on that port (if none outstanding). + */ + if (task->itk_resume == IP_NULL) { + task->itk_resume = ipc_port_alloc_kernel(); + if (!IP_VALID(task->itk_resume)) + panic("failed to create resume port"); + ipc_kobject_set(task->itk_resume, (ipc_kobject_t)task, IKOT_TASK_RESUME); + } + + port = task->itk_resume; + ip_lock(port); + assert(ip_active(port)); + + send = ipc_port_make_send_locked(port); + assert(IP_VALID(send)); + + if (port->ip_nsrequest == IP_NULL) { + ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify); + assert(old_notify == IP_NULL); + /* port unlocked */ + } else { + ip_unlock(port); + } + + /* + * place a legacy hold on the task. 
+ */ + kr = place_task_hold(task, TASK_HOLD_LEGACY); + if (kr != KERN_SUCCESS) { + task_unlock(task); + ipc_port_release_send(send); + return kr; + } task_unlock(task); + /* + * Copyout the send right into the calling task's IPC space. It won't know it is there, + * but we'll look it up when calling a traditional resume. Any IPC operations that + * deallocate the send right will auto-release the suspension. + */ + if ((kr = ipc_kmsg_copyout_object(current_task()->itk_space, (ipc_object_t)send, + MACH_MSG_TYPE_MOVE_SEND, &name)) != KERN_SUCCESS) { + printf("warning: %s(%d) failed to copyout suspension token for task %s(%d) with error: %d\n", + proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info), + proc_name_address(task->bsd_info), proc_pid(task->bsd_info), kr); + return (kr); + } + return (kr); } /* * task_resume: - * Release a kernel hold on a task. + * Release a user hold on a task. * * Conditions: * The caller holds a reference to the task @@ -1316,19 +1672,177 @@ task_resume( register task_t task) { kern_return_t kr; + mach_port_name_t resume_port_name; + ipc_entry_t resume_port_entry; + ipc_space_t space = current_task()->itk_space; + + if (task == TASK_NULL || task == kernel_task ) + return (KERN_INVALID_ARGUMENT); + + /* release a legacy task hold */ + task_lock(task); + kr = release_task_hold(task, TASK_HOLD_LEGACY); + task_unlock(task); + + is_write_lock(space); + if (is_active(space) && IP_VALID(task->itk_resume) && + ipc_hash_lookup(space, (ipc_object_t)task->itk_resume, &resume_port_name, &resume_port_entry) == TRUE) { + /* + * We found a suspension token in the caller's IPC space. Release a send right to indicate that + * we are holding one less legacy hold on the task from this caller. If the release failed, + * go ahead and drop all the rights, as someone either already released our holds or the task + * is gone. + */ + if (kr == KERN_SUCCESS) + ipc_right_dealloc(space, resume_port_name, resume_port_entry); + else + ipc_right_destroy(space, resume_port_name, resume_port_entry, FALSE, 0); + /* space unlocked */ + } else { + is_write_unlock(space); + if (kr == KERN_SUCCESS) + printf("warning: %s(%d) performed out-of-band resume on %s(%d)\n", + proc_name_address(current_task()->bsd_info), proc_pid(current_task()->bsd_info), + proc_name_address(task->bsd_info), proc_pid(task->bsd_info)); + } + + return kr; +} +/* + * Suspend the target task. + * Making/holding a token/reference/port is the callers responsibility. + */ +kern_return_t +task_suspend_internal(task_t task) +{ + kern_return_t kr; + if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); task_lock(task); + kr = place_task_hold(task, TASK_HOLD_NORMAL); + task_unlock(task); + return (kr); +} + +/* + * Suspend the target task, and return a suspension token. The token + * represents a reference on the suspended task. + */ +kern_return_t +task_suspend2( + register task_t task, + task_suspension_token_t *suspend_token) +{ + kern_return_t kr; + + kr = task_suspend_internal(task); + if (kr != KERN_SUCCESS) { + *suspend_token = TASK_NULL; + return (kr); + } + + /* + * Take a reference on the target task and return that to the caller + * as a "suspension token," which can be converted into an SO right to + * the now-suspended task's resume port. + */ + task_reference_internal(task); + *suspend_token = task; + + return (KERN_SUCCESS); +} + +/* + * Resume the task + * (reference/token/port management is caller's responsibility). 
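A minimal sketch of the token-based pairing defined here, from a hypothetical in-kernel client that already holds a referenced task:

	task_suspension_token_t token;
	kern_return_t kr;

	kr = task_suspend2(task, &token);
	if (kr == KERN_SUCCESS) {
		/* ... examine the stopped task ... */
		kr = task_resume2(token);  /* consumes the token's reference */
	}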
+ */ +kern_return_t +task_resume_internal( + register task_suspension_token_t task) +{ + kern_return_t kr; - kr = release_task_hold(task, FALSE); + if (task == TASK_NULL || task == kernel_task) + return (KERN_INVALID_ARGUMENT); + task_lock(task); + kr = release_task_hold(task, TASK_HOLD_NORMAL); task_unlock(task); + return (kr); +} + +/* + * Resume the task using a suspension token. Consumes the token's ref. + */ +kern_return_t +task_resume2( + register task_suspension_token_t task) +{ + kern_return_t kr; + + kr = task_resume_internal(task); + task_suspension_token_deallocate(task); return (kr); } +boolean_t +task_suspension_notify(mach_msg_header_t *request_header) +{ + ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port; + task_t task = convert_port_to_task_suspension_token(port); + mach_msg_type_number_t not_count; + + if (task == TASK_NULL || task == kernel_task) + return TRUE; /* nothing to do */ + + switch (request_header->msgh_id) { + + case MACH_NOTIFY_SEND_ONCE: + /* release the hold held by this specific send-once right */ + task_lock(task); + release_task_hold(task, TASK_HOLD_NORMAL); + task_unlock(task); + break; + + case MACH_NOTIFY_NO_SENDERS: + not_count = ((mach_no_senders_notification_t *)request_header)->not_count; + + task_lock(task); + ip_lock(port); + if (port->ip_mscount == not_count) { + + /* release all the [remaining] outstanding legacy holds */ + assert(port->ip_nsrequest == IP_NULL); + ip_unlock(port); + release_task_hold(task, TASK_HOLD_LEGACY_ALL); + task_unlock(task); + + } else if (port->ip_nsrequest == IP_NULL) { + ipc_port_t old_notify; + + task_unlock(task); + /* new send rights, re-arm notification at current make-send count */ + ipc_port_nsrequest(port, port->ip_mscount, ipc_port_make_sonce_locked(port), &old_notify); + assert(old_notify == IP_NULL); + /* port unlocked */ + } else { + ip_unlock(port); + task_unlock(task); + } + break; + + default: + break; + } + + task_suspension_token_deallocate(task); /* drop token reference */ + return TRUE; +} + kern_return_t task_pidsuspend_locked(task_t task) { @@ -1341,7 +1855,7 @@ task_pidsuspend_locked(task_t task) task->pidsuspended = TRUE; - kr = place_task_hold(task); + kr = place_task_hold(task, TASK_HOLD_PIDSUSPEND); if (kr != KERN_SUCCESS) { task->pidsuspended = FALSE; } @@ -1391,9 +1905,6 @@ task_pidresume( register task_t task) { kern_return_t kr; -#if (CONFIG_FREEZE && THAW_ON_RESUME) - boolean_t frozen; -#endif if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); @@ -1401,18 +1912,41 @@ task_pidresume( task_lock(task); #if (CONFIG_FREEZE && THAW_ON_RESUME) - frozen = task->frozen; - task->frozen = FALSE; -#endif - kr = release_task_hold(task, TRUE); + while (task->changing_freeze_state) { - task_unlock(task); + assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT); + task_unlock(task); + thread_block(THREAD_CONTINUE_NULL); -#if (CONFIG_FREEZE && THAW_ON_RESUME) - if ((kr == KERN_SUCCESS) && (frozen == TRUE)) { - kr = vm_map_thaw(task->map); + task_lock(task); } + task->changing_freeze_state = TRUE; +#endif + + kr = release_task_hold(task, TASK_HOLD_PIDSUSPEND); + + task_unlock(task); + +#if (CONFIG_FREEZE && THAW_ON_RESUME) + if ((kr == KERN_SUCCESS) && (task->frozen == TRUE)) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + + kr = KERN_SUCCESS; + } else { + + kr = vm_map_thaw(task->map); + } + } + task_lock(task); + + if (kr == KERN_SUCCESS) + task->frozen = FALSE; + task->changing_freeze_state = FALSE; + 
thread_wakeup(&task->changing_freeze_state); + + task_unlock(task); #endif return (kr); @@ -1446,14 +1980,19 @@ task_freeze( task_lock(task); + while (task->changing_freeze_state) { + + assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT); + task_unlock(task); + thread_block(THREAD_CONTINUE_NULL); + + task_lock(task); + } if (task->frozen) { - task_unlock(task); - return (KERN_FAILURE); + task_unlock(task); + return (KERN_FAILURE); } - - if (walk_only == FALSE) { - task->frozen = TRUE; - } + task->changing_freeze_state = TRUE; task_unlock(task); @@ -1463,6 +2002,15 @@ task_freeze( kr = vm_map_freeze(task->map, purgeable_count, wired_count, clean_count, dirty_count, dirty_budget, shared); } + task_lock(task); + + if (walk_only == FALSE && kr == KERN_SUCCESS) + task->frozen = TRUE; + task->changing_freeze_state = FALSE; + thread_wakeup(&task->changing_freeze_state); + + task_unlock(task); + return (kr); } @@ -1474,6 +2022,9 @@ task_freeze( * Conditions: * The caller holds a reference to the task */ +extern void +vm_consider_waking_compactor_swapper(void); + kern_return_t task_thaw( register task_t task) @@ -1485,16 +2036,42 @@ task_thaw( task_lock(task); + while (task->changing_freeze_state) { + + assert_wait((event_t)&task->changing_freeze_state, THREAD_UNINT); + task_unlock(task); + thread_block(THREAD_CONTINUE_NULL); + + task_lock(task); + } if (!task->frozen) { - task_unlock(task); - return (KERN_FAILURE); + task_unlock(task); + return (KERN_FAILURE); } + task->changing_freeze_state = TRUE; + + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { + task_unlock(task); + + kr = vm_map_thaw(task->map); - task->frozen = FALSE; + task_lock(task); + + if (kr == KERN_SUCCESS) + task->frozen = FALSE; + } else { + task->frozen = FALSE; + kr = KERN_SUCCESS; + } + task->changing_freeze_state = FALSE; + thread_wakeup(&task->changing_freeze_state); + task_unlock(task); - kr = vm_map_thaw(task->map); + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + vm_consider_waking_compactor_swapper(); + } return (kr); } @@ -1521,7 +2098,8 @@ host_security_set_task_token( task_lock(task); task->sec_token = sec_token; task->audit_token = audit_token; - task_unlock(task); + + task_unlock(task); if (host_priv != HOST_PRIV_NULL) { kr = host_get_host_priv_port(host_priv, &host_port); @@ -1560,9 +2138,9 @@ task_set_info( kern_return_t task_info( - task_t task, - task_flavor_t flavor, - task_info_t task_info_out, + task_t task, + task_flavor_t flavor, + task_info_t task_info_out, mach_msg_type_number_t *task_info_count) { kern_return_t error = KERN_SUCCESS; @@ -1726,14 +2304,16 @@ task_info( queue_iterate(&task->threads, thread, thread_t, task_threads) { - time_value_t user_time, system_time; + time_value_t user_time, system_time; - thread_read_times(thread, &user_time, &system_time); + if (thread->options & TH_OPT_IDLE_THREAD) + continue; - time_value_add(×_info->user_time, &user_time); - time_value_add(×_info->system_time, &system_time); - } + thread_read_times(thread, &user_time, &system_time); + time_value_add(×_info->user_time, &user_time); + time_value_add(×_info->system_time, &system_time); + } *task_info_count = TASK_THREAD_TIMES_INFO_COUNT; break; @@ -1760,6 +2340,9 @@ task_info( uint64_t tval; spl_t x; + if (thread->options & TH_OPT_IDLE_THREAD) + continue; + x = splsched(); thread_lock(thread); @@ -2057,57 +2640,108 @@ task_info( error = task_affinity_info(task, task_info_out, task_info_count); break; } - case TASK_POWER_INFO: { - task_power_info_t info; - 
thread_t thread; - ledger_amount_t tmp; - if (*task_info_count < TASK_POWER_INFO_COUNT) { error = KERN_INVALID_ARGUMENT; break; } - info = (task_power_info_t)task_info_out; - - ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups, - (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp); - ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups, - (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp); - - info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1; - info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2; - - info->total_user = task->total_user_time; - info->total_system = task->total_system_time; + task_power_info_locked(task, (task_power_info_t)task_info_out); + break; + } - queue_iterate(&task->threads, thread, thread_t, task_threads) { - uint64_t tval; - spl_t x; + case TASK_VM_INFO: + case TASK_VM_INFO_PURGEABLE: + { + task_vm_info_t vm_info; + vm_map_t map; - if ((task == kernel_task) && (thread->priority == IDLEPRI) && (thread->sched_pri == IDLEPRI)) - continue; - x = splsched(); - thread_lock(thread); + if (*task_info_count < TASK_VM_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } - info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1; - info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2; + vm_info = (task_vm_info_t)task_info_out; - tval = timer_grab(&thread->user_timer); - info->total_user += tval; + if (task == kernel_task) { + map = kernel_map; + /* no lock */ + } else { + map = task->map; + vm_map_lock_read(map); + } - tval = timer_grab(&thread->system_timer); - if (thread->precise_user_kernel_time) { - info->total_system += tval; - } else { - /* system_timer may represent either sys or user */ - info->total_user += tval; + vm_info->virtual_size = (typeof(vm_info->virtual_size))map->size; + vm_info->region_count = map->hdr.nentries; + vm_info->page_size = vm_map_page_size(map); + + vm_info->resident_size = pmap_resident_count(map->pmap); + vm_info->resident_size *= PAGE_SIZE; + vm_info->resident_size_peak = pmap_resident_max(map->pmap); + vm_info->resident_size_peak *= PAGE_SIZE; + +#define _VM_INFO(_name) \ + vm_info->_name = ((mach_vm_size_t) map->pmap->stats._name) * PAGE_SIZE + + _VM_INFO(device); + _VM_INFO(device_peak); + _VM_INFO(external); + _VM_INFO(external_peak); + _VM_INFO(internal); + _VM_INFO(internal_peak); + _VM_INFO(reusable); + _VM_INFO(reusable_peak); + _VM_INFO(compressed); + _VM_INFO(compressed_peak); + _VM_INFO(compressed_lifetime); + + vm_info->purgeable_volatile_pmap = 0; + vm_info->purgeable_volatile_resident = 0; + vm_info->purgeable_volatile_virtual = 0; + if (task == kernel_task) { + /* + * We do not maintain the detailed stats for the + * kernel_pmap, so just count everything as + * "internal"... + */ + vm_info->internal = vm_info->resident_size; + /* + * ... but since the memory held by the VM compressor + * in the kernel address space ought to be attributed + * to user-space tasks, we subtract it from "internal" + * to give memory reporting tools a more accurate idea + * of what the kernel itself is actually using, instead + * of making it look like the kernel is leaking memory + * when the system is under memory pressure. 
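The new flavor is consumed like any other task_info() flavor. A minimal user-space sketch (self-inspection), assuming the structures and constants above are exported through <mach/task_info.h> as usual:

#include <mach/mach.h>
#include <stdio.h>

int
main(void)
{
	task_vm_info_data_t    info;
	mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
	kern_return_t          kr;

	kr = task_info(mach_task_self(), TASK_VM_INFO,
	    (task_info_t)&info, &count);
	if (kr != KERN_SUCCESS)
		return (1);
	printf("resident: %llu bytes (peak %llu), regions: %d\n",
	    (unsigned long long)info.resident_size,
	    (unsigned long long)info.resident_size_peak,
	    (int)info.region_count);
	return (0);
}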
+ */ + vm_info->internal -= (VM_PAGE_COMPRESSOR_COUNT * + PAGE_SIZE); + } else { + mach_vm_size_t volatile_virtual_size; + mach_vm_size_t volatile_resident_size; + mach_vm_size_t volatile_pmap_size; + kern_return_t kr; + + if (flavor == TASK_VM_INFO_PURGEABLE) { + kr = vm_map_query_volatile( + map, + &volatile_virtual_size, + &volatile_resident_size, + &volatile_pmap_size); + if (kr == KERN_SUCCESS) { + vm_info->purgeable_volatile_pmap = + volatile_pmap_size; + vm_info->purgeable_volatile_resident = + volatile_resident_size; + vm_info->purgeable_volatile_virtual = + volatile_virtual_size; + } } - - thread_unlock(thread); - splx(x); + vm_map_unlock_read(map); } + + *task_info_count = TASK_VM_INFO_COUNT; break; } @@ -2119,6 +2753,77 @@ task_info( return (error); } +/* + * task_power_info + * + * Returns power stats for the task. + * Note: Called with task locked. + */ +void +task_power_info_locked( + task_t task, + task_power_info_t info) +{ + thread_t thread; + ledger_amount_t tmp; + + task_lock_assert_owned(task); + + ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups, + (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp); + ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups, + (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp); + + info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1; + info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2; + + info->total_user = task->total_user_time; + info->total_system = task->total_system_time; + + queue_iterate(&task->threads, thread, thread_t, task_threads) { + uint64_t tval; + spl_t x; + + if (thread->options & TH_OPT_IDLE_THREAD) + continue; + + x = splsched(); + thread_lock(thread); + + info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1; + info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2; + + tval = timer_grab(&thread->user_timer); + info->total_user += tval; + + tval = timer_grab(&thread->system_timer); + if (thread->precise_user_kernel_time) { + info->total_system += tval; + } else { + /* system_timer may represent either sys or user */ + info->total_user += tval; + } + + thread_unlock(thread); + splx(x); + } +} + +kern_return_t +task_purgable_info( + task_t task, + task_purgable_info_t *stats) +{ + if (task == TASK_NULL || stats == NULL) + return KERN_INVALID_ARGUMENT; + /* Take task reference */ + task_reference(task); + vm_purgeable_stats((vm_purgeable_info_t)stats, task); + /* Drop task reference */ + task_deallocate(task); + return KERN_SUCCESS; +} + void task_vtimer_set( task_t task, @@ -2361,7 +3066,6 @@ void task_synchronizer_destroy_all(task_t task) { semaphore_t semaphore; - lock_set_t lock_set; /* * Destroy owned semaphores @@ -2371,15 +3075,6 @@ task_synchronizer_destroy_all(task_t task) semaphore = (semaphore_t) queue_first(&task->semaphore_list); (void) semaphore_destroy(task, semaphore); } - - /* - * Destroy owned lock sets - */ - - while (!queue_empty(&task->lock_set_list)) { - lock_set = (lock_set_t) queue_first(&task->lock_set_list); - (void) lock_set_destroy(task, lock_set); - } } /* @@ -2445,6 +3140,187 @@ task_get_state( return ret; } +#if CONFIG_JETSAM +#define HWM_USERCORE_MINSPACE 250 // free space (in MB) required *after* core file creation + +void __attribute__((noinline)) +THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE(int max_footprint_mb) +{ + task_t task = current_task(); + int pid = 0; + char *procname = (char *) "unknown"; + mach_exception_data_type_t code[EXCEPTION_CODE_MAX]; + +#ifdef MACH_BSD + pid 
= proc_selfpid(); + if (task->bsd_info != NULL) + procname = proc_name_address(current_task()->bsd_info); +#endif + + if (hwm_user_cores) { + int error; + uint64_t starttime, end; + clock_sec_t secs = 0; + uint32_t microsecs = 0; + + starttime = mach_absolute_time(); + /* + * Trigger a coredump of this process. Don't proceed unless we know we won't + * be filling up the disk; and ignore the core size resource limit for this + * core file. + */ + if ((error = coredump(current_task()->bsd_info, HWM_USERCORE_MINSPACE, 1)) != 0) { + printf("couldn't take coredump of %s[%d]: %d\n", procname, pid, error); + } + /* + * coredump() leaves the task suspended. + */ + task_resume_internal(current_task()); + + end = mach_absolute_time(); + absolutetime_to_microtime(end - starttime, &secs, µsecs); + printf("coredump of %s[%d] taken in %d secs %d microsecs\n", + proc_name_address(current_task()->bsd_info), pid, (int)secs, microsecs); + } + + if (disable_exc_resource) { + printf("process %s[%d] crossed memory high watermark (%d MB); EXC_RESOURCE " + "supressed by a boot-arg.\n", procname, pid, max_footprint_mb); + return; + } + + printf("process %s[%d] crossed memory high watermark (%d MB); sending " + "EXC_RESOURCE.\n", procname, pid, max_footprint_mb); + + code[0] = code[1] = 0; + EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_MEMORY); + EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_HIGH_WATERMARK); + EXC_RESOURCE_HWM_ENCODE_LIMIT(code[0], max_footprint_mb); + exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX); +} + +/* + * Callback invoked when a task exceeds its physical footprint limit. + */ +void +task_footprint_exceeded(int warning, __unused const void *param0, __unused const void *param1) +{ + ledger_amount_t max_footprint_mb; + + if (warning == LEDGER_WARNING_DIPPED_BELOW) { + /* + * Task memory limits only provide a warning on the way up. + */ + return; + } + + ledger_get_limit(current_task()->ledger, task_ledgers.phys_footprint, &max_footprint_mb); + max_footprint_mb >>= 20; + + /* + * If this an actual violation (not a warning), + * generate a non-fatal high watermark EXC_RESOURCE. + */ + if ((warning == 0) && (current_task()->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION)) { + THIS_PROCESS_CROSSED_HIGH_WATERMARK__SENDING_EXC_RESOURCE((int)max_footprint_mb); + } + + memorystatus_on_ledger_footprint_exceeded((warning == LEDGER_WARNING_ROSE_ABOVE) ? TRUE : FALSE, + (int)max_footprint_mb); +} + +extern int proc_check_footprint_priv(void); + +kern_return_t +task_set_phys_footprint_limit( + task_t task, + int new_limit_mb, + int *old_limit_mb) +{ + kern_return_t error; + + if ((error = proc_check_footprint_priv())) { + return (KERN_NO_ACCESS); + } + + return task_set_phys_footprint_limit_internal(task, new_limit_mb, old_limit_mb, FALSE); +} + +kern_return_t +task_set_phys_footprint_limit_internal( + task_t task, + int new_limit_mb, + int *old_limit_mb, + boolean_t trigger_exception) +{ + ledger_amount_t old; + + ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &old); + + if (old_limit_mb) { + *old_limit_mb = old >> 20; + } + + if (new_limit_mb == -1) { + /* + * Caller wishes to remove the limit. + */ + ledger_set_limit(task->ledger, task_ledgers.phys_footprint, + max_task_footprint ? max_task_footprint : LEDGER_LIMIT_INFINITY, + max_task_footprint ? 
PHYS_FOOTPRINT_WARNING_LEVEL : 0); + return (KERN_SUCCESS); + } + +#ifdef CONFIG_NOMONITORS + return (KERN_SUCCESS); +#endif /* CONFIG_NOMONITORS */ + + task_lock(task); + + if (trigger_exception) { + task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION; + } else { + task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION; + } + + ledger_set_limit(task->ledger, task_ledgers.phys_footprint, + (ledger_amount_t)new_limit_mb << 20, PHYS_FOOTPRINT_WARNING_LEVEL); + + task_unlock(task); + + return (KERN_SUCCESS); +} + +kern_return_t +task_get_phys_footprint_limit( + task_t task, + int *limit_mb) +{ + ledger_amount_t limit; + + ledger_get_limit(task->ledger, task_ledgers.phys_footprint, &limit); + *limit_mb = limit >> 20; + + return (KERN_SUCCESS); +} +#else /* CONFIG_JETSAM */ +kern_return_t +task_set_phys_footprint_limit( + __unused task_t task, + __unused int new_limit_mb, + __unused int *old_limit_mb) +{ + return (KERN_FAILURE); +} + +kern_return_t +task_get_phys_footprint_limit( + __unused task_t task, + __unused int *limit_mb) +{ + return (KERN_FAILURE); +} +#endif /* CONFIG_JETSAM */ /* * We need to export some functions to other components that @@ -2564,3 +3440,179 @@ mac_task_get_label(struct task *task) return (&task->maclabel); } #endif + +/* + * Control the CPU usage monitor for a task. + */ +kern_return_t +task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags) +{ + int error = KERN_SUCCESS; + + if (*flags & CPUMON_MAKE_FATAL) { + task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_CPUMON; + } else { + error = KERN_INVALID_ARGUMENT; + } + + return error; +} + +/* + * Control the wakeups monitor for a task. + */ +kern_return_t +task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz) +{ + ledger_t ledger = task->ledger; + + task_lock(task); + if (*flags & WAKEMON_GET_PARAMS) { + ledger_amount_t limit; + uint64_t period; + + ledger_get_limit(ledger, task_ledgers.interrupt_wakeups, &limit); + ledger_get_period(ledger, task_ledgers.interrupt_wakeups, &period); + + if (limit != LEDGER_LIMIT_INFINITY) { + /* + * An active limit means the wakeups monitor is enabled. + */ + *rate_hz = (int32_t)(limit / (int64_t)(period / NSEC_PER_SEC)); + *flags = WAKEMON_ENABLE; + if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) { + *flags |= WAKEMON_MAKE_FATAL; + } + } else { + *flags = WAKEMON_DISABLE; + *rate_hz = -1; + } + + /* + * If WAKEMON_GET_PARAMS is present in flags, all other flags are ignored. + */ + task_unlock(task); + return KERN_SUCCESS; + } + + if (*flags & WAKEMON_ENABLE) { + if (*flags & WAKEMON_SET_DEFAULTS) { + *rate_hz = task_wakeups_monitor_rate; + } + +#ifndef CONFIG_NOMONITORS + if (*flags & WAKEMON_MAKE_FATAL) { + task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON; + } +#endif /* CONFIG_NOMONITORS */ + + if (*rate_hz < 0) { + task_unlock(task); + return KERN_INVALID_ARGUMENT; + } + +#ifndef CONFIG_NOMONITORS + ledger_set_limit(ledger, task_ledgers.interrupt_wakeups, *rate_hz * task_wakeups_monitor_interval, + task_wakeups_monitor_ustackshots_trigger_pct); + ledger_set_period(ledger, task_ledgers.interrupt_wakeups, task_wakeups_monitor_interval * NSEC_PER_SEC); + ledger_enable_callback(ledger, task_ledgers.interrupt_wakeups); +#endif /* CONFIG_NOMONITORS */ + } else if (*flags & WAKEMON_DISABLE) { + /* + * Caller wishes to disable wakeups monitor on the task. + * + * Disable telemetry if it was triggered by the wakeups monitor, and + * remove the limit & callback on the wakeups ledger entry. 
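+ * + * Hypothetical worked example for the enable/get paths above (the numbers are + * illustrative, not the shipped defaults): enabling with *rate_hz = 150 while + * task_wakeups_monitor_interval is 300 seconds programs a ledger limit of + * 150 * 300 = 45000 wakeups per refill period of 300 * NSEC_PER_SEC. A later + * WAKEMON_GET_PARAMS then recovers *rate_hz = 45000 / (period / NSEC_PER_SEC) = 150, + * with WAKEMON_MAKE_FATAL reflected from TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON.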
+ */ +#if CONFIG_TELEMETRY + telemetry_task_ctl_locked(current_task(), TF_WAKEMON_WARNING, 0); +#endif + ledger_disable_refill(ledger, task_ledgers.interrupt_wakeups); + ledger_disable_callback(ledger, task_ledgers.interrupt_wakeups); + } + + task_unlock(task); + return KERN_SUCCESS; +} + +void +task_wakeups_rate_exceeded(int warning, __unused const void *param0, __unused const void *param1) +{ + if (warning == LEDGER_WARNING_ROSE_ABOVE) { +#if CONFIG_TELEMETRY + /* + * This task is in danger of violating the wakeups monitor. Enable telemetry on this task + * so there are micro-stackshots available if and when EXC_RESOURCE is triggered. + */ + telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 1); +#endif + return; + } + +#if CONFIG_TELEMETRY + /* + * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or + * exceeded the limit, turn telemetry off for the task. + */ + telemetry_task_ctl(current_task(), TF_WAKEMON_WARNING, 0); +#endif + + if (warning == 0) { + THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(); + } +} + +void __attribute__((noinline)) +THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void) +{ + task_t task = current_task(); + int pid = 0; + char *procname = (char *) "unknown"; + uint64_t observed_wakeups_rate; + uint64_t permitted_wakeups_rate; + uint64_t observation_interval; + mach_exception_data_type_t code[EXCEPTION_CODE_MAX]; + struct ledger_entry_info lei; + +#ifdef MACH_BSD + pid = proc_selfpid(); + if (task->bsd_info != NULL) + procname = proc_name_address(current_task()->bsd_info); +#endif + + ledger_get_entry_info(task->ledger, task_ledgers.interrupt_wakeups, &lei); + + /* + * Disable the exception notification so we don't overwhelm + * the listener with an endless stream of redundant exceptions. + */ + uint32_t flags = WAKEMON_DISABLE; + task_wakeups_monitor_ctl(task, &flags, NULL); + + observed_wakeups_rate = (lei.lei_balance * (int64_t)NSEC_PER_SEC) / lei.lei_last_refill; + permitted_wakeups_rate = lei.lei_limit / task_wakeups_monitor_interval; + observation_interval = lei.lei_refill_period / NSEC_PER_SEC; + + if (disable_exc_resource) { + printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE " + "suppressed by a boot-arg\n", procname, pid); + return; + } + printf("process %s[%d] caught causing excessive wakeups. 
Observed wakeups rate " + "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation " + "period: %lld seconds; Task lifetime number of wakeups: %lld\n", + procname, pid, observed_wakeups_rate, permitted_wakeups_rate, + observation_interval, lei.lei_credit); + + code[0] = code[1] = 0; + EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_WAKEUPS); + EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_WAKEUPS_MONITOR); + EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_PERMITTED(code[0], task_wakeups_monitor_rate); + EXC_RESOURCE_CPUMONITOR_ENCODE_OBSERVATION_INTERVAL(code[0], observation_interval); + EXC_RESOURCE_CPUMONITOR_ENCODE_WAKEUPS_OBSERVED(code[1], lei.lei_balance * (int64_t)NSEC_PER_SEC / lei.lei_last_refill); + exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX); + + if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON) { + task_terminate_internal(task); + } +} diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h index 9b6e4a1f6..1b2991861 100644 --- a/osfmk/kern/task.h +++ b/osfmk/kern/task.h @@ -127,59 +127,8 @@ #define TASK_POLICY_RESOURCE_USAGE_COUNT 6 -/* - * Process Action and Policy bit definitions - -The bit defns of the policy states -64 60 56 52 48 44 40 36 32 28 24 20 16 12 8 0 -|----|-----|----|----|----|----|----|----|----|----|----|----|----|----|--------| -|RFU | RFU | PWR| NET| DSK| CPU| VM | WM | LVM| RFU| CPU| NET| GPU| DSK| BGRND | -|----|-----|----|----|----|----|----|----|----|----|----|----|----|----|--------| -|<----------- RESOURCE USAGE -------->|< LOWSRC>|<-HARDWARE ACCESS->|BackGrnd| -|----|-----|----|----|----|----|----|----|----|----|----|----|----|----|--------| - -* -*/ - -#define TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE 0x00 -#define TASK_POLICY_BACKGROUND_ATTRIBUTE_LOWPRI 0x01 -#define TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE 0x02 -#define TASK_POLICY_BACKGROUND_ATTRIBUTE_NETTHROTTLE 0x04 -#define TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU 0x08 -#if CONFIG_EMBEDDED -#define TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL 0x0F -#else /* CONFIG_EMBEDDED */ -#define TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL 0x07 -#endif /* CONFIG_EMBEDDED */ -#define TASK_POLICY_BACKGROUND_ATTRIBUTE_DEFAULT TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL - -/* Hardware disk access attributes, bit different as it should reflect IOPOL_XXX */ -#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NONE 0x00 -#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS 0x01 -#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL 0x01 -#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_PASSIVE 0x02 -#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE 0x03 -#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_UTILITY 0x04 -#define TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_DEFAULT TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS - -/* Hardware GPU access attributes */ -#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NONE 0x00 -#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS 0x00 -#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS 0x01 -#define TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS - -/* Hardware Network access attributes */ -#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NONE 0x00 -#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_FULLACCESS 0x00 -#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_THROTTLE 0x01 -#define TASK_POLICY_HWACCESS_NET_ATTRIBUTE_DEFAULT TASK_POLICY_HWACCESS_NET_ATTRIBUTE_FULLACCESS - -/* Hardware CPU access attributes */ -#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_NONE 0x00 -#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_FULLACCESS 0x00 -#define 
TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_ONE 0x01 -#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_LLCACHE 0x02 -#define TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_DEFAULT TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_FULLACCESS +#define TASK_POLICY_CPUMON_DISABLE 0xFF +#define TASK_POLICY_CPUMON_DEFAULTS 0xFE /* Resource usage/low resource attributes */ #define TASK_POLICY_RESOURCE_ATTRIBUTE_NONE 0x00 @@ -194,43 +143,9 @@ The bit defns of the policy states #ifdef MACH_KERNEL_PRIVATE -typedef struct process_policy { - uint64_t apptype:4, - rfu1:4, - ru_power:4, /* Resource Usage Power */ - ru_net:4, /* Resource Usage Network */ - ru_disk:4, /* Resource Usage Disk */ - ru_cpu:4, /* Resource Usage CPU */ - ru_virtmem:4, /* Resource Usage VM */ - ru_wiredmem:4,/* Resource Usage Wired Memory */ - low_vm:4, /* Low Virtual Memory */ - rfu2:4, - hw_cpu:4, /* HW Access to CPU */ - hw_net:4, /* HW Access to Network */ - hw_gpu:4, /* HW Access to GPU */ - hw_disk:4, /* HW Access to Disk */ - hw_bg:8; /* Darwin Background Policy */ -} process_policy_t; - -#if CONFIG_EMBEDDED - -typedef struct task_watcher { - queue_chain_t tw_links; /* queueing of threads */ - task_t tw_task; /* task that is being watched */ - thread_t tw_thread; /* thread that is watching the watch_task */ - int tw_state; /* the current app state of the thread */ - int tw_importance; /* importance prior to backgrounding */ -} task_watch_t; - -extern lck_mtx_t task_watch_mtx; - -#endif /* CONFIG_EMBEDDED */ #include -extern process_policy_t default_task_proc_policy; /* init value for the process policy attributes */ -extern process_policy_t default_task_null_policy; /* none as the value for the process policy attributes */ - struct task { /* Synchronization/destruction information */ decl_lck_mtx_data(,lock) /* Task's lock */ @@ -255,12 +170,13 @@ struct task { /* User-visible scheduling information */ integer_t user_stop_count; /* outstanding stops */ - - task_role_t role; + integer_t legacy_stop_count; /* outstanding legacy stops */ integer_t priority; /* base priority for threads */ integer_t max_priority; /* maximum priority for threads */ + integer_t importance; /* priority offset (BSD 'nice' value) */ + /* Task security and audit tokens */ security_token_t sec_token; audit_token_t audit_token; @@ -284,6 +200,7 @@ struct task { struct ipc_port *itk_seatbelt; /* a send right */ struct ipc_port *itk_gssd; /* yet another send right */ struct ipc_port *itk_task_access; /* and another send right */ + struct ipc_port *itk_resume; /* a receive right to resume this task */ struct ipc_port *itk_registered[TASK_PORT_REGISTER_MAX]; /* all send rights */ @@ -291,9 +208,7 @@ struct task { /* Synchronizer ownership information */ queue_head_t semaphore_list; /* list of owned semaphores */ - queue_head_t lock_set_list; /* list of owned lock sets */ int semaphores_owned; /* number of semaphores owned */ - int lock_sets_owned; /* number of lock sets owned */ ledger_t ledger; @@ -319,15 +234,20 @@ struct task { void *bsd_info; #endif struct vm_shared_region *shared_region; - uint32_t taskFeatures[2]; /* Special feature for this task */ -#define tf64BitAddr 0x80000000 /* Task has 64-bit addressing */ -#define tf64BitData 0x40000000 /* Task has 64-bit data registers */ + uint32_t t_flags; /* general-purpose task flags protected by task_lock (TL) */ +#define TF_64B_ADDR 0x1 /* task has 64-bit addressing */ +#define TF_64B_DATA 0x2 /* task has 64-bit data registers */ +#define TF_CPUMON_WARNING 0x4 /* task has at least one thread in CPU usage warning zone */ +#define 
TF_WAKEMON_WARNING 0x8 /* task is in wakeups monitor warning zone */ +#define TF_TELEMETRY (TF_CPUMON_WARNING | TF_WAKEMON_WARNING) /* task is a telemetry participant */ #define task_has_64BitAddr(task) \ - (((task)->taskFeatures[0] & tf64BitAddr) != 0) + (((task)->t_flags & TF_64B_ADDR) != 0) #define task_set_64BitAddr(task) \ - ((task)->taskFeatures[0] |= tf64BitAddr) + ((task)->t_flags |= TF_64B_ADDR) #define task_clear_64BitAddr(task) \ - ((task)->taskFeatures[0] &= ~tf64BitAddr) + ((task)->t_flags &= ~TF_64B_ADDR) +#define task_has_64BitData(task) \ + (((task)->t_flags & TF_64B_DATA) != 0) mach_vm_address_t all_image_info_addr; /* dyld __all_image_info */ mach_vm_size_t all_image_info_size; /* section location and size */ @@ -342,10 +262,11 @@ struct task { boolean_t pidsuspended; /* pid_suspend called; no threads can execute */ boolean_t frozen; /* frozen; private resident pages committed to swap */ - process_policy_t ext_appliedstate; /* externally applied actions */ - process_policy_t ext_policystate; /* externally defined process policy states*/ - process_policy_t appliedstate; /* self applied acions */ - process_policy_t policystate; /* process wide policy states */ + boolean_t changing_freeze_state; /* in the process of freezing or thawing */ + uint16_t policy_ru_cpu :4, + policy_ru_cpu_ext :4, + applied_ru_cpu :4, + applied_ru_cpu_ext :4; uint8_t rusage_cpu_flags; uint8_t rusage_cpu_percentage; /* Task-wide CPU limit percentage */ uint64_t rusage_cpu_interval; /* Task-wide CPU limit interval */ @@ -353,24 +274,47 @@ struct task { uint64_t rusage_cpu_perthr_interval; /* Per-thread CPU limit interval */ uint64_t rusage_cpu_deadline; thread_call_t rusage_cpu_callt; -#if CONFIG_EMBEDDED - uint32_t appstate; /* the current appstate */ - queue_head_t task_watchers; /* app state watcher threads */ - int num_taskwatchers; - int watchapplying; -#endif /* CONFIG_EMBEDDED */ + +#if IMPORTANCE_INHERITANCE + uint32_t imp_receiver :1, /* the task can receive importance boost */ + imp_donor :1, /* the task always sends boosts regardless of boost status */ + imp_reserved :30; /* reserved for future use */ + + uint32_t task_imp_assertcnt; /* total number of boost assertions (kernel managed plus userspace managed) */ + uint32_t task_imp_externcnt; /* number of boost assertions externalized (userspace managed) */ +#endif /* IMPORTANCE_INHERITANCE */ vm_extmod_statistics_data_t extmod_statistics; - natural_t proc_terminate; /* the process is marked for proc_terminate */ + +#if MACH_ASSERT + int8_t suspends_outstanding; /* suspends this task performed in excess of resumes */ +#endif + + struct task_requested_policy requested_policy; + struct task_effective_policy effective_policy; + struct task_pended_policy pended_policy; + + /* + * Can be merged with imp_donor bits, once the IMPORTANCE_INHERITANCE macro goes away. 
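+ * + * Sketch of intended use (hypothetical caller, not from this patch): the memory + * pressure code is expected to consult and set the bits declared just below only + * through the task_has_been_notified() / task_mark_has_been_notified() and + * task_used_for_purging() / task_mark_used_for_purging() helpers declared later + * in this header, e.g. + * if (!task_has_been_notified(task, level)) + * task_mark_has_been_notified(task, level); + * rather than touching the bitfield directly.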
+ */ + uint32_t low_mem_notified_warn :1, /* warning low memory notification is sent to the task */ + low_mem_notified_critical :1, /* critical low memory notification is sent to the task */ + purged_memory_warn :1, /* purgeable memory of the task is purged for warning level pressure */ + purged_memory_critical :1, /* purgeable memory of the task is purged for critical level pressure */ + mem_notify_reserved :28; /* reserved for future use */ /* Statistics accumulated for terminated threads from this task */ uint32_t task_timer_wakeups_bin_1; uint32_t task_timer_wakeups_bin_2; + + int task_volatile_objects; /* # of purgeable VM objects made + * "volatile" by this task */ }; -#define task_lock(task) lck_mtx_lock(&(task)->lock) -#define task_lock_try(task) lck_mtx_try_lock(&(task)->lock) -#define task_unlock(task) lck_mtx_unlock(&(task)->lock) +#define task_lock(task) lck_mtx_lock(&(task)->lock) +#define task_lock_assert_owned(task) lck_mtx_assert(&(task)->lock, LCK_MTX_ASSERT_OWNED) +#define task_lock_try(task) lck_mtx_try_lock(&(task)->lock) +#define task_unlock(task) lck_mtx_unlock(&(task)->lock) #if CONFIG_MACF_MACH #define maclabel label->lh_label @@ -387,11 +331,18 @@ extern void tasklabel_unlock2(task_t a, task_t b); #define itk_lock(task) lck_mtx_lock(&(task)->itk_lock_data) #define itk_unlock(task) lck_mtx_unlock(&(task)->itk_lock_data) +#define TASK_REFERENCE_LEAK_DEBUG 0 + +#if TASK_REFERENCE_LEAK_DEBUG +extern void task_reference_internal(task_t task); +extern uint32_t task_deallocate_internal(task_t task); +#else #define task_reference_internal(task) \ (void)hw_atomic_add(&(task)->ref_count, 1) #define task_deallocate_internal(task) \ hw_atomic_sub(&(task)->ref_count, 1) +#endif #define task_reference(task) \ MACRO_BEGIN \ @@ -406,7 +357,7 @@ extern kern_return_t kernel_task_create( task_t *child); /* Initialize task module */ -extern void task_init(void) __attribute__((section("__TEXT, initcode"))); +extern void task_init(void); #define current_task_fast() (current_thread()->task) #define current_task() current_task_fast() @@ -493,6 +444,10 @@ extern kern_return_t task_importance( task_t task, integer_t importance); +extern void task_power_info_locked( + task_t task, + task_power_info_t info); + extern void task_vtimer_set( task_t task, integer_t which); @@ -534,6 +489,11 @@ extern vm_map_t get_task_map_reference(task_t); extern vm_map_t swap_task_map(task_t, thread_t, vm_map_t, boolean_t); extern pmap_t get_task_pmap(task_t); extern uint64_t get_task_resident_size(task_t); +extern uint64_t get_task_phys_footprint(task_t); +extern uint64_t get_task_phys_footprint_max(task_t); + +extern kern_return_t task_set_phys_footprint_limit_internal(task_t, int, int *, boolean_t); +extern kern_return_t task_get_phys_footprint_limit(task_t task, int *limit_mb); extern boolean_t is_kerneltask(task_t task); @@ -559,106 +519,166 @@ struct _task_ledger_indices { int tkm_shared; int phys_mem; int wired_mem; + int iokit_mem; + int phys_footprint; + int phys_compressed; int platform_idle_wakeups; int interrupt_wakeups; }; extern struct _task_ledger_indices task_ledgers; -int proc_get_task_bg_policy(task_t task); -int proc_get_thread_bg_policy(task_t task, uint64_t tid); -int proc_get_self_isbackground(void); -int proc_get_selfthread_isbackground(void); - -int proc_get_darwinbgstate(task_t, uint32_t *); -int proc_set_bgtaskpolicy(task_t task, int intval); -int proc_set_and_apply_bgtaskpolicy(task_t task, int intval); -int proc_set_bgthreadpolicy(task_t task, uint64_t tid, int val); -int 
proc_set_and_apply_bgthreadpolicy(task_t task, uint64_t tid, int val); - -int proc_add_bgtaskpolicy(task_t task, int val); -int proc_add_bgthreadpolicy(task_t task, uint64_t tid, int val); -int proc_remove_bgtaskpolicy(task_t task, int policy); -int proc_remove_bgthreadpolicy(task_t task, uint64_t tid, int val); - -int proc_apply_bgtaskpolicy(task_t task); -int proc_apply_bgtaskpolicy_external(task_t task); -int proc_apply_bgthreadpolicy(task_t task, uint64_t tid); -int proc_apply_bgtask_selfpolicy(void); -int proc_apply_bgthread_selfpolicy(void); -int proc_apply_workq_bgthreadpolicy(thread_t); - -int proc_restore_bgtaskpolicy(task_t task); -int proc_restore_bgthreadpolicy(task_t task, uint64_t tid); -int proc_restore_bgthread_selfpolicy(void); -int proc_restore_workq_bgthreadpolicy(thread_t); -void proc_task_remove_throttle(task_t task); - -/* hw access routines */ -int proc_apply_task_diskacc(task_t task, int policy); -int proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy); -int proc_apply_thread_selfdiskacc(int policy); -int proc_get_task_disacc(task_t task); -int proc_get_task_selfdiskacc(void); -int proc_get_diskacc(thread_t thread); -int proc_get_thread_selfdiskacc(void); -int proc_denyinherit_policy(task_t task); -int proc_denyselfset_policy(task_t task); - -int proc_get_task_selfgpuacc_deny(void); -int proc_apply_task_gpuacc(task_t task, int prio); - -int proc_get_task_ruse_cpu(task_t task, uint32_t * policyp, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep); -int proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64_t interval, uint64_t deadline); -int proc_clear_task_ruse_cpu(task_t task); -thread_t task_findtid(task_t, uint64_t); +/* Begin task_policy */ -#define TASK_RUSECPU_FLAGS_PROC_LIMIT 0x1 -#define TASK_RUSECPU_FLAGS_PERTHR_LIMIT 0x2 -#define TASK_RUSECPU_FLAGS_DEADLINE 0x4 +/* value */ +#define TASK_POLICY_DISABLE 0x0 +#define TASK_POLICY_ENABLE 0x1 -#define PROC_POLICY_OSX_APPTYPE_NONE 0 -#if CONFIG_EMBEDDED -#define PROC_POLICY_IOS_RESV1_APPTYPE 1 -#define PROC_POLICY_IOS_APPLE_DAEMON 2 -#define PROC_POLICY_IOS_APPTYPE 3 -#define PROC_POLICY_IOS_NONUITYPE 4 -#else -#define PROC_POLICY_OSX_APPTYPE_TAL 1 -#define PROC_POLICY_OSX_APPTYPE_WIDGET 2 -#define PROC_POLICY_OSX_APPTYPE_DBCLIENT 2 /* Not a bug, just rename of widget */ -#endif +/* category */ +#define TASK_POLICY_INTERNAL 0x0 +#define TASK_POLICY_EXTERNAL 0x1 +#define TASK_POLICY_ATTRIBUTE 0x2 -void proc_set_task_apptype(task_t task, int type, thread_t thread); -int proc_disable_task_apptype(task_t task, int policy_subtype); -int proc_enable_task_apptype(task_t task, int policy_subtype); +/* for tracing */ +#define TASK_POLICY_TASK 0x4 +#define TASK_POLICY_THREAD 0x8 -#if CONFIG_EMBEDDED -extern int proc_setthread_saved_importance(thread_t thread, int importance); -#endif +/* flavors (also DBG_IMPORTANCE subclasses 0x20 - 0x3F) */ + +/* internal or external, thread or task */ +#define TASK_POLICY_DARWIN_BG 0x21 +#define TASK_POLICY_IOPOL 0x22 +#define TASK_POLICY_IO 0x23 +#define TASK_POLICY_PASSIVE_IO 0x24 + +/* internal or external, task only */ +#define TASK_POLICY_DARWIN_BG_AND_GPU 0x25 +#define TASK_POLICY_GPU_DENY 0x26 + +/* internal, task only */ +#define TASK_POLICY_DARWIN_BG_IOPOL 0x27 + +/* task-only attributes */ +#define TASK_POLICY_TAL 0x28 +#define TASK_POLICY_BOOST 0x29 +#define TASK_POLICY_ROLE 0x2A +#define TASK_POLICY_SUPPRESSED_CPU 0x2B +#define TASK_POLICY_TERMINATED 0x2C +#define TASK_POLICY_NEW_SOCKETS_BG 0x2D +#define 
TASK_POLICY_LOWPRI_CPU 0x2E +#define TASK_POLICY_LATENCY_QOS 0x2F +#define TASK_POLICY_THROUGH_QOS 0x30 +#define TASK_POLICY_WATCHERS_BG 0x31 + +/* thread-only attributes */ +#define TASK_POLICY_PIDBIND_BG 0x32 +#define TASK_POLICY_WORKQ_BG 0x33 + +#define TASK_POLICY_MAX 0x3F + +/* The main entry point into task policy is this function */ +extern void proc_set_task_policy(task_t task, thread_t thread, int category, int flavor, int value); +extern int proc_get_task_policy(task_t task, thread_t thread, int category, int flavor); + +/* For use by kernel threads and others who don't hold a reference on the target thread */ +extern void proc_set_task_policy_thread(task_t task, uint64_t tid, int category, int flavor, int value); + +extern void proc_set_task_apptype(task_t task, int type); + +/* IO Throttle tiers */ +#define THROTTLE_LEVEL_NONE -1 +#define THROTTLE_LEVEL_TIER0 0 /* IOPOL_NORMAL, IOPOL_DEFAULT, IOPOL_PASSIVE */ + +#define THROTTLE_LEVEL_THROTTLED 1 +#define THROTTLE_LEVEL_TIER1 1 /* IOPOL_STANDARD */ +#define THROTTLE_LEVEL_TIER2 2 /* IOPOL_UTILITY */ +#define THROTTLE_LEVEL_TIER3 3 /* IOPOL_THROTTLE */ + +#define THROTTLE_LEVEL_START 0 +#define THROTTLE_LEVEL_END 3 + +#define THROTTLE_LEVEL_COMPRESSOR_TIER0 THROTTLE_LEVEL_TIER0 +#define THROTTLE_LEVEL_COMPRESSOR_TIER1 THROTTLE_LEVEL_TIER1 +#define THROTTLE_LEVEL_COMPRESSOR_TIER2 THROTTLE_LEVEL_TIER2 + +#define THROTTLE_LEVEL_PAGEOUT_THROTTLED THROTTLE_LEVEL_TIER2 +#define THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED THROTTLE_LEVEL_TIER1 + +extern int proc_apply_workq_bgthreadpolicy(thread_t thread); +extern int proc_restore_workq_bgthreadpolicy(thread_t thread); + +extern int proc_get_darwinbgstate(task_t task, uint32_t *flagsp); +extern boolean_t proc_task_is_tal(task_t task); +extern integer_t task_grab_latency_qos(task_t task); +extern void task_policy_create(task_t task, int parent_boosted); + +/* + * Get effective policy + * Only for use by relevant subsystem, should never be passed into a setter! 
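+ * + * Hypothetical usage sketch (not part of this patch): a subsystem that must honor + * backgrounding would test + * if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG)) ... + * while an external request to background a task goes through the setter instead: + * proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_EXTERNAL, + * TASK_POLICY_DARWIN_BG, TASK_POLICY_ENABLE);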
+ */ + +extern int proc_get_effective_task_policy(task_t task, int flavor); +extern int proc_get_effective_thread_policy(thread_t thread, int flavor); + +/* temporary compatibility */ +int proc_setthread_saved_importance(thread_t thread, int importance); + +int proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep); +int proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline, int cpumon_entitled); +int proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled); +thread_t task_findtid(task_t, uint64_t); +void set_thread_iotier_override(thread_t, int policy); + +#define TASK_RUSECPU_FLAGS_PROC_LIMIT 0x01 +#define TASK_RUSECPU_FLAGS_PERTHR_LIMIT 0x02 +#define TASK_RUSECPU_FLAGS_DEADLINE 0x04 +#define TASK_RUSECPU_FLAGS_FATAL_CPUMON 0x08 /* CPU usage monitor violations are fatal */ +#define TASK_RUSECPU_FLAGS_FATAL_WAKEUPSMON 0x10 /* wakeups monitor violations are fatal */ +#define TASK_RUSECPU_FLAGS_PHYS_FOOTPRINT_EXCEPTION 0x20 /* exceeding physical footprint generates EXC_RESOURCE */ /* BSD call back functions */ extern int proc_apply_resource_actions(void * p, int type, int action); extern int proc_restore_resource_actions(void * p, int type, int action); extern int task_restore_resource_actions(task_t task, int type); -extern void proc_apply_task_networkbg(void * bsd_info); -extern void proc_restore_task_networkbg(void * bsd_info); -extern void proc_set_task_networkbg(void * bsd_info, int setbg); -extern int task_clear_cpuusage(task_t task); - -#if CONFIG_EMBEDDED -#define TASK_APPSTATE_NONE 0 -#define TASK_APPSTATE_ACTIVE 1 -#define TASK_APPSTATE_BACKGROUND 2 -#define TASK_APPSTATE_NONUI 3 -#define TASK_APPSTATE_INACTIVE 4 - -extern int proc_lf_getappstate(task_t task); -extern int proc_lf_setappstate(task_t task, int state); -extern int proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind); -extern void thead_remove_taskwatch(thread_t thread); -extern void task_removewatchers(task_t task); -#endif /* CONFIG_EMBEDDED */ +extern int task_clear_cpuusage(task_t task, int cpumon_entitled); + +extern kern_return_t task_wakeups_monitor_ctl(task_t task, uint32_t *flags, int32_t *rate_hz); +extern kern_return_t task_cpu_usage_monitor_ctl(task_t task, uint32_t *flags); + + +void task_importance_mark_donor(task_t task, boolean_t donating); + +#if IMPORTANCE_INHERITANCE +extern boolean_t task_is_importance_donor(task_t task); +extern boolean_t task_is_importance_receiver(task_t task); + +extern int task_importance_hold_internal_assertion(task_t target_task, uint32_t count); +extern int task_importance_drop_internal_assertion(task_t target_task, uint32_t count); + +extern int task_importance_hold_external_assertion(task_t target_task, uint32_t count); +extern int task_importance_drop_external_assertion(task_t target_task, uint32_t count); + +extern int task_importance_externalize_assertion(task_t target_task, uint32_t count, int sender_pid); +#endif /* IMPORTANCE_INHERITANCE */ + +extern void task_hold_multiple_assertion(task_t task, uint32_t count); +extern void task_add_importance_watchport(task_t task, int pid, mach_port_t port, int *boostp); + +extern boolean_t task_has_been_notified(task_t task, int pressurelevel); +extern boolean_t task_used_for_purging(task_t task, int pressurelevel); +extern void task_mark_has_been_notified(task_t task, int pressurelevel); +extern void task_mark_used_for_purging(task_t task, int pressurelevel); +extern void 
task_clear_has_been_notified(task_t task, int pressurelevel); +extern void task_clear_used_for_purging(task_t task); +extern int task_importance_estimate(task_t task); + +/* + * This should only be used for debugging. + * pid is stored in audit_token by set_security_token(). + */ +#define audit_token_pid_from_task(task) ((task)->audit_token.val[5]) + +/* End task_policy */ #endif /* XNU_KERNEL_PRIVATE */ @@ -674,9 +694,13 @@ extern boolean_t get_task_frozen(task_t); /* Convert from a task to a port */ extern ipc_port_t convert_task_to_port(task_t); - -/* Convert from a task name to a port */ extern ipc_port_t convert_task_name_to_port(task_name_t); +extern ipc_port_t convert_task_suspension_token_to_port(task_suspension_token_t task); + +/* Convert from a port (in this case, an SO right to a task's resume port) to a task. */ +extern task_suspension_token_t convert_port_to_task_suspension_token(ipc_port_t port); + +extern boolean_t task_suspension_notify(mach_msg_header_t *); #endif /* KERNEL_PRIVATE */ @@ -687,6 +711,9 @@ extern void task_deallocate( extern void task_name_deallocate( task_name_t task_name); + +extern void task_suspension_token_deallocate( + task_suspension_token_t token); __END_DECLS #endif /* _KERN_TASK_H_ */ diff --git a/osfmk/kern/task_policy.c b/osfmk/kern/task_policy.c index 2cbb2daa2..88aeafa46 100644 --- a/osfmk/kern/task_policy.c +++ b/osfmk/kern/task_policy.c @@ -37,124 +37,221 @@ #include #include #include -#if CONFIG_EMBEDDED -#include -#include -#endif /* CONFIG_EMBEDDED */ +#if CONFIG_TELEMETRY +#include +#endif + +#if IMPORTANCE_DEBUG +#include +#endif /* IMPORTANCE_DEBUG */ + #include -#if CONFIG_MEMORYSTATUS -extern void memorystatus_on_suspend(int pid); -extern void memorystatus_on_resume(int pid); -#endif +/* + * Task Policy + * + * This subsystem manages task and thread IO priority and backgrounding, + * as well as importance inheritance, process suppression, task QoS, and apptype. + * These properties have a surprising number of complex interactions, so they are + * centralized here in one state machine to simplify the implementation of those interactions. + * + * Architecture: + * Threads and tasks have three policy fields: requested, effective, and pending. + * Requested represents the wishes of each interface that influences task policy. + * Effective represents the distillation of that policy into a set of behaviors. + * Pending represents updates that haven't been applied yet. + * + * Each interface that has an input into the task policy state machine controls a field in requested. + * If the interface has a getter, it returns what is in the field in requested, but that is + * not necessarily what is actually in effect. + * + * All kernel subsystems that behave differently based on task policy call into + * the get_effective_policy function, which returns the decision of the task policy state machine + * for that subsystem by querying only the 'effective' field. + * + * Policy change operations: + * Here are the steps to change a policy on a task or thread: + * 1) Lock task + * 2) Change requested field for the relevant policy + * 3) Run a task policy update, which recalculates effective based on requested, + * then takes a diff between the old and new versions of requested and calls the relevant + * other subsystems to apply these changes, and updates the pending field. + * 4) Unlock task + * 5) Run task policy update complete, which looks at the pending field to update + * subsystems which cannot be touched while holding the task lock. 
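+ * + * As a minimal sketch of steps 1-5 (mirroring the TASK_BASE_QOS_POLICY handler + * later in this file; int_darwinbg is one of the requested bits consumed by + * task_policy_update_internal_locked): + * task_lock(task); // 1 + * task->requested_policy.int_darwinbg = 1; // 2 + * task_policy_update_locked(task, THREAD_NULL); // 3 + * task_unlock(task); // 4 + * task_policy_update_complete_unlocked(task, THREAD_NULL); // 5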
+ * + * To add a new requested policy, add the field in the requested struct, the flavor in task.h, + * the setter and getter in proc_(set|get)_task_policy*, and dump the state in task_requested_bitfield, + * then set up the effects of that behavior in task_policy_update*. + * + * Most policies are set via proc_set_task_policy, but policies that don't fit that interface + * roll their own lock/set/update/unlock/complete code inside this file. + * + * + * Suppression policy + * + * These are a set of behaviors that can be requested for a task. They currently have specific + * implied actions when they're enabled, but they may be made customizable in the future. + * + * When the affected task is boosted, we temporarily disable the suppression behaviors + * so that the affected process has a chance to run so it can call the API to permanently + * disable the suppression behaviors. + * + * Locking + * + * Changing task policy on a task or thread takes the task lock, and not the thread lock. + * TODO: Should changing policy on a thread take the thread lock instead? + * + * Querying the effective policy does not take the task lock, to prevent deadlocks or slowdown in sensitive code. + * This means that any notification of state change needs to be externally synchronized. + * + */ + +/* for task holds without dropping the lock */ +extern void task_hold_locked(task_t task); +extern void task_release_locked(task_t task); +extern void task_wait_locked(task_t task, boolean_t until_not_runnable); + +/* Task policy related helper functions */ +static void proc_set_task_policy_locked(task_t task, thread_t thread, int category, int flavor, int value); -static int proc_apply_bgtaskpolicy_internal(task_t, int, int); -static int proc_restore_bgtaskpolicy_internal(task_t, int, int, int); -static int task_get_cpuusage(task_t task, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep); -int task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t deadline, int scope); -static int task_clear_cpuusage_locked(task_t task); +static void task_policy_update_locked(task_t task, thread_t thread); +static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create); +static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle); +static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle); + +static void task_policy_update_complete_unlocked(task_t task, thread_t thread); + +static int proc_get_effective_policy(task_t task, thread_t thread, int policy); + +static void proc_iopol_to_tier(int iopolicy, int *tier, int *passive); +static int proc_tier_to_iopol(int tier, int passive); + +static uintptr_t trequested(task_t task, thread_t thread); +static uintptr_t teffective(task_t task, thread_t thread); +static uintptr_t tpending(task_t task, thread_t thread); +static uint64_t task_requested_bitfield(task_t task, thread_t thread); +static uint64_t task_effective_bitfield(task_t task, thread_t thread); +static uint64_t task_pending_bitfield(task_t task, thread_t thread); + +void proc_get_thread_policy(thread_t thread, thread_policy_state_t info); + +/* CPU Limits related helper functions */ +static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope); +int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled); +static int 
task_clear_cpuusage_locked(task_t task, int cpumon_entitled); +int task_disable_cpumon(task_t task); static int task_apply_resource_actions(task_t task, int type); -static void task_priority(task_t task, integer_t priority, integer_t max_priority); -static kern_return_t task_role_default_handler(task_t task, task_role_t role); void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1); -static int proc_apply_bgthreadpolicy_locked(thread_t thread, int selfset); -static void restore_bgthreadpolicy_locked(thread_t thread, int selfset, int importance); -static int proc_get_task_selfdiskacc_internal(task_t task, thread_t thread); -extern void unthrottle_thread(void * uthread); - -#if CONFIG_EMBEDDED -static void set_thread_appbg(thread_t thread, int setbg,int importance); -static void apply_bgthreadpolicy_external(thread_t thread); -static void add_taskwatch_locked(task_t task, task_watch_t * twp); -static void remove_taskwatch_locked(task_t task, task_watch_t * twp); -static void task_watch_lock(void); -static void task_watch_unlock(void); -static void apply_appstate_watchers(task_t task, int setbg); -void proc_apply_task_networkbg_internal(void *, thread_t); -void proc_restore_task_networkbg_internal(void *, thread_t); -int proc_pid(void * proc); - -typedef struct thread_watchlist { - thread_t thread; /* thread being worked on for taskwatch action */ - int importance; /* importance to be restored if thread is being made active */ -} thread_watchlist_t; - -#endif /* CONFIG_EMBEDDED */ - - -process_policy_t default_task_proc_policy = {0, - 0, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - 0, - TASK_POLICY_HWACCESS_CPU_ATTRIBUTE_FULLACCESS, - TASK_POLICY_HWACCESS_NET_ATTRIBUTE_FULLACCESS, - TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS, - TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL, - TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL - }; - -process_policy_t default_task_null_policy = {0, - 0, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - TASK_POLICY_RESOURCE_ATTRIBUTE_NONE, - 0, - TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NONE, - TASK_POLICY_HWACCESS_NET_ATTRIBUTE_NONE, - TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NONE, - TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL, - TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE - }; - +void proc_init_cpumon_params(void); + +#ifdef MACH_BSD +int proc_pid(void *proc); +extern int proc_selfpid(void); +extern char * proc_name_address(void *p); +extern void rethrottle_thread(void * uthread); +extern void proc_apply_task_networkbg(void * bsd_info, thread_t thread, int bg); +#endif /* MACH_BSD */ +/* Importance Inheritance related helper functions */ + +void task_importance_mark_receiver(task_t task, boolean_t receiving); + +#if IMPORTANCE_INHERITANCE +static void task_update_boost_locked(task_t task, boolean_t boost_active); + +static int task_importance_hold_assertion_locked(task_t target_task, int external, uint32_t count); +static int task_importance_drop_assertion_locked(task_t target_task, int external, uint32_t count); +#endif /* IMPORTANCE_INHERITANCE */ + +#if IMPORTANCE_DEBUG +#define __impdebug_only +#else +#define __impdebug_only __unused +#endif + +#if 
IMPORTANCE_INHERITANCE +#define __imp_only +#else +#define __imp_only __unused +#endif + +#define TASK_LOCKED 1 +#define TASK_UNLOCKED 0 + +#define DO_LOWPRI_CPU 1 +#define UNDO_LOWPRI_CPU 2 + +/* Macros for making tracing simpler */ + +#define tpriority(task, thread) ((uintptr_t)(thread == THREAD_NULL ? (task->priority) : (thread->priority))) +#define tisthread(thread) (thread == THREAD_NULL ? TASK_POLICY_TASK : TASK_POLICY_THREAD) +#define targetid(task, thread) ((uintptr_t)(thread == THREAD_NULL ? (audit_token_pid_from_task(task)) : (thread->thread_id))) + +/* + * Default parameters for certain policies + */ + +int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1; +int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1; +int proc_tal_disk_tier = THROTTLE_LEVEL_TIER1; + +int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & 0xFF); + +const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2; + + +const struct task_requested_policy default_task_requested_policy = { + .bg_iotier = proc_default_bg_iotier +}; +const struct task_effective_policy default_task_effective_policy = {}; +const struct task_pended_policy default_task_pended_policy = {}; + /* - * This routine should always be called with the task lock held. - * This routine handles Default operations for TASK_FOREGROUND_APPLICATION - * and TASK_BACKGROUND_APPLICATION of task with no special app type. + * Default parameters for CPU usage monitor. + * + * Default setting is 50% over 3 minutes. */ +#define DEFAULT_CPUMON_PERCENTAGE 50 +#define DEFAULT_CPUMON_INTERVAL (3 * 60) + +uint8_t proc_max_cpumon_percentage; +uint64_t proc_max_cpumon_interval; + static kern_return_t -task_role_default_handler(task_t task, task_role_t role) -{ - kern_return_t result = KERN_SUCCESS; - - switch (task->role) { - case TASK_FOREGROUND_APPLICATION: - case TASK_BACKGROUND_APPLICATION: - case TASK_UNSPECIFIED: - /* if there are no process wide backgrounding ... */ - if ((task->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) && - (task->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)) { - task_priority(task, - ((role == TASK_FOREGROUND_APPLICATION)? - BASEPRI_FOREGROUND: BASEPRI_BACKGROUND), - task->max_priority); - } - task->role = role; - break; +task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) { + if (count < TASK_QOS_POLICY_COUNT) + return KERN_INVALID_ARGUMENT; - case TASK_CONTROL_APPLICATION: - case TASK_RENICED: - /* else fail silently */ - break; + task_latency_qos_t ltier = qosinfo->task_latency_qos_tier; + task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier; - default: - result = KERN_INVALID_ARGUMENT; - break; - } - return(result); + if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) && + ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0))) + return KERN_INVALID_ARGUMENT; + + if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) && + ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0))) + return KERN_INVALID_ARGUMENT; + + return KERN_SUCCESS; +} + +static uint32_t +task_qos_extract(uint32_t qv) { + return (qv & 0xFF); +} + +static uint32_t +task_qos_latency_package(uint32_t qv) { + return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv); } +static uint32_t +task_qos_throughput_package(uint32_t qv) { + return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? 
THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv); +} kern_return_t task_policy_set( @@ -164,161 +261,173 @@ task_policy_set( mach_msg_type_number_t count) { kern_return_t result = KERN_SUCCESS; - void * bsdinfo = NULL; - int setbg = 0; if (task == TASK_NULL || task == kernel_task) return (KERN_INVALID_ARGUMENT); switch (flavor) { - case TASK_CATEGORY_POLICY: - { + case TASK_CATEGORY_POLICY: { task_category_policy_t info = (task_category_policy_t)policy_info; if (count < TASK_CATEGORY_POLICY_COUNT) return (KERN_INVALID_ARGUMENT); -#if CONFIG_EMBEDDED - if ((current_task() == task) && (info != NULL) && - (info->role != TASK_THROTTLE_APPLICATION)) - return (KERN_INVALID_ARGUMENT); -#endif - task_lock(task); switch(info->role) { - case TASK_FOREGROUND_APPLICATION : { - if (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_NONE) { - result = task_role_default_handler(task, info->role); - } else { - switch (task->ext_appliedstate.apptype) { -#if !CONFIG_EMBEDDED - case PROC_POLICY_OSX_APPTYPE_TAL: - /* Move the app to foreground with no DarwinBG */ - proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_FOREGROUND); - bsdinfo = task->bsd_info; - setbg = 0; - break; - - case PROC_POLICY_OSX_APPTYPE_DBCLIENT: - /* reset the apptype so enforcement on background/foregound */ - task->ext_appliedstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE; - /* Internal application and make it foreground pri */ - proc_restore_bgtaskpolicy_internal(task, 1, 0, BASEPRI_FOREGROUND); - bsdinfo = task->bsd_info; - setbg = 0; - break; -#endif /* !CONFIG_EMBEDDED */ - - default: - /* the app types cannot be in CONTROL, GRAPHICS STATE, so it will de default state here */ - task_priority(task, BASEPRI_FOREGROUND, task->max_priority); - break; - - } /* switch (task->ext_appliedstate.apptype) */ - task->role = TASK_FOREGROUND_APPLICATION; - } - } - break; + case TASK_FOREGROUND_APPLICATION: + case TASK_BACKGROUND_APPLICATION: + case TASK_DEFAULT_APPLICATION: + proc_set_task_policy(task, THREAD_NULL, + TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, + info->role); + break; - case TASK_BACKGROUND_APPLICATION : { - if (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_NONE) { - result = task_role_default_handler(task, info->role); - } else { /* apptype != PROC_POLICY_OSX_APPTYPE_NONE */ - switch (task->ext_appliedstate.apptype) { -#if !CONFIG_EMBEDDED - case PROC_POLICY_OSX_APPTYPE_TAL: - /* TAL apps will get Darwin backgrounded if not already set */ - if (task->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) { - proc_apply_bgtaskpolicy_internal(task, 1, 1); - bsdinfo = task->bsd_info; - setbg = 1; - } - break; -#endif /* !CONFIG_EMBEDDED */ - default: - task_priority(task, BASEPRI_BACKGROUND, task->max_priority); - break; - } /* switch (task->ext_appliedstate.apptype) */ - task->role = TASK_BACKGROUND_APPLICATION; - } - } - break; + case TASK_CONTROL_APPLICATION: + if (task != current_task() || task->sec_token.val[0] != 0) + result = KERN_INVALID_ARGUMENT; + else + proc_set_task_policy(task, THREAD_NULL, + TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, + info->role); + break; - case TASK_CONTROL_APPLICATION: - if (task != current_task()|| - task->sec_token.val[0] != 0) + case TASK_GRAPHICS_SERVER: + /* TODO: Restrict this role to FCFS */ + if (task != current_task() || task->sec_token.val[0] != 0) + result = KERN_INVALID_ARGUMENT; + else + proc_set_task_policy(task, THREAD_NULL, + TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, + info->role); + break; + default: result = KERN_INVALID_ARGUMENT; - else { - 
task_priority(task, BASEPRI_CONTROL, task->max_priority); - task->role = info->role; - } - break; + break; + } /* switch (info->role) */ - case TASK_GRAPHICS_SERVER: - if (task != current_task() || - task->sec_token.val[0] != 0) - result = KERN_INVALID_ARGUMENT; - else { - task_priority(task, MAXPRI_RESERVED - 3, MAXPRI_RESERVED); - task->role = info->role; - } - break; - case TASK_DEFAULT_APPLICATION: - task_priority(task, BASEPRI_DEFAULT, MAXPRI_USER); - task->role = info->role; - break; + break; + } - default : - result = KERN_INVALID_ARGUMENT; - break; - } /* switch (info->role) */ +/* Desired energy-efficiency/performance "quality-of-service" */ + case TASK_BASE_QOS_POLICY: + { + task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; + kern_return_t kr = task_qos_policy_validate(qosinfo, count); + + if (kr != KERN_SUCCESS) + return kr; + + task_lock(task); + + /* This uses the latency QoS tracepoint, even though we might be changing both */ + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_START, + proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0); + + task->requested_policy.t_base_latency_qos = task_qos_extract(qosinfo->task_latency_qos_tier); + task->requested_policy.t_base_through_qos = task_qos_extract(qosinfo->task_throughput_qos_tier); + + task_policy_update_locked(task, THREAD_NULL); task_unlock(task); - /* if backgrounding action ... */ - if (bsdinfo != NULL) - proc_set_task_networkbg(bsdinfo, setbg); + task_policy_update_complete_unlocked(task, THREAD_NULL); - break; + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_END, + proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0); } - - default: - result = KERN_INVALID_ARGUMENT; break; + + case TASK_OVERRIDE_QOS_POLICY: + { + task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; + kern_return_t kr = task_qos_policy_validate(qosinfo, count); + + if (kr != KERN_SUCCESS) + return kr; + + task_lock(task); + + /* This uses the latency QoS tracepoint, even though we might be changing both */ + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_START, + proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0); + + task->requested_policy.t_over_latency_qos = task_qos_extract(qosinfo->task_latency_qos_tier); + task->requested_policy.t_over_through_qos = task_qos_extract(qosinfo->task_throughput_qos_tier); + + task_policy_update_locked(task, THREAD_NULL); + + task_unlock(task); + + task_policy_update_complete_unlocked(task, THREAD_NULL); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(TASK_POLICY_LATENCY_QOS, (TASK_POLICY_ATTRIBUTE | TASK_POLICY_TASK))) | DBG_FUNC_END, + proc_selfpid(), targetid(task, THREAD_NULL), trequested(task, THREAD_NULL), 0, 0); } + break; - return (result); -} + case TASK_SUPPRESSION_POLICY: + { -static void -task_priority( - task_t task, - integer_t priority, - integer_t max_priority) -{ - thread_t thread; + task_suppression_policy_t info = (task_suppression_policy_t)policy_info; - task->max_priority = max_priority; + if (count < TASK_SUPPRESSION_POLICY_COUNT) + return (KERN_INVALID_ARGUMENT); - if (priority > task->max_priority) - priority = task->max_priority; - else - if (priority < MINPRI) - priority = MINPRI; + struct 
task_qos_policy qosinfo; - task->priority = priority; + qosinfo.task_latency_qos_tier = info->timer_throttle; + qosinfo.task_throughput_qos_tier = info->throughput_qos; - queue_iterate(&task->threads, thread, thread_t, task_threads) { - thread_mtx_lock(thread); + kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT); + + if (kr != KERN_SUCCESS) + return kr; + + task_lock(task); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START, + proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), + 0, 0); + + task->requested_policy.t_sup_active = (info->active) ? 1 : 0; + task->requested_policy.t_sup_lowpri_cpu = (info->lowpri_cpu) ? 1 : 0; + task->requested_policy.t_sup_timer = task_qos_extract(info->timer_throttle); + task->requested_policy.t_sup_disk = (info->disk_throttle) ? 1 : 0; + task->requested_policy.t_sup_cpu_limit = (info->cpu_limit) ? 1 : 0; + task->requested_policy.t_sup_suspend = (info->suspend) ? 1 : 0; + task->requested_policy.t_sup_throughput = task_qos_extract(info->throughput_qos); + task->requested_policy.t_sup_cpu = (info->suppressed_cpu) ? 1 : 0; + + task_policy_update_locked(task, THREAD_NULL); + + task_unlock(task); + + task_policy_update_complete_unlocked(task, THREAD_NULL); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END, + proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), + 0, 0); + + break; - if (thread->active) - thread_task_priority(thread, priority, max_priority); + } - thread_mtx_unlock(thread); + default: + result = KERN_INVALID_ARGUMENT; + break; } + + return (result); } +/* Sets BSD 'nice' value on the task */ kern_return_t task_importance( task_t task, @@ -335,20 +444,24 @@ task_importance( return (KERN_TERMINATED); } - if (task->role >= TASK_CONTROL_APPLICATION) { + if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) { task_unlock(task); return (KERN_INVALID_ARGUMENT); } - task_priority(task, importance + BASEPRI_DEFAULT, task->max_priority); - task->role = TASK_RENICED; + task->importance = importance; + + /* TODO: tracepoint? 
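+ * (For reference: the code removed above computed this directly as + * priority = importance + BASEPRI_DEFAULT, clamped to max_priority; the + * redrive below is expected to fold the same 'nice' offset into the task's + * recalculated priority.)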
*/ + + /* Redrive only the task priority calculation */ + task_policy_update_task_locked(task, FALSE, FALSE); task_unlock(task); return (KERN_SUCCESS); } - + kern_return_t task_policy_get( task_t task, @@ -371,1019 +484,1511 @@ task_policy_get( if (*get_default) info->role = TASK_UNSPECIFIED; - else { + else + info->role = proc_get_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE); + break; + } + + case TASK_BASE_QOS_POLICY: /* FALLTHRU */ + case TASK_OVERRIDE_QOS_POLICY: + { + task_qos_policy_t info = (task_qos_policy_t)policy_info; + + if (*count < TASK_QOS_POLICY_COUNT) + return (KERN_INVALID_ARGUMENT); + + if (*get_default) { + info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED; + info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED; + } else if (flavor == TASK_BASE_QOS_POLICY) { + task_lock(task); + + info->task_latency_qos_tier = task_qos_latency_package(task->requested_policy.t_base_latency_qos); + info->task_throughput_qos_tier = task_qos_throughput_package(task->requested_policy.t_base_through_qos); + + task_unlock(task); + } else if (flavor == TASK_OVERRIDE_QOS_POLICY) { task_lock(task); - info->role = task->role; + + info->task_latency_qos_tier = task_qos_latency_package(task->requested_policy.t_over_latency_qos); + info->task_throughput_qos_tier = task_qos_throughput_package(task->requested_policy.t_over_through_qos); + task_unlock(task); } + break; } - default: - return (KERN_INVALID_ARGUMENT); - } + case TASK_POLICY_STATE: + { + task_policy_state_t info = (task_policy_state_t)policy_info; - return (KERN_SUCCESS); -} + if (*count < TASK_POLICY_STATE_COUNT) + return (KERN_INVALID_ARGUMENT); -/* task Darwin BG enforcement/settings related routines */ -int -proc_get_task_bg_policy(task_t task) -{ + /* Only root can get this info */ + if (current_task()->sec_token.val[0] != 0) + return KERN_PROTECTION_FAILURE; - int selfset = 0; - int val = 0; + task_lock(task); - if (current_task() == task) - selfset = 1; + if (*get_default) { + info->requested = 0; + info->effective = 0; + info->pending = 0; + info->imp_assertcnt = 0; + info->imp_externcnt = 0; + info->flags = 0; + } else { + info->requested = task_requested_bitfield(task, THREAD_NULL); + info->effective = task_effective_bitfield(task, THREAD_NULL); + info->pending = task_pending_bitfield(task, THREAD_NULL); + info->imp_assertcnt = task->task_imp_assertcnt; + info->imp_externcnt = task->task_imp_externcnt; + + info->flags = 0; + info->flags |= (task->imp_receiver ? TASK_IMP_RECEIVER : 0); + info->flags |= (task->imp_donor ? 
TASK_IMP_DONOR : 0); + } - if (selfset == 0) { - val = task->ext_policystate.hw_bg; - } else { - val = task->policystate.hw_bg; - } + task_unlock(task); - return(val); -} + break; + } + case TASK_SUPPRESSION_POLICY: + { + task_suppression_policy_t info = (task_suppression_policy_t)policy_info; -int -proc_get_thread_bg_policy(task_t task, uint64_t tid) -{ - int selfset = 0; - thread_t self = current_thread(); - thread_t thread = THREAD_NULL; - int val = 0; + if (*count < TASK_SUPPRESSION_POLICY_COUNT) + return (KERN_INVALID_ARGUMENT); - if (tid == self->thread_id) - selfset = 1; - - if (selfset == 0) { task_lock(task); - thread = task_findtid(task, tid); - if (thread != NULL) - val = thread->ext_policystate.hw_bg; + + if (*get_default) { + info->active = 0; + info->lowpri_cpu = 0; + info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED; + info->disk_throttle = 0; + info->cpu_limit = 0; + info->suspend = 0; + info->throughput_qos = 0; + info->suppressed_cpu = 0; + } else { + info->active = task->requested_policy.t_sup_active; + info->lowpri_cpu = task->requested_policy.t_sup_lowpri_cpu; + info->timer_throttle = task_qos_latency_package(task->requested_policy.t_sup_timer); + info->disk_throttle = task->requested_policy.t_sup_disk; + info->cpu_limit = task->requested_policy.t_sup_cpu_limit; + info->suspend = task->requested_policy.t_sup_suspend; + info->throughput_qos = task_qos_throughput_package(task->requested_policy.t_sup_throughput); + info->suppressed_cpu = task->requested_policy.t_sup_cpu; + } + task_unlock(task); - } else { - val = self->policystate.hw_bg; + break; + } + + default: + return (KERN_INVALID_ARGUMENT); } - return(val); + return (KERN_SUCCESS); } -int -proc_get_self_isbackground(void) +/* + * Called at task creation + * We calculate the correct effective but don't apply it to anything yet. + * The threads, etc will inherit from the task as they get created. 
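+ * + * Net effect of the adaptive-daemon fixup below, as a decision table: + * parent_boosted != 0 -> t_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE, task marked an importance donor + * parent_boosted == 0 -> t_apptype = TASK_APPTYPE_DAEMON_BACKGROUND, importance receiving cleared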
+ */ +void +task_policy_create(task_t task, int parent_boosted) { - task_t task = current_task();; - thread_t thread = current_thread(); + if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) { + if (parent_boosted) { + task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE; + task_importance_mark_donor(task, TRUE); + } else { + task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_BACKGROUND; + task_importance_mark_receiver(task, FALSE); + } + } - if ((task->ext_appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) || - (task->appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) || - (thread->ext_appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) || - (thread->appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)) - return(1); - else - return(0); - + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START, + proc_selfpid(), audit_token_pid_from_task(task), + teffective(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0); + + task_policy_update_internal_locked(task, THREAD_NULL, TRUE); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END, + proc_selfpid(), audit_token_pid_from_task(task), + teffective(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0); } -int proc_get_selfthread_isbackground(void) +static void +task_policy_update_locked(task_t task, thread_t thread) { - thread_t thread = current_thread(); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread)) | DBG_FUNC_START), + proc_selfpid(), targetid(task, thread), + teffective(task, thread), tpriority(task, thread), 0); - if ((thread->ext_appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) || - (thread->appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE)) - return(1); - else - return(0); -} + task_policy_update_internal_locked(task, thread, FALSE); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END, + proc_selfpid(), targetid(task, thread), + teffective(task, thread), tpriority(task, thread), 0); +} -int -proc_set_bgtaskpolicy(task_t task, int intval) +/* + * One state update function TO RULE THEM ALL + * + * This function updates the task or thread effective policy fields + * and pushes the results to the relevant subsystems. + * + * Must call update_complete after unlocking the task, + * as some subsystems cannot be updated while holding the task lock. + * + * Called with task locked, not thread + */ +static void +task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create) { + boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; + + /* + * Step 1: + * Gather requested policy + */ - int selfset = 0; + struct task_requested_policy requested = + (on_task) ? 
task->requested_policy : thread->requested_policy; + + /* + * Step 2: + * Calculate new effective policies from requested policy and task state + * Rules: + * If in an 'on_task' block, must only look at and set fields starting with t_ + * If operating on a task, don't touch anything starting with th_ + * If operating on a thread, don't touch anything starting with t_ + * Don't change requested, it won't take effect + */ - if (current_task() == task) - selfset = 1; + struct task_effective_policy next = {}; - task_lock(task); + /* Calculate DARWIN_BG */ + boolean_t wants_darwinbg = FALSE; + boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */ + boolean_t wants_watchersbg = FALSE; /* Do I want my pidbound threads to be bg */ + boolean_t wants_tal = FALSE; /* Do I want the effects of TAL mode */ + /* + * If DARWIN_BG has been requested at either level, it's engaged. + * Only true DARWIN_BG changes cause watchers to transition. + */ + if (requested.int_darwinbg || requested.ext_darwinbg) + wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE; + + if (on_task) { + /* Background TAL apps are throttled when TAL is enabled */ + if (requested.t_apptype == TASK_APPTYPE_APP_TAL && + requested.t_role == TASK_BACKGROUND_APPLICATION && + requested.t_tal_enabled == 1) { + wants_tal = TRUE; + next.t_tal_engaged = 1; + } - if (selfset == 0) { - /* allready set? */ - if (task->ext_policystate.hw_bg != intval) - task->ext_policystate.hw_bg = intval; + /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */ + if (requested.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && + requested.t_boosted == 0) + wants_darwinbg = TRUE; + + /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. 
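+	 *
+	 * Putting this whole calculation together, wants_darwinbg reduces to
+	 * (sketch of the rules in this block):
+	 *
+	 *	int_darwinbg || ext_darwinbg
+	 *	    || (task:   ADAPTIVE daemon && !t_boosted)
+	 *	    || (task:   BACKGROUND daemon)
+	 *	    || (thread: th_pidbind_bg || th_workq_bg)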
*/ + if (requested.t_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) + wants_darwinbg = TRUE; } else { - if (task->policystate.hw_bg != intval) - task->policystate.hw_bg = intval; - } + if (requested.th_pidbind_bg) + wants_all_sockets_bg = wants_darwinbg = TRUE; - task_unlock(task); - return(0); -} + if (requested.th_workq_bg) + wants_darwinbg = TRUE; + } -/* set and apply as well , handles reset of NONUI due to setprio() task app state implmn side effect */ -int -proc_set_and_apply_bgtaskpolicy(task_t task, int prio) -{ - int error = 0; + /* Calculate side effects of DARWIN_BG */ - if (prio == PRIO_DARWIN_BG) { - error = proc_set_bgtaskpolicy(task, TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL); - if (error == 0) { - error = proc_apply_bgtaskpolicy(task); -#if CONFIG_EMBEDDED - /* XXX: till SB uses newer SPIs */ - apply_appstate_watchers(task, 1); -#endif /* CONFIG_EMBEDDED */ - } - } else { - error = proc_restore_bgtaskpolicy(task); - if (error == 0) { - /* since prior impl of non UI was overloaded with bg state, need to reset */ - error = proc_apply_task_gpuacc(task, TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT); -#if CONFIG_EMBEDDED - /* XXX: till SB uses newer SPIs */ - apply_appstate_watchers(task, 0); -#endif /* CONFIG_EMBEDDED */ - } - + if (wants_darwinbg) { + next.darwinbg = 1; + /* darwinbg threads/tasks always create bg sockets, but we don't always loop over all sockets */ + next.new_sockets_bg = 1; + next.lowpri_cpu = 1; } - return(error); -} + if (wants_all_sockets_bg) + next.all_sockets_bg = 1; + if (on_task && wants_watchersbg) + next.t_watchers_bg = 1; -int -proc_set_bgthreadpolicy(task_t task, uint64_t tid, int prio) -{ - int selfset = 0; - thread_t self = current_thread(); - thread_t thread = THREAD_NULL; - int reset; + /* Calculate low CPU priority */ - if (prio == 0) - reset = 1; - if (tid == self->thread_id) - selfset = 1; + boolean_t wants_lowpri_cpu = FALSE; - task_lock(task); - if (selfset == 0) { - thread = task_findtid(task, tid); - if (thread != NULL) - thread->ext_policystate.hw_bg = prio; - } else { - self->policystate.hw_bg = prio; + if (wants_darwinbg || wants_tal) + wants_lowpri_cpu = TRUE; + + if (on_task && requested.t_sup_lowpri_cpu && requested.t_boosted == 0) + wants_lowpri_cpu = TRUE; + + if (wants_lowpri_cpu) + next.lowpri_cpu = 1; + + /* Calculate IO policy */ + + /* Update BG IO policy (so we can see if it has changed) */ + next.bg_iotier = requested.bg_iotier; + + int iopol = THROTTLE_LEVEL_TIER0; + + if (wants_darwinbg) + iopol = MAX(iopol, requested.bg_iotier); + + if (on_task) { + if (requested.t_apptype == TASK_APPTYPE_DAEMON_STANDARD) + iopol = MAX(iopol, proc_standard_daemon_tier); + + if (requested.t_sup_disk && requested.t_boosted == 0) + iopol = MAX(iopol, proc_suppressed_disk_tier); + + if (wants_tal) + iopol = MAX(iopol, proc_tal_disk_tier); } - - task_unlock(task); - return(0); -} + iopol = MAX(iopol, requested.int_iotier); + iopol = MAX(iopol, requested.ext_iotier); -int -proc_set_and_apply_bgthreadpolicy(task_t task, uint64_t tid, int prio) -{ - int error = 0; + next.io_tier = iopol; - if (prio == PRIO_DARWIN_BG) { - error = proc_set_bgthreadpolicy(task, tid, TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL); - if (error == 0) - error = proc_apply_bgthreadpolicy(task, tid); - } else { - error = proc_restore_bgthreadpolicy(task, tid); + /* Calculate Passive IO policy */ + + if (requested.ext_iopassive || requested.int_iopassive) + next.io_passive = 1; + + /* Calculate miscellaneous policy */ + + if (on_task) { + /* Update role */ + next.t_role = 
requested.t_role; + + /* Calculate suppression-active flag */ + if (requested.t_sup_active && requested.t_boosted == 0) + next.t_sup_active = 1; + + /* Calculate suspend policy */ + if (requested.t_sup_suspend && requested.t_boosted == 0) + next.t_suspended = 1; + + /* Calculate GPU Access policy */ + if (requested.t_int_gpu_deny || requested.t_ext_gpu_deny) + next.t_gpu_deny = 1; + + + /* Calculate timer QOS */ + int latency_qos = requested.t_base_latency_qos; + + if (requested.t_sup_timer && requested.t_boosted == 0) + latency_qos = requested.t_sup_timer; + + if (requested.t_over_latency_qos != 0) + latency_qos = requested.t_over_latency_qos; + + /* Treat the windowserver special */ + if (requested.t_role == TASK_GRAPHICS_SERVER) + latency_qos = proc_graphics_timer_qos; + + next.t_latency_qos = latency_qos; + + /* Calculate throughput QOS */ + int through_qos = requested.t_base_through_qos; + + if (requested.t_sup_throughput && requested.t_boosted == 0) + through_qos = requested.t_sup_throughput; + + if (requested.t_over_through_qos != 0) + through_qos = requested.t_over_through_qos; + + next.t_through_qos = through_qos; + + /* Calculate suppressed CPU priority */ + if (requested.t_sup_cpu && requested.t_boosted == 0) + next.t_suppressed_cpu = 1; } - return(error); -} + if (requested.terminated) { + /* + * Shoot down the throttles that slow down exit or response to SIGTERM + * We don't need to shoot down: + * passive (don't want to cause others to throttle) + * all_sockets_bg (don't need to iterate FDs on every exit) + * new_sockets_bg (doesn't matter for exiting process) + * gpu deny (doesn't matter for exiting process) + * pidsuspend (jetsam-ed BG process shouldn't run again) + * watchers_bg (watcher threads don't need to be unthrottled) + * t_latency_qos (affects userspace timers only) + */ -int -proc_add_bgtaskpolicy(task_t task, int val) -{ - int selfset = 0; + next.terminated = 1; + next.darwinbg = 0; + next.lowpri_cpu = 0; + next.io_tier = THROTTLE_LEVEL_TIER0; + if (on_task) { + next.t_tal_engaged = 0; + next.t_role = TASK_UNSPECIFIED; + next.t_suppressed_cpu = 0; + + /* TODO: This should only be shot down on SIGTERM, not exit */ + next.t_suspended = 0; + } + } - if (current_task() == task) - selfset = 1; + /* + * Step 3: + * Swap out old policy for new policy + */ - task_lock(task); + struct task_effective_policy prev = + (on_task) ? task->effective_policy : thread->effective_policy; + + /* + * Check for invalid transitions here for easier debugging + * TODO: dump the structs as hex in the panic string + */ + if (task == kernel_task && prev.all_sockets_bg != next.all_sockets_bg) + panic("unexpected network change for kernel task"); + + /* This is the point where the new values become visible to other threads */ + if (on_task) + task->effective_policy = next; + else + thread->effective_policy = next; + + /* Don't do anything further to a half-formed task or thread */ + if (in_create) + return; + + /* + * Step 4: + * Pend updates that can't be done while holding the task lock + * Preserve pending updates that may still be waiting to be applied + */ + + struct task_pended_policy pended = + (on_task) ? 
task->pended_policy : thread->pended_policy; + + if (prev.all_sockets_bg != next.all_sockets_bg) + pended.update_sockets = 1; + + if (on_task) { + /* Only re-scan the timer list if the qos level is getting less strong */ + if (prev.t_latency_qos > next.t_latency_qos) + pended.t_update_timers = 1; - if (selfset == 0) { - task->policystate.hw_bg |= val; - } else { - task->ext_policystate.hw_bg |= val; } - task_unlock(task); - return(0); + if (on_task) + task->pended_policy = pended; + else + thread->pended_policy = pended; + + /* + * Step 5: + * Update other subsystems as necessary if something has changed + */ + + boolean_t update_throttle = (prev.io_tier != next.io_tier) ? TRUE : FALSE; + + if (on_task) { + if (prev.t_suspended == 0 && next.t_suspended == 1 && task->active) { + task_hold_locked(task); + task_wait_locked(task, FALSE); + } + if (prev.t_suspended == 1 && next.t_suspended == 0 && task->active) { + task_release_locked(task); + } + + boolean_t update_threads = FALSE; + + if (prev.bg_iotier != next.bg_iotier) + update_threads = TRUE; + + if (prev.terminated != next.terminated) + update_threads = TRUE; + + task_policy_update_task_locked(task, update_throttle, update_threads); + } else { + int update_cpu = 0; + + if (prev.lowpri_cpu != next.lowpri_cpu) + update_cpu = (next.lowpri_cpu ? DO_LOWPRI_CPU : UNDO_LOWPRI_CPU); + + task_policy_update_thread_locked(thread, update_cpu, update_throttle); + } } -int -proc_add_bgthreadpolicy(task_t task, uint64_t tid, int val) +/* Despite the name, the thread's task is locked, the thread is not */ +static void +task_policy_update_thread_locked(thread_t thread, + int update_cpu, + boolean_t update_throttle) { - int selfset = 0; - thread_t self = current_thread(); - thread_t thread = THREAD_NULL; - int reset; + thread_precedence_policy_data_t policy; - if (val == 0) - reset = 1; - if (tid == self->thread_id) - selfset = 1; + if (update_throttle) { + rethrottle_thread(thread->uthread); + } - task_lock(task); - if (selfset == 0) { - thread = task_findtid(task, tid); - if (thread != NULL) - thread->ext_policystate.hw_bg |= val; - } else { - self->policystate.hw_bg |= val; + /* + * TODO: pidbind needs to stuff remembered importance into saved_importance + * properly deal with bg'ed threads being pidbound and unbging while pidbound + * + * TODO: A BG thread's priority is 0 on desktop and 4 on embedded. Need to reconcile this. + * */ + if (update_cpu == DO_LOWPRI_CPU) { + thread->saved_importance = thread->importance; + policy.importance = INT_MIN; + } else if (update_cpu == UNDO_LOWPRI_CPU) { + policy.importance = thread->saved_importance; + thread->saved_importance = 0; } - - task_unlock(task); - return(val); + /* Takes thread lock and thread mtx lock */ + if (update_cpu) + thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, + (thread_policy_t)&policy, + THREAD_PRECEDENCE_POLICY_COUNT); } -int -proc_remove_bgtaskpolicy(task_t task, int intval) +/* + * Calculate priority on a task, loop through its threads, and tell them about + * priority changes and throttle changes. 
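+ *
+ * Sketch of the derivation implemented below:
+ *
+ *	if (TASK_POLICY_LOWPRI_CPU)           priority = max_priority = MAXPRI_THROTTLE;
+ *	else if (TASK_POLICY_SUPPRESSED_CPU)  priority = max_priority = MAXPRI_SUPPRESSED;
+ *	else                                  priority = <base for t_role> + importance ('nice')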
+ */ +static void +task_policy_update_task_locked(task_t task, + boolean_t update_throttle, + boolean_t update_threads) { - int selfset = 0; + boolean_t update_priority = FALSE; - if (current_task() == task) - selfset = 1; + if (task == kernel_task) + panic("Attempting to set task policy on kernel_task"); - task_lock(task); + int priority = BASEPRI_DEFAULT; + int max_priority = MAXPRI_USER; - if (selfset == 0) { - task->policystate.hw_bg &= ~intval; + if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU)) { + priority = MAXPRI_THROTTLE; + max_priority = MAXPRI_THROTTLE; + } else if (proc_get_effective_task_policy(task, TASK_POLICY_SUPPRESSED_CPU)) { + priority = MAXPRI_SUPPRESSED; + max_priority = MAXPRI_SUPPRESSED; } else { - task->ext_policystate.hw_bg &= ~intval; + switch (proc_get_effective_task_policy(task, TASK_POLICY_ROLE)) { + case TASK_FOREGROUND_APPLICATION: + priority = BASEPRI_FOREGROUND; + break; + case TASK_BACKGROUND_APPLICATION: + priority = BASEPRI_BACKGROUND; + break; + case TASK_CONTROL_APPLICATION: + priority = BASEPRI_CONTROL; + break; + case TASK_GRAPHICS_SERVER: + priority = BASEPRI_GRAPHICS; + max_priority = MAXPRI_RESERVED; + break; + default: + break; + } + + /* factor in 'nice' value */ + priority += task->importance; } - task_unlock(task); - return(0); -} + /* avoid extra work if priority isn't changing */ + if (task->priority != priority || task->max_priority != max_priority) { + update_priority = TRUE; -int -proc_remove_bgthreadpolicy(task_t task, uint64_t tid, int val) -{ - int selfset = 0; - thread_t self = current_thread(); - thread_t thread = THREAD_NULL; - int reset; + /* update the scheduling priority for the task */ + task->max_priority = max_priority; - if (val == 0) - reset = 1; - if (tid == self->thread_id) - selfset = 1; + if (priority > task->max_priority) + priority = task->max_priority; + else if (priority < MINPRI) + priority = MINPRI; - task_lock(task); - if (selfset == 0) { - thread = task_findtid(task, tid); - if (thread != NULL) - thread->ext_policystate.hw_bg &= ~val; - } else { - self->policystate.hw_bg &= ~val; + task->priority = priority; } - - task_unlock(task); - return(val); -} + /* Loop over the threads in the task only once, and only if necessary */ + if (update_threads || update_throttle || update_priority ) { + thread_t thread; -int -proc_apply_bgtask_selfpolicy(void) -{ - return(proc_apply_bgtaskpolicy(current_task())); -} + queue_iterate(&task->threads, thread, thread_t, task_threads) { + if (update_priority) { + thread_mtx_lock(thread); -int -proc_apply_bgtaskpolicy(task_t task) -{ - int external = 1; + if (thread->active) + thread_task_priority(thread, priority, max_priority); - if (task == current_task()) - external = 0; - return(proc_apply_bgtaskpolicy_internal(task, 0, external)); -} + thread_mtx_unlock(thread); + } -int -proc_apply_bgtaskpolicy_external(task_t task) -{ - return(proc_apply_bgtaskpolicy_internal(task, 0, 1)); + if (update_throttle) { + rethrottle_thread(thread->uthread); + } + + if (update_threads) { + thread->requested_policy.bg_iotier = task->effective_policy.bg_iotier; + thread->requested_policy.terminated = task->effective_policy.terminated; + + task_policy_update_internal_locked(task, thread, FALSE); + /* The thread policy must not emit any completion actions due to this change. 
*/ + } + } + } } -static int -proc_apply_bgtaskpolicy_internal(task_t task, int locked, int external) +/* + * Called with task unlocked to do things that can't be done while holding the task lock + * To keep things consistent, only one thread can make progress through here at a time for any one task. + * + * TODO: tracepoints + */ +static void +task_policy_update_complete_unlocked(task_t task, thread_t thread) { + boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; + + task_lock(task); - if (locked == 0) + while (task->pended_policy.t_updating_policy != 0) { + assert_wait((event_t)&task->pended_policy, THREAD_UNINT); + task_unlock(task); + thread_block(THREAD_CONTINUE_NULL); task_lock(task); + } - /* if the process is exiting, no action to be done */ - if (task->proc_terminate != 0) - goto out; + /* Take a snapshot of the current state */ + + struct task_pended_policy pended = + (on_task) ? task->pended_policy : thread->pended_policy; + + struct task_effective_policy effective = + (on_task) ? task->effective_policy : thread->effective_policy; + + /* Mark the pended operations as being handled */ + if (on_task) + task->pended_policy = default_task_pended_policy; + else + thread->pended_policy = default_task_pended_policy; + + task->pended_policy.t_updating_policy = 1; + + task_unlock(task); + + /* Update the other subsystems with the new state */ + +#ifdef MACH_BSD + if (pended.update_sockets) + proc_apply_task_networkbg(task->bsd_info, thread, effective.all_sockets_bg); +#endif /* MACH_BSD */ + + if (on_task) { + /* The timer throttle has been removed, we need to look for expired timers and fire them */ + if (pended.t_update_timers) + ml_timer_evaluate(); - if (external != 0) { - /* allready set? */ - if (task->ext_appliedstate.hw_bg != task->ext_policystate.hw_bg) { - task->ext_appliedstate.hw_bg = task->ext_policystate.hw_bg; - task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE); - /* background state applied */ - } - } else { - if (task->appliedstate.hw_bg != task->policystate.hw_bg) { - task->appliedstate.hw_bg = task->policystate.hw_bg; - task_priority(task, MAXPRI_THROTTLE, MAXPRI_THROTTLE); - } } -out: - if (locked == 0) - task_unlock(task); - return(0); + + /* Wake up anyone waiting to make another update */ + task_lock(task); + task->pended_policy.t_updating_policy = 0; + thread_wakeup(&task->pended_policy); + task_unlock(task); } -/* apply the self backgrounding even if the thread is not current thread */ -int -proc_apply_workq_bgthreadpolicy(thread_t thread) +/* + * Initiate a task policy state transition + * + * Everything that modifies requested except functions that need to hold the task lock + * should use this function + * + * Argument validation should be performed before reaching this point. + * + * TODO: Do we need to check task->active or thread->active? 
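+ *
+ * Hypothetical usage, externally marking a task DARWIN_BG (illustrative):
+ *
+ *	proc_set_task_policy(task, THREAD_NULL, TASK_POLICY_EXTERNAL,
+ *	    TASK_POLICY_DARWIN_BG, TASK_POLICY_ENABLE);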
+ */ +void +proc_set_task_policy(task_t task, + thread_t thread, + int category, + int flavor, + int value) { - int error; - task_t wqtask = TASK_NULL; + task_lock(task); - if (thread != THREAD_NULL) { - wqtask = thread->task; - task_lock(wqtask); - /* apply the background as selfset internal one */ - error = proc_apply_bgthreadpolicy_locked(thread, 1); - task_unlock(wqtask); - } else - error = ESRCH; + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START, + proc_selfpid(), targetid(task, thread), trequested(task, thread), value, 0); - return(error); + proc_set_task_policy_locked(task, thread, category, flavor, value); + + task_policy_update_locked(task, thread); + + task_unlock(task); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END, + proc_selfpid(), targetid(task, thread), trequested(task, thread), tpending(task, thread), 0); + + task_policy_update_complete_unlocked(task, thread); } -int -proc_apply_bgthreadpolicy(task_t task, uint64_t tid) +/* + * Initiate a task policy state transition on a thread with its TID + * Useful if you cannot guarantee the thread won't get terminated + */ +void +proc_set_task_policy_thread(task_t task, + uint64_t tid, + int category, + int flavor, + int value) { - int selfset = 0, error = 0; + thread_t thread; thread_t self = current_thread(); - thread_t thread = THREAD_NULL; - task_t localtask = TASK_NULL; - if (tid == self->thread_id) { - selfset = 1; - localtask = current_task(); - } else - localtask = task; + task_lock(task); - task_lock(localtask); - if (selfset != 0) { + if (tid == TID_NULL || tid == self->thread_id) thread = self; - } else { - thread = task_findtid(localtask, tid); + else + thread = task_findtid(task, tid); + + if (thread == THREAD_NULL) { + task_unlock(task); + return; } - error = proc_apply_bgthreadpolicy_locked(thread, selfset); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START, + proc_selfpid(), targetid(task, thread), trequested(task, thread), value, 0); - task_unlock(localtask); - - return(error); + proc_set_task_policy_locked(task, thread, category, flavor, value); + + task_policy_update_locked(task, thread); + + task_unlock(task); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END, + proc_selfpid(), targetid(task, thread), trequested(task, thread), tpending(task, thread), 0); + + task_policy_update_complete_unlocked(task, thread); } -static int -proc_apply_bgthreadpolicy_locked(thread_t thread, int selfset) + +/* + * Set the requested state for a specific flavor to a specific value. + * + * TODO: + * Verify that arguments to non iopol things are 1 or 0 + */ +static void +proc_set_task_policy_locked(task_t task, + thread_t thread, + int category, + int flavor, + int value) { - int set = 0; - thread_precedence_policy_data_t policy; + boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; + + int tier, passive; + struct task_requested_policy requested = + (on_task) ? 
task->requested_policy : thread->requested_policy; - if (thread != NULL) { - /* if the process is exiting, no action to be done */ - if (thread->task->proc_terminate != 0) - goto out; + switch (flavor) { + + /* Category: EXTERNAL and INTERNAL, thread and task */ + + case TASK_POLICY_DARWIN_BG: + if (category == TASK_POLICY_EXTERNAL) + requested.ext_darwinbg = value; + else + requested.int_darwinbg = value; + break; - if (selfset != 0) { - /* internal application */ - if (thread->appliedstate.hw_bg != thread->policystate.hw_bg) { - thread->appliedstate.hw_bg = thread->policystate.hw_bg; - if (thread->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) - set = 1; - + case TASK_POLICY_IOPOL: + proc_iopol_to_tier(value, &tier, &passive); + if (category == TASK_POLICY_EXTERNAL) { + requested.ext_iotier = tier; + requested.ext_iopassive = passive; + } else { + requested.int_iotier = tier; + requested.int_iopassive = passive; } - } else { - /* external application */ - if (thread->ext_appliedstate.hw_bg != thread->ext_policystate.hw_bg) { - thread->ext_appliedstate.hw_bg = thread->ext_policystate.hw_bg; - if (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) - set = 1; + break; + + case TASK_POLICY_IO: + if (category == TASK_POLICY_EXTERNAL) + requested.ext_iotier = value; + else + requested.int_iotier = value; + break; + + case TASK_POLICY_PASSIVE_IO: + if (category == TASK_POLICY_EXTERNAL) + requested.ext_iopassive = value; + else + requested.int_iopassive = value; + break; + + /* Category: EXTERNAL and INTERNAL, task only */ + + case TASK_POLICY_GPU_DENY: + assert(on_task); + if (category == TASK_POLICY_EXTERNAL) + requested.t_ext_gpu_deny = value; + else + requested.t_int_gpu_deny = value; + break; + + case TASK_POLICY_DARWIN_BG_AND_GPU: + assert(on_task); + if (category == TASK_POLICY_EXTERNAL) { + requested.ext_darwinbg = value; + requested.t_ext_gpu_deny = value; + } else { + requested.int_darwinbg = value; + requested.t_int_gpu_deny = value; } - } - - if (set != 0) { -#if CONFIG_EMBEDDED - if (thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) { - thread->saved_importance = thread->importance; - } -#endif /* CONFIG_EMBEDDED */ - /* set thread priority (we did not save previous value) */ - policy.importance = INT_MIN; - - thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, - (thread_policy_t)&policy, - THREAD_PRECEDENCE_POLICY_COUNT ); + break; - } - } else - return(ESRCH); + /* Category: INTERNAL, task only */ -out: - return(0); + case TASK_POLICY_DARWIN_BG_IOPOL: + assert(on_task && category == TASK_POLICY_INTERNAL); + proc_iopol_to_tier(value, &tier, &passive); + requested.bg_iotier = tier; + break; + + /* Category: ATTRIBUTE, task only */ + + case TASK_POLICY_TAL: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.t_tal_enabled = value; + break; + + case TASK_POLICY_BOOST: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.t_boosted = value; + break; + + case TASK_POLICY_ROLE: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.t_role = value; + break; + + case TASK_POLICY_TERMINATED: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + requested.terminated = value; + break; + + /* Category: ATTRIBUTE, thread only */ + + case TASK_POLICY_PIDBIND_BG: + assert(!on_task && category == TASK_POLICY_ATTRIBUTE); + requested.th_pidbind_bg = value; + break; + + case TASK_POLICY_WORKQ_BG: + assert(!on_task && category == TASK_POLICY_ATTRIBUTE); + requested.th_workq_bg = value; + 
break; + + default: + panic("unknown task policy: %d %d %d", category, flavor, value); + break; + } + + if (on_task) + task->requested_policy = requested; + else + thread->requested_policy = requested; } -#if CONFIG_EMBEDDED -/* set external application of background */ -static void -apply_bgthreadpolicy_external(thread_t thread) + +/* + * Gets what you set. Effective values may be different. + */ +int +proc_get_task_policy(task_t task, + thread_t thread, + int category, + int flavor) { -int set = 0; -thread_precedence_policy_data_t policy; + boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; - /* if the process is exiting, no action to be done */ - if (thread->task->proc_terminate != 0) - return; + int value = 0; + + task_lock(task); - thread->ext_policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL; + struct task_requested_policy requested = + (on_task) ? task->requested_policy : thread->requested_policy; - if (thread->ext_appliedstate.hw_bg != thread->ext_policystate.hw_bg) { - thread->ext_appliedstate.hw_bg = thread->ext_policystate.hw_bg; - if (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) - set = 1; + switch (flavor) { + case TASK_POLICY_DARWIN_BG: + if (category == TASK_POLICY_EXTERNAL) + value = requested.ext_darwinbg; + else + value = requested.int_darwinbg; + break; + case TASK_POLICY_IOPOL: + if (category == TASK_POLICY_EXTERNAL) + value = proc_tier_to_iopol(requested.ext_iotier, + requested.ext_iopassive); + else + value = proc_tier_to_iopol(requested.int_iotier, + requested.int_iopassive); + break; + case TASK_POLICY_IO: + if (category == TASK_POLICY_EXTERNAL) + value = requested.ext_iotier; + else + value = requested.int_iotier; + break; + case TASK_POLICY_PASSIVE_IO: + if (category == TASK_POLICY_EXTERNAL) + value = requested.ext_iopassive; + else + value = requested.int_iopassive; + break; + case TASK_POLICY_GPU_DENY: + assert(on_task); + if (category == TASK_POLICY_EXTERNAL) + value = requested.t_ext_gpu_deny; + else + value = requested.t_int_gpu_deny; + break; + case TASK_POLICY_DARWIN_BG_IOPOL: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + value = proc_tier_to_iopol(requested.bg_iotier, 0); + break; + case TASK_POLICY_ROLE: + assert(on_task && category == TASK_POLICY_ATTRIBUTE); + value = requested.t_role; + break; + default: + panic("unknown policy_flavor %d", flavor); + break; } - if (set != 0) { - /* set thread priority (we did not save previous value) */ - policy.importance = INT_MIN; + task_unlock(task); - thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, - (thread_policy_t)&policy, - THREAD_PRECEDENCE_POLICY_COUNT ); + return value; +} + + +/* + * Functions for querying effective state for relevant subsystems + * ONLY the relevant subsystem should query these. + * NEVER take a value from one of the 'effective' functions and stuff it into a setter. + */ + +int +proc_get_effective_task_policy(task_t task, int flavor) +{ + return proc_get_effective_policy(task, THREAD_NULL, flavor); +} + +int +proc_get_effective_thread_policy(thread_t thread, int flavor) +{ + return proc_get_effective_policy(thread->task, thread, flavor); +} + +/* + * Gets what is actually in effect, for subsystems which pull policy instead of receive updates. + * + * NOTE: This accessor does not take the task lock. + * Notifications of state updates need to be externally synchronized with state queries. + * This routine *MUST* remain interrupt safe, as it is potentially invoked + * within the context of a timer interrupt. 
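+ *
+ * For example, an I/O path can sample its throttle tier lock-free with:
+ *
+ *	tier = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);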
+ */ +static int +proc_get_effective_policy(task_t task, + thread_t thread, + int flavor) +{ + boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; + int value = 0; + + switch (flavor) { + case TASK_POLICY_DARWIN_BG: + /* + * This backs the KPI call proc_pidbackgrounded to find + * out if a pid is backgrounded, + * as well as proc_get_effective_thread_policy. + * Its main use is within the timer layer, as well as + * prioritizing requests to the graphics system. + * Returns 1 for background mode, 0 for normal mode + */ + if (on_task) + value = task->effective_policy.darwinbg; + else + value = (task->effective_policy.darwinbg || + thread->effective_policy.darwinbg) ? 1 : 0; + break; + case TASK_POLICY_IO: + /* + * The I/O system calls here to find out what throttling tier to apply to an operation. + * Returns THROTTLE_LEVEL_* values + */ + if (on_task) + value = task->effective_policy.io_tier; + else { + value = MAX(task->effective_policy.io_tier, + thread->effective_policy.io_tier); + if (thread->iotier_override != THROTTLE_LEVEL_NONE) + value = MIN(value, thread->iotier_override); + } + break; + case TASK_POLICY_PASSIVE_IO: + /* + * The I/O system calls here to find out whether an operation should be passive. + * (i.e. not cause operations with lower throttle tiers to be throttled) + * Returns 1 for passive mode, 0 for normal mode + */ + if (on_task) + value = task->effective_policy.io_passive; + else + value = (task->effective_policy.io_passive || + thread->effective_policy.io_passive) ? 1 : 0; + break; + case TASK_POLICY_NEW_SOCKETS_BG: + /* + * socreate() calls this to determine if it should mark a new socket as background + * Returns 1 for background mode, 0 for normal mode + */ + if (on_task) + value = task->effective_policy.new_sockets_bg; + else + value = (task->effective_policy.new_sockets_bg || + thread->effective_policy.new_sockets_bg) ? 1 : 0; + break; + case TASK_POLICY_LOWPRI_CPU: + /* + * Returns 1 for low priority cpu mode, 0 for normal mode + */ + if (on_task) + value = task->effective_policy.lowpri_cpu; + else + value = (task->effective_policy.lowpri_cpu || + thread->effective_policy.lowpri_cpu) ? 1 : 0; + break; + case TASK_POLICY_SUPPRESSED_CPU: + /* + * Returns 1 for suppressed cpu mode, 0 for normal mode + */ + assert(on_task); + value = task->effective_policy.t_suppressed_cpu; + break; + case TASK_POLICY_LATENCY_QOS: + /* + * timer arming calls into here to find out the timer coalescing level + * Returns a QoS tier (0-6) + */ + assert(on_task); + value = task->effective_policy.t_latency_qos; + break; + case TASK_POLICY_THROUGH_QOS: + /* + * Returns a QoS tier (0-6) + */ + assert(on_task); + value = task->effective_policy.t_through_qos; + break; + case TASK_POLICY_GPU_DENY: + /* + * This is where IOKit calls into task_policy to find out whether + * it should allow access to the GPU. + * Returns 1 for NOT allowed, returns 0 for allowed + */ + assert(on_task); + value = task->effective_policy.t_gpu_deny; + break; + case TASK_POLICY_ROLE: + assert(on_task); + value = task->effective_policy.t_role; + break; + case TASK_POLICY_WATCHERS_BG: + assert(on_task); + value = task->effective_policy.t_watchers_bg; + break; + default: + panic("unknown policy_flavor %d", flavor); + break; } -} -#endif /* CONFIG_EMBEDDED */ - -int -proc_apply_bgthread_selfpolicy(void) -{ - return(proc_apply_bgthreadpolicy(current_task(), current_thread()->thread_id)); + return value; } +/* + * Convert from IOPOL_* values to throttle tiers. 
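+ *
+ * Mapping implemented below ('passive' is a modifier, not a tier):
+ *
+ *	IOPOL_IMPORTANT -> TIER0		IOPOL_UTILITY  -> TIER2
+ *	IOPOL_PASSIVE   -> TIER0 + passive	IOPOL_THROTTLE -> TIER3
+ *	IOPOL_STANDARD  -> TIER1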
+ * + * TODO: Can this be made more compact, like an array lookup + * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future + */ -int -proc_restore_bgtaskpolicy(task_t task) +static void +proc_iopol_to_tier(int iopolicy, int *tier, int *passive) { - int external = 1; - - if (current_task() == task) - external = 0; - return(proc_restore_bgtaskpolicy_internal(task, 0, external, BASEPRI_DEFAULT)); + *passive = 0; + *tier = 0; + switch (iopolicy) { + case IOPOL_IMPORTANT: + *tier = THROTTLE_LEVEL_TIER0; + break; + case IOPOL_PASSIVE: + *tier = THROTTLE_LEVEL_TIER0; + *passive = 1; + break; + case IOPOL_STANDARD: + *tier = THROTTLE_LEVEL_TIER1; + break; + case IOPOL_UTILITY: + *tier = THROTTLE_LEVEL_TIER2; + break; + case IOPOL_THROTTLE: + *tier = THROTTLE_LEVEL_TIER3; + break; + default: + panic("unknown I/O policy %d", iopolicy); + break; + } } static int -proc_restore_bgtaskpolicy_internal(task_t task, int locked, int external, int pri) +proc_tier_to_iopol(int tier, int passive) { - if (locked == 0) - task_lock(task); - - /* if the process is exiting, no action to be done */ - if (task->proc_terminate != 0) - goto out; - - if (external != 0) { - task->ext_appliedstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE; - /* self BG in flight? */ - if (task->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) { - task_priority(task, pri, MAXPRI_USER); -#if CONFIG_EMBEDDED - task->role = TASK_DEFAULT_APPLICATION; -#endif /* CONFIG_EMBEDDED */ + if (passive == 1) { + switch (tier) { + case THROTTLE_LEVEL_TIER0: + return IOPOL_PASSIVE; + break; + default: + panic("unknown passive tier %d", tier); + return IOPOL_DEFAULT; + break; } - } else { - task->appliedstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE; - /* external BG in flight? 
*/ - if (task->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) { - task_priority(task, pri, MAXPRI_USER); -#if CONFIG_EMBEDDED - task->role = TASK_DEFAULT_APPLICATION; -#endif /* CONFIG_EMBEDDED */ + } else { + switch (tier) { + case THROTTLE_LEVEL_NONE: + return IOPOL_DEFAULT; + break; + case THROTTLE_LEVEL_TIER0: + return IOPOL_IMPORTANT; + break; + case THROTTLE_LEVEL_TIER1: + return IOPOL_STANDARD; + break; + case THROTTLE_LEVEL_TIER2: + return IOPOL_UTILITY; + break; + case THROTTLE_LEVEL_TIER3: + return IOPOL_THROTTLE; + break; + default: + panic("unknown tier %d", tier); + return IOPOL_DEFAULT; + break; } } -out: - if (locked == 0) - task_unlock(task); +} + +/* apply internal backgrounding for workqueue threads */ +int +proc_apply_workq_bgthreadpolicy(thread_t thread) +{ + if (thread == THREAD_NULL) + return ESRCH; + + proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, + TASK_POLICY_WORKQ_BG, TASK_POLICY_ENABLE); return(0); } -/* restore the self backgrounding even if the thread is not current thread */ -int +/* + * remove internal backgrounding for workqueue threads + * does NOT go find sockets created while BG and unbackground them + */ +int proc_restore_workq_bgthreadpolicy(thread_t thread) { - int error = 0; - task_t wqtask = TASK_NULL; - int importance = 0; - - if (thread != THREAD_NULL) { - wqtask = thread->task; - task_lock(wqtask); - /* remove the background and restore default importance as self(internal) removal */ -#if CONFIG_EMBEDDED - if (thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) { - /* restore prev set importnace */ - importance = thread->saved_importance; - thread->saved_importance = 0; - } -#endif /* CONFIG_EMBEDDED */ - restore_bgthreadpolicy_locked(thread, 1, importance); - task_unlock(wqtask); - } else - error = ESRCH; + if (thread == THREAD_NULL) + return ESRCH; - return(error); + proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, + TASK_POLICY_WORKQ_BG, TASK_POLICY_DISABLE); + + return(0); } -int -proc_restore_bgthread_selfpolicy(void) +/* here for temporary compatibility */ +int +proc_setthread_saved_importance(__unused thread_t thread, __unused int importance) { - return(proc_restore_bgthreadpolicy(current_task(), thread_tid(current_thread()))); + return(0); } -int -proc_restore_bgthreadpolicy(task_t task, uint64_t tid) -{ +/* + * Set an override on the thread which is consulted with a + * higher priority than the task/thread policy. This should + * only be set for temporary grants until the thread + * returns to the userspace boundary + * + * We use atomic operations to swap in the override, with + * the assumption that the thread itself can + * read the override and clear it on return to userspace. + * + * No locking is performed, since it is acceptable to see + * a stale override for one loop through throttle_lowpri_io(). + * However a thread reference must be held on the thread. 
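+ *
+ * The compare-and-swap loop below behaves, in effect, like (sketch):
+ *
+ *	atomically: iotier_override = MIN(iotier_override, policy)
+ *	            (THROTTLE_LEVEL_NONE meaning "no override yet")
+ *
+ * so racing overrides resolve to the most aggressive (lowest) tier.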
+ */ - int selfset = 0; - thread_t self = current_thread(); - thread_t thread = THREAD_NULL; - int importance = 0; +void set_thread_iotier_override(thread_t thread, int policy) +{ + int current_override; - if (tid == self->thread_id) - selfset = 1; + /* Let most aggressive I/O policy win until user boundary */ + do { + current_override = thread->iotier_override; - task_lock(task); - if (selfset == 0) { - thread = task_findtid(task, tid); - } else { - thread = self; - } + if (current_override != THROTTLE_LEVEL_NONE) + policy = MIN(current_override, policy); - if (thread != NULL) { -#if CONFIG_EMBEDDED - if (thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) { - /* restore prev set importnace */ - importance = thread->saved_importance; - thread->saved_importance = 0; + if (current_override == policy) { + /* no effective change */ + return; } -#endif /* CONFIG_EMBEDDED */ - restore_bgthreadpolicy_locked(thread, selfset, importance); - } - task_unlock(task); + } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override)); - if (thread != NULL) - return(0); - else - return(1); + /* + * Since the thread may be currently throttled, + * re-evaluate tiers and potentially break out + * of an msleep + */ + rethrottle_thread(thread->uthread); } -static void -restore_bgthreadpolicy_locked(thread_t thread, int selfset, int importance) +/* + * Called at process exec to initialize the apptype of a process + */ +void +proc_set_task_apptype(task_t task, int apptype) { - thread_precedence_policy_data_t policy; - int reset = 0; + task_lock(task); - if (thread != NULL) { - /* if the process is exiting, no action to be done */ - if (thread->task->proc_terminate != 0) - return; + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START, + proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), + apptype, 0); - if (selfset != 0) { - thread->appliedstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE; - /* external BG in flight? */ - if (thread->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) - reset = 1; - - } else { - thread->ext_appliedstate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE; - /* self BG in flight? 
*/ - if (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_NONE) - reset = 1; - } - - if (reset != 0) { - /* reset thread priority (we did not save previous value) */ - policy.importance = importance; - thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, - (thread_policy_t)&policy, - THREAD_PRECEDENCE_POLICY_COUNT ); - } - } -} + switch (apptype) { + case TASK_APPTYPE_APP_TAL: + /* TAL starts off enabled by default */ + task->requested_policy.t_tal_enabled = 1; + /* fall through */ -void -#if CONFIG_EMBEDDED -proc_set_task_apptype(task_t task, int type, thread_t thread) -#else -proc_set_task_apptype(task_t task, int type, __unused thread_t thread) -#endif -{ -#if CONFIG_EMBEDDED - thread_t th = THREAD_NULL; -#endif /* CONFIG_EMBEDDED */ + case TASK_APPTYPE_APP_DEFAULT: + case TASK_APPTYPE_DAEMON_INTERACTIVE: + task->requested_policy.t_apptype = apptype; - switch (type) { -#if CONFIG_EMBEDDED - case PROC_POLICY_IOS_RESV1_APPTYPE: - task->ext_policystate.apptype = type; - task->policystate.apptype = type; - proc_apply_bgtaskpolicy_external(task); - /* indicate that BG is set and next foreground needs to reset */ - task->ext_appliedstate.apptype = type; + task_importance_mark_donor(task, TRUE); + /* Apps (and interactive daemons) are boost receivers on desktop for suppression behaviors */ + task_importance_mark_receiver(task, TRUE); break; - case PROC_POLICY_IOS_APPLE_DAEMON: - task->ext_policystate.apptype = type; - task->policystate.apptype = type; - task->ext_appliedstate.apptype = type; - /* posix spawn will already have thread created, so backround it */ - if (thread == NULL) - th = current_thread(); - else - th = thread; - if (th->appliedstate.hw_bg != TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL) { - /* apply self backgrounding if not already set */ - task_lock(th->task); - proc_apply_bgthreadpolicy_locked(th, 1); - task_unlock(th->task); - } - break; - - case PROC_POLICY_IOS_APPTYPE: - task->ext_policystate.apptype = type; - task->policystate.apptype = type; - break; - case PROC_POLICY_IOS_NONUITYPE: - task->ext_policystate.apptype = type; - task->policystate.apptype = type; - /* set to deny access to gpu */ - task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; - task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; + case TASK_APPTYPE_DAEMON_STANDARD: + task->requested_policy.t_apptype = apptype; + + task_importance_mark_donor(task, TRUE); + task_importance_mark_receiver(task, FALSE); break; -#else /* CONFIG_EMBEDDED */ - case PROC_POLICY_OSX_APPTYPE_TAL: - task->ext_policystate.apptype = type; - task->policystate.apptype = type; - proc_apply_bgtaskpolicy_external(task); - /* indicate that BG is set and next foreground needs to reset */ - task->ext_appliedstate.apptype = type; + + case TASK_APPTYPE_DAEMON_ADAPTIVE: + task->requested_policy.t_apptype = apptype; + + task_importance_mark_donor(task, FALSE); + task_importance_mark_receiver(task, TRUE); break; - case PROC_POLICY_OSX_APPTYPE_DBCLIENT: - task->ext_policystate.apptype = type; - task->policystate.apptype = type; - proc_apply_bgtaskpolicy_internal(task, 0, 0); + case TASK_APPTYPE_DAEMON_BACKGROUND: + task->requested_policy.t_apptype = apptype; + + task_importance_mark_donor(task, FALSE); + task_importance_mark_receiver(task, FALSE); break; - -#endif /* CONFIG_EMBEDDED */ default: + panic("invalid apptype %d", apptype); break; } + + task_policy_update_locked(task, THREAD_NULL); + + task_unlock(task); + + task_policy_update_complete_unlocked(task, THREAD_NULL); + +
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END, + proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), + task->imp_receiver, 0); +} + +/* for process_policy to check before attempting to set */ +boolean_t +proc_task_is_tal(task_t task) +{ + return (task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE; } -/* update the darwin backdground action state in the flags field for libproc */ -#define PROC_FLAG_DARWINBG 0x8000 /* process in darwin background */ -#define PROC_FLAG_EXT_DARWINBG 0x10000 /* process in darwin background - external enforcement */ -#define PROC_FLAG_IOS_APPLEDAEMON 0x20000 /* process is apple ios daemon */ +/* for telemetry */ +integer_t +task_grab_latency_qos(task_t task) +{ + return task_qos_latency_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS)); +} +/* update the darwin background action state in the flags field for libproc */ int proc_get_darwinbgstate(task_t task, uint32_t * flagsp) { - if (task->ext_appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL){ + if (task->requested_policy.ext_darwinbg) *flagsp |= PROC_FLAG_EXT_DARWINBG; - } - if (task->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL){ + + if (task->requested_policy.int_darwinbg) *flagsp |= PROC_FLAG_DARWINBG; - } -#if CONFIG_EMBEDDED - if (task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) { - *flagsp |= PROC_FLAG_IOS_APPLEDAEMON; - } -#endif /* CONFIG_EMBEDDED */ - - return(0); -} -/* - * HW disk access realted routines, they need to return - * IOPOL_XXX equivalents for spec_xxx/throttle updates. - */ -int -proc_get_task_disacc(task_t task) -{ -#if CONFIG_EMBEDDED - if ((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); -#else /* CONFIG_EMBEDDED */ - if ((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) { - /* if it is a TAL or DBClient and not self throttled, return Utility */ - if ((task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_TAL) || (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) { - /* any setting for DBG, we need to honor that */ - if ((task->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE) && - ((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE)!= 0) && - (task->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE)) { - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_UTILITY); - } else - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - } else - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - } -#endif /* CONFIG_EMBEDDED */ - if (task->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS) - return(task->ext_appliedstate.hw_disk); - if ((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - if (task->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS) - return(task->appliedstate.hw_disk); - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS); -} + if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) + *flagsp |= PROC_FLAG_ADAPTIVE; -int -proc_get_task_selfdiskacc_internal(task_t task, thread_t thread) -{ - /* if the task is marked for proc_terminate, no throttling for it */ - if (task->proc_terminate != 0) - goto out; - /* - * As per defined iopolicysys behavior, thread 
trumps task. - * Do we need to follow that for external enforcements of BG or hw access? - * Status quo for now.. - */ - - if((thread->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - if (thread->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS) - return(thread->ext_appliedstate.hw_disk); - if((thread->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - if (thread->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS) - return(thread->appliedstate.hw_disk); - -#if CONFIG_EMBEDDED - if ((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); -#else /* CONFIG_EMBEDDED */ - if ((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) { - /* if it is a TAL or DBClient and not self throttled, return Utility */ - if ((task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_TAL) || (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_DBCLIENT)) { - /* any setting for DBG, we need to honor that */ - if ((task->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE) && - ((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE)!= 0) && - (task->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE)) { - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_UTILITY); - } else - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - } else - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - } -#endif /* CONFIG_EMBEDDED */ - if (task->ext_appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS) - return(task->ext_appliedstate.hw_disk); - if ((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - if (task->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS) - return(task->appliedstate.hw_disk); -out: - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS); -} + if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && task->requested_policy.t_boosted == 1) + *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT; + if (task->imp_donor) + *flagsp |= PROC_FLAG_IMPORTANCE_DONOR; -int -proc_get_task_selfdiskacc(void) -{ - return(proc_get_task_selfdiskacc_internal(current_task(), current_thread())); -} + if (task->effective_policy.t_sup_active) + *flagsp |= PROC_FLAG_SUPPRESSED; + return(0); +} -int -proc_get_diskacc(thread_t thread) +/* All per-thread state is in the first 32-bits of the bitfield */ +void +proc_get_thread_policy(thread_t thread, thread_policy_state_t info) { - return(proc_get_task_selfdiskacc_internal(thread->task, thread)); + task_t task = thread->task; + task_lock(task); + info->requested = (integer_t)task_requested_bitfield(task, thread); + info->effective = (integer_t)task_effective_bitfield(task, thread); + info->pending = (integer_t)task_pending_bitfield(task, thread); + task_unlock(task); } -int -proc_get_thread_selfdiskacc(void) +/* dump requested for tracepoint */ +static uintptr_t +trequested(task_t task, thread_t thread) { - thread_t thread = current_thread(); - - if((thread->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - if (thread->ext_appliedstate.hw_disk != 
TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS) - return(thread->ext_appliedstate.hw_disk); - if((thread->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_DISKTHROTTLE) != 0) - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); - if (thread->appliedstate.hw_disk != TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS) - return(thread->appliedstate.hw_disk); - return(TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS); + return (uintptr_t) task_requested_bitfield(task, thread); } -int -proc_apply_task_diskacc(task_t task, int policy) +/* dump effective for tracepoint */ +static uintptr_t +teffective(task_t task, thread_t thread) { - task_t self = current_task(); - - task_lock(task); - if (task == self) { - task->appliedstate.hw_disk = policy; - task->policystate.hw_disk = policy; - } else { - task->ext_appliedstate.hw_disk = policy; - task->ext_policystate.hw_disk = policy; - } - task_unlock(task); - return(0); + return (uintptr_t) task_effective_bitfield(task, thread); } -int -proc_apply_thread_diskacc(task_t task, uint64_t tid, int policy) +/* dump pending for tracepoint */ +static uintptr_t +tpending(task_t task, thread_t thread) { - thread_t thread; - - if (tid == TID_NULL) { - thread = current_thread(); - proc_apply_thread_selfdiskacc(policy); - } else { - task_lock(task); - thread = task_findtid(task, tid); - if (thread != NULL) { - thread->ext_appliedstate.hw_disk = policy; - thread->ext_policystate.hw_disk = policy; - } - task_unlock(task); - } - if (thread != NULL) - return(0); - else - return(0); + return (uintptr_t) task_pending_bitfield(task, thread); } -void -proc_task_remove_throttle(task_t task) +uint64_t +task_requested_bitfield(task_t task, thread_t thread) { - thread_t thread; - int importance = 0; + uint64_t bits = 0; + struct task_requested_policy requested = + (thread == THREAD_NULL) ? task->requested_policy : thread->requested_policy; - task_lock(task); + bits |= (requested.int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0); + bits |= (requested.ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0); + bits |= (requested.int_iotier ? (((uint64_t)requested.int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0); + bits |= (requested.ext_iotier ? (((uint64_t)requested.ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0); + bits |= (requested.int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0); + bits |= (requested.ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0); + bits |= (requested.bg_iotier ? (((uint64_t)requested.bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0); + bits |= (requested.terminated ? POLICY_REQ_TERMINATED : 0); + bits |= (requested.th_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0); + bits |= (requested.th_workq_bg ? POLICY_REQ_WORKQ_BG : 0); - /* remove processwide internal DBG applicationn */ - proc_restore_bgtaskpolicy_internal(task, 1, 0, BASEPRI_DEFAULT); - /* remove processwide external DBG applicationn */ - proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT); + bits |= (requested.t_boosted ? POLICY_REQ_BOOSTED : 0); + bits |= (requested.t_tal_enabled ? POLICY_REQ_TAL_ENABLED : 0); + bits |= (requested.t_int_gpu_deny ? POLICY_REQ_INT_GPU_DENY : 0); + bits |= (requested.t_ext_gpu_deny ? POLICY_REQ_EXT_GPU_DENY : 0); + bits |= (requested.t_apptype ? (((uint64_t)requested.t_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0); + bits |= (requested.t_role ? 
(((uint64_t)requested.t_role) << POLICY_REQ_ROLE_SHIFT) : 0); - for (thread = (thread_t)queue_first(&task->threads); - !queue_end(&task->threads, (queue_entry_t)thread); ) { -#if CONFIG_EMBEDDED - if (thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) { - /* restore prev set importnace */ - importance = thread->saved_importance; - thread->saved_importance = 0; - } -#endif /* CONFIG_EMBEDDED */ - /* remove thread level internal DBG application */ - restore_bgthreadpolicy_locked(thread, 1, importance); - /* remove thread level external DBG application */ - restore_bgthreadpolicy_locked(thread, 0, importance); - /* reset thread io policy */ - thread->ext_appliedstate.hw_disk = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS; - thread->appliedstate.hw_disk = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS; - unthrottle_thread(thread->uthread); - thread = (thread_t)queue_next(&thread->task_threads); - } - - /* reset task iopolicy */ - task->ext_appliedstate.hw_disk = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS; - task->appliedstate.hw_disk = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_FULLACCESS; - task->proc_terminate = 1; + bits |= (requested.t_sup_active ? POLICY_REQ_SUP_ACTIVE : 0); + bits |= (requested.t_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0); + bits |= (requested.t_sup_cpu ? POLICY_REQ_SUP_CPU : 0); + bits |= (requested.t_sup_timer ? (((uint64_t)requested.t_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0); + bits |= (requested.t_sup_throughput ? (((uint64_t)requested.t_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0); + bits |= (requested.t_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : 0); + bits |= (requested.t_sup_cpu_limit ? POLICY_REQ_SUP_CPU_LIMIT : 0); + bits |= (requested.t_sup_suspend ? POLICY_REQ_SUP_SUSPEND : 0); + bits |= (requested.t_base_latency_qos ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0); + bits |= (requested.t_over_latency_qos ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0); + bits |= (requested.t_base_through_qos ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0); + bits |= (requested.t_over_through_qos ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0); - task_unlock(task); + return bits; } +uint64_t +task_effective_bitfield(task_t task, thread_t thread) +{ + uint64_t bits = 0; + struct task_effective_policy effective = + (thread == THREAD_NULL) ? task->effective_policy : thread->effective_policy; + bits |= (effective.io_tier ? (((uint64_t)effective.io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0); + bits |= (effective.io_passive ? POLICY_EFF_IO_PASSIVE : 0); + bits |= (effective.darwinbg ? POLICY_EFF_DARWIN_BG : 0); + bits |= (effective.lowpri_cpu ? POLICY_EFF_LOWPRI_CPU : 0); + bits |= (effective.terminated ? POLICY_EFF_TERMINATED : 0); + bits |= (effective.all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0); + bits |= (effective.new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0); + bits |= (effective.bg_iotier ? (((uint64_t)effective.bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0); -int -proc_apply_thread_selfdiskacc(int policy) -{ - task_t task = current_task(); - thread_t thread = current_thread(); + bits |= (effective.t_gpu_deny ? POLICY_EFF_GPU_DENY : 0); + bits |= (effective.t_tal_engaged ? POLICY_EFF_TAL_ENGAGED : 0); + bits |= (effective.t_suspended ? POLICY_EFF_SUSPENDED : 0); + bits |= (effective.t_watchers_bg ? POLICY_EFF_WATCHERS_BG : 0); + bits |= (effective.t_sup_active ? 
POLICY_EFF_SUP_ACTIVE : 0); + bits |= (effective.t_suppressed_cpu ? POLICY_EFF_SUP_CPU : 0); + bits |= (effective.t_role ? (((uint64_t)effective.t_role) << POLICY_EFF_ROLE_SHIFT) : 0); + bits |= (effective.t_latency_qos ? (((uint64_t)effective.t_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0); + bits |= (effective.t_through_qos ? (((uint64_t)effective.t_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0); - task_lock(task); - thread->appliedstate.hw_disk = policy; - thread->policystate.hw_disk = policy; - task_unlock(task); - return(0); + return bits; } -int -proc_denyinherit_policy(__unused task_t task) +uint64_t +task_pending_bitfield(task_t task, thread_t thread) { - return(0); -} + uint64_t bits = 0; + struct task_pended_policy pended = + (thread == THREAD_NULL) ? task->pended_policy : thread->pended_policy; -int -proc_denyselfset_policy(__unused task_t task) -{ - return(0); -} + bits |= (pended.t_updating_policy ? POLICY_PEND_UPDATING : 0); + bits |= (pended.update_sockets ? POLICY_PEND_SOCKETS : 0); -/* HW GPU access related routines */ -int -proc_get_task_selfgpuacc_deny(void) -{ - task_t task = current_task(); -#ifdef NOTYET - thread_t thread = current_thread(); -#endif /* NOTYET */ - - if (((task->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (task->ext_appliedstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS)) - return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS); - if (((task->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (task->appliedstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS)) - return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS); -#ifdef NOTYET - /* - * Since background dispatch items run in a thread can also be - * denied access, we need to make sure there are no unintended - * consequences of background dispatch usage. So till this is - * hashed out, disable thread level checking. - */ - if (((thread->ext_appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (thread->ext_appliedstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS)) - return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS); - if (((thread->appliedstate.hw_bg & TASK_POLICY_BACKGROUND_ATTRIBUTE_NOGPU) != 0) || (thread->appliedstate.hw_gpu == TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS)) - return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS); + bits |= (pended.t_update_timers ? POLICY_PEND_TIMERS : 0); + bits |= (pended.t_update_watchers ? 
POLICY_PEND_WATCHERS : 0); -#endif /* NOTYET */ - return(TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_FULLACCESS); + return bits; } -int -proc_apply_task_gpuacc(task_t task, int policy) -{ - - task_t self = current_task(); - - task_lock(task); - if (task == self) { - task->appliedstate.hw_gpu = policy; - task->policystate.hw_gpu = policy; - } else { - task->ext_appliedstate.hw_gpu = policy; - task->ext_policystate.hw_gpu = policy; - } - task_unlock(task); - return(0); -} +/* + * Resource usage and CPU related routines + */ -/* Resource usage , CPU realted routines */ int -proc_get_task_ruse_cpu(task_t task, uint32_t * policyp, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep) +proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep) { int error = 0; + int scope; task_lock(task); - if (task != current_task()) { - *policyp = task->ext_policystate.ru_cpu; - } else { - *policyp = task->policystate.ru_cpu; - } + - error = task_get_cpuusage(task, percentagep, intervalp, deadlinep); + error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope); + task_unlock(task); + + /* + * Reverse-map from CPU resource limit scopes back to policies (see comment below). + */ + if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { + *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC; + } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) { + *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE; + } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) { + *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; + } return(error); } +/* + * Configure the default CPU usage monitor parameters. + * + * For tasks which have this mechanism activated: if any thread in the + * process consumes more CPU than this, an EXC_RESOURCE exception will be generated. + */ +void +proc_init_cpumon_params(void) +{ + if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage, + sizeof (proc_max_cpumon_percentage))) { + proc_max_cpumon_percentage = DEFAULT_CPUMON_PERCENTAGE; + } + + if (proc_max_cpumon_percentage > 100) { + proc_max_cpumon_percentage = 100; + } + + /* The interval should be specified in seconds. */ + if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval, + sizeof (proc_max_cpumon_interval))) { + proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL; + } + + proc_max_cpumon_interval *= NSEC_PER_SEC; +} + /* * Currently supported configurations for CPU limits. * - * Deadline-based CPU limit Percentage-based CPU limit - * PROC_POLICY_RSRCACT_THROTTLE ENOTSUP Task-wide scope only - * PROC_POLICY_RSRCACT_SUSPEND Task-wide scope only ENOTSUP - * PROC_POLICY_RSRCACT_TERMINATE Task-wide scope only ENOTSUP - * PROC_POLICY_RSRCACT_NOTIFY_KQ Task-wide scope only ENOTSUP - * PROC_POLICY_RSRCACT_NOTIFY_EXC ENOTSUP Per-thread scope only + * Policy | Deadline-based CPU limit | Percentage-based CPU limit + * -------------------------------------+--------------------------+------------------------------ + * PROC_POLICY_RSRCACT_THROTTLE | ENOTSUP | Task-wide scope only + * PROC_POLICY_RSRCACT_SUSPEND | Task-wide scope only | ENOTSUP + * PROC_POLICY_RSRCACT_TERMINATE | Task-wide scope only | ENOTSUP + * PROC_POLICY_RSRCACT_NOTIFY_KQ | Task-wide scope only | ENOTSUP + * PROC_POLICY_RSRCACT_NOTIFY_EXC | ENOTSUP | Per-thread scope only * * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed * after the specified amount of wallclock time has elapsed. 
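For reference, a minimal standalone sketch of the supported-configuration table above. All names here are illustrative stand-ins (ACT_* for the PROC_POLICY_RSRCACT_* values, SCOPE_* for the limit scopes), not the kernel's definitions:

```c
/*
 * Illustrative only: map a resource action to the one CPU-limit scope
 * the table allows, rejecting unsupported combinations with ENOTSUP.
 */
#include <errno.h>

enum rsrc_action { ACT_THROTTLE, ACT_SUSPEND, ACT_TERMINATE, ACT_NOTIFY_KQ, ACT_NOTIFY_EXC };
enum limit_scope { SCOPE_NONE, SCOPE_TASK_PERCENT, SCOPE_TASK_DEADLINE, SCOPE_THREAD_PERCENT };

static int
scope_for_action(enum rsrc_action action, int deadline_based, enum limit_scope *scope)
{
	switch (action) {
	case ACT_THROTTLE:              /* percentage only, task-wide */
		if (deadline_based)
			return ENOTSUP;
		*scope = SCOPE_TASK_PERCENT;
		return 0;
	case ACT_SUSPEND:               /* deadline only, task-wide */
	case ACT_TERMINATE:
	case ACT_NOTIFY_KQ:
		if (!deadline_based)
			return ENOTSUP;
		*scope = SCOPE_TASK_DEADLINE;
		return 0;
	case ACT_NOTIFY_EXC:            /* percentage only, per-thread */
		if (deadline_based)
			return ENOTSUP;
		*scope = SCOPE_THREAD_PERCENT;
		return 0;
	}
	return EINVAL;
}
```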
@@ -1402,7 +2007,8 @@ proc_get_task_ruse_cpu(task_t task, uint32_t * policyp, uint32_t * percentagep, * CPU limit. All other types of notifications force task-wide scope for the limit. */ int -proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64_t interval, uint64_t deadline) +proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline, + int cpumon_entitled) { int error = 0; int scope; @@ -1429,6 +2035,9 @@ proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64 if (deadline != 0) return (ENOTSUP); scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT; +#ifdef CONFIG_NOMONITORS + return (error); +#endif /* CONFIG_NOMONITORS */ break; default: return (EINVAL); @@ -1436,17 +2045,17 @@ proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint32_t percentage, uint64 task_lock(task); if (task != current_task()) { - task->ext_policystate.ru_cpu = policy; + task->policy_ru_cpu_ext = policy; } else { - task->policystate.ru_cpu = policy; + task->policy_ru_cpu = policy; } - error = task_set_cpuusage(task, percentage, interval, deadline, scope); + error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled); task_unlock(task); return(error); } int -proc_clear_task_ruse_cpu(task_t task) +proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled) { int error = 0; int action; @@ -1454,19 +2063,19 @@ proc_clear_task_ruse_cpu(task_t task) task_lock(task); if (task != current_task()) { - task->ext_policystate.ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT; + task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT; } else { - task->policystate.ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT; + task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT; } - error = task_clear_cpuusage_locked(task); + error = task_clear_cpuusage_locked(task, cpumon_entitled); if (error != 0) goto out; - action = task->ext_appliedstate.ru_cpu; - if (task->ext_appliedstate.ru_cpu != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { + action = task->applied_ru_cpu; + if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { /* reset action */ - task->ext_appliedstate.ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; + task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; } if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { bsdinfo = task->bsd_info; @@ -1506,12 +2115,12 @@ task_apply_resource_actions(task_t task, int type) /* only cpu actions for now */ task_lock(task); - if (task->ext_appliedstate.ru_cpu == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { + if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { /* apply action */ - task->ext_appliedstate.ru_cpu = task->ext_policystate.ru_cpu; - action = task->ext_appliedstate.ru_cpu; + task->applied_ru_cpu_ext = task->policy_ru_cpu_ext; + action = task->applied_ru_cpu_ext; } else { - action = task->ext_appliedstate.ru_cpu; + action = task->applied_ru_cpu_ext; } if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { @@ -1524,24 +2133,78 @@ task_apply_resource_actions(task_t task, int type) return(0); } -/* For ledger hookups */ -static int -task_get_cpuusage(task_t task, uint32_t * percentagep, uint64_t * intervalp, uint64_t * deadlinep) -{ - *percentagep = task->rusage_cpu_percentage; - *intervalp = task->rusage_cpu_interval; - *deadlinep = task->rusage_cpu_deadline; +/* + * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API + * only allows for one at a time. 
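The set/clear/apply routines above all follow the same self-versus-external split: a policy the task sets on itself lands in one slot, a policy set by another process lands in the "_ext" slot, and applying the action promotes the pending external policy into the applied slot. A compact sketch of that pattern, with a hypothetical struct standing in for the task fields:

```c
/*
 * Sketch of the two-slot policy pattern (hypothetical types; field
 * names only echo policy_ru_cpu / policy_ru_cpu_ext above).
 */
#include <stdint.h>

#define RU_CPU_NONE 0u

struct fake_task {
	uint32_t policy_ru_cpu;       /* requested by the task itself */
	uint32_t policy_ru_cpu_ext;   /* requested by another process */
	uint32_t applied_ru_cpu_ext;  /* currently in force */
};

static void
set_ru_cpu(struct fake_task *target, struct fake_task *self, uint32_t policy)
{
	if (target != self)
		target->policy_ru_cpu_ext = policy;  /* externally imposed */
	else
		target->policy_ru_cpu = policy;      /* self-imposed */
}

static uint32_t
apply_ru_cpu(struct fake_task *task)
{
	/* Promote the requested external policy once, then keep returning it. */
	if (task->applied_ru_cpu_ext == RU_CPU_NONE)
		task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
	return task->applied_ru_cpu_ext;
}
```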
This means that if there is a per-thread limit active, the other + * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest + * to the caller, and prefer that, but there's no need for that at the moment. + */ +int +task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope) +{ + *percentagep = 0; + *intervalp = 0; + *deadlinep = 0; + + if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) { + *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT; + *percentagep = task->rusage_cpu_perthr_percentage; + *intervalp = task->rusage_cpu_perthr_interval; + } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) { + *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT; + *percentagep = task->rusage_cpu_percentage; + *intervalp = task->rusage_cpu_interval; + } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) { + *scope = TASK_RUSECPU_FLAGS_DEADLINE; + *deadlinep = task->rusage_cpu_deadline; + } else { + *scope = 0; + } return(0); } +/* + * Disable the CPU usage monitor for the task. Return value indicates + * if the mechanism was actually enabled. + */ +int +task_disable_cpumon(task_t task) { + thread_t thread; + + task_lock_assert_owned(task); + + if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) { + return (KERN_INVALID_ARGUMENT); + } + +#if CONFIG_TELEMETRY + /* + * Disable task-wide telemetry if it was ever enabled by the CPU usage + * monitor's warning zone. + */ + telemetry_task_ctl_locked(current_task(), TF_CPUMON_WARNING, 0); +#endif + + /* + * Disable the monitor for the task, and propagate that change to each thread. + */ + task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON); + queue_iterate(&task->threads, thread, thread_t, task_threads) { + set_astledger(thread); + } + task->rusage_cpu_perthr_percentage = 0; + task->rusage_cpu_perthr_interval = 0; + + return (KERN_SUCCESS); +} + int -task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t deadline, int scope) +task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled) { + thread_t thread; uint64_t abstime = 0; - uint64_t save_abstime = 0; uint64_t limittime = 0; - thread_t thread; lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED); @@ -1549,17 +2212,86 @@ task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t if (interval == 0) interval = NSEC_PER_SEC; - if (percentage != 0) { - if (percentage > 100) - percentage = 100; - limittime = (interval * percentage)/ 100; - nanoseconds_to_absolutetime(limittime, &abstime); - if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { + if (percentage != 0) { + if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { + boolean_t warn = FALSE; + + /* + * A per-thread CPU limit on a task generates an exception + * (LEDGER_ACTION_EXCEPTION) if any one thread in the task + * exceeds the limit. + */ + + if (percentage == TASK_POLICY_CPUMON_DISABLE) { + if (cpumon_entitled) { + task_disable_cpumon(task); + return (0); + } + + /* + * This task wishes to disable the CPU usage monitor, but it's + * missing the required entitlement: + * com.apple.private.kernel.override-cpumon + * + * Instead, treat this as a request to reset its params + * back to the defaults. 
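The getter above resolves that ambiguity with a fixed priority order: an active per-thread limit masks the task-wide percentage limit, which in turn masks the deadline. A minimal sketch of just that selection, with illustrative flag values standing in for the TASK_RUSECPU_FLAGS_* bits:

```c
/* Sketch of the scope-priority selection in task_get_cpuusage(). */
#include <stdint.h>

#define F_PERTHR   0x1u   /* stands in for TASK_RUSECPU_FLAGS_PERTHR_LIMIT */
#define F_PROC     0x2u   /* stands in for TASK_RUSECPU_FLAGS_PROC_LIMIT */
#define F_DEADLINE 0x4u   /* stands in for TASK_RUSECPU_FLAGS_DEADLINE */

static uint32_t
pick_reported_scope(uint32_t flags)
{
	if (flags & F_PERTHR)
		return F_PERTHR;      /* masks any other active scope */
	if (flags & F_PROC)
		return F_PROC;
	if (flags & F_DEADLINE)
		return F_DEADLINE;
	return 0;                 /* no CPU limit configured */
}
```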
+ */ + warn = TRUE; + percentage = TASK_POLICY_CPUMON_DEFAULTS; + } + + if (percentage == TASK_POLICY_CPUMON_DEFAULTS) { + percentage = proc_max_cpumon_percentage; + interval = proc_max_cpumon_interval; + } + + if (percentage > 100) { + percentage = 100; + } + + /* + * Passing in an interval of -1 means either: + * - Leave the interval as-is, if there's already a per-thread + * limit configured + * - Use the system default. + */ + if (interval == -1ULL) { + if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { + interval = task->rusage_cpu_perthr_interval; + } else { + interval = proc_max_cpumon_interval; + } + } + /* - * A per-thread CPU limit on a task generates an exception - * (LEDGER_ACTION_EXCEPTION) if any one thread in the task - * exceeds the limit. + * Enforce global caps on CPU usage monitor here if the process is not + * entitled to escape the global caps. */ + if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) { + warn = TRUE; + percentage = proc_max_cpumon_percentage; + } + + if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) { + warn = TRUE; + interval = proc_max_cpumon_interval; + } + + if (warn) { + int pid = 0; + char *procname = (char *)"unknown"; + +#ifdef MACH_BSD + pid = proc_selfpid(); + if (current_task()->bsd_info != NULL) { + procname = proc_name_address(current_task()->bsd_info); + } +#endif + + printf("process %s[%d] denied attempt to escape CPU monitor" + " (missing required entitlement).\n", procname, pid); + } + task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT; task->rusage_cpu_perthr_percentage = percentage; task->rusage_cpu_perthr_interval = interval; @@ -1575,7 +2307,10 @@ task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t task->rusage_cpu_percentage = percentage; task->rusage_cpu_interval = interval; - ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime); + limittime = (interval * percentage) / 100; + nanoseconds_to_absolutetime(limittime, &abstime); + + ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0); ledger_set_period(task->ledger, task_ledgers.cpu_time, interval); ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK); } @@ -1595,6 +2330,8 @@ task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t } /* setup callout */ if (task->rusage_cpu_callt != 0) { + uint64_t save_abstime = 0; + task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE; task->rusage_cpu_deadline = deadline; @@ -1609,39 +2346,35 @@ task_set_cpuusage(task_t task, uint64_t percentage, uint64_t interval, uint64_t } int -task_clear_cpuusage(task_t task) +task_clear_cpuusage(task_t task, int cpumon_entitled) { int retval = 0; task_lock(task); - retval = task_clear_cpuusage_locked(task); + retval = task_clear_cpuusage_locked(task, cpumon_entitled); task_unlock(task); return(retval); } int -task_clear_cpuusage_locked(task_t task) +task_clear_cpuusage_locked(task_t task, int cpumon_entitled) { thread_call_t savecallt; - thread_t thread; /* cancel percentage handling if set */ if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) { task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT; - ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY); + ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0); task->rusage_cpu_percentage = 0; task->rusage_cpu_interval = 0; } - if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { - task->rusage_cpu_flags &= 
~TASK_RUSECPU_FLAGS_PERTHR_LIMIT; - queue_iterate(&task->threads, thread, thread_t, task_threads) { - set_astledger(thread); - } - task->rusage_cpu_perthr_percentage = 0; - task->rusage_cpu_perthr_interval = 0; - + /* + * Disable the CPU usage monitor. + */ + if (cpumon_entitled) { + task_disable_cpumon(task); } /* cancel deadline handling if set */ @@ -1669,636 +2402,537 @@ task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t pa return; } -#if CONFIG_EMBEDDED -/* return the appstate of a task */ -int -proc_lf_getappstate(task_t task) -{ - return(task->appstate); -} +/* + * Routines for taskwatch and pidbind + */ -/* set appstate of a task and apply approp actions */ -int -proc_lf_setappstate(task_t task, int state) -{ - int ret = 0, oldstate; - kern_return_t kret = KERN_SUCCESS; - int applywatch = 0, setbg = 0, setnetbg = 0; - int sethib_suspend = 0, sethib_resume=0; - - if (state == TASK_APPSTATE_NONE) - goto out; - - /* valid states? */ - switch (state) { - case TASK_APPSTATE_ACTIVE: - case TASK_APPSTATE_BACKGROUND: - case TASK_APPSTATE_NONUI: - case TASK_APPSTATE_INACTIVE: - break; - default: - ret = EINVAL; - goto out; +/* + * Routines for importance donation/inheritance/boosting + */ - } +void +task_importance_mark_donor(task_t task, boolean_t donating) +{ +#if IMPORTANCE_INHERITANCE + task->imp_donor = (donating ? 1 : 0); +#endif /* IMPORTANCE_INHERITANCE */ +} - task_lock(task); - oldstate = task->appstate; - if (oldstate == state) { - /* no changes */ - goto out1; +void +task_importance_mark_receiver(task_t task, boolean_t receiving) +{ +#if IMPORTANCE_INHERITANCE + if (receiving) { + assert(task->task_imp_assertcnt == 0); + task->imp_receiver = 1; /* task can receive importance boost */ + task->task_imp_assertcnt = 0; + task->task_imp_externcnt = 0; + } else { + if (task->task_imp_assertcnt != 0 || task->task_imp_externcnt != 0) + panic("disabling imp_receiver on task with pending boosts!"); + + task->imp_receiver = 0; + task->task_imp_assertcnt = 0; + task->task_imp_externcnt = 0; } +#endif /* IMPORTANCE_INHERITANCE */ +} - switch(oldstate) { - case TASK_APPSTATE_ACTIVE: - switch(state) { - case TASK_APPSTATE_BACKGROUND: - /* moving from active to app background */ - task->ext_policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL; - proc_apply_bgtaskpolicy_internal(task, 1, 1); - /* watchers need update */ - applywatch = 1; - setbg = 1; - /* set network part */ - setnetbg = 1; - break; - - case TASK_APPSTATE_NONUI: - /* set no graphics */ - task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; - task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; - break; - - case TASK_APPSTATE_INACTIVE: - /* suspend the process */ - kret = task_pidsuspend_locked(task); - if (kret != KERN_SUCCESS) - ret = EINVAL; - else - sethib_suspend = 1; - - break; - } - break; - case TASK_APPSTATE_BACKGROUND: - switch(state) { - /* watchers need update */ - applywatch = 1; - setbg = 0; - /* set network part */ - setnetbg = 1; - case TASK_APPSTATE_ACTIVE: - /* remove app background */ - ret = proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT); - /* going from BG to active */ - break; - - case TASK_APPSTATE_NONUI: - /* remove app background + no graphics */ - task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; - task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; - ret = proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT); - break; - - case 
TASK_APPSTATE_INACTIVE: - /* suspend and then remove app background */ - kret = task_pidsuspend_locked(task); - if (kret != KERN_SUCCESS) { - ret = EINVAL; - } else { - ret = proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT); - sethib_suspend = 1; - } - - break; +#if IMPORTANCE_INHERITANCE - } - break; +static void +task_update_boost_locked(task_t task, boolean_t boost_active) +{ +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START), + proc_selfpid(), audit_token_pid_from_task(task), trequested(task, THREAD_NULL), 0, 0); +#endif - case TASK_APPSTATE_NONUI: - switch(state) { - case TASK_APPSTATE_ACTIVE: - /* restore graphics access */ - task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; - task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT; - break; - - case TASK_APPSTATE_BACKGROUND: - /* set app background */ - task->ext_policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL; - - ret = proc_apply_bgtaskpolicy_internal(task, 1, 1); - if (ret == 0) { - task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT; - task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT; - } - /* watchers need update */ - applywatch = 1; - setbg = 1; - /* set network part */ - setnetbg = 1; - break; - - case TASK_APPSTATE_INACTIVE: - /* suspend & restore graphics access */ - kret = task_pidsuspend_locked(task); - if (kret != KERN_SUCCESS) { - ret = EINVAL; - } else { - ret = proc_restore_bgtaskpolicy_internal(task, 1, 1, BASEPRI_DEFAULT); - task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT; - task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_DEFAULT; - sethib_suspend = 1; - } - break; - } - break; + /* assert(boost_active ? 
task->requested_policy.t_boosted == 0 : task->requested_policy.t_boosted == 1); */ - case TASK_APPSTATE_INACTIVE: - switch(state) { - case TASK_APPSTATE_ACTIVE: - /* resume process */ - /* going from inactive to active */ - break; - - case TASK_APPSTATE_BACKGROUND: - task->ext_policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL; - ret = proc_apply_bgtaskpolicy_internal(task, 1, 1); - /* put in app background & resume process */ - /* watchers need update */ - applywatch = 1; - setbg = 1; - /* set network part */ - setnetbg = 1; - break; - - case TASK_APPSTATE_NONUI: - /* remove graphics access and resume */ - task->ext_policystate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; - task->ext_appliedstate.hw_gpu = TASK_POLICY_HWACCESS_GPU_ATTRIBUTE_NOACCESS; - break; - } - /* pidresume does drop task lock,so no need to have locked version */ - task_unlock(task); - kret = task_pidresume(task); - task_lock(task); - sethib_resume = 1; - break; - } - /* set the new app state on the task */ - task->appstate = state; -out1: - task_unlock(task); - if (setnetbg != 0) { - /* apply network background */ - if (setbg != 0) - proc_apply_task_networkbg_internal(task->bsd_info, NULL); - else - proc_restore_task_networkbg_internal(task->bsd_info, NULL); - } -#if CONFIG_MEMORYSTATUS - if (sethib_suspend != 0) - memorystatus_on_suspend(proc_pid(task->bsd_info)); - if (sethib_resume != 0) - memorystatus_on_resume(proc_pid(task->bsd_info)); -#endif /* CONFIG_MEMORYSTATUS */ - /* if watchers need update, safe point to do that */ - if (applywatch != 0) - apply_appstate_watchers(task, setbg); + proc_set_task_policy_locked(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BOOST, boost_active); -out: - return(ret); + task_policy_update_locked(task, THREAD_NULL); + +#if IMPORTANCE_DEBUG + if (boost_active == TRUE){ + DTRACE_BOOST2(boost, task_t, task, int, audit_token_pid_from_task(task)); + } else { + DTRACE_BOOST2(unboost, task_t, task, int, audit_token_pid_from_task(task)); + } + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END), + proc_selfpid(), audit_token_pid_from_task(task), + trequested(task, THREAD_NULL), tpending(task, THREAD_NULL), 0); +#endif } -static void -task_watch_lock(void) +/* + * Check if this task should donate importance. + * + * May be called without taking the task lock. In that case, donor status can change + * so you must check only once for each donation event. + */ +boolean_t +task_is_importance_donor(task_t task) { - lck_mtx_lock(&task_watch_mtx); + return (task->imp_donor == 1 || task->task_imp_assertcnt > 0) ? TRUE : FALSE; } -static void -task_watch_unlock(void) +/* + * This routine may be called without holding task lock + * since the value of imp_receiver can never be unset. + */ +boolean_t +task_is_importance_receiver(task_t task) { - lck_mtx_unlock(&task_watch_mtx); + return (task->imp_receiver) ? 
TRUE : FALSE; } -static void -add_taskwatch_locked(task_t task, task_watch_t * twp) +/* + * External importance assertions are managed by the process in userspace + * Internal importance assertions are the responsibility of the kernel + * Assertions are changed from internal to external via task_importance_externalize_assertion + */ + +int +task_importance_hold_internal_assertion(task_t target_task, uint32_t count) { - queue_enter(&task->task_watchers, twp, task_watch_t *, tw_links); - task->num_taskwatchers++; + int rval = 0; + + task_lock(target_task); + rval = task_importance_hold_assertion_locked(target_task, TASK_POLICY_INTERNAL, count); + task_unlock(target_task); + task_policy_update_complete_unlocked(target_task, THREAD_NULL); + + return(rval); } -static void -remove_taskwatch_locked(task_t task, task_watch_t * twp) +int +task_importance_hold_external_assertion(task_t target_task, uint32_t count) { - queue_remove(&task->task_watchers, twp, task_watch_t *, tw_links); - task->num_taskwatchers--; + int rval = 0; + + task_lock(target_task); + rval = task_importance_hold_assertion_locked(target_task, TASK_POLICY_EXTERNAL, count); + task_unlock(target_task); + + task_policy_update_complete_unlocked(target_task, THREAD_NULL); + + return(rval); } +int +task_importance_drop_internal_assertion(task_t target_task, uint32_t count) +{ + int rval = 0; + + task_lock(target_task); + rval = task_importance_drop_assertion_locked(target_task, TASK_POLICY_INTERNAL, count); + task_unlock(target_task); -int -proc_lf_pidbind(task_t curtask, uint64_t tid, task_t target_task, int bind) + task_policy_update_complete_unlocked(target_task, THREAD_NULL); + + return(rval); +} + +int +task_importance_drop_external_assertion(task_t target_task, uint32_t count) { - thread_t self = current_thread(); - thread_t target_thread = NULL; - int selfset = 0, ret = 0, setbg = 0; - task_watch_t *twp = NULL; - task_t task = TASK_NULL; + int rval = 0; + task_lock(target_task); + rval = task_importance_drop_assertion_locked(target_task, TASK_POLICY_EXTERNAL, count); + task_unlock(target_task); - if ((tid == 0) || (tid == self->thread_id)) { - selfset = 1; - target_thread = self; - thread_reference(target_thread); - } else { - task_lock(curtask); - target_thread = task_findtid(curtask, tid); - if (target_thread != NULL) - thread_reference(target_thread); - else { - ret = ESRCH; - goto out; - } - - task_unlock(curtask); - } - - if (bind != 0) { - /* task is still active ? */ - task_lock(target_task); - if (target_task->active == 0) { - task_unlock(target_task); - ret = ESRCH; - goto out; - } - task_unlock(target_task); + task_policy_update_complete_unlocked(target_task, THREAD_NULL); - twp = (task_watch_t *)kalloc(sizeof(task_watch_t)); - if (twp == NULL) { - task_watch_unlock(); - ret = ENOMEM; - goto out; - } + return(rval); +} - bzero(twp, sizeof(task_watch_t)); +/* + * Returns EOVERFLOW if an external assertion is taken when not holding an external boost. 
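Each of the four wrappers above follows the same shape: mutate the assertion count under the task lock, drop the lock, then run the heavier policy re-evaluation unlocked so it can take other locks without inverting lock order. A self-contained sketch of that discipline, with pthread mutexes and invented names standing in for the kernel primitives:

```c
/* Sketch only: mutate under lock, do the expensive update after unlocking. */
#include <pthread.h>
#include <stdint.h>

struct boosted {
	pthread_mutex_t lock;
	uint32_t assertcnt;
	int dirty;   /* stands in for the deferred policy recomputation */
};

static void
recompute_policy(struct boosted *b)
{
	b->dirty = 0;   /* heavy work happens here, outside b->lock */
}

static void
hold_assertion(struct boosted *b, uint32_t count)
{
	pthread_mutex_lock(&b->lock);
	b->assertcnt += count;
	b->dirty = 1;
	pthread_mutex_unlock(&b->lock);

	/* Mirrors the task_policy_update_complete_unlocked() call pattern. */
	recompute_policy(b);
}
```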
+ */ +static int +task_importance_hold_assertion_locked(task_t target_task, int external, uint32_t count) +{ + boolean_t apply_boost = FALSE; + int ret = 0; - task_watch_lock(); + assert(target_task->imp_receiver != 0); - if (target_thread->taskwatch != NULL){ - /* already bound to another task */ - task_watch_unlock(); +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | external))) | DBG_FUNC_START, + proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); +#endif - kfree(twp, sizeof(task_watch_t)); - ret = EBUSY; - goto out; + /* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */ + + if (external == TASK_POLICY_EXTERNAL) { + if (target_task->task_imp_externcnt == 0) { + /* Only allowed to take a new boost assertion when holding an external boost */ + printf("BUG in process %s[%d]: it attempted to acquire a new boost assertion without holding an existing external assertion. " + "(%d total, %d external)\n", + proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task), + target_task->task_imp_assertcnt, target_task->task_imp_externcnt); + ret = EOVERFLOW; + count = 0; + } else { + target_task->task_imp_assertcnt += count; + target_task->task_imp_externcnt += count; } + } else { + if (target_task->task_imp_assertcnt == 0) + apply_boost = TRUE; + target_task->task_imp_assertcnt += count; + } - task_reference(target_task); + if (apply_boost == TRUE) + task_update_boost_locked(target_task, TRUE); - twp->tw_task = target_task; /* holds the task reference */ - twp->tw_thread = target_thread; /* holds the thread reference */ - twp->tw_state = target_task->appstate; - twp->tw_importance = target_thread->importance; - - add_taskwatch_locked(target_task, twp); +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_HOLD | external))) | DBG_FUNC_END, + proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); + DTRACE_BOOST6(receive_internal_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), task_t, current_task(), int, proc_selfpid(), int, count, int, target_task->task_imp_assertcnt); + if (external == TASK_POLICY_EXTERNAL){ + DTRACE_BOOST5(receive_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, proc_selfpid(), int, count, int, target_task->task_imp_externcnt); + } +#endif + return(ret); +} - target_thread->taskwatch = twp; - if (target_task->appstate == TASK_APPSTATE_BACKGROUND) - setbg = 1; +/* + * Returns EOVERFLOW if an external assertion is over-released. + * Panics if an internal assertion is over-released. 
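The drop path below guards a two-counter invariant: the external count is always a subset of the total, and an over-release from userspace reports an error rather than underflowing either counter. A minimal sketch of just that guard, with a return of -1 standing in for EOVERFLOW:

```c
/* Sketch of the over-release guard in the external drop path. */
#include <stdint.h>

struct counts {
	uint32_t total;      /* all assertions, internal + external */
	uint32_t external;   /* userspace-managed subset */
};

static int
drop_external(struct counts *c, uint32_t count)
{
	if (count > c->external)
		return -1;           /* over-release: report, never underflow */
	c->external -= count;
	if (count <= c->total)   /* preserve the invariant total >= external */
		c->total -= count;
	return 0;
}
```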
+ */ +static int +task_importance_drop_assertion_locked(task_t target_task, int external, uint32_t count) +{ + int ret = 0; - task_watch_unlock(); + assert(target_task->imp_receiver != 0); - if (setbg != 0) { - set_thread_appbg(target_thread, setbg, INT_MIN); - } +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | external))) | DBG_FUNC_START, + proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); +#endif - /* retain the thread reference as it is in twp */ - target_thread = NULL; + /* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */ + + if (external == TASK_POLICY_EXTERNAL) { + assert(count == 1); + if (count <= target_task->task_imp_externcnt) { + target_task->task_imp_externcnt -= count; + if (count <= target_task->task_imp_assertcnt) + target_task->task_imp_assertcnt -= count; + } else { + /* Process over-released its boost count */ + printf("BUG in process %s[%d]: over-released external boost assertions (%d total, %d external)\n", + proc_name_address(target_task->bsd_info), audit_token_pid_from_task(target_task), + target_task->task_imp_assertcnt, target_task->task_imp_externcnt); + + /* TODO: If count > 1, we should clear out as many external assertions as there are left. */ + ret = EOVERFLOW; + count = 0; + } } else { - /* unbind */ - task_watch_lock(); - if ((twp = target_thread->taskwatch) != NULL) { - task = twp->tw_task; - target_thread->taskwatch = NULL; - remove_taskwatch_locked(task, twp); - - task_watch_unlock(); - - task_deallocate(task); /* drop task ref in twp */ - set_thread_appbg(target_thread, 0, twp->tw_importance); - thread_deallocate(target_thread); /* drop thread ref in twp */ - kfree(twp, sizeof(task_watch_t)); + if (count <= target_task->task_imp_assertcnt) { + target_task->task_imp_assertcnt -= count; } else { - task_watch_unlock(); - ret = 0; /* return success if it not alredy bound */ - goto out; + /* TODO: Turn this back into a panic */ + printf("Over-release of kernel-internal importance assertions for task %p (%s), dropping %d assertion(s) but task only has %d remaining (%d external).\n", + target_task, + (target_task->bsd_info == NULL) ? 
"" : proc_name_address(target_task->bsd_info), + count, + target_task->task_imp_assertcnt, + target_task->task_imp_externcnt); + count = 0; } } -out: - if (target_thread != NULL) - thread_deallocate(target_thread); /* drop thread ref acquired in this routine */ - return(ret); -} -static void -set_thread_appbg(thread_t thread, int setbg,int importance) -{ - /* TBD: ensure the proc for network is fine */ - if (setbg == 0) { - restore_bgthreadpolicy_locked(thread, 0, importance); - proc_restore_task_networkbg_internal(thread->task->bsd_info, thread); - } else { - apply_bgthreadpolicy_external(thread); - proc_apply_task_networkbg_internal(thread->task->bsd_info, thread); + /* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt); */ + + if (target_task->task_imp_assertcnt == 0 && ret == 0) + task_update_boost_locked(target_task, FALSE); + +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, (IMP_DROP | external))) | DBG_FUNC_END, + proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); + if (external == TASK_POLICY_EXTERNAL) { + DTRACE_BOOST4(drop_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, count, int, target_task->task_imp_externcnt); } + DTRACE_BOOST4(drop_internal_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), int, count, int, target_task->task_imp_assertcnt); +#endif + + return(ret); } -static void -apply_appstate_watchers(task_t task, int setbg) +/* Transfer an assertion to userspace responsibility */ +int +task_importance_externalize_assertion(task_t target_task, uint32_t count, __unused int sender_pid) { - int numwatchers = 0, i, j; - thread_watchlist_t * threadlist; - task_watch_t * twp; + assert(target_task != TASK_NULL); + assert(target_task->imp_receiver != 0); -retry: - /* if no watchers on the list return */ - if ((numwatchers = task->num_taskwatchers) == 0) - return; - - threadlist = (thread_watchlist_t *)kalloc(numwatchers*sizeof(thread_watchlist_t)); - if (threadlist == NULL) - return; + task_lock(target_task); - bzero(threadlist, numwatchers*sizeof(thread_watchlist_t)); +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_START, + proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); +#endif - task_watch_lock(); - /*serialize application of app state changes */ - if (task->watchapplying != 0) { - lck_mtx_sleep(&task_watch_mtx, LCK_SLEEP_DEFAULT, &task->watchapplying, THREAD_UNINT); - task_watch_unlock(); - kfree(threadlist, numwatchers*sizeof(thread_watchlist_t)); - goto retry; - } + /* assert(target_task->task_imp_assertcnt >= target_task->task_imp_externcnt + count); */ - if (numwatchers != task->num_taskwatchers) { - task_watch_unlock(); - kfree(threadlist, numwatchers*sizeof(thread_watchlist_t)); - goto retry; - } - - task->watchapplying = 1; - i = 0; - queue_iterate(&task->task_watchers, twp, task_watch_t *, tw_links) { - - threadlist[i].thread = twp->tw_thread; - thread_reference(threadlist[i].thread); - if (setbg != 0) { - twp->tw_importance = twp->tw_thread->importance; - threadlist[i].importance = INT_MIN; - } else - threadlist[i].importance = twp->tw_importance; - i++; - if (i > numwatchers) - break; - } - task_watch_unlock(); + target_task->task_imp_externcnt += count; - for (j = 0; j< i; j++) { - set_thread_appbg(threadlist[j].thread, 
setbg, threadlist[j].importance); - thread_deallocate(threadlist[j].thread); - } - kfree(threadlist, numwatchers*sizeof(thread_watchlist_t)); +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_ASSERTION, IMP_EXTERN)) | DBG_FUNC_END, + proc_selfpid(), audit_token_pid_from_task(target_task), target_task->task_imp_assertcnt, target_task->task_imp_externcnt, 0); + DTRACE_BOOST5(receive_boost, task_t, target_task, int, audit_token_pid_from_task(target_task), + int, sender_pid, int, count, int, target_task->task_imp_externcnt); +#endif /* IMPORTANCE_DEBUG */ + task_unlock(target_task); - task_watch_lock(); - task->watchapplying = 0; - thread_wakeup_one(&task->watchapplying); - task_watch_unlock(); + return(0); } + +#endif /* IMPORTANCE_INHERITANCE */ + void -thead_remove_taskwatch(thread_t thread) +task_hold_multiple_assertion(__imp_only task_t task, __imp_only uint32_t count) { - task_watch_t * twp; - int importance = 0; +#if IMPORTANCE_INHERITANCE + assert(task->imp_receiver != 0); - task_watch_lock(); - if ((twp = thread->taskwatch) != NULL) { - thread->taskwatch = NULL; - remove_taskwatch_locked(twp->tw_task, twp); - } - task_watch_unlock(); - if (twp != NULL) { - thread_deallocate(twp->tw_thread); - task_deallocate(twp->tw_task); - importance = twp->tw_importance; - kfree(twp, sizeof(task_watch_t)); - /* remove the thread and networkbg */ - set_thread_appbg(thread, 0, importance); - } + task_importance_hold_internal_assertion(task, count); +#endif /* IMPORTANCE_INHERITANCE */ } void -task_removewatchers(task_t task) +task_add_importance_watchport(__imp_only task_t task, __imp_only __impdebug_only int pid, __imp_only mach_port_t port, int *boostp) { - int numwatchers = 0, i, j; - task_watch_t ** twplist = NULL; - task_watch_t * twp = NULL; - -retry: - if ((numwatchers = task->num_taskwatchers) == 0) - return; + int boost = 0; - twplist = (task_watch_t **)kalloc(numwatchers*sizeof(task_watch_t *)); - if (twplist == NULL) - return; + __impdebug_only int released_pid = 0; - bzero(twplist, numwatchers*sizeof(task_watch_t *)); +#if IMPORTANCE_INHERITANCE + task_t release_imp_task = TASK_NULL; - task_watch_lock(); - if (task->num_taskwatchers == 0) { - task_watch_unlock(); - goto out; + if (task->imp_receiver == 0) { + *boostp = boost; + return; } - if (numwatchers != task->num_taskwatchers) { - task_watch_unlock(); - kfree(twplist, numwatchers*sizeof(task_watch_t *)); - numwatchers = 0; - goto retry; - } - - i = 0; - while((twp = (task_watch_t *)dequeue_head(&task->task_watchers)) != NULL) - { - twplist[i] = twp; - task->num_taskwatchers--; + if (IP_VALID(port) != 0) { + ip_lock(port); - /* - * Since the linkage is removed and thead state cleanup is already set up, - * remove the refernce from the thread. + /* + * The port must have been marked tempowner already. + * This also filters out ports whose receive rights + * are already enqueued in a message, as you can't + * change the right's destination once it's already + * on its way. 
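The rebinding step below only *remembers* the previously bound task while the port lock is held; dropping its forwarded boosts and releasing its reference happen after unlocking, because those operations take the task lock and must not nest inside the port lock. A self-contained sketch of that deferral, with invented types:

```c
/* Sketch (hypothetical types) of deferring task teardown past ip_unlock(). */
#include <pthread.h>
#include <stddef.h>

struct task { int refs; int boosts; };

static void task_retain(struct task *t)           { t->refs++; }
static void task_release(struct task *t)          { t->refs--; }
static void task_drop_boosts(struct task *t, int n) { t->boosts -= n; }

struct port {
	pthread_mutex_t lock;
	struct task *imp_task;   /* task currently boosted by this port */
	int impcount;
};

static void
rebind_watchport(struct port *p, struct task *new_task)
{
	struct task *old;
	int boosts;

	pthread_mutex_lock(&p->lock);
	old = p->imp_task;       /* remember; tear down later */
	boosts = p->impcount;
	task_retain(new_task);
	p->imp_task = new_task;
	pthread_mutex_unlock(&p->lock);

	if (old != NULL) {
		if (boosts > 0)
			task_drop_boosts(old, boosts);  /* takes the task lock */
		task_release(old);
	}
}
```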
*/ - twp->tw_thread->taskwatch = NULL; /* removed linkage, clear thread holding ref */ - i++; - if ((task->num_taskwatchers == 0) || (i > numwatchers)) - break; - } + if (port->ip_tempowner != 0) { + assert(port->ip_impdonation != 0); + + boost = port->ip_impcount; + if (port->ip_taskptr != 0) { + /* + * if this port is already bound to a task, + * release the task reference and drop any + * watchport-forwarded boosts + */ + release_imp_task = port->ip_imp_task; + } - task_watch_unlock(); + /* mark the port is watching another task */ + port->ip_taskptr = 1; + port->ip_imp_task = task; + task_reference(task); + } + ip_unlock(port); - for (j = 0; j< i; j++) { - - twp = twplist[j]; - /* remove thread and network bg */ - set_thread_appbg(twp->tw_thread, 0, twp->tw_importance); - thread_deallocate(twp->tw_thread); - task_deallocate(twp->tw_task); - kfree(twp, sizeof(task_watch_t)); + if (release_imp_task != TASK_NULL) { + if (boost > 0) + task_importance_drop_internal_assertion(release_imp_task, boost); + released_pid = audit_token_pid_from_task(release_imp_task); + task_deallocate(release_imp_task); + } +#if IMPORTANCE_DEBUG + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE, + proc_selfpid(), pid, boost, released_pid, 0); +#endif /* IMPORTANCE_DEBUG */ } +#endif /* IMPORTANCE_INHERITANCE */ -out: - kfree(twplist, numwatchers*sizeof(task_watch_t *)); - + *boostp = boost; + return; } -#endif /* CONFIG_EMBEDDED */ -int -proc_disable_task_apptype(task_t task, int policy_subtype) -{ - void * bsdinfo = NULL; - int ret = 0; - int setbg = 0; -#if !CONFIG_EMBEDDED - int maxpri = BASEPRI_DEFAULT; -#endif /* !CONFIG_EMBEDDED */ +/* + * Routines for VM to query task importance + */ - task_lock(task); - if (task->ext_policystate.apptype != policy_subtype) { - ret = EINVAL; - goto out; +/* + * Order to be considered while estimating importance + * for low memory notification and purging purgeable memory. + */ +#define TASK_IMPORTANCE_FOREGROUND 4 +#define TASK_IMPORTANCE_NOTDARWINBG 1 + + +/* + * Checks if the task is already notified. + * + * Condition: task lock should be held while calling this function. + */ +boolean_t +task_has_been_notified(task_t task, int pressurelevel) +{ + if (task == NULL) { + return FALSE; } + + if (pressurelevel == kVMPressureWarning) + return (task->low_mem_notified_warn ? TRUE : FALSE); + else if (pressurelevel == kVMPressureCritical) + return (task->low_mem_notified_critical ? 
TRUE : FALSE); + else + return TRUE; +} -#if !CONFIG_EMBEDDED - switch (task->role) { - case TASK_FOREGROUND_APPLICATION: - maxpri = BASEPRI_FOREGROUND; - break; - case TASK_BACKGROUND_APPLICATION: - maxpri = BASEPRI_BACKGROUND; - break; - default: - maxpri = BASEPRI_DEFAULT; - } - - -#endif /* !CONFIG_EMBEDDED */ - - /* TAL apps are cleared with BG handling on first foreground application */ - if (task->ext_appliedstate.apptype != PROC_POLICY_OSX_APPTYPE_NONE) { - switch (task->ext_appliedstate.apptype) { -#if !CONFIG_EMBEDDED - case PROC_POLICY_OSX_APPTYPE_TAL: - /* disable foreground/background handling */ - task->ext_appliedstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE; - /* external BG application removal */ - proc_restore_bgtaskpolicy_internal(task, 1, 1, maxpri); - bsdinfo = task->bsd_info; - setbg = 0; - break; - - case PROC_POLICY_OSX_APPTYPE_DBCLIENT: - /* disable foreground/background handling */ - task->ext_appliedstate.apptype = PROC_POLICY_OSX_APPTYPE_NONE; - /* internal BG application removal */ - proc_restore_bgtaskpolicy_internal(task, 1, 0, maxpri); - bsdinfo = task->bsd_info; - setbg = 0; - break; - -#endif /* !CONFIG_EMBEDDED */ - default: - ret = EINVAL; - break; - } - } else { - ret = EINVAL; +/* + * Checks if the task is used for purging. + * + * Condition: task lock should be held while calling this function. + */ +boolean_t +task_used_for_purging(task_t task, int pressurelevel) +{ + if (task == NULL) { + return FALSE; } + + if (pressurelevel == kVMPressureWarning) + return (task->purged_memory_warn ? TRUE : FALSE); + else if (pressurelevel == kVMPressureCritical) + return (task->purged_memory_critical ? TRUE : FALSE); + else + return TRUE; +} -out: - task_unlock(task); - /* if backgrounding action ... */ - if (bsdinfo != NULL) - proc_set_task_networkbg(bsdinfo, setbg); - return(ret); +/* + * Mark the task as notified with memory notification. + * + * Condition: task lock should be held while calling this function. + */ +void +task_mark_has_been_notified(task_t task, int pressurelevel) +{ + if (task == NULL) { + return; + } + + if (pressurelevel == kVMPressureWarning) + task->low_mem_notified_warn = 1; + else if (pressurelevel == kVMPressureCritical) + task->low_mem_notified_critical = 1; } -int -proc_enable_task_apptype(task_t task, int policy_subtype) + +/* + * Mark the task as purged. + * + * Condition: task lock should be held while calling this function. + */ +void +task_mark_used_for_purging(task_t task, int pressurelevel) { - void * bsdinfo = NULL; - int setbg = 0; - int ret = 0; + if (task == NULL) { + return; + } + + if (pressurelevel == kVMPressureWarning) + task->purged_memory_warn = 1; + else if (pressurelevel == kVMPressureCritical) + task->purged_memory_critical = 1; +} - task_lock(task); - if (task->ext_policystate.apptype != policy_subtype) { - ret = EINVAL; - goto out; - } - - if (task->ext_appliedstate.apptype == PROC_POLICY_OSX_APPTYPE_NONE) { - switch (task->ext_policystate.apptype) { -#if !CONFIG_EMBEDDED - case PROC_POLICY_OSX_APPTYPE_TAL: - /* TAL policy is activated again */ - task->ext_appliedstate.apptype = task->ext_policystate.apptype; - if (task->role == TASK_BACKGROUND_APPLICATION) { - if (task->role == TASK_BACKGROUND_APPLICATION) { - proc_apply_bgtaskpolicy_internal(task, 1, 1); - bsdinfo = task->bsd_info; - setbg = 1; - } - } - ret = 0; - break; -#endif /* !CONFIG_EMBEDDED */ - default: - ret = EINVAL; - } - } else - ret = EINVAL; +/* + * Mark the task eligible for low memory notification. 
+ * + * Condition: task lock should be held while calling this function. + */ +void +task_clear_has_been_notified(task_t task, int pressurelevel) +{ + if (task == NULL) { + return; + } + + if (pressurelevel == kVMPressureWarning) + task->low_mem_notified_warn = 0; + else if (pressurelevel == kVMPressureCritical) + task->low_mem_notified_critical = 0; +} -out: - task_unlock(task); - /* if backgrounding action ... */ - if (bsdinfo != NULL) - proc_set_task_networkbg(bsdinfo, setbg); - return(ret); +/* + * Mark the task eligible for purging its purgeable memory. + * + * Condition: task lock should be held while calling this function. + */ +void +task_clear_used_for_purging(task_t task) +{ + if (task == NULL) { + return; + } + + task->purged_memory_warn = 0; + task->purged_memory_critical = 0; } -#if CONFIG_EMBEDDED + +/* + * Estimate task importance for purging its purgeable memory + * and low memory notification. + * + * Importance is calculated in the following order of criteria: + * -Task role : Background vs Foreground + * -Boost status: Not boosted vs Boosted + * -Darwin BG status. + * + * Returns: Estimated task importance. Less important task will have lower + * estimated importance. + */ int -proc_setthread_saved_importance(thread_t thread, int importance) +task_importance_estimate(task_t task) { - if ((thread->task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) && - (thread->appliedstate.hw_bg == TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL)) - { - /* the thread is still backgrounded , save the importance for restore time */ - thread->saved_importance = importance; + int task_importance = 0; - return(1); - } else - return(0); + if (task == NULL) { + return 0; + } + + if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION) + task_importance += TASK_IMPORTANCE_FOREGROUND; + + if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0) + task_importance += TASK_IMPORTANCE_NOTDARWINBG; + + return task_importance; } -#endif /* CONFIG_EMBEDDED */ + diff --git a/osfmk/kern/telemetry.c b/osfmk/kern/telemetry.c new file mode 100644 index 000000000..5419eb48d --- /dev/null +++ b/osfmk/kern/telemetry.c @@ -0,0 +1,1023 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#define TELEMETRY_DEBUG 0 + +extern int proc_pid(void *); +extern char *proc_name_address(void *p); +extern uint64_t proc_uniqueid(void *p); +extern uint64_t proc_was_throttled(void *p); +extern uint64_t proc_did_throttle(void *p); +extern uint64_t get_dispatchqueue_serialno_offset_from_proc(void *p); +extern int proc_selfpid(void); + +void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags); + +#define TELEMETRY_DEFAULT_SAMPLE_RATE (1) /* 1 sample every 1 second */ +#define TELEMETRY_DEFAULT_BUFFER_SIZE (16*1024) +#define TELEMETRY_MAX_BUFFER_SIZE (64*1024) + +#define TELEMETRY_DEFAULT_NOTIFY_LEEWAY (4*1024) // Userland gets 4k of leeway to collect data after notification + +uint32_t telemetry_sample_rate = 0; +volatile boolean_t telemetry_needs_record = FALSE; +volatile boolean_t telemetry_needs_timer_arming_record = FALSE; + +/* + * If TRUE, record micro-stackshot samples for all tasks. + * If FALSE, only sample tasks which are marked for telemetry. + */ +boolean_t telemetry_sample_all_tasks = FALSE; +uint32_t telemetry_active_tasks = 0; // Number of tasks opted into telemetry + +uint32_t telemetry_timestamp = 0; + +vm_offset_t telemetry_buffer = 0; +uint32_t telemetry_buffer_size = 0; +uint32_t telemetry_buffer_current_position = 0; +uint32_t telemetry_buffer_end_point = 0; // If we've wrapped, where does the last record end? +int telemetry_bytes_since_last_mark = -1; // How much data since buf was last marked? +int telemetry_buffer_notify_at = 0; + +lck_grp_t telemetry_lck_grp; +lck_mtx_t telemetry_mtx; + +#define TELEMETRY_LOCK() do { lck_mtx_lock(&telemetry_mtx); } while(0) +#define TELEMETRY_TRY_SPIN_LOCK() lck_mtx_try_lock_spin(&telemetry_mtx) +#define TELEMETRY_UNLOCK() do { lck_mtx_unlock(&telemetry_mtx); } while(0) + +void telemetry_init(void) +{ + kern_return_t ret; + uint32_t telemetry_notification_leeway; + + lck_grp_init(&telemetry_lck_grp, "telemetry group", LCK_GRP_ATTR_NULL); + lck_mtx_init(&telemetry_mtx, &telemetry_lck_grp, LCK_ATTR_NULL); + + if (!PE_parse_boot_argn("telemetry_buffer_size", &telemetry_buffer_size, sizeof(telemetry_buffer_size))) { + telemetry_buffer_size = TELEMETRY_DEFAULT_BUFFER_SIZE; + } + + if (telemetry_buffer_size > TELEMETRY_MAX_BUFFER_SIZE) + telemetry_buffer_size = TELEMETRY_MAX_BUFFER_SIZE; + + ret = kmem_alloc(kernel_map, &telemetry_buffer, telemetry_buffer_size); + if (ret != KERN_SUCCESS) { + kprintf("Telemetry: Allocation failed: %d\n", ret); + return; + } + + if (!PE_parse_boot_argn("telemetry_notification_leeway", &telemetry_notification_leeway, sizeof(telemetry_notification_leeway))) { + /* + * By default, notify the user to collect the buffer when there is this much space left in the buffer. 
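A worked example of the threshold arithmetic computed below: with the default 16 KB buffer and 4 KB leeway, userspace is notified once the write cursor passes 12 KB, and a nonsensical leeway (at least the buffer size) falls back to the default, matching the boot-arg validation:

```c
/* Standalone arithmetic sketch of telemetry_buffer_notify_at. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t buf_size = 16 * 1024;   /* TELEMETRY_DEFAULT_BUFFER_SIZE */
	uint32_t leeway   = 4 * 1024;    /* TELEMETRY_DEFAULT_NOTIFY_LEEWAY */

	if (leeway >= buf_size)
		leeway = 4 * 1024;           /* reset nonsensical boot-arg */

	uint32_t notify_at = buf_size - leeway;
	printf("notify userspace at offset %u of %u\n", notify_at, buf_size);
	return 0;
}
```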
+ */ + telemetry_notification_leeway = TELEMETRY_DEFAULT_NOTIFY_LEEWAY; + } + if (telemetry_notification_leeway >= telemetry_buffer_size) { + printf("telemetry: nonsensical telemetry_notification_leeway boot-arg %d changed to %d\n", + telemetry_notification_leeway, TELEMETRY_DEFAULT_NOTIFY_LEEWAY); + telemetry_notification_leeway = TELEMETRY_DEFAULT_NOTIFY_LEEWAY; + } + telemetry_buffer_notify_at = telemetry_buffer_size - telemetry_notification_leeway; + + if (!PE_parse_boot_argn("telemetry_sample_rate", &telemetry_sample_rate, sizeof(telemetry_sample_rate))) { + telemetry_sample_rate = TELEMETRY_DEFAULT_SAMPLE_RATE; + } + + /* + * To enable telemetry for all tasks, include "telemetry_sample_all_tasks=1" in boot-args. + */ + if (!PE_parse_boot_argn("telemetry_sample_all_tasks", &telemetry_sample_all_tasks, sizeof(telemetry_sample_all_tasks))) { + + telemetry_sample_all_tasks = TRUE; + + } + + kprintf("Telemetry: Sampling %stasks once per %u second%s\n", + (telemetry_sample_all_tasks) ? "all " : "", + telemetry_sample_rate, telemetry_sample_rate == 1 ? "" : "s"); +} + +/* + * Enable or disable global microstackshots (ie telemetry_sample_all_tasks). + * + * enable_disable == 1: turn it on + * enable_disable == 0: turn it off + */ +void +telemetry_global_ctl(int enable_disable) +{ + if (enable_disable == 1) { + telemetry_sample_all_tasks = TRUE; + } else { + telemetry_sample_all_tasks = FALSE; + } +} + +/* + * Opt the given task into or out of the telemetry stream. + * + * Supported reasons (callers may use any or all of): + * TF_CPUMON_WARNING + * TF_WAKEMON_WARNING + * + * enable_disable == 1: turn it on + * enable_disable == 0: turn it off + */ +void +telemetry_task_ctl(task_t task, uint32_t reasons, int enable_disable) +{ + task_lock(task); + telemetry_task_ctl_locked(task, reasons, enable_disable); + task_unlock(task); +} + +void +telemetry_task_ctl_locked(task_t task, uint32_t reasons, int enable_disable) +{ + uint32_t origflags; + + assert((reasons != 0) && ((reasons | TF_TELEMETRY) == TF_TELEMETRY)); + + task_lock_assert_owned(task); + + origflags = task->t_flags; + + if (enable_disable == 1) { + task->t_flags |= reasons; + if ((origflags & TF_TELEMETRY) == 0) { + OSIncrementAtomic(&telemetry_active_tasks); +#if TELEMETRY_DEBUG + printf("%s: telemetry OFF -> ON (%d active)\n", proc_name_address(task->bsd_info), telemetry_active_tasks); +#endif + } + } else { + task->t_flags &= ~reasons; + if (((origflags & TF_TELEMETRY) != 0) && ((task->t_flags & TF_TELEMETRY) == 0)) { + /* + * If this task went from having at least one telemetry bit to having none, + * the net change was to disable telemetry for the task. + */ + OSDecrementAtomic(&telemetry_active_tasks); +#if TELEMETRY_DEBUG + printf("%s: telemetry ON -> OFF (%d active)\n", proc_name_address(task->bsd_info), telemetry_active_tasks); +#endif + } + } +} + +/* + * Determine if the current thread is eligible for telemetry: + * + * telemetry_sample_all_tasks: All threads are eligible. This takes precedence. + * telemetry_active_tasks: Count of tasks opted in. + * task->t_flags & TF_TELEMETRY: This task is opted in. + */ +static boolean_t +telemetry_is_active(thread_t thread) +{ + if (telemetry_sample_all_tasks == TRUE) { + return (TRUE); + } + + if ((telemetry_active_tasks > 0) && ((thread->task->t_flags & TF_TELEMETRY) != 0)) { + return (TRUE); + } + + return (FALSE); +} + +/* + * Userland is arming a timer. If we are eligible for such a record, + * sample now. 
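The opt-in accounting in telemetry_task_ctl_locked() above lets several independent reasons share one flag word, and moves the global active-task count only on the zero-to-nonzero and nonzero-to-zero transitions of that word. A sketch of just that transition logic (the TF_* values here are illustrative; only the names appear in the hunk above):

```c
/* Sketch of reason-bitmask opt-in counting. */
#include <stdint.h>

#define TF_CPUMON_WARNING  0x1u   /* values illustrative */
#define TF_WAKEMON_WARNING 0x2u
#define TF_TELEMETRY       (TF_CPUMON_WARNING | TF_WAKEMON_WARNING)

static uint32_t active_tasks;

static void
task_ctl(uint32_t *t_flags, uint32_t reasons, int enable)
{
	uint32_t orig = *t_flags;

	if (enable) {
		*t_flags |= reasons;
		if ((orig & TF_TELEMETRY) == 0)
			active_tasks++;              /* first reason set */
	} else {
		*t_flags &= ~reasons;
		if ((orig & TF_TELEMETRY) != 0 && (*t_flags & TF_TELEMETRY) == 0)
			active_tasks--;              /* last reason cleared */
	}
}
```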
No need to do this one at the AST because we're already at + * a safe place in this system call. + */ +int telemetry_timer_event(__unused uint64_t deadline, __unused uint64_t interval, __unused uint64_t leeway) +{ + if (telemetry_needs_timer_arming_record == TRUE) { + telemetry_needs_timer_arming_record = FALSE; + telemetry_take_sample(current_thread(), kTimerArmingRecord | kUserMode); + } + + return (0); +} + +/* + * Mark the current thread for an interrupt-based + * telemetry record, to be sampled at the next AST boundary. + */ +void telemetry_mark_curthread(boolean_t interrupted_userspace) +{ + thread_t thread = current_thread(); + + /* + * If telemetry isn't active for this thread, return and try + * again next time. + */ + if (telemetry_is_active(thread) == FALSE) { + return; + } + + telemetry_needs_record = FALSE; + thread_ast_set(thread, interrupted_userspace ? AST_TELEMETRY_USER : AST_TELEMETRY_KERNEL); + ast_propagate(thread->ast); +} + +void compute_telemetry(void *arg __unused) +{ + if (telemetry_sample_all_tasks || (telemetry_active_tasks > 0)) { + if ((++telemetry_timestamp) % telemetry_sample_rate == 0) { + /* + * To avoid overloading the system with telemetry ASTs, make + * sure we don't add more requests while existing ones + * are in-flight. + */ + if (TELEMETRY_TRY_SPIN_LOCK()) { + telemetry_needs_record = TRUE; + telemetry_needs_timer_arming_record = TRUE; + TELEMETRY_UNLOCK(); + } + } + } +} + +/* + * If userland has registered a port for telemetry notifications, send one now. + */ +static void +telemetry_notify_user(void) +{ + mach_port_t user_port; + uint32_t flags = 0; + int error; + + error = host_get_telemetry_port(host_priv_self(), &user_port); + if ((error != KERN_SUCCESS) || !IPC_PORT_VALID(user_port)) { + return; + } + + telemetry_notification(user_port, flags); +} + +void telemetry_ast(thread_t thread, boolean_t interrupted_userspace) +{ + uint8_t microsnapshot_flags = kInterruptRecord; + + if (interrupted_userspace) + microsnapshot_flags |= kUserMode; + + telemetry_take_sample(thread, microsnapshot_flags); +} + +void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags) +{ + task_t task; + void *p; + struct kperf_context ctx; + struct callstack cs; + uint32_t btcount, bti; + struct micro_snapshot *msnap; + struct task_snapshot *tsnap; + struct thread_snapshot *thsnap; + clock_sec_t secs; + clock_usec_t usecs; + vm_size_t framesize; + uint32_t current_record_start; + uint32_t tmp = 0; + boolean_t notify = FALSE; + + if (thread == THREAD_NULL) + return; + + task = thread->task; + if ((task == TASK_NULL) || (task == kernel_task)) + return; + + /* telemetry_XXX accessed outside of lock for instrumentation only */ + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_RECORD) | DBG_FUNC_START, microsnapshot_flags, telemetry_bytes_since_last_mark, 0, 0, 0); + + p = get_bsdtask_info(task); + + ctx.cur_thread = thread; + ctx.cur_pid = proc_pid(p); + + /* + * Gather up the data we'll need for this sample. The sample is written into the kernel + * buffer with the global telemetry lock held -- so we must do our (possibly faulting) + * copies from userland here, before taking the lock. + */ + kperf_ucallstack_sample(&cs, &ctx); + if (!(cs.flags & CALLSTACK_VALID)) + return; + + /* + * Find the actual [slid] address of the shared cache's UUID, and copy it in from userland. 
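The offset calculation below is the hand-rolled form of offsetof(): rather than copying the whole shared-cache header in from userland, the sampler computes the byte offset of the one field it wants and reads just that. A sketch with a placeholder struct (the real _dyld_cache_header layout differs):

```c
/* Sketch of a partial copy via offsetof(); struct fields are placeholders. */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct cache_header {
	char     magic[16];
	uint32_t mappingOffset;
	uint32_t mappingCount;
	uint8_t  uuid[16];       /* the only field the sampler wants */
};

static void
read_uuid_only(const void *base, uint8_t out[16])
{
	size_t off = offsetof(struct cache_header, uuid);
	/* memcpy stands in for the faulting copyin(base + off, ...) */
	memcpy(out, (const char *)base + off, sizeof(((struct cache_header *)0)->uuid));
}
```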
+ */ + int shared_cache_uuid_valid = 0; + uint64_t shared_cache_base_address; + struct _dyld_cache_header shared_cache_header; + uint64_t shared_cache_slide; + + /* + * Don't copy in the entire shared cache header; we only need the UUID. Calculate the + * offset of that one field. + */ + int sc_header_uuid_offset = (char *)&shared_cache_header.uuid - (char *)&shared_cache_header; + vm_shared_region_t sr = vm_shared_region_get(task); + if (sr != NULL) { + if ((vm_shared_region_start_address(sr, &shared_cache_base_address) == KERN_SUCCESS) && + (copyin(shared_cache_base_address + sc_header_uuid_offset, (char *)&shared_cache_header.uuid, + sizeof (shared_cache_header.uuid)) == 0)) { + shared_cache_uuid_valid = 1; + shared_cache_slide = vm_shared_region_get_slide(sr); + } + // vm_shared_region_get() gave us a reference on the shared region. + vm_shared_region_deallocate(sr); + } + + /* + * Retrieve the array of UUID's for binaries used by this task. + * We reach down into DYLD's data structures to find the array. + * + * XXX - make this common with kdp? + */ + uint32_t uuid_info_count = 0; + mach_vm_address_t uuid_info_addr = 0; + if (task_has_64BitAddr(task)) { + struct user64_dyld_all_image_infos task_image_infos; + if (copyin(task->all_image_info_addr, (char *)&task_image_infos, sizeof(task_image_infos)) == 0) { + uuid_info_count = (uint32_t)task_image_infos.uuidArrayCount; + uuid_info_addr = task_image_infos.uuidArray; + } + } else { + struct user32_dyld_all_image_infos task_image_infos; + if (copyin(task->all_image_info_addr, (char *)&task_image_infos, sizeof(task_image_infos)) == 0) { + uuid_info_count = task_image_infos.uuidArrayCount; + uuid_info_addr = task_image_infos.uuidArray; + } + } + + /* + * If we get a NULL uuid_info_addr (which can happen when we catch dyld in the middle of updating + * this data structure), we zero the uuid_info_count so that we won't even try to save load info + * for this task. + */ + if (!uuid_info_addr) { + uuid_info_count = 0; + } + + uint32_t uuid_info_size = (uint32_t)(task_has_64BitAddr(thread->task) ? sizeof(struct user64_dyld_uuid_info) : sizeof(struct user32_dyld_uuid_info)); + uint32_t uuid_info_array_size = uuid_info_count * uuid_info_size; + char *uuid_info_array = NULL; + + if (uuid_info_count > 0) { + if ((uuid_info_array = (char *)kalloc(uuid_info_array_size)) == NULL) { + return; + } + + /* + * Copy in the UUID info array. + * It may be nonresident, in which case just fix up nloadinfos to 0 in the task snapshot. + */ + if (copyin(uuid_info_addr, uuid_info_array, uuid_info_array_size) != 0) { + kfree(uuid_info_array, uuid_info_array_size); + uuid_info_array = NULL; + uuid_info_array_size = 0; + } + } + + /* + * Look for a dispatch queue serial number, and copy it in from userland if present. + */ + uint64_t dqserialnum = 0; + int dqserialnum_valid = 0; + + uint64_t dqkeyaddr = thread_dispatchqaddr(thread); + if (dqkeyaddr != 0) { + uint64_t dqaddr = 0; + uint64_t dq_serialno_offset = get_dispatchqueue_serialno_offset_from_proc(task->bsd_info); + if ((copyin(dqkeyaddr, (char *)&dqaddr, (task_has_64BitAddr(task) ? 8 : 4)) == 0) && + (dqaddr != 0) && (dq_serialno_offset != 0)) { + uint64_t dqserialnumaddr = dqaddr + dq_serialno_offset; + if (copyin(dqserialnumaddr, (char *)&dqserialnum, (task_has_64BitAddr(task) ? 
8 : 4)) == 0) { + dqserialnum_valid = 1; + } + } + } + + clock_get_calendar_microtime(&secs, &usecs); + + TELEMETRY_LOCK(); + + /* + * We do the bulk of the operation under the telemetry lock, on assumption that + * any page faults during execution will not cause another AST_TELEMETRY_ALL + * to deadlock; they will just block until we finish. This makes it easier + * to copy into the buffer directly. As soon as we unlock, userspace can copy + * out of our buffer. + */ + +copytobuffer: + + current_record_start = telemetry_buffer_current_position; + + if ((telemetry_buffer_size - telemetry_buffer_current_position) < sizeof(struct micro_snapshot)) { + /* + * We can't fit a record in the space available, so wrap around to the beginning. + * Save the current position as the known end point of valid data. + */ + telemetry_buffer_end_point = current_record_start; + telemetry_buffer_current_position = 0; + goto copytobuffer; + } + + msnap = (struct micro_snapshot *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position); + msnap->snapshot_magic = STACKSHOT_MICRO_SNAPSHOT_MAGIC; + msnap->ms_flags = microsnapshot_flags; + msnap->ms_opaque_flags = 0; /* namespace managed by userspace */ + msnap->ms_cpu = 0; /* XXX - does this field make sense for a micro-stackshot? */ + msnap->ms_time = secs; + msnap->ms_time_microsecs = usecs; + + telemetry_buffer_current_position += sizeof(struct micro_snapshot); + + if ((telemetry_buffer_size - telemetry_buffer_current_position) < sizeof(struct task_snapshot)) { + telemetry_buffer_end_point = current_record_start; + telemetry_buffer_current_position = 0; + goto copytobuffer; + } + + tsnap = (struct task_snapshot *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position); + bzero(tsnap, sizeof(*tsnap)); + tsnap->snapshot_magic = STACKSHOT_TASK_SNAPSHOT_MAGIC; + tsnap->pid = proc_pid(p); + tsnap->uniqueid = proc_uniqueid(p); + tsnap->user_time_in_terminated_threads = task->total_user_time; + tsnap->system_time_in_terminated_threads = task->total_system_time; + tsnap->suspend_count = task->suspend_count; + tsnap->task_size = pmap_resident_count(task->map->pmap); + tsnap->faults = task->faults; + tsnap->pageins = task->pageins; + tsnap->cow_faults = task->cow_faults; + /* + * The throttling counters are maintained as 64-bit counters in the proc + * structure. However, we reserve 32-bits (each) for them in the task_snapshot + * struct to save space and since we do not expect them to overflow 32-bits. 
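 *
 * [Editor's aside, not part of the patch: rough arithmetic on why 32 bits
 * is deemed enough. A uint32_t tops out at 2^32 - 1, about 4.29 billion
 * throttle events; even sustaining one event per millisecond,
 *
 *	2^32 ms ~= 4,294,967 s ~= 49.7 days
 *
 * of continuous throttling would pass before the (uint32_t) casts below
 * wrapped.]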
If we + * find these values overflowing in the future, the fix would be to simply + * upgrade these counters to 64-bit in the task_snapshot struct + */ + tsnap->was_throttled = (uint32_t) proc_was_throttled(p); + tsnap->did_throttle = (uint32_t) proc_did_throttle(p); + + if (task->t_flags & TF_TELEMETRY) { + tsnap->ss_flags |= kTaskRsrcFlagged; + } + + proc_get_darwinbgstate(task, &tmp); + + if (tmp & PROC_FLAG_DARWINBG) { + tsnap->ss_flags |= kTaskDarwinBG; + } + if (tmp & PROC_FLAG_EXT_DARWINBG) { + tsnap->ss_flags |= kTaskExtDarwinBG; + } + + if (task->requested_policy.t_role == TASK_FOREGROUND_APPLICATION) { + tsnap->ss_flags |= kTaskIsForeground; + } + + if (tmp & PROC_FLAG_ADAPTIVE_IMPORTANT) { + tsnap->ss_flags |= kTaskIsBoosted; + } + + if (tmp & PROC_FLAG_SUPPRESSED) { + tsnap->ss_flags |= kTaskIsSuppressed; + } + + tsnap->latency_qos = task_grab_latency_qos(task); + + strlcpy(tsnap->p_comm, proc_name_address(p), sizeof(tsnap->p_comm)); + if (task_has_64BitAddr(thread->task)) { + tsnap->ss_flags |= kUser64_p; + } + + if (shared_cache_uuid_valid) { + tsnap->shared_cache_slide = shared_cache_slide; + bcopy(shared_cache_header.uuid, tsnap->shared_cache_identifier, sizeof (shared_cache_header.uuid)); + } + + telemetry_buffer_current_position += sizeof(struct task_snapshot); + + /* + * Directly after the task snapshot, place the array of UUID's corresponding to the binaries + * used by this task. + */ + if ((telemetry_buffer_size - telemetry_buffer_current_position) < uuid_info_array_size) { + telemetry_buffer_end_point = current_record_start; + telemetry_buffer_current_position = 0; + goto copytobuffer; + } + + /* + * Copy the UUID info array into our sample. + */ + if (uuid_info_array_size > 0) { + bcopy(uuid_info_array, (char *)(telemetry_buffer + telemetry_buffer_current_position), uuid_info_array_size); + tsnap->nloadinfos = uuid_info_count; + } + + telemetry_buffer_current_position += uuid_info_array_size; + + /* + * After the task snapshot & list of binary UUIDs, we place a thread snapshot. + */ + + if ((telemetry_buffer_size - telemetry_buffer_current_position) < sizeof(struct thread_snapshot)) { + /* wrap and overwrite */ + telemetry_buffer_end_point = current_record_start; + telemetry_buffer_current_position = 0; + goto copytobuffer; + } + + thsnap = (struct thread_snapshot *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position); + bzero(thsnap, sizeof(*thsnap)); + + thsnap->snapshot_magic = STACKSHOT_THREAD_SNAPSHOT_MAGIC; + thsnap->thread_id = thread_tid(thread); + thsnap->state = thread->state; + thsnap->priority = thread->priority; + thsnap->sched_pri = thread->sched_pri; + thsnap->sched_flags = thread->sched_flags; + thsnap->ss_flags |= kStacksPCOnly; + + if (thread->effective_policy.darwinbg) { + thsnap->ss_flags |= kThreadDarwinBG; + } + + thsnap->user_time = timer_grab(&thread->user_timer); + + uint64_t tval = timer_grab(&thread->system_timer); + + if (thread->precise_user_kernel_time) { + thsnap->system_time = tval; + } else { + thsnap->user_time += tval; + thsnap->system_time = 0; + } + + telemetry_buffer_current_position += sizeof(struct thread_snapshot); + + /* + * If this thread has a dispatch queue serial number, include it here. 
+ */ + if (dqserialnum_valid) { + if ((telemetry_buffer_size - telemetry_buffer_current_position) < sizeof(dqserialnum)) { + /* wrap and overwrite */ + telemetry_buffer_end_point = current_record_start; + telemetry_buffer_current_position = 0; + goto copytobuffer; + } + + thsnap->ss_flags |= kHasDispatchSerial; + bcopy(&dqserialnum, (char *)telemetry_buffer + telemetry_buffer_current_position, sizeof (dqserialnum)); + telemetry_buffer_current_position += sizeof (dqserialnum); + } + + if (task_has_64BitAddr(task)) { + framesize = 8; + thsnap->ss_flags |= kUser64_p; + } else { + framesize = 4; + } + + btcount = cs.nframes; + + /* + * If we can't fit this entire stacktrace then cancel this record, wrap to the beginning, + * and start again there so that we always store a full record. + */ + if ((telemetry_buffer_size - telemetry_buffer_current_position)/framesize < btcount) { + telemetry_buffer_end_point = current_record_start; + telemetry_buffer_current_position = 0; + goto copytobuffer; + } + + for (bti=0; bti < btcount; bti++, telemetry_buffer_current_position += framesize) { + if (framesize == 8) { + *(uint64_t *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position) = cs.frames[bti]; + } else { + *(uint32_t *)(uintptr_t)(telemetry_buffer + telemetry_buffer_current_position) = (uint32_t)cs.frames[bti]; + } + } + + if (telemetry_buffer_end_point < telemetry_buffer_current_position) { + /* + * Each time the cursor wraps around to the beginning, we leave a + * differing amount of unused space at the end of the buffer. Make + * sure the cursor pushes the end point in case we're making use of + * more of the buffer than we did the last time we wrapped. + */ + telemetry_buffer_end_point = telemetry_buffer_current_position; + } + + thsnap->nuser_frames = btcount; + + telemetry_bytes_since_last_mark += (telemetry_buffer_current_position - current_record_start); + if (telemetry_bytes_since_last_mark > telemetry_buffer_notify_at) { + notify = TRUE; + } + + TELEMETRY_UNLOCK(); + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_RECORD) | DBG_FUNC_END, notify, telemetry_bytes_since_last_mark, telemetry_buffer_current_position, telemetry_buffer_end_point, 0); + + if (notify) { + telemetry_notify_user(); + } + + if (uuid_info_array != NULL) { + kfree(uuid_info_array, uuid_info_array_size); + } +} + +#if TELEMETRY_DEBUG +static void +log_telemetry_output(vm_offset_t buf, uint32_t pos, uint32_t sz) +{ + struct micro_snapshot *p; + uint32_t offset; + + printf("Copying out %d bytes of telemetry at offset %d\n", sz, pos); + + buf += pos; + + /* + * Find and log each timestamp in this chunk of buffer. + */ + for (offset = 0; offset < sz; offset++) { + p = (struct micro_snapshot *)(buf + offset); + if (p->snapshot_magic == STACKSHOT_MICRO_SNAPSHOT_MAGIC) { + printf("telemetry timestamp: %lld\n", p->ms_time); + } + } +} +#endif + +int telemetry_gather(user_addr_t buffer, uint32_t *length, boolean_t mark) +{ + int result = 0; + uint32_t oldest_record_offset; + + KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_GATHER) | DBG_FUNC_START, mark, telemetry_bytes_since_last_mark, 0, 0, 0); + + TELEMETRY_LOCK(); + + if (telemetry_buffer == 0) { + *length = 0; + goto out; + } + + if (*length < telemetry_buffer_size) { + result = KERN_NO_SPACE; + goto out; + } + + /* + * Copy the ring buffer out to userland in order sorted by time: least recent to most recent. + * First, we need to search forward from the cursor to find the oldest record in our buffer. 
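 *
 * [Editor's aside -- illustrative model, not part of the patch.] The
 * invariants telemetry_take_sample() maintains make this simple: records
 * never straddle the end of the buffer (a record that will not fit wraps
 * whole to offset 0), the cursor marks where the next record goes, and
 * end_point marks where valid data stops after the most recent wrap. Time
 * order is therefore at most two contiguous chunks, so the copyout below
 * reduces to:
 *
 *	// oldest..end_point first, then 0..cursor
 *	if (oldest != 0) {
 *		memcpy(out, buf + oldest, end_point - oldest);
 *		copied = end_point - oldest;
 *	}
 *	memcpy(out + copied, buf, cursor);
 *
 * with "oldest" found by scanning byte-by-byte from the cursor for the
 * record magic, since records carry no alignment guarantee.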
+ */
+	oldest_record_offset = telemetry_buffer_current_position;
+	do {
+		if ((oldest_record_offset == telemetry_buffer_size) ||
+		    (oldest_record_offset == telemetry_buffer_end_point)) {
+
+			if (*(uint32_t *)(uintptr_t)(telemetry_buffer) == 0) {
+				/*
+				 * There is no magic number at the start of the buffer, which means
+				 * it's empty; nothing to see here yet.
+				 */
+				*length = 0;
+				goto out;
+			}
+			/*
+			 * We've looked through the end of the active buffer without finding a valid
+			 * record; that means all valid records are in a single chunk, beginning at
+			 * the very start of the buffer.
+			 */
+
+			oldest_record_offset = 0;
+			assert(*(uint32_t *)(uintptr_t)(telemetry_buffer) == STACKSHOT_MICRO_SNAPSHOT_MAGIC);
+			break;
+		}
+
+		if (*(uint32_t *)(uintptr_t)(telemetry_buffer + oldest_record_offset) == STACKSHOT_MICRO_SNAPSHOT_MAGIC)
+			break;
+
+		/*
+		 * There are no alignment guarantees for micro-stackshot records, so we must search at each
+		 * byte offset.
+		 */
+		oldest_record_offset++;
+	} while (oldest_record_offset != telemetry_buffer_current_position);
+
+	/*
+	 * If needed, copyout in two chunks: from the oldest record to the end of the buffer, and then
+	 * from the beginning of the buffer up to the current position.
+	 */
+	if (oldest_record_offset != 0) {
+#if TELEMETRY_DEBUG
+		log_telemetry_output(telemetry_buffer, oldest_record_offset,
+		    telemetry_buffer_end_point - oldest_record_offset);
+#endif
+		if ((result = copyout((void *)(telemetry_buffer + oldest_record_offset), buffer,
+		    telemetry_buffer_end_point - oldest_record_offset)) != 0) {
+			*length = 0;
+			goto out;
+		}
+		*length = telemetry_buffer_end_point - oldest_record_offset;
+	} else {
+		*length = 0;
+	}
+
+#if TELEMETRY_DEBUG
+	log_telemetry_output(telemetry_buffer, 0, telemetry_buffer_current_position);
+#endif
+	if ((result = copyout((void *)telemetry_buffer, buffer + *length,
+	    telemetry_buffer_current_position)) != 0) {
+		*length = 0;
+		goto out;
+	}
+	*length += (uint32_t)telemetry_buffer_current_position;
+
+out:
+
+	if (mark && (*length > 0)) {
+		telemetry_bytes_since_last_mark = 0;
+	}
+
+	TELEMETRY_UNLOCK();
+
+	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_STACKSHOT, MICROSTACKSHOT_GATHER) | DBG_FUNC_END, telemetry_buffer_current_position, *length, telemetry_buffer_end_point, 0, 0);
+
+	return (result);
+}
+
+/************************/
+/* BOOT PROFILE SUPPORT */
+/************************/
+/*
+ * Boot Profiling
+ *
+ * The boot-profiling support is a mechanism to sample activity happening on the
+ * system during boot. This mechanism sets up a periodic timer and on every timer fire,
+ * captures a full backtrace into the boot profiling buffer. This buffer can be pulled
+ * out and analyzed from user-space. It is turned on using the following boot-args:
+ * "bootprofile_buffer_size" specifies the size of the boot profile buffer
+ * "bootprofile_interval_ms" specifies the interval for the profiling timer
+ *
+ * Process Specific Boot Profiling
+ *
+ * The boot-arg "bootprofile_proc_name" can be used to specify a certain
+ * process that needs to be profiled during boot. Setting this boot-arg changes
+ * the way stackshots are captured. At every timer fire, the code looks at the
+ * currently running process and takes a stackshot only if the requested process
+ * is on-core (which makes it unsuitable for MP systems).
+ * + */ + +#define BOOTPROFILE_MAX_BUFFER_SIZE (64*1024*1024) /* see also COPYSIZELIMIT_PANIC */ + +vm_offset_t bootprofile_buffer = 0; +uint32_t bootprofile_buffer_size = 0; +uint32_t bootprofile_buffer_current_position = 0; +uint32_t bootprofile_interval_ms = 0; +uint64_t bootprofile_interval_abs = 0; +uint64_t bootprofile_next_deadline = 0; +uint32_t bootprofile_all_procs = 0; +char bootprofile_proc_name[17]; + +lck_grp_t bootprofile_lck_grp; +lck_mtx_t bootprofile_mtx; + +static timer_call_data_t bootprofile_timer_call_entry; + +#define BOOTPROFILE_LOCK() do { lck_mtx_lock(&bootprofile_mtx); } while(0) +#define BOOTPROFILE_TRY_SPIN_LOCK() lck_mtx_try_lock_spin(&bootprofile_mtx) +#define BOOTPROFILE_UNLOCK() do { lck_mtx_unlock(&bootprofile_mtx); } while(0) + +static void bootprofile_timer_call( + timer_call_param_t param0, + timer_call_param_t param1); + +extern int +stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint32_t flags, unsigned *retbytes); + +void bootprofile_init(void) +{ + kern_return_t ret; + + lck_grp_init(&bootprofile_lck_grp, "bootprofile group", LCK_GRP_ATTR_NULL); + lck_mtx_init(&bootprofile_mtx, &bootprofile_lck_grp, LCK_ATTR_NULL); + + if (!PE_parse_boot_argn("bootprofile_buffer_size", &bootprofile_buffer_size, sizeof(bootprofile_buffer_size))) { + bootprofile_buffer_size = 0; + } + + if (bootprofile_buffer_size > BOOTPROFILE_MAX_BUFFER_SIZE) + bootprofile_buffer_size = BOOTPROFILE_MAX_BUFFER_SIZE; + + if (!PE_parse_boot_argn("bootprofile_interval_ms", &bootprofile_interval_ms, sizeof(bootprofile_interval_ms))) { + bootprofile_interval_ms = 0; + } + + if (!PE_parse_boot_argn("bootprofile_proc_name", &bootprofile_proc_name, sizeof(bootprofile_proc_name))) { + bootprofile_all_procs = 1; + bootprofile_proc_name[0] = '\0'; + } + + clock_interval_to_absolutetime_interval(bootprofile_interval_ms, NSEC_PER_MSEC, &bootprofile_interval_abs); + + /* Both boot args must be set to enable */ + if ((bootprofile_buffer_size == 0) || (bootprofile_interval_abs == 0)) { + return; + } + + ret = kmem_alloc(kernel_map, &bootprofile_buffer, bootprofile_buffer_size); + if (ret != KERN_SUCCESS) { + kprintf("Boot profile: Allocation failed: %d\n", ret); + return; + } + + kprintf("Boot profile: Sampling %s once per %u ms\n", bootprofile_all_procs ? "all procs" : bootprofile_proc_name, bootprofile_interval_ms); + + timer_call_setup(&bootprofile_timer_call_entry, + bootprofile_timer_call, + NULL); + + bootprofile_next_deadline = mach_absolute_time() + bootprofile_interval_abs; + timer_call_enter_with_leeway(&bootprofile_timer_call_entry, + NULL, + bootprofile_next_deadline, + 0, + TIMER_CALL_SYS_NORMAL, + FALSE); +} + +static void bootprofile_timer_call( + timer_call_param_t param0 __unused, + timer_call_param_t param1 __unused) +{ + unsigned retbytes = 0; + int pid_to_profile = -1; + + if (!BOOTPROFILE_TRY_SPIN_LOCK()) { + goto reprogram; + } + + /* Check if process-specific boot profiling is turned on */ + if (!bootprofile_all_procs) { + /* + * Since boot profiling initializes really early in boot, it is + * possible that at this point, the task/proc is not initialized. + * Nothing to do in that case. + */ + + if ((current_task() != NULL) && (current_task()->bsd_info != NULL) && + (0 == strncmp(bootprofile_proc_name, proc_name_address(current_task()->bsd_info), 17))) { + pid_to_profile = proc_selfpid(); + } + else { + /* + * Process-specific boot profiling requested but the on-core process is + * something else. Nothing to do here. 
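 *
 * [Editor's aside, not part of the patch.] The reprogram path below keeps
 * the timer periodic without accumulating drift: each new deadline is
 * advanced from the previous deadline rather than from "now", resyncing
 * only if the timer has fallen a whole period behind. Roughly:
 *
 *	deadline += interval;
 *	if (deadline <= now)
 *		deadline = now + interval;
 *
 * which is, to a first approximation, what
 * clock_deadline_for_periodic_event() does with bootprofile_interval_abs.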
+ */ + BOOTPROFILE_UNLOCK(); + goto reprogram; + } + } + + /* initiate a stackshot with whatever portion of the buffer is left */ + if (bootprofile_buffer_current_position < bootprofile_buffer_size) { + stack_snapshot_from_kernel( + pid_to_profile, + (void *)(bootprofile_buffer + bootprofile_buffer_current_position), + bootprofile_buffer_size - bootprofile_buffer_current_position, + STACKSHOT_SAVE_LOADINFO | STACKSHOT_SAVE_KEXT_LOADINFO | STACKSHOT_GET_GLOBAL_MEM_STATS, + &retbytes + ); + + bootprofile_buffer_current_position += retbytes; + } + + BOOTPROFILE_UNLOCK(); + + /* If we didn't get any data or have run out of buffer space, stop profiling */ + if ((retbytes == 0) || (bootprofile_buffer_current_position == bootprofile_buffer_size)) { + return; + } + + +reprogram: + /* If the user gathered the buffer, no need to keep profiling */ + if (bootprofile_interval_abs == 0) { + return; + } + + clock_deadline_for_periodic_event(bootprofile_interval_abs, + mach_absolute_time(), + &bootprofile_next_deadline); + timer_call_enter_with_leeway(&bootprofile_timer_call_entry, + NULL, + bootprofile_next_deadline, + 0, + TIMER_CALL_SYS_NORMAL, + FALSE); +} + +int bootprofile_gather(user_addr_t buffer, uint32_t *length) +{ + int result = 0; + + BOOTPROFILE_LOCK(); + + if (bootprofile_buffer == 0) { + *length = 0; + goto out; + } + + if (*length < bootprofile_buffer_current_position) { + result = KERN_NO_SPACE; + goto out; + } + + if ((result = copyout((void *)bootprofile_buffer, buffer, + bootprofile_buffer_current_position)) != 0) { + *length = 0; + goto out; + } + *length = bootprofile_buffer_current_position; + + /* cancel future timers */ + bootprofile_interval_abs = 0; + +out: + + BOOTPROFILE_UNLOCK(); + + return (result); +} diff --git a/osfmk/kern/telemetry.h b/osfmk/kern/telemetry.h new file mode 100644 index 000000000..fe5cc6ca0 --- /dev/null +++ b/osfmk/kern/telemetry.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifndef _KERNEL_TELEMETRY_H_ +#define _KERNEL_TELEMETRY_H_ + +#include +#include +#include + +extern volatile boolean_t telemetry_needs_record; + +extern void telemetry_init(void); + +extern void compute_telemetry(void *); + +extern void telemetry_ast(thread_t, boolean_t interrupted_userspace); + +extern int telemetry_gather(user_addr_t buffer, uint32_t *length, boolean_t mark); + +extern void telemetry_mark_curthread(boolean_t interrupted_userspace); + +extern void telemetry_task_ctl(task_t task, uint32_t reason, int enable_disable); +extern void telemetry_task_ctl_locked(task_t task, uint32_t reason, int enable_disable); +extern void telemetry_global_ctl(int enable_disable); + +extern int telemetry_timer_event(uint64_t deadline, uint64_t interval, uint64_t leeway); + +#define TELEMETRY_CMD_TIMER_EVENT 1 + +extern void bootprofile_init(void); +extern int bootprofile_gather(user_addr_t buffer, uint32_t *length); + +#endif /* _KERNEL_TELEMETRY_H_ */ diff --git a/osfmk/kern/template.mk b/osfmk/kern/template.mk deleted file mode 100644 index 8d40fb3bd..000000000 --- a/osfmk/kern/template.mk +++ /dev/null @@ -1,68 +0,0 @@ -# -# @OSF_FREE_COPYRIGHT@ -# -# -# HISTORY -# -# Revision 1.1.1.1 1998/09/22 21:05:32 wsanchez -# Import of Mac OS X kernel (~semeria) -# -# Revision 1.1.1.1 1998/03/07 02:25:56 wsanchez -# Import of OSF Mach kernel (~mburg) -# -# Revision 1.1.4.1 1995/02/23 17:32:24 alanl -# Taken from DIPC2_SHARED. Add -X to MIG ala norma/template.mk -# [1995/02/22 20:46:31 alanl] -# -# Revision 1.1.2.1 1994/08/04 02:26:22 mmp -# Initial revision: NORMA_TASK split out from NORMA_INTERNAL and -# moved here from norma/template.mk. -# [1994/08/03 20:29:11 mmp] -# -# $EndLog$ - -VPATH = ..:../.. - -MIGFLAGS = -MD ${IDENT} -X -MIGKSFLAGS = -DKERNEL_SERVER -MIGKUFLAGS = -DKERNEL_USER - -NORMA_TASK_FILES = \ - norma_task_server.h \ - norma_task_server.c - -NORMA_TASK_USER_FILES = \ - norma_task.h \ - norma_task_user.c - -OTHERS = ${NORMA_TASK_FILES} ${NORMA_TASK_USER_FILES} - -INCFLAGS = -I.. -I../.. -MDINCFLAGS = -I.. -I../.. - -DEPENDENCIES = - -.include <${RULES_MK}> - -.ORDER: ${NORMA_TASK_FILES} - -${NORMA_TASK_FILES}: kern/norma_task.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader norma_task_server.h \ - -server norma_task_server.c \ - ${kern/norma_task.defs:P} - -.ORDER: ${NORMA_TASK_USER_FILES} - -${NORMA_TASK_USER_FILES}: kern/norma_task.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKUFLAGS} \ - -header norma_task.h \ - -user norma_task_user.c \ - -server /dev/null \ - ${kern/norma_task.defs:P} - -.if exists(depend.mk) -.include "depend.mk" -.endif diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 92840195a..a8811ca94 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -115,6 +115,11 @@ #include #include #include +#include +#include +#if KPC +#include +#endif #include #include @@ -154,8 +159,11 @@ static void sched_call_null( #ifdef MACH_BSD extern void proc_exit(void *); extern uint64_t get_dispatchqueue_offset_from_proc(void *); +extern int proc_selfpid(void); +extern char * proc_name_address(void *p); #endif /* MACH_BSD */ +extern int disable_exc_resource; extern int debug_task; int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */ int task_threadmax = CONFIG_THREAD_MAX; @@ -165,6 +173,23 @@ static uint64_t thread_unique_id = 0; struct _thread_ledger_indices thread_ledgers = { -1 }; static ledger_template_t thread_ledger_template = NULL; void init_thread_ledgers(void); +int task_disable_cpumon(task_t task); + +/* + * Level (in terms of percentage of the limit) at which the CPU usage monitor triggers telemetry. + * + * (ie when any thread's CPU consumption exceeds 70% of the limit, start taking user + * stacktraces, aka micro-stackshots) + */ +#define CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT 70 + +int cpumon_ustackshots_trigger_pct; /* Percentage. Level at which we start gathering telemetry. */ +void __attribute__((noinline)) THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void); + +/* + * The smallest interval over which we support limiting CPU consumption is 1ms + */ +#define MINIMUM_CPULIMIT_INTERVAL_MS 1 void thread_bootstrap(void) @@ -202,6 +227,7 @@ thread_bootstrap(void) thread_template.pending_promoter_index = 0; thread_template.pending_promoter[0] = thread_template.pending_promoter[1] = NULL; + thread_template.rwlock_count = 0; thread_template.realtime.deadline = UINT64_MAX; @@ -253,6 +279,10 @@ thread_bootstrap(void) thread_template.t_dtrace_tracing = 0; #endif /* CONFIG_DTRACE */ +#if KPC + thread_template.kpc_buf = NULL; +#endif + thread_template.t_chud = 0; thread_template.t_page_creation_count = 0; thread_template.t_page_creation_time = 0; @@ -265,14 +295,18 @@ thread_bootstrap(void) thread_template.t_ledger = LEDGER_NULL; thread_template.t_threadledger = LEDGER_NULL; - thread_template.appliedstate = default_task_null_policy; - thread_template.ext_appliedstate = default_task_null_policy; - thread_template.policystate = default_task_proc_policy; - thread_template.ext_policystate = default_task_proc_policy; -#if CONFIG_EMBEDDED - thread_template.taskwatch = NULL; - thread_template.saved_importance = 0; -#endif /* CONFIG_EMBEDDED */ + thread_template.requested_policy = default_task_requested_policy; + thread_template.effective_policy = default_task_effective_policy; + thread_template.pended_policy = default_task_pended_policy; + + thread_template.iotier_override = THROTTLE_LEVEL_NONE; + + thread_template.thread_callout_interrupt_wakeups = thread_template.thread_callout_platform_idle_wakeups = 0; + + thread_template.thread_timer_wakeups_bin_1 = thread_template.thread_timer_wakeups_bin_2 = 0; + thread_template.callout_woken_from_icontext = thread_template.callout_woken_from_platform_idle = 0; + + thread_template.thread_tag = 0; init_thread = thread_template; machine_set_current_thread(&init_thread); @@ -299,6 +333,11 @@ thread_init(void) */ machine_thread_init(); + if (!PE_parse_boot_argn("cpumon_ustackshots_trigger_pct", &cpumon_ustackshots_trigger_pct, + sizeof (cpumon_ustackshots_trigger_pct))) { + cpumon_ustackshots_trigger_pct = CPUMON_USTACKSHOTS_TRIGGER_DEFAULT_PCT; + } + init_thread_ledgers(); } @@ -327,8 +366,6 @@ thread_terminate_self(void) thread_mtx_lock(thread); 
- ulock_release_all(thread); - ipc_thread_disable(thread); thread_mtx_unlock(thread); @@ -364,9 +401,6 @@ thread_terminate_self(void) thread_policy_reset(thread); -#if CONFIG_EMBEDDED - thead_remove_taskwatch(thread); -#endif /* CONFIG_EMBEDDED */ task = thread->task; uthread_cleanup(task, thread->uthread, task->bsd_info); @@ -419,6 +453,7 @@ thread_terminate_self(void) thread->state |= TH_TERMINATE; thread_mark_wait_locked(thread, THREAD_UNINT); assert(thread->promotions == 0); + assert(thread->rwlock_count == 0); thread_unlock(thread); /* splsched */ @@ -438,6 +473,13 @@ thread_deallocate( if (thread_deallocate_internal(thread) > 0) return; + if(!(thread->state & TH_TERMINATE2)) + panic("thread_deallocate: thread not properly terminated\n"); + +#if KPC + kpc_thread_destroy(thread); +#endif + ipc_thread_terminate(thread); @@ -568,25 +610,29 @@ static void thread_stack_daemon(void) { thread_t thread; + spl_t s; + s = splsched(); simple_lock(&thread_stack_lock); while ((thread = (thread_t)dequeue_head(&thread_stack_queue)) != THREAD_NULL) { simple_unlock(&thread_stack_lock); + splx(s); + /* allocate stack with interrupts enabled so that we can call into VM */ stack_alloc(thread); - (void)splsched(); + s = splsched(); thread_lock(thread); thread_setrun(thread, SCHED_PREEMPT | SCHED_TAILQ); thread_unlock(thread); - (void)spllo(); simple_lock(&thread_stack_lock); } assert_wait((event_t)&thread_stack_queue, THREAD_UNINT); simple_unlock(&thread_stack_lock); + splx(s); thread_block((thread_continue_t)thread_stack_daemon); /*NOTREACHED*/ @@ -697,7 +743,6 @@ thread_create_internal( lck_mtx_init(&new_thread->mutex, &thread_lck_grp, &thread_lck_attr); ipc_thread_init(new_thread); - queue_init(&new_thread->held_ulocks); new_thread->continuation = continuation; @@ -775,6 +820,13 @@ thread_create_internal( new_thread->t_chud = (TASK_PMC_FLAG == (parent_task->t_chud & TASK_PMC_FLAG)) ? THREAD_PMC_FLAG : 0U; #endif +#if KPC + kpc_thread_create(new_thread); +#endif + + /* Only need to update policies pushed from task to thread */ + new_thread->requested_policy.bg_iotier = parent_task->effective_policy.bg_iotier; + new_thread->requested_policy.terminated = parent_task->effective_policy.terminated; /* Set the thread's scheduling parameters */ new_thread->sched_mode = SCHED(initial_thread_sched_mode)(parent_task); @@ -784,27 +836,8 @@ thread_create_internal( new_thread->priority = (priority < 0)? 
parent_task->priority: priority; if (new_thread->priority > new_thread->max_priority) new_thread->priority = new_thread->max_priority; -#if CONFIG_EMBEDDED - if (new_thread->priority < MAXPRI_THROTTLE) { - new_thread->priority = MAXPRI_THROTTLE; - } -#endif /* CONFIG_EMBEDDED */ - new_thread->importance = - new_thread->priority - new_thread->task_priority; -#if CONFIG_EMBEDDED + new_thread->importance = new_thread->priority - new_thread->task_priority; new_thread->saved_importance = new_thread->importance; - /* apple ios daemon starts all threads in darwin background */ - if (parent_task->ext_appliedstate.apptype == PROC_POLICY_IOS_APPLE_DAEMON) { - /* Cannot use generic routines here so apply darwin bacground directly */ - new_thread->policystate.hw_bg = TASK_POLICY_BACKGROUND_ATTRIBUTE_ALL; - /* set thread self backgrounding */ - new_thread->appliedstate.hw_bg = new_thread->policystate.hw_bg; - /* priority will get recomputed suitably bit later */ - new_thread->importance = INT_MIN; - /* to avoid changes to many pri compute routines, set the effect of those here */ - new_thread->priority = MAXPRI_THROTTLE; - } -#endif /* CONFIG_EMBEDDED */ #if defined(CONFIG_SCHED_TRADITIONAL) new_thread->sched_stamp = sched_tick; @@ -1027,9 +1060,6 @@ kernel_thread_create( stack_alloc(thread); assert(thread->kernel_stack != 0); -#if CONFIG_EMBEDDED - if (priority > BASEPRI_KERNEL) -#endif thread->reserved_stack = thread->kernel_stack; thread->parameter = parameter; @@ -1073,29 +1103,6 @@ kernel_thread_start( return kernel_thread_start_priority(continuation, parameter, -1, new_thread); } -#if defined(__i386__) - -thread_t -kernel_thread( - task_t task, - void (*start)(void)) -{ - kern_return_t result; - thread_t thread; - - if (task != kernel_task) - panic("kernel_thread"); - - result = kernel_thread_start_priority((thread_continue_t)start, NULL, -1, &thread); - if (result != KERN_SUCCESS) - return (THREAD_NULL); - - thread_deallocate(thread); - - return (thread); -} - -#endif /* defined(__i386__) */ kern_return_t thread_info_internal( @@ -1155,7 +1162,7 @@ thread_info_internal( POLICY_TIMESHARE: POLICY_RR); flags = 0; - if (thread->bound_processor != PROCESSOR_NULL && thread->bound_processor->idle_thread == thread) + if (thread->options & TH_OPT_IDLE_THREAD) flags |= TH_FLAGS_IDLE; if (!thread->kernel_stack) @@ -1431,24 +1438,179 @@ thread_wire( return (thread_wire_internal(host_priv, thread, wired, NULL)); } + +/* + * XXX assuming current thread only, for now... + */ +void +thread_guard_violation(thread_t thread, unsigned type) +{ + assert(thread == current_thread()); + + spl_t s = splsched(); + /* + * Use the saved state area of the thread structure + * to store all info required to handle the AST when + * returning to userspace + */ + thread->guard_exc_info.type = type; + thread_ast_set(thread, AST_GUARD); + ast_propagate(thread->ast); + + splx(s); +} + +/* + * guard_ast: + * + * Handle AST_GUARD for a thread. This routine looks at the + * state saved in the thread structure to determine the cause + * of this exception. Based on this value, it invokes the + * appropriate routine which determines other exception related + * info and raises the exception. 
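 *
 * [Editor's aside, not part of the patch.] The shape of the whole mechanism
 * is "record the cause, then raise the flag, then handle at a safe
 * boundary": thread_guard_violation() above stores the type before setting
 * AST_GUARD (with interrupts disabled, so the two cannot be observed out of
 * order on this CPU), and this handler consumes it on the way back to user
 * space:
 *
 *	t->guard_exc_info.type = type;     // cause first...
 *	thread_ast_set(t, AST_GUARD);      // ...flag second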
+ */ +void +guard_ast(thread_t thread) +{ + if (thread->guard_exc_info.type == GUARD_TYPE_MACH_PORT) + mach_port_guard_ast(thread); + else + fd_guard_ast(thread); +} + static void -thread_resource_exception(const void *arg0, __unused const void *arg1) +thread_cputime_callback(int warning, __unused const void *arg0, __unused const void *arg1) { - thread_t thread = current_thread(); - int code = (int)((uintptr_t)arg0 & ((int)-1)); - + if (warning == LEDGER_WARNING_ROSE_ABOVE) { +#if CONFIG_TELEMETRY + /* + * This thread is in danger of violating the CPU usage monitor. Enable telemetry + * on the entire task so there are micro-stackshots available if and when + * EXC_RESOURCE is triggered. We could have chosen to enable micro-stackshots + * for this thread only; but now that this task is suspect, knowing what all of + * its threads are up to will be useful. + */ + telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 1); +#endif + return; + } + +#if CONFIG_TELEMETRY + /* + * If the balance has dipped below the warning level (LEDGER_WARNING_DIPPED_BELOW) or + * exceeded the limit, turn telemetry off for the task. + */ + telemetry_task_ctl(current_task(), TF_CPUMON_WARNING, 0); +#endif + + if (warning == 0) { + THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(); + } +} + +void __attribute__((noinline)) +THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void) +{ + int pid = 0; + task_t task = current_task(); + thread_t thread = current_thread(); + uint64_t tid = thread->thread_id; + char *procname = (char *) "unknown"; + time_value_t thread_total_time = {0, 0}; + time_value_t thread_system_time; + time_value_t thread_user_time; + int action; + uint8_t percentage; + uint32_t limit_percent; + uint32_t usage_percent; + uint32_t interval_sec; + uint64_t interval_ns; + uint64_t balance_ns; + boolean_t fatal = FALSE; + + mach_exception_data_type_t code[EXCEPTION_CODE_MAX]; + struct ledger_entry_info lei; + assert(thread->t_threadledger != LEDGER_NULL); /* - * Disable the exception notification so we don't overwhelm - * the listener with an endless stream of redundant exceptions. + * Now that a thread has tripped the monitor, disable it for the entire task. */ - ledger_set_action(thread->t_threadledger, thread_ledgers.cpu_time, - LEDGER_ACTION_IGNORE); - ledger_disable_callback(thread->t_threadledger, thread_ledgers.cpu_time); + task_lock(task); + if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) { + /* + * The CPU usage monitor has been disabled on our task, so some other + * thread must have gotten here first. We only send one exception per + * task lifetime, so there's nothing left for us to do here. 
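 *
 * [Editor's aside -- sketch, not part of the patch.] Isolated from the
 * surrounding code, the gate is a test-and-clear under the task lock: the
 * first thread to trip the monitor sees the flag still set and wins, and
 * every thread racing behind it sees it already clear. As a hypothetical
 * helper:
 *
 *	static boolean_t
 *	claim_exc_resource(task_t t)	// invented name, for illustration
 *	{
 *		boolean_t winner;
 *
 *		task_lock(t);
 *		winner = (t->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0;
 *		task_disable_cpumon(t);	// clears the flag for everyone else
 *		task_unlock(t);
 *		return winner;
 *	}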
+ */
+		task_unlock(task);
+		return;
+	}
+	if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_FATAL_CPUMON) {
+		fatal = TRUE;
+	}
+	task_disable_cpumon(task);
+	task_unlock(task);
+
+#ifdef MACH_BSD
+	pid = proc_selfpid();
+	if (task->bsd_info != NULL)
+		procname = proc_name_address(task->bsd_info);
+#endif
+
+	thread_get_cpulimit(&action, &percentage, &interval_ns);
+
+	interval_sec = (uint32_t)(interval_ns / NSEC_PER_SEC);
+
+	thread_read_times(thread, &thread_user_time, &thread_system_time);
+	time_value_add(&thread_total_time, &thread_user_time);
+	time_value_add(&thread_total_time, &thread_system_time);
+
+	ledger_get_entry_info(thread->t_threadledger, thread_ledgers.cpu_time, &lei);
-	/* XXX code should eventually be a user-exported namespace of resources */
-	(void) task_exception_notify(EXC_RESOURCE, code, 0);
+	absolutetime_to_nanoseconds(lei.lei_balance, &balance_ns);
+	usage_percent = (uint32_t) ((balance_ns * 100ULL) / lei.lei_last_refill);
+
+	/* Show refill period in the same units as balance, limit, etc */
+	nanoseconds_to_absolutetime(lei.lei_refill_period, &lei.lei_refill_period);
+
+	limit_percent = (uint32_t) ((lei.lei_limit * 100ULL) / lei.lei_refill_period);
+
+	/* TODO: show task total runtime as well? see TASK_ABSOLUTETIME_INFO */
+
+	if (disable_exc_resource) {
+		printf("process %s[%d] thread %llu caught burning CPU! EXC_RESOURCE "
+			"suppressed by a boot-arg\n", procname, pid, tid);
+		return;
+	}
+
+	printf("process %s[%d] thread %llu caught burning CPU! "
+		"It used more than %d%% CPU (Actual recent usage: %d%%) over %d seconds. "
+		"thread lifetime cpu usage %d.%06d seconds, (%d.%06d user, %d.%06d system) "
+		"ledger info: balance: %lld credit: %lld debit: %lld limit: %llu (%d%%) "
+		"period: %llu time since last refill (ns): %llu \n",
+		procname, pid, tid,
+		percentage, usage_percent, interval_sec,
+		thread_total_time.seconds, thread_total_time.microseconds,
+		thread_user_time.seconds, thread_user_time.microseconds,
+		thread_system_time.seconds, thread_system_time.microseconds,
+		lei.lei_balance,
+		lei.lei_credit, lei.lei_debit,
+		lei.lei_limit, limit_percent,
+		lei.lei_refill_period, lei.lei_last_refill);
+
+
+	code[0] = code[1] = 0;
+	EXC_RESOURCE_ENCODE_TYPE(code[0], RESOURCE_TYPE_CPU);
+	EXC_RESOURCE_ENCODE_FLAVOR(code[0], FLAVOR_CPU_MONITOR);
+	EXC_RESOURCE_CPUMONITOR_ENCODE_INTERVAL(code[0], interval_sec);
+	EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[0], limit_percent);
+	EXC_RESOURCE_CPUMONITOR_ENCODE_PERCENTAGE(code[1], usage_percent);
+	exception_triage(EXC_RESOURCE, code, EXCEPTION_CODE_MAX);
+
+	if (fatal) {
+		task_terminate_internal(task);
+	}
 }
 
 void
@@ -1465,8 +1627,7 @@ init_thread_ledgers(void) {
 		panic("couldn't create cpu_time entry for thread ledger template");
 	}
 
-	if (ledger_set_callback(t, idx, thread_resource_exception,
-	    (void *)(uintptr_t)idx, NULL) < 0) {
+	if (ledger_set_callback(t, idx, thread_cputime_callback, NULL, NULL) < 0) {
 		panic("couldn't set thread ledger callback for cpu_time entry");
 	}
@@ -1474,12 +1635,66 @@ init_thread_ledgers(void) {
 	thread_ledger_template = t;
 }
+/*
+ * Returns currently applied CPU usage limit, or 0/0 if none is applied.
+ */
+int
+thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns)
+{
+	int64_t		abstime = 0;
+	uint64_t	limittime = 0;
+	thread_t	thread = current_thread();
+
+	*percentage  = 0;
+	*interval_ns = 0;
+	*action      = 0;
+
+	if (thread->t_threadledger == LEDGER_NULL) {
+		/*
+		 * This thread has no per-thread ledger, so it can't possibly
+		 * have a CPU limit applied.
+ */ + return (KERN_SUCCESS); + } + + ledger_get_period(thread->t_threadledger, thread_ledgers.cpu_time, interval_ns); + ledger_get_limit(thread->t_threadledger, thread_ledgers.cpu_time, &abstime); + + if ((abstime == LEDGER_LIMIT_INFINITY) || (*interval_ns == 0)) { + /* + * This thread's CPU time ledger has no period or limit; so it + * doesn't have a CPU limit applied. + */ + return (KERN_SUCCESS); + } + + /* + * This calculation is the converse to the one in thread_set_cpulimit(). + */ + absolutetime_to_nanoseconds(abstime, &limittime); + *percentage = (limittime * 100ULL) / *interval_ns; + assert(*percentage <= 100); + + if (thread->options & TH_OPT_PROC_CPULIMIT) { + assert((thread->options & TH_OPT_PRVT_CPULIMIT) == 0); + + *action = THREAD_CPULIMIT_BLOCK; + } else if (thread->options & TH_OPT_PRVT_CPULIMIT) { + assert((thread->options & TH_OPT_PROC_CPULIMIT) == 0); + + *action = THREAD_CPULIMIT_EXCEPTION; + } else { + *action = THREAD_CPULIMIT_DISABLE; + } + + return (KERN_SUCCESS); +} + /* * Set CPU usage limit on a thread. * * Calling with percentage of 0 will unset the limit for this thread. */ - int thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) { @@ -1490,23 +1705,28 @@ thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) assert(percentage <= 100); - if (percentage == 0) { + if (action == THREAD_CPULIMIT_DISABLE) { /* * Remove CPU limit, if any exists. */ if (thread->t_threadledger != LEDGER_NULL) { + l = thread->t_threadledger; /* * The only way to get a per-thread ledger is via CPU limits. */ assert(thread->options & (TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT)); - ledger_dereference(thread->t_threadledger); - thread->t_threadledger = LEDGER_NULL; + thread->t_threadledger = NULL; + ledger_dereference(l); thread->options &= ~(TH_OPT_PROC_CPULIMIT | TH_OPT_PRVT_CPULIMIT); } return (0); } + if (interval_ns < MINIMUM_CPULIMIT_INTERVAL_MS * NSEC_PER_MSEC) { + return (KERN_INVALID_ARGUMENT); + } + l = thread->t_threadledger; if (l == LEDGER_NULL) { /* @@ -1528,28 +1748,41 @@ thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns) */ limittime = (interval_ns * percentage) / 100; nanoseconds_to_absolutetime(limittime, &abstime); - ledger_set_limit(l, thread_ledgers.cpu_time, abstime); + ledger_set_limit(l, thread_ledgers.cpu_time, abstime, cpumon_ustackshots_trigger_pct); /* * Refill the thread's allotted CPU time every interval_ns nanoseconds. */ ledger_set_period(l, thread_ledgers.cpu_time, interval_ns); - /* - * Ledgers supports multiple actions for one ledger entry, so we do too. - */ if (action == THREAD_CPULIMIT_EXCEPTION) { + /* + * We don't support programming the CPU usage monitor on a task if any of its + * threads have a per-thread blocking CPU limit configured. + */ + if (thread->options & TH_OPT_PRVT_CPULIMIT) { + panic("CPU usage monitor activated, but blocking thread limit exists"); + } + + /* + * Make a note that this thread's CPU limit is being used for the task-wide CPU + * usage monitor. We don't have to arm the callback which will trigger the + * exception, because that was done for us in ledger_instantiate (because the + * ledger template used has a default callback). + */ thread->options |= TH_OPT_PROC_CPULIMIT; - ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_EXCEPTION); - } + } else { + /* + * We deliberately override any CPU limit imposed by a task-wide limit (eg + * CPU usage monitor). 
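 *
 * [Editor's aside, not part of the patch: a worked example of the
 * percentage arithmetic above. Setting a 25% limit over a 100 ms interval
 * stores a budget of
 *
 *	limittime = (100,000,000 ns * 25) / 100 = 25,000,000 ns
 *
 * per refill period, and thread_get_cpulimit() recovers
 *
 *	percentage = (25,000,000 * 100) / 100,000,000 = 25.
 *
 * Integer division truncates, so when interval_ns * percentage is not a
 * multiple of 100 (or the ns <-> absolute-time conversions round), the
 * value read back can be one less than the value set.]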
+ */ + thread->options &= ~TH_OPT_PROC_CPULIMIT; - if (action == THREAD_CPULIMIT_BLOCK) { thread->options |= TH_OPT_PRVT_CPULIMIT; /* The per-thread ledger template by default has a callback for CPU time */ ledger_disable_callback(l, thread_ledgers.cpu_time); ledger_set_action(l, thread_ledgers.cpu_time, LEDGER_ACTION_BLOCK); } - thread->t_threadledger = l; return (0); } @@ -1703,12 +1936,10 @@ thread_tid( return (thread != THREAD_NULL? thread->thread_id: 0); } -uint16_t -thread_set_tag(thread_t th, uint16_t tag) { +uint16_t thread_set_tag(thread_t th, uint16_t tag) { return thread_set_tag_internal(th, tag); } -uint16_t -thread_get_tag(thread_t th) { +uint16_t thread_get_tag(thread_t th) { return thread_get_tag_internal(th); } @@ -1851,10 +2082,22 @@ vm_offset_t dtrace_set_thread_recover(thread_t thread, vm_offset_t recover) void dtrace_thread_bootstrap(void) { task_t task = current_task(); - if(task->thread_count == 1) { + + if (task->thread_count == 1) { + thread_t thread = current_thread(); + if (thread->t_dtrace_flags & TH_DTRACE_EXECSUCCESS) { + thread->t_dtrace_flags &= ~TH_DTRACE_EXECSUCCESS; + DTRACE_PROC(exec__success); + } DTRACE_PROC(start); } DTRACE_PROC(lwp__start); } + +void +dtrace_thread_didexec(thread_t thread) +{ + thread->t_dtrace_flags |= TH_DTRACE_EXECSUCCESS; +} #endif /* CONFIG_DTRACE */ diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index 89ac1937b..47ffc95ff 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -142,30 +142,30 @@ struct thread { processor_t runq; /* run queue assignment */ wait_queue_t wait_queue; /* wait queue we are currently on */ event64_t wait_event; /* wait queue event */ - integer_t options; /* options set by thread itself */ -#define TH_OPT_INTMASK 0x03 /* interrupt / abort level */ -#define TH_OPT_VMPRIV 0x04 /* may allocate reserved memory */ -#define TH_OPT_DTRACE 0x08 /* executing under dtrace_probe */ -#define TH_OPT_SYSTEM_CRITICAL 0x10 /* Thread must always be allowed to run - even under heavy load */ -#define TH_OPT_PROC_CPULIMIT 0x20 /* Thread has a task-wide CPU limit applied to it */ -#define TH_OPT_PRVT_CPULIMIT 0x40 /* Thread has a thread-private CPU limit applied to it */ -#define TH_OPT_IDLE_THREAD 0x0080 /* Thread is a per-processor idle thread */ - /* Data updated during assert_wait/thread_wakeup */ decl_simple_lock_data(,sched_lock) /* scheduling lock (thread_lock()) */ decl_simple_lock_data(,wake_lock) /* for thread stop / wait (wake_lock()) */ + integer_t options; /* options set by thread itself */ +#define TH_OPT_INTMASK 0x0003 /* interrupt / abort level */ +#define TH_OPT_VMPRIV 0x0004 /* may allocate reserved memory */ +#define TH_OPT_DTRACE 0x0008 /* executing under dtrace_probe */ +#define TH_OPT_SYSTEM_CRITICAL 0x0010 /* Thread must always be allowed to run - even under heavy load */ +#define TH_OPT_PROC_CPULIMIT 0x0020 /* Thread has a task-wide CPU limit applied to it */ +#define TH_OPT_PRVT_CPULIMIT 0x0040 /* Thread has a thread-private CPU limit applied to it */ +#define TH_OPT_IDLE_THREAD 0x0080 /* Thread is a per-processor idle thread */ + boolean_t wake_active; /* wake event on stop */ int at_safe_point; /* thread_abort_safely allowed */ ast_t reason; /* why we blocked */ + thread_continue_t continuation; /* continue here next dispatch */ + void *parameter; /* continuation parameter */ wait_result_t 
wait_result; /* outcome of wait - * may be examined by this thread * WITHOUT locking */ - thread_continue_t continuation; /* continue here next dispatch */ - void *parameter; /* continuation parameter */ /* Data updated/used in thread_invoke */ - struct funnel_lock *funnel_lock; /* Non-reentrancy funnel */ int funnel_state; + struct funnel_lock *funnel_lock; /* Non-reentrancy funnel */ #define TH_FN_OWNED 0x1 /* we own the funnel */ #define TH_FN_REFUNNEL 0x2 /* re-acquire funnel on dispatch */ @@ -205,6 +205,10 @@ struct thread { #define TH_SFLAG_DEPRESSED_MASK (TH_SFLAG_DEPRESS | TH_SFLAG_POLLDEPRESS) #define TH_SFLAG_PRI_UPDATE 0x0100 /* Updating priority */ #define TH_SFLAG_EAGERPREEMPT 0x0200 /* Any preemption of this thread should be treated as if AST_URGENT applied */ +#define TH_SFLAG_RW_PROMOTED 0x0400 /* sched pri has been promoted due to blocking with RW lock held */ +#define TH_SFLAG_PROMOTED_MASK (TH_SFLAG_PROMOTED | TH_SFLAG_RW_PROMOTED) + +#define TH_SFLAG_RW_PROMOTED_BIT (10) /* 0x400 */ /* * A thread can either be completely unthrottled, about to be throttled, @@ -214,30 +218,33 @@ struct thread { #define TH_SFLAG_PENDING_THROTTLE_PROMOTION 0x2000 /* Pending sched_mode promition */ #define TH_SFLAG_PENDING_THROTTLE_MASK (TH_SFLAG_PENDING_THROTTLE_DEMOTION | TH_SFLAG_PENDING_THROTTLE_PROMOTION) - integer_t sched_pri; /* scheduled (current) priority */ - integer_t priority; /* base priority */ - integer_t max_priority; /* max base priority */ - integer_t task_priority; /* copy of task base priority */ - + int16_t sched_pri; /* scheduled (current) priority */ + int16_t priority; /* base priority */ + int16_t max_priority; /* max base priority */ + int16_t task_priority; /* copy of task base priority */ #if defined(CONFIG_SCHED_GRRR) #if 0 uint16_t grrr_deficit; /* fixed point (1/1000th quantum) fractional deficit */ #endif #endif - integer_t promotions; /* level of promotion */ - integer_t pending_promoter_index; + int16_t promotions; /* level of promotion */ + int16_t pending_promoter_index; + uint32_t ref_count; /* number of references to me */ void *pending_promoter[2]; - integer_t importance; /* task-relative importance */ + uint32_t rwlock_count; /* Number of lck_rw_t locks held by thread */ - /* real-time parameters */ + integer_t importance; /* task-relative importance */ + /* Priority depression expiration */ + integer_t depress_timer_active; + timer_call_data_t depress_timer; + /* real-time parameters */ struct { /* see mach/thread_policy.h */ uint32_t period; uint32_t computation; uint32_t constraint; boolean_t preemptible; - uint64_t deadline; } realtime; @@ -291,9 +298,6 @@ struct thread { integer_t wait_timer_active; boolean_t wait_timer_is_set; - /* Priority depression expiration */ - timer_call_data_t depress_timer; - integer_t depress_timer_active; /* * Processor/cache affinity @@ -306,6 +310,7 @@ struct thread { union { struct { mach_msg_return_t state; /* receive state */ + mach_port_seqno_t seqno; /* seqno of recvd message */ ipc_object_t object; /* object received on */ mach_vm_address_t msg_addr; /* receive buffer pointer */ mach_msg_size_t msize; /* max size for recvd msg */ @@ -313,7 +318,6 @@ struct thread { mach_msg_size_t slist_size; /* scatter list size */ mach_port_name_t receiver_name; /* the receive port name */ struct ipc_kmsg *kmsg; /* received message */ - mach_port_seqno_t seqno; /* seqno of recvd message */ mach_msg_continue_t continuation; } receive; struct { @@ -325,17 +329,28 @@ struct thread { } sema; struct { int option; /* switch 
option */ + boolean_t reenable_workq_callback; /* on entry, callbacks were suspended */ } swtch; int misc; /* catch-all for other state */ } saved; + /* Structure to save information about guard exception */ + struct { + unsigned type; /* EXC_GUARD reason/type */ + mach_exception_data_type_t code; /* Exception code */ + mach_exception_data_type_t subcode; /* Exception sub-code */ + } guard_exc_info; + + /* IPC data structures */ - struct ipc_kmsg_queue ith_messages; +#if IMPORTANCE_INHERITANCE + natural_t ith_assertions; /* assertions pending drop */ +#endif + struct ipc_kmsg_queue ith_messages; /* messages to reap */ mach_port_t ith_rpc_reply; /* reply port for kernel RPCs */ /* Ast/Halt data structures */ vm_offset_t recover; /* page fault recover(copyin/out) */ - uint32_t ref_count; /* number of references to me */ queue_chain_t threads; /* global list of all threads */ @@ -351,11 +366,6 @@ struct thread { decl_lck_mtx_data(,mutex) - /* Kernel holds on this thread */ - int suspend_count; - - /* User level suspensions */ - int user_stop_count; /* Pending thread ast(s) */ ast_t ast; @@ -378,23 +388,22 @@ struct thread { /* Ports associated with this thread */ struct ipc_port *ith_self; /* not a right, doesn't hold ref */ struct ipc_port *ith_sself; /* a send right */ - struct exception_action exc_actions[EXC_TYPES_COUNT]; - - /* Owned ulocks (a lock set element) */ - queue_head_t held_ulocks; + struct exception_action *exc_actions; #ifdef MACH_BSD void *uthread; #endif #if CONFIG_DTRACE + uint32_t t_dtrace_flags; /* DTrace thread states */ +#define TH_DTRACE_EXECSUCCESS 0x01 uint32_t t_dtrace_predcache;/* DTrace per thread predicate value hint */ int64_t t_dtrace_tracing; /* Thread time under dtrace_probe() */ int64_t t_dtrace_vtime; #endif - uint32_t t_page_creation_count; clock_sec_t t_page_creation_time; + uint32_t t_page_creation_count; #define T_CHUD_MARKED 0x01 /* this thread is marked by CHUD */ #define T_IN_CHUD 0x02 /* this thread is already in a CHUD handler */ @@ -407,12 +416,23 @@ struct thread { * AST */ #define T_NAME_DONE 0x20 /* Thread has previously * recorded its name */ +#define T_KPC_ALLOC 0x40 /* Thread needs a kpc_buf */ uint32_t t_chud; /* CHUD flags, used for Shark */ uint32_t chud_c_switch; /* last dispatch detection */ integer_t mutex_count; /* total count of locks held */ +#ifdef KPC + /* accumulated performance counters for this thread */ + uint64_t *kpc_buf; +#endif + +#ifdef KPERF + /* count of how many times a thread has been sampled since it was last scheduled */ + uint64_t kperf_pet_cnt; +#endif + uint64_t thread_id; /*system wide unique thread-id*/ /* Statistics accumulated per-thread and aggregated per-task */ @@ -420,14 +440,17 @@ struct thread { uint32_t syscalls_mach; ledger_t t_ledger; ledger_t t_threadledger; /* per thread ledger */ - struct process_policy ext_appliedstate; /* externally applied actions */ - struct process_policy ext_policystate; /* externally defined process policy states*/ - struct process_policy appliedstate; /* self applied acions */ - struct process_policy policystate; /* process wide policy states */ -#if CONFIG_EMBEDDED - task_watch_t * taskwatch; /* task watch */ + + /* policy is protected by the task lock */ + struct task_requested_policy requested_policy; + struct task_effective_policy effective_policy; + struct task_pended_policy pended_policy; + + int iotier_override; /* atomic operations to set, cleared on ret to user */ + + integer_t saved_importance; /* saved task-relative importance */ -#endif /* CONFIG_EMBEDDED */ + 
uint32_t thread_callout_interrupt_wakeups; uint32_t thread_callout_platform_idle_wakeups; uint32_t thread_timer_wakeups_bin_1; @@ -435,8 +458,12 @@ struct thread { uint16_t thread_tag; uint16_t callout_woken_from_icontext:1, callout_woken_from_platform_idle:1, - thread_bitfield_unused:14; - + callout_woke_thread:1, + thread_bitfield_unused:13; + /* Kernel holds on this thread */ + int16_t suspend_count; + /* User level suspensions */ + int16_t user_stop_count; }; #define ith_state saved.receive.state @@ -456,9 +483,9 @@ struct thread { #define sth_result saved.sema.result #define sth_continuation saved.sema.continuation -extern void thread_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +extern void thread_bootstrap(void); -extern void thread_init(void) __attribute__((section("__TEXT, initcode"))); +extern void thread_init(void); extern void thread_daemon_init(void); @@ -526,7 +553,7 @@ extern boolean_t stack_alloc_try( extern void stack_collect(void); -extern void stack_init(void) __attribute__((section("__TEXT, initcode"))); +extern void stack_init(void); extern kern_return_t thread_info_internal( @@ -655,10 +682,12 @@ extern void funnel_lock( extern void funnel_unlock( struct funnel_lock *lock); -static inline uint16_t thread_set_tag_internal(thread_t thread, uint16_t tag) { + +static inline uint16_t thread_set_tag_internal(thread_t thread, uint16_t tag) { return __sync_fetch_and_or(&thread->thread_tag, tag); } -static inline uint16_t thread_get_tag_internal(thread_t thread) { + +static inline uint16_t thread_get_tag_internal(thread_t thread) { return thread->thread_tag; } @@ -682,13 +711,6 @@ __END_DECLS __BEGIN_DECLS -#if defined(__i386__) - -extern thread_t kernel_thread( - task_t task, - void (*start)(void)); - -#endif /* defined(__i386__) */ extern uint64_t thread_tid( thread_t thread); @@ -714,6 +736,7 @@ __BEGIN_DECLS uint16_t thread_set_tag(thread_t, uint16_t); uint16_t thread_get_tag(thread_t); + extern kern_return_t thread_state_initialize( thread_t thread); @@ -742,9 +765,11 @@ extern void thread_yield_internal( * * 1) Block. Prevent CPU consumption of the thread from exceeding the limit. * 2) Exception. Generate a resource consumption exception when the limit is exceeded. + * 3) Disable. Remove any existing CPU limit. 
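 *
 * [Editor's aside -- usage sketch, not part of the patch; these calls are
 * kernel-internal.] To throttle the calling thread to a quarter of a CPU
 * over 100 ms windows, blocking when the budget is exhausted:
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, 25, 100 * NSEC_PER_MSEC);
 *
 * the same budget, but raising EXC_RESOURCE instead of blocking:
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_EXCEPTION, 25, 100 * NSEC_PER_MSEC);
 *
 * and to remove whatever limit is in place:
 *
 *	thread_set_cpulimit(THREAD_CPULIMIT_DISABLE, 0, 0);
 *
 * Intervals shorter than MINIMUM_CPULIMIT_INTERVAL_MS (1 ms) are rejected
 * with KERN_INVALID_ARGUMENT.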
*/ #define THREAD_CPULIMIT_BLOCK 0x1 #define THREAD_CPULIMIT_EXCEPTION 0x2 +#define THREAD_CPULIMIT_DISABLE 0x3 struct _thread_ledger_indices { int cpu_time; @@ -752,6 +777,7 @@ struct _thread_ledger_indices { extern struct _thread_ledger_indices thread_ledgers; +extern int thread_get_cpulimit(int *action, uint8_t *percentage, uint64_t *interval_ns); extern int thread_set_cpulimit(int action, uint8_t percentage, uint64_t interval_ns); typedef struct funnel_lock funnel_t; @@ -874,15 +900,16 @@ extern void dtrace_set_thread_tracing(thread_t, int64_t); extern void dtrace_set_thread_reentering(thread_t, boolean_t); extern vm_offset_t dtrace_set_thread_recover(thread_t, vm_offset_t); extern void dtrace_thread_bootstrap(void); +extern void dtrace_thread_didexec(thread_t); extern int64_t dtrace_calc_thread_recent_vtime(thread_t); -extern void thread_set_wq_state32( +extern kern_return_t thread_set_wq_state32( thread_t thread, thread_state_t tstate); -extern void thread_set_wq_state64( +extern kern_return_t thread_set_wq_state64( thread_t thread, thread_state_t tstate); @@ -890,6 +917,11 @@ extern vm_offset_t kernel_stack_mask; extern vm_offset_t kernel_stack_size; extern vm_offset_t kernel_stack_depth_max; +void guard_ast(thread_t thread); +extern void fd_guard_ast(thread_t thread); +extern void mach_port_guard_ast(thread_t thread); +extern void thread_guard_violation(thread_t thread, unsigned type); + #endif /* XNU_KERNEL_PRIVATE */ /*! @function kernel_thread_start diff --git a/osfmk/kern/thread_act.c b/osfmk/kern/thread_act.c index d99ee186c..0f2a4dc3a 100644 --- a/osfmk/kern/thread_act.c +++ b/osfmk/kern/thread_act.c @@ -238,7 +238,7 @@ thread_suspend( thread_mtx_unlock(thread); if (thread != self && result == KERN_SUCCESS) - thread_wait(thread, TRUE); + thread_wait(thread, FALSE); return (result); } @@ -434,7 +434,7 @@ thread_get_state( thread_mtx_unlock(thread); - if (thread_stop(thread)) { + if (thread_stop(thread, FALSE)) { thread_mtx_lock(thread); result = machine_thread_get_state( thread, flavor, state, state_count); @@ -484,7 +484,7 @@ thread_set_state_internal( thread_mtx_unlock(thread); - if (thread_stop(thread)) { + if (thread_stop(thread, FALSE)) { thread_mtx_lock(thread); result = machine_thread_set_state( thread, flavor, state, state_count); @@ -564,7 +564,7 @@ thread_state_initialize( thread_mtx_unlock(thread); - if (thread_stop(thread)) { + if (thread_stop(thread, TRUE)) { thread_mtx_lock(thread); result = machine_thread_state_initialize( thread ); thread_unstop(thread); @@ -605,7 +605,7 @@ thread_dup( thread_mtx_unlock(target); - if (thread_stop(target)) { + if (thread_stop(target, TRUE)) { thread_mtx_lock(target); result = machine_thread_dup(self, target); if (self->affinity_set != AFFINITY_SET_NULL) diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c index 36cb66a8e..610b94991 100644 --- a/osfmk/kern/thread_call.c +++ b/osfmk/kern/thread_call.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -44,11 +45,13 @@ #include #include +#include #include #if CONFIG_DTRACE #include #endif +#include static zone_t thread_call_zone; static struct wait_queue daemon_wqueue; @@ -90,9 +93,10 @@ static struct thread_call_group thread_call_groups[THREAD_CALL_GROUP_COUNT]; static boolean_t thread_call_daemon_awake; static thread_call_data_t internal_call_storage[INTERNAL_CALL_COUNT]; static queue_head_t thread_call_internal_queue; +int thread_call_internal_queue_count = 0; static uint64_t thread_call_dealloc_interval_abs; -static __inline__ thread_call_t 
_internal_call_allocate(void); +static __inline__ thread_call_t _internal_call_allocate(thread_call_func_t func, thread_call_param_t param0); static __inline__ void _internal_call_release(thread_call_t call); static __inline__ boolean_t _pending_call_enqueue(thread_call_t call, thread_call_group_t group); static __inline__ boolean_t _delayed_call_enqueue(thread_call_t call, thread_call_group_t group, uint64_t deadline); @@ -109,6 +113,10 @@ static void thread_call_group_setup(thread_call_group_t group, thread_call_pri static void sched_call_thread(int type, thread_t thread); static void thread_call_start_deallocate_timer(thread_call_group_t group); static void thread_call_wait_locked(thread_call_t call); +static boolean_t thread_call_enter_delayed_internal(thread_call_t call, + thread_call_func_t alt_func, thread_call_param_t alt_param0, + thread_call_param_t param1, uint64_t deadline, + uint64_t leeway, unsigned int flags); #define qe(x) ((queue_entry_t)(x)) #define TC(x) ((thread_call_t)(x)) @@ -132,6 +140,7 @@ lck_spin_t thread_call_lock_data; #define thread_call_unlock() \ lck_mtx_unlock_always(&thread_call_lock_data) +extern boolean_t mach_timer_coalescing_enabled; static inline spl_t disable_ints_and_lock(void) @@ -208,7 +217,7 @@ thread_call_priority_to_sched_pri(thread_call_priority_t pri) case THREAD_CALL_PRIORITY_USER: return BASEPRI_DEFAULT; case THREAD_CALL_PRIORITY_LOW: - return DEPRESSPRI; + return MAXPRI_THROTTLE; default: panic("Invalid priority."); } @@ -332,6 +341,7 @@ thread_call_initialize(void) call++) { enqueue_tail(&thread_call_internal_queue, qe(call)); + thread_call_internal_queue_count++; } thread_call_daemon_awake = TRUE; @@ -364,7 +374,7 @@ thread_call_setup( * Called with thread_call_lock held. */ static __inline__ thread_call_t -_internal_call_allocate(void) +_internal_call_allocate(thread_call_func_t func, thread_call_param_t param0) { thread_call_t call; @@ -372,7 +382,12 @@ _internal_call_allocate(void) panic("_internal_call_allocate"); call = TC(dequeue_head(&thread_call_internal_queue)); - + thread_call_internal_queue_count--; + + thread_call_setup(call, func, param0); + call->tc_refs = 0; + call->tc_flags = 0; /* THREAD_CALL_ALLOC not set, do not free back to zone */ + return (call); } @@ -380,7 +395,9 @@ _internal_call_allocate(void) * _internal_call_release: * * Release an internal callout entry which - * is no longer pending (or delayed). + * is no longer pending (or delayed). This is + * safe to call on a non-internal entry, in which + * case nothing happens. * * Called with thread_call_lock held. 
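+ * Internal entries are set up by _internal_call_allocate() with
+ * tc_flags cleared, so THREAD_CALL_ALLOC is never set on them; the
+ * assert below re-checks that before an entry is returned to the pool.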
*/ @@ -389,8 +406,11 @@ _internal_call_release( thread_call_t call) { if ( call >= internal_call_storage && - call < &internal_call_storage[INTERNAL_CALL_COUNT] ) + call < &internal_call_storage[INTERNAL_CALL_COUNT] ) { + assert((call->tc_flags & THREAD_CALL_ALLOC) == 0); enqueue_head(&thread_call_internal_queue, qe(call)); + thread_call_internal_queue_count++; + } } /* @@ -494,7 +514,15 @@ _set_delayed_call_timer( thread_call_t call, thread_call_group_t group) { - timer_call_enter(&group->delayed_timer, call->tc_call.deadline, 0); + uint64_t leeway; + + assert((call->tc_soft_deadline != 0) && ((call->tc_soft_deadline <= call->tc_call.deadline))); + + leeway = call->tc_call.deadline - call->tc_soft_deadline; + timer_call_enter_with_leeway(&group->delayed_timer, NULL, + call->tc_soft_deadline, leeway, + TIMER_CALL_SYS_CRITICAL|TIMER_CALL_LEEWAY, + ((call->tc_soft_deadline & 0x1) == 0x1)); } /* @@ -587,54 +615,6 @@ _remove_from_delayed_queue( return (call_removed); } -#ifndef __LP64__ - -/* - * thread_call_func: - * - * Enqueue a function callout. - * - * Guarantees { function, argument } - * uniqueness if unique_call is TRUE. - */ -void -thread_call_func( - thread_call_func_t func, - thread_call_param_t param, - boolean_t unique_call) -{ - thread_call_t call; - thread_call_group_t group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH]; - spl_t s; - - s = splsched(); - thread_call_lock_spin(); - - call = TC(queue_first(&group->pending_queue)); - - while (unique_call && !queue_end(&group->pending_queue, qe(call))) { - if (call->tc_call.func == func && call->tc_call.param0 == param) { - break; - } - - call = TC(queue_next(qe(call))); - } - - if (!unique_call || queue_end(&group->pending_queue, qe(call))) { - call = _internal_call_allocate(); - call->tc_call.func = func; - call->tc_call.param0 = param; - call->tc_call.param1 = NULL; - - _pending_call_enqueue(call, group); - } - - thread_call_unlock(); - splx(s); -} - -#endif /* __LP64__ */ - /* * thread_call_func_delayed: * @@ -647,25 +627,25 @@ thread_call_func_delayed( thread_call_param_t param, uint64_t deadline) { - thread_call_t call; - thread_call_group_t group = &thread_call_groups[THREAD_CALL_PRIORITY_HIGH]; - spl_t s; - - s = splsched(); - thread_call_lock_spin(); - - call = _internal_call_allocate(); - call->tc_call.func = func; - call->tc_call.param0 = param; - call->tc_call.param1 = 0; - - _delayed_call_enqueue(call, group, deadline); + (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, 0, 0); +} - if (queue_first(&group->delayed_queue) == qe(call)) - _set_delayed_call_timer(call, group); +/* + * thread_call_func_delayed_with_leeway: + * + * Same as thread_call_func_delayed(), but with + * leeway/flags threaded through. 
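+ * The flags argument takes the THREAD_CALL_DELAY_* values defined
+ * in thread_call.h (see below).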
+ */ - thread_call_unlock(); - splx(s); +void +thread_call_func_delayed_with_leeway( + thread_call_func_t func, + thread_call_param_t param, + uint64_t deadline, + uint64_t leeway, + uint32_t flags) +{ + (void)thread_call_enter_delayed_internal(NULL, func, param, 0, deadline, leeway, flags); } /* @@ -858,28 +838,10 @@ thread_call_enter1( boolean_t thread_call_enter_delayed( thread_call_t call, - uint64_t deadline) + uint64_t deadline) { - boolean_t result = TRUE; - thread_call_group_t group; - spl_t s; - - group = thread_call_get_group(call); - - s = splsched(); - thread_call_lock_spin(); - - result = _delayed_call_enqueue(call, group, deadline); - - if (queue_first(&group->delayed_queue) == qe(call)) - _set_delayed_call_timer(call, group); - - call->tc_call.param1 = 0; - - thread_call_unlock(); - splx(s); - - return (result); + assert(call); + return thread_call_enter_delayed_internal(call, NULL, 0, 0, deadline, 0, 0); } boolean_t @@ -887,28 +849,103 @@ thread_call_enter1_delayed( thread_call_t call, thread_call_param_t param1, uint64_t deadline) +{ + assert(call); + return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, 0, 0); +} + +boolean_t +thread_call_enter_delayed_with_leeway( + thread_call_t call, + thread_call_param_t param1, + uint64_t deadline, + uint64_t leeway, + unsigned int flags) +{ + assert(call); + return thread_call_enter_delayed_internal(call, NULL, 0, param1, deadline, leeway, flags); +} + + +/* + * thread_call_enter_delayed_internal: + * enqueue a callout entry to occur at the stated time + * + * Returns True if the call was already on a queue + * params: + * call - structure encapsulating state of the callout + * alt_func/alt_param0 - if call is NULL, allocate temporary storage using these parameters + * deadline - time deadline in nanoseconds + * leeway - timer slack represented as delta of deadline. + * flags - THREAD_CALL_DELAY_XXX : classification of caller's desires wrt timer coalescing. + * THREAD_CALL_DELAY_LEEWAY : value in leeway is used for timer coalescing. + */ +boolean_t +thread_call_enter_delayed_internal( + thread_call_t call, + thread_call_func_t alt_func, + thread_call_param_t alt_param0, + thread_call_param_t param1, + uint64_t deadline, + uint64_t leeway, + unsigned int flags) { boolean_t result = TRUE; thread_call_group_t group; spl_t s; - uint64_t abstime; + uint64_t abstime, sdeadline, slop; + uint32_t urgency; - group = thread_call_get_group(call); + /* direct mapping between thread_call, timer_call, and timeout_urgency values */ + urgency = (flags & TIMEOUT_URGENCY_MASK); s = splsched(); thread_call_lock_spin(); + + if (call == NULL) { + /* allocate a structure out of internal storage, as a convenience for BSD callers */ + call = _internal_call_allocate(alt_func, alt_param0); + } + + group = thread_call_get_group(call); abstime = mach_absolute_time(); + + call->tc_flags |= THREAD_CALL_DELAYED; + + call->tc_soft_deadline = sdeadline = deadline; + + boolean_t ratelimited = FALSE; + slop = timer_call_slop(deadline, abstime, urgency, current_thread(), &ratelimited); + + if ((flags & THREAD_CALL_DELAY_LEEWAY) != 0 && leeway > slop) + slop = leeway; + + if (UINT64_MAX - deadline <= slop) + deadline = UINT64_MAX; + else + deadline += slop; + + /* Bit 0 of the "soft" deadline indicates that + * this particular callout requires rate-limiting + * behaviour. 
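+ * (Deadlines are in absolute-time units, so stealing the low bit
+ * perturbs the hard deadline by at most one unit.)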
Maintain the invariant deadline >= soft_deadline + */ + deadline |= 1; + if (ratelimited) { + call->tc_soft_deadline |= 0x1ULL; + } else { + call->tc_soft_deadline &= ~0x1ULL; + } + + call->tc_call.param1 = param1; + call->ttd = (sdeadline > abstime) ? (sdeadline - abstime) : 0; result = _delayed_call_enqueue(call, group, deadline); if (queue_first(&group->delayed_queue) == qe(call)) _set_delayed_call_timer(call, group); - call->tc_call.param1 = param1; - - call->ttd = (deadline > abstime) ? (deadline - abstime) : 0; #if CONFIG_DTRACE - DTRACE_TMR4(thread_callout__create, thread_call_func_t, call->tc_call.func, 0, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); + DTRACE_TMR5(thread_callout__create, thread_call_func_t, call->tc_call.func, uint64_t, (deadline - sdeadline), uint64_t, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), call); #endif thread_call_unlock(); splx(s); @@ -928,7 +965,7 @@ boolean_t thread_call_cancel( thread_call_t call) { - boolean_t result; + boolean_t result, do_cancel_callout = FALSE; thread_call_group_t group; spl_t s; @@ -937,8 +974,21 @@ thread_call_cancel( s = splsched(); thread_call_lock_spin(); + if ((call->tc_call.deadline != 0) && + (queue_first(&group->delayed_queue) == qe(call))) { + assert (call->tc_call.queue == &group->delayed_queue); + do_cancel_callout = TRUE; + } + result = _call_dequeue(call, group); + if (do_cancel_callout) { + timer_call_cancel(&group->delayed_timer); + if (!queue_empty(&group->delayed_queue)) { + _set_delayed_call_timer(TC(queue_first(&group->delayed_queue)), group); + } + } + thread_call_unlock(); splx(s); #if CONFIG_DTRACE @@ -984,44 +1034,6 @@ thread_call_cancel_wait( } -#ifndef __LP64__ - -/* - * thread_call_is_delayed: - * - * Returns TRUE if the call is - * currently on a delayed queue. - * - * Optionally returns the expiration time. 
- */ -boolean_t -thread_call_is_delayed( - thread_call_t call, - uint64_t *deadline) -{ - boolean_t result = FALSE; - thread_call_group_t group; - spl_t s; - - group = thread_call_get_group(call); - - s = splsched(); - thread_call_lock_spin(); - - if (call->tc_call.queue == &group->delayed_queue) { - if (deadline != NULL) - *deadline = call->tc_call.deadline; - result = TRUE; - } - - thread_call_unlock(); - splx(s); - - return (result); -} - -#endif /* __LP64__ */ - /* * thread_call_wake: * @@ -1196,8 +1208,16 @@ thread_call_thread( MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0); +#if CONFIG_DTRACE + DTRACE_TMR6(thread_callout__start, thread_call_func_t, func, int, 0, int, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), (call->tc_flags & THREAD_CALL_DELAYED), call); +#endif + (*func)(param0, param1); +#if CONFIG_DTRACE + DTRACE_TMR6(thread_callout__end, thread_call_func_t, func, int, 0, int, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), (call->tc_flags & THREAD_CALL_DELAYED), call); +#endif + if (get_preemption_level() != 0) { int pl = get_preemption_level(); panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)", @@ -1216,6 +1236,16 @@ thread_call_thread( thread_sched_call(self, NULL); group->active_count--; + + if (self->callout_woken_from_icontext && !self->callout_woke_thread) { + ledger_credit(self->t_ledger, task_ledgers.interrupt_wakeups, 1); + if (self->callout_woken_from_platform_idle) + ledger_credit(self->t_ledger, task_ledgers.platform_idle_wakeups, 1); + } + + self->callout_woken_from_icontext = FALSE; + self->callout_woken_from_platform_idle = FALSE; + self->callout_woke_thread = FALSE; if (group_isparallel(group)) { /* @@ -1356,7 +1386,7 @@ thread_call_delayed_timer( { thread_call_t call; thread_call_group_t group = p0; - uint64_t timestamp; + uint64_t timestamp; thread_call_lock_spin(); @@ -1365,9 +1395,20 @@ thread_call_delayed_timer( call = TC(queue_first(&group->delayed_queue)); while (!queue_end(&group->delayed_queue, qe(call))) { - if (call->tc_call.deadline <= timestamp) { + if (call->tc_soft_deadline <= timestamp) { + /* Bit 0 of the "soft" deadline indicates that + * this particular callout is rate-limited + * and hence shouldn't be processed before its + * hard deadline. Rate limited timers aren't + * skipped when a forcible reevaluation is in progress. 
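+ * (ml_timer_forced_evaluation() reports such a reevaluation; the
+ * rescan path below then processes entries by their soft deadlines.)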
+ */ + if ((call->tc_soft_deadline & 0x1) && + (CE(call)->deadline > timestamp) && + (ml_timer_forced_evaluation() == FALSE)) { + break; + } _pending_call_enqueue(call, group); - } + } /* TODO, identify differentially coalesced timers */ else break; @@ -1380,6 +1421,56 @@ thread_call_delayed_timer( thread_call_unlock(); } +static void +thread_call_delayed_timer_rescan(timer_call_param_t p0, __unused timer_call_param_t p1) +{ + thread_call_t call; + thread_call_group_t group = p0; + uint64_t timestamp; + boolean_t istate; + + istate = ml_set_interrupts_enabled(FALSE); + thread_call_lock_spin(); + + assert(ml_timer_forced_evaluation() == TRUE); + timestamp = mach_absolute_time(); + + call = TC(queue_first(&group->delayed_queue)); + + while (!queue_end(&group->delayed_queue, qe(call))) { + if (call->tc_soft_deadline <= timestamp) { + _pending_call_enqueue(call, group); + call = TC(queue_first(&group->delayed_queue)); + } + else { + uint64_t skew = call->tc_call.deadline - call->tc_soft_deadline; + assert (call->tc_call.deadline >= call->tc_soft_deadline); + /* On a latency quality-of-service level change, + * re-sort potentially rate-limited callout. The platform + * layer determines which timers require this. + */ + if (timer_resort_threshold(skew)) { + _call_dequeue(call, group); + _delayed_call_enqueue(call, group, call->tc_soft_deadline); + } + call = TC(queue_next(qe(call))); + } + } + + if (!queue_empty(&group->delayed_queue)) + _set_delayed_call_timer(TC(queue_first(&group->delayed_queue)), group); + thread_call_unlock(); + ml_set_interrupts_enabled(istate); +} + +void +thread_call_delayed_timer_rescan_all(void) { + thread_call_delayed_timer_rescan((timer_call_param_t)&thread_call_groups[THREAD_CALL_PRIORITY_LOW], NULL); + thread_call_delayed_timer_rescan((timer_call_param_t)&thread_call_groups[THREAD_CALL_PRIORITY_USER], NULL); + thread_call_delayed_timer_rescan((timer_call_param_t)&thread_call_groups[THREAD_CALL_PRIORITY_KERNEL], NULL); + thread_call_delayed_timer_rescan((timer_call_param_t)&thread_call_groups[THREAD_CALL_PRIORITY_HIGH], NULL); +} + /* * Timer callback to tell a thread to terminate if * we have an excess of threads and at least one has been @@ -1483,4 +1574,3 @@ thread_call_isactive(thread_call_t call) return active; } - diff --git a/osfmk/kern/thread_call.h b/osfmk/kern/thread_call.h index e1ddebc36..36cd6170a 100644 --- a/osfmk/kern/thread_call.h +++ b/osfmk/kern/thread_call.h @@ -129,6 +129,64 @@ extern boolean_t thread_call_enter1_delayed( thread_call_t call, thread_call_param_t param1, uint64_t deadline); +#ifdef XNU_KERNEL_PRIVATE + +/* + * Flags to alter the default timer/timeout coalescing behavior + * on a per-thread_call basis. + * + * The SYS urgency classes indicate that the thread_call is not + * directly related to the current thread at the time the thread_call + * is entered, so it is ignored in the calculation entirely (only + * the subclass specified is used). + * + * The USER flags indicate that both the current thread scheduling and QoS + * attributes, in addition to the per-thread_call urgency specification, + * are used to establish coalescing behavior. 
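+ *
+ * For example (a hypothetical caller; deadline and leeway are in
+ * absolute time units):
+ *
+ *	uint64_t deadline, leeway;
+ *	clock_interval_to_deadline(5, NSEC_PER_SEC, &deadline);
+ *	nanoseconds_to_absolutetime(500 * NSEC_PER_MSEC, &leeway);
+ *	thread_call_enter_delayed_with_leeway(call, NULL, deadline, leeway,
+ *	    THREAD_CALL_DELAY_SYS_BACKGROUND | THREAD_CALL_DELAY_LEEWAY);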
+ */ +#define THREAD_CALL_DELAY_SYS_NORMAL TIMEOUT_URGENCY_SYS_NORMAL +#define THREAD_CALL_DELAY_SYS_CRITICAL TIMEOUT_URGENCY_SYS_CRITICAL +#define THREAD_CALL_DELAY_SYS_BACKGROUND TIMEOUT_URGENCY_SYS_BACKGROUND + +#define THREAD_CALL_DELAY_USER_MASK TIMEOUT_URGENCY_USER_MASK +#define THREAD_CALL_DELAY_USER_NORMAL TIMEOUT_URGENCY_USER_NORMAL +#define THREAD_CALL_DELAY_USER_CRITICAL TIMEOUT_URGENCY_USER_CRITICAL +#define THREAD_CALL_DELAY_USER_BACKGROUND TIMEOUT_URGENCY_USER_BACKGROUND + +#define THREAD_CALL_DELAY_URGENCY_MASK TIMEOUT_URGENCY_MASK + +/* + * Indicate that a specific leeway value is being provided (otherwise + * the leeway parameter is ignored). The supplied value can currently + * only be used to extend the leeway calculated internally from the + * urgency class provided. + */ +#define THREAD_CALL_DELAY_LEEWAY TIMEOUT_URGENCY_LEEWAY + +/*! + @function thread_call_enter_delayed_with_leeway + @abstract Submit a thread call to be executed at some point in the future. + @discussion If the work item is already scheduled for delayed or immediate execution, + and it has not yet begun to run, that invocation will be cancelled in favor of execution + at the newly specified time. Note that if a thread call is rescheduled from its own callback, + then multiple invocations of the callback may be in flight at the same time. + @result TRUE if the call was already pending for either delayed or immediate + execution, FALSE otherwise. + @param call The thread call to execute. + @param param1 Second parameter to callback. + @param deadline Time, in absolute time units, at which to execute callback. + @param leeway Time delta, in absolute time units, which sets range of time allowing kernel + to decide appropriate time to run. + @param flags configuration for timers in kernel. + */ +extern boolean_t thread_call_enter_delayed_with_leeway( + thread_call_t call, + thread_call_param_t param1, + uint64_t deadline, + uint64_t leeway, + uint32_t flags); + +#endif /* XNU_KERNEL_PRIVATE */ /*! @function thread_call_cancel @@ -224,16 +282,16 @@ struct thread_call { struct call_entry tc_call; /* Must be first */ uint64_t tc_submit_count; uint64_t tc_finish_count; - thread_call_priority_t tc_pri; - + uint64_t ttd; /* Time to deadline at creation */ + uint64_t tc_soft_deadline; + thread_call_priority_t tc_pri; uint32_t tc_flags; int32_t tc_refs; - - uint64_t ttd; /* Time to deadline at creation */ }; #define THREAD_CALL_ALLOC 0x01 #define THREAD_CALL_WAIT 0x02 +#define THREAD_CALL_DELAYED 0x04 typedef struct thread_call thread_call_data_t; @@ -244,81 +302,38 @@ extern void thread_call_setup( thread_call_func_t func, thread_call_param_t param0); +extern void thread_call_delayed_timer_rescan_all(void); #endif /* MACH_KERNEL_PRIVATE */ -#ifdef KERNEL_PRIVATE +#ifdef XNU_KERNEL_PRIVATE __BEGIN_DECLS /* - * Obsolete interfaces. + * These routines are equivalent to their thread_call_enter_XXX + * variants, only the thread_call_t is allocated out of a + * fixed preallocated pool of memory, and will panic if the pool + * is exhausted. 
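+ *
+ * For example (a hypothetical BSD caller; my_func and my_arg are
+ * placeholders):
+ *
+ *	uint64_t deadline;
+ *	clock_interval_to_deadline(100, NSEC_PER_MSEC, &deadline);
+ *	thread_call_func_delayed(my_func, my_arg, deadline);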
*/ -#ifndef __LP64__ - -extern boolean_t thread_call_is_delayed( - thread_call_t call, - uint64_t *deadline); - -extern void thread_call_func( - thread_call_func_t func, - thread_call_param_t param, - boolean_t unique_call); - extern void thread_call_func_delayed( thread_call_func_t func, thread_call_param_t param, uint64_t deadline); -extern boolean_t thread_call_func_cancel( - thread_call_func_t func, - thread_call_param_t param, - boolean_t cancel_all); - -#else /* __LP64__ */ - -#ifdef XNU_KERNEL_PRIVATE - -extern void thread_call_func_delayed( - thread_call_func_t func, - thread_call_param_t param, - uint64_t deadline); +extern void thread_call_func_delayed_with_leeway( + thread_call_func_t func, + thread_call_param_t param, + uint64_t deadline, + uint64_t leeway, + uint32_t flags); extern boolean_t thread_call_func_cancel( thread_call_func_t func, thread_call_param_t param, boolean_t cancel_all); - -#endif /* XNU_KERNEL_PRIVATE */ - -#endif /* __LP64__ */ - -#ifndef MACH_KERNEL_PRIVATE - -#ifndef __LP64__ - -#ifndef ABSOLUTETIME_SCALAR_TYPE - -#define thread_call_enter_delayed(a, b) \ - thread_call_enter_delayed((a), __OSAbsoluteTime(b)) - -#define thread_call_enter1_delayed(a, b, c) \ - thread_call_enter1_delayed((a), (b), __OSAbsoluteTime(c)) - -#define thread_call_is_delayed(a, b) \ - thread_call_is_delayed((a), __OSAbsoluteTimePtr(b)) - -#define thread_call_func_delayed(a, b, c) \ - thread_call_func_delayed((a), (b), __OSAbsoluteTime(c)) - -#endif /* ABSOLUTETIME_SCALAR_TYPE */ - -#endif /* __LP64__ */ - -#endif /* MACH_KERNEL_PRIVATE */ - __END_DECLS -#endif /* KERNEL_PRIVATE */ +#endif /* XNU_KERNEL_PRIVATE */ #endif /* _KERN_THREAD_CALL_H_ */ diff --git a/osfmk/kern/thread_policy.c b/osfmk/kern/thread_policy.c index 8108514f5..648edba7a 100644 --- a/osfmk/kern/thread_policy.c +++ b/osfmk/kern/thread_policy.c @@ -38,15 +38,8 @@ static void thread_recompute_priority( thread_t thread); -#if CONFIG_EMBEDDED -static void -thread_throttle( - thread_t thread, - integer_t task_priority); - -extern int mach_do_background_thread(thread_t thread, int prio); -#endif +extern void proc_get_thread_policy(thread_t thread, thread_policy_state_t info); kern_return_t thread_policy_set( @@ -107,16 +100,24 @@ thread_policy_set_internal( thread->sched_mode = TH_MODE_TIMESHARE; if (!oldmode) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { sched_share_incr(); + + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_incr(); + } } } else { thread->sched_mode = TH_MODE_FIXED; if (oldmode) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_decr(); + sched_share_decr(); + } } } @@ -166,8 +167,12 @@ thread_policy_set_internal( } else { if (thread->sched_mode == TH_MODE_TIMESHARE) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_decr(); + sched_share_decr(); + } } thread->sched_mode = TH_MODE_REALTIME; thread_recompute_priority(thread); @@ -226,17 +231,6 @@ thread_policy_set_internal( return thread_affinity_set(thread, info->affinity_tag); } -#if CONFIG_EMBEDDED - case THREAD_BACKGROUND_POLICY: - { - thread_background_policy_t info; - - info = (thread_background_policy_t) policy_info; - - thread_mtx_unlock(thread); - return mach_do_background_thread(thread, info->priority); - } -#endif /* 
CONFIG_EMBEDDED */ default: result = KERN_INVALID_ARGUMENT; @@ -271,50 +265,11 @@ thread_recompute_priority( else if (priority < MINPRI) priority = MINPRI; -#if CONFIG_EMBEDDED - /* No one can have a base priority less than MAXPRI_THROTTLE */ - if (priority < MAXPRI_THROTTLE) - priority = MAXPRI_THROTTLE; -#endif /* CONFIG_EMBEDDED */ } set_priority(thread, priority); } -#if CONFIG_EMBEDDED -static void -thread_throttle( - thread_t thread, - integer_t task_priority) -{ - if ((!(thread->sched_flags & TH_SFLAG_THROTTLED) - || (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_PROMOTION)) - && (task_priority <= MAXPRI_THROTTLE)) { - - /* Kill a promotion if it was in flight */ - thread->sched_flags &= ~TH_SFLAG_PENDING_THROTTLE_PROMOTION; - - if (!(thread->sched_flags & TH_SFLAG_THROTTLED)) { - /* - * Set the pending bit so that we can switch runqueues - * (potentially) at a later time safely - */ - thread->sched_flags |= TH_SFLAG_PENDING_THROTTLE_DEMOTION; - } - } - else if (((thread->sched_flags & TH_SFLAG_THROTTLED) - || (thread->sched_flags & TH_SFLAG_PENDING_THROTTLE_DEMOTION)) - && (task_priority > MAXPRI_THROTTLE)) { - - /* Kill a demotion if it was in flight */ - thread->sched_flags &= ~TH_SFLAG_PENDING_THROTTLE_DEMOTION; - - if (thread->sched_flags & TH_SFLAG_THROTTLED) { - thread->sched_flags |= TH_SFLAG_PENDING_THROTTLE_PROMOTION; - } - } -} -#endif void thread_task_priority( @@ -329,9 +284,15 @@ thread_task_priority( s = splsched(); thread_lock(thread); -#if CONFIG_EMBEDDED - thread_throttle(thread, priority); -#endif + + + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { + if ((thread->max_priority <= MAXPRI_THROTTLE) && (max_priority > MAXPRI_THROTTLE)) { + sched_background_decr(); + } else if ((thread->max_priority > MAXPRI_THROTTLE) && (max_priority <= MAXPRI_THROTTLE)) { + sched_background_incr(); + } + } thread->task_priority = priority; thread->max_priority = max_priority; @@ -358,8 +319,12 @@ thread_policy_reset( if ((oldmode != TH_MODE_TIMESHARE) && (thread->sched_mode == TH_MODE_TIMESHARE)) { - if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) + if ((thread->state & (TH_RUN|TH_IDLE)) == TH_RUN) { sched_share_incr(); + + if (thread->max_priority <= MAXPRI_THROTTLE) + sched_background_incr(); + } } } else { @@ -519,6 +484,42 @@ thread_policy_get( break; } + case THREAD_POLICY_STATE: + { + thread_policy_state_t info; + + if (*count < THREAD_POLICY_STATE_COUNT) { + result = KERN_INVALID_ARGUMENT; + break; + } + + /* Only root can get this info */ + if (current_task()->sec_token.val[0] != 0) { + result = KERN_PROTECTION_FAILURE; + break; + } + + info = (thread_policy_state_t)policy_info; + + if (!(*get_default)) { + /* + * Unlock the thread mutex and directly return. + * This is necessary because proc_get_thread_policy() + * takes the task lock. + */ + thread_mtx_unlock(thread); + proc_get_thread_policy(thread, info); + return (result); + } else { + info->requested = 0; + info->effective = 0; + info->pending = 0; + } + + break; + } + + default: result = KERN_INVALID_ARGUMENT; break; diff --git a/osfmk/kern/timer.c b/osfmk/kern/timer.c index 02a088597..901dbd9ee 100644 --- a/osfmk/kern/timer.c +++ b/osfmk/kern/timer.c @@ -66,11 +66,7 @@ #include #include -#if CONFIG_EMBEDDED -int precise_user_kernel_time = 0; -#else int precise_user_kernel_time = 1; -#endif /* * timer_init initializes a timer. 
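The root-only THREAD_POLICY_STATE flavor added to thread_policy_get() above would be queried roughly as follows (a sketch, not part of the patch; it assumes the thread_policy_state_data_t type and THREAD_POLICY_STATE_COUNT constant that accompany this flavor, and some thread_t thread):

	/* Sketch: fetch requested/effective/pending policy bits for a thread. */
	thread_policy_state_data_t state;
	mach_msg_type_number_t count = THREAD_POLICY_STATE_COUNT;
	boolean_t get_default = FALSE;	/* live state, not the defaults */
	kern_return_t kr = thread_policy_get(thread, THREAD_POLICY_STATE,
	    (thread_policy_t)&state, &count, &get_default);
	/* fails with KERN_PROTECTION_FAILURE unless the calling task is root */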
diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c
index 5a17e057c..a382c8607 100644
--- a/osfmk/kern/timer_call.c
+++ b/osfmk/kern/timer_call.c
@@ -33,10 +33,10 @@
 #include
 #include
-#include
 #include
 #include
 #include
+#include
 #include
@@ -58,15 +58,26 @@
 #define DBG(x...)
 #endif
+#if TIMER_TRACE
+#define TIMER_KDEBUG_TRACE	KERNEL_DEBUG_CONSTANT_IST
+#else
+#define TIMER_KDEBUG_TRACE(x...)
+#endif
+
+
 lck_grp_t	timer_call_lck_grp;
 lck_attr_t	timer_call_lck_attr;
 lck_grp_attr_t	timer_call_lck_grp_attr;
+lck_grp_t	timer_longterm_lck_grp;
+lck_attr_t	timer_longterm_lck_attr;
+lck_grp_attr_t	timer_longterm_lck_grp_attr;
+
-#define timer_call_lock_spin(queue)	\
+#define timer_queue_lock_spin(queue)	\
	lck_mtx_lock_spin_always(&queue->lock_data)
-#define timer_call_unlock(queue)	\
+#define timer_queue_unlock(queue)	\
	lck_mtx_unlock_always(&queue->lock_data)
@@ -74,6 +85,66 @@ lck_grp_attr_t timer_call_lck_grp_attr;
 #define MPQUEUE(x)	((mpqueue_head_t *)(x))
 #define TIMER_CALL(x)	((timer_call_t)(x))
+/*
+ * The longterm timer object is a global structure holding all timers
+ * beyond the short-term, local timer queue threshold. The boot processor
+ * is responsible for moving each timer to its local timer queue
+ * if and when that timer becomes due within the threshold.
+ */
+#define TIMER_LONGTERM_NONE		EndOfAllTime
+#if defined(__x86_64__)
+#define TIMER_LONGTERM_THRESHOLD	(1ULL * NSEC_PER_SEC)
+#else
+#define TIMER_LONGTERM_THRESHOLD	TIMER_LONGTERM_NONE
+#endif
+
+typedef struct {
+	uint64_t	interval;	/* longterm timer interval */
+	uint64_t	margin;		/* fudge factor (10% of interval) */
+	uint64_t	deadline;	/* first/soonest longterm deadline */
+	uint64_t	preempted;	/* sooner timer has pre-empted */
+	timer_call_t	call;		/* first/soonest longterm timer call */
+	uint64_t	deadline_set;	/* next timer set */
+	timer_call_data_t timer;	/* timer used by threshold management */
+	/* Stats: */
+	uint64_t	scans;		/* num threshold timer scans */
+	uint64_t	preempts;	/* num threshold reductions */
+	uint64_t	latency;	/* average threshold latency */
+	uint64_t	latency_min;	/* minimum threshold latency */
+	uint64_t	latency_max;	/* maximum threshold latency */
+} threshold_t;
+
+typedef struct {
+	mpqueue_head_t	queue;		/* longterm timer list */
+	uint64_t	enqueues;	/* num timers queued */
+	uint64_t	dequeues;	/* num timers dequeued */
+	uint64_t	escalates;	/* num timers becoming shortterm */
+	uint64_t	scan_time;	/* last time the list was scanned */
+	threshold_t	threshold;	/* longterm timer threshold */
+} timer_longterm_t;
+
+timer_longterm_t	timer_longterm;
+
+static mpqueue_head_t	*timer_longterm_queue = NULL;
+
+static void		timer_longterm_init(void);
+static void		timer_longterm_callout(
+				timer_call_param_t	p0,
+				timer_call_param_t	p1);
+extern void		timer_longterm_scan(
+				timer_longterm_t	*tlp,
+				uint64_t		now);
+static void		timer_longterm_update(
+				timer_longterm_t	*tlp);
+static void		timer_longterm_update_locked(
+				timer_longterm_t	*tlp);
+static mpqueue_head_t *	timer_longterm_enqueue_unlocked(
+				timer_call_t		call,
+				uint64_t		now,
+				uint64_t		deadline,
+				mpqueue_head_t **	old_queue);
+static void		timer_longterm_dequeued_locked(
+				timer_call_t		call);
 uint64_t past_deadline_timers;
 uint64_t past_deadline_deltas;
@@ -83,7 +154,7 @@ enum {PAST_DEADLINE_TIMER_ADJUSTMENT_NS = 10 * 1000};
 uint64_t past_deadline_timer_adjustment;
-static boolean_t timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint32_t flags);
+static boolean_t
timer_call_enter_internal(timer_call_t call, timer_call_param_t param1, uint64_t deadline, uint64_t leeway, uint32_t flags, boolean_t ratelimited); boolean_t mach_timer_coalescing_enabled = TRUE; mpqueue_head_t *timer_call_enqueue_deadline_unlocked( @@ -96,19 +167,21 @@ mpqueue_head_t *timer_call_dequeue_unlocked( void -timer_call_initialize(void) +timer_call_init(void) { lck_attr_setdefault(&timer_call_lck_attr); lck_grp_attr_setdefault(&timer_call_lck_grp_attr); lck_grp_init(&timer_call_lck_grp, "timer_call", &timer_call_lck_grp_attr); nanotime_to_absolutetime(0, PAST_DEADLINE_TIMER_ADJUSTMENT_NS, &past_deadline_timer_adjustment); + + timer_longterm_init(); } void -timer_call_initialize_queue(mpqueue_head_t *queue) +timer_call_queue_init(mpqueue_head_t *queue) { - DBG("timer_call_initialize_queue(%p)\n", queue); + DBG("timer_call_queue_init(%p)\n", queue); mpqueue_init(queue, &timer_call_lck_grp, &timer_call_lck_attr); } @@ -151,6 +224,7 @@ timer_call_setup( * operated upon since it is being changed. Furthermore, we don't null * the entry.queue pointer (protected by the entry lock we don't own). * Instead, we set the async_dequeue flag -- see (1c). + * 2c) Same as 2b but occurring when a longterm timer is matured. */ /* @@ -183,6 +257,7 @@ timer_call_entry_dequeue( "queue %p is not locked\n", old_queue); call_entry_dequeue(CE(entry)); + old_queue->count--; return (old_queue); } @@ -208,6 +283,15 @@ timer_call_entry_enqueue_deadline( call_entry_enqueue_deadline(CE(entry), QUEUE(queue), deadline); +/* For efficiency, track the earliest soft deadline on the queue, so that + * fuzzy decisions can be made without lock acquisitions. + */ + queue->earliest_soft_deadline = ((timer_call_t)queue_first(&queue->head))->soft_deadline; + + if (old_queue) + old_queue->count--; + queue->count++; + return (old_queue); } @@ -217,7 +301,12 @@ static __inline__ mpqueue_head_t * timer_call_entry_dequeue( timer_call_t entry) { - return MPQUEUE(call_entry_dequeue(CE(entry))); + mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); + + call_entry_dequeue(CE(entry)); + old_queue->count--; + + return old_queue; } static __inline__ mpqueue_head_t * @@ -226,12 +315,51 @@ timer_call_entry_enqueue_deadline( mpqueue_head_t *queue, uint64_t deadline) { - return MPQUEUE(call_entry_enqueue_deadline(CE(entry), - QUEUE(queue), deadline)); + mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); + + call_entry_enqueue_deadline(CE(entry), QUEUE(queue), deadline); + + /* For efficiency, track the earliest soft deadline on the queue, + * so that fuzzy decisions can be made without lock acquisitions. + */ + queue->earliest_soft_deadline = ((timer_call_t)queue_first(&queue->head))->soft_deadline; + + if (old_queue) + old_queue->count--; + queue->count++; + + return old_queue; } #endif +static __inline__ void +timer_call_entry_enqueue_tail( + timer_call_t entry, + mpqueue_head_t *queue) +{ + call_entry_enqueue_tail(CE(entry), QUEUE(queue)); + queue->count++; + return; +} + +/* + * Remove timer entry from its queue but don't change the queue pointer + * and set the async_dequeue flag. This is locking case 2b. 
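+ * The queue's count is adjusted here as well: the entry physically
+ * leaves the list even though entry->queue still points at it until
+ * the async_dequeue flag is observed and cleared.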
+ */ +static __inline__ void +timer_call_entry_dequeue_async( + timer_call_t entry) +{ + mpqueue_head_t *old_queue = MPQUEUE(CE(entry)->queue); + if (old_queue) { + old_queue->count--; + (void) remque(qe(entry)); + entry->async_dequeue = TRUE; + } + return; +} + #if TIMER_ASSERT unsigned timer_call_enqueue_deadline_unlocked_async1; unsigned timer_call_enqueue_deadline_unlocked_async2; @@ -253,31 +381,38 @@ timer_call_enqueue_deadline_unlocked( simple_lock(&call->lock); old_queue = MPQUEUE(entry->queue); if (old_queue != NULL) { - timer_call_lock_spin(old_queue); + timer_queue_lock_spin(old_queue); if (call->async_dequeue) { - /* collision (1c): null queue pointer and reset flag */ - call->async_dequeue = FALSE; - entry->queue = NULL; + /* collision (1c): timer already dequeued, clear flag */ #if TIMER_ASSERT + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, + call, + call->async_dequeue, + CE(call)->queue, + 0x1c, 0); timer_call_enqueue_deadline_unlocked_async1++; #endif - } else if (old_queue != queue) { - (void)remque(qe(entry)); + call->async_dequeue = FALSE; entry->queue = NULL; + } else if (old_queue != queue) { + timer_call_entry_dequeue(call); #if TIMER_ASSERT timer_call_enqueue_deadline_unlocked_async2++; #endif } + if (old_queue == timer_longterm_queue) + timer_longterm_dequeued_locked(call); if (old_queue != queue) { - timer_call_unlock(old_queue); - timer_call_lock_spin(queue); + timer_queue_unlock(old_queue); + timer_queue_lock_spin(queue); } } else { - timer_call_lock_spin(queue); + timer_queue_lock_spin(queue); } timer_call_entry_enqueue_deadline(call, queue, deadline); - timer_call_unlock(queue); + timer_queue_unlock(queue); simple_unlock(&call->lock); return (old_queue); @@ -298,22 +433,35 @@ timer_call_dequeue_unlocked( simple_lock(&call->lock); old_queue = MPQUEUE(entry->queue); +#if TIMER_ASSERT + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, + call, + call->async_dequeue, + CE(call)->queue, + 0, 0); +#endif if (old_queue != NULL) { - timer_call_lock_spin(old_queue); + timer_queue_lock_spin(old_queue); if (call->async_dequeue) { - /* collision (1c): null queue pointer and reset flag */ - call->async_dequeue = FALSE; + /* collision (1c): timer already dequeued, clear flag */ #if TIMER_ASSERT + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, + call, + call->async_dequeue, + CE(call)->queue, + 0x1c, 0); timer_call_dequeue_unlocked_async1++; #endif + call->async_dequeue = FALSE; + entry->queue = NULL; } else { - (void)remque(qe(entry)); -#if TIMER_ASSERT - timer_call_dequeue_unlocked_async2++; -#endif + timer_call_entry_dequeue(call); } - entry->queue = NULL; - timer_call_unlock(old_queue); + if (old_queue == timer_longterm_queue) + timer_longterm_dequeued_locked(call); + timer_queue_unlock(old_queue); } simple_unlock(&call->lock); return (old_queue); @@ -324,26 +472,42 @@ timer_call_enter_internal( timer_call_t call, timer_call_param_t param1, uint64_t deadline, - uint32_t flags) + uint64_t leeway, + uint32_t flags, + boolean_t ratelimited) { - mpqueue_head_t *queue; + mpqueue_head_t *queue = NULL; mpqueue_head_t *old_queue; spl_t s; - uint64_t slop = 0; + uint64_t slop; + uint32_t urgency; s = splclock(); call->soft_deadline = deadline; call->flags = flags; - if ((flags & TIMER_CALL_CRITICAL) == 0 && - mach_timer_coalescing_enabled) { - slop = timer_call_slop(deadline); + uint64_t ctime = mach_absolute_time(); + + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ENTER | DBG_FUNC_START, + call, + 
param1, deadline, flags, 0); + + urgency = (flags & TIMER_CALL_URGENCY_MASK); + + boolean_t slop_ratelimited = FALSE; + slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited); + + if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop) + slop = leeway; + + if (UINT64_MAX - deadline <= slop) { + deadline = UINT64_MAX; + } else { deadline += slop; } -#if defined(__i386__) || defined(__x86_64__) - uint64_t ctime = mach_absolute_time(); if (__improbable(deadline < ctime)) { uint64_t delta = (ctime - deadline); @@ -357,34 +521,64 @@ timer_call_enter_internal( deadline = ctime + past_deadline_timer_adjustment; call->soft_deadline = deadline; } -#endif + + /* Bit 0 of the "soft" deadline indicates that + * this particular timer call requires rate-limiting + * behaviour. Maintain the invariant deadline >= soft_deadline by + * setting bit 0 of "deadline". + */ + + deadline |= 1; + if (ratelimited || slop_ratelimited) { + call->soft_deadline |= 1ULL; + } else { + call->soft_deadline &= ~0x1ULL; + } + call->ttd = call->soft_deadline - ctime; #if CONFIG_DTRACE - DTRACE_TMR6(callout__create, timer_call_func_t, CE(call)->func, + DTRACE_TMR7(callout__create, timer_call_func_t, CE(call)->func, timer_call_param_t, CE(call)->param0, uint32_t, call->flags, (deadline - call->soft_deadline), - (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); + (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), call); #endif - queue = timer_queue_assign(deadline); + if (!ratelimited && !slop_ratelimited) { + queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue); + } - old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline); + if (queue == NULL) { + queue = timer_queue_assign(deadline); + old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline); + } CE(call)->param1 = param1; +#if TIMER_TRACE + CE(call)->entry_time = ctime; +#endif + + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ENTER | DBG_FUNC_END, + call, + (old_queue != NULL), call->soft_deadline, queue->count, 0); splx(s); return (old_queue != NULL); } +/* + * timer_call_*() + * return boolean indicating whether the call was previously queued. 
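+ *
+ * For example (a hypothetical caller, with deadline and leeway already
+ * in absolute time units):
+ *
+ *	if (timer_call_enter_with_leeway(call, NULL, deadline, leeway,
+ *	        TIMER_CALL_USER_BACKGROUND | TIMER_CALL_LEEWAY, FALSE))
+ *		...	// an earlier pending invocation was rescheduled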
+ */ boolean_t timer_call_enter( timer_call_t call, uint64_t deadline, uint32_t flags) { - return timer_call_enter_internal(call, NULL, deadline, flags); + return timer_call_enter_internal(call, NULL, deadline, 0, flags, FALSE); } boolean_t @@ -394,7 +588,19 @@ timer_call_enter1( uint64_t deadline, uint32_t flags) { - return timer_call_enter_internal(call, param1, deadline, flags); + return timer_call_enter_internal(call, param1, deadline, 0, flags, FALSE); +} + +boolean_t +timer_call_enter_with_leeway( + timer_call_t call, + timer_call_param_t param1, + uint64_t deadline, + uint64_t leeway, + uint32_t flags, + boolean_t ratelimited) +{ + return timer_call_enter_internal(call, param1, deadline, leeway, flags, ratelimited); } boolean_t @@ -406,16 +612,31 @@ timer_call_cancel( s = splclock(); + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_CANCEL | DBG_FUNC_START, + call, + CE(call)->deadline, call->soft_deadline, call->flags, 0); + old_queue = timer_call_dequeue_unlocked(call); if (old_queue != NULL) { - timer_call_lock_spin(old_queue); - if (!queue_empty(&old_queue->head)) + timer_queue_lock_spin(old_queue); + if (!queue_empty(&old_queue->head)) { timer_queue_cancel(old_queue, CE(call)->deadline, CE(queue_first(&old_queue->head))->deadline); - else + old_queue->earliest_soft_deadline = ((timer_call_t)queue_first(&old_queue->head))->soft_deadline; + } + else { timer_queue_cancel(old_queue, CE(call)->deadline, UINT64_MAX); - timer_call_unlock(old_queue); + old_queue->earliest_soft_deadline = UINT64_MAX; + } + timer_queue_unlock(old_queue); } + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_CANCEL | DBG_FUNC_END, + call, + old_queue, + CE(call)->deadline - mach_absolute_time(), + CE(call)->deadline - CE(call)->entry_time, 0); splx(s); #if CONFIG_DTRACE @@ -441,7 +662,7 @@ timer_queue_shutdown( s = splclock(); /* Note comma operator in while expression re-locking each iteration */ - while (timer_call_lock_spin(queue), !queue_empty(&queue->head)) { + while (timer_queue_lock_spin(queue), !queue_empty(&queue->head)) { call = TIMER_CALL(queue_first(&queue->head)); if (!simple_lock_try(&call->lock)) { /* @@ -450,54 +671,89 @@ timer_queue_shutdown( * but set the async_dequeue field. 
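+ * (timer_call_entry_dequeue_async() now encapsulates this dequeue and
+ * flag-set while keeping the queue's count field consistent.)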
*/ timer_queue_shutdown_lock_skips++; - (void) remque(qe(call)); - call->async_dequeue = TRUE; - timer_call_unlock(queue); + timer_call_entry_dequeue_async(call); +#if TIMER_ASSERT + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, + call, + call->async_dequeue, + CE(call)->queue, + 0x2b, 0); +#endif + timer_queue_unlock(queue); continue; } /* remove entry from old queue */ timer_call_entry_dequeue(call); - timer_call_unlock(queue); + timer_queue_unlock(queue); /* and queue it on new */ new_queue = timer_queue_assign(CE(call)->deadline); - timer_call_lock_spin(new_queue); + timer_queue_lock_spin(new_queue); timer_call_entry_enqueue_deadline( call, new_queue, CE(call)->deadline); - timer_call_unlock(new_queue); + timer_queue_unlock(new_queue); simple_unlock(&call->lock); } - timer_call_unlock(queue); + timer_queue_unlock(queue); splx(s); } uint32_t timer_queue_expire_lock_skips; uint64_t -timer_queue_expire( +timer_queue_expire_with_options( mpqueue_head_t *queue, - uint64_t deadline) + uint64_t deadline, + boolean_t rescan) { - timer_call_t call; - + timer_call_t call = NULL; + uint32_t tc_iterations = 0; DBG("timer_queue_expire(%p,)\n", queue); - timer_call_lock_spin(queue); + uint64_t cur_deadline = deadline; + timer_queue_lock_spin(queue); while (!queue_empty(&queue->head)) { - call = TIMER_CALL(queue_first(&queue->head)); + /* Upon processing one or more timer calls, refresh the + * deadline to account for time elapsed in the callout + */ + if (++tc_iterations > 1) + cur_deadline = mach_absolute_time(); + + if (call == NULL) + call = TIMER_CALL(queue_first(&queue->head)); - if (call->soft_deadline <= deadline) { + if (call->soft_deadline <= cur_deadline) { timer_call_func_t func; timer_call_param_t param0, param1; + TCOAL_DEBUG(0xDDDD0000, queue->earliest_soft_deadline, call->soft_deadline, 0, 0, 0); + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_EXPIRE | DBG_FUNC_NONE, + call, + call->soft_deadline, + CE(call)->deadline, + CE(call)->entry_time, 0); + + /* Bit 0 of the "soft" deadline indicates that + * this particular timer call is rate-limited + * and hence shouldn't be processed before its + * hard deadline. + */ + if ((call->soft_deadline & 0x1) && + (CE(call)->deadline > cur_deadline)) { + if (rescan == FALSE) + break; + } + if (!simple_lock_try(&call->lock)) { /* case (2b) lock inversion, dequeue and skip */ timer_queue_expire_lock_skips++; - (void) remque(qe(call)); - call->async_dequeue = TRUE; + timer_call_entry_dequeue_async(call); + call = NULL; continue; } @@ -508,19 +764,18 @@ timer_queue_expire( param1 = CE(call)->param1; simple_unlock(&call->lock); - timer_call_unlock(queue); + timer_queue_unlock(queue); - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CALLOUT | DBG_FUNC_START, - VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0); + call, VM_KERNEL_UNSLIDE(func), param0, param1, 0); #if CONFIG_DTRACE - DTRACE_TMR6(callout__start, timer_call_func_t, func, + DTRACE_TMR7(callout__start, timer_call_func_t, func, timer_call_param_t, param0, unsigned, call->flags, 0, (call->ttd >> 32), - (unsigned) (call->ttd & 0xFFFFFFFF)); + (unsigned) (call->ttd & 0xFFFFFFFF), call); #endif - /* Maintain time-to-deadline in per-processor data * structure for thread wakeup deadline statistics. 
*/ @@ -528,38 +783,75 @@ timer_queue_expire( *ttdp = call->ttd; (*func)(param0, param1); *ttdp = 0; - #if CONFIG_DTRACE - DTRACE_TMR3(callout__end, timer_call_func_t, func, - timer_call_param_t, param0, timer_call_param_t, - param1); + DTRACE_TMR4(callout__end, timer_call_func_t, func, + param0, param1, call); #endif - KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, DECR_TIMER_CALLOUT | DBG_FUNC_END, - VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0); - - timer_call_lock_spin(queue); + call, VM_KERNEL_UNSLIDE(func), param0, param1, 0); + call = NULL; + timer_queue_lock_spin(queue); + } else { + if (__probable(rescan == FALSE)) { + break; + } else { + int64_t skew = CE(call)->deadline - call->soft_deadline; + assert(CE(call)->deadline >= call->soft_deadline); + + /* DRK: On a latency quality-of-service level change, + * re-sort potentially rate-limited timers. The platform + * layer determines which timers require + * this. In the absence of the per-callout + * synchronization requirement, a global resort could + * be more efficient. The re-sort effectively + * annuls all timer adjustments, i.e. the "soft + * deadline" is the sort key. + */ + + if (timer_resort_threshold(skew)) { + if (__probable(simple_lock_try(&call->lock))) { + timer_call_entry_dequeue(call); + timer_call_entry_enqueue_deadline(call, queue, call->soft_deadline); + simple_unlock(&call->lock); + call = NULL; + } + } + if (call) { + call = TIMER_CALL(queue_next(qe(call))); + if (queue_end(&queue->head, qe(call))) + break; + } + } } - else - break; } - if (!queue_empty(&queue->head)) - deadline = CE(call)->deadline; - else - deadline = UINT64_MAX; + if (!queue_empty(&queue->head)) { + call = TIMER_CALL(queue_first(&queue->head)); + cur_deadline = CE(call)->deadline; + queue->earliest_soft_deadline = call->soft_deadline; + } else { + queue->earliest_soft_deadline = cur_deadline = UINT64_MAX; + } - timer_call_unlock(queue); + timer_queue_unlock(queue); - return (deadline); + return (cur_deadline); } +uint64_t +timer_queue_expire( + mpqueue_head_t *queue, + uint64_t deadline) +{ + return timer_queue_expire_with_options(queue, deadline, FALSE); +} extern int serverperfmode; uint32_t timer_queue_migrate_lock_skips; /* - * timer_queue_migrate() is called by etimer_queue_migrate() + * timer_queue_migrate() is called by timer_queue_migrate_cpu() * to move timer requests from the local processor (queue_from) * to a target processor's (queue_to). */ @@ -597,7 +889,7 @@ timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) * so that we need not have the target resync. 
*/ - timer_call_lock_spin(queue_to); + timer_queue_lock_spin(queue_to); head_to = TIMER_CALL(queue_first(&queue_to->head)); if (queue_empty(&queue_to->head)) { @@ -605,7 +897,7 @@ timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) goto abort1; } - timer_call_lock_spin(queue_from); + timer_queue_lock_spin(queue_from); if (queue_empty(&queue_from->head)) { timers_migrated = -2; @@ -632,9 +924,16 @@ timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) call = TIMER_CALL(queue_first(&queue_from->head)); if (!simple_lock_try(&call->lock)) { /* case (2b) lock order inversion, dequeue only */ +#ifdef TIMER_ASSERT + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, + call, + CE(call)->queue, + call->lock.interlock.lock_data, + 0x2b, 0); +#endif timer_queue_migrate_lock_skips++; - (void) remque(qe(call)); - call->async_dequeue = TRUE; + timer_call_entry_dequeue_async(call); continue; } timer_call_entry_dequeue(call); @@ -643,11 +942,540 @@ timer_queue_migrate(mpqueue_head_t *queue_from, mpqueue_head_t *queue_to) timers_migrated++; simple_unlock(&call->lock); } - + queue_from->earliest_soft_deadline = UINT64_MAX; abort2: - timer_call_unlock(queue_from); + timer_queue_unlock(queue_from); abort1: - timer_call_unlock(queue_to); + timer_queue_unlock(queue_to); return timers_migrated; } + +void +timer_queue_trace_cpu(int ncpu) +{ + timer_call_nosync_cpu( + ncpu, + (void(*)())timer_queue_trace, + (void*) timer_queue_cpu(ncpu)); +} + +void +timer_queue_trace( + mpqueue_head_t *queue) +{ + timer_call_t call; + spl_t s; + + if (!kdebug_enable) + return; + + s = splclock(); + timer_queue_lock_spin(queue); + + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_QUEUE | DBG_FUNC_START, + queue->count, mach_absolute_time(), 0, 0, 0); + + if (!queue_empty(&queue->head)) { + call = TIMER_CALL(queue_first(&queue->head)); + do { + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_QUEUE | DBG_FUNC_NONE, + call->soft_deadline, + CE(call)->deadline, + CE(call)->entry_time, + CE(call)->func, + 0); + call = TIMER_CALL(queue_next(qe(call))); + } while (!queue_end(&queue->head, qe(call))); + } + + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_QUEUE | DBG_FUNC_END, + queue->count, mach_absolute_time(), 0, 0, 0); + + timer_queue_unlock(queue); + splx(s); +} + +void +timer_longterm_dequeued_locked(timer_call_t call) +{ + timer_longterm_t *tlp = &timer_longterm; + + tlp->dequeues++; + if (call == tlp->threshold.call) + tlp->threshold.call = NULL; +} + +/* + * Place a timer call in the longterm list + * and adjust the next timer callout deadline if the new timer is first. + */ +mpqueue_head_t * +timer_longterm_enqueue_unlocked(timer_call_t call, + uint64_t now, + uint64_t deadline, + mpqueue_head_t **old_queue) +{ + timer_longterm_t *tlp = &timer_longterm; + boolean_t update_required = FALSE; + uint64_t longterm_threshold; + + longterm_threshold = now + tlp->threshold.interval; + + /* + * Return NULL without doing anything if: + * - this timer is local, or + * - the longterm mechanism is disabled, or + * - this deadline is too short. + */ + if (__probable((call->flags & TIMER_CALL_LOCAL) != 0 || + (tlp->threshold.interval == TIMER_LONGTERM_NONE) || + (deadline <= longterm_threshold))) + return NULL; + + /* + * Remove timer from its current queue, if any. + */ + *old_queue = timer_call_dequeue_unlocked(call); + + /* + * Lock the longterm queue, queue timer and determine + * whether an update is necessary. 
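+ * (Interrupts are already disabled here, and the call lock is taken
+ * ahead of the queue lock, matching the ordering used by the other
+ * enqueue/dequeue paths.)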
+ */ + assert(!ml_get_interrupts_enabled()); + simple_lock(&call->lock); + timer_queue_lock_spin(timer_longterm_queue); + timer_call_entry_enqueue_tail(call, timer_longterm_queue); + CE(call)->deadline = deadline; + + tlp->enqueues++; + + /* + * We'll need to update the currently set threshold timer + * if the new deadline is sooner and no sooner update is in flight. + */ + if (deadline < tlp->threshold.deadline && + deadline < tlp->threshold.preempted) { + tlp->threshold.preempted = deadline; + tlp->threshold.call = call; + update_required = TRUE; + } + timer_queue_unlock(timer_longterm_queue); + simple_unlock(&call->lock); + + if (update_required) { + timer_call_nosync_cpu( + master_cpu, + (void (*)(void *)) timer_longterm_update, + (void *)tlp); + } + + return timer_longterm_queue; +} + +/* + * Scan for timers below the longterm threshold. + * Move these to the local timer queue (of the boot processor on which the + * calling thread is running). + * Both the local (boot) queue and the longterm queue are locked. + * The scan is similar to the timer migrate sequence but is performed by + * successively examining each timer on the longterm queue: + * - if within the short-term threshold + * - enter on the local queue (unless being deleted), + * - otherwise: + * - if sooner, deadline becomes the next threshold deadline. + */ +void +timer_longterm_scan(timer_longterm_t *tlp, + uint64_t now) +{ + queue_entry_t qe; + timer_call_t call; + uint64_t threshold; + uint64_t deadline; + mpqueue_head_t *timer_master_queue; + + assert(!ml_get_interrupts_enabled()); + assert(cpu_number() == master_cpu); + + if (tlp->threshold.interval != TIMER_LONGTERM_NONE) + threshold = now + tlp->threshold.interval; + else + threshold = TIMER_LONGTERM_NONE; + + tlp->threshold.deadline = TIMER_LONGTERM_NONE; + tlp->threshold.call = NULL; + + if (queue_empty(&timer_longterm_queue->head)) + return; + + timer_master_queue = timer_queue_cpu(master_cpu); + timer_queue_lock_spin(timer_master_queue); + + qe = queue_first(&timer_longterm_queue->head); + while (!queue_end(&timer_longterm_queue->head, qe)) { + call = TIMER_CALL(qe); + deadline = call->soft_deadline; + qe = queue_next(qe); + if (!simple_lock_try(&call->lock)) { + /* case (2c) lock order inversion, dequeue only */ +#ifdef TIMER_ASSERT + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ASYNC_DEQ | DBG_FUNC_NONE, + call, + CE(call)->queue, + call->lock.interlock.lock_data, + 0x2c, 0); +#endif + timer_call_entry_dequeue_async(call); + continue; + } + if (deadline < threshold) { + /* + * This timer needs moving (escalating) + * to the local (boot) processor's queue. + */ +#ifdef TIMER_ASSERT + if (deadline < now) + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_OVERDUE | DBG_FUNC_NONE, + call, + deadline, + now, + threshold, + 0); +#endif + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_ESCALATE | DBG_FUNC_NONE, + call, + CE(call)->deadline, + CE(call)->entry_time, + CE(call)->func, + 0); + tlp->escalates++; + timer_call_entry_dequeue(call); + timer_call_entry_enqueue_deadline( + call, timer_master_queue, CE(call)->deadline); + /* + * A side-effect of the following call is to update + * the actual hardware deadline if required. 
+ */ + (void) timer_queue_assign(deadline); + } else { + if (deadline < tlp->threshold.deadline) { + tlp->threshold.deadline = deadline; + tlp->threshold.call = call; + } + } + simple_unlock(&call->lock); + } + + timer_queue_unlock(timer_master_queue); +} + +void +timer_longterm_callout(timer_call_param_t p0, __unused timer_call_param_t p1) +{ + timer_longterm_t *tlp = (timer_longterm_t *) p0; + + timer_longterm_update(tlp); +} + +void +timer_longterm_update_locked(timer_longterm_t *tlp) +{ + uint64_t latency; + + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_UPDATE | DBG_FUNC_START, + &tlp->queue, + tlp->threshold.deadline, + tlp->threshold.preempted, + tlp->queue.count, 0); + + tlp->scan_time = mach_absolute_time(); + if (tlp->threshold.preempted != TIMER_LONGTERM_NONE) { + tlp->threshold.preempts++; + tlp->threshold.deadline = tlp->threshold.preempted; + tlp->threshold.preempted = TIMER_LONGTERM_NONE; + /* + * Note: in the unlikely event that a pre-empted timer has + * itself been cancelled, we'll simply re-scan later at the + * time of the preempted/cancelled timer. + */ + } else { + tlp->threshold.scans++; + + /* + * Maintain a moving average of our wakeup latency. + * Clamp latency to 0 and ignore above threshold interval. + */ + if (tlp->scan_time > tlp->threshold.deadline_set) + latency = tlp->scan_time - tlp->threshold.deadline_set; + else + latency = 0; + if (latency < tlp->threshold.interval) { + tlp->threshold.latency_min = + MIN(tlp->threshold.latency_min, latency); + tlp->threshold.latency_max = + MAX(tlp->threshold.latency_max, latency); + tlp->threshold.latency = + (tlp->threshold.latency*99 + latency) / 100; + } + + timer_longterm_scan(tlp, tlp->scan_time); + } + + tlp->threshold.deadline_set = tlp->threshold.deadline; + /* The next deadline timer to be set is adjusted */ + if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { + tlp->threshold.deadline_set -= tlp->threshold.margin; + tlp->threshold.deadline_set -= tlp->threshold.latency; + } + + TIMER_KDEBUG_TRACE(KDEBUG_TRACE, + DECR_TIMER_UPDATE | DBG_FUNC_END, + &tlp->queue, + tlp->threshold.deadline, + tlp->threshold.scans, + tlp->queue.count, 0); +} + +void +timer_longterm_update(timer_longterm_t *tlp) +{ + spl_t s = splclock(); + + timer_queue_lock_spin(timer_longterm_queue); + + if (cpu_number() != master_cpu) + panic("timer_longterm_update_master() on non-boot cpu"); + + timer_longterm_update_locked(tlp); + + if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) + timer_call_enter( + &tlp->threshold.timer, + tlp->threshold.deadline_set, + TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); + + timer_queue_unlock(timer_longterm_queue); + splx(s); +} + +void +timer_longterm_init(void) +{ + uint32_t longterm; + timer_longterm_t *tlp = &timer_longterm; + + DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue); + + /* + * Set the longterm timer threshold. + * Defaults to TIMER_LONGTERM_THRESHOLD; overridden longterm boot-arg + */ + tlp->threshold.interval = TIMER_LONGTERM_THRESHOLD; + if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) { + tlp->threshold.interval = (longterm == 0) ? 
+				TIMER_LONGTERM_NONE :
+				longterm * NSEC_PER_MSEC;
+	}
+	if (tlp->threshold.interval != TIMER_LONGTERM_NONE) {
+		printf("Longterm timer threshold: %llu ms\n",
+			tlp->threshold.interval / NSEC_PER_MSEC);
+		kprintf("Longterm timer threshold: %llu ms\n",
+			tlp->threshold.interval / NSEC_PER_MSEC);
+		nanoseconds_to_absolutetime(tlp->threshold.interval,
+			&tlp->threshold.interval);
+		tlp->threshold.margin = tlp->threshold.interval / 10;
+		tlp->threshold.latency_min = EndOfAllTime;
+		tlp->threshold.latency_max = 0;
+	}
+
+	tlp->threshold.preempted = TIMER_LONGTERM_NONE;
+	tlp->threshold.deadline = TIMER_LONGTERM_NONE;
+
+	lck_attr_setdefault(&timer_longterm_lck_attr);
+	lck_grp_attr_setdefault(&timer_longterm_lck_grp_attr);
+	lck_grp_init(&timer_longterm_lck_grp,
+		"timer_longterm", &timer_longterm_lck_grp_attr);
+	mpqueue_init(&tlp->queue,
+		&timer_longterm_lck_grp, &timer_longterm_lck_attr);
+
+	timer_call_setup(&tlp->threshold.timer,
+		timer_longterm_callout, (timer_call_param_t) tlp);
+
+	timer_longterm_queue = &tlp->queue;
+}
+
+enum {
+	THRESHOLD, QCOUNT,
+	ENQUEUES, DEQUEUES, ESCALATES, SCANS, PREEMPTS,
+	LATENCY, LATENCY_MIN, LATENCY_MAX
+};
+uint64_t
+timer_sysctl_get(int oid)
+{
+	timer_longterm_t *tlp = &timer_longterm;
+
+	switch (oid) {
+	case THRESHOLD:
+		return (tlp->threshold.interval == TIMER_LONGTERM_NONE) ?
+			0 : tlp->threshold.interval / NSEC_PER_MSEC;
+	case QCOUNT:
+		return tlp->queue.count;
+	case ENQUEUES:
+		return tlp->enqueues;
+	case DEQUEUES:
+		return tlp->dequeues;
+	case ESCALATES:
+		return tlp->escalates;
+	case SCANS:
+		return tlp->threshold.scans;
+	case PREEMPTS:
+		return tlp->threshold.preempts;
+	case LATENCY:
+		return tlp->threshold.latency;
+	case LATENCY_MIN:
+		return tlp->threshold.latency_min;
+	case LATENCY_MAX:
+		return tlp->threshold.latency_max;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * timer_master_scan() is the inverse of timer_longterm_scan()
+ * since it un-escalates timers to the longterm queue.
+ */
+static void
+timer_master_scan(timer_longterm_t	*tlp,
+		  uint64_t		now)
+{
+	queue_entry_t	qe;
+	timer_call_t	call;
+	uint64_t	threshold;
+	uint64_t	deadline;
+	mpqueue_head_t	*timer_master_queue;
+
+	if (tlp->threshold.interval != TIMER_LONGTERM_NONE)
+		threshold = now + tlp->threshold.interval;
+	else
+		threshold = TIMER_LONGTERM_NONE;
+
+	timer_master_queue = timer_queue_cpu(master_cpu);
+	timer_queue_lock_spin(timer_master_queue);
+
+	qe = queue_first(&timer_master_queue->head);
+	while (!queue_end(&timer_master_queue->head, qe)) {
+		call = TIMER_CALL(qe);
+		deadline = CE(call)->deadline;
+		qe = queue_next(qe);
+		if ((call->flags & TIMER_CALL_LOCAL) != 0)
+			continue;
+		if (!simple_lock_try(&call->lock)) {
+			/* case (2c) lock order inversion, dequeue only */
+			timer_call_entry_dequeue_async(call);
+			continue;
+		}
+		if (deadline > threshold) {
+			/* move from master to longterm */
+			timer_call_entry_dequeue(call);
+			timer_call_entry_enqueue_tail(call, timer_longterm_queue);
+			if (deadline < tlp->threshold.deadline) {
+				tlp->threshold.deadline = deadline;
+				tlp->threshold.call = call;
+			}
+		}
+		simple_unlock(&call->lock);
+	}
+	timer_queue_unlock(timer_master_queue);
+}
+
+static void
+timer_sysctl_set_threshold(uint64_t value)
+{
+	timer_longterm_t	*tlp = &timer_longterm;
+	spl_t			s = splclock();
+	boolean_t		threshold_increase;
+
+	timer_queue_lock_spin(timer_longterm_queue);
+
+	timer_call_cancel(&tlp->threshold.timer);
+
+	/*
+	 * Set the new threshold and note whether it's increasing.
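+	 * A value of zero disables the longterm mechanism altogether.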
+ */ + if (value == 0) { + tlp->threshold.interval = TIMER_LONGTERM_NONE; + threshold_increase = TRUE; + timer_call_cancel(&tlp->threshold.timer); + } else { + uint64_t old_interval = tlp->threshold.interval; + tlp->threshold.interval = value * NSEC_PER_MSEC; + nanoseconds_to_absolutetime(tlp->threshold.interval, + &tlp->threshold.interval); + tlp->threshold.margin = tlp->threshold.interval / 10; + if (old_interval == TIMER_LONGTERM_NONE) + threshold_increase = FALSE; + else + threshold_increase = (tlp->threshold.interval > old_interval); + } + + if (threshold_increase /* or removal */) { + /* Escalate timers from the longterm queue */ + timer_longterm_scan(tlp, mach_absolute_time()); + } else /* decrease or addition */ { + /* + * We scan the local/master queue for timers now longterm. + * To be strictly correct, we should scan all processor queues + * but timer migration results in most timers gravitating to the + * master processor in any case. + */ + timer_master_scan(tlp, mach_absolute_time()); + } + + /* Set new timer accordingly */ + tlp->threshold.deadline_set = tlp->threshold.deadline; + if (tlp->threshold.deadline != TIMER_LONGTERM_NONE) { + tlp->threshold.deadline_set -= tlp->threshold.margin; + tlp->threshold.deadline_set -= tlp->threshold.latency; + timer_call_enter( + &tlp->threshold.timer, + tlp->threshold.deadline_set, + TIMER_CALL_LOCAL | TIMER_CALL_SYS_CRITICAL); + } + + /* Reset stats */ + tlp->enqueues = 0; + tlp->dequeues = 0; + tlp->escalates = 0; + tlp->threshold.scans = 0; + tlp->threshold.preempts = 0; + tlp->threshold.latency = 0; + tlp->threshold.latency_min = EndOfAllTime; + tlp->threshold.latency_max = 0; + + timer_queue_unlock(timer_longterm_queue); + splx(s); +} + +int +timer_sysctl_set(int oid, uint64_t value) +{ + switch (oid) { + case THRESHOLD: + timer_call_cpu( + master_cpu, + (void (*)(void *)) timer_sysctl_set_threshold, + (void *) value); + return KERN_SUCCESS; + default: + return KERN_INVALID_ARGUMENT; + } +} diff --git a/osfmk/kern/timer_call.h b/osfmk/kern/timer_call.h index 1f9370b1e..10152e07c 100644 --- a/osfmk/kern/timer_call.h +++ b/osfmk/kern/timer_call.h @@ -33,11 +33,19 @@ #define _KERN_TIMER_CALL_H_ #include +#include -#ifdef MACH_KERNEL_PRIVATE +#ifdef XNU_KERNEL_PRIVATE #include +#ifdef MACH_KERNEL_PRIVATE +#include + +extern boolean_t mach_timer_coalescing_enabled; +extern void timer_call_queue_init(mpqueue_head_t *); +#endif + /* * NOTE: for now, bsd/dev/dtrace/dtrace_glue.c has its own definition * of this data structure, and the two had better match. @@ -50,39 +58,81 @@ typedef struct timer_call { boolean_t async_dequeue; /* this field is protected by call_entry queue's lock */ uint64_t ttd; /* Time to deadline at creation */ -} *timer_call_t; - -typedef void *timer_call_param_t; -typedef void (*timer_call_func_t)( - timer_call_param_t param0, - timer_call_param_t param1); -#define TIMER_CALL_CRITICAL 0x01 -#define TIMER_CALL_LOCAL 0x02 +} timer_call_data_t, *timer_call_t; + +#define EndOfAllTime 0xFFFFFFFFFFFFFFFFULL + +typedef void *timer_call_param_t; +typedef void (*timer_call_func_t)( + timer_call_param_t param0, + timer_call_param_t param1); + +/* + * Flags to alter the default timer/timeout coalescing behavior + * on a per-timer_call basis. + * + * The SYS urgency classes indicate that the timer_call is not + * directly related to the current thread at the time the timer_call + * is entered, so it is ignored in the calculation entirely (only + * the subclass specified is used). 
+ * + * The USER flags indicate that both the current thread scheduling and QoS + * attributes, in addition to the per-timer_call urgency specification, + * are used to establish coalescing behavior. + */ +#define TIMER_CALL_SYS_NORMAL TIMEOUT_URGENCY_SYS_NORMAL +#define TIMER_CALL_SYS_CRITICAL TIMEOUT_URGENCY_SYS_CRITICAL +#define TIMER_CALL_SYS_BACKGROUND TIMEOUT_URGENCY_SYS_BACKGROUND + +#define TIMER_CALL_USER_MASK TIMEOUT_URGENCY_USER_MASK +#define TIMER_CALL_USER_NORMAL TIMEOUT_URGENCY_USER_NORMAL +#define TIMER_CALL_USER_CRITICAL TIMEOUT_URGENCY_USER_CRITICAL +#define TIMER_CALL_USER_BACKGROUND TIMEOUT_URGENCY_USER_BACKGROUND + +#define TIMER_CALL_URGENCY_MASK TIMEOUT_URGENCY_MASK + +/* + * Indicate that a specific leeway value is being provided (otherwise + * the leeway parameter is ignored). This supplied value can currently + * only be used to extend the leeway calculated internally from the + * urgency class provided. + */ +#define TIMER_CALL_LEEWAY TIMEOUT_URGENCY_LEEWAY + +/* + * Non-migratable timer_call + */ +#define TIMER_CALL_LOCAL TIMEOUT_URGENCY_FIRST_AVAIL + extern boolean_t timer_call_enter( - timer_call_t call, - uint64_t deadline, - uint32_t flags); + timer_call_t call, + uint64_t deadline, + uint32_t flags); extern boolean_t timer_call_enter1( + timer_call_t call, + timer_call_param_t param1, + uint64_t deadline, + uint32_t flags); + +extern boolean_t timer_call_enter_with_leeway( timer_call_t call, timer_call_param_t param1, uint64_t deadline, - uint32_t flags); + uint64_t leeway, + uint32_t flags, + boolean_t ratelimited); extern boolean_t timer_call_cancel( - timer_call_t call); - -typedef struct timer_call timer_call_data_t; - -extern void timer_call_initialize(void); + timer_call_t call); -extern void timer_call_initialize_queue(mpqueue_head_t *); +extern void timer_call_init(void); extern void timer_call_setup( - timer_call_t call, - timer_call_func_t func, - timer_call_param_t param0); + timer_call_t call, + timer_call_func_t func, + timer_call_param_t param0); -#endif /* MACH_KERNEL_PRIVATE */ +#endif /* XNU_KERNEL_PRIVATE */ #endif /* _KERN_TIMER_CALL_H_ */ diff --git a/osfmk/kern/timer_queue.h b/osfmk/kern/timer_queue.h index 3975b3101..060e72183 100644 --- a/osfmk/kern/timer_queue.h +++ b/osfmk/kern/timer_queue.h @@ -38,16 +38,41 @@ #include +/* Kernel trace events associated with timers and timer queues */ +#define DECR_TRAP_LATENCY MACHDBG_CODE(DBG_MACH_EXCP_DECI, 0) +#define DECR_SET_DEADLINE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) +#define DECR_TIMER_CALLOUT MACHDBG_CODE(DBG_MACH_EXCP_DECI, 2) +#define DECR_PM_DEADLINE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) +#define DECR_TIMER_MIGRATE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 4) +#if defined(i386) || defined(x86_64) +#define DECR_RDHPET MACHDBG_CODE(DBG_MACH_EXCP_DECI, 5) +#define DECR_SET_TSC_DEADLINE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 6) +#endif +#define DECR_TIMER_ENTER MACHDBG_CODE(DBG_MACH_EXCP_DECI, 7) +#define DECR_TIMER_CANCEL MACHDBG_CODE(DBG_MACH_EXCP_DECI, 8) +#define DECR_TIMER_QUEUE MACHDBG_CODE(DBG_MACH_EXCP_DECI, 9) +#define DECR_TIMER_EXPIRE MACHDBG_CODE(DBG_MACH_EXCP_DECI,10) +#define DECR_TIMER_ASYNC_DEQ MACHDBG_CODE(DBG_MACH_EXCP_DECI,11) +#define DECR_TIMER_UPDATE MACHDBG_CODE(DBG_MACH_EXCP_DECI,12) +#define DECR_TIMER_ESCALATE MACHDBG_CODE(DBG_MACH_EXCP_DECI,13) +#define DECR_TIMER_OVERDUE MACHDBG_CODE(DBG_MACH_EXCP_DECI,14) +#define DECR_TIMER_RESCAN MACHDBG_CODE(DBG_MACH_EXCP_DECI,15) + /* * Invoked by kernel, implemented by platform. 
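[Aside: illustrative sketch, not part of the xnu patch. Tying the timer_call.h additions above together: a caller that can tolerate deferral passes an urgency class plus an explicit leeway. The sketch uses only the entry points and flags declared above; my_timer, my_callout, and the deadline/leeway values are hypothetical, and kernel context is assumed.]

	/* Assumes kernel context and the timer_call.h declarations above. */
	static timer_call_data_t	my_timer;	/* hypothetical */

	static void
	my_callout(timer_call_param_t p0, __unused timer_call_param_t p1)
	{
		/* deferred work would run here */
		(void) p0;
	}

	static void
	arm_deferrable_timeout(uint64_t deadline_abs, uint64_t leeway_abs)
	{
		timer_call_setup(&my_timer, my_callout, NULL);

		/* Background urgency; TIMER_CALL_LEEWAY marks leeway_abs as valid. */
		(void) timer_call_enter_with_leeway(&my_timer, NULL,
		    deadline_abs, leeway_abs,
		    TIMER_CALL_USER_BACKGROUND | TIMER_CALL_LEEWAY,
		    FALSE);
	}

[end aside]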
*/ /* Request an expiration deadline, returns queue association */ -extern mpqueue_head_t *timer_queue_assign( +extern mpqueue_head_t * timer_queue_assign( uint64_t deadline); extern uint64_t timer_call_slop( - uint64_t deadline); + uint64_t deadline, + uint64_t armtime, + uint32_t urgency, + thread_t arming_thread, + boolean_t *rlimited); +extern boolean_t timer_resort_threshold(uint64_t); /* Cancel an associated expiration deadline and specify new deadline */ extern void timer_queue_cancel( @@ -55,6 +80,22 @@ extern void timer_queue_cancel( uint64_t deadline, uint64_t new_deadline); +/* Return a pointer to the local timer queue for a given cpu */ +extern mpqueue_head_t * timer_queue_cpu( + int cpu); + +/* Call a function with argument on a cpu */ +extern void timer_call_cpu( + int cpu, + void (*fn)(void *), + void *arg); + +/* Queue a function to be called with argument on a cpu */ +extern void timer_call_nosync_cpu( + int cpu, + void (*fn)(void *), + void *arg); + /* * Invoked by platform, implemented by kernel. */ @@ -64,6 +105,11 @@ extern uint64_t timer_queue_expire( mpqueue_head_t *queue, uint64_t deadline); +extern uint64_t timer_queue_expire_with_options( + mpqueue_head_t *, + uint64_t, + boolean_t); + /* Shutdown a timer queue and reassign existing activities */ extern void timer_queue_shutdown( mpqueue_head_t *queue); @@ -73,6 +119,32 @@ extern int timer_queue_migrate( mpqueue_head_t *from, mpqueue_head_t *to); +/* + * Invoked by platform, implemented by platform. + */ + +extern void timer_intr(int inuser, uint64_t iaddr); + +#if defined(i386) || defined(x86_64) +extern uint64_t setPop(uint64_t time); +#else +extern int setPop(uint64_t time); +#endif + +extern void timer_resync_deadlines(void); + +extern void timer_set_deadline(uint64_t deadline); + +/* Migrate the local timer queue of a given cpu to the master cpu */ +extern uint32_t timer_queue_migrate_cpu(int target_cpu); + +extern void timer_queue_trace( + mpqueue_head_t *queue); +extern void timer_queue_trace_cpu(int cpu); + +extern uint64_t timer_sysctl_get(int oid); +extern int timer_sysctl_set(int oid, uint64_t value); + #endif /* MACH_KERNEL_PRIVATE */ #endif /* _KERN_TIMER_QUEUE_H_ */ diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c index 6bcabf8c1..b51ff419f 100644 --- a/osfmk/kern/wait_queue.c +++ b/osfmk/kern/wait_queue.c @@ -82,7 +82,7 @@ static boolean_t wait_queue_member_locked( wait_queue_t wq, wait_queue_set_t wq_set); -static void wait_queues_init(void) __attribute__((section("__TEXT, initcode"))); +static void wait_queues_init(void); #define WAIT_QUEUE_MAX thread_max #define WAIT_QUEUE_SET_MAX task_max * 3 @@ -231,6 +231,7 @@ wait_queue_init( wq->wq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0); wq->wq_type = _WAIT_QUEUE_inited; + wq->wq_eventmask = 0; queue_init(&wq->wq_queue); hw_lock_init(&wq->wq_interlock); return KERN_SUCCESS; @@ -473,6 +474,22 @@ MACRO_END #endif /* !_WAIT_QUEUE_DEBUG_ */ +/* + * Routine: wait_queue_global + * Purpose: + * Indicate if this wait queue is a global wait queue or not.
+ */ +static boolean_t +wait_queue_global( + wait_queue_t wq) +{ + if ((wq >= wait_queues) && (wq <= (wait_queues + num_wait_queues))) { + return TRUE; + } + return FALSE; +} + + /* * Routine: wait_queue_member_locked * Purpose: @@ -1164,7 +1181,9 @@ wait_queue_assert_wait64_locked( wait_queue_t wq, event64_t event, wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, uint64_t deadline, + uint64_t leeway, thread_t thread) { wait_result_t wait_result; @@ -1209,14 +1228,16 @@ wait_queue_assert_wait64_locked( thread->wait_queue = wq; if (deadline != 0) { - uint32_t flags; - - flags = realtime ? TIMER_CALL_CRITICAL : 0; - if (!timer_call_enter(&thread->wait_timer, deadline, flags)) + if (!timer_call_enter_with_leeway(&thread->wait_timer, NULL, + deadline, leeway, urgency, FALSE)) thread->wait_timer_active++; thread->wait_timer_is_set = TRUE; } + if (wait_queue_global(wq)) { + wq->wq_eventmask = wq->wq_eventmask | CAST_TO_EVENT_MASK(event); + } + } return(wait_result); } @@ -1249,7 +1270,50 @@ wait_queue_assert_wait( wait_queue_lock(wq); thread_lock(thread); ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event), - interruptible, deadline, thread); + interruptible, + TIMEOUT_URGENCY_SYS_NORMAL, + deadline, 0, + thread); + thread_unlock(thread); + wait_queue_unlock(wq); + splx(s); + return(ret); +} + +/* + * Routine: wait_queue_assert_wait_with_leeway + * Purpose: + * Insert the current thread into the supplied wait queue + * waiting for a particular event to be posted to that queue. + * Deadline values are specified with urgency and leeway. + * + * Conditions: + * nothing of interest locked. + */ +wait_result_t +wait_queue_assert_wait_with_leeway( + wait_queue_t wq, + event_t event, + wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, + uint64_t deadline, + uint64_t leeway) +{ + spl_t s; + wait_result_t ret; + thread_t thread = current_thread(); + + /* If it is an invalid wait queue, you can't wait on it */ + if (!wait_queue_is_valid(wq)) + return (thread->wait_result = THREAD_RESTART); + + s = splsched(); + wait_queue_lock(wq); + thread_lock(thread); + ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event), + interruptible, + urgency, deadline, leeway, + thread); thread_unlock(thread); wait_queue_unlock(wq); splx(s); @@ -1282,7 +1346,48 @@ wait_queue_assert_wait64( s = splsched(); wait_queue_lock(wq); thread_lock(thread); - ret = wait_queue_assert_wait64_locked(wq, event, interruptible, deadline, thread); + ret = wait_queue_assert_wait64_locked(wq, event, interruptible, + TIMEOUT_URGENCY_SYS_NORMAL, + deadline, 0, + thread); + thread_unlock(thread); + wait_queue_unlock(wq); + splx(s); + return(ret); +} + +/* + * Routine: wait_queue_assert_wait64_with_leeway + * Purpose: + * Insert the current thread into the supplied wait queue + * waiting for a particular event to be posted to that queue. + * Deadline values are specified with urgency and leeway. + * Conditions: + * nothing of interest locked. 
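[Aside: illustrative sketch, not part of the xnu patch. The wq_eventmask updated above is a lossy summary: each waiter on a global wait queue ORs a masked form of its event into the queue's mask, so a poster can skip scanning a hashed queue whose mask cannot contain the posted event. In this scheme a false positive only costs an extra scan, while a missed wakeup cannot occur because the mask is always set before the waiter blocks. A standalone model of the filter, using the same shift-out-the-top-4-bits cast that wait_queue.h defines as CAST_TO_EVENT_MASK later in this patch:]

	#include <stdint.h>
	#include <stdio.h>

	/* Same idea as CAST_TO_EVENT_MASK(): zero the 4 most significant bits. */
	#define EVENT_MASK(e)	((((uintptr_t)(e)) << 4) >> 4)

	int
	main(void)
	{
		uintptr_t wq_eventmask = 0;

		/* A waiter on event 0x1000 marks the queue's summary mask. */
		wq_eventmask |= EVENT_MASK(0x1000);

		/* A poster probes the mask before walking the queue. */
		uintptr_t probe = EVENT_MASK(0x2000);
		if ((wq_eventmask & probe) != probe)
			printf("no waiter can match 0x2000: skip the scan\n");

		probe = EVENT_MASK(0x1000);
		if ((wq_eventmask & probe) == probe)
			printf("a waiter may match 0x1000: walk the queue\n");
		return 0;
	}

[end aside]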
+ */ +wait_result_t +wait_queue_assert_wait64_with_leeway( + wait_queue_t wq, + event64_t event, + wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, + uint64_t deadline, + uint64_t leeway) +{ + spl_t s; + wait_result_t ret; + thread_t thread = current_thread(); + + /* If it is an invalid wait queue, you can't wait on it */ + if (!wait_queue_is_valid(wq)) + return (thread->wait_result = THREAD_RESTART); + + s = splsched(); + wait_queue_lock(wq); + thread_lock(thread); + ret = wait_queue_assert_wait64_locked(wq, event, interruptible, + urgency, deadline, leeway, + thread); thread_unlock(thread); wait_queue_unlock(wq); splx(s); @@ -1310,8 +1415,18 @@ _wait_queue_select64_all( { wait_queue_element_t wq_element; wait_queue_element_t wqe_next; + unsigned long eventmask = 0; + boolean_t is_queue_global = FALSE; queue_t q; + is_queue_global = wait_queue_global(wq); + if (is_queue_global) { + eventmask = CAST_TO_EVENT_MASK(event); + if ((wq->wq_eventmask & eventmask) != eventmask) { + return; + } + eventmask = 0; + } q = &wq->wq_queue; wq_element = (wait_queue_element_t) queue_first(q); @@ -1348,7 +1463,7 @@ _wait_queue_select64_all( * the event we are posting to this queue, pull * it off the queue and stick it in out wake_queue. */ - thread_t t = (thread_t)wq_element; + thread_t t = (thread_t)(void *)wq_element; if (t->wait_event == event) { thread_lock(t); @@ -1358,10 +1473,20 @@ _wait_queue_select64_all( t->wait_event = NO_EVENT64; t->at_safe_point = FALSE; /* returned locked */ + } else { + if (is_queue_global) { + eventmask = eventmask | + CAST_TO_EVENT_MASK(t->wait_event); + } } } wq_element = wqe_next; } + /* Update event mask if global wait queue */ + if (is_queue_global) { + wq->wq_eventmask = eventmask; + } + } /* @@ -1407,7 +1532,7 @@ wait_queue_wakeup64_all_locked( */ res = KERN_NOT_WAITING; while (!queue_empty (q)) { - thread_t thread = (thread_t) dequeue(q); + thread_t thread = (thread_t)(void *) dequeue(q); res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); @@ -1508,8 +1633,26 @@ _wait_queue_select64_one( wait_queue_element_t wq_element; wait_queue_element_t wqe_next; thread_t t = THREAD_NULL; + thread_t fifo_thread = THREAD_NULL; + boolean_t is_queue_fifo = TRUE; + boolean_t is_queue_global = FALSE; + boolean_t thread_imp_donor = FALSE; + boolean_t realtime = FALSE; + unsigned long eventmask = 0; queue_t q; + + if (wait_queue_global(wq)) { + eventmask = CAST_TO_EVENT_MASK(event); + if ((wq->wq_eventmask & eventmask) != eventmask) { + return THREAD_NULL; + } + eventmask = 0; + is_queue_global = TRUE; +#if IMPORTANCE_INHERITANCE + is_queue_fifo = FALSE; +#endif /* IMPORTANCE_INHERITANCE */ + } + q = &wq->wq_queue; wq_element = (wait_queue_element_t) queue_first(q); @@ -1556,20 +1699,55 @@ _wait_queue_select64_one( * the event we are posting to this queue, pull * it off the queue and stick it in out wake_queue. */ - t = (thread_t)wq_element; + t = (thread_t)(void *)wq_element; if (t->wait_event == event) { - thread_lock(t); - remqueue((queue_entry_t) t); - t->wait_queue = WAIT_QUEUE_NULL; - t->wait_event = NO_EVENT64; - t->at_safe_point = FALSE; - return t; /* still locked */ + if (fifo_thread == THREAD_NULL) { + fifo_thread = t; + } +#if IMPORTANCE_INHERITANCE + /* + * Checking the imp donor bit does not need the thread lock + * or task lock, since we have the wait queue lock and + * the thread cannot be removed from it without acquiring + * the wait queue lock.
The imp donor bit may change + * once we read its value, but it is ok to wake + * a thread while someone drops importance assertion + * on the that thread. + */ + thread_imp_donor = task_is_importance_donor(t->task); +#endif /* IMPORTANCE_INHERITANCE */ + realtime = (t->sched_pri >= BASEPRI_REALTIME); + if (is_queue_fifo || thread_imp_donor || realtime || + (t->options & TH_OPT_VMPRIV)) { + thread_lock(t); + remqueue((queue_entry_t) t); + t->wait_queue = WAIT_QUEUE_NULL; + t->wait_event = NO_EVENT64; + t->at_safe_point = FALSE; + return t; /* still locked */ + } + } + if (is_queue_global) { + eventmask = eventmask | CAST_TO_EVENT_MASK(t->wait_event); } - t = THREAD_NULL; } wq_element = wqe_next; } + + if (is_queue_global) { + wq->wq_eventmask = eventmask; + } +#if IMPORTANCE_INHERITANCE + if (fifo_thread != THREAD_NULL) { + thread_lock(fifo_thread); + remqueue((queue_entry_t) fifo_thread); + fifo_thread->wait_queue = WAIT_QUEUE_NULL; + fifo_thread->wait_event = NO_EVENT64; + fifo_thread->at_safe_point = FALSE; + return fifo_thread; /* still locked */ + } +#endif /* IMPORTANCE_INHERITANCE */ return THREAD_NULL; } diff --git a/osfmk/kern/wait_queue.h b/osfmk/kern/wait_queue.h index fc91a60af..01111d16c 100644 --- a/osfmk/kern/wait_queue.h +++ b/osfmk/kern/wait_queue.h @@ -47,6 +47,19 @@ #include #include /* machine_timeout_suspended() */ + +/* + * The event mask is of 60 bits on 64 bit architeture and 28 bits on + * 32 bit architecture and so we calculate its size using sizeof(long). + * If the bitfield for wq_type and wq_fifo is changed, then value of + * EVENT_MASK_BITS will also change. + */ +#define EVENT_MASK_BITS ((sizeof(long) * 8) - 4) + +/* + * Zero out the 4 msb of the event. + */ +#define CAST_TO_EVENT_MASK(event) (((CAST_DOWN(unsigned long, event)) << 4) >> 4) /* * wait_queue_t * This is the definition of the common event wait queue @@ -63,11 +76,11 @@ * them. */ typedef struct wait_queue { - unsigned int /* flags */ - /* boolean_t */ wq_type:16, /* only public field */ + unsigned long int /* flags */ + /* boolean_t */ wq_type:2, /* only public field */ wq_fifo:1, /* fifo wakeup policy? */ wq_prepost:1, /* waitq supports prepost? 
set only */ - :0; /* force to long boundary */ + wq_eventmask:EVENT_MASK_BITS; hw_lock_data_t wq_interlock; /* interlock */ queue_head_t wq_queue; /* queue of elements */ } WaitQueue; @@ -136,8 +149,8 @@ typedef struct _wait_queue_link { #define wql_type wql_element.wqe_type #define wql_queue wql_element.wqe_queue -#define _WAIT_QUEUE_inited 0xf1d0 -#define _WAIT_QUEUE_SET_inited 0xf1d1 +#define _WAIT_QUEUE_inited 0x2 +#define _WAIT_QUEUE_SET_inited 0x3 #define wait_queue_is_queue(wq) \ ((wq)->wq_type == _WAIT_QUEUE_inited) @@ -184,6 +197,7 @@ static inline void wait_queue_lock(wait_queue_t wq) { if (wql_acquired == FALSE) panic("wait queue deadlock - wq=%p, cpu=%d\n", wq, cpu_number()); } + assert(wait_queue_held(wq)); } static inline void wait_queue_unlock(wait_queue_t wq) { @@ -212,7 +226,9 @@ __private_extern__ wait_result_t wait_queue_assert_wait64_locked( wait_queue_t wait_queue, event64_t wait_event, wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, uint64_t deadline, + uint64_t leeway, thread_t thread); /* pull a thread from its wait queue */ @@ -250,8 +266,8 @@ __private_extern__ kern_return_t wait_queue_wakeup64_thread_locked( wait_result_t result, boolean_t unlock); -__private_extern__ uint32_t num_wait_queues; -__private_extern__ struct wait_queue *wait_queues; +extern uint32_t num_wait_queues; +extern struct wait_queue *wait_queues; /* The Jenkins "one at a time" hash. * TBD: There may be some value to unrolling here, * depending on the architecture. @@ -375,6 +391,14 @@ extern wait_result_t wait_queue_assert_wait64( wait_interrupt_t interruptible, uint64_t deadline); +extern wait_result_t wait_queue_assert_wait64_with_leeway( + wait_queue_t wait_queue, + event64_t wait_event, + wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, + uint64_t deadline, + uint64_t leeway); + /* wakeup the most appropriate thread waiting on pair */ extern kern_return_t wait_queue_wakeup64_one( wait_queue_t wait_queue, @@ -406,6 +430,15 @@ extern wait_result_t wait_queue_assert_wait( wait_interrupt_t interruptible, uint64_t deadline); +/* assert intent to wait on pair */ +extern wait_result_t wait_queue_assert_wait_with_leeway( + wait_queue_t wait_queue, + event_t wait_event, + wait_interrupt_t interruptible, + wait_timeout_urgency_t urgency, + uint64_t deadline, + uint64_t leeway); + /* wakeup the most appropriate thread waiting on pair */ extern kern_return_t wait_queue_wakeup_one( wait_queue_t wait_queue, diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c index dc9ea000c..d03746a4e 100644 --- a/osfmk/kern/zalloc.c +++ b/osfmk/kern/zalloc.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include @@ -94,139 +95,699 @@ #include #include +#include /* ml_cpu_get_info */ #include #include #include -/* +/* + * ZONE_ALIAS_ADDR + * + * With this option enabled, zones with alloc_size <= PAGE_SIZE allocate + * a virtual page from the zone_map, but before zcram-ing the allocated memory + * into the zone, the page is translated to use the alias address of the page + * in the static kernel region. zone_gc reverses that translation when + * scanning the freelist to collect free pages so that it can look up the page + * in the zone_page_table, and free it to kmem_free. + * + * The static kernel region is a flat 1:1 mapping of physical memory passed + * to xnu by the booter. 
It is mapped to the range: + * [gVirtBase, gVirtBase + gPhysSize] + * + * Accessing memory via the static kernel region is faster due to the + * entire region being mapped via large pages, cutting down + * on TLB misses. + * + * zinit favors using PAGE_SIZE backing allocations for a zone unless it would + * waste more than 10% space to use a single page, in order to take advantage + * of the speed benefit for as many zones as possible. + * + * Zones with > PAGE_SIZE allocations can't take advantage of this + * because kernel_memory_allocate doesn't give out physically contiguous pages. + * + * zone_virtual_addr() + * - translates an address from the static kernel region to the zone_map + * - returns the same address if it's not from the static kernel region + * It relies on the fact that a physical page mapped to the + * zone_map is not mapped anywhere else (except the static kernel region). + * + * zone_alias_addr() + * - translates a virtual memory address from the zone_map to the + * corresponding address in the static kernel region + * + */ + +#if !ZONE_ALIAS_ADDR +#define from_zone_map(addr, size) \ + ((vm_offset_t)(addr) >= zone_map_min_address && \ + ((vm_offset_t)(addr) + size - 1) < zone_map_max_address ) +#else +#define from_zone_map(addr, size) \ + ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)(uintptr_t)addr)) >= zone_map_min_address && \ + ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)(uintptr_t)addr)) + size -1) < zone_map_max_address ) +#endif + +/* * Zone Corruption Debugging * - * We perform three methods to detect use of a zone element after it's been freed. These - * checks are enabled for every N'th element (counted per-zone) by specifying - * "zp-factor=N" as a boot-arg. To turn this feature off, set "zp-factor=0" or "-no-zp". + * We use three techniques to detect modification of a zone element + * after it's been freed. * - * (1) Range-check the free-list "next" pointer for sanity. - * (2) Store the pointer in two different words, one at the beginning of the freed element - * and one at the end, and compare them against each other when re-using the element, - * to detect modifications. - * (3) Poison the freed memory by overwriting it with 0xdeadbeef, and check it when the - * memory is being reused to make sure it is still poisoned. + * (1) Check the freelist next pointer for sanity. + * (2) Store a backup of the next pointer at the end of the element, + * and compare it to the primary next pointer when the element is allocated + * to detect corruption of the freelist due to use-after-free bugs. + * The backup pointer is also XORed with a per-boot random cookie. + * (3) Poison the freed element by overwriting it with 0xdeadbeef, + * and check for that value when the element is being reused to make sure + * no part of the element has been modified while it was on the freelist. + * This will also help catch read-after-frees, as code will now dereference + * 0xdeadbeef instead of a valid but freed pointer. * - * As a result, each element (that is large enough to hold this data inside) must be marked - * as either "ZP_POISONED" or "ZP_NOT_POISONED" in the first integer within the would-be - * poisoned segment after the first free-list pointer. + * (1) and (2) occur for every allocation and free to a zone. + * This is done to make it slightly more difficult for an attacker to + * manipulate the freelist to behave in a specific way. 
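[Aside: illustrative sketch, not part of the xnu patch. Techniques (1) and (2) above pair the freelist next pointer at the start of each free element with an XOR-cookied copy at its end; the two must agree when the element is reused. A standalone model of that check, with a made-up constant standing in for the per-boot early_random() cookie:]

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	/* Hypothetical stand-in for the per-boot zp_nopoison_cookie. */
	static const uintptr_t nopoison_cookie = 0x5a5a17c3;

	/* On free: record the old head twice, the tail copy XORed with the cookie. */
	static void
	freelist_push(uintptr_t *elem, size_t elem_words, uintptr_t old_head)
	{
		elem[0] = old_head;                                /* primary next */
		elem[elem_words - 1] = old_head ^ nopoison_cookie; /* backup next */
	}

	/* On alloc: a use-after-free that scribbled on either word trips this. */
	static uintptr_t
	freelist_pop_check(const uintptr_t *elem, size_t elem_words)
	{
		assert(elem[0] == (elem[elem_words - 1] ^ nopoison_cookie));
		return elem[0];
	}

	int
	main(void)
	{
		uintptr_t elem[8];

		freelist_push(elem, 8, 0x12345670u);
		return (int) (freelist_pop_check(elem, 8) != 0x12345670u);
	}

The XOR with an unpredictable cookie is what makes the backup hard to forge: an attacker who can overwrite both words still cannot produce a matching pair without knowing the cookie. [end aside]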
* - * Performance slowdown is inversely proportional to the frequency with which you check - * (as would be expected), with a 4-5% hit around N=1, down to ~0.3% at N=16 and just - * "noise" at N=32 and higher. You can expect to find a 100% reproducible - * bug in an average of N tries, with a standard deviation of about N, but you will probably - * want to set "zp-factor=1" or "-zp" if you are attempting to reproduce a known bug. + * Poisoning (3) occurs periodically for every N frees (counted per-zone) + * and on every free for zones smaller than a cacheline. If -zp + * is passed as a boot arg, poisoning occurs for every free. * + * Performance slowdown is inversely proportional to the frequency of poisoning, + * with a 4-5% hit around N=1, down to ~0.3% at N=16 and just "noise" at N=32 + * and higher. You can expect to find a 100% reproducible bug in an average of + * N tries, with a standard deviation of about N, but you will want to set + * "-zp" to always poison every free if you are attempting to reproduce + * a known bug. * - * Zone corruption logging + * For a more heavyweight, but finer-grained method of detecting misuse + * of zone memory, look up the "Guard mode" zone allocator in gzalloc.c. + * + * Zone Corruption Logging + * + * You can also track where corruptions come from by using the boot-arguments + * "zlog= -zc". Search for "Zone corruption logging" later + * in this document for more implementation and usage information. + * + * Zone Leak Detection + * + * To debug leaks of zone memory, use the zone leak detection tool 'zleaks' + * found later in this file via the showtopztrace and showz* macros in kgmacros, + * or use zlog without the -zc argument. * - * You can also track where corruptions come from by using the boot-arguments: - * "zlog= -zc". Search for "Zone corruption logging" later in this - * document for more implementation and usage information. */ -#define ZP_POISON 0xdeadbeef -#define ZP_POISONED 0xfeedface -#define ZP_NOT_POISONED 0xbaddecaf -#if CONFIG_EMBEDDED - #define ZP_DEFAULT_SAMPLING_FACTOR 0 -#else /* CONFIG_EMBEDDED */ - #define ZP_DEFAULT_SAMPLING_FACTOR 16 -#endif /* CONFIG_EMBEDDED */ +#if defined(__LP64__) +#define ZP_POISON 0xdeadbeefdeadbeef +#else +#define ZP_POISON 0xdeadbeef +#endif -uint32_t free_check_sample_factor = 0; /* set by zp-factor=N boot arg */ -boolean_t corruption_debug_flag = FALSE; /* enabled by "-zc" boot-arg */ +#define ZP_DEFAULT_SAMPLING_FACTOR 16 -/* - * Zone checking helper macro. +/* + * A zp_factor of 0 indicates zone poisoning is disabled, + * however, we still poison zones smaller than zp_tiny_zone_limit (a cacheline). + * Passing the -no-zp boot-arg disables even this behavior. + * In all cases, we record and check the integrity of a backup pointer. */ -#define is_kernel_data_addr(a) (!(a) || ((a) >= vm_min_kernel_address && !((a) & 0x3))) + +/* set by zp-factor=N boot arg, zero indicates non-tiny poisoning disabled */ +uint32_t zp_factor = 0; + +/* set in zp_init, zero indicates -no-zp boot-arg */ +vm_size_t zp_tiny_zone_limit = 0; + +/* initialized to a per-boot random value in zp_init */ +uintptr_t zp_poisoned_cookie = 0; +uintptr_t zp_nopoison_cookie = 0; + /* - * Frees the specified element, which is within the specified zone. If this - * element should be poisoned and its free list checker should be set, both are - * done here. These checks will only be enabled if the element size is at least - * large enough to hold two vm_offset_t's and one uint32_t (to enable both types - * of checks). 
+ * initialize zone poisoning + * called from zone_bootstrap before any allocations are made from zalloc */ static inline void -free_to_zone(zone_t zone, void *elem) { - /* get the index of the first uint32_t beyond the 'next' pointer */ - unsigned int i = sizeof(vm_offset_t) / sizeof(uint32_t); +zp_init(void) +{ + char temp_buf[16]; + + /* + * Initialize backup pointer random cookie for poisoned elements + * Try not to call early_random() back to back, it may return + * the same value if mach_absolute_time doesn't have sufficient time + * to tick over between calls. + * (This is only a problem on embedded devices) + */ + zp_poisoned_cookie = (uintptr_t) early_random(); + + /* + * Always poison zones smaller than a cacheline, + * because it's pretty close to free + */ + ml_cpu_info_t cpu_info; + ml_cpu_get_info(&cpu_info); + zp_tiny_zone_limit = (vm_size_t) cpu_info.cache_line_size; + + zp_factor = ZP_DEFAULT_SAMPLING_FACTOR; + + //TODO: Bigger permutation? + /* + * Permute the default factor +/- 1 to make it less predictable + * This adds or subtracts ~4 poisoned objects per 1000 frees. + */ + if (zp_factor != 0) { + uint32_t rand_bits = early_random() & 0x3; + + if (rand_bits == 0x1) + zp_factor += 1; + else if (rand_bits == 0x2) + zp_factor -= 1; + /* if 0x0 or 0x3, leave it alone */ + } + + /* -zp: enable poisoning for every alloc and free */ + if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) { + zp_factor = 1; + } + + /* -no-zp: disable poisoning completely even for tiny zones */ + if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) { + zp_factor = 0; + zp_tiny_zone_limit = 0; + printf("Zone poisoning disabled\n"); + } + + /* zp-factor=XXXX: override how often to poison freed zone elements */ + if (PE_parse_boot_argn("zp-factor", &zp_factor, sizeof(zp_factor))) { + printf("Zone poisoning factor override: %u\n", zp_factor); + } + + /* Initialize backup pointer random cookie for unpoisoned elements */ + zp_nopoison_cookie = (uintptr_t) early_random(); + +#if MACH_ASSERT + if (zp_poisoned_cookie == zp_nopoison_cookie) + panic("early_random() is broken: %p and %p are not random\n", + (void *) zp_poisoned_cookie, (void *) zp_nopoison_cookie); +#endif + + /* + * Use the last bit in the backup pointer to hint poisoning state + * to backup_ptr_mismatch_panic. Valid zone pointers are aligned, so + * the low bits are zero. + */ + zp_poisoned_cookie |= (uintptr_t)0x1ULL; + zp_nopoison_cookie &= ~((uintptr_t)0x1ULL); + +#if defined(__LP64__) + /* + * Make backup pointers more obvious in GDB for 64 bit + * by making 0xFFFFFF... ^ cookie = 0xFACADE... + * (0xFACADE = 0xFFFFFF ^ 0x053521) + * (0xC0FFEE = 0xFFFFFF ^ 0x3f0011) + * The high 3 bytes of a zone pointer are always 0xFFFFFF, and are checked + * by the sanity check, so it's OK for that part of the cookie to be predictable. + * + * TODO: Use #defines, xors, and shifts + */ + + zp_poisoned_cookie &= 0x000000FFFFFFFFFF; + zp_poisoned_cookie |= 0x0535210000000000; /* 0xFACADE */ + + zp_nopoison_cookie &= 0x000000FFFFFFFFFF; + zp_nopoison_cookie |= 0x3f00110000000000; /* 0xC0FFEE */ +#endif +} + +/* zone_map page count for page table structure */ +uint64_t zone_map_table_page_count = 0; + +/* + * These macros are used to keep track of the number + * of pages being used by the zone currently. The + * z->page_count is protected by the zone lock.
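[Aside: worked check, not part of the xnu patch. The masking at the end of zp_init() above pins the top three bytes of each 64-bit cookie so that a cookied backup word XORs into an eye-catching prefix in a debugger: the high three bytes of a valid zone pointer are 0xFFFFFF, and 0xFFFFFF ^ 0x053521 == 0xFACADE (likewise 0xFFFFFF ^ 0x3f0011 == 0xC0FFEE). A standalone check of that arithmetic, with a made-up pointer and cookie:]

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint64_t cookie = 0xdeadbeefcafef00dULL;   /* pretend early_random() */

		/* Same masking zp_init() applies to zp_poisoned_cookie. */
		cookie &= 0x000000FFFFFFFFFFULL;
		cookie |= 0x0535210000000000ULL;

		uint64_t zone_ptr = 0xFFFFFF80DEADBEE0ULL; /* hypothetical element */
		printf("backup word: 0x%016llx\n",
		    (unsigned long long)(zone_ptr ^ cookie)); /* prints 0xFACADE... */
		return 0;
	}

[end aside]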
+ */ +#define ZONE_PAGE_COUNT_INCR(z, count) \ +{ \ + OSAddAtomic64(count, &(z->page_count)); \ +} + +#define ZONE_PAGE_COUNT_DECR(z, count) \ +{ \ + OSAddAtomic64(-count, &(z->page_count)); \ +} + +/* for is_sane_zone_element and garbage collection */ + +vm_offset_t zone_map_min_address = 0; /* initialized in zone_init */ +vm_offset_t zone_map_max_address = 0; + +/* Helpful for walking through a zone's free element list. */ +struct zone_free_element { + struct zone_free_element *next; + /* ... */ + /* void *backup_ptr; */ +}; + +struct zone_page_metadata { + queue_chain_t pages; + struct zone_free_element *elements; + zone_t zone; + uint16_t alloc_count; + uint16_t free_count; +}; + +/* The backup pointer is stored in the last pointer-sized location in an element. */ +static inline vm_offset_t * +get_backup_ptr(vm_size_t elem_size, + vm_offset_t *element) +{ + return (vm_offset_t *) ((vm_offset_t)element + elem_size - sizeof(vm_offset_t)); +} + +static inline struct zone_page_metadata * +get_zone_page_metadata(struct zone_free_element *element) +{ + return (struct zone_page_metadata *)(trunc_page((vm_offset_t)element) + PAGE_SIZE - sizeof(struct zone_page_metadata)); +} + +/* + * Zone checking helper function. + * A pointer that satisfies these conditions is OK to be a freelist next pointer + * A pointer that doesn't satisfy these conditions indicates corruption + */ +static inline boolean_t +is_sane_zone_ptr(zone_t zone, + vm_offset_t addr, + size_t obj_size) +{ + /* Must be aligned to pointer boundary */ + if (__improbable((addr & (sizeof(vm_offset_t) - 1)) != 0)) + return FALSE; + + /* Must be a kernel address */ + if (__improbable(!pmap_kernel_va(addr))) + return FALSE; + + /* Must be from zone map if the zone only uses memory from the zone_map */ + /* + * TODO: Remove the zone->collectable check when every + * zone using foreign memory is properly tagged with allows_foreign + */ + if (zone->collectable && !zone->allows_foreign) { +#if ZONE_ALIAS_ADDR + /* + * If this address is in the static kernel region, it might be + * the alias address of a valid zone element. + * If we tried to find the zone_virtual_addr() of an invalid + * address in the static kernel region, it will panic, so don't + * check addresses in this region. + * + * TODO: Use a safe variant of zone_virtual_addr to + * make this check more accurate + * + * The static kernel region is mapped at: + * [gVirtBase, gVirtBase + gPhysSize] + */ + if ((addr - gVirtBase) < gPhysSize) + return TRUE; +#endif + /* check if addr is from zone map */ + if (addr >= zone_map_min_address && + (addr + obj_size - 1) < zone_map_max_address ) + return TRUE; + + return FALSE; + } + + return TRUE; +} + +static inline boolean_t +is_sane_zone_page_metadata(zone_t zone, + vm_offset_t page_meta) +{ + /* NULL page metadata structures are invalid */ + if (page_meta == 0) + return FALSE; + return is_sane_zone_ptr(zone, page_meta, sizeof(struct zone_page_metadata)); +} + +static inline boolean_t +is_sane_zone_element(zone_t zone, + vm_offset_t addr) +{ + /* NULL is OK because it indicates the tail of the list */ + if (addr == 0) + return TRUE; + return is_sane_zone_ptr(zone, addr, zone->elem_size); +} - /* should we run checks on this piece of memory? 
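[Aside: illustrative sketch, not part of the xnu patch. get_zone_page_metadata() above relies on a fixed layout: when use_page_list is set, each backing page stores its struct zone_page_metadata in the page's last bytes, so any element address can be mapped back to its metadata with pure page arithmetic. A reduced standalone model, assuming a 4 KB page where the kernel uses trunc_page() and PAGE_SIZE:]

	#include <stdint.h>
	#include <stdio.h>

	#define MY_PAGE_SIZE	4096u	/* assumption for the sketch */

	struct page_meta {		/* reduced zone_page_metadata */
		uint16_t alloc_count;
		uint16_t free_count;
	};

	static struct page_meta *
	meta_for_element(uintptr_t element)
	{
		uintptr_t page = element & ~(uintptr_t)(MY_PAGE_SIZE - 1); /* trunc_page */
		return (struct page_meta *)(page + MY_PAGE_SIZE - sizeof(struct page_meta));
	}

	int
	main(void)
	{
		/* Two elements on the same (made-up) page share one metadata slot. */
		uintptr_t a = 0x10000000u + 64, b = 0x10000000u + 512;
		printf("same meta? %d\n", meta_for_element(a) == meta_for_element(b));
		return 0;
	}

[end aside]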
*/ - if (free_check_sample_factor != 0 && - zone->free_check_count++ % free_check_sample_factor == 0 && - zone->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) { - zone->free_check_count = 1; - ((uint32_t *) elem)[i] = ZP_POISONED; - for (i++; i < zone->elem_size / sizeof(uint32_t); i++) { - ((uint32_t *) elem)[i] = ZP_POISON; - } - ((vm_offset_t *) elem)[((zone->elem_size)/sizeof(vm_offset_t))-1] = zone->free_elements; +/* Someone wrote to freed memory. */ +static inline void /* noreturn */ +zone_element_was_modified_panic(zone_t zone, + vm_offset_t found, + vm_offset_t expected, + vm_offset_t offset) +{ + panic("a freed zone element has been modified: expected %p but found %p, bits changed %p, at offset %d of %d in zone: %s", + (void *) expected, + (void *) found, + (void *) (expected ^ found), + (uint32_t) offset, + (uint32_t) zone->elem_size, + zone->zone_name); +} + +/* + * The primary and backup pointers don't match. + * Determine which one was likely the corrupted pointer, find out what it + * probably should have been, and panic. + * I would like to mark this as noreturn, but panic() isn't marked noreturn. + */ +static void /* noreturn */ +backup_ptr_mismatch_panic(zone_t zone, + vm_offset_t primary, + vm_offset_t backup) +{ + vm_offset_t likely_backup; + + boolean_t sane_backup; + boolean_t sane_primary = is_sane_zone_element(zone, primary); + boolean_t element_was_poisoned = (backup & 0x1) ? TRUE : FALSE; + + if (element_was_poisoned) { + likely_backup = backup ^ zp_poisoned_cookie; + sane_backup = is_sane_zone_element(zone, likely_backup); } else { - ((uint32_t *) elem)[i] = ZP_NOT_POISONED; + likely_backup = backup ^ zp_nopoison_cookie; + sane_backup = is_sane_zone_element(zone, likely_backup); } - - /* maintain free list and decrement number of active objects in zone */ - ((vm_offset_t *) elem)[0] = zone->free_elements; - zone->free_elements = (vm_offset_t) elem; - zone->count--; + + /* The primary is definitely the corrupted one */ + if (!sane_primary && sane_backup) + zone_element_was_modified_panic(zone, primary, likely_backup, 0); + + /* The backup is definitely the corrupted one */ + if (sane_primary && !sane_backup) + zone_element_was_modified_panic(zone, backup, primary, + zone->elem_size - sizeof(vm_offset_t)); + + /* + * Not sure which is the corrupted one. + * It's less likely that the backup pointer was overwritten with + * ( (sane address) ^ (valid cookie) ), so we'll guess that the + * primary pointer has been overwritten with a sane but incorrect address. + */ + if (sane_primary && sane_backup) + zone_element_was_modified_panic(zone, primary, likely_backup, 0); + + /* Neither are sane, so just guess. */ + zone_element_was_modified_panic(zone, primary, likely_backup, 0); } + /* - * Allocates an element from the specifed zone, storing its address in the - * return arg. This function will look for corruptions revealed through zone - * poisoning and free list checks. + * Sets the next element of tail to elem. + * elem can be NULL. + * Preserves the poisoning state of the element. 
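[Aside: illustrative sketch, not part of the xnu patch. append_zone_element(), shown next, must rewrite the tail element's backup word with whichever cookie flavor it already carried, so a poisoned element stays marked poisoned when freelists are spliced. A reduced standalone model with made-up cookies; note the low bit distinguishes the flavors, mirroring the hint bit zp_init() sets above:]

	#include <stdint.h>
	#include <stdlib.h>

	/* Hypothetical stand-ins for the per-boot cookies (low bit = poisoned). */
	static const uintptr_t poisoned_cookie = 0x1d2d3d4d;
	static const uintptr_t nopoison_cookie = 0x0a1b2c3c;

	/* Repoint a freelist entry at new_next, preserving its cookie flavor. */
	static void
	set_next_preserving_flavor(uintptr_t *next, uintptr_t *backup,
	    uintptr_t new_next)
	{
		if (*next == (*backup ^ nopoison_cookie))
			*backup = new_next ^ nopoison_cookie;
		else if (*next == (*backup ^ poisoned_cookie))
			*backup = new_next ^ poisoned_cookie;
		else
			abort();	/* neither flavor matches: corruption */

		*next = new_next;
	}

[end aside]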
*/ static inline void -alloc_from_zone(zone_t zone, void **ret) { - void *elem = (void *) zone->free_elements; - if (elem != NULL) { - /* get the index of the first uint32_t beyond the 'next' pointer */ - unsigned int i = sizeof(vm_offset_t) / sizeof(uint32_t); - - /* first int in data section must be ZP_POISONED or ZP_NOT_POISONED */ - if (((uint32_t *) elem)[i] == ZP_POISONED && - zone->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) { - /* check the free list pointers */ - if (!is_kernel_data_addr(((vm_offset_t *) elem)[0]) || - ((vm_offset_t *) elem)[0] != - ((vm_offset_t *) elem)[(zone->elem_size/sizeof(vm_offset_t))-1]) { - panic("a freed zone element has been modified in zone: %s", - zone->zone_name); +append_zone_element(zone_t zone, + struct zone_free_element *tail, + struct zone_free_element *elem) +{ + vm_offset_t *backup = get_backup_ptr(zone->elem_size, (vm_offset_t *) tail); + + vm_offset_t old_backup = *backup; + + vm_offset_t old_next = (vm_offset_t) tail->next; + vm_offset_t new_next = (vm_offset_t) elem; + + if (old_next == (old_backup ^ zp_nopoison_cookie)) + *backup = new_next ^ zp_nopoison_cookie; + else if (old_next == (old_backup ^ zp_poisoned_cookie)) + *backup = new_next ^ zp_poisoned_cookie; + else + backup_ptr_mismatch_panic(zone, + old_next, + old_backup); + + tail->next = elem; +} + + +/* + * Insert a linked list of elements (delineated by head and tail) at the head of + * the zone free list. Every element in the list being added has already gone + * through append_zone_element, so their backup pointers are already + * set properly. + * Precondition: There should be no elements after tail + */ +static inline void +add_list_to_zone(zone_t zone, + struct zone_free_element *head, + struct zone_free_element *tail) +{ + assert(tail->next == NULL); + assert(!zone->use_page_list); + + append_zone_element(zone, tail, zone->free_elements); + + zone->free_elements = head; +} + + +/* + * Adds the element to the head of the zone's free list + * Keeps a backup next-pointer at the end of the element + * Poisons the element with ZP_POISON every zp_factor frees + */ +static inline void +free_to_zone(zone_t zone, + vm_offset_t element) +{ + vm_offset_t old_head; + struct zone_page_metadata *page_meta; + + vm_offset_t *primary = (vm_offset_t *) element; + vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary); + + if (zone->use_page_list) { + page_meta = get_zone_page_metadata((struct zone_free_element *)element); + assert(page_meta->zone == zone); + old_head = (vm_offset_t)page_meta->elements; + } else { + old_head = (vm_offset_t)zone->free_elements; + } + +#if MACH_ASSERT + if (__improbable(!is_sane_zone_element(zone, old_head))) + panic("zfree: invalid head pointer %p for freelist of zone %s\n", + (void *) old_head, zone->zone_name); +#endif + + if (__improbable(!is_sane_zone_element(zone, element))) + panic("zfree: freeing invalid pointer %p to zone %s\n", + (void *) element, zone->zone_name); + + boolean_t poison = FALSE; + + /* Always poison tiny zones' elements (limit is 0 if -no-zp is set) */ + if (zone->elem_size <= zp_tiny_zone_limit) + poison = TRUE; + else if (zp_factor != 0 && ++zone->zp_count >= zp_factor) { + /* Poison zone elements periodically */ + zone->zp_count = 0; + poison = TRUE; + } + + if (poison) { + /* memset_pattern{4|8} could help make this faster: */ + vm_offset_t *element_cursor = primary + 1; + + for ( ; element_cursor < backup; element_cursor++) + *element_cursor = ZP_POISON; + } + + /* + * Always write a redundant next pointer 
+ * So that it is more difficult to forge, xor it with a random cookie + * A poisoned element is indicated by using zp_poisoned_cookie + * instead of zp_nopoison_cookie + */ + + *backup = old_head ^ (poison ? zp_poisoned_cookie : zp_nopoison_cookie); + + /* Insert this element at the head of the free list */ + *primary = old_head; + if (zone->use_page_list) { + page_meta->elements = (struct zone_free_element *)element; + page_meta->free_count++; + if (zone->allows_foreign && !from_zone_map(element, zone->elem_size)) { + if (page_meta->free_count == 1) { + /* first foreign element freed on page, move from all_used */ + remqueue((queue_entry_t)page_meta); + enqueue_tail(&zone->pages.any_free_foreign, (queue_entry_t)page_meta); + } else { + /* no other list transitions */ } + } else if (page_meta->free_count == page_meta->alloc_count) { + /* whether the page was on the intermediate or all_used queue, move it to free */ + remqueue((queue_entry_t)page_meta); + enqueue_tail(&zone->pages.all_free, (queue_entry_t)page_meta); + } else if (page_meta->free_count == 1) { + /* first free element on page, move from all_used */ + remqueue((queue_entry_t)page_meta); + enqueue_tail(&zone->pages.intermediate, (queue_entry_t)page_meta); + } + } else { + zone->free_elements = (struct zone_free_element *)element; + } + zone->count--; + zone->countfree++; +} + + +/* + * Removes an element from the zone's free list, returning 0 if the free list is empty. + * Verifies that the next-pointer and backup next-pointer are intact, + * and verifies that a poisoned element hasn't been modified. + */ +static inline vm_offset_t +try_alloc_from_zone(zone_t zone) +{ + vm_offset_t element; + struct zone_page_metadata *page_meta; + + /* if zone is empty, bail */ + if (zone->use_page_list) { + if (zone->allows_foreign && !queue_empty(&zone->pages.any_free_foreign)) + page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign); + else if (!queue_empty(&zone->pages.intermediate)) + page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate); + else if (!queue_empty(&zone->pages.all_free)) + page_meta = (struct zone_page_metadata *)queue_first(&zone->pages.all_free); + else { + return 0; + } + + /* Check if page_meta passes is_sane_zone_element */ + if (__improbable(!is_sane_zone_page_metadata(zone, (vm_offset_t)page_meta))) + panic("zalloc: invalid metadata structure %p for freelist of zone %s\n", + (void *) page_meta, zone->zone_name); + assert(page_meta->zone == zone); + element = (vm_offset_t)page_meta->elements; + } else { + if (zone->free_elements == NULL) + return 0; + + element = (vm_offset_t)zone->free_elements; + } + +#if MACH_ASSERT + if (__improbable(!is_sane_zone_element(zone, element))) + panic("zalloc: invalid head pointer %p for freelist of zone %s\n", + (void *) element, zone->zone_name); +#endif + + vm_offset_t *primary = (vm_offset_t *) element; + vm_offset_t *backup = get_backup_ptr(zone->elem_size, primary); + + vm_offset_t next_element = *primary; + vm_offset_t next_element_backup = *backup; + + /* + * backup_ptr_mismatch_panic will determine what next_element + * should have been, and print it appropriately + */ + if (__improbable(!is_sane_zone_element(zone, next_element))) + backup_ptr_mismatch_panic(zone, next_element, next_element_backup); + + /* Check the backup pointer for the regular cookie */ + if (__improbable(next_element != (next_element_backup ^ zp_nopoison_cookie))) { + + /* Check for the poisoned cookie instead */ + if (__improbable(next_element !=
(next_element_backup ^ zp_poisoned_cookie))) + /* Neither cookie is valid, corruption has occurred */ + backup_ptr_mismatch_panic(zone, next_element, next_element_backup); + + /* + * Element was marked as poisoned, so check its integrity, + * skipping the primary and backup pointers at the beginning and end. + */ + vm_offset_t *element_cursor = primary + 1; + + for ( ; element_cursor < backup ; element_cursor++) + if (__improbable(*element_cursor != ZP_POISON)) + zone_element_was_modified_panic(zone, + *element_cursor, + ZP_POISON, + ((vm_offset_t)element_cursor) - element); + } + + if (zone->use_page_list) { - /* check for poisoning in free space */ - for (i++; - i < zone->elem_size / sizeof(uint32_t) - - sizeof(vm_offset_t) / sizeof(uint32_t); - i++) { - if (((uint32_t *) elem)[i] != ZP_POISON) { - panic("a freed zone element has been modified in zone: %s", - zone->zone_name); - } + /* Make sure the page_meta is at the correct offset from the start of page */ + if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)element))) + panic("zalloc: metadata located at incorrect location on page of zone %s\n", + zone->zone_name); + + /* Make sure next_element belongs to the same page as page_meta */ + if (next_element) { + if (__improbable(page_meta != get_zone_page_metadata((struct zone_free_element *)next_element))) + panic("zalloc: next element pointer %p for element %p points to invalid element for zone %s\n", + (void *)next_element, (void *)element, zone->zone_name); + } + } + + /* + * Clear out the old next pointer and backup to avoid leaking the cookie + * and so that only values on the freelist have a valid cookie + */ + *primary = ZP_POISON; + *backup = ZP_POISON; + + /* Remove this element from the free list */ + if (zone->use_page_list) { + + page_meta->elements = (struct zone_free_element *)next_element; + page_meta->free_count--; + + if (zone->allows_foreign && !from_zone_map(element, zone->elem_size)) { + if (page_meta->free_count == 0) { + /* move to all used */ + remqueue((queue_entry_t)page_meta); + enqueue_tail(&zone->pages.all_used, (queue_entry_t)page_meta); + } else { + /* no other list transitions */ } - } else if (((uint32_t *) elem)[i] != ZP_NOT_POISONED) { - panic("a freed zone element has been modified in zone: %s", - zone->zone_name); + } else if (page_meta->free_count == 0) { + /* remove from intermediate or free, move to all_used */ + remqueue((queue_entry_t)page_meta); + enqueue_tail(&zone->pages.all_used, (queue_entry_t)page_meta); + } else if (page_meta->alloc_count == page_meta->free_count + 1) { + /* remove from free, move to intermediate */ + remqueue((queue_entry_t)page_meta); + enqueue_tail(&zone->pages.intermediate, (queue_entry_t)page_meta); } - - zone->count++; - zone->sum_count++; - zone->free_elements = ((vm_offset_t *) elem)[0]; + } else { + zone->free_elements = (struct zone_free_element *)next_element; } - *ret = elem; + zone->countfree--; + zone->count++; + zone->sum_count++; + + return element; } +/* + * End of zone poisoning + */ + /* * Fake zones for things that want to report via zprint but are not actually zones. */ @@ -323,11 +884,28 @@ void zone_page_keep( vm_offset_t addr, vm_size_t size); +void zone_display_zprint(void); + +zone_t zone_find_largest(void); + +/* + * Async allocation of zones + * This mechanism allows for bootstrapping an empty zone which is setup with + * non-blocking flags. The first call to zalloc_noblock() will kick off a thread_call + * to zalloc_async. 
We perform a zalloc() (which may block) and then an immediate free. + * This will prime the zone for the next use. + * + * Currently the thread_callout function (zalloc_async) will loop through all zones + * looking for any zone with async_pending set and do the work for it. + * + * NOTE: If the calling thread for zalloc_noblock is lower priority than thread_call, + * then zalloc_noblock to an empty zone may succeed. + */ void zalloc_async( thread_call_param_t p0, thread_call_param_t p1); -void zone_display_zprint( void ); +static thread_call_data_t call_async_alloc; vm_map_t zone_map = VM_MAP_NULL; @@ -348,16 +926,20 @@ vm_size_t zdata_size; #define zone_sleep(zone) \ (void) lck_mtx_sleep(&(zone)->lock, LCK_SLEEP_SPIN, (event_t)(zone), THREAD_UNINT); +/* + * The zone_locks_grp allows for collecting lock statistics. + * All locks are associated to this group in zinit. + * Look at tools/lockstat for debugging lock contention. + */ + +lck_grp_t zone_locks_grp; +lck_grp_attr_t zone_locks_grp_attr; #define lock_zone_init(zone) \ MACRO_BEGIN \ - char _name[32]; \ - (void) snprintf(_name, sizeof (_name), "zone.%s", (zone)->zone_name); \ - lck_grp_attr_setdefault(&(zone)->lock_grp_attr); \ - lck_grp_init(&(zone)->lock_grp, _name, &(zone)->lock_grp_attr); \ lck_attr_setdefault(&(zone)->lock_attr); \ lck_mtx_init_ext(&(zone)->lock, &(zone)->lock_ext, \ - &(zone)->lock_grp, &(zone)->lock_attr); \ + &zone_locks_grp, &(zone)->lock_attr); \ MACRO_END #define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock) @@ -368,8 +950,6 @@ MACRO_END #define ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE (32) struct zone_page_table_entry * volatile zone_page_table[ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE]; vm_size_t zone_page_table_used_size; -vm_offset_t zone_map_min_address; -vm_offset_t zone_map_max_address; unsigned int zone_pages; unsigned int zone_page_table_second_level_size; /* power of 2 */ unsigned int zone_page_table_second_level_shift_amount; @@ -383,22 +963,12 @@ struct zone_page_table_entry *zone_page_table_lookup(zone_page_index_t pindex); /* * Exclude more than one concurrent garbage collection */ -decl_lck_mtx_data(, zone_gc_lock) +decl_lck_mtx_data(, zone_gc_lock) -lck_attr_t zone_lck_attr; -lck_grp_t zone_lck_grp; -lck_grp_attr_t zone_lck_grp_attr; -lck_mtx_ext_t zone_lck_ext; - -#if !ZONE_ALIAS_ADDR -#define from_zone_map(addr, size) \ - ((vm_offset_t)(addr) >= zone_map_min_address && \ - ((vm_offset_t)(addr) + size -1) < zone_map_max_address) -#else -#define from_zone_map(addr, size) \ - ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)(uintptr_t)addr)) >= zone_map_min_address && \ - ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)(uintptr_t)addr)) + size -1) < zone_map_max_address) -#endif +lck_attr_t zone_gc_lck_attr; +lck_grp_t zone_gc_lck_grp; +lck_grp_attr_t zone_gc_lck_grp_attr; +lck_mtx_ext_t zone_gc_lck_ext; /* * Protects first_zone, last_zone, num_zones, @@ -414,6 +984,10 @@ boolean_t zone_gc_forced = FALSE; boolean_t panic_include_zprint = FALSE; boolean_t zone_gc_allowed_by_time_throttle = TRUE; +#define ZALLOC_DEBUG_ZONEGC 0x00000001 +#define ZALLOC_DEBUG_ZCRAM 0x00000002 +uint32_t zalloc_debug = 0; + /* * Zone leak debugging code * @@ -450,6 +1024,9 @@ static int log_records; /* size of the log, expressed in number of records */ static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging, if any */ +/* Log allocations and frees to help debug a zone element corruption */ +boolean_t corruption_debug_flag = FALSE; /* enabled by "-zc" boot-arg */ + /* * The number of records in 
the log is configurable via the zrecs parameter in boot-args. Set this to * the number of records you want in the log. For example, "zrecs=1000" sets it to 1000 records. Note @@ -467,22 +1044,16 @@ static char zone_name_to_log[MAX_ZONE_NAME] = ""; /* the zone name we're logging #define ZRECORDS_DEFAULT 4000 /* default records in log if zrecs is not specificed in boot-args */ /* - * Each record in the log contains a pointer to the zone element it refers to, a "time" number that allows - * the records to be ordered chronologically, and a small array to hold the pc's from the stack trace. A + * Each record in the log contains a pointer to the zone element it refers to, + * and a small array to hold the pc's from the stack trace. A * record is added to the log each time a zalloc() is done in the zone_of_interest. For leak debugging, * the record is cleared when a zfree() is done. For corruption debugging, the log tracks both allocs and frees. * If the log fills, old records are replaced as if it were a circular buffer. */ -struct zrecord { - void *z_element; /* the element that was zalloc'ed of zfree'ed */ - uint32_t z_opcode:1, /* whether it was a zalloc or zfree */ - z_time:31; /* time index when operation was done */ - void *z_pc[MAX_ZTRACE_DEPTH]; /* stack trace of caller */ -}; /* - * Opcodes for the z_opcode field: + * Opcodes for the btlog operation field: */ #define ZOP_ALLOC 1 @@ -491,11 +1062,7 @@ struct zrecord { /* * The allocation log and all the related variables are protected by the zone lock for the zone_of_interest */ - -static struct zrecord *zrecords; /* the log itself, dynamically allocated when logging is enabled */ -static int zcurrent = 0; /* index of the next slot in the log to use */ -static int zrecorded = 0; /* number of allocations recorded in the log */ -static unsigned int ztime = 0; /* a timestamp of sorts */ +static btlog_t *zlog_btlog; /* the log itself, dynamically allocated when logging is enabled */ static zone_t zone_of_interest = NULL; /* the zone being watched; corresponds to zone_name_to_log */ /* @@ -544,9 +1111,9 @@ log_this_zone(const char *zonename, const char *logname) * the buffer for the records has been allocated. 
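[Aside: illustrative sketch, not part of the xnu patch. The comments above describe the log's behavior: one record per zalloc/zfree in the watched zone, each holding the element pointer and a short backtrace, with old records overwritten circularly once the log fills. This patch moves the storage from the removed struct zrecord array into btlog_t, but the overwrite-oldest policy is the same. A reduced standalone model of such a log; the sizes and the ZOP_FREE value are assumptions, and zrecs= sizes the real log:]

	#include <stdint.h>
	#include <string.h>

	#define ZOP_ALLOC	1	/* as above */
	#define ZOP_FREE	2	/* assumption for the sketch */
	#define NRECS		16	/* hypothetical; boot-arg zrecs= sets the real size */
	#define TRACE_DEPTH	4	/* reduced stack-trace depth */

	struct record {
		void		*element;		/* element zalloc'ed or zfree'ed */
		int		op;			/* ZOP_ALLOC or ZOP_FREE */
		uintptr_t	pc[TRACE_DEPTH];	/* caller backtrace */
	};

	static struct record	zlog_buf[NRECS];
	static unsigned		zlog_next;

	/* Old records are overwritten as if the log were a circular buffer. */
	static void
	zlog_append(void *element, int op, const uintptr_t *bt, unsigned depth)
	{
		struct record *r = &zlog_buf[zlog_next++ % NRECS];

		memset(r, 0, sizeof(*r));
		r->element = element;
		r->op = op;
		if (depth > TRACE_DEPTH)
			depth = TRACE_DEPTH;
		memcpy(r->pc, bt, depth * sizeof(bt[0]));
	}

[end aside]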
*/ -#define DO_LOGGING(z) (zrecords && (z) == zone_of_interest) +#define DO_LOGGING(z) (zlog_btlog && (z) == zone_of_interest) -extern boolean_t zlog_ready; +extern boolean_t kmem_alloc_ready; #if CONFIG_ZLEAKS #pragma mark - @@ -642,15 +1209,6 @@ zleak_init(vm_size_t max_zonemap_size) zleak_global_tracking_threshold = max_zonemap_size / 2; zleak_per_zone_tracking_threshold = zleak_global_tracking_threshold / 8; -#if CONFIG_EMBEDDED - if (PE_parse_boot_argn("-zleakon", scratch_buf, sizeof(scratch_buf))) { - zleak_enable_flag = TRUE; - printf("zone leak detection enabled\n"); - } else { - zleak_enable_flag = FALSE; - printf("zone leak detection disabled\n"); - } -#else /* CONFIG_EMBEDDED */ /* -zleakoff (flag to disable zone leak monitor) */ if (PE_parse_boot_argn("-zleakoff", scratch_buf, sizeof(scratch_buf))) { zleak_enable_flag = FALSE; @@ -659,28 +1217,27 @@ zleak_init(vm_size_t max_zonemap_size) zleak_enable_flag = TRUE; printf("zone leak detection enabled\n"); } -#endif /* CONFIG_EMBEDDED */ /* zfactor=XXXX (override how often to sample the zone allocator) */ if (PE_parse_boot_argn("zfactor", &zleak_sample_factor, sizeof(zleak_sample_factor))) { - printf("Zone leak factor override:%u\n", zleak_sample_factor); + printf("Zone leak factor override: %u\n", zleak_sample_factor); } /* zleak-allocs=XXXX (override number of buckets in zallocations) */ if (PE_parse_boot_argn("zleak-allocs", &zleak_alloc_buckets, sizeof(zleak_alloc_buckets))) { - printf("Zone leak alloc buckets override:%u\n", zleak_alloc_buckets); + printf("Zone leak alloc buckets override: %u\n", zleak_alloc_buckets); /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */ if (zleak_alloc_buckets == 0 || (zleak_alloc_buckets & (zleak_alloc_buckets-1))) { - printf("Override isn't a power of two, bad things might happen!"); + printf("Override isn't a power of two, bad things might happen!\n"); } } /* zleak-traces=XXXX (override number of buckets in ztraces) */ if (PE_parse_boot_argn("zleak-traces", &zleak_trace_buckets, sizeof(zleak_trace_buckets))) { - printf("Zone leak trace buckets override:%u\n", zleak_trace_buckets); + printf("Zone leak trace buckets override: %u\n", zleak_trace_buckets); /* uses 'is power of 2' trick: (0x01000 & 0x00FFF == 0) */ if (zleak_trace_buckets == 0 || (zleak_trace_buckets & (zleak_trace_buckets-1))) { - printf("Override isn't a power of two, bad things might happen!"); + printf("Override isn't a power of two, bad things might happen!\n"); } } @@ -981,6 +1538,8 @@ fastbacktrace(uintptr_t* bt, uint32_t max_frames) kstackt = kstackb + kernel_stack_size; /* Load stack frame pointer (EBP on x86) into frameptr */ frameptr = __builtin_frame_address(0); + if (((uintptr_t)frameptr > kstackt) || ((uintptr_t)frameptr < kstackb)) + frameptr = NULL; while (frameptr != NULL && frame_index < max_frames ) { /* Next frame pointer is pointed to by the previous one */ @@ -1087,29 +1646,36 @@ zinit( const char *name) /* a name for the zone */ { zone_t z; + boolean_t use_page_list = FALSE; if (zone_zone == ZONE_NULL) { z = (struct zone *)zdata; - zdata += sizeof(*z); - zdata_size -= sizeof(*z); + /* special handling in zcram() because the first element is being used */ } else z = (zone_t) zalloc(zone_zone); if (z == ZONE_NULL) return(ZONE_NULL); + /* Zone elements must fit both a next pointer and a backup pointer */ + vm_size_t minimum_element_size = sizeof(vm_offset_t) * 2; + if (size < minimum_element_size) + size = minimum_element_size; + /* - * Round off all the parameters appropriately. 
+ * Round element size to a multiple of sizeof(pointer) + * This also enforces that allocations will be aligned on pointer boundaries */ - if (size < sizeof(z->free_elements)) - size = sizeof(z->free_elements); - size = ((size-1) + sizeof(z->free_elements)) - - ((size-1) % sizeof(z->free_elements)); - if (alloc == 0) + size = ((size-1) + sizeof(vm_offset_t)) - + ((size-1) % sizeof(vm_offset_t)); + + if (alloc == 0) alloc = PAGE_SIZE; + alloc = round_page(alloc); max = round_page(max); + /* * we look for an allocation size with less than 1% waste * up to 5 pages in size... @@ -1119,6 +1685,7 @@ zinit( * the user suggestion is larger AND has less fragmentation */ #if ZONE_ALIAS_ADDR + /* Favor PAGE_SIZE allocations unless we waste >10% space */ if ((size < PAGE_SIZE) && (PAGE_SIZE % size <= PAGE_SIZE / 10)) alloc = PAGE_SIZE; else @@ -1132,7 +1699,7 @@ zinit( waste = best % size; for (i = 1; i <= 5; i++) { - vm_size_t tsize, twaste; + vm_size_t tsize, twaste; tsize = i * PAGE_SIZE; @@ -1151,13 +1718,32 @@ use_this_allocation: if (max && (max < alloc)) max = alloc; - z->free_elements = 0; + /* + * Opt into page list tracking if we can reliably map an allocation + * to its page_metadata, and if the wastage in the tail of + * the allocation is not too large + */ + if (alloc == PAGE_SIZE) { + if ((PAGE_SIZE % size) >= sizeof(struct zone_page_metadata)) { + use_page_list = TRUE; + } else if ((PAGE_SIZE - sizeof(struct zone_page_metadata)) % size <= PAGE_SIZE / 100) { + use_page_list = TRUE; + } + } + + z->free_elements = NULL; + queue_init(&z->pages.any_free_foreign); + queue_init(&z->pages.all_free); + queue_init(&z->pages.intermediate); + queue_init(&z->pages.all_used); z->cur_size = 0; + z->page_count = 0; z->max_size = max; z->elem_size = size; z->alloc_size = alloc; z->zone_name = name; z->count = 0; + z->countfree = 0; z->sum_count = 0LL; z->doing_alloc = FALSE; z->doing_gc = FALSE; @@ -1173,11 +1759,11 @@ use_this_allocation: z->async_prio_refill = FALSE; z->gzalloc_exempt = FALSE; z->alignment_required = FALSE; + z->use_page_list = use_page_list; z->prio_refill_watermark = 0; z->zone_replenish_thread = NULL; + z->zp_count = 0; #if CONFIG_ZLEAKS - z->num_allocs = 0; - z->num_frees = 0; z->zleak_capture = 0; z->zleak_on = FALSE; #endif /* CONFIG_ZLEAKS */ @@ -1195,7 +1781,6 @@ use_this_allocation: * using the overflow zone slot. */ z->next_zone = ZONE_NULL; - thread_call_setup(&z->call_async_alloc, zalloc_async, z); simple_lock(&all_zones_lock); *last_zone = z; last_zone = &z->next_zone; @@ -1216,23 +1801,16 @@ use_this_allocation: /* * If we want to log a zone, see if we need to allocate buffer space for the log. Some vm related zones are - * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. zlog_ready is set to + * zinit'ed before we can do a kmem_alloc, so we have to defer allocation in that case. kmem_alloc_ready is set to * TRUE once enough of the VM system is up and running to allow a kmem_alloc to work. If we want to log one * of the VM related zones that's set up early on, we will skip allocation of the log until zinit is called again * later on some other zone. So note we may be allocating a buffer to log a zone other than the one being initialized * right now. */ - if (zone_of_interest != NULL && zrecords == NULL && zlog_ready) { - if (kmem_alloc(kernel_map, (vm_offset_t *)&zrecords, log_records * sizeof(struct zrecord)) == KERN_SUCCESS) { - - /* - * We got the memory for the log. Zero it out since the code needs this to identify unused records. 
- * At this point, everything is set up and we're ready to start logging this zone. - */ - - bzero((void *)zrecords, log_records * sizeof(struct zrecord)); - printf("zone: logging started for zone %s (%p)\n", zone_of_interest->zone_name, zone_of_interest); - + if (zone_of_interest != NULL && zlog_btlog == NULL && kmem_alloc_ready) { + zlog_btlog = btlog_create(log_records, MAX_ZTRACE_DEPTH, NULL, NULL, NULL); + if (zlog_btlog) { + printf("zone: logging started for zone %s\n", zone_of_interest->zone_name); } else { printf("zone: couldn't allocate memory for zrecords, turning off zleak logging\n"); zone_of_interest = NULL; @@ -1243,7 +1821,7 @@ use_this_allocation: #endif return(z); } -unsigned zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated; +unsigned zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated, zone_replenish_throttle_count; static void zone_replenish_thread(zone_t); @@ -1281,6 +1859,7 @@ static void zone_replenish_thread(zone_t z) { if (alloc_size == PAGE_SIZE) space = zone_alias_addr(space); #endif + ZONE_PAGE_COUNT_INCR(z, (alloc_size / PAGE_SIZE)); zcram(z, space, alloc_size); } else if (kr == KERN_RESOURCE_SHORTAGE) { VM_PAGE_WAIT(); @@ -1303,6 +1882,11 @@ static void zone_replenish_thread(zone_t z) { } unlock_zone(z); + /* Signal any potential throttled consumers, terminating + * their timer-bounded waits. + */ + thread_wakeup(z); + assert_wait(&z->zone_replenish_thread, THREAD_UNINT); thread_block(THREAD_CONTINUE_NULL); zone_replenish_wakeups++; @@ -1346,18 +1930,63 @@ zcram( if (from_zone_map(newmem, size)) from_zm = TRUE; - if (from_zm) + if (zalloc_debug & ZALLOC_DEBUG_ZCRAM) + kprintf("zcram(%p[%s], 0x%lx%s, 0x%lx)\n", zone, zone->zone_name, + (unsigned long)newmem, from_zm ? "" : "[F]", (unsigned long)size); + + if (from_zm && !zone->use_page_list) zone_page_init(newmem, size); lock_zone(zone); - while (size >= elem_size) { - free_to_zone(zone, (void *) newmem); - if (from_zm) - zone_page_alloc(newmem, elem_size); - zone->count++; /* compensate for free_to_zone */ - size -= elem_size; - newmem += elem_size; - zone->cur_size += elem_size; + + if (zone->use_page_list) { + struct zone_page_metadata *page_metadata; + + assert((newmem & PAGE_MASK) == 0); + assert((size & PAGE_MASK) == 0); + for (; size > 0; newmem += PAGE_SIZE, size -= PAGE_SIZE) { + + vm_size_t pos_in_page; + page_metadata = (struct zone_page_metadata *)(newmem + PAGE_SIZE - sizeof(struct zone_page_metadata)); + + page_metadata->pages.next = NULL; + page_metadata->pages.prev = NULL; + page_metadata->elements = NULL; + page_metadata->zone = zone; + page_metadata->alloc_count = 0; + page_metadata->free_count = 0; + + enqueue_tail(&zone->pages.all_used, (queue_entry_t)page_metadata); + + for (pos_in_page = 0; (newmem + pos_in_page + elem_size) < (vm_offset_t)page_metadata; pos_in_page += elem_size) { + page_metadata->alloc_count++; + zone->count++; /* compensate for free_to_zone */ + if ((newmem + pos_in_page) == (vm_offset_t)zone) { + /* + * special case for the "zone_zone" zone, which is using the first + * allocation of its pmap_steal_memory()-ed allocation for + * the "zone_zone" variable already. 
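
Since the trailer occupies the last sizeof(struct zone_page_metadata) bytes of
each page (page_metadata above is newmem + PAGE_SIZE - sizeof(struct
zone_page_metadata)), any element can be mapped back to its page's metadata by
address arithmetic alone.  A sketch of that lookup (hypothetical helper;
presumably what get_zone_page_metadata() does):

    #include <stddef.h>
    #include <stdint.h>

    struct zone_page_metadata;      /* trailer at the tail of each page */

    /* Mask the element address down to its page base, then step to the
     * final meta_size bytes of that page.  page_size must be a power of
     * two for the mask to be valid. */
    static struct zone_page_metadata *
    metadata_for_element(uintptr_t element, uintptr_t page_size, size_t meta_size)
    {
        uintptr_t page = element & ~(page_size - 1);
        return (struct zone_page_metadata *)(page + page_size - meta_size);
    }
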
+ */ + } else { + free_to_zone(zone, newmem + pos_in_page); + } + zone->cur_size += elem_size; + } + } + } else { + while (size >= elem_size) { + zone->count++; /* compensate for free_to_zone */ + if (newmem == (vm_offset_t)zone) { + /* Don't free zone_zone zone */ + } else { + free_to_zone(zone, newmem); + } + if (from_zm) + zone_page_alloc(newmem, elem_size); + size -= elem_size; + newmem += elem_size; + zone->cur_size += elem_size; + } } unlock_zone(zone); } @@ -1375,7 +2004,8 @@ zone_steal_memory(void) #endif /* Request enough early memory to get to the pmap zone */ zdata_size = 12 * sizeof(struct zone); - zdata = (vm_offset_t)pmap_steal_memory(round_page(zdata_size)); + zdata_size = round_page(zdata_size); + zdata = (vm_offset_t)pmap_steal_memory(zdata_size); } @@ -1406,6 +2036,7 @@ zfill( return 0; zone_change(zone, Z_FOREIGN, TRUE); + ZONE_PAGE_COUNT_INCR(zone, (size / PAGE_SIZE)); zcram(zone, memory, size); nalloc = (int)(size / zone->elem_size); assert(nalloc >= nelem); @@ -1427,27 +2058,16 @@ zone_bootstrap(void) zinfo_per_task = TRUE; } - /* do we want corruption-style debugging with zlog? */ - if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) { - corruption_debug_flag = TRUE; - } - - /* Set up zone poisoning */ - - free_check_sample_factor = ZP_DEFAULT_SAMPLING_FACTOR; + if (!PE_parse_boot_argn("zalloc_debug", &zalloc_debug, sizeof(zalloc_debug))) + zalloc_debug = 0; - /* support for old zone poisoning boot-args */ - if (PE_parse_boot_argn("-zp", temp_buf, sizeof(temp_buf))) { - free_check_sample_factor = 1; - } - if (PE_parse_boot_argn("-no-zp", temp_buf, sizeof(temp_buf))) { - free_check_sample_factor = 0; - } + /* Set up zone element poisoning */ + zp_init(); - /* zp-factor=XXXX (override how often to poison freed zone elements) */ - if (PE_parse_boot_argn("zp-factor", &free_check_sample_factor, sizeof(free_check_sample_factor))) { - printf("Zone poisoning factor override:%u\n", free_check_sample_factor); - } + /* should zlog log to debug zone corruption instead of leaks? */ + if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf))) { + corruption_debug_flag = TRUE; + } /* * Check for and set up zone leak detection if requested via boot-args. We recognized two @@ -1481,9 +2101,15 @@ zone_bootstrap(void) first_zone = ZONE_NULL; last_zone = &first_zone; num_zones = 0; + thread_call_setup(&call_async_alloc, zalloc_async, NULL); /* assertion: nobody else called zinit before us */ assert(zone_zone == ZONE_NULL); + + /* initializing global lock group for zones */ + lck_grp_attr_setdefault(&zone_locks_grp_attr); + lck_grp_init(&zone_locks_grp, "zone_locks", &zone_locks_grp_attr); + zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone), sizeof(struct zone), "zones"); zone_change(zone_zone, Z_COLLECT, FALSE); @@ -1526,6 +2152,9 @@ zinfo_task_free(task_t task) } } +/* Global initialization of Zone Allocator. + * Runs after zone_bootstrap. 
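
zone_bootstrap() above settles its tunables with the usual
PE_parse_boot_argn() idiom: value-style arguments fill a variable and return
TRUE when present, while flag-style arguments such as -zc only report
presence, so a scratch buffer suffices.  Distilled (the scratch buffer size
here is an assumption):

    static uint32_t  zalloc_debug;
    static boolean_t corruption_debug_flag;

    static void parse_zone_debug_boot_args(void)
    {
        char temp_buf[16];          /* presence check only */

        /* zalloc_debug=N enables the kprintf tracing used by zcram()
         * and zone_gc(); absent means 0 (off). */
        if (!PE_parse_boot_argn("zalloc_debug", &zalloc_debug, sizeof(zalloc_debug)))
            zalloc_debug = 0;

        /* -zc switches zlog from leak-style to corruption-style logging. */
        if (PE_parse_boot_argn("-zc", temp_buf, sizeof(temp_buf)))
            corruption_debug_flag = TRUE;
    }
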
+ */ void zone_init( vm_size_t max_zonemap_size) @@ -1566,10 +2195,10 @@ zone_init( zone_page_table_second_level_shift_amount++; } - lck_grp_attr_setdefault(&zone_lck_grp_attr); - lck_grp_init(&zone_lck_grp, "zones", &zone_lck_grp_attr); - lck_attr_setdefault(&zone_lck_attr); - lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr); + lck_grp_attr_setdefault(&zone_gc_lck_grp_attr); + lck_grp_init(&zone_gc_lck_grp, "zone_gc", &zone_gc_lck_grp_attr); + lck_attr_setdefault(&zone_gc_lck_attr); + lck_mtx_init_ext(&zone_gc_lock, &zone_gc_lck_ext, &zone_gc_lck_grp, &zone_gc_lck_attr); #if CONFIG_ZLEAKS /* @@ -1605,6 +2234,7 @@ zone_page_table_expand(zone_page_index_t pindex) second_level_size) != KERN_SUCCESS) { panic("zone_page_table_expand"); } + zone_map_table_page_count += (second_level_size / PAGE_SIZE); /* * zone_gc() may scan the "zone_page_table" directly, @@ -1622,6 +2252,7 @@ zone_page_table_expand(zone_page_index_t pindex) } else { /* Old slot was not NULL, someone else expanded first */ kmem_free(zone_map, second_level_array, second_level_size); + zone_map_table_page_count -= (second_level_size / PAGE_SIZE); } } else { /* Old slot was not NULL, already been expanded */ @@ -1651,18 +2282,19 @@ extern volatile SInt32 kfree_nop_count; */ void * zalloc_canblock( - register zone_t zone, + zone_t zone, boolean_t canblock) { vm_offset_t addr = 0; kern_return_t retval; uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */ int numsaved = 0; - int i; - boolean_t zone_replenish_wakeup = FALSE; - boolean_t did_gzalloc; + boolean_t zone_replenish_wakeup = FALSE, zone_alloc_throttle = FALSE; +#if CONFIG_GZALLOC || ZONE_DEBUG + boolean_t did_gzalloc = FALSE; +#endif + thread_t thr = current_thread(); - did_gzalloc = FALSE; #if CONFIG_ZLEAKS uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */ #endif /* CONFIG_ZLEAKS */ @@ -1674,22 +2306,22 @@ zalloc_canblock( did_gzalloc = (addr != 0); #endif - lock_zone(zone); - /* * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. */ - - if (DO_LOGGING(zone)) + if (__improbable(DO_LOGGING(zone))) numsaved = OSBacktrace((void*) zbt, MAX_ZTRACE_DEPTH); + + lock_zone(zone); + #if CONFIG_ZLEAKS /* * Zone leak detection: capture a backtrace every zleak_sample_factor * allocations in this zone. */ - if (zone->zleak_on && (zone->zleak_capture++ % zleak_sample_factor == 0)) { - zone->zleak_capture = 1; + if (zone->zleak_on && (++zone->zleak_capture >= zleak_sample_factor)) { + zone->zleak_capture = 0; /* Avoid backtracing twice if zone logging is on */ if (numsaved == 0 ) @@ -1699,15 +2331,40 @@ zalloc_canblock( } #endif /* CONFIG_ZLEAKS */ + if (zone->async_prio_refill && zone->zone_replenish_thread) { + do { + vm_size_t zfreec = (zone->cur_size - (zone->count * zone->elem_size)); + vm_size_t zrefillwm = zone->prio_refill_watermark * zone->elem_size; + zone_replenish_wakeup = (zfreec < zrefillwm); + zone_alloc_throttle = (zfreec < (zrefillwm / 2)) && ((thr->options & TH_OPT_VMPRIV) == 0); + + if (zone_replenish_wakeup) { + zone_replenish_wakeups_initiated++; + unlock_zone(zone); + /* Signal the potentially waiting + * refill thread. + */ + thread_wakeup(&zone->zone_replenish_thread); + + /* Scheduling latencies etc. may prevent + * the refill thread from keeping up + * with demand. 
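
In numbers: with an elem_size of 256 bytes and a prio_refill_watermark of 1024
elements, the refill thread is woken once free space in the zone drops under
256 KB, and unprivileged allocators start taking 1 ms timed waits under
128 KB.  The decision as a stand-alone function (hypothetical helper mirroring
the zfreec/zrefillwm math above):

    #include <stdbool.h>
    #include <stddef.h>

    static void refill_decision(size_t cur_size, size_t count,
                                size_t elem_size, size_t watermark_elems,
                                bool vm_privileged,
                                bool *wakeup, bool *throttle)
    {
        size_t free_bytes      = cur_size - count * elem_size;
        size_t watermark_bytes = watermark_elems * elem_size;

        /* Wake the replenish thread below the watermark; throttle the
         * consumer below half of it, unless it is VM-privileged. */
        *wakeup   = free_bytes < watermark_bytes;
        *throttle = (free_bytes < watermark_bytes / 2) && !vm_privileged;
    }
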
Throttle consumers + * when we fall below half the + * watermark, unless VM privileged + */ + if (zone_alloc_throttle) { + zone_replenish_throttle_count++; + assert_wait_timeout(zone, THREAD_UNINT, 1, NSEC_PER_MSEC); + thread_block(THREAD_CONTINUE_NULL); + } + lock_zone(zone); + } + } while (zone_alloc_throttle == TRUE); + } + if (__probable(addr == 0)) - alloc_from_zone(zone, (void **) &addr); + addr = try_alloc_from_zone(zone); - if (zone->async_prio_refill && - ((zone->cur_size - (zone->count * zone->elem_size)) < - (zone->prio_refill_watermark * zone->elem_size))) { - zone_replenish_wakeup = TRUE; - zone_replenish_wakeups_initiated++; - } while ((addr == 0) && canblock) { /* @@ -1801,7 +2458,7 @@ zalloc_canblock( } } #endif /* CONFIG_ZLEAKS */ - + ZONE_PAGE_COUNT_INCR(zone, (alloc_size / PAGE_SIZE)); zcram(zone, space, alloc_size); break; @@ -1820,9 +2477,13 @@ zalloc_canblock( panic_include_ztrace = TRUE; } #endif /* CONFIG_ZLEAKS */ - /* TODO: Change this to something more descriptive, perhaps - * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE). - */ + if (retval == KERN_NO_SPACE) { + zone_t zone_largest = zone_find_largest(); + panic("zalloc: zone map exhausted while allocating from zone %s, likely due to memory leak in zone %s (%lu total bytes, %d elements allocated)", + zone->zone_name, zone_largest->zone_name, + (unsigned long)zone_largest->cur_size, zone_largest->count); + + } panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count); } } else { @@ -1835,7 +2496,7 @@ zalloc_canblock( zone->waiting = FALSE; zone_wakeup(zone); } - alloc_from_zone(zone, (void **) &addr); + addr = try_alloc_from_zone(zone); if (addr == 0 && retval == KERN_RESOURCE_SHORTAGE) { unlock_zone(zone); @@ -1845,7 +2506,7 @@ zalloc_canblock( } } if (addr == 0) - alloc_from_zone(zone, (void **) &addr); + addr = try_alloc_from_zone(zone); } #if CONFIG_ZLEAKS @@ -1862,82 +2523,22 @@ zalloc_canblock( #endif /* CONFIG_ZLEAKS */ + if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { + zone->async_pending = TRUE; + unlock_zone(zone); + thread_call_enter(&call_async_alloc); + lock_zone(zone); + addr = try_alloc_from_zone(zone); + } + /* * See if we should be logging allocations in this zone. Logging is rarely done except when a leak is * suspected, so this code rarely executes. We need to do this code while still holding the zone lock * since it protects the various log related data structures. */ - if (DO_LOGGING(zone) && addr) { - - /* - * Look for a place to record this new allocation. We implement two different logging strategies - * depending on whether we're looking for the source of a zone leak or a zone corruption. When looking - * for a leak, we want to log as many allocations as possible in order to clearly identify the leaker - * among all the records. So we look for an unused slot in the log and fill that in before overwriting - * an old entry. When looking for a corruption however, it's better to have a chronological log of all - * the allocations and frees done in the zone so that the history of operations for a specific zone - * element can be inspected. So in this case, we treat the log as a circular buffer and overwrite the - * oldest entry whenever a new one needs to be added. - * - * The corruption_debug_flag flag tells us what style of logging to do. 
It's set if we're supposed to be - * doing corruption style logging (indicated via -zc in the boot-args). - */ - - if (!corruption_debug_flag && zrecords[zcurrent].z_element && zrecorded < log_records) { - - /* - * If we get here, we're doing leak style logging and there's still some unused entries in - * the log (since zrecorded is smaller than the size of the log). Look for an unused slot - * starting at zcurrent and wrap-around if we reach the end of the buffer. If the buffer - * is already full, we just fall through and overwrite the element indexed by zcurrent. - */ - - for (i = zcurrent; i < log_records; i++) { - if (zrecords[i].z_element == NULL) { - zcurrent = i; - goto empty_slot; - } - } - - for (i = 0; i < zcurrent; i++) { - if (zrecords[i].z_element == NULL) { - zcurrent = i; - goto empty_slot; - } - } - } - - /* - * Save a record of this allocation - */ - -empty_slot: - if (zrecords[zcurrent].z_element == NULL) - zrecorded++; - - zrecords[zcurrent].z_element = (void *)addr; - zrecords[zcurrent].z_time = ztime++; - zrecords[zcurrent].z_opcode = ZOP_ALLOC; - - for (i = 0; i < numsaved; i++) - zrecords[zcurrent].z_pc[i] = (void*) zbt[i]; - - for (; i < MAX_ZTRACE_DEPTH; i++) - zrecords[zcurrent].z_pc[i] = 0; - - zcurrent++; - - if (zcurrent >= log_records) - zcurrent = 0; - } - - if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { - zone->async_pending = TRUE; - unlock_zone(zone); - thread_call_enter(&zone->call_async_alloc); - lock_zone(zone); - alloc_from_zone(zone, (void **) &addr); + if (__improbable(DO_LOGGING(zone) && addr)) { + btlog_add_entry(zlog_btlog, (void *)addr, ZOP_ALLOC, (void **)zbt, numsaved); } #if ZONE_DEBUG @@ -1946,22 +2547,12 @@ empty_slot: addr += ZONE_DEBUG_OFFSET; } #endif - -#if CONFIG_ZLEAKS - if (addr != 0) { - zone->num_allocs++; - } -#endif /* CONFIG_ZLEAKS */ unlock_zone(zone); - if (zone_replenish_wakeup) - thread_wakeup(&zone->zone_replenish_thread); - TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr); if (addr) { - thread_t thr = current_thread(); task_t task; zinfo_usage_t zinfo; vm_size_t sz = zone->elem_size; @@ -1994,16 +2585,39 @@ zalloc_noblock( void zalloc_async( - thread_call_param_t p0, + __unused thread_call_param_t p0, __unused thread_call_param_t p1) { - void *elt; + zone_t current_z = NULL, head_z; + unsigned int max_zones, i; + void *elt = NULL; + boolean_t pending = FALSE; + + simple_lock(&all_zones_lock); + head_z = first_zone; + max_zones = num_zones; + simple_unlock(&all_zones_lock); + current_z = head_z; + for (i = 0; i < max_zones; i++) { + lock_zone(current_z); + if (current_z->async_pending == TRUE) { + current_z->async_pending = FALSE; + pending = TRUE; + } + unlock_zone(current_z); - elt = zalloc_canblock((zone_t)p0, TRUE); - zfree((zone_t)p0, elt); - lock_zone(((zone_t)p0)); - ((zone_t)p0)->async_pending = FALSE; - unlock_zone(((zone_t)p0)); + if (pending == TRUE) { + elt = zalloc_canblock(current_z, TRUE); + zfree(current_z, elt); + pending = FALSE; + } + /* + * This is based on assumption that zones never get + * freed once allocated and linked. + * Hence a read outside of lock is OK. 
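
That invariant is what makes the snapshot-then-walk shape safe here and in
zone_gc() and zone_find_largest() below.  Distilled, using the kernel types
from this file:

    /* Snapshot the list head and length under all_zones_lock, then walk
     * the next_zone links without it: zones are never unlinked or freed,
     * so the links stay valid even as new zones are appended. */
    zone_t       z;
    unsigned int i, max_zones;

    simple_lock(&all_zones_lock);
    z = first_zone;
    max_zones = num_zones;
    simple_unlock(&all_zones_lock);

    for (i = 0; i < max_zones && z != NULL; i++, z = z->next_zone) {
        /* per-zone work, taking lock_zone(z) as needed */
    }
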
+ */ + current_z = current_z->next_zone; + } } /* @@ -2037,13 +2651,13 @@ zget( /* * Zone leak detection: capture a backtrace */ - if (zone->zleak_on && (zone->zleak_capture++ % zleak_sample_factor == 0)) { - zone->zleak_capture = 1; + if (zone->zleak_on && (++zone->zleak_capture >= zleak_sample_factor)) { + zone->zleak_capture = 0; zleak_tracedepth = fastbacktrace(zbt, MAX_ZTRACE_DEPTH); } #endif /* CONFIG_ZLEAKS */ - alloc_from_zone(zone, (void **) &addr); + addr = try_alloc_from_zone(zone); #if ZONE_DEBUG if (addr && zone_debug_enabled(zone)) { enqueue_tail(&zone->active_zones, (queue_entry_t)addr); @@ -2062,10 +2676,6 @@ zget( zone->zleak_capture = zleak_sample_factor; } } - - if (addr != 0) { - zone->num_allocs++; - } #endif /* CONFIG_ZLEAKS */ unlock_zone(zone); @@ -2077,6 +2687,54 @@ zget( slower in debug mode when true. Use debugger to enable if needed */ /* static */ boolean_t zone_check = FALSE; +static void zone_check_freelist(zone_t zone, vm_offset_t elem) +{ + struct zone_free_element *this; + struct zone_page_metadata *thispage; + + if (zone->use_page_list) { + if (zone->allows_foreign) { + for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.any_free_foreign); + !queue_end(&zone->pages.any_free_foreign, (queue_entry_t)thispage); + thispage = (struct zone_page_metadata *)queue_next((queue_chain_t *)thispage)) { + for (this = thispage->elements; + this != NULL; + this = this->next) { + if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) + panic("zone_check_freelist"); + } + } + } + for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.all_free); + !queue_end(&zone->pages.all_free, (queue_entry_t)thispage); + thispage = (struct zone_page_metadata *)queue_next((queue_chain_t *)thispage)) { + for (this = thispage->elements; + this != NULL; + this = this->next) { + if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) + panic("zone_check_freelist"); + } + } + for (thispage = (struct zone_page_metadata *)queue_first(&zone->pages.intermediate); + !queue_end(&zone->pages.intermediate, (queue_entry_t)thispage); + thispage = (struct zone_page_metadata *)queue_next((queue_chain_t *)thispage)) { + for (this = thispage->elements; + this != NULL; + this = this->next) { + if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) + panic("zone_check_freelist"); + } + } + } else { + for (this = zone->free_elements; + this != NULL; + this = this->next) { + if (!is_sane_zone_element(zone, (vm_address_t)this) || (vm_address_t)this == elem) + panic("zone_check_freelist"); + } + } +} + static zone_t zone_last_bogus_zone = ZONE_NULL; static vm_offset_t zone_last_bogus_elem = 0; @@ -2086,18 +2744,54 @@ zfree( void *addr) { vm_offset_t elem = (vm_offset_t) addr; - void *zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */ + uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* only used if zone logging is enabled via boot-args */ int numsaved = 0; boolean_t gzfreed = FALSE; assert(zone != ZONE_NULL); +#if 1 + if (zone->use_page_list) { + struct zone_page_metadata *page_meta = get_zone_page_metadata((struct zone_free_element *)addr); + if (zone != page_meta->zone) { + /* + * Something bad has happened. Someone tried to zfree a pointer but the metadata says it is from + * a different zone (or maybe it's from a zone that doesn't use page free lists at all). 
We can repair + * some cases of this, if: + * 1) The specified zone had use_page_list, and the true zone also has use_page_list set. In that case + * we can swap the zone_t + * 2) The specified zone had use_page_list, but the true zone does not. In this case page_meta is garbage, + * and dereferencing page_meta->zone might panic. + * To distinguish the two, we enumerate the zone list to match it up. + * We do not handle the case where an incorrect zone is passed that does not have use_page_list set, + * even if the true zone did have this set. + */ + zone_t fixed_zone = NULL; + int fixed_i, max_zones; + + simple_lock(&all_zones_lock); + max_zones = num_zones; + fixed_zone = first_zone; + simple_unlock(&all_zones_lock); + + for (fixed_i=0; fixed_i < max_zones; fixed_i++, fixed_zone = fixed_zone->next_zone) { + if (fixed_zone == page_meta->zone && fixed_zone->use_page_list) { + /* we can fix this */ + printf("Fixing incorrect zfree from zone %s to zone %s\n", zone->zone_name, fixed_zone->zone_name); + zone = fixed_zone; + break; + } + } + } + } +#endif + /* * If zone logging is turned on and this is the zone we're tracking, grab a backtrace. */ - if (DO_LOGGING(zone)) - numsaved = OSBacktrace(&zbt[0], MAX_ZTRACE_DEPTH); + if (__improbable(DO_LOGGING(zone) && corruption_debug_flag)) + numsaved = OSBacktrace((void *)zbt, MAX_ZTRACE_DEPTH); #if MACH_ASSERT /* Basic sanity checks */ @@ -2131,55 +2825,24 @@ zfree( * whether we're trying to catch a leak or corruption. See comments above in zalloc for details. */ - if (DO_LOGGING(zone)) { - int i; - + if (__improbable(DO_LOGGING(zone))) { if (corruption_debug_flag) { - /* * We're logging to catch a corruption. Add a record of this zfree operation * to log. */ - - if (zrecords[zcurrent].z_element == NULL) - zrecorded++; - - zrecords[zcurrent].z_element = (void *)addr; - zrecords[zcurrent].z_time = ztime++; - zrecords[zcurrent].z_opcode = ZOP_FREE; - - for (i = 0; i < numsaved; i++) - zrecords[zcurrent].z_pc[i] = zbt[i]; - - for (; i < MAX_ZTRACE_DEPTH; i++) - zrecords[zcurrent].z_pc[i] = 0; - - zcurrent++; - - if (zcurrent >= log_records) - zcurrent = 0; - + btlog_add_entry(zlog_btlog, (void *)addr, ZOP_FREE, (void **)zbt, numsaved); } else { - /* * We're logging to catch a leak. Remove any record we might have for this * element since it's being freed. Note that we may not find it if the buffer * overflowed and that's OK. Since the log is of a limited size, old records * get overwritten if there are more zallocs than zfrees. 
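
Both branches reduce to a small policy over the btlog calls the patch
introduces; a hypothetical wrapper (btlog_add_entry() and
btlog_remove_entries_for_element() are the calls actually used above):

    /* Corruption mode keeps a chronological record of every alloc and
     * free so an element's full history can be replayed; leak mode keeps
     * only live allocations, so the records that never go away are the
     * candidate leakers. */
    static void zlog_op(btlog_t *log, void *elem, uint8_t op,
                        void **bt, int depth, boolean_t corruption_mode)
    {
        if (op == ZOP_ALLOC || corruption_mode)
            btlog_add_entry(log, elem, op, bt, depth);
        else
            btlog_remove_entries_for_element(log, elem);
    }
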
*/ - - for (i = 0; i < log_records; i++) { - if (zrecords[i].z_element == addr) { - zrecords[i].z_element = NULL; - zcurrent = i; - zrecorded--; - break; - } - } + btlog_remove_entries_for_element(zlog_btlog, (void *)addr); } } - #if ZONE_DEBUG if (!gzfreed && zone_debug_enabled(zone)) { queue_t tmp_elem; @@ -2200,29 +2863,20 @@ zfree( } #endif /* ZONE_DEBUG */ if (zone_check) { - vm_offset_t this; - - /* check the zone's consistency */ - - for (this = zone->free_elements; - this != 0; - this = * (vm_offset_t *) this) - if (!pmap_kernel_va(this) || this == elem) - panic("zfree"); + zone_check_freelist(zone, elem); } if (__probable(!gzfreed)) - free_to_zone(zone, (void *) elem); + free_to_zone(zone, elem); #if MACH_ASSERT if (zone->count < 0) - panic("zfree: count < 0!"); + panic("zfree: zone count underflow in zone %s while freeing element %p, possible cause: double frees or freeing memory that did not come from this zone", + zone->zone_name, addr); #endif #if CONFIG_ZLEAKS - zone->num_frees++; - /* * Zone leak detection: un-track the allocation */ @@ -2327,7 +2981,7 @@ zone_free_count(zone_t zone) integer_t free_count; lock_zone(zone); - free_count = (integer_t)(zone->cur_size/zone->elem_size - zone->count); + free_count = zone->countfree; unlock_zone(zone); assert(free_count >= 0); @@ -2499,6 +3153,13 @@ zone_page_free_element( panic("zone_page_free_element"); #endif + /* Clear out the old next and backup pointers */ + vm_offset_t *primary = (vm_offset_t *) addr; + vm_offset_t *backup = get_backup_ptr(size, primary); + + *primary = ZP_POISON; + *backup = ZP_POISON; + i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); @@ -2535,39 +3196,11 @@ zone_page_free_element( } -/* This is used for walking through a zone's free element list. - */ -struct zone_free_element { - struct zone_free_element * next; -}; -/* - * Add a linked list of pages starting at base back into the zone - * free list. Tail points to the last element on the list. - */ -#define ADD_LIST_TO_ZONE(zone, base, tail) \ -MACRO_BEGIN \ - (tail)->next = (void *)((zone)->free_elements); \ - if ((zone)->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) { \ - ((vm_offset_t *)(tail))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \ - (zone)->free_elements; \ - } \ - (zone)->free_elements = (unsigned long)(base); \ -MACRO_END - -/* - * Add an element to the chain pointed to by prev. - */ -#define ADD_ELEMENT(zone, prev, elem) \ -MACRO_BEGIN \ - (prev)->next = (elem); \ - if ((zone)->elem_size >= (2 * sizeof(vm_offset_t) + sizeof(uint32_t))) { \ - ((vm_offset_t *)(prev))[((zone)->elem_size/sizeof(vm_offset_t))-1] = \ - (vm_offset_t)(elem); \ - } \ -MACRO_END struct { + uint64_t zgc_invoked; + uint64_t zgc_bailed; uint32_t pgs_freed; uint32_t elems_collected, @@ -2588,17 +3221,23 @@ zone_gc(boolean_t all_zones) unsigned int max_zones; zone_t z; unsigned int i; + uint32_t old_pgs_freed; zone_page_index_t zone_free_page_head; zone_page_index_t zone_free_page_tail; thread_t mythread = current_thread(); lck_mtx_lock(&zone_gc_lock); + zgc_stats.zgc_invoked++; + old_pgs_freed = zgc_stats.pgs_freed; + simple_lock(&all_zones_lock); max_zones = num_zones; z = first_zone; simple_unlock(&all_zones_lock); + if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) + kprintf("zone_gc(all_zones=%s) starting...\n", all_zones ? 
"TRUE" : "FALSE"); /* * it's ok to allow eager kernel preemption while @@ -2620,14 +3259,16 @@ zone_gc(boolean_t all_zones) unsigned int n, m; vm_size_t elt_size, size_freed; struct zone_free_element *elt, *base_elt, *base_prev, *prev, *scan, *keep, *tail; - int kmem_frees = 0; + int kmem_frees = 0, total_freed_pages = 0; + struct zone_page_metadata *page_meta; + queue_head_t page_meta_head; assert(z != ZONE_NULL); if (!z->collectable) continue; - if (all_zones == FALSE && z->elem_size < PAGE_SIZE) + if (all_zones == FALSE && z->elem_size < PAGE_SIZE && !z->use_page_list) continue; lock_zone(z); @@ -2643,6 +3284,7 @@ zone_gc(boolean_t all_zones) * or the element size is a multiple of the PAGE_SIZE */ if ((elt_size & PAGE_MASK) && + !z->use_page_list && (((z->cur_size - z->count * elt_size) <= (2 * z->alloc_size)) || ((z->cur_size - z->count * elt_size) <= (z->cur_size / 10)))) { unlock_zone(z); @@ -2655,11 +3297,80 @@ zone_gc(boolean_t all_zones) * Snatch all of the free elements away from the zone. */ - scan = (void *)z->free_elements; - z->free_elements = 0; + if (z->use_page_list) { + queue_new_head(&z->pages.all_free, &page_meta_head, struct zone_page_metadata *, pages); + queue_init(&z->pages.all_free); + } else { + scan = (void *)z->free_elements; + z->free_elements = 0; + } unlock_zone(z); + if (z->use_page_list) { + /* + * For zones that maintain page lists (which in turn + * track free elements on those pages), zone_gc() + * is incredibly easy, and we bypass all the logic + * for scanning elements and mapping them to + * collectable pages + */ + + size_freed = 0; + + queue_iterate(&page_meta_head, page_meta, struct zone_page_metadata *, pages) { + assert(from_zone_map((vm_address_t)page_meta, sizeof(*page_meta))); /* foreign elements should be in any_free_foreign */ + + zgc_stats.elems_freed += page_meta->free_count; + size_freed += elt_size * page_meta->free_count; + zgc_stats.elems_collected += page_meta->free_count; + } + + lock_zone(z); + + if (size_freed > 0) { + z->cur_size -= size_freed; + z->countfree -= size_freed/elt_size; + } + + z->doing_gc = FALSE; + if (z->waiting) { + z->waiting = FALSE; + zone_wakeup(z); + } + + unlock_zone(z); + + if (queue_empty(&page_meta_head)) + continue; + + thread_clear_eager_preempt(mythread); + + while ((page_meta = (struct zone_page_metadata *)dequeue_head(&page_meta_head)) != NULL) { + vm_address_t free_page_address; + + free_page_address = trunc_page((vm_address_t)page_meta); +#if ZONE_ALIAS_ADDR + free_page_address = zone_virtual_addr(free_page_address); +#endif + kmem_free(zone_map, free_page_address, PAGE_SIZE); + ZONE_PAGE_COUNT_DECR(z, 1); + total_freed_pages++; + zgc_stats.pgs_freed += 1; + + if (++kmem_frees == 32) { + thread_yield_internal(1); + kmem_frees = 0; + } + } + + if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) + kprintf("zone_gc() of zone %s freed %lu elements, %d pages\n", z->zone_name, (unsigned long)size_freed/elt_size, total_freed_pages); + + thread_set_eager_preempt(mythread); + continue; /* go to next zone */ + } + /* * Pass 1: * @@ -2689,17 +3400,23 @@ zone_gc(boolean_t all_zones) if (keep == NULL) keep = tail = elt; else { - ADD_ELEMENT(z, tail, elt); + append_zone_element(z, tail, elt); tail = elt; } - ADD_ELEMENT(z, prev, elt->next); + append_zone_element(z, prev, elt->next); elt = elt->next; - ADD_ELEMENT(z, tail, NULL); + append_zone_element(z, tail, NULL); } /* * Dribble back the elements we are keeping. 
+ * If there are none, give some elements that we haven't looked at yet + * back to the freelist so that others waiting on the zone don't get stuck + * for too long. This might prevent us from recovering some memory, + * but allows us to avoid having to allocate new memory to serve requests + * while zone_gc has all the free memory tied up. + * */ if (++n >= 50) { @@ -2708,7 +3425,7 @@ zone_gc(boolean_t all_zones) lock_zone(z); if (keep != NULL) { - ADD_LIST_TO_ZONE(z, keep, tail); + add_list_to_zone(z, keep, tail); tail = keep = NULL; } else { m =0; @@ -2719,8 +3436,11 @@ zone_gc(boolean_t all_zones) elt = elt->next; } if (m !=0 ) { - ADD_LIST_TO_ZONE(z, base_elt, prev); - ADD_ELEMENT(z, base_prev, elt); + /* Extract the elements from the list and + * give them back */ + append_zone_element(z, prev, NULL); + add_list_to_zone(z, base_elt, prev); + append_zone_element(z, base_prev, elt); prev = base_prev; } } @@ -2743,7 +3463,7 @@ zone_gc(boolean_t all_zones) if (keep != NULL) { lock_zone(z); - ADD_LIST_TO_ZONE(z, keep, tail); + add_list_to_zone(z, keep, tail); if (z->waiting) { z->waiting = FALSE; @@ -2788,12 +3508,12 @@ zone_gc(boolean_t all_zones) if (keep == NULL) keep = tail = elt; else { - ADD_ELEMENT(z, tail, elt); + append_zone_element(z, tail, elt); tail = elt; } elt = elt->next; - ADD_ELEMENT(z, tail, NULL); + append_zone_element(z, tail, NULL); ++zgc_stats.elems_kept; } @@ -2807,10 +3527,11 @@ zone_gc(boolean_t all_zones) lock_zone(z); z->cur_size -= size_freed; + z->countfree -= size_freed/elt_size; size_freed = 0; if (keep != NULL) { - ADD_LIST_TO_ZONE(z, keep, tail); + add_list_to_zone(z, keep, tail); } if (z->waiting) { @@ -2834,9 +3555,10 @@ zone_gc(boolean_t all_zones) if (size_freed > 0 || keep != NULL) { z->cur_size -= size_freed; + z->countfree -= size_freed/elt_size; if (keep != NULL) { - ADD_LIST_TO_ZONE(z, keep, tail); + add_list_to_zone(z, keep, tail); } } @@ -2848,7 +3570,6 @@ zone_gc(boolean_t all_zones) } unlock_zone(z); - if (zone_free_page_head == ZONE_PAGE_INDEX_INVALID) continue; @@ -2858,6 +3579,20 @@ zone_gc(boolean_t all_zones) */ thread_clear_eager_preempt(mythread); + + /* + * This loop counts the number of pages that should be freed by the + * next loop that tries to coalesce the kmem_frees() + */ + uint32_t pages_to_free_count = 0; + vm_address_t fpa; + zone_page_index_t index; + for (index = zone_free_page_head; index != ZONE_PAGE_INDEX_INVALID;) { + pages_to_free_count++; + fpa = zone_map_min_address + PAGE_SIZE * ((vm_size_t)index); + index = *(zone_page_index_t *)fpa; + } + /* * Reclaim the pages we are freeing. 
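
The list being reclaimed costs no side storage: each free page's first word
holds the zone_page_index_t of the next free page, so the chain is threaded
through the pages themselves.  The counting loop above, distilled:

    /* Convert each index to its address inside zone_map and read the
     * next index out of the free page's first word. */
    uint32_t          pages_to_free = 0;
    zone_page_index_t index = zone_free_page_head;

    while (index != ZONE_PAGE_INDEX_INVALID) {
        vm_address_t page = zone_map_min_address +
                            PAGE_SIZE * (vm_size_t)index;
        pages_to_free++;
        index = *(zone_page_index_t *)page;
    }
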
*/ @@ -2873,6 +3608,7 @@ zone_gc(boolean_t all_zones) zone_free_page_head = *(zone_page_index_t *)free_page_address; page_count = 1; + total_freed_pages++; while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) { zone_page_index_t next_zind = zone_free_page_head; @@ -2887,18 +3623,31 @@ zone_gc(boolean_t all_zones) zone_free_page_head = *(zone_page_index_t *)next_free_page_address; page_count++; + total_freed_pages++; } kmem_free(zone_map, free_page_address, page_count * PAGE_SIZE); - + ZONE_PAGE_COUNT_DECR(z, page_count); zgc_stats.pgs_freed += page_count; + pages_to_free_count -= page_count; if (++kmem_frees == 32) { thread_yield_internal(1); kmem_frees = 0; } } + + /* Check that we actually free the exact number of pages we were supposed to */ + assert(pages_to_free_count == 0); + + if (zalloc_debug & ZALLOC_DEBUG_ZONEGC) + kprintf("zone_gc() of zone %s freed %lu elements, %d pages\n", z->zone_name, (unsigned long)size_freed/elt_size, total_freed_pages); + thread_set_eager_preempt(mythread); } + + if (old_pgs_freed == zgc_stats.pgs_freed) + zgc_stats.zgc_bailed++; + thread_clear_eager_preempt(mythread); lck_mtx_unlock(&zone_gc_lock); @@ -3471,11 +4220,42 @@ void zone_display_zprint() printf("Kalloc.Large:\t%lu\n",(uintptr_t)kalloc_large_total); } +zone_t +zone_find_largest(void) +{ + unsigned int i; + unsigned int max_zones; + zone_t the_zone; + zone_t zone_largest; + + simple_lock(&all_zones_lock); + the_zone = first_zone; + max_zones = num_zones; + simple_unlock(&all_zones_lock); + + zone_largest = the_zone; + for (i = 0; i < max_zones; i++) { + if (the_zone->cur_size > zone_largest->cur_size) { + zone_largest = the_zone; + } + + if (the_zone->next_zone == NULL) { + break; + } + + the_zone = the_zone->next_zone; + } + return zone_largest; +} + #if ZONE_DEBUG /* should we care about locks here ? 
*/ -#define zone_in_use(z) ( z->count || z->free_elements ) +#define zone_in_use(z) ( z->count || z->free_elements \ + || !queue_empty(&z->pages.all_free) \ + || !queue_empty(&z->pages.intermediate) \ + || (z->allows_foreign && !queue_empty(&z->pages.any_free_foreign))) void zone_debug_enable( diff --git a/osfmk/kern/zalloc.h b/osfmk/kern/zalloc.h index 630a4fff0..19110a165 100644 --- a/osfmk/kern/zalloc.h +++ b/osfmk/kern/zalloc.h @@ -94,51 +94,58 @@ typedef struct gzalloc_data { * */ +struct zone_free_element; +struct zone_page_metadata; + struct zone { + struct zone_free_element *free_elements; /* free elements directly linked */ + struct { + queue_head_t any_free_foreign; /* foreign pages crammed into zone */ + queue_head_t all_free; + queue_head_t intermediate; + queue_head_t all_used; + } pages; /* list of zone_page_metadata structs, which maintain per-page free element lists */ int count; /* Number of elements used now */ - vm_offset_t free_elements; + int countfree; /* Number of free elements */ + lck_attr_t lock_attr; /* zone lock attribute */ decl_lck_mtx_data(,lock) /* zone lock */ lck_mtx_ext_t lock_ext; /* placeholder for indirect mutex */ - lck_attr_t lock_attr; /* zone lock attribute */ - lck_grp_t lock_grp; /* zone lock group */ - lck_grp_attr_t lock_grp_attr; /* zone lock group attribute */ vm_size_t cur_size; /* current memory utilization */ vm_size_t max_size; /* how large can this zone grow */ vm_size_t elem_size; /* size of an element */ vm_size_t alloc_size; /* size used for more memory */ + uint64_t page_count __attribute__((aligned(8))); /* number of pages used by this zone */ uint64_t sum_count; /* count of allocs (life of zone) */ - unsigned int - /* boolean_t */ exhaustible :1, /* (F) merely return if empty? */ - /* boolean_t */ collectable :1, /* (F) garbage collect empty pages */ - /* boolean_t */ expandable :1, /* (T) expand zone (with message)? */ - /* boolean_t */ allows_foreign :1,/* (F) allow non-zalloc space */ - /* boolean_t */ doing_alloc :1, /* is zone expanding now? */ - /* boolean_t */ waiting :1, /* is thread waiting for expansion? */ - /* boolean_t */ async_pending :1, /* asynchronous allocation pending? */ -#if CONFIG_ZLEAKS - /* boolean_t */ zleak_on :1, /* Are we collecting allocation information? */ -#endif /* CONFIG_ZLEAKS */ - /* boolean_t */ caller_acct: 1, /* do we account allocation/free to the caller? */ - /* boolean_t */ doing_gc :1, /* garbage collect in progress? */ - /* boolean_t */ noencrypt :1, - /* boolean_t */ no_callout:1, - /* boolean_t */ async_prio_refill:1, - /* boolean_t */ gzalloc_exempt:1, - /* boolean_t */ alignment_required:1; + uint32_t + /* boolean_t */ exhaustible :1, /* (F) merely return if empty? */ + /* boolean_t */ collectable :1, /* (F) garbage collect empty pages */ + /* boolean_t */ expandable :1, /* (T) expand zone (with message)? */ + /* boolean_t */ allows_foreign :1, /* (F) allow non-zalloc space */ + /* boolean_t */ doing_alloc :1, /* is zone expanding now? */ + /* boolean_t */ waiting :1, /* is thread waiting for expansion? */ + /* boolean_t */ async_pending :1, /* asynchronous allocation pending? */ + /* boolean_t */ zleak_on :1, /* Are we collecting allocation information? */ + /* boolean_t */ caller_acct :1, /* do we account allocation/free to the caller? */ + /* boolean_t */ doing_gc :1, /* garbage collect in progress? 
*/ + /* boolean_t */ noencrypt :1, + /* boolean_t */ no_callout :1, + /* boolean_t */ async_prio_refill :1, + /* boolean_t */ gzalloc_exempt :1, + /* boolean_t */ alignment_required :1, + /* boolean_t */ use_page_list :1, + /* future */ _reserved :16; + int index; /* index into zone_info arrays for this zone */ - struct zone * next_zone; /* Link for all-zones list */ - thread_call_data_t call_async_alloc; /* callout for asynchronous alloc */ + struct zone *next_zone; /* Link for all-zones list */ const char *zone_name; /* a name for the zone */ #if ZONE_DEBUG queue_head_t active_zones; /* active elements */ #endif /* ZONE_DEBUG */ #if CONFIG_ZLEAKS - uint32_t num_allocs; /* alloc stats for zleak benchmarks */ - uint32_t num_frees; /* free stats for zleak benchmarks */ uint32_t zleak_capture; /* per-zone counter for capturing every N allocations */ #endif /* CONFIG_ZLEAKS */ - uint32_t free_check_count; /* counter for poisoning/checking every N frees */ + uint32_t zp_count; /* counter for poisoning every N frees */ vm_size_t prio_refill_watermark; thread_t zone_replenish_thread; #if CONFIG_GZALLOC @@ -164,11 +171,11 @@ extern void consider_zone_gc(boolean_t); extern void zone_steal_memory(void); /* Bootstrap zone module (create zone zone) */ -extern void zone_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +extern void zone_bootstrap(void); /* Init zone module */ extern void zone_init( - vm_size_t map_size) __attribute__((section("__TEXT, initcode"))); + vm_size_t map_size); /* Handle per-task zone info */ extern void zinfo_task_init(task_t task); @@ -266,6 +273,7 @@ extern void zone_prio_refill_configure(zone_t, vm_size_t); */ #define Z_ALIGNMENT_REQUIRED 8 #define Z_GZALLOC_EXEMPT 9 /* Not tracked in guard allocation mode */ + /* Preallocate space for zone from zone map */ extern void zprealloc( zone_t zone, diff --git a/osfmk/kextd/Makefile b/osfmk/kextd/Makefile index 771b0cd26..b938c7fbb 100644 --- a/osfmk/kextd/Makefile +++ b/osfmk/kextd/Makefile @@ -3,17 +3,10 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS = - -EXPINC_SUBDIRS_I386 = - MIG_DEFS = kextd_mach.defs DATAFILES = ${MIG_DEFS} @@ -46,13 +39,13 @@ MIG_KUSRC = kextd_mach.c kextd_mach.h COMP_FILES = ${MIG_KUSRC} -${COMP_FILES} : kextd_mach.defs +do_build_all:: $(COMP_FILES) ${MIG_KUSRC} : kextd_mach.defs @echo MIG $@ $(_v)${MIG} ${MIGFLAGS} ${MIGKUFLAGS} \ - -user $*.c \ - -header $*.h \ + -user kextd_mach.c \ + -header kextd_mach.h \ -server /dev/null \ -sheader /dev/null \ $< diff --git a/osfmk/kperf/Makefile b/osfmk/kperf/Makefile index 9ede0b0f9..699416570 100644 --- a/osfmk/kperf/Makefile +++ b/osfmk/kperf/Makefile @@ -11,21 +11,21 @@ EXPORT_ONLY_FILES = \ context.h \ timetrigger.h \ pet.h \ - filter.h \ kperfbsd.h \ action.h \ kperf.h - -INSTALL_MI_LIST = +EXPORT_MI_DIR = kperf # Export our headers EXPORT_MI_LIST = ${EXPORT_ONLY_FILES} -# Don't install in non-local, though -INSTALL_KF_MI_LIST = "" +INSTALL_MI_DIR = kperf -EXPORT_MI_DIR = kperf +INSTALL_MI_LIST = $(empty) + +# Don't install in non-local, though +INSTALL_KF_MI_LIST = $(empty) include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/kperf/action.c b/osfmk/kperf/action.c index 3f1b5e2b0..dbaf90b21 100644 --- a/osfmk/kperf/action.c +++ b/osfmk/kperf/action.c @@ -47,37 +47,45 @@ #include #include #include -#include 
#include #include #include #define ACTION_MAX 32 -/* XXX: callback handler from chudxnu */ -/* FIXME: hook this up to something */ -//void (*kperf_thread_ast_handler)(thread_t); - /* the list of different actions to take */ struct action { - unsigned sample; + uint32_t sample; + uint32_t userdata; + int pid_filter; }; /* the list of actions */ static unsigned actionc = 0; static struct action *actionv = NULL; +/* whether to record callstacks on kdebug events */ +static int kdebug_callstack_action = 0; + +/* whether we get a callback on a thread switch */ +int kperf_cswitch_hook = 0; + +/* indirect hooks to play nice with CHUD for the transition to kperf */ +kern_return_t chudxnu_kdebug_callback_enter(chudxnu_kdebug_callback_func_t fn); +kern_return_t chudxnu_kdebug_callback_cancel(void); /* Do the real work! */ /* this can be called in any context ... right? */ static kern_return_t kperf_sample_internal( struct kperf_sample *sbuf, - struct kperf_context *context, - unsigned sample_what, boolean_t pend_user ) + struct kperf_context *context, + unsigned sample_what, unsigned sample_flags, + unsigned actionid ) { boolean_t enabled; int did_ucallstack = 0, did_tinfo_extra = 0; + uint32_t userdata; /* not much point continuing here, but what to do ? return * Shutdown? cut a tracepoint and continue? @@ -87,6 +95,11 @@ kperf_sample_internal( struct kperf_sample *sbuf, int is_kernel = (context->cur_pid == 0); + sbuf->kcallstack.nframes = 0; + sbuf->kcallstack.flags = CALLSTACK_VALID; + sbuf->ucallstack.nframes = 0; + sbuf->ucallstack.flags = CALLSTACK_VALID; + /* an event occurred. Sample everything and dump it in a * buffer. */ @@ -95,20 +108,20 @@ kperf_sample_internal( struct kperf_sample *sbuf, if( sample_what & SAMPLER_TINFO ) { kperf_threadinfo_sample( &sbuf->threadinfo, context ); - /* XXX FIXME This drops events when the thread is idle. - * This should be configurable. 
*/ - if (sbuf->threadinfo.runmode & 0x40) - return SAMPLE_CONTINUE; + /* See if we should drop idle thread samples */ + if( !(sample_flags & SAMPLE_FLAG_IDLE_THREADS) ) + if (sbuf->threadinfo.runmode & 0x40) + return SAMPLE_CONTINUE; } - if( sample_what & SAMPLER_KSTACK ) + if( (sample_what & SAMPLER_KSTACK) && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) ) kperf_kcallstack_sample( &sbuf->kcallstack, context ); /* sensitive ones */ if ( !is_kernel ) { - if( pend_user ) + if( sample_flags & SAMPLE_FLAG_PEND_USER ) { - if( sample_what & SAMPLER_USTACK ) + if( (sample_what & SAMPLER_USTACK) && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) ) did_ucallstack = kperf_ucallstack_pend( context ); if( sample_what & SAMPLER_TINFOEX ) @@ -116,7 +129,7 @@ kperf_sample_internal( struct kperf_sample *sbuf, } else { - if( sample_what & SAMPLER_USTACK ) + if( (sample_what & SAMPLER_USTACK) && !(sample_flags & SAMPLE_FLAG_EMPTY_CALLSTACK) ) kperf_ucallstack_sample( &sbuf->ucallstack, context ); if( sample_what & SAMPLER_TINFOEX ) @@ -125,13 +138,26 @@ kperf_sample_internal( struct kperf_sample *sbuf, } } +#if KPC + if ( sample_what & SAMPLER_PMC_CPU ) + kperf_kpc_cpu_sample( &sbuf->kpcdata, + (sample_what & SAMPLER_PMC_CPU) != 0 ); +#endif + + /* lookup the user tag, if any */ + if( actionid + && (actionid <= actionc) ) + userdata = actionv[actionid-1].userdata; + else + userdata = actionid; + /* stash the data into the buffer * interrupts off to ensure we don't get split */ enabled = ml_set_interrupts_enabled(FALSE); - if ( pend_user ) - BUF_DATA1( PERF_GEN_EVENT | DBG_FUNC_START, sample_what ); + BUF_DATA( PERF_GEN_EVENT | DBG_FUNC_START, sample_what, + actionid, userdata, sample_flags ); /* dump threadinfo */ if( sample_what & SAMPLER_TINFO ) @@ -144,7 +170,7 @@ kperf_sample_internal( struct kperf_sample *sbuf, /* dump user stuff */ if ( !is_kernel ) { - if ( pend_user ) + if ( sample_flags & SAMPLE_FLAG_PEND_USER ) { if ( did_ucallstack ) BUF_INFO1( PERF_CS_UPEND, 0 ); @@ -162,8 +188,13 @@ kperf_sample_internal( struct kperf_sample *sbuf, } } - if ( pend_user ) - BUF_DATA1( PERF_GEN_EVENT | DBG_FUNC_END, sample_what ); +#if KPC + if ( sample_what & SAMPLER_PMC_CPU ) + kperf_kpc_cpu_log( &sbuf->kpcdata ); + +#endif + + BUF_DATA1( PERF_GEN_EVENT | DBG_FUNC_END, sample_what ); /* intrs back on */ ml_set_interrupts_enabled(enabled); @@ -175,23 +206,29 @@ kperf_sample_internal( struct kperf_sample *sbuf, kern_return_t kperf_sample( struct kperf_sample *sbuf, struct kperf_context *context, - unsigned actionid, boolean_t pend_user ) + unsigned actionid, unsigned sample_flags ) { unsigned sample_what = 0; - - /* check samppling is on, or panic */ - if( kperf_sampling_status() == KPERF_SAMPLING_OFF ) - panic("trigger fired while sampling off"); - else if( kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN ) - return SAMPLE_SHUTDOWN; + int pid_filter; /* work out what to sample, if anything */ - if( actionid >= actionc ) + if( (actionid > actionc) || (actionid == 0) ) return SAMPLE_SHUTDOWN; - sample_what = actionv[actionid].sample; + /* check the pid filter against the context's current pid. 
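
This lookup, like every accessor below it, treats actionid as 1-based, with 0
reserved as the "NULL" action.  A hypothetical helper capturing the
convention:

    /* Valid ids are 1..actionc; translate to the 0-based actionv[] slot
     * only after range-checking, and report 0 as "no action". */
    static struct action *action_for_id(unsigned actionid)
    {
        if (actionid == 0 || actionid > actionc)
            return NULL;
        return &actionv[actionid - 1];
    }
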
+ * filter pid == -1 means any pid + */ + pid_filter = actionv[actionid-1].pid_filter; + if( (pid_filter != -1) + && (pid_filter != context->cur_pid) ) + return SAMPLE_CONTINUE; + + /* the samplers to run */ + sample_what = actionv[actionid-1].sample; - return kperf_sample_internal( sbuf, context, sample_what, pend_user ); + /* do the actual sample operation */ + return kperf_sample_internal( sbuf, context, sample_what, + sample_flags, actionid ); } /* ast callback on a thread */ @@ -204,20 +241,11 @@ kperf_thread_ast_handler( thread_t thread ) /* we know we're on a thread, so let's do stuff */ task_t task = NULL; - /* Don't sample if we are shutting down or off */ - if( kperf_sampling_status() != KPERF_SAMPLING_ON ) - return; - BUF_INFO1(PERF_AST_HNDLR | DBG_FUNC_START, thread); - /* FIXME: probably want a faster allocator here... :P */ - struct kperf_sample *sbuf = kalloc( sizeof(*sbuf) ); - if( sbuf == NULL ) - { - /* FIXME: error code */ - BUF_INFO1( PERF_AST_ERROR, 0 ); - goto error; - } + /* use ~2kb of the stack for the sample, should be ok since we're in the ast */ + struct kperf_sample sbuf; + bzero(&sbuf, sizeof(struct kperf_sample)); /* make a context, take a sample */ struct kperf_context ctx; @@ -235,17 +263,15 @@ kperf_thread_ast_handler( thread_t thread ) sample_what |= SAMPLER_TINFOEX; if (t_chud & T_AST_CALLSTACK) + { sample_what |= SAMPLER_USTACK; + sample_what |= SAMPLER_TINFO; + } /* do the sample, just of the user stuff */ - r = kperf_sample_internal( sbuf, &ctx, sample_what, FALSE ); - - /* free it again */ - kfree( sbuf, sizeof(*sbuf) ); + r = kperf_sample_internal( &sbuf, &ctx, sample_what, 0, 0 ); -error: BUF_INFO1(PERF_AST_HNDLR | DBG_FUNC_END, r); - } /* register AST bits */ @@ -281,6 +307,132 @@ kperf_ast_pend( thread_t cur_thread, uint32_t check_bits, // BUF_INFO3( dbg_code, (uintptr_t)cur_thread, t_chud, set_done ); } +/* + * kdebug callback & stack management + */ + +#define IS_END(debugid) ((debugid & 3) == DBG_FUNC_END) +#define IS_MIG(debugid) (IS_END(debugid) && ((debugid & 0xff000000U) == KDBG_CLASS_ENCODE((unsigned)DBG_MIG, 0U))) +#define IS_MACH_SYSCALL(debugid) (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_MACH, DBG_MACH_EXCP_SC))) +#define IS_VM_FAULT(debugid) (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_MACH, DBG_MACH_VM))) +#define IS_BSD_SYSCTLL(debugid) (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_BSD, DBG_BSD_EXCP_SC))) +#define IS_APPS_SIGNPOST(debugid) (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_APPS, DBG_MACH_CHUD))) +#define IS_MACH_SIGNPOST(debugid) (IS_END(debugid) && (KDBG_CLASS_DECODE(debugid) == KDBG_CLASS_ENCODE(DBG_MACH, DBG_MACH_CHUD))) + +void +kperf_kdebug_callback(uint32_t debugid) +{ + int cur_pid = 0; + task_t task = NULL; + + /* if we're not doing kperf callback stacks, return */ + if( !kdebug_callstack_action ) + return; + + /* if we're looking at a kperf tracepoint, don't recurse */ + if( (debugid & 0xff000000) == KDBG_CLASS_ENCODE(DBG_PERF, 0) ) + return; + + /* ensure interrupts are already off thanks to kdebug */ + if( ml_get_interrupts_enabled() ) + return; + + /* make sure we're not being called recursively. 
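
The top byte of a debugid is its class, so the guard above compares the masked
id against kperf's own class to ensure that cutting a tracepoint from inside
this callback cannot re-enter it.  As a predicate (hypothetical name; the mask
and encoding follow the check above):

    /* TRUE for tracepoints kperf emitted itself (class DBG_PERF). */
    static int is_perf_tracepoint(uint32_t debugid)
    {
        return (debugid & 0xff000000) == KDBG_CLASS_ENCODE(DBG_PERF, 0);
    }
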
*/ +#if NOTYET + if( kperf_kdbg_recurse(KPERF_RECURSE_IN) ) + return; +#endif + + /* check the happy list of trace codes */ + if( !( IS_MIG(debugid) + || IS_MACH_SYSCALL(debugid) + || IS_VM_FAULT(debugid) + || IS_BSD_SYSCTLL(debugid) + || IS_MACH_SIGNPOST(debugid) + || IS_APPS_SIGNPOST(debugid) ) ) + return; + + /* check for kernel */ + thread_t thread = chudxnu_current_thread(); + task = chudxnu_task_for_thread(thread); + if(task) + cur_pid = chudxnu_pid_for_task(task); + if( !cur_pid ) + return; + +#if NOTYET + /* setup a context */ + struct kperf_context ctx; + struct kperf_sample *intbuf = NULL; + + ctx.cur_thread = thread; + ctx.cur_pid = cur_pid; + ctx.trigger_type = TRIGGER_TYPE_TRACE; + ctx.trigger_id = 0; + + /* CPU sample buffer -- only valid with interrupts off (above) + * Technically this isn't true -- tracepoints can, and often + * are, cut from interrupt handlers, but none of those tracepoints + * should make it this far. + */ + intbuf = kperf_intr_sample_buffer(); + + /* do the sample */ + kperf_sample( intbuf, &ctx, kdebug_callstack_action, SAMPLE_FLAG_PEND_USER ); + + /* no longer recursive */ + kperf_kdbg_recurse(KPERF_RECURSE_OUT); +#else + /* dicing with death */ + BUF_INFO2(PERF_KDBG_HNDLR, debugid, cur_pid); + + /* pend the AST */ + kperf_ast_pend( thread, T_AST_CALLSTACK, T_AST_CALLSTACK ); +#endif + +} + +int +kperf_kdbg_get_stacks(void) +{ + return kdebug_callstack_action; +} + +int +kperf_kdbg_set_stacks(int newval) +{ + /* set the value */ + kdebug_callstack_action = newval; + + /* enable the callback from kdebug */ + if( newval ) + chudxnu_kdebug_callback_enter(NULL); + else + chudxnu_kdebug_callback_cancel(); + + return 0; +} + +/* + * Thread switch + */ + +/* called from context switch handler */ +void +kperf_switch_context( __unused thread_t old, thread_t new ) +{ + task_t task = get_threadtask(new); + int pid = chudxnu_pid_for_task(task); + + /* cut a tracepoint to tell us what the new thread's PID is + * for Instruments + */ + BUF_DATA2( PERF_TI_CSWITCH, thread_tid(new), pid ); +} + +/* + * Action configuration + */ unsigned kperf_action_get_count(void) { @@ -290,10 +442,10 @@ kperf_action_get_count(void) int kperf_action_set_samplers( unsigned actionid, uint32_t samplers ) { - if( actionid >= actionc ) + if( (actionid > actionc) || (actionid == 0) ) return EINVAL; - actionv[actionid].sample = samplers; + actionv[actionid-1].sample = samplers; return 0; } @@ -301,10 +453,65 @@ kperf_action_set_samplers( unsigned actionid, uint32_t samplers ) int kperf_action_get_samplers( unsigned actionid, uint32_t *samplers_out ) { - if( actionid >= actionc ) + if( (actionid > actionc) ) return EINVAL; - *samplers_out = actionv[actionid].sample; + if( actionid == 0 ) + *samplers_out = 0; /* "NULL" action */ + else + *samplers_out = actionv[actionid-1].sample; + + return 0; +} + +int +kperf_action_set_userdata( unsigned actionid, uint32_t userdata ) +{ + if( (actionid > actionc) || (actionid == 0) ) + return EINVAL; + + actionv[actionid-1].userdata = userdata; + + return 0; +} + +int +kperf_action_get_userdata( unsigned actionid, uint32_t *userdata_out ) +{ + if( (actionid > actionc) ) + return EINVAL; + + if( actionid == 0 ) + *userdata_out = 0; /* "NULL" action */ + else + *userdata_out = actionv[actionid-1].userdata; + + return 0; +} + +int +kperf_action_set_filter( unsigned actionid, + int pid ) +{ + if( (actionid > actionc) || (actionid == 0) ) + return EINVAL; + + actionv[actionid-1].pid_filter = pid; + + return 0; +} + +int +kperf_action_get_filter( unsigned 
actionid, + int *pid_out ) +{ + if( (actionid > actionc) ) + return EINVAL; + + if( actionid == 0 ) + *pid_out = -1; /* "NULL" action */ + else + *pid_out = actionv[actionid-1].pid_filter; return 0; } @@ -313,7 +520,7 @@ int kperf_action_set_count(unsigned count) { struct action *new_actionv = NULL, *old_actionv = NULL; - unsigned old_count; + unsigned old_count, i; /* easy no-op */ if( count == actionc ) @@ -352,13 +559,14 @@ kperf_action_set_count(unsigned count) bzero( &new_actionv[actionc], (count - old_count) * sizeof(*actionv) ); + for( i = old_count; i < count; i++ ) + new_actionv[i].pid_filter = -1; + actionv = new_actionv; actionc = count; if( old_actionv != NULL ) kfree( old_actionv, old_count * sizeof(*actionv) ); - printf( "kperf: done the alloc\n" ); - return 0; } diff --git a/osfmk/kperf/action.h b/osfmk/kperf/action.h index 71e91df5f..1ea3f3169 100644 --- a/osfmk/kperf/action.h +++ b/osfmk/kperf/action.h @@ -32,16 +32,24 @@ struct kperf_context; /* bits for defining what to do on an action */ -#define SAMPLER_TINFO (1<<0) -#define SAMPLER_TINFOEX (1<<1) -#define SAMPLER_KSTACK (1<<2) -#define SAMPLER_USTACK (1<<3) +#define SAMPLER_TINFO (1<<0) +#define SAMPLER_TINFOEX (1<<1) +#define SAMPLER_KSTACK (1<<2) +#define SAMPLER_USTACK (1<<3) +#define SAMPLER_PMC_THREAD (1<<4) /* FIXME: not implemented */ +#define SAMPLER_PMC_CPU (1<<5) +#define SAMPLER_PMC_CONFIG (1<<6) + +/* flags for sample calls*/ +#define SAMPLE_FLAG_PEND_USER (1<<0) +#define SAMPLE_FLAG_IDLE_THREADS (1<<1) +#define SAMPLE_FLAG_EMPTY_CALLSTACK (1<<2) /* Take a sample into "sbuf" using current thread "cur_thread" */ extern kern_return_t kperf_sample( struct kperf_sample *sbuf, struct kperf_context*, unsigned actionid, - boolean_t pend_user ); + unsigned sample_flags ); /* return codes from taking a sample * either keep trigger, or something went wrong (or we're shutting down) @@ -49,6 +57,7 @@ extern kern_return_t kperf_sample( struct kperf_sample *sbuf, */ #define SAMPLE_CONTINUE (0) #define SAMPLE_SHUTDOWN (1) +#define SAMPLE_OFF (2) /* Get the sample buffer to use from interrupt handler context. Only * valid in interrupt contexts. 
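
Taken together, the accessors are driven from a configuration path roughly
like this (hypothetical caller; target_pid and the userdata word are
placeholders, and each call returns 0 on success or EINVAL for a bad
actionid):

    /* One action: kernel plus user callstacks, tagged with a userdata
     * word for the tooling, restricted to a single pid. */
    int target_pid = 1234;          /* placeholder */
    int err = 0;

    err |= kperf_action_set_count(1);
    err |= kperf_action_set_samplers(1, SAMPLER_KSTACK | SAMPLER_USTACK);
    err |= kperf_action_set_userdata(1, 0xfeed);
    err |= kperf_action_set_filter(1, target_pid);
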
@@ -64,5 +73,12 @@ extern int kperf_action_set_samplers( unsigned actionid, extern int kperf_action_get_samplers( unsigned actionid, uint32_t *samplers_out ); -extern void -kperf_thread_ast_handler( thread_t thread ); +extern int kperf_action_set_userdata( unsigned actionid, + uint32_t userdata ); +extern int kperf_action_get_userdata( unsigned actionid, + uint32_t *userdata_out ); + +extern int kperf_action_set_filter( unsigned actionid, + int pid ); +extern int kperf_action_get_filter( unsigned actionid, + int *pid_out ); diff --git a/osfmk/kperf/buffer.h b/osfmk/kperf/buffer.h index 0bb0f0970..28673a5a9 100644 --- a/osfmk/kperf/buffer.h +++ b/osfmk/kperf/buffer.h @@ -39,7 +39,9 @@ #define PERF_CALLSTACK (2) #define PERF_TIMER (3) #define PERF_PET (4) -#define PERF_AST (5) /* not confusing at all */ +#define PERF_AST (5) +#define PERF_KPC (6) +#define PERF_KDBG (7) /* sub-class codes */ #define PERF_GEN_CODE(code) PERF_CODE(PERF_GENERIC, code) @@ -51,6 +53,7 @@ #define PERF_TI_XSAMPLE PERF_TI_CODE(2) #define PERF_TI_XPEND PERF_TI_CODE(3) #define PERF_TI_XDATA PERF_TI_CODE(4) +#define PERF_TI_CSWITCH PERF_TI_CODE(5) #define PERF_CS_CODE(code) PERF_CODE(PERF_CALLSTACK, code) #define PERF_CS_KSAMPLE PERF_CS_CODE(0) @@ -58,6 +61,9 @@ #define PERF_CS_USAMPLE PERF_CS_CODE(2) #define PERF_CS_KDATA PERF_CS_CODE(3) #define PERF_CS_UDATA PERF_CS_CODE(4) +#define PERF_CS_KHDR PERF_CS_CODE(5) +#define PERF_CS_UHDR PERF_CS_CODE(6) +#define PERF_CS_ERROR PERF_CS_CODE(7) #define PERF_TM_CODE(code) PERF_CODE(PERF_TIMER, code) #define PERF_TM_ASCHED PERF_TM_CODE(0) @@ -71,10 +77,25 @@ #define PERF_PET_PAUSE PERF_PET_CODE(3) #define PERF_PET_IDLE PERF_PET_CODE(4) #define PERF_PET_SAMPLE PERF_PET_CODE(5) +#define PERF_PET_SCHED PERF_PET_CODE(6) +#define PERF_PET_END PERF_PET_CODE(7) #define PERF_AST_CODE(code) PERF_CODE(PERF_AST, code) -#define PERF_AST_HNDLR PERF_TM_CODE(0) -#define PERF_AST_ERROR PERF_PET_CODE(1) +#define PERF_AST_HNDLR PERF_AST_CODE(0) +#define PERF_AST_ERROR PERF_AST_CODE(1) + +#define PERF_KPC_CODE(code) PERF_CODE(PERF_KPC, code) +#define PERF_KPC_HNDLR PERF_KPC_CODE(0) +#define PERF_KPC_FCOUNTER PERF_KPC_CODE(1) +#define PERF_KPC_COUNTER PERF_KPC_CODE(2) +#define PERF_KPC_DATA PERF_KPC_CODE(3) +#define PERF_KPC_CONFIG PERF_KPC_CODE(4) +#define PERF_KPC_CFG_REG PERF_KPC_CODE(5) +#define PERF_KPC_DATA32 PERF_KPC_CODE(6) +#define PERF_KPC_CFG_REG32 PERF_KPC_CODE(7) + +#define PERF_KDBG_CODE(code) PERF_CODE(PERF_KDBG, code) +#define PERF_KDBG_HNDLR PERF_KDBG_CODE(0) /* error sub-codes for trace data */ enum @@ -87,17 +108,23 @@ enum ERR_NOMEM, }; +/* level of trace debug */ +#define KPERF_DEBUG_DATA 0 +#define KPERF_DEBUG_INFO 1 +#define KPERF_DEBUG_VERBOSE 2 +extern int kperf_debug_level; + /* for logging information / debugging -- optional */ -#define BUF_INFO( id, a0, a1, a2, a3) KERNEL_DEBUG_CONSTANT(id,a0,a1,a2,a3,0) +#define BUF_INFO( id, a0, a1, a2, a3) if (kperf_debug_level >= KPERF_DEBUG_INFO) KERNEL_DEBUG_CONSTANT_IST(~KDEBUG_ENABLE_PPT, id,a0,a1,a2,a3,0) #define BUF_INFO1( id, a0 ) BUF_INFO(id, a0, 0, 0, 0 ) #define BUF_INFO2( id, a0, a1 ) BUF_INFO(id, a0, a1, 0, 0 ) #define BUF_INFO3( id, a0, a1, a2 ) BUF_INFO(id, a0, a1, a2, 0 ) /* for logging actual data -- never compiled out */ -#define BUF_DATA( id, a0, a1, a2, a3) KERNEL_DEBUG_CONSTANT(id,a0,a1,a2,a3,0) +#define BUF_DATA( id, a0, a1, a2, a3) KERNEL_DEBUG_CONSTANT_IST(~KDEBUG_ENABLE_PPT, id,a0,a1,a2,a3,0) /* code neatness */ #define BUF_DATA1( id, a0 ) BUF_DATA(id, a0, 0, 0, 0 ) #define BUF_DATA2( id, a0, a1 ) 
BUF_DATA(id, a0, a1, 0, 0 ) -#define BUF_DATA3( id, a0, a1, a3 ) BUF_DATA(id, a0, a1, a2, a3 ) +#define BUF_DATA3( id, a0, a1, a2 ) BUF_DATA(id, a0, a1, a2, 0 ) diff --git a/osfmk/kperf/callstack.c b/osfmk/kperf/callstack.c index d0c1e3947..4bedab65b 100644 --- a/osfmk/kperf/callstack.c +++ b/osfmk/kperf/callstack.c @@ -71,10 +71,10 @@ callstack_sample( struct callstack *cs, /* collect the callstack */ nframes = MAX_CALLSTACK_FRAMES; - kr = chudxnu_thread_get_callstack64( context->cur_thread, - cs->frames, - &nframes, - is_user ); + kr = chudxnu_thread_get_callstack64_kperf( context->cur_thread, + cs->frames, + &nframes, + is_user ); /* check for overflow */ if( kr == KERN_SUCCESS ) @@ -91,14 +91,14 @@ callstack_sample( struct callstack *cs, } else { - BUF_INFO2(PERF_PET_ERROR, ERR_GETSTACK, kr); + BUF_INFO2(PERF_CS_ERROR, ERR_GETSTACK, kr); cs->nframes = 0; } if( cs->nframes >= MAX_CALLSTACK_FRAMES ) { /* necessary? */ - BUF_INFO1(PERF_PET_ERROR, ERR_FRAMES); + BUF_INFO1(PERF_CS_ERROR, ERR_FRAMES); cs->nframes = 0; } @@ -117,12 +117,12 @@ kperf_ucallstack_sample( struct callstack *cs, struct kperf_context *context ) } static void -callstack_log( struct callstack *cs, uint32_t code ) +callstack_log( struct callstack *cs, uint32_t hcode, uint32_t dcode ) { unsigned int i, j, n, of = 4; /* Header on the stack */ - BUF_DATA2( code, cs->flags, cs->nframes ); + BUF_DATA2( hcode, cs->flags, cs->nframes ); /* look for how many batches of 4 */ n = cs->nframes / 4; @@ -135,7 +135,7 @@ callstack_log( struct callstack *cs, uint32_t code ) { #define SCRUB_FRAME(x) (((x)<cs->nframes)?cs->frames[x]:0) j = i * 4; - BUF_DATA ( code, + BUF_DATA ( dcode, SCRUB_FRAME(j+0), SCRUB_FRAME(j+1), SCRUB_FRAME(j+2), @@ -147,13 +147,13 @@ callstack_log( struct callstack *cs, uint32_t code ) void kperf_kcallstack_log( struct callstack *cs ) { - callstack_log( cs, PERF_CS_KDATA ); + callstack_log( cs, PERF_CS_KHDR, PERF_CS_KDATA ); } void kperf_ucallstack_log( struct callstack *cs ) { - callstack_log( cs, PERF_CS_UDATA ); + callstack_log( cs, PERF_CS_UHDR, PERF_CS_UDATA ); } int diff --git a/osfmk/kperf/filter.c b/osfmk/kperf/filter.c deleted file mode 100644 index 1485d7472..000000000 --- a/osfmk/kperf/filter.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2011 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License.
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -/* Toy filtering. Allow system-wide or filtering on 4 PIDs */ - -#include -#include /* NULL */ -// #include - -#include -#include - -// Filter params... dodge for now -#define NPIDS (4) -int pid_list[NPIDS]; - -// function to determine whether we should take a sample -int -kperf_filter_should_sample(struct kperf_context *context) -{ - int i, restricted = 0; - - /* see if the pids are restricted */ - for( i = 0; i < NPIDS; i++ ) - { - if( context->cur_pid == pid_list[i] ) - return 1; - - if( pid_list[i] != -1 ) - restricted = 1; - } - - /* wasn't in the pid list, but something was */ - if( restricted ) - return 0; - - /* not fitered, sample it */ - return 1; -} - -/* check whether pid filtering is enabled */ -int -kperf_filter_on_pid(void) -{ - int i; - - for( i = 0; i < NPIDS; i++ ) - if( pid_list[i] != -1 ) - return 1; - - return 0; -} - -/* create a list of pids to filter */ -void -kperf_filter_pid_list( int *outcount, int **outv ) -{ - int i, found = 0; - - for( i = 0; i < NPIDS; i++ ) - if( pid_list[i] != -1 ) - found = 1; - - if( !found ) - { - *outcount = 0; - *outv = NULL; - return; - } - - /* just return our list */ - *outcount = NPIDS; - *outv = pid_list; -} - -/* free a list we created*/ -void -kperf_filter_free_pid_list( int *incount, int **inv ) -{ - // no op - (void) incount; - (void) inv; -} - -/* init the filters to nothing */ -void -kperf_filter_init(void) -{ - int i; - for( i = 0; i < NPIDS; i++ ) - pid_list[i] = -1; -} diff --git a/osfmk/kperf/kperf.c b/osfmk/kperf/kperf.c index 2d6fc400e..b1d9d4cb4 100644 --- a/osfmk/kperf/kperf.c +++ b/osfmk/kperf/kperf.c @@ -31,22 +31,37 @@ #include #include -#include #include -#include #include #include #include #include +#include /* port_name_to_task */ + /** misc functions **/ #include /* XXX: should bust this out */ +/* thread on CPUs before starting the PET thread */ +thread_t *kperf_thread_on_cpus = NULL; + +/* interrupt sample buffers -- one wired per CPU */ static struct kperf_sample *intr_samplev = NULL; static unsigned intr_samplec = 0; + +/* track recursion in the trace code */ +static struct +{ + int active; + int pad[64 / sizeof(int)]; +} *kpdbg_recursev; +static unsigned kpdbg_recursec = 0; + +/* Current sampling status */ static unsigned sampling_status = KPERF_SAMPLING_OFF; -static unsigned kperf_initted = 0; +/* Make sure we only init once */ +static unsigned kperf_initted = 0; extern void (*chudxnu_thread_ast_handler)(thread_t); @@ -62,11 +77,53 @@ kperf_intr_sample_buffer(void) return &intr_samplev[ncpu]; } +int +kperf_kdbg_recurse(int step) +{ + unsigned ncpu = chudxnu_cpu_number(); + + // XXX: assert?
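/* [Editor's note -- not part of the patch] kperf_kdbg_recurse() below is a
 * per-CPU re-entrancy guard for the kdebug callback: each CPU owns one slot
 * (padded out to roughly a cache line to avoid false sharing), claimed on
 * KPERF_RECURSE_IN (+1) and released on KPERF_RECURSE_OUT (-1). The four
 * cases are: enter while free -> claim and return 0; enter while claimed ->
 * return 1 so the caller bails out rather than recursing; leave while
 * claimed -> release and return 0; leave while free -> panic, since that
 * means an unbalanced enter/leave pair.
 */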
+ if( ncpu >= kpdbg_recursec ) + return 1; + + /* recursing in, available */ + if( (step > 0) + && (kpdbg_recursev[ncpu].active == 0) ) + { + kpdbg_recursev[ncpu].active = 1; + return 0; + } + + /* recursing in, unavailable */ + if( (step > 0) + && (kpdbg_recursev[ncpu].active != 0) ) + { + return 1; + } + + /* recursing out, unavailable */ + if( (step < 0) + && (kpdbg_recursev[ncpu].active != 0) ) + { + kpdbg_recursev[ncpu].active = 0; + return 0; + } + + /* recursing out, available */ + if( (step < 0) + && (kpdbg_recursev[ncpu].active == 0) ) + panic( "return from non-recursed kperf kdebug call" ); + + panic( "unknown kperf kdebug call" ); + return 1; +} + /* setup interrupt sample buffers */ int kperf_init(void) { unsigned ncpus = 0; + int err; if( kperf_initted ) return 0; @@ -74,51 +131,50 @@ kperf_init(void) /* get number of cpus */ ncpus = machine_info.logical_cpu_max; - /* make the CPU array + kperf_thread_on_cpus = kalloc( ncpus * sizeof(*kperf_thread_on_cpus) ); + if( kperf_thread_on_cpus == NULL ) + { + err = ENOMEM; + goto error; + } + + /* clear it */ + bzero( kperf_thread_on_cpus, ncpus * sizeof(*kperf_thread_on_cpus) ); + + /* make the CPU array * FIXME: cache alignment */ intr_samplev = kalloc( ncpus * sizeof(*intr_samplev)); + intr_samplec = ncpus; if( intr_samplev == NULL ) - return ENOMEM; + { + err = ENOMEM; + goto error; + } /* clear it */ bzero( intr_samplev, ncpus * sizeof(*intr_samplev) ); - - chudxnu_thread_ast_handler = kperf_thread_ast_handler; + + /* make the recursion array */ + kpdbg_recursev = kalloc( ncpus * sizeof(*kpdbg_recursev)); + kpdbg_recursec = ncpus; + + /* clear it */ + bzero( kpdbg_recursev, ncpus * sizeof(*kpdbg_recursev) ); /* we're done */ - intr_samplec = ncpus; kperf_initted = 1; return 0; +error: + if( intr_samplev ) + kfree( intr_samplev, ncpus * sizeof(*intr_samplev) ); + if( kperf_thread_on_cpus ) + kfree( kperf_thread_on_cpus, ncpus * sizeof(*kperf_thread_on_cpus) ); + return err; } - -/** kext start/stop functions **/ -kern_return_t kperf_start (kmod_info_t * ki, void * d); - -kern_return_t -kperf_start (kmod_info_t * ki, void * d) -{ - (void) ki; - (void) d; - - /* say hello */ - printf( "aprof: kext starting\n" ); - - /* register modules */ - // kperf_action_init(); - kperf_filter_init(); - kperf_pet_init(); - - /* register the sysctls */ - //kperf_register_profiling(); - - return KERN_SUCCESS; -} - - /* random misc-ish functions */ uint32_t kperf_get_thread_bits( thread_t thread ) @@ -192,3 +248,25 @@ kperf_sampling_disable(void) return 0; } + +int +kperf_port_to_pid(mach_port_name_t portname) +{ + task_t task; + int pid; + + if( !MACH_PORT_VALID(portname) ) + return -1; + + task = port_name_to_task(portname); + + if( task == TASK_NULL ) + return -1; + + + pid = chudxnu_pid_for_task(task); + + task_deallocate_internal(task); + + return pid; +} diff --git a/osfmk/kperf/kperf.h b/osfmk/kperf/kperf.h index 1e1ab32cd..12b168acd 100644 --- a/osfmk/kperf/kperf.h +++ b/osfmk/kperf/kperf.h @@ -26,21 +26,64 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* XXX: still needed? just access directly? 
*/ +#ifndef __KPERF_H__ +#define __KPERF_H__ +/* The various trigger types supported by kperf */ #define TRIGGER_TYPE_TIMER (0) #define TRIGGER_TYPE_PMI (1) #define TRIGGER_TYPE_TRACE (2) +/* Helpers to get and set AST bits on a thread */ extern uint32_t kperf_get_thread_bits( thread_t thread ); extern void kperf_set_thread_bits( thread_t thread, uint32_t bits ); extern void kperf_set_thread_ast( thread_t thread ); +/* Possible states of kperf sampling */ #define KPERF_SAMPLING_OFF 0 #define KPERF_SAMPLING_ON 1 #define KPERF_SAMPLING_SHUTDOWN 2 +/* Init kperf module. Must be called before use, can be called as many + * times as you like. + */ extern int kperf_init(void); + +/* Get and set sampling status */ extern unsigned kperf_sampling_status(void); extern int kperf_sampling_enable(void); extern int kperf_sampling_disable(void); + +/* kperf AST handler + */ +extern void kperf_thread_ast_handler( thread_t thread ); + +/* kperf kdebug callback + */ +extern void kperf_kdebug_callback(uint32_t debugid); + +/* get and set whether we're recording stacks on interesting kdebug events */ +extern int kperf_kdbg_get_stacks(void); +extern int kperf_kdbg_set_stacks(int); + +/* given a task port, find out its pid */ +int kperf_port_to_pid(mach_port_name_t portname); + +/* Check whether the current process has been blessed to allow access + * to kperf facilities. + */ +extern int kperf_access_check(void); + +/* track recursion on kdebug tracepoint tracking */ +extern int kperf_kdbg_recurse(int step); +#define KPERF_RECURSE_IN (1) +#define KPERF_RECURSE_OUT (-1) + +/* context switch tracking */ +extern int kperf_cswitch_hook; +extern void kperf_switch_context( thread_t old, thread_t new ); + +/* bootstrap */ +extern void kperf_bootstrap(void); + +#endif /* __KPERF_H__ */ diff --git a/osfmk/kperf/kperf_arch.h b/osfmk/kperf/kperf_arch.h index dd6e319c3..9019d6562 100644 --- a/osfmk/kperf/kperf_arch.h +++ b/osfmk/kperf/kperf_arch.h @@ -37,5 +37,6 @@ /* common definitions */ extern int kperf_mp_broadcast( void (*func)(void*), void *arg ); +extern int kperf_mp_signal(void); #endif /* _KPERF_ARCH_H */ diff --git a/osfmk/kperf/kperf_kpc.c b/osfmk/kperf/kperf_kpc.c new file mode 100644 index 000000000..fc474262f --- /dev/null +++ b/osfmk/kperf/kperf_kpc.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + + +/* Sample KPC data into kperf */ + +#include +#include /* thread_* */ +#include /* panic */ +// #include + +#include +#include + +#include +#include + +#include + +/* If we have kperf enabled, but not KPC */ +#if KPC + +void +kperf_kpc_cpu_sample( struct kpcdata *kpcd, int sample_config ) +{ + kpcd->running = kpc_get_running(); + kpcd->counterc = kpc_get_cpu_counters(0, kpcd->running, + &kpcd->curcpu, kpcd->counterv); + if( !sample_config ) + kpcd->configc = 0; + else + { + kpcd->configc = kpc_get_config_count(kpcd->running); + kpc_get_config(kpcd->running, kpcd->configv); + } + +} + +void +kperf_kpc_cpu_log( struct kpcdata *kpcd ) +{ + unsigned i; + + /* cut a config for instruments -- what's running and + * how many fixed counters there are + */ + BUF_DATA(PERF_KPC_CONFIG, + kpcd->running, + kpcd->counterc, + kpc_get_counter_count(KPC_CLASS_FIXED_MASK), + kpcd->configc); + +#if __LP64__ + /* config registers, if they were asked for */ + for (i = 0; i < ((kpcd->configc+3) / 4); i++) { + BUF_DATA( PERF_KPC_CFG_REG, + kpcd->configv[0 + i * 4], + kpcd->configv[1 + i * 4], + kpcd->configv[2 + i * 4], + kpcd->configv[3 + i * 4] ); + } + + /* and the actual data -- 64-bit trace entries */ + for (i = 0; i < ((kpcd->counterc+3) / 4); i++) { + BUF_DATA( PERF_KPC_DATA, + kpcd->counterv[0 + i * 4], + kpcd->counterv[1 + i * 4], + kpcd->counterv[2 + i * 4], + kpcd->counterv[3 + i * 4] ); + } + +#else + /* config registers, if requested */ + for (i = 0; i < ((kpcd->configc+1) / 2); i++) { + BUF_DATA( PERF_KPC_CFG_REG32, + (kpcd->configv[0 + i * 2] >> 32ULL), + kpcd->configv[0 + i * 2] & 0xffffffffULL, + (kpcd->configv[1 + i * 2] >> 32ULL), + kpcd->configv[1 + i * 2] & 0xffffffffULL ); + } + + /* and the actual data -- two counters per tracepoint */ + for (i = 0; i < ((kpcd->counterc+1) / 2); i++) { + BUF_DATA( PERF_KPC_DATA32, + (kpcd->counterv[0 + i * 2] >> 32ULL), + kpcd->counterv[0 + i * 2] & 0xffffffffULL, + (kpcd->counterv[1 + i * 2] >> 32ULL), + kpcd->counterv[1 + i * 2] & 0xffffffffULL ); + } +#endif +} + +#endif /* KPC */ diff --git a/osfmk/kperf/filter.h b/osfmk/kperf/kperf_kpc.h similarity index 75% rename from osfmk/kperf/filter.h rename to osfmk/kperf/kperf_kpc.h index 655c4fd30..91df04c0f 100644 --- a/osfmk/kperf/filter.h +++ b/osfmk/kperf/kperf_kpc.h @@ -26,14 +26,26 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -/* so we can pull this in without the context header... 
*/ -struct kperf_context; +#ifndef __KPERF_KPC_H__ +#define __KPERF_KPC_H__ -extern void kperf_filter_init(void); -extern int kperf_filter_should_sample(struct kperf_context *context); +#if KPC +#include /* KPC_MAX_COUNTERS */ +#endif -extern int kperf_filter_on_pid(void); -extern void kperf_filter_pid_list( int *outcount, int **outv ); -extern void kperf_filter_free_pid_list( int *incount, int **inv ); +/* KPC sample data */ +struct kpcdata +{ + int curcpu; + uint32_t running; + uint32_t counterc; + uint64_t counterv[KPC_MAX_COUNTERS+1]; + uint32_t configc; + uint64_t configv[KPC_MAX_COUNTERS]; +}; -extern int pid_list[]; + +void kperf_kpc_cpu_sample( struct kpcdata *, int ); +void kperf_kpc_cpu_log( struct kpcdata * ); + +#endif /* __KPERF_KPC_H__ */ diff --git a/osfmk/kperf/kperfbsd.c b/osfmk/kperf/kperfbsd.c index 6e626e453..149b07093 100644 --- a/osfmk/kperf/kperfbsd.c +++ b/osfmk/kperf/kperfbsd.c @@ -32,23 +32,69 @@ #include #include #include +#include #include +#include +#include #include #include #include #include -#include #include #include + +/* a pid which is allowed to control kperf without requiring root access */ +static pid_t blessed_pid = -1; +static boolean_t blessed_preempt = FALSE; + +/* IDs for dispatch from SYSCTL macros */ #define REQ_SAMPLING (1) #define REQ_ACTION_COUNT (2) #define REQ_ACTION_SAMPLERS (3) #define REQ_TIMER_COUNT (4) #define REQ_TIMER_PERIOD (5) #define REQ_TIMER_PET (6) +#define REQ_TIMER_ACTION (7) +#define REQ_BLESS (8) +#define REQ_ACTION_USERDATA (9) +#define REQ_ACTION_FILTER_BY_TASK (10) +#define REQ_ACTION_FILTER_BY_PID (11) +#define REQ_KDBG_CALLSTACKS (12) +#define REQ_PET_IDLE_RATE (13) +#define REQ_BLESS_PREEMPT (14) + +/* simple state variables */ +int kperf_debug_level = 0; + +static lck_grp_attr_t *kperf_cfg_lckgrp_attr = NULL; +static lck_grp_t *kperf_cfg_lckgrp = NULL; +static lck_mtx_t kperf_cfg_lock; +static boolean_t kperf_cfg_initted = FALSE; + +/*************************** + * + * lock init + * + ***************************/ + +void +kperf_bootstrap(void) +{ + kperf_cfg_lckgrp_attr = lck_grp_attr_alloc_init(); + kperf_cfg_lckgrp = lck_grp_alloc_init("kperf cfg", + kperf_cfg_lckgrp_attr); + lck_mtx_init(&kperf_cfg_lock, kperf_cfg_lckgrp, LCK_ATTR_NULL); + + kperf_cfg_initted = TRUE; +} +/*************************** + * + * sysctl handlers + * + ***************************/ static int sysctl_timer_period( __unused struct sysctl_oid *oidp, struct sysctl_req *req ) @@ -60,22 +106,15 @@ sysctl_timer_period( __unused struct sysctl_oid *oidp, struct sysctl_req *req ) /* get 2x 64-bit words */ error = SYSCTL_IN( req, inputs, 2*sizeof(inputs[0]) ); if(error) - { - printf( "error in\n" ); return (error); - } /* setup inputs */ timer = (unsigned) inputs[0]; if( inputs[1] != ~0ULL ) set = 1; - printf( "%s timer: %u, inp[0] %llu\n", set ? 
"set" : "get", - timer, inputs[0] ); - if( set ) { - printf( "timer set period\n" ); error = kperf_timer_set_period( timer, inputs[1] ); if( error ) return error; @@ -83,20 +122,50 @@ sysctl_timer_period( __unused struct sysctl_oid *oidp, struct sysctl_req *req ) error = kperf_timer_get_period(timer, &retval); if(error) - { - printf( "error get period\n" ); return (error); - } inputs[1] = retval; if( error == 0 ) - { error = SYSCTL_OUT( req, inputs, 2*sizeof(inputs[0]) ); + + return error; +} + +static int +sysctl_timer_action( __unused struct sysctl_oid *oidp, struct sysctl_req *req ) +{ + int error = 0; + uint64_t inputs[2]; + uint32_t retval; + unsigned timer, set = 0; + + /* get 2x 64-bit words */ + error = SYSCTL_IN( req, inputs, 2*sizeof(inputs[0]) ); + if(error) + return (error); + + /* setup inputs */ + timer = (unsigned) inputs[0]; + if( inputs[1] != ~0ULL ) + set = 1; + + if( set ) + { + error = kperf_timer_set_action( timer, inputs[1] ); if( error ) - printf( "error out\n" ); + return error; } + error = kperf_timer_get_action(timer, &retval); + if(error) + return (error); + + inputs[1] = retval; + + if( error == 0 ) + error = SYSCTL_OUT( req, inputs, 2*sizeof(inputs[0]) ); + return error; } @@ -112,10 +181,7 @@ sysctl_action_samplers( __unused struct sysctl_oid *oidp, /* get 3x 64-bit words */ error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) ); if(error) - { - printf( "error in\n" ); return (error); - } /* setup inputs */ set = (unsigned) inputs[0]; @@ -128,25 +194,99 @@ sysctl_action_samplers( __unused struct sysctl_oid *oidp, return error; } - printf("set %d actionid %u samplers val %u\n", - set, actionid, (unsigned) inputs[2] ); - error = kperf_action_get_samplers(actionid, &retval); if(error) - { - printf( "error get samplers\n" ); return (error); - } inputs[2] = retval; if( error == 0 ) + error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) ); + + return error; +} + +static int +sysctl_action_userdata( __unused struct sysctl_oid *oidp, + struct sysctl_req *req ) +{ + int error = 0; + uint64_t inputs[3]; + uint32_t retval; + unsigned actionid, set = 0; + + /* get 3x 64-bit words */ + error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) ); + if(error) + return (error); + + /* setup inputs */ + set = (unsigned) inputs[0]; + actionid = (unsigned) inputs[1]; + + if( set ) { + error = kperf_action_set_userdata( actionid, inputs[2] ); + if( error ) + return error; + } + + error = kperf_action_get_userdata(actionid, &retval); + if(error) + return (error); + + inputs[2] = retval; + + if( error == 0 ) error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) ); + + return error; +} + +static int +sysctl_action_filter( __unused struct sysctl_oid *oidp, + struct sysctl_req *req, int is_task_t ) +{ + int error = 0; + uint64_t inputs[3]; + int retval; + unsigned actionid, set = 0; + mach_port_name_t portname; + int pid; + + /* get 3x 64-bit words */ + error = SYSCTL_IN( req, inputs, 3*sizeof(inputs[0]) ); + if(error) + return (error); + + /* setup inputs */ + set = (unsigned) inputs[0]; + actionid = (unsigned) inputs[1]; + + if( set ) + { + if( is_task_t ) + { + portname = (mach_port_name_t) inputs[2]; + pid = kperf_port_to_pid(portname); + } + else + pid = (int) inputs[2]; + + error = kperf_action_set_filter( actionid, pid ); if( error ) - printf( "error out\n" ); + return error; } + error = kperf_action_get_filter(actionid, &retval); + if(error) + return (error); + + inputs[2] = retval; + + if( error == 0 ) + error = SYSCTL_OUT( req, inputs, 3*sizeof(inputs[0]) ); + return error; } @@ 
-164,8 +304,6 @@ sysctl_sampling( struct sysctl_oid *oidp, struct sysctl_req *req ) if (error || !req->newptr) return (error); - printf( "setting sampling to %d\n", value ); - /* if that worked, and we're writing... */ if( value ) error = kperf_sampling_enable(); @@ -189,8 +327,6 @@ sysctl_action_count( struct sysctl_oid *oidp, struct sysctl_req *req ) if (error || !req->newptr) return (error); - printf( "setting action count to %d\n", value ); - /* if that worked, and we're writing... */ return kperf_action_set_count(value); } @@ -209,8 +345,6 @@ sysctl_timer_count( struct sysctl_oid *oidp, struct sysctl_req *req ) if (error || !req->newptr) return (error); - printf( "setting timer count to %d\n", value ); - /* if that worked, and we're writing... */ return kperf_timer_set_count(value); } @@ -229,12 +363,91 @@ sysctl_timer_pet( struct sysctl_oid *oidp, struct sysctl_req *req ) if (error || !req->newptr) return (error); - printf( "setting timer petid to %d\n", value ); - /* if that worked, and we're writing... */ return kperf_timer_set_petid(value); } +static int +sysctl_bless( struct sysctl_oid *oidp, struct sysctl_req *req ) +{ + int error = 0; + int value = 0; + + /* get the old value and process it */ + value = blessed_pid; + + /* copy out the old value, get the new value */ + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || !req->newptr) + return (error); + + /* if that worked, and we're writing... */ + error = kperf_bless_pid(value); + + return error; +} + +static int +sysctl_bless_preempt( struct sysctl_oid *oidp, struct sysctl_req *req ) +{ + int error = 0; + int value = 0; + + /* get the old value and process it */ + value = blessed_preempt; + + /* copy out the old value, get the new value */ + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || !req->newptr) + return (error); + + /* if that worked, and we're writing... */ + blessed_preempt = value ? TRUE : FALSE; + + return 0; +} + + +static int +sysctl_kdbg_callstacks( struct sysctl_oid *oidp, struct sysctl_req *req ) +{ + int error = 0; + int value = 0; + + /* get the old value and process it */ + value = kperf_kdbg_get_stacks(); + + /* copy out the old value, get the new value */ + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || !req->newptr) + return (error); + + /* if that worked, and we're writing... */ + error = kperf_kdbg_set_stacks(value); + + return error; +} + +static int +sysctl_pet_idle_rate( struct sysctl_oid *oidp, struct sysctl_req *req ) +{ + int error = 0; + int value = 0; + + /* get the old value and process it */ + value = kperf_get_pet_idle_rate(); + + /* copy out the old value, get the new value */ + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || !req->newptr) + return (error); + + /* if that worked, and we're writing... 
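 * [Editor's note -- not part of the patch: every integer sysctl handler in
 * this file follows the same read-modify-write shape -- seed `value` with
 * the current setting, let sysctl_handle_int() copy the old value out and
 * any new value in, return early unless req->newptr marks a write, and only
 * then push the new value into kperf, here via kperf_set_pet_idle_rate().]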
*/ + kperf_set_pet_idle_rate(value); + + return error; +} + /* * #define SYSCTL_HANDLER_ARGS (struct sysctl_oid *oidp, \ * void *arg1, int arg2, \ @@ -243,36 +456,183 @@ sysctl_timer_pet( struct sysctl_oid *oidp, struct sysctl_req *req ) static int kperf_sysctl SYSCTL_HANDLER_ARGS { + int ret; + // __unused struct sysctl_oid *unused_oidp = oidp; (void)arg2; - + + if ( !kperf_cfg_initted ) + panic("kperf_bootstrap not called"); + + ret = kperf_access_check(); + if (ret) { + return ret; + } + + lck_mtx_lock(&kperf_cfg_lock); + /* which request */ switch( (uintptr_t) arg1 ) { case REQ_ACTION_COUNT: - return sysctl_action_count( oidp, req ); + ret = sysctl_action_count( oidp, req ); + break; case REQ_ACTION_SAMPLERS: - return sysctl_action_samplers( oidp, req ); + ret = sysctl_action_samplers( oidp, req ); + break; + case REQ_ACTION_USERDATA: + ret = sysctl_action_userdata( oidp, req ); + break; case REQ_TIMER_COUNT: - return sysctl_timer_count( oidp, req ); + ret = sysctl_timer_count( oidp, req ); + break; case REQ_TIMER_PERIOD: - return sysctl_timer_period( oidp, req ); + ret = sysctl_timer_period( oidp, req ); + break; case REQ_TIMER_PET: - return sysctl_timer_pet( oidp, req ); + ret = sysctl_timer_pet( oidp, req ); + break; + case REQ_TIMER_ACTION: + ret = sysctl_timer_action( oidp, req ); + break; case REQ_SAMPLING: - return sysctl_sampling( oidp, req ); - -#if 0 - case REQ_TIMER: - return sysctl_timer_period( req ); - case REQ_PET: - return sysctl_pet_period( req ); -#endif + ret = sysctl_sampling( oidp, req ); + break; + case REQ_KDBG_CALLSTACKS: + ret = sysctl_kdbg_callstacks( oidp, req ); + break; + case REQ_ACTION_FILTER_BY_TASK: + ret = sysctl_action_filter( oidp, req, 1 ); + break; + case REQ_ACTION_FILTER_BY_PID: + ret = sysctl_action_filter( oidp, req, 0 ); + break; + case REQ_PET_IDLE_RATE: + ret = sysctl_pet_idle_rate( oidp, req ); + break; + case REQ_BLESS_PREEMPT: + ret = sysctl_bless_preempt( oidp, req ); + break; default: - return ENOENT; + ret = ENOENT; + break; + } + + lck_mtx_unlock(&kperf_cfg_lock); + + return ret; +} + +static int +kperf_sysctl_bless_handler SYSCTL_HANDLER_ARGS +{ + int ret; + // __unused struct sysctl_oid *unused_oidp = oidp; + (void)arg2; + + if ( !kperf_cfg_initted ) + panic("kperf_bootstrap not called"); + + lck_mtx_lock(&kperf_cfg_lock); + + /* which request */ + if ( (uintptr_t) arg1 == REQ_BLESS ) + ret = sysctl_bless( oidp, req ); + else + ret = ENOENT; + + lck_mtx_unlock(&kperf_cfg_lock); + + return ret; +} + + +/*************************** + * + * Access control + * + ***************************/ + +/* Validate whether the current process has privileges to access + * kperf (and by extension, trace). Returns 0 if access is granted. + */ +int +kperf_access_check(void) +{ + proc_t p = current_proc(); + proc_t blessed_p; + int ret = 0; + boolean_t pid_gone = FALSE; + + /* check if the pid that held the lock is gone */ + blessed_p = proc_find(blessed_pid); + + if ( blessed_p != NULL ) + proc_rele(blessed_p); + else + pid_gone = TRUE; + + if ( blessed_pid == -1 || pid_gone ) { + /* check for root */ + ret = suser(kauth_cred_get(), &p->p_acflag); + if( !ret ) + return ret; + } + + /* check against blessed pid */ + if( p->p_pid != blessed_pid ) + return EACCES; + + /* access granted. */ + return 0; +} + +/* specify a pid as being able to access kperf/trace, despite not + * being root + */ +int +kperf_bless_pid(pid_t newpid) +{ + proc_t p = NULL; + pid_t current_pid; + + p = current_proc(); + current_pid = p->p_pid; + + /* are we allowed to preempt?
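 * [Editor's note -- not part of the patch: "blessing" lets exactly one
 * non-root process control kperf. A new pid may displace the current
 * blessed_pid only if the old process has exited, the caller is itself the
 * blessed process, or blessed_preempt was set beforehand; newpid == -1
 * always succeeds and simply clears the blessing. Note that the preempt
 * flag is consumed -- reset to FALSE -- by every successful bless below.]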
*/ + if ( (newpid != -1) && (blessed_pid != -1) && + (blessed_pid != current_pid) && !blessed_preempt ) { + /* check if the pid that held the lock is gone */ + p = proc_find(blessed_pid); + + if ( p != NULL ) { + proc_rele(p); + return EACCES; + } + } + + /* validate new pid */ + if ( newpid != -1 ) { + p = proc_find(newpid); + + if ( p == NULL ) + return EINVAL; + + proc_rele(p); } + + blessed_pid = newpid; + blessed_preempt = FALSE; + + return 0; } +/*************************** + * + * sysctl hooks + * + ***************************/ + /* root kperf node */ SYSCTL_NODE(, OID_AUTO, kperf, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "kperf"); @@ -292,6 +652,24 @@ SYSCTL_PROC(_kperf_action, OID_AUTO, samplers, 3*sizeof(uint64_t), kperf_sysctl, "UQ", "What to sample when a trigger fires an action"); +SYSCTL_PROC(_kperf_action, OID_AUTO, userdata, + CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_ACTION_USERDATA, + 3*sizeof(uint64_t), kperf_sysctl, "UQ", + "User data to attribute to action"); + +SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_task, + CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_ACTION_FILTER_BY_TASK, + 3*sizeof(uint64_t), kperf_sysctl, "UQ", + "Apply a task filter to the action"); + +SYSCTL_PROC(_kperf_action, OID_AUTO, filter_by_pid, + CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_ACTION_FILTER_BY_PID, + 3*sizeof(uint64_t), kperf_sysctl, "UQ", + "Apply a pid filter to the action"); + /* timer sub-section */ SYSCTL_NODE(_kperf, OID_AUTO, timer, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "timer"); @@ -306,6 +684,11 @@ SYSCTL_PROC(_kperf_timer, OID_AUTO, period, (void*)REQ_TIMER_PERIOD, 2*sizeof(uint64_t), kperf_sysctl, "UQ", "Timer number and period"); +SYSCTL_PROC(_kperf_timer, OID_AUTO, action, + CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_TIMER_ACTION, + 2*sizeof(uint64_t), kperf_sysctl, "UQ", "Timer number and actionid"); + SYSCTL_PROC(_kperf_timer, OID_AUTO, pet_timer, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, (void*)REQ_TIMER_PET, @@ -317,26 +700,32 @@ SYSCTL_PROC(_kperf, OID_AUTO, sampling, (void*)REQ_SAMPLING, sizeof(int), kperf_sysctl, "I", "Sampling running"); -int legacy_mode = 1; -SYSCTL_INT(_kperf, OID_AUTO, legacy_mode, CTLFLAG_RW, &legacy_mode, 0, "legacy_mode"); - -#if 0 -SYSCTL_PROC(_kperf, OID_AUTO, timer_period, - CTLFLAG_RW, (void*)REQ_TIMER, - sizeof(uint64_t), kperf_sysctl, "QU", "nanoseconds"); - -SYSCTL_PROC(_kperf, OID_AUTO, pet_period, - CTLFLAG_RW, (void*)REQ_PET, - sizeof(uint64_t), kperf_sysctl, "QU", "nanoseconds"); - -/* FIXME: do real stuff */ -SYSCTL_INT(_kperf, OID_AUTO, filter_pid0, - CTLFLAG_RW, &pid_list[0], 0, ""); -SYSCTL_INT(_kperf, OID_AUTO, filter_pid1, - CTLFLAG_RW, &pid_list[1], 0, ""); -SYSCTL_INT(_kperf, OID_AUTO, filter_pid2, - CTLFLAG_RW, &pid_list[2], 0, ""); -SYSCTL_INT(_kperf, OID_AUTO, filter_pid3, - CTLFLAG_RW, &pid_list[3], 0, ""); - -#endif +SYSCTL_PROC(_kperf, OID_AUTO, blessed_pid, + CTLTYPE_INT|CTLFLAG_RW, /* must be root */ + (void*)REQ_BLESS, + sizeof(int), kperf_sysctl_bless_handler, "I", "Blessed pid"); + +SYSCTL_PROC(_kperf, OID_AUTO, blessed_preempt, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_BLESS_PREEMPT, + sizeof(int), kperf_sysctl, "I", "Blessed preemption"); + + +SYSCTL_PROC(_kperf, OID_AUTO, kdbg_callstacks, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_KDBG_CALLSTACKS, + sizeof(int), kperf_sysctl, "I", "Generate kdbg callstacks"); + +SYSCTL_INT(_kperf, OID_AUTO, kdbg_cswitch, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + &kperf_cswitch_hook, 0, "Generate context switch info"); + +SYSCTL_PROC(_kperf, OID_AUTO, pet_idle_rate, +
CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY, + (void*)REQ_PET_IDLE_RATE, + sizeof(int), kperf_sysctl, "I", "Rate at which unscheduled threads are forced to be sampled in PET mode"); + +/* debug */ +SYSCTL_INT(_kperf, OID_AUTO, debug_level, CTLFLAG_RW, + &kperf_debug_level, 0, "debug level"); + diff --git a/osfmk/kperf/kperfbsd.h b/osfmk/kperf/kperfbsd.h index 8c5864096..16bfb7c91 100644 --- a/osfmk/kperf/kperfbsd.h +++ b/osfmk/kperf/kperfbsd.h @@ -26,4 +26,12 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ +#ifndef __KPERF_BSD_H__ +#define __KPERF_BSD_H__ +/* bless a process to allow kperf access to a non-root process + */ +extern int kperf_bless_pid(pid_t newpid); + + +#endif /* __KPERF_BSD_H__ */ diff --git a/osfmk/kperf/pet.c b/osfmk/kperf/pet.c index 3b039c059..68a3cabdf 100644 --- a/osfmk/kperf/pet.c +++ b/osfmk/kperf/pet.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include @@ -59,23 +58,34 @@ static IOLock *pet_lock = NULL; /* where to sample data to */ static struct kperf_sample pet_sample_buf; +static int pet_idle_rate = 15; + /* sample an actual, honest to god thread! */ static void pet_sample_thread( thread_t thread ) { struct kperf_context ctx; task_t task; + unsigned skip_callstack; /* work out the context */ ctx.cur_thread = thread; - ctx.cur_pid = -1; + ctx.cur_pid = 0; task = chudxnu_task_for_thread(thread); if(task) ctx.cur_pid = chudxnu_pid_for_task(task); + skip_callstack = (chudxnu_thread_get_dirty(thread) == TRUE) || ((thread->kperf_pet_cnt % (uint64_t)pet_idle_rate) == 0) ? 0 : SAMPLE_FLAG_EMPTY_CALLSTACK; + /* do the actual sample */ - kperf_sample( &pet_sample_buf, &ctx, pet_actionid, false ); + kperf_sample( &pet_sample_buf, &ctx, pet_actionid, + SAMPLE_FLAG_IDLE_THREADS | skip_callstack ); + + if (!skip_callstack) + chudxnu_thread_set_dirty(thread, FALSE); + + thread->kperf_pet_cnt++; } /* given a list of threads, preferably stopped, sample 'em! */ @@ -83,6 +93,7 @@ static void pet_sample_thread_list( mach_msg_type_number_t threadc, thread_array_t threadv ) { unsigned int i; + int ncpu; for( i = 0; i < threadc; i++ ) { @@ -92,7 +103,16 @@ pet_sample_thread_list( mach_msg_type_number_t threadc, thread_array_t threadv ) /* XXX? 
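 * [Editor's note -- not part of the patch: the loop added below checks each
 * candidate thread against kperf_thread_on_cpus, the per-CPU cache that the
 * PET timer's IPI handler fills with whatever thread each core was running
 * when the timer fired. Those threads were already sampled on-CPU by the
 * IPI, so the PET thread only samples the remaining, off-CPU threads.]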
*/ continue; - pet_sample_thread( thread ); + for (ncpu = 0; ncpu < machine_info.logical_cpu_max; ++ncpu) + { + thread_t candidate = kperf_thread_on_cpus[ncpu]; + if (candidate && candidate->thread_id == thread->thread_id) + break; + } + + /* the thread was not on a CPU */ + if (ncpu == machine_info.logical_cpu_max) + pet_sample_thread( thread ); } } @@ -121,7 +141,7 @@ static void pet_sample_task_list( int taskc, task_array_t taskv ) { int i; - + for( i = 0; i < taskc; i++ ) { kern_return_t kr; @@ -133,7 +153,7 @@ pet_sample_task_list( int taskc, task_array_t taskv ) if(!task) { continue; } - + /* try and stop any task other than the kernel task */ if( task != kernel_task ) { @@ -143,7 +163,7 @@ pet_sample_task_list( int taskc, task_array_t taskv ) if( kr != KERN_SUCCESS ) continue; } - + /* sample it */ pet_sample_task( task ); @@ -172,6 +192,7 @@ pet_sample_all_tasks(void) chudxnu_free_task_list(&taskv, &taskc); } +#if 0 static void pet_sample_pid_filter(void) { @@ -210,6 +231,7 @@ pet_sample_pid_filter(void) out: kperf_filter_free_pid_list( &pidc, &pidv ); } +#endif /* do the pet sample */ static void @@ -218,14 +240,17 @@ pet_work_unit(void) int pid_filter; /* check if we're filtering on pid */ - pid_filter = kperf_filter_on_pid(); + // pid_filter = kperf_filter_on_pid(); + pid_filter = 0; // FIXME +#if 0 if( pid_filter ) { BUF_INFO1(PERF_PET_SAMPLE | DBG_FUNC_START, 1); pet_sample_pid_filter(); } else +#endif { /* otherwise filter everything */ BUF_INFO1(PERF_PET_SAMPLE | DBG_FUNC_START, 0); @@ -240,27 +265,32 @@ pet_work_unit(void) static void pet_idle(void) { - IOLockLock(pet_lock); IOLockSleep(pet_lock, &pet_actionid, THREAD_UNINT); - IOLockUnlock(pet_lock); } /* loop between sampling and waiting */ static void pet_thread_loop( __unused void *param, __unused wait_result_t wr ) { + uint64_t work_unit_ticks; + BUF_INFO1(PERF_PET_THREAD, 1); + IOLockLock(pet_lock); while(1) { BUF_INFO1(PERF_PET_IDLE, 0); pet_idle(); BUF_INFO1(PERF_PET_RUN, 0); + + /* measure how long the work unit takes */ + work_unit_ticks = mach_absolute_time(); pet_work_unit(); + work_unit_ticks = mach_absolute_time() - work_unit_ticks; /* re-program the timer */ - kperf_timer_pet_set( pet_timerid ); + kperf_timer_pet_set( pet_timerid, work_unit_ticks ); /* FIXME: break here on a condition? */ } @@ -270,6 +300,9 @@ pet_thread_loop( __unused void *param, __unused wait_result_t wr ) void kperf_pet_timer_config( unsigned timerid, unsigned actionid ) { + if( !pet_lock ) + return; + /* hold the lock so pet thread doesn't run while we do this */ IOLockLock(pet_lock); @@ -287,6 +320,9 @@ kperf_pet_timer_config( unsigned timerid, unsigned actionid ) void kperf_pet_thread_go(void) { + if( !pet_lock ) + return; + /* Make the thread go */ IOLockWakeup(pet_lock, &pet_actionid, FALSE); } @@ -296,6 +332,9 @@ kperf_pet_thread_go(void) void kperf_pet_thread_wait(void) { + if( !pet_lock ) + return; + /* acquire the lock to ensure the thread is parked. */ IOLockLock(pet_lock); IOLockUnlock(pet_lock); @@ -329,3 +368,15 @@ kperf_pet_init(void) /* OK! 
*/ return 0; } + +int +kperf_get_pet_idle_rate( void ) +{ + return pet_idle_rate; +} + +void +kperf_set_pet_idle_rate( int val ) +{ + pet_idle_rate = val; +} diff --git a/osfmk/kperf/pet.h b/osfmk/kperf/pet.h index 9ffa736b5..03c411a01 100644 --- a/osfmk/kperf/pet.h +++ b/osfmk/kperf/pet.h @@ -38,3 +38,9 @@ extern void kperf_pet_thread_wait(void); /* tell pet the timer parameters */ extern void kperf_pet_timer_config( unsigned timerid, unsigned actionid ); + +/* get/set rate at which PET forces threads to be sampled */ +extern int kperf_get_pet_idle_rate( void ); +extern void kperf_set_pet_idle_rate( int val ); + + diff --git a/osfmk/kperf/sample.h b/osfmk/kperf/sample.h index 5a871214e..1f0f3117c 100644 --- a/osfmk/kperf/sample.h +++ b/osfmk/kperf/sample.h @@ -26,16 +26,27 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ -// what goes in a sample - #include "threadinfo.h" #include "callstack.h" +#include "kperf_kpc.h" + +#ifndef __KPERF_SAMPLE_H__ +#define __KPERF_SAMPLE_H__ +// what goes in a sample struct kperf_sample { struct threadinfo threadinfo; struct tinfo_ex tinfo_ex; struct callstack kcallstack; struct callstack ucallstack; + +#if KPC + struct kpcdata kpcdata; +#endif }; +// cache of thread on CPUs during the IPI +extern thread_t *kperf_thread_on_cpus; + +#endif /* __KPERF_SAMPLE_H__ */ diff --git a/osfmk/kperf/threadinfo.c b/osfmk/kperf/threadinfo.c index 88388c3c7..70d2c1fe2 100644 --- a/osfmk/kperf/threadinfo.c +++ b/osfmk/kperf/threadinfo.c @@ -63,8 +63,10 @@ make_runmode(thread_t thread) } else #endif + { // Today we happen to know there's a one-to-one mapping. return ((mode & 0xffff) | ((~mode & 0xffff) << 16)); + } } diff --git a/osfmk/kperf/timetrigger.c b/osfmk/kperf/timetrigger.c index 643d63cd9..bc43fd423 100644 --- a/osfmk/kperf/timetrigger.c +++ b/osfmk/kperf/timetrigger.c @@ -33,6 +33,8 @@ #include #include +#include + #include #include @@ -42,6 +44,10 @@ #include #include #include +#include + +/* make up for arm signal deficiencies */ +void kperf_signal_handler(void); /* represents a periodic timer */ struct time_trigger @@ -50,6 +56,12 @@ struct time_trigger uint64_t period; unsigned actionid; volatile unsigned active; + +#ifdef USE_SIMPLE_SIGNALS + /* firing accounting */ + uint64_t fire_count; + uint64_t last_cpu_fire[MAX_CPUS]; +#endif }; /* the list of timers */ @@ -60,8 +72,10 @@ static unsigned pet_timer = 999; /* maximum number of timers we can construct */ #define TIMER_MAX 16 -/* minimal interval for a timer (100usec in nsec) */ -#define MIN_TIMER (100000) +/* minimal interval for a timer (10usec in nsec) */ +#define MIN_TIMER_NS (10000) +/* minimal interval for pet timer (2msec in nsec) */ +#define MIN_PET_TIMER_NS (2000000) static void kperf_timer_schedule( struct time_trigger *trigger, uint64_t now ) @@ -70,23 +84,29 @@ kperf_timer_schedule( struct time_trigger *trigger, uint64_t now ) BUF_INFO1(PERF_TM_SCHED, trigger->period); + /* if we re-programmed the timer to zero, just drop it */ + if( !trigger->period ) + return; + /* calculate deadline */ deadline = now + trigger->period; /* re-schedule the timer, making sure we don't apply slop */ - timer_call_enter( &trigger->tcall, deadline, TIMER_CALL_CRITICAL); + timer_call_enter( &trigger->tcall, deadline, TIMER_CALL_SYS_CRITICAL); } static void kperf_ipi_handler( void *param ) { int r; + int ncpu; struct kperf_sample *intbuf = NULL; struct kperf_context ctx; struct time_trigger *trigger = param; task_t task = NULL; - - BUF_INFO1(PERF_TM_HNDLR | DBG_FUNC_START, 0); + + /* Always cut a tracepoint to show a 
sample event occurred */ + BUF_DATA1(PERF_TM_HNDLR | DBG_FUNC_START, 0); /* In an interrupt, get the interrupt buffer for this CPU */ intbuf = kperf_intr_sample_buffer(); @@ -103,17 +123,67 @@ kperf_ipi_handler( void *param ) ctx.trigger_type = TRIGGER_TYPE_TIMER; ctx.trigger_id = (unsigned)(trigger-timerv); /* compute timer number */ + ncpu = chudxnu_cpu_number(); + if (ctx.trigger_id == pet_timer && ncpu < machine_info.logical_cpu_max) + kperf_thread_on_cpus[ncpu] = ctx.cur_thread; + + /* check sampling is on */ + if( kperf_sampling_status() == KPERF_SAMPLING_OFF ) { + BUF_INFO1(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_OFF); + return; + } else if( kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN ) { + BUF_INFO1(PERF_TM_HNDLR | DBG_FUNC_END, SAMPLE_SHUTDOWN); + return; + } + /* call the action -- kernel-only from interrupt, pend user */ - r = kperf_sample( intbuf, &ctx, trigger->actionid, TRUE ); - + r = kperf_sample( intbuf, &ctx, trigger->actionid, SAMPLE_FLAG_PEND_USER ); + + /* end tracepoint is informational */ BUF_INFO1(PERF_TM_HNDLR | DBG_FUNC_END, r); } +#ifdef USE_SIMPLE_SIGNALS +/* if we can't pass a (function, arg) pair through a signal properly, + * we do it the simple way. When a timer fires, we increment a counter + * in the time trigger and broadcast a generic signal to all cores. Cores + * search the time trigger list for any triggers for which their last seen + * firing counter is lower than the current one. + */ +void +kperf_signal_handler(void) +{ + int i, cpu; + struct time_trigger *tr = NULL; + + OSMemoryBarrier(); + + cpu = chudxnu_cpu_number(); + for( i = 0; i < (int) timerc; i++ ) + { + tr = &timerv[i]; + if( tr->fire_count <= tr->last_cpu_fire[cpu] ) + continue; /* this trigger hasn't fired */ + + /* fire the trigger! */ + tr->last_cpu_fire[cpu] = tr->fire_count; + kperf_ipi_handler( tr ); + } +} +#else +void +kperf_signal_handler(void) +{ + // so we can link... +} +#endif + static void kperf_timer_handler( void *param0, __unused void *param1 ) { struct time_trigger *trigger = param0; unsigned ntimer = (unsigned)(trigger - timerv); + unsigned ncpus = machine_info.logical_cpu_max; trigger->active = 1; @@ -121,8 +191,17 @@ kperf_timer_handler( void *param0, __unused void *param1 ) if( kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN ) goto deactivate; + /* clean-up the thread-on-CPUs cache */ + bzero(kperf_thread_on_cpus, ncpus * sizeof(*kperf_thread_on_cpus)); + /* ping all CPUs */ +#ifndef USE_SIMPLE_SIGNALS kperf_mp_broadcast( kperf_ipi_handler, trigger ); +#else + trigger->fire_count++; + OSMemoryBarrier(); + kperf_mp_signal(); +#endif /* release the pet thread?
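 * [Editor's note -- not part of the patch: when the firing timer is the PET
 * timer, the handler does not re-arm itself in the else branch below; it
 * just wakes the PET thread, which sweeps the off-CPU threads and re-arms
 * the timer via kperf_timer_pet_set(), deducting the time the sweep took
 * and clamping the next period to MIN_PET_TIMER_NS so back-to-back sweeps
 * cannot monopolize the machine.]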
*/ if( ntimer == pet_timer ) @@ -132,7 +211,7 @@ } else { - /* re-enable the timer + /* re-enable the timer * FIXME: get the current time from elsewhere */ uint64_t now = mach_absolute_time(); @@ -145,10 +224,18 @@ deactivate: /* program the timer from the pet thread */ int -kperf_timer_pet_set( unsigned timer ) +kperf_timer_pet_set( unsigned timer, uint64_t elapsed_ticks ) { + static uint64_t pet_min_ticks = 0; + uint64_t now; struct time_trigger *trigger = NULL; + uint64_t period = 0; + uint64_t deadline; + + /* compute ns -> ticks */ + if( pet_min_ticks == 0 ) + nanoseconds_to_absolutetime(MIN_PET_TIMER_NS, &pet_min_ticks); if( timer != pet_timer ) panic( "PET setting with bogus ID\n" ); @@ -156,14 +243,42 @@ if( timer >= timerc ) return EINVAL; + if( kperf_sampling_status() == KPERF_SAMPLING_OFF ) { + BUF_INFO1(PERF_PET_END, SAMPLE_OFF); + return 0; + } + + // don't reprogram the timer if it's been shut down + if( kperf_sampling_status() == KPERF_SAMPLING_SHUTDOWN ) { + BUF_INFO1(PERF_PET_END, SAMPLE_SHUTDOWN); + return 0; + } + /* CHECKME: we probably took so damn long in the PET thread, * it makes sense to take the time again. */ now = mach_absolute_time(); trigger = &timerv[timer]; - /* reprogram */ - kperf_timer_schedule( trigger, now ); + /* if we re-programmed the timer to zero, just drop it */ + if( !trigger->period ) + return 0; + + /* subtract the time the pet sample took, being careful not to underflow */ + if ( trigger->period > elapsed_ticks ) + period = trigger->period - elapsed_ticks; + + /* make sure we don't set the next PET sample to happen too soon */ + if ( period < pet_min_ticks ) + period = pet_min_ticks; + + /* calculate deadline */ + deadline = now + period; + + BUF_INFO(PERF_PET_SCHED, trigger->period, period, elapsed_ticks, deadline); + + /* re-schedule the timer, making sure we don't apply slop */ + timer_call_enter( &trigger->tcall, deadline, TIMER_CALL_SYS_CRITICAL); return 0; } @@ -242,8 +357,6 @@ kperf_timer_set_petid(unsigned timerid) int kperf_timer_get_period( unsigned timer, uint64_t *period ) { - printf( "get timer %u / %u\n", timer, timerc ); - if( timer >= timerc ) return EINVAL; @@ -255,13 +368,18 @@ kperf_timer_get_period( unsigned timer, uint64_t *period ) int kperf_timer_set_period( unsigned timer, uint64_t period ) { - printf( "set timer %u\n", timer ); + static uint64_t min_timer_ticks = 0; if( timer >= timerc ) return EINVAL; - if( period < MIN_TIMER ) - period = MIN_TIMER; + /* compute ns -> ticks */ + if( min_timer_ticks == 0 ) + nanoseconds_to_absolutetime(MIN_TIMER_NS, &min_timer_ticks); + + /* check actual timer */ + if( period && (period < min_timer_ticks) ) + period = min_timer_ticks; timerv[timer].period = period; @@ -270,6 +388,28 @@ kperf_timer_set_period( unsigned timer, uint64_t period ) return 0; } +int +kperf_timer_get_action( unsigned timer, uint32_t *action ) +{ + if( timer >= timerc ) + return EINVAL; + + *action = timerv[timer].actionid; + + return 0; +} + +int +kperf_timer_set_action( unsigned timer, uint32_t action ) +{ + if( timer >= timerc ) + return EINVAL; + + timerv[timer].actionid = action; + + return 0; +} + unsigned kperf_timer_get_count(void) { @@ -290,10 +430,7 @@ kperf_timer_set_count(unsigned count) /* easy no-op */ if( count == timerc ) - { - printf( "already got %d timers\n", timerc ); return 0; - } /* TODO: allow shrinking?
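 * [Editor's note -- not part of the patch: kperf_timer_set_count() only
 * grows the array. The hunk below additionally stops any running timers
 * (kperf_timer_stop) before reallocating, since armed timer_call structures
 * cannot safely be copied, and it now re-runs setup_timer_call() on every
 * entry of the new array rather than only on the freshly added tail.]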
*/ if( count < timerc ) @@ -314,13 +451,19 @@ kperf_timer_set_count(unsigned count) r = kperf_init(); if( r ) return r; - + /* get the PET thread going */ r = kperf_pet_init(); if( r ) return r; } + /* first shut down any running timers since we will be messing + * with the timer call structures + */ + if( kperf_timer_stop() ) + return EBUSY; + /* create a new array */ new_timerv = kalloc( count * sizeof(*new_timerv) ); if( new_timerv == NULL ) @@ -335,8 +478,8 @@ kperf_timer_set_count(unsigned count) /* zero the new entries */ bzero( &new_timerv[timerc], (count - old_count) * sizeof(*new_timerv) ); - /* setup the timer call info */ - for( i = old_count; i < count; i++ ) + /* (re-)setup the timer call info for all entries */ + for( i = 0; i < count; i++ ) setup_timer_call( &new_timerv[i] ); timerv = new_timerv; @@ -345,7 +488,5 @@ kperf_timer_set_count(unsigned count) if( old_timerv != NULL ) kfree( old_timerv, old_count * sizeof(*timerv) ); - printf( "kperf: done timer alloc, timerc %d\n", timerc ); - return 0; } diff --git a/osfmk/kperf/timetrigger.h b/osfmk/kperf/timetrigger.h index b0d67b00e..81196f129 100644 --- a/osfmk/kperf/timetrigger.h +++ b/osfmk/kperf/timetrigger.h @@ -35,13 +35,16 @@ extern void kperf_timer_reprogram_all(void); #define TIMER_REPROGRAM (0) #define TIMER_STOP (1) -/* blah */ +/* getters and setters on timers */ extern unsigned kperf_timer_get_count(void); extern int kperf_timer_set_count(unsigned count); extern int kperf_timer_get_period( unsigned timer, uint64_t *period ); extern int kperf_timer_set_period( unsigned timer, uint64_t period ); +extern int kperf_timer_get_action( unsigned timer, uint32_t *action ); +extern int kperf_timer_set_action( unsigned timer, uint32_t action ); + extern int kperf_timer_go(void); extern int kperf_timer_stop(void); @@ -49,4 +52,4 @@ extern unsigned kperf_timer_get_petid(void); extern int kperf_timer_set_petid(unsigned count); /* so PET thread can re-arm the timer */ -extern int kperf_timer_pet_set( unsigned timer ); +extern int kperf_timer_pet_set( unsigned timer, uint64_t elapsed_ticks ); diff --git a/osfmk/libsa/Makefile b/osfmk/libsa/Makefile index 98c0c2585..ea0f4cb80 100644 --- a/osfmk/libsa/Makefile +++ b/osfmk/libsa/Makefile @@ -3,7 +3,6 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) diff --git a/osfmk/libsa/string.h b/osfmk/libsa/string.h index c94f56dd7..731cd7653 100644 --- a/osfmk/libsa/string.h +++ b/osfmk/libsa/string.h @@ -81,6 +81,7 @@ extern int strncmp(const char *,const char *, size_t); extern int strcasecmp(const char *s1, const char *s2); extern int strncasecmp(const char *s1, const char *s2, size_t n); +extern char *strnstr(char *s, const char *find, size_t slen); extern char *strchr(const char *s, int c); extern char *STRDUP(const char *, int); extern int strprefix(const char *s1, const char *s2); diff --git a/osfmk/lockd/Makefile b/osfmk/lockd/Makefile index 2975dc2dd..a262c76df 100644 --- a/osfmk/lockd/Makefile +++ b/osfmk/lockd/Makefile @@ -6,14 +6,6 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = - -INSTINC_SUBDIRS_I386 = - -EXPINC_SUBDIRS = - -EXPINC_SUBDIRS_I386 = - MIG_DEFS = lockd_mach.defs DATAFILES = lockd_mach_types.h ${MIG_DEFS} @@ -46,13 +38,13 @@ MIG_KUSRC = lockd_mach.c lockd_mach.h COMP_FILES = ${MIG_KUSRC} -${COMP_FILES} : lockd_mach.defs +do_build_all:: 
$(COMP_FILES) ${MIG_KUSRC} : lockd_mach.defs @echo MIG $@ $(_v)${MIG} ${MIGFLAGS} ${MIGKUFLAGS} \ - -user $*.c \ - -header $*.h \ + -user lockd_mach.c \ + -header lockd_mach.h \ -server /dev/null \ -sheader /dev/null \ $< diff --git a/osfmk/mach/Makefile b/osfmk/mach/Makefile index b435d9794..99a57a358 100644 --- a/osfmk/mach/Makefile +++ b/osfmk/mach/Makefile @@ -8,11 +8,9 @@ include $(MakeInc_def) INSTINC_SUBDIRS = machine INSTINC_SUBDIRS_ARM = arm -INSTINC_SUBDIRS_I386 = i386 INSTINC_SUBDIRS_X86_64 = i386 EXPINC_SUBDIRS = machine -EXPINC_SUBDIRS_I386 = i386 EXPINC_SUBDIRS_X86_64 = i386 EXPINC_SUBDIRS_ARM = arm @@ -41,6 +39,7 @@ MIG_DEFS = \ security.defs \ task.defs \ task_access.defs \ + telemetry_notification.defs \ thread_act.defs \ vm_map.defs @@ -66,7 +65,8 @@ MIG_USHDRS = \ memory_object_server.h \ memory_object_default_server.h \ notify_server.h \ - task_access_server.h + task_access_server.h \ + telemetry_notification_server.h MIG_UUHDRS = \ clock.h \ @@ -173,12 +173,8 @@ EXPORT_MI_GEN_LIST = \ EXPORT_MI_DIR = mach -.ORDER: ${MIGINCLUDES} - ${MIGINCLUDES} : ${MIG_TYPES} -.ORDER: ${MIG_UUHDRS} - ${MIG_UUHDRS} : \ %.h : %.defs @echo MIG $@ @@ -188,8 +184,6 @@ ${MIG_UUHDRS} : \ -header $@ \ $< -.ORDER: ${MIG_USHDRS} - ${MIG_USHDRS} : \ %_server.h : %.defs @echo MIG $@ @@ -203,8 +197,6 @@ ${MIG_USHDRS} : \ # # Build path # -COMP_SUBDIRS_I386 = \ - i386 INCFLAGS_MAKEFILE= -I.. @@ -239,6 +231,7 @@ MIG_KUSRC = \ memory_object_control_user.c \ memory_object_default_user.c \ task_access_user.c \ + telemetry_notification_user.c \ upl_user.c \ vm_map_user.c @@ -310,9 +303,9 @@ MIG_KSSRC = \ # COMP_FILES = ${MIG_KUSRC} ${MIG_KSSRC} +do_build_all:: $(COMP_FILES) + ${COMP_FILES} : ${MIG_TYPES} - -.ORDER: ${MIG_KUSRC} ${MIG_KUSRC} : \ %_user.c : %.defs @@ -324,8 +317,6 @@ ${MIG_KUSRC} : \ -sheader /dev/null \ $< -.ORDER: ${MIG_KSSRC} - ${MIG_KSSRC}: \ %_server.c : %.defs @echo MIG $@ diff --git a/osfmk/mach/Makefile.template b/osfmk/mach/Makefile.template deleted file mode 100644 index fa15c23a1..000000000 --- a/osfmk/mach/Makefile.template +++ /dev/null @@ -1,173 +0,0 @@ -export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd -export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def -export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule -export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - - -include $(MakeInc_cmd) -include $(MakeInc_def) - -MIGKSFLAGS = -DKERNEL_SERVER -MIGKUFLAGS = -DKERNEL_USER -maxonstack 1024 - -MACH_FILES = mach_server.h mach_server.c - -MACH_PORT_FILES =mach_port_server.h mach_port_server.c - -EXC_FILES = exc_user.h exc_user.c exc_server.c - -MACH_EXC_FILES = mach_exc_user.h mach_exc_user.c mach_exc_server.c - -MEMORY_OBJECT_FILES = memory_object_user.h memory_object_user.c - -MEMORY_OBJECT_DEFAULT_FILES = memory_object_default.h memory_object_default_user.c - -PROF_FILES = prof_user.c prof.h - -MACH_HOST_FILES = mach_host_server.h mach_host_server.c - -CLOCK_FILES = clock_server.h clock_server.c - -CLOCK_REPLY_FILES = clock_reply.h clock_reply_user.c - -BOOTSTRAP_FILES = bootstrap_server.h bootstrap_server.c - -SYNC_FILES = sync_server.h sync_server.c - -MACH_USER_FILES = mach_user.h mach_user.c - -OTHERS = ${MACH_FILES} ${MACH_PORT_FILES} \ - ${EXC_FILES} ${MACH_EXC_FILES} \ - ${MEMORY_OBJECT_FILES} ${MEMORY_OBJECT_DEFAULT_FILES} \ - ${PROF_FILES} ${MACH_HOST_FILES} \ - ${CLOCK_FILES} ${CLOCK_REPLY_FILES} ${BOOTSTRAP_FILES} \ - ${BOOTSTRAP_FILES} ${SYNC_FILES} \ - ${MACH_USER_FILES} - -.ORDER: ${MACH_FILES} - -${MACH_FILES}: mach/mach.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} 
\ - -header /dev/null \ - -user /dev/null \ - -sheader mach_server.h \ - -server mach_server.c \ - $< - -.ORDER: ${MACH_PORT_FILES} - -${MACH_PORT_FILES}: mach/mach_port.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader mach_port_server.h \ - -server mach_port_server.c \ - $< - -.ORDER: ${EXC_FILES} -${EXC_FILES}: mach/exc.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKUFLAGS} \ - -header exc_user.h \ - -user exc_user.c \ - -sheader exc_server.h \ - -server exc_server.c \ - $< - -.ORDER: ${MACH_EXC_FILES} -${MACH_EXC_FILES}: mach/mach_exc.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKUFLAGS} \ - -header mach_exc_user.h \ - -user mach_exc_user.c \ - -sheader mach_exc_server.h \ - -server mach_exc_server.c \ - $< - -.ORDER: ${MEMORY_OBJECT_FILES} - -${MEMORY_OBJECT_FILES}: mach/memory_object.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKUFLAGS} -DSEQNOS \ - -header memory_object_user.h \ - -user memory_object_user.c \ - -server /dev/null \ - $< - -.ORDER: ${MEMORY_OBJECT_DEFAULT_FILES} - -${MEMORY_OBJECT_DEFAULT_FILES}: mach/memory_object_default.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKUFLAGS} -DSEQNOS \ - -header memory_object_default.h \ - -user memory_object_default_user.c \ - -server /dev/null \ - $< - -.ORDER: ${PROF_FILES} - -${PROF_FILES}: mach/prof.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKUFLAGS} \ - -header prof.h \ - -iheader prof_internal.h \ - -user prof_user.c \ - -server /dev/null \ - $< - -.ORDER: ${MACH_HOST_FILES} - -${MACH_HOST_FILES}: mach/mach_host.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader mach_host_server.h \ - -server mach_host_server.c \ - $< - -.ORDER: ${CLOCK_FILES} - -${CLOCK_FILES}: mach/clock.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader clock_server.h \ - -server clock_server.c \ - $< - -.ORDER: ${CLOCK_REPLY_FILES} -${CLOCK_REPLY_FILES}: mach/clock_reply.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKUFLAGS} \ - -header clock_reply.h \ - -user clock_reply_user.c \ - -server /dev/null \ - $< - -.ORDER: ${BOOTSTRAP_FILES} - -${BOOTSTRAP_FILES}: mach/bootstrap.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader bootstrap_server.h \ - -server bootstrap_server.c \ - $< - -.ORDER: ${SYNC_FILES} - -${SYNC_FILES}: mach/sync.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader sync_server.h \ - -server sync_server.c \ - $< - -${MACH_USER_FILES}: mach/mach.defs - ${_MIG_} -X ${_MIGFLAGS_} ${MIGKUFLAGS} \ - -header mach_user.h \ - -user mach_user.c \ - -server /dev/null \ - $< - - - -include $(MakeInc_rule) -include $(MakeInc_dir) - - diff --git a/osfmk/mach/branch_predicates.h b/osfmk/mach/branch_predicates.h index ab32e87a7..0882d3e1e 100644 --- a/osfmk/mach/branch_predicates.h +++ b/osfmk/mach/branch_predicates.h @@ -30,6 +30,6 @@ #ifndef _MACH_BRANCH_PREDICATES_H #define _MACH_BRANCH_PREDICATES_H -#define __probable(x) __builtin_expect((long)(x), 1L) -#define __improbable(x) __builtin_expect((long)(x), 0L) +#define __probable(x) __builtin_expect(!!((long)(x)), 1L) +#define __improbable(x) __builtin_expect(!!((long)(x)), 0L) #endif /* _MACH_BRANCH_PREDICATES_H */ diff --git a/osfmk/mach/clock_types.h b/osfmk/mach/clock_types.h index e020066d1..eb274f070 100644 --- a/osfmk/mach/clock_types.h +++ b/osfmk/mach/clock_types.h @@ -81,25 +81,25 @@ typedef struct mach_timespec mach_timespec_t; #define CLOCK_ALARM_MINRES 4 /* minimum alarm resolution */ #define CLOCK_ALARM_MAXRES 5 
/* maximum alarm resolution */ -#define NSEC_PER_USEC 1000 /* nanoseconds per microsecond */ -#define USEC_PER_SEC 1000000 /* microseconds per second */ -#define NSEC_PER_SEC 1000000000 /* nanoseconds per second */ +#define NSEC_PER_USEC 1000ull /* nanoseconds per microsecond */ +#define USEC_PER_SEC 1000000ull /* microseconds per second */ +#define NSEC_PER_SEC 1000000000ull /* nanoseconds per second */ #define NSEC_PER_MSEC 1000000ull /* nanoseconds per millisecond */ #define BAD_MACH_TIMESPEC(t) \ - ((t)->tv_nsec < 0 || (t)->tv_nsec >= NSEC_PER_SEC) + ((t)->tv_nsec < 0 || (t)->tv_nsec >= (long)NSEC_PER_SEC) /* t1 <=> t2, also (t1 - t2) in nsec with max of +- 1 sec */ #define CMP_MACH_TIMESPEC(t1, t2) \ - ((t1)->tv_sec > (t2)->tv_sec ? +NSEC_PER_SEC : \ - ((t1)->tv_sec < (t2)->tv_sec ? -NSEC_PER_SEC : \ + ((t1)->tv_sec > (t2)->tv_sec ? (long) +NSEC_PER_SEC : \ + ((t1)->tv_sec < (t2)->tv_sec ? (long) -NSEC_PER_SEC : \ (t1)->tv_nsec - (t2)->tv_nsec)) /* t1 += t2 */ #define ADD_MACH_TIMESPEC(t1, t2) \ do { \ - if (((t1)->tv_nsec += (t2)->tv_nsec) >= NSEC_PER_SEC) { \ - (t1)->tv_nsec -= NSEC_PER_SEC; \ + if (((t1)->tv_nsec += (t2)->tv_nsec) >= (long) NSEC_PER_SEC) { \ + (t1)->tv_nsec -= (long) NSEC_PER_SEC; \ (t1)->tv_sec += 1; \ } \ (t1)->tv_sec += (t2)->tv_sec; \ @@ -109,7 +109,7 @@ typedef struct mach_timespec mach_timespec_t; #define SUB_MACH_TIMESPEC(t1, t2) \ do { \ if (((t1)->tv_nsec -= (t2)->tv_nsec) < 0) { \ - (t1)->tv_nsec += NSEC_PER_SEC; \ + (t1)->tv_nsec += (long) NSEC_PER_SEC; \ (t1)->tv_sec -= 1; \ } \ (t1)->tv_sec -= (t2)->tv_sec; \ diff --git a/osfmk/mach/exception_types.h b/osfmk/mach/exception_types.h index 96de421d2..baf2ec24d 100644 --- a/osfmk/mach/exception_types.h +++ b/osfmk/mach/exception_types.h @@ -98,6 +98,9 @@ #define EXC_RESOURCE 11 /* Hit resource consumption limit */ /* Exact resource is in code field. 
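The clock_types.h hunk above makes NSEC_PER_USEC, USEC_PER_SEC and NSEC_PER_SEC unsigned long long, which is why the timespec macros grow (long) casts: tv_nsec is signed, and comparing it directly against an unsigned long long constant would promote it, turning a negative nanosecond count into a huge positive one. A minimal userspace sketch of the pitfall; only the NSEC_PER_SEC definition is taken from the header text, the rest is illustrative:

    #include <assert.h>

    /* Definition taken from the new clock_types.h text: */
    #define NSEC_PER_SEC 1000000000ull

    int main(void)
    {
        long tv_nsec = -1;                  /* an invalid nanosecond count */

        /* Unsigned comparison: tv_nsec is converted to unsigned long long,
         * so -1 becomes a huge value and looks "longer than a second". */
        assert(tv_nsec >= NSEC_PER_SEC);    /* surprising, but true */

        /* The (long) casts used by BAD_MACH_TIMESPEC and friends keep the
         * comparison signed, so -1 stays below the limit as intended. */
        assert(tv_nsec < (long)NSEC_PER_SEC);
        return 0;
    }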
*/ +#define EXC_GUARD 12 /* Violated guarded resource protections */ + + /* * Machine-independent exception behaviors */ @@ -135,6 +138,7 @@ #define EXC_MASK_RPC_ALERT (1 << EXC_RPC_ALERT) #define EXC_MASK_CRASH (1 << EXC_CRASH) #define EXC_MASK_RESOURCE (1 << EXC_RESOURCE) +#define EXC_MASK_GUARD (1 << EXC_GUARD) #define EXC_MASK_ALL (EXC_MASK_BAD_ACCESS | \ EXC_MASK_BAD_INSTRUCTION | \ @@ -146,6 +150,7 @@ EXC_MASK_MACH_SYSCALL | \ EXC_MASK_RPC_ALERT | \ EXC_MASK_RESOURCE | \ + EXC_MASK_GUARD | \ EXC_MASK_MACHINE) #ifdef KERNEL_PRIVATE diff --git a/osfmk/mach/host_info.h b/osfmk/mach/host_info.h index 0464d5ed0..8f67ac1a1 100644 --- a/osfmk/mach/host_info.h +++ b/osfmk/mach/host_info.h @@ -99,6 +99,7 @@ typedef integer_t host_flavor_t; #define HOST_PRIORITY_INFO 5 /* priority information */ #define HOST_SEMAPHORE_TRAPS 7 /* Has semaphore traps */ #define HOST_MACH_MSG_TRAP 8 /* Has mach_msg_trap */ +#define HOST_VM_PURGABLE 9 /* purg'e'able memory info */ #ifdef MACH_KERNEL_PRIVATE struct host_basic_info_old { @@ -185,7 +186,8 @@ typedef struct host_priority_info *host_priority_info_t; /* host_statistics64() */ #define HOST_VM_INFO64 4 /* 64-bit virtual memory stats */ #define HOST_EXTMOD_INFO64 5 /* External modification stats */ -#define HOST_EXPIRED_TASK_INFO 6 /* Statistics for expired tasks */ +#define HOST_EXPIRED_TASK_INFO 6 /* Statistics for expired tasks */ + struct host_load_info { integer_t avenrun[3]; /* scaled by LOAD_SCALE */ @@ -197,6 +199,11 @@ typedef struct host_load_info *host_load_info_t; #define HOST_LOAD_INFO_COUNT ((mach_msg_type_number_t) \ (sizeof(host_load_info_data_t)/sizeof(integer_t))) +typedef struct vm_purgeable_info host_purgable_info_data_t; +typedef struct vm_purgeable_info *host_purgable_info_t; +#define HOST_VM_PURGABLE_COUNT ((mach_msg_type_number_t) \ + (sizeof(host_purgable_info_data_t)/sizeof(integer_t))) + /* in */ /* vm_statistics64 */ #define HOST_VM_INFO64_COUNT ((mach_msg_type_number_t) \ @@ -204,6 +211,11 @@ typedef struct host_load_info *host_load_info_t; /* size of the latest version of the structure */ #define HOST_VM_INFO64_LATEST_COUNT HOST_VM_INFO64_COUNT +#define HOST_VM_INFO64_REV1_COUNT HOST_VM_INFO64_LATEST_COUNT +/* previous versions: adjust the size according to what was added each time */ +#define HOST_VM_INFO64_REV0_COUNT /* added compression and swapper info (14 ints) */\ + ((mach_msg_type_number_t) \ + (HOST_VM_INFO64_REV1_COUNT - 14)) /* in */ /* vm_extmod_statistics */ diff --git a/osfmk/mach/host_special_ports.h b/osfmk/mach/host_special_ports.h index 4b2037256..d51582ef2 100644 --- a/osfmk/mach/host_special_ports.h +++ b/osfmk/mach/host_special_ports.h @@ -91,7 +91,8 @@ #define HOST_UNFREED_PORT (10 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_AMFID_PORT (11 + HOST_MAX_SPECIAL_KERNEL_PORT) #define HOST_GSSD_PORT (12 + HOST_MAX_SPECIAL_KERNEL_PORT) -#define HOST_MAX_SPECIAL_PORT (13 + HOST_MAX_SPECIAL_KERNEL_PORT) +#define HOST_TELEMETRY_PORT (13 + HOST_MAX_SPECIAL_KERNEL_PORT) +#define HOST_MAX_SPECIAL_PORT (14 + HOST_MAX_SPECIAL_KERNEL_PORT) /* room to grow here as well */ /* @@ -181,8 +182,12 @@ #define host_get_gssd_port(host, port) \ (host_get_special_port((host), \ HOST_LOCAL_NODE, HOST_GSSD_PORT, (port))) - #define host_set_gssd_port(host, port) \ (host_set_special_port((host), HOST_GSSD_PORT, (port))) +#define host_get_telemetry_port(host, port) \ + (host_get_special_port((host), \ + HOST_LOCAL_NODE, HOST_TELEMETRY_PORT, (port))) +#define host_set_telemetry_port(host, port) \ + (host_set_special_port((host), 
HOST_TELEMETRY_PORT, (port))) #endif /* _MACH_HOST_SPECIAL_PORTS_H_ */ diff --git a/osfmk/mach/i386/Makefile b/osfmk/mach/i386/Makefile index 7f99e1758..16abdd76a 100644 --- a/osfmk/mach/i386/Makefile +++ b/osfmk/mach/i386/Makefile @@ -6,7 +6,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -VPATH+=$(SOURCE)/../../i386: +VPATH := $(VPATH):$(SOURCE)/../../i386 DATAFILES = \ boolean.h exception.h fp_reg.h \ diff --git a/osfmk/mach/i386/exception.h b/osfmk/mach/i386/exception.h index a9b4fbf04..db603d588 100644 --- a/osfmk/mach/i386/exception.h +++ b/osfmk/mach/i386/exception.h @@ -63,7 +63,7 @@ * No machine dependent types for the 80386 */ -#define EXC_TYPES_COUNT 12 /* incl. illegal exception 0 */ +#define EXC_TYPES_COUNT 13 /* incl. illegal exception 0 */ /* * Codes and subcodes for 80386 exceptions. diff --git a/osfmk/mach/i386/thread_status.h b/osfmk/mach/i386/thread_status.h index 36232c736..9c5ae47b0 100644 --- a/osfmk/mach/i386/thread_status.h +++ b/osfmk/mach/i386/thread_status.h @@ -300,59 +300,11 @@ typedef struct x86_avx_state x86_avx_state_t; #define MACHINE_THREAD_STATE x86_THREAD_STATE #define MACHINE_THREAD_STATE_COUNT x86_THREAD_STATE_COUNT -/* - * when reloading the segment registers on - * a return out of the kernel, we may take - * a GeneralProtection or SegmentNotPresent - * fault if one or more of the segment - * registers in the saved state was improperly - * specified via an x86_THREAD_STATE32 call - * the frame we push on top of the existing - * save area looks like this... we need to - * carry this as part of the save area - * in case we get hit so that we have a big - * enough stack - */ -struct x86_seg_load_fault32 { - uint16_t trapno; - uint16_t cpu; - uint32_t err; - uint32_t eip; - uint32_t cs; - uint32_t efl; -}; - #ifdef XNU_KERNEL_PRIVATE #define x86_SAVED_STATE32 THREAD_STATE_NONE + 1 #define x86_SAVED_STATE64 THREAD_STATE_NONE + 2 -/* - * Subset of saved state stored by processor on kernel-to-kernel - * trap. (Used by ddb to examine state guaranteed to be present - * on all traps into debugger.) - */ -struct x86_saved_state32_from_kernel { - uint32_t gs; - uint32_t fs; - uint32_t es; - uint32_t ds; - uint32_t edi; - uint32_t esi; - uint32_t ebp; - uint32_t cr2; /* kernel esp stored by pusha - we save cr2 here later */ - uint32_t ebx; - uint32_t edx; - uint32_t ecx; - uint32_t eax; - uint16_t trapno; - uint16_t cpu; - uint32_t err; - uint32_t eip; - uint32_t cs; - uint32_t efl; -}; - /* * The format in which thread state is saved by Mach on this machine. This * state flavor is most efficient for exception RPC's to kernel-loaded @@ -386,27 +338,6 @@ typedef struct x86_saved_state32 x86_saved_state32_t; (sizeof (x86_saved_state32_t)/sizeof(unsigned int))) #pragma pack(4) -struct x86_saved_state32_tagged { - uint32_t tag; - struct x86_saved_state32 state; -}; -typedef struct x86_saved_state32_tagged x86_saved_state32_tagged_t; -/* Note: sizeof(x86_saved_state32_tagged_t) is a multiple of 16 bytes */ - -struct x86_sframe32 { - /* - * in case we throw a fault reloading - * segment registers on a return out of - * the kernel... the 'slf' state is only kept - * long enough to rejigger (i.e. 
restore - * the save area to its original state) - * the save area and throw the appropriate - * kernel trap pointing to the 'ssf' state - */ - struct x86_seg_load_fault32 slf; - struct x86_saved_state32_tagged ssf; -}; -typedef struct x86_sframe32 x86_sframe32_t; /* * This is the state pushed onto the 64-bit interrupt stack @@ -427,26 +358,6 @@ struct x86_64_intr_stack_frame { typedef struct x86_64_intr_stack_frame x86_64_intr_stack_frame_t; /* Note: sizeof(x86_64_intr_stack_frame_t) must be a multiple of 16 bytes */ -/* - * This defines the state saved before entry into compatibility mode. - * The machine state is pushed automatically and the compat state is - * synthethized in the exception handling code. - */ -struct x86_saved_state_compat32 { - struct x86_saved_state32_tagged iss32; - struct x86_64_intr_stack_frame isf64; -}; -typedef struct x86_saved_state_compat32 x86_saved_state_compat32_t; - -struct x86_sframe_compat32 { - uint32_t pad_for_16byte_alignment[2]; - uint64_t _register_save_slot; - struct x86_64_intr_stack_frame slf; - struct x86_saved_state_compat32 ssf; -}; -typedef struct x86_sframe_compat32 x86_sframe_compat32_t; -/* Note: sizeof(x86_sframe_compat32_t) must be a multiple of 16 bytes */ - /* * thread state format for task running in 64bit long mode * in long mode, the same hardware frame is always pushed regardless @@ -467,15 +378,15 @@ struct x86_saved_state64 { * system call handlers will fill these in * via copyin if needed... */ - uint64_t rdi; /* arg0 for system call */ + uint64_t rdi; /* arg0 for system call */ uint64_t rsi; uint64_t rdx; - uint64_t r10; - uint64_t r8; - uint64_t r9; /* arg5 for system call */ - uint64_t v_arg6; - uint64_t v_arg7; - uint64_t v_arg8; + uint64_t r10; /* R10 := RCX prior to syscall trap */ + uint64_t r8; + uint64_t r9; /* arg5 for system call */ + uint64_t v_arg6; + uint64_t v_arg7; + uint64_t v_arg8; uint64_t cr2; uint64_t r15; @@ -491,28 +402,12 @@ struct x86_saved_state64 { uint32_t gs; uint32_t fs; - uint32_t _pad_for_tagged_alignment[3]; - struct x86_64_intr_stack_frame isf; }; typedef struct x86_saved_state64 x86_saved_state64_t; #define x86_SAVED_STATE64_COUNT ((mach_msg_type_number_t) \ (sizeof (struct x86_saved_state64)/sizeof(unsigned int))) -struct x86_saved_state64_tagged { - uint32_t tag; - x86_saved_state64_t state; -}; -typedef struct x86_saved_state64_tagged x86_saved_state64_tagged_t; - -struct x86_sframe64 { - uint64_t _register_save_slot[2]; - struct x86_64_intr_stack_frame slf; - x86_saved_state64_tagged_t ssf; -}; -typedef struct x86_sframe64 x86_sframe64_t; -/* Note: sizeof(x86_sframe64_t) is a multiple of 16 bytes */ - extern uint32_t get_eflags_exportmask(void); /* @@ -520,6 +415,7 @@ extern uint32_t get_eflags_exportmask(void); */ typedef struct { uint32_t flavor; + uint32_t _pad_for_16byte_alignment[3]; union { x86_saved_state32_t ss_32; x86_saved_state64_t ss_64; diff --git a/osfmk/mach/i386/vm_param.h b/osfmk/mach/i386/vm_param.h index 12eb226e2..02bef271f 100644 --- a/osfmk/mach/i386/vm_param.h +++ b/osfmk/mach/i386/vm_param.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -95,6 +95,12 @@ #define I386_PGBYTES 4096 /* bytes per 80386 page */ #define I386_PGSHIFT 12 /* bitshift for pages */ +#ifdef PRIVATE +#define KERNEL_PAGE_SIZE I386_PGBYTES +#define KERNEL_PAGE_SHIFT I386_PGSHIFT +#define KERNEL_PAGE_MASK (KERNEL_PAGE_SIZE-1) +#endif + #define PAGE_SIZE I386_PGBYTES #define PAGE_SHIFT I386_PGSHIFT #define PAGE_MASK (PAGE_SIZE - 1) @@ -166,12 +172,8 @@ * Maximum physical memory supported. */ #define K32_MAXMEM (32*GB) -#define K64_MAXMEM (96*GB) -#if defined(__i386__) -#define KERNEL_MAXMEM K32_MAXMEM -#else +#define K64_MAXMEM (128*GB) #define KERNEL_MAXMEM K64_MAXMEM -#endif /* * XXX @@ -181,14 +183,6 @@ * We can't let VM allocate memory from there. */ -#if defined(__i386__) - -#define KERNEL_IMAGE_TO_PHYS(x) (x) -#define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0x00001000U) -#define VM_MIN_KERNEL_AND_KEXT_ADDRESS VM_MIN_KERNEL_ADDRESS -#define VM_MAX_KERNEL_ADDRESS ((vm_offset_t) 0xFE7FFFFFU) - -#elif defined(__x86_64__) #define KERNEL_IMAGE_TO_PHYS(x) (x) #define VM_MIN_KERNEL_ADDRESS ((vm_offset_t) 0xFFFFFF8000000000UL) @@ -200,13 +194,6 @@ #define KEXT_ALLOC_BASE(x) ((x) - KEXT_ALLOC_MAX_OFFSET) #define KEXT_ALLOC_SIZE(x) (KEXT_ALLOC_MAX_OFFSET - (x)) -#define VM_KERNEL_IS_KEXT(_o) \ - (((vm_offset_t)(_o) >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) && \ - ((vm_offset_t)(_o) < VM_MIN_KERNEL_ADDRESS)) - -#else -#error unsupported architecture -#endif #define KERNEL_STACK_SIZE (I386_PGBYTES*4) @@ -236,19 +223,6 @@ * The common alignment for LP64 is for longs and pointers i.e. 8 bytes. */ -#if defined(__i386__) - -#define KALLOC_MINSIZE 16 /* minimum allocation size */ -#define KALLOC_LOG2_MINALIGN 4 /* log2 minimum alignment */ - -#define LINEAR_KERNEL_ADDRESS ((vm_offset_t) 0x00000000) - -#define VM_MIN_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x00000000U) -#define VM_MAX_KERNEL_LOADED_ADDRESS ((vm_offset_t) 0x1FFFFFFFU) - -#define NCOPY_WINDOWS 4 - -#elif defined(__x86_64__) #define KALLOC_MINSIZE 16 /* minimum allocation size */ #define KALLOC_LOG2_MINALIGN 4 /* log2 minimum alignment */ @@ -261,9 +235,6 @@ #define NCOPY_WINDOWS 0 -#else -#error unsupported architecture -#endif /* * Conversion between 80386 pages and VM pages diff --git a/osfmk/mach/lock_set.defs b/osfmk/mach/lock_set.defs index c7ee5845b..d4cb9abb6 100644 --- a/osfmk/mach/lock_set.defs +++ b/osfmk/mach/lock_set.defs @@ -47,11 +47,15 @@ subsystem #include /* + * OBSOLETE interfaces * a lock_set_t is created and destroyed through the task object. * lock_set_create(task,&lock_set_t,...); * lock_set_destroy(task,lock_set_t); */ +/* + * OBSOLETE interfaces + */ routine lock_acquire( lock_set : lock_set_t; lock_id : int); diff --git a/osfmk/mach/mach_norma.defs b/osfmk/mach/mach_norma.defs deleted file mode 100644 index 0bc1c6945..000000000 --- a/osfmk/mach/mach_norma.defs +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:30 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:25:45 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.3.17.2 1994/09/23 02:38:45 ezf - * change marker to not FREE - * [1994/09/22 21:40:54 ezf] - * - * Revision 1.3.17.1 1994/06/13 20:49:36 dlb - * Merge MK6 and NMK17 - * [1994/06/13 20:47:52 dlb] - * - * Revision 1.3.11.1 1994/02/08 11:01:11 bernadat - * Checked in NMK16_1 changes - * [94/02/04 bernadat] - * - * Revision 1.3.4.2 1993/07/22 13:54:29 bernadat - * [Joe Barerra: joebar@microsoft.com] Added norma_task_teleport as - * an alternative migration mechanism. - * Change from NORMA_MK14.6 [93/03/08 sjs] - * [93/07/16 bernadat] - * - * Revision 1.3.2.2 1993/06/02 23:45:24 jeffc - * Added to OSF/1 R1.3 from NMK15.0. - * [1993/06/02 21:16:50 jeffc] - * - * Revision 1.3 1992/12/07 21:29:17 robert - * integrate any changes below for 14.0 (branch from 13.16 base) - * - * Joseph Barrera (jsb) at Carnegie-Mellon University 03-Jul-92 - * Added norma_task_clone to support task migration. - * [1992/12/06 20:25:30 robert] - * - * Revision 1.2 1992/11/25 01:13:00 robert - * fix history - * [1992/11/09 21:25:21 robert] - * - * integrate changes below for norma_14 - * [1992/11/09 16:42:52 robert] - * - * Revision 0.0 92/10/02 dwm - * Add Comment: norma_port_location_hint requires send rights on 'port'. - * - * Revision 1.1 1992/11/05 20:59:18 robert - * Initial revision - * [92/10/02 dwm] - * - * $EndLog$ - */ -/* CMU_HIST */ -/* - * Revision 2.6 91/12/13 13:45:12 jsb - * Moved non-exported interfaces to norma/norma_internal.defs. - * Changed name of task_create_remote to norma_task_create. - * Added comments. - * - * Revision 2.5 91/11/14 16:56:43 rpd - * Picked up mysterious norma changes. - * [91/11/14 rpd] - * - * Revision 2.4 91/08/28 11:15:08 jsb - * Added KERNEL_USER definitions. - * Removed norma_set_task_server. - * Added task_create_remote, norma_copy_create. - * [91/08/15 13:28:27 jsb] - * - * Revision 2.3 91/08/03 18:19:02 jsb - * Removed norma_get_{host,host_priv,device}_port; - * Use norma_{get,set}_special_port instead. - * [91/07/25 07:51:11 jsb] - * - * Revision 2.2 91/06/06 17:07:57 jsb - * First checkin. - * [91/05/25 10:37:22 jsb] - * - */ -/* CMU_ENDHIST */ -/* - * Mach Operating System - * Copyright (c) 1991 Carnegie Mellon University - * All Rights Reserved. 
- * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ - -subsystem -#if KERNEL_USER - KernelUser -#endif /* KERNEL_USER */ -#if KERNEL_SERVER - KernelServer -#endif /* KERNEL_SERVER */ - mach_norma 555000; -#ifdef KERNEL_USER -userprefix r_; -#endif /* KERNEL_USER */ - -#include -#include - -skip; - -/* - * Specify a node upon which children tasks will be created. - * This call exists only to allow testing with unmodified servers. - * Server developers should use norma_task_create instead. - */ -routine task_set_child_node( - target_task : task_t; - child_node : int); - -/* - * THIS CALL WILL BE ELIMINATED. - * Use norma_port_location_hint(,mach_task_self(),) instead. - */ -routine norma_node_self( - host : host_t; - out node : int); - -skip; - -skip; - -/* - * Create a task on the given node, possibly inheriting memory. - * Same inheritance semantics as task_create, including inheritance - * of initial ports and emulation library, except for memory: - * inheritance attributes are ignored, so that all regions appear - * in the child task, shared with the parent, until the parent - * is destroyed. (The inheritance of the regions in the child - * will, however, be set to match the parent.) - * - * This call is intended to support process migration, where the - * inheritance semantics of norma_task_create would break migrated - * programs that depended upon sharing relationships remaining - * after migration. - * - * This call is not a true task migration call, in that it does not - * migrate the port space, threads, and other non-address-space - * attributes of the task. - */ -routine norma_task_clone( - target_task : task_t; - inherit_memory : boolean_t; - child_node : int; - out child_task : task_t); - -/* - * Create a task on the given node, possibly inheriting memory. - * Same inheritance semantics as task_create, including inheritance - * of initial ports and emulation library. - * Setting child_node to node_self forces local task creation. - */ -routine norma_task_create( - target_task : task_t; - inherit_memory : boolean_t; - child_node : int; - out child_task : task_t); - -/* - * Get a given special port for a given node. - * Norma special ports are defined in norma_special_ports.h; - * examples include the master device port. - * There are a limited number of slots available for system servers. - */ -routine norma_get_special_port( - host_priv : host_priv_t; - node : int; - which : int; - out port : mach_port_t); - -/* - * Set a given special port for a given node. - * See norma_get_special_port. 
- */ -routine norma_set_special_port( - host_priv : host_priv_t; - which : int; - port : mach_port_t); - -/* - * Just like norma_task_clone, except target_task is terminated, - * allowing useful VM optimizations. - */ -routine norma_task_teleport( - target_task : task_t; - inherit_memory : boolean_t; - child_node : int; - out child_task : task_t); - -skip; - -skip; - -/* - * Return best guess of port's current location. - * Guaranteed to be a node where the port once was. - * Guaranteed to be accurate if port has never moved. - * Can be used to determine residence node for hosts, tasks, threads, etc. - */ -routine norma_port_location_hint( - task : task_t; - port : mach_port_t; - out node : int); - -/* vim: set ft=c : */ diff --git a/osfmk/mach/mach_port.defs b/osfmk/mach/mach_port.defs index 2799790b6..8591f77d4 100644 --- a/osfmk/mach/mach_port.defs +++ b/osfmk/mach/mach_port.defs @@ -208,7 +208,30 @@ routine mach_port_mod_refs( right : mach_port_right_t; delta : mach_port_delta_t); -skip; +/* + * Peek at the message queue for the specified receive + * right and return info about the message with the + * sequence number matching the input. If zero is + * specified as the seqno, the first message in the + * queue will be peeked. + * + * Only the following trailer types are currently supported: + * MACH_RCV_TRAILER_TYPE(MACH_MSG_TRAILER_FORMAT_0) + * + * or'ed with one of these element types: + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_NULL) + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_SEQNO) + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_SENDER) + * MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_AUDIT) + */ +routine mach_port_peek( + task : ipc_space_t; + name : mach_port_name_t; + trailer_type : mach_msg_trailer_type_t; + inout request_seqnop : mach_port_seqno_t; + out msg_sizep : mach_msg_size_t; + out msg_idp : mach_msg_id_t; + out trailer_infop : mach_msg_trailer_info_t, CountInOut); /* * Only valid for receive rights. @@ -517,4 +540,66 @@ routine mach_port_kobject( out object_addr : mach_vm_address_t); +/* + * Constructs a right based on the options passed + * in. Also allows guarding the port as one of the + * options if the requested right is a receive + * right. + */ +routine mach_port_construct( + task : ipc_space_t; + options : mach_port_options_ptr_t; +#ifdef LIBSYSCALL_INTERFACE + context : mach_port_context_t; +#else + context : uint64_t; +#endif + out name : mach_port_name_t); + +/* + * Destroys a mach port using the guard provided + * for guarded ports. Also reduces the user ref + * count for send rights as specified by srdelta. + */ +routine mach_port_destruct( + task : ipc_space_t; + name : mach_port_name_t; + srdelta : mach_port_delta_t; +#ifdef LIBSYSCALL_INTERFACE + guard : mach_port_context_t +#else + guard : uint64_t +#endif + ); + +/* + * Guard an already existing port. Allows guarding + * receive rights only. Uses the context field in the + * port structure to store the guard. + */ +routine mach_port_guard( + task : ipc_space_t; + name : mach_port_name_t; +#ifdef LIBSYSCALL_INTERFACE + guard : mach_port_context_t; +#else + guard : uint64_t; +#endif + strict : boolean_t); + +/* + * Unguard a port guarded previously. For unguarded ports + * or incorrect guards passed in it raises an exception + * indicating guarding misbehavior. 
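As a usage sketch for the mach_port_peek routine introduced above, assuming the conventional MIG-generated user-space prototype (the CountInOut trailer argument expands to a buffer pointer plus an in/out byte count); names here are illustrative:

    #include <mach/mach.h>
    #include <stdio.h>

    /* Peek at the first queued message on a receive right without
     * dequeuing it; a request seqno of 0 selects the head of the queue. */
    static void peek_head(mach_port_t rcv_right)
    {
        mach_port_seqno_t seqno = 0;
        mach_msg_size_t msg_size = 0;
        mach_msg_id_t msg_id = 0;
        char trailer_buf[MAX_TRAILER_SIZE];
        mach_msg_type_number_t trailer_len = sizeof(trailer_buf);

        kern_return_t kr = mach_port_peek(mach_task_self(), rcv_right,
            MACH_RCV_TRAILER_TYPE(MACH_MSG_TRAILER_FORMAT_0) |
            MACH_RCV_TRAILER_ELEMENTS(MACH_RCV_TRAILER_SEQNO),
            &seqno, &msg_size, &msg_id, trailer_buf, &trailer_len);

        if (kr == KERN_SUCCESS)
            printf("head: id=%d size=%u seqno=%u\n", msg_id, msg_size, seqno);
    }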
+ */ +routine mach_port_unguard( + task : ipc_space_t; + name : mach_port_name_t; +#ifdef LIBSYSCALL_INTERFACE + guard : mach_port_context_t +#else + guard : uint64_t +#endif + ); + /* vim: set ft=c : */ diff --git a/osfmk/mach/mach_traps.h b/osfmk/mach/mach_traps.h index 915bf9381..cfff70c09 100644 --- a/osfmk/mach/mach_traps.h +++ b/osfmk/mach/mach_traps.h @@ -168,6 +168,15 @@ extern kern_return_t _kernelrpc_mach_vm_protect_trap( vm_prot_t new_protection ); +extern kern_return_t _kernelrpc_mach_vm_map_trap( + mach_port_name_t target, + mach_vm_offset_t *address, + mach_vm_size_t size, + mach_vm_offset_t mask, + int flags, + vm_prot_t cur_protection +); + extern kern_return_t _kernelrpc_mach_port_allocate_trap( mach_port_name_t target, mach_port_right_t right, @@ -217,6 +226,33 @@ extern kern_return_t _kernelrpc_mach_port_extract_member_trap( mach_port_name_t pset ); +extern kern_return_t _kernelrpc_mach_port_construct_trap( + mach_port_name_t target, + mach_port_options_t *options, + uint64_t context, + mach_port_name_t *name +); + +extern kern_return_t _kernelrpc_mach_port_destruct_trap( + mach_port_name_t target, + mach_port_name_t name, + mach_port_delta_t srdelta, + uint64_t guard +); + +extern kern_return_t _kernelrpc_mach_port_guard_trap( + mach_port_name_t target, + mach_port_name_t name, + uint64_t guard, + boolean_t strict +); + +extern kern_return_t _kernelrpc_mach_port_unguard_trap( + mach_port_name_t target, + mach_port_name_t name, + uint64_t guard +); + extern kern_return_t macx_swapon( uint64_t filename, int flags, @@ -433,7 +469,6 @@ struct semaphore_timedwait_signal_trap_args { extern kern_return_t semaphore_timedwait_signal_trap( struct semaphore_timedwait_signal_trap_args *args); -#if !defined(CONFIG_EMBEDDED) struct map_fd_args { PAD_ARG_(int, fd); PAD_ARG_(vm_offset_t, offset); @@ -443,7 +478,6 @@ struct map_fd_args { }; extern kern_return_t map_fd( struct map_fd_args *args); -#endif /* !defined(CONFIG_EMBEDDED) */ struct task_for_pid_args { PAD_ARG_(mach_port_name_t, target_tport); @@ -607,6 +641,18 @@ struct _kernelrpc_mach_vm_protect_args { extern kern_return_t _kernelrpc_mach_vm_protect_trap( struct _kernelrpc_mach_vm_protect_args *args); +struct _kernelrpc_mach_vm_map_trap_args { + PAD_ARG_(mach_port_name_t, target); + PAD_ARG_(user_addr_t, addr); + PAD_ARG_(mach_vm_size_t, size); + PAD_ARG_(mach_vm_offset_t, mask); + PAD_ARG_(int, flags); + PAD_ARG_8 + PAD_ARG_(vm_prot_t, cur_protection); +}; +extern kern_return_t _kernelrpc_mach_vm_map_trap( + struct _kernelrpc_mach_vm_map_trap_args *args); + struct _kernelrpc_mach_port_allocate_args { PAD_ARG_(mach_port_name_t, target); PAD_ARG_(mach_port_right_t, right); @@ -672,6 +718,42 @@ struct _kernelrpc_mach_port_extract_member_args { extern kern_return_t _kernelrpc_mach_port_extract_member_trap( struct _kernelrpc_mach_port_extract_member_args *args); +struct _kernelrpc_mach_port_construct_args { + PAD_ARG_(mach_port_name_t, target); + PAD_ARG_(user_addr_t, options); + PAD_ARG_(uint64_t, context); + PAD_ARG_(user_addr_t, name); +}; +extern kern_return_t _kernelrpc_mach_port_construct_trap( + struct _kernelrpc_mach_port_construct_args *args); + +struct _kernelrpc_mach_port_destruct_args { + PAD_ARG_(mach_port_name_t, target); + PAD_ARG_(mach_port_name_t, name); + PAD_ARG_(mach_port_delta_t, srdelta); + PAD_ARG_(uint64_t, guard); +}; +extern kern_return_t _kernelrpc_mach_port_destruct_trap( + struct _kernelrpc_mach_port_destruct_args *args); + +struct _kernelrpc_mach_port_guard_args { + PAD_ARG_(mach_port_name_t, 
target); + PAD_ARG_(mach_port_name_t, name); + PAD_ARG_(uint64_t, guard); + PAD_ARG_(boolean_t, strict); +}; +extern kern_return_t _kernelrpc_mach_port_guard_trap( + struct _kernelrpc_mach_port_guard_args *args); + +struct _kernelrpc_mach_port_unguard_args { + PAD_ARG_(mach_port_name_t, target); + PAD_ARG_(mach_port_name_t, name); + PAD_ARG_(uint64_t, guard); +}; +extern kern_return_t _kernelrpc_mach_port_unguard_trap( + struct _kernelrpc_mach_port_unguard_args *args); + + /* not published to LP64 clients yet */ struct iokit_user_client_trap_args { PAD_ARG_(void *, userClientRef); diff --git a/osfmk/mach/mach_types.defs b/osfmk/mach/mach_types.defs index e890f6652..dddcbdeae 100644 --- a/osfmk/mach/mach_types.defs +++ b/osfmk/mach/mach_types.defs @@ -93,15 +93,31 @@ type upl_t = mach_port_t #endif /* KERNEL_PRIVATE */ type mach_port_status_t = struct[10] of integer_t; /* obsolete */ +type mach_port_info_ext_t = struct[17] of integer_t; /* mach_port_info_t: can hold either a * mach_port_status_t (9 ints) or a - * mach_port_limits_t (1 int). If new flavors of + * mach_port_limits_t (1 int) or a + * mach_port_info_ext_t (17 ints). If new flavors of * mach_port_{get,set}_attributes are added, the size of * this array may have to be increased. (See mach/port.h) */ type mach_port_flavor_t = int; -type mach_port_info_t = array[*:10] of integer_t; +type mach_port_info_t = array[*:17] of integer_t; + + /* + * mach_msg_max_trailer_t: can hold + * mach_msg_trailer_type_t (1 int) + * mach_msg_trailer_size_t (1 int) + * mach_port_seqno_t (1 int) + * security_token_t (2 ints) + * audit_token_t (8 ints) + * mach_port_context_t (2 ints) + * msgh_ad (1 int) + * msg_labels_t (1 int) + */ +type mach_msg_trailer_type_t = int; +type mach_msg_trailer_info_t = array[*:68] of char; type task_t = mach_port_t #if KERNEL_SERVER @@ -222,7 +238,9 @@ type thread_policy_t = array[*:16] of integer_t; * definition may need to be changed. (See * mach/task_info.h and mach/policy.h) */ type task_flavor_t = int; -type task_info_t = array[*:32] of integer_t; +type task_info_t = array[*:52] of integer_t; + +type task_purgable_info_t = struct[68] of integer_t; type task_policy_flavor_t = natural_t; type task_policy_t = array[*:16] of integer_t; @@ -304,15 +322,14 @@ type host_security_t = mach_port_t * kernel_resource_sizes_t (5 ints) * host_load_info_t (6 ints) * vm_statistics32_t (15 ints) + * host_purgable_info_t (68 ints) * host_expired_task_info uses a task_power_info (18 ints) * * If other host_info flavors are added, this definition may * need to be changed. (See mach/{host_info,vm_statistics}.h) */ type host_flavor_t = int; -type host_info_t = array[*:18] of integer_t; - - +type host_info_t = array[*:68] of integer_t; /* * host_info64_t: variable-sized inline array that can contain: * @@ -386,6 +403,9 @@ type time_value_t = struct[2] of integer_t; type mach_port_qos_t = struct[2] of integer_t; +type mach_port_options_t = struct[3] of uint64_t; +type mach_port_options_ptr_t = ^ mach_port_options_t; + type emulation_vector_t = ^array[] of vm_offset_t; type inline_existence_map_t = array[*:512] of char; @@ -423,7 +443,6 @@ type msg_labels_t = mach_port_t; * memory_object_behave_info_t (4 ints) * memory_object_perf_info_t (2 ints) * old_memory_object_attr_info_t (3 ints) - * memory_object_norma_info_t (5 ints) * If other flavors are added, this definition may * need to be changed. 
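The new mach_port_options_t MIG type is declared below as struct[3] of uint64_t, which must match the 24-byte C structure added to mach/port.h later in this patch (a uint32_t flags word, a one-int mach_port_limits_t, and two reserved uint64_t slots). A compile-time sketch of that invariant, assuming both headers are visible:

    #include <stdint.h>
    #include <mach/port.h>

    /* The MIG view (struct[3] of uint64_t) and the C view of
     * mach_port_options_t must describe the same 24 bytes. */
    _Static_assert(sizeof(mach_port_options_t) == 3 * sizeof(uint64_t),
                   "MIG and C definitions of mach_port_options_t disagree");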
(see mach/memory_object.h) */ type memory_object_flavor_t = int; @@ -488,6 +507,13 @@ type lock_set_t = mach_port_t #endif /* KERNEL_SERVER */ ; +type task_suspension_token_t = mach_port_move_send_once_t +#if KERNEL_SERVER + intran: task_suspension_token_t convert_port_to_task_suspension_token(mach_port_t) + outtran: mach_port_t convert_task_suspension_token_to_port(task_suspension_token_t) +#endif /* KERNEL_SERVER */ + ; + /* kernel module loader */ type kmod_t = int; type kmod_control_flavor_t = int; diff --git a/osfmk/mach/mach_types.h b/osfmk/mach/mach_types.h index 9c9a1afbb..2da7476a9 100644 --- a/osfmk/mach/mach_types.h +++ b/osfmk/mach/mach_types.h @@ -115,7 +115,7 @@ * If we are in the kernel, then pick up the kernel definitions for * the basic mach types. */ -typedef struct task *task_t, *task_name_t; +typedef struct task *task_t, *task_name_t, *task_suspension_token_t; typedef struct thread *thread_t, *thread_act_t; typedef struct ipc_space *ipc_space_t; typedef struct host *host_t; @@ -125,12 +125,17 @@ typedef struct processor *processor_t; typedef struct processor_set *processor_set_t; typedef struct processor_set *processor_set_control_t; typedef struct semaphore *semaphore_t; -typedef struct lock_set *lock_set_t; typedef struct ledger *ledger_t; typedef struct alarm *alarm_t; typedef struct clock *clock_serv_t; typedef struct clock *clock_ctrl_t; +/* + * OBSOLETE: lock_set interfaces are obsolete. + */ +typedef struct lock_set *lock_set_t; +struct lock_set ; + #ifndef MACH_KERNEL_PRIVATE __BEGIN_DECLS @@ -141,7 +146,6 @@ struct host ; struct processor ; struct processor_set ; struct semaphore ; -struct lock_set ; struct ledger ; struct alarm ; struct clock ; @@ -158,6 +162,7 @@ __END_DECLS */ typedef mach_port_t task_t; typedef mach_port_t task_name_t; +typedef mach_port_t task_suspension_token_t; typedef mach_port_t thread_t; typedef mach_port_t thread_act_t; typedef mach_port_t ipc_space_t; @@ -265,7 +270,7 @@ typedef natural_t ledger_item_t; #define LEDGER_ITEM_INFINITY ((ledger_item_t) (~0)) typedef int64_t ledger_amount_t; -#define LEDGER_LIMIT_INFINITY ((ledger_amount_t)(((uint64_t)1 << 63) - 1)) +#define LEDGER_LIMIT_INFINITY ((ledger_amount_t)((1ULL << 63) - 1)) typedef mach_vm_offset_t *emulation_vector_t; typedef char *user_subsystem_t; diff --git a/osfmk/mach/mach_vm.defs b/osfmk/mach/mach_vm.defs index f4793f8e0..806215525 100644 --- a/osfmk/mach/mach_vm.defs +++ b/osfmk/mach/mach_vm.defs @@ -196,9 +196,9 @@ routine vm_inherit( * protection on the specified range must permit reading.] */ #if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) -routine mach_vm_read( +routine PREFIX(mach_vm_read) ( #else -routine vm_read( +routine PREFIX(vm_read) ( #endif target_task : vm_map_t; address : mach_vm_address_t; @@ -333,12 +333,12 @@ routine vm_behavior_set( * for further consistency.] */ #if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) -routine mach_vm_map( +routine PREFIX(mach_vm_map) ( #else #if defined(__arm__) && !LIBSYSCALL_INTERFACE routine _vm_map_arm( #else -routine vm_map( +routine PREFIX(vm_map) ( #endif #endif target_task : vm_task_entry_t; @@ -373,9 +373,9 @@ routine vm_machine_attribute( * Map portion of a task's address space. 
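The rewritten LEDGER_LIMIT_INFINITY above is equivalent to the old spelling (both forms shift an unsigned 64-bit one, so there is no signed-overflow hazard); the constant is simply INT64_MAX, the largest positive ledger_amount_t. A standalone sanity-check sketch:

    #include <assert.h>
    #include <stdint.h>

    typedef int64_t ledger_amount_t;        /* as in mach_types.h */
    #define LEDGER_LIMIT_INFINITY ((ledger_amount_t)((1ULL << 63) - 1))

    int main(void)
    {
        /* The shift happens in unsigned arithmetic; only the final
         * converted value is signed, and it equals INT64_MAX. */
        assert(LEDGER_LIMIT_INFINITY == INT64_MAX);
        return 0;
    }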
*/ #if !defined(_MACH_VM_PUBLISH_AS_LOCAL_) -routine mach_vm_remap( +routine PREFIX(mach_vm_remap) ( #else -routine vm_remap( +routine PREFIX(vm_remap) ( #endif target_task : vm_map_t; inout target_address : mach_vm_address_t; diff --git a/osfmk/mach/machine.h b/osfmk/mach/machine.h index 1520a6049..4b9f35824 100644 --- a/osfmk/mach/machine.h +++ b/osfmk/mach/machine.h @@ -351,7 +351,11 @@ __END_DECLS #define CPU_SUBTYPE_ARM_XSCALE ((cpu_subtype_t) 8) #define CPU_SUBTYPE_ARM_V7 ((cpu_subtype_t) 9) #define CPU_SUBTYPE_ARM_V7F ((cpu_subtype_t) 10) /* Cortex A9 */ +#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t) 11) /* Swift */ #define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t) 12) /* Kirkwood40 */ +#define CPU_SUBTYPE_ARM_V6M ((cpu_subtype_t) 14) /* Not meant to be run under xnu */ +#define CPU_SUBTYPE_ARM_V7M ((cpu_subtype_t) 15) /* Not meant to be run under xnu */ +#define CPU_SUBTYPE_ARM_V7EM ((cpu_subtype_t) 16) /* Not meant to be run under xnu */ #endif /* !__ASSEMBLER__ */ @@ -382,8 +386,10 @@ __END_DECLS #define CPUFAMILY_ARM_9 0xe73283ae #define CPUFAMILY_ARM_11 0x8ff620d8 #define CPUFAMILY_ARM_XSCALE 0x53b005f5 +#define CPUFAMILY_ARM_12 0xbd1b0ae9 #define CPUFAMILY_ARM_13 0x0cc90e64 #define CPUFAMILY_ARM_14 0x96077ef1 +#define CPUFAMILY_ARM_SWIFT 0x1e2d6381 /* The following synonyms are deprecated: */ #define CPUFAMILY_INTEL_6_14 CPUFAMILY_INTEL_YONAH diff --git a/osfmk/mach/machine/sdt.h b/osfmk/mach/machine/sdt.h index cf99fbd09..d7b5c9d82 100644 --- a/osfmk/mach/machine/sdt.h +++ b/osfmk/mach/machine/sdt.h @@ -192,7 +192,7 @@ DTRACE_PROBE1(__io_, name, arg1); #define DTRACE_IO2(name, type1, arg1, type2, arg2) \ - DTRACE_PROBE2(__io_, name, type1, arg1, type2, arg2); + DTRACE_PROBE2(__io_, name, arg1, arg2); #define DTRACE_IO3(name, type1, arg1, type2, arg2, type3, arg3) \ DTRACE_PROBE3(__io_, name, arg1, arg2, arg3); @@ -217,6 +217,9 @@ #define DTRACE_TMR6(name, type1, arg1, type2, arg2, type3, arg3, arg4, arg5, arg6) \ DTRACE_PROBE6(__sdt_, name, arg1, arg2, arg3, arg4, arg5, arg6); +#define DTRACE_TMR7(name, type1, arg1, type2, arg2, type3, arg3, arg4, arg5, arg6, arg7) \ + DTRACE_PROBE7(__sdt_, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + #define DTRACE_VM(name) \ DTRACE_PROBE(__vminfo_, name) @@ -281,6 +284,66 @@ type4, arg4, type5, arg5) \ DTRACE_PROBE5(__tcp_, name, arg1, arg2, arg3, arg4, arg5) +#define DTRACE_MPTCP(name) \ + DTRACE_PROBE(__mptcp_, name) + +#define DTRACE_MPTCP1(name, type1, arg1) \ + DTRACE_PROBE1(__mptcp_, name, arg1) + +#define DTRACE_MPTCP2(name, type1, arg1, type2, arg2) \ + DTRACE_PROBE2(__mptcp_, name, arg1, arg2) + +#define DTRACE_MPTCP3(name, type1, arg1, type2, arg2, type3, arg3) \ + DTRACE_PROBE3(__mptcp_, name, arg1, arg2, arg3) + +#define DTRACE_MPTCP4(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4) \ + DTRACE_PROBE4(__mptcp_, name, arg1, arg2, arg3, arg4) + +#define DTRACE_MPTCP5(name, typ1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + DTRACE_PROBE5(__mptcp_, name, arg1, arg2, arg3, arg4, arg5) + +#define DTRACE_MPTCP6(name, typ1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + DTRACE_PROBE6(__mptcp_, name, arg1, arg2, arg3, arg4, arg5, arg6) + +#define DTRACE_MPTCP7(name, typ1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6, \ + type7, arg7) \ + DTRACE_PROBE7(__mptcp_, name, arg1, arg2, arg3, arg4, arg5, \ + arg6, arg7) + +#define DTRACE_FSINFO(name, type, vp) \ + DTRACE_PROBE1(__fsinfo_, name, vp) + +#define DTRACE_FSINFO_IO(name, type1, vp, type2, 
size) \ + DTRACE_PROBE2(__fsinfo_, name, vp, size) + +#define DTRACE_BOOST(name) \ + DTRACE_PROBE(__boost_, name); + +#define DTRACE_BOOST1(name, type1, arg1) \ + DTRACE_PROBE1(__boost_, name, arg1); + +#define DTRACE_BOOST2(name, type1, arg1, type2, arg2) \ + DTRACE_PROBE2(__boost_, name, arg1, arg2); + +#define DTRACE_BOOST3(name, type1, arg1, type2, arg2, type3, arg3) \ + DTRACE_PROBE3(__boost_, name, arg1, arg2, arg3); + +#define DTRACE_BOOST4(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4) \ + DTRACE_PROBE4(__boost_, name, arg1, arg2, arg3, arg4); + +#define DTRACE_BOOST5(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4, type5, arg5) \ + DTRACE_PROBE5(__boost_, name, arg1, arg2, arg3, arg4, arg5); + +#define DTRACE_BOOST6(name, type1, arg1, type2, arg2, \ + type3, arg3, type4, arg4, type5, arg5, type6, arg6) \ + DTRACE_PROBE6(__boost_, name, arg1, arg2, arg3, arg4, arg5, arg6); + #else /* CONFIG_DTRACE */ #define DTRACE_SCHED(name) do {} while (0) @@ -324,6 +387,26 @@ #define DTRACE_TCP4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) do {} while(0) #define DTRACE_TCP5(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) do {} while(0) +#define DTRACE_MPTCP(name) do {} while(0) +#define DTRACE_MPTCP1(name, type1, arg1) do {} while(0) +#define DTRACE_MPTCP2(name, type1, arg1, type2, arg2) do {} while(0) +#define DTRACE_MPTCP3(name, type1, arg1, type2, arg2, type3, arg3) do {} while(0) +#define DTRACE_MPTCP4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) do {} while(0) +#define DTRACE_MPTCP5(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) do {} while(0) +#define DTRACE_MPTCP6(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6) do {} while(0) +#define DTRACE_MPTCP7(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6, type7, arg7) do {} while(0) + +#define DTRACE_FSINFO(name, type, vp) do {} while(0) +#define DTRACE_FSINFO_IO(name, type1, vp, type2, size) do {} while (0) + +#define DTRACE_BOOST(name) do {} while(0) +#define DTRACE_BOOST1(name, type1, arg1) do {} while(0) +#define DTRACE_BOOST2(name, type1, arg1, type2, arg2) do {} while(0) +#define DTRACE_BOOST3(name, type1, arg1, type2, arg2, type3, arg3) do {} while(0) +#define DTRACE_BOOST4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) do {} while(0) +#define DTRACE_BOOST5(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5) do {} while(0) +#define DTRACE_BOOST6(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4, type5, arg5, type6, arg6) do {} while(0) + #endif /* CONFIG_DTRACE */ #endif /* KERNEL */ diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h index 281ff1c9f..afdb148bb 100644 --- a/osfmk/mach/memory_object_types.h +++ b/osfmk/mach/memory_object_types.h @@ -384,6 +384,9 @@ typedef struct memory_object_attr_info memory_object_attr_info_data_t; #define MAP_MEM_NAMED_CREATE 0x020000 /* create extant object */ #define MAP_MEM_PURGABLE 0x040000 /* create a purgable VM object */ #define MAP_MEM_NAMED_REUSE 0x080000 /* reuse provided entry if identical */ +#define MAP_MEM_USE_DATA_ADDR 0x100000 /* preserve address of data, rather than base of page */ +#define MAP_MEM_VM_COPY 0x200000 /* make a copy of a VM range */ +#define MAP_MEM_VM_SHARE 0x400000 /* extract a VM range for remap */ #ifdef KERNEL @@ -469,9 +472,11 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_UBC_PAGEOUT 0x04000000 #define 
UPL_UBC_PAGEIN		0x08000000
 #define UPL_REQUEST_SET_DIRTY	0x10000000
+#define UPL_REQUEST_NO_FAULT	0x20000000 /* fail if pages not all resident */
+#define UPL_NOZEROFILLIO	0x40000000 /* allow non zerofill pages present */
 
 /* UPL flags known by this kernel */
-#define UPL_VALID_FLAGS		0x1FFFFFFF
+#define UPL_VALID_FLAGS		0x7FFFFFFF
 
 
 /* upl abort error flags */
@@ -552,6 +557,15 @@ typedef uint32_t	upl_size_t;	/* page-aligned byte size */
  */
 #define UPL_IOSTREAMING	0x100
 
+/*
+ * Currently, it's only used for the swap pagein path.
+ * Since the swap + compressed pager layer manage their
+ * pages, these pages are not marked "absent" i.e. these
+ * are "valid" pages. The pagein path will _not_ issue an
+ * I/O (correctly) for valid pages. So, this flag is used
+ * to override that logic in the vnode I/O path.
+ */
+#define UPL_IGNORE_VALID_PAGE_CHECK	0x200
diff --git a/osfmk/mach/message.h b/osfmk/mach/message.h
index eba414fc1..7c1f16164 100644
--- a/osfmk/mach/message.h
+++ b/osfmk/mach/message.h
@@ -128,7 +128,9 @@ typedef natural_t mach_msg_timeout_t;
 #define MACH_MSGH_BITS_USER		0x8000ffffU
 
 #define MACH_MSGH_BITS_CIRCULAR		0x40000000	/* internal use only */
-#define MACH_MSGH_BITS_USED		0xc000ffffU
+#define MACH_MSGH_BITS_RAISEIMP		0x20000000	/* importance raise, internal use only */
+#define MACH_MSGH_BITS_IMPHOLDASRT	0x10000000	/* holds assertion already, in userland */
+#define MACH_MSGH_BITS_USED		0xf000ffffU
 
 #define MACH_MSGH_BITS_PORTS_MASK				\
 		(MACH_MSGH_BITS_REMOTE_MASK|MACH_MSGH_BITS_LOCAL_MASK)
@@ -371,6 +373,7 @@ typedef unsigned int mach_msg_trailer_type_t;
 #define	MACH_MSG_TRAILER_FORMAT_0	0
 
 typedef unsigned int mach_msg_trailer_size_t;
+typedef char *mach_msg_trailer_info_t;
 
 typedef struct
 {
@@ -464,6 +467,7 @@ typedef struct
  * another module may exceed the local modules notion of
 * MAX_TRAILER_SIZE.
*/ + typedef mach_msg_mac_trailer_t mach_msg_max_trailer_t; #define MAX_TRAILER_SIZE ((mach_msg_size_t)sizeof(mach_msg_max_trailer_t)) @@ -587,18 +591,23 @@ typedef integer_t mach_msg_option_t; #define MACH_SEND_MSG 0x00000001 #define MACH_RCV_MSG 0x00000002 -#define MACH_RCV_LARGE 0x00000004 -#define MACH_SEND_TIMEOUT 0x00000010 -#define MACH_SEND_INTERRUPT 0x00000040 /* libmach implements */ +#define MACH_RCV_LARGE 0x00000004 /* report large message sizes */ +#define MACH_RCV_LARGE_IDENTITY 0x00000008 /* identify source of large messages */ + +#define MACH_SEND_TIMEOUT 0x00000010 /* timeout value applies to send */ +#define MACH_SEND_INTERRUPT 0x00000040 /* don't restart interrupted sends */ #define MACH_SEND_NOTIFY 0x00000080 /* arm send-possible notify */ -#define MACH_SEND_ALWAYS 0x00010000 /* internal use only */ -#define MACH_SEND_TRAILER 0x00020000 +#define MACH_SEND_ALWAYS 0x00010000 /* ignore qlimits - kernel only */ +#define MACH_SEND_TRAILER 0x00020000 /* sender-provided trailer */ +#define MACH_SEND_NOIMPORTANCE 0x00040000 /* msg won't carry importance */ +#define MACH_SEND_IMPORTANCE 0x00080000 /* msg carries importance - kernel only */ -#define MACH_RCV_TIMEOUT 0x00000100 + +#define MACH_RCV_TIMEOUT 0x00000100 /* timeout value applies to receive */ #define MACH_RCV_NOTIFY 0x00000200 /* reserved - legacy */ -#define MACH_RCV_INTERRUPT 0x00000400 /* libmach implements */ -#define MACH_RCV_OVERWRITE 0x00001000 +#define MACH_RCV_INTERRUPT 0x00000400 /* don't restart interrupted receive */ +#define MACH_RCV_OVERWRITE 0x00001000 /* scatter receive */ /* * NOTE: a 0x00------ RCV mask implies to ask for @@ -619,10 +628,41 @@ typedef integer_t mach_msg_option_t; #define MACH_RCV_TRAILER_TYPE(x) (((x) & 0xf) << 28) #define MACH_RCV_TRAILER_ELEMENTS(x) (((x) & 0xf) << 24) -#define MACH_RCV_TRAILER_MASK ((0xff << 24)) +#define MACH_RCV_TRAILER_MASK ((0xf << 24)) #define GET_RCV_ELEMENTS(y) (((y) >> 24) & 0xf) +#ifdef MACH_KERNEL_PRIVATE +/* The options that the kernel honors when passed from user space */ +#define MACH_SEND_USER (MACH_SEND_MSG | \ + MACH_SEND_TIMEOUT | MACH_SEND_NOTIFY | \ + MACH_SEND_TRAILER | MACH_SEND_NOIMPORTANCE ) + +#define MACH_RCV_USER (MACH_RCV_MSG | \ + MACH_RCV_TIMEOUT | MACH_RCV_OVERWRITE | \ + MACH_RCV_LARGE | MACH_RCV_LARGE_IDENTITY | \ + MACH_RCV_TRAILER_MASK) + +#define MACH_MSG_OPTION_USER (MACH_SEND_USER | MACH_RCV_USER) + +/* The options implemented by the library interface to mach_msg et. al. */ +#define MACH_MSG_OPTION_LIB (MACH_SEND_INTERRUPT | MACH_RCV_INTERRUPT) + +/* Default options to use when sending from the kernel */ +#if 11938665 + /* + * Until we are sure of its effects, we are disabling + * importance donation from the kernel-side of user + * threads in importance-donating tasks. + */ +#define MACH_SEND_KERNEL_DEFAULT (MACH_SEND_MSG | \ + MACH_SEND_ALWAYS | MACH_SEND_NOIMPORTANCE) +#else +#define MACH_SEND_KERNEL_DEFAULT (MACH_SEND_MSG | MACH_SEND_ALWAYS) +#endif + +#endif /* MACH_KERNEL_PRIVATE */ + /* * XXXMAC: note that in the case of MACH_RCV_TRAILER_LABELS, * we just fall through to mach_msg_max_trailer_t. diff --git a/osfmk/mach/norma_special_ports.h b/osfmk/mach/norma_special_ports.h deleted file mode 100644 index e4da723f6..000000000 --- a/osfmk/mach/norma_special_ports.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. 
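A sketch of how a user-space client might use the new MACH_SEND_NOIMPORTANCE option defined above (it is part of MACH_SEND_USER, so the kernel honors it when passed from user space); `dest` stands for any valid send right and the message layout is illustrative:

    #include <mach/mach.h>

    /* Send a trivial message without donating importance to the
     * receiver; `dest` is assumed to be a valid send right. */
    static kern_return_t send_no_importance(mach_port_t dest)
    {
        mach_msg_header_t msg = {
            .msgh_bits        = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0),
            .msgh_size        = sizeof(msg),
            .msgh_remote_port = dest,
            .msgh_local_port  = MACH_PORT_NULL,
            .msgh_id          = 0x1234,
        };
        return mach_msg(&msg, MACH_SEND_MSG | MACH_SEND_NOIMPORTANCE,
                        sizeof(msg), 0, MACH_PORT_NULL,
                        MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
    }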
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * Mach Operating System - * Copyright (c) 1991 Carnegie Mellon University - * All Rights Reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" - * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR - * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - */ -/* - * File: mach/norma_special_ports.h - * - * Defines codes for remote access to special ports. 
- */
-
-#ifndef	_MACH_NORMA_SPECIAL_PORTS_H_
-#define _MACH_NORMA_SPECIAL_PORTS_H_
-
-#include
-
-#define norma_get_host_port(host, node, port)		\
-	(host_get_special_port((host), (node),		\
-	HOST_PORT, (port)))
-
-#define norma_get_host_priv_port(host, node, port)	\
-	(host_get_special_port((host), (node),		\
-	HOST_PRIV_PORT, (port)))
-
-#define norma_get_io_master_port(host, node, port)	\
-	(host_get_special_port((host), (node),		\
-	HOST_IO_MASTER_PORT, (port)))
-
-#define norma_get_dynamic_pager_port(host, port)	\
-	(host_get_special_port((host), 0,		\
-	HOST_DYNAMIC_PAGER_PORT, (port)))
-
-#define norma_get_audit_control_port(host, node, port)	\
-	(host_get_special_port((host), (node),		\
-	HOST_AUDIT_CONTROL_PORT, (port)))
-
-#define norma_get_user_notification_port(host, node, port)	\
-	(host_get_special_port((host), (node),		\
-	HOST_USER_NOTIFICATION_PORT, (port)))
-
-#endif	/* _MACH_NORMA_SPECIAL_PORTS_H_ */
diff --git a/osfmk/mach/port.h b/osfmk/mach/port.h
index b09673aba..f9594bfb8 100644
--- a/osfmk/mach/port.h
+++ b/osfmk/mach/port.h
@@ -173,10 +173,8 @@ typedef ipc_port_t mach_port_t;
  *
  */
 
-#ifndef _MACH_PORT_T
-#define _MACH_PORT_T
-typedef mach_port_name_t mach_port_t;
-#endif
+#include
+#include
 
 #endif	/* KERNEL */
 
@@ -327,6 +325,20 @@ typedef struct mach_port_limits {
 	mach_port_msgcount_t	mpl_qlimit;	/* number of msgs */
 } mach_port_limits_t;
 
+/* Possible values for mps_flags (part of mach_port_status_t) */
+#define MACH_PORT_STATUS_FLAG_TEMPOWNER		0x01
+#define MACH_PORT_STATUS_FLAG_GUARDED		0x02
+#define MACH_PORT_STATUS_FLAG_STRICT_GUARD	0x04
+#define MACH_PORT_STATUS_FLAG_IMP_DONATION	0x08
+#define MACH_PORT_STATUS_FLAG_REVIVE		0x10
+#define MACH_PORT_STATUS_FLAG_TASKPTR		0x20
+
+typedef struct mach_port_info_ext {
+	mach_port_status_t	mpie_status;
+	mach_port_msgcount_t	mpie_boost_cnt;
+	uint32_t		reserved[6];
+} mach_port_info_ext_t;
+
 typedef integer_t *mach_port_info_t;		/* varying array of natural_t */
 
 /* Flavors for mach_port_get/set_attributes() */
@@ -334,13 +346,17 @@ typedef int	mach_port_flavor_t;
 #define MACH_PORT_LIMITS_INFO		1	/* uses mach_port_limits_t */
 #define MACH_PORT_RECEIVE_STATUS	2	/* uses mach_port_status_t */
 #define MACH_PORT_DNREQUESTS_SIZE	3	/* info is int */
+#define MACH_PORT_TEMPOWNER		4	/* indicates receive right will be reassigned to another task */
+#define MACH_PORT_IMPORTANCE_RECEIVER	5	/* indicates receive right accepts priority donation */
+#define MACH_PORT_INFO_EXT		7	/* uses mach_port_info_ext_t */
 
 #define MACH_PORT_LIMITS_INFO_COUNT	((natural_t) \
 	(sizeof(mach_port_limits_t)/sizeof(natural_t)))
 #define MACH_PORT_RECEIVE_STATUS_COUNT	((natural_t) \
 	(sizeof(mach_port_status_t)/sizeof(natural_t)))
 #define MACH_PORT_DNREQUESTS_SIZE_COUNT	1
-
+#define MACH_PORT_INFO_EXT_COUNT	((natural_t) \
+	(sizeof(mach_port_info_ext_t)/sizeof(natural_t)))
 /*
  * Structure used to pass information about port allocation requests.
  * Must be padded to 64-bits total length.
@@ -352,6 +368,49 @@ typedef struct mach_port_qos {
 	natural_t		len;
 } mach_port_qos_t;
 
+/* Mach Port Guarding definitions */
+
+/*
+ * Flags for mach_port_options (used for
+ * invocation of mach_port_construct).
+ * Indicates attributes to be set for the newly
+ * allocated port.
+ */ +#define MPO_CONTEXT_AS_GUARD 0x01 /* Add guard to the port */ +#define MPO_QLIMIT 0x02 /* Set qlimit for the port msg queue */ +#define MPO_TEMPOWNER 0x04 /* Set the tempowner bit of the port */ +#define MPO_IMPORTANCE_RECEIVER 0x08 /* Mark the port as importance receiver */ +#define MPO_INSERT_SEND_RIGHT 0x10 /* Insert a send right for the port */ +#define MPO_STRICT 0x20 /* Apply strict guarding for port */ + +/* + * Structure to define optional attributes for a newly + * constructed port. + */ +typedef struct mach_port_options { + uint32_t flags; /* Flags defining attributes for port */ + mach_port_limits_t mpl; /* Message queue limit for port */ + uint64_t reserved[2]; /* Reserved */ +}mach_port_options_t; + +typedef mach_port_options_t *mach_port_options_ptr_t; + +/* + * EXC_GUARD represents a guard violation for both + * mach ports and file descriptors. GUARD_TYPE_ is used + * to differentiate among them. + */ +#define GUARD_TYPE_MACH_PORT 0x1 + +/* Reasons for exception for a guarded mach port */ +enum mach_port_guard_exception_codes { + kGUARD_EXC_DESTROY = 1u << 0, + kGUARD_EXC_MOD_REFS = 1u << 1, + kGUARD_EXC_SET_CONTEXT = 1u << 2, + kGUARD_EXC_UNGUARDED = 1u << 3, + kGUARD_EXC_INCORRECT_GUARD = 1u << 4 +}; + #if !__DARWIN_UNIX03 && !defined(_NO_PORT_T_FROM_MACH) /* * Mach 3.0 renamed everything to have mach_ in front of it. diff --git a/osfmk/mach/shared_region.h b/osfmk/mach/shared_region.h index 2297ba7b7..845cccc1d 100644 --- a/osfmk/mach/shared_region.h +++ b/osfmk/mach/shared_region.h @@ -63,14 +63,13 @@ #define SHARED_REGION_NESTING_MIN_PPC64 0x0000000010000000ULL #define SHARED_REGION_NESTING_MAX_PPC64 0x0000000010000000ULL -#define SHARED_REGION_BASE_ARM 0x30000000ULL -#define SHARED_REGION_SIZE_ARM 0x10000000ULL -#define SHARED_REGION_NESTING_BASE_ARM 0x30000000ULL -#define SHARED_REGION_NESTING_SIZE_ARM 0x10000000ULL +#define SHARED_REGION_BASE_ARM 0x2C000000ULL +#define SHARED_REGION_SIZE_ARM 0x14000000ULL +#define SHARED_REGION_NESTING_BASE_ARM 0x2C000000ULL +#define SHARED_REGION_NESTING_SIZE_ARM 0x14000000ULL #define SHARED_REGION_NESTING_MIN_ARM ? #define SHARED_REGION_NESTING_MAX_ARM ? 
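Stepping outside the patch for a moment: the mach_port_options_t and MPO_* guard flags added to osfmk/mach/port.h above pair with the _kernelrpc_mach_port_construct/_destruct traps added to osfmk/mach/syscall_sw.h below. As a minimal illustrative sketch only (not part of this patch), assuming the user-level mach_port_construct() and mach_port_destruct() wrappers that front those traps, a guarded, queue-limited receive right could be created and torn down like this:

/*
 * Illustrative sketch only. Assumes <mach/mach.h> exposes the
 * mach_port_construct()/mach_port_destruct() wrappers for the
 * construct/destruct traps added below.
 */
#include <mach/mach.h>
#include <stdio.h>

int main(void)
{
	mach_port_options_t opts = {
		/* guard the right with a context value; fail hard on misuse */
		.flags = MPO_CONTEXT_AS_GUARD | MPO_STRICT | MPO_QLIMIT,
		.mpl = { .mpl_qlimit = 16 },	/* honored because MPO_QLIMIT is set */
	};
	mach_port_context_t guard = (mach_port_context_t)0xfeedface;
	mach_port_name_t name = MACH_PORT_NULL;

	kern_return_t kr = mach_port_construct(mach_task_self(), &opts, guard, &name);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "mach_port_construct: %s\n", mach_error_string(kr));
		return 1;
	}

	/*
	 * With MPO_CONTEXT_AS_GUARD set, mach_port_destroy() on this name
	 * would raise EXC_GUARD with code kGUARD_EXC_DESTROY; the right must
	 * instead be torn down while presenting the matching guard value.
	 */
	kr = mach_port_destruct(mach_task_self(), name, 0 /* srdelta */, guard);
	return (kr == KERN_SUCCESS) ? 0 : 1;
}

MPO_STRICT is presumably what later surfaces as MACH_PORT_STATUS_FLAG_STRICT_GUARD in mach_port_status_t above.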
- #if defined(__i386__) #define SHARED_REGION_BASE SHARED_REGION_BASE_I386 #define SHARED_REGION_SIZE SHARED_REGION_SIZE_I386 diff --git a/osfmk/mach/syscall_sw.h b/osfmk/mach/syscall_sw.h index a3ef52610..42ccd6440 100644 --- a/osfmk/mach/syscall_sw.h +++ b/osfmk/mach/syscall_sw.h @@ -86,6 +86,7 @@ kernel_trap(_kernelrpc_mach_vm_allocate_trap,-10,5) /* 4 args, +1 for mach_vm_size_t */ kernel_trap(_kernelrpc_mach_vm_deallocate_trap,-12,5) /* 3 args, +2 for mach_vm_size_t and mach_vm_address_t */ kernel_trap(_kernelrpc_mach_vm_protect_trap,-14,7) /* 5 args, +2 for mach_vm_address_t and mach_vm_size_t */ +kernel_trap(_kernelrpc_mach_vm_map_trap,-15,9) kernel_trap(_kernelrpc_mach_port_allocate_trap,-16,3) kernel_trap(_kernelrpc_mach_port_destroy_trap,-17,2) kernel_trap(_kernelrpc_mach_port_deallocate_trap,-18,2) @@ -94,6 +95,8 @@ kernel_trap(_kernelrpc_mach_port_move_member_trap,-20,3) kernel_trap(_kernelrpc_mach_port_insert_right_trap,-21,4) kernel_trap(_kernelrpc_mach_port_insert_member_trap,-22,3) kernel_trap(_kernelrpc_mach_port_extract_member_trap,-23,3) +kernel_trap(_kernelrpc_mach_port_construct_trap,-24,5) +kernel_trap(_kernelrpc_mach_port_destruct_trap,-25,5) kernel_trap(mach_reply_port,-26,0) kernel_trap(thread_self_trap,-27,0) @@ -110,6 +113,8 @@ kernel_trap(semaphore_wait_signal_trap,-37,2) kernel_trap(semaphore_timedwait_trap,-38,3) kernel_trap(semaphore_timedwait_signal_trap,-39,4) +kernel_trap(_kernelrpc_mach_port_guard_trap,-41,5) +kernel_trap(_kernelrpc_mach_port_unguard_trap,-42,4) #if !defined(__LP64__) && !defined(__arm__) kernel_trap(map_fd,-43,5) #endif /*!defined(__LP64__) && !defined(__arm__) */ diff --git a/osfmk/mach/task.defs b/osfmk/mach/task.defs index 0c70e9aef..ca6660b0f 100644 --- a/osfmk/mach/task.defs +++ b/osfmk/mach/task.defs @@ -242,8 +242,7 @@ routine task_swap_exception_ports( out old_flavors : exception_flavor_array_t, SameCount); /* - * Create and destroy lock_set and semaphore synchronizers on a - * per-task basis (i.e. the task owns them). + * OBSOLETE interface. */ routine lock_set_create( task : task_t; @@ -251,10 +250,18 @@ routine lock_set_create( n_ulocks : int; policy : int); +/* + * OBSOLETE interface. + */ routine lock_set_destroy( task : task_t; lock_set : lock_set_t); +/* + * Create and destroy semaphore synchronizers on a + * per-task basis (i.e. the task owns them). + */ + routine semaphore_create( task : task_t; out semaphore : semaphore_t; @@ -410,4 +417,23 @@ routine task_set_state( flavor : thread_state_flavor_t; new_state : thread_state_t); +/* + * Change the task's physical footprint limit (in MB). 
+ */ +routine task_set_phys_footprint_limit( + task : task_t; + new_limit : int; + out old_limit : int); + /* vim: set ft=c : */ + +routine task_suspend2( + target_task : task_t; + out suspend_token : task_suspension_token_t); + +routine task_resume2( + suspend_token : task_suspension_token_t); + +routine task_purgable_info( + task : task_t; + out stats : task_purgable_info_t); diff --git a/osfmk/mach/task_info.h b/osfmk/mach/task_info.h index 8a24624c0..8893c0a1e 100644 --- a/osfmk/mach/task_info.h +++ b/osfmk/mach/task_info.h @@ -261,23 +261,8 @@ typedef struct task_extmod_info *task_extmod_info_t; #define TASK_EXTMOD_INFO_COUNT \ (sizeof(task_extmod_info_data_t) / sizeof(natural_t)) -/* Always 64-bit in user and kernel */ -#define MACH_TASK_BASIC_INFO 20 /* always 64-bit basic info */ - -#define TASK_POWER_INFO 21 -struct task_power_info { - uint64_t total_user; - uint64_t total_system; - uint64_t task_interrupt_wakeups; - uint64_t task_platform_idle_wakeups; - uint64_t task_timer_wakeups_bin_1; - uint64_t task_timer_wakeups_bin_2; -}; -typedef struct task_power_info task_power_info_data_t; -typedef struct task_power_info *task_power_info_t; -#define TASK_POWER_INFO_COUNT ((mach_msg_type_number_t) \ - (sizeof (task_power_info_data_t) / sizeof (natural_t))) +#define MACH_TASK_BASIC_INFO 20 /* always 64-bit basic info */ struct mach_task_basic_info { mach_vm_size_t virtual_size; /* virtual memory size (bytes) */ mach_vm_size_t resident_size; /* resident memory size (bytes) */ @@ -294,6 +279,57 @@ typedef struct mach_task_basic_info *mach_task_basic_info_t; #define MACH_TASK_BASIC_INFO_COUNT \ (sizeof(mach_task_basic_info_data_t) / sizeof(natural_t)) + +#define TASK_POWER_INFO 21 + +struct task_power_info { + uint64_t total_user; + uint64_t total_system; + uint64_t task_interrupt_wakeups; + uint64_t task_platform_idle_wakeups; + uint64_t task_timer_wakeups_bin_1; + uint64_t task_timer_wakeups_bin_2; +}; + +typedef struct task_power_info task_power_info_data_t; +typedef struct task_power_info *task_power_info_t; +#define TASK_POWER_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof (task_power_info_data_t) / sizeof (natural_t))) + + + +#define TASK_VM_INFO 22 +#define TASK_VM_INFO_PURGEABLE 23 +struct task_vm_info { + mach_vm_size_t virtual_size; /* virtual memory size (bytes) */ + integer_t region_count; /* number of memory regions */ + integer_t page_size; + mach_vm_size_t resident_size; /* resident memory size (bytes) */ + mach_vm_size_t resident_size_peak; /* peak resident size (bytes) */ + + mach_vm_size_t device; + mach_vm_size_t device_peak; + mach_vm_size_t internal; + mach_vm_size_t internal_peak; + mach_vm_size_t external; + mach_vm_size_t external_peak; + mach_vm_size_t reusable; + mach_vm_size_t reusable_peak; + mach_vm_size_t purgeable_volatile_pmap; + mach_vm_size_t purgeable_volatile_resident; + mach_vm_size_t purgeable_volatile_virtual; + mach_vm_size_t compressed; + mach_vm_size_t compressed_peak; + mach_vm_size_t compressed_lifetime; +}; +typedef struct task_vm_info task_vm_info_data_t; +typedef struct task_vm_info *task_vm_info_t; +#define TASK_VM_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof (task_vm_info_data_t) / sizeof (natural_t))) + + +typedef struct vm_purgeable_info task_purgable_info_t; + /* * Obsolete interfaces. 
*/ diff --git a/osfmk/mach/task_policy.h b/osfmk/mach/task_policy.h index 71d70526f..c05f6f1fd 100644 --- a/osfmk/mach/task_policy.h +++ b/osfmk/mach/task_policy.h @@ -103,6 +103,11 @@ kern_return_t task_policy_get( #define TASK_CATEGORY_POLICY 1 +#define TASK_SUPPRESSION_POLICY 3 +#define TASK_POLICY_STATE 4 +#define TASK_BASE_QOS_POLICY 8 +#define TASK_OVERRIDE_QOS_POLICY 9 + enum task_role { TASK_RENICED = -1, TASK_UNSPECIFIED = 0, @@ -115,7 +120,7 @@ enum task_role { TASK_DEFAULT_APPLICATION }; -typedef enum task_role task_role_t; +typedef integer_t task_role_t; struct task_category_policy { task_role_t role; @@ -127,4 +132,277 @@ typedef struct task_category_policy *task_category_policy_t; #define TASK_CATEGORY_POLICY_COUNT ((mach_msg_type_number_t) \ (sizeof (task_category_policy_data_t) / sizeof (integer_t))) + +enum task_latency_qos { + LATENCY_QOS_TIER_UNSPECIFIED = 0x0, + LATENCY_QOS_TIER_0 = ((0xFF<<16) | 1), + LATENCY_QOS_TIER_1 = ((0xFF<<16) | 2), + LATENCY_QOS_TIER_2 = ((0xFF<<16) | 3), + LATENCY_QOS_TIER_3 = ((0xFF<<16) | 4), + LATENCY_QOS_TIER_4 = ((0xFF<<16) | 5), + LATENCY_QOS_TIER_5 = ((0xFF<<16) | 6) +}; +typedef integer_t task_latency_qos_t; +enum task_throughput_qos { + THROUGHPUT_QOS_TIER_UNSPECIFIED = 0x0, + THROUGHPUT_QOS_TIER_0 = ((0xFE<<16) | 1), + THROUGHPUT_QOS_TIER_1 = ((0xFE<<16) | 2), + THROUGHPUT_QOS_TIER_2 = ((0xFE<<16) | 3), + THROUGHPUT_QOS_TIER_3 = ((0xFE<<16) | 4), + THROUGHPUT_QOS_TIER_4 = ((0xFE<<16) | 5), + THROUGHPUT_QOS_TIER_5 = ((0xFE<<16) | 6), +}; + +#define LATENCY_QOS_LAUNCH_DEFAULT_TIER LATENCY_QOS_TIER_3 +#define THROUGHPUT_QOS_LAUNCH_DEFAULT_TIER THROUGHPUT_QOS_TIER_3 + +typedef integer_t task_throughput_qos_t; + +struct task_qos_policy { + task_latency_qos_t task_latency_qos_tier; + task_throughput_qos_t task_throughput_qos_tier; +}; + +typedef struct task_qos_policy *task_qos_policy_t; +#define TASK_QOS_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (struct task_qos_policy) / sizeof (integer_t))) + +#define PROC_FLAG_DARWINBG 0x8000 /* process in darwin background */ +#define PROC_FLAG_EXT_DARWINBG 0x10000 /* process in darwin background - external enforcement */ +#define PROC_FLAG_IOS_APPLEDAEMON 0x20000 /* process is apple ios daemon */ +#define PROC_FLAG_IOS_IMPPROMOTION 0x80000 /* process is apple ios daemon */ +#define PROC_FLAG_ADAPTIVE 0x100000 /* Process is adaptive */ +#define PROC_FLAG_ADAPTIVE_IMPORTANT 0x200000 /* Process is adaptive, and is currently important */ +#define PROC_FLAG_IMPORTANCE_DONOR 0x400000 /* Process is marked as an importance donor */ +#define PROC_FLAG_SUPPRESSED 0x800000 /* Process is suppressed */ +#define PROC_FLAG_IOS_APPLICATION 0x1000000 /* Process is an application */ + +#ifdef MACH_KERNEL_PRIVATE + +struct task_requested_policy { + /* Task and thread policy (inherited) */ + uint64_t int_darwinbg :1, /* marked as darwinbg via setpriority */ + ext_darwinbg :1, + int_iotier :2, /* IO throttle tier */ + ext_iotier :2, + int_iopassive :1, /* should IOs cause lower tiers to be throttled */ + ext_iopassive :1, + bg_iotier :2, /* what IO throttle tier should apply to me when I'm darwinbg? 
(pushed to threads) */ + terminated :1, /* all throttles should be removed for quick exit or SIGTERM handling */ + + /* Thread only policy */ + th_pidbind_bg :1, /* thread only: task i'm bound to is marked 'watchbg' */ + th_workq_bg :1, /* thread only: currently running a background priority workqueue */ + + /* Task only policy */ + t_apptype :3, /* What apptype did launchd tell us this was (inherited) */ + t_boosted :1, /* Has a non-zero importance assertion count */ + t_int_gpu_deny :1, /* don't allow access to GPU */ + t_ext_gpu_deny :1, + t_role :3, /* task's system role */ + t_tal_enabled :1, /* TAL mode is enabled */ + t_base_latency_qos :3, /* Timer latency QoS */ + t_over_latency_qos :3, /* Timer latency QoS override */ + t_base_through_qos :3, /* Computation throughput QoS */ + t_over_through_qos :3, /* Computation throughput QoS override */ + + /* Task only: suppression policies (non-embedded only) */ + t_sup_active :1, /* Suppression is on */ + t_sup_lowpri_cpu :1, /* Wants low priority CPU (MAXPRI_THROTTLE) */ + t_sup_timer :3, /* Wanted timer throttling QoS tier */ + t_sup_disk :1, /* Wants disk throttling */ + t_sup_cpu_limit :1, /* Wants CPU limit (not hooked up yet)*/ + t_sup_suspend :1, /* Wants to be suspended */ + t_sup_throughput :3, /* Wants throughput QoS tier */ + t_sup_cpu :1, /* Wants suppressed CPU priority (MAXPRI_SUPPRESSED) */ + + reserved :17; +}; + +struct task_effective_policy { + /* Task and thread policy */ + uint64_t darwinbg :1, /* marked as 'background', and sockets are marked bg when created */ + lowpri_cpu :1, /* cpu priority == MAXPRI_THROTTLE */ + io_tier :2, /* effective throttle tier */ + io_passive :1, /* should IOs cause lower tiers to be throttled */ + all_sockets_bg :1, /* All existing sockets in process are marked as bg (thread: all created by thread) */ + new_sockets_bg :1, /* Newly created sockets should be marked as bg */ + bg_iotier :2, /* What throttle tier should I be in when darwinbg is set? 
*/ + terminated :1, /* all throttles have been removed for quick exit or SIGTERM handling */ + + /* Task only policy */ + t_gpu_deny :1, /* not allowed to access GPU */ + t_tal_engaged :1, /* TAL mode is in effect */ + t_suspended :1, /* task_suspend-ed due to suppression */ + t_watchers_bg :1, /* watchers are BG-ed */ + t_latency_qos :3, /* Timer latency QoS level */ + t_through_qos :3, /* Computation throughput QoS level */ + t_sup_active :1, /* suppression behaviors are in effect */ + t_role :3, /* task's system role */ + t_suppressed_cpu :1, /* cpu priority == MAXPRI_SUPPRESSED (trumped by lowpri_cpu) */ + + reserved :39; +}; + +struct task_pended_policy { + uint64_t t_updating_policy :1, /* Busy bit for task to prevent concurrent 'complete' operations */ + + /* Task and thread policy */ + update_sockets :1, + + /* Task only policy */ + t_update_timers :1, + t_update_watchers :1, + + reserved :60; +}; + +extern const struct task_requested_policy default_task_requested_policy; +extern const struct task_effective_policy default_task_effective_policy; +extern const struct task_pended_policy default_task_pended_policy; + + +#endif /* MACH_KERNEL_PRIVATE */ + +#ifdef PRIVATE + +/* + * Control structure for applying suppression behaviors to tasks + */ +struct task_suppression_policy { + integer_t active; + integer_t lowpri_cpu; /* priority MAXPRI_THROTTLE cpu */ + integer_t timer_throttle; + integer_t disk_throttle; + integer_t cpu_limit; + integer_t suspend; + integer_t throughput_qos; + integer_t suppressed_cpu; /* priority MAXPRI_SUPPRESSED cpu */ + integer_t reserved[8]; +}; + +typedef struct task_suppression_policy *task_suppression_policy_t; + +#define TASK_SUPPRESSION_POLICY_COUNT ((mach_msg_type_number_t) \ + (sizeof (struct task_suppression_policy) / sizeof (integer_t))) + +struct task_policy_state { + uint64_t requested; + uint64_t effective; + uint64_t pending; + uint32_t imp_assertcnt; + uint32_t imp_externcnt; + uint64_t flags; + uint64_t reserved[3]; +}; + +typedef struct task_policy_state *task_policy_state_t; + +#define TASK_POLICY_STATE_COUNT ((mach_msg_type_number_t) \ + (sizeof (struct task_policy_state) / sizeof (integer_t))) + + +/* + * Definitions for munging and unmunging a policy struct + * Used in task_policy_state and in tracepoints + * + * Note: this is tightly bound to the implementation of task policy + * and the values exported through this API may change or change meaning at any time + * + * Do not rely on these values, especially apptype, to decide behaviors at runtime. + * + * All per-thread state must be in the first 32 bits of the bitfield. 
+ */ + +#define TASK_APPTYPE_NONE 0 +#define TASK_APPTYPE_DAEMON_INTERACTIVE 1 +#define TASK_APPTYPE_DAEMON_STANDARD 2 +#define TASK_APPTYPE_DAEMON_ADAPTIVE 3 +#define TASK_APPTYPE_DAEMON_BACKGROUND 4 +#define TASK_APPTYPE_APP_DEFAULT 5 +#define TASK_APPTYPE_APP_TAL 6 + +/* task policy state flags */ +#define TASK_IMP_RECEIVER 0x00000001 +#define TASK_IMP_DONOR 0x00000002 + +/* requested_policy */ +#define POLICY_REQ_INT_DARWIN_BG 0x00000001 +#define POLICY_REQ_EXT_DARWIN_BG 0x00000002 +#define POLICY_REQ_INT_IO_TIER_MASK 0x0000000c /* 2 bits */ +#define POLICY_REQ_INT_IO_TIER_SHIFT 2 +#define POLICY_REQ_EXT_IO_TIER_MASK 0x00000030 /* 2 bits */ +#define POLICY_REQ_EXT_IO_TIER_SHIFT 4 +#define POLICY_REQ_INT_PASSIVE_IO 0x00000040 +#define POLICY_REQ_EXT_PASSIVE_IO 0x00000080 +#define POLICY_REQ_BG_IOTIER_MASK 0x00000300 /* 2 bits */ +#define POLICY_REQ_BG_IOTIER_SHIFT 8 +#define POLICY_REQ_PIDBIND_BG 0x00000400 +#define POLICY_REQ_WORKQ_BG 0x00000800 + +/* task_requested_policy */ +#define POLICY_REQ_TERMINATED 0x00001000 +#define POLICY_REQ_BOOSTED 0x00002000 +#define POLICY_REQ_INT_GPU_DENY 0x00004000 +#define POLICY_REQ_EXT_GPU_DENY 0x00008000 +#define POLICY_REQ_APPTYPE_MASK 0x00070000 /* 3 bits */ +#define POLICY_REQ_APPTYPE_SHIFT 16 +#define POLICY_REQ_BASE_LATENCY_QOS_MASK 0x00700000 /* 3 bits */ +#define POLICY_REQ_BASE_LATENCY_QOS_SHIFT 20 +#define POLICY_REQ_ROLE_MASK 0x07000000 /* 3 bits */ +#define POLICY_REQ_ROLE_SHIFT 24 +#define POLICY_REQ_TAL_ENABLED 0x40000000 + +/* requested suppression behaviors (note: clipped off in 32-bit tracepoints) */ +#define POLICY_REQ_SUP_ACTIVE 0x0000000100000000 +#define POLICY_REQ_SUP_LOWPRI_CPU 0x0000000200000000 +#define POLICY_REQ_SUP_CPU 0x0000000400000000 +#define POLICY_REQ_SUP_DISK_THROTTLE 0x0000003000000000 /* 2 bits */ +#define POLICY_REQ_SUP_CPU_LIMIT 0x0000004000000000 +#define POLICY_REQ_SUP_SUSPEND 0x0000008000000000 +#define POLICY_REQ_OVER_LATENCY_QOS_MASK 0x0000070000000000 /* 3 bits */ +#define POLICY_REQ_OVER_LATENCY_QOS_SHIFT 40 +#define POLICY_REQ_BASE_THROUGH_QOS_MASK 0x0000700000000000 /* 3 bits */ +#define POLICY_REQ_BASE_THROUGH_QOS_SHIFT 44 +#define POLICY_REQ_OVER_THROUGH_QOS_MASK 0x0007000000000000 /* 3 bits */ +#define POLICY_REQ_OVER_THROUGH_QOS_SHIFT 48 +#define POLICY_REQ_SUP_TIMER_THROTTLE_MASK 0x0070000000000000 /* 3 bits */ +#define POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT 52 +#define POLICY_REQ_SUP_THROUGHPUT_MASK 0x0700000000000000 /* 3 bits */ +#define POLICY_REQ_SUP_THROUGHPUT_SHIFT 56 + +/* effective policy */ +#define POLICY_EFF_IO_TIER_MASK 0x00000003 /* 2 bits */ +#define POLICY_EFF_IO_TIER_SHIFT 0 +#define POLICY_EFF_IO_PASSIVE 0x00000008 +#define POLICY_EFF_DARWIN_BG 0x00000010 +#define POLICY_EFF_LOWPRI_CPU 0x00000020 +#define POLICY_EFF_ALL_SOCKETS_BG 0x00000040 +#define POLICY_EFF_NEW_SOCKETS_BG 0x00000080 +#define POLICY_EFF_BG_IOTIER_MASK 0x00000300 /* 2 bits */ +#define POLICY_EFF_BG_IOTIER_SHIFT 8 +#define POLICY_EFF_TERMINATED 0x00000400 + +/* task effective policy */ +#define POLICY_EFF_GPU_DENY 0x00001000 +#define POLICY_EFF_TAL_ENGAGED 0x00002000 +#define POLICY_EFF_SUSPENDED 0x00004000 +#define POLICY_EFF_WATCHERS_BG 0x00008000 +#define POLICY_EFF_LATENCY_QOS_MASK 0x00070000 /* 3 bits */ +#define POLICY_EFF_LATENCY_QOS_SHIFT 16 +#define POLICY_EFF_SUP_ACTIVE 0x00080000 +#define POLICY_EFF_ROLE_MASK 0x00700000 /* 3 bits */ +#define POLICY_EFF_ROLE_SHIFT 20 +#define POLICY_EFF_SUP_CPU 0x00800000 +#define POLICY_EFF_THROUGH_QOS_MASK 0x07000000 /* 3 bits */ +#define POLICY_EFF_THROUGH_QOS_SHIFT 24 
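These mask/shift pairs exist so the packed 64-bit words reported in struct task_policy_state can be pulled apart again, for example when post-processing tracepoints. A small illustrative decoder follows; it is a sketch only (not part of the patch), assumes the PRIVATE definitions above are in scope, and is subject to the "may change at any time" caveat stated above. The pending-policy bits continue right after it.

/*
 * Illustrative sketch only: unpack a few fields from the munged
 * "requested" word of struct task_policy_state using the POLICY_REQ_*
 * masks and shifts defined above. Layout is private and may change.
 */
#include <stdint.h>
#include <stdio.h>

static void dump_requested_policy(uint64_t requested)
{
	unsigned int_iotier = (unsigned)((requested & POLICY_REQ_INT_IO_TIER_MASK)
	    >> POLICY_REQ_INT_IO_TIER_SHIFT);
	unsigned apptype = (unsigned)((requested & POLICY_REQ_APPTYPE_MASK)
	    >> POLICY_REQ_APPTYPE_SHIFT);
	unsigned role = (unsigned)((requested & POLICY_REQ_ROLE_MASK)
	    >> POLICY_REQ_ROLE_SHIFT);

	printf("darwinbg int/ext %d/%d, int iotier %u, apptype %u, role %u, sup_active %d\n",
	    !!(requested & POLICY_REQ_INT_DARWIN_BG),
	    !!(requested & POLICY_REQ_EXT_DARWIN_BG),
	    int_iotier, apptype, role,
	    !!(requested & POLICY_REQ_SUP_ACTIVE));
}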
+
+/* pending policy */
+#define POLICY_PEND_UPDATING	0x00000001
+#define POLICY_PEND_SOCKETS	0x00000002
+#define POLICY_PEND_TIMERS	0x00000004
+#define POLICY_PEND_WATCHERS	0x00000008
+
+#endif /* PRIVATE */
+
 #endif	/* _MACH_TASK_POLICY_H_ */
diff --git a/osfmk/mach/telemetry_notification.defs b/osfmk/mach/telemetry_notification.defs
new file mode 100644
index 000000000..4b9f8a590
--- /dev/null
+++ b/osfmk/mach/telemetry_notification.defs
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2012, Apple Inc. All rights reserved.
+ */
+
+ /*
+ * Interface definition for the telemetry facility.
+ */
+
+subsystem
+#if	KERNEL_USER
+	  KernelUser
+#endif	/* KERNEL_USER */
+	  telemetry_notification 5100;
+
+#include <mach/std_types.defs>
+#include <mach/mach_types.defs>
+
+simpleroutine telemetry_notification(
+	RequestPort	telemetry_port	: mach_port_t;
+	in		flags		: uint32_t);
diff --git a/osfmk/mach/thread_policy.h b/osfmk/mach/thread_policy.h
index 607028837..a0662cae6 100644
--- a/osfmk/mach/thread_policy.h
+++ b/osfmk/mach/thread_policy.h
@@ -231,4 +231,28 @@ typedef struct thread_background_policy *thread_background_policy_t;
 #define THREAD_BACKGROUND_POLICY_COUNT ((mach_msg_type_number_t) \
 	(sizeof (thread_background_policy_data_t) / sizeof (integer_t)))
 
+
+#ifdef PRIVATE
+
+/*
+ * THREAD_POLICY_STATE:
+ */
+#define THREAD_POLICY_STATE 6
+
+struct thread_policy_state {
+	integer_t requested;
+	integer_t effective;
+	integer_t pending;
+	integer_t reserved[13];
+};
+
+typedef struct thread_policy_state thread_policy_state_data_t;
+typedef struct thread_policy_state *thread_policy_state_t;
+
+#define THREAD_POLICY_STATE_COUNT ((mach_msg_type_number_t) \
+	(sizeof (thread_policy_state_data_t) / sizeof (integer_t)))
+
+#endif /* PRIVATE */
+
 #endif	/* _MACH_THREAD_POLICY_H_ */
diff --git a/osfmk/mach/thread_switch.h b/osfmk/mach/thread_switch.h
index f5558946a..abce09f6e 100644
--- a/osfmk/mach/thread_switch.h
+++ b/osfmk/mach/thread_switch.h
@@ -71,7 +71,14 @@
 #define SWITCH_OPTION_NONE	0
 #define SWITCH_OPTION_DEPRESS	1
 #define SWITCH_OPTION_WAIT	2
+#ifdef PRIVATE
+/* Workqueue should not consider thread blocked, and option_time is in us */
+#define SWITCH_OPTION_DISPATCH_CONTENTION	3
+/* Handoff to lock owner and temporarily grant matching IO throttling policy */
+#define SWITCH_OPTION_OSLOCK_DEPRESS	4
+#define SWITCH_OPTION_OSLOCK_WAIT	5
+#endif /* PRIVATE */
 
-#define valid_switch_option(opt)	(0 <= (opt) && (opt) <= 2)
+#define valid_switch_option(opt)	(0 <= (opt) && (opt) <= 5)
 
 #endif	/* _MACH_THREAD_SWITCH_H_ */
diff --git a/osfmk/mach/vm_map.defs b/osfmk/mach/vm_map.defs
index 521d5886e..8ab1fcc4e 100644
--- a/osfmk/mach/vm_map.defs
+++ b/osfmk/mach/vm_map.defs
@@ -174,7 +174,7 @@ routine vm_inherit(
  *	and must be a multiple of pages in extent. The
  *	protection on the specified range must permit reading.]
  */
-routine vm_read(
+routine PREFIX(vm_read) (
 		target_task	: vm_map_t;
 		address		: vm_address_t;
 		size		: vm_size_t;
@@ -283,7 +283,7 @@ routine vm_behavior_set(
  *	The user-defined memory manager for this object is responsible
  *	for further consistency.]
  */
-routine vm_map(
+routine PREFIX(vm_map) (
 		target_task	: vm_task_entry_t;
 	inout	address		: vm_address_t;
 		size		: vm_size_t;
@@ -311,7 +311,7 @@ routine vm_machine_attribute(
 
 /*
  * Map portion of a task's address space.
*/ -routine vm_remap( +routine PREFIX(vm_remap) ( target_task : vm_map_t; inout target_address : vm_address_t; size : vm_size_t; diff --git a/osfmk/mach/vm_param.h b/osfmk/mach/vm_param.h index 60e65de24..afb040eee 100644 --- a/osfmk/mach/vm_param.h +++ b/osfmk/mach/vm_param.h @@ -247,16 +247,15 @@ extern vm_offset_t vm_kernel_top; extern vm_offset_t vm_kernel_slide; extern vm_offset_t vm_kernel_addrperm; +extern vm_offset_t vm_kext_base; +extern vm_offset_t vm_kext_top; + #define VM_KERNEL_IS_SLID(_o) \ (((vm_offset_t)(_o) >= vm_kernel_base) && \ ((vm_offset_t)(_o) < vm_kernel_top)) -/* - * VM_KERNEL_IS_KEXT is platform-specific, defined in . - * Set default if undefined. - */ -#ifndef VM_KERNEL_IS_KEXT -#define VM_KERNEL_IS_KEXT(_o) (FALSE) -#endif +#define VM_KERNEL_IS_KEXT(_o) \ + (((vm_offset_t)(_o) >= vm_kext_base) && \ + ((vm_offset_t)(_o) < vm_kext_top)) #define VM_KERNEL_UNSLIDE(_v) \ ((VM_KERNEL_IS_SLID(_v) || \ VM_KERNEL_IS_KEXT(_v)) ? \ diff --git a/osfmk/mach/vm_purgable.h b/osfmk/mach/vm_purgable.h index 2d8ca3038..2fefb86e6 100644 --- a/osfmk/mach/vm_purgable.h +++ b/osfmk/mach/vm_purgable.h @@ -59,6 +59,10 @@ typedef int vm_purgable_t; #define VM_PURGABLE_GET_STATE ((vm_purgable_t) 1) /* get state of purgeable object */ #define VM_PURGABLE_PURGE_ALL ((vm_purgable_t) 2) /* purge all volatile objects now */ +#define VM_PURGABLE_NO_AGING_SHIFT 16 +#define VM_PURGABLE_NO_AGING_MASK (0x1 << VM_PURGABLE_NO_AGING_SHIFT) +#define VM_PURGABLE_NO_AGING (0x1 << VM_PURGABLE_NO_AGING_SHIFT) + #define VM_PURGABLE_DEBUG_SHIFT 12 #define VM_PURGABLE_DEBUG_MASK (0x3 << VM_PURGABLE_DEBUG_SHIFT) #define VM_PURGABLE_DEBUG_EMPTY (0x1 << VM_PURGABLE_DEBUG_SHIFT) @@ -71,7 +75,7 @@ typedef int vm_purgable_t; */ #define VM_VOLATILE_GROUP_SHIFT 8 #define VM_VOLATILE_GROUP_MASK (7 << VM_VOLATILE_GROUP_SHIFT) -#define VM_VOLATILE_GROUP_DEFAULT VM_VOLATILE_GROUP_7 +#define VM_VOLATILE_GROUP_DEFAULT VM_VOLATILE_GROUP_0 #define VM_VOLATILE_GROUP_0 (0 << VM_VOLATILE_GROUP_SHIFT) #define VM_VOLATILE_GROUP_1 (1 << VM_VOLATILE_GROUP_SHIFT) @@ -122,13 +126,14 @@ typedef int vm_purgable_t; #define VM_PURGABLE_NONVOLATILE 0 /* purgeable object is non-volatile */ #define VM_PURGABLE_VOLATILE 1 /* purgeable object is volatile */ -#define VM_PURGABLE_EMPTY 2 /* purgeable object is volatile and empty */ -#define VM_PURGABLE_DENY 3 /* (mark) object not purgeable */ +#define VM_PURGABLE_EMPTY 2 /* purgeable object is volatile and empty */ +#define VM_PURGABLE_DENY 3 /* (mark) object not purgeable */ #define VM_PURGABLE_ALL_MASKS (VM_PURGABLE_STATE_MASK | \ VM_VOLATILE_ORDER_MASK | \ VM_PURGABLE_ORDERING_MASK | \ VM_PURGABLE_BEHAVIOR_MASK | \ VM_VOLATILE_GROUP_MASK | \ - VM_PURGABLE_DEBUG_MASK) + VM_PURGABLE_DEBUG_MASK | \ + VM_PURGABLE_NO_AGING_MASK) #endif /* _MACH_VM_PURGABLE_H_ */ diff --git a/osfmk/mach/vm_region.h b/osfmk/mach/vm_region.h index ebc0e8d54..729c7d81f 100644 --- a/osfmk/mach/vm_region.h +++ b/osfmk/mach/vm_region.h @@ -117,8 +117,6 @@ typedef struct vm_region_basic_info vm_region_basic_info_data_t; #define VM_REGION_BASIC_INFO_COUNT ((mach_msg_type_number_t) \ (sizeof(vm_region_basic_info_data_t)/sizeof(int))) -#define VM_REGION_EXTENDED_INFO 11 - #define SM_COW 1 #define SM_PRIVATE 2 #define SM_EMPTY 3 @@ -136,7 +134,9 @@ typedef struct vm_region_basic_info vm_region_basic_info_data_t; * back. 
*/ -struct vm_region_extended_info { +#if MACH_KERNEL_PRIVATE +#define VM_REGION_EXTENDED_INFO__legacy 11 +struct vm_region_extended_info__legacy { vm_prot_t protection; unsigned int user_tag; unsigned int pages_resident; @@ -147,13 +147,42 @@ struct vm_region_extended_info { unsigned short shadow_depth; unsigned char external_pager; unsigned char share_mode; + /* + * XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX + * DO NOT EXTEND THIS DATA STRUCTURE. + * IT IS NOW ABANDONED AND REPLACED WITH vm_region_extended_info BELOW. + * XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX + */ }; +#define VM_REGION_EXTENDED_INFO_COUNT__legacy \ + ((mach_msg_type_number_t) \ + (sizeof (struct vm_region_extended_info__legacy) / \ + sizeof (natural_t))) +#endif /* MACH_KERNEL_PRIVATE */ + + +#define VM_REGION_EXTENDED_INFO 13 +struct vm_region_extended_info { + vm_prot_t protection; + unsigned int user_tag; + unsigned int pages_resident; + unsigned int pages_shared_now_private; + unsigned int pages_swapped_out; + unsigned int pages_dirtied; + unsigned int ref_count; + unsigned short shadow_depth; + unsigned char external_pager; + unsigned char share_mode; + unsigned int pages_reusable; +}; typedef struct vm_region_extended_info *vm_region_extended_info_t; typedef struct vm_region_extended_info vm_region_extended_info_data_t; +#define VM_REGION_EXTENDED_INFO_COUNT \ + ((mach_msg_type_number_t) \ + (sizeof (vm_region_extended_info_data_t) / sizeof (natural_t))) + -#define VM_REGION_EXTENDED_INFO_COUNT ((mach_msg_type_number_t) \ - (sizeof(vm_region_extended_info_data_t)/sizeof(int))) #define VM_REGION_TOP_INFO 12 @@ -169,8 +198,9 @@ struct vm_region_top_info { typedef struct vm_region_top_info *vm_region_top_info_t; typedef struct vm_region_top_info vm_region_top_info_data_t; -#define VM_REGION_TOP_INFO_COUNT ((mach_msg_type_number_t) \ - (sizeof(vm_region_top_info_data_t)/sizeof(int))) +#define VM_REGION_TOP_INFO_COUNT \ + ((mach_msg_type_number_t) \ + (sizeof(vm_region_top_info_data_t) / sizeof(natural_t))) @@ -210,15 +240,16 @@ struct vm_region_submap_info { unsigned char share_mode; /* see enumeration */ boolean_t is_submap; /* submap vs obj */ vm_behavior_t behavior; /* access behavior hint */ - vm32_object_id_t object_id; /* obj/map name, not a handle */ + vm32_object_id_t object_id; /* obj/map name, not a handle */ unsigned short user_wired_count; }; typedef struct vm_region_submap_info *vm_region_submap_info_t; typedef struct vm_region_submap_info vm_region_submap_info_data_t; -#define VM_REGION_SUBMAP_INFO_COUNT ((mach_msg_type_number_t) \ - (sizeof(vm_region_submap_info_data_t)/sizeof(int))) +#define VM_REGION_SUBMAP_INFO_COUNT \ + ((mach_msg_type_number_t) \ + (sizeof(vm_region_submap_info_data_t) / sizeof(natural_t))) struct vm_region_submap_info_64 { vm_prot_t protection; /* present access protection */ @@ -236,15 +267,29 @@ struct vm_region_submap_info_64 { unsigned char share_mode; /* see enumeration */ boolean_t is_submap; /* submap vs obj */ vm_behavior_t behavior; /* access behavior hint */ - vm32_object_id_t object_id; /* obj/map name, not a handle */ - unsigned short user_wired_count; + vm32_object_id_t object_id; /* obj/map name, not a handle */ + unsigned short user_wired_count; + unsigned int pages_reusable; }; typedef struct vm_region_submap_info_64 *vm_region_submap_info_64_t; typedef struct vm_region_submap_info_64 vm_region_submap_info_data_64_t; -#define VM_REGION_SUBMAP_INFO_COUNT_64 ((mach_msg_type_number_t) \ - 
(sizeof(vm_region_submap_info_data_64_t)/sizeof(int))) +#define VM_REGION_SUBMAP_INFO_V1_SIZE \ + (sizeof (vm_region_submap_info_data_64_t)) +#define VM_REGION_SUBMAP_INFO_V0_SIZE \ + (VM_REGION_SUBMAP_INFO_V1_SIZE - \ + sizeof (unsigned int) /* pages_reusable */) + +#define VM_REGION_SUBMAP_INFO_V1_COUNT_64 \ + ((mach_msg_type_number_t) \ + (VM_REGION_SUBMAP_INFO_V1_SIZE / sizeof (natural_t))) +#define VM_REGION_SUBMAP_INFO_V0_COUNT_64 \ + ((mach_msg_type_number_t) \ + (VM_REGION_SUBMAP_INFO_V0_SIZE / sizeof (natural_t))) + +/* set this to the latest version */ +#define VM_REGION_SUBMAP_INFO_COUNT_64 VM_REGION_SUBMAP_INFO_V1_COUNT_64 struct vm_region_submap_short_info_64 { vm_prot_t protection; /* present access protection */ @@ -258,15 +303,17 @@ struct vm_region_submap_short_info_64 { unsigned char share_mode; /* see enumeration */ boolean_t is_submap; /* submap vs obj */ vm_behavior_t behavior; /* access behavior hint */ - vm32_object_id_t object_id; /* obj/map name, not a handle */ + vm32_object_id_t object_id; /* obj/map name, not a handle */ unsigned short user_wired_count; }; typedef struct vm_region_submap_short_info_64 *vm_region_submap_short_info_64_t; typedef struct vm_region_submap_short_info_64 vm_region_submap_short_info_data_64_t; -#define VM_REGION_SUBMAP_SHORT_INFO_COUNT_64 ((mach_msg_type_number_t) \ - (sizeof(vm_region_submap_short_info_data_64_t)/sizeof(int))) +#define VM_REGION_SUBMAP_SHORT_INFO_COUNT_64 \ + ((mach_msg_type_number_t) \ + (sizeof (vm_region_submap_short_info_data_64_t) / sizeof (natural_t))) + struct mach_vm_read_entry { diff --git a/osfmk/mach/vm_statistics.h b/osfmk/mach/vm_statistics.h index 487549894..cf89a6471 100644 --- a/osfmk/mach/vm_statistics.h +++ b/osfmk/mach/vm_statistics.h @@ -142,12 +142,8 @@ struct vm_statistics64 { uint64_t cow_faults; /* # of copy-on-writes */ uint64_t lookups; /* object cache lookups */ uint64_t hits; /* object cache hits */ - - /* added for rev1 */ uint64_t purges; /* # of pages purged */ natural_t purgeable_count; /* # of pages purgeable */ - - /* added for rev2 */ /* * NB: speculative pages are already accounted for in "free_count", * so "speculative_count" is the number of "free" pages that are @@ -156,6 +152,16 @@ struct vm_statistics64 { */ natural_t speculative_count; /* # of pages speculative */ + /* added for rev1 */ + uint64_t decompressions; /* # of pages decompressed */ + uint64_t compressions; /* # of pages compressed */ + uint64_t swapins; /* # of pages swapped in (via compression segments) */ + uint64_t swapouts; /* # of pages swapped out (via compression segments) */ + natural_t compressor_page_count; /* # of pages used by the compressed pager to hold all the compressed data */ + natural_t throttled_count; /* # of pages throttled */ + natural_t external_page_count; /* # of pages that are file-backed (non-swap) */ + natural_t internal_page_count; /* # of pages that are anonymous */ + uint64_t total_uncompressed_pages_in_compressor; /* # of pages (uncompressed) held within the compressor. 
*/ } __attribute__((aligned(8))); typedef struct vm_statistics64 *vm_statistics64_t; @@ -191,6 +197,18 @@ struct vm_extmod_statistics { typedef struct vm_extmod_statistics *vm_extmod_statistics_t; typedef struct vm_extmod_statistics vm_extmod_statistics_data_t; +typedef struct vm_purgeable_stat { + uint64_t count; + uint64_t size; +}vm_purgeable_stat_t; + +struct vm_purgeable_info { + vm_purgeable_stat_t fifo_data[8]; + vm_purgeable_stat_t obsolete_data; + vm_purgeable_stat_t lifo_data[8]; +}; + +typedef struct vm_purgeable_info *vm_purgeable_info_t; /* included for the vm_map_page_query call */ @@ -218,10 +236,29 @@ struct pmap_statistics { integer_t resident_count; /* # of pages mapped (total)*/ integer_t resident_max; /* # of pages mapped (peak) */ integer_t wired_count; /* # of pages wired */ + + integer_t device; + integer_t device_peak; + integer_t internal; + integer_t internal_peak; + integer_t external; + integer_t external_peak; + integer_t reusable; + integer_t reusable_peak; + uint64_t compressed; + uint64_t compressed_peak; + uint64_t compressed_lifetime; }; typedef struct pmap_statistics *pmap_statistics_t; +#define PMAP_STATS_PEAK(field) \ + MACRO_BEGIN \ + if (field > field##_peak) { \ + field##_peak = field; \ + } \ + MACRO_END + #endif /* MACH_KERNEL_PRIVATE */ /* @@ -269,7 +306,7 @@ typedef struct pmap_statistics *pmap_statistics_t; #define VM_FLAGS_NO_PMAP_CHECK 0x8000 /* do not check that pmap is empty */ #define VM_FLAGS_MAP_JIT 0x80000 /* Used to mark an entry as describing a JIT region */ #endif /* KERNEL_PRIVATE */ - +#define VM_FLAGS_RETURN_DATA_ADDR 0x100000 /* Return address of target data, rather than base of page */ /* * VM_FLAGS_SUPERPAGE_MASK * 3 bits that specify whether large pages should be used instead of @@ -302,10 +339,11 @@ typedef struct pmap_statistics *pmap_statistics_t; VM_FLAGS_OVERWRITE | \ VM_FLAGS_SUPERPAGE_MASK | \ VM_FLAGS_ALIAS_MASK) -#define VM_FLAGS_USER_MAP VM_FLAGS_USER_ALLOCATE +#define VM_FLAGS_USER_MAP (VM_FLAGS_USER_ALLOCATE | VM_FLAGS_RETURN_DATA_ADDR) #define VM_FLAGS_USER_REMAP (VM_FLAGS_FIXED | \ VM_FLAGS_ANYWHERE | \ - VM_FLAGS_OVERWRITE) + VM_FLAGS_OVERWRITE| \ + VM_FLAGS_RETURN_DATA_ADDR) #define VM_MEMORY_MALLOC 1 #define VM_MEMORY_MALLOC_SMALL 2 @@ -319,6 +357,8 @@ typedef struct pmap_statistics *pmap_statistics_t; #define VM_MEMORY_ANALYSIS_TOOL 10 +#define VM_MEMORY_MALLOC_NANO 11 + #define VM_MEMORY_MACH_MSG 20 #define VM_MEMORY_IOKIT 21 #define VM_MEMORY_STACK 30 @@ -340,6 +380,8 @@ typedef struct pmap_statistics *pmap_statistics_t; #define VM_MEMORY_CORESERVICES 43 #define VM_MEMORY_CARBON VM_MEMORY_CORESERVICES #define VM_MEMORY_JAVA 44 +#define VM_MEMORY_COREDATA 45 +#define VM_MEMORY_COREDATA_OBJECTIDS 46 #define VM_MEMORY_ATS 50 #define VM_MEMORY_LAYERKIT 51 #define VM_MEMORY_CGIMAGE 52 @@ -395,6 +437,18 @@ typedef struct pmap_statistics *pmap_statistics_t; /* assetsd / MobileSlideShow memory */ #define VM_MEMORY_ASSETSD 72 +/* libsystem_kernel os_once_alloc */ +#define VM_MEMORY_OS_ALLOC_ONCE 73 + +/* libdispatch internal allocator */ +#define VM_MEMORY_LIBDISPATCH 74 + +/* Accelerate.framework image backing stores */ +#define VM_MEMORY_ACCELERATE 75 + +/* CoreUI image block data */ +#define VM_MEMORY_COREUI 76 + /* Reserve 240-255 for application */ #define VM_MEMORY_APPLICATION_SPECIFIC_1 240 #define VM_MEMORY_APPLICATION_SPECIFIC_16 255 diff --git a/osfmk/mach_debug/Makefile b/osfmk/mach_debug/Makefile index 6ddb140a6..d0cbf88d6 100644 --- a/osfmk/mach_debug/Makefile +++ b/osfmk/mach_debug/Makefile @@ -3,7 +3,6 
@@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) @@ -23,8 +22,6 @@ EXPORT_MI_LIST = ${DATAFILES} EXPORT_MI_DIR = mach_debug -COMP_FILES = - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/mach_debug/template.mk b/osfmk/mach_debug/template.mk deleted file mode 100644 index 7673ffc01..000000000 --- a/osfmk/mach_debug/template.mk +++ /dev/null @@ -1,59 +0,0 @@ -# -# @OSF_COPYRIGHT@ -# -# -# HISTORY -# -# Revision 1.1.1.1 1998/09/22 21:05:45 wsanchez -# Import of Mac OS X kernel (~semeria) -# -# Revision 1.1.1.1 1998/03/07 02:26:17 wsanchez -# Import of OSF Mach kernel (~mburg) -# -# Revision 1.1.8.2 1994/09/23 02:46:03 ezf -# change marker to not FREE -# [1994/09/22 21:44:25 ezf] -# -# Revision 1.1.8.1 1994/06/13 19:58:36 dlb -# Merge MK6 and NMK17 -# [1994/06/13 16:22:58 dlb] -# -# Revision 1.1.6.1 1994/03/07 16:41:51 paire -# Added MIGKSARGS and MIGKSENV variables to MIGKSFLAGS definition. -# [94/02/28 paire] -# -# Revision 1.1.2.2 1993/08/04 19:32:37 gm -# CR9605: Add SUBDIRS to mach_kernel build process. -# [1993/08/03 13:30:22 gm] -# -# $EndLog$ - -VPATH = ..:../.. - -MIGFLAGS = -MD ${IDENT} -MIGKSFLAGS = -DKERNEL_SERVER ${MIGKSARGS} ${MIGKSENV} - -MACH_DEBUG_FILES = mach_debug_server.h mach_debug_server.c - -OTHERS = ${MACH_DEBUG_FILES} - -INCFLAGS = -I.. -I../.. -MDINCFLAGS = -I.. -I../.. - -DEPENDENCIES = - -.include <${RULES_MK}> - -.ORDER: ${MACH_DEBUG_FILES} - -${MACH_DEBUG_FILES}: mach_debug/mach_debug.defs - ${_MIG_} ${_MIGFLAGS_} ${MIGKSFLAGS} \ - -header /dev/null \ - -user /dev/null \ - -sheader mach_debug_server.h \ - -server mach_debug_server.c \ - ${mach_debug/mach_debug.defs:P} - -.if exists(depend.mk) -.include "depend.mk" -.endif diff --git a/osfmk/machine/Makefile b/osfmk/machine/Makefile index ca45cc170..82e05c4d8 100644 --- a/osfmk/machine/Makefile +++ b/osfmk/machine/Makefile @@ -14,8 +14,9 @@ DATAFILES = \ io_map_entries.h \ lock.h \ locks.h \ - machine_cpuid.h \ + machine_cpuid.h \ machine_routines.h \ + machine_kpc.h \ pal_routines.h \ pal_hibernate.h \ simple_lock.h diff --git a/osfmk/machine/machine_kpc.h b/osfmk/machine/machine_kpc.h new file mode 100644 index 000000000..de9593c3f --- /dev/null +++ b/osfmk/machine/machine_kpc.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _MACHINE_MACHINE_KPC_H +#define _MACHINE_MACHINE_KPC_H + +#if defined (__x86_64__) +#include "x86_64/machine_kpc.h" +#else +#error architecture not supported +#endif + +#endif /* _MACHINE_MACHINE_KPC_H */ diff --git a/osfmk/man/DMN_port_deleted.html b/osfmk/man/DMN_port_deleted.html old mode 100755 new mode 100644 diff --git a/osfmk/man/DMN_port_destroyed.html b/osfmk/man/DMN_port_destroyed.html old mode 100755 new mode 100644 diff --git a/osfmk/man/DP_backing_store_create.html b/osfmk/man/DP_backing_store_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/DP_backing_store_delete.html b/osfmk/man/DP_backing_store_delete.html old mode 100755 new mode 100644 diff --git a/osfmk/man/DP_backing_store_info.html b/osfmk/man/DP_backing_store_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/DP_object_create.html b/osfmk/man/DP_object_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/DR_overwrite_async.html b/osfmk/man/DR_overwrite_async.html old mode 100755 new mode 100644 diff --git a/osfmk/man/HD_memory_manager.html b/osfmk/man/HD_memory_manager.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_SY_completed.html b/osfmk/man/MO_SY_completed.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_change_attributes.html b/osfmk/man/MO_change_attributes.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_change_completed.html b/osfmk/man/MO_change_completed.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_data_initialize.html b/osfmk/man/MO_data_initialize.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_data_unavailable.html b/osfmk/man/MO_data_unavailable.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_default_server.html b/osfmk/man/MO_default_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_get_attributes.html b/osfmk/man/MO_get_attributes.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_lock_completed.html b/osfmk/man/MO_lock_completed.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MO_supply_completed.html b/osfmk/man/MO_supply_completed.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MP_allocate_subsystem.html b/osfmk/man/MP_allocate_subsystem.html old mode 100755 new mode 100644 diff --git a/osfmk/man/MP_request_notification.html b/osfmk/man/MP_request_notification.html old mode 100755 new mode 100644 diff --git a/osfmk/man/P_set_policy_control.html b/osfmk/man/P_set_policy_control.html old mode 100755 new mode 100644 diff --git a/osfmk/man/P_set_policy_disable.html b/osfmk/man/P_set_policy_disable.html old mode 100755 new mode 100644 diff --git a/osfmk/man/P_set_policy_enable.html b/osfmk/man/P_set_policy_enable.html old mode 100755 new mode 100644 diff --git a/osfmk/man/SMO_default_server.html b/osfmk/man/SMO_default_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/SMO_server.html b/osfmk/man/SMO_server.html old mode 100755 new mode 100644 diff --git 
a/osfmk/man/TS_exception_ports.html b/osfmk/man/TS_exception_ports.html old mode 100755 new mode 100644 diff --git a/osfmk/man/VSD_memory_manager.html b/osfmk/man/VSD_memory_manager.html old mode 100755 new mode 100644 diff --git a/osfmk/man/bootstrap_arguments.html b/osfmk/man/bootstrap_arguments.html old mode 100755 new mode 100644 diff --git a/osfmk/man/bootstrap_completed.html b/osfmk/man/bootstrap_completed.html old mode 100755 new mode 100644 diff --git a/osfmk/man/bootstrap_environment.html b/osfmk/man/bootstrap_environment.html old mode 100755 new mode 100644 diff --git a/osfmk/man/bootstrap_ports.html b/osfmk/man/bootstrap_ports.html old mode 100755 new mode 100644 diff --git a/osfmk/man/catch_exception_raise.html b/osfmk/man/catch_exception_raise.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_alarm.html b/osfmk/man/clock_alarm.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_alarm_reply.html b/osfmk/man/clock_alarm_reply.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_get_attributes.html b/osfmk/man/clock_get_attributes.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_get_time.html b/osfmk/man/clock_get_time.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_map_time.html b/osfmk/man/clock_map_time.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_reply_server.html b/osfmk/man/clock_reply_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_set_attributes.html b/osfmk/man/clock_set_attributes.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_set_time.html b/osfmk/man/clock_set_time.html old mode 100755 new mode 100644 diff --git a/osfmk/man/clock_sleep.html b/osfmk/man/clock_sleep.html old mode 100755 new mode 100644 diff --git a/osfmk/man/default_pager_add_segment.html b/osfmk/man/default_pager_add_segment.html old mode 100755 new mode 100644 diff --git a/osfmk/man/default_pager_info.html b/osfmk/man/default_pager_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_close.html b/osfmk/man/device_close.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_get_status.html b/osfmk/man/device_get_status.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_map.html b/osfmk/man/device_map.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_open.html b/osfmk/man/device_open.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_read.html b/osfmk/man/device_read.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_read_async.html b/osfmk/man/device_read_async.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_read_async_inband.html b/osfmk/man/device_read_async_inband.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_read_inband.html b/osfmk/man/device_read_inband.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_read_overwrite.html b/osfmk/man/device_read_overwrite.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_reply_server.html b/osfmk/man/device_reply_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_set_filter.html b/osfmk/man/device_set_filter.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_set_status.html b/osfmk/man/device_set_status.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_write.html b/osfmk/man/device_write.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_write_async.html 
b/osfmk/man/device_write_async.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_write_async_inband.html b/osfmk/man/device_write_async_inband.html old mode 100755 new mode 100644 diff --git a/osfmk/man/device_write_inband.html b/osfmk/man/device_write_inband.html old mode 100755 new mode 100644 diff --git a/osfmk/man/do_mach_notify_dead_name.html b/osfmk/man/do_mach_notify_dead_name.html old mode 100755 new mode 100644 diff --git a/osfmk/man/do_mach_notify_no_senders.html b/osfmk/man/do_mach_notify_no_senders.html old mode 100755 new mode 100644 diff --git a/osfmk/man/do_mach_notify_send_once.html b/osfmk/man/do_mach_notify_send_once.html old mode 100755 new mode 100644 diff --git a/osfmk/man/etap_get_info.html b/osfmk/man/etap_get_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/etap_probe.html b/osfmk/man/etap_probe.html old mode 100755 new mode 100644 diff --git a/osfmk/man/etap_trace_event.html b/osfmk/man/etap_trace_event.html old mode 100755 new mode 100644 diff --git a/osfmk/man/etap_trace_thread.html b/osfmk/man/etap_trace_thread.html old mode 100755 new mode 100644 diff --git a/osfmk/man/evc_wait.html b/osfmk/man/evc_wait.html old mode 100755 new mode 100644 diff --git a/osfmk/man/exc_server.html b/osfmk/man/exc_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_adjust_time.html b/osfmk/man/host_adjust_time.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_basic_info.html b/osfmk/man/host_basic_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_get_boot_info.html b/osfmk/man/host_get_boot_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_get_clock_control.html b/osfmk/man/host_get_clock_control.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_get_clock_service.html b/osfmk/man/host_get_clock_service.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_get_time.html b/osfmk/man/host_get_time.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_info.html b/osfmk/man/host_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_kernel_version.html b/osfmk/man/host_kernel_version.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_load_info.html b/osfmk/man/host_load_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_page_size.html b/osfmk/man/host_page_size.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_processor_set_priv.html b/osfmk/man/host_processor_set_priv.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_processor_sets.html b/osfmk/man/host_processor_sets.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_processor_slots.html b/osfmk/man/host_processor_slots.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_processors.html b/osfmk/man/host_processors.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_reboot.html b/osfmk/man/host_reboot.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_sched_info.html b/osfmk/man/host_sched_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_security_create_task_token.html b/osfmk/man/host_security_create_task_token.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_security_set_task_token.html b/osfmk/man/host_security_set_task_token.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_set_time.html b/osfmk/man/host_set_time.html old mode 100755 new mode 100644 diff --git a/osfmk/man/host_statistics.html 
b/osfmk/man/host_statistics.html old mode 100755 new mode 100644 diff --git a/osfmk/man/i386_get_ldt.html b/osfmk/man/i386_get_ldt.html old mode 100755 new mode 100644 diff --git a/osfmk/man/i386_io_port_add.html b/osfmk/man/i386_io_port_add.html old mode 100755 new mode 100644 diff --git a/osfmk/man/i386_io_port_list.html b/osfmk/man/i386_io_port_list.html old mode 100755 new mode 100644 diff --git a/osfmk/man/i386_io_port_remove.html b/osfmk/man/i386_io_port_remove.html old mode 100755 new mode 100644 diff --git a/osfmk/man/i386_set_ldt.html b/osfmk/man/i386_set_ldt.html old mode 100755 new mode 100644 diff --git a/osfmk/man/index.html b/osfmk/man/index.html old mode 100755 new mode 100644 index 551c2e30d..2a9d0ff9e --- a/osfmk/man/index.html +++ b/osfmk/man/index.html @@ -415,13 +415,6 @@ added back in, something like these will likely be added. host_page_size - Returns the page size for the given host.
 ledger_get_remote - Return send right to specified host's remote ledger port.
 ledger_set_remote - Set this host's remote ledger port.
-norma_get_special_port - Returns a send right for a specified node-specific special port.
-norma_node_self - Return the node index of the current host.
-norma_port_location_hint - Guess a port's current location.
-norma_set_special_port - Set node-specific special port.
-norma_task_clone - Create a remote task that shares access to parent task's memory.
-norma_task_create - Create a remote task using task_create semantics.
-norma_task_teleport - "Clone" a task on a specified node.
diff --git a/osfmk/man/io_done_queue_create.html b/osfmk/man/io_done_queue_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/io_done_queue_terminate.html b/osfmk/man/io_done_queue_terminate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/io_done_queue_wait.html b/osfmk/man/io_done_queue_wait.html old mode 100755 new mode 100644 diff --git a/osfmk/man/kernel_resource_sizes.html b/osfmk/man/kernel_resource_sizes.html old mode 100755 new mode 100644 diff --git a/osfmk/man/ledger_create.html b/osfmk/man/ledger_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/ledger_get_remote.html b/osfmk/man/ledger_get_remote.html old mode 100755 new mode 100644 diff --git a/osfmk/man/ledger_read.html b/osfmk/man/ledger_read.html old mode 100755 new mode 100644 diff --git a/osfmk/man/ledger_set_remote.html b/osfmk/man/ledger_set_remote.html old mode 100755 new mode 100644 diff --git a/osfmk/man/ledger_terminate.html b/osfmk/man/ledger_terminate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/ledger_transfer.html b/osfmk/man/ledger_transfer.html old mode 100755 new mode 100644 diff --git a/osfmk/man/lock_acquire.html b/osfmk/man/lock_acquire.html old mode 100755 new mode 100644 diff --git a/osfmk/man/lock_handoff.html b/osfmk/man/lock_handoff.html old mode 100755 new mode 100644 diff --git a/osfmk/man/lock_handoff_accept.html b/osfmk/man/lock_handoff_accept.html old mode 100755 new mode 100644 diff --git a/osfmk/man/lock_make_stable.html b/osfmk/man/lock_make_stable.html old mode 100755 new mode 100644 diff --git a/osfmk/man/lock_release.html b/osfmk/man/lock_release.html old mode 100755 new mode 100644 diff --git a/osfmk/man/lock_set_create.html b/osfmk/man/lock_set_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/lock_set_destroy.html b/osfmk/man/lock_set_destroy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/lock_try.html b/osfmk/man/lock_try.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_host_self.html b/osfmk/man/mach_host_self.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_msg.html b/osfmk/man/mach_msg.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_msg_descriptor.html b/osfmk/man/mach_msg_descriptor.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_msg_header.html b/osfmk/man/mach_msg_header.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_allocate.html b/osfmk/man/mach_port_allocate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_allocate_full.html b/osfmk/man/mach_port_allocate_full.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_allocate_name.html b/osfmk/man/mach_port_allocate_name.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_allocate_qos.html b/osfmk/man/mach_port_allocate_qos.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_deallocate.html b/osfmk/man/mach_port_deallocate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_destroy.html b/osfmk/man/mach_port_destroy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_extract_member.html b/osfmk/man/mach_port_extract_member.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_extract_right.html b/osfmk/man/mach_port_extract_right.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_get_attributes.html b/osfmk/man/mach_port_get_attributes.html old mode 100755 new mode 100644 diff --git 
a/osfmk/man/mach_port_get_refs.html b/osfmk/man/mach_port_get_refs.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_get_set_status.html b/osfmk/man/mach_port_get_set_status.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_insert_member.html b/osfmk/man/mach_port_insert_member.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_insert_right.html b/osfmk/man/mach_port_insert_right.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_limits.html b/osfmk/man/mach_port_limits.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_mod_refs.html b/osfmk/man/mach_port_mod_refs.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_move_member.html b/osfmk/man/mach_port_move_member.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_names.html b/osfmk/man/mach_port_names.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_qos.html b/osfmk/man/mach_port_qos.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_set_attributes.html b/osfmk/man/mach_port_set_attributes.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_set_mscount.html b/osfmk/man/mach_port_set_mscount.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_set_seqno.html b/osfmk/man/mach_port_set_seqno.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_status.html b/osfmk/man/mach_port_status.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_port_type.html b/osfmk/man/mach_port_type.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_ports_lookup.html b/osfmk/man/mach_ports_lookup.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_ports_register.html b/osfmk/man/mach_ports_register.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_reply_port.html b/osfmk/man/mach_reply_port.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_rpc_return_trap.html b/osfmk/man/mach_rpc_return_trap.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_rpc_trap.html b/osfmk/man/mach_rpc_trap.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_subsystem_create.html b/osfmk/man/mach_subsystem_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_task_self.html b/osfmk/man/mach_task_self.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mach_thread_self.html b/osfmk/man/mach_thread_self.html old mode 100755 new mode 100644 diff --git a/osfmk/man/mapped_tvalspec.html b/osfmk/man/mapped_tvalspec.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_attr_info.html b/osfmk/man/memory_object_attr_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_create.html b/osfmk/man/memory_object_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_data_error.html b/osfmk/man/memory_object_data_error.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_data_request.html b/osfmk/man/memory_object_data_request.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_data_return.html b/osfmk/man/memory_object_data_return.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_data_supply.html b/osfmk/man/memory_object_data_supply.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_data_unlock.html b/osfmk/man/memory_object_data_unlock.html old mode 100755 new mode 100644 diff --git 
a/osfmk/man/memory_object_destroy.html b/osfmk/man/memory_object_destroy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_init.html b/osfmk/man/memory_object_init.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_lock_request.html b/osfmk/man/memory_object_lock_request.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_perf_info.html b/osfmk/man/memory_object_perf_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_server.html b/osfmk/man/memory_object_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_synchronize.html b/osfmk/man/memory_object_synchronize.html old mode 100755 new mode 100644 diff --git a/osfmk/man/memory_object_terminate.html b/osfmk/man/memory_object_terminate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/norma_get_special_port.html b/osfmk/man/norma_get_special_port.html deleted file mode 100755 index 9641b66e5..000000000 --- a/osfmk/man/norma_get_special_port.html +++ /dev/null @@ -1,102 +0,0 @@ -

norma_get_special_port

-
-

-Function - Acquire a send right for a specified node-specific special port. -

SYNOPSIS

-
-kern_return_t   norma_get_special_port
-                (host_priv_t                          host_priv,
-                 int                                       node,
-                 int                                 which_port,
-                 mach_port_t                       special_port);
-
-
-

Macro forms:

-
-
-#include <mach/norma_special_ports.h>
-
-kern_return_t   norma_get_device_port
-                (host_priv_t                          host_priv,
-                 int                                       node,
-                 mach_port_t                       special_port);
-
-kern_return_t   norma_get_host_port
-                (host_priv_t                          host_priv,
-                 int                                       node,
-                 mach_port_t                       special_port);
-
-kern_return_t   norma_get_host_priv_port
-                (host_priv_t                          host_priv,
-                 int                                       node,
-                 mach_port_t                       special_port);
-
-kern_return_t   norma_get_nameserver_port
-                (host_priv_t                          host_priv,
-                 int                                       node,
-                 mach_port_t                       special_port);
-
-

PARAMETERS

-
-
host_priv -
-[in host-control send right] -The control port for the host for which to -return the special port's send right. -

-

node -
-[in scalar] -The index of the node for which the port is desired. -

-

which_port -
-[in scalar] -The index of the special port for which the send right is -requested. Valid values are: -
-

-

NORMA_DEVICE_PORT -
-[device-master send right] The device master port for the -node. -

-

NORMA_HOST_PORT -
-[host-name send right] The host name port for the node. -

-

NORMA_HOST_PRIV_PORT -
-[host-control send right] The host control port for the node. -

-

NORMA_NAMESERVER_PORT -
-[name-server send right] The registered name server port for -the node. -
-

-

special_port -
-[out norma-special send right] -The returned value for the port. -
-

DESCRIPTION

-

-The norma_get_special_port function returns a send -right for a special port belonging to node on host_priv. -

-Each node maintains a (small) set of node specific ports. The device master -port, host name, and host control ports are -maintained by the kernel. The kernel also permits a small set -of server specified -node specific ports; the name server port is an example and is given (by -convention) an assigned special port index. -
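For illustration, a minimal sketch of a caller fetching a remote node's device master port through the general form above. The <mach/mach.h> include, the pointer form of the out parameter (the synopses on these pages conventionally elide it), and the hypothetical wrapper name are assumptions.

#include <mach/mach.h>
#include <mach/norma_special_ports.h>

/* Fetch the device master port of a hypothetical node 2, given the
 * privileged host control port.  Only generic errors apply. */
kern_return_t
get_remote_device_port(host_priv_t host_priv, mach_port_t *devport)
{
	/* Equivalent macro form: norma_get_device_port(host_priv, 2, devport). */
	return norma_get_special_port(host_priv, 2, NORMA_DEVICE_PORT, devport);
}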

RETURN VALUES

-

-Only generic errors apply. -

RELATED INFORMATION

-

-Functions: -mach_host_self, -norma_set_special_port, -bootstrap_ports. diff --git a/osfmk/man/norma_node_self.html b/osfmk/man/norma_node_self.html deleted file mode 100755 index a41419cea..000000000 --- a/osfmk/man/norma_node_self.html +++ /dev/null @@ -1,32 +0,0 @@ -

norma_node_self

-
-

-Function - Return the node index of the current host. -

SYNOPSIS

-
-kern_return_t   norma_node_self
-                (host_t                                    host,
-                 int                                       node);
-
-

PARAMETERS

-
-

-

host -
-[in host send-right] Name of the host. -

-

node -
-[out scalar] Node index of the host. -
-

DESCRIPTION

-

-The norma_node_self function returns the node index of the current host. -
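A one-line wrapper, as a sketch; the &node pointer is an assumption, since the synopsis above elides it.

#include <mach/mach.h>

/* Return this host's node index, or -1 on any (generic) error. */
int
current_node(host_t host)
{
	int node = -1;
	return (norma_node_self(host, &node) == KERN_SUCCESS) ? node : -1;
}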

RETURN VALUES

-

-Only generic errors apply. -

RELATED INFORMATION

-

-Functions: -norma_task_create, -norma_task_clone. diff --git a/osfmk/man/norma_port_location_hint.html b/osfmk/man/norma_port_location_hint.html deleted file mode 100755 index f4c899ca1..000000000 --- a/osfmk/man/norma_port_location_hint.html +++ /dev/null @@ -1,44 +0,0 @@ -

norma_port_location_hint

-
-

-Function - Guess a port's current location. -

SYNOPSIS

-
-kern_return_t   norma_port_location_hint
-                (task_t                                    task,
-                 mach_port_t                               name,
-                 int                                       node);
-
-

PARAMETERS

-
-

-

task -
-[in task send right] -Task containing the right to locate -

-

name -
-[in scalar] -Name of the right to locate -

-

node -
-[out scalar] -Port location hint -
-

DESCRIPTION

-

-The norma_port_location_hint function returns the best -guess of name's -current location. The hint is guaranteed to be a node where -the port once was; it is -guaranteed to be accurate if the port has never moved. This can be used to -determine the residence node for hosts, tasks, threads, etc. -
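A sketch of how the hint might be consumed; the pointer form of the out parameter and the wrapper name are assumptions.

#include <mach/mach.h>

/* Best-guess node for the right `name' held by `task'.  The result is
 * a node the port once lived on, and is exact only if it never moved. */
int
port_node_hint(task_t task, mach_port_t name)
{
	int node = -1;
	kern_return_t kr = norma_port_location_hint(task, name, &node);
	return (kr == KERN_SUCCESS) ? node : -1;
}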

RETURN VALUES

-

-Only generic errors apply. -

RELATED INFORMATION

-

-Functions: -norma_task_create. diff --git a/osfmk/man/norma_set_special_port.html b/osfmk/man/norma_set_special_port.html deleted file mode 100755 index f2d1f381d..000000000 --- a/osfmk/man/norma_set_special_port.html +++ /dev/null @@ -1,97 +0,0 @@ -

norma_set_special_port

-
-

-Function - Set node-specific special port. -

SYNOPSIS

-
-kern_return_t   norma_set_special_port
-                (host_priv_t                          host_priv,
-                 int                                 which_port,
-                 mach_port_t                       special_port);
-
-
-

Macro forms:

-
-
-#include <mach/norma_special_ports.h>
-
-kern_return_t   norma_set_device_port
-                (host_priv_t                          host_priv,
-                 mach_port_t                       special_port);
-
-kern_return_t   norma_set_host_port
-                (host_priv_t                          host_priv,
-                 mach_port_t                       special_port);
-
-kern_return_t   norma_set_host_priv_port
-                (host_priv_t                          host_priv,
-                 int                                       node,
-                 mach_port_t                       special_port);
-
-kern_return_t   norma_set_nameserver_port
-                (host_priv_t                          host_priv,
-                 mach_port_t                       special_port);
-
- -

PARAMETERS

-
-
host_priv -
-[in host-control send right] -The host for which to set the special port. -Currently, this must be the per-node host control port. -

-

node -
-[in scalar] -The index of the node for which the port is to be set. -

-

which_port -
-[in scalar] -The index of the special port to be set. Valid values are: -
-

-

NORMA_DEVICE_PORT -
-[device-master send right] The device master port for the -node. -

-

NORMA_HOST_PORT -
-[host-name send right] The host name port for the node. -

-

NORMA_HOST_PRIV_PORT -
-[host-control send right] The host control port for the node. -

-

NORMA_NAMESERVER_PORT -
-[name-server send right] The registered name server port for -the node. -
-

-

special_port -
-[in norma-special send right] -Send right to the new special port. -
-

DESCRIPTION

-

-The norma_set_special_port function sets the special -port belonging to node on host_priv. -

-Each node maintains a (small) set of node specific ports. The device master -port, host name, and host control ports are maintained by the kernel. -The kernel also permits -a small set of server specified -node specific ports; the name server port is an example and is given (by -convention) an assigned special port index. -
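A sketch of registering a name server port via the general form above; the wrapper name is hypothetical, and the caller is assumed to hold send rights for both the per-node host control port and the server.

#include <mach/mach.h>
#include <mach/norma_special_ports.h>

/* Publish `server' as the node's registered name server port.
 * Equivalent macro form: norma_set_nameserver_port(host_priv, server). */
kern_return_t
publish_nameserver(host_priv_t host_priv, mach_port_t server)
{
	return norma_set_special_port(host_priv, NORMA_NAMESERVER_PORT, server);
}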

RETURN VALUES

-

-Only generic errors apply. -

RELATED INFORMATION

-

-Functions: -mach_host_self, -norma_get_special_port. diff --git a/osfmk/man/norma_task_clone.html b/osfmk/man/norma_task_clone.html deleted file mode 100755 index 737acb899..000000000 --- a/osfmk/man/norma_task_clone.html +++ /dev/null @@ -1,78 +0,0 @@ -

norma_task_clone

-
-

-Function - Create a remote task that shares access to the parent task's memory regardless of inheritance attributes. -

SYNOPSIS

-
-kern_return_t   norma_task_clone
-                (task_t                             parent_task,
-                 boolean_t                       inherit_memory,
-                 int                                 child_node,
-                 task_t                              child_task);
-
-

PARAMETERS

-
-

-

parent_task -
-[in task send right] -The port for the task from which to draw the child -task's port rights, resource limits, and address space. -

-

inherit_memory -
-[in scalar] -Address space inheritance indicator. If true, the child task -inherits the address space of the parent task. If false, the kernel assigns -the child task an empty address space. -

-

child_node -
-[in scalar] -The node index of the node on which to create the child. -

-

child_task -
-[out task send right] -The kernel-assigned port name for the new task. -
-

DESCRIPTION

-

-The norma_task_clone function "clones" a new task from -parent_task on the specified node and returns the name -of the new task in child_task. The child -task acquires shared parts of the parent's -address space (see vm_inherit) -regardless of the inheritance set for the parent's memory regions, although the -inheritance for the child's regions will be set to that of the -parent's regions. The child -task initially contains no threads. -

-By way of comparison, tasks created by the standard task_create -primitive are created on the same node as the parent. -

-Other than being created on a different node, the new task has the same -properties as if created by task_create. -

NOTES

-

-This call differs from norma_task_create in that the -inheritance set for the -parent's memory regions is ignored; the child always shares memory with the -parent. -

-This call is intended to support process migration, where the inheritance -semantics of norma_task_create would break migrated -programs that depended upon -sharing relationships remaining after migration. -

-This call is not a true task migration call, in that it does -not migrate the port -space, threads, and other non-address-space attributes of the task. -
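A sketch contrasting the two remote-creation calls, under the assumptions noted earlier (TRUE from the Mach boolean header, a pointer out parameter, prototypes as in the synopsis):

#include <mach/mach.h>

/* Migration-style clone onto `node': the child shares the parent's
 * memory even where regions are marked VM_INHERIT_COPY or
 * VM_INHERIT_NONE; norma_task_create would honor those attributes. */
kern_return_t
clone_for_migration(task_t parent, int node, task_t *child)
{
	return norma_task_clone(parent, TRUE, node, child);
}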

RETURN VALUES

-

-Only generic errors apply. -

RELATED INFORMATION

-

-Functions: -task_create, -norma_task_create. diff --git a/osfmk/man/norma_task_create.html b/osfmk/man/norma_task_create.html deleted file mode 100755 index 68158d866..000000000 --- a/osfmk/man/norma_task_create.html +++ /dev/null @@ -1,59 +0,0 @@ -

norma_task_create

-
-

-Function - Create a remote task using task_create semantics. -

SYNOPSIS

-
-kern_return_t   norma_task_create
-                (task_t                             parent_task,
-                 boolean_t                       inherit_memory,
-                 int                                 child_node,
-                 task_t                              child_task);
-
-

PARAMETERS

-
-

-

parent_task -
-[in task send right] -The port for the task from which to draw the child -task's port rights, resource limits, and address space. -

-

inherit_memory -
-[in scalar] -Address space inheritance indicator. If true, the child task -inherits the address space of the parent task. If false, the kernel assigns -the child task an empty address space. -

-

child_node -
-[in scalar] -The node index of the node on which to create the child. -

-

child_task -
-[out task send right] -The kernel-assigned port name for the new task. -
-

DESCRIPTION

-

-The norma_task_create function creates a new task from -parent_task on the specified node and returns the name of the -new task in child_task. The child -task acquires shared or copied parts of the parent's address space (see -vm_inherit). The child task initially contains no threads. -

-By way of comparison, tasks created by the standard task_create -primitive are created on the same node as the parent. -

-Other than being created on a different node, the new task has the same -properties as if created by task_create. -
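For comparison, a sketch of the non-sharing path: with inherit_memory FALSE the remote child starts with an empty address space. The wrapper name and pointer out parameter are assumptions.

#include <mach/mach.h>

/* Create an empty child task on a remote node. */
kern_return_t
spawn_empty_on_node(task_t parent, int node, task_t *child)
{
	return norma_task_create(parent, FALSE, node, child);
}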

RETURN VALUES

-

-Only generic errors apply. -

RELATED INFORMATION

-

-Functions: -task_create, -norma_task_clone. diff --git a/osfmk/man/norma_task_teleport.html b/osfmk/man/norma_task_teleport.html deleted file mode 100755 index 102530ef1..000000000 --- a/osfmk/man/norma_task_teleport.html +++ /dev/null @@ -1,71 +0,0 @@ -

norma_task_teleport

-
-

-Function - "Clone" a task on a specified node. -

SYNOPSIS

-
-kern_return_t   norma_task_teleport
-                (task_t                             parent_task,
-                 boolean_t                       inherit_memory,
-                 int                                 child_node,
-                 task_t                              child_task);
-
-

PARAMETERS

-
-

-

parent_task -
-[in task send right] The port for the task from which to draw the child -task's port rights, resource limits, and address space. -

-

inherit_memory -
-[in scalar] Address space inheritance indicator. If true, the child task -inherits the address space of the parent task. If false, the kernel assigns -the child task an empty address space. -

-

child_node -
-[in scalar] The node index of the node on which to create the child. -

-

child_task -
-[out task send right] The kernel-assigned port name for the new task. -
-

DESCRIPTION

-

-The norma_task_teleport function "clones" a new task from parent_task on -the specified node and returns the name of the new task in -child_task. The child task acquires shared parts of the parent's -address space (see vm_inherit) regardless of the inheritance set for -the parent's memory regions, although the inheritance for the -child's regions will be set to that of the parent's regions. The child -task initially contains no threads. The parent_task is then -terminated. -By way of comparison, tasks created by the standard task_create -primitive are created on the same node as the parent. -Other than being created on a different node, the new task has the -same properties as if created by task_create. -

NOTES

-

-This call differs from norma_task_clone in that the parent task is -terminated as part of the teleport call. -This call differs from norma_task_create in that the inheritance set -for the parent's memory regions is ignored; the child always shares -memory with the parent. -This call is intended to support process migration, where the -inheritance semantics of norma_task_create would break migrated -programs that depended upon sharing relationships remaining after -migration. -This call is not a true task migration call, in that it does not -migrate the port space, threads, and other non-address-space -attributes of the task. -
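A sketch of the one-shot migration described above; after a successful return the parent has been terminated by the kernel, so the caller must not use it again. Wrapper name and pointer out parameter are assumptions.

#include <mach/mach.h>

/* Move the address space of `parent' to `node'.  Threads and port
 * space do not migrate; only generic errors apply. */
kern_return_t
teleport_task(task_t parent, int node, task_t *child)
{
	return norma_task_teleport(parent, TRUE, node, child);
}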

RETURN VALUES

-

-Only generic errors apply. -

RELATED INFORMATION

-

-Functions: -norma_task_clone, -task_create, -norma_task_create, diff --git a/osfmk/man/notify_server.html b/osfmk/man/notify_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/policy_fifo_info.html b/osfmk/man/policy_fifo_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/policy_rr_info.html b/osfmk/man/policy_rr_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/policy_timeshare_info.html b/osfmk/man/policy_timeshare_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_assign.html b/osfmk/man/processor_assign.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_basic_info.html b/osfmk/man/processor_basic_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_control.html b/osfmk/man/processor_control.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_exit.html b/osfmk/man/processor_exit.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_get_assignment.html b/osfmk/man/processor_get_assignment.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_info.html b/osfmk/man/processor_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_basic_info.html b/osfmk/man/processor_set_basic_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_create.html b/osfmk/man/processor_set_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_default.html b/osfmk/man/processor_set_default.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_destroy.html b/osfmk/man/processor_set_destroy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_info.html b/osfmk/man/processor_set_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_load_info.html b/osfmk/man/processor_set_load_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_max_priority.html b/osfmk/man/processor_set_max_priority.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_statistics.html b/osfmk/man/processor_set_statistics.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_tasks.html b/osfmk/man/processor_set_tasks.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_set_threads.html b/osfmk/man/processor_set_threads.html old mode 100755 new mode 100644 diff --git a/osfmk/man/processor_start.html b/osfmk/man/processor_start.html old mode 100755 new mode 100644 diff --git a/osfmk/man/prof_server.html b/osfmk/man/prof_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/receive_samples.html b/osfmk/man/receive_samples.html old mode 100755 new mode 100644 diff --git a/osfmk/man/semaphore_create.html b/osfmk/man/semaphore_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/semaphore_destroy.html b/osfmk/man/semaphore_destroy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/semaphore_signal.html b/osfmk/man/semaphore_signal.html old mode 100755 new mode 100644 diff --git a/osfmk/man/semaphore_signal_all.html b/osfmk/man/semaphore_signal_all.html old mode 100755 new mode 100644 diff --git a/osfmk/man/semaphore_wait.html b/osfmk/man/semaphore_wait.html old mode 100755 new mode 100644 diff --git a/osfmk/man/seqnos_notify_server.html b/osfmk/man/seqnos_notify_server.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_assign.html b/osfmk/man/task_assign.html old mode 100755 new mode 100644 diff --git 
a/osfmk/man/task_assign_default.html b/osfmk/man/task_assign_default.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_basic_info.html b/osfmk/man/task_basic_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_create.html b/osfmk/man/task_create.html old mode 100755 new mode 100644 index 7578a8e06..a0f5a64d1 --- a/osfmk/man/task_create.html +++ b/osfmk/man/task_create.html @@ -120,5 +120,4 @@ Functions: task_sample, task_set_exception_ports, mach_ports_register, -norma_task_create, host_security_set_task_token. diff --git a/osfmk/man/task_get_assignment.html b/osfmk/man/task_get_assignment.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_get_emulation_vector.html b/osfmk/man/task_get_emulation_vector.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_get_exception_ports.html b/osfmk/man/task_get_exception_ports.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_get_special_port.html b/osfmk/man/task_get_special_port.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_info.html b/osfmk/man/task_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_policy.html b/osfmk/man/task_policy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_resume.html b/osfmk/man/task_resume.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_sample.html b/osfmk/man/task_sample.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_set_emulation.html b/osfmk/man/task_set_emulation.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_set_emulation_vector.html b/osfmk/man/task_set_emulation_vector.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_set_exception_ports.html b/osfmk/man/task_set_exception_ports.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_set_info.html b/osfmk/man/task_set_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_set_policy.html b/osfmk/man/task_set_policy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_set_port_space.html b/osfmk/man/task_set_port_space.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_set_special_port.html b/osfmk/man/task_set_special_port.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_suspend.html b/osfmk/man/task_suspend.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_swap_exception_ports.html b/osfmk/man/task_swap_exception_ports.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_terminate.html b/osfmk/man/task_terminate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_thread_times_info.html b/osfmk/man/task_thread_times_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/task_threads.html b/osfmk/man/task_threads.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_abort.html b/osfmk/man/thread_abort.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_abort_safely.html b/osfmk/man/thread_abort_safely.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_activation_create.html b/osfmk/man/thread_activation_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_assign.html b/osfmk/man/thread_assign.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_assign_default.html b/osfmk/man/thread_assign_default.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_basic_info.html b/osfmk/man/thread_basic_info.html old mode 100755 new mode 100644 diff --git 
a/osfmk/man/thread_create.html b/osfmk/man/thread_create.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_create_running.html b/osfmk/man/thread_create_running.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_depress_abort.html b/osfmk/man/thread_depress_abort.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_get_assignment.html b/osfmk/man/thread_get_assignment.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_get_exception_ports.html b/osfmk/man/thread_get_exception_ports.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_get_special_port.html b/osfmk/man/thread_get_special_port.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_get_state.html b/osfmk/man/thread_get_state.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_info.html b/osfmk/man/thread_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_policy.html b/osfmk/man/thread_policy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_resume.html b/osfmk/man/thread_resume.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_sample.html b/osfmk/man/thread_sample.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_set_exception_ports.html b/osfmk/man/thread_set_exception_ports.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_set_policy.html b/osfmk/man/thread_set_policy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_set_special_port.html b/osfmk/man/thread_set_special_port.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_set_state.html b/osfmk/man/thread_set_state.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_suspend.html b/osfmk/man/thread_suspend.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_switch.html b/osfmk/man/thread_switch.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_terminate.html b/osfmk/man/thread_terminate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/thread_wire.html b/osfmk/man/thread_wire.html old mode 100755 new mode 100644 diff --git a/osfmk/man/tvalspec.html b/osfmk/man/tvalspec.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_allocate.html b/osfmk/man/vm_allocate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_behavior_set.html b/osfmk/man/vm_behavior_set.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_copy.html b/osfmk/man/vm_copy.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_deallocate.html b/osfmk/man/vm_deallocate.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_inherit.html b/osfmk/man/vm_inherit.html old mode 100755 new mode 100644 index e748f054e..2108ed52f --- a/osfmk/man/vm_inherit.html +++ b/osfmk/man/vm_inherit.html @@ -92,4 +92,3 @@ Functions: task_create, vm_map, vm_region, -norma_task_create. 
diff --git a/osfmk/man/vm_machine_attribute.html b/osfmk/man/vm_machine_attribute.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_map.html b/osfmk/man/vm_map.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_msync.html b/osfmk/man/vm_msync.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_protect.html b/osfmk/man/vm_protect.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_read.html b/osfmk/man/vm_read.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_region.html b/osfmk/man/vm_region.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_region_basic_info.html b/osfmk/man/vm_region_basic_info.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_remap.html b/osfmk/man/vm_remap.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_statistics.html b/osfmk/man/vm_statistics.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_wire.html b/osfmk/man/vm_wire.html old mode 100755 new mode 100644 diff --git a/osfmk/man/vm_write.html b/osfmk/man/vm_write.html old mode 100755 new mode 100644 diff --git a/osfmk/profiling/Makefile b/osfmk/profiling/Makefile index 3fda07c40..c273a8c41 100644 --- a/osfmk/profiling/Makefile +++ b/osfmk/profiling/Makefile @@ -10,9 +10,6 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ machine -INSTINC_SUBDIRS_I386 = \ - i386 - INSTINC_SUBDIRS_X86_64 = \ x86_64 @@ -20,20 +17,12 @@ INSTINC_SUBDIRS_X86_64 = \ EXPINC_SUBDIRS = \ machine -EXPINC_SUBDIRS_I386 = \ - i386 - EXPINC_SUBDIRS_X86_64 = \ x86_64 -MIG_DEFS = \ - -MIG_HDRS = \ - DATAFILES = \ - profile-internal.h profile-mk.h profile-kgmon.c \ - ${MIG_DEFS} + profile-internal.h profile-mk.h profile-kgmon.c MIGINCLUDES = \ diff --git a/osfmk/profiling/i386/profile-asm.s b/osfmk/profiling/i386/profile-asm.s deleted file mode 100644 index 2386d46bf..000000000 --- a/osfmk/profiling/i386/profile-asm.s +++ /dev/null @@ -1,1451 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:08 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.7.1 1997/09/22 17:41:24 barbou - * MP+RT: protect cpu_number() usage against preemption. - * [97/09/16 barbou] - * - * Revision 1.1.5.1 1995/01/06 19:53:37 devrcs - * mk6 CR668 - 1.3b26 merge - * new file for mk6 - * [1994/10/12 22:25:20 dwm] - * - * Revision 1.1.2.2 1994/05/16 19:19:17 meissner - * Add support for converting 64-bit integers to a decimal string. - * Use the correct address (selfpc) when creating the prof header for gprof. - * [1994/04/28 21:44:59 meissner] - * - * Revision 1.1.2.1 1994/04/08 17:51:42 meissner - * Make most stats 64 bits, except for things like memory allocation. - * [1994/04/02 14:58:21 meissner] - * - * Do not provide old mcount support under MK or server. - * Fixup stats size so it is the same as in profile-md.h. - * [1994/03/29 21:00:03 meissner] - * - * Use faster sequence for overflow addition. - * Keep {dummy,prof,gprof,old}_mcount counts in double precision. - * Add kernel NCPUS > 1 support. - * [1994/03/17 20:13:23 meissner] - * - * Add gprof/prof overflow support - * [1994/03/17 14:56:44 meissner] - * - * Add size of histogram counters & unused fields to profile_profil struct - * [1994/02/17 21:41:44 meissner] - * - * Add too_low/too_high to profile_stats. - * [1994/02/16 22:38:11 meissner] - * - * Bump # allocation contexts to 32 from 16. - * Store unique ptr address in gprof function header structure for _profile_reset. - * Add new fields from profile-{internal,md}.h. - * Align loop looking for an unlocked acontext. - * Count # times a locked context block was found. - * Expand copyright. - * [1994/02/07 12:40:56 meissner] - * - * Keep track of the number of times the kernel overflows the HISTCOUNTER counter. - * [1994/02/03 20:13:23 meissner] - * - * Add stats for {user,kernel,idle} mode in the kernel. - * [1994/02/03 15:17:22 meissner] - * - * No change. - * [1994/02/03 00:58:49 meissner] - * - * Combine _profile_{vars,stats,md}; Allow more than one _profile_vars. - * [1994/02/01 12:03:56 meissner] - * - * Move _mcount_ptr to be closer to other data declarations. - * Add text_len to profile_profil structure for mk. - * Split records_cnt into prof_cnt/gprof_cnt. - * Always update prof_cnt/gprof_cnt even if not DO_STATS. - * Add current/max cpu indicator to stats for kernel. - * [1994/01/28 23:33:20 meissner] - * - * Don't do 4+Lgotoff(lab), use separate labels. - * Change GPROF_HASH_SHIFT to 9 (from 8). - * [1994/01/26 22:00:59 meissner] - * - * Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads. - * [1994/01/26 20:30:57 meissner] - * - * Move callback pointers into separate allocation context. - * Add size fields for other structures to profile-vars. - * Allocate string table as one large allocation. - * Rewrite old mcount code once again. - * Use multiply to make hash value, not divide. - * Hash table is now a power of two. - * [1994/01/26 20:23:32 meissner] - * - * Cut hash table size back to 16189. - * Add size fields to all structures. - * Add major/minor version number to _profile_md. - * Move allocation context block pointers to _profile_vars. - * Move _gprof_dummy after _profile_md. - * New function header code now falls into hash an element - * to avoid having the hash code duplicated or use a macro. 
- * Fix bug in _gprof_mcount with ELF shared libraries. - * [1994/01/25 01:45:59 meissner] - * - * Move init functions to C code; rearrange profil varaibles. - * [1994/01/22 01:11:14 meissner] - * - * No change. - * [1994/01/20 20:56:43 meissner] - * - * Fixup copyright. - * [1994/01/18 23:07:39 meissner] - * - * Make flags byte-sized. - * Add have_bb flag. - * Add init_format flag. - * Always put word size multipler first in .space. - * [1994/01/18 21:57:14 meissner] - * - * Fix elfpic problems in last change. - * [1994/01/16 14:04:26 meissner] - * - * Rewrite gprof caching to be faster & not need a lock. - * Record prof information for gprof too. - * Bump reserved stats to 64. - * Bump up hash table size 30799. - * Conditionally use lock prefix. - * Change most #ifdef's to #if. - * DEBUG_PROFILE turns on stack frames now. - * Conditionally add externs to gprof to determine where time is spent. - * Prof_mcount uses xchgl to update function pointer. - * [1994/01/15 18:40:33 meissner] - * - * Fix a comment. - * Separate statistics from debugging (though debugging turns it on). - * Remove debug code that traces each gprof request. - * [1994/01/15 00:59:02 meissner] - * - * Move max hash bucket calculation into _gprof_write & put info in stats structure. - * [1994/01/04 16:15:14 meissner] - * - * Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to. - * [1994/01/04 15:37:44 meissner] - * - * Add more allocation memory pools (gprof function hdrs in particular). - * For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time. - * Add major/minor version numbers to _profile_{vars,stats}. - * Add # profil buckets field to _profil_stats. - * [19 - * - * $EndLog$ - */ - -/* - * Common 386 profiling module that is shared between the kernel, mach - * servers, and the user space library. Each environment includes - * this file. - */ - - .file "profile-asm.s" - -#include - -/* - * By default, debugging turns on statistics and stack frames. - */ - -#if DEBUG_PROFILE -#ifndef DO_STATS -#define DO_STATS 1 -#endif - -#ifndef STACK_FRAMES -#define STACK_FRAMES 1 -#endif -#endif - -#ifndef OLD_MCOUNT -#define OLD_MCOUNT 0 /* do not compile old code for mcount */ -#endif - -#ifndef DO_STATS -#define DO_STATS 1 /* compile in statistics code */ -#endif - -#ifndef DO_LOCK -#define DO_LOCK 0 /* use lock; in front of increments */ -#endif - -#ifndef LOCK_STATS -#define LOCK_STATS DO_LOCK /* update stats with lock set */ -#endif - -#ifndef STACK_FRAMES -#define STACK_FRAMES 0 /* create stack frames for debugger */ -#endif - -#ifndef NO_RECURSIVE_ALLOC -#define NO_RECURSIVE_ALLOC 0 /* check for recursive allocs */ - /* (not thread safe!) */ -#endif - -#ifndef MARK_GPROF -#define MARK_GPROF 0 /* add externs for gprof profiling */ -#endif - -#ifndef OVERFLOW -#define OVERFLOW 1 /* add overflow checking support */ -#endif - -/* - * Turn on the use of the lock prefix if desired. 
- */ - -#ifndef LOCK -#if DO_LOCK -#define LOCK lock; -#else -#define LOCK -#endif -#endif - -#ifndef SLOCK -#if LOCK_STATS -#define SLOCK LOCK -#else -#define SLOCK -#endif -#endif - -/* - * Double or single precision incrementing - */ - -#if OVERFLOW -#define DINC(mem) LOCK addl $1,mem; LOCK adcl $0,4+mem -#define DINC2(mem,mem2) LOCK addl $1,mem; LOCK adcl $0,mem2 -#define SDINC(mem) SLOCK addl $1,mem; SLOCK adcl $0,4+mem -#define SDADD(val,mem) SLOCK addl val,mem; SLOCK adcl $0,4+mem -#define SDADDNEG(val,mem) SLOCK subl val,mem; SLOCK adcl $0,4+mem -#define SDSUB(val,mem) SLOCK subl val,mem; SLOCK sbbl $0,4+mem - -#else -#define DINC(mem) LOCK incl mem -#define DINC2(mem,mem2) LOCK incl mem -#define SDINC(mem) SLOCK incl mem -#define SDADD(val,mem) SLOCK addl val,mem -#define SDADDNEG(val,mem) SLOCK subl val,mem -#define SDSUB(val,mem) SLOCK subl val,mem -#endif - -/* - * Stack frame support so that debugger traceback works. - */ - -#if STACK_FRAMES -#define ENTER pushl %ebp; movl %esp,%ebp -#define LEAVE0 popl %ebp -#define Estack 4 -#else -#define ENTER -#define LEAVE0 -#define Estack 0 -#endif - -/* - * Gprof profiling. - */ - -#if MARK_GPROF -#define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name) -#else -#define MARK(name) -#endif - -/* - * Profiling allocation context block. Each time memory is needed, the - * allocator loops until it finds an unlocked context block, and allocates - * from that block. If no context blocks are available, a new memory - * pool is allocated, and added to the end of the chain. - */ - -LCL(A_next) = 0 /* next context block link (must be 0) */ -LCL(A_plist) = LCL(A_next)+4 /* head of page list for context block */ -LCL(A_lock) = LCL(A_plist)+4 /* lock word */ -LCL(A_size) = LCL(A_lock)+4 /* size of context block */ - -#define A_next LCL(A_next) -#define A_plist LCL(A_plist) -#define A_lock LCL(A_lock) -#define A_size LCL(A_size) - -/* - * Allocation contexts used. - */ - -LCL(C_prof) = 0 /* prof records */ -LCL(C_gprof) = 1 /* gprof arc records */ -LCL(C_gfunc) = 2 /* gprof function headers */ -LCL(C_misc) = 3 /* misc. allocations */ -LCL(C_profil) = 4 /* memory for profil */ -LCL(C_dci) = 5 /* memory for dci */ -LCL(C_bb) = 6 /* memory for basic blocks */ -LCL(C_callback) = 7 /* memory for callbacks */ -LCL(C_max) = 32 /* # allocation contexts */ - -#define C_prof LCL(C_prof) -#define C_gprof LCL(C_gprof) -#define C_gfunc LCL(C_gfunc) -#define C_max LCL(C_max) - -/* - * Linked list of memory allocations. - */ - -LCL(M_first) = 0 /* pointer to first byte available */ -LCL(M_ptr) = LCL(M_first)+4 /* pointer to next available byte */ -LCL(M_next) = LCL(M_ptr)+4 /* next page allocated */ -LCL(M_nfree) = LCL(M_next)+4 /* # bytes available */ -LCL(M_nalloc) = LCL(M_nfree)+4 /* # bytes allocated */ -LCL(M_num) = LCL(M_nalloc)+4 /* # allocations done on this page */ -LCL(M_size) = LCL(M_num)+4 /* size of page header */ - -#define M_first LCL(M_first) -#define M_ptr LCL(M_ptr) -#define M_next LCL(M_next) -#define M_nfree LCL(M_nfree) -#define M_nalloc LCL(M_nalloc) -#define M_num LCL(M_num) -#define M_size LCL(M_size) - -/* - * Prof data type. - */ - -LCL(P_addr) = 0 /* function address */ -LCL(P_count) = LCL(P_addr)+4 /* # times function called */ -LCL(P_overflow) = LCL(P_count)+4 /* # times count overflowed */ -LCL(P_size) = LCL(P_overflow)+4 /* size of prof data type */ - -#define P_addr LCL(P_addr) -#define P_count LCL(P_count) -#define P_overflow LCL(P_overflow) -#define P_size LCL(P_size) - -/* - * Gprof data type. 
- */ - -LCL(G_next) = 0 /* next hash link (must be 0) */ -LCL(G_frompc) = LCL(G_next)+4 /* caller's caller */ -LCL(G_selfpc) = LCL(G_frompc)+4 /* caller's address */ -LCL(G_count) = LCL(G_selfpc)+4 /* # times arc traversed */ -LCL(G_overflow) = LCL(G_count)+4 /* # times count overflowed */ -LCL(G_size) = LCL(G_overflow)+4 /* size of gprof data type */ - -#define G_next LCL(G_next) -#define G_frompc LCL(G_frompc) -#define G_selfpc LCL(G_selfpc) -#define G_count LCL(G_count) -#define G_overflow LCL(G_overflow) -#define G_size LCL(G_size) - -/* - * Gprof header. - * - * At least one header is allocated for each unique function that is profiled. - * In order to save time calculating the hash value, the last H_maxcache - * distinct arcs are cached within this structure. Also, to avoid loading - * the GOT when searching the hash table, we copy the hash pointer to this - * structure, so that we only load the GOT when we need to allocate an arc. - */ - -LCL(H_maxcache) = 3 /* # of cache table entries */ -LCL(H_csize) = 4*LCL(H_maxcache) /* size of each cache array */ - -LCL(H_hash_ptr) = 0 /* hash table to use */ -LCL(H_unique_ptr) = LCL(H_hash_ptr)+4 /* function unique pointer */ -LCL(H_prof) = LCL(H_unique_ptr)+4 /* prof statistics */ -LCL(H_cache_ptr) = LCL(H_prof)+P_size /* cache table of element pointers */ -LCL(H_size) = LCL(H_cache_ptr)+LCL(H_csize) /* size of gprof header type */ - -#define H_maxcache LCL(H_maxcache) -#define H_csize LCL(H_csize) -#define H_hash_ptr LCL(H_hash_ptr) -#define H_unique_ptr LCL(H_unique_ptr) -#define H_prof LCL(H_prof) -#define H_cache_ptr LCL(H_cache_ptr) -#define H_size LCL(H_size) - -/* - * Number of digits needed to write a 64 bit number including trailing null. - * (rounded up to be divisable by 4). - */ - -#define N_digit 24 - - - .data - -/* - * Default gprof hash table size, which must be a power of two. - * The shift specifies how many low order bits to eliminate when - * calculating the hash value. - */ - -#ifndef GPROF_HASH_SIZE -#define GPROF_HASH_SIZE 16384 -#endif - -#ifndef GPROF_HASH_SHIFT -#define GPROF_HASH_SHIFT 9 -#endif - -#define GPROF_HASH_MASK (GPROF_HASH_SIZE-1) - -DATA(_profile_hash_size) - .long GPROF_HASH_SIZE -ENDDATA(_profile_hash_size) - - - -/* - * Pointer that the compiler uses to call to the appropriate mcount function. - */ - -DATA(_mcount_ptr) - .long EXT(_dummy_mcount) -ENDDATA(_mcount_ptr) - -/* - * Global profile variables. The structure that accesses this in C is declared - * in profile-internal.h. All items in .data that follow this will be used as - * one giant record, and each unique machine, thread, kgmon output or what have - * you will create a separate instance. Typically there is only one instance - * which will be the memory laid out below. 
- */ - -LCL(var_major_version) = 0 /* major version number */ -LCL(var_minor_version) = LCL(var_major_version)+4 /* minor version number */ -LCL(vars_size) = LCL(var_minor_version)+4 /* size of _profile_vars structure */ -LCL(plist_size) = LCL(vars_size)+4 /* size of page_list structure */ -LCL(acontext_size) = LCL(plist_size)+4 /* size of allocation contexts */ -LCL(callback_size) = LCL(acontext_size)+4 /* size of callback structure */ -LCL(type) = LCL(callback_size)+4 /* profile type (gprof, prof) */ -LCL(error_msg) = LCL(type)+4 /* error message for perror */ -LCL(filename) = LCL(error_msg)+4 /* filename to write to */ -LCL(str_ptr) = LCL(filename)+4 /* string table pointer */ -LCL(stream) = LCL(str_ptr)+4 /* stdio stream to write to */ -LCL(diag_stream) = LCL(stream)+4 /* stdio stream to write diagnostics to */ -LCL(fwrite_func) = LCL(diag_stream)+4 /* function like fwrite to output bytes */ -LCL(page_size) = LCL(fwrite_func)+4 /* page size in bytes */ -LCL(str_bytes) = LCL(page_size)+4 /* # bytes in string table */ -LCL(str_total) = LCL(str_bytes)+4 /* # total bytes allocated for string table */ -LCL(clock_ticks) = LCL(str_total)+4 /* # clock ticks per second */ - - /* profil variables */ -LCL(profil_start) = LCL(clock_ticks)+4 /* start of profil variables */ -LCL(lowpc) = LCL(clock_ticks)+4 /* lowest address */ -LCL(highpc) = LCL(lowpc)+4 /* highest address */ -LCL(text_len) = LCL(highpc)+4 /* highpc-lowpc */ -LCL(profil_len) = LCL(text_len)+4 /* size of profil buffer */ -LCL(counter_size) = LCL(profil_len)+4 /* size of indivual counter */ -LCL(scale) = LCL(counter_size)+4 /* scale factor */ -LCL(profil_unused) = LCL(scale)+4 /* unused fields */ -LCL(profil_end) = LCL(profil_unused)+4*8 /* end of profil_info structure */ -LCL(profil_buf) = LCL(profil_end) /* buffer for profil */ - - /* Output selection func ptrs */ -LCL(output_init) = LCL(profil_buf)+4 /* Initialization */ -LCL(output) = LCL(output_init)+4 /* Write out profiling info */ -LCL(output_ptr) = LCL(output)+4 /* Output specific data ptr */ - - /* Memory allocation support */ -LCL(acontext) = LCL(output_ptr)+4 /* pointers to allocation context blocks */ - -LCL(bogus_func) = LCL(acontext)+4*C_max /* function to use if gprof arc is bad */ -LCL(vars_unused) = LCL(bogus_func)+4 /* future growth */ - - /* flags */ -LCL(init) = LCL(vars_unused)+4*63 /* whether initializations were done */ -LCL(active) = LCL(init)+1 /* whether profiling is active */ -LCL(do_profile) = LCL(active)+1 /* whether to do profiling */ -LCL(use_dci) = LCL(do_profile)+1 /* whether to use DCI */ -LCL(use_profil) = LCL(use_dci)+1 /* whether to use profil */ -LCL(recursive_alloc) = LCL(use_profil)+1 /* alloc called recursively */ -LCL(output_uarea) = LCL(recursive_alloc)+1 /* output uarea */ -LCL(output_stats) = LCL(output_uarea)+1 /* output stats info */ -LCL(output_clock) = LCL(output_stats)+1 /* output the clock ticks */ -LCL(multiple_sections) = LCL(output_clock)+1 /* multiple sections are ok */ -LCL(have_bb) = LCL(multiple_sections)+1 /* whether we have basic block data */ -LCL(init_format) = LCL(have_bb)+1 /* The output format has been chosen */ -LCL(debug) = LCL(init_format)+1 /* Whether or not we are debugging */ -LCL(check_funcs) = LCL(debug)+1 /* Whether to check functions for validity */ -LCL(flag_unused) = LCL(check_funcs)+1 /* unused flags */ -LCL(end_of_vars) = LCL(flag_unused)+62 /* size of machine independent vars */ - -/* - * Data that contains profile statistics that can be dumped out - * into the {,g}mon.out file. 
This is defined in profile-md.h. - */ - -LCL(stats_start) = LCL(end_of_vars) /* start of stats substructure */ -LCL(stats_major_version)= LCL(stats_start) /* major version number */ -LCL(stats_minor_version)= LCL(stats_major_version)+4 /* minor version number */ -LCL(stats_size) = LCL(stats_minor_version)+4 /* size of _profile_stats structure */ -LCL(profil_buckets) = LCL(stats_size)+4 /* # profil buckets */ -LCL(my_cpu) = LCL(profil_buckets)+4 /* identify which cpu/thread this is */ -LCL(max_cpu) = LCL(my_cpu)+4 /* identify which cpu/thread this is */ -LCL(prof_records) = LCL(max_cpu)+4 /* # of profiled functions */ -LCL(gprof_records) = LCL(prof_records)+4 /* # of gprof arcs created */ -LCL(hash_buckets) = LCL(gprof_records)+4 /* max gprof hash buckets on a chain */ -LCL(bogus_count) = LCL(hash_buckets)+4 /* # bogus functions found in gprof */ - -LCL(cnt) = LCL(bogus_count)+4 /* # of _{prof,gprof}_mcount calls */ -LCL(dummy) = LCL(cnt)+8 /* # of _dummy_mcount calls */ -LCL(old_mcount) = LCL(dummy)+8 /* # of old mcount calls */ -LCL(hash_search) = LCL(old_mcount)+8 /* # gprof hash buckets searched */ -LCL(hash_num) = LCL(hash_search)+8 /* # times hash table searched */ -LCL(user_ticks) = LCL(hash_num)+8 /* # ticks within user space */ -LCL(kernel_ticks) = LCL(user_ticks)+8 /* # ticks within kernel space */ -LCL(idle_ticks) = LCL(kernel_ticks)+8 /* # ticks cpu was idle */ -LCL(overflow_ticks) = LCL(idle_ticks)+8 /* # ticks where histcounter overflowed */ -LCL(acontext_locked) = LCL(overflow_ticks)+8 /* # times an acontext was locked */ -LCL(too_low) = LCL(acontext_locked)+8 /* # times histogram tick too low */ -LCL(too_high) = LCL(too_low)+8 /* # times histogram tick too low */ -LCL(prof_overflow) = LCL(too_high)+8 /* # times the prof count field overflowed */ -LCL(gprof_overflow) = LCL(prof_overflow)+8 /* # times the gprof count field overflowed */ -LCL(num_alloc) = LCL(gprof_overflow)+8 /* # allocations in each context */ -LCL(bytes_alloc) = LCL(num_alloc)+4*C_max /* bytes allocated in each context */ -LCL(num_context) = LCL(bytes_alloc)+4*C_max /* # allocation context blocks */ -LCL(wasted) = LCL(num_context)+4*C_max /* # bytes wasted */ -LCL(overhead) = LCL(wasted)+4*C_max /* # bytes of overhead */ -LCL(buckets) = LCL(overhead)+4*C_max /* # hash indexes that have n buckets */ -LCL(cache_hits1) = LCL(buckets)+4*10 /* # gprof cache hits in bucket #1 */ -LCL(cache_hits2) = LCL(cache_hits1)+8 /* # gprof cache hits in bucket #2 */ -LCL(cache_hits3) = LCL(cache_hits2)+8 /* # gprof cache hits in bucket #3 */ -LCL(stats_unused) = LCL(cache_hits3)+8 /* reserved for future use */ -LCL(stats_end) = LCL(stats_unused)+8*64 /* end of stats structure */ - -/* - * Machine dependent variables that no C file should access (except for - * profile-md.c). 
- */ - -LCL(md_start) = LCL(stats_end) /* start of md structure */ -LCL(md_major_version) = LCL(md_start) /* major version number */ -LCL(md_minor_version) = LCL(md_major_version)+4 /* minor version number */ -LCL(md_size) = LCL(md_minor_version)+4 /* size of _profile_stats structure */ -LCL(hash_ptr) = LCL(md_size)+4 /* gprof hash pointer */ -LCL(hash_size) = LCL(hash_ptr)+4 /* gprof hash size */ -LCL(num_cache) = LCL(hash_size)+4 /* # of cache entries */ -LCL(save_mcount_ptr) = LCL(num_cache)+4 /* save for mcount_ptr when suspending profiling */ -LCL(mcount_ptr_ptr) = LCL(save_mcount_ptr)+4 /* pointer to _mcount_ptr */ -LCL(dummy_ptr) = LCL(mcount_ptr_ptr)+4 /* pointer to gprof_dummy */ -LCL(alloc_pages) = LCL(dummy_ptr)+4 /* allocate more memory */ -LCL(num_buffer) = LCL(alloc_pages)+4 /* buffer to convert 64 bit ints in */ -LCL(md_unused) = LCL(num_buffer)+N_digit /* unused fields */ -LCL(md_end) = LCL(md_unused)+4*58 /* end of md structure */ -LCL(total_size) = LCL(md_end) /* size of entire structure */ - -/* - * Size of the entire _profile_vars structure. - */ - -DATA(_profile_size) - .long LCL(total_size) -ENDDATA(_profile_size) - -/* - * Size of the statistics substructure. - */ - -DATA(_profile_stats_size) - .long LCL(stats_end)-LCL(stats_start) -ENDDATA(_profile_stats_size) - -/* - * Size of the profil info substructure. - */ - -DATA(_profile_profil_size) - .long LCL(profil_end)-LCL(profil_start) -ENDDATA(_profile_profil_size) - -/* - * Size of the machine dependent substructure. - */ - -DATA(_profile_md_size) - .long LCL(md_end)-LCL(md_start) -ENDDATA(_profile_profil_size) - -/* - * Whether statistics are supported. - */ - -DATA(_profile_do_stats) - .long DO_STATS -ENDDATA(_profile_do_stats) - - .text - -/* - * Map LCL(xxx) -> into simpler names - */ - -#define V_acontext LCL(acontext) -#define V_acontext_locked LCL(acontext_locked) -#define V_alloc_pages LCL(alloc_pages) -#define V_bogus_func LCL(bogus_func) -#define V_bytes_alloc LCL(bytes_alloc) -#define V_cache_hits1 LCL(cache_hits1) -#define V_cache_hits2 LCL(cache_hits2) -#define V_cache_hits3 LCL(cache_hits3) -#define V_cnt LCL(cnt) -#define V_cnt_overflow LCL(cnt_overflow) -#define V_check_funcs LCL(check_funcs) -#define V_dummy LCL(dummy) -#define V_dummy_overflow LCL(dummy_overflow) -#define V_dummy_ptr LCL(dummy_ptr) -#define V_gprof_records LCL(gprof_records) -#define V_hash_num LCL(hash_num) -#define V_hash_ptr LCL(hash_ptr) -#define V_hash_search LCL(hash_search) -#define V_mcount_ptr_ptr LCL(mcount_ptr_ptr) -#define V_num_alloc LCL(num_alloc) -#define V_num_buffer LCL(num_buffer) -#define V_num_context LCL(num_context) -#define V_old_mcount LCL(old_mcount) -#define V_old_mcount_overflow LCL(old_mcount_overflow) -#define V_overhead LCL(overhead) -#define V_page_size LCL(page_size) -#define V_prof_records LCL(prof_records) -#define V_recursive_alloc LCL(recursive_alloc) -#define V_wasted LCL(wasted) - -/* - * Loadup %ebx with the address of _profile_vars. On a multiprocessor, this - * will loads up the appropriate machine's _profile_vars structure. - * For ELF shared libraries, rely on the fact that we won't need a GOT, - * except to load this pointer. 
- */ - -#if defined (MACH_KERNEL) -#define ASSEMBLER -#include - -#if SQT -#include -#endif - -#ifndef CPU_NUMBER -#error "Cannot determine how to get CPU number" -#endif - -#define Vload CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx - -#else /* not kernel */ -#define Vload Gload; Egaddr(%ebx,_profile_vars) -#endif - - -/* - * Allocate some memory for profiling. This memory is guaranteed to - * be zero. - * %eax contains the memory size requested and will contain ptr on exit. - * %ebx contains the address of the appropriate profile_vars structure. - * %ecx is the number of the memory pool to allocate from (trashed on exit). - * %edx is trashed. - * %esi is preserved. - * %edi is preserved. - * %ebp is preserved. - */ - -Entry(_profile_alloc_asm) - ENTER - pushl %esi - pushl %edi - - movl %ecx,%edi /* move context number to saved reg */ - -#if NO_RECURSIVE_ALLOC - movb $-1,%cl - xchgb %cl,V_recursive_alloc(%ebx) - cmpb $0,%cl - je LCL(no_recurse) - - int $3 - - .align ALIGN -LCL(no_recurse): -#endif - - leal V_acontext(%ebx,%edi,4),%ecx - - /* Loop looking for a free allocation context. */ - /* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */ - /* %edi = context number */ - - .align ALIGN -LCL(alloc_loop): - movl %ecx,%esi /* save ptr in case no more contexts */ - movl A_next(%ecx),%ecx /* next context block */ - cmpl $0,%ecx - je LCL(alloc_context) /* need to allocate a new context block */ - - movl $-1,%edx - xchgl %edx,A_lock(%ecx) /* %edx == 0 if context available */ - -#if DO_STATS - SDADDNEG(%edx,V_acontext_locked(%ebx)) /* increment counter if lock was held */ -#endif - - cmpl $0,%edx - jne LCL(alloc_loop) /* go back if this context block is not available */ - - /* Allocation context found (%ecx), now allocate. */ - movl A_plist(%ecx),%edx /* pointer to current block */ - cmpl $0,%edx /* first allocation? 
*/ - je LCL(alloc_new) - - cmpl %eax,M_nfree(%edx) /* see if we have enough space */ - jl LCL(alloc_new) /* jump if not enough space */ - - /* Allocate from local block (and common exit) */ - /* %eax = bytes to allocate, %ebx = GOT, %ecx = context, %edx = memory block */ - /* %edi = context number */ - - .align ALIGN -LCL(alloc_ret): - -#if DO_STATS - SLOCK incl V_num_alloc(%ebx,%edi,4) /* update global counters */ - SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4) - SLOCK subl %eax,V_wasted(%ebx,%edi,4) -#endif - - movl M_ptr(%edx),%esi /* pointer return value */ - subl %eax,M_nfree(%edx) /* decrement bytes remaining */ - addl %eax,M_nalloc(%edx) /* increment bytes allocated */ - incl M_num(%edx) /* increment # allocations */ - addl %eax,M_ptr(%edx) /* advance pointer */ - movl $0,A_lock(%ecx) /* unlock context block */ - movl %esi,%eax /* return pointer */ - -#if NO_RECURSIVE_ALLOC - movb $0,V_recursive_alloc(%ebx) -#endif - - popl %edi - popl %esi - LEAVE0 - ret /* return to the caller */ - - /* Allocate space in whole number of pages */ - /* %eax = bytes to allocate, %ebx = vars address, %ecx = context */ - /* %edi = context number */ - - .align ALIGN -LCL(alloc_new): - pushl %eax /* save regs */ - pushl %ecx - movl V_page_size(%ebx),%edx - addl $(M_size-1),%eax /* add in overhead size & subtract 1 */ - decl %edx /* page_size - 1 */ - addl %edx,%eax /* round up to whole number of pages */ - notl %edx - andl %edx,%eax - leal -M_size(%eax),%esi /* save allocation size */ - pushl %eax /* argument to _profile_alloc_pages */ - call *V_alloc_pages(%ebx) /* allocate some memory */ - addl $4,%esp /* pop off argument */ - -#if DO_STATS - SLOCK addl %esi,V_wasted(%ebx,%edi,4) /* udpate global counters */ - SLOCK addl $(M_size),V_overhead(%ebx,%edi,4) -#endif - - popl %ecx /* context block */ - movl %eax,%edx /* memory block pointer */ - movl %esi,M_nfree(%edx) /* # free bytes */ - addl $(M_size),%eax /* bump past overhead */ - movl A_plist(%ecx),%esi /* previous memory block or 0 */ - movl %eax,M_first(%edx) /* first space available */ - movl %eax,M_ptr(%edx) /* current address available */ - movl %esi,M_next(%edx) /* next memory block allocated */ - movl %edx,A_plist(%ecx) /* update current page list */ - popl %eax /* user size request */ - jmp LCL(alloc_ret) /* goto common return code */ - - /* Allocate a context header in addition to memory block header + data */ - /* %eax = bytes to allocate, %ebx = GOT, %esi = ptr to store context ptr */ - /* %edi = context number */ - - .align ALIGN -LCL(alloc_context): - pushl %eax /* save regs */ - pushl %esi - movl V_page_size(%ebx),%edx - addl $(A_size+M_size-1),%eax /* add in overhead size & subtract 1 */ - decl %edx /* page_size - 1 */ - addl %edx,%eax /* round up to whole number of pages */ - notl %edx - andl %edx,%eax - leal -A_size-M_size(%eax),%esi /* save allocation size */ - pushl %eax /* argument to _profile_alloc_pages */ - call *V_alloc_pages(%ebx) /* allocate some memory */ - addl $4,%esp /* pop off argument */ - -#if DO_STATS - SLOCK incl V_num_context(%ebx,%edi,4) /* bump # context blocks */ - SLOCK addl %esi,V_wasted(%ebx,%edi,4) /* update global counters */ - SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4) -#endif - - movl %eax,%ecx /* context pointer */ - leal A_size(%eax),%edx /* memory block pointer */ - movl %esi,M_nfree(%edx) /* # free bytes */ - addl $(A_size+M_size),%eax /* bump past overhead */ - movl %eax,M_first(%edx) /* first space available */ - movl %eax,M_ptr(%edx) /* current address available */ - movl $0,M_next(%edx) /* 
next memory block allocated */ - movl %edx,A_plist(%ecx) /* head of memory block list */ - movl $1,A_lock(%ecx) /* set lock */ - popl %esi /* ptr to store context block link */ - movl %ecx,%eax /* context pointer temp */ - xchgl %eax,A_next(%esi) /* link into chain */ - movl %eax,A_next(%ecx) /* add links in case of threading */ - popl %eax /* user size request */ - jmp LCL(alloc_ret) /* goto common return code */ - -END(_profile_alloc_asm) - -/* - * C callable version of the profile memory allocator. - * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t); -*/ - -Entry(_profile_alloc) - ENTER - pushl %ebx - movl 12+Estack(%esp),%eax /* memory size */ - movl 8+Estack(%esp),%ebx /* provile_vars address */ - addl $3,%eax /* round up to word boundary */ - movl 16+Estack(%esp),%ecx /* which memory pool to allocate from */ - andl $0xfffffffc,%eax - call EXT(_profile_alloc_asm) - popl %ebx - LEAVE0 - ret -END(_profile_alloc) - - -/* - * Dummy mcount routine that just returns. - * - * +-------------------------------+ - * | | - * | | - * | caller's caller stack, | - * | saved registers, params. | - * | | - * | | - * +-------------------------------+ - * | caller's caller return addr. | - * +-------------------------------+ - * esp --> | caller's return address | - * +-------------------------------+ - * - * edx --> function unqiue LCL - */ - -Entry(_dummy_mcount) - ENTER - -#if DO_STATS - pushl %ebx - MP_DISABLE_PREEMPTION(%ebx) - Vload - SDINC(V_dummy(%ebx)) - MP_ENABLE_PREEMPTION(%ebx) - popl %ebx -#endif - - LEAVE0 - ret -END(_dummy_mcount) - - -/* - * Entry point for System V based profiling, count how many times each function - * is called. The function label is passed in %edx, and the top two words on - * the stack are the caller's address, and the caller's return address. - * - * +-------------------------------+ - * | | - * | | - * | caller's caller stack, | - * | saved registers, params. | - * | | - * | | - * +-------------------------------+ - * | caller's caller return addr. | - * +-------------------------------+ - * esp --> | caller's return address | - * +-------------------------------+ - * - * edx --> function unique label - * - * We don't worry about the possibility about two threads calling - * the same function for the first time simulataneously. If that - * happens, two records will be created, and one of the records - * address will be stored in in the function unique label (which - * is aligned by the compiler, so we don't have to watch out for - * crossing page/cache boundaries). - */ - -Entry(_prof_mcount) - ENTER - -#if DO_STATS - pushl %ebx - MP_DISABLE_PREEMPTION(%ebx) - Vload - SDINC(V_cnt(%ebx)) -#endif - - movl (%edx),%eax /* initialized? 
- -/* - * Entry point for BSD based graph profiling, count how many times each unique - * call graph (caller + callee) is called. The function label is passed in - * %edx, and the top two words on the stack are the caller's address, and the - * caller's return address. - * - * +-------------------------------+ - * | | - * | | - * | caller's caller stack, | - * | saved registers, params. | - * | | - * | | - * +-------------------------------+ - * | caller's caller return addr. | - * +-------------------------------+ - * esp --> | caller's return address | - * +-------------------------------+ - * - * edx --> function unique label - * - * We don't worry about the possibility of two threads calling the same - * function simultaneously. If that happens, two records will be created, and - * one of the record addresses will be stored in the function unique label - * (which is aligned by the compiler). - * - * By design, the gprof header is not locked. Each of the cache pointers is - * always a valid pointer (possibly to a null record), and if another thread - * comes in and modifies the pointer, it does so atomically with a simple store. - * Since all arcs are in the hash table, the caches are just to avoid doing - * a multiplication in the common case, and if they don't match, the arcs will - * still be found. - */ -
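The cache discipline the assembly below implements is easier to see in C: up to three per-function cache slots are probed before falling back to the global hash table, and a hit is rotated toward slot 0. This is only an illustrative sketch; arc and func_hdr approximate the hasharc and gprof-header layouts, and the hash constants are placeholders for the real values in profile-md.h:

#include <stdint.h>

#define GPROF_HASH_SHIFT 6	/* placeholder values; the real ones */
#define GPROF_HASH_MASK 16383	/* live in profile-md.h */

struct arc {			/* stand-in for struct hasharc */
	struct arc *next;	/* G_next: hash-chain link */
	uintptr_t frompc;	/* G_frompc: caller's caller */
	uintptr_t selfpc;	/* G_selfpc: this function */
	uint32_t count;		/* G_count/G_overflow: split counter */
	uint32_t overflow;
};

struct func_hdr {		/* stand-in for the gprof func header */
	struct arc *cache[3];	/* H_cache_ptr: never null, possibly
				 * the dummy arc, so no null checks */
	struct arc **hash;	/* H_hash_ptr: global hash table */
};

static struct arc *
lookup_arc(struct func_hdr *h, uintptr_t frompc, uintptr_t selfpc)
{
	int i;
	struct arc *a;
	uintptr_t hv;

	/* The cache lives in the callee's own header, so only frompc
	 * needs comparing; the dummy arc's frompc never matches. */
	for (i = 0; i < 3; i++) {
		a = h->cache[i];
		if (a->frompc == frompc) {
			for (; i > 0; i--)	/* rotate hit to slot 0 */
				h->cache[i] = h->cache[i - 1];
			h->cache[0] = a;
			return a;
		}
	}

	/* Miss: multiply-and-mask hash over both addresses, as in
	 * LCL(gnocache) below. */
	hv = ((frompc * selfpc) >> GPROF_HASH_SHIFT) & GPROF_HASH_MASK;
	for (a = h->hash[hv]; a != 0; a = a->next)
		if (a->selfpc == selfpc && a->frompc == frompc)
			return a;	/* caller refreshes the cache */

	return 0;			/* caller allocates a new arc */
}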
-Entry(_gprof_mcount) - - ENTER - movl Estack+4(%esp),%ecx /* caller's caller address */ - -#if DO_STATS - pushl %ebx - MP_DISABLE_PREEMPTION(%ebx) - Vload - SDINC(V_cnt(%ebx)) /* bump profile call counter (double int) */ -#endif - - movl (%edx),%eax /* Gprof header allocated? */ - cmpl $0,%eax - je LCL(gnew) /* skip if first call */ - - DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax)) /* bump function count */ - - /* See if this call arc is the same as the last time */ -MARK(_gprof_mcount_cache1) - movl H_cache_ptr(%eax),%edx /* last arc searched */ - cmpl %ecx,G_frompc(%edx) /* skip if not equal */ - jne LCL(gcache2) - - /* Same as last time, increment and return */ - - DINC2(G_count(%edx),G_overflow(%edx)) /* bump arc count */ - -#if DO_STATS - SDINC(V_cache_hits1(%ebx)) /* update counter */ - MP_ENABLE_PREEMPTION(%ebx) - popl %ebx -#endif - - LEAVE0 - ret - - /* Search second cache entry */ - /* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */ - /* %edx = first arc searched */ - /* %ebx if DO_STATS pushed on stack */ - - .align ALIGN -MARK(_gprof_mcount_cache2) -LCL(gcache2): - pushl %esi /* get a saved register */ - movl H_cache_ptr+4(%eax),%esi /* 2nd arc to be searched */ - cmpl %ecx,G_frompc(%esi) /* skip if not equal */ - jne LCL(gcache3) - - /* Element found, increment, reset last arc searched and return */ - - DINC2(G_count(%esi),G_overflow(%esi)) /* bump arc count */ - - movl %esi,H_cache_ptr+0(%eax) /* swap 1st and 2nd cached arcs */ - popl %esi - movl %edx,H_cache_ptr+4(%eax) - -#if DO_STATS - SDINC(V_cache_hits2(%ebx)) /* update counter */ - MP_ENABLE_PREEMPTION(%ebx) - popl %ebx -#endif - - LEAVE0 - ret - - /* Search third cache entry */ - /* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */ - /* %edx = first arc searched, %esi = second arc searched */ - /* %esi, %ebx if DO_STATS pushed on stack */ - - .align ALIGN -MARK(_gprof_mcount_cache3) -LCL(gcache3): - pushl %edi - movl H_cache_ptr+8(%eax),%edi /* 3rd arc to be searched */ - cmpl %ecx,G_frompc(%edi) /* skip if not equal */ - jne LCL(gnocache) - - /* Element found, increment, reset last arc searched and return */ - - DINC2(G_count(%edi),G_overflow(%edi)) /* bump arc count */ - - movl %edi,H_cache_ptr+0(%eax) /* make this 1st cached arc */ - movl %esi,H_cache_ptr+8(%eax) - movl %edx,H_cache_ptr+4(%eax) - popl %edi - popl %esi - -#if DO_STATS - SDINC(V_cache_hits3(%ebx)) /* update counter */ - MP_ENABLE_PREEMPTION(%ebx) - popl %ebx -#endif - - LEAVE0 - ret - - /* No function context, allocate a new context */ - /* %ebx is the variables address if DO_STATS */ - /* %ecx is the caller's caller's address */ - /* %edx is the unique function pointer */ - /* %ebx if DO_STATS pushed on stack */ - - .align ALIGN -MARK(_gprof_mcount_new) -LCL(gnew): - pushl %esi - pushl %edi - -#if !DO_STATS - pushl %ebx /* Address of vars needed for alloc */ - MP_DISABLE_PREEMPTION(%ebx) - Vload /* if DO_STATS, address is already loaded */ -#endif - - SLOCK incl V_prof_records(%ebx) - movl %edx,%esi /* save unique function ptr */ - movl %ecx,%edi /* and caller's caller address */ - movl $(H_size),%eax /* memory block size */ - movl $(C_gfunc),%ecx /* gprof function header memory pool */ - call EXT(_profile_alloc_asm) - - movl V_hash_ptr(%ebx),%ecx /* copy hash_ptr to func header */ - movl V_dummy_ptr(%ebx),%edx /* dummy cache entry */ - movl %ecx,H_hash_ptr(%eax) - movl %edx,H_cache_ptr+0(%eax) /* store dummy cache ptrs */ - movl %edx,H_cache_ptr+4(%eax) - movl %edx,H_cache_ptr+8(%eax) - movl %esi,H_unique_ptr(%eax) /* remember function unique ptr */ - movl Estack+12(%esp),%ecx /* caller's address */ - movl $1,H_prof+P_count(%eax) /* function called once so far */ - movl %ecx,H_prof+P_addr(%eax) /* set up prof information */ - movl %eax,(%esi) /* update context block address */
- movl %edi,%ecx /* caller's caller address */ - movl %edx,%esi /* 2nd cached arc */ - -#if !DO_STATS - popl %ebx -#endif - - /* Fall through to add element to the hash table. This may involve */ - /* searching a few hash table elements that don't need to be searched */ - /* since we have a new element, but it allows the hash table function */ - /* to be specified in only one place */ - - /* Didn't find entry in cache, search the global hash table */ - /* %eax = gprof func header, %ebx = vars address if DO_STATS */ - /* %ecx = caller's caller */ - /* %edx, %esi = cached arcs that were searched */ - /* %edi, %esi, %ebx if DO_STATS pushed on stack */ - - .align ALIGN -MARK(_gprof_mcount_hash) -LCL(gnocache): - - pushl %esi /* save 2nd arc searched */ - pushl %edx /* save 1st arc searched */ - movl %eax,%esi /* save gprof func header */ - -#if DO_STATS - SDINC(V_hash_num(%ebx)) - movl Estack+20(%esp),%edi /* caller's address */ -#else - movl Estack+16(%esp),%edi /* caller's address */ -#endif - movl %ecx,%eax /* caller's caller address */ - imull %edi,%eax /* multiply to get hash */ - movl H_hash_ptr(%esi),%edx /* hash pointer */ - shrl $(GPROF_HASH_SHIFT),%eax /* eliminate low order bits */ - andl $(GPROF_HASH_MASK),%eax /* mask to get hash value */ - leal 0(%edx,%eax,4),%eax /* pointer to hash bucket */ - movl %eax,%edx /* save hash bucket address */ - - /* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */ - /* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */ - /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */ - - .align ALIGN -LCL(ghash): - movl G_next(%eax),%eax /* get next hash element */ - cmpl $0,%eax /* end of line? */ - je LCL(ghashnew) /* skip if allocate new hash */ - -#if DO_STATS - SDINC(V_hash_search(%ebx)) -#endif - - cmpl G_selfpc(%eax),%edi /* loop back if not one we want */ - jne LCL(ghash) - - cmpl G_frompc(%eax),%ecx /* loop back if not one we want */ - jne LCL(ghash) - - /* Found an entry, increment count, set up for caching, and return */ - /* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */ - /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */ - - DINC2(G_count(%eax),G_overflow(%eax)) /* bump arc count */ - - popl %ecx /* previous 1st arc searched */ - movl %eax,H_cache_ptr+0(%esi) /* this element is now 1st arc */ - popl %edi /* previous 2nd arc searched */ - movl %ecx,H_cache_ptr+4(%esi) /* new 2nd arc to be searched */ - movl %edi,H_cache_ptr+8(%esi) /* new 3rd arc to be searched */ - popl %edi - popl %esi - -#if DO_STATS - MP_ENABLE_PREEMPTION(%ebx) - popl %ebx -#endif - - LEAVE0 - ret /* return to user */ - - /* Allocate new arc */ - /* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */ - /* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */ - /* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */ - - .align ALIGN -MARK(_gprof_mcount_hashnew) -LCL(ghashnew): - -#if !DO_STATS - pushl %ebx /* load address of vars if we haven't */ - MP_DISABLE_PREEMPTION(%ebx) - Vload /* already done so */ -#endif - - SLOCK incl V_gprof_records(%ebx) - pushl %edx - movl %ecx,%edi /* save caller's caller */ - movl $(G_size),%eax /* arc size */ - movl $(C_gprof),%ecx /* gprof memory pool */ - call EXT(_profile_alloc_asm) - popl %edx - - movl $1,G_count(%eax) /* set call count */ - movl Estack+20(%esp),%ecx /* caller's address */ - movl %edi,G_frompc(%eax) /* caller's caller */ - movl %ecx,G_selfpc(%eax) - -#if !DO_STATS - 
popl %ebx /* release %ebx if no stats */ -#endif - - movl (%edx),%ecx /* first hash bucket */ - movl %ecx,G_next(%eax) /* update link */ - movl %eax,%ecx /* copy for xchgl */ - xchgl %ecx,(%edx) /* add to hash linked list */ - movl %ecx,G_next(%eax) /* update in case list changed */ - - popl %ecx /* previous 1st arc searched */ - popl %edi /* previous 2nd arc searched */ - movl %eax,H_cache_ptr+0(%esi) /* this element is now 1st arc */ - movl %ecx,H_cache_ptr+4(%esi) /* new 2nd arc to be searched */ - movl %edi,H_cache_ptr+8(%esi) /* new 3rd arc to be searched */ - - popl %edi - popl %esi - -#if DO_STATS - MP_ENABLE_PREEMPTION(%ebx) - popl %ebx -#endif - - LEAVE0 - ret /* return to user */ - -END(_gprof_mcount) - - -/* - * This function assumes that neither the caller nor its caller - * has omitted the frame pointer, since it is needed to get the caller's - * caller. The stack looks like the following at the time of the call: - * - * +-------------------------------+ - * | | - * | | - * | caller's caller stack, | - * | saved registers, params. | - * | | - * | | - * +-------------------------------+ - * | caller's caller return addr. | - * +-------------------------------+ - * fp --> | previous frame pointer | - * +-------------------------------+ - * | | - * | caller's stack, saved regs, | - * | params. | - * | | - * +-------------------------------+ - * sp --> | caller's return address | - * +-------------------------------+ - * - * Recent versions of the compiler put the address of the pointer - * sized word in %edx. Previous versions did not, but this code - * does not support them. - */ - -/* - * Note that OSF/rose blew defining _mcount, since it prepends leading - * underscores, and _mcount didn't have a second leading underscore. However, - * some of the kernel/server functions 'know' that mcount has a leading - * underscore, so we satisfy both camps. - */ - -#if OLD_MCOUNT - .globl mcount - .globl _mcount - ELF_FUNC(mcount) - ELF_FUNC(_mcount) - .align FALIGN -_mcount: -mcount: - - pushl %ebx - MP_DISABLE_PREEMPTION(%ebx) - Vload - -#if DO_STATS - SDINC(V_old_mcount(%ebx)) -#endif - - /* In calling the functions, we will actually leave 1 extra word on the */ - /* top of the stack, but generated code will not notice, since the function */ - /* uses a frame pointer */ - - movl V_mcount_ptr_ptr(%ebx),%ecx /* address of mcount_ptr */ - MP_ENABLE_PREEMPTION(%ebx) - popl %ebx - movl 4(%ebp),%eax /* caller's caller return address */ - xchgl %eax,(%esp) /* push & get return address */ - pushl %eax /* push return address */ - jmp *(%ecx) /* go to profile the function */ - -End(mcount) -End(_mcount) -#endif - - -#if !defined(KERNEL) && !defined(MACH_KERNEL) - -/* - * Convert a 64-bit integer to a string. - * Arg #1 is a pointer to a string (at least 24 bytes) or NULL - * Arg #2 is the low part of the 64-bit integer. - * Arg #3 is the high part of the 64-bit integer. 
- */ - -Entry(_profile_cnt_to_decimal) - ENTER - pushl %ebx - pushl %esi - pushl %edi - movl Estack+16(%esp),%ebx /* pointer or null */ - movl Estack+20(%esp),%edi /* low part of number */ - movl $10,%ecx /* divisor */ - cmpl $0,%ebx /* skip if pointer ok */ - jne LCL(cvt_nonnull) - - MP_DISABLE_PREEMPTION(%ebx) - Vload /* get _profile_vars address */ - leal V_num_buffer(%ebx),%ebx /* temp buffer to use */ - - .align ALIGN -LCL(cvt_nonnull): - addl $(N_digit-1),%ebx /* point string at end */ - movb $0,0(%ebx) /* null terminate string */ - -#if OVERFLOW - movl Estack+24(%esp),%esi /* high part of number */ - cmpl $0,%esi /* anything left in high part? */ - je LCL(cvt_low) - - .align ALIGN -LCL(cvt_high): - movl %esi,%eax /* calculate high/10 & high%10 */ - xorl %edx,%edx - divl %ecx - movl %eax,%esi - - movl %edi,%eax /* calculate (low + (high%10)*2^32) / 10 */ - divl %ecx - movl %eax,%edi - - decl %ebx /* decrement string pointer */ - addl $48,%edx /* convert from 0..9 -> '0'..'9' */ - movb %dl,0(%ebx) /* store digit in string */ - cmpl $0,%esi /* anything left in high part? */ - jne LCL(cvt_high) - -#endif /* OVERFLOW */ - - .align ALIGN -LCL(cvt_low): - movl %edi,%eax /* get low part into %eax */ - - .align ALIGN -LCL(cvt_low2): - xorl %edx,%edx /* 0 */ - divl %ecx /* calculate next digit */ - decl %ebx /* decrement string pointer */ - addl $48,%edx /* convert from 0..9 -> '0'..'9' */ - movb %dl,0(%ebx) /* store digit in string */ - cmpl $0,%eax /* any more digits to convert? */ - jne LCL(cvt_low2) - - movl %ebx,%eax /* return value */ - popl %edi - popl %esi - MP_ENABLE_PREEMPTION(%ebx) - popl %ebx - LEAVE0 - ret - -END(_profile_cnt_to_decimal) - -#endif
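The conversion above never needs a 64-bit divide: the high word is divided by ten first, and its remainder (at most nine) is folded into the low-word divide, so the combined dividend r*2^32+low always produces a quotient that fits 32 bits. The same algorithm in C, for clarity (u64_to_decimal is an illustrative name, not an xnu symbol):

#include <stdint.h>

/* Convert high:low to decimal in a caller-supplied buffer of at least
 * 24 bytes, filling from the end; returns a pointer to the first
 * digit.  Each loop iteration mirrors one divl pair above. */
static char *
u64_to_decimal(uint32_t high, uint32_t low, char buf[24])
{
	char *p = buf + 23;

	*p = '\0';
	while (high != 0) {
		uint64_t rest = ((uint64_t)(high % 10) << 32) | low;

		high /= 10;			/* first divl */
		low = (uint32_t)(rest / 10);	/* second divl: quotient
						 * fits, remainder <= 9 */
		*--p = (char)('0' + (rest % 10));
	}
	do {					/* plain 32-bit tail */
		*--p = (char)('0' + (low % 10));
		low /= 10;
	} while (low != 0);

	return p;
}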
diff --git a/osfmk/profiling/i386/profile-md.c b/osfmk/profiling/i386/profile-md.c deleted file mode 100644 index ff5c91d1b..000000000 --- a/osfmk/profiling/i386/profile-md.c +++ /dev/null @@ -1,1244 +0,0 @@ -/* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -/* - * @OSF_COPYRIGHT@ - */ -/* - * HISTORY - * - * Revision 1.1.1.1 1998/09/22 21:05:49 wsanchez - * Import of Mac OS X kernel (~semeria) - * - * Revision 1.1.1.1 1998/03/07 02:26:08 wsanchez - * Import of OSF Mach kernel (~mburg) - * - * Revision 1.1.5.1 1995/01/06 19:53:45 devrcs - * mk6 CR668 - 1.3b26 merge - * new file for mk6 - * [1994/10/12 22:25:24 dwm] - * - * Revision 1.1.2.2 1994/05/16 19:19:22 meissner - * Protect against hash_ptr being null in _profile_update_stats. - * [1994/05/16 17:23:53 meissner] - * - * Remove _profile_cnt_to_hex, _profile_strbuffer. - * _profile_print_stats now takes const pointers. - * Use the new 64-bit arithmetic support instead of converting to double. - * Add _profile_merge_stats to merge statistics. - * [1994/04/28 21:45:04 meissner] - * - * If MACH_ASSERT is on in server or kernel, turn on profiling printfs. - * Print out fractional digits for average # of hash searches in stats. - * Update overflow_ticks for # times the lprofil counter overflows into high word. - * Don't make sizes of C/asm structures a const array, since it has pointers in it. - * Add support for converting 64 bit ints to a string. - * Use PROF_CNT_TO_DECIMAL where possible instead of PROF_CNT_TO_LDOUBLE. - * [1994/04/20 15:47:02 meissner] - * - * Revision 1.1.2.1 1994/04/08 17:51:51 meissner - * no change - * [1994/04/08 02:11:40 meissner] - * - * Make most stats 64 bits, except for things like memory allocation. - * [1994/04/02 14:58:28 meissner] - * - * Add some printfs under #ifdef DEBUG_PROFILE. - * [1994/03/29 21:00:11 meissner] - * - * Further changes for gprof/prof overflow support. - * Add overflow support for {gprof,prof,old,dummy}_mcount counters. - * [1994/03/17 20:13:31 meissner] - * - * Add gprof/prof overflow support - * [1994/03/17 14:56:51 meissner] - * - * Use memset instead of bzero. - * [1994/02/28 23:56:10 meissner] - * - * Add size of histogram counters & unused fields to profile_profil struct - * [1994/02/17 21:41:50 meissner] - * - * Allocate slop space for server in addition to microkernel. - * Add 3rd argument to _profile_print_stats for profil info. - * Print # histogram ticks too low/too high for server/mk. - * [1994/02/16 22:38:18 meissner] - * - * Calculate percentages for # of hash buckets. - * [1994/02/11 16:52:04 meissner] - * - * Print stats as an unsigned number. - * [1994/02/07 18:47:05 meissner] - * - * For kernel and server, include not . - * Always do assert on comparing asm vs. C structure sizes. - * Add _profile_reset to reset profiling information. - * Add _profile_update_stats to update the statistics. - * Move _gprof_write code that updates hash stats to _profile_update_stats. - * Don't allocate space for basic block support just yet. - * Add support for range checking the gprof arc {from,self}pc addresses. - * _profile_debug now calls _profile_update_stats. - * Print how many times the acontext was locked. - * If DEBUG_PROFILE is defined, set pv->debug to 1. - * Expand copyright. - * [1994/02/07 12:41:03 meissner] - * - * Keep track of the number of times the kernel overflows the HISTCOUNTER counter. - * [1994/02/03 20:13:28 meissner] - * - * Add stats for {user,kernel,idle} mode in the kernel. - * [1994/02/03 15:17:31 meissner] - * - * Print unused stats in hex as well as decimal. - * [1994/02/03 14:52:20 meissner] - * - * _profile_print_stats no longer takes profile_{vars,md} pointer arguments. - * If stream is NULL, _profile_print_stats will use stdout. 
- * Separate _profile_update_stats from _gprof_write. - * [1994/02/03 00:58:55 meissner] - * - * Combine _profile_{vars,stats,md}; Allow more than one _profile_vars. - * [1994/02/01 12:04:01 meissner] - * - * Add allocation flag to _profile_md_init. - * Fix core dumps in _profile_print_stats if no profile_vars ptr passed. - * Print numbers in 12 columns, not 8. - * Print my_cpu/max_cpu if max_cpu != 0. - * Make allocations print like other stats. - * Use ACONTEXT_FIRST to start loop on, not ACONTEXT_PROF. - * [1994/01/28 23:33:26 meissner] - * - * Move callback pointers into separate allocation context. - * Add size fields for other structures to profile-vars. - * [1994/01/26 20:23:37 meissner] - * - * Allocate initial memory at startup. - * Print structure sizes and version number when printing stats. - * Initialize size fields and version numbers. - * Allocation context pointers moved to _profile_vars. - * [1994/01/25 01:46:04 meissner] - * - * Move init code here from assembly language. - * [1994/01/22 01:13:21 meissner] - * - * Include instead of "profile-md.h". - * [1994/01/20 20:56:49 meissner] - * - * Fixup copyright. - * [1994/01/18 23:08:02 meissner] - * - * Rename profile.h -> profile-md.h. - * [1994/01/18 19:44:57 meissner] - * - * Write out stats unused fields. - * Make _prof_write write out the prof stats gprof collects. - * [1994/01/15 18:40:37 meissner] - * - * Remove debug code called from profile-asm.s. - * Always print out the # of profil buckets. - * [1994/01/15 00:59:06 meissner] - * - * Fix typo. - * [1994/01/04 16:34:46 meissner] - * - * Move max hash bucket calculation into _gprof_write & put info in stats structure. - * [1994/01/04 16:15:17 meissner] - * - * Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to. - * [1994/01/04 15:37:46 meissner] - * - * Correctly handle case where more than one allocation context was - * allocated due to multiple threads. - * Cast stats to long for output. - * Print number of profil buckets field in _profile_stats. - * Add support for GFUNC allocation context. - * [1994/01/04 14:26:00 meissner] - * - * CR 10198 - Initial version. - * [1994/01/01 22:44:10 meissne - * - * $EndLog$ - */ - -#include -#include -#include -#include - -#if defined(MACH_KERNEL) || defined(_KERNEL) - -#include -#if MACH_ASSERT && !defined(DEBUG_PROFILE) -#define DEBUG_PROFILE 1 -#endif - -#else -#include -#define panic(str) exit(1) -#endif - -#ifndef PROFILE_NUM_FUNCS -#define PROFILE_NUM_FUNCS 2000 -#endif - -#ifndef PROFILE_NUM_ARCS -#define PROFILE_NUM_ARCS 8000 -#endif - -/* - * Information passed on from profile-asm.s - */ - -extern int _profile_do_stats; -extern size_t _profile_size; -extern size_t _profile_stats_size; -extern size_t _profile_md_size; -extern size_t _profile_profil_size; -extern size_t _profile_hash_size; - -/* - * All profiling variables, and a dummy gprof record. - */ - -struct profile_vars _profile_vars = { 0 }; -struct hasharc _gprof_dummy = { 0 }; - -/* - * Forward references. - */ - -static void *_profile_md_acontext(struct profile_vars *pv, - void *ptr, - size_t len, - acontext_type_t type); - -static void _profile_reset_alloc(struct profile_vars *, - acontext_type_t); - -extern void _bogus_function(void); - - -#define PROFILE_VARS(cpu) (&_profile_vars) - -void * -_profile_alloc_pages (size_t size) -{ - vm_offset_t addr; - - /* - * For the MK, we can't support allocating pages at runtime, because we - * might be at interrupt level, so abort if we didn't size the table - * properly. 
- */ - - if (PROFILE_VARS(0)->active) { - panic("Call to _profile_alloc_pages while profiling is running."); - } - - if (kmem_alloc(kernel_map, &addr, size)) { - panic("Could not allocate memory for profiling"); - } - - memset((void *)addr, '\0', size); - if (PROFILE_VARS(0)->debug) { - printf("Allocated %d bytes for profiling, address 0x%x\n", (int)size, (int)addr); - } - - return((caddr_t)addr); -} - -void -_profile_free_pages(void *addr, size_t size) -{ - if (PROFILE_VARS(0)->debug) { - printf("Freed %d bytes for profiling, address 0x%x\n", (int)size, (int)addr); - } - - kmem_free(kernel_map, (vm_offset_t)addr, size); - return; -} - -void _profile_error(struct profile_vars *pv) -{ - panic("Fatal error in profiling"); -} - - -/* - * Function to set up the initial allocation for a context block. - */ - -static void * -_profile_md_acontext(struct profile_vars *pv, - void *ptr, - size_t len, - acontext_type_t type) -{ - struct memory { - struct alloc_context context; - struct page_list plist; - int data[1]; - }; - - struct memory *mptr = (struct memory *)ptr; - struct alloc_context *context = &mptr->context; - struct page_list *plist = &mptr->plist; - -#ifdef DEBUG_PROFILE - _profile_printf("_profile_md_acontext: pv= 0x%lx, ptr= 0x%lx, len= %6ld, type= %d\n", - (long)pv, - (long)ptr, - (long)len, - (int)type); -#endif - - /* Fill in context block header */ - context->next = pv->acontext[type]; - context->plist = plist; - context->lock = 0; - - /* Fill in first page list information */ - plist->ptr = plist->first = (void *)&mptr->data[0]; - plist->next = (struct page_list *)0; - plist->bytes_free = len - ((char *)plist->ptr - (char *)ptr); - plist->bytes_allocated = 0; - plist->num_allocations = 0; - - /* Update statistics */ - pv->stats.num_context[type]++; - pv->stats.wasted[type] += plist->bytes_free; - pv->stats.overhead[type] += len - plist->bytes_free; - - /* And setup context block */ - pv->acontext[type] = context; - - return (void *)((char *)ptr+len); -} - - -/* - * Machine dependent function to initialize things. 
- */ - -void -_profile_md_init(struct profile_vars *pv, - profile_type_t type, - profile_alloc_mem_t alloc_mem) -{ - size_t page_size = pv->page_size; - size_t arc_size; - size_t func_size; - size_t misc_size; - size_t hash_size; - size_t extra_arc_size; - size_t extra_func_size; - size_t callback_size = page_size; - void *ptr; - acontext_type_t ac; - int i; - static struct { - size_t c_size; /* size C thinks structure is */ - size_t *asm_size_ptr; /* pointer to size asm thinks struct is */ - const char *name; /* structure name */ - } sizes[] = { - { sizeof(struct profile_profil), &_profile_profil_size, "profile_profil" }, - { sizeof(struct profile_stats), &_profile_stats_size, "profile_stats" }, - { sizeof(struct profile_md), &_profile_md_size, "profile_md" }, - { sizeof(struct profile_vars), &_profile_size, "profile_vars" }}; - -#ifdef DEBUG_PROFILE - _profile_printf("_profile_md_init: pv = 0x%lx, type = %d, alloc = %d\n", - (long) pv, - (int)type, - (int)alloc_mem); -#endif - - for (i = 0; i < sizeof (sizes) / sizeof(sizes[0]); i++) { - if (sizes[i].c_size != *sizes[i].asm_size_ptr) { - _profile_printf("C thinks struct %s is %ld bytes, asm thinks it is %ld bytes\n", - sizes[i].name, - (long)sizes[i].c_size, - (long)*sizes[i].asm_size_ptr); - - panic(sizes[i].name); - } - } - - /* Figure out which function will handle compiler generated profiling */ - if (type == PROFILE_GPROF) { - pv->md.save_mcount_ptr = _gprof_mcount; - - } else if (type == PROFILE_PROF) { - pv->md.save_mcount_ptr = _prof_mcount; - - } else { - pv->md.save_mcount_ptr = _dummy_mcount; - } - - pv->vars_size = sizeof(struct profile_vars); - pv->plist_size = sizeof(struct page_list); - pv->acontext_size = sizeof(struct alloc_context); - pv->callback_size = sizeof(struct callback); - pv->major_version = PROFILE_MAJOR_VERSION; - pv->minor_version = PROFILE_MINOR_VERSION; - pv->type = type; - pv->do_profile = 1; - pv->use_dci = 1; - pv->use_profil = 1; - pv->output_uarea = 1; - pv->output_stats = (prof_flag_t) _profile_do_stats; - pv->output_clock = 1; - pv->multiple_sections = 1; - pv->init_format = 0; - pv->bogus_func = _bogus_function; - -#ifdef DEBUG_PROFILE - pv->debug = 1; -#endif - - if (!pv->error_msg) { - pv->error_msg = "error in profiling"; - } - - if (!pv->page_size) { - pv->page_size = 4096; - } - - pv->stats.stats_size = sizeof(struct profile_stats); - pv->stats.major_version = PROFILE_MAJOR_VERSION; - pv->stats.minor_version = PROFILE_MINOR_VERSION; - - pv->md.md_size = sizeof(struct profile_md); - pv->md.major_version = PROFILE_MAJOR_VERSION; - pv->md.minor_version = PROFILE_MINOR_VERSION; - pv->md.hash_size = _profile_hash_size; - pv->md.num_cache = MAX_CACHE; - pv->md.mcount_ptr_ptr = &_mcount_ptr; - pv->md.dummy_ptr = &_gprof_dummy; - pv->md.alloc_pages = _profile_alloc_pages; - - /* zero out all allocation context blocks */ - for (ac = ACONTEXT_FIRST; ac < ACONTEXT_MAX; ac++) { - pv->acontext[ac] = (struct alloc_context *)0; - } - - /* Don't allocate memory if not desired */ - if (!alloc_mem) { - return; - } - - /* Allocate some space for the initial allocations */ - switch (type) { - default: - misc_size = page_size; - ptr = _profile_alloc_pages(misc_size + callback_size); - ptr = _profile_md_acontext(pv, ptr, misc_size, ACONTEXT_MISC); - ptr = _profile_md_acontext(pv, ptr, callback_size, ACONTEXT_CALLBACK); - break; - - case PROFILE_GPROF: - -#if defined(MACH_KERNEL) || defined(_KERNEL) - /* - * For the MK & server allocate some slop space now for the - * secondary context blocks in case allocations are 
done at - * interrupt level when another allocation is being done. This - * is done before the main allocation blocks and will be pushed - * so that it will only be used when the main allocation block - * is locked. - */ - extra_arc_size = 4*page_size; - extra_func_size = 2*page_size; -#else - extra_arc_size = extra_func_size = 0; -#endif - - /* Set up allocation areas */ - arc_size = ROUNDUP(PROFILE_NUM_ARCS * sizeof(struct hasharc), page_size); - func_size = ROUNDUP(PROFILE_NUM_FUNCS * sizeof(struct gfuncs), page_size); - hash_size = _profile_hash_size * sizeof (struct hasharc *); - misc_size = ROUNDUP(hash_size + page_size, page_size); - - ptr = _profile_alloc_pages(arc_size - + func_size - + misc_size - + callback_size - + extra_arc_size - + extra_func_size); - -#if defined(MACH_KERNEL) || defined(_KERNEL) - ptr = _profile_md_acontext(pv, ptr, extra_arc_size, ACONTEXT_GPROF); - ptr = _profile_md_acontext(pv, ptr, extra_func_size, ACONTEXT_GFUNC); -#endif - ptr = _profile_md_acontext(pv, ptr, arc_size, ACONTEXT_GPROF); - ptr = _profile_md_acontext(pv, ptr, func_size, ACONTEXT_GFUNC); - ptr = _profile_md_acontext(pv, ptr, misc_size, ACONTEXT_MISC); - ptr = _profile_md_acontext(pv, ptr, callback_size, ACONTEXT_CALLBACK); - - /* Allocate hash table */ - pv->md.hash_ptr = (struct hasharc **) _profile_alloc(pv, hash_size, ACONTEXT_MISC); - break; - - case PROFILE_PROF: - /* Set up allocation areas */ - func_size = ROUNDUP(PROFILE_NUM_FUNCS * sizeof(struct prof_ext), page_size); - misc_size = page_size; - - ptr = _profile_alloc_pages(func_size - + misc_size - + callback_size); - - ptr = _profile_md_acontext(pv, ptr, func_size, ACONTEXT_PROF); - ptr = _profile_md_acontext(pv, ptr, misc_size, ACONTEXT_MISC); - ptr = _profile_md_acontext(pv, ptr, callback_size, ACONTEXT_CALLBACK); - break; - } -} - - -/* - * Machine dependent functions to start and stop profiling. - */ - -int -_profile_md_start(void) -{ - _mcount_ptr = _profile_vars.md.save_mcount_ptr; - return 0; -} - -int -_profile_md_stop(void) -{ - _mcount_ptr = _dummy_mcount; - return 0; -} - - -/* - * Free up all memory in a memory context block. - */ - -static void -_profile_reset_alloc(struct profile_vars *pv, acontext_type_t ac) -{ - struct alloc_context *aptr; - struct page_list *plist; - - for (aptr = pv->acontext[ac]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; - plist != (struct page_list *)0; - plist = plist->next) { - - plist->ptr = plist->first; - plist->bytes_free += plist->bytes_allocated; - plist->bytes_allocated = 0; - plist->num_allocations = 0; - memset(plist->first, '\0', plist->bytes_allocated); - } - } -} - - -/* - * Reset profiling. Since the only user of this function is the kernel - * and the server, we don't have to worry about other stuff than gprof. 
- */ - -void -_profile_reset(struct profile_vars *pv) -{ - struct alloc_context *aptr; - struct page_list *plist; - struct gfuncs *gfunc; - - if (pv->active) { - _profile_md_stop(); - } - - /* Reset all function unique pointers back to 0 */ - for (aptr = pv->acontext[ACONTEXT_GFUNC]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; - plist != (struct page_list *)0; - plist = plist->next) { - - for (gfunc = (struct gfuncs *)plist->first; - gfunc < (struct gfuncs *)plist->ptr; - gfunc++) { - - *(gfunc->unique_ptr) = (struct hasharc *)0; - } - } - } - - /* Release memory */ - _profile_reset_alloc(pv, ACONTEXT_GPROF); - _profile_reset_alloc(pv, ACONTEXT_GFUNC); - _profile_reset_alloc(pv, ACONTEXT_PROF); - - memset((void *)pv->profil_buf, '\0', pv->profil_info.profil_len); - memset((void *)pv->md.hash_ptr, '\0', pv->md.hash_size * sizeof(struct hasharc *)); - memset((void *)&pv->stats, '\0', sizeof(pv->stats)); - - pv->stats.stats_size = sizeof(struct profile_stats); - pv->stats.major_version = PROFILE_MAJOR_VERSION; - pv->stats.minor_version = PROFILE_MINOR_VERSION; - - if (pv->active) { - _profile_md_start(); - } -} - - -/* - * Machine dependent function to write out gprof records. - */ - -size_t -_gprof_write(struct profile_vars *pv, struct callback *callback_ptr) -{ - struct alloc_context *aptr; - struct page_list *plist; - size_t bytes = 0; - struct hasharc *hptr; - int i; - - for (aptr = pv->acontext[ACONTEXT_GPROF]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; plist != (struct page_list *)0; plist = plist->next) { - hptr = (struct hasharc *)plist->first; - for (i = 0; i < plist->num_allocations; (i++, hptr++)) { - - struct gprof_arc arc = hptr->arc; - int nrecs = 1 + (hptr->overflow * 2); - int j; - - if (pv->check_funcs) { - if (arc.frompc < pv->profil_info.lowpc || - arc.frompc > pv->profil_info.highpc) { - - arc.frompc = (prof_uptrint_t)pv->bogus_func; - } - - if (arc.selfpc < pv->profil_info.lowpc || - arc.selfpc > pv->profil_info.highpc) { - - arc.selfpc = (prof_uptrint_t)pv->bogus_func; - } - } - - /* For each overflow, emit 2 extra records with the count - set to 0x80000000 */ - for (j = 0; j < nrecs; j++) { - bytes += sizeof (arc); - if ((*pv->fwrite_func)((void *)&arc, - sizeof(arc), - 1, - pv->stream) != 1) { - - _profile_error(pv); - } - - arc.count = 0x80000000; - } - } - } - } - - return bytes; -} - - -/* - * Machine dependent function to write out prof records. 
- */ - -size_t -_prof_write(struct profile_vars *pv, struct callback *callback_ptr) -{ - struct alloc_context *aptr; - struct page_list *plist; - size_t bytes = 0; - struct prof_ext prof_st; - struct prof_int *pptr; - struct gfuncs *gptr; - int nrecs; - int i, j; - - /* Write out information prof_mcount collects */ - for (aptr = pv->acontext[ACONTEXT_PROF]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; plist != (struct page_list *)0; plist = plist->next) { - pptr = (struct prof_int *)plist->first; - - for (i = 0; i < plist->num_allocations; (i++, pptr++)) { - - /* Write out 2 records for each overflow, each with a - count of 0x80000000 + the normal record */ - prof_st = pptr->prof; - nrecs = 1 + (pptr->overflow * 2); - - for (j = 0; j < nrecs; j++) { - bytes += sizeof (struct prof_ext); - if ((*pv->fwrite_func)((void *)&prof_st, - sizeof(prof_st), - 1, - pv->stream) != 1) { - - _profile_error(pv); - } - - prof_st.cncall = 0x80000000; - } - } - } - } - - /* Now write out the prof information that gprof collects */ - for (aptr = pv->acontext[ACONTEXT_GFUNC]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; plist != (struct page_list *)0; plist = plist->next) { - gptr = (struct gfuncs *)plist->first; - - for (i = 0; i < plist->num_allocations; (i++, gptr++)) { - - /* Write out 2 records for each overflow, each with a - count of 0x80000000 + the normal record */ - prof_st = gptr->prof.prof; - nrecs = 1 + (gptr->prof.overflow * 2); - - for (j = 0; j < nrecs; j++) { - bytes += sizeof (struct prof_ext); - if ((*pv->fwrite_func)((void *)&prof_st, - sizeof(prof_st), - 1, - pv->stream) != 1) { - - _profile_error(pv); - } - - prof_st.cncall = 0x80000000; - } - } - } - } - - return bytes; -} - - -/* - * Update any statistics. For the 386, calculate the hash table loading factor. - * Also figure out how many overflows occurred. - */ - -void -_profile_update_stats(struct profile_vars *pv) -{ - struct alloc_context *aptr; - struct page_list *plist; - struct hasharc *hptr; - struct prof_int *pptr; - struct gfuncs *fptr; - LHISTCOUNTER *lptr; - int i; - - for(i = 0; i < MAX_BUCKETS+1; i++) { - pv->stats.buckets[i] = 0; - } - - pv->stats.hash_buckets = 0; - - if (pv->md.hash_ptr) { - for (i = 0; i < pv->md.hash_size; i++) { - long nbuckets = 0; - struct hasharc *hptr; - - for (hptr = pv->md.hash_ptr[i]; hptr; hptr = hptr->next) { - nbuckets++; - } - - pv->stats.buckets[ (nbuckets < MAX_BUCKETS) ? 
nbuckets : MAX_BUCKETS ]++; - if (pv->stats.hash_buckets < nbuckets) { - pv->stats.hash_buckets = nbuckets; - } - } - } - - /* Count how many times functions are out of bounds */ - if (pv->check_funcs) { - pv->stats.bogus_count = 0; - - for (aptr = pv->acontext[ACONTEXT_GPROF]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; - plist != (struct page_list *)0; - plist = plist->next) { - - hptr = (struct hasharc *)plist->first; - for (i = 0; i < plist->num_allocations; (i++, hptr++)) { - - if (hptr->arc.frompc < pv->profil_info.lowpc || - hptr->arc.frompc > pv->profil_info.highpc) { - pv->stats.bogus_count++; - } - - if (hptr->arc.selfpc < pv->profil_info.lowpc || - hptr->arc.selfpc > pv->profil_info.highpc) { - pv->stats.bogus_count++; - } - } - } - } - } - - /* Figure out how many overflows occurred */ - PROF_ULONG_TO_CNT(pv->stats.prof_overflow, 0); - PROF_ULONG_TO_CNT(pv->stats.gprof_overflow, 0); - - for (aptr = pv->acontext[ACONTEXT_GPROF]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; - plist != (struct page_list *)0; - plist = plist->next) { - - hptr = (struct hasharc *)plist->first; - for (i = 0; i < plist->num_allocations; (i++, hptr++)) { - PROF_CNT_ADD(pv->stats.gprof_overflow, hptr->overflow); - } - } - } - - for (aptr = pv->acontext[ACONTEXT_PROF]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; - plist != (struct page_list *)0; - plist = plist->next) { - - pptr = (struct prof_int *)plist->first; - for (i = 0; i < plist->num_allocations; (i++, pptr++)) { - PROF_CNT_ADD(pv->stats.prof_overflow, pptr->overflow); - } - } - } - - for (aptr = pv->acontext[ACONTEXT_GFUNC]; - aptr != (struct alloc_context *)0; - aptr = aptr->next) { - - for (plist = aptr->plist; - plist != (struct page_list *)0; - plist = plist->next) { - - fptr = (struct gfuncs *)plist->first; - for (i = 0; i < plist->num_allocations; (i++, fptr++)) { - PROF_CNT_ADD(pv->stats.prof_overflow, fptr->prof.overflow); - } - } - } - - /* Now go through & count how many times the LHISTCOUNTER overflowed into a 2nd word */ - lptr = (LHISTCOUNTER *)pv->profil_buf; - - if (pv->use_profil && - pv->profil_info.counter_size == sizeof(LHISTCOUNTER) && - lptr != (LHISTCOUNTER *)0) { - - PROF_ULONG_TO_CNT(pv->stats.overflow_ticks, 0); - for (i = 0; i < pv->stats.profil_buckets; i++) { - PROF_CNT_ADD(pv->stats.overflow_ticks, lptr[i].high); - } - } -} - -#if !defined(_KERNEL) && !defined(MACH_KERNEL) - -/* - * Routine callable from the debugger that prints the statistics. - */ - -int _profile_debug(void) -{ - _profile_update_stats(&_profile_vars); - _profile_print_stats(stderr, &_profile_vars.stats, &_profile_vars.profil_info); - return 0; -} - -/* - * Print the statistics structure in a meaningful way. 
- */ - -void _profile_print_stats(FILE *stream, - const struct profile_stats *stats, - const struct profile_profil *pinfo) -{ - int i; - prof_cnt_t total_hits; - acontext_type_t ac; - int width_cname = 0; - int width_alloc = 0; - int width_wasted = 0; - int width_overhead = 0; - int width_context = 0; - static const char *cname[ACONTEXT_MAX] = ACONTEXT_NAMES; - char buf[20]; - - if (!stats) { - return; - } - - if (!stream) { - stream = stdout; - } - - sprintf(buf, "%ld.%ld", (long)stats->major_version, (long)stats->minor_version); - fprintf(stream, "%12s profiling version number\n", buf); - fprintf(stream, "%12lu size of profile_vars\n", (long unsigned)sizeof(struct profile_vars)); - fprintf(stream, "%12lu size of profile_stats\n", (long unsigned)sizeof(struct profile_stats)); - fprintf(stream, "%12lu size of profile_md\n", (long unsigned)sizeof(struct profile_md)); - fprintf(stream, "%12s calls to _{,g}prof_mcount\n", PROF_CNT_TO_DECIMAL((char *)0, stats->cnt)); - fprintf(stream, "%12s calls to old mcount\n", PROF_CNT_TO_DECIMAL((char *)0, stats->old_mcount)); - fprintf(stream, "%12s calls to _dummy_mcount\n", PROF_CNT_TO_DECIMAL((char *)0, stats->dummy)); - fprintf(stream, "%12lu functions profiled\n", (long unsigned)stats->prof_records); - fprintf(stream, "%12lu gprof arcs\n", (long unsigned)stats->gprof_records); - - if (pinfo) { - fprintf(stream, "%12lu profil buckets\n", (long unsigned)stats->profil_buckets); - fprintf(stream, "%12lu profil lowpc [0x%lx]\n", - (long unsigned)pinfo->lowpc, - (long unsigned)pinfo->lowpc); - - fprintf(stream, "%12lu profil highpc [0x%lx]\n", - (long unsigned)pinfo->highpc, - (long unsigned)pinfo->highpc); - - fprintf(stream, "%12lu profil highpc-lowpc\n", (long unsigned)(pinfo->highpc - pinfo->lowpc)); - fprintf(stream, "%12lu profil buffer length\n", (long unsigned)pinfo->profil_len); - fprintf(stream, "%12lu profil sizeof counters\n", (long unsigned)pinfo->counter_size); - fprintf(stream, "%12lu profil scale (%g)\n", - (long unsigned)pinfo->scale, - ((double)pinfo->scale) / ((double) 0x10000)); - - - for (i = 0; i < sizeof (pinfo->profil_unused) / sizeof (pinfo->profil_unused[0]); i++) { - if (pinfo->profil_unused[i]) { - fprintf(stream, "%12lu profil unused[%2d] {0x%.8lx}\n", - (long unsigned)pinfo->profil_unused[i], - i, - (long unsigned)pinfo->profil_unused[i]); - } - } - } - - if (stats->max_cpu) { - fprintf(stream, "%12lu current cpu/thread\n", (long unsigned)stats->my_cpu); - fprintf(stream, "%12lu max cpu/thread+1\n", (long unsigned)stats->max_cpu); - } - - if (stats->bogus_count != 0) { - fprintf(stream, - "%12lu gprof functions found outside of range\n", - (long unsigned)stats->bogus_count); - } - - if (PROF_CNT_NE_0(stats->too_low)) { - fprintf(stream, - "%12s histogram ticks were too low\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->too_low)); - } - - if (PROF_CNT_NE_0(stats->too_high)) { - fprintf(stream, - "%12s histogram ticks were too high\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->too_high)); - } - - if (PROF_CNT_NE_0(stats->acontext_locked)) { - fprintf(stream, - "%12s times an allocation context was locked\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->acontext_locked)); - } - - if (PROF_CNT_NE_0(stats->kernel_ticks) - || PROF_CNT_NE_0(stats->user_ticks) - || PROF_CNT_NE_0(stats->idle_ticks)) { - - prof_cnt_t total_ticks; - long double total_ticks_dbl; - - total_ticks = stats->kernel_ticks; - PROF_CNT_LADD(total_ticks, stats->user_ticks); - PROF_CNT_LADD(total_ticks, stats->idle_ticks); - total_ticks_dbl = 
PROF_CNT_TO_LDOUBLE(total_ticks); - - fprintf(stream, - "%12s total ticks\n", - PROF_CNT_TO_DECIMAL((char *)0, total_ticks)); - - fprintf(stream, - "%12s ticks within the kernel (%5.2Lf%%)\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->kernel_ticks), - 100.0L * (PROF_CNT_TO_LDOUBLE(stats->kernel_ticks) / total_ticks_dbl)); - - fprintf(stream, - "%12s ticks within user space (%5.2Lf%%)\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->user_ticks), - 100.0L * (PROF_CNT_TO_LDOUBLE(stats->user_ticks) / total_ticks_dbl)); - - fprintf(stream, - "%12s ticks idle (%5.2Lf%%)\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->idle_ticks), - 100.0L * (PROF_CNT_TO_LDOUBLE(stats->idle_ticks) / total_ticks_dbl)); - } - - if (PROF_CNT_NE_0(stats->overflow_ticks)) { - fprintf(stream, "%12s times a HISTCOUNTER counter would have overflowed\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->overflow_ticks)); - } - - if (PROF_CNT_NE_0(stats->hash_num)) { - long double total_buckets = 0.0L; - - for (i = 0; i <= MAX_BUCKETS; i++) { - total_buckets += (long double)stats->buckets[i]; - } - - fprintf(stream, "%12lu max bucket(s) on hash chain.\n", (long unsigned)stats->hash_buckets); - for (i = 0; i < MAX_BUCKETS; i++) { - if (stats->buckets[i] != 0) { - fprintf(stream, "%12lu bucket(s) had %d entries (%5.2Lf%%)\n", - (long unsigned)stats->buckets[i], i, - 100.0L * ((long double)stats->buckets[i] / total_buckets)); - } - } - - if (stats->buckets[MAX_BUCKETS] != 0) { - fprintf(stream, "%12lu bucket(s) had more than %d entries (%5.2Lf%%)\n", - (long unsigned)stats->buckets[MAX_BUCKETS], MAX_BUCKETS, - 100.0L * ((long double)stats->buckets[MAX_BUCKETS] / total_buckets)); - } - } - - PROF_ULONG_TO_CNT(total_hits, 0); - for (i = 0; i < MAX_CACHE; i++) { - PROF_CNT_LADD(total_hits, stats->cache_hits[i]); - } - - if (PROF_CNT_NE_0(total_hits)) { - long double total = PROF_CNT_TO_LDOUBLE(stats->cnt); - long double total_hits_dbl = PROF_CNT_TO_LDOUBLE(total_hits); - - fprintf(stream, - "%12s cache hits (%.2Lf%%)\n", - PROF_CNT_TO_DECIMAL((char *)0, total_hits), - 100.0L * (total_hits_dbl / total)); - - for (i = 0; i < MAX_CACHE; i++) { - if (PROF_CNT_NE_0(stats->cache_hits[i])) { - fprintf(stream, - "%12s times cache#%d matched (%5.2Lf%% of cache hits, %5.2Lf%% total)\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->cache_hits[i]), - i+1, - 100.0L * (PROF_CNT_TO_LDOUBLE(stats->cache_hits[i]) / total_hits_dbl), - 100.0L * (PROF_CNT_TO_LDOUBLE(stats->cache_hits[i]) / total)); - } - } - - if (PROF_CNT_NE_0(stats->hash_num)) { - fprintf(stream, "%12s times hash table searched\n", PROF_CNT_TO_DECIMAL((char *)0, stats->hash_num)); - fprintf(stream, "%12s hash buckets searched\n", PROF_CNT_TO_DECIMAL((char *)0, stats->hash_search)); - fprintf(stream, "%12.4Lf average buckets searched\n", - PROF_CNT_TO_LDOUBLE(stats->hash_search) / PROF_CNT_TO_LDOUBLE(stats->hash_num)); - } - } - - for (i = 0; i < sizeof (stats->stats_unused) / sizeof (stats->stats_unused[0]); i++) { - if (PROF_CNT_NE_0(stats->stats_unused[i])) { - fprintf(stream, "%12s unused[%2d] {0x%.8lx 0x%.8lx}\n", - PROF_CNT_TO_DECIMAL((char *)0, stats->stats_unused[i]), - i, - (unsigned long)stats->stats_unused[i].high, - (unsigned long)stats->stats_unused[i].low); - } - } - - /* Get the width for the allocation contexts */ - for (ac = ACONTEXT_FIRST; ac < ACONTEXT_MAX; ac++) { - int len; - - if (stats->num_context[ac] == 0) { - continue; - } - - len = strlen (cname[ac]); - if (len > width_cname) - width_cname = len; - - len = sprintf (buf, "%lu", (long unsigned)stats->num_alloc[ac]); - if (len > 
width_alloc) - width_alloc = len; - - len = sprintf (buf, "%lu", (long unsigned)stats->wasted[ac]); - if (len > width_wasted) - width_wasted = len; - - len = sprintf (buf, "%lu", (long unsigned)stats->overhead[ac]); - if (len > width_overhead) - width_overhead = len; - - len = sprintf (buf, "%lu", (long unsigned)stats->num_context[ac]); - if (len > width_context) - width_context = len; - } - - /* Print info about allocation contexts */ - for (ac = ACONTEXT_FIRST; ac < ACONTEXT_MAX; ac++) { - if (stats->num_context[ac] == 0) { - continue; - } - - fprintf (stream, - "%12lu bytes in %-*s %*lu alloc, %*lu unused, %*lu over, %*lu context\n", - (long unsigned)stats->bytes_alloc[ac], - width_cname, cname[ac], - width_alloc, (long unsigned)stats->num_alloc[ac], - width_wasted, (long unsigned)stats->wasted[ac], - width_overhead, (long unsigned)stats->overhead[ac], - width_context, (long unsigned)stats->num_context[ac]); - } -} - - -/* - * Merge a new statistics field into an old one. - */ - -void _profile_merge_stats(struct profile_stats *old_stats, const struct profile_stats *new_stats) -{ - int i; - - /* If nothing passed, just return */ - if (!old_stats || !new_stats) - return; - - /* If the old_stats has not been initialized, just copy in the new stats */ - if (old_stats->major_version == 0) { - *old_stats = *new_stats; - - /* Otherwise, update stats, field by field */ - } else { - if (old_stats->prof_records < new_stats->prof_records) - old_stats->prof_records = new_stats->prof_records; - - if (old_stats->gprof_records < new_stats->gprof_records) - old_stats->gprof_records = new_stats->gprof_records; - - if (old_stats->hash_buckets < new_stats->hash_buckets) - old_stats->hash_buckets = new_stats->hash_buckets; - - if (old_stats->bogus_count < new_stats->bogus_count) - old_stats->bogus_count = new_stats->bogus_count; - - PROF_CNT_LADD(old_stats->cnt, new_stats->cnt); - PROF_CNT_LADD(old_stats->dummy, new_stats->dummy); - PROF_CNT_LADD(old_stats->old_mcount, new_stats->old_mcount); - PROF_CNT_LADD(old_stats->hash_search, new_stats->hash_search); - PROF_CNT_LADD(old_stats->hash_num, new_stats->hash_num); - PROF_CNT_LADD(old_stats->user_ticks, new_stats->user_ticks); - PROF_CNT_LADD(old_stats->kernel_ticks, new_stats->kernel_ticks); - PROF_CNT_LADD(old_stats->idle_ticks, new_stats->idle_ticks); - PROF_CNT_LADD(old_stats->overflow_ticks, new_stats->overflow_ticks); - PROF_CNT_LADD(old_stats->acontext_locked, new_stats->acontext_locked); - PROF_CNT_LADD(old_stats->too_low, new_stats->too_low); - PROF_CNT_LADD(old_stats->too_high, new_stats->too_high); - PROF_CNT_LADD(old_stats->prof_overflow, new_stats->prof_overflow); - PROF_CNT_LADD(old_stats->gprof_overflow, new_stats->gprof_overflow); - - for (i = 0; i < (int)ACONTEXT_MAX; i++) { - if (old_stats->num_alloc[i] < new_stats->num_alloc[i]) - old_stats->num_alloc[i] = new_stats->num_alloc[i]; - - if (old_stats->bytes_alloc[i] < new_stats->bytes_alloc[i]) - old_stats->bytes_alloc[i] = new_stats->bytes_alloc[i]; - - if (old_stats->num_context[i] < new_stats->num_context[i]) - old_stats->num_context[i] = new_stats->num_context[i]; - - if (old_stats->wasted[i] < new_stats->wasted[i]) - old_stats->wasted[i] = new_stats->wasted[i]; - - if (old_stats->overhead[i] < new_stats->overhead[i]) - old_stats->overhead[i] = new_stats->overhead[i]; - - } - - for (i = 0; i < MAX_BUCKETS+1; i++) { - if (old_stats->buckets[i] < new_stats->buckets[i]) - old_stats->buckets[i] = new_stats->buckets[i]; - } - - for (i = 0; i < MAX_CACHE; i++) { - 
PROF_CNT_LADD(old_stats->cache_hits[i], new_stats->cache_hits[i]); - } - - for (i = 0; i < sizeof(old_stats->stats_unused) / sizeof(old_stats->stats_unused[0]); i++) { - PROF_CNT_LADD(old_stats->stats_unused[i], new_stats->stats_unused[i]); - } - } -} - -#endif - - -/* - * Invalid function address used when checking of function addresses is - * desired for gprof arcs, and we discover an address out of bounds. - * There should be no callers of this function. - */ - -void -_bogus_function(void) -{ -} diff --git a/osfmk/profiling/i386/profile-md.h b/osfmk/profiling/i386/profile-md.h index b30deae93..86ed5b023 100644 --- a/osfmk/profiling/i386/profile-md.h +++ b/osfmk/profiling/i386/profile-md.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -172,61 +172,14 @@ typedef unsigned char prof_flag_t; /* type for boolean flags */ */ /* These are 64 bit on both i386 and x86_64 */ -#ifdef __i386__ -typedef struct prof_cnt_t { - prof_uptrint_t low; /* low 32 bits of counter */ - prof_uptrint_t high; /* high 32 bits of counter */ -} prof_cnt_t; -#else typedef unsigned long prof_cnt_t; -#endif - -#ifdef __i386__ -#if defined(__GNUC__) && !defined(lint) -#define PROF_CNT_INC(cnt) \ - __asm__("addl $1,%0; adcl $0,%1" \ - : "=g" ((cnt).low), "=g" ((cnt).high) \ - : "0" ((cnt).low), "1" ((cnt).high)) - -#define PROF_CNT_ADD(cnt,val) \ - __asm__("addl %2,%0; adcl $0,%1" \ - : "=g,r" ((cnt).low), "=g,r" ((cnt).high) \ - : "r,g" ((unsigned long)(val)), \ - "0,0" ((cnt).low), "1,1" ((cnt).high)) - -#define PROF_CNT_LADD(cnt,val) \ - __asm__("addl %2,%0; adcl %3,%1" \ - : "=g,r" ((cnt).low), "=g,r" ((cnt).high) \ - : "r,g" ((val).low), "r,g" ((val).high), \ - "0,0" ((cnt).low), "1,1" ((cnt).high)) - -#define PROF_CNT_SUB(cnt,val) \ - __asm__("subl %2,%0; sbbl $0,%1" \ - : "=g,r" ((cnt).low), "=g,r" ((cnt).high) \ - : "r,g" ((unsigned long)(val)), \ - "0,0" ((cnt).low), "1,1" ((cnt).high)) - -#define PROF_CNT_LSUB(cnt,val) \ - __asm__("subl %2,%0; sbbl %3,%1" \ - : "=g,r" ((cnt).low), "=g,r" ((cnt).high) \ - : "r,g" ((val).low), "r,g" ((val).high), \ - "0,0" ((cnt).low), "1,1" ((cnt).high)) - -#else -#define PROF_CNT_INC(cnt) ((++((cnt).low) == 0) ? ++((cnt).high) : 0) -#define PROF_CNT_ADD(cnt,val) (((((cnt).low + (val)) < (val)) ? ((cnt).high++) : 0), ((cnt).low += (val))) -#define PROF_CNT_LADD(cnt,val) (PROF_CNT_ADD(cnt,(val).low), (cnt).high += (val).high) -#define PROF_CNT_SUB(cnt,val) (((((cnt).low - (val)) > (cnt).low) ? ((cnt).high--) : 0), ((cnt).low -= (val))) -#define PROF_CNT_LSUB(cnt,val) (PROF_CNT_SUB(cnt,(val).low), (cnt).high -= (val).high) -#endif -#else + /* x86_64 */ #define PROF_CNT_INC(cnt) (cnt++) #define PROF_CNT_ADD(cnt,val) (cnt+=val) #define PROF_CNT_LADD(cnt,val) (cnt+=val) #define PROF_CNT_SUB(cnt,val) (cnt-=val) #define PROF_CNT_LSUB(cnt,val) (cnt-=val) -#endif #define PROF_ULONG_TO_CNT(cnt,val) (((cnt).high = 0), ((cnt).low = val)) #define PROF_CNT_OVERFLOW(cnt,high,low) (((high) = (cnt).high), ((low) = (cnt).low))
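The deleted i386 half of this hunk is the interesting part: 64-bit event counters were kept as {low,high} word pairs, with the GNU C macros using an addl/adcl pair to propagate the carry and the portable fallback detecting the carry as unsigned wrap-around. A hedged restatement of the fallback in C (cnt_t and the function names are illustrative):

#include <stdint.h>

typedef struct {
	uint32_t low;		/* low 32 bits of counter */
	uint32_t high;		/* high 32 bits of counter */
} cnt_t;

/* PROF_CNT_ADD: add a 32-bit value the way "addl %2,%0; adcl $0,%1"
 * does.  Unsigned wrap (sum < val) is exactly the carry-out of the
 * low-word addition. */
static inline void
cnt_add(cnt_t *c, uint32_t val)
{
	uint32_t sum = c->low + val;

	if (sum < val)
		c->high++;
	c->low = sum;
}

/* PROF_CNT_LADD: 64-bit + 64-bit; add the low words with carry, then
 * fold in the high word. */
static inline void
cnt_ladd(cnt_t *c, cnt_t v)
{
	cnt_add(c, v.low);
	c->high += v.high;
}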
diff --git a/osfmk/vm/Makefile b/osfmk/vm/Makefile index 7181839ca..dbdfa765e 100644 --- a/osfmk/vm/Makefile +++ b/osfmk/vm/Makefile @@ -3,7 +3,6 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) @@ -17,7 +16,8 @@ EXPORT_ONLY_FILES = \ vm_options.h \ vm_pageout.h \ vm_protos.h \ - vm_shared_region.h + vm_shared_region.h \ + WKdm_new.h INSTALL_MI_LIST = ${DATAFILES} diff --git a/libkern/libkern/WKdm.h b/osfmk/vm/WKdm_new.h similarity index 77% rename from libkern/libkern/WKdm.h rename to osfmk/vm/WKdm_new.h index 68977ce8a..c29a2cbb3 100644 --- a/libkern/libkern/WKdm.h +++ b/osfmk/vm/WKdm_new.h @@ -1,3 +1,31 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + /* direct-mapped partial matching compressor with simple 22/10 split * * Compresses buffers using a dictionary based match and partial match @@ -42,10 +70,12 @@ extern "C" { /* ============================================================ */ /* Included files */ -//#include -//#include -//#include -//#include +#ifdef WK_DEBUG +#include +#include +#include +#include +#endif typedef unsigned int WK_word; @@ -62,17 +92,17 @@ typedef unsigned int WK_word; /* * macros defining the basic layout of stuff in a page */ -#define HEADER_SIZE_IN_WORDS 4 -#define TAGS_AREA_OFFSET 4 +#define HEADER_SIZE_IN_WORDS 3 +#define TAGS_AREA_OFFSET 3 #define TAGS_AREA_SIZE 64 /* the next few are used during compression to write the header */ #define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr) \ - (compr_dest_buf[1] = (WK_word)(qpos_start_addr - compr_dest_buf)) + (compr_dest_buf[0] = qpos_start_addr - compr_dest_buf) #define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \ - (compr_dest_buf[2] = (WK_word)(lb_start_addr - compr_dest_buf)) + (compr_dest_buf[1] = lb_start_addr - compr_dest_buf) #define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \ - (compr_dest_buf[3] = (WK_word)(lb_end_addr - compr_dest_buf)) + (compr_dest_buf[2] = lb_end_addr - compr_dest_buf) /* the next few are only used during decompression to read the header */ #define TAGS_AREA_START(decomp_src_buf) \ @@ -81,12 +111,12 @@ typedef unsigned int WK_word; (TAGS_AREA_START(decomp_src_buf) + TAGS_AREA_SIZE) #define FULL_WORD_AREA_START(the_buf) TAGS_AREA_END(the_buf) #define QPOS_AREA_START(decomp_src_buf) \ - (decomp_src_buf + decomp_src_buf[1]) + (decomp_src_buf + decomp_src_buf[0]) #define LOW_BITS_AREA_START(decomp_src_buf) \ - (decomp_src_buf + (decomp_src_buf[2])) + (decomp_src_buf + (decomp_src_buf[1])) #define QPOS_AREA_END(the_buf) LOW_BITS_AREA_START(the_buf) #define LOW_BITS_AREA_END(decomp_src_buf) \ - (decomp_src_buf + (decomp_src_buf[3])) + (decomp_src_buf + (decomp_src_buf[2])) /* ============================================================ */ /* Types and structures */
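The macro changes above shrink the compressed-page header from four words to three, renumbering the offset words down to indices 0-2: they hold word offsets (from the start of the compressed buffer) for the qpos area, the low-bits area, and the end of the low-bits area, with the 64-word tags area following immediately. A hedged sketch of reading the new layout (wkdm_view and wkdm_parse_header are illustrative helpers, not xnu functions):

typedef unsigned int WK_word;

struct wkdm_view {		/* illustrative decompression-side view */
	WK_word *tags;		/* TAGS_AREA_START */
	WK_word *full_words;	/* FULL_WORD_AREA_START */
	WK_word *qpos;		/* QPOS_AREA_START */
	WK_word *low_bits;	/* LOW_BITS_AREA_START */
	WK_word *end;		/* LOW_BITS_AREA_END */
};

/* All header entries are word offsets from the start of the buffer,
 * exactly as the revised macros above compute them. */
static void
wkdm_parse_header(WK_word *buf, struct wkdm_view *v)
{
	v->tags = buf + 3;		/* HEADER_SIZE_IN_WORDS */
	v->full_words = v->tags + 64;	/* TAGS_AREA_SIZE */
	v->qpos = buf + buf[0];
	v->low_bits = buf + buf[1];
	v->end = buf + buf[2];
}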
macros record the results of modeling in the intermediate * arrays @@ -213,14 +243,19 @@ extern const char hashLookupTable[]; #define RECORD_MISS(word_pattern) EMIT_BYTE(next_tag,MISS_TAG); \ EMIT_WORD(next_full_patt,(word_pattern)); + +#define WKdm_SCRATCH_BUF_SIZE 4096 + void -WKdm_decompress (WK_word* src_buf, +WKdm_decompress_new (WK_word* src_buf, WK_word* dest_buf, - unsigned int words); -unsigned int -WKdm_compress (WK_word* src_buf, + WK_word* scratch, + unsigned int bytes); +int +WKdm_compress_new (WK_word* src_buf, WK_word* dest_buf, - unsigned int num_input_words); + WK_word* scratch, + unsigned int limit); #ifdef __cplusplus } /* extern "C" */ diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c index 97d76cdf0..6c155e60a 100644 --- a/osfmk/vm/bsd_vm.c +++ b/osfmk/vm/bsd_vm.c @@ -252,7 +252,7 @@ mach_macx_triggers( } /* trigger_port is locked and active */ ipc_port_make_send_locked(trigger_port); - /* now unlocked */ + ip_unlock(trigger_port); default_pager_triggers(default_pager, 0, 0, SWAP_FILE_CREATION_ERROR, @@ -266,7 +266,7 @@ mach_macx_triggers( } /* trigger_port is locked and active */ ipc_port_make_send_locked(trigger_port); - /* now unlocked */ + ip_unlock(trigger_port); default_pager_triggers(default_pager, hi_water, low_water, HI_WAT_ALERT, trigger_port); @@ -279,7 +279,7 @@ mach_macx_triggers( } /* trigger_port is locked and active */ ipc_port_make_send_locked(trigger_port); - /* and now its unlocked */ + ip_unlock(trigger_port); default_pager_triggers(default_pager, hi_water, low_water, LO_WAT_ALERT, trigger_port); @@ -665,10 +665,9 @@ vnode_pager_get_isinuse( } kern_return_t -vnode_pager_check_hard_throttle( +vnode_pager_get_throttle_io_limit( memory_object_t mem_obj, - uint32_t *limit, - uint32_t hard_throttle) + uint32_t *limit) { vnode_pager_t vnode_object; @@ -677,7 +676,7 @@ vnode_pager_check_hard_throttle( vnode_object = vnode_pager_lookup(mem_obj); - (void)vnode_pager_return_hard_throttle_limit(vnode_object->vnode_handle, limit, hard_throttle); + (void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit); return KERN_SUCCESS; } diff --git a/osfmk/vm/default_freezer.c b/osfmk/vm/default_freezer.c index c6e9c8ef6..b7127b39e 100644 --- a/osfmk/vm/default_freezer.c +++ b/osfmk/vm/default_freezer.c @@ -296,7 +296,8 @@ default_freezer_pack_page( df_handle->dfh_compact_offset += PAGE_SIZE; } -void + +kern_return_t default_freezer_unpack( default_freezer_handle_t df_handle) { @@ -311,6 +312,7 @@ default_freezer_unpack( default_freezer_memory_object_t fo = NULL; default_freezer_mapping_table_t freeze_table = NULL; boolean_t should_unlock_handle = FALSE; + kern_return_t kr; assert(df_handle); @@ -326,11 +328,11 @@ default_freezer_unpack( assert(compact_object->pager_ready); /* Bring the pages back in */ - if (vm_object_pagein(compact_object) != KERN_SUCCESS) { + if ((kr = vm_object_pagein(compact_object)) != KERN_SUCCESS) { if (should_unlock_handle) { default_freezer_handle_unlock(df_handle); } - return; + return (kr); } vm_object_lock(compact_object); @@ -399,6 +401,7 @@ default_freezer_unpack( df_handle->dfh_compact_offset = 0; default_freezer_handle_unlock(df_handle); } + return (KERN_SUCCESS); } void @@ -517,6 +520,7 @@ df_memory_object_data_request( memory_object_t pager = NULL; kern_return_t kr = KERN_SUCCESS; boolean_t drop_object_ref = FALSE; + vm_page_t compact_page, dst_page; default_freezer_memory_object_t fo = (default_freezer_memory_object_t)mem_obj; default_freezer_handle_t df_handle = NULL; @@ -563,7 +567,7 @@ 
df_memory_object_data_request( upl_t upl; unsigned int page_list_count = 0; - request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE; + request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE | UPL_SET_INTERNAL; /* * Should we decide to activate USE_PRECIOUS (from default_pager_internal.h) * here, then the request_flags will need to add these to the ones above: @@ -588,79 +592,86 @@ df_memory_object_data_request( return KERN_SUCCESS; } + vm_object_lock(compact_object); assert(compact_object->alive); assert(!compact_object->terminating); - assert(compact_object->pager_ready); - - vm_object_lock(compact_object); + /* + * note that the activity_in_progress could be non-zero, but + * the pager has not yet been created since the activity_in_progress + * count is bumped via vm_pageout_cluster, while the pager isn't created + * until the pageout thread runs and starts to process the pages + * placed on the I/O queue... once the processing of the compact object + * proceeds to the point where it's placed the first page on the I/O + * queue, we need to wait until the entire freeze operation has completed. + */ vm_object_paging_wait(compact_object, THREAD_UNINT); - vm_object_paging_begin(compact_object); - compact_object->blocked_access = TRUE; - pager = (memory_object_t)compact_object->pager; + if (compact_object->pager_ready) { + vm_object_paging_begin(compact_object); - vm_object_unlock(compact_object); + compact_object->blocked_access = TRUE; + pager = (memory_object_t)compact_object->pager; - ((vm_object_fault_info_t) fault_info)->io_sync = TRUE; + vm_object_unlock(compact_object); - /* - * We have a reference on both the default_freezer - * memory object handle and the compact object. - */ - kr = dp_memory_object_data_request(pager, - compact_offset, - length, - protection_required, - fault_info); - if (kr == KERN_SUCCESS){ + ((vm_object_fault_info_t) fault_info)->io_sync = TRUE; - vm_page_t compact_page = VM_PAGE_NULL, dst_page = VM_PAGE_NULL; + /* + * We have a reference on both the default_freezer + * memory object handle and the compact object. + */ + kr = dp_memory_object_data_request(pager, + compact_offset, + length, + protection_required, + fault_info); + if (kr != KERN_SUCCESS) + panic("%d: default_freezer TOC pointed us to default_pager incorrectly\n", kr); vm_object_lock(compact_object); compact_object->blocked_access = FALSE; vm_object_paging_end(compact_object); + } + vm_object_lock(src_object); - vm_object_lock(src_object); - - if ((compact_page = vm_page_lookup(compact_object, compact_offset)) != VM_PAGE_NULL){ + if ((compact_page = vm_page_lookup(compact_object, compact_offset)) != VM_PAGE_NULL){ - dst_page = vm_page_lookup(src_object, offset - src_object->paging_offset); + dst_page = vm_page_lookup(src_object, offset - src_object->paging_offset); - if (!dst_page->absent){ - /* - * Someone raced us here and unpacked - * the object behind us. - * So cleanup before we return. - */ - VM_PAGE_FREE(compact_page); - } else { + if (dst_page && !dst_page->absent){ + /* + * Someone raced us here and unpacked + * the object behind us. + * So cleanup before we return. 
+ */ + VM_PAGE_FREE(compact_page); + } else { + if (dst_page != NULL) { VM_PAGE_FREE(dst_page); - vm_page_rename(compact_page, src_object, offset - src_object->paging_offset, FALSE); - - if (default_freezer_mapping_update(fo->fo_df_handle->dfh_table, - mem_obj, - offset, - NULL, - TRUE) != KERN_SUCCESS) { - printf("Page for object: 0x%lx at offset: 0x%lx not found in table\n", (uintptr_t)src_object, (uintptr_t)offset); - } + } + vm_page_rename(compact_page, src_object, offset - src_object->paging_offset, FALSE); - PAGE_WAKEUP_DONE(compact_page); + if (default_freezer_mapping_update(fo->fo_df_handle->dfh_table, + mem_obj, + offset, + NULL, + TRUE) != KERN_SUCCESS) { + printf("Page for object: 0x%lx at offset: 0x%lx not found in table\n", (uintptr_t)src_object, (uintptr_t)offset); } - } else { - printf("%d: default_freezer: compact_object doesn't have the page for object 0x%lx at offset 0x%lx \n", kr, (uintptr_t)compact_object, (uintptr_t)compact_offset); - kr = KERN_SUCCESS; + + PAGE_WAKEUP_DONE(compact_page); } - vm_object_unlock(src_object); - vm_object_unlock(compact_object); - vm_object_deallocate(compact_object); } else { - panic("%d: default_freezer TOC pointed us to default_pager incorrectly\n", kr); + printf("%d: default_freezer: compact_object doesn't have the page for object 0x%lx at offset 0x%lx \n", kr, (uintptr_t)compact_object, (uintptr_t)compact_offset); + kr = KERN_SUCCESS; } - + vm_object_unlock(src_object); + vm_object_unlock(compact_object); + vm_object_deallocate(compact_object); + return kr; } diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c index 67b69df41..7f2ebbd3b 100644 --- a/osfmk/vm/memory_object.c +++ b/osfmk/vm/memory_object.c @@ -103,9 +103,7 @@ #include /* Needed by some vm_page.h macros */ #include -#if MACH_PAGEMAP #include -#endif /* MACH_PAGEMAP */ #include @@ -454,9 +452,17 @@ vm_object_sync( vm_object_lock(object); vm_object_paging_begin(object); - if (should_flush) + if (should_flush) { flags = MEMORY_OBJECT_DATA_FLUSH; - else + /* + * This flush is from an msync(), not a truncate(), so the + * contents of the file are not affected. + * MEMORY_OBJECT_DATA_NO_CHANGE lets vm_object_update() know + * that the data is not changed and that there's no need to + * push the old contents to a copy object.
+ */ + flags |= MEMORY_OBJECT_DATA_NO_CHANGE; + } else flags = 0; if (should_iosync) @@ -483,7 +489,7 @@ MACRO_BEGIN \ int upl_flags; \ memory_object_t pager; \ \ - if (object == slide_info.slide_object) { \ + if (object->object_slid) { \ panic("Objects with slid pages not allowed\n"); \ } \ \ @@ -815,9 +821,11 @@ vm_object_update( assert(fault_info.cluster_size == copy_size - i); prot = VM_PROT_WRITE|VM_PROT_READ; + page = VM_PAGE_NULL; result = vm_fault_page(copy_object, i, VM_PROT_WRITE|VM_PROT_READ, FALSE, + FALSE, /* page not looked up */ &prot, &page, &top_page, @@ -1096,7 +1104,7 @@ vm_object_set_attributes_common( boolean_t may_cache, memory_object_copy_strategy_t copy_strategy, boolean_t temporary, - boolean_t silent_overwrite, + __unused boolean_t silent_overwrite, boolean_t advisory_pageout) { boolean_t object_became_ready; @@ -1140,7 +1148,7 @@ vm_object_set_attributes_common( object->copy_strategy = copy_strategy; object->can_persist = may_cache; object->temporary = temporary; - object->silent_overwrite = silent_overwrite; +// object->silent_overwrite = silent_overwrite; object->advisory_pageout = advisory_pageout; /* @@ -1191,7 +1199,8 @@ memory_object_change_attributes( temporary = object->temporary; may_cache = object->can_persist; copy_strategy = object->copy_strategy; - silent_overwrite = object->silent_overwrite; +// silent_overwrite = object->silent_overwrite; + silent_overwrite = FALSE; advisory_pageout = object->advisory_pageout; #if notyet invalidate = object->invalidate; @@ -1371,7 +1380,8 @@ memory_object_get_attributes( behave->invalidate = FALSE; #endif behave->advisory_pageout = object->advisory_pageout; - behave->silent_overwrite = object->silent_overwrite; +// behave->silent_overwrite = object->silent_overwrite; + behave->silent_overwrite = FALSE; *count = MEMORY_OBJECT_BEHAVE_INFO_COUNT; break; } @@ -1495,8 +1505,9 @@ memory_object_iopl_request( /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; - if(named_entry->is_sub_map) - return (KERN_INVALID_ARGUMENT); + if (named_entry->is_sub_map || + named_entry->is_copy) + return KERN_INVALID_ARGUMENT; named_entry_lock(named_entry); @@ -1953,18 +1964,35 @@ memory_object_signed( return KERN_SUCCESS; } +boolean_t +memory_object_is_signed( + memory_object_control_t control) +{ + boolean_t is_signed; + vm_object_t object; + + object = memory_object_control_to_vm_object(control); + if (object == VM_OBJECT_NULL) + return FALSE; + + vm_object_lock_shared(object); + is_signed = object->code_signed; + vm_object_unlock(object); + + return is_signed; +} + boolean_t memory_object_is_slid( memory_object_control_t control) { vm_object_t object = VM_OBJECT_NULL; - vm_object_t slide_object = slide_info.slide_object; object = memory_object_control_to_vm_object(control); if (object == VM_OBJECT_NULL) return FALSE; - return (object == slide_object); + return object->object_slid; } static zone_t mem_obj_control_zone; diff --git a/osfmk/vm/memory_object.h b/osfmk/vm/memory_object.h index 05f78fcf4..f9d1b4326 100644 --- a/osfmk/vm/memory_object.h +++ b/osfmk/vm/memory_object.h @@ -75,10 +75,10 @@ __private_extern__ kern_return_t memory_manager_default_check(void); __private_extern__ -void memory_manager_default_init(void) __attribute__((section("__TEXT, initcode"))); +void memory_manager_default_init(void); __private_extern__ -void memory_object_control_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +void memory_object_control_bootstrap(void); __private_extern__ 
memory_object_control_t memory_object_control_allocate( vm_object_t object); @@ -141,6 +141,9 @@ extern kern_return_t memory_object_signed( memory_object_control_t control, boolean_t is_signed); +extern boolean_t memory_object_is_signed( + memory_object_control_t control); + extern boolean_t memory_object_is_slid( memory_object_control_t control); diff --git a/osfmk/vm/pmap.h b/osfmk/vm/pmap.h index 26c26d6c0..2e228d6d2 100644 --- a/osfmk/vm/pmap.h +++ b/osfmk/vm/pmap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -141,7 +141,7 @@ extern void pmap_startup( * use remaining physical pages * to allocate page frames. */ -extern void pmap_init(void) __attribute__((section("__TEXT, initcode"))); +extern void pmap_init(void); /* Initialization, * after kernel runs * in virtual memory. @@ -189,11 +189,7 @@ extern void pmap_virtual_space( extern pmap_t pmap_create( /* Create a pmap_t. */ ledger_t ledger, vm_map_size_t size, -#ifdef __i386__ - boolean_t is_64bit); -#else __unused boolean_t is_64bit); -#endif extern pmap_t (pmap_kernel)(void); /* Return the kernel's pmap */ extern void pmap_reference(pmap_t pmap); /* Gain a reference. */ extern void pmap_destroy(pmap_t pmap); /* Release a reference. */ @@ -217,7 +213,8 @@ extern kern_return_t pmap_enter_options( vm_prot_t fault_type, unsigned int flags, boolean_t wired, - unsigned int options); + unsigned int options, + void *arg); extern void pmap_remove_some_phys( pmap_t pmap, @@ -232,6 +229,12 @@ extern void pmap_page_protect( /* Restrict access to page. */ ppnum_t phys, vm_prot_t prot); +extern void pmap_page_protect_options( /* Restrict access to page. */ + ppnum_t phys, + vm_prot_t prot, + unsigned int options, + void *arg); + extern void (pmap_zero_page)( ppnum_t pn); @@ -266,6 +269,11 @@ extern void (pmap_copy_part_rpage)( extern unsigned int (pmap_disconnect)( /* disconnect mappings and return reference and change */ ppnum_t phys); +extern unsigned int (pmap_disconnect_options)( /* disconnect mappings and return reference and change */ + ppnum_t phys, + unsigned int options, + void *arg); + extern kern_return_t (pmap_attribute_cache_sync)( /* Flush appropriate * cache based on * page number sent */ @@ -381,34 +389,51 @@ extern kern_return_t (pmap_attribute)( /* Get/Set special memory MACRO_BEGIN \ pmap_t __pmap = (pmap); \ vm_page_t __page = (page); \ + int __options = 0; \ \ PMAP_ENTER_CHECK(__pmap, __page) \ - pmap_enter(__pmap, \ - (virtual_address), \ - __page->phys_page, \ - (protection), \ - (fault_type), \ - (flags), \ - (wired)); \ + if (__page->object->internal) { \ + __options |= PMAP_OPTIONS_INTERNAL; \ + } \ + if (__page->reusable || __page->object->all_reusable) { \ + __options |= PMAP_OPTIONS_REUSABLE; \ + } \ + (void) pmap_enter_options(__pmap, \ + (virtual_address), \ + __page->phys_page, \ + (protection), \ + (fault_type), \ + (flags), \ + (wired), \ + __options, \ + NULL); \ MACRO_END #endif /* !PMAP_ENTER */ #ifndef PMAP_ENTER_OPTIONS -#define PMAP_ENTER_OPTIONS(pmap, virtual_address, page, protection, fault_type, \ - flags, wired, options, result) \ +#define PMAP_ENTER_OPTIONS(pmap, virtual_address, page, protection, \ + fault_type, flags, wired, options, result) \ MACRO_BEGIN \ pmap_t __pmap = (pmap); \ vm_page_t __page = (page); \ + int __extra_options = 0; \ \ PMAP_ENTER_CHECK(__pmap, __page) \ + if (__page->object->internal) { \ + __extra_options |= PMAP_OPTIONS_INTERNAL; \ + 
} \ + if (__page->reusable || __page->object->all_reusable) { \ + __extra_options |= PMAP_OPTIONS_REUSABLE; \ + } \ result = pmap_enter_options(__pmap, \ - (virtual_address), \ - __page->phys_page, \ - (protection), \ - (fault_type), \ - (flags), \ - (wired), \ - options); \ + (virtual_address), \ + __page->phys_page, \ + (protection), \ + (fault_type), \ + (flags), \ + (wired), \ + (options) | __extra_options, \ + NULL); \ MACRO_END #endif /* !PMAP_ENTER_OPTIONS */ @@ -455,6 +480,13 @@ extern kern_return_t (pmap_attribute)( /* Get/Set special memory * physical addresses, simulating them if not provided * by the hardware. */ +struct pfc { + long pfc_cpus; + long pfc_invalid_global; +}; + +typedef struct pfc pmap_flush_context; + /* Clear reference bit */ extern void pmap_clear_reference(ppnum_t pn); /* Return reference bit */ @@ -471,6 +503,11 @@ extern unsigned int pmap_get_refmod(ppnum_t pn); extern void pmap_clear_refmod(ppnum_t pn, unsigned int mask); #define VM_MEM_MODIFIED 0x01 /* Modified bit */ #define VM_MEM_REFERENCED 0x02 /* Referenced bit */ +extern void pmap_clear_refmod_options(ppnum_t pn, unsigned int mask, unsigned int options, void *); + + +extern void pmap_flush_context_init(pmap_flush_context *); +extern void pmap_flush(pmap_flush_context *); /* * Routines that operate on ranges of virtual addresses. @@ -481,6 +518,14 @@ extern void pmap_protect( /* Change protections. */ vm_map_offset_t e, vm_prot_t prot); +extern void pmap_protect_options( /* Change protections. */ + pmap_t map, + vm_map_offset_t s, + vm_map_offset_t e, + vm_prot_t prot, + unsigned int options, + void *arg); + extern void (pmap_pageable)( pmap_t pmap, vm_map_offset_t start, @@ -534,6 +579,13 @@ extern pmap_t kernel_pmap; /* The kernel's map */ #define PMAP_OPTIONS_NOENTER 0x2 /* expand pmap if needed * but don't enter mapping */ +#define PMAP_OPTIONS_COMPRESSOR 0x4 /* credit the compressor for + * this operation */ +#define PMAP_OPTIONS_INTERNAL 0x8 /* page from internal object */ +#define PMAP_OPTIONS_REUSABLE 0x10 /* page is "reusable" */ +#define PMAP_OPTIONS_NOFLUSH 0x20 /* delay flushing of pmap */ +#define PMAP_OPTIONS_NOREFMOD 0x40 /* don't need ref/mod on disconnect */ +#define PMAP_OPTIONS_REMOVE 0x100 /* removing a mapping */ #if !defined(__LP64__) extern vm_offset_t pmap_extract(pmap_t pmap, @@ -550,6 +602,18 @@ extern void pmap_remove( /* Remove mappings. */ vm_map_offset_t s, vm_map_offset_t e); +extern void pmap_remove_options( /* Remove mappings. 
*/ + pmap_t map, + vm_map_offset_t s, + vm_map_offset_t e, + int options); + +extern void pmap_reusable( + pmap_t map, + vm_map_offset_t s, + vm_map_offset_t e, + boolean_t reusable); + extern void fillPage(ppnum_t pa, unsigned int fill); extern void pmap_map_sharedpage(task_t task, pmap_t pmap); @@ -559,6 +623,10 @@ extern void pmap_unmap_sharedpage(pmap_t pmap); void pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr); #endif +unsigned int pmap_query_resident(pmap_t pmap, + vm_map_offset_t s, + vm_map_offset_t e); + #endif /* KERNEL_PRIVATE */ #endif /* _VM_PMAP_H_ */ diff --git a/osfmk/vm/vm32_user.c b/osfmk/vm/vm32_user.c index f37fd3659..607e8d4b5 100644 --- a/osfmk/vm/vm32_user.c +++ b/osfmk/vm/vm32_user.c @@ -484,7 +484,7 @@ vm32_purgable_control( return KERN_INVALID_ARGUMENT; return vm_map_purgable_control(map, - vm_map_trunc_page(address), + vm_map_trunc_page(address, PAGE_MASK), control, state); } @@ -499,9 +499,11 @@ vm32_map_page_query( if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - return vm_map_page_query_internal(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, + ref_count); } kern_return_t diff --git a/osfmk/vm/vm_apple_protect.c b/osfmk/vm/vm_apple_protect.c index a98fdbb3a..c6da62e27 100644 --- a/osfmk/vm/vm_apple_protect.c +++ b/osfmk/vm/vm_apple_protect.c @@ -447,10 +447,12 @@ apple_protect_pager_data_request( vm_object_paging_begin(src_object); error_code = 0; prot = VM_PROT_READ; + src_page = VM_PAGE_NULL; kr = vm_fault_page(src_object, offset + cur_offset, VM_PROT_READ, FALSE, + FALSE, /* src_page not looked up */ &prot, &src_page, &top_page, diff --git a/osfmk/vm/vm_compressor.c b/osfmk/vm/vm_compressor.c new file mode 100644 index 000000000..ca9ff6c6d --- /dev/null +++ b/osfmk/vm/vm_compressor.c @@ -0,0 +1,2602 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include /* for host_info() */ +#include + +#include +#include + +#include + +/* + * vm_compressor_mode has a hierarchy of control to set its value. + * boot-args are checked first, then device-tree, and finally + * the default value that is defined below.
See vm_fault_init() for + * the boot-arg & device-tree code. + */ + +extern ipc_port_t min_pages_trigger_port; +extern lck_mtx_t paging_segments_lock; +#define PSL_LOCK() lck_mtx_lock(&paging_segments_lock) +#define PSL_UNLOCK() lck_mtx_unlock(&paging_segments_lock) + + +int vm_compressor_mode = VM_PAGER_COMPRESSOR_WITH_SWAP; +int vm_scale = 16; + + +int vm_compression_limit = 0; + +extern boolean_t vm_swap_up; +extern void vm_pageout_io_throttle(void); + +#if CHECKSUM_THE_DATA || CHECKSUM_THE_SWAP || CHECKSUM_THE_COMPRESSED_DATA +extern unsigned int hash_string(char *cp, int len); +#endif + +struct c_slot { + uint64_t c_offset:C_SEG_OFFSET_BITS, + c_size:12, + c_packed_ptr:36; +#if CHECKSUM_THE_DATA + unsigned int c_hash_data; +#endif +#if CHECKSUM_THE_COMPRESSED_DATA + unsigned int c_hash_compressed_data; +#endif + +}; + +#define UNPACK_C_SIZE(cs) ((cs->c_size == (PAGE_SIZE-1)) ? 4096 : cs->c_size) +#define PACK_C_SIZE(cs, size) (cs->c_size = ((size == PAGE_SIZE) ? PAGE_SIZE - 1 : size)) + + +struct c_slot_mapping { + uint32_t s_cseg:22, /* segment number + 1 */ + s_cindx:10; /* index in the segment */ +}; + +typedef struct c_slot_mapping *c_slot_mapping_t; + + +union c_segu { + c_segment_t c_seg; + uint32_t c_segno; +}; + + + +#define C_SLOT_PACK_PTR(ptr) (((uintptr_t)ptr - (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS) >> 2) +#define C_SLOT_UNPACK_PTR(cslot) ((uintptr_t)(cslot->c_packed_ptr << 2) + (uintptr_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS) + + +uint32_t c_segment_count = 0; + +uint64_t c_generation_id = 0; +uint64_t c_generation_id_flush_barrier; + + +#define HIBERNATE_FLUSHING_SECS_TO_COMPLETE 120 + +boolean_t hibernate_no_swapspace = FALSE; +clock_sec_t hibernate_flushing_deadline = 0; + + +#if TRACK_BAD_C_SEGMENTS +queue_head_t c_bad_list_head; +uint32_t c_bad_count = 0; +#endif + +queue_head_t c_age_list_head; +queue_head_t c_swapout_list_head; +queue_head_t c_swappedin_list_head; +queue_head_t c_swappedout_list_head; +queue_head_t c_swappedout_sparse_list_head; + +uint32_t c_age_count = 0; +uint32_t c_swapout_count = 0; +uint32_t c_swappedin_count = 0; +uint32_t c_swappedout_count = 0; +uint32_t c_swappedout_sparse_count = 0; + +queue_head_t c_minor_list_head; +uint32_t c_minor_count = 0; + +union c_segu *c_segments; +caddr_t c_segments_next_page; +boolean_t c_segments_busy; +uint32_t c_segments_available; +uint32_t c_segments_limit; +uint32_t c_segment_pages_compressed; +uint32_t c_segment_pages_compressed_limit; +uint32_t c_free_segno_head = (uint32_t)-1; + +uint32_t vm_compressor_minorcompact_threshold_divisor = 10; +uint32_t vm_compressor_majorcompact_threshold_divisor = 10; +uint32_t vm_compressor_unthrottle_threshold_divisor = 10; +uint32_t vm_compressor_catchup_threshold_divisor = 10; + +#define C_SEGMENTS_PER_PAGE (PAGE_SIZE / sizeof(union c_segu)) + + +lck_grp_attr_t vm_compressor_lck_grp_attr; +lck_attr_t vm_compressor_lck_attr; +lck_grp_t vm_compressor_lck_grp; + + +#if __i386__ || __x86_64__ +lck_mtx_t *c_list_lock; +#else /* __i386__ || __x86_64__ */ +lck_spin_t *c_list_lock; +#endif /* __i386__ || __x86_64__ */ + +lck_rw_t c_master_lock; +lck_rw_t c_decompressor_lock; + +zone_t compressor_segment_zone; +int c_compressor_swap_trigger = 0; + +uint32_t compressor_cpus; +char *compressor_scratch_bufs; + + +clock_sec_t start_of_sample_period_sec = 0; +clock_nsec_t start_of_sample_period_nsec = 0; +clock_sec_t start_of_eval_period_sec = 0; +clock_nsec_t start_of_eval_period_nsec = 0; +uint32_t sample_period_decompression_count = 0; +uint32_t sample_period_compression_count 
= 0; +uint32_t last_eval_decompression_count = 0; +uint32_t last_eval_compression_count = 0; + +#define DECOMPRESSION_SAMPLE_MAX_AGE (60 * 30) + +uint32_t swapout_target_age = 0; +uint32_t age_of_decompressions_during_sample_period[DECOMPRESSION_SAMPLE_MAX_AGE]; +uint32_t overage_decompressions_during_sample_period = 0; + +void do_fastwake_warmup(void); +boolean_t fastwake_warmup = FALSE; +boolean_t fastwake_recording_in_progress = FALSE; +clock_sec_t dont_trim_until_ts = 0; + +uint64_t c_segment_warmup_count; +uint64_t first_c_segment_to_warm_generation_id = 0; +uint64_t last_c_segment_to_warm_generation_id = 0; +boolean_t hibernate_flushing = FALSE; + +int64_t c_segment_input_bytes = 0; +int64_t c_segment_compressed_bytes = 0; +int64_t compressor_bytes_used = 0; + +static boolean_t compressor_needs_to_swap(void); +static void vm_compressor_swap_trigger_thread(void); +static void vm_compressor_do_delayed_compactions(boolean_t); +static void vm_compressor_compact_and_swap(boolean_t); +static void vm_compressor_age_swapped_in_segments(boolean_t); +static uint64_t compute_elapsed_msecs(clock_sec_t, clock_nsec_t, clock_sec_t, clock_nsec_t); + +boolean_t vm_compressor_low_on_space(void); + +void compute_swapout_target_age(void); + +boolean_t c_seg_major_compact(c_segment_t, c_segment_t); +boolean_t c_seg_major_compact_ok(c_segment_t, c_segment_t); + +int c_seg_minor_compaction_and_unlock(c_segment_t, boolean_t); +int c_seg_do_minor_compaction_and_unlock(c_segment_t, boolean_t, boolean_t, boolean_t); +void c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg); +void c_seg_need_delayed_compaction(c_segment_t); + +void c_seg_move_to_sparse_list(c_segment_t); +void c_seg_insert_into_q(queue_head_t *, c_segment_t); + +boolean_t c_seg_try_free(c_segment_t); +void c_seg_free(c_segment_t); +void c_seg_free_locked(c_segment_t); + + +uint64_t vm_available_memory(void); + +extern unsigned int dp_pages_free, dp_pages_reserve; + +uint64_t +vm_available_memory(void) +{ + return (((uint64_t)AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE_64); +} + + +boolean_t +vm_compression_available(void) +{ + if ( !(COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)) + return (FALSE); + + if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) + return (FALSE); + + return (TRUE); +} + + +boolean_t +vm_compressor_low_on_space(void) +{ + if ((c_segment_pages_compressed > (c_segment_pages_compressed_limit - 20000)) || + (c_segment_count > (c_segments_limit - 250))) + return (TRUE); + + return (FALSE); +} + + +int +vm_low_on_space(void) +{ + if (vm_compressor_mode == COMPRESSED_PAGER_IS_ACTIVE || vm_compressor_mode == DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (vm_compressor_low_on_space() || HARD_THROTTLE_LIMIT_REACHED()) + return (1); + } else { + if (((dp_pages_free + dp_pages_reserve < 2000) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))) + return (1); + } + return (0); +} + + +void +vm_compressor_init_locks(void) +{ + lck_grp_attr_setdefault(&vm_compressor_lck_grp_attr); + lck_grp_init(&vm_compressor_lck_grp, "vm_compressor", &vm_compressor_lck_grp_attr); + lck_attr_setdefault(&vm_compressor_lck_attr); + + lck_rw_init(&c_master_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); + lck_rw_init(&c_decompressor_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); +} + + +void +vm_decompressor_lock(void) +{ + lck_rw_lock_exclusive(&c_decompressor_lock); +} + +void +vm_decompressor_unlock(void) +{ + 
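+	/*
+	 * release the exclusive hold taken in vm_decompressor_lock()...
+	 * lck_rw_done() drops a held rw lock in whichever mode it was
+	 * acquired, so decompressors blocked behind the exclusive hold
+	 * can proceed
+	 */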
lck_rw_done(&c_decompressor_lock); + +} + + + +void +vm_compressor_init(void) +{ + thread_t thread; + + assert((C_SEGMENTS_PER_PAGE * sizeof(union c_segu)) == PAGE_SIZE); + + PE_parse_boot_argn("vm_compression_limit", &vm_compression_limit, sizeof (vm_compression_limit)); + + if (max_mem <= (3ULL * 1024ULL * 1024ULL * 1024ULL)) { + vm_compressor_minorcompact_threshold_divisor = 11; + vm_compressor_majorcompact_threshold_divisor = 13; + vm_compressor_unthrottle_threshold_divisor = 20; + vm_compressor_catchup_threshold_divisor = 35; + } else { + vm_compressor_minorcompact_threshold_divisor = 20; + vm_compressor_majorcompact_threshold_divisor = 25; + vm_compressor_unthrottle_threshold_divisor = 35; + vm_compressor_catchup_threshold_divisor = 50; + } + /* + * vm_page_init_lck_grp is now responsible for calling vm_compressor_init_locks + * c_master_lock needs to be available early so that "vm_page_find_contiguous" can + * use PAGE_REPLACEMENT_ALLOWED to coordinate with the compressor. + */ + +#if __i386__ || __x86_64__ + c_list_lock = lck_mtx_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr); +#else /* __i386__ || __x86_64__ */ + c_list_lock = lck_spin_alloc_init(&vm_compressor_lck_grp, &vm_compressor_lck_attr); +#endif /* __i386__ || __x86_64__ */ + +#if TRACK_BAD_C_SEGMENTS + queue_init(&c_bad_list_head); +#endif + queue_init(&c_age_list_head); + queue_init(&c_minor_list_head); + queue_init(&c_swapout_list_head); + queue_init(&c_swappedin_list_head); + queue_init(&c_swappedout_list_head); + queue_init(&c_swappedout_sparse_list_head); + + compressor_segment_zone = zinit(sizeof (struct c_segment), + 128000 * sizeof (struct c_segment), + 8192, "compressor_segment"); + zone_change(compressor_segment_zone, Z_CALLERACCT, FALSE); + zone_change(compressor_segment_zone, Z_NOENCRYPT, TRUE); + + + c_free_segno_head = -1; + c_segments_available = 0; + + if (vm_compression_limit == 0) { + c_segment_pages_compressed_limit = (uint32_t)((max_mem / PAGE_SIZE)) * vm_scale; + +#define OLD_SWAP_LIMIT (1024 * 1024 * 16) +#define MAX_SWAP_LIMIT (1024 * 1024 * 128) + + if (c_segment_pages_compressed_limit > (OLD_SWAP_LIMIT)) + c_segment_pages_compressed_limit = OLD_SWAP_LIMIT; + + if (c_segment_pages_compressed_limit < (uint32_t)(max_mem / PAGE_SIZE_64)) + c_segment_pages_compressed_limit = (uint32_t)(max_mem / PAGE_SIZE_64); + } else { + if (vm_compression_limit < MAX_SWAP_LIMIT) + c_segment_pages_compressed_limit = vm_compression_limit; + else + c_segment_pages_compressed_limit = MAX_SWAP_LIMIT; + } + if ((c_segments_limit = c_segment_pages_compressed_limit / (C_SEG_BUFSIZE / PAGE_SIZE)) > C_SEG_MAX_LIMIT) + c_segments_limit = C_SEG_MAX_LIMIT; + + c_segments_busy = FALSE; + + if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_segments), (sizeof(union c_segu) * c_segments_limit), 0, KMA_KOBJECT | KMA_VAONLY) != KERN_SUCCESS) + panic("vm_compressor_init: kernel_memory_allocate failed\n"); + + c_segments_next_page = (caddr_t)c_segments; + + { + host_basic_info_data_t hinfo; + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; + +#define BSD_HOST 1 + host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); + + compressor_cpus = hinfo.max_cpus; + + compressor_scratch_bufs = kalloc(compressor_cpus * WKdm_SCRATCH_BUF_SIZE); + } + + if (kernel_thread_start_priority((thread_continue_t)vm_compressor_swap_trigger_thread, NULL, + BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { + panic("vm_compressor_swap_trigger_thread: create failed"); + } + thread->options |= TH_OPT_VMPRIV; + + 
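+	/*
+	 * the swap trigger thread is now running on its own...
+	 * drop the reference that kernel_thread_start_priority()
+	 * handed back so it isn't leaked
+	 */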
thread_deallocate(thread); + + assert(default_pager_init_flag == 0); + + if (vm_pageout_internal_start() != KERN_SUCCESS) { + panic("vm_compressor_init: Failed to start the internal pageout thread.\n"); + } + +#if CONFIG_FREEZE + memorystatus_freeze_enabled = TRUE; +#endif /* CONFIG_FREEZE */ + + default_pager_init_flag = 1; + + vm_page_reactivate_all_throttled(); +} + + +#if VALIDATE_C_SEGMENTS + +static void +c_seg_validate(c_segment_t c_seg, boolean_t must_be_compact) +{ + int c_indx; + int32_t bytes_used; + int32_t bytes_unused; + uint32_t c_rounded_size; + uint32_t c_size; + c_slot_t cs; + + if (c_seg->c_firstemptyslot < c_seg->c_nextslot) { + c_indx = c_seg->c_firstemptyslot; + cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); + + if (cs == NULL) + panic("c_seg_validate: no slot backing c_firstemptyslot"); + + if (cs->c_size) + panic("c_seg_validate: c_firstemptyslot has non-zero size (%d)\n", cs->c_size); + } + bytes_used = 0; + bytes_unused = 0; + + for (c_indx = 0; c_indx < c_seg->c_nextslot; c_indx++) { + + cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); + + c_size = UNPACK_C_SIZE(cs); + + c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; + + bytes_used += c_rounded_size; + +#if CHECKSUM_THE_COMPRESSED_DATA + if (c_size && cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size)) + panic("compressed data doesn't match original"); +#endif + } + + if (bytes_used != c_seg->c_bytes_used) + panic("c_seg_validate: bytes_used mismatch - found %d, segment has %d\n", bytes_used, c_seg->c_bytes_used); + + if (c_seg->c_bytes_used > C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) + panic("c_seg_validate: c_bytes_used > c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n", + (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used); + + if (must_be_compact) { + if (c_seg->c_bytes_used != C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset)) + panic("c_seg_validate: c_bytes_used doesn't match c_nextoffset - c_nextoffset = %d, c_bytes_used = %d\n", + (int32_t)C_SEG_OFFSET_TO_BYTES((int32_t)c_seg->c_nextoffset), c_seg->c_bytes_used); + } +} + +#endif + + +void +c_seg_need_delayed_compaction(c_segment_t c_seg) +{ + boolean_t clear_busy = FALSE; + + if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { + c_seg->c_busy = 1; + + lck_mtx_unlock_always(&c_seg->c_lock); + lck_mtx_lock_spin_always(c_list_lock); + lck_mtx_lock_spin_always(&c_seg->c_lock); + + clear_busy = TRUE; + } + if (!c_seg->c_on_minorcompact_q && !c_seg->c_ondisk && !c_seg->c_on_swapout_q) { + queue_enter(&c_minor_list_head, c_seg, c_segment_t, c_list); + c_seg->c_on_minorcompact_q = 1; + c_minor_count++; + } + lck_mtx_unlock_always(c_list_lock); + + if (clear_busy == TRUE) + C_SEG_WAKEUP_DONE(c_seg); +} + + +unsigned int c_seg_moved_to_sparse_list = 0; + +void +c_seg_move_to_sparse_list(c_segment_t c_seg) +{ + boolean_t clear_busy = FALSE; + + if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { + c_seg->c_busy = 1; + + lck_mtx_unlock_always(&c_seg->c_lock); + lck_mtx_lock_spin_always(c_list_lock); + lck_mtx_lock_spin_always(&c_seg->c_lock); + + clear_busy = TRUE; + } + assert(c_seg->c_ondisk); + assert(c_seg->c_on_swappedout_q); + assert(!c_seg->c_on_swappedout_sparse_q); + + queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedout_q = 0; + c_swappedout_count--; + + c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg); + c_seg->c_on_swappedout_sparse_q = 1; + c_swappedout_sparse_count++; + + 
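+	/*
+	 * the segment stays on disk, but is now tracked on the sparse
+	 * queue... mostly-empty swapped-out segments are kept separate
+	 * so they can be found and reclaimed (see c_seg_try_free)
+	 * without scanning the full swappedout queue
+	 */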
c_seg_moved_to_sparse_list++; + + lck_mtx_unlock_always(c_list_lock); + + if (clear_busy == TRUE) + C_SEG_WAKEUP_DONE(c_seg); +} + + +void +c_seg_insert_into_q(queue_head_t *qhead, c_segment_t c_seg) +{ + c_segment_t c_seg_next; + + if (queue_empty(qhead)) { + queue_enter(qhead, c_seg, c_segment_t, c_age_list); + } else { + c_seg_next = (c_segment_t)queue_first(qhead); + + while (TRUE) { + + if (c_seg->c_generation_id < c_seg_next->c_generation_id) { + queue_insert_before(qhead, c_seg, c_seg_next, c_segment_t, c_age_list); + break; + } + c_seg_next = (c_segment_t) queue_next(&c_seg_next->c_age_list); + + if (queue_end(qhead, (queue_entry_t) c_seg_next)) { + queue_enter(qhead, c_seg, c_segment_t, c_age_list); + break; + } + } + } +} + + +int try_minor_compaction_failed = 0; +int try_minor_compaction_succeeded = 0; + +void +c_seg_try_minor_compaction_and_unlock(c_segment_t c_seg) +{ + + assert(c_seg->c_on_minorcompact_q); + /* + * c_seg is currently on the delayed minor compaction + * queue and we have c_seg locked... if we can get the + * c_list_lock w/o blocking (if we blocked we could deadlock + * because the lock order is c_list_lock then c_seg's lock) + * we'll pull it from the delayed list and free it directly + */ + if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { + /* + * c_list_lock is held, we need to bail + */ + try_minor_compaction_failed++; + + lck_mtx_unlock_always(&c_seg->c_lock); + } else { + try_minor_compaction_succeeded++; + + c_seg->c_busy = 1; + c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, FALSE); + } +} + + +int +c_seg_do_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy, boolean_t need_list_lock, boolean_t disallow_page_replacement) +{ + int c_seg_freed; + + assert(c_seg->c_busy); + + if (!c_seg->c_on_minorcompact_q) { + if (clear_busy == TRUE) + C_SEG_WAKEUP_DONE(c_seg); + + lck_mtx_unlock_always(&c_seg->c_lock); + + return (0); + } + queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list); + c_seg->c_on_minorcompact_q = 0; + c_minor_count--; + + lck_mtx_unlock_always(c_list_lock); + + if (disallow_page_replacement == TRUE) { + lck_mtx_unlock_always(&c_seg->c_lock); + + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + lck_mtx_lock_spin_always(&c_seg->c_lock); + } + c_seg_freed = c_seg_minor_compaction_and_unlock(c_seg, clear_busy); + + if (disallow_page_replacement == TRUE) + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + if (need_list_lock == TRUE) + lck_mtx_lock_spin_always(c_list_lock); + + return (c_seg_freed); +} + + +void +c_seg_wait_on_busy(c_segment_t c_seg) +{ + c_seg->c_wanted = 1; + assert_wait((event_t) (c_seg), THREAD_UNINT); + + lck_mtx_unlock_always(&c_seg->c_lock); + thread_block(THREAD_CONTINUE_NULL); +} + + + +int try_free_succeeded = 0; +int try_free_failed = 0; + +boolean_t +c_seg_try_free(c_segment_t c_seg) +{ + /* + * c_seg is currently on the delayed minor compaction + * or the swapped out sparse queue and we have c_seg locked...
+ * if we can get the c_list_lock w/o blocking (if we blocked we + * could deadlock because the lock order is c_list_lock then c_seg's lock) + * we'll pull it from the appropriate queue and free it + */ + if ( !lck_mtx_try_lock_spin_always(c_list_lock)) { + /* + * c_list_lock is held, we need to bail + */ + try_free_failed++; + return (FALSE); + } + if (c_seg->c_on_minorcompact_q) { + queue_remove(&c_minor_list_head, c_seg, c_segment_t, c_list); + c_seg->c_on_minorcompact_q = 0; + c_minor_count--; + } else { + assert(c_seg->c_on_swappedout_sparse_q); + + /* + * c_seg_free_locked will remove it from the swappedout sparse list + */ + } + if (!c_seg->c_busy_swapping) + c_seg->c_busy = 1; + + c_seg_free_locked(c_seg); + + try_free_succeeded++; + + return (TRUE); +} + + +void +c_seg_free(c_segment_t c_seg) +{ + if (!c_seg->c_busy_swapping) + c_seg->c_busy = 1; + + lck_mtx_unlock_always(&c_seg->c_lock); + lck_mtx_lock_spin_always(c_list_lock); + lck_mtx_lock_spin_always(&c_seg->c_lock); + + c_seg_free_locked(c_seg); +} + + +void +c_seg_free_locked(c_segment_t c_seg) +{ + int segno, i; + int pages_populated; + int32_t *c_buffer = NULL; + uint64_t c_swap_handle; + + assert(!c_seg->c_on_minorcompact_q); + + if (c_seg->c_on_age_q) { + queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_age_q = 0; + c_age_count--; + } else if (c_seg->c_on_swappedin_q) { + queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedin_q = 0; + c_swappedin_count--; + } else if (c_seg->c_on_swapout_q) { + queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swapout_q = 0; + c_swapout_count--; + thread_wakeup((event_t)&compaction_swapper_running); + } else if (c_seg->c_on_swappedout_q) { + queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedout_q = 0; + c_swappedout_count--; + } else if (c_seg->c_on_swappedout_sparse_q) { + queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedout_sparse_q = 0; + c_swappedout_sparse_count--; + } +#if TRACK_BAD_C_SEGMENTS + else if (c_seg->c_on_bad_q) { + queue_remove(&c_bad_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_bad_q = 0; + c_bad_count--; + } +#endif + segno = c_seg->c_mysegno; + c_segments[segno].c_segno = c_free_segno_head; + c_free_segno_head = segno; + c_segment_count--; + + lck_mtx_unlock_always(c_list_lock); + + if (c_seg->c_wanted) { + thread_wakeup((event_t) (c_seg)); + c_seg->c_wanted = 0; + } + if (c_seg->c_busy_swapping) { + c_seg->c_must_free = 1; + + lck_mtx_unlock_always(&c_seg->c_lock); + return; + } + if (c_seg->c_ondisk == 0) { + pages_populated = (round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE; + + c_buffer = c_seg->c_store.c_buffer; + c_seg->c_store.c_buffer = NULL; + } else { + /* + * Free swap space on disk. 
+ */ + c_swap_handle = c_seg->c_store.c_swap_handle; + c_seg->c_store.c_swap_handle = (uint64_t)-1; + } + lck_mtx_unlock_always(&c_seg->c_lock); + + if (c_buffer) { + kernel_memory_depopulate(kernel_map, (vm_offset_t) c_buffer, pages_populated * PAGE_SIZE, KMA_COMPRESSOR); + + kmem_free(kernel_map, (vm_offset_t) c_buffer, C_SEG_ALLOCSIZE); + } else if (c_swap_handle) + vm_swap_free(c_swap_handle); + + +#if __i386__ || __x86_64__ + lck_mtx_destroy(&c_seg->c_lock, &vm_compressor_lck_grp); +#else /* __i386__ || __x86_64__ */ + lck_spin_destroy(&c_seg->c_lock, &vm_compressor_lck_grp); +#endif /* __i386__ || __x86_64__ */ + + for (i = 0; i < C_SEG_SLOT_ARRAYS; i++) { + if (c_seg->c_slots[i] == 0) + break; + + kfree((char *)c_seg->c_slots[i], sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE); + } + zfree(compressor_segment_zone, c_seg); +} + + +int c_seg_trim_page_count = 0; + +void +c_seg_trim_tail(c_segment_t c_seg) +{ + c_slot_t cs; + uint32_t c_size; + uint32_t c_offset; + uint32_t c_rounded_size; + uint16_t current_nextslot; + uint32_t current_populated_offset; + + if (c_seg->c_bytes_used == 0) + return; + current_nextslot = c_seg->c_nextslot; + current_populated_offset = c_seg->c_populated_offset; + + while (c_seg->c_nextslot) { + + cs = C_SEG_SLOT_FROM_INDEX(c_seg, (c_seg->c_nextslot - 1)); + + c_size = UNPACK_C_SIZE(cs); + + if (c_size) { + if (current_nextslot != c_seg->c_nextslot) { + c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; + c_offset = cs->c_offset + C_SEG_BYTES_TO_OFFSET(c_rounded_size); + + c_seg->c_nextoffset = c_offset; + c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1); + + if (c_seg->c_firstemptyslot > c_seg->c_nextslot) + c_seg->c_firstemptyslot = c_seg->c_nextslot; + + c_seg_trim_page_count += ((round_page_32(C_SEG_OFFSET_TO_BYTES(current_populated_offset)) - + round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset))) / PAGE_SIZE); + } + break; + } + c_seg->c_nextslot--; + } + assert(c_seg->c_nextslot); +} + + +int +c_seg_minor_compaction_and_unlock(c_segment_t c_seg, boolean_t clear_busy) +{ + c_slot_mapping_t slot_ptr; + uint32_t c_offset = 0; + uint32_t old_populated_offset; + uint32_t c_rounded_size; + uint32_t c_size; + int c_indx = 0; + int i; + c_slot_t c_dst; + c_slot_t c_src; + boolean_t need_unlock = TRUE; + + assert(c_seg->c_busy); + +#if VALIDATE_C_SEGMENTS + c_seg_validate(c_seg, FALSE); +#endif + if (c_seg->c_bytes_used == 0) { + c_seg_free(c_seg); + return (1); + } + if (c_seg->c_firstemptyslot >= c_seg->c_nextslot || C_SEG_UNUSED_BYTES(c_seg) < PAGE_SIZE) + goto done; + +#if VALIDATE_C_SEGMENTS + c_seg->c_was_minor_compacted++; +#endif + c_indx = c_seg->c_firstemptyslot; + c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); + + old_populated_offset = c_seg->c_populated_offset; + c_offset = c_dst->c_offset; + + for (i = c_indx + 1; i < c_seg->c_nextslot && c_offset < c_seg->c_nextoffset; i++) { + + c_src = C_SEG_SLOT_FROM_INDEX(c_seg, i); + + c_size = UNPACK_C_SIZE(c_src); + + if (c_size == 0) + continue; + + memcpy(&c_seg->c_store.c_buffer[c_offset], &c_seg->c_store.c_buffer[c_src->c_offset], c_size); + +#if CHECKSUM_THE_DATA + c_dst->c_hash_data = c_src->c_hash_data; +#endif +#if CHECKSUM_THE_COMPRESSED_DATA + c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data; +#endif + c_dst->c_size = c_src->c_size; + c_dst->c_packed_ptr = c_src->c_packed_ptr; + c_dst->c_offset = c_offset; + + slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst); 
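+			/*
+			 * c_packed_ptr holds a 36-bit offset (in 4-byte units) from
+			 * VM_MIN_KERNEL_AND_KEXT_ADDRESS back to the c_slot_mapping
+			 * that owns this slot (see C_SLOT_PACK_PTR)... the compressed
+			 * data was just slid down to index c_indx, so repoint the
+			 * mapping at the new index for later decompressions to find
+			 */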
+ slot_ptr->s_cindx = c_indx; + + c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; + + c_offset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); + PACK_C_SIZE(c_src, 0); + c_indx++; + + c_dst = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); + } + c_seg->c_firstemptyslot = c_indx; + c_seg->c_nextslot = c_indx; + c_seg->c_nextoffset = c_offset; + c_seg->c_populated_offset = (c_offset + (C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1)) & ~(C_SEG_BYTES_TO_OFFSET(PAGE_SIZE) - 1); + c_seg->c_bytes_unused = 0; + +#if VALIDATE_C_SEGMENTS + c_seg_validate(c_seg, TRUE); +#endif + + if (old_populated_offset > c_seg->c_populated_offset) { + uint32_t gc_size; + int32_t *gc_ptr; + + gc_size = C_SEG_OFFSET_TO_BYTES(old_populated_offset - c_seg->c_populated_offset); + gc_ptr = &c_seg->c_store.c_buffer[c_seg->c_populated_offset]; + + lck_mtx_unlock_always(&c_seg->c_lock); + + kernel_memory_depopulate(kernel_map, (vm_offset_t)gc_ptr, gc_size, KMA_COMPRESSOR); + + if (clear_busy == TRUE) + lck_mtx_lock_spin_always(&c_seg->c_lock); + else + need_unlock = FALSE; + } +done: + if (need_unlock == TRUE) { + if (clear_busy == TRUE) + C_SEG_WAKEUP_DONE(c_seg); + + lck_mtx_unlock_always(&c_seg->c_lock); + } + return (0); +} + + + +struct { + uint64_t asked_permission; + uint64_t compactions; + uint64_t moved_slots; + uint64_t moved_bytes; + uint64_t wasted_space_in_swapouts; + uint64_t count_of_swapouts; +} c_seg_major_compact_stats; + + +#define C_MAJOR_COMPACTION_AGE_APPROPRIATE 30 +#define C_MAJOR_COMPACTION_OLD_ENOUGH 300 +#define C_MAJOR_COMPACTION_SIZE_APPROPRIATE ((C_SEG_BUFSIZE * 80) / 100) + + +boolean_t +c_seg_major_compact_ok( + c_segment_t c_seg_dst, + c_segment_t c_seg_src) +{ + + c_seg_major_compact_stats.asked_permission++; + + if (c_seg_src->c_filling) { + /* + * we're at or near the head... don't compact + */ + return (FALSE); + } + if (c_seg_src->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE && + c_seg_dst->c_bytes_used >= C_MAJOR_COMPACTION_SIZE_APPROPRIATE) + return (FALSE); + + if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) { + /* + * destination segment is full... can't compact + */ + return (FALSE); + } + + return (TRUE); +} + + +boolean_t +c_seg_major_compact( + c_segment_t c_seg_dst, + c_segment_t c_seg_src) +{ + c_slot_mapping_t slot_ptr; + uint32_t c_rounded_size; + uint32_t c_size; + uint16_t dst_slot; + int i; + c_slot_t c_dst; + c_slot_t c_src; + int slotarray; + boolean_t keep_compacting = TRUE; + + /* + * segments are not locked but they are both marked c_busy + * which keeps c_decompress from working on them... 
+ * we can safely allocate new pages, move compressed data + * from c_seg_src to c_seg_dst and update both c_segment's + * state w/o holding the master lock + */ + +#if VALIDATE_C_SEGMENTS + c_seg_dst->c_was_major_compacted++; + c_seg_src->c_was_major_donor++; +#endif + c_seg_major_compact_stats.compactions++; + + dst_slot = c_seg_dst->c_nextslot; + + for (i = 0; i < c_seg_src->c_nextslot; i++) { + + c_src = C_SEG_SLOT_FROM_INDEX(c_seg_src, i); + + c_size = UNPACK_C_SIZE(c_src); + + if (c_size == 0) { + /* BATCH: move what we have so far; */ + continue; + } + + if (C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset - c_seg_dst->c_nextoffset) < (unsigned) c_size) { + /* doesn't fit */ + if ((C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) == C_SEG_BUFSIZE)) { + /* can't fit */ + keep_compacting = FALSE; + break; + } + kernel_memory_populate(kernel_map, + (vm_offset_t) &c_seg_dst->c_store.c_buffer[c_seg_dst->c_populated_offset], + PAGE_SIZE, + KMA_COMPRESSOR); + + c_seg_dst->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE); + assert(C_SEG_OFFSET_TO_BYTES(c_seg_dst->c_populated_offset) <= C_SEG_BUFSIZE); + } + + slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot); + + if (c_seg_dst->c_slots[slotarray] == 0) { + KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0); + c_seg_dst->c_slots[slotarray] = (struct c_slot *) + kalloc(sizeof(struct c_slot) * + C_SEG_SLOT_ARRAY_SIZE); + KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0); + } + c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, c_seg_dst->c_nextslot); + + memcpy(&c_seg_dst->c_store.c_buffer[c_seg_dst->c_nextoffset], &c_seg_src->c_store.c_buffer[c_src->c_offset], c_size); + + c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; + + c_seg_major_compact_stats.moved_slots++; + c_seg_major_compact_stats.moved_bytes += c_size; + +#if CHECKSUM_THE_DATA + c_dst->c_hash_data = c_src->c_hash_data; +#endif +#if CHECKSUM_THE_COMPRESSED_DATA + c_dst->c_hash_compressed_data = c_src->c_hash_compressed_data; +#endif + c_dst->c_size = c_src->c_size; + c_dst->c_packed_ptr = c_src->c_packed_ptr; + c_dst->c_offset = c_seg_dst->c_nextoffset; + + if (c_seg_dst->c_firstemptyslot == c_seg_dst->c_nextslot) + c_seg_dst->c_firstemptyslot++; + c_seg_dst->c_nextslot++; + c_seg_dst->c_bytes_used += c_rounded_size; + c_seg_dst->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); + + PACK_C_SIZE(c_src, 0); + + c_seg_src->c_bytes_used -= c_rounded_size; + c_seg_src->c_bytes_unused += c_rounded_size; + c_seg_src->c_firstemptyslot = 0; + + if (c_seg_dst->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg_dst->c_nextslot >= C_SLOT_MAX) { + /* dest segment is now full */ + keep_compacting = FALSE; + break; + } + } + if (dst_slot < c_seg_dst->c_nextslot) { + + PAGE_REPLACEMENT_ALLOWED(TRUE); + /* + * we've now locked out c_decompress from + * converting the slot passed into it into + * a c_segment_t which allows us to use + * the backptr to change which c_segment and + * index the slot points to + */ + while (dst_slot < c_seg_dst->c_nextslot) { + + c_dst = C_SEG_SLOT_FROM_INDEX(c_seg_dst, dst_slot); + + slot_ptr = (c_slot_mapping_t)C_SLOT_UNPACK_PTR(c_dst); + /* would mean "empty slot", so use csegno+1 */ + slot_ptr->s_cseg = c_seg_dst->c_mysegno + 1; + slot_ptr->s_cindx = dst_slot++; + } + PAGE_REPLACEMENT_ALLOWED(FALSE); + } + return (keep_compacting); +} + + +static uint64_t +compute_elapsed_msecs(clock_sec_t end_sec, clock_nsec_t end_nsec, clock_sec_t start_sec, clock_nsec_t start_nsec) +{ + uint64_t end_msecs; + 
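+	/* fold each (sec, nsec) pair into whole milliseconds so the two
+	 * sample timestamps can be differenced directly
+	 */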
uint64_t start_msecs; + + end_msecs = (end_sec * 1000) + end_nsec / 1000000; + start_msecs = (start_sec * 1000) + start_nsec / 1000000; + + return (end_msecs - start_msecs); +} + + + +uint32_t compressor_eval_period_in_msecs = 250; +uint32_t compressor_sample_min_in_msecs = 500; +uint32_t compressor_sample_max_in_msecs = 10000; +uint32_t compressor_thrashing_threshold_per_10msecs = 50; +uint32_t compressor_thrashing_min_per_10msecs = 20; + +extern uint32_t vm_page_filecache_min; + + +void +compute_swapout_target_age(void) +{ + clock_sec_t cur_ts_sec; + clock_nsec_t cur_ts_nsec; + uint32_t min_operations_needed_in_this_sample; + uint64_t elapsed_msecs_in_eval; + uint64_t elapsed_msecs_in_sample; + boolean_t need_sample_reset = FALSE; + boolean_t need_eval_reset = FALSE; + + clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); + + elapsed_msecs_in_sample = compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_sample_period_sec, start_of_sample_period_nsec); + + if (elapsed_msecs_in_sample >= compressor_sample_max_in_msecs) { + need_sample_reset = TRUE; + need_eval_reset = TRUE; + goto done; + } + elapsed_msecs_in_eval = compute_elapsed_msecs(cur_ts_sec, cur_ts_nsec, start_of_eval_period_sec, start_of_eval_period_nsec); + + if (elapsed_msecs_in_eval < compressor_eval_period_in_msecs) + goto done; + need_eval_reset = TRUE; + + KERNEL_DEBUG(0xe0400020 | DBG_FUNC_START, elapsed_msecs_in_eval, sample_period_compression_count, sample_period_decompression_count, 0, 0); + + min_operations_needed_in_this_sample = (compressor_thrashing_min_per_10msecs * (uint32_t)elapsed_msecs_in_eval) / 10; + + if ((sample_period_compression_count - last_eval_compression_count) < min_operations_needed_in_this_sample || + (sample_period_decompression_count - last_eval_decompression_count) < min_operations_needed_in_this_sample) { + + KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_compression_count - last_eval_compression_count, + sample_period_decompression_count - last_eval_decompression_count, 0, 1, 0); + + swapout_target_age = 0; + + need_sample_reset = TRUE; + need_eval_reset = TRUE; + goto done; + } + last_eval_compression_count = sample_period_compression_count; + last_eval_decompression_count = sample_period_decompression_count; + + if (elapsed_msecs_in_sample < compressor_sample_min_in_msecs) { + + KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, 0, 0, 5, 0); + goto done; + } + if (sample_period_decompression_count > ((compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10)) { + + uint64_t running_total; + uint64_t working_target; + uint64_t aging_target; + uint32_t oldest_age_of_csegs_sampled = 0; + uint64_t working_set_approximation = 0; + + swapout_target_age = 0; + + working_target = (sample_period_decompression_count / 100) * 95; /* 95 percent */ + aging_target = (sample_period_decompression_count / 100) * 1; /* 1 percent */ + running_total = 0; + + for (oldest_age_of_csegs_sampled = 0; oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE; oldest_age_of_csegs_sampled++) { + + running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; + + working_set_approximation += oldest_age_of_csegs_sampled * age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; + + if (running_total >= working_target) + break; + } + if (oldest_age_of_csegs_sampled < DECOMPRESSION_SAMPLE_MAX_AGE) { + + working_set_approximation = (working_set_approximation * 1000) / elapsed_msecs_in_sample; + + if (working_set_approximation < 
VM_PAGE_COMPRESSOR_COUNT) { + + running_total = overage_decompressions_during_sample_period; + + for (oldest_age_of_csegs_sampled = DECOMPRESSION_SAMPLE_MAX_AGE - 1; oldest_age_of_csegs_sampled; oldest_age_of_csegs_sampled--) { + running_total += age_of_decompressions_during_sample_period[oldest_age_of_csegs_sampled]; + + if (running_total >= aging_target) + break; + } + swapout_target_age = (uint32_t)cur_ts_sec - oldest_age_of_csegs_sampled; + + KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, swapout_target_age, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 2, 0); + } else { + KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_set_approximation, VM_PAGE_COMPRESSOR_COUNT, 0, 3, 0); + } + } else + KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, working_target, running_total, 0, 4, 0); + + need_sample_reset = TRUE; + need_eval_reset = TRUE; + } else + KERNEL_DEBUG(0xe0400020 | DBG_FUNC_END, sample_period_decompression_count, (compressor_thrashing_threshold_per_10msecs * elapsed_msecs_in_sample) / 10, 0, 6, 0); +done: + if (need_sample_reset == TRUE) { + bzero(age_of_decompressions_during_sample_period, sizeof(age_of_decompressions_during_sample_period)); + overage_decompressions_during_sample_period = 0; + + start_of_sample_period_sec = cur_ts_sec; + start_of_sample_period_nsec = cur_ts_nsec; + sample_period_decompression_count = 0; + sample_period_compression_count = 0; + last_eval_decompression_count = 0; + last_eval_compression_count = 0; + } + if (need_eval_reset == TRUE) { + start_of_eval_period_sec = cur_ts_sec; + start_of_eval_period_nsec = cur_ts_nsec; + } +} + + + +int calls_since_last_considered = 0; +int compaction_swapper_running = 0; +int compaction_swapper_abort = 0; + + +#if CONFIG_JETSAM +boolean_t memorystatus_kill_on_VM_thrashing(boolean_t); +int compressor_thrashing_induced_jetsam = 0; +boolean_t vm_compressor_thrashing_detected = FALSE; +#endif /* CONFIG_JETSAM */ + +static boolean_t +compressor_needs_to_swap(void) +{ + boolean_t should_swap = FALSE; + + if (vm_swap_up == TRUE) { + if (COMPRESSOR_NEEDS_TO_SWAP()) { + return (TRUE); + } + if (VM_PAGE_Q_THROTTLED(&vm_pageout_queue_external) && vm_page_anonymous_count < (vm_page_inactive_count / 20)) { + return (TRUE); + } + if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) + return (TRUE); + } + compute_swapout_target_age(); + + if (swapout_target_age) { + c_segment_t c_seg; + + lck_mtx_lock_spin_always(c_list_lock); + + if (!queue_empty(&c_age_list_head)) { + + c_seg = (c_segment_t) queue_first(&c_age_list_head); + + if (c_seg->c_creation_ts <= swapout_target_age) + should_swap = TRUE; + else + swapout_target_age = 0; + } + lck_mtx_unlock_always(c_list_lock); + } + + if (vm_swap_up == FALSE) { +#if CONFIG_JETSAM + if (should_swap) { + if (vm_compressor_thrashing_detected == FALSE) { + vm_compressor_thrashing_detected = TRUE; + memorystatus_kill_on_VM_thrashing(TRUE /* async */); + compressor_thrashing_induced_jetsam++; + /* + * let the jetsam take precedence over + * any major compactions we might have + * been able to do... otherwise we run + * the risk of doing major compactions + * on segments we're about to free up + * due to the jetsam activity. + */ + should_swap = FALSE; + } + } else +#endif /* CONFIG_JETSAM */ + if (COMPRESSOR_NEEDS_TO_MAJOR_COMPACT()) + should_swap = TRUE; + } + /* + * returning TRUE when swap_supported == FALSE + * will cause the major compaction engine to + * run, but will not trigger any swapping... 
+ * segments that have been major compacted + * will be moved to the swapped_out_q + * but will not have the c_ondisk flag set + */ + return (should_swap); +} + +uint64_t +vm_compressor_total_compressions(void) +{ + processor_t processor = processor_list; + vm_statistics64_t stat = &PROCESSOR_DATA(processor, vm_stat); + + uint64_t compressions = stat->compressions; + + if (processor_count > 1) { + simple_lock(&processor_list_lock); + + while ((processor = processor->processor_list) != NULL) { + stat = &PROCESSOR_DATA(processor, vm_stat); + compressions += stat->compressions; + } + + simple_unlock(&processor_list_lock); + } + + return compressions; +} + +uint32_t vm_wake_compactor_swapper_calls = 0; + +void +vm_wake_compactor_swapper(void) +{ + if (compaction_swapper_running) + return; + + if (c_minor_count == 0) + return; + + lck_mtx_lock_spin_always(c_list_lock); + + fastwake_warmup = FALSE; + + if (compaction_swapper_running == 0) { + vm_wake_compactor_swapper_calls++; + + thread_wakeup((event_t)&c_compressor_swap_trigger); + + compaction_swapper_running = 1; + } + lck_mtx_unlock_always(c_list_lock); +} + +void +vm_consider_waking_compactor_swapper(void) +{ + boolean_t need_wakeup = FALSE; + + if (calls_since_last_considered++ < 1000 || compaction_swapper_running) + return; + calls_since_last_considered = 0; + + if (c_minor_count && (COMPRESSOR_NEEDS_TO_MINOR_COMPACT())) { + + need_wakeup = TRUE; + + } else if (compressor_needs_to_swap()) { + + need_wakeup = TRUE; + + } else if (c_minor_count) { + uint64_t total_bytes; + + total_bytes = compressor_object->resident_page_count * PAGE_SIZE_64; + + if ((total_bytes - compressor_bytes_used) > total_bytes / 10) + need_wakeup = TRUE; + } + if (need_wakeup == TRUE) { + + lck_mtx_lock_spin_always(c_list_lock); + + fastwake_warmup = FALSE; + + if (compaction_swapper_running == 0) { + memoryshot(VM_WAKEUP_COMPACTOR_SWAPPER, DBG_FUNC_NONE); + + thread_wakeup((event_t)&c_compressor_swap_trigger); + + compaction_swapper_running = 1; + } + lck_mtx_unlock_always(c_list_lock); + } +} + + +#define C_SWAPOUT_LIMIT 4 +#define DELAYED_COMPACTIONS_PER_PASS 30 + +void +vm_compressor_do_delayed_compactions(boolean_t flush_all) +{ + c_segment_t c_seg; + int number_compacted = 0; + boolean_t needs_to_swap = FALSE; + + + lck_mtx_assert(c_list_lock, LCK_MTX_ASSERT_OWNED); + + while (!queue_empty(&c_minor_list_head) && needs_to_swap == FALSE) { + + c_seg = (c_segment_t)queue_first(&c_minor_list_head); + + lck_mtx_lock_spin_always(&c_seg->c_lock); + c_seg->c_busy = 1; + + c_seg_do_minor_compaction_and_unlock(c_seg, TRUE, FALSE, TRUE); + + if (vm_swap_up == TRUE && (number_compacted++ > DELAYED_COMPACTIONS_PER_PASS)) { + + if ((flush_all == TRUE || compressor_needs_to_swap() == TRUE) && c_swapout_count < C_SWAPOUT_LIMIT) + needs_to_swap = TRUE; + + number_compacted = 0; + } + lck_mtx_lock_spin_always(c_list_lock); + } +} + + +#define C_SEGMENT_SWAPPEDIN_AGE_LIMIT 10 + +static void +vm_compressor_age_swapped_in_segments(boolean_t flush_all) +{ + c_segment_t c_seg; + clock_sec_t now; + clock_nsec_t nsec; + + clock_get_system_nanotime(&now, &nsec); + + while (!queue_empty(&c_swappedin_list_head)) { + + c_seg = (c_segment_t)queue_first(&c_swappedin_list_head); + + if (flush_all == FALSE && (now - c_seg->c_swappedin_ts) < C_SEGMENT_SWAPPEDIN_AGE_LIMIT) + break; + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + queue_remove(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedin_q = 0; + c_swappedin_count--; + + c_seg_insert_into_q(&c_age_list_head, 
c_seg); + c_seg->c_on_age_q = 1; + c_age_count++; + + lck_mtx_unlock_always(&c_seg->c_lock); + } +} + + +void +vm_compressor_flush(void) +{ + uint64_t vm_swap_put_failures_at_start; + wait_result_t wait_result = 0; + AbsoluteTime startTime, endTime; + clock_sec_t now_sec; + clock_nsec_t now_nsec; + uint64_t nsec; + + HIBLOG("vm_compressor_flush - starting\n"); + + clock_get_uptime(&startTime); + + lck_mtx_lock_spin_always(c_list_lock); + + fastwake_warmup = FALSE; + compaction_swapper_abort = 1; + + while (compaction_swapper_running) { + assert_wait((event_t)&compaction_swapper_running, THREAD_UNINT); + + lck_mtx_unlock_always(c_list_lock); + + thread_block(THREAD_CONTINUE_NULL); + + lck_mtx_lock_spin_always(c_list_lock); + } + compaction_swapper_abort = 0; + compaction_swapper_running = 1; + + hibernate_flushing = TRUE; + hibernate_no_swapspace = FALSE; + c_generation_id_flush_barrier = c_generation_id + 1000; + + clock_get_system_nanotime(&now_sec, &now_nsec); + hibernate_flushing_deadline = now_sec + HIBERNATE_FLUSHING_SECS_TO_COMPLETE; + + vm_swap_put_failures_at_start = vm_swap_put_failures; + + vm_compressor_compact_and_swap(TRUE); + + while (!queue_empty(&c_swapout_list_head)) { + + assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC); + + lck_mtx_unlock_always(c_list_lock); + + wait_result = thread_block(THREAD_CONTINUE_NULL); + + lck_mtx_lock_spin_always(c_list_lock); + + if (wait_result == THREAD_TIMED_OUT) + break; + } + hibernate_flushing = FALSE; + compaction_swapper_running = 0; + + if (vm_swap_put_failures > vm_swap_put_failures_at_start) + HIBLOG("vm_compressor_flush failed to clean %llu segments - vm_page_compressor_count(%d)\n", + vm_swap_put_failures - vm_swap_put_failures_at_start, VM_PAGE_COMPRESSOR_COUNT); + + lck_mtx_unlock_always(c_list_lock); + + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &startTime); + absolutetime_to_nanoseconds(endTime, &nsec); + + HIBLOG("vm_compressor_flush completed - took %qd msecs\n", nsec / 1000000ULL); +} + + + +int compaction_swap_trigger_thread_awakened = 0; + +static void +vm_compressor_swap_trigger_thread(void) +{ + + lck_mtx_lock_spin_always(c_list_lock); + + compaction_swap_trigger_thread_awakened++; + + vm_compressor_compact_and_swap(FALSE); + + assert_wait((event_t)&c_compressor_swap_trigger, THREAD_UNINT); + + compaction_swapper_running = 0; + thread_wakeup((event_t)&compaction_swapper_running); + + lck_mtx_unlock_always(c_list_lock); + + thread_block((thread_continue_t)vm_compressor_swap_trigger_thread); + + /* NOTREACHED */ +} + + +void +vm_compressor_record_warmup_start(void) +{ + c_segment_t c_seg; + + lck_mtx_lock_spin_always(c_list_lock); + + if (!queue_empty(&c_age_list_head)) { + + c_seg = (c_segment_t)queue_last(&c_age_list_head); + + first_c_segment_to_warm_generation_id = c_seg->c_generation_id; + } else + first_c_segment_to_warm_generation_id = 0; + + fastwake_recording_in_progress = TRUE; + + lck_mtx_unlock_always(c_list_lock); +} + + +void +vm_compressor_record_warmup_end(void) +{ + c_segment_t c_seg; + + lck_mtx_lock_spin_always(c_list_lock); + + if (!queue_empty(&c_age_list_head)) { + + c_seg = (c_segment_t)queue_last(&c_age_list_head); + + last_c_segment_to_warm_generation_id = c_seg->c_generation_id; + } else + last_c_segment_to_warm_generation_id = first_c_segment_to_warm_generation_id; + + fastwake_recording_in_progress = FALSE; + + lck_mtx_unlock_always(c_list_lock); +} + + +#define DELAY_TRIM_ON_WAKE_SECS 4 + +void 
+vm_compressor_do_warmup(void) +{ + clock_sec_t sec; + clock_nsec_t nsec; + + clock_get_system_nanotime(&sec, &nsec); + dont_trim_until_ts = sec + DELAY_TRIM_ON_WAKE_SECS; + + if (first_c_segment_to_warm_generation_id == last_c_segment_to_warm_generation_id) + return; + + lck_mtx_lock_spin_always(c_list_lock); + + if (compaction_swapper_running == 0) { + + fastwake_warmup = TRUE; + compaction_swapper_running = 1; + thread_wakeup((event_t)&c_compressor_swap_trigger); + } + lck_mtx_unlock_always(c_list_lock); +} + + +void +do_fastwake_warmup(void) +{ + uint64_t my_thread_id; + c_segment_t c_seg = NULL; + + lck_mtx_unlock_always(c_list_lock); + + my_thread_id = current_thread()->thread_id; + proc_set_task_policy_thread(kernel_task, my_thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); + + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + lck_mtx_lock_spin_always(c_list_lock); + + while (!queue_empty(&c_swappedout_list_head) && fastwake_warmup == TRUE) { + + c_seg = (c_segment_t) queue_first(&c_swappedout_list_head); + + if (c_seg->c_generation_id < first_c_segment_to_warm_generation_id || + c_seg->c_generation_id > last_c_segment_to_warm_generation_id) + break; + + lck_mtx_lock_spin_always(&c_seg->c_lock); + lck_mtx_unlock_always(c_list_lock); + + if (c_seg->c_busy) + c_seg_wait_on_busy(c_seg); + else { + c_seg_swapin(c_seg, TRUE); + + lck_mtx_unlock_always(&c_seg->c_lock); + + c_segment_warmup_count++; + vm_pageout_io_throttle(); + } + lck_mtx_lock_spin_always(c_list_lock); + } + lck_mtx_unlock_always(c_list_lock); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + proc_set_task_policy_thread(kernel_task, my_thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER0); + + lck_mtx_lock_spin_always(c_list_lock); +} + + +void +vm_compressor_compact_and_swap(boolean_t flush_all) +{ + c_segment_t c_seg, c_seg_next; + boolean_t keep_compacting; + + + if (fastwake_warmup == TRUE) { + uint64_t starting_warmup_count; + + starting_warmup_count = c_segment_warmup_count; + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_START, c_segment_warmup_count, + first_c_segment_to_warm_generation_id, last_c_segment_to_warm_generation_id, 0, 0); + do_fastwake_warmup(); + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 11) | DBG_FUNC_END, c_segment_warmup_count, c_segment_warmup_count - starting_warmup_count, 0, 0, 0); + + fastwake_warmup = FALSE; + } + + while (!queue_empty(&c_age_list_head) && compaction_swapper_abort == 0) { + + if (hibernate_flushing == TRUE) { + clock_sec_t sec; + clock_nsec_t nsec; + + if (hibernate_should_abort()) { + HIBLOG("vm_compressor_flush - hibernate_should_abort returned TRUE\n"); + break; + } + if (hibernate_no_swapspace == TRUE) { + HIBLOG("vm_compressor_flush - out of swap space\n"); + break; + } + clock_get_system_nanotime(&sec, &nsec); + + if (sec > hibernate_flushing_deadline) { + HIBLOG("vm_compressor_flush - failed to finish before deadline\n"); + break; + } + } + if (c_swapout_count >= C_SWAPOUT_LIMIT) { + + assert_wait_timeout((event_t) &compaction_swapper_running, THREAD_INTERRUPTIBLE, 100, 1000*NSEC_PER_USEC); + + lck_mtx_unlock_always(c_list_lock); + + thread_block(THREAD_CONTINUE_NULL); + + lck_mtx_lock_spin_always(c_list_lock); + } + /* + * Minor compactions + */ + vm_compressor_do_delayed_compactions(flush_all); + + vm_compressor_age_swapped_in_segments(flush_all); + + if (c_swapout_count >= C_SWAPOUT_LIMIT) { + /* + * we timed out on the above thread_block + * let's loop around and try again + * the timeout 
allows us to continue + to do minor compactions to make + more memory available + */ + continue; + } + + /* + * Swap out segments? + */ + if (flush_all == FALSE) { + boolean_t needs_to_swap; + + lck_mtx_unlock_always(c_list_lock); + + needs_to_swap = compressor_needs_to_swap(); + + lck_mtx_lock_spin_always(c_list_lock); + + if (needs_to_swap == FALSE) + break; + } + if (queue_empty(&c_age_list_head)) + break; + c_seg = (c_segment_t) queue_first(&c_age_list_head); + + if (flush_all == TRUE && c_seg->c_generation_id > c_generation_id_flush_barrier) + break; + + if (c_seg->c_filling) { + /* + * we're at or near the head... no more work to do + */ + break; + } + lck_mtx_lock_spin_always(&c_seg->c_lock); + + if (c_seg->c_busy) { + + lck_mtx_unlock_always(c_list_lock); + c_seg_wait_on_busy(c_seg); + lck_mtx_lock_spin_always(c_list_lock); + + continue; + } + c_seg->c_busy = 1; + + if (c_seg_do_minor_compaction_and_unlock(c_seg, FALSE, TRUE, TRUE)) { + /* + * found an empty c_segment and freed it + * so go grab the next guy in the queue + */ + continue; + } + /* + * Major compaction + */ + keep_compacting = TRUE; + + while (keep_compacting == TRUE) { + + assert(c_seg->c_busy); + + /* look for another segment to consolidate */ + + c_seg_next = (c_segment_t) queue_next(&c_seg->c_age_list); + + if (queue_end(&c_age_list_head, (queue_entry_t)c_seg_next)) + break; + + if (c_seg_major_compact_ok(c_seg, c_seg_next) == FALSE) + break; + + lck_mtx_lock_spin_always(&c_seg_next->c_lock); + + if (c_seg_next->c_busy) { + + lck_mtx_unlock_always(c_list_lock); + c_seg_wait_on_busy(c_seg_next); + lck_mtx_lock_spin_always(c_list_lock); + + continue; + } + /* grab that segment */ + c_seg_next->c_busy = 1; + + if (c_seg_do_minor_compaction_and_unlock(c_seg_next, FALSE, TRUE, TRUE)) { + /* + * found an empty c_segment and freed it + * so we can't continue to use c_seg_next + */ + continue; + } + + /* unlock the list ... */ + lck_mtx_unlock_always(c_list_lock); + + /* do the major compaction */ + + keep_compacting = c_seg_major_compact(c_seg, c_seg_next); + + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + lck_mtx_lock_spin_always(&c_seg_next->c_lock); + /* + * run a minor compaction on the donor segment + * since we pulled at least some of its 
if we've emptied + * it, now is a good time to free it which + * c_seg_minor_compaction_and_unlock also takes care of + * + * by passing TRUE, we ask for c_busy to be cleared + * and c_wanted to be taken care of + */ + c_seg_minor_compaction_and_unlock(c_seg_next, TRUE); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + /* relock the list */ + lck_mtx_lock_spin_always(c_list_lock); + + } /* major compaction */ + + c_seg_major_compact_stats.wasted_space_in_swapouts += C_SEG_BUFSIZE - c_seg->c_bytes_used; + c_seg_major_compact_stats.count_of_swapouts++; + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + assert(c_seg->c_busy); + assert(c_seg->c_on_age_q); + assert(!c_seg->c_on_minorcompact_q); + + queue_remove(&c_age_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_age_q = 0; + c_age_count--; + + if (vm_swap_up == TRUE) { + queue_enter(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swapout_q = 1; + c_swapout_count++; + } else { + queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedout_q = 1; + c_swappedout_count++; + } + C_SEG_WAKEUP_DONE(c_seg); + + lck_mtx_unlock_always(&c_seg->c_lock); + + if (c_swapout_count) { + lck_mtx_unlock_always(c_list_lock); + + thread_wakeup((event_t)&c_swapout_list_head); + + lck_mtx_lock_spin_always(c_list_lock); + } + } +} + + +static c_segment_t +c_seg_allocate(c_segment_t *current_chead) +{ + clock_sec_t sec; + clock_nsec_t nsec; + c_segment_t c_seg; + int slotarray; + + if ( (c_seg = *current_chead) == NULL ) { + uint32_t c_segno; + + KERNEL_DEBUG(0xe0400004 | DBG_FUNC_START, 0, 0, 0, 0, 0); + + lck_mtx_lock_spin_always(c_list_lock); + + while (c_segments_busy == TRUE) { + assert_wait((event_t) (&c_segments_busy), THREAD_UNINT); + + lck_mtx_unlock_always(c_list_lock); + + thread_block(THREAD_CONTINUE_NULL); + + lck_mtx_lock_spin_always(c_list_lock); + } + if (c_free_segno_head == (uint32_t)-1) { + + if (c_segments_available >= c_segments_limit || c_segment_pages_compressed >= c_segment_pages_compressed_limit) { + lck_mtx_unlock_always(c_list_lock); + + KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 1, 0); + return (NULL); + } + c_segments_busy = TRUE; + lck_mtx_unlock_always(c_list_lock); + + kernel_memory_populate(kernel_map, (vm_offset_t)c_segments_next_page, PAGE_SIZE, KMA_KOBJECT); + c_segments_next_page += PAGE_SIZE; + + for (c_segno = c_segments_available + 1; c_segno < (c_segments_available + C_SEGMENTS_PER_PAGE); c_segno++) + c_segments[c_segno - 1].c_segno = c_segno; + + lck_mtx_lock_spin_always(c_list_lock); + + c_segments[c_segno - 1].c_segno = c_free_segno_head; + c_free_segno_head = c_segments_available; + c_segments_available += C_SEGMENTS_PER_PAGE; + + c_segments_busy = FALSE; + thread_wakeup((event_t) (&c_segments_busy)); + } + c_segno = c_free_segno_head; + c_free_segno_head = c_segments[c_segno].c_segno; + + lck_mtx_unlock_always(c_list_lock); + + c_seg = (c_segment_t)zalloc(compressor_segment_zone); + bzero((char *)c_seg, sizeof(struct c_segment)); + + if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&c_seg->c_store.c_buffer), C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) { + zfree(compressor_segment_zone, c_seg); + + lck_mtx_lock_spin_always(c_list_lock); + + c_segments[c_segno].c_segno = c_free_segno_head; + c_free_segno_head = c_segno; + + lck_mtx_unlock_always(c_list_lock); + + KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, 0, 0, 0, 2, 0); + + return (NULL); + } + +#if __i386__ || __x86_64__ + lck_mtx_init(&c_seg->c_lock, &vm_compressor_lck_grp, 
&vm_compressor_lck_attr); +#else /* __i386__ || __x86_64__ */ + lck_spin_init(&c_seg->c_lock, &vm_compressor_lck_grp, &vm_compressor_lck_attr); +#endif /* __i386__ || __x86_64__ */ + + kernel_memory_populate(kernel_map, (vm_offset_t)(c_seg->c_store.c_buffer), 3 * PAGE_SIZE, KMA_COMPRESSOR); + + c_seg->c_populated_offset = C_SEG_BYTES_TO_OFFSET(3 * PAGE_SIZE); + c_seg->c_firstemptyslot = C_SLOT_MAX; + c_seg->c_mysegno = c_segno; + c_seg->c_filling = 1; + + lck_mtx_lock_spin_always(c_list_lock); + + c_segment_count++; + c_segments[c_segno].c_seg = c_seg; + + c_seg->c_generation_id = c_generation_id++; + + queue_enter(&c_age_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_age_q = 1; + c_age_count++; + + lck_mtx_unlock_always(c_list_lock); + + clock_get_system_nanotime(&sec, &nsec); + c_seg->c_creation_ts = (uint32_t)sec; + + *current_chead = c_seg; + + KERNEL_DEBUG(0xe0400004 | DBG_FUNC_END, c_seg, 0, 0, 3, 0); + } + slotarray = C_SEG_SLOTARRAY_FROM_INDEX(c_seg, c_seg->c_nextslot); + + if (c_seg->c_slots[slotarray] == 0) { + KERNEL_DEBUG(0xe0400008 | DBG_FUNC_START, 0, 0, 0, 0, 0); + + c_seg->c_slots[slotarray] = (struct c_slot *)kalloc(sizeof(struct c_slot) * C_SEG_SLOT_ARRAY_SIZE); + + KERNEL_DEBUG(0xe0400008 | DBG_FUNC_END, 0, 0, 0, 0, 0); + } + + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + return (c_seg); +} + + + +static void +c_current_seg_filled(c_segment_t c_seg, c_segment_t *current_chead) +{ + uint32_t unused_bytes; + uint32_t offset_to_depopulate; + + unused_bytes = trunc_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - c_seg->c_nextoffset)); + + if (unused_bytes) { + + offset_to_depopulate = C_SEG_BYTES_TO_OFFSET(round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_nextoffset))); + + /* + * release the extra physical page(s) at the end of the segment + */ + lck_mtx_unlock_always(&c_seg->c_lock); + + kernel_memory_depopulate( + kernel_map, + (vm_offset_t) &c_seg->c_store.c_buffer[offset_to_depopulate], + unused_bytes, + KMA_COMPRESSOR); + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + c_seg->c_populated_offset = offset_to_depopulate; + } + c_seg->c_filling = 0; + + if (C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) + c_seg_need_delayed_compaction(c_seg); + + lck_mtx_unlock_always(&c_seg->c_lock); + + *current_chead = NULL; +} + + +/* + * returns with c_seg locked + */ +void +c_seg_swapin_requeue(c_segment_t c_seg) +{ + clock_sec_t sec; + clock_nsec_t nsec; + + clock_get_system_nanotime(&sec, &nsec); + + lck_mtx_lock_spin_always(c_list_lock); + lck_mtx_lock_spin_always(&c_seg->c_lock); + + if (c_seg->c_on_swappedout_q) { + queue_remove(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedout_q = 0; + c_swappedout_count--; + } else { + assert(c_seg->c_on_swappedout_sparse_q); + + queue_remove(&c_swappedout_sparse_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedout_sparse_q = 0; + c_swappedout_sparse_count--; + } + if (c_seg->c_store.c_buffer) { + queue_enter(&c_swappedin_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedin_q = 1; + c_swappedin_count++; + } +#if TRACK_BAD_C_SEGMENTS + else { + queue_enter(&c_bad_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_bad_q = 1; + c_bad_count++; + } +#endif + c_seg->c_swappedin_ts = (uint32_t)sec; + c_seg->c_ondisk = 0; + c_seg->c_was_swapped_in = 1; + + lck_mtx_unlock_always(c_list_lock); +} + + + +/* + * c_seg has to be locked and is returned locked. 
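+ * (overview of the body below: if the segment is still resident in + * memory the call devolves to a requeue; if c_ondisk is set we allocate + * a fresh VA-only buffer, populate io_size bytes of it, read the data + * back with vm_swap_get(), then decrypt and checksum-verify it -- when + * CRYPTO / CHECKSUM_THE_SWAP are configured -- before requeueing.)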
+ * PAGE_REPLACEMENT_DISALLOWED has to be TRUE on entry and is returned TRUE + */ + +void +c_seg_swapin(c_segment_t c_seg, boolean_t force_minor_compaction) +{ + vm_offset_t addr = 0; + uint32_t io_size = 0; + uint64_t f_offset; + +#if !CHECKSUM_THE_SWAP + if (c_seg->c_ondisk) + c_seg_trim_tail(c_seg); +#endif + io_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); + f_offset = c_seg->c_store.c_swap_handle; + + c_seg->c_busy = 1; + lck_mtx_unlock_always(&c_seg->c_lock); + + if (c_seg->c_ondisk) { + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + if (kernel_memory_allocate(kernel_map, &addr, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) + panic("c_seg_swapin: kernel_memory_allocate failed\n"); + + kernel_memory_populate(kernel_map, addr, io_size, KMA_COMPRESSOR); + + if (vm_swap_get(addr, f_offset, io_size) != KERN_SUCCESS) { + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + kernel_memory_depopulate(kernel_map, addr, io_size, KMA_COMPRESSOR); + kmem_free(kernel_map, addr, C_SEG_ALLOCSIZE); + + c_seg->c_store.c_buffer = (int32_t*) NULL; + } else { + c_seg->c_store.c_buffer = (int32_t*) addr; +#if CRYPTO + vm_swap_decrypt(c_seg); +#endif /* CRYPTO */ + +#if CHECKSUM_THE_SWAP + if (c_seg->cseg_swap_size != io_size) + panic("swapin size doesn't match swapout size"); + + if (c_seg->cseg_hash != hash_string((char*) c_seg->c_store.c_buffer, (int)io_size)) { + panic("c_seg_swapin - Swap hash mismatch\n"); + } +#endif /* CHECKSUM_THE_SWAP */ + + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + if (force_minor_compaction == TRUE) { + lck_mtx_lock_spin_always(&c_seg->c_lock); + + c_seg_minor_compaction_and_unlock(c_seg, FALSE); + } + OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used); + } + } + c_seg_swapin_requeue(c_seg); + + C_SEG_WAKEUP_DONE(c_seg); +} + + +static int +c_compress_page(char *src, c_slot_mapping_t slot_ptr, c_segment_t *current_chead, char *scratch_buf) +{ + int c_size; + int c_rounded_size; + int max_csize; + c_slot_t cs; + c_segment_t c_seg; + + KERNEL_DEBUG(0xe0400000 | DBG_FUNC_START, *current_chead, 0, 0, 0, 0); +retry: + if ((c_seg = c_seg_allocate(current_chead)) == NULL) + return (1); + /* + * returns with c_seg lock held + * and PAGE_REPLACEMENT_DISALLOWED(TRUE) + */ + cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_seg->c_nextslot); + + cs->c_packed_ptr = C_SLOT_PACK_PTR(slot_ptr); + cs->c_offset = c_seg->c_nextoffset; + + max_csize = C_SEG_BUFSIZE - C_SEG_OFFSET_TO_BYTES((int32_t)cs->c_offset); + + if (max_csize > PAGE_SIZE) + max_csize = PAGE_SIZE; + + if (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset - + c_seg->c_nextoffset) + < (unsigned) max_csize + PAGE_SIZE && + (C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset) + < C_SEG_ALLOCSIZE)) { + lck_mtx_unlock_always(&c_seg->c_lock); + + kernel_memory_populate(kernel_map, + (vm_offset_t) &c_seg->c_store.c_buffer[c_seg->c_populated_offset], + PAGE_SIZE, + KMA_COMPRESSOR); + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + c_seg->c_populated_offset += C_SEG_BYTES_TO_OFFSET(PAGE_SIZE); + } + +#if CHECKSUM_THE_DATA + cs->c_hash_data = hash_string(src, PAGE_SIZE); +#endif + c_size = WKdm_compress_new((WK_word *)(uintptr_t)src, (WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], + (WK_word *)(uintptr_t)scratch_buf, max_csize - 4); + + assert(c_size <= (max_csize - 4) && c_size >= -1); + + if (c_size == -1) { + + if (max_csize < PAGE_SIZE) { + c_current_seg_filled(c_seg, current_chead); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + goto retry; + } + c_size = PAGE_SIZE; + + 
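/* + * we get here when WKdm_compress_new() returned -1 above, i.e. the + * compressed form would not fit in max_csize - 4 bytes... since + * max_csize is a full page at this point, the page is kept + * uncompressed: c_size is forced to PAGE_SIZE and the raw page is + * copied into the segment buffer below. + */ +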
memcpy(&c_seg->c_store.c_buffer[cs->c_offset], src, c_size); + } +#if CHECKSUM_THE_COMPRESSED_DATA + cs->c_hash_compressed_data = hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size); +#endif + c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; + + PACK_C_SIZE(cs, c_size); + c_seg->c_bytes_used += c_rounded_size; + c_seg->c_nextoffset += C_SEG_BYTES_TO_OFFSET(c_rounded_size); + + slot_ptr->s_cindx = c_seg->c_nextslot++; + /* <csegno=0,indx=0> would mean "empty slot", so use csegno+1 */ + slot_ptr->s_cseg = c_seg->c_mysegno + 1; + + if (c_seg->c_nextoffset >= C_SEG_OFF_LIMIT || c_seg->c_nextslot >= C_SLOT_MAX) + c_current_seg_filled(c_seg, current_chead); + else + lck_mtx_unlock_always(&c_seg->c_lock); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + OSAddAtomic64(c_rounded_size, &compressor_bytes_used); + OSAddAtomic64(PAGE_SIZE, &c_segment_input_bytes); + OSAddAtomic64(c_size, &c_segment_compressed_bytes); + + OSAddAtomic(1, &c_segment_pages_compressed); + OSAddAtomic(1, &sample_period_compression_count); + + KERNEL_DEBUG(0xe0400000 | DBG_FUNC_END, *current_chead, c_size, c_segment_input_bytes, c_segment_compressed_bytes, 0); + + if (vm_compressor_low_on_space()) { + ipc_port_t trigger = IP_NULL; + + PSL_LOCK(); + if (IP_VALID(min_pages_trigger_port)) { + trigger = min_pages_trigger_port; + min_pages_trigger_port = IP_NULL; + } + PSL_UNLOCK(); + + if (IP_VALID(trigger)) { + no_paging_space_action(); + default_pager_space_alert(trigger, HI_WAT_ALERT); + ipc_port_release_send(trigger); + } + } + return (0); +} + + +static int +c_decompress_page(char *dst, volatile c_slot_mapping_t slot_ptr, int flags, int *zeroslot) +{ + c_slot_t cs; + c_segment_t c_seg; + int c_indx; + int c_rounded_size; + uint32_t c_size; + int retval = 0; + boolean_t c_seg_has_data = TRUE; + boolean_t c_seg_swappedin = FALSE; + boolean_t need_unlock = TRUE; + boolean_t consider_defragmenting = FALSE; + +ReTry: +#if HIBERNATION + if (dst) { + if (lck_rw_try_lock_shared(&c_decompressor_lock) == 0) { + if (flags & C_DONT_BLOCK) { + *zeroslot = 0; + return (-2); + } + lck_rw_lock_shared(&c_decompressor_lock); + } + } +#endif + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + /* s_cseg is actually "segno+1" */ + c_seg = c_segments[slot_ptr->s_cseg - 1].c_seg; + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + if (flags & C_DONT_BLOCK) { + if (c_seg->c_busy || c_seg->c_ondisk) { + + retval = -2; + *zeroslot = 0; + + goto done; + } + } + if (c_seg->c_busy) { + + PAGE_REPLACEMENT_DISALLOWED(FALSE); +#if HIBERNATION + if (dst) + lck_rw_done(&c_decompressor_lock); +#endif + c_seg_wait_on_busy(c_seg); + + goto ReTry; + } + c_indx = slot_ptr->s_cindx; + + cs = C_SEG_SLOT_FROM_INDEX(c_seg, c_indx); + + c_size = UNPACK_C_SIZE(cs); + + c_rounded_size = (c_size + C_SEG_OFFSET_ALIGNMENT_MASK) & ~C_SEG_OFFSET_ALIGNMENT_MASK; + + if (dst) { + uint32_t age_of_cseg; + clock_sec_t cur_ts_sec; + clock_nsec_t cur_ts_nsec; + + if (c_seg->c_on_swappedout_q || c_seg->c_on_swappedout_sparse_q) { + if (c_seg->c_ondisk) + c_seg_swappedin = TRUE; + c_seg_swapin(c_seg, FALSE); + } + if (c_seg->c_store.c_buffer == NULL) { + c_seg_has_data = FALSE; + goto c_seg_invalid_data; + } +#if CHECKSUM_THE_COMPRESSED_DATA + if (cs->c_hash_compressed_data != hash_string((char *)&c_seg->c_store.c_buffer[cs->c_offset], c_size)) + panic("compressed data doesn't match original"); +#endif + if (c_rounded_size == PAGE_SIZE) { + /* + * page wasn't compressible... 
just copy it out + */ + memcpy(dst, &c_seg->c_store.c_buffer[cs->c_offset], PAGE_SIZE); + } else { + uint32_t my_cpu_no; + char *scratch_buf; + + /* + * we're behind the c_seg lock held in spin mode + * which means pre-emption is disabled... therefore + * the following sequence is atomic and safe + */ + my_cpu_no = cpu_number(); + + assert(my_cpu_no < compressor_cpus); + + scratch_buf = &compressor_scratch_bufs[my_cpu_no * WKdm_SCRATCH_BUF_SIZE]; + + WKdm_decompress_new((WK_word *)(uintptr_t)&c_seg->c_store.c_buffer[cs->c_offset], + (WK_word *)(uintptr_t)dst, (WK_word *)(uintptr_t)scratch_buf, c_size); + } + +#if CHECKSUM_THE_DATA + if (cs->c_hash_data != hash_string(dst, PAGE_SIZE)) + panic("decompressed data doesn't match original"); +#endif + if (!c_seg->c_was_swapped_in) { + + clock_get_system_nanotime(&cur_ts_sec, &cur_ts_nsec); + + age_of_cseg = (uint32_t)cur_ts_sec - c_seg->c_creation_ts; + + if (age_of_cseg < DECOMPRESSION_SAMPLE_MAX_AGE) + OSAddAtomic(1, &age_of_decompressions_during_sample_period[age_of_cseg]); + else + OSAddAtomic(1, &overage_decompressions_during_sample_period); + + OSAddAtomic(1, &sample_period_decompression_count); + } + } else { + if (c_seg->c_store.c_buffer == NULL) + c_seg_has_data = FALSE; + } +c_seg_invalid_data: + + if (c_seg_has_data == TRUE) { + if (c_seg_swappedin == TRUE) + retval = 1; + else + retval = 0; + } else + retval = -1; + + if (flags & C_KEEP) { + *zeroslot = 0; + goto done; + } + c_seg->c_bytes_unused += c_rounded_size; + c_seg->c_bytes_used -= c_rounded_size; + PACK_C_SIZE(cs, 0); + + if (c_indx < c_seg->c_firstemptyslot) + c_seg->c_firstemptyslot = c_indx; + + OSAddAtomic(-1, &c_segment_pages_compressed); + + if (c_seg_has_data == TRUE && !c_seg->c_ondisk) { + /* + * c_ondisk == TRUE can occur when we're doing a + * free of a compressed page (i.e. dst == NULL) + */ + OSAddAtomic64(-c_rounded_size, &compressor_bytes_used); + } + if (!c_seg->c_filling) { + if (c_seg->c_bytes_used == 0) { + if (c_seg->c_on_minorcompact_q || c_seg->c_on_swappedout_sparse_q) { + if (c_seg_try_free(c_seg) == TRUE) + need_unlock = FALSE; + } else { + c_seg_free(c_seg); + need_unlock = FALSE; + } + } else if (c_seg->c_on_minorcompact_q) { + + if (C_SEG_INCORE_IS_SPARSE(c_seg)) { + c_seg_try_minor_compaction_and_unlock(c_seg); + need_unlock = FALSE; + } + } else if (!c_seg->c_ondisk) { + + if (c_seg_has_data == TRUE && !c_seg->c_on_swapout_q && C_SEG_UNUSED_BYTES(c_seg) >= PAGE_SIZE) { + c_seg_need_delayed_compaction(c_seg); + } + } else if (!c_seg->c_on_swappedout_sparse_q && C_SEG_ONDISK_IS_SPARSE(c_seg)) { + + c_seg_move_to_sparse_list(c_seg); + consider_defragmenting = TRUE; + } + } +done: + if (need_unlock == TRUE) + lck_mtx_unlock_always(&c_seg->c_lock); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + if (consider_defragmenting == TRUE) + vm_swap_consider_defragmenting(); +#if HIBERNATION + if (dst) + lck_rw_done(&c_decompressor_lock); +#endif + return (retval); +} + + +int +vm_compressor_get(ppnum_t pn, int *slot, int flags) +{ + char *dst; + int zeroslot = 1; + int retval; + +#if __x86_64__ + dst = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT); +#else +#error "unsupported architecture" +#endif + + retval = c_decompress_page(dst, (c_slot_mapping_t)slot, flags, &zeroslot); + + /* + * zeroslot will be set to 0 by c_decompress_page if (flags & C_KEEP) + * or (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set + */ + if (zeroslot) { + /* + * We've just decompressed a page, and are about to hand that back to VM for + * re-entry into some pmap. 
This is a decompression operation which must have no + * impact on the pmap's physical footprint. However, when VM goes to re-enter + * this page into the pmap, it doesn't know that it came from the compressor, + * which means the pmap's physical footprint will be incremented. To compensate + * for that, we decrement the physical footprint here, so that the total net effect + * on the physical footprint statistic is zero. + */ + pmap_ledger_debit(current_task()->map->pmap, task_ledgers.phys_footprint, PAGE_SIZE); + + *slot = 0; + } + /* + * returns 0 if we successfully decompressed a page from a segment already in memory + * returns 1 if we had to first swap in the segment, before successfully decompressing the page + * returns -1 if we encountered an error swapping in the segment - decompression failed + * returns -2 if (flags & C_DONT_BLOCK) and we found 'c_busy' or 'c_ondisk' set + */ + return (retval); +} + + +void +vm_compressor_free(int *slot) +{ + int zeroslot = 1; + + (void)c_decompress_page(NULL, (c_slot_mapping_t)slot, 0, &zeroslot); + + *slot = 0; +} + + +int +vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf) +{ + char *src; + int retval; + + if ((vm_offset_t)slot < VM_MIN_KERNEL_AND_KEXT_ADDRESS || (vm_offset_t)slot >= VM_MAX_KERNEL_ADDRESS) + panic("vm_compressor_put: slot 0x%llx address out of range [0x%llx:0x%llx]", + (uint64_t)(vm_offset_t) slot, + (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS, + (uint64_t) VM_MAX_KERNEL_ADDRESS); + +#if __x86_64__ + src = PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT); +#else +#error "unsupported architecture" +#endif + retval = c_compress_page(src, (c_slot_mapping_t)slot, (c_segment_t *)current_chead, scratch_buf); + + return (retval); +} diff --git a/osfmk/vm/vm_compressor.h b/osfmk/vm/vm_compressor.h new file mode 100644 index 000000000..c2d85ab08 --- /dev/null +++ b/osfmk/vm/vm_compressor.h @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +#define C_SEG_OFFSET_BITS 16 +#define C_SEG_BUFSIZE (1024 * 256) +#define C_SEG_ALLOCSIZE (C_SEG_BUFSIZE + PAGE_SIZE) +#define C_SEG_OFF_LIMIT (C_SEG_BYTES_TO_OFFSET((C_SEG_BUFSIZE - 512))) + +#define C_SEG_SLOT_ARRAYS 6 +#define C_SEG_SLOT_ARRAY_SIZE 64 /* must be a power of 2 */ +#define C_SEG_SLOT_ARRAY_MASK (C_SEG_SLOT_ARRAY_SIZE - 1) +#define C_SLOT_MAX (C_SEG_SLOT_ARRAYS * C_SEG_SLOT_ARRAY_SIZE) + + +#define CHECKSUM_THE_SWAP 0 /* Debug swap data */ +#define CHECKSUM_THE_DATA 0 /* Debug compressor/decompressor data */ +#define CHECKSUM_THE_COMPRESSED_DATA 0 /* Debug compressor/decompressor compressed data */ +#define VALIDATE_C_SEGMENTS 0 /* Debug compaction */ +#define TRACK_BAD_C_SEGMENTS 0 /* Debug I/O error handling */ + +struct c_segment { +#if __i386__ || __x86_64__ + lck_mtx_t c_lock; +#else /* __i386__ || __x86_64__ */ + lck_spin_t c_lock; +#endif /* __i386__ || __x86_64__ */ + queue_chain_t c_age_list; + queue_chain_t c_list; + + uint64_t c_generation_id; + int32_t c_bytes_used; + int32_t c_bytes_unused; + +#define C_SEG_MAX_LIMIT (1 << 19) /* this needs to track the size of c_mysegno */ + uint32_t c_mysegno:19, + c_filling:1, + c_busy:1, + c_busy_swapping:1, + c_wanted:1, + c_must_free:1, + c_ondisk:1, + c_was_swapped_in:1, + c_on_minorcompact_q:1, /* can also be on the age_q or the swappedin_q */ + c_on_age_q:1, /* creation age ordered list of in-core segments that + are available to be major-compacted and swapped out */ + c_on_swappedin_q:1, /* allows us to age newly swapped in segments */ + c_on_swapout_q:1, /* this is a transient queue */ + c_on_swappedout_q:1, /* segment has been major-compacted and + possibly swapped out to disk (c_ondisk == 1) */ + c_on_swappedout_sparse_q:1; /* segment has become sparse and should be garbage + collected if too many segments reach this state */ + uint16_t c_firstemptyslot; + uint16_t c_nextslot; + uint32_t c_nextoffset; + uint32_t c_populated_offset; + + uint32_t c_creation_ts; + uint32_t c_swappedin_ts; + + union { + int32_t *c_buffer; + uint64_t c_swap_handle; + } c_store; + +#if TRACK_BAD_C_SEGMENTS + uint32_t c_on_bad_q; +#endif + +#if VALIDATE_C_SEGMENTS + uint32_t c_was_minor_compacted; + uint32_t c_was_major_compacted; + uint32_t c_was_major_donor; +#endif +#if CHECKSUM_THE_SWAP + unsigned int cseg_hash; + unsigned int cseg_swap_size; +#endif /* CHECKSUM_THE_SWAP */ + + struct c_slot *c_slots[C_SEG_SLOT_ARRAYS]; +}; + + +#define C_SEG_SLOT_FROM_INDEX(cseg, index) (&(cseg->c_slots[index / C_SEG_SLOT_ARRAY_SIZE])[index & C_SEG_SLOT_ARRAY_MASK]) +#define C_SEG_SLOTARRAY_FROM_INDEX(cseg, index) (index / C_SEG_SLOT_ARRAY_SIZE) + +#define C_SEG_OFFSET_TO_BYTES(off) ((off) * (int) sizeof(int32_t)) +#define C_SEG_BYTES_TO_OFFSET(bytes) ((bytes) / (int) sizeof(int32_t)) + +#define C_SEG_UNUSED_BYTES(cseg) (cseg->c_bytes_unused + (C_SEG_OFFSET_TO_BYTES(cseg->c_populated_offset - cseg->c_nextoffset))) + +#define C_SEG_OFFSET_ALIGNMENT_MASK 0x3 + +#define C_SEG_ONDISK_IS_SPARSE(cseg) ((cseg->c_bytes_used < (C_SEG_BUFSIZE / 2)) ? 1 : 0) +#define C_SEG_INCORE_IS_SPARSE(cseg) ((C_SEG_UNUSED_BYTES(cseg) >= (C_SEG_BUFSIZE / 2)) ? 
1 : 0) + +#define C_SEG_WAKEUP_DONE(cseg) \ + MACRO_BEGIN \ + (cseg)->c_busy = 0; \ + if ((cseg)->c_wanted) { \ + (cseg)->c_wanted = 0; \ + thread_wakeup((event_t) (cseg)); \ + } \ + MACRO_END + + +typedef struct c_segment *c_segment_t; +typedef struct c_slot *c_slot_t; + +uint64_t vm_compressor_total_compressions(void); +void vm_wake_compactor_swapper(void); +void vm_consider_waking_compactor_swapper(void); +void vm_compressor_flush(void); +void c_seg_free(c_segment_t); +void c_seg_free_locked(c_segment_t); +void c_seg_insert_into_age_q(c_segment_t); + +void vm_decompressor_lock(void); +void vm_decompressor_unlock(void); + +void vm_compressor_do_warmup(void); +void vm_compressor_record_warmup_start(void); +void vm_compressor_record_warmup_end(void); + +int vm_low_on_space(void); +boolean_t vm_compression_available(void); + +extern void vm_compressor_init_locks(void); +extern lck_rw_t c_master_lock; + +#if CRYPTO +extern void vm_swap_decrypt(c_segment_t); +#endif /* CRYPTO */ + +extern kern_return_t vm_swap_get(vm_offset_t, uint64_t, uint64_t); +extern void vm_swap_free(uint64_t); +extern void vm_swap_consider_defragmenting(void); + +extern void c_seg_swapin_requeue(c_segment_t); +extern void c_seg_swapin(c_segment_t, boolean_t); +extern void c_seg_wait_on_busy(c_segment_t); +extern void c_seg_trim_tail(c_segment_t); + +extern boolean_t fastwake_recording_in_progress; +extern int compaction_swapper_running; +extern uint64_t vm_swap_put_failures; + +extern queue_head_t c_minor_list_head; +extern queue_head_t c_age_list_head; +extern queue_head_t c_swapout_list_head; +extern queue_head_t c_swappedout_list_head; +extern queue_head_t c_swappedout_sparse_list_head; + +extern uint32_t c_age_count; +extern uint32_t c_swapout_count; +extern uint32_t c_swappedout_count; +extern uint32_t c_swappedout_sparse_count; + +extern int64_t compressor_bytes_used; +extern uint64_t first_c_segment_to_warm_generation_id; +extern uint64_t last_c_segment_to_warm_generation_id; +extern boolean_t hibernate_flushing; +extern boolean_t hibernate_no_swapspace; +extern uint32_t swapout_target_age; + +extern void c_seg_insert_into_q(queue_head_t *, c_segment_t); + +extern uint32_t vm_compressor_minorcompact_threshold_divisor; +extern uint32_t vm_compressor_majorcompact_threshold_divisor; +extern uint32_t vm_compressor_unthrottle_threshold_divisor; +extern uint32_t vm_compressor_catchup_threshold_divisor; + +#define PAGE_REPLACEMENT_DISALLOWED(enable) (enable == TRUE ? lck_rw_lock_shared(&c_master_lock) : lck_rw_done(&c_master_lock)) +#define PAGE_REPLACEMENT_ALLOWED(enable) (enable == TRUE ? lck_rw_lock_exclusive(&c_master_lock) : lck_rw_done(&c_master_lock)) + + +#define AVAILABLE_NON_COMPRESSED_MEMORY (vm_page_active_count + vm_page_inactive_count + vm_page_free_count + vm_page_speculative_count) +#define AVAILABLE_MEMORY (AVAILABLE_NON_COMPRESSED_MEMORY + VM_PAGE_COMPRESSOR_COUNT) + +#define VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_minorcompact_threshold_divisor ? vm_compressor_minorcompact_threshold_divisor : 1)) +#define VM_PAGE_COMPRESSOR_SWAP_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_majorcompact_threshold_divisor ? vm_compressor_majorcompact_threshold_divisor : 1)) +#define VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_unthrottle_threshold_divisor ? vm_compressor_unthrottle_threshold_divisor : 1)) +#define VM_PAGE_COMPRESSOR_SWAP_CATCHUP_THRESHOLD (((AVAILABLE_MEMORY) * 10) / (vm_compressor_catchup_threshold_divisor ? 
vm_compressor_catchup_threshold_divisor : 1)) + +#define COMPRESSOR_NEEDS_TO_SWAP() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) ? 1 : 0) + +#define VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE() \ + ((vm_compressor_mode == VM_PAGER_COMPRESSOR_WITH_SWAP || \ + vm_compressor_mode == VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP) && \ + ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_CATCHUP_THRESHOLD) ? 1 : 0)) +#define HARD_THROTTLE_LIMIT_REACHED() ((AVAILABLE_NON_COMPRESSED_MEMORY < (VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 2) ? 1 : 0) +#define SWAPPER_NEEDS_TO_UNTHROTTLE() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) ? 1 : 0) +#define COMPRESSOR_NEEDS_TO_MINOR_COMPACT() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0) +#define COMPRESSOR_NEEDS_TO_MAJOR_COMPACT() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_SWAP_THRESHOLD) ? 1 : 0) + +#define COMPRESSOR_FREE_RESERVED_LIMIT 28 + +/* + * Upward trajectory. + */ +extern boolean_t vm_compressor_low_on_space(void); + +#define VM_PRESSURE_NORMAL_TO_WARNING() ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0) +#define VM_PRESSURE_WARNING_TO_CRITICAL() (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0) + +/* + * Downward trajectory. + */ +#define VM_PRESSURE_WARNING_TO_NORMAL() ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0) +#define VM_PRESSURE_CRITICAL_TO_WARNING() ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0) + +#define COMPRESSOR_SCRATCH_BUF_SIZE WKdm_SCRATCH_BUF_SIZE + + +#if __i386__ || __x86_64__ +extern lck_mtx_t *c_list_lock; +#else /* __i386__ || __x86_64__ */ +extern lck_spin_t *c_list_lock; +#endif /* __i386__ || __x86_64__ */ diff --git a/osfmk/vm/vm_compressor_backing_store.c b/osfmk/vm/vm_compressor_backing_store.c new file mode 100644 index 000000000..80db056d4 --- /dev/null +++ b/osfmk/vm/vm_compressor_backing_store.c @@ -0,0 +1,1597 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include "vm_compressor_backing_store.h" +#include + +#include + + +boolean_t compressor_store_stop_compaction = FALSE; +boolean_t vm_swap_up = FALSE; +boolean_t vm_swapfile_mgmt_needed = FALSE; + +int swapper_throttle = -1; +boolean_t swapper_throttle_inited = FALSE; +uint64_t vm_swapout_thread_id; + +uint64_t vm_swap_put_failures = 0; +uint64_t vm_swap_get_failures = 0; +int vm_num_swap_files = 0; +int vm_swapout_thread_processed_segments = 0; +int vm_swapout_thread_awakened = 0; +int vm_swapfile_mgmt_thread_awakened = 0; +int vm_swapfile_mgmt_thread_running = 0; + +unsigned int vm_swapfile_total_segs_alloced = 0; +unsigned int vm_swapfile_total_segs_used = 0; + + +#define SWAP_READY 0x1 /* Swap file is ready to be used */ +#define SWAP_RECLAIM 0x2 /* Swap file is marked to be reclaimed */ +#define SWAP_WANTED 0x4 /* Swap file has waiters */ +#define SWAP_REUSE 0x8 /* Swap file is on the Q and has a name. Reuse after init-ing.*/ + +struct swapfile{ + queue_head_t swp_queue; /* list of swap files */ + char *swp_path; /* saved pathname of swap file */ + struct vnode *swp_vp; /* backing vnode */ + uint64_t swp_size; /* size of this swap file */ + uint8_t *swp_bitmap; /* bitmap showing the alloced/freed slots in the swap file */ + unsigned int swp_pathlen; /* length of pathname */ + unsigned int swp_nsegs; /* #segments we can use */ + unsigned int swp_nseginuse; /* #segments in use */ + unsigned int swp_index; /* index of this swap file */ + unsigned int swp_flags; /* state of swap file */ + unsigned int swp_free_hint; /* offset of 1st free chunk */ + unsigned int swp_io_count; /* count of outstanding I/Os */ + c_segment_t *swp_csegs; /* back pointers to the c_segments. Used during swap reclaim. */ + + struct trim_list *swp_delayed_trim_list_head; + unsigned int swp_delayed_trim_count; + boolean_t swp_trim_supported; +}; + +queue_head_t swf_global_queue; + +#define VM_SWAPFILE_DELAYED_TRIM_MAX 128 + +extern clock_sec_t dont_trim_until_ts; +clock_sec_t vm_swapfile_last_failed_to_create_ts = 0; + +static void vm_swapout_thread_throttle_adjust(void); +static void vm_swap_free_now(struct swapfile *swf, uint64_t f_offset); +static void vm_swapout_thread(void); +static void vm_swapfile_mgmt_thread(void); +static void vm_swap_defragment(); +static void vm_swap_handle_delayed_trims(boolean_t); +static void vm_swap_do_delayed_trim(); + + +#define VM_SWAPFILE_DELAYED_CREATE 30 +#define VM_SWAP_SHOULD_DEFRAGMENT() (c_swappedout_sparse_count > (vm_swapfile_total_segs_used / 4) ? 1 : 0) +#define VM_SWAP_SHOULD_RECLAIM() (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) >= SWAPFILE_RECLAIM_THRESHOLD_SEGS) ? 1 : 0) +#define VM_SWAP_SHOULD_CREATE(cur_ts) (((vm_swapfile_total_segs_alloced - vm_swapfile_total_segs_used) < (unsigned int)VM_SWAPFILE_HIWATER_SEGS) && \ + ((cur_ts - vm_swapfile_last_failed_to_create_ts) > VM_SWAPFILE_DELAYED_CREATE) ? 1 : 0) +#define VM_SWAP_SHOULD_TRIM(swf) ((swf->swp_delayed_trim_count >= VM_SWAPFILE_DELAYED_TRIM_MAX) ? 1 : 0) + + +#define VM_SWAP_BUSY() ((c_swapout_count && (swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER1 || swapper_throttle == THROTTLE_LEVEL_COMPRESSOR_TIER0)) ? 
1 : 0) + + +#if CHECKSUM_THE_SWAP +extern unsigned int hash_string(char *cp, int len); +#endif + +#if CRYPTO +extern boolean_t swap_crypt_ctx_initialized; +extern void swap_crypt_ctx_initialize(void); +extern const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE]; +extern aes_ctx swap_crypt_ctx; +extern unsigned long vm_page_encrypt_counter; +extern unsigned long vm_page_decrypt_counter; +#endif /* CRYPTO */ + +extern void vm_pageout_io_throttle(void); + +struct swapfile *vm_swapfile_for_handle(uint64_t); + +/* + * Called with the vm_swap_data_lock held. + */ + +struct swapfile * +vm_swapfile_for_handle(uint64_t f_offset) +{ + + uint64_t file_offset = 0; + unsigned int swapfile_index = 0; + struct swapfile* swf = NULL; + + file_offset = (f_offset & SWAP_SLOT_MASK); + swapfile_index = (f_offset >> SWAP_DEVICE_SHIFT); + + swf = (struct swapfile*) queue_first(&swf_global_queue); + + while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) { + + if (swapfile_index == swf->swp_index) { + break; + } + + swf = (struct swapfile*) queue_next(&swf->swp_queue); + } + + if (queue_end(&swf_global_queue, (queue_entry_t) swf)) { + swf = NULL; + } + + return swf; +} + +void +vm_swap_init() +{ + static boolean_t vm_swap_try_init = FALSE; + thread_t thread = NULL; + + if (vm_swap_try_init == TRUE) { + return; + } + + vm_swap_try_init = TRUE; + + lck_grp_attr_setdefault(&vm_swap_data_lock_grp_attr); + lck_grp_init(&vm_swap_data_lock_grp, + "vm_swap_data", + &vm_swap_data_lock_grp_attr); + lck_attr_setdefault(&vm_swap_data_lock_attr); + lck_mtx_init_ext(&vm_swap_data_lock, + &vm_swap_data_lock_ext, + &vm_swap_data_lock_grp, + &vm_swap_data_lock_attr); + + queue_init(&swf_global_queue); + + if (vm_swap_create_file()) { + + if (kernel_thread_start_priority((thread_continue_t)vm_swapout_thread, NULL, + BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { + panic("vm_swapout_thread: create failed"); + } + thread->options |= TH_OPT_VMPRIV; + vm_swapout_thread_id = thread->thread_id; + + thread_deallocate(thread); + + if (kernel_thread_start_priority((thread_continue_t)vm_swapfile_mgmt_thread, NULL, + BASEPRI_PREEMPT - 1, &thread) != KERN_SUCCESS) { + panic("vm_swapfile_mgmt_thread: create failed"); + } + thread->options |= TH_OPT_VMPRIV; + + thread_deallocate(thread); + +#if CRYPTO + if (swap_crypt_ctx_initialized == FALSE) { + swap_crypt_ctx_initialize(); + } +#endif /* CRYPTO */ + + vm_swap_up = TRUE; + +#if SANITY_CHECK_SWAP_ROUTINES +extern lck_attr_t *vm_compressor_lck_attr; +extern lck_grp_t *vm_compressor_lck_grp; + + /* + * Changes COMPRESSED_SWAP_CHUNK_SIZE to make it (4*KB). + * Changes MIN_SWAP_FILE_SIZE to (4*KB). + * Changes MAX_SWAP_FILE_SIZE to (4*KB). + * That will then cause the below allocations to create + * 4 new swap files and put/get/free from them. 
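+ * (walk-through of the test block below: four pages are filled with + * 'a', 'b', 'c' and 'd', vm_swap_put() is called once per page so each + * lands in its own small swapfile, vm_swap_reclaim() runs between the + * vm_swap_get() calls to force segments to migrate, and the memcmp() + * checks verify each round-trip against the original page.)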
+ */ + { + c_segment_t c_seg = NULL, c_seg1 = NULL, c_seg2 = NULL, c_seg3 = NULL; + vm_offset_t addr = 0; + vm_offset_t dup_addr = 0; + kern_return_t kr = KERN_SUCCESS; + uint64_t f_offset = 0; + uint64_t f_offset1 = 0; + uint64_t f_offset2 = 0; + uint64_t f_offset3 = 0; + + if ((kr = kernel_memory_allocate(kernel_map, + &addr, + 4 * COMPRESSED_SWAP_CHUNK_SIZE, + 0, + KMA_KOBJECT))) { + printf("kernel_memory_allocate failed with %d\n", kr); + goto done; + } + + if ((kr = kernel_memory_allocate(kernel_map, + &dup_addr, + 4 * COMPRESSED_SWAP_CHUNK_SIZE, + 0, + KMA_KOBJECT))) { + printf("kernel_memory_allocate failed with %d\n", kr); + goto done; + } + + c_seg = (c_segment_t) kalloc(sizeof(*c_seg)); + memset(c_seg, 0, sizeof(*c_seg)); +#if __i386__ || __x86_64__ + lck_mtx_init(&c_seg->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); +#else /* __i386__ || __x86_64__ */ + lck_spin_init(&c_seg->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); +#endif /* __i386__ || __x86_64__ */ + + + c_seg1 = (c_segment_t) kalloc(sizeof(*c_seg)); + memset(c_seg1, 0, sizeof(*c_seg)); +#if __i386__ || __x86_64__ + lck_mtx_init(&c_seg1->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); +#else /* __i386__ || __x86_64__ */ + lck_spin_init(&c_seg1->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); +#endif /* __i386__ || __x86_64__ */ + + + c_seg2 = (c_segment_t) kalloc(sizeof(*c_seg)); + memset(c_seg2, 0, sizeof(*c_seg)); +#if __i386__ || __x86_64__ + lck_mtx_init(&c_seg2->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); +#else /* __i386__ || __x86_64__ */ + lck_spin_init(&c_seg2->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); +#endif /* __i386__ || __x86_64__ */ + + + c_seg3 = (c_segment_t) kalloc(sizeof(*c_seg)); + memset(c_seg3, 0, sizeof(*c_seg)); +#if __i386__ || __x86_64__ + lck_mtx_init(&c_seg3->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); +#else /* __i386__ || __x86_64__ */ + lck_spin_init(&c_seg3->c_lock, vm_compressor_lck_grp, vm_compressor_lck_attr); +#endif /* __i386__ || __x86_64__ */ + + + memset((void*)addr, (int) 'a', PAGE_SIZE_64); + memset((void*)(addr + PAGE_SIZE_64), (int) 'b', PAGE_SIZE_64); + memset((void*)(addr + (2 * PAGE_SIZE_64)), (int) 'c', PAGE_SIZE_64); + memset((void*)(addr + (3 * PAGE_SIZE_64)), (int) 'd', PAGE_SIZE_64); + + vm_swap_put(addr, &f_offset, PAGE_SIZE_64, c_seg); + c_seg->c_store.c_swap_handle = f_offset; + + vm_swap_put(addr + PAGE_SIZE_64, &f_offset1, PAGE_SIZE_64, c_seg1); + c_seg1->c_store.c_swap_handle = f_offset1; + + vm_swap_put(addr + (2 * PAGE_SIZE_64), &f_offset2, PAGE_SIZE_64, c_seg2); + c_seg2->c_store.c_swap_handle = f_offset2; + + vm_swap_put(addr + (3 * PAGE_SIZE_64), &f_offset3, PAGE_SIZE_64, c_seg3); + c_seg3->c_store.c_swap_handle = f_offset3; + + //vm_swap_free(f_offset); + vm_swap_get(dup_addr, f_offset, PAGE_SIZE_64); + + //vm_swap_free(f_offset1); + vm_swap_reclaim(); + vm_swap_get(dup_addr + PAGE_SIZE_64, c_seg1->c_store.c_swap_handle, PAGE_SIZE_64); + + //vm_swap_free(f_offset2); + vm_swap_reclaim(); + vm_swap_get(dup_addr + (2 * PAGE_SIZE_64), c_seg2->c_store.c_swap_handle, PAGE_SIZE_64); + + //vm_swap_free(f_offset3); + vm_swap_reclaim(); + vm_swap_get(dup_addr + (3 * PAGE_SIZE_64), c_seg3->c_store.c_swap_handle, PAGE_SIZE_64); + + if (memcmp((void*)addr, (void*)dup_addr, PAGE_SIZE_64)) { + panic("First page data mismatch\n"); + kr = KERN_FAILURE; + goto done; + } + + if (memcmp((void*)(addr + PAGE_SIZE_64), (void*)(dup_addr + PAGE_SIZE_64), PAGE_SIZE_64)) { + panic("Second page data mismatch 
0x%lx, 0x%lx\n", addr, dup_addr); + kr = KERN_FAILURE; + goto done; + } + + if (memcmp((void*)(addr + (2 * PAGE_SIZE_64)), (void*)(dup_addr + (2 * PAGE_SIZE_64)), PAGE_SIZE_64)) { + panic("Third page data mismatch\n"); + kr = KERN_FAILURE; + goto done; + } + + if (memcmp((void*)(addr + (3 * PAGE_SIZE_64)), (void*)(dup_addr + (3 * PAGE_SIZE_64)), PAGE_SIZE_64)) { + panic("Fourth page data mismatch 0x%lx, 0x%lx\n", addr, dup_addr); + kr = KERN_FAILURE; + goto done; + } + +done: + printf("Sanity check %s\n", ((kr != KERN_SUCCESS) ? "FAILED" : "SUCCEEDED")); + kfree((void*)addr, 4 * COMPRESSED_SWAP_CHUNK_SIZE); + addr = 0; + kfree((void*)dup_addr, 4 * COMPRESSED_SWAP_CHUNK_SIZE); + dup_addr = 0; + } +#endif /* SANITY_CHECK_SWAP_ROUTINES */ + } + + printf("VM Swap Subsystem is %s\n", (vm_swap_up == TRUE) ? "ON" : "OFF"); +} + +#if CRYPTO +void +vm_swap_encrypt(c_segment_t c_seg) +{ + vm_offset_t kernel_vaddr = 0; + uint64_t size = 0; + + union { + unsigned char aes_iv[AES_BLOCK_SIZE]; + void *c_seg; + } encrypt_iv; + + assert(swap_crypt_ctx_initialized); + + bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv)); + + encrypt_iv.c_seg = (void*)c_seg; + + /* encrypt the "initial vector" */ + aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0], + swap_crypt_null_iv, + 1, + &encrypt_iv.aes_iv[0], + &swap_crypt_ctx.encrypt); + + kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer; + size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); + + /* + * Encrypt the c_segment. + */ + aes_encrypt_cbc((const unsigned char *) kernel_vaddr, + &encrypt_iv.aes_iv[0], + (unsigned int)(size / AES_BLOCK_SIZE), + (unsigned char *) kernel_vaddr, + &swap_crypt_ctx.encrypt); + + vm_page_encrypt_counter += (size/PAGE_SIZE_64); +} + +void +vm_swap_decrypt(c_segment_t c_seg) +{ + + vm_offset_t kernel_vaddr = 0; + uint64_t size = 0; + + union { + unsigned char aes_iv[AES_BLOCK_SIZE]; + void *c_seg; + } decrypt_iv; + + + assert(swap_crypt_ctx_initialized); + + /* + * Prepare an "initial vector" for the decryption. + * It has to be the same as the "initial vector" we + * used to encrypt that page. + */ + bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv)); + + decrypt_iv.c_seg = (void*)c_seg; + + /* encrypt the "initial vector" */ + aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0], + swap_crypt_null_iv, + 1, + &decrypt_iv.aes_iv[0], + &swap_crypt_ctx.encrypt); + + kernel_vaddr = (vm_offset_t) c_seg->c_store.c_buffer; + size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); + + /* + * Decrypt the c_segment. 
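+ * Note that the IV is produced with aes_encrypt_cbc() here as well: + * CBC initialization vectors are derived by encryption on both the + * encrypt and decrypt paths, so starting from the same c_seg pointer + * reproduces exactly the IV that vm_swap_encrypt() used.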
+ */ + aes_decrypt_cbc((const unsigned char *) kernel_vaddr, + &decrypt_iv.aes_iv[0], + (unsigned int) (size / AES_BLOCK_SIZE), + (unsigned char *) kernel_vaddr, + &swap_crypt_ctx.decrypt); + + vm_page_decrypt_counter += (size/PAGE_SIZE_64); +} +#endif /* CRYPTO */ + + +void +vm_swap_consider_defragmenting() +{ + if (compressor_store_stop_compaction == FALSE && !VM_SWAP_BUSY() && (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())) { + + if (!vm_swapfile_mgmt_thread_running) { + lck_mtx_lock(&vm_swap_data_lock); + + if (!vm_swapfile_mgmt_thread_running) + thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + + lck_mtx_unlock(&vm_swap_data_lock); + } + } +} + + +int vm_swap_defragment_yielded = 0; +int vm_swap_defragment_swapin = 0; +int vm_swap_defragment_free = 0; +int vm_swap_defragment_busy = 0; + + +static void +vm_swap_defragment() +{ + c_segment_t c_seg; + + /* + * have to grab the master lock w/o holding + * any locks in spin mode + */ + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + lck_mtx_lock_spin_always(c_list_lock); + + while (!queue_empty(&c_swappedout_sparse_list_head)) { + + if (compressor_store_stop_compaction == TRUE || VM_SWAP_BUSY()) { + vm_swap_defragment_yielded++; + break; + } + c_seg = (c_segment_t)queue_first(&c_swappedout_sparse_list_head); + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + assert(c_seg->c_on_swappedout_sparse_q); + + if (c_seg->c_busy) { + lck_mtx_unlock_always(c_list_lock); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + /* + * c_seg_wait_on_busy consumes c_seg->c_lock + */ + c_seg_wait_on_busy(c_seg); + + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + lck_mtx_lock_spin_always(c_list_lock); + + vm_swap_defragment_busy++; + continue; + } + if (c_seg->c_bytes_used == 0) { + /* + * c_seg_free_locked consumes the c_list_lock + * and c_seg->c_lock + */ + c_seg_free_locked(c_seg); + + vm_swap_defragment_free++; + } else { + lck_mtx_unlock_always(c_list_lock); + + c_seg_swapin(c_seg, TRUE); + lck_mtx_unlock_always(&c_seg->c_lock); + + vm_swap_defragment_swapin++; + } + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + vm_pageout_io_throttle(); + + /* + * because write waiters have privilege over readers, + * dropping and immediately retaking the master lock will + * still allow any thread waiting to acquire the + * master lock exclusively an opportunity to take it + */ + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + lck_mtx_lock_spin_always(c_list_lock); + } + lck_mtx_unlock_always(c_list_lock); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); +} + + + +static void +vm_swapfile_mgmt_thread(void) +{ + + boolean_t did_work = FALSE; + clock_sec_t sec; + clock_nsec_t nsec; + + vm_swapfile_mgmt_thread_awakened++; + vm_swapfile_mgmt_thread_running = 1; + +try_again: + + do { + if (vm_swap_up == FALSE) + break; + did_work = FALSE; + clock_get_system_nanotime(&sec, &nsec); + + /* + * walk through the list of swap files + * and do the delayed frees/trims for + * any swap file whose count of delayed + * frees is above the batch limit + */ + vm_swap_handle_delayed_trims(FALSE); + + if (VM_SWAP_SHOULD_CREATE(sec)) { + if (vm_swap_create_file() == TRUE) + did_work = TRUE; + else { + vm_swapfile_last_failed_to_create_ts = sec; + HIBLOG("vm_swap_create_file failed @ %lu secs\n", sec); + } + } + if (VM_SWAP_SHOULD_DEFRAGMENT()) { + proc_set_task_policy_thread(kernel_task, current_thread()->thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); + + vm_swap_defragment(); + + if (!VM_SWAP_BUSY()) + did_work = TRUE; + + proc_set_task_policy_thread(kernel_task, 
current_thread()->thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER1); + } + if (VM_SWAP_SHOULD_RECLAIM()) { + proc_set_task_policy_thread(kernel_task, current_thread()->thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); + + vm_swap_defragment(); + vm_swap_reclaim(); + + if (!VM_SWAP_BUSY()) + did_work = TRUE; + + proc_set_task_policy_thread(kernel_task, current_thread()->thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER1); + } + + } while (did_work == TRUE); + + lck_mtx_lock(&vm_swap_data_lock); + + clock_get_system_nanotime(&sec, &nsec); + + if (vm_swap_up == TRUE && (VM_SWAP_SHOULD_CREATE(sec) || ((!VM_SWAP_BUSY() && compressor_store_stop_compaction == FALSE) && + (VM_SWAP_SHOULD_DEFRAGMENT() || VM_SWAP_SHOULD_RECLAIM())))) { + lck_mtx_unlock(&vm_swap_data_lock); + goto try_again; + } + + vm_swapfile_mgmt_thread_running = 0; + + assert_wait((event_t)&vm_swapfile_mgmt_needed, THREAD_UNINT); + + lck_mtx_unlock(&vm_swap_data_lock); + + thread_block((thread_continue_t)vm_swapfile_mgmt_thread); + + /* NOTREACHED */ +} + + + +int swapper_entered_T0 = 0; +int swapper_entered_T1 = 0; +int swapper_entered_T2 = 0; + +static void +vm_swapout_thread_throttle_adjust(void) +{ + int swapper_throttle_new; + + if (swapper_throttle_inited == FALSE) { + /* + * force this thread to be set to the correct + * throttling tier + */ + swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2; + swapper_throttle = THROTTLE_LEVEL_COMPRESSOR_TIER1; + swapper_throttle_inited = TRUE; + swapper_entered_T2++; + goto done; + } + swapper_throttle_new = swapper_throttle; + + + switch(swapper_throttle) { + + case THROTTLE_LEVEL_COMPRESSOR_TIER2: + + if (SWAPPER_NEEDS_TO_UNTHROTTLE() || swapout_target_age || hibernate_flushing == TRUE) { + swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1; + swapper_entered_T1++; + break; + } + break; + + case THROTTLE_LEVEL_COMPRESSOR_TIER1: + + if (VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) { + swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER0; + swapper_entered_T0++; + break; + } + if (COMPRESSOR_NEEDS_TO_SWAP() == 0 && swapout_target_age == 0 && hibernate_flushing == FALSE) { + swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2; + swapper_entered_T2++; + break; + } + break; + + case THROTTLE_LEVEL_COMPRESSOR_TIER0: + + if (COMPRESSOR_NEEDS_TO_SWAP() == 0) { + swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER2; + swapper_entered_T2++; + break; + } + if (SWAPPER_NEEDS_TO_UNTHROTTLE() == 0) { + swapper_throttle_new = THROTTLE_LEVEL_COMPRESSOR_TIER1; + swapper_entered_T1++; + break; + } + break; + } +done: + if (swapper_throttle != swapper_throttle_new) { + proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_IO, swapper_throttle_new); + proc_set_task_policy_thread(kernel_task, vm_swapout_thread_id, + TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE); + + swapper_throttle = swapper_throttle_new; + } +} + + +static void +vm_swapout_thread(void) +{ + uint64_t f_offset = 0; + uint32_t size = 0; + c_segment_t c_seg = NULL; + kern_return_t kr = KERN_SUCCESS; + vm_offset_t addr = 0; + + vm_swapout_thread_awakened++; + + lck_mtx_lock_spin_always(c_list_lock); + + while (!queue_empty(&c_swapout_list_head)) { + + c_seg = (c_segment_t)queue_first(&c_swapout_list_head); + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + assert(c_seg->c_on_swapout_q); + + if (c_seg->c_busy) { + lck_mtx_unlock_always(&c_seg->c_lock); + 
lck_mtx_unlock_always(c_list_lock); + + mutex_pause(2); + + lck_mtx_lock_spin_always(c_list_lock); + + continue; + } + queue_remove(&c_swapout_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swapout_q = 0; + c_swapout_count--; + + c_seg->c_busy = 1; + c_seg->c_busy_swapping = 1; + + vm_swapout_thread_processed_segments++; + + thread_wakeup((event_t)&compaction_swapper_running); + + lck_mtx_unlock_always(c_list_lock); + + addr = (vm_offset_t) c_seg->c_store.c_buffer; + + size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); + + lck_mtx_unlock_always(&c_seg->c_lock); + +#if CHECKSUM_THE_SWAP + c_seg->cseg_hash = hash_string((char*)addr, (int)size); + c_seg->cseg_swap_size = size; +#endif /* CHECKSUM_THE_SWAP */ + +#if CRYPTO + vm_swap_encrypt(c_seg); +#endif /* CRYPTO */ + + vm_swapout_thread_throttle_adjust(); + + kr = vm_swap_put((vm_offset_t) addr, &f_offset, size, c_seg); + + PAGE_REPLACEMENT_DISALLOWED(TRUE); + + lck_mtx_lock_spin_always(c_list_lock); + lck_mtx_lock_spin_always(&c_seg->c_lock); + + if (kr == KERN_SUCCESS) { + + if (C_SEG_ONDISK_IS_SPARSE(c_seg) && hibernate_flushing == FALSE) { + + c_seg_insert_into_q(&c_swappedout_sparse_list_head, c_seg); + c_seg->c_on_swappedout_sparse_q = 1; + c_swappedout_sparse_count++; + + } else { + if (hibernate_flushing == TRUE && (c_seg->c_generation_id >= first_c_segment_to_warm_generation_id && + c_seg->c_generation_id <= last_c_segment_to_warm_generation_id)) + queue_enter_first(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); + else + queue_enter(&c_swappedout_list_head, c_seg, c_segment_t, c_age_list); + c_seg->c_on_swappedout_q = 1; + c_swappedout_count++; + } + c_seg->c_store.c_swap_handle = f_offset; + c_seg->c_ondisk = 1; + + VM_STAT_INCR_BY(swapouts, size >> PAGE_SHIFT); + + if (c_seg->c_bytes_used) + OSAddAtomic64(-c_seg->c_bytes_used, &compressor_bytes_used); + } else { +#if CRYPTO + vm_swap_decrypt(c_seg); +#endif /* CRYPTO */ + c_seg_insert_into_q(&c_age_list_head, c_seg); + c_seg->c_on_age_q = 1; + c_age_count++; + + vm_swap_put_failures++; + } + lck_mtx_unlock_always(c_list_lock); + + c_seg->c_busy_swapping = 0; + + C_SEG_WAKEUP_DONE(c_seg); + + if (c_seg->c_must_free) + c_seg_free(c_seg); + else + lck_mtx_unlock_always(&c_seg->c_lock); + + if (kr == KERN_SUCCESS) + kernel_memory_depopulate(kernel_map, (vm_offset_t) addr, size, KMA_COMPRESSOR); + + PAGE_REPLACEMENT_DISALLOWED(FALSE); + + if (kr == KERN_SUCCESS) + kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_ALLOCSIZE); + + vm_pageout_io_throttle(); + + if (c_swapout_count == 0) + vm_swap_consider_defragmenting(); + + lck_mtx_lock_spin_always(c_list_lock); + } + + assert_wait((event_t)&c_swapout_list_head, THREAD_UNINT); + + lck_mtx_unlock_always(c_list_lock); + + thread_block((thread_continue_t)vm_swapout_thread); + + /* NOTREACHED */ +} + +boolean_t +vm_swap_create_file() +{ + uint64_t size = 0; + int namelen = 0; + boolean_t swap_file_created = FALSE; + boolean_t swap_file_reuse = FALSE; + struct swapfile *swf = NULL; + + + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { + } + + /* + * Any swapfile structure ready for re-use? 
+ */ + + lck_mtx_lock(&vm_swap_data_lock); + + swf = (struct swapfile*) queue_first(&swf_global_queue); + + while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) { + if (swf->swp_flags == SWAP_REUSE) { + swap_file_reuse = TRUE; + break; + } + swf = (struct swapfile*) queue_next(&swf->swp_queue); + } + + lck_mtx_unlock(&vm_swap_data_lock); + + if (swap_file_reuse == FALSE) { + + namelen = SWAPFILENAME_LEN + SWAPFILENAME_INDEX_LEN + 1; + + swf = (struct swapfile*) kalloc(sizeof *swf); + memset(swf, 0, sizeof(*swf)); + + swf->swp_index = vm_num_swap_files + 1; + swf->swp_pathlen = namelen; + swf->swp_path = (char*)kalloc(swf->swp_pathlen); + + memset(swf->swp_path, 0, namelen); + + snprintf(swf->swp_path, namelen, "%s%d", SWAP_FILE_NAME, vm_num_swap_files + 1); + } + + vm_swapfile_open(swf->swp_path, &swf->swp_vp); + + if (swf->swp_vp == NULL) { + if (swap_file_reuse == FALSE) { + kfree(swf->swp_path, swf->swp_pathlen); + kfree(swf, sizeof *swf); + } + return FALSE; + } + size = MAX_SWAP_FILE_SIZE; + + while (size >= MIN_SWAP_FILE_SIZE) { + + if (vm_swapfile_preallocate(swf->swp_vp, &size) == 0) { + + int num_bytes_for_bitmap = 0; + + swap_file_created = TRUE; + + swf->swp_size = size; + swf->swp_nsegs = (unsigned int) (size / COMPRESSED_SWAP_CHUNK_SIZE); + swf->swp_nseginuse = 0; + swf->swp_free_hint = 0; + + num_bytes_for_bitmap = MAX((swf->swp_nsegs >> 3) , 1); + /* + * Allocate a bitmap that describes the + * number of segments held by this swapfile. + */ + swf->swp_bitmap = (uint8_t*)kalloc(num_bytes_for_bitmap); + memset(swf->swp_bitmap, 0, num_bytes_for_bitmap); + + swf->swp_csegs = (c_segment_t *) kalloc(swf->swp_nsegs * sizeof(c_segment_t)); + memset(swf->swp_csegs, 0, (swf->swp_nsegs * sizeof(c_segment_t))); + + /* + * passing a NULL trim_list into vnode_trim_list + * will return ENOTSUP if trim isn't supported + * and 0 if it is + */ + if (vnode_trim_list(swf->swp_vp, NULL)) + swf->swp_trim_supported = FALSE; + else + swf->swp_trim_supported = TRUE; + + lck_mtx_lock(&vm_swap_data_lock); + + swf->swp_flags = SWAP_READY; + + if (swap_file_reuse == FALSE) { + queue_enter(&swf_global_queue, swf, struct swapfile*, swp_queue); + } + + vm_num_swap_files++; + + vm_swapfile_total_segs_alloced += swf->swp_nsegs; + + lck_mtx_unlock(&vm_swap_data_lock); + + thread_wakeup((event_t) &vm_num_swap_files); + + break; + } else { + + size = size / 2; + } + } + if (swap_file_created == FALSE) { + + vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp); + + swf->swp_vp = NULL; + + if (swap_file_reuse == FALSE) { + kfree(swf->swp_path, swf->swp_pathlen); + kfree(swf, sizeof *swf); + } + } + return swap_file_created; +} + + +kern_return_t +vm_swap_get(vm_offset_t addr, uint64_t f_offset, uint64_t size) +{ + struct swapfile *swf = NULL; + uint64_t file_offset = 0; + int retval; + + if (addr == 0) { + return KERN_FAILURE; + } + + lck_mtx_lock(&vm_swap_data_lock); + + swf = vm_swapfile_for_handle(f_offset); + + if (swf) { + if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) { + + swf->swp_io_count++; + file_offset = (f_offset & SWAP_SLOT_MASK); + + lck_mtx_unlock(&vm_swap_data_lock); + + } else { + + lck_mtx_unlock(&vm_swap_data_lock); + return KERN_FAILURE; + } + } else { + + lck_mtx_unlock(&vm_swap_data_lock); + return KERN_FAILURE; + } + + retval = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int)(size / PAGE_SIZE_64), SWAP_READ); + + /* + * Free this slot in the swap structure. 
+ */ + vm_swap_free(f_offset); + + lck_mtx_lock(&vm_swap_data_lock); + swf->swp_io_count--; + + if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) { + + swf->swp_flags &= ~SWAP_WANTED; + thread_wakeup((event_t) &swf->swp_flags); + } + if (retval == 0) + VM_STAT_INCR_BY(swapins, size >> PAGE_SHIFT); + lck_mtx_unlock(&vm_swap_data_lock); + + if (retval == 0) + return KERN_SUCCESS; + else { + vm_swap_get_failures++; + return KERN_FAILURE; + } +} + +kern_return_t +vm_swap_put(vm_offset_t addr, uint64_t *f_offset, uint64_t size, c_segment_t c_seg) +{ + unsigned int segidx = 0; + struct swapfile *swf = NULL; + uint64_t file_offset = 0; + uint64_t swapfile_index = 0; + unsigned int byte_for_segidx = 0; + unsigned int offset_within_byte = 0; + boolean_t swf_eligible = FALSE; + boolean_t waiting = FALSE; + int error = 0; + clock_sec_t sec; + clock_nsec_t nsec; + + if (addr == 0 || f_offset == NULL) { + return KERN_FAILURE; + } + + lck_mtx_lock(&vm_swap_data_lock); + + swf = (struct swapfile*) queue_first(&swf_global_queue); + + while(queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) { + + segidx = swf->swp_free_hint; + + swf_eligible = (swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse < swf->swp_nsegs); + + if (swf_eligible) { + + while(segidx < swf->swp_nsegs) { + + byte_for_segidx = segidx >> 3; + offset_within_byte = segidx % 8; + + if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) { + segidx++; + continue; + } + + (swf->swp_bitmap)[byte_for_segidx] |= (1 << offset_within_byte); + + file_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE; + swf->swp_nseginuse++; + swf->swp_io_count++; + swapfile_index = swf->swp_index; + + vm_swapfile_total_segs_used++; + + clock_get_system_nanotime(&sec, &nsec); + + if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_mgmt_thread_running) + thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + + lck_mtx_unlock(&vm_swap_data_lock); + + goto done; + } + } + swf = (struct swapfile*) queue_next(&swf->swp_queue); + } + assert(queue_end(&swf_global_queue, (queue_entry_t) swf)); + + /* + * we've run out of swap segments, but may not + * be in a position to immediately create a new swap + * file if we've recently failed to create due to a lack + * of free space in the root filesystem... we'll try + * to kick that create off, but in any event we're going + * to take a breather (up to 1 second) so that we're not caught in a tight + * loop back in "vm_compressor_compact_and_swap" trying to stuff + * segments into swap files only to have them immediately put back + * on the c_age queue due to vm_swap_put failing. + * + * if we're doing these puts due to a hibernation flush, + * no need to block... 
setting hibernate_no_swapspace to TRUE, + * will cause "vm_compressor_compact_and_swap" to immediately abort + */ + clock_get_system_nanotime(&sec, &nsec); + + if (VM_SWAP_SHOULD_CREATE(sec) && !vm_swapfile_mgmt_thread_running) + thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + + if (hibernate_flushing == FALSE || VM_SWAP_SHOULD_CREATE(sec)) { + waiting = TRUE; + assert_wait_timeout((event_t) &vm_num_swap_files, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC); + } else + hibernate_no_swapspace = TRUE; + + lck_mtx_unlock(&vm_swap_data_lock); + + if (waiting == TRUE) + thread_block(THREAD_CONTINUE_NULL); + + return KERN_FAILURE; + +done: + error = vm_swapfile_io(swf->swp_vp, file_offset, addr, (int) (size / PAGE_SIZE_64), SWAP_WRITE); + + lck_mtx_lock(&vm_swap_data_lock); + + swf->swp_csegs[segidx] = c_seg; + + swf->swp_io_count--; + + *f_offset = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset; + + if ((swf->swp_flags & SWAP_WANTED) && swf->swp_io_count == 0) { + + swf->swp_flags &= ~SWAP_WANTED; + thread_wakeup((event_t) &swf->swp_flags); + } + + lck_mtx_unlock(&vm_swap_data_lock); + +#if SANITY_CHECK_SWAP_ROUTINES + printf("Returned 0x%llx as offset\n", *f_offset); +#endif /* SANITY_CHECK_SWAP_ROUTINES */ + + if (error) { + vm_swap_free(*f_offset); + + return KERN_FAILURE; + } + return KERN_SUCCESS; +} + + + +static void +vm_swap_free_now(struct swapfile *swf, uint64_t f_offset) +{ + uint64_t file_offset = 0; + unsigned int segidx = 0; + + + if ((swf->swp_flags & SWAP_READY) || (swf->swp_flags & SWAP_RECLAIM)) { + + unsigned int byte_for_segidx = 0; + unsigned int offset_within_byte = 0; + + file_offset = (f_offset & SWAP_SLOT_MASK); + segidx = (unsigned int) (file_offset / COMPRESSED_SWAP_CHUNK_SIZE); + + byte_for_segidx = segidx >> 3; + offset_within_byte = segidx % 8; + + if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) { + + (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte); + + swf->swp_csegs[segidx] = NULL; + + swf->swp_nseginuse--; + vm_swapfile_total_segs_used--; + + if (segidx < swf->swp_free_hint) { + swf->swp_free_hint = segidx; + } + } + if (VM_SWAP_SHOULD_RECLAIM() && !vm_swapfile_mgmt_thread_running) + thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + } + lck_mtx_unlock(&vm_swap_data_lock); +} + + +uint32_t vm_swap_free_now_count = 0; +uint32_t vm_swap_free_delayed_count = 0; + + +void +vm_swap_free(uint64_t f_offset) +{ + struct swapfile *swf = NULL; + struct trim_list *tl; + clock_sec_t sec; + clock_nsec_t nsec; + + lck_mtx_lock(&vm_swap_data_lock); + + swf = vm_swapfile_for_handle(f_offset); + + if (swf && (swf->swp_flags & (SWAP_READY | SWAP_RECLAIM))) { + + if (swf->swp_trim_supported == FALSE || (swf->swp_flags & SWAP_RECLAIM)) { + /* + * don't delay the free if the underlying disk doesn't support + * trim, or we're in the midst of reclaiming this swap file since + * we don't want to move segments that are technically free + * but not yet handled by the delayed free mechanism + */ + vm_swap_free_now(swf, f_offset); + + vm_swap_free_now_count++; + return; + } + tl = kalloc(sizeof(struct trim_list)); + + tl->tl_offset = f_offset & SWAP_SLOT_MASK; + tl->tl_length = COMPRESSED_SWAP_CHUNK_SIZE; + + tl->tl_next = swf->swp_delayed_trim_list_head; + swf->swp_delayed_trim_list_head = tl; + swf->swp_delayed_trim_count++; + + if (VM_SWAP_SHOULD_TRIM(swf) && !vm_swapfile_mgmt_thread_running) { + clock_get_system_nanotime(&sec, &nsec); + + if (sec > dont_trim_until_ts) + thread_wakeup((event_t) &vm_swapfile_mgmt_needed); + } + 
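/*
+	 * Illustrative aside, not part of the original patch: the 64-bit swap
+	 * handle f_offset packs a segment's location into one word.
+	 * vm_swap_put() builds it as
+	 *
+	 *	handle = (swapfile_index << SWAP_DEVICE_SHIFT) | file_offset;
+	 *
+	 * so, with SWAP_DEVICE_SHIFT == 33 and SWAP_SLOT_MASK == 0x1FFFFFFFF,
+	 *
+	 *	uint64_t swapfile_index = f_offset >> SWAP_DEVICE_SHIFT;
+	 *	uint64_t byte_offset    = f_offset & SWAP_SLOT_MASK;
+	 *
+	 * recover the owning swapfile (see vm_swapfile_for_handle()) and the
+	 * byte offset stored in the trim entry queued above.
+	 */
+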
vm_swap_free_delayed_count++; + } + lck_mtx_unlock(&vm_swap_data_lock); +} + + +static void +vm_swap_handle_delayed_trims(boolean_t force_now) +{ + struct swapfile *swf = NULL; + + /* + * because swap files are created or reclaimed on the + * same thread that calls this function, it's safe + * to iterate "swf_global_queue" w/o holding + * the lock since those are the only 2 cases that can + * change the items on the "swf_global_queue" + */ + swf = (struct swapfile*) queue_first(&swf_global_queue); + + while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) { + + assert(!(swf->swp_flags & SWAP_RECLAIM)); + + if ((swf->swp_flags & SWAP_READY) && (force_now == TRUE || VM_SWAP_SHOULD_TRIM(swf))) + vm_swap_do_delayed_trim(swf); + + swf = (struct swapfile*) queue_next(&swf->swp_queue); + } +} + + +static void +vm_swap_do_delayed_trim(struct swapfile *swf) +{ + struct trim_list *tl, *tl_head; + + lck_mtx_lock(&vm_swap_data_lock); + + tl_head = swf->swp_delayed_trim_list_head; + swf->swp_delayed_trim_list_head = NULL; + swf->swp_delayed_trim_count = 0; + + lck_mtx_unlock(&vm_swap_data_lock); + + vnode_trim_list(swf->swp_vp, tl_head); + + while ((tl = tl_head) != NULL) { + unsigned int segidx = 0; + unsigned int byte_for_segidx = 0; + unsigned int offset_within_byte = 0; + + lck_mtx_lock(&vm_swap_data_lock); + + segidx = (unsigned int) (tl->tl_offset / COMPRESSED_SWAP_CHUNK_SIZE); + + byte_for_segidx = segidx >> 3; + offset_within_byte = segidx % 8; + + if ((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) { + + (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte); + + swf->swp_csegs[segidx] = NULL; + + swf->swp_nseginuse--; + vm_swapfile_total_segs_used--; + + if (segidx < swf->swp_free_hint) { + swf->swp_free_hint = segidx; + } + } + lck_mtx_unlock(&vm_swap_data_lock); + + tl_head = tl->tl_next; + + kfree(tl, sizeof(struct trim_list)); + } +} + + +void +vm_swap_flush() +{ + return; +} + +int vm_swap_reclaim_yielded = 0; + +void +vm_swap_reclaim(void) +{ + vm_offset_t addr = 0; + unsigned int segidx = 0; + uint64_t f_offset = 0; + struct swapfile *swf = NULL; + struct swapfile *smallest_swf = NULL; + unsigned int min_nsegs = 0; + unsigned int byte_for_segidx = 0; + unsigned int offset_within_byte = 0; + uint32_t c_size = 0; + + c_segment_t c_seg = NULL; + + if (kernel_memory_allocate(kernel_map, (vm_offset_t *)(&addr), C_SEG_BUFSIZE, 0, KMA_KOBJECT) != KERN_SUCCESS) { + panic("vm_swap_reclaim: kernel_memory_allocate failed\n"); + } + + lck_mtx_lock(&vm_swap_data_lock); + + swf = (struct swapfile*) queue_first(&swf_global_queue); + min_nsegs = MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE; + smallest_swf = NULL; + + while (queue_end(&swf_global_queue, (queue_entry_t)swf) == FALSE) { + + if ((swf->swp_flags & SWAP_READY) && (swf->swp_nseginuse <= min_nsegs)) { + + smallest_swf = swf; + min_nsegs = swf->swp_nseginuse; + } + swf = (struct swapfile*) queue_next(&swf->swp_queue); + } + + if (smallest_swf == NULL) + goto done; + + swf = smallest_swf; + + + swf->swp_flags &= ~SWAP_READY; + swf->swp_flags |= SWAP_RECLAIM; + + if (swf->swp_delayed_trim_count) { + + lck_mtx_unlock(&vm_swap_data_lock); + + vm_swap_do_delayed_trim(swf); + + lck_mtx_lock(&vm_swap_data_lock); + } + segidx = 0; + + while (segidx < swf->swp_nsegs) { + +ReTry_for_cseg: + if (compressor_store_stop_compaction == TRUE || (swf->swp_trim_supported == FALSE && VM_SWAP_BUSY())) { + vm_swap_reclaim_yielded++; + break; + } + /* + * Wait for outgoing I/Os. 
+ */ + while (swf->swp_io_count) { + + swf->swp_flags |= SWAP_WANTED; + + assert_wait((event_t) &swf->swp_flags, THREAD_UNINT); + lck_mtx_unlock(&vm_swap_data_lock); + + thread_block(THREAD_CONTINUE_NULL); + + lck_mtx_lock(&vm_swap_data_lock); + } + + byte_for_segidx = segidx >> 3; + offset_within_byte = segidx % 8; + + if (((swf->swp_bitmap)[byte_for_segidx] & (1 << offset_within_byte)) == 0) { + + segidx++; + continue; + } + + c_seg = swf->swp_csegs[segidx]; + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + assert(c_seg->c_ondisk); + + if (c_seg->c_busy) { + + c_seg->c_wanted = 1; + + assert_wait((event_t) (c_seg), THREAD_UNINT); + lck_mtx_unlock_always(&c_seg->c_lock); + + lck_mtx_unlock(&vm_swap_data_lock); + + thread_block(THREAD_CONTINUE_NULL); + + lck_mtx_lock(&vm_swap_data_lock); + + goto ReTry_for_cseg; + } + (swf->swp_bitmap)[byte_for_segidx] &= ~(1 << offset_within_byte); + + f_offset = segidx * COMPRESSED_SWAP_CHUNK_SIZE; + + swf->swp_csegs[segidx] = NULL; + swf->swp_nseginuse--; + + vm_swapfile_total_segs_used--; + + lck_mtx_unlock(&vm_swap_data_lock); + + if (c_seg->c_must_free) { + + c_seg_free(c_seg); + } else { + + c_seg->c_busy = 1; + c_seg->c_busy_swapping = 1; +#if !CHECKSUM_THE_SWAP + c_seg_trim_tail(c_seg); +#endif + +#if SANITY_CHECK_SWAP_ROUTINES + + c_size = COMPRESSED_SWAP_CHUNK_SIZE; + +#else /* SANITY_CHECK_SWAP_ROUTINES */ + + c_size = round_page_32(C_SEG_OFFSET_TO_BYTES(c_seg->c_populated_offset)); + + assert(c_size <= C_SEG_BUFSIZE); + +#endif /* SANITY_CHECK_SWAP_ROUTINES */ + + lck_mtx_unlock_always(&c_seg->c_lock); + + if (vm_swapfile_io(swf->swp_vp, f_offset, addr, (int)(c_size / PAGE_SIZE_64), SWAP_READ)) { + + /* + * reading the data back in failed, so convert c_seg + * to a swapped in c_segment that contains no data + */ + c_seg->c_store.c_buffer = (int32_t *)NULL; + c_seg_swapin_requeue(c_seg); + + goto swap_io_failed; + } + VM_STAT_INCR_BY(swapins, c_size >> PAGE_SHIFT); + + if (vm_swap_put(addr, &f_offset, c_size, c_seg)) { + vm_offset_t c_buffer; + + /* + * the put failed, so convert c_seg to a fully swapped in c_segment + * with valid data + */ + if (kernel_memory_allocate(kernel_map, &c_buffer, C_SEG_ALLOCSIZE, 0, KMA_COMPRESSOR | KMA_VAONLY) != KERN_SUCCESS) + panic("vm_swap_reclaim: kernel_memory_allocate failed\n"); + kernel_memory_populate(kernel_map, c_buffer, c_size, KMA_COMPRESSOR); + + memcpy((char *)c_buffer, (char *)addr, c_size); + + c_seg->c_store.c_buffer = (int32_t *)c_buffer; +#if CRYPTO + vm_swap_decrypt(c_seg); +#endif /* CRYPTO */ + c_seg_swapin_requeue(c_seg); + + OSAddAtomic64(c_seg->c_bytes_used, &compressor_bytes_used); + + goto swap_io_failed; + } + VM_STAT_INCR_BY(swapouts, c_size >> PAGE_SHIFT); + + lck_mtx_lock_spin_always(&c_seg->c_lock); + + assert(c_seg->c_ondisk); + /* + * The c_seg will now know about the new location on disk. + */ + c_seg->c_store.c_swap_handle = f_offset; +swap_io_failed: + c_seg->c_busy_swapping = 0; + + if (c_seg->c_must_free) + c_seg_free(c_seg); + else { + C_SEG_WAKEUP_DONE(c_seg); + + lck_mtx_unlock_always(&c_seg->c_lock); + } + } + lck_mtx_lock(&vm_swap_data_lock); + } + + if (swf->swp_nseginuse) { + + swf->swp_flags &= ~SWAP_RECLAIM; + swf->swp_flags |= SWAP_READY; + + goto done; + } + /* + * We don't remove this inactive swf from the queue. + * That way, we can re-use it when needed again and + * preserve the namespace. 
+ */ + //queue_remove(&swf_global_queue, swf, struct swapfile*, swp_queue); + + vm_num_swap_files--; + + vm_swapfile_total_segs_alloced -= swf->swp_nsegs; + + lck_mtx_unlock(&vm_swap_data_lock); + + vm_swapfile_close((uint64_t)(swf->swp_path), swf->swp_vp); + + kfree(swf->swp_csegs, swf->swp_nsegs * sizeof(c_segment_t)); + kfree(swf->swp_bitmap, MAX((swf->swp_nsegs >> 3), 1)); + + lck_mtx_lock(&vm_swap_data_lock); + + swf->swp_vp = NULL; + swf->swp_size = 0; + swf->swp_free_hint = 0; + swf->swp_nsegs = 0; + swf->swp_flags = SWAP_REUSE; + + thread_wakeup((event_t) &swf->swp_flags); +done: + lck_mtx_unlock(&vm_swap_data_lock); + + kmem_free(kernel_map, (vm_offset_t) addr, C_SEG_BUFSIZE); +} + + +uint64_t +vm_swap_get_total_space(void) +{ + uint64_t total_space = 0; + + total_space = (uint64_t)vm_swapfile_total_segs_alloced * COMPRESSED_SWAP_CHUNK_SIZE; + + return total_space; +} + +uint64_t +vm_swap_get_used_space(void) +{ + uint64_t used_space = 0; + + used_space = (uint64_t)vm_swapfile_total_segs_used * COMPRESSED_SWAP_CHUNK_SIZE; + + return used_space; +} + +uint64_t +vm_swap_get_free_space(void) +{ + return (vm_swap_get_total_space() - vm_swap_get_used_space()); +} diff --git a/osfmk/vm/vm_compressor_backing_store.h b/osfmk/vm/vm_compressor_backing_store.h new file mode 100644 index 000000000..6cced5b2f --- /dev/null +++ b/osfmk/vm/vm_compressor_backing_store.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define SANITY_CHECK_SWAP_ROUTINES 0 + +#if SANITY_CHECK_SWAP_ROUTINES + +#define MIN_SWAP_FILE_SIZE (4 * 1024) + +#define MAX_SWAP_FILE_SIZE (4 * 1024) + +#define COMPRESSED_SWAP_CHUNK_SIZE (4 * 1024) + +#define VM_SWAPFILE_HIWATER_SEGS (MIN_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE) + +#define SWAPFILE_RECLAIM_THRESHOLD_SEGS (MIN_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE) + +#else /* SANITY_CHECK_SWAP_ROUTINES */ + + +#define MIN_SWAP_FILE_SIZE (256 * 1024 * 1024) + +#define MAX_SWAP_FILE_SIZE (1 * 1024 * 1024 * 1024) + + +#define COMPRESSED_SWAP_CHUNK_SIZE (C_SEG_BUFSIZE) + +#define VM_SWAPFILE_HIWATER_SEGS (MIN_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE) + +#define SWAPFILE_RECLAIM_THRESHOLD_SEGS ((15 * (MAX_SWAP_FILE_SIZE / COMPRESSED_SWAP_CHUNK_SIZE)) / 10) + +#endif /* SANITY_CHECK_SWAP_ROUTINES */ + +#define SWAP_FILE_NAME "/var/vm/swapfile" +#define SWAPFILENAME_LEN (int)(strlen(SWAP_FILE_NAME)) +#define SWAPFILENAME_INDEX_LEN 2 /* Doesn't include the terminating NULL character */ + +#define SWAP_SLOT_MASK 0x1FFFFFFFF +#define SWAP_DEVICE_SHIFT 33 + +extern int vm_num_swap_files; +extern boolean_t vm_swap_up; + +struct swapfile; +lck_grp_attr_t vm_swap_data_lock_grp_attr; +lck_grp_t vm_swap_data_lock_grp; +lck_attr_t vm_swap_data_lock_attr; +lck_mtx_ext_t vm_swap_data_lock_ext; +lck_mtx_t vm_swap_data_lock; + +void vm_swap_init(void); +boolean_t vm_swap_create_file(void); +kern_return_t vm_swap_put(vm_offset_t, uint64_t*, uint64_t, c_segment_t); +void vm_swap_flush(void); +void vm_swap_reclaim(void); +void vm_swap_encrypt(c_segment_t); +uint64_t vm_swap_get_total_space(void); +uint64_t vm_swap_get_used_space(void); +uint64_t vm_swap_get_free_space(void); + +struct vnode; +extern void vm_swapfile_open(const char *path, struct vnode **vp); +extern void vm_swapfile_close(uint64_t path, struct vnode *vp); +extern int vm_swapfile_preallocate(struct vnode *vp, uint64_t *size); +extern uint64_t vm_swapfile_get_blksize(struct vnode *vp); +extern uint64_t vm_swapfile_get_transfer_size(struct vnode *vp); +extern int vm_swapfile_io(struct vnode *vp, uint64_t offset, uint64_t start, int npages, int flags); + + diff --git a/osfmk/vm/vm_compressor_pager.c b/osfmk/vm/vm_compressor_pager.c new file mode 100644 index 000000000..45f3dbeb7 --- /dev/null +++ b/osfmk/vm/vm_compressor_pager.c @@ -0,0 +1,781 @@ +/* + * Copyright (c) 2013 Apple Computer, Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +/* + * @OSF_COPYRIGHT@ + */ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +/* + * Compressor Pager. + * Memory Object Management. + */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* memory_object interfaces */ +void compressor_memory_object_reference(memory_object_t mem_obj); +void compressor_memory_object_deallocate(memory_object_t mem_obj); +kern_return_t compressor_memory_object_init( + memory_object_t mem_obj, + memory_object_control_t control, + memory_object_cluster_size_t pager_page_size); +kern_return_t compressor_memory_object_terminate(memory_object_t mem_obj); +kern_return_t compressor_memory_object_data_request( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t length, + __unused vm_prot_t protection_required, + memory_object_fault_info_t fault_info); +kern_return_t compressor_memory_object_data_return( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t size, + __unused memory_object_offset_t *resid_offset, + __unused int *io_error, + __unused boolean_t dirty, + __unused boolean_t kernel_copy, + __unused int upl_flags); +kern_return_t compressor_memory_object_data_initialize( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t size); +kern_return_t compressor_memory_object_data_unlock( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused memory_object_size_t size, + __unused vm_prot_t desired_access); +kern_return_t compressor_memory_object_synchronize( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_size_t length, + __unused vm_sync_t flags); +kern_return_t compressor_memory_object_map( + __unused memory_object_t mem_obj, + __unused vm_prot_t prot); +kern_return_t compressor_memory_object_last_unmap(memory_object_t mem_obj); +kern_return_t compressor_memory_object_data_reclaim( + __unused memory_object_t mem_obj, + 
__unused boolean_t reclaim_backing_store); + +const struct memory_object_pager_ops compressor_pager_ops = { + compressor_memory_object_reference, + compressor_memory_object_deallocate, + compressor_memory_object_init, + compressor_memory_object_terminate, + compressor_memory_object_data_request, + compressor_memory_object_data_return, + compressor_memory_object_data_initialize, + compressor_memory_object_data_unlock, + compressor_memory_object_synchronize, + compressor_memory_object_map, + compressor_memory_object_last_unmap, + compressor_memory_object_data_reclaim, + "compressor pager" +}; + +/* internal data structures */ + +struct { + uint64_t data_returns; + uint64_t data_requests; + uint64_t state_clr; + uint64_t state_get; +} compressor_pager_stats; + +typedef int compressor_slot_t; + +typedef struct compressor_pager { + struct ipc_object_header cpgr_pager_header; /* fake ip_kotype */ + memory_object_pager_ops_t cpgr_pager_ops; /* == &compressor_pager_ops */ + memory_object_control_t cpgr_control; + lck_mtx_t cpgr_lock; + + unsigned int cpgr_references; + unsigned int cpgr_num_slots; + union { + compressor_slot_t *cpgr_dslots; + compressor_slot_t **cpgr_islots; + } cpgr_slots; +} *compressor_pager_t; + +#define compressor_pager_lookup(_mem_obj_, _cpgr_) \ + MACRO_BEGIN \ + if (_mem_obj_ == NULL || \ + _mem_obj_->mo_pager_ops != &compressor_pager_ops) { \ + _cpgr_ = NULL; \ + } else { \ + _cpgr_ = (compressor_pager_t) _mem_obj_; \ + } \ + MACRO_END + +zone_t compressor_pager_zone; + +lck_grp_t compressor_pager_lck_grp; +lck_grp_attr_t compressor_pager_lck_grp_attr; +lck_attr_t compressor_pager_lck_attr; + +#define compressor_pager_lock(_cpgr_) \ + lck_mtx_lock(&(_cpgr_)->cpgr_lock) +#define compressor_pager_unlock(_cpgr_) \ + lck_mtx_unlock(&(_cpgr_)->cpgr_lock) +#define compressor_pager_lock_init(_cpgr_) \ + lck_mtx_init(&(_cpgr_)->cpgr_lock, &compressor_pager_lck_grp, &compressor_pager_lck_attr) +#define compressor_pager_lock_destroy(_cpgr_) \ + lck_mtx_destroy(&(_cpgr_)->cpgr_lock, &compressor_pager_lck_grp) + +#define COMPRESSOR_SLOTS_CHUNK_SIZE (512) +#define COMPRESSOR_SLOTS_PER_CHUNK (COMPRESSOR_SLOTS_CHUNK_SIZE / sizeof (compressor_slot_t)) + +/* forward declarations */ +void compressor_pager_slots_chunk_free(compressor_slot_t *chunk, int num_slots); +void compressor_pager_slot_lookup( + compressor_pager_t pager, + boolean_t do_alloc, + uint32_t offset, + compressor_slot_t **slot_pp); + +kern_return_t +compressor_memory_object_init( + memory_object_t mem_obj, + memory_object_control_t control, + __unused memory_object_cluster_size_t pager_page_size) +{ + compressor_pager_t pager; + + assert(pager_page_size == PAGE_SIZE); + + memory_object_control_reference(control); + + compressor_pager_lookup(mem_obj, pager); + compressor_pager_lock(pager); + + if (pager->cpgr_control != MEMORY_OBJECT_CONTROL_NULL) + panic("compressor_memory_object_init: bad request"); + pager->cpgr_control = control; + + compressor_pager_unlock(pager); + + return KERN_SUCCESS; +} + +kern_return_t +compressor_memory_object_synchronize( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_size_t length, + __unused vm_sync_t flags) +{ + compressor_pager_t pager; + + compressor_pager_lookup(mem_obj, pager); + + memory_object_synchronize_completed(pager->cpgr_control, offset, length); + + return KERN_SUCCESS; +} + +kern_return_t +compressor_memory_object_map( + __unused memory_object_t mem_obj, + __unused vm_prot_t prot) +{ + panic("compressor_memory_object_map"); + return KERN_FAILURE; +} 
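+
+/*
+ * Illustrative aside, not part of the original patch: sizing of the slot
+ * store.  With COMPRESSOR_SLOTS_CHUNK_SIZE of 512 bytes and a 4-byte
+ * compressor_slot_t, COMPRESSOR_SLOTS_PER_CHUNK is 128.  A pager covering
+ * at most 128 pages keeps its slots inline in cpgr_dslots; a larger pager
+ * switches to the cpgr_islots array of chunk pointers, and a page's slot
+ * is located as
+ *
+ *	chunk_idx = page_num / COMPRESSOR_SLOTS_PER_CHUNK;
+ *	slot_idx  = page_num % COMPRESSOR_SLOTS_PER_CHUNK;
+ *
+ * e.g. page 300 resolves to cpgr_islots[2][44].  This is the layout that
+ * compressor_pager_slot_lookup() below walks, allocating chunks lazily
+ * when do_alloc is TRUE.
+ */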
+ +kern_return_t +compressor_memory_object_last_unmap( + __unused memory_object_t mem_obj) +{ + panic("compressor_memory_object_last_unmap"); + return KERN_FAILURE; +} + +kern_return_t +compressor_memory_object_data_reclaim( + __unused memory_object_t mem_obj, + __unused boolean_t reclaim_backing_store) +{ + panic("compressor_memory_object_data_reclaim"); + return KERN_FAILURE; +} + +kern_return_t +compressor_memory_object_terminate( + memory_object_t mem_obj) +{ + memory_object_control_t control; + compressor_pager_t pager; + + /* + * control port is a receive right, not a send right. + */ + + compressor_pager_lookup(mem_obj, pager); + compressor_pager_lock(pager); + + /* + * After memory_object_terminate both memory_object_init + * and a no-senders notification are possible, so we need + * to clean up our reference to the memory_object_control + * to prepare for a new init. + */ + + control = pager->cpgr_control; + pager->cpgr_control = MEMORY_OBJECT_CONTROL_NULL; + + compressor_pager_unlock(pager); + + /* + * Now we deallocate our reference on the control. + */ + memory_object_control_deallocate(control); + return KERN_SUCCESS; +} + +void +compressor_memory_object_reference( + memory_object_t mem_obj) +{ + compressor_pager_t pager; + + compressor_pager_lookup(mem_obj, pager); + if (pager == NULL) + return; + + compressor_pager_lock(pager); + assert(pager->cpgr_references > 0); + pager->cpgr_references++; + compressor_pager_unlock(pager); +} + +void +compressor_memory_object_deallocate( + memory_object_t mem_obj) +{ + compressor_pager_t pager; + + /* + * Because we don't give out multiple first references + * for a memory object, there can't be a race + * between getting a deallocate call and creating + * a new reference for the object. + */ + + compressor_pager_lookup(mem_obj, pager); + if (pager == NULL) + return; + + compressor_pager_lock(pager); + if (--pager->cpgr_references > 0) { + compressor_pager_unlock(pager); + return; + } + + /* + * We shouldn't get a deallocation call + * when the kernel has the object cached. + */ + if (pager->cpgr_control != MEMORY_OBJECT_CONTROL_NULL) + panic("compressor_memory_object_deallocate(): bad request"); + + /* + * Unlock the pager (though there should be no one + * waiting for it). 
+ */ + compressor_pager_unlock(pager); + + /* free the compressor slots */ + int num_chunks; + int i; + compressor_slot_t *chunk; + + num_chunks = (pager->cpgr_num_slots + COMPRESSOR_SLOTS_PER_CHUNK -1) / COMPRESSOR_SLOTS_PER_CHUNK; + if (num_chunks > 1) { + /* we have an array of chunks */ + for (i = 0; i < num_chunks; i++) { + chunk = pager->cpgr_slots.cpgr_islots[i]; + if (chunk != NULL) { + compressor_pager_slots_chunk_free( + chunk, + COMPRESSOR_SLOTS_PER_CHUNK); + pager->cpgr_slots.cpgr_islots[i] = NULL; + kfree(chunk, COMPRESSOR_SLOTS_CHUNK_SIZE); + } + } + kfree(pager->cpgr_slots.cpgr_islots, + num_chunks * sizeof (pager->cpgr_slots.cpgr_islots[0])); + pager->cpgr_slots.cpgr_islots = NULL; + } else { + chunk = pager->cpgr_slots.cpgr_dslots; + compressor_pager_slots_chunk_free( + chunk, + pager->cpgr_num_slots); + pager->cpgr_slots.cpgr_dslots = NULL; + kfree(chunk, + (pager->cpgr_num_slots * + sizeof (pager->cpgr_slots.cpgr_dslots[0]))); + } + + compressor_pager_lock_destroy(pager); + zfree(compressor_pager_zone, pager); +} + +kern_return_t +compressor_memory_object_data_request( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t length, + __unused vm_prot_t protection_required, + __unused memory_object_fault_info_t fault_info) +{ + compressor_pager_t pager; + kern_return_t kr; + compressor_slot_t *slot_p; + + compressor_pager_stats.data_requests++; + + /* + * Request must be on a page boundary and a multiple of pages. + */ + if ((offset & PAGE_MASK) != 0 || (length & PAGE_MASK) != 0) + panic("compressor_memory_object_data_request(): bad alignment"); + + assert((uint32_t) offset == offset); + + compressor_pager_lookup(mem_obj, pager); + + if (length == 0) { + /* we're only querying the pager for this page */ + } else { + panic("compressor: data_request"); + } + + /* find the compressor slot for that page */ + compressor_pager_slot_lookup(pager, FALSE, (uint32_t) offset, &slot_p); + + if (offset / PAGE_SIZE > pager->cpgr_num_slots) { + /* out of range */ + kr = KERN_FAILURE; + } else if (slot_p == NULL || *slot_p == 0) { + /* compressor does not have this page */ + kr = KERN_FAILURE; + } else { + /* compressor does have this page */ + kr = KERN_SUCCESS; + } + return kr; +} + +/* + * memory_object_data_initialize: check whether we already have each page, and + * write it if we do not. The implementation is far from optimized, and + * also assumes that the default_pager is single-threaded. + */ +/* It is questionable whether or not a pager should decide what is relevant */ +/* and what is not in data sent from the kernel. Data initialize has been */ +/* changed to copy back all data sent to it in preparation for its eventual */ +/* merge with data return. It is the kernel that should decide what pages */ +/* to write back. 
As of the writing of this note, this is indeed the case */ +/* the kernel writes back one page at a time through this interface */ + +kern_return_t +compressor_memory_object_data_initialize( + memory_object_t mem_obj, + memory_object_offset_t offset, + memory_object_cluster_size_t size) +{ + compressor_pager_t pager; + memory_object_offset_t cur_offset; + + compressor_pager_lookup(mem_obj, pager); + compressor_pager_lock(pager); + + for (cur_offset = offset; + cur_offset < offset + size; + cur_offset += PAGE_SIZE) { + panic("do a data_return() if slot for this page is empty"); + } + + compressor_pager_unlock(pager); + + return KERN_SUCCESS; +} + +kern_return_t +compressor_memory_object_data_unlock( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused memory_object_size_t size, + __unused vm_prot_t desired_access) +{ + panic("compressor_memory_object_data_unlock()"); + return KERN_FAILURE; +} + + +/*ARGSUSED*/ +kern_return_t +compressor_memory_object_data_return( + __unused memory_object_t mem_obj, + __unused memory_object_offset_t offset, + __unused memory_object_cluster_size_t size, + __unused memory_object_offset_t *resid_offset, + __unused int *io_error, + __unused boolean_t dirty, + __unused boolean_t kernel_copy, + __unused int upl_flags) +{ + panic("compressor: data_return"); + return KERN_FAILURE; +} + +/* + * Routine: compressor_memory_object_create + * Purpose: + * Handle requests for memory objects from the + * kernel. + * Notes: + * Because we only give out the default memory + * manager port to the kernel, we don't have to + * be so paranoid about the contents. + */ +kern_return_t +compressor_memory_object_create( + vm_size_t new_size, + memory_object_t *new_mem_obj) +{ + compressor_pager_t pager; + int num_chunks; + + if ((uint32_t) new_size != new_size) { + /* 32-bit overflow */ + return KERN_INVALID_ARGUMENT; + } + + pager = (compressor_pager_t) zalloc(compressor_pager_zone); + if (pager == NULL) { + return KERN_RESOURCE_SHORTAGE; + } + + compressor_pager_lock_init(pager); + pager->cpgr_control = MEMORY_OBJECT_CONTROL_NULL; + pager->cpgr_references = 1; + pager->cpgr_num_slots = (uint32_t) (new_size / PAGE_SIZE); + + num_chunks = (pager->cpgr_num_slots + COMPRESSOR_SLOTS_PER_CHUNK - 1) / COMPRESSOR_SLOTS_PER_CHUNK; + if (num_chunks > 1) { + pager->cpgr_slots.cpgr_islots = kalloc(num_chunks * sizeof (pager->cpgr_slots.cpgr_islots[0])); + bzero(pager->cpgr_slots.cpgr_islots, num_chunks * sizeof (pager->cpgr_slots.cpgr_islots[0])); + } else { + pager->cpgr_slots.cpgr_dslots = kalloc(pager->cpgr_num_slots * sizeof (pager->cpgr_slots.cpgr_dslots[0])); + bzero(pager->cpgr_slots.cpgr_dslots, pager->cpgr_num_slots * sizeof (pager->cpgr_slots.cpgr_dslots[0])); + } + + /* + * Set up associations between this memory object + * and this compressor_pager structure + */ + + pager->cpgr_pager_ops = &compressor_pager_ops; + pager->cpgr_pager_header.io_bits = IKOT_MEMORY_OBJECT; + + *new_mem_obj = (memory_object_t) pager; + return KERN_SUCCESS; +} + + +void +compressor_pager_slots_chunk_free( + compressor_slot_t *chunk, + int num_slots) +{ +#if 00 + vm_compressor_free(chunk, num_slots); +#else + int i; + for (i = 0; i < num_slots; i++) { + if (chunk[i] != 0) { + vm_compressor_free(&chunk[i]); + } + } +#endif +} + +void +compressor_pager_slot_lookup( + compressor_pager_t pager, + boolean_t do_alloc, + uint32_t offset, + compressor_slot_t **slot_pp) +{ + int num_chunks; + uint32_t page_num; + int chunk_idx; + int slot_idx; + compressor_slot_t *chunk; + 
compressor_slot_t *t_chunk; + + page_num = offset / PAGE_SIZE; + if (page_num > pager->cpgr_num_slots) { + /* out of range */ + *slot_pp = NULL; + return; + } + num_chunks = (pager->cpgr_num_slots + COMPRESSOR_SLOTS_PER_CHUNK - 1) / COMPRESSOR_SLOTS_PER_CHUNK; + if (num_chunks > 1) { + /* we have an array of chunks */ + chunk_idx = page_num / COMPRESSOR_SLOTS_PER_CHUNK; + chunk = pager->cpgr_slots.cpgr_islots[chunk_idx]; + + if (chunk == NULL && do_alloc) { + t_chunk = kalloc(COMPRESSOR_SLOTS_CHUNK_SIZE); + bzero(t_chunk, COMPRESSOR_SLOTS_CHUNK_SIZE); + + compressor_pager_lock(pager); + + if ((chunk = pager->cpgr_slots.cpgr_islots[chunk_idx]) == NULL) { + chunk = pager->cpgr_slots.cpgr_islots[chunk_idx] = t_chunk; + t_chunk = NULL; + } + compressor_pager_unlock(pager); + + if (t_chunk) + kfree(t_chunk, COMPRESSOR_SLOTS_CHUNK_SIZE); + } + if (chunk == NULL) { + *slot_pp = NULL; + } else { + slot_idx = page_num % COMPRESSOR_SLOTS_PER_CHUNK; + *slot_pp = &chunk[slot_idx]; + } + } else { + slot_idx = page_num; + *slot_pp = &pager->cpgr_slots.cpgr_dslots[slot_idx]; + } +} + +void +vm_compressor_pager_init(void) +{ + lck_grp_attr_setdefault(&compressor_pager_lck_grp_attr); + lck_grp_init(&compressor_pager_lck_grp, "compressor_pager", &compressor_pager_lck_grp_attr); + lck_attr_setdefault(&compressor_pager_lck_attr); + + compressor_pager_zone = zinit(sizeof (struct compressor_pager), + 10000 * sizeof (struct compressor_pager), + 8192, "compressor_pager"); + zone_change(compressor_pager_zone, Z_CALLERACCT, FALSE); + zone_change(compressor_pager_zone, Z_NOENCRYPT, TRUE); + + vm_compressor_init(); +} + +kern_return_t +vm_compressor_pager_put( + memory_object_t mem_obj, + memory_object_offset_t offset, + ppnum_t ppnum, + void **current_chead, + char *scratch_buf) +{ + compressor_pager_t pager; + compressor_slot_t *slot_p; + + compressor_pager_stats.data_returns++; + + /* This routine is called by the pageout thread. The pageout thread */ + /* cannot be blocked by read activities unless the read activities */ + /* Therefore the grant of vs lock must be done on a try versus a */ + /* blocking basis. The code below relies on the fact that the */ + /* interface is synchronous. Should this interface be again async */ + /* for some type of pager in the future the pages will have to be */ + /* returned through a separate, asynchronous path. */ + + compressor_pager_lookup(mem_obj, pager); + + assert((upl_offset_t) offset == offset); + + compressor_pager_slot_lookup(pager, TRUE, (uint32_t) offset, &slot_p); + + if (slot_p == NULL) { + /* out of range ? */ + panic("compressor_pager_put: out of range"); + } + if (*slot_p != 0) { + /* + * Already compressed: forget about the old one. + * + * This can happen after a vm_object_do_collapse() when + * the "backing_object" had some pages paged out and the + * "object" had an equivalent page resident. 
+ */ + vm_compressor_free(slot_p); + } + if (vm_compressor_put(ppnum, slot_p, current_chead, scratch_buf)) + return (KERN_RESOURCE_SHORTAGE); + + return (KERN_SUCCESS); +} + + +kern_return_t +vm_compressor_pager_get( + memory_object_t mem_obj, + memory_object_offset_t offset, + ppnum_t ppnum, + int *my_fault_type, + int flags) +{ + compressor_pager_t pager; + kern_return_t kr; + compressor_slot_t *slot_p; + + compressor_pager_stats.data_requests++; + + assert((uint32_t) offset == offset); + + compressor_pager_lookup(mem_obj, pager); + + /* find the compressor slot for that page */ + compressor_pager_slot_lookup(pager, FALSE, (uint32_t) offset, &slot_p); + + if (offset / PAGE_SIZE > pager->cpgr_num_slots) { + /* out of range */ + kr = KERN_MEMORY_FAILURE; + } else if (slot_p == NULL || *slot_p == 0) { + /* compressor does not have this page */ + kr = KERN_MEMORY_ERROR; + } else { + /* compressor does have this page */ + kr = KERN_SUCCESS; + } + *my_fault_type = DBG_COMPRESSOR_FAULT; + + if (kr == KERN_SUCCESS) { + int retval; + + /* get the page from the compressor */ + if ((retval = vm_compressor_get(ppnum, slot_p, flags)) == -1) + kr = KERN_MEMORY_FAILURE; + else if (retval == 1) + *my_fault_type = DBG_COMPRESSOR_SWAPIN_FAULT; + else if (retval == -2) { + assert((flags & C_DONT_BLOCK)); + kr = KERN_FAILURE; + } + } + return kr; +} + +void +vm_compressor_pager_state_clr( + memory_object_t mem_obj, + memory_object_offset_t offset) +{ + compressor_pager_t pager; + compressor_slot_t *slot_p; + + compressor_pager_stats.state_clr++; + + assert((uint32_t) offset == offset); + + compressor_pager_lookup(mem_obj, pager); + + /* find the compressor slot for that page */ + compressor_pager_slot_lookup(pager, FALSE, (uint32_t) offset, &slot_p); + + if (slot_p && *slot_p != 0) { + vm_compressor_free(slot_p); + } +} + +vm_external_state_t +vm_compressor_pager_state_get( + memory_object_t mem_obj, + memory_object_offset_t offset) +{ + compressor_pager_t pager; + compressor_slot_t *slot_p; + + compressor_pager_stats.state_get++; + + assert((uint32_t) offset == offset); + + compressor_pager_lookup(mem_obj, pager); + + /* find the compressor slot for that page */ + compressor_pager_slot_lookup(pager, FALSE, (uint32_t) offset, &slot_p); + + if (offset / PAGE_SIZE > pager->cpgr_num_slots) { + /* out of range */ + return VM_EXTERNAL_STATE_ABSENT; + } else if (slot_p == NULL || *slot_p == 0) { + /* compressor does not have this page */ + return VM_EXTERNAL_STATE_ABSENT; + } else { + /* compressor does have this page */ + return VM_EXTERNAL_STATE_EXISTS; + } +} diff --git a/osfmk/vm/vm_compressor_pager.h b/osfmk/vm/vm_compressor_pager.h new file mode 100644 index 000000000..be9035986 --- /dev/null +++ b/osfmk/vm/vm_compressor_pager.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. 
+ * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +#ifdef XNU_KERNEL_PRIVATE + +#ifndef _VM_VM_COMPRESSOR_PAGER_H_ +#define _VM_VM_COMPRESSOR_PAGER_H_ + +#include +#include +#include + +extern kern_return_t vm_compressor_pager_put( + memory_object_t mem_obj, + memory_object_offset_t offset, + ppnum_t ppnum, + void **current_chead, + char *scratch_buf); +extern kern_return_t vm_compressor_pager_get( + memory_object_t mem_obj, + memory_object_offset_t offset, + ppnum_t ppnum, + int *my_fault_type, + int flags); + +#define C_DONT_BLOCK 0x01 +#define C_KEEP 0x02 + +extern void vm_compressor_pager_state_clr( + memory_object_t mem_obj, + memory_object_offset_t offset); +extern vm_external_state_t vm_compressor_pager_state_get( + memory_object_t mem_obj, + memory_object_offset_t offset); + +#define VM_COMPRESSOR_PAGER_STATE_GET(object, offset) \ + (((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && \ + (object)->internal && \ + (object)->pager != NULL && \ + !(object)->terminating && \ + (object)->alive) \ + ? vm_compressor_pager_state_get((object)->pager, \ + (offset) + (object)->paging_offset) \ + : VM_EXTERNAL_STATE_UNKNOWN) + +#define VM_COMPRESSOR_PAGER_STATE_CLR(object, offset) \ + MACRO_BEGIN \ + if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && \ + (object)->internal && \ + (object)->pager != NULL && \ + !(object)->terminating && \ + (object)->alive) { \ + vm_compressor_pager_state_clr( \ + (object)->pager, \ + (offset) + (object)->paging_offset); \ + } \ + MACRO_END + +extern void vm_compressor_init(void); +extern int vm_compressor_put(ppnum_t pn, int *slot, void **current_chead, char *scratch_buf); +extern int vm_compressor_get(ppnum_t pn, int *slot, int flags); +extern void vm_compressor_free(int *slot); + +#endif /* _VM_VM_COMPRESSOR_PAGER_H_ */ + +#endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_debug.c b/osfmk/vm/vm_debug.c index 1dfa947ef..317a36420 100644 --- a/osfmk/vm/vm_debug.c +++ b/osfmk/vm/vm_debug.c @@ -267,15 +267,21 @@ vm32_region_info( if (size != 0) kmem_free(ipc_kernel_map, addr, size); - size = round_page(2 * used * sizeof(vm_info_object_t)); + size = vm_map_round_page(2 * used * sizeof(vm_info_object_t), + VM_MAP_PAGE_MASK(ipc_kernel_map)); kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; - kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size), - VM_PROT_READ|VM_PROT_WRITE, FALSE); + kr = vm_map_wire( + ipc_kernel_map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr + size, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + VM_PROT_READ|VM_PROT_WRITE, + FALSE); assert(kr == KERN_SUCCESS); } @@ -288,10 +294,16 @@ vm32_region_info( kmem_free(ipc_kernel_map, addr, size); } else { vm_size_t size_used = - round_page(used * sizeof(vm_info_object_t)); - - kr = 
vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size_used), FALSE); + vm_map_round_page(used * sizeof(vm_info_object_t), + VM_MAP_PAGE_MASK(ipc_kernel_map)); + + kr = vm_map_unwire( + ipc_kernel_map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr + size_used, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + FALSE); assert(kr == KERN_SUCCESS); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, @@ -469,15 +481,21 @@ vm32_region_info_64( if (size != 0) kmem_free(ipc_kernel_map, addr, size); - size = round_page(2 * used * sizeof(vm_info_object_t)); + size = vm_map_round_page(2 * used * sizeof(vm_info_object_t), + VM_MAP_PAGE_MASK(ipc_kernel_map)); kr = vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; - kr = vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size), - VM_PROT_READ|VM_PROT_WRITE, FALSE); + kr = vm_map_wire( + ipc_kernel_map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr + size, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + VM_PROT_READ|VM_PROT_WRITE, + FALSE); assert(kr == KERN_SUCCESS); } @@ -490,10 +508,16 @@ vm32_region_info_64( kmem_free(ipc_kernel_map, addr, size); } else { vm_size_t size_used = - round_page(used * sizeof(vm_info_object_t)); - - kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size_used), FALSE); + vm_map_round_page(used * sizeof(vm_info_object_t), + VM_MAP_PAGE_MASK(ipc_kernel_map)); + + kr = vm_map_unwire( + ipc_kernel_map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr + size_used, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + FALSE); assert(kr == KERN_SUCCESS); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, @@ -534,12 +558,18 @@ vm32_mapped_pages_info( pmap = map->pmap; size = pmap_resident_count(pmap) * sizeof(vm_offset_t); - size = round_page(size); + size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(ipc_kernel_map)); for (;;) { (void) vm_allocate(ipc_kernel_map, &addr, size, VM_FLAGS_ANYWHERE); - (void) vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size), FALSE); + (void) vm_map_unwire( + ipc_kernel_map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr + size, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + FALSE); list = (page_address_array_t) addr; space = (unsigned int) (size / sizeof(vm_offset_t)); @@ -558,7 +588,8 @@ vm32_mapped_pages_info( /* * Try again, doubling the size */ - size = round_page(actual * sizeof(vm_offset_t)); + size = vm_map_round_page(actual * sizeof(vm_offset_t), + VM_MAP_PAGE_MASK(ipc_kernel_map)); } if (actual == 0) { *pages = 0; @@ -567,10 +598,16 @@ vm32_mapped_pages_info( } else { *pages_count = actual; - size_used = round_page(actual * sizeof(vm_offset_t)); - (void) vm_map_wire(ipc_kernel_map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size), - VM_PROT_READ|VM_PROT_WRITE, FALSE); + size_used = vm_map_round_page(actual * sizeof(vm_offset_t), + VM_MAP_PAGE_MASK(ipc_kernel_map)); + (void) vm_map_wire( + ipc_kernel_map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + vm_map_round_page(addr + size, + VM_MAP_PAGE_MASK(ipc_kernel_map)), + VM_PROT_READ|VM_PROT_WRITE, + FALSE); (void) vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, (vm_map_size_t)size_used, @@ -634,7 +671,8 @@ host_virtual_physical_table_info( if (info != *infop) 
kmem_free(ipc_kernel_map, addr, size); - size = round_page(actual * sizeof *info); + size = vm_map_round_page(actual * sizeof *info, + VM_MAP_PAGE_MASK(ipc_kernel_map)); kr = kmem_alloc_pageable(ipc_kernel_map, &addr, size); if (kr != KERN_SUCCESS) return KERN_RESOURCE_SHORTAGE; @@ -655,7 +693,8 @@ host_virtual_physical_table_info( vm_map_copy_t copy; vm_size_t used; - used = round_page(actual * sizeof *info); + used = vm_map_round_page(actual * sizeof *info, + VM_MAP_PAGE_MASK(ipc_kernel_map)); if (used != size) kmem_free(ipc_kernel_map, addr + used, size - used); diff --git a/osfmk/vm/vm_external.h b/osfmk/vm/vm_external.h index e0bdbf5e8..803c9753a 100644 --- a/osfmk/vm/vm_external.h +++ b/osfmk/vm/vm_external.h @@ -143,4 +143,5 @@ boolean_t vm_external_within( * fits in current map */ vm_object_size_t new_size, vm_object_size_t old_size); + #endif /* VM_VM_EXTERNAL_H_ */ diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c index f4c6f17ef..8a1b35d84 100644 --- a/osfmk/vm/vm_fault.c +++ b/osfmk/vm/vm_fault.c @@ -88,6 +88,8 @@ #include #include +#include +#include #include #include #include @@ -101,6 +103,8 @@ #include /* Needed by some vm_page.h macros */ #include +#include + #define VM_FAULT_CLASSIFY 0 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */ @@ -121,25 +125,21 @@ int vm_object_pagein_throttle = 16; * delay of HARD_THROTTLE_DELAY microseconds before being allowed to try the page fault again. */ -extern boolean_t thread_is_io_throttled(void); extern void throttle_lowpri_io(int); uint64_t vm_hard_throttle_threshold; -extern unsigned int dp_pages_free, dp_pages_reserve; -#define NEED_TO_HARD_THROTTLE_THIS_TASK() (((dp_pages_free + dp_pages_reserve < 2000) && \ - (get_task_resident_size(current_task()) > vm_hard_throttle_threshold) && \ - (current_task() != kernel_task) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) || \ - (vm_page_free_count < vm_page_throttle_limit && thread_is_io_throttled() && \ - (get_task_resident_size(current_task()) > vm_hard_throttle_threshold))) +#define NEED_TO_HARD_THROTTLE_THIS_TASK() ((current_task() != kernel_task && \ + get_task_resident_size(current_task()) > (((AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE) / 5)) && \ + (vm_low_on_space() || (vm_page_free_count < vm_page_throttle_limit && \ + proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO) >= THROTTLE_LEVEL_THROTTLED ))) -#define HARD_THROTTLE_DELAY 20000 /* 20000 us == 20 ms */ -#define SOFT_THROTTLE_DELAY 2000 /* 2000 us == 2 ms */ -extern int cs_debug; +#define HARD_THROTTLE_DELAY 20000 /* 20000 us == 20 ms */ +#define SOFT_THROTTLE_DELAY 2000 /* 2000 us == 2 ms */ boolean_t current_thread_aborted(void); @@ -176,11 +176,6 @@ unsigned long vm_cs_revalidates = 0; unsigned long vm_cs_query_modified = 0; unsigned long vm_cs_validated_dirtied = 0; unsigned long vm_cs_bitmap_validated = 0; -#if CONFIG_ENFORCE_SIGNED_CODE -int cs_enforcement_disable=0; -#else -static const int cs_enforcement_disable=1; -#endif /* * Routine: vm_fault_init @@ -190,24 +185,41 @@ static const int cs_enforcement_disable=1; void vm_fault_init(void) { -#if !SECURE_KERNEL -#if CONFIG_ENFORCE_SIGNED_CODE - PE_parse_boot_argn("cs_enforcement_disable", &cs_enforcement_disable, - sizeof (cs_enforcement_disable)); -#endif - PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug)); -#endif - + int i, vm_compressor_temp; + boolean_t need_default_val = TRUE; /* * Choose a value for the hard throttle threshold based on the amount of ram. 
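For reference, the dense NEED_TO_HARD_THROTTLE_THIS_TASK() predicate above can be restated as a function. This is a minimal sketch, not part of the patch; the helper name should_hard_throttle() is hypothetical, and every other symbol is taken from the macro itself:

static boolean_t
should_hard_throttle(void)
{
	uint64_t limit;

	if (current_task() == kernel_task)
		return FALSE;	/* never hard-throttle kernel-task faults */

	/* a task qualifies once it holds more than 1/5 of the non-compressed memory */
	limit = (AVAILABLE_NON_COMPRESSED_MEMORY * PAGE_SIZE) / 5;
	if (get_task_resident_size(current_task()) <= limit)
		return FALSE;

	/* ...and only when memory is actually tight or the thread is IO-throttled */
	return (vm_low_on_space() ||
		(vm_page_free_count < vm_page_throttle_limit &&
		 proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO) >= THROTTLE_LEVEL_THROTTLED));
}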
The threshold is * computed as a percentage of available memory, and the percentage used is scaled inversely with - the amount of memory. The pertange runs between 10% and 35%. We use 35% for small memory systems + the amount of memory. The percentage runs between 10% and 35%. We use 35% for small memory systems * and reduce the value down to 10% for very large memory configurations. This helps give us a * definition of a memory hog that makes more sense relative to the amount of ram in the machine. * The formula here simply uses the number of gigabytes of ram to adjust the percentage. */ vm_hard_throttle_threshold = sane_size * (35 - MIN((int)(sane_size / (1024*1024*1024)), 25)) / 100; + + /* + * Configure compressed pager behavior. A boot arg takes precedence over a device tree entry. + */ + + if (PE_parse_boot_argn("vm_compressor", &vm_compressor_temp, sizeof (vm_compressor_temp))) { + for ( i = 0; i < VM_PAGER_MAX_MODES; i++) { + if (vm_compressor_temp > 0 && + ((vm_compressor_temp & ( 1 << i)) == vm_compressor_temp)) { + need_default_val = FALSE; + vm_compressor_mode = vm_compressor_temp; + break; + } + } + if (need_default_val) + printf("Ignoring \"vm_compressor\" boot arg %d\n", vm_compressor_temp); + } + if (need_default_val) { + /* If no boot arg or incorrect boot arg, try device tree. */ + PE_get_default("kern.vm_compressor", &vm_compressor_mode, sizeof(vm_compressor_mode)); + } + PE_parse_boot_argn("vm_compressor_threads", &vm_compressor_thread_count, sizeof (vm_compressor_thread_count)); + printf("\"vm_compressor_mode\" is %d\n", vm_compressor_mode); } /* @@ -492,7 +504,19 @@ vm_fault_deactivate_behind( if (m && !m->laundry && !m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) { page_run[pages_in_run++] = m; - pmap_clear_reference(m->phys_page); + + /* + * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... + * + * a TLB flush isn't really needed here since at worst we'll miss the reference bit being + * updated in the PTE if a remote processor still has this mapping cached in its TLB when the + * new reference happens. If no further references happen on the page after that remote TLB flushes + * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue + * by pageout_scan, which is just fine since the last reference would have happened quite far + * in the past (TLB caches don't hang around for very long), and of course could just as easily + * have happened before we did the deactivate_behind. 
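To sanity-check the threshold formula above with concrete numbers, here is an illustrative sketch (assuming sane_size is physical memory in bytes; the helper name is hypothetical):

/*
 *   RAM =  1 GB: pct = 35 - MIN( 1, 25) = 34  ->  ~0.34 GB threshold
 *   RAM =  8 GB: pct = 35 - MIN( 8, 25) = 27  ->  ~2.16 GB threshold
 *   RAM = 32 GB: pct = 35 - MIN(32, 25) = 10  ->  ~3.20 GB threshold
 */
static uint64_t
hard_throttle_threshold_for(uint64_t ram_bytes)
{
	int gb  = (int)(ram_bytes / (1024ULL * 1024 * 1024));
	int pct = 35 - MIN(gb, 25);	/* scaled inversely with ram, clamped to [10, 35] */

	return ram_bytes * pct / 100;
}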
+ */ + pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); } } if (pages_in_run) { @@ -534,9 +558,9 @@ vm_page_throttled(void) if (NEED_TO_HARD_THROTTLE_THIS_TASK()) return (HARD_THROTTLE_DELAY); - if (vm_page_free_count < vm_page_throttle_limit && + if ((vm_page_free_count < vm_page_throttle_limit || ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && SWAPPER_NEEDS_TO_UNTHROTTLE())) && thread->t_page_creation_count > vm_page_creation_throttle) { - + clock_get_system_microtime(&tv_sec, &tv_usec); elapsed_sec = tv_sec - thread->t_page_creation_time; @@ -558,7 +582,10 @@ vm_page_throttled(void) } ++vm_page_throttle_count; - return (SOFT_THROTTLE_DELAY); + if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && HARD_THROTTLE_LIMIT_REACHED()) + return (HARD_THROTTLE_DELAY); + else + return (SOFT_THROTTLE_DELAY); } thread->t_page_creation_time = tv_sec; thread->t_page_creation_count = 0; @@ -693,20 +720,21 @@ vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill) vm_page_lockspin_queues(); - assert(!VM_PAGE_WIRED(m)); + if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { + assert(!VM_PAGE_WIRED(m)); - /* - * can't be on the pageout queue since we don't - * have a pager to try and clean to - */ - assert(!m->pageout_queue); - - VM_PAGE_QUEUES_REMOVE(m); + /* + * can't be on the pageout queue since we don't + * have a pager to try and clean to + */ + assert(!m->pageout_queue); - queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq); - m->throttled = TRUE; - vm_page_throttled_count++; + VM_PAGE_QUEUES_REMOVE(m); + queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq); + m->throttled = TRUE; + vm_page_throttled_count++; + } vm_page_unlock_queues(); } return (my_fault); @@ -771,10 +799,11 @@ vm_fault_page( vm_object_offset_t first_offset, /* Offset into object */ vm_prot_t fault_type, /* What access is requested */ boolean_t must_be_resident,/* Must page be resident? */ + boolean_t caller_lookup, /* caller looked up page */ /* Modifies in place: */ vm_prot_t *protection, /* Protection for mapping */ - /* Returns: */ vm_page_t *result_page, /* Page found, if successful */ + /* Returns: */ vm_page_t *top_page, /* Page in top object, if * not result_page. */ int *type_of_fault, /* if non-null, fill in with type of fault @@ -782,13 +811,9 @@ vm_fault_page( /* More arguments: */ kern_return_t *error_code, /* code if page is in error */ boolean_t no_zero_fill, /* don't zero fill absent pages */ -#if MACH_PAGEMAP boolean_t data_supply, /* treat as data_supply if * it is a write fault and a full * page is provided */ -#else - __unused boolean_t data_supply, -#endif vm_object_fault_info_t fault_info) { vm_page_t m; @@ -812,6 +837,7 @@ vm_fault_page( int my_fault; uint32_t try_failed_count; int interruptible; /* how may fault be interrupted? */ + int external_state = VM_EXTERNAL_STATE_UNKNOWN; memory_object_t pager; vm_fault_return_t retval; @@ -842,14 +868,22 @@ vm_fault_page( * into a copy object in order to avoid a redundant page out operation. 
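The change to vm_page_throttled() above escalates a page-creation storm from the soft to the hard delay when the compressor itself is out of headroom. A condensed sketch of the delay selection (illustrative only; the helper name is hypothetical, the other symbols come from the patch):

static int
page_fault_throttle_delay(boolean_t compressor_active)
{
	if (NEED_TO_HARD_THROTTLE_THIS_TASK())
		return HARD_THROTTLE_DELAY;	/* 20 ms */

	/* page-creation storms get the hard delay too once the
	 * compressor has no room left to absorb new pages */
	if (compressor_active && HARD_THROTTLE_LIMIT_REACHED())
		return HARD_THROTTLE_DELAY;	/* 20 ms */

	return SOFT_THROTTLE_DELAY;		/* 2 ms */
}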
*/ #if MACH_PAGEMAP -#define MUST_ASK_PAGER(o, f) (vm_external_state_get((o)->existence_map, (f)) \ - != VM_EXTERNAL_STATE_ABSENT) -#define PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \ - == VM_EXTERNAL_STATE_EXISTS) -#else -#define MUST_ASK_PAGER(o, f) (TRUE) -#define PAGED_OUT(o, f) (FALSE) -#endif +#define MUST_ASK_PAGER(o, f, s) \ + ((vm_external_state_get((o)->existence_map, (f)) \ + != VM_EXTERNAL_STATE_ABSENT) && \ + (s = (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)))) \ + != VM_EXTERNAL_STATE_ABSENT) +#define PAGED_OUT(o, f) \ + ((vm_external_state_get((o)->existence_map, (f)) \ + == VM_EXTERNAL_STATE_EXISTS) || \ + (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)) \ + == VM_EXTERNAL_STATE_EXISTS)) +#else /* MACH_PAGEMAP */ +#define MUST_ASK_PAGER(o, f, s) \ + ((s = VM_COMPRESSOR_PAGER_STATE_GET((o), (f))) != VM_EXTERNAL_STATE_ABSENT) +#define PAGED_OUT(o, f) \ + (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)) == VM_EXTERNAL_STATE_EXISTS) +#endif /* MACH_PAGEMAP */ /* * Recovery actions @@ -859,8 +893,12 @@ vm_fault_page( PAGE_WAKEUP_DONE(m); \ if (!m->active && !m->inactive && !m->throttled) { \ vm_page_lockspin_queues(); \ - if (!m->active && !m->inactive && !m->throttled) \ - vm_page_activate(m); \ + if (!m->active && !m->inactive && !m->throttled) { \ + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) \ + vm_page_deactivate(m); \ + else \ + vm_page_activate(m); \ + } \ vm_page_unlock_queues(); \ } \ MACRO_END @@ -934,6 +972,7 @@ vm_fault_page( * must be a "large page" object. We do not deal * with VM pages for this object. */ + caller_lookup = FALSE; m = VM_PAGE_NULL; goto phys_contig_object; } @@ -945,6 +984,7 @@ vm_fault_page( * a "activity_in_progress" reference and wait for * access to be unblocked. */ + caller_lookup = FALSE; /* no longer valid after sleep */ vm_object_activity_begin(object); vm_object_paging_end(object); while (object->blocked_access) { @@ -960,7 +1000,19 @@ vm_fault_page( /* * See whether the page at 'offset' is resident */ - m = vm_page_lookup(object, offset); + if (caller_lookup == TRUE) { + /* + * The caller has already looked up the page + * and gave us the result in "result_page". + * We can use this for the first lookup but + * it loses its validity as soon as we unlock + * the object. + */ + m = *result_page; + caller_lookup = FALSE; /* no longer valid after that */ + } else { + m = vm_page_lookup(object, offset); + } #if TRACEFAULTPAGE dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */ #endif @@ -1324,10 +1376,30 @@ vm_fault_page( * this object can provide the data or we're the top object... * object is locked; m == NULL */ - if (must_be_resident) + if (must_be_resident) { + if (fault_type == VM_PROT_NONE && + object == kernel_object) { + /* + * We've been called from vm_fault_unwire() + * while removing a map entry that was allocated + * with KMA_KOBJECT and KMA_VAONLY. This page + * is not present and there's nothing more to + * do here (nothing to unwire). 
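Note the deliberate side effect in the new MUST_ASK_PAGER(o, f, s): it answers "could any pager have this page?" and, in the same expression, deposits the compressor's answer in s so the caller can reuse it without a second query. An open-coded equivalent of the !MACH_PAGEMAP variant (illustrative sketch only):

int state = VM_COMPRESSOR_PAGER_STATE_GET(object, offset);
boolean_t must_ask = (state != VM_EXTERNAL_STATE_ABSENT);
/*
 * the caller can later test state == VM_EXTERNAL_STATE_EXISTS to take
 * the compressor fast path instead of a full pager round trip
 */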
+ */ + vm_fault_cleanup(object, first_m); + thread_interrupt_level(interruptible_state); + + return VM_FAULT_MEMORY_ERROR; + } + goto dont_look_for_page; + } - look_for_page = (object->pager_created && (MUST_ASK_PAGER(object, offset) == TRUE) && !data_supply); +#if !MACH_PAGEMAP + data_supply = FALSE; +#endif /* !MACH_PAGEMAP */ + + look_for_page = (object->pager_created && (MUST_ASK_PAGER(object, offset, external_state) == TRUE) && !data_supply); #if TRACEFAULTPAGE dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */ @@ -1356,6 +1428,7 @@ vm_fault_page( } if (look_for_page) { kern_return_t rc; + int my_fault_type; /* * If the memory manager is not ready, we @@ -1438,6 +1511,77 @@ vm_fault_page( return (VM_FAULT_RETRY); } } + if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && object->internal) { + + if (m == VM_PAGE_NULL) { + /* + * Allocate a new page for this object/offset pair as a placeholder + */ + m = vm_page_grab(); +#if TRACEFAULTPAGE + dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */ +#endif + if (m == VM_PAGE_NULL) { + + vm_fault_cleanup(object, first_m); + thread_interrupt_level(interruptible_state); + + return (VM_FAULT_MEMORY_SHORTAGE); + } + + m->absent = TRUE; + if (fault_info && fault_info->batch_pmap_op == TRUE) { + vm_page_insert_internal(m, object, offset, FALSE, TRUE, TRUE); + } else { + vm_page_insert(m, object, offset); + } + } + assert(m->busy); + + m->absent = TRUE; + pager = object->pager; + + vm_object_unlock(object); + + rc = vm_compressor_pager_get(pager, offset + object->paging_offset, m->phys_page, &my_fault_type, 0); + + vm_object_lock(object); + + switch (rc) { + case KERN_SUCCESS: + m->absent = FALSE; + m->dirty = TRUE; + if ((m->object->wimg_bits & + VM_WIMG_MASK) != + VM_WIMG_USE_DEFAULT) { + /* + * If the page is not cacheable, + * we can't let its contents + * linger in the data cache + * after the decompression. + */ + pmap_sync_page_attributes_phys( + m->phys_page); + } + break; + case KERN_MEMORY_FAILURE: + m->unusual = TRUE; + m->error = TRUE; + m->absent = FALSE; + break; + case KERN_MEMORY_ERROR: + assert(m->absent); + break; + default: + panic("?"); + } + PAGE_WAKEUP_DONE(m); + + rc = KERN_SUCCESS; + goto data_requested; + } + my_fault_type = DBG_PAGEIN_FAULT; + if (m != VM_PAGE_NULL) { VM_PAGE_FREE(m); m = VM_PAGE_NULL; @@ -1549,6 +1693,7 @@ vm_fault_page( #endif vm_object_lock(object); + data_requested: if (rc != KERN_SUCCESS) { vm_fault_cleanup(object, first_m); @@ -1560,10 +1705,12 @@ vm_fault_page( } else { clock_sec_t tv_sec; clock_usec_t tv_usec; - - clock_get_system_microtime(&tv_sec, &tv_usec); - current_thread()->t_page_creation_time = tv_sec; - current_thread()->t_page_creation_count = 0; + + if (my_fault_type == DBG_PAGEIN_FAULT) { + clock_get_system_microtime(&tv_sec, &tv_usec); + current_thread()->t_page_creation_time = tv_sec; + current_thread()->t_page_creation_count = 0; + } } if ((interruptible != THREAD_UNINT) && (current_thread()->sched_flags & TH_SFLAG_ABORT)) { @@ -1601,7 +1748,7 @@ vm_fault_page( * if we make it through the state checks * above, than we'll count it as such */ - my_fault = DBG_PAGEIN_FAULT; + my_fault = my_fault_type; /* * Retry with same object/offset, since new data may @@ -1837,7 +1984,8 @@ dont_look_for_page: /* * We no longer need the old page or object. 
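With the accounting change above, "pageins" keeps meaning "read from a real backing store", while compressor-backed faults are counted separately. A condensed restatement (illustrative; names are from the hunk above):

switch (my_fault) {
case DBG_PAGEIN_FAULT:
	/* only count as a pagein when a pager actually did I/O */
	if (!m->object->internal ||
	    DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE)
		VM_STAT_INCR(pageins);
	break;
case DBG_COMPRESSOR_FAULT:
case DBG_COMPRESSOR_SWAPIN_FAULT:
	VM_STAT_INCR(decompressions);	/* satisfied from the compressor */
	break;
}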
*/ - PAGE_WAKEUP_DONE(m); + RELEASE_PAGE(m); + vm_object_paging_end(object); vm_object_unlock(object); @@ -2068,6 +2216,7 @@ dont_look_for_page: #if MACH_PAGEMAP || vm_external_state_get(copy_object->existence_map, copy_offset) == VM_EXTERNAL_STATE_ABSENT #endif + || VM_COMPRESSOR_PAGER_STATE_GET(copy_object, copy_offset) == VM_EXTERNAL_STATE_ABSENT ) { vm_page_lockspin_queues(); @@ -2078,7 +2227,8 @@ dont_look_for_page: SET_PAGE_DIRTY(copy_m, TRUE); PAGE_WAKEUP_DONE(copy_m); - } else if (copy_object->internal) { + } else if (copy_object->internal && + (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE)) { /* * For internal objects check with the pager to see * if the page already exists in the backing store. @@ -2230,7 +2380,8 @@ done: retval = VM_FAULT_SUCCESS; if (my_fault == DBG_PAGEIN_FAULT) { - VM_STAT_INCR(pageins); + if (!m->object->internal || (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE)) + VM_STAT_INCR(pageins); DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL); DTRACE_VM2(maj_fault, int, 1, (uint64_t *), NULL); current_task()->pageins++; @@ -2251,6 +2402,9 @@ done: vm_fault_is_sequential(object, offset, fault_info->behavior); vm_fault_deactivate_behind(object, offset, fault_info->behavior); + } else if (my_fault == DBG_COMPRESSOR_FAULT || my_fault == DBG_COMPRESSOR_SWAPIN_FAULT) { + + VM_STAT_INCR(decompressions); } if (type_of_fault) *type_of_fault = my_fault; @@ -2322,6 +2476,7 @@ vm_fault_enter(vm_page_t m, boolean_t previously_pmapped = m->pmapped; boolean_t must_disconnect = 0; boolean_t map_is_switched, map_is_switch_protected; + int cs_enforcement_enabled; vm_object_lock_assert_held(m->object); #if DEBUG @@ -2414,7 +2569,9 @@ vm_fault_enter(vm_page_t m, * from the current map. We do that below right before we do the * PMAP_ENTER. */ - if(!cs_enforcement_disable && map_is_switched && + cs_enforcement_enabled = cs_enforcement(NULL); + + if(cs_enforcement_enabled && map_is_switched && map_is_switch_protected && page_immutable(m, prot) && (prot & VM_PROT_WRITE)) { @@ -2432,7 +2589,7 @@ vm_fault_enter(vm_page_t m, * code can be created */ if (m->cs_tainted || - (( !cs_enforcement_disable && !cs_bypass ) && + ((cs_enforcement_enabled && !cs_bypass ) && (/* The page is unsigned and wants to be executable */ (!m->cs_validated && (prot & VM_PROT_EXECUTE)) || /* The page should be immutable, but is in danger of being modified @@ -2465,6 +2622,14 @@ vm_fault_enter(vm_page_t m, assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE)); reject_page = FALSE; } else { + if (cs_debug > 5) + printf("vm_fault: signed: %s validate: %s tainted: %s wpmapped: %s slid: %s prot: 0x%x\n", + m->object->code_signed ? "yes" : "no", + m->cs_validated ? "yes" : "no", + m->cs_tainted ? "yes" : "no", + m->wpmapped ? "yes" : "no", + m->slid ? "yes" : "no", + (int)prot); reject_page = cs_invalid_page((addr64_t) vaddr); } @@ -2486,6 +2651,11 @@ vm_fault_enter(vm_page_t m, printf("CODESIGNING: vm_fault_enter(0x%llx): " "page %p obj %p off 0x%llx *** INVALID PAGE ***\n", (long long)vaddr, m, m->object, m->offset); +#if !SECURE_KERNEL + if (kr != KERN_SUCCESS && cs_enforcement_panic) { + panic("CODESIGNING: panicking on invalid page\n"); + } +#endif } } else { @@ -2493,6 +2663,210 @@ vm_fault_enter(vm_page_t m, kr = KERN_SUCCESS; } + boolean_t page_queues_locked = FALSE; +#define __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED() \ +MACRO_BEGIN \ + if (! 
page_queues_locked) { \ + page_queues_locked = TRUE; \ + vm_page_lockspin_queues(); \ + } \ +MACRO_END +#define __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED() \ +MACRO_BEGIN \ + if (page_queues_locked) { \ + page_queues_locked = FALSE; \ + vm_page_unlock_queues(); \ + } \ +MACRO_END + + /* + * Hold queues lock to manipulate + * the page queues. Change wiring + * case is obvious. + */ + assert(m->compressor || m->object != compressor_object); + if (m->compressor) { + /* + * Compressor pages are neither wired + * nor pageable and should never change. + */ + assert(m->object == compressor_object); + } else if (change_wiring) { + __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); + + if (wired) { + if (kr == KERN_SUCCESS) { + vm_page_wire(m); + } + } else { + vm_page_unwire(m, TRUE); + } + /* we keep the page queues lock, if we need it later */ + + } else { + if (kr != KERN_SUCCESS) { + __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); + vm_page_deactivate(m); + /* we keep the page queues lock, if we need it later */ + } else if (((!m->active && !m->inactive) || + m->clean_queue || + no_cache) && + !VM_PAGE_WIRED(m) && !m->throttled) { + + if (vm_page_local_q && + !no_cache && + (*type_of_fault == DBG_COW_FAULT || + *type_of_fault == DBG_ZERO_FILL_FAULT) ) { + struct vpl *lq; + uint32_t lid; + + __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); + vm_object_lock_assert_exclusive(m->object); + + /* + * we got a local queue to stuff this + * new page on... + * its safe to manipulate local and + * local_id at this point since we're + * behind an exclusive object lock and + * the page is not on any global queue. + * + * we'll use the current cpu number to + * select the queue note that we don't + * need to disable preemption... we're + * going to behind the local queue's + * lock to do the real work + */ + lid = cpu_number(); + + lq = &vm_page_local_q[lid].vpl_un.vpl; + + VPL_LOCK(&lq->vpl_lock); + + queue_enter(&lq->vpl_queue, m, + vm_page_t, pageq); + m->local = TRUE; + m->local_id = lid; + lq->vpl_count++; + + if (m->object->internal) + lq->vpl_internal_count++; + else + lq->vpl_external_count++; + + VPL_UNLOCK(&lq->vpl_lock); + + if (lq->vpl_count > vm_page_local_q_soft_limit) + { + /* + * we're beyond the soft limit + * for the local queue + * vm_page_reactivate_local will + * 'try' to take the global page + * queue lock... if it can't + * that's ok... we'll let the + * queue continue to grow up + * to the hard limit... at that + * point we'll wait for the + * lock... once we've got the + * lock, we'll transfer all of + * the pages from the local + * queue to the global active + * queue + */ + vm_page_reactivate_local(lid, FALSE, FALSE); + } + } else { + + __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); + + /* + * test again now that we hold the + * page queue lock + */ + if (!VM_PAGE_WIRED(m)) { + if (m->clean_queue) { + VM_PAGE_QUEUES_REMOVE(m); + + vm_pageout_cleaned_reactivated++; + vm_pageout_cleaned_fault_reactivated++; + } + + if ((!m->active && + !m->inactive) || + no_cache) { + /* + * If this is a no_cache mapping + * and the page has never been + * mapped before or was + * previously a no_cache page, + * then we want to leave pages + * in the speculative state so + * that they can be readily + * recycled if free memory runs + * low. Otherwise the page is + * activated as normal. 
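The __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED()/__VM_PAGE_UNLOCK_QUEUES_IF_NEEDED() pair above implements a take-at-most-once lock pattern: several branches of vm_fault_enter() may or may not need the page queues lock, and the flag lets the lock be taken once, carried across branches, and dropped exactly once at the end. The same idea as hypothetical inline helpers (sketch only, not part of the patch):

static inline void
page_queues_lock_if_needed(boolean_t *locked)
{
	if (!*locked) {
		*locked = TRUE;
		vm_page_lockspin_queues();
	}
}

static inline void
page_queues_unlock_if_needed(boolean_t *locked)
{
	if (*locked) {
		*locked = FALSE;
		vm_page_unlock_queues();
	}
}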
+ */ + + if (no_cache && + (!previously_pmapped || + m->no_cache)) { + m->no_cache = TRUE; + + if (!m->speculative) + vm_page_speculate(m, FALSE); + + } else if (!m->active && + !m->inactive) { + + vm_page_activate(m); + } + } + } + /* we keep the page queues lock, if we need it later */ + } + } + } + + if ((prot & VM_PROT_EXECUTE) && + ! m->xpmapped) { + + __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); + + /* + * xpmapped is protected by the page queues lock + * so it matters not that we might only hold the + * object lock in the shared state + */ + + if (! m->xpmapped) { + + m->xpmapped = TRUE; + __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); + + if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && + m->object->internal && + m->object->pager != NULL) { + /* + * This page could have been + * uncompressed by the + * compressor pager and its + * contents might be only in + * the data cache. + * Since it's being mapped for + * "execute" for the first time, + * make sure the icache is in + * sync. + */ + pmap_sync_page_data_phys(m->phys_page); + } + + } + } + /* we're done with the page queues lock, if we ever took it */ + __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); + + + /* If we have a KERN_SUCCESS from the previous checks, we either have * a good page, or a tainted page that has been accepted by the process. * In both cases the page will be entered into the pmap. @@ -2512,6 +2886,9 @@ vm_fault_enter( m->pmapped = TRUE; if(vm_page_is_slideable(m)) { boolean_t was_busy = m->busy; + + vm_object_lock_assert_exclusive(m->object); + m->busy = TRUE; kr = vm_page_slide(m, 0); assert(m->busy); @@ -2541,7 +2918,7 @@ vm_fault_enter( * We can only get here * because of the CSE logic */ - assert(cs_enforcement_disable == FALSE); + assert(cs_enforcement_enabled); pmap_disconnect(m->phys_page); /* * If we are faulting for a write, we can clear @@ -2588,6 +2965,9 @@ vm_fault_enter( * use the blocking version instead. Requires marking * the page busy and unlocking the object */ boolean_t was_busy = m->busy; + + vm_object_lock_assert_exclusive(m->object); + m->busy = TRUE; vm_object_unlock(m->object); @@ -2607,112 +2987,6 @@ vm_fault_enter( } after_the_pmap_enter: - /* - * Hold queues lock to manipulate - * the page queues. Change wiring - * case is obvious. - */ - if (change_wiring) { - vm_page_lockspin_queues(); - - if (wired) { - if (kr == KERN_SUCCESS) { - vm_page_wire(m); - } - } else { - vm_page_unwire(m, TRUE); - } - vm_page_unlock_queues(); - - } else { - if (kr != KERN_SUCCESS) { - vm_page_lockspin_queues(); - vm_page_deactivate(m); - vm_page_unlock_queues(); - } else { - if (((!m->active && !m->inactive) || m->clean_queue || no_cache) && !VM_PAGE_WIRED(m) && !m->throttled) { - - if ( vm_page_local_q && !no_cache && (*type_of_fault == DBG_COW_FAULT || *type_of_fault == DBG_ZERO_FILL_FAULT) ) { - struct vpl *lq; - uint32_t lid; - - /* - * we got a local queue to stuff this new page on... - * its safe to manipulate local and local_id at this point - * since we're behind an exclusive object lock and the - * page is not on any global queue. - * - * we'll use the current cpu number to select the queue - * note that we don't need to disable preemption... 
we're - * going to behind the local queue's lock to do the real - * work - */ - lid = cpu_number(); - - lq = &vm_page_local_q[lid].vpl_un.vpl; - - VPL_LOCK(&lq->vpl_lock); - - queue_enter(&lq->vpl_queue, m, vm_page_t, pageq); - m->local = TRUE; - m->local_id = lid; - lq->vpl_count++; - - VPL_UNLOCK(&lq->vpl_lock); - - if (lq->vpl_count > vm_page_local_q_soft_limit) { - /* - * we're beyond the soft limit for the local queue - * vm_page_reactivate_local will 'try' to take - * the global page queue lock... if it can't that's - * ok... we'll let the queue continue to grow up - * to the hard limit... at that point we'll wait - * for the lock... once we've got the lock, we'll - * transfer all of the pages from the local queue - * to the global active queue - */ - vm_page_reactivate_local(lid, FALSE, FALSE); - } - return kr; - } - - vm_page_lockspin_queues(); - /* - * test again now that we hold the page queue lock - */ - if (!VM_PAGE_WIRED(m)) { - if (m->clean_queue) { - VM_PAGE_QUEUES_REMOVE(m); - - vm_pageout_cleaned_reactivated++; - vm_pageout_cleaned_fault_reactivated++; - } - - if ((!m->active && !m->inactive) || no_cache) { - /* - * If this is a no_cache mapping and the page has never been - * mapped before or was previously a no_cache page, then we - * want to leave pages in the speculative state so that they - * can be readily recycled if free memory runs low. Otherwise - * the page is activated as normal. - */ - - if (no_cache && (!previously_pmapped || m->no_cache)) { - m->no_cache = TRUE; - - if (!m->speculative) - vm_page_speculate(m, FALSE); - - } else if (!m->active && !m->inactive) { - - vm_page_activate(m); - } - } - } - vm_page_unlock_queues(); - } - } - } return kr; } @@ -2736,6 +3010,7 @@ extern int _map_enter_debug; unsigned long vm_fault_collapse_total = 0; unsigned long vm_fault_collapse_skipped = 0; + kern_return_t vm_fault( vm_map_t map, @@ -2771,6 +3046,7 @@ vm_fault( struct vm_object_fault_info fault_info; boolean_t need_collapse = FALSE; boolean_t need_retry = FALSE; + boolean_t *need_retry_ptr = NULL; int object_lock_type = 0; int cur_object_lock_type; vm_object_t top_object = VM_OBJECT_NULL; @@ -2779,8 +3055,8 @@ vm_fault( KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START, - (int)((uint64_t)vaddr >> 32), - (int)vaddr, + ((uint64_t)vaddr >> 32), + vaddr, (map == kernel_map), 0, 0); @@ -2788,8 +3064,8 @@ vm_fault( if (get_preemption_level() != 0) { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END, - (int)((uint64_t)vaddr >> 32), - (int)vaddr, + ((uint64_t)vaddr >> 32), + vaddr, KERN_FAILURE, 0, 0); @@ -2942,7 +3218,6 @@ RetryFault: * have object that 'm' belongs to locked exclusively */ if (object != cur_object) { - vm_object_unlock(object); if (cur_object_lock_type == OBJECT_LOCK_SHARED) { @@ -2951,11 +3226,13 @@ RetryFault: if (vm_object_lock_upgrade(cur_object) == FALSE) { /* * couldn't upgrade so go do a full retry - * immediately since we've already dropped - * the top object lock associated with this page - * and the current one got dropped due to the - * failed upgrade... the state is no longer valid + * immediately since we can no longer be + * certain about cur_object (since we + * don't hold a reference on it)... 
+ * first drop the top object lock */ + vm_object_unlock(object); + vm_map_unlock_read(map); if (real_map != map) vm_map_unlock(real_map); @@ -2982,6 +3259,30 @@ RetryFault: continue; } } + if (m->pageout_queue && m->object->internal && COMPRESSED_PAGER_IS_ACTIVE) { + /* + * m->busy == TRUE and the object is locked exclusively + * if m->pageout_queue == TRUE after we acquire the + * queues lock, we are guaranteed that it is stable on + * the pageout queue and therefore reclaimable + * + * NOTE: this is only true for the internal pageout queue + * in the compressor world + */ + vm_page_lock_queues(); + + if (m->pageout_queue) { + vm_pageout_throttle_up(m); + vm_page_unlock_queues(); + + PAGE_WAKEUP_DONE(m); + goto reclaimed_from_pageout; + } + vm_page_unlock_queues(); + } + if (object != cur_object) + vm_object_unlock(object); + vm_map_unlock_read(map); if (real_map != map) vm_map_unlock(real_map); @@ -3001,6 +3302,7 @@ RetryFault: kr = KERN_ABORTED; goto done; } +reclaimed_from_pageout: if (m->laundry) { if (object != cur_object) { if (cur_object_lock_type == OBJECT_LOCK_SHARED) { @@ -3255,6 +3557,11 @@ FastPmapEnter: * cur_object == NULL or it's been unlocked * no paging references on either object or cur_object */ + if (top_object != VM_OBJECT_NULL || object_lock_type != OBJECT_LOCK_EXCLUSIVE) + need_retry_ptr = &need_retry; + else + need_retry_ptr = NULL; + if (caller_pmap) { kr = vm_fault_enter(m, caller_pmap, @@ -3265,7 +3572,7 @@ FastPmapEnter: change_wiring, fault_info.no_cache, fault_info.cs_bypass, - (top_object != VM_OBJECT_NULL ? &need_retry : NULL), + need_retry_ptr, &type_of_fault); } else { kr = vm_fault_enter(m, @@ -3277,7 +3584,7 @@ FastPmapEnter: change_wiring, fault_info.no_cache, fault_info.cs_bypass, - (top_object != VM_OBJECT_NULL ? &need_retry : NULL), + need_retry_ptr, &type_of_fault); } @@ -3330,7 +3637,7 @@ FastPmapEnter: * re-drive the fault which should result in vm_fault_enter * being able to successfully enter the mapping this time around */ - (void)pmap_enter_options(pmap, vaddr, 0, 0, 0, 0, 0, PMAP_OPTIONS_NOENTER); + (void)pmap_enter_options(pmap, vaddr, 0, 0, 0, 0, 0, PMAP_OPTIONS_NOENTER, NULL); need_retry = FALSE; goto RetryFault; @@ -3476,12 +3783,148 @@ FastPmapEnter: * No page at cur_object, cur_offset... m == NULL */ if (cur_object->pager_created) { - if (MUST_ASK_PAGER(cur_object, cur_offset) == TRUE) { + int compressor_external_state = VM_EXTERNAL_STATE_UNKNOWN; + + if (MUST_ASK_PAGER(cur_object, cur_offset, compressor_external_state) == TRUE) { + int my_fault_type; + int c_flags = C_DONT_BLOCK; + boolean_t insert_cur_object = FALSE; + /* * May have to talk to a pager... - * take the slow path. 
+ * if so, take the slow path by + * doing a 'break' from the while (TRUE) loop + * + * external_state will only be set to VM_EXTERNAL_STATE_EXISTS + * if the compressor is active and the page exists there + */ + if (compressor_external_state != VM_EXTERNAL_STATE_EXISTS) + break; + + if (map == kernel_map || real_map == kernel_map) { + /* + * can't call into the compressor with the kernel_map + * lock held, since the compressor may try to operate + * on the kernel map in order to return an empty c_segment + */ + break; + } + if (object != cur_object) { + if (fault_type & VM_PROT_WRITE) + c_flags |= C_KEEP; + else + insert_cur_object = TRUE; + } + if (insert_cur_object == TRUE) { + + if (cur_object_lock_type == OBJECT_LOCK_SHARED) { + + cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; + + if (vm_object_lock_upgrade(cur_object) == FALSE) { + /* + * couldn't upgrade so go do a full retry + * immediately since we can no longer be + * certain about cur_object (since we + * don't hold a reference on it)... + * first drop the top object lock + */ + vm_object_unlock(object); + + vm_map_unlock_read(map); + if (real_map != map) + vm_map_unlock(real_map); + + goto RetryFault; + } + } + } else if (object_lock_type == OBJECT_LOCK_SHARED) { + + object_lock_type = OBJECT_LOCK_EXCLUSIVE; + + if (object != cur_object) { + /* + * we can't go for the upgrade on the top + * lock since the upgrade may block waiting + * for readers to drain... since we hold + * cur_object locked at this point, waiting + * for the readers to drain would represent + * a lock order inversion since the lock order + * for objects is the reference order in the + * shadow chain + */ + vm_object_unlock(object); + vm_object_unlock(cur_object); + + vm_map_unlock_read(map); + if (real_map != map) + vm_map_unlock(real_map); + + goto RetryFault; + } + if (vm_object_lock_upgrade(object) == FALSE) { + /* + * couldn't upgrade, so explicitly take the lock + * exclusively and go relookup the page since we + * will have dropped the object lock and + * a different thread could have inserted + * a page at this offset + * no need for a full retry since we're + * at the top level of the object chain + */ + vm_object_lock(object); + + continue; + } + } + m = vm_page_grab(); + + if (m == VM_PAGE_NULL) { + /* + * no free page currently available... + * must take the slow path + */ + break; + } + if (vm_compressor_pager_get(cur_object->pager, cur_offset + cur_object->paging_offset, + m->phys_page, &my_fault_type, c_flags) != KERN_SUCCESS) { + vm_page_release(m); + break; + } + m->dirty = TRUE; + + if (insert_cur_object) + vm_page_insert(m, cur_object, cur_offset); + else + vm_page_insert(m, object, offset); + + if ((m->object->wimg_bits & VM_WIMG_MASK) != VM_WIMG_USE_DEFAULT) { + /* + * If the page is not cacheable, + * we can't let its contents + * linger in the data cache + * after the decompression. 
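The fast path above has to decide both where the decompressed page is inserted and whether the compressed copy survives. An annotated restatement of the flag selection (illustrative only; object is the top object, cur_object the backing object, as in the hunk):

int c_flags = C_DONT_BLOCK;		/* never sleep on the fast path */
boolean_t insert_cur_object = FALSE;

if (object != cur_object) {
	if (fault_type & VM_PROT_WRITE) {
		/*
		 * COW write: the page is inserted in the top object,
		 * so keep the compressed copy around for other maps
		 * that still fault through cur_object
		 */
		c_flags |= C_KEEP;
	} else {
		/* read fault: share the page via the backing object */
		insert_cur_object = TRUE;
	}
}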
+ */ + pmap_sync_page_attributes_phys(m->phys_page); + } + type_of_fault = my_fault_type; + + VM_STAT_INCR(decompressions); + + if (cur_object != object) { + if (insert_cur_object) { + top_object = object; + /* + * switch to the object that has the new page + */ + object = cur_object; + object_lock_type = cur_object_lock_type; + } else { + vm_object_unlock(cur_object); + cur_object = object; + } + } + goto FastPmapEnter; } /* * existence map present and indicates @@ -3654,8 +4097,10 @@ handle_copy_delay: error_code = 0; + result_page = VM_PAGE_NULL; kr = vm_fault_page(object, offset, fault_type, (change_wiring && !wired), + FALSE, /* page not looked up */ &prot, &result_page, &top_page, &type_of_fault, &error_code, map->no_zero_fill, @@ -4044,12 +4489,17 @@ handle_copy_delay: done: thread_interrupt_level(interruptible_state); - throttle_lowpri_io(TRUE); + /* + * Only throttle on faults which cause a pagein. + */ + if ((type_of_fault == DBG_PAGEIND_FAULT) || (type_of_fault == DBG_PAGEINV_FAULT) || (type_of_fault == DBG_COMPRESSOR_SWAPIN_FAULT)) { + throttle_lowpri_io(1); + } KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END, - (int)((uint64_t)vaddr >> 32), - (int)vaddr, + ((uint64_t)vaddr >> 32), + vaddr, kr, type_of_fault, 0); @@ -4201,10 +4651,12 @@ vm_fault_unwire( XPR(XPR_VM_FAULT, "vm_fault_unwire -> vm_fault_page\n", 0,0,0,0,0); + result_page = VM_PAGE_NULL; result = vm_fault_page( object, entry->offset + (va - entry->vme_start), VM_PROT_NONE, TRUE, + FALSE, /* page not looked up */ &prot, &result_page, &top_page, (int *)0, NULL, map->no_zero_fill, @@ -4224,6 +4676,18 @@ vm_fault_unwire( if (result == VM_FAULT_MEMORY_ERROR && !object->alive) continue; + if (result == VM_FAULT_MEMORY_ERROR && + object == kernel_object) { + /* + * This must have been allocated with + * KMA_KOBJECT and KMA_VAONLY and there's + * no physical page at this offset. + * We're done (no page to free). + */ + assert(deallocate); + continue; + } + if (result != VM_FAULT_SUCCESS) panic("vm_fault_unwire: failure"); @@ -4614,10 +5078,12 @@ vm_fault_copy( } XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0); + dst_page = VM_PAGE_NULL; result = vm_fault_page(dst_object, vm_object_trunc_page(dst_offset), VM_PROT_WRITE|VM_PROT_READ, FALSE, + FALSE, /* page not looked up */ &dst_prot, &dst_page, &dst_top_page, (int *)0, &error, @@ -4708,10 +5174,12 @@ vm_fault_copy( XPR(XPR_VM_FAULT, "vm_fault_copy(2) -> vm_fault_page\n", 0,0,0,0,0); + result_page = VM_PAGE_NULL; result = vm_fault_page( src_object, vm_object_trunc_page(src_offset), VM_PROT_READ, FALSE, + FALSE, /* page not looked up */ &src_prot, &result_page, &src_top_page, (int *)0, &error, FALSE, @@ -4945,8 +5413,6 @@ vm_fault_classify_init(void) #endif /* VM_FAULT_CLASSIFY */ -extern int cs_validation; - void vm_page_validate_cs_mapped( vm_page_t page, @@ -5042,6 +5508,7 @@ vm_page_validate_cs( vm_offset_t kaddr; kern_return_t kr; boolean_t busy_page; + boolean_t need_unmap; vm_object_lock_assert_held(page->object); @@ -5102,15 +5569,17 @@ vm_page_validate_cs( vm_object_paging_begin(object); /* map the page in the kernel address space */ - koffset = 0; ksize = PAGE_SIZE_64; - kr = vm_paging_map_object(&koffset, - page, + koffset = 0; + need_unmap = FALSE; + kr = vm_paging_map_object(page, object, offset, - &ksize, VM_PROT_READ, - FALSE); /* can't unlock object ! */ + FALSE, /* can't unlock object ! 
*/ + &ksize, + &koffset, + &need_unmap); if (kr != KERN_SUCCESS) { panic("vm_page_validate_cs: could not map page: 0x%x\n", kr); } @@ -5131,7 +5600,7 @@ vm_page_validate_cs( if (!busy_page) { PAGE_WAKEUP_DONE(page); } - if (koffset != 0) { + if (need_unmap) { /* unmap the map from the kernel address space */ vm_paging_unmap_object(object, koffset, koffset + ksize); koffset = 0; diff --git a/osfmk/vm/vm_fault.h b/osfmk/vm/vm_fault.h index 878d140f1..1c4e0696d 100644 --- a/osfmk/vm/vm_fault.h +++ b/osfmk/vm/vm_fault.h @@ -113,10 +113,11 @@ extern vm_fault_return_t vm_fault_page( vm_object_offset_t first_offset,/* Offset into object */ vm_prot_t fault_type, /* What access is requested */ boolean_t must_be_resident,/* Must page be resident? */ + boolean_t caller_lookup, /* caller looked up page */ /* Modifies in place: */ vm_prot_t *protection, /* Protection for mapping */ - /* Returns: */ vm_page_t *result_page, /* Page found, if successful */ + /* Returns: */ vm_page_t *top_page, /* Page in top object, if * not result_page. */ int *type_of_fault, /* if non-zero, return COW, zero-filled, etc... diff --git a/osfmk/vm/vm_init.c b/osfmk/vm/vm_init.c index 59af43c26..027e6c416 100644 --- a/osfmk/vm/vm_init.c +++ b/osfmk/vm/vm_init.c @@ -90,6 +90,7 @@ const vm_offset_t vm_max_kernel_address = VM_MAX_KERNEL_ADDRESS; boolean_t vm_kernel_ready = FALSE; boolean_t kmem_ready = FALSE; +boolean_t kmem_alloc_ready = FALSE; boolean_t zlog_ready = FALSE; vm_offset_t kmapoff_kaddr; @@ -152,7 +153,7 @@ vm_mem_bootstrap(void) vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling pmap_init\n")); pmap_init(); - zlog_ready = TRUE; + kmem_alloc_ready = TRUE; if (PE_parse_boot_argn("zsize", &zsizearg, sizeof (zsizearg))) zsize = zsizearg * 1024ULL * 1024ULL; diff --git a/osfmk/vm/vm_init.h b/osfmk/vm/vm_init.h index b405952d1..8e23b580b 100644 --- a/osfmk/vm/vm_init.h +++ b/osfmk/vm/vm_init.h @@ -32,8 +32,8 @@ #ifndef VM_INIT_H #define VM_INIT_H -extern void vm_mem_bootstrap(void) __attribute__((section("__TEXT, initcode"))); -extern void vm_mem_init(void) __attribute__((section("__TEXT, initcode"))); -extern void vm_map_steal_memory(void) __attribute__((section("__TEXT, initcode")));; +extern void vm_mem_bootstrap(void); +extern void vm_mem_init(void); +extern void vm_map_steal_memory(void);; #endif /* VM_INIT_H */ diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c index 0629a9900..65e48ae7d 100644 --- a/osfmk/vm/vm_kern.c +++ b/osfmk/vm/vm_kern.c @@ -127,7 +127,8 @@ kmem_alloc_contig( if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) return KERN_INVALID_ARGUMENT; - map_size = vm_map_round_page(size); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); map_mask = (vm_map_offset_t)mask; /* Check for zero allocation size (either directly or via overflow) */ @@ -165,8 +166,12 @@ kmem_alloc_contig( kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags); if (kr != KERN_SUCCESS) { - vm_map_remove(map, vm_map_trunc_page(map_addr), - vm_map_round_page(map_addr + map_size), 0); + vm_map_remove(map, + vm_map_trunc_page(map_addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(map_addr + map_size, + VM_MAP_PAGE_MASK(map)), + 0); vm_object_deallocate(object); *addrp = 0; return kr; @@ -182,16 +187,25 @@ kmem_alloc_contig( } vm_object_unlock(object); - if ((kr = vm_map_wire(map, vm_map_trunc_page(map_addr), - vm_map_round_page(map_addr + map_size), VM_PROT_DEFAULT, FALSE)) - != KERN_SUCCESS) { + kr = vm_map_wire(map, + vm_map_trunc_page(map_addr, + 
VM_MAP_PAGE_MASK(map)), + vm_map_round_page(map_addr + map_size, + VM_MAP_PAGE_MASK(map)), + VM_PROT_DEFAULT, + FALSE); + if (kr != KERN_SUCCESS) { if (object == kernel_object) { vm_object_lock(object); vm_object_page_remove(object, offset, offset + map_size); vm_object_unlock(object); } - vm_map_remove(map, vm_map_trunc_page(map_addr), - vm_map_round_page(map_addr + map_size), 0); + vm_map_remove(map, + vm_map_trunc_page(map_addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(map_addr + map_size, + VM_MAP_PAGE_MASK(map)), + 0); vm_object_deallocate(object); return kr; } @@ -237,7 +251,7 @@ kernel_memory_allocate( vm_map_offset_t map_addr, fill_start; vm_map_offset_t map_mask; vm_map_size_t map_size, fill_size; - kern_return_t kr; + kern_return_t kr, pe_result; vm_page_t mem; vm_page_t guard_page_list = NULL; vm_page_t wired_page_list = NULL; @@ -251,7 +265,8 @@ kernel_memory_allocate( panic("kernel_memory_allocate: VM is not ready"); } - map_size = vm_map_round_page(size); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); map_mask = (vm_map_offset_t) mask; vm_alloc_flags = 0; @@ -265,8 +280,9 @@ kernel_memory_allocate( * limit the size of a single extent of wired memory * to try and limit the damage to the system if * too many pages get wired down + * limit raised to 2GB with 128GB max physical limit */ - if (map_size > (1 << 30)) { + if (map_size > (1ULL << 31)) { return KERN_RESOURCE_SHORTAGE; } @@ -328,6 +344,7 @@ kernel_memory_allocate( guard_page_list = mem; } + if (! (flags & KMA_VAONLY)) { for (i = 0; i < wired_page_count; i++) { uint64_t unavailable; @@ -359,6 +376,7 @@ kernel_memory_allocate( mem->pageq.next = (queue_entry_t)wired_page_list; wired_page_list = mem; } + } /* * Allocate a new object (if necessary). We must do this before @@ -367,6 +385,9 @@ kernel_memory_allocate( if ((flags & KMA_KOBJECT) != 0) { object = kernel_object; vm_object_reference(object); + } else if ((flags & KMA_COMPRESSOR) != 0) { + object = compressor_object; + vm_object_reference(object); } else { object = vm_object_allocate(map_size); } @@ -380,15 +401,16 @@ kernel_memory_allocate( } entry->object.vm_object = object; - entry->offset = offset = (object == kernel_object) ? + entry->offset = offset = (object == kernel_object || object == compressor_object) ? map_addr : 0; - - entry->wired_count++; + + if (object != compressor_object) + entry->wired_count++; if (flags & KMA_PERMANENT) entry->permanent = TRUE; - if (object != kernel_object) + if (object != kernel_object && object != compressor_object) vm_object_reference(object); vm_object_lock(object); @@ -412,6 +434,9 @@ kernel_memory_allocate( kma_prot = VM_PROT_READ | VM_PROT_WRITE; + if (flags & KMA_VAONLY) { + pg_offset = fill_start + fill_size; + } else { for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) { if (wired_page_list == NULL) panic("kernel_memory_allocate: wired_page_list == NULL"); @@ -427,15 +452,25 @@ kernel_memory_allocate( mem->pmapped = TRUE; mem->wpmapped = TRUE; - PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, - kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE); + PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem, + kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE, + PMAP_OPTIONS_NOWAIT, pe_result); + + if (pe_result == KERN_RESOURCE_SHORTAGE) { + vm_object_unlock(object); + PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, + kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? 
VM_MEM_STACK : 0), TRUE); + + vm_object_lock(object); + } if (flags & KMA_NOENCRYPT) { bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE); pmap_set_noencrypt(mem->phys_page); } } + } if ((fill_start + fill_size) < map_size) { if (guard_page_list == NULL) panic("kernel_memory_allocate: guard_page_list == NULL"); @@ -451,16 +486,18 @@ kernel_memory_allocate( if (guard_page_list || wired_page_list) panic("kernel_memory_allocate: non empty list\n"); + if (! (flags & KMA_VAONLY)) { vm_page_lockspin_queues(); vm_page_wire_count += wired_page_count; vm_page_unlock_queues(); + } vm_object_unlock(object); /* * now that the pages are wired, we no longer have to fear coalesce */ - if (object == kernel_object) + if (object == kernel_object || object == compressor_object) vm_map_simplify(map, map_addr); else vm_object_deallocate(object); @@ -481,6 +518,243 @@ out: return kr; } +kern_return_t +kernel_memory_populate( + vm_map_t map, + vm_offset_t addr, + vm_size_t size, + int flags) +{ + vm_object_t object; + vm_object_offset_t offset, pg_offset; + kern_return_t kr, pe_result; + vm_page_t mem; + vm_page_t page_list = NULL; + int page_count = 0; + int i; + + page_count = (int) (size / PAGE_SIZE_64); + + assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT)); + + if (flags & KMA_COMPRESSOR) { + + for (i = 0; i < page_count; i++) { + for (;;) { + mem = vm_page_grab(); + + if (mem != VM_PAGE_NULL) + break; + + VM_PAGE_WAIT(); + } + mem->pageq.next = (queue_entry_t) page_list; + page_list = mem; + } + offset = addr; + object = compressor_object; + + vm_object_lock(object); + + for (pg_offset = 0; + pg_offset < size; + pg_offset += PAGE_SIZE_64) { + + mem = page_list; + page_list = (vm_page_t) mem->pageq.next; + mem->pageq.next = NULL; + + vm_page_insert(mem, object, offset + pg_offset); + assert(mem->busy); + + PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, + 0, TRUE, PMAP_OPTIONS_NOWAIT, pe_result); + + if (pe_result == KERN_RESOURCE_SHORTAGE) { + + vm_object_unlock(object); + + PMAP_ENTER(kernel_pmap, addr + pg_offset, mem, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE); + + vm_object_lock(object); + } + mem->busy = FALSE; + mem->pmapped = TRUE; + mem->wpmapped = TRUE; + mem->compressor = TRUE; + } + vm_object_unlock(object); + + return KERN_SUCCESS; + } + + for (i = 0; i < page_count; i++) { + for (;;) { + if (flags & KMA_LOMEM) + mem = vm_page_grablo(); + else + mem = vm_page_grab(); + + if (mem != VM_PAGE_NULL) + break; + + if (flags & KMA_NOPAGEWAIT) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + if ((flags & KMA_LOMEM) && + (vm_lopage_needed == TRUE)) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } + VM_PAGE_WAIT(); + } + mem->pageq.next = (queue_entry_t) page_list; + page_list = mem; + } + if (flags & KMA_KOBJECT) { + offset = addr; + object = kernel_object; + + vm_object_lock(object); + } else { + /* + * If it's not the kernel object, we need to: + * lock map; + * lookup entry; + * lock object; + * take reference on object; + * unlock map; + */ + panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): " + "!KMA_KOBJECT", + map, (uint64_t) addr, (uint64_t) size, flags); + } + + for (pg_offset = 0; + pg_offset < size; + pg_offset += PAGE_SIZE_64) { + + if (page_list == NULL) + panic("kernel_memory_populate: page_list == NULL"); + + mem = page_list; + page_list = (vm_page_t) mem->pageq.next; + mem->pageq.next = NULL; + + mem->wire_count++; + + vm_page_insert(mem, object, offset + pg_offset); + + mem->busy 
= FALSE; + mem->pmapped = TRUE; + mem->wpmapped = TRUE; + + PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, + ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE, + PMAP_OPTIONS_NOWAIT, pe_result); + + if (pe_result == KERN_RESOURCE_SHORTAGE) { + + vm_object_unlock(object); + + PMAP_ENTER(kernel_pmap, addr + pg_offset, mem, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, + ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE); + + vm_object_lock(object); + } + if (flags & KMA_NOENCRYPT) { + bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE); + pmap_set_noencrypt(mem->phys_page); + } + } + vm_page_lock_queues(); + vm_page_wire_count += page_count; + vm_page_unlock_queues(); + + vm_object_unlock(object); + + return KERN_SUCCESS; + +out: + if (page_list) + vm_page_free_list(page_list, FALSE); + + return kr; +} + + +void +kernel_memory_depopulate( + vm_map_t map, + vm_offset_t addr, + vm_size_t size, + int flags) +{ + vm_object_t object; + vm_object_offset_t offset, pg_offset; + vm_page_t mem; + vm_page_t local_freeq = NULL; + + assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT)); + + if (flags & KMA_COMPRESSOR) { + offset = addr; + object = compressor_object; + + vm_object_lock(object); + } else if (flags & KMA_KOBJECT) { + offset = addr; + object = kernel_object; + + vm_object_lock(object); + } else { + offset = 0; + object = NULL; + /* + * If it's not the kernel object, we need to: + * lock map; + * lookup entry; + * lock object; + * unlock map; + */ + panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): " + "!KMA_KOBJECT", + map, (uint64_t) addr, (uint64_t) size, flags); + } + pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE); + + for (pg_offset = 0; + pg_offset < size; + pg_offset += PAGE_SIZE_64) { + + mem = vm_page_lookup(object, offset + pg_offset); + + assert(mem); + + pmap_disconnect(mem->phys_page); + + mem->busy = TRUE; + + assert(mem->tabled); + vm_page_remove(mem, TRUE); + assert(mem->busy); + + assert(mem->pageq.next == NULL && + mem->pageq.prev == NULL); + mem->pageq.next = (queue_entry_t)local_freeq; + local_freeq = mem; + } + vm_object_unlock(object); + + if (local_freeq) + vm_page_free_list(local_freeq, TRUE); +} + /* * kmem_alloc: * @@ -529,10 +803,13 @@ kmem_realloc( vm_page_t mem; kern_return_t kr; - oldmapmin = vm_map_trunc_page(oldaddr); - oldmapmax = vm_map_round_page(oldaddr + oldsize); + oldmapmin = vm_map_trunc_page(oldaddr, + VM_MAP_PAGE_MASK(map)); + oldmapmax = vm_map_round_page(oldaddr + oldsize, + VM_MAP_PAGE_MASK(map)); oldmapsize = oldmapmax - oldmapmin; - newmapsize = vm_map_round_page(newsize); + newmapsize = vm_map_round_page(newsize, + VM_MAP_PAGE_MASK(map)); /* @@ -674,7 +951,8 @@ kmem_alloc_pageable( #else map_addr = vm_map_min(map); #endif - map_size = vm_map_round_page(size); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); kr = vm_map_enter(map, &map_addr, map_size, (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, @@ -715,9 +993,12 @@ kmem_free( return; } - kr = vm_map_remove(map, vm_map_trunc_page(addr), - vm_map_round_page(addr + size), - VM_MAP_REMOVE_KUNWIRE); + kr = vm_map_remove(map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr + size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_REMOVE_KUNWIRE); if (kr != KERN_SUCCESS) panic("kmem_free"); } @@ -778,8 +1059,10 @@ kmem_remap_pages( /* * Mark the pmap region as not pageable. 
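The mechanical change running through these kmem_* routines is rounding against the target map's own page mask instead of the global one. An illustrative sketch for a map with 16KB pages, i.e. VM_MAP_PAGE_MASK(map) == 0x3fff (example addresses only):

vm_map_offset_t mask  = VM_MAP_PAGE_MASK(map);
vm_map_offset_t start = vm_map_trunc_page(addr, mask);		/* e.g. 0x5432 -> 0x4000 */
vm_map_offset_t end   = vm_map_round_page(addr + size, mask);	/* e.g. 0x8001 -> 0xc000 */
/*
 * submaps created by kmem_suballoc() now inherit the parent's page
 * shift (see vm_map_set_page_shift above), so both bounds stay
 * aligned for the map they actually live in
 */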
*/ - map_start = vm_map_trunc_page(start); - map_end = vm_map_round_page(end); + map_start = vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(kernel_map)); + map_end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(kernel_map)); pmap_pageable(kernel_pmap, map_start, map_end, FALSE); @@ -854,7 +1137,8 @@ kmem_suballoc( vm_map_size_t map_size; kern_return_t kr; - map_size = vm_map_round_page(size); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(parent)); /* * Need reference on submap object because it is internal @@ -863,8 +1147,10 @@ kmem_suballoc( */ vm_object_reference(vm_submap_object); - map_addr = (flags & VM_FLAGS_ANYWHERE) ? - vm_map_min(parent) : vm_map_trunc_page(*addr); + map_addr = ((flags & VM_FLAGS_ANYWHERE) + ? vm_map_min(parent) + : vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(parent))); kr = vm_map_enter(parent, &map_addr, map_size, (vm_map_offset_t) 0, flags, @@ -879,6 +1165,8 @@ kmem_suballoc( map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable); if (map == VM_MAP_NULL) panic("kmem_suballoc: vm_map_create failed"); /* "can't happen" */ + /* inherit the parent map's page size */ + vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent)); kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE); if (kr != KERN_SUCCESS) { @@ -909,8 +1197,10 @@ kmem_init( vm_map_offset_t map_start; vm_map_offset_t map_end; - map_start = vm_map_trunc_page(start); - map_end = vm_map_round_page(end); + map_start = vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(kernel_map)); + map_end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(kernel_map)); kernel_map = vm_map_create(pmap_kernel(),VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end, FALSE); diff --git a/osfmk/vm/vm_kern.h b/osfmk/vm/vm_kern.h index c4c8696d2..6fd232e46 100644 --- a/osfmk/vm/vm_kern.h +++ b/osfmk/vm/vm_kern.h @@ -89,6 +89,8 @@ extern kern_return_t kernel_memory_allocate( #define KMA_PERMANENT 0x40 #define KMA_NOENCRYPT 0x80 #define KMA_KSTACK 0x100 +#define KMA_VAONLY 0x200 +#define KMA_COMPRESSOR 0x400 /* Pages belonging to the compressor are not on the paging queues, nor are they counted as wired. */ extern kern_return_t kmem_alloc_contig( vm_map_t map, @@ -140,11 +142,22 @@ extern kern_return_t kmem_alloc_kobject( vm_offset_t *addrp, vm_size_t size); +extern kern_return_t kernel_memory_populate( + vm_map_t map, + vm_offset_t addr, + vm_size_t size, + int flags); +extern void kernel_memory_depopulate( + vm_map_t map, + vm_offset_t addr, + vm_size_t size, + int flags); + #ifdef MACH_KERNEL_PRIVATE extern void kmem_init( vm_offset_t start, - vm_offset_t end) __attribute__((section("__TEXT, initcode"))); + vm_offset_t end); extern kern_return_t copyinmap( diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index 0dbde8941..315ad3387 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -84,6 +84,7 @@ #include #include +#include #include #include #include @@ -160,7 +161,8 @@ static kern_return_t vm_map_copy_overwrite_unaligned( vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, - vm_map_address_t start); + vm_map_address_t start, + boolean_t discard_on_success); static kern_return_t vm_map_copy_overwrite_aligned( vm_map_t dst_map, @@ -180,7 +182,8 @@ static kern_return_t vm_map_copyout_kernel_buffer( vm_map_t map, vm_map_address_t *addr, /* IN/OUT */ vm_map_copy_t copy, - boolean_t overwrite); + boolean_t overwrite, + boolean_t consume_on_success); static void vm_map_fork_share( vm_map_t old_map, @@ -203,7 +206,8 @@ void vm_map_region_walk( vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended, - boolean_t look_for_pages); + boolean_t look_for_pages, + mach_msg_type_number_t count); static kern_return_t vm_map_wire_nested( vm_map_t map, @@ -261,7 +265,8 @@ static void vm_map_region_look_for_page( vm_object_offset_t offset, int max_refcnt, int depth, - vm_region_extended_info_t extended); + vm_region_extended_info_t extended, + mach_msg_type_number_t count); static int vm_map_region_count_obj_refs( vm_map_entry_t entry, @@ -441,11 +446,7 @@ static vm_size_t map_data_size; static void *kentry_data; static vm_size_t kentry_data_size; -#if CONFIG_EMBEDDED -#define NO_COALESCE_LIMIT 0 -#else #define NO_COALESCE_LIMIT ((1024 * 128) - 1) -#endif /* Skip acquiring locks if we're in the midst of a kernel core dump */ unsigned int not_in_kdp = 1; @@ -737,6 +738,8 @@ vm_map_create( vm_map_store_init( &(result->hdr) ); + result->hdr.page_shift = PAGE_SHIFT; + result->size = 0; result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */ result->user_wire_size = 0; @@ -812,7 +815,9 @@ _vm_map_entry_create( vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE); #if MAP_ENTRY_CREATION_DEBUG - fastbacktrace(&entry->vme_bt[0], (sizeof(entry->vme_bt)/sizeof(uintptr_t))); + entry->vme_creation_maphdr = map_header; + fastbacktrace(&entry->vme_creation_bt[0], + (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t))); #endif return(entry); } @@ -1213,7 +1218,7 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_AFTER) { /* account for the back guard page in the size */ - size += PAGE_SIZE_64; + size += VM_MAP_PAGE_SIZE(map); } new_entry = vm_map_entry_create(map, FALSE); @@ -1251,7 +1256,7 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_BEFORE) { /* reserve space for the front guard page */ - start += PAGE_SIZE_64; + start += VM_MAP_PAGE_SIZE(map); } end = ((start + mask) & ~mask); @@ -1305,7 +1310,7 @@ vm_map_find_space( if (flags & VM_FLAGS_GUARD_BEFORE) { /* go back for the front guard page */ - start -= PAGE_SIZE_64; + start -= VM_MAP_PAGE_SIZE(map); } *address = start; @@ -1314,6 +1319,10 @@ vm_map_find_space( new_entry->vme_end = end; assert(page_aligned(new_entry->vme_start)); assert(page_aligned(new_entry->vme_end)); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, + VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, + VM_MAP_PAGE_MASK(map))); new_entry->is_shared = FALSE; new_entry->is_sub_map = FALSE; @@ -1334,7 +1343,12 @@ vm_map_find_space( new_entry->needs_wakeup = FALSE; new_entry->no_cache = FALSE; new_entry->permanent = FALSE; - new_entry->superpage_size = 0; + new_entry->superpage_size = FALSE; + if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) { + new_entry->map_aligned = TRUE; + } else { + new_entry->map_aligned = FALSE; + } 
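The map_aligned flag introduced below records whether an entry's bounds are aligned to the map's page size, which may be larger than the kernel's. A sketch of the intent (illustrative; mirrors the vm_map_find_space hunk that follows):

if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
	/* e.g. a 16KB-page map managed by a 4KB-page kernel */
	new_entry->map_aligned = TRUE;
} else {
	new_entry->map_aligned = FALSE;
}
/* later alignment checks can then assert against the right mask: */
assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, VM_MAP_PAGE_MASK(map)));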
new_entry->used_for_jit = 0; @@ -1485,8 +1499,9 @@ vm_map_random_address_for_size( while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) { random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT; - random_addr = trunc_page(vm_map_min(map) + - (random_addr % addr_space_size)); + random_addr = vm_map_trunc_page( + vm_map_min(map) +(random_addr % addr_space_size), + VM_MAP_PAGE_MASK(map)); if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) { if (prev_entry == vm_map_to_entry(map)) { @@ -1563,6 +1578,7 @@ vm_map_enter( char alias; vm_map_offset_t effective_min_offset, effective_max_offset; kern_return_t kr; + boolean_t clear_map_aligned = FALSE; if (superpage_size) { switch (superpage_size) { @@ -1590,14 +1606,6 @@ vm_map_enter( } -#if CONFIG_EMBEDDED - if (cur_protection & VM_PROT_WRITE){ - if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){ - printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); - cur_protection &= ~VM_PROT_EXECUTE; - } - } -#endif /* CONFIG_EMBEDDED */ if (is_submap) { if (purgable) { @@ -1650,6 +1658,18 @@ vm_map_enter( assert(page_aligned(*address)); assert(page_aligned(size)); + if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) { + /* + * In most cases, the caller rounds the size up to the + * map's page size. + * If we get a size that is explicitly not map-aligned here, + * we'll have to respect the caller's wish and mark the + * mapping as "not map-aligned" to avoid tripping the + * map alignment checks later. + */ + clear_map_aligned = TRUE; + } + /* * Only zero-fill objects are allowed to be purgable. * LP64todo - limit purgable objects to 32-bits for now @@ -1676,6 +1696,7 @@ vm_map_enter( *address, *address + size, map->hdr.entries_pageable); + vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map)); } StartAgain: ; @@ -1735,6 +1756,8 @@ StartAgain: ; } else { if (start < (entry->vme_next)->vme_start ) { start = entry->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); } else { /* * Need to do a lookup. @@ -1749,6 +1772,8 @@ StartAgain: ; if (vm_map_lookup_entry(map, start, &tmp_entry)) { assert(!entry_for_jit); start = tmp_entry->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); } entry = tmp_entry; } @@ -1770,9 +1795,13 @@ StartAgain: ; */ end = ((start + mask) & ~mask); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (end < start) RETURN(KERN_NO_SPACE); start = end; + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_PAGE_MASK(map))); end += size; if ((end > effective_max_offset) || (end < start)) { @@ -1812,8 +1841,12 @@ StartAgain: ; entry = next; start = entry->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); } *address = start; + assert(VM_MAP_PAGE_ALIGNED(*address, + VM_MAP_PAGE_MASK(map))); } else { /* * Verify that: @@ -1977,6 +2010,11 @@ StartAgain: ; (entry->behavior == VM_BEHAVIOR_DEFAULT) && (entry->in_transition == 0) && (entry->no_cache == no_cache) && + /* + * No coalescing if not map-aligned, to avoid propagating + * that condition any further than needed: + */ + (!entry->map_aligned || !clear_map_aligned) && ((entry->vme_end - entry->vme_start) + size <= (alias == VM_MEMORY_REALLOC ? 
ANON_CHUNK_SIZE : @@ -1996,6 +2034,8 @@ StartAgain: ; */ map->size += (end - entry->vme_end); assert(entry->vme_start < end); + assert(VM_MAP_PAGE_ALIGNED(end, + VM_MAP_PAGE_MASK(map))); entry->vme_end = end; vm_map_store_update_first_free(map, map->first_free); RETURN(KERN_SUCCESS); @@ -2038,7 +2078,9 @@ StartAgain: ; VM_BEHAVIOR_DEFAULT, (entry_for_jit)? VM_INHERIT_NONE: inheritance, 0, no_cache, - permanent, superpage_size); + permanent, + superpage_size, + clear_map_aligned); new_entry->alias = alias; if (entry_for_jit){ if (!(map->jit_entry_exists)){ @@ -2216,6 +2258,8 @@ BailOut: ; *address, *address + size, map->hdr.entries_pageable); + vm_map_set_page_shift(zap_new_map, + VM_MAP_PAGE_SHIFT(map)); if (!map_locked) { vm_map_lock(map); map_locked = TRUE; @@ -2322,6 +2366,7 @@ vm_map_enter_mem_object( vm_object_size_t size; kern_return_t result; boolean_t mask_cur_protection, mask_max_protection; + vm_map_offset_t offset_in_mapping; mask_cur_protection = cur_protection & VM_PROT_IS_MASK; mask_max_protection = max_protection & VM_PROT_IS_MASK; @@ -2338,9 +2383,11 @@ vm_map_enter_mem_object( initial_size == 0) return KERN_INVALID_ARGUMENT; - map_addr = vm_map_trunc_page(*address); - map_size = vm_map_round_page(initial_size); - size = vm_object_round_page(initial_size); + map_addr = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); + map_size = vm_map_round_page(initial_size, + VM_MAP_PAGE_MASK(target_map)); + size = vm_object_round_page(initial_size); /* * Find the vm object (if any) corresponding to this port. @@ -2353,6 +2400,11 @@ vm_map_enter_mem_object( vm_named_entry_t named_entry; named_entry = (vm_named_entry_t) port->ip_kobject; + + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset += named_entry->data_offset; + } + /* a few checks to make sure user is obeying rules */ if (size == 0) { if (offset >= named_entry->size) @@ -2374,14 +2426,46 @@ vm_map_enter_mem_object( if (named_entry->size < (offset + size)) return KERN_INVALID_ARGUMENT; + if (named_entry->is_copy) { + /* for a vm_map_copy, we can only map it whole */ + if ((size != named_entry->size) && + (vm_map_round_page(size, + VM_MAP_PAGE_MASK(target_map)) == + named_entry->size)) { + /* XXX FBDP use the rounded size... */ + size = vm_map_round_page( + size, + VM_MAP_PAGE_MASK(target_map)); + } + + if (offset != 0 || + size != named_entry->size) { + return KERN_INVALID_ARGUMENT; + } + } + /* the callers parameter offset is defined to be the */ /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; + if (! VM_MAP_PAGE_ALIGNED(size, + VM_MAP_PAGE_MASK(target_map))) { + /* + * Let's not map more than requested; + * vm_map_enter() will handle this "not map-aligned" + * case. 
+ */ + map_size = size; + } + named_entry_lock(named_entry); if (named_entry->is_sub_map) { vm_map_t submap; + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap."); + } + submap = named_entry->backing.map; vm_map_lock(submap); vm_map_reference(submap); @@ -2436,6 +2520,10 @@ vm_map_enter_mem_object( protections = named_entry->protection & VM_PROT_ALL; access = GET_MAP_MEM(named_entry->protection); + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap."); + } + object = vm_object_enter(named_entry->backing.pager, named_entry->size, named_entry->internal, @@ -2488,6 +2576,147 @@ vm_map_enter_mem_object( if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; vm_object_unlock(object); + + } else if (named_entry->is_copy) { + kern_return_t kr; + vm_map_copy_t copy_map; + vm_map_entry_t copy_entry; + vm_map_offset_t copy_addr; + + if (flags & ~(VM_FLAGS_FIXED | + VM_FLAGS_ANYWHERE | + VM_FLAGS_OVERWRITE | + VM_FLAGS_RETURN_DATA_ADDR)) { + named_entry_unlock(named_entry); + return KERN_INVALID_ARGUMENT; + } + + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset_in_mapping = offset - vm_object_trunc_page(offset); + offset = vm_object_trunc_page(offset); + map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset; + } + + copy_map = named_entry->backing.copy; + assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST); + if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) { + /* unsupported type; should not happen */ + printf("vm_map_enter_mem_object: " + "memory_entry->backing.copy " + "unsupported type 0x%x\n", + copy_map->type); + named_entry_unlock(named_entry); + return KERN_INVALID_ARGUMENT; + } + + /* reserve a contiguous range */ + kr = vm_map_enter(target_map, + &map_addr, + map_size, + mask, + flags & (VM_FLAGS_ANYWHERE | + VM_FLAGS_OVERWRITE | + VM_FLAGS_RETURN_DATA_ADDR), + VM_OBJECT_NULL, + 0, + FALSE, /* copy */ + cur_protection, + max_protection, + inheritance); + if (kr != KERN_SUCCESS) { + named_entry_unlock(named_entry); + return kr; + } + + copy_addr = map_addr; + + for (copy_entry = vm_map_copy_first_entry(copy_map); + copy_entry != vm_map_copy_to_entry(copy_map); + copy_entry = copy_entry->vme_next) { + int remap_flags = 0; + vm_map_t copy_submap; + vm_object_t copy_object; + vm_map_size_t copy_size; + vm_object_offset_t copy_offset; + + copy_offset = copy_entry->offset; + copy_size = (copy_entry->vme_end - + copy_entry->vme_start); + + /* sanity check */ + if (copy_addr + copy_size > + map_addr + map_size) { + /* over-mapping too much !? 
*/ + kr = KERN_INVALID_ARGUMENT; + /* abort */ + break; + } + + /* take a reference on the object */ + if (copy_entry->is_sub_map) { + remap_flags |= VM_FLAGS_SUBMAP; + copy_submap = + copy_entry->object.sub_map; + vm_map_lock(copy_submap); + vm_map_reference(copy_submap); + vm_map_unlock(copy_submap); + copy_object = (vm_object_t) copy_submap; + } else { + copy_object = + copy_entry->object.vm_object; + vm_object_reference(copy_object); + } + + /* over-map the object into destination */ + remap_flags |= flags; + remap_flags |= VM_FLAGS_FIXED; + remap_flags |= VM_FLAGS_OVERWRITE; + remap_flags &= ~VM_FLAGS_ANYWHERE; + kr = vm_map_enter(target_map, + &copy_addr, + copy_size, + (vm_map_offset_t) 0, + remap_flags, + copy_object, + copy_offset, + copy, + cur_protection, + max_protection, + inheritance); + if (kr != KERN_SUCCESS) { + if (copy_entry->is_sub_map) { + vm_map_deallocate(copy_submap); + } else { + vm_object_deallocate(copy_object); + } + /* abort */ + break; + } + + /* next mapping */ + copy_addr += copy_size; + } + + if (kr == KERN_SUCCESS) { + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + *address = map_addr + offset_in_mapping; + } else { + *address = map_addr; + } + } + named_entry_unlock(named_entry); + + if (kr != KERN_SUCCESS) { + if (! (flags & VM_FLAGS_OVERWRITE)) { + /* deallocate the contiguous range */ + (void) vm_deallocate(target_map, + map_addr, + map_size); + } + } + + return kr; + } else { /* This is the case where we are going to map */ /* an already mapped object. If the object is */ @@ -2495,6 +2724,12 @@ vm_map_enter_mem_object( /* object cannot be mapped until it is ready */ /* we can therefore avoid the ready check */ /* in this case. */ + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset_in_mapping = offset - vm_object_trunc_page(offset); + offset = vm_object_trunc_page(offset); + map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset; + } + object = named_entry->backing.object; assert(object != VM_OBJECT_NULL); named_entry_unlock(named_entry); @@ -2509,7 +2744,10 @@ vm_map_enter_mem_object( * this case, the port isn't really a port at all, but * instead is just a raw memory object.
*/ - + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object."); + } + object = vm_object_enter((memory_object_t)port, size, FALSE, FALSE, FALSE); if (object == VM_OBJECT_NULL) @@ -2641,7 +2879,12 @@ vm_map_enter_mem_object( cur_protection, max_protection, inheritance); if (result != KERN_SUCCESS) vm_object_deallocate(object); - *address = map_addr; + + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + *address = map_addr + offset_in_mapping; + } else { + *address = map_addr; + } return result; } @@ -2681,8 +2924,10 @@ vm_map_enter_mem_object_control( initial_size == 0) return KERN_INVALID_ARGUMENT; - map_addr = vm_map_trunc_page(*address); - map_size = vm_map_round_page(initial_size); + map_addr = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); + map_size = vm_map_round_page(initial_size, + VM_MAP_PAGE_MASK(target_map)); size = vm_object_round_page(initial_size); object = memory_object_control_to_vm_object(control); @@ -2836,8 +3081,10 @@ vm_map_enter_cpm( if (anywhere) *addr = vm_map_min(map); else - *addr = vm_map_trunc_page(*addr); - size = vm_map_round_page(size); + *addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); /* * LP64todo - cpm_allocate should probably allow @@ -3173,10 +3420,14 @@ _vm_map_clip_start( new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_HDR_PAGE_MASK(map_header))); new_entry->vme_end = start; assert(new_entry->vme_start < new_entry->vme_end); entry->offset += (start - entry->vme_start); assert(start < entry->vme_end); + assert(VM_MAP_PAGE_ALIGNED(start, + VM_MAP_HDR_PAGE_MASK(map_header))); entry->vme_start = start; _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry); @@ -3267,6 +3518,8 @@ _vm_map_clip_end( vm_map_entry_copy_full(new_entry, entry); assert(entry->vme_start < end); + assert(VM_MAP_PAGE_ALIGNED(end, + VM_MAP_HDR_PAGE_MASK(map_header))); new_entry->vme_start = entry->vme_end = end; new_entry->offset += (end - entry->vme_start); assert(new_entry->vme_start < new_entry->vme_end); @@ -3552,14 +3805,6 @@ vm_map_protect( } } -#if CONFIG_EMBEDDED - if (new_prot & VM_PROT_WRITE) { - if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) { - printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__); - new_prot &= ~VM_PROT_EXECUTE; - } - } -#endif prev = current->vme_end; current = current->vme_next; @@ -3879,6 +4124,8 @@ vm_map_wire_nested( VM_MAP_RANGE_CHECK(map, start, end); assert(page_aligned(start)); assert(page_aligned(end)); + assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))); if (start == end) { /* We wired what the caller asked for, zero pages */ vm_map_unlock(map); @@ -4370,6 +4617,8 @@ vm_map_unwire_nested( VM_MAP_RANGE_CHECK(map, start, end); assert(page_aligned(start)); assert(page_aligned(end)); + assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))); if (start == end) { /* We unwired what the caller asked for: zero pages */ @@ -4708,6 +4957,10 @@ vm_map_entry_delete( e = entry->vme_end; assert(page_aligned(s)); assert(page_aligned(e)); + if (entry->map_aligned == TRUE) { + assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map))); + assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map))); + } assert(entry->wired_count == 
0); assert(entry->user_wired_count == 0); assert(!entry->permanent); @@ -5114,6 +5367,15 @@ vm_map_delete( pmap, pmap_addr); } else { + if (tmp_entry.object.vm_object == kernel_object) { + pmap_protect_options( + map->pmap, + tmp_entry.vme_start, + tmp_entry.vme_end, + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE, + NULL); + } vm_fault_unwire(map, &tmp_entry, tmp_entry.object.vm_object == kernel_object, map->pmap, tmp_entry.vme_start); @@ -5198,19 +5460,35 @@ vm_map_delete( entry->object.sub_map, entry->offset); } - } else if (entry->object.vm_object != kernel_object) { + } else if (entry->object.vm_object != kernel_object && + entry->object.vm_object != compressor_object) { object = entry->object.vm_object; - if((map->mapped_in_other_pmaps) && (map->ref_count)) { - vm_object_pmap_protect( + if ((map->mapped_in_other_pmaps) && (map->ref_count)) { + vm_object_pmap_protect_options( object, entry->offset, entry->vme_end - entry->vme_start, PMAP_NULL, entry->vme_start, - VM_PROT_NONE); - } else { - pmap_remove(map->pmap, - (addr64_t)entry->vme_start, - (addr64_t)entry->vme_end); + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE); + } else if ((entry->object.vm_object != + VM_OBJECT_NULL) || + (map->pmap == kernel_pmap)) { + /* Remove translations associated + * with this range unless the entry + * does not have an object, or + * it's the kernel map or a descendant + * since the platform could potentially + * create "backdoor" mappings invisible + * to the VM. It is expected that + * objectless, non-kernel ranges + * do not have such VM invisible + * translations. + */ + pmap_remove_options(map->pmap, + (addr64_t)entry->vme_start, + (addr64_t)entry->vme_end, + PMAP_OPTIONS_REMOVE); } } @@ -5310,6 +5588,15 @@ vm_map_remove( vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); + /* + * For the zone_map, the kernel controls the allocation/freeing of memory. + * Any free to the zone_map should be within the bounds of the map and + * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a + * free to the zone_map into a no-op, there is a problem and we should + * panic. + */ + if ((map == zone_map) && (start == end)) + panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start); result = vm_map_delete(map, start, end, flags, VM_MAP_NULL); vm_map_unlock(map); @@ -5338,7 +5625,11 @@ vm_map_copy_discard( vm_map_entry_t entry = vm_map_copy_first_entry(copy); vm_map_copy_entry_unlink(copy, entry); - vm_object_deallocate(entry->object.vm_object); + if (entry->is_sub_map) { + vm_map_deallocate(entry->object.sub_map); + } else { + vm_object_deallocate(entry->object.vm_object); + } vm_map_copy_entry_dispose(copy, entry); } break; @@ -5437,7 +5728,8 @@ vm_map_overwrite_submap_recurse( * splitting entries in strange ways. 
*/ - dst_end = vm_map_round_page(dst_addr + dst_size); + dst_end = vm_map_round_page(dst_addr + dst_size, + VM_MAP_PAGE_MASK(dst_map)); vm_map_lock(dst_map); start_pass_1: @@ -5446,7 +5738,10 @@ start_pass_1: return(KERN_INVALID_ADDRESS); } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(dst_addr, + VM_MAP_PAGE_MASK(dst_map))); assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */ for (entry = tmp_entry;;) { @@ -5642,7 +5937,7 @@ vm_map_copy_overwrite_nested( if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { return(vm_map_copyout_kernel_buffer( dst_map, &dst_addr, - copy, TRUE)); + copy, TRUE, discard_on_success)); } /* @@ -5665,12 +5960,17 @@ vm_map_copy_overwrite_nested( * splitting entries in strange ways. */ - if (!page_aligned(copy->size) || - !page_aligned (copy->offset) || - !page_aligned (dst_addr)) + if (!VM_MAP_PAGE_ALIGNED(copy->size, + VM_MAP_PAGE_MASK(dst_map)) || + !VM_MAP_PAGE_ALIGNED(copy->offset, + VM_MAP_PAGE_MASK(dst_map)) || + !VM_MAP_PAGE_ALIGNED(dst_addr, + VM_MAP_PAGE_MASK(dst_map)) || + dst_map->hdr.page_shift != copy->cpy_hdr.page_shift) { aligned = FALSE; - dst_end = vm_map_round_page(dst_addr + copy->size); + dst_end = vm_map_round_page(dst_addr + copy->size, + VM_MAP_PAGE_MASK(dst_map)); } else { dst_end = dst_addr + copy->size; } @@ -5691,7 +5991,10 @@ start_pass_1: vm_map_unlock(dst_map); return(KERN_INVALID_ADDRESS); } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(dst_addr, + VM_MAP_PAGE_MASK(dst_map))); for (entry = tmp_entry;;) { vm_map_entry_t next = entry->vme_next; @@ -6153,8 +6456,13 @@ start_overwrite: * bits of the region in this case ! */ /* ALWAYS UNLOCKS THE dst_map MAP */ - if ((kr = vm_map_copy_overwrite_unaligned( dst_map, - tmp_entry, copy, base_addr)) != KERN_SUCCESS) { + kr = vm_map_copy_overwrite_unaligned( + dst_map, + tmp_entry, + copy, + base_addr, + discard_on_success); + if (kr != KERN_SUCCESS) { if(next_copy != NULL) { copy->cpy_hdr.nentries += remaining_entries; @@ -6194,7 +6502,10 @@ start_overwrite: break; } } - vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr)); + vm_map_clip_start(dst_map, + tmp_entry, + vm_map_trunc_page(base_addr, + VM_MAP_PAGE_MASK(dst_map))); entry = tmp_entry; } /* while */ @@ -6251,7 +6562,8 @@ vm_map_copy_overwrite( goto blunt_copy; } - if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) { + if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) != + (copy->offset & VM_MAP_PAGE_MASK(dst_map))) { /* * Incompatible mis-alignment of source and destination... */ @@ -6265,7 +6577,8 @@ vm_map_copy_overwrite( */ if (!page_aligned(dst_addr)) { head_addr = dst_addr; - head_size = PAGE_SIZE - (copy->offset & PAGE_MASK); + head_size = (VM_MAP_PAGE_SIZE(dst_map) - + (copy->offset & VM_MAP_PAGE_MASK(dst_map))); } if (!page_aligned(copy->offset + copy->size)) { /* @@ -6273,7 +6586,8 @@ vm_map_copy_overwrite( * Do an aligned copy up to the last page and * then an unaligned copy for the remaining bytes. 
*/ - tail_size = (copy->offset + copy->size) & PAGE_MASK; + tail_size = ((copy->offset + copy->size) & + VM_MAP_PAGE_MASK(dst_map)); tail_addr = dst_addr + copy->size - tail_size; } @@ -6476,9 +6790,11 @@ vm_map_copy_overwrite_unaligned( vm_map_t dst_map, vm_map_entry_t entry, vm_map_copy_t copy, - vm_map_offset_t start) + vm_map_offset_t start, + boolean_t discard_on_success) { - vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy); + vm_map_entry_t copy_entry; + vm_map_entry_t copy_entry_next; vm_map_version_t version; vm_object_t dst_object; vm_object_offset_t dst_offset; @@ -6491,6 +6807,9 @@ vm_map_copy_overwrite_unaligned( amount_left; kern_return_t kr = KERN_SUCCESS; + + copy_entry = vm_map_copy_first_entry(copy); + vm_map_lock_write_to_read(dst_map); src_offset = copy->offset - vm_object_trunc_page(copy->offset); @@ -6608,17 +6927,26 @@ vm_map_copy_overwrite_unaligned( /* * all done with this copy entry, dispose. */ - vm_map_copy_entry_unlink(copy, copy_entry); - vm_object_deallocate(copy_entry->object.vm_object); - vm_map_copy_entry_dispose(copy, copy_entry); + copy_entry_next = copy_entry->vme_next; + + if (discard_on_success) { + vm_map_copy_entry_unlink(copy, copy_entry); + assert(!copy_entry->is_sub_map); + vm_object_deallocate( + copy_entry->object.vm_object); + vm_map_copy_entry_dispose(copy, copy_entry); + } - if ((copy_entry = vm_map_copy_first_entry(copy)) - == vm_map_copy_to_entry(copy) && amount_left) { + if (copy_entry_next == vm_map_copy_to_entry(copy) && + amount_left) { /* * not finished copying but run out of source */ return KERN_INVALID_ADDRESS; } + + copy_entry = copy_entry_next; + src_offset = 0; } @@ -6788,7 +7116,6 @@ vm_map_copy_overwrite_aligned( continue; } -#if !CONFIG_EMBEDDED #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */ #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */ if (copy_entry->object.vm_object != VM_OBJECT_NULL && @@ -6806,7 +7133,6 @@ vm_map_copy_overwrite_aligned( vm_map_copy_overwrite_aligned_src_large++; goto slow_copy; } -#endif /* !CONFIG_EMBEDDED */ if (entry->alias >= VM_MEMORY_MALLOC && entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) { @@ -6821,10 +7147,8 @@ vm_map_copy_overwrite_aligned( vm_object_lock_shared(new_object); } while (new_object != VM_OBJECT_NULL && -#if !CONFIG_EMBEDDED !new_object->true_share && new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && -#endif /* !CONFIG_EMBEDDED */ new_object->internal) { new_shadow = new_object->shadow; if (new_shadow == VM_OBJECT_NULL) { @@ -6849,7 +7173,6 @@ vm_map_copy_overwrite_aligned( vm_object_unlock(new_object); goto slow_copy; } -#if !CONFIG_EMBEDDED if (new_object->true_share || new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { /* @@ -6862,7 +7185,6 @@ vm_map_copy_overwrite_aligned( vm_object_unlock(new_object); goto slow_copy; } -#endif /* !CONFIG_EMBEDDED */ vm_object_unlock(new_object); } /* @@ -6901,18 +7223,21 @@ vm_map_copy_overwrite_aligned( entry->object.sub_map); } else { if(dst_map->mapped_in_other_pmaps) { - vm_object_pmap_protect( + vm_object_pmap_protect_options( entry->object.vm_object, entry->offset, entry->vme_end - entry->vme_start, PMAP_NULL, entry->vme_start, - VM_PROT_NONE); + VM_PROT_NONE, + PMAP_OPTIONS_REMOVE); } else { - pmap_remove(dst_map->pmap, - (addr64_t)(entry->vme_start), - (addr64_t)(entry->vme_end)); + pmap_remove_options( + dst_map->pmap, + (addr64_t)(entry->vme_start), + (addr64_t)(entry->vme_end), + PMAP_OPTIONS_REMOVE); } vm_object_deallocate(old_object); } @@ -7115,12 +7440,15 @@ 
vm_map_copyin_kernel_buffer( return kr; } if (src_destroy) { - (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr), - vm_map_round_page(src_addr + len), - VM_MAP_REMOVE_INTERRUPTIBLE | - VM_MAP_REMOVE_WAIT_FOR_KWIRE | - (src_map == kernel_map) ? - VM_MAP_REMOVE_KUNWIRE : 0); + (void) vm_map_remove( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + vm_map_round_page(src_addr + len, + VM_MAP_PAGE_MASK(src_map)), + (VM_MAP_REMOVE_INTERRUPTIBLE | + VM_MAP_REMOVE_WAIT_FOR_KWIRE | + (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)); } *copy_result = copy; return KERN_SUCCESS; @@ -7143,7 +7471,8 @@ vm_map_copyout_kernel_buffer( vm_map_t map, vm_map_address_t *addr, /* IN/OUT */ vm_map_copy_t copy, - boolean_t overwrite) + boolean_t overwrite, + boolean_t consume_on_success) { kern_return_t kr = KERN_SUCCESS; thread_t thread = current_thread(); @@ -7156,7 +7485,8 @@ vm_map_copyout_kernel_buffer( *addr = 0; kr = vm_map_enter(map, addr, - vm_map_round_page(copy->size), + vm_map_round_page(copy->size, + VM_MAP_PAGE_MASK(map)), (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, VM_OBJECT_NULL, @@ -7210,16 +7540,22 @@ vm_map_copyout_kernel_buffer( /* * Deallocate the space we allocated in the target map. */ - (void) vm_map_remove(map, - vm_map_trunc_page(*addr), - vm_map_round_page(*addr + - vm_map_round_page(copy->size)), - VM_MAP_NO_FLAGS); + (void) vm_map_remove( + map, + vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page((*addr + + vm_map_round_page(copy->size, + VM_MAP_PAGE_MASK(map))), + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS); *addr = 0; } } else { /* copy was successful, dicard the copy structure */ - kfree(copy, copy->cpy_kalloc_size); + if (consume_on_success) { + kfree(copy, copy->cpy_kalloc_size); + } } return kr; @@ -7242,6 +7578,49 @@ MACRO_BEGIN \ zfree(vm_map_copy_zone, copy); \ MACRO_END +void +vm_map_copy_remap( + vm_map_t map, + vm_map_entry_t where, + vm_map_copy_t copy, + vm_map_offset_t adjustment, + vm_prot_t cur_prot, + vm_prot_t max_prot, + vm_inherit_t inheritance) +{ + vm_map_entry_t copy_entry, new_entry; + + for (copy_entry = vm_map_copy_first_entry(copy); + copy_entry != vm_map_copy_to_entry(copy); + copy_entry = copy_entry->vme_next) { + /* get a new VM map entry for the map */ + new_entry = vm_map_entry_create(map, + !map->hdr.entries_pageable); + /* copy the "copy entry" to the new entry */ + vm_map_entry_copy(new_entry, copy_entry); + /* adjust "start" and "end" */ + new_entry->vme_start += adjustment; + new_entry->vme_end += adjustment; + /* clear some attributes */ + new_entry->inheritance = inheritance; + new_entry->protection = cur_prot; + new_entry->max_protection = max_prot; + new_entry->behavior = VM_BEHAVIOR_DEFAULT; + /* take an extra reference on the entry's "object" */ + if (new_entry->is_sub_map) { + vm_map_lock(new_entry->object.sub_map); + vm_map_reference(new_entry->object.sub_map); + vm_map_unlock(new_entry->object.sub_map); + } else { + vm_object_reference(new_entry->object.vm_object); + } + /* insert the new entry in the map */ + vm_map_store_entry_link(map, where, new_entry); + /* continue inserting the "copy entries" after the new entry */ + where = new_entry; + } +} + /* * Routine: vm_map_copyout * @@ -7252,18 +7631,35 @@ MACRO_END * If successful, consumes the copy object. * Otherwise, the caller is responsible for it. 
*/ + kern_return_t vm_map_copyout( vm_map_t dst_map, vm_map_address_t *dst_addr, /* OUT */ vm_map_copy_t copy) +{ + return vm_map_copyout_internal(dst_map, dst_addr, copy, + TRUE, /* consume_on_success */ + VM_PROT_DEFAULT, + VM_PROT_ALL, + VM_INHERIT_DEFAULT); +} + +kern_return_t +vm_map_copyout_internal( + vm_map_t dst_map, + vm_map_address_t *dst_addr, /* OUT */ + vm_map_copy_t copy, + boolean_t consume_on_success, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) { vm_map_size_t size; vm_map_size_t adjustment; vm_map_offset_t start; vm_object_offset_t vm_copy_start; vm_map_entry_t last; - register vm_map_entry_t entry; /* @@ -7286,8 +7682,10 @@ vm_map_copyout( vm_object_offset_t offset; offset = vm_object_trunc_page(copy->offset); - size = vm_map_round_page(copy->size + - (vm_map_size_t)(copy->offset - offset)); + size = vm_map_round_page((copy->size + + (vm_map_size_t)(copy->offset - + offset)), + VM_MAP_PAGE_MASK(dst_map)); *dst_addr = 0; kr = vm_map_enter(dst_map, dst_addr, size, (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, @@ -7298,7 +7696,8 @@ vm_map_copyout( return(kr); /* Account for non-pagealigned copy object */ *dst_addr += (vm_map_offset_t)(copy->offset - offset); - zfree(vm_map_copy_zone, copy); + if (consume_on_success) + zfree(vm_map_copy_zone, copy); return(KERN_SUCCESS); } @@ -7308,18 +7707,23 @@ vm_map_copyout( */ if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { - return(vm_map_copyout_kernel_buffer(dst_map, dst_addr, - copy, FALSE)); + return vm_map_copyout_kernel_buffer(dst_map, dst_addr, + copy, FALSE, + consume_on_success); } + /* * Find space for the data */ - vm_copy_start = vm_object_trunc_page(copy->offset); - size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size) + vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset, + VM_MAP_COPY_PAGE_MASK(copy)); + size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size, + VM_MAP_COPY_PAGE_MASK(copy)) - vm_copy_start; + StartAgain: ; vm_map_lock(dst_map); @@ -7330,6 +7734,8 @@ StartAgain: ; assert(first_free_is_valid(dst_map)); start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ? vm_map_min(dst_map) : last->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(dst_map)); } while (TRUE) { @@ -7356,6 +7762,23 @@ StartAgain: ; last = next; start = last->vme_end; + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(dst_map)); + } + + adjustment = start - vm_copy_start; + if (! consume_on_success) { + /* + * We're not allowed to consume "copy", so we'll have to + * copy its map entries into the destination map below. + * No need to re-allocate map entries from the correct + * (pageable or not) zone, since we'll get new map entries + * during the transfer. + * We'll also adjust the map entries's "start" and "end" + * during the transfer, to keep "copy"'s entries consistent + * with its "offset". + */ + goto after_adjustments; } /* @@ -7407,13 +7830,29 @@ StartAgain: ; * reset the region attributes. */ - adjustment = start - vm_copy_start; for (entry = vm_map_copy_first_entry(copy); entry != vm_map_copy_to_entry(copy); entry = entry->vme_next) { + if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) { + /* + * We're injecting this copy entry into a map that + * has the standard page alignment, so clear + * "map_aligned" (which might have been inherited + * from the original map entry). 
+ */ + entry->map_aligned = FALSE; + } + entry->vme_start += adjustment; entry->vme_end += adjustment; + if (entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, + VM_MAP_PAGE_MASK(dst_map))); + assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, + VM_MAP_PAGE_MASK(dst_map))); + } + entry->inheritance = VM_INHERIT_DEFAULT; entry->protection = VM_PROT_DEFAULT; entry->max_protection = VM_PROT_ALL; @@ -7495,6 +7934,8 @@ StartAgain: ; } } +after_adjustments: + /* * Correct the page alignment for the result */ @@ -7505,7 +7946,11 @@ StartAgain: ; * Update the hints and the map size */ - SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); + if (consume_on_success) { + SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); + } else { + SAVE_HINT_MAP_WRITE(dst_map, last); + } dst_map->size += size; @@ -7513,7 +7958,13 @@ StartAgain: ; * Link in the copy */ - vm_map_copy_insert(dst_map, last, copy); + if (consume_on_success) { + vm_map_copy_insert(dst_map, last, copy); + } else { + vm_map_copy_remap(dst_map, last, copy, adjustment, + cur_protection, max_protection, + inheritance); + } vm_map_unlock(dst_map); @@ -7634,8 +8085,10 @@ vm_map_copyin_common( /* * Compute (page aligned) start and end of region */ - src_start = vm_map_trunc_page(src_addr); - src_end = vm_map_round_page(src_end); + src_start = vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)); + src_end = vm_map_round_page(src_end, + VM_MAP_PAGE_MASK(src_map)); XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0); @@ -7652,6 +8105,18 @@ vm_map_copyin_common( copy->type = VM_MAP_COPY_ENTRY_LIST; copy->cpy_hdr.nentries = 0; copy->cpy_hdr.entries_pageable = TRUE; +#if 00 + copy->cpy_hdr.page_shift = src_map->hdr.page_shift; +#else + /* + * The copy entries can be broken down for a variety of reasons, + * so we can't guarantee that they will remain map-aligned... + * Will need to adjust the first copy_entry's "vme_start" and + * the last copy_entry's "vme_end" to be rounded to PAGE_MASK + * rather than the original map's alignment. + */ + copy->cpy_hdr.page_shift = PAGE_SHIFT; +#endif vm_map_store_init( &(copy->cpy_hdr) ); @@ -7999,8 +8464,12 @@ vm_map_copyin_common( ((src_entry->max_protection & VM_PROT_READ) == 0)) goto VerificationFailed; - if (src_entry->vme_end < new_entry->vme_end) - src_size = (new_entry->vme_end = src_entry->vme_end) - src_start; + if (src_entry->vme_end < new_entry->vme_end) { + assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end, + VM_MAP_COPY_PAGE_MASK(copy))); + new_entry->vme_end = src_entry->vme_end; + src_size = new_entry->vme_end - src_start; + } if ((src_entry->object.vm_object != src_object) || (src_entry->offset != src_offset) ) { @@ -8087,8 +8556,44 @@ vm_map_copyin_common( tmp_entry = src_entry->vme_next; if ((tmp_entry->vme_start != src_start) || - (tmp_entry == vm_map_to_entry(src_map))) + (tmp_entry == vm_map_to_entry(src_map))) { + + if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT && + (vm_map_round_page(src_entry->vme_end, + VM_MAP_PAGE_MASK(src_map)) == + src_end)) { + vm_map_entry_t last_copy_entry; + vm_map_offset_t adjustment; + + /* + * This is the last entry in the range we + * want and it happens to miss a few pages + * because it is not map-aligned (must have + * been imported from a differently-aligned + * map). + * Let's say we're done, but first we have + * to compensate for the alignment adjustment + * we're about to do before returning. 
+ */ + + last_copy_entry = vm_map_copy_last_entry(copy); + assert(last_copy_entry != + vm_map_copy_to_entry(copy)); + adjustment = + (vm_map_round_page((copy->offset + + copy->size), + VM_MAP_PAGE_MASK(src_map)) - + vm_map_round_page((copy->offset + + copy->size), + PAGE_MASK)); + last_copy_entry->vme_end += adjustment; + last_copy_entry->map_aligned = FALSE; + /* ... and we're done */ + break; + } + RETURN(KERN_INVALID_ADDRESS); + } } /* @@ -8096,28 +8601,89 @@ vm_map_copyin_common( * copy was successful. */ if (src_destroy) { - (void) vm_map_delete(src_map, - vm_map_trunc_page(src_addr), - src_end, - (src_map == kernel_map) ? - VM_MAP_REMOVE_KUNWIRE : - VM_MAP_NO_FLAGS, - VM_MAP_NULL); + (void) vm_map_delete( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + src_end, + ((src_map == kernel_map) ? + VM_MAP_REMOVE_KUNWIRE : + VM_MAP_NO_FLAGS), + VM_MAP_NULL); } else { /* fix up the damage we did in the base map */ - vm_map_simplify_range(src_map, - vm_map_trunc_page(src_addr), - vm_map_round_page(src_end)); + vm_map_simplify_range( + src_map, + vm_map_trunc_page(src_addr, + VM_MAP_PAGE_MASK(src_map)), + vm_map_round_page(src_end, + VM_MAP_PAGE_MASK(src_map))); } vm_map_unlock(src_map); + if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) { + assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK); + + /* adjust alignment of first copy_entry's "vme_start" */ + tmp_entry = vm_map_copy_first_entry(copy); + if (tmp_entry != vm_map_copy_to_entry(copy)) { + vm_map_offset_t adjustment; + adjustment = + (vm_map_trunc_page(copy->offset, + PAGE_MASK) - + vm_map_trunc_page(copy->offset, + VM_MAP_PAGE_MASK(src_map))); + if (adjustment) { + assert(page_aligned(adjustment)); + assert(adjustment < VM_MAP_PAGE_SIZE(src_map)); + tmp_entry->vme_start += adjustment; + tmp_entry->offset += adjustment; + copy_addr += adjustment; + assert(tmp_entry->vme_start < tmp_entry->vme_end); + } + } + + /* adjust alignment of last copy_entry's "vme_end" */ + tmp_entry = vm_map_copy_last_entry(copy); + if (tmp_entry != vm_map_copy_to_entry(copy)) { + vm_map_offset_t adjustment; + adjustment = + (vm_map_round_page((copy->offset + + copy->size), + VM_MAP_PAGE_MASK(src_map)) - + vm_map_round_page((copy->offset + + copy->size), + PAGE_MASK)); + if (adjustment) { + assert(page_aligned(adjustment)); + assert(adjustment < VM_MAP_PAGE_SIZE(src_map)); + tmp_entry->vme_end -= adjustment; + assert(tmp_entry->vme_start < tmp_entry->vme_end); + } + } + } + /* Fix-up start and end points in copy. This is necessary */ /* when the various entries in the copy object were picked */ /* up from different sub-maps */ tmp_entry = vm_map_copy_first_entry(copy); while (tmp_entry != vm_map_copy_to_entry(copy)) { + assert(VM_MAP_PAGE_ALIGNED( + copy_addr + (tmp_entry->vme_end - + tmp_entry->vme_start), + VM_MAP_COPY_PAGE_MASK(copy))); + assert(VM_MAP_PAGE_ALIGNED( + copy_addr, + VM_MAP_COPY_PAGE_MASK(copy))); + + /* + * The copy_entries will be injected directly into the + * destination map and might not be "map aligned" there... 
+ */ + tmp_entry->map_aligned = FALSE; + tmp_entry->vme_end = copy_addr + (tmp_entry->vme_end - tmp_entry->vme_start); tmp_entry->vme_start = copy_addr; @@ -8132,6 +8698,78 @@ vm_map_copyin_common( #undef RETURN } +kern_return_t +vm_map_copy_extract( + vm_map_t src_map, + vm_map_address_t src_addr, + vm_map_size_t len, + vm_map_copy_t *copy_result, /* OUT */ + vm_prot_t *cur_prot, /* OUT */ + vm_prot_t *max_prot) +{ + vm_map_offset_t src_start, src_end; + vm_map_copy_t copy; + kern_return_t kr; + + /* + * Check for copies of zero bytes. + */ + + if (len == 0) { + *copy_result = VM_MAP_COPY_NULL; + return(KERN_SUCCESS); + } + + /* + * Check that the end address doesn't overflow + */ + src_end = src_addr + len; + if (src_end < src_addr) + return KERN_INVALID_ADDRESS; + + /* + * Compute (page aligned) start and end of region + */ + src_start = vm_map_trunc_page(src_addr, PAGE_MASK); + src_end = vm_map_round_page(src_end, PAGE_MASK); + + /* + * Allocate a header element for the list. + * + * Use the start and end in the header to + * remember the endpoints prior to rounding. + */ + + copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + vm_map_copy_first_entry(copy) = + vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); + copy->type = VM_MAP_COPY_ENTRY_LIST; + copy->cpy_hdr.nentries = 0; + copy->cpy_hdr.entries_pageable = TRUE; + + vm_map_store_init(&copy->cpy_hdr); + + copy->offset = 0; + copy->size = len; + + kr = vm_map_remap_extract(src_map, + src_addr, + len, + FALSE, /* copy */ + &copy->cpy_hdr, + cur_prot, + max_prot, + VM_INHERIT_SHARE, + TRUE); /* pageable */ + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return kr; + } + + *copy_result = copy; + return KERN_SUCCESS; +} + /* * vm_map_copyin_object: * @@ -8499,10 +9137,6 @@ vm_map_fork( #error Unknown architecture.
#endif ); -#if defined(__i386__) - if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED) - pmap_set_4GB_pagezero(new_pmap); -#endif vm_map_reference_swap(old_map); vm_map_lock(old_map); @@ -8511,6 +9145,8 @@ vm_map_fork( old_map->min_offset, old_map->max_offset, old_map->hdr.entries_pageable); + /* inherit the parent map's page size */ + vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map)); for ( old_entry = vm_map_first_entry(old_map); old_entry != vm_map_to_entry(old_map); @@ -9211,6 +9847,7 @@ vm_map_region_recurse_64( vm_region_submap_info_64_t submap_info, /* IN/OUT */ mach_msg_type_number_t *count) /* IN/OUT */ { + mach_msg_type_number_t original_count; vm_region_extended_info_data_t extended; vm_map_entry_t tmp_entry; vm_map_offset_t user_address; @@ -9263,26 +9900,32 @@ vm_map_region_recurse_64( return KERN_INVALID_ARGUMENT; } - if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) { - if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { - /* - * "info" structure is not big enough and - * would overflow - */ - return KERN_INVALID_ARGUMENT; - } else { - look_for_pages = FALSE; - *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; - short_info = (vm_region_submap_short_info_64_t) submap_info; - submap_info = NULL; - } + + if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { + /* + * "info" structure is not big enough and + * would overflow + */ + return KERN_INVALID_ARGUMENT; + } + + original_count = *count; + + if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) { + *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; + look_for_pages = FALSE; + short_info = (vm_region_submap_short_info_64_t) submap_info; + submap_info = NULL; } else { look_for_pages = TRUE; - *count = VM_REGION_SUBMAP_INFO_COUNT_64; + *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64; short_info = NULL; + + if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) { + *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64; + } } - - + user_address = *address; user_max_depth = *nesting_depth; @@ -9494,7 +10137,7 @@ vm_map_region_recurse_64( // LP64todo: all the current tools are 32bit, obviously never worked for 64b // so probably should be a real 32b ID vs. ptr. 
// Current users just check for equality -#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p) +#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p)) if (look_for_pages) { submap_info->user_tag = curr_entry->alias; @@ -9522,6 +10165,7 @@ vm_map_region_recurse_64( extended.pages_swapped_out = 0; extended.pages_shared_now_private = 0; extended.pages_dirtied = 0; + extended.pages_reusable = 0; extended.external_pager = 0; extended.shadow_depth = 0; @@ -9540,7 +10184,7 @@ vm_map_region_recurse_64( curr_entry->vme_start)), range_end - range_start, &extended, - look_for_pages); + look_for_pages, VM_REGION_EXTENDED_INFO_COUNT); if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED) { @@ -9567,6 +10211,10 @@ vm_map_region_recurse_64( submap_info->shadow_depth = extended.shadow_depth; submap_info->share_mode = extended.share_mode; submap_info->ref_count = extended.ref_count; + + if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) { + submap_info->pages_reusable = extended.pages_reusable; + } } else { short_info->external_pager = extended.external_pager; short_info->shadow_depth = extended.shadow_depth; @@ -9703,14 +10351,18 @@ vm_map_region( return(KERN_SUCCESS); } case VM_REGION_EXTENDED_INFO: - { - vm_region_extended_info_t extended; - if (*count < VM_REGION_EXTENDED_INFO_COUNT) return(KERN_INVALID_ARGUMENT); + /*fallthru*/ + case VM_REGION_EXTENDED_INFO__legacy: + if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) + return KERN_INVALID_ARGUMENT; + + { + vm_region_extended_info_t extended; + mach_msg_type_number_t original_count; extended = (vm_region_extended_info_t) info; - *count = VM_REGION_EXTENDED_INFO_COUNT; vm_map_lock_read(map); @@ -9734,7 +10386,15 @@ vm_map_region( extended->external_pager = 0; extended->shadow_depth = 0; - vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE); + original_count = *count; + if (flavor == VM_REGION_EXTENDED_INFO__legacy) { + *count = VM_REGION_EXTENDED_INFO_COUNT__legacy; + } else { + extended->pages_reusable = 0; + *count = VM_REGION_EXTENDED_INFO_COUNT; + } + + vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count); if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) extended->share_mode = SM_PRIVATE; @@ -9859,9 +10519,9 @@ vm_map_region_top_walk( if (ref_count == 1 || (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { top->share_mode = SM_PRIVATE; - top->private_pages_resident = - OBJ_RESIDENT_COUNT(obj, - entry_size); + top->private_pages_resident = + OBJ_RESIDENT_COUNT(obj, + entry_size); } else { top->share_mode = SM_SHARED; top->shared_pages_resident = @@ -9872,7 +10532,7 @@ vm_map_region_top_walk( top->ref_count = ref_count; } /* XXX K64: obj_id will be truncated */ - top->obj_id = (unsigned int) (uintptr_t)obj; + top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj); vm_object_unlock(obj); } @@ -9886,7 +10546,8 @@ vm_map_region_walk( vm_object_offset_t offset, vm_object_size_t range, vm_region_extended_info_t extended, - boolean_t look_for_pages) + boolean_t look_for_pages, + mach_msg_type_number_t count) { register struct vm_object *obj, *tmp_obj; register vm_map_offset_t last_offset; @@ -9925,10 +10586,11 @@ vm_map_region_walk( if (look_for_pages) { for (last_offset = offset + range; offset < last_offset; - offset += PAGE_SIZE_64, va += PAGE_SIZE) - vm_map_region_look_for_page(map, va, obj, - offset, ref_count, - 0, extended); 
+ offset += PAGE_SIZE_64, va += PAGE_SIZE) { + vm_map_region_look_for_page(map, va, obj, + offset, ref_count, + 0, extended, count); + } } else { shadow_object = obj->shadow; shadow_depth = 0; @@ -10020,15 +10682,14 @@ vm_map_region_look_for_page( vm_object_offset_t offset, int max_refcnt, int depth, - vm_region_extended_info_t extended) + vm_region_extended_info_t extended, + mach_msg_type_number_t count) { register vm_page_t p; register vm_object_t shadow; register int ref_count; vm_object_t caller_object; -#if MACH_PAGEMAP kern_return_t kr; -#endif shadow = object->shadow; caller_object = object; @@ -10042,11 +10703,16 @@ vm_map_region_look_for_page( if (shadow && (max_refcnt == 1)) extended->pages_shared_now_private++; - if (!p->fictitious && + if (!p->fictitious && (p->dirty || pmap_is_modified(p->phys_page))) extended->pages_dirtied++; + else if (count >= VM_REGION_EXTENDED_INFO_COUNT) { + if (p->reusable || p->object->all_reusable) { + extended->pages_reusable++; + } + } - extended->pages_resident++; + extended->pages_resident++; if(object != caller_object) vm_object_unlock(object); @@ -10064,36 +10730,49 @@ vm_map_region_look_for_page( return; } - } else if (object->internal && - object->alive && - !object->terminating && - object->pager_ready) { - - memory_object_t pager; - - vm_object_paging_begin(object); - pager = object->pager; - vm_object_unlock(object); + } else +#endif /* MACH_PAGEMAP */ + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (VM_COMPRESSOR_PAGER_STATE_GET(object, + offset) + == VM_EXTERNAL_STATE_EXISTS) { + /* the pager has that page */ + extended->pages_swapped_out++; + if (object != caller_object) + vm_object_unlock(object); + return; + } + } else { + memory_object_t pager; - kr = memory_object_data_request( - pager, - offset + object->paging_offset, - 0, /* just poke the pager */ - VM_PROT_READ, - NULL); + vm_object_paging_begin(object); + pager = object->pager; + vm_object_unlock(object); - vm_object_lock(object); - vm_object_paging_end(object); + kr = memory_object_data_request( + pager, + offset + object->paging_offset, + 0, /* just poke the pager */ + VM_PROT_READ, + NULL); - if (kr == KERN_SUCCESS) { - /* the pager has that page */ - extended->pages_swapped_out++; - if (object != caller_object) - vm_object_unlock(object); - return; + vm_object_lock(object); + vm_object_paging_end(object); + + if (kr == KERN_SUCCESS) { + /* the pager has that page */ + extended->pages_swapped_out++; + if (object != caller_object) + vm_object_unlock(object); + return; + } } } -#endif /* MACH_PAGEMAP */ if (shadow) { vm_object_lock(shadow); @@ -10192,6 +10871,7 @@ vm_map_simplify_entry( prev_entry->vme_start)) == this_entry->offset) && + (prev_entry->map_aligned == this_entry->map_aligned) && (prev_entry->inheritance == this_entry->inheritance) && (prev_entry->protection == this_entry->protection) && (prev_entry->max_protection == this_entry->max_protection) && @@ -10216,6 +10896,9 @@ vm_map_simplify_entry( ) { vm_map_store_entry_unlink(map, prev_entry); assert(prev_entry->vme_start < this_entry->vme_end); + if (prev_entry->map_aligned) + assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start, + VM_MAP_PAGE_MASK(map))); this_entry->vme_start = prev_entry->vme_start; this_entry->offset = prev_entry->offset; if (prev_entry->is_sub_map) { @@ -10262,8 +10945,10 @@ vm_map_simplify_range( return; } - start = vm_map_trunc_page(start); - end = 
vm_map_round_page(end); + start = vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (!vm_map_lookup_entry(map, start, &entry)) { /* "start" is not mapped and "entry" ends before "start" */ @@ -10707,6 +11392,28 @@ vm_map_entry_is_reusable( { vm_object_t object; + switch (entry->alias) { + case VM_MEMORY_MALLOC: + case VM_MEMORY_MALLOC_SMALL: + case VM_MEMORY_MALLOC_LARGE: + case VM_MEMORY_REALLOC: + case VM_MEMORY_MALLOC_TINY: + case VM_MEMORY_MALLOC_LARGE_REUSABLE: + case VM_MEMORY_MALLOC_LARGE_REUSED: + /* + * This is a malloc() memory region: check if it's still + * in its original state and can be re-used for more + * malloc() allocations. + */ + break; + default: + /* + * Not a malloc() memory region: let the caller decide if + * it's re-usable. + */ + return TRUE; + } + if (entry->is_shared || entry->is_sub_map || entry->in_transition || @@ -10715,7 +11422,7 @@ vm_map_entry_is_reusable( entry->inheritance != VM_INHERIT_DEFAULT || entry->no_cache || entry->permanent || - entry->superpage_size != 0 || + entry->superpage_size != FALSE || entry->zero_wired_pages || entry->wired_count != 0 || entry->user_wired_count != 0) { @@ -10816,6 +11523,11 @@ vm_map_reuse_pages( object = entry->object.vm_object; if (object != VM_OBJECT_NULL) { + /* tell pmap to not count this range as "reusable" */ + pmap_reusable(map->pmap, + MAX(start, entry->vme_start), + MIN(end, entry->vme_end), + FALSE); vm_object_lock(object); vm_object_reuse_pages(object, start_offset, end_offset, TRUE); @@ -10911,6 +11623,11 @@ vm_map_reusable_pages( else kill_pages = -1; if (kill_pages != -1) { + /* tell pmap to count this range as "reusable" */ + pmap_reusable(map->pmap, + MAX(start, entry->vme_start), + MIN(end, entry->vme_end), + TRUE); vm_object_deactivate_pages(object, start_offset, end_offset - start_offset, @@ -11012,7 +11729,8 @@ vm_map_entry_insert( unsigned wired_count, boolean_t no_cache, boolean_t permanent, - unsigned int superpage_size) + unsigned int superpage_size, + boolean_t clear_map_aligned) { vm_map_entry_t new_entry; @@ -11020,10 +11738,26 @@ vm_map_entry_insert( new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable); + if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) { + new_entry->map_aligned = TRUE; + } else { + new_entry->map_aligned = FALSE; + } + if (clear_map_aligned && + ! 
VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))) { + new_entry->map_aligned = FALSE; + } + new_entry->vme_start = start; new_entry->vme_end = end; assert(page_aligned(new_entry->vme_start)); assert(page_aligned(new_entry->vme_end)); + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, + VM_MAP_PAGE_MASK(map))); + if (new_entry->map_aligned) { + assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, + VM_MAP_PAGE_MASK(map))); + } assert(new_entry->vme_start < new_entry->vme_end); new_entry->object.vm_object = object; @@ -11044,7 +11778,10 @@ vm_map_entry_insert( new_entry->zero_wired_pages = FALSE; new_entry->no_cache = no_cache; new_entry->permanent = permanent; - new_entry->superpage_size = superpage_size; + if (superpage_size) + new_entry->superpage_size = TRUE; + else + new_entry->superpage_size = FALSE; new_entry->used_for_jit = FALSE; /* @@ -11095,7 +11832,8 @@ vm_map_remap_extract( boolean_t new_entry_needs_copy; assert(map != VM_MAP_NULL); - assert(size != 0 && size == vm_map_round_page(size)); + assert(size != 0); + assert(size == vm_map_round_page(size, PAGE_MASK)); assert(inheritance == VM_INHERIT_NONE || inheritance == VM_INHERIT_COPY || inheritance == VM_INHERIT_SHARE); @@ -11103,8 +11841,9 @@ vm_map_remap_extract( /* * Compute start and end of region. */ - src_start = vm_map_trunc_page(addr); - src_end = vm_map_round_page(src_start + size); + src_start = vm_map_trunc_page(addr, PAGE_MASK); + src_end = vm_map_round_page(src_start + size, PAGE_MASK); + /* * Initialize map_header. @@ -11113,6 +11852,7 @@ vm_map_remap_extract( map_header->links.prev = (struct vm_map_entry *)&map_header->links; map_header->nentries = 0; map_header->entries_pageable = pageable; + map_header->page_shift = PAGE_SHIFT; vm_map_store_init( map_header ); @@ -11225,6 +11965,8 @@ vm_map_remap_extract( vm_map_entry_copy(new_entry, src_entry); new_entry->use_pmap = FALSE; /* clr address space specifics */ + new_entry->map_aligned = FALSE; + new_entry->vme_start = map_address; new_entry->vme_end = map_address + tmp_size; assert(new_entry->vme_start < new_entry->vme_end); @@ -11392,7 +12134,11 @@ vm_map_remap_extract( src_entry = new_entry) { new_entry = src_entry->vme_next; _vm_map_store_entry_unlink(map_header, src_entry); - vm_object_deallocate(src_entry->object.vm_object); + if (src_entry->is_sub_map) { + vm_map_deallocate(src_entry->object.sub_map); + } else { + vm_object_deallocate(src_entry->object.vm_object); + } _vm_map_entry_dispose(map_header, src_entry); } } @@ -11429,6 +12175,7 @@ vm_map_remap( vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL; vm_map_entry_t new_entry; struct vm_map_header map_header; + vm_map_offset_t offset_in_mapping; if (target_map == VM_MAP_NULL) return KERN_INVALID_ARGUMENT; @@ -11444,15 +12191,39 @@ vm_map_remap( return KERN_INVALID_ARGUMENT; } - size = vm_map_round_page(size); + /* + * If the user is requesting that we return the address of the + * first byte of the data (rather than the base of the page), + * then we use different rounding semantics: specifically, + * we assume that (memory_address, size) describes a region + * all of whose pages we must cover, rather than a base to be truncated + * down and a size to be added to that base. So we figure out + * the highest page that the requested region includes and make + * sure that the size will cover it. 
+ * + * The key example we're worried about it is of the form: + * + * memory_address = 0x1ff0, size = 0x20 + * + * With the old semantics, we round down the memory_address to 0x1000 + * and round up the size to 0x1000, resulting in our covering *only* + * page 0x1000. With the new semantics, we'd realize that the region covers + * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page + * 0x1000 and page 0x2000 in the region we remap. + */ + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK); + size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK); + } else { + size = vm_map_round_page(size, PAGE_MASK); + } result = vm_map_remap_extract(src_map, memory_address, size, copy, &map_header, cur_protection, max_protection, inheritance, - target_map->hdr. - entries_pageable); + target_map->hdr.entries_pageable); if (result != KERN_SUCCESS) { return result; @@ -11462,7 +12233,8 @@ vm_map_remap( * Allocate/check a range of free virtual address * space for the target */ - *address = vm_map_trunc_page(*address); + *address = vm_map_trunc_page(*address, + VM_MAP_PAGE_MASK(target_map)); vm_map_lock(target_map); result = vm_map_remap_range_allocate(target_map, address, size, mask, flags, &insp_entry); @@ -11475,6 +12247,7 @@ vm_map_remap( if (result == KERN_SUCCESS) { entry->vme_start += *address; entry->vme_end += *address; + assert(!entry->map_aligned); vm_map_store_entry_link(target_map, insp_entry, entry); insp_entry = entry; } else { @@ -11502,6 +12275,15 @@ vm_map_remap( if (result == KERN_SUCCESS && target_map->wiring_required) result = vm_map_wire(target_map, *address, *address + size, *cur_protection, TRUE); + + /* + * If requested, return the address of the data pointed to by the + * request, rather than the base of the resulting page. 
+ */ + if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { + *address += offset_in_mapping; + } + return result; } @@ -11564,6 +12346,8 @@ StartAgain: ; start = tmp_entry->vme_end; entry = tmp_entry; } + start = vm_map_round_page(start, + VM_MAP_PAGE_MASK(map)); } /* @@ -11582,6 +12366,8 @@ StartAgain: ; */ end = ((start + mask) & ~mask); + end = vm_map_round_page(end, + VM_MAP_PAGE_MASK(map)); if (end < start) return(KERN_NO_SPACE); start = end; @@ -11671,6 +12457,7 @@ StartAgain: ; if (zap_map == VM_MAP_NULL) { return KERN_RESOURCE_SHORTAGE; } + vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map)); kr = vm_map_delete(map, start, end, VM_MAP_REMOVE_SAVE_ENTRIES, @@ -11941,6 +12728,7 @@ vm_map_purgable_control( vm_object_lock(object); +#if 00 if (entry->offset != 0 || entry->vme_end - entry->vme_start != object->vo_size) { /* @@ -11951,6 +12739,7 @@ vm_map_purgable_control( vm_object_unlock(object); return KERN_INVALID_ARGUMENT; } +#endif vm_map_unlock_read(map); @@ -12005,7 +12794,6 @@ vm_map_page_info( boolean_t top_object; int disposition; int ref_count; - vm_object_id_t object_id; vm_page_info_basic_t basic_info; int depth; vm_map_offset_t offset_in_page; @@ -12028,13 +12816,12 @@ vm_map_page_info( disposition = 0; ref_count = 0; - object_id = 0; top_object = TRUE; depth = 0; retval = KERN_SUCCESS; offset_in_page = offset & PAGE_MASK; - offset = vm_map_trunc_page(offset); + offset = vm_map_trunc_page(offset, PAGE_MASK); vm_map_lock_read(map); @@ -12103,12 +12890,21 @@ vm_map_page_info( } } else #endif - { - if (object->internal && - object->alive && - !object->terminating && - object->pager_ready) { - + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (VM_COMPRESSOR_PAGER_STATE_GET( + object, + offset) + == VM_EXTERNAL_STATE_EXISTS) { + /* the pager has that page */ + disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; + break; + } + } else { memory_object_t pager; vm_object_paging_begin(object); @@ -12204,7 +13000,8 @@ done: basic_info = (vm_page_info_basic_t) info; basic_info->disposition = disposition; basic_info->ref_count = ref_count; - basic_info->object_id = (vm_object_id_t) (uintptr_t) object; + basic_info->object_id = (vm_object_id_t) (uintptr_t) + VM_KERNEL_ADDRPERM(object); basic_info->offset = (memory_object_offset_t) offset + offset_in_page; basic_info->depth = depth; @@ -12276,8 +13073,12 @@ vm_map_msync( /* * align address and size on page boundaries */ - size = vm_map_round_page(address + size) - vm_map_trunc_page(address); - address = vm_map_trunc_page(address); + size = (vm_map_round_page(address + size, + VM_MAP_PAGE_MASK(map)) - + vm_map_trunc_page(address, + VM_MAP_PAGE_MASK(map))); + address = vm_map_trunc_page(address, + VM_MAP_PAGE_MASK(map)); if (map == VM_MAP_NULL) return(KERN_INVALID_TASK); @@ -12294,7 +13095,10 @@ vm_map_msync( vm_map_lock(map); if (!vm_map_lookup_entry(map, - vm_map_trunc_page(address), &entry)) { + vm_map_trunc_page( + address, + VM_MAP_PAGE_MASK(map)), + &entry)) { vm_map_size_t skip; @@ -12464,6 +13268,8 @@ vm_map_msync( if (pager == MEMORY_OBJECT_NULL) { vm_object_unlock(object); vm_object_deallocate(object); + msync_req_free(new_msr); + new_msr = NULL; continue; } @@ -12607,45 +13413,35 @@ vm_object_t convert_port_entry_to_object( ipc_port_t port) { - vm_object_t object; + vm_object_t object = VM_OBJECT_NULL; vm_named_entry_t named_entry; - uint32_t try_failed_count = 0; - - if(IP_VALID(port) && (ip_kotype(port) 
== IKOT_NAMED_ENTRY)) { - while(TRUE) { - ip_lock(port); - if(ip_active(port) && (ip_kotype(port) - == IKOT_NAMED_ENTRY)) { - named_entry = - (vm_named_entry_t)port->ip_kobject; - if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { - ip_unlock(port); - - try_failed_count++; - mutex_pause(try_failed_count); - continue; - } - named_entry->ref_count++; - lck_mtx_unlock(&(named_entry)->Lock); + uint32_t try_failed_count = 0; + + if (IP_VALID(port) && + (ip_kotype(port) == IKOT_NAMED_ENTRY)) { + try_again: + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_NAMED_ENTRY)) { + named_entry = (vm_named_entry_t)port->ip_kobject; + if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { ip_unlock(port); - if ((!named_entry->is_sub_map) && - (!named_entry->is_pager) && - (named_entry->protection - & VM_PROT_WRITE)) { - object = named_entry->backing.object; - } else { - mach_destroy_memory_entry(port); - return (vm_object_t)NULL; - } - vm_object_reference(named_entry->backing.object); - mach_destroy_memory_entry(port); - break; + try_failed_count++; + mutex_pause(try_failed_count); + goto try_again; + } + named_entry->ref_count++; + lck_mtx_unlock(&(named_entry)->Lock); + ip_unlock(port); + if (!(named_entry->is_sub_map) && + !(named_entry->is_pager) && + !(named_entry->is_copy) && + (named_entry->protection & VM_PROT_WRITE)) { + object = named_entry->backing.object; + vm_object_reference(object); } - else - return (vm_object_t)NULL; + mach_destroy_memory_entry(port); } - } else { - return (vm_object_t)NULL; } return object; @@ -12768,6 +13564,12 @@ vm_compute_max_offset(unsigned is64) return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS); } +uint64_t +vm_map_get_max_aslr_slide_pages(vm_map_t map) +{ + return (1 << (vm_map_is_64bit(map) ? 16 : 8)); +} + boolean_t vm_map_is_64bit( vm_map_t map) @@ -12796,22 +13598,14 @@ vm_map_has_hard_pagezero( void vm_map_set_4GB_pagezero(vm_map_t map) { -#if defined(__i386__) - pmap_set_4GB_pagezero(map->pmap); -#else #pragma unused(map) -#endif } void vm_map_clear_4GB_pagezero(vm_map_t map) { -#if defined(__i386__) - pmap_clear_4GB_pagezero(map->pmap); -#else #pragma unused(map) -#endif } /* @@ -12857,7 +13651,8 @@ vm_map_raise_min_offset( { vm_map_entry_t first_entry; - new_min_offset = vm_map_round_page(new_min_offset); + new_min_offset = vm_map_round_page(new_min_offset, + VM_MAP_PAGE_MASK(map)); vm_map_lock(map); @@ -12912,6 +13707,29 @@ void vm_map_switch_protect(vm_map_t map, vm_map_unlock(map); } +/* + * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately. + * phys_footprint is a composite limit consisting of iokit + physmem, so we need to + * bump both counters. 
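The vm_map_get_max_aslr_slide_pages() routine added above expresses the ASLR slide budget in map pages: 2^16 candidate positions for a 64-bit map, 2^8 for a 32-bit one. The standalone sketch below shows what that budget works out to in bytes; the 4 KB page size is an assumption for illustration, since the real granularity comes from the map.

#include <stdio.h>
#include <stdint.h>

#define ASSUMED_PAGE_SIZE 4096u   /* assumption: 4 KB pages */

static uint64_t max_aslr_slide_pages(int is64bit)
{
    /* mirrors the patch: 1 << (is64bit ? 16 : 8) */
    return 1ull << (is64bit ? 16 : 8);
}

int main(void)
{
    printf("64-bit map: %llu slide positions (%llu MB of range)\n",
           (unsigned long long)max_aslr_slide_pages(1),
           (unsigned long long)(max_aslr_slide_pages(1) * ASSUMED_PAGE_SIZE >> 20));
    printf("32-bit map: %llu slide positions (%llu MB of range)\n",
           (unsigned long long)max_aslr_slide_pages(0),
           (unsigned long long)(max_aslr_slide_pages(0) * ASSUMED_PAGE_SIZE >> 20));
    return 0;
}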
+ */ +void +vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes) +{ + pmap_t pmap = vm_map_pmap(map); + + ledger_credit(pmap->ledger, task_ledgers.iokit_mem, bytes); + ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes); +} + +void +vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes) +{ + pmap_t pmap = vm_map_pmap(map); + + ledger_debit(pmap->ledger, task_ledgers.iokit_mem, bytes); + ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes); +} + /* Add (generate) code signature for memory range */ #if CONFIG_DYNAMIC_CODE_SIGNING kern_return_t vm_map_sign(vm_map_t map, @@ -13068,8 +13886,9 @@ kern_return_t vm_map_freeze( unsigned int dirty_budget, boolean_t *has_shared) { - vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL; - kern_return_t kr = KERN_SUCCESS; + vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL; + kern_return_t kr = KERN_SUCCESS; + boolean_t default_freezer_active = TRUE; *purgeable_count = *wired_count = *clean_count = *dirty_count = 0; *has_shared = FALSE; @@ -13081,17 +13900,23 @@ kern_return_t vm_map_freeze( */ vm_map_lock(map); - if (map->default_freezer_handle == NULL) { - map->default_freezer_handle = default_freezer_handle_allocate(); + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + default_freezer_active = FALSE; } - if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) { - /* - * Can happen if default_freezer_handle passed in is NULL - * Or, a table has already been allocated and associated - * with this handle, i.e. the map is already frozen. - */ - goto done; + if (default_freezer_active) { + if (map->default_freezer_handle == NULL) { + map->default_freezer_handle = default_freezer_handle_allocate(); + } + + if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) { + /* + * Can happen if default_freezer_handle passed in is NULL + * Or, a table has already been allocated and associated + * with this handle, i.e. the map is already frozen. + */ + goto done; + } } for (entry2 = vm_map_first_entry(map); @@ -13100,35 +13925,46 @@ kern_return_t vm_map_freeze( vm_object_t src_object = entry2->object.vm_object; - /* If eligible, scan the entry, moving eligible pages over to our parent object */ if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) { - unsigned int purgeable, clean, dirty, wired; - boolean_t shared; - - default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, - src_object, map->default_freezer_handle); - - *purgeable_count += purgeable; - *wired_count += wired; - *clean_count += clean; - *dirty_count += dirty; + /* If eligible, scan the entry, moving eligible pages over to our parent object */ + if (default_freezer_active) { + unsigned int purgeable, clean, dirty, wired; + boolean_t shared; - /* Adjust pageout budget and finish up if reached */ - if (dirty_budget) { - dirty_budget -= dirty; - if (dirty_budget == 0) { - break; + default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, + src_object, map->default_freezer_handle); + + *purgeable_count += purgeable; + *wired_count += wired; + *clean_count += clean; + *dirty_count += dirty; + + /* Adjust pageout budget and finish up if reached */ + if (dirty_budget) { + dirty_budget -= dirty; + if (dirty_budget == 0) { + break; + } } - } - if (shared) { - *has_shared = TRUE; + if (shared) { + *has_shared = TRUE; + } + } else { + /* + * To the compressor. 
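The vm_map_iokit_mapped_region()/vm_map_iokit_unmapped_region() pair above charges an IOKit mapping to two ledger entries at once, because phys_footprint is a composite limit that already folds in iokit_mem. Below is a minimal userspace model of that double-entry bookkeeping; the struct and function names are illustrative, not the kernel's ledger API.

#include <stdio.h>
#include <stdint.h>

struct ledger_model {
    int64_t iokit_mem;        /* stands in for task_ledgers.iokit_mem */
    int64_t phys_footprint;   /* stands in for task_ledgers.phys_footprint */
};

static void iokit_mapped(struct ledger_model *l, uint64_t bytes)
{
    l->iokit_mem      += bytes;   /* ledger_credit() on the IOKit counter */
    l->phys_footprint += bytes;   /* and on the composite footprint */
}

static void iokit_unmapped(struct ledger_model *l, uint64_t bytes)
{
    l->iokit_mem      -= bytes;   /* ledger_debit() undoes both charges */
    l->phys_footprint -= bytes;
}

int main(void)
{
    struct ledger_model l = { 0, 0 };
    iokit_mapped(&l, 1 << 20);
    printf("after map:   iokit=%lld footprint=%lld\n",
           (long long)l.iokit_mem, (long long)l.phys_footprint);
    iokit_unmapped(&l, 1 << 20);
    printf("after unmap: iokit=%lld footprint=%lld\n",
           (long long)l.iokit_mem, (long long)l.phys_footprint);
    return 0;
}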
+ */ + if (entry2->object.vm_object->internal == TRUE) { + vm_object_pageout(entry2->object.vm_object); + } } } } - /* Finally, throw out the pages to swap */ - default_freezer_pageout(map->default_freezer_handle); + if (default_freezer_active) { + /* Finally, throw out the pages to swap */ + default_freezer_pageout(map->default_freezer_handle); + } done: vm_map_unlock(map); @@ -13142,6 +13978,13 @@ vm_map_thaw( { kern_return_t kr = KERN_SUCCESS; + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + /* + * We will on-demand thaw in the presence of the compressed pager. + */ + return kr; + } + vm_map_lock(map); if (map->default_freezer_handle == NULL) { @@ -13152,7 +13995,7 @@ vm_map_thaw( goto out; } - default_freezer_unpack(map->default_freezer_handle); + kr = default_freezer_unpack(map->default_freezer_handle); out: vm_map_unlock(map); @@ -13160,7 +14003,6 @@ out: } #endif -#if !CONFIG_EMBEDDED /* * vm_map_entry_should_cow_for_true_share: * @@ -13240,4 +14082,124 @@ vm_map_entry_should_cow_for_true_share( */ return TRUE; } -#endif /* !CONFIG_EMBEDDED */ + +vm_map_offset_t +vm_map_round_page_mask( + vm_map_offset_t offset, + vm_map_offset_t mask) +{ + return VM_MAP_ROUND_PAGE(offset, mask); +} + +vm_map_offset_t +vm_map_trunc_page_mask( + vm_map_offset_t offset, + vm_map_offset_t mask) +{ + return VM_MAP_TRUNC_PAGE(offset, mask); +} + +int +vm_map_page_shift( + vm_map_t map) +{ + return VM_MAP_PAGE_SHIFT(map); +} + +int +vm_map_page_size( + vm_map_t map) +{ + return VM_MAP_PAGE_SIZE(map); +} + +int +vm_map_page_mask( + vm_map_t map) +{ + return VM_MAP_PAGE_MASK(map); +} + +kern_return_t +vm_map_set_page_shift( + vm_map_t map, + int pageshift) +{ + if (map->hdr.nentries != 0) { + /* too late to change page size */ + return KERN_FAILURE; + } + + map->hdr.page_shift = pageshift; + + return KERN_SUCCESS; +} + +kern_return_t +vm_map_query_volatile( + vm_map_t map, + mach_vm_size_t *volatile_virtual_size_p, + mach_vm_size_t *volatile_resident_size_p, + mach_vm_size_t *volatile_pmap_size_p) +{ + mach_vm_size_t volatile_virtual_size; + mach_vm_size_t volatile_resident_count; + mach_vm_size_t volatile_pmap_count; + mach_vm_size_t resident_count; + vm_map_entry_t entry; + vm_object_t object; + + /* map should be locked by caller */ + + volatile_virtual_size = 0; + volatile_resident_count = 0; + volatile_pmap_count = 0; + + for (entry = vm_map_first_entry(map); + entry != vm_map_to_entry(map); + entry = entry->vme_next) { + if (entry->is_sub_map) { + continue; + } + if (! (entry->protection & VM_PROT_WRITE)) { + continue; + } + object = entry->object.vm_object; + if (object == VM_OBJECT_NULL) { + continue; + } + if (object->purgable != VM_PURGABLE_VOLATILE) { + continue; + } + if (entry->offset != 0) { + /* + * If the map entry has been split and the object now + * appears several times in the VM map, we don't want + * to count the object's resident_page_count more than + * once. We count it only for the first one, starting + * at offset 0 and ignore the other VM map entries. 
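The accessor family above (vm_map_round_page_mask(), vm_map_trunc_page_mask(), and the page shift/size/mask getters) lets callers round against a specific map's page geometry rather than the global PAGE_MASK, and vm_map_set_page_shift() refuses the change once the map already holds entries. A standalone sketch of the underlying mask arithmetic, with 4 KB and 16 KB masks chosen for illustration:

#include <stdio.h>
#include <stdint.h>

/* same shape as the VM_MAP_ROUND_PAGE/VM_MAP_TRUNC_PAGE macros this
 * patch adds to vm_map.h: pgmask is (page_size - 1) */
#define ROUND_PAGE(x, pgmask) (((uint64_t)(x) + (pgmask)) & ~(uint64_t)(pgmask))
#define TRUNC_PAGE(x, pgmask) ((uint64_t)(x) & ~(uint64_t)(pgmask))

int main(void)
{
    uint64_t mask4k  = 4096 - 1;
    uint64_t mask16k = 16384 - 1;

    printf("round(0x1001, 4K)  = 0x%llx\n", (unsigned long long)ROUND_PAGE(0x1001, mask4k));
    printf("trunc(0x1fff, 4K)  = 0x%llx\n", (unsigned long long)TRUNC_PAGE(0x1fff, mask4k));
    printf("round(0x1001, 16K) = 0x%llx\n", (unsigned long long)ROUND_PAGE(0x1001, mask16k));
    return 0;
}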
+ */ + continue; + } + resident_count = object->resident_page_count; + if ((entry->offset / PAGE_SIZE) >= resident_count) { + resident_count = 0; + } else { + resident_count -= (entry->offset / PAGE_SIZE); + } + + volatile_virtual_size += entry->vme_end - entry->vme_start; + volatile_resident_count += resident_count; + volatile_pmap_count += pmap_query_resident(map->pmap, + entry->vme_start, + entry->vme_end); + } + + /* map is still locked on return */ + + *volatile_virtual_size_p = volatile_virtual_size; + *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE; + *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE; + + return KERN_SUCCESS; +} diff --git a/osfmk/vm/vm_map.h b/osfmk/vm/vm_map.h index a89ad4d50..2a40d949d 100644 --- a/osfmk/vm/vm_map.h +++ b/osfmk/vm/vm_map.h @@ -171,15 +171,18 @@ struct vm_named_entry { vm_object_t object; /* object I point to */ memory_object_t pager; /* amo pager port */ vm_map_t map; /* map backing submap */ + vm_map_copy_t copy; /* a VM map copy */ } backing; vm_object_offset_t offset; /* offset into object */ vm_object_size_t size; /* size of region */ + vm_object_offset_t data_offset; /* offset to first byte of data */ vm_prot_t protection; /* access permissions */ int ref_count; /* Number of references */ unsigned int /* Is backing.xxx : */ /* boolean_t */ internal:1, /* ... an internal object */ /* boolean_t */ is_sub_map:1, /* ... a submap? */ - /* boolean_t */ is_pager:1; /* ... a pager port */ + /* boolean_t */ is_pager:1, /* ... a pager port */ + /* boolean_t */ is_copy:1; /* ... a VM map copy */ }; /* @@ -235,18 +238,25 @@ struct vm_map_entry { /* unsigned char */ alias:8, /* user alias */ /* boolean_t */ no_cache:1, /* should new pages be cached? */ /* boolean_t */ permanent:1, /* mapping can not be removed */ - /* boolean_t */ superpage_size:3,/* use superpages of a certain size */ + /* boolean_t */ superpage_size:1,/* use superpages of a certain size */ + /* boolean_t */ map_aligned:1, /* align to map's page size */ /* boolean_t */ zero_wired_pages:1, /* zero out the wired pages of this entry it is being deleted without unwiring them */ /* boolean_t */ used_for_jit:1, - /* boolean_t */ from_reserved_zone:1; /* Allocated from + /* boolean_t */ from_reserved_zone:1, /* Allocated from * kernel reserved zone */ + __unused_bits:1; unsigned short wired_count; /* can be paged if = 0 */ unsigned short user_wired_count; /* for vm_wire */ #if DEBUG #define MAP_ENTRY_CREATION_DEBUG (1) +#define MAP_ENTRY_INSERTION_DEBUG (1) #endif #if MAP_ENTRY_CREATION_DEBUG - uintptr_t vme_bt[16]; + struct vm_map_header *vme_creation_maphdr; + uintptr_t vme_creation_bt[16]; +#endif +#if MAP_ENTRY_INSERTION_DEBUG + uintptr_t vme_insertion_bt[16]; #endif }; @@ -284,8 +294,13 @@ struct vm_map_header { #ifdef VM_MAP_STORE_USE_RB struct rb_head rb_head_store; #endif + int page_shift; /* page shift */ }; +#define VM_MAP_HDR_PAGE_SHIFT(hdr) ((hdr)->page_shift) +#define VM_MAP_HDR_PAGE_SIZE(hdr) (1 << VM_MAP_HDR_PAGE_SHIFT((hdr))) +#define VM_MAP_HDR_PAGE_MASK(hdr) (VM_MAP_HDR_PAGE_SIZE((hdr)) - 1) + /* * Type: vm_map_t [exported; contents invisible] * @@ -423,6 +438,9 @@ struct vm_map_copy { #define cpy_kdata c_u.c_k.kdata #define cpy_kalloc_size c_u.c_k.kalloc_size +#define VM_MAP_COPY_PAGE_SHIFT(copy) ((copy)->cpy_hdr.page_shift) +#define VM_MAP_COPY_PAGE_SIZE(copy) (1 << VM_MAP_COPY_PAGE_SHIFT((copy))) +#define VM_MAP_COPY_PAGE_MASK(copy) (VM_MAP_COPY_PAGE_SIZE((copy)) - 1) /* * Useful macros for entry list copy objects @@ -464,9 +482,9 @@ struct 
vm_map_copy { */ /* Initialize the module */ -extern void vm_map_init(void) __attribute__((section("__TEXT, initcode"))); +extern void vm_map_init(void); -extern void vm_kernel_reserved_entry_init(void) __attribute__((section("__TEXT, initcode"))); +extern void vm_kernel_reserved_entry_init(void); /* Allocate a range in the specified virtual address map and * return the entry allocated for that range. */ @@ -486,10 +504,8 @@ extern void vm_map_clip_end( vm_map_t map, vm_map_entry_t entry, vm_map_offset_t endaddr); -#if !CONFIG_EMBEDDED extern boolean_t vm_map_entry_should_cow_for_true_share( vm_map_entry_t entry); -#endif /* !CONFIG_EMBEDDED */ /* Lookup map entry containing or the specified address in the given map */ extern boolean_t vm_map_lookup_entry( @@ -497,6 +513,15 @@ extern boolean_t vm_map_lookup_entry( vm_map_address_t address, vm_map_entry_t *entry); /* OUT */ +extern void vm_map_copy_remap( + vm_map_t map, + vm_map_entry_t where, + vm_map_copy_t copy, + vm_map_offset_t adjustment, + vm_prot_t cur_prot, + vm_prot_t max_prot, + vm_inherit_t inheritance); + /* Find the VM object, offset, and protection for a given virtual address * in the specified map, assuming a page fault of the type specified. */ extern kern_return_t vm_map_lookup_locked( @@ -534,7 +559,8 @@ extern vm_map_entry_t vm_map_entry_insert( unsigned wired_count, boolean_t no_cache, boolean_t permanent, - unsigned int superpage_size); + unsigned int superpage_size, + boolean_t clear_map_aligned); /* @@ -825,6 +851,11 @@ extern kern_return_t vm_map_page_query_internal( int *disposition, int *ref_count); +extern kern_return_t vm_map_query_volatile( + vm_map_t map, + mach_vm_size_t *volatile_virtual_size_p, + mach_vm_size_t *volatile_resident_size_p, + mach_vm_size_t *volatile_pmap_size_p); extern kern_return_t vm_map_submap( vm_map_t map, @@ -967,6 +998,15 @@ extern kern_return_t vm_map_copyout( vm_map_address_t *dst_addr, /* OUT */ vm_map_copy_t copy); +extern kern_return_t vm_map_copyout_internal( + vm_map_t dst_map, + vm_map_address_t *dst_addr, /* OUT */ + vm_map_copy_t copy, + boolean_t consume_on_success, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance); + extern kern_return_t vm_map_copyin( vm_map_t src_map, vm_map_address_t src_addr, @@ -983,6 +1023,15 @@ extern kern_return_t vm_map_copyin_common( vm_map_copy_t *copy_result, /* OUT */ boolean_t use_maxprot); +extern kern_return_t vm_map_copy_extract( + vm_map_t src_map, + vm_map_address_t src_addr, + vm_map_size_t len, + vm_map_copy_t *copy_result, /* OUT */ + vm_prot_t *cur_prot, /* OUT */ + vm_prot_t *max_prot); + + extern void vm_map_disable_NX( vm_map_t map); @@ -1021,6 +1070,9 @@ extern kern_return_t vm_map_raise_min_offset( extern vm_map_offset_t vm_compute_max_offset( unsigned is64); +extern uint64_t vm_map_get_max_aslr_slide_pages( + vm_map_t map); + extern void vm_map_set_user_wire_limit( vm_map_t map, vm_size_t limit); @@ -1029,8 +1081,34 @@ extern void vm_map_switch_protect( vm_map_t map, boolean_t val); +extern void vm_map_iokit_mapped_region( + vm_map_t map, + vm_size_t bytes); + +extern void vm_map_iokit_unmapped_region( + vm_map_t map, + vm_size_t bytes); + + extern boolean_t first_free_is_valid(vm_map_t); +extern int vm_map_page_shift( + vm_map_t map); + +extern int vm_map_page_mask( + vm_map_t map); + +extern int vm_map_page_size( + vm_map_t map); + +extern vm_map_offset_t vm_map_round_page_mask( + vm_map_offset_t offset, + vm_map_offset_t mask); + +extern vm_map_offset_t vm_map_trunc_page_mask( + 
vm_map_offset_t offset, + vm_map_offset_t mask); + #ifdef XNU_KERNEL_PRIVATE extern kern_return_t vm_map_page_info( vm_map_t map, @@ -1061,13 +1139,29 @@ extern kern_return_t vm_map_page_info( vm_map_copyin_common(src_map, src_addr, len, src_destroy, \ FALSE, copy_result, TRUE) -#endif /* MACH_KERNEL_PRIVATE */ + +/* + * Internal macros for rounding and truncation of vm_map offsets and sizes + */ +#define VM_MAP_ROUND_PAGE(x,pgmask) (((vm_map_offset_t)(x) + (pgmask)) & ~((signed)(pgmask))) +#define VM_MAP_TRUNC_PAGE(x,pgmask) ((vm_map_offset_t)(x) & ~((signed)(pgmask))) /* * Macros for rounding and truncation of vm_map offsets and sizes */ -#define vm_map_round_page(x) (((vm_map_offset_t)(x) + PAGE_MASK) & ~((signed)PAGE_MASK)) -#define vm_map_trunc_page(x) ((vm_map_offset_t)(x) & ~((signed)PAGE_MASK)) +#define VM_MAP_PAGE_SHIFT(map) ((map) ? (map)->hdr.page_shift : PAGE_SHIFT) +#define VM_MAP_PAGE_SIZE(map) (1 << VM_MAP_PAGE_SHIFT((map))) +#define VM_MAP_PAGE_MASK(map) (VM_MAP_PAGE_SIZE((map)) - 1) +#define VM_MAP_PAGE_ALIGNED(x,pgmask) (((x) & (pgmask)) == 0) + +#endif /* MACH_KERNEL_PRIVATE */ + +#ifdef XNU_KERNEL_PRIVATE +extern kern_return_t vm_map_set_page_shift(vm_map_t map, int pageshift); +#endif /* XNU_KERNEL_PRIVATE */ + +#define vm_map_round_page(x,pgmask) (((vm_map_offset_t)(x) + (pgmask)) & ~((signed)(pgmask))) +#define vm_map_trunc_page(x,pgmask) ((vm_map_offset_t)(x) & ~((signed)(pgmask))) /* * Flags for vm_map_remove() and vm_map_delete() diff --git a/osfmk/vm/vm_map_store.c b/osfmk/vm/vm_map_store.c index b875fd651..3a2b381f0 100644 --- a/osfmk/vm/vm_map_store.c +++ b/osfmk/vm/vm_map_store.c @@ -106,6 +106,10 @@ _vm_map_store_entry_link( struct vm_map_header * mapHdr, vm_map_entry_t after_wh #ifdef VM_MAP_STORE_USE_RB vm_map_store_entry_link_rb(mapHdr, after_where, entry); #endif +#if MAP_ENTRY_INSERTION_DEBUG + fastbacktrace(&entry->vme_insertion_bt[0], + (sizeof (entry->vme_insertion_bt) / sizeof (uintptr_t))); +#endif } void diff --git a/osfmk/vm/vm_map_store_ll.c b/osfmk/vm/vm_map_store_ll.c index 16959bdb6..f33ac3270 100644 --- a/osfmk/vm/vm_map_store_ll.c +++ b/osfmk/vm/vm_map_store_ll.c @@ -34,8 +34,14 @@ first_free_is_valid_ll( vm_map_t map ) vm_map_entry_t entry, next; entry = vm_map_to_entry(map); next = entry->vme_next; - while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) || - (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) && + while (vm_map_trunc_page(next->vme_start, + VM_MAP_PAGE_MASK(map)) == + vm_map_trunc_page(entry->vme_end, + VM_MAP_PAGE_MASK(map)) || + (vm_map_trunc_page(next->vme_start, + VM_MAP_PAGE_MASK(map)) == + vm_map_trunc_page(entry->vme_start, + VM_MAP_PAGE_MASK(map)) && next != vm_map_to_entry(map))) { entry = next; next = entry->vme_next; @@ -57,32 +63,42 @@ first_free_is_valid_ll( vm_map_t map ) * entry immediately before the first hole in the map. * The map should be locked. 
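first_free_is_valid_ll() above now truncates both endpoints with the map's own page mask before comparing neighbours. The simplified model below finds the entry immediately before the first page-granularity hole; it keeps only the vme_end comparison, whereas the kernel loop also treats a successor sharing a page with the current entry's start as not yet a hole.

#include <stdio.h>
#include <stdint.h>

#define TRUNC(x, m) ((x) & ~(uint64_t)(m))

struct entry { uint64_t start, end; };

static int first_free_index(const struct entry *e, int n, uint64_t pgmask)
{
    for (int i = 0; i + 1 < n; i++) {
        /* successor begins in the page this entry ends in: no hole yet */
        if (TRUNC(e[i + 1].start, pgmask) == TRUNC(e[i].end, pgmask))
            continue;
        return i;   /* the first hole opens after entry i */
    }
    return n - 1;
}

int main(void)
{
    struct entry map[] = {
        { 0x1000, 0x2000 }, { 0x2000, 0x3000 }, { 0x8000, 0x9000 },
    };
    printf("first free after entry %d\n", first_free_index(map, 3, 0xFFF));
    return 0;
}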
*/ -#define UPDATE_FIRST_FREE_LL(map, new_first_free) \ +#define UPDATE_FIRST_FREE_LL(map, new_first_free) \ MACRO_BEGIN \ - if( map->disable_vmentry_reuse == FALSE){ \ - vm_map_t UFF_map; \ - vm_map_entry_t UFF_first_free; \ - vm_map_entry_t UFF_next_entry; \ - UFF_map = (map); \ - UFF_first_free = (new_first_free); \ - UFF_next_entry = UFF_first_free->vme_next; \ - while (vm_map_trunc_page(UFF_next_entry->vme_start) == \ - vm_map_trunc_page(UFF_first_free->vme_end) || \ - (vm_map_trunc_page(UFF_next_entry->vme_start) == \ - vm_map_trunc_page(UFF_first_free->vme_start) && \ - UFF_next_entry != vm_map_to_entry(UFF_map))) { \ - UFF_first_free = UFF_next_entry; \ - UFF_next_entry = UFF_first_free->vme_next; \ - if (UFF_first_free == vm_map_to_entry(UFF_map)) \ - break; \ - } \ - UFF_map->first_free = UFF_first_free; \ - assert(first_free_is_valid(UFF_map)); \ - } \ + if( map->disable_vmentry_reuse == FALSE){ \ + vm_map_t UFF_map; \ + vm_map_entry_t UFF_first_free; \ + vm_map_entry_t UFF_next_entry; \ + UFF_map = (map); \ + UFF_first_free = (new_first_free); \ + UFF_next_entry = UFF_first_free->vme_next; \ + while (vm_map_trunc_page(UFF_next_entry->vme_start, \ + VM_MAP_PAGE_MASK(UFF_map)) == \ + vm_map_trunc_page(UFF_first_free->vme_end, \ + VM_MAP_PAGE_MASK(UFF_map)) || \ + (vm_map_trunc_page(UFF_next_entry->vme_start, \ + VM_MAP_PAGE_MASK(UFF_map)) == \ + vm_map_trunc_page(UFF_first_free->vme_start, \ + VM_MAP_PAGE_MASK(UFF_map)) && \ + UFF_next_entry != vm_map_to_entry(UFF_map))) { \ + UFF_first_free = UFF_next_entry; \ + UFF_next_entry = UFF_first_free->vme_next; \ + if (UFF_first_free == vm_map_to_entry(UFF_map)) \ + break; \ + } \ + UFF_map->first_free = UFF_first_free; \ + assert(first_free_is_valid(UFF_map)); \ + } \ MACRO_END #define _vm_map_entry_link_ll(hdr, after_where, entry) \ MACRO_BEGIN \ + if (entry->map_aligned) { \ + assert(VM_MAP_PAGE_ALIGNED((entry->vme_start), \ + VM_MAP_HDR_PAGE_MASK((hdr))));\ + assert(VM_MAP_PAGE_ALIGNED((entry->vme_end), \ + VM_MAP_HDR_PAGE_MASK((hdr))));\ + } \ (hdr)->nentries++; \ (entry)->vme_prev = (after_where); \ (entry)->vme_next = (after_where)->vme_next; \ diff --git a/osfmk/vm/vm_map_store_rb.c b/osfmk/vm/vm_map_store_rb.c index 2e103b0a2..1643d1dd9 100644 --- a/osfmk/vm/vm_map_store_rb.c +++ b/osfmk/vm/vm_map_store_rb.c @@ -135,6 +135,10 @@ void vm_map_store_copy_insert_rb( vm_map_t map, __unused vm_map_entry_t after_wh (uintptr_t)prev->vme_start, (uintptr_t)prev->vme_end, (uintptr_t)entry->vme_start, (uintptr_t)entry->vme_end, (uintptr_t)(VME_FOR_STORE(rbh->rbh_root))->vme_start, (uintptr_t)(VME_FOR_STORE(rbh->rbh_root))->vme_end); } else { +#if MAP_ENTRY_INSERTION_DEBUG + fastbacktrace(&entry->vme_insertion_bt[0], + (sizeof (entry->vme_insertion_bt) / sizeof (uintptr_t))); +#endif entry = entry->vme_next; inserted++; nentries--; diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index aeab64231..9044bbe06 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -90,6 +90,7 @@ #include #include +#include #include #include #include @@ -98,6 +99,8 @@ #include #include +#include + /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. 
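_vm_map_entry_link_ll() now asserts that a map_aligned entry really does start and end on the header's page boundary before it is linked in. A compact sketch of the same guard, with illustrative types:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_ALIGNED(x, pgmask) (((x) & (uint64_t)(pgmask)) == 0)

struct entry_model { uint64_t start, end; int map_aligned; };

static void link_entry(const struct entry_model *e, uint64_t hdr_pgmask)
{
    if (e->map_aligned) {
        /* mirrors the VM_MAP_PAGE_ALIGNED asserts added above */
        assert(PAGE_ALIGNED(e->start, hdr_pgmask));
        assert(PAGE_ALIGNED(e->end, hdr_pgmask));
    }
    printf("linked [0x%llx, 0x%llx)\n",
           (unsigned long long)e->start, (unsigned long long)e->end);
}

int main(void)
{
    struct entry_model e = { 0x4000, 0x8000, 1 };
    link_entry(&e, 0xFFF);   /* 4 KB map: both bounds aligned, asserts hold */
    return 0;
}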
A given @@ -205,6 +208,8 @@ static zone_t vm_object_zone; /* vm backing store zone */ static struct vm_object kernel_object_store; vm_object_t kernel_object; +static struct vm_object compressor_object_store; +vm_object_t compressor_object = &compressor_object_store; /* * The submap object is used as a placeholder for vm_map_submap @@ -486,6 +491,7 @@ lck_grp_t vm_object_cache_lck_grp; lck_grp_attr_t vm_object_lck_grp_attr; lck_attr_t vm_object_lck_attr; lck_attr_t kernel_object_lck_attr; +lck_attr_t compressor_object_lck_attr; /* * vm_object_bootstrap: @@ -587,8 +593,8 @@ vm_object_bootstrap(void) vm_object_template.pageout = FALSE; vm_object_template.alive = TRUE; vm_object_template.purgable = VM_PURGABLE_DENY; + vm_object_template.purgeable_when_ripe = FALSE; vm_object_template.shadowed = FALSE; - vm_object_template.silent_overwrite = FALSE; vm_object_template.advisory_pageout = FALSE; vm_object_template.true_share = FALSE; vm_object_template.terminating = FALSE; @@ -620,6 +626,7 @@ vm_object_bootstrap(void) /* cache bitfields */ vm_object_template.wimg_bits = VM_WIMG_USE_DEFAULT; vm_object_template.set_cache_attr = FALSE; + vm_object_template.object_slid = FALSE; vm_object_template.code_signed = FALSE; vm_object_template.hashed = FALSE; vm_object_template.transposed = FALSE; @@ -641,6 +648,9 @@ vm_object_bootstrap(void) vm_object_template.objq.next=NULL; vm_object_template.objq.prev=NULL; + vm_object_template.purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; + vm_object_template.purgeable_queue_group = 0; + vm_object_template.vo_cache_ts = 0; /* @@ -660,8 +670,12 @@ vm_object_bootstrap(void) #else _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, kernel_object); + + _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, + compressor_object); #endif kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; + compressor_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; /* * Initialize the "submap object". 
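compressor_object above follows the same statically-backed singleton pattern as kernel_object: a file-scope store, a global pointer to it, bootstrap-time sizing against the kernel address range, and MEMORY_OBJECT_COPY_NONE so the object is never shadowed or copied. Below is a sketch of the pattern; the types and the address-space top are made up for illustration.

#include <stdio.h>
#include <stdint.h>

#define COPY_NONE 0   /* stands in for MEMORY_OBJECT_COPY_NONE */

struct object_model {
    uint64_t size;
    int      copy_strategy;
};

static struct object_model compressor_object_store;   /* static backing */
static struct object_model *compressor_obj = &compressor_object_store;

static void bootstrap(uint64_t max_kernel_addr)
{
    /* mirrors _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, ...) */
    compressor_obj->size = max_kernel_addr + 1;
    compressor_obj->copy_strategy = COPY_NONE;
}

int main(void)
{
    bootstrap((1ull << 47) - 1);   /* illustrative top, not the real constant */
    printf("compressor object spans 0x%llx bytes\n",
           (unsigned long long)compressor_obj->size);
    return 0;
}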
Make it as large as the @@ -728,6 +742,8 @@ vm_object_init_lck_grp(void) lck_attr_setdefault(&vm_object_lck_attr); lck_attr_setdefault(&kernel_object_lck_attr); lck_attr_cleardebug(&kernel_object_lck_attr); + lck_attr_setdefault(&compressor_object_lck_attr); + lck_attr_cleardebug(&compressor_object_lck_attr); } #if VM_OBJECT_CACHE @@ -767,13 +783,16 @@ vm_object_deallocate( if (object == VM_OBJECT_NULL) return; - if (object == kernel_object) { + if (object == kernel_object || object == compressor_object) { vm_object_lock_shared(object); OSAddAtomic(-1, &object->ref_count); if (object->ref_count == 0) { - panic("vm_object_deallocate: losing kernel_object\n"); + if (object == kernel_object) + panic("vm_object_deallocate: losing kernel_object\n"); + else + panic("vm_object_deallocate: losing compressor_object\n"); } vm_object_unlock(object); return; @@ -1723,12 +1742,17 @@ vm_object_reap( purgeable_q_t queue = vm_purgeable_object_remove(object); assert(queue); - /* Must take page lock for this - using it to protect token queue */ - vm_page_lock_queues(); - vm_purgeable_token_delete_first(queue); + if (object->purgeable_when_ripe) { + /* + * Must take page lock for this - + * using it to protect token queue + */ + vm_page_lock_queues(); + vm_purgeable_token_delete_first(queue); - assert(queue->debug_count_objects>=0); - vm_page_unlock_queues(); + assert(queue->debug_count_objects>=0); + vm_page_unlock_queues(); + } } /* @@ -1818,6 +1842,7 @@ vm_object_reap_pages( vm_page_t local_free_q = VM_PAGE_NULL; int loop_count; boolean_t disconnect_on_release; + pmap_flush_context pmap_flush_context_storage; if (reap_type == REAP_DATA_FLUSH) { /* @@ -1841,6 +1866,9 @@ restart_after_sleep: return; loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH); + if (reap_type == REAP_PURGEABLE) + pmap_flush_context_init(&pmap_flush_context_storage); + vm_page_lockspin_queues(); next = (vm_page_t)queue_first(&object->memq); @@ -1855,6 +1883,11 @@ restart_after_sleep: vm_page_unlock_queues(); if (local_free_q) { + + if (reap_type == REAP_PURGEABLE) { + pmap_flush(&pmap_flush_context_storage); + pmap_flush_context_init(&pmap_flush_context_storage); + } /* * Free the pages we reclaimed so far * and take a little break to avoid @@ -1943,21 +1976,12 @@ restart_after_sleep: * we can discard this page... */ if (p->pmapped == TRUE) { - int refmod_state; /* * unmap the page */ - refmod_state = pmap_disconnect(p->phys_page); - if (refmod_state & VM_MEM_MODIFIED) { - SET_PAGE_DIRTY(p, FALSE); - } - } - if (p->dirty || p->precious) { - /* - * we saved the cost of cleaning this page ! 
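The REAP_PURGEABLE path above introduces the pmap_flush_context idiom this patch uses throughout: each page is disconnected with PMAP_OPTIONS_NOFLUSH so the TLB shootdown is only recorded, and a single pmap_flush() at each batch boundary pays the inter-processor interrupt cost once for the whole batch. A userspace model of that batching follows; all names here are illustrative.

#include <stdio.h>

struct flush_ctx { int pending; };

static void flush_ctx_init(struct flush_ctx *c) { c->pending = 0; }

static void disconnect_noflush(struct flush_ctx *c, unsigned long page)
{
    (void)page;
    c->pending++;   /* unmap, but only record that a flush is owed */
}

static void flush(struct flush_ctx *c)
{
    if (c->pending) {
        printf("one shootdown covers %d disconnects\n", c->pending);
        c->pending = 0;
    }
}

int main(void)
{
    struct flush_ctx ctx;
    flush_ctx_init(&ctx);
    for (unsigned long p = 0; p < 64; p++)
        disconnect_noflush(&ctx, p);
    flush(&ctx);   /* single flush instead of 64 */
    return 0;
}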
- */ - vm_page_purged_count++; + pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_NOREFMOD, (void *)&pmap_flush_context_storage); } + vm_page_purged_count++; break; @@ -2019,6 +2043,9 @@ restart_after_sleep: /* * Free the remaining reclaimed pages */ + if (reap_type == REAP_PURGEABLE) + pmap_flush(&pmap_flush_context_storage); + VM_OBJ_REAP_FREELIST(local_free_q, disconnect_on_release); } @@ -2409,11 +2436,20 @@ page_is_paged_out( return TRUE; } } else -#endif - if (object->internal && - object->alive && - !object->terminating && - object->pager_ready) { +#endif /* MACH_PAGEMAP */ + if (object->internal && + object->alive && + !object->terminating && + object->pager_ready) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) + == VM_EXTERNAL_STATE_EXISTS) { + return TRUE; + } else { + return FALSE; + } + } /* * We're already holding a "paging in progress" reference @@ -2448,6 +2484,20 @@ page_is_paged_out( +/* + * madvise_free_debug + * + * To help debug madvise(MADV_FREE*) mis-usage, this triggers a + * zero-fill as soon as a page is affected by a madvise(MADV_FREE*), to + * simulate the loss of the page's contents as if the page had been + * reclaimed and then re-faulted. + */ +#if DEVELOPMENT || DEBUG +int madvise_free_debug = 1; +#else /* DEBUG */ +int madvise_free_debug = 0; +#endif /* DEBUG */ + /* * Deactivate the pages in the specified object and range. If kill_page is set, also discard any * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify @@ -2461,11 +2511,9 @@ deactivate_pages_in_object( vm_object_size_t size, boolean_t kill_page, boolean_t reusable_page, -#if !MACH_ASSERT - __unused -#endif boolean_t all_reusable, - chunk_state_t *chunk_state) + chunk_state_t *chunk_state, + pmap_flush_context *pfc) { vm_page_t m; int p; @@ -2475,7 +2523,6 @@ deactivate_pages_in_object( int dw_limit; unsigned int reusable = 0; - /* * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may @@ -2515,10 +2562,20 @@ deactivate_pages_in_object( if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy) && (!m->laundry)) { int clear_refmod; + dwp->dw_mask = 0; + clear_refmod = VM_MEM_REFERENCED; - dwp->dw_mask = DW_clear_reference; + dwp->dw_mask |= DW_clear_reference; if ((kill_page) && (object->internal)) { + if (madvise_free_debug) { + /* + * zero-fill the page now + * to simulate it being + * reclaimed and re-faulted. 
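madvise_free_debug above makes DEVELOPMENT/DEBUG kernels zero-fill a page the moment madvise(MADV_FREE*) covers it, so code that still reads the old contents fails deterministically rather than only when the pager happens to reclaim the page. The userspace demonstration below shows the contract being tested: after MADV_FREE the observed contents are unspecified.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
    size_t len = 4096;
    char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_ANON | MAP_PRIVATE, -1, 0);
    if (p == MAP_FAILED)
        return 1;

    memset(p, 0xAB, len);
    madvise(p, len, MADV_FREE);

    /* Unspecified which value prints: 0xab if the page has not been
     * reclaimed yet, 0x00 if it has (or if madvise_free_debug zeroed it). */
    printf("first byte after MADV_FREE: 0x%02x\n", (unsigned char)p[0]);
    munmap(p, len);
    return 0;
}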
+ */ + pmap_zero_page(m->phys_page); + } m->precious = FALSE; m->dirty = FALSE; @@ -2536,6 +2593,8 @@ deactivate_pages_in_object( #if MACH_PAGEMAP vm_external_state_clr(object->existence_map, offset); #endif /* MACH_PAGEMAP */ + VM_COMPRESSOR_PAGER_STATE_CLR(object, + offset); if (reusable_page && !m->reusable) { assert(!all_reusable); @@ -2546,12 +2605,14 @@ deactivate_pages_in_object( reusable++; } } - pmap_clear_refmod(m->phys_page, clear_refmod); + pmap_clear_refmod_options(m->phys_page, clear_refmod, PMAP_OPTIONS_NOFLUSH, (void *)pfc); if (!m->throttled && !(reusable_page || all_reusable)) dwp->dw_mask |= DW_move_page; - VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); + if (dwp->dw_mask) + VM_PAGE_ADD_DELAYED_WORK(dwp, m, + dw_count); if (dw_count >= dw_limit) { if (reusable) { @@ -2587,6 +2648,8 @@ deactivate_pages_in_object( #if MACH_PAGEMAP vm_external_state_clr(object->existence_map, offset); #endif /* MACH_PAGEMAP */ + VM_COMPRESSOR_PAGER_STATE_CLR(object, + offset); } } } @@ -2619,7 +2682,8 @@ deactivate_a_chunk( vm_object_size_t size, boolean_t kill_page, boolean_t reusable_page, - boolean_t all_reusable) + boolean_t all_reusable, + pmap_flush_context *pfc) { vm_object_t object; vm_object_t tmp_object; @@ -2652,7 +2716,7 @@ deactivate_a_chunk( while (object && CHUNK_NOT_COMPLETE(chunk_state)) { vm_object_paging_begin(object); - deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state); + deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state, pfc); vm_object_paging_end(object); @@ -2702,6 +2766,7 @@ vm_object_deactivate_pages( { vm_object_size_t length; boolean_t all_reusable; + pmap_flush_context pmap_flush_context_storage; /* * We break the range up into chunks and do one chunk at a time. 
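The comment opening here describes the chunked traversal: deactivate_a_chunk() handles a fixed-size run of pages per level of the shadow chain, and chunk_state remembers which pages a shallower level already satisfied so deeper levels skip them. A minimal bitmask model of that bookkeeping follows; CHUNK_PAGES is an assumption chosen so the state fits one 64-bit word.

#include <stdio.h>
#include <stdint.h>

#define CHUNK_PAGES 64

typedef uint64_t chunk_state_t;

static void process_level(chunk_state_t *state, int npages)
{
    for (int p = 0; p < npages; p++) {
        if (*state & (1ull << p))
            continue;           /* handled at a shallower shadow level */
        /* ... deactivate page p at this level ... */
        *state |= 1ull << p;    /* mark done for the levels below */
    }
}

int main(void)
{
    chunk_state_t st = 0;
    process_level(&st, CHUNK_PAGES);   /* one shadow-chain level */
    printf("chunk state after one level: 0x%llx\n", (unsigned long long)st);
    return 0;
}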
This is for @@ -2727,12 +2792,15 @@ vm_object_deactivate_pages( return; } + pmap_flush_context_init(&pmap_flush_context_storage); + while (size) { - length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable); + length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable, &pmap_flush_context_storage); size -= length; offset += length; } + pmap_flush(&pmap_flush_context_storage); if (all_reusable) { if (!object->all_reusable) { @@ -2868,8 +2936,25 @@ vm_object_pmap_protect( vm_map_offset_t pmap_start, vm_prot_t prot) { + vm_object_pmap_protect_options(object, offset, size, + pmap, pmap_start, prot, 0); +} + +__private_extern__ void +vm_object_pmap_protect_options( + register vm_object_t object, + register vm_object_offset_t offset, + vm_object_size_t size, + pmap_t pmap, + vm_map_offset_t pmap_start, + vm_prot_t prot, + int options) +{ + pmap_flush_context pmap_flush_context_storage; + boolean_t delayed_pmap_flush = FALSE; + if (object == VM_OBJECT_NULL) - return; + return; size = vm_object_round_page(size); offset = vm_object_trunc_page(offset); @@ -2878,7 +2963,12 @@ vm_object_pmap_protect( if (object->phys_contiguous) { if (pmap != NULL) { vm_object_unlock(object); - pmap_protect(pmap, pmap_start, pmap_start + size, prot); + pmap_protect_options(pmap, + pmap_start, + pmap_start + size, + prot, + options & ~PMAP_OPTIONS_NOFLUSH, + NULL); } else { vm_object_offset_t phys_start, phys_end, phys_addr; @@ -2888,11 +2978,21 @@ vm_object_pmap_protect( assert(phys_end <= object->vo_shadow_offset + object->vo_size); vm_object_unlock(object); + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + for (phys_addr = phys_start; phys_addr < phys_end; phys_addr += PAGE_SIZE_64) { - pmap_page_protect((ppnum_t) (phys_addr >> PAGE_SHIFT), prot); + pmap_page_protect_options( + (ppnum_t) (phys_addr >> PAGE_SHIFT), + prot, + options | PMAP_OPTIONS_NOFLUSH, + (void *)&pmap_flush_context_storage); + delayed_pmap_flush = TRUE; } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); } return; } @@ -2902,38 +3002,49 @@ vm_object_pmap_protect( while (TRUE) { if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) { vm_object_unlock(object); - pmap_protect(pmap, pmap_start, pmap_start + size, prot); + pmap_protect_options(pmap, pmap_start, pmap_start + size, prot, + options & ~PMAP_OPTIONS_NOFLUSH, NULL); return; } - /* if we are doing large ranges with respect to resident */ - /* page count then we should interate over pages otherwise */ - /* inverse page look-up will be faster */ + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + + /* + * if we are doing large ranges with respect to resident + * page count then we should interate over pages otherwise + * inverse page look-up will be faster + */ if (ptoa_64(object->resident_page_count / 4) < size) { vm_page_t p; vm_object_offset_t end; end = offset + size; - if (pmap != PMAP_NULL) { - queue_iterate(&object->memq, p, vm_page_t, listq) { - if (!p->fictitious && - (offset <= p->offset) && (p->offset < end)) { - vm_map_offset_t start; + queue_iterate(&object->memq, p, vm_page_t, listq) { + if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { + vm_map_offset_t start; - start = pmap_start + p->offset - offset; - pmap_protect(pmap, start, start + PAGE_SIZE_64, prot); - } - } - } else { - queue_iterate(&object->memq, p, vm_page_t, listq) { - if (!p->fictitious && - (offset <= p->offset) && 
(p->offset < end)) { + start = pmap_start + p->offset - offset; - pmap_page_protect(p->phys_page, prot); - } - } + if (pmap != PMAP_NULL) + pmap_protect_options( + pmap, + start, + start + PAGE_SIZE_64, + prot, + options | PMAP_OPTIONS_NOFLUSH, + &pmap_flush_context_storage); + else + pmap_page_protect_options( + p->phys_page, + prot, + options | PMAP_OPTIONS_NOFLUSH, + &pmap_flush_context_storage); + delayed_pmap_flush = TRUE; + } } + } else { vm_page_t p; vm_object_offset_t end; @@ -2941,29 +3052,36 @@ vm_object_pmap_protect( end = offset + size; - if (pmap != PMAP_NULL) { - for(target_off = offset; - target_off < end; - target_off += PAGE_SIZE) { - p = vm_page_lookup(object, target_off); - if (p != VM_PAGE_NULL) { - vm_object_offset_t start; - start = pmap_start + - (p->offset - offset); - pmap_protect(pmap, start, - start + PAGE_SIZE, prot); - } - } - } else { - for(target_off = offset; - target_off < end; target_off += PAGE_SIZE) { - p = vm_page_lookup(object, target_off); - if (p != VM_PAGE_NULL) { - pmap_page_protect(p->phys_page, prot); - } + for (target_off = offset; + target_off < end; target_off += PAGE_SIZE) { + + p = vm_page_lookup(object, target_off); + + if (p != VM_PAGE_NULL) { + vm_object_offset_t start; + + start = pmap_start + (p->offset - offset); + + if (pmap != PMAP_NULL) + pmap_protect_options( + pmap, + start, + start + PAGE_SIZE_64, + prot, + options | PMAP_OPTIONS_NOFLUSH, + &pmap_flush_context_storage); + else + pmap_page_protect_options( + p->phys_page, + prot, + options | PMAP_OPTIONS_NOFLUSH, + &pmap_flush_context_storage); + delayed_pmap_flush = TRUE; } } - } + } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); if (prot == VM_PROT_NONE) { /* @@ -3128,8 +3246,10 @@ vm_object_copy_slowly( } XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0); + _result_page = VM_PAGE_NULL; result = vm_fault_page(src_object, src_offset, VM_PROT_READ, FALSE, + FALSE, /* page not looked up */ &prot, &_result_page, &top_page, (int *)0, &error_code, FALSE, FALSE, &fault_info); @@ -3467,6 +3587,8 @@ vm_object_copy_delayed( vm_object_t old_copy; vm_page_t p; vm_object_size_t copy_size = src_offset + size; + pmap_flush_context pmap_flush_context_storage; + boolean_t delayed_pmap_flush = FALSE; int collisions = 0; @@ -3597,6 +3719,9 @@ vm_object_copy_delayed( */ copy_delayed_protect_iterate++; + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + queue_iterate(&src_object->memq, p, vm_page_t, listq) { if (!p->fictitious && p->offset >= old_copy->vo_size && @@ -3609,14 +3734,20 @@ vm_object_copy_delayed( vm_object_unlock(new_copy); vm_object_deallocate(new_copy); } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); return VM_OBJECT_NULL; } else { - pmap_page_protect(p->phys_page, - (VM_PROT_ALL & ~VM_PROT_WRITE)); + pmap_page_protect_options(p->phys_page, (VM_PROT_ALL & ~VM_PROT_WRITE), + PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); + delayed_pmap_flush = TRUE; } } } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); + old_copy->vo_size = copy_size; } if (src_object_shared == TRUE) @@ -3688,6 +3819,9 @@ vm_object_copy_delayed( */ copy_delayed_protect_iterate++; + pmap_flush_context_init(&pmap_flush_context_storage); + delayed_pmap_flush = FALSE; + queue_iterate(&src_object->memq, p, vm_page_t, listq) { if (!p->fictitious && p->offset < copy_size) { if (VM_PAGE_WIRED(p)) { @@ -3696,13 +3830,21 @@ vm_object_copy_delayed( 
vm_object_unlock(src_object); vm_object_unlock(new_copy); vm_object_deallocate(new_copy); + + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); + return VM_OBJECT_NULL; } else { - pmap_page_protect(p->phys_page, - (VM_PROT_ALL & ~VM_PROT_WRITE)); + pmap_page_protect_options(p->phys_page, (VM_PROT_ALL & ~VM_PROT_WRITE), + PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); + delayed_pmap_flush = TRUE; } } } + if (delayed_pmap_flush == TRUE) + pmap_flush(&pmap_flush_context_storage); + if (old_copy != VM_OBJECT_NULL) { /* * Make the old copy-object shadow the new one. @@ -4319,11 +4461,13 @@ vm_object_pager_create( vm_object_unlock(object); #if MACH_PAGEMAP - map = vm_external_create(size); - vm_object_lock(object); - assert(object->vo_size == size); - object->existence_map = map; - vm_object_unlock(object); + if (DEFAULT_PAGER_IS_ACTIVE) { + map = vm_external_create(size); + vm_object_lock(object); + assert(object->vo_size == size); + object->existence_map = map; + vm_object_unlock(object); + } #endif /* MACH_PAGEMAP */ if ((uint32_t) object->vo_size != object->vo_size) { @@ -4382,6 +4526,95 @@ vm_object_pager_create( vm_object_paging_end(object); } +void +vm_object_compressor_pager_create( + register vm_object_t object) +{ + memory_object_t pager; + vm_object_hash_entry_t entry; + lck_mtx_t *lck; + + assert(object != kernel_object); + + /* + * Prevent collapse or termination by holding a paging reference + */ + + vm_object_paging_begin(object); + if (object->pager_created) { + /* + * Someone else got to it first... + * wait for them to finish initializing the ports + */ + while (!object->pager_initialized) { + vm_object_sleep(object, + VM_OBJECT_EVENT_INITIALIZED, + THREAD_UNINT); + } + vm_object_paging_end(object); + return; + } + + /* + * Indicate that a memory object has been assigned + * before dropping the lock, to prevent a race. + */ + + object->pager_created = TRUE; + object->paging_offset = 0; + + vm_object_unlock(object); + + if ((uint32_t) object->vo_size != object->vo_size) { + panic("vm_object_compressor_pager_create(): object size 0x%llx >= 4GB\n", + (uint64_t) object->vo_size); + } + + /* + * Create the [internal] pager, and associate it with this object. + * + * We make the association here so that vm_object_enter() + * can look up the object to complete initializing it. No + * user will ever map this object. + */ + { + assert(object->temporary); + + /* create our new memory object */ + assert((vm_size_t) object->vo_size == object->vo_size); + (void) compressor_memory_object_create( + (vm_size_t) object->vo_size, + &pager); + } + + entry = vm_object_hash_entry_alloc(pager); + + lck = vm_object_hash_lock_spin(pager); + vm_object_hash_insert(entry, object); + vm_object_hash_unlock(lck); + + /* + * A reference was returned by + * memory_object_create(), and it is + * copied by vm_object_enter(). + */ + + if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object) + panic("vm_object_compressor_pager_create: mismatch"); + + /* + * Drop the reference we were passed. 
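vm_object_compressor_pager_create() above uses a create-once protocol: the first caller claims the object by setting pager_created, drops the object lock to build the pager, then marks pager_initialized; latecomers sleep on the initialization event instead of racing to create a second pager. A pthread model of the same protocol follows; the kernel uses its own wait primitives, not these.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv   = PTHREAD_COND_INITIALIZER;
static bool pager_created, pager_initialized;

static void pager_create(void)
{
    pthread_mutex_lock(&lock);
    if (pager_created) {
        /* someone else got to it first: wait for them to finish */
        while (!pager_initialized)
            pthread_cond_wait(&cv, &lock);
        pthread_mutex_unlock(&lock);
        return;
    }
    pager_created = true;        /* claim it before dropping the lock */
    pthread_mutex_unlock(&lock);

    /* ... allocate the pager without holding the object lock ... */

    pthread_mutex_lock(&lock);
    pager_initialized = true;
    pthread_cond_broadcast(&cv);
    pthread_mutex_unlock(&lock);
    printf("pager created exactly once\n");
}

int main(void)
{
    pager_create();
    return 0;
}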
+ */ + memory_object_deallocate(pager); + + vm_object_lock(object); + + /* + * Release the paging reference + */ + vm_object_paging_end(object); +} + /* * Routine: vm_object_remove * Purpose: @@ -4540,6 +4773,14 @@ vm_object_do_collapse( if (backing_object->pager != MEMORY_OBJECT_NULL) { vm_object_hash_entry_t entry; +#if 00 + if (COMPRESSED_PAGER_IS_ACTIVE) { + panic("vm_object_do_collapse(%p,%p): " + "backing_object has a compressor pager", + object, backing_object); + } +#endif + /* * Move the pager from backing_object to object. * @@ -4934,7 +5175,7 @@ retry: if (backing_object->ref_count == 1 && (!object->pager_created #if !MACH_PAGEMAP - || !backing_object->pager_created + || (!backing_object->pager_created) #endif /*!MACH_PAGEMAP */ ) && vm_object_collapse_allowed) { @@ -5001,7 +5242,7 @@ retry: * we have to make sure no pages in the backing object * "show through" before bypassing it. */ - size = atop(object->vo_size); + size = (unsigned int)atop(object->vo_size); rcount = object->resident_page_count; if (rcount != size) { @@ -5051,18 +5292,18 @@ retry: backing_rcount = backing_object->resident_page_count; if ( (int)backing_rcount - (int)(atop(backing_object->vo_size) - size) > (int)rcount) { - /* + /* * we have enough pages in the backing object to guarantee that * at least 1 of them must be 'uncovered' by a resident page * in the object we're evaluating, so move on and * try to collapse the rest of the shadow chain */ - if (object != original_object) { - vm_object_unlock(object); - } - object = backing_object; - object_lock_type = backing_object_lock_type; - continue; + if (object != original_object) { + vm_object_unlock(object); + } + object = backing_object; + object_lock_type = backing_object_lock_type; + continue; } /* @@ -5080,12 +5321,17 @@ retry: #if MACH_PAGEMAP #define EXISTS_IN_OBJECT(obj, off, rc) \ - (vm_external_state_get((obj)->existence_map, \ - (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \ + ((vm_external_state_get((obj)->existence_map, \ + (vm_offset_t)(off)) \ + == VM_EXTERNAL_STATE_EXISTS) || \ + (VM_COMPRESSOR_PAGER_STATE_GET((obj), (off)) \ + == VM_EXTERNAL_STATE_EXISTS) || \ + ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) +#else /* MACH_PAGEMAP */ +#define EXISTS_IN_OBJECT(obj, off, rc) \ + ((VM_COMPRESSOR_PAGER_STATE_GET((obj), (off)) \ + == VM_EXTERNAL_STATE_EXISTS) || \ ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) -#else -#define EXISTS_IN_OBJECT(obj, off, rc) \ - (((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) #endif /* MACH_PAGEMAP */ /* @@ -5146,7 +5392,6 @@ retry: p = (vm_page_t) queue_next(&p->listq); } while (--backing_rcount); - if (backing_rcount != 0 ) { /* try and collapse the rest of the shadow chain */ if (object != original_object) { @@ -6111,7 +6356,9 @@ vm_object_purgable_control( purgeable_q_t queue = vm_purgeable_object_remove(object); assert(queue); - vm_purgeable_token_delete_last(queue); + if (object->purgeable_when_ripe) { + vm_purgeable_token_delete_last(queue); + } assert(queue->debug_count_objects>=0); vm_page_unlock_queues(); @@ -6157,15 +6404,26 @@ vm_object_purgable_control( old_state == VM_PURGABLE_EMPTY) { unsigned int delta; - /* try to add token... 
this can fail */ - vm_page_lock_queues(); + if ((*state & VM_PURGABLE_NO_AGING_MASK) == + VM_PURGABLE_NO_AGING) { + object->purgeable_when_ripe = FALSE; + } else { + object->purgeable_when_ripe = TRUE; + } + + if (object->purgeable_when_ripe) { + kern_return_t result; - kern_return_t result = vm_purgeable_token_add(queue); - if (result != KERN_SUCCESS) { - vm_page_unlock_queues(); - return result; + /* try to add token... this can fail */ + vm_page_lock_queues(); + + result = vm_purgeable_token_add(queue); + if (result != KERN_SUCCESS) { + vm_page_unlock_queues(); + return result; + } + vm_page_unlock_queues(); } - vm_page_unlock_queues(); assert(object->resident_page_count >= object->wired_page_count); @@ -6187,6 +6445,9 @@ vm_object_purgable_control( assert(object->objq.next == NULL && object->objq.prev == NULL); } else if (old_state == VM_PURGABLE_VOLATILE) { + purgeable_q_t old_queue; + boolean_t purgeable_when_ripe; + /* * if reassigning priorities / purgeable groups, we don't change the * token queue. So moving priorities will not make pages stay around longer. @@ -6197,19 +6458,33 @@ vm_object_purgable_control( */ assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ - purgeable_q_t old_queue=vm_purgeable_object_remove(object); + old_queue = vm_purgeable_object_remove(object); assert(old_queue); - if (old_queue != queue) { + if ((*state & VM_PURGABLE_NO_AGING_MASK) == + VM_PURGABLE_NO_AGING) { + purgeable_when_ripe = FALSE; + } else { + purgeable_when_ripe = TRUE; + } + + if (old_queue != queue || + (purgeable_when_ripe != + object->purgeable_when_ripe)) { kern_return_t result; /* Changing queue. Have to move token. */ vm_page_lock_queues(); - vm_purgeable_token_delete_last(old_queue); - result = vm_purgeable_token_add(queue); + if (object->purgeable_when_ripe) { + vm_purgeable_token_delete_last(old_queue); + } + object->purgeable_when_ripe = purgeable_when_ripe; + if (object->purgeable_when_ripe) { + result = vm_purgeable_token_add(queue); + assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */ + } vm_page_unlock_queues(); - assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */ } }; vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT ); @@ -6249,9 +6524,11 @@ vm_object_purgable_control( object->objq.prev != NULL); old_queue = vm_purgeable_object_remove(object); assert(old_queue); - vm_page_lock_queues(); - vm_purgeable_token_delete_last(old_queue); - vm_page_unlock_queues(); + if (object->purgeable_when_ripe) { + vm_page_lock_queues(); + vm_purgeable_token_delete_first(old_queue); + vm_page_unlock_queues(); + } } (void) vm_object_purge(object); } @@ -6263,6 +6540,103 @@ vm_object_purgable_control( return KERN_SUCCESS; } +kern_return_t +vm_object_get_page_counts( + vm_object_t object, + vm_object_offset_t offset, + vm_object_size_t size, + unsigned int *resident_page_count, + unsigned int *dirty_page_count) +{ + + kern_return_t kr = KERN_SUCCESS; + boolean_t count_dirty_pages = FALSE; + vm_page_t p = VM_PAGE_NULL; + unsigned int local_resident_count = 0; + unsigned int local_dirty_count = 0; + vm_object_offset_t cur_offset = 0; + vm_object_offset_t end_offset = 0; + + if (object == VM_OBJECT_NULL) + return KERN_INVALID_ARGUMENT; + + + cur_offset = offset; + + end_offset = offset + size; + + vm_object_lock_assert_exclusive(object); + + if (dirty_page_count != NULL) { + + count_dirty_pages = TRUE; + } + + if (resident_page_count != 
NULL && count_dirty_pages == FALSE) { + /* + * Fast path when: + * - we only want the resident page count, and, + * - the entire object is exactly covered by the request. + */ + if (offset == 0 && (object->vo_size == size)) { + + *resident_page_count = object->resident_page_count; + goto out; + } + } + + if (object->resident_page_count <= (size >> PAGE_SHIFT)) { + + queue_iterate(&object->memq, p, vm_page_t, listq) { + + if (p->offset >= cur_offset && p->offset < end_offset) { + + local_resident_count++; + + if (count_dirty_pages) { + + if (p->dirty || (p->wpmapped && pmap_is_modified(p->phys_page))) { + + local_dirty_count++; + } + } + } + } + } else { + + for (cur_offset = offset; cur_offset < end_offset; cur_offset += PAGE_SIZE_64) { + + p = vm_page_lookup(object, cur_offset); + + if (p != VM_PAGE_NULL) { + + local_resident_count++; + + if (count_dirty_pages) { + + if (p->dirty || (p->wpmapped && pmap_is_modified(p->phys_page))) { + + local_dirty_count++; + } + } + } + } + + } + + if (resident_page_count != NULL) { + *resident_page_count = local_resident_count; + } + + if (dirty_page_count != NULL) { + *dirty_page_count = local_dirty_count; + } + +out: + return kr; +} + + #if TASK_SWAPPER /* * vm_object_res_deallocate @@ -6620,7 +6994,7 @@ MACRO_END assert(object1->purgable == VM_PURGABLE_DENY); assert(object2->purgable == VM_PURGABLE_DENY); /* "shadowed" refers to the the object not its contents */ - __TRANSPOSE_FIELD(silent_overwrite); + __TRANSPOSE_FIELD(purgeable_when_ripe); __TRANSPOSE_FIELD(advisory_pageout); __TRANSPOSE_FIELD(true_share); /* "terminating" should not be set */ @@ -6737,13 +7111,8 @@ done: extern int speculative_reads_disabled; extern int ignore_is_ssd; -#if CONFIG_EMBEDDED -unsigned int preheat_pages_max = MAX_UPL_TRANSFER; -unsigned int preheat_pages_min = 10; -#else unsigned int preheat_pages_max = MAX_UPL_TRANSFER; unsigned int preheat_pages_min = 8; -#endif uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1]; uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1]; @@ -6928,7 +7297,7 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, throttle_limit = (uint32_t) max_length; assert(throttle_limit == max_length); - if (vnode_pager_check_hard_throttle(object->pager, &throttle_limit, *io_streaming) == KERN_SUCCESS) { + if (vnode_pager_get_throttle_io_limit(object->pager, &throttle_limit) == KERN_SUCCESS) { if (max_length > throttle_limit) max_length = throttle_limit; } @@ -7028,7 +7397,11 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, */ break; } -#endif +#endif /* MACH_PAGEMAP */ + if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) + == VM_EXTERNAL_STATE_ABSENT) { + break; + } if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { /* * don't bridge resident pages @@ -7060,7 +7433,11 @@ vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, */ break; } -#endif +#endif /* MACH_PAGEMAP */ + if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) + == VM_EXTERNAL_STATE_ABSENT) { + break; + } if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { /* * don't bridge resident pages @@ -7332,6 +7709,46 @@ vm_object_range_op( return KERN_SUCCESS; } +/* + * Used to point a pager directly to a range of memory (when the pager may be associated + * with a non-device vnode). Takes a virtual address, an offset, and a size. We currently + * expect that the virtual address will denote the start of a range that is physically contiguous. 
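vm_object_get_page_counts() above picks its scan strategy by density: when the object holds no more resident pages than the range has page slots it walks the resident queue once, otherwise it probes each offset with a vm_page_lookup()-style hash lookup. A small sketch of that choice, assuming 4 KB pages:

#include <stdio.h>

#define PAGE_SHIFT 12   /* assumption: 4 KB pages */

static void choose_scan(unsigned resident_pages, unsigned long long range_bytes)
{
    unsigned long long range_pages = range_bytes >> PAGE_SHIFT;

    if (resident_pages <= range_pages)
        printf("walk the resident queue (%u candidates)\n", resident_pages);
    else
        printf("probe %llu offsets individually\n", range_pages);
}

int main(void)
{
    choose_scan(10, 1ull << 30);    /* sparse object, huge range: walk queue */
    choose_scan(100000, 1 << 16);   /* dense object, small range: probe */
    return 0;
}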
+ */ +kern_return_t pager_map_to_phys_contiguous( + memory_object_control_t object, + memory_object_offset_t offset, + addr64_t base_vaddr, + vm_size_t size) +{ + ppnum_t page_num; + boolean_t clobbered_private; + kern_return_t retval; + vm_object_t pager_object; + + page_num = pmap_find_phys(kernel_pmap, base_vaddr); + + if (!page_num) { + retval = KERN_FAILURE; + goto out; + } + + pager_object = memory_object_control_to_vm_object(object); + + if (!pager_object) { + retval = KERN_FAILURE; + goto out; + } + + clobbered_private = pager_object->private; + pager_object->private = TRUE; + retval = vm_object_populate_with_private(pager_object, offset, page_num, size); + + if (retval != KERN_SUCCESS) + pager_object->private = clobbered_private; + +out: + return retval; +} uint32_t scan_object_collision = 0; @@ -7456,9 +7873,11 @@ kern_return_t vm_object_pack( src_object->objq.prev != NULL); queue = vm_purgeable_object_remove(src_object); assert(queue); - vm_page_lock_queues(); - vm_purgeable_token_delete_first(queue); - vm_page_unlock_queues(); + if (src_object->purgeable_when_ripe) { + vm_page_lock_queues(); + vm_purgeable_token_delete_first(queue); + vm_page_unlock_queues(); + } vm_object_purge(src_object); } goto done; @@ -7549,12 +7968,26 @@ void vm_object_pageout( vm_object_t object) { - vm_page_t p, next; + vm_page_t p, next; + struct vm_pageout_queue *iq; + + iq = &vm_pageout_queue_internal; assert(object != VM_OBJECT_NULL ); vm_object_lock(object); - + + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { + if (!object->pager_initialized) { + /* + * If there is no memory object for the page, create + * one and hand it to the default pager. + */ + vm_object_pager_create(object); + } + } + +ReScan: next = (vm_page_t)queue_first(&object->memq); while (!queue_end(&object->memq, (queue_entry_t)next)) { @@ -7569,6 +8002,54 @@ vm_object_pageout( * being cleaned... 
if so, leave it alone */ if (!p->laundry) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + + if (VM_PAGE_Q_THROTTLED(iq)) { + + iq->pgo_draining = TRUE; + + assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE); + vm_page_unlock_queues(); + vm_object_unlock(object); + + thread_block(THREAD_CONTINUE_NULL); + + vm_object_lock(object); + goto ReScan; + } + + if (p->fictitious || p->busy ) { + vm_page_unlock_queues(); + continue; + } + + if (p->absent || p->unusual || p->error || VM_PAGE_WIRED(p)) { + vm_page_unlock_queues(); + continue; + } + + if (p->cleaning) { + p->pageout = TRUE; + vm_page_unlock_queues(); + continue; + } + + if (p->pmapped == TRUE) { + int refmod_state; + refmod_state = pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL); + if (refmod_state & VM_MEM_MODIFIED) { + SET_PAGE_DIRTY(p, FALSE); + } + } + + if (p->dirty == FALSE) { + vm_page_unlock_queues(); + VM_PAGE_FREE(p); + continue; + } + } + VM_PAGE_QUEUES_REMOVE(p); vm_pageout_cluster(p, TRUE); } diff --git a/osfmk/vm/vm_object.h b/osfmk/vm/vm_object.h index 21bc4ddcb..9c4fe0e32 100644 --- a/osfmk/vm/vm_object.h +++ b/osfmk/vm/vm_object.h @@ -87,13 +87,12 @@ #include #include -#if MACH_PAGEMAP #include -#endif /* MACH_PAGEMAP */ #include struct vm_page; +struct vm_shared_region_slide_info; /* * Types defined: @@ -120,10 +119,12 @@ struct vm_object_fault_info { }; -#define vo_size vo_un1.vou_size -#define vo_cache_pages_to_scan vo_un1.vou_cache_pages_to_scan -#define vo_shadow_offset vo_un2.vou_shadow_offset -#define vo_cache_ts vo_un2.vou_cache_ts +#define vo_size vo_un1.vou_size +#define vo_cache_pages_to_scan vo_un1.vou_cache_pages_to_scan +#define vo_shadow_offset vo_un2.vou_shadow_offset +#define vo_cache_ts vo_un2.vou_cache_ts +#define vo_purgeable_owner vo_un2.vou_purgeable_owner +#define vo_slide_info vo_un2.vou_slide_info struct vm_object { queue_head_t memq; /* Resident memory */ @@ -157,9 +158,14 @@ struct vm_object { union { vm_object_offset_t vou_shadow_offset; /* Offset into shadow */ - clock_sec_t vou_cache_ts; /* age of an external object - * present in cache + clock_sec_t vou_cache_ts; /* age of an external object + * present in cache + */ + task_t vou_purgeable_owner; /* If the purg'a'ble bits below are set + * to volatile/emtpy, this is the task + * that owns this purgeable object. */ + struct vm_shared_region_slide_info *vou_slide_info; } vo_un2; memory_object_t pager; /* Where to get data */ @@ -222,11 +228,10 @@ struct vm_object { /* boolean_t */ purgable:2, /* Purgable state. See * VM_PURGABLE_* */ + /* boolean_t */ purgeable_when_ripe:1, /* Purgeable when a token + * becomes ripe. 
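The compressor branch of vm_object_pageout() above filters each page before handing it on: busy, fictitious, absent, unusual, errored, and wired pages are skipped, in-flight pages are left to finish cleaning, a page found clean after the PMAP_OPTIONS_COMPRESSOR disconnect is simply freed, and only dirty anonymous pages are clustered out. A condensed model of that classification follows; field names mirror the kernel's purely for illustration.

#include <stdbool.h>
#include <stdio.h>

struct page_model {
    bool fictitious, busy, absent, unusual, error, wired, cleaning, dirty;
};

enum action { SKIP, FREE_PAGE, COMPRESS };

static enum action classify(const struct page_model *p)
{
    if (p->fictitious || p->busy)
        return SKIP;
    if (p->absent || p->unusual || p->error || p->wired)
        return SKIP;
    if (p->cleaning)
        return SKIP;        /* already headed out; just mark it pageout */
    if (!p->dirty)
        return FREE_PAGE;   /* clean copy exists elsewhere: no compression */
    return COMPRESS;        /* dirty anonymous page goes to the compressor */
}

int main(void)
{
    struct page_model clean = { false }, dirty = { false };
    dirty.dirty = true;
    printf("clean -> %d, dirty -> %d (0=skip 1=free 2=compress)\n",
           classify(&clean), classify(&dirty));
    return 0;
}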
+ */ /* boolean_t */ shadowed:1, /* Shadow may exist */ - /* boolean_t */ silent_overwrite:1, - /* Allow full page overwrite - * without data_request if - * page is absent */ /* boolean_t */ advisory_pageout:1, /* Instead of sending page * via OOL, just notify @@ -325,7 +330,10 @@ struct vm_object { all_reusable:1, blocked_access:1, set_cache_attr:1, - __object2_unused_bits:15; /* for expansion */ + object_slid:1, + purgeable_queue_type:2, + purgeable_queue_group:3, + __object2_unused_bits:9; /* for expansion */ uint32_t scan_collisions; @@ -381,10 +389,13 @@ struct vm_object { __object->memq_hint = __page; \ MACRO_END -__private_extern__ +extern vm_object_t kernel_object; /* the single kernel object */ -__private_extern__ +extern +vm_object_t compressor_object; /* the single compressor object */ + +extern unsigned int vm_object_absent_max; /* maximum number of absent pages at a time for each object */ @@ -420,7 +431,10 @@ extern lck_attr_t vm_map_lck_attr; MACRO_END #define msync_req_free(msr) \ - (kfree((msr), sizeof(struct msync_req))) + MACRO_BEGIN \ + lck_mtx_destroy(&(msr)->msync_req_lock, &vm_map_lck_grp); \ + kfree((msr), sizeof(struct msync_req)); \ + MACRO_END #define msr_lock(msr) lck_mtx_lock(&(msr)->msync_req_lock) #define msr_unlock(msr) lck_mtx_unlock(&(msr)->msync_req_lock) @@ -429,7 +443,7 @@ extern lck_attr_t vm_map_lck_attr; * Declare procedures that operate on VM objects. */ -__private_extern__ void vm_object_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +__private_extern__ void vm_object_bootstrap(void); __private_extern__ void vm_object_init(void); @@ -516,6 +530,15 @@ __private_extern__ void vm_object_pmap_protect( vm_map_offset_t pmap_start, vm_prot_t prot); +__private_extern__ void vm_object_pmap_protect_options( + vm_object_t object, + vm_object_offset_t offset, + vm_object_size_t size, + pmap_t pmap, + vm_map_offset_t pmap_start, + vm_prot_t prot, + int options); + __private_extern__ void vm_object_page_remove( vm_object_t object, vm_object_offset_t start, @@ -542,6 +565,13 @@ __private_extern__ kern_return_t vm_object_purgable_control( vm_purgable_t control, int *state); +__private_extern__ kern_return_t vm_object_get_page_counts( + vm_object_t object, + vm_object_offset_t offset, + vm_object_size_t size, + unsigned int *resident_page_count, + unsigned int *dirty_page_count); + __private_extern__ boolean_t vm_object_coalesce( vm_object_t prev_object, vm_object_t next_object, @@ -597,6 +627,9 @@ __private_extern__ kern_return_t vm_object_destroy( __private_extern__ void vm_object_pager_create( vm_object_t object); +__private_extern__ void vm_object_compressor_pager_create( + vm_object_t object); + __private_extern__ void vm_object_page_map( vm_object_t object, vm_object_offset_t offset, @@ -909,6 +942,7 @@ extern lck_grp_t vm_object_lck_grp; extern lck_grp_attr_t vm_object_lck_grp_attr; extern lck_attr_t vm_object_lck_attr; extern lck_attr_t kernel_object_lck_attr; +extern lck_attr_t compressor_object_lck_attr; extern vm_object_t vm_pageout_scan_wants_object; @@ -928,7 +962,9 @@ extern boolean_t vm_object_lock_try_shared(vm_object_t); (((object) == kernel_object || \ (object) == vm_submap_object) ? \ &kernel_object_lck_attr : \ - &vm_object_lck_attr)) + (((object) == compressor_object) ? 
\ + &compressor_object_lck_attr : \ + &vm_object_lck_attr))) #define vm_object_lock_destroy(object) lck_rw_destroy(&(object)->Lock, &vm_object_lck_grp) #define vm_object_unlock(object) lck_rw_done(&(object)->Lock) diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h index 70050ffa3..0e2824f05 100644 --- a/osfmk/vm/vm_page.h +++ b/osfmk/vm/vm_page.h @@ -132,6 +132,8 @@ extern int speculative_age_index; extern unsigned int vm_page_speculative_q_age_ms; +#define VM_PAGE_COMPRESSOR_COUNT (compressor_object->resident_page_count) + /* * Management of resident (logical) pages. * @@ -184,7 +186,7 @@ struct vm_page { clean_queue:1, /* page is in pre-cleaned list (P) */ local:1, /* page is in one of the local queues (P) */ speculative:1, /* page is in speculative list (P) */ - throttled:1, /* pager is not responding (P) */ + throttled:1, /* pager is not responding or doesn't exist(P) */ free:1, /* page is on free list (P) */ pageout_queue:1,/* page is on queue for pageout (P) */ laundry:1, /* page is being cleaned now (P)*/ @@ -194,7 +196,8 @@ struct vm_page { * the free list (P) */ no_cache:1, /* page is not to be cached and should * be reused ahead of other pages (P) */ - __unused_pageq_bits:3; /* 3 bits available here */ + xpmapped:1, + __unused_pageq_bits:2; /* 2 bits available here */ ppnum_t phys_page; /* Physical address of page, passed * to pmap_enter (read-only) */ @@ -244,7 +247,8 @@ struct vm_page { lopage:1, slid:1, was_dirty:1, /* was this page previously dirty? */ - __unused_object_bits:8; /* 8 bits available here */ + compressor:1, /* page owned by compressor pool */ + __unused_object_bits:7; /* 7 bits available here */ #if __LP64__ unsigned int __unused_padding; /* Pad structure explicitly @@ -363,6 +367,8 @@ vm_map_size_t vm_global_no_user_wire_amount; struct vpl { unsigned int vpl_count; + unsigned int vpl_internal_count; + unsigned int vpl_external_count; queue_head_t vpl_queue; #ifdef VPL_LOCK_SPIN lck_spin_t vpl_lock; @@ -423,6 +429,12 @@ extern unsigned int vm_page_throttled_count;/* How many inactives are throttled */ extern unsigned int vm_page_speculative_count; /* How many speculative pages are unclaimed? */ +extern unsigned int vm_page_pageable_internal_count; +extern unsigned int vm_page_pageable_external_count; +extern +unsigned int vm_page_external_count; /* How many pages are file-backed? */ +extern +unsigned int vm_page_internal_count; /* How many pages are anonymous? */ extern unsigned int vm_page_wire_count; /* How many pages are wired? */ extern @@ -475,6 +487,8 @@ extern ppnum_t vm_page_guard_addr; extern boolean_t vm_page_deactivate_hint; +extern int vm_compressor_mode; + /* 0 = all pages avail ( default. 
) 1 = disable high mem ( cap max pages to 4G) @@ -495,9 +509,9 @@ extern ppnum_t max_valid_low_ppnum; */ extern void vm_page_bootstrap( vm_offset_t *startp, - vm_offset_t *endp) __attribute__((section("__TEXT, initcode"))); + vm_offset_t *endp); -extern void vm_page_module_init(void) __attribute__((section("__TEXT, initcode"))); +extern void vm_page_module_init(void); extern void vm_page_init_local_q(void); @@ -652,42 +666,36 @@ extern void vm_page_free_prepare_object( boolean_t remove_from_hash); #if CONFIG_JETSAM -extern void memorystatus_update(unsigned int pages_avail); +extern void memorystatus_pages_update(unsigned int pages_avail); #define VM_CHECK_MEMORYSTATUS do { \ - memorystatus_update( \ - vm_page_active_count + \ - vm_page_inactive_count + \ - vm_page_speculative_count + \ - vm_page_free_count + \ + memorystatus_pages_update( \ + vm_page_external_count + \ + vm_page_free_count + \ (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count) \ ); \ } while(0) -#else -#define VM_CHECK_MEMORYSTATUS do {} while(0) -#endif + +#else /* CONFIG_JETSAM */ + + +extern void vm_pressure_response(void); + +#define VM_CHECK_MEMORYSTATUS vm_pressure_response() + + +#endif /* CONFIG_JETSAM */ /* * Functions implemented as macros. m->wanted and m->busy are * protected by the object lock. */ -#if CONFIG_EMBEDDED #define SET_PAGE_DIRTY(m, set_pmap_modified) \ MACRO_BEGIN \ vm_page_t __page__ = (m); \ - if (__page__->dirty == FALSE && (set_pmap_modified)) { \ - pmap_set_modify(__page__->phys_page); \ - } \ __page__->dirty = TRUE; \ MACRO_END -#else /* CONFIG_EMBEDDED */ -#define SET_PAGE_DIRTY(m, set_pmap_modified) \ - MACRO_BEGIN \ - vm_page_t __page__ = (m); \ - __page__->dirty = TRUE; \ - MACRO_END -#endif /* CONFIG_EMBEDDED */ #define PAGE_ASSERT_WAIT(m, interruptible) \ (((m)->wanted = TRUE), \ @@ -773,6 +781,8 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); #define VM_PAGE_QUEUES_REMOVE(mem) \ MACRO_BEGIN \ + boolean_t was_pageable; \ + \ VM_PAGE_QUEUES_ASSERT(mem, 1); \ assert(!mem->laundry); \ /* \ @@ -785,6 +795,7 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); if (mem->local) { \ struct vpl *lq; \ assert(mem->object != kernel_object); \ + assert(mem->object != compressor_object); \ assert(!mem->inactive && !mem->speculative); \ assert(!mem->active && !mem->throttled); \ assert(!mem->clean_queue); \ @@ -796,11 +807,18 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); mem->local = FALSE; \ mem->local_id = 0; \ lq->vpl_count--; \ + if (mem->object->internal) { \ + lq->vpl_internal_count--; \ + } else { \ + lq->vpl_external_count--; \ + } \ VPL_UNLOCK(&lq->vpl_lock); \ + was_pageable = FALSE; \ } \ \ else if (mem->active) { \ assert(mem->object != kernel_object); \ + assert(mem->object != compressor_object); \ assert(!mem->inactive && !mem->speculative); \ assert(!mem->clean_queue); \ assert(!mem->throttled); \ @@ -809,10 +827,12 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); mem, vm_page_t, pageq); \ mem->active = FALSE; \ vm_page_active_count--; \ + was_pageable = TRUE; \ } \ \ else if (mem->inactive) { \ assert(mem->object != kernel_object); \ + assert(mem->object != compressor_object); \ assert(!mem->active && !mem->speculative); \ assert(!mem->throttled); \ assert(!mem->fictitious); \ @@ -834,9 +854,11 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); vm_purgeable_q_advance_all(); \ } \ mem->inactive = FALSE; \ + was_pageable = TRUE; \ } \ \ else if (mem->throttled) { \ + assert(mem->object != 
compressor_object); \ assert(!mem->active && !mem->inactive); \ assert(!mem->speculative); \ assert(!mem->fictitious); \ @@ -844,22 +866,37 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); mem, vm_page_t, pageq); \ mem->throttled = FALSE; \ vm_page_throttled_count--; \ + was_pageable = FALSE; \ } \ \ else if (mem->speculative) { \ + assert(mem->object != compressor_object); \ assert(!mem->active && !mem->inactive); \ assert(!mem->throttled); \ assert(!mem->fictitious); \ remque(&mem->pageq); \ mem->speculative = FALSE; \ vm_page_speculative_count--; \ + was_pageable = TRUE; \ } \ \ - else if (mem->pageq.next || mem->pageq.prev) \ + else if (mem->pageq.next || mem->pageq.prev) { \ + was_pageable = FALSE; \ panic("VM_PAGE_QUEUES_REMOVE: unmarked page on Q"); \ + } else { \ + was_pageable = FALSE; \ + } \ + \ mem->pageq.next = NULL; \ mem->pageq.prev = NULL; \ VM_PAGE_QUEUES_ASSERT(mem, 0); \ + if (was_pageable) { \ + if (mem->object->internal) { \ + vm_page_pageable_internal_count--; \ + } else { \ + vm_page_pageable_external_count--; \ + } \ + } \ MACRO_END @@ -874,12 +911,14 @@ extern void vm_page_queues_assert(vm_page_t mem, int val); queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq); \ else \ queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq); \ - vm_page_anonymous_count++; \ + vm_page_anonymous_count++; \ + vm_page_pageable_internal_count++; \ } else { \ if (first == TRUE) \ queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq); \ else \ queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq); \ + vm_page_pageable_external_count++; \ } \ mem->inactive = TRUE; \ vm_page_inactive_count++; \ diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index 2bfd6e7a5..9e8d0384d 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -95,6 +95,7 @@ #include #include +#include #include #include #include @@ -104,6 +105,8 @@ #include #include #include +#include + /* * ENCRYPTED SWAP: */ @@ -116,21 +119,25 @@ extern int cs_debug; #include #endif +extern vm_pressure_level_t memorystatus_vm_pressure_level; +int memorystatus_purge_on_warning = 2; +int memorystatus_purge_on_urgent = 5; +int memorystatus_purge_on_critical = 8; + #if VM_PRESSURE_EVENTS +void vm_pressure_response(void); +boolean_t vm_pressure_thread_running = FALSE; extern void consider_vm_pressure_events(void); #endif +boolean_t vm_pressure_changed = FALSE; #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */ #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100 #endif #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */ -#ifdef CONFIG_EMBEDDED -#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024 -#else #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 #endif -#endif #ifndef VM_PAGEOUT_DEADLOCK_RELIEF #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */ @@ -160,6 +167,10 @@ extern void consider_vm_pressure_events(void); #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */ #endif /* VM_PAGEOUT_IDLE_WAIT */ +#ifndef VM_PAGEOUT_SWAP_WAIT +#define VM_PAGEOUT_SWAP_WAIT 50 /* milliseconds */ +#endif /* VM_PAGEOUT_SWAP_WAIT */ + #ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED #define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED 1000 /* maximum pages considered before we issue a pressure event */ #endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */ @@ -203,24 +214,17 @@ unsigned int vm_page_speculative_percentage = 5; */ #ifndef VM_PAGE_FREE_TARGET 
-#ifdef CONFIG_EMBEDDED -#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100) -#else #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80) -#endif #endif /* VM_PAGE_FREE_TARGET */ + /* * The pageout daemon always starts running once vm_page_free_count * falls below vm_page_free_min. */ #ifndef VM_PAGE_FREE_MIN -#ifdef CONFIG_EMBEDDED -#define VM_PAGE_FREE_MIN(free) (10 + (free) / 200) -#else #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100) -#endif #endif /* VM_PAGE_FREE_MIN */ #define VM_PAGE_FREE_RESERVED_LIMIT 100 @@ -251,11 +255,7 @@ unsigned int vm_page_speculative_percentage = 5; */ #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000 #ifndef VM_PAGE_REACTIVATE_LIMIT -#ifdef CONFIG_EMBEDDED -#define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2) -#else #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX)) -#endif #endif /* VM_PAGE_REACTIVATE_LIMIT */ #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100 @@ -274,12 +274,20 @@ unsigned int vm_pageout_scan_event_counter = 0; /* * Forward declarations for internal routines. */ +struct cq { + struct vm_pageout_queue *q; + void *current_chead; + char *scratch_buf; +}; + -static void vm_pressure_thread(void); +#if VM_PRESSURE_EVENTS +void vm_pressure_thread(void); +#endif static void vm_pageout_garbage_collect(int); static void vm_pageout_iothread_continue(struct vm_pageout_queue *); static void vm_pageout_iothread_external(void); -static void vm_pageout_iothread_internal(void); +static void vm_pageout_iothread_internal(struct cq *cq); static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t); extern void vm_pageout_continue(void); @@ -291,6 +299,7 @@ static thread_t vm_pageout_internal_iothread = THREAD_NULL; unsigned int vm_pageout_reserved_internal = 0; unsigned int vm_pageout_reserved_really = 0; +unsigned int vm_pageout_swap_wait = 0; unsigned int vm_pageout_idle_wait = 0; /* milliseconds */ unsigned int vm_pageout_empty_wait = 0; /* milliseconds */ unsigned int vm_pageout_burst_wait = 0; /* milliseconds */ @@ -362,20 +371,15 @@ unsigned int vm_pageout_scan_inactive_throttled_external = 0; unsigned int vm_pageout_scan_throttle = 0; /* debugging */ unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */ unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */ +unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */ unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */ unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */ unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */ -unsigned int vm_pageout_inactive_external_forced_reactivate_count = 0; /* debugging */ unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */ unsigned int vm_page_speculative_count_drifts = 0; unsigned int vm_page_speculative_count_drift_max = 0; -unsigned int vm_precleaning_aborted = 0; - -static boolean_t vm_pageout_need_to_refill_clean_queue = FALSE; -static boolean_t vm_pageout_precleaning_delayed = FALSE; - /* * Backing store throttle when BS is exhausted */ @@ -413,12 +417,15 @@ unsigned long vm_cs_validated_resets = 0; int vm_debug_events = 0; #if CONFIG_MEMORYSTATUS -extern int memorystatus_wakeup; +#if !CONFIG_JETSAM +extern boolean_t memorystatus_idle_exit_from_VM(void); #endif -#if CONFIG_JETSAM -extern int memorystatus_kill_top_proc_from_VM(void); +extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async); +extern void 
memorystatus_on_pageout_scan_end(void); #endif +boolean_t vm_page_compressions_failing = FALSE; + /* * Routine: vm_backing_store_disable * Purpose: @@ -510,7 +517,7 @@ vm_pageout_object_terminate( * Also decrement the burst throttle (if external). */ vm_page_lock_queues(); - if (m->laundry) + if (m->pageout_queue) vm_pageout_throttle_up(m); /* @@ -810,7 +817,7 @@ struct { * which will page it out and attempt to clean adjacent pages * in the same operation. * - * The page must be busy, and the object and queues locked. We will take a + * The object and queues must be locked. We will take a * paging reference to prevent deallocation or collapse when we * release the object lock back at the call site. The I/O thread * is responsible for consuming this reference @@ -852,12 +859,15 @@ vm_pageout_cluster(vm_page_t m, boolean_t pageout) m->pageout = pageout; - if (object->internal == TRUE) + if (object->internal == TRUE) { + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) + m->busy = TRUE; + q = &vm_pageout_queue_internal; - else + } else q = &vm_pageout_queue_external; - /* + /* * pgo_laundry count is tied to the laundry bit */ m->laundry = TRUE; @@ -867,10 +877,9 @@ vm_pageout_cluster(vm_page_t m, boolean_t pageout) queue_enter(&q->pgo_pending, m, vm_page_t, pageq); if (q->pgo_idle == TRUE) { - q->pgo_idle = FALSE; - thread_wakeup((event_t) &q->pgo_pending); + q->pgo_idle = FALSE; + thread_wakeup((event_t) &q->pgo_pending); } - VM_PAGE_CHECK(m); } @@ -928,20 +937,35 @@ vm_pageout_throttle_up( q->pgo_draining = FALSE; thread_wakeup((event_t) (&q->pgo_laundry+1)); } - if (vm_pageout_precleaning_delayed == TRUE) { - /* - * since the pageout scan can return on laundry congestion, wake it up this way - * don't depend on pgo_throttled == TRUE to indicate that the pageout scan thread - * is blocked on &q->pgo_laundry since the hibernation mechanism utilizes both - * pgo_throttled and pgo_draining - */ - vm_pageout_precleaning_delayed = FALSE; - thread_wakeup((event_t)(&vm_page_free_wanted)); - } } } +static void +vm_pageout_throttle_up_batch( + struct vm_pageout_queue *q, + int batch_cnt) +{ +#if DEBUG + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif + + vm_pageout_throttle_up_count += batch_cnt; + + q->pgo_laundry -= batch_cnt; + + if (q->pgo_throttled == TRUE) { + q->pgo_throttled = FALSE; + thread_wakeup((event_t) &q->pgo_laundry); + } + if (q->pgo_draining == TRUE && q->pgo_laundry == 0) { + q->pgo_draining = FALSE; + thread_wakeup((event_t) (&q->pgo_laundry+1)); + } +} + + + /* * VM memory pressure monitoring. * @@ -1102,6 +1126,13 @@ mach_vm_pressure_monitor( extern void vm_pageout_io_throttle(void); +#if LATENCY_JETSAM +boolean_t jlp_init = FALSE; +uint64_t jlp_time = 0, jlp_current = 0; +struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS]; +unsigned int latency_jetsam_wakeup = 0; +#endif /* LATENCY_JETSAM */ + /* * Page States: Used below to maintain the page state * before it's removed from its Q. This saved state @@ -1113,27 +1144,28 @@ extern void vm_pageout_io_throttle(void); #define PAGE_STATE_INACTIVE_FIRST 4 #define PAGE_STATE_CLEAN 5 -#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \ - MACRO_BEGIN \ - /* \ - * If a "reusable" page somehow made it back into \ - * the active queue, it's been re-used and is not \ - * quite re-usable. \ - * If the VM object was "all_reusable", consider it \ - * as "all re-used" instead of converting it to \ - * "partially re-used", which could be expensive.
\ - */ \ - if ((m)->reusable || \ - (m)->object->all_reusable) { \ - vm_object_reuse_pages((m)->object, \ - (m)->offset, \ - (m)->offset + PAGE_SIZE_64, \ - FALSE); \ - } \ - MACRO_END - - -#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 128 + +#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \ + MACRO_BEGIN \ + /* \ + * If a "reusable" page somehow made it back into \ + * the active queue, it's been re-used and is not \ + * quite re-usable. \ + * If the VM object was "all_reusable", consider it \ + * as "all re-used" instead of converting it to \ + * "partially re-used", which could be expensive. \ + */ \ + if ((m)->reusable || \ + (m)->object->all_reusable) { \ + vm_object_reuse_pages((m)->object, \ + (m)->offset, \ + (m)->offset + PAGE_SIZE_64, \ + FALSE); \ + } \ + MACRO_END + + +#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024 #define FCS_IDLE 0 @@ -1146,7 +1178,10 @@ struct flow_control { }; uint32_t vm_pageout_considered_page = 0; +uint32_t vm_page_filecache_min = 0; +#define VM_PAGE_FILECACHE_MIN 50000 +#define ANONS_GRABBED_LIMIT 2 /* * vm_pageout_scan does the dirty work for the pageout daemon. @@ -1182,14 +1217,38 @@ vm_pageout_scan(void) boolean_t forced_reclaim; boolean_t exceeded_burst_throttle; boolean_t grab_anonymous = FALSE; + boolean_t force_anonymous = FALSE; + int anons_grabbed = 0; int page_prev_state = 0; int cache_evict_throttle = 0; uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0; + vm_pressure_level_t pressure_level; VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START, vm_pageout_speculative_clean, vm_pageout_inactive_clean, vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); +#if LATENCY_JETSAM + if (jlp_init == FALSE) { + int i=0; + vm_page_t jlp; + for(; i < NUM_OF_JETSAM_LATENCY_TOKENS; i++) { + jlp = &jetsam_latency_page[i]; + jlp->fictitious = TRUE; + jlp->offset = 0; + + } + jlp = &jetsam_latency_page[0]; + queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq); + jlp->active = TRUE; + + jlp->offset = mach_absolute_time(); + jlp_time = jlp->offset; + jlp_current++; + jlp_init = TRUE; + } +#endif /* LATENCY_JETSAM */ + flow_control.state = FCS_IDLE; iq = &vm_pageout_queue_internal; eq = &vm_pageout_queue_external; @@ -1233,7 +1292,7 @@ vm_pageout_scan(void) Restart: assert(delayed_unlock!=0); - + /* * Recalculate vm_page_inactivate_target. 
*/ @@ -1241,7 +1300,8 @@ Restart: vm_page_inactive_count + vm_page_speculative_count); - vm_page_anonymous_min = vm_page_inactive_target / 3; + vm_page_anonymous_min = vm_page_inactive_target / 20; + /* * don't want to wake the pageout_scan thread up every time we fall below @@ -1265,7 +1325,7 @@ Restart: catch_up_count = vm_page_inactive_count + vm_page_speculative_count; else catch_up_count = 0; - + for (;;) { vm_page_t m; @@ -1299,8 +1359,7 @@ Restart: * Don't sweep through active queue more than the throttle * which should be kept relatively low */ - active_burst_count = MIN(vm_pageout_burst_active_throttle, - vm_page_active_count); + active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count); VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START, vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed); @@ -1308,6 +1367,8 @@ Restart: VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE, vm_pageout_speculative_clean, vm_pageout_inactive_clean, vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); + memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START); + while (!queue_empty(&vm_page_queue_active) && active_burst_count--) { @@ -1322,44 +1383,112 @@ Restart: DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); - /* - * The page might be absent or busy, - * but vm_page_deactivate can handle that. - */ - vm_page_deactivate(m); +#if LATENCY_JETSAM + if (m->fictitious) { + const uint32_t FREE_TARGET_MULTIPLIER = 2; + + uint64_t now = mach_absolute_time(); + uint64_t delta = now - m->offset; + clock_sec_t jl_secs = 0; + clock_usec_t jl_usecs = 0; + boolean_t issue_jetsam = FALSE; + + absolutetime_to_microtime(delta, &jl_secs, &jl_usecs); + jl_usecs += jl_secs * USEC_PER_SEC; - if (delayed_unlock++ > delayed_unlock_limit) { + /* Jetsam only if the token hasn't aged sufficiently and the free count is close to the target (avoiding spurious triggers) */ + if ((jl_usecs <= JETSAM_AGE_NOTIFY_CRITICAL) && (vm_page_free_count < (FREE_TARGET_MULTIPLIER * vm_page_free_target))) { + issue_jetsam = TRUE; + } + + VM_DEBUG_EVENT(vm_pageout_page_token, VM_PAGEOUT_PAGE_TOKEN, DBG_FUNC_NONE, + vm_page_active_count, vm_page_inactive_count, vm_page_free_count, jl_usecs); + + m->offset = 0; + queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); - if (local_freeq) { + m->offset = now; + jlp_time = now; + + if (issue_jetsam) { vm_page_unlock_queues(); - - VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, - vm_page_free_count, local_freed, delayed_unlock_limit, 1); - vm_page_free_list(local_freeq, TRUE); + if (local_freeq) { + vm_page_free_list(local_freeq, TRUE); + local_freeq = NULL; + local_freed = 0; + } + + VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START, + vm_page_active_count, vm_page_inactive_count, vm_page_free_count, 0); + + assert_wait_timeout(&latency_jetsam_wakeup, THREAD_INTERRUPTIBLE, 10 /* msecs */, 1000*NSEC_PER_USEC); + /* Kill the top process asynchronously */ + memorystatus_kill_on_VM_page_shortage(TRUE); + thread_block(THREAD_CONTINUE_NULL); - VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, - vm_page_free_count, 0, 0, 1); + VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0); - local_freeq = NULL; - local_freed = 0; vm_page_lock_queues(); - } else - lck_mtx_yield(&vm_page_queue_lock); - - delayed_unlock = 1; + } + } else { +#endif /* LATENCY_JETSAM */ + /* + *
by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... + * + * a TLB flush isn't really needed here since at worst we'll miss the reference bit being + * updated in the PTE if a remote processor still has this mapping cached in its TLB when the + * new reference happens. If no further references happen on the page after that remote TLB flushes + * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue + * by pageout_scan, which is just fine since the last reference would have happened quite far + * in the past (TLB caches don't hang around for very long), and of course could just as easily + * have happened before we moved the page + */ + pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); /* - * continue the while loop processing - * the active queue... need to hold - * the page queues lock + * The page might be absent or busy, + * but vm_page_deactivate can handle that. + * FALSE indicates that we don't want a H/W clear reference */ + vm_page_deactivate_internal(m, FALSE); + + if (delayed_unlock++ > delayed_unlock_limit) { + + if (local_freeq) { + vm_page_unlock_queues(); + + VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, + vm_page_free_count, local_freed, delayed_unlock_limit, 1); + + vm_page_free_list(local_freeq, TRUE); + + VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, + vm_page_free_count, 0, 0, 1); + + local_freeq = NULL; + local_freed = 0; + vm_page_lock_queues(); + } else + lck_mtx_yield(&vm_page_queue_lock); + + delayed_unlock = 1; + + /* + * continue the while loop processing + * the active queue... need to hold + * the page queues lock + */ + } +#if LATENCY_JETSAM } +#endif /* LATENCY_JETSAM */ } VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target); - + memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END); /********************************************************************** * above this point we're playing with the active queue @@ -1369,9 +1498,6 @@ Restart: done_moving_active_pages: - if (vm_page_cleaned_count < VM_PAGE_CLEANED_MIN && vm_page_anonymous_count > vm_page_anonymous_min) - vm_pageout_need_to_refill_clean_queue = TRUE; - if (vm_page_free_count + local_freed >= vm_page_free_target) { if (object != NULL) { vm_object_unlock(object); @@ -1410,7 +1536,6 @@ done_moving_active_pages: vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + vm_page_inactive_count + vm_page_speculative_count); -#ifndef CONFIG_EMBEDDED if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && !queue_empty(&vm_page_queue_active)) { /* @@ -1419,22 +1544,19 @@ done_moving_active_pages: */ continue; } -#endif lck_mtx_lock(&vm_page_queue_free_lock); if ((vm_page_free_count >= vm_page_free_target) && - (vm_page_cleaned_count >= VM_PAGE_CLEANED_TARGET || vm_pageout_need_to_refill_clean_queue == FALSE) && (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { /* * done - we have met our target *and* * there is no one waiting for a page.
*/ - vm_pageout_need_to_refill_clean_queue = FALSE; return_from_scan: assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE, - vm_pageout_inactive, vm_pageout_inactive_used, vm_pageout_need_to_refill_clean_queue, 0); + vm_pageout_inactive, vm_pageout_inactive_used, 0, 0); VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END, vm_pageout_speculative_clean, vm_pageout_inactive_clean, vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); @@ -1451,23 +1573,40 @@ return_from_scan: * If the purge succeeds, go back to the top and reevaluate * the new memory situation. */ + pressure_level = memorystatus_vm_pressure_level; assert (available_for_purge>=0); - if (available_for_purge) - { + + if (available_for_purge + || pressure_level > kVMPressureNormal + ) { + int force_purge; + if (object != NULL) { vm_object_unlock(object); object = NULL; } VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0); - - if (TRUE == vm_purgeable_object_purge_one()) { + memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START); + + force_purge = 0; /* no force-purging */ + if (pressure_level >= kVMPressureCritical) { + force_purge = memorystatus_purge_on_critical; + } else if (pressure_level >= kVMPressureUrgent) { + force_purge = memorystatus_purge_on_urgent; + } else if (pressure_level >= kVMPressureWarning) { + force_purge = memorystatus_purge_on_warning; + } else { + force_purge = 0; + } + if (vm_purgeable_object_purge_one(force_purge)) { VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0); - + memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); continue; } VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1); + memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); } if (queue_empty(&sq->age_q) && vm_page_speculative_count) { /* @@ -1555,6 +1694,7 @@ return_from_scan: VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE, vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0); + memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE); /* * we just freed up to 100 pages, @@ -1592,6 +1732,12 @@ return_from_scan: exceeded_burst_throttle = TRUE; goto vm_pageout_scan_delay; + } else if (vm_page_free_count > (vm_page_free_reserved / 4) && + VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) { + vm_pageout_scan_swap_throttle++; + msecs = vm_pageout_swap_wait; + goto vm_pageout_scan_delay; + } else if (VM_PAGE_Q_THROTTLED(iq) && VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { clock_sec_t sec; @@ -1601,11 +1747,12 @@ return_from_scan: case FCS_IDLE: if ((vm_page_free_count + local_freed) < vm_page_free_target) { - if (vm_page_inactive_count - vm_page_anonymous_count > 0) { - grab_anonymous = FALSE; + + if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) { + anons_grabbed = ANONS_GRABBED_LIMIT; goto consider_inactive; } - if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) + if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count) continue; } reset_deadlock_timer: @@ -1695,11 +1842,9 @@ vm_pageout_scan_delay: if (vm_page_free_count >= vm_page_free_target) { /* - * we're here because either + * we're here because * 1) someone else freed up some pages while we had - * the queues
unlocked above * and we've hit one of the 3 conditions that * cause us to pause the pageout scan thread * @@ -1725,11 +1870,8 @@ vm_pageout_scan_delay: } lck_mtx_lock(&vm_page_queue_free_lock); - if (vm_page_free_count >= vm_page_free_target) { - if (vm_page_cleaned_count < VM_PAGE_CLEANED_TARGET) { - vm_precleaning_aborted++; - vm_pageout_precleaning_delayed = TRUE; - } + if (vm_page_free_count >= vm_page_free_target && + (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { goto return_from_scan; } lck_mtx_unlock(&vm_page_queue_free_lock); @@ -1769,6 +1911,9 @@ vm_pageout_scan_delay: vm_pageout_scan_throttle++; iq->pgo_throttled = TRUE; + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) + vm_consider_waking_compactor_swapper(); + assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); counter(c_vm_pageout_scan_block++); @@ -1778,11 +1923,13 @@ vm_pageout_scan_delay: VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START, iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); + memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START); thread_block(THREAD_CONTINUE_NULL); VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END, iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); + memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END); vm_page_lock_queues(); delayed_unlock = 1; @@ -1805,69 +1952,69 @@ consider_inactive: loop_count++; inactive_burst_count++; vm_pageout_inactive++; - - boolean_t pageout_making_free = ((vm_page_free_count + local_freed) < vm_page_free_target); /* TRUE if making free, FALSE if making clean */ + /* * Choose a victim. */ - while (1) { + while (1) { m = NULL; if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { assert(vm_page_throttled_count == 0); assert(queue_empty(&vm_page_queue_throttled)); } - /* - * If we are still below the free target, try speculative - * and clean queue pages. + * The most eligible pages are ones we paged in speculatively, + * but which have not yet been touched. */ - if (pageout_making_free) { - /* - * The most eligible pages are ones we paged in speculatively, - * but which have not yet been touched. - */ - if ( !queue_empty(&sq->age_q) ) { - m = (vm_page_t) queue_first(&sq->age_q); + if (!queue_empty(&sq->age_q) ) { + m = (vm_page_t) queue_first(&sq->age_q); - page_prev_state = PAGE_STATE_SPECULATIVE; - - break; - } + page_prev_state = PAGE_STATE_SPECULATIVE; - /* - * Try a clean-queue inactive page, if we are still trying to fill the free list. - */ - if ( !queue_empty(&vm_page_queue_cleaned) ) { - m = (vm_page_t) queue_first(&vm_page_queue_cleaned); + break; + } + /* + * Try a clean-queue inactive page. 
+ */ + if (!queue_empty(&vm_page_queue_cleaned)) { + m = (vm_page_t) queue_first(&vm_page_queue_cleaned); - page_prev_state = PAGE_STATE_CLEAN; + page_prev_state = PAGE_STATE_CLEAN; - break; - } + break; + } - if (grab_anonymous == FALSE || queue_empty(&vm_page_queue_anonymous)) { + grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min); - if ( !queue_empty(&vm_page_queue_inactive) ) { - m = (vm_page_t) queue_first(&vm_page_queue_inactive); - - page_prev_state = PAGE_STATE_INACTIVE; - if (vm_pageout_need_to_refill_clean_queue == TRUE) - grab_anonymous = TRUE; - break; - } - } + if (vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) { + grab_anonymous = TRUE; + anons_grabbed = 0; } - if (vm_pageout_need_to_refill_clean_queue == TRUE) { - if ( !queue_empty(&vm_page_queue_anonymous) ) { - m = (vm_page_t) queue_first(&vm_page_queue_anonymous); - page_prev_state = PAGE_STATE_ANONYMOUS; - grab_anonymous = FALSE; + if (grab_anonymous == TRUE && vm_compression_available() == FALSE) + grab_anonymous = FALSE; + + if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) { + + if ( !queue_empty(&vm_page_queue_inactive) ) { + m = (vm_page_t) queue_first(&vm_page_queue_inactive); + + page_prev_state = PAGE_STATE_INACTIVE; + anons_grabbed = 0; + break; } } + if ( !queue_empty(&vm_page_queue_anonymous) ) { + m = (vm_page_t) queue_first(&vm_page_queue_anonymous); + + page_prev_state = PAGE_STATE_ANONYMOUS; + anons_grabbed++; + + break; + } /* * if we've gotten here, we have no victim page. @@ -1898,14 +2045,6 @@ consider_inactive: vm_page_lock_queues(); delayed_unlock = 1; - if (pageout_making_free == FALSE) { - if (vm_pageout_need_to_refill_clean_queue == TRUE) - DTRACE_VM(novictimforclean); - - lck_mtx_lock(&vm_page_queue_free_lock); - goto return_from_scan; - - } if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) goto Restart; @@ -1913,6 +2052,7 @@ consider_inactive: /* NOTREACHED */ } + force_anonymous = FALSE; /* * we just found this page on one of our queues... 
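For reference, the victim-selection order that the hunks above introduce in vm_pageout_scan() (speculative pages first, then pre-cleaned pages, then a bounded mix of file-backed and anonymous pages) can be summarized as a standalone C sketch. This is illustrative only, not XNU code: the queue handles, q_empty()/q_first() helpers, and choose_victim() function are hypothetical stand-ins; only the branching mirrors the diff.

/*
 * Minimal sketch of the victim-selection policy added above.
 * All types and helpers are illustrative stand-ins, not XNU APIs.
 */
#include <stdbool.h>
#include <stddef.h>

#define ANONS_GRABBED_LIMIT 2   /* same bound the hunk adds */

typedef struct page page_t;

/* hypothetical helpers standing in for the kernel's queue primitives */
extern bool q_empty(int q);
extern page_t *q_first(int q);
enum { Q_SPECULATIVE, Q_CLEANED, Q_INACTIVE, Q_ANONYMOUS };

static page_t *
choose_victim(unsigned ext_count, unsigned filecache_min,
              unsigned anon_count, unsigned anon_min,
              bool force_anonymous, bool compressor_has_space,
              int *anons_grabbed)
{
    /* speculative pages that were never touched are the cheapest to steal */
    if (!q_empty(Q_SPECULATIVE))
        return q_first(Q_SPECULATIVE);

    /* pre-cleaned pages can be freed without any further I/O */
    if (!q_empty(Q_CLEANED))
        return q_first(Q_CLEANED);

    /* decide whether anonymous pages are fair game on this pass */
    bool grab_anonymous = (anon_count > anon_min);
    if (ext_count < filecache_min || force_anonymous) {
        grab_anonymous = true;
        *anons_grabbed = 0;
    }
    if (grab_anonymous && !compressor_has_space)
        grab_anonymous = false;

    /* prefer file-backed pages, but take at most ANONS_GRABBED_LIMIT
     * anonymous pages in a row before returning to the file cache */
    if (!grab_anonymous || *anons_grabbed >= ANONS_GRABBED_LIMIT ||
        q_empty(Q_ANONYMOUS)) {
        if (!q_empty(Q_INACTIVE)) {
            *anons_grabbed = 0;
            return q_first(Q_INACTIVE);
        }
    }
    if (!q_empty(Q_ANONYMOUS)) {
        (*anons_grabbed)++;
        return q_first(Q_ANONYMOUS);
    }
    return NULL;    /* no victim available this round */
}

The two knobs visible in the hunks serve distinct purposes: vm_page_filecache_min keeps the scan from cannibalizing the file cache while anonymous pages are available to compress, and ANONS_GRABBED_LIMIT bounds how many anonymous pages are taken back to back so compressor work stays interleaved with cheap file-backed reclaims.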
@@ -1986,18 +2126,15 @@ consider_inactive: */ m->object->scan_collisions++; - if (pageout_making_free) { - if ( !queue_empty(&sq->age_q) ) - m_want = (vm_page_t) queue_first(&sq->age_q); - else if (!queue_empty(&vm_page_queue_cleaned)) - m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned); - else if (grab_anonymous == FALSE || queue_empty(&vm_page_queue_anonymous)) - m_want = (vm_page_t) queue_first(&vm_page_queue_inactive); - } - if (m_want == NULL && vm_pageout_need_to_refill_clean_queue == TRUE) { - if ( !queue_empty(&vm_page_queue_anonymous) ) - m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous); - } + if ( !queue_empty(&sq->age_q) ) + m_want = (vm_page_t) queue_first(&sq->age_q); + else if ( !queue_empty(&vm_page_queue_cleaned)) + m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned); + else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) + m_want = (vm_page_t) queue_first(&vm_page_queue_inactive); + else if ( !queue_empty(&vm_page_queue_anonymous)) + m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous); + /* * this is the next object we're going to be interested in * try to make sure its available after the mutex_yield @@ -2131,8 +2268,6 @@ reclaim_page: else vm_pageout_freed_from_inactive_clean++; - inactive_burst_count = 0; - if (page_prev_state != PAGE_STATE_SPECULATIVE) vm_pageout_stats[vm_pageout_stat_now].reclaimed++; @@ -2159,7 +2294,22 @@ reclaim_page: } goto reclaim_page; } - if (object->purgable == VM_PURGABLE_VOLATILE) { + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + /* + * With the VM compressor, the cost of + * reclaiming a page is much lower (no I/O), + * so if we find a "volatile" page, it's better + * to let it get compressed rather than letting + * it occupy a full page until it gets purged. + * So no need to check for "volatile" here. + */ + } else if (object->purgable == VM_PURGABLE_VOLATILE) { + /* + * Avoid cleaning a "volatile" page which might + * be purged soon. + */ + /* if it's wired, we can't put it on our queue */ assert(!VM_PAGE_WIRED(m)); @@ -2214,11 +2364,11 @@ consider_inactive_page: } /* - * if (m->cleaning) + * if (m->cleaning && !m->pageout) * If already cleaning this page in place and it hasn't - * been recently referenced, just pull off the queue. - * We can leave the page mapped, and upl_commit_range - * will put it on the clean queue. + * been recently referenced, just pull off the queue. + * We can leave the page mapped, and upl_commit_range + * will put it on the clean queue. * * note: if m->encrypted_cleaning == TRUE, then * m->cleaning == TRUE @@ -2240,10 +2390,9 @@ consider_inactive_page: /* * page with m->pageout and still on the queues means that an - * MS_INVALIDATE in progress on this page... leave it alone + * MS_INVALIDATE is in progress on this page... 
leave it alone */ if (m->pageout) { - inactive_burst_count = 0; goto done_with_inactivepage; } @@ -2253,15 +2402,14 @@ consider_inactive_page: reactivated_this_call++; goto reactivate_page; } else { - inactive_burst_count = 0; goto done_with_inactivepage; } } - if (m->reference || m->dirty) { - /* deal with a rogue "reusable" page */ - VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); - } + if (m->reference || m->dirty) { + /* deal with a rogue "reusable" page */ + VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); + } if (m->reference && !m->no_cache) { /* @@ -2305,7 +2453,6 @@ reactivate_page: vm_pageout_cleaned_reactivated++; vm_pageout_inactive_used++; - inactive_burst_count = 0; goto done_with_inactivepage; } @@ -2373,41 +2520,50 @@ throttle_inactive: } if (inactive_throttled == TRUE) { - if (object->internal) - vm_pageout_scan_inactive_throttled_internal++; - else - vm_pageout_scan_inactive_throttled_external++; - - if (page_prev_state == PAGE_STATE_SPECULATIVE) - page_prev_state = PAGE_STATE_INACTIVE; - - if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && object->internal == FALSE) { - /* - * a) The external pageout queue is throttled - * b) We're done with the active queue and moved on to the inactive queue - * c) We start noticing dirty pages and usually we would put them at the end of the inactive queue, but, - * d) We don't have a default pager, and so, - * e) We push these onto the active queue in an effort to cause a re-evaluation of the active queue - * and get back some, possibly clean, pages. + if (object->internal == FALSE) { + /* + * we need to break up the following potential deadlock case... + * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written. + * b) The thread doing the writing is waiting for pages while holding the truncate lock + * c) Most of the pages in the inactive queue belong to this file. + * + * we are potentially in this deadlock because... + * a) the external pageout queue is throttled + * b) we're done with the active queue and moved on to the inactive queue + * c) we've got a dirty external page * - * We also keep a count of the pages of this kind, since, these will be a good indicator of us being in a deadlock - * on systems without a dynamic pager, where: - * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written. - * b) The thread doing the writing is waiting for pages while holding the truncate lock - * c) Most of the pages in the inactive queue belong to this file. + * since we don't know the reason for the external pageout queue being throttled we + * must suspect that we are deadlocked, so move the current page onto the active queue + * in an effort to cause a page from the active queue to 'age' to the inactive queue + * + * if we don't have jetsam configured (i.e. we have a dynamic pager), set + * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous + * pool the next time we select a victim page... 
if we can make enough new free pages, + * the deadlock will break, the external pageout queue will empty and it will no longer + * be throttled + * + * if we have jetsam configured, keep a count of the pages reactivated this way so + * that we can try to find clean pages in the active/inactive queues before + * deciding to jetsam a process */ + vm_pageout_scan_inactive_throttled_external++; + queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); m->active = TRUE; vm_page_active_count++; + if (m->object->internal) { + vm_page_pageable_internal_count++; + } else { + vm_page_pageable_external_count++; + } vm_pageout_adjust_io_throttles(iq, eq, FALSE); - vm_pageout_inactive_external_forced_reactivate_count++; +#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM vm_pageout_inactive_external_forced_reactivate_limit--; - if (vm_pageout_inactive_external_forced_reactivate_limit <= 0){ + if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) { vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; -#if CONFIG_JETSAM /* * Possible deadlock scenario so request jetsam action */ @@ -2415,19 +2571,31 @@ throttle_inactive: vm_object_unlock(object); object = VM_OBJECT_NULL; vm_page_unlock_queues(); + + VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START, + vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count); - if (memorystatus_kill_top_proc_from_VM() < 0){ + /* Kill first suitable process */ + if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) { panic("vm_pageout_scan: Jetsam request failed\n"); } + + VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0); vm_pageout_inactive_external_forced_jetsam_count++; vm_page_lock_queues(); delayed_unlock = 1; -#endif } - inactive_burst_count = 0; +#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ + force_anonymous = TRUE; +#endif goto done_with_inactivepage; } else { + if (page_prev_state == PAGE_STATE_SPECULATIVE) + page_prev_state = PAGE_STATE_INACTIVE; + + vm_pageout_scan_inactive_throttled_internal++; + goto requeue_page; } } @@ -2443,37 +2611,21 @@ throttle_inactive: * page was still mapped up to the pmap_disconnect * and may have been dirtied at the last microsecond * - * we also check for the page being referenced 'late' - * and reactivate it for that case - * * Note that if 'pmapped' is FALSE then the page is not * and has not been in any map, so there is no point calling - * pmap_disconnect(). m->dirty and/or m->reference could - * have been set in anticipation of likely usage of the page. + * pmap_disconnect(). m->dirty could have been set in anticipation + * of likely usage of the page. */ if (m->pmapped == TRUE) { - refmod_state = pmap_disconnect(m->phys_page); - if (refmod_state & VM_MEM_MODIFIED) { - SET_PAGE_DIRTY(m, FALSE); + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || object->internal == FALSE) { + refmod_state = pmap_disconnect_options(m->phys_page, 0, NULL); + } else { + refmod_state = pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL); } - if (refmod_state & VM_MEM_REFERENCED) { - - /* If m->reference is already set, this page must have - * already failed the reactivate_limit test, so don't - * bump the counts twice. - */ - if ( !
m->reference ) { - m->reference = TRUE; - if (forced_reclaim || - ++reactivated_this_call >= reactivate_limit) - vm_pageout_reactivation_limit_exceeded++; - else { - if (page_prev_state == PAGE_STATE_CLEAN) - vm_pageout_cleaned_reference_reactivated++; - goto reactivate_page; - } - } + + if (refmod_state & VM_MEM_MODIFIED) { + SET_PAGE_DIRTY(m, FALSE); } } /* @@ -2502,59 +2654,10 @@ throttle_inactive: } vm_pageout_inactive_clean++; } - /* FYI: (!pageout_making_free) == (!m->clean_queue && !m->speculative) */ - if (((vm_page_free_count + local_freed) >= vm_page_free_target) && !pageout_making_free) { - - assert(!m->clean_queue); - assert(!m->speculative); - - /* - * we have met our free page target and this page wasn't just pulled - * from the clean or speculative queues, so put it on the clean queue - */ - if (m->reference == TRUE) { - /* - * must have come through the forced reclaim path. - * we need to clear out the reference state in this case - * so that we don't just reactivate the page when we - * find it in the clean queue based on an old reference. - * if it gets re-referenced while on the queue, then - * the reactivation is justified - */ - m->reference = FALSE; - pmap_clear_reference(m->phys_page); - } - - vm_pageout_enqueued_cleaned_from_inactive_clean++; - vm_page_enqueue_cleaned(m); - - inactive_burst_count = 0; /* we found a usable page on the inactive queue, hooray */ - - goto done_with_inactivepage; - } /* * OK, at this point we have found a page we are going to free. */ - -#ifndef CONFIG_EMBEDDED - -#define VM_PRESSURE_INTERVAL_NS 250000000 /* nanoseconds; == .25 seconds */ - if (vm_pageout_need_to_refill_clean_queue == TRUE || page_prev_state == PAGE_STATE_CLEAN) { - static uint64_t vm_pressure_last_time_ns = 0; - uint64_t cur_time_ns = 0; - absolutetime_to_nanoseconds(mach_absolute_time(), &cur_time_ns); - if (cur_time_ns >= vm_pressure_last_time_ns + VM_PRESSURE_INTERVAL_NS) { - vm_pressure_last_time_ns = cur_time_ns; - thread_wakeup(&vm_pressure_thread); -#if CONFIG_MEMORYSTATUS - /* Wake up idle-exit thread */ - thread_wakeup((event_t)&memorystatus_wakeup); -#endif - } - } -#endif /* !CONFIG_EMBEDDED */ - goto reclaim_page; } @@ -2573,6 +2676,10 @@ throttle_inactive: if (inactive_throttled == TRUE) goto throttle_inactive; + +#if VM_PRESSURE_EVENTS + vm_pressure_response(); +#endif /* VM_PRESSURE_EVENTS */ /* * do NOT set the pageout bit! 
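The throttled-queue handling in the hunks above reduces to a small decision tree, sketched below for reference. This is a minimal sketch under stated assumptions, not XNU code: the throttle_action_t enum and handle_throttled_dirty_page() helper are hypothetical names, and only the branching mirrors the diff.

/*
 * Illustrative sketch of the decision made when a dirty page hits a
 * throttled pageout queue; stand-in names only, not XNU interfaces.
 */
#include <stdbool.h>

typedef enum {
    REQUEUE_INTERNAL,        /* internal queue throttled: requeue, retry */
    REACTIVATE,              /* push the page back to the active queue */
    REACTIVATE_AND_JETSAM    /* reactivate and ask jetsam to free memory */
} throttle_action_t;

static throttle_action_t
handle_throttled_dirty_page(bool page_is_internal, bool jetsam_configured,
                            int *reactivate_limit, bool *force_anonymous)
{
    if (page_is_internal)
        return REQUEUE_INTERNAL;

    /*
     * External queue throttled while we hold a dirty file-backed page:
     * possibly the writer-holds-truncate-lock deadlock described above,
     * so move the page back to the active queue and let something
     * else age out instead.
     */
    if (!jetsam_configured) {
        /* no jetsam: bias the next victim pick toward anonymous pages */
        *force_anonymous = true;
        return REACTIVATE;
    }
    if (--(*reactivate_limit) <= 0)
        return REACTIVATE_AND_JETSAM;   /* too many forced reactivations */

    return REACTIVATE;
}

The asymmetry is deliberate: a system with jetsam can recover memory by killing a process once the forced-reactivation budget is exhausted, while a system with a dynamic pager instead steers the scan toward anonymous pages, whose reclaim does not depend on the throttled external queue.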
@@ -2588,10 +2695,11 @@ throttle_inactive: vm_pageout_inactive_dirty_internal++; else vm_pageout_inactive_dirty_external++; - - inactive_burst_count = 0; + done_with_inactivepage: + inactive_burst_count = 0; + if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) { if (object != NULL) { @@ -2619,7 +2727,10 @@ done_with_inactivepage: delayed_unlock = 1; } vm_pageout_considered_page++; - + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) + vm_consider_waking_compactor_swapper(); + /* * back to top of pageout scan loop */ @@ -2635,11 +2746,19 @@ vm_page_free_reserve( { int free_after_reserve; - vm_page_free_reserved += pages; + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { - if (vm_page_free_reserved > VM_PAGE_FREE_RESERVED_LIMIT) - vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT; + if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT)) + vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT; + else + vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT); + } else { + if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT) + vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT; + else + vm_page_free_reserved += pages; + } free_after_reserve = vm_page_free_count_init - vm_page_free_reserved; vm_page_free_min = vm_page_free_reserved + @@ -2718,7 +2837,7 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) q->pgo_busy = TRUE; queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); - if (m->object == slide_info.slide_object) { + if (m->object->object_slid) { panic("slid page %p not allowed on this path\n", m); } VM_PAGE_CHECK(m); @@ -2803,103 +2922,495 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) */ m->pageout = FALSE; - vm_page_lockspin_queues(); + vm_page_lockspin_queues(); + + vm_pageout_throttle_up(m); + vm_page_activate(m); + vm_pageout_dirty_no_pager++; + + vm_page_unlock_queues(); + + /* + * And we are done with it. + */ + vm_object_activity_end(object); + vm_object_unlock(object); + + vm_page_lockspin_queues(); + continue; + } + } + pager = object->pager; + + if (pager == MEMORY_OBJECT_NULL) { + /* + * This pager has been destroyed by either + * memory_object_destroy or vm_object_destroy, and + * so there is nowhere for the page to go. + */ + if (m->pageout) { + /* + * Just free the page... VM_PAGE_FREE takes + * care of cleaning up all the state... + * including doing the vm_pageout_throttle_up + */ + VM_PAGE_FREE(m); + } else { + vm_page_lockspin_queues(); + + vm_pageout_throttle_up(m); + vm_page_activate(m); + + vm_page_unlock_queues(); + + /* + * And we are done with it. + */ + } + vm_object_activity_end(object); + vm_object_unlock(object); + + vm_page_lockspin_queues(); + continue; + } +#if 0 + /* + * we don't hold the page queue lock + * so this check isn't safe to make + */ + VM_PAGE_CHECK(m); +#endif + /* + * give back the activity_in_progress reference we + * took when we queued up this page and replace it + * with a paging_in_progress reference that will + * also hold the paging offset from changing and + * prevent the object from terminating + */ + vm_object_activity_end(object); + vm_object_paging_begin(object); + vm_object_unlock(object); + + /* + * Send the data to the pager.
+ * any pageout clustering happens there + */ + memory_object_data_return(pager, + m->offset + object->paging_offset, + PAGE_SIZE, + NULL, + NULL, + FALSE, + FALSE, + 0); + + vm_object_lock(object); + vm_object_paging_end(object); + vm_object_unlock(object); + + vm_pageout_io_throttle(); + + vm_page_lockspin_queues(); + } + q->pgo_busy = FALSE; + q->pgo_idle = TRUE; + + assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); + vm_page_unlock_queues(); + + thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q); + /*NOTREACHED*/ +} + + +static void +vm_pageout_iothread_external_continue(struct vm_pageout_queue *q) +{ + vm_page_t m = NULL; + vm_object_t object; + vm_object_offset_t offset; + memory_object_t pager; + + + if (vm_pageout_internal_iothread != THREAD_NULL) + current_thread()->options &= ~TH_OPT_VMPRIV; + + vm_page_lockspin_queues(); + + while ( !queue_empty(&q->pgo_pending) ) { + + q->pgo_busy = TRUE; + queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); + if (m->object->object_slid) { + panic("slid page %p not allowed on this path\n", m); + } + VM_PAGE_CHECK(m); + m->pageout_queue = FALSE; + m->pageq.next = NULL; + m->pageq.prev = NULL; + + /* + * grab a snapshot of the object and offset this + * page is tabled in so that we can relookup this + * page after we've taken the object lock - these + * fields are stable while we hold the page queues lock + * but as soon as we drop it, there is nothing to keep + * this page in this object... we hold an activity_in_progress + * on this object which will keep it from terminating + */ + object = m->object; + offset = m->offset; + + vm_page_unlock_queues(); + + vm_object_lock(object); + + m = vm_page_lookup(object, offset); + + if (m == NULL || + m->busy || m->cleaning || m->pageout_queue || !m->laundry) { + /* + * it's either the same page that someone else has + * started cleaning (or it's finished cleaning or + * been put back on the pageout queue), or + * the page has been freed or we have found a + * new page at this offset... in all of these cases + * we merely need to release the activity_in_progress + * we took when we put the page on the pageout queue + */ + vm_object_activity_end(object); + vm_object_unlock(object); + + vm_page_lockspin_queues(); + continue; + } + pager = object->pager; + + if (pager == MEMORY_OBJECT_NULL) { + /* + * This pager has been destroyed by either + * memory_object_destroy or vm_object_destroy, and + * so there is nowhere for the page to go. + */ + if (m->pageout) { + /* + * Just free the page... VM_PAGE_FREE takes + * care of cleaning up all the state... + * including doing the vm_pageout_throttle_up + */ + VM_PAGE_FREE(m); + } else { + vm_page_lockspin_queues(); + + vm_pageout_throttle_up(m); + vm_page_activate(m); + + vm_page_unlock_queues(); + + /* + * And we are done with it. + */ + } + vm_object_activity_end(object); + vm_object_unlock(object); + + vm_page_lockspin_queues(); + continue; + } +#if 0 + /* + * we don't hold the page queue lock + * so this check isn't safe to make + */ + VM_PAGE_CHECK(m); +#endif + /* + * give back the activity_in_progress reference we + * took when we queued up this page and replace it + * with a paging_in_progress reference that will + * also hold the paging offset from changing and + * prevent the object from terminating + */ + vm_object_activity_end(object); + vm_object_paging_begin(object); + vm_object_unlock(object); + + /* + * Send the data to the pager.
+ * any pageout clustering happens there + */ + memory_object_data_return(pager, + m->offset + object->paging_offset, + PAGE_SIZE, + NULL, + NULL, + FALSE, + FALSE, + 0); + + vm_object_lock(object); + vm_object_paging_end(object); + vm_object_unlock(object); + + vm_pageout_io_throttle(); + + vm_page_lockspin_queues(); + } + q->pgo_busy = FALSE; + q->pgo_idle = TRUE; + + assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); + vm_page_unlock_queues(); + + thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q); + /*NOTREACHED*/ +} + + +uint32_t vm_compressor_failed; + +static void +vm_pageout_iothread_internal_continue(struct cq *cq) +{ + struct vm_pageout_queue *q; + vm_page_t m = NULL; + vm_object_t object; + memory_object_t pager; + boolean_t pgo_draining; + vm_page_t local_q; + int local_cnt; + vm_page_t local_freeq = NULL; + int local_freed = 0; + int local_batch_size; + kern_return_t retval; + + + KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0); + + q = cq->q; + local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 4); + + while (TRUE) { + + local_cnt = 0; + local_q = NULL; + + KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0); + + vm_page_lock_queues(); + + KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0); + + KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, 0, 0, 0, 0, 0); + + while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) { + + queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); + + VM_PAGE_CHECK(m); + + m->pageout_queue = FALSE; + m->pageq.prev = NULL; + + m->pageq.next = (queue_entry_t)local_q; + local_q = m; + local_cnt++; + } + if (local_q == NULL) + break; + + q->pgo_busy = TRUE; + + if ((pgo_draining = q->pgo_draining) == FALSE) + vm_pageout_throttle_up_batch(q, local_cnt); + + vm_page_unlock_queues(); + + KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); + + while (local_q) { + + m = local_q; + local_q = (vm_page_t)m->pageq.next; + m->pageq.next = NULL; + + if (m->object->object_slid) { + panic("slid page %p not allowed on this path\n", m); + } + + object = m->object; + pager = object->pager; + + if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) { + + KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0); + + vm_object_lock(object); + + /* + * If there is no memory object for the page, create + * one and hand it to the compression pager. + */ + + if (!object->pager_initialized) + vm_object_collapse(object, (vm_object_offset_t) 0, TRUE); + if (!object->pager_initialized) + vm_object_compressor_pager_create(object); + + if (!object->pager_initialized) { + /* + * Still no pager for the object. + * Reactivate the page. + * + * Should only happen if there is no + * compression pager + */ + m->pageout = FALSE; + m->laundry = FALSE; + PAGE_WAKEUP_DONE(m); + + vm_page_lockspin_queues(); + vm_page_activate(m); + vm_pageout_dirty_no_pager++; + vm_page_unlock_queues(); + + /* + * And we are done with it. + */ + vm_object_activity_end(object); + vm_object_unlock(object); + + continue; + } + pager = object->pager; + + if (pager == MEMORY_OBJECT_NULL) { + /* + * This pager has been destroyed by either + * memory_object_destroy or vm_object_destroy, and + * so there is nowhere for the page to go. + */ + if (m->pageout) { + /* + * Just free the page... VM_PAGE_FREE takes + * care of cleaning up all the state... 
+ * including doing the vm_pageout_throttle_up + */ + VM_PAGE_FREE(m); + } else { + m->laundry = FALSE; + PAGE_WAKEUP_DONE(m); + + vm_page_lockspin_queues(); + vm_page_activate(m); + vm_page_unlock_queues(); + + /* + * And we are done with it. + */ + } + vm_object_activity_end(object); + vm_object_unlock(object); + + continue; + } + vm_object_unlock(object); + + KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0); + } + while (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) { + kern_return_t wait_result; + int need_wakeup = 0; + + if (local_freeq) { + vm_page_free_list(local_freeq, TRUE); + + local_freeq = NULL; + local_freed = 0; - vm_pageout_throttle_up(m); - vm_page_activate(m); - vm_pageout_dirty_no_pager++; + continue; + } + lck_mtx_lock_spin(&vm_page_queue_free_lock); - vm_page_unlock_queues(); + if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) { + + if (vm_page_free_wanted_privileged++ == 0) + need_wakeup = 1; + wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT); - /* - * And we are done with it. - */ - vm_object_activity_end(object); - vm_object_unlock(object); + lck_mtx_unlock(&vm_page_queue_free_lock); - vm_page_lockspin_queues(); - continue; - } - } - pager = object->pager; + if (need_wakeup) + thread_wakeup((event_t)&vm_page_free_wanted); - if (pager == MEMORY_OBJECT_NULL) { - /* - * This pager has been destroyed by either - * memory_object_destroy or vm_object_destroy, and - * so there is nowhere for the page to go. - */ - if (m->pageout) { - /* - * Just free the page... VM_PAGE_FREE takes - * care of cleaning up all the state... - * including doing the vm_pageout_throttle_up - */ - VM_PAGE_FREE(m); - } else { - vm_page_lockspin_queues(); + if (wait_result == THREAD_WAITING) + thread_block(THREAD_CONTINUE_NULL); + } else + lck_mtx_unlock(&vm_page_queue_free_lock); + } + retval = vm_compressor_pager_put(pager, m->offset + object->paging_offset, m->phys_page, &cq->current_chead, cq->scratch_buf); - vm_pageout_throttle_up(m); - vm_page_activate(m); - - vm_page_unlock_queues(); + vm_object_lock(object); + m->laundry = FALSE; + m->pageout = FALSE; - /* - * And we are done with it. - */ - } - vm_object_activity_end(object); - vm_object_unlock(object); + if (retval == KERN_SUCCESS) { - vm_page_lockspin_queues(); - continue; - } -#if 0 - /* - * we don't hold the page queue lock - * so this check isn't safe to make - */ - VM_PAGE_CHECK(m); -#endif - /* - * give back the activity_in_progress reference we - * took when we queued up this page and replace it - * it with a paging_in_progress reference that will - * also hold the paging offset from changing and - * prevent the object from terminating - */ - vm_object_activity_end(object); - vm_object_paging_begin(object); - vm_object_unlock(object); + vm_page_compressions_failing = FALSE; + + VM_STAT_INCR(compressions); + + if (m->tabled) + vm_page_remove(m, TRUE); + vm_object_activity_end(object); + vm_object_unlock(object); - /* - * Send the data to the pager. 
- * any pageout clustering happens there - */ - memory_object_data_return(pager, - m->offset + object->paging_offset, - PAGE_SIZE, - NULL, - NULL, - FALSE, - FALSE, - 0); + m->pageq.next = (queue_entry_t)local_freeq; + local_freeq = m; + local_freed++; - vm_object_lock(object); - vm_object_paging_end(object); - vm_object_unlock(object); + } else { + PAGE_WAKEUP_DONE(m); - vm_pageout_io_throttle(); + vm_page_lockspin_queues(); - vm_page_lockspin_queues(); + vm_page_activate(m); + vm_compressor_failed++; + + vm_page_compressions_failing = TRUE; + + vm_page_unlock_queues(); + + vm_object_activity_end(object); + vm_object_unlock(object); + } + } + if (local_freeq) { + vm_page_free_list(local_freeq, TRUE); + + local_freeq = NULL; + local_freed = 0; + } + if (pgo_draining == TRUE) { + vm_page_lockspin_queues(); + vm_pageout_throttle_up_batch(q, local_cnt); + vm_page_unlock_queues(); + } } + KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0); + + /* + * queue lock is held and our q is empty + */ q->pgo_busy = FALSE; q->pgo_idle = TRUE; - assert_wait((event_t) q, THREAD_UNINT); + assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); vm_page_unlock_queues(); - thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending); + KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); + + thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq); /*NOTREACHED*/ } @@ -2915,7 +3426,7 @@ vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_qu if (hibernate_cleaning_in_progress == TRUE) req_lowpriority = FALSE; - if (iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority) + if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority) set_iq = TRUE; if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) @@ -2926,18 +3437,20 @@ vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_qu vm_page_unlock_queues(); if (req_lowpriority == TRUE) { - policy = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE; + policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED; DTRACE_VM(laundrythrottle); } else { - policy = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL; + policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED; DTRACE_VM(laundryunthrottle); } if (set_iq == TRUE) { - proc_apply_thread_diskacc(kernel_task, iq->pgo_tid, policy); + proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); + iq->pgo_lowpriority = req_lowpriority; } if (set_eq == TRUE) { - proc_apply_thread_diskacc(kernel_task, eq->pgo_tid, policy); + proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); + eq->pgo_lowpriority = req_lowpriority; } vm_page_lock_queues(); @@ -2953,7 +3466,9 @@ vm_pageout_iothread_external(void) self->options |= TH_OPT_VMPRIV; DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); - proc_apply_thread_diskacc(kernel_task, self->thread_id, TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); + + proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, + TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); vm_page_lock_queues(); @@ -2963,21 +3478,28 @@ vm_pageout_iothread_external(void) vm_page_unlock_queues(); - vm_pageout_iothread_continue(&vm_pageout_queue_external); + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) + vm_pageout_iothread_external_continue(&vm_pageout_queue_external); + else + 
vm_pageout_iothread_continue(&vm_pageout_queue_external); /*NOTREACHED*/ } + static void -vm_pageout_iothread_internal(void) +vm_pageout_iothread_internal(struct cq *cq) { thread_t self = current_thread(); self->options |= TH_OPT_VMPRIV; - DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); - proc_apply_thread_diskacc(kernel_task, self->thread_id, TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); + if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { + DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); + proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, + TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); + } vm_page_lock_queues(); vm_pageout_queue_internal.pgo_tid = self->thread_id; @@ -2986,7 +3508,14 @@ vm_pageout_iothread_internal(void) vm_page_unlock_queues(); - vm_pageout_iothread_continue(&vm_pageout_queue_internal); + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + cq->q = &vm_pageout_queue_internal; + cq->current_chead = NULL; + cq->scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE); + + vm_pageout_iothread_internal_continue(cq); + } else + vm_pageout_iothread_continue(&vm_pageout_queue_internal); /*NOTREACHED*/ } @@ -3001,20 +3530,148 @@ vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) } } -static void + +extern boolean_t memorystatus_manual_testing_on; +extern unsigned int memorystatus_level; + + + +#if VM_PRESSURE_EVENTS + +void +vm_pressure_response(void) +{ + + + vm_pressure_level_t old_level = kVMPressureNormal; + int new_level = -1; + + uint64_t available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100); + + memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem)); + + if (memorystatus_manual_testing_on) { + return; + } + + old_level = memorystatus_vm_pressure_level; + + switch (memorystatus_vm_pressure_level) { + + case kVMPressureNormal: + { + if (VM_PRESSURE_WARNING_TO_CRITICAL()) { + new_level = kVMPressureCritical; + } else if (VM_PRESSURE_NORMAL_TO_WARNING()) { + new_level = kVMPressureWarning; + } + break; + } + + case kVMPressureWarning: + case kVMPressureUrgent: + { + if (VM_PRESSURE_WARNING_TO_NORMAL()) { + new_level = kVMPressureNormal; + } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) { + new_level = kVMPressureCritical; + } + break; + } + + case kVMPressureCritical: + { + if (VM_PRESSURE_WARNING_TO_NORMAL()) { + new_level = kVMPressureNormal; + } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) { + new_level = kVMPressureWarning; + } + break; + } + + default: + return; + } + + if (new_level != -1) { + memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level; + + if (old_level != new_level) { + if (vm_pressure_thread_running == FALSE) { + thread_wakeup(&vm_pressure_thread); + } + thread_wakeup(&vm_pressure_changed); + } + } + +} +#endif /* VM_PRESSURE_EVENTS */ + +kern_return_t +mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) { + +#if !VM_PRESSURE_EVENTS + + return KERN_FAILURE; + +#else /* VM_PRESSURE_EVENTS */ + + kern_return_t kr = KERN_SUCCESS; + + if (pressure_level != NULL) { + + vm_pressure_level_t old_level = memorystatus_vm_pressure_level; + + if (wait_for_pressure == TRUE) { + wait_result_t wr = 0; + + while (old_level == *pressure_level) { + wr = assert_wait((event_t) &vm_pressure_changed, + THREAD_INTERRUPTIBLE); + if (wr == THREAD_WAITING) { + wr = thread_block(THREAD_CONTINUE_NULL); + } + if (wr == THREAD_INTERRUPTED) { + return KERN_ABORTED; + } + if (wr == THREAD_AWAKENED) { 
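vm_pressure_response above is a small state machine with hysteresis: each level only tests the transitions that leave it, so the system does not flap between levels on a noisy free-memory signal, and the pressure thread and level monitors are only woken when the level actually changes. A compact sketch of the same shape; the threshold predicates stand in for the kernel's VM_PRESSURE_* macros and the percentages are invented for illustration:

    typedef enum {
        PRESSURE_NORMAL,
        PRESSURE_WARNING,
        PRESSURE_CRITICAL
    } pressure_level_t;

    /* Stubbed predicates; the kernel hides these behind VM_PRESSURE_*
     * macros and the cut-offs here are purely illustrative. */
    static int warning_to_critical(unsigned pct) { return pct < 5;  }
    static int normal_to_warning(unsigned pct)   { return pct < 15; }
    static int back_to_normal(unsigned pct)      { return pct > 30; }
    static int critical_to_warning(unsigned pct) { return pct > 10; }

    /* Returns the new level, or the current one if no transition fired.
     * Each state only tests its own outgoing edges, which is what gives
     * the thresholds their hysteresis. */
    static pressure_level_t
    pressure_step(pressure_level_t cur, unsigned free_pct)
    {
        switch (cur) {
        case PRESSURE_NORMAL:
            if (warning_to_critical(free_pct)) return PRESSURE_CRITICAL;
            if (normal_to_warning(free_pct))   return PRESSURE_WARNING;
            break;
        case PRESSURE_WARNING:
            if (back_to_normal(free_pct))      return PRESSURE_NORMAL;
            if (warning_to_critical(free_pct)) return PRESSURE_CRITICAL;
            break;
        case PRESSURE_CRITICAL:
            if (back_to_normal(free_pct))      return PRESSURE_NORMAL;
            if (critical_to_warning(free_pct)) return PRESSURE_WARNING;
            break;
        }
        return cur;
    }

The kernel folds kVMPressureUrgent into the warning case and first recomputes memorystatus_level as available non-compressed memory as a percentage of max_mem before stepping the machine.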
+ + old_level = memorystatus_vm_pressure_level; + + if (old_level != *pressure_level) { + break; + } + } + } + } + + *pressure_level = old_level; + kr = KERN_SUCCESS; + } else { + kr = KERN_INVALID_ARGUMENT; + } + + return kr; +#endif /* VM_PRESSURE_EVENTS */ +} + +#if VM_PRESSURE_EVENTS +void vm_pressure_thread(void) { static boolean_t set_up_thread = FALSE; if (set_up_thread) { -#if VM_PRESSURE_EVENTS + vm_pressure_thread_running = TRUE; consider_vm_pressure_events(); -#endif /* VM_PRESSURE_EVENTS */ + vm_pressure_thread_running = FALSE; } set_up_thread = TRUE; assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT); thread_block((thread_continue_t)vm_pressure_thread); } +#endif /* VM_PRESSURE_EVENTS */ + uint32_t vm_pageout_considered_page_last = 0; @@ -3096,6 +3753,9 @@ vm_pageout(void) * Initialize some paging parameters. */ + if (vm_pageout_swap_wait == 0) + vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT; + if (vm_pageout_idle_wait == 0) vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; @@ -3120,6 +3780,12 @@ vm_pageout(void) if (vm_pageout_burst_inactive_throttle == 0) vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; +#if !CONFIG_JETSAM + vm_page_filecache_min = (uint32_t) (max_mem / PAGE_SIZE) / 20; + if (vm_page_filecache_min < VM_PAGE_FILECACHE_MIN) + vm_page_filecache_min = VM_PAGE_FILECACHE_MIN; +#endif + /* * Set kernel task to low backing store privileged * status @@ -3185,6 +3851,7 @@ vm_pageout(void) thread_deallocate(thread); +#if VM_PRESSURE_EVENTS result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL, BASEPRI_DEFAULT, &thread); @@ -3193,9 +3860,12 @@ vm_pageout(void) panic("vm_pressure_thread: create failed"); thread_deallocate(thread); +#endif vm_object_reaper_init(); - + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) + vm_compressor_pager_init(); vm_pageout_continue(); @@ -3223,15 +3893,49 @@ vm_pageout(void) /*NOTREACHED*/ } + + +#define MAX_COMRPESSOR_THREAD_COUNT 8 + +struct cq ciq[MAX_COMRPESSOR_THREAD_COUNT]; + +int vm_compressor_thread_count = 2; + kern_return_t vm_pageout_internal_start(void) { - kern_return_t result; + kern_return_t result; + int i; + host_basic_info_data_t hinfo; + + if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { + mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; +#define BSD_HOST 1 + host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); + + assert(hinfo.max_cpus > 0); + + if (vm_compressor_thread_count >= hinfo.max_cpus) + vm_compressor_thread_count = hinfo.max_cpus - 1; + if (vm_compressor_thread_count <= 0) + vm_compressor_thread_count = 1; + else if (vm_compressor_thread_count > MAX_COMRPESSOR_THREAD_COUNT) + vm_compressor_thread_count = MAX_COMRPESSOR_THREAD_COUNT; + + vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX; + } else { + vm_compressor_thread_count = 1; + vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; + } - vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; - result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread); - if (result == KERN_SUCCESS) - thread_deallocate(vm_pageout_internal_iothread); + for (i = 0; i < vm_compressor_thread_count; i++) { + + result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread); + if (result == 
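vm_pageout_internal_start, begun above, sizes the compressor pool from HOST_BASIC_INFO: the thread count is kept below max_cpus, clamped to at least 1 and at most MAX_COMRPESSOR_THREAD_COUNT (the transposed spelling is as shipped), and pgo_maxlaundry is scaled by threads * 4 so that each thread's local_batch_size in vm_pageout_iothread_internal_continue works out to exactly VM_PAGE_LAUNDRY_MAX. A sketch of that arithmetic with illustrative constants:

    /* Sizing-policy sketch; the real VM_PAGE_LAUNDRY_MAX lives elsewhere
     * in vm_pageout.c and the value here is only for illustration. */
    #define MAX_THREADS          8
    #define VM_PAGE_LAUNDRY_MAX  512

    struct sizing { int threads; unsigned maxlaundry; };

    static struct sizing
    size_compressor_pool(int max_cpus, int requested)
    {
        struct sizing s;
        int n = requested;                 /* the patch defaults to 2 */

        if (n >= max_cpus) n = max_cpus - 1;  /* leave a CPU for everyone else */
        if (n <= 0)        n = 1;             /* always at least one thread */
        else if (n > MAX_THREADS) n = MAX_THREADS;

        s.threads = n;
        /* each thread later divides the queue limit by (threads * 4),
         * so scaling the limit by the same factor fixes the batch size */
        s.maxlaundry = (unsigned)(n * 4) * VM_PAGE_LAUNDRY_MAX;
        return s;
    }

Dividing s.maxlaundry by (threads * 4), as the continue function does, always yields VM_PAGE_LAUNDRY_MAX per batch regardless of the CPU count.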
KERN_SUCCESS) + thread_deallocate(vm_pageout_internal_iothread); + else + break; + } return result; } @@ -3240,9 +3944,9 @@ static upl_t upl_create(int type, int flags, upl_size_t size) { upl_t upl; - int page_field_size = 0; + vm_size_t page_field_size = 0; int upl_flags = 0; - int upl_size = sizeof(struct upl); + vm_size_t upl_size = sizeof(struct upl); size = round_page_32(size); @@ -3253,7 +3957,7 @@ upl_create(int type, int flags, upl_size_t size) upl_flags |= UPL_LITE; } if (type & UPL_CREATE_INTERNAL) { - upl_size += (int) sizeof(struct upl_page_info) * atop(size); + upl_size += sizeof(struct upl_page_info) * atop(size); upl_flags |= UPL_INTERNAL; } @@ -3637,7 +4341,7 @@ vm_object_upl_request( /* * we're only asking for DIRTY pages to be returned */ - if (dst_page->pageout || !(cntrl_flags & UPL_FOR_PAGEOUT)) { + if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) { /* * if we were the page stolen by vm_pageout_scan to be * cleaned (as opposed to a buddy being clustered in @@ -4403,7 +5107,6 @@ REDISCOVER_ENTRY: return KERN_PROTECTION_FAILURE; } -#if !CONFIG_EMBEDDED local_object = entry->object.vm_object; if (vm_map_entry_should_cow_for_true_share(entry) && local_object->vo_size > *upl_size && @@ -4419,8 +5122,14 @@ REDISCOVER_ENTRY: goto REDISCOVER_ENTRY; } - vm_map_clip_start(map, entry, vm_map_trunc_page(offset)); - vm_map_clip_end(map, entry, vm_map_round_page(offset + *upl_size)); + vm_map_clip_start(map, + entry, + vm_map_trunc_page(offset, + VM_MAP_PAGE_MASK(map))); + vm_map_clip_end(map, + entry, + vm_map_round_page(offset + *upl_size, + VM_MAP_PAGE_MASK(map))); prot = entry->protection & ~VM_PROT_WRITE; if (override_nx(map, entry->alias) && prot) prot |= VM_PROT_EXECUTE; @@ -4436,7 +5145,6 @@ REDISCOVER_ENTRY: vm_map_lock_write_to_read(map); } -#endif /* !CONFIG_EMBEDDED */ if (entry->needs_copy) { /* @@ -4877,9 +5585,12 @@ process_upl_to_remove: if(!isVectorUPL) { upl_unlock(upl); - vm_map_remove(map, - vm_map_trunc_page(addr), - vm_map_round_page(addr + size), + vm_map_remove( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr + size, + VM_MAP_PAGE_MASK(map)), VM_MAP_NO_FLAGS); return KERN_SUCCESS; @@ -5074,6 +5785,13 @@ process_upl_to_commit: if ((upl->flags & UPL_KERNEL_OBJECT) || m == VM_PAGE_NULL) goto commit_next_page; + if (m->compressor) { + assert(m->busy); + + dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + goto commit_next_page; + } + if (flags & UPL_COMMIT_CS_VALIDATED) { /* * CODE SIGNING: @@ -5137,6 +5855,8 @@ process_upl_to_commit: goto commit_next_page; } + assert(!m->compressor); + if (page_list) page_list[entry].phys_addr = 0; @@ -5591,6 +6311,8 @@ process_upl_to_abort: if (m != VM_PAGE_NULL) { + assert(!m->compressor); + if (m->absent) { boolean_t must_free = TRUE; @@ -5899,6 +6621,7 @@ vm_object_iopl_request( int dw_count; int dw_limit; int dw_index; + boolean_t caller_lookup; if (cntrl_flags & ~UPL_VALID_FLAGS) { /* @@ -5932,7 +6655,7 @@ vm_object_iopl_request( */ assert(! 
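The clipping hunks above switch vm_map_trunc_page/vm_map_round_page to take an explicit VM_MAP_PAGE_MASK(map), so a map whose page size differs from the kernel's is clipped on its own boundaries rather than the global PAGE_MASK. The alignment itself is plain mask arithmetic, sketched here with two hypothetical map masks:

    #include <stdint.h>
    #include <stdio.h>

    /* Align down/up to an arbitrary power-of-two page size expressed as
     * a mask (page_size - 1), the same shape as VM_MAP_PAGE_MASK(map). */
    static inline uint64_t trunc_to_mask(uint64_t addr, uint64_t mask)
    {
        return addr & ~mask;
    }

    static inline uint64_t round_to_mask(uint64_t addr, uint64_t mask)
    {
        return (addr + mask) & ~mask;
    }

    int main(void)
    {
        uint64_t mask4k  = 0x0fff;   /* 4 KiB map */
        uint64_t mask16k = 0x3fff;   /* 16 KiB map */

        /* the same offset clips differently depending on the map's mask */
        printf("%llx %llx\n",
               (unsigned long long)trunc_to_mask(0x12345, mask4k),    /* 12000 */
               (unsigned long long)trunc_to_mask(0x12345, mask16k));  /* 10000 */
        printf("%llx %llx\n",
               (unsigned long long)round_to_mask(0x12345, mask4k),    /* 13000 */
               (unsigned long long)round_to_mask(0x12345, mask16k));  /* 14000 */
        return 0;
    }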
(cntrl_flags & UPL_ENCRYPT)); } - if (cntrl_flags & UPL_NOZEROFILL) + if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) no_zero_fill = TRUE; if (cntrl_flags & UPL_COPYOUT_FROM) @@ -6045,7 +6768,7 @@ vm_object_iopl_request( } return KERN_SUCCESS; } - if (object != kernel_object) { + if (object != kernel_object && object != compressor_object) { /* * Protect user space from future COW operations */ @@ -6107,7 +6830,7 @@ vm_object_iopl_request( fault_info.stealth = FALSE; fault_info.io_sync = FALSE; fault_info.cs_bypass = FALSE; - fault_info.mark_zf_absent = TRUE; + fault_info.mark_zf_absent = (0 == (cntrl_flags & UPL_NOZEROFILLIO)); dwp = &dw_array[0]; dw_count = 0; @@ -6136,6 +6859,21 @@ vm_object_iopl_request( if (object == kernel_object) panic("vm_object_iopl_request: missing/bad page in kernel object\n"); + if (object == compressor_object) + panic("vm_object_iopl_request: missing/bad page in compressor object\n"); + + if (cntrl_flags & UPL_REQUEST_NO_FAULT) { + ret = KERN_MEMORY_ERROR; + goto return_err; + } + + /* + * We just looked up the page and the result remains valid + * until the object lock is release, so send it to + * vm_fault_page() (as "dst_page"), to avoid having to + * look it up again there. + */ + caller_lookup = TRUE; do { vm_page_t top_page; @@ -6154,12 +6892,16 @@ vm_object_iopl_request( vm_object_paging_begin(object); result = vm_fault_page(object, dst_offset, - prot | VM_PROT_WRITE, FALSE, + prot | VM_PROT_WRITE, FALSE, + caller_lookup, &prot, &dst_page, &top_page, (int *)0, &error_code, no_zero_fill, FALSE, &fault_info); + /* our lookup is no longer valid at this point */ + caller_lookup = FALSE; + switch (result) { case VM_FAULT_SUCCESS: @@ -6250,6 +6992,11 @@ vm_object_iopl_request( if (upl->flags & UPL_KERNEL_OBJECT) goto record_phys_addr; + if (dst_page->compressor) { + dst_page->busy = TRUE; + goto record_phys_addr; + } + if (dst_page->cleaning) { /* * Someone else is cleaning this page in place. @@ -6377,7 +7124,7 @@ record_phys_addr: user_page_list[entry].cs_validated = dst_page->cs_validated; user_page_list[entry].cs_tainted = dst_page->cs_tainted; } - if (object != kernel_object) { + if (object != kernel_object && object != compressor_object) { /* * someone is explicitly grabbing this page... * update clustered and speculative state @@ -6701,13 +7448,14 @@ vm_paging_map_init(void) */ kern_return_t vm_paging_map_object( - vm_map_offset_t *address, vm_page_t page, vm_object_t object, vm_object_offset_t offset, - vm_map_size_t *size, vm_prot_t protection, - boolean_t can_unlock_object) + boolean_t can_unlock_object, + vm_map_size_t *size, /* IN/OUT */ + vm_map_offset_t *address, /* OUT */ + boolean_t *need_unmap) /* OUT */ { kern_return_t kr; vm_map_offset_t page_map_offset; @@ -6715,8 +7463,18 @@ vm_paging_map_object( vm_object_offset_t object_offset; int i; - if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { + /* use permanent 1-to-1 kernel mapping of physical memory ? */ +#if __x86_64__ + *address = (vm_map_offset_t) + PHYSMAP_PTOV((pmap_paddr_t)page->phys_page << + PAGE_SHIFT); + *need_unmap = FALSE; + return KERN_SUCCESS; +#else +#warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..." +#endif + assert(page->busy); /* * Use one of the pre-allocated kernel virtual addresses @@ -6793,6 +7551,7 @@ vm_paging_map_object( vm_paging_objects_mapped++; vm_paging_pages_mapped++; *address = page_map_offset; + *need_unmap = TRUE; /* all done and mapped, ready to use ! 
*/ return KERN_SUCCESS; @@ -6808,11 +7567,15 @@ vm_paging_map_object( } if (! can_unlock_object) { + *address = 0; + *size = 0; + *need_unmap = FALSE; return KERN_NOT_SUPPORTED; } object_offset = vm_object_trunc_page(offset); - map_size = vm_map_round_page(*size); + map_size = vm_map_round_page(*size, + VM_MAP_PAGE_MASK(kernel_map)); /* * Try and map the required range of the object @@ -6836,6 +7599,7 @@ vm_paging_map_object( if (kr != KERN_SUCCESS) { *address = 0; *size = 0; + *need_unmap = FALSE; vm_object_deallocate(object); /* for the map entry */ vm_object_lock(object); return kr; @@ -6867,6 +7631,7 @@ vm_paging_map_object( assert(kr == KERN_SUCCESS); *address = 0; *size = 0; + *need_unmap = FALSE; vm_object_lock(object); return KERN_MEMORY_ERROR; } @@ -6885,6 +7650,8 @@ vm_paging_map_object( vm_paging_objects_mapped_slow++; vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64); + *need_unmap = TRUE; + return KERN_SUCCESS; } @@ -7069,6 +7836,7 @@ vm_page_encrypt( { kern_return_t kr; vm_map_size_t kernel_mapping_size; + boolean_t kernel_mapping_needs_unmap; vm_offset_t kernel_vaddr; union { unsigned char aes_iv[AES_BLOCK_SIZE]; @@ -7109,13 +7877,15 @@ vm_page_encrypt( * its contents and encrypt them. */ kernel_mapping_size = PAGE_SIZE; - kr = vm_paging_map_object(&kernel_mapping_offset, - page, + kernel_mapping_needs_unmap = FALSE; + kr = vm_paging_map_object(page, page->object, page->offset, - &kernel_mapping_size, VM_PROT_READ | VM_PROT_WRITE, - FALSE); + FALSE, + &kernel_mapping_size, + &kernel_mapping_offset, + &kernel_mapping_needs_unmap); if (kr != KERN_SUCCESS) { panic("vm_page_encrypt: " "could not map page in kernel: 0x%x\n", @@ -7123,6 +7893,7 @@ vm_page_encrypt( } } else { kernel_mapping_size = 0; + kernel_mapping_needs_unmap = FALSE; } kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); @@ -7166,7 +7937,7 @@ vm_page_encrypt( * if we had to map it ourselves. Otherwise, let * the caller undo the mapping if needed. */ - if (kernel_mapping_size != 0) { + if (kernel_mapping_needs_unmap) { vm_paging_unmap_object(page->object, kernel_mapping_offset, kernel_mapping_offset + kernel_mapping_size); @@ -7209,6 +7980,7 @@ vm_page_decrypt( kern_return_t kr; vm_map_size_t kernel_mapping_size; vm_offset_t kernel_vaddr; + boolean_t kernel_mapping_needs_unmap; union { unsigned char aes_iv[AES_BLOCK_SIZE]; struct { @@ -7237,13 +8009,15 @@ vm_page_decrypt( * its contents and decrypt them. */ kernel_mapping_size = PAGE_SIZE; - kr = vm_paging_map_object(&kernel_mapping_offset, - page, + kernel_mapping_needs_unmap = FALSE; + kr = vm_paging_map_object(page, page->object, page->offset, - &kernel_mapping_size, VM_PROT_READ | VM_PROT_WRITE, - FALSE); + FALSE, + &kernel_mapping_size, + &kernel_mapping_offset, + &kernel_mapping_needs_unmap); if (kr != KERN_SUCCESS) { panic("vm_page_decrypt: " "could not map page in kernel: 0x%x\n", @@ -7251,6 +8025,7 @@ vm_page_decrypt( } } else { kernel_mapping_size = 0; + kernel_mapping_needs_unmap = FALSE; } kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); @@ -7288,7 +8063,7 @@ vm_page_decrypt( * if we had to map it ourselves. Otherwise, let * the caller undo the mapping if needed. 
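The reworked vm_paging_map_object above returns a new need_unmap out-parameter because the x86_64 fast path now hands back an address inside the permanent PHYSMAP_PTOV window, which must never be torn down; only the slower paths create a mapping the caller owns. Callers accordingly key cleanup on the flag rather than on kernel_mapping_size != 0. A stand-alone sketch of that pattern, with malloc/free standing in for real mapping primitives:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdlib.h>

    /* Illustrative stand-ins for the two mapping strategies. */
    static void *physmap_lookup(uint64_t phys) { return (void *)(uintptr_t)phys; }
    static void *make_temp_mapping(uint64_t phys, size_t len) { (void)phys; return malloc(len); }
    static void  undo_temp_mapping(void *va, size_t len) { (void)len; free(va); }

    /* Map a page for kernel access. Sets *need_unmap so the caller
     * knows whether the address is borrowed (permanent window) or owned. */
    static void *
    map_page_for_kernel(uint64_t phys, size_t len, bool have_physmap, bool *need_unmap)
    {
        if (have_physmap) {
            *need_unmap = false;        /* permanent 1:1 window, nothing to tear down */
            return physmap_lookup(phys);
        }
        *need_unmap = true;             /* caller must undo this mapping */
        return make_temp_mapping(phys, len);
    }

    /* Caller pattern, mirroring vm_page_encrypt/decrypt/slide: */
    static void
    process_page(uint64_t phys, size_t len, bool have_physmap)
    {
        bool need_unmap = false;
        void *va = map_page_for_kernel(phys, len, have_physmap, &need_unmap);

        /* ... operate on va ... */

        if (need_unmap)                 /* not: if (len != 0) */
            undo_temp_mapping(va, len);
    }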
*/ - if (kernel_mapping_size != 0) { + if (kernel_mapping_needs_unmap) { vm_paging_unmap_object(page->object, kernel_vaddr, kernel_vaddr + PAGE_SIZE); @@ -7866,23 +8641,25 @@ boolean_t vm_page_is_slideable(vm_page_t m) { boolean_t result = FALSE; - vm_object_t slide_object = slide_info.slide_object; - mach_vm_offset_t start = slide_info.start; - mach_vm_offset_t end = slide_info.end; + vm_shared_region_slide_info_t si; + + vm_object_lock_assert_held(m->object); /* make sure our page belongs to the one object allowed to do this */ - if (slide_object == VM_OBJECT_NULL) { - return result; + if (!m->object->object_slid) { + goto done; } - /*Should we traverse down the chain?*/ - if (m->object != slide_object) { - return result; + si = m->object->vo_slide_info; + if (si == NULL) { + goto done; } - if(!m->slid && (start <= m->offset && end > m->offset)) { + if(!m->slid && (si->start <= m->offset && si->end > m->offset)) { result = TRUE; } + +done: return result; } @@ -7895,10 +8672,13 @@ vm_page_slide( { kern_return_t kr; vm_map_size_t kernel_mapping_size; + boolean_t kernel_mapping_needs_unmap; vm_offset_t kernel_vaddr; uint32_t pageIndex = 0; assert(!page->slid); + assert(page->object->object_slid); + vm_object_lock_assert_exclusive(page->object); if (page->error) return KERN_FAILURE; @@ -7917,13 +8697,15 @@ vm_page_slide( * its contents and decrypt them. */ kernel_mapping_size = PAGE_SIZE; - kr = vm_paging_map_object(&kernel_mapping_offset, - page, + kernel_mapping_needs_unmap = FALSE; + kr = vm_paging_map_object(page, page->object, page->offset, - &kernel_mapping_size, VM_PROT_READ | VM_PROT_WRITE, - FALSE); + FALSE, + &kernel_mapping_size, + &kernel_mapping_offset, + &kernel_mapping_needs_unmap); if (kr != KERN_SUCCESS) { panic("vm_page_slide: " "could not map page in kernel: 0x%x\n", @@ -7931,6 +8713,7 @@ vm_page_slide( } } else { kernel_mapping_size = 0; + kernel_mapping_needs_unmap = FALSE; } kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); @@ -7940,14 +8723,17 @@ vm_page_slide( /*assert that slide_file_info.start/end are page-aligned?*/ - pageIndex = (uint32_t)((page->offset - slide_info.start)/PAGE_SIZE); - kr = vm_shared_region_slide(kernel_vaddr, pageIndex); + assert(!page->slid); + assert(page->object->object_slid); + + pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/PAGE_SIZE); + kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr, pageIndex); vm_page_slide_counter++; /* * Unmap the page from the kernel's address space, */ - if (kernel_mapping_size != 0) { + if (kernel_mapping_needs_unmap) { vm_paging_unmap_object(page->object, kernel_vaddr, kernel_vaddr + PAGE_SIZE); @@ -7977,6 +8763,19 @@ vm_page_slide( return kr; } +void inline memoryshot(unsigned int event, unsigned int control) +{ + if (vm_debug_events) { + KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control, + vm_page_active_count, vm_page_inactive_count, + vm_page_free_count, vm_page_speculative_count, + vm_page_throttled_count); + } else { + (void) event; + (void) control; + } + +} #ifdef MACH_BSD diff --git a/osfmk/vm/vm_pageout.h b/osfmk/vm/vm_pageout.h index d04bbe8cf..7396460b2 100644 --- a/osfmk/vm/vm_pageout.h +++ b/osfmk/vm/vm_pageout.h @@ -87,39 +87,56 @@ #include -#define VM_PAGE_CLEANED_TARGET 30000 /* 25600 pages = 100 MB */ -#define VM_PAGE_CLEANED_MIN ((VM_PAGE_CLEANED_TARGET * 80) / 100) - #define VM_PAGE_AVAILABLE_COUNT() ((unsigned int)(vm_page_cleaned_count)) /* externally manipulated counters */ extern unsigned int 
vm_pageout_cleaned_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated; +#if CONFIG_JETSAM +#define LATENCY_JETSAM FALSE +#if LATENCY_JETSAM +#define JETSAM_LATENCY_TOKEN_AGE 3000 /* 3ms */ +#define NUM_OF_JETSAM_LATENCY_TOKENS 1000 + +#define JETSAM_AGE_NOTIFY_CRITICAL 1500000 /* 1.5 secs */ + +extern boolean_t jlp_init; +extern uint64_t jlp_time, jlp_current; +extern unsigned int latency_jetsam_wakeup; +#endif /* LATENCY_JETSAM */ +#endif /* CONFIG_JETSAM */ + #if CONFIG_FREEZE extern boolean_t memorystatus_freeze_enabled; -#define VM_DYNAMIC_PAGING_ENABLED(port) ((memorystatus_freeze_enabled == FALSE) && IP_VALID(port)) +#define VM_DYNAMIC_PAGING_ENABLED(port) ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) || (memorystatus_freeze_enabled == FALSE && IP_VALID(port))) #else -#define VM_DYNAMIC_PAGING_ENABLED(port) IP_VALID(port) +#define VM_DYNAMIC_PAGING_ENABLED(port) (COMPRESSED_PAGER_IS_ACTIVE || IP_VALID(port)) #endif extern int vm_debug_events; -#define VMF_CHECK_ZFDELAY 0x100 -#define VMF_COWDELAY 0x101 -#define VMF_ZFDELAY 0x102 +#define VMF_CHECK_ZFDELAY 0x100 +#define VMF_COWDELAY 0x101 +#define VMF_ZFDELAY 0x102 +#define VMF_COMPRESSORDELAY 0x103 -#define VM_PAGEOUT_SCAN 0x104 -#define VM_PAGEOUT_BALANCE 0x105 -#define VM_PAGEOUT_FREELIST 0x106 -#define VM_PAGEOUT_PURGEONE 0x107 -#define VM_PAGEOUT_CACHE_EVICT 0x108 -#define VM_PAGEOUT_THREAD_BLOCK 0x109 +#define VM_PAGEOUT_SCAN 0x104 +#define VM_PAGEOUT_BALANCE 0x105 +#define VM_PAGEOUT_FREELIST 0x106 +#define VM_PAGEOUT_PURGEONE 0x107 +#define VM_PAGEOUT_CACHE_EVICT 0x108 +#define VM_PAGEOUT_THREAD_BLOCK 0x109 +#define VM_PAGEOUT_JETSAM 0x10A +#define VM_PAGEOUT_PAGE_TOKEN 0x10B -#define VM_UPL_PAGE_WAIT 0x120 -#define VM_IOPL_PAGE_WAIT 0x121 +#define VM_UPL_PAGE_WAIT 0x120 +#define VM_IOPL_PAGE_WAIT 0x121 +#define VM_PAGE_WAIT_BLOCK 0x122 -#define VM_PRESSURE_EVENT 0x130 +#define VM_PRESSURE_EVENT 0x130 +#define VM_EXECVE 0x131 +#define VM_WAKEUP_COMPACTOR_SWAPPER 0x132 #define VM_DEBUG_EVENT(name, event, control, arg1, arg2, arg3, arg4) \ MACRO_BEGIN \ @@ -128,7 +145,7 @@ extern int vm_debug_events; } \ MACRO_END - +extern void inline memoryshot(unsigned int event, unsigned int control); extern kern_return_t vm_map_create_upl( vm_map_t map, @@ -164,6 +181,8 @@ extern vm_object_offset_t vm_page_get_offset(vm_page_t page); extern ppnum_t vm_page_get_phys_page(vm_page_t page); extern vm_page_t vm_page_get_next(vm_page_t page); +extern kern_return_t mach_vm_pressure_level_monitor(boolean_t wait_for_pressure, unsigned int *pressure_level); + #ifdef MACH_KERNEL_PRIVATE #include @@ -389,13 +408,14 @@ extern void vm_page_decrypt( vm_page_t page, vm_map_offset_t kernel_map_offset); extern kern_return_t vm_paging_map_object( - vm_map_offset_t *address, vm_page_t page, vm_object_t object, vm_object_offset_t offset, - vm_map_size_t *size, vm_prot_t protection, - boolean_t can_unlock_object); + boolean_t can_unlock_object, + vm_map_size_t *size, /* IN/OUT */ + vm_map_offset_t *address, /* OUT */ + boolean_t *need_unmap); /* OUT */ extern void vm_paging_unmap_object( vm_object_t object, vm_map_offset_t start, @@ -463,10 +483,33 @@ struct vm_page_stats_reusable { uint64_t reuse_pages_failure; uint64_t can_reuse_success; uint64_t can_reuse_failure; + uint64_t reusable_reclaimed; }; extern struct vm_page_stats_reusable vm_page_stats_reusable; extern int hibernate_flush_memory(void); +extern void hibernate_create_paddr_map(void); + +extern int vm_compressor_mode; +extern int 
vm_compressor_thread_count; + +#define VM_PAGER_DEFAULT 0x1 /* Use default pager. */ +#define VM_PAGER_COMPRESSOR_NO_SWAP 0x2 /* In-core compressor only. */ +#define VM_PAGER_COMPRESSOR_WITH_SWAP 0x4 /* In-core compressor + swap backend. */ +#define VM_PAGER_FREEZER_DEFAULT 0x8 /* Freezer backed by default pager.*/ +#define VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP 0x10 /* Freezer backed by in-core compressor only i.e. frozen data remain in-core compressed.*/ +#define VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP 0x20 /* Freezer backed by in-core compressor with swap support too.*/ + +#define VM_PAGER_MAX_MODES 6 /* Total number of vm compressor modes supported */ + +#define DEFAULT_PAGER_IS_ACTIVE ((vm_compressor_mode & VM_PAGER_DEFAULT) == VM_PAGER_DEFAULT) + +#define COMPRESSED_PAGER_IS_ACTIVE (vm_compressor_mode & (VM_PAGER_COMPRESSOR_NO_SWAP | VM_PAGER_COMPRESSOR_WITH_SWAP)) + +#define DEFAULT_FREEZER_IS_ACTIVE ((vm_compressor_mode & VM_PAGER_FREEZER_DEFAULT) == VM_PAGER_FREEZER_DEFAULT) + +#define DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE (vm_compressor_mode & (VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP | VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP)) + #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_protos.h b/osfmk/vm/vm_protos.h index fa8c27872..533b03ada 100644 --- a/osfmk/vm/vm_protos.h +++ b/osfmk/vm/vm_protos.h @@ -92,6 +92,8 @@ extern vm_map_offset_t get_map_max(vm_map_t); extern vm_map_size_t get_vmmap_size(vm_map_t); extern int get_vmmap_entries(vm_map_t); +int vm_map_page_mask(vm_map_t); + extern boolean_t coredumpok(vm_map_t map, vm_offset_t va); /* @@ -172,10 +174,9 @@ extern boolean_t vnode_pager_isSSD( struct vnode *); extern void vnode_pager_throttle( void); -extern uint32_t vnode_pager_return_hard_throttle_limit( +extern uint32_t vnode_pager_return_throttle_io_limit( struct vnode *, - uint32_t *, - uint32_t); + uint32_t *); extern kern_return_t vnode_pager_get_pathname( struct vnode *vp, char *pathname, @@ -195,7 +196,7 @@ extern kern_return_t vnode_pager_get_cs_blobs( #endif /* CHECK_CS_VALIDATION_BITMAP */ -extern void vnode_pager_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +extern void vnode_pager_bootstrap(void); extern kern_return_t vnode_pager_data_unlock( memory_object_t mem_obj, @@ -215,10 +216,9 @@ extern kern_return_t vnode_pager_get_isinuse( extern kern_return_t vnode_pager_get_isSSD( memory_object_t, boolean_t *); -extern kern_return_t vnode_pager_check_hard_throttle( +extern kern_return_t vnode_pager_get_throttle_io_limit( memory_object_t, - uint32_t *, - uint32_t); + uint32_t *); extern kern_return_t vnode_pager_get_object_pathname( memory_object_t mem_obj, char *pathname, @@ -357,7 +357,7 @@ default_freezer_pack( boolean_t *shared, vm_object_t src_object, struct default_freezer_handle *df_handle); -__private_extern__ void +__private_extern__ kern_return_t default_freezer_unpack( struct default_freezer_handle *df_handle); __private_extern__ void @@ -409,7 +409,13 @@ extern memory_object_t device_pager_setup( uintptr_t, vm_size_t, int); -extern void device_pager_bootstrap(void) __attribute__((section("__TEXT, initcode"))); +extern void device_pager_bootstrap(void); + +extern kern_return_t pager_map_to_phys_contiguous( + memory_object_control_t object, + memory_object_offset_t offset, + addr64_t base_vaddr, + vm_size_t size); extern kern_return_t memory_object_create_named( memory_object_t pager, @@ -429,6 +435,7 @@ extern int macx_swapinfo( extern void log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot); extern void log_unnest_badness(vm_map_t, 
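The VM_PAGER_* flags above form a bitmask in vm_compressor_mode, and the *_IS_ACTIVE macros test it in two shapes: an exact mask-and-compare for single-bit modes, and a plain mask when either of two bits qualifies (so COMPRESSED_PAGER_IS_ACTIVE yields a nonzero value, not necessarily 1). A self-contained illustration of the same two tests:

    #include <stdio.h>

    #define VM_PAGER_DEFAULT                       0x01
    #define VM_PAGER_COMPRESSOR_NO_SWAP            0x02
    #define VM_PAGER_COMPRESSOR_WITH_SWAP          0x04
    #define VM_PAGER_FREEZER_DEFAULT               0x08
    #define VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP    0x10
    #define VM_PAGER_FREEZER_COMPRESSOR_WITH_SWAP  0x20

    /* single-bit test: mask and compare */
    static int default_pager_active(int mode)
    {
        return (mode & VM_PAGER_DEFAULT) == VM_PAGER_DEFAULT;
    }

    /* either-of-two-bits test: a plain mask is enough */
    static int compressed_pager_active(int mode)
    {
        return mode & (VM_PAGER_COMPRESSOR_NO_SWAP | VM_PAGER_COMPRESSOR_WITH_SWAP);
    }

    int main(void)
    {
        int mode = VM_PAGER_COMPRESSOR_WITH_SWAP | VM_PAGER_FREEZER_DEFAULT;
        printf("default=%d compressed=%d\n",
               default_pager_active(mode), compressed_pager_active(mode) != 0);
        return 0;   /* prints: default=0 compressed=1 */
    }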
vm_map_offset_t, vm_map_offset_t); +struct proc; extern int cs_allow_invalid(struct proc *p); extern int cs_invalid_page(addr64_t vaddr); extern boolean_t cs_validate_page(void *blobs, @@ -442,6 +449,11 @@ extern kern_return_t mach_memory_entry_purgable_control( vm_purgable_t control, int *state); +extern kern_return_t mach_memory_entry_get_page_counts( + ipc_port_t entry_port, + unsigned int *resident_page_count, + unsigned int *dirty_page_count); + extern kern_return_t mach_memory_entry_page_op( ipc_port_t entry_port, vm_object_offset_t offset, @@ -473,6 +485,28 @@ extern void no_paging_space_action(void); #define VM_TOGGLE_SET 1 #define VM_TOGGLE_GETVALUE 999 int vm_toggle_entry_reuse(int, int*); + +#define SWAP_WRITE 0x00000000 /* Write buffer (pseudo flag). */ +#define SWAP_READ 0x00000001 /* Read buffer. */ +#define SWAP_ASYNC 0x00000002 /* Start I/O, do not wait. */ + +extern void vm_compressor_pager_init(void); +extern kern_return_t compressor_memory_object_create( + vm_size_t, + memory_object_t *); + +/* the object purger. purges the next eligible object from memory. */ +/* returns TRUE if an object was purged, otherwise FALSE. */ +boolean_t vm_purgeable_object_purge_one_unlocked(int force_purge_below_group); + +struct trim_list { + uint64_t tl_offset; + uint64_t tl_length; + struct trim_list *tl_next; +}; + +u_int32_t vnode_trim_list(struct vnode *vp, struct trim_list *tl); + #endif /* _VM_VM_PROTOS_H_ */ #endif /* XNU_KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_purgeable.c b/osfmk/vm/vm_purgeable.c index 61830edeb..5930bc7d4 100644 --- a/osfmk/vm/vm_purgeable.c +++ b/osfmk/vm/vm_purgeable.c @@ -24,12 +24,17 @@ #include #include #include /* kmem_alloc */ +#include #include #include #include +#include + +extern vm_pressure_level_t memorystatus_vm_pressure_level; struct token { token_cnt_t count; + token_idx_t prev; token_idx_t next; }; @@ -65,6 +70,8 @@ decl_lck_mtx_data(,vm_purgeable_queue_lock) static token_idx_t vm_purgeable_token_remove_first(purgeable_q_t queue); +static void vm_purgeable_stats_helper(vm_purgeable_stat_t *stat, purgeable_q_t queue, int group, task_t target_task); + #if MACH_ASSERT static void vm_purgeable_token_check_queue(purgeable_q_t queue) @@ -228,8 +235,10 @@ find_available_token: if (queue->token_q_tail == 0) { assert(queue->token_q_head == 0 && queue->token_q_unripe == 0); queue->token_q_head = token; + tokens[token].prev = 0; } else { tokens[queue->token_q_tail].next = token; + tokens[token].prev = queue->token_q_tail; } if (queue->token_q_unripe == 0) { /* only ripe tokens (token * count == 0) in queue */ @@ -293,6 +302,7 @@ vm_purgeable_token_remove_first(purgeable_q_t queue) queue->token_q_head = tokens[token].next; if (queue->token_q_head) { tokens[queue->token_q_head].count += tokens[token].count; + tokens[queue->token_q_head].prev = 0; } else { /* currently no other tokens in the queue */ /* @@ -355,11 +365,11 @@ vm_purgeable_token_remove_last(purgeable_q_t queue) } else { token_idx_t new_tail; - for (new_tail = queue->token_q_head; - tokens[new_tail].next != token && new_tail != 0; - new_tail = tokens[new_tail].next) { - } + new_tail = tokens[token].prev; + + assert(new_tail); assert(tokens[new_tail].next == token); + queue->token_q_tail = new_tail; tokens[new_tail].next = 0; } @@ -398,6 +408,7 @@ vm_purgeable_token_delete_first(purgeable_q_t queue) if (token) { /* stick removed token on free queue */ tokens[token].next = token_free_idx; + tokens[token].prev = 0; token_free_idx = token; } } @@ -413,6 +424,7 @@ 
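The new prev field added to struct token above turns the purgeable token queue into a doubly linked list (indexed into the tokens array, with 0 as the null index), which is what lets vm_purgeable_token_remove_last in the following hunk drop its O(n) walk from the head. A minimal sketch of the index-linked queue and the now-O(1) tail removal:

    #include <assert.h>

    /* Index-linked token queue, as in vm_purgeable.c: slot 0 is null. */
    #define NTOKENS 128

    struct token { unsigned prev, next, count; };

    static struct token tokens[NTOKENS];
    static unsigned q_head, q_tail;

    static void push_tail(unsigned t)
    {
        tokens[t].next = 0;
        tokens[t].prev = q_tail;
        if (q_tail == 0)
            q_head = t;             /* queue was empty */
        else
            tokens[q_tail].next = t;
        q_tail = t;
    }

    /* O(1) thanks to the back pointer; the pre-patch code had to walk
     * from q_head to find the predecessor of the tail. */
    static unsigned pop_tail(void)
    {
        unsigned t = q_tail;
        assert(t != 0);
        q_tail = tokens[t].prev;
        if (q_tail == 0)
            q_head = 0;
        else
            tokens[q_tail].next = 0;
        tokens[t].prev = tokens[t].next = 0;
        return t;
    }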
vm_purgeable_token_delete_last(purgeable_q_t queue) if (token) { /* stick removed token on free queue */ tokens[token].next = token_free_idx; + tokens[token].prev = 0; token_free_idx = token; } } @@ -521,8 +533,10 @@ vm_purgeable_token_remove_ripe(purgeable_q_t queue) /* return token to free list. advance token list. */ token_idx_t new_head = tokens[queue->token_q_head].next; tokens[queue->token_q_head].next = token_free_idx; + tokens[queue->token_q_head].prev = 0; token_free_idx = queue->token_q_head; queue->token_q_head = new_head; + tokens[new_head].prev = 0; if (new_head == 0) queue->token_q_tail = 0; @@ -573,36 +587,55 @@ vm_purgeable_token_choose_and_delete_ripe(purgeable_q_t queue, purgeable_q_t que /* migrate to queue2 */ /* go to migration target loc */ - token_idx_t *token_in_queue2 = &queue2->token_q_head; - while (*token_in_queue2 && count > tokens[*token_in_queue2].count) { - count -= tokens[*token_in_queue2].count; - token_in_queue2 = &tokens[*token_in_queue2].next; - } - if ((*token_in_queue2 == queue2->token_q_unripe) || /* becomes the first - * unripe token */ - (queue2->token_q_unripe == 0)) - queue2->token_q_unripe = token; /* must update unripe - * pointer */ + token_idx_t token_to_insert_before = queue2->token_q_head, token_to_insert_after; - /* insert token */ - tokens[token].count = count; - tokens[token].next = *token_in_queue2; + while (token_to_insert_before != 0 && count > tokens[token_to_insert_before].count) { + count -= tokens[token_to_insert_before].count; + token_to_insert_before = tokens[token_to_insert_before].next; + } + + /* token_to_insert_before is now set correctly */ + + /* should the inserted token become the first unripe token? */ + if ((token_to_insert_before == queue2->token_q_unripe) || (queue2->token_q_unripe == 0)) + queue2->token_q_unripe = token; /* if so, must update unripe pointer */ /* - * if inserting at end, reduce new_pages by that value if - * inserting before token, reduce counter of that token + * insert token. + * if inserting at end, reduce new_pages by that value; + * otherwise, reduce counter of next token */ - if (*token_in_queue2 == 0) { /* insertion at end of queue2 */ - queue2->token_q_tail = token; /* must update tail - * pointer */ + + tokens[token].count = count; + + if (token_to_insert_before != 0) { + token_to_insert_after = tokens[token_to_insert_before].prev; + + tokens[token].next = token_to_insert_before; + tokens[token_to_insert_before].prev = token; + + assert(tokens[token_to_insert_before].count >= count); + tokens[token_to_insert_before].count -= count; + } else { + /* if we ran off the end of the list, the token to insert after is the tail */ + token_to_insert_after = queue2->token_q_tail; + + tokens[token].next = 0; + queue2->token_q_tail = token; + assert(queue2->new_pages >= (int32_t) count); queue2->new_pages -= count; + } + + if (token_to_insert_after != 0) { + tokens[token].prev = token_to_insert_after; + tokens[token_to_insert_after].next = token; } else { - assert(tokens[*token_in_queue2].count >= count); - tokens[*token_in_queue2].count -= count; + /* is this case possible? */ + tokens[token].prev = 0; + queue2->token_q_head = token; } - *token_in_queue2 = token; #if MACH_ASSERT queue2->debug_count_tokens++; @@ -613,9 +646,22 @@ vm_purgeable_token_choose_and_delete_ripe(purgeable_q_t queue, purgeable_q_t que /* Find an object that can be locked. Returns locked object. */ /* Call with purgeable queue locked. 
*/ -static vm_object_t -vm_purgeable_object_find_and_lock(purgeable_q_t queue, int group) +static vm_object_t +vm_purgeable_object_find_and_lock( + purgeable_q_t queue, + int group, + boolean_t pick_ripe) { + vm_object_t object, best_object; + int object_task_importance; + int best_object_task_importance; + int best_object_skipped; + int num_objects_skipped; + task_t owner; + + best_object = VM_OBJECT_NULL; + best_object_task_importance = INT_MAX; + lck_mtx_assert(&vm_purgeable_queue_lock, LCK_MTX_ASSERT_OWNED); /* * Usually we would pick the first element from a queue. However, we @@ -623,21 +669,62 @@ vm_purgeable_object_find_and_lock(purgeable_q_t queue, int group) * remaining elements in order. */ - vm_object_t object; + num_objects_skipped = -1; for (object = (vm_object_t) queue_first(&queue->objq[group]); !queue_end(&queue->objq[group], (queue_entry_t) object); - object = (vm_object_t) queue_next(&object->objq)) { - if (vm_object_lock_try(object)) { - /* Locked. Great. We'll take it. Remove and return. */ - queue_remove(&queue->objq[group], object, - vm_object_t, objq); - object->objq.next = 0; - object->objq.prev = 0; + object = (vm_object_t) queue_next(&object->objq), + num_objects_skipped++) { + + if (pick_ripe && + ! object->purgeable_when_ripe) { + /* we want an object that has a ripe token */ + continue; + } + + object_task_importance = 0; + owner = object->vo_purgeable_owner; + if (owner) { + object_task_importance = task_importance_estimate(owner); + } + if (object_task_importance < best_object_task_importance) { + if (vm_object_lock_try(object)) { + if (best_object != VM_OBJECT_NULL) { + /* forget about previous best object */ + vm_object_unlock(best_object); + } + best_object = object; + best_object_task_importance = object_task_importance; + best_object_skipped = num_objects_skipped; + if (best_object_task_importance == 0) { + /* can't get any better: stop looking */ + break; + } + } + } + } + + if (best_object) { + /* Locked. Great. We'll take it. Remove and return. 
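The rewritten vm_purgeable_object_find_and_lock above no longer grabs the first lockable object: it scans the whole group, keeps the lockable candidate whose owning task has the lowest task_importance_estimate, and stops early once it finds importance 0. A simplified model of that best-candidate scan, with a plain linked list and a boolean trylock standing in for vm_object_lock_try:

    #include <limits.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct object {
        struct object *next;
        int importance;         /* stand-in for task_importance_estimate(owner) */
        bool locked;
    };

    static bool try_lock(struct object *o)
    {
        if (o->locked) return false;
        o->locked = true;
        return true;
    }
    static void unlock(struct object *o) { o->locked = false; }

    /* Scan the whole list, keep the best (lowest-importance) lockable
     * candidate, and bail out early once nothing can beat it. */
    static struct object *
    pick_victim(struct object *head)
    {
        struct object *best = NULL;
        int best_importance = INT_MAX;

        for (struct object *o = head; o != NULL; o = o->next) {
            if (o->importance < best_importance && try_lock(o)) {
                if (best != NULL)
                    unlock(best);   /* forget the previous best */
                best = o;
                best_importance = o->importance;
                if (best_importance == 0)
                    break;          /* can't get any better: stop looking */
            }
        }
        return best;                /* returned locked, or NULL */
    }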
*/ +// printf("FOUND PURGEABLE object %p skipped %d\n", object, num_objects_skipped); + + /* clear ownership when dequeueing purgeable object */ + owner = best_object->vo_purgeable_owner; + if (owner) { + assert(owner->task_volatile_objects > 0); + OSAddAtomic(-1, &owner->task_volatile_objects); + best_object->vo_purgeable_owner = NULL; + } + + queue_remove(&queue->objq[group], best_object, + vm_object_t, objq); + best_object->purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; + best_object->purgeable_queue_group = 0; + best_object->objq.next = NULL; + best_object->objq.prev = NULL; #if MACH_ASSERT - queue->debug_count_objects--; + queue->debug_count_objects--; #endif - return object; - } + return best_object; } return 0; @@ -672,7 +759,7 @@ restart: */ for (group = 0; group < NUM_VOLATILE_GROUPS; group++) { while (!queue_empty(&queue->objq[group])) { - object = vm_purgeable_object_find_and_lock(queue, group); + object = vm_purgeable_object_find_and_lock(queue, group, FALSE); if (object == VM_OBJECT_NULL) { lck_mtx_unlock(&vm_purgeable_queue_lock); mutex_pause(collisions++); @@ -683,9 +770,11 @@ restart: /* Lock the page queue here so we don't hold it * over the whole, legthy operation */ - vm_page_lock_queues(); - vm_purgeable_token_remove_first(queue); - vm_page_unlock_queues(); + if (object->purgeable_when_ripe) { + vm_page_lock_queues(); + vm_purgeable_token_remove_first(queue); + vm_page_unlock_queues(); + } assert(object->purgable == VM_PURGABLE_VOLATILE); (void) vm_object_purge(object); @@ -707,12 +796,27 @@ restart: } boolean_t -vm_purgeable_object_purge_one(void) +vm_purgeable_object_purge_one_unlocked( + int force_purge_below_group) +{ + boolean_t retval; + + vm_page_lock_queues(); + retval = vm_purgeable_object_purge_one(force_purge_below_group); + vm_page_unlock_queues(); + + return retval; +} + +boolean_t +vm_purgeable_object_purge_one( + int force_purge_below_group) { enum purgeable_q_type i; int group; vm_object_t object = 0; purgeable_q_t queue, queue2; + boolean_t forced_purge; /* Need the page queue lock since we'll be changing the token queue. */ #if MACH_ASSERT @@ -724,13 +828,21 @@ vm_purgeable_object_purge_one(void) for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) { queue = &purgeable_queues[i]; - /* - * Are there any ripe tokens on this queue? If yes, we'll - * find an object to purge there - */ - if (!(queue->token_q_head && tokens[queue->token_q_head].count == 0)) - continue; /* no token? Look at next purgeable - * queue */ + if (force_purge_below_group == 0) { + /* + * Are there any ripe tokens on this queue? If yes, + * we'll find an object to purge there + */ + if (!queue->token_q_head) { + /* no token: look at next purgeable queue */ + continue; + } + + if (tokens[queue->token_q_head].count != 0) { + /* no ripe token: next queue */ + continue; + } + } /* * Now look through all groups, starting from the lowest. If @@ -739,10 +851,39 @@ vm_purgeable_object_purge_one(void) * lock, remove a token and then purge the object. */ for (group = 0; group < NUM_VOLATILE_GROUPS; group++) { + if (!queue->token_q_head || + tokens[queue->token_q_head].count != 0) { + /* no tokens or no ripe tokens */ + + if (group >= force_purge_below_group) { + /* no more groups to force-purge */ + break; + } + + /* + * Try and purge an object in this group + * even though no tokens are ripe. 
+ */ + if (!queue_empty(&queue->objq[group]) && + (object = vm_purgeable_object_find_and_lock(queue, group, FALSE))) { + lck_mtx_unlock(&vm_purgeable_queue_lock); + if (object->purgeable_when_ripe) { + vm_purgeable_token_delete_first(queue); + } + forced_purge = TRUE; + goto purge_now; + } + + /* nothing to purge in this group: next group */ + continue; + } if (!queue_empty(&queue->objq[group]) && - (object = vm_purgeable_object_find_and_lock(queue, group))) { + (object = vm_purgeable_object_find_and_lock(queue, group, TRUE))) { lck_mtx_unlock(&vm_purgeable_queue_lock); - vm_purgeable_token_choose_and_delete_ripe(queue, 0); + if (object->purgeable_when_ripe) { + vm_purgeable_token_choose_and_delete_ripe(queue, 0); + } + forced_purge = FALSE; goto purge_now; } if (i != PURGEABLE_Q_TYPE_OBSOLETE) { @@ -753,9 +894,12 @@ vm_purgeable_object_purge_one(void) PURGEABLE_Q_TYPE_LIFO]; if (!queue_empty(&queue2->objq[group]) && - (object = vm_purgeable_object_find_and_lock(queue2, group))) { + (object = vm_purgeable_object_find_and_lock(queue2, group, TRUE))) { lck_mtx_unlock(&vm_purgeable_queue_lock); - vm_purgeable_token_choose_and_delete_ripe(queue2, queue); + if (object->purgeable_when_ripe) { + vm_purgeable_token_choose_and_delete_ripe(queue2, queue); + } + forced_purge = FALSE; goto purge_now; } } @@ -775,6 +919,7 @@ purge_now: assert(object); assert(object->purgable == VM_PURGABLE_VOLATILE); vm_page_unlock_queues(); /* Unlock for call to vm_object_purge() */ +// printf("%sPURGING object %p task %p importance %d queue %d group %d force_purge_below_group %d memorystatus_vm_pressure_level %d\n", forced_purge ? "FORCED " : "", object, object->vo_purgeable_owner, task_importance_estimate(object->vo_purgeable_owner), i, group, force_purge_below_group, memorystatus_vm_pressure_level); (void) vm_object_purge(object); vm_object_unlock(object); vm_page_lock_queues(); @@ -793,17 +938,32 @@ purge_now: void vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group) { + task_t owner; + vm_object_lock_assert_exclusive(object); lck_mtx_lock(&vm_purgeable_queue_lock); if (queue->type == PURGEABLE_Q_TYPE_OBSOLETE) group = 0; + if (queue->type != PURGEABLE_Q_TYPE_LIFO) /* fifo and obsolete are * fifo-queued */ queue_enter(&queue->objq[group], object, vm_object_t, objq); /* last to die */ else queue_enter_first(&queue->objq[group], object, vm_object_t, objq); /* first to die */ + object->purgeable_queue_type = queue->type; + object->purgeable_queue_group = group; + + /* set ownership when enqueueing purgeable object */ + assert(object->vo_purgeable_owner == NULL); + owner = current_task(); + if (current_task() != kernel_task) { + OSAddAtomic(+1, &owner->task_volatile_objects); + assert(owner->task_volatile_objects > 0); + object->vo_purgeable_owner = owner; + } + #if MACH_ASSERT queue->debug_count_objects++; KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_ADD)), @@ -822,39 +982,152 @@ vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group) purgeable_q_t vm_purgeable_object_remove(vm_object_t object) { - enum purgeable_q_type i; - int group; + int group; + task_t owner; + enum purgeable_q_type type; + purgeable_q_t queue; vm_object_lock_assert_exclusive(object); + + type = object->purgeable_queue_type; + group = object->purgeable_queue_group; + + if (type == PURGEABLE_Q_TYPE_MAX) { + if (object->objq.prev || object->objq.next) + panic("unmarked object on purgeable q"); + + return NULL; + } else if (!(object->objq.prev && object->objq.next)) + panic("marked object not on 
purgeable q"); + lck_mtx_lock(&vm_purgeable_queue_lock); - - for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) { - purgeable_q_t queue = &purgeable_queues[i]; - for (group = 0; group < NUM_VOLATILE_GROUPS; group++) { - vm_object_t o; - for (o = (vm_object_t) queue_first(&queue->objq[group]); - !queue_end(&queue->objq[group], (queue_entry_t) o); - o = (vm_object_t) queue_next(&o->objq)) { - if (o == object) { - queue_remove(&queue->objq[group], object, - vm_object_t, objq); + + queue = &purgeable_queues[type]; + + /* clear ownership when dequeueing purgeable object */ + owner = object->vo_purgeable_owner; + if (owner) { + assert(owner->task_volatile_objects > 0); + OSAddAtomic(-1, &owner->task_volatile_objects); + object->vo_purgeable_owner = NULL; + } + + queue_remove(&queue->objq[group], object, vm_object_t, objq); + #if MACH_ASSERT - queue->debug_count_objects--; - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_REMOVE)), - 0, - tokens[queue->token_q_head].count, - queue->type, - group, - 0); + queue->debug_count_objects--; + KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, OBJECT_REMOVE)), + 0, + tokens[queue->token_q_head].count, + queue->type, + group, + 0); #endif - lck_mtx_unlock(&vm_purgeable_queue_lock); - object->objq.next = 0; - object->objq.prev = 0; - return &purgeable_queues[i]; - } + + lck_mtx_unlock(&vm_purgeable_queue_lock); + + object->purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; + object->purgeable_queue_group = 0; + + object->objq.next = NULL; + object->objq.prev = NULL; + + return &purgeable_queues[type]; +} + +void +vm_purgeable_stats_helper(vm_purgeable_stat_t *stat, purgeable_q_t queue, int group, task_t target_task) +{ + lck_mtx_assert(&vm_purgeable_queue_lock, LCK_MTX_ASSERT_OWNED); + + stat->count = stat->size = 0; + vm_object_t object; + for (object = (vm_object_t) queue_first(&queue->objq[group]); + !queue_end(&queue->objq[group], (queue_entry_t) object); + object = (vm_object_t) queue_next(&object->objq)) { + if (!target_task || object->vo_purgeable_owner == target_task) { + stat->count++; + stat->size += (object->resident_page_count * PAGE_SIZE); } + } + return; +} + +void +vm_purgeable_stats(vm_purgeable_info_t info, task_t target_task) +{ + purgeable_q_t queue; + int group; + + lck_mtx_lock(&vm_purgeable_queue_lock); + + /* Populate fifo_data */ + queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; + for (group = 0; group < NUM_VOLATILE_GROUPS; group++) + vm_purgeable_stats_helper(&(info->fifo_data[group]), queue, group, target_task); + + /* Populate lifo_data */ + queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; + for (group = 0; group < NUM_VOLATILE_GROUPS; group++) + vm_purgeable_stats_helper(&(info->lifo_data[group]), queue, group, target_task); + + /* Populate obsolete data */ + queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE]; + vm_purgeable_stats_helper(&(info->obsolete_data), queue, 0, target_task); + + lck_mtx_unlock(&vm_purgeable_queue_lock); + return; +} + + +static void +vm_purgeable_queue_disown( + purgeable_q_t queue, + int group, + task_t task) +{ + vm_object_t object; + int num_objects; + + lck_mtx_assert(&vm_purgeable_queue_lock, LCK_MTX_ASSERT_OWNED); + + num_objects = 0; + for (object = (vm_object_t) queue_first(&queue->objq[group]); + !queue_end(&queue->objq[group], (queue_entry_t) object); + object = (vm_object_t) queue_next(&object->objq)) { + if (object->vo_purgeable_owner == task) { + object->vo_purgeable_owner = NULL; + num_objects++; } } + assert(task->task_volatile_objects >= num_objects); + 
OSAddAtomic(-num_objects, &task->task_volatile_objects); + return; +} + +void +vm_purgeable_disown( + task_t task) +{ + purgeable_q_t queue; + int group; + + if (task == NULL) { + return; + } + + lck_mtx_lock(&vm_purgeable_queue_lock); + + queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE]; + vm_purgeable_queue_disown(queue, 0, task); + + queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; + for (group = 0; group < NUM_VOLATILE_GROUPS; group++) + vm_purgeable_queue_disown(queue, group, task); + + queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; + for (group = 0; group < NUM_VOLATILE_GROUPS; group++) + vm_purgeable_queue_disown(queue, group, task); + lck_mtx_unlock(&vm_purgeable_queue_lock); - return 0; } diff --git a/osfmk/vm/vm_purgeable_internal.h b/osfmk/vm/vm_purgeable_internal.h index 169aa660d..efd66bbb3 100644 --- a/osfmk/vm/vm_purgeable_internal.h +++ b/osfmk/vm/vm_purgeable_internal.h @@ -98,7 +98,7 @@ void vm_purgeable_q_advance_all(void); /* the object purger. purges the next eligible object from memory. */ /* returns TRUE if an object was purged, otherwise FALSE. */ -boolean_t vm_purgeable_object_purge_one(void); +boolean_t vm_purgeable_object_purge_one(int force_purge_below_group); /* purge all volatile objects now */ void vm_purgeable_object_purge_all(void); @@ -109,4 +109,9 @@ void vm_purgeable_object_add(vm_object_t object, purgeable_q_t queue, int group) /* look for object. If found, remove from purgeable queue. */ purgeable_q_t vm_purgeable_object_remove(vm_object_t object); +/* statistics for purgable objects in all queues */ +void vm_purgeable_stats(vm_purgeable_info_t info, task_t target_task); + +void vm_purgeable_disown(task_t task); + #endif /* __VM_PURGEABLE_INTERNAL__ */ diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index c660d662c..2bb8118c2 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -90,6 +90,7 @@ #include #include #include +#include #include @@ -293,9 +294,9 @@ ppnum_t vm_page_guard_addr = (ppnum_t) -2; * system (pageout daemon). These queues are * defined here, but are shared by the pageout * module. 
The inactive queue is broken into - * inactive and zf for convenience as the + * file backed and anonymous for convenience as the * pageout daemon often assignes a higher - * affinity to zf pages + * importance to anonymous pages (less likely to pick) */ queue_head_t vm_page_queue_active; queue_head_t vm_page_queue_inactive; @@ -317,6 +318,11 @@ unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */ unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */ uint64_t vm_page_purged_count = 0; /* total count of purged pages */ +unsigned int vm_page_external_count = 0; +unsigned int vm_page_internal_count = 0; +unsigned int vm_page_pageable_external_count = 0; +unsigned int vm_page_pageable_internal_count = 0; + #if DEVELOPMENT || DEBUG unsigned int vm_page_speculative_recreated = 0; unsigned int vm_page_speculative_created = 0; @@ -343,7 +349,7 @@ unsigned int vm_page_free_min = 0; unsigned int vm_page_throttle_limit = 0; uint32_t vm_page_creation_throttle = 0; unsigned int vm_page_inactive_target = 0; -unsigned int vm_page_anonymous_min = 0; +unsigned int vm_page_anonymous_min = 0; unsigned int vm_page_inactive_min = 0; unsigned int vm_page_free_reserved = 0; unsigned int vm_page_throttle_count = 0; @@ -436,6 +442,8 @@ vm_page_init_lck_grp(void) lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr); lck_attr_setdefault(&vm_page_lck_attr); lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr); + + vm_compressor_init_locks(); } void @@ -460,6 +468,8 @@ vm_page_init_local_q() VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr); queue_init(&lq->vpl_queue); lq->vpl_count = 0; + lq->vpl_internal_count = 0; + lq->vpl_external_count = 0; } vm_page_local_q_count = num_cpus; @@ -546,9 +556,10 @@ vm_page_bootstrap( m->reusable = FALSE; m->slid = FALSE; m->was_dirty = FALSE; + m->xpmapped = FALSE; + m->compressor = FALSE; m->__unused_object_bits = 0; - /* * Initialize the page queues. */ @@ -1019,6 +1030,8 @@ vm_page_insert_internal( VM_PAGE_CHECK(mem); #endif + assert(page_aligned(offset)); + if (object == vm_submap_object) { /* the vm_submap_object is only a placeholder for submaps */ panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset); @@ -1084,7 +1097,6 @@ vm_page_insert_internal( /* * Now link into the object's list of backed pages. */ - VM_PAGE_INSERT(mem, object); mem->tabled = TRUE; @@ -1098,7 +1110,28 @@ vm_page_insert_internal( } assert(object->resident_page_count >= object->wired_page_count); + if (object->internal) { + OSAddAtomic(1, &vm_page_internal_count); + } else { + OSAddAtomic(1, &vm_page_external_count); + } + + /* + * It wouldn't make sense to insert a "reusable" page in + * an object (the page would have been marked "reusable" only + * at the time of a madvise(MADV_FREE_REUSABLE) if it was already + * in the object at that time). + * But a page could be inserted in a "all_reusable" object, if + * something faults it in (a vm_read() from another task or a + * "use-after-free" issue in user space, for example). It can + * also happen if we're relocating a page from that object to + * a different physical page during a physically-contiguous + * allocation. 
+ */ assert(!mem->reusable); + if (mem->object->all_reusable) { + OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count); + } if (object->purgable == VM_PURGABLE_VOLATILE) { if (VM_PAGE_WIRED(mem)) { @@ -1293,6 +1326,13 @@ vm_page_remove( assert(mem->object->resident_page_count > 0); mem->object->resident_page_count--; + if (mem->object->internal) { + assert(vm_page_internal_count); + OSAddAtomic(-1, &vm_page_internal_count); + } else { + assert(vm_page_external_count); + OSAddAtomic(-1, &vm_page_external_count); + } if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) { if (mem->object->resident_page_count == 0) vm_object_cache_remove(mem->object); @@ -1466,6 +1506,8 @@ vm_page_rename( vm_object_offset_t new_offset, boolean_t encrypted_ok) { + boolean_t internal_to_external, external_to_internal; + assert(mem->object != new_object); /* @@ -1496,9 +1538,39 @@ vm_page_rename( */ vm_page_lockspin_queues(); + internal_to_external = FALSE; + external_to_internal = FALSE; + + if (mem->local) { + /* + * it's much easier to get the vm_page_pageable_xxx accounting correct + * if we first move the page to the active queue... it's going to end + * up there anyway, and we don't do vm_page_rename's frequently enough + * for this to matter. + */ + VM_PAGE_QUEUES_REMOVE(mem); + vm_page_activate(mem); + } + if (mem->active || mem->inactive || mem->speculative) { + if (mem->object->internal && !new_object->internal) { + internal_to_external = TRUE; + } + if (!mem->object->internal && new_object->internal) { + external_to_internal = TRUE; + } + } + vm_page_remove(mem, TRUE); vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE); + if (internal_to_external) { + vm_page_pageable_internal_count--; + vm_page_pageable_external_count++; + } else if (external_to_internal) { + vm_page_pageable_external_count--; + vm_page_pageable_internal_count++; + } + vm_page_unlock_queues(); } @@ -1679,6 +1751,10 @@ void vm_page_more_fictitious(void) vm_page_wait(THREAD_UNINT); return; } + + /* Increment zone page count. We account for all memory managed by the zone in z->page_count */ + OSAddAtomic64(1, &(vm_page_zone->page_count)); + zcram(vm_page_zone, addr, PAGE_SIZE); lck_mtx_unlock(&vm_page_alloc_lock); @@ -2140,8 +2216,12 @@ vm_page_wait( if (need_wakeup) thread_wakeup((event_t)&vm_page_free_wanted); - if (wait_result == THREAD_WAITING) + if (wait_result == THREAD_WAITING) { + VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START, + vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0); wait_result = thread_block(THREAD_CONTINUE_NULL); + VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0); + } return(wait_result == THREAD_AWAKENED); } else { @@ -2265,7 +2345,7 @@ vm_page_free_prepare_queues( vm_pageout_steal_laundry(mem, TRUE); counter(++c_laundry_pages_freed); } - + VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */ if (VM_PAGE_WIRED(mem)) { @@ -2743,7 +2823,7 @@ vm_page_deactivate_internal( * reference which is held on the object while the page is in the pageout queue... 
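vm_page_rename above has to keep the new vm_page_pageable_internal_count / vm_page_pageable_external_count split coherent when a page moves between an internal and an external object; pages on a local queue are first promoted to the active queue so only active, inactive, and speculative pages ever need the adjustment. The fix-up reduces to a small transfer between the two counters, sketched here with stand-in globals:

    #include <stdbool.h>

    static unsigned pageable_internal, pageable_external;

    struct page { bool pageable; };  /* active/inactive/speculative in the kernel */

    /* Mirror of the internal_to_external / external_to_internal flags
     * computed in vm_page_rename: move one unit between the counters
     * only when a pageable page changes object type. */
    static void
    rename_accounting(const struct page *m, bool old_internal, bool new_internal)
    {
        if (!m->pageable || old_internal == new_internal)
            return;

        if (old_internal) {          /* internal -> external */
            pageable_internal--;
            pageable_external++;
        } else {                     /* external -> internal */
            pageable_external--;
            pageable_internal++;
        }
    }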
* just let the normal laundry processing proceed */ - if (m->pageout_queue || m->private || m->fictitious || (VM_PAGE_WIRED(m))) + if (m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m))) return; if (!m->absent && clear_hw_reference == TRUE) @@ -2820,6 +2900,11 @@ void vm_page_enqueue_cleaned(vm_page_t m) m->inactive = TRUE; vm_page_inactive_count++; + if (m->object->internal) { + vm_page_pageable_internal_count++; + } else { + vm_page_pageable_external_count++; + } vm_pageout_enqueued_cleaned++; } @@ -2832,6 +2917,12 @@ void vm_page_enqueue_cleaned(vm_page_t m) * The page queues must be locked. */ +#if CONFIG_JETSAM +#if LATENCY_JETSAM +extern struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS]; +#endif /* LATENCY_JETSAM */ +#endif /* CONFIG_JETSAM */ + void vm_page_activate( register vm_page_t m) @@ -2861,7 +2952,7 @@ vm_page_activate( * reference which is held on the object while the page is in the pageout queue... * just let the normal laundry processing proceed */ - if (m->pageout_queue || m->private || m->fictitious) + if (m->pageout_queue || m->private || m->fictitious || m->compressor) return; #if DEBUG @@ -2890,6 +2981,42 @@ vm_page_activate( queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); m->active = TRUE; vm_page_active_count++; + if (m->object->internal) { + vm_page_pageable_internal_count++; + } else { + vm_page_pageable_external_count++; + } +#if LATENCY_JETSAM + if (jlp_init) { + uint64_t now = mach_absolute_time(); + uint64_t delta = now - jlp_time; + clock_sec_t jl_secs = 0; + clock_usec_t jl_usecs = 0; + vm_page_t jlp; + + absolutetime_to_microtime(delta, &jl_secs, &jl_usecs); + + jl_usecs += jl_secs * USEC_PER_SEC; + if (jl_usecs >= JETSAM_LATENCY_TOKEN_AGE) { + + jlp = &jetsam_latency_page[jlp_current]; + if (jlp->active) { + queue_remove(&vm_page_queue_active, jlp, vm_page_t, pageq); + } + queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq); + + jlp->active = TRUE; + + jlp->offset = now; + jlp_time = jlp->offset; + + if(++jlp_current == NUM_OF_JETSAM_LATENCY_TOKENS) { + jlp_current = 0; + } + + } + } +#endif /* LATENCY_JETSAM */ } m->reference = TRUE; m->no_cache = FALSE; @@ -2928,7 +3055,7 @@ vm_page_speculate( * reference which is held on the object while the page is in the pageout queue... * just let the normal laundry processing proceed */ - if (m->pageout_queue || m->private || m->fictitious) + if (m->pageout_queue || m->private || m->fictitious || m->compressor) return; VM_PAGE_QUEUES_REMOVE(m); @@ -2985,6 +3112,11 @@ vm_page_speculate( enqueue_tail(&aq->age_q, &m->pageq); m->speculative = TRUE; vm_page_speculative_count++; + if (m->object->internal) { + vm_page_pageable_internal_count++; + } else { + vm_page_pageable_external_count++; + } if (new == TRUE) { vm_object_lock_assert_exclusive(m->object); @@ -3057,7 +3189,7 @@ vm_page_lru( * reference which is held on the object while the page is in the pageout queue... * just let the normal laundry processing proceed */ - if (m->pageout_queue || m->private || (VM_PAGE_WIRED(m))) + if (m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m))) return; m->no_cache = FALSE; @@ -3075,11 +3207,14 @@ vm_page_reactivate_all_throttled(void) vm_page_t first_active; vm_page_t m; int extra_active_count; + int extra_internal_count, extra_external_count; if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) return; extra_active_count = 0; + extra_internal_count = 0; + extra_external_count = 0; vm_page_lock_queues(); if (! 
queue_empty(&vm_page_queue_throttled)) { /* @@ -3094,6 +3229,11 @@ vm_page_reactivate_all_throttled(void) assert(!VM_PAGE_WIRED(m)); extra_active_count++; + if (m->object->internal) { + extra_internal_count++; + } else { + extra_external_count++; + } m->throttled = FALSE; m->active = TRUE; @@ -3126,6 +3266,8 @@ vm_page_reactivate_all_throttled(void) * Adjust the global page counts. */ vm_page_active_count += extra_active_count; + vm_page_pageable_internal_count += extra_internal_count; + vm_page_pageable_external_count += extra_external_count; vm_page_throttled_count = 0; } assert(vm_page_throttled_count == 0); @@ -3214,7 +3356,11 @@ vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks) * Adjust the global page counts. */ vm_page_active_count += lq->vpl_count; + vm_page_pageable_internal_count += lq->vpl_internal_count; + vm_page_pageable_external_count += lq->vpl_external_count; lq->vpl_count = 0; + lq->vpl_internal_count = 0; + lq->vpl_external_count = 0; } assert(queue_empty(&lq->vpl_queue)); @@ -3229,13 +3375,13 @@ vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks) * * Zero-fill a part of the page. */ +#define PMAP_ZERO_PART_PAGE_IMPLEMENTED void vm_page_part_zero_fill( vm_page_t m, vm_offset_t m_pa, vm_size_t len) { - vm_page_t tmp; #if 0 /* @@ -3248,6 +3394,7 @@ vm_page_part_zero_fill( #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED pmap_zero_part_page(m->phys_page, m_pa, len); #else + vm_page_t tmp; while (1) { tmp = vm_page_grab(); if (tmp == VM_PAGE_NULL) { @@ -3702,6 +3849,7 @@ vm_page_find_contiguous( int yielded = 0; int dumped_run = 0; int stolen_pages = 0; + int compressed_pages = 0; #endif if (contig_pages == 0) @@ -3713,6 +3861,8 @@ vm_page_find_contiguous( #if DEBUG clock_get_system_microtime(&tv_start_sec, &tv_start_usec); #endif + PAGE_REPLACEMENT_ALLOWED(TRUE); + vm_page_lock_queues(); lck_mtx_lock(&vm_page_queue_free_lock); @@ -3763,9 +3913,9 @@ retry: RESET_STATE_OF_RUN(); } else if (VM_PAGE_WIRED(m) || m->gobbled || - m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted || - m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious || - m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) { + m->encrypted_cleaning || + m->pageout_queue || m->laundry || m->wanted || + m->cleaning || m->overwriting || m->pageout) { /* * page is in a transient state * or a state we don't want to deal @@ -3774,9 +3924,10 @@ retry: */ RESET_STATE_OF_RUN(); - } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) { + } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) { /* * page needs to be on one of our queues + * or it needs to belong to the compressor pool * in order for it to be stable behind the * locks we hold at this point... * if not, don't consider it which @@ -3827,7 +3978,7 @@ retry: * into a substitute page. 
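+ * For example, a page that is mapped (m->pmapped) or that holds + * the only copy of its data (m->dirty or m->precious) can't simply + * be stolen... its contents must first be copied into a page + * grabbed via vm_page_grab() before the original is freed (see + * the pmap_copy_page() path below).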
*/ #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL - if (m->pmapped || m->dirty) { + if (m->pmapped || m->dirty || m->precious) { substitute_needed++; } #else @@ -3862,12 +4013,16 @@ } did_consider: if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) { - + + PAGE_REPLACEMENT_ALLOWED(FALSE); + lck_mtx_unlock(&vm_page_queue_free_lock); vm_page_unlock_queues(); mutex_pause(0); + PAGE_REPLACEMENT_ALLOWED(TRUE); + vm_page_lock_queues(); lck_mtx_lock(&vm_page_queue_free_lock); @@ -3992,6 +4147,7 @@ did_consider: m1 = &vm_pages[cur_idx--]; assert(!m1->free); + if (m1->object == VM_OBJECT_NULL) { /* * page has already been removed from @@ -4003,6 +4159,8 @@ did_consider: assert(!m1->laundry); } else { vm_object_t object; + int refmod; + boolean_t disconnected, reusable; if (abort_run == TRUE) continue; @@ -4019,9 +4177,9 @@ did_consider: } if (locked_object == VM_OBJECT_NULL || (VM_PAGE_WIRED(m1) || m1->gobbled || - m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted || - m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious || - m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) { + m1->encrypted_cleaning || + m1->pageout_queue || m1->laundry || m1->wanted || + m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) { if (locked_object) { vm_object_unlock(locked_object); @@ -4031,8 +4189,31 @@ did_consider: abort_run = TRUE; continue; } - if (m1->pmapped || m1->dirty) { - int refmod; + + disconnected = FALSE; + reusable = FALSE; + + if ((m1->reusable || + m1->object->all_reusable) && + m1->inactive && + !m1->dirty && + !m1->reference) { + /* reusable page... */ + refmod = pmap_disconnect(m1->phys_page); + disconnected = TRUE; + if (refmod == 0) { + /* + * ... not reused: can steal + * without relocating contents. + */ + reusable = TRUE; + } + } + + if ((m1->pmapped && + ! reusable) || + m1->dirty || + m1->precious) { vm_object_offset_t offset; m2 = vm_page_grab(); @@ -4046,14 +4227,67 @@ did_consider: abort_run = TRUE; continue; } + if (! disconnected) { + if (m1->pmapped) + refmod = pmap_disconnect(m1->phys_page); + else + refmod = 0; + } + + /* copy the page's contents */ + pmap_copy_page(m1->phys_page, m2->phys_page); + /* copy the page's state */ + assert(!VM_PAGE_WIRED(m1)); + assert(!m1->free); + assert(!m1->pageout_queue); + assert(!m1->laundry); + m2->reference = m1->reference; + assert(!m1->gobbled); + assert(!m1->private); + m2->no_cache = m1->no_cache; + m2->xpmapped = m1->xpmapped; + assert(!m1->busy); + assert(!m1->wanted); + assert(!m1->fictitious); + m2->pmapped = m1->pmapped; /* should flush cache ? */ + m2->wpmapped = m1->wpmapped; + assert(!m1->pageout); + m2->absent = m1->absent; + m2->error = m1->error; + m2->dirty = m1->dirty; + assert(!m1->cleaning); + m2->precious = m1->precious; + m2->clustered = m1->clustered; + assert(!m1->overwriting); + m2->restart = m1->restart; + m2->unusual = m1->unusual; + m2->encrypted = m1->encrypted; + assert(!m1->encrypted_cleaning); + m2->cs_validated = m1->cs_validated; + m2->cs_tainted = m1->cs_tainted; + + /* + * If m1 had really been reusable, + * we would have just stolen it, so + * let's not propagate its "reusable" + * bit and assert that m2 is not + * marked as "reusable".
+ */ + // m2->reusable = m1->reusable; + assert(!m2->reusable); + + assert(!m1->lopage); + m2->slid = m1->slid; + m2->was_dirty = m1->was_dirty; + m2->compressor = m1->compressor; + + /* + * make sure we clear the ref/mod state + * from the pmap layer... else we risk + * inheriting state from the last time + * this page was used... + */ + pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); if (refmod & VM_MEM_REFERENCED) m2->reference = TRUE; @@ -4072,25 +4306,31 @@ did_consider: vm_page_free_prepare(m1); /* - * make sure we clear the ref/mod state - * from the pmap layer... else we risk - * inheriting state from the last time - * this page was used... - */ - pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); - /* - * now put the substitute page on the object + * now put the substitute page + * on the object */ vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE); - if (m2->reference) - vm_page_activate(m2); - else - vm_page_deactivate(m2); + if (m2->compressor) { + m2->pmapped = TRUE; + m2->wpmapped = TRUE; + PMAP_ENTER(kernel_pmap, m2->offset, m2, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE); +#if MACH_ASSERT + compressed_pages++; +#endif + } else { + if (m2->reference) + vm_page_activate(m2); + else + vm_page_deactivate(m2); + } PAGE_WAKEUP_DONE(m2); } else { + assert(!m1->compressor); + /* * completely cleans up the state * of the page so that it is ready @@ -4177,6 +4417,8 @@ did_consider: assert(vm_page_verify_contiguous(m, npages)); } done_scanning: + PAGE_REPLACEMENT_ALLOWED(FALSE); + vm_page_unlock_queues(); #if DEBUG @@ -4193,10 +4435,10 @@ done_scanning: tv_end_sec -= 1000000; } if (vm_page_find_contig_debug) { - printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n", - __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT, - (long)tv_end_sec, tv_end_usec, orig_last_idx, - scanned, yielded, dumped_run, stolen_pages); + printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... 
stole %d compressed pages\n", + __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT, + (long)tv_end_sec, tv_end_usec, orig_last_idx, + scanned, yielded, dumped_run, stolen_pages, compressed_pages); } #endif @@ -4483,13 +4725,19 @@ static vm_page_t hibernate_gobble_queue; extern boolean_t (* volatile consider_buffer_cache_collect)(int); static int hibernate_drain_pageout_queue(struct vm_pageout_queue *); -static int hibernate_flush_dirty_pages(void); +static int hibernate_flush_dirty_pages(int); static int hibernate_flush_queue(queue_head_t *, int); void hibernate_flush_wait(void); void hibernate_mark_in_progress(void); void hibernate_clear_in_progress(void); +void hibernate_free_range(int, int); +void hibernate_hash_insert_page(vm_page_t); +uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *); +void hibernate_rebuild_vm_structs(void); +uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *); +ppnum_t hibernate_lookup_paddr(unsigned int); struct hibernate_statistics { int hibernate_considered; @@ -4498,6 +4746,7 @@ struct hibernate_statistics { int hibernate_skipped_cleaning; int hibernate_skipped_transient; int hibernate_skipped_precious; + int hibernate_skipped_external; int hibernate_queue_nolock; int hibernate_queue_paused; int hibernate_throttled; @@ -4512,9 +4761,11 @@ struct hibernate_statistics { int cd_found_cleaning; int cd_found_laundry; int cd_found_dirty; + int cd_found_xpmapped; int cd_local_free; int cd_total_free; int cd_vm_page_wire_count; + int cd_vm_struct_pages_unneeded; int cd_pages; int cd_discarded; int cd_count_wire; @@ -4529,7 +4780,7 @@ hibernate_drain_pageout_queue(struct vm_pageout_queue *q) vm_page_lock_queues(); - while (q->pgo_laundry) { + while ( !queue_empty(&q->pgo_pending) ) { q->pgo_draining = TRUE; @@ -4539,8 +4790,12 @@ hibernate_drain_pageout_queue(struct vm_pageout_queue *q) wait_result = thread_block(THREAD_CONTINUE_NULL); - if (wait_result == THREAD_TIMED_OUT) { + if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) { hibernate_stats.hibernate_drain_timeout++; + + if (q == &vm_pageout_queue_external) + return (0); + return (1); } vm_page_lock_queues(); @@ -4553,6 +4808,8 @@ hibernate_drain_pageout_queue(struct vm_pageout_queue *q) } +boolean_t hibernate_skip_external = FALSE; + static int hibernate_flush_queue(queue_head_t *q, int qcount) { @@ -4567,7 +4824,6 @@ hibernate_flush_queue(queue_head_t *q, int qcount) struct vm_pageout_queue *eq; struct vm_pageout_queue *tq; - hibernate_cleaning_in_progress = TRUE; KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0); @@ -4643,9 +4899,6 @@ hibernate_flush_queue(queue_head_t *q, int qcount) goto reenter_pg_on_q; } - if ( !m_object->pager_initialized && m_object->pager_created) - goto reenter_pg_on_q; - if (m_object->copy == VM_OBJECT_NULL) { if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) { /* @@ -4674,6 +4927,13 @@ hibernate_flush_queue(queue_head_t *q, int qcount) goto reenter_pg_on_q; } + + if (hibernate_skip_external == TRUE && !m_object->internal) { + + hibernate_stats.hibernate_skipped_external++; + + goto reenter_pg_on_q; + } tq = NULL; if (m_object->internal) { @@ -4692,10 +4952,10 @@ hibernate_flush_queue(queue_head_t *q, int qcount) } vm_pageout_scan_wants_object = VM_OBJECT_NULL; - tq->pgo_throttled = TRUE; - while (retval == 0) { + tq->pgo_throttled = TRUE; + 
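+ /* + * the target queue is over its throttle limit... wait up to + * 1 second at a time for the pageout thread to make progress + * on its laundry, then recheck + */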
assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC); vm_page_unlock_queues(); @@ -4704,14 +4964,22 @@ hibernate_flush_queue(queue_head_t *q, int qcount) vm_page_lock_queues(); + if (wait_result != THREAD_TIMED_OUT) + break; + if (!VM_PAGE_Q_THROTTLED(tq)) + break; + if (hibernate_should_abort()) retval = 1; - if (wait_result != THREAD_TIMED_OUT) - break; - if (--wait_count == 0) { + hibernate_stats.hibernate_throttle_timeout++; + + if (tq == eq) { + hibernate_skip_external = TRUE; + break; + } retval = 1; } } @@ -4731,6 +4999,9 @@ hibernate_flush_queue(queue_head_t *q, int qcount) VM_PAGE_QUEUES_REMOVE(m); + if (COMPRESSED_PAGER_IS_ACTIVE) + pmap_disconnect(m->phys_page); + vm_pageout_cluster(m, FALSE); hibernate_stats.hibernate_found_dirty++; @@ -4758,14 +5029,12 @@ next_pg: KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0); - hibernate_cleaning_in_progress = FALSE; - return (retval); } static int -hibernate_flush_dirty_pages() +hibernate_flush_dirty_pages(int pass) { struct vm_speculative_age_q *aq; uint32_t i; @@ -4803,23 +5072,37 @@ hibernate_flush_dirty_pages() return (1); } } - if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) - return (1); if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count)) return (1); if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count)) return (1); if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count)) return (1); - if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) return (1); - return (hibernate_drain_pageout_queue(&vm_pageout_queue_external)); -} + if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1) + vm_compressor_record_warmup_start(); + + if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) { + if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1) + vm_compressor_record_warmup_end(); + return (1); + } + if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) { + if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1) + vm_compressor_record_warmup_end(); + return (1); + } + if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1) + vm_compressor_record_warmup_end(); + + if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external)) + return (1); + + return (0); +} -extern void IOSleep(unsigned int); -extern int sync_internal(void); int hibernate_flush_memory() @@ -4828,32 +5111,53 @@ hibernate_flush_memory() KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0); - IOSleep(2 * 1000); + hibernate_cleaning_in_progress = TRUE; + hibernate_skip_external = FALSE; + + if ((retval = hibernate_flush_dirty_pages(1)) == 0) { + + if (COMPRESSED_PAGER_IS_ACTIVE) { - KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0); + if ((retval = hibernate_flush_dirty_pages(2)) == 0) { - if ((retval = hibernate_flush_dirty_pages()) == 0) { - if (consider_buffer_cache_collect != NULL) { + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0); - KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0); + vm_compressor_flush(); + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0); + } + } + if (retval == 0 && consider_buffer_cache_collect != NULL) { + 
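+ /* + * sample vm_page_wire_count around the buffer cache and zone + * garbage collection calls below so we can report how many + * wired pages they gave back + */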
unsigned int orig_wire_count; + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0); + orig_wire_count = vm_page_wire_count; - sync_internal(); (void)(*consider_buffer_cache_collect)(1); consider_zone_gc(TRUE); - KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0); + HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count); + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0); } } + hibernate_cleaning_in_progress = FALSE; + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0); + if (retval && COMPRESSED_PAGER_IS_ACTIVE) + HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT); + + HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n", hibernate_stats.hibernate_considered, hibernate_stats.hibernate_reentered_on_q, hibernate_stats.hibernate_found_dirty); - HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n", + HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n", hibernate_stats.hibernate_skipped_cleaning, hibernate_stats.hibernate_skipped_transient, hibernate_stats.hibernate_skipped_precious, + hibernate_stats.hibernate_skipped_external, hibernate_stats.hibernate_queue_nolock); HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n", hibernate_stats.hibernate_queue_paused, @@ -5005,9 +5309,15 @@ hibernate_consider_discard(vm_page_t m, boolean_t preflight) || (VM_PURGABLE_VOLATILE == object->purgable) || (VM_PURGABLE_EMPTY == object->purgable); - if (discard == FALSE) { - if (!preflight) hibernate_stats.cd_found_dirty++; - } + + if (discard == FALSE) { + if (!preflight) + hibernate_stats.cd_found_dirty++; + } else if (m->xpmapped && m->reference) { + if (!preflight) + hibernate_stats.cd_found_xpmapped++; + discard = FALSE; + } } while (FALSE); @@ -5054,7 +5364,9 @@ hibernate_discard_page(vm_page_t m) assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL)); purgeable_q_t old_queue = vm_purgeable_object_remove(m->object); assert(old_queue); - vm_purgeable_token_delete_first(old_queue); + if (m->object->purgeable_when_ripe) { + vm_purgeable_token_delete_first(old_queue); + } m->object->purgable = VM_PURGABLE_EMPTY; } @@ -5071,6 +5383,7 @@ hibernate_discard_page(vm_page_t m) void hibernate_vm_lock_queues(void) { + vm_object_lock(compressor_object); vm_page_lock_queues(); lck_mtx_lock(&vm_page_queue_free_lock); @@ -5097,6 +5410,7 @@ hibernate_vm_unlock_queues(void) } lck_mtx_unlock(&vm_page_queue_free_lock); vm_page_unlock_queues(); + vm_object_unlock(compressor_object); } /* @@ -5109,13 +5423,15 @@ void hibernate_page_list_setall(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired, hibernate_page_list_t * page_list_pal, - boolean_t preflight, + boolean_t preflight, + boolean_t will_discard, uint32_t * pagesOut) { uint64_t start, end, nsec; vm_page_t m; + vm_page_t next; uint32_t pages = page_list->page_count; - uint32_t count_zf = 0, count_throttled = 0; + uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0; uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0; uint32_t 
count_wire = pages; uint32_t count_discard_active = 0; @@ -5123,10 +5439,13 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, uint32_t count_discard_cleaned = 0; uint32_t count_discard_purgeable = 0; uint32_t count_discard_speculative = 0; + uint32_t count_discard_vm_struct_pages = 0; uint32_t i; uint32_t bank; hibernate_bitmap_t * bitmap; hibernate_bitmap_t * bitmap_wired; + boolean_t discard_all; + boolean_t discard; HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired); @@ -5134,9 +5453,14 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, page_list = NULL; page_list_wired = NULL; page_list_pal = NULL; + discard_all = FALSE; + } else { + discard_all = will_discard; } #if DEBUG + if (!preflight) + { vm_page_lock_queues(); if (vm_page_local_q) { for (i = 0; i < vm_page_local_q_count; i++) { @@ -5145,6 +5469,7 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, VPL_LOCK(&lq->vpl_lock); } } + } #endif /* DEBUG */ @@ -5167,12 +5492,13 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, } if (preflight) { + vm_object_lock(compressor_object); vm_page_lock_queues(); lck_mtx_lock(&vm_page_queue_free_lock); } m = (vm_page_t) hibernate_gobble_queue; - while(m) + while (m) { pages--; count_wire--; @@ -5233,28 +5559,32 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, } } - queue_iterate( &vm_page_queue_throttled, - m, - vm_page_t, - pageq ) + m = (vm_page_t) queue_first(&vm_page_queue_throttled); + while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m)) { + next = (vm_page_t) m->pageq.next; + discard = FALSE; if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) && hibernate_consider_discard(m, preflight)) { if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); count_discard_inactive++; + discard = discard_all; } else count_throttled++; count_wire--; if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + + if (discard) hibernate_discard_page(m); + m = next; } - queue_iterate( &vm_page_queue_anonymous, - m, - vm_page_t, - pageq ) + m = (vm_page_t) queue_first(&vm_page_queue_anonymous); + while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m)) { + next = (vm_page_t) m->pageq.next; + discard = FALSE; if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) && hibernate_consider_discard(m, preflight)) { @@ -5263,18 +5593,21 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, count_discard_purgeable++; else count_discard_inactive++; + discard = discard_all; } else - count_zf++; + count_anonymous++; count_wire--; if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + if (discard) hibernate_discard_page(m); + m = next; } - queue_iterate( &vm_page_queue_inactive, - m, - vm_page_t, - pageq ) + m = (vm_page_t) queue_first(&vm_page_queue_inactive); + while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m)) { + next = (vm_page_t) m->pageq.next; + discard = FALSE; if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) && hibernate_consider_discard(m, preflight)) { @@ -5283,18 +5616,21 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, count_discard_purgeable++; else count_discard_inactive++; + discard = discard_all; } else count_inactive++; count_wire--; if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + if (discard) hibernate_discard_page(m); + m = next; } - queue_iterate( &vm_page_queue_cleaned, - m, - vm_page_t, - pageq ) 
+ m = (vm_page_t) queue_first(&vm_page_queue_cleaned); + while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m)) { + next = (vm_page_t) m->pageq.next; + discard = FALSE; if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) && hibernate_consider_discard(m, preflight)) { @@ -5303,38 +5639,44 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, count_discard_purgeable++; else count_discard_cleaned++; + discard = discard_all; } else count_cleaned++; count_wire--; if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + if (discard) hibernate_discard_page(m); + m = next; } for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) { - queue_iterate(&vm_page_queue_speculative[i].age_q, - m, - vm_page_t, - pageq) - { - if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) - && hibernate_consider_discard(m, preflight)) - { - if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); - count_discard_speculative++; - } - else - count_speculative++; - count_wire--; - if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); - } + m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q); + while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m)) + { + next = (vm_page_t) m->pageq.next; + discard = FALSE; + if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) + && hibernate_consider_discard(m, preflight)) + { + if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); + count_discard_speculative++; + discard = discard_all; + } + else + count_speculative++; + count_wire--; + if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + if (discard) hibernate_discard_page(m); + m = next; + } } - queue_iterate( &vm_page_queue_active, - m, - vm_page_t, - pageq ) + m = (vm_page_t) queue_first(&vm_page_queue_active); + while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m)) { + next = (vm_page_t) m->pageq.next; + discard = FALSE; if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) && hibernate_consider_discard(m, preflight)) { @@ -5343,11 +5685,36 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, count_discard_purgeable++; else count_discard_active++; + discard = discard_all; } else count_active++; count_wire--; if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + if (discard) hibernate_discard_page(m); + m = next; + } + + queue_iterate(&compressor_object->memq, m, vm_page_t, listq) + { + count_compressor++; + count_wire--; + if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + if (preflight == FALSE && discard_all == TRUE) { + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0); + + HIBLOG("hibernate_teardown started\n"); + count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired); + HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages); + + pages -= count_discard_vm_struct_pages; + count_wire -= count_discard_vm_struct_pages; + + hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages; + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0); } if (!preflight) { @@ -5368,20 +5735,26 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, if (!preflight) { hibernate_stats.cd_count_wire = count_wire; - hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + 
count_discard_speculative + count_discard_cleaned; + hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + + count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages; } clock_get_uptime(&end); absolutetime_to_nanoseconds(end - start, &nsec); HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL); - HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d cleaned %d\n", - pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_zf, count_throttled, + HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n", + pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped, + discard_all ? "did" : "could", count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned); *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned; + if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active; + #if DEBUG + if (!preflight) + { if (vm_page_local_q) { for (i = 0; i < vm_page_local_q_count; i++) { struct vpl *lq; @@ -5390,11 +5763,13 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, } } vm_page_unlock_queues(); + } #endif /* DEBUG */ if (preflight) { lck_mtx_unlock(&vm_page_queue_free_lock); vm_page_unlock_queues(); + vm_object_unlock(compressor_object); } KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0); @@ -5413,6 +5788,7 @@ hibernate_page_list_discard(hibernate_page_list_t * page_list) uint32_t count_discard_cleaned = 0; uint32_t count_discard_speculative = 0; + #if DEBUG vm_page_lock_queues(); if (vm_page_local_q) { @@ -5519,6 +5895,356 @@ hibernate_page_list_discard(hibernate_page_list_t * page_list) count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned); } +boolean_t hibernate_paddr_map_inited = FALSE; +boolean_t hibernate_rebuild_needed = FALSE; +unsigned int hibernate_teardown_last_valid_compact_indx = -1; +vm_page_t hibernate_rebuild_hash_list = NULL; + +unsigned int hibernate_teardown_found_tabled_pages = 0; +unsigned int hibernate_teardown_found_created_pages = 0; +unsigned int hibernate_teardown_found_free_pages = 0; +unsigned int hibernate_teardown_vm_page_free_count; + + +struct ppnum_mapping { + struct ppnum_mapping *ppnm_next; + ppnum_t ppnm_base_paddr; + unsigned int ppnm_sindx; + unsigned int ppnm_eindx; +}; + +struct ppnum_mapping *ppnm_head; +struct ppnum_mapping *ppnm_last_found = NULL; + + +void +hibernate_create_paddr_map() +{ + unsigned int i; + ppnum_t next_ppnum_in_run = 0; + struct ppnum_mapping *ppnm = NULL; + + if (hibernate_paddr_map_inited == FALSE) { + + for (i = 0; i < vm_pages_count; i++) { + + if (ppnm) + ppnm->ppnm_eindx = i; + + if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) { + + ppnm = kalloc(sizeof(struct ppnum_mapping)); + + ppnm->ppnm_next = ppnm_head; + ppnm_head = ppnm; + + ppnm->ppnm_sindx = i; + ppnm->ppnm_base_paddr = vm_pages[i].phys_page; + } + 
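+ /* + * note the ppnum that would extend the current physically + * contiguous run... if the next vm_page_t's phys_page doesn't + * match it, a new ppnum_mapping is started at the top of the loop + */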
next_ppnum_in_run = vm_pages[i].phys_page + 1; + } + ppnm->ppnm_eindx++; + + hibernate_paddr_map_inited = TRUE; + } +} + +ppnum_t +hibernate_lookup_paddr(unsigned int indx) +{ + struct ppnum_mapping *ppnm = NULL; + + ppnm = ppnm_last_found; + + if (ppnm) { + if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) + goto done; + } + for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) { + + if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) { + ppnm_last_found = ppnm; + break; + } + } + if (ppnm == NULL) + panic("hibernate_lookup_paddr of %d failed\n", indx); +done: + return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx)); +} + + +uint32_t +hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired) +{ + addr64_t saddr_aligned; + addr64_t eaddr_aligned; + addr64_t addr; + ppnum_t paddr; + unsigned int mark_as_unneeded_pages = 0; + + saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64; + eaddr_aligned = eaddr & ~PAGE_MASK_64; + + for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) { + + paddr = pmap_find_phys(kernel_pmap, addr); + + assert(paddr); + + hibernate_page_bitset(page_list, TRUE, paddr); + hibernate_page_bitset(page_list_wired, TRUE, paddr); + + mark_as_unneeded_pages++; + } + return (mark_as_unneeded_pages); +} + + +void +hibernate_hash_insert_page(vm_page_t mem) +{ + vm_page_bucket_t *bucket; + int hash_id; + + assert(mem->tabled); + assert(mem->object); + assert(mem->offset != (vm_object_offset_t) -1); + + /* + * Insert it into the object/offset hash table + */ + hash_id = vm_page_hash(mem->object, mem->offset); + bucket = &vm_page_buckets[hash_id]; + + mem->next = bucket->pages; + bucket->pages = mem; +} + + +void +hibernate_free_range(int sindx, int eindx) +{ + vm_page_t mem; + unsigned int color; + + while (sindx < eindx) { + mem = &vm_pages[sindx]; + + vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE); + + mem->lopage = FALSE; + mem->free = TRUE; + + color = mem->phys_page & vm_color_mask; + queue_enter_first(&vm_page_queue_free[color], + mem, + vm_page_t, + pageq); + vm_page_free_count++; + + sindx++; + } +} + + +extern void hibernate_rebuild_pmap_structs(void); + +void +hibernate_rebuild_vm_structs(void) +{ + int cindx, sindx, eindx; + vm_page_t mem, tmem, mem_next; + AbsoluteTime startTime, endTime; + uint64_t nsec; + + if (hibernate_rebuild_needed == FALSE) + return; + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0); + HIBLOG("hibernate_rebuild started\n"); + + clock_get_uptime(&startTime); + + hibernate_rebuild_pmap_structs(); + + bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t)); + eindx = vm_pages_count; + + for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) { + + mem = &vm_pages[cindx]; + /* + * hibernate_teardown_vm_structs leaves the location where + * this vm_page_t must be located in "next".
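+ * (a sketch of the invariant, under the compaction scheme above: + * if the teardown moved vm_pages[k] down into slot vm_pages[j], + * then vm_pages[j].next == &vm_pages[k], so sindx computed below + * recovers the original index... for pages that were never moved, + * "next" points at the page itself and the copy is skipped)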
+ */ + tmem = mem->next; + mem->next = NULL; + + sindx = (int)(tmem - &vm_pages[0]); + + if (mem != tmem) { + /* + * this vm_page_t was moved by hibernate_teardown_vm_structs, + * so move it back to its real location + */ + *tmem = *mem; + mem = tmem; + } + if (mem->tabled) + hibernate_hash_insert_page(mem); + /* + * the 'hole' between this vm_page_t and the previous + * vm_page_t we moved needs to be initialized as + * a range of free vm_page_t's + */ + hibernate_free_range(sindx + 1, eindx); + + eindx = sindx; + } + if (sindx) + hibernate_free_range(0, sindx); + + assert(vm_page_free_count == hibernate_teardown_vm_page_free_count); + + /* + * process the list of vm_page_t's that were tabled in the hash, + * but were not located in the vm_pages array... these are + * vm_page_t's that were created on the fly (i.e. fictitious) + */ + for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) { + mem_next = mem->next; + + mem->next = NULL; + hibernate_hash_insert_page(mem); + } + hibernate_rebuild_hash_list = NULL; + + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &startTime); + absolutetime_to_nanoseconds(endTime, &nsec); + + HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL); + + hibernate_rebuild_needed = FALSE; + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0); +} + + +extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *); + +uint32_t +hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired) +{ + unsigned int i; + unsigned int compact_target_indx; + vm_page_t mem, mem_next; + vm_page_bucket_t *bucket; + unsigned int mark_as_unneeded_pages = 0; + unsigned int unneeded_vm_page_bucket_pages = 0; + unsigned int unneeded_vm_pages_pages = 0; + unsigned int unneeded_pmap_pages = 0; + addr64_t start_of_unneeded = 0; + addr64_t end_of_unneeded = 0; + + + if (hibernate_should_abort()) + return (0); + + HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n", + vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, + vm_page_cleaned_count, compressor_object->resident_page_count); + + for (i = 0; i < vm_page_bucket_count; i++) { + + bucket = &vm_page_buckets[i]; + + for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) { + + assert(mem->tabled); + + mem_next = mem->next; + + if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) { + mem->next = hibernate_rebuild_hash_list; + hibernate_rebuild_hash_list = mem; + } + } + } + unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired); + mark_as_unneeded_pages += unneeded_vm_page_bucket_pages; + + hibernate_teardown_vm_page_free_count = vm_page_free_count; + + compact_target_indx = 0; + + for (i = 0; i < vm_pages_count; i++) { + + mem = &vm_pages[i]; + + if (mem->free) { + unsigned int color; + + assert(mem->busy); + assert(!mem->lopage); + + color = mem->phys_page & vm_color_mask; + + queue_remove(&vm_page_queue_free[color], + mem, + vm_page_t, + pageq); + mem->pageq.next = NULL; + mem->pageq.prev = NULL; + + vm_page_free_count--; + + hibernate_teardown_found_free_pages++; + + if ( !vm_pages[compact_target_indx].free) + compact_target_indx = i; + } else { + /* + * record this vm_page_t's original location + * we need this even if it
doesn't get moved + * as an indicator to the rebuild function that + * we don't have to move it + */ + mem->next = mem; + + if (vm_pages[compact_target_indx].free) { + /* + * we've got a hole to fill, so + * move this vm_page_t to its new home + */ + vm_pages[compact_target_indx] = *mem; + mem->free = TRUE; + + hibernate_teardown_last_valid_compact_indx = compact_target_indx; + compact_target_indx++; + } else + hibernate_teardown_last_valid_compact_indx = i; + } + } + unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1], + (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired); + mark_as_unneeded_pages += unneeded_vm_pages_pages; + + hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded); + + if (start_of_unneeded) { + unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired); + mark_as_unneeded_pages += unneeded_pmap_pages; + } + HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages); + + hibernate_rebuild_needed = TRUE; + + return (mark_as_unneeded_pages); +} + + #endif /* HIBERNATION */ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/osfmk/vm/vm_shared_region.c b/osfmk/vm/vm_shared_region.c index 80ded84be..832502e10 100644 --- a/osfmk/vm/vm_shared_region.c +++ b/osfmk/vm/vm_shared_region.c @@ -124,7 +124,11 @@ int shared_region_persistence = 0; /* no by default */ /* delay before reclaiming an unused shared region */ int shared_region_destroy_delay = 120; /* in seconds */ -/* indicate if the shared region has been slid. Only one region can be slid */ +/* + * Only one cache gets to slide on Desktop, since we can't + * tear down slide info properly today and the desktop actually + * produces lots of shared caches. + */ boolean_t shared_region_completed_slide = FALSE; /* this lock protects all the shared region data structures */ @@ -279,6 +283,39 @@ vm_shared_region_mem_entry( return shared_region->sr_mem_entry; } +uint32_t +vm_shared_region_get_slide( + vm_shared_region_t shared_region) +{ + SHARED_REGION_TRACE_DEBUG( + ("shared_region: -> vm_shared_region_get_slide(%p)\n", + shared_region)); + assert(shared_region->sr_ref_count > 1); + SHARED_REGION_TRACE_DEBUG( + ("shared_region: vm_shared_region_get_slide(%p) <- %u\n", + shared_region, shared_region->sr_slide_info.slide)); + + /* 0 if we haven't slid */ + assert(shared_region->sr_slide_info.slide_object != NULL || + shared_region->sr_slide_info.slide == 0); + + return shared_region->sr_slide_info.slide; +} + +vm_shared_region_slide_info_t +vm_shared_region_get_slide_info( + vm_shared_region_t shared_region) +{ + SHARED_REGION_TRACE_DEBUG( + ("shared_region: -> vm_shared_region_get_slide_info(%p)\n", + shared_region)); + assert(shared_region->sr_ref_count > 1); + SHARED_REGION_TRACE_DEBUG( + ("shared_region: vm_shared_region_get_slide_info(%p) <- %p\n", + shared_region, &shared_region->sr_slide_info)); + return &shared_region->sr_slide_info; +} + /* * Set the shared region the process should use. * A NULL new shared region means that we just want to release the old @@ -460,6 +497,9 @@ vm_shared_region_deallocate( * can persist or not based on the "shared_region_persistence" * sysctl. * Make sure that this one complies. + * + * See comments in vm_shared_region_slide() for notes about + * shared regions we have slid (which are not torn down currently).
*/ if (shared_region_persistence && !shared_region->sr_persists) { @@ -484,6 +524,8 @@ vm_shared_region_deallocate( if (shared_region->sr_ref_count == 0) { uint64_t deadline; + assert(!shared_region->sr_slid); + if (shared_region->sr_timer_call == NULL) { /* hold one reference for the timer */ assert(! shared_region->sr_mapping_in_progress); @@ -509,6 +551,11 @@ vm_shared_region_deallocate( } else { /* timer expired: let go of this shared region */ + /* + * We can't properly handle teardown of a slid object today. + */ + assert(!shared_region->sr_slid); + /* * Remove it from the queue first, so no one can find * it... @@ -518,6 +565,7 @@ vm_shared_region_deallocate( vm_shared_region_t, sr_q); vm_shared_region_unlock(); + /* ... and destroy it */ vm_shared_region_destroy(shared_region); shared_region = NULL; @@ -556,6 +604,7 @@ vm_shared_region_create( vm_named_entry_t mem_entry; ipc_port_t mem_entry_port; vm_shared_region_t shared_region; + vm_shared_region_slide_info_t si; vm_map_t sub_map; mach_vm_offset_t base_address, pmap_nesting_start; mach_vm_size_t size, pmap_nesting_size; @@ -615,14 +664,6 @@ vm_shared_region_create( pmap_nesting_start = SHARED_REGION_NESTING_BASE_PPC; pmap_nesting_size = SHARED_REGION_NESTING_SIZE_PPC; break; -#ifdef CPU_TYPE_ARM - case CPU_TYPE_ARM: - base_address = SHARED_REGION_BASE_ARM; - size = SHARED_REGION_SIZE_ARM; - pmap_nesting_start = SHARED_REGION_NESTING_BASE_ARM; - pmap_nesting_size = SHARED_REGION_NESTING_SIZE_ARM; - break; -#endif /* CPU_TYPE_ARM */ default: SHARED_REGION_TRACE_ERROR( ("shared_region: create: unknown cpu type %d\n", @@ -630,7 +671,6 @@ vm_shared_region_create( kfree(shared_region, sizeof (*shared_region)); shared_region = NULL; goto done; - } } @@ -680,13 +720,24 @@ vm_shared_region_create( queue_init(&shared_region->sr_q); shared_region->sr_mapping_in_progress = FALSE; + shared_region->sr_slide_in_progress = FALSE; shared_region->sr_persists = FALSE; + shared_region->sr_slid = FALSE; shared_region->sr_timer_call = NULL; shared_region->sr_first_mapping = (mach_vm_offset_t) -1; /* grab a reference for the caller */ shared_region->sr_ref_count = 1; + /* And set up slide info */ + si = &shared_region->sr_slide_info; + si->start = 0; + si->end = 0; + si->slide = 0; + si->slide_object = NULL; + si->slide_info_size = 0; + si->slide_info_entry = NULL; + done: if (shared_region) { SHARED_REGION_TRACE_INFO( @@ -726,11 +777,13 @@ vm_shared_region_destroy( assert(shared_region->sr_ref_count == 0); assert(!shared_region->sr_persists); + assert(!shared_region->sr_slid); mem_entry = (vm_named_entry_t) shared_region->sr_mem_entry->ip_kobject; assert(mem_entry->is_sub_map); assert(!mem_entry->internal); assert(!mem_entry->is_pager); + assert(!mem_entry->is_copy); map = mem_entry->backing.map; /* @@ -763,20 +816,19 @@ vm_shared_region_destroy( thread_call_free(shared_region->sr_timer_call); } - if ((slide_info.slide_info_entry != NULL) && (slide_info.sr == shared_region)) { +#if 0 + /* + * If slid, free those resources. We'll want this eventually, + * but can't handle it properly today. 
+ */ + si = &shared_region->sr_slide_info; + if (si->slide_info_entry) { kmem_free(kernel_map, - (vm_offset_t) slide_info.slide_info_entry, - (vm_size_t) slide_info.slide_info_size); - vm_object_deallocate(slide_info.slide_object); - slide_info.slide_object = NULL; - slide_info.start = 0; - slide_info.end = 0; - slide_info.slide = 0; - slide_info.sr = NULL; - slide_info.slide_info_entry = NULL; - slide_info.slide_info_size = 0; - shared_region_completed_slide = FALSE; + (vm_offset_t) si->slide_info_entry, + (vm_size_t) si->slide_info_size); + vm_object_deallocate(si->slide_object); } +#endif /* release the shared region structure... */ kfree(shared_region, sizeof (*shared_region)); @@ -1081,7 +1133,8 @@ vm_shared_region_map_file( * We have to create the VM object now, so that it * can be mapped "copy-on-write". */ - obj_size = vm_map_round_page(mappings[i].sfm_size); + obj_size = vm_map_round_page(mappings[i].sfm_size, + VM_MAP_PAGE_MASK(sr_map)); object = vm_object_allocate(obj_size); if (object == VM_OBJECT_NULL) { kr = KERN_RESOURCE_SHORTAGE; @@ -1089,7 +1142,8 @@ vm_shared_region_map_file( kr = vm_map_enter( sr_map, &target_address, - vm_map_round_page(mappings[i].sfm_size), + vm_map_round_page(mappings[i].sfm_size, + VM_MAP_PAGE_MASK(sr_map)), 0, VM_FLAGS_FIXED | VM_FLAGS_ALREADY, object, @@ -1104,7 +1158,8 @@ vm_shared_region_map_file( kr = vm_map_enter_mem_object( sr_map, &target_address, - vm_map_round_page(mappings[i].sfm_size), + vm_map_round_page(mappings[i].sfm_size, + VM_MAP_PAGE_MASK(sr_map)), 0, VM_FLAGS_FIXED | VM_FLAGS_ALREADY, map_port, @@ -1378,15 +1433,21 @@ done: struct vm_shared_region_slide_info slide_info; kern_return_t -vm_shared_region_sliding_valid(uint32_t slide) { - +vm_shared_region_sliding_valid(uint32_t slide) +{ kern_return_t kr = KERN_SUCCESS; + vm_shared_region_t sr = vm_shared_region_get(current_task()); - if ((shared_region_completed_slide == TRUE) && slide) { - if (slide != slide_info.slide) { + /* No region yet? we're fine. */ + if (sr == NULL) { + return kr; + } + + if ((sr->sr_slid == TRUE) && slide) { + if (slide != vm_shared_region_get_slide_info(sr)->slide) { printf("Only one shared region can be slid\n"); kr = KERN_FAILURE; - } else if (slide == slide_info.slide) { + } else { /* * Request for sliding when we've * already done it with exactly the @@ -1398,11 +1459,13 @@ vm_shared_region_sliding_valid(uint32_t slide) { kr = KERN_INVALID_ARGUMENT; } } + vm_shared_region_deallocate(sr); return kr; } kern_return_t vm_shared_region_slide_init( + vm_shared_region_t sr, mach_vm_size_t slide_info_size, mach_vm_offset_t start, mach_vm_size_t size, @@ -1412,21 +1475,16 @@ vm_shared_region_slide_init( kern_return_t kr = KERN_SUCCESS; vm_object_t object = VM_OBJECT_NULL; vm_object_offset_t offset = 0; + vm_shared_region_slide_info_t si = vm_shared_region_get_slide_info(sr); + vm_offset_t slide_info_entry; - vm_map_t map =NULL, cur_map = NULL; + vm_map_t map = NULL, cur_map = NULL; boolean_t is_map_locked = FALSE; - if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) { - if (kr == KERN_INVALID_ARGUMENT) { - /* - * This will happen if we request sliding again - * with the same slide value that was used earlier - * for the very first sliding. 
- */ - kr = KERN_SUCCESS; - } - return kr; - } + assert(sr->sr_slide_in_progress); + assert(!sr->sr_slid); + assert(si->slide_object == NULL); + assert(si->slide_info_entry == NULL); if (slide_info_size > SANE_SLIDE_INFO_SIZE) { printf("Slide_info_size too large: %lx\n", (uintptr_t)slide_info_size); @@ -1434,14 +1492,20 @@ vm_shared_region_slide_init( return kr; } + kr = kmem_alloc(kernel_map, + (vm_offset_t *) &slide_info_entry, + (vm_size_t) slide_info_size); + if (kr != KERN_SUCCESS) { + return kr; + } + if (sr_file_control != MEMORY_OBJECT_CONTROL_NULL) { object = memory_object_control_to_vm_object(sr_file_control); vm_object_reference(object); offset = start; - vm_object_lock_shared(object); - + vm_object_lock(object); } else { /* * Remove this entire "else" block and all "map" references @@ -1471,10 +1535,10 @@ vm_shared_region_slide_init( offset = (start - entry->vme_start) + entry->offset; } - vm_object_lock_shared(object); + vm_object_lock(object); while (object->shadow != VM_OBJECT_NULL) { shadow_obj = object->shadow; - vm_object_lock_shared(shadow_obj); + vm_object_lock(shadow_obj); vm_object_unlock(object); object = shadow_obj; } @@ -1483,68 +1547,72 @@ vm_shared_region_slide_init( if (object->internal == TRUE) { kr = KERN_INVALID_ADDRESS; + } else if (object->object_slid) { + /* Can only be slid once */ + printf("%s: found vm_object %p already slid?\n", __FUNCTION__, object); + kr = KERN_FAILURE; } else { - kr = kmem_alloc(kernel_map, - (vm_offset_t *) &slide_info.slide_info_entry, - (vm_size_t) slide_info_size); - if (kr == KERN_SUCCESS) { - slide_info.slide_info_size = slide_info_size; - slide_info.slide_object = object; - slide_info.start = offset; - slide_info.end = slide_info.start + size; - slide_info.slide = slide; - slide_info.sr = vm_shared_region_get(current_task()); - /* - * We want to keep the above reference on the shared region - * because we have a pointer to it in the slide_info. - * - * If we want to have this region get deallocated/freed - * then we will have to make sure that we msync(..MS_INVALIDATE..) - * the pages associated with this shared region. Those pages would - * have been slid with an older slide value. - * - * vm_shared_region_deallocate(slide_info.sr); - */ - shared_region_completed_slide = TRUE; - } else { - kr = KERN_FAILURE; - } + + si->slide_info_entry = (vm_shared_region_slide_info_entry_t)slide_info_entry; + si->slide_info_size = slide_info_size; + si->slide_object = object; + si->start = offset; + si->end = si->start + size; + si->slide = slide; + + /* + * If we want to have this region get deallocated/freed + * then we will have to make sure that we msync(..MS_INVALIDATE..) + * the pages associated with this shared region. Those pages would + * have been slid with an older slide value. + */ + + /* + * Pointers in object are held without references; they + * are disconnected at the time that we destroy the + * shared region, and since the shared region holds + * a reference on the object, no references in the other + * direction are required. 
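+ * (if teardown were supported, undoing this would mirror the + * sanity-check failure path below: lock the object, clear + * object_slid and vo_slide_info, unlock, then + * vm_object_deallocate() the reference taken above)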
+ */ + object->object_slid = TRUE; + object->vo_slide_info = si; } - vm_object_unlock(object); + vm_object_unlock(object); if (is_map_locked == TRUE) { vm_map_unlock_read(map); } - return kr; -} -void* -vm_shared_region_get_slide_info(void) { - return (void*)&slide_info; + if (kr != KERN_SUCCESS) { + kmem_free(kernel_map, slide_info_entry, slide_info_size); + } + return kr; } void* -vm_shared_region_get_slide_info_entry(void) { - return (void*)slide_info.slide_info_entry; +vm_shared_region_get_slide_info_entry(vm_shared_region_t sr) { + return (void*)sr->sr_slide_info.slide_info_entry; } kern_return_t -vm_shared_region_slide_sanity_check(void) +vm_shared_region_slide_sanity_check(vm_shared_region_t sr) { uint32_t pageIndex=0; uint16_t entryIndex=0; uint16_t *toc = NULL; + vm_shared_region_slide_info_t si; vm_shared_region_slide_info_entry_t s_info; kern_return_t kr; - s_info = vm_shared_region_get_slide_info_entry(); + si = vm_shared_region_get_slide_info(sr); + s_info = si->slide_info_entry; toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset); kr = mach_vm_protect(kernel_map, - (mach_vm_offset_t)(vm_offset_t) slide_info.slide_info_entry, - (mach_vm_size_t) slide_info.slide_info_size, - VM_PROT_READ, TRUE); + (mach_vm_offset_t)(vm_offset_t)s_info, + (mach_vm_size_t) si->slide_info_size, + TRUE, VM_PROT_READ); if (kr != KERN_SUCCESS) { panic("vm_shared_region_slide_sanity_check: vm_protect() error 0x%x\n", kr); } @@ -1561,33 +1629,38 @@ vm_shared_region_slide_sanity_check(void) } return KERN_SUCCESS; fail: - if (slide_info.slide_info_entry != NULL) { + if (si->slide_info_entry != NULL) { kmem_free(kernel_map, - (vm_offset_t) slide_info.slide_info_entry, - (vm_size_t) slide_info.slide_info_size); - vm_object_deallocate(slide_info.slide_object); - slide_info.slide_object = NULL; - slide_info.start = 0; - slide_info.end = 0; - slide_info.slide = 0; - slide_info.slide_info_entry = NULL; - slide_info.slide_info_size = 0; - shared_region_completed_slide = FALSE; + (vm_offset_t) si->slide_info_entry, + (vm_size_t) si->slide_info_size); + + vm_object_lock(si->slide_object); + si->slide_object->object_slid = FALSE; + si->slide_object->vo_slide_info = NULL; + vm_object_unlock(si->slide_object); + + vm_object_deallocate(si->slide_object); + si->slide_object = NULL; + si->start = 0; + si->end = 0; + si->slide = 0; + si->slide_info_entry = NULL; + si->slide_info_size = 0; } return KERN_FAILURE; } kern_return_t -vm_shared_region_slide(vm_offset_t vaddr, uint32_t pageIndex) +vm_shared_region_slide_page(vm_shared_region_slide_info_t si, vm_offset_t vaddr, uint32_t pageIndex) { uint16_t *toc = NULL; slide_info_entry_toc_t bitmap = NULL; uint32_t i=0, j=0; uint8_t b = 0; - uint32_t slide = slide_info.slide; + uint32_t slide = si->slide; int is_64 = task_has_64BitAddr(current_task()); - vm_shared_region_slide_info_entry_t s_info = vm_shared_region_get_slide_info_entry(); + vm_shared_region_slide_info_entry_t s_info = si->slide_info_entry; toc = (uint16_t*)((uintptr_t)s_info + s_info->toc_offset); if (pageIndex >= s_info->toc_count) { @@ -1889,6 +1962,115 @@ vm_commpage_enter( return kr; } +int +vm_shared_region_slide(uint32_t slide, + mach_vm_offset_t entry_start_address, + mach_vm_size_t entry_size, + mach_vm_offset_t slide_start, + mach_vm_size_t slide_size, + memory_object_control_t sr_file_control) +{ + void *slide_info_entry = NULL; + int error; + vm_shared_region_t sr; + + SHARED_REGION_TRACE_DEBUG( + ("vm_shared_region_slide: -> slide %#x, entry_start %#llx, entry_size %#llx, slide_start %#llx, 
slide_size %#llx\n", + slide, entry_start_address, entry_size, slide_start, slide_size)); + + sr = vm_shared_region_get(current_task()); + if (sr == NULL) { + printf("%s: no shared region?\n", __FUNCTION__); + SHARED_REGION_TRACE_DEBUG( + ("vm_shared_region_slide: <- %d (no shared region)\n", + KERN_FAILURE)); + return KERN_FAILURE; + } + + /* + * Protect from concurrent access. + */ + vm_shared_region_lock(); + while(sr->sr_slide_in_progress) { + vm_shared_region_sleep(&sr->sr_slide_in_progress, THREAD_UNINT); + } + if (sr->sr_slid + || shared_region_completed_slide + ) { + vm_shared_region_unlock(); + + vm_shared_region_deallocate(sr); + printf("%s: shared region already slid?\n", __FUNCTION__); + SHARED_REGION_TRACE_DEBUG( + ("vm_shared_region_slide: <- %d (already slid)\n", + KERN_FAILURE)); + return KERN_FAILURE; + } + + sr->sr_slide_in_progress = TRUE; + vm_shared_region_unlock(); + + if((error = vm_shared_region_slide_init(sr, slide_size, entry_start_address, entry_size, slide, sr_file_control))) { + printf("slide_info initialization failed with kr=%d\n", error); + goto done; + } + + slide_info_entry = vm_shared_region_get_slide_info_entry(sr); + if (slide_info_entry == NULL){ + error = KERN_FAILURE; + } else { + error = copyin((user_addr_t)slide_start, + slide_info_entry, + (vm_size_t)slide_size); + if (error) { + error = KERN_INVALID_ADDRESS; + } + } + if (error) { + goto done; + } + + if (vm_shared_region_slide_sanity_check(sr) != KERN_SUCCESS) { + error = KERN_INVALID_ARGUMENT; + printf("Sanity Check failed for slide_info\n"); + } else { +#if DEBUG + printf("Successfully init slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n", + (void*)(uintptr_t)entry_start_address, + (unsigned long)entry_size, + (unsigned long)slide_size); +#endif + } +done: + vm_shared_region_lock(); + + assert(sr->sr_slide_in_progress); + assert(sr->sr_slid == FALSE); + sr->sr_slide_in_progress = FALSE; + thread_wakeup(&sr->sr_slide_in_progress); + + if (error == KERN_SUCCESS) { + sr->sr_slid = TRUE; + + /* + * We don't know how to tear down a slid shared region today, because + * we would have to invalidate all the pages that have been slid + * atomically with respect to anyone mapping the shared region afresh. + * Therefore, take a dangling reference to prevent teardown. + */ + sr->sr_ref_count++; + shared_region_completed_slide = TRUE; + } + vm_shared_region_unlock(); + + vm_shared_region_deallocate(sr); + + SHARED_REGION_TRACE_DEBUG( + ("vm_shared_region_slide: <- %d\n", + error)); + + return error; +} /* * This is called from powermanagement code to let kernel know the current source of power.
@@ -1923,3 +2105,4 @@ post_sys_powersource_internal(int i, int internal) } } #endif /* __i386__ || __x86_64__ */ + diff --git a/osfmk/vm/vm_shared_region.h b/osfmk/vm/vm_shared_region.h index cec44658f..cd2caf182 100644 --- a/osfmk/vm/vm_shared_region.h +++ b/osfmk/vm/vm_shared_region.h @@ -43,7 +43,6 @@ extern int shared_region_version; extern int shared_region_persistence; -extern boolean_t shared_region_completed_slide; #if DEBUG extern int shared_region_debug; @@ -93,24 +92,6 @@ typedef struct vm_shared_region *vm_shared_region_t; #include #include -/* address space shared region descriptor */ -struct vm_shared_region { - uint32_t sr_ref_count; - queue_chain_t sr_q; - void *sr_root_dir; - cpu_type_t sr_cpu_type; - boolean_t sr_64bit; - boolean_t sr_mapping_in_progress; - boolean_t sr_persists; - ipc_port_t sr_mem_entry; - mach_vm_offset_t sr_first_mapping; - mach_vm_offset_t sr_base_address; - mach_vm_size_t sr_size; - mach_vm_offset_t sr_pmap_nesting_start; - mach_vm_size_t sr_pmap_nesting_size; - thread_call_t sr_timer_call; -}; - typedef struct vm_shared_region_slide_info_entry *vm_shared_region_slide_info_entry_t; struct vm_shared_region_slide_info_entry { uint32_t version; @@ -127,7 +108,7 @@ struct slide_info_entry_toc { uint8_t entry[NUM_SLIDING_BITMAPS_PER_PAGE]; }; -typedef struct vm_shared_region_slide_info vm_shared_region_slide_info_t; +typedef struct vm_shared_region_slide_info *vm_shared_region_slide_info_t; struct vm_shared_region_slide_info { mach_vm_offset_t start; mach_vm_offset_t end; @@ -135,11 +116,33 @@ struct vm_shared_region_slide_info { vm_object_t slide_object; mach_vm_size_t slide_info_size; vm_shared_region_slide_info_entry_t slide_info_entry; - vm_shared_region_t sr; }; -extern struct vm_shared_region_slide_info slide_info; +/* address space shared region descriptor */ +struct vm_shared_region { + uint32_t sr_ref_count; + queue_chain_t sr_q; + void *sr_root_dir; + cpu_type_t sr_cpu_type; + boolean_t sr_64bit; + boolean_t sr_mapping_in_progress; + boolean_t sr_slide_in_progress; + boolean_t sr_persists; + boolean_t sr_slid; + ipc_port_t sr_mem_entry; + mach_vm_offset_t sr_first_mapping; + mach_vm_offset_t sr_base_address; + mach_vm_size_t sr_size; + mach_vm_offset_t sr_pmap_nesting_start; + mach_vm_size_t sr_pmap_nesting_size; + thread_call_t sr_timer_call; + struct vm_shared_region_slide_info sr_slide_info; +}; +extern kern_return_t vm_shared_region_slide_page(vm_shared_region_slide_info_t si, + vm_offset_t vaddr, + uint32_t pageIndex); +extern vm_shared_region_slide_info_t vm_shared_region_get_slide_info(vm_shared_region_t sr); #else /* !MACH_KERNEL_PRIVATE */ struct vm_shared_region; @@ -168,6 +171,8 @@ extern mach_vm_size_t vm_shared_region_size( struct vm_shared_region *shared_region); extern ipc_port_t vm_shared_region_mem_entry( struct vm_shared_region *shared_region); +extern uint32_t vm_shared_region_get_slide( + vm_shared_region_t shared_region); extern void vm_shared_region_set( struct task *task, struct vm_shared_region *new_shared_region); @@ -192,17 +197,14 @@ extern kern_return_t vm_shared_region_map_file( void *root_dir, struct shared_file_mapping_np *mapping_to_slide); extern kern_return_t vm_shared_region_sliding_valid(uint32_t slide); -extern kern_return_t vm_shared_region_slide_sanity_check(void); -extern kern_return_t vm_shared_region_slide_init(mach_vm_size_t slide_info_size, +extern kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_t sr); +extern kern_return_t vm_shared_region_slide_init(vm_shared_region_t sr, + 
mach_vm_size_t slide_info_size, mach_vm_offset_t start, mach_vm_size_t size, uint32_t slide, memory_object_control_t); -extern void* vm_shared_region_get_slide_info(void); -extern void* vm_shared_region_get_slide_info_entry(void); -extern kern_return_t vm_shared_region_slide( - vm_offset_t vaddr, - uint32_t pageIndex); +extern void* vm_shared_region_get_slide_info_entry(vm_shared_region_t sr); extern void vm_commpage_init(void); extern void vm_commpage_text_init(void); extern kern_return_t vm_commpage_enter( @@ -211,6 +213,12 @@ extern kern_return_t vm_commpage_enter( extern kern_return_t vm_commpage_remove( struct _vm_map *map, struct task *task); +int vm_shared_region_slide(uint32_t, + mach_vm_offset_t, + mach_vm_size_t, + mach_vm_offset_t, + mach_vm_size_t, + memory_object_control_t); #endif /* KERNEL_PRIVATE */ diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index 05b51b4b2..5c66335d7 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -164,10 +164,12 @@ mach_vm_allocate( */ map_addr = vm_map_min(map); if (map_addr == 0) - map_addr += PAGE_SIZE; + map_addr += VM_MAP_PAGE_SIZE(map); } else - map_addr = vm_map_trunc_page(*addr); - map_size = vm_map_round_page(size); + map_addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); if (map_size == 0) { return(KERN_INVALID_ARGUMENT); } @@ -231,10 +233,12 @@ vm_allocate( */ map_addr = vm_map_min(map); if (map_addr == 0) - map_addr += PAGE_SIZE; + map_addr += VM_MAP_PAGE_SIZE(map); } else - map_addr = vm_map_trunc_page(*addr); - map_size = vm_map_round_page(size); + map_addr = vm_map_trunc_page(*addr, + VM_MAP_PAGE_MASK(map)); + map_size = vm_map_round_page(size, + VM_MAP_PAGE_MASK(map)); if (map_size == 0) { return(KERN_INVALID_ARGUMENT); } @@ -273,8 +277,12 @@ mach_vm_deallocate( if (size == (mach_vm_offset_t) 0) return(KERN_SUCCESS); - return(vm_map_remove(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), VM_MAP_NO_FLAGS)); + return(vm_map_remove(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS)); } /* @@ -295,8 +303,12 @@ vm_deallocate( if (size == (vm_offset_t) 0) return(KERN_SUCCESS); - return(vm_map_remove(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), VM_MAP_NO_FLAGS)); + return(vm_map_remove(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + VM_MAP_NO_FLAGS)); } /* @@ -319,8 +331,10 @@ mach_vm_inherit( return KERN_SUCCESS; return(vm_map_inherit(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_inheritance)); } @@ -344,8 +358,10 @@ vm_inherit( return KERN_SUCCESS; return(vm_map_inherit(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_inheritance)); } @@ -371,8 +387,10 @@ mach_vm_protect( return KERN_SUCCESS; return(vm_map_protect(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_protection, set_maximum)); } @@ -400,8 +418,10 @@ vm_protect( return KERN_SUCCESS; return(vm_map_protect(map, - vm_map_trunc_page(start), - vm_map_round_page(start+size), + vm_map_trunc_page(start, + 
VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), new_protection, set_maximum)); } @@ -425,11 +445,14 @@ mach_vm_machine_attribute( if (size == 0) return KERN_SUCCESS; - return vm_map_machine_attribute(map, - vm_map_trunc_page(addr), - vm_map_round_page(addr+size), - attribute, - value); + return vm_map_machine_attribute( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr+size, + VM_MAP_PAGE_MASK(map)), + attribute, + value); } /* @@ -452,11 +475,14 @@ vm_machine_attribute( if (size == 0) return KERN_SUCCESS; - return vm_map_machine_attribute(map, - vm_map_trunc_page(addr), - vm_map_round_page(addr+size), - attribute, - value); + return vm_map_machine_attribute( + map, + vm_map_trunc_page(addr, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(addr+size, + VM_MAP_PAGE_MASK(map)), + attribute, + value); } /* @@ -1107,11 +1133,20 @@ mach_vm_wire( return KERN_INVALID_ARGUMENT; if (access != VM_PROT_NONE) { - rc = vm_map_wire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), access, TRUE); + rc = vm_map_wire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + access, + TRUE); } else { - rc = vm_map_unwire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), TRUE); + rc = vm_map_unwire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + TRUE); } return rc; } @@ -1148,11 +1183,20 @@ vm_wire( if (size == 0) { rc = KERN_SUCCESS; } else if (access != VM_PROT_NONE) { - rc = vm_map_wire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), access, TRUE); + rc = vm_map_wire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + access, + TRUE); } else { - rc = vm_map_unwire(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), TRUE); + rc = vm_map_unwire(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + TRUE); } return rc; } @@ -1301,8 +1345,12 @@ mach_vm_behavior_set( if (size == 0) return KERN_SUCCESS; - return(vm_map_behavior_set(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), new_behavior)); + return(vm_map_behavior_set(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + new_behavior)); } /* @@ -1331,8 +1379,12 @@ vm_behavior_set( if (size == 0) return KERN_SUCCESS; - return(vm_map_behavior_set(map, vm_map_trunc_page(start), - vm_map_round_page(start+size), new_behavior)); + return(vm_map_behavior_set(map, + vm_map_trunc_page(start, + VM_MAP_PAGE_MASK(map)), + vm_map_round_page(start+size, + VM_MAP_PAGE_MASK(map)), + new_behavior)); } /* @@ -1609,7 +1661,7 @@ mach_vm_purgable_control( return KERN_INVALID_ARGUMENT; return vm_map_purgable_control(map, - vm_map_trunc_page(address), + vm_map_trunc_page(address, PAGE_MASK), control, state); } @@ -1625,7 +1677,7 @@ vm_purgable_control( return KERN_INVALID_ARGUMENT; return vm_map_purgable_control(map, - vm_map_trunc_page(address), + vm_map_trunc_page(address, PAGE_MASK), control, state); } @@ -1690,9 +1742,10 @@ mach_vm_page_query( if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - return vm_map_page_query_internal(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, ref_count); } kern_return_t @@ 
-1705,9 +1758,10 @@ vm_map_page_query( if (VM_MAP_NULL == map) return KERN_INVALID_ARGUMENT; - return vm_map_page_query_internal(map, - vm_map_trunc_page(offset), - disposition, ref_count); + return vm_map_page_query_internal( + map, + vm_map_trunc_page(offset, PAGE_MASK), + disposition, ref_count); } kern_return_t @@ -1836,18 +1890,28 @@ mach_make_memory_entry_64( vm_map_offset_t local_offset; vm_object_size_t mappable_size; + /* + * Stash the offset in the page for use by vm_map_enter_mem_object() + * in the VM_FLAGS_RETURN_DATA_ADDR/MAP_MEM_USE_DATA_ADDR case. + */ + vm_object_offset_t offset_in_page; + unsigned int access; vm_prot_t protections; vm_prot_t original_protections, mask_protections; unsigned int wimg_mode; boolean_t force_shadow = FALSE; + boolean_t use_data_addr; if (((permission & 0x00FF0000) & ~(MAP_MEM_ONLY | MAP_MEM_NAMED_CREATE | MAP_MEM_PURGABLE | - MAP_MEM_NAMED_REUSE))) { + MAP_MEM_NAMED_REUSE | + MAP_MEM_USE_DATA_ADDR | + MAP_MEM_VM_COPY | + MAP_MEM_VM_SHARE))) { /* * Unknown flag: reject for forward compatibility. */ @@ -1861,25 +1925,32 @@ mach_make_memory_entry_64( parent_entry = NULL; } + if (parent_entry && parent_entry->is_copy) { + return KERN_INVALID_ARGUMENT; + } + original_protections = permission & VM_PROT_ALL; protections = original_protections; mask_protections = permission & VM_PROT_IS_MASK; access = GET_MAP_MEM(permission); + use_data_addr = ((permission & MAP_MEM_USE_DATA_ADDR) != 0); user_handle = IP_NULL; user_entry = NULL; - map_offset = vm_map_trunc_page(offset); - map_size = vm_map_round_page(*size); + map_offset = vm_map_trunc_page(offset, PAGE_MASK); if (permission & MAP_MEM_ONLY) { boolean_t parent_is_object; - if (parent_entry == NULL) { + map_size = vm_map_round_page(*size, PAGE_MASK); + + if (use_data_addr || parent_entry == NULL) { return KERN_INVALID_ARGUMENT; } - parent_is_object = !(parent_entry->is_sub_map || parent_entry->is_pager); + parent_is_object = !(parent_entry->is_sub_map || + parent_entry->is_pager); object = parent_entry->backing.object; if(parent_is_object && object != VM_OBJECT_NULL) wimg_mode = object->wimg_bits; @@ -1918,9 +1989,13 @@ mach_make_memory_entry_64( if (object_handle) *object_handle = IP_NULL; return KERN_SUCCESS; - } + } else if (permission & MAP_MEM_NAMED_CREATE) { + map_size = vm_map_round_page(*size, PAGE_MASK); + + if (use_data_addr) { + return KERN_INVALID_ARGUMENT; + } - if(permission & MAP_MEM_NAMED_CREATE) { kr = mach_memory_entry_allocate(&user_entry, &user_handle); if (kr != KERN_SUCCESS) { return KERN_FAILURE; @@ -1990,6 +2065,7 @@ mach_make_memory_entry_64( user_entry->is_sub_map = FALSE; user_entry->is_pager = FALSE; user_entry->offset = 0; + user_entry->data_offset = 0; user_entry->protection = protections; SET_MAP_MEM(access, user_entry->protection); user_entry->size = map_size; @@ -2002,9 +2078,135 @@ mach_make_memory_entry_64( return KERN_SUCCESS; } + if (permission & MAP_MEM_VM_COPY) { + vm_map_copy_t copy; + + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + + if (use_data_addr) { + map_size = (vm_map_round_page(offset + *size, + PAGE_MASK) - + map_offset); + offset_in_page = offset - map_offset; + } else { + map_size = vm_map_round_page(*size, PAGE_MASK); + offset_in_page = 0; + } + + kr = vm_map_copyin(target_map, + map_offset, + map_size, + FALSE, + ©); + if (kr != KERN_SUCCESS) { + return kr; + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return KERN_FAILURE; + } + + 
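Each of the use_data_addr branches in mach_make_memory_entry_64() performs the same arithmetic: the caller's possibly unaligned (offset, size) range is widened to page boundaries for the mapping itself, and the distance from the page-truncated start to the real data is remembered as the entry's data_offset. A self-contained sketch of that arithmetic, assuming a 4 KiB page mask purely for illustration (the kernel uses the map's own mask or PAGE_MASK):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_MASK_4K 0xFFFULL  /* illustrative 4 KiB page mask */

    static uint64_t trunc_page(uint64_t a) { return a & ~PAGE_MASK_4K; }
    static uint64_t round_page(uint64_t a) { return (a + PAGE_MASK_4K) & ~PAGE_MASK_4K; }

    int main(void)
    {
        uint64_t offset = 0x1804;  /* unaligned start of the caller's data */
        uint64_t size   = 0x0900;  /* unaligned length */

        uint64_t map_offset     = trunc_page(offset);                     /* 0x1000 */
        uint64_t map_size       = round_page(offset + size) - map_offset; /* 0x2000 */
        uint64_t offset_in_page = offset - map_offset;                    /* 0x0804 */

        /* The entry covers [map_offset, map_offset + map_size); the caller's
         * data really starts offset_in_page bytes into the first page. */
        printf("map_offset=%#llx map_size=%#llx offset_in_page=%#llx\n",
               (unsigned long long)map_offset, (unsigned long long)map_size,
               (unsigned long long)offset_in_page);
        return 0;
    }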
user_entry->backing.copy = copy; + user_entry->internal = FALSE; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->is_copy = TRUE; + user_entry->offset = 0; + user_entry->protection = protections; + user_entry->size = map_size; + user_entry->data_offset = offset_in_page; + + *size = CAST_DOWN(vm_size_t, map_size); + *object_handle = user_handle; + return KERN_SUCCESS; + } + + if (permission & MAP_MEM_VM_SHARE) { + vm_map_copy_t copy; + vm_prot_t cur_prot, max_prot; + + if (target_map == VM_MAP_NULL) { + return KERN_INVALID_TASK; + } + + if (use_data_addr) { + map_size = (vm_map_round_page(offset + *size, + PAGE_MASK) - + map_offset); + offset_in_page = offset - map_offset; + } else { + map_size = vm_map_round_page(*size, PAGE_MASK); + offset_in_page = 0; + } + + kr = vm_map_copy_extract(target_map, + map_offset, + map_size, + ©, + &cur_prot, + &max_prot); + if (kr != KERN_SUCCESS) { + return kr; + } + + if (mask_protections) { + /* + * We just want as much of "original_protections" + * as we can get out of the actual "cur_prot". + */ + protections &= cur_prot; + if (protections == VM_PROT_NONE) { + /* no access at all: fail */ + vm_map_copy_discard(copy); + return KERN_PROTECTION_FAILURE; + } + } else { + /* + * We want exactly "original_protections" + * out of "cur_prot". + */ + if ((cur_prot & protections) != protections) { + vm_map_copy_discard(copy); + return KERN_PROTECTION_FAILURE; + } + } + + kr = mach_memory_entry_allocate(&user_entry, &user_handle); + if (kr != KERN_SUCCESS) { + vm_map_copy_discard(copy); + return KERN_FAILURE; + } + + user_entry->backing.copy = copy; + user_entry->internal = FALSE; + user_entry->is_sub_map = FALSE; + user_entry->is_pager = FALSE; + user_entry->is_copy = TRUE; + user_entry->offset = 0; + user_entry->protection = protections; + user_entry->size = map_size; + user_entry->data_offset = offset_in_page; + + *size = CAST_DOWN(vm_size_t, map_size); + *object_handle = user_handle; + return KERN_SUCCESS; + } + if (parent_entry == NULL || (permission & MAP_MEM_NAMED_REUSE)) { + if (use_data_addr) { + map_size = vm_map_round_page(offset + *size, PAGE_MASK) - map_offset; + offset_in_page = offset - map_offset; + } else { + map_size = vm_map_round_page(*size, PAGE_MASK); + offset_in_page = 0; + } + /* Create a named object based on address range within the task map */ /* Go find the object at given address */ @@ -2192,7 +2394,6 @@ redo_lookup: } } -#if !CONFIG_EMBEDDED if (vm_map_entry_should_cow_for_true_share(map_entry) && object->vo_size > map_size && map_size != 0) { @@ -2209,8 +2410,15 @@ redo_lookup: goto redo_lookup; } - vm_map_clip_start(target_map, map_entry, vm_map_trunc_page(offset)); - vm_map_clip_end(target_map, map_entry, vm_map_round_page(offset) + map_size); + vm_map_clip_start(target_map, + map_entry, + vm_map_trunc_page(offset, + VM_MAP_PAGE_MASK(target_map))); + vm_map_clip_end(target_map, + map_entry, + (vm_map_round_page(offset, + VM_MAP_PAGE_MASK(target_map)) + + map_size)); force_shadow = TRUE; map_size = map_entry->vme_end - map_entry->vme_start; @@ -2219,9 +2427,8 @@ redo_lookup: vm_map_lock_write_to_read(target_map); vm_object_lock(object); } -#endif /* !CONFIG_EMBEDDED */ - if(object->internal) { + if (object->internal) { /* vm_map_lookup_locked will create a shadow if */ /* needs_copy is set but does not check for the */ /* other two conditions shown. 
It is important to */ @@ -2231,8 +2438,12 @@ redo_lookup: if (force_shadow || ((map_entry->needs_copy || object->shadowed || - (object->vo_size > total_size)) && - !object->true_share)) { + (object->vo_size > total_size && + (map_entry->offset != 0 || + object->vo_size > + vm_map_round_page(total_size, + VM_MAP_PAGE_MASK(target_map))))) + && !object->true_share)) { /* * We have to unlock the VM object before * trying to upgrade the VM map lock, to @@ -2396,7 +2607,9 @@ redo_lookup: parent_entry->is_pager == FALSE && parent_entry->offset == obj_off && parent_entry->protection == protections && - parent_entry->size == map_size) { + parent_entry->size == map_size && + ((!use_data_addr && (parent_entry->data_offset == 0)) || + (use_data_addr && (parent_entry->data_offset == offset_in_page)))) { /* * We have a match: re-use "parent_entry". */ @@ -2429,6 +2642,7 @@ redo_lookup: user_entry->is_sub_map = FALSE; user_entry->is_pager = FALSE; user_entry->offset = obj_off; + user_entry->data_offset = offset_in_page; user_entry->protection = protections; SET_MAP_MEM(GET_MAP_MEM(permission), user_entry->protection); user_entry->size = map_size; @@ -2442,14 +2656,39 @@ redo_lookup: } else { /* The new object will be base on an existing named object */ - if (parent_entry == NULL) { kr = KERN_INVALID_ARGUMENT; goto make_mem_done; } - if((offset + map_size) > parent_entry->size) { - kr = KERN_INVALID_ARGUMENT; - goto make_mem_done; + + if (use_data_addr) { + /* + * submaps and pagers should only be accessible from within + * the kernel, which shouldn't use the data address flag, so can fail here. + */ + if (parent_entry->is_pager || parent_entry->is_sub_map) { + panic("Shouldn't be using data address with a parent entry that is a submap or pager."); + } + /* + * Account for offset to data in parent entry and + * compute our own offset to data. 
+ */ + if((offset + *size + parent_entry->data_offset) > parent_entry->size) { + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } + + map_offset = vm_map_trunc_page(offset + parent_entry->data_offset, PAGE_MASK); + offset_in_page = (offset + parent_entry->data_offset) - map_offset; + map_size = vm_map_round_page(offset + parent_entry->data_offset + *size, PAGE_MASK) - map_offset; + } else { + map_size = vm_map_round_page(*size, PAGE_MASK); + offset_in_page = 0; + + if((offset + map_size) > parent_entry->size) { + kr = KERN_INVALID_ARGUMENT; + goto make_mem_done; + } } if (mask_protections) { @@ -2473,8 +2712,10 @@ redo_lookup: user_entry->size = map_size; user_entry->offset = parent_entry->offset + map_offset; + user_entry->data_offset = offset_in_page; user_entry->is_sub_map = parent_entry->is_sub_map; user_entry->is_pager = parent_entry->is_pager; + user_entry->is_copy = parent_entry->is_copy; user_entry->internal = parent_entry->internal; user_entry->protection = protections; @@ -2625,9 +2866,11 @@ mach_memory_entry_allocate( user_entry->backing.pager = NULL; user_entry->is_sub_map = FALSE; user_entry->is_pager = FALSE; + user_entry->is_copy = FALSE; user_entry->internal = FALSE; user_entry->size = 0; user_entry->offset = 0; + user_entry->data_offset = 0; user_entry->protection = VM_PROT_NONE; user_entry->ref_count = 1; @@ -2725,7 +2968,9 @@ mach_memory_entry_purgable_control( named_entry_lock(mem_entry); - if (mem_entry->is_sub_map || mem_entry->is_pager) { + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } @@ -2754,6 +2999,54 @@ mach_memory_entry_purgable_control( return kr; } +kern_return_t +mach_memory_entry_get_page_counts( + ipc_port_t entry_port, + unsigned int *resident_page_count, + unsigned int *dirty_page_count) +{ + kern_return_t kr; + vm_named_entry_t mem_entry; + vm_object_t object; + vm_object_offset_t offset; + vm_object_size_t size; + + if (entry_port == IP_NULL || + ip_kotype(entry_port) != IKOT_NAMED_ENTRY) { + return KERN_INVALID_ARGUMENT; + } + + mem_entry = (vm_named_entry_t) entry_port->ip_kobject; + + named_entry_lock(mem_entry); + + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + object = mem_entry->backing.object; + if (object == VM_OBJECT_NULL) { + named_entry_unlock(mem_entry); + return KERN_INVALID_ARGUMENT; + } + + vm_object_lock(object); + + offset = mem_entry->offset; + size = mem_entry->size; + + named_entry_unlock(mem_entry); + + kr = vm_object_get_page_counts(object, offset, size, resident_page_count, dirty_page_count); + + vm_object_unlock(object); + + return kr; +} + /* * mach_memory_entry_port_release: * @@ -2797,10 +3090,14 @@ mach_destroy_memory_entry( if(named_entry->ref_count == 0) { if (named_entry->is_sub_map) { vm_map_deallocate(named_entry->backing.map); - } else if (!named_entry->is_pager) { - /* release the memory object we've been pointing to */ + } else if (named_entry->is_pager) { + /* JMM - need to drop reference on pager in that case */ + } else if (named_entry->is_copy) { + vm_map_copy_discard(named_entry->backing.copy); + } else { + /* release the VM object we've been pointing to */ vm_object_deallocate(named_entry->backing.object); - } /* else JMM - need to drop reference on pager in that case */ + } named_entry_unlock(named_entry); named_entry_lock_destroy(named_entry); @@ -2835,7 +3132,9 @@ mach_memory_entry_page_op( 
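A named entry can now be backed by one of four things: a VM object, a submap, a pager, or (new in this change) a vm_map copy. Every operation that only makes sense for object-backed entries, such as page_op, range_op, purgable_control, and the new get_page_counts, rejects the other three kinds before touching backing.object, and mach_destroy_memory_entry() dispatches teardown on the same flags. A compact sketch of that shape follows; note that xnu itself uses separate boolean flags (is_sub_map/is_pager/is_copy) over a union, not this hypothetical enum mirror:

    #include <stddef.h>

    typedef enum { BACKING_OBJECT, BACKING_SUBMAP, BACKING_PAGER, BACKING_COPY } backing_kind_t;

    struct named_entry {
        backing_kind_t kind;
        union {
            void *object;  /* vm_object_t     */
            void *map;     /* vm_map_t        */
            void *pager;   /* memory_object_t */
            void *copy;    /* vm_map_copy_t   */
        } backing;
    };

    /* Object-only operations reject every other backing kind up front,
     * mirroring the is_sub_map/is_pager/is_copy guards above. */
    int entry_object_op(struct named_entry *e)
    {
        if (e->kind != BACKING_OBJECT || e->backing.object == NULL)
            return -1;  /* KERN_INVALID_ARGUMENT analogue */
        /* ... operate on e->backing.object ... */
        return 0;
    }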
named_entry_lock(mem_entry); - if (mem_entry->is_sub_map || mem_entry->is_pager) { + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } @@ -2887,7 +3186,9 @@ mach_memory_entry_range_op( named_entry_lock(mem_entry); - if (mem_entry->is_sub_map || mem_entry->is_pager) { + if (mem_entry->is_sub_map || + mem_entry->is_pager || + mem_entry->is_copy) { named_entry_unlock(mem_entry); return KERN_INVALID_ARGUMENT; } @@ -3093,7 +3394,10 @@ vm_region_object_create( /* Create a named object based on a submap of specified size */ new_map = vm_map_create(PMAP_NULL, VM_MAP_MIN_ADDRESS, - vm_map_round_page(size), TRUE); + vm_map_round_page(size, + VM_MAP_PAGE_MASK(target_map)), + TRUE); + vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(target_map)); user_entry->backing.map = new_map; user_entry->internal = TRUE; @@ -3123,7 +3427,7 @@ vm_map_get_phys_page( vm_map_entry_t entry; ppnum_t phys_page = 0; - map_offset = vm_map_trunc_page(addr); + map_offset = vm_map_trunc_page(addr, PAGE_MASK); vm_map_lock(map); while (vm_map_lookup_entry(map, map_offset, &entry)) { @@ -3157,7 +3461,7 @@ vm_map_get_phys_page( offset = entry->offset + (map_offset - entry->vme_start); phys_page = (ppnum_t) ((entry->object.vm_object->vo_shadow_offset - + offset) >> 12); + + offset) >> PAGE_SHIFT); break; } @@ -3255,8 +3559,9 @@ kernel_object_iopl_request( /* offset from beginning of named entry offset in object */ offset = offset + named_entry->offset; - if(named_entry->is_sub_map) - return (KERN_INVALID_ARGUMENT); + if (named_entry->is_sub_map || + named_entry->is_copy) + return KERN_INVALID_ARGUMENT; named_entry_lock(named_entry); diff --git a/osfmk/x86_64/Makefile b/osfmk/x86_64/Makefile index 8e7e0cc88..a8a465850 100644 --- a/osfmk/x86_64/Makefile +++ b/osfmk/x86_64/Makefile @@ -7,6 +7,10 @@ include $(MakeInc_cmd) include $(MakeInc_def) +EXPORT_ONLY_FILES = machine_kpc.h +EXPORT_MD_LIST = ${EXPORT_ONLY_FILES} +EXPORT_MD_DIR = x86_64 + include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/osfmk/x86_64/WKdmCompress_new.s b/osfmk/x86_64/WKdmCompress_new.s new file mode 100644 index 000000000..6a8d316b9 --- /dev/null +++ b/osfmk/x86_64/WKdmCompress_new.s @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + This file contains the x86_64 hand-optimized implementation of the WKdm memory page compressor. + + int WKdm_compress (WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes_budget); + + input : + src_buf : address of input page (length = 1024 words) + dest_buf : address of output buffer (may not be 16-byte aligned) + scratch : a 16-byte aligned 4k-byte scratch memory provided by the caller + bytes_budget : a given byte target in compression + + output : + + if the input buffer can be compressed within the given byte budget, dest_buf is written with the compressed data and the function returns the number of bytes of compressed data + otherwise, the function returns -1 to signal that the input data cannot be compressed within the given byte budget. + During the scan and tag process, each word that cannot be compressed will be written to dest_buf, following a 12-byte header + 256-byte tag area. + When the function returns -1, dest_buf is filled with all those words that could not be compressed and should be considered undefined. + The worst-case scenario is that no words can be compressed. Hence, the minimum size requirement for dest_buf is 12+256+4096 = 4364 bytes, to prevent a memory fault. + + The 4th argument bytes_budget is the target compression budget in bytes. + If the input page can be compressed within the budget, the compressed data is written to *dest_buf, and the function returns the number of compressed bytes. + Otherwise, the function returns -1 (to signal to the caller that the page cannot be compressed). + + The WKdm compression algorithm is briefly stated as follows: + + There is a dynamically updated dictionary consisting of 16 words. Each dictionary word is initialized to 1 at the point of entry to the function. + For a nonzero input word x, 8 of its bits (bits 10..17) are used to determine a corresponding word from the dictionary, represented by dict_index (4 bits) and dict_word (32 bits). + a. k = (x>>10)&255; // 8-bit hash table index + b. dict_index = hashTable[k]; // 4-bit dictionary index, hashTable[] is fixed + c. dict_word = dictionary[dict_index]; // 32-bit dictionary word, dictionary[] is dynamically updated + + Each input word x is classified/tagged into 4 classes : + 0 : x = 0 + 1 : (x>>10) == (dict_word>>10), bits 10:31 of the input word match a dictionary word + 2 : (x>>10) != (dict_word>>10), the above condition (22 higher bits matched) is not met, meaning a dictionary miss + 3 : (x == dict_word), the exact input word is in the dictionary + + For each class, different numbers of bits are needed for the decompressor to reproduce the original input word. + 0 : 2-bit tag (32->2 compression) + 1 : 2-bit tag + 4-bit dict_index + 10 lower bits (32->16 compression) + 2 : 2-bit tag + 32-bit new word (32->34 expansion) + 3 : 2-bit tag + 4-bit dict_index (32->6 compression) + + The WKdm compress algorithm therefore works well for pages where there are lots of zero words (32->2) and/or there are frequent repeats of some word patterns (32->6). + + The output bit stream (*dest_buf) consists of + a. a 12-byte header + b. 256 bytes for 1024 packed tags + c. (varying number of) words for new words not matched to a dictionary word + d. (varying number of) 32-bit words for packed 4-bit dict_indices (for classes 1 and 3) + e. 
(varying number of) 32-bit words for packed 10-bit low bits (for class 1) + + The header consists of 3 words that specify the ending offsets (in 32-bit words), from the start of the bit stream, of c, d, and e, respectively. + Note that there might be padding bits in d (if the number of dict_indices is not divisible by 8), and there are 2/12/22 padding bits when packing 3/2/1 low 10-bit values in a 32-bit word. + + + The WKdm compress algorithm first runs a scan and classification pass, tagging and writing unpacked data into temporary buffers. It then packs those data into the output buffer. + + The temp buffers are + + uint8_t tempTagsArray[1024]; // temporary storage for tags before final packing + uint8_t tempQPosArray[1024]; // temporary storage for dict_indices before final packing + uint16_t tempLowBitsArray[1024]; // temporary storage for partially matched lower 10 bits before final packing + + Since the new words (those that cannot be matched fully or partially to the dictionary) are stored right after the header and the tags section and need no packing, we write them directly to + the destination buffer. + + uint32_t *new_word = dest_buf+3+64; // 3 words for header, 64 words for tags, new words come right after the tags. + + Since we are given a byte budget for this compressor, we can monitor the byte usage on the fly in the scanning and tagging pass. + + bytes_budget -= 12 + 256; // header and tags (1024 * 2 / 8 = 256 bytes) + + whenever an input word is classified as class + + 2 : bytes_budget-=4; if (bytes_budget<=0) exit -1; + + when writing the packed 4-bit indices and 10-bit partials, monitor bytes_budget and exit -1 when bytes_budget <= 0; + + without showing the bit budget management, the pseudo code is given as follows: + + uint8_t *tags=tempTagsArray; + uint8_t *dict=tempQPosArray; + uint16_t *partial=tempLowBitsArray; + + for (i=0;i<1024;i++) { + x = *src_buf++; + if (x == 0) { // zero, 2-bit tag + *tags++ = 0; + } else { + + // find dict_index and dict_word from x + k = (x>>10)&255; + dict_index = hashTable[k]; + dict_word = dictionary[dict_index]; + + if (dict_word == x) { // exact match + // 2-bit tag + 4-bit table index + *tags++ = 3; + *dict++ = dict_index; + } else if (((x^dict_word)>>10)==0) { // 22 higher bits matched + // 2-bit tag + 4-bit table index + 10-bit lower partial + *tags++ = 1; + *dict++ = dict_index; + *partial++ = x & 0x3ff; + dictionary[dict_index] = x; + } else { // not matched + // 2-bit tag + 32-bit new word + *tags++ = 2; + *new_word++ = x; + dictionary[dict_index] = x; + } + } + } + + After this classification/tagging pass is completed, the 3 temp buffers are packed into the output *dest_buf: + + 1. the 1024 tags are packed into 256 bytes right after the 12-byte header + 2. the dictionary indices (4 bits each) are packed into the area right after the new words section + 3. the low 10-bit values are packed 3 per 32-bit word, in the area after the dictionary indices section. 
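For reference, the d and e packing steps just described are easy to state in plain C. The 10-bit packer below matches the layout in the comment exactly (three values per word, lowest value first); the nibble packer shows the simplest sequential pairing, whereas the assembly's L_pack_4bits pairs indices four positions apart because it consumes two whole 32-bit words per iteration - the packing density is the same either way. Function names here are ours:

    #include <stdint.h>

    /* Pack 4-bit dictionary indices (one per byte in src) two per output byte. */
    void pack_4bits_seq(const uint8_t *src, uint8_t *dst, int n_pairs)
    {
        for (int i = 0; i < n_pairs; i++)
            dst[i] = (uint8_t)(src[2*i] | (src[2*i + 1] << 4));
    }

    /* Pack three 10-bit values per 32-bit word, 2 bits of padding at the top:
     * word = w0 | (w1 << 10) | (w2 << 20), exactly as described above. */
    void pack_3_tenbits(const uint16_t *src, uint32_t *dst, int n_triplets)
    {
        for (int i = 0; i < n_triplets; i++)
            dst[i] = (uint32_t)src[3*i]
                   | ((uint32_t)src[3*i + 1] << 10)
                   | ((uint32_t)src[3*i + 2] << 20);
    }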
+ + cclee, 11/30/12 +*/ + + .text + .align 4,0x90 + +.globl _WKdm_compress_new +_WKdm_compress_new: + pushq %rbp + movq %rsp, %rbp + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbx + subq $(24+64), %rsp + + + #define tempTagsArray 64(%rsp) + #define tempLowBitsArray 72(%rsp) + #define next_tag %r8 + #define next_input_word %rdi + #define end_of_input %r13 + #define next_full_patt %rbx + #define dict_location %rcx + #define next_qp %r10 + #define dictionary %rsp + #define scratch %r11 + #define dest_buf %r12 + #define hashTable %r14 + #define tempQPosArray %r15 + #define next_low_bits %rsi + #define byte_count %r9d + + movq %rsi, %r12 // dest_buf + movq %rdx, scratch // scratch = dictionary + + movq %rdx, tempTagsArray // &tempTagsArray[0] + movq %rdx, next_tag // next_tag always points to the one following the current tag + + leaq 1024(%rdx), tempQPosArray // &tempQPosArray[0] + movq tempQPosArray, next_qp // next_qp + + leaq 4096(%rdi), end_of_input // end_of_input = src_buf + num_input_words + leaq 268(%rsi), %rbx // dest_buf + [TAGS_AREA_OFFSET + (num_input_words / 16)]*4 + + movl %ecx, byte_count + subl $(12+256), byte_count // header + tags + jle L_budgetExhausted + + // PRELOAD_DICTIONARY; + movl $1, 0(dictionary) + movl $1, 4(dictionary) + movl $1, 8(dictionary) + movl $1, 12(dictionary) + movl $1, 16(dictionary) + movl $1, 20(dictionary) + movl $1, 24(dictionary) + movl $1, 28(dictionary) + movl $1, 32(dictionary) + movl $1, 36(dictionary) + movl $1, 40(dictionary) + movl $1, 44(dictionary) + movl $1, 48(dictionary) + movl $1, 52(dictionary) + movl $1, 56(dictionary) + movl $1, 60(dictionary) + + leaq 2048(%rdx), %rax // &tempLowBitsArray[0] + movq %rax, tempLowBitsArray // save for later reference + movq %rax, next_low_bits // next_low_bits + + leaq _hashLookupTable_new(%rip), hashTable // hash look up table + jmp L_scan_loop + + .align 4,0x90 +L_RECORD_ZERO: + movb $0, -1(next_tag) // *next_tag = ZERO; + addq $4, next_input_word // next_input_word++; + cmpq next_input_word, end_of_input // end_of_input vs next_input_word + jbe L_done_search +L_scan_loop: + movl (next_input_word), %edx + incq next_tag // next_tag++ + testl %edx, %edx + je L_RECORD_ZERO // if (input_word==0) RECORD_ZERO + movl %edx, %eax // a copy of input_word + shrl $10, %eax // input_high_bits = HIGH_BITS(input_word); + movzbl %al, %eax // 8-bit index to the Hash Table + movsbq (hashTable,%rax),%rax // HASH_TO_DICT_BYTE_OFFSET(input_word) + leaq (dictionary, %rax), dict_location // ((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word)); + movl (dict_location), %eax // dict_word = *dict_location; + addq $4, next_input_word // next_input_word++ + cmpl %eax, %edx // dict_word vs input_word + je L_RECORD_EXACT // if identical, RECORD_EXACT + xorl %edx, %eax + shrl $10, %eax // HIGH_BITS(dict_word) + je L_RECORD_PARTIAL // if identical, RECORD_PARTIAL + +L_RECORD_MISS: + movl %edx, (next_full_patt) // *next_full_patt = input_word; + addq $4, next_full_patt // next_full_patt++ + movl %edx, (dict_location) // *dict_location = input_word + movb $2, -1(next_tag) // *next_tag = 2 for miss + subl $4, byte_count // fill in a new 4-bytes word + jle L_budgetExhausted + cmpq next_input_word, end_of_input // end_of_input vs next_input_word + ja L_scan_loop + +L_done_search: + + // SET_QPOS_AREA_START(dest_buf,next_full_patt); + movq next_full_patt, %rax // next_full_patt + subq dest_buf, %rax // next_full_patt - dest_buf + sarq $2, %rax // offset in 4-bytes + movl %eax, %r13d // r13d = (next_full_patt - 
dest_buf) + movl %eax, 0(dest_buf) // dest_buf[0] = next_full_patt - dest_buf + decq next_tag + cmpq next_tag, tempTagsArray // &tempTagsArray[0] vs next_tag + jae L13 // if (&tempTagsArray[0] >= next_tag), skip the following + + // boundary_tmp = WK_pack_2bits(tempTagsArray, (WK_word *) next_tag, dest_buf + HEADER_SIZE_IN_WORDS); + + movq dest_buf, %rdi // dest_buf + movq tempTagsArray, %rcx // &tempTagsArray[0] + + .align 4,0x90 +L_pack_2bits: + movq 8(%rcx), %rax // w3 + addq $16, %rcx // tempTagsArray += 16; + shlq $4, %rax + addq $4, %rdi // dest_buf += 4; + orq -16(%rcx), %rax // w3 + movq %rax, %rdx + shrq $30, %rax + orl %edx, %eax + cmpq %rcx, next_tag // cmp next_tag vs dest_buf + movl %eax, 8(%rdi) // save at *(dest_buf + HEADER_SIZE_IN_WORDS) + ja L_pack_2bits // if (next_tag > dest_buf) repeat L_pack_2bits + + /* Pack the queue positions into the area just after the full words. */ + +L13: + mov next_qp, %rax // next_qp + sub tempQPosArray, %rax // num_bytes_to_pack = next_qp - (char *) tempQPosArray; + addl $7, %eax // num_bytes_to_pack+7 + shrl $3, %eax // num_packed_words = (num_bytes_to_pack + 7) >> 3 + + shll $2, %eax // turn into bytes + subl %eax, byte_count // + jl L_budgetExhausted + shrl $1, %eax // num_source_words = num_packed_words * 2; + + leaq (tempQPosArray,%rax,4), %rcx // endQPosArray = tempQPosArray + num_source_words + cmpq %rcx, next_qp // next_qp vs endQPosArray + jae L16 // if (next_qp >= endQPosArray) skip the following zero paddings + movq %rcx, %rax + subq next_qp, %rax + subl $4, %eax + jl 1f + .align 4,0x90 +0: movl $0, (next_qp) + addq $4, next_qp + subl $4, %eax + jge 0b +1: testl $2, %eax + je 1f + movw $0, (next_qp) + addq $2, next_qp +1: testl $1, %eax + je 1f + movb $0, (next_qp) + addq $1, next_qp +1: +L16: + movq next_full_patt, %rdi // next_full_patt + cmpq tempQPosArray, %rcx // endQPosArray vs tempQPosArray + jbe L20 // if (endQPosArray <= tempQPosArray) skip the following + movq tempQPosArray, %rdx // tempQPosArray + + /* byte_count -= (rcx - tempQPosArray)/2 */ + + .align 4,0x90 +L_pack_4bits: + movl 4(%rdx), %eax // src_next[1] + addq $8, %rdx // src_next += 2; + sall $4, %eax // (src_next[1] << 4) + addq $4, %rdi // dest_next++; + orl -8(%rdx), %eax // temp = src_next[0] | (src_next[1] << 4) + cmpq %rdx, %rcx // source_end vs src_next + movl %eax, -4(%rdi) // dest_next[0] = temp; + ja L_pack_4bits // while (src_next < source_end) repeat the loop + + // SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp); + movq %rdi, %rax // boundary_tmp + subq dest_buf, %rax // boundary_tmp - dest_buf + movq %rax, %r13 // boundary_tmp - dest_buf + shrq $2, %r13 // boundary_tmp - dest_buf in words +L20: + movl %r13d, 4(dest_buf) // dest_buf[1] = boundary_tmp - dest_buf + + movq tempLowBitsArray, %rcx // tempLowBitsArray + movq next_low_bits, %rbx // next_low_bits + subq %rcx, %rbx // next_low_bits - tempLowBitsArray (in bytes) + sarq $1, %rbx // num_tenbits_to_pack (in half-words) + + #define size %ebx + + subl $3, size // pre-decrement num_tenbits_to_pack by 3 + jl 1f // if num_tenbits_to_pack < 3, skip the following loop + + .align 4,0x90 +0: + movzwl 4(%rcx), %eax // w2 + addq $6, %rcx // next w0/w1/w2 triplet + sall $10, %eax // w1 << 10 + or -4(%rcx), %ax // w1 + addq $4, %rdi // dest_buf++ + sall $10, %eax // w1 << 10 + or -6(%rcx), %ax // (w0) | (w1<<10) | (w2<<20) + subl $4, byte_count // fill in a new 4-bytes word + jle L_budgetExhausted + subl $3, size // num_tenbits_to_pack-=3 + movl %eax, -4(%rdi) // pack w0,w1,w2 into 1 dest_buf word + jge 0b 
// if no less than 3 elements, back to loop head + +1: addl $3, size // post-increment num_tenbits_to_pack by 3 + je 3f // if num_tenbits_to_pack is a multiple of 3, skip the following + movzwl (%rcx), %eax // w0 + subl $1, size // num_tenbits_to_pack-- + je 2f // + movzwl 2(%rcx), %edx // w1 + sall $10, %edx // w1 << 10 + orl %edx, %eax // w0 | (w1<<10) +2: + subl $4, byte_count // fill in a new 4-bytes word + jle L_budgetExhausted + movl %eax, (%rdi) // write the final dest_buf word + addq $4, %rdi // dest_buf++ + +3: movq %rdi, %rax // boundary_tmp + subq dest_buf, %rax // boundary_tmp - dest_buf + shrq $2, %rax // boundary_tmp - dest_buf in terms of words + movl %eax, 8(dest_buf) // SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp) + shlq $2, %rax // boundary_tmp - dest_buf in terms of bytes + +L_done: + // restore registers and return + addq $(24+64), %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + leave + ret + + .align 4 +L_budgetExhausted: + mov $-1, %rax + jmp L_done + + + .align 4,0x90 +L_RECORD_EXACT: + subq dictionary, %rcx // dict_location - dictionary + sarq $2, %rcx // divide by 4 for word offset + movb $3, -1(next_tag) // *next_tag = 3 for exact + movb %cl, (next_qp) // *next_qp = word offset (4-bit) + incq next_qp // next_qp++ + cmpq next_input_word, end_of_input // end_of_input vs next_input_word + ja L_scan_loop + jmp L_done_search + + .align 4,0x90 +L_RECORD_PARTIAL: + movq %rcx, %rax // dict_location + movb $1, -1(next_tag) // *next_tag = 1 for partial matched + subq dictionary, %rax // dict_location - dictionary + movl %edx, (%rcx) // *dict_location = input_word; + sarq $2, %rax // offset in 32-bit word + movb %al, (next_qp) // update *next_qp + andl $1023, %edx // lower 10 bits + incq next_qp // next_qp++ + mov %dx, (next_low_bits) // save next_low_bits + addq $2, next_low_bits // next_low_bits++ + cmpq next_input_word, end_of_input // end_of_input vs next_input_word + ja L_scan_loop + jmp L_done_search + diff --git a/osfmk/x86_64/WKdmData_new.s b/osfmk/x86_64/WKdmData_new.s new file mode 100644 index 000000000..427c54c50 --- /dev/null +++ b/osfmk/x86_64/WKdmData_new.s @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
+ * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +.globl _hashLookupTable_new + .const + .align 4 +_hashLookupTable_new: + .byte 0 + .byte 52 + .byte 8 + .byte 56 + .byte 16 + .byte 12 + .byte 28 + .byte 20 + .byte 4 + .byte 36 + .byte 48 + .byte 24 + .byte 44 + .byte 40 + .byte 32 + .byte 60 + .byte 8 + .byte 12 + .byte 28 + .byte 20 + .byte 4 + .byte 60 + .byte 16 + .byte 36 + .byte 24 + .byte 48 + .byte 44 + .byte 32 + .byte 52 + .byte 56 + .byte 40 + .byte 12 + .byte 8 + .byte 48 + .byte 16 + .byte 52 + .byte 60 + .byte 28 + .byte 56 + .byte 32 + .byte 20 + .byte 24 + .byte 36 + .byte 40 + .byte 44 + .byte 4 + .byte 8 + .byte 40 + .byte 60 + .byte 32 + .byte 20 + .byte 44 + .byte 4 + .byte 36 + .byte 52 + .byte 24 + .byte 16 + .byte 56 + .byte 48 + .byte 12 + .byte 28 + .byte 16 + .byte 8 + .byte 40 + .byte 36 + .byte 28 + .byte 32 + .byte 12 + .byte 4 + .byte 44 + .byte 52 + .byte 20 + .byte 24 + .byte 48 + .byte 60 + .byte 56 + .byte 40 + .byte 48 + .byte 8 + .byte 32 + .byte 28 + .byte 36 + .byte 4 + .byte 44 + .byte 20 + .byte 56 + .byte 60 + .byte 24 + .byte 52 + .byte 16 + .byte 12 + .byte 12 + .byte 4 + .byte 48 + .byte 20 + .byte 8 + .byte 52 + .byte 16 + .byte 60 + .byte 24 + .byte 36 + .byte 44 + .byte 28 + .byte 56 + .byte 40 + .byte 32 + .byte 36 + .byte 20 + .byte 24 + .byte 60 + .byte 40 + .byte 44 + .byte 52 + .byte 16 + .byte 32 + .byte 4 + .byte 48 + .byte 8 + .byte 28 + .byte 56 + .byte 12 + .byte 28 + .byte 32 + .byte 40 + .byte 52 + .byte 36 + .byte 16 + .byte 20 + .byte 48 + .byte 8 + .byte 4 + .byte 60 + .byte 24 + .byte 56 + .byte 44 + .byte 12 + .byte 8 + .byte 36 + .byte 24 + .byte 28 + .byte 16 + .byte 60 + .byte 20 + .byte 56 + .byte 32 + .byte 40 + .byte 48 + .byte 12 + .byte 4 + .byte 44 + .byte 52 + .byte 44 + .byte 40 + .byte 12 + .byte 56 + .byte 8 + .byte 36 + .byte 24 + .byte 60 + .byte 28 + .byte 48 + .byte 4 + .byte 32 + .byte 20 + .byte 16 + .byte 52 + .byte 60 + .byte 12 + .byte 24 + .byte 36 + .byte 8 + .byte 4 + .byte 16 + .byte 56 + .byte 48 + .byte 44 + .byte 40 + .byte 52 + .byte 32 + .byte 20 + .byte 28 + .byte 32 + .byte 12 + .byte 36 + .byte 28 + .byte 24 + .byte 56 + .byte 40 + .byte 16 + .byte 52 + .byte 44 + .byte 4 + .byte 20 + .byte 60 + .byte 8 + .byte 48 + .byte 48 + .byte 52 + .byte 12 + .byte 20 + .byte 32 + .byte 44 + .byte 36 + .byte 28 + .byte 4 + .byte 40 + .byte 24 + .byte 8 + .byte 56 + .byte 60 + .byte 16 + .byte 36 + .byte 32 + .byte 8 + .byte 40 + .byte 4 + .byte 52 + .byte 24 + .byte 44 + .byte 20 + .byte 12 + .byte 28 + .byte 48 + .byte 56 + .byte 16 + .byte 60 + .byte 4 + .byte 52 + .byte 60 + .byte 48 + .byte 20 + .byte 16 + .byte 56 + .byte 44 + .byte 24 + .byte 8 + .byte 40 + .byte 12 + .byte 32 + .byte 28 + .byte 36 + .byte 24 + .byte 32 + .byte 12 + .byte 4 + .byte 20 + .byte 16 + .byte 60 + .byte 36 + .byte 28 + .byte 8 + .byte 52 + .byte 40 + .byte 48 + .byte 44 + .byte 56 diff --git a/osfmk/x86_64/WKdmDecompress_new.s b/osfmk/x86_64/WKdmDecompress_new.s new file mode 100644 index 000000000..1c435c5e7 --- /dev/null +++ b/osfmk/x86_64/WKdmDecompress_new.s @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. 
The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* + This file contains the x86_64 hand-optimized implementation of the WKdm memory page decompressor. + + void WKdm_decompress (WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, __unused__ unsigned int words); + + input : + src_buf : address of input compressed data buffer + dest_buf : address of output decompressed buffer + scratch : a 16-byte aligned 4k-byte scratch memory provided by the caller + words : this argument is not used in the implementation + + output : + + the input buffer is decompressed and dest_buf is written with the decompressed data. + + An algorithm description of the WKdm compressor and the bit stream format can be found in the WKdm Compress x86_64 assembly code WKdmCompress.s + + The bit stream (*src_buf) consists of + a. a 12-byte header + b. 256 bytes for 1024 packed tags + c. (varying number of) words for new words not matched to a dictionary word + d. (varying number of) 32-bit words for packed 4-bit dict_indices (for classes 1 and 3) + e. (varying number of) 32-bit words for packed 10-bit low bits (for class 1) + + where the header (of 3 words) specifies the ending boundaries (in 32-bit words), from the start of the bit stream, of c, d, and e, respectively. + + The decompressor first unpacks the bit stream components b/d/e into temporary buffers. 
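As a plain-C reference for the first of those unpacking steps, expanding the 2-bit tags (four per byte, 256 packed bytes for 1024 tags) back into one tag per byte looks like the sketch below. We assume the low-order pair comes first within each byte; the L_WK_unpack_2bits loop later in this file uses its own interleaving (matched to the compressor's L_pack_2bits) and handles 16 tags per iteration with 64-bit mask tricks:

    #include <stdint.h>

    /* Expand n_bytes packed bytes into 4*n_bytes one-byte tags (values 0..3). */
    void unpack_2bits_seq(const uint8_t *packed, uint8_t *tags, int n_bytes)
    {
        for (int i = 0; i < n_bytes; i++) {
            uint8_t b = packed[i];
            tags[4*i + 0] = b & 3;
            tags[4*i + 1] = (b >> 2) & 3;
            tags[4*i + 2] = (b >> 4) & 3;
            tags[4*i + 3] = (b >> 6) & 3;
        }
    }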
Then it sequentially decodes the decompressed word as follows + + for (i=0;i<1024;i++) { + tag = *next_tag++ + switch (tag) { + case 0 : *dest_buf++ = 0; break; + case 1 : dict_word = dictionary[*dict_index]; dictionary[*dict_index++] = *dest_buf++ = dict_word&0xfffffc00 | *LowBits++; break; + case 2 : x = *new_word++; k = (x>>10)&255; k = hashTable[k]; dictionary[k] = *dest_buf++ = x; break; + case 3 : *dest_buf++ = dictionary[*dict_index++]; break; + } + + cclee, 11/30/12 +*/ + + .text + + .globl _WKdm_decompress_new +_WKdm_decompress_new: + + // save registers, and allocate stack memory for local variables + + pushq %rbp + movq %rsp, %rbp + pushq %r12 + pushq %r13 + pushq %rbx + + subq $(64+8+16), %rsp + + movq %rsi, %r12 // dest_buf + movq %rdx, %r13 // scracht_buf + + // PRELOAD_DICTONARY; dictionary starting address : starting address 0(%rsp) +#if 1 + movl $1, 0(%rsp) + movl $1, 4(%rsp) + movl $1, 8(%rsp) + movl $1, 12(%rsp) + movl $1, 16(%rsp) + movl $1, 20(%rsp) + movl $1, 24(%rsp) + movl $1, 28(%rsp) + movl $1, 32(%rsp) + movl $1, 36(%rsp) + movl $1, 40(%rsp) + movl $1, 44(%rsp) + movl $1, 48(%rsp) + movl $1, 52(%rsp) + movl $1, 56(%rsp) + movl $1, 60(%rsp) +#else + mov $0x100000001, %rax + mov %rax, (%rsp) + mov %rax, 8(%rsp) + mov %rax, 16(%rsp) + mov %rax, 24(%rsp) + mov %rax, 32(%rsp) + mov %rax, 40(%rsp) + mov %rax, 48(%rsp) + mov %rax, 56(%rsp) +#endif + + // WK_unpack_2bits(TAGS_AREA_START(src_buf), TAGS_AREA_END(src_buf), tempTagsArray); + + leaq 268(%rdi), %r10 // TAGS_AREA_END + leaq 12(%rdi), %rax // TAGS_AREA_START + movq %r13, %rsi // tempTagsArray + cmpq %rax, %r10 // TAGS_AREA_END vs TAGS_AREA_START + jbe 1f // if TAGS_AREA_END <= TAGS_AREA_START, skip L_WK_unpack_2bits + movq %r13, %rcx // next_word + xorl %r8d, %r8d // i = 0 + mov $(50529027<<32)+50529027, %r9 +L_WK_unpack_2bits: + movl 12(%rdi,%r8, 4), %eax + movl 12(%rdi,%r8, 4), %edx + shrl $2, %eax + shlq $32, %rax + orq %rdx, %rax + movq %rax, %rdx + shrq $4, %rax + andq %r9, %rdx + andq %r9, %rax + incq %r8 // i++ + movq %rdx, (%rcx) + movq %rax, 8(%rcx) + addq $16, %rcx // next_tags += 16 + cmpq $64, %r8 // i vs 64 + jne L_WK_unpack_2bits // repeat loop until i==64 +1: + + + // WK_unpack_4bits(QPOS_AREA_START(src_buf), QPOS_AREA_END(src_buf), tempQPosArray); + + mov 4(%rdi), %eax // WKdm header qpos end + leaq (%rdi,%rax,4), %r9 // QPOS_AREA_END + mov 0(%rdi), %eax // WKdm header qpos start + leaq (%rdi,%rax,4), %r8 // QPOS_AREA_START + leaq 1024(%r13), %rbx // tempQPosArray + cmpq %r8, %r9 // QPOS_AREA_END vs QPOS_AREA_START + jbe 1f // if QPOS_AREA_END <= QPOS_AREA_START, skip L_WK_unpack_4bits + leaq 8(%rbx), %rcx // next_qpos + + mov $(252645135<<32)+252645135, %r11 +L_WK_unpack_4bits: + movl (%r8), %eax // w = *next_word + movl %eax, %edx // w + shlq $28, %rax + orq %rdx, %rax + addq $4, %r8 // next_word++ + andq %r11, %rax + movq %rax, -8(%rcx) + addq $8, %rcx // next_qpos+=8 + cmpq %r8, %r9 // QPOS_AREA_END vs QPOS_AREA_START + ja L_WK_unpack_4bits // repeat loop until QPOS_AREA_END <= QPOS_AREA_START + + +1: + + // WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf), LOW_BITS_AREA_END(src_buf), tempLowBitsArray); + + movl 8(%rdi), %eax // LOW_BITS_AREA_END offset + leaq (%rdi,%rax,4), %rdi // LOW_BITS_AREA_END + leaq 2048(%r13), %r11 // tempLowBitsArray + leaq 4094(%r13), %r13 // final tenbits addr + sub %r9, %rdi // LOW_BITS_AREA_START vs LOW_BITS_AREA_END + jle 1f // if START>=END, skip L_WK_unpack_3_tenbits + movq %r11, %rcx // next_low_bits +L_WK_unpack_3_tenbits: + movl (%r9), %eax // w = 
*next_word, 0:c:b:a + movl $(1023<<10), %edx + movl $(1023<<20), %r8d + andl %eax, %edx // b << 10 + andl %eax, %r8d // c << 20 + andq $1023, %rax + shll $6, %edx + shlq $12, %r8 + orl %edx, %eax + orq %r8, %rax + cmp %r13, %rcx + je 2f + mov %rax, (%rcx) + jmp 3f +2: mov %ax, (%rcx) +3: + addq $4, %r9 // next_word++ + addq $6, %rcx // next_low_bits += 3 + sub $4, %rdi + jg L_WK_unpack_3_tenbits // repeat loop if LOW_BITS_AREA_END > next_word +1: + + + #define next_qpos %rbx + #define hash %r8 + #define tags_counter %edi + #define dest_buf %r12 + #define next_full_patt %r10 + + leaq _hashLookupTable_new(%rip), hash // hash look up table + movl $1024, tags_counter // tags_counter + jmp L_next + + .align 4,0x90 +L_nonpartital: + jl L_ZERO_TAG + cmpb $2, -1(%rsi) + je L_MISS_TAG + +L_EXACT_TAG: + movzbl (next_qpos), %eax // qpos = *next_qpos + incq next_qpos // next_qpos++ + decl tags_counter // tags_counter-- + movl (%rsp,%rax,4), %eax // w = dictionary[qpos] + movl %eax, -4(dest_buf) // *dest_buf = w + je L_done + +L_next: + incq %rsi // next_tag++ + addq $4, dest_buf + cmpb $1, -1(%rsi) + jne L_nonpartital + +L_PARTIAL_TAG: + movzbl (next_qpos),%edx // qpos = *next_qpos + incq next_qpos // next_qpos++ + movl (%rsp,%rdx,4), %eax // read dictionary word + andl $-1024, %eax // clear lower 10 bits + or (%r11), %ax // pad the lower 10-bits from *next_low_bits + addq $2, %r11 // next_low_bits++ + decl tags_counter // tags_counter-- + movl %eax, (%rsp,%rdx,4) // *dict_location = newly formed word + movl %eax, -4(dest_buf) // *dest_buf = newly formed word + jg L_next // repeat loop until next_tag==tag_area_end + +L_done: + + // release stack memory, restore registers, and return + + addq $(64+8+16), %rsp + popq %rbx + popq %r13 + popq %r12 + leave + ret + + .align 4,0x90 +L_MISS_TAG: + movl (next_full_patt), %edx // w = *next_full_patt + movl (next_full_patt), %eax // w = *next_full_patt + shrl $10, %edx // w>>10 + addq $4, next_full_patt // next_full_patt++ + movzbl %dl, %edx // 8-bit hash table index + movl %eax, -4(dest_buf) // *dest_buf = word + movzbl (hash,%rdx),%edx // qpos + decl tags_counter // tags_counter-- + movl %eax, (%rsp,%rdx) // dictionary[qpos] = word + jg L_next // repeat the loop + jmp L_done + + .align 4,0x90 +L_ZERO_TAG: + decl tags_counter // tags_counter-- + movl $0, -4(dest_buf) // *dest_buf = 0 + jg L_next // repeat the loop + jmp L_done + + diff --git a/osfmk/x86_64/idt64.s b/osfmk/x86_64/idt64.s index a4a62feaa..3eaf156d8 100644 --- a/osfmk/x86_64/idt64.s +++ b/osfmk/x86_64/idt64.s @@ -27,6 +27,7 @@ */ #include #include +#include #include #include #include @@ -131,7 +132,8 @@ /* A trap with a special-case handler, hence we don't need to define anything */ #define TRAP_SPC(n, f) -#define TRAP_IST(n, f) +#define TRAP_IST1(n, f) +#define TRAP_IST2(n, f) #define USER_TRAP_SPC(n, f) /* Generate all the stubs */ @@ -140,76 +142,71 @@ /* * Common dispatch point. * Determine what mode has been interrupted and save state accordingly. + * Here with: + * rsp from user-space: interrupt state in PCB, or + * from kernel-space: interrupt state in kernel or interrupt stack + * GSBASE from user-space: pthread area, or + * from kernel-space: cpu_data */ L_dispatch: cmpl $(KERNEL64_CS), ISF64_CS(%rsp) - je L_64bit_dispatch + je L_dispatch_kernel swapgs - /* - * Check for trap from EFI32, and restore cr3 and rsp if so. - * A trap from EFI32 is fatal. 
- */ - cmpl $(KERNEL32_CS), ISF64_CS(%rsp) - jne L_dispatch_continue - push %rcx - mov EXT(pal_efi_saved_cr3)(%rip), %rcx - mov %rcx, %cr3 - leaq (%rip), %rcx - shr $32, %rcx /* splice the upper 32-bits of rip */ - shl $32, %rsp /* .. and the lower 32-bits of rsp */ - shrd $32, %rcx, %rsp /* to recover the full 64-bits of rsp */ - pop %rcx - -L_dispatch_continue: +L_dispatch_user: cmpl $(TASK_MAP_32BIT), %gs:CPU_TASK_MAP - je L_32bit_dispatch /* 32-bit user task */ - /* fall through to 64bit user dispatch */ + je L_dispatch_U32 /* 32-bit user task */ + +L_dispatch_U64: + subq $(ISS64_OFFSET), %rsp + mov %r15, R64_R15(%rsp) + mov %rsp, %r15 + mov %gs:CPU_KERNEL_STACK, %rsp + jmp L_dispatch_64bit + +L_dispatch_kernel: + subq $(ISS64_OFFSET), %rsp + mov %r15, R64_R15(%rsp) + mov %rsp, %r15 /* * Here for 64-bit user task or kernel */ -L_64bit_dispatch: - subq $(ISS64_OFFSET), %rsp - movl $(SS_64), SS_FLAVOR(%rsp) +L_dispatch_64bit: + movl $(SS_64), SS_FLAVOR(%r15) - cld - /* * Save segment regs - for completeness since theyre not used. */ - movl %fs, R64_FS(%rsp) - movl %gs, R64_GS(%rsp) + movl %fs, R64_FS(%r15) + movl %gs, R64_GS(%r15) /* Save general-purpose registers */ - mov %rax, R64_RAX(%rsp) - mov %rcx, R64_RCX(%rsp) - mov %rbx, R64_RBX(%rsp) - mov %rbp, R64_RBP(%rsp) - mov %r11, R64_R11(%rsp) - mov %r12, R64_R12(%rsp) - mov %r13, R64_R13(%rsp) - mov %r14, R64_R14(%rsp) - mov %r15, R64_R15(%rsp) + mov %rax, R64_RAX(%r15) + mov %rbx, R64_RBX(%r15) + mov %rcx, R64_RCX(%r15) + mov %rdx, R64_RDX(%r15) + mov %rbp, R64_RBP(%r15) + mov %rdi, R64_RDI(%r15) + mov %rsi, R64_RSI(%r15) + mov %r8, R64_R8(%r15) + mov %r9, R64_R9(%r15) + mov %r10, R64_R10(%r15) + mov %r11, R64_R11(%r15) + mov %r12, R64_R12(%r15) + mov %r13, R64_R13(%r15) + mov %r14, R64_R14(%r15) /* cr2 is significant only for page-faults */ mov %cr2, %rax - mov %rax, R64_CR2(%rsp) + mov %rax, R64_CR2(%r15) - /* Other registers (which may contain syscall args) */ - mov %rdi, R64_RDI(%rsp) /* arg0 .. */ - mov %rsi, R64_RSI(%rsp) - mov %rdx, R64_RDX(%rsp) - mov %r10, R64_R10(%rsp) - mov %r8, R64_R8(%rsp) - mov %r9, R64_R9(%rsp) /* .. 
arg5 */ + mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */ + mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */ + mov R64_CS(%r15), %esi /* %esi := cs for later */ - mov R64_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */ - mov R64_TRAPFN(%rsp), %rdx /* %rdx := trapfn for later */ - mov R64_CS(%rsp), %esi /* %esi := cs for later */ - - jmp L_common_dispatch + jmp L_common_dispatch L_64bit_entry_reject: /* @@ -220,7 +217,7 @@ L_64bit_entry_reject: movq %rax, ISF64_TRAPFN+8(%rsp) popq %rax movq $(T_INVALID_OPCODE), ISF64_TRAPNO(%rsp) - jmp L_64bit_dispatch + jmp L_dispatch_U64 L_32bit_entry_check: /* @@ -230,56 +227,59 @@ L_32bit_entry_check: jne L_64bit_entry_reject /* fall through to 32-bit handler: */ -L_32bit_dispatch: /* 32-bit user task */ - subq $(ISC32_OFFSET), %rsp - movl $(SS_32), SS_FLAVOR(%rsp) +L_dispatch_U32: /* 32-bit user task */ + subq $(ISS64_OFFSET), %rsp + mov %rsp, %r15 + mov %gs:CPU_KERNEL_STACK, %rsp + movl $(SS_32), SS_FLAVOR(%r15) - cld /* * Save segment regs */ - movl %ds, R32_DS(%rsp) - movl %es, R32_ES(%rsp) - movl %fs, R32_FS(%rsp) - movl %gs, R32_GS(%rsp) + movl %ds, R32_DS(%r15) + movl %es, R32_ES(%r15) + movl %fs, R32_FS(%r15) + movl %gs, R32_GS(%r15) /* * Save general 32-bit registers */ - mov %eax, R32_EAX(%rsp) - mov %ebx, R32_EBX(%rsp) - mov %ecx, R32_ECX(%rsp) - mov %edx, R32_EDX(%rsp) - mov %ebp, R32_EBP(%rsp) - mov %esi, R32_ESI(%rsp) - mov %edi, R32_EDI(%rsp) + mov %eax, R32_EAX(%r15) + mov %ebx, R32_EBX(%r15) + mov %ecx, R32_ECX(%r15) + mov %edx, R32_EDX(%r15) + mov %ebp, R32_EBP(%r15) + mov %esi, R32_ESI(%r15) + mov %edi, R32_EDI(%r15) /* Unconditionally save cr2; only meaningful on page faults */ mov %cr2, %rax - mov %eax, R32_CR2(%rsp) + mov %eax, R32_CR2(%r15) /* * Copy registers already saved in the machine state * (in the interrupt stack frame) into the compat save area. */ - mov ISC32_RIP(%rsp), %eax - mov %eax, R32_EIP(%rsp) - mov ISC32_RFLAGS(%rsp), %eax - mov %eax, R32_EFLAGS(%rsp) - mov ISC32_RSP(%rsp), %eax - mov %eax, R32_UESP(%rsp) - mov ISC32_SS(%rsp), %eax - mov %eax, R32_SS(%rsp) -L_32bit_dispatch_after_fault: - mov ISC32_CS(%rsp), %esi /* %esi := %cs for later */ - mov %esi, R32_CS(%rsp) - mov ISC32_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */ - mov %ebx, R32_TRAPNO(%rsp) - mov ISC32_ERR(%rsp), %eax - mov %eax, R32_ERR(%rsp) - mov ISC32_TRAPFN(%rsp), %rdx /* %rdx := trapfn for later */ + mov R64_RIP(%r15), %eax + mov %eax, R32_EIP(%r15) + mov R64_RFLAGS(%r15), %eax + mov %eax, R32_EFLAGS(%r15) + mov R64_RSP(%r15), %eax + mov %eax, R32_UESP(%r15) + mov R64_SS(%r15), %eax + mov %eax, R32_SS(%r15) +L_dispatch_U32_after_fault: + mov R64_CS(%r15), %esi /* %esi := %cs for later */ + mov %esi, R32_CS(%r15) + mov R64_TRAPNO(%r15), %ebx /* %ebx := trapno for later */ + mov %ebx, R32_TRAPNO(%r15) + mov R64_ERR(%r15), %eax + mov %eax, R32_ERR(%r15) + mov R64_TRAPFN(%r15), %rdx /* %rdx := trapfn for later */ L_common_dispatch: + cld /* Ensure the direction flag is clear in the kernel */ + /* * On entering the kernel, we don't need to switch cr3 * because the kernel shares the user's address space. @@ -384,12 +384,12 @@ Entry(ret_to_user) mov %rax, %dr7 /* Set DR7 */ movq $0, %gs:CPU_DR7 4: - cmpl $(SS_64), SS_FLAVOR(%rsp) /* 64-bit state? */ + cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */ je L_64bit_return L_32bit_return: #if DEBUG_IDT64 - cmpl $(SS_32), SS_FLAVOR(%rsp) /* 32-bit state? */ + cmpl $(SS_32), SS_FLAVOR(%r15) /* 32-bit state? 
*/ je 1f cli POSTCODE2(0x6432) @@ -399,34 +399,35 @@ L_32bit_return: /* * Restore registers into the machine state for iret. + * Here on fault stack and PCB address in R11. */ - movl R32_EIP(%rsp), %eax - movl %eax, ISC32_RIP(%rsp) - movl R32_EFLAGS(%rsp), %eax - movl %eax, ISC32_RFLAGS(%rsp) - movl R32_CS(%rsp), %eax - movl %eax, ISC32_CS(%rsp) - movl R32_UESP(%rsp), %eax - movl %eax, ISC32_RSP(%rsp) - movl R32_SS(%rsp), %eax - movl %eax, ISC32_SS(%rsp) + movl R32_EIP(%r15), %eax + movl %eax, R64_RIP(%r15) + movl R32_EFLAGS(%r15), %eax + movl %eax, R64_RFLAGS(%r15) + movl R32_CS(%r15), %eax + movl %eax, R64_CS(%r15) + movl R32_UESP(%r15), %eax + movl %eax, R64_RSP(%r15) + movl R32_SS(%r15), %eax + movl %eax, R64_SS(%r15) /* * Restore general 32-bit registers */ - movl R32_EAX(%rsp), %eax - movl R32_EBX(%rsp), %ebx - movl R32_ECX(%rsp), %ecx - movl R32_EDX(%rsp), %edx - movl R32_EBP(%rsp), %ebp - movl R32_ESI(%rsp), %esi - movl R32_EDI(%rsp), %edi + movl R32_EAX(%r15), %eax + movl R32_EBX(%r15), %ebx + movl R32_ECX(%r15), %ecx + movl R32_EDX(%r15), %edx + movl R32_EBP(%r15), %ebp + movl R32_ESI(%r15), %esi + movl R32_EDI(%r15), %edi /* - * Restore segment registers. We make take an exception here but - * we've got enough space left in the save frame area to absorb - * a hardware frame plus the trapfn and trapno + * Restore segment registers. A segment exception taken here will + * push state on the IST1 stack and will not affect the "PCB stack". */ + mov %r15, %rsp /* Set the PCB as the stack */ swapgs EXT(ret32_set_ds): movl R32_DS(%rsp), %ds @@ -438,7 +439,7 @@ EXT(ret32_set_gs): movl R32_GS(%rsp), %gs /* pop compat frame + trapno, trapfn and error */ - add $(ISC32_OFFSET)+8+8+8, %rsp + add $(ISS64_OFFSET)+8+8+8, %rsp cmpl $(SYSENTER_CS),ISF64_CS-8-8-8(%rsp) /* test for fast entry/exit */ je L_fast_exit @@ -452,53 +453,60 @@ L_fast_exit: popf /* flags - carry denotes failure */ pop %rcx /* user return esp */ sti /* interrupts enabled after sysexit */ - .byte 0x0f,0x35 /* 32-bit sysexit */ + sysexitl /* 32-bit sysexit */ ret_to_kernel: #if DEBUG_IDT64 - cmpl $(SS_64), SS_FLAVOR(%rsp) /* 64-bit state? */ + cmpl $(SS_64), SS_FLAVOR(%r15) /* 64-bit state? */ je 1f cli POSTCODE2(0x6464) - CCALL1(panic_idt64, %rsp) + CCALL1(panic_idt64, %r15) hlt 1: - cmpl $(KERNEL64_CS), R64_CS(%rsp) + cmpl $(KERNEL64_CS), R64_CS(%r15) je 2f - CCALL1(panic_idt64, %rsp) + CCALL1(panic_idt64, %r15) hlt 2: #endif L_64bit_return: - testb $3, R64_CS(%rsp) /* returning to user-space? */ - jz 1f - swapgs -1: + /* + * Restore general 64-bit registers. + * Here on fault stack and PCB address in R15. + */ + mov R64_R14(%r15), %r14 + mov R64_R13(%r15), %r13 + mov R64_R12(%r15), %r12 + mov R64_R11(%r15), %r11 + mov R64_R10(%r15), %r10 + mov R64_R9(%r15), %r9 + mov R64_R8(%r15), %r8 + mov R64_RSI(%r15), %rsi + mov R64_RDI(%r15), %rdi + mov R64_RBP(%r15), %rbp + mov R64_RDX(%r15), %rdx + mov R64_RCX(%r15), %rcx + mov R64_RBX(%r15), %rbx + mov R64_RAX(%r15), %rax /* - * Restore general 64-bit registers + * We must swap GS base if we're returning to user-space, + * or we're returning from an NMI that occurred in a trampoline + * before the user GS had been swapped. In the latter case, the NMI + * handler will have flagged the high-order 32-bits of the CS. 
*/ - mov R64_R15(%rsp), %r15 - mov R64_R14(%rsp), %r14 - mov R64_R13(%rsp), %r13 - mov R64_R12(%rsp), %r12 - mov R64_R11(%rsp), %r11 - mov R64_R10(%rsp), %r10 - mov R64_R9(%rsp), %r9 - mov R64_R8(%rsp), %r8 - mov R64_RSI(%rsp), %rsi - mov R64_RDI(%rsp), %rdi - mov R64_RBP(%rsp), %rbp - mov R64_RDX(%rsp), %rdx - mov R64_RBX(%rsp), %rbx - mov R64_RCX(%rsp), %rcx - mov R64_RAX(%rsp), %rax - - add $(ISS64_OFFSET)+24, %rsp /* pop saved state frame + - trapno + trapfn and error */ + cmpq $(KERNEL64_CS), R64_CS(%r15) + jz 1f + swapgs +1: + mov R64_R15(%r15), %rsp + xchg %r15, %rsp + add $(ISS64_OFFSET)+24, %rsp /* pop saved state */ + /* + trapno/trapfn/error */ cmpl $(SYSCALL_CS),ISF64_CS-24(%rsp) - /* test for fast entry/exit */ + /* test for fast entry/exit */ je L_sysret .globl _dump_iretq EXT(ret64_iret): @@ -508,7 +516,7 @@ L_sysret: /* * Here to load rcx/r11/rsp and perform the sysret back to user-space. * rcx user rip - * r1 user rflags + * r11 user rflags * rsp user stack pointer */ mov ISF64_RIP-24(%rsp), %rcx @@ -572,8 +580,8 @@ L_syscall_continue: movq $(T_SYSCALL), ISF64_TRAPNO(%rsp) /* trapno */ leaq HNDL_SYSCALL(%rip), %r11; movq %r11, ISF64_TRAPFN(%rsp) - mov ISF64_RFLAGS(%rsp), %r11 /* Avoid info leak,restore R11 */ - jmp L_64bit_dispatch /* this can only be a 64-bit task */ + mov ISF64_RFLAGS(%rsp), %r11 /* Avoid leak, restore R11 */ + jmp L_dispatch_U64 /* this can only be 64-bit */ /* * sysenter entry point @@ -616,17 +624,11 @@ Entry(idt64_page_fault) PUSH_FUNCTION(HNDL_ALLTRAPS) push $(T_PAGE_FAULT) push %rax /* save %rax temporarily */ - leaq EXT(idt64_unix_scall_copy_args)(%rip), %rax - cmp %rax, 8+ISF64_RIP(%rsp) /* fault during copy args? */ - je 1f /* - yes, handle copy arg fault */ testb $3, 8+ISF64_CS(%rsp) /* was trap from kernel? */ jz L_kernel_trap /* - yes, handle with care */ pop %rax /* restore %rax, swapgs, and continue */ swapgs - jmp L_dispatch_continue -1: - add $(8+ISF64_SIZE), %rsp /* remove entire intr stack frame */ - jmp L_copy_args_continue /* continue system call entry */ + jmp L_dispatch_user /* @@ -675,26 +677,20 @@ Entry(idt64_double_fault) leaq EXT(idt64_syscall)(%rip), %rax cmp %rax, ISF64_RIP+8(%rsp) pop %rax - jne L_64bit_dispatch + jne L_dispatch_kernel mov ISF64_RSP(%rsp), %rsp jmp L_syscall_continue /* - * General protection or segment-not-present fault. - * Check for a GP/NP fault in the kernel_return - * sequence; if there, report it as a GP/NP fault on the user's instruction. - * - * rsp-> 0 ISF64_TRAPNO: trap code (NP or GP) - * 8 ISF64_TRAPFN: trap function - * 16 ISF64_ERR: segment number in error (error code) - * 24 ISF64_RIP: rip - * 32 ISF64_CS: cs - * 40 ISF64_RFLAGS: rflags - * 48 ISF64_RIP: rsp - * 56 ISF64_SS: ss - * 64: old registers (trap is from kernel) + * For GP/NP/SS faults, we use the IST1 stack. + * For faults from user-space, we have to copy the machine state to the + * PCB stack and then dispatch as normal. + * For faults in kernel-space, we need to scrub for kernel exit faults and + * treat these as user-space faults. But for all other kernel-space faults + * we continue to run on the IST1 stack and we dispatch to handle the fault + * as fatal. 
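A note on the swapgs decision in the 64-bit return path above: because the saved CS slot is compared as a full 64-bit quantity (cmpq), a user CS and a kernel CS whose high-order 32 bits were flagged by the NMI trampoline both compare unequal to KERNEL64_CS, so a single test covers both reasons to swap GS back. As a predicate (the selector value here is illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    #define KERNEL64_CS 0x08ULL         /* illustrative selector value */

    /* a user CS, or a kernel CS with the NMI flag in bits 63:32 of the
     * slot, both fail the 64-bit compare and take the swapgs */
    static bool
    needs_swapgs_on_return(uint64_t saved_cs_slot)
    {
        return saved_cs_slot != KERNEL64_CS;
    }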
*/ Entry(idt64_gen_prot) PUSH_FUNCTION(HNDL_ALLTRAPS) @@ -711,14 +707,38 @@ Entry(idt64_segnp) pushq $(T_SEGMENT_NOT_PRESENT) /* indicate fault type */ trap_check_kernel_exit: - testb $3,ISF64_CS(%rsp) - jnz L_dispatch - /* - * trap was from kernel mode, - * so check for the kernel exit sequence - */ + testb $3,ISF64_CS(%rsp) + jz L_kernel_gpf + + /* Here for fault from user-space. Copy interrupt state to PCB. */ + swapgs push %rax + mov %rcx, %gs:CPU_UBER_TMP /* save user RCX */ + mov %gs:CPU_UBER_ISF, %rcx /* PCB stack addr */ + mov ISF64_SS+8(%rsp), %rax + mov %rax, ISF64_SS(%rcx) + mov ISF64_RSP+8(%rsp), %rax + mov %rax, ISF64_RSP(%rcx) + mov ISF64_RFLAGS+8(%rsp), %rax + mov %rax, ISF64_RFLAGS(%rcx) + mov ISF64_CS+8(%rsp), %rax + mov %rax, ISF64_CS(%rcx) + mov ISF64_RIP+8(%rsp), %rax + mov %rax, ISF64_RIP(%rcx) + mov ISF64_ERR+8(%rsp), %rax + mov %rax, ISF64_ERR(%rcx) + mov ISF64_TRAPFN+8(%rsp), %rax + mov %rax, ISF64_TRAPFN(%rcx) + mov ISF64_TRAPNO+8(%rsp), %rax + mov %rax, ISF64_TRAPNO(%rcx) + pop %rax + mov %gs:CPU_UBER_TMP, %rsp /* user RCX into RSP */ + xchg %rcx, %rsp /* to PCB stack with user RCX */ + jmp L_dispatch_user +L_kernel_gpf: + /* Here for GPF from kernel_space. Check for recoverable cases. */ + push %rax leaq EXT(ret32_iret)(%rip), %rax cmp %rax, 8+ISF64_RIP(%rsp) je L_fault_iret @@ -738,12 +758,7 @@ trap_check_kernel_exit: cmp %rax, 8+ISF64_RIP(%rsp) je L_32bit_fault_set_seg - leaq EXT(idt64_unix_scall_copy_args)(%rip), %rax - cmp %rax, 8+ISF64_RIP(%rsp) - cmove 8+ISF64_RSP(%rsp), %rsp - je L_copy_args_continue - - /* fall through */ + /* Fall through */ L_kernel_trap: /* @@ -752,19 +767,25 @@ L_kernel_trap: * Note: %rax has been pushed on stack. * Make sure we're not on the PCB stack, if so move to the kernel stack. * This is likely a fatal condition. - * But first, try to ensure we have the kernel gs base active... + * But first, ensure we have the kernel gs base active... */ - movq %gs:CPU_THIS, %rax /* get gs_base into %rax */ - test %rax, %rax /* test sign bit (MSB) */ - js 1f /* -ve kernel addr, no swap */ - swapgs /* +ve user addr, swap */ + push %rcx + push %rdx + mov $(MSR_IA32_GS_BASE), %ecx + rdmsr /* read kernel gsbase */ + test $0x80000000, %edx /* test MSB of address */ + jne 1f + swapgs /* so swap */ 1: + pop %rdx + pop %rcx + movq %gs:CPU_UBER_ISF, %rax /* PCB stack addr */ subq %rsp, %rax cmpq $(PAGE_SIZE), %rax /* current stack in PCB? */ jb 2f /* - yes, deal with it */ pop %rax /* - no, restore %rax */ - jmp L_64bit_dispatch + jmp L_dispatch_kernel 2: /* * Here if %rsp is in the PCB @@ -781,49 +802,74 @@ L_kernel_trap: pushq 8+ISF64_TRAPFN(%rax) pushq 8+ISF64_TRAPNO(%rax) movq (%rax), %rax - jmp L_64bit_dispatch + jmp L_dispatch_kernel + /* * GP/NP fault on IRET: CS or SS is in error. - * Note that the user ss is originally 16-byte aligned, we'd popped the - * stack back to contain just the rip/cs/rflags/rsp/ss before issuing the iret. - * On taking the GP/NP fault on the iret instruction, the stack is 16-byte - * aligned before pushed the interrupt frame. Hence, an 8-byte padding exists. + * User GSBASE is active. 
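On the rdmsr-based test in L_kernel_trap above, which replaces the old read of %gs:CPU_THIS: the kernel's GS base is always a kernel-half (canonical, sign-extended) address, so bit 31 of the high MSR word says whether the kernel or user GS base is live without dereferencing %gs at all. The equivalent predicate:

    #include <stdbool.h>
    #include <stdint.h>

    /* rdmsr returns the GS base split across EDX:EAX; the assembly tests
     * bit 31 of EDX, i.e. bit 63 of the base address, which is set only
     * for kernel-half addresses */
    static bool
    kernel_gs_base_active(uint32_t gs_base_msr_hi /* EDX after rdmsr */)
    {
        return (gs_base_msr_hi & 0x80000000u) != 0;
    }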
+ * On IST1 stack containing: + * (rax saved above, which is immediately popped) + * 0 ISF64_TRAPNO: trap code (NP or GP) + * 8 ISF64_TRAPFN: trap function + * 16 ISF64_ERR: segment number in error (error code) + * 24 ISF64_RIP: kernel RIP + * 32 ISF64_CS: kernel CS + * 40 ISF64_RFLAGS: kernel RFLAGS + * 48 ISF64_RSP: kernel RSP + * 56 ISF64_SS: kernel SS + * On the PCB stack, pointed to by the kernel's RSP is: + * 0 user RIP + * 8 user CS + * 16 user RFLAGS + * 24 user RSP + * 32 user SS * - * on SP is - * (- rax saved above, which is immediately popped) + * We need to move the kernel's TRAPNO, TRAPFN and ERR to the PCB and handle + * as a user fault with: * 0 ISF64_TRAPNO: trap code (NP or GP) * 8 ISF64_TRAPFN: trap function * 16 ISF64_ERR: segment number in error (error code) - * 24 ISF64_RIP: rip - * 32 ISF64_CS: cs - * 40 ISF64_RFLAGS: rflags - * 48 ISF64_RSP: rsp <-- new trapno - * 56 ISF64_SS: ss <-- new trapfn - * 64 pad8 <-- new errcode - * 72 user rip - * 80 user cs - * 88 user rflags - * 96 user rsp - * 104 user ss (16-byte aligned) + * 24 user RIP + * 32 user CS + * 40 user RFLAGS + * 48 user RSP + * 56 user SS */ L_fault_iret: pop %rax /* recover saved %rax */ mov %rax, ISF64_RIP(%rsp) /* save rax (we don`t need saved rip) */ - mov ISF64_TRAPNO(%rsp), %rax - mov %rax, ISF64_RSP(%rsp) /* put in user trap number */ - mov ISF64_TRAPFN(%rsp), %rax - mov %rax, ISF64_SS(%rsp) /* put in user trap function */ - mov ISF64_ERR(%rsp), %rax /* get error code */ - mov %rax, 8+ISF64_SS(%rsp) /* put in user errcode */ - mov ISF64_RIP(%rsp), %rax /* restore rax */ - add $(ISF64_RSP),%rsp /* reset to new trapfn */ + mov ISF64_RSP(%rsp), %rax + xchg %rax, %rsp /* switch to PCB stack */ + push ISF64_ERR(%rax) + push ISF64_TRAPFN(%rax) + push ISF64_TRAPNO(%rax) + mov ISF64_RIP(%rax), %rax /* restore rax */ /* now treat as fault from user */ jmp L_dispatch /* * Fault restoring a segment register. All of the saved state is still * on the stack untouched since we haven't yet moved the stack pointer. + * On IST1 stack containing: + * (rax saved above, which is immediately popped) + * 0 ISF64_TRAPNO: trap code (NP or GP) + * 8 ISF64_TRAPFN: trap function + * 16 ISF64_ERR: segment number in error (error code) + * 24 ISF64_RIP: kernel RIP + * 32 ISF64_CS: kernel CS + * 40 ISF64_RFLAGS: kernel RFLAGS + * 48 ISF64_RSP: kernel RSP + * 56 ISF64_SS: kernel SS + * On the PCB stack, pointed to by the kernel's RSP is: + * 0 user trap code + * 8 user trap function + * 16 user err + * 24 user RIP + * 32 user CS + * 40 user RFLAGS + * 48 user RSP + * 56 user SS */ L_32bit_fault_set_seg: swapgs @@ -832,16 +878,15 @@ L_32bit_fault_set_seg: mov ISF64_TRAPFN(%rsp), %rcx mov ISF64_ERR(%rsp), %rdx mov ISF64_RSP(%rsp), %rsp /* reset stack to saved state */ - mov %rax,ISC32_TRAPNO(%rsp) - mov %rcx,ISC32_TRAPFN(%rsp) - mov %rdx,ISC32_ERR(%rsp) + mov %rax,R64_TRAPNO(%rsp) + mov %rcx,R64_TRAPFN(%rsp) + mov %rdx,R64_ERR(%rsp) /* now treat as fault from user */ /* except that all the state is */ /* already saved - we just have to */ /* move the trapno and error into */ /* the compatibility frame */ - jmp L_32bit_dispatch_after_fault - + jmp L_dispatch_U32_after_fault /* * Fatal exception handlers: @@ -862,12 +907,101 @@ Entry(idt64_mc) pushq $(T_MACHINE_CHECK) jmp L_dispatch +/* + * NMI + * This may or may not be fatal but extreme care is required + * because it may fall when control was already in another trampoline. + * + * We get here on IST2 stack which is used for NMIs only. 
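The stack diagrams in these fault paths all follow the same eight-slot software-plus-hardware frame. As a C mirror of the offsets documented in the comments (the kernel generates the ISF64_* constants elsewhere; this struct exists only to make the diagrams easier to read):

    #include <stddef.h>
    #include <stdint.h>

    struct isf64 {              /* offsets exactly as listed in the comments */
        uint64_t trapno;        /*  0  ISF64_TRAPNO */
        uint64_t trapfn;        /*  8  ISF64_TRAPFN */
        uint64_t err;           /* 16  ISF64_ERR    */
        uint64_t rip;           /* 24  ISF64_RIP    */
        uint64_t cs;            /* 32  ISF64_CS     */
        uint64_t rflags;        /* 40  ISF64_RFLAGS */
        uint64_t rsp;           /* 48  ISF64_RSP    */
        uint64_t ss;            /* 56  ISF64_SS     */
    };

    _Static_assert(offsetof(struct isf64, rip) == 24, "matches ISF64_RIP");
    _Static_assert(offsetof(struct isf64, ss) == 56, "matches ISF64_SS");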
+ * We must be aware of the interrupted state: + * - from user-space, we + * - copy state to the PCB and continue; + * - from kernel-space, we + * - copy state to the kernel stack and continue, but + * - check what GSBASE was active, set the kernel base and + * - ensure that the active state is restored when the NMI is dismissed. + */ +Entry(idt64_nmi) + push %rax /* save RAX to ISF64_ERR */ + push %rcx /* save RCX to ISF64_TRAPFN */ + push %rdx /* save RDX to ISF64_TRAPNO */ + testb $3, ISF64_CS(%rsp) /* NMI from user-space? */ + je 1f + + /* From user-space: copy interrupt state to user PCB */ + swapgs + mov %gs:CPU_UBER_ISF, %rcx /* PCB stack addr */ + add $(ISF64_SIZE), %rcx /* adjust to base of ISF */ + swapgs /* swap back for L_dispatch */ + jmp 4f /* Copy state to PCB */ + +1: + /* + * From kernel-space: + * Determine whether the kernel or user GS is set. + * Set the kernel and ensure that we'll swap back correctly at IRET. + */ + mov $(MSR_IA32_GS_BASE), %ecx + rdmsr /* read kernel gsbase */ + test $0x80000000, %edx /* test MSB of address */ + jne 2f + swapgs /* so swap */ + movl $1, ISF64_CS+4(%rsp) /* and set flag in CS slot */ +2: + /* + * Determine whether we're on the kernel or interrupt stack + * when the NMI hit. + */ + mov ISF64_RSP(%rsp), %rcx + mov %gs:CPU_KERNEL_STACK, %rax + xor %rcx, %rax + and EXT(kernel_stack_mask)(%rip), %rax + test %rax, %rax /* are we on the kernel stack? */ + je 3f /* yes */ + + mov %gs:CPU_INT_STACK_TOP, %rax + dec %rax /* intr stack top is byte above max */ + xor %rcx, %rax + and EXT(kernel_stack_mask)(%rip), %rax + test %rax, %rax /* are we on the interrupt stack? */ + je 3f /* yes */ + + mov %gs:CPU_KERNEL_STACK, %rcx +3: + /* 16-byte-align kernel/interrupt stack for state push */ + and $0xFFFFFFFFFFFFFFF0, %rcx + +4: + /* + * Copy state from NMI stack (RSP) to the save area (RCX) which is + * the PCB for user or kernel/interrupt stack from kernel. 
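The stack classification in steps 2 and 3 of the NMI path relies on kernel stacks being aligned, power-of-two-sized regions: XOR the interrupted RSP with a reference address on the candidate stack and mask with kernel_stack_mask; a zero result means both addresses lie on the same stack. (The interrupt-stack check first decrements the top pointer because, per the comment, the top is the byte above the stack.) A runnable form, with an assumed stack size:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define KERNEL_STACK_SIZE (16u * 1024u)  /* assumption for this example */
    #define KERNEL_STACK_MASK (~(uint64_t)(KERNEL_STACK_SIZE - 1))

    static bool
    on_same_stack(uint64_t rsp, uint64_t ref)
    {
        return ((rsp ^ ref) & KERNEL_STACK_MASK) == 0;
    }

    int
    main(void)
    {
        uint64_t base = 0xffffff8010344000ULL;    /* aligned stack base */
        assert(on_same_stack(base + 0x100, base + KERNEL_STACK_SIZE - 1));
        assert(!on_same_stack(base + 0x100, base + KERNEL_STACK_SIZE));
        return 0;
    }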
+ * ISF64_ERR(RSP) saved RAX + * ISF64_TRAPFN(RSP) saved RCX + * ISF64_TRAPNO(RSP) saved RDX + */ + xchg %rsp, %rcx /* set for pushes */ + push ISF64_SS(%rcx) + push ISF64_RSP(%rcx) + push ISF64_RFLAGS(%rcx) + push ISF64_CS(%rcx) + push ISF64_RIP(%rcx) + push $(0) /* error code 0 */ + lea HNDL_ALLINTRS(%rip), %rax + push %rax /* trapfn allintrs */ + push $(T_NMI) /* trapno T_NMI */ + mov ISF64_ERR(%rcx), %rax + mov ISF64_TRAPNO(%rcx), %rdx + mov ISF64_TRAPFN(%rcx), %rcx + jmp L_dispatch -/* All 'exceptions' enter hndl_alltraps: - * rsp -> x86_saved_state_t - * esi cs at trap + +/* All 'exceptions' enter hndl_alltraps, with: + * r15 x86_saved_state_t address + * rsp kernel stack if user-space, otherwise interrupt or kernel stack + * esi cs at trap * * The rest of the state is set up as: + * both rsp and r15 are 16-byte aligned * interrupts disabled * direction flag cleared */ @@ -883,10 +1017,7 @@ Entry(hndl_alltraps) mov TH_TASK(%rcx), %rbx TASK_VTIMER_CHECK(%rbx, %rcx) - movq %rsp, %rdi /* also pass it as arg0 */ - movq %gs:CPU_KERNEL_STACK,%rsp /* switch to kernel stack */ - - CCALL(user_trap) /* call user trap routine */ + CCALL1(user_trap, %r15) /* call user trap routine */ /* user_trap() unmasks interrupts */ cli /* hold off intrs - critical section */ xorl %ecx, %ecx /* don't check if we're in the PFZ */ @@ -895,37 +1026,39 @@ Entry(hndl_alltraps) #define STI sti Entry(return_from_trap) - movq %gs:CPU_ACTIVE_THREAD,%rsp - movq TH_PCB_ISS(%rsp), %rsp /* switch back to PCB stack */ + movq %gs:CPU_ACTIVE_THREAD,%r15 /* Get current thread */ + movl $-1, TH_IOTIER_OVERRIDE(%r15) /* Clear IO tier override before returning to userspace */ + cmpl $0, TH_RWLOCK_COUNT(%r15) /* Check if current thread has pending RW locks held */ + jz 1f + xorq %rbp, %rbp /* clear framepointer */ + mov %r15, %rdi /* Set RDI to current thread */ + CCALL(lck_rw_clear_promotions_x86) /* Clear promotions if needed */ +1: + movq TH_PCB_ISS(%r15), %r15 /* PCB stack */ movl %gs:CPU_PENDING_AST,%eax testl %eax,%eax - je EXT(return_to_user) /* branch if no AST */ + je EXT(return_to_user) /* branch if no AST */ L_return_from_trap_with_ast: - movq %rsp, %r13 - movq %gs:CPU_KERNEL_STACK, %rsp - testl %ecx, %ecx /* see if we need to check for an EIP in the PFZ */ je 2f /* no, go handle the AST */ - cmpl $(SS_64), SS_FLAVOR(%r13) /* are we a 64-bit task? */ + cmpl $(SS_64), SS_FLAVOR(%r15) /* are we a 64-bit task? */ je 1f /* no... 32-bit user mode */ - movl R32_EIP(%r13), %edi + movl R32_EIP(%r15), %edi xorq %rbp, %rbp /* clear framepointer */ CCALL(commpage_is_in_pfz32) testl %eax, %eax je 2f /* not in the PFZ... go service AST */ - movl %eax, R32_EBX(%r13) /* let the PFZ know we've pended an AST */ - movq %r13, %rsp /* switch back to PCB stack */ + movl %eax, R32_EBX(%r15) /* let the PFZ know we've pended an AST */ jmp EXT(return_to_user) 1: - movq R64_RIP(%r13), %rdi + movq R64_RIP(%r15), %rdi xorq %rbp, %rbp /* clear framepointer */ CCALL(commpage_is_in_pfz64) testl %eax, %eax je 2f /* not in the PFZ... 
go service AST */ - movl %eax, R64_RBX(%r13) /* let the PFZ know we've pended an AST */ - movq %r13, %rsp /* switch back to PCB stack */ + movl %eax, R64_RBX(%r15) /* let the PFZ know we've pended an AST */ jmp EXT(return_to_user) 2: STI /* interrupts always enabled on return to user mode */ @@ -935,6 +1068,7 @@ L_return_from_trap_with_ast: CCALL(i386_astintr) /* take the AST */ CLI + mov %rsp, %r15 /* AST changes stack, saved state */ xorl %ecx, %ecx /* don't check if we're in the PFZ */ jmp EXT(return_from_trap) /* and check again (rare) */ @@ -942,25 +1076,25 @@ L_return_from_trap_with_ast: * Trap from kernel mode. No need to switch stacks. * Interrupts must be off here - we will set them to state at time of trap * as soon as it's safe for us to do so and not recurse doing preemption + * */ -hndl_kerntrap: trap_from_kernel: - - movq %rsp, %rdi /* saved state addr */ - pushq R64_RIP(%rsp) /* Simulate a CALL from fault point */ + movq %r15, %rdi /* saved state addr */ + pushq R64_RIP(%r15) /* Simulate a CALL from fault point */ pushq %rbp /* Extend framepointer chain */ movq %rsp, %rbp CCALLWITHSP(kernel_trap) /* to kernel trap routine */ popq %rbp addq $8, %rsp + mov %rsp, %r15 /* DTrace slides stack/saved-state */ cli movl %gs:CPU_PENDING_AST,%eax /* get pending asts */ testl $(AST_URGENT),%eax /* any urgent preemption? */ je ret_to_kernel /* no, nothing to do */ - cmpl $(T_PREEMPT),R64_TRAPNO(%rsp) + cmpl $(T_PREEMPT),R64_TRAPNO(%r15) je ret_to_kernel /* T_PREEMPT handled in kernel_trap() */ - testl $(EFL_IF),R64_RFLAGS(%rsp) /* interrupts disabled? */ + testl $(EFL_IF),R64_RFLAGS(%r15) /* interrupts disabled? */ je ret_to_kernel cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ jne ret_to_kernel @@ -972,14 +1106,18 @@ trap_from_kernel: jne ret_to_kernel /* no, skip it */ CCALL1(i386_astintr, $1) /* take the AST */ + + mov %rsp, %r15 /* AST changes stack, saved state */ jmp ret_to_kernel /* * All interrupts on all tasks enter here with: - * rsp-> x86_saved_state_t + * r15 x86_saved_state_t + * rsp kernel or interrupt stack * esi cs at trap * + * both rsp and r15 are 16-byte aligned * interrupts disabled * direction flag cleared */ @@ -1000,9 +1138,9 @@ Entry(hndl_allintrs) orl $(CR0_TS),%eax /* or in TS bit */ mov %rax,%cr0 /* set cr0 */ - subq $8, %rsp /* for 16-byte stack alignment */ pushq %rcx /* save pointer to old stack */ - movq %rcx,%gs:CPU_INT_STATE /* save intr state */ + pushq %gs:CPU_INT_STATE /* save previous intr state */ + movq %r15,%gs:CPU_INT_STATE /* set intr state */ TIME_INT_ENTRY /* do timing */ @@ -1014,13 +1152,9 @@ Entry(hndl_allintrs) incl %gs:CPU_PREEMPTION_LEVEL incl %gs:CPU_INTERRUPT_LEVEL - movq %gs:CPU_INT_STATE, %rdi - - CCALL(interrupt) /* call generic interrupt routine */ + CCALL1(interrupt, %r15) /* call generic interrupt routine */ cli /* just in case we returned with intrs enabled */ - xor %rax,%rax - movq %rax,%gs:CPU_INT_STATE /* clear intr state pointer */ .globl EXT(return_to_iret) LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ @@ -1030,6 +1164,9 @@ LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ TIME_INT_EXIT /* do timing */ + popq %gs:CPU_INT_STATE /* reset/clear intr state pointer */ + popq %rsp /* switch back to old stack */ + movq %gs:CPU_ACTIVE_THREAD,%rax movq TH_PCB_FPS(%rax),%rax /* get pcb's ifps */ cmpq $0,%rax /* Is there a context */ @@ -1044,24 +1181,22 @@ LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ orl $(CR0_TS),%eax /* or in TS bit */ mov %rax,%cr0 /* set cr0 */ 2: - popq 
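Back in return_from_trap above, one subtlety: when the user RIP falls inside the commpage's preemption-free zone (PFZ), the AST is not serviced; the resume address returned by commpage_is_in_pfz32/64 is parked in the saved EBX/RBX so user space can re-enter the kernel once it leaves the zone. A sketch of that flow; the helper signatures are inferred from the call sites, not copied from headers:

    #include <stdint.h>

    /* stand-ins for the routines called above; inferred, not verbatim */
    extern uint32_t commpage_is_in_pfz64(uint64_t user_rip);
    extern void i386_astintr(int urgent);

    static void
    ast_on_return_to_user(uint64_t user_rip, uint64_t *saved_rbx, int check_pfz)
    {
        if (check_pfz) {
            uint32_t resume = commpage_is_in_pfz64(user_rip);
            if (resume != 0) {
                *saved_rbx = resume;   /* pend the AST for the PFZ */
                return;                /* straight back to user mode */
            }
        }
        i386_astintr(0);               /* service the AST now */
    }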
%rsp /* switch back to old stack */ - /* Load interrupted code segment into %eax */ - movl R32_CS(%rsp),%eax /* assume 32-bit state */ - cmpl $(SS_64),SS_FLAVOR(%rsp)/* 64-bit? */ + movl R32_CS(%r15),%eax /* assume 32-bit state */ + cmpl $(SS_64),SS_FLAVOR(%r15)/* 64-bit? */ #if DEBUG_IDT64 jne 4f - movl R64_CS(%rsp),%eax /* 64-bit user mode */ + movl R64_CS(%r15),%eax /* 64-bit user mode */ jmp 3f 4: - cmpl $(SS_32),SS_FLAVOR(%rsp) + cmpl $(SS_32),SS_FLAVOR(%r15) je 3f POSTCODE2(0x6431) - CCALL1(panic_idt64, %rsp) + CCALL1(panic_idt64, %r15) hlt #else jne 3f - movl R64_CS(%rsp),%eax /* 64-bit user mode */ + movl R64_CS(%r15),%eax /* 64-bit user mode */ #endif 3: testb $3,%al /* user mode, */ @@ -1078,13 +1213,6 @@ LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ cmpl $0,%gs:CPU_PREEMPTION_LEVEL /* preemption disabled? */ jne ret_to_kernel /* yes, skip it */ - movq %gs:CPU_KERNEL_STACK,%rax - movq %rsp,%rcx - xorq %rax,%rcx - andq EXT(kernel_stack_mask)(%rip),%rcx - testq %rcx,%rcx /* are we on the kernel stack? */ - jne ret_to_kernel /* no, skip it */ - /* * Take an AST from kernel space. We don't need (and don't want) * to do as much as the case where the interrupt came from user @@ -1092,6 +1220,7 @@ LEXT(return_to_iret) /* (label for kdb_kintr and hardclock) */ */ CCALL1(i386_astintr, $1) + mov %rsp, %r15 /* AST changes stack, saved state */ jmp ret_to_kernel @@ -1102,17 +1231,18 @@ int_from_intstack: incl %gs:CPU_PREEMPTION_LEVEL incl %gs:CPU_INTERRUPT_LEVEL incl %gs:CPU_NESTED_ISTACK - mov %rsp, %rdi /* x86_saved_state */ - CCALL(interrupt) + + push %gs:CPU_INT_STATE + mov %r15, %gs:CPU_INT_STATE + + CCALL1(interrupt, %r15) + + pop %gs:CPU_INT_STATE decl %gs:CPU_INTERRUPT_LEVEL decl %gs:CPU_PREEMPTION_LEVEL decl %gs:CPU_NESTED_ISTACK -#if DEBUG_IDT64 - CCALL1(panic_idt64, %rsp) - POSTCODE2(0x6411) - hlt -#endif + jmp ret_to_kernel /* @@ -1136,7 +1266,10 @@ ast_from_interrupt_user: * 32bit Tasks * System call entries via INTR_GATE or sysenter: * - * rsp -> x86_saved_state32_t + * r15 x86_saved_state32_t + * rsp kernel stack + * + * both rsp and r15 are 16-byte aligned * interrupts disabled * direction flag cleared */ @@ -1146,38 +1279,15 @@ Entry(hndl_sysenter) * We can be here either for a mach syscall or a unix syscall, * as indicated by the sign of the code: */ - movl R32_EAX(%rsp),%eax + movl R32_EAX(%r15),%eax testl %eax,%eax js EXT(hndl_mach_scall) /* < 0 => mach */ /* > 0 => unix */ Entry(hndl_unix_scall) -/* If the caller (typically LibSystem) has recorded the cumulative size of - * the arguments in EAX, copy them over from the user stack directly. - * We recover from exceptions inline--if the copy loop doesn't complete - * due to an exception, we fall back to copyin from compatibility mode. - * We can potentially extend this mechanism to mach traps as well (DRK). 
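A note on the interrupt-state bookkeeping in the hunks above: %gs:CPU_INT_STATE is now treated as a small stack rather than a single slot. Each entry pushes the previous pointer and installs its own saved-state pointer, so nested interrupts (int_from_intstack) restore the outer frame on exit instead of clearing the pointer, and backtracers always see the innermost frame. The C shape of that discipline:

    typedef struct saved_state saved_state;

    struct cpu_data {
        saved_state *int_state;        /* %gs:CPU_INT_STATE */
    };

    extern void interrupt(saved_state *s);

    static void
    handle_nested_interrupt(struct cpu_data *cpu, saved_state *s)
    {
        saved_state *prev = cpu->int_state;   /* push the previous state */
        cpu->int_state = s;                   /* innermost frame visible */
        interrupt(s);
        cpu->int_state = prev;                /* pop on the way out */
    }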
- */ - testl $(I386_SYSCALL_ARG_BYTES_MASK), %eax - jz L_copy_args_continue - movl %eax, %ecx - mov %gs:CPU_UBER_ARG_STORE_VALID, %rbx - shrl $(I386_SYSCALL_ARG_DWORDS_SHIFT), %ecx - andl $(I386_SYSCALL_ARG_DWORDS_MASK), %ecx - mov %gs:CPU_UBER_ARG_STORE, %rdi - mov ISC32_RSP(%rsp), %rsi - add $4, %rsi - movl $0, (%rbx) - -EXT(idt64_unix_scall_copy_args): - rep movsl - movl $1, (%rbx) -L_copy_args_continue: TIME_TRAP_UENTRY - movq %gs:CPU_KERNEL_STACK,%rdi - xchgq %rdi,%rsp /* switch to kernel stack */ movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ movq TH_TASK(%rcx),%rbx /* point to current task */ incl TH_SYSCALLS_UNIX(%rcx) /* increment call count */ @@ -1187,7 +1297,7 @@ L_copy_args_continue: sti - CCALL(unix_syscall) + CCALL1(unix_syscall, %r15) /* * always returns through thread_exception_return */ @@ -1196,8 +1306,6 @@ L_copy_args_continue: Entry(hndl_mach_scall) TIME_TRAP_UENTRY - movq %gs:CPU_KERNEL_STACK,%rdi - xchgq %rdi,%rsp /* switch to kernel stack */ movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ movq TH_TASK(%rcx),%rbx /* point to current task */ incl TH_SYSCALLS_MACH(%rcx) /* increment call count */ @@ -1207,7 +1315,7 @@ Entry(hndl_mach_scall) sti - CCALL(mach_call_munger) + CCALL1(mach_call_munger, %r15) /* * always returns through thread_exception_return */ @@ -1216,9 +1324,6 @@ Entry(hndl_mach_scall) Entry(hndl_mdep_scall) TIME_TRAP_UENTRY - movq %gs:CPU_KERNEL_STACK,%rdi - xchgq %rdi,%rsp /* switch to kernel stack */ - /* Check for active vtimers in the current task */ movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ movq TH_TASK(%rcx),%rbx /* point to current task */ @@ -1226,7 +1331,7 @@ Entry(hndl_mdep_scall) sti - CCALL(machdep_syscall) + CCALL1(machdep_syscall, %r15) /* * always returns through thread_exception_return */ @@ -1235,7 +1340,10 @@ Entry(hndl_mdep_scall) * 64bit Tasks * System call entries via syscall only: * - * rsp -> x86_saved_state64_t + * r15 x86_saved_state64_t + * rsp kernel stack + * + * both rsp and r15 are 16-byte aligned * interrupts disabled * direction flag cleared */ @@ -1243,8 +1351,6 @@ Entry(hndl_mdep_scall) Entry(hndl_syscall) TIME_TRAP_UENTRY - movq %gs:CPU_KERNEL_STACK,%rdi - xchgq %rdi,%rsp /* switch to kernel stack */ movq %gs:CPU_ACTIVE_THREAD,%rcx /* get current thread */ movq TH_TASK(%rcx),%rbx /* point to current task */ @@ -1255,7 +1361,7 @@ Entry(hndl_syscall) * We can be here either for a mach, unix machdep or diag syscall, * as indicated by the syscall class: */ - movl R64_RAX(%rdi), %eax /* syscall number/class */ + movl R64_RAX(%r15), %eax /* syscall number/class */ movl %eax, %edx andl $(SYSCALL_CLASS_MASK), %edx /* syscall class */ cmpl $(SYSCALL_CLASS_MACH< +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + + + +/* Fixed counter mask -- three counters, each with OS and USER */ +#define IA32_FIXED_CTR_ENABLE_ALL_CTRS_ALL_RINGS (0x333) +#define IA32_FIXED_CTR_ENABLE_ALL_PMI (0x888) + +#define IA32_PERFEVTSEL_PMI (1ull << 20) +#define IA32_PERFEVTSEL_EN (1ull << 22) + +/* Non-serialising */ +#define USE_RDPMC + +#define RDPMC_FIXED_COUNTER_SELECTOR (1ULL<<30) + +/* track the last config we enabled */ +static uint32_t kpc_running = 0; + +/* PMC / MSR accesses */ + +static uint64_t +IA32_FIXED_CTR_CTRL(void) +{ + return rdmsr64( MSR_IA32_PERF_FIXED_CTR_CTRL ); +} + +static uint64_t +IA32_FIXED_CTRx(uint32_t ctr) +{ +#ifdef USE_RDPMC + return rdpmc64(RDPMC_FIXED_COUNTER_SELECTOR | ctr); 
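Decomposing the two fixed-counter masks defined near the top of this new file: IA32_FIXED_CTR_CTRL allots each fixed counter a 4-bit field in which bit 0 enables ring-0 counting, bit 1 enables user-ring counting, and bit 3 enables the PMI on overflow; with three counters that yields 0x333 and 0x888. A runnable check of the decomposition:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t
    fixed_ctrl_bits(unsigned ncounters, int os, int usr, int pmi)
    {
        uint64_t field = (os ? 1u : 0u) | (usr ? 2u : 0u) | (pmi ? 8u : 0u);
        uint64_t ctrl = 0;

        for (unsigned i = 0; i < ncounters; i++)
            ctrl |= field << (4 * i);     /* one nibble per fixed counter */
        return ctrl;
    }

    int
    main(void)
    {
        assert(fixed_ctrl_bits(3, 1, 1, 0) == 0x333);  /* ..._ALL_CTRS_ALL_RINGS */
        assert(fixed_ctrl_bits(3, 0, 0, 1) == 0x888);  /* ..._ALL_PMI */
        return 0;
    }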
+#else /* !USE_RDPMC */ + return rdmsr64(MSR_IA32_PERF_FIXED_CTR0 + ctr); +#endif /* !USE_RDPMC */ +} + +#ifdef FIXED_COUNTER_RELOAD +static void +wrIA32_FIXED_CTRx(uint32_t ctr, uint64_t value) +{ + return wrmsr64(MSR_IA32_PERF_FIXED_CTR0 + ctr, value); +} +#endif + +static uint64_t +IA32_PMCx(uint32_t ctr) +{ +#ifdef USE_RDPMC + return rdpmc64(ctr); +#else /* !USE_RDPMC */ + return rdmsr64(MSR_IA32_PERFCTR0 + ctr); +#endif /* !USE_RDPMC */ +} + +static void +wrIA32_PMCx(uint32_t ctr, uint64_t value) +{ + return wrmsr64(MSR_IA32_PERFCTR0 + ctr, value); +} + +static uint64_t +IA32_PERFEVTSELx(uint32_t ctr) +{ + return rdmsr64(MSR_IA32_EVNTSEL0 + ctr); +} + +static void +wrIA32_PERFEVTSELx(uint32_t ctr, uint64_t value) +{ + wrmsr64(MSR_IA32_EVNTSEL0 + ctr, value); +} + + +/* internal functions */ + +boolean_t +kpc_is_running_fixed(void) +{ + return (kpc_running & KPC_CLASS_FIXED_MASK) == KPC_CLASS_FIXED_MASK; +} + +boolean_t +kpc_is_running_configurable(void) +{ + return (kpc_running & KPC_CLASS_CONFIGURABLE_MASK) == KPC_CLASS_CONFIGURABLE_MASK; +} + +uint32_t +kpc_fixed_count(void) +{ + i386_cpu_info_t *info = NULL; + + info = cpuid_info(); + + return info->cpuid_arch_perf_leaf.fixed_number; +} + +uint32_t +kpc_configurable_count(void) +{ + i386_cpu_info_t *info = NULL; + + info = cpuid_info(); + + return info->cpuid_arch_perf_leaf.number; +} + +uint32_t +kpc_fixed_config_count(void) +{ + return KPC_X86_64_FIXED_CONFIGS; +} + +uint32_t +kpc_configurable_config_count(void) +{ + return kpc_configurable_count(); +} + +static uint8_t +kpc_fixed_width(void) +{ + i386_cpu_info_t *info = NULL; + + info = cpuid_info(); + + return info->cpuid_arch_perf_leaf.fixed_width; +} + +static uint8_t +kpc_configurable_width(void) +{ + i386_cpu_info_t *info = NULL; + + info = cpuid_info(); + + return info->cpuid_arch_perf_leaf.width; +} + +uint64_t +kpc_fixed_max(void) +{ + return (1ULL << kpc_fixed_width()) - 1; +} + +uint64_t +kpc_configurable_max(void) +{ + return (1ULL << kpc_configurable_width()) - 1; +} + +#ifdef FIXED_COUNTER_SHADOW +static uint64_t +kpc_reload_fixed(int ctr) +{ + uint64_t old = IA32_FIXED_CTRx(ctr); + wrIA32_FIXED_CTRx(ctr, FIXED_RELOAD(ctr)); + return old; +} +#endif + +static uint64_t +kpc_reload_configurable(int ctr) +{ + uint64_t cfg = IA32_PERFEVTSELx(ctr); + + /* counters must be disabled before they can be written to */ + uint64_t old = IA32_PMCx(ctr); + wrIA32_PERFEVTSELx(ctr, cfg & ~IA32_PERFEVTSEL_EN); + wrIA32_PMCx(ctr, CONFIGURABLE_RELOAD(ctr)); + wrIA32_PERFEVTSELx(ctr, cfg); + return old; +} + +void kpc_pmi_handler(x86_saved_state_t *state); + +static void +set_running_fixed(boolean_t on) +{ + uint64_t global = 0, mask = 0, fixed_ctrl = 0; + int i; + boolean_t enabled; + + if( on ) + /* these are per-thread in SMT */ + fixed_ctrl = IA32_FIXED_CTR_ENABLE_ALL_CTRS_ALL_RINGS | IA32_FIXED_CTR_ENABLE_ALL_PMI; + else + /* don't allow disabling fixed counters */ + return; + + wrmsr64( MSR_IA32_PERF_FIXED_CTR_CTRL, fixed_ctrl ); + + enabled = ml_set_interrupts_enabled(FALSE); + + /* rmw the global control */ + global = rdmsr64(MSR_IA32_PERF_GLOBAL_CTRL); + for( i = 0; i < (int) kpc_fixed_count(); i++ ) + mask |= (1ULL<<(32+i)); + + if( on ) + global |= mask; + else + global &= ~mask; + + wrmsr64(MSR_IA32_PERF_GLOBAL_CTRL, global); + + ml_set_interrupts_enabled(enabled); +} + +static void +set_running_configurable(boolean_t on) +{ + uint64_t global = 0, mask = 0; + uint64_t cfg, save; + int i; + boolean_t enabled; + int ncnt = (int) 
kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK); + + enabled = ml_set_interrupts_enabled(FALSE); + + /* rmw the global control */ + global = rdmsr64(MSR_IA32_PERF_GLOBAL_CTRL); + for( i = 0; i < ncnt; i++ ) { + mask |= (1ULL<classes; + kpc_config_t *new_config = mp_config->configv; + int count = 0; + boolean_t enabled; + + enabled = ml_set_interrupts_enabled(FALSE); + + if( classes & KPC_CLASS_FIXED_MASK ) + { + kpc_set_fixed_config(&new_config[count]); + count += kpc_get_config_count(KPC_CLASS_FIXED_MASK); + } + + if( classes & KPC_CLASS_CONFIGURABLE_MASK ) + { + kpc_set_configurable_config(&new_config[count]); + count += kpc_get_config_count(KPC_CLASS_CONFIGURABLE_MASK); + } + + ml_set_interrupts_enabled(enabled); +} + +static void +kpc_set_reload_mp_call(void *vmp_config) +{ + struct kpc_config_remote *mp_config = vmp_config; + uint64_t max = kpc_configurable_max(); + uint32_t i, count = kpc_get_counter_count(KPC_CLASS_CONFIGURABLE_MASK); + uint64_t *new_period; + uint64_t classes; + int enabled; + + classes = mp_config->classes; + new_period = mp_config->configv; + + if (classes & KPC_CLASS_CONFIGURABLE_MASK) { + enabled = ml_set_interrupts_enabled(FALSE); + + kpc_get_configurable_counters(&CONFIGURABLE_SHADOW(0)); + + for (i = 0; i < count; i++) { + if (new_period[i] == 0) + new_period[i] = kpc_configurable_max(); + + CONFIGURABLE_RELOAD(i) = max - new_period[i]; + + kpc_reload_configurable(i); + + /* clear overflow bit just in case */ + wrmsr64(MSR_IA32_PERF_GLOBAL_OVF_CTRL, 1ull << i); + } + + ml_set_interrupts_enabled(enabled); + } +} + +int +kpc_set_period_arch( struct kpc_config_remote *mp_config ) +{ + mp_cpus_call( CPUMASK_ALL, ASYNC, kpc_set_reload_mp_call, mp_config ); + + return 0; +} + + +/* interface functions */ + +uint32_t +kpc_get_classes(void) +{ + return KPC_CLASS_FIXED_MASK | KPC_CLASS_CONFIGURABLE_MASK; +} + +int +kpc_set_running(uint32_t new_state) +{ + lapic_set_pmi_func((i386_intr_func_t)kpc_pmi_handler); + + /* dispatch to all CPUs */ + mp_cpus_call( CPUMASK_ALL, ASYNC, kpc_set_running_mp_call, &new_state ); + + kpc_running = new_state; + + return 0; +} + +int +kpc_set_config_arch(struct kpc_config_remote *mp_config) +{ + mp_cpus_call( CPUMASK_ALL, ASYNC, kpc_set_config_mp_call, mp_config ); + + return 0; +} + +/* PMI stuff */ +void kpc_pmi_handler(__unused x86_saved_state_t *state) +{ + uint64_t status, extra; + uint32_t ctr; + int enabled; + + enabled = ml_set_interrupts_enabled(FALSE); + + status = rdmsr64(MSR_IA32_PERF_GLOBAL_STATUS); + +#ifdef FIXED_COUNTER_SHADOW + for (ctr = 0; ctr < kpc_fixed_count(); ctr++) { + if ((1ULL << (ctr + 32)) & status) { + extra = kpc_reload_fixed(ctr); + + FIXED_SHADOW(ctr) + += kpc_fixed_max() - FIXED_RELOAD(ctr) + extra; + + BUF_INFO(PERF_KPC_FCOUNTER, ctr, FIXED_SHADOW(ctr), extra, FIXED_ACTIONID(ctr)); + + if (FIXED_ACTIONID(ctr)) + kpc_sample_kperf(FIXED_ACTIONID(ctr)); + } + } +#endif + + for (ctr = 0; ctr < kpc_configurable_count(); ctr++) { + if ((1ULL << ctr) & status) { + extra = kpc_reload_configurable(ctr); + + CONFIGURABLE_SHADOW(ctr) + += kpc_configurable_max() - CONFIGURABLE_RELOAD(ctr) + extra; + + /* kperf can grab the PMCs when it samples so we need to make sure the overflow + * bits are in the correct state before the call to kperf_sample */ + wrmsr64(MSR_IA32_PERF_GLOBAL_OVF_CTRL, 1ull << ctr); + + BUF_INFO(PERF_KPC_COUNTER, ctr, CONFIGURABLE_SHADOW(ctr), extra, CONFIGURABLE_ACTIONID(ctr)); + + if (CONFIGURABLE_ACTIONID(ctr)) + kpc_sample_kperf(CONFIGURABLE_ACTIONID(ctr)); + } + } + + 
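The sampling arithmetic shared by kpc_set_reload_mp_call and the PMI handler above is the standard trick: the PMC counts up and raises its PMI when it wraps, so the reload value is max - period, and each PMI credits shadow += max - reload + extra, where extra is whatever the counter accumulated past the reload point before it was re-armed. A small arithmetic check:

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint64_t max = (1ULL << 48) - 1;   /* a typical counter width */
        uint64_t period = 1000000;
        uint64_t reload = max - period;    /* as in kpc_set_reload_mp_call */

        /* by overflow time the hardware counted (max - reload) events... */
        assert(max - reload == period);

        /* ...plus whatever ran past the re-arm point ("extra" above) */
        uint64_t extra = 7, shadow = 0;
        shadow += max - reload + extra;
        assert(shadow == period + extra);
        return 0;
    }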
ml_set_interrupts_enabled(enabled); +} + + + + diff --git a/osfmk/x86_64/locore.s b/osfmk/x86_64/locore.s index 8ca0c92a4..f6af50273 100644 --- a/osfmk/x86_64/locore.s +++ b/osfmk/x86_64/locore.s @@ -127,7 +127,6 @@ ENTRY(rdmsr_carefully) rdmsr_fail: movq $1, %rax ret - /* * int rdmsr64_carefully(uint32_t msr, uint64_t *val); */ @@ -217,6 +216,54 @@ _pmap_safe_read_fail: xor %eax, %eax ret +/* + * 2-byte copy used by ml_copy_phys(). + * rdi: source address + * rsi: destination address + */ +Entry(_bcopy2) + RECOVERY_SECTION + RECOVER(_bcopy_fail) + movw (%rdi), %cx + RECOVERY_SECTION + RECOVER(_bcopy_fail) + movw %cx, (%rsi) + + xorl %eax,%eax /* return 0 for success */ + ret /* and return */ + +/* + * 4-byte copy used by ml_copy_phys(). + * rdi: source address + * rsi: destination address + */ +Entry(_bcopy4) + RECOVERY_SECTION + RECOVER(_bcopy_fail) + movl (%rdi), %ecx + RECOVERY_SECTION + RECOVER(_bcopy_fail) + mov %ecx, (%rsi) + + xorl %eax,%eax /* return 0 for success */ + ret /* and return */ + +/* + * 8-byte copy used by ml_copy_phys(). + * rdi: source address + * rsi: destination address + */ +Entry(_bcopy8) + RECOVERY_SECTION + RECOVER(_bcopy_fail) + movq (%rdi), %rcx + RECOVERY_SECTION + RECOVER(_bcopy_fail) + mov %rcx, (%rsi) + + xorl %eax,%eax /* return 0 for success */ + ret /* and return */ + /* diff --git a/osfmk/x86_64/loose_ends.c b/osfmk/x86_64/loose_ends.c index b912c6d9b..cd61326f3 100644 --- a/osfmk/x86_64/loose_ends.c +++ b/osfmk/x86_64/loose_ends.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -78,6 +78,10 @@ #include #include +#if !MACH_KDP +#include +#endif /* !MACH_KDP */ + #if 0 #undef KERNEL_DEBUG @@ -98,7 +102,7 @@ extern void mapping_set_ref(ppnum_t pn); extern void ovbcopy(const char *from, char *to, vm_size_t nbytes); -void machine_callstack(natural_t *buf, vm_size_t callstack_max); +void machine_callstack(uintptr_t *buf, vm_size_t callstack_max); #define value_64bit(value) ((value) & 0xFFFFFFFF00000000ULL) @@ -246,7 +250,7 @@ ovbcopy( static inline unsigned int ml_phys_read_data(pmap_paddr_t paddr, int size) { - unsigned int result; + unsigned int result = 0; if (!physmap_enclosed(paddr)) panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr); @@ -519,7 +523,7 @@ hw_compare_and_store(uint32_t oldval, uint32_t newval, volatile uint32_t *dest) * levels of return pc information. 
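On the _bcopy2/4/8 entries added to locore.s above: each wraps a single load/store pair in RECOVERY_SECTION/RECOVER so a faulting access returns through _bcopy_fail instead of panicking, and each returns 0 on success. Per their comments they back ml_copy_phys(); a hedged sketch of how such a caller might dispatch on size (only the helper names come from the diff, the dispatch shape and the nonzero-on-fault return are assumptions):

    /* declared in the kernel; presumed to return 0 on success and nonzero
     * when the RECOVER()-protected access faults */
    extern int _bcopy2(const void *src, void *dst);
    extern int _bcopy4(const void *src, void *dst);
    extern int _bcopy8(const void *src, void *dst);

    static int
    copy_small_aligned(const void *src, void *dst, unsigned bytes)
    {
        switch (bytes) {
        case 2:  return _bcopy2(src, dst);
        case 4:  return _bcopy4(src, dst);
        case 8:  return _bcopy8(src, dst);
        default: return -1;    /* other sizes take a different path */
        }
    }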
*/ void machine_callstack( - __unused natural_t *buf, + __unused uintptr_t *buf, __unused vm_size_t callstack_max) { } @@ -538,18 +542,6 @@ void fillPage(ppnum_t pa, unsigned int fill) *addr++ = fill; } -static inline void __sfence(void) -{ - __asm__ volatile("sfence"); -} -static inline void __mfence(void) -{ - __asm__ volatile("mfence"); -} -static inline void __wbinvd(void) -{ - __asm__ volatile("wbinvd"); -} static inline void __clflush(void *ptr) { __asm__ volatile("clflush (%0)" : : "r" (ptr)); @@ -560,14 +552,14 @@ void dcache_incoherent_io_store64(addr64_t pa, unsigned int count) addr64_t linesize = cpuid_info()->cache_linesize; addr64_t bound = (pa + count + linesize - 1) & ~(linesize - 1); - __mfence(); + mfence(); while (pa < bound) { __clflush(PHYSMAP_PTOV(pa)); pa += linesize; } - __mfence(); + mfence(); } void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count) @@ -584,12 +576,12 @@ flush_dcache64(addr64_t addr, unsigned count, int phys) else { uint64_t linesize = cpuid_info()->cache_linesize; addr64_t bound = (addr + count + linesize -1) & ~(linesize - 1); - __mfence(); + mfence(); while (addr < bound) { __clflush((void *) (uintptr_t) addr); addr += linesize; } - __mfence(); + mfence(); } } @@ -615,15 +607,22 @@ mapping_set_ref(ppnum_t pn) pmap_set_reference(pn); } +extern i386_cpu_info_t cpuid_cpu_info; void cache_flush_page_phys(ppnum_t pa) { boolean_t istate; unsigned char *cacheline_addr; - int cacheline_size = cpuid_info()->cache_linesize; - int cachelines_to_flush = PAGE_SIZE/cacheline_size; + i386_cpu_info_t *cpuid_infop = cpuid_info(); + int cacheline_size; + int cachelines_to_flush; + + cacheline_size = cpuid_infop->cache_linesize; + if (cacheline_size == 0) + panic("cacheline_size=0 cpuid_infop=%p\n", cpuid_infop); + cachelines_to_flush = PAGE_SIZE/cacheline_size; - __mfence(); + mfence(); istate = ml_set_interrupts_enabled(FALSE); @@ -635,14 +634,15 @@ cache_flush_page_phys(ppnum_t pa) (void) ml_set_interrupts_enabled(istate); - __mfence(); + mfence(); } #if !MACH_KDP void -kdp_register_callout(void) +kdp_register_callout(kdp_callout_fn_t fn, void *arg) { +#pragma unused(fn,arg) } #endif diff --git a/osfmk/x86_64/machine_kpc.h b/osfmk/x86_64/machine_kpc.h new file mode 100644 index 000000000..0ad5ccc33 --- /dev/null +++ b/osfmk/x86_64/machine_kpc.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ +#ifndef _MACHINE_X86_64_KPC_H +#define _MACHINE_X86_64_KPC_H + +/* x86 config registers are 64-bit */ +typedef uint64_t kpc_config_t; + +/* Size to the maximum number of counters we could read from every + * class in one go + */ +#define KPC_MAX_COUNTERS (32) + +/* number of fixed config registers on x86_64 */ +#define KPC_X86_64_FIXED_CONFIGS (1) + +#define FIXED_ACTIONID(ctr) (kpc_actionid[(ctr)]) +#define CONFIGURABLE_ACTIONID(ctr) (kpc_actionid[(ctr) + kpc_fixed_count()]) + +#define FIXED_RELOAD(ctr) (current_cpu_datap()->cpu_kpc_reload[(ctr)]) +#define CONFIGURABLE_RELOAD(ctr) (current_cpu_datap()->cpu_kpc_reload[(ctr) + kpc_fixed_count()]) + +#define FIXED_SHADOW(ctr) (current_cpu_datap()->cpu_kpc_shadow[(ctr)]) +#define CONFIGURABLE_SHADOW(ctr) (current_cpu_datap()->cpu_kpc_shadow[(ctr) + kpc_fixed_count()]) + +#endif /* _MACHINE_X86_64_KPC_H */ diff --git a/osfmk/x86_64/machine_routines_asm.s b/osfmk/x86_64/machine_routines_asm.s index 1d2e3ed3f..0d304f26a 100644 --- a/osfmk/x86_64/machine_routines_asm.s +++ b/osfmk/x86_64/machine_routines_asm.s @@ -268,3 +268,14 @@ Entry(__vmxoff) ret #endif /* CONFIG_VMX */ + +/* + * mfence -- Memory Barrier + * Use out-of-line assembly to get + * standard x86-64 ABI guarantees + * about what the caller's codegen + * has in registers vs. memory + */ +Entry(do_mfence) + mfence + ret diff --git a/osfmk/x86_64/pmap.c b/osfmk/x86_64/pmap.c index 3f05e7f27..32d345f9c 100644 --- a/osfmk/x86_64/pmap.c +++ b/osfmk/x86_64/pmap.c @@ -493,6 +493,131 @@ pmap_virtual_space( *endp = virtual_end; } + + + +#if HIBERNATION + +#include + +int32_t pmap_npages; +int32_t pmap_teardown_last_valid_compact_indx = -1; + + +void hibernate_rebuild_pmap_structs(void); +void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *); +void pmap_pack_index(uint32_t); +int32_t pmap_unpack_index(pv_rooted_entry_t); + + +int32_t +pmap_unpack_index(pv_rooted_entry_t pv_h) +{ + int32_t indx = 0; + + indx = (int32_t)(*((uint64_t *)(&pv_h->qlink.next)) >> 48); + indx = indx << 16; + indx |= (int32_t)(*((uint64_t *)(&pv_h->qlink.prev)) >> 48); + + *((uint64_t *)(&pv_h->qlink.next)) |= ((uint64_t)0xffff << 48); + *((uint64_t *)(&pv_h->qlink.prev)) |= ((uint64_t)0xffff << 48); + + return (indx); +} + + +void +pmap_pack_index(uint32_t indx) +{ + pv_rooted_entry_t pv_h; + + pv_h = &pv_head_table[indx]; + + *((uint64_t *)(&pv_h->qlink.next)) &= ~((uint64_t)0xffff << 48); + *((uint64_t *)(&pv_h->qlink.prev)) &= ~((uint64_t)0xffff << 48); + + *((uint64_t *)(&pv_h->qlink.next)) |= ((uint64_t)(indx >> 16)) << 48; + *((uint64_t *)(&pv_h->qlink.prev)) |= ((uint64_t)(indx & 0xffff)) << 48; +} + + +void +hibernate_teardown_pmap_structs(addr64_t *unneeded_start, addr64_t *unneeded_end) +{ + int32_t i; + int32_t compact_target_indx; + + compact_target_indx = 0; + + for (i = 0; i < pmap_npages; i++) { + if (pv_head_table[i].pmap == PMAP_NULL) { + + if (pv_head_table[compact_target_indx].pmap != PMAP_NULL) + compact_target_indx = i; + } else { + pmap_pack_index((uint32_t)i); + + if (pv_head_table[compact_target_indx].pmap == PMAP_NULL) { + /* + * we've got a hole to fill, so + * move this pv_rooted_entry_t to it's new home + */ + pv_head_table[compact_target_indx] = pv_head_table[i]; + pv_head_table[i].pmap = PMAP_NULL; + + pmap_teardown_last_valid_compact_indx = compact_target_indx; + compact_target_indx++; + } else + 
pmap_teardown_last_valid_compact_indx = i; + } + } + *unneeded_start = (addr64_t)&pv_head_table[pmap_teardown_last_valid_compact_indx+1]; + *unneeded_end = (addr64_t)&pv_head_table[pmap_npages-1]; + + HIBLOG("hibernate_teardown_pmap_structs done: last_valid_compact_indx %d\n", pmap_teardown_last_valid_compact_indx); +} + + +void +hibernate_rebuild_pmap_structs(void) +{ + int32_t cindx, eindx, rindx; + pv_rooted_entry_t pv_h; + + eindx = (int32_t)pmap_npages; + + for (cindx = pmap_teardown_last_valid_compact_indx; cindx >= 0; cindx--) { + + pv_h = &pv_head_table[cindx]; + + rindx = pmap_unpack_index(pv_h); + assert(rindx < pmap_npages); + + if (rindx != cindx) { + /* + * this pv_rooted_entry_t was moved by hibernate_teardown_pmap_structs, + * so move it back to its real location + */ + pv_head_table[rindx] = pv_head_table[cindx]; + } + if (rindx+1 != eindx) { + /* + * the 'hole' between this vm_rooted_entry_t and the previous + * vm_rooted_entry_t we moved needs to be initialized as + * a range of zero'd vm_rooted_entry_t's + */ + bzero((char *)&pv_head_table[rindx+1], (eindx - rindx - 1) * sizeof (struct pv_rooted_entry)); + } + eindx = rindx; + } + if (rindx) + bzero ((char *)&pv_head_table[0], rindx * sizeof (struct pv_rooted_entry)); + + HIBLOG("hibernate_rebuild_pmap_structs done: last_valid_compact_indx %d\n", pmap_teardown_last_valid_compact_indx); +} + +#endif + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -509,13 +634,13 @@ pmap_init(void) kernel_pmap->pm_obj_pml4 = &kpml4obj_object_store; - _vm_object_allocate((vm_object_size_t)NPML4PGS, &kpml4obj_object_store); + _vm_object_allocate((vm_object_size_t)NPML4PGS * PAGE_SIZE, &kpml4obj_object_store); kernel_pmap->pm_obj_pdpt = &kpdptobj_object_store; - _vm_object_allocate((vm_object_size_t)NPDPTPGS, &kpdptobj_object_store); + _vm_object_allocate((vm_object_size_t)NPDPTPGS * PAGE_SIZE, &kpdptobj_object_store); kernel_pmap->pm_obj = &kptobj_object_store; - _vm_object_allocate((vm_object_size_t)NPDEPGS, &kptobj_object_store); + _vm_object_allocate((vm_object_size_t)NPDEPGS * PAGE_SIZE, &kptobj_object_store); /* * Allocate memory for the pv_head_table and its lock bits, @@ -528,6 +653,9 @@ pmap_init(void) */ npages = i386_btop(avail_end); +#if HIBERNATION + pmap_npages = (uint32_t)npages; +#endif s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1)) + pv_lock_table_size(npages) @@ -1091,9 +1219,13 @@ pmap_create( bzero(p, sizeof(*p)); /* init counts now since we'll be bumping some */ simple_lock_init(&p->lock, 0); +#if 00 p->stats.resident_count = 0; p->stats.resident_max = 0; p->stats.wired_count = 0; +#else + bzero(&p->stats, sizeof (p->stats)); +#endif p->ref_count = 1; p->nx_enabled = 1; p->pm_shared = FALSE; @@ -1114,15 +1246,15 @@ pmap_create( /* allocate the vm_objs to hold the pdpt, pde and pte pages */ - p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS)); + p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS) * PAGE_SIZE); if (NULL == p->pm_obj_pml4) panic("pmap_create pdpt obj"); - p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS)); + p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS) * PAGE_SIZE); if (NULL == p->pm_obj_pdpt) panic("pmap_create pdpt obj"); - p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS)); + p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS) * PAGE_SIZE); if (NULL == p->pm_obj) panic("pmap_create pte obj"); @@ -1237,17 
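On the hibernation helpers above: pmap_pack_index() stashes a 32-bit table index in the top 16 bits of the two queue-link pointers (16 bits in qlink.next, 16 in qlink.prev), and pmap_unpack_index() recovers it and then forces those bits back to all-ones, which is safe because kernel pointers are canonical upper-half addresses. A user-space round trip of the same bit-twiddling, with illustrative pointer values:

    #include <assert.h>
    #include <stdint.h>

    struct qlink { uint64_t next, prev; };

    static void
    pack_index(struct qlink *q, uint32_t indx)
    {
        q->next = (q->next & ~(0xffffULL << 48)) | ((uint64_t)(indx >> 16) << 48);
        q->prev = (q->prev & ~(0xffffULL << 48)) | ((uint64_t)(indx & 0xffff) << 48);
    }

    static uint32_t
    unpack_index(struct qlink *q)
    {
        uint32_t indx = (uint32_t)(q->next >> 48) << 16;
        indx |= (uint32_t)(q->prev >> 48);
        q->next |= 0xffffULL << 48;   /* restore the canonical upper bits */
        q->prev |= 0xffffULL << 48;
        return indx;
    }

    int
    main(void)
    {
        struct qlink q = { 0xffff800012345678ULL, 0xffff8000abcdef00ULL };
        pack_index(&q, 0x12abcd);
        assert(unpack_index(&q) == 0x12abcd);
        assert(q.next == 0xffff800012345678ULL);   /* pointers fully restored */
        assert(q.prev == 0xffff8000abcdef00ULL);
        return 0;
    }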
+1369,31 @@ pmap_remove_some_phys( } + +void +pmap_protect( + pmap_t map, + vm_map_offset_t sva, + vm_map_offset_t eva, + vm_prot_t prot) +{ + pmap_protect_options(map, sva, eva, prot, 0, NULL); +} + + /* * Set the physical protection on the * specified range of this map as requested. * Will not increase permissions. */ void -pmap_protect( +pmap_protect_options( pmap_t map, vm_map_offset_t sva, vm_map_offset_t eva, - vm_prot_t prot) + vm_prot_t prot, + unsigned int options, + void *arg) { pt_entry_t *pde; pt_entry_t *spte, *epte; @@ -1262,7 +1408,7 @@ pmap_protect( return; if (prot == VM_PROT_NONE) { - pmap_remove(map, sva, eva); + pmap_remove_options(map, sva, eva, options); return; } PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START, @@ -1312,9 +1458,12 @@ pmap_protect( } sva = lva; } - if (num_found) - PMAP_UPDATE_TLBS(map, orig_sva, eva); - + if (num_found) { + if (options & PMAP_OPTIONS_NOFLUSH) + PMAP_UPDATE_TLBS_DELAYED(map, orig_sva, eva, (pmap_flush_context *)arg); + else + PMAP_UPDATE_TLBS(map, orig_sva, eva); + } PMAP_UNLOCK(map); PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END, @@ -1410,12 +1559,12 @@ pmap_expand_pml4( } #if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) { + if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE)) { panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n", map, map->pm_obj_pml4, vaddr, i); } #endif - vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i); + vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE); vm_object_unlock(map->pm_obj_pml4); /* @@ -1499,12 +1648,12 @@ pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options) } #if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) { + if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE)) { panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n", map, map->pm_obj_pdpt, vaddr, i); } #endif - vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i); + vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE); vm_object_unlock(map->pm_obj_pdpt); /* @@ -1619,12 +1768,12 @@ pmap_expand( } #if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) { + if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE)) { panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n", map, map->pm_obj, vaddr, i); } #endif - vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i); + vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE); vm_object_unlock(map->pm_obj); /* @@ -1811,7 +1960,7 @@ pmap_collect( vm_object_lock(p->pm_obj); - m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0])); + m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]) * PAGE_SIZE); if (m == VM_PAGE_NULL) panic("pmap_collect: pte page not in object"); @@ -1899,15 +2048,12 @@ kern_return_t dtrace_copyio_preflight(__unused addr64_t va) { thread_t thread = current_thread(); uint64_t ccr3; - if (current_map() == kernel_map) return KERN_FAILURE; else if (((ccr3 = get_cr3_base()) != thread->map->pmap->pm_cr3) && (no_shared_cr3 == FALSE)) return KERN_FAILURE; else if (no_shared_cr3 && (ccr3 != kernel_pmap->pm_cr3)) return KERN_FAILURE; - else if (thread->machine.specFlags & CopyIOActive) - return KERN_FAILURE; else return KERN_SUCCESS; } @@ -2038,6 +2184,113 @@ pmap_cpuset_NMIPI(cpu_set cpu_mask) 
{ cpu_pause(); } + +void +pmap_flush_context_init(pmap_flush_context *pfc) +{ + pfc->pfc_cpus = 0; + pfc->pfc_invalid_global = 0; +} + +void +pmap_flush( + pmap_flush_context *pfc) +{ + unsigned int my_cpu; + unsigned int cpu; + unsigned int cpu_bit; + cpu_set cpus_to_respond = 0; + cpu_set cpus_to_signal = 0; + cpu_set cpus_signaled = 0; + boolean_t flush_self = FALSE; + uint64_t deadline; + + mp_disable_preemption(); + + my_cpu = cpu_number(); + cpus_to_signal = pfc->pfc_cpus; + + PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_DELAYED_TLBS) | DBG_FUNC_START, + NULL, cpus_to_signal, 0, 0, 0); + + for (cpu = 0, cpu_bit = 1; cpu < real_ncpus && cpus_to_signal; cpu++, cpu_bit <<= 1) { + + if (cpus_to_signal & cpu_bit) { + + cpus_to_signal &= ~cpu_bit; + + if (!cpu_datap(cpu)->cpu_running) + continue; + + if (pfc->pfc_invalid_global & cpu_bit) + cpu_datap(cpu)->cpu_tlb_invalid_global = TRUE; + else + cpu_datap(cpu)->cpu_tlb_invalid_local = TRUE; + mfence(); + + if (cpu == my_cpu) { + flush_self = TRUE; + continue; + } + if (CPU_CR3_IS_ACTIVE(cpu)) { + cpus_to_respond |= cpu_bit; + i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC); + } + } + } + cpus_signaled = cpus_to_respond; + + /* + * Flush local tlb if required. + * Do this now to overlap with other processors responding. + */ + if (flush_self && cpu_datap(my_cpu)->cpu_tlb_invalid != FALSE) + process_pmap_updates(); + + if (cpus_to_respond) { + + deadline = mach_absolute_time() + LockTimeOut; + /* + * Wait for those other cpus to acknowledge + */ + while (cpus_to_respond != 0) { + long orig_acks = 0; + + for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { + /* Consider checking local/global invalidity + * as appropriate in the PCID case. + */ + if ((cpus_to_respond & cpu_bit) != 0) { + if (!cpu_datap(cpu)->cpu_running || + cpu_datap(cpu)->cpu_tlb_invalid == FALSE || + !CPU_CR3_IS_ACTIVE(cpu)) { + cpus_to_respond &= ~cpu_bit; + } + cpu_pause(); + } + if (cpus_to_respond == 0) + break; + } + if (cpus_to_respond && (mach_absolute_time() > deadline)) { + if (machine_timeout_suspended()) + continue; + pmap_tlb_flush_timeout = TRUE; + orig_acks = NMIPI_acks; + pmap_cpuset_NMIPI(cpus_to_respond); + + panic("TLB invalidation IPI timeout: " + "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx", + cpus_to_respond, orig_acks, NMIPI_acks); + } + } + } + PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_DELAYED_TLBS) | DBG_FUNC_END, + NULL, cpus_signaled, flush_self, 0, 0); + + mp_enable_preemption(); +} + + /* * Called with pmap locked, we: * - scan through per-cpu data to see which other cpus need to flush @@ -2049,7 +2302,7 @@ pmap_cpuset_NMIPI(cpu_set cpu_mask) { */ void -pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv) +pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int options, pmap_flush_context *pfc) { unsigned int cpu; unsigned int cpu_bit; @@ -2059,6 +2312,7 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv) boolean_t flush_self = FALSE; uint64_t deadline; boolean_t pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap)); + boolean_t need_global_flush = FALSE; assert((processor_avail_count < 2) || (ml_get_interrupts_enabled() && get_preemption_level() != 0)); @@ -2071,10 +2325,11 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv) cpus_to_signal = 0; if (pmap_pcid_ncpus) { + if (pmap_is_shared) + need_global_flush = TRUE; pmap_pcid_invalidate_all_cpus(pmap); - __asm__ 
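Taken together, pmap_flush_context_init(), the PMAP_OPTIONS_NOFLUSH/PMAP_DELAY_TLB_FLUSH plumbing, and pmap_flush() let a caller batch several mapping changes into a single shootdown round instead of one IPI exchange per call. The intended pattern, sketched from the functions in these hunks (a kernel-context fragment; the real call sites live elsewhere in the tree):

    /* kernel-context sketch, using the types and options from these hunks */
    static void
    downgrade_two_ranges(pmap_t map,
        vm_map_offset_t s1, vm_map_offset_t e1,
        vm_map_offset_t s2, vm_map_offset_t e2)
    {
        pmap_flush_context pfc;

        pmap_flush_context_init(&pfc);

        /* each call accumulates target CPUs in pfc instead of sending IPIs */
        pmap_protect_options(map, s1, e1, VM_PROT_READ, PMAP_OPTIONS_NOFLUSH, &pfc);
        pmap_protect_options(map, s2, e2, VM_PROT_READ, PMAP_OPTIONS_NOFLUSH, &pfc);

        pmap_flush(&pfc);    /* one shootdown round for everything above */
    }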
volatile("mfence":::"memory"); + mfence(); } - for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { if (!cpu_datap(cpu)->cpu_running) continue; @@ -2084,15 +2339,23 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv) if ((pmap_cr3 == cpu_task_cr3) || (pmap_cr3 == cpu_active_cr3) || (pmap_is_shared)) { + + if (options & PMAP_DELAY_TLB_FLUSH) { + if (need_global_flush == TRUE) + pfc->pfc_invalid_global |= cpu_bit; + pfc->pfc_cpus |= cpu_bit; + + continue; + } if (cpu == my_cpu) { flush_self = TRUE; continue; } - if (pmap_pcid_ncpus && pmap_is_shared) + if (need_global_flush == TRUE) cpu_datap(cpu)->cpu_tlb_invalid_global = TRUE; else cpu_datap(cpu)->cpu_tlb_invalid_local = TRUE; - __asm__ volatile("mfence":::"memory"); + mfence(); /* * We don't need to signal processors which will flush @@ -2110,17 +2373,23 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv) */ if (CPU_CR3_IS_ACTIVE(cpu) && (pmap_cr3 == CPU_GET_ACTIVE_CR3(cpu) || - pmap->pm_shared || - (pmap_cr3 == CPU_GET_TASK_CR3(cpu)))) { + pmap->pm_shared || + (pmap_cr3 == CPU_GET_TASK_CR3(cpu)))) { cpus_to_signal |= cpu_bit; i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC); } } } + if ((options & PMAP_DELAY_TLB_FLUSH)) + return; - PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START, - pmap, cpus_to_signal, flush_self, startv, endv); - + if (pmap == kernel_pmap) { + PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_KERN_TLBS) | DBG_FUNC_START, + pmap, cpus_to_signal, flush_self, startv, endv); + } else { + PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START, + pmap, cpus_to_signal, flush_self, startv, endv); + } /* * Flush local tlb if required. * Do this now to overlap with other processors responding. @@ -2177,11 +2446,17 @@ pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv) } if (__improbable((pmap == kernel_pmap) && (flush_self != TRUE))) { - panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map); + panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, pmap_cr3: 0x%llx, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, pmap_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map); + } + + if (pmap == kernel_pmap) { + PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_KERN_TLBS) | DBG_FUNC_END, + pmap, cpus_to_signal, startv, endv, 0); + } else { + PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END, + pmap, cpus_to_signal, startv, endv, 0); } - PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END, - pmap, cpus_to_signal, startv, endv, 0); } void @@ -2205,7 +2480,7 @@ process_pmap_updates(void) flush_tlb_raw(); } - __asm__ volatile("mfence"); + mfence(); } void @@ -2214,7 +2489,8 @@ pmap_update_interrupt(void) PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START, 0, 0, 0, 0, 0); - process_pmap_updates(); + if (current_cpu_datap()->cpu_tlb_invalid) + process_pmap_updates(); PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END, 0, 0, 0, 0, 0); diff --git a/osfmk/x86_64/start.s b/osfmk/x86_64/start.s index 22045a2d5..8d2230bf7 100644 --- a/osfmk/x86_64/start.s +++ b/osfmk/x86_64/start.s @@ -120,9 +120,8 @@ EXT(mc_task_stack_end): movl %cr0,%eax ;\ orl $(CR0_PG|CR0_WP),%eax /* enable paging */ ;\ movl %eax,%cr0 ;\ - /* "The Aussie Maneuver" ("Myria" variant) */ ;\ - pushl 
$(0xcb<<24)|KERNEL64_CS /* reload CS with 0x08 */ ;\ - call .-1 ;\ + ljmpl $KERNEL64_CS,$64f ;\ +64: ;\ .code64 /* diff --git a/pexpert/Makefile b/pexpert/Makefile index 6e7b0b31a..71a283d78 100644 --- a/pexpert/Makefile +++ b/pexpert/Makefile @@ -8,25 +8,17 @@ include $(MakeInc_cmd) include $(MakeInc_def) INSTINC_SUBDIRS = pexpert -INSTINC_SUBDIRS_I386 = pexpert INSTINC_SUBDIRS_X86_64 = pexpert INSTINC_SUBDIRS_ARM = pexpert EXPINC_SUBDIRS = pexpert -EXPINC_SUBDIRS_I386 = pexpert EXPINC_SUBDIRS_X86_64 = pexpert EXPINC_SUBDIRS_ARM = pexpert - -SETUP_SUBDIRS = - COMP_SUBDIRS = \ conf -INST_SUBDIRS = \ - - include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/pexpert/conf/MASTER b/pexpert/conf/MASTER index 0c3099fec..0f102b7c0 100644 --- a/pexpert/conf/MASTER +++ b/pexpert/conf/MASTER @@ -85,14 +85,10 @@ ident PEXPERT options MACH_PE # Objective-C support # options MACH_KERNEL options DEBUG # general debugging code # +options DEVELOPMENT # dev kernel # options MACH_ASSERT # # options CONFIG_DTRACE # dtrace support # options CONFIG_NO_PANIC_STRINGS # options CONFIG_NO_PRINTF_STRINGS # options CONFIG_NO_KPRINTF_STRINGS # - -# configurable kernel - general switch to say we are building for an -# embedded device -# -options CONFIG_EMBEDDED # diff --git a/pexpert/conf/MASTER.i386 b/pexpert/conf/MASTER.i386 deleted file mode 100644 index 94fb5056c..000000000 --- a/pexpert/conf/MASTER.i386 +++ /dev/null @@ -1,18 +0,0 @@ -###################################################################### -# -# Standard Apple Mac OS Configurations: -# -------- ----- ------ --------------- -# -# RELEASE = [ intel mach mach_pe panic_info config_dtrace ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug ] -# -# EMBEDDED = [ intel mach mach_pe panic_info ] -# DEVELOPMENT = [ EMBEDDED config_dtrace ] -# -###################################################################### - -machine "i386" # -cpu "i386" # - -options NO_NESTED_PMAP # diff --git a/pexpert/conf/Makefile b/pexpert/conf/Makefile index 482f105be..25a42ef5e 100644 --- a/pexpert/conf/Makefile +++ b/pexpert/conf/Makefile @@ -3,56 +3,37 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = - -COMP_SUBDIRS = - -INST_SUBDIRS = - -ifndef PEXPERT_KERNEL_CONFIG -export PEXPERT_KERNEL_CONFIG = $(KERNEL_CONFIG) -endif +MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) -ifneq ($(MACHINE_CONFIG), DEFAULT) -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT) -else -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT) -endif - -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC) - -$(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(ARCH_CONFIG_LC) - $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - $(MKDIR) $${doconf_target}; \ - cd $${doconf_target}; \ +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ + $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/Makefile.template \ + $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/files \ + $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) + $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ + cd $(addsuffix /conf, 
$(TARGET)); \ rm -f $(notdir $?); \ - cp $? $${doconf_target}; \ - if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d $(TARGET)/$(PEXPERT_KERNEL_CONFIG) $(PEXPERT_KERNEL_CONFIG); \ - ); - -do_all: $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG)/Makefile - $(_v)next_source=$(subst conf/,,$(SOURCE)); \ - next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH)); \ - ${MAKE} -C $(COMPOBJROOT)/$(PEXPERT_KERNEL_CONFIG) \ - MAKEFILES=$(TARGET)/$(PEXPERT_KERNEL_CONFIG)/Makefile \ - SOURCE=$${next_source} \ - RELATIVE_SOURCE_PATH=$${next_relsource} \ - TARGET=$(TARGET) \ - INCL_MAKEDEP=FALSE \ - KERNEL_CONFIG=$(PEXPERT_KERNEL_CONFIG) \ + cp $? .; \ + if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ + $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); + +do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile + $(_v)${MAKE} \ + -C $(TARGET)/$(CURRENT_KERNEL_CONFIG) \ + -f $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile \ + CURRENT_KERNEL_CONFIG=${CURRENT_KERNEL_CONFIG} \ + CURRENT_ARCH_CONFIG=${CURRENT_ARCH_CONFIG} \ + CURRENT_MACHINE_CONFIG=${CURRENT_MACHINE_CONFIG} \ + SOURCE=$(subst conf/,,$(SOURCE)) \ + TARGET=${TARGET} \ + OBJPATH=${OBJPATH} \ build_all; -do_build_all: do_all +do_build_all:: do_all include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/pexpert/conf/Makefile.i386 b/pexpert/conf/Makefile.i386 deleted file mode 100644 index d3cca3ad4..000000000 --- a/pexpert/conf/Makefile.i386 +++ /dev/null @@ -1,8 +0,0 @@ -###################################################################### -#BEGIN Machine dependent Makefile fragment for i386 -###################################################################### - -###################################################################### -#END Machine dependent Makefile fragment for i386 -###################################################################### - diff --git a/pexpert/conf/Makefile.template b/pexpert/conf/Makefile.template index 0fcca19b1..7a8e0038d 100644 --- a/pexpert/conf/Makefile.template +++ b/pexpert/conf/Makefile.template @@ -5,16 +5,6 @@ # the terms and conditions for use and redistribution. 
# -# -# Export IDENT for sub-makefiles -# -export IDENT - -# -# INCFLAGS -# -INCFLAGS_MAKEFILE= $(INCFLAGS_POSIX) - export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule @@ -26,9 +16,10 @@ include $(MakeInc_def) # # CFLAGS # -# -CFLAGS+= -include meta_features.h -DPEXPERT_KERNEL_PRIVATE \ - $(CFLAGS_INLINE_CONFIG) +CFLAGS+= -include meta_features.h -DPEXPERT_KERNEL_PRIVATE + +# Objects that don't want -Wcast-align warning (8474835) +pe_identify_machine.o_CWARNFLAGS_ADD = -Wno-cast-align # # Directories for mig generated files @@ -53,13 +44,8 @@ COMP_SUBDIRS = %CFILES -%MFILES - %SFILES -%BFILES - -%ORDERED %MACHDEP # @@ -69,23 +55,17 @@ COMP_SUBDIRS = # ${OBJS}: ${OBJSDEPS} - -%LOAD - LDOBJS = $(OBJS) $(COMPONENT).filelist: $(LDOBJS) @echo LDFILELIST $(COMPONENT) $(_v)( for obj in ${LDOBJS}; do \ - echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist -do_depend: do_all - $(_v)${MD} -u Makedep -f -d `ls *.d`; - do_all: $(COMPONENT).filelist -do_build_all: do_depend +do_build_all:: do_all %RULES diff --git a/pexpert/conf/files.i386 b/pexpert/conf/files.i386 deleted file mode 100644 index 0ba9ffc18..000000000 --- a/pexpert/conf/files.i386 +++ /dev/null @@ -1,8 +0,0 @@ -OPTIONS/gprof optional gprof - -pexpert/i386/pe_init.c standard -pexpert/i386/pe_bootargs.c standard -pexpert/i386/pe_identify_machine.c standard -pexpert/i386/pe_kprintf.c standard -pexpert/i386/pe_interrupt.c standard -pexpert/i386/pe_serial.c standard diff --git a/pexpert/gen/bootargs.c b/pexpert/gen/bootargs.c index 6bc636010..d4d8e3caa 100644 --- a/pexpert/gen/bootargs.c +++ b/pexpert/gen/bootargs.c @@ -29,9 +29,7 @@ #include static boolean_t isargsep( char c); -#if !CONFIG_EMBEDDED static int argstrcpy(char *from, char *to); -#endif static int argstrcpy2(char *from,char *to, unsigned maxlen); static int argnumcpy(int val, void *to, unsigned maxlen); static int getval(char *s, int *val); @@ -55,10 +53,6 @@ PE_parse_boot_arg( { int max_len = -1; -#if CONFIG_EMBEDDED - /* Limit arg size to 4 byte when no size is given */ - max_len = 4; -#endif return PE_parse_boot_argn(arg_string, arg_ptr, max_len); } @@ -80,9 +74,6 @@ PE_parse_boot_argn( args = PE_boot_args(); if (*args == '\0') return FALSE; -#if CONFIG_EMBEDDED - if (max_len == -1) return FALSE; -#endif arg_found = FALSE; @@ -134,10 +125,8 @@ PE_parse_boot_argn( case STR: if(max_len > 0) //max_len of 0 performs no copy at all argstrcpy2(++cp, (char *)arg_ptr, max_len - 1); -#if !CONFIG_EMBEDDED else if(max_len == -1) // unreachable on embedded argstrcpy(++cp, (char *)arg_ptr); -#endif arg_found = TRUE; break; } @@ -164,7 +153,6 @@ isargsep( return(FALSE); } -#if !CONFIG_EMBEDDED static int argstrcpy( char *from, @@ -179,7 +167,6 @@ argstrcpy( *to = 0; return(i); } -#endif static int argstrcpy2( diff --git a/pexpert/i386/pe_kprintf.c b/pexpert/i386/pe_kprintf.c index 68d5fc2eb..3f655631b 100644 --- a/pexpert/i386/pe_kprintf.c +++ b/pexpert/i386/pe_kprintf.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -120,18 +121,15 @@ void kprintf(const char *fmt, ...) } /* - * Spin to get kprintf lock but re-enable interrupts while - * failing. - * This allows interrupts to be handled while waiting but - * interrupts are disabled once we have the lock. + * Spin to get kprintf lock but poll for incoming signals + * while interrupts are masked. 
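+ * Draining pending cross-CPU signals on each spin iteration keeps other
+ * processors from stalling against this CPU (for example, waiting on a
+ * TLB-shootdown acknowledgement) while it spins for the lock with
+ * interrupts masked.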
*/ state = ml_set_interrupts_enabled(FALSE); pal_preemption_assert(); while (!simple_lock_try(&kprintf_lock)) { - ml_set_interrupts_enabled(state); - ml_set_interrupts_enabled(FALSE); + (void) cpu_signal_handler(NULL); } if (cpu_number() != cpu_last_locked) { diff --git a/pexpert/pexpert/Makefile b/pexpert/pexpert/Makefile index 0680f3ca2..e31e88903 100644 --- a/pexpert/pexpert/Makefile +++ b/pexpert/pexpert/Makefile @@ -9,15 +9,12 @@ include $(MakeInc_def) INSTINC_SUBDIRS = \ machine -INSTINC_SUBDIRS_I386 = \ - i386 INSTINC_SUBDIRS_X86_64 = \ i386 INSTINC_SUBDIRS_ARM = \ arm EXPINC_SUBDIRS = ${INSTINC_SUBDIRS} -EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} EXPINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS_X86_64} EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM} diff --git a/pexpert/pexpert/i386/boot.h b/pexpert/pexpert/i386/boot.h index 369c2c477..b73e6ef24 100644 --- a/pexpert/pexpert/i386/boot.h +++ b/pexpert/pexpert/i386/boot.h @@ -124,6 +124,7 @@ typedef struct boot_icon_element boot_icon_element; /* Bitfields for boot_args->flags */ #define kBootArgsFlagRebootOnPanic (1 << 0) #define kBootArgsFlagHiDPI (1 << 1) +#define kBootArgsFlagBlack (1 << 2) typedef struct boot_args { uint16_t Revision; /* Revision of boot_args structure */ diff --git a/pexpert/pexpert/pexpert.h b/pexpert/pexpert/pexpert.h index 31209ff78..506361341 100644 --- a/pexpert/pexpert/pexpert.h +++ b/pexpert/pexpert/pexpert.h @@ -30,8 +30,10 @@ #include +#ifdef KERNEL #include #include +#endif __BEGIN_DECLS #include @@ -118,9 +120,11 @@ void PE_register_timebase_callback(timebase_callback_func callback); void PE_call_timebase_callback(void); +#ifdef KERNEL void PE_install_interrupt_handler( void *nub, int source, void *target, IOInterruptHandler handler, void *refCon); +#endif #ifndef _FN_KPRINTF #define _FN_KPRINTF diff --git a/security/Makefile b/security/Makefile index 4af4eb56e..07c961c42 100644 --- a/security/Makefile +++ b/security/Makefile @@ -3,22 +3,10 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir + include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = \ - -INSTINC_SUBDIRS_I386 = \ - -INSTINC_SUBDIRS_X86_64 = \ - -EXPINC_SUBDIRS = \ - -EXPINC_SUBDIRS_I386 = \ - -EXPINC_SUBDIRS_X86_64 = \ - - DATAFILES = \ mac.h \ mac_policy.h @@ -46,8 +34,6 @@ INSTALL_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} # /System/Library/Frameworks/Kernel.framework/PrivateHeaders INSTALL_KF_MI_LCL_LIST = ${DATAFILES} ${PRIVATE_DATAFILES} -SETUP_SUBDIRS = - COMP_SUBDIRS = conf include $(MakeInc_rule) diff --git a/security/_label.h b/security/_label.h index 9afcf5a0f..509d3a07d 100644 --- a/security/_label.h +++ b/security/_label.h @@ -68,7 +68,6 @@ * XXXMAC: This shouldn't be exported to userland, but is because of ucred.h * and various other messes. */ - #define MAC_MAX_SLOTS 7 #define MAC_FLAG_INITIALIZED 0x0000001 /* Is initialized for use. */ diff --git a/security/conf/MASTER b/security/conf/MASTER index 330092535..8a93c2486 100644 --- a/security/conf/MASTER +++ b/security/conf/MASTER @@ -72,3 +72,7 @@ options CONFIG_EXT_RESOLVER # e.g. 
memberd # options SECURE_KERNEL # options DEBUG # # options MACH_ASSERT # # + +options CONFIG_MEMORYSTATUS # +options CONFIG_JETSAM # +options CONFIG_FREEZE # diff --git a/security/conf/MASTER.i386 b/security/conf/MASTER.i386 deleted file mode 100644 index 60bcfbe5e..000000000 --- a/security/conf/MASTER.i386 +++ /dev/null @@ -1,32 +0,0 @@ -###################################################################### -# -# RELEASE = [ intel mach libkerncpp config_dtrace audit vm_pressure_events ] -# PROFILE = [ RELEASE profile ] -# DEBUG = [ RELEASE debug ] -# -# EMBEDDED = [ intel mach libkerncpp audit ] -# DEVELOPMENT = [ EMBEDDED config_dtrace ] -# -###################################################################### - -machine "i386" # -cpu "i386" # - -# -# Note: MAC/AUDIT options must be set in all the bsd/conf, osfmk/conf, and -# security/conf MASTER files. -# -options CONFIG_MACF # Mandatory Access Control Framework -options CONFIG_MACF_SOCKET_SUBSET # MACF subset of socket support -options CONFIG_FSE -options CONFIG_TRIGGERS -options CONFIG_VFS_FUNNEL -options CONFIG_EXT_RESOLVER -#options CONFIG_MACF_SOCKET -#options CONFIG_MACF_NET -#options CONFIG_MACF_ALWAYS_LABEL_MBUF -#options CONFIG_MACF_DEBUG -#options CONFIG_MACF_MACH -options CONFIG_AUDIT # kernel auditing - -options NO_NESTED_PMAP # diff --git a/security/conf/MASTER.x86_64 b/security/conf/MASTER.x86_64 index 4483af782..485e7bc7c 100644 --- a/security/conf/MASTER.x86_64 +++ b/security/conf/MASTER.x86_64 @@ -1,6 +1,6 @@ ###################################################################### # -# RELEASE = [ intel mach libkerncpp config_dtrace audit vm_pressure_events ] +# RELEASE = [ intel mach libkerncpp config_dtrace audit vm_pressure_events memorystatus ] # PROFILE = [ RELEASE profile ] # DEBUG = [ RELEASE debug mach_assert ] # diff --git a/security/conf/Makefile b/security/conf/Makefile index 3bab0d1ce..25a42ef5e 100644 --- a/security/conf/Makefile +++ b/security/conf/Makefile @@ -3,56 +3,37 @@ export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir - include $(MakeInc_cmd) include $(MakeInc_def) -SETUP_SUBDIRS = - -COMP_SUBDIRS = - -INST_SUBDIRS = - -ifndef SECURITY_KERNEL_CONFIG -export SECURITY_KERNEL_CONFIG = $(KERNEL_CONFIG) -endif +MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC).$(CURRENT_MACHINE_CONFIG_LC) -ifneq ($(MACHINE_CONFIG), DEFAULT) -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)_$(MACHINE_CONFIG)/$(COMPONENT) -else -export COMPOBJROOT=$(OBJROOT)/$(KERNEL_CONFIG)_$(ARCH_CONFIG)/$(COMPONENT) -endif - -MASTER_CPU_PER_SOC = $(SOURCE)/MASTER.$(ARCH_CONFIG_LC).$(MACHINE_CONFIG_LC) - -$(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ - $(SOURCE)/MASTER.$(ARCH_CONFIG_LC) \ - $(SOURCE)/Makefile.template \ - $(SOURCE)/Makefile.$(ARCH_CONFIG_LC) \ - $(SOURCE)/files \ - $(SOURCE)/files.$(ARCH_CONFIG_LC) - $(_v)(doconf_target=$(addsuffix /conf, $(TARGET)); \ - $(MKDIR) $${doconf_target}; \ - cd $${doconf_target}; \ +$(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile : $(SOURCE)/MASTER \ + $(SOURCE)/MASTER.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/Makefile.template \ + $(SOURCE)/Makefile.$(CURRENT_ARCH_CONFIG_LC) \ + $(SOURCE)/files \ + $(SOURCE)/files.$(CURRENT_ARCH_CONFIG_LC) + $(_v)$(MKDIR) $(addsuffix /conf, $(TARGET)); \ + cd $(addsuffix /conf, $(TARGET)); \ rm -f $(notdir $?); \ - cp $? 
$${doconf_target}; \ - if [ -f $(MASTER_CPU_PER_SOC) ]; then cp $(MASTER_CPU_PER_SOC) $${doconf_target}; fi; \ - $(SRCROOT)/SETUP/config/doconf -c -cpu $(ARCH_CONFIG_LC) -soc $(MACHINE_CONFIG_LC) -d $(TARGET)/$(SECURITY_KERNEL_CONFIG) $(SECURITY_KERNEL_CONFIG); \ - ); - -do_all: $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG)/Makefile - $(_v)next_source=$(subst conf/,,$(SOURCE)); \ - next_relsource=$(subst conf/,,$(RELATIVE_SOURCE_PATH)); \ - ${MAKE} -C $(COMPOBJROOT)/$(SECURITY_KERNEL_CONFIG) \ - MAKEFILES=$(TARGET)/$(SECURITY_KERNEL_CONFIG)/Makefile \ - SOURCE=$${next_source} \ - RELATIVE_SOURCE_PATH=$${next_relsource} \ - TARGET=$(TARGET) \ - INCL_MAKEDEP=FALSE \ - KERNEL_CONFIG=$(SECURITY_KERNEL_CONFIG) \ + cp $? .; \ + if [ $(MASTER_CPU_PER_SOC) -nt $@ ]; then cp $(MASTER_CPU_PER_SOC) .; fi; \ + $(SRCROOT)/SETUP/config/doconf -c -cpu $(CURRENT_ARCH_CONFIG_LC) -soc $(CURRENT_MACHINE_CONFIG_LC) -d $(TARGET)/$(CURRENT_KERNEL_CONFIG) $(CURRENT_KERNEL_CONFIG); + +do_all: $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile + $(_v)${MAKE} \ + -C $(TARGET)/$(CURRENT_KERNEL_CONFIG) \ + -f $(TARGET)/$(CURRENT_KERNEL_CONFIG)/Makefile \ + CURRENT_KERNEL_CONFIG=${CURRENT_KERNEL_CONFIG} \ + CURRENT_ARCH_CONFIG=${CURRENT_ARCH_CONFIG} \ + CURRENT_MACHINE_CONFIG=${CURRENT_MACHINE_CONFIG} \ + SOURCE=$(subst conf/,,$(SOURCE)) \ + TARGET=${TARGET} \ + OBJPATH=${OBJPATH} \ build_all; -do_build_all: do_all +do_build_all:: do_all include $(MakeInc_rule) include $(MakeInc_dir) diff --git a/security/conf/Makefile.i386 b/security/conf/Makefile.i386 deleted file mode 100644 index 3695a666c..000000000 --- a/security/conf/Makefile.i386 +++ /dev/null @@ -1,7 +0,0 @@ -###################################################################### -#BEGIN Machine dependent Makefile fragment for i386 -###################################################################### - -###################################################################### -#END Machine dependent Makefile fragment for i386 -###################################################################### diff --git a/security/conf/Makefile.template b/security/conf/Makefile.template index fd1ffeef0..0270cfb70 100644 --- a/security/conf/Makefile.template +++ b/security/conf/Makefile.template @@ -5,16 +5,6 @@ # the terms and conditions for use and redistribution. # -# -# Export IDENT for sub-makefiles -# -export IDENT - -# -# INCFLAGS to include security prototypes -# -INCFLAGS_MAKEFILE= -I$(SOURCE) -I$(SOURCE)/.. - export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd export MakeInc_def=${SRCROOT}/makedefs/MakeInc.def export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule @@ -26,14 +16,26 @@ include $(MakeInc_def) # # XXX: CFLAGS # -CFLAGS+= -I. -include meta_features.h -DBSD_KERNEL_PRIVATE +CFLAGS+= -include meta_features.h -DBSD_KERNEL_PRIVATE + +# Objects that don't want -Wcast-align warning (8474835) +OBJS_NO_CAST_ALIGN = \ + mac_alloc.o \ + mac_base.o + +$(foreach file,$(OBJS_NO_CAST_ALIGN),$(eval $(call add_perfile_cflags,$(file),-Wno-cast-align))) # -# Directories for mig generated files +# INCFLAGS to include security prototypes # -COMP_SUBDIRS = +INCFLAGS_MAKEFILE= -I$(SOURCE)/.. +# +# Directories for mig generated files +# +COMP_SUBDIRS = +# # Make sure we don't remove this by accident if interrupted at the wrong # time. 
# @@ -59,13 +61,8 @@ COPYRIGHT_FILES = \ %CFILES -%MFILES - %SFILES -%BFILES - -%ORDERED %MACHDEP # @@ -75,30 +72,25 @@ COPYRIGHT_FILES = \ # ${OBJS}: ${OBJSDEPS} - -%LOAD - LDOBJS = $(OBJS) -$(COMPONENT).filelist: $(LDOBJS) - $(_v)$(RM) $(RMFLAGS) vers.c - $(_v)$(SRCROOT)/SETUP/newvers \ - `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} - $(_v)${KCC} $(CFLAGS) $(INCLUDES) -c vers.c +$(COMPONENT).filelist: $(LDOBJS) vers.o @echo LDFILELIST $(COMPONENT) $(_v)( for obj in ${LDOBJS} vers.o; do \ - echo $(TARGET)$(COMP_OBJ_DIR)/$(KERNEL_CONFIG)/$${obj}; \ + echo $(TARGET)/$(CURRENT_KERNEL_CONFIG)/$${obj}; \ done; ) > $(COMPONENT).filelist do_all: $(COMPONENT).filelist -do_depend: do_all - $(_v)${MD} -u Makedep -f -d `ls *.d`; - -do_build_all: do_depend +do_build_all:: do_all -${SOBJS}: +vers.o: vers.c + @echo CC $@ + $(_v)${KCC} $(CFLAGS) -o ${@} -c ${INCFLAGS} $< +vers.c: + $(_v)$(SRCROOT)/SETUP/newvers \ + `$(CAT) ${VERSION_FILES}` ${COPYRIGHT_FILES} %RULES diff --git a/security/conf/files b/security/conf/files index 8c077cb19..75ba967af 100644 --- a/security/conf/files +++ b/security/conf/files @@ -27,10 +27,11 @@ security/mac_sysv_msg.c optional config_macf security/mac_posix_sem.c optional config_macf security/mac_posix_shm.c optional config_macf security/mac_socket.c optional config_macf -security/mac_stub.c standard security/mac_net.c optional config_macf_net security/mac_pipe.c optional config_macf security/mac_iokit.c optional config_macf security/mac_file.c optional config_macf security/mac_inet.c optional config_macf_net security/mac_priv.c optional config_macf +security/mac_pty.c optional config_macf +security/mac_kext.c optional config_macf diff --git a/security/conf/files.i386 b/security/conf/files.i386 deleted file mode 100644 index e69de29bb..000000000 diff --git a/security/mac.h b/security/mac.h index 480d1a30b..14800566a 100644 --- a/security/mac.h +++ b/security/mac.h @@ -97,6 +97,11 @@ struct mac { typedef struct mac *mac_t; #ifdef KERNEL + +#ifndef PRIVATE +#warning "MAC policy is not KPI, see Technical Q&A QA1574" +#endif + struct user_mac { user_size_t m_buflen; user_addr_t m_string; diff --git a/security/mac_alloc.c b/security/mac_alloc.c index 8cd41403d..cdc8f8a1f 100644 --- a/security/mac_alloc.c +++ b/security/mac_alloc.c @@ -84,6 +84,7 @@ mac_kfree(void * data, vm_size_t size) void * mac_mbuf_alloc(int len, int wait) { +#if CONFIG_MACF_SOCKET_SUBSET struct m_tag *t; t = m_tag_alloc(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_MAC_POLICY_LABEL, @@ -92,15 +93,23 @@ mac_mbuf_alloc(int len, int wait) return (NULL); return ((void *)(t + 1)); +#else +#pragma unused(len, wait) + return NULL; +#endif } void mac_mbuf_free(void *data) { +#if CONFIG_MACF_SOCKET_SUBSET struct m_tag *t; t = (struct m_tag *)((char *)data - sizeof(struct m_tag)); m_tag_free(t); +#else +#pragma unused(data) +#endif } /* diff --git a/security/mac_base.c b/security/mac_base.c index ae808e277..f7698f2e8 100644 --- a/security/mac_base.c +++ b/security/mac_base.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007 Apple Inc. All rights reserved. + * Copyright (c) 2007-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -1757,7 +1757,7 @@ __mac_get_fd(proc_t p, struct __mac_get_fd_args *uap, int *ret __unused) return (error); } - switch (fp->f_fglob->fg_type) { + switch (FILEGLOB_DTYPE(fp->f_fglob)) { case DTYPE_VNODE: intlabel = mac_vnode_label_alloc(); if (intlabel == NULL) { @@ -1952,7 +1952,7 @@ __mac_set_fd(proc_t p, struct __mac_set_fd_args *uap, int *ret __unused) return (error); } - switch (fp->f_fglob->fg_type) { + switch (FILEGLOB_DTYPE(fp->f_fglob)) { case DTYPE_VNODE: if (mac_label_vnodes == 0) { @@ -2297,6 +2297,10 @@ mac_do_machexc(int64_t code, int64_t subcode, uint32_t flags) #else /* MAC */ +void (*load_security_extensions_function)(void) = 0; + +struct sysctl_oid_list sysctl__security_mac_children; + int mac_policy_register(struct mac_policy_conf *mpc __unused, mac_policy_handle_t *handlep __unused, void *xd __unused) @@ -2319,12 +2323,6 @@ mac_audit_text(char *text __unused, mac_policy_handle_t handle __unused) return (0); } -int -mac_mount_label_get(struct mount *mp __unused, user_addr_t mac_p __unused) -{ - return (ENOSYS); -} - int mac_vnop_setxattr(struct vnode *vp __unused, const char *name __unused, char *buf __unused, size_t len __unused) { @@ -2347,116 +2345,36 @@ mac_vnop_removexattr(struct vnode *vp __unused, const char *name __unused) return (ENOENT); } -int -__mac_get_pid(proc_t p __unused, struct __mac_get_pid_args *uap __unused, int *ret __unused) +intptr_t mac_label_get(struct label *l __unused, int slot __unused) { - - return (ENOSYS); + return 0; } -int -__mac_get_proc(proc_t p __unused, struct __mac_get_proc_args *uap __unused, int *ret __unused) +void mac_label_set(struct label *l __unused, int slot __unused, intptr_t v __unused) { - - return (ENOSYS); -} - -int -__mac_set_proc(proc_t p __unused, struct __mac_set_proc_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); -} - -int -__mac_get_file(proc_t p __unused, struct __mac_get_file_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); -} - -int -__mac_get_link(proc_t p __unused, struct __mac_get_link_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); -} - -int -__mac_set_file(proc_t p __unused, struct __mac_set_file_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); -} - -int -__mac_set_link(proc_t p __unused, struct __mac_set_link_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); -} - -int -__mac_get_fd(proc_t p __unused, struct __mac_get_fd_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); -} - -int -__mac_set_fd(proc_t p __unused, struct __mac_set_fd_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); -} - -int -__mac_syscall(proc_t p __unused, struct __mac_syscall_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); -} - -int -__mac_get_lcid(proc_t p __unused, struct __mac_get_lcid_args *uap __unused, int *ret __unused) -{ - - return (ENOSYS); + return; } -int -__mac_get_lctx(proc_t p __unused, struct __mac_get_lctx_args *uap __unused, int *ret __unused) +struct label *mac_thread_get_threadlabel(struct thread *thread __unused) { - - return (ENOSYS); + return NULL; } -int -__mac_set_lctx(proc_t p __unused, struct __mac_set_lctx_args *uap __unused, int *ret __unused) +struct label *mac_thread_get_uthreadlabel(struct uthread *uthread __unused) { - - return (ENOSYS); + return NULL; } -int -__mac_get_mount(proc_t p __unused, - struct __mac_get_mount_args *uap __unused, int *ret __unused) +void mac_proc_set_enforce(proc_t p, int enforce_flags); +void mac_proc_set_enforce(proc_t 
p __unused, int enforce_flags __unused) { - - return (ENOSYS); + return; } -int -mac_schedule_userret(void) +int mac_iokit_check_hid_control(kauth_cred_t cred __unused); +int mac_iokit_check_hid_control(kauth_cred_t cred __unused) { - - return (1); + return 0; } -int -mac_do_machexc(int64_t code __unused, int64_t subcode __unused, uint32_t flags __unused) -{ - - return (1); -} #endif /* !MAC */ diff --git a/security/mac_data.h b/security/mac_data.h index 6a5d14025..74df5e6b3 100644 --- a/security/mac_data.h +++ b/security/mac_data.h @@ -54,6 +54,10 @@ #ifndef _SECURITY_MAC_DATA_H_ #define _SECURITY_MAC_DATA_H_ +#ifndef PRIVATE +#warning "MAC policy is not KPI, see Technical Q&A QA1574, this header will be removed in next version" +#endif + /** @brief Mac policy module data diff --git a/security/mac_framework.h b/security/mac_framework.h index 7c91a2469..777efd542 100644 --- a/security/mac_framework.h +++ b/security/mac_framework.h @@ -77,7 +77,9 @@ #error "no user-serviceable parts inside" #endif -#if CONFIG_MACF +#ifndef PRIVATE +#warning "MAC policy is not KPI, see Technical Q&A QA1574, this header will be removed in next version" +#endif struct attrlist; struct auditinfo; @@ -114,6 +116,7 @@ struct socket; struct task; struct thread; struct timespec; +struct tty; struct ucred; struct uio; struct uthread; @@ -123,6 +126,8 @@ struct vnode; struct vnode_attr; struct vop_setlabel_args; +#if CONFIG_MACF + #ifndef __IOKIT_PORTS_DEFINED__ #define __IOKIT_PORTS_DEFINED__ #ifdef __cplusplus @@ -156,8 +161,8 @@ void mac_bpfdesc_label_associate(kauth_cred_t cred, struct bpf_d *bpf_d); int mac_cred_check_label_update(kauth_cred_t cred, struct label *newlabel); int mac_cred_check_label_update_execve(vfs_context_t ctx, - struct vnode *vp, struct label *scriptvnodelabel, - struct label *execlabel, proc_t proc); + struct vnode *vp, struct vnode *scriptvp, struct label *scriptvnodelabel, + struct label *execlabel, proc_t proc, void *macextensions); int mac_cred_check_visible(kauth_cred_t u1, kauth_cred_t u2); struct label *mac_cred_label_alloc(void); void mac_cred_label_associate(kauth_cred_t cred_parent, @@ -172,8 +177,8 @@ void mac_cred_label_init(kauth_cred_t cred); int mac_cred_label_compare(struct label *a, struct label *b); void mac_cred_label_update(kauth_cred_t cred, struct label *newlabel); int mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t newcred, - struct vnode *vp, struct label *scriptvnodelabel, - struct label *execlabel); + struct vnode *vp, struct vnode *scriptvp, struct label *scriptvnodelabel, + struct label *execlabel, void *macextensions); void mac_devfs_label_associate_device(dev_t dev, struct devnode *de, const char *fullpath); void mac_devfs_label_associate_directory(const char *dirname, int dirnamelen, @@ -329,6 +334,8 @@ void mac_posixshm_label_init(struct pshminfo *pshm); int mac_priv_check(kauth_cred_t cred, int priv); int mac_priv_grant(kauth_cred_t cred, int priv); int mac_proc_check_debug(proc_t proc1, proc_t proc2); +int mac_proc_check_cpumon(proc_t curp); +int mac_proc_check_proc_info(proc_t curp, proc_t target, int callnum, int flavor); int mac_proc_check_fork(proc_t proc); int mac_proc_check_suspend_resume(proc_t proc, int sr); int mac_proc_check_get_task_name(kauth_cred_t cred, struct proc *p); @@ -397,6 +404,7 @@ int mac_system_check_auditctl(kauth_cred_t cred, struct vnode *vp); int mac_system_check_auditon(kauth_cred_t cred, int cmd); int mac_system_check_chud(kauth_cred_t cred); int mac_system_check_host_priv(kauth_cred_t cred); +int 
mac_system_check_info(kauth_cred_t, const char *info_type); int mac_system_check_nfsd(kauth_cred_t cred); int mac_system_check_reboot(kauth_cred_t cred, int howto); int mac_system_check_settime(kauth_cred_t cred); @@ -468,8 +476,8 @@ int mac_vnode_check_exchangedata(vfs_context_t ctx, struct vnode *v1, int mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp, struct image_params *imgp); int mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp); -int mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1, - void * signature, size_t size); +int mac_vnode_check_signature(struct vnode *vp, off_t macho_offset, + unsigned char *sha1, void * signature, size_t size); int mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp, struct attrlist *alist); int mac_vnode_check_getextattr(vfs_context_t ctx, struct vnode *vp, @@ -549,9 +557,16 @@ int mac_vnode_notify_create(vfs_context_t ctx, struct mount *mp, void mac_vnode_notify_rename(vfs_context_t ctx, struct vnode *vp, struct vnode *dvp, struct componentname *cnp); void mac_vnode_notify_open(vfs_context_t ctx, struct vnode *vp, int acc_flags); +void mac_vnode_notify_link(vfs_context_t ctx, struct vnode *vp, + struct vnode *dvp, struct componentname *cnp); +int mac_vnode_find_sigs(struct proc *p, struct vnode *vp, off_t offsetInMacho); int vnode_label(struct mount *mp, struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int flags, vfs_context_t ctx); void vnode_relabel(struct vnode *vp); +void mac_pty_notify_grant(proc_t p, struct tty *tp, dev_t dev, struct label *label); +void mac_pty_notify_close(proc_t p, struct tty *tp, dev_t dev, struct label *label); +int mac_kext_check_load(kauth_cred_t cred, const char *identifier); +int mac_kext_check_unload(kauth_cred_t cred, const char *identifier); void psem_label_associate(struct fileproc *fp, struct vnode *vp, struct vfs_context *ctx); void pshm_label_associate(struct fileproc *fp, struct vnode *vp, struct vfs_context *ctx); diff --git a/security/mac_internal.h b/security/mac_internal.h index 6ca8b699e..013f6563d 100644 --- a/security/mac_internal.h +++ b/security/mac_internal.h @@ -67,6 +67,10 @@ #ifndef _SECURITY_MAC_INTERNAL_H_ #define _SECURITY_MAC_INTERNAL_H_ +#ifndef PRIVATE +#warning "MAC policy is not KPI, see Technical Q&A QA1574, this header will be removed in next version" +#endif + #include #include #include @@ -202,13 +206,14 @@ extern unsigned int mac_label_mbufs; extern unsigned int mac_label_vnodes; -static int mac_proc_check_enforce(proc_t p, int enforce_flag); +static int mac_proc_check_enforce(proc_t p, int enforce_flags); static __inline__ int mac_proc_check_enforce(proc_t p, int enforce_flags) { #if CONFIG_MACF return ((p->p_mac_enforce & enforce_flags) != 0); #else +#pragma unused(p,enforce_flags) return 0; #endif } @@ -228,12 +233,16 @@ static __inline__ int mac_context_check_enforce(vfs_context_t ctx, int enforce_f static __inline__ void mac_context_set_enforce(vfs_context_t ctx, int enforce_flags) { +#if CONFIG_MACF proc_t proc = vfs_context_proc(ctx); if (proc == NULL) return; mac_proc_set_enforce(proc, enforce_flags); +#else +#pragma unused(ctx,enforce_flags) +#endif } diff --git a/security/mac_kext.c b/security/mac_kext.c new file mode 100644 index 000000000..97fbbcb6c --- /dev/null +++ b/security/mac_kext.c @@ -0,0 +1,22 @@ +#include +#include +#include +#include + +int +mac_kext_check_load(kauth_cred_t cred, const char *identifier) { + int error; + + MAC_CHECK(kext_check_load, cred, identifier); + + return (error); +} + +int 
+mac_kext_check_unload(kauth_cred_t cred, const char *identifier) { + int error; + + MAC_CHECK(kext_check_unload, cred, identifier); + + return (error); +} diff --git a/security/mac_mach_internal.h b/security/mac_mach_internal.h index 5393c750d..2a98a1196 100644 --- a/security/mac_mach_internal.h +++ b/security/mac_mach_internal.h @@ -54,6 +54,10 @@ #ifndef _SECURITY_MAC_MACH_INTERNAL_H_ #define _SECURITY_MAC_MACH_INTERNAL_H_ +#ifndef PRIVATE +#warning "MAC policy is not KPI, see Technical Q&A QA1574, this header will be removed in next version" +#endif + int mac_task_check_service(task_t self, task_t obj, const char *perm); void mac_task_label_update_internal(struct label *pl, struct task *t); int mac_port_label_compute(struct label *subj, struct label *obj, diff --git a/security/mac_policy.h b/security/mac_policy.h index a5c13843e..cf90fbaa6 100644 --- a/security/mac_policy.h +++ b/security/mac_policy.h @@ -80,6 +80,10 @@ #ifndef _SECURITY_MAC_POLICY_H_ #define _SECURITY_MAC_POLICY_H_ +#ifndef PRIVATE +#warning "MAC policy is not KPI, see Technical Q&A QA1574, this header will be removed in next version" +#endif + #include struct attrlist; @@ -96,15 +100,21 @@ struct mac_module_data; struct mac_policy_conf; struct mbuf; struct mount; +struct msg; +struct msqid_kernel; struct pipe; struct pseminfo; struct pshminfo; struct sbuf; struct semid_kernel; struct shmid_kernel; +struct socket; +struct sockopt; struct task; struct thread; +struct tty; struct ucred; +struct vfs_attr; struct vnode; /** @struct dummy */ @@ -292,6 +302,8 @@ typedef int mpo_bpfdesc_check_receive_t( @param scriptvnodelabel Script vnode label @param execlabel Userspace provided execution label @param proc Object process + @param macpolicyattr MAC policy-specific spawn attribute data + @param macpolicyattrlen Length of policy-specific spawn attribute data @see mac_execve @see mpo_cred_label_update_execve_t @see mpo_vnode_check_exec_t @@ -325,10 +337,13 @@ typedef int mpo_bpfdesc_check_receive_t( typedef int mpo_cred_check_label_update_execve_t( kauth_cred_t old, struct vnode *vp, + struct vnode *scriptvp, struct label *vnodelabel, struct label *scriptvnodelabel, struct label *execlabel, - struct proc *proc + struct proc *p, + void *macpolicyattr, + size_t macpolicyattrlen ); /** @brief Access control check for relabelling processes @@ -515,10 +530,13 @@ typedef int mpo_cred_label_internalize_t( @brief Update credential at exec time @param old_cred Existing subject credential @param new_cred New subject credential to be labeled + @param p Object process. @param vp File being executed @param vnodelabel Label corresponding to vp @param scriptvnodelabel Script vnode label @param execlabel Userspace provided execution label + @param macpolicyattr MAC policy-specific spawn attribute data. + @param macpolicyattrlen Length of policy-specific spawn attribute data. @see mac_execve @see mpo_cred_check_label_update_execve_t @see mpo_vnode_check_exec_t @@ -548,10 +566,14 @@ typedef int mpo_cred_label_internalize_t( typedef void mpo_cred_label_update_execve_t( kauth_cred_t old_cred, kauth_cred_t new_cred, + struct proc *p, struct vnode *vp, + struct vnode *scriptvp, struct label *vnodelabel, struct label *scriptvnodelabel, struct label *execlabel, + void *macpolicyattr, + size_t macpolicyattrlen, int *disjointp ); /** @@ -3144,6 +3166,36 @@ typedef int mpo_proc_check_ledger_t( struct proc *target, int op ); +/** + @brief Access control check for escaping default CPU usage monitor parameters. 
+ @param cred Subject credential + + Determine if a credential has permission to program CPU usage monitor parameters + that are less restrictive than the global system-wide defaults. + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. +*/ +typedef int mpo_proc_check_cpumon_t( + kauth_cred_t cred +); +/** + @brief Access control check for retrieving process information. + @param cred Subject credential + @param target Target process (may be null, may be zombie) + + Determine if a credential has permission to access process information as defined + by call number and flavor on the target process. + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. +*/ +typedef int mpo_proc_check_proc_info_t( + kauth_cred_t cred, + struct proc *target, + int callnum, + int flavor +); /** @brief Access control check for mmap MAP_ANON @param proc User process requesting the memory @@ -3997,6 +4049,26 @@ typedef int mpo_system_check_chud_t( typedef int mpo_system_check_host_priv_t( kauth_cred_t cred ); +/** + @brief Access control check for obtaining system information + @param cred Subject credential + @param info_type A description of the information requested + + Determine whether the subject identified by the credential should be + allowed to obtain information about the system. + + This is a generic hook that can be used in a variety of situations where + information is being returned that might be considered sensitive. + Rather than adding a new MAC hook for every such interface, this hook can + be called with a string identifying the type of information requested. + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. +*/ +typedef int mpo_system_check_info_t( + kauth_cred_t cred, + const char *info_type +); /** @brief Access control check for calling NFS services @param cred Subject credential @@ -4926,6 +4998,8 @@ typedef int mpo_vnode_check_exchangedata_t( @param label Policy label for vp @param execlabel Userspace provided execution label @param cnp Component name for file being executed + @param macpolicyattr MAC policy-specific spawn attribute data. + @param macpolicyattrlen Length of policy-specific spawn attribute data. Determine whether the subject identified by the credential can execute the passed vnode.
Determination of execute privilege is made separately @@ -4946,7 +5020,9 @@ typedef int mpo_vnode_check_exec_t( struct label *label, struct label *execlabel, /* NULLOK */ struct componentname *cnp, - u_int *csflags + u_int *csflags, + void *macpolicyattr, + size_t macpolicyattrlen ); /** @brief Access control check for fsgetpath @@ -4969,7 +5045,7 @@ typedef int mpo_vnode_check_fsgetpath_t( @brief Access control check after determining the code directory hash */ typedef int mpo_vnode_check_signature_t(struct vnode *vp, struct label *label, - unsigned char *sha1, void *signature, + off_t macho_offset, unsigned char *sha1, void *signature, int size); /** @@ -5977,6 +6053,20 @@ typedef void mpo_vnode_label_update_t( struct label *vnodelabel, struct label *label ); +/** + @brief Find detached signatures for a shared library + @param p Process trying to find the signature + @param vp The vnode to relabel + @param offset Offset in the macho that the signature is requested for (for fat binaries) + @param label Existing vnode label + +*/ +typedef int mpo_vnode_find_sigs_t( + struct proc *p, + struct vnode *vp, + off_t offset, + struct label *label +); /** @brief Create a new vnode, backed by extended attributes @param cred User credential for the creating process @@ -6045,22 +6135,111 @@ typedef void mpo_vnode_notify_rename_t( struct componentname *cnp ); +/** + @brief Inform MAC policies that a vnode has been linked + @param cred User credential for the linking process + @param dvp Parent directory for the destination + @param dlabel Policy label for dvp + @param vp Vnode that's being linked + @param vlabel Policy label for vp + @param cnp Component name for the destination + + Inform MAC policies that a vnode has been linked. + */ +typedef void mpo_vnode_notify_link_t( + kauth_cred_t cred, + struct vnode *dvp, + struct label *dlabel, + struct vnode *vp, + struct label *vlabel, + struct componentname *cnp +); + +/** + @brief Inform MAC policies that a pty slave has been granted + @param p Responsible process + @param tp tty data structure + @param dev Major and minor numbers of device + @param label Policy label for tp + + Inform MAC policies that a pty slave has been granted. +*/ +typedef void mpo_pty_notify_grant_t( + proc_t p, + struct tty *tp, + dev_t dev, + struct label *label +); + +/** + @brief Inform MAC policies that a pty master has been closed + @param p Responsible process + @param tp tty data structure + @param dev Major and minor numbers of device + @param label Policy label for tp + + Inform MAC policies that a pty master has been closed. +*/ +typedef void mpo_pty_notify_close_t( + proc_t p, + struct tty *tp, + dev_t dev, + struct label *label +); + +/** + @brief Access control check for kext loading + @param cred Subject credential + @param identifier Kext identifier + + Determine whether the subject identified by the credential can load the + specified kext. + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. Suggested failure: EPERM for lack of privilege. +*/ +typedef int mpo_kext_check_load_t( + kauth_cred_t cred, + const char *identifier +); + +/** + @brief Access control check for kext unloading + @param cred Subject credential + @param identifier Kext identifier + + Determine whether the subject identified by the credential can unload the + specified kext. + + @return Return 0 if access is granted, otherwise an appropriate value for + errno should be returned. Suggested failure: EPERM for lack of privilege.
+*/ +typedef int mpo_kext_check_unload_t( + kauth_cred_t cred, + const char *identifier +); + /* * Placeholder for future events that may need mac hooks. */ typedef void mpo_reserved_hook_t(void); -/*! - \struct mac_policy_ops -*/ -#define MAC_POLICY_OPS_VERSION 13 /* inc when new reserved slots are taken */ +/* + * Policy module operations. + * + * Please note that this should be kept in sync with the check assumptions + * policy in bsd/kern/policy_check.c (policy_ops struct). + */ +#define MAC_POLICY_OPS_VERSION 24 /* inc when new reserved slots are taken */ struct mac_policy_ops { mpo_audit_check_postselect_t *mpo_audit_check_postselect; mpo_audit_check_preselect_t *mpo_audit_check_preselect; + mpo_bpfdesc_label_associate_t *mpo_bpfdesc_label_associate; mpo_bpfdesc_label_destroy_t *mpo_bpfdesc_label_destroy; mpo_bpfdesc_label_init_t *mpo_bpfdesc_label_init; mpo_bpfdesc_check_receive_t *mpo_bpfdesc_check_receive; + mpo_cred_check_label_update_execve_t *mpo_cred_check_label_update_execve; mpo_cred_check_label_update_t *mpo_cred_check_label_update; mpo_cred_check_visible_t *mpo_cred_check_visible; @@ -6075,12 +6254,14 @@ struct mac_policy_ops { mpo_cred_label_internalize_t *mpo_cred_label_internalize; mpo_cred_label_update_execve_t *mpo_cred_label_update_execve; mpo_cred_label_update_t *mpo_cred_label_update; + mpo_devfs_label_associate_device_t *mpo_devfs_label_associate_device; mpo_devfs_label_associate_directory_t *mpo_devfs_label_associate_directory; mpo_devfs_label_copy_t *mpo_devfs_label_copy; mpo_devfs_label_destroy_t *mpo_devfs_label_destroy; mpo_devfs_label_init_t *mpo_devfs_label_init; mpo_devfs_label_update_t *mpo_devfs_label_update; + mpo_file_check_change_offset_t *mpo_file_check_change_offset; mpo_file_check_create_t *mpo_file_check_create; mpo_file_check_dup_t *mpo_file_check_dup; @@ -6097,6 +6278,7 @@ struct mac_policy_ops { mpo_file_label_init_t *mpo_file_label_init; mpo_file_label_destroy_t *mpo_file_label_destroy; mpo_file_label_associate_t *mpo_file_label_associate; + mpo_ifnet_check_label_update_t *mpo_ifnet_check_label_update; mpo_ifnet_check_transmit_t *mpo_ifnet_check_transmit; mpo_ifnet_label_associate_t *mpo_ifnet_label_associate; @@ -6107,18 +6289,22 @@ struct mac_policy_ops { mpo_ifnet_label_internalize_t *mpo_ifnet_label_internalize; mpo_ifnet_label_update_t *mpo_ifnet_label_update; mpo_ifnet_label_recycle_t *mpo_ifnet_label_recycle; + mpo_inpcb_check_deliver_t *mpo_inpcb_check_deliver; mpo_inpcb_label_associate_t *mpo_inpcb_label_associate; mpo_inpcb_label_destroy_t *mpo_inpcb_label_destroy; mpo_inpcb_label_init_t *mpo_inpcb_label_init; mpo_inpcb_label_recycle_t *mpo_inpcb_label_recycle; mpo_inpcb_label_update_t *mpo_inpcb_label_update; + mpo_iokit_check_device_t *mpo_iokit_check_device; + mpo_ipq_label_associate_t *mpo_ipq_label_associate; mpo_ipq_label_compare_t *mpo_ipq_label_compare; mpo_ipq_label_destroy_t *mpo_ipq_label_destroy; mpo_ipq_label_init_t *mpo_ipq_label_init; mpo_ipq_label_update_t *mpo_ipq_label_update; + mpo_lctx_check_label_update_t *mpo_lctx_check_label_update; mpo_lctx_label_destroy_t *mpo_lctx_label_destroy; mpo_lctx_label_externalize_t *mpo_lctx_label_externalize; @@ -6128,6 +6314,7 @@ struct mac_policy_ops { mpo_lctx_notify_create_t *mpo_lctx_notify_create; mpo_lctx_notify_join_t *mpo_lctx_notify_join; mpo_lctx_notify_leave_t *mpo_lctx_notify_leave; + mpo_mbuf_label_associate_bpfdesc_t *mpo_mbuf_label_associate_bpfdesc; mpo_mbuf_label_associate_ifnet_t *mpo_mbuf_label_associate_ifnet; mpo_mbuf_label_associate_inpcb_t 
*mpo_mbuf_label_associate_inpcb; @@ -6139,6 +6326,7 @@ struct mac_policy_ops { mpo_mbuf_label_copy_t *mpo_mbuf_label_copy; mpo_mbuf_label_destroy_t *mpo_mbuf_label_destroy; mpo_mbuf_label_init_t *mpo_mbuf_label_init; + mpo_mount_check_fsctl_t *mpo_mount_check_fsctl; mpo_mount_check_getattr_t *mpo_mount_check_getattr; mpo_mount_check_label_update_t *mpo_mount_check_label_update; @@ -6152,9 +6340,11 @@ struct mac_policy_ops { mpo_mount_label_externalize_t *mpo_mount_label_externalize; mpo_mount_label_init_t *mpo_mount_label_init; mpo_mount_label_internalize_t *mpo_mount_label_internalize; + mpo_netinet_fragment_t *mpo_netinet_fragment; mpo_netinet_icmp_reply_t *mpo_netinet_icmp_reply; mpo_netinet_tcp_reply_t *mpo_netinet_tcp_reply; + mpo_pipe_check_ioctl_t *mpo_pipe_check_ioctl; mpo_pipe_check_kqfilter_t *mpo_pipe_check_kqfilter; mpo_pipe_check_label_update_t *mpo_pipe_check_label_update; @@ -6169,10 +6359,12 @@ struct mac_policy_ops { mpo_pipe_label_init_t *mpo_pipe_label_init; mpo_pipe_label_internalize_t *mpo_pipe_label_internalize; mpo_pipe_label_update_t *mpo_pipe_label_update; + mpo_policy_destroy_t *mpo_policy_destroy; mpo_policy_init_t *mpo_policy_init; mpo_policy_initbsd_t *mpo_policy_initbsd; mpo_policy_syscall_t *mpo_policy_syscall; + mpo_port_check_copy_send_t *mpo_port_check_copy_send; mpo_port_check_hold_receive_t *mpo_port_check_hold_receive; mpo_port_check_hold_send_once_t *mpo_port_check_hold_send_once; @@ -6195,6 +6387,7 @@ struct mac_policy_ops { mpo_port_label_init_t *mpo_port_label_init; mpo_port_label_update_cred_t *mpo_port_label_update_cred; mpo_port_label_update_kobject_t *mpo_port_label_update_kobject; + mpo_posixsem_check_create_t *mpo_posixsem_check_create; mpo_posixsem_check_open_t *mpo_posixsem_check_open; mpo_posixsem_check_post_t *mpo_posixsem_check_post; @@ -6212,6 +6405,7 @@ struct mac_policy_ops { mpo_posixshm_label_associate_t *mpo_posixshm_label_associate; mpo_posixshm_label_destroy_t *mpo_posixshm_label_destroy; mpo_posixshm_label_init_t *mpo_posixshm_label_init; + mpo_proc_check_debug_t *mpo_proc_check_debug; mpo_proc_check_fork_t *mpo_proc_check_fork; mpo_proc_check_get_task_name_t *mpo_proc_check_get_task_name; @@ -6228,6 +6422,7 @@ struct mac_policy_ops { mpo_proc_check_wait_t *mpo_proc_check_wait; mpo_proc_label_destroy_t *mpo_proc_label_destroy; mpo_proc_label_init_t *mpo_proc_label_init; + mpo_socket_check_accept_t *mpo_socket_check_accept; mpo_socket_check_accepted_t *mpo_socket_check_accepted; mpo_socket_check_bind_t *mpo_socket_check_bind; @@ -6252,11 +6447,13 @@ struct mac_policy_ops { mpo_socket_label_init_t *mpo_socket_label_init; mpo_socket_label_internalize_t *mpo_socket_label_internalize; mpo_socket_label_update_t *mpo_socket_label_update; + mpo_socketpeer_label_associate_mbuf_t *mpo_socketpeer_label_associate_mbuf; mpo_socketpeer_label_associate_socket_t *mpo_socketpeer_label_associate_socket; mpo_socketpeer_label_destroy_t *mpo_socketpeer_label_destroy; mpo_socketpeer_label_externalize_t *mpo_socketpeer_label_externalize; mpo_socketpeer_label_init_t *mpo_socketpeer_label_init; + mpo_system_check_acct_t *mpo_system_check_acct; mpo_system_check_audit_t *mpo_system_check_audit; mpo_system_check_auditctl_t *mpo_system_check_auditctl; @@ -6268,6 +6465,7 @@ struct mac_policy_ops { mpo_system_check_swapoff_t *mpo_system_check_swapoff; mpo_system_check_swapon_t *mpo_system_check_swapon; mpo_system_check_sysctl_t *mpo_system_check_sysctl; + mpo_sysvmsg_label_associate_t *mpo_sysvmsg_label_associate; mpo_sysvmsg_label_destroy_t 
*mpo_sysvmsg_label_destroy; mpo_sysvmsg_label_init_t *mpo_sysvmsg_label_init; @@ -6298,6 +6496,7 @@ struct mac_policy_ops { mpo_sysvshm_label_destroy_t *mpo_sysvshm_label_destroy; mpo_sysvshm_label_init_t *mpo_sysvshm_label_init; mpo_sysvshm_label_recycle_t *mpo_sysvshm_label_recycle; + mpo_task_label_associate_kernel_t *mpo_task_label_associate_kernel; mpo_task_label_associate_t *mpo_task_label_associate; mpo_task_label_copy_t *mpo_task_label_copy; @@ -6306,7 +6505,9 @@ struct mac_policy_ops { mpo_task_label_init_t *mpo_task_label_init; mpo_task_label_internalize_t *mpo_task_label_internalize; mpo_task_label_update_t *mpo_task_label_update; - mpo_iokit_check_hid_control_t *mpo_iokit_check_hid_control; + + mpo_iokit_check_hid_control_t *mpo_iokit_check_hid_control; + mpo_vnode_check_access_t *mpo_vnode_check_access; mpo_vnode_check_chdir_t *mpo_vnode_check_chdir; mpo_vnode_check_chroot_t *mpo_vnode_check_chroot; @@ -6362,32 +6563,52 @@ struct mac_policy_ops { mpo_vnode_check_signature_t *mpo_vnode_check_signature; mpo_vnode_check_uipc_bind_t *mpo_vnode_check_uipc_bind; mpo_vnode_check_uipc_connect_t *mpo_vnode_check_uipc_connect; + mac_proc_check_run_cs_invalid_t *mpo_proc_check_run_cs_invalid; mpo_proc_check_suspend_resume_t *mpo_proc_check_suspend_resume; + mpo_thread_userret_t *mpo_thread_userret; + mpo_iokit_check_set_properties_t *mpo_iokit_check_set_properties; + mpo_system_check_chud_t *mpo_system_check_chud; + mpo_vnode_check_searchfs_t *mpo_vnode_check_searchfs; + mpo_priv_check_t *mpo_priv_check; mpo_priv_grant_t *mpo_priv_grant; + mpo_proc_check_map_anon_t *mpo_proc_check_map_anon; + mpo_vnode_check_fsgetpath_t *mpo_vnode_check_fsgetpath; + mpo_iokit_check_open_t *mpo_iokit_check_open; + mpo_proc_check_ledger_t *mpo_proc_check_ledger; + mpo_vnode_notify_rename_t *mpo_vnode_notify_rename; + mpo_thread_label_init_t *mpo_thread_label_init; mpo_thread_label_destroy_t *mpo_thread_label_destroy; - mpo_system_check_kas_info_t *mpo_system_check_kas_info; - mpo_reserved_hook_t *mpo_reserved18; - mpo_vnode_notify_open_t *mpo_vnode_notify_open; - mpo_reserved_hook_t *mpo_reserved20; - mpo_reserved_hook_t *mpo_reserved21; - mpo_reserved_hook_t *mpo_reserved22; - mpo_reserved_hook_t *mpo_reserved23; - mpo_reserved_hook_t *mpo_reserved24; - mpo_reserved_hook_t *mpo_reserved25; - mpo_reserved_hook_t *mpo_reserved26; - mpo_reserved_hook_t *mpo_reserved27; + + mpo_system_check_kas_info_t *mpo_system_check_kas_info; + + mpo_proc_check_cpumon_t *mpo_proc_check_cpumon; + + mpo_vnode_notify_open_t *mpo_vnode_notify_open; + + mpo_system_check_info_t *mpo_system_check_info; + + mpo_pty_notify_grant_t *mpo_pty_notify_grant; + mpo_pty_notify_close_t *mpo_pty_notify_close; + + mpo_vnode_find_sigs_t *mpo_vnode_find_sigs; + + mpo_kext_check_load_t *mpo_kext_check_load; + mpo_kext_check_unload_t *mpo_kext_check_unload; + + mpo_proc_check_proc_info_t *mpo_proc_check_proc_info; + mpo_vnode_notify_link_t *mpo_vnode_notify_link; mpo_reserved_hook_t *mpo_reserved28; mpo_reserved_hook_t *mpo_reserved29; }; diff --git a/security/mac_process.c b/security/mac_process.c index 18fbdecca..a53f25c50 100644 --- a/security/mac_process.c +++ b/security/mac_process.c @@ -616,6 +616,40 @@ mac_proc_check_ledger(proc_t curp, proc_t proc, int ledger_op) return (error); } +int +mac_proc_check_cpumon(proc_t curp) +{ + kauth_cred_t cred; + int error = 0; + + if (!mac_proc_enforce || + !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE)) + return (0); + + cred = kauth_cred_proc_ref(curp); + MAC_CHECK(proc_check_cpumon, cred); + 
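+	/*
+	 * MAC_CHECK() invokes the proc_check_cpumon hook of each registered
+	 * policy that implements it and folds the per-policy results into
+	 * the local 'error' via mac_error_select().
+	 */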
kauth_cred_unref(&cred); + + return (error); +} + +int +mac_proc_check_proc_info(proc_t curp, proc_t target, int callnum, int flavor) +{ + kauth_cred_t cred; + int error = 0; + + if (!mac_proc_enforce || + !mac_proc_check_enforce(curp, MAC_PROC_ENFORCE)) + return (0); + + cred = kauth_cred_proc_ref(curp); + MAC_CHECK(proc_check_proc_info, cred, target, callnum, flavor); + kauth_cred_unref(&cred); + + return (error); +} + struct label * mac_thread_label_alloc(void) { diff --git a/security/mac_pty.c b/security/mac_pty.c new file mode 100644 index 000000000..af8a8732b --- /dev/null +++ b/security/mac_pty.c @@ -0,0 +1,16 @@ +#include +#include +#include +#include +#include +#include + +void +mac_pty_notify_grant(proc_t p, struct tty *tp, dev_t dev, struct label *label) { + MAC_PERFORM(pty_notify_grant, p, tp, dev, label); +} + +void +mac_pty_notify_close(proc_t p, struct tty *tp, dev_t dev, struct label *label) { + MAC_PERFORM(pty_notify_close, p, tp, dev, label); +} diff --git a/security/mac_socket.c b/security/mac_socket.c index 32acf01f5..b9bc4b698 100644 --- a/security/mac_socket.c +++ b/security/mac_socket.c @@ -472,6 +472,7 @@ mac_socket_check_accept(kauth_cred_t cred, struct socket *so) return (error); } +#if CONFIG_MACF_SOCKET_SUBSET int mac_socket_check_accepted(kauth_cred_t cred, struct socket *so) { @@ -490,6 +491,7 @@ mac_socket_check_accepted(kauth_cred_t cred, struct socket *so) } return (error); } +#endif int mac_socket_check_bind(kauth_cred_t ucred, struct socket *so, diff --git a/security/mac_stub.c b/security/mac_stub.c deleted file mode 100644 index b3e455817..000000000 --- a/security/mac_stub.c +++ /dev/null @@ -1,708 +0,0 @@ -/* - * Copyright (c) 2006-2007 Apple Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. 
- * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ - -#include -#include -#include - -#if !CONFIG_MACF -/* - * XXX stubs until we fix - */ -int mac_check_iokit_open(void) -{ - return 0; -} -int mac_check_iokit_set_properties(void) -{ - return 0; -} -int mac_check_iokit_hid_control(void) -{ - return 0; -} -int mac_check_ipc_method(void) -{ - return 0; -} -int mac_check_port_copy_send(void) -{ - return 0; -} -int mac_check_port_hold_receive(void) -{ - return 0; -} -int mac_check_port_hold_send(void) -{ - return 0; -} -int mac_check_port_make_send(void) -{ - return 0; -} -int mac_check_port_move_receive(void) -{ - return 0; -} -int mac_check_port_relabel(void) -{ - return 0; -} -int mac_check_port_send(void) -{ - return 0; -} -int mac_check_service_access(void) -{ - return 0; -} -int mac_copy_cred_to_task(void) -{ - return 0; -} -int mac_copy_port_label(void) -{ - return 0; -} -int mac_create_kernel_port(void) -{ - return 0; -} -int mac_create_kernel_task(void) -{ - return 0; -} -int mac_create_port(void) -{ - return 0; -} -int mac_create_task(void) -{ - return 0; -} -int mac_destroy_port_label(void) -{ - return 0; -} -int mac_destroy_task_label(void) -{ - return 0; -} -int mac_externalize_port_label(void) -{ - return 0; -} -int mac_externalize_task_label(void) -{ - return 0; -} -int mac_init(void) -{ - return 0; -} -int mac_init_mach(void) -{ - return 0; -} -int mac_init_port_label(void) -{ - return 0; -} -int mac_init_task_label(void) -{ - return 0; -} -int mac_internalize_port_label(void) -{ - return 0; -} -int mac_request_object_label(void) -{ - return 0; -} -int mac_update_port_from_cred_label(void) -{ - return 0; -} -int mac_update_port_kobject(void) -{ - return 0; -} -int mac_associate_vnode_devfs(void) -{ - return 0; -} -int mac_associate_vnode_extattr(void) -{ - return 0; -} -int mac_associate_vnode_singlelabel(void) -{ - return 0; -} -int mac_check_mount_getattr(void) -{ - return 0; -} -int mac_check_mount_setattr(void) -{ - return 0; -} -int mac_check_pipe_ioctl(void) -{ - return 0; -} -int mac_check_pipe_kqfilter(void) -{ - return 0; -} -int mac_check_pipe_read(void) -{ - return 0; -} -int mac_check_pipe_select(void) -{ - return 0; -} -int mac_check_pipe_stat(void) -{ - return 0; -} -int mac_check_pipe_write(void) -{ - return 0; -} -int mac_check_posix_sem_create(void) -{ - return 0; -} -int mac_check_posix_sem_open(void) -{ - return 0; -} -int mac_check_posix_sem_post(void) -{ - return 0; -} -int mac_check_posix_sem_unlink(void) -{ - return 0; -} -int mac_check_posix_sem_wait(void) -{ - return 0; -} -int mac_check_posix_shm_create(void) -{ - return 0; -} -int mac_check_posix_shm_mmap(void) -{ - return 0; -} -int mac_check_posix_shm_open(void) -{ - return 0; -} -int mac_check_posix_shm_stat(void) -{ - return 0; -} -int mac_check_posix_shm_truncate(void) -{ - return 0; -} -int mac_check_posix_shm_unlink(void) -{ - return 0; -} -int mac_check_proc_getlcid(void) -{ - return 0; -} -int mac_check_proc_fork(void) -{ - return 0; -} -int mac_check_proc_sched(void) -{ - return 0; -} -int mac_check_proc_setlcid(void) -{ - return 0; -} -int mac_check_proc_signal(void) -{ - return 0; -} -int mac_check_socket_received(void) -{ - return 0; -} -int mac_check_proc_wait(void) -{ - return 0; -} -int mac_check_system_acct(void) -{ - return 0; -} -int mac_check_system_chud(void) -{ - return 0; -} -int mac_check_system_nfsd(void) -{ - return 0; -} -int mac_check_system_reboot(void) -{ - return 0; -} -int mac_check_system_settime(void) -{ - return 0; -} -int mac_check_system_swapoff(void) -{ - return 
0; -} -int mac_check_system_swapon(void) -{ - return 0; -} -int mac_check_system_sysctl(void) -{ - return 0; -} -int mac_check_vnode_access(void) -{ - return 0; -} -int mac_check_vnode_chdir(void) -{ - return 0; -} -int mac_check_vnode_chroot(void) -{ - return 0; -} -int mac_check_vnode_create(void) -{ - return 0; -} -int mac_check_vnode_delete(void) -{ - return 0; -} -int mac_check_vnode_deleteextattr(void) -{ - return 0; -} -int mac_check_vnode_exchangedata(void) -{ - return 0; -} -int mac_check_vnode_exec(void) -{ - return 0; -} -int mac_check_vnode_getattrlist(void) -{ - return 0; -} -int mac_check_vnode_getextattr(void) -{ - return 0; -} -int mac_check_vnode_kqfilter(void) -{ - return 0; -} -int mac_check_vnode_link(void) -{ - return 0; -} -int mac_check_vnode_listextattr(void) -{ - return 0; -} -int mac_check_vnode_lookup(void) -{ - return 0; -} -int mac_check_vnode_mmap(void) -{ - return 0; -} -int mac_check_vnode_open(void) -{ - return 0; -} -int mac_check_vnode_read(void) -{ - return 0; -} -int mac_check_vnode_readdir(void) -{ - return 0; -} -int mac_check_vnode_readlink(void) -{ - return 0; -} -int mac_check_vnode_rename_from(void) -{ - return 0; -} -int mac_check_vnode_rename_to(void) -{ - return 0; -} -int mac_check_vnode_revoke(void) -{ - return 0; -} -int mac_check_vnode_searchfs(void) -{ - return 0; -} -int mac_check_vnode_select(void) -{ - return 0; -} -int mac_check_vnode_setattrlist(void) -{ - return 0; -} -int mac_check_vnode_setextattr(void) -{ - return 0; -} -int mac_check_vnode_setflags(void) -{ - return 0; -} -int mac_check_vnode_setmode(void) -{ - return 0; -} -int mac_check_vnode_setowner(void) -{ - return 0; -} -int mac_check_vnode_setutimes(void) -{ - return 0; -} -int mac_check_vnode_stat(void) -{ - return 0; -} -int mac_check_vnode_write(void) -{ - return 0; -} -int mac_cleanup_vnode(void) -{ - return 0; -} -int mac_copy_devfs_label(void) -{ - return 0; -} -int mac_copy_vnode_label(void) -{ - return 0; -} -int mac_create_cred(void) -{ - return 0; -} -int mac_create_devfs_device(void) -{ - return 0; -} -int mac_create_devfs_directory(void) -{ - return 0; -} -int mac_create_mount(void) -{ - return 0; -} -int mac_create_pipe(void) -{ - return 0; -} -int mac_create_posix_sem(void) -{ - return 0; -} -int mac_create_posix_shm(void) -{ - return 0; -} -int mac_create_proc0(void) -{ - return 0; -} -int mac_create_proc1(void) -{ - return 0; -} -int mac_create_vnode_extattr(void) -{ - return 0; -} -int mac_cred_label_alloc(void) -{ - return 0; -} -int mac_cred_label_free(void) -{ - return 0; -} -int mac_destroy_cred(void) -{ - return 0; -} -int mac_destroy_devfsdirent(void) -{ - return 0; -} -int mac_destroy_mount(void) -{ - return 0; -} -int mac_destroy_pipe(void) -{ - return 0; -} -int mac_destroy_posix_sem(void) -{ - return 0; -} -int mac_destroy_posix_shm(void) -{ - return 0; -} -int mac_destroy_proc(void) -{ - return 0; -} -int mac_execve_enter(void) -{ - return 0; -} -int mac_execve_transition(void) -{ - return 0; -} -int mac_execve_will_transition(void) -{ - return 0; -} -int mac_init_bsd(void) -{ - return 0; -} -int mac_init_cred(void) -{ - return 0; -} -int mac_init_devfsdirent(void) -{ - return 0; -} -int mac_init_mount(void) -{ - return 0; -} -int mac_init_pipe(void) -{ - return 0; -} -int mac_init_posix_sem(void) -{ - return 0; -} -int mac_init_posix_shm(void) -{ - return 0; -} -int mac_init_proc(void) -{ - return 0; -} -int mac_init_vnode(void) -{ - return 0; -} -int mac_lctx_label_alloc(void) -{ - return 0; -} -int mac_lctx_label_free(void) -{ - return 0; -} 
-int mac_proc_create_lctx(void) -{ - return 0; -} -int mac_proc_join_lctx(void) -{ - return 0; -} -int mac_proc_leave_lctx(void) -{ - return 0; -} -int mac_relabel_cred(void) -{ - return 0; -} -int mac_relabel_vnode(void) -{ - return 0; -} -int mac_update_devfsdirent(void) -{ - return 0; -} -int mac_update_vnode_extattr(void) -{ - return 0; -} -int mac_vnode_label_alloc(void) -{ - return 0; -} -int mac_vnode_label_free(void) -{ - return 0; -} -int vop_stdsetlabel_ea(void) -{ - return 0; -} -int mac_kalloc(void) -{ - return 0; -} -int mac_kalloc_noblock(void) -{ - return 0; -} -int mac_kfree(void) -{ - return 0; -} -int mac_mbuf_alloc(void) -{ - return 0; -} -int mac_mbuf_free(void) -{ - return 0; -} -int mac_unwire(void) -{ - return 0; -} -int mac_wire(void) -{ - return 0; -} -int sysctl__security_mac_children(void) -{ - return 0; -} -int mac_check_socket_accept(void) -{ - return 0; -} -int mac_check_socket_accepted(void) -{ - return 0; -} -int mac_check_socket_bind(void) -{ - return 0; -} -int mac_check_socket_connect(void) -{ - return 0; -} -int mac_check_socket_create(void) -{ - return 0; -} -int mac_check_socket_getsockopt(void) -{ - return 0; -} -int mac_check_socket_listen(void) -{ - return 0; -} -int mac_check_socket_receive(void) -{ - return 0; -} -int mac_check_socket_send(void) -{ - return 0; -} -int mac_check_socket_setsockopt(void) -{ - return 0; -} -int mac_fork_proc(void) -{ - return 0; -} -int mac_proc_check_suspend_resume(void) -{ - return 0; -} -int mac_set_enforce_proc(void) -{ - return 0; -} -#endif /* CONFIG_MACF */ - -#if !CONFIG_AUDIT -int kau_will_audit(void) -{ - return 0; -} -#endif diff --git a/security/mac_system.c b/security/mac_system.c index f3de4ca13..621d24244 100644 --- a/security/mac_system.c +++ b/security/mac_system.c @@ -109,6 +109,19 @@ mac_system_check_host_priv(kauth_cred_t cred) return (error); } +int +mac_system_check_info(kauth_cred_t cred, const char *info_type) +{ + int error; + + if (!mac_system_enforce) + return (0); + + MAC_CHECK(system_check_info, cred, info_type); + + return (error); +} + int mac_system_check_nfsd(kauth_cred_t cred) { diff --git a/security/mac_vfs.c b/security/mac_vfs.c index 7bf550622..cf54eafa9 100644 --- a/security/mac_vfs.c +++ b/security/mac_vfs.c @@ -405,6 +405,20 @@ mac_vnode_notify_open(vfs_context_t ctx, struct vnode *vp, int acc_flags) MAC_PERFORM(vnode_notify_open, cred, vp, vp->v_label, acc_flags); } +void +mac_vnode_notify_link(vfs_context_t ctx, struct vnode *vp, + struct vnode *dvp, struct componentname *cnp) +{ + kauth_cred_t cred; + + if (!mac_vnode_enforce || + !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE)) + return; + + cred = vfs_context_ucred(ctx); + MAC_PERFORM(vnode_notify_link, cred, dvp, dvp->v_label, vp, vp->v_label, cnp); +} + /* * Extended attribute 'name' was updated via * vn_setxattr() or vn_removexattr(). 
Allow the @@ -449,7 +463,8 @@ mac_vnode_label_store(vfs_context_t ctx, struct vnode *vp, int mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode *vp, - struct label *scriptvnodelabel, struct label *execl) + struct vnode *scriptvp, struct label *scriptvnodelabel, struct label *execl, + void *macextensions) { kauth_cred_t cred; int disjoint = 0; @@ -462,15 +477,58 @@ mac_cred_label_update_execve(vfs_context_t ctx, kauth_cred_t new, struct vnode * pcred->cr_flags |= CRF_MAC_ENFORCE; cred = vfs_context_ucred(ctx); - MAC_PERFORM(cred_label_update_execve, cred, new, vp, vp->v_label, - scriptvnodelabel, execl, &disjoint); + + /* + * NB: Cannot use MAC_PERFORM macro because we need a sequence point after + * calling exec_spawnattr_getmacpolicyinfo() and before passing the + * spawnattrlen as an argument to the hook. + */ + { + struct mac_policy_conf *mpc; + u_int i; + + for (i = 0; i< mac_policy_list.staticmax; i++) { + mpc = mac_policy_list.entries[i].mpc; + if (mpc == NULL) + continue; + + mpo_cred_label_update_execve_t *hook = mpc->mpc_ops->mpo_cred_label_update_execve; + if (hook == NULL) + continue; + + size_t spawnattrlen = 0; + void *spawnattr = exec_spawnattr_getmacpolicyinfo(macextensions, mpc->mpc_name, &spawnattrlen); + + hook(cred, new, vfs_context_proc(ctx), vp, scriptvp, vp->v_label, + scriptvnodelabel, execl, spawnattr, spawnattrlen, &disjoint); + } + if (mac_policy_list_conditional_busy() != 0) { + for (; i <= mac_policy_list.maxindex; i++) { + mpc = mac_policy_list.entries[i].mpc; + if (mpc == NULL) + continue; + + mpo_cred_label_update_execve_t *hook = mpc->mpc_ops->mpo_cred_label_update_execve; + if (hook == NULL) + continue; + + size_t spawnattrlen = 0; + void *spawnattr = exec_spawnattr_getmacpolicyinfo(macextensions, mpc->mpc_name, &spawnattrlen); + + hook(cred, new, vfs_context_proc(ctx), vp, scriptvp, vp->v_label, + scriptvnodelabel, execl, spawnattr, spawnattrlen, &disjoint); + } + mac_policy_list_unbusy(); + } + } return (disjoint); } int mac_cred_check_label_update_execve(vfs_context_t ctx, struct vnode *vp, - struct label *scriptvnodelabel, struct label *execlabel, struct proc *p) + struct vnode *scriptvp, struct label *scriptvnodelabel, struct label *execlabel, + struct proc *p, void *macextensions) { kauth_cred_t cred; int result = 0; @@ -479,8 +537,48 @@ mac_cred_check_label_update_execve(vfs_context_t ctx, struct vnode *vp, return result; cred = vfs_context_ucred(ctx); - MAC_BOOLEAN(cred_check_label_update_execve, ||, cred, vp, vp->v_label, - scriptvnodelabel, execlabel, p); + + /* + * NB: Cannot use MAC_BOOLEAN macro because we need a sequence point after + * calling exec_spawnattr_getmacpolicyinfo() and before passing the + * spawnattrlen as an argument to the hook. 
+ */ + { + struct mac_policy_conf *mpc; + u_int i; + + for (i = 0; i< mac_policy_list.staticmax; i++) { + mpc = mac_policy_list.entries[i].mpc; + if (mpc == NULL) + continue; + + mpo_cred_check_label_update_execve_t *hook = mpc->mpc_ops->mpo_cred_check_label_update_execve; + if (hook == NULL) + continue; + + size_t spawnattrlen = 0; + void *spawnattr = exec_spawnattr_getmacpolicyinfo(macextensions, mpc->mpc_name, &spawnattrlen); + + result = result || hook(cred, vp, scriptvp, vp->v_label, scriptvnodelabel, execlabel, p, spawnattr, spawnattrlen); + } + if (mac_policy_list_conditional_busy() != 0) { + for (; i <= mac_policy_list.maxindex; i++) { + mpc = mac_policy_list.entries[i].mpc; + if (mpc == NULL) + continue; + + mpo_cred_check_label_update_execve_t *hook = mpc->mpc_ops->mpo_cred_check_label_update_execve; + if (hook == NULL) + continue; + + size_t spawnattrlen = 0; + void *spawnattr = exec_spawnattr_getmacpolicyinfo(macextensions, mpc->mpc_name, &spawnattrlen); + + result = result || hook(cred, vp, scriptvp, vp->v_label, scriptvnodelabel, execlabel, p, spawnattr, spawnattrlen); + } + mac_policy_list_unbusy(); + } + } return (result); } @@ -658,16 +756,65 @@ mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp, struct image_params *imgp) { kauth_cred_t cred; - int error; + int error = 0; if (!mac_vnode_enforce || !mac_proc_enforce) return (0); cred = vfs_context_ucred(ctx); - MAC_CHECK(vnode_check_exec, cred, vp, vp->v_label, - (imgp != NULL) ? imgp->ip_execlabelp : NULL, - (imgp != NULL) ? &imgp->ip_ndp->ni_cnd : NULL, - (imgp != NULL) ? &imgp->ip_csflags : NULL); + + /* + * NB: Cannot use MAC_CHECK macro because we need a sequence point after + * calling exec_spawnattr_getmacpolicyinfo() and before passing the + * spawnattrlen as an argument to the hook. + */ + { + struct mac_policy_conf *mpc; + u_int i; + + for (i = 0; i< mac_policy_list.staticmax; i++) { + mpc = mac_policy_list.entries[i].mpc; + if (mpc == NULL) + continue; + + mpo_vnode_check_exec_t *hook = mpc->mpc_ops->mpo_vnode_check_exec; + if (hook == NULL) + continue; + + size_t spawnattrlen = 0; + void *spawnattr = (imgp != NULL) ? exec_spawnattr_getmacpolicyinfo(imgp->ip_px_smpx, mpc->mpc_name, &spawnattrlen) : NULL; + + error = mac_error_select( + hook(cred, vp, vp->v_label, + (imgp != NULL) ? imgp->ip_execlabelp : NULL, + (imgp != NULL) ? &imgp->ip_ndp->ni_cnd : NULL, + (imgp != NULL) ? &imgp->ip_csflags : NULL, + spawnattr, spawnattrlen), error); + } + if (mac_policy_list_conditional_busy() != 0) { + for (; i <= mac_policy_list.maxindex; i++) { + mpc = mac_policy_list.entries[i].mpc; + if (mpc == NULL) + continue; + + mpo_vnode_check_exec_t *hook = mpc->mpc_ops->mpo_vnode_check_exec; + if (hook == NULL) + continue; + + size_t spawnattrlen = 0; + void *spawnattr = (imgp != NULL) ? exec_spawnattr_getmacpolicyinfo(imgp->ip_px_smpx, mpc->mpc_name, &spawnattrlen) : NULL; + + error = mac_error_select( + hook(cred, vp, vp->v_label, + (imgp != NULL) ? imgp->ip_execlabelp : NULL, + (imgp != NULL) ? &imgp->ip_ndp->ni_cnd : NULL, + (imgp != NULL) ? 
&imgp->ip_csflags : NULL, + spawnattr, spawnattrlen), error); + } + mac_policy_list_unbusy(); + } + } + return (error); } @@ -687,7 +834,8 @@ mac_vnode_check_fsgetpath(vfs_context_t ctx, struct vnode *vp) } int -mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1, +mac_vnode_check_signature(struct vnode *vp, off_t macho_offset, + unsigned char *sha1, void * signature, size_t size) { int error; @@ -695,7 +843,7 @@ mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1, if (!mac_vnode_enforce || !mac_proc_enforce) return (0); - MAC_CHECK(vnode_check_signature, vp, vp->v_label, sha1, signature, size); + MAC_CHECK(vnode_check_signature, vp, vp->v_label, macho_offset, sha1, signature, size); return (error); } @@ -1194,6 +1342,19 @@ mac_vnode_label_update(vfs_context_t ctx, struct vnode *vp, struct label *newlab mac_vnode_label_free(tmpl); } +int +mac_vnode_find_sigs(struct proc *p, struct vnode *vp, off_t offset) +{ + int error; + + if (!mac_vnode_enforce || !mac_proc_enforce) + return (0); + + MAC_CHECK(vnode_find_sigs, p, vp, offset, vp->v_label); + + return (error); +} + void mac_mount_label_associate(vfs_context_t ctx, struct mount *mp) { @@ -1446,7 +1607,9 @@ mac_vnode_label_associate_fdesc(struct mount *mp, struct fdescnode *fnp, struct vnode *vp, vfs_context_t ctx) { struct fileproc *fp; +#if CONFIG_MACF_SOCKET_SUBSET struct socket *so; +#endif struct pipe *cpipe; struct vnode *fvp; struct proc *p; @@ -1473,7 +1636,7 @@ mac_vnode_label_associate_fdesc(struct mount *mp, struct fdescnode *fnp, goto out; } - switch (fp->f_fglob->fg_type) { + switch (FILEGLOB_DTYPE(fp->f_fglob)) { case DTYPE_VNODE: fvp = (struct vnode *)fp->f_fglob->fg_data; if ((error = vnode_getwithref(fvp))) @@ -1481,6 +1644,7 @@ mac_vnode_label_associate_fdesc(struct mount *mp, struct fdescnode *fnp, MAC_PERFORM(vnode_label_copy, fvp->v_label, vp->v_label); (void)vnode_put(fvp); break; +#if CONFIG_MACF_SOCKET_SUBSET case DTYPE_SOCKET: so = (struct socket *)fp->f_fglob->fg_data; socket_lock(so, 1); @@ -1489,6 +1653,7 @@ mac_vnode_label_associate_fdesc(struct mount *mp, struct fdescnode *fnp, vp, vp->v_label); socket_unlock(so, 1); break; +#endif case DTYPE_PSXSHM: pshm_label_associate(fp, vp, ctx); break; diff --git a/libsa/libsa/Makefile b/tools/Makefile similarity index 50% rename from libsa/libsa/Makefile rename to tools/Makefile index 196b8f326..95a2076fb 100644 --- a/libsa/libsa/Makefile +++ b/tools/Makefile @@ -7,23 +7,7 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir include $(MakeInc_cmd) include $(MakeInc_def) -INSTINC_SUBDIRS = -INSTINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS} -INSTINC_SUBDIRS_X86_64 = ${INSTINC_SUBDIRS} - -EXPINC_SUBDIRS = -EXPINC_SUBDIRS_I386 = ${EXPINC_SUBDIRS} -EXPINC_SUBDIRS_X86_64 = ${EXPINC_SUBDIRS} - -INSTALL_MI_LIST = - -INSTALL_MI_DIR = libsa - -EXPORT_MI_LIST = - -EXPORT_MI_DIR = libsa +CONFIG_SUBDIRS = lldbmacros include $(MakeInc_rule) include $(MakeInc_dir) - - diff --git a/tools/lldbmacros/.lldbinit b/tools/lldbmacros/.lldbinit new file mode 100644 index 000000000..5b2077709 --- /dev/null +++ b/tools/lldbmacros/.lldbinit @@ -0,0 +1,4 @@ +settings set target.process.python-os-plugin-path ./core/operating_system.py +settings set plugin.dynamic-loader.darwin-kernel.load-kexts false + +command script import ./xnu.py diff --git a/tools/lldbmacros/Makefile b/tools/lldbmacros/Makefile new file mode 100644 index 000000000..2e7b3aeed --- /dev/null +++ b/tools/lldbmacros/Makefile @@ -0,0 +1,65 @@ +export MakeInc_cmd=${SRCROOT}/makedefs/MakeInc.cmd +export 
MakeInc_def=${SRCROOT}/makedefs/MakeInc.def
+export MakeInc_rule=${SRCROOT}/makedefs/MakeInc.rule
+export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
+
+
+include $(MakeInc_cmd)
+include $(MakeInc_def)
+
+.PHONY: lldbmacros_install
+
+do_config_all:: lldbmacros_install
+
+LLDBMACROS_SOURCE:=$(SRCROOT)/tools/lldbmacros/
+LLDBMACROS_DEST:=$(OBJPATH)/$(DSYMKERNELSYSDIR)/$(DSYMLLDBMACROSDIR)/lldbmacros/
+
+LLDBMACROS_PYTHON_FILES = \
+	core/standard.py \
+	core/cvalue.py \
+	core/__init__.py \
+	core/configuration.py \
+	core/caching.py \
+	core/lazytarget.py \
+	core/kernelcore.py \
+	core/operating_system.py \
+	core/xnu_lldb_init.py \
+	plugins/__init__.py \
+	plugins/zprint_perf_log.py \
+	xnu.py \
+	xnudefines.py \
+	mbufdefines.py \
+	netdefines.py \
+	routedefines.py \
+	ipc.py \
+	scheduler.py \
+	pmap.py \
+	memory.py \
+	mbufs.py \
+	net.py \
+	ioreg.py \
+	utils.py \
+	kdp.py \
+	process.py \
+	userspace.py \
+	pci.py \
+	misc.py \
+	apic.py
+
+INSTALL_LLDBMACROS_PYTHON_FILES=$(addprefix $(LLDBMACROS_DEST), $(LLDBMACROS_PYTHON_FILES))
+
+$(INSTALL_LLDBMACROS_PYTHON_FILES): $(LLDBMACROS_DEST)% : $(LLDBMACROS_SOURCE)%
+	$(_v)$(MKDIR) $(dir $@)
+	$(_v)$(PYTHON) $(LLDBMACROS_SOURCE)/core/syntax_checker.py $< $(_vstdout)
+	$(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@
+	$(_v)$(TOUCH) $(LLDBMACROS_DEST)
+
+$(LLDBMACROS_DEST)/../mach_kernel.py: $(LLDBMACROS_SOURCE)/core/xnu_lldb_init.py
+	$(_v)$(MKDIR) $(dir $@)
+	$(_v)$(PYTHON) $(LLDBMACROS_SOURCE)/core/syntax_checker.py $< $(_vstdout)
+	$(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $< $@
+
+lldbmacros_install: $(INSTALL_LLDBMACROS_PYTHON_FILES) $(LLDBMACROS_DEST)/../mach_kernel.py
+
+include $(MakeInc_rule)
+include $(MakeInc_dir)
diff --git a/tools/lldbmacros/README b/tools/lldbmacros/README
new file mode 100644
index 000000000..9cb143107
--- /dev/null
+++ b/tools/lldbmacros/README
@@ -0,0 +1,338 @@
+Table of Contents
+=================
+A. How to use lldb for kernel debugging
+B. Design of lldb kernel debugging platform.
+C. Kernel debugging commands.
+   i. Using commands.
+   ii. Writing new commands.
+D. Kernel type summaries.
+   i. Using summaries
+   ii. Writing new summary functions
+E. FAQs and General Coding Guidelines
+   i. Frequently Asked Questions
+   ii. Formatted Output printing guidelines [MUST READ]
+   iii. Coding conventions. [MUST READ]
+   iv. Submitting changes in lldbmacros [MUST READ]
+F. Development and Debugging on lldb kernel debugging platform.
+   i. Reading an exception backtrace
+   ii. Loading custom or local lldbmacros and operating_system plugin
+   iii. Adding debug related 'printf's
+
+
+========================================
+A. How to use lldb for kernel debugging
+========================================
+lldb can be used for kernel debugging the same way as gdb. The simplest way is to start lldb with the kernel symbol file. The lldb environment will then be ready to connect over 'kdp-remote <hostname:port>' or 'gdb-remote <hostname:port>'. If you are using a core file, do 'file --core /path/to/corefile'
+
+Following are detailed steps on how to debug a panic'ed / NMI'ed machine (for the curious souls).
+    lldb debugging in detail:-
+    * start lldb with the right symbols file. If you do not know the version a priori, then enable dsymForUUID to load symbols dynamically.
+      bash$ dsymForUUID --enable
+      bash$ lldb /path/to/mach_kernel.symbols
+      Current executable set to '/Sources/Symbols/xnu/xnu-2253~2/mach_kernel' (x86_64).
+      (lldb)
+    * connect to the remote device or load a core file
+      # for kdp
+      (lldb) process connect --plugin kdp-remote udp://17.123.45.67:41139
+      # for gdb (eg with astris)
+      (lldb) process connect --plugin gdb-remote gdb://17.123.45.67:8000
+      # for loading a core file
+      (lldb) file --core /path/to/core/file /path/to/kernel_symbol_file
+    * Once connected you can debug with basic lldb commands like print, bt, expr etc. The xnu debug macros will also be loaded automatically from the dSYM files.
+      If you are working with older kernel files you can load kernel specific commands by doing -
+      (lldb) command script import /path/to/xnu/tools/lldbmacros/xnu.py
+      (lldb) showbootargs
+      debug=0x14e ncpus=2
+    * You can do 'kgmhelp' to get a list of commands available through xnu.py
+
+SPECIAL: The xnu.py script brings in kernel type summary functions. To enable these please do -
+    (lldb) showlldbtypesummaries
+These can be very handy for printing important information from structures easily.
+For ex.
+    (lldb) print (thread_t)0x80d6a620
+    (thread_t) $45 = 0x80d6a620
+    thread        thread_id  processor  pri  io_policy  state  wait_queue  wait_event  wmesg  thread_name
+    0x80d6a620    0x317      0x902078c8 61              W      0x910cadd4  0x0                SystemSoundServer
+
+
+=============================================
+B. Design of lldb kernel debugging platform.
+=============================================
+The lldb debugger provides a python scripting bridge for customizing commands and summaries in lldb. Following is the stack of platforms and how commands and summaries interact with it.
+
+    |------- xnu scripts ----------|
+    | |- lldb Command/Scripting-|  |  <-- provides scriptability for kernel data structures through summary/command invocation.
+    | |    |--lldb core--|      |  |  <-- interacts with remote kernel or corefile.
+    | |-------------------------|  |
+    |------------------------------|
+
+  The xnu script in xnu/tools/lldbmacros provides the following:
+  * Custom functions to do the plumbing from lldb command invocation to python function call. (see doc strings for @lldb_command)
+    The command interface provides some common features (which can be invoked after passing '--' on the cmd line) like -
+    i.   send the output of a command to a file on disk
+    ii.  search for a string in the output and selectively print the line containing it.
+    iii. -v options to increase verbosity levels in commands.
+    For example: (lldb) showalltasks -- -s kernel_task -o /tmp/kernel_task.output -v
+    will show task summary output with lines matching the string 'kernel_task' into a file /tmp/kernel_task.output and with a verbosity level one above the default.
+
+  * Customization for plugging in summary functions for lldb type summaries. (see doc strings for @lldb_summary)
+    It will automatically register the given types with the functions within the kernel category.
+
+  * Ability to register test cases for macros (see doc strings for @xnudebug_test).
+
+The file layout is as follows
+xnu/
+  |-tools/
+    |-lldbmacros/
+      |-core/     # Core logic about kernel, lldb value abstraction, configs etc. **DO NOT TOUCH THIS DIR**
+      |-plugins/  # Holds plugins for kernel commands.
+      |-xnu.py    # xnu debug framework along with kgmhelp, xnudebug commands.
+      |-xnudefines.py
+      |-utils.py
+      |-process.py  # files containing commands/summaries code for each subsystem
+      |-...
+
+The lldbmacros directory has a Makefile that follows the build process for xnu. This packages the lldbmacros scripts into the dSYM of each kernel build. This helps in rev-locking the lldb commands with changes in kernel sources.
+
+==============================
+C. Kernel debugging commands.
+==============================
+i. Using commands.
+------------------
+Using xnu debug commands is very similar to kgmacros in gdb. You can use 'kgmhelp' to get a listing of available commands.
+If you need detailed help for a command please type 'help <cmd_name>' and the documentation for the command will be displayed.
+ex.
+    (lldb) help pmap_walk
+         Perform a page-table walk in <pmap> for <virtual_address>.
+         You can pass -- -v for verbose output. To increase the verbosity add more -v args after the '--'.
+         Syntax: (lldb) pmap_walk <pmap> <virtual_address>
+
+The basic format for every command provided under kgmhelp is as follows
+(lldb) command_name [cmd_args..] [-CMDOPTIONS] [-xnuoptions]
+where:
+    command_name : name of the command as registered using the @lldb_command decorator and described in 'kgmhelp'
+    cmd_args     : shell like arguments that are passed as is to the registered python function.
+                   If there is an error in these arguments then the implementor may display an appropriate error message.
+    xnuoptions   : common options for stream based operations on the output of command_name.
+                   Allowed options are
+                   -h                 : show the help string of a command
+                   -s <search_string> : print only the lines matching <search_string>
+                   -o <path/to/file>  : direct the output of the command to <path/to/file>. Will not display anything on the terminal
+                   -v                 : increase the verbosity of the command. Each '-v' encountered will increase verbosity by 1.
+                   -p <plugin_name>   : pass the output of the command to <plugin_name> for processing and follow up with command requests by it.
+    CMDOPTIONS   : These are command level options (always a CAPITAL letter option) that are defined by the macro developer. Please do
+                   help <cmd_name> to know how each option operates on that particular command.
+
+ii. Writing new commands.
+--------------------------
+    The python modules are designed in such a way that the command from lldb invokes a python function with the arguments passed at the lldb prompt.
+    It is recommended that you do decoupled development of the command interface and the core utility function so that any function/code can be
+    called as a simple util function and get the same output. i.e.
+    (lldb) showtask 0xabcdef000 is the same as python >>> GetTaskSummary(0xabcdef000) or equivalent
+
+    Following is a step by step guideline on how to add a new command ( e.g showtaskvme ). [extra tip: Always a good idea to wrap your macro code within # Macro: <macro_name>, # EndMacro.]
+    1. register a command to a function. Use the lldb_command decorator to map a 'command_name' to a function. Optionally you can provide a getopt compatible option string for customizing your command invocation. Note: Only CAPITAL letter options are allowed. lowercase options are reserved for the framework level features.
+    2. Immediately after the register, define the function to handle the command invocation. The signature is always like Abc(cmd_args=None, cmd_options={})
+    3. Add documentation for Abc(). This is very important for lldb to show help for each command. [ Follow the guidelines above with documentation ]
+    4. Use the cmd_args array to get the args passed on the command. For example a command like "showtaskvme 0xabcdef00" will have cmd_args=['0xabcdef00']
+       - note that we use the core.value class as an interface to underlying C structures. Refer [Section B] for more details.
+       - use kern.globals.<variable> & kern.GetValueFromAddress for building values from addresses.
+       - remember that the ideal type of object to be passed around is core.value
+       - Anything you 'print' will be relayed to lldb terminal output.
+    5. If the user has passed any custom options they would be in the cmd_options dict. The format is {'-<optionflag>':'<value>'}. The <value> will be '' (empty string) for non-option flags.
+    6. If your function finds an issue with the passed arguments then you can raise ArgumentError('error_message') to notify the user. The framework will automatically catch this and show appropriate help using the function doc string.
+
+    Time for some code example? Try reading the code for the function ShowTaskVmeHelper in memory.py.
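+
+    Putting steps 1-6 together, a minimal sketch of a new command could look like the following
+    (the command name 'showtaskref' and the use of the ref_count field are purely illustrative,
+    not an existing macro):
+
+    @lldb_command('showtaskref')
+    def ShowTaskRef(cmd_args=None, cmd_options={}):
+        """ Display the reference count of a given task.
+            Syntax: (lldb) showtaskref <task_address>
+        """
+        if not cmd_args:
+            raise ArgumentError("Please provide a task address")
+        # build a core.value for the task from the user supplied address
+        task = kern.GetValueFromAddress(cmd_args[0], 'task *')
+        print "task = {:s} ref_count = {:d}".format(str(task), int(task.ref_count))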
+
+SPECIAL Note: Very often you will find yourself making changes to a file for some command/summary and would like to test it out in lldb.
+To easily reload your changes in lldb please follow the below example.
+    * you fire up lldb and start using zprint. And soon you need to add functionality to zprint.
+    * you happily change the function code in the memory.py file for the zprint macro.
+    * now to reload those particular changes without killing your debug session do
+      (lldb) xnudebug reload memory
+      memory is reloaded from ./memory.py
+      (lldb)
+
+    It is very important that you do the reload using the xnudebug command as it does the plumbing of commands and types for your change in the module. Otherwise you could easily get confused
+    why your changes are not reflected in the command.
+
+
+==========================
+D. Kernel type summaries.
+==========================
+i. Using summaries
+------------------
+    The lldb debugger provides ways for the user to customize how a particular type of object is described when printed. These are very useful in displaying complex and large structures
+    where only certain fields are important based on some flag or value in some field or variable. The way it works is every time lldb wants to print an object it checks
+    for registered summaries. We can define python functions and hook them up with lldb as callbacks for type summaries.
+    For example.
+    (lldb) print first_zone
+    (zone_t) $49 = 0xd007c000
+    ZONE                  TOT_SZ  ALLOC_ELTS  FREE_ELTS  FREE_SZ  ELT_SZ  ALLOC(ELTS  PGS  SLK)  FLAGS  NAME
+    0x00000000d007c000     29808         182         25     3600     144   4096  28    1    64     X$  zones
+    (lldb)
+    Just printing the value of first_zone as (zone_t) 0xd007c000 wouldn't have been much help. But with the registered summary for zone_t we can see all the interesting info easily.
+
+    You do not need to do anything special to use summaries. Once they are registered with lldb they show info automatically when printing objects. However if you wish to
+    see all the registered type summaries run the command 'type summary list -w kernel' on the lldb prompt.
+    Also if you wish to quickly disable the summaries for a particular command use the 'showraw' command.
+
+ii. Writing new summary functions
+---------------------------------
+lldb provides a really flexible interface for building summaries for complex objects and data. If you find that a struct or list can be
+diagnosed better if displayed differently, then feel free to add a type summary for that type. Following is an easy guide on how to do that.
+
+    1. Register a function as a callback for displaying information for a type. Use the @lldb_type_summary() decorator with an array of types you wish to register for callback
+    2. Provide a header for the summary using the @header() decorator. This is a strong requirement for summaries. This gets displayed before the output
+       of GetTypeSummary() is displayed. [In case you do not wish to have a header then still define it as "" (empty string) ]
+    3. Define the function with a signature of GetSomeTypeSummary(valobj). It is highly recommended that the naming be consistent to Get.*?Summary(valobj)
+       The valobj argument holds the core.value object for display.
+    4. Use the utility functions and memory read operations to pull out the required information.
+       [ use kern.globals & kern.GetValueFromAddress for building args to core functions. ]
+       [ remember that the ideal type of object to be passed around is core.value ]
+    5. return a string that would be printed by the caller. When lldb makes a call back it expects a str to be returned. So do not print
+       directly out to console. [ debug info or logs output is okay to be printed anywhere :) ]
+
+Time for some code example? Try reading the code for GetTaskSummary() in process.py.
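+
+As an illustrative sketch only (the choice of the zone type and its count field is arbitrary here;
+see the real summaries in the sources for the exact patterns used), a new summary function could
+look like:
+
+    @lldb_type_summary(['zone *'])
+    @header("{0: <20s} {1: <10s}".format('ZONE', 'COUNT'))
+    def GetSimpleZoneSummary(valobj):
+        """ Summarize a zone with just its address and element count.
+            params: valobj - core.value object of type zone *
+            returns: str - one line summary of the zone
+        """
+        # return a string; the framework prints the @header line before it
+        return "{0: <#020x} {1: <10d}".format(int(valobj), int(valobj.count))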
+
+
+======================================
+E. FAQs and General Coding Guidelines
+======================================
+
+i. Frequently Asked Questions
+-----------------------------
+
+    Q. How do I avoid printing the summary and see the actual data in a structure?
+    A. There is a command called 'showraw'. This will disable all kernel specific type summaries and execute any command you provide.
+       Ex.
+       (lldb) print (thread_t) 0x80d6a620
+       (thread_t) $45 = 0x80d6a620
+       thread        thread_id  processor  pri  io_policy  state  wait_queue  wait_event  wmesg  thread_name
+       0x80d6a620    0x317      0x902078c8 61              W      0x910cadd4  0x0                SystemSoundServer
+       (lldb) showraw print (thread_t) 0x80d6a620
+       (thread_t) $48 = 0x80d6a620
+
+    Q. I typed 'showallvnodes' and nothing happens for a long time? OR How do I get the output of a long running command instantly on the terminal?
+    A. The lldb command interface tries to build a result object from the output of a python function. So in the case of functions with very long output or runtime it may
+       seem that the lldb process is hung. But it is not. You can use the "-i" option to get immediate output on the terminal.
+       ex. (lldb) showallvnodes -- -i
+           Immediate Output
+           ....
+
+    Q. I made a change in a python file for a command or summary, but the output is not reflected in the lldb command?
+    A. The python framework does not allow for removing a loaded module and then reloading it. So sometimes if a command has a cached value from
+       old code then it will still call the old function and hence will not display the new changes in the file on disk. If you find yourself in such a situation
+       please see [Section C. -> SPECIAL Note]. If the change is to a basic class or caching mechanism then it is advised to quit lldb and re-load all modules again.
+
+    Q. I am new to python. I get an error message that I do not understand. What should I do?
+    A. The syntax for python is different from conventional programming languages. If you get any message with SyntaxError or TypeError or ValueError then please
+       review your code and look for common errors like
+       - wrong level of indentation?
+       - missed a ':' at the end of an if, elif, for, while statement?
+       - referencing a key in a dictionary that doesn't exist? You might see KeyError in such cases.
+       - mistakenly used a python reserved keyword as a variable? (check http://docs.python.org/release/3.0.1/reference/lexical_analysis.html#id8)
+       - Trying to modify a string value? You can only create new strings but never modify existing ones.
+       - Trying to add a non string value to a string? This typically happens in print "time is " + gettime(). Here gettime() returns int and not str.
+       - using a local variable with the same name as a global variable?
+       - assigning a value to a global variable without declaring it first? It's highly recommended to always declare a global variable with the 'global' keyword
+       If you still have difficulty you can look at the python documentation at http://docs.python.org
+
+    Q. I wish to pass the value of a variable/expression to an xnu lldb macro that accepts only pointers. How can I achieve that?
+    A. Many lldb macros have syntax that accepts pointers (eg showtaskstacks etc). In order to have your expression evaluated before it is passed to the command use `back ticks`.
+       For example:
+       (lldb) showtaskstacks `(task_t)tasks.next`
+       This way the expression within ` ` is evaluated by lldb and the value is passed to the command.
+       Note that if your argument pointer is bad or the memory is corrupted lldb macros will fail with a long backtrace that may not make sense. gdb used to fail silently but lldb does not.
+       Please see Section F(i) for more information on reading backtraces.
+
+
+ii. Formatted output printing - zen and peace for life
+------------------------------------------------------
+
+    To avoid the horrors of printing tabular data on the console and then 2 weeks later again messing with it for a new field, it is recommended to follow these guidelines.
+    * any python string can invoke "".format() and hence makes it very easy to play with formats
+    * As a convention, I suggest that for printing pointer values in hex you use "{0: <#020x}".format(some_int_value). This will print nice 0x prefixed strings with length padded to 20.
+    * If you need help with format options take a look at http://docs.python.org/library/string.html#format-string-syntax
+    * [ I'd first create a format string for the data and then for the header just change the x's and d's to s and pass the header strings to the format command. see GetTaskSummary() ]
+    * If you need to print a string from a core.value object then use str() to get the string representation of the value.
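+
+    For example (the variable names here are hypothetical), a row format can be written once and
+    the matching header derived from it, so both stay in sync when a field is added:
+
+        row_fmt = "{0: <#020x} {1: <6d} {2: <16s}"
+        # turn the hex and decimal specifiers into string specifiers for the header
+        hdr_fmt = row_fmt.replace('#020x', '20s').replace('6d', '6s')
+        print hdr_fmt.format('object', 'count', 'name')
+        print row_fmt.format(obj_addr, obj_count, obj_name)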
+
+
+iii. Coding conventions
+-----------------------
+    It is very very HIGHLY RECOMMENDED to follow these guidelines for writing any python code.
+    * Python is very sensitive to tabs and spaces for alignment. So please make sure you INDENT YOUR CODE WITH SPACES at all times.
+    * The standard tab width is 4 spaces. Each increasing indent adds 4 spaces at the beginning of the line.
+    * The format for documentation is -
+      """ A one line summary describing what this function / class does
+          Detailed explanation if necessary along with params and return values.
+      """
+    * All Classes and functions should have a doc string describing what the function does
+      A consistent format is expected. For ex.
+      def SumOfNumbers(a, b, c, d):
+          """ Calculate sum of numbers.
+              params:
+                  a - int, value to be added. can be 0
+                  b - int/float, value to be added.
+              returns:
+                  int/float - Sum of two values
+              raises:
+                  TypeError - If any type is not identified in the params
+          """
+    * A Class or Function should always start with a CAPITAL letter and be CamelCase. If a function is for internal use only then it starts with '_'.
+    * Function params should always be lower_case and be word separated with '_'
+    * A local variable inside a function should be lower_case and separated with '_'
+    * A variable for internal use in an object should start with '_'.
+    * if a class variable is supposed to hold a non native type of object, it is a good idea to comment what type it holds
+    * A class function with a name matching Get(.*?)Summary() is always supposed to return a string which can be printed on stdout or to any file.
+    * Functions beginning with "Get" (eg. GetVnodePath()) mean they return a value and will not print any output to stdout.
+    * Functions beginning with "Show" (eg. ShowZTrace()) mean they will print data on screen and may not return any value.
+
+iv. Submitting changes in lldbmacros
+------------------------------------
+    To contribute new commands or fixes to existing ones, it is recommended that you follow the procedure below.
+    * Save the changes required for the new command or fix into the lldbmacros directory.
+    * Make sure that the coding conventions are strictly followed.
+    * Run the syntax checker on each of the modified files. It will find basic formatting errors in the changed files for you.
+    * If you are adding a new file then please update the Makefile and the xnu.py imports to ensure they get compiled during the kernel build.
+    * Do a clean build of the kernel from the xnu top level directory.
+    * Verify that your changes are present in the dSYM directory of the new build.
+    * Re-run all your test and verification steps with the lldbmacros from the newly packaged dSYM/Contents/Resources/Python/lldbmacros.
+
+===============================================================
+F. Development and Debugging on lldb kernel debugging platform.
+===============================================================
+
+i. Reading an exception backtrace
+---------------------------------
+    In case of an error the lldbmacros may print out an exception backtrace and halt immediately. The backtrace is very verbose and may be confusing. The important thing is to isolate possible causes of failure, and eventually file a bug with the kernel team. Following are some common ways where you may see an exception instead of your expected result.
+    * The lldbmacros cannot divine the type of memory by inspection. If a wrong pointer is passed from the commandline then the command code will try to read and show some results. They may still be junk or plain erroneous. Please make sure your command arguments are correct.
+      For example: a common mistake is to pass a task address to showactstack. In such a case the lldb command may fail and show you a confusing backtrace.
+    * Kernel debugging is particularly tricky. Many parts of memory may not be readable. There could be a failure in the network, the debugging protocol or just plain bad memory. In such a case please try to see if you can examine memory for the object you are trying to access.
+    * In case of memory corruption, the lldbmacros may have followed a wrong pointer dereference. This might lead to failure and an exception to be thrown.
+
+ii. Loading custom or local lldbmacros and operating_system plugin
+------------------------------------------------------------------
+    The lldbmacros are packaged right into the dSYM for the kernel executable. This makes debugging very easy since they can get loaded automatically when symbols are loaded.
+    However, this setup makes it difficult for a lldbmacro developer to load custom/local macros. Following is the suggested solution for customizing your debugging setup:
+    * set up the environment variable DEBUG_XNU_LLDBMACROS=1 in your shell. This will disable the automatic setup of lldbmacros and the operating_system.py from the symbols.
+      - bash$ export DEBUG_XNU_LLDBMACROS=1
+    * start lldb from the shell
+      - bash$ lldb
+    * [optional] If you are making changes in the operating_system plugin then you need to set the plugin path for lldb to find your custom operating_system plugin file.
+      - (lldb) settings set target.process.python-os-plugin-path /path/to/xnu/tools/lldbmacros/core/operating_system.py
+      If you do not wish to change anything in the operating_system plugin then just leave the setting empty. The symbol loading module will set one up for you.
+    * Load the xnu debug macros from your custom location.
+      - (lldb) command script import /path/to/xnu/tools/lldbmacros/xnu.py
+
+iii. Adding debug related 'printf's
+-----------------------------------
+    The xnu debug framework provides a utility function (debuglog) in utils.py. Please use this for any of your debugging needs. It will not print any output unless the user turns on debug logging for the command. Please check the documentation of debuglog for usage and options.
+
+    * To enable/disable logging
+      - (lldb) xnudebug debug
+        Enabled debug logging.
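+
+    As a sketch of the intended use (the variable name is hypothetical; see the debuglog doc
+    string in utils.py for its exact options), a macro can sprinkle trace messages that stay
+    silent until debug logging is turned on:
+
+      # prints nothing unless 'xnudebug debug' has enabled debug logging
+      debuglog("walking vnode list, count = {:d}".format(int(numvnodes)))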
+
+
+
diff --git a/tools/lldbmacros/apic.py b/tools/lldbmacros/apic.py
new file mode 100644
index 000000000..506d98685
--- /dev/null
+++ b/tools/lldbmacros/apic.py
@@ -0,0 +1,354 @@
+from xnu import *
+from misc import DoReadMsr64, DoWriteMsr64
+
+######################################
+# Globals
+######################################
+lapic_base_addr = 0xfee00000
+ioapic_base_addr = 0xfec00000
+ioapic_index_off = 0x0
+ioapic_data_off = 0x10
+
+
+######################################
+# LAPIC Helper functions
+######################################
+def IsArchX86_64():
+    """ Determines if target machine is x86_64
+        Returns:
+            True if running on x86_64, False otherwise
+    """
+    return kern.arch == "x86_64"
+
+
+@static_var('x2apic_enabled', -1)
+def IsX2ApicEnabled():
+    """ Reads the APIC configuration MSR to determine if APIC is operating
+        in x2APIC mode. The MSR is read the first time this function is
+        called, and the answer is remembered for all subsequent calls.
+        Returns:
+            True if APIC is in x2APIC mode
+            False if not
+    """
+    apic_cfg_msr = 0x1b
+    apic_cfg_msr_x2en_mask = 0xc00
+    if IsX2ApicEnabled.x2apic_enabled < 0:
+        if (int(DoReadMsr64(apic_cfg_msr, xnudefines.lcpu_self)) & apic_cfg_msr_x2en_mask ==
+            apic_cfg_msr_x2en_mask):
+            IsX2ApicEnabled.x2apic_enabled = 1
+        else:
+            IsX2ApicEnabled.x2apic_enabled = 0
+    return IsX2ApicEnabled.x2apic_enabled == 1
+
+def DoLapicRead32(offset, cpu):
+    """ Read the specified 32-bit LAPIC register
+        Params:
+            offset: int - index of LAPIC register to read
+            cpu: int - cpu ID
+        Returns:
+            The 32-bit LAPIC register value
+    """
+    if IsX2ApicEnabled():
+        return DoReadMsr64(offset >> 4, cpu)
+    else:
+        return ReadPhysInt(lapic_base_addr + offset, 32, cpu)
+
+def DoLapicWrite32(offset, val, cpu):
+    """ Write the specified 32-bit LAPIC register
+        Params:
+            offset: int - index of LAPIC register to write
+            val: int - write value
+            cpu: int - cpu ID
+        Returns:
+            True if success, False if error
+    """
+    if IsX2ApicEnabled():
+        return DoWriteMsr64(offset >> 4, cpu, val)
+    else:
+        return WritePhysInt(lapic_base_addr + offset, val, 32)
+
+######################################
+# LAPIC Register Print functions
+######################################
+def GetLapicVersionFields(reg_val):
+    """ Helper function for DoLapicDump that prints the fields of the
+        version register.
+        Params:
+            reg_val: int - the value of the version register to print
+        Returns:
+            string showing the fields
+    """
+    lvt_num = (reg_val >> 16) + 1
+    version = reg_val & 0xff
+    return "[VERSION={:d} MaxLVT={:d}]".format(version, lvt_num)
+
+def GetLapicSpuriousVectorFields(reg_val):
+    """ Helper function for DoLapicDump that prints the fields of the
+        spurious vector register.
+        Params:
+            reg_val: int - the value of the spurious vector register to print
+        Returns:
+            string showing the fields
+    """
+    vector = reg_val & 0xff
+    enabled = (reg_val & 0x100) >> 8
+    return "[VEC={:3d} ENABLED={:d}]".format(vector, enabled)
+
+def GetLapicIcrHiFields(reg_val):
+    """ Helper function for DoLapicDump that prints the fields of the
+        upper 32-bits of the Interrupt Control Register (ICR).
+        Params:
+            reg_val: int - the value of the ICR to show
+        Returns:
+            string showing the fields
+    """
+    dest = reg_val >> 24
+    return "[DEST={:d}]".format(dest)
+
+def GetLapicTimerDivideFields(reg_val):
+    """ Helper function for DoLapicDump that prints the fields of the
+        timer divide register.
+        Params:
+            reg_val: int - the value of the timer divide register
+        Returns:
+            string showing the fields
+    """
+    divide_val = ((reg_val & 0x8) >> 1) | (reg_val & 0x3)
+    if divide_val == 0x7:
+        divide_by = 1
+    else:
+        divide_by = 2 << divide_val
+    return "[Divide by {:d}]".format(divide_by)
+
+def GetApicFields(reg_val):
+    """ Helper function for DoLapicDump and DoIoapicDump that prints the
+        fields of the APIC register.
+        Params:
+            reg_val: int - the value of the APIC register to print
+        Returns:
+            string showing the fields
+    """
+    vector = reg_val & 0xff
+    tsc_deadline = reg_val & 0x40000
+    periodic = reg_val & 0x20000
+    masked = reg_val & 0x10000
+    trigger = reg_val & 0x8000
+    polarity = reg_val & 0x2000
+    pending = reg_val & 0x1000
+
+    ret_str = "[VEC={:3d} MASK={:3s} TRIG={:5s} POL={:4s} PEND={:3s}".format(
+        vector,
+        "no" if masked == 0 else "yes",
+        "edge" if trigger == 0 else "level",
+        "low" if polarity == 0 else "high",
+        "no" if pending == 0 else "yes")
+    if not periodic == 0:
+        ret_str += " PERIODIC"
+    if not tsc_deadline == 0:
+        ret_str += " TSC_DEADLINE"
+    ret_str += "]"
+    return ret_str
+
+def DoLapicDump():
+    """ Prints all LAPIC registers
+    """
+    print "LAPIC operating mode: {:s}".format(
+        "x2APIC" if IsX2ApicEnabled() else "xAPIC")
+    # LAPIC register offset, register name, field formatting function
+    lapic_dump_table = [
+        (0x020, "ID", None),
+        (0x030, "VERSION", GetLapicVersionFields),
+        (0x080, "TASK PRIORITY", None),
+        (0x0A0, "PROCESSOR PRIORITY", None),
+        (0x0D0, "LOGICAL DEST", None),
+        (0x0E0, "DEST FORMAT", None),
+        (0x0F0, "SPURIOUS VECTOR", GetLapicSpuriousVectorFields),
+        (0x100, "ISR[031:000]", None),
+        (0x110, "ISR[063:032]", None),
+        (0x120, "ISR[095:064]", None),
+        (0x130, "ISR[127:096]", None),
+        (0x140, "ISR[159:128]", None),
+        (0x150, "ISR[191:160]", None),
+        (0x160, "ISR[223:192]", None),
+        (0x170, "ISR[255:224]", None),
+        (0x180, "TMR[031:000]", None),
+        (0x190, "TMR[063:032]", None),
+        (0x1A0, "TMR[095:064]", None),
+        (0x1B0, "TMR[127:096]", None),
+        (0x1C0, "TMR[159:128]", None),
+        (0x1D0, "TMR[191:160]", None),
+        (0x1E0, "TMR[223:192]", None),
+        (0x1F0, "TMR[255:224]", None),
+        (0x200, "IRR[031:000]", None),
+        (0x210, "IRR[063:032]", None),
+        (0x220, "IRR[095:064]", None),
+        (0x230, "IRR[127:096]", None),
+        (0x240, "IRR[159:128]", None),
+        (0x250, "IRR[191:160]", None),
+        (0x260, "IRR[223:192]", None),
+        (0x270, "IRR[255:224]", None),
+        (0x280, "ERROR STATUS", None),
+        (0x300, "Interrupt Command LO", GetApicFields),
+        (0x310, "Interrupt Command HI", GetLapicIcrHiFields),
+        (0x320, "LVT Timer", GetApicFields),
+        (0x350, "LVT LINT0", GetApicFields),
+        (0x360, "LVT LINT1", GetApicFields),
+        (0x370, "LVT Error", GetApicFields),
+        (0x340, "LVT PerfMon", GetApicFields),
+        (0x330, "LVT Thermal", GetApicFields),
+        (0x3e0, "Timer Divide", GetLapicTimerDivideFields),
+        (0x380, "Timer Init Count", None),
+        (0x390, "Timer Cur Count", None)]
+    for reg in lapic_dump_table:
+        reg_val = DoLapicRead32(reg[0], xnudefines.lcpu_self)
+        if reg[2] == None:
+            print "LAPIC[{:#05x}] {:21s}: {:#010x}".format(reg[0], reg[1], reg_val)
+        else:
+            print "LAPIC[{:#05x}] {:21s}: {:#010x} {:s}".format(reg[0], reg[1],
+                reg_val, reg[2](reg_val))
+
+######################################
+# IOAPIC Helper functions
+######################################
+def DoIoApicRead(offset):
+    """ Read the specified IOAPIC register
+        Params:
+            offset: int - index of IOAPIC register to read
+        Returns:
+            int 32-bit read value
+    """
+    WritePhysInt(ioapic_base_addr + ioapic_index_off, offset, 8)
+    return ReadPhysInt(ioapic_base_addr + ioapic_data_off, 32)
+
+def DoIoApicWrite(offset, val):
+    """ Write the specified IOAPIC register
+        Params:
+            offset: int - index of IOAPIC register to write
+        Returns:
+            True if success, False if error
+    """
+    WritePhysInt(ioapic_base_addr + ioapic_index_off, offset, 8)
+    return WritePhysInt(ioapic_base_addr + ioapic_data_off, val, 32)
+
+def DoIoApicDump():
+    """ Prints all IOAPIC registers
+    """
+    # Show IOAPIC ID register
+    ioapic_id = DoIoApicRead(0)
+    print "IOAPIC[0x00] {:9s}: {:#010x}".format("ID", ioapic_id)
+    # Show IOAPIC Version register
+    ioapic_ver = DoIoApicRead(1)
+    maxredir = ((ioapic_ver >> 16) & 0xff) + 1
+    print "IOAPIC[0x01] {:9s}: {:#010x}".format("VERSION", ioapic_ver) +\
+        " [MAXREDIR={:02d} PRQ={:d} VERSION={:#04x}]".format(
+            maxredir,
+            ioapic_ver >> 15 & 0x1,
+            ioapic_ver & 0xff)
+    # Show IOAPIC redirect registers
+    for redir in range(maxredir):
+        redir_val_lo = DoIoApicRead(0x10 + redir * 2)
+        redir_val_hi = DoIoApicRead(0x10 + (redir * 2) + 1)
+        print "IOAPIC[{:#04x}] IOREDIR{:02d}: {:#08x}{:08x} {:s}".format(
+            0x10 + (redir * 2),
+            redir,
+            redir_val_hi,
+            redir_val_lo,
+            GetApicFields(redir_val_lo))
+
+######################################
+# LLDB commands
+######################################
+@lldb_command('lapic_read32')
+def LapicRead32(cmd_args=None):
+    """ Read the LAPIC register at the specified offset. The CPU can
+        be optionally specified.
+        Syntax: lapic_read32 <offset> [lcpu]
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print LapicRead32.__doc__
+        return
+    if not IsArchX86_64():
+        print "lapic_read32 not supported on this architecture."
+        return
+
+    lcpu = xnudefines.lcpu_self
+    if len(cmd_args) > 1:
+        lcpu = ArgumentStringToInt(cmd_args[1])
+
+    offset = ArgumentStringToInt(cmd_args[0])
+    read_val = DoLapicRead32(offset, lcpu)
+    print "LAPIC[{:#05x}]: {:#010x}".format(offset, read_val)
+
+@lldb_command('lapic_write32')
+def LapicWrite32(cmd_args=None):
+    """ Write the LAPIC register at the specified offset. The CPU can
+        be optionally specified. Prints an error message if there was a
+        failure. Prints nothing upon success.
+        Syntax: lapic_write32 <offset> <value> [lcpu]
+    """
+    if cmd_args == None or len(cmd_args) < 2:
+        print LapicWrite32.__doc__
+        return
+    if not IsArchX86_64():
+        print "lapic_write32 not supported on this architecture."
+        return
+    offset = ArgumentStringToInt(cmd_args[0])
+    write_val = ArgumentStringToInt(cmd_args[1])
+    lcpu = xnudefines.lcpu_self
+    if len(cmd_args) > 2:
+        lcpu = ArgumentStringToInt(cmd_args[2])
+    if not DoLapicWrite32(offset, write_val, lcpu):
+        print "lapic_write32 FAILED"
+
+@lldb_command('lapic_dump')
+def LapicDump(cmd_args=None):
+    """ Prints all LAPIC entries
+    """
+    if not IsArchX86_64():
+        print "lapic_dump not supported on this architecture."
+        return
+    DoLapicDump()
+
+@lldb_command('ioapic_read32')
+def IoApicRead32(cmd_args=None):
+    """ Read the IOAPIC register at the specified offset.
+        Syntax: ioapic_read32 <offset>
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print IoApicRead32.__doc__
+        return
+    if not IsArchX86_64():
+        print "ioapic_read32 not supported on this architecture."
+        return
+
+    offset = ArgumentStringToInt(cmd_args[0])
+    read_val = DoIoApicRead(offset)
+    print "IOAPIC[{:#04x}]: {:#010x}".format(offset, read_val)
+
+@lldb_command('ioapic_write32')
+def IoApicWrite32(cmd_args=None):
+    """ Write the IOAPIC register at the specified offset.
+        Syntax: ioapic_write32 <offset> <value>
+    """
+    if cmd_args == None or len(cmd_args) < 2:
+        print IoApicWrite32.__doc__
+        return
+    if not IsArchX86_64():
+        print "ioapic_write32 not supported on this architecture."
+        return
+
+    offset = ArgumentStringToInt(cmd_args[0])
+    write_val = ArgumentStringToInt(cmd_args[1])
+    if not DoIoApicWrite(offset, write_val):
+        print "ioapic_write32 FAILED"
+        return
+
+@lldb_command('ioapic_dump')
+def IoApicDump(cmd_args=None):
+    """ Prints all IOAPIC entries
+    """
+    if not IsArchX86_64():
+        print "ioapic_dump not supported on this architecture."
+        return
+    DoIoApicDump()
+
diff --git a/tools/lldbmacros/core/__init__.py b/tools/lldbmacros/core/__init__.py
new file mode 100644
index 000000000..a3c732e0b
--- /dev/null
+++ b/tools/lldbmacros/core/__init__.py
@@ -0,0 +1,4 @@
+"""
+Core classes and functions used for lldb kernel debugging.
+"""
+from cvalue import value
diff --git a/tools/lldbmacros/core/caching.py b/tools/lldbmacros/core/caching.py
new file mode 100644
index 000000000..4a0b2bd6d
--- /dev/null
+++ b/tools/lldbmacros/core/caching.py
@@ -0,0 +1,165 @@
+"""
+A basic caching module for xnu debug macros to use.
+It is recommended to use the [Get|Save][Static|Dynamic]CacheData() apis for
+your caching needs. These APIs will handle the case of clearing caches when
+a debugger continues and stops or hits a breakpoint.
+
+Use Static caches for data that will not change if the program is run and stopped again. e.g. typedata, version numbers etc.
+An example invocation could be like
+def getDSYMPathForUUID(uuid):
+    # Get the data from cache
+    cached_data = caching.GetStaticCacheData('dsym.for.uuid', {})
+
+    if uuid in cached_data:
+        return cached_data[uuid]
+    else:
+        path = #get info for uuid
+        cached_data[uuid] = path
+
+    # save the cached_data object to cache.
+    caching.SaveStaticCacheData('dsym.for.uuid', cached_data)
+
+    return cached_data[uuid]
+
+And use Dynamic caches for things like thread data, zones information etc.
+These will automatically be dropped when the debugger continues the target
+An example use of a Dynamic cache could be as follows
+
+def GetExecutablePathForPid(pid):
+    # Get the data from cache
+    cached_data = caching.GetDynamicCacheData('exec_for_path', {})
+
+    if pid in cached_data:
+        return cached_data[pid]
+    else:
+        exec_path = "/path/to/exec" #get exec path for pid
+        cached_data[pid] = exec_path
+
+    # save the cached_data object to cache.
+    caching.SaveDynamicCacheData('exec_for_path', cached_data)
+
+    return cached_data[pid]
+
+"""
+
+#Private Routines and objects
+
+from configuration import *
+
+import sys
+
+"""
+The format for the saved data dictionaries is
+{
+    'key' : (valueobj, versno),
+    ...
+}
+
+The versno is an int defining the version of obj. In case of a version mismatch it will set valueobj to the default upon access.
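+
+For example (a hypothetical key and value, shown only to illustrate the tuple layout), after
+SaveStaticCacheData('dsym.for.uuid', {'UUID-1': '/path/to/dSYM'}) the static dictionary would hold
+{ 'dsym.for.uuid' : ({'UUID-1': '/path/to/dSYM'}, 0) }, 0 being the session id that
+_GetCurrentSessionID() returned at save time.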
+
+"""
+_static_data = {}
+_dynamic_data = {}
+
+
+
+def _GetDebuggerSessionID():
+    """ A default callable function that _GetCurrentSessionID uses to
+        identify a stopped session.
+    """
+    return 0
+
+def _GetCurrentSessionID():
+    """ Get the current session id. This will update whenever
+        the system is continued or if there is new information that would
+        cause the dynamic cache to be deleted.
+        returns: int - session id number.
+    """
+    session_id = _GetDebuggerSessionID()
+    return session_id
+
+
+#Public APIs
+
+def GetSizeOfCache():
+    """ Returns number of bytes held in cache.
+        returns:
+            int - size of cache including static and dynamic
+    """
+    global _static_data, _dynamic_data
+    return sys.getsizeof(_static_data) + sys.getsizeof(_dynamic_data)
+
+
+def GetStaticCacheData(key, default_value = None):
+    """ Get cached object based on key from the cache of static information.
+        params:
+            key: str - a unique string identifying your data.
+            default_value : obj - an object that should be returned if key is not found.
+        returns:
+            default_value - if the static cache does not have your data.
+            obj - The data obj saved with SaveStaticCacheData()
+    """
+    global _static_data
+    key = str(key)
+    if key in _static_data:
+        return _static_data[key][0]
+    return default_value
+
+def SaveStaticCacheData(key, value):
+    """ Save data into the cache identified by key.
+        It will overwrite any data that was previously associated with key.
+        params:
+            key : str - a unique string identifying your data
+            value: obj - any object that is to be cached.
+        returns:
+            Nothing
+    """
+    global _static_data
+
+    if not config['CacheStaticData']:
+        return
+
+    key = str(key)
+    _static_data[key] = (value, _GetCurrentSessionID())
+    return
+
+
+def GetDynamicCacheData(key, default_value=None):
+    """ Get cached object based on key from the cache of dynamic information.
+        params:
+            key: str - a unique string identifying the cached object
+            default_value : obj - an object that should be returned if key is not found.
+        returns:
+            default_value - if the dynamic cache does not have the data or if the saved version mismatches the current session id.
+            obj - The data obj saved with SaveDynamicCacheData()
+    """
+    global _dynamic_data
+    key = str(key)
+    if key in _dynamic_data:
+        if _GetCurrentSessionID() == _dynamic_data[key][1]:
+            return _dynamic_data[key][0]
+        else:
+            del _dynamic_data[key]
+
+    return default_value
+
+
+def SaveDynamicCacheData(key, value):
+    """ Save data into the cache identified by key.
+        It will overwrite any data that was previously associated with key.
+        params:
+            key : str - a unique string identifying your data
+            value: obj - any object that is to be cached.
+        returns:
+            Nothing
+    """
+    global _dynamic_data
+
+    if not config['CacheDynamicData']:
+        return
+
+    key = str(key)
+    _dynamic_data[key] = (value, _GetCurrentSessionID())
+
+    return
diff --git a/tools/lldbmacros/core/configuration.py b/tools/lldbmacros/core/configuration.py
new file mode 100644
index 000000000..855dd920b
--- /dev/null
+++ b/tools/lldbmacros/core/configuration.py
@@ -0,0 +1,10 @@
+# global configs to be included by everybody. The recommended way is
+#   from core.configuration import *
+# verbosity levels
+(vSILENT, vHUMAN, vSCRIPT, vDETAIL) = (0, 1, 2, 3)
+
+config = {'debug': False, 'verbosity': vHUMAN, 'showTypeSummary': False, "CacheStaticData":True, "CacheDynamicData": True}
+# Note: The above configuration dictionary holds the default values.
+# 'debug' when True enables debug print messages in the whole xnu lldbmacros framework
+# 'CacheStaticData' when True caches static data. Types, uuids etc.
+# 'CacheDynamicData' when True caches dynamic data which will get cleared upon continuing, single stepping or detaching.
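+# A hypothetical interactive override of these defaults (assuming the module
+# is imported as core.configuration):
+#   from core.configuration import config, vDETAIL
+#   config['verbosity'] = vDETAIL
+#   config['CacheDynamicData'] = False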
diff --git a/tools/lldbmacros/core/cvalue.py b/tools/lldbmacros/core/cvalue.py
new file mode 100644
index 000000000..07bc25aec
--- /dev/null
+++ b/tools/lldbmacros/core/cvalue.py
@@ -0,0 +1,463 @@
+"""
+Defines a class value which encapsulates the basic lldb Scripting Bridge APIs. This provides an easy
+wrapper to extract information from C based constructs.
+ |------- core.value------------|
+ | |--lldb Scripting Bridge--|  |
+ | |    |--lldb core--|      |  |
+ | |-------------------------|  |
+ |------------------------------|
+Use the member function GetSBValue() to access the base Scripting Bridge value.
+"""
+import lldb
+import re
+from lazytarget import *
+
+_cstring_rex = re.compile("((?:\s*|const\s+)\s*char(?:\s+\*|\s+[A-Za-z_0-9]*\s*\[|)\s*)",re.MULTILINE|re.DOTALL)
+
+class value(object):
+    '''A class designed to wrap lldb.SBValue() objects so the resulting object
+    can be used as a variable would be in code. So if you have a Point structure
+    variable in your code in the current frame named "pt", you can initialize an instance
+    of this class with it:
+
+    pt = lldb.value(lldb.frame.FindVariable("pt"))
+    print pt
+    print pt.x
+    print pt.y
+
+    rectangle_array = lldb.value(lldb.frame.FindVariable("rectangle_array"))
+    print rectangle_array[12]
+    print rectangle_array[5].origin.x'''
+    def __init__(self, sbvalue):
+        #_sbval19k84obscure747 is specifically chosen to be obscure.
+        #This avoids conflicts when attributes could mean any field value in code
+        self._sbval19k84obscure747 = sbvalue
+        self._sbval19k84obscure747_type = sbvalue.GetType()
+        self._sbval19k84obscure747_is_ptr = sbvalue.GetType().IsPointerType()
+        self.sbvalue = sbvalue
+
+    def __nonzero__(self):
+        return ( self._sbval19k84obscure747.__nonzero__() and self._GetValueAsUnsigned() != 0 )
+
+    def __repr__(self):
+        return self._sbval19k84obscure747.__str__()
+
+    def __cmp__(self, other):
+        if type(other) is int:
+            me = int(self)
+            if type(me) is long:
+                other = long(other)
+            return me.__cmp__(other)
+        if type(other) is value:
+            return int(self).__cmp__(int(other))
+        raise TypeError("Cannot compare value with this type")
+
+    def __str__(self):
+        global _cstring_rex
+        type_name = self._sbval19k84obscure747_type.GetName()
+        if len(_cstring_rex.findall(type_name)) > 0 :
+            return self._GetValueAsString()
+        summary = self._sbval19k84obscure747.GetSummary()
+        if summary:
+            return summary.strip('"')
+        return self._sbval19k84obscure747.__str__()
+
+    def __getitem__(self, key):
+        # Allow array access if this value has children...
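+        # e.g. arr[2], arr[idx] or arr[0:8]; a slice yields a plain python
+        # list of wrapped elements ('arr' and 'idx' are hypothetical names).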
+        if type(key) is slice:
+            _start = int(key.start)
+            _end = int(key.stop)
+            _step = 1
+            if key.step != None:
+                _step = int(key.step)
+            retval = []
+            while _start < _end:
+                retval.append(self[_start])
+                _start += _step
+            return retval
+        if type(key) in (int, long):
+            return value(self._sbval19k84obscure747.GetValueForExpressionPath("[%i]" % key))
+        if type(key) is value:
+            return value(self._sbval19k84obscure747.GetValueForExpressionPath("[%i]" % int(key)))
+        raise TypeError("Cannot fetch Array item for this type")
+
+    def __getattr__(self, name):
+        child_sbvalue = self._sbval19k84obscure747.GetChildMemberWithName (name)
+        if child_sbvalue:
+            return value(child_sbvalue)
+        raise AttributeError("No field by name: "+name )
+
+    def __add__(self, other):
+        return int(self) + int(other)
+
+    def __radd__(self, other):
+        return int(self) + int(other)
+
+    def __sub__(self, other):
+        return int(self) - int(other)
+
+    def __rsub__(self, other):
+        return int(other) - int(self)
+
+    def __mul__(self, other):
+        return int(self) * int(other)
+
+    def __rmul__(self, other):
+        return int(self) * int(other)
+
+    def __floordiv__(self, other):
+        return int(self) // int(other)
+
+    def __mod__(self, other):
+        return int(self) % int(other)
+
+    def __rmod__(self, other):
+        return int(other) % int(self)
+
+    def __divmod__(self, other):
+        return divmod(int(self), int(other))
+
+    def __rdivmod__(self, other):
+        return divmod(int(other), int(self))
+
+    def __pow__(self, other):
+        return int(self) ** int(other)
+
+    def __lshift__(self, other):
+        return int(self) << int(other)
+
+    def __rshift__(self, other):
+        return int(self) >> int(other)
+
+    def __and__(self, other):
+        return int(self) & int(other)
+
+    def __rand__(self, other):
+        return int(self) & int(other)
+
+    def __xor__(self, other):
+        return int(self) ^ int(other)
+
+    def __or__(self, other):
+        return int(self) | int(other)
+
+    def __div__(self, other):
+        return int(self) / int(other)
+
+    def __rdiv__(self, other):
+        return int(other) / int(self)
+
+    def __truediv__(self, other):
+        return int(self) / int(other)
+
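+    # NOTE: the in-place operators below (__iadd__ through __ior__) also write
+    # the result back into target memory via SetValueFromCString(); the plain
+    # operators above only read and return python ints.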
+    def __iadd__(self, other):
+        result = self.__add__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __isub__(self, other):
+        result = self.__sub__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __imul__(self, other):
+        result = self.__mul__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __idiv__(self, other):
+        result = self.__div__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __itruediv__(self, other):
+        result = self.__truediv__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __ifloordiv__(self, other):
+        result = self.__floordiv__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __imod__(self, other):
+        result = self.__mod__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __ipow__(self, other, modulo=None):
+        # an optional modulo is accepted for completeness with three-arg pow()
+        if modulo is None:
+            result = self.__pow__(other)
+        else:
+            result = pow(int(self), int(other), int(modulo))
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __ilshift__(self, other):
+        result = self.__lshift__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __irshift__(self, other):
+        result = self.__rshift__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __iand__(self, other):
+        result = self.__and__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __ixor__(self, other):
+        result = self.__xor__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __ior__(self, other):
+        result = self.__or__(other)
+        self._sbval19k84obscure747.SetValueFromCString (str(result))
+        return result
+
+    def __neg__(self):
+        return -int(self)
+
+    def __pos__(self):
+        return +int(self)
+
+    def __abs__(self):
+        return abs(int(self))
+
+    def __invert__(self):
+        return ~int(self)
+
+    def __complex__(self):
+        return complex (int(self))
+
+    def __int__(self):
+        if self._sbval19k84obscure747_is_ptr:
+            return self._GetValueAsUnsigned()
+        tname = self._sbval19k84obscure747_type.GetName()
+        if tname.find('uint') >= 0 or tname.find('unsigned') >= 0:
+            return self._GetValueAsUnsigned()
+        retval = self._sbval19k84obscure747.GetValueAsSigned()
+        # lldb python: GetValueAsSigned does not return the correct value
+        if (retval & 0x80000000):
+            retval = retval - 0x100000000
+        return retval
+
+    def __long__(self):
+        return self._sbval19k84obscure747.GetValueAsSigned()
+
+    def __float__(self):
+        return float (self._sbval19k84obscure747.GetValueAsSigned())
+
+    def __oct__(self):
+        return '0%o' % self._GetValueAsUnsigned()
+
+    def __hex__(self):
+        return '0x%x' % self._GetValueAsUnsigned()
+
+    def __eq__(self, other):
+        self_err = lldb.SBError()
+        other_err = lldb.SBError()
+        self_val = self._sbval19k84obscure747.GetValueAsUnsigned(self_err)
+        if self_err.fail:
+            raise ValueError("unable to extract value of self")
+        if type(other) is value:
+            other_val = other._sbval19k84obscure747.GetValueAsUnsigned(other_err)
+            if other_err.fail:
+                raise ValueError("unable to extract value of other")
+            return self_val == other_val
+        if type(other) is int:
+            return int(self) == other
+        raise TypeError("Equality operation is not defined for this type.")
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def GetSBValue(self):
+        return self._sbval19k84obscure747
+
+    def _GetValueAsSigned(self):
+        serr = lldb.SBError()
+        retval = self._sbval19k84obscure747.GetValueAsSigned(serr)
+        if serr.success:
+            return retval
+        raise ValueError("Failed to read signed data. " + str(self._sbval19k84obscure747) + "(type =" + str(self._sbval19k84obscure747_type) + ") Error description: " + serr.GetCString())
+
+    def _GetValueAsUnsigned(self):
+        serr = lldb.SBError()
+        retval = self._sbval19k84obscure747.GetValueAsUnsigned(serr)
+        if serr.success:
+            return retval
+        raise ValueError("Failed to read unsigned data. " + str(self._sbval19k84obscure747) + "(type =" + str(self._sbval19k84obscure747_type) + ") Error description: " + serr.GetCString())
+
+    def _GetValueAsString(self, offset = 0, maxlen = 1024):
+        # reads up to maxlen bytes from the pointee and stops at the first NUL
+        serr = lldb.SBError()
+        sbdata = None
+        if self._sbval19k84obscure747.TypeIsPointerType():
+            sbdata = self._sbval19k84obscure747.GetPointeeData(offset, maxlen)
+        else:
+            sbdata = self._sbval19k84obscure747.GetData()
+
+        retval = ''
+        bytesize = sbdata.GetByteSize()
+        if bytesize == 0 :
+            #raise ValueError('Unable to read value as string')
+            return ''
+        for i in range(0, bytesize) :
+            serr.Clear()
+            ch = chr(sbdata.GetUnsignedInt8(serr, i))
+            if serr.fail :
+                raise ValueError("Unable to read string data: " + serr.GetCString())
+            if ch == '\0':
+                break
+            retval += ch
+        return retval
+
+    def __format__(self, format_spec):
+        ret_format = "{0:"+format_spec+"}"
+        # the type char is the last char. see http://www.python.org/dev/peps/pep-3101/
+        type_spec = format_spec.strip().lower()[-1]
+        if type_spec == 'x':
+            return ret_format.format(self._GetValueAsUnsigned())
+        if type_spec == 'd':
+            return ret_format.format(int(self))
+        if type_spec == 's':
+            return ret_format.format(str(self))
+        if type_spec == 'o':
+            # pass the raw integer through; the 'o' format spec performs the
+            # octal conversion itself
+            return ret_format.format(int(self))
+        if type_spec == 'c':
+            return ret_format.format(int(self))
+
+        return "unknown format " + format_spec + str(self)
+
+
+def unsigned(val):
+    """ Helper function to get unsigned value from core.value
+        params: val - value (see value class above) representation of an integer type
+        returns: int which is unsigned.
+        raises : ValueError if the type cannot be represented as unsigned int.
+    """
+    if type(val) is value:
+        return val._GetValueAsUnsigned()
+    return int(val)
+
+def sizeof(t):
+    """ Find the byte size of a type.
+        params: t - str : ex 'time_spec' returns equivalent of sizeof(time_spec) in C
+                t - value: ex a value object. returns size of the object
+        returns: int - byte size length
+    """
+    if type(t) is value :
+        return t.GetSBValue().GetByteSize()
+    if type(t) is str:
+        return gettype(t).GetByteSize()
+    raise ValueError("Cannot get sizeof. Invalid argument")
+
+
+def dereference(val):
+    """ Get a dereferenced obj for a pointer type obj
+        params: val - value object representing a pointer type C construct in lldb
+        returns: value - value
+        ex. val = dereference(ptr_obj) #python
+            is same as
+            obj_ptr = (int *)0x1234 #C
+            val = *obj_ptr #C
+    """
+    if type(val) is value and val.GetSBValue().TypeIsPointerType():
+        return value(val.GetSBValue().Dereference())
+    raise TypeError('Cannot dereference this type.')
+
+def addressof(val):
+    """ Get address of a core.value object.
+        params: val - value object representing a C construct in lldb
+        returns: value - value object referring to 'type(val) *' type
+        ex. addr = addressof(hello_obj) #python
+            is same as
+            uintptr_t addr = (uintptr_t)&hello_obj #C
+    """
+    if type(val) is value:
+        return value(val.GetSBValue().AddressOf())
+    raise TypeError("Cannot do addressof for non-value type objects")
+
+def cast(obj, target_type):
+    """ Type cast an object to another C type.
+        params:
+            obj - core.value object representing some C construct in lldb
+            target_type - str : ex 'char *'
+                        - lldb.SBType :
+    """
+    dest_type = target_type
+    if type(target_type) is str:
+        dest_type = gettype(target_type)
+    elif type(target_type) is value:
+        dest_type = target_type.GetSBValue().GetType()
+
+    if type(obj) is value :
+        return value(obj.GetSBValue().Cast(dest_type))
+    elif type(obj) is int:
+        print "ERROR: You cannot cast an 'int' to %s, please use kern.GetValueFromAddress() for such purposes." % str(target_type)
+    raise TypeError("object of type %s cannot be cast to %s" % (str(type(obj)), str(target_type)))
+
+_value_types_cache = {}
+
+def gettype(target_type):
+    """ Returns lldb.SBType of the given target_type
+        params:
+            target_type - str, ex. 'char', 'uint32_t' etc
+        returns:
+            lldb.SBType - SBType corresponding to the given target_type
+        raises:
+            NameError - in case the type is not identified
+    """
+    global _value_types_cache
+    # LLDB somehow does not support finding types for 'struct pmap' while 'pmap' works fine
+    #
+    target_type = target_type.replace('struct', '')
+    target_type = str(target_type).strip()
+    if target_type not in _value_types_cache:
+        tmp_type = None
+        if target_type.endswith('*') :
+            tmp_type = LazyTarget.GetTarget().FindFirstType(target_type.rstrip('*').strip())
+            if not tmp_type.IsValid():
+                raise NameError('Unable to find type '+target_type)
+            tmp_type = tmp_type.GetPointerType()
+        else :
+            tmp_type = LazyTarget.GetTarget().FindFirstType(target_type)
+            if not tmp_type.IsValid():
+                raise NameError('Unable to find type '+target_type)
+        _value_types_cache[target_type] = tmp_type
+    return _value_types_cache[target_type]
+
+def getfieldoffset(struct_type, field_name):
+    """ Returns the byte offset of a field inside a given struct
+        params:
+            struct_type - str or lldb.SBType, ex. 'struct ipc_port *' or port.gettype()
+            field_name - str, name of the field inside the struct ex. 'ip_messages'
+        returns:
+            int - byte offset of the field_name inside the struct_type
+        raises:
+            TypeError - in case the struct_type has no field with the name field_name
+    """
+    if type(struct_type) == str:
+        struct_type = gettype(struct_type)
+    offset = 0
+    for field in struct_type.get_fields_array():
+        if str(field.GetName()) == field_name:
+            return field.GetOffsetInBytes()
+    raise TypeError('Field name "%s" not found in type "%s"' % (field_name, str(struct_type)))
+
+def islong(x):
+    """ Returns True if a string represents a long integer, False otherwise
+    """
+    try:
+        long(x,16)
+    except ValueError:
+        try:
+            long(x)
+        except ValueError:
+            return False
+    return True
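+
+# A quick sketch of how the helpers above compose (the address, type and
+# fields are hypothetical; 'kern' is the KernelTarget object from kernelcore):
+#   vp = kern.GetValueFromAddress(0xffffff8012345678, 'vnode *')
+#   parent = dereference(vp.v_parent)
+#   ptr_bytes = sizeof('char *')
+#   usecount = unsigned(vp.v_usecount)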
diff --git a/tools/lldbmacros/core/kernelcore.py b/tools/lldbmacros/core/kernelcore.py
new file mode 100644
index 000000000..ce029d2bd
--- /dev/null
+++ b/tools/lldbmacros/core/kernelcore.py
@@ -0,0 +1,376 @@
+
+""" Please make sure you read the README COMPLETELY BEFORE reading anything below.
+    It is very critical that you read the coding guidelines in Section E of the README file.
"""
+
+from cvalue import *
+from lazytarget import *
+from configuration import *
+import caching
+import lldb
+
+def IterateTAILQ_HEAD(headval, element_name):
+    """ iterate over a TAILQ_HEAD in the kernel. refer to bsd/sys/queue.h
+        params:
+            headval      - value : value object representing the head of the list
+            element_name - str   : string name of the field which holds the list links.
+        returns:
+            A generator does not return. It is used for iterating.
+            value : an object of the same type as headval->tqh_first. Always a pointer object
+        example usage:
+            list_head = kern.GetGlobalVariable('mountlist')
+            for entryobj in IterateTAILQ_HEAD(list_head, 'mnt_list'):
+                print GetEntrySummary(entryobj)
+    """
+    iter_val = headval.tqh_first
+    while unsigned(iter_val) != 0 :
+        yield iter_val
+        iter_val = iter_val.__getattr__(element_name).tqe_next
+    #end of yield loop
+
+def IterateLinkedList(element, field_name):
+    """ iterate over a linked list.
+        This is equivalent to elt = element; while(elt) { do_work(elt); elt = elt->field_name; }
+        params:
+            element - value : value object representing an element in the list.
+            field_name - str : name of the field that holds the pointer to the next element
+        returns: Nothing. This is used as an iterable
+        example usage:
+            first_zone = kern.GetGlobalVariable('first_zone')
+            for zone in IterateLinkedList(first_zone, 'next_zone'):
+                print GetZoneSummary(zone)
+    """
+    elt = element
+    while unsigned(elt) != 0:
+        yield elt
+        elt = elt.__getattr__(field_name)
+    #end of while loop
+
+def IterateListEntry(element, element_type, field_name):
+    """ iterate over a list as defined with LIST_HEAD in bsd/sys/queue.h
+        params:
+            element      - value : Value object for lh_first
+            element_type - str   : Type of the next element
+            field_name   - str   : Name of the field in the next element's structure
+        returns:
+            A generator does not return. It is used for iterating
+            value : an object that's of type (element_type) head->le_next. Always a pointer object
+        example usage:
+            headp = kern.globals.initproc.p_children
+            for pp in IterateListEntry(headp, 'struct proc *', 'p_sibling'):
+                print GetProcInfo(pp)
+    """
+    elt = element.lh_first
+    if type(element_type) == str:
+        element_type = gettype(element_type)
+    while unsigned(elt) != 0:
+        yield elt
+        next_el = elt.__getattr__(field_name).le_next
+        elt = cast(next_el, element_type)
+
+def IterateQueue(queue_head, element_ptr_type, element_field_name):
+    """ iterate over a queue in the kernel of type queue_head_t. refer to osfmk/kern/queue.h
+        params:
+            queue_head         - value : Value object for queue_head.
+            element_ptr_type   - lldb.SBType : a pointer type of the element 'next' points to. Typically it is a struct like thread, task etc..
+                               - str : OR a string describing the type. ex. 'task *'
+            element_field_name - str : name of the field in the target struct.
+        returns:
+            A generator does not return. It is used for iterating.
+            value : an object that's of type (element_type) queue_head->next. Always a pointer object
+    """
+    if type(element_ptr_type) == str :
+        element_ptr_type = gettype(element_ptr_type)
+
+    queue_head = queue_head.GetSBValue()
+    queue_head_addr = 0x0
+    if queue_head.TypeIsPointerType():
+        queue_head_addr = queue_head.GetValueAsUnsigned()
+    else:
+        queue_head_addr = queue_head.GetAddress().GetLoadAddress(LazyTarget.GetTarget())
+    cur_elt = queue_head.GetChildMemberWithName('next')
+    while True:
+
+        if not cur_elt.IsValid() or cur_elt.GetValueAsUnsigned() == 0 or cur_elt.GetValueAsUnsigned() == queue_head_addr:
+            break
+        elt = cur_elt.Cast(element_ptr_type)
+        yield value(elt)
+        cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')
+
+
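+# For instance, to walk every task on the kernel's global task queue (a
+# sketch, assuming a KernelTarget instance named 'kern' as set up elsewhere
+# in lldbmacros):
+#   for t in IterateQueue(kern.GetGlobalVariable('tasks'), 'task *', 'tasks'):
+#       print t
+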
+class KernelTarget(object):
+    """ A common kernel object that provides access to kernel objects and information.
+        The class holds global lists for tasks, terminated_tasks, procs, zones, zombproc etc.
+        It also provides a way to symbolicate an address or create a value from an address.
+    """
+    def __init__(self, debugger):
+        """ Initialize the kernel debugging environment.
+            Target properties like architecture and connectedness are lazy-evaluated.
+        """
+        self._debugger = debugger # This holds an lldb.SBDebugger object for debugger state
+        self._threads_list = []
+        self._tasks_list = []
+        self._allproc = []
+        self._terminated_tasks_list = []
+        self._zones_list = []
+        self._zombproc_list = []
+        self._kernel_types_cache = {} #this will cache the Type objects as and when requested.
+        self._version = None
+        self._arch = None
+        self._ptrsize = None # pointer size of kernel, not userspace
+        self.symbolicator = None
+        class _GlobalVariableFind(object):
+            def __init__(self, kern):
+                self._xnu_kernobj_12obscure12 = kern
+            def __getattr__(self, name):
+                v = self._xnu_kernobj_12obscure12.GetGlobalVariable(name)
+                if not v.GetSBValue().IsValid():
+                    raise ValueError('no such global variable by name: %s ' % str(name))
+                return v
+        self.globals = _GlobalVariableFind(self)
+        LazyTarget.Initialize(debugger)
+
+    def _GetSymbolicator(self):
+        """ Internal function: To initialize the symbolication from lldb.utils
+        """
+        if self.symbolicator is not None:
+            return self.symbolicator
+
+        from lldb.utils import symbolication
+        symbolicator = symbolication.Symbolicator()
+        symbolicator.target = LazyTarget.GetTarget()
+        self.symbolicator = symbolicator
+        return self.symbolicator
+
+    def Symbolicate(self, addr):
+        """ simple method to get the name of a function/variable from an address. this is the equivalent of gdb 'output /a 0xaddress'
+            params:
+                addr - int : typically a hex value like 0xffffff80002c0df0
+            returns:
+                str - '' if no symbol found else the symbol name.
+            Note: this function only finds the first symbol. If you expect multiple conflicting symbols, please use SymbolicateFromAddress()
+        """
+        ret_str = ''
+        syms = self.SymbolicateFromAddress(addr)
+        if len(syms) > 0:
+            ret_str += syms[0].GetName()
+        return ret_str
+
+    def SymbolicateFromAddress(self, addr):
+        """ symbolicates any given address based on modules loaded in the target.
+            params:
+                addr - int : typically a hex value like 0xffffff80002c0df0
+            returns:
+                [] of SBSymbol: in case we don't find anything, an empty array is returned.
+                Note: the type of a symbol can be figured out via the GetType() function of SBSymbol.
+            example usage:
+                syms = kern.SymbolicateFromAddress(0xffffff80002c0df0)
+                for s in syms:
+                    if s.GetType() == lldb.eSymbolTypeCode:
+                        print "Function", s.GetName()
+                    if s.GetType() == lldb.eSymbolTypeData:
+                        print "Variable", s.GetName()
+        """
+        if type(int(1)) != type(addr):
+            if str(addr).strip().find("0x") == 0 :
+                addr = int(addr, 16)
+            else:
+                addr = int(addr)
+        ret_array = []
+        symbolicator = self._GetSymbolicator()
+        syms = symbolicator.symbolicate(addr)
+        if not syms:
+            return ret_array
+        for s in syms:
+            ret_array.append(s.get_symbol_context().symbol)
+        return ret_array
+
+    def IsDebuggerConnected(self):
+        proc_state = LazyTarget.GetProcess().state
+        if proc_state == lldb.eStateInvalid : return False
+        if proc_state in [lldb.eStateStopped, lldb.eStateSuspended] : return True
+        return False
+
+    def GetGlobalVariable(self, name):
+        """ Get the value object representation for a kernel global variable
+            params:
+                name : str - name of the variable. ex. version
+            returns: value - python object representing the global variable.
+            raises : Exception in case the variable is not found.
+        """
+        return value(LazyTarget.GetTarget().FindGlobalVariables(name, 0).GetValueAtIndex(0))
+
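+    # Hypothetical usage from a macro, once an instance of this class named
+    # 'kern' exists:
+    #   maxproc = unsigned(kern.GetGlobalVariable('maxproc'))
+    #   same    = unsigned(kern.globals.maxproc)
+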
+    def GetLoadAddressForSymbol(self, name):
+        """ Get the load address of a symbol in the kernel.
+            params:
+                name : str - name of the symbol to look up
+            returns: int - the load address as an integer. Use GetValueFromAddress to cast to a value.
+            raises : LookupError - if the symbol is not found.
+        """
+        name = str(name)
+        target = LazyTarget.GetTarget()
+        syms_arr = target.FindSymbols(name)
+        if syms_arr.IsValid() and len(syms_arr) > 0:
+            symbol = syms_arr[0].GetSymbol()
+            if symbol.IsValid():
+                return int(symbol.GetStartAddress().GetLoadAddress(target))
+
+        raise LookupError("Symbol not found: " + name)
+
+    def GetValueFromAddress(self, addr, type_str = 'void *'):
+        """ convert an address to a value
+            params:
+                addr - int : typically a hex value like 0xffffff80008dc390
+                type_str - str: type to cast to. Default type will be void *
+            returns:
+                value : a value object which has address as addr and type as type_str
+        """
+        obj = value(self.globals.version.GetSBValue().CreateValueFromExpression(None,'(void *)'+str(addr)))
+        obj = cast(obj, type_str)
+        return obj
+
+    def GetValueAsType(self, v, t):
+        """ Retrieves a global variable 'v' of type 't' wrapped in a value object.
+            If 'v' is an address, creates a value object of the appropriate type.
+            If 'v' is a name, looks for the global variable and asserts its type.
+            Throws:
+                NameError - If 'v' cannot be found
+                TypeError - If 'v' is of the wrong type
+        """
+        if islong(v):
+            return self.GetValueFromAddress(v, t)
+        else:
+            var = LazyTarget.GetTarget().FindGlobalVariables(v, 1)[0]
+            if not var:
+                raise NameError("Failed to find global variable '{0}'".format(v))
+            if var.GetTypeName() != t:
+                raise TypeError("{0} must be of type '{1}', not '{2}'".format(v, t, var.GetTypeName()))
+            return value(var)
+
+    def _GetIterator(self, iter_head_name, next_element_name='next', iter_head_type=None):
+        """ returns an iterator for a collection in kernel memory.
+            params:
+                iter_head_name - str : name of the queue_head or list head variable.
+                next_element_name - str : name of the element that leads to the next element.
+                                    for ex. in struct zone list 'next_zone' is the linking element.
+            returns:
+                iterable : typically used in conjunction with "for varname in iterable:"
+        """
+        head_element = self.GetGlobalVariable(iter_head_name)
+        return head_element.GetSBValue().linked_list_iter(next_element_name)
+
+    def TruncPage(self, addr):
+        return (addr & ~(unsigned(self.GetGlobalVariable("page_size")) - 1))
+
+    def RoundPage(self, addr):
+        return self.TruncPage(addr + unsigned(self.GetGlobalVariable("page_size")) - 1)
+
+    def StraddlesPage(self, addr, size):
+        if size > unsigned(self.GetGlobalVariable("page_size")):
+            return True
+        return (((addr + size) & (unsigned(self.GetGlobalVariable("page_size"))-1)) < size)
+
+    def PhysToKernelVirt(self, addr):
+        if self.arch == 'x86_64':
+            return (addr + unsigned(self.GetGlobalVariable('physmap_base')))
+        elif self.arch == 'arm':
+            return (addr - unsigned(self.GetGlobalVariable("gPhysBase")) + unsigned(self.GetGlobalVariable("gVirtBase")))
+        else:
+            raise ValueError("PhysToKernelVirt does not support {0}".format(self.arch))
+
+    def __getattribute__(self, name):
+        if name == 'zones' :
+            self._zones_list = caching.GetDynamicCacheData("kern._zones_list", [])
+            if len(self._zones_list) > 0: return self._zones_list
+            first_zone = self.GetGlobalVariable('first_zone')
+            for z in IterateLinkedList(first_zone, 'next_zone'):
+                self._zones_list.append(z)
+            caching.SaveDynamicCacheData("kern._zones_list", self._zones_list)
+            return self._zones_list
+
+        if name == 'threads' :
+            self._threads_list = caching.GetDynamicCacheData("kern._threads_list", [])
+            if len(self._threads_list) > 0 : return self._threads_list
+            thread_queue_head = self.GetGlobalVariable('threads')
+            thread_type = LazyTarget.GetTarget().FindFirstType('thread')
+            thread_ptr_type = thread_type.GetPointerType()
+            for th in IterateQueue(thread_queue_head, thread_ptr_type, 'threads'):
+                self._threads_list.append(th)
+            caching.SaveDynamicCacheData("kern._threads_list", self._threads_list)
+            return self._threads_list
+
+        if name == 'tasks' :
+            self._tasks_list = caching.GetDynamicCacheData("kern._tasks_list", [])
+            if len(self._tasks_list) > 0 : return self._tasks_list
+            task_queue_head = self.GetGlobalVariable('tasks')
+            task_type = LazyTarget.GetTarget().FindFirstType('task')
+            task_ptr_type = task_type.GetPointerType()
+            for tsk in IterateQueue(task_queue_head, task_ptr_type, 'tasks'):
+                self._tasks_list.append(tsk)
+            caching.SaveDynamicCacheData("kern._tasks_list", self._tasks_list)
+            return self._tasks_list
+
+        if name == 'terminated_tasks' :
+            self._terminated_tasks_list = caching.GetDynamicCacheData("kern._terminated_tasks_list", [])
+            if len(self._terminated_tasks_list) > 0 : return self._terminated_tasks_list
+            task_queue_head = self.GetGlobalVariable('terminated_tasks')
+            task_type = LazyTarget.GetTarget().FindFirstType('task')
+            task_ptr_type = task_type.GetPointerType()
+            for tsk in IterateQueue(task_queue_head, task_ptr_type, 'tasks'):
+                self._terminated_tasks_list.append(tsk)
+            caching.SaveDynamicCacheData("kern._terminated_tasks_list", self._terminated_tasks_list)
+            return self._terminated_tasks_list
+
+        if name == 'procs' :
+            self._allproc = caching.GetDynamicCacheData("kern._allproc", [])
+            if len(self._allproc) > 0 : return self._allproc
+            all_proc_head = self.GetGlobalVariable('allproc')
+            proc_val = cast(all_proc_head.lh_first, 'proc *')
+            while proc_val != 0:
+                self._allproc.append(proc_val)
+                proc_val = cast(proc_val.p_list.le_next, 'proc *')
+            caching.SaveDynamicCacheData("kern._allproc", self._allproc)
+            return self._allproc
+
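+        # 'zombprocs' below mirrors 'procs' but walks the global 'zombproc'
+        # list of exited-but-not-yet-reaped processes.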
+        if name == 'zombprocs' :
+            self._zombproc_list = caching.GetDynamicCacheData("kern._zombproc_list", [])
+            if len(self._zombproc_list) > 0 : return self._zombproc_list
+            zproc_head = self.GetGlobalVariable('zombproc')
+            proc_val = cast(zproc_head.lh_first, 'proc *')
+            while proc_val != 0:
+                self._zombproc_list.append(proc_val)
+                proc_val = cast(proc_val.p_list.le_next, 'proc *')
+            caching.SaveDynamicCacheData("kern._zombproc_list", self._zombproc_list)
+            return self._zombproc_list
+
+        if name == 'version' :
+            self._version = caching.GetStaticCacheData("kern.version", None)
+            if self._version != None : return self._version
+            self._version = str(self.GetGlobalVariable('version'))
+            caching.SaveStaticCacheData("kern.version", self._version)
+            return self._version
+
+        if name == 'arch' :
+            self._arch = caching.GetStaticCacheData("kern.arch", None)
+            if self._arch != None : return self._arch
+            arch = LazyTarget.GetTarget().triple.split('-')[0]
+            if arch in ('armv7', 'armv7s'):
+                self._arch = 'arm'
+            else:
+                self._arch = arch
+            caching.SaveStaticCacheData("kern.arch", self._arch)
+            return self._arch
+
+        if name == 'ptrsize' :
+            self._ptrsize = caching.GetStaticCacheData("kern.ptrsize", None)
+            if self._ptrsize != None : return self._ptrsize
+            arch = LazyTarget.GetTarget().triple.split('-')[0]
+            if arch in ('x86_64',):
+                self._ptrsize = 8
+            else:
+                self._ptrsize = 4
+            caching.SaveStaticCacheData("kern.ptrsize", self._ptrsize)
+            return self._ptrsize
+
+        return object.__getattribute__(self, name)
+
diff --git a/tools/lldbmacros/core/lazytarget.py b/tools/lldbmacros/core/lazytarget.py
new file mode 100644
index 000000000..d111031ae
--- /dev/null
+++ b/tools/lldbmacros/core/lazytarget.py
@@ -0,0 +1,57 @@
+
+""" Module to abstract lazy evaluation of lldb.SBTarget
+    for the kernel
+"""
+
+import lldb
+
+class LazyTarget(object):
+    """ A common object that lazy-evaluates and caches the lldb.SBTarget
+        and lldb.SBProcess for the current interactive debugging session.
+    """
+    _debugger = None # This holds an lldb.SBDebugger object for debugger state
+    _target   = None # This holds an lldb.SBTarget object for symbol lookup
+    _process  = None # This holds an lldb.SBProcess object for reading memory
+
+    @staticmethod
+    def Initialize(debugger):
+        """ Initialize the LazyTarget with an SBDebugger.
+        """
+        LazyTarget._debugger = debugger
+        LazyTarget._target = None
+        LazyTarget._process = None
+
+    @staticmethod
+    def GetTarget():
+        """ Get an SBTarget for the most recently selected
+            target, or throw an exception.
+        """
+        if LazyTarget._target is not None:
+            return LazyTarget._target
+
+        target = LazyTarget._debugger.GetSelectedTarget()
+        if target is None:
+            raise AttributeError('No target selected')
+
+        if not target.IsValid():
+            raise AttributeError('Target is not valid')
+
+        LazyTarget._target = target
+        return target
+
+    @staticmethod
+    def GetProcess():
+        """ Get an SBProcess for the most recently selected
+            target, or throw an exception.
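+            Note: unlike GetTarget(), the process handle is not cached; it is
+            looked up afresh on every call.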
+ """ + + target = LazyTarget.GetTarget() + process = target.process + + if process is None: + raise AttributeError('Target does not have a process') + + if not process.IsValid(): + raise AttributeError('Process is not valid') + + return process diff --git a/tools/lldbmacros/core/operating_system.py b/tools/lldbmacros/core/operating_system.py new file mode 100644 index 000000000..37d1aeba9 --- /dev/null +++ b/tools/lldbmacros/core/operating_system.py @@ -0,0 +1,600 @@ +#!/usr/bin/python +# + +#source of register info is from http://opensource.apple.com/source/gdb/gdb-962/src/gdb/arm-tdep.c +import lldb +import struct +osplugin_target_obj = None + +class PluginValue(lldb.SBValue): + def GetChildMemberWithName(val, name): + val_type = val.GetType() + if val_type.IsPointerType() == True: + val_type = val_type.GetPointeeType() + for i in range(val_type.GetNumberOfFields()): + if name == val_type.GetFieldAtIndex(i).GetName(): + return PluginValue(val.GetChildAtIndex(i)) + return None + + +class Armv7_RegisterSet(object): + """ register info set for armv7 32 bit architecture """ + def __init__(self): + self.register_info = {} + self.register_info['sets'] = ['GPR'] + self.register_info['registers'] = [ + { 'name':'r0' , 'bitsize' : 32, 'offset' : 0, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 0, 'dwarf' : 0}, + { 'name':'r1' , 'bitsize' : 32, 'offset' : 4, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 1, 'dwarf' : 1}, + { 'name':'r2' , 'bitsize' : 32, 'offset' : 8, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 2, 'dwarf' : 2}, + { 'name':'r3' , 'bitsize' : 32, 'offset' : 12, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 3, 'dwarf' : 3}, + { 'name':'r4' , 'bitsize' : 32, 'offset' : 16, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 4, 'dwarf' : 4}, + { 'name':'r5' , 'bitsize' : 32, 'offset' : 20, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 5, 'dwarf' : 5}, + { 'name':'r6' , 'bitsize' : 32, 'offset' : 24, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 6, 'dwarf' : 6}, + { 'name':'r7' , 'bitsize' : 32, 'offset' : 28, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 7, 'dwarf' : 7}, + { 'name':'r8' , 'bitsize' : 32, 'offset' : 32, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 8, 'dwarf' : 8}, + { 'name':'r9' , 'bitsize' : 32, 'offset' : 36, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc': 9, 'dwarf' : 9}, + { 'name':'r10' , 'bitsize' : 32, 'offset' : 40, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':10, 'dwarf' :10}, + { 'name':'r11' , 'bitsize' : 32, 'offset' : 44, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':11, 'dwarf' :11, 'alt-name': 'fp', 'generic': 'fp'}, + { 'name':'r12' , 'bitsize' : 32, 'offset' : 48, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':12, 'dwarf' :12}, + { 'name':'sp' , 'bitsize' : 32, 'offset' : 52, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':13, 'dwarf' :13, 'alt-name': 'sp', 'generic': 'sp'}, + { 'name':'lr' , 'bitsize' : 32, 'offset' : 56, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':14, 'dwarf' :14, 'alt-name': 'lr', 'generic': 'lr'}, + { 'name':'pc' , 'bitsize' : 32, 'offset' : 60, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':15, 'dwarf' :15, 'alt-name': 'pc', 'generic': 'pc'}, + { 'name':'cpsr' , 'bitsize' : 32, 'offset' : 64, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':25, 'dwarf' :16, 'alt-name':'cpsr','generic':'cpsr'}, + { 'name':'fsr' , 'bitsize' : 32, 'offset' : 68, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':17, 'dwarf' :17, 'alt-name':'fsr', 'generic': 'fsr'}, + { 'name':'far' , 
'bitsize' : 32, 'offset' : 72, 'encoding':'uint', 'format':'hex', 'set':0, 'gcc':18, 'dwarf' :18, 'alt-name': 'far', 'generic': 'far'} + ] + self.switch_context_address = osplugin_target_obj.FindSymbols('load_reg')[0].GetSymbol().GetStartAddress().GetLoadAddress(osplugin_target_obj) + 8 + self.ResetRegisterValues() + def ResetRegisterValues(self): + self.r0 = 0 + self.r1 = 0 + self.r2 = 0 + self.r3 = 0 + self.r4 = 0 + self.r5 = 0 + self.r6 = 0 + self.r7 = 0 + self.r8 = 0 + self.r9 = 0 + self.r10 = 0 + self.r11 = 0 + self.r12 = 0 + self.sp = 0 + self.lr = 0 + self.pc = 0 + self.cpsr = 0 + self.fsr = 0 + self.far = 0 + + def __str__(self): + return """ + r0 = {o.r0: <#010x} + r1 = {o.r1: <#010x} + r2 = {o.r2: <#010x} + r3 = {o.r3: <#010x} + r4 = {o.r4: <#010x} + r5 = {o.r5: <#010x} + r6 = {o.r6: <#010x} + r7 = {o.r7: <#010x} + r8 = {o.r8: <#010x} + r9 = {o.r9: <#010x} + r10 = {o.r10: <#010x} + r11 = {o.r11: <#010x} + r12 = {o.r12: <#010x} + sp = {o.sp: <#010x} + lr = {o.lr: <#010x} + pc = {o.pc: <#010x} + cpsr = {o.cpsr: <#010x} + fsr = {o.fsr : <#010x} + far = {o.far : <#010x} + """.format(o=self) + + def GetPackedRegisterState(self): + return struct.pack('19I', self.r0, self.r1, self.r2, self.r3, + self.r4, self.r5, self.r6, self.r7, + self.r8, self.r9, self.r10, self.r11, + self.r12, self.sp, self.lr, self.pc, + self.cpsr, self.fsr, self.far) + + def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): + saved_state = kernel_version.CreateValueFromExpression(None, '(struct arm_saved_state *) ' + str(kdp_state.GetValueAsUnsigned())) + saved_state = saved_state.Dereference() + saved_state = PluginValue(saved_state) + self.ResetRegisterValues() + self.r0 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(0).GetValueAsUnsigned() + self.r1 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(1).GetValueAsUnsigned() + self.r2 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(2).GetValueAsUnsigned() + self.r3 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(3).GetValueAsUnsigned() + self.r4 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(4).GetValueAsUnsigned() + self.r5 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(5).GetValueAsUnsigned() + self.r6 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(6).GetValueAsUnsigned() + self.r7 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(7).GetValueAsUnsigned() + self.r8 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(8).GetValueAsUnsigned() + self.r9 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(9).GetValueAsUnsigned() + self.r10 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(10).GetValueAsUnsigned() + self.r11 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(11).GetValueAsUnsigned() + self.r12 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(12).GetValueAsUnsigned() + self.sp = saved_state.GetChildMemberWithName('sp').GetValueAsUnsigned() + self.lr = saved_state.GetChildMemberWithName('lr').GetValueAsUnsigned() + self.pc = saved_state.GetChildMemberWithName('pc').GetValueAsUnsigned() + self.cpsr = saved_state.GetChildMemberWithName('cpsr').GetValueAsUnsigned() + self.fsr = saved_state.GetChildMemberWithName('fsr').GetValueAsUnsigned() + self.far = saved_state.GetChildMemberWithName('far').GetValueAsUnsigned() + return self + + def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version): + saved_state = kernel_version.CreateValueFromExpression(None, '(struct arm_saved_state *) '+ 
str(kstack_saved_state_addr))
+        saved_state = saved_state.Dereference()
+        saved_state = PluginValue(saved_state)
+        self.ResetRegisterValues()
+        self.r0 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(0).GetValueAsUnsigned()
+        self.r1 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(1).GetValueAsUnsigned()
+        self.r2 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(2).GetValueAsUnsigned()
+        self.r3 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(3).GetValueAsUnsigned()
+        self.r4 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(4).GetValueAsUnsigned()
+        self.r5 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(5).GetValueAsUnsigned()
+        self.r6 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(6).GetValueAsUnsigned()
+        self.r7 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(7).GetValueAsUnsigned()
+        self.r8 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(8).GetValueAsUnsigned()
+        self.r9 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(9).GetValueAsUnsigned()
+        self.r10 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(10).GetValueAsUnsigned()
+        self.r11 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(11).GetValueAsUnsigned()
+        self.r12 = saved_state.GetChildMemberWithName('r').GetChildAtIndex(12).GetValueAsUnsigned()
+        self.sp = saved_state.GetChildMemberWithName('sp').GetValueAsUnsigned()
+        self.lr = saved_state.GetChildMemberWithName('lr').GetValueAsUnsigned()
+        # pc for a blocked thread is treated to be the next instruction it would run after thread switch.
+        self.pc = self.switch_context_address
+        self.cpsr = saved_state.GetChildMemberWithName('cpsr').GetValueAsUnsigned()
+        self.fsr = saved_state.GetChildMemberWithName('fsr').GetValueAsUnsigned()
+        self.far = saved_state.GetChildMemberWithName('far').GetValueAsUnsigned()
+        return self
+
+    def ReadRegisterDataFromContinuation(self, continuation_ptr):
+        self.ResetRegisterValues()
+        self.pc = continuation_ptr
+        return self
+
+
+class I386_RegisterSet(object):
+    """ register info set for i386 architecture
+    """
+    def __init__(self):
+        self.register_info = {} # a dict, indexed by 'sets' and 'registers' below
+        self.register_info['sets'] = ['GPR']
+        self.register_info['registers'] = [
+            { 'name': 'eax'   , 'bitsize': 32, 'offset' : 0, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 0, 'dwarf': 0},
+            { 'name': 'ebx'   , 'bitsize': 32, 'offset' : 4, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 1, 'dwarf': 1},
+            { 'name': 'ecx'   , 'bitsize': 32, 'offset' : 8, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 2, 'dwarf': 2},
+            { 'name': 'edx'   , 'bitsize': 32, 'offset' :12, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 3, 'dwarf': 3},
+            { 'name': 'edi'   , 'bitsize': 32, 'offset' :16, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 4, 'dwarf': 4},
+            { 'name': 'esi'   , 'bitsize': 32, 'offset' :20, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 5, 'dwarf': 5},
+            { 'name': 'ebp'   , 'bitsize': 32, 'offset' :24, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 6, 'dwarf': 6},
+            { 'name': 'esp'   , 'bitsize': 32, 'offset' :28, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 7, 'dwarf': 7},
+            { 'name': 'ss'    , 'bitsize': 32, 'offset' :32, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 8, 'dwarf': 8},
+            { 'name': 'eflags', 'bitsize': 32, 'offset' :36, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' : 9, 'dwarf': 9},
+            { 'name': 'eip'   , 'bitsize': 32, 'offset' :40, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :10,
'dwarf':10}, + { 'name': 'cs' , 'bitsize': 32, 'offset' :44, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :11, 'dwarf':11}, + { 'name': 'ds' , 'bitsize': 32, 'offset' :48, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :12, 'dwarf':12}, + { 'name': 'es' , 'bitsize': 32, 'offset' :52, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :13, 'dwarf':13}, + { 'name': 'fs' , 'bitsize': 32, 'offset' :56, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :14, 'dwarf':14}, + { 'name': 'gs' , 'bitsize': 32, 'offset' :60, 'encoding': 'uint' , 'format':'hex' , 'set': 0, 'gcc' :15, 'dwarf':15}, + ] + self.ResetRegisterValues() + def ResetRegisterValues(self): + """ set all registers to zero """ + self.eax = 0 + self.ebx = 0 + self.ecx = 0 + self.edx = 0 + self.edi = 0 + self.esi = 0 + self.ebp = 0 + self.esp = 0 + self.ss = 0 + self.eflags = 0 + self.eip = 0 + self.cs = 0 + self.ds = 0 + self.es = 0 + self.fs = 0 + self.gs = 0 + + def __str__(self): + return """ + eax = {o.eax: #010x} + ebx = {o.ebx: #010x} + ecx = {o.ecx: #010x} + edx = {o.edx: #010x} + edi = {o.edi: #010x} + esi = {o.esi: #010x} + ebp = {o.ebp: #010x} + esp = {o.esp: #010x} + ss = {o.ss: #010x} + eflags = {o.eflags: #010x} + eip = {o.eip: #010x} + cs = {o.cs: #010x} + ds = {o.ds: #010x} + es = {o.es: #010x} + fs = {o.fs: #010x} + gs = {o.gs: #010x} + """.format(o=self) + + def GetPackedRegisterState(self): + """ get a struct.pack register data """ + return struct.pack('16I', self.eax, self.ebx, self.ecx, + self.edx, self.edi, self.esi, + self.ebp, self.esp, self.ss, + self.eflags, self.eip, self.cs, + self.ds, self.es, self.fs, self.gs + ) + def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): + """ to be implemented""" + return None + def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version): + """ to be implemented """ + return None + + def ReadRegisterDataFromContinuation(self, continuation_ptr): + self.ResetRegisterValues() + self.eip = continuation_ptr + return self + + +class X86_64RegisterSet(object): + """ register info set for x86_64 architecture """ + def __init__(self): + self.register_info = {} + self.register_info['sets'] = ['GPR', 'FPU', 'EXC'] + self.register_info['registers'] = [ + { 'name':'rax' , 'bitsize' : 64, 'offset' : 0, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 0, 'dwarf' : 0}, + { 'name':'rbx' , 'bitsize' : 64, 'offset' : 8, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 3, 'dwarf' : 3}, + { 'name':'rcx' , 'bitsize' : 64, 'offset' : 16, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 2, 'dwarf' : 2, 'generic':'arg4', 'alt-name':'arg4', }, + { 'name':'rdx' , 'bitsize' : 64, 'offset' : 24, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 1, 'dwarf' : 1, 'generic':'arg3', 'alt-name':'arg3', }, + { 'name':'rdi' , 'bitsize' : 64, 'offset' : 32, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 5, 'dwarf' : 5, 'generic':'arg1', 'alt-name':'arg1', }, + { 'name':'rsi' , 'bitsize' : 64, 'offset' : 40, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 4, 'dwarf' : 4, 'generic':'arg2', 'alt-name':'arg2', }, + { 'name':'rbp' , 'bitsize' : 64, 'offset' : 48, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 6, 'dwarf' : 6, 'generic':'fp' , 'alt-name':'fp', }, + { 'name':'rsp' , 'bitsize' : 64, 'offset' : 56, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 7, 'dwarf' : 7, 'generic':'sp' , 'alt-name':'sp', }, + { 'name':'r8' , 'bitsize' : 64, 'offset' : 64, 'encoding':'uint' , 'format':'hex' , 'set': 
0, 'gcc' : 8, 'dwarf' : 8, 'generic':'arg5', 'alt-name':'arg5', }, + { 'name':'r9' , 'bitsize' : 64, 'offset' : 72, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 9, 'dwarf' : 9, 'generic':'arg6', 'alt-name':'arg6', }, + { 'name':'r10' , 'bitsize' : 64, 'offset' : 80, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 10, 'dwarf' : 10}, + { 'name':'r11' , 'bitsize' : 64, 'offset' : 88, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 11, 'dwarf' : 11}, + { 'name':'r12' , 'bitsize' : 64, 'offset' : 96, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 12, 'dwarf' : 12}, + { 'name':'r13' , 'bitsize' : 64, 'offset' : 104, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 13, 'dwarf' : 13}, + { 'name':'r14' , 'bitsize' : 64, 'offset' : 112, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 14, 'dwarf' : 14}, + { 'name':'r15' , 'bitsize' : 64, 'offset' : 120, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 15, 'dwarf' : 15}, + { 'name':'rip' , 'bitsize' : 64, 'offset' : 128, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'gcc' : 16, 'dwarf' : 16, 'generic':'pc', 'alt-name':'pc' }, + { 'name':'rflags' , 'bitsize' : 64, 'offset' : 136, 'encoding':'uint' , 'format':'hex' , 'set': 0, 'generic':'flags', 'alt-name':'flags' }, + { 'name':'cs' , 'bitsize' : 64, 'offset' : 144, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, + { 'name':'fs' , 'bitsize' : 64, 'offset' : 152, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, + { 'name':'gs' , 'bitsize' : 64, 'offset' : 160, 'encoding':'uint' , 'format':'hex' , 'set': 0 }, + ] + self.ResetRegisterValues() + + def ResetRegisterValues(self): + """ set all the registers to zero. """ + self.rax = 0 + self.rbx = 0 + self.rcx = 0 + self.rdx = 0 + self.rdi = 0 + self.rsi = 0 + self.rbp = 0 + self.rsp = 0 + self.r8 = 0 + self.r9 = 0 + self.r10 = 0 + self.r11 = 0 + self.r12 = 0 + self.r13 = 0 + self.r14 = 0 + self.r15 = 0 + self.rip = 0 + self.rflags = 0 + self.cs = 0 + self.fs = 0 + self.gs = 0 + def __str__(self): + return """ + rax = {o.rax: <#018x} + rbx = {o.rbx: <#018x} + rcx = {o.rcx: <#018x} + rdx = {o.rdx: <#018x} + rdi = {o.rdi: <#018x} + rsi = {o.rsi: <#018x} + rbp = {o.rbp: <#018x} + rsp = {o.rsp: <#018x} + r8 = {o.r8: <#018x} + r9 = {o.r9: <#018x} + r10 = {o.r10: <#018x} + r11 = {o.r11: <#018x} + r12 = {o.r12: <#018x} + r13 = {o.r13: <#018x} + r14 = {o.r14: <#018x} + r15 = {o.r15: <#018x} + rip = {o.rip: <#018x} + rflags = {o.rflags: <#018x} + cs = {o.cs: <#018x} + fs = {o.fs: <#018x} + gs = {o.gs: <#018x} + """.format(o=self) + + def GetPackedRegisterState(self): + """ get a struct.pack register data for passing to C constructs """ + return struct.pack('21Q', self.rax, self.rbx, self.rcx, self.rdx, self.rdi, + self.rsi, self.rbp, self.rsp, self.r8, self.r9, + self.r10, self.r11, self.r12, self.r13, self.r14, + self.r15, self.rip, self.rflags, self.cs, self.fs, self.gs) + + def ReadRegisterDataFromKDPSavedState(self, kdp_state, kernel_version): + saved_state = kernel_version.CreateValueFromExpression(None, '(struct x86_saved_state64 *) '+ str(kdp_state.GetValueAsUnsigned())) + saved_state = saved_state.Dereference() + saved_state = PluginValue(saved_state) + self.ResetRegisterValues() + self.rdi = saved_state.GetChildMemberWithName('rdi').GetValueAsUnsigned() + self.rsi = saved_state.GetChildMemberWithName('rsi').GetValueAsUnsigned() + self.rdx = saved_state.GetChildMemberWithName('rdx').GetValueAsUnsigned() + self.r10 = saved_state.GetChildMemberWithName('r10').GetValueAsUnsigned() + self.r8 = 
saved_state.GetChildMemberWithName('r8').GetValueAsUnsigned()
+        self.r9 = saved_state.GetChildMemberWithName('r9').GetValueAsUnsigned()
+        self.r15 = saved_state.GetChildMemberWithName('r15').GetValueAsUnsigned()
+        self.r14 = saved_state.GetChildMemberWithName('r14').GetValueAsUnsigned()
+        self.r13 = saved_state.GetChildMemberWithName('r13').GetValueAsUnsigned()
+        self.r12 = saved_state.GetChildMemberWithName('r12').GetValueAsUnsigned()
+        self.r11 = saved_state.GetChildMemberWithName('r11').GetValueAsUnsigned()
+        self.rbp = saved_state.GetChildMemberWithName('rbp').GetValueAsUnsigned()
+        self.rbx = saved_state.GetChildMemberWithName('rbx').GetValueAsUnsigned()
+        self.rcx = saved_state.GetChildMemberWithName('rcx').GetValueAsUnsigned()
+        self.rax = saved_state.GetChildMemberWithName('rax').GetValueAsUnsigned()
+        self.rip = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rip').GetValueAsUnsigned()
+        self.rflags = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rflags').GetValueAsUnsigned()
+        self.rsp = saved_state.GetChildMemberWithName('isf').GetChildMemberWithName('rsp').GetValueAsUnsigned()
+        return self
+
+    def ReadRegisterDataFromKernelStack(self, kstack_saved_state_addr, kernel_version):
+        saved_state = kernel_version.CreateValueFromExpression(None, '(struct x86_kernel_state *) '+ str(kstack_saved_state_addr))
+        saved_state = saved_state.Dereference()
+        saved_state = PluginValue(saved_state)
+        self.ResetRegisterValues()
+        self.rbx = saved_state.GetChildMemberWithName('k_rbx').GetValueAsUnsigned()
+        self.rsp = saved_state.GetChildMemberWithName('k_rsp').GetValueAsUnsigned()
+        self.rbp = saved_state.GetChildMemberWithName('k_rbp').GetValueAsUnsigned()
+        self.r12 = saved_state.GetChildMemberWithName('k_r12').GetValueAsUnsigned()
+        self.r13 = saved_state.GetChildMemberWithName('k_r13').GetValueAsUnsigned()
+        self.r14 = saved_state.GetChildMemberWithName('k_r14').GetValueAsUnsigned()
+        self.r15 = saved_state.GetChildMemberWithName('k_r15').GetValueAsUnsigned()
+        self.rip = saved_state.GetChildMemberWithName('k_rip').GetValueAsUnsigned()
+        return self
+
+    def ReadRegisterDataFromContinuation(self, continuation_ptr):
+        self.ResetRegisterValues()
+        self.rip = continuation_ptr
+        return self
+
+
+
+
+def IterateQueue(queue_head, element_ptr_type, element_field_name):
+    """ iterate over a queue in the kernel of type queue_head_t. refer to osfmk/kern/queue.h
+        params:
+            queue_head         - lldb.SBValue : Value object for queue_head.
+            element_ptr_type   - lldb.SBType : a pointer type of the element 'next' points to. Typically it is a struct like thread, task etc..
+            element_field_name - str : name of the field in the target struct.
+        returns:
+            A generator does not return. It is used for iterating.
+            SBValue : an object that's of type (element_type) queue_head->next. Always a pointer object
+    """
+    queue_head_addr = 0x0
+    if queue_head.TypeIsPointerType():
+        queue_head_addr = queue_head.GetValueAsUnsigned()
+    else:
+        queue_head_addr = queue_head.GetAddress().GetLoadAddress(osplugin_target_obj)
+    cur_elt = queue_head.GetChildMemberWithName('next')
+    while True:
+
+        if not cur_elt.IsValid() or cur_elt.GetValueAsUnsigned() == 0 or cur_elt.GetValueAsUnsigned() == queue_head_addr:
+            break
+        elt = cur_elt.Cast(element_ptr_type)
+        yield elt
+        cur_elt = elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')
+
+def GetUniqueSessionID(process_obj):
+    """ Create a unique session identifier.
+        params:
+            process_obj: lldb.SBProcess object referring to the connected process.
+        returns:
+            int - a unique number derived from the process id and stop id.
+    """
+    session_key_str = ""
+    if hasattr(process_obj, "GetUniqueID"):
+        session_key_str += str(process_obj.GetUniqueID()) + ":"
+    else:
+        session_key_str += "0:"
+
+    if hasattr(process_obj, "GetStopID"):
+        session_key_str += str(process_obj.GetStopID())
+    else:
+        session_key_str += "1"
+
+    return hash(session_key_str)
+
+
+(archX86_64, archARMv7_family, archI386) = ("x86_64", ("armv7", "armv7s") , "i386")
+
+class OperatingSystemPlugIn(object):
+    """Class that provides data for an instance of an LLDB 'OperatingSystemPython' plug-in class"""
+
+    def __init__(self, process):
+        '''Initialization needs a valid lldb.SBProcess object'''
+        self.process = None
+        self.registers = None
+        self.threads = None
+        self.thread_cache = {}
+        self.current_session_id = 0
+        self.kdp_thread = None
+        if type(process) is lldb.SBProcess and process.IsValid():
+            global osplugin_target_obj
+            self.process = process
+            self._target = process.target
+            osplugin_target_obj = self._target
+            self.current_session_id = GetUniqueSessionID(self.process)
+            self.version = self._target.FindGlobalVariables('version', 0).GetValueAtIndex(0)
+            self.kernel_stack_size = self._target.FindGlobalVariables('kernel_stack_size', 0).GetValueAtIndex(0).GetValueAsUnsigned()
+            self.kernel_context_size = 0
+            self.connected_over_kdp = False
+            plugin_string = self.process.GetPluginName().lower()
+            if plugin_string.find("kdp") >= 0:
+                self.connected_over_kdp = True
+            #print "version", self.version, "kernel_stack_size", self.kernel_stack_size, "context_size", self.kernel_context_size
+            self.threads = None # Will be a list of thread info dictionaries
+            triple = self.process.target.triple
+            arch = triple.split('-')[0].lower()
+            self.target_arch = ""
+            self.kernel_context_size = 0
+            if arch == archX86_64 :
+                self.target_arch = archX86_64
+                print "Target arch: x86_64"
+                self.register_set = X86_64RegisterSet()
+                self.kernel_context_size = self._target.FindFirstType('x86_kernel_state').GetByteSize()
+            elif arch in archARMv7_family :
+                self.target_arch = arch
+                print "Target arch: " + self.target_arch
+                self.register_set = Armv7_RegisterSet()
+            self.registers = self.register_set.register_info
+
+    def create_thread(self, tid, context):
+        th_ptr = context
+        th = self.version.CreateValueFromExpression(str(th_ptr),'(struct thread *)' + str(th_ptr))
+        thread_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned()
+        if tid != thread_id:
+            print "FATAL ERROR: Creating thread from memory 0x%x with tid in mem=%d when requested tid = %d " % (context, thread_id, tid)
+            return None
+        thread_obj = { 'tid'   : thread_id,
+                       'ptr'   : th.GetValueAsUnsigned(),
+                       'name'  : hex(th.GetValueAsUnsigned()).rstrip('L'),
+                       'queue' : hex(th.GetChildMemberWithName('wait_queue').GetValueAsUnsigned()).rstrip('L'),
+                       'state' : 'stopped',
+                       'stop_reason' : 'none'
+                     }
+        if self.current_session_id != GetUniqueSessionID(self.process):
+            self.thread_cache = {}
+            self.current_session_id = GetUniqueSessionID(self.process)
+
+        self.thread_cache[tid] = thread_obj
+        return thread_obj
+
+
+    def get_thread_info(self):
+        self.kdp_thread = None
+        self.kdp_state = None
+        if self.connected_over_kdp :
+            kdp = self._target.FindGlobalVariables('kdp',1).GetValueAtIndex(0)
+            kdp_state = kdp.GetChildMemberWithName('saved_state')
+            kdp_thread = kdp.GetChildMemberWithName('kdp_thread')
+            if kdp_thread and kdp_thread.GetValueAsUnsigned() != 0:
+                self.kdp_thread = kdp_thread
+                self.kdp_state = kdp_state
+                kdp_thid =
kdp_thread.GetChildMemberWithName('thread_id').GetValueAsUnsigned() + self.create_thread(kdp_thid, kdp_thread.GetValueAsUnsigned()) + self.thread_cache[kdp_thid]['core']=0 + retval = [self.thread_cache[kdp_thid]] + return retval + else: + print "FATAL FAILURE: Unable to find kdp_thread state for this connection." + return [] + + num_threads = self._target.FindGlobalVariables('threads_count',1).GetValueAtIndex(0).GetValueAsUnsigned() + #In case we are caught before threads are initialized. Fallback to threads known by astris/gdb server. + if num_threads <=0 : + return [] + + self.current_session_id = GetUniqueSessionID(self.process) + self.threads = [] + self.thread_cache = {} + self.processors = [] + try: + processor_list_val = PluginValue(self._target.FindGlobalVariables('processor_list',1).GetValueAtIndex(0)) + while processor_list_val.IsValid() and processor_list_val.GetValueAsUnsigned() !=0 : + th = processor_list_val.GetChildMemberWithName('active_thread') + th_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned() + cpu_id = processor_list_val.GetChildMemberWithName('cpu_id').GetValueAsUnsigned() + self.processors.append({'active_thread': th.GetValueAsUnsigned(), 'cpu_id': cpu_id}) + self.create_thread(th_id, th.GetValueAsUnsigned()) + self.thread_cache[th_id]['core'] = cpu_id + nth = self.thread_cache[th_id] + print "Found 0x%x on logical cpu %d" % ( nth['ptr'], nth['core']) + self.threads.append(nth) + self.thread_cache[nth['tid']] = nth + processor_list_val = processor_list_val.GetChildMemberWithName('processor_list') + except KeyboardInterrupt, ke: + print "OS Plugin Interrupted during thread loading process. \nWARNING:Thread registers and backtraces may not be accurate." + return self.threads + + if hasattr(self.process, 'CreateOSPluginThread'): + return self.threads + + # FIXME remove legacy code + try: + thread_q_head = self._target.FindGlobalVariables('threads', 0).GetValueAtIndex(0) + thread_type = self._target.FindFirstType('thread') + thread_ptr_type = thread_type.GetPointerType() + for th in IterateQueue(thread_q_head, thread_ptr_type, 'threads'): + th_id = th.GetChildMemberWithName('thread_id').GetValueAsUnsigned() + self.create_thread(th_id, th.GetValueAsUnsigned()) + nth = self.thread_cache[th_id] + for cputhread in self.processors: + if cputhread['active_thread'] == nth['ptr']: + nth['core'] = cputhread['cpu_id'] + #print "Found 0x%x on logical cpu %d" % ( nth['ptr'], cputhread['cpu_id']) + self.threads.append( nth ) + except KeyboardInterrupt, ke: + print "OS Plugin Interrupted during thread loading process. \nWARNING:Thread registers and backtraces may not be accurate." 
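+ # Note: on interrupt we fall through and return the partial thread
+ # list collected so far; lldb can still display those threads, just
+ # possibly not all of them.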
+ return self.threads + # end legacy code + return self.threads + + def get_register_info(self): + if self.registers == None: + print "Register Information not found " + return self.register_set.register_info + + def get_register_data(self, tid): + #print "searching for tid", tid + thobj = None + try: + if self.current_session_id != GetUniqueSessionID(self.process): + self.thread_cache = {} + self.current_session_id = GetUniqueSessionID(self.process) + + if tid in self.thread_cache.keys(): + thobj = self.version.CreateValueFromExpression(self.thread_cache[tid]['name'], '(struct thread *)' + str(self.thread_cache[tid]['ptr'])) + regs = self.register_set + if thobj == None : + print "FATAL ERROR: Could not find thread with id %d" % tid + regs.ResetRegisterValues() + return regs.GetPackedRegisterState() + + if self.kdp_thread and self.kdp_thread.GetValueAsUnsigned() == thobj.GetValueAsUnsigned(): + regs.ReadRegisterDataFromKDPSavedState(self.kdp_state, self.version) + return regs.GetPackedRegisterState() + if int(PluginValue(thobj).GetChildMemberWithName('kernel_stack').GetValueAsUnsigned()) != 0 : + if self.target_arch == archX86_64 : + # we do have a stack so lets get register information + saved_state_addr = PluginValue(thobj).GetChildMemberWithName('kernel_stack').GetValueAsUnsigned() + self.kernel_stack_size - self.kernel_context_size + regs.ReadRegisterDataFromKernelStack(saved_state_addr, self.version) + return regs.GetPackedRegisterState() + elif self.target_arch in archARMv7_family and int(PluginValue(thobj).GetChildMemberWithName('machine').GetChildMemberWithName('kstackptr').GetValueAsUnsigned()) != 0: + #we have stack on the machine.kstackptr. + saved_state_addr = PluginValue(thobj).GetChildMemberWithName('machine').GetChildMemberWithName('kstackptr').GetValueAsUnsigned() + regs.ReadRegisterDataFromKernelStack(saved_state_addr, self.version) + return regs.GetPackedRegisterState() + elif self.target_arch == archX86_64 or self.target_arch in archARMv7_family: + regs.ReadRegisterDataFromContinuation( PluginValue(thobj).GetChildMemberWithName('continuation').GetValueAsUnsigned()) + return regs.GetPackedRegisterState() + #incase we failed very miserably + except KeyboardInterrupt, ke: + print "OS Plugin Interrupted during thread register load. \nWARNING:Thread registers and backtraces may not be accurate. for tid = %d" % tid + regs.ResetRegisterValues() + print "FATAL ERROR: Failed to get register state for thread id 0x%x " % tid + print thobj + return regs.GetPackedRegisterState() + diff --git a/tools/lldbmacros/core/standard.py b/tools/lldbmacros/core/standard.py new file mode 100644 index 000000000..547c49c21 --- /dev/null +++ b/tools/lldbmacros/core/standard.py @@ -0,0 +1,126 @@ +import getopt +import os +import sys +import re + +class ArgumentError(Exception): + """ Exception class for raising errors in command arguments. The lldb_command framework will catch this + class of exceptions and print suitable error message to user. 
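+
+ A minimal, hypothetical usage sketch from inside a macro body:
+ if len(cmd_args) < 1:
+ raise ArgumentError("Please provide a task address")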
+ """ + def __init__(self, msg): + self.error_message = msg + def __str__(self): + return str(self.error_message) + + +class RedirectStdStreams(object): + def __init__(self, stdout=None, stderr=None): + self._stdout = stdout or sys.stdout + self._stderr = stderr or sys.stderr + + def __enter__(self): + self.old_stdout, self.old_stderr = sys.stdout, sys.stderr + self.old_stdout.flush(); self.old_stderr.flush() + sys.stdout, sys.stderr = self._stdout, self._stderr + + def __exit__(self, exc_type, exc_value, traceback): + self._stdout.flush(); self._stderr.flush() + sys.stdout = self.old_stdout + sys.stderr = self.old_stderr + +class CommandOutput(object): + """ + An output handler for all commands. Use Output.print to direct all output of macro via the handler. + These arguments are passed after a "--". eg + (lldb) zprint -- -o /tmp/zprint.out.txt + + Currently this provide capabilities + -o path/to/filename + The output of this command execution will be saved to file. Parser information or errors will + not be sent to file though. eg /tmp/output.txt + -s filter_string + the "filter_string" param is parsed to python regex expression and each line of output + will be printed/saved only if it matches the expression. + The command header will not be filtered in any case. + """ + def __init__(self, CommandResult): + """ Create a new instance to handle command output. + params: + CommandResult : SBCommandReturnObject result param from lldb's command invocation. + """ + self.fname=None + self.fhandle=None + self.FILTER=False + self.pluginRequired = False + self.pluginName = None + self.resultObj = CommandResult + self.immediateOutput = False + self.verbose_level = 0 + self.target_cmd_args = [] + self.target_cmd_options = {} + + def write(self, s): + """ Handler for all commands output. By default just print to stdout """ + if self.FILTER and not self.reg.search(s): return + if self.FILTER : s+="\n" + if self.fhandle != None: self.fhandle.write(s) + else: + if self.immediateOutput: + sys.__stdout__.write(s) + else: + res_str = s + if s.endswith("\n"): + res_str = s[:-1] + if self.resultObj and len(res_str) > 0: self.resultObj.AppendMessage(res_str) + + def flush(self): + if self.fhandle != None: + self.fhandle.flush() + + def __del__(self): + """ closes any open files. report on any errors """ + if self.fhandle != None : + self.fhandle.close() + + def setOptions(self,cmdargs, cmdoptions =''): + """ parse the arguments passed to the command + param : + cmdargs => [] of (typically args.split()) + cmdoptions : str - string of command level options. + These should be CAPITAL LETTER options only. + """ + opts=() + args = cmdargs + cmdoptions = cmdoptions.upper() + try: + opts,args = getopt.gnu_getopt(args,'hvo:s:p:'+ cmdoptions,[]) + self.target_cmd_args = args + except getopt.GetoptError,err: + raise ArgumentError(str(err)) + #continue with processing + for o,a in opts : + if o == "-h": + # This is misuse of exception but 'self' has no info on doc string. 
+ # The caller may handle exception and display appropriate info + raise ArgumentError("HELP") + if o == "-o" and len(a) > 0: + self.fname=os.path.normpath(os.path.expanduser(a.strip())) + self.fhandle=open(self.fname,"w") + print "saving results in file ",str(a) + elif o == "-s" and len(a) > 0: + self.reg = re.compile(a.strip(),re.MULTILINE|re.DOTALL) + self.FILTER=True + print "showing results for regex:",a.strip() + elif o == "-p" and len(a) > 0: + self.pluginRequired = True + self.pluginName = a.strip() + #print "passing output to " + a.strip() + elif o == "-v" : + self.verbose_level += 1 + else: + o = o.strip() + self.target_cmd_options[o] = a + + + + diff --git a/tools/lldbmacros/core/syntax_checker.py b/tools/lldbmacros/core/syntax_checker.py new file mode 100755 index 000000000..223b1e988 --- /dev/null +++ b/tools/lldbmacros/core/syntax_checker.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +helpdoc = """ +A simple utility that verifies the syntax for python scripts. +The checks it does are : + * Check for 'tab' characters in .py files + * Compile errors in py sources +Usage: + python syntax_checker.py [ ..] +""" +import py_compile +import sys +import os +import re + +tabs_search_rex = re.compile("^\s*\t+",re.MULTILINE|re.DOTALL) + +if __name__ == "__main__": + if len(sys.argv) < 2: + print "Error: Unknown arguments" + print helpdoc + sys.exit(1) + for fname in sys.argv[1:]: + if not os.path.exists(fname): + print "Error: Cannot recognize %s as a file" % fname + sys.exit(1) + if fname.split('.')[-1] != 'py': + print "Note: %s is not a valid python file. Skipping." % fname + continue + fh = open(fname) + strdata = fh.readlines() + lineno = 0 + tab_check_status = True + for linedata in strdata: + lineno += 1 + if len(tabs_search_rex.findall(linedata)) > 0 : + print "Error: Found a TAB character at %s:%d" % (fname, lineno) + tab_check_status = False + if tab_check_status == False: + print "Error: Syntax check failed. Please fix the errors and try again." + sys.exit(1) + #now check for error in compilation + try: + compile_result = py_compile.compile(fname, cfile="/dev/null", doraise=True) + except py_compile.PyCompileError as exc: + print str(exc) + print "Error: Compilation failed. Please fix the errors and try again." + sys.exit(1) + print "Success: Checked %s. No syntax errors found." % fname + sys.exit(0) + diff --git a/tools/lldbmacros/core/xnu_lldb_init.py b/tools/lldbmacros/core/xnu_lldb_init.py new file mode 100644 index 000000000..a0f23d081 --- /dev/null +++ b/tools/lldbmacros/core/xnu_lldb_init.py @@ -0,0 +1,43 @@ +import os + +def GetSettingsValues(debugger, setting_variable_name): + """ Queries the lldb internal settings + params: + debugger : lldb.SBDebugger instance + setting_variable_name: str - string name of the setting(eg prompt) + returns: + [] : Array of strings. 
Empty array if setting is not found/set + """ + retval = [] + settings_val_list = debugger.GetInternalVariableValue(setting_variable_name, debugger.GetInstanceName()) + for s in settings_val_list: + retval.append(str(s)) + return retval + +def __lldb_init_module(debugger, internal_dict): + debug_session_enabled = False + if "DEBUG_XNU_LLDBMACROS" in os.environ and len(os.environ['DEBUG_XNU_LLDBMACROS']) > 0: + debug_session_enabled = True + prev_os_plugin = "".join(GetSettingsValues(debugger, 'target.process.python-os-plugin-path')) + print "Loading kernel debugging from %s" % __file__ + print "LLDB version %s" % debugger.GetVersionString() + self_path = str(__file__) + base_dir_name = self_path[:self_path.rfind("/")] + core_os_plugin = base_dir_name + "/lldbmacros/core/operating_system.py" + osplugin_cmd = "settings set target.process.python-os-plugin-path \"%s\"" % core_os_plugin + xnu_debug_path = base_dir_name + "/lldbmacros/xnu.py" + xnu_load_cmd = "command script import \"%s\"" % xnu_debug_path + if debug_session_enabled : + if len(prev_os_plugin) > 0: + print "\nDEBUG_XNU_LLDBMACROS is set. Skipping the setting of OS plugin from dSYM.\nYou can manually set the OS plugin by running\n" + osplugin_cmd + else: + print osplugin_cmd + debugger.HandleCommand(osplugin_cmd) + print "\nDEBUG_XNU_LLDBMACROS is set. Skipping the load of xnu debug framework.\nYou can manually load the framework by running\n" + xnu_load_cmd + else: + print osplugin_cmd + debugger.HandleCommand(osplugin_cmd) + print xnu_load_cmd + debugger.HandleCommand(xnu_load_cmd) + print "\n" + diff --git a/tools/lldbmacros/ioreg.py b/tools/lldbmacros/ioreg.py new file mode 100644 index 000000000..930dcc1ab --- /dev/null +++ b/tools/lldbmacros/ioreg.py @@ -0,0 +1,773 @@ +from xnu import * +from utils import * +import sys + +###################################### +# Globals +###################################### +plane = None + +###################################### +# Type Summaries +###################################### +@lldb_type_summary(['OSObject *']) +@header("") +def GetObjectSummary(obj): + """ Show info about an OSObject - its vtable ptr and retain count, & more info for simple container classes. 
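+
+ note: the class is identified by reading the vtable pointer from the
+ first word of the object and backing it up two pointer-sized words
+ (the offset-to-top and RTTI slots) to land on the _ZTV symbol,
+ which is then symbolicated to recover the class name.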
+ """ + if obj is None: + return + + vt = dereference(Cast(obj, 'uintptr_t *')) - 2 * sizeof('uintptr_t') + vtype = kern.SymbolicateFromAddress(vt) + if hasattr(obj, 'retainCount'): + retCount = (obj.retainCount & 0xffff) + cntnrRetCount = (retCount >> 16) + out_string = "`object 0x{0: <16x}, vt 0x{1: <16x} <{2:s}>, retain count {3:d}, container retain {4:d}` ".format(obj, vt, vtype[0].GetName(), retCount, cntnrRetCount) + else: + if len(vtype): + out_string = "`object 0x{0: <16x}, vt 0x{1: <16x} <{2:s}>` ".format(obj, vt, vtype[0].GetName()) + else: + out_string = "`object 0x{0: <16x}, vt 0x{1: <16x}` ".format(obj, vt) + + ztvAddr = kern.GetLoadAddressForSymbol('_ZTV8OSString') + if vt == ztvAddr: + out_string += GetString(obj) + return out_string + + ztvAddr = kern.GetLoadAddressForSymbol('_ZTV8OSSymbol') + if vt == ztvAddr: + out_string += GetString(obj) + return out_string + + ztvAddr = kern.GetLoadAddressForSymbol('_ZTV8OSNumber') + if vt == ztvAddr: + out_string += GetNumber(obj) + return out_string + + ztvAddr = kern.GetLoadAddressForSymbol('_ZTV9OSBoolean') + if vt == ztvAddr: + out_string += GetBoolean(obj) + return out_string + + ztvAddr = kern.GetLoadAddressForSymbol('_ZTV7OSArray') + if vt == ztvAddr: + out_string += "(" + GetArray(Cast(obj, 'OSArray *')) + ")" + return out_string + + ztvAddr = kern.GetLoadAddressForSymbol('_ZTV5OSSet') + if vt == ztvAddr: + out_string += GetSet(Cast(obj, 'OSSet *')) + return out_string + + ztvAddr = kern.GetLoadAddressForSymbol('_ZTV12OSDictionary') + if vt == ztvAddr: + out_string += GetDictionary(Cast(obj, 'OSDictionary *')) + return out_string + + return out_string + +@lldb_type_summary(['IORegistryEntry *']) +@header("") +def GetRegistryEntrySummary(entry): + """ returns a string containing summary information about an IORegistry + object including it's registry id , vtable ptr and retain count + """ + name = None + out_string = "" + registryTable = entry.fRegistryTable + propertyTable = entry.fPropertyTable + + name = LookupKeyInOSDict(registryTable, kern.globals.gIOServicePlane.nameKey) + if name is None: + name = LookupKeyInOSDict(registryTable, kern.globals.gIONameKey) + if name is None: + name = LookupKeyInOSDict(propertyTable, kern.globals.gIOClassKey) + + if name is not None: + out_string += "+-o {0:s} ".format(GetString(Cast(name, 'OSString *'))) + elif Cast(entry, 'IOService *').pwrMgt and Cast(entry, 'IOService *').pwrMgt.Name: + out_string += "+-o {0:s} ".format(Cast(entry, 'IOService *').pwrMgt.Name) + else: + out_string += "+-o ?? " + + # I'm using uintptr_t for now to work around FindFirstType & Co. should allow you to make pointer types directly + vtableAddr = dereference(Cast(entry, 'uintptr_t *')) - 2 * sizeof('uintptr_t *') + vtype = kern.SymbolicateFromAddress(vtableAddr) + if vtype is None or len(vtype) < 1: + out_string += " [lcpu (kernel's numbering convention)] + """ + if not cmd_args: + print "Please specify a port to read out of" + print ReadIOPort8.__doc__ + return + + portAddr = ArgumentStringToInt(cmd_args[0]) + if len(cmd_args) >= 2: + lcpu = ArgumentStringToInt(cmd_args[1]) + else: + lcpu = xnudefines.lcpu_self + + ReadIOPortInt(portAddr, 1, lcpu) + +@lldb_command('readioport16') +def ReadIOPort8(cmd_args=None): + """ Read value stored in the specified IO port. The CPU can be optionally + specified as well. 
+ Prints 0xBAD10AD in case of a bad read
+ Syntax: (lldb) readioport16 <port> [lcpu (kernel's numbering convention)]
+ """
+ if not cmd_args:
+ print "Please specify a port to read out of"
+ print ReadIOPort8.__doc__
+ return
+
+ portAddr = ArgumentStringToInt(cmd_args[0])
+ if len(cmd_args) >= 2:
+ lcpu = ArgumentStringToInt(cmd_args[1])
+ else:
+ lcpu = xnudefines.lcpu_self
+
+ ReadIOPortInt(portAddr, 2, lcpu)
+
+@lldb_command('readioport32')
+def ReadIOPort32(cmd_args=None):
+ """ Read value stored in the specified IO port. The CPU can be optionally
+ specified as well.
+ Prints 0xBAD10AD in case of a bad read
+ Syntax: (lldb) readioport32 <port> [lcpu (kernel's numbering convention)]
+ """
+ if not cmd_args:
+ print "Please specify a port to read out of"
+ print ReadIOPort32.__doc__
+ return
+
+ portAddr = ArgumentStringToInt(cmd_args[0])
+ if len(cmd_args) >= 2:
+ lcpu = ArgumentStringToInt(cmd_args[1])
+ else:
+ lcpu = xnudefines.lcpu_self
+
+ ReadIOPortInt(portAddr, 4, lcpu)
+
+@lldb_command('writeioport8')
+def WriteIOPort8(cmd_args=None):
+ """ Write the value to the specified IO port. The size of the value is
+ determined by the name of the command. The CPU used can be optionally
+ specified as well.
+ Syntax: (lldb) writeioport8 <port> <value> [lcpu (kernel's numbering convention)]
+ """
+ if not cmd_args or len(cmd_args) < 2:
+ print "Please specify a port to write to, followed by the value you want to write"
+ print WriteIOPort8.__doc__
+ return
+
+ portAddr = ArgumentStringToInt(cmd_args[0])
+ value = ArgumentStringToInt(cmd_args[1])
+
+ if len(cmd_args) >= 3:
+ lcpu = ArgumentStringToInt(cmd_args[2])
+ else:
+ lcpu = xnudefines.lcpu_self
+
+ WriteIOPortInt(portAddr, 1, value, lcpu)
+
+@lldb_command('writeioport16')
+def WriteIOPort16(cmd_args=None):
+ """ Write the value to the specified IO port. The size of the value is
+ determined by the name of the command. The CPU used can be optionally
+ specified as well.
+ Syntax: (lldb) writeioport16 <port> <value> [lcpu (kernel's numbering convention)]
+ """
+ if not cmd_args or len(cmd_args) < 2:
+ print "Please specify a port to write to, followed by the value you want to write"
+ print WriteIOPort16.__doc__
+ return
+
+ portAddr = ArgumentStringToInt(cmd_args[0])
+ value = ArgumentStringToInt(cmd_args[1])
+
+ if len(cmd_args) >= 3:
+ lcpu = ArgumentStringToInt(cmd_args[2])
+ else:
+ lcpu = xnudefines.lcpu_self
+
+ WriteIOPortInt(portAddr, 2, value, lcpu)
+
+@lldb_command('writeioport32')
+def WriteIOPort32(cmd_args=None):
+ """ Write the value to the specified IO port. The size of the value is
+ determined by the name of the command. The CPU used can be optionally
+ specified as well.
+ Syntax: (lldb) writeioport32 [lcpu (kernel's numbering convention)] + """ + if not cmd_args or len(cmd_args) < 2: + print "Please specify a port to write to, followed by the value you want to write" + print WriteIOPort32.__doc__ + return + + portAddr = ArgumentStringToInt(cmd_args[0]) + value = ArgumentStringToInt(cmd_args[1]) + + if len(cmd_args) >= 3: + lcpu = ArgumentStringToInt(cmd_args[2]) + else: + lcpu = xnudefines.lcpu_self + + WriteIOPortInt(portAddr, 4, value, lcpu) + +@lldb_command('showioservicepm') +def ShowIOServicePM(cmd_args=None): + """ Routine to dump the IOServicePM object + Syntax: (lldb) showioservicepm + """ + if not cmd_args: + print "Please enter the pointer to the IOServicePM object you'd like to introspect" + print ShowIOServicePM.__doc__ + return + + iopmpriv = kern.GetValueFromAddress(cmd_args[0], 'IOServicePM *') + out_string = "MachineState {0: <6d} (".format(iopmpriv.MachineState) + + # Power state map + pstate_map = { + 0: 'kIOPM_Finished', + 1: 'kIOPM_OurChangeTellClientsPowerDown', + 2: 'kIOPM_OurChangeTellClientsPowerDown', + 3: 'kIOPM_OurChangeNotifyInterestedDriversWillChange', + 4: 'kIOPM_OurChangeSetPowerState', + 5: 'kIOPM_OurChangeWaitForPowerSettle', + 6: 'kIOPM_OurChangeNotifyInterestedDriversDidChange', + 7: 'kIOPM_OurChangeTellCapabilityDidChange', + 8: 'kIOPM_OurChangeFinish', + 9: 'Unused_MachineState_9', + 10: 'kIOPM_ParentChangeTellPriorityClientsPowerDown', + 11: 'kIOPM_ParentChangeNotifyInterestedDriversWillChange', + 12: 'kIOPM_ParentChangeSetPowerState', + 13: 'kIOPM_ParentChangeWaitForPowerSettle', + 14: 'kIOPM_ParentChangeNotifyInterestedDriversDidChange', + 15: 'kIOPM_ParentChangeTellCapabilityDidChange', + 16: 'kIOPM_ParentChangeAcknowledgePowerChange', + 17: 'kIOPM_NotifyChildrenStart', + 18: 'kIOPM_NotifyChildrenOrdered', + 19: 'kIOPM_NotifyChildrenDelayed', + 20: 'kIOPM_SyncTellClientsPowerDown', + 21: 'kIOPM_SyncTellPriorityClientsPowerDown', + 22: 'kIOPM_SyncNotifyWillChange', + 23: 'kIOPM_SyncNotifyDidChange', + 24: 'kIOPM_SyncTellCapabilityDidChange', + 25: 'kIOPM_SyncFinish', + 26: 'kIOPM_TellCapabilityChangeDone', + 27: 'kIOPM_DriverThreadCallDone' + } + powerstate = unsigned(iopmpriv.MachineState) + if powerstate in pstate_map: + out_string += "{0:s}".format(pstate_map[powerstate]) + else: + out_string += "Unknown_MachineState" + out_string += "), " + + if iopmpriv.MachineState != 20: + out_string += "DriverTimer = {0: <6d}, SettleTime = {1: < 6d}, HeadNoteFlags = {2: #12x}, HeadNotePendingAcks = {3: #012x}, ".format( + unsigned(iopmpriv.DriverTimer), + unsigned(iopmpriv.SettleTimeUS), + unsigned(iopmpriv.HeadNoteChangeFlags), + unsigned(iopmpriv.HeadNotePendingAcks)) + + if iopmpriv.DeviceOverrideEnabled != 0: + out_string += "DeviceOverrides, " + + out_string += "DeviceDesire = {0: <6d}, DesiredPowerState = {1: <6d}, PreviousRequest = {2: <6d}\n".format( + unsigned(iopmpriv.DeviceDesire), + unsigned(iopmpriv.DesiredPowerState), + unsigned(iopmpriv.PreviousRequestPowerFlags)) + + print out_string + +###################################### +# Helper routines +###################################### +def ShowRegistryEntryRecurse(entry, prefix, printProps): + """ prints registry entry summary and recurses through all its children. 
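+ A hypothetical top-level call, assuming root_entry holds an
+ IORegistryEntry * value for the registry root:
+ ShowRegistryEntryRecurse(root_entry, "", False)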
+ """ + # Setup + global plane + out_string = "" + plen = (len(prefix)//2) + registryTable = entry.fRegistryTable + propertyTable = entry.fPropertyTable + + # Print entry details + print "{0:s}{1:s}".format(prefix, GetRegistryEntrySummary(entry)) + # Printing large property tables make it look like lldb is 'stuck' + if printProps: + print GetRegDictionary(propertyTable, prefix + " | ") + + # Recurse + if plane is None: + childKey = kern.globals.gIOServicePlane.keys[1] + else: + childKey = plane.keys[1] + childArray = LookupKeyInOSDict(registryTable, childKey) + if childArray is not None: + idx = 0 + ca = Cast(childArray, 'OSArray *') + count = unsigned(ca.count) + while idx < count: + if plen != 0 and plen != 1 and (plen & (plen - 1)) == 0: + ShowRegistryEntryRecurse(Cast(ca.array[idx], 'IORegistryEntry *'), prefix + "| ", printProps) + else: + ShowRegistryEntryRecurse(Cast(ca.array[idx], 'IORegistryEntry *'), prefix + " ", printProps) + idx += 1 + +def FindRegistryEntryRecurse(entry, search_name, stopAfterFirst): + """ Checks if given registry entry's name matches the search_name we're looking for + If yes, it prints the entry's summary and then recurses through its children + If no, it does nothing and recurses through its children + """ + # Setup + global plane + registryTable = entry.fRegistryTable + propertyTable = entry.fPropertyTable + + # Compare + name = None + name = LookupKeyInOSDict(registryTable, kern.globals.gIOServicePlane.nameKey) + if name is None: + name = LookupKeyInOSDict(registryTable, kern.globals.gIONameKey) + if name is None: + name = LookupKeyInOSDict(propertyTable, kern.globals.gIOClassKey) + + if name is not None: + if str(Cast(name, 'OSString *').string) == search_name: + print GetRegistryEntrySummary(entry) + if stopAfterFirst is True: + return True + elif Cast(entry, 'IOService *').pwrMgt and Cast(entry, 'IOService *').pwrMgt.Name: + name = Cast(entry, 'IOService *').pwrMgt.Name + if str(name) == search_name: + print GetRegistryEntrySummary(entry) + if stopAfterFirst is True: + return True + + # Recurse + if plane is None: + childKey = kern.globals.gIOServicePlane.keys[1] + else: + childKey = plane.keys[1] + childArray = LookupKeyInOSDict(registryTable, childKey) + if childArray is not None: + idx = 0 + ca = Cast(childArray, 'OSArray *') + count = unsigned(ca.count) + while idx < count: + if FindRegistryEntryRecurse(Cast(ca.array[idx], 'IORegistryEntry *'), search_name, stopAfterFirst) is True: + return True + idx += 1 + return False + +def FindRegistryObjectRecurse(entry, search_name): + """ Checks if given registry entry's name matches the search_name we're looking for + If yes, return the entry + If no, it does nothing and recurses through its children + Implicitly stops after finding the first entry + """ + # Setup + global plane + registryTable = entry.fRegistryTable + propertyTable = entry.fPropertyTable + + # Compare + name = None + name = LookupKeyInOSDict(registryTable, kern.globals.gIOServicePlane.nameKey) + if name is None: + name = LookupKeyInOSDict(registryTable, kern.globals.gIONameKey) + if name is None: + name = LookupKeyInOSDict(propertyTable, kern.globals.gIOClassKey) + + if name is not None: + if str(Cast(name, 'OSString *').string) == search_name: + return entry + elif Cast(entry, 'IOService *').pwrMgt and Cast(entry, 'IOService *').pwrMgt.Name: + name = Cast(entry, 'IOService *').pwrMgt.Name + if str(name) == search_name: + return entry + + # Recurse + if plane is None: + childKey = kern.globals.gIOServicePlane.keys[1] + else: + childKey = 
plane.keys[1]
+ childArray = LookupKeyInOSDict(registryTable, childKey)
+ if childArray is not None:
+ ca = Cast(childArray, 'OSArray *')
+ for idx in range(ca.count):
+ registry_object = FindRegistryObjectRecurse(Cast(ca.array[idx], 'IORegistryEntry *'), search_name)
+ if not registry_object or int(registry_object) == int(0):
+ continue
+ else:
+ return registry_object
+ return None
+
+def LookupKeyInOSDict(osdict, key):
+ """ Returns the value corresponding to a given key in an OSDictionary
+ Returns None if the key was not found
+ """
+ if not osdict:
+ return
+ count = unsigned(osdict.count)
+ result = None
+ idx = 0
+ while idx < count and result is None:
+ if key == osdict.dictionary[idx].key:
+ result = osdict.dictionary[idx].value
+ idx += 1
+ return result
+
+def LookupKeyInPropTable(propertyTable, key_str):
+ """ Returns the value corresponding to a given key from a registry entry's property table
+ Returns None if the key was not found
+ The property that is being searched for is specified as a string in key_str
+ """
+ if not propertyTable:
+ return
+ count = unsigned(propertyTable.count)
+ result = None
+ idx = 0
+ while idx < count and result is None:
+ if key_str == str(propertyTable.dictionary[idx].key.string):
+ result = propertyTable.dictionary[idx].value
+ idx += 1
+ return result
+
+def GetRegDictionary(osdict, prefix):
+ """ Returns a specially formatted string summary of the given OSDictionary
+ This is done in order to pretty-print registry property tables in showregistry
+ and other macros
+ """
+ out_string = prefix + "{\n"
+ idx = 0
+ count = unsigned(osdict.count)
+
+ while idx < count:
+ out_string += prefix + " " + GetObjectSummary(osdict.dictionary[idx].key) + " = " + GetObjectSummary(osdict.dictionary[idx].value) + "\n"
+ idx += 1
+ out_string += prefix + "}\n"
+ return out_string
+
+def GetString(string):
+ """ Returns the python string representation of a given OSString
+ """
+ out_string = "\"{0:s}\"".format(Cast(string, 'OSString *').string)
+ return out_string
+
+def GetNumber(num):
+ """ Returns the string representation of a given OSNumber
+ """
+ out_string = "{0:d}".format(Cast(num, 'OSNumber *').value)
+ return out_string
+
+def GetBoolean(b):
+ """ Shows info about a given OSBoolean
+ """
+ out_string = ""
+ if b == kern.globals.gOSBooleanFalse:
+ out_string += "No"
+ else:
+ out_string += "Yes"
+ return out_string
+
+def GetMetaClass(mc):
+ """ Shows info about a given OSMetaClass
+ """
+ out_string = "{0: <5d}x {1: >5d} bytes {2:s}\n".format(mc.instanceCount, mc.classSize, mc.className.string)
+ return out_string
+
+def GetArray(arr):
+ """ Returns a string containing info about a given OSArray
+ """
+ out_string = ""
+ idx = 0
+ count = unsigned(arr.count)
+
+ while idx < count:
+ obj = arr.array[idx]
+ idx += 1
+ out_string += GetObjectSummary(obj)
+ if idx < unsigned(arr.count):
+ out_string += ","
+ return out_string
+
+def GetDictionary(d):
+ """ Returns a string containing info about a given OSDictionary
+ """
+ out_string = "{"
+ idx = 0
+ count = unsigned(d.count)
+
+ while idx < count:
+ obj = d.dictionary[idx].key
+ out_string += GetObjectSummary(obj) + "="
+ obj = d.dictionary[idx].value
+ idx += 1
+ out_string += GetObjectSummary(obj)
+ if idx < count:
+ out_string += ","
+ out_string += "}"
+ return out_string
+
+def GetSet(se):
+ """ Returns a string containing info about a given OSSet
+ """
+ out_string = "[" + GetArray(se.members) + "]"
+ return out_string
+
+def ReadIOPortInt(addr, numbytes, lcpu):
+ """ Prints results after reading a given ioport
+ """
+ result = 0xBAD10AD
+
+ if "kdp" !=
GetConnectionProtocol(): + print "Target is not connected over kdp. Nothing to do here." + return + + # Set up the manual KDP packet + input_address = unsigned(addressof(kern.globals.manual_pkt.input)) + len_address = unsigned(addressof(kern.globals.manual_pkt.len)) + data_address = unsigned(addressof(kern.globals.manual_pkt.data)) + if not WriteInt32ToMemoryAddress(0, input_address): + print "0x{0: <4x}: 0x{1: <1x}".format(addr, result) + return + + kdp_pkt_size = GetType('kdp_readioport_req_t').GetByteSize() + if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address): + print "0x{0: <4x}: 0x{1: <1x}".format(addr, result) + return + + kgm_pkt = kern.GetValueFromAddress(data_address, 'kdp_readioport_req_t *') + + header_value = GetKDPPacketHeaderInt(request=GetEnumValue('kdp_req_t::KDP_READIOPORT'), length = kdp_pkt_size) + + if( WriteInt64ToMemoryAddress((header_value), int(addressof(kgm_pkt.hdr))) and + WriteInt16ToMemoryAddress(addr, int(addressof(kgm_pkt.address))) and + WriteInt32ToMemoryAddress(numbytes, int(addressof(kgm_pkt.nbytes))) and + WriteInt16ToMemoryAddress(lcpu, int(addressof(kgm_pkt.lcpu))) and + WriteInt32ToMemoryAddress(1, input_address) + ): + + result_pkt = Cast(addressof(kern.globals.manual_pkt.data), 'kdp_readioport_reply_t *') + + if(result_pkt.error == 0): + print "This macro is incomplete till is fixed" + # FIXME: Uncomment me when is fixed + #if numbytes == 1: + # result = dereference(Cast(result_pkt.data, 'uint8_t *')) + #elif numbytes == 2: + # result = dereference(Cast(result_pkt.data, 'uint16_t *')) + #elif numbytes == 4: + # result = dereference(cast(result_pkt.data, 'uint32_t *')) + + print "0x{0: <4x}: 0x{1: <1x}".format(addr, result) + +def WriteIOPortInt(addr, numbytes, value, lcpu): + """ Writes 'value' into ioport specified by 'addr'. Prints errors if it encounters any + """ + if "kdp" != GetConnectionProtocol(): + print "Target is not connected over kdp. Nothing to do here." 
+ return + + # Set up the manual KDP packet + input_address = unsigned(addressof(kern.globals.manual_pkt.input)) + len_address = unsigned(addressof(kern.globals.manual_pkt.len)) + data_address = unsigned(addressof(kern.globals.manual_pkt.data)) + if not WriteInt32ToMemoryAddress(0, input_address): + print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr) + return + + kdp_pkt_size = GetType('kdp_writeioport_req_t').GetByteSize() + if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address): + print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr) + return + + kgm_pkt = kern.GetValueFromAddress(data_address, 'kdp_writeioport_req_t *') + + header_value = GetKDPPacketHeaderInt(request=GetEnumValue('kdp_req_t::KDP_WRITEIOPORT'), length = kdp_pkt_size) + + if( WriteInt64ToMemoryAddress((header_value), int(addressof(kgm_pkt.hdr))) and + WriteInt16ToMemoryAddress(addr, int(addressof(kgm_pkt.address))) and + WriteInt32ToMemoryAddress(numbytes, int(addressof(kgm_pkt.nbytes))) and + WriteInt16ToMemoryAddress(lcpu, int(addressof(kgm_pkt.lcpu))) + ): + print "This macro is incomplete till is fixed" + # FIXME: Uncomment me when is fixed + #if numbytes == 1: + # if not WriteInt8ToMemoryAddress(value, int(addressof(kgm_pkt.data))): + # print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr) + #elif numbytes == 2: + # if not WriteInt16ToMemoryAddress(value, int(addressof(kgm_pkt.data))): + # print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr) + #elif numbytes == 4: + # if not WriteInt32ToMemoryAddress(value, int(addressof(kgm_pkt.data))): + # print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr) + + if not WriteInt32ToMemoryAddress(1, input_address): + print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr) + return + + result_pkt = Cast(addressof(kern.globals.manual_pkt.data), 'kdp_writeioport_reply_t *') + + # Done with the write + if(result_pkt.error == 0): + print "Writing 0x {0: x} to port {1: <4x} was successful".format(value, addr) + else: + print "error writing 0x{0: x} to port 0x{1: <4x}".format(value, addr) + diff --git a/tools/lldbmacros/ipc.py b/tools/lldbmacros/ipc.py new file mode 100644 index 000000000..db09a4b46 --- /dev/null +++ b/tools/lldbmacros/ipc.py @@ -0,0 +1,589 @@ +""" Please make sure you read the README file COMPLETELY BEFORE reading anything below. + It is very critical that you read coding guidelines in Section E in README file. +""" +from xnu import * +import sys, shlex +from utils import * +from process import * +import xnudefines + +@header("{0: <20s} {1: <6s} {2: <6s} {3: <10s} {4: <15s}".format("task", "pid", '#acts', "tablesize", "command")) +def GetTaskIPCSummary(task): + """ Display a task's ipc summary. 
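+ columns: "tablesize" is the size of the task's ipc entry table
+ (itk_space->is_table_size) and "#acts" is the task's thread count.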
+ params: + task : core.value represeting a Task in kernel + returns + str - string of ipc info for the task + """ + out_string = '' + format_string = "{0: <#020x} {1: <6d} {2: <6d} {3: <10d} {4: <15s}" + pval = Cast(task.bsd_info, 'proc *') + table_size = int(task.itk_space.is_table_size) + proc_name = str(pval.p_comm) + out_string += format_string.format(task, pval.p_pid, task.thread_count, table_size, proc_name) + return out_string + +@header("{0: <20s} {1: <28s} {2: <12s} {3: <6s} {4: <4s} {5: <20s} {6: <4s}\n".format( + "port", "mqueue", "recvname", "flags", "refs", "recvname", "dest")) +def GetPortSummary(port, show_kmsg_summary=True, prefix=""): + """ Display a port's summary + params: + port : core.value representing a port in the kernel + returns + str : string of ipc info for the given port + """ + out_string = "" + portp = Cast(port, 'struct ipc_port *') + destspacep = kern.GetValueFromAddress(0, 'struct ipc_space *') + spacep = portp.data.receiver + format_string = "{0: #019x} {1: #019x} {2: <8s} {3: #011x} {4: <5s} {5: #05x} {6: #019x} {7: <16s}\n" + if portp.ip_object.io_bits & 0x80000000: + out_string += prefix + format_string.format( + unsigned(portp), addressof(portp.ip_messages), ' '*8, + unsigned(portp.ip_messages.data.port.receiver_name), + "APort", portp.ip_object.io_references, + unsigned(portp.ip_messages.data.port.receiver_name), + GetPortDestProc(portp)) + else: + out_string += prefix + format_string.format( + unsigned(portp), addressof(portp.ip_messages), ' '*8, + unsigned(portp.ip_messages.data.port.receiver_name), + "DPort", portp.ip_object.io_references, unsigned(portp), + "inactive-port") + + if show_kmsg_summary: + kmsgp = Cast(portp.ip_messages.data.port.messages.ikmq_base, 'ipc_kmsg_t') + out_string += prefix + GetKMsgSummary.header + prefix + GetKMsgSummary(kmsgp) + + kmsgheadp = kmsgp + kmsgp = kmsgp.ikm_next + while (kmsgp) != (kmsgheadp): + out_string += prefix + GetKMsgSummary(kmsgp) + kmsgp = kmsgp.ikm_next + return out_string + +def GetPortDestProc(portp): + """ Display the name and pid of a given port's receiver + params: + portp : core.value representing a pointer to a port in the kernel + destspacep : core.value representing a pointer to an ipc_space + returns: + str : string containing receiver's name and pid + """ + spacep = portp.data.receiver + out_str = "Not found" + for tsk in kern.tasks: + if tsk.itk_space == spacep: + if tsk.bsd_info: + destprocp = Cast(tsk.bsd_info, 'struct proc *') + out_str = "{0:s}({1: + """ + if not cmd_args: + print "No arguments passed" + print ShowIPC.__doc__ + return False + ipc = kern.GetValueFromAddress(cmd_args[0], 'ipc_space *') + if not ipc: + print "unknown arguments:", str(cmd_args) + return False + print GetIPCInformation.header + print GetIPCInformation(ipc, False, False) + +# EndMacro: showipc + +# Macro: showtaskipc + +@lldb_command('showtaskipc') +def ShowTaskIPC(cmd_args=None): + """ Routine to print IPC summary of given task + Usage: showtaskipc
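+ example (the task address here is hypothetical):
+ (lldb) showtaskipc 0xffffff80367a7940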
+ """ + if not cmd_args: + print "No arguments passed" + print ShowTaskIPC.__doc__ + return False + tval = kern.GetValueFromAddress(cmd_args[0], 'task *') + if not tval: + print "unknown arguments:", str(cmd_args) + return False + print GetTaskSummary.header + " " + GetProcSummary.header + pval = Cast(tval.bsd_info, 'proc *') + print GetTaskSummary(tval) + " " + GetProcSummary(pval) + print GetTaskIPCSummary.header + print GetTaskIPCSummary(tval) + +# EndMacro: showtaskipc + +# Macro: showallipc + +@lldb_command('showallipc') +def ShowAllIPC(cmd_args=None): + """ Routine to print IPC summary of all tasks + Usage: showallipc + """ + for t in kern.tasks: + print GetTaskSummary.header + " " + GetProcSummary.header + pval = Cast(t.bsd_info, 'proc *') + print GetTaskSummary(t) + " " + GetProcSummary(pval) + print GetIPCInformation.header + print GetIPCInformation(t.itk_space, False, False) + "\n\n" + +# EndMacro: showallipc + +@lldb_command('showipcsummary') +def ShowIPCSummary(cmd_args=None): + """ Summarizes the IPC state of all tasks. + This is a convenient way to dump some basic clues about IPC messaging. You can use the output to determine + tasks that are candidates for further investigation. + """ + print GetTaskIPCSummary.header + for t in kern.tasks: + print GetTaskIPCSummary(t) + return + +def GetKObjectFromPort(portval): + """ Get Kobject description from the port. + params: portval - core.value representation of 'ipc_port *' object + returns: str - string of kobject information + """ + kobject_str = "{0: <#020x}".format(portval.kdata.kobject) + io_bits = unsigned(portval.ip_object.io_bits) + objtype_index = io_bits & 0xfff + if objtype_index < len(xnudefines.kobject_types) : + desc_str = "kobject({0:s})".format(xnudefines.kobject_types[objtype_index]) + else: + desc_str = "kobject(UNKNOWN) {:d}".format(objtype_index) + return kobject_str + " " + desc_str + +@static_var('destcache', {}) +def GetDestinationProcessFromPort(port): + """ + params: port - core.value representation of 'ipc_port *' object + returns: str - name of process + """ + out_str = '' + dest_space = port.data.receiver + found_dest = False + #update destcache if data is not found + if hex(dest_space) not in GetDestinationProcessFromPort.destcache: + for t in kern.tasks: + if hex(t.itk_space) == hex(dest_space): + pval = Cast(t.bsd_info, 'proc *') + GetDestinationProcessFromPort.destcache[hex(dest_space)] = (t, pval) + found_dest = True + break + #end of for loop + else: found_dest = True + + if found_dest: + (ftask , fproc) = GetDestinationProcessFromPort.destcache[hex(dest_space)] + if fproc: + out_str = "{0:s}({1:d})".format(fproc.p_comm, fproc.p_pid ) + else: + out_str = "task {0: <#020x}".format(ftask) + return out_str + + + +@header("{0: <20s} {1: <20s}".format("destname", "destination") ) +def GetPortDestinationSummary(port): + """ Get destination information for a port. 
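+ note: the receiver task is found by comparing port->data.receiver
+ against the itk_space of every task in the system; results are
+ memoized per ipc_space so repeated lookups stay cheap.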
+ params: port - core.value representation of 'ipc_port *' object + returns: str - string of info about ports destination + """ + out_str = '' + format_string = "{0: <20s} {1: <20s}" + destname_str = '' + destination_str = '' + ipc_space_kernel = unsigned(kern.globals.ipc_space_kernel) + target_spaceval = port.data.receiver + if unsigned(target_spaceval) == ipc_space_kernel : + destname_str = GetKObjectFromPort(port) + else: + if int(port.ip_object.io_bits) & 0x80000000 : + destname_str = "{0: <#020x}".format(port.ip_messages.data.port.receiver_name) + destination_str = GetDestinationProcessFromPort(port) + else: + destname_str = "{0: <#020x}".format(port) + destination_str = "inactive-port" + + out_str += format_string.format(destname_str, destination_str) + return out_str + +@lldb_type_summary(['ipc_entry_t']) +@header("{0: <20s} {1: <20s} {2: <8s} {3: <8s} {4: <20s} {5: <20s}".format("object", "name","rite", "urefs", "destname", "destination")) +def GetIPCEntrySummary(entry, ipc_name=''): + """ Get summary of a ipc entry. + params: + entry - core.value representing ipc_entry_t in the kernel + ipc_name - str of format '0x0123' for display in summary. + returns: + str - string of ipc entry related information + """ + out_str = '' + entry_ptr = int(hex(entry), 16) + format_string = "{0: <#020x} {1: <12s} {2: <8s} {3: <8d} {4: <20s} {5: <20s}" + right_str = '' + destname_str = '' + destination_str = '' + + ie_object = entry.ie_object + ie_bits = int(entry.ie_bits) + urefs = int(ie_bits & 0xffff) + if ie_bits & 0x00100000 : + right_str = 'Dead' + elif ie_bits & 0x00080000: + right_str = 'Set' + else: + if ie_bits & 0x00010000 : + if ie_bits & 0x00020000 : + right_str = 'SR' + else: + right_str = 'S' + elif ie_bits & 0x00020000: + right_str = 'R' + elif ie_bits & 0x00040000 : + right_str = 'O' + if int(entry.index.request) != 0: + portval = Cast(ie_object, 'ipc_port_t') + requestsval = portval.ip_requests + sorightval = requestsval[int(entry.index.request)].notify.port + soright_ptr = unsigned(sorightval) + if soright_ptr != 0: + if soright_ptr & 0x1 : right_str +='s' + elif soright_ptr & 0x2 : right_str +='d' + else : right_str +='n' + if ie_bits & 0x00800000 : right_str +='c' + # now show the port destination part + destname_str = GetPortDestinationSummary(Cast(ie_object, 'ipc_port_t')) + + out_str = format_string.format(ie_object, ipc_name, right_str, urefs, destname_str, destination_str) + return out_str + +@header("{0: >20s}".format("user bt") ) +def GetPortUserStack(port, task): + """ Get UserStack information for the given port & task. 
+ params: port - core.value representation of 'ipc_port *' object
+ task - value representing 'task *' object
+ returns: str - string information on port's userstack
+ """
+ out_str = ''
+ ie_port_callstack = port.ip_callstack
+ ie_port_spares = port.ip_spares[0]
+ proc_val = Cast(task.bsd_info, 'proc *')
+ if ie_port_callstack[0]:
+ out_str += "{: <10x}".format(ie_port_callstack[0])
+ count = 1
+ while count < 16 and ie_port_callstack[count]:
+ out_str += "{: <10x}".format(ie_port_callstack[count])
+ count = count + 1
+ if ie_port_spares != proc_val.p_pid:
+ out_str += " ({:<10d})".format(ie_port_spares)
+ out_str += '\n'
+ return out_str
+
+@lldb_type_summary(['ipc_space *'])
+@header("{0: <20s} {1: <20s} {2: <20s} {3: <8s} {4: <10s} {5: <16s} {6: <10s} {7: <7s}".format('ipc_space', 'is_task', 'is_table', 'flags', 'ports', 'table_next', 'low_mod', 'high_mod'))
+def GetIPCInformation(space, show_entries=False, show_userstack=False):
+ """ Provide a summary of the ipc space
+ """
+ out_str = ''
+ format_string = "{0: <#020x} {1: <#020x} {2: <#020x} {3: <8s} {4: <10d} {5: <#01x} {6: >10d} {7: >10d}"
+ is_tableval = space.is_table
+ ports = int(space.is_table_size)
+ flags =''
+ is_bits = int(space.is_bits)
+ if (is_bits & 0x40000000) == 0: flags +='A'
+ else: flags += ' '
+ if (is_bits & 0x20000000) != 0: flags +='G'
+ out_str += format_string.format(space, space.is_task, space.is_table, flags, space.is_table_size, space.is_table_next, space.is_low_mod, space.is_high_mod)
+
+ # show each individual entry, if asked
+ if show_entries == True:
+ out_str += "\n\t" + GetIPCEntrySummary.header + "\n"
+ num_entries = ports
+ index = 0
+ while index < num_entries:
+ entryval = GetObjectAtIndexFromArray(is_tableval, index)
+ entry_ie_bits = unsigned(entryval.ie_bits)
+ if (int(entry_ie_bits) & 0x001f0000 ) != 0:
+ entry_name = "{0: <#020x}".format( (index <<8 | entry_ie_bits >> 24) )
+ out_str += "\t" + GetIPCEntrySummary(entryval, entry_name) + "\n"
+ if show_userstack == True:
+ entryport = Cast(entryval.ie_object, 'ipc_port *')
+ if entryval.ie_object and (int(entry_ie_bits) & 0x00070000) and entryport.ip_callstack[0]:
+ out_str += GetPortUserStack.header
+ out_str += GetPortUserStack(entryport, space.is_task)
+ index +=1
+ # done with showing entries
+ return out_str
+
+# Macro: showrights
+
+@lldb_command('showrights')
+def ShowRights(cmd_args=None):
+ """ Routine to print rights information for the given IPC space
+ Usage: showrights <address of ipc space>
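+ example (the ipc_space address here is hypothetical):
+ (lldb) showrights 0xffffff8012345678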
+ """
+ if not cmd_args:
+ print "No arguments passed"
+ print ShowRights.__doc__
+ return False
+ ipc = kern.GetValueFromAddress(cmd_args[0], 'ipc_space *')
+ if not ipc:
+ print "unknown arguments:", str(cmd_args)
+ return False
+ print GetIPCInformation.header
+ print GetIPCInformation(ipc, True, False)
+
+# EndMacro: showrights
+
+@lldb_command('showtaskrights')
+def ShowTaskRights(cmd_args=None):
+ """ Routine to print IPC rights information for a task
+ Usage: showtaskrights <task address>
+ """
+ if cmd_args == None:
+ print "No arguments passed"
+ print ShowTaskRights.__doc__
+ return False
+ tval = kern.GetValueFromAddress(cmd_args[0], 'task *')
+ if not tval:
+ print "unknown arguments:", str(cmd_args)
+ return False
+ print GetTaskSummary.header + " " + GetProcSummary.header
+ pval = Cast(tval.bsd_info, 'proc *')
+ print GetTaskSummary(tval) + " " + GetProcSummary(pval)
+ print GetIPCInformation.header
+ print GetIPCInformation(tval.itk_space, True, False)
+
+# Macro: showtaskrightsbt
+
+@lldb_command('showtaskrightsbt')
+def ShowTaskRightsBt(cmd_args=None):
+ """ Routine to print IPC rights information with user stacks for a task
+ Usage: showtaskrightsbt <task address>
+ """
+ if cmd_args == None:
+ print "No arguments passed"
+ print ShowTaskRightsBt.__doc__
+ return False
+ tval = kern.GetValueFromAddress(cmd_args[0], 'task *')
+ if not tval:
+ print "unknown arguments:", str(cmd_args)
+ return False
+ print GetTaskSummary.header + " " + GetProcSummary.header
+ pval = Cast(tval.bsd_info, 'proc *')
+ print GetTaskSummary(tval) + " " + GetProcSummary(pval)
+ print GetIPCInformation.header
+ print GetIPCInformation(tval.itk_space, True, True)
+
+# EndMacro: showtaskrightsbt
+
+# Macro: showallrights
+
+@lldb_command('showallrights')
+def ShowAllRights(cmd_args=None):
+ """ Routine to print rights information for IPC space of all tasks
+ Usage: showallrights
+ """
+ for t in kern.tasks:
+ print GetTaskSummary.header + " " + GetProcSummary.header
+ pval = Cast(t.bsd_info, 'proc *')
+ print GetTaskSummary(t) + " " + GetProcSummary(pval)
+ print GetIPCInformation.header
+ print GetIPCInformation(t.itk_space, True, False) + "\n\n"
+
+# EndMacro: showallrights
+
+# Macro: showpipestats
+@lldb_command('showpipestats')
+def ShowPipeStats(cmd_args=None):
+ """ Display pipes usage information in the kernel
+ """
+ print "Number of pipes: {: d}".format(kern.globals.amountpipes)
+ print "Memory used by pipes: {:s}".format(sizeof_fmt(int(kern.globals.amountpipekva)))
+ print "Max memory allowed for pipes: {:s}".format(sizeof_fmt(int(kern.globals.maxpipekva)))
+# EndMacro: showpipestats
+
+# Macro: showtaskbusyports
+@lldb_command('showtaskbusyports')
+def ShowTaskBusyPorts(cmd_args=None):
+ """ Routine to print information about receive rights belonging to this task that
+ have enqueued messages. This is often a sign of a blocked or hung process
+ Usage: showtaskbusyports <task address>
+ """
+ if not cmd_args:
+ print "No arguments passed. Please pass in the address of a task"
+ print ShowTaskBusyPorts.__doc__
+ return
+ task = kern.GetValueFromAddress(cmd_args[0], 'task_t')
+ print GetTaskBusyPorts(task)
+ return
+
+def GetTaskBusyPorts(task):
+ """ Prints all busy ports for a given task, i.e. all receive rights belonging
+ to this task that have enqueued messages.
+ params: + task : core.value representing a task in kernel + returns: + str : String containing information about the given task's busy ports + """ + isp = task.itk_space + i = 0 + out_string = "" + while i < isp.is_table_size: + iep = addressof(isp.is_table[i]) + if iep.ie_bits & 0x00020000: + port = Cast(iep.ie_object, 'ipc_port_t') + if port.ip_messages.data.port.msgcount > 0: + out_string += GetPortSummary.header + GetPortSummary(port) + i = i + 1 + return out_string +# EndMacro: showtaskbusyports + +# Macro: showallbusyports +@lldb_command('showallbusyports') +def ShowAllBusyPorts(cmd_args=None): + """ Routine to print information about all receive rights on the system that + have enqueued messages. + """ + task_queue_head = kern.globals.tasks + + for tsk in kern.tasks: + print GetTaskBusyPorts(tsk) + return +# EndMacro: showallbusyports + +# Macro: showmqueue: +@lldb_command('showmqueue') +def ShowMQueue(cmd_args=None): + """ Routine that lists details about a given mqueue + Syntax: (lldb) showmqueue 0xaddr + """ + if not cmd_args: + print "Please specify the address of the ipc_mqueue whose details you want to print" + print ShowMQueue.__doc__ + return + mqueue = kern.GetValueFromAddress(cmd_args[0], 'struct ipc_mqueue *') + wq_type = mqueue.data.pset.set_queue.wqs_wait_queue.wq_type + if int(wq_type) == 3: + psetoff = getfieldoffset('struct ipc_pset *', 'ips_messages') + pset = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(psetoff) + print GetPortSetSummary.header + GetPortSetSummary(kern.GetValueFromAddress(pset, 'struct ipc_pset *')) + if int(wq_type) == 2: + portoff = getfieldoffset('struct ipc_port', 'ip_messages') + port = unsigned(ArgumentStringToInt(cmd_args[0])) - unsigned(portoff) + print GetPortSummary.header + GetPortSummary(kern.GetValueFromAddress(port, 'struct ipc_port *')) +# EndMacro: showmqueue + +# Macro: showpset +@lldb_command('showpset') +def ShowPSet(cmd_args=None): + """ Routine that prints details for a given ipc_pset * + Syntax: (lldb) showpset 0xaddr + """ + if not cmd_args: + print "Please specify the address of the pset whose details you want to print" + print ShowPSet.__doc__ + return + + print GetPortSetSummary.header + GetPortSetSummary(kern.GetValueFromAddress(cmd_args[0], 'ipc_pset *')) +# EndMacro: showpset + diff --git a/tools/lldbmacros/kdp.py b/tools/lldbmacros/kdp.py new file mode 100644 index 000000000..7c31630ea --- /dev/null +++ b/tools/lldbmacros/kdp.py @@ -0,0 +1,285 @@ +from xnu import * +from utils import * +import sys + +def GetKDPPacketHeaderInt(request=0, is_reply=False, seq=0, length=0, key=0): + """ create a 64 bit number that could be saved as pkt_hdr_t + params: + request:int - 7 bit kdp_req_t request type + is_reply:bool - False => request, True => reply + seq: int - 8 sequence number within session + length: int - 16 bit length of entire pkt including hdr + key: int - session key + returns: + int - 64 bit number to be saved in memory + """ + retval = request + if is_reply: + retval = 1<<7 |retval + retval = (seq << 8) | retval + retval = (length << 16) | retval + #retval = (retval << 32) | key + retval = (key << 32) | retval + return retval + + +def KDPDumpInfo(subcmd, file_name="", dest_ip="", router_ip="", port=0): + """ Setup the state for DUMP INFO commands for sending coredump etc + """ + if "kdp" != GetConnectionProtocol(): + print "Target is not connected over kdp. Nothing to do here." 
+ return False + input_address = unsigned(addressof(kern.globals.manual_pkt.input)) + len_address = unsigned(addressof(kern.globals.manual_pkt.len)) + data_address = unsigned(addressof(kern.globals.manual_pkt.data)) + if not WriteInt32ToMemoryAddress(0, input_address): + return False + + kdp_pkt_size = GetType('kdp_dumpinfo_req_t').GetByteSize() + if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address): + return False + + data_addr = int(addressof(kern.globals.manual_pkt)) + pkt = kern.GetValueFromAddress(data_addr, 'kdp_dumpinfo_req_t *') + if len(file_name) > 49: + file_name = file_name[:49] + if len(dest_ip) > 15: + dest_ip = dest_ip[:15] + if len(router_ip) > 15: + router_ip = router_ip[:15] + + header_value =GetKDPPacketHeaderInt(request=GetEnumValue('kdp_req_t::KDP_DUMPINFO'), length=kdp_pkt_size) + # 0x1f is same as KDP_DUMPINFO + if ( WriteInt64ToMemoryAddress((header_value), int(addressof(pkt.hdr))) and + WriteInt32ToMemoryAddress(subcmd, int(addressof(pkt.type))) and + WriteStringToMemoryAddress(file_name, int(addressof(pkt.name))) and + WriteStringToMemoryAddress(dest_ip, int(addressof(pkt.destip))) and + WriteStringToMemoryAddress(router_ip, int(addressof(pkt.routerip))) + ): + #We have saved important data successfully + if port > 0: + if not WriteInt32ToMemoryAddress(port, int(addressof(pkt.port))): + return False + if WriteInt32ToMemoryAddress(1, input_address): + return True + return False + +@lldb_command('sendcore') +def KDPSendCore(cmd_args=None): + """ Configure kernel to send a coredump to the specified IP + Syntax: sendcore [filename] + Configure the kernel to transmit a kernel coredump to a server (kdumpd) + at the specified IP address. This is useful when the remote target has + not been previously configured to transmit coredumps, and you wish to + preserve kernel state for later examination. NOTE: You must issue a "continue" + command after using this macro to trigger the kernel coredump. The kernel + will resume waiting in the debugger after completion of the coredump. You + may disable coredumps by executing the "disablecore" macro. You can + optionally specify the filename to be used for the generated core file. + + """ + if cmd_args == None or len(cmd_args) < 1: + print KDPSendCore.__doc__ + return False + ip_address = cmd_args[0] + filename="" + if len(cmd_args) >=2: + filename = cmd_args[1].strip() + retval = KDPDumpInfo(GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_CORE'), file_name=filename, dest_ip=ip_address) + if retval: + print "Remote system has been setup for coredump. Please detach/continue the system. " + return True + else: + print "Something went wrong. Failed to setup the coredump on the target." + return False + + +@lldb_command('sendsyslog') +def KDPSendSyslog(cmd_args=None): + """ Configure kernel to send a system log to the specified IP + Syntax: sendsyslog [filename] + Configure the kernel to transmit a kernel system log to a server (kdumpd) + at the specified IP address. NOTE: You must issue a "continue" + command after using this macro to trigger the kernel system log. The kernel + will resume waiting in the debugger after completion. You can optionally + specify the name to be used for the generated system log. 
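+ example (the server IP and file name here are hypothetical):
+ (lldb) sendsyslog 10.0.1.5 mymachine.syslog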
+ """
+ if cmd_args == None or len(cmd_args) < 1:
+ print KDPSendSyslog.__doc__
+ return False
+ ip_address = cmd_args[0]
+ filename = ""
+ if len(cmd_args) >= 2:
+ filename = cmd_args[1].strip()
+ retval = KDPDumpInfo(GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_SYSTEMLOG'), file_name = filename, dest_ip = ip_address)
+ if retval:
+ print "Remote system has been setup to send system log. Please detach/continue the system."
+ return True
+ else:
+ print "Something went wrong. Failed to setup the systemlog on the target."
+ return False
+
+@lldb_command('sendpaniclog')
+def KDPSendPaniclog(cmd_args=None):
+ """ Configure kernel to send a panic log to the specified IP
+ Syntax: sendpaniclog <IP address> [filename]
+ Configure the kernel to transmit a kernel paniclog to a server (kdumpd)
+ at the specified IP address. NOTE: You must issue a "continue"
+ command after using this macro to trigger the kernel panic log. The kernel
+ will resume waiting in the debugger after completion. You can optionally
+ specify the name to be used for the generated panic log.
+ """
+ if cmd_args == None or len(cmd_args) < 1:
+ print KDPSendPaniclog.__doc__
+ return False
+ ip_address = cmd_args[0]
+ filename = ""
+ if len(cmd_args) >= 2:
+ filename = cmd_args[1].strip()
+ retval = KDPDumpInfo(GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_PANICLOG'), file_name = filename, dest_ip = ip_address)
+ if retval:
+ print "Remote system has been setup to send panic log. Please detach/continue the system."
+ return True
+ else:
+ print "Something went wrong. Failed to setup the paniclog on the target."
+ return False
+
+
+@lldb_command('disablecore')
+def KDPDisableCore(cmd_args=None):
+ """ Configure the kernel to disable coredump transmission
+ Reconfigures the kernel so that it no longer transmits kernel coredumps. This
+ complements the "sendcore" macro, but it may be used if the kernel has been
+ configured to transmit coredumps through boot-args as well.
+
+ """
+ retval = KDPDumpInfo(GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_DISABLE'))
+ if retval :
+ print "Disabled coredump functionality on remote system."
+ else:
+ print "Failed to disable coredump functionality."
+ return retval
+
+@lldb_command('resume_on')
+def KDPResumeON(cmd_args=None):
+ """ The target system will resume when detaching or exiting from lldb.
+ This is the default behavior.
+ """
+ subcmd = GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_SETINFO') | GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_RESUME')
+ retval = KDPDumpInfo(subcmd)
+ if retval :
+ print "Target system will resume on detaching from lldb."
+ else:
+ print "Failed to enable resume functionality."
+ return retval
+
+@lldb_command('resume_off')
+def KDPResumeOFF(cmd_args=None):
+ """ The target system will not resume when detaching or exiting from lldb.
+ """
+ subcmd = GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_SETINFO') | GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_NORESUME')
+ retval = KDPDumpInfo(subcmd)
+ if retval :
+ print "Target system will not resume on detaching from lldb."
+ else:
+ print "Failed to disable resume functionality."
+ return retval
+
+
+
+@lldb_command('getdumpinfo')
+def KDPGetDumpInfo(cmd_args=None):
+ """ Retrieve the current remote dump settings.
+ """
+ if not KDPDumpInfo(GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_GETINFO')):
+ print "Failed to get dump settings."
+        return False
+    dumpinfo = Cast(addressof(kern.globals.manual_pkt.data), 'kdp_dumpinfo_reply_t *')
+    target_dump_type = int(dumpinfo.type)
+    if target_dump_type & GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_REBOOT'):
+        print "System will reboot after kernel info gets dumped."
+    else:
+        print "System will not reboot after kernel info gets dumped."
+    if target_dump_type & GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_RESUME'):
+        print "System will allow a re-attach after KDP disconnect."
+    else:
+        print "System will not allow a re-attach after KDP disconnect."
+    target_dump_type = target_dump_type & GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_MASK')
+    if target_dump_type == GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_DISABLE'):
+        print "Kernel not set up for remote dumps."
+    else:
+        kern_dump_type = ''
+        if target_dump_type == GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_CORE'):
+            kern_dump_type = "Core File"
+        elif target_dump_type == GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_PANICLOG'):
+            kern_dump_type = "Panic Log"
+        elif target_dump_type == GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_SYSTEMLOG'):
+            kern_dump_type = "System Log"
+        print "Kernel dump type: " + kern_dump_type
+        fname = "(autogenerated)"
+        if int(dumpinfo.name[0]) != 0:
+            fname = str(dumpinfo.name)
+        print "Filename: " + fname
+        print "Network Info: {:s} [{:d}] , Router: {:s}".format(dumpinfo.destip, dumpinfo.port, dumpinfo.routerip)
+    # end of get dump info
+
+
+@lldb_command('kdp-reenter')
+def KDPReenter(cmd_args=None):
+    """ Schedules reentry into the debugger
+        after <seconds> seconds, and resumes the target.
+        usage: kdp-reenter <seconds>
+    """
+    if len(cmd_args) < 1:
+        print "Please provide valid time in seconds"
+        print KDPReenter.__doc__
+        return False
+
+    if "kdp" != GetConnectionProtocol():
+        print "Target is not connected over kdp. Nothing to do here."
+        return False
+
+    num_seconds = ArgumentStringToInt(cmd_args[0])
+    milliseconds_to_sleep = num_seconds * 1000
+    if WriteInt32ToMemoryAddress(milliseconds_to_sleep, addressof(kern.globals.kdp_reentry_deadline)):
+        lldb.debugger.HandleCommand('process continue')
+        return True
+    print "Failed to setup kdp-reentry."
+    return False
+
+@lldb_command('kdp-reboot')
+def KDPReboot(cmd_args=None):
+    """ Restart the remote target
+    """
+    if "kdp" != GetConnectionProtocol():
+        print "Target is not connected over kdp. Nothing to do here."
+        return False
+
+    print "Rebooting the remote machine."
+    lldb.debugger.HandleCommand('process plugin packet send --command 0x13')
+    lldb.debugger.HandleCommand('detach')
+    return True
+
+@lldb_command('setdumpinfo')
+def KDPSetDumpInfo(cmd_args=None):
+    """ Configure the current remote dump settings.
+        Specify "" if you want to use the defaults (filename) or previously configured
+        settings (ip/router). Specify 0 for the port if you wish to
+        use the previously configured/default setting for that.
+        Syntax: setdumpinfo <filename> <ip> <router> <port>
+    """
+    if not cmd_args:
+        print KDPSetDumpInfo.__doc__
+        return False
+    if len(cmd_args) < 4:
+        print "Not enough arguments."
+        print KDPSetDumpInfo.__doc__
+        return False
+    portnum = ArgumentStringToInt(cmd_args[3])
+    retval = KDPDumpInfo(GetEnumValue('kdp_dumpinfo_t::KDP_DUMPINFO_SETINFO'), cmd_args[0], cmd_args[1], cmd_args[2], portnum)
+    if retval:
+        print "Successfully saved the dumpinfo."
+    else:
+        print "Failed to save the dumpinfo."
+    return retval
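Taken together, these macros drive the kdumpd-based remote dump flow end to end. A hypothetical session follows; the server IP, filename, and echoed output are illustrative only, not captured from a real target:

    (lldb) sendcore 10.0.1.20 mypanic.core
    Remote system has been set up for coredump. Please detach/continue the system.
    (lldb) process continue
    (lldb) getdumpinfo
    System will not reboot after kernel info gets dumped.
    System will allow a re-attach after KDP disconnect.
    Kernel dump type: Core File
    Filename: mypanic.core
    (lldb) disablecore
    Disabled coredump functionality on remote system.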
diff --git a/tools/lldbmacros/mbufdefines.py b/tools/lldbmacros/mbufdefines.py
new file mode 100644
index 000000000..66325463b
--- /dev/null
+++ b/tools/lldbmacros/mbufdefines.py
@@ -0,0 +1,30 @@
+from xnu import *
+from utils import *
+import ctypes
+
+MBSHIFT = 20
+MSIZE = 256
+MCLBYTES = 2048
+M16KCLBYTES = 16384
+NBPG = 4096
+
+MB_SCVALID = 4
+
+MB_INUSE = 1
+MB_COMP_INUSE = 2
+
+SLF_MAPPED = 0x0001
+SLF_PARTIAL = 0x0002
+SLF_DETACHED = 0x0004
+
+INTP = ctypes.POINTER(ctypes.c_int)
+
+kgm_manual_pkt_ppc = 0x549C
+kgm_manual_pkt_i386 = 0x249C
+kgm_manual_pkt_x86_64 = 0xFFFFFF8000002930
+kgm_manual_pkt_arm = 0xFFFF04A0
+kgm_kdp_pkt_data_len = 128
+
+MCF_NOCPUCACHE = 0x10
+
+NSLABSPMB = 256
diff --git a/tools/lldbmacros/mbufs.py b/tools/lldbmacros/mbufs.py
new file mode 100644
index 000000000..e379fda7d
--- /dev/null
+++ b/tools/lldbmacros/mbufs.py
@@ -0,0 +1,778 @@
+
+""" Please make sure you read the README COMPLETELY BEFORE reading anything below.
+    It is very critical that you read coding guidelines in Section E in README file.
+"""
+
+from xnu import *
+from utils import *
+
+from mbufdefines import *
+import xnudefines
+
+# Macro: mbuf_stat
+@lldb_command('mbuf_stat')
+def MBufStat(cmd_args=None):
+    """ Print extended mbuf allocator statistics.
+    """
+    hdr_format = "{0: <16s} {1: >8s} {2: >8s} {3: ^16s} {4: >8s} {5: >12s} {6: >8s} {7: >8s} {8: >8s}"
+    print hdr_format.format('class', 'total', 'cached', 'uncached', 'inuse', 'failed', 'waiter', 'notified', 'purge')
+    print hdr_format.format('name', 'objs', 'objs', 'objs/slabs', 'objs', 'alloc count', 'count', 'count', 'count')
+    print hdr_format.format('-'*16, '-'*8, '-'*8, '-'*16, '-'*8, '-'*12, '-'*8, '-'*8, '-'*8)
+    entry_format = "{0: <16s} {1: >8d} {2: >8d} {3:>7d} / {4:<6d} {5: >8d} {6: >12d} {7: >8d} {8: >8d} {9: >8d}"
+    num_items = sizeof(kern.globals.mbuf_table) / sizeof(kern.globals.mbuf_table[0])
+    ncpus = int(kern.globals.ncpu)
+    for i in range(num_items):
+        mbuf = kern.globals.mbuf_table[i]
+        mcs = Cast(mbuf.mtbl_stats, 'mb_class_stat_t *')
+        mc = mbuf.mtbl_cache
+        total = 0
+        total += int(mc.mc_full.bl_total) * int(mc.mc_cpu[0].cc_bktsize)
+        ccp_arr = mc.mc_cpu
+        for cpu in range(ncpus):    # separate index; do not shadow the outer loop variable
+            ccp = ccp_arr[cpu]
+            if int(ccp.cc_objs) > 0:
+                total += int(ccp.cc_objs)
+            if int(ccp.cc_pobjs) > 0:
+                total += int(ccp.cc_pobjs)
+        print entry_format.format(mcs.mbcl_cname, mcs.mbcl_total, total,
+                                  mcs.mbcl_infree, mcs.mbcl_slab_cnt,
+                                  (mcs.mbcl_total - total - mcs.mbcl_infree),
+                                  mcs.mbcl_fail_cnt, mbuf.mtbl_cache.mc_waiter_cnt,
+                                  mcs.mbcl_notified, mcs.mbcl_purge_cnt
+                                  )
+# EndMacro: mbuf_stat
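A minimal standalone model of the "cached" computation above: full buckets times bucket size, plus any loaded (cc_objs) and backup (cc_pobjs) per-CPU magazines, where negative values mean "empty" and are skipped. The helper name and inputs are invented for illustration:

    # Model of MBufStat's cached-object total (not the kernel structures).
    def cached_objects(full_buckets, bktsize, per_cpu):
        total = full_buckets * bktsize        # objects sitting in full buckets
        for cc_objs, cc_pobjs in per_cpu:     # per-CPU loaded/backup magazines
            if cc_objs > 0:
                total += cc_objs
            if cc_pobjs > 0:
                total += cc_pobjs
        return total

    # Example: 3 full buckets of 32 objects, plus two CPUs' magazines.
    assert cached_objects(3, 32, [(10, -1), (0, 4)]) == 96 + 10 + 4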
+# Macro: mbuf_walkpkt
+@lldb_command('mbuf_walkpkt')
+def MbufWalkPacket(cmd_args=None):
+    """ Walk the mbuf packet chain (m_nextpkt)
+    """
+    if (cmd_args == None or len(cmd_args) == 0):
+        print "Missing argument 0 in user function."
+        return
+    mp = kern.GetValueFromAddress(cmd_args[0], 'mbuf *')
+    cnt = 1
+    tot = 0
+    while (mp):
+        out_string = ""
+        mbuf_walk_packet_format = "{0:4d} 0x{1:x} [len {2:4d}, type {3:2d}, "
+        out_string += mbuf_walk_packet_format.format(cnt, mp, mp.m_hdr.mh_len, mp.m_hdr.mh_type)
+        if (kern.globals.mclaudit != 0):
+            out_string += GetMbufBuf2Mca(mp) + ", "
+        tot = tot + mp.m_hdr.mh_len
+        out_string += "total " + str(tot) + "]"
+        print out_string
+        mp = mp.m_hdr.mh_nextpkt
+        cnt += 1
+# EndMacro: mbuf_walkpkt
+
+# Macro: mbuf_walk
+@lldb_command('mbuf_walk')
+def MbufWalk(cmd_args=None):
+    """ Walk the mbuf chain (m_next)
+    """
+    mp = kern.GetValueFromAddress(cmd_args[0], 'mbuf *')
+    cnt = 1
+    tot = 0
+    while (mp):
+        out_string = ""
+        mbuf_walk_format = "{0:4d} 0x{1:x} [len {2:4d}, type {3:2d}, "
+        out_string += mbuf_walk_format.format(cnt, mp, mp.m_hdr.mh_len, mp.m_hdr.mh_type)
+        if (kern.globals.mclaudit != 0):
+            out_string += GetMbufBuf2Mca(mp) + ", "
+        tot = tot + mp.m_hdr.mh_len
+        out_string += "total " + str(tot) + "]"
+        print out_string
+        mp = mp.m_hdr.mh_next
+        cnt += 1
+# EndMacro: mbuf_walk
+
+# Macro: mbuf_buf2slab
+@lldb_command('mbuf_buf2slab')
+def MbufBuf2Slab(cmd_args=None):
+    """ Given an mbuf object, find its corresponding slab address
+    """
+    if (cmd_args == None or len(cmd_args) == 0):
+        print "Missing argument 0 in user function."
+        return
+    m = kern.GetValueFromAddress(cmd_args[0], 'mbuf *')
+    gix = (m - Cast(kern.globals.mbutl, 'char *')) >> MBSHIFT
+    slabstbl = kern.globals.slabstbl
+    ix = (m - Cast(slabstbl[int(gix)].slg_slab[0].sl_base, 'char *')) >> 12
+    slab = addressof(slabstbl[int(gix)].slg_slab[int(ix)])
+    if (kern.ptrsize == 8):
+        mbuf_slab_format = "0x{0:<16x}"
+        print mbuf_slab_format.format(slab)
+    else:
+        mbuf_slab_format = "0x{0:<8x}"
+        print mbuf_slab_format.format(slab)
+# EndMacro: mbuf_buf2slab
+
+# Macro: mbuf_buf2mca
+@lldb_command('mbuf_buf2mca')
+def MbufBuf2Mca(cmd_args=None):
+    """ Find the mcache audit structure of the corresponding mbuf
+    """
+    m = kern.GetValueFromAddress(cmd_args[0], 'mbuf *')
+    print GetMbufBuf2Mca(m)
+    return
+# EndMacro: mbuf_buf2mca
+
+# Macro: mbuf_slabs
+@lldb_command('mbuf_slabs')
+def MbufSlabs(cmd_args=None):
+    """ Print all slabs in the group
+    """
+    out_string = ""
+    slg = kern.GetValueFromAddress(cmd_args[0], 'mcl_slabg_t *')
+    x = 0
+
+    if (kern.ptrsize == 8):
+        slabs_string_format = "{0:>4d}: 0x{1:16x} 0x{2:16x} 0x{3:16x} {4:4s} {5:20d} {6:3d} {7:3d} {8:3d} {9:3d} {10:>6s} "
+        out_string += "slot slab next obj mca tstamp C R N size flags\n"
+        out_string += "---- ------------------ ------------------ ------------------ ------------------ ---------- -- -- -- ------ -----\n"
+    else:
+        slabs_string_format = "{0:>4d}: 0x{1:8x} 0x{2:8x} 0x{3:8x} {4:4s} {5:20d} {6:3d} {7:3d} {8:3d} {9:3d} {10:>6s} "
+        out_string += "slot slab next obj mca tstamp C R N size flags\n"
+        out_string += "---- ---------- ---------- ---------- ---------- ---------- -- -- -- ------ -----\n"
+
+    mbutl = cast(kern.globals.mbutl, 'union mbigcluster *')
+    while x < NSLABSPMB:
+        sl = addressof(slg.slg_slab[x])
+        mca = 0
+        obj = sl.sl_base
+        ts = 0
+
+        if (kern.globals.mclaudit != 0):
+            ix = (obj - Cast(kern.globals.mbutl, 'char *')) >> 12
+            clbase = mbutl + (sizeof(dereference(mbutl)) * ix)
+            mclidx = (obj - clbase) >> 8
+            mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)]
+            ts = mca.mca_tstamp
+
+        out_string += slabs_string_format.format((x + 1), sl, sl.sl_next, obj, hex(mca), int(ts), int(sl.sl_class), int(sl.sl_refcnt), int(sl.sl_chunks), int(sl.sl_len), hex(sl.sl_flags))
+
+        if (sl.sl_flags != 0):
+            out_string += "<"
+            if sl.sl_flags & SLF_MAPPED:
+                out_string += "mapped"
+            if sl.sl_flags & SLF_PARTIAL:
+                out_string += ",partial"
+            if sl.sl_flags & SLF_DETACHED:
+                out_string += ",detached"
+            out_string += ">"
+        out_string += "\n"
+
+        if sl.sl_chunks > 1:
+            z = 1
+            c = sl.sl_len/sl.sl_chunks
+
+            while z < sl.sl_chunks:
+                obj = sl.sl_base + (c * z)
+                mca = 0
+                ts = 0
+
+                if (kern.globals.mclaudit != 0):
+                    ix = (obj - Cast(kern.globals.mbutl, 'char *')) >> 12
+                    clbase = mbutl + (sizeof(dereference(mbutl)) * ix)
+                    mclidx = (obj - clbase) >> 8
+                    mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)]
+                    ts = mca.mca_tstamp
+
+                if (kern.ptrsize == 8):
+                    out_string += "                      " + hex(obj) + " " + hex(mca) + " " + str(unsigned(ts)) + "\n"
+                else:
+                    out_string += "          " + hex(obj) + " " + hex(mca) + " " + str(unsigned(ts)) + "\n"
+
+                z += 1
+        x += 1
+    print out_string
+# EndMacro: mbuf_slabs
+
+# Macro: mbuf_slabstbl
+@lldb_command('mbuf_slabstbl')
+def MbufSlabsTbl(cmd_args=None):
+    """ Print slabs table
+    """
+    out_string = ""
+    x = 0
+
+    if (kern.ptrsize == 8):
+        out_string += "slot slabg slabs range\n"
+        out_string += "---- ------------------ -------------------------------------------\n"
+    else:
+        out_string += "slot slabg slabs range\n"
+        out_string += "---- ---------- ---------------------------\n"
+
+    slabstbl = kern.globals.slabstbl
+    slabs_table_blank_string_format = "{0:>3d}: - \n"
+    while (x < unsigned(kern.globals.maxslabgrp)):
+        slg = slabstbl[x]
+        if (slg == 0):
+            out_string += slabs_table_blank_string_format.format(x+1)
+        else:
+            if (kern.ptrsize == 8):
+                slabs_table_string_format = "{0:>3d}: 0x{1:16x} [ 0x{2:16x} - 0x{3:16x} ]\n"
+                out_string += slabs_table_string_format.format(x+1, slg, addressof(slg.slg_slab[0]), addressof(slg.slg_slab[NSLABSPMB-1]))
+            else:
+                slabs_table_string_format = "{0:>3d}: 0x{1:8x} [ 0x{2:8x} - 0x{3:8x} ]\n"
+                out_string += slabs_table_string_format.format(x+1, slg, addressof(slg.slg_slab[0]), addressof(slg.slg_slab[NSLABSPMB-1]))
+
+        x += 1
+    print out_string
+# EndMacro: mbuf_slabstbl
+
+
+def GetMbufBuf2Mca(m):
+    ix = (m - Cast(kern.globals.mbutl, 'char *')) >> 12
+    #mbutl = Cast(kern.globals.mbutl, 'union mbigcluster *')
+    mbutl = cast(kern.globals.mbutl, 'union mbigcluster *')
+    clbase = mbutl + (sizeof(dereference(mbutl)) * ix)
+    mclidx = (m - clbase) >> 8
+    mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)]
+    return str(mca)
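The pointer arithmetic in GetMbufBuf2Mca (and in MbufSlabs above) maps a buffer address to its audit entry in two steps: >> 12 selects the 4 KB cluster page (NBPG), and >> 8 the 256-byte mbuf slot (MSIZE) inside it. A self-contained sketch, with a made-up pool base address:

    # Model of the audit-index math; MBUTL is a hypothetical pool start.
    MBUTL = 0x10000000
    def audit_indexes(addr):
        ix = (addr - MBUTL) >> 12       # which 4096-byte cluster
        clbase = MBUTL + (ix << 12)     # base address of that cluster
        mclidx = (addr - clbase) >> 8   # which 256-byte slot within it
        return ix, mclidx

    # An address 5 clusters and 3 slots into the pool.
    assert audit_indexes(MBUTL + 4096*5 + 256*3) == (5, 3)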
\n" + + if (kern.ptrsize == 8): + show_mca_string_format = "{0:>4s} {1:>4s} {2:>16s} {3:>16s} {4:>16} {5:>12s} {6:12s}" + out_string += show_mca_string_format.format("slot", "idx", "slab address", "mca address", "obj address", "type", "allocation state\n") + else: + show_mca_string_format = "{0:4s} {1:4s} {2:8s} {3:8s} {4:8} {5:12s} {6:12s}" + out_string += show_mca_string_format.format("slot", "idx", "slab address", "mca address", "obj address", "type", "allocation state\n") + + while (x < unsigned(kern.globals.slabgrp)): + slg = kern.globals.slabstbl[x] + y = 0 + stop = 0 + while ((y < NSLABSPMB) and (stop == 0)): + sl = addressof(slg.slg_slab[y]) + base = sl.sl_base + mbutl = cast(kern.globals.mbutl, 'union mbigcluster *') + ix = (base - mbutl) >> 12 + clbase = mbutl + (sizeof(dereference(mbutl)) * ix) + mclidx = (base - clbase) >> 8 + mca = kern.globals.mclaudit[int(ix)].cl_audit[int(mclidx)] + first = 1 + + while ((Cast(mca, 'int') != 0) and (unsigned(mca.mca_addr) != 0)): + printmca = 0 + if (mca.mca_uflags & (MB_INUSE|MB_COMP_INUSE)): + total_a = total_a + 1 + printmca = show_a + else: + total_f = total_f + 1 + printmca = show_f + + if (printmca != 0): + if (first == 1): + if (kern.ptrsize == 8): + mca_string_format = "{0:4d} {1:4d} 0x{2:16x} " + out_string += mca_string_format.format(x, y, sl) + else: + mca_string_format = "{0:4d} {1:4d} 0x{02:8x} " + out_string += mca_string_format.format(x, y, sl) + else: + if (kern.ptrsize == 8): + out_string += " " + else: + out_string += " " + + if (kern.ptrsize == 8): + mca_string_format = "0x{0:16x} 0x{1:16x}" + out_string += mca_string_format.format(mca, mca.mca_addr) + else: + mca_string_format = "0x{0:8x} 0x{1:8x}" + out_string += mca_string_format.format(mca, mca.mca_addr) + + out_string += GetMbufMcaCtype(mca, 0) + + if (mca.mca_uflags & (MB_INUSE|MB_COMP_INUSE)): + out_string += "active " + else: + out_string += " freed " + if (first == 1): + first = 0 + out_string += "\n" + total = total + 1 + + if (show_tr != 0): + out_string += "Recent transaction for this buffer (thread: 0x" + hex(mca.mca_thread) + "):\n" + cnt = 0 + while (cnt < mca.mca_depth): + kgm_pc = mca.mca_stack[int(cnt)] + out_string += str(int(cnt) + 1) + " " + out_string += GetPc(kgm_pc) + cnt += 1 + + mca = mca.mca_next + + y += 1 + if (slg.slg_slab[int(y)].sl_base == 0): + stop = 1 + x += 1 + + if (total and show_a and show_f): + out_string += "total objects = " + str(int(total)) + "\n" + out_string += "active/unfreed objects = " + str(int(total_a)) + "\n" + out_string += "freed/in_cache objects = " + str(int(total_f)) + "\n" + + return out_string + +def GetMbufMcaCtype(mca, vopt): + cp = mca.mca_cache + mca_class = unsigned(cp.mc_private) + csize = kern.globals.mbuf_table[mca_class].mtbl_stats.mbcl_size + done = 0 + out_string = " " + if (csize == MSIZE): + if (vopt): + out_string += "M (mbuf) " + else: + out_string += "M " + return out_string + if (csize == MCLBYTES): + if (vopt): + out_string += "CL (2K cluster) " + else: + out_string += "CL " + return out_string + if (csize == NBPG): + if (vopt): + out_string += "BCL (4K cluster) " + else: + out_string += "BCL " + return out_string + if (csize == M16KCLBYTES): + if (vopt): + out_string += "JCL (16K cluster) " + else: + out_string += "JCL " + return out_string + + if (csize == (MSIZE + MCLBYTES)): + if (mca.mca_uflags & MB_SCVALID): + if (mca.mca_uptr): + out_string += "M+CL " + if vopt: + out_string += "(paired mbuf, 2K cluster) " + else: + out_string += "M-CL " + if vopt: + out_string += "(unpaired mbuf, 2K 
cluster) " + else: + if (mca.mca_uptr): + out_string += "CL+M " + if vopt: + out_string += "(paired 2K cluster, mbuf) " + else: + out_string += "CL-M " + if vopt: + out_string += "(unpaired 2K cluster, mbuf) " + return out_string + + if (csize == (MSIZE + NBPG)): + if (mca.mca_uflags & MB_SCVALID): + if (mca.mca_uptr): + out_string += "M+BCL " + if vopt: + out_string += "(paired mbuf, 4K cluster) " + else: + out_string += "M-BCL " + if vopt: + out_string += "(unpaired mbuf, 4K cluster) " + else: + if (mca.mca_uptr): + out_string += "BCL+M " + if vopt: + out_string += "(paired 4K cluster, mbuf) " + else: + out_string += "BCL-m " + if vopt: + out_string += "(unpaired 4K cluster, mbuf) " + return out_string + + if (csize == (MSIZE + M16KCLBYTES)): + if (mca.mca_uflags & MB_SCVALID): + if (mca.mca_uptr): + out_string += "M+BCL " + if vopt: + out_string += "(paired mbuf, 4K cluster) " + else: + out_string += "M-BCL " + if vopt: + out_string += "(unpaired mbuf, 4K cluster) " + else: + if (mca.mca_uptr): + out_string += "BCL+M " + if vopt: + out_string += "(paired 4K cluster, mbuf) " + else: + out_string += "BCL-m " + if vopt: + out_string += "(unpaired 4K cluster, mbuf) " + return out_string + + out_string += "unknown: " + cp.mc_name + return out_string + +kgm_pkmod = 0 +kgm_pkmodst = 0 +kgm_pkmoden = 0 + +def GetPointerAsString(kgm_pc): + if (kern.ptrsize == 8): + pointer_format_string = "0x{0:<16x} " + else: + pointer_format_string = "0x{0:<8x} " + return pointer_format_string.format(kgm_pc) + +def GetKmodAddrIntAsString(kgm_pc): + global kgm_pkmod + global kgm_pkmodst + global kgm_pkmoden + + out_string = "" + mh_execute_addr = int(lldb_run_command('p/x (uintptr_t *)&_mh_execute_header').split('=')[-1].strip(), 16) + + out_string += GetPointerAsString(kgm_pc) + if ((unsigned(kgm_pc) >= unsigned(kgm_pkmodst)) and (unsigned(kgm_pc) < unsigned(kgm_pkmoden))): + kgm_off = kgm_pc - kgm_pkmodst + out_string += "<" + str(Cast(kgm_pkmod, 'kmod_info_t *').name) + " + 0x" + str(kgm_off) + ">" + else: + kgm_kmodp = kern.globals.kmod + if ((kern.arch == 'x86_64') and (long(kgm_pc) >= long(mh_execute_addr))): + kgm_kmodp = 0 + + while kgm_kmodp: + kgm_off = unsigned((kgm_pc - kgm_kmodp.address) & 0x00000000ffffffff) + if ((long(kgm_kmodp.address) <= long(kgm_pc)) and (kgm_off) < unsigned(kgm_kmodp.size)): + kgm_pkmod = kgm_kmodp + kgm_pkmodst = unsigned(kgm_kmodp.address) + kgm_pkmoden = unsigned(kgm_pkmodst + kgm_kmodp.size) + kgm_kmodp = 0 + else: + kgm_kmodp = kgm_kmodp.next + return out_string + +def GetPc(kgm_pc): + out_string = "" + mh_execute_addr = int(lldb_run_command('p/x (uintptr_t *)&_mh_execute_header').split('=')[-1].strip(), 16) + if (unsigned(kgm_pc) < unsigned(mh_execute_addr) or + unsigned(kgm_pc) >= unsigned(kern.globals.vm_kernel_top)): + out_string += GetKmodAddrIntAsString(kgm_pc) + else: + out_string += GetSourceInformationForAddress(int(kgm_pc)) + return out_string + "\n" + + +# Macro: mbuf_showactive +@lldb_command('mbuf_showactive') +def MbufShowActive(cmd_args=None): + """ Print all active/in-use mbuf objects + """ + if cmd_args != None and len(cmd_args) > 0 : + print GetMbufWalkAllSlabs(1, 0, cmd_args[0]) + else: + print GetMbufWalkAllSlabs(1, 0, 0) +# EndMacro: mbuf_showactive + + +# Macro: mbuf_showinactive +@lldb_command('mbuf_showinactive') +def MbufShowInactive(cmd_args=None): + """ Print all freed/in-cache mbuf objects + """ + print GetMbufWalkAllSlabs(0, 1, 0) +# EndMacro: mbuf_showinactive + + +# Macro: mbuf_showmca +@lldb_command('mbuf_showmca') +def 
MbufShowMca(cmd_args=None): + """ Print the contents of an mbuf mcache audit structure + """ + out_string = "" + if cmd_args != None and len(cmd_args) > 0 : + mca = kern.GetValueFromAddress(cmd_args[0], 'mcache_audit_t *') + cp = mca.mca_cache + out_string += "object type:\t" + out_string += GetMbufMcaCtype(mca, 1) + out_string += "\nControlling mcache :\t" + hex(mca.mca_cache) + " (" + str(cp.mc_name) + ")\n" + if (mca.mca_uflags & MB_SCVALID): + mbutl = cast(kern.globals.mbutl, 'union mbigcluster *') + ix = (mca.mca_addr - mbutl) >> 12 + clbase = mbutl + (sizeof(dereference(mbutl)) * ix) + mclidx = (mca.mca_addr - clbase) >> 8 + out_string += "mbuf obj :\t\t" + hex(mca.mca_addr) + "\n" + out_string += "mbuf index :\t\t" + str(mclidx + 1) + " (out of 16) in cluster base " + hex(clbase) + "\n" + if (int(mca.mca_uptr) != 0): + peer_mca = cast(mca.mca_uptr, 'mcache_audit_t *') + out_string += "paired cluster obj :\t" + hex(peer_mca.mca_addr) + " (mca " + hex(peer_mca) + ")\n" + out_string += "saved contents :\t" + hex(mca.mca_contents) + " (" + str(int(mca.mca_contents_size)) + " bytes)\n" + else: + out_string += "cluster obj :\t\t" + hex(mca.mca_addr) + "\n" + if (mca.mca_uptr != 0): + peer_mca = cast(mca.mca_uptr, 'mcache_audit_t *') + out_string += "paired mbuf obj :\t" + hex(peer_mca.mca_addr) + " (mca " + hex(peer_mca) + ")\n" + + out_string += "Recent transaction (tstamp " + str(unsigned(mca.mca_tstamp)) + ", thread " + hex(mca.mca_thread) + ") :\n" + cnt = 0 + while (cnt < mca.mca_depth): + kgm_pc = mca.mca_stack[cnt] + out_string += " " + str(cnt + 1) + ". " + out_string += GetPc(kgm_pc) + cnt += 1 + + if (mca.mca_pdepth > 0): + out_string += "previous transaction (tstamp " + str(unsigned(mca.mca_ptstamp)) + ", thread " + hex(mca.mca_pthread) + "):\n" + cnt = 0 + + while (cnt < mca.mca_pdepth): + kgm_pc = mca.mca_pstack[cnt] + out_string += " " + str(cnt + 1) + ". " + out_string += GetPc(kgm_pc) + cnt += 1 + + if (mca.mca_uflags & MB_SCVALID): + msc = cast(mca.mca_contents, 'mcl_saved_contents_t *') + msa = addressof(msc.sc_scratch) + if (msa.msa_depth > 0): + out_string += "Recent scratch transaction (tstamp " + str(unsigned(msa.msa_tstamp)) + ", thread " + hex(msa.msa_thread) + ") :\n" + cnt = 0 + while (cnt < msa.msa_depth): + kgm_pc = msa.msa_stack[cnt] + out_string += " " + str(cnt + 1) + ". " + out_string += GetPc(kgm_pc) + cnt += 1 + + if (msa.msa_pdepth > 0): + out_string += "previous scratch transaction (tstamp " + msa.msa_ptstamp + ", thread " + msa.msa_pthread + "):\n" + cnt = 0 + while (cnt < msa.msa_pdepth): + kgm_pc = msa.msa_pstack[cnt] + out_string += " " + str(cnt + 1) + ". " + out_string += GetPc(kgm_pc) + cnt += 1 + else : + out_string += "Missing argument 0 in user function." + + print out_string +# EndMacro: mbuf_showmca + + +# Macro: mbuf_showall +@lldb_command('mbuf_showall') +def MbufShowAll(cmd_args=None): + """ Print all mbuf objects + """ + print GetMbufWalkAllSlabs(1, 1, 0) +# EndMacro: mbuf_showall + +# Macro: mbuf_countchain +@lldb_command('mbuf_countchain') +def MbufCountChain(cmd_args=None): + """ Count the length of an mbuf chain + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." + return + mp = kern.GetValueFromAddress(cmd_args[0], 'mbuf *') + + pkt = 0 + nxt = 0 + + while (mp): + pkt = pkt + 1 + mn = mp.m_hdr.mh_next + while (mn): + nxt = nxt + 1 + mn = mn.m_hdr.mh_next + + mp = mp.m_hdr.mh_nextpkt + + if (((pkt + nxt) % 50) == 0): + print " ..." 
+ str(pkt_nxt) + + print "Total: " + str(pkt + nxt) + " (via m_next: " + str(nxt) + ")" +# EndMacro: mbuf_countchain + + + +# Macro: mbuf_topleak +@lldb_command('mbuf_topleak') +def MbufTopLeak(cmd_args=None): + """ Print the top suspected mbuf leakers + """ + topcnt = 0 + if (int(len(cmd_args)) > 0 and int(cmd_args[0]) < 5): + maxcnt = cmd_args[0] + else: + maxcnt = 5 + while (topcnt < maxcnt): + print GetMbufTraceLeak(kern.globals.mleak_top_trace[topcnt]) + topcnt += 1 + +# EndMacro: mbuf_topleak + +def GetMbufTraceLeak(trace): + out_string = "" + if (trace.allocs != 0): + out_string += hex(trace) + ":" + str(trace.allocs) + " outstanding allocs\n" + out_string += "Backtrace saved " + str(trace.depth) + " deep\n" + if (trace.depth != 0): + cnt = 0 + while (cnt < trace.depth): + out_string += str(cnt + 1) + ": " + out_string += GetPc(trace.addr[cnt]) + out_string += "\n" + cnt += 1 + return out_string + + +# Macro: mbuf_traceleak +@lldb_command('mbuf_traceleak') +def MbufTraceLeak(cmd_args=None): + """ Print the leak information for a given leak address + Given an mbuf leak trace (mtrace) structure address, print out the + stored information with that trace + syntax: (lldb) mbuf_traceleak + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." + return + trace = kern.GetValueFromAddress(cmd_args[0], 'mtrace *') + print GetMbufTraceLeak(trace) +# EndMacro: mbuf_traceleak + + +# Macro: mcache_walkobj +@lldb_command('mcache_walkobj') +def McacheWalkObject(cmd_args=None): + """ Given a mcache object address, walk its obj_next pointer + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." + return + out_string = "" + p = kern.GetValueFromAddress(cmd_args[0], 'mcache_obj_t *') + cnt = 1 + total = 0 + while (p): + mcache_object_format = "{0:>4d}: 0x{1:>16x}" + out_string += mcache_object_format.format(cnt, p) + "\n" + p = p.obj_next + cnt += 1 + print out_string +# EndMacro: mcache_walkobj + +# Macro: mcache_stat +@lldb_command('mcache_stat') +def McacheStat(cmd_args=None): + """ Print all mcaches in the system. 
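MbufCountChain above walks two orthogonal linked lists: packets chained through m_nextpkt, and the buffers of each packet chained through m_next. A toy model of that traversal, with the struct layout flattened into a small class invented for the example:

    # Model of MbufCountChain's double walk (not the kernel mbuf layout).
    class MBuf(object):
        def __init__(self, nxt=None, nextpkt=None):
            self.m_next = nxt          # continuation buffer of this packet
            self.m_nextpkt = nextpkt   # next packet in the queue

    def count_chain(mp):
        pkt = nxt = 0
        while mp is not None:
            pkt += 1                   # one packet head
            mn = mp.m_next
            while mn is not None:
                nxt += 1               # one continuation buffer
                mn = mn.m_next
            mp = mp.m_nextpkt
        return pkt, nxt

    # Two packets; the first carries one continuation buffer.
    second = MBuf()
    first = MBuf(nxt=MBuf(), nextpkt=second)
    assert count_chain(first) == (2, 1)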
+ """ + head = kern.globals.mcache_head + out_string = "" + mc = cast(head.lh_first, 'mcache *') + if (kern.ptrsize == 8): + mcache_stat_format_string = "{0:<24s} {1:>8s} {2:>20s} {3:>5s} {4:>5s} {5:>20s} {6:>30s} {7:>18s}" + else: + mcache_stat_format_string = "{0:<24s} {1:>8s} {2:>12s} {3:>5s} {4:>5s} {5:>12s} {6:>30s} {7:>18s}" + + if (kern.ptrsize == 8): + mcache_stat_data_format_string = "{0:<24s} {1:>12s} {2:>20s} {3:>5s} {4:>5s} {5:>22s} {6:>12d} {7:>8d} {8:>8d} {9:>18d}" + else: + mcache_stat_data_format_string = "{0:<24s} {1:>12s} {2:>12s} {3:>5s} {4:>5s} {5:>14s} {6:>12d} {7:>8d} {8:>8d} {9:>18d}" + + out_string += mcache_stat_format_string.format("cache name", "cache state" , "cache addr", "buf size", "buf align", "backing zone", "wait nowait failed", "bufs incache") + out_string += "\n" + + ncpu = int(kern.globals.ncpu) + while mc != 0: + bktsize = mc.mc_cpu[0].cc_bktsize + cache_state = "" + if (mc.mc_flags & MCF_NOCPUCACHE): + cache_state = "disabled" + else: + if (bktsize == 0): + cache_state = " offline" + else: + cache_state = " online" + if (mc.mc_slab_zone != 0): + backing_zone = mc.mc_slab_zone + else: + if (kern.ptrsize == 8): + backing_zone = " custom" + else: + backing_zone = " custom" + + total = 0 + total += mc.mc_full.bl_total * bktsize + n = 0 + while(n < ncpu): + ccp = mc.mc_cpu[n] + if (ccp.cc_objs > 0): + total += ccp.cc_objs + if (ccp.cc_pobjs > 0): + total += ccp.cc_pobjs + n += 1 + ccp += 1 + + out_string += mcache_stat_data_format_string.format(mc.mc_name, cache_state, hex(mc), str(int(mc.mc_bufsize)), str(int(mc.mc_align)), hex(mc.mc_slab_zone), int(mc.mc_wretry_cnt), int(mc.mc_nwretry_cnt), int(mc.mc_nwfail_cnt), total) + out_string += "\n" + mc = cast(mc.mc_list.le_next, 'mcache *') + print out_string +# EndMacro: mcache_stat + +# Macro: mcache_showcache +@lldb_command('mcache_showcache') +def McacheShowCache(cmd_args=None): + """Display the number of objects in cache. + """ + out_string = "" + cp = kern.GetValueFromAddress(cmd_args[0], 'mcache_t *') + bktsize = cp.mc_cpu[0].cc_bktsize + cnt = 0 + total = 0 + mcache_cache_format = "{0:<4d} {1:>8d} {2:>8d} {3:>8d}" + out_string += "Showing cache " + str(cp.mc_name) + " :\n\n" + out_string += " CPU cc_objs cc_pobjs total\n" + out_string += "---- ------- -------- --------\n" + ncpu = int(kern.globals.ncpu) + while (cnt < ncpu): + ccp = cp.mc_cpu[cnt] + objs = ccp.cc_objs + if (objs <= 0): + objs = 0 + pobjs = ccp.cc_pobjs + if (pobjs <= 0): + pobjs = 0 + tot_cpu = objs + pobjs + total += tot_cpu + out_string += mcache_cache_format.format(cnt, objs, pobjs, tot_cpu) + out_string += "\n" + cnt += 1 + + out_string += " ========\n" + out_string += " " + str(total) + "\n\n" + total += cp.mc_full.bl_total * bktsize + + out_string += "Total # of full buckets (" + str(int(bktsize)) + " objs/bkt):\t" + str(int(cp.mc_full.bl_total)) +"\n" + out_string += "Total # of objects cached:\t\t" + str(total) + "\n" + print out_string +# EndMacro: mcache_showcache diff --git a/tools/lldbmacros/memory.py b/tools/lldbmacros/memory.py new file mode 100644 index 000000000..d3cbeb3e3 --- /dev/null +++ b/tools/lldbmacros/memory.py @@ -0,0 +1,1911 @@ + +""" Please make sure you read the README file COMPLETELY BEFORE reading anything below. + It is very critical that you read coding guidelines in Section E in README file. 
+""" +from xnu import * +import sys, shlex +from utils import * +import xnudefines +from process import * + +# Macro: memstats +@lldb_command('memstats') +def Memstats(cmd_args=None): + """ Prints out a summary of various memory statistics. In particular vm_page_wire_count should be greater than 2K or you are under memory pressure. + """ + try: + print "memorystatus_level: {: >10d}".format(kern.globals.memorystatus_level) + except ValueError: + pass + try: + print "memorystatus_available_pages: {: >10d}".format(kern.globals.memorystatus_available_pages) + except ValueError: + pass + print "vm_page_throttled_count: {: >10d}".format(kern.globals.vm_page_throttled_count) + print "vm_page_active_count: {: >10d}".format(kern.globals.vm_page_active_count) + print "vm_page_inactive_count: {: >10d}".format(kern.globals.vm_page_inactive_count) + print "vm_page_wire_count: {: >10d}".format(kern.globals.vm_page_wire_count) + print "vm_page_free_count: {: >10d}".format(kern.globals.vm_page_free_count) + print "vm_page_purgeable_count: {: >10d}".format(kern.globals.vm_page_purgeable_count) + print "vm_page_inactive_target: {: >10d}".format(kern.globals.vm_page_inactive_target) + print "vm_page_free_target: {: >10d}".format(kern.globals.vm_page_free_target) + print "inuse_ptepages_count: {: >10d}".format(kern.globals.inuse_ptepages_count) + print "vm_page_free_reserved: {: >10d}".format(kern.globals.vm_page_free_reserved) + +@xnudebug_test('test_memstats') +def TestMemstats(kernel_target, config, lldb_obj, isConnected ): + """ Test the functionality of memstats command + returns + - False on failure + - True on success + """ + if not isConnected: + print "Target is not connected. Cannot test memstats" + return False + res = lldb.SBCommandReturnObject() + lldb_obj.debugger.GetCommandInterpreter().HandleCommand("memstats", res) + result = res.GetOutput() + if result.split(":")[1].strip().find('None') == -1 : + return True + else: + return False + +# EndMacro: memstats + +# Macro: showmemorystatus +def CalculateLedgerPeak(phys_footprint_entry): + """ Internal function to calculate ledger peak value for the given phys footprint entry + params: phys_footprint_entry - value representing struct ledger_entry * + return: value - representing the ledger peak for the given phys footprint entry + """ + now = kern.globals.sched_tick / 20 + ledger_peak = phys_footprint_entry.le_credit - phys_footprint_entry.le_debit + if (now - phys_footprint_entry._le.le_peaks[0].le_time <= 1) and (phys_footprint_entry._le.le_peaks[0].le_max > ledger_peak): + ledger_peak = phys_footprint_entry._le.le_peaks[0].le_max + if (now - phys_footprint_entry._le.le_peaks[1].le_time <= 1) and (phys_footprint_entry._le.le_peaks[1].le_max > ledger_peak): + ledger_peak = phys_footprint_entry._le.le_peaks[1].le_max + return ledger_peak + +@header("{: >8s} {: >22s} {: >22s} {: >11s} {: >11s} {: >12s} {: >10s} {: >13s} {: ^10s} {: >8s} {: <20s}\n".format( +'pid', 'effective priority', 'requested priority', 'state', 'user_data', 'physical', 'iokit', 'footprint', +'spike', 'limit', 'command')) +def GetMemoryStatusNode(proc_val): + """ Internal function to get memorystatus information from the given proc + params: proc - value representing struct proc * + return: str - formatted output information for proc object + """ + out_str = '' + task_val = Cast(proc_val.task, 'task *') + task_ledgerp = task_val.ledger + + task_physmem_footprint_ledger_entry = task_ledgerp.l_entries[kern.globals.task_ledgers.phys_mem] + task_iokit_footprint_ledger_entry = 
task_ledgerp.l_entries[kern.globals.task_ledgers.iokit_mem]
+    task_phys_footprint_ledger_entry = task_ledgerp.l_entries[kern.globals.task_ledgers.phys_footprint]
+    page_size = kern.globals.page_size
+
+    phys_mem_footprint = (task_physmem_footprint_ledger_entry.le_credit - task_physmem_footprint_ledger_entry.le_debit) / page_size
+    iokit_footprint = (task_iokit_footprint_ledger_entry.le_credit - task_iokit_footprint_ledger_entry.le_debit) / page_size
+    phys_footprint = (task_phys_footprint_ledger_entry.le_credit - task_phys_footprint_ledger_entry.le_debit) / page_size
+    phys_footprint_limit = task_phys_footprint_ledger_entry.le_limit / page_size
+    ledger_peak = CalculateLedgerPeak(task_phys_footprint_ledger_entry)
+    phys_footprint_spike = ledger_peak / page_size
+
+    format_string = '{0: >8d} {1: >22d} {2: >22d} {3: #011x} {4: #011x} {5: >12d} {6: >10d} {7: >13d}'
+    out_str += format_string.format(proc_val.p_pid, proc_val.p_memstat_effectivepriority,
+                                    proc_val.p_memstat_requestedpriority, proc_val.p_memstat_state, proc_val.p_memstat_userdata,
+                                    phys_mem_footprint, iokit_footprint, phys_footprint)
+    if phys_footprint != phys_footprint_spike:
+        out_str += "{: ^12d}".format(phys_footprint_spike)
+    else:
+        out_str += "{: ^12s}".format('-')
+    out_str += "{: 8d} {: <20s}\n".format(phys_footprint_limit, proc_val.p_comm)
+    return out_str
+
+@lldb_command('showmemorystatus')
+def ShowMemoryStatus(cmd_args=None):
+    """ Routine to display each entry in jetsam list with a summary of pressure statistics
+        Usage: showmemorystatus
+    """
+    bucket_index = 0
+    bucket_count = 20
+    print GetMemoryStatusNode.header
+    print "{: >91s} {: >10s} {: >13s} {: ^10s} {: >8s}\n".format("(pages)", "(pages)", "(pages)", "(pages)", "(pages)")
+    while bucket_index < bucket_count:
+        current_bucket = kern.globals.memstat_bucket[bucket_index]
+        current_list = current_bucket.list
+        current_proc = Cast(current_list.tqh_first, 'proc *')
+        while unsigned(current_proc) != 0:
+            print GetMemoryStatusNode(current_proc)
+            current_proc = current_proc.p_memstat_list.tqe_next
+        bucket_index += 1
+    print "\n\n"
+    Memstats()
+
+# EndMacro: showmemorystatus
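CalculateLedgerPeak above reduces a ledger entry to credit minus debit, then substitutes a recorded windowed peak only when that peak is both recent (within one time bucket) and larger than the live balance. The same logic on plain integers, with the entry's fields flattened into tuples for illustration:

    # Model of CalculateLedgerPeak (not the kernel ledger structures).
    def ledger_peak(now, credit, debit, peaks):
        value = credit - debit                   # live balance
        for peak_max, peak_time in peaks:        # recorded windowed peaks
            if now - peak_time <= 1 and peak_max > value:
                value = peak_max                 # recent and larger: use it
        return value

    # A peak of 900 recorded at bucket 9 beats the live balance at bucket 10;
    # the stale peak from bucket 3 is ignored.
    assert ledger_peak(10, 1000, 400, [(900, 9), (2000, 3)]) == 900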
+
+# Macro: zprint
+
+@lldb_type_summary(['zone','zone_t'])
+@header("{:^18s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s} {:>10s}({:>6s} {:>6s} {:>6s}) {:^14s} {:<20s}".format(
+'ZONE', 'TOT_SZ', 'PAGE_COUNT', 'ALLOC_ELTS', 'FREE_ELTS', 'FREE_SZ', 'ELT_SZ', 'ALLOC', 'ELTS', 'PGS', 'SLK', 'FLAGS', 'NAME'))
+def GetZoneSummary(zone):
+    """ Summarize a zone with important information. See help zprint for description of each field
+        params:
+          zone: value - obj representing a zone in kernel
+        returns:
+          str - summary of the zone
+    """
+    out_string = ""
+    format_string = '{:#018x} {:10d} {:10d} {:10d} {:10d} {:10d} {:10d} {:10d} {:6d} {:6d} {:6d} {markings} {name:s} '
+    pagesize = 4096
+
+    free_elements = (zone.cur_size / zone.elem_size) - zone.count
+    free_size = free_elements * zone.elem_size
+
+    alloc_count = zone.alloc_size / zone.elem_size
+    alloc_pages = zone.alloc_size / pagesize
+    alloc_slack = zone.alloc_size % zone.elem_size
+    marks = [
+        ["collectable",       "C"],
+        ["expandable",        "X"],
+        ["noencrypt",         "$"],
+        ["caller_acct",       "@"],
+        ["exhaustible",       "H"],
+        ["allows_foreign",    "F"],
+        ["async_prio_refill", "R"],
+        ["no_callout",        "O"],
+        ["zleak_on",          "L"],
+        ["doing_alloc",       "A"],
+        ["waiting",           "W"],
+        ["doing_gc",          "G"]
+        ]
+    if kern.arch == 'x86_64':
+        marks.append(["gzalloc_exempt",     "M"])
+        marks.append(["alignment_required", "N"])
+
+    markings = ""
+    for mark in marks:
+        if zone.__getattr__(mark[0]):
+            markings += mark[1]
+        else:
+            markings += " "
+    out_string += format_string.format(zone, zone.cur_size, zone.page_count,
+                                       zone.count, free_elements, free_size,
+                                       zone.elem_size, zone.alloc_size, alloc_count,
+                                       alloc_pages, alloc_slack, name = zone.zone_name, markings=markings)
+
+    if zone.exhaustible:
+        out_string += "(max: {:d})".format(zone.max_size)
+
+    return out_string
+
+@lldb_command('zprint')
+def Zprint(cmd_args=None):
+    """ Routine to print a summary listing of all the kernel zones
+    All columns are printed in decimal
+    Legend:
+        C - collectable
+        X - expandable
+        $ - not encrypted during hibernation
+        @ - allocs and frees are accounted to caller process for KPRVT
+        H - exhaustible
+        F - allows foreign memory (memory not allocated from zone_map)
+        M - gzalloc will avoid monitoring this zone
+        R - will be refilled when below low water mark
+        O - does not allow refill callout to fill zone on noblock allocation
+        N - zone requires alignment (avoids padding this zone for debugging)
+        A - currently trying to allocate more backing memory from kernel_memory_allocate
+        W - another thread is waiting for more memory
+        L - zone is being monitored by zleaks
+        G - currently running GC
+    """
+    global kern
+    print GetZoneSummary.header
+    for zval in kern.zones:
+        print GetZoneSummary(zval)
+
+@xnudebug_test('test_zprint')
+def TestZprint(kernel_target, config, lldb_obj, isConnected ):
+    """ Test the functionality of zprint command
+        returns
+         - False on failure
+         - True on success
+    """
+    if not isConnected:
+        print "Target is not connected. Cannot test zprint"
+        return False
+    res = lldb.SBCommandReturnObject()
+    lldb_obj.debugger.GetCommandInterpreter().HandleCommand("zprint", res)
+    result = res.GetOutput()
+    if len(result.split("\n")) > 2:
+        return True
+    else:
+        return False
+
+
+# EndMacro: zprint
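The derived zprint columns are plain arithmetic on four zone fields; a standalone restatement of the computation in GetZoneSummary:

    # Free elements, free bytes, elements per allocation, and slack bytes.
    def zone_derived(cur_size, elem_size, count, alloc_size):
        free_elements = cur_size // elem_size - count   # FREE_ELTS
        free_size = free_elements * elem_size           # FREE_SZ
        alloc_count = alloc_size // elem_size           # ELTS per alloc
        alloc_slack = alloc_size % elem_size            # SLK: wasted tail
        return free_elements, free_size, alloc_count, alloc_slack

    # A zone of 64-byte elements, one 4096-byte page per alloc: no slack.
    assert zone_derived(8192, 64, 100, 4096) == (28, 1792, 64, 0)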
+
+# Macro: showzfreelist
+
+def ShowZfreeListHeader(zone):
+    """ Helper routine to print a header for zone freelist.
+        (Since the freelist does not have a custom type, this is not defined as a Type Summary).
+        params:
+          zone:zone_t - Zone object to print header info
+        returns:
+          None
+    """
+    out_str = ""
+    out_str += "{0: <9s} {1: <12s} {2: <18s} {3: <18s} {4: <6s}\n".format('ELEM_SIZE', 'COUNT', 'NCOOKIE', 'PCOOKIE', 'FACTOR')
+    out_str += "{0: <9d} {1: <12d} 0x{2:0>16x} 0x{3:0>16x} {4: <2d}/{5: <2d}\n\n".format(
+        zone.elem_size, zone.count, kern.globals.zp_nopoison_cookie, kern.globals.zp_poisoned_cookie, zone.zp_count, kern.globals.zp_factor)
+    out_str += "{0: <7s} {1: <18s} {2: <18s} {3: <18s} {4: <18s} {5: <18s} {6: <14s}\n".format(
+        'NUM', 'ELEM', 'NEXT', 'BACKUP', '^ NCOOKIE', '^ PCOOKIE', 'POISON (PREV)')
+    print out_str
+
+def ShowZfreeListChain(zone, zfirst, zlimit):
+    """ Helper routine to print a zone free list chain
+        params:
+          zone: zone_t - Zone object
+          zfirst: void * - A pointer to the first element of the free list chain
+          zlimit: int - Limit for the number of elements to be printed by showzfreelist
+        returns:
+          None
+    """
+    current = Cast(zfirst, 'void *')
+    while ShowZfreeList.elts_found < zlimit:
+        ShowZfreeList.elts_found += 1
+        znext = dereference(Cast(current, 'vm_offset_t *'))
+        backup_ptr = kern.GetValueFromAddress((unsigned(Cast(current, 'vm_offset_t')) + unsigned(zone.elem_size) - sizeof('vm_offset_t')), 'vm_offset_t *')
+        backup_val = dereference(backup_ptr)
+        n_unobfuscated = (unsigned(backup_val) ^ unsigned(kern.globals.zp_nopoison_cookie))
+        p_unobfuscated = (unsigned(backup_val) ^ unsigned(kern.globals.zp_poisoned_cookie))
+        poison_str = ''
+        if p_unobfuscated == unsigned(znext):
+            poison_str = "P ({0: <d})".format(ShowZfreeList.elts_found - ShowZfreeList.last_poisoned)
+            ShowZfreeList.last_poisoned = ShowZfreeList.elts_found
+        else:
+            if n_unobfuscated != unsigned(znext):
+                poison_str = "INVALID"
+        print "{0: <7d} 0x{1:0>16x} 0x{2:0>16x} 0x{3:0>16x} 0x{4:0>16x} 0x{5:0>16x} {6: <14s}".format(
+            ShowZfreeList.elts_found, unsigned(current), unsigned(znext), unsigned(backup_val), n_unobfuscated, p_unobfuscated, poison_str)
+        if unsigned(znext) == 0:
+            break
+        current = Cast(znext, 'void *')
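The check in ShowZfreeListChain relies on each free element carrying its next pointer twice: the primary word in the clear, and a backup word XORed with either the no-poison or the poisoned cookie. Un-XORing the backup with both cookies and comparing against the primary classifies the element; the cookie values below are made up for the sketch:

    # Model of the backup-pointer validation (cookies are illustrative).
    NOPOISON = 0x55AA55AA
    POISONED = 0xA55AA55A

    def classify(primary_next, backup):
        if backup ^ POISONED == primary_next:
            return "poisoned"
        if backup ^ NOPOISON == primary_next:
            return "not poisoned"
        return "INVALID"            # neither cookie matches: corruption

    nxt = 0x1234
    assert classify(nxt, nxt ^ NOPOISON) == "not poisoned"
    assert classify(nxt, nxt ^ POISONED) == "poisoned"
    assert classify(nxt, 0xdeadbeef) == "INVALID"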
+ """ + if not cmd_args: + print ShowZfreeList.__doc__ + return + ShowZfreeList.elts_found = 0 + ShowZfreeList.last_poisoned = 0 + + zone = kern.GetValueFromAddress(cmd_args[0], 'struct zone *') + zlimit = 50 + if len(cmd_args) >= 2: + zlimit = ArgumentStringToInt(cmd_args[1]) + ShowZfreeListHeader(zone) + + if unsigned(zone.use_page_list) == 1: + if unsigned(zone.allows_foreign) == 1: + for free_page_meta in IterateQueue(zone.pages.any_free_foreign, 'struct zone_page_metadata *', 'pages'): + if ShowZfreeList.elts_found == zlimit: + break + zfirst = Cast(free_page_meta.elements, 'void *') + if unsigned(zfirst) != 0: + ShowZfreeListChain(zone, zfirst, zlimit) + for free_page_meta in IterateQueue(zone.pages.intermediate, 'struct zone_page_metadata *', 'pages'): + if ShowZfreeList.elts_found == zlimit: + break + zfirst = Cast(free_page_meta.elements, 'void *') + if unsigned(zfirst) != 0: + ShowZfreeListChain(zone, zfirst, zlimit) + for free_page_meta in IterateQueue(zone.pages.all_free, 'struct zone_page_metadata *', 'pages'): + if ShowZfreeList.elts_found == zlimit: + break + zfirst = Cast(free_page_meta.elements, 'void *') + if unsigned(zfirst) != 0: + ShowZfreeListChain(zone, zfirst, zlimit) + else: + zfirst = Cast(zone.free_elements, 'void *') + if unsigned(zfirst) != 0: + ShowZfreeListChain(zone, zfirst, zlimit) + + if ShowZfreeList.elts_found == zlimit: + print "Stopped at {0: . If a is supplied, it prints log elements starting at . + Usage: zstack [] + + The suggested usage is to look at indexes below zcurrent and look for common stack traces. + The stack trace that occurs the most is probably the cause of the leak. Find the pc of the + function calling into zalloc and use the countpcs command to find out how often that pc occurs in the log. + The pc occuring in a high percentage of records is most likely the source of the leak. + + The findoldest command is also useful for leak debugging since it identifies the oldest record + in the log, which may indicate the leaker. + """ + if not cmd_args: + print Zstack.__doc__ + return + if int(kern.globals.log_records) == 0: + print "Zone logging not enabled. Add 'zlog=' to boot-args." + return + if int(kern.globals.zlog_btlog) == 0: + print "Zone logging enabled, but zone has not been initialized yet." + return + + count = 1 + if len(cmd_args) >= 2: + count = ArgumentStringToInt(cmd_args[1]) + zstack_index = unsigned(cmd_args[0]) + while count and (zstack_index != 0xffffff): + zstack_record_offset = zstack_index * unsigned(kern.globals.zlog_btlog.btrecord_size) + zstack_record = kern.GetValueFromAddress(unsigned(kern.globals.zlog_btlog.btrecords) + zstack_record_offset, 'btlog_record_t *') + ShowZStackRecord(zstack_record, zstack_index) + zstack_index = zstack_record.next + count -= 1 + +# EndMacro : zstack + +# Macro: findoldest + +@lldb_command('findoldest') +def FindOldest(cmd_args=None): + """ Zone leak debugging: find and print the oldest record in the log. + + Once it prints a stack trace, find the pc of the caller above all the zalloc, kalloc and + IOKit layers. Then use the countpcs command to see how often this caller has allocated + memory. A caller with a high percentage of records in the log is probably the leaker. + """ + if int(kern.globals.log_records) == 0: + print FindOldest.__doc__ + return + if int(kern.globals.zlog_btlog) == 0: + print "Zone logging enabled, but zone has not been initialized yet." 
+        return
+    index = kern.globals.zlog_btlog.head
+    if unsigned(index) != 0xffffff:
+        print "Oldest record is at log index: {0: <d}".format(index)
+        Zstack([index])
+    else:
+        print "No records present."
+
+# EndMacro: findoldest
+
+# Macro: countpcs
+
+@lldb_command('countpcs')
+def Countpcs(cmd_args=None):
+    """ Zone leak debugging: search the log and print a count of all log entries that contain the given <pc>
+        in the stack trace.
+        Usage: countpcs <pc>
+
+        This is useful for verifying a suspected <pc> as being the source of
+        the leak. If a high percentage of the log entries contain the given <pc>, then it's most
+        likely the source of the leak. Note that this command can take several minutes to run.
+    """
+    if not cmd_args:
+        print Countpcs.__doc__
+        return
+    if int(kern.globals.log_records) == 0:
+        print "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args."
+        return
+    if int(kern.globals.zlog_btlog) == 0:
+        print "Zone logging enabled, but zone has not been initialized yet."
+        return
+
+    cpcs_index = unsigned(kern.globals.zlog_btlog.head)
+    target_pc = unsigned(kern.GetValueFromAddress(cmd_args[0], 'void *'))
+    found = 0
+    depth = unsigned(kern.globals.zlog_btlog.btrecord_btdepth)
+
+    while cpcs_index != 0xffffff:
+        cpcs_record_offset = cpcs_index * unsigned(kern.globals.zlog_btlog.btrecord_size)
+        cpcs_record = kern.GetValueFromAddress(unsigned(kern.globals.zlog_btlog.btrecords) + cpcs_record_offset, 'btlog_record_t *')
+        frame = 0
+        while frame < depth:
+            frame_pc = unsigned(cpcs_record.bt[frame])
+            if frame_pc == target_pc:
+                found += 1
+                break
+            frame += 1
+        cpcs_index = cpcs_record.next
+    print "Occurred {0: <d} times in log".format(found)
+
+# EndMacro: countpcs
+
+# Macro: findelem
+
+@lldb_command('findelem')
+def FindElem(cmd_args=None):
+    """ Zone corruption debugging: search the zone log and print out the stack traces for all
+        log entries that refer to the given zone element.
+        Usage: findelem <elem addr>
+
+        When the kernel panics due to a corrupted zone element, get the
+        element address and use this command. This will show you the stack traces of all logged zalloc and
+        zfree operations which tells you who touched the element in the recent past. This also makes
+        double-frees readily apparent.
+    """
+    if not cmd_args:
+        print FindElem.__doc__
+        return
+    if int(kern.globals.log_records) == 0:
+        print "Zone logging not enabled. Add 'zlog=<zone name>' to boot-args."
+        return
+    if int(kern.globals.zlog_btlog) == 0:
+        print "Zone logging enabled, but zone has not been initialized yet."
+        return
+
+    target_element = unsigned(kern.GetValueFromAddress(cmd_args[0], 'void *'))
+    index = unsigned(kern.globals.zlog_btlog.head)
+    prev_op = -1
+
+    while index != 0xffffff:
+        findelem_record_offset = index * unsigned(kern.globals.zlog_btlog.btrecord_size)
+        findelem_record = kern.GetValueFromAddress(unsigned(kern.globals.zlog_btlog.btrecords) + findelem_record_offset, 'btlog_record_t *')
+        if unsigned(findelem_record.element) == target_element:
+            Zstack([index])
+            if int(findelem_record.operation) == prev_op:
+                print "DOUBLE OP!"
+            prev_op = int(findelem_record.operation)
+        index = findelem_record.next
+
+# EndMacro: findelem
+
+# Macro: btlog_find
+
+@lldb_command('btlog_find', "A")
+def BtlogFind(cmd_args=None, cmd_options={}):
+    """ Search the btlog_t for entries corresponding to the given element.
+        Usage: btlog_find <btlog_t> <element>
+        Usage: btlog_find <btlog_t> -A
+        Note: Backtraces will be in chronological order, with oldest entries aged out in FIFO order as needed.
+    """
+    if not cmd_args:
+        raise ArgumentError("Need a btlog_t parameter")
+    btlog = kern.GetValueFromAddress(cmd_args[0], 'btlog_t *')
+    printall = False
+    target_elem = 0xffffff
+
+    if "-A" in cmd_options:
+        printall = True
+    else:
+        if not printall and len(cmd_args) < 2:
+            raise ArgumentError("<element> is missing in args. Need a search pointer.")
+        target_elem = unsigned(kern.GetValueFromAddress(cmd_args[1], 'void *'))
+
+    index = unsigned(btlog.head)
+    progress = 0
+    record_size = unsigned(btlog.btrecord_size)
+    while index != 0xffffff:
+        record_offset = index * record_size
+        record = kern.GetValueFromAddress(unsigned(btlog.btrecords) + record_offset, 'btlog_record_t *')
+        if unsigned(record.element) == target_elem or printall:
+            print '{0: <#x}:'.format(unsigned(record.element))
+            ShowZstackTraceHelper(record.bt, btlog.btrecord_btdepth)
+        index = record.next
+        progress += 1
+
+# EndMacro: btlog_find
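All four log commands above (zstack, countpcs, findelem, btlog_find) walk the same structure: a flat array of fixed-size records threaded together by integer indexes, with 0xffffff as the end-of-list sentinel. A dictionary-based model of that traversal:

    # Model of the btlog index-chain walk (record layout simplified).
    BTLOG_END = 0xffffff

    def walk_btlog(records, head):
        """Yield records in chronological order starting at head."""
        index = head
        while index != BTLOG_END:
            record = records[index]
            yield record
            index = record['next']

    records = [
        {'element': 0x100, 'next': 2},
        {'element': 0x300, 'next': BTLOG_END},
        {'element': 0x200, 'next': 1},
    ]
    # Starting at head index 0 visits records 0 -> 2 -> 1.
    assert [r['element'] for r in walk_btlog(records, 0)] == [0x100, 0x200, 0x300]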
+
+#Macro: showzalloc
+
+@lldb_command('showzalloc')
+def ShowZalloc(cmd_args=None):
+    """ Prints a zallocation from the zallocations array based off its index, and prints the associated symbolicated backtrace.
+        Usage: showzalloc <index>
+    """
+    if not cmd_args:
+        print ShowZalloc.__doc__
+        return
+    if unsigned(kern.globals.zallocations) == 0:
+        print "zallocations array not initialized!"
+        return
+    zallocation = kern.globals.zallocations[ArgumentStringToInt(cmd_args[0])]
+    print zallocation
+    ShowZTrace([str(int(zallocation.za_trace_index))])
+
+#EndMacro: showzalloc
+
+#Macro: showztrace
+
+@lldb_command('showztrace')
+def ShowZTrace(cmd_args=None):
+    """ Prints the backtrace from the ztraces array at <index>
+        Usage: showztrace <trace index>
+    """
+    if not cmd_args:
+        print ShowZTrace.__doc__
+        return
+    if unsigned(kern.globals.ztraces) == 0:
+        print "ztraces array not initialized!"
+        return
+    ztrace_addr = kern.globals.ztraces[ArgumentStringToInt(cmd_args[0])]
+    print ztrace_addr
+    ShowZstackTraceHelper(ztrace_addr.zt_stack, ztrace_addr.zt_depth)
+
+#EndMacro: showztrace
+
+#Macro: showztraceaddr
+
+@lldb_command('showztraceaddr')
+def ShowZTraceAddr(cmd_args=None):
+    """ Prints the struct ztrace passed in.
+        Usage: showztraceaddr <ztrace addr>
+    """
+    if not cmd_args:
+        print ShowZTraceAddr.__doc__
+        return
+    ztrace_ptr = kern.GetValueFromAddress(cmd_args[0], 'struct ztrace *')
+    print dereference(ztrace_ptr)
+    ShowZstackTraceHelper(ztrace_ptr.zt_stack, ztrace_ptr.zt_depth)
+
+#EndMacro: showztraceaddr
+
+#Macro: showzstacktrace
+
+@lldb_command('showzstacktrace')
+def ShowZstackTrace(cmd_args=None):
+    """ Routine to print a stacktrace stored by OSBacktrace.
+        Usage: showzstacktrace <saved stacktrace> [size]
+
+        size is optional, defaults to 15.
+    """
+    if not cmd_args:
+        print ShowZstackTrace.__doc__
+        return
+    void_ptr_type = gettype('void *')
+    void_double_ptr_type = void_ptr_type.GetPointerType()
+    trace = kern.GetValueFromAddress(cmd_args[0], void_double_ptr_type)
+    trace_size = 15
+    if len(cmd_args) >= 2:
+        trace_size = ArgumentStringToInt(cmd_args[1])
+    ShowZstackTraceHelper(trace, trace_size)
+
+#EndMacro: showzstacktrace
+
+def ShowZstackTraceHelper(stack, depth):
+    """ Helper routine for printing a zstack.
+        params:
+          stack: void *[] - An array of pointers representing the Zstack
+          depth: int - The depth of the ztrace stack
+        returns:
+          None
+    """
+    trace_current = 0
+    while trace_current < depth:
+        trace_addr = stack[trace_current]
+        symbol_arr = kern.SymbolicateFromAddress(unsigned(trace_addr))
+        if symbol_arr:
+            symbol_str = str(symbol_arr[0].addr)
+        else:
+            symbol_str = ''
+        print '{0: <#x} {1: <s}'.format(trace_addr, symbol_str)
+        trace_current += 1
+
+#Macro: showzallocsfortrace
+
+@lldb_command('showzallocsfortrace')
+def ShowZallocsForTrace(cmd_args=None):
+    """ Prints all the allocations attributed to the ztrace at <trace index>.
+        Usage: showzallocsfortrace <trace index>
+    """
+    if not cmd_args:
+        print ShowZallocsForTrace.__doc__
+        return
+    print '{0: <5s} {1: <18s} {2: <15s}'.format('INDEX','ADDRESS','SIZE')
+    target_index = ArgumentStringToInt(cmd_args[0])
+    current_index = 0
+    max_zallocation = unsigned(kern.globals.zleak_alloc_buckets)
+    allocation_count = 0
+    while current_index < max_zallocation:
+        current_zalloc = kern.globals.zallocations[current_index]
+        if unsigned(current_zalloc.za_element) != 0 and (unsigned(current_zalloc.za_trace_index) == unsigned(target_index)):
+            print '{0: <5d} {1: <#018x} {2: <6d}'.format(current_index, current_zalloc.za_element, current_zalloc.za_size)
+            allocation_count += 1
+        current_index += 1
+    print 'Total Allocations: {0: <d}'.format(allocation_count)
+
+#EndMacro: showzallocsfortrace
+
+#Macro: showztraces
+
+@lldb_command('showztraces')
+def ShowZTraces(cmd_args=None):
+    """ Prints all traces with size > 0
+    """
+    ShowZTracesAbove([0])
+
+#EndMacro: showztraces
+
+#Macro: showztracesabove
+
+@lldb_command('showztracesabove')
+def ShowZTracesAbove(cmd_args=None):
+    """ Prints all traces with size greater than X
+        Usage: showztracesabove <size>
+    """
+    if not cmd_args:
+        print ShowZTracesAbove.__doc__
+        return
+    print '{0: <5s} {1: <6s}'.format('INDEX','SIZE')
+    current_index = 0
+    ztrace_count = 0
+    max_ztrace = unsigned(kern.globals.zleak_trace_buckets)
+    while current_index < max_ztrace:
+        ztrace_current = kern.globals.ztraces[current_index]
+        if ztrace_current.zt_size > unsigned(cmd_args[0]):
+            print '{0: <5d} {1: <6d}'.format(current_index, int(ztrace_current.zt_size))
+            ztrace_count += 1
+        current_index += 1
+    print 'Total traces: {0: <d}'.format(ztrace_count)
+
+#EndMacro: showztracesabove
+
+# Macro: showtaskvme
+@lldb_command('showtaskvme')
+def ShowTaskVmeHelper(cmd_args=None):
+    """ Display a summary list of the specified vm_map's entries
+        Usage: showtaskvme <task address> (ex. showtaskvme 0x00ataskptr00 )
+    """
+    task = kern.GetValueFromAddress(cmd_args[0], 'task *')
+    ShowTaskVMEntries(task)
+
+@lldb_command('showallvme')
+def ShowAllVME(cmd_args=None):
+    """ Routine to print a summary listing of all the vm map entries
+        Go Through each task in system and show the vm info
+    """
+    for task in kern.tasks:
+        ShowTaskVMEntries(task)
+
+@lldb_command('showallvm')
+def ShowAllVM(cmd_args=None):
+    """ Routine to print a summary listing of all the vm maps
+    """
+    for task in kern.tasks:
+        print GetTaskSummary.header + ' ' + GetProcSummary.header
+        print GetTaskSummary(task) + ' ' + GetProcSummary(Cast(task.bsd_info, 'proc *'))
+        print GetVMMapSummary.header
+        print GetVMMapSummary(task.map)
+
+@lldb_command("showtaskvm")
+def ShowTaskVM(cmd_args=None):
+    """ Display info about the specified task's vm_map
+        syntax: (lldb) showtaskvm <task_t pointer>
+    """
+    if not cmd_args:
+        print ShowTaskVM.__doc__
+        return False
+    task = kern.GetValueFromAddress(cmd_args[0], 'task *')
+    if not task:
+        print "Unknown arguments."
+        return False
+    print GetTaskSummary.header + ' ' + GetProcSummary.header
+    print GetTaskSummary(task) + ' ' + GetProcSummary(Cast(task.bsd_info, 'proc *'))
+    print GetVMMapSummary.header
+    print GetVMMapSummary(task.map)
+    return True
+
+@lldb_command('showallvmstats')
+def ShowAllVMStats(cmd_args=None):
+    """ Print a summary of vm statistics in a table format
+    """
+    vmstats = lambda: None
+    vmstats.wired_count = 0
+    vmstats.resident_count = 0
+    vmstats.resident_max = 0
+    vmstats.internal = 0
+    vmstats.external = 0
+    vmstats.reusable = 0
+    vmstats.compressed = 0
+    vmstats.compressed_peak = 0
+    vmstats.compressed_lifetime = 0
+    vmstats.error = ''
+
+    hdr_format = "{0: >10s} {1: <20s} {2: >6s} {3: >10s} {4: >10s} {5: >10s} {6: >10s} {7: >10s} {8: >10s} {9: >10s} {10: >10s} {11: >10s} {12: >10s} {13: >10s} {14:}"
+    print hdr_format.format('pid', 'command', '#ents', 'wired', 'vsize', 'rsize', 'NEW RSIZE', 'max rsize', 'internal', 'external', 'reusable', 'compressed', 'compressed', 'compressed', '')
+    print hdr_format.format('', '', '', '(pages)', '(pages)', '(pages)', '(pages)', '(pages)', '(pages)', '(pages)', '(pages)', '(current)', '(peak)', '(lifetime)', '')
+    entry_format = "{p.p_pid: >10d} {p.p_comm: <20s} {m.hdr.nentries: >6d} {s.wired_count: >10d} {vsize: >10d} {s.resident_count: >10d} {s.new_resident_count: >10d} {s.resident_max: >10d} {s.internal: >10d} {s.external: >10d} {s.reusable: >10d} {s.compressed: >10d} {s.compressed_peak: >10d} {s.compressed_lifetime: >10d} {s.error}"
+
+    for task in kern.tasks:
+        proc = Cast(task.bsd_info, 'proc *')
+        vmmap = Cast(task.map, '_vm_map *')
+        vmstats.error = ''
+        vmstats.wired_count = vmmap.pmap.stats.wired_count
+        vmstats.resident_count = unsigned(vmmap.pmap.stats.resident_count)
+        vmstats.resident_max = vmmap.pmap.stats.resident_max
+        vmstats.internal = unsigned(vmmap.pmap.stats.internal)
+        vmstats.external = unsigned(vmmap.pmap.stats.external)
+        vmstats.reusable = unsigned(vmmap.pmap.stats.reusable)
+        vmstats.compressed = unsigned(vmmap.pmap.stats.compressed)
+        vmstats.compressed_peak = unsigned(vmmap.pmap.stats.compressed_peak)
+        vmstats.compressed_lifetime = unsigned(vmmap.pmap.stats.compressed_lifetime)
+        vmstats.new_resident_count = vmstats.internal + vmstats.external
+
+        if vmstats.internal < 0:
+            vmstats.error += '*'
+        if vmstats.external < 0:
+            vmstats.error += '*'
+        if vmstats.reusable < 0:
+            vmstats.error += '*'
+        if vmstats.compressed < 0:
+            vmstats.error += '*'
+        if vmstats.compressed_peak < 0:
+            vmstats.error += '*'
+        if vmstats.compressed_lifetime < 0:
+            vmstats.error += '*'
+        if vmstats.new_resident_count + vmstats.reusable != vmstats.resident_count:
+            vmstats.error += '*'
+
+        print entry_format.format(p=proc, m=vmmap, vsize=(unsigned(vmmap.size) >> 12), t=task, s=vmstats)
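The '*' markers ShowAllVMStats emits encode two invariants: no pmap counter may be negative, and internal + external + reusable must add back up to the resident page count. Restated standalone:

    # Model of the consistency checks behind ShowAllVMStats's error column.
    def vmstat_errors(internal, external, reusable, resident):
        errors = ''
        for counter in (internal, external, reusable):
            if counter < 0:
                errors += '*'              # a counter went negative
        if internal + external + reusable != resident:
            errors += '*'                  # counters don't add up
        return errors

    assert vmstat_errors(50, 30, 20, 100) == ''
    assert vmstat_errors(50, 30, 20, 90) == '*'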
+
+
+def ShowTaskVMEntries(task):
+    """ Routine to print out a summary listing of all the entries in a vm_map
+        params:
+            task - core.value : a object of type 'task *'
+        returns:
+            None
+    """
+    print "vm_map entries for task " + hex(task)
+    print GetTaskSummary.header
+    print GetTaskSummary(task)
+    if not task.map:
+        print "Task {0: <#020x} has map = 0x0".format(unsigned(task))
+        return None
+    print GetVMMapSummary.header
+    print GetVMMapSummary(task.map)
+    vme_list_head = task.map.hdr.links
+    vme_ptr_type = GetType('vm_map_entry *')
+    print GetVMEntrySummary.header
+    for vme in IterateQueue(vme_list_head, vme_ptr_type, "links"):
+        print GetVMEntrySummary(vme)
+    return None
+
+@lldb_command("showmap")
+def ShowMap(cmd_args=None):
+    """ Routine to print out info about the specified vm_map
+        usage: showmap <vm_map>
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print "Invalid argument.", ShowMap.__doc__
+        return
+    map_val = kern.GetValueFromAddress(cmd_args[0], 'vm_map_t')
+    print GetVMMapSummary.header
+    print GetVMMapSummary(map_val)
+
+@lldb_command("showmapvme")
+def ShowMapVME(cmd_args=None):
+    """ Routine to print out info about the specified vm_map and its vm entries
+        usage: showmapvme <vm_map>
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print "Invalid argument.", ShowMapVME.__doc__
+        return
+    map_val = kern.GetValueFromAddress(cmd_args[0], 'vm_map_t')
+    print GetVMMapSummary.header
+    print GetVMMapSummary(map_val)
+    vme_list_head = map_val.hdr.links
+    vme_ptr_type = GetType('vm_map_entry *')
+    print GetVMEntrySummary.header
+    for vme in IterateQueue(vme_list_head, vme_ptr_type, "links"):
+        print GetVMEntrySummary(vme)
+    return None
+
+@lldb_type_summary(['_vm_map *', 'vm_map_t'])
+@header("{0: <20s} {1: <20s} {2: <20s} {3: >5s} {4: >5s} {5: <20s} {6: <20s}".format("vm_map", "pmap", "vm_size", "#ents", "rpage", "hint", "first_free"))
+def GetVMMapSummary(vmmap):
+    """ Display interesting bits from vm_map struct """
+    out_string = ""
+    format_string = "{0: <#020x} {1: <#020x} {2: <#020x} {3: >5d} {4: >5d} {5: <#020x} {6: <#020x}"
+    vm_size = uint64_t(vmmap.size).value
+    resident_pages = 0
+    if vmmap.pmap != 0:
+        resident_pages = int(vmmap.pmap.stats.resident_count)
+    out_string += format_string.format(vmmap, vmmap.pmap, vm_size, vmmap.hdr.nentries, resident_pages, vmmap.hint, vmmap.first_free)
+    return out_string
+
+@lldb_type_summary(['vm_map_entry'])
+@header("{0: <20s} {1: <20s} {2: <5s} {3: >7s} {4: <20s} {5: <20s}".format("entry", "start", "prot", "#page", "object", "offset"))
+def GetVMEntrySummary(vme):
+    """ Display vm entry specific information.
""" + out_string = "" + format_string = "{0: <#020x} {1: <#20x} {2: <1x}{3: <1x}{4: <3s} {5: >7d} {6: <#020x} {7: <#020x}" + vme_protection = int(vme.protection) + vme_max_protection = int(vme.max_protection) + vme_extra_info_str ="SC-Ds"[int(vme.inheritance)] + if int(vme.is_sub_map) != 0 : + vme_extra_info_str +="s" + elif int(vme.needs_copy) != 0 : + vme_extra_info_str +="n" + num_pages = (unsigned(vme.links.end) - unsigned(vme.links.start)) >> 12 + out_string += format_string.format(vme, vme.links.start, vme_protection, vme_max_protection, vme_extra_info_str, num_pages, vme.object.vm_object, vme.offset) + return out_string + +# EndMacro: showtaskvme +@lldb_command('showmapwired') +def ShowMapWired(cmd_args=None): + """ Routine to print out a summary listing of all the entries with wired pages in a vm_map + """ + if cmd_args == None or len(cmd_args) < 1: + print "Invalid argument", ShowMapWired.__doc__ + return + map_val = kern.GetValueFromAddress(cmd_args[0], 'vm_map_t') + + +@lldb_type_summary(['kmod_info_t *']) +@header("{0: <20s} {1: <20s} {2: <20s} {3: >3s} {4: >5s} {5: >20s} {6: <30s}".format('kmod_info', 'address', 'size', 'id', 'refs', 'version', 'name')) +def GetKextSummary(kmod): + """ returns a string representation of kext information + """ + out_string = "" + format_string = "{0: <#020x} {1: <#020x} {2: <#020x} {3: >3d} {4: >5d} {5: >20s} {6: <30s}" + out_string += format_string.format(kmod, kmod.address, kmod.size, kmod.id, kmod.reference_count, kmod.version, kmod.name) + return out_string + +@lldb_type_summary(['uuid_t']) +@header("") +def GetUUIDSummary(uuid): + """ returns a string representation like CA50DA4C-CA10-3246-B8DC-93542489AA26 + """ + arr = Cast(addressof(uuid), 'uint8_t *') + data = [] + for i in range(16): + data.append(int(arr[i])) + return "{a[0]:02X}{a[1]:02X}{a[2]:02X}{a[3]:02X}-{a[4]:02X}{a[5]:02X}-{a[6]:02X}{a[7]:02X}-{a[8]:02X}{a[9]:02X}-{a[10]:02X}{a[11]:02X}{a[12]:02X}{a[13]:02X}{a[14]:02X}{a[15]:02X}".format(a=data) + +@lldb_command('showallkmods') +def ShowAllKexts(cmd_args=None): + """Display a summary listing of all loaded kexts (alias: showallkmods) + """ + kmod_val = kern.globals.kmod + print "{: <36s} ".format("UUID") + GetKextSummary.header + kextuuidinfo = GetKextLoadInformation() + for kval in IterateLinkedList(kmod_val, 'next'): + uuid = "........-....-....-....-............" + kaddr = unsigned(kval.address) + for l in kextuuidinfo : + if kaddr == int(l[1],16): + uuid = l[0] + break + print uuid + " " + GetKextSummary(kval) + +def GetKextLoadInformation(addr=0): + """ Extract the kext uuid and load address information from the kernel data structure. + params: + addr - int - optional integer that is the address to search for. 
+def GetKextLoadInformation(addr=0):
+    """ Extract the kext uuid and load address information from the kernel data structure.
+        params:
+            addr - int - optional integer that is the address to search for.
+        returns:
+            [] - array with each entry of format ( 'UUID', 'Hex Load Address', 'Name' )
+    """
+    # because of a known issue, we can't use the summaries field directly
+    #addr = hex(addressof(kern.globals.gLoadedKextSummaries.summaries))
+    baseaddr = unsigned(kern.globals.gLoadedKextSummaries) + 0x10
+    summaries_begin = kern.GetValueFromAddress(baseaddr, 'OSKextLoadedKextSummary *')
+    total_summaries = int(kern.globals.gLoadedKextSummaries.numSummaries)
+    kext_version = int(kern.globals.gLoadedKextSummaries.version)
+    entry_size = 64 + 16 + 8 + 8 + 8 + 4 + 4
+    if kext_version >= 2:
+        entry_size = int(kern.globals.gLoadedKextSummaries.entry_size)
+    retval = []
+    for i in range(total_summaries):
+        tmpaddress = unsigned(summaries_begin) + (i * entry_size)
+        current_kext = kern.GetValueFromAddress(tmpaddress, 'OSKextLoadedKextSummary *')
+        if addr != 0:
+            if addr == unsigned(current_kext.address):
+                retval.append((GetUUIDSummary(current_kext.uuid), hex(current_kext.address), str(current_kext.name)))
+        else:
+            retval.append((GetUUIDSummary(current_kext.uuid), hex(current_kext.address), str(current_kext.name)))
+
+    return retval
+
+lldb_alias('showallkexts', 'showallkmods')
+
+def GetOSKextVersion(version_num):
+    """ returns a string of format 1.2.3x from the version_num
+        params: version_num - int
+        return: str
+    """
+    if version_num == -1:
+        return "invalid"
+    (MAJ_MULT, MIN_MULT, REV_MULT, STAGE_MULT) = (100000000, 1000000, 10000, 1000)
+    version = version_num
+
+    vers_major = version / MAJ_MULT
+    version = version - (vers_major * MAJ_MULT)
+
+    vers_minor = version / MIN_MULT
+    version = version - (vers_minor * MIN_MULT)
+
+    vers_revision = version / REV_MULT
+    version = version - (vers_revision * REV_MULT)
+
+    vers_stage = version / STAGE_MULT
+    version = version - (vers_stage * STAGE_MULT)
+
+    vers_stage_level = version
+
+    out_str = "%d.%d" % (vers_major, vers_minor)
+    if vers_revision > 0: out_str += ".%d" % vers_revision
+    if vers_stage == 1: out_str += "d%d" % vers_stage_level
+    if vers_stage == 3: out_str += "a%d" % vers_stage_level
+    if vers_stage == 5: out_str += "b%d" % vers_stage_level
+    if vers_stage == 6: out_str += "fc%d" % vers_stage_level
+
+    return out_str
+
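+# Worked example of the encoding GetOSKextVersion decodes: version 1.2.3d4 is
+# stored as 1*100000000 + 2*1000000 + 3*10000 + 1*1000 + 4 = 102031004, where
+# the stage digit (1000s place) selects d/a/b/fc and the low three digits are
+# the stage level. So GetOSKextVersion(102031004) returns "1.2.3d4".
+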
+ """ + kext_count = int(kern.globals.sKextsByID.count) + index = 0 + kext_dictionary = kern.globals.sKextsByID.dictionary + print "%d kexts in sKextsByID:" % kext_count + print "{0: <20s} {1: <20s} {2: >5s} {3: >20s} {4: <30s}".format('OSKEXT *', 'load_addr', 'id', 'version', 'name') + format_string = "{0: <#020x} {1: <20s} {2: >5s} {3: >20s} {4: <30s}" + + while index < kext_count: + kext_dict = GetObjectAtIndexFromArray(kext_dictionary, index) + kext_name = str(kext_dict.key.string) + osk = Cast(kext_dict.value, 'OSKext *') + if int(osk.flags.loaded) : + load_addr = "{0: <#020x}".format(osk.kmod_info) + id = "{0: >5d}".format(osk.loadTag) + else: + load_addr = "------" + id = "--" + version_num = unsigned(osk.version) + version = GetOSKextVersion(version_num) + print format_string.format(osk, load_addr, id, version, kext_name) + index += 1 + + return + +@lldb_command('showkmodaddr') +def ShowKmodAddr(cmd_args=[]): + """ Given an address, print the offset and name for the kmod containing it + Syntax: (lldb) showkmodaddr + """ + if len(cmd_args) < 1: + raise ArgumentError("Insufficient arguments") + + addr = ArgumentStringToInt(cmd_args[0]) + kmod_val = kern.globals.kmod + for kval in IterateLinkedList(kmod_val, 'next'): + if addr >= unsigned(kval.address) and addr <= (unsigned(kval.address) + unsigned(kval.size)): + print GetKextSummary.header + print GetKextSummary(kval) + " offset = {0: #0x}".format((addr - unsigned(kval.address))) + return True + return False + +@lldb_command('addkext','F:N:') +def AddKextSyms(cmd_args=[], cmd_options={}): + """ Add kext symbols into lldb. + This command finds symbols for a uuid and load the required executable + Usage: + addkext : Load one kext based on uuid. eg. (lldb)addkext 4DD2344C0-4A81-3EAB-BDCF-FEAFED9EB73E + addkext -F : Load kext executable at specified load address + addkext -N : Load one kext that matches the name provided. eg. (lldb) addkext -N corecrypto + addkext all : Will load all the kext symbols - SLOW + """ + + + if "-F" in cmd_options: + exec_path = cmd_options["-F"] + exec_full_path = ResolveFSPath(exec_path) + if not os.path.exists(exec_full_path): + raise ArgumentError("Unable to resolve {:s}".format(exec_path)) + + if not os.path.isfile(exec_full_path): + raise ArgumentError("Path is {:s} not a filepath. \nPlease check that path points to executable.\ +\nFor ex. 
path/to/Symbols/IOUSBFamily.kext/Contents/PlugIns/AppleUSBHub.kext/Contents/MacOS/AppleUSBHub.\ +\nNote: LLDB does not support adding kext based on directory paths like gdb used to.".format(exec_path)) + if not os.access(exec_full_path, os.X_OK): + raise ArgumentError("Path is {:s} not an executable file".format(exec_path)) + + slide_value = None + if cmd_args: + slide_value = cmd_args[0] + debuglog("loading slide value from user input %s" % cmd_args[0]) + + filespec = lldb.SBFileSpec(exec_full_path, False) + print "target modules add %s" % exec_full_path + print lldb_run_command("target modules add %s" % exec_full_path) + loaded_module = LazyTarget.GetTarget().FindModule(filespec) + if loaded_module.IsValid(): + uuid_str = loaded_module.GetUUIDString() + debuglog("added module %s with uuid %s" % (exec_full_path, uuid_str)) + if slide_value is None: + all_kexts_info = GetKextLoadInformation() + for k in all_kexts_info: + debuglog(k[0]) + if k[0].lower() == uuid_str.lower(): + slide_value = k[1] + debuglog("found the slide %s for uuid %s" % (k[1], k[0])) + + if slide_value is None: + raise ArgumentError("Unable to find load address for module described at %s " % exec_full_path) + load_cmd = "target modules load --file %s --slide %s" % (exec_full_path, str(slide_value)) + print load_cmd + print lldb_run_command(load_cmd) + kern.symbolicator = None + return True + + all_kexts_info = GetKextLoadInformation() + + if "-N" in cmd_options: + kext_name = cmd_options["-N"] + kext_name_matches = GetLongestMatchOption(kext_name, [str(x[2]) for x in all_kexts_info], True) + if len(kext_name_matches) != 1: + print "Ambiguous match for name: {:s}".format(kext_name) + if len(kext_name_matches) > 0: + print "Options are:\n\t" + "\n\t".join(kext_name_matches) + return + debuglog("matched the kext to name %s and uuid %s" % (kext_name_matches[0], kext_name)) + for x in all_kexts_info: + if kext_name_matches[0] == x[2]: + cur_uuid = x[0].lower() + print "Fetching dSYM for {:s}".format(cur_uuid) + info = dsymForUUID(cur_uuid) + if info and 'DBGSymbolRichExecutable' in info: + print "Adding dSYM ({0:s}) for {1:s}".format(cur_uuid, info['DBGSymbolRichExecutable']) + addDSYM(cur_uuid, info) + loadDSYM(cur_uuid, int(x[1],16)) + else: + print "Failed to get symbol info for {:s}".format(cur_uuid) + break + kern.symbolicator = None + return + + if len(cmd_args) < 1: + raise ArgumentError("No arguments specified.") + + uuid = cmd_args[0].lower() + + load_all_kexts = False + if uuid == "all": + load_all_kexts = True + + if not load_all_kexts and len(uuid_regex.findall(uuid)) == 0: + raise ArgumentError("Unknown argument {:s}".format(uuid)) + + for k_info in all_kexts_info: + cur_uuid = k_info[0].lower() + if load_all_kexts or (uuid == cur_uuid): + print "Fetching dSYM for %s" % cur_uuid + info = dsymForUUID(cur_uuid) + if info and 'DBGSymbolRichExecutable' in info: + print "Adding dSYM (%s) for %s" % (cur_uuid, info['DBGSymbolRichExecutable']) + addDSYM(cur_uuid, info) + loadDSYM(cur_uuid, int(k_info[1],16)) + else: + print "Failed to get symbol info for %s" % cur_uuid + #end of for loop + kern.symbolicator = None + return True + + + +lldb_alias('showkmod', 'showkmodaddr') +lldb_alias('showkext', 'showkmodaddr') +lldb_alias('showkextaddr', 'showkmodaddr') + +@lldb_type_summary(['mount *']) +@header("{0: <20s} {1: <20s} {2: <20s} {3: <12s} {4: <12s} {5: <12s} {6: >6s} {7: <30s} {8: <35s}".format('volume(mp)', 'mnt_data', 'mnt_devvp', 'flag', 'kern_flag', 'lflag', 'type', 'mnton', 'mntfrom')) +def GetMountSummary(mount): 
+ """ Display a summary of mount on the system + """ + out_string = ("{mnt: <#020x} {mnt.mnt_data: <#020x} {mnt.mnt_devvp: <#020x} {mnt.mnt_flag: <#012x} " + + "{mnt.mnt_kern_flag: <#012x} {mnt.mnt_lflag: <#012x} {vfs.f_fstypename: >6s} " + + "{vfs.f_mntonname: <30s} {vfs.f_mntfromname: <35s}").format(mnt=mount, vfs=mount.mnt_vfsstat) + return out_string + +@lldb_command('showallmounts') +def ShowAllMounts(cmd_args=None): + """ Print all mount points + """ + mntlist = kern.globals.mountlist + print GetMountSummary.header + for mnt in IterateTAILQ_HEAD(mntlist, 'mnt_list'): + print GetMountSummary(mnt) + return + +lldb_alias('ShowAllVols', 'showallmounts') + +@lldb_command('systemlog') +def ShowSystemLog(cmd_args=None): + """ Display the kernel's printf ring buffer """ + msgbufp = kern.globals.msgbufp + msg_size = int(msgbufp.msg_size) + msg_bufx = int(msgbufp.msg_bufx) + msg_bufr = int(msgbufp.msg_bufr) + msg_bufc = msgbufp.msg_bufc + msg_bufc_data = msg_bufc.GetSBValue().GetPointeeData(0, msg_size) + + # the buffer is circular; start at the write pointer to end, + # then from beginning to write pointer + line = '' + err = lldb.SBError() + for i in range(msg_bufx, msg_size) + range(0, msg_bufx) : + err.Clear() + cbyte = msg_bufc_data.GetUnsignedInt8(err, i) + if not err.Success() : + raise ValueError("Failed to read character at offset " + i + ": " + err.GetCString()) + c = chr(cbyte) + if c == '\0' : + continue + elif c == '\n' : + print line + line = '' + else : + line += c + + if len(line) > 0 : + print line + + return + +@static_var('output','') +def _GetVnodePathName(vnode, vnodename): + """ Internal function to get vnode path string from vnode structure. + params: + vnode - core.value + vnodename - str + returns Nothing. The output will be stored in the static variable. 
+ """ + if not vnode: + return + if int(vnode.v_flag) & 0x1 and int(hex(vnode.v_mount), 16) !=0: + if int(vnode.v_mount.mnt_vnodecovered): + _GetVnodePathName(vnode.v_mount.mnt_vnodecovered, str(vnode.v_mount.mnt_vnodecovered.v_name) ) + else: + _GetVnodePathName(vnode.v_parent, str(vnode.v_parent.v_name)) + _GetVnodePathName.output += "/%s" % vnodename + +def GetVnodePath(vnode): + """ Get string representation of the vnode + params: vnodeval - value representing vnode * in the kernel + return: str - of format /path/to/something + """ + out_str = '' + if vnode: + if (int(vnode.v_flag) & 0x000001) and int(hex(vnode.v_mount), 16) != 0 and (int(vnode.v_mount.mnt_flag) & 0x00004000) : + out_str += "/" + else: + _GetVnodePathName.output = '' + if abs(vnode.v_name) != 0: + _GetVnodePathName(vnode, str(vnode.v_name)) + out_str += _GetVnodePathName.output + else: + out_str += 'v_name = NULL' + _GetVnodePathName.output = '' + return out_str + + +@lldb_command('showvnodepath') +def ShowVnodePath(cmd_args=None): + """ Prints the path for a vnode + usage: showvnodepath + """ + if cmd_args != None and len(cmd_args) > 0 : + vnode_val = kern.GetValueFromAddress(cmd_args[0], 'vnode *') + if vnode_val: + print GetVnodePath(vnode_val) + return + +# Macro: showvnodedev +def GetVnodeDevInfo(vnode): + """ Internal function to get information from the device type vnodes + params: vnode - value representing struct vnode * + return: str - formatted output information for block and char vnode types passed as param + """ + vnodedev_output = "" + vblk_type = GetEnumValue('vtype::VBLK') + vchr_type = GetEnumValue('vtype::VCHR') + if (vnode.v_type == vblk_type) or (vnode.v_type == vchr_type): + devnode = Cast(vnode.v_data, 'devnode_t *') + devnode_dev = devnode.dn_typeinfo.dev + devnode_major = (devnode_dev >> 24) & 0xff + devnode_minor = devnode_dev & 0x00ffffff + + # boilerplate device information for a vnode + vnodedev_output += "Device Info:\n\t vnode:\t\t{:#x}".format(vnode) + vnodedev_output += "\n\t type:\t\t" + if (vnode.v_type == vblk_type): + vnodedev_output += "VBLK" + if (vnode.v_type == vchr_type): + vnodedev_output += "VCHR" + vnodedev_output += "\n\t name:\t\t{: 42) or (devnode_major < 0): + vnodedev_output += "Invalid major #\n" + # static assignments in conf + elif (devnode_major == 0): + vnodedev_output += "Console mux device\n" + elif (devnode_major == 2): + vnodedev_output += "Current tty alias\n" + elif (devnode_major == 3): + vnodedev_output += "NULL device\n" + elif (devnode_major == 4): + vnodedev_output += "Old pty slave\n" + elif (devnode_major == 5): + vnodedev_output += "Old pty master\n" + elif (devnode_major == 6): + vnodedev_output += "Kernel log\n" + elif (devnode_major == 12): + vnodedev_output += "Memory devices\n" + # Statically linked dynamic assignments + elif unsigned(kern.globals.cdevsw[devnode_major].d_open) == unsigned(kern.GetLoadAddressForSymbol('ptmx_open')): + vnodedev_output += "Cloning pty master not done\n" + #GetVnodeDevCpty(devnode_major, devnode_minor) + elif unsigned(kern.globals.cdevsw[devnode_major].d_open) == unsigned(kern.GetLoadAddressForSymbol('ptsd_open')): + vnodedev_output += "Cloning pty slave not done\n" + #GetVnodeDevCpty(devnode_major, devnode_minor) + else: + vnodedev_output += "RESERVED SLOT\n" + else: + vnodedev_output += "{:#x} is not a device".format(vnode) + return vnodedev_output + +@lldb_command('showvnodedev') +def ShowVnodeDev(cmd_args=None): + """ Routine to display details of all vnodes of block and character device types + Usage: 
showvnodedev
+ """ + if not cmd_args: + print "No arguments passed" + print ShowVnodeDev.__doc__ + return False + vnode_val = kern.GetValueFromAddress(cmd_args[0], 'vnode *') + if not vnode_val: + print "unknown arguments:", str(cmd_args) + return False + print GetVnodeDevInfo(vnode_val) + +# EndMacro: showvnodedev + +# Macro: showvnodelocks +def GetVnodeLock(lockf): + """ Internal function to get information from the given advisory lock + params: lockf - value representing v_lockf member in struct vnode * + return: str - formatted output information for the advisory lock + """ + vnode_lock_output = '' + lockf_flags = lockf.lf_flags + lockf_type = lockf.lf_type + if lockf_flags & 0x20: + vnode_lock_output += ("{: <8s}").format('flock') + if lockf_flags & 0x40: + vnode_lock_output += ("{: <8s}").format('posix') + if lockf_flags & 0x80: + vnode_lock_output += ("{: <8s}").format('prov') + if lockf_flags & 0x10: + vnode_lock_output += ("{: <4s}").format('W') + else: + vnode_lock_output += ("{: <4s}").format('.') + + # POSIX file vs advisory range locks + if lockf_flags & 0x40: + lockf_proc = Cast(lockf.lf_id, 'proc *') + vnode_lock_output += ("PID {: <18d}").format(lockf_proc.p_pid) + else: + vnode_lock_output += ("ID {: <#019x}").format(int(lockf.lf_id)) + + # lock type + if lockf_type == 1: + vnode_lock_output += ("{: <12s}").format('shared') + else: + if lockf_type == 3: + vnode_lock_output += ("{: <12s}").format('exclusive') + else: + if lockf_type == 2: + vnode_lock_output += ("{: <12s}").format('unlock') + else: + vnode_lock_output += ("{: <12s}").format('unknown') + + # start and stop values + vnode_lock_output += ("{: #018x} ..").format(lockf.lf_start) + vnode_lock_output += ("{: #018x}\n").format(lockf.lf_end) + return vnode_lock_output + +@header("{0: <3s} {1: <7s} {2: <3s} {3: <21s} {4: <11s} {5: ^19s} {6: ^17s}".format('*', 'type', 'W', 'held by', 'lock type', 'start', 'end')) +def GetVnodeLocksSummary(vnode): + """ Internal function to get summary of advisory locks for the given vnode + params: vnode - value representing the vnode object + return: str - formatted output information for the summary of advisory locks + """ + out_str = '' + if vnode: + lockf_list = vnode.v_lockf + for lockf_itr in IterateLinkedList(lockf_list, 'lf_next'): + out_str += ("{: <4s}").format('H') + out_str += GetVnodeLock(lockf_itr) + lockf_blocker = lockf_itr.lf_blkhd.tqh_first + while lockf_blocker: + out_str += ("{: <4s}").format('>') + out_str += GetVnodeLock(lockf_blocker) + lockf_blocker = lockf_blocker.lf_block.tqe_next + return out_str + +@lldb_command('showvnodelocks') +def ShowVnodeLocks(cmd_args=None): + """ Routine to display list of advisory record locks for the given vnode address + Usage: showvnodelocks
+ """ + if not cmd_args: + print "No arguments passed" + print ShowVnodeLocks.__doc__ + return False + vnode_val = kern.GetValueFromAddress(cmd_args[0], 'vnode *') + if not vnode_val: + print "unknown arguments:", str(cmd_args) + return False + print GetVnodeLocksSummary.header + print GetVnodeLocksSummary(vnode_val) + +# EndMacro: showvnodelocks + +# Macro: showproclocks + +@lldb_command('showproclocks') +def ShowProcLocks(cmd_args=None): + """ Routine to display list of advisory record locks for the given process + Usage: showproclocks
+ """ + if not cmd_args: + print "No arguments passed" + print ShowProcLocks.__doc__ + return False + proc = kern.GetValueFromAddress(cmd_args[0], 'proc *') + if not proc: + print "unknown arguments:", str(cmd_args) + return False + out_str = '' + proc_filedesc = proc.p_fd + fd_lastfile = proc_filedesc.fd_lastfile + fd_ofiles = proc_filedesc.fd_ofiles + count = 0 + seen = 0 + while count <= fd_lastfile: + if fd_ofiles[count]: + fglob = fd_ofiles[count].f_fglob + fo_type = fglob.fg_ops.fo_type + if fo_type == 1: + fg_data = fglob.fg_data + fg_vnode = Cast(fg_data, 'vnode *') + name = fg_vnode.v_name + lockf_itr = fg_vnode.v_lockf + if lockf_itr: + if not seen: + print GetVnodeLocksSummary.header + seen = seen + 1 + out_str += ("\n( fd {:d}, name ").format(count) + if not name: + out_str += "(null) )\n" + else: + out_str += "{:s} )\n".format(name) + print out_str + print GetVnodeLocksSummary(fg_vnode) + count = count + 1 + print "\n{0: d} total locks for {1: #018x}".format(seen, proc) + +# EndMacro: showproclocks + +@lldb_type_summary(['vnode_t', 'vnode *']) +@header("{0: <20s} {1: >8s} {2: >8s} {3: <20s} {4: <6s} {5: <20s} {6: <6s} {7: <35s}".format('vnode', 'usecount', 'iocount', 'v_data', 'vtype', 'parent', 'mapped', 'name')) +def GetVnodeSummary(vnode): + """ Get a summary of important information out of vnode + """ + out_str = '' + format_string = "{0: <#020x} {1: >8d} {2: >8d} {3: <#020x} {4: <6s} {5: <#020x} {6: <6s} {7: <35s}" + usecount = int(vnode.v_usecount) + iocount = int(vnode.v_iocount) + v_data_ptr = int(hex(vnode.v_data), 16) + vtype = int(vnode.v_type) + vtype_str = "%d" % vtype + vnode_types = ['VNON', 'VREG', 'VDIR', 'VBLK', 'VCHR', 'VLNK', 'VSOCK', 'VFIFO', 'VBAD', 'VSTR', 'VCPLX'] # see vnode.h for enum type definition + if vtype >= 0 and vtype < len(vnode_types): + vtype_str = vnode_types[vtype] + parent_ptr = int(hex(vnode.v_parent), 16) + name_ptr = int(hex(vnode.v_name), 16) + name ="" + if name_ptr != 0: + name = str(vnode.v_name) + elif int(vnode.v_tag) == 16 : + cnode = Cast(vnode.v_data, 'cnode *') + name = "hfs: %s" % str( Cast(cnode.c_desc.cd_nameptr, 'char *')) + mapped = '-' + if (vtype == 1) and (vnode.v_un.vu_ubcinfo != 0): + # Check to see if vnode is mapped/unmapped + if (vnode.v_un.vu_ubcinfo.ui_flags & 0x8) != 0: + mapped = '1' + else: + mapped = '0' + out_str += format_string.format(vnode, usecount, iocount, v_data_ptr, vtype_str, parent_ptr, mapped, name) + return out_str + +@lldb_command('showallvnodes') +def ShowAllVnodes(cmd_args=None): + """ Display info about all vnodes + """ + mntlist = kern.globals.mountlist + print GetVnodeSummary.header + for mntval in IterateTAILQ_HEAD(mntlist, 'mnt_list'): + for vnodeval in IterateTAILQ_HEAD(mntval.mnt_vnodelist, 'v_mntvnodes'): + print GetVnodeSummary(vnodeval) + return + +@lldb_command('showvnode') +def ShowVnode(cmd_args=None): + """ Display info about one vnode + usage: showvnode + """ + if cmd_args == None or len(cmd_args) < 1: + print "Please provide valid vnode argument. Type help showvnode for help." + return + vnodeval = kern.GetValueFromAddress(cmd_args[0],'vnode *') + print GetVnodeSummary.header + print GetVnodeSummary(vnodeval) + +@lldb_command('showvolvnodes') +def ShowVolVnodes(cmd_args=None): + """ Display info about all vnodes of a given mount_t + """ + if cmd_args == None or len(cmd_args) < 1: + print "Please provide a valide mount_t argument. 
+@lldb_command('showvolvnodes')
+def ShowVolVnodes(cmd_args=None):
+    """ Display info about all vnodes of a given mount_t
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print "Please provide a valid mount_t argument. Try 'help showvolvnodes' for help"
+        return
+    mntval = kern.GetValueFromAddress(cmd_args[0], 'mount_t')
+    print GetVnodeSummary.header
+    for vnodeval in IterateTAILQ_HEAD(mntval.mnt_vnodelist, 'v_mntvnodes'):
+        print GetVnodeSummary(vnodeval)
+    return
+
+@lldb_command('showvolbusyvnodes')
+def ShowVolBusyVnodes(cmd_args=None):
+    """ Display info about busy (iocount!=0) vnodes of a given mount_t
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print "Please provide a valid mount_t argument. Try 'help showvolbusyvnodes' for help"
+        return
+    mntval = kern.GetValueFromAddress(cmd_args[0], 'mount_t')
+    print GetVnodeSummary.header
+    for vnodeval in IterateTAILQ_HEAD(mntval.mnt_vnodelist, 'v_mntvnodes'):
+        if int(vnodeval.v_iocount) != 0:
+            print GetVnodeSummary(vnodeval)
+
+@lldb_command('showallbusyvnodes')
+def ShowAllBusyVnodes(cmd_args=None):
+    """ Display info about all busy (iocount!=0) vnodes
+    """
+    mntlistval = kern.globals.mountlist
+    for mntval in IterateTAILQ_HEAD(mntlistval, 'mnt_list'):
+        ShowVolBusyVnodes([hex(mntval)])
+
+@lldb_command('print_vnode')
+def PrintVnode(cmd_args=None):
+    """ Prints out the fields of a vnode struct
+        Usage: print_vnode <vnode>
+    """
+    if not cmd_args:
+        print "Please provide valid vnode argument. Type help print_vnode for help."
+        return
+    ShowVnode(cmd_args)
+
+@lldb_command('showworkqvnodes')
+def ShowWorkqVnodes(cmd_args=None):
+    """ Print the vnode worker list
+        Usage: showworkqvnodes <struct mount *>
+    """
+    if not cmd_args:
+        print "Please provide valid mount argument. Type help showworkqvnodes for help."
+        return
+
+    mp = kern.GetValueFromAddress(cmd_args[0], 'mount *')
+    vp = Cast(mp.mnt_workerqueue.tqh_first, 'vnode *')
+    print GetVnodeSummary.header
+    while int(vp) != 0:
+        print GetVnodeSummary(vp)
+        vp = vp.v_mntvnodes.tqe_next
+
+@lldb_command('shownewvnodes')
+def ShowNewVnodes(cmd_args=None):
+    """ Print the new vnode list
+        Usage: shownewvnodes <struct mount *>
+    """
+    if not cmd_args:
+        print "Please provide valid mount argument. Type help shownewvnodes for help."
+        return
+    mp = kern.GetValueFromAddress(cmd_args[0], 'mount *')
+    vp = Cast(mp.mnt_newvnodes.tqh_first, 'vnode *')
+    print GetVnodeSummary.header
+    while int(vp) != 0:
+        print GetVnodeSummary(vp)
+        vp = vp.v_mntvnodes.tqe_next
+
+
+@lldb_command('showprocvnodes')
+def ShowProcVnodes(cmd_args=None):
+    """ Routine to print out all the open fds which are vnodes in a process
+        Usage: showprocvnodes <proc *>
+    """
+    if not cmd_args:
+        print "Please provide valid proc argument. Type help showprocvnodes for help."
+        return
+    procptr = kern.GetValueFromAddress(cmd_args[0], 'proc *')
+    fdptr = Cast(procptr.p_fd, 'filedesc *')
+    if int(fdptr.fd_cdir) != 0:
+        print '{0: <25s}\n{1: <s}'.format('Current Working Directory:', GetVnodeSummary(fdptr.fd_cdir))
+    if int(fdptr.fd_rdir) != 0:
+        print '{0: <25s}\n{1: <s}'.format('Current Root Directory:', GetVnodeSummary(fdptr.fd_rdir))
+    count = 0
+    print '\n' + '{0: <5s} {1: <7s}'.format('fd', 'flags') + GetVnodeSummary.header
+    # lldb fails to cast addresses to double pointers, so walk fd_ofiles manually
+    fpptr = Cast(fdptr.fd_ofiles, 'fileproc *')
+    while count < fdptr.fd_nfiles:
+        fpp = dereference(fpptr)
+        fproc = Cast(fpp, 'fileproc *')
+        if int(fproc) != 0:
+            fglob = dereference(fproc).f_fglob
+            flags = ""
+            if (int(fglob) != 0) and (int(fglob.fg_ops.fo_type) == 1):
+                if (fdptr.fd_ofileflags[count] & 1): flags += 'E'
+                if (fdptr.fd_ofileflags[count] & 2): flags += 'F'
+                if (fdptr.fd_ofileflags[count] & 4): flags += 'R'
+                if (fdptr.fd_ofileflags[count] & 8): flags += 'C'
+                print '{0: <5d} {1: <7s}'.format(count, flags) + GetVnodeSummary(Cast(fglob.fg_data, 'vnode *'))
+        count += 1
+        fpptr = kern.GetValueFromAddress(int(fpptr) + kern.ptrsize, 'fileproc *')
+
+@lldb_command('showallprocvnodes')
+def ShowAllProcVnodes(cmd_args=None):
+    """ Routine to print out all the open fds which are vnodes
+    """
+    procptr = Cast(kern.globals.allproc.lh_first, 'proc *')
+    while procptr and int(procptr) != 0:
+        print '{:<32s}'.format("=" * 32)
+        print GetProcInfo(procptr)
+        ShowProcVnodes([int(procptr)])
+        procptr = procptr.p_list.le_next
+
+@xnudebug_test('test_vnode')
+def TestShowAllVnodes(kernel_target, config, lldb_obj, isConnected):
+    """ Test the functionality of vnode commands
+        returns
+            - True on success and False on failure.
+    """
+    if not isConnected:
+        print "Target is not connected. Cannot test showallvnodes"
+        return False
+    res = lldb.SBCommandReturnObject()
+    lldb_obj.debugger.GetCommandInterpreter().HandleCommand("showallvnodes", res)
+    result = res.GetOutput()
+    if len(result.split("\n")) > 2 and result.find('VREG') != -1 and len(result.splitlines()[2].split()) > 5:
+        return True
+    else:
+        return False
+
+# Macro: showallmtx
+@lldb_type_summary(['_lck_grp_ *'])
+def GetMutexEntry(mtxg):
+    """ Summarize a mutex group entry with important information.
+        params:
+            mtxg: value - obj representing a mutex group in kernel
+        returns:
+            out_string - summary of the mutex group
+    """
+    out_string = ""
+
+    if kern.ptrsize == 8:
+        format_string = '{0:#018x} {1:10d} {2:10d} {3:10d} {4:10d} {5: <30s} '
+    else:
+        format_string = '{0:#010x} {1:10d} {2:10d} {3:10d} {4:10d} {5: <30s} '
+
+    if mtxg.lck_grp_mtxcnt:
+        out_string += format_string.format(mtxg, mtxg.lck_grp_mtxcnt, mtxg.lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_util_cnt,
+                                           mtxg.lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_miss_cnt,
+                                           mtxg.lck_grp_stat.lck_grp_mtx_stat.lck_grp_mtx_wait_cnt, mtxg.lck_grp_name)
+    return out_string
+
+@lldb_command('showallmtx')
+def ShowAllMtx(cmd_args=None):
+    """ Routine to print a summary listing of all mutexes
+    """
+    if kern.ptrsize == 8:
+        hdr_format = '{:<18s} {:>10s} {:>10s} {:>10s} {:>10s} {:<30s} '
+    else:
+        hdr_format = '{:<10s} {:>10s} {:>10s} {:>10s} {:>10s} {:<30s} '
+
+    print hdr_format.format('LCK GROUP', 'CNT', 'UTIL', 'MISS', 'WAIT', 'NAME')
+
+    mtxgrp_queue_head = kern.globals.lck_grp_queue
+    mtxgrp_ptr_type = GetType('_lck_grp_ *')
+
+    for mtxgrp_ptr in IterateQueue(mtxgrp_queue_head, mtxgrp_ptr_type, "lck_grp_link"):
+        print GetMutexEntry(mtxgrp_ptr)
+    return
+# EndMacro: showallmtx
+
+# Macro: showallrwlck
+@lldb_type_summary(['_lck_grp_ *'])
+def GetRWLEntry(rwlg):
+    """ Summarize a reader writer lock group with important information.
+ params: + rwlg: value - obj representing a reader writer lock group in kernel + returns: + out_string - summary of the reader writer lock group + """ + out_string = "" + + if kern.ptrsize == 8: + format_string = '{0:#018x} {1:10d} {2:10d} {3:10d} {4:10d} {5: <30s} ' + else: + format_string = '{0:#010x} {1:10d} {2:10d} {3:10d} {4:10d} {5: <30s} ' + + if rwlg.lck_grp_rwcnt: + out_string += format_string.format(rwlg, rwlg.lck_grp_rwcnt,rwlg.lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_util_cnt, + rwlg.lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_miss_cnt, + rwlg.lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt, rwlg.lck_grp_name) + return out_string + +@lldb_command('showallrwlck') +def ShowAllRWLck(cmd_args=None): + """ Routine to print a summary listing of all read/writer locks + """ + if kern.ptrsize == 8: + hdr_format = '{:<18s} {:>10s} {:>10s} {:>10s} {:>10s} {:<30s} ' + else: + hdr_format = '{:<10s} {:>10s} {:>10s} {:>10s} {:>10s} {:<30s} ' + + print hdr_format.format('LCK GROUP', 'CNT', 'UTIL', 'MISS', 'WAIT', 'NAME') + + rwlgrp_queue_head = kern.globals.lck_grp_queue + rwlgrp_ptr_type = GetType('_lck_grp_ *') + for rwlgrp_ptr in IterateQueue(rwlgrp_queue_head, rwlgrp_ptr_type, "lck_grp_link"): + print GetRWLEntry(rwlgrp_ptr) + return +# EndMacro: showallrwlck + +#Macro: showbootermemorymap +@lldb_command('showbootermemorymap') +def ShowBooterMemoryMap(cmd_args=None): + """ Prints out the phys memory map from kernelBootArgs + Supported only on x86_64 + """ + if kern.arch == 'x86_64': + voffset = unsigned(0xFFFFFF8000000000) + else: + print "showbootermemorymap not supported on this architecture" + return + + out_string = "" + + # Memory type map + memtype_dict = { + 0: 'Reserved', + 1: 'LoaderCode', + 2: 'LoaderData', + 3: 'BS_code', + 4: 'BS_data', + 5: 'RT_code', + 6: 'RT_data', + 7: 'Convention', + 8: 'Unusable', + 9: 'ACPI_recl', + 10: 'ACPI_NVS', + 11: 'MemMapIO', + 12: 'MemPortIO', + 13: 'PAL_code' + } + + boot_args = kern.globals.kernelBootArgs + msize = boot_args.MemoryMapDescriptorSize + mcount = (boot_args.MemoryMapSize) / unsigned(msize) + + out_string += "{0: <12s} {1: <19s} {2: <19s} {3: <19s} {4: <10s}\n".format("Type", "Physical Start", "Number of Pages", "Virtual Start", "Attributes") + + i = 0 + while i < mcount: + mptr = kern.GetValueFromAddress(unsigned(boot_args.MemoryMap) + voffset + unsigned(i*msize), 'EfiMemoryRange *') + mtype = unsigned(mptr.Type) + if mtype in memtype_dict: + out_string += "{0: <12s}".format(memtype_dict[mtype]) + else: + out_string += "{0: <12s}".format("UNKNOWN") + + if mptr.VirtualStart == 0: + out_string += "{0: #019x} {1: #019x} {2: <19s} {3: #019x}\n".format(mptr.PhysicalStart, mptr.NumberOfPages, ' '*19, mptr.Attribute) + else: + out_string += "{0: #019x} {1: #019x} {2: #019x} {3: #019x}\n".format(mptr.PhysicalStart, mptr.NumberOfPages, mptr.VirtualStart, mptr.Attribute) + i = i + 1 + + print out_string +#EndMacro: showbootermemorymap + diff --git a/tools/lldbmacros/misc.py b/tools/lldbmacros/misc.py new file mode 100644 index 000000000..b9ccb240a --- /dev/null +++ b/tools/lldbmacros/misc.py @@ -0,0 +1,341 @@ +""" +Miscellaneous (Intel) platform-specific commands. +""" + +from xnu import * +import xnudefines + +@lldb_command('showmcastate') +def showMCAstate(cmd_args=None): + """ + Print machine-check register state after MC exception. + """ + if kern.arch != 'x86_64': + print "Not available for current architecture." 
+        return
+
+    present = ["not present", "present"]
+    print 'MCA {:s}, control MSR {:s}, threshold status {:s}'.format(
+        present[int(kern.globals.mca_MCA_present)],
+        present[int(kern.globals.mca_control_MSR_present)],
+        present[int(kern.globals.mca_threshold_status_present)])
+    print '{:d} error banks, family code {:#0x}, machine-check dump state: {:d}'.format(
+        kern.globals.mca_error_bank_count,
+        kern.globals.mca_family,
+        kern.globals.mca_dump_state)
+    cpu = 0
+    while kern.globals.cpu_data_ptr[cpu]:
+        cd = kern.globals.cpu_data_ptr[cpu]
+        mc = cd.cpu_mca_state
+        if mc:
+            print 'CPU {:d}: mca_mcg_ctl: {:#018x} mca_mcg_status {:#018x}'.format(cpu, mc.mca_mcg_ctl, mc.mca_mcg_status.u64)
+            hdr = '{:<4s} {:<18s} {:<18s} {:<18s} {:<18s}'
+            val = '{:>3d}: {:#018x} {:#018x} {:#018x} {:#018x}'
+            print hdr.format('bank',
+                             'mca_mci_ctl',
+                             'mca_mci_status',
+                             'mca_mci_addr',
+                             'mca_mci_misc')
+            for i in range(int(kern.globals.mca_error_bank_count)):
+                bank = mc.mca_error_bank[i]
+                print val.format(i,
+                                 bank.mca_mci_ctl,
+                                 bank.mca_mci_status.u64,
+                                 bank.mca_mci_addr,
+                                 bank.mca_mci_misc)
+        print 'register state:'
+        reg = cd.cpu_desc_index.cdi_ktss.ist1 - sizeof('x86_saved_state_t')
+        print lldb_run_command('p/x *(x86_saved_state_t *) ' + hex(reg))
+        cpu = cpu + 1
+
+def dumpTimerList(anchor):
+    """
+    Utility function to dump the timer entries in list (anchor).
+    """
+    entry = Cast(anchor.head, 'queue_t')
+    if entry == addressof(anchor):
+        print '(empty)'
+        return
+
+    thdr = ' {:<22s}{:<17s}{:<16s} {:<14s} {:<18s}'
+    print thdr.format('entry:', 'deadline', 'soft_deadline', 'to go', '(*func)(param0,param1)')
+    while entry != addressof(anchor):
+        timer_call = Cast(entry, 'timer_call_t')
+        call_entry = Cast(entry, 'struct call_entry *')
+        debugger_entry = kern.globals.debugger_entry_time
+        if (debugger_entry < call_entry.deadline):
+            delta_sign = ' '
+            timer_fire = call_entry.deadline - debugger_entry
+        else:
+            delta_sign = '-'
+            timer_fire = debugger_entry - call_entry.deadline
+        tval = ' {:#018x}: {:16d} {:16d} {:s}{:3d}.{:09d}  ({:#018x})({:#018x},{:#018x})'
+        print tval.format(entry,
+                          call_entry.deadline,
+                          timer_call.soft_deadline,
+                          delta_sign,
+                          timer_fire / 1000000000,
+                          timer_fire % 1000000000,
+                          call_entry.func,
+                          call_entry.param0,
+                          call_entry.param1)
+        entry = entry.next
+
+@lldb_command('longtermtimers')
+def longtermTimers(cmd_args=None):
+    """
+    Print details of long-term timers and stats.
+    """
+    if kern.arch != 'x86_64':
+        print "Not available for current architecture."
+ return + + lt = kern.globals.timer_longterm + ltt = lt.threshold + EndofAllTime = -1 + if ltt.interval == EndofAllTime: + print "Longterm timers disabled" + return + + if lt.escalates > 0: + ratio = lt.enqueues / lt.escalates + else: + ratio = lt.enqueues + print 'Longterm timer object: {:#018x}'.format(addressof(lt)) + print ' queue count : {:d}' .format(lt.queue.count) + print ' number of enqueues : {:d}' .format(lt.enqueues) + print ' number of dequeues : {:d}' .format(lt.dequeues) + print ' number of escalates : {:d}' .format(lt.escalates) + print ' enqueues/escalates : {:d}' .format(ratio) + print ' threshold.interval : {:d}' .format(ltt.interval) + print ' threshold.margin : {:d}' .format(ltt.margin) + print ' scan_time : {:d}' .format(lt.scan_time) + if ltt.preempted == EndofAllTime: + print ' threshold.preempted : None' + else: + print ' threshold.preempted : {:d}' .format(ltt.preempted) + if ltt.deadline == EndofAllTime: + print ' threshold.deadline : None' + else: + print ' threshold.deadline : {:d}' .format(ltt.deadline) + print ' threshold.call : {:#018x}'.format(ltt.call) + print ' actual deadline set : {:d}' .format(ltt.deadline_set) + print ' threshold.scans : {:d}' .format(ltt.scans) + print ' threshold.preempts : {:d}' .format(ltt.preempts) + print ' threshold.latency : {:d}' .format(ltt.latency) + print ' - min : {:d}' .format(ltt.latency_min) + print ' - max : {:d}' .format(ltt.latency_max) + dumpTimerList(lt.queue) + + +@lldb_command('processortimers') +def processorTimers(cmd_args=None): + """ + Print details of processor timers, noting anything suspicious + Also include long-term timer details + """ + hdr = '{:<32s}{:<18s} {:<18s} {:<18s}' + print hdr.format('Processor','Last dispatch','Next deadline','difference') + p = kern.globals.processor_list + while p: + cpu = p.cpu_id + rt_timer = kern.globals.cpu_data_ptr[cpu].rtclock_timer + diff = p.last_dispatch - rt_timer.deadline + tmr = 'Processor {:d}: {:#018x} {:#018x} {:#018x} {:#018x} {:s}' + print tmr.format(cpu, + p, + p.last_dispatch, + rt_timer.deadline, + diff, + ['probably BAD', '(ok)'][int(diff < 0)]) + if kern.arch == 'x86_64': + print 'Next deadline set at: {:#018x}. Timer call list:'.format(rt_timer.when_set) + dumpTimerList(rt_timer.queue) + p = p.processor_list + longtermTimers() + + +@lldb_command('showtimerwakeupstats') +def showTimerWakeupStats(cmd_args=None): + """ + Displays interrupt and platform idle wakeup frequencies + associated with each thread, timer time-to-deadline frequencies, and + CPU time with user/system break down where applicable, with thread tags. + """ + for task in kern.tasks: + proc = Cast(task.bsd_info, 'proc_t') + print dereference(task) + print '{:d}({:s}), terminated thread timer wakeups: {:d} {:d} 2ms: {:d} 5ms: {:d} UT: {:d} ST: {:d}'.format( + proc.p_pid, + proc.p_comm, +# Commented-out references below to be addressed by rdar://13009660. 
+        0, #task.task_interrupt_wakeups,
+        0, #task.task_platform_idle_wakeups,
+        task.task_timer_wakeups_bin_1,
+        task.task_timer_wakeups_bin_2,
+        task.total_user_time,
+        task.total_system_time)
+    tot_wakes = 0 #task.task_interrupt_wakeups
+    tot_platform_wakes = 0 #task.task_platform_idle_wakeups
+    for thread in IterateQueue(task.threads, 'thread_t', 'task_threads'):
+#       if thread.thread_interrupt_wakeups == 0:
+#               continue
+        print '\tThread ID 0x{:x}, Tag 0x{:x}, timer wakeups: {:d} {:d} {:d} {:d} <2ms: {:d}, <5ms: {:d} UT: {:d} ST: {:d}'.format(
+            thread.thread_id,
+            thread.thread_tag,
+            0, #thread.thread_interrupt_wakeups,
+            0, #thread.thread_platform_idle_wakeups,
+            0, #thread.thread_callout_interrupt_wakeups,
+            0, #thread.thread_callout_platform_idle_wakeups,
+            thread.thread_timer_wakeups_bin_1,
+            thread.thread_timer_wakeups_bin_2,
+            thread.user_timer.all_bits,
+            thread.system_timer.all_bits)
+        tot_wakes += 0 #thread.thread_interrupt_wakeups
+        tot_platform_wakes += 0 #thread.thread_platform_idle_wakeups
+    print 'Task total wakeups: {:d} {:d}'.format(
+        tot_wakes, tot_platform_wakes)
+
+def DoReadMsr64(msr_address, lcpu):
+    """ Read a 64-bit MSR from the specified CPU
+        Params:
+            msr_address: int - MSR index to read from
+            lcpu: int - CPU identifier
+        Returns:
+            64-bit value read from the MSR
+    """
+    result = 0xbad10ad
+
+    if "kdp" != GetConnectionProtocol():
+        print "Target is not connected over kdp. Cannot read MSR."
+        return result
+
+    input_address = unsigned(addressof(kern.globals.manual_pkt.input))
+    len_address = unsigned(addressof(kern.globals.manual_pkt.len))
+    data_address = unsigned(addressof(kern.globals.manual_pkt.data))
+    if not WriteInt32ToMemoryAddress(0, input_address):
+        print "DoReadMsr64() failed to write 0 to input_address"
+        return result
+
+    kdp_pkt_size = GetType('kdp_readmsr64_req_t').GetByteSize()
+    if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address):
+        print "DoReadMsr64() failed to write kdp_pkt_size"
+        return result
+
+    kgm_pkt = kern.GetValueFromAddress(data_address, 'kdp_readmsr64_req_t *')
+    header_value = GetKDPPacketHeaderInt(
+        request=GetEnumValue('kdp_req_t::KDP_READMSR64'),
+        length=kdp_pkt_size)
+
+    if not WriteInt64ToMemoryAddress(header_value, int(addressof(kgm_pkt.hdr))):
+        print "DoReadMsr64() failed to write header_value"
+        return result
+    if not WriteInt32ToMemoryAddress(msr_address, int(addressof(kgm_pkt.address))):
+        print "DoReadMsr64() failed to write msr_address"
+        return result
+    if not WriteInt16ToMemoryAddress(lcpu, int(addressof(kgm_pkt.lcpu))):
+        print "DoReadMsr64() failed to write lcpu"
+        return result
+    if not WriteInt32ToMemoryAddress(1, input_address):
+        print "DoReadMsr64() failed to write to input_address"
+        return result
+
+    result_pkt = Cast(addressof(kern.globals.manual_pkt.data),
+                      'kdp_readmsr64_reply_t *')
+    if (result_pkt.error == 0):
+        result = dereference(Cast(addressof(result_pkt.data), 'uint64_t *'))
+    else:
+        print "DoReadMsr64() result_pkt.error != 0"
+    return result
+
+def DoWriteMsr64(msr_address, lcpu, data):
+    """ Write a 64-bit MSR
+        Params:
+            msr_address: int - MSR index to write to
+            lcpu: int - CPU identifier
+            data: int - value to write
+        Returns:
+            True upon success, False if error
+    """
+    if "kdp" != GetConnectionProtocol():
+        print "Target is not connected over kdp. Cannot write MSR."
+        return False
+
+    input_address = unsigned(addressof(kern.globals.manual_pkt.input))
+    len_address = unsigned(addressof(kern.globals.manual_pkt.len))
+    data_address = unsigned(addressof(kern.globals.manual_pkt.data))
+    if not WriteInt32ToMemoryAddress(0, input_address):
+        print "DoWriteMsr64() failed to write 0 to input_address"
+        return False
+
+    kdp_pkt_size = GetType('kdp_writemsr64_req_t').GetByteSize()
+    if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address):
+        print "DoWriteMsr64() failed to write kdp_pkt_size"
+        return False
+
+    kgm_pkt = kern.GetValueFromAddress(data_address, 'kdp_writemsr64_req_t *')
+    header_value = GetKDPPacketHeaderInt(
+        request=GetEnumValue('kdp_req_t::KDP_WRITEMSR64'),
+        length=kdp_pkt_size)
+
+    if not WriteInt64ToMemoryAddress(header_value, int(addressof(kgm_pkt.hdr))):
+        print "DoWriteMsr64() failed to write header_value"
+        return False
+    if not WriteInt32ToMemoryAddress(msr_address, int(addressof(kgm_pkt.address))):
+        print "DoWriteMsr64() failed to write msr_address"
+        return False
+    if not WriteInt16ToMemoryAddress(lcpu, int(addressof(kgm_pkt.lcpu))):
+        print "DoWriteMsr64() failed to write lcpu"
+        return False
+    if not WriteInt64ToMemoryAddress(data, int(addressof(kgm_pkt.data))):
+        print "DoWriteMsr64() failed to write data"
+        return False
+    if not WriteInt32ToMemoryAddress(1, input_address):
+        print "DoWriteMsr64() failed to write to input_address"
+        return False
+
+    result_pkt = Cast(addressof(kern.globals.manual_pkt.data),
+                      'kdp_writemsr64_reply_t *')
+    if not result_pkt.error == 0:
+        print "DoWriteMsr64() error received in reply packet"
+        return False
+
+    return True
+
+@lldb_command('readmsr64')
+def ReadMsr64(cmd_args=None):
+    """ Read the specified MSR. The CPU can be optionally specified
+        Syntax: readmsr64 <msr> [lcpu]
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print ReadMsr64.__doc__
+        return
+
+    msr_address = ArgumentStringToInt(cmd_args[0])
+    if len(cmd_args) > 1:
+        lcpu = ArgumentStringToInt(cmd_args[1])
+    else:
+        lcpu = int(xnudefines.lcpu_self)
+
+    msr_value = DoReadMsr64(msr_address, lcpu)
+    print "MSR[{:x}]: {:#016x}".format(msr_address, msr_value)
+
+@lldb_command('writemsr64')
+def WriteMsr64(cmd_args=None):
+    """ Write the specified MSR. The CPU can be optionally specified
+        Syntax: writemsr64 <msr> <value> [lcpu]
+    """
+    if cmd_args == None or len(cmd_args) < 2:
+        print WriteMsr64.__doc__
+        return
+    msr_address = ArgumentStringToInt(cmd_args[0])
+    write_val = ArgumentStringToInt(cmd_args[1])
+    if len(cmd_args) > 2:
+        lcpu = ArgumentStringToInt(cmd_args[2])
+    else:
+        lcpu = xnudefines.lcpu_self
+
+    if not DoWriteMsr64(msr_address, lcpu, write_val):
+        print "writemsr64 FAILED"
+
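+# Example session (values are illustrative only): MSR 0x1b is the
+# IA32_APIC_BASE MSR, so on a live kdp connection one could run:
+#
+#   (lldb) readmsr64 0x1b
+#   MSR[1b]: 0x00000000fee00900
+#   (lldb) writemsr64 0x1b 0xfee00900 0
+#
+# The trailing 0 selects logical cpu 0; omitting it targets the current CPU.
+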
+""" + +from xnu import * +from utils import * +from string import * +from socket import * + +import xnudefines +from netdefines import * +from routedefines import * + +def GetIfFlagsAsString(if_flags): + """ Return a formatted string description of the interface flags + """ + out_string = "" + flags = (unsigned)(if_flags & 0xffff) + i = 0 + num = 1 + while num <= flags: + if flags & num: + out_string += if_flags_strings[i] + "," + i += 1 + num = num << 1 + return rstrip(out_string, ",") + + +def ShowIfConfiguration(ifnet): + """ Display ifconfig-like output for the ifnet + """ + iface = Cast(ifnet, 'ifnet *') + out_string = "" + format_string = "{0: index {3: 0: + showall = 1 + else: + showall = 0 + + ifnets = kern.globals.ifnet_head + for ifnet in IterateTAILQ_HEAD(ifnets, "if_link"): + ShowIfConfiguration(ifnet) + if (showall == 1): + print GetIfaddrs(ifnet) +# EndMacro: ifconfig + +def GetAddressAsStringColonHex(addr, count): + out_string = "" + i = 0 + addr_format_string = "{0:02x}" + while (i < count): + if (i == 0): + out_string += addr_format_string.format(addr[i])[-2:] + else: + out_string += ":" + addr_format_string.format(addr[i])[-2:] + i += 1 + return out_string + +def GetSocketAddrAsStringUnspec(sockaddr): + out_string = "" + out_string += GetAddressAsStringColonHex(sockaddr.sa_data, sockaddr.sa_len - 2) + return out_string + +def GetSocketAddrAsStringUnix(sockaddr): + sock_unix = Cast(sockaddr, 'sockaddr_un *') + if (sock_unix == 0): + return "(null)" + else: + if (len(str(sock_unix.sun_path)) > 0): + return str(sock_unix.sun_path) + else: + return "\"\"" + +def GetInAddrAsString(ia): + out_string = "" + inaddr = Cast(ia, 'in_addr *') + + packed_value = struct.pack('I', unsigned(ia.s_addr)) + out_string = inet_ntoa(packed_value) + return out_string + +def GetIn6AddrAsString(ia): + out_string = "" + addr = ia + + addr_format_string = "{0:02x}:{1:02x}:{2:02x}:{3:02x}{4:02x}:{5:02x}:{6:02x}:{7:02x}{8:02x}:{9:02x}:{10:02x}:{11:02x}{12:02x}:{13:02x}:{14:02x}:{15:02x}" + out_string += addr_format_string.format(unsigned(addr[0]), unsigned(addr[1]), unsigned(addr[2]), unsigned(addr[3]), unsigned(addr[4]), unsigned(addr[5]), unsigned(addr[6]), unsigned(addr[7]), unsigned(addr[8]), unsigned(addr[9]), unsigned(addr[10]), unsigned(addr[11]), unsigned(addr[12]), unsigned(addr[13]), unsigned(addr[14]), unsigned(addr[15])) + return out_string + +def GetSocketAddrAsStringInet(sockaddr): + sock_in = Cast(sockaddr, 'sockaddr_in *') + return GetInAddrAsString(sock_in.sin_addr) + +def GetSocketAddrAsStringInet6(sockaddr): + sock_in6 = Cast(sockaddr, 'sockaddr_in6 *') + return GetIn6AddrAsString(sock_in6.sin6_addr.__u6_addr.__u6_addr8) + +def GetSocketAddrAsStringLink(sockaddr): + sock_link = Cast(sockaddr, 'sockaddr_dl *') + if sock_link is None: + return "(null)" + else: + out_string = "" + if (sock_link.sdl_nlen == 0 and sock_link.sdl_alen == 0 and sock_link.sdl_slen == 0): + out_string = "link#" + str(int(sock_link.sdl_index)) + else: + out_string += GetAddressAsStringColonHex(addressof(sock_link.sdl_data[sock_link.sdl_nlen]), sock_link.sdl_alen) + return out_string + +def GetSocketAddrAsStringAT(sockaddr): + out_string = "" + sock_addr = Cast(sockaddr, 'sockaddr *') + out_string += GetAddressAsStringColonHex(sockaddr.sa_data, sockaddr.sa_len - 2) + return out_string + +def GetSocketAddrAsString(sockaddr): + if sockaddr is None : + return "(null)" + out_string = "" + if (sockaddr.sa_family == 0): + out_string += "UNSPC " + GetSocketAddrAsStringUnspec(sockaddr) + elif (sockaddr.sa_family == 
+def GetSocketAddrAsString(sockaddr):
+    if sockaddr is None:
+        return "(null)"
+    out_string = ""
+    if (sockaddr.sa_family == 0):
+        out_string += "UNSPC " + GetSocketAddrAsStringUnspec(sockaddr)
+    elif (sockaddr.sa_family == 1):
+        out_string += "UNIX "
+        out_string += GetSocketAddrAsStringUnix(sockaddr)
+    elif (sockaddr.sa_family == 2):
+        out_string += "INET "
+        out_string += GetSocketAddrAsStringInet(sockaddr)
+    elif (sockaddr.sa_family == 30):
+        out_string += "INET6 "
+        out_string += GetSocketAddrAsStringInet6(sockaddr)
+    elif (sockaddr.sa_family == 18):
+        out_string += "LINK "
+        out_string += GetSocketAddrAsStringLink(sockaddr)
+    elif (sockaddr.sa_family == 16):
+        out_string += "ATLK "
+        out_string += GetSocketAddrAsStringAT(sockaddr)
+    else:
+        out_string += "FAM " + str(sockaddr.sa_family)
+        out_string += GetAddressAsStringColonHex(sockaddr.sa_data, sockaddr.sa_len)
+    return out_string
+
+# Macro: showifaddrs
+@lldb_command('showifaddrs')
+def ShowIfaddrs(cmd_args=None):
+    """ Show the (struct ifnet).if_addrhead list of addresses for the given ifp
+    """
+    if cmd_args != None and len(cmd_args) > 0:
+        ifp = kern.GetValueFromAddress(cmd_args[0], 'ifnet *')
+        if not ifp:
+            print "Unknown value passed as argument."
+            return
+        i = 1
+        for ifaddr in IterateTAILQ_HEAD(ifp.if_addrhead, "ifa_link"):
+            format_string = "\t{0: <d}. {1: <s}"
+            print format_string.format(i, GetSocketAddrAsString(ifaddr.ifa_addr))
+            i += 1
+    else:
+        print "Missing argument 0 in user function."
+# EndMacro: showifaddrs
+
+def GetIfaddrs(ifp):
+    """ Return a formatted list of the addresses attached to the given ifp
+    """
+    out_string = ""
+    if ifp:
+        i = 1
+        for ifaddr in IterateTAILQ_HEAD(ifp.if_addrhead, "ifa_link"):
+            out_string += "\t" + str(i) + ". " + GetSocketAddrAsString(ifaddr.ifa_addr) + "\n"
+            i += 1
+    else:
+        out_string += "Missing argument. Failed to get ifaddrs\n"
+    return out_string
+
+def ShowDlilIfnetConfiguration(dlil_ifnet, show_all):
+    """ Internal function to display a dlil_ifnet entry (used by showifnets)
+    """
+    iface = Cast(dlil_ifnet, 'ifnet *')
+    ShowIfConfiguration(iface)
+    if show_all:
+        print GetIfaddrs(iface)
+
+# Macro: showifnets
+@lldb_command('showifnets')
+def ShowIfnets(cmd_args=None):
+    """ Display ifconfig-like output for all attached and detached interfaces;
+        pass any extra argument to also display the addresses of each interface
+    """
+    showall = 0
+    if cmd_args != None and len(cmd_args) > 0:
+        showall = 1
+    dlil_ifnets = kern.globals.dlil_ifnet_head
+    for dlil_ifnet in IterateTAILQ_HEAD(dlil_ifnets, "dl_if_link"):
+        ShowDlilIfnetConfiguration(dlil_ifnet, showall)
+# EndMacro: showifnets
+
+# Macro: showifmultiaddrs
+@lldb_command('showifmultiaddrs')
+def ShowIfMultiAddrs(cmd_args=None):
+    """ Show the list of multicast addresses for the given ifp
+    """
+    out_string = ""
+    if cmd_args != None and len(cmd_args) > 0:
+        ifp = kern.GetValueFromAddress(cmd_args[0], 'ifnet *')
+        if not ifp:
+            print "Unknown value passed as argument."
+            return
+        ifmulti = cast(ifp.if_multiaddrs.lh_first, 'ifmultiaddr *')
+        i = 0
+        while ifmulti != 0:
+            ifma_format_string = "\t{0: <d}. {1: <s}\n"
+            out_string += ifma_format_string.format(i + 1, GetSocketAddrAsString(ifmulti.ifma_addr))
+            ifmulti = cast(ifmulti.ifma_link.le_next, 'ifmultiaddr *')
+            i += 1
+        print out_string
+    else:
+        print "Missing argument 0 in user function."
+# EndMacro: showifmultiaddrs
+
+def GetSocketProtocolAsString(sock):
+    """ Return a short label for the socket's protocol (numbers from netinet/in.h)
+    """
+    out_string = ""
+    if sock.so_proto.pr_protocol == 6:      # IPPROTO_TCP
+        out_string += " TCP "
+    elif sock.so_proto.pr_protocol == 17:   # IPPROTO_UDP
+        out_string += " UDP "
+    elif sock.so_proto.pr_protocol == 1:    # IPPROTO_ICMP
+        out_string += " ICMP "
+    elif sock.so_proto.pr_protocol == 255:  # IPPROTO_RAW
+        out_string += " RAW "
+    else:
+        out_string += " proto " + str(int(sock.so_proto.pr_protocol)) + " "
+    return out_string
+
+def GetInAddr4to6AsString(inaddr):
+    """ Format a struct in_addr_4in6: the IPv4 address lives in the last
+        4 bytes (after the 12-byte pad), printed as a dotted quad
+    """
+    out_string = ""
+    if (inaddr is not None):
+        ia = Cast(inaddr, 'char *')
+        inaddr_format_string = "{0:d}.{1:d}.{2:d}.{3:d}"
+        out_string += inaddr_format_string.format(unsigned(ia[12]), unsigned(ia[13]), unsigned(ia[14]), unsigned(ia[15]))
+    return out_string
+
+def GetInPortAsString(port):
+    """ Return the port (a 16-bit field in network byte order) as ":<port>"
+    """
+    out_string = ""
+    port_string = Cast(port, 'char *')
+    port_unsigned = dereference(Cast(port, 'unsigned short *'))
+
+    if ((((port_unsigned & 0xff00) >> 8) == port_string[0])) and (((port_unsigned & 0x00ff) == port_string[1])):
+        out_string += ":" + str(int(port_unsigned))
+    else:
+        out_string += ":" + str(int(((port_unsigned & 0xff00) >> 8) | ((port_unsigned & 0x00ff) << 8)))
+
+    return out_string
+
+def GetIPv4SocketAsString(sock):
+    out_string = ""
+    pcb = Cast(sock.so_pcb, 'inpcb *')
+    if (pcb == 0):
+        out_string += "inpcb: (null) "
+    else:
+        out_string += "inpcb: " + hex(pcb)
+        out_string += GetSocketProtocolAsString(sock)
+        out_string += GetInAddr4to6AsString(addressof(pcb.inp_dependladdr.inp46_local))
+        out_string += GetInPortAsString(addressof(pcb.inp_lport))
+        out_string += " -> "
+        out_string += GetInAddr4to6AsString(addressof(pcb.inp_dependfaddr.inp46_foreign))
+        out_string += GetInPortAsString(addressof(pcb.inp_fport))
+    return out_string
+
+def GetIPv6SocketAsString(sock):
+    out_string = ""
+    pcb = Cast(sock.so_pcb, 'inpcb *')
+    if (pcb == 0):
+        out_string += "inpcb: (null) "
+    else:
+        out_string += "inpcb: " + hex(pcb) + " "
+        out_string += GetSocketProtocolAsString(sock)
+        out_string += GetIn6AddrAsString((pcb.inp_dependladdr.inp6_local.__u6_addr.__u6_addr8))
+        out_string += GetInPortAsString(addressof(pcb.inp_lport))
+        out_string += " -> "
+        out_string += GetIn6AddrAsString((pcb.inp_dependfaddr.inp6_foreign.__u6_addr.__u6_addr8))
+        out_string += GetInPortAsString(addressof(pcb.inp_fport))
+    return out_string
+
+def GetUnixDomainSocketAsString(sock):
+    out_string = ""
+    pcb = Cast(sock.so_pcb, 'unpcb *')
+    if (pcb == 0):
+        out_string += "unpcb: (null) "
+    else:
+        out_string += "unpcb: " + hex(pcb) + " "
+        out_string += "unp_vnode: " + hex(pcb.unp_vnode) + " "
+        out_string += "unp_conn: " + hex(pcb.unp_conn) + " "
+        out_string += "unp_addr: " + GetSocketAddrAsStringUnix(pcb.unp_addr)
+    return out_string
+
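+# Byte-order check in GetInPortAsString, worked through: for port 80 stored
+# big-endian the bytes are 0x00,0x50. If the debugger reads the u16 as 0x0050,
+# byte 0 (0x00) equals the high byte, so the value prints as-is (":80");
+# otherwise the two bytes are swapped before printing.
+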
+def GetSocket(socket):
+    """ Show the contents of a socket
+    """
+    so = kern.GetValueFromAddress(unsigned(socket), 'socket *')
+    if (so):
+        out_string = ""
+        sock_format_string = "so: 0x{0: <16x} "
+        out_string += sock_format_string.format(unsigned(so))
+        domain = so.so_proto.pr_domain
+        if (domain.dom_family == 1):
+            out_string += GetUnixDomainSocketAsString(so)
+        if (domain.dom_family == 2):
+            out_string += GetIPv4SocketAsString(so)
+        if (domain.dom_family == 30):
+            out_string += GetIPv6SocketAsString(so)
+    else:
+        out_string = "(null)"
+    return out_string
+
+# Macro: showsocket
+@lldb_command('showsocket')
+def ShowSocket(cmd_args=None):
+    """ Routine to print out the contents of a socket
+        usage: showsocket <address of socket>
+    """
+    if cmd_args == None or len(cmd_args) < 1:
+        print "Missing argument 0 in user function."
+        return
+    so = kern.GetValueFromAddress(cmd_args[0], 'socket *')
+    if (so > 0):
+        print GetSocket(unsigned(so))
+    else:
+        print "Unknown value passed as argument."
+# EndMacro: showsocket
+
+# Macro: showprocsockets
+@lldb_command('showprocsockets')
+def ShowProcSockets(cmd_args=None):
+    """ Given a proc_t pointer, display information about its sockets
+        usage: showprocsockets <proc address>
+    """
+    out_string = ""
+    if cmd_args != None and len(cmd_args) > 0:
+        proc = kern.GetValueFromAddress(cmd_args[0], 'proc *')
+        proc_fd = proc.p_fd
+
+        if not proc:
+            print "Unknown value passed as argument."
+            return
+        else:
+            count = 0
+            fpp = Cast(proc_fd.fd_ofiles, 'fileproc **')
+            while (count < proc_fd.fd_nfiles):
+                fp = Cast(dereference(fpp), 'fileproc *')
+                if (fp != 0):
+                    fg = Cast(fp.f_fglob, 'fileglob *')
+                    if (int(fg.fg_ops.fo_type) == 2):
+                        if (proc_fd.fd_ofileflags[count] & 4):
+                            out_string += "U: "
+                        else:
+                            out_string += " "
+                        out_string += "fd = " + str(count) + " "
+                        if (fg.fg_data != 0):
+                            out_string += GetSocket(unsigned(fg.fg_data))
+                            out_string += "\n"
+                        else:
+                            out_string += ""
+                fpp = kern.GetValueFromAddress(unsigned(fpp) + kern.ptrsize, 'fileproc **')
+                count += 1
+        print out_string
+    else:
+        print "Missing argument 0 in user function."
+# EndMacro: showprocsockets
+
+def GetProcSockets(proc):
+    """ Given a proc_t pointer, display information about its sockets
+    """
+    out_string = ""
+    proc_fd = proc.p_fd
+
+    if proc is None:
+        out_string += "Unknown value passed as argument."
+    else:
+        count = 0
+        fpp = Cast(proc_fd.fd_ofiles, 'fileproc **')
+        while (count < proc_fd.fd_nfiles):
+            fp = Cast(dereference(fpp), 'fileproc *')
+            if (fp != 0):
+                fg = Cast(fp.f_fglob, 'fileglob *')
+                if (int(fg.fg_ops.fo_type) == 2):
+                    if (proc_fd.fd_ofileflags[count] & 4):
+                        out_string += "U: "
+                    else:
+                        out_string += " "
+                    out_string += "fd = " + str(count) + " "
+                    if (fg.fg_data != 0):
+                        out_string += GetSocket(unsigned(fg.fg_data))
+                        out_string += "\n"
+                    else:
+                        out_string += ""
+            fpp = kern.GetValueFromAddress(unsigned(fpp) + kern.ptrsize, 'fileproc **')
+            count += 1
+    return out_string
+
+
+# Macro: showallprocsockets
+@lldb_command('showallprocsockets')
+def ShowAllProcSockets(cmd_args=None):
+    """ Display information about the sockets of all the processes
+    """
+    for proc in kern.procs:
+        print "================================================================================"
+        print GetProcInfo(proc)
+        print GetProcSockets(proc)
+# EndMacro: showallprocsockets
+
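+# The per-route flag letters printed by GetRtEntryPrDetailsAsString below map
+# to RTF_* bits (e.g. U=RTF_UP, G=RTF_GATEWAY, H=RTF_HOST, S=RTF_STATIC,
+# C=RTF_CLONING, c=RTF_PRCLONING, W=RTF_WASCLONED, L=RTF_LLINFO), so a typical
+# default route shows "UGSc": up, gateway, static, protocol-cloning.
+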
+def GetRtEntryPrDetailsAsString(rte):
+    out_string = ""
+    rt = Cast(rte, 'rtentry *')
+    dst = Cast(rt.rt_nodes[0].rn_u.rn_leaf.rn_Key, 'sockaddr *')
+    isv6 = 0
+    dst_string_format = "{0:<18s}"
+    if (dst.sa_family == AF_INET):
+        out_string += dst_string_format.format(GetSocketAddrAsStringInet(dst)) + " "
+    elif (dst.sa_family == AF_INET6):
+        out_string += dst_string_format.format(GetSocketAddrAsStringInet6(dst)) + " "
+        isv6 = 1
+    elif (dst.sa_family == AF_LINK):
+        out_string += dst_string_format.format(GetSocketAddrAsStringLink(dst))
+        if (isv6 == 1):
+            out_string += " "
+        else:
+            out_string += " "
+    else:
+        out_string += dst_string_format.format(GetSocketAddrAsStringUnspec(dst)) + " "
+
+    gw = Cast(rt.rt_gateway, 'sockaddr *')
+    if (gw.sa_family == AF_INET):
+        out_string += dst_string_format.format(GetSocketAddrAsStringInet(gw)) + " "
+    elif (gw.sa_family == AF_INET6):
+        out_string += dst_string_format.format(GetSocketAddrAsStringInet6(gw)) + " "
+        isv6 = 1
+    elif (gw.sa_family == AF_LINK):
+        out_string += dst_string_format.format(GetSocketAddrAsStringLink(gw)) + " "
+        if (isv6 == 1):
+            out_string += " "
+        else:
+            out_string += " "
+    else:
+        out_string += dst_string_format.format(GetSocketAddrAsStringUnspec(gw)) + " "
+
+    if (rt.rt_flags & RTF_WASCLONED):
+        if (kern.ptrsize == 8):
+            rt_flags_string_format = "0x{0:<16x}"
+            out_string += rt_flags_string_format.format(rt.rt_parent) + " "
+        else:
+            rt_flags_string_format = "0x{0:<8x}"
+            out_string += rt_flags_string_format.format(rt.rt_parent) + " "
+    else:
+        if (kern.ptrsize == 8):
+            out_string += " "
+        else:
+            out_string += " "
+
+    rt_refcnt_rmx_string_format = "{0:10d} {1:10d} "
+    out_string += rt_refcnt_rmx_string_format.format(rt.rt_refcnt, rt.rt_rmx.rmx_pksent) + " "
+
+    rtf_string_format = "{0:>s}"
+    if (rt.rt_flags & RTF_UP):
+        out_string += rtf_string_format.format("U")
+    if (rt.rt_flags & RTF_GATEWAY):
+        out_string += rtf_string_format.format("G")
+    if (rt.rt_flags & RTF_HOST):
+        out_string += rtf_string_format.format("H")
+    if (rt.rt_flags & RTF_REJECT):
+        out_string += rtf_string_format.format("R")
+    if (rt.rt_flags & RTF_DYNAMIC):
+        out_string += rtf_string_format.format("D")
+    if (rt.rt_flags & RTF_MODIFIED):
+        out_string += rtf_string_format.format("M")
+    if (rt.rt_flags & RTF_CLONING):
+        out_string += rtf_string_format.format("C")
+    if (rt.rt_flags & RTF_PRCLONING):
+        out_string += rtf_string_format.format("c")
+    if (rt.rt_flags & RTF_LLINFO):
+        out_string += rtf_string_format.format("L")
+    if (rt.rt_flags & RTF_STATIC):
+        out_string += rtf_string_format.format("S")
+    if (rt.rt_flags & RTF_PROTO1):
+        out_string += rtf_string_format.format("1")
+    if (rt.rt_flags & RTF_PROTO2):
+        out_string += rtf_string_format.format("2")
+    if (rt.rt_flags & RTF_PROTO3):
+        out_string += rtf_string_format.format("3")
+    if (rt.rt_flags & RTF_WASCLONED):
+        out_string += rtf_string_format.format("W")
+    if (rt.rt_flags & RTF_BROADCAST):
+        out_string += rtf_string_format.format("b")
+    if (rt.rt_flags & RTF_MULTICAST):
+        out_string += rtf_string_format.format("m")
+    if (rt.rt_flags & RTF_XRESOLVE):
+        out_string += rtf_string_format.format("X")
+    if (rt.rt_flags & RTF_BLACKHOLE):
+        out_string += rtf_string_format.format("B")
+    if (rt.rt_flags & RTF_IFSCOPE):
+        out_string += rtf_string_format.format("I")
+    if (rt.rt_flags & RTF_CONDEMNED):
+        out_string += rtf_string_format.format("Z")
+    if (rt.rt_flags & RTF_IFREF):
+        out_string += rtf_string_format.format("i")
+    if (rt.rt_flags & RTF_PROXY):
+        out_string += rtf_string_format.format("Y")
+    if (rt.rt_flags & RTF_ROUTER):
+        out_string += rtf_string_format.format("r")
+
+    out_string += "/"
+    out_string += str(rt.rt_ifp.if_name)
+    out_string += str(int(rt.rt_ifp.if_unit))
+    out_string += "\n"
+    return out_string
+
+
+RNF_ROOT = 2
+def GetRtTableAsString(rt_tables):
+    out_string = ""
+    rn = Cast(rt_tables.rnh_treetop, 'radix_node *')
+    rnh_cnt = rt_tables.rnh_cnt
+
+    while (rn.rn_bit >= 0):
+        rn = rn.rn_u.rn_node.rn_L
+
+    while 1:
+        base = Cast(rn, 'radix_node *')
+        while ((rn.rn_parent.rn_u.rn_node.rn_R == rn) and (rn.rn_flags & RNF_ROOT == 0)):
+            rn = rn.rn_parent
+        rn = rn.rn_parent.rn_u.rn_node.rn_R
+        while (rn.rn_bit >= 0):
+            rn = rn.rn_u.rn_node.rn_L
+        next_rn = rn
+        while (base != 0):
+            rn = base
+            base = rn.rn_u.rn_leaf.rn_Dupedkey
+            if ((rn.rn_flags & RNF_ROOT) == 0):
+                rt = Cast(rn, 'rtentry *')
+                if (kern.ptrsize == 8):
+                    rtentry_string_format = "0x{0:<18x}"
+                    out_string += rtentry_string_format.format(rt) + " "
+                else:
+                    rtentry_string_format = "0x{0:<10x}"
+                    out_string += rtentry_string_format.format(rt) + " "
+                out_string += GetRtEntryPrDetailsAsString(rt) + " "
+
+        rn = next_rn
+        if ((rn.rn_flags & RNF_ROOT) != 0):
+            break
+    return out_string
+
+def 
GetRtInetAsString(): + rt_tables = kern.globals.rt_tables[2] + if (kern.ptrsize == 8): + rt_table_header_format_string = "{0:<18s} {1: <16s} {2:<20s} {3:<16s} {4:<8s} {5:<8s} {6:<8s}" + print rt_table_header_format_string.format("rtentry", " dst", "gw", "parent", "Refs", "Use", "flags/if") + print rt_table_header_format_string.format("-" * 18, "-" * 16, "-" * 16, "-" * 16, "-" * 8, "-" * 8, "-" * 8) + print GetRtTableAsString(rt_tables) + else: + rt_table_header_format_string = "{0:<8s} {1:<16s} {2:<18s} {3:<8s} {4:<8s} {5:<8s} {6:<8s}" + print rt_table_header_format_string.format("rtentry", "dst", "gw", "parent", "Refs", "Use", "flags/if") + print rt_table_header_format_string.format("-" * 8, "-" * 16, "-" * 16, "-" * 8, "-" * 8, "-" * 8, "-" * 8) + print GetRtTableAsString(rt_tables) + +def GetRtInet6AsString(): + rt_tables = kern.globals.rt_tables[30] + if (kern.ptrsize == 8): + rt_table_header_format_string = "{0:<18s} {1: <16s} {2:<20s} {3:<16s} {4:<8s} {5:<8s} {6:<8s}" + print rt_table_header_format_string.format("rtentry", " dst", "gw", "parent", "Refs", "Use", "flags/if") + print rt_table_header_format_string.format("-" * 18, "-" * 16, "-" * 16, "-" * 16, "-" * 8, "-" * 8, "-" * 8) + print GetRtTableAsString(rt_tables) + else: + rt_table_header_format_string = "{0:<8s} {1:<16s} {2:<18s} {3:<8s} {4:<8s} {5:<8s} {6:<8s}" + print rt_table_header_format_string.format("rtentry", "dst", "gw", "parent", "Refs", "Use", "flags/if") + print rt_table_header_format_string.format("-" * 8, "-" * 16, "-" * 18, "-" * 8, "-" * 8, "-" * 8, "-" * 8) + print GetRtTableAsString(rt_tables) + +# Macro: show_rt_inet +@lldb_command('show_rt_inet') +def ShowRtInet(cmd_args=None): + """ Display the IPv4 routing table + """ + print GetRtInetAsString() +# EndMacro: show_rt_inet + +# Macro: show_rt_inet6 +@lldb_command('show_rt_inet6') +def ShowRtInet6(cmd_args=None): + """ Display the IPv6 routing table + """ + print GetRtInet6AsString() +# EndMacro: show_rt_inet6 + +# Macro: rtentry_showdbg +@lldb_command('rtentry_showdbg') +def ShowRtEntryDebug(cmd_args=None): + """ Print the debug information of a route entry + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." 
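+        # The loops below decode ctrace records: rtd_alloc and rtd_free hold
+        # one backtrace each, while rtd_refhold/rtd_refrele/rtd_lock/rtd_unlock
+        # are ring buffers of RTD_TRACE_HIST_SIZE records, each a thread
+        # pointer plus up to CTRACE_STACK_SIZE saved PCs that
+        # GetSourceInformationForAddress symbolicates. Example invocation
+        # (the address is hypothetical):
+        #   (lldb) rtentry_showdbg 0xffffff80125ce000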
+ return + out_string = "" + cnt = 0 + rtd = kern.GetValueFromAddress(cmd_args[0], 'rtentry_dbg *') + rtd_summary_format_string = "{0:s} {1:d}" + out_string += rtd_summary_format_string.format("Total holds : ", rtd.rtd_refhold_cnt) + "\n" + out_string += rtd_summary_format_string.format("Total releases : ", rtd.rtd_refrele_cnt) + "\n" + + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = rtd.rtd_alloc.pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nAlloc: (thread " + hex(rtd.rtd_alloc.th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = rtd.rtd_free.pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nFree: (thread " + hex(rtd.rtd_free.th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + + while (cnt < RTD_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = rtd.rtd_refhold[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(rtd.rtd_refhold[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + + cnt = 0 + while (cnt < RTD_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = rtd.rtd_refrele[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(rtd.rtd_refrele[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + + out_string += "\nTotal locks : " + str(int(rtd.rtd_lock_cnt)) + out_string += "\nTotal unlocks : " + str(int(rtd.rtd_unlock_cnt)) + + cnt = 0 + while (cnt < RTD_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = rtd.rtd_lock[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nLock [" + str(int(cnt)) + "] (thread " + hex(rtd.rtd_lock[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + + cnt = 0 + while (cnt < RTD_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = rtd.rtd_unlock[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nUnlock [" + str(int(cnt)) + "] (thread " + hex(rtd.rtd_unlock[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + + print out_string +# EndMacro: rtentry_showdbg + +# Macro: inifa_showdbg +@lldb_command('inifa_showdbg') +def InIfaShowDebug(cmd_args=None): + """ Print the debug information of an IPv4 interface address + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." 
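+        # For a healthy in_ifaddr the difference between the two totals below
+        # equals the number of outstanding references; a steadily growing gap
+        # points at a leaked hold, and the per-slot backtraces identify the
+        # call site. These macros presumably require a kernel built with
+        # refcount debugging, since they read the *_dbg shadow structures.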
+ return + out_string = "" + cnt = 0 + inifa = kern.GetValueFromAddress(cmd_args[0], 'in_ifaddr_dbg *') + in_ifaddr_summary_format_string = "{0:s} {1:d}" + out_string += in_ifaddr_summary_format_string.format("Total holds : ", inifa.inifa_refhold_cnt) + "\n" + out_string += in_ifaddr_summary_format_string.format("Total releases : ", inifa.inifa_refrele_cnt) + "\n" + + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = inifa.inifa_alloc.pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nAlloc: (thread " + hex(inifa.inifa_alloc.th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = inifa.inifa_free.pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nFree: (thread " + hex(inifa.inifa_free.th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + + while (cnt < INIFA_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = inifa.inifa_refhold[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(inifa.inifa_refhold[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + cnt = 0 + + while (cnt < INIFA_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = inifa.inifa_refrele[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(inifa.inifa_refrele[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + print out_string +# EndMacro: inifa_showdbg + +# Macro: in6ifa_showdbg +@lldb_command('in6ifa_showdbg') +def In6IfaShowDebug(cmd_args=None): + """ Print the debug information of an IPv6 interface address + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." 
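+        # In the history walks below, cnt selects one of the
+        # IN6IFA_TRACE_HIST_SIZE ring-buffer slots and ix steps through the
+        # PCs of that record; the buffers wrap, so slot 0 is not necessarily
+        # the oldest hold or release.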
+ return + out_string = "" + cnt = 0 + in6ifa = kern.GetValueFromAddress(cmd_args[0], 'in6_ifaddr_dbg *') + in6_ifaddr_summary_format_string = "{0:s} {1:d}" + print in6_ifaddr_summary_format_string.format("Total holds : ", in6ifa.in6ifa_refhold_cnt) + print in6_ifaddr_summary_format_string.format("Total releases : ", in6ifa.in6ifa_refrele_cnt) + + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = in6ifa.in6ifa_alloc.pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nAlloc: (thread " + hex(in6ifa.in6ifa_alloc.th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = in6ifa.in6ifa_free.pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nFree: (thread " + hex(in6ifa.in6ifa_free.th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + + while (cnt < IN6IFA_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = in6ifa.in6ifa_refhold[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(in6ifa.in6ifa_refhold[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + cnt = 0 + + while (cnt < IN6IFA_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = in6ifa.in6ifa_refrele[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(in6ifa.in6ifa_refrele[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + print out_string +# EndMacro: in6ifa_showdbg + +# Macro: inm_showdbg +@lldb_command('inm_showdbg') +def InmShowDebug(cmd_args=None): + """ Print the debug information of an IPv4 multicast address + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." + return + out_string = "" + cnt = 0 + inm = kern.GetValueFromAddress(cmd_args[0], 'in_multi_dbg *') + in_multi_summary_format_string = "{0:s} {1:d}" + out_string += in_multi_summary_format_string.format("Total holds : ", inm.inm_refhold_cnt) + out_string += in_multi_summary_format_string.format("Total releases : ", inm.inm_refrele_cnt) + + while (cnt < INM_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = inm.inm_refhold[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(inm.inm_refhold[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + cnt = 0 + while (cnt < INM_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = inm.inm_refrele[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(inm.inm_refrele[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + print out_string +# EndMacro: inm_showdbg + +# Macro: ifma_showdbg +@lldb_command('ifma_showdbg') +def IfmaShowDebug(cmd_args=None): + """ Print the debug information of a link multicast address + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." 
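+        # Same decode pattern as inm_showdbg above; only the struct field
+        # prefix (ifma_) and the ring depth (IFMA_TRACE_HIST_SIZE, defined in
+        # netdefines.py) differ.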
+ return + out_string = "" + cnt = 0 + ifma = kern.GetValueFromAddress(cmd_args[0], 'ifmultiaddr_dbg *') + link_multi_summary_format_string = "{0:s} {1:d}" + out_string += link_multi_summary_format_string.format("Total holds : ", ifma.ifma_refhold_cnt) + "\n" + out_string += link_multi_summary_format_string.format("Total releases : ", ifma.ifma_refrele_cnt) + "\n" + + while (cnt < IFMA_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = ifma.ifma_refhold[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(ifma.ifma_refhold[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + cnt = 0 + while (cnt < IFMA_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = ifma.ifma_refrele[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(ifma.ifma_refrele[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + print out_string +# EndMacro: ifma_showdbg + +# Macro: ifpref_showdbg +@lldb_command('ifpref_showdbg') +def IfpRefShowDebug(cmd_args=None): + """ Print the debug information of an interface ref count + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." + return + out_string = "" + cnt = 0 + dl_if = kern.GetValueFromAddress(cmd_args[0], 'dlil_ifnet_dbg *') + dl_if_summary_format_string = "{0:s} {1:d}" + out_string += dl_if_summary_format_string.format("Total holds : ", dl_if.dldbg_if_refhold_cnt) + out_string += dl_if_summary_format_string.format("Total releases : ", dl_if.dldbg_if_refrele_cnt) + + while (cnt < IF_REF_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = dl_if.dldbg_if_refhold[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(dl_if.dldbg_if_refhold[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + cnt = 0 + while (cnt < IF_REF_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = dl_if.dldbg_if_refrele[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(dl_if.dldbg_if_refrele[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + print out_string +# EndMacro: ifpref_showdbg + +# Macro: ndpr_showdbg +@lldb_command('ndpr_showdbg') +def ndprShowDebug(cmd_args=None): + """ Print the debug information of a nd_prefix structure + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." 
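+        # The argument is the address of an nd_prefix_dbg record, e.g.
+        # (hypothetical address):
+        #   (lldb) ndpr_showdbg 0xffffff801234a000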
+ return + out_string = "" + cnt = 0 + ndpr = kern.GetValueFromAddress(cmd_args[0], 'nd_prefix_dbg *') + ndpr_summary_format_string = "{0:s} {1:d}" + out_string += ndpr_summary_format_string.format("Total holds : ", ndpr.ndpr_refhold_cnt) + out_string += ndpr_summary_format_string.format("Total releases : ", ndpr.ndpr_refrele_cnt) + + while (cnt < NDPR_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = ndpr.ndpr_refhold[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(ndpr.ndpr_refhold[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + cnt = 0 + while (cnt < NDPR_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = ndpr.ndpr_refrele[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(ndpr.ndpr_refrele[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + print out_string +# EndMacro: ndpr_showdbg + +# Macro: nddr_showdbg +@lldb_command('nddr_showdbg') +def nddrShowDebug(cmd_args=None): + """ Print the debug information of a nd_defrouter structure + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." + return + out_string = "" + cnt = 0 + nddr = kern.GetValueFromAddress(cmd_args[0], 'nd_defrouter_dbg *') + nddr_summary_format_string = "{0:s} {1:d}" + out_string += nddr_summary_format_string.format("Total holds : ", nddr.nddr_refhold_cnt) + out_string += nddr_summary_format_string.format("Total releases : ", nddr.nddr_refrele_cnt) + + while (cnt < NDDR_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = nddr.nddr_refhold[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(nddr.nddr_refhold[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + cnt = 0 + while (cnt < NDDR_TRACE_HIST_SIZE): + ix = 0 + while (ix < CTRACE_STACK_SIZE): + kgm_pc = nddr.nddr_refrele[cnt].pc[ix] + if (kgm_pc != 0): + if (ix == 0): + out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(nddr.nddr_refrele[cnt].th) + "):\n" + out_string += str(int(ix + 1)) + ": " + out_string += GetSourceInformationForAddress(kgm_pc) + out_string += "\n" + ix += 1 + cnt += 1 + print out_string +# EndMacro: nddr_showdbg + +# Macro: imo_showdbg +@lldb_command('imo_showdbg') +def IpmOptions(cmd_args=None): + """ Print the debug information of a ip_moptions structure + """ + if (cmd_args == None or len(cmd_args) == 0): + print "Missing argument 0 in user function." 
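+        # ip_moptions are a socket's per-socket IPv4 multicast options; the
+        # _dbg wrapper adds the same hold/release ring buffers decoded below,
+        # so the loop bodies match inm_showdbg structurally.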
+        return
+    out_string = ""
+    cnt = 0
+    imo = kern.GetValueFromAddress(cmd_args[0], 'ip_moptions_dbg *')
+    imo_summary_format_string = "{0:s} {1:d}"
+    out_string += imo_summary_format_string.format("Total holds : ", imo.imo_refhold_cnt) + "\n"
+    out_string += imo_summary_format_string.format("Total releases : ", imo.imo_refrele_cnt) + "\n"
+
+    while (cnt < IMO_TRACE_HIST_SIZE):
+        ix = 0
+        while (ix < CTRACE_STACK_SIZE):
+            kgm_pc = imo.imo_refhold[cnt].pc[ix]
+            if (kgm_pc != 0):
+                if (ix == 0):
+                    out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(imo.imo_refhold[cnt].th) + "):\n"
+                out_string += str(int(ix + 1)) + ": "
+                out_string += GetSourceInformationForAddress(kgm_pc)
+                out_string += "\n"
+            ix += 1
+        cnt += 1
+    cnt = 0
+    while (cnt < IMO_TRACE_HIST_SIZE):
+        ix = 0
+        while (ix < CTRACE_STACK_SIZE):
+            kgm_pc = imo.imo_refrele[cnt].pc[ix]
+            if (kgm_pc != 0):
+                if (ix == 0):
+                    out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(imo.imo_refrele[cnt].th) + "):\n"
+                out_string += str(int(ix + 1)) + ": "
+                out_string += GetSourceInformationForAddress(kgm_pc)
+                out_string += "\n"
+            ix += 1
+        cnt += 1
+    print out_string
+# EndMacro: imo_showdbg
+
+# Macro: im6o_showdbg
+@lldb_command('im6o_showdbg')
+def Ip6mOptions(cmd_args=None):
+    """ Print the debug information of an ip6_moptions structure
+    """
+    if (cmd_args == None or len(cmd_args) == 0):
+        print "Missing argument 0 in user function."
+        return
+    out_string = ""
+    cnt = 0
+    im6o = kern.GetValueFromAddress(cmd_args[0], 'ip6_moptions_dbg *')
+    im6o_summary_format_string = "{0:s} {1:d}"
+    out_string += im6o_summary_format_string.format("Total holds : ", im6o.im6o_refhold_cnt) + "\n"
+    out_string += im6o_summary_format_string.format("Total releases : ", im6o.im6o_refrele_cnt) + "\n"
+
+    while (cnt < IM6O_TRACE_HIST_SIZE):
+        ix = 0
+        while (ix < CTRACE_STACK_SIZE):
+            kgm_pc = im6o.im6o_refhold[cnt].pc[ix]
+            if (kgm_pc != 0):
+                if (ix == 0):
+                    out_string += "\nHold [" + str(int(cnt)) + "] (thread " + hex(im6o.im6o_refhold[cnt].th) + "):\n"
+                out_string += str(int(ix + 1)) + ": "
+                out_string += GetSourceInformationForAddress(kgm_pc)
+                out_string += "\n"
+            ix += 1
+        cnt += 1
+    cnt = 0
+    while (cnt < IM6O_TRACE_HIST_SIZE):
+        ix = 0
+        while (ix < CTRACE_STACK_SIZE):
+            kgm_pc = im6o.im6o_refrele[cnt].pc[ix]
+            if (kgm_pc != 0):
+                if (ix == 0):
+                    out_string += "\nRelease [" + str(int(cnt)) + "] (thread " + hex(im6o.im6o_refrele[cnt].th) + "):\n"
+                out_string += str(int(ix + 1)) + ": "
+                out_string += GetSourceInformationForAddress(kgm_pc)
+                out_string += "\n"
+            ix += 1
+        cnt += 1
+    print out_string
+# EndMacro: im6o_showdbg
+
+# Macro: rtentry_trash
+@lldb_command('rtentry_trash')
+def RtEntryTrash(cmd_args=None):
+    """ Walk the list of trash route entries
+    """
+    out_string = ""
+    rt_trash_head = kern.globals.rttrash_head
+    rtd = Cast(rt_trash_head.tqh_first, 'rtentry_dbg *')
+    rt_trash_format_string = "{0:4d}: {1:x} {2:3d} {3:6d} {4:6d}"
+    cnt = 0
+    while (int(rtd) != 0):
+        if (cnt == 0):
+            if (kern.ptrsize == 8):
+                print " rtentry ref hold rele dst gw parent flags/if\n"
+                print " ----------------- --- ------ ------ --------------- ----- ------------------ -----------\n"
+            else:
+                print " rtentry ref hold rele dst gw parent flags/if\n"
+                print " --------- --- ------ ------ --------------- ----- ---------- -----------\n"
+        out_string += rt_trash_format_string.format(cnt, rtd, rtd.rtd_refhold_cnt - rtd.rtd_refrele_cnt, rtd.rtd_refhold_cnt, rtd.rtd_refrele_cnt) + " "
+        out_string += GetRtEntryPrDetailsAsString(rtd) + "\n"
+        rtd = rtd.rtd_trash_link.tqe_next
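+        # rttrash_head appears to be a queue of rtentry_dbg records for routes
+        # that were freed but kept around for leak analysis; "ref" above is
+        # holds minus releases, so a nonzero value flags an unbalanced hold on
+        # a dead route.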
cnt += 1 + print out_string +# EndMacro: rtentry_trash + +# Macro: inifa_trash +@lldb_command('inifa_trash') +def InIfaTrash(cmd_args=None): + """ Walk the list of trash in_ifaddr entries + """ + out_string = "" + ifa_trash_head = kern.globals.inifa_trash_head + ifa = Cast(ifa_trash_head.tqh_first, 'in_ifaddr_dbg *') + inifa_trash_format_string = "{0:4d}: {1:x} {2:3d} {3:6d} {4:6d}" + cnt = 0 + while (int(ifa) != 0): + if (cnt == 0): + if (kern.ptrsize == 8): + print " in_ifa ref hold rele" + print " ------------------ --- ------ ----" + else: + print " in_ifa ref hold rele" + print " ---------- --- ----- ------" + out_string += inifa_trash_format_string.format(cnt + 1, ifa, ifa.inifa_refhold_cnt - ifa.inifa_refrele_cnt, ifa.inifa_refhold_cnt, ifa.inifa_refrele_cnt) + " " + out_string += GetSocketAddrAsStringInet(ifa.inifa.ia_ifa.ifa_addr) + "\n" + ifa = ifa.inifa_trash_link.tqe_next + cnt += 1 + print out_string +# EndMacro: inifa_trash + +# Macro: in6ifa_trash +@lldb_command('in6ifa_trash') +def In6IfaTrash(cmd_args=None): + """ Walk the list of trash in6_ifaddr entries + """ + out_string = "" + in6ifa_trash_head = kern.globals.in6ifa_trash_head + ifa = Cast(in6ifa_trash_head.tqh_first, 'in6_ifaddr_dbg *') + in6ifa_trash_format_string = "{0:4d}: 0x{1:x} {2:3d} {3:6d} {4:6d}" + cnt = 0 + while (int(ifa) != 0): + if (cnt == 0): + if (kern.ptrsize == 8): + print " in6_ifa ref hold rele" + print " ------------------ --- ------ ------" + else: + print " in6_ifa ref hold rele" + print " ---------- --- ------ ------" + out_string += in6ifa_trash_format_string.format(cnt + 1, ifa, ifa.in6ifa_refhold_cnt - ifa.in6ifa_refrele_cnt, ifa.in6ifa_refhold_cnt, ifa.in6ifa_refrele_cnt) + " " + out_string += GetSocketAddrAsStringInet6(ifa.in6ifa.ia_ifa.ifa_addr) + "\n" + ifa = ifa.in6ifa_trash_link.tqe_next + cnt += 1 + print out_string +# EndMacro: in6ifa_trash + +# Macro: inm_trash +@lldb_command('inm_trash') +def InmTrash(cmd_args=None): + """ Walk the list of trash in_multi entries + """ + out_string = "" + inm_trash_head = kern.globals.inm_trash_head + inm = Cast(inm_trash_head.tqh_first, 'in_multi_dbg *') + inm_trash_format_string = "{0:4d}: {1:x} {2:3d} {3:6d} {4:6d}" + cnt = 0 + while (int(inm) != 0): + if (cnt == 0): + if (kern.ptrsize == 8): + print " inm ref hold rele" + print " ------------------ --- ------ ------" + else: + print " inm ref hold rele" + print " ---------- --- ------ ------" + out_string += inm_trash_format_string.format(cnt + 1, inm, inm.inm_refhold_cnt - inm.inm_refrele_cnt, inm.inm_refhold_cnt, inm.inm_refrele_cnt) + " " + out_string += GetInAddrAsString(addressof(inm.inm.inm_addr)) + "\n" + inm = inm.inm_trash_link.tqe_next + cnt += 1 + print out_string +# EndMacro: inm_trash + +# Macro: in6m_trash +@lldb_command('in6m_trash') +def In6mTrash(cmd_args=None): + """ Walk the list of trash in6_multi entries + """ + out_string = "" + in6m_trash_head = kern.globals.in6m_trash_head + in6m = Cast(in6m_trash_head.tqh_first, 'in6_multi_dbg *') + in6m_trash_format_string = "{0:4d}: {1:x} {2:3d} {3:6d} {4:6d}" + cnt = 0 + while (int(in6m) != 0): + if (cnt == 0): + if (kern.ptrsize == 8): + print " in6m ref hold rele" + print " ------------------ --- ------ ------" + else: + print " in6m ref hold rele" + print " ---------- --- ------ ------" + out_string += in6m_trash_format_string.format(cnt + 1, in6m, in6m.in6m_refhold_cnt - in6m.in6m_refrele_cnt, in6m.in6m_refhold_cnt, in6m.in6m_refrele_cnt) + " " + out_string += GetIn6AddrAsString(addressof(in6m.in6m.in6m_addr)) + "\n" + in6m = 
in6m.in6m_trash_link.tqe_next + cnt += 1 + print out_string +# EndMacro: in6m_trash + +# Macro: ifma_trash +@lldb_command('ifma_trash') +def IfmaTrash(cmd_args=None): + """ Walk the list of trash ifmultiaddr entries + """ + out_string = "" + ifma_trash_head = kern.globals.ifma_trash_head + ifma = Cast(ifma_trash_head.tqh_first, 'ifmultiaddr_dbg *') + ifma_trash_format_string = "{0:4d}: {1:x} {2:3d} {3:6d} {4:6d}" + cnt = 0 + while (int(ifma) != 0): + if (cnt == 0): + if (kern.ptrsize == 8): + print " ifma ref hold rele" + print " ------------------ --- ------ ------" + else: + print " ifma ref hold rele" + print " ---------- --- ------ ------" + out_string += ifma_trash_format_string.format(cnt + 1, ifma, ifma.ifma_refhold_cnt - ifma.ifma_refrele_cnt, ifma.ifma_refhold_cnt, ifma.ifma_refrele_cnt) + " " + out_string += GetSocketAddrAsString(ifma.ifma.ifma_addr) + "\n" + out_string += " @ " + ifma.ifma.ifma_ifp.if_xname + ifma = ifma.ifma_trash_link.tqe_next + cnt += 1 + print out_string +# EndMacro: ifma_trash + +def GetInPcb(pcb, proto): + out_string = "" + out_string += hex(pcb) + + if (proto == IPPROTO_TCP): + out_string += " tcp" + else: + if (proto == IPPROTO_UDP): + out_string += " udp" + else: + out_string += str(proto) + "." + if (pcb.inp_vflag & INP_IPV4): + out_string += "4 " + if (pcb.inp_vflag & INP_IPV6): + out_string += "6 " + + if (pcb.inp_vflag & INP_IPV4): + out_string += " " + out_string += GetInAddrAsString(addressof(pcb.inp_dependladdr.inp46_local.ia46_addr4)) + else: + out_string += GetIn6AddrAsString((pcb.inp_dependladdr.inp6_local.__u6_addr.__u6_addr8)) + + out_string += " " + out_string += Getntohs(pcb.inp_lport) + out_string += " " + + if (pcb.inp_vflag & INP_IPV4): + out_string += " " + out_string += GetInAddrAsString(addressof(pcb.inp_dependfaddr.inp46_foreign.ia46_addr4)) + else: + out_string += GetIn6AddrAsString((pcb.inp_dependfaddr.inp6_foreign.__u6_addr.__u6_addr8)) + + out_string += " " + out_string += Getntohs(pcb.inp_fport) + out_string += " " + + if (proto == IPPROTO_TCP): + out_string += GetTcpState(pcb.inp_ppcb) + + if (pcb.inp_flags & INP_RECVOPTS): + out_string += "recvopts " + if (pcb.inp_flags & INP_RECVRETOPTS): + out_string += "recvretopts " + if (pcb.inp_flags & INP_RECVDSTADDR): + out_string += "recvdstaddr " + if (pcb.inp_flags & INP_HDRINCL): + out_string += "hdrincl " + if (pcb.inp_flags & INP_HIGHPORT): + out_string += "highport " + if (pcb.inp_flags & INP_LOWPORT): + out_string += "lowport " + if (pcb.inp_flags & INP_ANONPORT): + out_string += "anonport " + if (pcb.inp_flags & INP_RECVIF): + out_string += "recvif " + if (pcb.inp_flags & INP_MTUDISC): + out_string += "mtudisc " + if (pcb.inp_flags & INP_STRIPHDR): + out_string += "striphdr " + if (pcb.inp_flags & INP_RECV_ANYIF): + out_string += "recv_anyif " + if (pcb.inp_flags & INP_INADDR_ANY): + out_string += "inaddr_any " + if (pcb.inp_flags & INP_RECVTTL): + out_string += "recvttl " + if (pcb.inp_flags & INP_UDP_NOCKSUM): + out_string += "nocksum " + if (pcb.inp_flags & INP_BOUND_IF): + out_string += "boundif " + if (pcb.inp_flags & IN6P_IPV6_V6ONLY): + out_string += "v6only " + if (pcb.inp_flags & IN6P_PKTINFO): + out_string += "pktinfo " + if (pcb.inp_flags & IN6P_HOPLIMIT): + out_string += "hoplimit " + if (pcb.inp_flags & IN6P_HOPOPTS): + out_string += "hopopts " + if (pcb.inp_flags & IN6P_DSTOPTS): + out_string += "dstopts " + if (pcb.inp_flags & IN6P_RTHDR): + out_string += "rthdr " + if (pcb.inp_flags & IN6P_RTHDRDSTOPTS): + out_string += "rthdrdstopts " + if (pcb.inp_flags & 
IN6P_TCLASS): + out_string += "rcv_tclass " + if (pcb.inp_flags & IN6P_AUTOFLOWLABEL): + out_string += "autoflowlabel " + if (pcb.inp_flags & IN6P_BINDV6ONLY): + out_string += "bindv6only " + if (pcb.inp_flags & IN6P_RFC2292): + out_string += "RFC2292 " + if (pcb.inp_flags & IN6P_MTU): + out_string += "rcv_pmtu " + if (pcb.inp_flags & INP_PKTINFO): + out_string += "pktinfo " + if (pcb.inp_flags & INP_FLOW_SUSPENDED): + out_string += "suspended " + if (pcb.inp_flags & INP_NO_IFT_CELLULAR): + out_string += "nocellular " + if (pcb.inp_flags & INP_FLOW_CONTROLLED): + out_string += "flowctld " + if (pcb.inp_flags & INP_FC_FEEDBACK): + out_string += "fcfeedback " + if (pcb.inp_flags2 & INP2_TIMEWAIT): + out_string += "timewait " + if (pcb.inp_flags2 & INP2_IN_FCTREE): + out_string += "in_fctree " + if (pcb.inp_flags2 & INP2_WANT_FLOW_DIVERT): + out_string += "want_flow_divert " + + so = pcb.inp_socket + if (so != 0): + out_string += "[so=" + str(so) + " s=" + str(int(so.so_snd.sb_cc)) + " r=" + str(int(so.so_rcv.sb_cc)) + " usecnt=" + str(int(so.so_usecount)) + "] " + + if (pcb.inp_state == 0 or pcb.inp_state == INPCB_STATE_INUSE): + out_string += "inuse, " + else: + if (pcb.inp_state == INPCB_STATE_DEAD): + out_string += "dead, " + else: + out_string += "unknown (" + str(int(pcb.inp_state)) + "), " + + return out_string + +def GetPcbInfo(pcbi, proto): + out_string = "" + snd_cc = 0 + snd_buf = unsigned(0) + rcv_cc = 0 + rcv_buf = unsigned(0) + pcbseen = 0 + out_string += "lastport " + str(int(pcbi.ipi_lastport)) + " lastlow " + str(int(pcbi.ipi_lastlow)) + " lasthi " + str(int(pcbi.ipi_lasthi)) + "\n" + out_string += "active pcb count is " + str(int(pcbi.ipi_count)) + "\n" + hashsize = pcbi.ipi_hashmask + 1 + out_string += "hash size is " + str(int(hashsize)) + "\n" + out_string += str(pcbi.ipi_hashbase) + " has the following inpcb(s):\n" + if (kern.ptrsize == 8): + out_string += "pcb proto source address port destination address port\n" + else: + out_string += "pcb proto source address port destination address port\n\n" + + i = 0 + hashbase = pcbi.ipi_hashbase + while (i < hashsize): + head = hashbase[i] + pcb = cast(head.lh_first, 'inpcb *') + while pcb != 0: + pcbseen += 1 + out_string += GetInPcb(pcb, proto) + "\n" + so = pcb.inp_socket + if so != 0: + snd_cc += so.so_snd.sb_cc + mp = so.so_snd.sb_mb + while mp != 0: + snd_buf += 256 + if (mp.m_hdr.mh_flags & 0x01): + snd_buf = mp.M_dat.MH.MH_dat.MH_ext.ext_size + mp = mp.m_hdr.mh_next + rcv_cc += so.so_rcv.sb_cc + mp = so.so_rcv.sb_mb + while mp != 0: + rcv_buf += 256 + if (mp.m_hdr.mh_flags & 0x01): + rcv_buf += mp.M_dat.MH.MH_dat.MH_ext.ext_size + mp = mp.m_hdr.mh_next + pcb = cast(pcb.inp_hash.le_next, 'inpcb *') + i += 1 + + out_string += "total seen " + str(int(pcbseen)) + " snd_cc " + str(int(snd_cc)) + " rcv_cc " + str(int(rcv_cc)) + "\n" + out_string += "total snd_buf " + str(int(snd_buf)) + " rcv_buf " + str(int(rcv_buf)) + "\n" + out_string += "port hash base is " + hex(pcbi.ipi_porthashbase) + "\n" + + i = 0 + hashbase = pcbi.ipi_porthashbase + while (i < hashsize): + head = hashbase[i] + pcb = cast(head.lh_first, 'inpcbport *') + while pcb != 0: + out_string += "\t" + out_string += GetInPcbPort(pcb) + out_string += "\n" + pcb = cast(pcb.phd_hash.le_next, 'inpcbport *') + i += 1 + + return out_string + +def GetInPcbPort(ppcb): + out_string = "" + out_string += hex(ppcb) + ": lport " + out_string += Getntohs(ppcb.phd_port) + return out_string + + +def Getntohs(port): + out_string = "" + #p = unsigned(int(port) & 0x0000ffff) + p = 
((port & 0x0000ff00) >> 8) + p |= ((port & 0x000000ff) << 8) + return str(p) + +# Macro: show_kern_event_pcbinfo +def GetKernEventPcbInfo(kev_pcb_head): + out_string = "" + pcb = Cast(kev_pcb_head.lh_first, 'kern_event_pcb *') + if (kern.ptrsize == 8): + kev_pcb_format_string = "0x{0:<16x} {1:12d} {2:16d} {3:16d}" + out_string += " evp socket vendor code class filter subclass filter\n" + out_string += "-------------- ----------- ------------ ---------------\n" + else: + kev_pcb_format_string = "0x{0:<8x} {1:12d} {2:16d} {3:16d}" + out_string += "evp socket vendor code class filter subclass filter\n" + out_string += "---------- ----------- ------------ ---------------\n" + while (pcb != 0): + out_string += kev_pcb_format_string.format(pcb.evp_socket, pcb.evp_vendor_code_filter, pcb.evp_class_filter, pcb.evp_subclass_filter) + out_string += "\n" + pcb = pcb.evp_link.le_next + return out_string + +@lldb_command('show_kern_event_pcbinfo') +def ShowKernEventPcbInfo(cmd_args=None): + """ Display the list of Kernel Event protocol control block information + """ + print GetKernEventPcbInfo(addressof(kern.globals.kern_event_head)) +# EndMacro: show_kern_event_pcbinfo + +# Macro: show_kern_control_pcbinfo +def GetKernControlPcbInfo(ctl_head): + out_string = "" + kctl = Cast(ctl_head.tqh_first, 'kctl *') + if (kern.ptrsize == 8): + kcb_format_string = "0x{0:<16x} {1:4d} {2:10d}\n" + else: + kcb_format_string = "0x{0:<8x} {1:4d} {2:10d}\n" + while unsigned(kctl) != 0: + kctl_name = "controller: " + str(kctl.name) + "\n" + out_string += kctl_name + kcb = Cast(kctl.kcb_head.tqh_first, 'ctl_cb *') + if unsigned(kcb) != 0: + if (kern.ptrsize == 8): + out_string += "socket unit usecount\n" + out_string += "------ ---- --------\n" + else: + out_string += "socket unit usecount\n" + out_string += "------ ---- --------\n" + while unsigned(kcb) != 0: + out_string += kcb_format_string.format(kcb.so, kcb.unit, kcb.usecount) + kcb = kcb.next.tqe_next + out_string += "\n" + kctl = kctl.next.tqe_next + return out_string + +@lldb_command('show_kern_control_pcbinfo') +def ShowKernControlPcbInfo(cmd_args=None): + """ Display the list of Kernel Control protocol control block information + """ + print GetKernControlPcbInfo(addressof(kern.globals.ctl_head)) +# EndMacro: show_kern_control_pcbinfo + +# Macro: show_tcp_pcbinfo +@lldb_command('show_tcp_pcbinfo') +def ShowTcpPcbInfo(cmd_args=None): + """ Display the list of TCP protocol control block information + """ + print GetPcbInfo(addressof(kern.globals.tcbinfo), IPPROTO_TCP) +# EndMacro: show_tcp_pcbinfo + +# Macro: show_udp_pcbinfo +@lldb_command('show_udp_pcbinfo') +def ShowUdpPcbInfo(cmd_args=None): + """ Display the list of UDP protocol control block information + """ + print GetPcbInfo(addressof(kern.globals.udbinfo), IPPROTO_UDP) +# EndMacro: show_udp_pcbinfo + +# Macro: show_tcp_timewaitslots +@lldb_command('show_tcp_timewaitslots') +def ShowTcpTimeWaitSlots(cmd_args=None): + """ Display the list of the TCP protocol control blocks in TIMEWAIT + """ + out_string = "" + slot = -1 + _all = 0 + + if len(cmd_args) > 0: + if (int(cmd_args[0]) == -1): + _all = 1 + else: + slot = int(cmd_args[0]) + + out_string += "time wait slot size " + str(N_TIME_WAIT_SLOTS) + " cur_tw_slot " + str(int(kern.globals.cur_tw_slot)) + "\n" + i = 0 + + while (i < N_TIME_WAIT_SLOTS): + perslot = 0 + head = kern.globals.time_wait_slots[i] + if (i == slot or slot == -1): + pcb0 = cast(head.lh_first, 'inpcb *') + while (pcb0 != 0): + perslot += 1 + pcb0 = pcb0.inp_list.le_next + + out_string 
+= " slot " + str(i) + " count " + str(perslot) + "\n" + + if (_all or i == slot): + pcb0 = cast(head.lh_first, 'inpcb *') + while (pcb0 != 0): + out_string += "\t" + out_string += GetInPcb(pcb0, IPPROTO_TCP) + out_string += "\n" + pcb0 = pcb0.inp_list.le_next + + i += 1 + print out_string +# EndMacro: show_tcp_timewaitslots + +# Macro: show_domains +@lldb_command('show_domains') +def ShowDomains(cmd_args=None): + """ Display the list of the domains + """ + out_string = "" + domains = kern.globals.domains + dp = Cast(domains.tqh_first, 'domain *') + ifma_trash_format_string = "{0:4d}: {1:x} {2:3d} {3:6d} {4:6d}" + cnt = 0 + while (dp != 0): + out_string += "\"" + str(dp.dom_name) + "\"" + "[" + str(int(dp.dom_refs)) + " refs] domain " + hex(dp) + "\n" + out_string += " family:\t" + str(int(dp.dom_family)) + "\n" + out_string += " flags:0x\t" + str(int(dp.dom_flags)) + "\n" + out_string += " rtparams:\toff=" + str(int(dp.dom_rtoffset)) + ", maxrtkey=" + str(int(dp.dom_maxrtkey)) + "\n" + + if (dp.dom_init): + out_string += " init:\t" + out_string += GetSourceInformationForAddress(dp.dom_init) + "\n" + if (dp.dom_externalize): + out_string += " externalize:\t" + out_string += GetSourceInformationForAddress(dp.dom_externalize) + "\n" + if (dp.dom_dispose): + out_string += " dispose:\t" + out_string += GetSourceInformationForAddress(dp.dom_dispose) + "\n" + if (dp.dom_rtattach): + out_string += " rtattach:\t" + out_string += GetSourceInformationForAddress(dp.dom_rtattach) + "\n" + if (dp.dom_old): + out_string += " old:\t" + out_string += GetSourceInformationForAddress(dp.dom_old) + "\n" + + pr = Cast(dp.dom_protosw.tqh_first, 'protosw *') + while pr != 0: + pru = pr.pr_usrreqs + out_string += "\ttype " + str(int(pr.pr_type)) + ", protocol " + str(int(pr.pr_protocol)) + ", protosw " + hex(pr) + "\n" + out_string += "\t flags:0x\t" + hex(pr.pr_flags) + "\n" + if (pr.pr_input): + out_string += "\t input:\t" + out_string += GetSourceInformationForAddress(pr.pr_input) + "\n" + if (pr.pr_output): + out_string += "\t output:\t" + out_string += GetSourceInformationForAddress(pr.pr_output) + "\n" + if (pr.pr_ctlinput): + out_string += "\t ctlinput:\t" + out_string += GetSourceInformationForAddress(pr.pr_ctlinput) + "\n" + if (pr.pr_ctloutput): + out_string += "\t ctloutput:\t" + out_string += GetSourceInformationForAddress(pr.pr_ctloutput) + "\n" + if (pr.pr_init): + out_string += "\t init:\t" + out_string += GetSourceInformationForAddress(pr.pr_init) + "\n" + if (pr.pr_drain): + out_string += "\t drain:\t" + out_string += GetSourceInformationForAddress(pr.pr_drain) + "\n" + if (pr.pr_sysctl): + out_string += "\t sysctl:\t" + out_string += GetSourceInformationForAddress(pr.pr_sysctl) + "\n" + if (pr.pr_lock): + out_string += "\t lock:\t" + out_string += GetSourceInformationForAddress(pr.pr_lock) + "\n" + if (pr.pr_unlock): + out_string += "\t unlock:\t" + out_string += GetSourceInformationForAddress(pr.pr_unlock) + "\n" + if (pr.pr_getlock): + out_string += "\t getlock:\t" + out_string += GetSourceInformationForAddress(pr.pr_getlock) + "\n" + if (pr.pr_old): + out_string += "\t old:\t" + out_string += GetSourceInformationForAddress(pr.pr_old) + "\n" + + out_string += "\t pru_flags:0x\t" + hex(pru.pru_flags) + "\n" + out_string += "\t abort:\t" + out_string += GetSourceInformationForAddress(pru.pru_abort) + "\n" + out_string += "\t accept:\t" + out_string += GetSourceInformationForAddress(pru.pru_accept) + "\n" + out_string += "\t attach:\t" + out_string += 
GetSourceInformationForAddress(pru.pru_attach) + "\n" + out_string += "\t bind:\t" + out_string += GetSourceInformationForAddress(pru.pru_bind) + "\n" + out_string += "\t connect:\t" + out_string += GetSourceInformationForAddress(pru.pru_connect) + "\n" + out_string += "\t connect2:\t" + out_string += GetSourceInformationForAddress(pru.pru_connect2) + "\n" + out_string += "\t connectx:\t" + out_string += GetSourceInformationForAddress(pru.pru_connectx) + "\n" + out_string += "\t control:\t" + out_string += GetSourceInformationForAddress(pru.pru_control) + "\n" + out_string += "\t detach:\t" + out_string += GetSourceInformationForAddress(pru.pru_detach) + "\n" + out_string += "\t disconnect:\t" + out_string += GetSourceInformationForAddress(pru.pru_disconnect) + "\n" + out_string += "\t listen:\t" + out_string += GetSourceInformationForAddress(pru.pru_listen) + "\n" + out_string += "\t peeloff:\t" + out_string += GetSourceInformationForAddress(pru.pru_peeloff) + "\n" + out_string += "\t peeraddr:\t" + out_string += GetSourceInformationForAddress(pru.pru_peeraddr) + "\n" + out_string += "\t rcvd:\t" + out_string += GetSourceInformationForAddress(pru.pru_rcvd) + "\n" + out_string += "\t rcvoob:\t" + out_string += GetSourceInformationForAddress(pru.pru_rcvoob) + "\n" + out_string += "\t send:\t" + out_string += GetSourceInformationForAddress(pru.pru_send) + "\n" + out_string += "\t sense:\t" + out_string += GetSourceInformationForAddress(pru.pru_sense) + "\n" + out_string += "\t shutdown:\t" + out_string += GetSourceInformationForAddress(pru.pru_shutdown) + "\n" + out_string += "\t sockaddr:\t" + out_string += GetSourceInformationForAddress(pru.pru_sockaddr) + "\n" + out_string += "\t sopoll:\t" + out_string += GetSourceInformationForAddress(pru.pru_sopoll) + "\n" + out_string += "\t soreceive:\t" + out_string += GetSourceInformationForAddress(pru.pru_soreceive) + "\n" + out_string += "\t sosend:\t" + out_string += GetSourceInformationForAddress(pru.pru_sosend) + "\n" + pr = pr.pr_entry.tqe_next + dp = dp.dom_entry.tqe_next + + print out_string +# EndMacro: show_domains diff --git a/tools/lldbmacros/netdefines.py b/tools/lldbmacros/netdefines.py new file mode 100644 index 000000000..8fff027b4 --- /dev/null +++ b/tools/lldbmacros/netdefines.py @@ -0,0 +1,125 @@ +if_capenable_strings = ["RXCSUM", + "TXCSUM", + "VLAN_MTU", + "VLAN_HWTAGGING", + "JUMBO_MTU", + "TSO4", + "TSO6", + "LRO", + "AV", + "TXSTATUS" + ] + +if_flags_strings = ["UP", + "BROADCAST", + "DEBUG", + "LOOPBACK", + "POINTOPOINT", + "NOTRAILERS (deprecated)", + "RUNNING", + "NOARP", + "PROMISC", + "ALLMULTI", + "OACTIVE", + "SIMPLEX", + "LINK0", + "LINK1", + "LINK2-ALTPHYS", + "MULTICAST" + ] + +if_eflags_strings = ["AUTOCONFIGURING", + "unused", + "unused", + "unused", + "unused", + "DVR_REENTRY_OK (deprecated)", + "ACCEPT_RTADV", + "TXSTART", + "RXPOLL", + "VLAN", + "BOND", + "ARPLL", + "NOWINDOWSCALE", + "NOAUTOIPV6LL", + "unused", + "IPV4_ROUTER", + "IPV6_ROUTER", + "LOCALNET_PRIVATE", + "IPV6_ND6ALT", + "RESTRICTED_RECV", + "AWDL", + "NOACKPRI", + "unused", + "unused", + "unused", + "unused", + "unused", + "unused", + "SENDLIST", + "REUSE (deprecated)", + "INUSE (deprecated)", + "UPDOWNCHANGE" + ] + +AF_INET = 2 +AF_INET6 = 30 +AF_LINK = 18 + +INP_IPV4 = 0x1 +INP_IPV6 = 0x2 + +CTRACE_STACK_SIZE = 8 + +IFMA_TRACE_HIST_SIZE = 32 +RTD_TRACE_HIST_SIZE = 4 +INIFA_TRACE_HIST_SIZE = 32 +IN6IFA_TRACE_HIST_SIZE = 32 +INM_TRACE_HIST_SIZE = 32 +IF_REF_TRACE_HIST_SIZE = 8 +NDPR_TRACE_HIST_SIZE = 32 +NDDR_TRACE_HIST_SIZE = 32 
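+# The *_TRACE_HIST_SIZE values here mirror the ctrace ring-buffer depths
+# compiled into the corresponding *_dbg kernel structures; if they drift from
+# the running kernel's configuration, the *_showdbg macros will mis-walk the
+# history arrays. A generic decoder for the *_strings bit-name tables above
+# could look like this (illustrative sketch, not part of the original file):
+#   def DecodeBitNames(value, names):
+#       return " ".join(n for i, n in enumerate(names) if value & (1 << i))
+#   # e.g. DecodeBitNames(0x41, if_flags_strings) == "UP RUNNING"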
+IMO_TRACE_HIST_SIZE = 32 +IM6O_TRACE_HIST_SIZE = 32 + +INP_RECVOPTS = 0x01 +INP_RECVRETOPTS = 0x02 +INP_RECVDSTADDR = 0x04 +INP_HDRINCL = 0x08 +INP_HIGHPORT = 0x10 +INP_LOWPORT = 0x20 +INP_ANONPORT = 0x40 +INP_RECVIF = 0x80 +INP_MTUDISC = 0x100 +INP_STRIPHDR = 0x200 +INP_RECV_ANYIF = 0x400 +INP_INADDR_ANY = 0x800 +INP_RECVTTL = 0x1000 +INP_UDP_NOCKSUM = 0x2000 +INP_BOUND_IF = 0x4000 +IN6P_IPV6_V6ONLY = 0x008000 +IN6P_PKTINFO = 0x010000 +IN6P_HOPLIMIT = 0x020000 +IN6P_HOPOPTS = 0x040000 +IN6P_DSTOPTS = 0x080000 +IN6P_RTHDR = 0x100000 +IN6P_RTHDRDSTOPTS = 0x200000 +IN6P_TCLASS = 0x400000 +IN6P_AUTOFLOWLABEL = 0x800000 +IN6P_BINDV6ONLY = 0x10000000 +IN6P_RFC2292 = 0x02000000 +IN6P_MTU = 0x04000000 +INP_PKTINFO = 0x08000000 +INP_FLOW_SUSPENDED = 0x10000000 +INP_NO_IFT_CELLULAR = 0x20000000 +INP_FLOW_CONTROLLED = 0x40000000 +INP_FC_FEEDBACK = 0x80000000 +INPCB_STATE_INUSE = 0x1 +INPCB_STATE_CACHED = 0x2 +INPCB_STATE_DEAD = 0x3 + +INP2_TIMEWAIT = 0x00000001 +INP2_IN_FCTREE = 0x00000002 +INP2_WANT_FLOW_DIVERT = 0x00000004 + +N_TIME_WAIT_SLOTS = 128 diff --git a/tools/lldbmacros/pci.py b/tools/lldbmacros/pci.py new file mode 100644 index 000000000..a839d6838 --- /dev/null +++ b/tools/lldbmacros/pci.py @@ -0,0 +1,265 @@ +from xnu import * + +###################################### +# Helper functions +###################################### +def GetMemMappedPciCfgAddrFromRegistry(): + """ Retrieve the base address of the memory mapped PCI config space. It is + found in registry entry AppleACPIPlatformExpert, property acpi-mmcfg-seg0. + Returns: + int base address of memory mapped PCI config space + """ + kgm_pci_cfg_base_default = 0xe0000000 + acpi_pe_obj = FindRegistryObjectRecurse(kern.globals.gRegistryRoot, + "AppleACPIPlatformExpert") + if acpi_pe_obj is None: + print "Could not find AppleACPIPlatformExpert in registry, \ + using default base address for memory mapped PCI config space" + return kgm_pci_cfg_base_default + entry = kern.GetValueFromAddress(int(acpi_pe_obj), 'IOService *') + acpi_mmcfg_seg_prop = LookupKeyInPropTable(entry.fPropertyTable, "acpi-mmcfg-seg0") + if acpi_mmcfg_seg_prop is None: + print "Could not find acpi-mmcfg-seg0 property, \ + using default base address for memory mapped PCI config space" + return kgm_pci_cfg_base_default + else: + return int(GetNumber(acpi_mmcfg_seg_prop)) + +@static_var('kgm_pci_cfg_base', -1) +def GetMemMappedPciCfgAddrBase(): + """ Returns the base address of the memory mapped PCI config space. 
The address + is retrieved once from the registry, and is remembered for all subsequent + calls to this function + Returns: + int base address of memory mapped PCI config space + """ + if GetMemMappedPciCfgAddrBase.kgm_pci_cfg_base == -1: + # Retrieve the base address from the registry if it hasn't been + # initialized yet + GetMemMappedPciCfgAddrBase.kgm_pci_cfg_base = GetMemMappedPciCfgAddrFromRegistry() + return GetMemMappedPciCfgAddrBase.kgm_pci_cfg_base + +def MakeMemMappedPciCfgAddr(bus, dev, func, offs): + """ Construct the memory address for the PCI config register specified by the + bus, device, function, and offset + Params: + bus, dev, func, offs: int - bus, device, function, and offset that specifies + the PCI config space register + Returns: + int - the physical memory address that maps to the PCI config space register + """ + return GetMemMappedPciCfgAddrBase() | (bus << 20) | (dev << 15) | (func << 12) | offs + +def DoPciCfgRead(bits, bus, dev, func, offs): + """ Helper function that performs PCI config space read + Params: + bits: int - bit width of access: 8, 16, or 32 bits + bus, dev, func, offs: int - PCI config bus, device, function and offset + Returns: + int - the value read from PCI config space + """ + phys_addr = MakeMemMappedPciCfgAddr(bus, dev, func, offs) + return ReadPhysInt(phys_addr, bits) + +def DoPciCfgWrite(bits, bus, dev, func, offs, val): + """ Helper function that performs PCI config space write + Params: + bits: int - bit width of access: 8, 16, or 32 bits + bus, dev, func, offs: int - PCI config bus, device, function and offset + Returns: + boolean - True upon success, False otherwise + """ + phys_addr = MakeMemMappedPciCfgAddr(bus, dev, func, offs) + return WritePhysInt(phys_addr, val, bits) + +def ShowPciCfgBytes(bus, dev, func, offset): + """ Prints 16 bytes of PCI config space starting at specified offset + Params: + bus, dev, func, offset: int - bus, dev, function, and offset of the + PCI config space register + """ + # Print mem-mapped address at beginning of each 16-byte line + phys_addr = MakeMemMappedPciCfgAddr(bus, dev, func, offset) + read_vals = [DoPciCfgRead(32, bus, dev, func, offset + byte) + for byte in range(0, 16, 4)] + # It would be nicer to have a shorter format that we could loop + # over, but each call to print results in a newline which + # would prevent us from printing all 16 bytes on one line. 
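+    # A (hypothetical) more compact equivalent would flatten the four
+    # little-endian words into bytes first, at the cost of an extra list:
+    #   cfg_bytes = [(v >> s) & 0xff for v in read_vals for s in (0, 8, 16, 24)]
+    #   print ("{:08x}:" + "{:02x} " * 16).format(phys_addr, *cfg_bytes)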
+    bytes_fmt = "{:08x}:" + "{:02x} " * 16
+    print bytes_fmt.format(
+        phys_addr,
+        read_vals[0] & 0xff, (read_vals[0] >> 8) & 0xff,
+        (read_vals[0] >> 16) & 0xff, (read_vals[0] >> 24) & 0xff,
+        read_vals[1] & 0xff, (read_vals[1] >> 8) & 0xff,
+        (read_vals[1] >> 16) & 0xff, (read_vals[1] >> 24) & 0xff,
+        read_vals[2] & 0xff, (read_vals[2] >> 8) & 0xff,
+        (read_vals[2] >> 16) & 0xff, (read_vals[2] >> 24) & 0xff,
+        read_vals[3] & 0xff, (read_vals[3] >> 8) & 0xff,
+        (read_vals[3] >> 16) & 0xff, (read_vals[3] >> 24) & 0xff)
+
+def DoPciCfgDump(bus, dev, func):
+    """ Dumps PCI config space of the PCI device specified by bus, dev, function
+        Params:
+            bus, dev, func: int - bus, dev, function of PCI config space to dump
+    """
+    # Check for a valid PCI device
+    vendor_id = DoPciCfgRead(16, bus, dev, func, 0)
+    if (vendor_id == 0xbad10ad) or not (vendor_id > 0 and vendor_id < 0xffff):
+        return
+    # Show the standard PCI config space
+    print "address: 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F\n"
+    print "--------------------------------------------------------"
+    for offset in range(0, 256, 16):
+        ShowPciCfgBytes(bus, dev, func, offset)
+    # Check for PCIE extended capability config space
+    if DoPciCfgRead(8, bus, dev, func, 256) < 0xff:
+        print " \n"
+        for offset in range(256, 4096, 16):
+            ShowPciCfgBytes(bus, dev, func, offset)
+
+def DoPciCfgScan(max_bus, dump):
+    """ Do a PCI config scan starting at bus 0 up to specified max bus
+        Params:
+            max_bus: int - maximum bus to scan
+            dump: bool - if True, dump the config space of each scanned device
+                         if False, print basic information of each scanned device
+    """
+    max_dev = 32
+    max_func = 8
+    bdfs = ({'bus':bus, 'dev':dev, 'func':func}
+        for bus in range(max_bus)
+        for dev in range(max_dev)
+        for func in range(max_func))
+    fmt_string = "{:03x}:" * 3 + " " + \
+        "{:02x}" * 2 + " " + \
+        "{:02x}" * 2 + " {:02x} | " + \
+        "{:02x}" * 3
+    for bdf in bdfs:
+        bus = bdf['bus']
+        dev = bdf['dev']
+        func = bdf['func']
+        vend_dev_id = DoPciCfgRead(32, bus, dev, func, 0)
+        if not (vend_dev_id > 0 and vend_dev_id < 0xffffffff):
+            continue
+        if dump == False:
+            class_rev_id = DoPciCfgRead(32, bus, dev, func, 8)
+            print fmt_string.format(
+                bus, dev, func,
+                (vend_dev_id >> 8) & 0xff, vend_dev_id & 0xff,
+                (vend_dev_id >> 24) & 0xff, (vend_dev_id >> 16) & 0xff,
+                class_rev_id & 0xff, (class_rev_id >> 24) & 0xff,
+                (class_rev_id >> 16) & 0xff, (class_rev_id >> 8) & 0xff)
+        else:
+            print "{:03x}:{:03x}:{:03x}".format(bus, dev, func)
+            DoPciCfgDump(bus, dev, func)
+
+######################################
+# LLDB commands
+######################################
+@lldb_command('pci_cfg_read')
+def PciCfgRead(cmd_args=None):
+    """ Read PCI config space at the specified bus, device, function, and offset
+        Syntax: pci_cfg_read <bits> <bus> <device> <function> <offset>
+        bits: 8, 16, 32
+    """
+    if cmd_args == None or len(cmd_args) < 5:
+        print PciCfgRead.__doc__
+        return
+
+    bits = ArgumentStringToInt(cmd_args[0])
+    bus = ArgumentStringToInt(cmd_args[1])
+    dev = ArgumentStringToInt(cmd_args[2])
+    func = ArgumentStringToInt(cmd_args[3])
+    offs = ArgumentStringToInt(cmd_args[4])
+
+    read_val = DoPciCfgRead(bits, bus, dev, func, offs)
+    if read_val == 0xbad10ad:
+        print "ERROR: Failed to read PCI config space"
+        return
+
+    format_for_bits = {8:"{:#04x}", 16:"{:#06x}", 32:"{:#010x}"}
+    phys_addr = MakeMemMappedPciCfgAddr(bus, dev, func, offs)
+    fmt_string = "{:08x}: " + format_for_bits[bits]
+    print fmt_string.format(phys_addr, read_val)
+
+lldb_alias('pci_cfg_read8', 'pci_cfg_read 8')
+lldb_alias('pci_cfg_read16', 'pci_cfg_read 16')
+lldb_alias('pci_cfg_read32', 'pci_cfg_read 32')
+
+@lldb_command('pci_cfg_write')
+def PciCfgWrite(cmd_args=None):
+    """ Write PCI config space at the specified bus, device, function, and offset
+        Syntax: pci_cfg_write <bits> <bus> <device> <function> <offset> <value>
+        bits: 8, 16, 32
+
+        Prints an error message if there was a problem
+        Prints nothing upon success.
+    """
+    if cmd_args == None or len(cmd_args) < 6:
+        print PciCfgWrite.__doc__
+        return
+
+    bits = ArgumentStringToInt(cmd_args[0])
+    bus = ArgumentStringToInt(cmd_args[1])
+    dev = ArgumentStringToInt(cmd_args[2])
+    func = ArgumentStringToInt(cmd_args[3])
+    offs = ArgumentStringToInt(cmd_args[4])
+    write_val = ArgumentStringToInt(cmd_args[5])
+
+    if DoPciCfgWrite(bits, bus, dev, func, offs, write_val) == False:
+        print "ERROR: Failed to write PCI config space"
+
+lldb_alias('pci_cfg_write8', 'pci_cfg_write 8')
+lldb_alias('pci_cfg_write16', 'pci_cfg_write 16')
+lldb_alias('pci_cfg_write32', 'pci_cfg_write 32')
+
+@lldb_command('pci_cfg_dump')
+def PciCfgDump(cmd_args=None):
+    """ Dump PCI config space for specified bus, device, and function
+        If an invalid/inaccessible PCI device is specified, nothing will
+        be printed out.
+        Syntax: pci_cfg_dump <bus> <device> <function>
+    """
+    if cmd_args == None or len(cmd_args) < 3:
+        print PciCfgDump.__doc__
+        return
+
+    bus = ArgumentStringToInt(cmd_args[0])
+    dev = ArgumentStringToInt(cmd_args[1])
+    func = ArgumentStringToInt(cmd_args[2])
+
+    DoPciCfgDump(bus, dev, func)
+
+@lldb_command('pci_cfg_scan')
+def PciCfgScan(cmd_args=None):
+    """ Scan for pci devices. The maximum bus number to be scanned defaults to 8,
+        but can be specified as an argument
+        Syntax: pci_cfg_scan [max bus number]
+    """
+    if cmd_args == None or len(cmd_args) == 0:
+        max_bus = 8
+    elif len(cmd_args) == 1:
+        max_bus = ArgumentStringToInt(cmd_args[0])
+    else:
+        print PciCfgScan.__doc__
+        return
+
+    print "bus:dev:fcn: vendor device rev | class"
+    print "--------------------------------------"
+    DoPciCfgScan(max_bus, False)
+
+@lldb_command('pci_cfg_dump_all')
+def PciCfgDumpAll(cmd_args=None):
+    """ Dump config space for all scanned PCI devices. The maximum bus number to
+        be scanned defaults to 8, but can be specified as an argument
+        Syntax: pci_cfg_dump_all [max bus number]
+    """
+    if cmd_args == None or len(cmd_args) == 0:
+        max_bus = 8
+    elif len(cmd_args) == 1:
+        max_bus = ArgumentStringToInt(cmd_args[0])
+    else:
+        print PciCfgDumpAll.__doc__
+        return
+
+    DoPciCfgScan(max_bus, True)
diff --git a/tools/lldbmacros/plugins/__init__.py b/tools/lldbmacros/plugins/__init__.py
new file mode 100644
index 000000000..8b49f55f4
--- /dev/null
+++ b/tools/lldbmacros/plugins/__init__.py
@@ -0,0 +1,3 @@
+"""
+Plugins that process other lldb macros' output
+"""
diff --git a/tools/lldbmacros/plugins/speedtracer.py b/tools/lldbmacros/plugins/speedtracer.py
new file mode 100644
index 000000000..8d9d9e2e1
--- /dev/null
+++ b/tools/lldbmacros/plugins/speedtracer.py
@@ -0,0 +1,47 @@
+# A basic Plugin that creates performance reports from zprint output
+import urllib, urllib2
+
+kern_version = None
+def plugin_init(kernel_target, config, lldb_obj, isConnected):
+    """ initialize the common data as required by plugin """
+    global kern_version
+    kern_version = str(kernel_target.version)
+
+def plugin_execute(command_name, result_output):
+    """ The xnu framework will call this function with output of a command.
+        The options for returning are as follows
+        returns: (status, outstr, further_cmds)
+        status: Boolean - specifying whether plugin execution succeeded(True) or failed.
If failed then xnu will stop doing any further work with this command. + outstr: str - string output for user to be printed at the prompt + further_cmds: [] of str - this holds set of commands to execute at the lldb prompt. Empty array if nothing is required. + """ + status = True + outstr = '' + further_cmds = [] + submitvars = {} + submitvars['type']="text" + submitvars['log']=result_output + + submiturl = "http://speedtracer.apple.com/trace/analyze?format=xml" + encoded_data = urllib.urlencode(submitvars) + request = urllib2.Request(submiturl, encoded_data, {"Accept":"application/xml"}) + response = urllib2.urlopen(request) + + status = response.info()['status'] + if status == 201 or status == '201': + outstr += "CrashTracer data found at " + response.info()['location'] + newurl = response.info()['location'] + import webbrowser + webbrowser.open(newurl) + status = True + else: + outstr += "unknown response from server \n" + str(response.info()) + status = False + + return (status, outstr, further_cmds) + +def plugin_cleanup(): + """ A cleanup call from xnu which is a signal to wrap up any open file descriptors etc. """ + return None + + diff --git a/tools/lldbmacros/plugins/zprint_perf_log.py b/tools/lldbmacros/plugins/zprint_perf_log.py new file mode 100644 index 000000000..a47b3cb79 --- /dev/null +++ b/tools/lldbmacros/plugins/zprint_perf_log.py @@ -0,0 +1,31 @@ +# A basic Plugin that creates performance reports from zprint output +kern_version = None + +def plugin_init(kernel_target, config, lldb_obj, isConnected): + """ initialize the common data as required by plugin """ + global kern_version + kern_version = str(kernel_target.version) + +def plugin_execute(command_name, result_output): + """ The xnu framework will call this function with output of a command. + The options for returning are as follows + returns: (status, outstr, further_cmds) + status: Boolean - specifying whether plugin execution succeeded(True) or failed. If failed then xnu will stop doing any further work with this command. + outstr: str - string output for user to be printed at the prompt + further_cmds: [] of str - this holds set of commands to execute at the lldb prompt. Empty array if nothing is required. + """ + status = True + outstr = 'Nothing to be done here' + further_cmds = [] + further_cmds.append("memstats -- --plugin zprint_perf_log ") + + if command_name != 'zprint' : + status = False + else: + num_zones = len(result_output.split("\n")) -1 + outstr += "Num of zones analyzed =" + str(num_zones) + "\n" + return (status, outstr, further_cmds) + +def plugin_cleanup(): + """ A cleanup call from xnu which is a signal to wrap up any open file descriptors etc. """ + return None diff --git a/tools/lldbmacros/pmap.py b/tools/lldbmacros/pmap.py new file mode 100644 index 000000000..fc3a00529 --- /dev/null +++ b/tools/lldbmacros/pmap.py @@ -0,0 +1,560 @@ +from xnu import * +import xnudefines +from kdp import * +from utils import * + +def ReadPhysInt(phys_addr, bitsize = 64, cpuval = None): + """ Read a physical memory data based on address. + params: + phys_addr : int - Physical address to read + bitsize : int - defines how many bytes to read. defaults to 64 bit + cpuval : None (optional) + returns: + int - int value read from memory. in case of failure 0xBAD10AD is returned. + """ + if "kdp" == GetConnectionProtocol(): + return KDPReadPhysMEM(phys_addr, bitsize) + + #NO KDP. 
Attempt to use physical memory
+    paddr_in_kva = kern.PhysToKernelVirt(long(phys_addr))
+    if paddr_in_kva :
+        if bitsize == 64 :
+            return kern.GetValueFromAddress(paddr_in_kva, 'uint64_t *').GetSBValue().Dereference().GetValueAsUnsigned()
+        if bitsize == 32 :
+            return kern.GetValueFromAddress(paddr_in_kva, 'uint32_t *').GetSBValue().Dereference().GetValueAsUnsigned()
+        if bitsize == 16 :
+            return kern.GetValueFromAddress(paddr_in_kva, 'uint16_t *').GetSBValue().Dereference().GetValueAsUnsigned()
+        if bitsize == 8 :
+            return kern.GetValueFromAddress(paddr_in_kva, 'uint8_t *').GetSBValue().Dereference().GetValueAsUnsigned()
+    return 0xBAD10AD
+
+@lldb_command('readphys')
+def ReadPhys(cmd_args = None):
+    """ Read the specified untranslated (physical) address.
+        The first argument selects the access width; the second is interpreted
+        as a physical address, and the word read from it is displayed.
+        usage: readphys <nbits> <address>
+ nbits: 8,16,32,64 + address: 1234 or 0x1234 + """ + if cmd_args == None or len(cmd_args) < 2: + print "Insufficient arguments.", ReadPhys.__doc__ + return False + else: + nbits = ArgumentStringToInt(cmd_args[0]) + phys_addr = ArgumentStringToInt(cmd_args[1]) + print "{0: <#x}".format(ReadPhysInt(phys_addr, nbits)) + return True + +lldb_alias('readphys8', 'readphys 8 ') +lldb_alias('readphys16', 'readphys 16 ') +lldb_alias('readphys32', 'readphys 32 ') +lldb_alias('readphys64', 'readphys 64 ') + +def KDPReadPhysMEM(address, bits): + """ Setup the state for READPHYSMEM64 commands for reading data via kdp + params: + address : int - address where to read the data from + bits : int - number of bits in the intval (8/16/32/64) + returns: + int: read value from memory. + 0xBAD10AD: if failed to read data. + """ + retval = 0xBAD10AD + if "kdp" != GetConnectionProtocol(): + print "Target is not connected over kdp. Nothing to do here." + return retval + + input_address = unsigned(addressof(kern.globals.manual_pkt.input)) + len_address = unsigned(addressof(kern.globals.manual_pkt.len)) + data_address = unsigned(addressof(kern.globals.manual_pkt.data)) + if not WriteInt32ToMemoryAddress(0, input_address): + return retval + + kdp_pkt_size = GetType('kdp_readphysmem64_req_t').GetByteSize() + if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address): + return retval + + data_addr = int(addressof(kern.globals.manual_pkt)) + pkt = kern.GetValueFromAddress(data_addr, 'kdp_readphysmem64_req_t *') + + header_value =GetKDPPacketHeaderInt(request=GetEnumValue('kdp_req_t::KDP_READPHYSMEM64'), length=kdp_pkt_size) + + if ( WriteInt64ToMemoryAddress((header_value), int(addressof(pkt.hdr))) and + WriteInt64ToMemoryAddress(address, int(addressof(pkt.address))) and + WriteInt32ToMemoryAddress((bits/8), int(addressof(pkt.nbytes))) and + WriteInt16ToMemoryAddress(xnudefines.lcpu_self, int(addressof(pkt.lcpu))) + ): + + if WriteInt32ToMemoryAddress(1, input_address): + # now read data from the kdp packet + data_address = unsigned(addressof(kern.GetValueFromAddress(int(addressof(kern.globals.manual_pkt.data)), 'kdp_readphysmem64_reply_t *').data)) + if bits == 64 : + retval = kern.GetValueFromAddress(data_address, 'uint64_t *').GetSBValue().Dereference().GetValueAsUnsigned() + if bits == 32 : + retval = kern.GetValueFromAddress(data_address, 'uint32_t *').GetSBValue().Dereference().GetValueAsUnsigned() + if bits == 16 : + retval = kern.GetValueFromAddress(data_address, 'uint16_t *').GetSBValue().Dereference().GetValueAsUnsigned() + if bits == 8 : + retval = kern.GetValueFromAddress(data_address, 'uint8_t *').GetSBValue().Dereference().GetValueAsUnsigned() + return retval + + +def KDPWritePhysMEM(address, intval, bits): + """ Setup the state for WRITEPHYSMEM64 commands for saving data in kdp + params: + address : int - address where to save the data + intval : int - integer value to be stored in memory + bits : int - number of bits in the intval (8/16/32/64) + returns: + boolean: True if the write succeeded. + """ + if "kdp" != GetConnectionProtocol(): + print "Target is not connected over kdp. Nothing to do here." 
+        return False
+    input_address = unsigned(addressof(kern.globals.manual_pkt.input))
+    len_address = unsigned(addressof(kern.globals.manual_pkt.len))
+    data_address = unsigned(addressof(kern.globals.manual_pkt.data))
+    if not WriteInt32ToMemoryAddress(0, input_address):
+        return False
+
+    kdp_pkt_size = GetType('kdp_writephysmem64_req_t').GetByteSize()
+    if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address):
+        return False
+
+    data_addr = int(addressof(kern.globals.manual_pkt))
+    pkt = kern.GetValueFromAddress(data_addr, 'kdp_writephysmem64_req_t *')
+
+    header_value = GetKDPPacketHeaderInt(request=GetEnumValue('kdp_req_t::KDP_WRITEPHYSMEM64'), length=kdp_pkt_size)
+
+    if ( WriteInt64ToMemoryAddress((header_value), int(addressof(pkt.hdr))) and
+         WriteInt64ToMemoryAddress(address, int(addressof(pkt.address))) and
+         WriteInt32ToMemoryAddress((bits/8), int(addressof(pkt.nbytes))) and
+         WriteInt16ToMemoryAddress(xnudefines.lcpu_self, int(addressof(pkt.lcpu)))
+         ):
+
+        if bits == 8:
+            if not WriteInt8ToMemoryAddress(intval, int(addressof(pkt.data))):
+                return False
+        if bits == 16:
+            if not WriteInt16ToMemoryAddress(intval, int(addressof(pkt.data))):
+                return False
+        if bits == 32:
+            if not WriteInt32ToMemoryAddress(intval, int(addressof(pkt.data))):
+                return False
+        if bits == 64:
+            if not WriteInt64ToMemoryAddress(intval, int(addressof(pkt.data))):
+                return False
+        if WriteInt32ToMemoryAddress(1, input_address):
+            return True
+    return False
+
+
+def WritePhysInt(phys_addr, int_val, bitsize = 64):
+    """ Write an integer value into physical memory at the given address.
+        params:
+            phys_addr : int - Physical address to write to
+            int_val   : int - integer value to store in memory
+            bitsize   : int - how many bits to write. defaults to 64 bit
+        returns:
+            bool - True if write was successful.
+    """
+    if "kdp" == GetConnectionProtocol():
+        if not KDPWritePhysMEM(phys_addr, int_val, bitsize):
+            print "Failed to write via KDP."
+            return False
+        return True
+    # Not connected over kdp; there is no other way to write physical memory.
+    print "Failed: Write to physical memory is not supported for %s connection." % GetConnectionProtocol()
+    return False
+
+@lldb_command('writephys')
+def WritePhys(cmd_args=None):
+    """ Write an integer value to the specified untranslated address
+        The address argument is interpreted as a physical address, and the
+        value is written there at the requested width.
+        usage: writephys <nbits> <address> <value>
+ nbits: 8,16,32,64 + address: 1234 or 0x1234 + value: int value to be written + ex. (lldb)writephys 16 0x12345abcd 0x25 + """ + if cmd_args == None or len(cmd_args) < 3: + print "Invalid arguments.", WritePhys.__doc__ + else: + nbits = ArgumentStringToInt(cmd_args[0]) + phys_addr = ArgumentStringToInt(cmd_args[1]) + int_value = ArgumentStringToInt(cmd_args[2]) + print WritePhysInt(phys_addr, int_value, nbits) + + +lldb_alias('writephys8', 'writephys 8 ') +lldb_alias('writephys16', 'writephys 16 ') +lldb_alias('writephys32', 'writephys 32 ') +lldb_alias('writephys64', 'writephys 64 ') + +def _PT_Step(paddr, index, verbose_level = vSCRIPT): + """ + Step to lower-level page table and print attributes + paddr: current page table entry physical address + index: current page table entry index (0..511) + verbose_level: vHUMAN: print nothing + vSCRIPT: print basic information + vDETAIL: print basic information and hex table dump + returns: (pt_paddr, pt_valid, pt_large) + pt_paddr: next level page table entry physical address + or null if invalid + pt_valid: 1 if $kgm_pt_paddr is valid, 0 if the walk + should be aborted + pt_large: 1 if kgm_pt_paddr is a page frame address + of a large page and not another page table entry + """ + entry_addr = paddr + (8 * index) + entry = ReadPhysInt(entry_addr, 64, xnudefines.lcpu_self ) + out_string = '' + if verbose_level >= vDETAIL: + for pte_loop in range(0, 512): + paddr_tmp = paddr + (8 * pte_loop) + out_string += "{0: <#020x}:\t {1: <#020x}\n".format(paddr_tmp, ReadPhysInt(paddr_tmp, 64, xnudefines.lcpu_self)) + paddr_mask = ~((0xfff<<52) | 0xfff) + paddr_large_mask = ~((0xfff<<52) | 0x1fffff) + pt_valid = False + pt_large = False + pt_paddr = 0 + if verbose_level < vSCRIPT: + if entry & 0x1 : + pt_valid = True + pt_large = False + pt_paddr = entry & paddr_mask + if entry & (0x1 <<7): + pt_large = True + pt_paddr = entry & paddr_large_mask + else: + out_string+= "{0: <#020x}:\n\t{1:#020x}\n\t".format(entry_addr, entry) + if entry & 0x1: + out_string += " valid" + pt_paddr = entry & paddr_mask + pt_valid = True + else: + out_string += " invalid" + pt_paddr = 0 + pt_valid = False + #Stop decoding other bits + entry = 0 + if entry & (0x1 << 1): + out_string += " writable" + else: + out_string += " read-only" + + if entry & (0x1 << 2): + out_string += " user" + else: + out_string += " supervisor" + + if entry & (0x1 << 3): + out_string += " PWT" + + if entry & (0x1 << 4): + out_string += " PCD" + + if entry & (0x1 << 5): + out_string += " accessed" + + if entry & (0x1 << 6): + out_string += " dirty" + + if entry & (0x1 << 7): + out_string += " large" + pt_large = True + else: + pt_large = False + + if entry & (0x1 << 8): + out_string += " global" + + if entry & (0x3 << 9): + out_string += " avail:{0:x}".format((entry >> 9) & 0x3) + + if entry & (0x1 << 63): + out_string += " noexec" + print out_string + return (pt_paddr, pt_valid, pt_large) + + + + +def _PmapL4Walk(pmap_addr_val,vaddr, verbose_level = vSCRIPT): + """ Walk the l4 pmap entry. 
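The index used at each step of the walk below is a fixed 9-bit field of the linear address (bits 47:39, 38:30, 29:21 and 20:12), plus a 12-bit page offset. The arithmetic in isolation, as plain Python with no lldb state:

    def decompose_x86_64_vaddr(vaddr):
        # 9 bits per level (512-entry tables), 12-bit page offset
        pml4_idx = (vaddr >> 39) & 0x1ff
        pdpt_idx = (vaddr >> 30) & 0x1ff
        pd_idx   = (vaddr >> 21) & 0x1ff
        pt_idx   = (vaddr >> 12) & 0x1ff
        return (pml4_idx, pdpt_idx, pd_idx, pt_idx, vaddr & 0xfff)

    # decompose_x86_64_vaddr(0xffffff8000200000) -> (511, 0, 1, 0, 0)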
+ params: pmap_addr_val - core.value representing kernel data of type pmap_addr_t + vaddr : int - virtual address to walk + """ + is_cpu64_bit = int(kern.globals.cpu_64bit) + pt_paddr = unsigned(pmap_addr_val) + pt_valid = (unsigned(pmap_addr_val) != 0) + pt_large = 0 + pframe_offset = 0 + if pt_valid and is_cpu64_bit: + # Lookup bits 47:39 of linear address in PML4T + pt_index = (vaddr >> 39) & 0x1ff + pframe_offset = vaddr & 0x7fffffffff + if verbose_level > vHUMAN : + print "pml4 (index {0:d}):".format(pt_index) + (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level) + if pt_valid: + # Lookup bits 38:30 of the linear address in PDPT + pt_index = (vaddr >> 30) & 0x1ff + pframe_offset = vaddr & 0x3fffffff + if verbose_level > vHUMAN: + print "pdpt (index {0:d}):".format(pt_index) + (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level) + if pt_valid and not pt_large: + #Lookup bits 29:21 of the linear address in PDPT + pt_index = (vaddr >> 21) & 0x1ff + pframe_offset = vaddr & 0x1fffff + if verbose_level > vHUMAN: + print "pdt (index {0:d}):".format(pt_index) + (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level) + if pt_valid and not pt_large: + #Lookup bits 20:21 of linear address in PT + pt_index = (vaddr >> 12) & 0x1ff + pframe_offset = vaddr & 0xfff + if verbose_level > vHUMAN: + print "pt (index {0:d}):".format(pt_index) + (pt_paddr, pt_valid, pt_large) = _PT_Step(pt_paddr, pt_index, verbose_level) + paddr = 0 + paddr_isvalid = False + if pt_valid: + paddr = pt_paddr + pframe_offset + paddr_isvalid = True + + if verbose_level > vHUMAN: + if paddr_isvalid: + pvalue = ReadPhysInt(paddr, 32, xnudefines.lcpu_self) + print "phys {0: <#020x}: {1: <#020x}".format(paddr, pvalue) + else: + print "no translation" + + return + +def _PmapWalkARMLevel1Section(tte, vaddr, verbose_level = vSCRIPT): + paddr = 0 + out_string = "" + #Supersection or just section? 
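+    # A supersection descriptor (bit 18 set, tested below as tte & 0x40000)
+    # maps 16MB: the top 8 bits of the descriptor form the base and the low
+    # 24 bits of the VA are the offset. A plain section maps 1MB, so it
+    # keeps a 12-bit base and a 20-bit offset instead.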
+ if (tte & 0x40000) == 0x40000: + paddr = ( (tte & 0xFF000000) | (vaddr & 0x00FFFFFF) ) + else: + paddr = ( (tte & 0xFFF00000) | (vaddr & 0x000FFFFF) ) + + if verbose_level >= vSCRIPT: + out_string += "{0: <#020x}\n\t{1: <#020x}\n\t".format(addressof(tte), tte) + #bit [1:0] evaluated in PmapWalkARM + # B bit 2 + b_bit = (tte & 0x4) >> 2 + # C bit 3 + c_bit = (tte & 0x8) >> 3 + #XN bit 4 + if (tte & 0x10) : + out_string += "no-execute" + else: + out_string += "execute" + #Domain bit [8:5] if not supersection + if (tte & 0x40000) == 0x0: + out_string += " domain ({:d})".format(((tte & 0x1e0) >> 5) ) + #IMP bit 9 + out_string += " imp({:d})".format( ((tte & 0x200) >> 9) ) + # AP bit 15 and [11:10] merged to a single 3 bit value + access = ( (tte & 0xc00) >> 10 ) | ((tte & 0x8000) >> 13) + out_string += xnudefines.arm_level2_access_strings[access] + + #TEX bit [14:12] + tex_bits = ((tte & 0x7000) >> 12) + #Print TEX, C , B all together + out_string += " TEX:C:B({:d}{:d}{:d}:{:d}:{:d})".format( + 1 if (tex_bits & 0x4) else 0, + 1 if (tex_bits & 0x2) else 0, + 1 if (tex_bits & 0x1) else 0, + c_bit, + b_bit + ) + # S bit 16 + if tte & 0x10000: + out_string += " shareable" + else: + out_string += " not-shareable" + # nG bit 17 + if tte & 0x20000 : + out_string += " not-global" + else: + out_string += " global" + # Supersection bit 18 + if tte & 0x40000: + out_string += " supersection" + else: + out_string += " section" + #NS bit 19 + if tte & 0x80000 : + out_string += " no-secure" + else: + out_string += " secure" + + print out_string + return paddr + + + +def _PmapWalkARMLevel2(tte, vaddr, verbose_level = vSCRIPT): + """ Pmap walk the level 2 tte. + params: + tte - value object + vaddr - int + returns: str - description of the tte + additional informaiton based on verbose_level + """ + pte_base = kern.PhysToKernelVirt(tte & 0xFFFFFC00) + pte_index = (vaddr >> 12) & 0xFF + pte_base_val = kern.GetValueFromAddress(pte_base, 'pt_entry_t *') + pte = pte_base_val[pte_index] + out_string = '' + if verbose_level >= vSCRIPT: + out_string += "{0: <#020x}\n\t{1: <#020x}\n\t".format(addressof(tte), tte) + # bit [1:0] evaluated in PmapWalkARM + # NS bit 3 + if tte & 0x8: + out_string += ' no-secure' + else: + out_string += ' secure' + #Domain bit [8:5] + out_string += " domain({:d})".format(((tte & 0x1e0) >> 5)) + # IMP bit 9 + out_string += " imp({:d})".format( ((tte & 0x200) >> 9)) + out_string += "\n" + if verbose_level >= vSCRIPT: + out_string += "second-level table (index {:d}):\n".format(pte_index) + if verbose_level >= vDETAIL: + for i in range(256): + tmp = pte_base_val[i] + out_string += "{0: <#020x}:\t{1: <#020x}\n".format(addressof(tmp), unsigned(tmp)) + + paddr = 0 + if pte & 0x2: + paddr = (unsigned(pte) & 0xFFFFF000) | (vaddr & 0xFFF) + + if verbose_level >= vSCRIPT: + out_string += " {0: <#020x}\n\t{1: <#020x}\n\t".format(addressof(pte), unsigned(pte)) + if (pte & 0x3) == 0x0: + out_string += " invalid" + else: + if (pte & 0x3) == 0x1: + out_string += " large" + # XN bit 15 + if pte & 0x8000 == 0x8000: + out_string+= " no-execute" + else: + out_string += " execute" + else: + out_string += " small" + # XN bit 0 + if (pte & 0x1) == 0x01: + out_string += " no-execute" + else: + out_string += " execute" + # B bit 2 + b_bit = (pte & 0x4) >> 2 + c_bit = (pte & 0x8) >> 3 + # AP bit 9 and [5:4], merged to a single 3-bit value + access = (pte & 0x30) >> 4 | (pte & 0x200) >> 7 + out_string += xnudefines.arm_level2_access_strings[access] + + #TEX bit [14:12] for large, [8:6] for small + tex_bits = ((pte 
& 0x1c0) >> 6) + if (pte & 0x3) == 0x1: + tex_bits = ((pte & 0x7000) >> 12) + + # Print TEX, C , B alltogether + out_string += " TEX:C:B({:d}{:d}{:d}:{:d}:{:d})".format( + 1 if (tex_bits & 0x4) else 0, + 1 if (tex_bits & 0x2) else 0, + 1 if (tex_bits & 0x1) else 0, + c_bit, + b_bit + ) + # S bit 10 + if pte & 0x400 : + out_string += " shareable" + else: + out_string += " not-shareable" + + # nG bit 11 + if pte & 0x800: + out_string += " not-global" + else: + out_string += " global" + print out_string + return paddr + #end of level 2 walking of arm + + +def PmapWalkARM(pmap, vaddr, verbose_level = vHUMAN): + """ Pmap walking for ARM kernel. + params: + pmapval: core.value - representing pmap_t in kernel + vaddr: int - integer representing virtual address to walk + """ + paddr = 0 + # shift by TTESHIFT (20) to get tte index + tte_index = ((vaddr - unsigned(pmap.min)) >> 20 ) + tte = pmap.tte[tte_index] + if verbose_level >= vSCRIPT: + print "First-level table (index {:d}):".format(tte_index) + if verbose_level >= vDETAIL: + for i in range(0, 4096): + ptr = unsigned(addressof(pmap.tte[i])) + val = unsigned(pmap.tte[i]) + print "{0: <#020x}:\t {1: <#020x}".format(ptr, val) + if (tte & 0x3) == 0x1: + paddr = _PmapWalkARMLevel2(tte, vaddr, verbose_level) + elif (tte & 0x3) == 0x2 : + paddr = _PmapWalkARMLevel1Section(tte, vaddr, verbose_level) + else: + paddr = 0 + if verbose_level >= vSCRIPT: + print "Invalid First-Level Translation Table Entry: {0: #020x}".format(tte) + + if verbose_level >= vHUMAN: + if paddr: + print "Translation of {:#x} is {:#x}.".format(vaddr, paddr) + else: + print "(no translation)" + + return paddr + +def PmapWalkX86_64(pmapval, vaddr): + """ + params: pmapval - core.value representing pmap_t in kernel + vaddr: int - int representing virtual address to walk + """ + _PmapL4Walk(pmapval.pm_cr3, vaddr, config['verbosity']) + +def assert_64bit(val): + assert(val < 2**64) + +def PmapWalk(pmap, vaddr, verbose_level = vHUMAN): + if kern.arch == 'x86_64': + return PmapWalkX86_64(pmap, vaddr) + elif kern.arch == 'arm': + return PmapWalkARM(pmap, vaddr, verbose_level) + else: + raise NotImplementedError("PmapWalk does not support {0}".format(kern.arch)) + +@lldb_command('pmap_walk') +def PmapWalkHelper(cmd_args=None): + """ Perform a page-table walk in for . + Syntax: (lldb) pmap_walk [-v] + Multiple -v's can be specified for increased verbosity + """ + if cmd_args == None or len(cmd_args) < 2: + raise ArgumentError("Too few arguments to pmap_walk.") + + pmap = kern.GetValueAsType(cmd_args[0], 'pmap_t') + addr = unsigned(kern.GetValueFromAddress(cmd_args[1], 'void *')) + PmapWalk(pmap, addr, config['verbosity']) + return diff --git a/tools/lldbmacros/process.py b/tools/lldbmacros/process.py new file mode 100644 index 000000000..11f9ef452 --- /dev/null +++ b/tools/lldbmacros/process.py @@ -0,0 +1,1320 @@ + +""" Please make sure you read the README file COMPLETELY BEFORE reading anything below. + It is very critical that you read coding guidelines in Section E in README file. +""" + +from xnu import * +import sys, shlex +from utils import * +from core.lazytarget import * +import xnudefines + +def GetProcInfo(proc): + """ returns a string name, pid, parent and task for a proc_t. Decodes cred, flag and p_stat fields. + params: + proc : value object representing a proc in the kernel + returns: + str : A string describing various information for process. 
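The p_flag decoding below walks the flag word one bit at a time against the xnudefines.proc_flag_explain_strings table. The same idiom in isolation, with an illustrative subset of flag names standing in for that table:

    flag_names = {0x1: "P_ADVLOCK", 0x2: "P_CONTROLT", 0x4: "P_LP64"}

    def decode_flags(flags):
        names = []
        bit = 1
        while bit <= flags:
            if (flags & bit) and bit in flag_names:
                names.append(flag_names[bit])
            bit <<= 1
        return names

    # decode_flags(0x5) -> ['P_ADVLOCK', 'P_LP64']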
+ """ + out_string = "" + out_string += ("Process {p: <#020x}\n\tname {p.p_comm: <20s}\n\tpid:{p.p_pid: <6d} " + + "task:{p.task: <#020x} p_stat:{p.p_stat: <6d} parent pid: {p.p_ppid: <6d}\n" + ).format(p=proc) + #print the Creds + ucred = proc.p_ucred + if ucred: + out_string += "Cred: euid {:d} ruid {:d} svuid {:d}\n".format(ucred.cr_posix.cr_uid, + ucred.cr_posix.cr_ruid, + ucred.cr_posix.cr_svuid ) + #print the flags + flags = int(proc.p_flag) + out_string += "Flags: {0: <#020x}\n".format(flags) + i = 1 + num = 1 + while num <= flags: + if flags & num: + out_string += "\t" + xnudefines.proc_flag_explain_strings[i] + "\n" + elif num == 0x4: #special case for 32bit flag + out_string += "\t" + xnudefines.proc_flag_explain_strings[0] + "\n" + i += 1 + num = num << 1 + out_string += "State: " + state_val = proc.p_stat + if state_val < 1 or state_val > len(xnudefines.proc_state_strings) : + out_string += "(Unknown)" + else: + out_string += xnudefines.proc_state_strings[int(state_val)] + + return out_string + +def GetProcNameForPid(pid): + """ Finds the name of the process corresponding to a given pid + params: + pid : int, pid you want to find the procname for + returns + str : Name of the process corresponding to the pid, "Unknown" if not found + """ + for p in kern.procs: + if int(p.p_pid) == int(pid): + return str(p.p_comm) + return "Unknown" + +def GetProcForPid(search_pid): + """ Finds the value object representing a proc in the kernel based on its pid + params: + search_pid : int, pid whose proc structure you want to find + returns: + value : The value object representing the proc, if a proc corresponding + to the given pid is found. Returns None otherwise + """ + if search_pid == 0: + return kern.globals.initproc + else: + headp = kern.globals.allproc + for proc in IterateListEntry(headp, 'struct proc *', 'p_list'): + if proc.p_pid == search_pid: + return proc + return None + +@lldb_command('allproc') +def AllProc(cmd_args=None): + """ Walk through the allproc structure and print procinfo for each process structure. + params: + cmd_args - [] : array of strings passed from lldb command prompt + """ + for proc in kern.procs : + print GetProcInfo(proc) + + +@lldb_command('zombproc') +def ZombProc(cmd_args=None): + """ Routine to print out all procs in the zombie list + params: + cmd_args - [] : array of strings passed from lldb command prompt + """ + for proc in kern.zombprocs: + print GetProcInfo(proc) + +@lldb_command('zombstacks') +def ZombStacks(cmd_args=None): + """ Routine to print out all stacks of tasks that are exiting + """ + for proc in kern.zombprocs: + if proc.p_stat != 5: + t = Cast(proc.task, 'task *') + ShowTaskStacks(t) +#End of Zombstacks + +@lldb_type_summary(['task', 'task_t']) +@header("{0: <20s} {1: <20s} {2: <20s} {3: >5s} {4: <5s}".format("task","vm_map", "ipc_space", "#acts", "flags")) +def GetTaskSummary(task): + """ Summarizes the important fields in task structure. 
+ params: task: value - value object representing a task in kernel + returns: str - summary of the task + """ + out_string = "" + format_string = '{0: <#020x} {1: <#020x} {2: <#020x} {3: >5d} {4: <5s}' + thread_count = int(task.thread_count) + task_flags = '' + if hasattr(task, "suppression_generation") and (int(task.suppression_generation) & 0x1) == 0x1: + task_flags += 'P' + if hasattr(task, "suspend_count") and int(task.suspend_count) > 0: + task_flags += 'S' + if hasattr(task, "imp_receiver") and int(task.imp_receiver) == 1: + task_flags += 'R' + if hasattr(task, "imp_donor") and int(task.imp_donor) == 1: + task_flags += 'D' + if hasattr(task, "task_imp_assertcnt") and int(task.task_imp_assertcnt) > 0: + task_flags += 'B' + out_string += format_string.format(task, task.map, task.itk_space, thread_count, task_flags) + return out_string + +@lldb_type_summary(['thread *', 'thread_t']) +@header("{0: <24s} {1: <10s} {2: <20s} {3: <6s} {4: <10s} {5: <5s} {6: <20s} {7: <45s} {8: <20s} {9: <20s}".format('thread', 'thread_id', 'processor', 'pri', 'io_policy', 'state', 'wait_queue', 'wait_event', 'wmesg', 'thread_name')) +def GetThreadSummary(thread): + """ Summarize the thread structure. It decodes the wait state and waitevents from the data in the struct. + params: thread: value - value objecte representing a thread in kernel + returns: str - summary of a thread + """ + out_string = "" + format_string = "{0: <24s} {1: <10s} {2: <20s} {3: <6s} {4: <10s} {5: <5s} {6: <20s} {7: <45s} {8: <20s} {9: <20s}" + thread_ptr_str = str("{0: <#020x}".format(thread)) + if int(thread.static_param) : + thread_ptr_str+="[WQ]" + thread_id = hex(thread.thread_id) + thread_name = '' + processor = hex(thread.last_processor) + sched_priority = str(int(thread.sched_pri)) + + io_policy_str = "" + if int(thread.uthread) != 0: + uthread = Cast(thread.uthread, 'uthread *') + #check for thread name + if int(uthread.pth_name) != 0 : + th_name_strval = Cast(uthread.pth_name, 'char *') + if len(str(th_name_strval)) > 0 : + thread_name = str(th_name_strval) + + #check for io_policy flags + if int(uthread.uu_flag) & 0x400: + io_policy_str+='RAGE ' + + #now flags for task_policy + + io_policy_str = "" + + if int(thread.effective_policy.darwinbg) != 0: + io_policy_str += "B" + if int(thread.effective_policy.lowpri_cpu) != 0: + io_policy_str += "L" + + if int(thread.effective_policy.io_tier) != 0: + io_policy_str += "T" + if int(thread.effective_policy.io_passive) != 0: + io_policy_str += "P" + if int(thread.effective_policy.terminated) != 0: + io_policy_str += "D" + + state = int(thread.state) + thread_state_chars = {0:'', 1:'W', 2:'S', 4:'R', 8:'U', 16:'H', 32:'A', 64:'P', 128:'I'} + state_str = '' + state_str += thread_state_chars[int(state & 0x1)] + state_str += thread_state_chars[int(state & 0x2)] + state_str += thread_state_chars[int(state & 0x4)] + state_str += thread_state_chars[int(state & 0x8)] + state_str += thread_state_chars[int(state & 0x10)] + state_str += thread_state_chars[int(state & 0x20)] + state_str += thread_state_chars[int(state & 0x40)] + state_str += thread_state_chars[int(state & 0x80)] + + #wait queue information + wait_queue_str = '' + wait_event_str = '' + wait_message = '' + if ( state & 0x1 ) != 0: + #we need to look at the waitqueue as well + wait_queue_str = str("{0: <#020x}".format(int(hex(thread.wait_queue), 16))) + wait_event_str = str("{0: <#020x}".format(int(hex(thread.wait_event), 16))) + wait_event_str_sym = kern.Symbolicate(int(hex(thread.wait_event), 16)) + if len(wait_event_str_sym) > 
0: + wait_event_str = wait_event_str.strip() + " <" + wait_event_str_sym + ">" + if int(thread.uthread) != 0 : + uthread = Cast(thread.uthread, 'uthread *') + if int(uthread.uu_wmesg) != 0: + wait_message = str(Cast(uthread.uu_wmesg, 'char *')) + + out_string += format_string.format(thread_ptr_str, thread_id, processor, sched_priority, io_policy_str, state_str, wait_queue_str, wait_event_str, wait_message, thread_name ) + return out_string + + + +@lldb_type_summary(['proc']) +@header("{0: >6s} {1: ^20s} {2: >14s} {3: ^10s} {4: <20s}".format("pid", "process", "io_policy", "wq_state", "command")) +def GetProcSummary(proc): + """ Summarize the process data. + params: + proc : value - value representaitng a proc * in kernel + returns: + str - string summary of the process. + """ + out_string = "" + format_string= "{0: >6d} {1: >#020x} {2: >14s} {3: >2d} {4: >2d} {5: >2d} {6: <20s}" + pval = proc.GetSBValue() + #code.interact(local=locals()) + if str(pval.GetType()) != str(gettype('proc *')) : + return "Unknown type " + str(pval.GetType()) + " " + str(hex(proc)) + if not proc: + out_string += "Process " + hex(proc) + " is not valid." + return out_string + pid = int(proc.p_pid) + proc_addr = int(hex(proc), 16) + proc_rage_str = "" + if int(proc.p_lflag) & 0x400000 : + proc_rage_str = "RAGE" + + task = Cast(proc.task, 'task *') + + io_policy_str = "" + + if int(task.effective_policy.darwinbg) != 0: + io_policy_str += "B" + if int(task.effective_policy.lowpri_cpu) != 0: + io_policy_str += "L" + + if int(task.effective_policy.io_tier) != 0: + io_policy_str += "T" + if int(task.effective_policy.io_passive) != 0: + io_policy_str += "P" + if int(task.effective_policy.terminated) != 0: + io_policy_str += "D" + + if int(task.effective_policy.t_suspended) != 0: + io_policy_str += "S" + if int(task.effective_policy.t_latency_qos) != 0: + io_policy_str += "Q" + if int(task.effective_policy.t_sup_active) != 0: + io_policy_str += "A" + + + try: + work_queue = Cast(proc.p_wqptr, 'workqueue *') + if proc.p_wqptr != 0 : + wq_num_threads = int(work_queue.wq_nthreads) + wq_idle_threads = int(work_queue.wq_thidlecount) + wq_req_threads = int(work_queue.wq_reqcount) + else: + wq_num_threads = 0 + wq_idle_threads = 0 + wq_req_threads = 0 + except: + wq_num_threads = -1 + wq_idle_threads = -1 + wq_req_threads = -1 + process_name = str(proc.p_comm) + out_string += format_string.format(pid, proc_addr, " ".join([proc_rage_str, io_policy_str]), wq_num_threads, wq_idle_threads, wq_req_threads, process_name) + return out_string + +# Macro: showtask + +@lldb_command('showtask', 'F:') +def ShowTask(cmd_args=None, cmd_options={}): + """ Routine to print a summary listing of given task + Usage: showtask
<address of task>
+ or : showtask -F + """ + task_list = [] + if "-F" in cmd_options: + task_list = FindTasksByName(cmd_options['-F']) + else: + if not cmd_args: + raise ArgumentError("Invalid arguments passed.") + + tval = kern.GetValueFromAddress(cmd_args[0], 'task *') + if not tval: + raise ("Unknown arguments: %r" % cmd_args) + task_list.append(tval) + + for tval in task_list: + print GetTaskSummary.header + " " + GetProcSummary.header + pval = Cast(tval.bsd_info, 'proc *') + print GetTaskSummary(tval) +" "+ GetProcSummary(pval) + +# EndMacro: showtask + +# Macro: showpid + +@lldb_command('showpid') +def ShowPid(cmd_args=None): + """ Routine to print a summary listing of task corresponding to given pid + Usage: showpid + """ + if not cmd_args: + print "No arguments passed" + print ShowPid.__doc__ + return False + pidval = ArgumentStringToInt(cmd_args[0]) + for t in kern.tasks: + pval = Cast(t.bsd_info, 'proc *') + if pval and pval.p_pid == pidval: + print GetTaskSummary.header + " " + GetProcSummary.header + print GetTaskSummary(t) + " " + GetProcSummary(pval) + break + +# EndMacro: showpid + +# Macro: showproc + +@lldb_command('showproc') +def ShowProc(cmd_args=None): + """ Routine to print a summary listing of task corresponding to given proc + Usage: showproc
<address of proc>
+ """ + if not cmd_args: + print "No arguments passed" + print ShowProc.__doc__ + return False + pval = kern.GetValueFromAddress(cmd_args[0], 'proc *') + if not pval: + print "unknown arguments:", str(cmd_args) + return False + print GetTaskSummary.header + " " + GetProcSummary.header + tval = Cast(pval.task, 'task *') + print GetTaskSummary(tval) +" "+ GetProcSummary(pval) + +# EndMacro: showproc + +# Macro: showprocinfo + +@lldb_command('showprocinfo') +def ShowProcInfo(cmd_args=None): + """ Routine to display name, pid, parent & task for the given proc address + It also shows the Cred, Flags and state of the process + Usage: showprocinfo
+ """ + if not cmd_args: + print "No arguments passed" + print ShowProcInfo.__doc__ + return False + pval = kern.GetValueFromAddress(cmd_args[0], 'proc *') + if not pval: + print "unknown arguments:", str(cmd_args) + return False + print GetProcInfo(pval) + +# EndMacro: showprocinfo + +#Macro: showprocfiles + +@lldb_command('showprocfiles') +def ShowProcFiles(cmd_args=None): + """ Given a proc_t pointer, display the list of open file descriptors for the referenced process. + Usage: showprocfiles + """ + if not cmd_args: + print ShowProcFiles.__doc__ + return + proc = kern.GetValueFromAddress(cmd_args[0], 'proc_t') + proc_filedesc = proc.p_fd + proc_lastfile = unsigned(proc_filedesc.fd_lastfile) + proc_ofiles = proc_filedesc.fd_ofiles + if unsigned(proc_ofiles) == 0: + print 'No open files for proc {0: + """ + if not cmd_args: + print ShowTTY.__doc__ + return + + tty = kern.GetValueFromAddress(cmd_args[0], 'struct tty *') + print "TTY structure at: {0: 0: + print '\t' + tty_state_info[index][1] + else: + if len(tty_state_info[index][0]) > 0: + print '\t' + tty_state_info[index][0] + index += 1 + mask = mask << 1 + print "Flags: 0x{0:0>8x}".format(unsigned(tty.t_flags)) + print "Foreground Process Group: 0x{0:0>16x}".format(unsigned(tty.t_pgrp)) + print "Enclosing session: 0x{0:0>16x}".format(unsigned(tty.t_session)) + print "Termios:" + print "\tInput Flags: 0x{0:0>8x}".format(unsigned(tty.t_termios.c_iflag)) + print "\tOutput Flags: 0x{0:0>8x}".format(unsigned(tty.t_termios.c_oflag)) + print "\tControl Flags: 0x{0:0>8x}".format(unsigned(tty.t_termios.c_cflag)) + print "\tLocal Flags: 0x{0:0>8x}".format(unsigned(tty.t_termios.c_lflag)) + print "\tInput Speed: {0: <8d}".format(tty.t_termios.c_ispeed) + print "\tOutput Speed: {0: <8d}".format(tty.t_termios.c_ospeed) + print "High Watermark: {0: + """ + if not cmd_args: + print DumpCallQueue.__doc__ + return + print "{0: <18s} {1: <18s} {2: <18s} {3: <64s} {4: <18s}".format('CALL_ENTRY', 'PARAM0', 'PARAM1', 'DEADLINE', 'FUNC') + callhead = kern.GetValueFromAddress(cmd_args[0], 'queue_head_t *') + count = 0 + for callentry in IterateQueue(callhead, 'struct call_entry *', 'q_link'): + print "{0: <#18x} {1: <#18x} {2: <#18x} {3: <64d} {4: <#18x}".format( + unsigned(callentry), unsigned(callentry.param0), unsigned(callentry.param1), + unsigned(callentry.deadline), unsigned(callentry.func)) + count += 1 + print "{0: reports "number of workq threads", "number of scheduled workq threads", "number of pending work items" + if "number of pending work items" seems stuck at non-zero, it may indicate that the workqueue mechanism is hung + io_policy -> RAGE - rapid aging of vnodes requested + NORM - normal I/O explicitly requested (this is the default) + PASS - passive I/O requested (i.e. I/Os do not affect throttling decisions) + THROT - throttled I/O requested (i.e. 
thread/task may be throttled after each I/O completes) + """ + global kern + print GetTaskSummary.header + " " + GetProcSummary.header + for t in kern.tasks: + pval = Cast(t.bsd_info, 'proc *') + print GetTaskSummary(t) +" "+ GetProcSummary(pval) + +@lldb_command('showterminatedtasks') +def ShowTerminatedTasks(cmd_args=None): + """ Routine to print a summary listing of all the terminated tasks + wq_state -> reports "number of workq threads", "number of scheduled workq threads", "number of pending work items" + if "number of pending work items" seems stuck at non-zero, it may indicate that the workqueue mechanism is hung + io_policy -> RAGE - rapid aging of vnodes requested + NORM - normal I/O explicitly requested (this is the default) + PASS - passive I/O requested (i.e. I/Os do not affect throttling decisions) + THROT - throttled I/O requested (i.e. thread/task may be throttled after each I/O completes) + syntax: (lldb)showallterminatedtasks + """ + global kern + print GetTaskSummary.header + " " + GetProcSummary.header + for t in kern.terminated_tasks: + pval = Cast(t.bsd_info, 'proc *') + print GetTaskSummary(t) +" "+ GetProcSummary(pval) + return True + +# Macro: showtaskstacks + +def ShowTaskStacks(task): + """ Print a task with summary and stack information for each of its threads + """ + global kern + print GetTaskSummary.header + " " + GetProcSummary.header + pval = Cast(task.bsd_info, 'proc *') + print GetTaskSummary(task) + " " + GetProcSummary(pval) + for th in IterateQueue(task.threads, 'thread *', 'task_threads'): + print " " + GetThreadSummary.header + print " " + GetThreadSummary(th) + print GetThreadBackTrace(th, prefix=" ") + "\n" + +def FindTasksByName(searchstr, ignore_case=True): + """ Search the list of tasks by name. + params: + searchstr: str - a regex like string to search for task + ignore_case: bool - If False then exact matching will be enforced + returns: + [] - array of task object. 
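FindTasksByName relies on the re module (assumed to arrive through the star imports at the top of the file, since no explicit import appears nearby); its matching rule, reduced to plain strings:

    import re

    def filter_names(names, searchstr, ignore_case=True):
        regex = re.compile(searchstr, re.IGNORECASE if ignore_case else 0)
        return [n for n in names if regex.search(n)]

    # filter_names(['launchd', 'kernel_task', 'Launcher'], 'launch')
    #   -> ['launchd', 'Launcher']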
Empty if not found any + """ + re_options = 0 + if ignore_case: + re_options = re.IGNORECASE + search_regex = re.compile(searchstr, re_options) + retval = [] + for t in kern.tasks: + pval = Cast(t.bsd_info, "proc *") + process_name = "{:s}".format(pval.p_comm) + if search_regex.search(process_name): + retval.append(t) + return retval + +@lldb_command('showtaskstacks', 'F:') +def ShowTaskStacksCmdHelper(cmd_args=None, cmd_options={}): + """ Routine to print out the stack for each thread in a task + Usage: showtaskstacks <0xaddress of task> + or: showtaskstacks -F launchd + """ + + if "-F" in cmd_options: + find_task_str = cmd_options["-F"] + task_list = FindTasksByName(find_task_str) + for tval in task_list: + ShowTaskStacks(tval) + return + + if not cmd_args: + raise ArgumentError("No arguments passed") + + tval = kern.GetValueFromAddress(cmd_args[0], 'task *') + if not tval: + raise ArgumentError("unknown arguments: {:s}".format(str(cmd_args))) + else: + ShowTaskStacks(tval) + return + +# EndMacro: showtaskstacks + +@lldb_command('showallthreads') +def ShowAllThreads(cmd_args = None): + """ Display info about all threads in the system + """ + for t in kern.tasks: + ShowTaskThreads([str(int(t))]) + print " \n" + return + +@lldb_command('showtaskthreads', "F:") +def ShowTaskThreads(cmd_args = None, cmd_options={}): + """ Display thread information for a given task + Usage: showtaskthreads <0xaddress of task> + or: showtaskthreads -F + """ + task_list = [] + if "-F" in cmd_options: + task_list = FindTasksByName(cmd_options["-F"]) + elif cmd_args: + t = kern.GetValueFromAddress(cmd_args[0], 'task *') + task_list.append(t) + else: + raise ArgumentError("No arguments passed") + + for task in task_list: + print GetTaskSummary.header + " " + GetProcSummary.header + pval = Cast(task.bsd_info, 'proc *') + print GetTaskSummary(task) + " " + GetProcSummary(pval) + print "\t" + GetThreadSummary.header + for thval in IterateQueue(task.threads, 'thread *', 'task_threads'): + print "\t" + GetThreadSummary(thval) + return + +@lldb_command('showact') +def ShowAct(cmd_args=None): + """ Routine to print out the state of a specific thread. + usage: showact + """ + if cmd_args == None or len(cmd_args) < 1: + print "No arguments passed" + print ShowAct.__doc__ + return False + threadval = kern.GetValueFromAddress(cmd_args[0], 'thread *') + print GetThreadSummary.header + print GetThreadSummary(threadval) + +@lldb_command('showactstack') +def ShowActStack(cmd_args=None): + """ Routine to print out the stack of a specific thread. + usage: showactstack + """ + if cmd_args == None or len(cmd_args) < 1: + print "No arguments passed" + print ShowAct.__doc__.strip() + return False + threadval = kern.GetValueFromAddress(cmd_args[0], 'thread *') + print GetThreadSummary.header + print GetThreadSummary(threadval) + print GetThreadBackTrace(threadval, prefix="\t") + return + +@lldb_command('switchtoact') +def SwitchToAct(cmd_args=None): + """ Switch to different context specified by activation + This command allows gdb to examine the execution context and call + stack for the specified activation. For example, to view the backtrace + for an activation issue "switchtoact
<address of activation>
", followed by "bt". + Before resuming execution, issue a "resetctx" command, to + return to the original execution context. + """ + if cmd_args == None or len(cmd_args) < 1: + print "No arguments passed" + print SwitchToAct.__doc__.strip() + return False + thval = kern.GetValueFromAddress(cmd_args[0], 'thread *') + lldbthread = GetLLDBThreadForKernelThread(thval) + print GetThreadSummary.header + print GetThreadSummary(thval) + LazyTarget.GetProcess().selected_thread = lldbthread + if not LazyTarget.GetProcess().SetSelectedThread(lldbthread): + print "Failed to switch thread." + return +# Macro: showallstacks +@lldb_command('showallstacks') +def ShowAllStacks(cmd_args=None): + """Routine to print out the stack for each thread in the system. + """ + for t in kern.tasks: + ShowTaskStacks(t) + print " \n" + return + +# EndMacro: showallstacks + +# Macro: showcurrentstacks +@lldb_command('showcurrentstacks') +def ShowCurrentStacks(cmd_args=None): + """ Routine to print out the thread running on each cpu (incl. its stack) + """ + processor_list = kern.GetGlobalVariable('processor_list') + current_processor = processor_list + while unsigned(current_processor) > 0: + print "\nProcessor {: <#020x} State {: #04x})".format(current_processor, int(current_processor.state), int(current_processor.cpu_id)) + active_thread = current_processor.active_thread + if unsigned(active_thread) != 0 : + task_val = active_thread.task + proc_val = Cast(task_val.bsd_info, 'proc *') + print GetTaskSummary.header + " " + GetProcSummary.header + print GetTaskSummary(task_val) + " " + GetProcSummary(proc_val) + print "\t" + GetThreadSummary.header + print "\t" + GetThreadSummary(active_thread) + print "\tBacktrace:" + print GetThreadBackTrace(active_thread, prefix="\t") + current_processor = current_processor.processor_list + return +# EndMacro: showcurrentstacks + +@lldb_command('showcurrentthreads') +def ShowCurrentThreads(cmd_args=None): + """ Display info about threads running on each cpu """ + processor_list = kern.GetGlobalVariable('processor_list') + current_processor = processor_list + while unsigned(current_processor) > 0: + print "Processor {: <#020x} State {: #04x})".format(current_processor, int(current_processor.state), int(current_processor.cpu_id)) + active_thread = current_processor.active_thread + if unsigned(active_thread) != 0 : + task_val = active_thread.task + proc_val = Cast(task_val.bsd_info, 'proc *') + print GetTaskSummary.header + " " + GetProcSummary.header + print GetTaskSummary(task_val) + " " + GetProcSummary(proc_val) + print "\t" + GetThreadSummary.header + print "\t" + GetThreadSummary(active_thread) + current_processor = current_processor.processor_list + return + +def GetFullBackTrace(frame_addr, verbosity = vHUMAN, prefix = ""): + """ Get backtrace across interrupt context. + params: frame_addr - int - address in memory which is a frame pointer (ie. rbp, r7) + prefix - str - prefix for each line of output. 
+ + """ + out_string = "" + bt_count = 0 + frame_ptr = frame_addr + previous_frame_ptr = 0 + # lldb unable to find symbol for _mh_execute_header + mh_execute_addr = int(lldb_run_command('p/x (uintptr_t *)&_mh_execute_header').split('=')[-1].strip(), 16) + while frame_ptr and frame_ptr != previous_frame_ptr and bt_count < 128: + if (kern.arch != 'arm' and frame_ptr < mh_execute_addr) or (kern.arch == 'arm' and frame_ptr > mh_execute_addr): + break + pc_val = kern.GetValueFromAddress(frame_ptr + kern.ptrsize,'uintptr_t *') + pc_val = unsigned(dereference(pc_val)) + out_string += prefix + GetSourceInformationForAddress(pc_val) + "\n" + bt_count +=1 + previous_frame_ptr = frame_ptr + frame_val = kern.GetValueFromAddress((frame_ptr), 'uintptr_t *') + if unsigned(frame_val) == 0: + break + frame_ptr = unsigned(dereference(frame_val)) + + return out_string + +@lldb_command('fullbt') +def FullBackTrace(cmd_args=[]): + """ Show full backtrace across the interrupt boundary. + Syntax: fullbt + Example: kfullbt `$rbp` + """ + if len(cmd_args) < 1: + print FullBackTrace.__doc__ + return False + print GetFullBackTrace(ArgumentStringToInt(cmd_args[0]), prefix="\t") + + +@lldb_command('symbolicate') +def SymbolicateAddress(cmd_args=[]): + """ Symbolicate an address for symbol information from loaded symbols + Example: "symbolicate 0xaddr" is equivalent to "output/a 0xaddr" + """ + if len(cmd_args) < 1: + print "Invalid address.\nSyntax: symbolicate
" + return False + print GetSourceInformationForAddress(ArgumentStringToInt(cmd_args[0])) + return True + +@lldb_command('showinitchild') +def ShowInitChild(cmd_args=None): + """ Routine to print out all processes in the system + which are children of init process + """ + headp = kern.globals.initproc.p_children + for pp in IterateListEntry(headp, 'struct proc *', 'p_sibling'): + print GetProcInfo(pp) + return + +@lldb_command('showproctree') +def ShowProcTree(cmd_args=None): + """ Routine to print the processes in the system in a hierarchical tree form. This routine does not print zombie processes. + If no argument is given, showproctree will print all the processes in the system. + If pid is specified, showproctree prints all the descendants of the indicated process + """ + search_pid = 0 + if cmd_args: + search_pid = ArgumentStringToInt(cmd_args[0]) + + if search_pid < 0: + print "pid specified must be a positive number" + print ShowProcTree.__doc__ + return + + hdr_format = "{0: <6s} {1: <14s} {2: <9s}\n" + out_string = hdr_format.format("PID", "PROCESS", "POINTER") + out_string += hdr_format.format('='*3, '='*7, '='*7) + proc = GetProcForPid(search_pid) + out_string += "{0: <6d} {1: <14s} [ {2: #019x} ]\n".format(proc.p_ppid, proc.p_pptr.p_comm, unsigned(proc.p_pptr)) + out_string += "|--{0: <6d} {1: <16s} [ {2: #019x} ]\n".format(proc.p_pid, proc.p_comm, unsigned(proc)) + print out_string + ShowProcTreeRecurse(proc, "| ") + + return + +def ShowProcTreeRecurse(proc, prefix=""): + """ Prints descendants of a given proc in hierarchial tree form + params: + proc : core.value representing a struct proc * in the kernel + returns: + str : String containing info about a given proc and its descendants in tree form + """ + if proc.p_childrencnt > 0: + head_ptr = proc.p_children.lh_first + + for p in IterateListEntry(proc.p_children, 'struct proc *', 'p_sibling'): + print prefix + "|--{0: <6d} {1: <16s} [ {2: #019x} ]\n".format(p.p_pid, p.p_comm, unsigned(p)) + ShowProcTreeRecurse(p, prefix + "| ") + +@lldb_command('showthreadfortid') +def ShowThreadForTid(cmd_args=None): + """ The thread structure contains a unique thread_id value for each thread. + This command is used to retrieve the address of the thread structure(thread_t) + corresponding to a given thread_id. 
+ """ + if not cmd_args: + print "Please provide thread_t whose tid you'd like to look up" + print ShowThreadForTid.__doc__ + return + search_tid = ArgumentStringToInt(cmd_args[0]) + for taskp in kern.tasks: + for actp in IterateQueue(taskp.threads, 'struct thread *', 'task_threads'): + if search_tid == int(actp.thread_id): + print "Found {0: #019x}".format(actp) + print GetThreadSummary.header + print GetThreadSummary(actp) + return + print "Not a valid thread_id" + +# Macro: showallprocessors + +def GetProcessorSummary(processor): + """ Internal function to print summary of processor + params: processor - value representing struct processor * + return: str - representing the details of given processor + """ + out_str = "Processor {: <#012x} ".format(processor) + out_str += "State {:d} (cpu_id {:#x})\n".format(processor.state, processor.cpu_id) + return out_str + +def GetRunQSummary(runq): + """ Internal function to print summary of run_queue + params: runq - value representing struct run_queue * + return: str - representing the details of given run_queue + """ + out_str = " Priority Run Queue Info: Count {: <10d}\n".format(runq.count) + runq_queue_i = 0 + runq_queue_count = sizeof(runq.queues)/sizeof(runq.queues[0]) + while runq.count and (runq_queue_i < runq_queue_count): + runq_queue_head = addressof(runq.queues[runq_queue_i]) + runq_queue_p = runq_queue_head.next + if unsigned(runq_queue_p) != unsigned(runq_queue_head): + runq_queue_this_count = 0 + while runq_queue_p != runq_queue_head: + runq_queue_this_count = runq_queue_this_count + 1 + runq_queue_p_thread = Cast(runq_queue_p, 'thread_t') + # Get the task information + out_str += GetTaskSummary.header + " " + GetProcSummary.header + pval = Cast(runq_queue_p_thread.task.bsd_info, 'proc *') + out_str += GetTaskSummary(runq_queue_p_thread.task) +" "+ GetProcSummary(pval) + # Get the thread information with related stack traces + out_str += GetThreadSummary.header + GetThreadSummary(runq_queue_p_thread) + out_str += GetThreadBackTrace(LazyTarget.GetProcess().GetThreadByID(int(runq_queue_p_thread.thread_id)), + prefix="\t") + runq_queue_p = runq_queue_p.next + + out_str += " Queue Priority {: <3d} [{: <#012x}] Count {:d}\n".format(runq_queue_i, + runq_queue_head, runq_queue_this_count) + + runq_queue_i = runq_queue_i + 1 + return out_str + +def GetGrrrSummary(grrr_runq): + """ Internal function to print summary of grrr_run_queue + params: grrr_runq - value representing struct grrr_run_queue * + return: str - representing the details of given grrr_run_queue + """ + out_str = " GRRR Info: Count {: <10d} Weight {: <10d} Current Group {: <#012x}\n".format(grrr_runq.count, + grrr_runq.weight, grrr_runq.current_group) + grrr_group_i = 0 + grrr_group_count = sizeof(grrr_runq.groups)/sizeof(grrr_runq.groups[0]) + while grrr_runq.count and (grrr_group_i < grrr_group_count): + grrr_group = addressof(grrr_runq.groups[grrr_group_i]) + runq_queue_p = runq_queue_head.next + if grrr_group.count > 0: + out_str += " Group {: <3d} [{: <#012x}] ".format(grrr_group.index, grrr_group) + out_str += "Count {:d} Weight {:d}\n".format(grrr_group.count, grrr_group.weight) + grrr_group_client_head = addressof(grrr_group.clients) + grrr_group_client = grrr_group_client_head.next + while grrr_group_client != grrr_group_client_head: + grrr_group_client_thread = Cast(grrr_group_client, 'thread_t') + # Get the task information + out_str += GetTaskSummary.header + " " + GetProcSummary.header + pval = Cast(grrr_group_client_thread.task.bsd_info, 'proc *') + out_str += 
GetTaskSummary(grrr_group_client_thread.task) +" "+ GetProcSummary(pval) + # Get the thread information with related stack traces + out_str += GetThreadSummary.header + GetThreadSummary(grrr_group_client_thread) + out_str += GetThreadBackTrace(LazyTarget.GetProcess().GetThreadByID(int(grrr_group_client_thread.thread_id)), + prefix="\t") + grrr_group_client = grrr_group_client.next + grrr_group_i = grrr_group_i + 1 + return out_str + +@lldb_command('showallprocessors') +def ShowAllProcessors(cmd_args=None): + """ Routine to print information of all psets and processors + Usage: showallprocessors + """ + pset = addressof(kern.globals.pset0) + show_grrr = 0 + show_priority_runq = 0 + show_priority_pset_runq = 0 + show_fairshare_grrr = 0 + show_fairshare_list = 0 + sched_enum_val = kern.globals._sched_enum + + if sched_enum_val == 1: + show_priority_runq = 1 + show_fairshare_list = 1 + elif sched_enum_val == 2: + show_priority_pset_runq = 1 + show_fairshare_list = 1 + elif sched_enum_val == 4: + show_grrr = 1 + show_fairshare_grrr = 1 + elif sched_enum_val == 5: + show_priority_runq = 1 + show_fairshare_list = 1 + elif sched_enum_val == 6: + show_priority_pset_runq = 1 + show_fairshare_list = 1 + + out_str = '' + while pset: + out_str += "Processor Set {: <#012x} Count {:d} (cpu_id {:<#x}-{:<#x})\n".format(pset, + pset.cpu_set_count, pset.cpu_set_low, pset.cpu_set_hi) + out_str += " Active Processors:\n" + active_queue_head = addressof(pset.active_queue) + active_elt = active_queue_head.next + while active_elt != active_queue_head: + processor = Cast(active_elt, 'processor *') + out_str += " " + out_str += GetProcessorSummary(processor) + if show_priority_runq: + runq = addressof(processor.runq) + out_str += GetRunQSummary(runq) + if show_grrr: + grrr_runq = addressof(processor.grrr_runq) + out_str += GetGrrrSummary(grrr_runq) + + if processor.processor_meta and (processor.processor_meta.primary == + processor): + processor_meta_idle_head = addressof(processor.processor_meta.idle_queue) + processor_meta_idle = processor_meta_idle_head.next + while processor_meta_idle != processor_meta_idle_head: + out_str += " Idle Meta Processor: " + out_str += GetProcessorSummary(processor_meta_idle) + processor_meta_idle = processor_meta_idle.next + active_elt = active_elt.next + + out_str += " Idle Processors:\n" + idle_queue_head = addressof(pset.idle_queue) + idle_elt = idle_queue_head.next + while idle_elt != idle_queue_head: + processor = Cast(idle_elt, 'processor *') + out_str += " " + out_str += GetProcessorSummary(processor) + + if processor.processor_meta and (processor.processor_meta.primary == + processor): + processor_meta_idle_head = addressof(processor.processor_meta.idle_queue) + processor_meta_idle = processor_meta_idle_head.next + while processor_meta_idle != processor_meta_idle_head: + out_str += " Idle Meta Processor: " + out_str += GetProcessorSummary(processor_meta_idle) + processor_meta_idle = processor_meta_idle.next + idle_elt = idle_elt.next + + if show_priority_pset_runq: + runq = addressof(pset.pset_runq) + out_str += "\n" + GetRunQSummary(runq) + pset = pset.pset_list + + out_str += "\nRealtime Queue Count {:d}\n".format(kern.globals.rt_runq.count) + rt_runq_head = addressof(kern.globals.rt_runq.queue) + rt_runq_local = rt_runq_head.next + while rt_runq_local != rt_runq_head: + rt_runq_thread = Cast(rt_runq_local, 'thread *') + out_str += ShowTask([unsigned(rt_runq_thread.task)]) + out_str += ShowAct([unsigned(rt_runq_thread)]) + rt_runq_local = rt_runq_local.next + + out_str += 
"\n" + if show_fairshare_list: + out_str += "Fair Share Queue Count {:d}\n".format(kern.globals.fs_runq.count) + fs_runq_head = addressof(kern.globals.fs_runq.queue) + fs_runq_local = fs_runq_head.next + while fs_runq_local != fs_runq_head: + fs_runq_thread = Cast(fs_runq, 'thread *') + out_str += ShowTask([unsigned(fs_runq_thread.task)]) + out_str += ShowAct([unsigned(rt_runq_thread)]) + fs_runq_local = fs_runq_local.next + if show_fairshare_grrr: + out_str += "Fair Share Queue Count {:d}\n".format(kern.globals.fs_grrr_runq.count) + fs_grrr = addressof(kern.globals.fs_grrr_runq) + out_str += GetGrrrSummary(fs_grrr) + + print out_str +# EndMacro: showallprocessors + +def GetLedgerEntrySummary(ledger_template, ledger, i): + """ Internal function to get internals of a ledger entry (*not* a ledger itself) + params: ledger_template - value representing struct ledger_template_t for the task or thread + ledger - value representing struct ledger_entry * + return: str - formatted output information of ledger entries + """ + ledger_limit_infinity = (uint64_t(0x1).value << 63) - 1 + lf_refill_scheduled = 0x0400 + lf_tracking_max = 0x4000 + + out_str = '' + now = kern.globals.sched_tick / 20 + lim_pct = 0 + + out_str += "{: >25s} {: unsigned(ledger.le_limit)): + out_str += " X " + else: + out_str += " " + + out_str += "{:#8x}\n".format(ledger.le_flags) + return out_str + +def GetThreadLedgerSummary(thread_val): + """ Internal function to get a summary of ledger entries for the given thread + params: thread - value representing struct thread * + return: str - formatted output information for ledger entries of the input thread + """ + out_str = " [{:#08x}]\n".format(thread_val) + ledgerp = thread_val.t_threadledger + if ledgerp: + i = 0 + while i != ledgerp.l_template.lt_cnt: + out_str += GetLedgerEntrySummary(kern.globals.thread_ledger_template, + ledgerp.l_entries[i], i) + i = i + 1 + return out_str + +@header("{0: <15s} {1: >9s} {2: <2s} {3: >12s} {4: >9s} {5: >6s} {6: >8s} {7: <10s} {8: <9s} \ + {9: <12s} {10: <7s} {11: <15s} {12: <8s} {13: <9s} {14: <6s} {15: >6s}".format( + "task [thread]", "entry", "#", "balance", "peakA", "(age)", "peakB", "(age)", "credit", + "debit", "limit", "refill period", "lim pct", "warn pct", "over?", "flags")) +def GetTaskLedgers(task_val): + """ Internal function to get summary of ledger entries from the task and its threads + params: task_val - value representing struct task * + return: str - formatted output information for ledger entries of the input task + """ + out_str = '' + task_ledgerp = task_val.ledger + i = 0 + out_str += "{: #08x} ".format(task_val) + pval = Cast(task_val.bsd_info, 'proc *') + if pval: + out_str += "{: <5s}:\n".format(pval.p_comm) + else: + out_str += "Invalid process:\n" + while i != task_ledgerp.l_template.lt_cnt: + out_str += GetLedgerEntrySummary(kern.globals.task_ledger_template, task_ledgerp.l_entries[i], i) + i = i + 1 + + # Now walk threads + for thval in IterateQueue(task_val.threads, 'thread *', 'task_threads'): + out_str += GetThreadLedgerSummary(thval) + + return out_str + +# Macro: showtaskledgers + +@lldb_command('showtaskledgers', 'F:') +def ShowTaskLedgers(cmd_args=None, cmd_options={}): + """ Routine to print a summary of ledger entries for the task and all of its threads + Usage: showtaskledgers
+ or : showtaskledgers -F + """ + if "-F" in cmd_options: + task_list = FindTasksByName(cmd_options["-F"]) + for tval in task_list: + print GetTaskLedgers.header + print GetTaskLedgers(tval) + return + + if not cmd_args: + raise ArgumentError("No arguments passed.") + tval = kern.GetValueFromAddress(cmd_args[0], 'task *') + if not tval: + raise ArgumentError("unknown arguments: %r" %cmd_args) + print GetTaskLedgers.header + print GetTaskLedgers(tval) + +# EndMacro: showtaskledgers + +# Macro: showalltaskledgers + +@lldb_command('showalltaskledgers') +def ShowAllTaskLedgers(cmd_args=None): + """ Routine to print a summary of ledger entries for all tasks and respective threads + Usage: showalltaskledgers + """ + for t in kern.tasks: + task_val = unsigned(t) + ShowTaskLedgers([task_val]) + +# EndMacro: showalltaskledgers + +# Macro: showprocuuidpolicytable + +@lldb_type_summary(['proc_uuid_policy_entry']) +@header("{0: <36s} {1: <10s}".format("uuid", "flags")) +def GetProcUUIDPolicyEntrySummary(entry): + """ Summarizes the important fields in proc_uuid_policy_entry structure. + params: entry: value - value object representing an entry + returns: str - summary of the entry + """ + data = [] + for i in range(16): + data.append(int(entry.uuid[i])) + flags = unsigned(entry.flags) + out_string = "{a[0]:02X}{a[1]:02X}{a[2]:02X}{a[3]:02X}-{a[4]:02X}{a[5]:02X}-{a[6]:02X}{a[7]:02X}-{a[8]:02X}{a[9]:02X}-{a[10]:02X}{a[11]:02X}{a[12]:02X}{a[13]:02X}{a[14]:02X}{a[15]:02X} 0x{b:0>8x}".format(a=data, b=flags) + return out_string + +@lldb_command('showprocuuidpolicytable') +def ShowProcUUIDPolicyTable(cmd_args=None): + """ Routine to print the proc UUID policy table + Usage: showprocuuidpolicytable + """ + hashslots = unsigned(kern.globals.proc_uuid_policy_hash_mask) + print "{0: <8s} ".format("slot") + GetProcUUIDPolicyEntrySummary.header + for i in range(0, hashslots+1): + headp = addressof(kern.globals.proc_uuid_policy_hashtbl[i]) + entrynum = 0 + for entry in IterateListEntry(headp, 'struct proc_uuid_policy_entry *', 'entries'): + print "{0: >2d}.{1: <5d} ".format(i, entrynum) + GetProcUUIDPolicyEntrySummary(entry) + entrynum += 1 + + +# EndMacro: showprocuuidpolicytable + +@lldb_command('showalltaskpolicy') +def ShowAllTaskPolicy(cmd_args=None): + """ + Routine to print a summary listing of all the tasks + wq_state -> reports "number of workq threads", "number of scheduled workq threads", "number of pending work items" + if "number of pending work items" seems stuck at non-zero, it may indicate that the workqueue mechanism is hung + io_policy -> RAGE - rapid aging of vnodes requested + NORM - normal I/O explicitly requested (this is the default) + PASS - passive I/O requested (i.e. I/Os do not affect throttling decisions) + THROT - throttled I/O requested (i.e. 
thread/task may be throttled after each I/O completes) + """ + global kern + print GetTaskSummary.header + " " + GetProcSummary.header + for t in kern.tasks: + pval = Cast(t.bsd_info, 'proc *') + print GetTaskSummary(t) +" "+ GetProcSummary(pval) + requested_strings = [ + ["int_darwinbg", "DBG-int"], + ["ext_darwinbg", "DBG-ext"], + ["int_iotier", "iotier-int"], + ["ext_iotier", "iotier-ext"], + ["int_iopassive", "passive-int"], + ["ext_iopassive", "passive-ext"], + ["bg_iotier", "bg-iotier"], + ["terminated", "terminated"], + ["th_pidbind_bg", "bg-pidbind"], + ["th_workq_bg", "bg-workq"], + ["t_apptype", "apptype"], + ["t_boosted", "boosted"], + ["t_int_gpu_deny", "gpudeny-int"], + ["t_ext_gpu_deny", "gpudeny-ext"], + ["t_role", "role"], + ["t_visibility", "vis"], + ["t_tal_enabled", "tal-enabled"], + ["t_base_latency_qos", "latency-base"], + ["t_over_latency_qos", "latency-override"], + ["t_base_through_qos", "throughput-base"], + ["t_over_through_qos", "throughput-override"] + ] + + requested="" + for value in requested_strings: + if t.requested_policy.__getattr__(value[0]) : + requested+=value[1] + ": " + str(t.requested_policy.__getattr__(value[0])) + " " + else: + requested+="" + + suppression_strings = [ + ["t_sup_active", "active"], + ["t_sup_lowpri_cpu", "lowpri-cpu"], + ["t_sup_timer", "timer-throttling"], + ["t_sup_disk", "disk-throttling"], + ["t_sup_cpu_limit", "cpu-limits"], + ["t_sup_suspend", "suspend"] + ] + + suppression="" + for value in suppression_strings: + if t.requested_policy.__getattr__(value[0]) : + suppression+=value[1] + ": " + str(t.requested_policy.__getattr__(value[0])) + " " + else: + suppression+="" + + effective_strings = [ + ["darwinbg", "background"], + ["lowpri_cpu", "lowpri-cpu"], + ["io_tier", "iotier"], + ["io_passive", "passive"], + ["all_sockets_bg", "bg-allsockets"], + ["new_sockets_bg", "bg-newsockets"], + ["bg_iotier", "bg-iotier"], + ["terminated", "terminated"], + ["t_gpu_deny", "gpu-deny"], + ["t_tal_engaged", "tal-engaged"], + ["t_suspended", "suspended"], + ["t_watchers_bg", "bg-watchers"], + ["t_latency_qos", "latency-qos"], + ["t_through_qos", "throughput-qos"], + ["t_sup_active", "suppression-active"], + ["t_role", "role"], + ["t_visibility", "vis"] + ] + + effective="" + for value in effective_strings: + if t.effective_policy.__getattr__(value[0]) : + effective+=value[1] + ": " + str(t.effective_policy.__getattr__(value[0])) + " " + else: + effective+="" + + + pended_strings = [ + ["t_updating_policy", "updating"], + ["update_sockets", "update_sockets"], + ["t_update_timers", "update_timers"], + ["t_update_watchers", "update_watchers"] + ] + + pended="" + for value in pended_strings: + if t.pended_policy.__getattr__(value[0]) : + pended+=value[1] + ": " + str(t.pended_policy.__getattr__(value[0])) + " " + else: + pended+="" + + print "requested: " + requested + print "suppression: " + suppression + print "effective: " + effective + print "pended: " + pended + + + + diff --git a/tools/lldbmacros/routedefines.py b/tools/lldbmacros/routedefines.py new file mode 100644 index 000000000..0da68d8f5 --- /dev/null +++ b/tools/lldbmacros/routedefines.py @@ -0,0 +1,27 @@ +RTF_UP = 0x1 +RTF_GATEWAY = 0x2 +RTF_HOST = 0x4 +RTF_REJECT = 0x8 +RTF_DYNAMIC = 0x10 +RTF_MODIFIED = 0x20 +RTF_DONE = 0x40 +RTF_DELCLONE = 0x80 +RTF_CLONING = 0x100 +RTF_XRESOLVE = 0x200 +RTF_LLINFO = 0x400 +RTF_STATIC = 0x800 +RTF_BLACKHOLE = 0x1000 +RTF_PROTO2 = 0x4000 +RTF_PROTO1 = 0x8000 +RTF_PRCLONING = 0x10000 +RTF_WASCLONED = 0x20000 +RTF_PROTO3 = 0x40000 +RTF_PINNED = 
0x100000 +RTF_LOCAL = 0x200000 +RTF_BROADCAST = 0x400000 +RTF_MULTICAST = 0x800000 +RTF_IFSCOPE = 0x1000000 +RTF_CONDEMNED = 0x2000000 +RTF_IFREF = 0x4000000 +RTF_PROXY = 0x8000000 +RTF_ROUTER = 0x10000000 diff --git a/tools/lldbmacros/scheduler.py b/tools/lldbmacros/scheduler.py new file mode 100644 index 000000000..cf6baa3a6 --- /dev/null +++ b/tools/lldbmacros/scheduler.py @@ -0,0 +1,63 @@ +from xnu import * +from utils import * +from process import * + +# TODO: write scheduler related macros here + +# Macro: showinterrupts +@lldb_command('showinterrupts') +def ShowInterrupts(cmd_args=None): + """ Prints IRQ, IPI and TMR counts for each CPU + """ + bcdata = kern.GetValueFromAddress(kern.GetLoadAddressForSymbol('BootCpuData'), 'cpu_data_t *') + print "CPU 0 IRQ: {:d}\n".format(bcdata.cpu_stat.irq_ex_cnt) + print "CPU 0 IPI: {:d}\n".format(bcdata.cpu_stat.ipi_cnt) + print "CPU 0 TMR: {:d}\n".format(bcdata.cpu_stat.timer_cnt) + if (kern.globals.machine_info.physical_cpu == 2): + if kern.arch == 'arm': + cdentries = kern.GetValueFromAddress(kern.GetLoadAddressForSymbol('CpuDataEntries') + 20, 'uintptr_t *') + cpu_data_entry = Cast(dereference(cdentries), 'cpu_data_t *') + print "CPU 1 IRQ: {:d}\n".format(cpu_data_entry.cpu_stat.irq_ex_cnt) + print "CPU 1 IPI: {:d}\n".format(cpu_data_entry.cpu_stat.ipi_cnt) + print "CPU 1 TMR: {:d}\n".format(cpu_data_entry.cpu_stat.timer_cnt) + +# EndMacro: showinterrupts + +# Macro: showactiveinterrupts +@lldb_command('showactiveinterrupts') +def ShowActiveInterrupts(cmd_args=None): + """ Prints the interrupts that are unmasked & active with the Interrupt Controller + Usage: showactiveinterrupts
+ """ + if not cmd_args: + print "No arguments passed" + print ShowActiveInterrupts.__doc__ + return False + aic = kern.GetValueFromAddress(cmd_args[0], 'AppleInterruptController *') + if not aic: + print "unknown arguments:", str(cmd_args) + return False + + aic_base = unsigned(aic._aicBaseAddress) + current_interrupt = 0 + aic_imc_base = aic_base + 0x4180 + aic_him_offset = 0x80 + current_pointer = aic_imc_base + unmasked = dereference(kern.GetValueFromAddress(current_pointer, 'uintptr_t *')) + active = dereference(kern.GetValueFromAddress(current_pointer + aic_him_offset, 'uintptr_t *')) + group_count = 0 + mask = 1 + while current_interrupt < 192: + if (((unmasked & mask) == 0) and (active & mask)): + print "Interrupt {:d} unmasked and active\n".format(current_interrupt) + current_interrupt = current_interrupt + 1 + if (current_interrupt % 32 == 0): + mask = 1 + group_count = group_count + 1 + unmasked = dereference(kern.GetValueFromAddress(current_pointer + (4 * group_count), 'uintptr_t *')) + active = dereference(kern.GetValueFromAddress((current_pointer + aic_him_offset) + (4 * group_count), 'uintptr_t *')) + else: + mask = mask << 1 + +# EndMacro: showactiveinterrupts + diff --git a/tools/lldbmacros/userspace.py b/tools/lldbmacros/userspace.py new file mode 100644 index 000000000..afd24f662 --- /dev/null +++ b/tools/lldbmacros/userspace.py @@ -0,0 +1,525 @@ +from xnu import * +from utils import * +from process import * +from pmap import * + +def _GetIntegerDataFromTask(u_ptr, task_abi): + """ + params: + u_ptr : int - pointer in user memory + task_abi : int - what kind of user program is running + returns: + int - value stored at specified u_ptr. + """ + if kern.arch != "x86_64": + raise ValueError("This function does not work for non x86_64 arch") + if task_abi == 0xf : + return unsigned(dereference(kern.GetValueFromAddress(u_ptr, 'uint64_t *'))) + else: + return unsigned(dereference(kern.GetValueFromAddress(u_ptr, 'uint32_t *'))) + +def GetBinaryNameForPC(pc_val, user_lib_info = None): + """ find the binary in user_lib_info that the passed pc_val falls in range of. + params: + pc_val : int - integer form of the pc address + user_lib_info: [] of [] which hold start, end, binary name + returns: + str - Name of binary or "unknown" if not found. + """ + retval = "unknown" + if not user_lib_info: + return retval + matches = [] + for info in user_lib_info: + if pc_val >= info[0] and pc_val <= info[1]: + matches.append((pc_val - info[0], info[2])) + matches.sort() + if matches: + retval = matches[0][1] + return retval + +def ShowX86UserStack(thread, user_lib_info = None): + """ Display user space stack frame and pc addresses. + params: + thread: obj referencing thread value + returns: + Nothing + """ + iss = Cast(thread.machine.iss, 'x86_saved_state_t *') + abi = int(iss.flavor) + user_ip = 0 + user_frame = 0 + user_abi_ret_offset = 0 + if abi == 0xf: + debuglog("User process is 64 bit") + user_ip = iss.uss.ss_64.isf.rip + user_frame = iss.uss.ss_64.rbp + user_abi_ret_offset = 8 + else: + debuglog("user process is 32 bit") + user_ip = iss.uss.ss_32.eip + user_frame = iss.uss.ss_32.ebp + user_abi_ret_offset = 4 + + if user_ip == 0: + print "This activation does not appear to have a valid user context." 
+        return False
+
+    cur_ip = user_ip
+    cur_frame = user_frame
+    debuglog("ip= 0x%x , fr = 0x%x " % (cur_ip, cur_frame))
+    kdp_pmap_addr = unsigned(addressof(kern.globals.kdp_pmap))
+    if not WriteInt64ToMemoryAddress(unsigned(thread.task.map.pmap), kdp_pmap_addr):
+        print "Failed to write in kdp_pmap = 0x{0:0>16x} value.".format(thread.task.map.pmap)
+        return False
+    debuglog("newpmap = 0x{:x}".format(kern.globals.kdp_pmap))
+
+    frameformat = "{0:d} FP: 0x{1:x} PC: 0x{2:x}"
+    if user_lib_info is not None:
+        frameformat = "{0:d} {3: <30s} 0x{2:x}"
+    print frameformat.format(0, cur_frame, cur_ip, GetBinaryNameForPC(cur_ip, user_lib_info))
+
+    print kern.Symbolicate(cur_ip)
+    tmp_frame = unsigned(cur_frame)
+    prev_frame = _GetIntegerDataFromTask(tmp_frame, abi)
+    prev_ip = _GetIntegerDataFromTask(tmp_frame + user_abi_ret_offset, abi)
+    frameno = 1
+    while prev_frame and prev_frame != 0x0000000800000008:
+        print frameformat.format(frameno, prev_frame, prev_ip, GetBinaryNameForPC(prev_ip, user_lib_info))
+        print kern.Symbolicate(prev_ip)
+        prev_ip = _GetIntegerDataFromTask(prev_frame + user_abi_ret_offset, abi)
+        prev_frame = _GetIntegerDataFromTask(prev_frame, abi)
+        frameno += 1
+    if not WriteInt64ToMemoryAddress(0, kdp_pmap_addr):
+        print "Failed to write in kdp_pmap = 0"
+        return False
+    return
+
+def _PrintARMUserStack(task, cur_pc, cur_fp, framesize, frametype, frameformat, user_lib_info=None):
+    if cur_pc == 0:
+        print "No valid user context for this activation."
+        return
+    frameno = 0
+    print frameformat.format(frameno, cur_fp, cur_pc, GetBinaryNameForPC(cur_pc, user_lib_info))
+    while True:
+        frameno = frameno + 1
+        frame = GetUserDataAsString(task, cur_fp, framesize)
+        cur_fp = _ExtractDataFromString(frame, 0, frametype)
+        cur_pc = _ExtractDataFromString(frame, (framesize / 2), frametype)
+        if not cur_fp:
+            break
+        print frameformat.format(frameno, cur_fp, cur_pc, GetBinaryNameForPC(cur_pc, user_lib_info))
+
+def ShowARMUserStack(thread, user_lib_info = None):
+    cur_pc = unsigned(thread.machine.PcbData.pc)
+    cur_fp = unsigned(thread.machine.PcbData.r[7])
+    frameformat = "{0:>2d} FP: 0x{1:x} PC: 0x{2:x}"
+    if user_lib_info is not None:
+        frameformat = "{0:>2d} {3: <30s} 0x{2:0>8x}"
+    framesize = 8
+    frametype = "uint32_t"
+    _PrintARMUserStack(thread.task, cur_pc, cur_fp, framesize, frametype, frameformat, user_lib_info=user_lib_info)
+
+
+@lldb_command('showthreaduserstack')
+def ShowThreadUserStack(cmd_args=None):
+    """ Show user stack for a given thread.
+        Syntax: (lldb) showthreaduserstack <address of thread>
+    """
+    if not cmd_args:
+        raise ArgumentError("Insufficient arguments")
+
+    thread = kern.GetValueFromAddress(ArgumentStringToInt(cmd_args[0]), 'thread *')
+    if kern.arch == "x86_64":
+        ShowX86UserStack(thread)
+    elif kern.arch == "arm":
+        ShowARMUserStack(thread)
+    return True
+
+@lldb_command('showtaskuserstacks')
+def ShowTaskUserStacks(cmd_args=None):
+    """ Print out the user stack for each thread in a task, followed by the user libraries.
+        Syntax: (lldb) showtaskuserstacks <address of task>
+        The format is compatible with CrashTracer. You can also use the speedtracer plugin as follows
+        (lldb) showtaskuserstacks <address of task> -p speedtracer
+
+        Note: the address ranges are approximations. Also the list may not be completely accurate. This command expects memory read failures
+        and hence will skip a library if unable to read information.
Please use your good judgement and not take the output as accurate + """ + if not cmd_args: + raise ArgumentError("Insufficient arguments") + + task = kern.GetValueFromAddress(cmd_args[0], 'task *') + #print GetTaskSummary.header + " " + GetProcSummary.header + pval = Cast(task.bsd_info, 'proc *') + #print GetTaskSummary(task) + " " + GetProcSummary(pval) + "\n \n" + crash_report_format_string = """\ +Process: {pid: <10d} +Path: {path: <50s} +Identifier: {pname: <30s} +Version: ??? (???) +Code Type: {parch: <20s} +Parent Process: {ppname: >20s}[{ppid:d}] + +Date/Time: {timest:s}.000 -0800 +OS Version: {osversion: <20s} +Report Version: 8 + +Exception Type: n/a +Exception Codes: n/a +Crashed Thread: 0 + +Application Specific Information: +Synthetic crash log generated from Kernel userstacks + +""" + user_lib_rex = re.compile("([0-9a-fx]+)\s-\s([0-9a-fx]+)\s+(.*?)\s", re.IGNORECASE|re.MULTILINE) + from datetime import datetime + ts = datetime.fromtimestamp(int(pval.p_start.tv_sec)) + date_string = ts.strftime('%Y-%m-%d %H:%M:%S') + is_64 = False + if pval.p_flag & 0x4 : + is_64 = True + + parch_s = "" + if kern.arch == "x86_64" or kern.arch == "i386": + osversion = "Mac OS X 10.8" + parch_s = "I386 (32 bit)" + if is_64: + parch_s = "X86-64 (Native)" + else: + parch_s = kern.arch + osversion = "iOS" + osversion += " ({:s})".format(kern.globals.osversion) + print crash_report_format_string.format(pid = pval.p_pid, + pname = pval.p_comm, + path = pval.p_comm, + ppid = pval.p_ppid, + ppname = GetProcNameForPid(pval.p_ppid), + timest = date_string, + parch = parch_s, + osversion = osversion + + ) + print "Binary Images:" + ShowTaskUserLibraries([hex(task)]) + usertask_lib_info = [] # will host [startaddr, endaddr, lib_name] entries + for entry in ShowTaskUserLibraries.found_images: + #print "processing line %s" % line + arr = user_lib_rex.findall(entry[3]) + #print "%r" % arr + if len(arr) == 0 : + continue + usertask_lib_info.append([int(arr[0][0],16), int(arr[0][1],16), str(arr[0][2]).strip()]) + + printthread_user_stack_ptr = ShowX86UserStack + if kern.arch == "arm": + printthread_user_stack_ptr = ShowARMUserStack + + counter = 0 + for thval in IterateQueue(task.threads, 'thread *', 'task_threads'): + print "\nThread {0:d} name:0x{1:x}\nThread {0:d}:".format(counter, thval) + counter += 1 + try: + printthread_user_stack_ptr(thval, usertask_lib_info) + except Exception as exc_err: + print "Failed to show user stack for thread 0x{0:x}".format(thval) + if config['debug']: + raise exc_err + else: + print "Enable debugging ('(lldb) xnudebug debug') to see detailed trace." + return + + +def GetUserDataAsString(task, addr, size): + """ Get data from task's address space as a string of bytes + params: + task: task object from which to extract information + addr: int - start address to get data from. + size: int - no of bytes to read. + returns: + str - a stream of bytes. Empty string if read fails. + """ + err = lldb.SBError() + if GetConnectionProtocol() == "kdp": + kdp_pmap_addr = unsigned(addressof(kern.globals.kdp_pmap)) + if not WriteInt64ToMemoryAddress(unsigned(task.map.pmap), kdp_pmap_addr): + debuglog("Failed to write in kdp_pmap from GetUserDataAsString.") + return "" + content = LazyTarget.GetProcess().ReadMemory(addr, size, err) + if not err.Success(): + debuglog("Failed to read process memory. 
Error: " + err.description) + return "" + if not WriteInt64ToMemoryAddress(0, kdp_pmap_addr): + debuglog("Failed to reset in kdp_pmap from GetUserDataAsString.") + return "" + elif kern.arch in ['arm'] and long(size) < (2 * kern.globals.page_size): + # Without the benefit of a KDP stub on the target, try to + # find the user task's physical mapping and memcpy the data. + # If it straddles a page boundary, copy in two passes + range1_addr = long(addr) + range1_size = long(size) + if kern.StraddlesPage(range1_addr, range1_size): + range2_addr = long(kern.TruncPage(range1_addr + range1_size)) + range2_size = long(range1_addr + range1_size - range2_addr) + range1_size = long(range2_addr - range1_addr) + else: + range2_addr = 0 + range2_size = 0 + range2_in_kva = 0 + + paddr_range1 = PmapWalk(task.map.pmap, range1_addr, vSILENT) + if not paddr_range1: + debuglog("Not mapped task 0x{:x} address 0x{:x}".format(task, addr)) + return "" + + range1_in_kva = kern.PhysToKernelVirt(paddr_range1) + content = LazyTarget.GetProcess().ReadMemory(range1_in_kva, range1_size, err) + if not err.Success(): + raise RuntimeError("Failed to read process memory. Error: " + err.description) + + if range2_addr: + paddr_range2 = PmapWalk(task.map.pmap, range2_addr, vSILENT) + if not paddr_range2: + debuglog("Not mapped task 0x{:x} address 0x{:x}".format(task, addr)) + return "" + range2_in_kva = kern.PhysToKernelVirt(paddr_range2) + content += LazyTarget.GetProcess().ReadMemory(range1_in_kva, range1_size, err) + if not err.Success(): + raise RuntimeError("Failed to read process memory. Error: " + err.description) + else: + raise NotImplementedError("GetUserDataAsString does not support this configuration") + + return content + +def _ExtractDataFromString(strdata, offset, data_type, length=0): + """ Extract specific data from string buffer + params: + strdata: str - string data give from GetUserDataAsString + offset: int - 0 based offset into the data. + data_type: str - defines what type to be read as. Supported values are: + 'uint64_t', 'uint32_t', 'string' + length: int - used when data_type=='string' + returns + None - if extraction failed. + obj - based on what is requested in data_type + """ + unpack_str = "s" + if data_type == 'uint64_t': + length = 8 + unpack_str = "Q" + elif data_type == "uint32_t": + length = 4 + unpack_str = "I" + else: + unpack_str= "%ds" % length + + data_len = len(strdata) + if offset > data_len or (offset + length) > data_len or offset < 0: + debuglog("Invalid arguments to _ExtractDataFromString.") + return 0 + return struct.unpack(unpack_str, strdata[offset:(offset + length)])[0] + +def GetPathForImage(task, path_address): + """ Maps 32 bytes at a time and packs as string + params: + task: obj - referencing task to read data from + path_address: int - address where the image path is stored + returns: + str - string path of the file. "" if failed to read. + """ + done = False + retval = "" + + if path_address == 0: + done = True + + while not done: + path_str_data = GetUserDataAsString(task, path_address, 32) + if len(path_str_data) == 0: + break + i = 0 + while i < 32: + if ord(path_str_data[i]): + retval += path_str_data[i] + else: + break + i += 1 + if i < 32: + done = True + else: + path_address += 32 + return retval + +def GetImageInfo(task, mh_image_address, mh_path_address, approx_end_address=None): + """ Print user library informaiton. 
+ params: + task : obj referencing the task for which Image info printed + mh_image_address : int - address which has image info + mh_path_address : int - address which holds path name string + approx_end_address: int - address which lldbmacros think is end address. + returns: + str - string representing image info. "" if failure to read data. + """ + if approx_end_address: + image_end_load_address = int(approx_end_address) -1 + else: + image_end_load_address = int(mh_image_address) + 0xffffffff + + print_format = "0x{0:x} - 0x{1:x} {2: <50s} (??? - ???) <{3: <36s}> {4: <50s}" + # 32 bytes enough for mach_header/mach_header_64 + mh_data = GetUserDataAsString(task, mh_image_address, 32) + if len(mh_data) == 0: + debuglog("unable to get userdata for task 0x{:x} img_addr 0x{:x} path_address 0x{:x}".format( + task, mh_image_address, mh_path_address)) + return "" + mh_magic = _ExtractDataFromString(mh_data, (4 * 0), "uint32_t") + mh_cputype = _ExtractDataFromString(mh_data,(4 * 1), "uint32_t") + mh_cpusubtype = _ExtractDataFromString(mh_data,(4 * 2), "uint32_t") + mh_filetype = _ExtractDataFromString(mh_data,(4 * 3), "uint32_t") + mh_ncmds = _ExtractDataFromString(mh_data,(4 * 4), "uint32_t") + mh_sizeofcmds = _ExtractDataFromString(mh_data,(4 * 5), "uint32_t") + mh_flags = _ExtractDataFromString(mh_data,(4 * 6), "uint32_t") + + if mh_magic == 0xfeedfacf: + mh_64 = True + lc_address = mh_image_address + 32 + else: + mh_64 = False + lc_address = mh_image_address + 28 + + lc_idx = 0 + uuid_data = 0 + found_uuid_data = False + retval = None + while lc_idx < mh_ncmds: + # 24 bytes is the size of uuid_command + lcmd_data = GetUserDataAsString(task, lc_address, 24) + lc_cmd = _ExtractDataFromString(lcmd_data, 4 * 0, "uint32_t") + lc_cmd_size = _ExtractDataFromString(lcmd_data, 4 * 1, "uint32_t") + lc_data = _ExtractDataFromString(lcmd_data, 4*2, "string", 16) + + uuid_out_string = "" + path_out_string = "" + + if lc_cmd == 0x1b: + # need to print the uuid now. + uuid_data = [ord(x) for x in lc_data] + found_uuid_data = True + uuid_out_string = "{a[0]:02X}{a[1]:02X}{a[2]:02X}{a[3]:02X}-{a[4]:02X}{a[5]:02X}-{a[6]:02X}{a[7]:02X}-{a[8]:02X}{a[9]:02X}-{a[10]:02X}{a[11]:02X}{a[12]:02X}{a[13]:02X}{a[14]:02X}{a[15]:02X}".format(a=uuid_data) + #also print image path + path_out_string = GetPathForImage(task, mh_path_address) + path_base_name = path_out_string.split("/")[-1] + retval = print_format.format(mh_image_address, image_end_load_address, path_base_name, uuid_out_string, path_out_string) + elif lc_cmd == 0xe: + ShowTaskUserLibraries.exec_load_path = lc_address + _ExtractDataFromString(lcmd_data, 4*2, "uint32_t") + lc_address = lc_address + lc_cmd_size + lc_idx += 1 + + if not found_uuid_data: + path_out_string = GetPathForImage(task, mh_path_address) + path_base_name = path_out_string.split("/")[-1] + uuid_out_string = "" + + retval = print_format.format(mh_image_address, image_end_load_address, path_base_name, uuid_out_string, path_out_string) + return retval + +@static_var("found_images", []) # holds entries of format (startaddr, endaddr, image_path_addr, infostring) +@static_var("exec_load_path", 0) +@lldb_command("showtaskuserlibraries") +def ShowTaskUserLibraries(cmd_args=None): + """ Show binary images known by dyld in target task + For a given user task, inspect the dyld shared library state and print information about all Mach-O images. + Syntax: (lldb)showtaskuserlibraries + Note: the address ranges are approximations. Also the list may not be completely accurate. 
This command expects memory read failures + and hence will skip a library if unable to read information. Please use your good judgement and not take the output as accurate + """ + if not cmd_args: + raise ArgumentError("Insufficient arguments") + + #reset the found_images array + ShowTaskUserLibraries.found_images = [] + + task = kern.GetValueFromAddress(cmd_args[0], 'task_t') + is_task_64 = int(task.t_flags) & 0x1 + dyld_all_image_infos_address = unsigned(task.all_image_info_addr) + cur_data_offset = 0 + if dyld_all_image_infos_address == 0: + print "No dyld shared library information available for task" + return False + vers_info_data = GetUserDataAsString(task, dyld_all_image_infos_address, 112) + version = _ExtractDataFromString(vers_info_data, cur_data_offset, "uint32_t") + cur_data_offset += 4 + if version > 12: + print "Unknown dyld all_image_infos version number %d" % version + image_info_count = _ExtractDataFromString(vers_info_data, cur_data_offset, "uint32_t") + ShowTaskUserLibraries.exec_load_path = 0 + if is_task_64: + image_info_size = 24 + image_info_array_address = _ExtractDataFromString(vers_info_data, 8, "uint64_t") + dyld_load_address = _ExtractDataFromString(vers_info_data, 8*4, "uint64_t") + dyld_all_image_infos_address_from_struct = _ExtractDataFromString(vers_info_data, 8*13, "uint64_t") + else: + image_info_size = 12 + image_info_array_address = _ExtractDataFromString(vers_info_data, 4*2, "uint32_t") + dyld_load_address = _ExtractDataFromString(vers_info_data, 4*5, "uint32_t") + dyld_all_image_infos_address_from_struct = _ExtractDataFromString(vers_info_data, 4*14, "uint32_t") + # Account for ASLR slide before dyld can fix the structure + dyld_load_address = dyld_load_address + (dyld_all_image_infos_address - dyld_all_image_infos_address_from_struct) + + i = 0 + image_info_list = [] + while i < image_info_count: + image_info_address = image_info_array_address + i * image_info_size + n_im_info_addr = None + img_data = "" + try: + img_data = GetUserDataAsString(task, image_info_address, image_info_size) + except Exception, e: + debuglog("Failed to read user data for task 0x{:x} addr 0x{:x}, exception {:s}".format(task, image_info_address, str(e))) + pass + if is_task_64: + image_info_addr = _ExtractDataFromString(img_data, 0, "uint64_t") + image_info_path = _ExtractDataFromString(img_data, 8, "uint64_t") + else: + image_info_addr = _ExtractDataFromString(img_data, 0, "uint32_t") + image_info_path = _ExtractDataFromString(img_data, 4, "uint32_t") + + if image_info_addr : + image_info_list.append((image_info_addr, image_info_path)) + i += 1 + + image_info_list.sort() + num_images_found = len(image_info_list) + + for ii in range(num_images_found): + n_im_info_addr = dyld_load_address + if ii + 1 < num_images_found: + n_im_info_addr = image_info_list[ii+1][0] + + image_info_addr = image_info_list[ii][0] + image_info_path = image_info_list[ii][1] + try: + image_print_s = GetImageInfo(task, image_info_addr, image_info_path, approx_end_address=n_im_info_addr) + if len(image_print_s) > 0: + print image_print_s + ShowTaskUserLibraries.found_images.append((image_info_addr, n_im_info_addr, image_info_path, image_print_s)) + else: + debuglog("Failed to print image info for task 0x{:x} image_info 0x{:x}".format(task, image_info_addr)) + except Exception,e: + if config['debug']: + raise e + + # load_path might get set when the main executable is processed. 
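+    # exec_load_path is recorded by GetImageInfo when it walks the main
+    # executable's load commands and finds LC_LOAD_DYLINKER (0xe); dyld does
+    # not appear in its own image list, so it is emitted here separately.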
+    if ShowTaskUserLibraries.exec_load_path != 0:
+        image_print_s = GetImageInfo(task, dyld_load_address, ShowTaskUserLibraries.exec_load_path)
+        if len(image_print_s) > 0:
+            print image_print_s
+            ShowTaskUserLibraries.found_images.append((dyld_load_address, dyld_load_address + 0xffffffff,
+                ShowTaskUserLibraries.exec_load_path, image_print_s))
+        else:
+            debuglog("Failed to print image for main executable for task 0x{:x} dyld_load_addr 0x{:x}".format(task, dyld_load_address))
+    return
+
+
diff --git a/tools/lldbmacros/utils.py b/tools/lldbmacros/utils.py
new file mode 100644
index 000000000..104a528cc
--- /dev/null
+++ b/tools/lldbmacros/utils.py
@@ -0,0 +1,391 @@
+#General Utility functions for debugging or introspection
+
+""" Please make sure you read the README file COMPLETELY BEFORE reading anything below.
+    It is very critical that you read coding guidelines in Section E in README file.
+"""
+import sys, re, time, getopt, shlex, os, time
+import lldb
+import struct
+from core.cvalue import *
+from core.configuration import *
+from core.lazytarget import *
+
+#DONOTTOUCHME: exclusive use for lldb_run_command only.
+lldb_run_command_state = {'active':False}
+
+def lldb_run_command(cmdstring):
+    """ Run a lldb command and get the string output.
+        params: cmdstring - str : lldb command string which could be executed at (lldb) prompt. (eg. "register read")
+        returns: str - output of command. it may be "" in case if command did not return any output.
+    """
+    global lldb_run_command_state
+    retval = ""
+    res = lldb.SBCommandReturnObject()
+    # set special attribute to notify xnu framework to not print on stdout
+    lldb_run_command_state['active'] = True
+    lldb.debugger.GetCommandInterpreter().HandleCommand(cmdstring, res)
+    lldb_run_command_state['active'] = False
+    if res.Succeeded():
+        retval = res.GetOutput()
+    return retval
+
+def EnableLLDBAPILogging():
+    """ Enable file based logging for lldb and also provide essential information about what information
+        to include when filing a bug with lldb or xnu.
+    """
+    logfile_name = "/tmp/lldb.%d.log" % int(time.time())
+    enable_log_base_cmd = "log enable --file %s " % logfile_name
+    cmd_str = enable_log_base_cmd + ' lldb api'
+    print cmd_str
+    print lldb_run_command(cmd_str)
+    cmd_str = enable_log_base_cmd + ' gdb-remote packets'
+    print cmd_str
+    print lldb_run_command(cmd_str)
+    cmd_str = enable_log_base_cmd + ' kdp-remote packets'
+    print cmd_str
+    print lldb_run_command(cmd_str)
+    print lldb_run_command("version")
+    print "Please collect the logs from %s for filing a radar. If you had encountered an exception in a lldbmacro command please re-run it." % logfile_name
+    print "Please make sure to provide the output of 'version', 'image list' and output of command that failed."
+    return
+
+def GetConnectionProtocol():
+    """ Returns a string representing what kind of connection is used for debugging the target.
+        params: None
+        returns:
+            str - connection type. One of ("core","kdp","gdb", "unknown")
+    """
+    retval = "unknown"
+    process_plugin_name = LazyTarget.GetProcess().GetPluginName().lower()
+    if "kdp" in process_plugin_name:
+        retval = "kdp"
+    elif "gdb" in process_plugin_name:
+        retval = "gdb"
+    elif "mach-o" in process_plugin_name and "core" in process_plugin_name:
+        retval = "core"
+    return retval
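+# Illustrative note (not in the original file): callers typically branch on the
+# connection type, mirroring GetUserDataAsString() in userspace.py, e.g.
+#   if GetConnectionProtocol() == "kdp":
+#       ... fault the user pmap in via kdp_pmap and read directly ...
+#   else:
+#       ... walk the pmap and read through the physical mapping ...
+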
+def SBValueToPointer(sbval):
+    """ Helper function for getting pointer value from an object of pointer type.
+        ex. void *astring = 0x12345
+        use SBValueToPointer(astring_val) to get 0x12345
+        params: sbval - value object of type '<type> *'
+        returns: int - pointer value as an int.
+    """
+    if type(sbval) == core.value:
+        sbval = sbval.GetSBValue()
+    if sbval.IsPointerType():
+        return sbval.GetValueAsUnsigned()
+    else:
+        return int(sbval.GetAddress())
+
+def ArgumentStringToInt(arg_string):
+    """ convert '1234' or '0x123' to int
+        params:
+            arg_string: str - typically string passed from commandline. ex '1234' or '0xA12CD'
+        returns:
+            int - integer representation of the string
+    """
+    arg_string = arg_string.strip()
+    if arg_string.find('0x') >= 0:
+        return int(arg_string, 16)
+    else:
+        return int(arg_string)
+
+def GetLongestMatchOption(searchstr, options=[], ignore_case=True):
+    """ Get longest matched string from set of options.
+        params:
+            searchstr : string of chars to be matched
+            options : array of strings that are to be matched
+        returns:
+            [] - array of matched options. The order of options is same as the arguments.
+                 empty array is returned if searchstr does not match any option.
+        example:
+            subcommand = GetLongestMatchOption('Rel', ['decode', 'enable', 'reload'], ignore_case=True)
+            print subcommand # prints ['reload']
+    """
+    if ignore_case:
+        searchstr = searchstr.lower()
+    found_options = []
+    for o in options:
+        so = o
+        if ignore_case:
+            so = o.lower()
+        if so.find(searchstr) >= 0:
+            found_options.append(o)
+    return found_options
+
+def GetType(target_type):
+    """ type cast an object to new type.
+        params:
+            target_type - str, ex. 'char', 'uint32_t' etc
+        returns:
+            lldb.SBType - a new Type that can be used as param to lldb.SBValue.Cast()
+        raises:
+            NameError - In case the type is not identified
+    """
+    return gettype(target_type)
+
+
+def Cast(obj, target_type):
+    """ Type cast an object to another C type.
+        params:
+            obj - core.value object representing some C construct in lldb
+            target_type - str : ex 'char *'
+                        - lldb.SBType :
+    """
+    return cast(obj, target_type)
+
+
+def loadLLDB():
+    """ Util function to load lldb python framework in case not available in common include paths.
+    """
+    import subprocess  # not in the module-level imports; needed for check_output below
+    try:
+        import lldb
+        print 'Found LLDB on path'
+    except:
+        platdir = subprocess.check_output('xcodebuild -version -sdk iphoneos PlatformPath'.split())
+        offset = platdir.find("Contents/Developer")
+        if offset == -1:
+            lldb_py = os.path.join(os.path.dirname(os.path.dirname(platdir)), 'Library/PrivateFrameworks/LLDB.framework/Versions/A/Resources/Python')
+        else:
+            lldb_py = os.path.join(platdir[0:offset+8], 'SharedFrameworks/LLDB.framework/Versions/A/Resources/Python')
+        if os.path.isdir(lldb_py):
+            sys.path.append(lldb_py)
+            global lldb
+            lldb = __import__('lldb')
+            print 'Found LLDB in SDK'
+        else:
+            print 'Failed to locate lldb.py from', lldb_py
+            sys.exit(-1)
+    return True
+
+class Logger():
+    """ A logging utility """
+    def __init__(self, log_file_path="/tmp/xnu.log"):
+        self.log_file_handle = open(log_file_path, "w+")
+        self.redirect_to_stdout = False
+
+    def log_debug(self, *args):
+        current_timestamp = time.time()
+        debug_line_str = "DEBUG:" + str(current_timestamp) + ":"
+        for arg in args:
+            debug_line_str += " " + str(arg).replace("\n", " ") + ", "
+
+        self.log_file_handle.write(debug_line_str + "\n")
+        if self.redirect_to_stdout:
+            print debug_line_str
+
+    def write(self, line):
+        self.log_debug(line)
+
+
+def sizeof_fmt(num, unit_str='B'):
+    """ format large number into human readable values.
+        convert any number into Kilo, Mega, Giga, Tera format for human understanding.
+ params: + num - int : number to be converted + unit_str - str : a suffix for unit. defaults to 'B' for bytes. + returns: + str - formatted string for printing. + """ + for x in ['','K','M','G','T']: + if num < 1024.0: + return "%3.1f%s%s" % (num, x,unit_str) + num /= 1024.0 + return "%3.1f%s%s" % (num, 'P', unit_str) + +def WriteStringToMemoryAddress(stringval, addr): + """ write a null terminated string to address. + params: + stringval: str- string to be written to memory. a '\0' will be added at the end + addr : int - address where data is to be written + returns: + bool - True if successfully written + """ + serr = lldb.SBError() + length = len(stringval) + 1 + format_string = "%ds" % length + sdata = struct.pack(format_string,stringval) + numbytes = LazyTarget.GetProcess().WriteMemory(addr, sdata, serr) + if numbytes == length and serr.Success(): + return True + return False + +def WriteInt64ToMemoryAddress(intval, addr): + """ write a 64 bit integer at an address. + params: + intval - int - an integer value to be saved + addr - int - address where int is to be written + returns: + bool - True if successfully written. + """ + serr = lldb.SBError() + sdata = struct.pack('Q', intval) + addr = int(hex(addr).rstrip('L'), 16) + numbytes = LazyTarget.GetProcess().WriteMemory(addr,sdata, serr) + if numbytes == 8 and serr.Success(): + return True + return False + +def WritePtrDataToMemoryAddress(intval, addr): + """ Write data to pointer size memory. + This is equivalent of doing *(&((struct pmap *)addr)) = intval + It will identify 32/64 bit kernel and write memory accordingly. + params: + intval - int - an integer value to be saved + addr - int - address where int is to be written + returns: + bool - True if successfully written. + """ + if kern.ptrsize == 8: + return WriteInt64ToMemoryAddress(intval, addr) + else: + return WriteInt32ToMemoryAddress(intval, addr) + +def WriteInt32ToMemoryAddress(intval, addr): + """ write a 32 bit integer at an address. + params: + intval - int - an integer value to be saved + addr - int - address where int is to be written + returns: + bool - True if successfully written. + """ + serr = lldb.SBError() + sdata = struct.pack('I', intval) + addr = int(hex(addr).rstrip('L'), 16) + numbytes = LazyTarget.GetProcess().WriteMemory(addr,sdata, serr) + if numbytes == 4 and serr.Success(): + return True + return False + +def WriteInt16ToMemoryAddress(intval, addr): + """ write a 16 bit integer at an address. + params: + intval - int - an integer value to be saved + addr - int - address where int is to be written + returns: + bool - True if successfully written. + """ + serr = lldb.SBError() + sdata = struct.pack('H', intval) + addr = int(hex(addr).rstrip('L'), 16) + numbytes = LazyTarget.GetProcess().WriteMemory(addr,sdata, serr) + if numbytes == 2 and serr.Success(): + return True + return False + +def WriteInt8ToMemoryAddress(intval, addr): + """ write a 8 bit integer at an address. + params: + intval - int - an integer value to be saved + addr - int - address where int is to be written + returns: + bool - True if successfully written. + """ + serr = lldb.SBError() + sdata = struct.pack('B', intval) + addr = int(hex(addr).rstrip('L'), 16) + numbytes = LazyTarget.GetProcess().WriteMemory(addr,sdata, serr) + if numbytes == 1 and serr.Success(): + return True + return False + +_enum_cache = {} +def GetEnumValue(name): + """ Finds the value of a particular enum define. 
Ex kdp_req_t::KDP_VERSION => 0x3
+        params:
+            name : str - name of enum in the format type::name
+        returns:
+            int - value of the particular enum.
+        raises:
+            TypeError - if the enum is not found
+    """
+    name = name.strip()
+    global _enum_cache
+    if name not in _enum_cache:
+        res = lldb.SBCommandReturnObject()
+        lldb.debugger.GetCommandInterpreter().HandleCommand("p/x (`%s`)" % name, res)
+        if not res.Succeeded():
+            raise TypeError("Enum not found with name: " + name)
+        # the result is of format '(int) $481 = 0x00000003\n'
+        _enum_cache[name] = int(res.GetOutput().split('=')[-1].strip(), 16)
+    return _enum_cache[name]
+
+def ResolveFSPath(path):
+    """ expand ~user directories and return absolute path.
+        params: path - str - eg "~rc/Software"
+        returns:
+            str - abs path with user directories and symlinks expanded.
+            str - if path resolution fails then returns the same string back
+    """
+    expanded_path = os.path.expanduser(path)
+    norm_path = os.path.normpath(expanded_path)
+    return norm_path
+
+_dsymlist = {}
+uuid_regex = re.compile("[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}",re.IGNORECASE|re.DOTALL)
+def addDSYM(uuid, info):
+    """ add a module by dsym into the target modules.
+        params: uuid - str - uuid string eg. 4DD2344C0-4A81-3EAB-BDCF-FEAFED9EB73E
+                info - dict - info dictionary passed from dsymForUUID
+    """
+    global _dsymlist
+    if "DBGSymbolRichExecutable" not in info:
+        print "Error: Unable to find syms for %s" % uuid
+        return False
+    if not uuid in _dsymlist:
+        # add the dsym itself
+        cmd_str = "target modules add --uuid %s" % uuid
+        debuglog(cmd_str)
+        lldb.debugger.HandleCommand(cmd_str)
+        # set up source path
+        #lldb.debugger.HandleCommand("settings append target.source-map %s %s" % (info["DBGBuildSourcePath"], info["DBGSourcePath"]))
+        # modify the list to show we loaded this
+        _dsymlist[uuid] = True
+
+def loadDSYM(uuid, load_address):
+    """ Load an already added symbols to a particular load address
+        params: uuid - str - uuid string
+                load_address - int - address where to load the symbols
+        returns bool:
+            True - if successful
+            False - if failed. possible because uuid is not presently loaded.
+    """
+    if uuid not in _dsymlist:
+        return False
+    cmd_str = "target modules load --uuid %s --slide %d" % (uuid, load_address)
+    debuglog(cmd_str)
+    lldb.debugger.HandleCommand(cmd_str)
+    # report success so the docstring's return contract holds
+    return True
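+# Typical flow (an illustrative sketch; nothing in this file calls it):
+#   info = dsymForUUID(uuid_str)
+#   if info:
+#       addDSYM(uuid_str, info)
+#       loadDSYM(uuid_str, load_address)
+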
+def dsymForUUID(uuid):
+    """ Get dsym information by calling dsymForUUID
+        params: uuid - str - uuid string from executable. eg. 4DD2344C0-4A81-3EAB-BDCF-FEAFED9EB73E
+        returns:
+            {} - a dictionary holding dsym information printed by dsymForUUID.
+            None - if failed to find information
+    """
+    import subprocess
+    import plistlib
+    output = subprocess.check_output(["/usr/local/bin/dsymForUUID", uuid])
+    if output:
+        # because of
+        #plist = plistlib.readPlistFromString(output)
+        #beginworkaround
+        keyvalue_extract_re = re.compile("<key>(.*?)</key>\s*<string>(.*?)</string>",re.IGNORECASE|re.MULTILINE|re.DOTALL)
+        plist = {}
+        plist[uuid] = {}
+        for item in keyvalue_extract_re.findall(output):
+            plist[uuid][item[0]] = item[1]
+        #endworkaround
+        if plist and plist[uuid]:
+            return plist[uuid]
+    return None
+
+def debuglog(s):
+    """ Print a object in the debug stream
+    """
+    global config
+    if config['debug']:
+        print "DEBUG:", s
+    return None
diff --git a/tools/lldbmacros/xnu.py b/tools/lldbmacros/xnu.py
new file mode 100644
index 000000000..d72c3aeef
--- /dev/null
+++ b/tools/lldbmacros/xnu.py
@@ -0,0 +1,626 @@
+import sys, subprocess, os, re, time, getopt, shlex
+import lldb
+from functools import wraps
+from ctypes import c_ulonglong as uint64_t
+from ctypes import c_void_p as voidptr_t
+import code
+import core
+from core import caching
+from core.standard import *
+from core.configuration import *
+from core.kernelcore import *
+from utils import *
+from core.lazytarget import *
+
+MODULE_NAME=__name__
+
+""" Kernel Debugging macros for lldb.
+    Please make sure you read the README COMPLETELY BEFORE reading anything below.
+    It is very critical that you read coding guidelines in Section E in README file.
+"""
+
+# End Utility functions
+# Debugging specific utility functions
+
+#decorators. Not to be called directly.
+
+def static_var(var_name, initial_value):
+    def _set_var(obj):
+        setattr(obj, var_name, initial_value)
+        return obj
+    return _set_var
+
+def header(initial_value):
+    def _set_header(obj):
+        setattr(obj, 'header', initial_value)
+        return obj
+    return _set_header
+
+# holds type declarations done by xnu.
+#DONOTTOUCHME: Exclusive use of lldb_type_summary only.
+lldb_summary_definitions = {}
+def lldb_type_summary(types_list):
+    """ A function decorator to register a summary for a type in lldb.
+        params: types_list - [] an array of types that you wish to register a summary callback function. (ex. ['task *', 'task_t'])
+        returns: Nothing. This is a decorator.
+    """
+    def _get_summary(obj):
+        def _internal_summary_function(lldbval, internal_dict):
+            out_string = ""
+            if internal_dict != None and len(obj.header) > 0:
+                out_string += "\n" + obj.header + "\n"
+            out_string += obj(core.value(lldbval))
+            return out_string
+
+        myglobals = globals()
+        summary_function_name = "LLDBSummary" + obj.__name__
+        myglobals[summary_function_name] = _internal_summary_function
+        summary_function = myglobals[summary_function_name]
+        summary_function.__doc__ = obj.__doc__
+
+        global lldb_summary_definitions
+        for single_type in types_list:
+            if config['showTypeSummary']:
+                if single_type in lldb_summary_definitions.keys():
+                    lldb.debugger.HandleCommand("type summary delete --category kernel \"" + single_type + "\"")
+                lldb.debugger.HandleCommand("type summary add \"" + single_type + "\" --category kernel --python-function " + MODULE_NAME + "." + summary_function_name)
+            lldb_summary_definitions[single_type] = obj
+
+        return obj
+    return _get_summary
+
+#global cache of documentation for lldb commands exported by this module
+#DONOTTOUCHME: Exclusive use of lldb_command only.
+lldb_command_documentation = {}
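+# Usage sketch (illustrative): macro modules register commands through this
+# decorator; the docstring becomes the lldb help text, e.g.
+#   @lldb_command('showsomething')
+#   def ShowSomething(cmd_args=None):
+#       """ one line summary shown by kgmhelp """
+#       ...
+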
+def lldb_command(cmd_name, option_string = ''):
+    """ A function decorator to define a command named 'cmd_name' in the lldb scope to call python function.
+        params: cmd_name - str : name of command to be set in lldb prompt.
+                option_string - str: getopt like option string. Only CAPITAL LETTER options allowed.
+                                see README on Customizing command options.
+    """
+    if option_string != option_string.upper():
+        raise RuntimeError("Cannot setup command with lowercase option args. %s" % option_string)
+
+    def _cmd(obj):
+        def _internal_command_function(debugger, command, result, internal_dict):
+            global config, lldb_run_command_state
+            stream = CommandOutput(result)
+            # need to avoid printing on stdout if called from lldb_run_command.
+            if 'active' in lldb_run_command_state and lldb_run_command_state['active']:
+                debuglog('Running %s from lldb_run_command' % command)
+            else:
+                result.SetImmediateOutputFile(sys.__stdout__)
+
+            command_args = shlex.split(command)
+            lldb.debugger.HandleCommand('type category disable kernel')
+            def_verbose_level = config['verbosity']
+
+            try:
+                stream.setOptions(command_args, option_string)
+                if stream.verbose_level != 0:
+                    config['verbosity'] = stream.verbose_level
+                with RedirectStdStreams(stdout=stream):
+                    if option_string:
+                        obj(cmd_args=stream.target_cmd_args, cmd_options=stream.target_cmd_options)
+                    else:
+                        obj(cmd_args=stream.target_cmd_args)
+            except KeyboardInterrupt:
+                print "Execution interrupted by user"
+            except ArgumentError as arg_error:
+                if str(arg_error) != "HELP":
+                    print "Argument Error: " + str(arg_error)
+                print "{0:s}:\n {1:s}".format(cmd_name, obj.__doc__.strip())
+                return False
+            except Exception as exc:
+                if not config['debug']:
+                    print """
+************ LLDB found an exception ************
+There has been an uncaught exception. A possible cause could be that remote connection has been disconnected.
+However, it is recommended that you report the exception to the lldb/kernel debugging team.
+************ Please run 'xnudebug debug enable' to start collecting logs. ************
+                    """
+                raise
+
+            if config['showTypeSummary']:
+                lldb.debugger.HandleCommand('type category enable kernel')
+
+            if stream.pluginRequired:
+                plugin = LoadXNUPlugin(stream.pluginName)
+                if plugin == None:
+                    print "Could not load plugin: " + stream.pluginName
+                    return
+                plugin.plugin_init(kern, config, lldb, kern.IsDebuggerConnected())
+                return_data = plugin.plugin_execute(cmd_name, result.GetOutput())
+                ProcessXNUPluginResult(return_data)
+                plugin.plugin_cleanup()
+
+            #restore the verbose level after command is complete
+            config['verbosity'] = def_verbose_level
+
+            return
+
+        myglobals = globals()
+        command_function_name = obj.__name__ + "Command"
+        myglobals[command_function_name] = _internal_command_function
+        command_function = myglobals[command_function_name]
+        if not obj.__doc__:
+            print "ERROR: Cannot register command({:s}) without documentation".format(cmd_name)
+            return obj
+        command_function.__doc__ = obj.__doc__
+        global lldb_command_documentation
+        if cmd_name in lldb_command_documentation:
+            lldb.debugger.HandleCommand("command script delete " + cmd_name)
+        lldb_command_documentation[cmd_name] = (obj.__name__, obj.__doc__.lstrip(), option_string)
+        lldb.debugger.HandleCommand("command script add -f " + MODULE_NAME + "." + command_function_name + " " + cmd_name)
+        return obj
+    return _cmd
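+# Note (illustrative): commands that take getopt-style options pass them via
+# option_string; for example, the showtaskledgers macro earlier in this patch
+# is registered with @lldb_command('showtaskledgers', 'F:') so that
+# "-F <name>" arrives in cmd_options.
+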
+def lldb_alias(alias_name, cmd_line):
+    """ define an alias in the lldb command line.
+        A programmatic way of registering an alias. This basically does
+            (lldb)command alias alias_name "cmd_line"
+        ex.
+        lldb_alias('readphys16', 'readphys 16')
+    """
+    alias_name = alias_name.strip()
+    cmd_line = cmd_line.strip()
+    lldb.debugger.HandleCommand("command alias " + alias_name + " " + cmd_line)
+
+def SetupLLDBTypeSummaries(reset=False):
+    global lldb_summary_definitions, MODULE_NAME
+    if reset == True:
+        lldb.debugger.HandleCommand("type category delete kernel ")
+    for single_type in lldb_summary_definitions.keys():
+        summary_function = lldb_summary_definitions[single_type]
+        lldb_cmd = "type summary add \"" + single_type + "\" --category kernel --python-function " + MODULE_NAME + ".LLDBSummary" + summary_function.__name__
+        debuglog(lldb_cmd)
+        lldb.debugger.HandleCommand(lldb_cmd)
+    if config['showTypeSummary']:
+        lldb.debugger.HandleCommand("type category enable kernel")
+    else:
+        lldb.debugger.HandleCommand("type category disable kernel")
+
+    return
+
+def LoadXNUPlugin(name):
+    """ Try to load a plugin from the plugins directory.
+    """
+    retval = None
+    name = name.strip()
+    try:
+        module_obj = __import__('plugins.' + name, globals(), locals(), [], -1)
+        module_obj = module_obj.__dict__[name]
+        defs = dir(module_obj)
+        if 'plugin_init' in defs and 'plugin_execute' in defs and 'plugin_cleanup' in defs:
+            retval = module_obj
+        else:
+            print "Plugin is not correctly implemented. Please read documentation on implementing plugins"
+    except:
+        print "Plugin not found: " + name
+
+    return retval
+
+def ProcessXNUPluginResult(result_data):
+    """ Look at the returned data from plugin and see if any more actions are required or not
+        params: result_data - list of format (status, out_string, more_commands)
+    """
+    ret_status = result_data[0]
+    ret_string = result_data[1]
+    ret_commands = result_data[2]
+
+    if ret_status == False:
+        print "Plugin failed: " + ret_string
+        return
+    print ret_string
+    if len(ret_commands) > 0:
+        for cmd in ret_commands:
+            print "Running command on behalf of plugin:" + cmd
+            lldb.debugger.HandleCommand(cmd)
+    return
+
+# holds tests registered with xnu.
+#DONOTTOUCHME: Exclusive use of xnudebug_test only
+lldb_command_tests = {}
+def xnudebug_test(test_name):
+    """ A function decorator to register a test with the framework. Each test is supposed to be of format
+        def Test<name>(kernel_target, config, lldb_obj, isConnected )
+
+        NOTE: The testname should start with "Test" else exception will be raised.
+    """
+    def _test(obj):
+        global lldb_command_tests
+        if obj.__name__.find("Test") != 0:
+            print "Test name ", obj.__name__, " should start with Test"
+            raise ValueError
+        lldb_command_tests[test_name] = (test_name, obj.__name__, obj, obj.__doc__)
+        return obj
+    return _test
+
+
+# End Debugging specific utility functions
+# Kernel Debugging specific classes and accessor methods
+
+# global access object for target kernel
+
+def GetObjectAtIndexFromArray(array_base, index):
+    """ Subscript indexing for arrays that are represented in C as pointers.
+        for ex. int *arr = malloc(20*sizeof(int));
+        now to get 3rd int from 'arr' you'd do
+        arr[2] in C
+        GetObjectAtIndexFromArray(arr_val,2)
+        params:
+            array_base : core.value - representing a pointer type (ex.
base of type 'ipc_entry *') + index : int - 0 based index into the array + returns: + core.value : core.value of the same type as array_base_val but pointing to index'th element + """ + array_base_val = array_base.GetSBValue() + base_address = array_base_val.GetValueAsUnsigned() + size = array_base_val.GetType().GetPointeeType().GetByteSize() + obj_address = base_address + (index * size) + obj = kern.GetValueFromAddress(obj_address, array_base_val.GetType().GetName()) + return Cast(obj, array_base_val.GetType()) + + +kern = None + +def GetLLDBThreadForKernelThread(thread_obj): + """ Get a reference to lldb.SBThread representation for kernel thread. + params: + thread_obj : core.cvalue - thread object of type thread_t + returns + lldb.SBThread - lldb thread object for getting backtrace/registers etc. + """ + tid = unsigned(thread_obj.thread_id) + lldb_process = LazyTarget.GetProcess() + sbthread = lldb_process.GetThreadByID(tid) + if not sbthread.IsValid(): + # in case lldb doesnt know about this thread, create one + if hasattr(lldb_process, "CreateOSPluginThread"): + debuglog("creating os plugin thread on the fly for {0:d} 0x{1:x}".format(tid, thread_obj)) + lldb_process.CreateOSPluginThread(tid, unsigned(thread_obj)) + else: + raise RuntimeError("LLDB process does not support CreateOSPluginThread.") + sbthread = lldb_process.GetThreadByID(tid) + + if not sbthread.IsValid(): + raise RuntimeError("Unable to find lldb thread for tid={0:d} thread = {1:#018x}".format(tid, thread_obj)) + + return sbthread + +def GetThreadBackTrace(thread_obj, verbosity = vHUMAN, prefix = ""): + """ Get a string to display back trace for a thread. + params: + thread_obj - core.cvalue : a thread object of type thread_t. + verbosity - int : either of vHUMAN, vSCRIPT or vDETAIL to describe the verbosity of output + prefix - str : a string prefix added before the line for each frame. + isContinuation - bool : is thread a continuation? + returns: + str - a multi line string showing each frame in backtrace. + """ + is_continuation = not bool(unsigned(thread_obj.kernel_stack)) + thread_val = GetLLDBThreadForKernelThread(thread_obj) + out_string = "" + kernel_stack = unsigned(thread_obj.kernel_stack) + reserved_stack = unsigned(thread_obj.reserved_stack) + if not is_continuation: + if kernel_stack and reserved_stack: + out_string += prefix + "reserved_stack = {:#018x}\n".format(reserved_stack) + out_string += prefix + "kernel_stack = {:#018x}\n".format(kernel_stack) + else: + out_string += prefix + "continuation =" + iteration = 0 + last_frame_p = 0 + for frame in thread_val.frames: + addr = frame.GetPCAddress() + load_addr = addr.GetLoadAddress(LazyTarget.GetTarget()) + function = frame.GetFunction() + frame_p = frame.GetFP() + mod_name = frame.GetModule().GetFileSpec().GetFilename() + + if iteration == 0 and not is_continuation: + out_string += prefix +"stacktop = {:#018x}\n".format(frame_p) + + if not function: + # No debug info for 'function'. + symbol = frame.GetSymbol() + file_addr = addr.GetFileAddress() + start_addr = symbol.GetStartAddress().GetFileAddress() + symbol_name = symbol.GetName() + symbol_offset = file_addr - start_addr + out_string += prefix + if not is_continuation: + out_string += "{fp:#018x} ".format(fp = frame_p) + out_string += "{addr:#018x} {mod}`{symbol} + {offset} \n".format(addr=load_addr, mod=mod_name, symbol=symbol_name, offset=symbol_offset) + else: + # Debug info is available for 'function'. 
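+                # with debug info present we can also recover the source file,
+                # line number and inlined status for display (below)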
+                func_name = frame.GetFunctionName()
+                file_name = frame.GetLineEntry().GetFileSpec().GetFilename()
+                line_num = frame.GetLineEntry().GetLine()
+                func_name = '%s [inlined]' % func_name if frame.IsInlined() else func_name
+                if is_continuation and frame.IsInlined():
+                    debuglog("Skipping frame for thread {:#018x} since its inlined".format(thread_obj))
+                    continue
+                out_string += prefix
+                if not is_continuation:
+                    out_string += "{fp:#018x} ".format(fp=frame_p)
+                out_string += "{addr:#018x} {func}{args} \n".format(addr=load_addr,
+                    func=func_name,
+                    file=file_name, line=line_num,
+                    args="(" + (str(frame.arguments).replace("\n", ", ") if len(frame.arguments) > 0 else "void") + ")")
+            iteration += 1
+            if frame_p:
+                last_frame_p = frame_p
+
+    if not is_continuation and last_frame_p:
+        out_string += prefix + "stackbottom = {:#018x}".format(last_frame_p)
+    out_string = out_string.replace("variable not available", "")
+    return out_string
+
+def GetSourceInformationForAddress(addr):
+    """ convert an address to function + offset information.
+        params: addr - int address in the binary to be symbolicated
+        returns: string of format "0xaddress: function + offset"
+    """
+    symbols = kern.SymbolicateFromAddress(addr)
+    format_string = "{0:#018x} <{1:s} + {2:#0x}>"
+    offset = 0
+    function_name = ""
+    if len(symbols) > 0:
+        s = symbols[0]
+        function_name = str(s.name)
+        offset = addr - s.GetStartAddress().GetLoadAddress(LazyTarget.GetTarget())
+    if function_name == "":
+        function_name = "???"
+    return format_string.format(addr, function_name, offset)
+
+def GetFrameLocalVariable(variable_name, frame_no=0):
+    """ Find a local variable by name
+        params:
+            variable_name: str - name of variable to search for
+        returns:
+            core.value - if the variable is found.
+            None - if not found or not Valid
+    """
+    retval = None
+    sbval = None
+    lldb_SBThread = LazyTarget.GetProcess().GetSelectedThread()
+    frame = lldb_SBThread.GetSelectedFrame()
+    if frame_no:
+        frame = lldb_SBThread.GetFrameAtIndex(frame_no)
+    if frame:
+        sbval = frame.FindVariable(variable_name)
+    if sbval and sbval.IsValid():
+        retval = core.cvalue.value(sbval)
+    return retval
+
+# Begin Macros for kernel debugging
+
+@lldb_command('kgmhelp')
+def KernelDebugCommandsHelp(cmd_args=None):
+    """ Show a list of registered commands for kernel debugging.
+    """
+    global lldb_command_documentation
+    print "List of commands provided by " + MODULE_NAME + " for kernel debugging."
+    cmds = lldb_command_documentation.keys()
+    cmds.sort()
+    for cmd in cmds:
+        if type(lldb_command_documentation[cmd][-1]) == type(""):
+            print " {0: <20s} - {1}".format(cmd, lldb_command_documentation[cmd][1].split("\n")[0].strip())
+        else:
+            print " {0: <20s} - {1}".format(cmd, "No help string found.")
+    print """
+    Each of the functions listed here accept the following common options.
+        -h                  Show the help string for the command.
+        -o <path/to/file>   The output of this command execution will be saved to file. Parser information or errors will
+                            not be sent to file though. eg /tmp/output.txt
+        -s <filter_string>  The "filter_string" param is parsed to python regex expression and each line of output
+                            will be printed/saved only if it matches the expression.
+        -v [-v...]          Each additional -v will increase the verbosity of the command.
+        -p <plugin_name>    Send the output of the command to plugin. Please see README for usage of plugins.
+
+    Additionally, each command implementation may have more options. "(lldb) help <command>" will show these options.
+ """ + return None + + +@lldb_command('showraw') +def ShowRawCommand(cmd_args=None): + """ A command to disable the kernel summaries and show data as seen by the system. + This is useful when trying to read every field of a struct as compared to brief summary + """ + command = " ".join(cmd_args) + lldb.debugger.HandleCommand('type category disable kernel' ) + lldb.debugger.HandleCommand( command ) + lldb.debugger.HandleCommand('type category enable kernel' ) + + +@lldb_command('xnudebug') +def XnuDebugCommand(cmd_args=None): + """ command interface for operating on the xnu macros. Allowed commands are as follows + reload: + Reload a submodule from the xnu/tools/lldb directory. Do not include the ".py" suffix in modulename. + usage: xnudebug reload (eg. memory, process, stats etc) + test: + Start running registered test with from various modules. + usage: xnudebug test (eg. test_memstats) + testall: + Go through all registered tests and run them + debug: + Toggle state of debug configuration flag. + """ + global config + command_args = cmd_args + if len(command_args) == 0: + raise ArgumentError("No command specified.") + supported_subcommands = ['debug', 'reload', 'test', 'testall'] + subcommand = GetLongestMatchOption(command_args[0], supported_subcommands, True) + + if len(subcommand) == 0: + raise ArgumentError("Subcommand (%s) is not a valid command. " % str(command_args[0])) + + subcommand = subcommand[0].lower() + if subcommand == 'debug': + if command_args[-1].lower().find('dis') >=0 and config['debug']: + config['debug'] = False + print "Disabled debug logging." + elif command_args[-1].lower().find('dis') < 0 and not config['debug']: + config['debug'] = True + EnableLLDBAPILogging() # provided by utils.py + print "Enabled debug logging. \nPlease run 'xnudebug debug disable' to disable it again. " + + if subcommand == 'reload': + module_name = command_args[-1] + if module_name in sys.modules: + reload(sys.modules[module_name]) + print module_name + " is reloaded from " + sys.modules[module_name].__file__ + else: + print "Unable to locate module named ", module_name + if subcommand == 'testall': + for test_name in lldb_command_tests.keys(): + print "[BEGIN]", test_name + res = lldb_command_tests[test_name][2](kern, config, lldb, True) + if res: + print "[PASSED] {:s}".format(test_name) + else: + print "[FAILED] {:s}".format(test_name) + if subcommand == 'test': + test_name = command_args[-1] + if test_name in lldb_command_tests: + test = lldb_command_tests[test_name] + print "Running test {:s}".format(test[0]) + if test[2](kern, config, lldb, True) : + print "[PASSED] {:s}".format(test[0]) + else: + print "[FAILED] {:s}".format(test[0]) + return "" + else: + print "No such test registered with name: {:s}".format(test_name) + print "XNUDEBUG Available tests are:" + for i in lldb_command_tests.keys(): + print i + return None + + return False + +@lldb_command('showversion') +def ShowVersion(cmd_args=None): + """ Read the kernel version string from a fixed address in low + memory. Useful if you don't know which kernel is on the other end, + and need to find the appropriate symbols. Beware that if you've + loaded a symbol file, but aren't connected to a remote target, + the version string from the symbol file will be displayed instead. + This macro expects to be connected to the remote kernel to function + correctly. 
+ + """ + print kern.version + + +@lldb_command('paniclog') +def ShowPanicLog(cmd_args=None): + """ Display the paniclog information + """ + panic_buf = kern.globals.debug_buf + panic_buf_start = addressof(panic_buf) + panic_buf_end = unsigned(kern.globals.debug_buf_ptr) + num_bytes = panic_buf_end - panic_buf_start + if num_bytes == 0 : + return + panic_data = panic_buf.GetSBValue().GetData() + err = lldb.SBError() + line = '' + for i in range(0, num_bytes): + c = panic_data.GetUnsignedInt8(err, i) + if chr(c) == '\n': + if line =='': + line = " " + print line + line = '' + else: + line += chr(c) + + if len(line) > 0: + print line + + return + +@lldb_command('showbootargs') +def ShowBootArgs(cmd_args=None): + """ Display boot arguments passed to the target kernel + """ + bootargs = Cast(kern.GetGlobalVariable('PE_state').bootArgs, 'boot_args *') + bootargs_cmd = bootargs.CommandLine + print str(bootargs_cmd) + +@static_var("last_process_uniq_id", 1) +def GetDebuggerStopIDValue(): + """ Create a unique session identifier. + returns: + int - a unique number identified by processid and stopid. + """ + stop_id = 0 + process_obj = LazyTarget.GetProcess() + if hasattr(process_obj, "GetStopID"): + stop_id = process_obj.GetStopID() + proc_uniq_id = 0 + if hasattr(process_obj, 'GetUniqueID'): + proc_uniq_id = process_obj.GetUniqueID() + #FIXME forces us to do this twice + proc_uniq_id = process_obj.GetUniqueID() + else: + GetDebuggerStopIDValue.last_process_uniq_id +=1 + proc_uniq_id = GetDebuggerStopIDValue.last_process_uniq_id + 1 + + stop_id_str = "{:d}:{:d}".format(proc_uniq_id, stop_id) + return hash(stop_id_str) + +# The initialization code to add your commands +_xnu_framework_init = False +def __lldb_init_module(debugger, internal_dict): + global kern, lldb_command_documentation, config, _xnu_framework_init + if _xnu_framework_init: + return + _xnu_framework_init = True + caching._GetDebuggerSessionID = GetDebuggerStopIDValue + debugger.HandleCommand('type summary add --regex --summary-string "${var%s}" -C yes -p -v "char \[[0-9]*\]"') + debugger.HandleCommand('type format add --format hex -C yes uintptr_t') + kern = KernelTarget(debugger) + print "xnu debug macros loaded successfully. Run showlldbtypesummaries to enable type summaries." + +__lldb_init_module(lldb.debugger, None) + +@lldb_command("showlldbtypesummaries") +def ShowLLDBTypeSummaries(cmd_args=[]): + """ Enable/Disable kernel type summaries. Default is disabled. + Usage: showlldbtypesummaries [enable|disable] + default is enable + """ + global config + action = "enable" + trailer_msg = '' + if len(cmd_args) > 0 and cmd_args[0].lower().find('disable') >=0: + action = "disable" + config['showTypeSummary'] = False + trailer_msg = "Please run 'showlldbtypesummaries enable' to enable the summary feature." + else: + config['showTypeSummary'] = True + SetupLLDBTypeSummaries(True) + trailer_msg = "Please run 'showlldbtypesummaries disable' to disable the summary feature." + lldb_run_command("type category "+ action +" kernel") + print "Successfully "+action+"d the kernel type summaries. 
%s" % trailer_msg + +from memory import * +from process import * +from ipc import * +from pmap import * +from ioreg import * +from mbufs import * +from net import * +from kdp import * +from userspace import * +from pci import * +from misc import * +from apic import * +from scheduler import * diff --git a/tools/lldbmacros/xnudefines.py b/tools/lldbmacros/xnudefines.py new file mode 100644 index 000000000..9762ba3ec --- /dev/null +++ b/tools/lldbmacros/xnudefines.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python + +""" This file holds all static values that debugging macros need. These are typically object type strings, #defines in C etc. + The objective is to provide a single place to be the bridge between C code in xnu and the python macros used by lldb. + If you define a variable which has been copied/referred over from C code and has high chance of changing over time. It would + be best to define a supporting function of format "populate_". This will help in running them to repopulate. + + Please take a look at example of kobject_types below before making changes to this file. + Note: The Format of the function has to be populate_ so that the automated updating will pick it up. +""" +import os, re + +lcpu_self = 0xFFFE +arm_level2_access_strings = [ " noaccess", + " supervisor(readwrite) user(noaccess)", + " supervisor(readwrite) user(readonly)", + " supervisor(readwrite) user(readwrite)", + " noaccess(reserved)", + " supervisor(readonly) user(noaccess)", + " supervisor(readonly) user(readonly)", + " supervisor(readonly) user(readonly)", + " " + ] +proc_state_strings = [ "", "Idle", "Run", "Sleep", "Stop", "Zombie", "Reaping" ] +proc_flag_explain_strings = ["!0x00000004 - process is 32 bit", #only exception that does not follow bit settings + "0x00000001 - may hold advisory locks", + "0x00000002 - has a controlling tty", + "0x00000004 - process is 64 bit", + "0x00000008 - no SIGCHLD on child stop", + "0x00000010 - waiting for child exec/exit", + "0x00000020 - has started profiling", + "0x00000040 - in select; wakeup/waiting danger", + "0x00000080 - was stopped and continued", + "0x00000100 - has set privileges since exec", + "0x00000200 - system process: no signals, stats, or swap", + "0x00000400 - timing out during a sleep", + "0x00000800 - debugged process being traced", + "0x00001000 - debugging process has waited for child", + "0x00002000 - exit in progress", + "0x00004000 - process has called exec", + "0x00008000 - owe process an addupc() XXX", + "0x00010000 - affinity for Rosetta children", + "0x00020000 - wants to run Rosetta", + "0x00040000 - has wait() in progress", + "0x00080000 - kdebug tracing on for this process", + "0x00100000 - blocked due to SIGTTOU or SIGTTIN", + "0x00200000 - has called reboot()", + "0x00400000 - is TBE state", + "0x00800000 - signal exceptions", + "0x01000000 - has thread cwd", + "0x02000000 - has vfork() children", + "0x04000000 - not allowed to attach", + "0x08000000 - vfork() in progress", + "0x10000000 - no shared libraries", + "0x20000000 - force quota for root", + "0x40000000 - no zombies when children exit", + "0x80000000 - don't hang on remote FS ops" + ] +#File: xnu/osfmk/kern/ipc_kobject.h +# string representations for Kobject types +kobject_types = ['', 'THREAD', 'TASK', 'HOST', 'HOST_PRIV', 'PROCESSOR', 'PSET', 'PSET_NAME', 'TIMER', 'PAGER_REQ', 'DEVICE', 'XMM_OBJECT', 'XMM_PAGER', 'XMM_KERNEL', 'XMM_REPLY', + 'NOTDEF 15', 'NOTDEF 16', 'HOST_SEC', 'LEDGER', 'MASTER_DEV', 'ACTIVATION', 'SUBSYTEM', 'IO_DONE_QUE', 'SEMAPHORE', 'LOCK_SET', 'CLOCK', 
diff --git a/tools/remote_build.sh b/tools/remote_build.sh
new file mode 100755
index 000000000..e3fcd5b2d
--- /dev/null
+++ b/tools/remote_build.sh
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# Script that rsyncs a source/build tree to a remote server, performs a build,
+# and copies the result back
+#
+
+# This script is invoked instead of the initial recursive make(1) in ./Makefile.
+# First it must cache all binaries that might be used during the build by
+# calling "make print_exports" (any target would work) with an overridden xcrun(1)
+# which caches tools and SDKs into ./BUILD/obj/BuildTools. When the combined
+# source+build tree is rsync-ed to the remote server, we run a script to
+# re-initiate the build using an overridden xcrun(1) which hands back
+# cached tools in ./BUILD/obj/BuildTools instead of whatever Xcode tools are on
+# the remote system (or if no Xcode tools are installed remotely). Finally,
+# the build results are copied back locally.
+#
+
+function die() {
+    echo "$1" 1>&2
+    exit 1
+}
+
+
+TARGET=
+declare -a ARGS
+declare -a REMOTEARGS
+index=0
+for arg in "$@"; do
+    case $arg in
+	_REMOTEBUILD_TARGET=*)
+	    TARGET=`echo $arg | awk -F= '{print $2}'`
+	    continue
+	    ;;
+	_REMOTEBUILD_MAKE=*)
+	    MAKE=`echo $arg | awk -F= '{print $2}'`
+	    continue
+	    ;;
+	REMOTEBUILD=*)
+	    # Don't restart another remote build remotely
+	    ;;
+	SRCROOT=*)
+	    continue
+	    ;;
+	OBJROOT=*)
+	    continue
+	    ;;
+	SYMROOT=*)
+	    continue
+	    ;;
+	DSTROOT=*)
+	    continue
+	    ;;
+	CCHROOT=*)
+	    continue
+	    ;;
+	RC_XBS=*)
+	    # Remote build isn't chrooted or special in any way
+	    arg="VERBOSE=YES"
+	    continue
+	    ;;
+	VERBOSE=YES)
+	    set -x
+	    ;;
+    esac
+    ARGS[$index]="$arg"
+    REMOTEARGS[$index]="\"$arg\""
+    index=$(($index+1))
+done
+
+
+ARGS[$index]="REMOTEBUILD="
+REMOTEARGS[$index]="\"REMOTEBUILD=\""
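The loop above strips per-run paths (SRCROOT, OBJROOT, SYMROOT, DSTROOT, CCHROOT) from the argument list so the remote invocation can substitute its own, while REMOTEARGS keeps a shell-quoted copy safe to splice into an ssh command line. A compact sketch of the same filtering in Python; the function and variable names are invented for illustration:

    import pipes

    DROP = ('SRCROOT=', 'OBJROOT=', 'SYMROOT=', 'DSTROOT=', 'CCHROOT=')

    def filter_build_args(argv):
        args, remote_args = [], []
        for arg in argv:
            if arg.startswith(DROP):               # paths differ on the remote side
                continue
            args.append(arg)
            remote_args.append(pipes.quote(arg))   # quoted for the ssh command line
        args.append('REMOTEBUILD=')                # prevent the remote end from recursing
        remote_args.append(pipes.quote('REMOTEBUILD='))
        return args, remote_args

    print filter_build_args(['MAKEJOBS=8', 'SRCROOT=/tmp/src', 'VERBOSE=YES'])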
REMOTE_SRCREL="./" + BUILDTOOLSDIR="$OBJROOT" + REMOTE_BUILDTOOLSREL="./BUILD/obj" + BUILDSCRIPTDIR="$OBJROOT" + REMOTE_BUILDSCRIPTREL="./BUILD/obj" + BUILDSCRIPTNAME="build.sh" + if [ ! -d "${OBJROOT}/SETUP" ]; then + RSYNC_ARGS="--delete-excluded" + else + RSYNC_ARGS="" + fi + TARBUILDDIRS=0 +fi + +echo "Caching build tools..." 1>&2 +mkdir -p "${BUILDTOOLSDIR}" || die "Could not create BUILDTOOLSDIR" +$MAKE print_exports "${ARGS[@]}" XCRUN="${SRC}/tools/xcrun_cache.sh -c \"${BUILDTOOLSDIR}\"" >/dev/null || die "Could not cache build tools" + +# Cache the make(1) binary itself +MAKE_SDKROOT=`"${SRC}/tools/xcrun_cache.sh" -u "${BUILDTOOLSDIR}" -sdk / -show-sdk-path` +"${SRC}/tools/xcrun_cache.sh" -c "${BUILDTOOLSDIR}" -sdk "${MAKE_SDKROOT}" -find make >/dev/null || die "Could not cache make" + +# Create a canned build script that can restart the build on the remote server. +mkdir -p "${BUILDSCRIPTDIR}" || die "Could not create BUILDSCRIPTDIR" +cat > "${BUILDSCRIPTDIR}/${BUILDSCRIPTNAME}" <&2 + + ssh $REMOTEBUILD "mkdir -p \"${REMOTEBUILDPATH}/BUILD/obj\"" || die "Could not make remote build directory" + + # Copy source only + rsync -azv --delete --exclude=\*~ --exclude=.svn --exclude=.git --exclude=/BUILD . $REMOTEBUILD:"${REMOTEBUILDPATH}" || die "Could not rsync source tree" + + # Copy partial OBJROOT (just build tools and build script), and optionally delete everything else + rsync -azv --delete $RSYNC_ARGS --include=/build.sh --include=/BuildTools --include=/BuildTools/\*\* --exclude=\* "${OBJROOT}/" $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/obj/" || die "Could not rsync build tree" + + # Start the build + echo ssh $REMOTEBUILD "cd \"${REMOTEBUILDPATH}\" && ${REMOTE_BUILDSCRIPTREL}/${BUILDSCRIPTNAME}" 1>&2 + ssh $REMOTEBUILD "cd \"${REMOTEBUILDPATH}\" && ${REMOTE_BUILDSCRIPTREL}/${BUILDSCRIPTNAME}" || die "Could not complete remote build" + + # Copy back build results except for object files (which might be several GB) + echo "Copying results back..." + rsync -azv --no-o --no-g --exclude=\*.o --exclude=\*.cpo --exclude=\*.d --exclude=\*.cpd --exclude=\*.non_lto --exclude=\*.ctf --exclude=conf $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/obj/" "${OBJROOT}/" || die "Could not rsync build results" + rsync -azv --no-o --no-g $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/sym/" "${SYMROOT}/" || die "Could not rsync build results" + rsync -azv --no-o --no-g $REMOTEBUILD:"${REMOTEBUILDPATH}/BUILD/dst/" "${DSTROOT}/" || die "Could not rsync build results" + +fi + +exit 0 diff --git a/tools/symbolify.py b/tools/symbolify.py index dde29a732..6ef18146b 100755 --- a/tools/symbolify.py +++ b/tools/symbolify.py @@ -4,6 +4,8 @@ import re import sys import os +SLIDE = 0 + NM_FORMAT = "([0-9a-f]+) ([UuAaTtDdBbCcSsIi]) (.*)" nm_re = re.compile(NM_FORMAT) @@ -34,6 +36,8 @@ class SymbolLookup: def __call__(self, saddr): addr = int(saddr.group(0), 16) last = (0, ' ', '') + if( addr > SLIDE ): + addr -= SLIDE # stupid linear search... 
@@ -56,7 +60,7 @@ def symbolify(objfile, input, *args, **kargs):
 
 def usage():
-    print "usage: %s [filename]" % sys.argv[0]
+    print "usage: %s [filename] [slide]" % sys.argv[0]
     print "\tor speficy a filename in your SYMBOLIFY_KERNEL environment variable"
 
 # die now
@@ -64,10 +68,13 @@ def usage():
 
 KERNEL_FILE = None
 
-if( len(sys.argv) > 2 ):
+if( len(sys.argv) > 3 ):
     usage()
 
-if( len(sys.argv) == 2 ):
+if( len(sys.argv) == 3 ):
+    SLIDE = int(sys.argv[2], 16)
+
+if( len(sys.argv) >= 2 ):
     KERNEL_FILE = sys.argv[1]
 
 if( KERNEL_FILE is None ):
@@ -76,7 +83,7 @@ if( KERNEL_FILE is None ):
     usage()
 
-print "using kernel file '%s'" % KERNEL_FILE
+print "using kernel file '%s', slide 0x%x" % (KERNEL_FILE, SLIDE)
 
 symbolify(KERNEL_FILE, sys.stdin, min_width=40)
diff --git a/tools/tests/MPMMTest/Makefile b/tools/tests/MPMMTest/Makefile
index 0421a718a..914f680a0 100644
--- a/tools/tests/MPMMTest/Makefile
+++ b/tools/tests/MPMMTest/Makefile
@@ -1,27 +1,62 @@
-CFLAGS=-g -O2 -arch i386
-CFLAGS64=-g -O2 -arch x86_64
+SDKROOT ?= /
+ifeq "$(RC_TARGET_CONFIG)" "iPhone"
+Embedded?=YES
+else
+Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
+endif
 
-TARGETS = MPMMtest MPMMtest_64 KQMPMMtest KQMPMMtest_64 KQMPMMtestD KQMPMMtest_64D
+CC:=xcrun -sdk "$(SDKROOT)" cc
 
-all: $(TARGETS)
+SYMROOT?=$(shell /bin/pwd)
 
-MPMMtest: MPMMtest.c
-	${CC} ${CFLAGS} -o $@ $?
+CFLAGS := -g -O2
 
-MPMMtest_64: MPMMtest.c
-	${CC} ${CFLAGS64} -o $@ $?
+ifdef RC_ARCHS
+    ARCHS:=$(RC_ARCHS)
+  else
+    ifeq "$(Embedded)" "YES"
+      ARCHS:=armv7 armv7s
+    else
+      ARCHS:=x86_64 i386
+    endif
+endif
 
-KQMPMMtest: KQMPMMtest.c
-	${CC} ${CFLAGS} -o $@ $?
+ARCH_32 := $(filter-out %64, $(ARCHS))
+ARCH_32_FLAGS := $(patsubst %, -arch %, $(ARCH_32))
+ARCH_64 := $(filter %64, $(ARCHS))
+ARCH_64_FLAGS := $(patsubst %, -arch %, $(ARCH_64))
 
-KQMPMMtest_64: KQMPMMtest.c
-	${CC} ${CFLAGS64} -o $@ $?
+DSTROOT?=$(shell /bin/pwd)
 
-KQMPMMtestD: KQMPMMtest.c
-	${CC} ${CFLAGS} -DDIRECT_MSG_RCV=1 -o $@ $?
+ARCH_32_TARGETS := MPMMtest KQMPMMtest KQMPMMtestD
+ARCH_64_TARGETS := MPMMtest_64 KQMPMMtest_64 KQMPMMtest_64D
+TARGETS := $(if $(ARCH_64), $(ARCH_64_TARGETS)) $(if $(ARCH_32), $(ARCH_32_TARGETS))
 
-KQMPMMtest_64D: KQMPMMtest.c
-	${CC} ${CFLAGS64} -DDIRECT_MSG_RCV=1 -o $@ $?
+all: $(addprefix $(DSTROOT)/, $(TARGETS))
+
+$(DSTROOT)/MPMMtest: MPMMtest.c
+	${CC} ${CFLAGS} ${ARCH_32_FLAGS} -o $(SYMROOT)/$(notdir $@) $?
+	if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
+
+$(DSTROOT)/MPMMtest_64: MPMMtest.c
+	${CC} ${CFLAGS} ${ARCH_64_FLAGS} -o $(SYMROOT)/$(notdir $@) $?
+	if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
+
+$(DSTROOT)/KQMPMMtest: KQMPMMtest.c
+	${CC} ${CFLAGS} ${ARCH_32_FLAGS} -o $(SYMROOT)/$(notdir $@) $?
+	if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
+
+$(DSTROOT)/KQMPMMtest_64: KQMPMMtest.c
+	${CC} ${CFLAGS} ${ARCH_64_FLAGS} -o $(SYMROOT)/$(notdir $@) $?
+	if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
+
+$(DSTROOT)/KQMPMMtestD: KQMPMMtest.c
+	${CC} ${CFLAGS} ${ARCH_32_FLAGS} -DDIRECT_MSG_RCV=1 -o $(SYMROOT)/$(notdir $@) $?
+	if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
+
+$(DSTROOT)/KQMPMMtest_64D: KQMPMMtest.c
+	${CC} ${CFLAGS} ${ARCH_64_FLAGS} -DDIRECT_MSG_RCV=1 -o $(SYMROOT)/$(notdir $@) $?
+	if [ !
-e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi clean: - rm -rf $(TARGETS) *.dSYM + rm -rf $(addprefix $(DSTROOT)/,$(TARGETS)) $(addprefix $(SYMROOT)/,$(TARGETS)) $(SYMROOT)/*.dSYM diff --git a/tools/tests/Makefile b/tools/tests/Makefile new file mode 100644 index 000000000..e69f54bcd --- /dev/null +++ b/tools/tests/Makefile @@ -0,0 +1,40 @@ +ifdef RC_ProjectName +DSTSUBPATH = $(DSTROOT)/AppleInternal/CoreOS +else +DSTSUBPATH = $(DSTROOT) +endif + +OBJROOT?=$(shell /bin/pwd) + +ifeq "$(RC_TARGET_CONFIG)" "iPhone" +Embedded?=YES +else +Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) +endif + +COMMON_TARGETS = xnu_quick_test \ + MPMMTest \ + affinity \ + kqueue_tests \ + superpages \ + zero-to-n \ + jitter \ + perf_index \ + unit_tests + +IPHONE_TARGETS = memorystatus + +MAC_TARGETS = + +ifeq "$(Embedded)" "YES" +TARGETS = $(addprefix $(DSTSUBPATH)/, $(COMMON_TARGETS) $(IPHONE_TARGETS)) +else +TARGETS = $(addprefix $(DSTSUBPATH)/, $(COMMON_TARGETS) $(MAC_TARGETS)) +endif + +all: $(TARGETS) + +$(DSTSUBPATH)/%: + mkdir -p $@ + mkdir -p $(OBJROOT)/$(notdir $@) + $(MAKE) -C $(SRCROOT)/$(notdir $@) SRCROOT=$(SRCROOT)/$(notdir $@) DSTROOT=$@ OBJROOT=$(OBJROOT)/$(notdir $@) diff --git a/tools/tests/affinity/Makefile b/tools/tests/affinity/Makefile index b8563d54e..e66c7fe16 100644 --- a/tools/tests/affinity/Makefile +++ b/tools/tests/affinity/Makefile @@ -1,18 +1,52 @@ -CFLAGS =-g -arch i386 -CFLAGS64=-g -arch x86_64 +SDKROOT ?= / +ifeq "$(RC_TARGET_CONFIG)" "iPhone" +Embedded?=YES +else +Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) +endif -TESTS = \ - sets \ - pool \ - tags \ +CC:=xcrun -sdk "$(SDKROOT)" cc -TARGETS = $(TESTS) $(TESTS:=64) +SYMROOT?=$(shell /bin/pwd) -all: $(TESTS) +ifdef RC_ARCHS + ARCHS:=$(RC_ARCHS) + else + ifeq "$(Embedded)" "YES" + ARCHS:=armv7 armv7s + else + ARCHS:=x86_64 i386 + endif +endif -.c: - gcc $(CFLAGS) $< -o $@ # 32-bit fat - gcc $(CFLAGS64) $< -o $@64 # 64-bit fat +ARCH_32 := $(filter-out %64, $(ARCHS)) +ARCH_32_FLAGS := $(patsubst %, -arch %, $(ARCH_32)) +ARCH_64 := $(filter %64, $(ARCHS)) +ARCH_64_FLAGS := $(patsubst %, -arch %, $(ARCH_64)) + +CFLAGS :=-g + +DSTROOT?=$(shell /bin/pwd) +SRCROOT?=$(shell /bin/pwd) + +SOURCES := sets.c pool.c tags.c +ARCH_32_TARGETS := $(addprefix $(DSTROOT)/, sets pool tags) +ARCH_64_TARGETS := $(addprefix $(DSTROOT)/, sets64 pool64 tags64) + +TARGETS := $(if $(ARCH_64), $(ARCH_64_TARGETS)) $(if $(ARCH_32), $(ARCH_32_TARGETS)) + + +all: $(TARGETS) + +$(ARCH_32_TARGETS): $(DSTROOT)/%: $(SRCROOT)/%.c + $(CC) $(CFLAGS) $(ARCH_32_FLAGS) $< -o $(SYMROOT)/$(notdir $@) # 32-bit fat + if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi + +.SECONDEXPANSION: +$(ARCH_64_TARGETS): $(DSTROOT)/%: $(SRCROOT)/$$(subst 64,,%).c + $(CC) $(CFLAGS) $(ARCH_64_FLAGS) $< -o $(SYMROOT)/$(notdir $@) # 64-bit fat + if [ ! 
-e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi clean: - rm -rf $(TARGETS) + rm -f $(TARGETS) + rm -rf $(SYMROOT)/*.dSYM diff --git a/tools/tests/affinity/sets.c b/tools/tests/affinity/sets.c index 50eda2626..cbaedcdef 100644 --- a/tools/tests/affinity/sets.c +++ b/tools/tests/affinity/sets.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * Sets is a multithreaded test/benchmarking program to evaluate @@ -286,7 +287,7 @@ manager_fn(void *arg) static void auto_config(int npages, int *nbufs, int *nsets) { - int len; + size_t len; int ncpu; int llc; int64_t cacheconfig[MAX_CACHE_DEPTH]; diff --git a/tools/tests/jitter/Makefile b/tools/tests/jitter/Makefile index ade16e7f4..4b679bbea 100644 --- a/tools/tests/jitter/Makefile +++ b/tools/tests/jitter/Makefile @@ -1,16 +1,40 @@ +SDKROOT ?= / +ifeq "$(RC_TARGET_CONFIG)" "iPhone" +Embedded?=YES +else +Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) +endif -ARCHS=x86_64 i386 -SDKROOT=/ -CC=xcrun -sdk "$(SDKROOT)" cc -CFLAGS=$(patsubst %, -arch %,$(ARCHS)) -g -Wall -Os -isysroot $(SDKROOT) +OBJROOT?=$(shell /bin/pwd) -all: jitter +CC:=xcrun -sdk "$(SDKROOT)" cc -timer_jitter.o: timer_jitter.c +ifdef RC_ARCHS + ARCHS:=$(RC_ARCHS) + else + ifeq "$(Embedded)" "YES" + ARCHS:=armv7 armv7s + else + ARCHS:=x86_64 i386 + endif +endif + + + +DSTROOT?=$(shell /bin/pwd) + +CFLAGS:=$(patsubst %, -arch %,$(ARCHS)) -g -Wall -Os $(ISYSROOT) + +all: $(DSTROOT)/jitter + +$(OBJROOT)/timer_jitter.o: timer_jitter.c $(CC) -c -o $@ $< $(CFLAGS) -cpu_number.o: cpu_number.s +$(OBJROOT)/cpu_number.o: cpu_number.s $(CC) -c -o $@ $< $(CFLAGS) -jitter: timer_jitter.o cpu_number.o +$(DSTROOT)/jitter: $(OBJROOT)/timer_jitter.o $(OBJROOT)/cpu_number.o $(CC) -o $@ $^ $(CFLAGS) + +clean: + rm -f $(DSTROOT)/jitter $(OBJROOT)/*.o diff --git a/tools/tests/jitter/cpu_number.s b/tools/tests/jitter/cpu_number.s index 2d29bb0cd..77c95875c 100644 --- a/tools/tests/jitter/cpu_number.s +++ b/tools/tests/jitter/cpu_number.s @@ -10,7 +10,7 @@ _cpu_number: sub $16,%rsp // space to read IDTR sidt (%rsp) // store limit:base on stack - movw (%rsp), %rax // get limit + movw (%rsp), %ax // get limit and $0xfff, %rax // mask off lower 12 bits to return mov %rbp,%rsp diff --git a/tools/tests/kqueue_tests/Makefile b/tools/tests/kqueue_tests/Makefile index b51ccd631..7bd1baacd 100755 --- a/tools/tests/kqueue_tests/Makefile +++ b/tools/tests/kqueue_tests/Makefile @@ -1,7 +1,36 @@ -all: file timer +SDKROOT ?= / +ifeq "$(RC_TARGET_CONFIG)" "iPhone" +Embedded?=YES +else +Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) +endif -file: - gcc -o file_tests kqueue_file_tests.c -arch i386 +CC:=xcrun -sdk "$(SDKROOT)" cc -timer: - gcc -o timer_tests kqueue_timer_tests.c -arch i386 -arch x86_64 +ifdef RC_ARCHS + ARCHS:=$(RC_ARCHS) + else + ifeq "$(Embedded)" "YES" + ARCHS:=armv7 armv7s + else + ARCHS:=x86_64 i386 + endif +endif + +CFLAGS :=-g $(patsubst %, -arch %,$(ARCHS)) + +DSTROOT?=$(shell /bin/pwd) +SYMROOT?=$(shell /bin/pwd) + +all: $(addprefix $(DSTROOT)/, file timer) + +$(DSTROOT)/file: + $(CC) $(CFLAGS) -o $(SYMROOT)/file_tests kqueue_file_tests.c + if [ ! -e $(DSTROOT)/file_tests ]; then ditto $(SYMROOT)/file_tests $(DSTROOT)/file_tests; fi + +$(DSTROOT)/timer: + $(CC) $(CFLAGS) -o $(SYMROOT)/timer_tests kqueue_timer_tests.c + if [ ! 
-e $(DSTROOT)/timer_tests ]; then ditto $(SYMROOT)/timer_tests $(DSTROOT)/timer_tests; fi + +clean: + rm -rf $(DSTROOT)/file_tests $(DSTROOT)/timer_tests $(SYMROOT)/*.dSYM $(SYMROOT)/file_tests $(SYMROOT)/timer_tests diff --git a/tools/tests/kqueue_tests/kqueue_file_tests.c b/tools/tests/kqueue_tests/kqueue_file_tests.c index cef98009e..a4461b1e7 100644 --- a/tools/tests/kqueue_tests/kqueue_file_tests.c +++ b/tools/tests/kqueue_tests/kqueue_file_tests.c @@ -13,6 +13,7 @@ #include #include #include +#include #define DIR1 "dir1" #define DOTDOT ".." diff --git a/tools/tests/kqueue_tests/kqueue_timer_tests.c b/tools/tests/kqueue_tests/kqueue_timer_tests.c index 4111af382..9a4db010e 100644 --- a/tools/tests/kqueue_tests/kqueue_timer_tests.c +++ b/tools/tests/kqueue_tests/kqueue_timer_tests.c @@ -5,6 +5,7 @@ #include #include #include +#include int kq, passed, failed; @@ -185,6 +186,7 @@ test_repeating_kevent(int usec) } } +void test_updated_kevent(int first, int second) { struct kevent64_s kev; diff --git a/tools/tests/libMicro/bench.sh b/tools/tests/libMicro/bench.sh old mode 100644 new mode 100755 diff --git a/tools/tests/libMicro/benchDS.sh b/tools/tests/libMicro/benchDS.sh old mode 100644 new mode 100755 diff --git a/tools/tests/libMicro/coreos_bench.sh b/tools/tests/libMicro/coreos_bench.sh old mode 100644 new mode 100755 diff --git a/tools/tests/libMicro/create_stuff.sh b/tools/tests/libMicro/create_stuff.sh old mode 100644 new mode 100755 diff --git a/tools/tests/libMicro/embd_bench.sh b/tools/tests/libMicro/embd_bench.sh old mode 100644 new mode 100755 diff --git a/tools/tests/libMicro/multiview.sh b/tools/tests/libMicro/multiview.sh old mode 100644 new mode 100755 diff --git a/tools/tests/libMicro/od_account_create.sh b/tools/tests/libMicro/od_account_create.sh old mode 100644 new mode 100755 diff --git a/tools/tests/libMicro/od_account_delete.sh b/tools/tests/libMicro/od_account_delete.sh old mode 100644 new mode 100755 diff --git a/tools/tests/libMicro/wrapper.sh b/tools/tests/libMicro/wrapper.sh old mode 100644 new mode 100755 diff --git a/tools/tests/memorystatus/Makefile b/tools/tests/memorystatus/Makefile new file mode 100644 index 000000000..e158b0274 --- /dev/null +++ b/tools/tests/memorystatus/Makefile @@ -0,0 +1,38 @@ +#!/usr/bin/make + +DSTROOT?=$(shell /bin/pwd) +CC:=clang +CFLAGS:=-I. + +ifdef RC_ARCHS + ARCH:=$(RC_ARCHS) + else + ifeq "$(Embedded)" "YES" + ARCH:=armv7 armv7s + else + ARCH:=x86_64 i386 + endif +endif + +ifeq "$(RC_TARGET_CONFIG)" "iPhone" +Embedded?=YES +else +Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) +endif + +ifeq "$(Embedded)" "NO" + SDKROOT:=$(shell xcodebuild -sdk macosx.internal -version Path) +else + SDKROOT:=$(shell xcodebuild -sdk iphoneos.internal -version Path) +endif + +MY_ARCH := $(patsubst %, -arch %, $(ARCH)) # allows building multiple archs. 
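MY_ARCH turns the architecture list into compiler flags via $(patsubst %, -arch %, $(ARCH)), so "x86_64 i386" becomes "-arch x86_64 -arch i386" and a single compiler invocation emits a fat binary. The same expansion in Python, purely for illustration:

    def arch_flags(archs):
        # mirror $(patsubst %, -arch %, $(ARCH)): one -arch flag per architecture
        return " ".join("-arch " + a for a in archs.split())

    print arch_flags("x86_64 i386")   # -> "-arch x86_64 -arch i386"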
+ +all: $(DSTROOT)/memorystatus + +$(DSTROOT)/memorystatus: memorystatus.c + xcrun -sdk $(SDKROOT) $(CC) $(MY_ARCH) -framework CoreFoundation -framework ServiceManagement -F $(SDKROOT)/System/Library/PrivateFrameworks/ -isystem $(SDKROOT)/System/Library/Frameworks/System.framework/PrivateHeaders -o $@ memorystatus.c $(CFLAGS) + codesign -f -s - $@ + +clean: + rm -f $(DSTROOT)/memorystatus diff --git a/tools/tests/memorystatus/memorystatus.c b/tools/tests/memorystatus/memorystatus.c new file mode 100644 index 000000000..337fc3404 --- /dev/null +++ b/tools/tests/memorystatus/memorystatus.c @@ -0,0 +1,1808 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include + +#define CR_JOB "com.apple.ReportCrash.Jetsam" +#define CR_JOB_PLIST_PATH "/System/Library/LaunchDaemons/com.apple.ReportCrash.Jetsam.plist" + +#define ERR_BUF_LEN 1024 + +#ifndef VM_PAGE_SIZE +#define VM_PAGE_SIZE 4096 +#endif + +/* + * TODO: import header (currently vm_pageout.h) without pulling in extraneous definitions; + * see . + */ +#ifndef VM_PAGER_FREEZER_DEFAULT +#define VM_PAGER_FREEZER_DEFAULT 0x8 /* Freezer backed by default pager.*/ +#endif + +/* + * Special note to ourselves: the jetsam cause to look out for is *either* + * a high watermark kill, *or* a per-process kill. + */ +#define CAUSE_HIWAT_OR_PERPROC -1 + +typedef enum jetsam_test { + kSimpleJetsamTest = 1, + kPressureJetsamTestFG, + kPressureJetsamTestBG, + kHighwaterJetsamTest, + kVnodeJetsamTest, + kBackgroundJetsamTest +} jetsam_test_t; + +typedef enum idle_exit_test { + kDeferTimeoutCleanTest = 1, + kDeferTimeoutDirtyTest, + kCancelTimeoutCleanTest, + kCancelTimeoutDirtyTest +} idle_exit_test_t; + +typedef struct shared_mem_t { + pthread_mutex_t mutex; + pthread_cond_t cv; + boolean_t completed; + boolean_t pressure_event_fired; +} shared_mem_t; + +shared_mem_t *g_shared = NULL; +unsigned long g_physmem = 0; +int g_ledger_count = -1, g_footprint_index = -1; +int64_t g_per_process_limit = -1; + +#if TARGET_OS_EMBEDDED +static boolean_t set_priority(pid_t pid, int32_t priority, uint64_t user_data); +#endif + +extern int ledger(int cmd, caddr_t arg1, caddr_t arg2, caddr_t arg3); +static boolean_t check_properties(pid_t pid, int32_t requested_priority, int32_t requested_limit_mb, uint64_t requested_user_data, const char *test); + +/* Utilities. */ + +static void +printTestHeader(pid_t testPid, const char *testName, ...) +{ + va_list va; + printf("========================================\n"); + printf("[TEST] "); + va_start(va, testName); + vprintf(testName, va); + va_end(va); + printf("\n"); + printf("[PID] %d\n", testPid); + printf("========================================\n"); + printf("[BEGIN]\n"); +} + +static void +printTestResult(const char *testName, boolean_t didPass, const char *msg, ...) 
+{ + if (msg != NULL) { + va_list va; + printf("\t\t"); + va_start(va, msg); + vprintf(msg, va); + va_end(va); + printf("\n"); + } + if (didPass) { + printf("[PASS]\t%s\n\n", testName); + } else { + printf("[FAIL]\t%s\n\n", testName); + } +} + +static CFDictionaryRef create_dictionary_from_plist(const char *path) { + void *bytes = NULL; + CFDataRef data = NULL; + CFDictionaryRef options = NULL; + size_t bufLen; + int fd = open(path, O_RDONLY, 0); + if (fd == -1) { + goto exit; + } + struct stat sb; + if (fstat(fd, &sb) == -1) { + goto exit; + } + + bufLen = (size_t)sb.st_size; + bytes = malloc(bufLen); + if (bytes == NULL) { + goto exit; + } + + if (read(fd, bytes, bufLen) != bufLen) { + goto exit; + } + + data = CFDataCreateWithBytesNoCopy(kCFAllocatorDefault, (const UInt8 *) bytes, bufLen, kCFAllocatorNull); + if (data == NULL) { + goto exit; + } + + options = (CFDictionaryRef) CFPropertyListCreateFromXMLData(kCFAllocatorDefault, data, kCFPropertyListImmutable, NULL); + if (options == NULL) { + } + +exit: + if (data != NULL) { + CFRelease(data); + } + if (bytes != NULL) { + free(bytes); + } + if (fd != -1) { + close(fd); + } + + return options; +} + +#if TARGET_OS_EMBEDDED + +static void disable_crashreporter(void) { + if (!SMJobRemove(kSMDomainSystemLaunchd, CFSTR(CR_JOB), NULL, true, NULL)) { + printf ("\t\tCould not unload %s\n", CR_JOB); + } +} + +static void enable_crashreporter(void) { + CFDictionaryRef job_dict; + + job_dict = create_dictionary_from_plist(CR_JOB_PLIST_PATH); + if (!job_dict) { + printf("\t\tCould not create dictionary from %s\n", CR_JOB_PLIST_PATH); + } + + if (!SMJobSubmit(kSMDomainSystemLaunchd, job_dict, NULL, NULL)) { + printf ("\t\tCould not submit %s\n", CR_JOB); + } + + CFRelease(job_dict); +} + +static boolean_t verify_snapshot(pid_t pid, int32_t priority, uint32_t kill_cause, uint64_t user_data, bool expecting_snapshot) { + int size; + memorystatus_jetsam_snapshot_t *snapshot = NULL; + int i; + boolean_t res = false; + + if (kill_cause == CAUSE_HIWAT_OR_PERPROC) { + kill_cause = kMemorystatusKilledHiwat|kMemorystatusKilledVMPageShortage; + } + + size = memorystatus_control(MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT, 0, 0, NULL, 0); + if (size <= 0) { + if (expecting_snapshot) { + printf("\t\tCan't get snapshot size: %d!\n", size); + } + goto exit; + } + + snapshot = (memorystatus_jetsam_snapshot_t*)malloc(size); + if (!snapshot) { + printf("\t\tCan't allocate snapshot!\n"); + goto exit; + } + + size = memorystatus_control(MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT, 0, 0, snapshot, size); + if (size <= 0) { + printf("\t\tCan't retrieve snapshot (%d)!\n", size); + goto exit; + } + + if (((size - sizeof(memorystatus_jetsam_snapshot_t)) / sizeof(memorystatus_jetsam_snapshot_entry_t)) != snapshot->entry_count) { + printf("\t\tMalformed snapshot: %d! 
Expected %ld + %zd x %ld = %ld\n", size, + sizeof(memorystatus_jetsam_snapshot_t), snapshot->entry_count, sizeof(memorystatus_jetsam_snapshot_entry_t), + sizeof(memorystatus_jetsam_snapshot_t) + (snapshot->entry_count * sizeof(memorystatus_jetsam_snapshot_entry_t))); + goto exit; + } + + if (pid == -1) { + /* Just flushing the buffer */ + res = true; + goto exit; + } + + /* Locate */ + for (i = 0; i < snapshot->entry_count; i++) { + if (snapshot->entries[i].pid == pid) { + res = 0; + if ((priority == snapshot->entries[i].priority) && ((kill_cause | snapshot->entries[i].killed) == kill_cause) && (user_data == snapshot->entries[i].user_data)) { + res = true; + } else { + printf("\t\tMismatched snapshot properties for pid %d (expected/actual): priority %d/%d : kill cause 0x%x/0x%x : user data 0x%llx/0x%llx\n", + pid, priority, snapshot->entries[i].priority, kill_cause, snapshot->entries[i].killed, user_data, snapshot->entries[i].user_data); + } + goto exit; + } + } + +exit: + free(snapshot); + + return res; +} + +#endif /* TARGET_OS_EMBEDDED */ + +static void cleanup_and_exit(int status) { +#if TARGET_OS_EMBEDDED + /* Cleanup */ + enable_crashreporter(); +#endif + + /* Exit. Pretty literal. */ + exit(status); +} + +static void child_ready() { + pthread_mutex_lock(&g_shared->mutex); + pthread_cond_signal(&g_shared->cv); + pthread_mutex_unlock(&g_shared->mutex); +} + +static pid_t init_and_fork() { + int pid; + + g_shared->completed = 0; + g_shared->pressure_event_fired = 0; + + pthread_mutex_lock(&g_shared->mutex); + + pid = fork(); + if (pid == 0) { + return 0; + } else if (pid == -1) { + printTestResult(__func__, false, "Fork error!\n"); + cleanup_and_exit(-1); + } + + /* Wait for child's signal */ + pthread_cond_wait(&g_shared->cv, &g_shared->mutex); + pthread_mutex_unlock(&g_shared->mutex); + return (pid_t)pid; +} + +static memorystatus_priority_entry_t *get_priority_list(int *size) { + memorystatus_priority_entry_t *list = NULL; + + assert(size); + + *size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, NULL, 0); + if (*size <= 0) { + printf("\t\tCan't get list size: %d!\n", *size); + goto exit; + } + + list = (memorystatus_priority_entry_t*)malloc(*size); + if (!list) { + printf("\t\tCan't allocate list!\n"); + goto exit; + } + + *size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, list, *size); + if (*size <= 0) { + printf("\t\tCan't retrieve list!\n"); + goto exit; + } + +exit: + return list; +} + +/* Tests */ + +#if TARGET_OS_EMBEDDED + +/* Spawn tests */ + +static void spawn_test() { + int page_delta = 32768; /* 128MB */ + char *mem; + unsigned long total = 0; + + /* Spin */ + while (1) { + /* Priority will be shifted during this time... 
*/ + sleep(1); + + /* ...then process will be backgrounded and hopefully killed by the memory limit */ + while(1) { + int i; + mem = malloc(page_delta * VM_PAGE_SIZE); + if (!mem) { + fprintf(stderr, "Failed to allocate memory!\n"); + while (1) { + sleep(1); + } + } + + total += page_delta; + memset(mem, 0xFF, page_delta * VM_PAGE_SIZE); + + set_priority(getpid(), JETSAM_PRIORITY_BACKGROUND, 0); + + while(1) { + sleep(1); + } + } + } +} + +#endif + +static boolean_t get_ledger_info(pid_t pid, int64_t *balance_mb, int64_t *limit_mb) { + struct ledger_entry_info *lei; + uint64_t count; + boolean_t res = false; + + lei = (struct ledger_entry_info *)malloc((size_t)(g_ledger_count * sizeof (*lei))); + if (lei) { + void *arg; + + arg = (void *)(long)pid; + count = g_ledger_count; + + if ((ledger(LEDGER_ENTRY_INFO, arg, (caddr_t)lei, (caddr_t)&count) >= 0) && (g_footprint_index < count)) { + if (balance_mb) { + *balance_mb = lei[g_footprint_index].lei_balance; + } + if (limit_mb) { + *limit_mb = lei[g_footprint_index].lei_limit; + } + res = true; + } + + free(lei); + } + + return res; +} + +static boolean_t get_priority_props(pid_t pid, int32_t *priority, int32_t *limit_mb, uint64_t *user_data) { + int size; + memorystatus_priority_entry_t *entries = NULL; + int i; + boolean_t res = false; + + entries = get_priority_list(&size); + if (!entries) { + goto exit; + } + + /* Locate */ + for (i = 0; i < size/sizeof(memorystatus_priority_entry_t); i++ ){ + if (entries[i].pid == pid) { + int64_t limit; + + *priority = entries[i].priority; + *user_data = entries[i].user_data; +#if 1 + *limit_mb = entries[i].limit; + res = true; +#else + res = get_ledger_info(entries[i].pid, NULL, &limit); + if (false == res) { + printf("Failed to get highwater!\n"); + } + /* The limit is retrieved in bytes, but set in MB, so rescale */ + *limit_mb = (int32_t)(limit/(1024 * 1024)); +#endif + goto exit; + } + } + + printf("\t\tCan't find pid: %d!\n", pid); + +exit: + free(entries); + + return res; +} + +static boolean_t check_properties(pid_t pid, int32_t requested_priority, int32_t requested_limit_mb, uint64_t requested_user_data, const char *test) { + const char *PROP_GET_ERROR_STRING = "failed to get properties"; + const char *PROP_CHECK_ERROR_STRING = "property mismatch"; + + int32_t actual_priority, actual_hiwat; + uint64_t actual_user_data; + + if (!get_priority_props(pid, &actual_priority, &actual_hiwat, &actual_user_data)) { + printf("\t\t%s test failed: %s\n", test, PROP_GET_ERROR_STRING); + return false; + } + + /* -1 really means the default per-process limit, which varies per device */ + if (requested_limit_mb <= 0) { + requested_limit_mb = g_per_process_limit; + } + + if (actual_priority != requested_priority || actual_hiwat != requested_limit_mb || actual_user_data != requested_user_data) { + printf("\t\t%s test failed: %s\n", test, PROP_CHECK_ERROR_STRING); + printf("priority is %d, should be %d\n", actual_priority, requested_priority); + printf("hiwat is %d, should be %d\n", actual_hiwat, requested_limit_mb); + printf("user data is 0x%llx, should be 0x%llx\n", actual_user_data, requested_user_data); + return false; + } + + printf("\t\t%s test ok...\n", test); + + return true; +} + +#if TARGET_OS_EMBEDDED + +static void spin() { + child_ready(); + + /* Spin */ + while (1) { + sleep(10); + } +} + +/* Priority tests */ + +static boolean_t set_priority(pid_t pid, int32_t priority, uint64_t user_data) { + int ret; + memorystatus_priority_properties_t props; + + props.priority = priority; + props.user_data = 
(uint32_t)user_data; + + return memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, pid, 0, &props, sizeof(props)); +} + +static boolean_t set_memlimit(pid_t pid, int32_t limit_mb) { + return memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK, pid, limit_mb, NULL, 0); +} + +static boolean_t set_priority_properties(pid_t pid, int32_t priority, int32_t limit_mb, uint64_t user_data, const char *stage, boolean_t show_error) { + int ret; + + ret = set_priority(pid, priority, user_data); + if (ret == 0) { + ret = set_memlimit(pid, limit_mb); + } + + if (ret) { + if (show_error) { + printf("\t\t%s stage: failed to set properties!\n", stage); + } + + return false; + } + + return true; +} + +static void start_priority_test() { + const char *DEFAULT_TEST_STR = "Default"; + const char *INVALID_NEGATIVE_TEST_STR = "Invalid (Negative)"; + const char *INVALID_POSITIVE_TEST_STR = "Invalid (Positive)"; + const char *IDLE_ALIAS_TEST_STR = "Idle Alias"; + const char *DEFERRED_TEST_STR = "Deferred"; + const char *SUSPENDED_TEST_STR = "Suspended"; + const char *FOREGROUND_TEST_STR = "Foreground"; + const char *HIGHPRI_TEST_STR = "Highpri"; + + pid_t pid; + int status; + int success = false; + + pid = init_and_fork(); + if (pid == 0) { + spin(); + } else { + printTestHeader(pid, "Priority test"); + } + + /* Check the default properties */ + if (!check_properties(pid, JETSAM_PRIORITY_DEFAULT, -1, 0, DEFAULT_TEST_STR)) { + goto exit; + } + + /* Check that setting a negative value (other than -1) leaves properties unchanged */ + if (set_priority_properties(pid, -100, 0xABABABAB, 0, INVALID_NEGATIVE_TEST_STR, false) || !check_properties(pid, JETSAM_PRIORITY_DEFAULT, -1, 0, INVALID_NEGATIVE_TEST_STR)) { + goto exit; + } + + /* Check that setting an out-of-range positive value leaves properties unchanged */ + if (set_priority_properties(pid, 100, 0xCBCBCBCB, 0, INVALID_POSITIVE_TEST_STR, false) || !check_properties(pid, JETSAM_PRIORITY_DEFAULT, -1, 0, INVALID_POSITIVE_TEST_STR)) { + goto exit; + } + + /* Idle-deferred - this should be adjusted down to idle */ + if (!set_priority_properties(pid, JETSAM_PRIORITY_IDLE_DEFERRED, 0, 0xBEEF, DEFERRED_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_IDLE, 0, 0xBEEF, DEFERRED_TEST_STR)) { + goto exit; + } + + /* Suspended */ + if (!set_priority_properties(pid, JETSAM_PRIORITY_IDLE, 0, 0xCAFE, SUSPENDED_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_IDLE, 0, 0xCAFE, SUSPENDED_TEST_STR)) { + goto exit; + } + + /* Foreground */ + if (!set_priority_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xBEEFF00D, FOREGROUND_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xBEEFF00D, FOREGROUND_TEST_STR)) { + goto exit; + } + + /* Hipri */ + if (!set_priority_properties(pid, JETSAM_PRIORITY_DEFAULT - 1, 0, 0x01234567, HIGHPRI_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_DEFAULT - 1, 0, 0x01234567, HIGHPRI_TEST_STR)) { + goto exit; + } + + /* Foreground again (to test that the limit is restored) */ + if (!set_priority_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xBEEFF00D, FOREGROUND_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xBEEFF00D, FOREGROUND_TEST_STR)) { + goto exit; + } + + /* Set foreground priority again; this would have caught */ + if (!set_priority_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xFEEDF00D, FOREGROUND_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 50, 0xFEEDF00D, FOREGROUND_TEST_STR)) { + goto exit; + } + + /* Set 
foreground priority again but pass a large memory limit; this would have caught */ + if (!set_priority_properties(pid, JETSAM_PRIORITY_FOREGROUND, 4096, 0xBEEFF00D, FOREGROUND_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 4096, 0xBEEFF00D, FOREGROUND_TEST_STR)) { + goto exit; + } + + /* Check that -1 aliases to JETSAM_PRIORITY_DEFAULT */ + if (!set_priority_properties(pid, -1, 0, 0xFADEF00D, IDLE_ALIAS_TEST_STR, true) || !check_properties(pid, JETSAM_PRIORITY_DEFAULT, 0, 0xFADEF00D, IDLE_ALIAS_TEST_STR)) { + goto exit; + } + + success = true; + +exit: + + /* Done here... */ + kill(pid, SIGKILL); + + /* Wait for exit */ + waitpid(pid, &status, 0); + + printTestResult("Priority test", success, NULL); +} + +/* Reordering */ + +static boolean_t check_reorder_priorities(pid_t pid1, pid_t pid2, int priority) { + int size; + memorystatus_priority_entry_t *entries = NULL; + int i; + boolean_t res = false; + + entries = get_priority_list(&size); + if (!entries) { + goto exit; + } + + /* Check relative priorities */ + for (i = 0; i < size/sizeof(memorystatus_priority_entry_t); i++ ){ + if (entries[i].pid == pid1) { + /* First process. The priority should match... */ + if (entries[i].priority != priority) { + goto exit; + } + + /* There should be one more daemon to follow... */ + if ((i + 1) >= size) { + goto exit; + } + + /* The next process should be pid2 */ + if (entries[i + 1].pid != pid2) { + goto exit; + } + + /* The priority should also match... */ + if (entries[i + 1].priority != priority) { + goto exit; + } + + break; + } + } + + res = true; + +exit: + + return res; +} + +static void start_fs_priority_test() { + const char *REORDER_TEST_STR = "Reorder"; + const int test_priority = JETSAM_PRIORITY_FOREGROUND_SUPPORT; + + pid_t pid1, pid2; + int status; + int success = false; + + pid1 = init_and_fork(); + if (pid1 == 0) { + spin(); + } + + pid2 = init_and_fork(); + if (pid2 == 0) { + spin(); + } + + printTestHeader(pid1, "Reorder test"); + + /* pid2 should follow pid1 in the bucket */ + if (!set_priority_properties(pid1, test_priority, 0, 0, REORDER_TEST_STR, true) || !set_priority_properties(pid2, test_priority, 0, 0, REORDER_TEST_STR, true)) { + printf("Cannot set priorities - #1!\n"); + goto exit; + } + + /* Check relative priorities */ + if (!check_reorder_priorities(pid1, pid2, test_priority)) { + printf("Bad pid1 -> pid2 priorities - #2!\n"); + goto exit; + } + + /* pid 1 should move to the back... */ + if (!set_priority_properties(pid1, test_priority, 0, 0, REORDER_TEST_STR, true)) { + printf("Cannot set priorities - #3!\n"); + goto exit; + } + + /* ...so validate */ + if (!check_reorder_priorities(pid2, pid1, test_priority)) { + printf("Bad pid2 -> pid1 priorities - #4!\n"); + goto exit; + } + + /* Again, pid 2 should move to the back... */ + if (!set_priority_properties(pid2, test_priority, 0, 0, REORDER_TEST_STR, true)) { + printf("Cannot set priorities - #5!\n"); + goto exit; + } + + /* ...so validate for the last time */ + if (!check_reorder_priorities(pid1, pid2, test_priority)) { + printf("Bad pid1 -> pid2 priorities - #6!\n"); + goto exit; + } + + success = true; + +exit: + + /* Done here... 
*/ + kill(pid1, SIGKILL); + kill(pid2, SIGKILL); + + /* Wait for exit */ + waitpid(pid1, &status, 0); + waitpid(pid2, &status, 0); + + printTestResult("Reorder test", success, NULL); +} + +/* Jetsam tests */ + +/* + + ASL message format: + + Message is ReadUID 0 + Message is ReadGID 80 + Message is ASLMessageID 703 + Message is Level 7 + Message is Time 1333155901 + Message is Sender kernel + Message is Facility kern + + */ + +static void vnode_test(int page_delta, int interval, int verbose, int32_t priority, uint64_t user_data) { + memorystatus_priority_properties_t props; + + props.priority = priority; + props.user_data = user_data; + + if (memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, getpid(), 0, &props, sizeof(props))) { + /*printf("\t\tFailed to set jetsam priority!\n");*/ + printTestResult(__func__, false, "Failed to set jetsam priority!"); + cleanup_and_exit(-1); + } + + /* Initialized... */ + child_ready(); + + /* ...so start stealing vnodes */ + while(1) { + sleep(1); + } +} + +static void *wait_for_pressure_event(void *s) { + int kq; + int res; + struct kevent event, mevent; + char errMsg[ERR_BUF_LEN + 1]; + + kq = kqueue(); + + EV_SET(&mevent, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, 0); + + res = kevent(kq, &mevent, 1, NULL, 0, NULL); + if (res != 0) { + /*printf("\t\tKevent registration failed - returning: %d!\n", res);*/ + snprintf(errMsg, ERR_BUF_LEN, "Kevent registration failed - returning: %d!",res); + printTestResult(__func__, false, errMsg); + cleanup_and_exit(-1); + } + + while (1) { + memset(&event, 0, sizeof(struct kevent)); + res = kevent(kq, NULL, 0, &event, 1, NULL); + g_shared->pressure_event_fired = 1; + } +} + +static void wait_for_exit_event(int pid, uint32_t kill_cause) { + int kq; + int res; + uint32_t expected_flag, received_flag; + struct kevent event, mevent; + char errMsg[ERR_BUF_LEN + 1]; + + switch (kill_cause) { + case kMemorystatusKilledVnodes: expected_flag = NOTE_EXIT_MEMORY_VNODE; break; + case kMemorystatusKilledVMPageShortage: expected_flag = NOTE_EXIT_MEMORY_VMPAGESHORTAGE; break; + case kMemorystatusKilledVMThrashing: expected_flag = NOTE_EXIT_MEMORY_VMTHRASHING; break; + case kMemorystatusKilledHiwat: expected_flag = NOTE_EXIT_MEMORY_HIWAT; break; + case kMemorystatusKilledPerProcessLimit: expected_flag = NOTE_EXIT_MEMORY_PID; break; + case kMemorystatusKilledIdleExit: expected_flag = NOTE_EXIT_MEMORY_IDLE; break; + case CAUSE_HIWAT_OR_PERPROC: expected_flag = NOTE_EXIT_MEMORY_HIWAT|NOTE_EXIT_MEMORY_PID; break; + } + + kq = kqueue(); + + EV_SET(&mevent, pid, EVFILT_PROC, EV_ADD, NOTE_EXIT | NOTE_EXIT_DETAIL, 0, 0); + + res = kevent(kq, &mevent, 1, NULL, 0, NULL); + if (res != 0) { + snprintf(errMsg,ERR_BUF_LEN,"Exit kevent registration failed - returning: %d!",res); + printTestResult(__func__, false, errMsg); + cleanup_and_exit(-1); + } + + res = kevent(kq, NULL, 0, &event, 1, NULL); + + /* Check if appropriate flags are set */ + if (!event.fflags & NOTE_EXIT_MEMORY) { + printTestResult(__func__, false, "Exit event fflags do not contain NOTE_EXIT_MEMORY\n"); + cleanup_and_exit(-1); + } + + received_flag = event.data & NOTE_EXIT_MEMORY_DETAIL_MASK; + if ((received_flag | expected_flag) != expected_flag) { + printTestResult(__func__, false, "Exit event data does not contain the expected jetsam flag for cause %x.\n" + "\t\t(expected %x, got %x)", kill_cause, expected_flag, received_flag); + cleanup_and_exit(-1); + } +} + +static void munch_test(int page_delta, int interval, int verbose, int32_t priority, int32_t highwater, 
uint64_t user_data) { + const char *MUNCH_TEST_STR = "Munch"; + char *mem; + unsigned long total = 0; + pthread_t pe_thread; + int res; + + /* Start thread to watch for pressure messages */ + res = pthread_create(&pe_thread, NULL, wait_for_pressure_event, (void*)g_shared); + if (res) { + printTestResult(__func__, false, "Error creating pressure event thread!\n"); + cleanup_and_exit(-1); + } + + if (set_priority_properties(getpid(), priority, highwater, user_data, MUNCH_TEST_STR, false) == false) { + printTestResult(__func__, false, "Failed to set jetsam priority!"); + cleanup_and_exit(-1); + } + + if (!page_delta) { + page_delta = 4096; + } + + sleep(1); + + /* Initialized... */ + child_ready(); + + /* ...so start munch */ + while(1) { + int i; + mem = malloc(page_delta * VM_PAGE_SIZE); + if (!mem) { + fprintf(stderr, "Failed to allocate memory!\n"); + while (1) { + sleep(1); + } + } + + total += page_delta; + memset(mem, 0xFF, page_delta * VM_PAGE_SIZE); + + if (verbose) { + printf("\t\t%lu pages dirtied...\n", total); + } + + sleep(interval); + } +} + +static bool is_pressure_test(test) { + return ((test == kPressureJetsamTestFG) || (test == kPressureJetsamTestBG)); +} + +static bool verify_exit(pid_t pid, uint32_t kill_cause, time_t start_time, uint32_t test_pri, uint64_t test_user_data, jetsam_test_t test, bool expecting_snapshot) { + const char *msg_key = "Message"; + const char *time_key = "Time"; + aslmsg query; + aslresponse response; + aslmsg message; + char pid_buffer[16]; + const char *val; + int got_jetsam = 0; + bool got_snapshot = 0; + bool success; + + /* Wait for exit */ + wait_for_exit_event(pid, kill_cause); + + /* Let the messages filter through to the log - arbitrary */ + sleep(3); + + query = asl_new(ASL_TYPE_QUERY); + asl_set_query(query, ASL_KEY_SENDER, "kernel", ASL_QUERY_OP_EQUAL); + asl_set_query(query, ASL_KEY_MSG, "memorystatus", ASL_QUERY_OP_EQUAL|ASL_QUERY_OP_SUBSTRING); + snprintf(pid_buffer, sizeof(pid_buffer) - 1, "%d", pid); + asl_set_query(query, ASL_KEY_MSG, pid_buffer, ASL_QUERY_OP_EQUAL|ASL_QUERY_OP_SUBSTRING); + response = asl_search(NULL, query); + asl_free(query); + + while (NULL != (message = aslresponse_next(response))) + { + val = asl_get(message, time_key); + if (val) { + uint32_t msg_time = atoi(val); + if (msg_time > start_time) { + val = asl_get(message, msg_key); + if (val) { + printf("\t\tFound: %s\n", val); + got_jetsam = 1; + } + } + } + } + + if (got_jetsam) { + got_snapshot = verify_snapshot(pid, test_pri, kill_cause, test_user_data, expecting_snapshot); + } else { + printf("\t\tCouldn't find jetsam message in log!\n"); + } + + aslresponse_free(response); + + success = got_jetsam && (expecting_snapshot == got_snapshot) && (!(is_pressure_test(test)) || (is_pressure_test(test) && g_shared->pressure_event_fired)); + printTestResult("munch_test", success, "(test: %d, got_jetsam: %d, got_snapshot: %d, fired: %d)", test, got_jetsam, got_snapshot, g_shared->pressure_event_fired); + + return success; +} + +static void start_jetsam_test(jetsam_test_t test, const char *description) { + const char *msg_key = "Message"; + const char *time_key = "Time"; + const char *val; + aslmsg query; + aslresponse response; + aslmsg message; + time_t start_time; + pid_t pid; + char pid_buffer[16]; + int status; + int got_jetsam = 0; + int got_snapshot = 0; + uint32_t test_pri = 0; + uint64_t test_user_data = 0; + uint32_t kill_cause; + int success; + boolean_t expecting_snapshot = TRUE; + boolean_t big_mem = (g_physmem > 512 * 1024 * 1024); + + if (big_mem) { + 
/* + * On big memory machines (1GB+), there is a per-task memory limit. + * A munch test could fail because of this, if they manage to cross it; + * *or* because the high watermark was crossed, and the system was under + * enough mem pressure to go looking for a high watermark victim to kill. + */ + kill_cause = CAUSE_HIWAT_OR_PERPROC; + } else if (test == kHighwaterJetsamTest) { + /* + * On systems without the per-task memory limit, we shouldn't see any + * such kills; so that leaves high watermark kills as the only legitimate + * reason to kill a munch test that has a high watermark set. + */ + kill_cause = kMemorystatusKilledHiwat; + } else { + /* + * If this is a standard munch test and we're on a machine without the + * per-task memory limit, the only reason for kill should be that we need + * memory. + */ + kill_cause = kMemorystatusKilledVMPageShortage; + } + + start_time = time(NULL); + + switch (test) { + case kPressureJetsamTestFG: + test_pri = JETSAM_PRIORITY_FOREGROUND; /* Test that FG processes get pressure events */ + test_user_data = 0xDEADBEEF; + break; + case kPressureJetsamTestBG: + test_pri = JETSAM_PRIORITY_UI_SUPPORT; /* Test that BG processes get pressure events */ + test_user_data = 0xFADEBEEF; + break; + case kSimpleJetsamTest: + /* + * On 1GB devices, we should see a snapshot as the per-process limit is hit. + * On 512MB devices, we should see a normal jetsam, and no snapshot. + */ + expecting_snapshot = big_mem ? TRUE : FALSE; + test_pri = JETSAM_PRIORITY_IDLE; /* Suspended */ + test_user_data = 0xFACEF00D; + break; + default: + test_pri = JETSAM_PRIORITY_IDLE; /* Suspended */ + test_user_data = 0xCAFEF00D; + break; + } + + pid = init_and_fork(); + + if (pid == 0) { + switch (test) { + case kVnodeJetsamTest: + vnode_test(0, 0, 0, test_pri, test_user_data); + break; + case kHighwaterJetsamTest: + munch_test(0, 0, 0, test_pri, 8, test_user_data); + break; + default: + munch_test(0, 0, 0, test_pri, -1, test_user_data); + break; + } + } + else { + printTestHeader(pid, "%s test", description); + } + + verify_exit(pid, kill_cause, start_time, test_pri, test_user_data, test, expecting_snapshot); +} + +static void start_jetsam_test_background(const char *path) { + const char *argv[] = { + path, + "-s", + NULL + }; + + const uint32_t memlimit = 100; /* 100 MB */ + + time_t start_time; + pid_t pid = 1; + int status; + uint32_t test_pri = 0; + posix_spawnattr_t spattr; + int32_t pf_balance; + bool success; + + start_time = time(NULL); + + pid = 1; + status = 1; + + posix_spawnattr_init(&spattr); + posix_spawnattr_setjetsam(&spattr, (POSIX_SPAWN_JETSAM_USE_EFFECTIVE_PRIORITY | POSIX_SPAWN_JETSAM_HIWATER_BACKGROUND), JETSAM_PRIORITY_UI_SUPPORT, 100); + + if (posix_spawn(&pid, path, NULL, &spattr, (char *const *)argv, NULL) < 0) { + printf("posix_spawn() failed!\n"); + goto exit; + } + + printTestHeader(pid, "Background memory limit test"); + + /* Starts in background */ + if (!check_properties(pid, JETSAM_PRIORITY_UI_SUPPORT, memlimit, 0x0, "jetsam_test_background - #1 BG")) { + goto exit; + } + + /* Set to foreground - priority and memlimit should change */ + set_priority(pid, JETSAM_PRIORITY_FOREGROUND, 0); + if (!check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 0, 0x0, "jetsam_test_background - #2 FG")) { + goto exit; + } + + /* ...and back */ + set_priority(pid, JETSAM_PRIORITY_BACKGROUND, 0); + if (!check_properties(pid, JETSAM_PRIORITY_BACKGROUND, memlimit, 0x0, "jetsam_test_background - #3 BG")) { + goto exit; + } + + /* ...and again */ + set_priority(pid, 
JETSAM_PRIORITY_FOREGROUND, 0); + if (!check_properties(pid, JETSAM_PRIORITY_FOREGROUND, 0, 0x0, "jetsam_test_background - #4 FG")) { + goto exit; + } + +#if 1 + /* + * For now, this is all we can do. Limitations of the ledger mean that this process is credited with + * the dirty pages, *not* the child. At least the memory limit is reported to have shifted dynamically + * by this point. Kill the child and continue. + */ + kill(pid, SIGKILL); +#else + /* Let the process dirty 128MB of memory, then background itself */ + verify_exit(pid, kMemorystatusKilledPerProcessLimit, start_time, test_pri, 0, kBackgroundJetsamTest); +#endif + + success = true; + +exit: + if (pid != -1) { + kill(pid, SIGKILL); + } + + /* Wait for exit */ + waitpid(pid, &status, 0); + + printTestResult("Background test", success, NULL); +} + +/* Freeze tests */ + +/* Cribbed from 'top'... */ +static int +in_shared_region(mach_vm_address_t addr, cpu_type_t type) { + mach_vm_address_t base = 0, size = 0; + + switch(type) { + case CPU_TYPE_ARM: + base = SHARED_REGION_BASE_ARM; + size = SHARED_REGION_SIZE_ARM; + break; + + case CPU_TYPE_X86_64: + base = SHARED_REGION_BASE_X86_64; + size = SHARED_REGION_SIZE_X86_64; + break; + + case CPU_TYPE_I386: + base = SHARED_REGION_BASE_I386; + size = SHARED_REGION_SIZE_I386; + break; + + case CPU_TYPE_POWERPC: + base = SHARED_REGION_BASE_PPC; + size = SHARED_REGION_SIZE_PPC; + break; + + case CPU_TYPE_POWERPC64: + base = SHARED_REGION_BASE_PPC64; + size = SHARED_REGION_SIZE_PPC64; + break; + + default: { + int t = type; + + fprintf(stderr, "unknown CPU type: 0x%x\n", t); + abort(); + } + break; + } + + return(addr >= base && addr < (base + size)); +} + +static unsigned long long get_rprvt(mach_port_t task, pid_t pid) { + kern_return_t kr; + + mach_vm_size_t rprvt = 0; + mach_vm_size_t empty = 0; + mach_vm_size_t fw_private = 0; + mach_vm_size_t pagesize = VM_PAGE_SIZE; + mach_vm_size_t regs = 0; + + mach_vm_address_t addr; + mach_vm_size_t size; + + int split = 0; + + for (addr = 0; ; addr += size) { + vm_region_top_info_data_t info; + mach_msg_type_number_t count = VM_REGION_TOP_INFO_COUNT; + mach_port_t object_name; + + kr = mach_vm_region(task, &addr, &size, VM_REGION_TOP_INFO, (vm_region_info_t)&info, &count, &object_name); + if (kr != KERN_SUCCESS) break; + + if (in_shared_region(addr, CPU_TYPE_ARM)) { + // Private Shared + fw_private += info.private_pages_resident * pagesize; + + /* + * Check if this process has the globally shared + * text and data regions mapped in. If so, set + * split to TRUE and avoid checking + * again. + */ + if (split == FALSE && info.share_mode == SM_EMPTY) { + vm_region_basic_info_data_64_t b_info; + mach_vm_address_t b_addr = addr; + mach_vm_size_t b_size = size; + count = VM_REGION_BASIC_INFO_COUNT_64; + + kr = mach_vm_region(task, &b_addr, &b_size, VM_REGION_BASIC_INFO, (vm_region_info_t)&b_info, &count, &object_name); + if (kr != KERN_SUCCESS) break; + + if (b_info.reserved) { + split = TRUE; + } + } + + /* + * Short circuit the loop if this isn't a shared + * private region, since that's the only region + * type we care about within the current address + * range. + */ + if (info.share_mode != SM_PRIVATE) { + continue; + } + } + + regs++; + + /* + * Update counters according to the region type. 
+ */ + + if (info.share_mode == SM_COW && info.ref_count == 1) { + // Treat single reference SM_COW as SM_PRIVATE + info.share_mode = SM_PRIVATE; + } + + switch (info.share_mode) { + case SM_LARGE_PAGE: + // Treat SM_LARGE_PAGE the same as SM_PRIVATE + // since they are not shareable and are wired. + case SM_PRIVATE: + rprvt += info.private_pages_resident * pagesize; + rprvt += info.shared_pages_resident * pagesize; + break; + + case SM_EMPTY: + empty += size; + break; + + case SM_COW: + case SM_SHARED: + if (pid == 0) { + // Treat kernel_task specially + if (info.share_mode == SM_COW) { + rprvt += info.private_pages_resident * pagesize; + } + break; + } + + if (info.share_mode == SM_COW) { + rprvt += info.private_pages_resident * pagesize; + } + break; + + default: + assert(0); + break; + } + } + + return rprvt; +} + +static void freeze_test() { + const unsigned long DIRTY_ALLOC = 16 * 1024 * 1024; + unsigned long *ptr; + task_port_t task = mach_task_self(); + + child_ready(); + + /* Needs to be vm_allocate() here; otherwise the compiler will optimize memset away */ + vm_allocate(task, (vm_address_t *)&ptr, DIRTY_ALLOC, TRUE); + if (ptr) { + int i; + int pid = getpid(); + unsigned long long baseline_rprvt, half_rprvt, rprvt; + + /* Get baseline */ + baseline_rprvt = get_rprvt(task, pid); + + /* Dirty half */ + memset(ptr, 0xAB, DIRTY_ALLOC / 2); + + /* Check RPRVT */ + half_rprvt = get_rprvt(task, pid); + printf("\t\trprvt is %llu\n", half_rprvt); + + if (half_rprvt != (baseline_rprvt + (DIRTY_ALLOC / 2))) + { + printTestResult(__func__, false, "Failed to dirty memory"); + cleanup_and_exit(-1); + } + + /* Freeze */ + sysctlbyname("kern.memorystatus_freeze", NULL, 0, &pid, sizeof(pid)); + + sleep(2); + + /* Check RPRVT */ + rprvt = get_rprvt(task, pid); + printf("\t\trprvt is %llu\n", rprvt); + + if ((rprvt > (half_rprvt - (DIRTY_ALLOC / 2))) || (rprvt > (64 * 1024)) /* Sanity */) + { + printTestResult(__func__, false, "Failed to freeze memory"); + cleanup_and_exit(-1); + } + + /* Thaw */ + sysctlbyname("kern.memorystatus_thaw", NULL, 0, &pid, sizeof(pid)); + + sleep(2); + + /* Check RPRVT */ + rprvt = get_rprvt(task, pid); + printf("\t\trprvt is %llu\n", rprvt); + + if (rprvt < (baseline_rprvt + (DIRTY_ALLOC / 2))) + { + printTestResult(__func__, false, "Failed to thaw memory"); + cleanup_and_exit(-1); + } + + /* Dirty the rest */ + memset(ptr + (DIRTY_ALLOC / (2 * sizeof(unsigned long))), 0xBC, DIRTY_ALLOC / 2); + + /* Check RPRVT */ + rprvt = get_rprvt(task, pid); + printf("\t\trprvt is %llu\n", rprvt); + + if (rprvt < (baseline_rprvt + DIRTY_ALLOC)) + { + printTestResult(__func__, false, "Failed to dirty memory"); + cleanup_and_exit(-1); + } + + g_shared->completed = 1; + cleanup_and_exit(0); + } + + printTestResult(__func__, false, "Something bad happened..."); + cleanup_and_exit(-1); +} + +static void start_freeze_test() { + pid_t pid; + int status; + int mode; + size_t size; + + /* Check to see if the test is applicable */ + size = sizeof(mode); + if (sysctlbyname("vm.compressor_mode", &mode, &size, NULL, 0) != 0) { + printTestHeader(getpid(), "Freeze test"); + printTestResult(__func__, false, "Failed to retrieve compressor config"); + cleanup_and_exit(-1); + } + + if (mode != VM_PAGER_FREEZER_DEFAULT) { + printTestHeader(getpid(), "Freeze test"); + printTestResult(__func__, true, "Freeze disabled; skipping test"); + return; + } + + /* Reset */ + memset(g_shared, 0, sizeof(shared_mem_t)); + + pid = init_and_fork(); + if (pid == 0) { + freeze_test(); + } else { + printTestHeader(pid, 
"Freeze test"); + } + + /* Wait for exit */ + waitpid(pid, &status, 0); + + printTestResult("Freeze test", g_shared->completed, NULL); +} + +#endif + +static void start_list_validation_test() { + int size; + memorystatus_priority_entry_t *entries = NULL; + int i; + boolean_t valid = false; + + printTestHeader(getpid(), "List validation test"); + + entries = get_priority_list(&size); + if (!entries) { + printf("Can't get entries!\n"); + goto exit; + } + + /* Validate */ + for (i = 0; i < size/sizeof(memorystatus_priority_entry_t); i++ ) { + int dirty_ret; + uint32_t dirty_flags; + + /* Make sure launchd isn't in the list - */ + if (entries[i].pid <= 1) { + printf("\t\tBad process (%d) in list!\n", entries[i].pid); + goto exit; + } + + /* Sanity check idle exit state */ + dirty_ret = proc_get_dirty(entries[i].pid, &dirty_flags); + if (dirty_ret != 0) { + dirty_flags = 0; + } + + if (dirty_flags & PROC_DIRTY_ALLOWS_IDLE_EXIT) { + /* Check that the process isn't at idle priority when dirty */ + if ((entries[i].priority == JETSAM_PRIORITY_IDLE) && (dirty_flags & PROC_DIRTY_IS_DIRTY)) { + printf("\t\tProcess %d at idle priority when dirty (priority %d, flags 0x%x)!\n", entries[i].pid, entries[i].priority, dirty_flags); + goto exit; + } + /* Check that the process is at idle (or deferred) priority when clean. */ + if ((entries[i].priority > JETSAM_PRIORITY_IDLE_DEFERRED) && !(dirty_flags & PROC_DIRTY_IS_DIRTY)) { + printf("\t\tProcess %d not at non-idle priority when clean(priority %d, flags 0x%x)\n", entries[i].pid, entries[i].priority, dirty_flags); + goto exit; + } + } + } + + valid = true; + +exit: + free(entries); + + printTestResult("List validation test", valid, NULL); +} + +/* Random individual tests */ +static void start_general_sanity_test() { + int ret, size; + memorystatus_priority_entry_t *entries = NULL; + int i; + boolean_t valid = false; + + printTestHeader(getpid(), "Sanity test"); + + /* Should not be able to set the priority of launchd... */ + ret = set_priority(1, JETSAM_PRIORITY_FOREGROUND, 0); + if (ret != -1 || errno != EPERM) { + printf("\t\tAble to set priority of launchd (%d/%d)!\n", ret, errno); + goto exit; + } else { + printf("\t\tlaunchd priority test OK!\n"); + } + + /* ...nor the memory limit... 
+/* Random individual tests */
+static void start_general_sanity_test() {
+	int ret, size;
+	memorystatus_priority_entry_t *entries = NULL;
+	int i;
+	boolean_t valid = false;
+
+	printTestHeader(getpid(), "Sanity test");
+
+	/* Should not be able to set the priority of launchd... */
+	ret = set_priority(1, JETSAM_PRIORITY_FOREGROUND, 0);
+	if (ret != -1 || errno != EPERM) {
+		printf("\t\tAble to set priority of launchd (%d/%d)!\n", ret, errno);
+		goto exit;
+	} else {
+		printf("\t\tlaunchd priority test OK!\n");
+	}
+
+	/* ...nor the memory limit... */
+	ret = set_memlimit(1, 100);
+	if (ret != -1 || errno != EPERM) {
+		printf("\t\tNo EPERM setting launchd memlimit (%d/%d)!\n", ret, errno);
+		goto exit;
+	} else {
+		printf("\t\tlaunchd memlimit test OK!\n");
+	}
+
+	/* ...nor tinker with transactions */
+	ret = proc_track_dirty(1, PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
+	if (ret != EPERM) {
+		printf("\t\tNo EPERM tracking launchd (%d/%d)!\n", ret, errno);
+		goto exit;
+	} else {
+		printf("\t\tlaunchd track test OK!\n");
+	}
+
+	ret = proc_set_dirty(1, true);
+	if (ret != EPERM) {
+		printf("\t\tNo EPERM setting launchd dirty state (%d/%d)!\n", ret, errno);
+		goto exit;
+	} else {
+		printf("\t\tlaunchd dirty test OK!\n");
+	}
+
+	valid = true;
+
+exit:
+	free(entries);
+
+	printTestResult("Sanity test", valid, NULL);
+}
+
+static void idle_exit_deferral_test(idle_exit_test_t test) {
+	int secs = DEFERRED_IDLE_EXIT_TIME_SECS;
+
+	child_ready();
+
+	if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#1 - pre xpc_track_activity()")) {
+		goto exit;
+	}
+
+	proc_track_dirty(getpid(), PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
+
+	if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE_DEFERRED, -1, 0x0, "#2 - post xpc_track_activity()")) {
+		goto exit;
+	}
+
+	/* Toggle */
+	proc_set_dirty(getpid(), true);
+	proc_set_dirty(getpid(), false);
+	proc_set_dirty(getpid(), true);
+	proc_set_dirty(getpid(), false);
+
+	switch (test) {
+	case kDeferTimeoutCleanTest:
+		if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE_DEFERRED, -1, 0x0, "#3 - post toggle")) {
+			goto exit;
+		}
+
+		/* Approximate transition check */
+		sleep(secs - 1);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE_DEFERRED, -1, 0x0, "#4 - pre timeout")) {
+			goto exit;
+		}
+
+		sleep(2);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#5 - post timeout")) {
+			goto exit;
+		}
+
+		proc_set_dirty(getpid(), true);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#6 - post dirty")) {
+			goto exit;
+		}
+
+		proc_set_dirty(getpid(), false);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#7 - post clean")) {
+			goto exit;
+		}
+
+		break;
+	case kDeferTimeoutDirtyTest:
+		proc_set_dirty(getpid(), true);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#3 - post dirty")) {
+			goto exit;
+		}
+
+		/* Approximate transition check */
+		sleep(secs - 1);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#4 - pre timeout")) {
+			goto exit;
+		}
+
+		sleep(2);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#5 - post timeout")) {
+			goto exit;
+		}
+
+		proc_set_dirty(getpid(), false);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#6 - post clean")) {
+			goto exit;
+		}
+
+		break;
+	case kCancelTimeoutDirtyTest:
+		proc_set_dirty(getpid(), true);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#3 - post dirty")) {
+			goto exit;
+		}
+
+		proc_track_dirty(getpid(), PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#4 - post deferral cancellation")) {
+			goto exit;
+		}
+
+		proc_set_dirty(getpid(), false);
+
+		if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#5 - post clean")) {
+			goto exit;
+		}
+
+		break;
+	case kCancelTimeoutCleanTest:
+		if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE_DEFERRED, -1, 0x0, "#3 - post toggle")) {
+			goto exit;
+		}
+
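+		/*
+		 * Re-registering without PROC_DIRTY_DEFER while still clean cancels
+		 * the deferral in place: the process should drop from the deferred
+		 * band straight to JETSAM_PRIORITY_IDLE, as verified below.
+		 */
+		proc_track_dirty(getpid(), 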
PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT); + + if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#4 - post deferral cancellation")) { + goto exit; + } + + proc_set_dirty(getpid(), true); + + if (!check_properties(getpid(), JETSAM_PRIORITY_DEFAULT, -1, 0x0, "#5 - post dirty")) { + goto exit; + } + + proc_set_dirty(getpid(), false); + + if (!check_properties(getpid(), JETSAM_PRIORITY_IDLE, -1, 0x0, "#6 - post clean")) { + goto exit; + } + + break; + } + + g_shared->completed = 1; + cleanup_and_exit(0); + +exit: + printTestResult(__func__, false, "Something bad happened..."); + cleanup_and_exit(-1); +} + +static void start_idle_exit_defer_test(idle_exit_test_t test) { + pid_t pid; + int status; + + /* Reset */ + memset(g_shared, 0, sizeof(shared_mem_t)); + + pid = init_and_fork(); + if (pid == 0) { + idle_exit_deferral_test(test); + } + else { + printTestHeader(pid, "Idle exit deferral test"); + } + + /* Wait for exit */ + waitpid(pid, &status, 0); + /* Idle exit not reported on embedded */ + // wait_for_exit_event(pid, kMemorystatusKilledIdleExit); + + printTestResult("Idle exit deferral test", g_shared->completed, NULL); +} + +static void ledger_init(void) { + const char *physFootprintName = "phys_footprint"; + struct ledger_info li; + int64_t template_cnt; + struct ledger_template_info *templateInfo; + void *arg; + int i; + + /* Grab ledger entries */ + arg = (void *)(long)getpid(); + if (ledger(LEDGER_INFO, arg, (caddr_t)&li, NULL) < 0) { + exit(-1); + } + + g_ledger_count = template_cnt = li.li_entries; + + templateInfo = malloc(template_cnt * sizeof (struct ledger_template_info)); + if (templateInfo == NULL) { + exit (-1); + } + + if (!(ledger(LEDGER_TEMPLATE_INFO, (caddr_t)templateInfo, (caddr_t)&template_cnt, NULL) < 0)) { + for (i = 0; i < template_cnt; i++) { + if (!strncmp(templateInfo[i].lti_name, physFootprintName, strlen(physFootprintName))) { + g_footprint_index = i; + break; + } + } + } + + free(templateInfo); +} + +static void run_tests(const char *path) { + /* Embedded-only */ +#if TARGET_OS_EMBEDDED + start_jetsam_test(kSimpleJetsamTest, "Simple munch"); + start_jetsam_test(kHighwaterJetsamTest, "Highwater munch"); + start_jetsam_test(kPressureJetsamTestBG, "Background pressure munch"); + start_jetsam_test(kPressureJetsamTestFG, "Foreground Pressure munch"); + start_jetsam_test_background(path); + start_freeze_test(); + start_priority_test(); + start_fs_priority_test(); +#else +#pragma unused(path) +#endif + + /* Generic */ + start_general_sanity_test(); + start_list_validation_test(); + start_idle_exit_defer_test(kDeferTimeoutCleanTest); + start_idle_exit_defer_test(kDeferTimeoutDirtyTest); + start_idle_exit_defer_test(kCancelTimeoutCleanTest); + start_idle_exit_defer_test(kCancelTimeoutDirtyTest); +} + +#if TARGET_OS_EMBEDDED + +static void +sigterm(int sig) +{ + /* Reload crash reporter job */ + enable_crashreporter(); + + /* Reset signal handlers and re-raise signal */ + signal(SIGTERM, SIG_DFL); + signal(SIGINT, SIG_DFL); + + kill(getpid(), sig); +} + +#endif + +int main(int argc, char **argv) +{ + pthread_mutexattr_t attr; + pthread_condattr_t cattr; + size_t size; +#if TARGET_OS_EMBEDDED + struct sigaction sa; +#endif + + /* Must be run as root for priority retrieval */ + if (getuid() != 0) { + fprintf(stderr, "%s must be run as root.\n", getprogname()); + exit(EXIT_FAILURE); + } + +#if TARGET_OS_EMBEDDED + /* Spawn test */ + if ((argc == 2) && !strcmp(argv[1], "-s")) { + spawn_test(); + } + + sa.sa_flags = 0; + sa.sa_handler = sigterm; + 
sigemptyset(&sa.sa_mask); + + /* Ensure we can reinstate CrashReporter on exit */ + sigaction(SIGTERM, &sa, NULL); + sigaction(SIGINT, &sa, NULL); + + /* Unload */ + disable_crashreporter(); + + /* Flush the jetsam snapshot */ + verify_snapshot(-1, 0, 0, 0, FALSE); +#endif + + /* Memory */ + size = sizeof(g_physmem); + if (sysctlbyname("hw.physmem", &g_physmem, &size, NULL, 0) != 0 || !g_physmem) { + printTestResult(__func__, false, "Failed to retrieve system memory"); + cleanup_and_exit(-1); + } + + /* Ledger; default limit applies to this process, so grab it here */ + ledger_init(); + if ((-1 == g_ledger_count) || (-1 == g_footprint_index) || (false == get_ledger_info(getpid(), NULL, &g_per_process_limit))) { + printTestResult("setup", false, "Unable to init ledger!\n"); + cleanup_and_exit(-1); + } + + /* Rescale to MB */ + g_per_process_limit /= (1024 * 1024); + + /* Shared memory */ + g_shared = mmap(NULL, sizeof(shared_mem_t), PROT_WRITE|PROT_READ, MAP_ANON|MAP_SHARED, 0, 0); + if (!g_shared) { + printTestResult(__func__, false, "Failed mmap"); + cleanup_and_exit(-1); + } + + pthread_mutexattr_init(&attr); + pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED ); + + pthread_condattr_init(&cattr); + pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED); + + if (pthread_mutex_init(&g_shared->mutex, &attr) || pthread_cond_init(&g_shared->cv, &cattr)) { + printTestResult("setup", false, "Unable to init condition variable!\n"); + cleanup_and_exit(-1); + } + + run_tests(argv[0]); + + /* Teardown */ + pthread_mutex_destroy(&g_shared->mutex); + pthread_cond_destroy(&g_shared->cv); + + pthread_mutexattr_destroy(&attr); + pthread_condattr_destroy(&cattr); + +#if TARGET_OS_EMBEDDED + /* Reload crash reporter */ + enable_crashreporter(); +#endif + + return 0; +} diff --git a/tools/tests/perf_index/Makefile b/tools/tests/perf_index/Makefile new file mode 100644 index 000000000..ae8f3a683 --- /dev/null +++ b/tools/tests/perf_index/Makefile @@ -0,0 +1,42 @@ +SDKROOT ?= / +Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) +CC:=xcrun -sdk "$(SDKROOT)" cc +CFLAGS:=-c -Wall -pedantic +OPTIMIZATION:=-Os +LDFLAGS= + +SRCROOT?=$(shell /bin/pwd) +DSTROOT?=$(shell /bin/pwd) +OBJROOT?=$(shell /bin/pwd) + +SOURCES:=main.c stress_cpu.c stress_memory.c stress_syscall.c stress_fault.c md5.c stress_file_create.c stress_file_write.c stress_file_read.c stress_file_local.c stress_file_ram.c iperf.c compile.c stress_general.c +SOURCE_PATHS:=$(addprefix $(SRCROOT)/,$(SOURCES)) +OBJECTS:=$(addprefix $(OBJROOT)/,$(SOURCES:.c=.o)) +EXECUTABLE=perf_index + +ifdef RC_ARCHS +ARCHS:=$(RC_ARCHS) +else +ifeq ($(ARCHS),) +ifeq "$(Embedded)" "YES" +ARCHS:=armv7 armv7s +else +ARCHS:=x86_64 i386 +endif +endif +endif + +CFLAGS += $(patsubst %, -arch %, $(ARCHS)) +LDFLAGS += $(patsubst %, -arch %, $(ARCHS)) + +all: $(SOURCE_PATHS) $(EXECUTABLE) + +$(EXECUTABLE): $(OBJECTS) + $(CC) $(LDFLAGS) $(OBJECTS) -o $(DSTROOT)/$@ + +$(OBJROOT)/%.o: $(SRCROOT)/%.c + $(CC) $(CFLAGS) $(OPTIMIZATION) $< -o $@ + +clean: + rm -f *.o + rm -f $(EXECUTABLE) diff --git a/tools/tests/perf_index/compile.c b/tools/tests/perf_index/compile.c new file mode 100644 index 000000000..a43159895 --- /dev/null +++ b/tools/tests/perf_index/compile.c @@ -0,0 +1,34 @@ +#include +#include "perf_index.h" +#include + +static const char *src_dst = "/tmp/perf_index_compile_code"; +static const char *src_root = "/Network/Servers/xs1/release/Software/Zin/Projects/xnu/xnu-2050.7.9"; + +const stress_test_t compile_test = {"compile", 
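+	/* slots: init, stress, cleanup, validate -- see stress_test_t in perf_index.h */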
&compile_init, &compile, &compile_cleanup, &no_validate};
+
+DECL_INIT(compile_init) {
+	char *cmd;
+	const char *src = src_root;
+	if(test_argc >= 1)
+		src = test_argv[0];
+	assert(asprintf(&cmd, "ditto \"%s\" \"%s\"", src, src_dst) >= 0);
+	assert(system(cmd) == 0);
+	free(cmd);
+}
+
+DECL_CLEANUP(compile_cleanup) {
+	char *cmd;
+	assert(asprintf(&cmd, "rm -rf \"%s\"", src_dst) >= 0);
+	assert(system(cmd) == 0);
+	free(cmd);
+}
+
+DECL_TEST(compile) {
+	char *cmd;
+	if(thread_id == 0) {
+		assert(asprintf(&cmd, "make -C \"%s\" MAKEJOBS=-j%d", src_dst, num_threads) >= 0);
+		assert(system(cmd) == 0);
+		free(cmd);
+	}
+}
diff --git a/tools/tests/perf_index/iperf.c b/tools/tests/perf_index/iperf.c new file mode 100644 index 000000000..a8d0f3a21 --- /dev/null +++ b/tools/tests/perf_index/iperf.c @@ -0,0 +1,16 @@
+#include
+#include "perf_index.h"
+#include
+
+const stress_test_t iperf_test = {"iperf", &stress_general_init, &iperf, &stress_general_cleanup, &validate_iperf};
+
+DECL_VALIDATE(validate_iperf) {
+	return (test_argc >= 1);
+}
+
+DECL_TEST(iperf) {
+	char *cmd;
+	assert(asprintf(&cmd, "iperf -c \"%s\" -n %lld > /dev/null", test_argv[0], length) >= 0);
+	assert(system(cmd) == 0);
+	free(cmd);
+}
diff --git a/tools/tests/perf_index/main.c b/tools/tests/perf_index/main.c new file mode 100644 index 000000000..fed6c205f --- /dev/null +++ b/tools/tests/perf_index/main.c @@ -0,0 +1,259 @@
+#include
+#include
+#include
+#include
+#include "perf_index.h"
+#include
+#include
+#include
+
+#define CONTROL_PORT 17694
+
+static const stress_test_t *stress_tests[] =
+{&cpu_test, &memory_test, &syscall_test, &fault_test, &zfod_test,
+ &file_local_create_test, &file_local_write_test, &file_local_read_test,
+ &file_ram_create_test, &file_ram_read_test, &file_ram_write_test, &iperf_test,
+ &compile_test
+};
+
+static int num_threads;
+static long long all_len;
+static int test_type;
+static const char *control_host = NULL;
+static const char **test_argv;
+static int test_argc;
+struct in_addr control_host_addr;
+int control_sock;
+const char remote_str[] = "remote";
+const char ready_msg[] = "Ready";
+const char done_msg[] = "Done";
+
+static pthread_cond_t threads_running_cvar;
+static pthread_cond_t start_cvar;
+static int thread_count;
+static pthread_mutex_t count_lock;
+
+static void usage() {
+	int i;
+	fprintf(stderr, "usage: perf_index remote server\n" "or\n" "usage: perf_index type threads size [args]\n\n" "where type is one of:\n");
+	for(i=0; i < sizeof(stress_tests)/sizeof(stress_tests[0]); i++) {
+		fprintf(stderr, "%s\n", stress_tests[i]->name);
+	}
+	fprintf(stderr, "\n");
+	exit(1);
+}
+
+static int validate_args(int argc, const char **argv) {
+	int i;
+	int ret;
+	int found = 0;
+
+	if(argc < 3) {
+		return -1;
+	}
+	if(argc == 3 && strcmp(argv[1], remote_str) == 0)
+		return 0;
+
+	if(argc < 4)
+		return -1;
+
+	ret = -1;
+	for(i=0; i < sizeof(stress_tests)/sizeof(stress_tests[0]); i++) {
+		if(strcmp(argv[1], stress_tests[i]->name) == 0) {
+			ret = i;
+			found = 1;
+			break;
+		}
+	}
+
+	if(!found)
+		return -1;
+
+	if(stress_tests[i]->validate(argc-4, argv+4))
+		return ret;
+	else
+		return -1;
+}
+
+int host_to_addr(const char *hostname, struct in_addr *addr) {
+	struct addrinfo *info;
+	int err;
+	if((err = getaddrinfo(hostname, NULL, NULL, &info)) != 0) {
+		return -1;
+	}
+	*addr = ((struct sockaddr_in*)info->ai_addr)->sin_addr;
+	freeaddrinfo(info);
+	return 0;
+}
+
+static void parse_args(int argc, const char **argv);
+
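+/*
+ * Control protocol (as implemented below and in the companion control-server
+ * script): in "remote" mode the client connects to the control host on
+ * CONTROL_PORT (17694), receives its parameters as NUL-terminated strings,
+ * reports "Ready" once its worker threads are staged, waits for the server's
+ * go-ahead, runs the test, and finally reports "Done". Example invocations
+ * (host name is a placeholder):
+ *
+ *   ./perf_index cpu 4 100000000      (local: 4 threads, 100M work units split across them)
+ *   ./perf_index remote control-host  (parameters supplied by the server)
+ */
+static void read_params_from_server(void) {
+	struct sockaddr_in addr;
+	char readbuff[1024];
+	int zerocount = 0;
+	ssize_t offset = 0;
+	ssize_t recv_count;
+	ssize_t i;
+	const char **newargv = malloc(sizeof(char*) * 4);
+	assert(newargv != NULL);
+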
if(host_to_addr(control_host, &control_host_addr)<0) { + fprintf(stderr, "Could not resolve: %s\n", control_host); + exit(2); + } + + control_sock = socket(PF_INET, SOCK_STREAM, 0); + assert(control_sock != -1); + addr.sin_family = AF_INET; + addr.sin_port = htons(CONTROL_PORT); + addr.sin_addr = control_host_addr; + bzero(addr.sin_zero, sizeof addr.sin_zero); + if(connect(control_sock, (struct sockaddr *)&addr, sizeof(struct sockaddr)) == -1) { + fprintf(stderr, "Failed to connect to host: %s\n", control_host); + exit(3); + } + + while(offset=2 && readbuff[offset-1] == '\0' && readbuff[offset-2] == '\0') + break; + } + if(zerocount < 3) { + fprintf(stderr, "Received invalid parameters"); + exit(4); + } + + parse_args(zerocount+1, newargv); +} + +static void parse_args(int argc, const char **argv) { + test_type = validate_args(argc, argv); + if(test_type < 0) + usage(); + if(strcmp(argv[1], remote_str) == 0) { + control_host = argv[2]; + read_params_from_server(); + } + else { + num_threads = strtoimax(argv[2], NULL, 10); + all_len = strtoll(argv[3], NULL, 10); + test_argc = argc - 4; + test_argv = argv + 4; + } +} + +static void *stress_loop(void *data) { + int my_index = (int)data; + long long work_size = all_len / num_threads; + int work_remainder = all_len % num_threads; + + if(work_remainder > my_index) { + work_size++; + } + + pthread_mutex_lock(&count_lock); + thread_count++; + if(thread_count == num_threads) + pthread_cond_signal(&threads_running_cvar); + pthread_cond_wait(&start_cvar, &count_lock); + pthread_mutex_unlock(&count_lock); + stress_tests[test_type]->stress(my_index, num_threads, work_size, test_argc, test_argv); + return NULL; +} + +void start_timer(struct timeval *tp) { + gettimeofday(tp, NULL); +} + +void end_timer(struct timeval *tp) { + struct timeval tend; + gettimeofday(&tend, NULL); + if(tend.tv_usec >= tp->tv_usec) { + tp->tv_sec = tend.tv_sec - tp->tv_sec; + tp->tv_usec = tend.tv_usec - tp->tv_usec; + } + else { + tp->tv_sec = tend.tv_sec - tp->tv_sec - 1; + tp->tv_usec = tend.tv_usec - tp->tv_usec + 1000000; + } +} + +void print_timer(struct timeval *tp) { + printf("%ld.%06d", tp->tv_sec, tp->tv_usec); +} + +void wait_start(void) { + char readbuff[1024]; + if(control_host != NULL) { + send(control_sock, ready_msg, strlen(ready_msg), 0); + while(recv(control_sock, readbuff, sizeof(readbuff), 0)>0); + } +} + +void done(void) { + send(control_sock, done_msg, strlen(done_msg), 0); +} + +int main(int argc, const char **argv) { + int thread_index; + pthread_t *threads; + parse_args(argc, argv); + struct timeval timer; + + stress_tests[test_type]->init(num_threads, all_len, test_argc, test_argv); + pthread_cond_init(&threads_running_cvar, NULL); + pthread_cond_init(&start_cvar, NULL); + pthread_mutex_init(&count_lock, NULL); + thread_count = 0; + + threads = (pthread_t*)malloc(sizeof(pthread_t)*num_threads); + for(thread_index = 0; thread_index < num_threads; thread_index++) { + assert(pthread_create(&threads[thread_index], NULL, stress_loop, (void*)thread_index) == 0); + } + + pthread_mutex_lock(&count_lock); + if(thread_count != num_threads) + pthread_cond_wait(&threads_running_cvar, &count_lock); + pthread_mutex_unlock(&count_lock); + + wait_start(); + + start_timer(&timer); + pthread_cond_broadcast(&start_cvar); + for(thread_index = 0; thread_index < num_threads; thread_index++) { + pthread_join(threads[thread_index], NULL); + } + end_timer(&timer); + done(); + + pthread_mutex_destroy(&count_lock); + pthread_cond_destroy(&start_cvar); + 
pthread_cond_destroy(&threads_running_cvar); + + stress_tests[test_type]->cleanup(num_threads, all_len); + + print_timer(&timer); + printf("\n"); + + return 0; +} diff --git a/tools/tests/perf_index/md5.c b/tools/tests/perf_index/md5.c new file mode 100644 index 000000000..cbc9bd74d --- /dev/null +++ b/tools/tests/perf_index/md5.c @@ -0,0 +1,154 @@ +/* + * ccmd5_ltc.c + * corecrypto + * + * Created by Fabrice Gautier on 12/3/10. + * Copyright 2010,2011 Apple Inc. All rights reserved. + * + */ + +#include +#include + +#define CCMD5_BLOCK_SIZE 64 + +#define F(x,y,z) (z ^ (x & (y ^ z))) +#define G(x,y,z) (y ^ (z & (y ^ x))) +#define H(x,y,z) (x^y^z) +#define I(x,y,z) (y^(x|(~z))) + +#define CC_ROLc(X,s) (((X) << (s)) | ((X) >> (32 - (s)))) + +#define FF(a,b,c,d,M,s,t) \ +a = (a + F(b,c,d) + M + t); a = CC_ROLc(a, s) + b; + +#define GG(a,b,c,d,M,s,t) \ +a = (a + G(b,c,d) + M + t); a = CC_ROLc(a, s) + b; + +#define HH(a,b,c,d,M,s,t) \ +a = (a + H(b,c,d) + M + t); a = CC_ROLc(a, s) + b; + +#define II(a,b,c,d,M,s,t) \ +a = (a + I(b,c,d) + M + t); a = CC_ROLc(a, s) + b; + +static void md5_compress(uint32_t *state, unsigned long nblocks, const void *in) +{ + uint32_t i, W[16], a, b, c, d; + uint32_t *s = state; + const unsigned char *buf = in; + + while(nblocks--) { + + /* copy the state into 512-bits into W[0..15] */ + for (i = 0; i < 16; i++) { + W[i] = ((uint32_t*)buf)[i]; + } + + /* copy state */ + a = s[0]; + b = s[1]; + c = s[2]; + d = s[3]; + + FF(a,b,c,d,W[0],7,0xd76aa478) + FF(d,a,b,c,W[1],12,0xe8c7b756) + FF(c,d,a,b,W[2],17,0x242070db) + FF(b,c,d,a,W[3],22,0xc1bdceee) + FF(a,b,c,d,W[4],7,0xf57c0faf) + FF(d,a,b,c,W[5],12,0x4787c62a) + FF(c,d,a,b,W[6],17,0xa8304613) + FF(b,c,d,a,W[7],22,0xfd469501) + FF(a,b,c,d,W[8],7,0x698098d8) + FF(d,a,b,c,W[9],12,0x8b44f7af) + FF(c,d,a,b,W[10],17,0xffff5bb1) + FF(b,c,d,a,W[11],22,0x895cd7be) + FF(a,b,c,d,W[12],7,0x6b901122) + FF(d,a,b,c,W[13],12,0xfd987193) + FF(c,d,a,b,W[14],17,0xa679438e) + FF(b,c,d,a,W[15],22,0x49b40821) + GG(a,b,c,d,W[1],5,0xf61e2562) + GG(d,a,b,c,W[6],9,0xc040b340) + GG(c,d,a,b,W[11],14,0x265e5a51) + GG(b,c,d,a,W[0],20,0xe9b6c7aa) + GG(a,b,c,d,W[5],5,0xd62f105d) + GG(d,a,b,c,W[10],9,0x02441453) + GG(c,d,a,b,W[15],14,0xd8a1e681) + GG(b,c,d,a,W[4],20,0xe7d3fbc8) + GG(a,b,c,d,W[9],5,0x21e1cde6) + GG(d,a,b,c,W[14],9,0xc33707d6) + GG(c,d,a,b,W[3],14,0xf4d50d87) + GG(b,c,d,a,W[8],20,0x455a14ed) + GG(a,b,c,d,W[13],5,0xa9e3e905) + GG(d,a,b,c,W[2],9,0xfcefa3f8) + GG(c,d,a,b,W[7],14,0x676f02d9) + GG(b,c,d,a,W[12],20,0x8d2a4c8a) + HH(a,b,c,d,W[5],4,0xfffa3942) + HH(d,a,b,c,W[8],11,0x8771f681) + HH(c,d,a,b,W[11],16,0x6d9d6122) + HH(b,c,d,a,W[14],23,0xfde5380c) + HH(a,b,c,d,W[1],4,0xa4beea44) + HH(d,a,b,c,W[4],11,0x4bdecfa9) + HH(c,d,a,b,W[7],16,0xf6bb4b60) + HH(b,c,d,a,W[10],23,0xbebfbc70) + HH(a,b,c,d,W[13],4,0x289b7ec6) + HH(d,a,b,c,W[0],11,0xeaa127fa) + HH(c,d,a,b,W[3],16,0xd4ef3085) + HH(b,c,d,a,W[6],23,0x04881d05) + HH(a,b,c,d,W[9],4,0xd9d4d039) + HH(d,a,b,c,W[12],11,0xe6db99e5) + HH(c,d,a,b,W[15],16,0x1fa27cf8) + HH(b,c,d,a,W[2],23,0xc4ac5665) + II(a,b,c,d,W[0],6,0xf4292244) + II(d,a,b,c,W[7],10,0x432aff97) + II(c,d,a,b,W[14],15,0xab9423a7) + II(b,c,d,a,W[5],21,0xfc93a039) + II(a,b,c,d,W[12],6,0x655b59c3) + II(d,a,b,c,W[3],10,0x8f0ccc92) + II(c,d,a,b,W[10],15,0xffeff47d) + II(b,c,d,a,W[1],21,0x85845dd1) + II(a,b,c,d,W[8],6,0x6fa87e4f) + II(d,a,b,c,W[15],10,0xfe2ce6e0) + II(c,d,a,b,W[6],15,0xa3014314) + II(b,c,d,a,W[13],21,0x4e0811a1) + II(a,b,c,d,W[4],6,0xf7537e82) + II(d,a,b,c,W[11],10,0xbd3af235) + II(c,d,a,b,W[2],15,0x2ad7d2bb) + 
II(b,c,d,a,W[9],21,0xeb86d391) + + /* store state */ + s[0] += a; + s[1] += b; + s[2] += c; + s[3] += d; + + buf+=CCMD5_BLOCK_SIZE; + } +} + +void md5_hash(uint8_t *message, uint64_t len, uint32_t *hash) { + hash[0] = 0x67452301; + hash[1] = 0xEFCDAB89; + hash[2] = 0x98BADCFE; + hash[3] = 0x10325476; + + md5_compress(hash, len/64, message); + + uint32_t blockbuff[16]; + uint8_t *byteptr = (uint8_t*)blockbuff; + + int left = len % 64; + memcpy(byteptr, message + len-left, left); + + byteptr[left] = 0x80; + left++; + if (64 - left >= 8) + bzero(byteptr + left, 56 - left); + else { + memset(byteptr + left, 0, 64 - left); + md5_compress(hash, 1, blockbuff); + bzero(blockbuff, 56); + } + blockbuff[14] = (uint32_t)(len << 3); + blockbuff[15] = (uint32_t)(len >> 29); + md5_compress(hash, 1, blockbuff); +} diff --git a/tools/tests/perf_index/perf_index.h b/tools/tests/perf_index/perf_index.h new file mode 100644 index 000000000..7925853a5 --- /dev/null +++ b/tools/tests/perf_index/perf_index.h @@ -0,0 +1,96 @@ +#ifndef __PERF_INDEX_H_ +#define __PERF_INDEX_H_ + +#include +#include +#include +#include +#include +#include +#include + +#define DECL_VALIDATE(validatetest) int validatetest(int test_argc, const char **test_argv) +#define DECL_INIT(inittest) void inittest(int num_threads, long long length, int test_argc, const char **test_argv) +#define DECL_TEST(test) void test(int thread_id, int num_threads, long long length, int test_argc, const char **test_argv) +#define DECL_CLEANUP(cleanuptest) void cleanuptest(int num_threads, long long length) + +#define MAXPATHLEN 1024 + +typedef DECL_INIT((*init_func)); +typedef DECL_TEST((*stress_func)); +typedef DECL_CLEANUP((*cleanup_func)); +typedef DECL_VALIDATE((*validate_func)); + +typedef struct { + char *name; + init_func init; + stress_func stress; + cleanup_func cleanup; + validate_func validate; +} stress_test_t; + +extern const stress_test_t cpu_test; +extern const stress_test_t memory_test; +extern const stress_test_t syscall_test; +extern const stress_test_t fault_test; +extern const stress_test_t zfod_test; +extern const stress_test_t file_local_create_test; +extern const stress_test_t file_local_write_test; +extern const stress_test_t file_local_read_test; +extern const stress_test_t file_ram_create_test; +extern const stress_test_t file_ram_write_test; +extern const stress_test_t file_ram_read_test; +extern const stress_test_t iperf_test; +extern const stress_test_t compile_test; + +DECL_VALIDATE(no_validate); +DECL_VALIDATE(validate_iperf); + +DECL_INIT(stress_memory_init); +DECL_INIT(stress_syscall_init); +DECL_INIT(stress_fault_init); +DECL_INIT(stress_file_local_create_init); +DECL_INIT(stress_file_local_read_init); +DECL_INIT(stress_file_local_write_init); +DECL_INIT(stress_file_ram_create_init); +DECL_INIT(stress_file_ram_read_init); +DECL_INIT(stress_file_ram_write_init); +DECL_INIT(compile_init); +DECL_INIT(stress_general_init); + +DECL_TEST(stress_memory); +DECL_TEST(stress_cpu); +DECL_TEST(stress_syscall); +DECL_TEST(stress_fault); +DECL_TEST(stress_zfod); +DECL_TEST(stress_file_local_create); +DECL_TEST(stress_file_local_read); +DECL_TEST(stress_file_local_write); +DECL_TEST(stress_file_ram_create); +DECL_TEST(stress_file_ram_read); +DECL_TEST(stress_file_ram_write); +DECL_TEST(iperf); +DECL_TEST(compile); +DECL_TEST(stress_general); + +DECL_CLEANUP(stress_general_cleanup); +DECL_CLEANUP(stress_file_local_create_cleanup); +DECL_CLEANUP(stress_file_local_read_cleanup); +DECL_CLEANUP(stress_file_local_write_cleanup); 
+DECL_CLEANUP(stress_file_ram_create_cleanup); +DECL_CLEANUP(stress_file_ram_read_cleanup); +DECL_CLEANUP(stress_file_ram_write_cleanup); +DECL_CLEANUP(compile_cleanup); + +void stress_file_create(const char *fs_path, int thread_id, int num_threads, long long length); + +void stress_file_write_init(const char *fs_path, int num_threads, long long length); +void stress_file_write(const char *fs_path, int thread_id, int num_threads, long long length, long long max_file_size); + +void stress_file_read_init(const char *fs_path, int num_threads, long long length, long long max_file_size); +void stress_file_read(const char *fs_path, int thread_id, int num_threads, long long length, long long max_file_size); +void stress_file_read_cleanup(const char *fs_path, int num_threads, long long length); + +void md5_hash(uint8_t *message, uint64_t len, uint32_t *hash); + +#endif diff --git a/tools/tests/perf_index/stress_cpu.c b/tools/tests/perf_index/stress_cpu.c new file mode 100644 index 000000000..1e0c4b2e0 --- /dev/null +++ b/tools/tests/perf_index/stress_cpu.c @@ -0,0 +1,11 @@ +#include "perf_index.h" + +const stress_test_t cpu_test = {"cpu", &stress_general_init, &stress_cpu, &stress_general_cleanup, &no_validate}; + +DECL_TEST(stress_cpu) { + long long i; + uint32_t digest[4]; + for(i=0; i +#include + +#if TARGET_OS_EMBEDDED +#define MEMSIZE (1L<<28) +#else +#define MEMSIZE (1L<<30) +#endif + +typedef enum { + TESTZFOD, + TESTFAULT +} testtype_t; + +const stress_test_t fault_test = {"fault", &stress_fault_init, &stress_fault, &stress_general_cleanup, &no_validate}; +const stress_test_t zfod_test = {"zfod", &stress_fault_init, &stress_zfod, &stress_general_cleanup, &no_validate}; + +static char *memblock; + +DECL_INIT(stress_fault_init) { + int pgsz = getpagesize(); + memblock = (char *)mmap(NULL, MEMSIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + char *ptr; + /* make sure memory is paged */ + for(ptr = memblock; ptr +#include "perf_index.h" +#include + +void stress_file_create(const char *fs_path, int thread_id, int num_threads, long long length) { + long long i; + int fd; + char filepath[MAXPATHLEN]; + for(i=0; i=0); + close(fd); + } + for(i=0; i=0); + } +} diff --git a/tools/tests/perf_index/stress_file_local.c b/tools/tests/perf_index/stress_file_local.c new file mode 100644 index 000000000..d948caf1a --- /dev/null +++ b/tools/tests/perf_index/stress_file_local.c @@ -0,0 +1,50 @@ +#include +#include "perf_index.h" +#include + + +const stress_test_t file_local_create_test = {"file_create", &stress_file_local_create_init, &stress_file_local_create, &stress_file_local_create_cleanup, &no_validate}; +const stress_test_t file_local_write_test = {"file_write", &stress_file_local_write_init, &stress_file_local_write, &stress_file_local_write_cleanup, &no_validate}; +const stress_test_t file_local_read_test = {"file_read", &stress_file_local_read_init, &stress_file_local_read, &stress_file_local_read_cleanup, &no_validate}; + +static char fs_path[MAXPATHLEN]; + +static void setup_local_volume(void) { + snprintf(fs_path, MAXPATHLEN, "%s", "/tmp"); +} + +DECL_INIT(stress_file_local_read_init) { + setup_local_volume(); + stress_file_read_init(fs_path, num_threads, length, 0L); +} + +DECL_TEST(stress_file_local_read) { + stress_file_read(fs_path, thread_id, num_threads, length, 0L); +} + +DECL_CLEANUP(stress_file_local_read_cleanup) { + stress_file_read_cleanup(fs_path, num_threads, length); +} + +DECL_INIT(stress_file_local_write_init) { + setup_local_volume(); + 
stress_file_write_init(fs_path, num_threads, length); +} + +DECL_TEST(stress_file_local_write) { + stress_file_write(fs_path, thread_id, num_threads, length, 0L); +} + +DECL_CLEANUP(stress_file_local_write_cleanup) { +} + +DECL_INIT(stress_file_local_create_init) { + setup_local_volume(); +} + +DECL_TEST(stress_file_local_create) { + stress_file_create(fs_path, thread_id, num_threads, length); +} + +DECL_CLEANUP(stress_file_local_create_cleanup) { +} diff --git a/tools/tests/perf_index/stress_file_ram.c b/tools/tests/perf_index/stress_file_ram.c new file mode 100644 index 000000000..6f203bae4 --- /dev/null +++ b/tools/tests/perf_index/stress_file_ram.c @@ -0,0 +1,65 @@ +#include +#include "perf_index.h" +#include + +#define MAX_FILE_SIZE 536870912L + +const stress_test_t file_ram_create_test = {"ram_file_create", &stress_file_ram_create_init, &stress_file_ram_create, &stress_file_ram_create_cleanup, &no_validate}; +const stress_test_t file_ram_write_test = {"ram_file_write", &stress_file_ram_write_init, &stress_file_ram_write, &stress_file_ram_write_cleanup, &no_validate}; +const stress_test_t file_ram_read_test = {"ram_file_read", &stress_file_ram_read_init, &stress_file_ram_read, &stress_file_ram_read_cleanup, &no_validate}; + +static const char ramdiskname[] = "StressRamDisk"; + +static const char fs_path[MAXPATHLEN] = "/Volumes/StressRamDisk"; + +static void setup_ram_volume(void) { + char *cmd; + assert(asprintf(&cmd, "diskutil erasevolume HFS+ \"%s\" `hdiutil attach -nomount ram://1500000` >/dev/null", ramdiskname) >= 0); + assert(system(cmd) == 0); + free(cmd); +} + +static void cleanup_ram_volume(void) { + char *cmd; + assert(asprintf(&cmd, "umount -f %s >/dev/null", fs_path) >= 0); + assert(system(cmd) == 0); + free(cmd); +} + +DECL_INIT(stress_file_ram_read_init) { + setup_ram_volume(); + stress_file_read_init(fs_path, num_threads, length, MAX_FILE_SIZE); +} + +DECL_TEST(stress_file_ram_read) { + stress_file_read(fs_path, thread_id, num_threads, length, MAX_FILE_SIZE); +} + +DECL_CLEANUP(stress_file_ram_read_cleanup) { + cleanup_ram_volume(); +} + +DECL_INIT(stress_file_ram_write_init) { + setup_ram_volume(); + stress_file_write_init(fs_path, num_threads, length); +} + +DECL_TEST(stress_file_ram_write) { + stress_file_write(fs_path, thread_id, num_threads, length, MAX_FILE_SIZE); +} + +DECL_CLEANUP(stress_file_ram_write_cleanup) { + cleanup_ram_volume(); +} + +DECL_INIT(stress_file_ram_create_init) { + setup_ram_volume(); +} + +DECL_TEST(stress_file_ram_create) { + stress_file_create(fs_path, thread_id, num_threads, length); +} + +DECL_CLEANUP(stress_file_ram_create_cleanup) { + cleanup_ram_volume(); +} diff --git a/tools/tests/perf_index/stress_file_read.c b/tools/tests/perf_index/stress_file_read.c new file mode 100644 index 000000000..29096db40 --- /dev/null +++ b/tools/tests/perf_index/stress_file_read.c @@ -0,0 +1,66 @@ +#include +#include "perf_index.h" +#include + +#define MAXFILESIZE 8589934592L +#define MIN(a,b) ((a)<(b) ? 
(a) : (b)) + +static char readbuff[4096]; + +void stress_file_read_init(const char *fs_path, int num_threads, long long length, long long max_file_size) { + int fd; + char filepath[MAXPATHLEN]; + long long left; + size_t writelen; + + if(max_file_size == 0) + max_file_size = MAXFILESIZE; + + left = MIN(length, max_file_size/num_threads); + + snprintf(filepath, sizeof(filepath), "%s/file_read", fs_path); + fd = open(filepath, O_CREAT | O_EXCL | O_WRONLY, 0644); + assert(fd > 0); + bzero(readbuff, sizeof(readbuff)); + + while(left > 0) { + writelen = sizeof(readbuff) < left ? sizeof(readbuff) : left; + assert(write(fd, readbuff, writelen) == writelen); + left -= writelen; + } +} + +void stress_file_read(const char *fs_path, int thread_id, int num_threads, long long length, long long max_file_size) { + long long left; + size_t file_offset = 0; + int readlen; + int fd; + char filepath[MAXPATHLEN]; + long long filesize; + + + if(max_file_size == 0) + max_file_size = MAXFILESIZE; + filesize = MIN(length, max_file_size/num_threads); + + snprintf(filepath, sizeof(filepath), "%s/file_read", fs_path); + fd = open(filepath, O_RDONLY); + assert(fd > 0); + for(left=length; left>0;) { + readlen = sizeof(readbuff) < left ? sizeof(readbuff) : left; + if(file_offset+readlen > filesize) { + lseek(fd, 0, SEEK_SET); + file_offset = 0; + continue; + } + assert(read(fd, readbuff, readlen) == readlen); + left -= readlen; + file_offset += readlen; + } +} + +void stress_file_read_cleanup(const char *fs_path, int num_threads, long long length) { + char filepath[MAXPATHLEN]; + snprintf(filepath, sizeof(filepath), "%s/file_read", fs_path); + assert(unlink(filepath)>=0); +} diff --git a/tools/tests/perf_index/stress_file_write.c b/tools/tests/perf_index/stress_file_write.c new file mode 100644 index 000000000..fc87fda10 --- /dev/null +++ b/tools/tests/perf_index/stress_file_write.c @@ -0,0 +1,46 @@ +#include +#include "perf_index.h" +#include + +#define MAXFILESIZE 8589934592L + +static int *fds = NULL; +static char writebuff[4096]; + +void stress_file_write_init(const char *fs_path, int num_threads, long long length) { + int i; + char filepath[MAXPATHLEN]; + + if(fds == NULL) + fds = (int*)malloc(sizeof(int)*num_threads); + for(i=0; i 0); + } + bzero(writebuff, sizeof(writebuff)); +} + +void stress_file_write(const char *fs_path, int thread_id, int num_threads, long long length, long long max_file_size) { + long long left; + size_t file_offset = 0; + int writelen; + char filepath[MAXPATHLEN]; + int fd = fds[thread_id]; + + if(max_file_size == 0) + max_file_size = MAXFILESIZE; + + for(left=length; left>0;) { + writelen = sizeof(writebuff) < left ? 
sizeof(writebuff) : left; + assert(write(fd, writebuff, writelen) == writelen); + left -= writelen; + file_offset += writelen; + if(file_offset>max_file_size/num_threads) { + lseek(fd, 0, SEEK_SET); + file_offset = 0; + } + } + snprintf(filepath, sizeof(filepath), "%s/file_write-%d", fs_path, thread_id); + assert(unlink(filepath)>=0); +} diff --git a/tools/tests/perf_index/stress_general.c b/tools/tests/perf_index/stress_general.c new file mode 100644 index 000000000..90e5e39ce --- /dev/null +++ b/tools/tests/perf_index/stress_general.c @@ -0,0 +1,10 @@ +#include "perf_index.h" +DECL_VALIDATE(no_validate) { + return 1; +} +DECL_INIT(stress_general_init) { +} +DECL_TEST(stress_general_test) { +} +DECL_CLEANUP(stress_general_cleanup) { +} diff --git a/tools/tests/perf_index/stress_memory.c b/tools/tests/perf_index/stress_memory.c new file mode 100644 index 000000000..0d6c4cad9 --- /dev/null +++ b/tools/tests/perf_index/stress_memory.c @@ -0,0 +1,62 @@ +#include "perf_index.h" +#include +#include + +static char *memblock; +static size_t memsize; + +const stress_test_t memory_test = {"memory", &stress_memory_init, &stress_memory, &stress_general_cleanup, &no_validate}; + +size_t hw_memsize(void) { + int mib[2]; + size_t len; + size_t my_memsize; + mib[0] = CTL_HW; + mib[1] = HW_MEMSIZE; + len = sizeof(my_memsize); + sysctl(mib, 2, &my_memsize, &len, NULL, 0); + return my_memsize; +} + +DECL_INIT(stress_memory_init) { + char *memblockfiller; + long long i; + int pgsz = getpagesize(); + + /* Heuristic: use half the physical memory, hopefully this should work on all + * devices. We use the amount of physical memory, rather than some softer + * metric, like amount of free memory, so that the memory allocated is always + * consistent for a given device. + */ + memsize = hw_memsize()/2; + memblock = (char*)malloc(memsize); + memblockfiller = memblock; + + /* Do this manually, to make sure everything is paged in */ + for(i=0; i0) { + copy_len = region_len < left ? region_len : left; + memcpy(memblock+region_start+memsize/2, memblock+region_start, copy_len); + left -= copy_len; + } +} diff --git a/tools/tests/perf_index/stress_syscall.c b/tools/tests/perf_index/stress_syscall.c new file mode 100644 index 000000000..c53b8afe7 --- /dev/null +++ b/tools/tests/perf_index/stress_syscall.c @@ -0,0 +1,13 @@ +#include "perf_index.h" + +const stress_test_t syscall_test = {"syscall", &stress_syscall_init, &stress_syscall, &stress_general_cleanup, &no_validate}; + +DECL_INIT(stress_syscall_init) { +} + +DECL_TEST(stress_syscall) { + long long i; + for(i=0; i 0: + rl, _, _ = select.select(client_sockets_set, [], []) + for client_socket in rl: + sentmsg = client_socket.recv(1024) + if sentmsg == msg: + client_sockets_set.remove(client_socket) + +def main(num_clients, test_type, num_threads, job_size, args): + client_sockets = [] + control_socket = socket.socket() + control_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + control_socket.bind(("", _CONTROL_PORT)) + control_socket.listen(num_clients) + while(len(client_sockets) diff --git a/tools/tests/unit_tests/Makefile b/tools/tests/unit_tests/Makefile new file mode 100644 index 000000000..08e4fd429 --- /dev/null +++ b/tools/tests/unit_tests/Makefile @@ -0,0 +1,237 @@ +#!/usr/bin/make +# This file lists all individual tests added over time to test various functionality. +# The Raft TestBot framework runs the tests based on the targets listed in this file. 
+# Please review the following guidelines to ensure successful execution of your test case +# +# == Steps followed by Raft testbot == +# * find target name from this Makefile. A target is identified by the string : related files +# * build the target with the command "make ". The current dir is same as of this Makefile +# * The test is executed with following commands "cd BUILD/dst/; ./ " +# * The exit value of is logged. (0 = PASS and = FAIL) +# * remove the BUILD directory +# +# == Note about SDKROOT == +# The environment variable SDKROOT must be passed to select the appropriate SDK. +# x86/OSX is the default, so to build for iphone, you must: +# +# 64-bit: $make SDKROOT=iphoneos.internal +# 32-bit: $make SDKROOT=iphoneos.internal ARCH_CONFIGS="armv7" +# +# == How to add a new test == +# * Create a test directory based on radar #. (for example test_) +# * Put test specific files in the directory. +# * Add an entry in this Makefile (reserved targetnames are {run_tests.sh, xnu_target_executables.list, build_*.log}) +# targetname: testdir/programname.c +# +# +# * Check if your target name is listed in the right configurations. +# $make list_targets +# optionally you can pass SDKROOT=iphoneos|iphoneos.internal|macosx|macosx.internal and verify +# the built binary is of right arch and config. +# +# * verify that your test setup works by running the following commands +# $make +# $cd BUILD/dst/ +# $./targetname +# +# == Easy Option == +# look at some example targets in this file and replicate that :) +# + +ifneq ($(SRCROOT),) +SRCDIR=$(SRCROOT) +else +SRCDIR?=$(shell /bin/pwd) +endif + +ifneq ($(DSTROOT),) +BUILDDIR?=$(DSTROOT) +else +BUILDDIR?=$(SRCDIR)/BUILD/dst +endif + +# make sure we have a build directory +$(shell [ -d "$(BUILDDIR)" ] || mkdir -p $(BUILDDIR)) + +SDKROOT ?= / +TARGETSDK:=$(SDKROOT) + + +# setup the TARGETSDK and SDKROOT variables +ifeq (/,$(SDKROOT)) +SDKROOTPATH=/ +else +SDKROOTPATH:=$(shell /usr/bin/xcodebuild -sdk $(TARGETSDK) -version Path) +endif + +ifeq ($(SDKROOTPATH),) +$(error "Unable to find any SDKROOT on host. Exiting") +endif + +PRIVATE_INCLUDES = $(SDKROOTPATH)/System/Library/Frameworks/System.framework/PrivateHeaders + +#arch configs if not provided +ifdef RC_ARCHS +ARCH_CONFIGS:=$(RC_ARCHS) +endif +ifeq ($(ARCH_CONFIGS),) +ARCH_CONFIGS:= +ifeq (iPhone,$(findstring iPhone,$(SDKROOTPATH))) +ARCH_CONFIGS:=-arch armv7 +endif + +else +TMP_ARCHCONF:=$(foreach argarch,$(ARCH_CONFIGS),-arch $(argarch) ) +override ARCH_CONFIGS:=$(TMP_ARCHCONF) +endif + + +#setup the compiler flags. +ifeq (iPhone,$(findstring iPhone,$(SDKROOTPATH))) +CFLAGS=-I$(BUILDDIR) -I. -isysroot $(SDKROOTPATH) $(ARCH_CONFIGS) +CC=xcrun -sdk $(TARGETSDK) clang +MIG=xcrun -sdk $(TARGETSDK) mig +XCODEBUILD=xcodebuild -sdk iphoneos.internal $(ARCH_CONFIGS) +CODESIGN=$(shell xcrun -sdk $(TARGETSDK) -find codesign) +CODESIGN_ALLOCATE=$(shell xcrun -sdk $(TARGETSDK) -find codesign_allocate) +TARGET_NAME=ios +else +#Compiler flags for macosx +CFLAGS=-I$(BUILDDIR) -I. 
$(ARCH_CONFIGS) +CC=clang +MIG=xcrun mig +XCODEBUILD=xcodebuild +CODESIGN=codesign +CODESIGN_ALLOCATE=$(shell xcrun -find codesign_allocate) +TARGET_NAME=osx +endif + +#Flags that define the environment +TARGETOSVERS:=$(shell /usr/bin/xcodebuild -sdk $(TARGETSDK) -version ProductVersion) +TARGETOSBUILDVERS:=$(shell /usr/bin/xcodebuild -sdk $(TARGETSDK) -version ProductBuildVersion) +SDKTARGET_STR:=$(subst .,_,$(TARGETSDK)) +MORECFLAGS=-D TARGET_SDK_$(SDKTARGET_STR)=1 -D TARGET_OS_VERS=\"$(TARGETOSVERS)\" -D TARGET_OS_BUILD_VERS=\"$(TARGETOSBUILDVERS)\" + +#special recipe for special targets: list_targets and clean +define _sed_target_extract_script +/^$$/ { n +/^[^ ]*:/p +} +endef +export sed_target_extract_script=$(_sed_target_extract_script) +all: + @ for TARGET in `make list_targets`; do \ + if [ $$TARGET != all ]; then \ + make $$TARGET DSTROOT="$(BUILDDIR)/$$TARGET"; \ + fi \ + done +list_targets: + @ make -rpn | sed -n -e "$$sed_target_extract_script" | cut -d':' -f1 | grep -v '^clean' | grep -v '^list_targets' + +clean: + rm -fr ./BUILD/ +# == List of targets for test cases == +#Note: target name should be same as the executable in $(BUILDDIR) +#And: target name has to be seperate from source directory name. Using "_src" suffix is a good idea. +sampletest: sampletest.c + $(CC) -o $(BUILDDIR)/$@ $^ $(CFLAGS) $(MORECFLAGS) + +pipe_test_10807398: pipe_test_10807398_src/parent.c pipe_test_10807398_src/child.c + $(CC) -o $(BUILDDIR)/$@ pipe_test_10807398_src/parent.c $(CFLAGS) + $(CC) -o $(BUILDDIR)/child pipe_test_10807398_src/child.c $(CFLAGS) + +pipes_fill_procinfo_11179336: pipes_fill_procinfo_11179336.c + $(CC) -o $(BUILDDIR)/$@ pipes_fill_procinfo_11179336.c $(CFLAGS) + +test_wq_exit_race_panic_10970548: test_wq_exit_race_panic_10970548.c + $(CC) -o $(BUILDDIR)/$@ test_wq_exit_race_panic_10970548.c $(CFLAGS) + +ptrace_tests_10767133: ptrace_tests_10767133_src/ptrace_tests_10767133.c + $(CC) -O0 -o $(BUILDDIR)/ptrace_tests_10767133 ptrace_tests_10767133_src/ptrace_tests_10767133.c $(CFLAGS) -Wall + +ptrace_test_12507045: ptrace_test_12507045_src/ptrace_test.c + $(CC) -O0 -o $(BUILDDIR)/ptrace_test_12507045 $< $(CFLAGS) + +clock_types_6368156: clock_types_6368156.c + $(CC) -o $(BUILDDIR)/$@ $^ $(CFLAGS) + +semctl_test_8534495: semctl_test_8534495_src/semctl_test_8534495.c + $(CC) -o $(BUILDDIR)/semctl_test_8534495 semctl_test_8534495_src/semctl_test_8534495.c $(CFLAGS) + +ptcwd_test_11269991: ptcwd_test_11269991_src/ptcwd_test_11269991.c + $(CC) -o $(BUILDDIR)/ptcwd_test_11269991 ptcwd_test_11269991_src/ptcwd_test_11269991.c $(CFLAGS) + +sprace_test_11891562: sprace_test_11891562_src/sprace_test_11891562.c + $(CC) -o $(BUILDDIR)/sprace_test_11891562 sprace_test_11891562_src/sprace_test_11891562.c $(CFLAGS) + +guarded_fd_tests_11746236: guarded_fd_tests_11746236_src/mach_exc.defs guarded_fd_tests_11746236_src/guarded_test_framework.c guarded_fd_tests_11746236_src/guarded_test.c + $(MIG) $(CFLAGS) \ + -user $(BUILDDIR)/mach_excUser.c \ + -server $(BUILDDIR)/mach_excServer.c \ + -header $(BUILDDIR)/mach_exc.h \ + guarded_fd_tests_11746236_src/mach_exc.defs + $(CC) -o $(BUILDDIR)/guarded_fd_tests_11746236 \ + guarded_fd_tests_11746236_src/guarded_test_framework.c \ + $(BUILDDIR)/mach_excServer.c $(CFLAGS) -I$(PRIVATE_INCLUDES) -I$(BUILDDIR) + $(CC) -o $(BUILDDIR)/guarded_test \ + guarded_fd_tests_11746236_src/guarded_test.c \ + -I$(PRIVATE_INCLUDES) $(CFLAGS) + +thread_get_state_11918811: thread_get_state_11918811_src/thread_get_state.c + $(MIG) $(CFLAGS) \ + -sheader 
$(BUILDDIR)/excserver.h \ + -server $(BUILDDIR)/excserver.c \ + -header /dev/null -user /dev/null \ + thread_get_state_11918811_src/excserver.defs + $(CC) -o $(BUILDDIR)/thread_get_state_11918811 \ + thread_get_state_11918811_src/thread_get_state.c \ + $(BUILDDIR)/excserver.c \ + $(CFLAGS) + +fcntlrangecheck_tests_11202484: fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c + $(CC) -o $(BUILDDIR)/fcntlrangecheck_tests_11202484 fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c $(CFLAGS) + +test_waitqlocktry_12053360: test_waitqlocktry_12053360.c + $(CC) -o $(BUILDDIR)/test_waitqlocktry_12053360 test_waitqlocktry_12053360.c $(CFLAGS) + +guarded_mach_port_tests_11178535: guarded_mach_port_tests_11178535_src/mach_exc.defs guarded_mach_port_tests_11178535_src/guarded_test_framework.c guarded_mach_port_tests_11178535_src/guarded_test.c + $(MIG) $(CFLAGS) \ + -user $(BUILDDIR)/mach_excUser.c \ + -server $(BUILDDIR)/mach_excServer.c \ + -header $(BUILDDIR)/mach_exc.h \ + guarded_mach_port_tests_11178535_src/mach_exc.defs + $(CC) -o $(BUILDDIR)/guarded_mach_port_tests_11178535 \ + guarded_mach_port_tests_11178535_src/guarded_test_framework.c \ + $(BUILDDIR)/mach_excServer.c $(CFLAGS) -I$(PRIVATE_INCLUDES) -I$(BUILDDIR) + $(CC) -o $(BUILDDIR)/guarded_mp_test \ + guarded_mach_port_tests_11178535_src/guarded_test.c \ + -I$(PRIVATE_INCLUDES) $(CFLAGS) + +cpu_monitor_tests_11646922: cpu_monitor_tests_11646922_src/cpumon_test_framework.c + $(MIG) $(CFLAGS) \ + -sheader $(BUILDDIR)/excserver.h \ + -server $(BUILDDIR)/excserver.c \ + -header /dev/null -user /dev/null \ + cpu_monitor_tests_11646922_src/mach_exc.defs + $(CC) -o $(BUILDDIR)/cpu_monitor_tests_11646922 \ + cpu_monitor_tests_11646922_src/cpumon_test_framework.c \ + $(BUILDDIR)/excserver.c \ + $(CFLAGS) $(MORECFLAGS) -I$(PRIVATE_INCLUDES) + $(XCODEBUILD) -project cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj TARGET_BUILD_DIR=$(BUILDDIR) + $(CC) -o $(BUILDDIR)/mem_hog \ + cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c \ + $(CFLAGS) $(MORECFLAGS) -I$(PRIVATE_INCLUDES) + +monitor_stress_12901965: monitor_stress_12901965_src/monitor_stress/monitor_stress.m + echo '#!/bin/sh\n./monitor_stress -e 20\n./monitor_stress -w 3 -e 20' > $(BUILDDIR)/monitor_stress_12901965 + chmod +x $(BUILDDIR)/monitor_stress_12901965 + $(XCODEBUILD) -target $(TARGET_NAME) -project monitor_stress_12901965_src/monitor_stress.xcodeproj TARGET_BUILD_DIR=$(BUILDDIR) + +codesigntests: codesigntests.c codesigntests-entitlements.plist + $(CC) -o $(BUILDDIR)/codesigntests codesigntests.c $(CFLAGS) + env CODESIGN_ALLOCATE=$(CODESIGN_ALLOCATE) \ + $(CODESIGN) -s - --entitlements codesigntests-entitlements.plist $(BUILDDIR)/codesigntests + +libproc_privilege_test_13203438: libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c + $(CC) -o $(BUILDDIR)/libproc_privilege_test_13203438 libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c $(CFLAGS) + diff --git a/tools/tests/unit_tests/build_tests.sh b/tools/tests/unit_tests/build_tests.sh new file mode 100755 index 000000000..4de662147 --- /dev/null +++ b/tools/tests/unit_tests/build_tests.sh @@ -0,0 +1,134 @@ +#!/usr/bin/env bash + +function run_test() { + local testname="$1" + local out_status=1 + local out_str=" " + + echo "" + echo "[TEST] ${testname} " + if [ -x "./${testname}" ] + then + echo "[BEGIN] Executing test ${testname}" + out_str=$(./"${testname}" 2>&1) + out_status="$?" 
+	else
+		echo "[FAIL] Failed to execute test with name ${testname}"
+		out_status=1
+	fi
+
+	if [ "${out_status}" == "0" ]
+	then
+		echo "[PASS] Successfully finished ${testname}"
+	else
+		echo "${out_str}"
+		echo "[FAIL] Test failed ${testname} exit value $out_status"
+		echo " *** FAILURE of test ${testname} *** "
+		echo ""
+	fi
+	return $out_status
+}
+
+function build_test(){
+	local testtarget="$1"
+	local out_str=" "
+	local out_status=1
+
+	echo "[MAKE] Building test ${testtarget}"
+	out_str=$(make ${MAKE_ARGS} ${testtarget} 2>&1)
+	out_status=$?
+	echo "${out_str}" >> ${BUILD_LOG_FILENAME}
+
+	if [ "${out_status}" == "0" ]
+	then
+		echo "[PASS][BUILD] Successfully built ${testtarget}"
+	else
+		echo "${out_str}"
+		echo "[FAIL][BUILD] Failed to build ${testtarget}"
+	fi
+	return ${out_status}
+}
+
+CMD=build
+TARGET_MODE=$1
+TIMESTAMP=`date +%s`
+PROGNAME=$0
+TARGET_LIST_FILE="xnu_target_executables.list"
+BUILD_DIR="${PWD}/BUILD/"
+BUILD_LOG_FILENAME="${BUILD_DIR}/build_${TIMESTAMP}.log"
+
+# load the list of targets to build/run
+if [ -f "$TARGET_LIST_FILE" ]
+then
+	TARGET_NAMES=`cat $TARGET_LIST_FILE`
+else
+	TARGET_NAMES=`make ${MAKE_ARGS} list_targets`
+fi
+
+if [ "$CMD" == "build" ]
+then
+
+	# setup make arguments based on target requirements
+	if [ "${TARGET_MODE}" == "embedded" ]
+	then
+		T_ios=`/usr/bin/xcodebuild -sdk iphoneos.internal -version Path`
+		T_ios_name=iphoneos.internal
+		if [ "$T_ios" == "" ]
+		then
+			T_ios=`/usr/bin/xcodebuild -sdk iphoneos -version Path`
+			T_ios_name=iphoneos
+		fi
+
+		if [ "$T_ios" == "" ]
+		then
+			echo "No iOS SDK found. Exiting."
+			exit 1
+		fi
+
+		MAKE_ARGS="SDKROOT=${T_ios_name}"
+	elif [ "${TARGET_MODE}" == "desktop" ]
+	then
+		MAKE_ARGS=""
+	else
+		echo "Usage: ${PROGNAME} <embedded|desktop>"
+		exit 1
+	fi
+
+	if [ ! -d "${BUILD_DIR}" ]
+	then
+		mkdir -p ${BUILD_DIR}
+	fi
+
+	echo " "
+	echo "=========== Building XNU Unit Tests ========="
+	echo " "
+
+	for testname_target in ${TARGET_NAMES}
+	do
+		build_test ${testname_target}
+		echo ""
+	done
+
+	echo "Finished building tests. Saving list of targets in ${BUILD_DIR}/dst/${TARGET_LIST_FILE}"
+	echo "${TARGET_NAMES}" > ${BUILD_DIR}/dst/${TARGET_LIST_FILE}
+	cat "${PROGNAME}" | sed s/^CMD=build/CMD=run/g > ${BUILD_DIR}/dst/run_tests.sh
+	chmod +x ${BUILD_DIR}/dst/run_tests.sh
+	echo "Generated ${BUILD_DIR}/dst/run_tests.sh for running the tests."
+	exit 0
+
+fi
+# End of Build action
+
+#
+if [ "$CMD" == "run" ]
+then
+	echo " "
+	echo "=========== Running XNU Unit Tests ========="
+	echo " "
+	for testname_target in ${TARGET_NAMES}
+	do
+		run_test ${testname_target}
+	done
+	exit 0
+fi
+# End of Run action
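+#
+# Typical usage (illustrative; mirrors the build/run split above):
+#   ./build_tests.sh desktop           # build every target for OS X into BUILD/
+#   ./build_tests.sh embedded          # same, but against the iPhone SDK
+#   cd BUILD/dst && ./run_tests.sh     # run the generated copy of this script
+#
diff --git a/tools/tests/unit_tests/clock_types_6368156.c b/tools/tests/unit_tests/clock_types_6368156.c new file mode 100644 index 000000000..bb7eb4e15 --- /dev/null +++ b/tools/tests/unit_tests/clock_types_6368156.c @@ -0,0 +1,20 @@
+#include
+#include
+
+int main(void)
+{
+	long long good = 5 * 1000000000LL;
+	long long bad = 5 * NSEC_PER_SEC;
+
+	printf("%lld\n%lld\n", good, bad);
+	if (good == bad) {
+		printf("[PASS] successfully verified that (5 * 1000000000LL) == (5 * NSEC_PER_SEC).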
\n"); + return 0; + }else { + printf("[FAIL] NSEC_PER_SEC is not long long.\n"); + return -1; + } + /* by default return as error */ + + return 1; +} diff --git a/tools/tests/unit_tests/codesigntests-entitlements.plist b/tools/tests/unit_tests/codesigntests-entitlements.plist new file mode 100644 index 000000000..7caa664ab --- /dev/null +++ b/tools/tests/unit_tests/codesigntests-entitlements.plist @@ -0,0 +1,8 @@ + + + + + com.apple.security.some-entitlements + some-value + + diff --git a/tools/tests/unit_tests/codesigntests.c b/tools/tests/unit_tests/codesigntests.c new file mode 100644 index 000000000..0ec624ee2 --- /dev/null +++ b/tools/tests/unit_tests/codesigntests.c @@ -0,0 +1,130 @@ +#import +#import +#import +#import + +#import +#import +#import + +int +get_blob(pid_t pid, int op) +{ + uint8_t header[8]; + unsigned int cnt; + int rcent; + + for (cnt = 0; cnt < sizeof(header); cnt++) { + rcent = csops(pid, op, header, 1); + if (rcent != -1 && errno != ERANGE) + err(1, "errno != ERANGE for short header"); + } + + rcent = csops(pid, op, header, sizeof(header)); + if (rcent == -1 && errno == ERANGE) { + uint32_t len, bufferlen, bufferlen2; + + memcpy(&len, &header[4], 4); + bufferlen = ntohl(len); + if (bufferlen > 1024 * 1024) + errx(1, "invalid length on blob from kernel"); + else if (bufferlen == 0) + errx(1, "bufferlen == 0"); + else if (bufferlen < 8) + errx(1, "bufferlen <8 0"); + + uint8_t buffer[bufferlen + 1]; + + rcent = csops(pid, op, buffer, bufferlen - 1); + if (rcent != -1 && errno != ERANGE) + errx(1, "csops with full buffer - 1 failed"); + + rcent = csops(pid, op, buffer, bufferlen); + if (rcent != 0) + errx(1, "csops with full buffer failed"); + + memcpy(&len, &buffer[4], 4); + bufferlen2 = ntohl(len); + + if (op == CS_OPS_BLOB) { + if (bufferlen2 > bufferlen) + errx(1, "buffer larger on second try"); + if (bufferlen2 != bufferlen) + warnx("buffer shrunk since codesign can't tell the right size to codesign_allocate"); + } else { + if (bufferlen2 != bufferlen) + errx(1, "buffer sizes different"); + } + + rcent = csops(pid, op, buffer, bufferlen + 1); + if (rcent != 0) + errx(1, "csops with full buffer + 1 didn't pass"); + + return 0; + + } else if (rcent == 0) { + return 0; + } else { + return 1; + } +} + +int +main(int argc, const char * argv[]) +{ + uint32_t status; + int rcent; + pid_t pid; + + pid = getpid(); + + if (get_blob(pid, CS_OPS_ENTITLEMENTS_BLOB)) + errx(1, "failed to get entitlements"); + + if (get_blob(0, CS_OPS_ENTITLEMENTS_BLOB)) + errx(1, "failed to get entitlements"); + + if (get_blob(pid, CS_OPS_BLOB)) + errx(1, "failed to get blob"); + + if (get_blob(0, CS_OPS_BLOB)) + errx(1, "failed to get blob"); + + if (get_blob(pid, CS_OPS_IDENTITY)) + errx(1, "failed to get identity"); + + if (get_blob(0, CS_OPS_IDENTITY)) + errx(1, "failed to get identity"); + + rcent = csops(pid, CS_OPS_SET_STATUS, &status, sizeof(status) - 1); + if (rcent == 0) + err(1, "passed when passed in too short status buffer"); + + status = htonl(CS_RESTRICT); + rcent = csops(pid, CS_OPS_SET_STATUS, &status, sizeof(status)); + if (rcent != 0) + errx(1, "failed to mark proc RESTRICTED"); + + rcent = csops(pid, CS_OPS_MARKINVALID, NULL, 0); + if (rcent != 0) + errx(1, "failed to mark proc invalid"); + + status = htonl(CS_VALID); + rcent = csops(pid, CS_OPS_SET_STATUS, &status, sizeof(status)); + if (rcent == 0) + errx(1, "managed set flags on an INVALID proc"); + + if (!get_blob(pid, CS_OPS_ENTITLEMENTS_BLOB)) + errx(1, "got entitlements while invalid"); + + if (!get_blob(pid, 
diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog-Entitlements.plist b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog-Entitlements.plist
new file mode 100644
index 000000000..a5398e575
--- /dev/null
+++ b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog-Entitlements.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>com.apple.private.kernel.override-cpumon</key>
+	<true/>
+</dict>
+</plist>
diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.m b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.m
new file mode 100644
index 000000000..64fe2cb76
--- /dev/null
+++ b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.m
@@ -0,0 +1,470 @@
+#define TARGET_OS_EMBEDDED 1
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#import
+
+//#include "/Volumes/ws/10864999/Sundance10A273/Libc-849/BUILD_arm/Libc_Libc-849.roots/Libc_Libc-849~hdrDst/usr/local/include/libproc_internal.h"
+#include
+#include
+
+//#include "/Volumes/ws/10864999/Sundance10A273/Libc-849/BUILD_arm/Libc_Libc-849.roots/Libc_Libc-849~hdrDst/usr/local/include/libproc_internal.h"
+
+#define MAX_THREADS 1000
+
+char *pname;
+
+volatile int spin = 0;
+pthread_mutex_t lock;
+pthread_cond_t cv;
+volatile int ready = 0;
+
+int exit_after = 600;
+
+int limit = 0;		// Worker thread should apply per-thread limit to self?
+int limit_period = 5000;
+
+boolean_t reset_to_defaults = FALSE;
+
+boolean_t stress_test = FALSE;
+
+void usage(void) {
+	printf("usage: cpu_hog [-l percentage] [-s msecs] [-n nthreads] [-p percentage] [-i secs] [-e secs] [-t num]\n");
+	printf("\t-l: worker thread should apply per-thread limit to self (default: no limit)\n");
+	printf("\t-s: worker thread's per-thread limit refill period (msecs) (default: 5000)\n");
+	printf("\t-n: create nthreads and choose 1 to be worker.
(default: 2)\n"); + printf("\t-p: worker thread should consume this percent CPU over -i seconds (default: 1)\n"); + printf("\t-i: interval for CPU consumption given with -p (DEFAULT: 1 second)\n"); + printf("\t-x: disable CPU usage monitor after this many seconds (0 == upon launch)\n"); + printf("\t-r: reset CPU usage monitor to default params after this many seconds (0 == upon launch)\n"); + printf("\t-c: change this process's CPU monitor percentage to this value upon launch\n"); + printf("\t-C: change this process's CPU monitor interval to this value upon launch (requires -c)\n"); + printf("\t-d: change this process's CPU monitor percentage to this value (with -D interval) -- after -w seconds\n"); + printf("\t-D: change this process's CPU monitor interval to this value (with -d percentage) -- after -w seconds\n"); + printf("\t-w: wait this number of seconds until changing CPU monitor percentage to -d percent\n"); + printf("\t-e: exit after this many seconds (default: 10 mins)\n"); + printf("\t-P: confirm that this process's CPU monitor parameters match this percentage (requires -I)\n"); + printf("\t-I: interval to match (with -P)\n"); + printf("\t-t: spin up additional CPU burner threads (each will consume 100%% CPU)\n"); +} + +void set_my_limit(int percent, int refill_period) +{ + int err; + int cpupercent = percent | (refill_period << 8); + + if ((err = sysctlbyname("kern.setthread_cpupercent", 0, 0, + &cpupercent, sizeof (int))) != 0) { + printf("sysctl: error %d\n", err); + } +} + +static void print_cpumon_params(void) { + int new_percentage = -1, new_interval = -1; + + proc_get_cpumon_params(getpid(), &new_percentage, &new_interval); + + printf("CPU monitor params: percentage = %d interval = %d\n", new_percentage, new_interval); +} + +void *burner_thread(void *arg) +{ + int x = 1, y = 2; + + while (1) { + x = rand(); + y = x * rand(); + } +} + +void *spinner_thread(void *arg) +{ + int am_i_the_one = (arg != NULL) ? 1 : 0; + int j = 0; + int err; + + if (am_i_the_one) { + if ((err = pthread_mutex_lock(&lock)) != 0) { + printf("spinner: pthread_mutex_lock: %d", err); + exit(1); + } + + /* + * Apply per-thread limit to self? + */ + if (limit != 0) { + set_my_limit(limit, limit_period); + } + + /* + * Tell the main thread we're ready to get to work. + */ + ready = 1; + pthread_mutex_unlock(&lock); + pthread_cond_signal(&cv); + + while (1) { + /* + * Go to sleep until the main thread wakes us. + */ + pthread_cond_wait(&cv, &lock); + + /* + * Do useless work until the main thread tells us to + * stop. 
+ */ + while (spin) { + j += rand(); + if (reset_to_defaults) { + reset_to_defaults = FALSE; + printf("%s: resetting CPU usage monitor to default params.\n", pname); + proc_set_cpumon_defaults(getpid()); + print_cpumon_params(); + } + + if (stress_test) { +// printf("%s: resetting CPU usage monitor to default params.\n", pname); + proc_set_cpumon_defaults(getpid()); +// print_cpumon_params(); +// printf("%s: disabling CPU usage monitor\n", pname); + proc_disable_cpumon(getpid()); +// print_cpumon_params(); + } + + } + } + } + + while(1) { + sleep(6000); + } +} + +void *disable_thread(void *arg) +{ + sleep((int)arg); + + printf("%s: disabling CPU usage monitor.\n", pname); + proc_disable_cpumon(getpid()); + print_cpumon_params(); + + return (NULL); +} + +void *reset_thread(void *arg) +{ + sleep((int)arg); + + reset_to_defaults = TRUE; + + return (NULL); +} + +void *exit_thread(void *arg) +{ + sleep(exit_after); + printf("...exiting.\n"); + exit(0); + + return (NULL); +} + +int delayed_cpumon_percentage = -1; +int delayed_cpumon_interval = -1; +int delayed_cpumon_percentage_wait = -1; + +void *change_cpumon_thread(void *arg) +{ + sleep(delayed_cpumon_percentage_wait); + printf("changing CPU monitor params to %d %% over %d seconds\n", delayed_cpumon_percentage, delayed_cpumon_interval); + proc_set_cpumon_params(getpid(), delayed_cpumon_percentage, delayed_cpumon_interval); + + print_cpumon_params(); + + return (NULL); +} + +int main(int argc, char *argv[]) +{ + int ch; + int i = 0; + int nthreads = 1; + int chosen_thr; + pthread_t chosen_thr_id; + int percent = 100; + + int interval = 2 * 1000000; // Default period for cycle is 2 seconds. Units are usecs. + int on_time, off_time; + + int new_cpumon_percentage = -1; + int new_cpumon_interval = -1; + + int disable_delay = -1; + int reset_params_delay = -1; + + int confirm_cpumon_percentage = -1; + int confirm_cpumon_interval = -1; + + int num_burner_threads = 0; + + pthread_t thr_id; + + printf("In CPU hogging test program...\n"); + + pname = argv[0]; + + while ((ch = getopt(argc, argv, "r:x:l:s:n:p:i:c:C:d:D:w:e:P:I:St:")) != -1) { + switch (ch) { + case 'l': + limit = atoi(optarg); + break; + case 's': + limit_period = atoi(optarg); + break; + case 'n': + nthreads = atoi(optarg); + break; + case 'p': + percent = atoi(optarg); + break; + case 'i': + interval = atoi(optarg) * 1000000; // using usleep + break; + case 'x': + disable_delay = atoi(optarg); + break; + case 'r': + reset_params_delay = atoi(optarg); + break; + case 'c': + new_cpumon_percentage = atoi(optarg); + break; + case 'C': + new_cpumon_interval = atoi(optarg); + break; + case 'd': + delayed_cpumon_percentage = atoi(optarg); + break; + case 'D': + delayed_cpumon_interval = atoi(optarg); + break; + case 'w': + delayed_cpumon_percentage_wait = atoi(optarg); + break; + case 'e': + exit_after = atoi(optarg); + break; + case 'P': + confirm_cpumon_percentage = atoi(optarg); + break; + case 'I': + confirm_cpumon_interval = atoi(optarg); + break; + case 'S': + stress_test = TRUE; + break; + case 't': + num_burner_threads = atoi(optarg); + break; + default: + usage(); + exit(1); + } + } + argc -= optind; + argv += optind; + + if (argc != 0) { + usage(); + exit(1); + } + + if (((delayed_cpumon_percentage != -1) && (delayed_cpumon_percentage_wait == -1)) || + ((delayed_cpumon_percentage == -1) && (delayed_cpumon_percentage_wait != -1))) { + printf("must specify -d and -w together\n"); + usage(); + exit(1); + } + + if ((nthreads <= 0) || (nthreads > MAX_THREADS)) { + printf("%s: %d threads 
too many (max is %d)\n", argv[0], + nthreads ,MAX_THREADS); + exit(1); + } + + if ((percent <= 0) || (percent > 100)) { + printf("%s: invalid percentage %d\n", argv[0], percent); + exit(1); + } + + if (interval <= 0) { + printf("%s: invalid interval %d\n", argv[0], interval); + exit(1); + } + + if ((new_cpumon_interval != -1) && (new_cpumon_percentage == -1)) { + printf("%s: -C requires that you also specify -c\n", argv[0]); + exit(1); + } + + print_cpumon_params(); + + if (confirm_cpumon_percentage != -1) { + int my_percentage, my_interval; + proc_get_cpumon_params(getpid(), &my_percentage, &my_interval); + if ((my_percentage != confirm_cpumon_percentage) || + (my_interval != confirm_cpumon_interval)) { + printf("parameters don't match values given with -P and -I\n"); + exit(1); + } + + printf("parameters match values given with -P and -I.\n"); + exit(0); + } + + on_time = (percent * interval) / 100; + off_time = interval - on_time; + + /* + * Randomly choose a thread to be the naughty one. + */ + srand(MAX_THREADS); // Want this to be repeatable, for now + chosen_thr = rand() % nthreads; + + if (pthread_mutex_init(&lock, NULL) != 0) { + perror("pthread_mutex_init"); + exit(1); + } + + if (pthread_cond_init(&cv, NULL) != 0) { + perror("pthread_cond_init"); + exit(1); + } + + if (pthread_mutex_lock(&lock) != 0) { + perror("pthread_mutex_lock"); + exit(1); + } + + if (pthread_create(&thr_id, NULL, exit_thread, NULL) != 0) { + perror("pthread_create"); + exit(1); + } + + if (delayed_cpumon_percentage != -1) { + if (pthread_create(&thr_id, NULL, change_cpumon_thread, NULL) != 0) { + perror("pthread_create"); + exit(1); + } + } + + printf("Creating %d threads. Thread %d will try to consume " + "%d%% of a CPU over %d seconds.\n", nthreads, chosen_thr, + percent, interval / 1000000); + if (limit != 0) { + printf("Worker thread %d will first self-apply a per-thread" + " CPU limit of %d percent over %d seconds\n", + chosen_thr, limit, limit_period); + } + + for (i = 0; i < nthreads; i++) { + if (pthread_create(&thr_id, NULL, spinner_thread, + (void *)((i == chosen_thr) ? (void *)1 : NULL)) != 0) { + perror("pthread_create"); + exit(1); + } + if (i == chosen_thr) { + chosen_thr_id = thr_id; + } + } + + /* + * Try to adjust the CPU usage monitor limit. + */ + if (new_cpumon_percentage != -1) { + proc_set_cpumon_params(getpid(), new_cpumon_percentage, new_cpumon_interval); + print_cpumon_params(); + } + + if (disable_delay != -1) { + if (pthread_create(&thr_id, NULL, disable_thread, (void *)disable_delay) != 0) { + perror("pthread_create"); + exit(1); + } + } + + if (reset_params_delay != -1) { + if (pthread_create(&thr_id, NULL, reset_thread, (void *)reset_params_delay) != 0) { + perror("pthread_create"); + exit(1); + } + } + + if (num_burner_threads > 0) { + for (i = 0; i < num_burner_threads; i++) { + if (pthread_create(&thr_id, NULL, burner_thread, NULL) != 0) { + perror("pthread_create"); + exit(1); + } + } + } + + // Wait for the worker thread to come alive and get ready to work. + while (ready == 0) { + pthread_cond_wait(&cv, &lock); + } + + if (pthread_mutex_unlock(&lock) != 0) { + perror("spinner: pthread_mutex_unlock"); + exit(1); + } + + /* + * Control the worker thread's CPU consumption. + */ + while (1) { + /* + * Worker thread is waiting for us to awaken him, with the + * lock dropped. + */ + if (pthread_mutex_lock(&lock) != 0) { + perror("pthread_mutex_lock"); + exit(1); + } + + /* + * Go to sleep until we are ready to awaken the worker. 
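+		 * (off_time = interval - on_time, so the -p and -i options directly set the duty cycle.)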
+ */ + usleep(off_time); + + /* + * Tell the worker to get to work. + */ + spin = 1; + + if (pthread_mutex_unlock(&lock) != 0) { + perror("spinner: pthread_mutex_unlock"); + exit(1); + } + + pthread_cond_signal(&cv); + + /* + * Go to sleep until we're ready to stop the worker. + */ + usleep(on_time); + + /* + * Stop the worker. He will drop the lock and wait + * for us to wake him again. + */ + spin = 0; + } + + return (1); +} diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.pbxproj b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.pbxproj new file mode 100644 index 000000000..2f6a41f39 --- /dev/null +++ b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.pbxproj @@ -0,0 +1,356 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 46; + objects = { + +/* Begin PBXAggregateTarget section */ + 15A9B5C1157E853C00B44B4F /* default */ = { + isa = PBXAggregateTarget; + buildConfigurationList = 15A9B5C2157E853D00B44B4F /* Build configuration list for PBXAggregateTarget "default" */; + buildPhases = ( + ); + dependencies = ( + 15A9B5C6157E856F00B44B4F /* PBXTargetDependency */, + 15A9B5C8157E857000B44B4F /* PBXTargetDependency */, + ); + name = default; + productName = "cpu_hog-default"; + }; +/* End PBXAggregateTarget section */ + +/* Begin PBXBuildFile section */ + 155F2812157E81B100D7B917 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 159F7E4E1537850F00588242 /* Foundation.framework */; }; + 159F7E4F1537850F00588242 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 159F7E4E1537850F00588242 /* Foundation.framework */; }; + 15A9B5B4157E83C100B44B4F /* cpu_hog.m in Sources */ = {isa = PBXBuildFile; fileRef = 15A9B5B3157E83C100B44B4F /* cpu_hog.m */; }; + 15A9B5B5157E83C100B44B4F /* cpu_hog.m in Sources */ = {isa = PBXBuildFile; fileRef = 15A9B5B3157E83C100B44B4F /* cpu_hog.m */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 15A9B5C5157E856F00B44B4F /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 159F7E411537850F00588242 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 159F7E491537850F00588242; + remoteInfo = cpu_hog; + }; + 15A9B5C7157E857000B44B4F /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 159F7E411537850F00588242 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 155F280E157E81B100D7B917; + remoteInfo = "cpu_hog-unentitled"; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 155F2813157E81B100D7B917 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; + 159F7E481537850F00588242 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 155F2818157E81B100D7B917 /* cpu_hog-unentitled */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "cpu_hog-unentitled"; sourceTree = BUILT_PRODUCTS_DIR; }; + 159F7E4A1537850F00588242 /* cpu_hog */ = {isa = PBXFileReference; explicitFileType = 
"compiled.mach-o.executable"; includeInIndex = 0; path = cpu_hog; sourceTree = BUILT_PRODUCTS_DIR; }; + 159F7E4E1537850F00588242 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; + 15A9B5B3157E83C100B44B4F /* cpu_hog.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = cpu_hog.m; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 155F2811157E81B100D7B917 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 155F2812157E81B100D7B917 /* Foundation.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 159F7E471537850F00588242 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 159F7E4F1537850F00588242 /* Foundation.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 159F7E3F1537850F00588242 = { + isa = PBXGroup; + children = ( + 15A9B5B3157E83C100B44B4F /* cpu_hog.m */, + 159F7E4D1537850F00588242 /* Frameworks */, + 159F7E4B1537850F00588242 /* Products */, + ); + sourceTree = ""; + }; + 159F7E4B1537850F00588242 /* Products */ = { + isa = PBXGroup; + children = ( + 159F7E4A1537850F00588242 /* cpu_hog */, + 155F2818157E81B100D7B917 /* cpu_hog-unentitled */, + ); + name = Products; + sourceTree = ""; + }; + 159F7E4D1537850F00588242 /* Frameworks */ = { + isa = PBXGroup; + children = ( + 159F7E4E1537850F00588242 /* Foundation.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 155F280E157E81B100D7B917 /* cpu_hog-unentitled */ = { + isa = PBXNativeTarget; + buildConfigurationList = 155F2815157E81B100D7B917 /* Build configuration list for PBXNativeTarget "cpu_hog-unentitled" */; + buildPhases = ( + 155F280F157E81B100D7B917 /* Sources */, + 155F2811157E81B100D7B917 /* Frameworks */, + 155F2813157E81B100D7B917 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "cpu_hog-unentitled"; + productName = cpumon_test; + productReference = 155F2818157E81B100D7B917 /* cpu_hog-unentitled */; + productType = "com.apple.product-type.tool"; + }; + 159F7E491537850F00588242 /* cpu_hog */ = { + isa = PBXNativeTarget; + buildConfigurationList = 159F7E5A1537850F00588242 /* Build configuration list for PBXNativeTarget "cpu_hog" */; + buildPhases = ( + 159F7E461537850F00588242 /* Sources */, + 159F7E471537850F00588242 /* Frameworks */, + 159F7E481537850F00588242 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = cpu_hog; + productName = cpumon_test; + productReference = 159F7E4A1537850F00588242 /* cpu_hog */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 159F7E411537850F00588242 /* Project object */ = { + isa = PBXProject; + attributes = { + LastUpgradeCheck = 0450; + ORGANIZATIONNAME = Apple; + }; + buildConfigurationList = 159F7E441537850F00588242 /* Build configuration list for PBXProject "cpu_hog" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = 159F7E3F1537850F00588242; + productRefGroup = 159F7E4B1537850F00588242 /* Products */; + projectDirPath 
= ""; + projectRoot = ""; + targets = ( + 15A9B5C1157E853C00B44B4F /* default */, + 159F7E491537850F00588242 /* cpu_hog */, + 155F280E157E81B100D7B917 /* cpu_hog-unentitled */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 155F280F157E81B100D7B917 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 15A9B5B5157E83C100B44B4F /* cpu_hog.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 159F7E461537850F00588242 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 15A9B5B4157E83C100B44B4F /* cpu_hog.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 15A9B5C6157E856F00B44B4F /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 159F7E491537850F00588242 /* cpu_hog */; + targetProxy = 15A9B5C5157E856F00B44B4F /* PBXContainerItemProxy */; + }; + 15A9B5C8157E857000B44B4F /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 155F280E157E81B100D7B917 /* cpu_hog-unentitled */; + targetProxy = 15A9B5C7157E857000B44B4F /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 155F2816157E81B100D7B917 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_ENTITLEMENTS = ""; + CODE_SIGN_IDENTITY = "-"; + PRODUCT_NAME = "cpu_hog-unentitled"; + PROVISIONING_PROFILE = ""; + }; + name = Debug; + }; + 155F2817157E81B100D7B917 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_ENTITLEMENTS = ""; + CODE_SIGN_IDENTITY = "-"; + PRODUCT_NAME = "cpu_hog-unentitled"; + PROVISIONING_PROFILE = ""; + }; + name = Release; + }; + 159F7E581537850F00588242 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_SYMBOLS_PRIVATE_EXTERN = NO; + GCC_VERSION = com.apple.compilers.llvm.clang.1_0; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 6.0; + }; + name = Debug; + }; + 159F7E591537850F00588242 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_VERSION = com.apple.compilers.llvm.clang.1_0; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 6.0; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 159F7E5B1537850F00588242 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_ENTITLEMENTS = "cpu_hog-Entitlements.plist"; + CODE_SIGN_IDENTITY = "-"; + PRODUCT_NAME = cpu_hog; + PROVISIONING_PROFILE = ""; + }; + name = Debug; + }; + 159F7E5C1537850F00588242 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_ENTITLEMENTS = "cpu_hog-Entitlements.plist"; + CODE_SIGN_IDENTITY = "-"; + PRODUCT_NAME = cpu_hog; + PROVISIONING_PROFILE = ""; + }; + name = Release; + }; + 15A9B5C3157E853D00B44B4F /* Debug */ = { 
+ isa = XCBuildConfiguration; + buildSettings = { + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 15A9B5C4157E853D00B44B4F /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 155F2815157E81B100D7B917 /* Build configuration list for PBXNativeTarget "cpu_hog-unentitled" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 155F2816157E81B100D7B917 /* Debug */, + 155F2817157E81B100D7B917 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 159F7E441537850F00588242 /* Build configuration list for PBXProject "cpu_hog" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 159F7E581537850F00588242 /* Debug */, + 159F7E591537850F00588242 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 159F7E5A1537850F00588242 /* Build configuration list for PBXNativeTarget "cpu_hog" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 159F7E5B1537850F00588242 /* Debug */, + 159F7E5C1537850F00588242 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 15A9B5C2157E853D00B44B4F /* Build configuration list for PBXAggregateTarget "default" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 15A9B5C3157E853D00B44B4F /* Debug */, + 15A9B5C4157E853D00B44B4F /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 159F7E411537850F00588242 /* Project object */; +} diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 000000000..8b5db5879 --- /dev/null +++ b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpu_hog/cpu_hog.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,15 @@ + + + + + + + + + diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpumon_test_framework.c b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpumon_test_framework.c new file mode 100644 index 000000000..e49dd7750 --- /dev/null +++ b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/cpumon_test_framework.c @@ -0,0 +1,529 @@ +/* + * Testing Framework for CPU Usage Monitor + * + * The framework tests for correctness of the CPU Usage Monitor. + * It creates a new exception port and an associated handling thread. + * For each test case, the framework sets its own exception port to the + * newly allocated port, execs a new child (which inherits the new + * exception port) and restores the parent's exception port to the + * original handler. The child process is invoked with a different + * parameters based on the scenario being tested. + * + * Usage: ./cpu_monitor_tests_11646922 [test case ID] + * If no test case ID is supplied, the framework runs all test cases. 
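+ * For example, invoking "./cpu_monitor_tests_11646922 0" runs only test 0, the basic EXC_RESOURCE scenario.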
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_TEST_ID_LEN 16 +#define MAX_ARGV 8 + +#define GENERATE_TEST_EXC_CODE(type, flavor) \ + ((0) | ((type & 0x7ULL) << 61) | ((flavor & 0x7ULL) << 58)) + +/* + * To add a new test case to this framework: + * - Increment the NUMTESTS value + * - Add exec args for cpu_hog/cpu_hog unentitled to test the + * scenario. Also add a case to the main loop child_args assignment. + * - Add timeout for exception. If no timeout, specify 0. + * - Add expected duration for exception. 0 if no exception expected. + * - Add (Exception Type | flavor) to "test_exception_code" if the + * test case generates an exception; 0 otherwise + */ + +#define NUMTESTS 7 + +const char *test_description[] = { + "Basic test for EXC_RESOURCE.", + "Test Program stays under limit.", + "Test Program disables monitor.", + "Unentitled Test Program attempts to disable monitor.", + "Test Program resets monitor to default.", + "Set high watermark, munch past it, and confirm EXC_RESOURCE received for FLAVOR_HIGH_WATERMARK.", + "Set high watermark but don't munch past it. Confirm no EXC_RESOURCE received.", +}; + +/* + * Exec arguments for cpu hogging programs + * (NULL indicates test should not be run) + */ +char *test_argv_0[] = { "./cpu_hog-unentitled", "-c", "30", "-C", "10", "-p", "100", "-i", "1", NULL }; +char *test_argv_1[] = { "./cpu_hog-unentitled", "-c", "50", "-C", "15", "-p", "25", "-i", "1", NULL }; +#ifdef TARGET_SDK_iphoneos_internal +char *test_argv_2[] = { "./cpu_hog", "-c", "20", "-C", "15", "-x", "0", "-p", "100", "-i", "1", NULL }; +char *test_argv_3[] = { "./cpu_hog-unentitled", "-c", "20", "-C", "15", "-x", "1", "-p", "100", "-i", "1", NULL }; +#else +char *test_argv_2[] = { "./cpu_hog-unentitled", "-c", "20", "-C", "15", "-x", "0", "-p", "100", "-i", "1", NULL }; +char **test_argv_3 = NULL; +#endif +char *test_argv_4[] = { "./cpu_hog-unentitled", "-c", "20", "-C", "15", "-r", "1", "-p", "100", "-i", "1", NULL }; +#ifdef TARGET_SDK_iphoneos_internal +char *test_argv_5[] = { "./mem_hog", "-e", "-w", "50", "-m", "150", "10", "200", NULL }; +char *test_argv_6[] = { "./mem_hog", "-e", "-w", "190", "-m", "160", "10", "200", NULL }; +#else +char **test_argv_5 = NULL; +char **test_argv_6 = NULL; +#endif + +/* + * Timeout in seconds for test scenario to complete + * (0 indicates no timeout enabled) + */ +int timeout_secs[] = { + 15, + 20, + 20, + 110, + 110, + 20, + 20, +}; + +/* + * Exception should be generated within the specified duration + * (0 indicates no exception/time constraints for the exception + * to occur) + */ +int exc_expected_at[] = { + 0, + 0, + 0, + 90, + 90, + 10, + 0, +}; + +/* + * EXC_RESOURCE exception codes expected (0 indicates no + * exception expected) + */ +uint64_t test_exception_code[] = { + GENERATE_TEST_EXC_CODE(RESOURCE_TYPE_CPU, FLAVOR_CPU_MONITOR), + 0, + 0, + GENERATE_TEST_EXC_CODE(RESOURCE_TYPE_CPU, FLAVOR_CPU_MONITOR), + GENERATE_TEST_EXC_CODE(RESOURCE_TYPE_CPU, FLAVOR_CPU_MONITOR), + GENERATE_TEST_EXC_CODE(RESOURCE_TYPE_MEMORY, FLAVOR_HIGH_WATERMARK), + 0, +}; + +#define DEFAULT_PERCENTAGE "50" +#define DEFAULT_INTERVAL "180" + +/* Global Variables used by parent/child */ +mach_port_t exc_port; /* Exception port for child process */ +uint64_t exception_code; /* Exception code for the exception generated */ +int time_for_exc; /* Time (in secs.) 
for the exception to be generated */ +extern char **environ; /* Environment variables for the child process */ +int test_status; /* Test Suite Status */ +int indiv_results[NUMTESTS]; /* Results of individual tests (-1=didn't run; 0=pass; 1=fail) */ + +/* Cond Var and Mutex to indicate timeout for child process */ +pthread_cond_t cv; +pthread_mutex_t lock; + +/* Timer Routines to calculate elapsed time and run timer thread */ +time_t start_time; /* Test case start time (in secs.) */ + +int elapsed(void) +{ + return (time(NULL) - start_time); +} + +void *timeout_thread(void *arg) +{ + int err; + int timeout = (int)arg; + + sleep(timeout); + fprintf(stderr, "Test Program timed out... Terminating!\n"); + + if ((err = pthread_cond_broadcast(&cv)) != 0) { + fprintf(stderr, "pthread_cond_broadcast: %s\n", strerror(err)); + exit(1); + } + + return (NULL); +} + +/* Routine to wait for child to complete */ +void *wait4_child_thread(void *arg) +{ + int err; + int child_stat; + + wait4(-1, &child_stat, 0, NULL); + + if ((err = pthread_cond_broadcast(&cv)) != 0) { + fprintf(stderr, "pthread_cond_broadcast: %s\n", strerror(err)); + exit(1); + } + + return (NULL); +} + +/* Mach Server Routines */ +boolean_t mach_exc_server( + mach_msg_header_t *InHeadP, + mach_msg_header_t *OutHeadP); + +kern_return_t catch_mach_exception_raise +( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt + ) +{ + if (exception == EXC_RESOURCE) { + /* Set global variable to indicate exception received */ + exception_code = *((uint64_t *)code); + time_for_exc = elapsed(); + } else { + /* Terminate test on all other unexpected exceptions */ + fprintf(stderr, "received unexpected exception type %#x\n", exception); + exit(1); + } + + return (KERN_SUCCESS); +} + +kern_return_t catch_mach_exception_raise_state +( + mach_port_t exception_port, + exception_type_t exception, + const mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + const thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt + ) +{ + fprintf(stderr, "Unexpected exception handler called\n"); + exit(1); + return (KERN_FAILURE); +} + + +kern_return_t catch_mach_exception_raise_state_identity +( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt + ) +{ + fprintf(stderr, "Unexpected exception handler called\n"); + exit(1); + return (KERN_FAILURE); +} + +void *server_thread(void *arg) +{ + kern_return_t kr; + + while(1) { + /* Handle exceptions on exc_port */ + if ((kr = mach_msg_server_once(mach_exc_server, 4096, exc_port, 0)) != KERN_SUCCESS) { + fprintf(stderr, "mach_msg_server_once: error %#x\n", kr); + exit(1); + } + } + return (NULL); +} + +int main(int argc, char *argv[]) +{ + posix_spawnattr_t attrs; + uint64_t percent, interval; + int i, err, ret = 0; + + kern_return_t kr; + mach_port_t task = mach_task_self(); + mach_port_t child_task; + char **child_args; + + pthread_t exception_thread; + pthread_t timer_thread; + pthread_t wait_thread; + + mach_msg_type_number_t maskCount = 1; + exception_mask_t mask; + exception_handler_t handler; + exception_behavior_t behavior; + thread_state_flavor_t 
flavor; + + pid_t child_pid; + int test_case_id = -1; + + if (argc > 1) + test_case_id = atoi(argv[1]); + + /* Initialize mutex and condition variable */ + if ((err = pthread_mutex_init(&lock, NULL)) != 0) { + fprintf(stderr,"pthread_mutex_init: %s\n", strerror(err)); + exit(1); + } + + if ((err = pthread_cond_init(&cv, NULL)) != 0) { + fprintf(stderr, "pthread_cond_init: %s\n", strerror(err)); + exit(1); + } + + /* Allocate and initialize new exception port */ + if ((kr = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &exc_port)) != KERN_SUCCESS) { + fprintf(stderr, "mach_port_allocate: %s\n", mach_error_string(kr)); + exit(1); + } + + if ((kr = mach_port_insert_right(task, exc_port, + exc_port, MACH_MSG_TYPE_MAKE_SEND)) != KERN_SUCCESS) { + fprintf(stderr, "mach_port_allocate: %s\n", mach_error_string(kr)); + exit(1); + } + + /* Get Current exception ports */ + if ((kr = task_get_exception_ports(task, EXC_MASK_RESOURCE, &mask, + &maskCount, &handler, &behavior, &flavor)) != KERN_SUCCESS) { + fprintf(stderr,"task_get_exception_ports: %s\n", mach_error_string(kr)); + exit(1); + } + + /* Create exception serving thread */ + if ((err = pthread_create(&exception_thread, NULL, server_thread, 0)) != 0) { + fprintf(stderr, "pthread_create server_thread: %s\n", strerror(err)); + exit(1); + } + + fprintf(stderr, "---------------System Configuration------------------------------------------\n"); + fprintf(stderr, "System Kernel Version: "); + system("uname -a"); + fprintf(stderr, "System SDK Version: "); + system("sw_vers"); + + for (i = 0; i < NUMTESTS; i++) { + indiv_results[i] = -1; + } + + /* Run Tests */ + for(i=0; i 0) + fprintf(stderr, "EXC_RESOURCE Received after %d secs\n", time_for_exc); + + if (!!exception_code != !!test_exception_code[i]) { + test_status = 1; + test_case_status = 1; + indiv_results[i] = 1; + } + + if (exception_code) { + /* Validate test success by checking code and expected time */ + if ((exception_code & test_exception_code[i]) != test_exception_code[i]) { + fprintf(stderr, "Test Failure Reason: EXC_RESOURCE code did not match expected exception code!\n"); + fprintf(stderr, "Expected: 0x%llx Found: 0x%llx\n", test_exception_code[i], exception_code); + test_status = 1; + test_case_status = 1; + indiv_results[i] = 1; + } + if(exc_expected_at[i] && + (time_for_exc < (exc_expected_at[i] - 10) || + time_for_exc > (exc_expected_at[i] + 10))) { + fprintf(stderr, "Test Failure Reason: Test case did not receive EXC_RESOURCE within expected time!\n"); + test_status = 1; + test_case_status = 1; + indiv_results[i] = 1; + } + } + + if(test_case_status) + fprintf(stderr, "[FAILED]\n"); + else + fprintf(stderr, "[PASSED]\n"); + fprintf(stderr, "-------------------------------------------------------------------------------\n"); + + } + + if (test_case_id == -1) { + fprintf(stderr, "--------------- Results Summary -----------------------------------------------\n"); + + for (i = 0; i < NUMTESTS; i++) { + fprintf(stderr, "%2d: %s\n", i, (indiv_results[i] < 0) ? "N/A" : + (indiv_results[i] == 0) ? 
"PASSED" : "FAILED"); + } + } + +cleanup: + kill(child_pid, SIGKILL); + exit(test_status); +} + + diff --git a/osfmk/i386/timer.h b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mach_exc.defs similarity index 88% rename from osfmk/i386/timer.h rename to tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mach_exc.defs index 8f7bbdc60..4b6cc647b 100644 --- a/osfmk/i386/timer.h +++ b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mach_exc.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -30,7 +30,7 @@ */ /* * Mach Operating System - * Copyright (c) 1991 Carnegie Mellon University + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its @@ -50,21 +50,8 @@ * Carnegie Mellon University * Pittsburgh PA 15213-3890 * - * any improvements or extensions that they make and grant Carnegie Mellon + * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. */ -/* - */ - -#ifndef _I386_TIMER_H_ -#define _I386_TIMER_H_ - -/* - * Machine dependent timer definitions. - */ - -#include - - -#endif /* _I386_TIMER_H_ */ +#include diff --git a/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c new file mode 100644 index 000000000..4579161d4 --- /dev/null +++ b/tools/tests/unit_tests/cpu_monitor_tests_11646922_src/mem_hog/mem_hog.c @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include +#include + +#define PAGESIZE 4096 + +/* Trigger forced jetsam */ +#define MEMORYSTATUS_CMD_TEST_JETSAM 1000 + +static void +dirty_chunk(void *chunk, int chunk_size) +{ + int i; + char *p; + + // Dirty every word in the chunk. 
+ for (p = chunk; p < (char *)chunk + (chunk_size * 1024 * 1024); p += 4) { + *p = 'Z'; + } +} + +char *pname; + +void usage(void) { + printf("usage: %s [-re] [-l MB] [-w MB] [-m MB] [-o num] [-k pid] \n", pname); + printf("\t-r: after reaching max, re-dirty it all when the user prompts to do so.\n"); + printf("\t-l: program the task's physical footprint limit to this value (in MB).\n"); + printf("\t-w: program the task's jetsam high watermark to this value (in MB).\n"); + printf("\t-m: dirty no more than this amount (in MB).\n"); + printf("\t-e: exit after reaching -m max dirty.\n"); + printf("\t-o: oscillate at the max this number of times and then continue on up.\n"); + printf("\t-k: trigger explicit jetsam kill of this pid (and then exit).\n"); +} + +int main(int argc, char *argv[]) +{ + int ch; + void **chunks; + int nchunks; + int max_chunks; + int oscillations = -1; + int tot_mb = 0; + int chunk_size; + int interval; + int max = -1; + int limit = -2; + int high_watermark = -1; + int victim = -1; + int old_limit; + boolean_t redirty = FALSE; + boolean_t exit_after_max = FALSE; + + int oscillation_cnt = 0; + + pname = argv[0]; + + printf("pid: %d\n", getpid()); + + while ((ch = getopt(argc, argv, "rem:l:w:k:o:")) != -1) { + switch (ch) { + case 'm': + max = atoi(optarg); + break; + case 'l': + limit = atoi(optarg); + break; + case 'w': + high_watermark = atoi(optarg); + break; + case 'o': + oscillations = atoi(optarg); + break; + case 'r': + redirty = TRUE; + break; + case 'e': + exit_after_max = TRUE; + break; + case 'k': + victim = atoi(optarg); + break; + case 'h': + default: + usage(); + exit(1); + } + } + + argc -= optind; + argv += optind; + + if (victim != -1) { + int r; + /* + * int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, user_addr_t buffer, size_t buffersize); + */ + if ((r = memorystatus_control(MEMORYSTATUS_CMD_TEST_JETSAM, victim, 0, 0, 0)) != 0) { + perror("memorystatus_control"); + exit(1); + } + printf("killed process %d\n", victim); + + } + + if (argc != 2) { + usage(); + exit(1); + } + + chunk_size = atoi(argv[0]); + interval = atoi(argv[1]); + + if (limit != -2) { + kern_return_t kr; + if ((kr = task_set_phys_footprint_limit(mach_task_self(), limit, &old_limit)) != KERN_SUCCESS) { + fprintf(stderr, "task_set_phys_footprint_limit() failed: %s\n", mach_error_string(kr)); + exit(1); + } + printf("phys footprint limit set to %d MB (was: %d MB)\n", limit, old_limit); + } + + if (high_watermark != -1) { + int r; + /* + * int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags, user_addr_t buffer, size_t buffersize); + */ + if ((r = memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK, getpid(), high_watermark, 0, 0)) != 0) { + perror("memorystatus_control"); + exit(1); + } + printf("high watermark set to %d MB\n", high_watermark); + } + + printf("consuming memory in chunks of %d MB every %d milliseconds.\n", chunk_size, interval); + + printf("total consumed: "); + fflush(stdout); + + /* + * Estimate max number of chunks possible, using 4GB as absolute max amount of memory + * we could ever use. 
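+	 * (Hence max_chunks = 4000 MB / chunk_size below.)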
+ */ + max_chunks = 4000 / chunk_size; + if ((chunks = calloc(max_chunks, sizeof (*chunks))) == NULL) { + perror("malloc"); + exit(1); + } + nchunks = 0; + + while (1) { + if ((chunks[nchunks] = malloc(chunk_size * 1024 * 1024)) == NULL) { + perror("malloc"); + exit(1); + } + + tot_mb += chunk_size; + + dirty_chunk(chunks[nchunks], chunk_size); + + nchunks++; + + putchar(0x8); putchar(0x8); putchar(0x8); putchar(0x8); + printf("%4d", tot_mb); + fflush(stdout); + + if ((max != -1) && (tot_mb > max)) { + printf("\nMax reached.\n"); + + if (exit_after_max) { + exit(0); + } + + if ((oscillations == -1) || (oscillation_cnt < oscillations)) { + if (redirty) { + while (1) { + int i, ch; + + printf("Press any key to re-dirty ('q' to quit)..."); + fflush(stdout); + if ((ch = getchar()) == 'q') { + exit(0); + } + + for (i = 0; i < nchunks; i++) { + dirty_chunk(chunks[i], chunk_size); + } + } + } + + /* + * We've broken the limit of what we should be consuming; free the + * most recent three chunks and go round again. + */ + nchunks--; + free(chunks[nchunks]); + chunks[nchunks] = NULL; + tot_mb -= chunk_size; + + if (nchunks > 1) { + nchunks--; + free(chunks[nchunks]); + chunks[nchunks] = NULL; + tot_mb -= chunk_size; + nchunks--; + free(chunks[nchunks]); + chunks[nchunks] = NULL; + tot_mb -= chunk_size; + } + + oscillation_cnt++; + } + } + + usleep(interval * 1000); + } + + return (1); +} diff --git a/tools/tests/unit_tests/fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c b/tools/tests/unit_tests/fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c new file mode 100644 index 000000000..be921a3d5 --- /dev/null +++ b/tools/tests/unit_tests/fcntlrangecheck_tests_11202484_src/fcntlrangecheck_tests_11202484.c @@ -0,0 +1,210 @@ +#include +#include +#include +#include +#include +#include +#include + +#define SUCCESS 0 +#define FAILURE -1 + +int do_fcntl_lock(int fd, int cmd, short lock_type, off_t start, short when, off_t len, int ret){ + struct flock fl; + bzero(&fl, sizeof(fl)); + fl.l_start = start; + fl.l_len = len; + fl.l_type = lock_type; + fl.l_whence = when; + errno = 0; + int retval = fcntl(fd, cmd, &fl); + printf ("fcntl with flock(%lld,%lld,%d,%d) returned %d and errno %d \n", start, len, lock_type, when, retval, errno); + if ( retval < 0) + perror("fcntl"); + + if (retval != ret) { + printf("[FAILED] fcntl test failed\n"); + exit(-1); + } + return retval; +} + +#define read_lock(fd, offset, whence, len, ret) \ + do_fcntl_lock(fd, F_SETLK, F_RDLCK, offset, whence, len, ret) +#define readw_lock(fd, offset, whence, len, ret) \ + do_fcntl_lock(fd, F_SETLKW, F_RDLCK, offset, whence, len, ret) +#define write_lock(fd, offset, whence, len, ret) \ + do_fcntl_lock(fd, F_SETLK, F_WRLCK, offset, whence, len, ret) +#define writew_lock(fd, offset, whence, len, ret) \ + do_fcntl_lock(fd, F_SETLKW, F_WRLCK, offset, whence, len, ret) +#define un_lock(fd, offset, whence, len, ret) \ + do_fcntl_lock(fd, F_SETLK, F_UNLCK, offset, whence, len, ret) +#define is_read_lock(fd, offset, whence, len, ret) \ + do_fcntl_lock(fd, F_GETLK, F_RDLCK, offset, whence, len, ret) +#define is_write_lock(fd, offset, whence, len, ret) \ + do_fcntl_lock(fd, F_GETLK, F_WRLCK, offset, whence, len, ret) + + +int main(){ + int fd = 0; + char *tmpfile ="/tmp/fcntltry.txt"; + + unlink(tmpfile); + fd = creat(tmpfile, S_IRWXU); + if (fd < 0) { + perror("creat"); + goto failed; + } + + /* fcntl with seek position set to 1 */ + if (lseek(fd, (off_t)1, SEEK_SET) != 1){ + perror("lseek"); + goto failed; + } + 
+	off_t lock_start = 0, lock_len = 0;
+
+	printf("Testing with SEEK_SET\n");
+
+	/* testing F_GETLK for SEEK_SET with lock_start = constant and len changes */
+	lock_start = 0;
+	is_read_lock(fd, lock_start, SEEK_SET, 0, SUCCESS);
+	is_read_lock(fd, lock_start, SEEK_SET, LLONG_MAX, SUCCESS);
+	is_read_lock(fd, lock_start, SEEK_SET, LLONG_MIN, FAILURE);
+
+	/* testing F_GETLK for SEEK_SET with len fixed 0 and lock_start changing */
+	lock_len = 0;
+	is_read_lock(fd, 0, SEEK_SET, lock_len, SUCCESS);
+	is_read_lock(fd, LLONG_MAX, SEEK_SET, lock_len, SUCCESS);
+	is_read_lock(fd, LLONG_MIN, SEEK_SET, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_SET with len fixed max and lock_start changing */
+	lock_len = LLONG_MAX;
+	is_read_lock(fd, 0, SEEK_SET, lock_len, SUCCESS);
+	is_read_lock(fd, 1, SEEK_SET, lock_len, SUCCESS);
+	is_read_lock(fd, 2, SEEK_SET, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MAX, SEEK_SET, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN, SEEK_SET, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_SET with len fixed min and lock_start changing */
+	lock_len = LLONG_MIN;
+	is_read_lock(fd, 0, SEEK_SET, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MAX, SEEK_SET, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN, SEEK_SET, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_SET with len fixed at 20 and lock_start changing */
+	lock_len = 20;
+	is_read_lock(fd, 0, SEEK_SET, lock_len, SUCCESS);
+	is_read_lock(fd, 100, SEEK_SET, lock_len, SUCCESS);
+	is_read_lock(fd, -100, SEEK_SET, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_SET with len fixed at -20 and lock_start changing */
+	lock_len = -20;
+	is_read_lock(fd, 0, SEEK_SET, lock_len, FAILURE);
+	is_read_lock(fd, 100, SEEK_SET, lock_len, SUCCESS);
+	is_read_lock(fd, -100, SEEK_SET, lock_len, FAILURE);
+
+	printf("Testing with SEEK_CUR with offset 1 \n");
+
+	/* testing F_GETLK for SEEK_CUR with lock_start = constant and len changes */
+	lock_start = 0;
+	is_read_lock(fd, lock_start, SEEK_CUR, 0, SUCCESS);
+	is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MAX, SUCCESS);
+	is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MIN, FAILURE);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed 0 and lock_start changing */
+	lock_len = 0;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS);
+	is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MAX - 1, SEEK_CUR, lock_len, SUCCESS);
+	is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed max and lock_start changing */
+	lock_len = LLONG_MAX;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS);
+	is_read_lock(fd, 1, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, 2, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed min and lock_start changing */
+	lock_len = LLONG_MIN;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed at 20 and lock_start changing */
+	lock_len = 20;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS);
+	is_read_lock(fd, 100, SEEK_CUR, lock_len, SUCCESS);
+	is_read_lock(fd, -100, SEEK_CUR, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed at -20 and lock_start changing */
+	lock_len = -20;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, 100, SEEK_CUR, lock_len, SUCCESS);
+	is_read_lock(fd, -100, SEEK_CUR, lock_len, FAILURE);
+
+	close(fd);
+
+	unlink(tmpfile);
+	fd = creat(tmpfile, S_IRWXU);
+	if (fd < 0) {
+		perror("creat");
+		goto failed;
+	}
+
+	/* fcntl with seek position set to LLONG_MAX - 1 */
+	if (lseek(fd, (off_t)LLONG_MAX - 1, SEEK_SET) != (LLONG_MAX - 1)){
+		perror("lseek");
+		goto failed;
+	}
+
+
+	printf("Testing with SEEK_CUR with offset LLONG_MAX - 1\n");
+
+	/* testing F_GETLK for SEEK_CUR with lock_start = constant and len changes */
+	lock_start = 0;
+	is_read_lock(fd, lock_start, SEEK_CUR, 0, SUCCESS);
+	is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MAX, FAILURE);
+	is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MIN, FAILURE);
+	is_read_lock(fd, lock_start, SEEK_CUR, LLONG_MIN + 2, SUCCESS);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed 0 and lock_start changing */
+	lock_len = 0;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS);
+	is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN + 2, SEEK_CUR, lock_len, SUCCESS);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed max and lock_start changing */
+	lock_len = LLONG_MAX;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN + 2, SEEK_CUR, lock_len, SUCCESS);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed min and lock_start changing */
+	lock_len = LLONG_MIN;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MAX, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, LLONG_MIN, SEEK_CUR, lock_len, FAILURE);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed at 20 and lock_start changing */
+	lock_len = 20;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, -100, SEEK_CUR, lock_len, SUCCESS);
+
+	/* testing F_GETLK for SEEK_CUR with len fixed at -20 and lock_start changing */
+	lock_len = -20;
+	is_read_lock(fd, 0, SEEK_CUR, lock_len, SUCCESS);
+	is_read_lock(fd, 100, SEEK_CUR, lock_len, FAILURE);
+	is_read_lock(fd, -100, SEEK_CUR, lock_len, SUCCESS);
+
+
+	printf("[PASSED] fcntl test passed \n");
+	return 0;
+failed:
+	printf("[FAILED] fcntl test failed\n");
+	return -1;
+
+}
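Every expectation in this test reduces to one range computation: resolve the absolute start from the whence base plus l_start, then require that the inclusive byte range neither overflows off_t nor begins before byte 0. For instance, at file offset 1, `is_read_lock(fd, 1, SEEK_CUR, LLONG_MAX, FAILURE)` is expected because the range would end at 2 + LLONG_MAX - 1, past LLONG_MAX. A hypothetical model of that check (the kernel's actual logic lives in its lockf code and differs in detail) reproduces the SUCCESS/FAILURE table above:

    #include <limits.h>
    #include <stdbool.h>

    /* Hypothetical mirror of the bounds check these cases probe. */
    static bool flock_range_ok(long long base, long long start, long long len)
    {
        long long lo;

        /* base + start (the absolute lock start) must not overflow. */
        if (start > 0 ? base > LLONG_MAX - start : base < LLONG_MIN - start)
            return false;
        lo = base + start;

        if (len == 0)                   /* len 0 locks from lo to EOF */
            return lo >= 0;
        if (len > 0)                    /* locks [lo, lo + len - 1] */
            return lo >= 0 && lo <= LLONG_MAX - (len - 1);
        /* negative len locks [lo + len, lo - 1] */
        return len != LLONG_MIN && lo >= 0 && lo + len >= 0;
    }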
diff --git a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test.c b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test.c
new file mode 100644
index 000000000..ae587fdff
--- /dev/null
+++ b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test.c
@@ -0,0 +1,532 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "guarded_test_common.h"
+
+#include
+
+#if !defined(SYS_guarded_kqueue_np)
+#define guarded_kqueue_np(gp, gf) syscall(443, gp, gf)
+#endif
+
+#if !defined(SYS_change_fdguard_np)
+#define change_fdguard_np(fd, gp, gf, ngp, nfg, flp) \
+	syscall(444, fd, gp, gf, ngp, nfg, flp)
+#endif
+
+#define SERVER_NAME "/tmp/fdserver"
+
+typedef union {
+	struct cmsghdr cmsghdr;
+	u_char msg_control[0];
+} cmsghdr_msg_control_t;
+
+/* Test case for closing a guarded fd */
+void close_guarded_fd(int);
+/* Test case for duping a guarded fd */
+void dup_guarded_fd(int);
+/* Test case for removing flag from guarded fd */
+void remove_flag_guarded_fd(int);
+/* Test case for closing guarded fd with bad guard */
+void badguard_close_guarded_fd(int, guardid_t);
+/* Test case for
guarded closing an unguarded fd */ +void guard_close_unguarded_fd(guardid_t); +/* Test case for guarded closing a guarded fd correctly */ +void guard_close_guarded_fd(int, guardid_t); +/* Test case for creating a file port from a guarded fd */ +void fileport_makeport_guarded_fd(int); +/* Test case for sending guarded fd over socket */ +void sendmsg_guarded_fd(int); +/* Test case for removing the guard from a guarded fd */ +void remove_guard(int, guardid_t, u_int, int); +/* Test case for adding a guard to a tcp socket */ +void add_guard_to_socket(guardid_t); +/* Test case for a guarded kqueue */ +void create_and_close_guarded_kqueue(guardid_t); + +/* Helper routines */ +void *client_recv_fd(void *); +int receive_fd_using_sockfd(int *, int); +int send_fd_using_sockfd(int, int); +int setup_server(const char *); + +const guardid_t guard = 0x123456789abcdefull; +char *pname; + +static void usage(void) +{ + printf("usage: %s [test number]\n", pname); + printf("test 0: Test case for closing a guarded fd\n"); + printf("test 1: Test case for duping a guarded fd\n"); + printf("test 2: Test case for removing FD_CLOEXEC flag from a guarded fd\n"); + printf("test 3: Test case for closing a guarded fd with a bad guard\n"); + printf("test 4: Test case for closing an unguarded fd using a guarded close\n"); + printf("test 5: Test case for closing a guarded fd with the correct guard\n"); + printf("test 6: Test case for creating a file port from a guarded fd\n"); + printf("test 7: Test case for sending a guarded fd over a socket\n"); + printf("test 8: Test case for removing the guard from a guarded fd\n"); + printf("test 9: Test case for adding a guard to a tcp socket\n"); + printf("test 10: Test case for a guarded kqueue\n"); +} + +int main(int argc, char *argv[]) +{ + int option, fd; + + pname = argv[0]; + if (argc != 2) { + usage(); + exit(1); + } + printf("Test Program invoked with option [%s]\n", argv[1]); + option = atoi(argv[1]); + + close(TEST_FD); + fd = guarded_open_np( + "/tmp/try.txt", + &guard, + GUARD_CLOSE | GUARD_DUP | GUARD_SOCKET_IPC | GUARD_FILEPORT, + O_CREAT | O_CLOEXEC | O_RDWR, + 0666); + + if (-1 == fd) { + perror("guarded_open_np"); + exit(1); + } + + switch(option) { + + case 0: + close_guarded_fd(fd); + break; + case 1: + dup_guarded_fd(fd); + break; + case 2: + remove_flag_guarded_fd(fd); + break; + case 3: + badguard_close_guarded_fd(fd, guard); + break; + case 4: + guard_close_unguarded_fd(guard); + break; + case 5: + guard_close_guarded_fd(fd, guard); + break; + case 6: + fileport_makeport_guarded_fd(fd); + break; + case 7: + sendmsg_guarded_fd(fd); + break; + case 8: + remove_guard(fd, guard, GUARD_CLOSE | GUARD_DUP | + GUARD_SOCKET_IPC | GUARD_FILEPORT, FD_CLOEXEC); + break; + case 9: + add_guard_to_socket(guard); + break; + case 10: + create_and_close_guarded_kqueue(guard); + break; + default: + usage(); + exit(1); + } + + return 0; +} + +void close_guarded_fd(int fd) +{ + int ret_val; + printf("Performing close on a guarded fd...\n"); + + /* Brute force way of ensuring that the child process + * uses the TEST_FD which is checked by the parent + */ + while(fd != TEST_FD && fd <= TEST_FD) { + fd = guarded_open_np( + "/tmp/try.txt", + &guard, + GUARD_CLOSE | GUARD_DUP | GUARD_SOCKET_IPC | GUARD_FILEPORT, + O_CREAT | O_CLOEXEC | O_RDWR, + 0666); + + if (-1 == fd) { + perror("guarded_open_np"); + exit(1); + } + } + + ret_val = close(TEST_FD); + fprintf(stderr, "close() returned (%d) on a guarded fd?!\n", ret_val); + exit(1); +} + +void dup_guarded_fd(int fd) +{ + int ret_val; + 
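+	/* With GUARD_DUP set, dup(2) should raise a fatal EXC_GUARD; returning here means the guard did not fire. */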
printf("Performing dup on a guarded fd...\n"); + ret_val = dup(fd); + fprintf(stderr, "dup() returned (%d) on a guarded fd?!\n", ret_val); + exit(1); +} + +void remove_flag_guarded_fd(int fd) +{ + int ret_val, value; + printf("Removing FD_CLOEXEC from a guarded fd...\n"); + value = fcntl(fd, F_GETFD); + if (-1 == value) { + fprintf(stderr, "fcntl:F_GETFD failed with %s!\n", strerror(errno)); + exit(1); + } + ret_val = fcntl(fd, F_SETFD, value & ~FD_CLOEXEC); + fprintf(stderr, "fcntl:F_SETFD returned (%d) on a guarded fd?!\n", ret_val); + exit(1); +} + +void badguard_close_guarded_fd(int fd, guardid_t guard) +{ + int ret_val; + printf("Closing guarded fd with a bad guard...\n"); + guardid_t badguard = guard << 1; + ret_val = guarded_close_np(fd, &badguard); + if (-1 == ret_val) { + switch (errno) { + case EPERM: + /* Expected */ + perror("guarded_close_np"); + exit(0); + default: + perror("guarded_close_np"); + break; + } + } + fprintf(stderr, + "Close with bad guard returned (%d) on a guarded fd?!\n", ret_val); + exit(1); +} + +void guard_close_unguarded_fd(guardid_t guard) +{ + printf("Closing Unguarded fd with guarded_close_np...\n"); + int newfd, ret_val; + + if ((newfd = dup(fileno(stderr))) == -1) { + fprintf(stderr, "Failed to dup stderr!\n"); + exit(1); + } + + ret_val = guarded_close_np(newfd, &guard); + if (-1 == ret_val) { + /* Expected */ + perror("guarded_close_np"); + exit(0); + } + else { + fprintf(stderr, "Closing unguarded fd with guarded_fd succeeded with return value (%d)?!\n", ret_val); + exit(1); + } +} + +void guard_close_guarded_fd(int fd, guardid_t guard) +{ + printf("Closing a guarded fd with correct guard...\n"); + if (-1 == guarded_close_np(fd, &guard)) { + fprintf(stderr, "Closing guarded fd with correct guard failed?!\n"); + exit(1); + } + /* Expected */ + exit(0); +} + +void fileport_makeport_guarded_fd(int fd) +{ + mach_port_name_t fdname = MACH_PORT_NULL; + int ret_val; + printf("Creating a file port from a guarded fd...\n"); + ret_val = fileport_makeport(fd, &fdname); + fprintf(stderr, "Creating a file port from guarded fd returned (%d)?!\n", ret_val); + exit(1); +} + +void sendmsg_guarded_fd(int fd) +{ + int sockfd, err; + int csockfd; + socklen_t len; + struct sockaddr_un client_unix_addr; + pthread_t client_thread; + int ret_val; + + /* Setup fd server */ + if ((sockfd = setup_server(SERVER_NAME)) < 0) { + exit(1); + } + + if(-1 == listen(sockfd, 5)) { + perror("listen"); + exit(1); + } + + /* Create client thread */ + if ((err = pthread_create(&client_thread, NULL, client_recv_fd, 0)) != 0) { + fprintf(stderr, "pthread_create server_thread: %s\n", strerror(err)); + exit(1); + } + + pthread_detach(client_thread); + + for (;;) { + len = sizeof (client_unix_addr); + csockfd = accept(sockfd, + (struct sockaddr *)&client_unix_addr, &len); + if (csockfd < 0) { + perror("accept"); + exit(1); + } + + printf("Sending guarded fd on a socket...\n"); + ret_val = send_fd_using_sockfd(fd, csockfd); + if(ret_val < 0) { + /* Expected */ + fprintf(stderr, "sendmsg failed with return value (%d)!\n", ret_val); + } + else { + fprintf(stderr, "Sending guarded fd on socket succeeded with return value (%d)?!\n", ret_val); + } + } + + exit(0); +} + +void +remove_guard(int fd, guardid_t guard, u_int guardflags, int fdflags) +{ + printf("Remove the guard from a guarded fd, then dup(2) it ...\n"); + + int ret_val = change_fdguard_np(fd, &guard, guardflags, NULL, 0, &fdflags); + + if (ret_val == -1) { + perror("change_fdguard_np"); + exit(1); + } + + printf("Dup-ing the unguarded 
fd ...\n"); + + /* + * Now that the GUARD_DUP has been removed, we should be able + * to dup the descriptor with no exception generation. + */ + int newfd = dup(fd); + + if (-1 == newfd) { + perror("dup"); + exit(1); + } + exit(0); +} + +void +add_guard_to_socket(guardid_t guard) +{ + printf("Add a close guard to an unguarded socket fd, then close it ...\n"); + + int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (-1 == s) { + perror("socket"); + exit(1); + } + + int ret_val = change_fdguard_np(s, NULL, 0, &guard, GUARD_CLOSE | GUARD_DUP, NULL); + + if (-1 == ret_val) { + perror("change_fdguard_np"); + exit(1); + } + + /* + * Now we've added a GUARD_CLOSE successfully, let's try and do a close + */ + if (-1 == close(s)) + perror("close"); + /* + * This is an error, because we should've received a fatal EXC_GUARD + */ + exit(1); +} + +void +create_and_close_guarded_kqueue(guardid_t guard) +{ + printf("Create a guarded kqueue, then guarded_close_np() it ...\n"); + + int kq = guarded_kqueue_np(&guard, GUARD_CLOSE | GUARD_DUP); + + int ret_val = guarded_close_np(kq, &guard); + if (-1 == ret_val) { + perror("guarded_close_np"); + exit(1); + } + + printf("Create a guarded kqueue, then close() it ...\n"); + + kq = guarded_kqueue_np(&guard, GUARD_CLOSE | GUARD_DUP); + if (-1 == close(kq)) + perror("close"); + /* + * This is always an error, because we should've received a fatal EXC_GUARD + */ + exit(1); +} + +/* + * Helper Routines + */ + +int setup_server(const char *name) +{ + int sockfd, len; + struct sockaddr_un server_unix_addr; + + if ((sockfd = socket(AF_LOCAL, SOCK_STREAM, 0)) < 0) { + perror("socket"); + return (sockfd); + } + + (void) unlink(name); + bzero(&server_unix_addr, sizeof (server_unix_addr)); + server_unix_addr.sun_family = AF_LOCAL; + (void) strcpy(server_unix_addr.sun_path, name); + len = strlen(name) + 1; + len += sizeof (server_unix_addr.sun_family); + + if (bind(sockfd, (struct sockaddr *)&server_unix_addr, len) < 0) { + (void) close(sockfd); + return (-1); + } + return (sockfd); +} + +int send_fd_using_sockfd(int fd, int sockfd) +{ + ssize_t ret; + struct iovec iovec[1]; + struct msghdr msg; + struct cmsghdr *cmsghdrp; + cmsghdr_msg_control_t *cmsghdr_msg_control; + + cmsghdr_msg_control = malloc(CMSG_SPACE(sizeof (int))); + + iovec[0].iov_base = ""; + iovec[0].iov_len = 1; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = iovec; + msg.msg_iovlen = 1; + msg.msg_control = cmsghdr_msg_control->msg_control; + msg.msg_controllen = CMSG_SPACE(sizeof (int)); + msg.msg_flags = 0; + + cmsghdrp = CMSG_FIRSTHDR(&msg); + cmsghdrp->cmsg_len = CMSG_LEN(sizeof (int)); + cmsghdrp->cmsg_level = SOL_SOCKET; + cmsghdrp->cmsg_type = SCM_RIGHTS; + + *((int *)CMSG_DATA(cmsghdrp)) = fd; + + if ((ret = sendmsg(sockfd, &msg, 0)) < 0) { + perror("sendmsg"); + return ret; + } + + return 0; +} + +int receive_fd_using_sockfd(int *fd, int sockfd) +{ + ssize_t ret; + u_char c; + int errcount = 0; + struct iovec iovec[1]; + struct msghdr msg; + struct cmsghdr *cmsghdrp; + cmsghdr_msg_control_t *cmsghdr_msg_control; + + cmsghdr_msg_control = malloc(CMSG_SPACE(sizeof (int))); + + iovec[0].iov_base = &c; + iovec[0].iov_len = 1; + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = iovec; + msg.msg_iovlen = 1; + msg.msg_control = cmsghdr_msg_control->msg_control; + msg.msg_controllen = CMSG_SPACE(sizeof (int)); + msg.msg_flags = 0; + + if ((ret = recvmsg(sockfd, &msg, 0)) < 0) { + perror("recvmsg"); + return ret; + } + + cmsghdrp = CMSG_FIRSTHDR(&msg); + if (cmsghdrp == NULL) { + 
*fd = -1; + return ret; + } + + if (cmsghdrp->cmsg_len != CMSG_LEN(sizeof (int))) + errcount++; + if (cmsghdrp->cmsg_level != SOL_SOCKET) + errcount++; + if (cmsghdrp->cmsg_type != SCM_RIGHTS) + errcount++; + if (errcount) { + *fd = -1; + } else + *fd = *((int *)CMSG_DATA(cmsghdrp)); + return ret; +} + +void *client_recv_fd(void *arg) +{ + char buf[512]; + int fd = -1, sockfd, len, ret; + struct sockaddr_un server_unix_addr; + + bzero(&server_unix_addr, sizeof (server_unix_addr)); + strcpy(server_unix_addr.sun_path, SERVER_NAME); + server_unix_addr.sun_family = AF_LOCAL; + len = strlen(SERVER_NAME) + 1; + len += sizeof (server_unix_addr.sun_family); + + if ((sockfd = socket(AF_LOCAL, SOCK_STREAM, 0)) < 0) { + perror("socket"); + exit(1); + } + + if (connect(sockfd, (struct sockaddr *)&server_unix_addr, len) < 0) { + perror("connect"); + exit(1); + } + + ret = receive_fd_using_sockfd(&fd, sockfd); + return (NULL); +} diff --git a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_common.h b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_common.h new file mode 100644 index 000000000..ce3bcf7c8 --- /dev/null +++ b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_common.h @@ -0,0 +1,11 @@ +/* + * Common file for Guarded fd Unit Tests + */ + +#ifndef _GUARDED_TEST_COMMON_H_ +#define _GUARDED_TEST_COMMON_H_ + +/* Exception causing fd for test program */ +#define TEST_FD 25 + +#endif /* _GUARDED_TEST_COMMON_H_ */ diff --git a/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_framework.c b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_framework.c new file mode 100644 index 000000000..9fdc2d770 --- /dev/null +++ b/tools/tests/unit_tests/guarded_fd_tests_11746236_src/guarded_test_framework.c @@ -0,0 +1,276 @@ +/* + * Testing Framework for EXC_GUARD exceptions + * + * The framework tests for exception conditions for guarded fds. + * It creates a new exception port and an associated handling thread. + * For each test case, the framework sets its own exception port to the + * newly allocated port, execs a new child (which inherits the new + * exception port) and restores the parent's exception port to the + * original handler. The child process is invoked with a different + * test case identifier and invokes the corresponding test case. 
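+ *
+ * [Editor's note: the per-test spawn/check loop in main() below was
+ * damaged in extraction. The following sketch of one iteration is a
+ * reconstruction, not the verbatim source: the variable names are taken
+ * from the surrounding code, while the child argv layout and the
+ * behavior/flavor arguments are assumptions inferred from the
+ * catch_mach_exception_raise() handler this file installs.]
+ *
+ *	sprintf(test_id, "%d", i);
+ *	child_args[0] = test_prog_name;
+ *	child_args[1] = test_id;
+ *	child_args[2] = NULL;
+ *	exception_code = 0;
+ *	task_set_exception_ports(task, EXC_MASK_GUARD, exc_port,
+ *	    EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES, THREAD_STATE_NONE);
+ *	if (fork() == 0)
+ *		posix_spawn(NULL, test_prog_name, NULL, &attrs, child_args,
+ *		    environ);	(POSIX_SPAWN_SETEXEC replaces the child image)
+ *	task_set_exception_ports(task, EXC_MASK_GUARD, handler, behavior, flavor);
+ *	wait(&child_status);
+ *	exc_id = exception_code & ~((uint64_t)EXC_GUARD_FD_MASK);
+ *	exc_fd = exception_code & EXC_GUARD_FD_MASK;
+ *
+ * A test passes when the child's exit status, exc_id against
+ * test_exception_code[i], and (for CHK_TEST_FD cases) exc_fd against
+ * TEST_FD all come out as expected per the tables below.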
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "guarded_test_common.h" +#include + +#define MAX_TEST_ID_LEN 16 +#define MAX_ARGV 8 +#define EXC_GUARD_FLAVOR_SHIFT 32 +#define EXC_GUARD_TYPE_SHIFT 61 +#define EXC_GUARD_FD_MASK 0xFFFFFFFF + +/* + * To add a new test case to this framework: + * - Increment the NUMTESTS value + * - Add (Guard Type | flavor) to "test_exception_code" if the + * test case generates an exception; 0 otherwise + * - Add CHK_TEST_FD/IGN_TEST_FD depending on whether + * framework should look for TEST_FD in the exception message + * - Add a new case and routine in guarded_test.c to + * test the scenario + */ + +#define NUMTESTS 11 + +uint64_t test_exception_code[] = { + (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_CLOSE) << EXC_GUARD_FLAVOR_SHIFT), + (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_DUP) << EXC_GUARD_FLAVOR_SHIFT), + (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_NOCLOEXEC) << EXC_GUARD_FLAVOR_SHIFT), + 0, + 0, + 0, + (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_FILEPORT) << EXC_GUARD_FLAVOR_SHIFT), + (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_SOCKET_IPC) << EXC_GUARD_FLAVOR_SHIFT), + 0, + (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_CLOSE) << EXC_GUARD_FLAVOR_SHIFT), + (((uint64_t)GUARD_TYPE_FD) << EXC_GUARD_TYPE_SHIFT) | (((uint64_t)kGUARD_EXC_CLOSE) << EXC_GUARD_FLAVOR_SHIFT) +}; + +#define CHK_TEST_FD 1 +#define IGN_TEST_FD 0 + +uint64_t test_fd[] = { + CHK_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD, + IGN_TEST_FD +}; + +mach_port_t exc_port; +uint64_t exception_code; +extern char **environ; + +boolean_t mach_exc_server( + mach_msg_header_t *InHeadP, + mach_msg_header_t *OutHeadP); + +kern_return_t catch_mach_exception_raise +( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt + ) +{ + if (exception == EXC_GUARD) { + /* Set global variable to indicate exception received */ + exception_code = *((uint64_t *)code); + } else { + /* Terminate test on all other unexpected exceptions */ + fprintf(stderr, "received unexpected exception type %#x\n", exception); + exit(1); + } + + return (KERN_SUCCESS); +} + +kern_return_t catch_mach_exception_raise_state +( + mach_port_t exception_port, + exception_type_t exception, + const mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + const thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt + ) +{ + fprintf(stderr, "Unexpected exception handler called\n"); + exit(1); + return (KERN_FAILURE); +} + + +kern_return_t catch_mach_exception_raise_state_identity +( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt + ) +{ + fprintf(stderr, "Unexpected exception handler called\n"); + exit(1); + return (KERN_FAILURE); +} + + +void *server_thread(void *arg) +{ + kern_return_t kr; + + 
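+	/*
+	 * [Editor's note: illustrative, not part of the original patch;
+	 * assumes the exception port is installed with MACH_EXCEPTION_CODES,
+	 * as the use of mach_exc_server() suggests.] The 64-bit EXC_GUARD
+	 * code saved by catch_mach_exception_raise() above packs three
+	 * fields, which the shifts/mask defined at the top of this file
+	 * unpack as:
+	 *
+	 *	type   = exception_code >> EXC_GUARD_TYPE_SHIFT;                    bits 63..61
+	 *	flavor = (exception_code >> EXC_GUARD_FLAVOR_SHIFT) & 0x1fffffff;   bits 60..32
+	 *	fd     = exception_code & EXC_GUARD_FD_MASK;                        bits 31..0
+	 *
+	 * This is why each expected value in test_exception_code[] is built
+	 * as (GUARD_TYPE_FD << EXC_GUARD_TYPE_SHIFT) |
+	 * (kGUARD_EXC_* << EXC_GUARD_FLAVOR_SHIFT).
+	 */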
while(1) { + /* Handle exceptions on exc_port */ + if ((kr = mach_msg_server_once(mach_exc_server, 4096, exc_port, 0)) != KERN_SUCCESS) { + fprintf(stderr, "mach_msg_server_once: error %#x\n", kr); + exit(1); + } + } + return (NULL); +} + +int main(int argc, char *argv[]) +{ + posix_spawnattr_t attrs; + kern_return_t kr; + mach_port_t task = mach_task_self(); + + mach_msg_type_number_t maskCount = 1; + exception_mask_t mask; + exception_handler_t handler; + exception_behavior_t behavior; + thread_state_flavor_t flavor; + pthread_t exception_thread; + uint64_t exc_id; + unsigned int exc_fd; + + char *test_prog_name = "./guarded_test"; + char *child_args[MAX_ARGV]; + char test_id[MAX_TEST_ID_LEN]; + int i, err; + int child_status; + int test_status = 0; + + /* Allocate and initialize new exception port */ + if ((kr = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &exc_port)) != KERN_SUCCESS) { + fprintf(stderr, "mach_port_allocate: %#x\n", kr); + exit(1); + } + + if ((kr = mach_port_insert_right(task, exc_port, + exc_port, MACH_MSG_TYPE_MAKE_SEND)) != KERN_SUCCESS) { + fprintf(stderr, "mach_port_allocate: %#x\n", kr); + exit(1); + } + + /* Get Current exception ports */ + if ((kr = task_get_exception_ports(task, EXC_MASK_GUARD, &mask, + &maskCount, &handler, &behavior, &flavor)) != KERN_SUCCESS) { + fprintf(stderr,"task_get_exception_ports: %#x\n", kr); + exit(1); + } + + /* Create exception serving thread */ + if ((err = pthread_create(&exception_thread, NULL, server_thread, 0)) != 0) { + fprintf(stderr, "pthread_create server_thread: %s\n", strerror(err)); + exit(1); + } + + pthread_detach(exception_thread); + + /* Initialize posix_spawn attributes */ + posix_spawnattr_init(&attrs); + + if ((err = posix_spawnattr_setflags(&attrs, POSIX_SPAWN_SETEXEC)) != 0) { + fprintf(stderr, "posix_spawnattr_setflags: %s\n", strerror(err)); + exit(1); + } + + /* Run Tests */ + for(i=0; i diff --git a/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test.c b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test.c new file mode 100644 index 000000000..12e522aa4 --- /dev/null +++ b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test.c @@ -0,0 +1,536 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CONTEXT_VALUE1 0x12345678 +#define CONTEXT_VALUE2 0x11111111 + +char *pname; + +static void usage(void) +{ + printf("usage: %s [test number]\n", pname); + printf("Test 0: Test case for constructing a mach port with options\n"); + printf("Test 1: Test case for destructing guarded mach port\n"); + printf("Test 2: Test case for destroying guarded mach port\n"); + printf("Test 3: Test case for mod_ref() guarded mach port\n"); + printf("Test 4: Test case for guarding mach port\n"); + printf("Test 5: Test case for unguarding mach port\n"); + printf("Test 6: Test case for unguarding a non-guarded port\n"); + printf("Test 7: Test case for guarding a mach port with context\n"); + printf("Test 8: Test case for mach_port_get_context()\n"); + printf("Test 9: Test case for mach_port_set_context()\n"); +} + +/* Test case for constructing a mach port with options */ +void construct_mach_port(); +/* Test case for destructing guarded mach port */ +void destruct_guarded_mach_port(); +/* Test case for destroying guarded mach port */ +void destroy_guarded_mach_port(); +/* Test case for mod_ref() guarded mach port */ +void mod_ref_guarded_mach_port(); +/* Test case for guarding mach port */ +void guard_mach_port(); +/* 
Test case for unguarding mach port */ +void unguard_mach_port(); +/* Test case for unguarding a non-guarded port */ +void unguard_nonguarded_mach_port(); +/* Test case for guarding a mach port with context */ +void guard_port_with_context(); +/* Test case for mach_port_get_context() */ +void get_context_mach_port(); +/* Test case for mach_port_set_context() */ +void set_context_mach_port(); + +int main(int argc, char *argv[]) +{ + int option, fd; + + pname = argv[0]; + if (argc != 2) { + usage(); + exit(1); + } + printf("Test Program invoked with option [%s]\n", argv[1]); + option = atoi(argv[1]); + + + switch(option) { + + case 0: + construct_mach_port(); + break; + case 1: + destruct_guarded_mach_port(); + break; + case 2: + destroy_guarded_mach_port(); + break; + case 3: + mod_ref_guarded_mach_port(); + break; + case 4: + guard_mach_port(); + break; + case 5: + unguard_mach_port(); + break; + case 6: + unguard_nonguarded_mach_port(); + break; + case 7: + guard_port_with_context(); + break; + case 8: + get_context_mach_port(); + break; + case 9: + set_context_mach_port(); + break; + default: + usage(); + exit(1); + } + + return 0; +} + +void construct_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + mach_port_context_t g; + int kret; + + printf("Testing All mach_port_construct() options...\n"); + + printf("No options specified: "); + options.flags = 0; + kret = mach_port_construct(mach_task_self(), &options, 0, &port); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Options MPO_GUARD: "); + options.flags = MPO_CONTEXT_AS_GUARD; + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + + printf("Options MPO_GUARD|MPO_STRICT: "); + options.flags = MPO_CONTEXT_AS_GUARD|MPO_STRICT; + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret == KERN_SUCCESS) { + kret = mach_port_get_context(mach_task_self(), port, &g); + if (kret != KERN_SUCCESS || g != 0) + goto failed; + else + printf("[PASSED]\n"); + } + else + goto failed; + + printf("Options MPO_QLIMIT: "); + options.flags = MPO_QLIMIT; + mach_port_limits_t limits = { MACH_PORT_QLIMIT_SMALL }; + options.mpl = limits; + kret = mach_port_construct(mach_task_self(), &options, 0, &port); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Options MPO_TEMPOWNER: "); + options.flags = MPO_TEMPOWNER; + kret = mach_port_construct(mach_task_self(), &options, 0, &port); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Options MPO_IMPORTANCE_RECEIVER: "); + options.flags = MPO_IMPORTANCE_RECEIVER; + kret = mach_port_construct(mach_task_self(), &options, 0, &port); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Options MPO_INSERT_SEND_RIGHT: "); + options.flags = MPO_INSERT_SEND_RIGHT; + kret = mach_port_construct(mach_task_self(), &options, 0, &port); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("No options specified (Construct Port-Set): "); + options.flags = 0; + kret = mach_port_construct(mach_task_self(), &options, 0, &port); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("...Complete\n"); + return; + +failed: + printf("[FAILED %d]\n", kret); + exit(1); +} + +void destruct_guarded_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + 
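+	/*
+	 * [Editor's note: sketch, not part of the original patch; variable
+	 * names are illustrative.] The pairing these cases exercise: a port
+	 * created with MPO_CONTEXT_AS_GUARD is meant to be released through
+	 * mach_port_destruct() with the matching guard, e.g.
+	 *
+	 *	mach_port_options_t opts = { .flags = MPO_CONTEXT_AS_GUARD };
+	 *	mach_port_t p;
+	 *	mach_port_construct(mach_task_self(), &opts, CONTEXT_VALUE1, &p);
+	 *	...
+	 *	mach_port_destruct(mach_task_self(), p, 0, CONTEXT_VALUE1);
+	 *
+	 * where 0 is the send-right delta. Calling mach_port_destroy() or
+	 * mach_port_mod_refs() on such a port raises a fatal EXC_GUARD
+	 * instead, as the destroy/mod_refs cases below verify.
+	 */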
mach_port_context_t gval = CONTEXT_VALUE1; + int kret; + + printf("Destructing guarded mach port with correct guard: "); + options.flags = (MPO_CONTEXT_AS_GUARD); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_destruct(mach_task_self(), port, 0, gval); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Destructing guarded mach ports with incorrect send right count: "); + options.flags = (MPO_CONTEXT_AS_GUARD); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_destruct(mach_task_self(), port, -1, gval); + if (kret != KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Destructing guarded mach ports with correct send right and correct guard: "); + options.flags = (MPO_CONTEXT_AS_GUARD|MPO_INSERT_SEND_RIGHT); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_destruct(mach_task_self(), port, -1, gval); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Destructing guarded mach port with incorrect guard (Expecting exception)...\n"); + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_destruct(mach_task_self(), port, 0, 0); + if (kret == KERN_SUCCESS) + goto failed; + return; + +failed: + printf("[FAILED]\n"); + exit(1); + +} + +void destroy_guarded_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + int kret; + + printf("Destroying guarded mach port (Expecting exception)...\n"); + options.flags = (MPO_CONTEXT_AS_GUARD); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_destroy(mach_task_self(), port); + if (kret == KERN_SUCCESS) { + printf("[FAILED]\n"); + exit(1); + } + + return; +} + +void mod_ref_guarded_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + int kret; + + printf("mach_port_mod_refs() guarded mach port (Expecting exception)...\n"); + options.flags = (MPO_CONTEXT_AS_GUARD); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_mod_refs(mach_task_self(), port, MACH_PORT_RIGHT_RECEIVE, -1); + if (kret == KERN_SUCCESS) { + printf("[FAILED]\n"); + exit(1); + } + + return; +} + +void guard_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + int kret; + + printf("Testing guarding a non-guarded mach port: "); + kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_guard(mach_task_self(), port, gval, 0); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Testing guarding a guarded mach port: "); + kret = mach_port_guard(mach_task_self(), port, CONTEXT_VALUE2, 0); + if (kret != KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + return; + +failed: + printf("[FAILED]\n"); + exit(1); + +} + +void unguard_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + int kret; + + printf("Testing unguard with correct guard: \n"); + + options.flags = 
(MPO_CONTEXT_AS_GUARD); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_unguard(mach_task_self(), port, gval); + if (kret == KERN_SUCCESS) + printf("[PASSED]\n"); + else + goto failed; + + printf("Testing unguard with incorrect guard (Expecting Exception)... \n"); + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + kret = mach_port_unguard(mach_task_self(), port, CONTEXT_VALUE2); + if (kret == KERN_SUCCESS) + goto failed; + + return; + +failed: + printf("[FAILED]\n"); + exit(1); + +} + +void unguard_nonguarded_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + int kret; + + printf("Testing mach_port_unguard() for non-guarded port (Expecting exception)...\n"); + + options.flags = 0; + + kret = mach_port_construct(mach_task_self(), &options, 0, &port); + if (kret != KERN_SUCCESS) + exit(1); + kret = mach_port_unguard(mach_task_self(), port, gval); + if (kret == KERN_SUCCESS) { + printf("[FAILED]\n"); + exit(1); + } + + return; +} + +void guard_port_with_context() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + int kret; + + printf("Testing mach_port_guard() for a port with context: "); + kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_set_context(mach_task_self(), port, gval); + if (kret != KERN_SUCCESS) + exit(1); + kret = mach_port_guard(mach_task_self(), port, gval, 0); + if (kret != KERN_SUCCESS) + printf("[PASSED]\n"); + else { + printf("[FAILED]\n"); + exit(1); + } + return; +} + +void get_context_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + mach_port_context_t g; + int kret; + + options.flags = (MPO_CONTEXT_AS_GUARD); + + printf("Testing get_context() for non-strict guarded port: "); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_get_context(mach_task_self(), port, &g); + if (kret != KERN_SUCCESS || g != gval) + goto failed; + else + printf("[PASSED]\n"); + + printf("Testing get_context() for strict guarded port: "); + options.flags = (MPO_CONTEXT_AS_GUARD|MPO_STRICT); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_get_context(mach_task_self(), port, &g); + if (kret != KERN_SUCCESS || g != 0) + goto failed; + else + printf("[PASSED]\n"); + + printf("Testing get_context() for strict guard port (guarded using mach_port_guard): "); + kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); + if (kret != KERN_SUCCESS) + exit(1); + kret = mach_port_guard(mach_task_self(), port, gval, 1); + if (kret != KERN_SUCCESS) + exit(1); + kret = mach_port_get_context(mach_task_self(), port, &g); + if (kret != KERN_SUCCESS || g != 0) + goto failed; + else + printf("[PASSED]\n"); + + printf("Testing get_context() for non-guarded port with context: "); + kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); + if (kret != KERN_SUCCESS) + exit(1); + kret = mach_port_set_context(mach_task_self(), port, gval); + if (kret != KERN_SUCCESS) + exit(1); + kret = mach_port_get_context(mach_task_self(), port, &g); + if (kret != KERN_SUCCESS || g != gval) + goto failed; + else + 
printf("[PASSED]\n"); + + return; + + +failed: + printf("[FAILED]\n"); + exit(1); +} + +void set_context_mach_port() +{ + mach_port_t port; + mach_port_options_t options; + mach_port_context_t gval = CONTEXT_VALUE1; + mach_port_context_t g; + int kret; + + printf("Testing set_context() with non-guarded port: "); + kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port); + if (kret != KERN_SUCCESS) + exit(1); + kret = mach_port_set_context(mach_task_self(), port, gval); + if (kret != KERN_SUCCESS) + goto failed; + else + printf("[PASSED]\n"); + + printf("Testing setting context on non-guarded port with pre-existing context: "); + kret = mach_port_set_context(mach_task_self(), port, CONTEXT_VALUE2); + if (kret != KERN_SUCCESS) + goto failed; + else + printf("[PASSED]\n"); + + printf("Testing setting context on strict guarded port (Expecting Exception)...\n"); + + options.flags = (MPO_CONTEXT_AS_GUARD|MPO_STRICT); + + kret = mach_port_construct(mach_task_self(), &options, gval, &port); + if (kret != KERN_SUCCESS) + exit(1); + + kret = mach_port_set_context(mach_task_self(), port, CONTEXT_VALUE2); + if (kret == KERN_SUCCESS) + goto failed; + + return; + +failed: + printf("[FAILED]\n"); + exit(1); +} + + + + diff --git a/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test_framework.c b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test_framework.c new file mode 100644 index 000000000..15a7d6702 --- /dev/null +++ b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/guarded_test_framework.c @@ -0,0 +1,255 @@ +/* + * Testing Framework for EXC_GUARD exceptions + * + * The framework tests for exception conditions for guarded mach ports. + * It creates a new exception port and an associated handling thread. + * For each test case, the framework sets its own exception port to the + * newly allocated port, execs a new child (which inherits the new + * exception port) and restores the parent's exception port to the + * original handler. The child process is invoked with a different + * test case identifier and invokes the corresponding test case. 
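+ *
+ * [Editor's note, not part of the original patch.] Unlike the guarded-fd
+ * framework, which keeps the full 64-bit exception code and masks out the
+ * fd, this framework compares only the high word: main() appears to compute
+ * exc_id = exception_code >> EXC_CODE_SHIFT (i.e. >> 32), so the guard
+ * type that the kernel stores in bits 63..61 lands at bit 29 of exc_id.
+ * That is why EXC_GUARD_TYPE_SHIFT is 29 in this file, and why the
+ * expected values below are built as
+ * (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_*.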
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_TEST_ID_LEN 16 +#define MAX_ARGV 8 +#define EXC_CODE_SHIFT 32 +#define EXC_GUARD_TYPE_SHIFT 29 + +/* + * To add a new test case to this framework: + * - Increment the NUMTESTS value + * - Add (Guard Type | flavor) to "test_exception_code" if the + * test case generates an exception; 0 otherwise + * - Add a new case and routine in guarded_test.c to + * test the scenario + */ + +#define NUMTESTS 10 + +uint64_t test_exception_code[] = { + 0, + (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_DESTROY, + (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_DESTROY, + (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_MOD_REFS, + 0, + (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_INCORRECT_GUARD, + (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_UNGUARDED, + 0, + 0, + (GUARD_TYPE_MACH_PORT << EXC_GUARD_TYPE_SHIFT) | kGUARD_EXC_SET_CONTEXT +}; + +mach_port_t exc_port; +uint64_t exception_code; +extern char **environ; + +boolean_t mach_exc_server( + mach_msg_header_t *InHeadP, + mach_msg_header_t *OutHeadP); + +kern_return_t catch_mach_exception_raise +( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt + ) +{ + if (exception == EXC_GUARD) { + /* Set global variable to indicate exception received */ + exception_code = *((uint64_t *)code); + } else { + /* Terminate test on all other unexpected exceptions */ + fprintf(stderr, "received unexpected exception type %#x\n", exception); + exit(1); + } + + return (KERN_SUCCESS); +} + +kern_return_t catch_mach_exception_raise_state +( + mach_port_t exception_port, + exception_type_t exception, + const mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + const thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt + ) +{ + fprintf(stderr, "Unexpected exception handler called\n"); + exit(1); + return (KERN_FAILURE); +} + + +kern_return_t catch_mach_exception_raise_state_identity +( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt + ) +{ + fprintf(stderr, "Unexpected exception handler called\n"); + exit(1); + return (KERN_FAILURE); +} + + +void *server_thread(void *arg) +{ + kern_return_t kr; + + while(1) { + /* Handle exceptions on exc_port */ + if ((kr = mach_msg_server_once(mach_exc_server, 4096, exc_port, 0)) != KERN_SUCCESS) { + fprintf(stderr, "mach_msg_server_once: error %#x\n", kr); + exit(1); + } + } + return (NULL); +} + +int main(int argc, char *argv[]) +{ + posix_spawnattr_t attrs; + kern_return_t kr; + mach_port_t task = mach_task_self(); + + mach_msg_type_number_t maskCount = 1; + exception_mask_t mask; + exception_handler_t handler; + exception_behavior_t behavior; + thread_state_flavor_t flavor; + pthread_t exception_thread; + uint64_t exc_id; + unsigned int exc_fd; + + char *test_prog_name = "./guarded_mp_test"; + char *child_args[MAX_ARGV]; + char test_id[MAX_TEST_ID_LEN]; + int i, err; + int child_status; + int test_status = 0; + + /* Allocate and 
initialize new exception port */ + if ((kr = mach_port_allocate(task, MACH_PORT_RIGHT_RECEIVE, &exc_port)) != KERN_SUCCESS) { + fprintf(stderr, "mach_port_allocate: %#x\n", kr); + exit(1); + } + + if ((kr = mach_port_insert_right(task, exc_port, + exc_port, MACH_MSG_TYPE_MAKE_SEND)) != KERN_SUCCESS) { + fprintf(stderr, "mach_port_allocate: %#x\n", kr); + exit(1); + } + + /* Get Current exception ports */ + if ((kr = task_get_exception_ports(task, EXC_MASK_GUARD, &mask, + &maskCount, &handler, &behavior, &flavor)) != KERN_SUCCESS) { + fprintf(stderr,"task_get_exception_ports: %#x\n", kr); + exit(1); + } + + /* Create exception serving thread */ + if ((err = pthread_create(&exception_thread, NULL, server_thread, 0)) != 0) { + fprintf(stderr, "pthread_create server_thread: %s\n", strerror(err)); + exit(1); + } + + pthread_detach(exception_thread); + + /* Initialize posix_spawn attributes */ + posix_spawnattr_init(&attrs); + + if ((err = posix_spawnattr_setflags(&attrs, POSIX_SPAWN_SETEXEC)) != 0) { + fprintf(stderr, "posix_spawnattr_setflags: %s\n", strerror(err)); + exit(1); + } + + /* Run Tests */ + for(i=0; i> EXC_CODE_SHIFT); + printf("EXC_GUARD Received: "); + (exc_id != 0)?printf("Yes (Code 0x%llx)\n", exception_code):printf("No\n"); + printf("Expected Exception Code: 0x%llx\n", test_exception_code[i]); + printf("Test Result: "); + if((WIFEXITED(child_status) && WEXITSTATUS(child_status)) || + (exc_id != test_exception_code[i])) { + test_status = 1; + printf("FAILED\n"); + } + else { + printf("PASSED\n"); + } + printf("-------------------\n"); + + } + + exit(test_status); +} + + diff --git a/osfmk/i386/bzero.s b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/mach_exc.defs similarity index 71% rename from osfmk/i386/bzero.s rename to tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/mach_exc.defs index cb6a0536b..4b6cc647b 100644 --- a/osfmk/i386/bzero.s +++ b/tools/tests/unit_tests/guarded_mach_port_tests_11178535_src/mach_exc.defs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -25,12 +25,12 @@ * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ - /* +/* * @OSF_COPYRIGHT@ */ /* * Mach Operating System - * Copyright (c) 1991,1990 Carnegie Mellon University + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its @@ -53,58 +53,5 @@ * any improvements or extensions that they make and grant Carnegie Mellon * the rights to redistribute these changes. 
 */
-/*
- */
-
-#include <i386/asm.h>
-
-/*
- * void *memset(void * addr, int pattern, size_t length)
- */
-
-ENTRY(memset)
-	pushl	%edi
-	movl	4+ 4(%esp),%edi	/* addr */
-	movb	4+ 8(%esp),%al	/* pattern */
-	movl	4+ 12(%esp),%edx	/* length */
-	movb	%al,%ah
-	movw	%ax,%cx
-	shll	$16,%eax
-	movw	%cx,%ax
-	cld
-/* zero longs */
-	movl	%edx,%ecx
-	shrl	$2,%ecx
-	rep
-	stosl
-/* zero bytes */
-	movl	%edx,%ecx
-	andl	$3,%ecx
-	rep
-	stosb
-	movl	4+ 4(%esp),%eax	/* returns its first argument */
-	popl	%edi
-	ret
-/*
- * void bzero(char * addr, size_t length)
- */
-Entry(blkclr)
-ENTRY2(bzero,__bzero)
-	pushl	%edi
-	movl	4+ 4(%esp),%edi	/* addr */
-	movl	4+ 8(%esp),%edx	/* length */
-	xorl	%eax,%eax
-	cld
-/* zero longs */
-	movl	%edx,%ecx
-	shrl	$2,%ecx
-	rep
-	stosl
-/* zero bytes */
-	movl	%edx,%ecx
-	andl	$3,%ecx
-	rep
-	stosb
-	popl	%edi
-	ret
+#include <mach/mach_exc.defs>
diff --git a/tools/tests/unit_tests/libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c b/tools/tests/unit_tests/libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c
new file mode 100644
index 000000000..65f876b70
--- /dev/null
+++ b/tools/tests/unit_tests/libproc_privilege_test_13203438_src/libproc_privilege_test_13203438.c
@@ -0,0 +1,127 @@
+/*
+ * Unit test to verify that PROC_PIDUNIQIDENTIFIERINFO is an unprivileged operation.
+ *
+ * The test calls PROC_PIDTBSDINFO, PROC_PIDTASKINFO, PROC_PIDT_SHORTBSDINFO, PROC_PIDUNIQIDENTIFIERINFO on the process
+ * as well as on launchd to verify that PROC_PIDT_SHORTBSDINFO and PROC_PIDUNIQIDENTIFIERINFO are unprivileged
+ * operations while PROC_PIDTBSDINFO and PROC_PIDTASKINFO are privileged ones.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+#define TEST_PASS 1
+#define TEST_FAIL 0
+
+int
+bsdinfo_test(int pid, int result)
+{
+	struct proc_bsdinfo bsdinfo;
+	int error;
+
+
+	error = proc_pidinfo(pid, PROC_PIDTBSDINFO, 0, &bsdinfo, sizeof(bsdinfo));
+	if ((error > 0 && result == TEST_PASS) || (error <= 0 && result == TEST_FAIL)) {
+		printf("[PASS]: Privilege test on pid = %d for PROC_PIDTBSDINFO passed\n", pid);
+		return 0;
+	} else {
+		printf("[FAIL]: Privilege test on pid = %d for PROC_PIDTBSDINFO failed\n", pid);
+		return 1;
+	}
+
+}
+
+int
+taskinfo_test(int pid, int result)
+{
+	struct proc_taskinfo taskinfo;
+	int error;
+
+
+	error = proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &taskinfo, sizeof(taskinfo));
+	if ((error > 0 && result == TEST_PASS) || (error <= 0 && result == TEST_FAIL)) {
+		printf("[PASS]: Privilege test on pid = %d for PROC_PIDTASKINFO passed\n", pid);
+		return 0;
+	} else {
+		printf("[FAIL]: Privilege test on pid = %d for PROC_PIDTASKINFO failed\n", pid);
+		return 1;
+	}
+}
+
+int
+bsdshortinfo_test(int pid, int result)
+{
+	struct proc_bsdshortinfo bsdshortinfo;
+	int error;
+
+
+	error = proc_pidinfo(pid, PROC_PIDT_SHORTBSDINFO, 0, &bsdshortinfo, sizeof(bsdshortinfo));
+	if ((error > 0 && result == TEST_PASS) || (error <= 0 && result == TEST_FAIL)) {
+		printf("[PASS]: Privilege test on pid = %d for PROC_PIDT_SHORTBSDINFO passed\n", pid);
+		return 0;
+	} else {
+		printf("[FAIL]: Privilege test on pid = %d for PROC_PIDT_SHORTBSDINFO failed\n", pid);
+		return 1;
+	}
+}
+
+
+int
+piduniqid_test(int pid, int result)
+{
+	struct proc_uniqidentifierinfo uniqidinfo;
+	int error;
+
+
+	error = proc_pidinfo(pid, PROC_PIDUNIQIDENTIFIERINFO, 0, &uniqidinfo, sizeof(uniqidinfo));
+	if ((error > 0 && result == TEST_PASS) || (error <= 0 && result == TEST_FAIL)) {
+		printf("[PASS]: Privilege test on pid = %d for PROC_PIDUNIQIDENTIFIERINFO passed\n", pid);
+		return 0;
+	} else {
+		printf("[FAIL]: Privilege test on pid = %d for PROC_PIDUNIQIDENTIFIERINFO failed\n", pid);
+		return 1;
+	}
+
+}
+
+
+int main()
+{
+	int selfpid, launchdpid;
+
+	selfpid = getpid();
+	launchdpid = 1;
+
+	if (bsdinfo_test(selfpid, TEST_PASS))
+		goto fail;
+	if (bsdinfo_test(launchdpid, TEST_FAIL))
+		goto fail;
+
+	if (taskinfo_test(selfpid, TEST_PASS))
+		goto fail;
+	if (taskinfo_test(launchdpid, TEST_FAIL))
+		goto fail;
+
+	if (bsdshortinfo_test(selfpid, TEST_PASS))
+		goto fail;
+	if (bsdshortinfo_test(launchdpid, TEST_PASS))
+		goto fail;
+
+	if (piduniqid_test(selfpid, TEST_PASS))
+		goto fail;
+	if (piduniqid_test(launchdpid, TEST_PASS))
+		goto fail;
+
+
+	printf("Privilege test for libproc passed [PASS] \n");
+	return 0;
+
+fail:
+	printf("Privilege test for libproc failed [FAIL] \n");
+	return 1;
+}
+
diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress-Entitlements.plist b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress-Entitlements.plist
new file mode 100644
index 000000000..a5398e575
--- /dev/null
+++ b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress-Entitlements.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>com.apple.private.kernel.override-cpumon</key>
+	<true/>
+</dict>
+</plist>
diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.pbxproj b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..0aa6e053d
--- /dev/null
+++ b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.pbxproj
@@ -0,0 +1,324 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 46;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		15181D991683B73E0002FB18 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 15181D981683B73E0002FB18 /* Foundation.framework */; };
+		15181D9E1683B73E0002FB18 /* monitor_stress.m in Sources */ = {isa = PBXBuildFile; fileRef = 15181D9D1683B73E0002FB18 /* monitor_stress.m */; };
+		15181DA91683B7550002FB18 /* monitor_stress.m in Sources */ = {isa = PBXBuildFile; fileRef = 15181D9D1683B73E0002FB18 /* monitor_stress.m */; };
+		15181DAB1683B7550002FB18 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 15181D981683B73E0002FB18 /* Foundation.framework */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+		15181D931683B73E0002FB18 /* CopyFiles */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = /usr/share/man/man1/;
+			dstSubfolderSpec = 0;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 1;
+		};
+		15181DAC1683B7550002FB18 /* CopyFiles */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = /usr/share/man/man1/;
+			dstSubfolderSpec = 0;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 1;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+		15181D951683B73E0002FB18 /* monitor_stress */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = monitor_stress; sourceTree = BUILT_PRODUCTS_DIR; };
+		15181D981683B73E0002FB18 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
+		15181D9C1683B73E0002FB18 /* AspenFamily.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = AspenFamily.xcconfig; path = AppleInternal/XcodeConfig/AspenFamily.xcconfig; sourceTree = DEVELOPER_DIR; };
+		15181D9D1683B73E0002FB18 /* monitor_stress.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = monitor_stress.m; sourceTree = "<group>"; };
+		15181DB11683B7550002FB18 /* monitor_stress */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = monitor_stress; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		15181D921683B73E0002FB18 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				15181D991683B73E0002FB18 /* Foundation.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		15181DAA1683B7550002FB18 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				15181DAB1683B7550002FB18 /* Foundation.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		15181D8C1683B73E0002FB18 = {
+			isa = PBXGroup;
+			children = (
+				15181D9A1683B73E0002FB18 /* monitor_stress */,
+				15181D971683B73E0002FB18 /* Frameworks */,
+				15181D961683B73E0002FB18 /* Products */,
+			);
+			sourceTree = "<group>";
+		};
+		15181D961683B73E0002FB18 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				15181D951683B73E0002FB18 /* monitor_stress */,
+				15181DB11683B7550002FB18 /* monitor_stress */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		15181D971683B73E0002FB18 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				15181D981683B73E0002FB18 /* Foundation.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		15181D9A1683B73E0002FB18 /* monitor_stress */ = {
+			isa = PBXGroup;
+			children = (
+				15181D9D1683B73E0002FB18 /* monitor_stress.m */,
+				15181D9B1683B73E0002FB18 /* Supporting Files */,
+			);
+			path = monitor_stress;
+			sourceTree = "<group>";
+		};
+		15181D9B1683B73E0002FB18 /* Supporting Files */ = {
+			isa = PBXGroup;
+			children = (
+				15181D9C1683B73E0002FB18 /* AspenFamily.xcconfig */,
+			);
+			name = "Supporting Files";
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		15181D941683B73E0002FB18 /* ios */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 15181DA41683B73E0002FB18 /* Build configuration list for PBXNativeTarget "ios" */;
+			buildPhases = (
+				15181D911683B73E0002FB18 /* Sources */,
+				15181D921683B73E0002FB18 /* Frameworks */,
+				15181D931683B73E0002FB18 /* CopyFiles */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = ios;
+			productName = monitor_stress;
+			productReference = 15181D951683B73E0002FB18 /* monitor_stress */;
+			productType = "com.apple.product-type.tool";
+		};
+		15181DA71683B7550002FB18 /* osx */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 15181DAE1683B7550002FB18 /* Build configuration list for PBXNativeTarget "osx" */;
+			buildPhases = (
+				15181DA81683B7550002FB18 /* Sources */,
+				15181DAA1683B7550002FB18 /* Frameworks */,
+				15181DAC1683B7550002FB18 /* CopyFiles */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = osx;
+			productName = monitor_stress;
+			productReference = 15181DB11683B7550002FB18 /* monitor_stress */;
+			productType = "com.apple.product-type.tool";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		15181D8D1683B73E0002FB18 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastUpgradeCheck = 0460;
ORGANIZATIONNAME = apple; + }; + buildConfigurationList = 15181D901683B73E0002FB18 /* Build configuration list for PBXProject "monitor_stress" */; + compatibilityVersion = "Xcode 3.2"; + developmentRegion = English; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = 15181D8C1683B73E0002FB18; + productRefGroup = 15181D961683B73E0002FB18 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 15181DA71683B7550002FB18 /* osx */, + 15181D941683B73E0002FB18 /* ios */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 15181D911683B73E0002FB18 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 15181D9E1683B73E0002FB18 /* monitor_stress.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 15181DA81683B7550002FB18 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 15181DA91683B7550002FB18 /* monitor_stress.m in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 15181DA21683B73E0002FB18 /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 15181D9C1683B73E0002FB18 /* AspenFamily.xcconfig */; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_DYNAMIC_NO_PIC = NO; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_STRICT_ALIASING = YES; + GCC_SYMBOLS_PRIVATE_EXTERN = NO; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 7.0; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos.internal; + }; + name = Debug; + }; + 15181DA31683B73E0002FB18 /* Release */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 15181D9C1683B73E0002FB18 /* AspenFamily.xcconfig */; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_STRICT_ALIASING = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 7.0; + SDKROOT = iphoneos.internal; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 15181DA51683B73E0002FB18 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_ENTITLEMENTS = "monitor_stress-Entitlements.plist"; + CODE_SIGN_IDENTITY = "-"; + PRODUCT_NAME = "$(PROJECT_NAME)"; + PROVISIONING_PROFILE = ""; + }; + name = Debug; + }; + 15181DA61683B73E0002FB18 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_ENTITLEMENTS = "monitor_stress-Entitlements.plist"; + CODE_SIGN_IDENTITY = "-"; + PRODUCT_NAME = 
"$(PROJECT_NAME)"; + PROVISIONING_PROFILE = ""; + }; + name = Release; + }; + 15181DAF1683B7550002FB18 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_OBJC_ARC = NO; + PRODUCT_NAME = "$(PROJECT_NAME)"; + SDKROOT = macosx.internal; + SUPPORTED_PLATFORMS = macosx; + }; + name = Debug; + }; + 15181DB01683B7550002FB18 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_OBJC_ARC = NO; + PRODUCT_NAME = "$(PROJECT_NAME)"; + SDKROOT = macosx.internal; + SUPPORTED_PLATFORMS = macosx; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 15181D901683B73E0002FB18 /* Build configuration list for PBXProject "monitor_stress" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 15181DA21683B73E0002FB18 /* Debug */, + 15181DA31683B73E0002FB18 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 15181DA41683B73E0002FB18 /* Build configuration list for PBXNativeTarget "ios" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 15181DA51683B73E0002FB18 /* Debug */, + 15181DA61683B73E0002FB18 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 15181DAE1683B7550002FB18 /* Build configuration list for PBXNativeTarget "osx" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 15181DAF1683B7550002FB18 /* Debug */, + 15181DB01683B7550002FB18 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 15181D8D1683B73E0002FB18 /* Project object */; +} diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 000000000..99f7ee1f3 --- /dev/null +++ b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/xcuserdata/rab.xcuserdatad/UserInterfaceState.xcuserstate b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/project.xcworkspace/xcuserdata/rab.xcuserdatad/UserInterfaceState.xcuserstate new file mode 100644 index 0000000000000000000000000000000000000000..ea729bdfb8023c559d96edc98223a3f8b3dc5e48 GIT binary patch literal 87338 zcmdqK1$Z0B^9FpocRD`Zg|UspX~PVq7-Yva*0RH?>ex!+#<5*Pnlx#9Q)Xril$n|7 zSB8|CnVH*f=cJP*S&}8&{eR!{wS5{znt6A3c6XNFS=m%y*U~y=%1a#KD93XG=i$U& z{d)CZJZgQgxuveLVezQOmHP*4T3eQks&1}XQ^&rp9M#&`)SDwmZMnXea1u3{gq3h{Jx|tqI52HuYW9aepBzh`6gPu*#qZiUGbUVF_UP-T^ z*VCKmt@I9hH@%NONFSm9qfgRj==1bN`U-uWzD3`qAJC8KXY@<@4gH?}M1Q4!(7$<( z=XsG=c%3)--h4lPAU}lPnIF!N;>Ys4@)P+f{B(X6KbPN~-;2-Tef$Exh%e>$;Vbz? 
z{8D}ezaL-2uj1?YdcKiw=G*uU{3iYo9{3~pqxs|b6Zup4)A_UbbNLJSi}`K*rTi8A z)%>^AMCJWPqnZg`lH(^g9TgVd%guR6l zpnB6oSGUVXe?093Zp`2MPxX2Me}vxNwwktZ;&GvT&Mkrf`mMzHpJS zRk%dBT)0ZOR=7d9S-4HOQ@BUCUwBA(RCru?N_bXyL3l}cRd_>qTX;|SQ20dnT=-h} zPWWExHv)_DefXp6sL=`#XZDau}CZyOT;R1p}0s4i2I4v;!5!#ag(@NJXkzL zJXEwr5Dya%7mpB65Kk0O63-CN5ziGb6)zJn7q1Yn6t5Dm7OxR+6Ymo57Vj4y79SB` z5?>Zy5nmNw6JHnK5Z@Hv7e5id5x*6`6TcUK5dRSWluXHzdP%*dK2l$)pVVI(APtm; zN?Fn>avuADCy%EfY-Tp?G< zi{)kVzH+r(E3c8)%8hc9e29FgY|BT;$I8daXUJ#DXUXTuS1IQ!7b#nnOO(r%tCVY% z8o75I{y}D7|tRAW!rXHytqaLrGq@Jptp`NXtr(URTQMap? zsaLAksMo7Eskf?ksCTRPsSm1;sQ*)+RG(3wS6@_LQD0ZzQr}fSP(M~bQ@>QdQNLGz zQh!zdQ2*9Ajn_m?(R9t!dTaf(f!YvlXKlDPN*k-~s!i0UXw$V>+FWgSZ7(fH^JxpT zBCS;0N2}BpX-l;g+J0J%wo0qh>a|9#S!>faXq&V{G|-OFj@FLTPSj4(PS?)T&ebl^ zF4nebmugpNS8La4H)^+Nw`+H4_i7Jl4{MKUPiRkT&uKffm$lclH??=P_qC6-PqipSUL`bd3@zKcFVpR7;QXX6 zY&}me(D&9$^m5&=FVvUl%k_Z1QV;5NdcA&-zDeJ#AFLmuAFA8>QToyP3HllOnfh7! z+4{x$7JaLJseYAywSJ9$n|`}~hkmDimwuoAtp1$-y#9i|Lw`|!Nqz#Q5C!%J|m!!T8zu&G^&!$4k6|SMsV}!)tl_c>8+?d53z3c}I9h zd&haldnb9PdS`fNd*^xg@Xq(~t@W<%BL5Z}r~cz1w@A z_d)L?-v4=@^giQ#-ut5W74Pfbx4iFqKk$C+{mlEN_Z#o`-k-d`djIhLZE_}Wil$=f zrfK#z`gG&ZIxK%mfu=v zEwPqc0c)idwANT_tp@7=tJONtI>!;SE!I|Rn{|bC zrFE5ci*>7Yn{~T&pY^!)g!QELl=ZarjPYubWxUpp^`!hU~S~#kvzPhDl z8)tA{o7(&~&fPMbb)Znkgg#M#+1^JnBv z$)7nRcSgYKTbSot)Y!bXrK!3mnAcd-wl3Jv>ZaVE8^(>hf*ZgM}kH~bC`u@PMtG-;*9Cj=S-YCGdpi$ zes13ExwB{crp%s^6R?Icfb5!9Hf>Fu-RRs_Ha6C;tZpuBSy2=>W$l&z;266{bkN%TXrjQPi_G>?n-ViZa$aI<#4%N9_Qoo zxdOYl-N){0_p|%k1MGqJAbaqYTp_E%BCeP#;Y!)3a&8}c2&=-KfSd^AbRbvTLxJ1| zWFNa|87memP;vFjV12-{*x&qX8V@Y3USGGmy0x)+1^dERTgN_SHwUW&)|+tgwz zp{lj6zOHp6iuu_}qIF?i@W2pY1S|v*Q|&PjdBK*t)eQ*{FiRo)x?l^oL#CErS05~| zZe0_w`e(Pau$4=7Lu)9fEydLv8{1l0m91-Rz^n&aTAPC{Et59Xuy)tf+_=Bu+BCqe z!OpQCSIw>DYPec1$gSd5+dJFC>@0h@J;EMokFrN!!8*qN%#QnT>$nE4k!xZdV+@*b zA0VrMYyh$m2-roexR}$t;L5hutAk;S^^aK4Y4v`V&)VwdS~M&QhqAmh^hc;)^kF~c z1y@zK)weoOA+u$LzbePISW7GG7Zt25!4~$Rx|J=?SZC|yrW~n&rS*;}Dp{vaj1+iZ{B${k{l!(5LJr5-k0vIJa!!?+_5;Bb4Ft=y6Ju69vnNp)R=`|GjXiCpjP z+;QCT+zIx0dxAZ2J9iSZ+$r`X$6mGQ*LI0XkeQ@4=w~giVYTxyua0%n#)j%<;m)=v+oqeX^SEu?xXZcoxeK@pxr?}qxh>pQ zdx|~Po@P(CXV^3CS(kI$xl6c9xy#tUS8!L_v+X%Rs(|e0{8tTRrCqcf8(m&?Yjx$u zrr@$r#+)p(wwCRzh^v{eh^-e^Eujv;ekp7Xt}AtX=OEWjC-`lM`|(nFNF~9xjTF@D+cbvh51fnikqcJvL@CNDiU+{Nq=+A_c-??w)7|L`CGZC>}>2Rz48`TMtT=kXG^HXJ;&|X z#y!uyVCUEi?7dxpm$+BC-rKmBxmWC5J8v8J8uz;Gv-9nO6%3}mQkmaie@BN|QoWja zthU-<`N;Xf`5gWe8ZzGF-d|qokMs;SM!d)E<|gwY_eoqbpRr^L?Shf>kD3w%@)h^> zzNP+>wpMJV&QPFi~D;?i&Jx~F)wMYTNi9;tzOr(tTsGXVU5||$1bTrK zJ*(ULHH|H;lNUDDvpG%6eTFM)8|g!(9kw< z+VrW@CbzPoyJd1KOQL0R8?$C0^f7iU2)HFSD21E9`wQBSXnf%<-~+$#6RWgc+BO zSE2tZuq}_wWwW&Erj}rBmeWD9SodxUHn(of%4H6Dbz{i?4syF(tdQfIMULEWA z)f+_aMupHz}6vXCqyi^&qQlq@65$qKSB36T9rHCah&NG%DHRb(|;L+Z%>WG$&D z>qrA>Bu(T1(o9-ND`_L^$$?}8*+>o|o5*HzFgb)AN^An;FmgCKf*eVXB1e;B$g$)& zay&VKoJdY0CzDgispK?rIyr-!NzNi?lXJ+qKfILVZ zA`g>C$fM*j@;~x8d4fDio+3|^XUMbUIr2Ptf$ShJl9$NKH9w=b|Sv@fzRwzt?@?QQmU`x5(7`!f4-`wII?`zrft`x^UN`#Sr2`v&_)`zHHl z`xg6F`!@S_`wsg~`!4%#`yTsV`#$@A`vLnw`yu;b`w{z5`!V}}_T%;w_LKHg_S5z= z_OteL_Ve}&_73|+`z8Bj`xX0D`!)M@`wja|`z`xz`yKmT`#t-8`vdz!`y=~f`xE<9 z`!oA<`wRO^`z!lv`y2aP`#bx4`v?0+`zQNn`xpCH`#1Y{`w#n1`!D-%`ycyXARG|Z zl4zu;e&On9%$pSJQ$Os@Kfs6t&8ps$RV}XnVvI~%1fs6+-0mwukY*IKG$P^$` zflLE39motIGl9$kG8+h+fz1Un56Esnb_cQtkUfFy1!O*uY#=#6a)IOl@d3#PQUGKD zkU}7P11SPh41`T-O6|ROa2g##hti#vuw^$}39vbU%~mqQh%;krWz(fLHjUoAIXf?} zu(GhMG`l$9&nxmfV0;7PJRjs z4k96U9~po)Z}u-KiO#_EBw}Sbdq<1SLb1^a&=zD@U^Xj!xn(8g{y<(~gQ&m`;7g(5GQJ7s?87bB7C_5xxwjeh*u+UfGXEo)5?uCNm;sx`3IaLLL z!cw$UHdbsP^vTuAL9MazS_QsRUqyDMkLA8Bzp%Klv>=dMmgjSoe5f=(UMW)2!qVL0 
diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress 2.xcscheme b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress 2.xcscheme
new file mode 100644
index 000000000..e978aa76a
--- /dev/null
+++ b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress 2.xcscheme
@@ -0,0 +1,59 @@
diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress copy.xcscheme b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress copy.xcscheme
new file mode 100644
index 000000000..8018bd6a5
--- /dev/null
+++ b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress copy.xcscheme
@@ -0,0 +1,86 @@
diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress.xcscheme b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress.xcscheme
new file mode 100644
index 000000000..15afb2f64
--- /dev/null
+++ b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/monitor_stress.xcscheme
@@ -0,0 +1,86 @@
diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/xcschememanagement.plist
b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/xcschememanagement.plist new file mode 100644 index 000000000..fb0224c31 --- /dev/null +++ b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress.xcodeproj/xcuserdata/rab.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,42 @@ + + + + + SchemeUserState + + monitor_stress 2.xcscheme + + orderHint + 2 + + monitor_stress copy.xcscheme + + orderHint + 1 + + monitor_stress.xcscheme + + orderHint + 0 + + + SuppressBuildableAutocreation + + 15181D941683B73E0002FB18 + + primary + + + 15181DA71683B7550002FB18 + + primary + + + 15181DB21683B8700002FB18 + + primary + + + + + diff --git a/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress/monitor_stress.m b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress/monitor_stress.m new file mode 100644 index 000000000..267bb86c5 --- /dev/null +++ b/tools/tests/unit_tests/monitor_stress_12901965_src/monitor_stress/monitor_stress.m @@ -0,0 +1,178 @@ +#define TARGET_OS_EMBEDDED 1 +#include +#include +#include +#include +#include +#include +#include +#include + +#import + +#include +#include + +#define MAX_THREADS 100 + +char *pname; + +int pid; + +int exit_after = -1; + +int percentage = 95, interval = 600; + +int wakemon_rate = 150; + +int limit = 0; // Worker thread should apply per-thread limit to self? +int limit_period = 5000; + +void usage(void) { + printf("usage: monitor_stress [ -c nthreads ] [ -w nthreads ] \n"); + printf("\t-c: number of CPU usage monitor stress threads to use (default: 2\n"); + printf("\t-w: number of wakeups monitor stress threads to use (default: 0\n"); + printf("\t-e: exit after this many seconds (default: run forever)\n"); + printf("\t-p: act on this pid (default: self)\n"); +} + +void *perthr_limit_thread(void *arg) +{ + int percent = 90, refill_period = 30; // time unit is milliseconds + int err; + int cpupercent; + +top: + cpupercent = percent | (refill_period << 8); + + if ((err = sysctlbyname("kern.setthread_cpupercent", 0, 0, + &cpupercent, sizeof (int))) != 0) { + printf("kern.setthread_cpupercent: error %d\n", err); + exit(1); + } + goto top; +} + +void *cpumon_stress_thread(void *arg) +{ +top: + if (proc_set_cpumon_params(pid, percentage, interval) != 0) { + perror("proc_set_cpumon_params"); + exit(1); + } + if (proc_disable_cpumon(pid) != 0) { + perror("proc_disable_cpumon"); + exit(1); + } + goto top; +} + +void *wakemon_stress_thread(void *arg) +{ +top: + if (proc_set_wakemon_params(pid, wakemon_rate, 0) != 0) { + perror("proc_set_wakemon_params"); + exit(1); + } + if (proc_disable_wakemon(pid) != 0) { + perror("proc_disable_wakemon"); + exit(1); + } + goto top; +} + +void *exit_thread(void *arg) +{ + sleep(exit_after); + printf("...exiting.\n"); + exit(0); + + return (NULL); +} + +int main(int argc, char *argv[]) +{ + int ch; + int i = 0; + int cpumon_threads = 2; + int wakemon_threads = 0; + + pthread_t thr_id; + + pname = basename(argv[0]); + pid = getpid(); + + while ((ch = getopt(argc, argv, "c:w:e:p:h?")) != -1) { + switch (ch) { + case 'c': + cpumon_threads = atoi(optarg); + break; + case 'w': + wakemon_threads = atoi(optarg); + break; + case 'e': + exit_after = atoi(optarg); + break; + case 'p': + pid = atoi(optarg); + break; + case 'h': + default: + usage(); + exit(1); + + } + } + argc -= optind; + argv += optind; + + if (argc != 0) { + usage(); + exit(1); + } + + if ((cpumon_threads <= 0) || (cpumon_threads > MAX_THREADS) || + (wakemon_threads < 
0) || (wakemon_threads > MAX_THREADS)) { + printf("%s: %d/%d threads too many (max is %d)\n", pname, + cpumon_threads, wakemon_threads, MAX_THREADS); + exit(1); + } + + printf("%s: creating %d CPU usage monitor stress threads (1 will be main thread), ", pname, cpumon_threads); + if (wakemon_threads > 0) { + printf( "%d wakeups monitor stress threads, ", wakemon_threads); + } + printf("and 1 per-thread CPU limit stress thread.\n"); + + if (pthread_create(&thr_id, NULL, perthr_limit_thread, NULL) != 0) { + perror("pthread_create"); + exit(1); + } + + for (i = 0; i < wakemon_threads; i++) { + if (pthread_create(&thr_id, NULL, wakemon_stress_thread, NULL) != 0) { + perror("pthread_create"); + exit(1); + } + } + + // main thread will be used as stress thread too, so start count at 1 + for (i = 1; i < cpumon_threads; i++) { + if (pthread_create(&thr_id, NULL, cpumon_stress_thread, NULL) != 0) { + perror("pthread_create"); + exit(1); + } + } + + if (exit_after >= 0) { + printf("%s: will exit after %d seconds\n", pname, exit_after); + if (pthread_create(&thr_id, NULL, exit_thread, NULL) != 0) { + perror("pthread_create"); + exit(1); + } + } + + cpumon_stress_thread(NULL); + + return (0); +} diff --git a/tools/tests/unit_tests/pipe_test_10807398_src/child.c b/tools/tests/unit_tests/pipe_test_10807398_src/child.c new file mode 100644 index 000000000..fbd51922c --- /dev/null +++ b/tools/tests/unit_tests/pipe_test_10807398_src/child.c @@ -0,0 +1,27 @@ +#include +#include +#include + +int main(int argc, char **argv) +{ + int fd, r; + char buf[32]; + + if (argc != 2) { + fprintf(stderr, "Usage: %s fd\n", argv[0]); + return 1; + } + fd = atoi(argv[1]); + + printf("child read(%d)...\n", fd); + r = read(fd, buf, sizeof buf - 1); + if (r < 0) + perror("read"); + else { + buf[r] = 0; + printf("child read(%d) = \"%s\"\n", fd, buf); + } + close(fd); + printf("child done\n"); + return 0; +} diff --git a/tools/tests/unit_tests/pipe_test_10807398_src/parent.c b/tools/tests/unit_tests/pipe_test_10807398_src/parent.c new file mode 100644 index 000000000..bafaf0277 --- /dev/null +++ b/tools/tests/unit_tests/pipe_test_10807398_src/parent.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + int fd[2], pid, r; + char *args[3], buf[32]; + struct pollfd pfd; + extern char **environ; + + if (pipe(fd) < 0) { + perror("pipe"); + return 1; + } + + snprintf(buf, sizeof buf, "%d", fd[0]); + + args[0] = "./child"; + args[1] = buf; + args[2] = 0; + + switch (pid = fork()) { + case -1: + perror("fork"); + return 1; + case 0: /* child */ + close(fd[1]); + execve(args[0], args, environ); + perror(args[0]); + _exit(1); + default: /* parent */ + close(fd[0]); + pfd.fd = fd[1]; + pfd.events = POLLOUT; + pfd.revents = 0; + printf("parent poll(%d)...\n", pfd.fd); + errno = 0; + r = poll(&pfd, 1, -1); + printf("parent poll(%d) returned %d errno %d[%s]\n", + pfd.fd, r, errno, strerror(errno)); + write(fd[1], "howdy", 5); + close(fd[1]); + printf("parent done\n"); + } + + return 0; +} diff --git a/tools/tests/unit_tests/pipes_fill_procinfo_11179336.c b/tools/tests/unit_tests/pipes_fill_procinfo_11179336.c new file mode 100644 index 000000000..18bce49ed --- /dev/null +++ b/tools/tests/unit_tests/pipes_fill_procinfo_11179336.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include + +int main(){ + int pipe_fds[2]; + if (pipe(&pipe_fds[0]) < 0) { + perror("pipe"); + goto fail; + } + struct pipe_fdinfo pdinfo; + /* from the headers + int proc_pidfdinfo(int pid, int fd, int 
flavor, void * buffer, int buffersize) __OSX_AVAILABLE_STARTING(__MAC_10_5, __IPHONE_2_0);
+	 */
+	int mypid = getpid();
+	int flavor = PROC_PIDFDPIPEINFO;
+	int nv = proc_pidfdinfo(mypid, pipe_fds[0], flavor, (void *) &pdinfo, sizeof(pdinfo));
+	if (nv < 0) {
+		perror("proc_pidinfo");
+		goto fail;
+	}
+	printf("handle value = %p \n", (void *)pdinfo.pipeinfo.pipe_handle);
+	struct stat mystat;
+	fstat(pipe_fds[0], &mystat);
+	printf("ino value = %p \n", (void *)mystat.st_ino);
+
+	if ( (uintptr_t)mystat.st_ino == (uintptr_t)pdinfo.pipeinfo.pipe_handle)
+		goto success;
+ fail:
+	printf("[FAILED] fill_pipeinfo returned wrong values. (i.e. pipeinfo->pipe_handle != fstat->st_ino ) \n");
+	return -1;
+ success:
+	printf("[PASSED] fill_pipeinfo returned correct values. (i.e. pipeinfo->pipe_handle == fstat->st_ino ) \n");
+	return 0;
+}
+
diff --git a/tools/tests/unit_tests/ptcwd_test_11269991_src/ptcwd_test_11269991.c b/tools/tests/unit_tests/ptcwd_test_11269991_src/ptcwd_test_11269991.c
new file mode 100644
index 000000000..2f5866590
--- /dev/null
+++ b/tools/tests/unit_tests/ptcwd_test_11269991_src/ptcwd_test_11269991.c
@@ -0,0 +1,219 @@
+/*
+ * Test program for checking that the per-thread current working directories
+ * are happy.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+
+#ifndef SYS___pthread_chdir
+#define SYS___pthread_chdir 348
+#endif
+
+#ifndef SYS___pthread_fchdir
+#define SYS___pthread_fchdir 349
+#endif
+
+/*
+ * /tmp is a symlink, so use full path for strict compare
+ */
+#define WORKDIR "/private/tmp/ptwork"
+#define WORKDIR1 WORKDIR "/one"
+#define WORKDIR2 WORKDIR "/two"
+
+
+int
+pthread_chdir_np(char *path)
+{
+	return syscall(SYS___pthread_chdir, path);
+}
+
+int
+pthread_fchdir_np(int fd)
+{
+	return syscall(SYS___pthread_fchdir, fd);
+}
+
+
+/*
+ * This is a slow routine, just like getcwd(); people should remember that
+ * they set something, instead of asking us what they told us.
+ */
+char *
+pthread_getcwd_np(char *buf, size_t size)
+{
+	int fd_cwd;
+
+	/*
+	 * XXX disable compatibility hack, since we have no compatibility
+	 * XXX to protect.
+	 */
+	if (buf == NULL)
+		return (NULL);
+
+	/*
+	 * Open the "current working directory"; if we are running on a per
+	 * thread working directory, that's the one we will get.
+	 */
+	if ((fd_cwd = open(".", O_RDONLY)) == -1)
+		return (NULL);
+
+	/*
+	 * Switch off the per thread current working directory, in case we
+	 * were on one; this fails if we aren't running with one.
+	 */
+	if (pthread_fchdir_np( -1) == -1) {
+		/* We aren't running with one... all done. */
+		close (fd_cwd);
+		return (NULL);
+	}
+
+	/*
+	 * If we successfully switched off, then we switch back...
+	 * this may fail catastrophically, if we no longer have rights;
+	 * this should never happen, but threads may clobber our fd out
+	 * from under us, etc..
+	 */
+	if (pthread_fchdir_np(fd_cwd) == -1) {
+		close(fd_cwd);
+		errno = EBADF;	/* sigil for catastrophic failure */
+		return (NULL);
+	}
+
+	/* Close our directory handle */
+	close(fd_cwd);
+
+	/*
+	 * And call the regular getcwd(), which will return the per thread
+	 * current working directory instead of the process one.
+	 */
+	return getcwd(buf, size);
+}
+
+
+int
+main(int ac, char *av[])
+{
+	char buf[MAXPATHLEN];
+	char *p;
+
+	/*
+	 * First, verify that we are NOT using a per thread current working
+	 * directory...
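+	 *
+	 * pthread_fchdir_np(-1) doubles as the probe for this: as the helper
+	 * above notes, switching the per-thread working directory off fails
+	 * with -1 when the calling thread never had one, so its return value
+	 * tells us which mode the thread is in.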
+	 */
+	if (pthread_fchdir_np( -1) != -1) {
+		fprintf(stderr, "FAIL: Started out on PT CWD\n");
+		exit(1);
+	}
+
+	/* Blow the umask to avoid shooting our foot */
+	umask(0);	/* "always successful" */
+
+	/* Now set us up the test directories... */
+
+	if (mkdir(WORKDIR, 0777) == -1 && errno != EEXIST) {
+		perror("FAIL: mkdir: " WORKDIR);
+		exit(2);
+	}
+
+	printf("workdir \"" WORKDIR "\" created\n");
+
+	if (mkdir(WORKDIR1, 0777) == -1 && errno != EEXIST) {
+		perror("FAIL: mkdir: " WORKDIR1);
+		exit(2);
+	}
+
+	printf("workdir \"" WORKDIR1 "\" created\n");
+
+	if (mkdir(WORKDIR2, 0777) == -1 && errno != EEXIST) {
+		perror("FAIL: mkdir: " WORKDIR2);
+		exit(2);
+	}
+
+	printf("workdir \"" WORKDIR2 "\" created\n");
+
+	/* Change the process current working directory to WORKDIR1 */
+
+	if (chdir(WORKDIR1) == -1) {
+		perror("FAIL: chdir: \"" WORKDIR1 "\" failed\n");
+		exit(3);
+	}
+
+	printf("process current working directory changed to \"" WORKDIR1 "\"...\n");
+
+	printf("verifying; getcwd says: \"%s\"\n", getcwd(buf, MAXPATHLEN));
+	if (strcmp(WORKDIR1, buf)) {
+		fprintf(stderr, "FAIL: \"%s\" != \"%s\"\n", WORKDIR1, buf);
+		exit(3);
+	}
+	printf("verified.\n");
+
+	/* Verify that we don't get an answer for pthread_getcwd_np() */
+
+	if ((p = pthread_getcwd_np(buf, MAXPATHLEN)) != NULL) {
+		fprintf(stderr, "FAIL: pthread_getcwd_np should fail, got \"%s\" instead\n", p);
+		exit(4);
+	}
+
+	printf("Good so far: pthread_getcwd_np() got no answer (correct)\n");
+
+	if (pthread_chdir_np(WORKDIR2) == -1) {
+		perror("FAIL: pthread_chdir_np: " WORKDIR2);
+		exit(5);
+	}
+
+	printf("Set per thread current working directory to \"" WORKDIR2"\"\n");
+	printf("verifying; getcwd says: \"%s\"\n", getcwd(buf, MAXPATHLEN));
+	if (strcmp(WORKDIR2, buf)) {
+		fprintf(stderr, "FAIL: \"%s\" != \"%s\"\n", WORKDIR2, buf);
+		exit(3);
+	}
+	printf("verified.\n");
+
+	/* Now verify we get an answer for pthread_getcwd_np() */
+	if ((p = pthread_getcwd_np(buf, MAXPATHLEN)) == NULL) {
+		perror("FAIL: pthread_getcwd_np");
+		exit(6);
+	}
+
+	printf("verifying... pthread_getcwd_np says \"%s\"\n", p);
+	if (strcmp(WORKDIR2, buf)) {
+		fprintf(stderr, "FAIL: \"%s\" != \"%s\"\n", WORKDIR2, buf);
+		exit(7);
+	}
+	printf("verified.\n");
+
+	printf("verifying our old cwd still exists by going off PT CWD...\n");
+	if (pthread_fchdir_np(-1) != 0) {
+		perror("FAIL: pthread_fchdir_np");
+		exit(8);
+	}
+	printf("off...
but are we really off?\n"); + + printf("Check by verifying that pthread_getcwd_np now fails\n"); + if ((p = pthread_getcwd_np(buf, MAXPATHLEN)) != NULL) { + fprintf(stderr, "FAIL: pthread_getcwd_np should fail, got \"%s\" instead\n", p); + exit(9); + } + + printf("verified.\n"); + + printf("One last check: see that getcwd says \"" WORKDIR1 "\" again\n"); + printf("verifying; getcwd says: \"%s\"\n", getcwd(buf, MAXPATHLEN)); + if (strcmp(WORKDIR1, buf)) { + fprintf(stderr, "FAIL: \"%s\" != \"%s\"\n", WORKDIR1, buf); + exit(10); + } + printf("verified.\n"); + + + printf("\nPASS: testing was successful\n"); + + exit(0); +} diff --git a/tools/tests/unit_tests/ptrace_test_12507045_src/ptrace_test.c b/tools/tests/unit_tests/ptrace_test_12507045_src/ptrace_test.c new file mode 100644 index 000000000..55728c60c --- /dev/null +++ b/tools/tests/unit_tests/ptrace_test_12507045_src/ptrace_test.c @@ -0,0 +1,749 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * We create a process hierarchy of: + * + * grandparent -> parent -> child + * \ + * \--> debugger + * + * When the debugger calls ptrace(2) on child, it + * is temporarily reparented. + * + * We may also create a hierarchy of: + * + * grandparent -> parent/debugger -> child + * + */ + +typedef enum { + eParentExitAfterWaitpid = 0, + eParentExitAfterWaitpidAndSIGCHLD, + eParentExitBeforeWaitpid, + eParentExitAfterDebuggerAttach, + eParentExitBeforeDebuggerAttach, + eParentIsDebugger +} parent_exit_t; + +typedef enum { + eDebuggerExitAfterKillAndWaitpid = 0, + eDebuggerExitAfterKillWithoutWaitpid, + eDebuggerExitAfterDetach, + eDebuggerExitWithoutDetach +} debugger_exit_t; + +void do_grandparent(pid_t parent, pid_t child, pid_t debugger, debugger_exit_t debugger_exit_time) __attribute__((noreturn)); +void do_parent(pid_t child, pid_t debugger, parent_exit_t parent_exit_time, debugger_exit_t debugger_exit_time) __attribute__((noreturn)); +void do_child(void) __attribute__((noreturn)); +void do_debugger(pid_t child, debugger_exit_t debugger_exit_time) __attribute__((noreturn)); + +bool iszombie(pid_t p); + +char *str_kev_filter(int filter); +char *str_kev_flags(int filter, uint16_t flags); +char *str_kev_fflags(int filter, uint32_t fflags); +char *str_kev_data(int filter, uint32_t fflags, int64_t data, uint64_t udata); +char *print_exit(pid_t p, int stat_loc); + +void logline(const char *format, ...); + +void usage(void); +int test_all_permutations(void); +void test(parent_exit_t parent_exit_time, debugger_exit_t debugger_exit_time) __attribute__((noreturn)); + +int main(int argc, char *argv[]) { + int ch; + + int parent_exit_time = -1; + int debugger_exit_time = -1; + + while ((ch = getopt(argc, argv, "p:w:")) != -1) { + switch (ch) { + case 'p': + parent_exit_time = atoi(optarg); + break; + case 'w': + debugger_exit_time = atoi(optarg); + break; + case '?': + default: + usage(); + } + } + + /* no explicit options, loop through them all */ + if (parent_exit_time == -1 && + debugger_exit_time == -1) { + return test_all_permutations(); + } + + if (parent_exit_time == -1 || + debugger_exit_time == -1) { + usage(); + } + + test((parent_exit_t)parent_exit_time, + (debugger_exit_t)debugger_exit_time); + + return 0; /* never reached */ +} + +void test(parent_exit_t parent_exit_time, debugger_exit_t debugger_exit_time) +{ + pid_t parent, child, debugger; + int ret; + int fds[2]; + + /* pipe for parent to send child pid to grandparent */ + ret = pipe(fds); + 
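+	/*
+	 * This pipe is the only channel back to the grandparent: the parent
+	 * branch below writes the child pid (and the debugger pid, or -1
+	 * when the parent acts as the debugger itself) into fds[1], and the
+	 * grandparent reads both from fds[0] so it can register NOTE_EXIT
+	 * kevents on every process it has to observe.
+	 */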
if (-1 == ret) { + err(1, "failed to create pipe"); + } + + parent = fork(); + if (parent == 0) { + /* parent sub-branch */ + + ret = close(fds[0]); + if (ret == -1) { + err(1, "close read end of pipe"); + } + + child = fork(); + if (child == 0) { + /* child */ + ret = close(fds[1]); + if (ret == -1) { + err(1, "close write end of pipe"); + } + + do_child(); + } else if (child == -1) { + err(1, "parent failed to fork child"); + } else { + /* parent */ + if (-1 == write(fds[1], &child, sizeof(child))) { + err(1, "writing child pid to grandparent"); + } + + if (parent_exit_time == eParentIsDebugger) { + debugger = -1; + + if (-1 == write(fds[1], &debugger, sizeof(debugger))) { + err(1, "writing debugger pid to grandparent"); + } + ret = close(fds[1]); + if (ret == -1) { + err(1, "close write end of pipe"); + } + + do_debugger(child, debugger_exit_time); + } else { + debugger = fork(); + if (debugger == 0) { + /* debugger */ + ret = close(fds[1]); + if (ret == -1) { + err(1, "close write end of pipe"); + } + + do_debugger(child, debugger_exit_time); + } else if (debugger == -1) { + err(1, "parent failed to fork debugger"); + } else { + /* still parent */ + if (-1 == write(fds[1], &debugger, sizeof(debugger))) { + err(1, "writing debugger pid to grandparent"); + } + ret = close(fds[1]); + if (ret == -1) { + err(1, "close write end of pipe"); + } + + do_parent(child, debugger, parent_exit_time, debugger_exit_time); + } + } + } + } else if (parent == -1) { + err(1, "grandparent failed to fork parent"); + } else { + ret = close(fds[1]); + if (ret == -1) { + err(1, "close write end of pipe"); + } + + if (-1 == read(fds[0], &child, sizeof(child))) { + err(1, "could not read child pid"); + } + + if (-1 == read(fds[0], &debugger, sizeof(debugger))) { + err(1, "could not read debugger pid"); + } + + ret = close(fds[0]); + if (ret == -1) { + err(1, "close read end of pipe"); + } + + do_grandparent(parent, child, debugger, debugger_exit_time); + } +} + +void usage(void) +{ + errx(1, "Usage: %s [-p -w ]", getprogname()); +} + +int test_all_permutations(void) +{ + int p, w; + bool has_failure = false; + + for (p = 0; p <= 5; p++) { + for (w = 0; w <= 3; w++) { + int testpid; + int ret; + + testpid = fork(); + if (testpid == 0) { + logline("-------------------------------------------------------"); + logline("*** Executing self-test: %s -p %d -w %d", + getprogname(), p, w); + test((parent_exit_t)p, + (debugger_exit_t)w); + _exit(1); /* never reached */ + } else if (testpid == -1) { + err(1, "failed to fork test pid"); + } else { + int stat_loc; + + ret = waitpid(testpid, &stat_loc, 0); + if (ret == -1) + err(1, "waitpid(%d) by test harness failed", testpid); + + logline("test process: %s", print_exit(testpid, stat_loc)); + if (!WIFEXITED(stat_loc) || (0 != WEXITSTATUS(stat_loc))) { + logline("FAILED TEST"); + has_failure = true; + } + } + } + } + + if (has_failure) { + logline("test failures found"); + return 1; + } + + return 0; +} + +void do_grandparent(pid_t parent, pid_t child, pid_t debugger, debugger_exit_t debugger_exit_time) +{ + pid_t result; + int stat_loc; + int exit_code = 0; + int kq; + int ret; + struct kevent64_s kev; + int neededdeathcount = (debugger != -1) ? 3 : 2; + + setprogname("GRANDPARENT"); + + logline("grandparent pid %d has parent pid %d and child pid %d. 
waiting for parent process exit...", getpid(), parent, child); + + /* make sure we can at least observe real child's exit */ + kq = kqueue(); + if (kq < 0) + err(1, "kqueue"); + + EV_SET64(&kev, child, EVFILT_PROC, EV_ADD|EV_ENABLE, + NOTE_EXIT, 0, child, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_PROC"); + + EV_SET64(&kev, parent, EVFILT_PROC, EV_ADD|EV_ENABLE, + NOTE_EXIT, 0, parent, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_PROC"); + + if (debugger != -1) { + EV_SET64(&kev, debugger, EVFILT_PROC, EV_ADD|EV_ENABLE, + NOTE_EXIT, 0, debugger, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_PROC"); + } + + EV_SET64(&kev, 5, EVFILT_TIMER, EV_ADD|EV_ENABLE|EV_ONESHOT, + NOTE_SECONDS, 5, 0, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_TIMER"); + + while(1) { + + ret = kevent64(kq, NULL, 0, &kev, 1, 0, NULL); + if (ret == -1) { + if (errno == EINTR) + continue; + err(1, "kevent64"); + } else if (ret == 0) { + break; + } + + logline("kevent64 returned ident %llu filter %s fflags %s data %s", + kev.ident, str_kev_filter(kev.filter), + str_kev_fflags(kev.filter, kev.fflags), + str_kev_data(kev.filter, kev.fflags, kev.data, kev.udata)); + if (kev.filter == EVFILT_PROC) { + if (child == kev.udata) { + neededdeathcount--; + } else if (parent == kev.udata) { + neededdeathcount--; + } else if ((debugger != -1) && (debugger == kev.udata)) { + neededdeathcount--; + } + } else if (kev.filter == EVFILT_TIMER) { + logline("timed out waiting for NOTE_EXIT"); + exit_code = 1; + break; + } + + if (neededdeathcount == 0) { + break; + } + } + + result = waitpid(parent, &stat_loc, 0); + if (result == -1) + err(1, "waitpid(%d) by grandparent failed", parent); + + + logline("parent process: %s", print_exit(parent, stat_loc)); + if (!WIFEXITED(stat_loc) || (0 != WEXITSTATUS(stat_loc))) { + exit_code = 1; + } + + if (iszombie(parent)) { + logline("parent %d is now a zombie", parent); + exit_code = 1; + } + + if (iszombie(child)) { + logline("child %d is now a zombie", child); + exit_code = 1; + } + + if ((debugger != -1) && iszombie(debugger)) { + logline("debugger %d is now a zombie", debugger); + exit_code = 1; + } + + exit(exit_code); +} + +/* + * debugger will register kevents, wait for quorum on events, then exit + */ +void do_parent(pid_t child, pid_t debugger, parent_exit_t parent_exit_time, debugger_exit_t debugger_exit_time) +{ + int kq; + int ret; + struct kevent64_s kev; + int deathcount = 0; + int childsignalcount = 0; + int stat_loc; + + setprogname("PARENT"); + + logline("parent pid %d has child pid %d and debugger pid %d. 
waiting for processes to exit...", getpid(), child, debugger); + + kq = kqueue(); + if (kq < 0) + err(1, "kqueue"); + + EV_SET64(&kev, child, EVFILT_PROC, EV_ADD|EV_ENABLE, + NOTE_EXIT|NOTE_EXITSTATUS|NOTE_EXIT_DETAIL|NOTE_FORK|NOTE_EXEC|NOTE_SIGNAL, + 0, child, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_PROC"); + + EV_SET64(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, + 0, 0, child, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_SIGNAL"); + + EV_SET64(&kev, 7, EVFILT_TIMER, EV_ADD|EV_ENABLE|EV_ONESHOT, + NOTE_SECONDS, 7, 0, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_TIMER"); + + while(1) { + ret = kevent64(kq, NULL, 0, &kev, 1, 0, NULL); + if (ret == -1) { + if (errno == EINTR) + continue; + err(1, "kevent64"); + } else if (ret == 0) { + break; + } + + logline("kevent64 returned ident %llu filter %s fflags %s data %s", + kev.ident, str_kev_filter(kev.filter), + str_kev_fflags(kev.filter, kev.fflags), + str_kev_data(kev.filter, kev.fflags, kev.data, kev.udata)); + if (kev.filter == EVFILT_SIGNAL) { + /* must be SIGCHLD */ + deathcount++; + } else if (kev.filter == EVFILT_PROC) { + if (child == kev.udata) { + if ((kev.fflags & (NOTE_EXIT|NOTE_EXITSTATUS)) == (NOTE_EXIT|NOTE_EXITSTATUS)) { + deathcount++; + } else if (kev.fflags & NOTE_SIGNAL) { + childsignalcount++; + if ((parent_exit_time == eParentExitAfterDebuggerAttach) && (childsignalcount >= 2)) { + /* second signal is attach */ + logline("exiting because of eParentExitAfterDebuggerAttach"); + exit(0); + } + } else if (kev.fflags & NOTE_FORK) { + if (parent_exit_time == eParentExitBeforeDebuggerAttach) { + logline("exiting because of eParentExitBeforeDebuggerAttach"); + exit(0); + } + } + } + } else if (kev.filter == EVFILT_TIMER) { + errx(1, "timed out waiting for NOTE_EXIT"); + } + + if (deathcount >= (parent_exit_time == eParentExitAfterWaitpidAndSIGCHLD ? 2 : 1)) { + break; + } + } + + if (parent_exit_time == eParentExitBeforeWaitpid) { + logline("exiting because of eParentExitBeforeWaitpid"); + exit(0); + } + + ret = waitpid(child, &stat_loc, 0); + if (ret == -1) + err(1, "waitpid(%d) by parent failed", child); + + logline("child process: %s", print_exit(child, stat_loc)); + if (!WIFSIGNALED(stat_loc) || (SIGKILL != WTERMSIG(stat_loc))) + errx(1, "child did not exit as expected"); + + ret = waitpid(debugger, &stat_loc, 0); + if (ret == -1) + err(1, "waitpid(%d) by parent failed", debugger); + + logline("debugger process: %s", print_exit(debugger, stat_loc)); + if (!WIFEXITED(stat_loc) || (0 != WEXITSTATUS(stat_loc))) + errx(1, "debugger did not exit as expected"); + + /* Received both SIGCHLD and NOTE_EXIT, as needed */ + logline("exiting beacuse of eParentExitAfterWaitpid/eParentExitAfterWaitpidAndSIGCHLD"); + exit(0); +} + +/* child will spin waiting to be killed by debugger or parent or someone */ +void do_child(void) +{ + pid_t doublechild; + int ret; + setprogname("CHILD"); + + logline("child pid %d. waiting for external termination...", getpid()); + + usleep(500000); + + doublechild = fork(); + if (doublechild == 0) { + exit(0); + } else if (doublechild == -1) { + err(1, "doublechild"); + } else { + ret = waitpid(doublechild, NULL, 0); + if (ret == -1) + err(1, "waitpid(%d) by parent failed", doublechild); + } + + while (1) { + sleep(60); + } +} + +/* + * debugger will register kevents, attach+kill child, wait for quorum on events, + * then exit. 
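+ * "Quorum" here means both notifications of the child's death: the
+ * SIGCHLD seen through the EVFILT_SIGNAL filter and the NOTE_EXIT seen
+ * through EVFILT_PROC. The loop below tallies them in deathcount and
+ * proceeds only once both have arrived (deathcount >= 2).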
+ */ +void do_debugger(pid_t child, debugger_exit_t debugger_exit_time) +{ + int kq; + int ret; + struct kevent64_s kev; + int deathcount = 0; + int stat_loc; + + setprogname("DEBUGGER"); + + logline("debugger pid %d has child pid %d. waiting for process exit...", getpid(), child); + + sleep(1); + fprintf(stderr, "\n"); + ret = ptrace(PT_ATTACH, child, 0, 0); + if (ret == -1) + err(1, "ptrace(PT_ATTACH)"); + + ret = waitpid(child, &stat_loc, WUNTRACED); + if (ret == -1) + err(1, "waitpid(child, WUNTRACED)"); + + logline("child process stopped: %s", print_exit(child, stat_loc)); + + if (debugger_exit_time == eDebuggerExitWithoutDetach) { + logline("exiting because of eDebuggerExitWithoutDetach"); + exit(0); + } else if (debugger_exit_time == eDebuggerExitAfterDetach) { + ret = ptrace(PT_DETACH, child, 0, 0); + if (ret == -1) + err(1, "ptrace(PT_DETACH)"); + + ret = kill(child, SIGKILL); + if (ret == -1) + err(1, "kill(SIGKILL)"); + + logline("exiting because of eDebuggerExitAfterDetach"); + exit(0); + } + + kq = kqueue(); + if (kq < 0) + err(1, "kqueue"); + + EV_SET64(&kev, child, EVFILT_PROC, EV_ADD|EV_ENABLE, + NOTE_EXIT|NOTE_EXITSTATUS|NOTE_EXIT_DETAIL|NOTE_FORK|NOTE_EXEC|NOTE_SIGNAL, + 0, child, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_PROC"); + + EV_SET64(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, + 0, 0, child, 0, 0); + ret = kevent64(kq, &kev, 1, NULL, 0, 0, NULL); + if (ret == -1) + err(1, "kevent64 EVFILT_SIGNAL"); + + sleep(1); + fprintf(stderr, "\n"); + ret = ptrace(PT_KILL, child, 0, 0); + if (ret == -1) + err(1, "ptrace(PT_KILL)"); + + while(1) { + ret = kevent64(kq, NULL, 0, &kev, 1, 0, NULL); + if (ret == -1) { + if (errno == EINTR) + continue; + err(1, "kevent64"); + } else if (ret == 0) { + continue; + } + + logline("kevent64 returned ident %llu filter %s fflags %s data %s", + kev.ident, str_kev_filter(kev.filter), + str_kev_fflags(kev.filter, kev.fflags), + str_kev_data(kev.filter, kev.fflags, kev.data, kev.udata)); + if (kev.filter == EVFILT_SIGNAL) { + /* must be SIGCHLD */ + deathcount++; + } else if (kev.filter == EVFILT_PROC) { + if ((kev.fflags & (NOTE_EXIT|NOTE_EXITSTATUS)) == (NOTE_EXIT|NOTE_EXITSTATUS)) { + deathcount++; + } + } + + if (deathcount >= 2) { + break; + } + } + + if (debugger_exit_time == eDebuggerExitAfterKillWithoutWaitpid) { + logline("exiting because of eDebuggerExitAfterKillWithoutWaitpid"); + exit(0); + } + + sleep(1); + fprintf(stderr, "\n"); + ret = waitpid(child, &stat_loc, 0); + if (ret == -1) + err(1, "waitpid(%d) by debugger failed", child); + + logline("child process: %s", print_exit(child, stat_loc)); + + /* Received both SIGCHLD and NOTE_EXIT */ + exit(0); +} + +void logline(const char *format, ...) 
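+/*
+ * Every process in the test hierarchy writes to the same stdout, so the
+ * line is assembled in full (program name prefix, format, newline) and
+ * pushed out with a single write(2); interleaved printf(3) output from
+ * the other processes could otherwise tear a log line in the middle.
+ */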
+{ + char *line = NULL; + char newformat[1024]; + + snprintf(newformat, sizeof(newformat), "%s: %s\n", getprogname(), format); + + va_list va; + + va_start(va, format); + vasprintf(&line, newformat, va); + va_end(va); + + if (line) { + write(STDOUT_FILENO, line, strlen(line)); + free(line); + } else { + write(STDOUT_FILENO, "error\n", 6); + } +} + + +char *str_kev_filter(int filter) +{ + static char filter_string[32]; + if (filter == EVFILT_PROC) + strlcpy(filter_string, "EVFILT_PROC", sizeof(filter_string)); + else if (filter == EVFILT_SIGNAL) + strlcpy(filter_string, "EVFILT_SIGNAL", sizeof(filter_string)); + else if (filter == EVFILT_TIMER) + strlcpy(filter_string, "EVFILT_TIMER", sizeof(filter_string)); + else + strlcpy(filter_string, "EVFILT_UNKNOWN", sizeof(filter_string)); + + return filter_string; +} + +char *str_kev_flags(int filter, uint16_t flags) +{ + static char flags_string[128]; + + flags_string[0] = '\0'; + if (filter & EV_ADD) strlcat(flags_string, "|EV_ADD", sizeof(flags_string)); + if (filter & EV_DELETE) strlcat(flags_string, "|EV_DELETE", sizeof(flags_string)); + if (filter & EV_ENABLE) strlcat(flags_string, "|EV_ENABLE", sizeof(flags_string)); + if (filter & EV_DISABLE) strlcat(flags_string, "|EV_DISABLE", sizeof(flags_string)); + if (filter & EV_RECEIPT) strlcat(flags_string, "|EV_RECEIPT", sizeof(flags_string)); + if (filter & EV_ONESHOT) strlcat(flags_string, "|EV_ONESHOT", sizeof(flags_string)); + if (filter & EV_CLEAR) strlcat(flags_string, "|EV_CLEAR", sizeof(flags_string)); + if (filter & EV_DISPATCH) strlcat(flags_string, "|EV_DISPATCH", sizeof(flags_string)); + if (filter & EV_EOF) strlcat(flags_string, "|EV_EOF", sizeof(flags_string)); + if (filter & EV_ERROR) strlcat(flags_string, "|EV_ERROR", sizeof(flags_string)); + + if (flags_string[0] == '|') + return &flags_string[1]; + else + return flags_string; +} + +char *str_kev_fflags(int filter, uint32_t fflags) +{ + static char fflags_string[128]; + + fflags_string[0] = '\0'; + + if (filter == EVFILT_SIGNAL) { + if (fflags & NOTE_SIGNAL) strlcat(fflags_string, "|NOTE_SIGNAL", sizeof(fflags_string)); + } else if (filter == EVFILT_PROC) { + if (fflags & NOTE_EXIT) strlcat(fflags_string, "|NOTE_EXIT", sizeof(fflags_string)); + if (fflags & NOTE_FORK) strlcat(fflags_string, "|NOTE_FORK", sizeof(fflags_string)); + if (fflags & NOTE_EXEC) strlcat(fflags_string, "|NOTE_EXEC", sizeof(fflags_string)); + if (fflags & NOTE_SIGNAL) strlcat(fflags_string, "|NOTE_SIGNAL", sizeof(fflags_string)); + if (fflags & NOTE_EXITSTATUS) strlcat(fflags_string, "|NOTE_EXITSTATUS", sizeof(fflags_string)); + if (fflags & NOTE_EXIT_DETAIL) strlcat(fflags_string, "|NOTE_EXIT_DETAIL", sizeof(fflags_string)); + if (fflags & NOTE_EXIT_DECRYPTFAIL) strlcat(fflags_string, "|NOTE_EXIT_DECRYPTFAIL", sizeof(fflags_string)); + if (fflags & NOTE_EXIT_MEMORY) strlcat(fflags_string, "|NOTE_EXIT_MEMORY", sizeof(fflags_string)); +#ifdef NOTE_EXIT_CSERROR + if (fflags & NOTE_EXIT_CSERROR) strlcat(fflags_string, "|NOTE_EXIT_CSERROR", sizeof(fflags_string)); +#endif + } else if (filter == EVFILT_TIMER) { + if (fflags & NOTE_SECONDS) strlcat(fflags_string, "|NOTE_SECONDS", sizeof(fflags_string)); + } else { + strlcat(fflags_string, "UNKNOWN", sizeof(fflags_string)); + } + + if (fflags_string[0] == '|') + return &fflags_string[1]; + else + return fflags_string; +} + +char *str_kev_data(int filter, uint32_t fflags, int64_t data, uint64_t udata) +{ + static char data_string[128]; + + if (filter == EVFILT_PROC) { + if ((fflags & (NOTE_EXIT|NOTE_EXITSTATUS)) == 
(NOTE_EXIT|NOTE_EXITSTATUS)) { + if (WIFEXITED(data)) { + snprintf(data_string, sizeof(data_string), "pid %llu exited with status %d", udata, WEXITSTATUS(data)); + } else if (WIFSIGNALED(data)) { + snprintf(data_string, sizeof(data_string), "pid %llu received signal %d%s", udata, WTERMSIG(data), WCOREDUMP(data) ? " (core dumped)" : ""); + } else if (WIFSTOPPED(data)) { + snprintf(data_string, sizeof(data_string), "pid %llu stopped with signal %d", udata, WSTOPSIG(data)); + } else { + snprintf(data_string, sizeof(data_string), "pid %llu unknown exit status 0x%08llx", udata, data); + } + } else if (fflags & NOTE_EXIT) { + snprintf(data_string, sizeof(data_string), "pid %llu exited", udata); + } else { + data_string[0] = '\0'; + } + } else if (filter == EVFILT_TIMER) { + snprintf(data_string, sizeof(data_string), "timer fired %lld time(s)", data); + } else { + data_string[0] = '\0'; + } + + return data_string; +} + +char *print_exit(pid_t p, int stat_loc) +{ + return str_kev_data(EVFILT_PROC, NOTE_EXIT|NOTE_EXITSTATUS, stat_loc, p); +} + +bool iszombie(pid_t p) +{ + int ret; + struct proc_bsdshortinfo bsdinfo; + + ret = proc_pidinfo(p, PROC_PIDT_SHORTBSDINFO, 1, &bsdinfo, sizeof(bsdinfo)); + if (ret != sizeof(bsdinfo)) { + return false; + } + + if (bsdinfo.pbsi_status == SZOMB) { + return true; + } else { + return false; + } +} diff --git a/tools/tests/unit_tests/ptrace_tests_10767133_src/ptrace_tests_10767133.c b/tools/tests/unit_tests/ptrace_tests_10767133_src/ptrace_tests_10767133.c new file mode 100644 index 000000000..c863c03d1 --- /dev/null +++ b/tools/tests/unit_tests/ptrace_tests_10767133_src/ptrace_tests_10767133.c @@ -0,0 +1,281 @@ +/* + * File: ptrace_tests_10767133.c + * Test Description: Testing different functions of the ptrace call. + * Radar: + * compile command: cc -o ../BUILD/ptrace_tests_10767133 ptrace_tests_10767133.c + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define assert_condition(condition, exit_status, cause) \ + if (!(condition)) { \ + printf("[FAILED] %s:%s at %d error: %s \n", "test_10767133", __func__ , __LINE__, cause ); \ + if (errno) \ + perror(cause); \ + exit(exit_status); \ + } \ + +#define log_message(msg) \ + printf("%s:%d -> %s \n", __func__, __LINE__, msg); + + +typedef int * pipe_t; + +ssize_t pipe_read_data(pipe_t p, void *dest_buf, int size) +{ + int fd = p[0]; + int retval = read(fd, dest_buf, size); + if (retval == -1) { + printf("Error reading from buffer. "); + perror("pipe_read"); + } + return retval; +} + +ssize_t pipe_write_data(pipe_t p, void *src_buf, int size) +{ + int fd = p[1]; + int retval = write(fd, src_buf, size); + if (retval == -1) { + printf("Error writing to buffer. "); + perror("pipe_write"); + } + return retval; +} + + + +void test_ptrace_deny_tace_sigexc(); +void test_ptrace_attach_detach(); +void test_ptrace_step_kill(); + +int main(){ + int retval =0; + log_message(" Testing for PT_FORCEQUOTA. it should return EPERM for non root program. "); + errno=0; + retval = ptrace(PT_FORCEQUOTA, getpid(), NULL, 0); + assert_condition( (retval == -1 && errno == EPERM), -1, "PT_FORCEQUOTA"); + + log_message(" Testing to PT_DENY_ATTACH. should return successfully as nobody is tracing me.") + retval = ptrace(PT_DENY_ATTACH, getpid(), NULL, 0); + assert_condition (retval == 0 , -2, "PR_DENY_ATTACH"); + test_ptrace_deny_tace_sigexc(); + test_ptrace_attach_detach(); + test_ptrace_step_kill(); + success: + printf("[PASSED] Test test_10767133 passed. 
\n"); + return 0; + fail: + printf("[FAILED] Test test_10767133 failed. \n"); + return -1; +} + +void test_ptrace_step_kill(){ + int retval = 0, status=1; + int parentpipe[2], childpipe[2], data; + enum data_state { begin, finished_child_loop, finished_parent_detach }; + retval = pipe(childpipe); + assert_condition(retval == 0, -1, "Pipe create"); + retval = pipe(parentpipe); + assert_condition(retval == 0, -1, "Pipe create"); + int childpid = fork(); + assert_condition(childpid >=0, -1, "fork failed"); + + if (childpid == 0){ /* child */ + pipe_read_data(parentpipe, &data, sizeof(data)); + assert_condition(data == begin, -1, "child: parent not setting begin"); + pipe_write_data(childpipe, &data, sizeof(data)); + log_message("child: running the sleep loop"); + int i = 5; + log_message("child: sleep loop"); + while (i-- > 0){ + sleep(1); + printf(".z.\n"); + } + data = finished_child_loop; + log_message("child: finished sleep loop"); + pipe_write_data(childpipe, &data, sizeof(data)); + pipe_read_data(parentpipe, &data, sizeof(data)); + assert_condition(data == finished_parent_detach, -1, "child: parent not done with detach"); + i = 5; + log_message("child: sleep loop 2"); + while (i-- > 0){ + sleep(1); + printf(".Z.\n"); + } + exit(57); + }else{ /* parent */ + data = begin; + pipe_write_data(parentpipe, &data, sizeof(data)); + data = getpid(); + pipe_read_data(childpipe, &data, sizeof(data)); + assert_condition(data == begin, -1, "child is not ready with TRACE_ME setup"); + printf("parent: attaching to child with pid %d \n", childpid); + retval = ptrace(PT_ATTACH, childpid, NULL, 0); + assert_condition(retval == 0, -1, "parent: failed to attach to child"); + sleep(2); + log_message("parent: attached to child. Now PT_STEP through it"); + retval = ptrace(PT_STEP, childpid, (caddr_t)1, 0); + assert_condition(retval == 0, -1, "parent: failed to continue the child"); + sleep(2); + retval = ptrace(PT_STEP, childpid, (caddr_t)1, 0); + assert_condition(retval == 0, -1, "parent: failed to continue the child"); + log_message("parent: issuing PT_KILL to child "); + sleep(2); + retval = ptrace(PT_KILL, childpid, NULL, 0); + assert_condition(retval == 0, -1, "parent: failed to PT_KILL the child"); + data = finished_parent_detach; + pipe_write_data(parentpipe, &data, sizeof(data)); + waitpid(childpid,&status,0); + assert_condition(status != 57, -1, "child has exited successfully. It should have died with signal 9"); + assert_condition(status == 9, -1, "child has exited unexpectedly. 
Should have died with signal 9"); + } + +} + +void test_ptrace_attach_detach(){ + int retval = 0, status=1; + int parentpipe[2], childpipe[2], data; + enum data_state { begin, finished_child_loop, finished_parent_detach }; + retval = pipe(childpipe); + assert_condition(retval == 0, -1, "Pipe create"); + retval = pipe(parentpipe); + assert_condition(retval == 0, -1, "Pipe create"); + int childpid = fork(); + assert_condition(childpid >=0, -1, "fork failed"); + + if (childpid == 0){ /* child */ + //retval = ptrace(PT_TRACE_ME, getpid(), NULL, 0); + //assert_condition(retval == 0, -1, "PT_TRACE_ME failed"); + pipe_read_data(parentpipe, &data, sizeof(data)); + assert_condition(data == begin, -1, "child: parent not setting begin"); + pipe_write_data(childpipe, &data, sizeof(data)); + log_message("child: running the sleep loop"); + int i = 5; + log_message("child: sleep looping"); + while (i-- > 0){ + sleep(1); + printf(".z.\n"); + } + data = finished_child_loop; + log_message("child: finished sleep loop"); + pipe_write_data(childpipe, &data, sizeof(data)); + pipe_read_data(parentpipe, &data, sizeof(data)); + assert_condition(data == finished_parent_detach, -1, "child: parent not done with detach"); + i = 5; + log_message("child sleep looping too"); + while (i-- > 0){ + sleep(1); + printf(".Z.\n"); + } + exit(0); + }else{ /* parent */ + data = begin; + pipe_write_data(parentpipe, &data, sizeof(data)); + data = getpid(); + pipe_read_data(childpipe, &data, sizeof(data)); + assert_condition(data == begin, -1, "child is not ready with TRACE_ME setup"); + printf("parent: attaching to child with pid %d \n", childpid); + retval = ptrace(PT_ATTACH, childpid, NULL, 0); + assert_condition(retval == 0, -1, "parent: failed to attach to child"); + sleep(2); + log_message("parent: attached to child. Now continuing it"); + retval = ptrace(PT_CONTINUE, childpid, (caddr_t)1, 0); + assert_condition(retval == 0, -1, "parent: failed to continue the child"); + + pipe_read_data(childpipe, &data, sizeof(data)); + assert_condition(data == finished_child_loop, -1, "parent: child has not finished while loop"); + + retval = kill(childpid, SIGSTOP); + assert_condition(retval == 0, -1, "parent: failed to SIGSTOP child"); + sleep(2); + + log_message("parent: child has finished loop. 
Now detaching the child"); + retval = ptrace(PT_DETACH, childpid, NULL, 0); + assert_condition(retval == 0, -1, "parent: failed to detach"); + + data = finished_parent_detach; + pipe_write_data(parentpipe, &data, sizeof(data)); + waitpid(childpid,&status,0); + assert_condition(status == 0, -1, "child has exited unexpectedly"); + } +} + + +void test_ptrace_deny_tace_sigexc(){ + enum ptrace_state { begin,denied_attach, sigexc_tested,trace_me_set, attached, stepped, continued, killed }; + int retval =0; + int childpipe[2],parentpipe[2], data[2]; + retval = pipe(childpipe); + assert_condition( retval == 0, -3, "Pipe create"); + retval = pipe(parentpipe); + assert_condition( retval == 0, -3, "Pipe create"); + + data[0] = begin; // parent + data[1] = begin; //child + + int childpid = fork(); + int status = 0; + assert_condition(childpid >=0, -4, "fork failed"); + + if (childpid == 0){ + /* child */ + retval = ptrace(PT_DENY_ATTACH, getpid(), NULL,0); + data[1] = denied_attach; + pipe_write_data(childpipe, &data[1], sizeof(int)); + log_message("child: waiting for parent to write something"); + pipe_read_data(parentpipe, &data[0], sizeof(int)); + assert_condition(data[0] == begin , -5, "child: parent didnt begin with right state"); + + /* waiting for parent to verify that PT_SIGEXC fails since child is not yet traced. */ + + pipe_read_data(parentpipe, &data[0], sizeof(int)); + assert_condition(data[0] == sigexc_tested, -5, " child: parent didnt test for sigexc failure"); + log_message("child: setting myself to be traced"); + retval = ptrace(PT_TRACE_ME, getpid(), NULL ,0); + assert_condition(retval == 0, -6, "child: failed to setmyself for tracing"); + data[1]=trace_me_set; + pipe_write_data(childpipe, &data[1], sizeof(int)); + log_message("child: setting signals to be exceptions. PT_SIGEXC"); + retval = ptrace(PT_SIGEXC, getpid(), NULL, 0); + assert_condition(retval == 0, -7, "child: failed to set PT_SIGEXC"); + + exit(0); + + }else { + /* parent */ + // get status of child + pipe_read_data(childpipe, &data[1], sizeof(int)); + assert_condition(data[1] == denied_attach, -5, "parent: deny_attach_check"); + pipe_write_data(parentpipe, &data[0], sizeof(int)); + + log_message("parent: testing for failure fo PT_SIGEXC "); + retval = ptrace(PT_SIGEXC, childpid, NULL, 0); + assert_condition(retval < 0 , -5, "PT_SIGEXC did not fail for untraced child"); + data[0] = sigexc_tested; + pipe_write_data(parentpipe, &data[0], sizeof(int)); + + pipe_read_data(childpipe, &data[1], sizeof(int)); + assert_condition(data[1] == trace_me_set , -7, "parent: child has not set PT_TRACE_ME"); + + waitpid(childpid, &status, 0); + if ( status != 0){ + log_message("Child exited with non zero status"); + } + } + + close(childpipe[0]); + close(childpipe[1]); + + close(parentpipe[0]); + close(parentpipe[1]); + +} diff --git a/tools/tests/unit_tests/sampletest.c b/tools/tests/unit_tests/sampletest.c new file mode 100644 index 000000000..aee888903 --- /dev/null +++ b/tools/tests/unit_tests/sampletest.c @@ -0,0 +1,28 @@ +#include +/* +Sample test file. Do not remove this. +*/ +int main(int argc, char *argv[]){ + char os_version[20] = TARGET_OS_VERS; + char os_build[20] = TARGET_OS_BUILD_VERS; + printf("Sample test for xnu unit tests. 
This file is just an example for future unit tests.\n"); + printf("This test was built with OS version %s and build %s\n", os_version, os_build); + /* an example of how SDKTARGET is used for different builds */ +#ifdef TARGET_SDK_macosx + printf("The SDKTARGET for building this test is macosx\n"); +#endif + +#ifdef TARGET_SDK_macosx_internal + printf("The SDKTARGET for building this test is macosx.internal\n"); +#endif + +#ifdef TARGET_SDK_iphoneos + printf("The SDKTARGET for building this test is iphoneos\n"); +#endif + +#ifdef TARGET_SDK_iphoneos_internal + printf("The SDKTARGET for building this test is iphoneos.internal\n"); +#endif + + return 0; +} diff --git a/tools/tests/unit_tests/semctl_test_8534495_src/semctl_test_8534495.c b/tools/tests/unit_tests/semctl_test_8534495_src/semctl_test_8534495.c new file mode 100644 index 000000000..5c25f5581 --- /dev/null +++ b/tools/tests/unit_tests/semctl_test_8534495_src/semctl_test_8534495.c @@ -0,0 +1,38 @@ +#include <stdio.h> +#include <stdlib.h> +#include <sys/ipc.h> +#include <sys/sem.h> +#include <sys/stat.h> + +int main(void) { + key_t key; + + if ((key = ftok(".", 1)) == (key_t)-1) { + perror("ftok"); + exit(EXIT_FAILURE); + } + + int semid; + if ((semid = semget(key, 1, IPC_CREAT | S_IRUSR | S_IWUSR)) == -1) { + perror("semget"); + exit(EXIT_FAILURE); + } + + union semun arg; + + /* Test for sem value > SEMVMX */ + arg.val = 32768; + if (semctl(semid, 0, SETVAL, arg) == 0) { + printf("semctl should have failed for SETVAL 32768\n"); + exit(EXIT_FAILURE); + } + + /* Test for sem value < 0 */ + arg.val = -1; + if (semctl(semid, 0, SETVAL, arg) == 0) { + printf("semctl should have failed for SETVAL -1\n"); + exit(EXIT_FAILURE); + } + + return 0; +} diff --git a/tools/tests/unit_tests/sprace_test_11891562_src/sprace_test_11891562.c b/tools/tests/unit_tests/sprace_test_11891562_src/sprace_test_11891562.c new file mode 100644 index 000000000..cf37ef7cd --- /dev/null +++ b/tools/tests/unit_tests/sprace_test_11891562_src/sprace_test_11891562.c @@ -0,0 +1,265 @@ + +/* + * File: sprace_test_11891562.c + * Test Description: The test ensures that there are no race conditions when multiple threads + * attempt to send messages to a mach port with a subset of threads waiting for a send-possible + * notification. + * Radar: <rdar://problem/11891562> + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +#define VERBOSE 1 +#define COUNT 3000000 + +semaphore_t sender_sema = SEMAPHORE_NULL; +mach_port_t msg_port = MACH_PORT_NULL; +boolean_t msg_port_modref = FALSE; + +void * +sender(void *arg) +{ + mach_msg_empty_send_t smsg; + mach_port_t notify, old_notify; + kern_return_t kr; + boolean_t msg_inited; + boolean_t use_sp = *(boolean_t *)arg; + int send_possible_count = 0; + + fprintf(stderr, "starting a thread %susing send-possible notifications.\n", + (!use_sp) ? 
"not " : ""); + + if (use_sp) { + kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, ¬ify); + if (KERN_SUCCESS != kr) { + mach_error("mach_port_allocate(notify)", kr); + exit(1); + } + + request: + kr = mach_port_request_notification(mach_task_self(), msg_port, + MACH_NOTIFY_SEND_POSSIBLE, 0 /* delayed */, + notify, MACH_MSG_TYPE_MAKE_SEND_ONCE, + &old_notify); + if (KERN_INVALID_ARGUMENT == kr && msg_port_modref) + goto done; + + if (KERN_SUCCESS != kr) { + mach_error("mach_port_request_notification(MACH_NOTIFY_SEND_POSSIBLE)", kr); + exit(1); + } + if (MACH_PORT_NULL != old_notify) { + fprintf(stderr, "unexecpted old notify port (0x%x)\n", old_notify); + exit(1); + } + } + + msg_inited = FALSE; + + for (;;) { + mach_send_possible_notification_t nmsg; + mach_msg_option_t options; + mach_msg_return_t mret; + + if (!msg_inited) { + mach_msg_option_t options; + + smsg.header.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0); + smsg.header.msgh_remote_port = msg_port; + smsg.header.msgh_local_port = MACH_PORT_NULL; + smsg.header.msgh_size = sizeof(smsg); + smsg.header.msgh_id = 0; + msg_inited = TRUE; + } + + options = MACH_SEND_MSG | MACH_SEND_TIMEOUT; + if (use_sp) + options |= MACH_SEND_NOTIFY; + + mret = mach_msg(&smsg.header, options, + sizeof(smsg), 0, + MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE /* immediate timeout */, + MACH_PORT_NULL); + + if (MACH_MSG_SUCCESS == mret) { + msg_inited = FALSE; + continue; + } + + if (MACH_SEND_INVALID_DEST == mret) + break; + + if (MACH_SEND_TIMED_OUT != mret) { + mach_error("mach_msg(send)", mret); + exit(1); + } + + if (use_sp) { + + /* Wait for the send-possible notification */ + mret = mach_msg(&nmsg.not_header, MACH_RCV_MSG | MACH_RCV_TIMEOUT, + 0, sizeof(nmsg), + notify, + 10000 /* 10 second timeout */, + MACH_PORT_NULL); + + if (msg_port_modref) + goto done; + + if (MACH_RCV_TIMED_OUT == mret) { + fprintf(stderr, "FAILED! 
Didn't receive send-possible notification\n"); + exit(1); + } + + if (MACH_MSG_SUCCESS != mret) { + mach_error("mach_msg_receive(notify)\n", mret); + exit(1); + } + + switch (nmsg.not_header.msgh_id) { + + case MACH_NOTIFY_SEND_POSSIBLE: + if (nmsg.not_port != msg_port) { + fprintf(stderr, "send-possible notification about wrong port (0x%x != 0x%x)\n", nmsg.not_port, msg_port); + exit(1); + } + send_possible_count++; + + semaphore_signal_all(sender_sema); + goto request; + + case MACH_NOTIFY_DEAD_NAME: + if (nmsg.not_port != msg_port) { + fprintf(stderr, "dead name notification about wrong port (0x%x != 0x%x)\n", nmsg.not_port, msg_port); + exit(1); + } + goto done; + default: + fprintf(stderr, "unexpected notify id (%d)\n", nmsg.not_header.msgh_id); + exit(1); + } + } else { + semaphore_wait(sender_sema); + } + } + + done: + if (use_sp) { + mach_port_destroy(mach_task_self(), notify); + fprintf(stderr, "received %d send-possible notifications\n", send_possible_count); + } + return(NULL); +} + +int +main(int argc, char **argv) { + mach_msg_return_t mret; + mach_port_limits_t limits; + pthread_t thread1, thread2, thread3; + boolean_t thread1_arg, thread2_arg, thread3_arg; + kern_return_t kr; + int i, res; + + /* allocate receive and send right for the message port */ + kr = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &msg_port); + if (KERN_SUCCESS != kr) { + mach_error("mach_port_allocate(msg_port)", kr); + exit(1); + } + kr = mach_port_insert_right(mach_task_self(), msg_port, msg_port, MACH_MSG_TYPE_MAKE_SEND); + if (KERN_SUCCESS != kr) { + mach_error("mach_port_insert_right(msg_port)", kr); + exit(1); + } + + /* bump its qlimit up enough to allow races to develop between threads */ + limits.mpl_qlimit = 100; + kr = mach_port_set_attributes(mach_task_self(), msg_port, + MACH_PORT_LIMITS_INFO, (mach_port_info_t)&limits, sizeof(limits)/sizeof(int)); + if (KERN_SUCCESS != kr) { + mach_error("mach_port_set_attributes(msg_port)", kr); + exit(1); + } + + kr = semaphore_create(mach_task_self(), &sender_sema, SYNC_POLICY_FIFO, 0 /* initial value */); + if (KERN_SUCCESS != kr) { + mach_error("semaphore_create(sender_sema)\n", kr); + exit(1); + } + + thread1_arg = FALSE; /* don't use send-possible notifications */ + res = pthread_create(&thread1, (pthread_attr_t *)NULL, sender, &thread1_arg); + if (res) { + perror("pthread_create(non-send-possible_thread-1)"); + exit(1); + } + + thread2_arg = FALSE; /* don't use send-possible notifications */ + res = pthread_create(&thread2, (pthread_attr_t *)NULL, sender, &thread2_arg); + if (res) { + perror("pthread_create(non-send-possible_thread-2)"); + exit(1); + } + + thread3_arg = TRUE; /* use send-possible notifications */ + res = pthread_create(&thread3, (pthread_attr_t *)NULL, sender, &thread3_arg); + if (res) { + perror("pthread_create(send-possible-thread-3)"); + exit(1); + } + + for (i=0; i < COUNT; i++) { + mach_msg_empty_rcv_t rmsg; + + mret = mach_msg(&rmsg.header, MACH_RCV_MSG, + 0, sizeof(rmsg), + msg_port, + MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + if (MACH_MSG_SUCCESS != mret) { + mach_error("mach_msg_receive(msg_port)\n", mret); + exit(1); + } + } + + msg_port_modref = TRUE; + kr = mach_port_mod_refs(mach_task_self(), msg_port, MACH_PORT_RIGHT_RECEIVE, -1); + if (KERN_SUCCESS != kr) { + mach_error("mach_port_mod_refs(msg_port)", kr); + exit(1); + } + + kr = semaphore_destroy(mach_task_self(), sender_sema); + if (KERN_SUCCESS != kr) { + mach_error("semaphore_destroy(sender_sema)", kr); + exit(1); + } + + res = pthread_join(thread1, 
NULL); + if (res) { + perror("pthread_join(thread1)"); + exit(1); + } + res = pthread_join(thread2, NULL); + if (res) { + perror("pthread_join(thread2)"); + exit(1); + } + res = pthread_join(thread3, NULL); + if (res) { + perror("pthread_join(thread3)"); + exit(1); + } + + printf("[PASSED] Test sprace_test_11891562 passed. \n"); + exit(0); +} + diff --git a/tools/tests/unit_tests/test_waitqlocktry_12053360.c b/tools/tests/unit_tests/test_waitqlocktry_12053360.c new file mode 100644 index 000000000..6ce335f2a --- /dev/null +++ b/tools/tests/unit_tests/test_waitqlocktry_12053360.c @@ -0,0 +1,79 @@ +/* + * File: test_waitqlocktry_12053360.c + * Test Description: This is a load test for wait queues in the kernel. It is designed to exercise the locking of threads and + * wait queues in the face of timer expirations. The overall runtime is limited to 90 secs. + * In case of inconsistency, the kernel has been observed to panic within the first 15 secs. + * Radar: <rdar://problem/12053360> + */ + +#include <pthread.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> + +#define MAX_TEST_RUN_TIME 90 +uint32_t test_usleep_max; + +void* +test_thread(void *arg __unused) +{ + while (1) { + usleep(random() % test_usleep_max); + } + + return NULL; +} + + +int +main(int argc, const char **argv) +{ + pthread_t *threads; + uint32_t nthreads, i; + int tmp, result; + + if (argc != 3) { + printf("Usage: %s <max_usleep_usec> <num_threads>\n", argv[0]); + printf("Currently defaulting to 100us and 100 threads\n"); + test_usleep_max = 100; + nthreads = 100; + }else { + + tmp = atoi(argv[1]); + if (tmp <= 0) { + printf("Sleep time must be > 0.\n"); + exit(1); + } + + test_usleep_max = (uint32_t)tmp; + + tmp = atoi(argv[2]); + if (tmp <= 0) { + printf("Num threads must be > 0.\n"); + exit(1); + } + nthreads = (uint32_t)tmp; + } + threads = (pthread_t*)malloc(nthreads * sizeof(pthread_t)); + if (threads == NULL) { + printf("Failed to allocate thread array.\n"); + exit(1); + } + + printf("Creating %u threads with a max sleep time of %uusec.\n", nthreads, test_usleep_max); + srandom(time(NULL)); + for (i = 0; i < nthreads; i++) { + result = pthread_create(&threads[i], NULL, test_thread, NULL); + if (result != 0) { + printf("Failed to allocate thread.\n"); + exit(1); + } + } + + printf("Main thread sleeping for %d secs\n", MAX_TEST_RUN_TIME); + sleep(MAX_TEST_RUN_TIME); + printf("Success. 
Exiting..\n"); + return 0; +} diff --git a/tools/tests/unit_tests/test_wq_exit_race_panic_10970548.c b/tools/tests/unit_tests/test_wq_exit_race_panic_10970548.c new file mode 100644 index 000000000..d71ab1bcc --- /dev/null +++ b/tools/tests/unit_tests/test_wq_exit_race_panic_10970548.c @@ -0,0 +1,32 @@ +#include +#include +#include + +int main(int argc, char *argv[]) +{ + char *envp[] = { NULL }; + char *mycount = "1"; + char *nargvp[] = { argv[0], mycount , NULL}; + char *progpath = argv[0]; + char buf[50]; + char oldcount[30]; + int envcount=0; + if (argc >= 2){ + envcount = atoi(argv[1]); + printf("count = %d \n", envcount); + sprintf(buf, "%d", envcount+1); + nargvp[1] = buf; + } + char **nargvpp = nargvp; + if (envcount < 8 ) + fork(); + if (envcount > 320) + exit(0); + dispatch_apply(32, + dispatch_get_global_queue(0,0), + ^(size_t i __attribute__((unused))) { + execve(progpath,nargvpp,envp); + }); + + return 0; +} diff --git a/tools/tests/unit_tests/thread_get_state_11918811_src/excserver.defs b/tools/tests/unit_tests/thread_get_state_11918811_src/excserver.defs new file mode 100644 index 000000000..e528df455 --- /dev/null +++ b/tools/tests/unit_tests/thread_get_state_11918811_src/excserver.defs @@ -0,0 +1 @@ +#include <mach/mach_exc.defs> diff --git a/tools/tests/unit_tests/thread_get_state_11918811_src/thread_get_state.c b/tools/tests/unit_tests/thread_get_state_11918811_src/thread_get_state.c new file mode 100644 index 000000000..e5ab85d45 --- /dev/null +++ b/tools/tests/unit_tests/thread_get_state_11918811_src/thread_get_state.c @@ -0,0 +1,190 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "excserver.h" + +/* + * Test program that sets up a Mach exception handler, + * then performs 10000 invalid memory accesses and makes + * sure all thread_get_state variants can be executed + * from inside the exception handler. 
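+ * + * Flow of the test: one spinner thread per CPU keeps the other cores busy, + * a dedicated handler thread services the exception port via mach_msg_server(), + * and the main thread faults by touching a page it has just mprotect()ed to + * PROT_NONE. For each EXC_BAD_ACCESS message the handler queries every flavor + * reported by THREAD_STATE_FLAVOR_LIST(_NEW) with thread_get_state(), then + * re-enables the page with mprotect(PROT_WRITE). 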
+ */ +void *handler(void *); +void *spin(void *); +dispatch_semaphore_t start_sema; +volatile int iteration; + +#define COUNT 10000 + +int main(int argc, char *argv[]) { + int ret; + pthread_t handle_thread; + char *buffer = valloc(4096); + int i; + int ncpu; + size_t ncpucount = sizeof(ncpu); + + start_sema = dispatch_semaphore_create(0); + + ret = sysctlbyname("hw.ncpu", &ncpu, &ncpucount, NULL, 0); + if (ret) + err(1, "sysctlbyname"); + + for (i=0; i < ncpu; i++) { + pthread_t spin_thread; + + ret = pthread_create(&spin_thread, NULL, spin, NULL); + if (ret) + err(1, "pthread_create"); + } + + sleep(1); + ret = pthread_create(&handle_thread, NULL, handler, NULL); + if (ret) + err(1, "pthread_create"); + + dispatch_semaphore_wait(start_sema, DISPATCH_TIME_FOREVER); + + for (iteration = 0; iteration < COUNT; iteration++) { + ret = mprotect(buffer, 4096, PROT_NONE); + if (ret != 0) + err(1, "mprotect"); + + usleep(1000); + + volatile float a = ((float)iteration)/2.4f; + *buffer = '!'; + } + + return 0; +} + +void *handler(void *arg __unused) { + kern_return_t kret; + mach_port_t exception_port; + + kret = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, + &exception_port); + if (kret != KERN_SUCCESS) + errx(1, "mach_port_allocate: %s (%d)", mach_error_string(kret), kret); + + kret = mach_port_insert_right(mach_task_self(), exception_port, exception_port, MACH_MSG_TYPE_MAKE_SEND); + if (kret != KERN_SUCCESS) + errx(1, "mach_port_insert_right: %s (%d)", mach_error_string(kret), kret); + + kret = task_set_exception_ports(mach_task_self(), + EXC_MASK_BAD_ACCESS, + exception_port, + EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES, + 0); + if (kret != KERN_SUCCESS) + errx(1, "task_set_exception_ports: %s (%d)", mach_error_string(kret), kret); + + dispatch_semaphore_signal(start_sema); + + kret = mach_msg_server(mach_exc_server, MACH_MSG_SIZE_RELIABLE, exception_port, 0); + if (kret != KERN_SUCCESS) + errx(1, "mach_msg_server: %s (%d)", mach_error_string(kret), kret); + + return NULL; +} + +kern_return_t catch_mach_exception_raise +( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt +) +{ + int ret; + kern_return_t kret; + thread_state_flavor_t flavors[128]; + thread_state_data_t state; + mach_msg_type_number_t count; + int i, flcount; + +// printf("Successfully caught EXC_BAD_ACCESS %s(%d) at 0x%016llx\n", mach_error_string((int)code[0]), (int)code[0], code[1]); + + count = sizeof(flavors)/sizeof(natural_t); + kret = thread_get_state(thread, THREAD_STATE_FLAVOR_LIST_NEW, (thread_state_t)flavors, &count); + if (kret == KERN_INVALID_ARGUMENT) { + /* try older query */ + count = sizeof(flavors)/sizeof(natural_t); + kret = thread_get_state(thread, THREAD_STATE_FLAVOR_LIST, (thread_state_t)flavors, &count); + if (kret != KERN_SUCCESS) + errx(1, "thread_get_state(THREAD_STATE_FLAVOR_LIST): %s (%d)", mach_error_string(kret), kret); + } else if (kret != KERN_SUCCESS) + errx(1, "thread_get_state(THREAD_STATE_FLAVOR_LIST_NEW): %s (%d)", mach_error_string(kret), kret); + + flcount = count; + for (i=0; i < flcount; i++) { + thread_state_flavor_t flavor; + + flavor = flavors[(i + iteration) % flcount]; + count = THREAD_STATE_MAX; + kret = thread_get_state(thread, flavor, (thread_state_t)state, &count); + if (kret != KERN_SUCCESS) + errx(1, "thread_get_state(%d): %s (%d)", flavor, mach_error_string(kret), kret); + } + + ret = mprotect((void *)code[1], 4096, PROT_WRITE); + if (ret != 0) + 
err(1, "mprotect"); + + return KERN_SUCCESS; +} + +kern_return_t catch_mach_exception_raise_state +( + mach_port_t exception_port, + exception_type_t exception, + const mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + const thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt +) +{ + errx(1, "Unsupported catch_mach_exception_raise_state"); + return KERN_NOT_SUPPORTED; +} + +kern_return_t catch_mach_exception_raise_state_identity +( + mach_port_t exception_port, + mach_port_t thread, + mach_port_t task, + exception_type_t exception, + mach_exception_data_t code, + mach_msg_type_number_t codeCnt, + int *flavor, + thread_state_t old_state, + mach_msg_type_number_t old_stateCnt, + thread_state_t new_state, + mach_msg_type_number_t *new_stateCnt +) +{ + errx(1, "Unsupported catch_mach_exception_raise_state_identity"); + return KERN_NOT_SUPPORTED; +} + +void *spin(void *arg __unused) { + volatile unsigned int a; + + while (1) { + a++; + } + + return NULL; +} diff --git a/tools/tests/unit_tests/xnu_raft_tests.py b/tools/tests/unit_tests/xnu_raft_tests.py new file mode 100755 index 000000000..bf273c79a --- /dev/null +++ b/tools/tests/unit_tests/xnu_raft_tests.py @@ -0,0 +1,169 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Python Imports +import os +import sys +import re + +""" +xnu_raft_tests +Automate testing of unit tests for xnu. + +2012/02/23 +""" + +# this needs to be first thing for raft to load its environment correctly +if __name__ == '__main__': + # The following code allows this test to be invoked outside the harness and should be left unchanged + args = [os.path.realpath(os.path.expanduser("/usr/local/bin/raft")), "-f"] + sys.argv + os.execv(args[0], args) + + +# Library Imports +from raftlibs.coreos import crashReporterStop, crashReporterStart, doPrivileged, runFunctionWithTestReRun +from raftlibs.coreos import runUniversalLogProcess, spotlightStopSubtest, spotlightStartSubtest, svnCheckoutTestTool, svnCheckoutToPath, runSimpleProcess + +from raft.core.logging import log_note + +# Raft Imports +from __test__ import __path__ + +# This is a Raft test. 
For more information see http://raft.apple.com +testDescription = "Runs all tests defined as targets in Makefile" # Add a brief description of test functionality +testVersion = "0.1" # Used to differentiate between results for different versions of the test +testState = DevelopmentState # Possible values: DevelopmentState, ProductionState + + +# class definitions +class xnuTest: + """ A container to hold test and its result """ + def __init__(self,testName): + self.name = str(testName) + self.buildStatus = False + self.executeStatus = False + self.exitValue = None + self.comments = '' + + def getName(self): + return self.name + + @staticmethod + def getSummaryHeader(): + return "| {0: ^40s} |{1: >6s} |{2: >5s} |{3: >10s} |{4}".format("Test Name", "Build", "Run", "ExitVal", "Comments") + + def getSummary(self): + formatString ="| {0: <40s} |{1: >6s} |{2: >5s} |{3: >10s} |{4}" + nameVal = str(self.name) + buildVal = str(self.buildStatus) + execVal = str(self.executeStatus) + exitVal = str(self.exitValue) + commentsVal = str(self.comments) + return formatString.format(nameVal, buildVal, execVal, exitVal, commentsVal) + +# global functions +def getTestsFromMakeFile(makeFilePath): + makeTargets=[] + targetRegex = re.compile("^\s*([a-zA-Z0-9_.]+)\s*:\s*([a-zA-Z0-9_.]*).*",re.IGNORECASE|re.DOTALL) + fh = open(makeFilePath,"r"); + for line in fh: + tmp_res = targetRegex.findall(line) + if len(tmp_res) == 1: + makeTargets.append(xnuTest(tmp_res[0][0])) + fh.close() + return makeTargets + + +def buildTest(test, path): + os.chdir(path) + result = doCommand("/usr/bin/make",test) + if result['status'] != 0: + print "Failed to Build %s" % test + print "**STDOUT**\n%s" % result['stdout'] + print "**STDERR**\n%s" % result['stderr'] + raise StandardError + log_note("Built %s successfully" % test) + +def executeTest(testObject,path): + os.chdir(path) + test = testObject.getName() + executable_path = os.path.join(path, test) + print "[TEST] %s" % test + print "[BEGIN] %s" % test + try: + result = runSimpleProcess(executable_path,testName()+"_"+test, wait_time=120) + testObject.exitValue = result['status'] + if result['status'] == 0: + print "[PASS] %s returned %d" % (test,result['status']) + except: + print "[FAIL] %s returned %d" % (test, result['status']) + testObject.comments = "Failed due to timeout or file not found error" + log_note("Completed running test %s" % test) + +def removeTestExecutable(test,path): + os.chdir(path) + doCommand("/bin/rm",test) + +def runTest(params): + # Change to /tmp, because make doesn't support directory paths with spaces + os.chdir("/private/tmp") + output= {'status': 1 } + try: + output = svnCheckoutTestTool("unit_tests") + except: + pass + if output['status'] != 0 : + # since we are not fully published yet. 
lets get data from a branch + print "Fetching unit_test roots from Branch instead of trunk" + baseURL = "http://src.apple.com/svn/xnu/branches/PR-10938974/tools/tests/unit_tests/" + output = svnCheckoutToPath(baseURL) + if output['status'] != 0 : + logFail("[FAIL] error in checkout from branch") + sys.exit(1) + + local_path = os.path.join(os.getcwd(), "unit_tests") + makefile_path = os.path.join(local_path, "Makefile") + build_path = os.path.join(local_path, "BUILD") + + + tests_to_run = getTestsFromMakeFile(makefile_path) + log_note("Starting raft tests for XNU") + stats = {"total":len(tests_to_run) , "pass":0, "fail":0} + for testObject in tests_to_run: + test = testObject.getName() + if test == "clean": + stats["pass"]+=1 + testObject.buildStatus = True + testObject.executeStatus = True + testObject.exitValue = 0 + continue + + log_note("Running test :%s" % test) + try: + buildTest(test,local_path) + testObject.buildStatus = True + res = executeTest(testObject,build_path) + testObject.executeStatus = True + if testObject.exitValue == 0 : + stats["pass"]+=1 + else: + stats["fail"]+=1 + removeTestExecutable(test,build_path) + logPass(test) + except: + logFail("[FAIL] %s failed." % test) + print "Finished running tests. Cleaning up" + doCommand("/usr/bin/make","clean") + #Now to print the Summary and statistics + print "\n\n Test Summary \n" + print xnuTest.getSummaryHeader() + for testObject in tests_to_run: + print testObject.getSummary() + print "\n===============================\n" + print "[SUMMARY]" + print "Total tests: %d" % stats["total"] + print "Passed : %d" % stats["pass"] + print "Failed : %d" % stats["fail"] + print "================================\n\n" + + logPass() # This line is implicit and can be removed diff --git a/tools/tests/xnu_quick_test/README b/tools/tests/xnu_quick_test/README index 3bf8a2d8f..587999af2 100644 --- a/tools/tests/xnu_quick_test/README +++ b/tools/tests/xnu_quick_test/README @@ -62,7 +62,7 @@ USAGE: xnu_quick_test -target TARGET_PATH -r[un] 1, 3, 10 - 19 # run specific tests. enter individual test numbers and/or range of numbers. use -list to list tests. -s[kip] # skip setuid tests -t[arget] TARGET_PATH # path to directory where tool will create test files. defaults to "/tmp/" - -x[ilog] # To run the xnu_quick_test with xilog reporting enabled. + -testbot # output results in CoreOS TestBot compatible format examples: --- Place all test files and directories at the root of volume "test_vol" --- diff --git a/tools/tests/xnu_quick_test/content_protection_test.c b/tools/tests/xnu_quick_test/content_protection_test.c index 9f2cceb1d..c372100b8 100644 --- a/tools/tests/xnu_quick_test/content_protection_test.c +++ b/tools/tests/xnu_quick_test/content_protection_test.c @@ -417,7 +417,7 @@ int content_protection_test(void * argp) goto end; } - fd = open(filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC); + fd = open(filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0777); if (fd == -1) { @@ -446,10 +446,34 @@ int content_protection_test(void * argp) } } - if (SET_PROT_CLASS(fd, PROTECTION_CLASS_D)) - { - printf("%s, line %d: failed to change protection class from F to D when unlocked, errno = %s.\n", - cpt_fail_header, __LINE__, strerror(errno)); + /* Query the filesystem for the default CP level (Is it C?) 
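The fcntl(F_GETDEFAULTPROTLEVEL) query below is only checked for success; per the XXX note that follows, the test does not yet assert a particular class. 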
*/ +#ifndef F_GETDEFAULTPROTLEVEL +#define F_GETDEFAULTPROTLEVEL 79 +#endif + + old_prot_class = fcntl(fd, F_GETDEFAULTPROTLEVEL); + if (old_prot_class == -1) { + printf("%s , line %d: failed to acquire default protection level for filesystem , errno = %s \n", + cpt_fail_header, __LINE__, strerror(errno)); + goto cleanup_file; + } + + /* XXX: Do we want to do anything with the level? What should it be? */ + + + /* + * files are allowed to move into F, but not out of it. They can also only do so + * when they do not have content. + */ + close (fd); + unlink (filepath); + + + /* re-create the file */ + fd = open (filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC); + if (fd == -1) { + printf("%s, line %d: failed to create the test file, errno = %s.\n", + cpt_fail_header, __LINE__, strerror(errno)); goto cleanup_file; } @@ -611,6 +635,13 @@ int content_protection_test(void * argp) goto cleanup_file; } + if (GET_PROT_CLASS (fd) != PROTECTION_CLASS_B) { + printf("%s, line %d: Failed to switch to class B file \n", + cpt_fail_header, __LINE__ ); + goto cleanup_file; + } + + /* We should also be able to read/write to the file descriptor while it is open. */ current_byte = 0; @@ -675,9 +706,9 @@ int content_protection_test(void * argp) goto remove_dir; } - if (GET_PROT_CLASS(dir_fd) != PROTECTION_CLASS_D) + if ((GET_PROT_CLASS(dir_fd) != PROTECTION_CLASS_D) && (GET_PROT_CLASS(dir_fd) != PROTECTION_CLASS_DIR_NONE)) { - printf("%s, line %d: newly created directory had a non-D protection class.\n", + printf("%s, line %d: newly created directory had a non-D and non-NONE protection class.\n", cpt_fail_header, __LINE__); goto cleanup_dir; } @@ -710,8 +741,9 @@ int content_protection_test(void * argp) goto cleanup_dir; } - for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_E; new_prot_class++) + for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_D; new_prot_class++) { + int getclass_dir; old_prot_class = GET_PROT_CLASS(dir_fd); if (old_prot_class == -1) @@ -728,7 +760,15 @@ int content_protection_test(void * argp) goto cleanup_dir; } - fd = open(filepath, O_CREAT | O_EXCL | O_CLOEXEC); + getclass_dir = GET_PROT_CLASS(dir_fd); + if (getclass_dir != new_prot_class) { + printf("%s, line %d: failed to get the new protection class for the directory %d (got %d) \n", + cpt_fail_header, __LINE__, new_prot_class, getclass_dir); + goto cleanup_dir; + + } + + fd = open(filepath, O_CREAT | O_EXCL | O_CLOEXEC, 0777); if (fd == -1) { @@ -747,8 +787,9 @@ int content_protection_test(void * argp) } else if (local_result != new_prot_class) { - printf("%s, line %d: new file did not inherit the directory's protection class.\n", - cpt_fail_header, __LINE__, strerror(errno)); + + printf("%s, line %d: new file (%d) did not inherit the directory's protection class (%d) .\n", + cpt_fail_header, __LINE__, local_result, new_prot_class); goto cleanup_file; } @@ -778,7 +819,7 @@ int content_protection_test(void * argp) goto cleanup_dir; } - fd = open(filepath, O_CREAT | O_EXCL | O_CLOEXEC); + fd = open(filepath, O_CREAT | O_EXCL | O_CLOEXEC, 0777); if (fd != -1) { @@ -806,7 +847,7 @@ int content_protection_test(void * argp) goto cleanup_dir; } - fd = open(filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC); + fd = open(filepath, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0777); if (fd == -1) { @@ -837,7 +878,7 @@ int content_protection_test(void * argp) goto cleanup_file; } - for (new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_E; new_prot_class++) + for 
(new_prot_class = PROTECTION_CLASS_A; new_prot_class <= PROTECTION_CLASS_D; new_prot_class++) { if (SET_PROT_CLASS(dir_fd, new_prot_class)) { diff --git a/tools/tests/xnu_quick_test/helpers/launch.c b/tools/tests/xnu_quick_test/helpers/launch.c index c9b135789..178a04f28 100644 --- a/tools/tests/xnu_quick_test/helpers/launch.c +++ b/tools/tests/xnu_quick_test/helpers/launch.c @@ -25,7 +25,7 @@ extern int do_execve_test(char * path, char * argv[], void * envp, int killwait); extern int get_bits(void); - +int g_testbots_active = 0; int main(int argc, const char * argv[]) { int my_err, my_status; diff --git a/tools/tests/xnu_quick_test/main.c b/tools/tests/xnu_quick_test/main.c index 5c526f246..dda938411 100644 --- a/tools/tests/xnu_quick_test/main.c +++ b/tools/tests/xnu_quick_test/main.c @@ -57,9 +57,6 @@ #include #include "tests.h" -#if !TARGET_OS_EMBEDDED -#include -#endif @@ -146,7 +143,6 @@ static const char *current_arch( void ); /* globals */ long g_max_failures = 0; int g_skip_setuid_tests = 0; -int g_xilog_active = 0; const char * g_cmd_namep; char g_target_path[ PATH_MAX ]; int g_is_single_user = 0; @@ -164,36 +160,58 @@ int main( int argc, const char * argv[] ) time_t my_start_time, my_end_time; struct stat my_stat_buf; char my_buffer[64]; - /* vars for XILog */ -#if !TARGET_OS_EMBEDDED - XILogRef logRef; - char *logPath = ""; - char *config = NULL; - int echo = 0; - int xml = 0; -#endif + uid_t sudo_uid = 0; + const char * sudo_uid_env; + gid_t sudo_gid; + const char * sudo_gid_env; sranddev( ); /* set up seed for our random name generator */ g_cmd_namep = argv[0]; - + + /* make sure SIGCHLD is not ignored, so wait4 calls work */ + signal(SIGCHLD, SIG_DFL); + /* NOTE - code in create_target_directory will append '/' if it is necessary */ my_targetp = getenv("TMPDIR"); if ( my_targetp == NULL ) my_targetp = "/tmp"; - /* make sure our executable is owned by root and has set uid bit */ - err = stat( g_cmd_namep, &my_stat_buf ); - if ( err != 0 ) { - err = errno; - printf( "stat call on our executable failed - \"%s\" \n", g_cmd_namep ); - printf( " failed with error %d - \"%s\" \n", err, strerror( err) ); + /* make sure we are running as root */ + if ( ( getuid() != 0 ) || ( geteuid() != 0 ) ) { + printf( "Test must be run as root\n", g_cmd_namep ); exit( -1 ); } - if ( my_stat_buf.st_uid != 0 || (my_stat_buf.st_mode & S_ISUID) == 0 ) { - printf( "executable file - \"%s\" \n", g_cmd_namep ); - printf( "does not have correct owner (must be root) or setuid bit is not set \n" ); - exit( -1 ); + + sudo_uid_env = getenv("SUDO_UID"); + if ( sudo_uid_env ) { + sudo_uid = strtol(sudo_uid_env, NULL, 10); + } + + /* switch real uid to a non_root user, while keeping effective uid as root */ + if ( sudo_uid != 0 ) { + setreuid( sudo_uid, 0 ); + } + else { + /* Default to 501 if no sudo uid found */ + setreuid( 501, 0 ); + } + + /* restore the gid if run through sudo */ + sudo_gid_env = getenv("SUDO_GID"); + if ( sudo_gid_env ) { + sudo_gid = strtol(sudo_gid_env, NULL, 10); } + if ( getgid() == 0 ) { + + if ( sudo_gid != 0 ) { + setgid( sudo_gid ); + } + else { + /* Default to 20 if no sudo gid found */ + setgid( 20 ); + } + } + /* parse input parameters */ for ( i = 1; i < argc; i++ ) { if ( strcmp( argv[i], "-u" ) == 0 ) { @@ -257,13 +275,10 @@ int main( int argc, const char * argv[] ) g_skip_setuid_tests = 1; continue; } -#if !TARGET_OS_EMBEDDED - if ( strcmp( argv[i], "-x" ) == 0 || - strcmp( argv[i], "-xilog" ) == 0 ) { - g_xilog_active = 1; + if ( strcmp( argv[i], "-testbot" ) == 0 ) { + 
g_testbots_active = 1; continue; } -#endif printf( "invalid argument \"%s\" \n", argv[i] ); usage( ); } @@ -290,16 +305,6 @@ g_testbots_active = 1; list_all_tests( ); return 0; } -#if !TARGET_OS_EMBEDDED - if (g_xilog_active == 1) { - logRef = XILogOpenLogExtended( logPath, "xnu_quick_test", "com.apple.coreos", config, xml, - echo, NULL, "ResultOwner", "com.apple.coreos", NULL ); - if( logRef == NULL ) { - fprintf(stderr,"Couldn't create log: %s",logPath); - exit(-1); - } - } -#endif /* build a test target directory that we use as our path to create any test * files and directories. @@ -324,12 +329,6 @@ g_testbots_active = 1; my_testp = &g_tests[i]; if ( my_testp->test_run_it == 0 || my_testp->test_routine == NULL ) continue; -#if !TARGET_OS_EMBEDDED - if (g_xilog_active == 1) { - XILogBeginTestCase( logRef, my_testp->test_infop, my_testp->test_infop ); - XILogMsg( "test #%d - %s \n", (i + 1), my_testp->test_infop ); - } -#endif if ( g_testbots_active == 1 ) { printf("[BEGIN] %s \n", my_testp->test_infop); @@ -340,20 +339,10 @@ g_testbots_active = 1; my_err = my_testp->test_routine( my_testp->test_input ); if ( my_err != 0 ) { printf("\t--> FAILED \n"); -#if !TARGET_OS_EMBEDDED - if (g_xilog_active == 1) { - XILogMsg("SysCall %s failed", my_testp->test_infop); - XILogErr("Result %d", my_err); - } -#endif + printf("SysCall %s failed", my_testp->test_infop); + printf("Result %d", my_err); my_failures++; if ( my_failures > g_max_failures ) { -#if !TARGET_OS_EMBEDDED - if (g_xilog_active == 1) { - XILogMsg("Reached the maximum number of failures - Aborting xnu_quick_test."); - XILogEndTestCase( logRef, kXILogTestPassOnErrorLevel ); - } -#endif printf( "\n Reached the maximum number of failures - Aborting xnu_quick_test. \n" ); /* Code added to run xnu_quick_test under testbots */ if ( g_testbots_active == 1 ) { @@ -363,20 +352,10 @@ g_testbots_active = 1; } /* Code added to run xnu_quick_test under testbots */ if ( g_testbots_active == 1 ) { - printf("[FAIL] %s \n", my_testp->test_infop); + printf("\n[FAIL] %s \n", my_testp->test_infop); } -#if !TARGET_OS_EMBEDDED - if (g_xilog_active == 1) { - XILogEndTestCase( logRef, kXILogTestPassOnErrorLevel ); - } -#endif continue; } -#if !TARGET_OS_EMBEDDED - if (g_xilog_active == 1) { - XILogEndTestCase(logRef, kXILogTestPassOnErrorLevel); - } -#endif /* Code added to run xnu_quick_test under testbots */ if ( g_testbots_active == 1 ) { printf("[PASS] %s \n", my_testp->test_infop); @@ -390,13 +369,8 @@ exit_this_routine: /* clean up our test directory */ rmdir( &g_target_path[0] ); -#if !TARGET_OS_EMBEDDED - if (g_xilog_active == 1) { - XILogCloseLog(logRef); - } -#endif - - return 0; + /* exit non zero if there are any failures */ + return my_failures != 0; } /* main */ @@ -569,9 +543,7 @@ static void usage( void ) printf( "\t -r[un] 1, 3, 10 - 19 # run specific tests. enter individual test numbers and/or range of numbers. use -list to list tests. \n" ); printf( "\t -s[kip] # skip setuid tests \n" ); printf( "\t -t[arget] TARGET_PATH # path to directory where tool will create test files. defaults to \"/tmp/\" \n" ); -#if !TARGET_OS_EMBEDDED - printf( "\t -x[ilog] # use XILog\n"); -#endif + printf( "\t -testbot # output results in CoreOS TestBot compatible format \n" ); printf( "\nexamples: \n" ); printf( "--- Place all test files and directories at the root of volume \"test_vol\" --- \n" ); printf( "%s -t /Volumes/test_vol/ \n", (my_ptr != NULL) ? 
my_ptr : g_cmd_namep ); diff --git a/tools/tests/xnu_quick_test/makefile b/tools/tests/xnu_quick_test/makefile index 9dbf1631b..9717d4c26 100644 --- a/tools/tests/xnu_quick_test/makefile +++ b/tools/tests/xnu_quick_test/makefile @@ -1,97 +1,94 @@ SDKROOT ?= / -Product=$(shell tconf --product) -Embedded=$(shell tconf --test TARGET_OS_EMBEDDED) +Product ?= $(shell xcodebuild -sdk $(SDKROOT) -version PlatformPath | head -1 | sed 's,^.*/\([^/]*\)\.platform$$,\1,') -SDKVERSION=$(shell xcodebuild -sdk $(SDKROOT) -version SDKVersion | head -1) +# This should not be a long term solution to ; this +# makefile needs to be changed to identify its targets appropriately in the +# absence of tconf, but we'll go with the quick change for now. + +ifeq "$(RC_TARGET_CONFIG)" "iPhone" +Embedded?=YES +else +Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO) +endif ifeq "$(Embedded)" "YES" -XILogFLAG = -SDKPATH = $(shell xcodebuild -sdk $(SDKROOT) -version Path) +Product?=iPhone +else +Product?=MacOSX +endif + +SDKVERSION:=$(shell xcodebuild -sdk $(SDKROOT) -version SDKVersion | head -1) + +ifeq "$(Product)" "iPhoneOS" +SDKPATH := $(shell xcodebuild -sdk $(SDKROOT) -version Path) CFLAGS += -isysroot $(SDKPATH) -miphoneos-version-min=$(SDKVERSION) LIBFLAGS += -isysroot $(SDKPATH) -miphoneos-version-min=$(SDKVERSION) else -XILogFLAG = -framework XILog CFLAGS += -mmacosx-version-min=$(SDKVERSION) LIBFLAGS += -mmacosx-version-min=$(SDKVERSION) +Product ?= MacOSX endif -HOSTCC = cc -CC = xcrun -sdk $(SDKROOT) cc +CC := xcrun -sdk $(SDKROOT) cc +HOSTCC := cc -ifdef RC_BUILDIT -DOING_BUILDIT=yes -endif +SRCROOT?=$(shell /bin/pwd) +OBJROOT?=$(SRCROOT)/BUILD/obj +DSTROOT?=$(SRCROOT)/BUILD/dst +SYMROOT?=$(SRCROOT)/BUILD/sym -ifdef RC_OS -DOING_BUILDIT=yes -endif +SRCSUBPATH := $(SRCROOT) +OBJSUBPATH := $(OBJROOT) -ifdef DOING_BUILDIT -include $(MAKEFILEPATH)/CoreOS/ReleaseControl/Common.make -MY_ARCH = $(patsubst %, -arch %, $(RC_ARCHS)) -install:: xnu_quick_test +ifdef RC_ARCHS + ARCH:=$(RC_ARCHS) else - ifndef SRCROOT - SRCROOT=$(shell /bin/pwd) - endif - ifndef OBJROOT - OBJROOT=$(SRCROOT)/BUILD/obj - endif - ifndef DSTROOT - DSTROOT=$(SRCROOT)/BUILD/dst - endif - - ifndef ARCH - ARCH=i386 x86_64 - # this hack should be removed once tconf gets - # - ifeq "$(Product)" "iPhone" - ARCH=armv7 - endif - ifeq "$(Product)" "AppleTV" - ARCH=i386 - endif - endif - - ifdef ARCH - MY_ARCH = $(patsubst %, -arch %, $(ARCH)) # allows building multiple archs. - endif - - CFLAGS += $(MY_ARCH) + ifeq "$(Product)" "MacOSX" + ARCH:=i386 x86_64 + else ifeq "$(Product)" "iPhoneOS" + ARCH:=armv7s armv7 + else ifeq "$(Product)" "AppleTV" + ARCH:=i386 +endif endif +MY_ARCH := $(patsubst %, -arch %, $(ARCH)) # allows building multiple archs. +ARCH_32 := $(filter-out %64, $(ARCH)) +ARCH_32_FLAGS := $(patsubst %, -arch %, $(ARCH_32)) +ARCH_64 := $(filter %64, $(ARCH)) +ARCH_64_FLAGS := $(patsubst %, -arch %, $(ARCH_64)) CFLAGS += -g -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders/ -F/AppleInternal/Library/Frameworks/ $(MORECFLAGS) -Wno-deprecated-declarations -LIBFLAGS += -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders -F/AppleInternal/Library/Frameworks/ $(XILogFLAG) + +LIBFLAGS += -I $(SDKPATH)/System/Library/Frameworks/System.framework/Versions/B/PrivateHeaders -F/AppleInternal/Library/Frameworks/ # The current implementation of the content protection test requires IOKit. 
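# (Hence the Product check just below: only the iPhoneOS build adds -framework IOKit to LIBFLAGS.)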
-ifeq "$(Product)" "iPhone" +ifeq "$(Product)" "iPhoneOS" LIBFLAGS += -framework IOKit endif -MY_OBJECTS = $(OBJROOT)/main.o $(OBJROOT)/memory_tests.o $(OBJROOT)/misc.o \ - $(OBJROOT)/sema_tests.o $(OBJROOT)/shared_memory_tests.o \ - $(OBJROOT)/socket_tests.o $(OBJROOT)/tests.o \ - $(OBJROOT)/xattr_tests.o $(OBJROOT)/kqueue_tests.o \ - $(OBJROOT)/machvm_tests.o $(OBJROOT)/commpage_tests.o \ - $(OBJROOT)/atomic_fifo_queue_test.o $(OBJROOT)/sched_tests.o \ - $(OBJROOT)/pipes_tests.o +MY_OBJECTS := $(OBJSUBPATH)/main.o $(OBJSUBPATH)/memory_tests.o $(OBJSUBPATH)/misc.o \ + $(OBJSUBPATH)/sema_tests.o $(OBJSUBPATH)/shared_memory_tests.o \ + $(OBJSUBPATH)/socket_tests.o $(OBJSUBPATH)/tests.o \ + $(OBJSUBPATH)/xattr_tests.o $(OBJSUBPATH)/kqueue_tests.o \ + $(OBJSUBPATH)/machvm_tests.o $(OBJSUBPATH)/commpage_tests.o \ + $(OBJSUBPATH)/atomic_fifo_queue_test.o $(OBJSUBPATH)/sched_tests.o \ + $(OBJSUBPATH)/pipes_tests.o -ifneq "$(Product)" "iPhone" -MY_OBJECTS += $(OBJROOT)/32bit_inode_tests.o +ifneq "$(Product)" "iPhoneOS" +MY_OBJECTS += $(OBJSUBPATH)/32bit_inode_tests.o else -MY_OBJECTS += $(OBJROOT)/content_protection_test.o +MY_OBJECTS += $(OBJSUBPATH)/content_protection_test.o endif -# In networked home directories, the chown will fail; we notice and print a helpful message -CHOWN_COMMAND=sudo chown root $(DSTROOT)/xnu_quick_test -PERM_ADVICE="\tYou'll have to set the executable's permissions yourself: chown to root and chmod to 4755. You may need to move to a local volume to do that." -xnu_quick_test : $(OBJROOT) $(DSTROOT) $(MY_OBJECTS) helpers - sudo rm -rf $(DSTROOT)/xnu_quick_test - $(CC) $(MY_ARCH) $(LIBFLAGS) -o $(DSTROOT)/xnu_quick_test $(MY_OBJECTS) - @echo $(CHOWN_COMMAND) # Hack so we don't echo help-message echo - @$(CHOWN_COMMAND) || echo $(PERM_ADVICE) - sudo chmod 4755 $(DSTROOT)/xnu_quick_test + +xnu_quick_test : $(OBJSUBPATH) $(DSTROOT) $(SYMROOT) $(MY_OBJECTS) helpers +ifndef RC_ProjectName + rm -rf $(DSTROOT)/xnu_quick_test +endif + $(CC) -g $(MY_ARCH) $(LIBFLAGS) -o $(SYMROOT)/xnu_quick_test $(MY_OBJECTS) + /usr/bin/dsymutil $(SYMROOT)/xnu_quick_test + /usr/bin/ditto $(SYMROOT)/xnu_quick_test $(DSTROOT)/xnu_quick_test # This target is defined for testbots. # Before compiling this target, MORECFLAGS must be set to "-D RUN_UNDER_TESTBOTS=1", check README file for more details @@ -99,103 +96,111 @@ xnu_quick_test : $(OBJROOT) $(DSTROOT) $(MY_OBJECTS) helpers # report all the failures. testbots: xnu_quick_test - @(cd $(DSTROOT) ; ./xnu_quick_test -f 100) + @(cd $(DSTROOT) ; ./xnu_quick_test -f 100) # The helper binaries are used to test exec()'ing between 64bit and 32bit. # Creates test binaries with page zero sizes = 4KB and 4GB. Also creates 32-bit # helper processes for the 64-bit version of xnu_quick_test to test the conversion # from a 32-bit process to a 64-bit process. 
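# (The -pagezero_size 0x1000 and -pagezero_size 0x100000000 flags in the sleep # helper recipes below are what produce the 4KB and 4GB page-zero variants.)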
-helpers : helpers/sleep.c helpers/launch.c helpers/arch.c helpers/data_exec.c helperdir $(OBJROOT)/misc.o -ifneq "$(Product)" "iPhone" - $(CC) -arch i386 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-i386 +helpers : $(SRCSUBPATH)/helpers/sleep.c $(SRCSUBPATH)/helpers/launch.c $(SRCSUBPATH)/helpers/arch.c $(SRCSUBPATH)/helpers/data_exec.c helperdir $(OBJSUBPATH)/misc.o +ifneq "$(Product)" "iPhoneOS" +ifneq "$(ARCH_32)" "" + $(CC) -g $(ARCH_32_FLAGS) $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-i386 + /usr/bin/ditto $(SYMROOT)/sleep-i386 $(DSTROOT)/helpers/ +endif endif ifeq "$(Product)" "MacOSX" - $(CC) -arch x86_64 -pagezero_size 0x100000000 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-x86_64-4G - $(CC) -arch x86_64 -pagezero_size 0x1000 helpers/sleep.c -o $(DSTROOT)/helpers/sleep-x86_64-4K +ifneq "$(ARCH_32)" "" + $(CC) -g $(LIBFLAGS) $(ARCH_32_FLAGS) $(OBJSUBPATH)/misc.o $(SRCSUBPATH)/helpers/launch.c -o $(SYMROOT)/launch-i386 + $(CC) -g $(ARCH_32_FLAGS) -DNXDATA32TESTNONX $(SRCSUBPATH)/helpers/data_exec.c -o $(SYMROOT)/data_exec32nonxspawn + /usr/bin/ditto $(SYMROOT)/launch-i386 $(SYMROOT)/data_exec32nonxspawn $(DSTROOT)/helpers/ endif -ifneq "$(Product)" "iPhone" - $(CC) $(LIBFLAGS) -arch i386 $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-i386 +ifneq "$(ARCH_64)" "" + $(CC) -g $(ARCH_64_FLAGS) -pagezero_size 0x100000000 $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-x86_64-4G + $(CC) -g $(ARCH_64_FLAGS) -pagezero_size 0x1000 $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-x86_64-4K + $(CC) -g $(LIBFLAGS) $(ARCH_64_FLAGS) $(OBJSUBPATH)/misc.o $(SRCSUBPATH)/helpers/launch.c -o $(SYMROOT)/launch-x86_64 + /usr/bin/ditto $(SYMROOT)/sleep-x86_64-4G $(SYMROOT)/sleep-x86_64-4K $(SYMROOT)/launch-x86_64 $(DSTROOT)/helpers/ endif -ifeq "$(Product)" "MacOSX" - $(CC) $(LIBFLAGS) -arch x86_64 $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-x86_64 - $(CC) $(MY_ARCH) helpers/arch.c -o $(DSTROOT)/helpers/arch - $(CC) $(MY_ARCH) helpers/data_exec.c -o $(DSTROOT)/helpers/data_exec - $(CC) -arch i386 -DNXDATA32TESTNONX helpers/data_exec.c -o $(DSTROOT)/helpers/data_exec32nonxspawn - + $(CC) -g $(MY_ARCH) $(SRCSUBPATH)/helpers/data_exec.c -o $(SYMROOT)/data_exec + /usr/bin/ditto $(SYMROOT)/data_exec $(DSTROOT)/helpers/ endif -ifeq "$(Product)" "iPhone" - $(CC) $(CFLAGS) helpers/sleep.c -o $(DSTROOT)/helpers/sleep-arm - $(CC) $(LIBFLAGS) $(CFLAGS) $(OBJROOT)/misc.o helpers/launch.c -o $(DSTROOT)/helpers/launch-arm - $(CC) $(MY_ARCH) $(CFLAGS) helpers/arch.c -o $(DSTROOT)/helpers/arch +ifeq "$(Product)" "iPhoneOS" +ifneq "$(ARCH_32)" "" + $(CC) $(CFLAGS) $(ARCH_32_FLAGS) $(SRCSUBPATH)/helpers/sleep.c -o $(SYMROOT)/sleep-arm + $(CC) $(LIBFLAGS) $(CFLAGS) $(ARCH_32_FLAGS) $(OBJSUBPATH)/misc.o $(SRCSUBPATH)/helpers/launch.c -o $(SYMROOT)/launch-arm + /usr/bin/ditto $(SYMROOT)/sleep-arm $(SYMROOT)/launch-arm $(DSTROOT)/helpers/ endif - +endif + $(CC) -g $(MY_ARCH) $(CFLAGS) $(SRCSUBPATH)/helpers/arch.c -o $(SYMROOT)/arch + /usr/bin/ditto $(SYMROOT)/arch $(DSTROOT)/helpers/ helperdir : mkdir -p $(DSTROOT)/helpers -$(OBJROOT) : - mkdir -p $(OBJROOT); +$(OBJSUBPATH) : + mkdir -p $(OBJSUBPATH); $(DSTROOT) : mkdir -p $(DSTROOT); -$(OBJROOT)/main.o : main.c tests.h - $(CC) $(CFLAGS) -c main.c -o $@ +$(SYMROOT) : + mkdir -p $(SYMROOT) + +$(OBJSUBPATH)/main.o : $(SRCSUBPATH)/main.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/main.c -o $@ -$(OBJROOT)/memory_tests.o : memory_tests.c tests.h - $(CC) $(CFLAGS) -c memory_tests.c -o $@ 
+$(OBJSUBPATH)/memory_tests.o : $(SRCSUBPATH)/memory_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/memory_tests.c -o $@ # misc.o has to be built 3-way for the helpers to link -$(OBJROOT)/misc.o : misc.c tests.h -ifeq "$(Product)" "iPhone" - $(CC) -arch armv7 $(CFLAGS) -c misc.c -o $@ +$(OBJSUBPATH)/misc.o : $(SRCSUBPATH)/misc.c $(SRCSUBPATH)/tests.h +ifeq "$(Product)" "iPhoneOS" + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/misc.c -o $@ else - $(CC) -arch i386 -arch x86_64 $(CFLAGS) -c misc.c -o $@ + $(CC) -arch i386 -arch x86_64 $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/misc.c -o $@ endif -$(OBJROOT)/sema_tests.o : sema_tests.c tests.h - $(CC) $(CFLAGS) -c sema_tests.c -o $@ +$(OBJSUBPATH)/sema_tests.o : $(SRCSUBPATH)/sema_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/sema_tests.c -o $@ -$(OBJROOT)/shared_memory_tests.o : shared_memory_tests.c tests.h - $(CC) $(CFLAGS) -c shared_memory_tests.c -o $@ +$(OBJSUBPATH)/shared_memory_tests.o : $(SRCSUBPATH)/shared_memory_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/shared_memory_tests.c -o $@ -$(OBJROOT)/socket_tests.o : socket_tests.c tests.h - $(CC) $(CFLAGS) -c socket_tests.c -o $@ +$(OBJSUBPATH)/socket_tests.o : $(SRCSUBPATH)/socket_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/socket_tests.c -o $@ -$(OBJROOT)/tests.o : tests.c tests.h - $(CC) $(CFLAGS) -c tests.c -o $@ +$(OBJSUBPATH)/tests.o : $(SRCSUBPATH)/tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/tests.c -o $@ -$(OBJROOT)/xattr_tests.o : xattr_tests.c tests.h - $(CC) $(CFLAGS) -c xattr_tests.c -o $@ +$(OBJSUBPATH)/xattr_tests.o : $(SRCSUBPATH)/xattr_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/xattr_tests.c -o $@ -$(OBJROOT)/machvm_tests.o : machvm_tests.c tests.h - $(CC) $(CFLAGS) -c machvm_tests.c -o $@ +$(OBJSUBPATH)/machvm_tests.o : $(SRCSUBPATH)/machvm_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/machvm_tests.c -o $@ -$(OBJROOT)/sched_tests.o : sched_tests.c tests.h - $(CC) $(CFLAGS) -c sched_tests.c -o $@ +$(OBJSUBPATH)/sched_tests.o : $(SRCSUBPATH)/sched_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/sched_tests.c -o $@ -$(OBJROOT)/kqueue_tests.o : kqueue_tests.c tests.h - $(CC) $(CFLAGS) -c kqueue_tests.c -o $@ +$(OBJSUBPATH)/kqueue_tests.o : $(SRCSUBPATH)/kqueue_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/kqueue_tests.c -o $@ -$(OBJROOT)/32bit_inode_tests.o : 32bit_inode_tests.c tests.h - $(CC) $(CFLAGS) -c 32bit_inode_tests.c -o $@ +$(OBJSUBPATH)/32bit_inode_tests.o : $(SRCSUBPATH)/32bit_inode_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/32bit_inode_tests.c -o $@ -$(OBJROOT)/commpage_tests.o : commpage_tests.c tests.h - $(CC) $(CFLAGS) -c commpage_tests.c -o $@ +$(OBJSUBPATH)/commpage_tests.o : $(SRCSUBPATH)/commpage_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/commpage_tests.c -o $@ -$(OBJROOT)/atomic_fifo_queue_test.o : atomic_fifo_queue_test.c tests.h - $(CC) $(CFLAGS) -c atomic_fifo_queue_test.c -o $@ +$(OBJSUBPATH)/atomic_fifo_queue_test.o : $(SRCSUBPATH)/atomic_fifo_queue_test.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/atomic_fifo_queue_test.c -o $@ -$(OBJROOT)/content_protection_test.o : content_protection_test.c tests.h - $(CC) $(CFLAGS) -c content_protection_test.c -o $@ +$(OBJSUBPATH)/content_protection_test.o : 
$(SRCSUBPATH)/content_protection_test.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/content_protection_test.c -o $@ -$(OBJROOT)/pipes_tests.o : pipes_tests.c tests.h - $(CC) $(CFLAGS) -c pipes_tests.c -o $@ +$(OBJSUBPATH)/pipes_tests.o : $(SRCSUBPATH)/pipes_tests.c $(SRCSUBPATH)/tests.h + $(CC) $(CFLAGS) $(MY_ARCH) -c $(SRCSUBPATH)/pipes_tests.c -o $@ -ifndef DOING_BUILDIT .PHONY : clean clean : - sudo rm -Rf $(DSTROOT)/xnu_quick_test - sudo rm -Rf $(DSTROOT)/helpers/* - rm -Rf $(OBJROOT)/*.o -endif + rm -Rf $(DSTROOT)/xnu_quick_test + rm -Rf $(DSTROOT)/helpers/* + rm -Rf $(OBJSUBPATH)/*.o diff --git a/tools/tests/xnu_quick_test/memory_tests.c b/tools/tests/xnu_quick_test/memory_tests.c index 03e31a456..c14564a32 100644 --- a/tools/tests/xnu_quick_test/memory_tests.c +++ b/tools/tests/xnu_quick_test/memory_tests.c @@ -9,7 +9,6 @@ #include "tests.h" #include -#include /* crashcount() */ extern char g_target_path[ PATH_MAX ]; @@ -19,80 +18,15 @@ extern char g_target_path[ PATH_MAX ]; */ static volatile int my_err; -/* - * Handler; used by memory_tests() child to reset my_err so that it will - * exit normally following a SIGBUS, rather than triggering a crash report; - * this depends on setting the error non-zero before triggering the condition - * that would trigger a SIGBUS. To avoid confusion, this is most easily done - * right before the test in question, and if there are subsequent tests, then - * undone immediately after to avoid false test negatives. - */ void bus_handler(int sig, siginfo_t *si, void *mcontext) { /* Reset global error value when we see a SIGBUS */ - if (sig == SIGBUS) - my_err = 0; -} - -/* - * Count the number of crashes for us in /Library/Logs/CrashReporter/ - * - * XXX Assumes that CrashReporter uses our name as a prefix - * XXX Assumes no one lese has the same prefix as our name - */ -int -crashcount(char *namebuf1, char *namebuf2) -{ - char *crashdir1 = "/Library/Logs/CrashReporter"; - char *crashdir2 = "/Library/Logs/DiagnosticReports"; - char *crash_file_pfx = "xnu_quick_test"; - int crash_file_pfxlen = strlen(crash_file_pfx); - struct stat sb; - DIR *dirp1 = NULL, *dirp2 = NULL; - struct dirent *dep1, *dep2; - int count = 0; - - /* If we can't open the directory, dirp1 will be NULL */ - dirp1 = opendir(crashdir1); - - while(dirp1 != NULL && ((dep1 = readdir(dirp1)) != NULL)) { - if (strncmp(crash_file_pfx, dep1->d_name, crash_file_pfxlen)) - continue; - /* record each one to get the last one */ - if (namebuf1) { - strcpy(namebuf1, crashdir1); - strcat(namebuf1, "/"); - strcat(namebuf1, dep1->d_name); - } - count++; + if (sig == SIGBUS) { + _exit(0); } - - if (dirp1 != NULL) - closedir(dirp1); - -#if !TARGET_OS_EMBEDDED - /* If we can't open the directory, dirp2 will be NULL */ - dirp2 = opendir(crashdir2); - - while(dirp2 != NULL && (dep2 = readdir(dirp2)) != NULL) { - if (strncmp(crash_file_pfx, dep2->d_name, crash_file_pfxlen)) - continue; - /* record each one to get the last one */ - if (namebuf2) { - strcpy(namebuf2, crashdir2); - strcat(namebuf2, "/"); - strcat(namebuf2, dep2->d_name); - } - count++; - } - if (dirp2 != NULL) - closedir(dirp2); -#endif - return( count ); } - /* ************************************************************************************************************** * Test madvise, mincore, minherit, mlock, mlock, mmap, mprotect, msync, munmap system calls. * todo - see if Francois has better versions of these tests... 
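 * (With this change the child's SIGBUS handler simply calls _exit(0) and the parent checks WIFEXITED, so the CrashReporter log counting that used to confirm the expected fault is gone.) 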
@@ -110,9 +44,6 @@ int memory_tests( void * the_argp ) pid_t my_pid, my_wait_pid; kern_return_t my_kr; struct sigaction my_sa; - static int my_crashcount; - static char my_namebuf1[256]; /* XXX big enough */ - static char my_namebuf2[256]; my_kr = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t*)&my_pathp, PATH_MAX, VM_FLAGS_ANYWHERE); if(my_kr != KERN_SUCCESS){ @@ -149,18 +80,6 @@ int memory_tests( void * the_argp ) goto test_failed_exit; } - /* - * Find out how many crashes there have already been; if it's not - * zero, then don't even attempt this test. - */ - my_namebuf1[0] = '\0'; - my_namebuf2[0] = '\0'; - if ((my_crashcount = crashcount(my_namebuf1, my_namebuf2)) != 0) { - printf( "memtest aborted: can not distinguish our expected crash from \n"); - printf( "%d existing crashes including %s \n", my_crashcount, my_namebuf2); - goto test_failed_exit; - } - /* * spin off a child process that we will use for testing. */ @@ -348,13 +267,6 @@ int memory_tests( void * the_argp ) goto exit_child; } - /* - * Establish SIGBUS handler; will reset (disable itself) if it fires; - * we would need how to recover from the exceptional condition that - * raised the SIGBUS by modifying the contents of the (opaque to us) - * mcontext in order to prevent this from being terminal, so we let - * it be terminal. This is enough to avoid triggering crash reporter. - */ my_sa.sa_sigaction = bus_handler; my_sa.sa_flags = SA_SIGINFO | SA_RESETHAND; if ((my_err = sigaction(SIGBUS, &my_sa, NULL)) != 0) { @@ -390,43 +302,13 @@ exit_child: goto test_failed_exit; } - /* If we were not signalled, or we died from an unexpected signal, report it. - */ - if ( !WIFSIGNALED( my_status ) || WTERMSIG( my_status ) != SIGBUS) { - printf( "wait4 returned child died of status - 0x%02X \n", my_status ); - goto test_failed_exit; - } - - /* - * Wait long enough that CrashReporter has finished. - */ - sleep(5); - - /* - * Find out how many crashes there have already been; if it's not - * one, then don't even attempt this test. + /* If we did not exit cleanly, report it */ - my_namebuf1[0] = '\0'; - my_namebuf2[0] = '\0'; - my_crashcount = crashcount(my_namebuf1, my_namebuf2); - if (!(my_crashcount == 1 || my_crashcount == 2)) { - printf( "child did not crash as expected \n"); - printf( "saw %d crashes including %s \n", my_crashcount, my_namebuf1); + if ( !WIFEXITED( my_status ) || (WEXITSTATUS( my_status ) != 0)) { + printf( "wait4 returned child died of status - 0x%08X \n", my_status ); goto test_failed_exit; } - /* post-remove the expected crash report */ - if (unlink(my_namebuf1) && !(errno == ENOENT || errno == ENOTDIR)) { - printf("unlink of expected crash report '%s' failed \n", my_namebuf1); - goto test_failed_exit; - } -#if !TARGET_OS_EMBEDDED - /* /Library/Logs/DiagnosticReports/ does not exist on embedded targets. 
*/ - if (unlink(my_namebuf2) && !(errno == ENOENT || errno == ENOTDIR)) { - printf("unlink of expected crash report '%s' failed \n", my_namebuf2); - goto test_failed_exit; - } -#endif /* make sure shared page got modified in child */ if ( strcmp( my_test_page_p, "parent data child data" ) != 0 ) { printf( "minherit did not work correctly - shared page looks wrong \n" ); diff --git a/tools/tests/xnu_quick_test/misc.c b/tools/tests/xnu_quick_test/misc.c index 5e3706211..72393c60e 100644 --- a/tools/tests/xnu_quick_test/misc.c +++ b/tools/tests/xnu_quick_test/misc.c @@ -2,6 +2,8 @@ #include "tests.h" #include +extern int g_testbots_active; + /* * create_random_name - creates a file with a random / unique name in the given directory. * when do_open is true we create a file else we generaate a name that does not exist in the @@ -358,19 +360,24 @@ int get_bits() */ /* Check for PPC 64 */ - if ((my_err = sysctlbyname("hw.optional.64bitops", NULL, &len, NULL, 0))) goto x86_64check; /* Request size */ - if (len > sizeof(buf)) goto x86_64check; - if ((my_err = sysctlbyname("hw.optional.64bitops", &buf, &len, NULL, 0))) goto x86_64check; /* Copy value out from kernel */ + if ((my_err = sysctlbyname("hw.optional.64bitops", NULL, &len, NULL, 0))) goto check64bit; /* Request size */ + if (len > sizeof(buf)) goto check64bit; + if ((my_err = sysctlbyname("hw.optional.64bitops", &buf, &len, NULL, 0))) goto check64bit; /* Copy value out from kernel */ if (buf == 1) rval = 64; goto finished; -x86_64check: +check64bit: +#if defined(__i386__) || defined(__x86_64__) /* Check for x86_64 */ if ((my_err = sysctlbyname("hw.optional.x86_64", NULL, &len, NULL, 0))) goto finished; /* Request size */ if (len > sizeof(buf)) goto finished; if ((my_err = sysctlbyname("hw.optional.x86_64", &buf, &len, NULL, 0))) goto finished; /* Copy value out from kernel */ if (buf == 1) rval = 64; +#else +#error Unknown architecture. +#endif + finished: return rval; } @@ -393,6 +400,14 @@ my_printf(const char * __restrict fmt, ...) 
int rv; va_list ap; + /* if we are running under a TestBot, do a normal printf */ + if (g_testbots_active) { + va_start(ap, fmt); + rv = vprintf(fmt, ap); + va_end(ap); + return rv; + } + /* Get the timestamp for this printf */ result = time(NULL); timeptr = localtime(&result); diff --git a/tools/tests/xnu_quick_test/pipes_tests.c b/tools/tests/xnu_quick_test/pipes_tests.c index c87f94d18..594a6aa62 100644 --- a/tools/tests/xnu_quick_test/pipes_tests.c +++ b/tools/tests/xnu_quick_test/pipes_tests.c @@ -44,6 +44,7 @@ #include #include #include +#include /**************************/ /**************************/ diff --git a/tools/tests/xnu_quick_test/shared_memory_tests.c b/tools/tests/xnu_quick_test/shared_memory_tests.c index ad0ddcd6f..9467e4d22 100644 --- a/tools/tests/xnu_quick_test/shared_memory_tests.c +++ b/tools/tests/xnu_quick_test/shared_memory_tests.c @@ -129,6 +129,14 @@ int bsd_shm_tests( void * the_argp ) printf( "shm_unlink failed with error %d - \"%s\" \n", errno, strerror( errno) ); goto test_failed_exit; } + + /* unlinking a non existent path */ + my_err = shm_unlink ( "/tmp/anonexistent_shm_oject" ); + my_err = errno; + if ( my_err != ENOENT ) { + printf( "shm_unlink of non existent path failed with error %d - \"%s\" \n", errno, strerror( errno) ); + goto test_failed_exit; + } my_addr = (char *) mmap( NULL, 4096, (PROT_READ | PROT_WRITE), (MAP_FILE | MAP_SHARED), my_fd, 0 ); if ( my_addr == (char *) -1 ) { diff --git a/tools/tests/xnu_quick_test/tests.c b/tools/tests/xnu_quick_test/tests.c index cf2867e8a..11aec361a 100644 --- a/tools/tests/xnu_quick_test/tests.c +++ b/tools/tests/xnu_quick_test/tests.c @@ -1186,7 +1186,7 @@ int chown_fchown_lchown_lstat_symlink_test( void * the_argp ) if ( my_new_gid1 == 0 ) { my_new_gid1 = my_groups[ i ]; } - else { + else if( my_new_gid1 != my_groups[ i ] ) { my_new_gid2 = my_groups[ i ]; break; } @@ -1758,6 +1758,7 @@ int uid_tests( void * the_argp ) exit( -1 ); } +#if !TARGET_OS_EMBEDDED /* * test to make sure setaudit_addr doesn't cause audit info to get lost from * the credential. @@ -1784,6 +1785,7 @@ int uid_tests( void * the_argp ) printf("test failed - wrong audit ID was set - %d \n", my_aia.ai_auid); exit( -1 ); } +#endif /* change real uid and effective uid to current euid */ my_err = setuid( my_euid ); @@ -2120,20 +2122,18 @@ int execve_kill_vfork_test( void * the_argp ) if (do_spawn_test(CPU_TYPE_I386, 0)) goto test_failed_exit; } - }else if(get_architecture() == ARM) { - if (bits == 32) { + } else if(get_architecture() == ARM) { - /* Running on arm hardware. Check cases 2. 
diff --git a/tools/tests/xnu_quick_test/tests.c b/tools/tests/xnu_quick_test/tests.c
index cf2867e8a..11aec361a 100644
--- a/tools/tests/xnu_quick_test/tests.c
+++ b/tools/tests/xnu_quick_test/tests.c
@@ -1186,7 +1186,7 @@ int chown_fchown_lchown_lstat_symlink_test( void * the_argp )
 		if ( my_new_gid1 == 0 ) {
 			my_new_gid1 = my_groups[ i ];
 		}
-		else {
+		else if( my_new_gid1 != my_groups[ i ] ) {
 			my_new_gid2 = my_groups[ i ];
 			break;
 		}
@@ -1758,6 +1758,7 @@ int uid_tests( void * the_argp )
 		exit( -1 );
 	}
 
+#if !TARGET_OS_EMBEDDED
 	/*
 	 * test to make sure setaudit_addr doesn't cause audit info to get lost from
 	 * the credential.
@@ -1784,6 +1785,7 @@
 		printf("test failed - wrong audit ID was set - %d \n", my_aia.ai_auid);
 		exit( -1 );
 	}
+#endif
 
 	/* change real uid and effective uid to current euid */
 	my_err = setuid( my_euid );
@@ -2120,20 +2122,18 @@ int execve_kill_vfork_test( void * the_argp )
 			if (do_spawn_test(CPU_TYPE_I386, 0))
 				goto test_failed_exit;
 		}
-	}else if(get_architecture() == ARM) {
-		if (bits == 32) {
+	} else if(get_architecture() == ARM) {
 
-			/* Running on arm hardware. Check cases 2. */
-			errmsg = "execve failed: from arm forking and exec()ing 32-bit arm process.\n";
-			argvs[0] = "sleep-arm";
-			if (do_execve_test("helpers/sleep-arm", argvs, NULL, 1))
-				goto test_failed_exit;
+		errmsg = "execve failed: from arm forking and exec()ing arm process.\n";
+		argvs[0] = "sleep-arm";
+		if (do_execve_test("helpers/sleep-arm", argvs, NULL, 1))
+			goto test_failed_exit;
+
+		/* Test posix_spawn for arm (should succeed) */
+		errmsg = NULL;
+		if (do_spawn_test(CPU_TYPE_ARM, 0))
+			goto test_failed_exit;
-
-		/* Test posix_spawn for arm (should succeed) */
-		errmsg = NULL;
-		if (do_spawn_test(CPU_TYPE_ARM, 0))
-			goto test_failed_exit;
-	}
 	}
 	else {
 		/* Just in case someone decides we need more architectures in the future */
@@ -2715,7 +2715,7 @@ int signals_test( void * the_argp )
 		goto test_failed_exit;
 	}
 
-	if ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) {
+	if ( WIFSIGNALED( my_status ) || ( WIFEXITED( my_status ) && WEXITSTATUS( my_status ) != 0 ) ) {
 		goto test_failed_exit;
 	}
 
@@ -2867,6 +2867,8 @@ int acct_test( void * the_argp )
 	ssize_t		my_count;
 	char		my_buffer[ (sizeof(struct acct) + 32) ];
 	kern_return_t	my_kr;
+	int		acct_record_found;
+	char *		test_bin_name = NULL;
 
 	if ( g_skip_setuid_tests != 0 ) {
 		printf("\t skipping this test \n");
@@ -2931,7 +2933,7 @@ int acct_test( void * the_argp )
 		goto test_failed_exit;
 	}
 
-	/* disable process accounting */
+	/* disable process accounting */
 	my_err = acct( NULL );
 	if ( my_err == -1 ) {
 		printf( "acct failed with error %d - \"%s\" \n", errno, strerror( errno) );
@@ -2947,27 +2949,45 @@ int acct_test( void * the_argp )
 	lseek( my_fd, 0, SEEK_SET );
 	bzero( (void *)&my_buffer[0], sizeof(my_buffer) );
 
-	my_count = read( my_fd, &my_buffer[0], sizeof(struct acct) );
-	if ( my_count == -1 ) {
-		printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
-		goto test_failed_exit;
-	}
+	acct_record_found = 0;
+	test_bin_name = "true";
 
-	my_acctp = (struct acct *) &my_buffer[0];
+	while(1) {
 
-	/* first letters in ac_comm should match the name of the executable */
-	if ( getuid( ) != my_acctp->ac_uid || getgid( ) != my_acctp->ac_gid ||
-		 my_acctp->ac_comm[0] != 't' || my_acctp->ac_comm[1] != 'r' ) {
-		printf( "------------------------\n" );
-		printf( "my_acctp->ac_uid = %lu (should be: %lu)\n", (unsigned long) my_acctp->ac_uid, (unsigned long) getuid() );
-		printf( "my_acctp->ac_gid = %lu (should be: %lu)\n", (unsigned long) my_acctp->ac_gid, (unsigned long) getgid() );
-
-		print_acct_debug_strings(my_acctp->ac_comm);
-
-		goto test_failed_exit;
-	}
-	my_err = 0;
-	goto test_passed_exit;
+		my_count = read( my_fd, &my_buffer[0], sizeof(struct acct) );
+
+		if ( my_count == -1 ) {
+			printf( "read call failed with error %d - \"%s\" \n", errno, strerror( errno) );
+			goto test_failed_exit;
+		}
+
+		if ( my_count < (ssize_t)sizeof(struct acct)) {
+			/* Indicates EOF or misaligned file size */
+			printf("Reached end of accounting records with last read count: %zd\n", my_count);
+			break;
+		}
+
+		my_acctp = (struct acct *) &my_buffer[0];
+		/* ac_comm should match the name of the executable */
+		if ( (getuid() == my_acctp->ac_uid) && (getgid() == my_acctp->ac_gid) &&
+			(!strncmp(my_acctp->ac_comm, test_bin_name, strlen(test_bin_name))) ) {
+			/* Expected accounting record found */
+			acct_record_found = 1;
+			break;
+		}
+
+	}
+
+	if (acct_record_found) {
+		my_err = 0;
+		goto test_passed_exit;
+	} else {
+		printf( "------------------------\n" );
+		printf( "Expected Accounting Record for child process %s not found\n", test_bin_name );
+		printf( "Expected uid: %lu Expected gid: %lu\n", (unsigned long) getuid(), (unsigned long) getgid() );
+		printf( "Account file path: %s\n", my_pathp );
+		goto test_failed_exit;
+	}
 
 test_failed_exit:
 	my_err = -1;
diff --git a/tools/tests/zero-to-n/Makefile b/tools/tests/zero-to-n/Makefile
index 30a82361b..4c3b62c8e 100644
--- a/tools/tests/zero-to-n/Makefile
+++ b/tools/tests/zero-to-n/Makefile
@@ -1,5 +1,32 @@
-DEBUG=0
-CC=/usr/bin/llvm-gcc-4.2
+SDKROOT ?= /
+ifeq "$(RC_TARGET_CONFIG)" "iPhone"
+Embedded?=YES
+else
+Embedded?=$(shell echo $(SDKROOT) | grep -iq iphoneos && echo YES || echo NO)
+endif
 
-zn: zero-to-n.c
-	$(CC) -Wall -arch i386 -arch x86_64 zero-to-n.c -o zn -DDEBUG=$(DEBUG) -ggdb
+CC:=xcrun -sdk "$(SDKROOT)" cc
+
+ifdef RC_ARCHS
+  ARCHS:=$(RC_ARCHS)
+else
+  ifeq "$(Embedded)" "YES"
+    ARCHS:=armv7 armv7s
+  else
+    ARCHS:=x86_64 i386
+  endif
+endif
+
+CFLAGS := -g $(patsubst %, -arch %, $(ARCHS))
+
+DSTROOT?=$(shell /bin/pwd)
+SYMROOT?=$(shell /bin/pwd)
+
+DEBUG:=0
+
+$(DSTROOT)/zn: zero-to-n.c
+	$(CC) $(CFLAGS) -Wall zero-to-n.c -o $(SYMROOT)/$(notdir $@) -DDEBUG=$(DEBUG) -ggdb
+	if [ ! -e $@ ]; then ditto $(SYMROOT)/$(notdir $@) $@; fi
+
+clean:
+	rm -rf $(DSTROOT)/zn $(SYMROOT)/*.dSYM $(SYMROOT)/zn
diff --git a/tools/tests/zero-to-n/zero-to-n.c b/tools/tests/zero-to-n/zero-to-n.c
index 0d4dcfe16..2df39a02e 100644
--- a/tools/tests/zero-to-n/zero-to-n.c
+++ b/tools/tests/zero-to-n/zero-to-n.c
@@ -64,7 +64,7 @@ typedef enum my_policy_type { MY_POLICY_REALTIME, MY_POLICY_TIMESHARE, MY_POLICY
 /* Declarations */
 void*			child_thread_func(void *arg);
 void			print_usage();
-int			thread_setup();
+int			thread_setup(int my_id);
 my_policy_type_t	parse_thread_policy(const char *str);
 int			thread_finish_iteration();
 
@@ -79,6 +79,7 @@ uint64_t	*g_thread_endtimes_abs;
 volatile int32_t	g_done_threads;
 boolean_t		g_do_spin = FALSE;
 boolean_t		g_verbose = FALSE;
+boolean_t		g_do_affinity = FALSE;
 uint64_t		g_starttime_abs;
 #if MIMIC_DIGI_LEAD_TIME
 int			g_long_spinid;
@@ -144,7 +145,7 @@ parse_wakeup_pattern(const char *str)
  * Set policy
  */
 int
-thread_setup()
+thread_setup(int my_id)
 {
 	int res;
 
@@ -183,6 +184,15 @@ thread_setup()
 		}
 	}
 
+	if (g_do_affinity) {
+		thread_affinity_policy_data_t affinity;
+
+		affinity.affinity_tag = my_id % 2;
+
+		res = thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY, (thread_policy_t)&affinity, THREAD_AFFINITY_POLICY_COUNT);
+		assert(res == 0, fail);
+	}
+
 	return 0;
 fail:
 	return 1;
@@ -230,12 +240,14 @@ thread_finish_iteration(int id)
 		debug_log("Thread %p signalling main thread.\n", pthread_self());
 		res = semaphore_signal(g_main_sem);
 	} else {
+#ifndef MIMIC_DIGI_LEAD_TIME
 		if (g_do_spin) {
 			while (g_done_threads < g_numthreads) {
 				y = y + 1.5 + x;
 				x = sqrt(y);
 			}
 		}
+#endif
 	}
 
 	return res;
@@ -254,7 +266,7 @@ child_thread_func(void *arg)
 	int32_t		new;
 
 	/* Set policy and so forth */
-	thread_setup();
+	thread_setup(my_id);
 
 	/* Tell main thread when everyone has set up */
 	new = OSAtomicIncrement32(&g_done_threads);
@@ -351,7 +363,7 @@ fail:
 void
 print_usage()
 {
-	printf("Usage: zn [-trace <traceworthy latency in ns>] [-spin] [-verbose]\n");
+	printf("Usage: zn [-trace <traceworthy latency in ns>] [-spin] [-affinity] [-verbose]\n");
 }
 
 /*
@@ -430,6 +442,8 @@ main(int argc, char **argv)
 		} else if ((strcmp(argv[i], "-trace") == 0) && (i < (argc - 1))) {
 			traceworthy_latency_ns = strtoull(argv[++i], NULL, 10);
+		} else if (strcmp(argv[i], "-affinity") == 0) {
+			g_do_affinity = TRUE;
 		} else {
 			print_usage();
 			goto fail;
diff --git a/tools/xcrun_cache.sh b/tools/xcrun_cache.sh
new file mode 100755
index 000000000..ccd065add
--- /dev/null
+++ b/tools/xcrun_cache.sh
@@ -0,0 +1,276 @@
+#!/bin/sh
+
+#
+# This shell-script is argument-compatible with xcrun(1) as invoked
+# by xnu's Makefiles. Additionally, it supports caching tools
+# in the local build directory. It is tightly coupled to exactly
+# the queries that MakeInc.cmd makes, in exactly the order it makes
+# them. ./tools/remote_build.sh invokes this indirectly in caching
+# mode, so '$(XCRUN) -sdk foo -find bar' copies 'bar' from wherever
+# it is on-disk into ./BUILD/BuildData, and returns that path to the
+# caller. In '-u' mode on a remote build server, cached tools
+# relative to the current build directory are returned without
+# actually calling through to xcrun(1), since the remote build
+# server may not even have Xcode installed.
+#
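A usage sketch of the two modes the header describes (the OBJROOT path BUILD/obj is hypothetical; the real invocations come from MakeInc.cmd via remote_build.sh):

	# Caching mode on the local machine: resolve via xcrun(1), copy the
	# tool into the build directory, and echo the cached path.
	./tools/xcrun_cache.sh -c BUILD/obj -sdk macosx -find clang

	# Use mode on a remote build server without Xcode: answer the same
	# query purely from the metadata cached under BUILD/obj/BuildTools.
	./tools/xcrun_cache.sh -u BUILD/obj -sdk macosx -find clang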
+SDKROOT=""
+FINDTOOL=""
+SDKQUERY=""
+VERBOSE=""
+OBJROOT=""
+CACHE=0
+
+# echo "Calling $0 $@" 1>&2
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -c)
+            CACHE=1
+            shift
+            OBJROOT="$1"
+            shift
+            ;;
+        -u)
+            CACHE=0
+            shift
+            OBJROOT="$1"
+            shift
+            ;;
+        -sdk)
+            shift
+            SDKROOT="$1"
+            shift
+            ;;
+        -verbose)
+            VERBOSE="$1"
+            shift
+            set -x
+            ;;
+        -find)
+            shift
+            FINDTOOL="$1"
+            shift
+            ;;
+        -show-sdk-path)
+            SDKQUERY="$1"
+            shift
+            ;;
+        -show-sdk-platform-path)
+            SDKQUERY="$1"
+            shift
+            ;;
+        -show-sdk-version)
+            SDKQUERY="$1"
+            shift
+            ;;
+        *)
+            echo "Unrecognized argument $1" 1>&2
+            exit 1
+    esac
+done
+
+function CreateFile() {
+    local string="$1"
+    local filepath="$2"
+    echo "${string}" > "${filepath}.new"
+    cmp -s "${filepath}" "${filepath}.new"
+    if [ $? -eq 0 ]; then
+        rm "${filepath}.new"
+    else
+        mv "${filepath}.new" "${filepath}"
+    fi
+}
+
+if [ $CACHE -eq 1 ]; then
+
+    if [ -n "$SDKQUERY" ]; then
+        # MakeInc.cmd makes SDK queries up-front first. Generally the
+        # SDKROOT that is an input to these are one of:
+        # "macosx" => Host SDK
+        # "iphonehostXXX" => iPhone Host SDK
+        # other shortcut or full path => Target SDK
+        #
+        # Once an initial lookup is made, subsequent SDKROOTs for
+        # that same SDK may use a full path or cached path
+        SDKTYPE=""
+        case "$SDKROOT" in
+            macosx)
+                SDKTYPE="host"
+                ;;
+            iphonehost*)
+                SDKTYPE="iphonehost"
+                ;;
+            *)
+                if [ -f "$SDKROOT/.sdktype" ]; then
+                    SDKTYPE=`cat "$SDKROOT/.sdktype"`
+                else
+                    SDKTYPE="target"
+                fi
+                ;;
+        esac
+
+        # A cached SDK path can't be passed to xcrun, so
+        # we need the original on-disk path
+        if [ -f "$SDKROOT/.realsdkpath" ]; then
+            REALSDKROOT=`cat "$SDKROOT/.realsdkpath"`
+        else
+            REALSDKROOT="$SDKROOT"
+        fi
+
+        SDKPROPERTY=`/usr/bin/xcrun $VERBOSE -sdk "$REALSDKROOT" "$SDKQUERY"`
+        if [ $? -ne 0 ]; then
+            exit 1  # 'exit $?' here would always exit 0, the status of the [ test
+        fi
+
+        case $SDKQUERY in
+            -show-sdk-path)
+                # Cache the SDK locally, and transform the resulting SDKPROPERTY
+                if [ -z "$SDKPROPERTY" ]; then
+                    SDKPROPERTY="/"
+                    SDKNAME="Slash.sdk"
+                else
+                    SDKNAME=$(basename "${SDKPROPERTY}")
+                fi
+                mkdir -p "${OBJROOT}/BuildTools/${SDKNAME}"
+                mkdir -p "${OBJROOT}/BuildTools/${SDKNAME}/usr/include"
+                rsync -aq --exclude=c++ --exclude=php --exclude=soc "${SDKPROPERTY}/usr/include/" "${OBJROOT}/BuildTools/${SDKNAME}/usr/include/"
+                if [ "$SDKTYPE" = "iphonehost" ]; then
+                    mkdir -p "${OBJROOT}/BuildTools/${SDKNAME}/usr/local/lib/system"
+                    rsync -aq "${SDKPROPERTY}/usr/local/lib/system/" "${OBJROOT}/BuildTools/${SDKNAME}/usr/local/lib/system/"
+                else
+                    mkdir -p "${OBJROOT}/BuildTools/${SDKNAME}/usr/lib"
+                    rsync -aq "${SDKPROPERTY}/usr/lib/libSystem"* "${OBJROOT}/BuildTools/${SDKNAME}/usr/lib/"
+                    rsync -aq "${SDKPROPERTY}/usr/lib/libc++"* "${OBJROOT}/BuildTools/${SDKNAME}/usr/lib/"
+                    rsync -aq "${SDKPROPERTY}/usr/lib/libstdc++"* "${OBJROOT}/BuildTools/${SDKNAME}/usr/lib/"
+                    mkdir -p "${OBJROOT}/BuildTools/${SDKNAME}/usr/lib/system"
+                    rsync -aq --exclude=\*_debug.dylib --exclude=\*_profile.dylib "${SDKPROPERTY}/usr/lib/system/" "${OBJROOT}/BuildTools/${SDKNAME}/usr/lib/system/"
+                fi
+                if [ -f "${SDKPROPERTY}/usr/local/libexec/availability.pl" ]; then
+                    mkdir -p "${OBJROOT}/BuildTools/${SDKNAME}/usr/local/libexec"
+                    rsync -aq "${SDKPROPERTY}/usr/local/libexec/availability.pl" "${OBJROOT}/BuildTools/${SDKNAME}/usr/local/libexec/"
+                fi
+                CreateFile "${SDKPROPERTY}" "${OBJROOT}/BuildTools/${SDKNAME}/.realsdkpath"
+                CreateFile "${SDKTYPE}" "${OBJROOT}/BuildTools/${SDKNAME}/.sdktype"
+                CreateFile "BuildTools/${SDKNAME}" "${OBJROOT}/BuildTools/.${SDKTYPE}sdk"
+                echo "${OBJROOT}/BuildTools/${SDKNAME}"
+                exit 0
+                ;;
+            -show-sdk-platform-path)
+                PLATFORMNAME=$(basename "${SDKPROPERTY}")
+                mkdir -p "${OBJROOT}/BuildTools/${PLATFORMNAME}"
+                if [ -f "${SDKPROPERTY}/usr/local/standalone/firmware/device_map.db" ]; then
+                    mkdir -p "${OBJROOT}/BuildTools/${PLATFORMNAME}/usr/local/standalone/firmware"
+                    rsync -aq "${SDKPROPERTY}/usr/local/standalone/firmware/device_map.db" \
+                        "${OBJROOT}/BuildTools/${PLATFORMNAME}/usr/local/standalone/firmware/"
+                fi
+                CreateFile "BuildTools/${PLATFORMNAME}" "${OBJROOT}/BuildTools/.targetplatform"
+                echo "${OBJROOT}/BuildTools/${PLATFORMNAME}"
+                exit 0
+                ;;
+            -show-sdk-version)
+                CreateFile "${SDKPROPERTY}" "${OBJROOT}/BuildTools/.targetsdkversion"
+                echo "${SDKPROPERTY}"
+                exit 0
+                ;;
+        esac
+
+    elif [ -n "$FINDTOOL" ]; then
+
+        # We assume SDK Queries have been performed first and subsequent
+        # SDKROOTs used to find tools are all using cached SDKs in
+        # the build directory, in which case metadata is present
+
+        if [ ! -f "$SDKROOT/.realsdkpath" ]; then
+            exit 1
+        fi
+        REALSDKROOT=`cat "$SDKROOT/.realsdkpath"`
+
+        if [ ! -f "$SDKROOT/.sdktype" ]; then
+            exit 1
+        fi
+        SDKTYPE=`cat "$SDKROOT/.sdktype"`
+
+        TOOLPATH=`/usr/bin/xcrun $VERBOSE -sdk "$REALSDKROOT" -find "$FINDTOOL"`
+        if [ $? -ne 0 ]; then
+            exit 1  # as above, 'exit $?' would always exit 0 here
+        fi
+
+        # Keep the parent directory when caching tools, along with Host vs. Target
+        TOOLNAME=$(basename "${TOOLPATH}")
+        TOOLDIR=$(basename $(dirname "${TOOLPATH}"))
+        if [ "$SDKTYPE" = "host" ]; then
+            NEWTOOLPATH="${OBJROOT}/BuildTools/Host/${TOOLDIR}/${TOOLNAME}"
+            mkdir -p "${OBJROOT}/BuildTools/Host"
+            CreateFile "BuildTools/Host/${TOOLDIR}/${TOOLNAME}" "${OBJROOT}/BuildTools/Host/.${TOOLNAME}"
+        else
+            NEWTOOLPATH="${OBJROOT}/BuildTools/Target/${TOOLDIR}/${TOOLNAME}"
+            mkdir -p "${OBJROOT}/BuildTools/Target"
+            CreateFile "BuildTools/Target/${TOOLDIR}/${TOOLNAME}" "${OBJROOT}/BuildTools/Target/.${TOOLNAME}"
+        fi
+        mkdir -p $(dirname "${NEWTOOLPATH}")
+        rsync -aq "${TOOLPATH}" "${NEWTOOLPATH}"
+        case "${TOOLNAME}" in
+            clang)
+                mkdir -p $(dirname $(dirname "${NEWTOOLPATH}"))/lib/clang
+                rsync -aq $(dirname "${TOOLPATH}")/ld $(dirname "${NEWTOOLPATH}")/ld
+                rsync -aq $(dirname $(dirname "${TOOLPATH}"))/lib/clang/ $(dirname $(dirname "${NEWTOOLPATH}"))/lib/clang/
+                rsync -aq $(dirname $(dirname "${TOOLPATH}"))/lib/libLTO.dylib $(dirname $(dirname "${NEWTOOLPATH}"))/lib/libLTO.dylib
+                ;;
+            bison)
+                mkdir -p $(dirname $(dirname "${NEWTOOLPATH}"))/share/bison
+                rsync -aq $(dirname $(dirname "${TOOLPATH}"))/share/bison/ $(dirname $(dirname "${NEWTOOLPATH}"))/share/bison/
+                ;;
+        esac
+
+        echo "${NEWTOOLPATH}"
+        exit 0
+    else
+        echo "Unrecognized option" 1>&2
+        exit 1
+    fi
+fi
+
+# When using cached SDK information, first try to do
+# an initial classification, and then read properties from
+# cached locations
+SDKTYPE=""
+case "$SDKROOT" in
+    macosx)
+        SDKTYPE="host"
+        ;;
+    iphonehost*)
+        SDKTYPE="iphonehost"
+        ;;
+    *)
+        if [ -f "$SDKROOT/.sdktype" ]; then
+            SDKTYPE=`cat "$SDKROOT/.sdktype"`
+        else
+            SDKTYPE="target"
+        fi
+        ;;
+esac
+
+if [ -n "$FINDTOOL" ]; then
+    TOOLNAME=$(basename "${FINDTOOL}")
+    if [ "${SDKTYPE}" = "host" ]; then
+        RELPATH=`cat ${OBJROOT}/BuildTools/Host/.${TOOLNAME}`
+    else
+        RELPATH=`cat ${OBJROOT}/BuildTools/Target/.${TOOLNAME}`
+    fi
+    echo "${OBJROOT}/${RELPATH}"
+else
+    case $SDKQUERY in
+        -show-sdk-path)
+            RELPATH=`cat ${OBJROOT}/BuildTools/.${SDKTYPE}sdk`
+            echo "${OBJROOT}/${RELPATH}"
+            ;;
+        -show-sdk-platform-path)
+            RELPATH=`cat ${OBJROOT}/BuildTools/.targetplatform`
+            echo "${OBJROOT}/${RELPATH}"
+            ;;
+        -show-sdk-version)
+            echo `cat ${OBJROOT}/BuildTools/.targetsdkversion`
+            ;;
+    esac
+fi
-- 
2.45.2